From 0793a61d4df8daeac6492dbf8d2f3e5713caae5e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 4 Dec 2008 20:12:29 +0100
Subject: performance counters: core code

Implement the core kernel bits of Performance Counters subsystem.

The Linux Performance Counter subsystem provides an abstraction of
performance counter hardware capabilities. It provides per task and per
CPU counters, and it provides event capabilities on top of those.

Performance counters are accessed via special file descriptors.
There's one file descriptor per virtual counter used.

The special file descriptor is opened via the perf_counter_open()
system call:

 int
 perf_counter_open(u32 hw_event_type,
                   u32 hw_event_period,
                   u32 record_type,
                   pid_t pid,
                   int cpu);

The syscall returns the new fd. The fd can be used via the normal
VFS system calls: read() can be used to read the counter, fcntl()
can be used to set the blocking mode, etc.

Multiple counters can be kept open at a time, and the counters
can be poll()ed.

See more details in Documentation/perf-counters.txt.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 171 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/sched.h        |   9 +++
 include/linux/syscalls.h     |   6 ++
 3 files changed, 186 insertions(+)
 create mode 100644 include/linux/perf_counter.h

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
new file mode 100644
index 00000000000..22c4469abf4
--- /dev/null
+++ b/include/linux/perf_counter.h
@@ -0,0 +1,171 @@
+/*
+ *  Performance counters:
+ *
+ *   Copyright(C) 2008, Thomas Gleixner <tglx@linutronix.de>
+ *   Copyright(C) 2008, Red Hat, Inc., Ingo Molnar
+ *
+ *  Data type definitions, declarations, prototypes.
+ *
+ *  Started by: Thomas Gleixner and Ingo Molnar
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+#ifndef _LINUX_PERF_COUNTER_H
+#define _LINUX_PERF_COUNTER_H
+
+#include <asm/atomic.h>
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/spinlock.h>
+
+struct task_struct;
+
+/*
+ * Generalized hardware event types, used by the hw_event_type parameter
+ * of the sys_perf_counter_open() syscall:
+ */
+enum hw_event_types {
+	PERF_COUNT_CYCLES,
+	PERF_COUNT_INSTRUCTIONS,
+	PERF_COUNT_CACHE_REFERENCES,
+	PERF_COUNT_CACHE_MISSES,
+	PERF_COUNT_BRANCH_INSTRUCTIONS,
+	PERF_COUNT_BRANCH_MISSES,
+	/*
+	 * If this bit is set in the type, then trigger NMI sampling:
+	 */
+	PERF_COUNT_NMI			= (1 << 30),
+};
+
+/*
+ * IRQ-notification data record type:
+ */
+enum perf_record_type {
+	PERF_RECORD_SIMPLE,
+	PERF_RECORD_IRQ,
+	PERF_RECORD_GROUP,
+};
+
+/**
+ * struct hw_perf_counter - performance counter hardware details
+ */
+struct hw_perf_counter {
+	u64			config;
+	unsigned long		config_base;
+	unsigned long		counter_base;
+	int			nmi;
+	unsigned int		idx;
+	u64			prev_count;
+	s32			next_count;
+	u64			irq_period;
+};
+
+/*
+ * Hardcoded buffer length limit for now, for IRQ-fed events:
+ */
+#define PERF_DATA_BUFLEN	2048
+
+/**
+ * struct perf_data - performance counter IRQ data sampling ...
+ */
+struct perf_data {
+	int			len;
+	int			rd_idx;
+	int			overrun;
+	u8			data[PERF_DATA_BUFLEN];
+};
+
+/**
+ * struct perf_counter - performance counter kernel representation:
+ */
+struct perf_counter {
+	struct list_head		list;
+	int				active;
+#if BITS_PER_LONG == 64
+	atomic64_t			count;
+#else
+	atomic_t			count32[2];
+#endif
+	u64				__irq_period;
+
+	struct hw_perf_counter		hw;
+
+	struct perf_counter_context	*ctx;
+	struct task_struct		*task;
+
+	/*
+	 * Protect attach/detach:
+	 */
+	struct mutex			mutex;
+
+	int				oncpu;
+	int				cpu;
+
+	s32				hw_event_type;
+	enum perf_record_type		record_type;
+
+	/* read() / irq related data */
+	wait_queue_head_t		waitq;
+	/* optional: for NMIs */
+	int				wakeup_pending;
+	struct perf_data		*irqdata;
+	struct perf_data		*usrdata;
+	struct perf_data		data[2];
+};
+
+/**
+ * struct perf_counter_context - counter context structure
+ *
+ * Used as a container for task counters and CPU counters as well:
+ */
+struct perf_counter_context {
+#ifdef CONFIG_PERF_COUNTERS
+	/*
+	 * Protect the list of counters:
+	 */
+	spinlock_t		lock;
+	struct list_head	counters;
+	int			nr_counters;
+	int			nr_active;
+	struct task_struct	*task;
+#endif
+};
+
+/**
+ * struct perf_counter_cpu_context - per cpu counter context structure
+ */
+struct perf_cpu_context {
+	struct perf_counter_context	ctx;
+	struct perf_counter_context	*task_ctx;
+	int				active_oncpu;
+	int				max_pertask;
+};
+
+/*
+ * Set by architecture code:
+ */
+extern int perf_max_counters;
+
+#ifdef CONFIG_PERF_COUNTERS
+extern void perf_counter_task_sched_in(struct task_struct *task, int cpu);
+extern void perf_counter_task_sched_out(struct task_struct *task, int cpu);
+extern void perf_counter_task_tick(struct task_struct *task, int cpu);
+extern void perf_counter_init_task(struct task_struct *task);
+extern void perf_counter_notify(struct pt_regs *regs);
+extern void perf_counter_print_debug(void);
+#else
+static inline void
+perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
+static inline void
+perf_counter_task_sched_out(struct task_struct *task, int cpu)		{ }
+static inline void
+perf_counter_task_tick(struct task_struct *task, int cpu)		{ }
+static inline void perf_counter_init_task(struct task_struct *task)	{ }
+static inline void perf_counter_notify(struct pt_regs *regs)		{ }
+static inline void perf_counter_print_debug(void)			{ }
+#endif
+
+#endif /* _LINUX_PERF_COUNTER_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 55e30d11447..4c530278391 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -71,6 +71,7 @@ struct sched_param {
 #include <linux/fs_struct.h>
 #include <linux/compiler.h>
 #include <linux/completion.h>
+#include <linux/perf_counter.h>
 #include <linux/pid.h>
 #include <linux/percpu.h>
 #include <linux/topology.h>
@@ -1326,6 +1327,7 @@ struct task_struct {
 	struct list_head pi_state_list;
 	struct futex_pi_state *pi_state_cache;
 #endif
+	struct perf_counter_context perf_counter_ctx;
 #ifdef CONFIG_NUMA
 	struct mempolicy *mempolicy;
 	short il_next;
@@ -2285,6 +2287,13 @@ static inline void inc_syscw(struct task_struct *tsk)
 #define TASK_SIZE_OF(tsk)	TASK_SIZE
 #endif
 
+/*
+ * Call the function if the target task is executing on a CPU right now:
+ */
+extern void task_oncpu_function_call(struct task_struct *p,
+				     void (*func) (void *info), void *info);
+
+
 #ifdef CONFIG_MM_OWNER
 extern void mm_update_next_owner(struct mm_struct *mm);
 extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 04fb47bfb92..6cce728a626 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -624,4 +624,10 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
 
 int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
+asmlinkage int
+sys_perf_counter_open(u32 hw_event_type,
+		      u32 hw_event_period,
+		      u32 record_type,
+		      pid_t pid,
+		      int cpu);
 #endif
-- 
cgit v1.2.3-70-g09d2


From 4ac13294e44664bb7edf4daf52edb71e7c6bbe84 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 9 Dec 2008 21:43:39 +0100
Subject: perf counters: protect them against CSTATE transitions

Impact: fix rare lost events problem

There are CPUs whose performance counters misbehave on CSTATE transitions,
so provide a way to just disable/enable them around deep idle methods.

(hw_perf_enable_all() is cheap on x86.)

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_counter.c | 14 +++++++++++++-
 drivers/acpi/processor_idle.c      |  8 ++++++++
 include/linux/perf_counter.h       |  4 ++++
 3 files changed, 25 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 6a93d1f04d9..0a7f3bea2dc 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -12,6 +12,7 @@
 #include <linux/notifier.h>
 #include <linux/hardirq.h>
 #include <linux/kprobes.h>
+#include <linux/module.h>
 #include <linux/kdebug.h>
 #include <linux/sched.h>
 
@@ -119,10 +120,21 @@ void hw_perf_enable_all(void)
 	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0);
 }
 
-void hw_perf_disable_all(void)
+void hw_perf_restore_ctrl(u64 ctrl)
 {
+	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0);
+}
+EXPORT_SYMBOL_GPL(hw_perf_restore_ctrl);
+
+u64 hw_perf_disable_all(void)
+{
+	u64 ctrl;
+
+	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
 	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0);
+	return ctrl;
 }
+EXPORT_SYMBOL_GPL(hw_perf_disable_all);
 
 static inline void
 __hw_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx)
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 5f8d746a9b8..cca804e6f1d 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -270,8 +270,11 @@ static atomic_t c3_cpu_count;
 /* Common C-state entry for C2, C3, .. */
 static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
 {
+	u64 pctrl;
+
 	/* Don't trace irqs off for idle */
 	stop_critical_timings();
+	pctrl = hw_perf_disable_all();
 	if (cstate->entry_method == ACPI_CSTATE_FFH) {
 		/* Call into architectural FFH based C-state */
 		acpi_processor_ffh_cstate_enter(cstate);
@@ -284,6 +287,7 @@ static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
 		   gets asserted in time to freeze execution properly. */
 		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
 	}
+	hw_perf_restore_ctrl(pctrl);
 	start_critical_timings();
 }
 #endif /* !CONFIG_CPU_IDLE */
@@ -1425,8 +1429,11 @@ static inline void acpi_idle_update_bm_rld(struct acpi_processor *pr,
  */
 static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
 {
+	u64 pctrl;
+
 	/* Don't trace irqs off for idle */
 	stop_critical_timings();
+	pctrl = hw_perf_disable_all();
 	if (cx->entry_method == ACPI_CSTATE_FFH) {
 		/* Call into architectural FFH based C-state */
 		acpi_processor_ffh_cstate_enter(cx);
@@ -1441,6 +1448,7 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
 		   gets asserted in time to freeze execution properly. */
 		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
 	}
+	hw_perf_restore_ctrl(pctrl);
 	start_critical_timings();
 }
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 22c4469abf4..5031b5614f2 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -156,6 +156,8 @@ extern void perf_counter_task_tick(struct task_struct *task, int cpu);
 extern void perf_counter_init_task(struct task_struct *task);
 extern void perf_counter_notify(struct pt_regs *regs);
 extern void perf_counter_print_debug(void);
+extern void hw_perf_restore_ctrl(u64 ctrl);
+extern u64 hw_perf_disable_all(void);
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
@@ -166,6 +168,8 @@ perf_counter_task_tick(struct task_struct *task, int cpu)		{ }
 static inline void perf_counter_init_task(struct task_struct *task)	{ }
 static inline void perf_counter_notify(struct pt_regs *regs)		{ }
 static inline void perf_counter_print_debug(void)			{ }
+static inline void hw_perf_restore_ctrl(u64 ctrl)			{ }
+static inline u64 hw_perf_disable_all(void)		{ return 0; }
 #endif
 
 #endif /* _LINUX_PERF_COUNTER_H */
-- 
cgit v1.2.3-70-g09d2


From eab656ae04b9d3b83265e3db01c0d2c46b748ef7 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 8 Dec 2008 19:26:59 +0100
Subject: perf counters: clean up 'raw' type API

Impact: cleanup

Introduce a separate hw_event type.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  7 +++++++
 include/linux/syscalls.h     |  8 +++-----
 kernel/perf_counter.c        | 15 ++++++++-------
 3 files changed, 18 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 5031b5614f2..daedd7d87c2 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -38,6 +38,7 @@ enum hw_event_types {
 	 * If this bit is set in the type, then trigger NMI sampling:
 	 */
 	PERF_COUNT_NMI			= (1 << 30),
+	PERF_COUNT_RAW			= (1 << 31),
 };
 
 /*
@@ -49,6 +50,12 @@ enum perf_record_type {
 	PERF_RECORD_GROUP,
 };
 
+struct perf_counter_event {
+	u32			hw_event_type;
+	u32			hw_event_period;
+	u64			hw_raw_ctrl;
+};
+
 /**
  * struct hw_perf_counter - performance counter hardware details
  */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 6cce728a626..3ecd73d03da 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -54,6 +54,7 @@ struct compat_stat;
 struct compat_timeval;
 struct robust_list_head;
 struct getcpu_cache;
+struct perf_counter_event;
 
 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -625,9 +626,6 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
 int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
 asmlinkage int
-sys_perf_counter_open(u32 hw_event_type,
-		      u32 hw_event_period,
-		      u32 record_type,
-		      pid_t pid,
-		      int cpu);
+sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type,
+		      pid_t pid, int cpu, int masterfd);
 #endif
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 20508f05365..96c333a5b0f 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -734,26 +734,27 @@ perf_counter_alloc(u32 hw_event_period, int cpu, u32 record_type)
  * @pid:		target pid
  */
 asmlinkage int
-sys_perf_counter_open(u32 hw_event_type,
-		      u32 hw_event_period,
-		      u32 record_type,
-		      pid_t pid,
-		      int cpu)
+sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type,
+		      pid_t pid, int cpu, int masterfd)
 {
 	struct perf_counter_context *ctx;
+	struct perf_counter_event event;
 	struct perf_counter *counter;
 	int ret;
 
+	if (copy_from_user(&event, uevent, sizeof(event)) != 0)
+		return -EFAULT;
+
 	ctx = find_get_context(pid, cpu);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
 	ret = -ENOMEM;
-	counter = perf_counter_alloc(hw_event_period, cpu, record_type);
+	counter = perf_counter_alloc(event.hw_event_period, cpu, record_type);
 	if (!counter)
 		goto err_put_context;
 
-	ret = hw_perf_counter_init(counter, hw_event_type);
+	ret = hw_perf_counter_init(counter, event.hw_event_type);
 	if (ret)
 		goto err_free_put_context;
 
-- 
cgit v1.2.3-70-g09d2


From dfa7c899b401d7dc5d85aca416aee64ac82812f2 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 8 Dec 2008 19:35:37 +0100
Subject: perf counters: expand use of counter->event

Impact: change syscall, cleanup

Make use of the new perf_counters event type.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_counter.c | 22 +++++++++++-----------
 include/linux/perf_counter.h       |  4 +---
 kernel/perf_counter.c              | 10 +++++-----
 3 files changed, 17 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 0a7f3bea2dc..30e7ebf7827 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -56,9 +56,10 @@ const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map);
 /*
  * Setup the hardware configuration for a given hw_event_type
  */
-int hw_perf_counter_init(struct perf_counter *counter, s32 hw_event_type)
+int hw_perf_counter_init(struct perf_counter *counter)
 {
 	struct hw_perf_counter *hwc = &counter->hw;
+	u32 hw_event_type = counter->event.hw_event_type;
 
 	if (unlikely(!perf_counters_initialized))
 		return -EINVAL;
@@ -83,7 +84,7 @@ int hw_perf_counter_init(struct perf_counter *counter, s32 hw_event_type)
 	hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0;
 	hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0;
 
-	hwc->irq_period = counter->__irq_period;
+	hwc->irq_period = counter->event.hw_event_period;
 	/*
 	 * Intel PMCs cannot be accessed sanely above 32 bit width,
 	 * so we install an artificial 1<<31 period regardless of
@@ -95,21 +96,19 @@ int hw_perf_counter_init(struct perf_counter *counter, s32 hw_event_type)
 	hwc->next_count = -((s32) hwc->irq_period);
 
 	/*
-	 * Negative event types mean raw encoded event+umask values:
+	 * Raw event type provide the config in the event structure
 	 */
-	if (hw_event_type < 0) {
-		counter->hw_event_type = -hw_event_type;
-		counter->hw_event_type &= ~PERF_COUNT_NMI;
+	hw_event_type &= ~PERF_COUNT_NMI;
+	if (hw_event_type == PERF_COUNT_RAW) {
+		hwc->config |= counter->event.hw_raw_ctrl;
 	} else {
-		hw_event_type &= ~PERF_COUNT_NMI;
 		if (hw_event_type >= max_intel_perfmon_events)
 			return -EINVAL;
 		/*
 		 * The generic map:
 		 */
-		counter->hw_event_type = intel_perfmon_event_map[hw_event_type];
+		hwc->config |= intel_perfmon_event_map[hw_event_type];
 	}
-	hwc->config |= counter->hw_event_type;
 	counter->wakeup_pending = 0;
 
 	return 0;
@@ -373,7 +372,7 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown)
 				perf_save_and_restart(counter);
 			}
 		}
-		perf_store_irq_data(leader, counter->hw_event_type);
+		perf_store_irq_data(leader, counter->event.hw_event_type);
 		perf_store_irq_data(leader, atomic64_counter_read(counter));
 	}
 }
@@ -418,7 +417,8 @@ again:
 			perf_store_irq_data(counter, instruction_pointer(regs));
 			break;
 		case PERF_RECORD_GROUP:
-			perf_store_irq_data(counter, counter->hw_event_type);
+			perf_store_irq_data(counter,
+					    counter->event.hw_event_type);
 			perf_store_irq_data(counter,
 					    atomic64_counter_read(counter));
 			perf_handle_group(counter, &status, &ack);
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index daedd7d87c2..1f0017673e7 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -96,8 +96,7 @@ struct perf_counter {
 #else
 	atomic_t			count32[2];
 #endif
-	u64				__irq_period;
-
+	struct perf_counter_event	event;
 	struct hw_perf_counter		hw;
 
 	struct perf_counter_context	*ctx;
@@ -111,7 +110,6 @@ struct perf_counter {
 	int				oncpu;
 	int				cpu;
 
-	s32				hw_event_type;
 	enum perf_record_type		record_type;
 
 	/* read() / irq related data */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 96c333a5b0f..2557c670a3b 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -37,7 +37,7 @@ static DEFINE_MUTEX(perf_resource_mutex);
  * Architecture provided APIs - weak aliases:
  */
 
-int __weak hw_perf_counter_init(struct perf_counter *counter, u32 hw_event_type)
+int __weak hw_perf_counter_init(struct perf_counter *counter)
 {
 	return -EINVAL;
 }
@@ -707,7 +707,7 @@ static const struct file_operations perf_fops = {
  * Allocate and initialize a counter structure
  */
 static struct perf_counter *
-perf_counter_alloc(u32 hw_event_period, int cpu, u32 record_type)
+perf_counter_alloc(struct perf_counter_event *event, int cpu, u32 record_type)
 {
 	struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL);
 
@@ -722,7 +722,7 @@ perf_counter_alloc(u32 hw_event_period, int cpu, u32 record_type)
 	counter->usrdata	= &counter->data[1];
 	counter->cpu		= cpu;
 	counter->record_type	= record_type;
-	counter->__irq_period	= hw_event_period;
+	counter->event		= *event;
 	counter->wakeup_pending = 0;
 
 	return counter;
@@ -750,11 +750,11 @@ sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type,
 		return PTR_ERR(ctx);
 
 	ret = -ENOMEM;
-	counter = perf_counter_alloc(event.hw_event_period, cpu, record_type);
+	counter = perf_counter_alloc(&event, cpu, record_type);
 	if (!counter)
 		goto err_put_context;
 
-	ret = hw_perf_counter_init(counter, event.hw_event_type);
+	ret = hw_perf_counter_init(counter);
 	if (ret)
 		goto err_free_put_context;
 
-- 
cgit v1.2.3-70-g09d2


From 9f66a3810fe0d4100972db84290f3ae4a4d77025 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 10 Dec 2008 12:33:23 +0100
Subject: perf counters: restructure the API

Impact: clean up new API

Thorough cleanup of the new perf counters API, we now get clean separation
of the various concepts:

 - introduce perf_counter_hw_event to separate out the event source details

 - move special type flags into separate attributes: PERF_COUNT_NMI,
   PERF_COUNT_RAW

 - extend the type to u64 and reserve it fully to the architecture in the
   raw type case.

And make use of all these changes in the core and x86 perfcounters code.

Also change the syscall signature to:

  asmlinkage int sys_perf_counter_open(

	struct perf_counter_hw_event	*hw_event_uptr		__user,
	pid_t				pid,
	int				cpu,
	int				group_fd);

( Note that group_fd is unused for now - it's reserved for the counter
  groups abstraction. )

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_counter.c | 29 ++++++-----
 include/linux/perf_counter.h       | 98 ++++++++++++++++++++++++--------------
 include/linux/syscalls.h           | 12 +++--
 kernel/perf_counter.c              | 38 ++++++++-------
 4 files changed, 106 insertions(+), 71 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 30e7ebf7827..ef1936a871a 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -58,8 +58,8 @@ const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map);
  */
 int hw_perf_counter_init(struct perf_counter *counter)
 {
+	struct perf_counter_hw_event *hw_event = &counter->hw_event;
 	struct hw_perf_counter *hwc = &counter->hw;
-	u32 hw_event_type = counter->event.hw_event_type;
 
 	if (unlikely(!perf_counters_initialized))
 		return -EINVAL;
@@ -77,14 +77,14 @@ int hw_perf_counter_init(struct perf_counter *counter)
 	hwc->nmi = 0;
 	if (capable(CAP_SYS_ADMIN)) {
 		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
-		if (hw_event_type & PERF_COUNT_NMI)
+		if (hw_event->nmi)
 			hwc->nmi = 1;
 	}
 
-	hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0;
-	hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0;
+	hwc->config_base	= MSR_ARCH_PERFMON_EVENTSEL0;
+	hwc->counter_base	= MSR_ARCH_PERFMON_PERFCTR0;
 
-	hwc->irq_period = counter->event.hw_event_period;
+	hwc->irq_period		= hw_event->irq_period;
 	/*
 	 * Intel PMCs cannot be accessed sanely above 32 bit width,
 	 * so we install an artificial 1<<31 period regardless of
@@ -93,21 +93,20 @@ int hw_perf_counter_init(struct perf_counter *counter)
 	if (!hwc->irq_period)
 		hwc->irq_period = 0x7FFFFFFF;
 
-	hwc->next_count = -((s32) hwc->irq_period);
+	hwc->next_count	= -(s32)hwc->irq_period;
 
 	/*
 	 * Raw event type provide the config in the event structure
 	 */
-	hw_event_type &= ~PERF_COUNT_NMI;
-	if (hw_event_type == PERF_COUNT_RAW) {
-		hwc->config |= counter->event.hw_raw_ctrl;
+	if (hw_event->raw) {
+		hwc->config |= hw_event->type;
 	} else {
-		if (hw_event_type >= max_intel_perfmon_events)
+		if (hw_event->type >= max_intel_perfmon_events)
 			return -EINVAL;
 		/*
 		 * The generic map:
 		 */
-		hwc->config |= intel_perfmon_event_map[hw_event_type];
+		hwc->config |= intel_perfmon_event_map[hw_event->type];
 	}
 	counter->wakeup_pending = 0;
 
@@ -354,7 +353,7 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown)
 	int bit;
 
 	list_for_each_entry(counter, &ctx->counters, list) {
-		if (counter->record_type != PERF_RECORD_SIMPLE ||
+		if (counter->hw_event.record_type != PERF_RECORD_SIMPLE ||
 		    counter == leader)
 			continue;
 
@@ -372,7 +371,7 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown)
 				perf_save_and_restart(counter);
 			}
 		}
-		perf_store_irq_data(leader, counter->event.hw_event_type);
+		perf_store_irq_data(leader, counter->hw_event.type);
 		perf_store_irq_data(leader, atomic64_counter_read(counter));
 	}
 }
@@ -410,7 +409,7 @@ again:
 
 		perf_save_and_restart(counter);
 
-		switch (counter->record_type) {
+		switch (counter->hw_event.record_type) {
 		case PERF_RECORD_SIMPLE:
 			continue;
 		case PERF_RECORD_IRQ:
@@ -418,7 +417,7 @@ again:
 			break;
 		case PERF_RECORD_GROUP:
 			perf_store_irq_data(counter,
-					    counter->event.hw_event_type);
+					    counter->hw_event.type);
 			perf_store_irq_data(counter,
 					    atomic64_counter_read(counter));
 			perf_handle_group(counter, &status, &ack);
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 1f0017673e7..a2b4852e2d7 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -24,65 +24,93 @@
 struct task_struct;
 
 /*
- * Generalized hardware event types, used by the hw_event_type parameter
- * of the sys_perf_counter_open() syscall:
+ * User-space ABI bits:
+ */
+
+/*
+ * Generalized performance counter event types, used by the hw_event.type
+ * parameter of the sys_perf_counter_open() syscall:
  */
 enum hw_event_types {
-	PERF_COUNT_CYCLES,
-	PERF_COUNT_INSTRUCTIONS,
-	PERF_COUNT_CACHE_REFERENCES,
-	PERF_COUNT_CACHE_MISSES,
-	PERF_COUNT_BRANCH_INSTRUCTIONS,
-	PERF_COUNT_BRANCH_MISSES,
 	/*
-	 * If this bit is set in the type, then trigger NMI sampling:
+	 * Common hardware events, generalized by the kernel:
 	 */
-	PERF_COUNT_NMI			= (1 << 30),
-	PERF_COUNT_RAW			= (1 << 31),
+	PERF_COUNT_CYCLES		=  0,
+	PERF_COUNT_INSTRUCTIONS		=  1,
+	PERF_COUNT_CACHE_REFERENCES	=  2,
+	PERF_COUNT_CACHE_MISSES		=  3,
+	PERF_COUNT_BRANCH_INSTRUCTIONS	=  4,
+	PERF_COUNT_BRANCH_MISSES	=  5,
+
+	/*
+	 * Special "software" counters provided by the kernel, even if
+	 * the hardware does not support performance counters. These
+	 * counters measure various physical and sw events of the
+	 * kernel (and allow the profiling of them as well):
+	 */
+	PERF_COUNT_CPU_CLOCK		= -1,
+	PERF_COUNT_TASK_CLOCK		= -2,
+	PERF_COUNT_PAGE_FAULTS		= -3,
+	PERF_COUNT_CONTEXT_SWITCHES	= -4,
 };
 
 /*
  * IRQ-notification data record type:
  */
-enum perf_record_type {
-	PERF_RECORD_SIMPLE,
-	PERF_RECORD_IRQ,
-	PERF_RECORD_GROUP,
+enum perf_counter_record_type {
+	PERF_RECORD_SIMPLE		=  0,
+	PERF_RECORD_IRQ			=  1,
+	PERF_RECORD_GROUP		=  2,
 };
 
-struct perf_counter_event {
-	u32			hw_event_type;
-	u32			hw_event_period;
-	u64			hw_raw_ctrl;
+/*
+ * Hardware event to monitor via a performance monitoring counter:
+ */
+struct perf_counter_hw_event {
+	u64			type;
+
+	u64			irq_period;
+	u32			record_type;
+
+	u32			disabled     :  1, /* off by default */
+				nmi	     :  1, /* NMI sampling   */
+				raw	     :  1, /* raw event type */
+				__reserved_1 : 29;
+
+	u64			__reserved_2;
 };
 
+/*
+ * Kernel-internal data types:
+ */
+
 /**
- * struct hw_perf_counter - performance counter hardware details
+ * struct hw_perf_counter - performance counter hardware details:
  */
 struct hw_perf_counter {
-	u64			config;
-	unsigned long		config_base;
-	unsigned long		counter_base;
-	int			nmi;
-	unsigned int		idx;
-	u64			prev_count;
-	s32			next_count;
-	u64			irq_period;
+	u64				config;
+	unsigned long			config_base;
+	unsigned long			counter_base;
+	int				nmi;
+	unsigned int			idx;
+	u64				prev_count;
+	u64				irq_period;
+	s32				next_count;
 };
 
 /*
  * Hardcoded buffer length limit for now, for IRQ-fed events:
  */
-#define PERF_DATA_BUFLEN	2048
+#define PERF_DATA_BUFLEN		2048
 
 /**
  * struct perf_data - performance counter IRQ data sampling ...
  */
 struct perf_data {
-	int			len;
-	int			rd_idx;
-	int			overrun;
-	u8			data[PERF_DATA_BUFLEN];
+	int				len;
+	int				rd_idx;
+	int				overrun;
+	u8				data[PERF_DATA_BUFLEN];
 };
 
 /**
@@ -96,7 +124,7 @@ struct perf_counter {
 #else
 	atomic_t			count32[2];
 #endif
-	struct perf_counter_event	event;
+	struct perf_counter_hw_event	hw_event;
 	struct hw_perf_counter		hw;
 
 	struct perf_counter_context	*ctx;
@@ -110,8 +138,6 @@ struct perf_counter {
 	int				oncpu;
 	int				cpu;
 
-	enum perf_record_type		record_type;
-
 	/* read() / irq related data */
 	wait_queue_head_t		waitq;
 	/* optional: for NMIs */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 3ecd73d03da..a549678b7c3 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -54,7 +54,7 @@ struct compat_stat;
 struct compat_timeval;
 struct robust_list_head;
 struct getcpu_cache;
-struct perf_counter_event;
+struct perf_counter_hw_event;
 
 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -625,7 +625,11 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
 
 int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
-asmlinkage int
-sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type,
-		      pid_t pid, int cpu, int masterfd);
+
+asmlinkage int sys_perf_counter_open(
+
+	struct perf_counter_hw_event	*hw_event_uptr		__user,
+	pid_t				pid,
+	int				cpu,
+	int				group_fd);
 #endif
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 2557c670a3b..0d323ceda3a 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -669,7 +669,7 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
 	struct perf_counter *counter = file->private_data;
 
-	switch (counter->record_type) {
+	switch (counter->hw_event.record_type) {
 	case PERF_RECORD_SIMPLE:
 		return perf_read_hw(counter, buf, count);
 
@@ -707,7 +707,7 @@ static const struct file_operations perf_fops = {
  * Allocate and initialize a counter structure
  */
 static struct perf_counter *
-perf_counter_alloc(struct perf_counter_event *event, int cpu, u32 record_type)
+perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu)
 {
 	struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL);
 
@@ -718,31 +718,37 @@ perf_counter_alloc(struct perf_counter_event *event, int cpu, u32 record_type)
 	INIT_LIST_HEAD(&counter->list);
 	init_waitqueue_head(&counter->waitq);
 
-	counter->irqdata	= &counter->data[0];
-	counter->usrdata	= &counter->data[1];
-	counter->cpu		= cpu;
-	counter->record_type	= record_type;
-	counter->event		= *event;
-	counter->wakeup_pending = 0;
+	counter->irqdata		= &counter->data[0];
+	counter->usrdata		= &counter->data[1];
+	counter->cpu			= cpu;
+	counter->hw_event		= *hw_event;
+	counter->wakeup_pending		= 0;
 
 	return counter;
 }
 
 /**
- * sys_perf_task_open - open a performance counter associate it to a task
- * @hw_event_type:	event type for monitoring/sampling...
+ * sys_perf_task_open - open a performance counter, associate it to a task/cpu
+ *
+ * @hw_event_uptr:	event type attributes for monitoring/sampling
  * @pid:		target pid
+ * @cpu:		target cpu
+ * @group_fd:		group leader counter fd
  */
-asmlinkage int
-sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type,
-		      pid_t pid, int cpu, int masterfd)
+asmlinkage int sys_perf_counter_open(
+
+	struct perf_counter_hw_event	*hw_event_uptr		__user,
+	pid_t				pid,
+	int				cpu,
+	int				group_fd)
+
 {
 	struct perf_counter_context *ctx;
-	struct perf_counter_event event;
+	struct perf_counter_hw_event hw_event;
 	struct perf_counter *counter;
 	int ret;
 
-	if (copy_from_user(&event, uevent, sizeof(event)) != 0)
+	if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0)
 		return -EFAULT;
 
 	ctx = find_get_context(pid, cpu);
@@ -750,7 +756,7 @@ sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type,
 		return PTR_ERR(ctx);
 
 	ret = -ENOMEM;
-	counter = perf_counter_alloc(&event, cpu, record_type);
+	counter = perf_counter_alloc(&hw_event, cpu);
 	if (!counter)
 		goto err_put_context;
 
-- 
cgit v1.2.3-70-g09d2


From 04289bb9891882202d7e961c4c04d2376930e9f9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Dec 2008 08:38:42 +0100
Subject: perf counters: add support for group counters

Impact: add group counters

This patch adds the "counter groups" abstraction.

Groups of counters behave much like normal 'single' counters, with a
few semantic and behavioral extensions on top of that.

A counter group is created by creating a new counter with the open()
syscall's group-leader group_fd file descriptor parameter pointing
to another, already existing counter.

Groups of counters are scheduled in and out in one atomic group, and
they are also roundrobin-scheduled atomically.

Counters that are member of a group can also record events with an
(atomic) extended timestamp that extends to all members of the group,
if the record type is set to PERF_RECORD_GROUP.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_counter.c |  28 ++--
 include/linux/perf_counter.h       |   8 +-
 kernel/perf_counter.c              | 282 ++++++++++++++++++++++++++++---------
 3 files changed, 236 insertions(+), 82 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index ef1936a871a..54b4ad0cce6 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -346,18 +346,22 @@ static void perf_save_and_restart(struct perf_counter *counter)
 }
 
 static void
-perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown)
+perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
 {
-	struct perf_counter_context *ctx = leader->ctx;
-	struct perf_counter *counter;
+	struct perf_counter *counter, *group_leader = sibling->group_leader;
 	int bit;
 
-	list_for_each_entry(counter, &ctx->counters, list) {
-		if (counter->hw_event.record_type != PERF_RECORD_SIMPLE ||
-		    counter == leader)
-			continue;
+	/*
+	 * Store the counter's own timestamp first:
+	 */
+	perf_store_irq_data(sibling, sibling->hw_event.type);
+	perf_store_irq_data(sibling, atomic64_counter_read(sibling));
 
-		if (counter->active) {
+	/*
+	 * Then store sibling timestamps (if any):
+	 */
+	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
+		if (!counter->active) {
 			/*
 			 * When counter was not in the overflow mask, we have to
 			 * read it from hardware. We read it as well, when it
@@ -371,8 +375,8 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown)
 				perf_save_and_restart(counter);
 			}
 		}
-		perf_store_irq_data(leader, counter->hw_event.type);
-		perf_store_irq_data(leader, atomic64_counter_read(counter));
+		perf_store_irq_data(sibling, counter->hw_event.type);
+		perf_store_irq_data(sibling, atomic64_counter_read(counter));
 	}
 }
 
@@ -416,10 +420,6 @@ again:
 			perf_store_irq_data(counter, instruction_pointer(regs));
 			break;
 		case PERF_RECORD_GROUP:
-			perf_store_irq_data(counter,
-					    counter->hw_event.type);
-			perf_store_irq_data(counter,
-					    atomic64_counter_read(counter));
 			perf_handle_group(counter, &status, &ack);
 			break;
 		}
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index a2b4852e2d7..7af7d896546 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -117,7 +117,10 @@ struct perf_data {
  * struct perf_counter - performance counter kernel representation:
  */
 struct perf_counter {
-	struct list_head		list;
+	struct list_head		list_entry;
+	struct list_head		sibling_list;
+	struct perf_counter		*group_leader;
+
 	int				active;
 #if BITS_PER_LONG == 64
 	atomic64_t			count;
@@ -158,7 +161,8 @@ struct perf_counter_context {
 	 * Protect the list of counters:
 	 */
 	spinlock_t		lock;
-	struct list_head	counters;
+
+	struct list_head	counter_list;
 	int			nr_counters;
 	int			nr_active;
 	struct task_struct	*task;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0d323ceda3a..fa59fe8c02d 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -10,6 +10,7 @@
 #include <linux/fs.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
+#include <linux/file.h>
 #include <linux/poll.h>
 #include <linux/sysfs.h>
 #include <linux/ptrace.h>
@@ -55,7 +56,7 @@ void __weak hw_perf_counter_setup(void) { }
  * Read the cached counter in counter safe against cross CPU / NMI
  * modifications. 64 bit version - no complications.
  */
-static inline u64 perf_read_counter_safe(struct perf_counter *counter)
+static inline u64 perf_counter_read_safe(struct perf_counter *counter)
 {
 	return (u64) atomic64_read(&counter->count);
 }
@@ -66,7 +67,7 @@ static inline u64 perf_read_counter_safe(struct perf_counter *counter)
  * Read the cached counter in counter safe against cross CPU / NMI
  * modifications. 32 bit version.
  */
-static u64 perf_read_counter_safe(struct perf_counter *counter)
+static u64 perf_counter_read_safe(struct perf_counter *counter)
 {
 	u32 cntl, cnth;
 
@@ -83,13 +84,55 @@ static u64 perf_read_counter_safe(struct perf_counter *counter)
 
 #endif
 
+static void
+list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
+{
+	struct perf_counter *group_leader = counter->group_leader;
+
+	/*
+	 * Depending on whether it is a standalone or sibling counter,
+	 * add it straight to the context's counter list, or to the group
+	 * leader's sibling list:
+	 */
+	if (counter->group_leader == counter)
+		list_add_tail(&counter->list_entry, &ctx->counter_list);
+	else
+		list_add_tail(&counter->list_entry, &group_leader->sibling_list);
+}
+
+static void
+list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
+{
+	struct perf_counter *sibling, *tmp;
+
+	list_del_init(&counter->list_entry);
+
+	if (list_empty(&counter->sibling_list))
+		return;
+
+	/*
+	 * If this was a group counter with sibling counters then
+	 * upgrade the siblings to singleton counters by adding them
+	 * to the context list directly:
+	 */
+	list_for_each_entry_safe(sibling, tmp,
+				 &counter->sibling_list, list_entry) {
+
+		list_del_init(&sibling->list_entry);
+		list_add_tail(&sibling->list_entry, &ctx->counter_list);
+		WARN_ON_ONCE(!sibling->group_leader);
+		WARN_ON_ONCE(sibling->group_leader == sibling);
+		sibling->group_leader = sibling;
+	}
+}
+
 /*
  * Cross CPU call to remove a performance counter
  *
  * We disable the counter on the hardware level first. After that we
  * remove it from the context list.
  */
-static void __perf_remove_from_context(void *info)
+static void __perf_counter_remove_from_context(void *info)
 {
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_counter *counter = info;
@@ -119,7 +162,7 @@ static void __perf_remove_from_context(void *info)
 	 * counters on a global level. NOP for non NMI based counters.
 	 */
 	hw_perf_disable_all();
-	list_del_init(&counter->list);
+	list_del_counter(counter, ctx);
 	hw_perf_enable_all();
 
 	if (!ctx->task) {
@@ -144,7 +187,7 @@ static void __perf_remove_from_context(void *info)
  * CPU counters are removed with a smp call. For task counters we only
  * call when the task is on a CPU.
  */
-static void perf_remove_from_context(struct perf_counter *counter)
+static void perf_counter_remove_from_context(struct perf_counter *counter)
 {
 	struct perf_counter_context *ctx = counter->ctx;
 	struct task_struct *task = ctx->task;
@@ -155,32 +198,32 @@ static void perf_remove_from_context(struct perf_counter *counter)
 		 * the removal is always sucessful.
 		 */
 		smp_call_function_single(counter->cpu,
-					 __perf_remove_from_context,
+					 __perf_counter_remove_from_context,
 					 counter, 1);
 		return;
 	}
 
 retry:
-	task_oncpu_function_call(task, __perf_remove_from_context,
+	task_oncpu_function_call(task, __perf_counter_remove_from_context,
 				 counter);
 
 	spin_lock_irq(&ctx->lock);
 	/*
 	 * If the context is active we need to retry the smp call.
 	 */
-	if (ctx->nr_active && !list_empty(&counter->list)) {
+	if (ctx->nr_active && !list_empty(&counter->list_entry)) {
 		spin_unlock_irq(&ctx->lock);
 		goto retry;
 	}
 
 	/*
 	 * The lock prevents that this context is scheduled in so we
-	 * can remove the counter safely, if it the call above did not
+	 * can remove the counter safely, if the call above did not
 	 * succeed.
 	 */
-	if (!list_empty(&counter->list)) {
+	if (!list_empty(&counter->list_entry)) {
 		ctx->nr_counters--;
-		list_del_init(&counter->list);
+		list_del_counter(counter, ctx);
 		counter->task = NULL;
 	}
 	spin_unlock_irq(&ctx->lock);
@@ -211,7 +254,7 @@ static void __perf_install_in_context(void *info)
 	 * counters on a global level. NOP for non NMI based counters.
 	 */
 	hw_perf_disable_all();
-	list_add_tail(&counter->list, &ctx->counters);
+	list_add_counter(counter, ctx);
 	hw_perf_enable_all();
 
 	ctx->nr_counters++;
@@ -268,7 +311,7 @@ retry:
 	 * If the context is active and the counter has not been added
 	 * we need to retry the smp call.
 	 */
-	if (ctx->nr_active && list_empty(&counter->list)) {
+	if (ctx->nr_active && list_empty(&counter->list_entry)) {
 		spin_unlock_irq(&ctx->lock);
 		goto retry;
 	}
@@ -278,13 +321,45 @@ retry:
 	 * can add the counter safely, if it the call above did not
 	 * succeed.
 	 */
-	if (list_empty(&counter->list)) {
-		list_add_tail(&counter->list, &ctx->counters);
+	if (list_empty(&counter->list_entry)) {
+		list_add_counter(counter, ctx);
 		ctx->nr_counters++;
 	}
 	spin_unlock_irq(&ctx->lock);
 }
 
+static void
+counter_sched_out(struct perf_counter *counter,
+		  struct perf_cpu_context *cpuctx,
+		  struct perf_counter_context *ctx)
+{
+	if (!counter->active)
+		return;
+
+	hw_perf_counter_disable(counter);
+	counter->active	=  0;
+	counter->oncpu	= -1;
+
+	cpuctx->active_oncpu--;
+	ctx->nr_active--;
+}
+
+static void
+group_sched_out(struct perf_counter *group_counter,
+		struct perf_cpu_context *cpuctx,
+		struct perf_counter_context *ctx)
+{
+	struct perf_counter *counter;
+
+	counter_sched_out(group_counter, cpuctx, ctx);
+
+	/*
+	 * Schedule out siblings (if any):
+	 */
+	list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
+		counter_sched_out(counter, cpuctx, ctx);
+}
+
 /*
  * Called from scheduler to remove the counters of the current task,
  * with interrupts disabled.
@@ -306,21 +381,48 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
 		return;
 
 	spin_lock(&ctx->lock);
-	list_for_each_entry(counter, &ctx->counters, list) {
-		if (!ctx->nr_active)
-			break;
-		if (counter->active) {
-			hw_perf_counter_disable(counter);
-			counter->active = 0;
-			counter->oncpu = -1;
-			ctx->nr_active--;
-			cpuctx->active_oncpu--;
-		}
+	if (ctx->nr_active) {
+		list_for_each_entry(counter, &ctx->counter_list, list_entry)
+			group_sched_out(counter, cpuctx, ctx);
 	}
 	spin_unlock(&ctx->lock);
 	cpuctx->task_ctx = NULL;
 }
 
+static void
+counter_sched_in(struct perf_counter *counter,
+		 struct perf_cpu_context *cpuctx,
+		 struct perf_counter_context *ctx,
+		 int cpu)
+{
+	if (!counter->active)
+		return;
+
+	hw_perf_counter_enable(counter);
+	counter->active = 1;
+	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
+
+	cpuctx->active_oncpu++;
+	ctx->nr_active++;
+}
+
+static void
+group_sched_in(struct perf_counter *group_counter,
+	       struct perf_cpu_context *cpuctx,
+	       struct perf_counter_context *ctx,
+	       int cpu)
+{
+	struct perf_counter *counter;
+
+	counter_sched_in(group_counter, cpuctx, ctx, cpu);
+
+	/*
+	 * Schedule in siblings as one group (if any):
+	 */
+	list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
+		counter_sched_in(counter, cpuctx, ctx, cpu);
+}
+
 /*
  * Called from scheduler to add the counters of the current task
  * with interrupts disabled.
@@ -342,19 +444,21 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu)
 		return;
 
 	spin_lock(&ctx->lock);
-	list_for_each_entry(counter, &ctx->counters, list) {
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
 		if (ctx->nr_active == cpuctx->max_pertask)
 			break;
+
+		/*
+		 * Listen to the 'cpu' scheduling filter constraint
+		 * of counters:
+		 */
 		if (counter->cpu != -1 && counter->cpu != cpu)
 			continue;
 
-		hw_perf_counter_enable(counter);
-		counter->active = 1;
-		counter->oncpu = cpu;
-		ctx->nr_active++;
-		cpuctx->active_oncpu++;
+		group_sched_in(counter, cpuctx, ctx, cpu);
 	}
 	spin_unlock(&ctx->lock);
+
 	cpuctx->task_ctx = ctx;
 }
 
@@ -371,12 +475,12 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	spin_lock(&ctx->lock);
 
 	/*
-	 * Rotate the first entry last:
+	 * Rotate the first entry last (works just fine for group counters too):
 	 */
 	hw_perf_disable_all();
-	list_for_each_entry(counter, &ctx->counters, list) {
-		list_del(&counter->list);
-		list_add_tail(&counter->list, &ctx->counters);
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		list_del(&counter->list_entry);
+		list_add_tail(&counter->list_entry, &ctx->counter_list);
 		break;
 	}
 	hw_perf_enable_all();
@@ -386,17 +490,24 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	perf_counter_task_sched_in(curr, cpu);
 }
 
+/*
+ * Initialize the perf_counter context in a task_struct:
+ */
+static void
+__perf_counter_init_context(struct perf_counter_context *ctx,
+			    struct task_struct *task)
+{
+	spin_lock_init(&ctx->lock);
+	INIT_LIST_HEAD(&ctx->counter_list);
+	ctx->nr_counters	= 0;
+	ctx->task		= task;
+}
 /*
  * Initialize the perf_counter context in task_struct
  */
 void perf_counter_init_task(struct task_struct *task)
 {
-	struct perf_counter_context *ctx = &task->perf_counter_ctx;
-
-	spin_lock_init(&ctx->lock);
-	INIT_LIST_HEAD(&ctx->counters);
-	ctx->nr_counters = 0;
-	ctx->task = task;
+	__perf_counter_init_context(&task->perf_counter_ctx, task);
 }
 
 /*
@@ -407,7 +518,7 @@ static void __hw_perf_counter_read(void *info)
 	hw_perf_counter_read(info);
 }
 
-static u64 perf_read_counter(struct perf_counter *counter)
+static u64 perf_counter_read(struct perf_counter *counter)
 {
 	/*
 	 * If counter is enabled and currently active on a CPU, update the
@@ -418,7 +529,7 @@ static u64 perf_read_counter(struct perf_counter *counter)
 					 __hw_perf_counter_read, counter, 1);
 	}
 
-	return perf_read_counter_safe(counter);
+	return perf_counter_read_safe(counter);
 }
 
 /*
@@ -555,7 +666,7 @@ static int perf_release(struct inode *inode, struct file *file)
 
 	mutex_lock(&counter->mutex);
 
-	perf_remove_from_context(counter);
+	perf_counter_remove_from_context(counter);
 	put_context(ctx);
 
 	mutex_unlock(&counter->mutex);
@@ -577,7 +688,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 		return -EINVAL;
 
 	mutex_lock(&counter->mutex);
-	cntval = perf_read_counter(counter);
+	cntval = perf_counter_read(counter);
 	mutex_unlock(&counter->mutex);
 
 	return put_user(cntval, (u64 __user *) buf) ? -EFAULT : sizeof(cntval);
@@ -707,15 +818,25 @@ static const struct file_operations perf_fops = {
  * Allocate and initialize a counter structure
  */
 static struct perf_counter *
-perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu)
+perf_counter_alloc(struct perf_counter_hw_event *hw_event,
+		   int cpu,
+		   struct perf_counter *group_leader)
 {
 	struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL);
 
 	if (!counter)
 		return NULL;
 
+	/*
+	 * Single counters are their own group leaders, with an
+	 * empty sibling list:
+	 */
+	if (!group_leader)
+		group_leader = counter;
+
 	mutex_init(&counter->mutex);
-	INIT_LIST_HEAD(&counter->list);
+	INIT_LIST_HEAD(&counter->list_entry);
+	INIT_LIST_HEAD(&counter->sibling_list);
 	init_waitqueue_head(&counter->waitq);
 
 	counter->irqdata		= &counter->data[0];
@@ -723,6 +844,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu)
 	counter->cpu			= cpu;
 	counter->hw_event		= *hw_event;
 	counter->wakeup_pending		= 0;
+	counter->group_leader		= group_leader;
 
 	return counter;
 }
@@ -743,20 +865,45 @@ asmlinkage int sys_perf_counter_open(
 	int				group_fd)
 
 {
-	struct perf_counter_context *ctx;
+	struct perf_counter *counter, *group_leader;
 	struct perf_counter_hw_event hw_event;
-	struct perf_counter *counter;
+	struct perf_counter_context *ctx;
+	struct file *group_file = NULL;
+	int fput_needed = 0;
 	int ret;
 
 	if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0)
 		return -EFAULT;
 
+	/*
+	 * Look up the group leader:
+	 */
+	group_leader = NULL;
+	if (group_fd != -1) {
+		ret = -EINVAL;
+		group_file = fget_light(group_fd, &fput_needed);
+		if (!group_file)
+			goto out_fput;
+		if (group_file->f_op != &perf_fops)
+			goto out_fput;
+
+		group_leader = group_file->private_data;
+		/*
+		 * Do not allow a recursive hierarchy:
+		 */
+		if (group_leader->group_leader)
+			goto out_fput;
+	}
+
+	/*
+	 * Get the target context (task or percpu):
+	 */
 	ctx = find_get_context(pid, cpu);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
 	ret = -ENOMEM;
-	counter = perf_counter_alloc(&hw_event, cpu);
+	counter = perf_counter_alloc(&hw_event, cpu, group_leader);
 	if (!counter)
 		goto err_put_context;
 
@@ -770,11 +917,14 @@ asmlinkage int sys_perf_counter_open(
 	if (ret < 0)
 		goto err_remove_free_put_context;
 
+out_fput:
+	fput_light(group_file, fput_needed);
+
 	return ret;
 
 err_remove_free_put_context:
 	mutex_lock(&counter->mutex);
-	perf_remove_from_context(counter);
+	perf_counter_remove_from_context(counter);
 	mutex_unlock(&counter->mutex);
 
 err_free_put_context:
@@ -783,40 +933,40 @@ err_free_put_context:
 err_put_context:
 	put_context(ctx);
 
-	return ret;
+	goto out_fput;
 }
 
-static void __cpuinit perf_init_cpu(int cpu)
+static void __cpuinit perf_counter_init_cpu(int cpu)
 {
-	struct perf_cpu_context *ctx;
+	struct perf_cpu_context *cpuctx;
 
-	ctx = &per_cpu(perf_cpu_context, cpu);
-	spin_lock_init(&ctx->ctx.lock);
-	INIT_LIST_HEAD(&ctx->ctx.counters);
+	cpuctx = &per_cpu(perf_cpu_context, cpu);
+	__perf_counter_init_context(&cpuctx->ctx, NULL);
 
 	mutex_lock(&perf_resource_mutex);
-	ctx->max_pertask = perf_max_counters - perf_reserved_percpu;
+	cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu;
 	mutex_unlock(&perf_resource_mutex);
+
 	hw_perf_counter_setup();
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static void __perf_exit_cpu(void *info)
+static void __perf_counter_exit_cpu(void *info)
 {
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_counter_context *ctx = &cpuctx->ctx;
 	struct perf_counter *counter, *tmp;
 
-	list_for_each_entry_safe(counter, tmp, &ctx->counters, list)
-		__perf_remove_from_context(counter);
+	list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry)
+		__perf_counter_remove_from_context(counter);
 
 }
-static void perf_exit_cpu(int cpu)
+static void perf_counter_exit_cpu(int cpu)
 {
-	smp_call_function_single(cpu, __perf_exit_cpu, NULL, 1);
+	smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1);
 }
 #else
-static inline void perf_exit_cpu(int cpu) { }
+static inline void perf_counter_exit_cpu(int cpu) { }
 #endif
 
 static int __cpuinit
@@ -828,12 +978,12 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		perf_init_cpu(cpu);
+		perf_counter_init_cpu(cpu);
 		break;
 
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
-		perf_exit_cpu(cpu);
+		perf_counter_exit_cpu(cpu);
 		break;
 
 	default:
-- 
cgit v1.2.3-70-g09d2


From 621a01eac89b5e2f81a4cf576568b31f40a02724 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Dec 2008 12:46:46 +0100
Subject: perf counters: hw driver API

Impact: restructure code, introduce hw_ops driver abstraction

Introduce this abstraction to handle counter details:

 struct hw_perf_counter_ops {
	void (*hw_perf_counter_enable)	(struct perf_counter *counter);
	void (*hw_perf_counter_disable)	(struct perf_counter *counter);
	void (*hw_perf_counter_read)	(struct perf_counter *counter);
 };

This will be useful to support assymetric hw details, and it will also
be useful to implement "software counters". (Counters that count kernel
managed sw events such as pagefaults, context-switches, wall-clock time
or task-local time.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_counter.c | 37 ++++++++++++++++++++++---------
 include/linux/perf_counter.h       | 15 +++++++++++++
 kernel/perf_counter.c              | 45 ++++++++++++++++++++------------------
 3 files changed, 66 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 54b4ad0cce6..718b635dece 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -56,7 +56,7 @@ const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map);
 /*
  * Setup the hardware configuration for a given hw_event_type
  */
-int hw_perf_counter_init(struct perf_counter *counter)
+static int __hw_perf_counter_init(struct perf_counter *counter)
 {
 	struct perf_counter_hw_event *hw_event = &counter->hw_event;
 	struct hw_perf_counter *hwc = &counter->hw;
@@ -135,7 +135,7 @@ u64 hw_perf_disable_all(void)
 EXPORT_SYMBOL_GPL(hw_perf_disable_all);
 
 static inline void
-__hw_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx)
+__x86_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx)
 {
 	wrmsr(hwc->config_base + idx, hwc->config, 0);
 }
@@ -149,13 +149,13 @@ static void __hw_perf_counter_set_period(struct hw_perf_counter *hwc, int idx)
 	wrmsr(hwc->counter_base + idx, hwc->next_count, 0);
 }
 
-static void __hw_perf_counter_enable(struct hw_perf_counter *hwc, int idx)
+static void __x86_perf_counter_enable(struct hw_perf_counter *hwc, int idx)
 {
 	wrmsr(hwc->config_base + idx,
 	      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
 }
 
-void hw_perf_counter_enable(struct perf_counter *counter)
+static void x86_perf_counter_enable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 	struct hw_perf_counter *hwc = &counter->hw;
@@ -170,12 +170,12 @@ void hw_perf_counter_enable(struct perf_counter *counter)
 
 	perf_counters_lapic_init(hwc->nmi);
 
-	__hw_perf_counter_disable(hwc, idx);
+	__x86_perf_counter_disable(hwc, idx);
 
 	cpuc->counters[idx] = counter;
 
 	__hw_perf_counter_set_period(hwc, idx);
-	__hw_perf_counter_enable(hwc, idx);
+	__x86_perf_counter_enable(hwc, idx);
 }
 
 #ifdef CONFIG_X86_64
@@ -282,20 +282,20 @@ void perf_counter_print_debug(void)
 	local_irq_enable();
 }
 
-void hw_perf_counter_disable(struct perf_counter *counter)
+static void x86_perf_counter_disable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 	struct hw_perf_counter *hwc = &counter->hw;
 	unsigned int idx = hwc->idx;
 
-	__hw_perf_counter_disable(hwc, idx);
+	__x86_perf_counter_disable(hwc, idx);
 
 	clear_bit(idx, cpuc->used);
 	cpuc->counters[idx] = NULL;
 	__hw_perf_save_counter(counter, hwc, idx);
 }
 
-void hw_perf_counter_read(struct perf_counter *counter)
+static void x86_perf_counter_read(struct perf_counter *counter)
 {
 	struct hw_perf_counter *hwc = &counter->hw;
 	unsigned long addr = hwc->counter_base + hwc->idx;
@@ -342,7 +342,7 @@ static void perf_save_and_restart(struct perf_counter *counter)
 	__hw_perf_counter_set_period(hwc, idx);
 
 	if (pmc_ctrl & ARCH_PERFMON_EVENTSEL0_ENABLE)
-		__hw_perf_counter_enable(hwc, idx);
+		__x86_perf_counter_enable(hwc, idx);
 }
 
 static void
@@ -572,3 +572,20 @@ void __init init_hw_perf_counters(void)
 
 	perf_counters_initialized = true;
 }
+
+static struct hw_perf_counter_ops x86_perf_counter_ops = {
+	.hw_perf_counter_enable		= x86_perf_counter_enable,
+	.hw_perf_counter_disable	= x86_perf_counter_disable,
+	.hw_perf_counter_read		= x86_perf_counter_read,
+};
+
+struct hw_perf_counter_ops *hw_perf_counter_init(struct perf_counter *counter)
+{
+	int err;
+
+	err = __hw_perf_counter_init(counter);
+	if (err)
+		return NULL;
+
+	return &x86_perf_counter_ops;
+}
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 7af7d896546..27385641ecb 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -113,6 +113,17 @@ struct perf_data {
 	u8				data[PERF_DATA_BUFLEN];
 };
 
+struct perf_counter;
+
+/**
+ * struct hw_perf_counter_ops - performance counter hw ops
+ */
+struct hw_perf_counter_ops {
+	void (*hw_perf_counter_enable)	(struct perf_counter *counter);
+	void (*hw_perf_counter_disable)	(struct perf_counter *counter);
+	void (*hw_perf_counter_read)	(struct perf_counter *counter);
+};
+
 /**
  * struct perf_counter - performance counter kernel representation:
  */
@@ -120,6 +131,7 @@ struct perf_counter {
 	struct list_head		list_entry;
 	struct list_head		sibling_list;
 	struct perf_counter		*group_leader;
+	struct hw_perf_counter_ops	*hw_ops;
 
 	int				active;
 #if BITS_PER_LONG == 64
@@ -185,6 +197,9 @@ struct perf_cpu_context {
 extern int perf_max_counters;
 
 #ifdef CONFIG_PERF_COUNTERS
+extern struct hw_perf_counter_ops *
+hw_perf_counter_init(struct perf_counter *counter);
+
 extern void perf_counter_task_sched_in(struct task_struct *task, int cpu);
 extern void perf_counter_task_sched_out(struct task_struct *task, int cpu);
 extern void perf_counter_task_tick(struct task_struct *task, int cpu);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 278209c547a..e6e41ca9546 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -37,18 +37,15 @@ static DEFINE_MUTEX(perf_resource_mutex);
 /*
  * Architecture provided APIs - weak aliases:
  */
-
-int __weak hw_perf_counter_init(struct perf_counter *counter)
+extern __weak struct hw_perf_counter_ops *
+hw_perf_counter_init(struct perf_counter *counter)
 {
-	return -EINVAL;
+	return ERR_PTR(-EINVAL);
 }
 
-void __weak hw_perf_counter_enable(struct perf_counter *counter)	 { }
-void __weak hw_perf_counter_disable(struct perf_counter *counter)	 { }
-void __weak hw_perf_counter_read(struct perf_counter *counter)		 { }
-void __weak hw_perf_disable_all(void) { }
-void __weak hw_perf_enable_all(void) { }
-void __weak hw_perf_counter_setup(void) { }
+void __weak hw_perf_disable_all(void)	 { }
+void __weak hw_perf_enable_all(void)	 { }
+void __weak hw_perf_counter_setup(void)	 { }
 
 #if BITS_PER_LONG == 64
 
@@ -146,7 +143,7 @@ static void __perf_counter_remove_from_context(void *info)
 	spin_lock(&ctx->lock);
 
 	if (counter->active) {
-		hw_perf_counter_disable(counter);
+		counter->hw_ops->hw_perf_counter_disable(counter);
 		counter->active = 0;
 		ctx->nr_active--;
 		cpuctx->active_oncpu--;
@@ -257,7 +254,7 @@ static void __perf_install_in_context(void *info)
 	ctx->nr_counters++;
 
 	if (cpuctx->active_oncpu < perf_max_counters) {
-		hw_perf_counter_enable(counter);
+		counter->hw_ops->hw_perf_counter_enable(counter);
 		counter->active = 1;
 		counter->oncpu = cpu;
 		ctx->nr_active++;
@@ -333,7 +330,7 @@ counter_sched_out(struct perf_counter *counter,
 	if (!counter->active)
 		return;
 
-	hw_perf_counter_disable(counter);
+	counter->hw_ops->hw_perf_counter_disable(counter);
 	counter->active	=  0;
 	counter->oncpu	= -1;
 
@@ -392,7 +389,7 @@ counter_sched_in(struct perf_counter *counter,
 		 struct perf_counter_context *ctx,
 		 int cpu)
 {
-	hw_perf_counter_enable(counter);
+	counter->hw_ops->hw_perf_counter_enable(counter);
 	counter->active = 1;
 	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
 
@@ -509,7 +506,9 @@ void perf_counter_init_task(struct task_struct *task)
  */
 static void __hw_perf_counter_read(void *info)
 {
-	hw_perf_counter_read(info);
+	struct perf_counter *counter = info;
+
+	counter->hw_ops->hw_perf_counter_read(counter);
 }
 
 static u64 perf_counter_read(struct perf_counter *counter)
@@ -816,8 +815,10 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 		   int cpu,
 		   struct perf_counter *group_leader)
 {
-	struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL);
+	struct hw_perf_counter_ops *hw_ops;
+	struct perf_counter *counter;
 
+	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
 	if (!counter)
 		return NULL;
 
@@ -839,6 +840,14 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	counter->hw_event		= *hw_event;
 	counter->wakeup_pending		= 0;
 	counter->group_leader		= group_leader;
+	counter->hw_ops			= NULL;
+
+	hw_ops = hw_perf_counter_init(counter);
+	if (!hw_ops) {
+		kfree(counter);
+		return NULL;
+	}
+	counter->hw_ops = hw_ops;
 
 	return counter;
 }
@@ -908,10 +917,6 @@ asmlinkage int sys_perf_counter_open(
 	if (!counter)
 		goto err_put_context;
 
-	ret = hw_perf_counter_init(counter);
-	if (ret)
-		goto err_free_put_context;
-
 	perf_install_in_context(ctx, counter, cpu);
 
 	ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
@@ -927,8 +932,6 @@ err_remove_free_put_context:
 	mutex_lock(&counter->mutex);
 	perf_counter_remove_from_context(counter);
 	mutex_unlock(&counter->mutex);
-
-err_free_put_context:
 	kfree(counter);
 
 err_put_context:
-- 
cgit v1.2.3-70-g09d2


From 5c92d12411dfe5f0f3d1b1c1e2f756245e6f7249 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Dec 2008 13:21:10 +0100
Subject: perf counters: implement PERF_COUNT_CPU_CLOCK

Impact: add new perf-counter type

The 'CPU clock' counter counts the amount of CPU clock time that is
elapsing, in nanoseconds. (regardless of how much of it the task is
spending on a CPU executing)

This counter type is a Linux kernel based abstraction, it is available
even if the hardware does not support native hardware performance counters.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_counter.c | 36 ++-------------
 include/linux/perf_counter.h       |  9 ++--
 kernel/perf_counter.c              | 95 ++++++++++++++++++++++++++++++++------
 3 files changed, 92 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 718b635dece..43c8e9a38b4 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -178,35 +178,6 @@ static void x86_perf_counter_enable(struct perf_counter *counter)
 	__x86_perf_counter_enable(hwc, idx);
 }
 
-#ifdef CONFIG_X86_64
-static inline void atomic64_counter_set(struct perf_counter *counter, u64 val)
-{
-	atomic64_set(&counter->count, val);
-}
-
-static inline u64 atomic64_counter_read(struct perf_counter *counter)
-{
-	return atomic64_read(&counter->count);
-}
-#else
-/*
- * Todo: add proper atomic64_t support to 32-bit x86:
- */
-static inline void atomic64_counter_set(struct perf_counter *counter, u64 val64)
-{
-	u32 *val32 = (void *)&val64;
-
-	atomic_set(counter->count32 + 0, *(val32 + 0));
-	atomic_set(counter->count32 + 1, *(val32 + 1));
-}
-
-static inline u64 atomic64_counter_read(struct perf_counter *counter)
-{
-	return atomic_read(counter->count32 + 0) |
-		(u64) atomic_read(counter->count32 + 1) << 32;
-}
-#endif
-
 static void __hw_perf_save_counter(struct perf_counter *counter,
 				   struct hw_perf_counter *hwc, int idx)
 {
@@ -309,7 +280,7 @@ static void x86_perf_counter_read(struct perf_counter *counter)
 	} while (offs != hwc->prev_count);
 
 	val32 = (s32) val;
-	val =  (s64)hwc->irq_period + (s64)val32;
+	val = (s64)hwc->irq_period + (s64)val32;
 	atomic64_counter_set(counter, hwc->prev_count + val);
 }
 
@@ -573,13 +544,14 @@ void __init init_hw_perf_counters(void)
 	perf_counters_initialized = true;
 }
 
-static struct hw_perf_counter_ops x86_perf_counter_ops = {
+static const struct hw_perf_counter_ops x86_perf_counter_ops = {
 	.hw_perf_counter_enable		= x86_perf_counter_enable,
 	.hw_perf_counter_disable	= x86_perf_counter_disable,
 	.hw_perf_counter_read		= x86_perf_counter_read,
 };
 
-struct hw_perf_counter_ops *hw_perf_counter_init(struct perf_counter *counter)
+const struct hw_perf_counter_ops *
+hw_perf_counter_init(struct perf_counter *counter)
 {
 	int err;
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 27385641ecb..9a1713a1be2 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -131,7 +131,7 @@ struct perf_counter {
 	struct list_head		list_entry;
 	struct list_head		sibling_list;
 	struct perf_counter		*group_leader;
-	struct hw_perf_counter_ops	*hw_ops;
+	const struct hw_perf_counter_ops *hw_ops;
 
 	int				active;
 #if BITS_PER_LONG == 64
@@ -197,7 +197,7 @@ struct perf_cpu_context {
 extern int perf_max_counters;
 
 #ifdef CONFIG_PERF_COUNTERS
-extern struct hw_perf_counter_ops *
+extern const struct hw_perf_counter_ops *
 hw_perf_counter_init(struct perf_counter *counter);
 
 extern void perf_counter_task_sched_in(struct task_struct *task, int cpu);
@@ -208,6 +208,9 @@ extern void perf_counter_notify(struct pt_regs *regs);
 extern void perf_counter_print_debug(void);
 extern void hw_perf_restore_ctrl(u64 ctrl);
 extern u64 hw_perf_disable_all(void);
+extern void atomic64_counter_set(struct perf_counter *counter, u64 val64);
+extern u64 atomic64_counter_read(struct perf_counter *counter);
+
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
@@ -219,7 +222,7 @@ static inline void perf_counter_init_task(struct task_struct *task)	{ }
 static inline void perf_counter_notify(struct pt_regs *regs)		{ }
 static inline void perf_counter_print_debug(void)			{ }
 static inline void hw_perf_restore_ctrl(u64 ctrl)			{ }
-static inline u64 hw_perf_disable_all(void)		{ return 0; }
+static inline u64 hw_perf_disable_all(void)		      { return 0; }
 #endif
 
 #endif /* _LINUX_PERF_COUNTER_H */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index e6e41ca9546..506286e5ba6 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -37,15 +37,15 @@ static DEFINE_MUTEX(perf_resource_mutex);
 /*
  * Architecture provided APIs - weak aliases:
  */
-extern __weak struct hw_perf_counter_ops *
+extern __weak const struct hw_perf_counter_ops *
 hw_perf_counter_init(struct perf_counter *counter)
 {
 	return ERR_PTR(-EINVAL);
 }
 
-void __weak hw_perf_disable_all(void)	 { }
-void __weak hw_perf_enable_all(void)	 { }
-void __weak hw_perf_counter_setup(void)	 { }
+u64 __weak hw_perf_disable_all(void)		{ return 0; }
+void __weak hw_perf_restore_ctrl(u64 ctrl)	{ }
+void __weak hw_perf_counter_setup(void)		{ }
 
 #if BITS_PER_LONG == 64
 
@@ -58,6 +58,16 @@ static inline u64 perf_counter_read_safe(struct perf_counter *counter)
 	return (u64) atomic64_read(&counter->count);
 }
 
+void atomic64_counter_set(struct perf_counter *counter, u64 val)
+{
+	atomic64_set(&counter->count, val);
+}
+
+u64 atomic64_counter_read(struct perf_counter *counter)
+{
+	return atomic64_read(&counter->count);
+}
+
 #else
 
 /*
@@ -79,6 +89,20 @@ static u64 perf_counter_read_safe(struct perf_counter *counter)
 	return cntl | ((u64) cnth) << 32;
 }
 
+void atomic64_counter_set(struct perf_counter *counter, u64 val64)
+{
+	u32 *val32 = (void *)&val64;
+
+	atomic_set(counter->count32 + 0, *(val32 + 0));
+	atomic_set(counter->count32 + 1, *(val32 + 1));
+}
+
+u64 atomic64_counter_read(struct perf_counter *counter)
+{
+	return atomic_read(counter->count32 + 0) |
+		(u64) atomic_read(counter->count32 + 1) << 32;
+}
+
 #endif
 
 static void
@@ -131,6 +155,7 @@ static void __perf_counter_remove_from_context(void *info)
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_counter *counter = info;
 	struct perf_counter_context *ctx = counter->ctx;
+	u64 perf_flags;
 
 	/*
 	 * If this is a task context, we need to check whether it is
@@ -155,9 +180,9 @@ static void __perf_counter_remove_from_context(void *info)
 	 * Protect the list operation against NMI by disabling the
 	 * counters on a global level. NOP for non NMI based counters.
 	 */
-	hw_perf_disable_all();
+	perf_flags = hw_perf_disable_all();
 	list_del_counter(counter, ctx);
-	hw_perf_enable_all();
+	hw_perf_restore_ctrl(perf_flags);
 
 	if (!ctx->task) {
 		/*
@@ -232,6 +257,7 @@ static void __perf_install_in_context(void *info)
 	struct perf_counter *counter = info;
 	struct perf_counter_context *ctx = counter->ctx;
 	int cpu = smp_processor_id();
+	u64 perf_flags;
 
 	/*
 	 * If this is a task context, we need to check whether it is
@@ -247,9 +273,9 @@ static void __perf_install_in_context(void *info)
 	 * Protect the list operation against NMI by disabling the
 	 * counters on a global level. NOP for non NMI based counters.
 	 */
-	hw_perf_disable_all();
+	perf_flags = hw_perf_disable_all();
 	list_add_counter(counter, ctx);
-	hw_perf_enable_all();
+	hw_perf_restore_ctrl(perf_flags);
 
 	ctx->nr_counters++;
 
@@ -457,6 +483,7 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 {
 	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
 	struct perf_counter *counter;
+	u64 perf_flags;
 
 	if (likely(!ctx->nr_counters))
 		return;
@@ -468,13 +495,13 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	/*
 	 * Rotate the first entry last (works just fine for group counters too):
 	 */
-	hw_perf_disable_all();
+	perf_flags = hw_perf_disable_all();
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
 		list_del(&counter->list_entry);
 		list_add_tail(&counter->list_entry, &ctx->counter_list);
 		break;
 	}
-	hw_perf_enable_all();
+	hw_perf_restore_ctrl(perf_flags);
 
 	spin_unlock(&ctx->lock);
 
@@ -807,6 +834,42 @@ static const struct file_operations perf_fops = {
 	.poll			= perf_poll,
 };
 
+static void cpu_clock_perf_counter_enable(struct perf_counter *counter)
+{
+}
+
+static void cpu_clock_perf_counter_disable(struct perf_counter *counter)
+{
+}
+
+static void cpu_clock_perf_counter_read(struct perf_counter *counter)
+{
+	int cpu = raw_smp_processor_id();
+
+	atomic64_counter_set(counter, cpu_clock(cpu));
+}
+
+static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
+	.hw_perf_counter_enable		= cpu_clock_perf_counter_enable,
+	.hw_perf_counter_disable	= cpu_clock_perf_counter_disable,
+	.hw_perf_counter_read		= cpu_clock_perf_counter_read,
+};
+
+static const struct hw_perf_counter_ops *
+sw_perf_counter_init(struct perf_counter *counter)
+{
+	const struct hw_perf_counter_ops *hw_ops = NULL;
+
+	switch (counter->hw_event.type) {
+	case PERF_COUNT_CPU_CLOCK:
+		hw_ops = &perf_ops_cpu_clock;
+		break;
+	default:
+		break;
+	}
+	return hw_ops;
+}
+
 /*
  * Allocate and initialize a counter structure
  */
@@ -815,7 +878,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 		   int cpu,
 		   struct perf_counter *group_leader)
 {
-	struct hw_perf_counter_ops *hw_ops;
+	const struct hw_perf_counter_ops *hw_ops;
 	struct perf_counter *counter;
 
 	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
@@ -842,7 +905,13 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	counter->group_leader		= group_leader;
 	counter->hw_ops			= NULL;
 
-	hw_ops = hw_perf_counter_init(counter);
+	hw_ops = NULL;
+	if (!hw_event->raw && hw_event->type < 0)
+		hw_ops = sw_perf_counter_init(counter);
+	if (!hw_ops) {
+		hw_ops = hw_perf_counter_init(counter);
+	}
+
 	if (!hw_ops) {
 		kfree(counter);
 		return NULL;
@@ -912,7 +981,7 @@ asmlinkage int sys_perf_counter_open(
 			goto err_put_context;
 	}
 
-	ret = -ENOMEM;
+	ret = -EINVAL;
 	counter = perf_counter_alloc(&hw_event, cpu, group_leader);
 	if (!counter)
 		goto err_put_context;
-- 
cgit v1.2.3-70-g09d2


From 01b2838c4298c5e0d30b4993c195ac34dd9df61e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Dec 2008 13:45:51 +0100
Subject: perf counters: consolidate hw_perf save/restore APIs

Impact: cleanup

Rename them to better match up the usual IRQ disable/enable APIs:

 hw_perf_disable_all()  => hw_perf_save_disable()
 hw_perf_restore_ctrl() => hw_perf_restore()

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_counter.c |  8 ++++----
 drivers/acpi/processor_idle.c      | 10 +++++-----
 include/linux/perf_counter.h       | 10 +++++-----
 kernel/perf_counter.c              | 16 ++++++++--------
 4 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 43c8e9a38b4..3e1dbebe22b 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -118,13 +118,13 @@ void hw_perf_enable_all(void)
 	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0);
 }
 
-void hw_perf_restore_ctrl(u64 ctrl)
+void hw_perf_restore(u64 ctrl)
 {
 	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0);
 }
-EXPORT_SYMBOL_GPL(hw_perf_restore_ctrl);
+EXPORT_SYMBOL_GPL(hw_perf_restore);
 
-u64 hw_perf_disable_all(void)
+u64 hw_perf_save_disable(void)
 {
 	u64 ctrl;
 
@@ -132,7 +132,7 @@ u64 hw_perf_disable_all(void)
 	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0);
 	return ctrl;
 }
-EXPORT_SYMBOL_GPL(hw_perf_disable_all);
+EXPORT_SYMBOL_GPL(hw_perf_save_disable);
 
 static inline void
 __x86_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx)
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index cca804e6f1d..a3e66a33b7a 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -270,11 +270,11 @@ static atomic_t c3_cpu_count;
 /* Common C-state entry for C2, C3, .. */
 static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
 {
-	u64 pctrl;
+	u64 perf_flags;
 
 	/* Don't trace irqs off for idle */
 	stop_critical_timings();
-	pctrl = hw_perf_disable_all();
+	perf_flags = hw_perf_save_disable();
 	if (cstate->entry_method == ACPI_CSTATE_FFH) {
 		/* Call into architectural FFH based C-state */
 		acpi_processor_ffh_cstate_enter(cstate);
@@ -287,7 +287,7 @@ static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
 		   gets asserted in time to freeze execution properly. */
 		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
 	}
-	hw_perf_restore_ctrl(pctrl);
+	hw_perf_restore(perf_flags);
 	start_critical_timings();
 }
 #endif /* !CONFIG_CPU_IDLE */
@@ -1433,7 +1433,7 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
 
 	/* Don't trace irqs off for idle */
 	stop_critical_timings();
-	pctrl = hw_perf_disable_all();
+	pctrl = hw_perf_save_disable();
 	if (cx->entry_method == ACPI_CSTATE_FFH) {
 		/* Call into architectural FFH based C-state */
 		acpi_processor_ffh_cstate_enter(cx);
@@ -1448,7 +1448,7 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
 		   gets asserted in time to freeze execution properly. */
 		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
 	}
-	hw_perf_restore_ctrl(pctrl);
+	hw_perf_restore(pctrl);
 	start_critical_timings();
 }
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 9a1713a1be2..68f6e3ad531 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -67,7 +67,7 @@ enum perf_counter_record_type {
  * Hardware event to monitor via a performance monitoring counter:
  */
 struct perf_counter_hw_event {
-	u64			type;
+	s64			type;
 
 	u64			irq_period;
 	u32			record_type;
@@ -206,8 +206,8 @@ extern void perf_counter_task_tick(struct task_struct *task, int cpu);
 extern void perf_counter_init_task(struct task_struct *task);
 extern void perf_counter_notify(struct pt_regs *regs);
 extern void perf_counter_print_debug(void);
-extern void hw_perf_restore_ctrl(u64 ctrl);
-extern u64 hw_perf_disable_all(void);
+extern u64 hw_perf_save_disable(void);
+extern void hw_perf_restore(u64 ctrl);
 extern void atomic64_counter_set(struct perf_counter *counter, u64 val64);
 extern u64 atomic64_counter_read(struct perf_counter *counter);
 
@@ -221,8 +221,8 @@ perf_counter_task_tick(struct task_struct *task, int cpu)		{ }
 static inline void perf_counter_init_task(struct task_struct *task)	{ }
 static inline void perf_counter_notify(struct pt_regs *regs)		{ }
 static inline void perf_counter_print_debug(void)			{ }
-static inline void hw_perf_restore_ctrl(u64 ctrl)			{ }
-static inline u64 hw_perf_disable_all(void)		      { return 0; }
+static inline void hw_perf_restore(u64 ctrl)			{ }
+static inline u64 hw_perf_save_disable(void)		      { return 0; }
 #endif
 
 #endif /* _LINUX_PERF_COUNTER_H */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 506286e5ba6..0e93fea1712 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -43,8 +43,8 @@ hw_perf_counter_init(struct perf_counter *counter)
 	return ERR_PTR(-EINVAL);
 }
 
-u64 __weak hw_perf_disable_all(void)		{ return 0; }
-void __weak hw_perf_restore_ctrl(u64 ctrl)	{ }
+u64 __weak hw_perf_save_disable(void)		{ return 0; }
+void __weak hw_perf_restore(u64 ctrl)	{ }
 void __weak hw_perf_counter_setup(void)		{ }
 
 #if BITS_PER_LONG == 64
@@ -180,9 +180,9 @@ static void __perf_counter_remove_from_context(void *info)
 	 * Protect the list operation against NMI by disabling the
 	 * counters on a global level. NOP for non NMI based counters.
 	 */
-	perf_flags = hw_perf_disable_all();
+	perf_flags = hw_perf_save_disable();
 	list_del_counter(counter, ctx);
-	hw_perf_restore_ctrl(perf_flags);
+	hw_perf_restore(perf_flags);
 
 	if (!ctx->task) {
 		/*
@@ -273,9 +273,9 @@ static void __perf_install_in_context(void *info)
 	 * Protect the list operation against NMI by disabling the
 	 * counters on a global level. NOP for non NMI based counters.
 	 */
-	perf_flags = hw_perf_disable_all();
+	perf_flags = hw_perf_save_disable();
 	list_add_counter(counter, ctx);
-	hw_perf_restore_ctrl(perf_flags);
+	hw_perf_restore(perf_flags);
 
 	ctx->nr_counters++;
 
@@ -495,13 +495,13 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	/*
 	 * Rotate the first entry last (works just fine for group counters too):
 	 */
-	perf_flags = hw_perf_disable_all();
+	perf_flags = hw_perf_save_disable();
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
 		list_del(&counter->list_entry);
 		list_add_tail(&counter->list_entry, &ctx->counter_list);
 		break;
 	}
-	hw_perf_restore_ctrl(perf_flags);
+	hw_perf_restore(perf_flags);
 
 	spin_unlock(&ctx->lock);
 
-- 
cgit v1.2.3-70-g09d2


From bae43c9945ebeef15e7952e317efb02393d3bfc7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Dec 2008 14:03:20 +0100
Subject: perf counters: implement PERF_COUNT_TASK_CLOCK

Impact: add new perf-counter type

The 'task clock' counter counts the amount of time a task is executing,
in nanoseconds. It stops ticking when a task is scheduled out either due
to it blocking, sleeping or it being preempted.

This counter type is a Linux kernel based abstraction, it is available
even if the hardware does not support native hardware performance counters.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  7 +++++--
 kernel/perf_counter.c        | 22 ++++++++++++++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 68f6e3ad531..30c0ec8c1ee 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -50,8 +50,11 @@ enum hw_event_types {
 	 */
 	PERF_COUNT_CPU_CLOCK		= -1,
 	PERF_COUNT_TASK_CLOCK		= -2,
-	PERF_COUNT_PAGE_FAULTS		= -3,
-	PERF_COUNT_CONTEXT_SWITCHES	= -4,
+	/*
+	 * Future software events:
+	 */
+	/* PERF_COUNT_PAGE_FAULTS	= -3,
+	   PERF_COUNT_CONTEXT_SWITCHES	= -4, */
 };
 
 /*
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0e93fea1712..a0fe8474ee2 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -855,6 +855,25 @@ static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
 	.hw_perf_counter_read		= cpu_clock_perf_counter_read,
 };
 
+static void task_clock_perf_counter_enable(struct perf_counter *counter)
+{
+}
+
+static void task_clock_perf_counter_disable(struct perf_counter *counter)
+{
+}
+
+static void task_clock_perf_counter_read(struct perf_counter *counter)
+{
+	atomic64_counter_set(counter, current->se.sum_exec_runtime);
+}
+
+static const struct hw_perf_counter_ops perf_ops_task_clock = {
+	.hw_perf_counter_enable		= task_clock_perf_counter_enable,
+	.hw_perf_counter_disable	= task_clock_perf_counter_disable,
+	.hw_perf_counter_read		= task_clock_perf_counter_read,
+};
+
 static const struct hw_perf_counter_ops *
 sw_perf_counter_init(struct perf_counter *counter)
 {
@@ -864,6 +883,9 @@ sw_perf_counter_init(struct perf_counter *counter)
 	case PERF_COUNT_CPU_CLOCK:
 		hw_ops = &perf_ops_cpu_clock;
 		break;
+	case PERF_COUNT_TASK_CLOCK:
+		hw_ops = &perf_ops_task_clock;
+		break;
 	default:
 		break;
 	}
-- 
cgit v1.2.3-70-g09d2


From 1d1c7ddbfab358445a542715551301b7fc363e28 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Dec 2008 14:59:31 +0100
Subject: perf counters: add prctl interface to disable/enable counters

Add a way for self-monitoring tasks to disable/enable counters summarily,
via a prctl:

	PR_TASK_PERF_COUNTERS_DISABLE		31
	PR_TASK_PERF_COUNTERS_ENABLE		32

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  4 +++
 include/linux/prctl.h        |  3 ++
 kernel/perf_counter.c        | 86 ++++++++++++++++++++++++++++++++++++++++----
 kernel/sys.c                 |  7 ++++
 4 files changed, 93 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 30c0ec8c1ee..97d86c293ee 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -213,6 +213,8 @@ extern u64 hw_perf_save_disable(void);
 extern void hw_perf_restore(u64 ctrl);
 extern void atomic64_counter_set(struct perf_counter *counter, u64 val64);
 extern u64 atomic64_counter_read(struct perf_counter *counter);
+extern int perf_counter_task_disable(void);
+extern int perf_counter_task_enable(void);
 
 #else
 static inline void
@@ -226,6 +228,8 @@ static inline void perf_counter_notify(struct pt_regs *regs)		{ }
 static inline void perf_counter_print_debug(void)			{ }
 static inline void hw_perf_restore(u64 ctrl)			{ }
 static inline u64 hw_perf_save_disable(void)		      { return 0; }
+static inline int perf_counter_task_disable(void)	{ return -EINVAL; }
+static inline int perf_counter_task_enable(void)	{ return -EINVAL; }
 #endif
 
 #endif /* _LINUX_PERF_COUNTER_H */
diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index 48d887e3c6e..b00df4c79c6 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -85,4 +85,7 @@
 #define PR_SET_TIMERSLACK 29
 #define PR_GET_TIMERSLACK 30
 
+#define PR_TASK_PERF_COUNTERS_DISABLE		31
+#define PR_TASK_PERF_COUNTERS_ENABLE		32
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index a0fe8474ee2..4e679b91d8b 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -415,6 +415,9 @@ counter_sched_in(struct perf_counter *counter,
 		 struct perf_counter_context *ctx,
 		 int cpu)
 {
+	if (counter->active == -1)
+		return;
+
 	counter->hw_ops->hw_perf_counter_enable(counter);
 	counter->active = 1;
 	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
@@ -479,6 +482,79 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu)
 	cpuctx->task_ctx = ctx;
 }
 
+int perf_counter_task_disable(void)
+{
+	struct task_struct *curr = current;
+	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
+	struct perf_counter *counter;
+	u64 perf_flags;
+	int cpu;
+
+	if (likely(!ctx->nr_counters))
+		return 0;
+
+	local_irq_disable();
+	cpu = smp_processor_id();
+
+	perf_counter_task_sched_out(curr, cpu);
+
+	spin_lock(&ctx->lock);
+
+	/*
+	 * Disable all the counters:
+	 */
+	perf_flags = hw_perf_save_disable();
+
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		WARN_ON_ONCE(counter->active == 1);
+		counter->active = -1;
+	}
+	hw_perf_restore(perf_flags);
+
+	spin_unlock(&ctx->lock);
+
+	local_irq_enable();
+
+	return 0;
+}
+
+int perf_counter_task_enable(void)
+{
+	struct task_struct *curr = current;
+	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
+	struct perf_counter *counter;
+	u64 perf_flags;
+	int cpu;
+
+	if (likely(!ctx->nr_counters))
+		return 0;
+
+	local_irq_disable();
+	cpu = smp_processor_id();
+
+	spin_lock(&ctx->lock);
+
+	/*
+	 * Disable all the counters:
+	 */
+	perf_flags = hw_perf_save_disable();
+
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		if (counter->active != -1)
+			continue;
+		counter->active = 0;
+	}
+	hw_perf_restore(perf_flags);
+
+	spin_unlock(&ctx->lock);
+
+	perf_counter_task_sched_in(curr, cpu);
+
+	local_irq_enable();
+
+	return 0;
+}
+
 void perf_counter_task_tick(struct task_struct *curr, int cpu)
 {
 	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
@@ -951,13 +1027,9 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
  * @cpu:		target cpu
  * @group_fd:		group leader counter fd
  */
-asmlinkage int sys_perf_counter_open(
-
-	struct perf_counter_hw_event	*hw_event_uptr		__user,
-	pid_t				pid,
-	int				cpu,
-	int				group_fd)
-
+asmlinkage int
+sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
+		      pid_t pid, int cpu, int group_fd)
 {
 	struct perf_counter *counter, *group_leader;
 	struct perf_counter_hw_event hw_event;
diff --git a/kernel/sys.c b/kernel/sys.c
index 31deba8f7d1..0f66633be31 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -14,6 +14,7 @@
 #include <linux/prctl.h>
 #include <linux/highuid.h>
 #include <linux/fs.h>
+#include <linux/perf_counter.h>
 #include <linux/resource.h>
 #include <linux/kernel.h>
 #include <linux/kexec.h>
@@ -1716,6 +1717,12 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
 		case PR_SET_TSC:
 			error = SET_TSC_CTL(arg2);
 			break;
+		case PR_TASK_PERF_COUNTERS_DISABLE:
+			error = perf_counter_task_disable();
+			break;
+		case PR_TASK_PERF_COUNTERS_ENABLE:
+			error = perf_counter_task_enable();
+			break;
 		case PR_GET_TIMERSLACK:
 			error = current->timer_slack_ns;
 			break;
-- 
cgit v1.2.3-70-g09d2


From 6a930700c8b655a9e25e42fc4adc0b225ebbcefc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Dec 2008 15:17:03 +0100
Subject: perf counters: clean up state transitions

Impact: cleanup

Introduce a proper enum for the 3 states of a counter:

	PERF_COUNTER_STATE_OFF		= -1
	PERF_COUNTER_STATE_INACTIVE	=  0
	PERF_COUNTER_STATE_ACTIVE	=  1

and rename counter->active to counter->state and propagate the
changes everywhere.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_counter.c |  2 +-
 include/linux/perf_counter.h       | 11 ++++++++++-
 kernel/perf_counter.c              | 29 ++++++++++++++---------------
 3 files changed, 25 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 3e1dbebe22b..4854cca7fff 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -332,7 +332,7 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
 	 * Then store sibling timestamps (if any):
 	 */
 	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
-		if (!counter->active) {
+		if (counter->state != PERF_COUNTER_STATE_ACTIVE) {
 			/*
 			 * When counter was not in the overflow mask, we have to
 			 * read it from hardware. We read it as well, when it
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 97d86c293ee..8cb095fa442 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -127,6 +127,15 @@ struct hw_perf_counter_ops {
 	void (*hw_perf_counter_read)	(struct perf_counter *counter);
 };
 
+/**
+ * enum perf_counter_active_state - the states of a counter
+ */
+enum perf_counter_active_state {
+	PERF_COUNTER_STATE_OFF		= -1,
+	PERF_COUNTER_STATE_INACTIVE	=  0,
+	PERF_COUNTER_STATE_ACTIVE	=  1,
+};
+
 /**
  * struct perf_counter - performance counter kernel representation:
  */
@@ -136,7 +145,7 @@ struct perf_counter {
 	struct perf_counter		*group_leader;
 	const struct hw_perf_counter_ops *hw_ops;
 
-	int				active;
+	enum perf_counter_active_state	state;
 #if BITS_PER_LONG == 64
 	atomic64_t			count;
 #else
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 4e679b91d8b..559130b8774 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -167,9 +167,9 @@ static void __perf_counter_remove_from_context(void *info)
 
 	spin_lock(&ctx->lock);
 
-	if (counter->active) {
+	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
 		counter->hw_ops->hw_perf_counter_disable(counter);
-		counter->active = 0;
+		counter->state = PERF_COUNTER_STATE_INACTIVE;
 		ctx->nr_active--;
 		cpuctx->active_oncpu--;
 		counter->task = NULL;
@@ -281,7 +281,7 @@ static void __perf_install_in_context(void *info)
 
 	if (cpuctx->active_oncpu < perf_max_counters) {
 		counter->hw_ops->hw_perf_counter_enable(counter);
-		counter->active = 1;
+		counter->state = PERF_COUNTER_STATE_ACTIVE;
 		counter->oncpu = cpu;
 		ctx->nr_active++;
 		cpuctx->active_oncpu++;
@@ -328,7 +328,6 @@ retry:
 
 	spin_lock_irq(&ctx->lock);
 	/*
-	 * If the context is active and the counter has not been added
 	 * we need to retry the smp call.
 	 */
 	if (ctx->nr_active && list_empty(&counter->list_entry)) {
@@ -353,12 +352,12 @@ counter_sched_out(struct perf_counter *counter,
 		  struct perf_cpu_context *cpuctx,
 		  struct perf_counter_context *ctx)
 {
-	if (!counter->active)
+	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
 		return;
 
 	counter->hw_ops->hw_perf_counter_disable(counter);
-	counter->active	=  0;
-	counter->oncpu	= -1;
+	counter->state = PERF_COUNTER_STATE_INACTIVE;
+	counter->oncpu = -1;
 
 	cpuctx->active_oncpu--;
 	ctx->nr_active--;
@@ -415,11 +414,11 @@ counter_sched_in(struct perf_counter *counter,
 		 struct perf_counter_context *ctx,
 		 int cpu)
 {
-	if (counter->active == -1)
+	if (counter->state == PERF_COUNTER_STATE_OFF)
 		return;
 
 	counter->hw_ops->hw_perf_counter_enable(counter);
-	counter->active = 1;
+	counter->state = PERF_COUNTER_STATE_ACTIVE;
 	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
 
 	cpuctx->active_oncpu++;
@@ -506,8 +505,8 @@ int perf_counter_task_disable(void)
 	perf_flags = hw_perf_save_disable();
 
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		WARN_ON_ONCE(counter->active == 1);
-		counter->active = -1;
+		WARN_ON_ONCE(counter->state == PERF_COUNTER_STATE_ACTIVE);
+		counter->state = PERF_COUNTER_STATE_OFF;
 	}
 	hw_perf_restore(perf_flags);
 
@@ -540,9 +539,9 @@ int perf_counter_task_enable(void)
 	perf_flags = hw_perf_save_disable();
 
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		if (counter->active != -1)
+		if (counter->state != PERF_COUNTER_STATE_OFF)
 			continue;
-		counter->active = 0;
+		counter->state = PERF_COUNTER_STATE_INACTIVE;
 	}
 	hw_perf_restore(perf_flags);
 
@@ -620,7 +619,7 @@ static u64 perf_counter_read(struct perf_counter *counter)
 	 * If counter is enabled and currently active on a CPU, update the
 	 * value in the counter structure:
 	 */
-	if (counter->active) {
+	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
 		smp_call_function_single(counter->oncpu,
 					 __hw_perf_counter_read, counter, 1);
 	}
@@ -673,7 +672,7 @@ static struct perf_data *perf_switch_irq_data(struct perf_counter *counter)
 
 retry:
 	spin_lock_irq(&ctx->lock);
-	if (!counter->active) {
+	if (counter->state != PERF_COUNTER_STATE_ACTIVE) {
 		counter->irqdata = counter->usrdata;
 		counter->usrdata = oldirqdata;
 		spin_unlock_irq(&ctx->lock);
-- 
cgit v1.2.3-70-g09d2


From ee06094f8279e1312fc0a31591320cc7b6f0ab1e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 13 Dec 2008 09:00:03 +0100
Subject: perfcounters: restructure x86 counter math

Impact: restructure code

Change counter math from absolute values to clear delta logic.

We try to extract elapsed deltas from the raw hw counter - and put
that into the generic counter.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig                   |   2 +-
 arch/x86/kernel/cpu/perf_counter.c | 230 ++++++++++++++++++++-----------------
 include/linux/perf_counter.h       |  15 ++-
 kernel/perf_counter.c              |  68 +----------
 4 files changed, 137 insertions(+), 178 deletions(-)

(limited to 'include')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f2fdc186724..fe94490bab6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -643,7 +643,7 @@ config X86_UP_IOAPIC
 config X86_LOCAL_APIC
 	def_bool y
 	depends on X86_64 || (X86_32 && (X86_UP_APIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH))
-	select HAVE_PERF_COUNTERS
+	select HAVE_PERF_COUNTERS if (!M386 && !M486)
 
 config X86_IO_APIC
 	def_bool y
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index b903f8df72b..5afae13d8d5 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -53,6 +53,48 @@ const int intel_perfmon_event_map[] =
 
 const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map);
 
+/*
+ * Propagate counter elapsed time into the generic counter.
+ * Can only be executed on the CPU where the counter is active.
+ * Returns the delta events processed.
+ */
+static void
+x86_perf_counter_update(struct perf_counter *counter,
+			struct hw_perf_counter *hwc, int idx)
+{
+	u64 prev_raw_count, new_raw_count, delta;
+
+	WARN_ON_ONCE(counter->state != PERF_COUNTER_STATE_ACTIVE);
+	/*
+	 * Careful: an NMI might modify the previous counter value.
+	 *
+	 * Our tactic to handle this is to first atomically read and
+	 * exchange a new raw count - then add that new-prev delta
+	 * count to the generic counter atomically:
+	 */
+again:
+	prev_raw_count = atomic64_read(&hwc->prev_count);
+	rdmsrl(hwc->counter_base + idx, new_raw_count);
+
+	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+					new_raw_count) != prev_raw_count)
+		goto again;
+
+	/*
+	 * Now we have the new raw value and have updated the prev
+	 * timestamp already. We can now calculate the elapsed delta
+	 * (counter-)time and add that to the generic counter.
+	 *
+	 * Careful, not all hw sign-extends above the physical width
+	 * of the count, so we do that by clipping the delta to 32 bits:
+	 */
+	delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count);
+	WARN_ON_ONCE((int)delta < 0);
+
+	atomic64_add(delta, &counter->count);
+	atomic64_sub(delta, &hwc->period_left);
+}
+
 /*
  * Setup the hardware configuration for a given hw_event_type
  */
@@ -90,10 +132,10 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	 * so we install an artificial 1<<31 period regardless of
 	 * the generic counter period:
 	 */
-	if (!hwc->irq_period)
+	if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
 		hwc->irq_period = 0x7FFFFFFF;
 
-	hwc->next_count	= -(s32)hwc->irq_period;
+	atomic64_set(&hwc->period_left, hwc->irq_period);
 
 	/*
 	 * Raw event type provide the config in the event structure
@@ -118,12 +160,6 @@ void hw_perf_enable_all(void)
 	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0);
 }
 
-void hw_perf_restore(u64 ctrl)
-{
-	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0);
-}
-EXPORT_SYMBOL_GPL(hw_perf_restore);
-
 u64 hw_perf_save_disable(void)
 {
 	u64 ctrl;
@@ -134,27 +170,74 @@ u64 hw_perf_save_disable(void)
 }
 EXPORT_SYMBOL_GPL(hw_perf_save_disable);
 
+void hw_perf_restore(u64 ctrl)
+{
+	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0);
+}
+EXPORT_SYMBOL_GPL(hw_perf_restore);
+
 static inline void
-__x86_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx)
+__x86_perf_counter_disable(struct perf_counter *counter,
+			   struct hw_perf_counter *hwc, unsigned int idx)
 {
-	wrmsr(hwc->config_base + idx, hwc->config, 0);
+	int err;
+
+	err = wrmsr_safe(hwc->config_base + idx, hwc->config, 0);
+	WARN_ON_ONCE(err);
 }
 
-static DEFINE_PER_CPU(u64, prev_next_count[MAX_HW_COUNTERS]);
+static DEFINE_PER_CPU(u64, prev_left[MAX_HW_COUNTERS]);
 
-static void __hw_perf_counter_set_period(struct hw_perf_counter *hwc, int idx)
+/*
+ * Set the next IRQ period, based on the hwc->period_left value.
+ * To be called with the counter disabled in hw:
+ */
+static void
+__hw_perf_counter_set_period(struct perf_counter *counter,
+			     struct hw_perf_counter *hwc, int idx)
 {
-	per_cpu(prev_next_count[idx], smp_processor_id()) = hwc->next_count;
+	s32 left = atomic64_read(&hwc->period_left);
+	s32 period = hwc->irq_period;
+
+	WARN_ON_ONCE(period <= 0);
+
+	/*
+	 * If we are way outside a reasoable range then just skip forward:
+	 */
+	if (unlikely(left <= -period)) {
+		left = period;
+		atomic64_set(&hwc->period_left, left);
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		atomic64_set(&hwc->period_left, left);
+	}
 
-	wrmsr(hwc->counter_base + idx, hwc->next_count, 0);
+	WARN_ON_ONCE(left <= 0);
+
+	per_cpu(prev_left[idx], smp_processor_id()) = left;
+
+	/*
+	 * The hw counter starts counting from this counter offset,
+	 * mark it to be able to extra future deltas:
+	 */
+	atomic64_set(&hwc->prev_count, (u64)(s64)-left);
+
+	wrmsr(hwc->counter_base + idx, -left, 0);
 }
 
-static void __x86_perf_counter_enable(struct hw_perf_counter *hwc, int idx)
+static void
+__x86_perf_counter_enable(struct perf_counter *counter,
+			  struct hw_perf_counter *hwc, int idx)
 {
 	wrmsr(hwc->config_base + idx,
 	      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
 }
 
+/*
+ * Find a PMC slot for the freshly enabled / scheduled in counter:
+ */
 static void x86_perf_counter_enable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
@@ -170,55 +253,17 @@ static void x86_perf_counter_enable(struct perf_counter *counter)
 
 	perf_counters_lapic_init(hwc->nmi);
 
-	__x86_perf_counter_disable(hwc, idx);
+	__x86_perf_counter_disable(counter, hwc, idx);
 
 	cpuc->counters[idx] = counter;
 
-	__hw_perf_counter_set_period(hwc, idx);
-	__x86_perf_counter_enable(hwc, idx);
-}
-
-static void __hw_perf_save_counter(struct perf_counter *counter,
-				   struct hw_perf_counter *hwc, int idx)
-{
-	s64 raw = -1;
-	s64 delta;
-
-	/*
-	 * Get the raw hw counter value:
-	 */
-	rdmsrl(hwc->counter_base + idx, raw);
-
-	/*
-	 * Rebase it to zero (it started counting at -irq_period),
-	 * to see the delta since ->prev_count:
-	 */
-	delta = (s64)hwc->irq_period + (s64)(s32)raw;
-
-	atomic64_counter_set(counter, hwc->prev_count + delta);
-
-	/*
-	 * Adjust the ->prev_count offset - if we went beyond
-	 * irq_period of units, then we got an IRQ and the counter
-	 * was set back to -irq_period:
-	 */
-	while (delta >= (s64)hwc->irq_period) {
-		hwc->prev_count += hwc->irq_period;
-		delta -= (s64)hwc->irq_period;
-	}
-
-	/*
-	 * Calculate the next raw counter value we'll write into
-	 * the counter at the next sched-in time:
-	 */
-	delta -= (s64)hwc->irq_period;
-
-	hwc->next_count = (s32)delta;
+	__hw_perf_counter_set_period(counter, hwc, idx);
+	__x86_perf_counter_enable(counter, hwc, idx);
 }
 
 void perf_counter_print_debug(void)
 {
-	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, next_count;
+	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left;
 	int cpu, idx;
 
 	if (!nr_hw_counters)
@@ -241,14 +286,14 @@ void perf_counter_print_debug(void)
 		rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl);
 		rdmsrl(MSR_ARCH_PERFMON_PERFCTR0  + idx, pmc_count);
 
-		next_count = per_cpu(prev_next_count[idx], cpu);
+		prev_left = per_cpu(prev_left[idx], cpu);
 
 		printk(KERN_INFO "CPU#%d: PMC%d ctrl:  %016llx\n",
 			cpu, idx, pmc_ctrl);
 		printk(KERN_INFO "CPU#%d: PMC%d count: %016llx\n",
 			cpu, idx, pmc_count);
-		printk(KERN_INFO "CPU#%d: PMC%d next:  %016llx\n",
-			cpu, idx, next_count);
+		printk(KERN_INFO "CPU#%d: PMC%d left:  %016llx\n",
+			cpu, idx, prev_left);
 	}
 	local_irq_enable();
 }
@@ -259,29 +304,16 @@ static void x86_perf_counter_disable(struct perf_counter *counter)
 	struct hw_perf_counter *hwc = &counter->hw;
 	unsigned int idx = hwc->idx;
 
-	__x86_perf_counter_disable(hwc, idx);
+	__x86_perf_counter_disable(counter, hwc, idx);
 
 	clear_bit(idx, cpuc->used);
 	cpuc->counters[idx] = NULL;
-	__hw_perf_save_counter(counter, hwc, idx);
-}
 
-static void x86_perf_counter_read(struct perf_counter *counter)
-{
-	struct hw_perf_counter *hwc = &counter->hw;
-	unsigned long addr = hwc->counter_base + hwc->idx;
-	s64 offs, val = -1LL;
-	s32 val32;
-
-	/* Careful: NMI might modify the counter offset */
-	do {
-		offs = hwc->prev_count;
-		rdmsrl(addr, val);
-	} while (offs != hwc->prev_count);
-
-	val32 = (s32) val;
-	val = (s64)hwc->irq_period + (s64)val32;
-	atomic64_counter_set(counter, hwc->prev_count + val);
+	/*
+	 * Drain the remaining delta count out of a counter
+	 * that we are disabling:
+	 */
+	x86_perf_counter_update(counter, hwc, idx);
 }
 
 static void perf_store_irq_data(struct perf_counter *counter, u64 data)
@@ -299,7 +331,8 @@ static void perf_store_irq_data(struct perf_counter *counter, u64 data)
 }
 
 /*
- * NMI-safe enable method:
+ * Save and restart an expired counter. Called by NMI contexts,
+ * so it has to be careful about preempting normal counter ops:
  */
 static void perf_save_and_restart(struct perf_counter *counter)
 {
@@ -309,45 +342,25 @@ static void perf_save_and_restart(struct perf_counter *counter)
 
 	rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl);
 
-	__hw_perf_save_counter(counter, hwc, idx);
-	__hw_perf_counter_set_period(hwc, idx);
+	x86_perf_counter_update(counter, hwc, idx);
+	__hw_perf_counter_set_period(counter, hwc, idx);
 
 	if (pmc_ctrl & ARCH_PERFMON_EVENTSEL0_ENABLE)
-		__x86_perf_counter_enable(hwc, idx);
+		__x86_perf_counter_enable(counter, hwc, idx);
 }
 
 static void
 perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
 {
 	struct perf_counter *counter, *group_leader = sibling->group_leader;
-	int bit;
-
-	/*
-	 * Store the counter's own timestamp first:
-	 */
-	perf_store_irq_data(sibling, sibling->hw_event.type);
-	perf_store_irq_data(sibling, atomic64_counter_read(sibling));
 
 	/*
-	 * Then store sibling timestamps (if any):
+	 * Store sibling timestamps (if any):
 	 */
 	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
-		if (counter->state != PERF_COUNTER_STATE_ACTIVE) {
-			/*
-			 * When counter was not in the overflow mask, we have to
-			 * read it from hardware. We read it as well, when it
-			 * has not been read yet and clear the bit in the
-			 * status mask.
-			 */
-			bit = counter->hw.idx;
-			if (!test_bit(bit, (unsigned long *) overflown) ||
-			    test_bit(bit, (unsigned long *) status)) {
-				clear_bit(bit, (unsigned long *) status);
-				perf_save_and_restart(counter);
-			}
-		}
+		x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
 		perf_store_irq_data(sibling, counter->hw_event.type);
-		perf_store_irq_data(sibling, atomic64_counter_read(counter));
+		perf_store_irq_data(sibling, atomic64_read(&counter->count));
 	}
 }
 
@@ -540,6 +553,11 @@ void __init init_hw_perf_counters(void)
 	perf_counters_initialized = true;
 }
 
+static void x86_perf_counter_read(struct perf_counter *counter)
+{
+	x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
+}
+
 static const struct hw_perf_counter_ops x86_perf_counter_ops = {
 	.hw_perf_counter_enable		= x86_perf_counter_enable,
 	.hw_perf_counter_disable	= x86_perf_counter_disable,
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 8cb095fa442..72460289c65 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -91,14 +91,16 @@ struct perf_counter_hw_event {
  * struct hw_perf_counter - performance counter hardware details:
  */
 struct hw_perf_counter {
+#ifdef CONFIG_PERF_COUNTERS
 	u64				config;
 	unsigned long			config_base;
 	unsigned long			counter_base;
 	int				nmi;
 	unsigned int			idx;
-	u64				prev_count;
+	atomic64_t			prev_count;
 	u64				irq_period;
-	s32				next_count;
+	atomic64_t			period_left;
+#endif
 };
 
 /*
@@ -140,17 +142,15 @@ enum perf_counter_active_state {
  * struct perf_counter - performance counter kernel representation:
  */
 struct perf_counter {
+#ifdef CONFIG_PERF_COUNTERS
 	struct list_head		list_entry;
 	struct list_head		sibling_list;
 	struct perf_counter		*group_leader;
 	const struct hw_perf_counter_ops *hw_ops;
 
 	enum perf_counter_active_state	state;
-#if BITS_PER_LONG == 64
 	atomic64_t			count;
-#else
-	atomic_t			count32[2];
-#endif
+
 	struct perf_counter_hw_event	hw_event;
 	struct hw_perf_counter		hw;
 
@@ -172,6 +172,7 @@ struct perf_counter {
 	struct perf_data		*irqdata;
 	struct perf_data		*usrdata;
 	struct perf_data		data[2];
+#endif
 };
 
 /**
@@ -220,8 +221,6 @@ extern void perf_counter_notify(struct pt_regs *regs);
 extern void perf_counter_print_debug(void);
 extern u64 hw_perf_save_disable(void);
 extern void hw_perf_restore(u64 ctrl);
-extern void atomic64_counter_set(struct perf_counter *counter, u64 val64);
-extern u64 atomic64_counter_read(struct perf_counter *counter);
 extern int perf_counter_task_disable(void);
 extern int perf_counter_task_enable(void);
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 559130b8774..416861ce8b2 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -44,67 +44,9 @@ hw_perf_counter_init(struct perf_counter *counter)
 }
 
 u64 __weak hw_perf_save_disable(void)		{ return 0; }
-void __weak hw_perf_restore(u64 ctrl)	{ }
+void __weak hw_perf_restore(u64 ctrl)		{ }
 void __weak hw_perf_counter_setup(void)		{ }
 
-#if BITS_PER_LONG == 64
-
-/*
- * Read the cached counter in counter safe against cross CPU / NMI
- * modifications. 64 bit version - no complications.
- */
-static inline u64 perf_counter_read_safe(struct perf_counter *counter)
-{
-	return (u64) atomic64_read(&counter->count);
-}
-
-void atomic64_counter_set(struct perf_counter *counter, u64 val)
-{
-	atomic64_set(&counter->count, val);
-}
-
-u64 atomic64_counter_read(struct perf_counter *counter)
-{
-	return atomic64_read(&counter->count);
-}
-
-#else
-
-/*
- * Read the cached counter in counter safe against cross CPU / NMI
- * modifications. 32 bit version.
- */
-static u64 perf_counter_read_safe(struct perf_counter *counter)
-{
-	u32 cntl, cnth;
-
-	local_irq_disable();
-	do {
-		cnth = atomic_read(&counter->count32[1]);
-		cntl = atomic_read(&counter->count32[0]);
-	} while (cnth != atomic_read(&counter->count32[1]));
-
-	local_irq_enable();
-
-	return cntl | ((u64) cnth) << 32;
-}
-
-void atomic64_counter_set(struct perf_counter *counter, u64 val64)
-{
-	u32 *val32 = (void *)&val64;
-
-	atomic_set(counter->count32 + 0, *(val32 + 0));
-	atomic_set(counter->count32 + 1, *(val32 + 1));
-}
-
-u64 atomic64_counter_read(struct perf_counter *counter)
-{
-	return atomic_read(counter->count32 + 0) |
-		(u64) atomic_read(counter->count32 + 1) << 32;
-}
-
-#endif
-
 static void
 list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 {
@@ -280,11 +222,11 @@ static void __perf_install_in_context(void *info)
 	ctx->nr_counters++;
 
 	if (cpuctx->active_oncpu < perf_max_counters) {
-		counter->hw_ops->hw_perf_counter_enable(counter);
 		counter->state = PERF_COUNTER_STATE_ACTIVE;
 		counter->oncpu = cpu;
 		ctx->nr_active++;
 		cpuctx->active_oncpu++;
+		counter->hw_ops->hw_perf_counter_enable(counter);
 	}
 
 	if (!ctx->task && cpuctx->max_pertask)
@@ -624,7 +566,7 @@ static u64 perf_counter_read(struct perf_counter *counter)
 					 __hw_perf_counter_read, counter, 1);
 	}
 
-	return perf_counter_read_safe(counter);
+	return atomic64_read(&counter->count);
 }
 
 /*
@@ -921,7 +863,7 @@ static void cpu_clock_perf_counter_read(struct perf_counter *counter)
 {
 	int cpu = raw_smp_processor_id();
 
-	atomic64_counter_set(counter, cpu_clock(cpu));
+	atomic64_set(&counter->count, cpu_clock(cpu));
 }
 
 static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
@@ -940,7 +882,7 @@ static void task_clock_perf_counter_disable(struct perf_counter *counter)
 
 static void task_clock_perf_counter_read(struct perf_counter *counter)
 {
-	atomic64_counter_set(counter, current->se.sum_exec_runtime);
+	atomic64_set(&counter->count, current->se.sum_exec_runtime);
 }
 
 static const struct hw_perf_counter_ops perf_ops_task_clock = {
-- 
cgit v1.2.3-70-g09d2


From 9b51f66dcb09ac5eb6bc68fc111d5c7a1e0131d6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 12 Dec 2008 13:49:45 +0100
Subject: perfcounters: implement "counter inheritance"

Impact: implement new performance feature

Counter inheritance can be used to run performance counters in a workload,
transparently - and pipe back the counter results to the parent counter.

Inheritance for performance counters works the following way: when creating
a counter it can be marked with the .inherit=1 flag. Such counters are then
'inherited' by all child tasks (be they fork()-ed or clone()-ed). These
counters get inherited through exec() boundaries as well (except through
setuid boundaries).

The counter values get added back to the parent counter(s) when the child
task(s) exit - much like stime/utime statistics are gathered. So inherited
counters are ideal to gather summary statistics about an application's
behavior via shell commands, without having to modify that application.

The timec.c command utilizes counter inheritance:

  http://redhat.com/~mingo/perfcounters/timec.c

Sample output:

   $ ./timec -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null

   Performance counter stats for 'ls':

           163516953 instructions
                2295 cache-misses
             2855182 branch-misses

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  24 +++--
 kernel/exit.c                |   7 +-
 kernel/perf_counter.c        | 248 +++++++++++++++++++++++++++++++++++--------
 3 files changed, 228 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 72460289c65..e5d25bf8f74 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -75,10 +75,11 @@ struct perf_counter_hw_event {
 	u64			irq_period;
 	u32			record_type;
 
-	u32			disabled     :  1, /* off by default */
-				nmi	     :  1, /* NMI sampling   */
-				raw	     :  1, /* raw event type */
-				__reserved_1 : 29;
+	u32			disabled     :  1, /* off by default      */
+				nmi	     :  1, /* NMI sampling        */
+				raw	     :  1, /* raw event type      */
+				inherit	     :  1, /* children inherit it */
+				__reserved_1 : 28;
 
 	u64			__reserved_2;
 };
@@ -138,6 +139,8 @@ enum perf_counter_active_state {
 	PERF_COUNTER_STATE_ACTIVE	=  1,
 };
 
+struct file;
+
 /**
  * struct perf_counter - performance counter kernel representation:
  */
@@ -156,7 +159,10 @@ struct perf_counter {
 
 	struct perf_counter_context	*ctx;
 	struct task_struct		*task;
+	struct file			*filp;
 
+	unsigned int			nr_inherited;
+	struct perf_counter		*parent;
 	/*
 	 * Protect attach/detach:
 	 */
@@ -210,13 +216,16 @@ struct perf_cpu_context {
 extern int perf_max_counters;
 
 #ifdef CONFIG_PERF_COUNTERS
+extern void
+perf_counter_show(struct perf_counter *counter, char *str, int trace);
 extern const struct hw_perf_counter_ops *
 hw_perf_counter_init(struct perf_counter *counter);
 
 extern void perf_counter_task_sched_in(struct task_struct *task, int cpu);
 extern void perf_counter_task_sched_out(struct task_struct *task, int cpu);
 extern void perf_counter_task_tick(struct task_struct *task, int cpu);
-extern void perf_counter_init_task(struct task_struct *task);
+extern void perf_counter_init_task(struct task_struct *child);
+extern void perf_counter_exit_task(struct task_struct *child);
 extern void perf_counter_notify(struct pt_regs *regs);
 extern void perf_counter_print_debug(void);
 extern u64 hw_perf_save_disable(void);
@@ -226,12 +235,15 @@ extern int perf_counter_task_enable(void);
 
 #else
 static inline void
+perf_counter_show(struct perf_counter *counter, char *str, int trace)   { }
+static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
 static inline void
 perf_counter_task_sched_out(struct task_struct *task, int cpu)		{ }
 static inline void
 perf_counter_task_tick(struct task_struct *task, int cpu)		{ }
-static inline void perf_counter_init_task(struct task_struct *task)	{ }
+static inline void perf_counter_init_task(struct task_struct *child)	{ }
+static inline void perf_counter_exit_task(struct task_struct *child)	{ }
 static inline void perf_counter_notify(struct pt_regs *regs)		{ }
 static inline void perf_counter_print_debug(void)			{ }
 static inline void hw_perf_restore(u64 ctrl)			{ }
diff --git a/kernel/exit.c b/kernel/exit.c
index 2d8be7ebb0f..d336c90a5f1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1093,11 +1093,12 @@ NORET_TYPE void do_exit(long code)
 	mpol_put(tsk->mempolicy);
 	tsk->mempolicy = NULL;
 #endif
-#ifdef CONFIG_FUTEX
 	/*
-	 * This must happen late, after the PID is not
-	 * hashed anymore:
+	 * These must happen late, after the PID is not
+	 * hashed anymore, but still at a point that may sleep:
 	 */
+	perf_counter_exit_task(tsk);
+#ifdef CONFIG_FUTEX
 	if (unlikely(!list_empty(&tsk->pi_state_list)))
 		exit_pi_state_list(tsk);
 	if (unlikely(current->pi_state_cache))
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 416861ce8b2..f5e81dd193d 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -80,8 +80,6 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 
 		list_del_init(&sibling->list_entry);
 		list_add_tail(&sibling->list_entry, &ctx->counter_list);
-		WARN_ON_ONCE(!sibling->group_leader);
-		WARN_ON_ONCE(sibling->group_leader == sibling);
 		sibling->group_leader = sibling;
 	}
 }
@@ -97,6 +95,7 @@ static void __perf_counter_remove_from_context(void *info)
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_counter *counter = info;
 	struct perf_counter_context *ctx = counter->ctx;
+	unsigned long flags;
 	u64 perf_flags;
 
 	/*
@@ -107,7 +106,7 @@ static void __perf_counter_remove_from_context(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	spin_lock(&ctx->lock);
+	spin_lock_irqsave(&ctx->lock, flags);
 
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
 		counter->hw_ops->hw_perf_counter_disable(counter);
@@ -136,7 +135,7 @@ static void __perf_counter_remove_from_context(void *info)
 			    perf_max_counters - perf_reserved_percpu);
 	}
 
-	spin_unlock(&ctx->lock);
+	spin_unlock_irqrestore(&ctx->lock, flags);
 }
 
 
@@ -199,6 +198,7 @@ static void __perf_install_in_context(void *info)
 	struct perf_counter *counter = info;
 	struct perf_counter_context *ctx = counter->ctx;
 	int cpu = smp_processor_id();
+	unsigned long flags;
 	u64 perf_flags;
 
 	/*
@@ -209,7 +209,7 @@ static void __perf_install_in_context(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	spin_lock(&ctx->lock);
+	spin_lock_irqsave(&ctx->lock, flags);
 
 	/*
 	 * Protect the list operation against NMI by disabling the
@@ -232,7 +232,7 @@ static void __perf_install_in_context(void *info)
 	if (!ctx->task && cpuctx->max_pertask)
 		cpuctx->max_pertask--;
 
-	spin_unlock(&ctx->lock);
+	spin_unlock_irqrestore(&ctx->lock, flags);
 }
 
 /*
@@ -446,10 +446,9 @@ int perf_counter_task_disable(void)
 	 */
 	perf_flags = hw_perf_save_disable();
 
-	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		WARN_ON_ONCE(counter->state == PERF_COUNTER_STATE_ACTIVE);
+	list_for_each_entry(counter, &ctx->counter_list, list_entry)
 		counter->state = PERF_COUNTER_STATE_OFF;
-	}
+
 	hw_perf_restore(perf_flags);
 
 	spin_unlock(&ctx->lock);
@@ -525,26 +524,6 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	perf_counter_task_sched_in(curr, cpu);
 }
 
-/*
- * Initialize the perf_counter context in a task_struct:
- */
-static void
-__perf_counter_init_context(struct perf_counter_context *ctx,
-			    struct task_struct *task)
-{
-	spin_lock_init(&ctx->lock);
-	INIT_LIST_HEAD(&ctx->counter_list);
-	ctx->nr_counters	= 0;
-	ctx->task		= task;
-}
-/*
- * Initialize the perf_counter context in task_struct
- */
-void perf_counter_init_task(struct task_struct *task)
-{
-	__perf_counter_init_context(&task->perf_counter_ctx, task);
-}
-
 /*
  * Cross CPU call to read the hardware counter
  */
@@ -663,7 +642,6 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 		cpuctx = &per_cpu(perf_cpu_context, cpu);
 		ctx = &cpuctx->ctx;
 
-		WARN_ON_ONCE(ctx->task);
 		return ctx;
 	}
 
@@ -915,12 +893,13 @@ sw_perf_counter_init(struct perf_counter *counter)
 static struct perf_counter *
 perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 		   int cpu,
-		   struct perf_counter *group_leader)
+		   struct perf_counter *group_leader,
+		   gfp_t gfpflags)
 {
 	const struct hw_perf_counter_ops *hw_ops;
 	struct perf_counter *counter;
 
-	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
+	counter = kzalloc(sizeof(*counter), gfpflags);
 	if (!counter)
 		return NULL;
 
@@ -947,9 +926,8 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	hw_ops = NULL;
 	if (!hw_event->raw && hw_event->type < 0)
 		hw_ops = sw_perf_counter_init(counter);
-	if (!hw_ops) {
+	if (!hw_ops)
 		hw_ops = hw_perf_counter_init(counter);
-	}
 
 	if (!hw_ops) {
 		kfree(counter);
@@ -975,8 +953,10 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
 	struct perf_counter *counter, *group_leader;
 	struct perf_counter_hw_event hw_event;
 	struct perf_counter_context *ctx;
+	struct file *counter_file = NULL;
 	struct file *group_file = NULL;
 	int fput_needed = 0;
+	int fput_needed2 = 0;
 	int ret;
 
 	if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0)
@@ -1017,25 +997,29 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
 	}
 
 	ret = -EINVAL;
-	counter = perf_counter_alloc(&hw_event, cpu, group_leader);
+	counter = perf_counter_alloc(&hw_event, cpu, group_leader, GFP_KERNEL);
 	if (!counter)
 		goto err_put_context;
 
-	perf_install_in_context(ctx, counter, cpu);
-
 	ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
 	if (ret < 0)
-		goto err_remove_free_put_context;
+		goto err_free_put_context;
+
+	counter_file = fget_light(ret, &fput_needed2);
+	if (!counter_file)
+		goto err_free_put_context;
+
+	counter->filp = counter_file;
+	perf_install_in_context(ctx, counter, cpu);
+
+	fput_light(counter_file, fput_needed2);
 
 out_fput:
 	fput_light(group_file, fput_needed);
 
 	return ret;
 
-err_remove_free_put_context:
-	mutex_lock(&counter->mutex);
-	perf_counter_remove_from_context(counter);
-	mutex_unlock(&counter->mutex);
+err_free_put_context:
 	kfree(counter);
 
 err_put_context:
@@ -1044,6 +1028,186 @@ err_put_context:
 	goto out_fput;
 }
 
+/*
+ * Initialize the perf_counter context in a task_struct:
+ */
+static void
+__perf_counter_init_context(struct perf_counter_context *ctx,
+			    struct task_struct *task)
+{
+	memset(ctx, 0, sizeof(*ctx));
+	spin_lock_init(&ctx->lock);
+	INIT_LIST_HEAD(&ctx->counter_list);
+	ctx->task = task;
+}
+
+/*
+ * inherit a counter from parent task to child task:
+ */
+static int
+inherit_counter(struct perf_counter *parent_counter,
+	      struct task_struct *parent,
+	      struct perf_counter_context *parent_ctx,
+	      struct task_struct *child,
+	      struct perf_counter_context *child_ctx)
+{
+	struct perf_counter *child_counter;
+
+	child_counter = perf_counter_alloc(&parent_counter->hw_event,
+					    parent_counter->cpu, NULL,
+					    GFP_ATOMIC);
+	if (!child_counter)
+		return -ENOMEM;
+
+	/*
+	 * Link it up in the child's context:
+	 */
+	child_counter->ctx = child_ctx;
+	child_counter->task = child;
+	list_add_counter(child_counter, child_ctx);
+	child_ctx->nr_counters++;
+
+	child_counter->parent = parent_counter;
+	parent_counter->nr_inherited++;
+	/*
+	 * inherit into child's child as well:
+	 */
+	child_counter->hw_event.inherit = 1;
+
+	/*
+	 * Get a reference to the parent filp - we will fput it
+	 * when the child counter exits. This is safe to do because
+	 * we are in the parent and we know that the filp still
+	 * exists and has a nonzero count:
+	 */
+	atomic_long_inc(&parent_counter->filp->f_count);
+
+	return 0;
+}
+
+static void
+__perf_counter_exit_task(struct task_struct *child,
+			 struct perf_counter *child_counter,
+			 struct perf_counter_context *child_ctx)
+{
+	struct perf_counter *parent_counter;
+	u64 parent_val, child_val;
+	u64 perf_flags;
+
+	/*
+	 * Disable and unlink this counter.
+	 *
+	 * Be careful about zapping the list - IRQ/NMI context
+	 * could still be processing it:
+	 */
+	local_irq_disable();
+	perf_flags = hw_perf_save_disable();
+
+	if (child_counter->state == PERF_COUNTER_STATE_ACTIVE)
+		child_counter->hw_ops->hw_perf_counter_disable(child_counter);
+	list_del_init(&child_counter->list_entry);
+
+	hw_perf_restore(perf_flags);
+	local_irq_enable();
+
+	parent_counter = child_counter->parent;
+	/*
+	 * It can happen that parent exits first, and has counters
+	 * that are still around due to the child reference. These
+	 * counters need to be zapped - but otherwise linger.
+	 */
+	if (!parent_counter)
+		return;
+
+	parent_val = atomic64_read(&parent_counter->count);
+	child_val = atomic64_read(&child_counter->count);
+
+	/*
+	 * Add back the child's count to the parent's count:
+	 */
+	atomic64_add(child_val, &parent_counter->count);
+
+	fput(parent_counter->filp);
+
+	kfree(child_counter);
+}
+
+/*
+ * When a child task exist, feed back counter values to parent counters.
+ *
+ * Note: we are running in child context, but the PID is not hashed
+ * anymore so new counters will not be added.
+ */
+void perf_counter_exit_task(struct task_struct *child)
+{
+	struct perf_counter *child_counter, *tmp;
+	struct perf_counter_context *child_ctx;
+
+	child_ctx = &child->perf_counter_ctx;
+
+	if (likely(!child_ctx->nr_counters))
+		return;
+
+	list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
+				 list_entry)
+		__perf_counter_exit_task(child, child_counter, child_ctx);
+}
+
+/*
+ * Initialize the perf_counter context in task_struct
+ */
+void perf_counter_init_task(struct task_struct *child)
+{
+	struct perf_counter_context *child_ctx, *parent_ctx;
+	struct perf_counter *counter, *parent_counter;
+	struct task_struct *parent = current;
+	unsigned long flags;
+
+	child_ctx  =  &child->perf_counter_ctx;
+	parent_ctx = &parent->perf_counter_ctx;
+
+	__perf_counter_init_context(child_ctx, child);
+
+	/*
+	 * This is executed from the parent task context, so inherit
+	 * counters that have been marked for cloning:
+	 */
+
+	if (likely(!parent_ctx->nr_counters))
+		return;
+
+	/*
+	 * Lock the parent list. No need to lock the child - not PID
+	 * hashed yet and not running, so nobody can access it.
+	 */
+	spin_lock_irqsave(&parent_ctx->lock, flags);
+
+	/*
+	 * We dont have to disable NMIs - we are only looking at
+	 * the list, not manipulating it:
+	 */
+	list_for_each_entry(counter, &parent_ctx->counter_list, list_entry) {
+		if (!counter->hw_event.inherit || counter->group_leader != counter)
+			continue;
+
+		/*
+		 * Instead of creating recursive hierarchies of counters,
+		 * we link inheritd counters back to the original parent,
+		 * which has a filp for sure, which we use as the reference
+		 * count:
+		 */
+		parent_counter = counter;
+		if (counter->parent)
+			parent_counter = counter->parent;
+
+		if (inherit_counter(parent_counter, parent,
+				  parent_ctx, child, child_ctx))
+			break;
+	}
+
+	spin_unlock_irqrestore(&parent_ctx->lock, flags);
+}
+
 static void __cpuinit perf_counter_init_cpu(int cpu)
 {
 	struct perf_cpu_context *cpuctx;
-- 
cgit v1.2.3-70-g09d2


From 5d6a27d8a096868ae313f71f563b06074a7e34fe Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 14 Dec 2008 12:28:33 +0100
Subject: perfcounters: add context switch counter

Impact: add new feature, new sw counter

Add a counter that counts the number of context-switches a task
is doing.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  4 ++--
 kernel/perf_counter.c        | 51 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index e5d25bf8f74..d2a16563415 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -53,8 +53,8 @@ enum hw_event_types {
 	/*
 	 * Future software events:
 	 */
-	/* PERF_COUNT_PAGE_FAULTS	= -3,
-	   PERF_COUNT_CONTEXT_SWITCHES	= -4, */
+	PERF_COUNT_PAGE_FAULTS		= -3,
+	PERF_COUNT_CONTEXT_SWITCHES	= -4,
 };
 
 /*
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 1f81cde0dc4..09287091c52 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -888,6 +888,54 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = {
 	.hw_perf_counter_read		= task_clock_perf_counter_read,
 };
 
+static u64 get_context_switches(void)
+{
+	struct task_struct *curr = current;
+
+	return curr->nvcsw + curr->nivcsw;
+}
+
+static void context_switches_perf_counter_update(struct perf_counter *counter)
+{
+	u64 prev, now;
+	s64 delta;
+
+	prev = atomic64_read(&counter->hw.prev_count);
+	now = get_context_switches();
+
+	atomic64_set(&counter->hw.prev_count, now);
+
+	delta = now - prev;
+	if (WARN_ON_ONCE(delta < 0))
+		delta = 0;
+
+	atomic64_add(delta, &counter->count);
+}
+
+static void context_switches_perf_counter_read(struct perf_counter *counter)
+{
+	context_switches_perf_counter_update(counter);
+}
+
+static void context_switches_perf_counter_enable(struct perf_counter *counter)
+{
+	/*
+	 * ->nvcsw + curr->nivcsw is a per-task value already,
+	 * so we dont have to clear it on switch-in.
+	 */
+}
+
+static void context_switches_perf_counter_disable(struct perf_counter *counter)
+{
+	context_switches_perf_counter_update(counter);
+}
+
+static const struct hw_perf_counter_ops perf_ops_context_switches = {
+	.hw_perf_counter_enable		= context_switches_perf_counter_enable,
+	.hw_perf_counter_disable	= context_switches_perf_counter_disable,
+	.hw_perf_counter_read		= context_switches_perf_counter_read,
+};
+
 static const struct hw_perf_counter_ops *
 sw_perf_counter_init(struct perf_counter *counter)
 {
@@ -900,6 +948,9 @@ sw_perf_counter_init(struct perf_counter *counter)
 	case PERF_COUNT_TASK_CLOCK:
 		hw_ops = &perf_ops_task_clock;
 		break;
+	case PERF_COUNT_CONTEXT_SWITCHES:
+		hw_ops = &perf_ops_context_switches;
+		break;
 	default:
 		break;
 	}
-- 
cgit v1.2.3-70-g09d2


From 6c594c21fcb02c662f11c97be4d7d2b73060a205 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 14 Dec 2008 12:34:15 +0100
Subject: perfcounters: add task migrations counter

Impact: add new feature, new sw counter

Add a counter that counts the number of cross-CPU migrations a
task is suffering.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  8 +++++---
 include/linux/sched.h        |  3 ++-
 kernel/perf_counter.c        | 49 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched.c               |  7 +++++--
 4 files changed, 61 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index d2a16563415..f30486fc55d 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -42,6 +42,8 @@ enum hw_event_types {
 	PERF_COUNT_BRANCH_INSTRUCTIONS	=  4,
 	PERF_COUNT_BRANCH_MISSES	=  5,
 
+	PERF_HW_EVENTS_MAX		=  6,
+
 	/*
 	 * Special "software" counters provided by the kernel, even if
 	 * the hardware does not support performance counters. These
@@ -50,11 +52,11 @@ enum hw_event_types {
 	 */
 	PERF_COUNT_CPU_CLOCK		= -1,
 	PERF_COUNT_TASK_CLOCK		= -2,
-	/*
-	 * Future software events:
-	 */
 	PERF_COUNT_PAGE_FAULTS		= -3,
 	PERF_COUNT_CONTEXT_SWITCHES	= -4,
+	PERF_COUNT_CPU_MIGRATIONS	= -5,
+
+	PERF_SW_EVENTS_MIN		= -6,
 };
 
 /*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4c530278391..2e15be8fc79 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1014,6 +1014,8 @@ struct sched_entity {
 	u64			last_wakeup;
 	u64			avg_overlap;
 
+	u64			nr_migrations;
+
 #ifdef CONFIG_SCHEDSTATS
 	u64			wait_start;
 	u64			wait_max;
@@ -1029,7 +1031,6 @@ struct sched_entity {
 	u64			exec_max;
 	u64			slice_max;
 
-	u64			nr_migrations;
 	u64			nr_migrations_cold;
 	u64			nr_failed_migrations_affine;
 	u64			nr_failed_migrations_running;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 09287091c52..fb11e351e44 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -936,6 +936,52 @@ static const struct hw_perf_counter_ops perf_ops_context_switches = {
 	.hw_perf_counter_read		= context_switches_perf_counter_read,
 };
 
+static inline u64 get_cpu_migrations(void)
+{
+	return current->se.nr_migrations;
+}
+
+static void cpu_migrations_perf_counter_update(struct perf_counter *counter)
+{
+	u64 prev, now;
+	s64 delta;
+
+	prev = atomic64_read(&counter->hw.prev_count);
+	now = get_cpu_migrations();
+
+	atomic64_set(&counter->hw.prev_count, now);
+
+	delta = now - prev;
+	if (WARN_ON_ONCE(delta < 0))
+		delta = 0;
+
+	atomic64_add(delta, &counter->count);
+}
+
+static void cpu_migrations_perf_counter_read(struct perf_counter *counter)
+{
+	cpu_migrations_perf_counter_update(counter);
+}
+
+static void cpu_migrations_perf_counter_enable(struct perf_counter *counter)
+{
+	/*
+	 * se.nr_migrations is a per-task value already,
+	 * so we dont have to clear it on switch-in.
+	 */
+}
+
+static void cpu_migrations_perf_counter_disable(struct perf_counter *counter)
+{
+	cpu_migrations_perf_counter_update(counter);
+}
+
+static const struct hw_perf_counter_ops perf_ops_cpu_migrations = {
+	.hw_perf_counter_enable		= cpu_migrations_perf_counter_enable,
+	.hw_perf_counter_disable	= cpu_migrations_perf_counter_disable,
+	.hw_perf_counter_read		= cpu_migrations_perf_counter_read,
+};
+
 static const struct hw_perf_counter_ops *
 sw_perf_counter_init(struct perf_counter *counter)
 {
@@ -951,6 +997,9 @@ sw_perf_counter_init(struct perf_counter *counter)
 	case PERF_COUNT_CONTEXT_SWITCHES:
 		hw_ops = &perf_ops_context_switches;
 		break;
+	case PERF_COUNT_CPU_MIGRATIONS:
+		hw_ops = &perf_ops_cpu_migrations;
+		break;
 	default:
 		break;
 	}
diff --git a/kernel/sched.c b/kernel/sched.c
index 5c3f4106314..382cfdb5e38 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1852,12 +1852,14 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 		p->se.sleep_start -= clock_offset;
 	if (p->se.block_start)
 		p->se.block_start -= clock_offset;
+#endif
 	if (old_cpu != new_cpu) {
-		schedstat_inc(p, se.nr_migrations);
+		p->se.nr_migrations++;
+#ifdef CONFIG_SCHEDSTATS
 		if (task_hot(p, old_rq->clock, NULL))
 			schedstat_inc(p, se.nr_forced2_migrations);
-	}
 #endif
+	}
 	p->se.vruntime -= old_cfsrq->min_vruntime -
 					 new_cfsrq->min_vruntime;
 
@@ -2375,6 +2377,7 @@ static void __sched_fork(struct task_struct *p)
 	p->se.exec_start		= 0;
 	p->se.sum_exec_runtime		= 0;
 	p->se.prev_sum_exec_runtime	= 0;
+	p->se.nr_migrations		= 0;
 	p->se.last_wakeup		= 0;
 	p->se.avg_overlap		= 0;
 
-- 
cgit v1.2.3-70-g09d2


From 8fb9331391af95ca1f4e5c0a0da8120b13cbae01 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 23 Dec 2008 12:04:16 +0100
Subject: perfcounters: remove warnings

Impact: remove debug checks

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_counter.c | 7 -------
 include/linux/perf_counter.h       | 4 ----
 kernel/perf_counter.c              | 8 --------
 3 files changed, 19 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index bdbdb56eaa3..89fad5d4fb3 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -64,7 +64,6 @@ x86_perf_counter_update(struct perf_counter *counter,
 {
 	u64 prev_raw_count, new_raw_count, delta;
 
-	WARN_ON_ONCE(counter->state != PERF_COUNTER_STATE_ACTIVE);
 	/*
 	 * Careful: an NMI might modify the previous counter value.
 	 *
@@ -89,7 +88,6 @@ again:
 	 * of the count, so we do that by clipping the delta to 32 bits:
 	 */
 	delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count);
-	WARN_ON_ONCE((int)delta < 0);
 
 	atomic64_add(delta, &counter->count);
 	atomic64_sub(delta, &hwc->period_left);
@@ -193,7 +191,6 @@ __x86_perf_counter_disable(struct perf_counter *counter,
 	int err;
 
 	err = wrmsr_safe(hwc->config_base + idx, hwc->config, 0);
-	WARN_ON_ONCE(err);
 }
 
 static DEFINE_PER_CPU(u64, prev_left[MAX_HW_COUNTERS]);
@@ -209,8 +206,6 @@ __hw_perf_counter_set_period(struct perf_counter *counter,
 	s32 left = atomic64_read(&hwc->period_left);
 	s32 period = hwc->irq_period;
 
-	WARN_ON_ONCE(period <= 0);
-
 	/*
 	 * If we are way outside a reasoable range then just skip forward:
 	 */
@@ -224,8 +219,6 @@ __hw_perf_counter_set_period(struct perf_counter *counter,
 		atomic64_set(&hwc->period_left, left);
 	}
 
-	WARN_ON_ONCE(left <= 0);
-
 	per_cpu(prev_left[idx], smp_processor_id()) = left;
 
 	/*
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index f30486fc55d..d038450de87 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -218,8 +218,6 @@ struct perf_cpu_context {
 extern int perf_max_counters;
 
 #ifdef CONFIG_PERF_COUNTERS
-extern void
-perf_counter_show(struct perf_counter *counter, char *str, int trace);
 extern const struct hw_perf_counter_ops *
 hw_perf_counter_init(struct perf_counter *counter);
 
@@ -237,8 +235,6 @@ extern int perf_counter_task_enable(void);
 
 #else
 static inline void
-perf_counter_show(struct perf_counter *counter, char *str, int trace)   { }
-static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
 static inline void
 perf_counter_task_sched_out(struct task_struct *task, int cpu)		{ }
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 5431e790b5d..aab6c123b02 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -861,8 +861,6 @@ static void task_clock_perf_counter_update(struct perf_counter *counter)
 	atomic64_set(&counter->hw.prev_count, now);
 
 	delta = now - prev;
-	if (WARN_ON_ONCE(delta < 0))
-		delta = 0;
 
 	atomic64_add(delta, &counter->count);
 }
@@ -906,8 +904,6 @@ static void page_faults_perf_counter_update(struct perf_counter *counter)
 	atomic64_set(&counter->hw.prev_count, now);
 
 	delta = now - prev;
-	if (WARN_ON_ONCE(delta < 0))
-		delta = 0;
 
 	atomic64_add(delta, &counter->count);
 }
@@ -954,8 +950,6 @@ static void context_switches_perf_counter_update(struct perf_counter *counter)
 	atomic64_set(&counter->hw.prev_count, now);
 
 	delta = now - prev;
-	if (WARN_ON_ONCE(delta < 0))
-		delta = 0;
 
 	atomic64_add(delta, &counter->count);
 }
@@ -1000,8 +994,6 @@ static void cpu_migrations_perf_counter_update(struct perf_counter *counter)
 	atomic64_set(&counter->hw.prev_count, now);
 
 	delta = now - prev;
-	if (WARN_ON_ONCE(delta < 0))
-		delta = 0;
 
 	atomic64_add(delta, &counter->count);
 }
-- 
cgit v1.2.3-70-g09d2


From eb2b861810d4ff72454c83996b891df4e0aaff9a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 17 Dec 2008 09:09:13 +0100
Subject: x86, perfcounters: prepare for fixed-mode PMCs

Impact: refactor the x86 code for fixed-mode PMCs

Extend the data structures and rename the existing facilities
to allow for a 'generic' versus 'fixed' counter distinction.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/perf_counter.h | 11 ++++++++
 arch/x86/kernel/cpu/perf_counter.c  | 53 ++++++++++++++++++-------------------
 include/linux/perf_counter.h        |  1 +
 3 files changed, 38 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
index 9dadce1124e..dd5a4a559e2 100644
--- a/arch/x86/include/asm/perf_counter.h
+++ b/arch/x86/include/asm/perf_counter.h
@@ -1,6 +1,13 @@
 #ifndef _ASM_X86_PERF_COUNTER_H
 #define _ASM_X86_PERF_COUNTER_H
 
+/*
+ * Performance counter hw details:
+ */
+
+#define X86_PMC_MAX_GENERIC					8
+#define X86_PMC_MAX_FIXED					3
+
 #define MSR_ARCH_PERFMON_PERFCTR0			      0xc1
 #define MSR_ARCH_PERFMON_PERFCTR1			      0xc2
 
@@ -20,6 +27,10 @@
 
 #define ARCH_PERFMON_BRANCH_MISSES_RETIRED			 6
 
+/*
+ * Intel "Architectural Performance Monitoring" CPUID
+ * detection/enumeration details:
+ */
 union cpuid10_eax {
 	struct {
 		unsigned int version_id:8;
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index a4a3a09a654..fc3af868823 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -27,13 +27,12 @@ static bool perf_counters_initialized __read_mostly;
 static int nr_hw_counters __read_mostly;
 static u32 perf_counter_mask __read_mostly;
 
-/* No support for fixed function counters yet */
-
-#define MAX_HW_COUNTERS		8
-
 struct cpu_hw_counters {
-	struct perf_counter	*counters[MAX_HW_COUNTERS];
-	unsigned long		used[BITS_TO_LONGS(MAX_HW_COUNTERS)];
+	struct perf_counter	*generic[X86_PMC_MAX_GENERIC];
+	unsigned long		used[BITS_TO_LONGS(X86_PMC_MAX_GENERIC)];
+
+	struct perf_counter	*fixed[X86_PMC_MAX_FIXED];
+	unsigned long		used_fixed[BITS_TO_LONGS(X86_PMC_MAX_FIXED)];
 };
 
 /*
@@ -185,7 +184,7 @@ void hw_perf_restore(u64 ctrl)
 EXPORT_SYMBOL_GPL(hw_perf_restore);
 
 static inline void
-__x86_perf_counter_disable(struct perf_counter *counter,
+__pmc_generic_disable(struct perf_counter *counter,
 			   struct hw_perf_counter *hwc, unsigned int idx)
 {
 	int err;
@@ -193,7 +192,7 @@ __x86_perf_counter_disable(struct perf_counter *counter,
 	err = wrmsr_safe(hwc->config_base + idx, hwc->config, 0);
 }
 
-static DEFINE_PER_CPU(u64, prev_left[MAX_HW_COUNTERS]);
+static DEFINE_PER_CPU(u64, prev_left[X86_PMC_MAX_GENERIC]);
 
 /*
  * Set the next IRQ period, based on the hwc->period_left value.
@@ -231,7 +230,7 @@ __hw_perf_counter_set_period(struct perf_counter *counter,
 }
 
 static void
-__x86_perf_counter_enable(struct perf_counter *counter,
+__pmc_generic_enable(struct perf_counter *counter,
 			  struct hw_perf_counter *hwc, int idx)
 {
 	wrmsr(hwc->config_base + idx,
@@ -241,7 +240,7 @@ __x86_perf_counter_enable(struct perf_counter *counter,
 /*
  * Find a PMC slot for the freshly enabled / scheduled in counter:
  */
-static void x86_perf_counter_enable(struct perf_counter *counter)
+static void pmc_generic_enable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 	struct hw_perf_counter *hwc = &counter->hw;
@@ -256,12 +255,12 @@ static void x86_perf_counter_enable(struct perf_counter *counter)
 
 	perf_counters_lapic_init(hwc->nmi);
 
-	__x86_perf_counter_disable(counter, hwc, idx);
+	__pmc_generic_disable(counter, hwc, idx);
 
-	cpuc->counters[idx] = counter;
+	cpuc->generic[idx] = counter;
 
 	__hw_perf_counter_set_period(counter, hwc, idx);
-	__x86_perf_counter_enable(counter, hwc, idx);
+	__pmc_generic_enable(counter, hwc, idx);
 }
 
 void perf_counter_print_debug(void)
@@ -301,16 +300,16 @@ void perf_counter_print_debug(void)
 	local_irq_enable();
 }
 
-static void x86_perf_counter_disable(struct perf_counter *counter)
+static void pmc_generic_disable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 	struct hw_perf_counter *hwc = &counter->hw;
 	unsigned int idx = hwc->idx;
 
-	__x86_perf_counter_disable(counter, hwc, idx);
+	__pmc_generic_disable(counter, hwc, idx);
 
 	clear_bit(idx, cpuc->used);
-	cpuc->counters[idx] = NULL;
+	cpuc->generic[idx] = NULL;
 
 	/*
 	 * Drain the remaining delta count out of a counter
@@ -349,7 +348,7 @@ static void perf_save_and_restart(struct perf_counter *counter)
 	__hw_perf_counter_set_period(counter, hwc, idx);
 
 	if (pmc_ctrl & ARCH_PERFMON_EVENTSEL0_ENABLE)
-		__x86_perf_counter_enable(counter, hwc, idx);
+		__pmc_generic_enable(counter, hwc, idx);
 }
 
 static void
@@ -392,7 +391,7 @@ static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
 again:
 	ack = status;
 	for_each_bit(bit, (unsigned long *) &status, nr_hw_counters) {
-		struct perf_counter *counter = cpuc->counters[bit];
+		struct perf_counter *counter = cpuc->generic[bit];
 
 		clear_bit(bit, (unsigned long *) &status);
 		if (!counter)
@@ -412,7 +411,7 @@ again:
 		}
 		/*
 		 * From NMI context we cannot call into the scheduler to
-		 * do a task wakeup - but we mark these counters as
+		 * do a task wakeup - but we mark these generic as
 		 * wakeup_pending and initate a wakeup callback:
 		 */
 		if (nmi) {
@@ -462,7 +461,7 @@ void perf_counter_notify(struct pt_regs *regs)
 	cpuc = &per_cpu(cpu_hw_counters, cpu);
 
 	for_each_bit(bit, cpuc->used, nr_hw_counters) {
-		struct perf_counter *counter = cpuc->counters[bit];
+		struct perf_counter *counter = cpuc->generic[bit];
 
 		if (!counter)
 			continue;
@@ -539,10 +538,10 @@ void __init init_hw_perf_counters(void)
 	printk(KERN_INFO "... version:      %d\n", eax.split.version_id);
 	printk(KERN_INFO "... num_counters: %d\n", eax.split.num_counters);
 	nr_hw_counters = eax.split.num_counters;
-	if (nr_hw_counters > MAX_HW_COUNTERS) {
-		nr_hw_counters = MAX_HW_COUNTERS;
+	if (nr_hw_counters > X86_PMC_MAX_GENERIC) {
+		nr_hw_counters = X86_PMC_MAX_GENERIC;
 		WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
-			nr_hw_counters, MAX_HW_COUNTERS);
+			nr_hw_counters, X86_PMC_MAX_GENERIC);
 	}
 	perf_counter_mask = (1 << nr_hw_counters) - 1;
 	perf_max_counters = nr_hw_counters;
@@ -556,15 +555,15 @@ void __init init_hw_perf_counters(void)
 	register_die_notifier(&perf_counter_nmi_notifier);
 }
 
-static void x86_perf_counter_read(struct perf_counter *counter)
+static void pmc_generic_read(struct perf_counter *counter)
 {
 	x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
 }
 
 static const struct hw_perf_counter_ops x86_perf_counter_ops = {
-	.hw_perf_counter_enable		= x86_perf_counter_enable,
-	.hw_perf_counter_disable	= x86_perf_counter_disable,
-	.hw_perf_counter_read		= x86_perf_counter_read,
+	.hw_perf_counter_enable		= pmc_generic_enable,
+	.hw_perf_counter_disable	= pmc_generic_disable,
+	.hw_perf_counter_read		= pmc_generic_read,
 };
 
 const struct hw_perf_counter_ops *
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index d038450de87..984da540224 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -14,6 +14,7 @@
 #define _LINUX_PERF_COUNTER_H
 
 #include <asm/atomic.h>
+#include <asm/perf_counter.h>
 
 #include <linux/list.h>
 #include <linux/mutex.h>
-- 
cgit v1.2.3-70-g09d2


From 7671581f1666ef4b54a1c1e598c51ac44c060a9b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 17 Dec 2008 14:20:28 +0100
Subject: perfcounters: hw ops rename

Impact: rename field names

Shorten them.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_counter.c |  6 ++---
 include/linux/perf_counter.h       |  6 ++---
 kernel/perf_counter.c              | 50 +++++++++++++++++++-------------------
 3 files changed, 31 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 358af526640..b6755712142 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -577,9 +577,9 @@ static void pmc_generic_read(struct perf_counter *counter)
 }
 
 static const struct hw_perf_counter_ops x86_perf_counter_ops = {
-	.hw_perf_counter_enable		= pmc_generic_enable,
-	.hw_perf_counter_disable	= pmc_generic_disable,
-	.hw_perf_counter_read		= pmc_generic_read,
+	.enable		= pmc_generic_enable,
+	.disable	= pmc_generic_disable,
+	.read		= pmc_generic_read,
 };
 
 const struct hw_perf_counter_ops *
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 984da540224..48f76d2e54c 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -128,9 +128,9 @@ struct perf_counter;
  * struct hw_perf_counter_ops - performance counter hw ops
  */
 struct hw_perf_counter_ops {
-	void (*hw_perf_counter_enable)	(struct perf_counter *counter);
-	void (*hw_perf_counter_disable)	(struct perf_counter *counter);
-	void (*hw_perf_counter_read)	(struct perf_counter *counter);
+	void (*enable)			(struct perf_counter *counter);
+	void (*disable)			(struct perf_counter *counter);
+	void (*read)			(struct perf_counter *counter);
 };
 
 /**
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f8a4d9a5d5d..961d651aa57 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -109,7 +109,7 @@ static void __perf_counter_remove_from_context(void *info)
 	spin_lock_irqsave(&ctx->lock, flags);
 
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
-		counter->hw_ops->hw_perf_counter_disable(counter);
+		counter->hw_ops->disable(counter);
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
 		ctx->nr_active--;
 		cpuctx->active_oncpu--;
@@ -226,7 +226,7 @@ static void __perf_install_in_context(void *info)
 		counter->oncpu = cpu;
 		ctx->nr_active++;
 		cpuctx->active_oncpu++;
-		counter->hw_ops->hw_perf_counter_enable(counter);
+		counter->hw_ops->enable(counter);
 	}
 
 	if (!ctx->task && cpuctx->max_pertask)
@@ -297,7 +297,7 @@ counter_sched_out(struct perf_counter *counter,
 	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
 		return;
 
-	counter->hw_ops->hw_perf_counter_disable(counter);
+	counter->hw_ops->disable(counter);
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	counter->oncpu = -1;
 
@@ -327,7 +327,7 @@ group_sched_out(struct perf_counter *group_counter,
  *
  * We stop each counter and update the counter value in counter->count.
  *
- * This does not protect us against NMI, but hw_perf_counter_disable()
+ * This does not protect us against NMI, but disable()
  * sets the disabled bit in the control field of counter _before_
  * accessing the counter control register. If a NMI hits, then it will
  * not restart the counter.
@@ -359,7 +359,7 @@ counter_sched_in(struct perf_counter *counter,
 	if (counter->state == PERF_COUNTER_STATE_OFF)
 		return;
 
-	counter->hw_ops->hw_perf_counter_enable(counter);
+	counter->hw_ops->enable(counter);
 	counter->state = PERF_COUNTER_STATE_ACTIVE;
 	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
 
@@ -395,7 +395,7 @@ group_sched_in(struct perf_counter *group_counter,
  *
  * We restore the counter value and then enable it.
  *
- * This does not protect us against NMI, but hw_perf_counter_enable()
+ * This does not protect us against NMI, but enable()
  * sets the enabled bit in the control field of counter _before_
  * accessing the counter control register. If a NMI hits, then it will
  * keep the counter running.
@@ -537,11 +537,11 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 /*
  * Cross CPU call to read the hardware counter
  */
-static void __hw_perf_counter_read(void *info)
+static void __read(void *info)
 {
 	struct perf_counter *counter = info;
 
-	counter->hw_ops->hw_perf_counter_read(counter);
+	counter->hw_ops->read(counter);
 }
 
 static u64 perf_counter_read(struct perf_counter *counter)
@@ -552,7 +552,7 @@ static u64 perf_counter_read(struct perf_counter *counter)
 	 */
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
 		smp_call_function_single(counter->oncpu,
-					 __hw_perf_counter_read, counter, 1);
+					 __read, counter, 1);
 	}
 
 	return atomic64_read(&counter->count);
@@ -855,9 +855,9 @@ static void cpu_clock_perf_counter_read(struct perf_counter *counter)
 }
 
 static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
-	.hw_perf_counter_enable		= cpu_clock_perf_counter_enable,
-	.hw_perf_counter_disable	= cpu_clock_perf_counter_disable,
-	.hw_perf_counter_read		= cpu_clock_perf_counter_read,
+	.enable		= cpu_clock_perf_counter_enable,
+	.disable	= cpu_clock_perf_counter_disable,
+	.read		= cpu_clock_perf_counter_read,
 };
 
 static void task_clock_perf_counter_update(struct perf_counter *counter)
@@ -891,9 +891,9 @@ static void task_clock_perf_counter_disable(struct perf_counter *counter)
 }
 
 static const struct hw_perf_counter_ops perf_ops_task_clock = {
-	.hw_perf_counter_enable		= task_clock_perf_counter_enable,
-	.hw_perf_counter_disable	= task_clock_perf_counter_disable,
-	.hw_perf_counter_read		= task_clock_perf_counter_read,
+	.enable		= task_clock_perf_counter_enable,
+	.disable	= task_clock_perf_counter_disable,
+	.read		= task_clock_perf_counter_read,
 };
 
 static u64 get_page_faults(void)
@@ -937,9 +937,9 @@ static void page_faults_perf_counter_disable(struct perf_counter *counter)
 }
 
 static const struct hw_perf_counter_ops perf_ops_page_faults = {
-	.hw_perf_counter_enable		= page_faults_perf_counter_enable,
-	.hw_perf_counter_disable	= page_faults_perf_counter_disable,
-	.hw_perf_counter_read		= page_faults_perf_counter_read,
+	.enable		= page_faults_perf_counter_enable,
+	.disable	= page_faults_perf_counter_disable,
+	.read		= page_faults_perf_counter_read,
 };
 
 static u64 get_context_switches(void)
@@ -983,9 +983,9 @@ static void context_switches_perf_counter_disable(struct perf_counter *counter)
 }
 
 static const struct hw_perf_counter_ops perf_ops_context_switches = {
-	.hw_perf_counter_enable		= context_switches_perf_counter_enable,
-	.hw_perf_counter_disable	= context_switches_perf_counter_disable,
-	.hw_perf_counter_read		= context_switches_perf_counter_read,
+	.enable		= context_switches_perf_counter_enable,
+	.disable	= context_switches_perf_counter_disable,
+	.read		= context_switches_perf_counter_read,
 };
 
 static inline u64 get_cpu_migrations(void)
@@ -1027,9 +1027,9 @@ static void cpu_migrations_perf_counter_disable(struct perf_counter *counter)
 }
 
 static const struct hw_perf_counter_ops perf_ops_cpu_migrations = {
-	.hw_perf_counter_enable		= cpu_migrations_perf_counter_enable,
-	.hw_perf_counter_disable	= cpu_migrations_perf_counter_disable,
-	.hw_perf_counter_read		= cpu_migrations_perf_counter_read,
+	.enable		= cpu_migrations_perf_counter_enable,
+	.disable	= cpu_migrations_perf_counter_disable,
+	.read		= cpu_migrations_perf_counter_read,
 };
 
 static const struct hw_perf_counter_ops *
@@ -1283,7 +1283,7 @@ __perf_counter_exit_task(struct task_struct *child,
 
 		cpuctx = &__get_cpu_var(perf_cpu_context);
 
-		child_counter->hw_ops->hw_perf_counter_disable(child_counter);
+		child_counter->hw_ops->disable(child_counter);
 		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
 		child_counter->oncpu = -1;
 
-- 
cgit v1.2.3-70-g09d2


From aa9c4c0f967fdb482ea95e8473ec3d201e6e0781 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 17 Dec 2008 14:10:57 +0100
Subject: perfcounters: fix task clock counter

Impact: fix per task clock counter precision

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel_stat.h |  8 ++++++
 kernel/exit.c               | 17 +++++++----
 kernel/perf_counter.c       | 70 ++++++++++++++++++++++++++++++++++-----------
 kernel/sched.c              | 49 +++++++++++++++++++++++++++++--
 4 files changed, 120 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 4a145caeee0..1b2e3242497 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -66,7 +66,15 @@ static inline unsigned int kstat_irqs(unsigned int irq)
 	return sum;
 }
 
+
+/*
+ * Lock/unlock the current runqueue - to extract task statistics:
+ */
+extern void curr_rq_lock_irq_save(unsigned long *flags);
+extern void curr_rq_unlock_irq_restore(unsigned long *flags);
+extern unsigned long long __task_delta_exec(struct task_struct *tsk, int update);
 extern unsigned long long task_delta_exec(struct task_struct *);
+
 extern void account_user_time(struct task_struct *, cputime_t);
 extern void account_user_time_scaled(struct task_struct *, cputime_t);
 extern void account_system_time(struct task_struct *, int, cputime_t);
diff --git a/kernel/exit.c b/kernel/exit.c
index d336c90a5f1..244edfd9686 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -922,6 +922,12 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 	forget_original_parent(tsk);
 	exit_task_namespaces(tsk);
 
+	/*
+	 * Flush inherited counters to the parent - before the parent
+	 * gets woken up by child-exit notifications.
+	 */
+	perf_counter_exit_task(tsk);
+
 	write_lock_irq(&tasklist_lock);
 	if (group_dead)
 		kill_orphaned_pgrp(tsk->group_leader, NULL);
@@ -1093,11 +1099,6 @@ NORET_TYPE void do_exit(long code)
 	mpol_put(tsk->mempolicy);
 	tsk->mempolicy = NULL;
 #endif
-	/*
-	 * These must happen late, after the PID is not
-	 * hashed anymore, but still at a point that may sleep:
-	 */
-	perf_counter_exit_task(tsk);
 #ifdef CONFIG_FUTEX
 	if (unlikely(!list_empty(&tsk->pi_state_list)))
 		exit_pi_state_list(tsk);
@@ -1121,6 +1122,12 @@ NORET_TYPE void do_exit(long code)
 	if (tsk->splice_pipe)
 		__free_pipe_info(tsk->splice_pipe);
 
+	/*
+	 * These must happen late, after the PID is not
+	 * hashed anymore, but still at a point that may sleep:
+	 */
+	perf_counter_exit_task(tsk);
+
 	preempt_disable();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 961d651aa57..f1110ac1267 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -18,6 +18,7 @@
 #include <linux/uaccess.h>
 #include <linux/syscalls.h>
 #include <linux/anon_inodes.h>
+#include <linux/kernel_stat.h>
 #include <linux/perf_counter.h>
 
 /*
@@ -106,7 +107,8 @@ static void __perf_counter_remove_from_context(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	spin_lock_irqsave(&ctx->lock, flags);
+	curr_rq_lock_irq_save(&flags);
+	spin_lock(&ctx->lock);
 
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
 		counter->hw_ops->disable(counter);
@@ -135,7 +137,8 @@ static void __perf_counter_remove_from_context(void *info)
 			    perf_max_counters - perf_reserved_percpu);
 	}
 
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	spin_unlock(&ctx->lock);
+	curr_rq_unlock_irq_restore(&flags);
 }
 
 
@@ -209,7 +212,8 @@ static void __perf_install_in_context(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	spin_lock_irqsave(&ctx->lock, flags);
+	curr_rq_lock_irq_save(&flags);
+	spin_lock(&ctx->lock);
 
 	/*
 	 * Protect the list operation against NMI by disabling the
@@ -232,7 +236,8 @@ static void __perf_install_in_context(void *info)
 	if (!ctx->task && cpuctx->max_pertask)
 		cpuctx->max_pertask--;
 
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	spin_unlock(&ctx->lock);
+	curr_rq_unlock_irq_restore(&flags);
 }
 
 /*
@@ -438,15 +443,19 @@ int perf_counter_task_disable(void)
 	struct task_struct *curr = current;
 	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
 	struct perf_counter *counter;
+	unsigned long flags;
 	u64 perf_flags;
 	int cpu;
 
 	if (likely(!ctx->nr_counters))
 		return 0;
 
-	local_irq_disable();
+	curr_rq_lock_irq_save(&flags);
 	cpu = smp_processor_id();
 
+	/* force the update of the task clock: */
+	__task_delta_exec(curr, 1);
+
 	perf_counter_task_sched_out(curr, cpu);
 
 	spin_lock(&ctx->lock);
@@ -463,7 +472,7 @@ int perf_counter_task_disable(void)
 
 	spin_unlock(&ctx->lock);
 
-	local_irq_enable();
+	curr_rq_unlock_irq_restore(&flags);
 
 	return 0;
 }
@@ -473,15 +482,19 @@ int perf_counter_task_enable(void)
 	struct task_struct *curr = current;
 	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
 	struct perf_counter *counter;
+	unsigned long flags;
 	u64 perf_flags;
 	int cpu;
 
 	if (likely(!ctx->nr_counters))
 		return 0;
 
-	local_irq_disable();
+	curr_rq_lock_irq_save(&flags);
 	cpu = smp_processor_id();
 
+	/* force the update of the task clock: */
+	__task_delta_exec(curr, 1);
+
 	spin_lock(&ctx->lock);
 
 	/*
@@ -493,6 +506,7 @@ int perf_counter_task_enable(void)
 		if (counter->state != PERF_COUNTER_STATE_OFF)
 			continue;
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->hw_event.disabled = 0;
 	}
 	hw_perf_restore(perf_flags);
 
@@ -500,7 +514,7 @@ int perf_counter_task_enable(void)
 
 	perf_counter_task_sched_in(curr, cpu);
 
-	local_irq_enable();
+	curr_rq_unlock_irq_restore(&flags);
 
 	return 0;
 }
@@ -540,8 +554,11 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 static void __read(void *info)
 {
 	struct perf_counter *counter = info;
+	unsigned long flags;
 
+	curr_rq_lock_irq_save(&flags);
 	counter->hw_ops->read(counter);
+	curr_rq_unlock_irq_restore(&flags);
 }
 
 static u64 perf_counter_read(struct perf_counter *counter)
@@ -860,13 +877,27 @@ static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
 	.read		= cpu_clock_perf_counter_read,
 };
 
-static void task_clock_perf_counter_update(struct perf_counter *counter)
+/*
+ * Called from within the scheduler:
+ */
+static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update)
 {
-	u64 prev, now;
+	struct task_struct *curr = counter->task;
+	u64 delta;
+
+	WARN_ON_ONCE(counter->task != current);
+
+	delta = __task_delta_exec(curr, update);
+
+	return curr->se.sum_exec_runtime + delta;
+}
+
+static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now)
+{
+	u64 prev;
 	s64 delta;
 
 	prev = atomic64_read(&counter->hw.prev_count);
-	now = current->se.sum_exec_runtime;
 
 	atomic64_set(&counter->hw.prev_count, now);
 
@@ -877,17 +908,23 @@ static void task_clock_perf_counter_update(struct perf_counter *counter)
 
 static void task_clock_perf_counter_read(struct perf_counter *counter)
 {
-	task_clock_perf_counter_update(counter);
+	u64 now = task_clock_perf_counter_val(counter, 1);
+
+	task_clock_perf_counter_update(counter, now);
 }
 
 static void task_clock_perf_counter_enable(struct perf_counter *counter)
 {
-	atomic64_set(&counter->hw.prev_count, current->se.sum_exec_runtime);
+	u64 now = task_clock_perf_counter_val(counter, 0);
+
+	atomic64_set(&counter->hw.prev_count, now);
 }
 
 static void task_clock_perf_counter_disable(struct perf_counter *counter)
 {
-	task_clock_perf_counter_update(counter);
+	u64 now = task_clock_perf_counter_val(counter, 0);
+
+	task_clock_perf_counter_update(counter, now);
 }
 
 static const struct hw_perf_counter_ops perf_ops_task_clock = {
@@ -1267,6 +1304,7 @@ __perf_counter_exit_task(struct task_struct *child,
 {
 	struct perf_counter *parent_counter;
 	u64 parent_val, child_val;
+	unsigned long flags;
 	u64 perf_flags;
 
 	/*
@@ -1275,7 +1313,7 @@ __perf_counter_exit_task(struct task_struct *child,
 	 * Be careful about zapping the list - IRQ/NMI context
 	 * could still be processing it:
 	 */
-	local_irq_disable();
+	curr_rq_lock_irq_save(&flags);
 	perf_flags = hw_perf_save_disable();
 
 	if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
@@ -1294,7 +1332,7 @@ __perf_counter_exit_task(struct task_struct *child,
 	list_del_init(&child_counter->list_entry);
 
 	hw_perf_restore(perf_flags);
-	local_irq_enable();
+	curr_rq_unlock_irq_restore(&flags);
 
 	parent_counter = child_counter->parent;
 	/*
diff --git a/kernel/sched.c b/kernel/sched.c
index 382cfdb5e38..4d84ff4c877 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -638,7 +638,7 @@ static inline int cpu_of(struct rq *rq)
 #define task_rq(p)		cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 
-static inline void update_rq_clock(struct rq *rq)
+inline void update_rq_clock(struct rq *rq)
 {
 	rq->clock = sched_clock_cpu(cpu_of(rq));
 }
@@ -969,6 +969,26 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
 	}
 }
 
+void curr_rq_lock_irq_save(unsigned long *flags)
+	__acquires(rq->lock)
+{
+	struct rq *rq;
+
+	local_irq_save(*flags);
+	rq = cpu_rq(smp_processor_id());
+	spin_lock(&rq->lock);
+}
+
+void curr_rq_unlock_irq_restore(unsigned long *flags)
+	__releases(rq->lock)
+{
+	struct rq *rq;
+
+	rq = cpu_rq(smp_processor_id());
+	spin_unlock(&rq->lock);
+	local_irq_restore(*flags);
+}
+
 void task_rq_unlock_wait(struct task_struct *p)
 {
 	struct rq *rq = task_rq(p);
@@ -2558,7 +2578,6 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
 		    struct task_struct *next)
 {
 	fire_sched_out_preempt_notifiers(prev, next);
-	perf_counter_task_sched_out(prev, cpu_of(rq));
 	prepare_lock_switch(rq, next);
 	prepare_arch_switch(next);
 }
@@ -4089,6 +4108,29 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
 
 EXPORT_PER_CPU_SYMBOL(kstat);
 
+/*
+ * Return any ns on the sched_clock that have not yet been banked in
+ * @p in case that task is currently running.
+ */
+unsigned long long __task_delta_exec(struct task_struct *p, int update)
+{
+	s64 delta_exec;
+	struct rq *rq;
+
+	rq = task_rq(p);
+	WARN_ON_ONCE(!runqueue_is_locked());
+	WARN_ON_ONCE(!task_current(rq, p));
+
+	if (update)
+		update_rq_clock(rq);
+
+	delta_exec = rq->clock - p->se.exec_start;
+
+	WARN_ON_ONCE(delta_exec < 0);
+
+	return delta_exec;
+}
+
 /*
  * Return any ns on the sched_clock that have not yet been banked in
  * @p in case that task is currently running.
@@ -4316,13 +4358,13 @@ void scheduler_tick(void)
 	update_rq_clock(rq);
 	update_cpu_load(rq);
 	curr->sched_class->task_tick(rq, curr, 0);
+	perf_counter_task_tick(curr, cpu);
 	spin_unlock(&rq->lock);
 
 #ifdef CONFIG_SMP
 	rq->idle_at_tick = idle_cpu(cpu);
 	trigger_load_balance(rq, cpu);
 #endif
-	perf_counter_task_tick(curr, cpu);
 }
 
 #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
@@ -4512,6 +4554,7 @@ need_resched_nonpreemptible:
 
 	if (likely(prev != next)) {
 		sched_info_switch(prev, next);
+		perf_counter_task_sched_out(prev, cpu);
 
 		rq->nr_switches++;
 		rq->curr = next;
-- 
cgit v1.2.3-70-g09d2


From eef6cbf5844c620d9db9be99e4908cdf92492fb9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 19 Dec 2008 10:20:42 +0100
Subject: perfcounters: pull inherited counters

Change counter inheritance from a 'push' to a 'pull' model: instead of
child tasks pushing their final counts to the parent, reuse the wait4
infrastructure to pull counters as child tasks are exit-processed,
much like how cutime/cstime is collected.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/init_task.h |  9 +++++++++
 kernel/exit.c             | 21 +++++++++------------
 2 files changed, 18 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 23fd8909b9e..54fa2fa2c8e 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -113,6 +113,14 @@ extern struct group_info init_groups;
 # define CAP_INIT_BSET  CAP_INIT_EFF_SET
 #endif
 
+#ifdef CONFIG_PERF_COUNTERS
+# define INIT_PERF_COUNTERS(tsk)					\
+	.perf_counter_ctx.counter_list =				\
+		LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list),
+#else
+# define INIT_PERF_COUNTERS(tsk)
+#endif
+
 /*
  *  INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -180,6 +188,7 @@ extern struct group_info init_groups;
 	INIT_IDS							\
 	INIT_TRACE_IRQFLAGS						\
 	INIT_LOCKDEP							\
+	INIT_PERF_COUNTERS(tsk)						\
 }
 
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 244edfd9686..101b7eeff44 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -153,6 +153,9 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
 {
 	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
 
+#ifdef CONFIG_PERF_COUNTERS
+	WARN_ON_ONCE(!list_empty(&tsk->perf_counter_ctx.counter_list));
+#endif
 	trace_sched_process_free(tsk);
 	put_task_struct(tsk);
 }
@@ -922,12 +925,6 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 	forget_original_parent(tsk);
 	exit_task_namespaces(tsk);
 
-	/*
-	 * Flush inherited counters to the parent - before the parent
-	 * gets woken up by child-exit notifications.
-	 */
-	perf_counter_exit_task(tsk);
-
 	write_lock_irq(&tasklist_lock);
 	if (group_dead)
 		kill_orphaned_pgrp(tsk->group_leader, NULL);
@@ -1122,12 +1119,6 @@ NORET_TYPE void do_exit(long code)
 	if (tsk->splice_pipe)
 		__free_pipe_info(tsk->splice_pipe);
 
-	/*
-	 * These must happen late, after the PID is not
-	 * hashed anymore, but still at a point that may sleep:
-	 */
-	perf_counter_exit_task(tsk);
-
 	preempt_disable();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
@@ -1371,6 +1362,12 @@ static int wait_task_zombie(struct task_struct *p, int options,
 	 */
 	read_unlock(&tasklist_lock);
 
+	/*
+	 * Flush inherited counters to the parent - before the parent
+	 * gets woken up by child-exit notifications.
+	 */
+	perf_counter_exit_task(p);
+
 	retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
 	status = (p->signal->flags & SIGNAL_GROUP_EXIT)
 		? p->signal->group_exit_code : p->exit_code;
-- 
cgit v1.2.3-70-g09d2


From 78b6084c907cea15bb40a564b974e072f5163781 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 21 Dec 2008 15:07:49 +0100
Subject: perfcounters: fix init context lock

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/init_task.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 54fa2fa2c8e..467cff545c3 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -116,7 +116,9 @@ extern struct group_info init_groups;
 #ifdef CONFIG_PERF_COUNTERS
 # define INIT_PERF_COUNTERS(tsk)					\
 	.perf_counter_ctx.counter_list =				\
-		LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list),
+		LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list),	\
+	.perf_counter_ctx.lock =					\
+		__SPIN_LOCK_UNLOCKED(tsk.perf_counter_ctx.lock),
 #else
 # define INIT_PERF_COUNTERS(tsk)
 #endif
-- 
cgit v1.2.3-70-g09d2


From 95cdd2e7851cce79ab839cb0b3cbe68d7911d0f1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 21 Dec 2008 13:50:42 +0100
Subject: perfcounters: enable lowlevel pmc code to schedule counters

Allow lowlevel ->enable() op to return an error if a counter can not be
added. This can be used to handle counter constraints.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_counter.c |  6 +++-
 include/linux/perf_counter.h       |  2 +-
 kernel/perf_counter.c              | 62 +++++++++++++++++++++++++++-----------
 3 files changed, 51 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index b6755712142..74090a393a7 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -244,7 +244,7 @@ static int fixed_mode_idx(struct hw_perf_counter *hwc)
 /*
  * Find a PMC slot for the freshly enabled / scheduled in counter:
  */
-static void pmc_generic_enable(struct perf_counter *counter)
+static int pmc_generic_enable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 	struct hw_perf_counter *hwc = &counter->hw;
@@ -253,6 +253,8 @@ static void pmc_generic_enable(struct perf_counter *counter)
 	/* Try to get the previous counter again */
 	if (test_and_set_bit(idx, cpuc->used)) {
 		idx = find_first_zero_bit(cpuc->used, nr_counters_generic);
+		if (idx == nr_counters_generic)
+			return -EAGAIN;
 		set_bit(idx, cpuc->used);
 		hwc->idx = idx;
 	}
@@ -265,6 +267,8 @@ static void pmc_generic_enable(struct perf_counter *counter)
 
 	__hw_perf_counter_set_period(counter, hwc, idx);
 	__pmc_generic_enable(counter, hwc, idx);
+
+	return 0;
 }
 
 void perf_counter_print_debug(void)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 48f76d2e54c..53af11d3767 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -128,7 +128,7 @@ struct perf_counter;
  * struct hw_perf_counter_ops - performance counter hw ops
  */
 struct hw_perf_counter_ops {
-	void (*enable)			(struct perf_counter *counter);
+	int (*enable)			(struct perf_counter *counter);
 	void (*disable)			(struct perf_counter *counter);
 	void (*read)			(struct perf_counter *counter);
 };
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f1110ac1267..2e73929a695 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -355,21 +355,25 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
 	cpuctx->task_ctx = NULL;
 }
 
-static void
+static int
 counter_sched_in(struct perf_counter *counter,
 		 struct perf_cpu_context *cpuctx,
 		 struct perf_counter_context *ctx,
 		 int cpu)
 {
 	if (counter->state == PERF_COUNTER_STATE_OFF)
-		return;
+		return 0;
+
+	if (counter->hw_ops->enable(counter))
+		return -EAGAIN;
 
-	counter->hw_ops->enable(counter);
 	counter->state = PERF_COUNTER_STATE_ACTIVE;
 	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
 
 	cpuctx->active_oncpu++;
 	ctx->nr_active++;
+
+	return 0;
 }
 
 static int
@@ -378,20 +382,38 @@ group_sched_in(struct perf_counter *group_counter,
 	       struct perf_counter_context *ctx,
 	       int cpu)
 {
-	struct perf_counter *counter;
-	int was_group = 0;
+	struct perf_counter *counter, *partial_group;
+	int ret = 0;
 
-	counter_sched_in(group_counter, cpuctx, ctx, cpu);
+	if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
+		return -EAGAIN;
 
 	/*
 	 * Schedule in siblings as one group (if any):
 	 */
 	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
-		counter_sched_in(counter, cpuctx, ctx, cpu);
-		was_group = 1;
+		if (counter_sched_in(counter, cpuctx, ctx, cpu)) {
+			partial_group = counter;
+			goto group_error;
+		}
+		ret = -EAGAIN;
 	}
 
-	return was_group;
+	return ret;
+
+group_error:
+	/*
+	 * Groups can be scheduled in as one unit only, so undo any
+	 * partial group before returning:
+	 */
+	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
+		if (counter == partial_group)
+			break;
+		counter_sched_out(counter, cpuctx, ctx);
+	}
+	counter_sched_out(group_counter, cpuctx, ctx);
+
+	return -EAGAIN;
 }
 
 /*
@@ -416,9 +438,6 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu)
 
 	spin_lock(&ctx->lock);
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		if (ctx->nr_active == cpuctx->max_pertask)
-			break;
-
 		/*
 		 * Listen to the 'cpu' scheduling filter constraint
 		 * of counters:
@@ -856,8 +875,9 @@ static const struct file_operations perf_fops = {
 	.poll			= perf_poll,
 };
 
-static void cpu_clock_perf_counter_enable(struct perf_counter *counter)
+static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
 {
+	return 0;
 }
 
 static void cpu_clock_perf_counter_disable(struct perf_counter *counter)
@@ -913,11 +933,13 @@ static void task_clock_perf_counter_read(struct perf_counter *counter)
 	task_clock_perf_counter_update(counter, now);
 }
 
-static void task_clock_perf_counter_enable(struct perf_counter *counter)
+static int task_clock_perf_counter_enable(struct perf_counter *counter)
 {
 	u64 now = task_clock_perf_counter_val(counter, 0);
 
 	atomic64_set(&counter->hw.prev_count, now);
+
+	return 0;
 }
 
 static void task_clock_perf_counter_disable(struct perf_counter *counter)
@@ -960,12 +982,14 @@ static void page_faults_perf_counter_read(struct perf_counter *counter)
 	page_faults_perf_counter_update(counter);
 }
 
-static void page_faults_perf_counter_enable(struct perf_counter *counter)
+static int page_faults_perf_counter_enable(struct perf_counter *counter)
 {
 	/*
 	 * page-faults is a per-task value already,
 	 * so we dont have to clear it on switch-in.
 	 */
+
+	return 0;
 }
 
 static void page_faults_perf_counter_disable(struct perf_counter *counter)
@@ -1006,12 +1030,14 @@ static void context_switches_perf_counter_read(struct perf_counter *counter)
 	context_switches_perf_counter_update(counter);
 }
 
-static void context_switches_perf_counter_enable(struct perf_counter *counter)
+static int context_switches_perf_counter_enable(struct perf_counter *counter)
 {
 	/*
 	 * ->nvcsw + curr->nivcsw is a per-task value already,
 	 * so we dont have to clear it on switch-in.
 	 */
+
+	return 0;
 }
 
 static void context_switches_perf_counter_disable(struct perf_counter *counter)
@@ -1050,12 +1076,14 @@ static void cpu_migrations_perf_counter_read(struct perf_counter *counter)
 	cpu_migrations_perf_counter_update(counter);
 }
 
-static void cpu_migrations_perf_counter_enable(struct perf_counter *counter)
+static int cpu_migrations_perf_counter_enable(struct perf_counter *counter)
 {
 	/*
 	 * se.nr_migrations is a per-task value already,
 	 * so we dont have to clear it on switch-in.
 	 */
+
+	return 0;
 }
 
 static void cpu_migrations_perf_counter_disable(struct perf_counter *counter)
-- 
cgit v1.2.3-70-g09d2


From 8fe91e61cdc407c7556d3cd71cf20141a25bbcea Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 23 Dec 2008 12:29:25 +0100
Subject: perfcounters: remove ->nr_inherited

Impact: remove dead code

nr_inherited was not maintained correctly (not decremented) - and also
not used - remove it.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 1 -
 kernel/perf_counter.c        | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 53af11d3767..1ea08e9f31c 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -164,7 +164,6 @@ struct perf_counter {
 	struct task_struct		*task;
 	struct file			*filp;
 
-	unsigned int			nr_inherited;
 	struct perf_counter		*parent;
 	/*
 	 * Protect attach/detach:
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 2e73929a695..48e1dbcdc1c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1308,7 +1308,6 @@ inherit_counter(struct perf_counter *parent_counter,
 	child_ctx->nr_counters++;
 
 	child_counter->parent = parent_counter;
-	parent_counter->nr_inherited++;
 	/*
 	 * inherit into child's child as well:
 	 */
-- 
cgit v1.2.3-70-g09d2


From f650a672359819454c3d8d4135ecd1558cde0b24 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 23 Dec 2008 12:17:29 +0100
Subject: perfcounters: add PERF_COUNT_BUS_CYCLES

Generalize "bus cycles" hw events - and map them to CPU_CLK_Unhalted.Ref
on x86. (which is a good enough approximation)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_counter.c | 3 ++-
 include/linux/perf_counter.h       | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index f3359c2b391..86b2fdd344a 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -41,12 +41,13 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
 
 static const int intel_perfmon_event_map[] =
 {
-  [PERF_COUNT_CYCLES]			= 0x003c,
+  [PERF_COUNT_CPU_CYCLES]		= 0x003c,
   [PERF_COUNT_INSTRUCTIONS]		= 0x00c0,
   [PERF_COUNT_CACHE_REFERENCES]		= 0x4f2e,
   [PERF_COUNT_CACHE_MISSES]		= 0x412e,
   [PERF_COUNT_BRANCH_INSTRUCTIONS]	= 0x00c4,
   [PERF_COUNT_BRANCH_MISSES]		= 0x00c5,
+  [PERF_COUNT_BUS_CYCLES]		= 0x013c,
 };
 
 static const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map);
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 1ea08e9f31c..ec77d1643d3 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -36,14 +36,15 @@ enum hw_event_types {
 	/*
 	 * Common hardware events, generalized by the kernel:
 	 */
-	PERF_COUNT_CYCLES		=  0,
+	PERF_COUNT_CPU_CYCLES		=  0,
 	PERF_COUNT_INSTRUCTIONS		=  1,
 	PERF_COUNT_CACHE_REFERENCES	=  2,
 	PERF_COUNT_CACHE_MISSES		=  3,
 	PERF_COUNT_BRANCH_INSTRUCTIONS	=  4,
 	PERF_COUNT_BRANCH_MISSES	=  5,
+	PERF_COUNT_BUS_CYCLES		=  6,
 
-	PERF_HW_EVENTS_MAX		=  6,
+	PERF_HW_EVENTS_MAX		=  7,
 
 	/*
 	 * Special "software" counters provided by the kernel, even if
-- 
cgit v1.2.3-70-g09d2


From e44aef58ecfbe061eb4c53b939bcc35fb1bee82d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 25 Dec 2008 09:02:11 +0100
Subject: perfcounters: include asm/perf_counter.h only if
 CONFIG_PERF_COUNTERS=y

Impact: build fix on ia64

KOSAKI Motohiro reported that -tip doesnt build on ia64 because
asm/perf_counter.h only exists on x86 for now. Fix it.

Reported-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Tested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index ec77d1643d3..cc3a75a239a 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -14,7 +14,10 @@
 #define _LINUX_PERF_COUNTER_H
 
 #include <asm/atomic.h>
-#include <asm/perf_counter.h>
+
+#ifdef CONFIG_PERF_COUNTERS
+# include <asm/perf_counter.h>
+#endif
 
 #include <linux/list.h>
 #include <linux/mutex.h>
-- 
cgit v1.2.3-70-g09d2


From 3cbed429a9ccdb7a243f733b1056fe5c39e9004c Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 9 Jan 2009 16:43:42 +1100
Subject: perf_counter: Add optional hw_perf_group_sched_in arch function

Impact: extend perf_counter infrastructure

This adds an optional hw_perf_group_sched_in() arch function that enables
a whole group of counters in one go.  It returns 1 if it added the group
successfully, 0 if it did nothing (and therefore the core needs to add
the counters individually), or a negative number if an error occurred.
It should add all the counters and enable any software counters in the
group, or else add none of them and return an error.

There are a couple of related changes/improvements in the group handling
here:

* As an optimization, group_sched_out() and group_sched_in() now check the
  state of the group leader, and do nothing if the leader is not active
  or disabled.

* We now call hw_perf_save_disable/hw_perf_restore around the complete
  set of counter enable/disable calls in __perf_counter_sched_in/out,
  to give the arch code the opportunity to defer updating the hardware
  state until the hw_perf_restore call if it wants.

* We no longer stop adding groups after we get to a group that has more
  than one counter.  We will ultimately add an option for a group to be
  exclusive.  The current code doesn't really implement exclusive groups
  anyway, since a group could end up going on with other counters that
  get added before it.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 include/linux/perf_counter.h |  3 +++
 kernel/perf_counter.c        | 31 ++++++++++++++++++++++++++-----
 2 files changed, 29 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index cc3a75a239a..b21d1ea4c05 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -236,6 +236,9 @@ extern u64 hw_perf_save_disable(void);
 extern void hw_perf_restore(u64 ctrl);
 extern int perf_counter_task_disable(void);
 extern int perf_counter_task_enable(void);
+extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
+	       struct perf_cpu_context *cpuctx,
+	       struct perf_counter_context *ctx, int cpu);
 
 #else
 static inline void
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index b7a027a2ef0..9ad11e44d9a 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -47,6 +47,12 @@ hw_perf_counter_init(struct perf_counter *counter)
 u64 __weak hw_perf_save_disable(void)		{ return 0; }
 void __weak hw_perf_restore(u64 ctrl)		{ barrier(); }
 void __weak hw_perf_counter_setup(void)		{ barrier(); }
+int __weak hw_perf_group_sched_in(struct perf_counter *group_leader,
+	       struct perf_cpu_context *cpuctx,
+	       struct perf_counter_context *ctx, int cpu)
+{
+	return 0;
+}
 
 static void
 list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
@@ -341,6 +347,9 @@ group_sched_out(struct perf_counter *group_counter,
 {
 	struct perf_counter *counter;
 
+	if (group_counter->state != PERF_COUNTER_STATE_ACTIVE)
+		return;
+
 	counter_sched_out(group_counter, cpuctx, ctx);
 
 	/*
@@ -354,15 +363,18 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 			      struct perf_cpu_context *cpuctx)
 {
 	struct perf_counter *counter;
+	u64 flags;
 
 	if (likely(!ctx->nr_counters))
 		return;
 
 	spin_lock(&ctx->lock);
+	flags = hw_perf_save_disable();
 	if (ctx->nr_active) {
 		list_for_each_entry(counter, &ctx->counter_list, list_entry)
 			group_sched_out(counter, cpuctx, ctx);
 	}
+	hw_perf_restore(flags);
 	spin_unlock(&ctx->lock);
 }
 
@@ -402,7 +414,14 @@ group_sched_in(struct perf_counter *group_counter,
 	       int cpu)
 {
 	struct perf_counter *counter, *partial_group;
-	int ret = 0;
+	int ret;
+
+	if (group_counter->state == PERF_COUNTER_STATE_OFF)
+		return 0;
+
+	ret = hw_perf_group_sched_in(group_counter, cpuctx, ctx, cpu);
+	if (ret)
+		return ret < 0 ? ret : 0;
 
 	if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
 		return -EAGAIN;
@@ -415,10 +434,9 @@ group_sched_in(struct perf_counter *group_counter,
 			partial_group = counter;
 			goto group_error;
 		}
-		ret = -EAGAIN;
 	}
 
-	return ret;
+	return 0;
 
 group_error:
 	/*
@@ -440,11 +458,13 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 			struct perf_cpu_context *cpuctx, int cpu)
 {
 	struct perf_counter *counter;
+	u64 flags;
 
 	if (likely(!ctx->nr_counters))
 		return;
 
 	spin_lock(&ctx->lock);
+	flags = hw_perf_save_disable();
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
 		/*
 		 * Listen to the 'cpu' scheduling filter constraint
@@ -454,12 +474,13 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 			continue;
 
 		/*
-		 * If we scheduled in a group atomically and
-		 * exclusively, break out:
+		 * If we scheduled in a group atomically and exclusively,
+		 * or if this group can't go on, break out:
 		 */
 		if (group_sched_in(counter, cpuctx, ctx, cpu))
 			break;
 	}
+	hw_perf_restore(flags);
 	spin_unlock(&ctx->lock);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 3b6f9e5cb21964b7ce12bf81076f830885563ec8 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 14 Jan 2009 21:00:30 +1100
Subject: perf_counter: Add support for pinned and exclusive counter groups

Impact: New perf_counter features

A pinned counter group is one that the user wants to have on the CPU
whenever possible, i.e. whenever the associated task is running, for
a per-task group, or always for a per-cpu group.  If the system
cannot satisfy that, it puts the group into an error state where
it is not scheduled any more and reads from it return EOF (i.e. 0
bytes read).  The group can be released from error state and made
readable again using prctl(PR_TASK_PERF_COUNTERS_ENABLE).  When we
have finer-grained enable/disable controls on counters we'll be able
to reset the error state on individual groups.

An exclusive group is one that the user wants to be the only group
using the CPU performance monitor hardware whenever it is on.  The
counter group scheduler will not schedule an exclusive group if there
are already other groups on the CPU and will not schedule other groups
onto the CPU if there is an exclusive group scheduled (that statement
does not apply to groups containing only software counters, which can
always go on and which do not prevent an exclusive group from going on).
With an exclusive group, we will be able to let users program PMU
registers at a low level without the concern that those settings will
perturb other measurements.

Along the way this reorganizes things a little:
- is_software_counter() is moved to perf_counter.h.
- cpuctx->active_oncpu now records the number of hardware counters on
  the CPU, i.e. it now excludes software counters.  Nothing was reading
  cpuctx->active_oncpu before, so this change is harmless.
- A new cpuctx->exclusive field records whether we currently have an
  exclusive group on the CPU.
- counter_sched_out moves higher up in perf_counter.c and gets called
  from __perf_counter_remove_from_context and __perf_counter_exit_task,
  where we used to have essentially the same code.
- __perf_counter_sched_in now goes through the counter list twice, doing
  the pinned counters in the first loop and the non-pinned counters in
  the second loop, in order to give the pinned counters the best chance
  to be scheduled in.

Note that only a group leader can be exclusive or pinned, and that
attribute applies to the whole group.  This avoids some awkwardness in
some corner cases (e.g. where a group leader is closed and the other
group members get added to the context list).  If we want to relax that
restriction later, we can, and it is easier to relax a restriction than
to apply a new one.

This doesn't yet handle the case where a pinned counter is inherited
and goes into error state in the child - the error state is not
propagated up to the parent when the child exits, and arguably it
should.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/perf_counter.c |  10 +-
 include/linux/perf_counter.h       |  15 ++-
 kernel/perf_counter.c              | 226 +++++++++++++++++++++++++------------
 3 files changed, 169 insertions(+), 82 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 85ad25923c2..5b0211348c7 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -35,14 +35,6 @@ void perf_counter_print_debug(void)
 {
 }
 
-/*
- * Return 1 for a software counter, 0 for a hardware counter
- */
-static inline int is_software_counter(struct perf_counter *counter)
-{
-	return !counter->hw_event.raw && counter->hw_event.type < 0;
-}
-
 /*
  * Read one performance monitor counter (PMC).
  */
@@ -443,6 +435,7 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
 	 */
 	for (i = n0; i < n0 + n; ++i)
 		cpuhw->counter[i]->hw.config = cpuhw->events[i];
+	cpuctx->active_oncpu += n;
 	n = 1;
 	counter_sched_in(group_leader, cpu);
 	list_for_each_entry(sub, &group_leader->sibling_list, list_entry) {
@@ -451,7 +444,6 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
 			++n;
 		}
 	}
-	cpuctx->active_oncpu += n;
 	ctx->nr_active += n;
 
 	return 1;
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index b21d1ea4c05..7ab8e5f96f5 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -86,7 +86,10 @@ struct perf_counter_hw_event {
 				nmi	     :  1, /* NMI sampling        */
 				raw	     :  1, /* raw event type      */
 				inherit	     :  1, /* children inherit it */
-				__reserved_1 : 28;
+				pinned	     :  1, /* must always be on PMU */
+				exclusive    :  1, /* only counter on PMU */
+
+				__reserved_1 : 26;
 
 	u64			__reserved_2;
 };
@@ -141,6 +144,7 @@ struct hw_perf_counter_ops {
  * enum perf_counter_active_state - the states of a counter
  */
 enum perf_counter_active_state {
+	PERF_COUNTER_STATE_ERROR	= -2,
 	PERF_COUNTER_STATE_OFF		= -1,
 	PERF_COUNTER_STATE_INACTIVE	=  0,
 	PERF_COUNTER_STATE_ACTIVE	=  1,
@@ -214,6 +218,7 @@ struct perf_cpu_context {
 	struct perf_counter_context	*task_ctx;
 	int				active_oncpu;
 	int				max_pertask;
+	int				exclusive;
 };
 
 /*
@@ -240,6 +245,14 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
 	       struct perf_cpu_context *cpuctx,
 	       struct perf_counter_context *ctx, int cpu);
 
+/*
+ * Return 1 for a software counter, 0 for a hardware counter
+ */
+static inline int is_software_counter(struct perf_counter *counter)
+{
+	return !counter->hw_event.raw && counter->hw_event.type < 0;
+}
+
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 52f2f526248..faf671b2956 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -93,6 +93,25 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	}
 }
 
+static void
+counter_sched_out(struct perf_counter *counter,
+		  struct perf_cpu_context *cpuctx,
+		  struct perf_counter_context *ctx)
+{
+	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
+		return;
+
+	counter->state = PERF_COUNTER_STATE_INACTIVE;
+	counter->hw_ops->disable(counter);
+	counter->oncpu = -1;
+
+	if (!is_software_counter(counter))
+		cpuctx->active_oncpu--;
+	ctx->nr_active--;
+	if (counter->hw_event.exclusive || !cpuctx->active_oncpu)
+		cpuctx->exclusive = 0;
+}
+
 /*
  * Cross CPU call to remove a performance counter
  *
@@ -118,14 +137,9 @@ static void __perf_counter_remove_from_context(void *info)
 	curr_rq_lock_irq_save(&flags);
 	spin_lock(&ctx->lock);
 
-	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
-		counter->state = PERF_COUNTER_STATE_INACTIVE;
-		counter->hw_ops->disable(counter);
-		ctx->nr_active--;
-		cpuctx->active_oncpu--;
-		counter->task = NULL;
-		counter->oncpu = -1;
-	}
+	counter_sched_out(counter, cpuctx, ctx);
+
+	counter->task = NULL;
 	ctx->nr_counters--;
 
 	/*
@@ -207,7 +221,7 @@ counter_sched_in(struct perf_counter *counter,
 		 struct perf_counter_context *ctx,
 		 int cpu)
 {
-	if (counter->state == PERF_COUNTER_STATE_OFF)
+	if (counter->state <= PERF_COUNTER_STATE_OFF)
 		return 0;
 
 	counter->state = PERF_COUNTER_STATE_ACTIVE;
@@ -223,12 +237,63 @@ counter_sched_in(struct perf_counter *counter,
 		return -EAGAIN;
 	}
 
-	cpuctx->active_oncpu++;
+	if (!is_software_counter(counter))
+		cpuctx->active_oncpu++;
 	ctx->nr_active++;
 
+	if (counter->hw_event.exclusive)
+		cpuctx->exclusive = 1;
+
 	return 0;
 }
 
+/*
+ * Return 1 for a group consisting entirely of software counters,
+ * 0 if the group contains any hardware counters.
+ */
+static int is_software_only_group(struct perf_counter *leader)
+{
+	struct perf_counter *counter;
+
+	if (!is_software_counter(leader))
+		return 0;
+	list_for_each_entry(counter, &leader->sibling_list, list_entry)
+		if (!is_software_counter(counter))
+			return 0;
+	return 1;
+}
+
+/*
+ * Work out whether we can put this counter group on the CPU now.
+ */
+static int group_can_go_on(struct perf_counter *counter,
+			   struct perf_cpu_context *cpuctx,
+			   int can_add_hw)
+{
+	/*
+	 * Groups consisting entirely of software counters can always go on.
+	 */
+	if (is_software_only_group(counter))
+		return 1;
+	/*
+	 * If an exclusive group is already on, no other hardware
+	 * counters can go on.
+	 */
+	if (cpuctx->exclusive)
+		return 0;
+	/*
+	 * If this group is exclusive and there are already
+	 * counters on the CPU, it can't go on.
+	 */
+	if (counter->hw_event.exclusive && cpuctx->active_oncpu)
+		return 0;
+	/*
+	 * Otherwise, try to add it if all previous groups were able
+	 * to go on.
+	 */
+	return can_add_hw;
+}
+
 /*
  * Cross CPU call to install and enable a performance counter
  */
@@ -240,6 +305,7 @@ static void __perf_install_in_context(void *info)
 	int cpu = smp_processor_id();
 	unsigned long flags;
 	u64 perf_flags;
+	int err;
 
 	/*
 	 * If this is a task context, we need to check whether it is
@@ -261,9 +327,21 @@ static void __perf_install_in_context(void *info)
 	list_add_counter(counter, ctx);
 	ctx->nr_counters++;
 
-	counter_sched_in(counter, cpuctx, ctx, cpu);
+	/*
+	 * An exclusive counter can't go on if there are already active
+	 * hardware counters, and no hardware counter can go on if there
+	 * is already an exclusive counter on.
+	 */
+	if (counter->state == PERF_COUNTER_STATE_INACTIVE &&
+	    !group_can_go_on(counter, cpuctx, 1))
+		err = -EEXIST;
+	else
+		err = counter_sched_in(counter, cpuctx, ctx, cpu);
+
+	if (err && counter->hw_event.pinned)
+		counter->state = PERF_COUNTER_STATE_ERROR;
 
-	if (!ctx->task && cpuctx->max_pertask)
+	if (!err && !ctx->task && cpuctx->max_pertask)
 		cpuctx->max_pertask--;
 
 	hw_perf_restore(perf_flags);
@@ -326,22 +404,6 @@ retry:
 	spin_unlock_irq(&ctx->lock);
 }
 
-static void
-counter_sched_out(struct perf_counter *counter,
-		  struct perf_cpu_context *cpuctx,
-		  struct perf_counter_context *ctx)
-{
-	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
-		return;
-
-	counter->state = PERF_COUNTER_STATE_INACTIVE;
-	counter->hw_ops->disable(counter);
-	counter->oncpu = -1;
-
-	cpuctx->active_oncpu--;
-	ctx->nr_active--;
-}
-
 static void
 group_sched_out(struct perf_counter *group_counter,
 		struct perf_cpu_context *cpuctx,
@@ -359,6 +421,9 @@ group_sched_out(struct perf_counter *group_counter,
 	 */
 	list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
 		counter_sched_out(counter, cpuctx, ctx);
+
+	if (group_counter->hw_event.exclusive)
+		cpuctx->exclusive = 0;
 }
 
 void __perf_counter_sched_out(struct perf_counter_context *ctx,
@@ -455,30 +520,6 @@ group_error:
 	return -EAGAIN;
 }
 
-/*
- * Return 1 for a software counter, 0 for a hardware counter
- */
-static inline int is_software_counter(struct perf_counter *counter)
-{
-	return !counter->hw_event.raw && counter->hw_event.type < 0;
-}
-
-/*
- * Return 1 for a group consisting entirely of software counters,
- * 0 if the group contains any hardware counters.
- */
-static int is_software_only_group(struct perf_counter *leader)
-{
-	struct perf_counter *counter;
-
-	if (!is_software_counter(leader))
-		return 0;
-	list_for_each_entry(counter, &leader->sibling_list, list_entry)
-		if (!is_software_counter(counter))
-			return 0;
-	return 1;
-}
-
 static void
 __perf_counter_sched_in(struct perf_counter_context *ctx,
 			struct perf_cpu_context *cpuctx, int cpu)
@@ -492,7 +533,38 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 
 	spin_lock(&ctx->lock);
 	flags = hw_perf_save_disable();
+
+	/*
+	 * First go through the list and put on any pinned groups
+	 * in order to give them the best chance of going on.
+	 */
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		if (counter->state <= PERF_COUNTER_STATE_OFF ||
+		    !counter->hw_event.pinned)
+			continue;
+		if (counter->cpu != -1 && counter->cpu != cpu)
+			continue;
+
+		if (group_can_go_on(counter, cpuctx, 1))
+			group_sched_in(counter, cpuctx, ctx, cpu);
+
+		/*
+		 * If this pinned group hasn't been scheduled,
+		 * put it in error state.
+		 */
+		if (counter->state == PERF_COUNTER_STATE_INACTIVE)
+			counter->state = PERF_COUNTER_STATE_ERROR;
+	}
+
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		/*
+		 * Ignore counters in OFF or ERROR state, and
+		 * ignore pinned counters since we did them already.
+		 */
+		if (counter->state <= PERF_COUNTER_STATE_OFF ||
+		    counter->hw_event.pinned)
+			continue;
+
 		/*
 		 * Listen to the 'cpu' scheduling filter constraint
 		 * of counters:
@@ -500,14 +572,10 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 		if (counter->cpu != -1 && counter->cpu != cpu)
 			continue;
 
-		/*
-		 * If we scheduled in a group atomically and exclusively,
-		 * or if this group can't go on, don't add any more
-		 * hardware counters.
-		 */
-		if (can_add_hw || is_software_only_group(counter))
+		if (group_can_go_on(counter, cpuctx, can_add_hw)) {
 			if (group_sched_in(counter, cpuctx, ctx, cpu))
 				can_add_hw = 0;
+		}
 	}
 	hw_perf_restore(flags);
 	spin_unlock(&ctx->lock);
@@ -567,8 +635,10 @@ int perf_counter_task_disable(void)
 	 */
 	perf_flags = hw_perf_save_disable();
 
-	list_for_each_entry(counter, &ctx->counter_list, list_entry)
-		counter->state = PERF_COUNTER_STATE_OFF;
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		if (counter->state != PERF_COUNTER_STATE_ERROR)
+			counter->state = PERF_COUNTER_STATE_OFF;
+	}
 
 	hw_perf_restore(perf_flags);
 
@@ -607,7 +677,7 @@ int perf_counter_task_enable(void)
 	perf_flags = hw_perf_save_disable();
 
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		if (counter->state != PERF_COUNTER_STATE_OFF)
+		if (counter->state > PERF_COUNTER_STATE_OFF)
 			continue;
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
 		counter->hw_event.disabled = 0;
@@ -849,6 +919,14 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 	if (count != sizeof(cntval))
 		return -EINVAL;
 
+	/*
+	 * Return end-of-file for a read on a counter that is in
+	 * error state (i.e. because it was pinned but it couldn't be
+	 * scheduled on to the CPU at some point).
+	 */
+	if (counter->state == PERF_COUNTER_STATE_ERROR)
+		return 0;
+
 	mutex_lock(&counter->mutex);
 	cntval = perf_counter_read(counter);
 	mutex_unlock(&counter->mutex);
@@ -884,7 +962,7 @@ perf_read_irq_data(struct perf_counter	*counter,
 {
 	struct perf_data *irqdata, *usrdata;
 	DECLARE_WAITQUEUE(wait, current);
-	ssize_t res;
+	ssize_t res, res2;
 
 	irqdata = counter->irqdata;
 	usrdata = counter->usrdata;
@@ -905,6 +983,9 @@ perf_read_irq_data(struct perf_counter	*counter,
 		if (signal_pending(current))
 			break;
 
+		if (counter->state == PERF_COUNTER_STATE_ERROR)
+			break;
+
 		spin_unlock_irq(&counter->waitq.lock);
 		schedule();
 		spin_lock_irq(&counter->waitq.lock);
@@ -913,7 +994,8 @@ perf_read_irq_data(struct perf_counter	*counter,
 	__set_current_state(TASK_RUNNING);
 	spin_unlock_irq(&counter->waitq.lock);
 
-	if (usrdata->len + irqdata->len < count)
+	if (usrdata->len + irqdata->len < count &&
+	    counter->state != PERF_COUNTER_STATE_ERROR)
 		return -ERESTARTSYS;
 read_pending:
 	mutex_lock(&counter->mutex);
@@ -925,11 +1007,12 @@ read_pending:
 
 	/* Switch irq buffer: */
 	usrdata = perf_switch_irq_data(counter);
-	if (perf_copy_usrdata(usrdata, buf + res, count - res) < 0) {
+	res2 = perf_copy_usrdata(usrdata, buf + res, count - res);
+	if (res2 < 0) {
 		if (!res)
 			res = -EFAULT;
 	} else {
-		res = count;
+		res += res2;
 	}
 out:
 	mutex_unlock(&counter->mutex);
@@ -1348,6 +1431,11 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
 		 */
 		if (group_leader->ctx != ctx)
 			goto err_put_context;
+		/*
+		 * Only a group leader can be exclusive or pinned
+		 */
+		if (hw_event.exclusive || hw_event.pinned)
+			goto err_put_context;
 	}
 
 	ret = -EINVAL;
@@ -1473,13 +1561,7 @@ __perf_counter_exit_task(struct task_struct *child,
 
 		cpuctx = &__get_cpu_var(perf_cpu_context);
 
-		if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
-			child_counter->state = PERF_COUNTER_STATE_INACTIVE;
-			child_counter->hw_ops->disable(child_counter);
-			cpuctx->active_oncpu--;
-			child_ctx->nr_active--;
-			child_counter->oncpu = -1;
-		}
+		counter_sched_out(child_counter, cpuctx, child_ctx);
 
 		list_del_init(&child_counter->list_entry);
 
-- 
cgit v1.2.3-70-g09d2


From d859e29fe34cb833071b20aef860ee94fbad9bb2 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Sat, 17 Jan 2009 18:10:22 +1100
Subject: perf_counter: Add counter enable/disable ioctls

Impact: New perf_counter features

This primarily adds a way for perf_counter users to enable and disable
counters and groups.  Enabling or disabling a counter or group also
enables or disables all of the child counters that have been cloned
from it to monitor children of the task monitored by the top-level
counter.  The userspace interface to enable/disable counters is via
ioctl on the counter file descriptor.

Along the way this extends the code that handles child counters to
handle child counter groups properly.  A group with multiple counters
will be cloned to child tasks if and only if the group leader has the
hw_event.inherit bit set - if it is set the whole group is cloned as a
group in the child task.

In order to be able to enable or disable all child counters of a given
top-level counter, we need a way to find them all.  Hence I have added
a child_list field to struct perf_counter, which is the head of the
list of children for a top-level counter, or the link in that list for
a child counter.  That list is protected by the perf_counter.mutex
field.

This also adds a mutex to the perf_counter_context struct.  Previously
the list of counters was protected just by the lock field in the
context, which meant that perf_counter_init_task had to take that lock
and then take whatever lock/mutex protects the top-level counter's
child_list.  But the counter enable/disable functions need to take
that lock in order to traverse the list, then for each counter take
the lock in that counter's context in order to change the counter's
state safely, which will lead to a deadlock.

To solve this, we now have both a mutex and a spinlock in the context,
and taking either is sufficient to ensure the list of counters can't
change - you have to take both before changing the list.  Now
perf_counter_init_task takes the mutex instead of the lock (which
incidentally means that inherit_counter can use GFP_KERNEL instead of
GFP_ATOMIC) and thus avoids the possible deadlock.  Similarly the new
enable/disable functions can take the mutex while traversing the list
of child counters without incurring a possible deadlock when the
counter manipulation code locks the context for a child counter.

We also had an misfeature that the first counter added to a context
would possibly not go on until the next sched-in, because we were
using ctx->nr_active to detect if the context was running on a CPU.
But nr_active is the number of active counters, and if that was zero
(because the context didn't have any counters yet) it would look like
the context wasn't running on a cpu and so the retry code in
__perf_install_in_context wouldn't retry.  So this adds an 'is_active'
field that is set when the context is on a CPU, even if it has no
counters.  The is_active field is only used for task contexts, not for
per-cpu contexts.

If we enable a subsidiary counter in a group that is active on a CPU,
and the arch code can't enable the counter, then we have to pull the
whole group off the CPU.  We do this with group_sched_out, which gets
moved up in the file so it comes before all its callers.  This also
adds similar logic to __perf_install_in_context so that the "all on,
or none" invariant of groups is preserved when adding a new counter to
a group.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 include/linux/perf_counter.h |  21 +-
 kernel/perf_counter.c        | 455 +++++++++++++++++++++++++++++++++++++------
 2 files changed, 415 insertions(+), 61 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 7ab8e5f96f5..33ba9fe0a78 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -14,6 +14,7 @@
 #define _LINUX_PERF_COUNTER_H
 
 #include <asm/atomic.h>
+#include <asm/ioctl.h>
 
 #ifdef CONFIG_PERF_COUNTERS
 # include <asm/perf_counter.h>
@@ -94,6 +95,12 @@ struct perf_counter_hw_event {
 	u64			__reserved_2;
 };
 
+/*
+ * Ioctls that can be done on a perf counter fd:
+ */
+#define PERF_COUNTER_IOC_ENABLE		_IO('$', 0)
+#define PERF_COUNTER_IOC_DISABLE	_IO('$', 1)
+
 /*
  * Kernel-internal data types:
  */
@@ -173,8 +180,10 @@ struct perf_counter {
 	struct file			*filp;
 
 	struct perf_counter		*parent;
+	struct list_head		child_list;
+
 	/*
-	 * Protect attach/detach:
+	 * Protect attach/detach and child_list:
 	 */
 	struct mutex			mutex;
 
@@ -199,13 +208,21 @@ struct perf_counter {
 struct perf_counter_context {
 #ifdef CONFIG_PERF_COUNTERS
 	/*
-	 * Protect the list of counters:
+	 * Protect the states of the counters in the list,
+	 * nr_active, and the list:
 	 */
 	spinlock_t		lock;
+	/*
+	 * Protect the list of counters.  Locking either mutex or lock
+	 * is sufficient to ensure the list doesn't change; to change
+	 * the list you need to lock both the mutex and the spinlock.
+	 */
+	struct mutex		mutex;
 
 	struct list_head	counter_list;
 	int			nr_counters;
 	int			nr_active;
+	int			is_active;
 	struct task_struct	*task;
 #endif
 };
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index faf671b2956..1ac18daa424 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -112,6 +112,28 @@ counter_sched_out(struct perf_counter *counter,
 		cpuctx->exclusive = 0;
 }
 
+static void
+group_sched_out(struct perf_counter *group_counter,
+		struct perf_cpu_context *cpuctx,
+		struct perf_counter_context *ctx)
+{
+	struct perf_counter *counter;
+
+	if (group_counter->state != PERF_COUNTER_STATE_ACTIVE)
+		return;
+
+	counter_sched_out(group_counter, cpuctx, ctx);
+
+	/*
+	 * Schedule out siblings (if any):
+	 */
+	list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
+		counter_sched_out(counter, cpuctx, ctx);
+
+	if (group_counter->hw_event.exclusive)
+		cpuctx->exclusive = 0;
+}
+
 /*
  * Cross CPU call to remove a performance counter
  *
@@ -168,7 +190,7 @@ static void __perf_counter_remove_from_context(void *info)
 /*
  * Remove the counter from a task's (or a CPU's) list of counters.
  *
- * Must be called with counter->mutex held.
+ * Must be called with counter->mutex and ctx->mutex held.
  *
  * CPU counters are removed with a smp call. For task counters we only
  * call when the task is on a CPU.
@@ -215,6 +237,99 @@ retry:
 	spin_unlock_irq(&ctx->lock);
 }
 
+/*
+ * Cross CPU call to disable a performance counter
+ */
+static void __perf_counter_disable(void *info)
+{
+	struct perf_counter *counter = info;
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_counter_context *ctx = counter->ctx;
+	unsigned long flags;
+
+	/*
+	 * If this is a per-task counter, need to check whether this
+	 * counter's task is the current task on this cpu.
+	 */
+	if (ctx->task && cpuctx->task_ctx != ctx)
+		return;
+
+	curr_rq_lock_irq_save(&flags);
+	spin_lock(&ctx->lock);
+
+	/*
+	 * If the counter is on, turn it off.
+	 * If it is in error state, leave it in error state.
+	 */
+	if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
+		if (counter == counter->group_leader)
+			group_sched_out(counter, cpuctx, ctx);
+		else
+			counter_sched_out(counter, cpuctx, ctx);
+		counter->state = PERF_COUNTER_STATE_OFF;
+	}
+
+	spin_unlock(&ctx->lock);
+	curr_rq_unlock_irq_restore(&flags);
+}
+
+/*
+ * Disable a counter.
+ */
+static void perf_counter_disable(struct perf_counter *counter)
+{
+	struct perf_counter_context *ctx = counter->ctx;
+	struct task_struct *task = ctx->task;
+
+	if (!task) {
+		/*
+		 * Disable the counter on the cpu that it's on
+		 */
+		smp_call_function_single(counter->cpu, __perf_counter_disable,
+					 counter, 1);
+		return;
+	}
+
+ retry:
+	task_oncpu_function_call(task, __perf_counter_disable, counter);
+
+	spin_lock_irq(&ctx->lock);
+	/*
+	 * If the counter is still active, we need to retry the cross-call.
+	 */
+	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
+		spin_unlock_irq(&ctx->lock);
+		goto retry;
+	}
+
+	/*
+	 * Since we have the lock this context can't be scheduled
+	 * in, so we can change the state safely.
+	 */
+	if (counter->state == PERF_COUNTER_STATE_INACTIVE)
+		counter->state = PERF_COUNTER_STATE_OFF;
+
+	spin_unlock_irq(&ctx->lock);
+}
+
+/*
+ * Disable a counter and all its children.
+ */
+static void perf_counter_disable_family(struct perf_counter *counter)
+{
+	struct perf_counter *child;
+
+	perf_counter_disable(counter);
+
+	/*
+	 * Lock the mutex to protect the list of children
+	 */
+	mutex_lock(&counter->mutex);
+	list_for_each_entry(child, &counter->child_list, child_list)
+		perf_counter_disable(child);
+	mutex_unlock(&counter->mutex);
+}
+
 static int
 counter_sched_in(struct perf_counter *counter,
 		 struct perf_cpu_context *cpuctx,
@@ -302,6 +417,7 @@ static void __perf_install_in_context(void *info)
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_counter *counter = info;
 	struct perf_counter_context *ctx = counter->ctx;
+	struct perf_counter *leader = counter->group_leader;
 	int cpu = smp_processor_id();
 	unsigned long flags;
 	u64 perf_flags;
@@ -327,23 +443,40 @@ static void __perf_install_in_context(void *info)
 	list_add_counter(counter, ctx);
 	ctx->nr_counters++;
 
+	/*
+	 * Don't put the counter on if it is disabled or if
+	 * it is in a group and the group isn't on.
+	 */
+	if (counter->state != PERF_COUNTER_STATE_INACTIVE ||
+	    (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE))
+		goto unlock;
+
 	/*
 	 * An exclusive counter can't go on if there are already active
 	 * hardware counters, and no hardware counter can go on if there
 	 * is already an exclusive counter on.
 	 */
-	if (counter->state == PERF_COUNTER_STATE_INACTIVE &&
-	    !group_can_go_on(counter, cpuctx, 1))
+	if (!group_can_go_on(counter, cpuctx, 1))
 		err = -EEXIST;
 	else
 		err = counter_sched_in(counter, cpuctx, ctx, cpu);
 
-	if (err && counter->hw_event.pinned)
-		counter->state = PERF_COUNTER_STATE_ERROR;
+	if (err) {
+		/*
+		 * This counter couldn't go on.  If it is in a group
+		 * then we have to pull the whole group off.
+		 * If the counter group is pinned then put it in error state.
+		 */
+		if (leader != counter)
+			group_sched_out(leader, cpuctx, ctx);
+		if (leader->hw_event.pinned)
+			leader->state = PERF_COUNTER_STATE_ERROR;
+	}
 
 	if (!err && !ctx->task && cpuctx->max_pertask)
 		cpuctx->max_pertask--;
 
+ unlock:
 	hw_perf_restore(perf_flags);
 
 	spin_unlock(&ctx->lock);
@@ -359,6 +492,8 @@ static void __perf_install_in_context(void *info)
  * If the counter is attached to a task which is on a CPU we use a smp
  * call to enable it in the task context. The task might have been
  * scheduled away, but we check this in the smp call again.
+ *
+ * Must be called with ctx->mutex held.
  */
 static void
 perf_install_in_context(struct perf_counter_context *ctx,
@@ -387,7 +522,7 @@ retry:
 	/*
 	 * we need to retry the smp call.
 	 */
-	if (ctx->nr_active && list_empty(&counter->list_entry)) {
+	if (ctx->is_active && list_empty(&counter->list_entry)) {
 		spin_unlock_irq(&ctx->lock);
 		goto retry;
 	}
@@ -404,26 +539,131 @@ retry:
 	spin_unlock_irq(&ctx->lock);
 }
 
-static void
-group_sched_out(struct perf_counter *group_counter,
-		struct perf_cpu_context *cpuctx,
-		struct perf_counter_context *ctx)
+/*
+ * Cross CPU call to enable a performance counter
+ */
+static void __perf_counter_enable(void *info)
 {
-	struct perf_counter *counter;
+	struct perf_counter *counter = info;
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_counter_context *ctx = counter->ctx;
+	struct perf_counter *leader = counter->group_leader;
+	unsigned long flags;
+	int err;
 
-	if (group_counter->state != PERF_COUNTER_STATE_ACTIVE)
+	/*
+	 * If this is a per-task counter, need to check whether this
+	 * counter's task is the current task on this cpu.
+	 */
+	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	counter_sched_out(group_counter, cpuctx, ctx);
+	curr_rq_lock_irq_save(&flags);
+	spin_lock(&ctx->lock);
+
+	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
+		goto unlock;
+	counter->state = PERF_COUNTER_STATE_INACTIVE;
 
 	/*
-	 * Schedule out siblings (if any):
+	 * If the counter is in a group and isn't the group leader,
+	 * then don't put it on unless the group is on.
 	 */
-	list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
-		counter_sched_out(counter, cpuctx, ctx);
+	if (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE)
+		goto unlock;
 
-	if (group_counter->hw_event.exclusive)
-		cpuctx->exclusive = 0;
+	if (!group_can_go_on(counter, cpuctx, 1))
+		err = -EEXIST;
+	else
+		err = counter_sched_in(counter, cpuctx, ctx,
+				       smp_processor_id());
+
+	if (err) {
+		/*
+		 * If this counter can't go on and it's part of a
+		 * group, then the whole group has to come off.
+		 */
+		if (leader != counter)
+			group_sched_out(leader, cpuctx, ctx);
+		if (leader->hw_event.pinned)
+			leader->state = PERF_COUNTER_STATE_ERROR;
+	}
+
+ unlock:
+	spin_unlock(&ctx->lock);
+	curr_rq_unlock_irq_restore(&flags);
+}
+
+/*
+ * Enable a counter.
+ */
+static void perf_counter_enable(struct perf_counter *counter)
+{
+	struct perf_counter_context *ctx = counter->ctx;
+	struct task_struct *task = ctx->task;
+
+	if (!task) {
+		/*
+		 * Enable the counter on the cpu that it's on
+		 */
+		smp_call_function_single(counter->cpu, __perf_counter_enable,
+					 counter, 1);
+		return;
+	}
+
+	spin_lock_irq(&ctx->lock);
+	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
+		goto out;
+
+	/*
+	 * If the counter is in error state, clear that first.
+	 * That way, if we see the counter in error state below, we
+	 * know that it has gone back into error state, as distinct
+	 * from the task having been scheduled away before the
+	 * cross-call arrived.
+	 */
+	if (counter->state == PERF_COUNTER_STATE_ERROR)
+		counter->state = PERF_COUNTER_STATE_OFF;
+
+ retry:
+	spin_unlock_irq(&ctx->lock);
+	task_oncpu_function_call(task, __perf_counter_enable, counter);
+
+	spin_lock_irq(&ctx->lock);
+
+	/*
+	 * If the context is active and the counter is still off,
+	 * we need to retry the cross-call.
+	 */
+	if (ctx->is_active && counter->state == PERF_COUNTER_STATE_OFF)
+		goto retry;
+
+	/*
+	 * Since we have the lock this context can't be scheduled
+	 * in, so we can change the state safely.
+	 */
+	if (counter->state == PERF_COUNTER_STATE_OFF)
+		counter->state = PERF_COUNTER_STATE_INACTIVE;
+ out:
+	spin_unlock_irq(&ctx->lock);
+}
+
+/*
+ * Enable a counter and all its children.
+ */
+static void perf_counter_enable_family(struct perf_counter *counter)
+{
+	struct perf_counter *child;
+
+	perf_counter_enable(counter);
+
+	/*
+	 * Lock the mutex to protect the list of children
+	 */
+	mutex_lock(&counter->mutex);
+	list_for_each_entry(child, &counter->child_list, child_list)
+		perf_counter_enable(child);
+	mutex_unlock(&counter->mutex);
 }
 
 void __perf_counter_sched_out(struct perf_counter_context *ctx,
@@ -432,16 +672,18 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 	struct perf_counter *counter;
 	u64 flags;
 
+	spin_lock(&ctx->lock);
+	ctx->is_active = 0;
 	if (likely(!ctx->nr_counters))
-		return;
+		goto out;
 
-	spin_lock(&ctx->lock);
 	flags = hw_perf_save_disable();
 	if (ctx->nr_active) {
 		list_for_each_entry(counter, &ctx->counter_list, list_entry)
 			group_sched_out(counter, cpuctx, ctx);
 	}
 	hw_perf_restore(flags);
+ out:
 	spin_unlock(&ctx->lock);
 }
 
@@ -528,10 +770,11 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 	u64 flags;
 	int can_add_hw = 1;
 
+	spin_lock(&ctx->lock);
+	ctx->is_active = 1;
 	if (likely(!ctx->nr_counters))
-		return;
+		goto out;
 
-	spin_lock(&ctx->lock);
 	flags = hw_perf_save_disable();
 
 	/*
@@ -578,6 +821,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 		}
 	}
 	hw_perf_restore(flags);
+ out:
 	spin_unlock(&ctx->lock);
 }
 
@@ -896,12 +1140,14 @@ static int perf_release(struct inode *inode, struct file *file)
 
 	file->private_data = NULL;
 
+	mutex_lock(&ctx->mutex);
 	mutex_lock(&counter->mutex);
 
 	perf_counter_remove_from_context(counter);
 	put_context(ctx);
 
 	mutex_unlock(&counter->mutex);
+	mutex_unlock(&ctx->mutex);
 
 	kfree(counter);
 
@@ -1053,10 +1299,30 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 	return events;
 }
 
+static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct perf_counter *counter = file->private_data;
+	int err = 0;
+
+	switch (cmd) {
+	case PERF_COUNTER_IOC_ENABLE:
+		perf_counter_enable_family(counter);
+		break;
+	case PERF_COUNTER_IOC_DISABLE:
+		perf_counter_disable_family(counter);
+		break;
+	default:
+		err = -ENOTTY;
+	}
+	return err;
+}
+
 static const struct file_operations perf_fops = {
 	.release		= perf_release,
 	.read			= perf_read,
 	.poll			= perf_poll,
+	.unlocked_ioctl		= perf_ioctl,
+	.compat_ioctl		= perf_ioctl,
 };
 
 static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
@@ -1348,6 +1614,8 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	INIT_LIST_HEAD(&counter->sibling_list);
 	init_waitqueue_head(&counter->waitq);
 
+	INIT_LIST_HEAD(&counter->child_list);
+
 	counter->irqdata		= &counter->data[0];
 	counter->usrdata		= &counter->data[1];
 	counter->cpu			= cpu;
@@ -1452,7 +1720,9 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
 		goto err_free_put_context;
 
 	counter->filp = counter_file;
+	mutex_lock(&ctx->mutex);
 	perf_install_in_context(ctx, counter, cpu);
+	mutex_unlock(&ctx->mutex);
 
 	fput_light(counter_file, fput_needed2);
 
@@ -1479,6 +1749,7 @@ __perf_counter_init_context(struct perf_counter_context *ctx,
 {
 	memset(ctx, 0, sizeof(*ctx));
 	spin_lock_init(&ctx->lock);
+	mutex_init(&ctx->mutex);
 	INIT_LIST_HEAD(&ctx->counter_list);
 	ctx->task = task;
 }
@@ -1486,20 +1757,30 @@ __perf_counter_init_context(struct perf_counter_context *ctx,
 /*
  * inherit a counter from parent task to child task:
  */
-static int
+static struct perf_counter *
 inherit_counter(struct perf_counter *parent_counter,
 	      struct task_struct *parent,
 	      struct perf_counter_context *parent_ctx,
 	      struct task_struct *child,
+	      struct perf_counter *group_leader,
 	      struct perf_counter_context *child_ctx)
 {
 	struct perf_counter *child_counter;
 
+	/*
+	 * Instead of creating recursive hierarchies of counters,
+	 * we link inherited counters back to the original parent,
+	 * which has a filp for sure, which we use as the reference
+	 * count:
+	 */
+	if (parent_counter->parent)
+		parent_counter = parent_counter->parent;
+
 	child_counter = perf_counter_alloc(&parent_counter->hw_event,
-					    parent_counter->cpu, NULL,
-					    GFP_ATOMIC);
+					    parent_counter->cpu, group_leader,
+					    GFP_KERNEL);
 	if (!child_counter)
-		return -ENOMEM;
+		return NULL;
 
 	/*
 	 * Link it up in the child's context:
@@ -1523,16 +1804,82 @@ inherit_counter(struct perf_counter *parent_counter,
 	 */
 	atomic_long_inc(&parent_counter->filp->f_count);
 
+	/*
+	 * Link this into the parent counter's child list
+	 */
+	mutex_lock(&parent_counter->mutex);
+	list_add_tail(&child_counter->child_list, &parent_counter->child_list);
+
+	/*
+	 * Make the child state follow the state of the parent counter,
+	 * not its hw_event.disabled bit.  We hold the parent's mutex,
+	 * so we won't race with perf_counter_{en,dis}able_family.
+	 */
+	if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE)
+		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
+	else
+		child_counter->state = PERF_COUNTER_STATE_OFF;
+
+	mutex_unlock(&parent_counter->mutex);
+
+	return child_counter;
+}
+
+static int inherit_group(struct perf_counter *parent_counter,
+	      struct task_struct *parent,
+	      struct perf_counter_context *parent_ctx,
+	      struct task_struct *child,
+	      struct perf_counter_context *child_ctx)
+{
+	struct perf_counter *leader;
+	struct perf_counter *sub;
+
+	leader = inherit_counter(parent_counter, parent, parent_ctx,
+				 child, NULL, child_ctx);
+	if (!leader)
+		return -ENOMEM;
+	list_for_each_entry(sub, &parent_counter->sibling_list, list_entry) {
+		if (!inherit_counter(sub, parent, parent_ctx,
+				     child, leader, child_ctx))
+			return -ENOMEM;
+	}
 	return 0;
 }
 
+static void sync_child_counter(struct perf_counter *child_counter,
+			       struct perf_counter *parent_counter)
+{
+	u64 parent_val, child_val;
+
+	parent_val = atomic64_read(&parent_counter->count);
+	child_val = atomic64_read(&child_counter->count);
+
+	/*
+	 * Add back the child's count to the parent's count:
+	 */
+	atomic64_add(child_val, &parent_counter->count);
+
+	/*
+	 * Remove this counter from the parent's list
+	 */
+	mutex_lock(&parent_counter->mutex);
+	list_del_init(&child_counter->child_list);
+	mutex_unlock(&parent_counter->mutex);
+
+	/*
+	 * Release the parent counter, if this was the last
+	 * reference to it.
+	 */
+	fput(parent_counter->filp);
+}
+
 static void
 __perf_counter_exit_task(struct task_struct *child,
 			 struct perf_counter *child_counter,
 			 struct perf_counter_context *child_ctx)
 {
 	struct perf_counter *parent_counter;
-	u64 parent_val, child_val;
+	struct perf_counter *sub, *tmp;
 
 	/*
 	 * If we do not self-reap then we have to wait for the
@@ -1561,7 +1908,7 @@ __perf_counter_exit_task(struct task_struct *child,
 
 		cpuctx = &__get_cpu_var(perf_cpu_context);
 
-		counter_sched_out(child_counter, cpuctx, child_ctx);
+		group_sched_out(child_counter, cpuctx, child_ctx);
 
 		list_del_init(&child_counter->list_entry);
 
@@ -1577,26 +1924,23 @@ __perf_counter_exit_task(struct task_struct *child,
 	 * that are still around due to the child reference. These
 	 * counters need to be zapped - but otherwise linger.
 	 */
-	if (!parent_counter)
-		return;
-
-	parent_val = atomic64_read(&parent_counter->count);
-	child_val = atomic64_read(&child_counter->count);
-
-	/*
-	 * Add back the child's count to the parent's count:
-	 */
-	atomic64_add(child_val, &parent_counter->count);
-
-	fput(parent_counter->filp);
+	if (parent_counter) {
+		sync_child_counter(child_counter, parent_counter);
+		list_for_each_entry_safe(sub, tmp, &child_counter->sibling_list,
+					 list_entry) {
+			if (sub->parent)
+				sync_child_counter(sub, sub->parent);
+			kfree(sub);
+		}
+	}
 
 	kfree(child_counter);
 }
 
 /*
- * When a child task exist, feed back counter values to parent counters.
+ * When a child task exits, feed back counter values to parent counters.
  *
- * Note: we are running in child context, but the PID is not hashed
+ * Note: we may be running in child context, but the PID is not hashed
  * anymore so new counters will not be added.
  */
 void perf_counter_exit_task(struct task_struct *child)
@@ -1620,9 +1964,8 @@ void perf_counter_exit_task(struct task_struct *child)
 void perf_counter_init_task(struct task_struct *child)
 {
 	struct perf_counter_context *child_ctx, *parent_ctx;
-	struct perf_counter *counter, *parent_counter;
+	struct perf_counter *counter;
 	struct task_struct *parent = current;
-	unsigned long flags;
 
 	child_ctx  =  &child->perf_counter_ctx;
 	parent_ctx = &parent->perf_counter_ctx;
@@ -1641,32 +1984,22 @@ void perf_counter_init_task(struct task_struct *child)
 	 * Lock the parent list. No need to lock the child - not PID
 	 * hashed yet and not running, so nobody can access it.
 	 */
-	spin_lock_irqsave(&parent_ctx->lock, flags);
+	mutex_lock(&parent_ctx->mutex);
 
 	/*
 	 * We dont have to disable NMIs - we are only looking at
 	 * the list, not manipulating it:
 	 */
 	list_for_each_entry(counter, &parent_ctx->counter_list, list_entry) {
-		if (!counter->hw_event.inherit || counter->group_leader != counter)
+		if (!counter->hw_event.inherit)
 			continue;
 
-		/*
-		 * Instead of creating recursive hierarchies of counters,
-		 * we link inheritd counters back to the original parent,
-		 * which has a filp for sure, which we use as the reference
-		 * count:
-		 */
-		parent_counter = counter;
-		if (counter->parent)
-			parent_counter = counter->parent;
-
-		if (inherit_counter(parent_counter, parent,
+		if (inherit_group(counter, parent,
 				  parent_ctx, child, child_ctx))
 			break;
 	}
 
-	spin_unlock_irqrestore(&parent_ctx->lock, flags);
+	mutex_unlock(&parent_ctx->mutex);
 }
 
 static void __cpuinit perf_counter_init_cpu(int cpu)
@@ -1692,11 +2025,15 @@ static void __perf_counter_exit_cpu(void *info)
 
 	list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry)
 		__perf_counter_remove_from_context(counter);
-
 }
 static void perf_counter_exit_cpu(int cpu)
 {
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_counter_context *ctx = &cpuctx->ctx;
+
+	mutex_lock(&ctx->mutex);
 	smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1);
+	mutex_unlock(&ctx->mutex);
 }
 #else
 static inline void perf_counter_exit_cpu(int cpu) { }
-- 
cgit v1.2.3-70-g09d2


From 1b023a96d9b44f50f4d8ff28c15f5b80e354760f Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Fri, 23 Jan 2009 10:13:01 +0100
Subject: perfcounters: throttle on too high IRQ rates

Starting kerneltop with only -c 100 seems to be a bad idea, it can
easily lock the system due to perfcounter IRQ overload.

So add throttling: if a new IRQ arrives in a shorter than
PERFMON_MIN_PERIOD_NS time, turn off perfcounters and untrottle them
from the next timer tick.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic.c             |  2 ++
 arch/x86/kernel/cpu/perf_counter.c | 38 ++++++++++++++++++++++++++++++++------
 include/linux/perf_counter.h       |  4 ++++
 3 files changed, 38 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 7b434e5b14c..849c23009bf 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -781,6 +781,8 @@ static void local_apic_timer_interrupt(void)
 	inc_irq_stat(apic_timer_irqs);
 
 	evt->event_handler(evt);
+
+	perf_counter_unthrottle();
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 9376771f757..1a040b179b5 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -33,6 +33,9 @@ static int nr_counters_fixed __read_mostly;
 struct cpu_hw_counters {
 	struct perf_counter	*counters[X86_PMC_IDX_MAX];
 	unsigned long		used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	u64			last_interrupt;
+	u64			global_enable;
+	int			throttled;
 };
 
 /*
@@ -474,16 +477,19 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
 static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
 {
 	int bit, cpu = smp_processor_id();
-	u64 ack, status, saved_global;
-	struct cpu_hw_counters *cpuc;
+	u64 ack, status, now;
+	struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
 
-	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, saved_global);
+	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
 
 	/* Disable counters globally */
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
 	ack_APIC_irq();
 
-	cpuc = &per_cpu(cpu_hw_counters, cpu);
+	now = sched_clock();
+	if (now - cpuc->last_interrupt < PERFMON_MIN_PERIOD_NS)
+		cpuc->throttled = 1;
+	cpuc->last_interrupt = now;
 
 	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
 	if (!status)
@@ -533,9 +539,29 @@ again:
 		goto again;
 out:
 	/*
-	 * Restore - do not reenable when global enable is off:
+	 * Restore - do not reenable when global enable is off or throttled:
 	 */
-	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, saved_global);
+	if (!cpuc->throttled)
+		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
+}
+
+void perf_counter_unthrottle(void)
+{
+	struct cpu_hw_counters *cpuc;
+
+	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+		return;
+
+	if (unlikely(!perf_counters_initialized))
+		return;
+
+	cpuc = &per_cpu(cpu_hw_counters, smp_processor_id());
+	if (cpuc->throttled) {
+		if (printk_ratelimit())
+			printk(KERN_WARNING "PERFMON: max event frequency exceeded!\n");
+		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
+		cpuc->throttled = 0;
+	}
 }
 
 void smp_perf_counter_interrupt(struct pt_regs *regs)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 33ba9fe0a78..91f1ca4c01c 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -254,6 +254,7 @@ extern void perf_counter_init_task(struct task_struct *child);
 extern void perf_counter_exit_task(struct task_struct *child);
 extern void perf_counter_notify(struct pt_regs *regs);
 extern void perf_counter_print_debug(void);
+extern void perf_counter_unthrottle(void);
 extern u64 hw_perf_save_disable(void);
 extern void hw_perf_restore(u64 ctrl);
 extern int perf_counter_task_disable(void);
@@ -270,6 +271,8 @@ static inline int is_software_counter(struct perf_counter *counter)
 	return !counter->hw_event.raw && counter->hw_event.type < 0;
 }
 
+#define PERFMON_MIN_PERIOD_NS 10000
+
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
@@ -281,6 +284,7 @@ static inline void perf_counter_init_task(struct task_struct *child)	{ }
 static inline void perf_counter_exit_task(struct task_struct *child)	{ }
 static inline void perf_counter_notify(struct pt_regs *regs)		{ }
 static inline void perf_counter_print_debug(void)			{ }
+static inline void perf_counter_unthrottle(void)			{ }
 static inline void hw_perf_restore(u64 ctrl)			{ }
 static inline u64 hw_perf_save_disable(void)		      { return 0; }
 static inline int perf_counter_task_disable(void)	{ return -EINVAL; }
-- 
cgit v1.2.3-70-g09d2


From 4b39fd96855254a244f71245b41a91cdecb87d63 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Fri, 23 Jan 2009 14:36:16 +0100
Subject: perfcounters: ratelimit performance counter interrupts

Ratelimit performance counter interrupts to 100KHz per CPU.

This replaces the irq-delta-time based method.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_counter.c | 27 +++++++++++++++------------
 include/linux/perf_counter.h       |  2 --
 2 files changed, 15 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 1a040b179b5..a56d4cf92f3 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -33,9 +33,8 @@ static int nr_counters_fixed __read_mostly;
 struct cpu_hw_counters {
 	struct perf_counter	*counters[X86_PMC_IDX_MAX];
 	unsigned long		used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-	u64			last_interrupt;
+	unsigned long		interrupts;
 	u64			global_enable;
-	int			throttled;
 };
 
 /*
@@ -470,6 +469,11 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
 	}
 }
 
+/*
+ * Maximum interrupt frequency of 100KHz per CPU
+ */
+#define PERFMON_MAX_INTERRUPTS 100000/HZ
+
 /*
  * This handler is triggered by the local APIC, so the APIC IRQ handling
  * rules apply:
@@ -477,7 +481,7 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
 static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
 {
 	int bit, cpu = smp_processor_id();
-	u64 ack, status, now;
+	u64 ack, status;
 	struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
 
 	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
@@ -486,11 +490,6 @@ static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
 	ack_APIC_irq();
 
-	now = sched_clock();
-	if (now - cpuc->last_interrupt < PERFMON_MIN_PERIOD_NS)
-		cpuc->throttled = 1;
-	cpuc->last_interrupt = now;
-
 	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
 	if (!status)
 		goto out;
@@ -541,13 +540,14 @@ out:
 	/*
 	 * Restore - do not reenable when global enable is off or throttled:
 	 */
-	if (!cpuc->throttled)
+	if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS)
 		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
 }
 
 void perf_counter_unthrottle(void)
 {
 	struct cpu_hw_counters *cpuc;
+	u64 global_enable;
 
 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
 		return;
@@ -556,12 +556,15 @@ void perf_counter_unthrottle(void)
 		return;
 
 	cpuc = &per_cpu(cpu_hw_counters, smp_processor_id());
-	if (cpuc->throttled) {
+	if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
 		if (printk_ratelimit())
-			printk(KERN_WARNING "PERFMON: max event frequency exceeded!\n");
+			printk(KERN_WARNING "PERFMON: max interrupts exceeded!\n");
 		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
-		cpuc->throttled = 0;
 	}
+	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_enable);
+	if (unlikely(cpuc->global_enable && !global_enable))
+		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
+	cpuc->interrupts = 0;
 }
 
 void smp_perf_counter_interrupt(struct pt_regs *regs)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 91f1ca4c01c..f55381fbcac 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -271,8 +271,6 @@ static inline int is_software_counter(struct perf_counter *counter)
 	return !counter->hw_event.raw && counter->hw_event.type < 0;
 }
 
-#define PERFMON_MIN_PERIOD_NS 10000
-
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
-- 
cgit v1.2.3-70-g09d2


From 23a185ca8abbeef64b6ffc33059b1d630e43ec10 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 9 Feb 2009 22:42:47 +1100
Subject: perf_counters: make software counters work as per-cpu counters

Impact: kernel crash fix

Yanmin Zhang reported that using a PERF_COUNT_TASK_CLOCK software
counter as a per-cpu counter would reliably crash the system, because
it calls __task_delta_exec with a null pointer.  The page fault,
context switch and cpu migration counters also won't function
correctly as per-cpu counters since they reference the current task.

This fixes the problem by redirecting the task_clock counter to the
cpu_clock counter when used as a per-cpu counter, and by implementing
per-cpu page fault, context switch and cpu migration counters.

Along the way, this:

- Initializes counter->ctx earlier, in perf_counter_alloc, so that
  sw_perf_counter_init can use it
- Adds code to kernel/sched.c to count task migrations into each
  cpu, in rq->nr_migrations_in
- Exports the per-cpu context switch and task migration counts
  via new functions added to kernel/sched.c
- Makes sure that if sw_perf_counter_init fails, we don't try to
  initialize the counter as a hardware counter.  Since the user has
  passed a negative, non-raw event type, they clearly don't intend
  for it to be interpreted as a hardware event.

Reported-by: "Zhang Yanmin" <yanmin_zhang@linux.intel.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  2 ++
 kernel/perf_counter.c | 78 +++++++++++++++++++++++++++++----------------------
 kernel/sched.c        | 17 +++++++++++
 3 files changed, 64 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b85b10abf77..1e5f70062a9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -137,6 +137,8 @@ extern unsigned long nr_running(void);
 extern unsigned long nr_uninterruptible(void);
 extern unsigned long nr_active(void);
 extern unsigned long nr_iowait(void);
+extern u64 cpu_nr_switches(int cpu);
+extern u64 cpu_nr_migrations(int cpu);
 
 struct seq_file;
 struct cfs_rq;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f27a7e9f3c4..544193cbc47 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -20,6 +20,8 @@
 #include <linux/anon_inodes.h>
 #include <linux/kernel_stat.h>
 #include <linux/perf_counter.h>
+#include <linux/mm.h>
+#include <linux/vmstat.h>
 
 /*
  * Each CPU has a list of per CPU counters:
@@ -502,7 +504,6 @@ perf_install_in_context(struct perf_counter_context *ctx,
 {
 	struct task_struct *task = ctx->task;
 
-	counter->ctx = ctx;
 	if (!task) {
 		/*
 		 * Per cpu counters are installed via an smp call and
@@ -1417,11 +1418,19 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = {
 	.read		= task_clock_perf_counter_read,
 };
 
-static u64 get_page_faults(void)
+#ifdef CONFIG_VM_EVENT_COUNTERS
+#define cpu_page_faults()	__get_cpu_var(vm_event_states).event[PGFAULT]
+#else
+#define cpu_page_faults()	0
+#endif
+
+static u64 get_page_faults(struct perf_counter *counter)
 {
-	struct task_struct *curr = current;
+	struct task_struct *curr = counter->ctx->task;
 
-	return curr->maj_flt + curr->min_flt;
+	if (curr)
+		return curr->maj_flt + curr->min_flt;
+	return cpu_page_faults();
 }
 
 static void page_faults_perf_counter_update(struct perf_counter *counter)
@@ -1430,7 +1439,7 @@ static void page_faults_perf_counter_update(struct perf_counter *counter)
 	s64 delta;
 
 	prev = atomic64_read(&counter->hw.prev_count);
-	now = get_page_faults();
+	now = get_page_faults(counter);
 
 	atomic64_set(&counter->hw.prev_count, now);
 
@@ -1446,11 +1455,7 @@ static void page_faults_perf_counter_read(struct perf_counter *counter)
 
 static int page_faults_perf_counter_enable(struct perf_counter *counter)
 {
-	/*
-	 * page-faults is a per-task value already,
-	 * so we dont have to clear it on switch-in.
-	 */
-
+	atomic64_set(&counter->hw.prev_count, get_page_faults(counter));
 	return 0;
 }
 
@@ -1465,11 +1470,13 @@ static const struct hw_perf_counter_ops perf_ops_page_faults = {
 	.read		= page_faults_perf_counter_read,
 };
 
-static u64 get_context_switches(void)
+static u64 get_context_switches(struct perf_counter *counter)
 {
-	struct task_struct *curr = current;
+	struct task_struct *curr = counter->ctx->task;
 
-	return curr->nvcsw + curr->nivcsw;
+	if (curr)
+		return curr->nvcsw + curr->nivcsw;
+	return cpu_nr_switches(smp_processor_id());
 }
 
 static void context_switches_perf_counter_update(struct perf_counter *counter)
@@ -1478,7 +1485,7 @@ static void context_switches_perf_counter_update(struct perf_counter *counter)
 	s64 delta;
 
 	prev = atomic64_read(&counter->hw.prev_count);
-	now = get_context_switches();
+	now = get_context_switches(counter);
 
 	atomic64_set(&counter->hw.prev_count, now);
 
@@ -1494,11 +1501,7 @@ static void context_switches_perf_counter_read(struct perf_counter *counter)
 
 static int context_switches_perf_counter_enable(struct perf_counter *counter)
 {
-	/*
-	 * ->nvcsw + curr->nivcsw is a per-task value already,
-	 * so we dont have to clear it on switch-in.
-	 */
-
+	atomic64_set(&counter->hw.prev_count, get_context_switches(counter));
 	return 0;
 }
 
@@ -1513,9 +1516,13 @@ static const struct hw_perf_counter_ops perf_ops_context_switches = {
 	.read		= context_switches_perf_counter_read,
 };
 
-static inline u64 get_cpu_migrations(void)
+static inline u64 get_cpu_migrations(struct perf_counter *counter)
 {
-	return current->se.nr_migrations;
+	struct task_struct *curr = counter->ctx->task;
+
+	if (curr)
+		return curr->se.nr_migrations;
+	return cpu_nr_migrations(smp_processor_id());
 }
 
 static void cpu_migrations_perf_counter_update(struct perf_counter *counter)
@@ -1524,7 +1531,7 @@ static void cpu_migrations_perf_counter_update(struct perf_counter *counter)
 	s64 delta;
 
 	prev = atomic64_read(&counter->hw.prev_count);
-	now = get_cpu_migrations();
+	now = get_cpu_migrations(counter);
 
 	atomic64_set(&counter->hw.prev_count, now);
 
@@ -1540,11 +1547,7 @@ static void cpu_migrations_perf_counter_read(struct perf_counter *counter)
 
 static int cpu_migrations_perf_counter_enable(struct perf_counter *counter)
 {
-	/*
-	 * se.nr_migrations is a per-task value already,
-	 * so we dont have to clear it on switch-in.
-	 */
-
+	atomic64_set(&counter->hw.prev_count, get_cpu_migrations(counter));
 	return 0;
 }
 
@@ -1569,7 +1572,14 @@ sw_perf_counter_init(struct perf_counter *counter)
 		hw_ops = &perf_ops_cpu_clock;
 		break;
 	case PERF_COUNT_TASK_CLOCK:
-		hw_ops = &perf_ops_task_clock;
+		/*
+		 * If the user instantiates this as a per-cpu counter,
+		 * use the cpu_clock counter instead.
+		 */
+		if (counter->ctx->task)
+			hw_ops = &perf_ops_task_clock;
+		else
+			hw_ops = &perf_ops_cpu_clock;
 		break;
 	case PERF_COUNT_PAGE_FAULTS:
 		hw_ops = &perf_ops_page_faults;
@@ -1592,6 +1602,7 @@ sw_perf_counter_init(struct perf_counter *counter)
 static struct perf_counter *
 perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 		   int cpu,
+		   struct perf_counter_context *ctx,
 		   struct perf_counter *group_leader,
 		   gfp_t gfpflags)
 {
@@ -1623,6 +1634,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	counter->wakeup_pending		= 0;
 	counter->group_leader		= group_leader;
 	counter->hw_ops			= NULL;
+	counter->ctx			= ctx;
 
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	if (hw_event->disabled)
@@ -1631,7 +1643,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	hw_ops = NULL;
 	if (!hw_event->raw && hw_event->type < 0)
 		hw_ops = sw_perf_counter_init(counter);
-	if (!hw_ops)
+	else
 		hw_ops = hw_perf_counter_init(counter);
 
 	if (!hw_ops) {
@@ -1707,7 +1719,8 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
 	}
 
 	ret = -EINVAL;
-	counter = perf_counter_alloc(&hw_event, cpu, group_leader, GFP_KERNEL);
+	counter = perf_counter_alloc(&hw_event, cpu, ctx, group_leader,
+				     GFP_KERNEL);
 	if (!counter)
 		goto err_put_context;
 
@@ -1777,15 +1790,14 @@ inherit_counter(struct perf_counter *parent_counter,
 		parent_counter = parent_counter->parent;
 
 	child_counter = perf_counter_alloc(&parent_counter->hw_event,
-					    parent_counter->cpu, group_leader,
-					    GFP_KERNEL);
+					   parent_counter->cpu, child_ctx,
+					   group_leader, GFP_KERNEL);
 	if (!child_counter)
 		return NULL;
 
 	/*
 	 * Link it up in the child's context:
 	 */
-	child_counter->ctx = child_ctx;
 	child_counter->task = child;
 	list_add_counter(child_counter, child_ctx);
 	child_ctx->nr_counters++;
diff --git a/kernel/sched.c b/kernel/sched.c
index 8db1a4cf208..173768f142a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -558,6 +558,7 @@ struct rq {
 	struct load_weight load;
 	unsigned long nr_load_updates;
 	u64 nr_switches;
+	u64 nr_migrations_in;
 
 	struct cfs_rq cfs;
 	struct rt_rq rt;
@@ -1908,6 +1909,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 #endif
 	if (old_cpu != new_cpu) {
 		p->se.nr_migrations++;
+		new_rq->nr_migrations_in++;
 #ifdef CONFIG_SCHEDSTATS
 		if (task_hot(p, old_rq->clock, NULL))
 			schedstat_inc(p, se.nr_forced2_migrations);
@@ -2810,6 +2812,21 @@ unsigned long nr_active(void)
 	return running + uninterruptible;
 }
 
+/*
+ * Externally visible per-cpu scheduler statistics:
+ * cpu_nr_switches(cpu) - number of context switches on that cpu
+ * cpu_nr_migrations(cpu) - number of migrations into that cpu
+ */
+u64 cpu_nr_switches(int cpu)
+{
+	return cpu_rq(cpu)->nr_switches;
+}
+
+u64 cpu_nr_migrations(int cpu)
+{
+	return cpu_rq(cpu)->nr_migrations_in;
+}
+
 /*
  * Update rq->cpu_load[] statistics. This function is usually called every
  * scheduler tick (TICK_NSEC).
-- 
cgit v1.2.3-70-g09d2


From 0475f9ea8e2cc030298908949e0d5da9f2fc2cfe Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 11 Feb 2009 14:35:35 +1100
Subject: perf_counters: allow users to count user, kernel and/or hypervisor
 events

Impact: new perf_counter feature

This extends the perf_counter_hw_event struct with bits that specify
that events in user, kernel and/or hypervisor mode should not be
counted (i.e. should be excluded), and adds code to program the PMU
mode selection bits accordingly on x86 and powerpc.

For software counters, we don't currently have the infrastructure to
distinguish which mode an event occurs in, so we currently fail the
counter initialization if the setting of the hw_event.exclude_* bits
would require us to distinguish.  Context switches and CPU migrations
are currently considered to occur in kernel mode.

On x86, this changes the previous policy that only root can count
kernel events.  Now non-root users can count kernel events or exclude
them.  Non-root users still can't use NMI events, though.  On x86 we
don't appear to have any way to control whether hypervisor events are
counted or not, so hw_event.exclude_hv is ignored.

On powerpc, the selection of whether to count events in user, kernel
and/or hypervisor mode is PMU-wide, not per-counter, so this adds a
check that the hw_event.exclude_* settings are the same as other events
on the PMU.  Counters being added to a group have to have the same
settings as the other hardware counters in the group.  Counters and
groups can only be enabled in hw_perf_group_sched_in or power_perf_enable
if they have the same settings as any other counters already on the
PMU.  If we are not running on a hypervisor, the exclude_hv setting
is ignored (by forcing it to 0) since we can't ever get any
hypervisor events.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/perf_counter.c | 68 ++++++++++++++++++++++++++++++++++++--
 arch/x86/kernel/cpu/perf_counter.c | 31 ++++++++++-------
 include/linux/perf_counter.h       | 19 ++++++-----
 kernel/perf_counter.c              | 26 ++++++++++++---
 4 files changed, 117 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 5b0211348c7..bd6ba85beb5 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -16,6 +16,7 @@
 #include <asm/reg.h>
 #include <asm/pmc.h>
 #include <asm/machdep.h>
+#include <asm/firmware.h>
 
 struct cpu_hw_counters {
 	int n_counters;
@@ -214,6 +215,36 @@ static int power_check_constraints(unsigned int event[], int n_ev)
 	return 0;
 }
 
+/*
+ * Check if newly-added counters have consistent settings for
+ * exclude_{user,kernel,hv} with each other and any previously
+ * added counters.
+ */
+static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new)
+{
+	int eu, ek, eh;
+	int i, n;
+	struct perf_counter *counter;
+
+	n = n_prev + n_new;
+	if (n <= 1)
+		return 0;
+
+	eu = ctrs[0]->hw_event.exclude_user;
+	ek = ctrs[0]->hw_event.exclude_kernel;
+	eh = ctrs[0]->hw_event.exclude_hv;
+	if (n_prev == 0)
+		n_prev = 1;
+	for (i = n_prev; i < n; ++i) {
+		counter = ctrs[i];
+		if (counter->hw_event.exclude_user != eu ||
+		    counter->hw_event.exclude_kernel != ek ||
+		    counter->hw_event.exclude_hv != eh)
+			return -EAGAIN;
+	}
+	return 0;
+}
+
 static void power_perf_read(struct perf_counter *counter)
 {
 	long val, delta, prev;
@@ -323,6 +354,20 @@ void hw_perf_restore(u64 disable)
 		goto out;
 	}
 
+	/*
+	 * Add in MMCR0 freeze bits corresponding to the
+	 * hw_event.exclude_* bits for the first counter.
+	 * We have already checked that all counters have the
+	 * same values for these bits as the first counter.
+	 */
+	counter = cpuhw->counter[0];
+	if (counter->hw_event.exclude_user)
+		cpuhw->mmcr[0] |= MMCR0_FCP;
+	if (counter->hw_event.exclude_kernel)
+		cpuhw->mmcr[0] |= MMCR0_FCS;
+	if (counter->hw_event.exclude_hv)
+		cpuhw->mmcr[0] |= MMCR0_FCHV;
+
 	/*
 	 * Write the new configuration to MMCR* with the freeze
 	 * bit set and set the hardware counters to their initial values.
@@ -424,6 +469,8 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
 			   &cpuhw->counter[n0], &cpuhw->events[n0]);
 	if (n < 0)
 		return -EAGAIN;
+	if (check_excludes(cpuhw->counter, n0, n))
+		return -EAGAIN;
 	if (power_check_constraints(cpuhw->events, n + n0))
 		return -EAGAIN;
 	cpuhw->n_counters = n0 + n;
@@ -476,6 +523,8 @@ static int power_perf_enable(struct perf_counter *counter)
 		goto out;
 	cpuhw->counter[n0] = counter;
 	cpuhw->events[n0] = counter->hw.config;
+	if (check_excludes(cpuhw->counter, n0, 1))
+		goto out;
 	if (power_check_constraints(cpuhw->events, n0 + 1))
 		goto out;
 
@@ -554,6 +603,17 @@ hw_perf_counter_init(struct perf_counter *counter)
 	counter->hw.config_base = ev;
 	counter->hw.idx = 0;
 
+	/*
+	 * If we are not running on a hypervisor, force the
+	 * exclude_hv bit to 0 so that we don't care what
+	 * the user set it to.  This also means that we don't
+	 * set the MMCR0_FCHV bit, which unconditionally freezes
+	 * the counters on the PPC970 variants used in Apple G5
+	 * machines (since MSR.HV is always 1 on those machines).
+	 */
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		counter->hw_event.exclude_hv = 0;
+	
 	/*
 	 * If this is in a group, check if it can go on with all the
 	 * other hardware counters in the group.  We assume the counter
@@ -566,11 +626,13 @@ hw_perf_counter_init(struct perf_counter *counter)
 		if (n < 0)
 			return NULL;
 	}
-	events[n++] = ev;
-	if (power_check_constraints(events, n))
+	events[n] = ev;
+	if (check_excludes(ctrs, n, 1))
+		return NULL;
+	if (power_check_constraints(events, n + 1))
 		return NULL;
 
-	counter->hw.config = events[n - 1];
+	counter->hw.config = events[n];
 	atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
 	return &power_perf_ops;
 }
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 9901e46998d..383d4c6423a 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -107,21 +107,25 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 		return -EINVAL;
 
 	/*
-	 * Count user events, and generate PMC IRQs:
+	 * Generate PMC IRQs:
 	 * (keep 'enabled' bit clear for now)
 	 */
-	hwc->config = ARCH_PERFMON_EVENTSEL_USR | ARCH_PERFMON_EVENTSEL_INT;
+	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
 
 	/*
-	 * If privileged enough, count OS events too, and allow
-	 * NMI events as well:
+	 * Count user and OS events unless requested not to.
 	 */
-	hwc->nmi = 0;
-	if (capable(CAP_SYS_ADMIN)) {
+	if (!hw_event->exclude_user)
+		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
+	if (!hw_event->exclude_kernel)
 		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
-		if (hw_event->nmi)
-			hwc->nmi = 1;
-	}
+
+	/*
+	 * If privileged enough, allow NMI events:
+	 */
+	hwc->nmi = 0;
+	if (capable(CAP_SYS_ADMIN) && hw_event->nmi)
+		hwc->nmi = 1;
 
 	hwc->irq_period		= hw_event->irq_period;
 	/*
@@ -248,10 +252,13 @@ __pmc_fixed_enable(struct perf_counter *counter,
 	int err;
 
 	/*
-	 * Enable IRQ generation (0x8) and ring-3 counting (0x2),
-	 * and enable ring-0 counting if allowed:
+	 * Enable IRQ generation (0x8),
+	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
+	 * if requested:
 	 */
-	bits = 0x8ULL | 0x2ULL;
+	bits = 0x8ULL;
+	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
+		bits |= 0x2;
 	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
 		bits |= 0x1;
 	bits <<= (idx * 4);
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index f55381fbcac..c83f51d6e35 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -83,14 +83,17 @@ struct perf_counter_hw_event {
 	u64			irq_period;
 	u32			record_type;
 
-	u32			disabled     :  1, /* off by default      */
-				nmi	     :  1, /* NMI sampling        */
-				raw	     :  1, /* raw event type      */
-				inherit	     :  1, /* children inherit it */
-				pinned	     :  1, /* must always be on PMU */
-				exclusive    :  1, /* only counter on PMU */
-
-				__reserved_1 : 26;
+	u32			disabled       :  1, /* off by default        */
+				nmi	       :  1, /* NMI sampling          */
+				raw	       :  1, /* raw event type        */
+				inherit	       :  1, /* children inherit it   */
+				pinned	       :  1, /* must always be on PMU */
+				exclusive      :  1, /* only group on PMU     */
+				exclude_user   :  1, /* don't count user      */
+				exclude_kernel :  1, /* ditto kernel          */
+				exclude_hv     :  1, /* ditto hypervisor      */
+
+				__reserved_1 : 23;
 
 	u64			__reserved_2;
 };
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 544193cbc47..89d5e3fe970 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1567,11 +1567,25 @@ sw_perf_counter_init(struct perf_counter *counter)
 {
 	const struct hw_perf_counter_ops *hw_ops = NULL;
 
+	/*
+	 * Software counters (currently) can't in general distinguish
+	 * between user, kernel and hypervisor events.
+	 * However, context switches and cpu migrations are considered
+	 * to be kernel events, and page faults are never hypervisor
+	 * events.
+	 */
 	switch (counter->hw_event.type) {
 	case PERF_COUNT_CPU_CLOCK:
-		hw_ops = &perf_ops_cpu_clock;
+		if (!(counter->hw_event.exclude_user ||
+		      counter->hw_event.exclude_kernel ||
+		      counter->hw_event.exclude_hv))
+			hw_ops = &perf_ops_cpu_clock;
 		break;
 	case PERF_COUNT_TASK_CLOCK:
+		if (counter->hw_event.exclude_user ||
+		    counter->hw_event.exclude_kernel ||
+		    counter->hw_event.exclude_hv)
+			break;
 		/*
 		 * If the user instantiates this as a per-cpu counter,
 		 * use the cpu_clock counter instead.
@@ -1582,13 +1596,17 @@ sw_perf_counter_init(struct perf_counter *counter)
 			hw_ops = &perf_ops_cpu_clock;
 		break;
 	case PERF_COUNT_PAGE_FAULTS:
-		hw_ops = &perf_ops_page_faults;
+		if (!(counter->hw_event.exclude_user ||
+		      counter->hw_event.exclude_kernel))
+			hw_ops = &perf_ops_page_faults;
 		break;
 	case PERF_COUNT_CONTEXT_SWITCHES:
-		hw_ops = &perf_ops_context_switches;
+		if (!counter->hw_event.exclude_kernel)
+			hw_ops = &perf_ops_context_switches;
 		break;
 	case PERF_COUNT_CPU_MIGRATIONS:
-		hw_ops = &perf_ops_cpu_migrations;
+		if (!counter->hw_event.exclude_kernel)
+			hw_ops = &perf_ops_cpu_migrations;
 		break;
 	default:
 		break;
-- 
cgit v1.2.3-70-g09d2


From c07c99b67233ccaad38a961c17405dc1e1542aa4 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 13 Feb 2009 22:10:34 +1100
Subject: perfcounters: make context switch and migration software counters
 work again

Jaswinder Singh Rajput reported that commit 23a185ca8abbeef caused the
context switch and migration software counters to report zero always.
With that commit, the software counters only count events that occur
between sched-in and sched-out for a task.  This is necessary for the
counter enable/disable prctls and ioctls to work.  However, the
context switch and migration counts are incremented after sched-out
for one task and before sched-in for the next.  Since the increment
doesn't occur while a task is scheduled in (as far as the software
counters are concerned) it doesn't count towards any counter.

Thus the context switch and migration counters need to count events
that occur at any time, provided the counter is enabled, not just
those that occur while the task is scheduled in (from the perf_counter
subsystem's point of view).  The problem though is that the software
counter code can't tell the difference between being enabled and being
scheduled in, and between being disabled and being scheduled out,
since we use the one pair of enable/disable entry points for both.
That is, the high-level disable operation simply arranges for the
counter to not be scheduled in any more, and the high-level enable
operation arranges for it to be scheduled in again.

One way to solve this would be to have sched_in/out operations in the
hw_perf_counter_ops struct as well as enable/disable.  However, this
takes a simpler approach: it adds a 'prev_state' field to the
perf_counter struct that allows a counter's enable method to know
whether the counter was previously disabled or just inactive
(scheduled out), and therefore whether the enable method is being
called as a result of a high-level enable or a schedule-in operation.

This then allows the context switch, migration and page fault counters
to reset their hw.prev_count value in their enable functions only if
they are called as a result of a high-level enable operation.
Although page faults would normally only occur while the counter is
scheduled in, this changes the page fault counter code too in case
there are ever circumstances where page faults get counted against a
task while its counters are not scheduled in.

Reported-by: Jaswinder Singh Rajput <jaswinder@kernel.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  1 +
 kernel/perf_counter.c        | 21 +++++++++++++++------
 2 files changed, 16 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index c83f51d6e35..32cd1acb738 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -173,6 +173,7 @@ struct perf_counter {
 	const struct hw_perf_counter_ops *hw_ops;
 
 	enum perf_counter_active_state	state;
+	enum perf_counter_active_state	prev_state;
 	atomic64_t			count;
 
 	struct perf_counter_hw_event	hw_event;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index fcefb0a726f..ad62965828d 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -444,6 +444,7 @@ static void __perf_install_in_context(void *info)
 
 	list_add_counter(counter, ctx);
 	ctx->nr_counters++;
+	counter->prev_state = PERF_COUNTER_STATE_OFF;
 
 	/*
 	 * Don't put the counter on if it is disabled or if
@@ -562,6 +563,7 @@ static void __perf_counter_enable(void *info)
 	curr_rq_lock_irq_save(&flags);
 	spin_lock(&ctx->lock);
 
+	counter->prev_state = counter->state;
 	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
 		goto unlock;
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
@@ -733,6 +735,7 @@ group_sched_in(struct perf_counter *group_counter,
 	if (ret)
 		return ret < 0 ? ret : 0;
 
+	group_counter->prev_state = group_counter->state;
 	if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
 		return -EAGAIN;
 
@@ -740,6 +743,7 @@ group_sched_in(struct perf_counter *group_counter,
 	 * Schedule in siblings as one group (if any):
 	 */
 	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
+		counter->prev_state = counter->state;
 		if (counter_sched_in(counter, cpuctx, ctx, cpu)) {
 			partial_group = counter;
 			goto group_error;
@@ -1398,9 +1402,9 @@ static void task_clock_perf_counter_read(struct perf_counter *counter)
 
 static int task_clock_perf_counter_enable(struct perf_counter *counter)
 {
-	u64 now = task_clock_perf_counter_val(counter, 0);
-
-	atomic64_set(&counter->hw.prev_count, now);
+	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+		atomic64_set(&counter->hw.prev_count,
+			     task_clock_perf_counter_val(counter, 0));
 
 	return 0;
 }
@@ -1455,7 +1459,8 @@ static void page_faults_perf_counter_read(struct perf_counter *counter)
 
 static int page_faults_perf_counter_enable(struct perf_counter *counter)
 {
-	atomic64_set(&counter->hw.prev_count, get_page_faults(counter));
+	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+		atomic64_set(&counter->hw.prev_count, get_page_faults(counter));
 	return 0;
 }
 
@@ -1501,7 +1506,9 @@ static void context_switches_perf_counter_read(struct perf_counter *counter)
 
 static int context_switches_perf_counter_enable(struct perf_counter *counter)
 {
-	atomic64_set(&counter->hw.prev_count, get_context_switches(counter));
+	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+		atomic64_set(&counter->hw.prev_count,
+			     get_context_switches(counter));
 	return 0;
 }
 
@@ -1547,7 +1554,9 @@ static void cpu_migrations_perf_counter_read(struct perf_counter *counter)
 
 static int cpu_migrations_perf_counter_enable(struct perf_counter *counter)
 {
-	atomic64_set(&counter->hw.prev_count, get_cpu_migrations(counter));
+	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+		atomic64_set(&counter->hw.prev_count,
+			     get_cpu_migrations(counter));
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From f3dfd2656deb81a0addee4f4ceff66b50a387388 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Thu, 26 Feb 2009 22:43:46 +1100
Subject: perfcounters: fix a few minor cleanliness issues

This fixes three issues noticed by Arnd Bergmann:

- Add #ifdef __KERNEL__ and move some things around in perf_counter.h
  to make sure only the bits that userspace needs are exported to
  userspace.

- Use __u64, __s64, __u32 types in the structs exported to userspace
  rather than u64, s64, u32.

- Make the sys_perf_counter_open syscall available to the SPUs on
  Cell platforms.

And one issue that I noticed in looking at the code again:

- Wrap the perf_counter_open syscall with SYSCALL_DEFINE4 so we get
  the proper handling of int arguments on ppc64 (and some other 64-bit
  architectures).

Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/include/asm/systbl.h |  2 +-
 include/linux/perf_counter.h      | 43 +++++++++++++++++++++------------------
 include/linux/syscalls.h          |  9 +++-----
 kernel/perf_counter.c             |  6 +++---
 4 files changed, 30 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 4c8095f6bec..d312eec8abb 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -322,4 +322,4 @@ SYSCALL_SPU(epoll_create1)
 SYSCALL_SPU(dup3)
 SYSCALL_SPU(pipe2)
 SYSCALL(inotify_init1)
-SYSCALL(perf_counter_open)
+SYSCALL_SPU(perf_counter_open)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 32cd1acb738..186efaf4966 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -13,20 +13,8 @@
 #ifndef _LINUX_PERF_COUNTER_H
 #define _LINUX_PERF_COUNTER_H
 
-#include <asm/atomic.h>
-#include <asm/ioctl.h>
-
-#ifdef CONFIG_PERF_COUNTERS
-# include <asm/perf_counter.h>
-#endif
-
-#include <linux/list.h>
-#include <linux/mutex.h>
-#include <linux/rculist.h>
-#include <linux/rcupdate.h>
-#include <linux/spinlock.h>
-
-struct task_struct;
+#include <linux/types.h>
+#include <linux/ioctl.h>
 
 /*
  * User-space ABI bits:
@@ -78,12 +66,12 @@ enum perf_counter_record_type {
  * Hardware event to monitor via a performance monitoring counter:
  */
 struct perf_counter_hw_event {
-	s64			type;
+	__s64			type;
 
-	u64			irq_period;
-	u32			record_type;
+	__u64			irq_period;
+	__u32			record_type;
 
-	u32			disabled       :  1, /* off by default        */
+	__u32			disabled       :  1, /* off by default        */
 				nmi	       :  1, /* NMI sampling          */
 				raw	       :  1, /* raw event type        */
 				inherit	       :  1, /* children inherit it   */
@@ -95,7 +83,7 @@ struct perf_counter_hw_event {
 
 				__reserved_1 : 23;
 
-	u64			__reserved_2;
+	__u64			__reserved_2;
 };
 
 /*
@@ -104,10 +92,24 @@ struct perf_counter_hw_event {
 #define PERF_COUNTER_IOC_ENABLE		_IO('$', 0)
 #define PERF_COUNTER_IOC_DISABLE	_IO('$', 1)
 
+#ifdef __KERNEL__
 /*
- * Kernel-internal data types:
+ * Kernel-internal data types and definitions:
  */
 
+#ifdef CONFIG_PERF_COUNTERS
+# include <asm/perf_counter.h>
+#endif
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+
+struct task_struct;
+
 /**
  * struct hw_perf_counter - performance counter hardware details:
  */
@@ -293,4 +295,5 @@ static inline int perf_counter_task_disable(void)	{ return -EINVAL; }
 static inline int perf_counter_task_enable(void)	{ return -EINVAL; }
 #endif
 
+#endif /* __KERNEL__ */
 #endif /* _LINUX_PERF_COUNTER_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 88255d3261a..28ef2be839c 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -696,10 +696,7 @@ asmlinkage long sys_pipe(int __user *);
 int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
 
-asmlinkage int sys_perf_counter_open(
-
-	struct perf_counter_hw_event	*hw_event_uptr		__user,
-	pid_t				pid,
-	int				cpu,
-	int				group_fd);
+asmlinkage long sys_perf_counter_open(
+		const struct perf_counter_hw_event __user *hw_event_uptr,
+		pid_t pid, int cpu, int group_fd);
 #endif
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index ad62965828d..16b14ba99d3 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1690,9 +1690,9 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
  * @cpu:		target cpu
  * @group_fd:		group leader counter fd
  */
-asmlinkage int
-sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
-		      pid_t pid, int cpu, int group_fd)
+SYSCALL_DEFINE4(perf_counter_open,
+		const struct perf_counter_hw_event __user *, hw_event_uptr,
+		pid_t, pid, int, cpu, int, group_fd)
 {
 	struct perf_counter *counter, *group_leader;
 	struct perf_counter_hw_event hw_event;
-- 
cgit v1.2.3-70-g09d2


From 2743a5b0fa6f309da904f2190a9cc25deee34dbd Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 4 Mar 2009 20:36:51 +1100
Subject: perfcounters: provide expansion room in the ABI

Impact: ABI change

This expands several fields in the perf_counter_hw_event struct and adds
a "flags" argument to the perf_counter_open system call, in order that
features can be added in future without ABI changes.

In particular the record_type field is expanded to 64 bits, and the
space for flag bits has been expanded from 32 to 64 bits.

This also adds some new fields:

* read_format (64 bits) is intended to provide a way to specify what
  userspace wants to get back when it does a read() on a simple
  (non-interrupting) counter;

* exclude_idle (1 bit) provides a way for userspace to ask that events
  that occur when the cpu is idle be excluded;

* extra_config_len will provide a way for userspace to supply an
  arbitrary amount of extra machine-specific PMU configuration data
  immediately following the perf_counter_hw_event struct, to allow
  sophisticated users to program things such as instruction matching
  CAMs and address range registers;

* __reserved_3 and __reserved_4 provide space for future expansion.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 include/linux/perf_counter.h | 12 +++++++++---
 include/linux/syscalls.h     |  2 +-
 kernel/perf_counter.c        | 10 +++++++---
 3 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 186efaf4966..c42455ab155 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -69,9 +69,10 @@ struct perf_counter_hw_event {
 	__s64			type;
 
 	__u64			irq_period;
-	__u32			record_type;
+	__u64			record_type;
+	__u64			read_format;
 
-	__u32			disabled       :  1, /* off by default        */
+	__u64			disabled       :  1, /* off by default        */
 				nmi	       :  1, /* NMI sampling          */
 				raw	       :  1, /* raw event type        */
 				inherit	       :  1, /* children inherit it   */
@@ -80,10 +81,15 @@ struct perf_counter_hw_event {
 				exclude_user   :  1, /* don't count user      */
 				exclude_kernel :  1, /* ditto kernel          */
 				exclude_hv     :  1, /* ditto hypervisor      */
+				exclude_idle   :  1, /* don't count when idle */
 
-				__reserved_1 : 23;
+				__reserved_1 : 55;
+
+	__u32			extra_config_len;
+	__u32			__reserved_4;
 
 	__u64			__reserved_2;
+	__u64			__reserved_3;
 };
 
 /*
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 28ef2be839c..ab1d7724739 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -698,5 +698,5 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
 asmlinkage long sys_perf_counter_open(
 		const struct perf_counter_hw_event __user *hw_event_uptr,
-		pid_t pid, int cpu, int group_fd);
+		pid_t pid, int cpu, int group_fd, unsigned long flags);
 #endif
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 16b14ba99d3..b2e838959f3 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1683,16 +1683,16 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 }
 
 /**
- * sys_perf_task_open - open a performance counter, associate it to a task/cpu
+ * sys_perf_counter_open - open a performance counter, associate it to a task/cpu
  *
  * @hw_event_uptr:	event type attributes for monitoring/sampling
  * @pid:		target pid
  * @cpu:		target cpu
  * @group_fd:		group leader counter fd
  */
-SYSCALL_DEFINE4(perf_counter_open,
+SYSCALL_DEFINE5(perf_counter_open,
 		const struct perf_counter_hw_event __user *, hw_event_uptr,
-		pid_t, pid, int, cpu, int, group_fd)
+		pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
 {
 	struct perf_counter *counter, *group_leader;
 	struct perf_counter_hw_event hw_event;
@@ -1703,6 +1703,10 @@ SYSCALL_DEFINE4(perf_counter_open,
 	int fput_needed2 = 0;
 	int ret;
 
+	/* for future expandability... */
+	if (flags)
+		return -EINVAL;
+
 	if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0)
 		return -EFAULT;
 
-- 
cgit v1.2.3-70-g09d2


From 2485e5184452ccbda34ff83883883d9107800dff Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 5 Mar 2009 12:33:16 +0100
Subject: perfcounters: fix reserved bits sizing

The sum of bits is 65 currently not 64 - so reduce the # of reserved bits
from 55 to 54.

Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index c42455ab155..dde564517b6 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -83,7 +83,7 @@ struct perf_counter_hw_event {
 				exclude_hv     :  1, /* ditto hypervisor      */
 				exclude_idle   :  1, /* don't count when idle */
 
-				__reserved_1 : 55;
+				__reserved_1   : 54;
 
 	__u32			extra_config_len;
 	__u32			__reserved_4;
-- 
cgit v1.2.3-70-g09d2


From bac429f037f1a51a74d62bad6d1518c3be065df3 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 20 Mar 2009 12:50:56 -0400
Subject: tracing: add function profiler

Impact: new profiling feature

This patch adds a function profiler. In debugfs/tracing/ two new
files are created.

  function_profile_enabled  - to enable or disable profiling

  trace_stat/functions   - the profiled functions.

For example:

  echo 1 > /debugfs/tracing/function_profile_enabled
  ./hackbench 50
  echo 0 > /debugfs/tracing/function_profile_enabled

yields:

  cat /debugfs/tracing/trace_stat/functions

  Function                               Hit
  --------                               ---
  _spin_lock                        10106442
  _spin_unlock                      10097492
  kfree                              6013704
  _spin_unlock_irqrestore            4423941
  _spin_lock_irqsave                 4406825
  __phys_addr                        4181686
  __slab_free                        4038222
  dput                               4030130
  path_put                           4023387
  unroll_tree_refs                   4019532
[...]

The most hit functions are listed first. Functions that are not
hit are not listed.

This feature depends on and uses dynamic function tracing. When the
function profiling is disabled, no overhead occurs. But it still
takes up around 300KB to hold the data, thus it is not recomended
to keep it enabled for systems low on memory.

When a '1' is echoed into the function_profile_enabled file, the
counters for is function is reset back to zero. Thus you can see what
functions are hit most by different programs.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 include/linux/ftrace.h |   4 +
 kernel/trace/Kconfig   |  19 +++
 kernel/trace/ftrace.c  | 313 ++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 334 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 015a3d22cf7..0456c3a51c6 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -153,6 +153,10 @@ struct dyn_ftrace {
 		unsigned long		flags;
 		struct dyn_ftrace	*newlist;
 	};
+#ifdef CONFIG_FUNCTION_PROFILER
+	unsigned long			counter;
+	struct hlist_node		node;
+#endif
 	struct dyn_arch_ftrace		arch;
 };
 
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 8a4d7293104..95e9ad5735d 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -105,6 +105,7 @@ config FUNCTION_GRAPH_TRACER
 	  This is done by setting the current return address on the current
 	  task structure into a stack of calls.
 
+
 config IRQSOFF_TRACER
 	bool "Interrupts-off Latency Tracer"
 	default n
@@ -376,6 +377,24 @@ config DYNAMIC_FTRACE
 	 were made. If so, it runs stop_machine (stops all CPUS)
 	 and modifies the code to jump over the call to ftrace.
 
+config FUNCTION_PROFILER
+	bool "Kernel function profiler"
+	depends on DYNAMIC_FTRACE
+	default n
+	help
+	 This option enables the kernel function profiler. When the dynamic
+	 function tracing is enabled, a counter is added into the function
+	 records used by the dynamic function tracer. A file is created in
+	 debugfs called function_profile_enabled which defaults to zero.
+	 When a 1 is echoed into this file profiling begins, and when a
+	 zero is entered, profiling stops. A file in the trace_stats
+	 directory called functions, that show the list of functions that
+	 have been hit and their counters.
+
+	 This takes up around 320K more memory.
+
+	 If in doubt, say N
+
 config FTRACE_MCOUNT_RECORD
 	def_bool y
 	depends on DYNAMIC_FTRACE
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7b8722baf15..11f364c776d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -34,6 +34,7 @@
 #include <asm/ftrace.h>
 
 #include "trace.h"
+#include "trace_stat.h"
 
 #define FTRACE_WARN_ON(cond)			\
 	do {					\
@@ -261,7 +262,6 @@ struct ftrace_func_probe {
 	struct rcu_head		rcu;
 };
 
-
 enum {
 	FTRACE_ENABLE_CALLS		= (1 << 0),
 	FTRACE_DISABLE_CALLS		= (1 << 1),
@@ -309,6 +309,307 @@ static struct dyn_ftrace *ftrace_free_records;
 		}				\
 	}
 
+#ifdef CONFIG_FUNCTION_PROFILER
+static struct hlist_head *ftrace_profile_hash;
+static int ftrace_profile_bits;
+static int ftrace_profile_enabled;
+static DEFINE_MUTEX(ftrace_profile_lock);
+
+static void *
+function_stat_next(void *v, int idx)
+{
+	struct dyn_ftrace *rec = v;
+	struct ftrace_page *pg;
+
+	pg = (struct ftrace_page *)((unsigned long)rec & PAGE_MASK);
+
+ again:
+	rec++;
+	if ((void *)rec >= (void *)&pg->records[pg->index]) {
+		pg = pg->next;
+		if (!pg)
+			return NULL;
+		rec = &pg->records[0];
+	}
+
+	if (rec->flags & FTRACE_FL_FREE ||
+	    rec->flags & FTRACE_FL_FAILED ||
+	    !(rec->flags & FTRACE_FL_CONVERTED) ||
+	    /* ignore non hit functions */
+	    !rec->counter)
+		goto again;
+
+	return rec;
+}
+
+static void *function_stat_start(struct tracer_stat *trace)
+{
+	return function_stat_next(&ftrace_pages_start->records[0], 0);
+}
+
+static int function_stat_cmp(void *p1, void *p2)
+{
+	struct dyn_ftrace *a = p1;
+	struct dyn_ftrace *b = p2;
+
+	if (a->counter < b->counter)
+		return -1;
+	if (a->counter > b->counter)
+		return 1;
+	else
+		return 0;
+}
+
+static int function_stat_headers(struct seq_file *m)
+{
+	seq_printf(m, "  Function                               Hit\n"
+		      "  --------                               ---\n");
+	return 0;
+}
+
+static int function_stat_show(struct seq_file *m, void *v)
+{
+	struct dyn_ftrace *rec = v;
+	char str[KSYM_SYMBOL_LEN];
+
+	kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
+
+	seq_printf(m, "  %-30.30s  %10lu\n", str, rec->counter);
+	return 0;
+}
+
+static struct tracer_stat function_stats = {
+	.name = "functions",
+	.stat_start = function_stat_start,
+	.stat_next = function_stat_next,
+	.stat_cmp = function_stat_cmp,
+	.stat_headers = function_stat_headers,
+	.stat_show = function_stat_show
+};
+
+static void ftrace_profile_init(int nr_funcs)
+{
+	unsigned long addr;
+	int order;
+	int size;
+
+	/*
+	 * We are profiling all functions, lets make it 1/4th of the
+	 * number of functions that are in core kernel. So we have to
+	 * iterate 4 times.
+	 */
+	order = (sizeof(struct hlist_head) * nr_funcs) / 4;
+	order = get_order(order);
+	size = 1 << (PAGE_SHIFT + order);
+
+	pr_info("Allocating %d KB for profiler hash\n", size >> 10);
+
+	addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+	if (!addr) {
+		pr_warning("Could not allocate function profiler hash\n");
+		return;
+	}
+
+	ftrace_profile_hash = (void *)addr;
+
+	/*
+	 * struct hlist_head should be a pointer of 4 or 8 bytes.
+	 * And a simple bit manipulation can be done, but if for
+	 * some reason struct hlist_head is not a mulitple of 2,
+	 * then we play it safe, and simply count. This function
+	 * is done once at boot up, so it is not that critical in
+	 * performance.
+	 */
+
+	size--;
+	size /= sizeof(struct hlist_head);
+
+	for (; size; size >>= 1)
+		ftrace_profile_bits++;
+
+	pr_info("Function profiler has %d hash buckets\n",
+		1 << ftrace_profile_bits);
+
+	return;
+}
+
+static ssize_t
+ftrace_profile_read(struct file *filp, char __user *ubuf,
+		     size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	int r;
+
+	r = sprintf(buf, "%u\n", ftrace_profile_enabled);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static void ftrace_profile_reset(void)
+{
+	struct dyn_ftrace *rec;
+	struct ftrace_page *pg;
+
+	do_for_each_ftrace_rec(pg, rec) {
+		rec->counter = 0;
+	} while_for_each_ftrace_rec();
+}
+
+static struct dyn_ftrace *ftrace_find_profiled_func(unsigned long ip)
+{
+	struct dyn_ftrace *rec;
+	struct hlist_head *hhd;
+	struct hlist_node *n;
+	unsigned long flags;
+	unsigned long key;
+
+	if (!ftrace_profile_hash)
+		return NULL;
+
+	key = hash_long(ip, ftrace_profile_bits);
+	hhd = &ftrace_profile_hash[key];
+
+	if (hlist_empty(hhd))
+		return NULL;
+
+	local_irq_save(flags);
+	hlist_for_each_entry_rcu(rec, n, hhd, node) {
+		if (rec->ip == ip)
+			goto out;
+	}
+	rec = NULL;
+ out:
+	local_irq_restore(flags);
+
+	return rec;
+}
+
+static void
+function_profile_call(unsigned long ip, unsigned long parent_ip)
+{
+	struct dyn_ftrace *rec;
+	unsigned long flags;
+
+	if (!ftrace_profile_enabled)
+		return;
+
+	local_irq_save(flags);
+	rec = ftrace_find_profiled_func(ip);
+	if (!rec)
+		goto out;
+
+	rec->counter++;
+ out:
+	local_irq_restore(flags);
+}
+
+static struct ftrace_ops ftrace_profile_ops __read_mostly =
+{
+	.func = function_profile_call,
+};
+
+static ssize_t
+ftrace_profile_write(struct file *filp, const char __user *ubuf,
+		     size_t cnt, loff_t *ppos)
+{
+	unsigned long val;
+	char buf[64];
+	int ret;
+
+	if (!ftrace_profile_hash) {
+		pr_info("Can not enable hash due to earlier problems\n");
+		return -ENODEV;
+	}
+
+	if (cnt >= sizeof(buf))
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+
+	ret = strict_strtoul(buf, 10, &val);
+	if (ret < 0)
+		return ret;
+
+	val = !!val;
+
+	mutex_lock(&ftrace_profile_lock);
+	if (ftrace_profile_enabled ^ val) {
+		if (val) {
+			ftrace_profile_reset();
+			register_ftrace_function(&ftrace_profile_ops);
+			ftrace_profile_enabled = 1;
+		} else {
+			ftrace_profile_enabled = 0;
+			unregister_ftrace_function(&ftrace_profile_ops);
+		}
+	}
+	mutex_unlock(&ftrace_profile_lock);
+
+	filp->f_pos += cnt;
+
+	return cnt;
+}
+
+static const struct file_operations ftrace_profile_fops = {
+	.open		= tracing_open_generic,
+	.read		= ftrace_profile_read,
+	.write		= ftrace_profile_write,
+};
+
+static void ftrace_profile_debugfs(struct dentry *d_tracer)
+{
+	struct dentry *entry;
+	int ret;
+
+	ret = register_stat_tracer(&function_stats);
+	if (ret) {
+		pr_warning("Warning: could not register "
+			   "function stats\n");
+		return;
+	}
+
+	entry = debugfs_create_file("function_profile_enabled", 0644,
+				    d_tracer, NULL, &ftrace_profile_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs "
+			   "'function_profile_enabled' entry\n");
+}
+
+static void ftrace_add_profile(struct dyn_ftrace *rec)
+{
+	unsigned long key;
+
+	if (!ftrace_profile_hash)
+		return;
+
+	key = hash_long(rec->ip, ftrace_profile_bits);
+	hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]);
+}
+
+static void ftrace_profile_release(struct dyn_ftrace *rec)
+{
+	mutex_lock(&ftrace_profile_lock);
+	hlist_del(&rec->node);
+	mutex_unlock(&ftrace_profile_lock);
+}
+
+#else /* CONFIG_FUNCTION_PROFILER */
+static void ftrace_profile_init(int nr_funcs)
+{
+}
+static void ftrace_add_profile(struct dyn_ftrace *rec)
+{
+}
+static void ftrace_profile_debugfs(struct dentry *d_tracer)
+{
+}
+static void ftrace_profile_release(struct dyn_ftrace *rec)
+{
+}
+#endif /* CONFIG_FUNCTION_PROFILER */
+
 #ifdef CONFIG_KPROBES
 
 static int frozen_record_count;
@@ -359,8 +660,10 @@ void ftrace_release(void *start, unsigned long size)
 	mutex_lock(&ftrace_lock);
 	do_for_each_ftrace_rec(pg, rec) {
 		if ((rec->ip >= s) && (rec->ip < e) &&
-		    !(rec->flags & FTRACE_FL_FREE))
+		    !(rec->flags & FTRACE_FL_FREE)) {
 			ftrace_free_rec(rec);
+			ftrace_profile_release(rec);
+		}
 	} while_for_each_ftrace_rec();
 	mutex_unlock(&ftrace_lock);
 }
@@ -414,6 +717,8 @@ ftrace_record_ip(unsigned long ip)
 	rec->newlist = ftrace_new_addrs;
 	ftrace_new_addrs = rec;
 
+	ftrace_add_profile(rec);
+
 	return rec;
 }
 
@@ -2157,6 +2462,8 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
 			   "'set_graph_function' entry\n");
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
+	ftrace_profile_debugfs(d_tracer);
+
 	return 0;
 }
 
@@ -2225,6 +2532,8 @@ void __init ftrace_init(void)
 	if (ret)
 		goto failed;
 
+	ftrace_profile_init(count);
+
 	last_ftrace_enabled = ftrace_enabled = 1;
 
 	ret = ftrace_convert_nops(NULL,
-- 
cgit v1.2.3-70-g09d2


From 493762fc534c71d11d489f872c4b4a2c61173668 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 23 Mar 2009 17:12:36 -0400
Subject: tracing: move function profiler data out of function struct

Impact: reduce size of memory in function profiler

The function profiler originally introduces its counters into the
function records itself. There is 20 thousand different functions on
a normal system, and that is adding 20 thousand counters for profiling
event when not needed.

A normal run of the profiler yields only a couple of thousand functions
executed, depending on what is being profiled. This means we have around
18 thousand useless counters.

This patch rectifies this by moving the data out of the function
records used by dynamic ftrace. Data is preallocated to hold the functions
when the profiling begins. Checks are made during profiling to see if
more recorcds should be allocated, and they are allocated if it is safe
to do so.

This also removes the dependency from using dynamic ftrace, and also
removes the overhead by having it enabled.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 include/linux/ftrace.h |   4 -
 kernel/trace/Kconfig   |  10 +-
 kernel/trace/ftrace.c  | 440 +++++++++++++++++++++++++++++--------------------
 3 files changed, 263 insertions(+), 191 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 0456c3a51c6..015a3d22cf7 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -153,10 +153,6 @@ struct dyn_ftrace {
 		unsigned long		flags;
 		struct dyn_ftrace	*newlist;
 	};
-#ifdef CONFIG_FUNCTION_PROFILER
-	unsigned long			counter;
-	struct hlist_node		node;
-#endif
 	struct dyn_arch_ftrace		arch;
 };
 
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 95e9ad5735d..8a4136096d7 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -379,20 +379,16 @@ config DYNAMIC_FTRACE
 
 config FUNCTION_PROFILER
 	bool "Kernel function profiler"
-	depends on DYNAMIC_FTRACE
+	depends on FUNCTION_TRACER
 	default n
 	help
-	 This option enables the kernel function profiler. When the dynamic
-	 function tracing is enabled, a counter is added into the function
-	 records used by the dynamic function tracer. A file is created in
-	 debugfs called function_profile_enabled which defaults to zero.
+	 This option enables the kernel function profiler. A file is created
+	 in debugfs called function_profile_enabled which defaults to zero.
 	 When a 1 is echoed into this file profiling begins, and when a
 	 zero is entered, profiling stops. A file in the trace_stats
 	 directory called functions, that show the list of functions that
 	 have been hit and their counters.
 
-	 This takes up around 320K more memory.
-
 	 If in doubt, say N
 
 config FTRACE_MCOUNT_RECORD
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 11f364c776d..24dac448cdc 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -241,87 +241,48 @@ static void ftrace_update_pid_func(void)
 #endif
 }
 
-/* set when tracing only a pid */
-struct pid *ftrace_pid_trace;
-static struct pid * const ftrace_swapper_pid = &init_struct_pid;
-
-#ifdef CONFIG_DYNAMIC_FTRACE
-
-#ifndef CONFIG_FTRACE_MCOUNT_RECORD
-# error Dynamic ftrace depends on MCOUNT_RECORD
-#endif
-
-static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly;
-
-struct ftrace_func_probe {
-	struct hlist_node	node;
-	struct ftrace_probe_ops	*ops;
-	unsigned long		flags;
-	unsigned long		ip;
-	void			*data;
-	struct rcu_head		rcu;
+#ifdef CONFIG_FUNCTION_PROFILER
+struct ftrace_profile {
+	struct hlist_node		node;
+	unsigned long			ip;
+	unsigned long			counter;
 };
 
-enum {
-	FTRACE_ENABLE_CALLS		= (1 << 0),
-	FTRACE_DISABLE_CALLS		= (1 << 1),
-	FTRACE_UPDATE_TRACE_FUNC	= (1 << 2),
-	FTRACE_ENABLE_MCOUNT		= (1 << 3),
-	FTRACE_DISABLE_MCOUNT		= (1 << 4),
-	FTRACE_START_FUNC_RET		= (1 << 5),
-	FTRACE_STOP_FUNC_RET		= (1 << 6),
+struct ftrace_profile_page {
+	struct ftrace_profile_page	*next;
+	unsigned long			index;
+	struct ftrace_profile		records[];
 };
 
-static int ftrace_filtered;
+#define PROFILE_RECORDS_SIZE						\
+	(PAGE_SIZE - offsetof(struct ftrace_profile_page, records))
 
-static struct dyn_ftrace *ftrace_new_addrs;
+#define PROFILES_PER_PAGE					\
+	(PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile))
 
-static DEFINE_MUTEX(ftrace_regex_lock);
-
-struct ftrace_page {
-	struct ftrace_page	*next;
-	int			index;
-	struct dyn_ftrace	records[];
-};
+/* TODO: make these percpu, to prevent cache line bouncing */
+static struct ftrace_profile_page *profile_pages_start;
+static struct ftrace_profile_page *profile_pages;
 
-#define ENTRIES_PER_PAGE \
-  ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace))
-
-/* estimate from running different kernels */
-#define NR_TO_INIT		10000
-
-static struct ftrace_page	*ftrace_pages_start;
-static struct ftrace_page	*ftrace_pages;
-
-static struct dyn_ftrace *ftrace_free_records;
-
-/*
- * This is a double for. Do not use 'break' to break out of the loop,
- * you must use a goto.
- */
-#define do_for_each_ftrace_rec(pg, rec)					\
-	for (pg = ftrace_pages_start; pg; pg = pg->next) {		\
-		int _____i;						\
-		for (_____i = 0; _____i < pg->index; _____i++) {	\
-			rec = &pg->records[_____i];
-
-#define while_for_each_ftrace_rec()		\
-		}				\
-	}
-
-#ifdef CONFIG_FUNCTION_PROFILER
 static struct hlist_head *ftrace_profile_hash;
 static int ftrace_profile_bits;
 static int ftrace_profile_enabled;
 static DEFINE_MUTEX(ftrace_profile_lock);
 
+static DEFINE_PER_CPU(atomic_t, ftrace_profile_disable);
+
+#define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */
+
+static raw_spinlock_t ftrace_profile_rec_lock =
+	(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
 static void *
 function_stat_next(void *v, int idx)
 {
-	struct dyn_ftrace *rec = v;
-	struct ftrace_page *pg;
+	struct ftrace_profile *rec = v;
+	struct ftrace_profile_page *pg;
 
-	pg = (struct ftrace_page *)((unsigned long)rec & PAGE_MASK);
+	pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK);
 
  again:
 	rec++;
@@ -330,27 +291,22 @@ function_stat_next(void *v, int idx)
 		if (!pg)
 			return NULL;
 		rec = &pg->records[0];
+		if (!rec->counter)
+			goto again;
 	}
 
-	if (rec->flags & FTRACE_FL_FREE ||
-	    rec->flags & FTRACE_FL_FAILED ||
-	    !(rec->flags & FTRACE_FL_CONVERTED) ||
-	    /* ignore non hit functions */
-	    !rec->counter)
-		goto again;
-
 	return rec;
 }
 
 static void *function_stat_start(struct tracer_stat *trace)
 {
-	return function_stat_next(&ftrace_pages_start->records[0], 0);
+	return function_stat_next(&profile_pages_start->records[0], 0);
 }
 
 static int function_stat_cmp(void *p1, void *p2)
 {
-	struct dyn_ftrace *a = p1;
-	struct dyn_ftrace *b = p2;
+	struct ftrace_profile *a = p1;
+	struct ftrace_profile *b = p2;
 
 	if (a->counter < b->counter)
 		return -1;
@@ -369,7 +325,7 @@ static int function_stat_headers(struct seq_file *m)
 
 static int function_stat_show(struct seq_file *m, void *v)
 {
-	struct dyn_ftrace *rec = v;
+	struct ftrace_profile *rec = v;
 	char str[KSYM_SYMBOL_LEN];
 
 	kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
@@ -387,115 +343,191 @@ static struct tracer_stat function_stats = {
 	.stat_show = function_stat_show
 };
 
-static void ftrace_profile_init(int nr_funcs)
+static void ftrace_profile_reset(void)
 {
-	unsigned long addr;
-	int order;
-	int size;
+	struct ftrace_profile_page *pg;
 
-	/*
-	 * We are profiling all functions, lets make it 1/4th of the
-	 * number of functions that are in core kernel. So we have to
-	 * iterate 4 times.
-	 */
-	order = (sizeof(struct hlist_head) * nr_funcs) / 4;
-	order = get_order(order);
-	size = 1 << (PAGE_SHIFT + order);
-
-	pr_info("Allocating %d KB for profiler hash\n", size >> 10);
+	pg = profile_pages = profile_pages_start;
 
-	addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
-	if (!addr) {
-		pr_warning("Could not allocate function profiler hash\n");
-		return;
+	while (pg) {
+		memset(pg->records, 0, PROFILE_RECORDS_SIZE);
+		pg->index = 0;
+		pg = pg->next;
 	}
 
-	ftrace_profile_hash = (void *)addr;
+	memset(ftrace_profile_hash, 0,
+	       FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head));
+}
 
-	/*
-	 * struct hlist_head should be a pointer of 4 or 8 bytes.
-	 * And a simple bit manipulation can be done, but if for
-	 * some reason struct hlist_head is not a mulitple of 2,
-	 * then we play it safe, and simply count. This function
-	 * is done once at boot up, so it is not that critical in
-	 * performance.
-	 */
+int ftrace_profile_pages_init(void)
+{
+	struct ftrace_profile_page *pg;
+	int i;
 
-	size--;
-	size /= sizeof(struct hlist_head);
+	/* If we already allocated, do nothing */
+	if (profile_pages)
+		return 0;
 
-	for (; size; size >>= 1)
-		ftrace_profile_bits++;
+	profile_pages = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!profile_pages)
+		return -ENOMEM;
 
-	pr_info("Function profiler has %d hash buckets\n",
-		1 << ftrace_profile_bits);
+	pg = profile_pages_start = profile_pages;
 
-	return;
+	/* allocate 10 more pages to start */
+	for (i = 0; i < 10; i++) {
+		pg->next = (void *)get_zeroed_page(GFP_KERNEL);
+		/*
+		 * We only care about allocating profile_pages, if
+		 * we failed to allocate here, hopefully we will allocate
+		 * later.
+		 */
+		if (!pg->next)
+			break;
+		pg = pg->next;
+	}
+
+	return 0;
 }
 
-static ssize_t
-ftrace_profile_read(struct file *filp, char __user *ubuf,
-		     size_t cnt, loff_t *ppos)
+static int ftrace_profile_init(void)
 {
-	char buf[64];
-	int r;
+	int size;
 
-	r = sprintf(buf, "%u\n", ftrace_profile_enabled);
-	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-}
+	if (ftrace_profile_hash) {
+		/* If the profile is already created, simply reset it */
+		ftrace_profile_reset();
+		return 0;
+	}
 
-static void ftrace_profile_reset(void)
-{
-	struct dyn_ftrace *rec;
-	struct ftrace_page *pg;
+	/*
+	 * We are profiling all functions, but usually only a few thousand
+	 * functions are hit. We'll make a hash of 1024 items.
+	 */
+	size = FTRACE_PROFILE_HASH_SIZE;
 
-	do_for_each_ftrace_rec(pg, rec) {
-		rec->counter = 0;
-	} while_for_each_ftrace_rec();
+	ftrace_profile_hash =
+		kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL);
+
+	if (!ftrace_profile_hash)
+		return -ENOMEM;
+
+	size--;
+
+	for (; size; size >>= 1)
+		ftrace_profile_bits++;
+
+	/* Preallocate a few pages */
+	if (ftrace_profile_pages_init() < 0) {
+		kfree(ftrace_profile_hash);
+		ftrace_profile_hash = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
 }
 
-static struct dyn_ftrace *ftrace_find_profiled_func(unsigned long ip)
+/* interrupts must be disabled */
+static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip)
 {
-	struct dyn_ftrace *rec;
+	struct ftrace_profile *rec;
 	struct hlist_head *hhd;
 	struct hlist_node *n;
-	unsigned long flags;
 	unsigned long key;
 
-	if (!ftrace_profile_hash)
-		return NULL;
-
 	key = hash_long(ip, ftrace_profile_bits);
 	hhd = &ftrace_profile_hash[key];
 
 	if (hlist_empty(hhd))
 		return NULL;
 
-	local_irq_save(flags);
 	hlist_for_each_entry_rcu(rec, n, hhd, node) {
 		if (rec->ip == ip)
-			goto out;
+			return rec;
+	}
+
+	return NULL;
+}
+
+static void ftrace_add_profile(struct ftrace_profile *rec)
+{
+	unsigned long key;
+
+	key = hash_long(rec->ip, ftrace_profile_bits);
+	hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]);
+}
+
+/* Interrupts must be disabled calling this */
+static struct ftrace_profile *
+ftrace_profile_alloc(unsigned long ip, bool alloc_safe)
+{
+	struct ftrace_profile *rec = NULL;
+
+	/* prevent recursion */
+	if (atomic_inc_return(&__get_cpu_var(ftrace_profile_disable)) != 1)
+		goto out;
+
+	__raw_spin_lock(&ftrace_profile_rec_lock);
+
+	/* Try to always keep another page available */
+	if (!profile_pages->next && alloc_safe)
+		profile_pages->next = (void *)get_zeroed_page(GFP_ATOMIC);
+
+	/*
+	 * Try to find the function again since another
+	 * task on another CPU could have added it
+	 */
+	rec = ftrace_find_profiled_func(ip);
+	if (rec)
+		goto out_unlock;
+
+	if (profile_pages->index == PROFILES_PER_PAGE) {
+		if (!profile_pages->next)
+			goto out_unlock;
+		profile_pages = profile_pages->next;
 	}
-	rec = NULL;
+
+	rec = &profile_pages->records[profile_pages->index++];
+	rec->ip = ip;
+	ftrace_add_profile(rec);
+
+ out_unlock:
+	__raw_spin_unlock(&ftrace_profile_rec_lock);
  out:
-	local_irq_restore(flags);
+	atomic_dec(&__get_cpu_var(ftrace_profile_disable));
 
 	return rec;
 }
 
+/*
+ * If we are not in an interrupt, or softirq and
+ * and interrupts are disabled and preemption is not enabled
+ * (not in a spinlock) then it should be safe to allocate memory.
+ */
+static bool ftrace_safe_to_allocate(void)
+{
+	return !in_interrupt() && irqs_disabled() && !preempt_count();
+}
+
 static void
 function_profile_call(unsigned long ip, unsigned long parent_ip)
 {
-	struct dyn_ftrace *rec;
+	struct ftrace_profile *rec;
 	unsigned long flags;
+	bool alloc_safe;
 
 	if (!ftrace_profile_enabled)
 		return;
 
+	alloc_safe = ftrace_safe_to_allocate();
+
 	local_irq_save(flags);
 	rec = ftrace_find_profiled_func(ip);
-	if (!rec)
-		goto out;
+	if (!rec) {
+		rec = ftrace_profile_alloc(ip, alloc_safe);
+		if (!rec)
+			goto out;
+	}
 
 	rec->counter++;
  out:
@@ -515,11 +547,6 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
 	char buf[64];
 	int ret;
 
-	if (!ftrace_profile_hash) {
-		pr_info("Can not enable hash due to earlier problems\n");
-		return -ENODEV;
-	}
-
 	if (cnt >= sizeof(buf))
 		return -EINVAL;
 
@@ -537,7 +564,12 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
 	mutex_lock(&ftrace_profile_lock);
 	if (ftrace_profile_enabled ^ val) {
 		if (val) {
-			ftrace_profile_reset();
+			ret = ftrace_profile_init();
+			if (ret < 0) {
+				cnt = ret;
+				goto out;
+			}
+
 			register_ftrace_function(&ftrace_profile_ops);
 			ftrace_profile_enabled = 1;
 		} else {
@@ -545,6 +577,7 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
 			unregister_ftrace_function(&ftrace_profile_ops);
 		}
 	}
+ out:
 	mutex_unlock(&ftrace_profile_lock);
 
 	filp->f_pos += cnt;
@@ -552,6 +585,17 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
 	return cnt;
 }
 
+static ssize_t
+ftrace_profile_read(struct file *filp, char __user *ubuf,
+		     size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	int r;
+
+	r = sprintf(buf, "%u\n", ftrace_profile_enabled);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
 static const struct file_operations ftrace_profile_fops = {
 	.open		= tracing_open_generic,
 	.read		= ftrace_profile_read,
@@ -577,39 +621,80 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer)
 			   "'function_profile_enabled' entry\n");
 }
 
-static void ftrace_add_profile(struct dyn_ftrace *rec)
-{
-	unsigned long key;
-
-	if (!ftrace_profile_hash)
-		return;
-
-	key = hash_long(rec->ip, ftrace_profile_bits);
-	hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]);
-}
-
-static void ftrace_profile_release(struct dyn_ftrace *rec)
-{
-	mutex_lock(&ftrace_profile_lock);
-	hlist_del(&rec->node);
-	mutex_unlock(&ftrace_profile_lock);
-}
-
 #else /* CONFIG_FUNCTION_PROFILER */
-static void ftrace_profile_init(int nr_funcs)
-{
-}
-static void ftrace_add_profile(struct dyn_ftrace *rec)
-{
-}
 static void ftrace_profile_debugfs(struct dentry *d_tracer)
 {
 }
-static void ftrace_profile_release(struct dyn_ftrace *rec)
-{
-}
 #endif /* CONFIG_FUNCTION_PROFILER */
 
+/* set when tracing only a pid */
+struct pid *ftrace_pid_trace;
+static struct pid * const ftrace_swapper_pid = &init_struct_pid;
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+#ifndef CONFIG_FTRACE_MCOUNT_RECORD
+# error Dynamic ftrace depends on MCOUNT_RECORD
+#endif
+
+static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly;
+
+struct ftrace_func_probe {
+	struct hlist_node	node;
+	struct ftrace_probe_ops	*ops;
+	unsigned long		flags;
+	unsigned long		ip;
+	void			*data;
+	struct rcu_head		rcu;
+};
+
+enum {
+	FTRACE_ENABLE_CALLS		= (1 << 0),
+	FTRACE_DISABLE_CALLS		= (1 << 1),
+	FTRACE_UPDATE_TRACE_FUNC	= (1 << 2),
+	FTRACE_ENABLE_MCOUNT		= (1 << 3),
+	FTRACE_DISABLE_MCOUNT		= (1 << 4),
+	FTRACE_START_FUNC_RET		= (1 << 5),
+	FTRACE_STOP_FUNC_RET		= (1 << 6),
+};
+
+static int ftrace_filtered;
+
+static struct dyn_ftrace *ftrace_new_addrs;
+
+static DEFINE_MUTEX(ftrace_regex_lock);
+
+struct ftrace_page {
+	struct ftrace_page	*next;
+	int			index;
+	struct dyn_ftrace	records[];
+};
+
+#define ENTRIES_PER_PAGE \
+  ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace))
+
+/* estimate from running different kernels */
+#define NR_TO_INIT		10000
+
+static struct ftrace_page	*ftrace_pages_start;
+static struct ftrace_page	*ftrace_pages;
+
+static struct dyn_ftrace *ftrace_free_records;
+
+/*
+ * This is a double for. Do not use 'break' to break out of the loop,
+ * you must use a goto.
+ */
+#define do_for_each_ftrace_rec(pg, rec)					\
+	for (pg = ftrace_pages_start; pg; pg = pg->next) {		\
+		int _____i;						\
+		for (_____i = 0; _____i < pg->index; _____i++) {	\
+			rec = &pg->records[_____i];
+
+#define while_for_each_ftrace_rec()		\
+		}				\
+	}
+
 #ifdef CONFIG_KPROBES
 
 static int frozen_record_count;
@@ -660,10 +745,8 @@ void ftrace_release(void *start, unsigned long size)
 	mutex_lock(&ftrace_lock);
 	do_for_each_ftrace_rec(pg, rec) {
 		if ((rec->ip >= s) && (rec->ip < e) &&
-		    !(rec->flags & FTRACE_FL_FREE)) {
+		    !(rec->flags & FTRACE_FL_FREE))
 			ftrace_free_rec(rec);
-			ftrace_profile_release(rec);
-		}
 	} while_for_each_ftrace_rec();
 	mutex_unlock(&ftrace_lock);
 }
@@ -717,8 +800,6 @@ ftrace_record_ip(unsigned long ip)
 	rec->newlist = ftrace_new_addrs;
 	ftrace_new_addrs = rec;
 
-	ftrace_add_profile(rec);
-
 	return rec;
 }
 
@@ -2462,8 +2543,6 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
 			   "'set_graph_function' entry\n");
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
-	ftrace_profile_debugfs(d_tracer);
-
 	return 0;
 }
 
@@ -2532,8 +2611,6 @@ void __init ftrace_init(void)
 	if (ret)
 		goto failed;
 
-	ftrace_profile_init(count);
-
 	last_ftrace_enabled = ftrace_enabled = 1;
 
 	ret = ftrace_convert_nops(NULL,
@@ -2734,6 +2811,9 @@ static __init int ftrace_init_debugfs(void)
 	if (!entry)
 		pr_warning("Could not create debugfs "
 			   "'set_ftrace_pid' entry\n");
+
+	ftrace_profile_debugfs(d_tracer);
+
 	return 0;
 }
 fs_initcall(ftrace_init_debugfs);
-- 
cgit v1.2.3-70-g09d2


From a2a16d6a3156ef7309ca7328a20c35df9418e670 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 24 Mar 2009 23:17:58 -0400
Subject: function-graph: add option to calculate graph time or not

graph time is the time that a function is executing another function.
Thus if function A calls B, if graph-time is set, then the time for
A includes B. This is the default behavior. But if graph-time is off,
then the time spent executing B is subtracted from A.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 include/linux/ftrace.h               |  3 +--
 kernel/trace/ftrace.c                | 21 ++++++++++++++++++++-
 kernel/trace/trace.c                 |  4 +++-
 kernel/trace/trace.h                 |  1 +
 kernel/trace/trace_functions_graph.c |  8 ++++----
 5 files changed, 29 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 015a3d22cf7..9e0a8d245e5 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -365,6 +365,7 @@ struct ftrace_ret_stack {
 	unsigned long ret;
 	unsigned long func;
 	unsigned long long calltime;
+	unsigned long long subtime;
 };
 
 /*
@@ -376,8 +377,6 @@ extern void return_to_handler(void);
 
 extern int
 ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth);
-extern void
-ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret);
 
 /*
  * Sometimes we don't want to trace a function with the function
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index ed1fc5021d4..71e5faef12a 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -604,6 +604,7 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace)
 static void profile_graph_return(struct ftrace_graph_ret *trace)
 {
 	struct ftrace_profile_stat *stat;
+	unsigned long long calltime;
 	struct ftrace_profile *rec;
 	unsigned long flags;
 
@@ -612,9 +613,27 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
 	if (!stat->hash)
 		goto out;
 
+	calltime = trace->rettime - trace->calltime;
+
+	if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) {
+		int index;
+
+		index = trace->depth;
+
+		/* Append this call time to the parent time to subtract */
+		if (index)
+			current->ret_stack[index - 1].subtime += calltime;
+
+		if (current->ret_stack[index].subtime < calltime)
+			calltime -= current->ret_stack[index].subtime;
+		else
+			calltime = 0;
+	}
+
 	rec = ftrace_find_profiled_func(stat, trace->func);
 	if (rec)
-		rec->time += trace->rettime - trace->calltime;
+		rec->time += calltime;
+
  out:
 	local_irq_restore(flags);
 }
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 821bf49771d..5d1a16cae37 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -255,7 +255,8 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
 
 /* trace_flags holds trace_options default values */
 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
-	TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME;
+	TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
+	TRACE_ITER_GRAPH_TIME;
 
 /**
  * trace_wake_up - wake up tasks waiting for trace input
@@ -317,6 +318,7 @@ static const char *trace_options[] = {
 	"latency-format",
 	"global-clock",
 	"sleep-time",
+	"graph-time",
 	NULL
 };
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index c66ca3b6605..e3429a8ab05 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -685,6 +685,7 @@ enum trace_iterator_flags {
 	TRACE_ITER_LATENCY_FMT		= 0x40000,
 	TRACE_ITER_GLOBAL_CLK		= 0x80000,
 	TRACE_ITER_SLEEP_TIME		= 0x100000,
+	TRACE_ITER_GRAPH_TIME		= 0x200000,
 };
 
 /*
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 85bba0f018b..10f6ad7d85f 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -78,13 +78,14 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
 	current->ret_stack[index].ret = ret;
 	current->ret_stack[index].func = func;
 	current->ret_stack[index].calltime = calltime;
+	current->ret_stack[index].subtime = 0;
 	*depth = index;
 
 	return 0;
 }
 
 /* Retrieve a function return address to the trace stack on thread info.*/
-void
+static void
 ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
 {
 	int index;
@@ -104,9 +105,6 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
 	trace->calltime = current->ret_stack[index].calltime;
 	trace->overrun = atomic_read(&current->trace_overrun);
 	trace->depth = index;
-	barrier();
-	current->curr_ret_stack--;
-
 }
 
 /*
@@ -121,6 +119,8 @@ unsigned long ftrace_return_to_handler(void)
 	ftrace_pop_return_trace(&trace, &ret);
 	trace.rettime = trace_clock_local();
 	ftrace_graph_return(&trace);
+	barrier();
+	current->curr_ret_stack--;
 
 	if (unlikely(!ret)) {
 		ftrace_graph_stop();
-- 
cgit v1.2.3-70-g09d2


From 7fd7d83d49914f03aefffba6aee09032fcd54cce Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Tue, 17 Feb 2009 23:24:03 -0800
Subject: x86/pvops: replace arch_enter_lazy_cpu_mode with
 arch_start_context_switch

Impact: simplification, prepare for later changes

Make lazy cpu mode more specific to context switching, so that
it makes sense to do more context-switch specific things in
the callbacks.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 arch/x86/include/asm/paravirt.h |  8 +++-----
 arch/x86/kernel/paravirt.c      | 13 -------------
 arch/x86/kernel/process_32.c    |  2 +-
 arch/x86/kernel/process_64.c    |  2 +-
 arch/x86/xen/mmu.c              |  5 +----
 include/asm-frv/pgtable.h       |  4 ++--
 include/asm-generic/pgtable.h   | 21 +++++++++++----------
 kernel/sched.c                  |  2 +-
 8 files changed, 20 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 0617d5cc971..7b28abac323 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -1420,19 +1420,17 @@ void paravirt_enter_lazy_mmu(void);
 void paravirt_leave_lazy_mmu(void);
 void paravirt_leave_lazy(enum paravirt_lazy_mode mode);
 
-#define  __HAVE_ARCH_ENTER_LAZY_CPU_MODE
-static inline void arch_enter_lazy_cpu_mode(void)
+#define  __HAVE_ARCH_START_CONTEXT_SWITCH
+static inline void arch_start_context_switch(void)
 {
 	PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter);
 }
 
-static inline void arch_leave_lazy_cpu_mode(void)
+static inline void arch_end_context_switch(void)
 {
 	PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
 }
 
-void arch_flush_lazy_cpu_mode(void);
-
 #define  __HAVE_ARCH_ENTER_LAZY_MMU_MODE
 static inline void arch_enter_lazy_mmu_mode(void)
 {
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 8ab250ac498..5eea9548216 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -301,19 +301,6 @@ void arch_flush_lazy_mmu_mode(void)
 	preempt_enable();
 }
 
-void arch_flush_lazy_cpu_mode(void)
-{
-	preempt_disable();
-
-	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
-		WARN_ON(preempt_count() == 1);
-		arch_leave_lazy_cpu_mode();
-		arch_enter_lazy_cpu_mode();
-	}
-
-	preempt_enable();
-}
-
 struct pv_info pv_info = {
 	.name = "bare hardware",
 	.paravirt_enabled = 0,
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 14014d766ca..57e49a8278a 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -407,7 +407,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 * done before math_state_restore, so the TS bit is up
 	 * to date.
 	 */
-	arch_leave_lazy_cpu_mode();
+	arch_end_context_switch();
 
 	/* If the task has used fpu the last 5 timeslices, just do a full
 	 * restore of the math state immediately to avoid the trap; the
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index abb7e6a7f0c..7115e608532 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -428,7 +428,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 * done before math_state_restore, so the TS bit is up
 	 * to date.
 	 */
-	arch_leave_lazy_cpu_mode();
+	arch_end_context_switch();
 
 	/*
 	 * Switch FS and GS.
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index cb6afa4ec95..6b98f87232a 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1119,10 +1119,8 @@ static void drop_other_mm_ref(void *info)
 
 	/* If this cpu still has a stale cr3 reference, then make sure
 	   it has been flushed. */
-	if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) {
+	if (percpu_read(xen_current_cr3) == __pa(mm->pgd))
 		load_cr3(swapper_pg_dir);
-		arch_flush_lazy_cpu_mode();
-	}
 }
 
 static void xen_drop_mm_ref(struct mm_struct *mm)
@@ -1135,7 +1133,6 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
 			load_cr3(swapper_pg_dir);
 		else
 			leave_mm(smp_processor_id());
-		arch_flush_lazy_cpu_mode();
 	}
 
 	/* Get the "official" set of cpus referring to our pagetable. */
diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h
index e16fdb1f4f4..235e34a7a34 100644
--- a/include/asm-frv/pgtable.h
+++ b/include/asm-frv/pgtable.h
@@ -73,8 +73,8 @@ static inline int pte_file(pte_t pte) { return 0; }
 #define pgtable_cache_init()		do {} while (0)
 #define arch_enter_lazy_mmu_mode()	do {} while (0)
 #define arch_leave_lazy_mmu_mode()	do {} while (0)
-#define arch_enter_lazy_cpu_mode()	do {} while (0)
-#define arch_leave_lazy_cpu_mode()	do {} while (0)
+
+#define arch_start_context_switch()	do {} while (0)
 
 #else /* !CONFIG_MMU */
 /*****************************************************************************/
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 8e6d0ca70ab..922f03671dd 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -280,17 +280,18 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
 #endif
 
 /*
- * A facility to provide batching of the reload of page tables with the
- * actual context switch code for paravirtualized guests.  By convention,
- * only one of the lazy modes (CPU, MMU) should be active at any given
- * time, entry should never be nested, and entry and exits should always
- * be paired.  This is for sanity of maintaining and reasoning about the
- * kernel code.
+ * A facility to provide batching of the reload of page tables and
+ * other process state with the actual context switch code for
+ * paravirtualized guests.  By convention, only one of the batched
+ * update (lazy) modes (CPU, MMU) should be active at any given time,
+ * entry should never be nested, and entry and exits should always be
+ * paired.  This is for sanity of maintaining and reasoning about the
+ * kernel code.  In this case, the exit (end of the context switch) is
+ * in architecture-specific code, and so doesn't need a generic
+ * definition.
  */
-#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE
-#define arch_enter_lazy_cpu_mode()	do {} while (0)
-#define arch_leave_lazy_cpu_mode()	do {} while (0)
-#define arch_flush_lazy_cpu_mode()	do {} while (0)
+#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
+#define arch_start_context_switch()	do {} while (0)
 #endif
 
 #ifndef __HAVE_PFNMAP_TRACKING
diff --git a/kernel/sched.c b/kernel/sched.c
index 5757e03cfac..7530fdd7c98 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2746,7 +2746,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	 * combine the page table reload and the switch backend into
 	 * one hypercall.
 	 */
-	arch_enter_lazy_cpu_mode();
+	arch_start_context_switch();
 
 	if (unlikely(!mm)) {
 		next->active_mm = oldmm;
-- 
cgit v1.2.3-70-g09d2


From 224101ed69d3fbb486868e0f6e0f9fa37302efb4 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 18 Feb 2009 11:18:57 -0800
Subject: x86/paravirt: finish change from lazy cpu to context switch start/end

Impact: fix lazy context switch API

Pass the previous and next tasks into the context switch start
end calls, so that the called functions can properly access the
task state (esp in end_context_switch, in which the next task
is not yet completely current).

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 arch/x86/include/asm/paravirt.h | 17 ++++++++++-------
 arch/x86/include/asm/pgtable.h  |  2 ++
 arch/x86/kernel/paravirt.c      | 14 ++++++--------
 arch/x86/kernel/process_32.c    |  2 +-
 arch/x86/kernel/process_64.c    |  2 +-
 arch/x86/kernel/vmi_32.c        | 12 ++++++------
 arch/x86/lguest/boot.c          |  8 ++++----
 arch/x86/xen/enlighten.c        | 10 ++++------
 include/asm-frv/pgtable.h       |  2 +-
 include/asm-generic/pgtable.h   |  2 +-
 kernel/sched.c                  |  2 +-
 11 files changed, 37 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 58d2481b01a..dfdee0ca57d 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -56,6 +56,7 @@ struct desc_ptr;
 struct tss_struct;
 struct mm_struct;
 struct desc_struct;
+struct task_struct;
 
 /*
  * Wrapper type for pointers to code which uses the non-standard
@@ -203,7 +204,8 @@ struct pv_cpu_ops {
 
 	void (*swapgs)(void);
 
-	struct pv_lazy_ops lazy_mode;
+	void (*start_context_switch)(struct task_struct *prev);
+	void (*end_context_switch)(struct task_struct *next);
 };
 
 struct pv_irq_ops {
@@ -1414,20 +1416,21 @@ enum paravirt_lazy_mode {
 };
 
 enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
-void paravirt_enter_lazy_cpu(void);
-void paravirt_leave_lazy_cpu(void);
+void paravirt_start_context_switch(struct task_struct *prev);
+void paravirt_end_context_switch(struct task_struct *next);
+
 void paravirt_enter_lazy_mmu(void);
 void paravirt_leave_lazy_mmu(void);
 
 #define  __HAVE_ARCH_START_CONTEXT_SWITCH
-static inline void arch_start_context_switch(void)
+static inline void arch_start_context_switch(struct task_struct *prev)
 {
-	PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter);
+	PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev);
 }
 
-static inline void arch_end_context_switch(void)
+static inline void arch_end_context_switch(struct task_struct *next)
 {
-	PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
+	PVOP_VCALL1(pv_cpu_ops.end_context_switch, next);
 }
 
 #define  __HAVE_ARCH_ENTER_LAZY_MMU_MODE
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index d0812e155f1..24e42836e92 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -83,6 +83,8 @@ static inline void __init paravirt_pagetable_setup_done(pgd_t *base)
 #define pte_val(x)	native_pte_val(x)
 #define __pte(x)	native_make_pte(x)
 
+#define arch_end_context_switch(prev)	do {} while(0)
+
 #endif	/* CONFIG_PARAVIRT */
 
 /*
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 430a0e30577..cf1437503ba 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -270,20 +270,20 @@ void paravirt_leave_lazy_mmu(void)
 	leave_lazy(PARAVIRT_LAZY_MMU);
 }
 
-void paravirt_enter_lazy_cpu(void)
+void paravirt_start_context_switch(struct task_struct *prev)
 {
 	if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
 		arch_leave_lazy_mmu_mode();
-		set_thread_flag(TIF_LAZY_MMU_UPDATES);
+		set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
 	}
 	enter_lazy(PARAVIRT_LAZY_CPU);
 }
 
-void paravirt_leave_lazy_cpu(void)
+void paravirt_end_context_switch(struct task_struct *next)
 {
 	leave_lazy(PARAVIRT_LAZY_CPU);
 
-	if (test_and_clear_thread_flag(TIF_LAZY_MMU_UPDATES))
+	if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
 		arch_enter_lazy_mmu_mode();
 }
 
@@ -399,10 +399,8 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.set_iopl_mask = native_set_iopl_mask,
 	.io_delay = native_io_delay,
 
-	.lazy_mode = {
-		.enter = paravirt_nop,
-		.leave = paravirt_nop,
-	},
+	.start_context_switch = paravirt_nop,
+	.end_context_switch = paravirt_nop,
 };
 
 struct pv_apic_ops pv_apic_ops = {
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 57e49a8278a..d766c7616fd 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -407,7 +407,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 * done before math_state_restore, so the TS bit is up
 	 * to date.
 	 */
-	arch_end_context_switch();
+	arch_end_context_switch(next_p);
 
 	/* If the task has used fpu the last 5 timeslices, just do a full
 	 * restore of the math state immediately to avoid the trap; the
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 7115e608532..e8a9aaf9df8 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -428,7 +428,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 * done before math_state_restore, so the TS bit is up
 	 * to date.
 	 */
-	arch_end_context_switch();
+	arch_end_context_switch(next_p);
 
 	/*
 	 * Switch FS and GS.
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 950929c607d..55a5d6938e5 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -467,16 +467,16 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
 }
 #endif
 
-static void vmi_enter_lazy_cpu(void)
+static void vmi_start_context_switch(struct task_struct *prev)
 {
-	paravirt_enter_lazy_cpu();
+	paravirt_start_context_switch(prev);
 	vmi_ops.set_lazy_mode(2);
 }
 
-static void vmi_leave_lazy_cpu(void)
+static void vmi_end_context_switch(struct task_struct *next)
 {
 	vmi_ops.set_lazy_mode(0);
-	paravirt_leave_lazy_cpu();
+	paravirt_end_context_switch(next);
 }
 
 static void vmi_enter_lazy_mmu(void)
@@ -722,9 +722,9 @@ static inline int __init activate_vmi(void)
 	para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask);
 	para_fill(pv_cpu_ops.io_delay, IODelay);
 
-	para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu,
+	para_wrap(pv_cpu_ops.start_context_switch, vmi_start_context_switch,
 		  set_lazy_mode, SetLazyMode);
-	para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy_cpu,
+	para_wrap(pv_cpu_ops.end_context_switch, vmi_end_context_switch,
 		  set_lazy_mode, SetLazyMode);
 
 	para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu,
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 41a5562e710..5287081b356 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -153,10 +153,10 @@ static void lguest_leave_lazy_mmu_mode(void)
 	paravirt_leave_lazy_mmu();
 }
 
-static void lguest_leave_lazy_cpu_mode(void)
+static void lguest_end_context_switch(struct task_struct *next)
 {
 	hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
-	paravirt_leave_lazy_cpu();
+	paravirt_end_context_switch(next);
 }
 
 /*G:033
@@ -1031,8 +1031,8 @@ __init void lguest_init(void)
 	pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry;
 	pv_cpu_ops.write_idt_entry = lguest_write_idt_entry;
 	pv_cpu_ops.wbinvd = lguest_wbinvd;
-	pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu;
-	pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_cpu_mode;
+	pv_cpu_ops.start_context_switch = paravirt_start_context_switch;
+	pv_cpu_ops.end_context_switch = lguest_end_context_switch;
 
 	/* pagetable management */
 	pv_mmu_ops.write_cr3 = lguest_write_cr3;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index f586e63b9a6..70b355d3a86 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -203,10 +203,10 @@ static unsigned long xen_get_debugreg(int reg)
 	return HYPERVISOR_get_debugreg(reg);
 }
 
-static void xen_leave_lazy_cpu(void)
+static void xen_end_context_switch(struct task_struct *next)
 {
 	xen_mc_flush();
-	paravirt_leave_lazy_cpu();
+	paravirt_end_context_switch(next);
 }
 
 static unsigned long xen_store_tr(void)
@@ -817,10 +817,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	/* Xen takes care of %gs when switching to usermode for us */
 	.swapgs = paravirt_nop,
 
-	.lazy_mode = {
-		.enter = paravirt_enter_lazy_cpu,
-		.leave = xen_leave_lazy_cpu,
-	},
+	.start_context_switch = paravirt_start_context_switch,
+	.end_context_switch = xen_end_context_switch,
 };
 
 static const struct pv_apic_ops xen_apic_ops __initdata = {
diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h
index 235e34a7a34..09887045d03 100644
--- a/include/asm-frv/pgtable.h
+++ b/include/asm-frv/pgtable.h
@@ -74,7 +74,7 @@ static inline int pte_file(pte_t pte) { return 0; }
 #define arch_enter_lazy_mmu_mode()	do {} while (0)
 #define arch_leave_lazy_mmu_mode()	do {} while (0)
 
-#define arch_start_context_switch()	do {} while (0)
+#define arch_start_context_switch(prev)	do {} while (0)
 
 #else /* !CONFIG_MMU */
 /*****************************************************************************/
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 922f03671dd..e410f602cab 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -291,7 +291,7 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
  * definition.
  */
 #ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
-#define arch_start_context_switch()	do {} while (0)
+#define arch_start_context_switch(prev)	do {} while (0)
 #endif
 
 #ifndef __HAVE_PFNMAP_TRACKING
diff --git a/kernel/sched.c b/kernel/sched.c
index 7530fdd7c98..133762aece5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2746,7 +2746,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	 * combine the page table reload and the switch backend into
 	 * one hypercall.
 	 */
-	arch_start_context_switch();
+	arch_start_context_switch(prev);
 
 	if (unlikely(!mm)) {
 		next->active_mm = oldmm;
-- 
cgit v1.2.3-70-g09d2


From d4c045364d3107603187f21a56ec231e74d26441 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Fri, 6 Feb 2009 19:20:31 -0800
Subject: xen: add irq_from_evtchn

Given an evtchn, return the corresponding irq.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 drivers/xen/events.c | 6 ++++++
 include/xen/events.h | 3 +++
 2 files changed, 9 insertions(+)

(limited to 'include')

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 30963af5dba..1cd2a0e15ae 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -151,6 +151,12 @@ static unsigned int evtchn_from_irq(unsigned irq)
 	return info_for_irq(irq)->evtchn;
 }
 
+unsigned irq_from_evtchn(unsigned int evtchn)
+{
+	return evtchn_to_irq[evtchn];
+}
+EXPORT_SYMBOL_GPL(irq_from_evtchn);
+
 static enum ipi_vector ipi_from_irq(unsigned irq)
 {
 	struct irq_info *info = info_for_irq(irq);
diff --git a/include/xen/events.h b/include/xen/events.h
index 0d5f1adc036..e68d59a90ca 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -53,4 +53,7 @@ bool xen_test_irq_pending(int irq);
    irq will be disabled so it won't deliver an interrupt. */
 void xen_poll_irq(int irq);
 
+/* Determine the IRQ which is bound to an event channel */
+unsigned irq_from_evtchn(unsigned int evtchn);
+
 #endif	/* _XEN_EVENTS_H */
-- 
cgit v1.2.3-70-g09d2


From f7116284c734f3a47180cd9c907944a1837ccb3c Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Fri, 6 Feb 2009 19:21:19 -0800
Subject: xen: add /dev/xen/evtchn driver

This driver is used by application which wish to receive notifications
from the hypervisor or other guests via Xen's event channel
mechanism. In particular it is used by the xenstore daemon in domain
0.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 drivers/xen/Kconfig  |  10 ++
 drivers/xen/Makefile |   3 +-
 drivers/xen/evtchn.c | 494 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/xen/evtchn.h |  88 +++++++++
 4 files changed, 594 insertions(+), 1 deletion(-)
 create mode 100644 drivers/xen/evtchn.c
 create mode 100644 include/xen/evtchn.h

(limited to 'include')

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 526187c8a12..1bbb9108f31 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -18,6 +18,16 @@ config XEN_SCRUB_PAGES
 	  secure, but slightly less efficient.
 	  If in doubt, say yes.
 
+config XEN_DEV_EVTCHN
+	tristate "Xen /dev/xen/evtchn device"
+	depends on XEN
+	default y
+	help
+	  The evtchn driver allows a userspace process to triger event
+	  channels and to receive notification of an event channel
+	  firing.
+	  If in doubt, say yes.
+
 config XENFS
 	tristate "Xen filesystem"
 	depends on XEN
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index ff8accc9e10..1567639847e 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -4,4 +4,5 @@ obj-y	+= xenbus/
 obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
 obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
-obj-$(CONFIG_XENFS)		+= xenfs/
\ No newline at end of file
+obj-$(CONFIG_XEN_DEV_EVTCHN)	+= evtchn.o
+obj-$(CONFIG_XENFS)		+= xenfs/
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
new file mode 100644
index 00000000000..517b9ee63e1
--- /dev/null
+++ b/drivers/xen/evtchn.c
@@ -0,0 +1,494 @@
+/******************************************************************************
+ * evtchn.c
+ *
+ * Driver for receiving and demuxing event-channel signals.
+ *
+ * Copyright (c) 2004-2005, K A Fraser
+ * Multi-process extensions Copyright (c) 2004, Steven Smith
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/miscdevice.h>
+#include <linux/major.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/poll.h>
+#include <linux/irq.h>
+#include <linux/init.h>
+#include <linux/gfp.h>
+#include <linux/mutex.h>
+#include <linux/cpu.h>
+#include <xen/events.h>
+#include <xen/evtchn.h>
+#include <asm/xen/hypervisor.h>
+
+struct per_user_data {
+	/* Notification ring, accessed via /dev/xen/evtchn. */
+#define EVTCHN_RING_SIZE     (PAGE_SIZE / sizeof(evtchn_port_t))
+#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
+	evtchn_port_t *ring;
+	unsigned int ring_cons, ring_prod, ring_overflow;
+	struct mutex ring_cons_mutex; /* protect against concurrent readers */
+
+	/* Processes wait on this queue when ring is empty. */
+	wait_queue_head_t evtchn_wait;
+	struct fasync_struct *evtchn_async_queue;
+	const char *name;
+};
+
+/* Who's bound to each port? */
+static struct per_user_data *port_user[NR_EVENT_CHANNELS];
+static DEFINE_SPINLOCK(port_user_lock);
+
+irqreturn_t evtchn_interrupt(int irq, void *data)
+{
+	unsigned int port = (unsigned long)data;
+	struct per_user_data *u;
+
+	spin_lock(&port_user_lock);
+
+	u = port_user[port];
+
+	disable_irq_nosync(irq);
+
+	if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
+		u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
+		wmb(); /* Ensure ring contents visible */
+		if (u->ring_cons == u->ring_prod++) {
+			wake_up_interruptible(&u->evtchn_wait);
+			kill_fasync(&u->evtchn_async_queue,
+				    SIGIO, POLL_IN);
+		}
+	} else {
+		u->ring_overflow = 1;
+	}
+
+	spin_unlock(&port_user_lock);
+
+	return IRQ_HANDLED;
+}
+
+static ssize_t evtchn_read(struct file *file, char __user *buf,
+			   size_t count, loff_t *ppos)
+{
+	int rc;
+	unsigned int c, p, bytes1 = 0, bytes2 = 0;
+	struct per_user_data *u = file->private_data;
+
+	/* Whole number of ports. */
+	count &= ~(sizeof(evtchn_port_t)-1);
+
+	if (count == 0)
+		return 0;
+
+	if (count > PAGE_SIZE)
+		count = PAGE_SIZE;
+
+	for (;;) {
+		mutex_lock(&u->ring_cons_mutex);
+
+		rc = -EFBIG;
+		if (u->ring_overflow)
+			goto unlock_out;
+
+		c = u->ring_cons;
+		p = u->ring_prod;
+		if (c != p)
+			break;
+
+		mutex_unlock(&u->ring_cons_mutex);
+
+		if (file->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+
+		rc = wait_event_interruptible(u->evtchn_wait,
+					      u->ring_cons != u->ring_prod);
+		if (rc)
+			return rc;
+	}
+
+	/* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
+	if (((c ^ p) & EVTCHN_RING_SIZE) != 0) {
+		bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) *
+			sizeof(evtchn_port_t);
+		bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t);
+	} else {
+		bytes1 = (p - c) * sizeof(evtchn_port_t);
+		bytes2 = 0;
+	}
+
+	/* Truncate chunks according to caller's maximum byte count. */
+	if (bytes1 > count) {
+		bytes1 = count;
+		bytes2 = 0;
+	} else if ((bytes1 + bytes2) > count) {
+		bytes2 = count - bytes1;
+	}
+
+	rc = -EFAULT;
+	rmb(); /* Ensure that we see the port before we copy it. */
+	if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
+	    ((bytes2 != 0) &&
+	     copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
+		goto unlock_out;
+
+	u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
+	rc = bytes1 + bytes2;
+
+ unlock_out:
+	mutex_unlock(&u->ring_cons_mutex);
+	return rc;
+}
+
+static ssize_t evtchn_write(struct file *file, const char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	int rc, i;
+	evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
+	struct per_user_data *u = file->private_data;
+
+	if (kbuf == NULL)
+		return -ENOMEM;
+
+	/* Whole number of ports. */
+	count &= ~(sizeof(evtchn_port_t)-1);
+
+	rc = 0;
+	if (count == 0)
+		goto out;
+
+	if (count > PAGE_SIZE)
+		count = PAGE_SIZE;
+
+	rc = -EFAULT;
+	if (copy_from_user(kbuf, buf, count) != 0)
+		goto out;
+
+	spin_lock_irq(&port_user_lock);
+	for (i = 0; i < (count/sizeof(evtchn_port_t)); i++)
+		if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u))
+			enable_irq(irq_from_evtchn(kbuf[i]));
+	spin_unlock_irq(&port_user_lock);
+
+	rc = count;
+
+ out:
+	free_page((unsigned long)kbuf);
+	return rc;
+}
+
+static int evtchn_bind_to_user(struct per_user_data *u, int port)
+{
+	int irq;
+	int rc = 0;
+
+	spin_lock_irq(&port_user_lock);
+
+	BUG_ON(port_user[port] != NULL);
+
+	irq = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
+					u->name, (void *)(unsigned long)port);
+	if (rc < 0)
+		goto fail;
+
+	port_user[port] = u;
+
+fail:
+	spin_unlock_irq(&port_user_lock);
+	return rc;
+}
+
+static void evtchn_unbind_from_user(struct per_user_data *u, int port)
+{
+	int irq = irq_from_evtchn(port);
+
+	unbind_from_irqhandler(irq, (void *)(unsigned long)port);
+	port_user[port] = NULL;
+}
+
+static long evtchn_ioctl(struct file *file,
+			 unsigned int cmd, unsigned long arg)
+{
+	int rc;
+	struct per_user_data *u = file->private_data;
+	void __user *uarg = (void __user *) arg;
+
+	switch (cmd) {
+	case IOCTL_EVTCHN_BIND_VIRQ: {
+		struct ioctl_evtchn_bind_virq bind;
+		struct evtchn_bind_virq bind_virq;
+
+		rc = -EFAULT;
+		if (copy_from_user(&bind, uarg, sizeof(bind)))
+			break;
+
+		bind_virq.virq = bind.virq;
+		bind_virq.vcpu = 0;
+		rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+						 &bind_virq);
+		if (rc != 0)
+			break;
+
+		rc = evtchn_bind_to_user(u, bind_virq.port);
+		if (rc == 0)
+			rc = bind_virq.port;
+		break;
+	}
+
+	case IOCTL_EVTCHN_BIND_INTERDOMAIN: {
+		struct ioctl_evtchn_bind_interdomain bind;
+		struct evtchn_bind_interdomain bind_interdomain;
+
+		rc = -EFAULT;
+		if (copy_from_user(&bind, uarg, sizeof(bind)))
+			break;
+
+		bind_interdomain.remote_dom  = bind.remote_domain;
+		bind_interdomain.remote_port = bind.remote_port;
+		rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
+						 &bind_interdomain);
+		if (rc != 0)
+			break;
+
+		rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
+		if (rc == 0)
+			rc = bind_interdomain.local_port;
+		break;
+	}
+
+	case IOCTL_EVTCHN_BIND_UNBOUND_PORT: {
+		struct ioctl_evtchn_bind_unbound_port bind;
+		struct evtchn_alloc_unbound alloc_unbound;
+
+		rc = -EFAULT;
+		if (copy_from_user(&bind, uarg, sizeof(bind)))
+			break;
+
+		alloc_unbound.dom        = DOMID_SELF;
+		alloc_unbound.remote_dom = bind.remote_domain;
+		rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+						 &alloc_unbound);
+		if (rc != 0)
+			break;
+
+		rc = evtchn_bind_to_user(u, alloc_unbound.port);
+		if (rc == 0)
+			rc = alloc_unbound.port;
+		break;
+	}
+
+	case IOCTL_EVTCHN_UNBIND: {
+		struct ioctl_evtchn_unbind unbind;
+
+		rc = -EFAULT;
+		if (copy_from_user(&unbind, uarg, sizeof(unbind)))
+			break;
+
+		rc = -EINVAL;
+		if (unbind.port >= NR_EVENT_CHANNELS)
+			break;
+
+		spin_lock_irq(&port_user_lock);
+
+		rc = -ENOTCONN;
+		if (port_user[unbind.port] != u) {
+			spin_unlock_irq(&port_user_lock);
+			break;
+		}
+
+		evtchn_unbind_from_user(u, unbind.port);
+
+		spin_unlock_irq(&port_user_lock);
+
+		rc = 0;
+		break;
+	}
+
+	case IOCTL_EVTCHN_NOTIFY: {
+		struct ioctl_evtchn_notify notify;
+
+		rc = -EFAULT;
+		if (copy_from_user(&notify, uarg, sizeof(notify)))
+			break;
+
+		if (notify.port >= NR_EVENT_CHANNELS) {
+			rc = -EINVAL;
+		} else if (port_user[notify.port] != u) {
+			rc = -ENOTCONN;
+		} else {
+			notify_remote_via_evtchn(notify.port);
+			rc = 0;
+		}
+		break;
+	}
+
+	case IOCTL_EVTCHN_RESET: {
+		/* Initialise the ring to empty. Clear errors. */
+		mutex_lock(&u->ring_cons_mutex);
+		spin_lock_irq(&port_user_lock);
+		u->ring_cons = u->ring_prod = u->ring_overflow = 0;
+		spin_unlock_irq(&port_user_lock);
+		mutex_unlock(&u->ring_cons_mutex);
+		rc = 0;
+		break;
+	}
+
+	default:
+		rc = -ENOSYS;
+		break;
+	}
+
+	return rc;
+}
+
+static unsigned int evtchn_poll(struct file *file, poll_table *wait)
+{
+	unsigned int mask = POLLOUT | POLLWRNORM;
+	struct per_user_data *u = file->private_data;
+
+	poll_wait(file, &u->evtchn_wait, wait);
+	if (u->ring_cons != u->ring_prod)
+		mask |= POLLIN | POLLRDNORM;
+	if (u->ring_overflow)
+		mask = POLLERR;
+	return mask;
+}
+
+static int evtchn_fasync(int fd, struct file *filp, int on)
+{
+	struct per_user_data *u = filp->private_data;
+	return fasync_helper(fd, filp, on, &u->evtchn_async_queue);
+}
+
+static int evtchn_open(struct inode *inode, struct file *filp)
+{
+	struct per_user_data *u;
+
+	u = kzalloc(sizeof(*u), GFP_KERNEL);
+	if (u == NULL)
+		return -ENOMEM;
+
+	u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm);
+	if (u->name == NULL) {
+		kfree(u);
+		return -ENOMEM;
+	}
+
+	init_waitqueue_head(&u->evtchn_wait);
+
+	u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
+	if (u->ring == NULL) {
+		kfree(u->name);
+		kfree(u);
+		return -ENOMEM;
+	}
+
+	mutex_init(&u->ring_cons_mutex);
+
+	filp->private_data = u;
+
+	return 0;
+}
+
+static int evtchn_release(struct inode *inode, struct file *filp)
+{
+	int i;
+	struct per_user_data *u = filp->private_data;
+
+	spin_lock_irq(&port_user_lock);
+
+	free_page((unsigned long)u->ring);
+
+	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
+		if (port_user[i] != u)
+			continue;
+
+		evtchn_unbind_from_user(port_user[i], i);
+	}
+
+	spin_unlock_irq(&port_user_lock);
+
+	kfree(u->name);
+	kfree(u);
+
+	return 0;
+}
+
+static const struct file_operations evtchn_fops = {
+	.owner   = THIS_MODULE,
+	.read    = evtchn_read,
+	.write   = evtchn_write,
+	.unlocked_ioctl = evtchn_ioctl,
+	.poll    = evtchn_poll,
+	.fasync  = evtchn_fasync,
+	.open    = evtchn_open,
+	.release = evtchn_release,
+};
+
+static struct miscdevice evtchn_miscdev = {
+	.minor        = MISC_DYNAMIC_MINOR,
+	.name         = "evtchn",
+	.fops         = &evtchn_fops,
+};
+static int __init evtchn_init(void)
+{
+	int err;
+
+	if (!xen_domain())
+		return -ENODEV;
+
+	spin_lock_init(&port_user_lock);
+	memset(port_user, 0, sizeof(port_user));
+
+	/* Create '/dev/misc/evtchn'. */
+	err = misc_register(&evtchn_miscdev);
+	if (err != 0) {
+		printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
+		return err;
+	}
+
+	printk(KERN_INFO "Event-channel device installed.\n");
+
+	return 0;
+}
+
+static void __exit evtchn_cleanup(void)
+{
+	misc_deregister(&evtchn_miscdev);
+}
+
+module_init(evtchn_init);
+module_exit(evtchn_cleanup);
+
+MODULE_LICENSE("GPL");
diff --git a/include/xen/evtchn.h b/include/xen/evtchn.h
new file mode 100644
index 00000000000..14e833ee4e0
--- /dev/null
+++ b/include/xen/evtchn.h
@@ -0,0 +1,88 @@
+/******************************************************************************
+ * evtchn.h
+ *
+ * Interface to /dev/xen/evtchn.
+ *
+ * Copyright (c) 2003-2005, K A Fraser
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __LINUX_PUBLIC_EVTCHN_H__
+#define __LINUX_PUBLIC_EVTCHN_H__
+
+/*
+ * Bind a fresh port to VIRQ @virq.
+ * Return allocated port.
+ */
+#define IOCTL_EVTCHN_BIND_VIRQ				\
+	_IOC(_IOC_NONE, 'E', 0, sizeof(struct ioctl_evtchn_bind_virq))
+struct ioctl_evtchn_bind_virq {
+	unsigned int virq;
+};
+
+/*
+ * Bind a fresh port to remote <@remote_domain, @remote_port>.
+ * Return allocated port.
+ */
+#define IOCTL_EVTCHN_BIND_INTERDOMAIN			\
+	_IOC(_IOC_NONE, 'E', 1, sizeof(struct ioctl_evtchn_bind_interdomain))
+struct ioctl_evtchn_bind_interdomain {
+	unsigned int remote_domain, remote_port;
+};
+
+/*
+ * Allocate a fresh port for binding to @remote_domain.
+ * Return allocated port.
+ */
+#define IOCTL_EVTCHN_BIND_UNBOUND_PORT			\
+	_IOC(_IOC_NONE, 'E', 2, sizeof(struct ioctl_evtchn_bind_unbound_port))
+struct ioctl_evtchn_bind_unbound_port {
+	unsigned int remote_domain;
+};
+
+/*
+ * Unbind previously allocated @port.
+ */
+#define IOCTL_EVTCHN_UNBIND				\
+	_IOC(_IOC_NONE, 'E', 3, sizeof(struct ioctl_evtchn_unbind))
+struct ioctl_evtchn_unbind {
+	unsigned int port;
+};
+
+/*
+ * Unbind previously allocated @port.
+ */
+#define IOCTL_EVTCHN_NOTIFY				\
+	_IOC(_IOC_NONE, 'E', 4, sizeof(struct ioctl_evtchn_notify))
+struct ioctl_evtchn_notify {
+	unsigned int port;
+};
+
+/* Clear and reinitialise the event buffer. Clear error condition. */
+#define IOCTL_EVTCHN_RESET				\
+	_IOC(_IOC_NONE, 'E', 5, 0)
+
+#endif /* __LINUX_PUBLIC_EVTCHN_H__ */
-- 
cgit v1.2.3-70-g09d2


From c5cfef0f79cacc3aa438fc28f4747f0d10c54d0d Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Fri, 6 Feb 2009 19:21:19 -0800
Subject: xen: export ioctl headers to userspace

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 include/Kbuild     | 1 +
 include/xen/Kbuild | 1 +
 2 files changed, 2 insertions(+)
 create mode 100644 include/xen/Kbuild

(limited to 'include')

diff --git a/include/Kbuild b/include/Kbuild
index d8c3e3cbf41..fe36accd432 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -8,3 +8,4 @@ header-y += mtd/
 header-y += rdma/
 header-y += video/
 header-y += drm/
+header-y += xen/
diff --git a/include/xen/Kbuild b/include/xen/Kbuild
new file mode 100644
index 00000000000..4e65c16a445
--- /dev/null
+++ b/include/xen/Kbuild
@@ -0,0 +1 @@
+header-y += evtchn.h
-- 
cgit v1.2.3-70-g09d2


From a1ce1be578365a4da7e7d7db4812539d2d5da763 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Mon, 9 Feb 2009 12:05:50 -0800
Subject: xen: remove suspend_cancel hook

Remove suspend_cancel hook from xenbus_driver, in preparation for using
the device model for suspending.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 drivers/xen/xenbus/xenbus_probe.c | 23 -----------------------
 include/xen/xenbus.h              |  1 -
 2 files changed, 24 deletions(-)

(limited to 'include')

diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 773d1cf2328..bd20361fb09 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -689,27 +689,6 @@ static int suspend_dev(struct device *dev, void *data)
 	return 0;
 }
 
-static int suspend_cancel_dev(struct device *dev, void *data)
-{
-	int err = 0;
-	struct xenbus_driver *drv;
-	struct xenbus_device *xdev;
-
-	DPRINTK("");
-
-	if (dev->driver == NULL)
-		return 0;
-	drv = to_xenbus_driver(dev->driver);
-	xdev = container_of(dev, struct xenbus_device, dev);
-	if (drv->suspend_cancel)
-		err = drv->suspend_cancel(xdev);
-	if (err)
-		printk(KERN_WARNING
-		       "xenbus: suspend_cancel %s failed: %i\n",
-		       dev_name(dev), err);
-	return 0;
-}
-
 static int resume_dev(struct device *dev, void *data)
 {
 	int err;
@@ -777,8 +756,6 @@ EXPORT_SYMBOL_GPL(xenbus_resume);
 void xenbus_suspend_cancel(void)
 {
 	xs_suspend_cancel();
-	bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev);
-	xenbus_backend_resume(suspend_cancel_dev);
 }
 EXPORT_SYMBOL_GPL(xenbus_suspend_cancel);
 
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index f87f9614844..0836772b968 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -92,7 +92,6 @@ struct xenbus_driver {
 				 enum xenbus_state backend_state);
 	int (*remove)(struct xenbus_device *dev);
 	int (*suspend)(struct xenbus_device *dev);
-	int (*suspend_cancel)(struct xenbus_device *dev);
 	int (*resume)(struct xenbus_device *dev);
 	int (*uevent)(struct xenbus_device *, char **, int, char *, int);
 	struct device_driver driver;
-- 
cgit v1.2.3-70-g09d2


From de5b31bd47de7e6f41be2e271318dbc8f1af354d Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Mon, 9 Feb 2009 12:05:50 -0800
Subject: xen: use device model for suspending xenbus devices

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 drivers/xen/manage.c              |  9 ++++-----
 drivers/xen/xenbus/xenbus_probe.c | 37 +++++++++----------------------------
 drivers/xen/xenbus/xenbus_xs.c    |  2 ++
 include/xen/xenbus.h              |  2 +-
 4 files changed, 16 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index b703dd2c9f1..5269bb4d249 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -104,9 +104,8 @@ static void do_suspend(void)
 		goto out;
 	}
 
-	printk("suspending xenbus...\n");
-	/* XXX use normal device tree? */
-	xenbus_suspend();
+	printk(KERN_DEBUG "suspending xenstore...\n");
+	xs_suspend();
 
 	err = stop_machine(xen_suspend, &cancelled, cpumask_of(0));
 	if (err) {
@@ -116,9 +115,9 @@ static void do_suspend(void)
 
 	if (!cancelled) {
 		xen_arch_resume();
-		xenbus_resume();
+		xs_resume();
 	} else
-		xenbus_suspend_cancel();
+		xs_suspend_cancel();
 
 	device_resume(PMSG_RESUME);
 
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index bd20361fb09..4649213bed9 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -71,6 +71,9 @@ static int xenbus_probe_frontend(const char *type, const char *name);
 
 static void xenbus_dev_shutdown(struct device *_dev);
 
+static int xenbus_dev_suspend(struct device *dev, pm_message_t state);
+static int xenbus_dev_resume(struct device *dev);
+
 /* If something in array of ids matches this device, return it. */
 static const struct xenbus_device_id *
 match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
@@ -188,6 +191,9 @@ static struct xen_bus_type xenbus_frontend = {
 		.remove    = xenbus_dev_remove,
 		.shutdown  = xenbus_dev_shutdown,
 		.dev_attrs = xenbus_dev_attrs,
+
+		.suspend   = xenbus_dev_suspend,
+		.resume    = xenbus_dev_resume,
 	},
 };
 
@@ -669,7 +675,7 @@ static struct xenbus_watch fe_watch = {
 	.callback = frontend_changed,
 };
 
-static int suspend_dev(struct device *dev, void *data)
+static int xenbus_dev_suspend(struct device *dev, pm_message_t state)
 {
 	int err = 0;
 	struct xenbus_driver *drv;
@@ -682,14 +688,14 @@ static int suspend_dev(struct device *dev, void *data)
 	drv = to_xenbus_driver(dev->driver);
 	xdev = container_of(dev, struct xenbus_device, dev);
 	if (drv->suspend)
-		err = drv->suspend(xdev);
+		err = drv->suspend(xdev, state);
 	if (err)
 		printk(KERN_WARNING
 		       "xenbus: suspend %s failed: %i\n", dev_name(dev), err);
 	return 0;
 }
 
-static int resume_dev(struct device *dev, void *data)
+static int xenbus_dev_resume(struct device *dev)
 {
 	int err;
 	struct xenbus_driver *drv;
@@ -734,31 +740,6 @@ static int resume_dev(struct device *dev, void *data)
 	return 0;
 }
 
-void xenbus_suspend(void)
-{
-	DPRINTK("");
-
-	bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
-	xenbus_backend_suspend(suspend_dev);
-	xs_suspend();
-}
-EXPORT_SYMBOL_GPL(xenbus_suspend);
-
-void xenbus_resume(void)
-{
-	xb_init_comms();
-	xs_resume();
-	bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
-	xenbus_backend_resume(resume_dev);
-}
-EXPORT_SYMBOL_GPL(xenbus_resume);
-
-void xenbus_suspend_cancel(void)
-{
-	xs_suspend_cancel();
-}
-EXPORT_SYMBOL_GPL(xenbus_suspend_cancel);
-
 /* A flag to determine if xenstored is 'ready' (i.e. has started) */
 int xenstored_ready = 0;
 
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index e325eab4724..eab33f1dbdf 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -673,6 +673,8 @@ void xs_resume(void)
 	struct xenbus_watch *watch;
 	char token[sizeof(watch) * 2 + 1];
 
+	xb_init_comms();
+
 	mutex_unlock(&xs_state.response_mutex);
 	mutex_unlock(&xs_state.request_mutex);
 	up_write(&xs_state.transaction_mutex);
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index 0836772b968..b9763badbd7 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -91,7 +91,7 @@ struct xenbus_driver {
 	void (*otherend_changed)(struct xenbus_device *dev,
 				 enum xenbus_state backend_state);
 	int (*remove)(struct xenbus_device *dev);
-	int (*suspend)(struct xenbus_device *dev);
+	int (*suspend)(struct xenbus_device *dev, pm_message_t state);
 	int (*resume)(struct xenbus_device *dev);
 	int (*uevent)(struct xenbus_device *, char **, int, char *, int);
 	struct device_driver driver;
-- 
cgit v1.2.3-70-g09d2


From cff7e81b3dd7c25cd2248cd7a04c5764552d5d55 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Tue, 10 Mar 2009 14:39:59 -0700
Subject: xen: add /sys/hypervisor support

Adds support for Xen info under /sys/hypervisor.  Taken from Novell 2.6.27
backport tree.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 drivers/xen/Kconfig             |  10 +
 drivers/xen/Makefile            |   3 +-
 drivers/xen/sys-hypervisor.c    | 475 ++++++++++++++++++++++++++++++++++++++++
 include/xen/interface/version.h |   3 +
 4 files changed, 490 insertions(+), 1 deletion(-)
 create mode 100644 drivers/xen/sys-hypervisor.c

(limited to 'include')

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 526187c8a12..88bca1c42db 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -41,3 +41,13 @@ config XEN_COMPAT_XENFS
          a xen platform.
          If in doubt, say yes.
 
+config XEN_SYS_HYPERVISOR
+       bool "Create xen entries under /sys/hypervisor"
+       depends on XEN && SYSFS
+       select SYS_HYPERVISOR
+       default y
+       help
+         Create entries under /sys/hypervisor describing the Xen
+	 hypervisor environment.  When running native or in another
+	 virtual environment, /sys/hypervisor will still be present,
+	 but will have no xen contents.
\ No newline at end of file
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index ff8accc9e10..f3603a39db5 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -4,4 +4,5 @@ obj-y	+= xenbus/
 obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
 obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
-obj-$(CONFIG_XENFS)		+= xenfs/
\ No newline at end of file
+obj-$(CONFIG_XENFS)		+= xenfs/
+obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c
new file mode 100644
index 00000000000..cb29d1ccf0d
--- /dev/null
+++ b/drivers/xen/sys-hypervisor.c
@@ -0,0 +1,475 @@
+/*
+ *  copyright (c) 2006 IBM Corporation
+ *  Authored by: Mike D. Day <ncmike@us.ibm.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kobject.h>
+
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+
+#include <xen/xenbus.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/version.h>
+
+#define HYPERVISOR_ATTR_RO(_name) \
+static struct hyp_sysfs_attr  _name##_attr = __ATTR_RO(_name)
+
+#define HYPERVISOR_ATTR_RW(_name) \
+static struct hyp_sysfs_attr _name##_attr = \
+	__ATTR(_name, 0644, _name##_show, _name##_store)
+
+struct hyp_sysfs_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct hyp_sysfs_attr *, char *);
+	ssize_t (*store)(struct hyp_sysfs_attr *, const char *, size_t);
+	void *hyp_attr_data;
+};
+
+static ssize_t type_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	return sprintf(buffer, "xen\n");
+}
+
+HYPERVISOR_ATTR_RO(type);
+
+static int __init xen_sysfs_type_init(void)
+{
+	return sysfs_create_file(hypervisor_kobj, &type_attr.attr);
+}
+
+static void xen_sysfs_type_destroy(void)
+{
+	sysfs_remove_file(hypervisor_kobj, &type_attr.attr);
+}
+
+/* xen version attributes */
+static ssize_t major_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int version = HYPERVISOR_xen_version(XENVER_version, NULL);
+	if (version)
+		return sprintf(buffer, "%d\n", version >> 16);
+	return -ENODEV;
+}
+
+HYPERVISOR_ATTR_RO(major);
+
+static ssize_t minor_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int version = HYPERVISOR_xen_version(XENVER_version, NULL);
+	if (version)
+		return sprintf(buffer, "%d\n", version & 0xff);
+	return -ENODEV;
+}
+
+HYPERVISOR_ATTR_RO(minor);
+
+static ssize_t extra_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	char *extra;
+
+	extra = kmalloc(XEN_EXTRAVERSION_LEN, GFP_KERNEL);
+	if (extra) {
+		ret = HYPERVISOR_xen_version(XENVER_extraversion, extra);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", extra);
+		kfree(extra);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(extra);
+
+static struct attribute *version_attrs[] = {
+	&major_attr.attr,
+	&minor_attr.attr,
+	&extra_attr.attr,
+	NULL
+};
+
+static struct attribute_group version_group = {
+	.name = "version",
+	.attrs = version_attrs,
+};
+
+static int __init xen_sysfs_version_init(void)
+{
+	return sysfs_create_group(hypervisor_kobj, &version_group);
+}
+
+static void xen_sysfs_version_destroy(void)
+{
+	sysfs_remove_group(hypervisor_kobj, &version_group);
+}
+
+/* UUID */
+
+static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	char *vm, *val;
+	int ret;
+	extern int xenstored_ready;
+
+	if (!xenstored_ready)
+		return -EBUSY;
+
+	vm = xenbus_read(XBT_NIL, "vm", "", NULL);
+	if (IS_ERR(vm))
+		return PTR_ERR(vm);
+	val = xenbus_read(XBT_NIL, vm, "uuid", NULL);
+	kfree(vm);
+	if (IS_ERR(val))
+		return PTR_ERR(val);
+	ret = sprintf(buffer, "%s\n", val);
+	kfree(val);
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(uuid);
+
+static int __init xen_sysfs_uuid_init(void)
+{
+	return sysfs_create_file(hypervisor_kobj, &uuid_attr.attr);
+}
+
+static void xen_sysfs_uuid_destroy(void)
+{
+	sysfs_remove_file(hypervisor_kobj, &uuid_attr.attr);
+}
+
+/* xen compilation attributes */
+
+static ssize_t compiler_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	struct xen_compile_info *info;
+
+	info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
+	if (info) {
+		ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", info->compiler);
+		kfree(info);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(compiler);
+
+static ssize_t compiled_by_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	struct xen_compile_info *info;
+
+	info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
+	if (info) {
+		ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", info->compile_by);
+		kfree(info);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(compiled_by);
+
+static ssize_t compile_date_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	struct xen_compile_info *info;
+
+	info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
+	if (info) {
+		ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", info->compile_date);
+		kfree(info);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(compile_date);
+
+static struct attribute *xen_compile_attrs[] = {
+	&compiler_attr.attr,
+	&compiled_by_attr.attr,
+	&compile_date_attr.attr,
+	NULL
+};
+
+static struct attribute_group xen_compilation_group = {
+	.name = "compilation",
+	.attrs = xen_compile_attrs,
+};
+
+int __init static xen_compilation_init(void)
+{
+	return sysfs_create_group(hypervisor_kobj, &xen_compilation_group);
+}
+
+static void xen_compilation_destroy(void)
+{
+	sysfs_remove_group(hypervisor_kobj, &xen_compilation_group);
+}
+
+/* xen properties info */
+
+static ssize_t capabilities_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	char *caps;
+
+	caps = kmalloc(XEN_CAPABILITIES_INFO_LEN, GFP_KERNEL);
+	if (caps) {
+		ret = HYPERVISOR_xen_version(XENVER_capabilities, caps);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", caps);
+		kfree(caps);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(capabilities);
+
+static ssize_t changeset_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	char *cset;
+
+	cset = kmalloc(XEN_CHANGESET_INFO_LEN, GFP_KERNEL);
+	if (cset) {
+		ret = HYPERVISOR_xen_version(XENVER_changeset, cset);
+		if (!ret)
+			ret = sprintf(buffer, "%s\n", cset);
+		kfree(cset);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(changeset);
+
+static ssize_t virtual_start_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret = -ENOMEM;
+	struct xen_platform_parameters *parms;
+
+	parms = kmalloc(sizeof(struct xen_platform_parameters), GFP_KERNEL);
+	if (parms) {
+		ret = HYPERVISOR_xen_version(XENVER_platform_parameters,
+					     parms);
+		if (!ret)
+			ret = sprintf(buffer, "%lx\n", parms->virt_start);
+		kfree(parms);
+	}
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(virtual_start);
+
+static ssize_t pagesize_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	int ret;
+
+	ret = HYPERVISOR_xen_version(XENVER_pagesize, NULL);
+	if (ret > 0)
+		ret = sprintf(buffer, "%x\n", ret);
+
+	return ret;
+}
+
+HYPERVISOR_ATTR_RO(pagesize);
+
+/* eventually there will be several more features to export */
+static ssize_t xen_feature_show(int index, char *buffer)
+{
+	int ret = -ENOMEM;
+	struct xen_feature_info *info;
+
+	info = kmalloc(sizeof(struct xen_feature_info), GFP_KERNEL);
+	if (info) {
+		info->submap_idx = index;
+		ret = HYPERVISOR_xen_version(XENVER_get_features, info);
+		if (!ret)
+			ret = sprintf(buffer, "%d\n", info->submap);
+		kfree(info);
+	}
+
+	return ret;
+}
+
+static ssize_t writable_pt_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	return xen_feature_show(XENFEAT_writable_page_tables, buffer);
+}
+
+HYPERVISOR_ATTR_RO(writable_pt);
+
+static struct attribute *xen_properties_attrs[] = {
+	&capabilities_attr.attr,
+	&changeset_attr.attr,
+	&virtual_start_attr.attr,
+	&pagesize_attr.attr,
+	&writable_pt_attr.attr,
+	NULL
+};
+
+static struct attribute_group xen_properties_group = {
+	.name = "properties",
+	.attrs = xen_properties_attrs,
+};
+
+static int __init xen_properties_init(void)
+{
+	return sysfs_create_group(hypervisor_kobj, &xen_properties_group);
+}
+
+static void xen_properties_destroy(void)
+{
+	sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
+}
+
+#ifdef CONFIG_KEXEC
+
+extern size_t vmcoreinfo_size_xen;
+extern unsigned long paddr_vmcoreinfo_xen;
+
+static ssize_t vmcoreinfo_show(struct hyp_sysfs_attr *attr, char *page)
+{
+	return sprintf(page, "%lx %zx\n",
+		paddr_vmcoreinfo_xen, vmcoreinfo_size_xen);
+}
+
+HYPERVISOR_ATTR_RO(vmcoreinfo);
+
+static int __init xen_sysfs_vmcoreinfo_init(void)
+{
+	return sysfs_create_file(hypervisor_kobj,
+				 &vmcoreinfo_attr.attr);
+}
+
+static void xen_sysfs_vmcoreinfo_destroy(void)
+{
+	sysfs_remove_file(hypervisor_kobj, &vmcoreinfo_attr.attr);
+}
+
+#endif
+
+static int __init hyper_sysfs_init(void)
+{
+	int ret;
+
+	if (!xen_domain())
+		return -ENODEV;
+
+	ret = xen_sysfs_type_init();
+	if (ret)
+		goto out;
+	ret = xen_sysfs_version_init();
+	if (ret)
+		goto version_out;
+	ret = xen_compilation_init();
+	if (ret)
+		goto comp_out;
+	ret = xen_sysfs_uuid_init();
+	if (ret)
+		goto uuid_out;
+	ret = xen_properties_init();
+	if (ret)
+		goto prop_out;
+#ifdef CONFIG_KEXEC
+	if (vmcoreinfo_size_xen != 0) {
+		ret = xen_sysfs_vmcoreinfo_init();
+		if (ret)
+			goto vmcoreinfo_out;
+	}
+#endif
+
+	goto out;
+
+#ifdef CONFIG_KEXEC
+vmcoreinfo_out:
+#endif
+	xen_properties_destroy();
+prop_out:
+	xen_sysfs_uuid_destroy();
+uuid_out:
+	xen_compilation_destroy();
+comp_out:
+	xen_sysfs_version_destroy();
+version_out:
+	xen_sysfs_type_destroy();
+out:
+	return ret;
+}
+
+static void __exit hyper_sysfs_exit(void)
+{
+#ifdef CONFIG_KEXEC
+	if (vmcoreinfo_size_xen != 0)
+		xen_sysfs_vmcoreinfo_destroy();
+#endif
+	xen_properties_destroy();
+	xen_compilation_destroy();
+	xen_sysfs_uuid_destroy();
+	xen_sysfs_version_destroy();
+	xen_sysfs_type_destroy();
+
+}
+module_init(hyper_sysfs_init);
+module_exit(hyper_sysfs_exit);
+
+static ssize_t hyp_sysfs_show(struct kobject *kobj,
+			      struct attribute *attr,
+			      char *buffer)
+{
+	struct hyp_sysfs_attr *hyp_attr;
+	hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr);
+	if (hyp_attr->show)
+		return hyp_attr->show(hyp_attr, buffer);
+	return 0;
+}
+
+static ssize_t hyp_sysfs_store(struct kobject *kobj,
+			       struct attribute *attr,
+			       const char *buffer,
+			       size_t len)
+{
+	struct hyp_sysfs_attr *hyp_attr;
+	hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr);
+	if (hyp_attr->store)
+		return hyp_attr->store(hyp_attr, buffer, len);
+	return 0;
+}
+
+static struct sysfs_ops hyp_sysfs_ops = {
+	.show = hyp_sysfs_show,
+	.store = hyp_sysfs_store,
+};
+
+static struct kobj_type hyp_sysfs_kobj_type = {
+	.sysfs_ops = &hyp_sysfs_ops,
+};
+
+static int __init hypervisor_subsys_init(void)
+{
+	if (!xen_domain())
+		return -ENODEV;
+
+	hypervisor_kobj->ktype = &hyp_sysfs_kobj_type;
+	return 0;
+}
+device_initcall(hypervisor_subsys_init);
diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h
index 453235e923f..e8b6519d47e 100644
--- a/include/xen/interface/version.h
+++ b/include/xen/interface/version.h
@@ -57,4 +57,7 @@ struct xen_feature_info {
 /* Declares the features reported by XENVER_get_features. */
 #include "features.h"
 
+/* arg == NULL; returns host memory page size. */
+#define XENVER_pagesize 7
+
 #endif /* __XEN_PUBLIC_VERSION_H__ */
-- 
cgit v1.2.3-70-g09d2


From 15dbf27cc18559a14e99609f78678aa86b9c6ff1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 13 Mar 2009 12:21:32 +0100
Subject: perf_counter: software counter event infrastructure

Provide generic software counter infrastructure that supports
software events.

This will be used to allow sample based profiling based on software
events such as pagefaults. The current infrastructure can only
provide a count of such events, no place information.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |   8 +-
 kernel/perf_counter.c        | 201 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 208 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index dde564517b6..3fefc3b8150 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -126,6 +126,7 @@ struct hw_perf_counter {
 	unsigned long			counter_base;
 	int				nmi;
 	unsigned int			idx;
+	atomic64_t			count; /* software */
 	atomic64_t			prev_count;
 	u64				irq_period;
 	atomic64_t			period_left;
@@ -283,6 +284,8 @@ static inline int is_software_counter(struct perf_counter *counter)
 	return !counter->hw_event.raw && counter->hw_event.type < 0;
 }
 
+extern void perf_swcounter_event(enum hw_event_types, u64, int, struct pt_regs *);
+
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
@@ -295,10 +298,13 @@ static inline void perf_counter_exit_task(struct task_struct *child)	{ }
 static inline void perf_counter_notify(struct pt_regs *regs)		{ }
 static inline void perf_counter_print_debug(void)			{ }
 static inline void perf_counter_unthrottle(void)			{ }
-static inline void hw_perf_restore(u64 ctrl)			{ }
+static inline void hw_perf_restore(u64 ctrl)				{ }
 static inline u64 hw_perf_save_disable(void)		      { return 0; }
 static inline int perf_counter_task_disable(void)	{ return -EINVAL; }
 static inline int perf_counter_task_enable(void)	{ return -EINVAL; }
+
+static inline void perf_swcounter_event(enum hw_event_types event, u64 nr,
+					int nmi, struct pt_regs *regs)	{ }
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0fe22c916e2..eeb1b46cf70 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1328,6 +1328,185 @@ static const struct file_operations perf_fops = {
 	.compat_ioctl		= perf_ioctl,
 };
 
+/*
+ * Generic software counter infrastructure
+ */
+
+static void perf_swcounter_update(struct perf_counter *counter)
+{
+	struct hw_perf_counter *hwc = &counter->hw;
+	u64 prev, now;
+	s64 delta;
+
+again:
+	prev = atomic64_read(&hwc->prev_count);
+	now = atomic64_read(&hwc->count);
+	if (atomic64_cmpxchg(&hwc->prev_count, prev, now) != prev)
+		goto again;
+
+	delta = now - prev;
+
+	atomic64_add(delta, &counter->count);
+	atomic64_sub(delta, &hwc->period_left);
+}
+
+static void perf_swcounter_set_period(struct perf_counter *counter)
+{
+	struct hw_perf_counter *hwc = &counter->hw;
+	s64 left = atomic64_read(&hwc->period_left);
+	s64 period = hwc->irq_period;
+
+	if (unlikely(left <= -period)) {
+		left = period;
+		atomic64_set(&hwc->period_left, left);
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		atomic64_add(period, &hwc->period_left);
+	}
+
+	atomic64_set(&hwc->prev_count, -left);
+	atomic64_set(&hwc->count, -left);
+}
+
+static void perf_swcounter_save_and_restart(struct perf_counter *counter)
+{
+	perf_swcounter_update(counter);
+	perf_swcounter_set_period(counter);
+}
+
+static void perf_swcounter_store_irq(struct perf_counter *counter, u64 data)
+{
+	struct perf_data *irqdata = counter->irqdata;
+
+	if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
+		irqdata->overrun++;
+	} else {
+		u64 *p = (u64 *) &irqdata->data[irqdata->len];
+
+		*p = data;
+		irqdata->len += sizeof(u64);
+	}
+}
+
+static void perf_swcounter_handle_group(struct perf_counter *sibling)
+{
+	struct perf_counter *counter, *group_leader = sibling->group_leader;
+
+	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
+		perf_swcounter_update(counter);
+		perf_swcounter_store_irq(sibling, counter->hw_event.type);
+		perf_swcounter_store_irq(sibling, atomic64_read(&counter->count));
+	}
+}
+
+static void perf_swcounter_interrupt(struct perf_counter *counter,
+				     int nmi, struct pt_regs *regs)
+{
+	perf_swcounter_save_and_restart(counter);
+
+	switch (counter->hw_event.record_type) {
+	case PERF_RECORD_SIMPLE:
+		break;
+
+	case PERF_RECORD_IRQ:
+		perf_swcounter_store_irq(counter, instruction_pointer(regs));
+		break;
+
+	case PERF_RECORD_GROUP:
+		perf_swcounter_handle_group(counter);
+		break;
+	}
+
+	if (nmi) {
+		counter->wakeup_pending = 1;
+		set_tsk_thread_flag(current, TIF_PERF_COUNTERS);
+	} else
+		wake_up(&counter->waitq);
+}
+
+static int perf_swcounter_match(struct perf_counter *counter,
+				enum hw_event_types event,
+				struct pt_regs *regs)
+{
+	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
+		return 0;
+
+	if (counter->hw_event.raw)
+		return 0;
+
+	if (counter->hw_event.type != event)
+		return 0;
+
+	if (counter->hw_event.exclude_user && user_mode(regs))
+		return 0;
+
+	if (counter->hw_event.exclude_kernel && !user_mode(regs))
+		return 0;
+
+	return 1;
+}
+
+static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
+				     enum hw_event_types event, u64 nr,
+				     int nmi, struct pt_regs *regs)
+{
+	struct perf_counter *counter;
+	unsigned long flags;
+	int neg;
+
+	if (list_empty(&ctx->counter_list))
+		return;
+
+	spin_lock_irqsave(&ctx->lock, flags);
+
+	/*
+	 * XXX: make counter_list RCU safe
+	 */
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		if (perf_swcounter_match(counter, event, regs)) {
+			neg = atomic64_add_negative(nr, &counter->hw.count);
+			if (counter->hw.irq_period && !neg)
+				perf_swcounter_interrupt(counter, nmi, regs);
+		}
+	}
+
+	spin_unlock_irqrestore(&ctx->lock, flags);
+}
+
+void perf_swcounter_event(enum hw_event_types event, u64 nr,
+			  int nmi, struct pt_regs *regs)
+{
+	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
+
+	perf_swcounter_ctx_event(&cpuctx->ctx, event, nr, nmi, regs);
+	if (cpuctx->task_ctx)
+		perf_swcounter_ctx_event(cpuctx->task_ctx, event, nr, nmi, regs);
+
+	put_cpu_var(perf_cpu_context);
+}
+
+static void perf_swcounter_read(struct perf_counter *counter)
+{
+	perf_swcounter_update(counter);
+}
+
+static int perf_swcounter_enable(struct perf_counter *counter)
+{
+	perf_swcounter_set_period(counter);
+	return 0;
+}
+
+static void perf_swcounter_disable(struct perf_counter *counter)
+{
+	perf_swcounter_update(counter);
+}
+
+/*
+ * Software counter: cpu wall time clock
+ */
+
 static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
 {
 	int cpu = raw_smp_processor_id();
@@ -1364,6 +1543,10 @@ static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
 	.read		= cpu_clock_perf_counter_read,
 };
 
+/*
+ * Software counter: task time clock
+ */
+
 /*
  * Called from within the scheduler:
  */
@@ -1420,6 +1603,10 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = {
 	.read		= task_clock_perf_counter_read,
 };
 
+/*
+ * Software counter: page faults
+ */
+
 #ifdef CONFIG_VM_EVENT_COUNTERS
 #define cpu_page_faults()	__get_cpu_var(vm_event_states).event[PGFAULT]
 #else
@@ -1473,6 +1660,10 @@ static const struct hw_perf_counter_ops perf_ops_page_faults = {
 	.read		= page_faults_perf_counter_read,
 };
 
+/*
+ * Software counter: context switches
+ */
+
 static u64 get_context_switches(struct perf_counter *counter)
 {
 	struct task_struct *curr = counter->ctx->task;
@@ -1521,6 +1712,10 @@ static const struct hw_perf_counter_ops perf_ops_context_switches = {
 	.read		= context_switches_perf_counter_read,
 };
 
+/*
+ * Software counter: cpu migrations
+ */
+
 static inline u64 get_cpu_migrations(struct perf_counter *counter)
 {
 	struct task_struct *curr = counter->ctx->task;
@@ -1572,7 +1767,9 @@ static const struct hw_perf_counter_ops perf_ops_cpu_migrations = {
 static const struct hw_perf_counter_ops *
 sw_perf_counter_init(struct perf_counter *counter)
 {
+	struct perf_counter_hw_event *hw_event = &counter->hw_event;
 	const struct hw_perf_counter_ops *hw_ops = NULL;
+	struct hw_perf_counter *hwc = &counter->hw;
 
 	/*
 	 * Software counters (currently) can't in general distinguish
@@ -1618,6 +1815,10 @@ sw_perf_counter_init(struct perf_counter *counter)
 	default:
 		break;
 	}
+
+	if (hw_ops)
+		hwc->irq_period = hw_event->irq_period;
+
 	return hw_ops;
 }
 
-- 
cgit v1.2.3-70-g09d2


From ac17dc8e58f3069ea895cfff963adf98ff3cf6b2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 13 Mar 2009 12:21:34 +0100
Subject: perf_counter: provide major/minor page fault software events

Provide separate sw counters for major and minor page faults.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/mm/fault.c      |  5 ++++-
 arch/x86/mm/fault.c          |  7 +++++--
 include/linux/perf_counter.h |  4 +++-
 kernel/perf_counter.c        | 22 +++++++++-------------
 4 files changed, 21 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index eda5b0ca4af..17bbf6f91fb 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -312,6 +312,7 @@ good_area:
 	}
 	if (ret & VM_FAULT_MAJOR) {
 		current->maj_flt++;
+		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs);
 #ifdef CONFIG_PPC_SMLPAR
 		if (firmware_has_feature(FW_FEATURE_CMO)) {
 			preempt_disable();
@@ -319,8 +320,10 @@ good_area:
 			preempt_enable();
 		}
 #endif
-	} else
+	} else {
 		current->min_flt++;
+		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs);
+	}
 	up_read(&mm->mmap_sem);
 	return 0;
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index c8725752b6c..f2d3324d921 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1140,10 +1140,13 @@ good_area:
 		return;
 	}
 
-	if (fault & VM_FAULT_MAJOR)
+	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-	else
+		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs);
+	} else {
 		tsk->min_flt++;
+		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs);
+	}
 
 	check_v8086_mode(regs, address, tsk);
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 3fefc3b8150..4b14a8e9dbf 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -49,8 +49,10 @@ enum hw_event_types {
 	PERF_COUNT_PAGE_FAULTS		= -3,
 	PERF_COUNT_CONTEXT_SWITCHES	= -4,
 	PERF_COUNT_CPU_MIGRATIONS	= -5,
+	PERF_COUNT_PAGE_FAULTS_MIN	= -6,
+	PERF_COUNT_PAGE_FAULTS_MAJ	= -7,
 
-	PERF_SW_EVENTS_MIN		= -6,
+	PERF_SW_EVENTS_MIN		= -8,
 };
 
 /*
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 1773c5d7427..68950a3a52b 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1503,6 +1503,12 @@ static void perf_swcounter_disable(struct perf_counter *counter)
 	perf_swcounter_update(counter);
 }
 
+static const struct hw_perf_counter_ops perf_ops_generic = {
+	.enable		= perf_swcounter_enable,
+	.disable	= perf_swcounter_disable,
+	.read		= perf_swcounter_read,
+};
+
 /*
  * Software counter: cpu wall time clock
  */
@@ -1603,16 +1609,6 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = {
 	.read		= task_clock_perf_counter_read,
 };
 
-/*
- * Software counter: page faults
- */
-
-static const struct hw_perf_counter_ops perf_ops_page_faults = {
-	.enable		= perf_swcounter_enable,
-	.disable	= perf_swcounter_disable,
-	.read		= perf_swcounter_read,
-};
-
 /*
  * Software counter: context switches
  */
@@ -1753,9 +1749,9 @@ sw_perf_counter_init(struct perf_counter *counter)
 			hw_ops = &perf_ops_cpu_clock;
 		break;
 	case PERF_COUNT_PAGE_FAULTS:
-		if (!(counter->hw_event.exclude_user ||
-		      counter->hw_event.exclude_kernel))
-			hw_ops = &perf_ops_page_faults;
+	case PERF_COUNT_PAGE_FAULTS_MIN:
+	case PERF_COUNT_PAGE_FAULTS_MAJ:
+		hw_ops = &perf_ops_generic;
 		break;
 	case PERF_COUNT_CONTEXT_SWITCHES:
 		if (!counter->hw_event.exclude_kernel)
-- 
cgit v1.2.3-70-g09d2


From d6d020e9957745c61285ef3da9f294c5e6801f0f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 13 Mar 2009 12:21:35 +0100
Subject: perf_counter: hrtimer based sampling for software time events

Use hrtimers to profile timer based sampling for the software time
counters.

This allows platforms without hardware counter support to still
perform sample based profiling.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  20 ++++---
 kernel/perf_counter.c        | 123 ++++++++++++++++++++++++++++++-------------
 2 files changed, 100 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 4b14a8e9dbf..dfb4c7ce18b 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -114,6 +114,7 @@ struct perf_counter_hw_event {
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/spinlock.h>
+#include <linux/hrtimer.h>
 #include <asm/atomic.h>
 
 struct task_struct;
@@ -123,12 +124,19 @@ struct task_struct;
  */
 struct hw_perf_counter {
 #ifdef CONFIG_PERF_COUNTERS
-	u64				config;
-	unsigned long			config_base;
-	unsigned long			counter_base;
-	int				nmi;
-	unsigned int			idx;
-	atomic64_t			count; /* software */
+	union {
+		struct { /* hardware */
+			u64				config;
+			unsigned long			config_base;
+			unsigned long			counter_base;
+			int				nmi;
+			unsigned int			idx;
+		};
+		union { /* software */
+			atomic64_t			count;
+			struct hrtimer			hrtimer;
+		};
+	};
 	atomic64_t			prev_count;
 	u64				irq_period;
 	atomic64_t			period_left;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 68950a3a52b..f9330d5827c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1395,7 +1395,7 @@ static void perf_swcounter_handle_group(struct perf_counter *sibling)
 	struct perf_counter *counter, *group_leader = sibling->group_leader;
 
 	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
-		perf_swcounter_update(counter);
+		counter->hw_ops->read(counter);
 		perf_swcounter_store_irq(sibling, counter->hw_event.type);
 		perf_swcounter_store_irq(sibling, atomic64_read(&counter->count));
 	}
@@ -1404,8 +1404,6 @@ static void perf_swcounter_handle_group(struct perf_counter *sibling)
 static void perf_swcounter_interrupt(struct perf_counter *counter,
 				     int nmi, struct pt_regs *regs)
 {
-	perf_swcounter_save_and_restart(counter);
-
 	switch (counter->hw_event.record_type) {
 	case PERF_RECORD_SIMPLE:
 		break;
@@ -1426,6 +1424,38 @@ static void perf_swcounter_interrupt(struct perf_counter *counter,
 		wake_up(&counter->waitq);
 }
 
+static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
+{
+	struct perf_counter *counter;
+	struct pt_regs *regs;
+
+	counter	= container_of(hrtimer, struct perf_counter, hw.hrtimer);
+	counter->hw_ops->read(counter);
+
+	regs = get_irq_regs();
+	/*
+	 * In case we exclude kernel IPs or are somehow not in interrupt
+	 * context, provide the next best thing, the user IP.
+	 */
+	if ((counter->hw_event.exclude_kernel || !regs) &&
+			!counter->hw_event.exclude_user)
+		regs = task_pt_regs(current);
+
+	if (regs)
+		perf_swcounter_interrupt(counter, 0, regs);
+
+	hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period));
+
+	return HRTIMER_RESTART;
+}
+
+static void perf_swcounter_overflow(struct perf_counter *counter,
+				    int nmi, struct pt_regs *regs)
+{
+	perf_swcounter_save_and_restart(counter);
+	perf_swcounter_interrupt(counter, nmi, regs);
+}
+
 static int perf_swcounter_match(struct perf_counter *counter,
 				enum hw_event_types event,
 				struct pt_regs *regs)
@@ -1448,13 +1478,20 @@ static int perf_swcounter_match(struct perf_counter *counter,
 	return 1;
 }
 
+static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
+			       int nmi, struct pt_regs *regs)
+{
+	int neg = atomic64_add_negative(nr, &counter->hw.count);
+	if (counter->hw.irq_period && !neg)
+		perf_swcounter_overflow(counter, nmi, regs);
+}
+
 static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
 				     enum hw_event_types event, u64 nr,
 				     int nmi, struct pt_regs *regs)
 {
 	struct perf_counter *counter;
 	unsigned long flags;
-	int neg;
 
 	if (list_empty(&ctx->counter_list))
 		return;
@@ -1465,11 +1502,8 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
 	 * XXX: make counter_list RCU safe
 	 */
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		if (perf_swcounter_match(counter, event, regs)) {
-			neg = atomic64_add_negative(nr, &counter->hw.count);
-			if (counter->hw.irq_period && !neg)
-				perf_swcounter_interrupt(counter, nmi, regs);
-		}
+		if (perf_swcounter_match(counter, event, regs))
+			perf_swcounter_add(counter, nr, nmi, regs);
 	}
 
 	spin_unlock_irqrestore(&ctx->lock, flags);
@@ -1513,14 +1547,6 @@ static const struct hw_perf_counter_ops perf_ops_generic = {
  * Software counter: cpu wall time clock
  */
 
-static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
-{
-	int cpu = raw_smp_processor_id();
-
-	atomic64_set(&counter->hw.prev_count, cpu_clock(cpu));
-	return 0;
-}
-
 static void cpu_clock_perf_counter_update(struct perf_counter *counter)
 {
 	int cpu = raw_smp_processor_id();
@@ -1533,8 +1559,26 @@ static void cpu_clock_perf_counter_update(struct perf_counter *counter)
 	atomic64_add(now - prev, &counter->count);
 }
 
+static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
+{
+	struct hw_perf_counter *hwc = &counter->hw;
+	int cpu = raw_smp_processor_id();
+
+	atomic64_set(&hwc->prev_count, cpu_clock(cpu));
+	if (hwc->irq_period) {
+		hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		hwc->hrtimer.function = perf_swcounter_hrtimer;
+		__hrtimer_start_range_ns(&hwc->hrtimer,
+				ns_to_ktime(hwc->irq_period), 0,
+				HRTIMER_MODE_REL, 0);
+	}
+
+	return 0;
+}
+
 static void cpu_clock_perf_counter_disable(struct perf_counter *counter)
 {
+	hrtimer_cancel(&counter->hw.hrtimer);
 	cpu_clock_perf_counter_update(counter);
 }
 
@@ -1580,27 +1624,33 @@ static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now
 	atomic64_add(delta, &counter->count);
 }
 
-static void task_clock_perf_counter_read(struct perf_counter *counter)
-{
-	u64 now = task_clock_perf_counter_val(counter, 1);
-
-	task_clock_perf_counter_update(counter, now);
-}
-
 static int task_clock_perf_counter_enable(struct perf_counter *counter)
 {
-	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
-		atomic64_set(&counter->hw.prev_count,
-			     task_clock_perf_counter_val(counter, 0));
+	struct hw_perf_counter *hwc = &counter->hw;
+
+	atomic64_set(&hwc->prev_count, task_clock_perf_counter_val(counter, 0));
+	if (hwc->irq_period) {
+		hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		hwc->hrtimer.function = perf_swcounter_hrtimer;
+		__hrtimer_start_range_ns(&hwc->hrtimer,
+				ns_to_ktime(hwc->irq_period), 0,
+				HRTIMER_MODE_REL, 0);
+	}
 
 	return 0;
 }
 
 static void task_clock_perf_counter_disable(struct perf_counter *counter)
 {
-	u64 now = task_clock_perf_counter_val(counter, 0);
+	hrtimer_cancel(&counter->hw.hrtimer);
+	task_clock_perf_counter_update(counter,
+			task_clock_perf_counter_val(counter, 0));
+}
 
-	task_clock_perf_counter_update(counter, now);
+static void task_clock_perf_counter_read(struct perf_counter *counter)
+{
+	task_clock_perf_counter_update(counter,
+			task_clock_perf_counter_val(counter, 1));
 }
 
 static const struct hw_perf_counter_ops perf_ops_task_clock = {
@@ -1729,16 +1779,12 @@ sw_perf_counter_init(struct perf_counter *counter)
 	 */
 	switch (counter->hw_event.type) {
 	case PERF_COUNT_CPU_CLOCK:
-		if (!(counter->hw_event.exclude_user ||
-		      counter->hw_event.exclude_kernel ||
-		      counter->hw_event.exclude_hv))
-			hw_ops = &perf_ops_cpu_clock;
+		hw_ops = &perf_ops_cpu_clock;
+
+		if (hw_event->irq_period && hw_event->irq_period < 10000)
+			hw_event->irq_period = 10000;
 		break;
 	case PERF_COUNT_TASK_CLOCK:
-		if (counter->hw_event.exclude_user ||
-		    counter->hw_event.exclude_kernel ||
-		    counter->hw_event.exclude_hv)
-			break;
 		/*
 		 * If the user instantiates this as a per-cpu counter,
 		 * use the cpu_clock counter instead.
@@ -1747,6 +1793,9 @@ sw_perf_counter_init(struct perf_counter *counter)
 			hw_ops = &perf_ops_task_clock;
 		else
 			hw_ops = &perf_ops_cpu_clock;
+
+		if (hw_event->irq_period && hw_event->irq_period < 10000)
+			hw_event->irq_period = 10000;
 		break;
 	case PERF_COUNT_PAGE_FAULTS:
 	case PERF_COUNT_PAGE_FAULTS_MIN:
-- 
cgit v1.2.3-70-g09d2


From 592903cdcbf606a838056bae6d03fc557806c914 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 13 Mar 2009 12:21:36 +0100
Subject: perf_counter: add an event_list

I noticed that the counter_list only includes top-level counters, thus
perf_swcounter_event() will miss sw-counters in groups.

Since perf_swcounter_event() also wants an RCU safe list, create a new
event_list that includes all counters and uses RCU list ops and use call_rcu
to free the counter structure.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  4 ++++
 kernel/perf_counter.c        | 30 +++++++++++++++++++-----------
 2 files changed, 23 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index dfb4c7ce18b..08c11a6afeb 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -187,6 +187,7 @@ struct file;
 struct perf_counter {
 #ifdef CONFIG_PERF_COUNTERS
 	struct list_head		list_entry;
+	struct list_head		event_entry;
 	struct list_head		sibling_list;
 	struct perf_counter		*group_leader;
 	const struct hw_perf_counter_ops *hw_ops;
@@ -220,6 +221,8 @@ struct perf_counter {
 	struct perf_data		*irqdata;
 	struct perf_data		*usrdata;
 	struct perf_data		data[2];
+
+	struct rcu_head			rcu_head;
 #endif
 };
 
@@ -243,6 +246,7 @@ struct perf_counter_context {
 	struct mutex		mutex;
 
 	struct list_head	counter_list;
+	struct list_head	event_list;
 	int			nr_counters;
 	int			nr_active;
 	int			is_active;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f9330d5827c..8d6ecfa64c0 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -22,6 +22,7 @@
 #include <linux/perf_counter.h>
 #include <linux/mm.h>
 #include <linux/vmstat.h>
+#include <linux/rculist.h>
 
 /*
  * Each CPU has a list of per CPU counters:
@@ -72,6 +73,8 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 		list_add_tail(&counter->list_entry, &ctx->counter_list);
 	else
 		list_add_tail(&counter->list_entry, &group_leader->sibling_list);
+
+	list_add_rcu(&counter->event_entry, &ctx->event_list);
 }
 
 static void
@@ -80,6 +83,7 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	struct perf_counter *sibling, *tmp;
 
 	list_del_init(&counter->list_entry);
+	list_del_rcu(&counter->event_entry);
 
 	/*
 	 * If this was a group counter with sibling counters then
@@ -1133,6 +1137,14 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 	return ctx;
 }
 
+static void free_counter_rcu(struct rcu_head *head)
+{
+	struct perf_counter *counter;
+
+	counter = container_of(head, struct perf_counter, rcu_head);
+	kfree(counter);
+}
+
 /*
  * Called when the last reference to the file is gone.
  */
@@ -1151,7 +1163,7 @@ static int perf_release(struct inode *inode, struct file *file)
 	mutex_unlock(&counter->mutex);
 	mutex_unlock(&ctx->mutex);
 
-	kfree(counter);
+	call_rcu(&counter->rcu_head, free_counter_rcu);
 	put_context(ctx);
 
 	return 0;
@@ -1491,22 +1503,16 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
 				     int nmi, struct pt_regs *regs)
 {
 	struct perf_counter *counter;
-	unsigned long flags;
 
-	if (list_empty(&ctx->counter_list))
+	if (list_empty(&ctx->event_list))
 		return;
 
-	spin_lock_irqsave(&ctx->lock, flags);
-
-	/*
-	 * XXX: make counter_list RCU safe
-	 */
-	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
 		if (perf_swcounter_match(counter, event, regs))
 			perf_swcounter_add(counter, nr, nmi, regs);
 	}
-
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	rcu_read_unlock();
 }
 
 void perf_swcounter_event(enum hw_event_types event, u64 nr,
@@ -1846,6 +1852,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 
 	mutex_init(&counter->mutex);
 	INIT_LIST_HEAD(&counter->list_entry);
+	INIT_LIST_HEAD(&counter->event_entry);
 	INIT_LIST_HEAD(&counter->sibling_list);
 	init_waitqueue_head(&counter->waitq);
 
@@ -1992,6 +1999,7 @@ __perf_counter_init_context(struct perf_counter_context *ctx,
 	spin_lock_init(&ctx->lock);
 	mutex_init(&ctx->mutex);
 	INIT_LIST_HEAD(&ctx->counter_list);
+	INIT_LIST_HEAD(&ctx->event_list);
 	ctx->task = task;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 01ef09d9ffb5ce9f8d62d1e5206da3d5ca612acc Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 19 Mar 2009 20:26:11 +0100
Subject: perf_counter: fix uninitialized usage of event_list

Impact: fix boot crash

When doing the generic context switch event I ran into some early
boot hangs, which were caused by inf func recursion (event, fault,
event, fault).

I eventually tracked it down to event_list not being initialized
at the time of the first event. Fix this.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Orig-LKML-Reference: <20090319194233.195392657@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/init_task.h | 2 ++
 kernel/perf_counter.c     | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 219748d0026..ca226a91abe 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -124,6 +124,8 @@ extern struct cred init_cred;
 # define INIT_PERF_COUNTERS(tsk)					\
 	.perf_counter_ctx.counter_list =				\
 		LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list),	\
+	.perf_counter_ctx.event_list =					\
+		LIST_HEAD_INIT(tsk.perf_counter_ctx.event_list),	\
 	.perf_counter_ctx.lock =					\
 		__SPIN_LOCK_UNLOCKED(tsk.perf_counter_ctx.lock),
 #else
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index b39456ad74a..4c4e9eb37ab 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1506,7 +1506,7 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
 {
 	struct perf_counter *counter;
 
-	if (list_empty(&ctx->event_list))
+	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
 		return;
 
 	rcu_read_lock();
-- 
cgit v1.2.3-70-g09d2


From 4a0deca657f3dbb8a707b5dc8f173beec01e7ed2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 19 Mar 2009 20:26:12 +0100
Subject: perf_counter: generic context switch event

Impact: cleanup

Use the generic software events for context switches.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Orig-LKML-Reference: <20090319194233.283522645@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  1 -
 kernel/perf_counter.c | 60 ++++-----------------------------------------------
 kernel/sched.c        |  6 ------
 3 files changed, 4 insertions(+), 63 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 75b2fc5306d..7ed41f7c5ac 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -138,7 +138,6 @@ extern unsigned long nr_running(void);
 extern unsigned long nr_uninterruptible(void);
 extern unsigned long nr_active(void);
 extern unsigned long nr_iowait(void);
-extern u64 cpu_nr_switches(int cpu);
 extern u64 cpu_nr_migrations(int cpu);
 
 extern unsigned long get_parent_ip(unsigned long addr);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 4c4e9eb37ab..99d5930f0a5 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -710,10 +710,13 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
 {
 	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
 	struct perf_counter_context *ctx = &task->perf_counter_ctx;
+	struct pt_regs *regs;
 
 	if (likely(!cpuctx->task_ctx))
 		return;
 
+	regs = task_pt_regs(task);
+	perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs);
 	__perf_counter_sched_out(ctx, cpuctx);
 
 	cpuctx->task_ctx = NULL;
@@ -1667,58 +1670,6 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = {
 	.read		= task_clock_perf_counter_read,
 };
 
-/*
- * Software counter: context switches
- */
-
-static u64 get_context_switches(struct perf_counter *counter)
-{
-	struct task_struct *curr = counter->ctx->task;
-
-	if (curr)
-		return curr->nvcsw + curr->nivcsw;
-	return cpu_nr_switches(smp_processor_id());
-}
-
-static void context_switches_perf_counter_update(struct perf_counter *counter)
-{
-	u64 prev, now;
-	s64 delta;
-
-	prev = atomic64_read(&counter->hw.prev_count);
-	now = get_context_switches(counter);
-
-	atomic64_set(&counter->hw.prev_count, now);
-
-	delta = now - prev;
-
-	atomic64_add(delta, &counter->count);
-}
-
-static void context_switches_perf_counter_read(struct perf_counter *counter)
-{
-	context_switches_perf_counter_update(counter);
-}
-
-static int context_switches_perf_counter_enable(struct perf_counter *counter)
-{
-	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
-		atomic64_set(&counter->hw.prev_count,
-			     get_context_switches(counter));
-	return 0;
-}
-
-static void context_switches_perf_counter_disable(struct perf_counter *counter)
-{
-	context_switches_perf_counter_update(counter);
-}
-
-static const struct hw_perf_counter_ops perf_ops_context_switches = {
-	.enable		= context_switches_perf_counter_enable,
-	.disable	= context_switches_perf_counter_disable,
-	.read		= context_switches_perf_counter_read,
-};
-
 /*
  * Software counter: cpu migrations
  */
@@ -1808,11 +1759,8 @@ sw_perf_counter_init(struct perf_counter *counter)
 	case PERF_COUNT_PAGE_FAULTS:
 	case PERF_COUNT_PAGE_FAULTS_MIN:
 	case PERF_COUNT_PAGE_FAULTS_MAJ:
-		hw_ops = &perf_ops_generic;
-		break;
 	case PERF_COUNT_CONTEXT_SWITCHES:
-		if (!counter->hw_event.exclude_kernel)
-			hw_ops = &perf_ops_context_switches;
+		hw_ops = &perf_ops_generic;
 		break;
 	case PERF_COUNT_CPU_MIGRATIONS:
 		if (!counter->hw_event.exclude_kernel)
diff --git a/kernel/sched.c b/kernel/sched.c
index 39e70860216..f76e3c0188a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2900,14 +2900,8 @@ unsigned long nr_active(void)
 
 /*
  * Externally visible per-cpu scheduler statistics:
- * cpu_nr_switches(cpu) - number of context switches on that cpu
  * cpu_nr_migrations(cpu) - number of migrations into that cpu
  */
-u64 cpu_nr_switches(int cpu)
-{
-	return cpu_rq(cpu)->nr_switches;
-}
-
 u64 cpu_nr_migrations(int cpu)
 {
 	return cpu_rq(cpu)->nr_migrations_in;
-- 
cgit v1.2.3-70-g09d2


From e077df4f439681e43f0db8255b2d215b342ebdc6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 19 Mar 2009 20:26:17 +0100
Subject: perf_counter: hook up the tracepoint events

Impact: new perfcounters feature

Enable usage of tracepoints as perf counter events.

tracepoint event ids can be found in /debug/tracing/event/*/*/id
and (for now) are represented as -65536+id in the type field.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Orig-LKML-Reference: <20090319194233.744044174@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  3 +++
 init/Kconfig                 |  5 +++++
 kernel/perf_counter.c        | 43 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 51 insertions(+)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 08c11a6afeb..065984c1ff5 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -53,6 +53,8 @@ enum hw_event_types {
 	PERF_COUNT_PAGE_FAULTS_MAJ	= -7,
 
 	PERF_SW_EVENTS_MIN		= -8,
+
+	PERF_TP_EVENTS_MIN		= -65536
 };
 
 /*
@@ -222,6 +224,7 @@ struct perf_counter {
 	struct perf_data		*usrdata;
 	struct perf_data		data[2];
 
+	void (*destroy)(struct perf_counter *);
 	struct rcu_head			rcu_head;
 #endif
 };
diff --git a/init/Kconfig b/init/Kconfig
index 38a2ecd47c3..4f647142f2e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -947,6 +947,11 @@ config PERF_COUNTERS
 
 	  Say Y if unsure.
 
+config EVENT_PROFILE
+	bool "Tracepoint profile sources"
+	depends on PERF_COUNTERS && EVENT_TRACER
+	default y
+
 endmenu
 
 config VM_EVENT_COUNTERS
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 97f891ffeb4..0bbe3e45ba0 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1152,6 +1152,9 @@ static void free_counter_rcu(struct rcu_head *head)
 
 static void free_counter(struct perf_counter *counter)
 {
+	if (counter->destroy)
+		counter->destroy(counter);
+
 	call_rcu(&counter->rcu_head, free_counter_rcu);
 }
 
@@ -1727,6 +1730,45 @@ static const struct hw_perf_counter_ops perf_ops_cpu_migrations = {
 	.read		= cpu_migrations_perf_counter_read,
 };
 
+#ifdef CONFIG_EVENT_PROFILE
+void perf_tpcounter_event(int event_id)
+{
+	perf_swcounter_event(PERF_TP_EVENTS_MIN + event_id, 1, 1,
+			task_pt_regs(current));
+}
+
+extern int ftrace_profile_enable(int);
+extern void ftrace_profile_disable(int);
+
+static void tp_perf_counter_destroy(struct perf_counter *counter)
+{
+	int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN;
+
+	ftrace_profile_disable(event_id);
+}
+
+static const struct hw_perf_counter_ops *
+tp_perf_counter_init(struct perf_counter *counter)
+{
+	int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN;
+	int ret;
+
+	ret = ftrace_profile_enable(event_id);
+	if (ret)
+		return NULL;
+
+	counter->destroy = tp_perf_counter_destroy;
+
+	return &perf_ops_generic;
+}
+#else
+static const struct hw_perf_counter_ops *
+tp_perf_counter_init(struct perf_counter *counter)
+{
+	return NULL;
+}
+#endif
+
 static const struct hw_perf_counter_ops *
 sw_perf_counter_init(struct perf_counter *counter)
 {
@@ -1772,6 +1814,7 @@ sw_perf_counter_init(struct perf_counter *counter)
 			hw_ops = &perf_ops_cpu_migrations;
 		break;
 	default:
+		hw_ops = tp_perf_counter_init(counter);
 		break;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From b8e83514b64577b48bfb794fe85fcde40a9343ca Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 19 Mar 2009 20:26:18 +0100
Subject: perf_counter: revamp syscall input ABI

Impact: modify ABI

The hardware/software classification in hw_event->type became a little
strained due to the addition of tracepoint tracing.

Instead split up the field and provide a type field to explicitly specify
the counter type, while using the event_id field to specify which event to
use.

Raw counters still work as before, only the raw config now goes into
raw_event.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Orig-LKML-Reference: <20090319194233.836807573@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/perf_counter.c |  4 +-
 arch/x86/kernel/cpu/perf_counter.c | 10 ++--
 include/linux/perf_counter.h       | 95 ++++++++++++++++++++++++--------------
 kernel/perf_counter.c              | 83 ++++++++++++++++++++-------------
 4 files changed, 117 insertions(+), 75 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 5008762e8bf..26f69dc7130 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -602,7 +602,7 @@ hw_perf_counter_init(struct perf_counter *counter)
 		return NULL;
 	if ((s64)counter->hw_event.irq_period < 0)
 		return NULL;
-	ev = counter->hw_event.type;
+	ev = counter->hw_event.event_id;
 	if (!counter->hw_event.raw) {
 		if (ev >= ppmu->n_generic ||
 		    ppmu->generic_events[ev] == 0)
@@ -692,7 +692,7 @@ static void perf_handle_group(struct perf_counter *counter)
 	list_for_each_entry(sub, &leader->sibling_list, list_entry) {
 		if (sub != counter)
 			sub->hw_ops->read(sub);
-		perf_store_irq_data(counter, sub->hw_event.type);
+		perf_store_irq_data(counter, sub->hw_event.event_config);
 		perf_store_irq_data(counter, atomic64_read(&sub->count));
 	}
 }
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 6cba9d47b71..d844ae41d5a 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -217,15 +217,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	/*
 	 * Raw event type provide the config in the event structure
 	 */
-	if (hw_event->raw) {
-		hwc->config |= pmc_ops->raw_event(hw_event->type);
+	if (hw_event->raw_type) {
+		hwc->config |= pmc_ops->raw_event(hw_event->raw_event_id);
 	} else {
-		if (hw_event->type >= pmc_ops->max_events)
+		if (hw_event->event_id >= pmc_ops->max_events)
 			return -EINVAL;
 		/*
 		 * The generic map:
 		 */
-		hwc->config |= pmc_ops->event_map(hw_event->type);
+		hwc->config |= pmc_ops->event_map(hw_event->event_id);
 	}
 	counter->wakeup_pending = 0;
 
@@ -715,7 +715,7 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
 	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
 
 		x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
-		perf_store_irq_data(sibling, counter->hw_event.type);
+		perf_store_irq_data(sibling, counter->hw_event.event_config);
 		perf_store_irq_data(sibling, atomic64_read(&counter->count));
 	}
 }
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 065984c1ff5..8f939490550 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -21,56 +21,81 @@
  */
 
 /*
- * Generalized performance counter event types, used by the hw_event.type
- * parameter of the sys_perf_counter_open() syscall:
+ * hw_event.type
  */
-enum hw_event_types {
+enum perf_event_types {
+	PERF_TYPE_HARDWARE		= 0,
+	PERF_TYPE_SOFTWARE		= 1,
+	PERF_TYPE_TRACEPOINT		= 2,
+
 	/*
-	 * Common hardware events, generalized by the kernel:
+	 * available TYPE space, raw is the max value.
 	 */
-	PERF_COUNT_CPU_CYCLES		=  0,
-	PERF_COUNT_INSTRUCTIONS		=  1,
-	PERF_COUNT_CACHE_REFERENCES	=  2,
-	PERF_COUNT_CACHE_MISSES		=  3,
-	PERF_COUNT_BRANCH_INSTRUCTIONS	=  4,
-	PERF_COUNT_BRANCH_MISSES	=  5,
-	PERF_COUNT_BUS_CYCLES		=  6,
 
-	PERF_HW_EVENTS_MAX		=  7,
+	PERF_TYPE_RAW			= 128,
+};
 
+/*
+ * Generalized performance counter event types, used by the hw_event.event_id
+ * parameter of the sys_perf_counter_open() syscall:
+ */
+enum hw_event_ids {
 	/*
-	 * Special "software" counters provided by the kernel, even if
-	 * the hardware does not support performance counters. These
-	 * counters measure various physical and sw events of the
-	 * kernel (and allow the profiling of them as well):
+	 * Common hardware events, generalized by the kernel:
 	 */
-	PERF_COUNT_CPU_CLOCK		= -1,
-	PERF_COUNT_TASK_CLOCK		= -2,
-	PERF_COUNT_PAGE_FAULTS		= -3,
-	PERF_COUNT_CONTEXT_SWITCHES	= -4,
-	PERF_COUNT_CPU_MIGRATIONS	= -5,
-	PERF_COUNT_PAGE_FAULTS_MIN	= -6,
-	PERF_COUNT_PAGE_FAULTS_MAJ	= -7,
-
-	PERF_SW_EVENTS_MIN		= -8,
+	PERF_COUNT_CPU_CYCLES		= 0,
+	PERF_COUNT_INSTRUCTIONS		= 1,
+	PERF_COUNT_CACHE_REFERENCES	= 2,
+	PERF_COUNT_CACHE_MISSES		= 3,
+	PERF_COUNT_BRANCH_INSTRUCTIONS	= 4,
+	PERF_COUNT_BRANCH_MISSES	= 5,
+	PERF_COUNT_BUS_CYCLES		= 6,
+
+	PERF_HW_EVENTS_MAX		= 7,
+};
 
-	PERF_TP_EVENTS_MIN		= -65536
+/*
+ * Special "software" counters provided by the kernel, even if the hardware
+ * does not support performance counters. These counters measure various
+ * physical and sw events of the kernel (and allow the profiling of them as
+ * well):
+ */
+enum sw_event_ids {
+	PERF_COUNT_CPU_CLOCK		= 0,
+	PERF_COUNT_TASK_CLOCK		= 1,
+	PERF_COUNT_PAGE_FAULTS		= 2,
+	PERF_COUNT_CONTEXT_SWITCHES	= 3,
+	PERF_COUNT_CPU_MIGRATIONS	= 4,
+	PERF_COUNT_PAGE_FAULTS_MIN	= 5,
+	PERF_COUNT_PAGE_FAULTS_MAJ	= 6,
+
+	PERF_SW_EVENTS_MAX		= 7,
 };
 
 /*
  * IRQ-notification data record type:
  */
 enum perf_counter_record_type {
-	PERF_RECORD_SIMPLE		=  0,
-	PERF_RECORD_IRQ			=  1,
-	PERF_RECORD_GROUP		=  2,
+	PERF_RECORD_SIMPLE		= 0,
+	PERF_RECORD_IRQ			= 1,
+	PERF_RECORD_GROUP		= 2,
 };
 
 /*
  * Hardware event to monitor via a performance monitoring counter:
  */
 struct perf_counter_hw_event {
-	__s64			type;
+	union {
+		struct {
+			__u64			event_id	: 56,
+						type		:  8;
+		};
+		struct {
+			__u64			raw_event_id	: 63,
+						raw_type	:  1;
+		};
+		__u64		event_config;
+	};
 
 	__u64			irq_period;
 	__u64			record_type;
@@ -78,7 +103,6 @@ struct perf_counter_hw_event {
 
 	__u64			disabled       :  1, /* off by default        */
 				nmi	       :  1, /* NMI sampling          */
-				raw	       :  1, /* raw event type        */
 				inherit	       :  1, /* children inherit it   */
 				pinned	       :  1, /* must always be on PMU */
 				exclusive      :  1, /* only group on PMU     */
@@ -87,7 +111,7 @@ struct perf_counter_hw_event {
 				exclude_hv     :  1, /* ditto hypervisor      */
 				exclude_idle   :  1, /* don't count when idle */
 
-				__reserved_1   : 54;
+				__reserved_1   : 55;
 
 	__u32			extra_config_len;
 	__u32			__reserved_4;
@@ -298,10 +322,11 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
  */
 static inline int is_software_counter(struct perf_counter *counter)
 {
-	return !counter->hw_event.raw && counter->hw_event.type < 0;
+	return !counter->hw_event.raw_type &&
+		counter->hw_event.type != PERF_TYPE_HARDWARE;
 }
 
-extern void perf_swcounter_event(enum hw_event_types, u64, int, struct pt_regs *);
+extern void perf_swcounter_event(u32, u64, int, struct pt_regs *);
 
 #else
 static inline void
@@ -320,7 +345,7 @@ static inline u64 hw_perf_save_disable(void)		      { return 0; }
 static inline int perf_counter_task_disable(void)	{ return -EINVAL; }
 static inline int perf_counter_task_enable(void)	{ return -EINVAL; }
 
-static inline void perf_swcounter_event(enum hw_event_types event, u64 nr,
+static inline void perf_swcounter_event(u32 event, u64 nr,
 					int nmi, struct pt_regs *regs)	{ }
 #endif
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0bbe3e45ba0..68a56a68bc7 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1395,12 +1395,6 @@ static void perf_swcounter_set_period(struct perf_counter *counter)
 	atomic64_set(&hwc->count, -left);
 }
 
-static void perf_swcounter_save_and_restart(struct perf_counter *counter)
-{
-	perf_swcounter_update(counter);
-	perf_swcounter_set_period(counter);
-}
-
 static void perf_swcounter_store_irq(struct perf_counter *counter, u64 data)
 {
 	struct perf_data *irqdata = counter->irqdata;
@@ -1421,7 +1415,7 @@ static void perf_swcounter_handle_group(struct perf_counter *sibling)
 
 	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
 		counter->hw_ops->read(counter);
-		perf_swcounter_store_irq(sibling, counter->hw_event.type);
+		perf_swcounter_store_irq(sibling, counter->hw_event.event_config);
 		perf_swcounter_store_irq(sibling, atomic64_read(&counter->count));
 	}
 }
@@ -1477,21 +1471,25 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 static void perf_swcounter_overflow(struct perf_counter *counter,
 				    int nmi, struct pt_regs *regs)
 {
-	perf_swcounter_save_and_restart(counter);
+	perf_swcounter_update(counter);
+	perf_swcounter_set_period(counter);
 	perf_swcounter_interrupt(counter, nmi, regs);
 }
 
 static int perf_swcounter_match(struct perf_counter *counter,
-				enum hw_event_types event,
-				struct pt_regs *regs)
+				enum perf_event_types type,
+				u32 event, struct pt_regs *regs)
 {
 	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
 		return 0;
 
-	if (counter->hw_event.raw)
+	if (counter->hw_event.raw_type)
+		return 0;
+
+	if (counter->hw_event.type != type)
 		return 0;
 
-	if (counter->hw_event.type != event)
+	if (counter->hw_event.event_id != event)
 		return 0;
 
 	if (counter->hw_event.exclude_user && user_mode(regs))
@@ -1512,8 +1510,8 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
 }
 
 static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
-				     enum hw_event_types event, u64 nr,
-				     int nmi, struct pt_regs *regs)
+				     enum perf_event_types type, u32 event,
+				     u64 nr, int nmi, struct pt_regs *regs)
 {
 	struct perf_counter *counter;
 
@@ -1522,24 +1520,31 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
-		if (perf_swcounter_match(counter, event, regs))
+		if (perf_swcounter_match(counter, type, event, regs))
 			perf_swcounter_add(counter, nr, nmi, regs);
 	}
 	rcu_read_unlock();
 }
 
-void perf_swcounter_event(enum hw_event_types event, u64 nr,
-			  int nmi, struct pt_regs *regs)
+static void __perf_swcounter_event(enum perf_event_types type, u32 event,
+				   u64 nr, int nmi, struct pt_regs *regs)
 {
 	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
 
-	perf_swcounter_ctx_event(&cpuctx->ctx, event, nr, nmi, regs);
-	if (cpuctx->task_ctx)
-		perf_swcounter_ctx_event(cpuctx->task_ctx, event, nr, nmi, regs);
+	perf_swcounter_ctx_event(&cpuctx->ctx, type, event, nr, nmi, regs);
+	if (cpuctx->task_ctx) {
+		perf_swcounter_ctx_event(cpuctx->task_ctx, type, event,
+				nr, nmi, regs);
+	}
 
 	put_cpu_var(perf_cpu_context);
 }
 
+void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs)
+{
+	__perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs);
+}
+
 static void perf_swcounter_read(struct perf_counter *counter)
 {
 	perf_swcounter_update(counter);
@@ -1733,8 +1738,12 @@ static const struct hw_perf_counter_ops perf_ops_cpu_migrations = {
 #ifdef CONFIG_EVENT_PROFILE
 void perf_tpcounter_event(int event_id)
 {
-	perf_swcounter_event(PERF_TP_EVENTS_MIN + event_id, 1, 1,
-			task_pt_regs(current));
+	struct pt_regs *regs = get_irq_regs();
+
+	if (!regs)
+		regs = task_pt_regs(current);
+
+	__perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs);
 }
 
 extern int ftrace_profile_enable(int);
@@ -1742,15 +1751,13 @@ extern void ftrace_profile_disable(int);
 
 static void tp_perf_counter_destroy(struct perf_counter *counter)
 {
-	int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN;
-
-	ftrace_profile_disable(event_id);
+	ftrace_profile_disable(counter->hw_event.event_id);
 }
 
 static const struct hw_perf_counter_ops *
 tp_perf_counter_init(struct perf_counter *counter)
 {
-	int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN;
+	int event_id = counter->hw_event.event_id;
 	int ret;
 
 	ret = ftrace_profile_enable(event_id);
@@ -1758,6 +1765,7 @@ tp_perf_counter_init(struct perf_counter *counter)
 		return NULL;
 
 	counter->destroy = tp_perf_counter_destroy;
+	counter->hw.irq_period = counter->hw_event.irq_period;
 
 	return &perf_ops_generic;
 }
@@ -1783,7 +1791,7 @@ sw_perf_counter_init(struct perf_counter *counter)
 	 * to be kernel events, and page faults are never hypervisor
 	 * events.
 	 */
-	switch (counter->hw_event.type) {
+	switch (counter->hw_event.event_id) {
 	case PERF_COUNT_CPU_CLOCK:
 		hw_ops = &perf_ops_cpu_clock;
 
@@ -1813,9 +1821,6 @@ sw_perf_counter_init(struct perf_counter *counter)
 		if (!counter->hw_event.exclude_kernel)
 			hw_ops = &perf_ops_cpu_migrations;
 		break;
-	default:
-		hw_ops = tp_perf_counter_init(counter);
-		break;
 	}
 
 	if (hw_ops)
@@ -1870,10 +1875,22 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 		counter->state = PERF_COUNTER_STATE_OFF;
 
 	hw_ops = NULL;
-	if (!hw_event->raw && hw_event->type < 0)
-		hw_ops = sw_perf_counter_init(counter);
-	else
+
+	if (hw_event->raw_type)
+		hw_ops = hw_perf_counter_init(counter);
+	else switch (hw_event->type) {
+	case PERF_TYPE_HARDWARE:
 		hw_ops = hw_perf_counter_init(counter);
+		break;
+
+	case PERF_TYPE_SOFTWARE:
+		hw_ops = sw_perf_counter_init(counter);
+		break;
+
+	case PERF_TYPE_TRACEPOINT:
+		hw_ops = tp_perf_counter_init(counter);
+		break;
+	}
 
 	if (!hw_ops) {
 		kfree(counter);
-- 
cgit v1.2.3-70-g09d2


From 0322cd6ec504b0bf08ca7b2c3d7f43bda37d79c9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 19 Mar 2009 20:26:19 +0100
Subject: perf_counter: unify irq output code

Impact: cleanup

Having 3 slightly different copies of the same code around does nobody
any good. First step in revamping the output format.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Orig-LKML-Reference: <20090319194233.929962222@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/perf_counter.c |  51 +-----------------
 arch/x86/kernel/cpu/perf_counter.c |  53 +------------------
 include/linux/perf_counter.h       |   2 +
 kernel/perf_counter.c              | 106 ++++++++++++++++++++-----------------
 4 files changed, 61 insertions(+), 151 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 26f69dc7130..88b72eb4af1 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -662,41 +662,6 @@ void perf_counter_do_pending(void)
 	}
 }
 
-/*
- * Record data for an irq counter.
- * This function was lifted from the x86 code; maybe it should
- * go in the core?
- */
-static void perf_store_irq_data(struct perf_counter *counter, u64 data)
-{
-	struct perf_data *irqdata = counter->irqdata;
-
-	if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
-		irqdata->overrun++;
-	} else {
-		u64 *p = (u64 *) &irqdata->data[irqdata->len];
-
-		*p = data;
-		irqdata->len += sizeof(u64);
-	}
-}
-
-/*
- * Record all the values of the counters in a group
- */
-static void perf_handle_group(struct perf_counter *counter)
-{
-	struct perf_counter *leader, *sub;
-
-	leader = counter->group_leader;
-	list_for_each_entry(sub, &leader->sibling_list, list_entry) {
-		if (sub != counter)
-			sub->hw_ops->read(sub);
-		perf_store_irq_data(counter, sub->hw_event.event_config);
-		perf_store_irq_data(counter, atomic64_read(&sub->count));
-	}
-}
-
 /*
  * A counter has overflowed; update its count and record
  * things if requested.  Note that interrupts are hard-disabled
@@ -736,20 +701,8 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	/*
 	 * Finally record data if requested.
 	 */
-	if (record) {
-		switch (counter->hw_event.record_type) {
-		case PERF_RECORD_SIMPLE:
-			break;
-		case PERF_RECORD_IRQ:
-			perf_store_irq_data(counter, instruction_pointer(regs));
-			counter->wakeup_pending = 1;
-			break;
-		case PERF_RECORD_GROUP:
-			perf_handle_group(counter);
-			counter->wakeup_pending = 1;
-			break;
-		}
-	}
+	if (record)
+		perf_counter_output(counter, 1, regs);
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index d844ae41d5a..902282d68b0 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -674,20 +674,6 @@ static void pmc_generic_disable(struct perf_counter *counter)
 	x86_perf_counter_update(counter, hwc, idx);
 }
 
-static void perf_store_irq_data(struct perf_counter *counter, u64 data)
-{
-	struct perf_data *irqdata = counter->irqdata;
-
-	if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
-		irqdata->overrun++;
-	} else {
-		u64 *p = (u64 *) &irqdata->data[irqdata->len];
-
-		*p = data;
-		irqdata->len += sizeof(u64);
-	}
-}
-
 /*
  * Save and restart an expired counter. Called by NMI contexts,
  * so it has to be careful about preempting normal counter ops:
@@ -704,22 +690,6 @@ static void perf_save_and_restart(struct perf_counter *counter)
 		__pmc_generic_enable(counter, hwc, idx);
 }
 
-static void
-perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
-{
-	struct perf_counter *counter, *group_leader = sibling->group_leader;
-
-	/*
-	 * Store sibling timestamps (if any):
-	 */
-	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
-
-		x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
-		perf_store_irq_data(sibling, counter->hw_event.event_config);
-		perf_store_irq_data(sibling, atomic64_read(&counter->count));
-	}
-}
-
 /*
  * Maximum interrupt frequency of 100KHz per CPU
  */
@@ -754,28 +724,7 @@ again:
 			continue;
 
 		perf_save_and_restart(counter);
-
-		switch (counter->hw_event.record_type) {
-		case PERF_RECORD_SIMPLE:
-			continue;
-		case PERF_RECORD_IRQ:
-			perf_store_irq_data(counter, instruction_pointer(regs));
-			break;
-		case PERF_RECORD_GROUP:
-			perf_handle_group(counter, &status, &ack);
-			break;
-		}
-		/*
-		 * From NMI context we cannot call into the scheduler to
-		 * do a task wakeup - but we mark these generic as
-		 * wakeup_pending and initate a wakeup callback:
-		 */
-		if (nmi) {
-			counter->wakeup_pending = 1;
-			set_tsk_thread_flag(current, TIF_PERF_COUNTERS);
-		} else {
-			wake_up(&counter->waitq);
-		}
+		perf_counter_output(counter, nmi, regs);
 	}
 
 	hw_perf_ack_status(ack);
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 8f939490550..a4b76c0175f 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -317,6 +317,8 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
 	       struct perf_cpu_context *cpuctx,
 	       struct perf_counter_context *ctx, int cpu);
 
+extern void perf_counter_output(struct perf_counter *counter,
+				int nmi, struct pt_regs *regs);
 /*
  * Return 1 for a software counter, 0 for a hardware counter
  */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 68a56a68bc7..f054b8c9bf9 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1353,6 +1353,60 @@ static const struct file_operations perf_fops = {
 	.compat_ioctl		= perf_ioctl,
 };
 
+/*
+ * Output
+ */
+
+static void perf_counter_store_irq(struct perf_counter *counter, u64 data)
+{
+	struct perf_data *irqdata = counter->irqdata;
+
+	if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
+		irqdata->overrun++;
+	} else {
+		u64 *p = (u64 *) &irqdata->data[irqdata->len];
+
+		*p = data;
+		irqdata->len += sizeof(u64);
+	}
+}
+
+static void perf_counter_handle_group(struct perf_counter *counter)
+{
+	struct perf_counter *leader, *sub;
+
+	leader = counter->group_leader;
+	list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+		if (sub != counter)
+			sub->hw_ops->read(sub);
+		perf_counter_store_irq(counter, sub->hw_event.event_config);
+		perf_counter_store_irq(counter, atomic64_read(&sub->count));
+	}
+}
+
+void perf_counter_output(struct perf_counter *counter,
+			 int nmi, struct pt_regs *regs)
+{
+	switch (counter->hw_event.record_type) {
+	case PERF_RECORD_SIMPLE:
+		return;
+
+	case PERF_RECORD_IRQ:
+		perf_counter_store_irq(counter, instruction_pointer(regs));
+		break;
+
+	case PERF_RECORD_GROUP:
+		perf_counter_handle_group(counter);
+		break;
+	}
+
+	if (nmi) {
+		counter->wakeup_pending = 1;
+		set_perf_counter_pending();
+	} else
+		wake_up(&counter->waitq);
+}
+
 /*
  * Generic software counter infrastructure
  */
@@ -1395,54 +1449,6 @@ static void perf_swcounter_set_period(struct perf_counter *counter)
 	atomic64_set(&hwc->count, -left);
 }
 
-static void perf_swcounter_store_irq(struct perf_counter *counter, u64 data)
-{
-	struct perf_data *irqdata = counter->irqdata;
-
-	if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
-		irqdata->overrun++;
-	} else {
-		u64 *p = (u64 *) &irqdata->data[irqdata->len];
-
-		*p = data;
-		irqdata->len += sizeof(u64);
-	}
-}
-
-static void perf_swcounter_handle_group(struct perf_counter *sibling)
-{
-	struct perf_counter *counter, *group_leader = sibling->group_leader;
-
-	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
-		counter->hw_ops->read(counter);
-		perf_swcounter_store_irq(sibling, counter->hw_event.event_config);
-		perf_swcounter_store_irq(sibling, atomic64_read(&counter->count));
-	}
-}
-
-static void perf_swcounter_interrupt(struct perf_counter *counter,
-				     int nmi, struct pt_regs *regs)
-{
-	switch (counter->hw_event.record_type) {
-	case PERF_RECORD_SIMPLE:
-		break;
-
-	case PERF_RECORD_IRQ:
-		perf_swcounter_store_irq(counter, instruction_pointer(regs));
-		break;
-
-	case PERF_RECORD_GROUP:
-		perf_swcounter_handle_group(counter);
-		break;
-	}
-
-	if (nmi) {
-		counter->wakeup_pending = 1;
-		set_perf_counter_pending();
-	} else
-		wake_up(&counter->waitq);
-}
-
 static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 {
 	struct perf_counter *counter;
@@ -1461,7 +1467,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 		regs = task_pt_regs(current);
 
 	if (regs)
-		perf_swcounter_interrupt(counter, 0, regs);
+		perf_counter_output(counter, 0, regs);
 
 	hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period));
 
@@ -1473,7 +1479,7 @@ static void perf_swcounter_overflow(struct perf_counter *counter,
 {
 	perf_swcounter_update(counter);
 	perf_swcounter_set_period(counter);
-	perf_swcounter_interrupt(counter, nmi, regs);
+	perf_counter_output(counter, nmi, regs);
 }
 
 static int perf_swcounter_match(struct perf_counter *counter,
-- 
cgit v1.2.3-70-g09d2


From 9aaa131a279834dff75c290c91f0058f62d72d46 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Sat, 21 Mar 2009 15:31:47 +1100
Subject: perf_counter: fix type/event_id layout on big-endian systems

Impact: build fix for powerpc

Commit db3a944aca35ae61 ("perf_counter: revamp syscall input ABI")
expanded the hw_event.type field into a union of structs containing
bitfields.  In particular it introduced a type field and a raw_type
field, with the intention that the 1-bit raw_type field should
overlay the most-significant bit of the 8-bit type field, and in fact
perf_counter_alloc() now assumes that (or at least, assumes that
raw_type doesn't overlay any of the bits that are 1 in the values of
PERF_TYPE_{HARDWARE,SOFTWARE,TRACEPOINT}).

Unfortunately this is not true on big-endian systems such as PowerPC,
where bitfields are laid out from left to right, i.e. from most
significant bit to least significant.  This means that setting
hw_event.type = PERF_TYPE_SOFTWARE will set hw_event.raw_type to 1.

This fixes it by making the layout depend on whether or not
__BIG_ENDIAN_BITFIELD is defined.  It's a bit ugly, but that's what
we get for using bitfields in a user/kernel ABI.

Also, that commit didn't fix up some places in arch/powerpc/kernel/
perf_counter.c where hw_event.raw and hw_event.event_id were used.
This fixes them too.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/perf_counter.c |  9 +++++----
 include/linux/perf_counter.h       | 12 ++++++++++++
 2 files changed, 17 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 830ca9c4494..6413d9c0313 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -602,12 +602,13 @@ hw_perf_counter_init(struct perf_counter *counter)
 		return NULL;
 	if ((s64)counter->hw_event.irq_period < 0)
 		return NULL;
-	ev = counter->hw_event.event_id;
-	if (!counter->hw_event.raw) {
-		if (ev >= ppmu->n_generic ||
-		    ppmu->generic_events[ev] == 0)
+	if (!counter->hw_event.raw_type) {
+		ev = counter->hw_event.event_id;
+		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
 			return NULL;
 		ev = ppmu->generic_events[ev];
+	} else {
+		ev = counter->hw_event.raw_event_id;
 	}
 	counter->hw.config_base = ev;
 	counter->hw.idx = 0;
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index a4b76c0175f..98f5990be1e 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -15,6 +15,7 @@
 
 #include <linux/types.h>
 #include <linux/ioctl.h>
+#include <asm/byteorder.h>
 
 /*
  * User-space ABI bits:
@@ -86,6 +87,7 @@ enum perf_counter_record_type {
  */
 struct perf_counter_hw_event {
 	union {
+#ifndef __BIG_ENDIAN_BITFIELD
 		struct {
 			__u64			event_id	: 56,
 						type		:  8;
@@ -94,6 +96,16 @@ struct perf_counter_hw_event {
 			__u64			raw_event_id	: 63,
 						raw_type	:  1;
 		};
+#else
+		struct {
+			__u64			type		:  8,
+						event_id	: 56;
+		};
+		struct {
+			__u64			raw_type	:  1,
+						raw_event_id	: 63;
+		};
+#endif /* __BIT_ENDIAN_BITFIELD */
 		__u64		event_config;
 	};
 
-- 
cgit v1.2.3-70-g09d2


From f4a2deb4860497f4332cf6a1acddab3dd628ddf0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 23 Mar 2009 18:22:06 +0100
Subject: perf_counter: remove the event config bitfields

Since the bitfields turned into a bit of a mess, remove them and rely on
good old masks.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090323172417.059499915@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/perf_counter.c |  6 ++--
 arch/x86/kernel/cpu/perf_counter.c |  8 ++---
 include/linux/perf_counter.h       | 74 +++++++++++++++++++++++++-------------
 kernel/perf_counter.c              | 22 +++++++-----
 4 files changed, 70 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 6413d9c0313..d05651584d4 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -602,13 +602,13 @@ hw_perf_counter_init(struct perf_counter *counter)
 		return NULL;
 	if ((s64)counter->hw_event.irq_period < 0)
 		return NULL;
-	if (!counter->hw_event.raw_type) {
-		ev = counter->hw_event.event_id;
+	if (!perf_event_raw(&counter->hw_event)) {
+		ev = perf_event_id(&counter->hw_event);
 		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
 			return NULL;
 		ev = ppmu->generic_events[ev];
 	} else {
-		ev = counter->hw_event.raw_event_id;
+		ev = perf_event_config(&counter->hw_event);
 	}
 	counter->hw.config_base = ev;
 	counter->hw.idx = 0;
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 902282d68b0..3f95b0cdc55 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -217,15 +217,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	/*
 	 * Raw event type provide the config in the event structure
 	 */
-	if (hw_event->raw_type) {
-		hwc->config |= pmc_ops->raw_event(hw_event->raw_event_id);
+	if (perf_event_raw(hw_event)) {
+		hwc->config |= pmc_ops->raw_event(perf_event_config(hw_event));
 	} else {
-		if (hw_event->event_id >= pmc_ops->max_events)
+		if (perf_event_id(hw_event) >= pmc_ops->max_events)
 			return -EINVAL;
 		/*
 		 * The generic map:
 		 */
-		hwc->config |= pmc_ops->event_map(hw_event->event_id);
+		hwc->config |= pmc_ops->event_map(perf_event_id(hw_event));
 	}
 	counter->wakeup_pending = 0;
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 98f5990be1e..56099e52970 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -82,32 +82,37 @@ enum perf_counter_record_type {
 	PERF_RECORD_GROUP		= 2,
 };
 
+#define __PERF_COUNTER_MASK(name) 			\
+	(((1ULL << PERF_COUNTER_##name##_BITS) - 1) <<	\
+	 PERF_COUNTER_##name##_SHIFT)
+
+#define PERF_COUNTER_RAW_BITS		1
+#define PERF_COUNTER_RAW_SHIFT		63
+#define PERF_COUNTER_RAW_MASK		__PERF_COUNTER_MASK(RAW)
+
+#define PERF_COUNTER_CONFIG_BITS	63
+#define PERF_COUNTER_CONFIG_SHIFT	0
+#define PERF_COUNTER_CONFIG_MASK	__PERF_COUNTER_MASK(CONFIG)
+
+#define PERF_COUNTER_TYPE_BITS		7
+#define PERF_COUNTER_TYPE_SHIFT		56
+#define PERF_COUNTER_TYPE_MASK		__PERF_COUNTER_MASK(TYPE)
+
+#define PERF_COUNTER_EVENT_BITS		56
+#define PERF_COUNTER_EVENT_SHIFT	0
+#define PERF_COUNTER_EVENT_MASK		__PERF_COUNTER_MASK(EVENT)
+
 /*
  * Hardware event to monitor via a performance monitoring counter:
  */
 struct perf_counter_hw_event {
-	union {
-#ifndef __BIG_ENDIAN_BITFIELD
-		struct {
-			__u64			event_id	: 56,
-						type		:  8;
-		};
-		struct {
-			__u64			raw_event_id	: 63,
-						raw_type	:  1;
-		};
-#else
-		struct {
-			__u64			type		:  8,
-						event_id	: 56;
-		};
-		struct {
-			__u64			raw_type	:  1,
-						raw_event_id	: 63;
-		};
-#endif /* __BIT_ENDIAN_BITFIELD */
-		__u64		event_config;
-	};
+	/*
+	 * The MSB of the config word signifies if the rest contains cpu
+	 * specific (raw) counter configuration data, if unset, the next
+	 * 7 bits are an event type and the rest of the bits are the event
+	 * identifier.
+	 */
+	__u64			config;
 
 	__u64			irq_period;
 	__u64			record_type;
@@ -157,6 +162,27 @@ struct perf_counter_hw_event {
 
 struct task_struct;
 
+static inline u64 perf_event_raw(struct perf_counter_hw_event *hw_event)
+{
+	return hw_event->config & PERF_COUNTER_RAW_MASK;
+}
+
+static inline u64 perf_event_config(struct perf_counter_hw_event *hw_event)
+{
+	return hw_event->config & PERF_COUNTER_CONFIG_MASK;
+}
+
+static inline u64 perf_event_type(struct perf_counter_hw_event *hw_event)
+{
+	return (hw_event->config & PERF_COUNTER_TYPE_MASK) >>
+		PERF_COUNTER_TYPE_SHIFT;
+}
+
+static inline u64 perf_event_id(struct perf_counter_hw_event *hw_event)
+{
+	return hw_event->config & PERF_COUNTER_EVENT_MASK;
+}
+
 /**
  * struct hw_perf_counter - performance counter hardware details:
  */
@@ -336,8 +362,8 @@ extern void perf_counter_output(struct perf_counter *counter,
  */
 static inline int is_software_counter(struct perf_counter *counter)
 {
-	return !counter->hw_event.raw_type &&
-		counter->hw_event.type != PERF_TYPE_HARDWARE;
+	return !perf_event_raw(&counter->hw_event) &&
+		perf_event_type(&counter->hw_event) != PERF_TYPE_HARDWARE;
 }
 
 extern void perf_swcounter_event(u32, u64, int, struct pt_regs *);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f054b8c9bf9..ca14fc41ccd 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1379,7 +1379,7 @@ static void perf_counter_handle_group(struct perf_counter *counter)
 	list_for_each_entry(sub, &leader->sibling_list, list_entry) {
 		if (sub != counter)
 			sub->hw_ops->read(sub);
-		perf_counter_store_irq(counter, sub->hw_event.event_config);
+		perf_counter_store_irq(counter, sub->hw_event.config);
 		perf_counter_store_irq(counter, atomic64_read(&sub->count));
 	}
 }
@@ -1489,13 +1489,13 @@ static int perf_swcounter_match(struct perf_counter *counter,
 	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
 		return 0;
 
-	if (counter->hw_event.raw_type)
+	if (perf_event_raw(&counter->hw_event))
 		return 0;
 
-	if (counter->hw_event.type != type)
+	if (perf_event_type(&counter->hw_event) != type)
 		return 0;
 
-	if (counter->hw_event.event_id != event)
+	if (perf_event_id(&counter->hw_event) != event)
 		return 0;
 
 	if (counter->hw_event.exclude_user && user_mode(regs))
@@ -1757,13 +1757,13 @@ extern void ftrace_profile_disable(int);
 
 static void tp_perf_counter_destroy(struct perf_counter *counter)
 {
-	ftrace_profile_disable(counter->hw_event.event_id);
+	ftrace_profile_disable(perf_event_id(&counter->hw_event));
 }
 
 static const struct hw_perf_counter_ops *
 tp_perf_counter_init(struct perf_counter *counter)
 {
-	int event_id = counter->hw_event.event_id;
+	int event_id = perf_event_id(&counter->hw_event);
 	int ret;
 
 	ret = ftrace_profile_enable(event_id);
@@ -1797,7 +1797,7 @@ sw_perf_counter_init(struct perf_counter *counter)
 	 * to be kernel events, and page faults are never hypervisor
 	 * events.
 	 */
-	switch (counter->hw_event.event_id) {
+	switch (perf_event_id(&counter->hw_event)) {
 	case PERF_COUNT_CPU_CLOCK:
 		hw_ops = &perf_ops_cpu_clock;
 
@@ -1882,9 +1882,12 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 
 	hw_ops = NULL;
 
-	if (hw_event->raw_type)
+	if (perf_event_raw(hw_event)) {
 		hw_ops = hw_perf_counter_init(counter);
-	else switch (hw_event->type) {
+		goto done;
+	}
+
+	switch (perf_event_type(hw_event)) {
 	case PERF_TYPE_HARDWARE:
 		hw_ops = hw_perf_counter_init(counter);
 		break;
@@ -1902,6 +1905,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 		kfree(counter);
 		return NULL;
 	}
+done:
 	counter->hw_ops = hw_ops;
 
 	return counter;
-- 
cgit v1.2.3-70-g09d2


From 96f6d4444302bb2ea2cf409529eef816462f6ce0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 23 Mar 2009 18:22:07 +0100
Subject: perf_counter: avoid recursion

Tracepoint events like lock_acquire and software counters like
pagefaults can recurse into the perf counter code again, avoid that.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090323172417.152096433@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  7 +++++++
 kernel/perf_counter.c        | 26 ++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 56099e52970..18dc17d0a61 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -328,6 +328,13 @@ struct perf_cpu_context {
 	int				active_oncpu;
 	int				max_pertask;
 	int				exclusive;
+
+	/*
+	 * Recursion avoidance:
+	 *
+	 * task, softirq, irq, nmi context
+	 */
+	int			recursion[4];
 };
 
 /*
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index ca14fc41ccd..ce34bff07bd 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -23,6 +23,7 @@
 #include <linux/mm.h>
 #include <linux/vmstat.h>
 #include <linux/rculist.h>
+#include <linux/hardirq.h>
 
 #include <asm/irq_regs.h>
 
@@ -1532,10 +1533,31 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
 	rcu_read_unlock();
 }
 
+static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx)
+{
+	if (in_nmi())
+		return &cpuctx->recursion[3];
+
+	if (in_irq())
+		return &cpuctx->recursion[2];
+
+	if (in_softirq())
+		return &cpuctx->recursion[1];
+
+	return &cpuctx->recursion[0];
+}
+
 static void __perf_swcounter_event(enum perf_event_types type, u32 event,
 				   u64 nr, int nmi, struct pt_regs *regs)
 {
 	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
+	int *recursion = perf_swcounter_recursion_context(cpuctx);
+
+	if (*recursion)
+		goto out;
+
+	(*recursion)++;
+	barrier();
 
 	perf_swcounter_ctx_event(&cpuctx->ctx, type, event, nr, nmi, regs);
 	if (cpuctx->task_ctx) {
@@ -1543,6 +1565,10 @@ static void __perf_swcounter_event(enum perf_event_types type, u32 event,
 				nr, nmi, regs);
 	}
 
+	barrier();
+	(*recursion)--;
+
+out:
 	put_cpu_var(perf_cpu_context);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 37d81828385f8ff823caaaf1a83e72d065b6cfa1 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 23 Mar 2009 18:22:08 +0100
Subject: perf_counter: add an mmap method to allow userspace to read hardware
 counters

Impact: new feature giving performance improvement

This adds the ability for userspace to do an mmap on a hardware counter
fd and get access to a read-only page that contains the information
needed to translate a hardware counter value to the full 64-bit
counter value that would be returned by a read on the fd.  This is
useful on architectures that allow user programs to read the hardware
counters, such as PowerPC.

The mmap will only succeed if the counter is a hardware counter
monitoring the current process.

On my quad 2.5GHz PowerPC 970MP machine, userspace can read a counter
and translate it to the full 64-bit value in about 30ns using the
mmapped page, compared to about 830ns for the read syscall on the
counter, so this does give a significant performance improvement.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Orig-LKML-Reference: <20090323172417.297057964@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/perf_counter.c |  6 +++
 include/linux/perf_counter.h       | 15 ++++++++
 kernel/perf_counter.c              | 76 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 97 insertions(+)

(limited to 'include')

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index d05651584d4..e4349281b07 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -417,6 +417,8 @@ void hw_perf_restore(u64 disable)
 		atomic64_set(&counter->hw.prev_count, val);
 		counter->hw.idx = hwc_index[i] + 1;
 		write_pmc(counter->hw.idx, val);
+		if (counter->user_page)
+			perf_counter_update_userpage(counter);
 	}
 	mb();
 	cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
@@ -572,6 +574,8 @@ static void power_perf_disable(struct perf_counter *counter)
 			ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
 			write_pmc(counter->hw.idx, 0);
 			counter->hw.idx = 0;
+			if (counter->user_page)
+				perf_counter_update_userpage(counter);
 			break;
 		}
 	}
@@ -698,6 +702,8 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	write_pmc(counter->hw.idx, val);
 	atomic64_set(&counter->hw.prev_count, val);
 	atomic64_set(&counter->hw.period_left, left);
+	if (counter->user_page)
+		perf_counter_update_userpage(counter);
 
 	/*
 	 * Finally record data if requested.
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 18dc17d0a61..40b324e91bf 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -143,6 +143,17 @@ struct perf_counter_hw_event {
 #define PERF_COUNTER_IOC_ENABLE		_IO('$', 0)
 #define PERF_COUNTER_IOC_DISABLE	_IO('$', 1)
 
+/*
+ * Structure of the page that can be mapped via mmap
+ */
+struct perf_counter_mmap_page {
+	__u32	version;		/* version number of this structure */
+	__u32	compat_version;		/* lowest version this is compat with */
+	__u32	lock;			/* seqlock for synchronization */
+	__u32	index;			/* hardware counter identifier */
+	__s64	offset;			/* add to hardware counter value */
+};
+
 #ifdef __KERNEL__
 /*
  * Kernel-internal data types and definitions:
@@ -278,6 +289,9 @@ struct perf_counter {
 	int				oncpu;
 	int				cpu;
 
+	/* pointer to page shared with userspace via mmap */
+	unsigned long			user_page;
+
 	/* read() / irq related data */
 	wait_queue_head_t		waitq;
 	/* optional: for NMIs */
@@ -361,6 +375,7 @@ extern int perf_counter_task_enable(void);
 extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
 	       struct perf_cpu_context *cpuctx,
 	       struct perf_counter_context *ctx, int cpu);
+extern void perf_counter_update_userpage(struct perf_counter *counter);
 
 extern void perf_counter_output(struct perf_counter *counter,
 				int nmi, struct pt_regs *regs);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index ce34bff07bd..d9cfd902140 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1177,6 +1177,7 @@ static int perf_release(struct inode *inode, struct file *file)
 	mutex_unlock(&counter->mutex);
 	mutex_unlock(&ctx->mutex);
 
+	free_page(counter->user_page);
 	free_counter(counter);
 	put_context(ctx);
 
@@ -1346,12 +1347,87 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	return err;
 }
 
+void perf_counter_update_userpage(struct perf_counter *counter)
+{
+	struct perf_counter_mmap_page *userpg;
+
+	if (!counter->user_page)
+		return;
+	userpg = (struct perf_counter_mmap_page *) counter->user_page;
+
+	++userpg->lock;
+	smp_wmb();
+	userpg->index = counter->hw.idx;
+	userpg->offset = atomic64_read(&counter->count);
+	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
+		userpg->offset -= atomic64_read(&counter->hw.prev_count);
+	smp_wmb();
+	++userpg->lock;
+}
+
+static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct perf_counter *counter = vma->vm_file->private_data;
+
+	if (!counter->user_page)
+		return VM_FAULT_SIGBUS;
+
+	vmf->page = virt_to_page(counter->user_page);
+	get_page(vmf->page);
+	return 0;
+}
+
+static struct vm_operations_struct perf_mmap_vmops = {
+	.fault = perf_mmap_fault,
+};
+
+static int perf_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct perf_counter *counter = file->private_data;
+	unsigned long userpg;
+
+	if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
+		return -EINVAL;
+	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+		return -EINVAL;
+
+	/*
+	 * For now, restrict to the case of a hardware counter
+	 * on the current task.
+	 */
+	if (is_software_counter(counter) || counter->task != current)
+		return -EINVAL;
+
+	userpg = counter->user_page;
+	if (!userpg) {
+		userpg = get_zeroed_page(GFP_KERNEL);
+		mutex_lock(&counter->mutex);
+		if (counter->user_page) {
+			free_page(userpg);
+			userpg = counter->user_page;
+		} else {
+			counter->user_page = userpg;
+		}
+		mutex_unlock(&counter->mutex);
+		if (!userpg)
+			return -ENOMEM;
+	}
+
+	perf_counter_update_userpage(counter);
+
+	vma->vm_flags &= ~VM_MAYWRITE;
+	vma->vm_flags |= VM_RESERVED;
+	vma->vm_ops = &perf_mmap_vmops;
+	return 0;
+}
+
 static const struct file_operations perf_fops = {
 	.release		= perf_release,
 	.read			= perf_read,
 	.poll			= perf_poll,
 	.unlocked_ioctl		= perf_ioctl,
 	.compat_ioctl		= perf_ioctl,
+	.mmap			= perf_mmap,
 };
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 9ab772cd535c4b256a577eae516f9c7462346b2d Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Mon, 23 Mar 2009 18:22:09 +0100
Subject: mutex: add atomic_dec_and_mutex_lock()

Much like the atomic_dec_and_lock() function in which we take an hold a
spin_lock if we drop the atomic to 0 this function takes and holds the
mutex if we dec the atomic to 0.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090323172417.410913479@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/mutex.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 3069ec7e0ab..93054fc3635 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -151,4 +151,27 @@ extern int __must_check mutex_lock_killable(struct mutex *lock);
 extern int mutex_trylock(struct mutex *lock);
 extern void mutex_unlock(struct mutex *lock);
 
+/**
+ * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
+ * @cnt: the atomic which we are to dec
+ * @lock: the mutex to return holding if we dec to 0
+ *
+ * return true and hold lock if we dec to 0, return false otherwise
+ */
+static inline int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
+{
+	/* dec if we can't possibly hit 0 */
+	if (atomic_add_unless(cnt, -1, 1))
+		return 0;
+	/* we might hit 0, so take the lock */
+	mutex_lock(lock);
+	if (!atomic_dec_and_test(cnt)) {
+		/* when we actually did the dec, we didn't hit 0 */
+		mutex_unlock(lock);
+		return 0;
+	}
+	/* we hit 0, and we hold the lock */
+	return 1;
+}
+
 #endif
-- 
cgit v1.2.3-70-g09d2


From 7b732a75047738e4f85438ed2f9cd34bf5f2a19a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 23 Mar 2009 18:22:10 +0100
Subject: perf_counter: new output ABI - part 1

Impact: Rework the perfcounter output ABI

use sys_read() only for instant data and provide mmap() output for all
async overflow data.

The first mmap() determines the size of the output buffer. The mmap()
size must be a PAGE_SIZE multiple of 1+pages, where pages must be a
power of 2 or 0. Further mmap()s of the same fd must have the same
size. Once all maps are gone, you can again mmap() with a new size.

In case of 0 extra pages there is no data output and the first page
only contains meta data.

When there are data pages, a poll() event will be generated for each
full page of data. Furthermore, the output is circular. This means
that although 1 page is a valid configuration, its useless, since
we'll start overwriting it the instant we report a full page.

Future work will focus on the output format (currently maintained)
where we'll likey want each entry denoted by a header which includes a
type and length.

Further future work will allow to splice() the fd, also containing the
async overflow data -- splice() would be mutually exclusive with
mmap() of the data.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090323172417.470536358@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/perf_counter.c |   9 +-
 include/linux/perf_counter.h       |  36 ++-
 kernel/perf_counter.c              | 464 ++++++++++++++++++++-----------------
 3 files changed, 263 insertions(+), 246 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index e4349281b07..d48596ab655 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -417,8 +417,7 @@ void hw_perf_restore(u64 disable)
 		atomic64_set(&counter->hw.prev_count, val);
 		counter->hw.idx = hwc_index[i] + 1;
 		write_pmc(counter->hw.idx, val);
-		if (counter->user_page)
-			perf_counter_update_userpage(counter);
+		perf_counter_update_userpage(counter);
 	}
 	mb();
 	cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
@@ -574,8 +573,7 @@ static void power_perf_disable(struct perf_counter *counter)
 			ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
 			write_pmc(counter->hw.idx, 0);
 			counter->hw.idx = 0;
-			if (counter->user_page)
-				perf_counter_update_userpage(counter);
+			perf_counter_update_userpage(counter);
 			break;
 		}
 	}
@@ -702,8 +700,7 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	write_pmc(counter->hw.idx, val);
 	atomic64_set(&counter->hw.prev_count, val);
 	atomic64_set(&counter->hw.period_left, left);
-	if (counter->user_page)
-		perf_counter_update_userpage(counter);
+	perf_counter_update_userpage(counter);
 
 	/*
 	 * Finally record data if requested.
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 40b324e91bf..2b5e66d5ebd 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -152,6 +152,8 @@ struct perf_counter_mmap_page {
 	__u32	lock;			/* seqlock for synchronization */
 	__u32	index;			/* hardware counter identifier */
 	__s64	offset;			/* add to hardware counter value */
+
+	__u32   data_head;		/* head in the data section */
 };
 
 #ifdef __KERNEL__
@@ -218,21 +220,6 @@ struct hw_perf_counter {
 #endif
 };
 
-/*
- * Hardcoded buffer length limit for now, for IRQ-fed events:
- */
-#define PERF_DATA_BUFLEN		2048
-
-/**
- * struct perf_data - performance counter IRQ data sampling ...
- */
-struct perf_data {
-	int				len;
-	int				rd_idx;
-	int				overrun;
-	u8				data[PERF_DATA_BUFLEN];
-};
-
 struct perf_counter;
 
 /**
@@ -256,6 +243,14 @@ enum perf_counter_active_state {
 
 struct file;
 
+struct perf_mmap_data {
+	struct rcu_head			rcu_head;
+	int				nr_pages;
+	atomic_t			head;
+	struct perf_counter_mmap_page   *user_page;
+	void 				*data_pages[0];
+};
+
 /**
  * struct perf_counter - performance counter kernel representation:
  */
@@ -289,16 +284,15 @@ struct perf_counter {
 	int				oncpu;
 	int				cpu;
 
-	/* pointer to page shared with userspace via mmap */
-	unsigned long			user_page;
+	/* mmap bits */
+	struct mutex			mmap_mutex;
+	atomic_t			mmap_count;
+	struct perf_mmap_data		*data;
 
-	/* read() / irq related data */
+	/* poll related */
 	wait_queue_head_t		waitq;
 	/* optional: for NMIs */
 	int				wakeup_pending;
-	struct perf_data		*irqdata;
-	struct perf_data		*usrdata;
-	struct perf_data		data[2];
 
 	void (*destroy)(struct perf_counter *);
 	struct rcu_head			rcu_head;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index d9cfd902140..0dfe91094fd 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -4,7 +4,8 @@
  *  Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de>
  *  Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
  *
- *  For licencing details see kernel-base/COPYING
+ *
+ *  For licensing details see kernel-base/COPYING
  */
 
 #include <linux/fs.h>
@@ -1022,66 +1023,6 @@ static u64 perf_counter_read(struct perf_counter *counter)
 	return atomic64_read(&counter->count);
 }
 
-/*
- * Cross CPU call to switch performance data pointers
- */
-static void __perf_switch_irq_data(void *info)
-{
-	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
-	struct perf_counter *counter = info;
-	struct perf_counter_context *ctx = counter->ctx;
-	struct perf_data *oldirqdata = counter->irqdata;
-
-	/*
-	 * If this is a task context, we need to check whether it is
-	 * the current task context of this cpu. If not it has been
-	 * scheduled out before the smp call arrived.
-	 */
-	if (ctx->task) {
-		if (cpuctx->task_ctx != ctx)
-			return;
-		spin_lock(&ctx->lock);
-	}
-
-	/* Change the pointer NMI safe */
-	atomic_long_set((atomic_long_t *)&counter->irqdata,
-			(unsigned long) counter->usrdata);
-	counter->usrdata = oldirqdata;
-
-	if (ctx->task)
-		spin_unlock(&ctx->lock);
-}
-
-static struct perf_data *perf_switch_irq_data(struct perf_counter *counter)
-{
-	struct perf_counter_context *ctx = counter->ctx;
-	struct perf_data *oldirqdata = counter->irqdata;
-	struct task_struct *task = ctx->task;
-
-	if (!task) {
-		smp_call_function_single(counter->cpu,
-					 __perf_switch_irq_data,
-					 counter, 1);
-		return counter->usrdata;
-	}
-
-retry:
-	spin_lock_irq(&ctx->lock);
-	if (counter->state != PERF_COUNTER_STATE_ACTIVE) {
-		counter->irqdata = counter->usrdata;
-		counter->usrdata = oldirqdata;
-		spin_unlock_irq(&ctx->lock);
-		return oldirqdata;
-	}
-	spin_unlock_irq(&ctx->lock);
-	task_oncpu_function_call(task, __perf_switch_irq_data, counter);
-	/* Might have failed, because task was scheduled out */
-	if (counter->irqdata == oldirqdata)
-		goto retry;
-
-	return counter->usrdata;
-}
-
 static void put_context(struct perf_counter_context *ctx)
 {
 	if (ctx->task)
@@ -1177,7 +1118,6 @@ static int perf_release(struct inode *inode, struct file *file)
 	mutex_unlock(&counter->mutex);
 	mutex_unlock(&ctx->mutex);
 
-	free_page(counter->user_page);
 	free_counter(counter);
 	put_context(ctx);
 
@@ -1192,7 +1132,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 {
 	u64 cntval;
 
-	if (count != sizeof(cntval))
+	if (count < sizeof(cntval))
 		return -EINVAL;
 
 	/*
@@ -1210,122 +1150,21 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 	return put_user(cntval, (u64 __user *) buf) ? -EFAULT : sizeof(cntval);
 }
 
-static ssize_t
-perf_copy_usrdata(struct perf_data *usrdata, char __user *buf, size_t count)
-{
-	if (!usrdata->len)
-		return 0;
-
-	count = min(count, (size_t)usrdata->len);
-	if (copy_to_user(buf, usrdata->data + usrdata->rd_idx, count))
-		return -EFAULT;
-
-	/* Adjust the counters */
-	usrdata->len -= count;
-	if (!usrdata->len)
-		usrdata->rd_idx = 0;
-	else
-		usrdata->rd_idx += count;
-
-	return count;
-}
-
-static ssize_t
-perf_read_irq_data(struct perf_counter	*counter,
-		   char __user		*buf,
-		   size_t		count,
-		   int			nonblocking)
-{
-	struct perf_data *irqdata, *usrdata;
-	DECLARE_WAITQUEUE(wait, current);
-	ssize_t res, res2;
-
-	irqdata = counter->irqdata;
-	usrdata = counter->usrdata;
-
-	if (usrdata->len + irqdata->len >= count)
-		goto read_pending;
-
-	if (nonblocking)
-		return -EAGAIN;
-
-	spin_lock_irq(&counter->waitq.lock);
-	__add_wait_queue(&counter->waitq, &wait);
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (usrdata->len + irqdata->len >= count)
-			break;
-
-		if (signal_pending(current))
-			break;
-
-		if (counter->state == PERF_COUNTER_STATE_ERROR)
-			break;
-
-		spin_unlock_irq(&counter->waitq.lock);
-		schedule();
-		spin_lock_irq(&counter->waitq.lock);
-	}
-	__remove_wait_queue(&counter->waitq, &wait);
-	__set_current_state(TASK_RUNNING);
-	spin_unlock_irq(&counter->waitq.lock);
-
-	if (usrdata->len + irqdata->len < count &&
-	    counter->state != PERF_COUNTER_STATE_ERROR)
-		return -ERESTARTSYS;
-read_pending:
-	mutex_lock(&counter->mutex);
-
-	/* Drain pending data first: */
-	res = perf_copy_usrdata(usrdata, buf, count);
-	if (res < 0 || res == count)
-		goto out;
-
-	/* Switch irq buffer: */
-	usrdata = perf_switch_irq_data(counter);
-	res2 = perf_copy_usrdata(usrdata, buf + res, count - res);
-	if (res2 < 0) {
-		if (!res)
-			res = -EFAULT;
-	} else {
-		res += res2;
-	}
-out:
-	mutex_unlock(&counter->mutex);
-
-	return res;
-}
-
 static ssize_t
 perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
 	struct perf_counter *counter = file->private_data;
 
-	switch (counter->hw_event.record_type) {
-	case PERF_RECORD_SIMPLE:
-		return perf_read_hw(counter, buf, count);
-
-	case PERF_RECORD_IRQ:
-	case PERF_RECORD_GROUP:
-		return perf_read_irq_data(counter, buf, count,
-					  file->f_flags & O_NONBLOCK);
-	}
-	return -EINVAL;
+	return perf_read_hw(counter, buf, count);
 }
 
 static unsigned int perf_poll(struct file *file, poll_table *wait)
 {
 	struct perf_counter *counter = file->private_data;
-	unsigned int events = 0;
-	unsigned long flags;
+	unsigned int events = POLLIN;
 
 	poll_wait(file, &counter->waitq, wait);
 
-	spin_lock_irqsave(&counter->waitq.lock, flags);
-	if (counter->usrdata->len || counter->irqdata->len)
-		events |= POLLIN;
-	spin_unlock_irqrestore(&counter->waitq.lock, flags);
-
 	return events;
 }
 
@@ -1347,78 +1186,207 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	return err;
 }
 
-void perf_counter_update_userpage(struct perf_counter *counter)
+static void __perf_counter_update_userpage(struct perf_counter *counter,
+					   struct perf_mmap_data *data)
 {
-	struct perf_counter_mmap_page *userpg;
-
-	if (!counter->user_page)
-		return;
-	userpg = (struct perf_counter_mmap_page *) counter->user_page;
+	struct perf_counter_mmap_page *userpg = data->user_page;
 
+	/*
+	 * Disable preemption so as to not let the corresponding user-space
+	 * spin too long if we get preempted.
+	 */
+	preempt_disable();
 	++userpg->lock;
 	smp_wmb();
 	userpg->index = counter->hw.idx;
 	userpg->offset = atomic64_read(&counter->count);
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
 		userpg->offset -= atomic64_read(&counter->hw.prev_count);
+
+	userpg->data_head = atomic_read(&data->head);
 	smp_wmb();
 	++userpg->lock;
+	preempt_enable();
+}
+
+void perf_counter_update_userpage(struct perf_counter *counter)
+{
+	struct perf_mmap_data *data;
+
+	rcu_read_lock();
+	data = rcu_dereference(counter->data);
+	if (data)
+		__perf_counter_update_userpage(counter, data);
+	rcu_read_unlock();
 }
 
 static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct perf_counter *counter = vma->vm_file->private_data;
+	struct perf_mmap_data *data;
+	int ret = VM_FAULT_SIGBUS;
 
-	if (!counter->user_page)
-		return VM_FAULT_SIGBUS;
+	rcu_read_lock();
+	data = rcu_dereference(counter->data);
+	if (!data)
+		goto unlock;
+
+	if (vmf->pgoff == 0) {
+		vmf->page = virt_to_page(data->user_page);
+	} else {
+		int nr = vmf->pgoff - 1;
 
-	vmf->page = virt_to_page(counter->user_page);
+		if ((unsigned)nr > data->nr_pages)
+			goto unlock;
+
+		vmf->page = virt_to_page(data->data_pages[nr]);
+	}
 	get_page(vmf->page);
+	ret = 0;
+unlock:
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages)
+{
+	struct perf_mmap_data *data;
+	unsigned long size;
+	int i;
+
+	WARN_ON(atomic_read(&counter->mmap_count));
+
+	size = sizeof(struct perf_mmap_data);
+	size += nr_pages * sizeof(void *);
+
+	data = kzalloc(size, GFP_KERNEL);
+	if (!data)
+		goto fail;
+
+	data->user_page = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!data->user_page)
+		goto fail_user_page;
+
+	for (i = 0; i < nr_pages; i++) {
+		data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL);
+		if (!data->data_pages[i])
+			goto fail_data_pages;
+	}
+
+	data->nr_pages = nr_pages;
+
+	rcu_assign_pointer(counter->data, data);
+
 	return 0;
+
+fail_data_pages:
+	for (i--; i >= 0; i--)
+		free_page((unsigned long)data->data_pages[i]);
+
+	free_page((unsigned long)data->user_page);
+
+fail_user_page:
+	kfree(data);
+
+fail:
+	return -ENOMEM;
+}
+
+static void __perf_mmap_data_free(struct rcu_head *rcu_head)
+{
+	struct perf_mmap_data *data = container_of(rcu_head,
+			struct perf_mmap_data, rcu_head);
+	int i;
+
+	free_page((unsigned long)data->user_page);
+	for (i = 0; i < data->nr_pages; i++)
+		free_page((unsigned long)data->data_pages[i]);
+	kfree(data);
+}
+
+static void perf_mmap_data_free(struct perf_counter *counter)
+{
+	struct perf_mmap_data *data = counter->data;
+
+	WARN_ON(atomic_read(&counter->mmap_count));
+
+	rcu_assign_pointer(counter->data, NULL);
+	call_rcu(&data->rcu_head, __perf_mmap_data_free);
+}
+
+static void perf_mmap_open(struct vm_area_struct *vma)
+{
+	struct perf_counter *counter = vma->vm_file->private_data;
+
+	atomic_inc(&counter->mmap_count);
+}
+
+static void perf_mmap_close(struct vm_area_struct *vma)
+{
+	struct perf_counter *counter = vma->vm_file->private_data;
+
+	if (atomic_dec_and_mutex_lock(&counter->mmap_count,
+				      &counter->mmap_mutex)) {
+		perf_mmap_data_free(counter);
+		mutex_unlock(&counter->mmap_mutex);
+	}
 }
 
 static struct vm_operations_struct perf_mmap_vmops = {
+	.open = perf_mmap_open,
+	.close = perf_mmap_close,
 	.fault = perf_mmap_fault,
 };
 
 static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct perf_counter *counter = file->private_data;
-	unsigned long userpg;
+	unsigned long vma_size;
+	unsigned long nr_pages;
+	unsigned long locked, lock_limit;
+	int ret = 0;
 
 	if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
 		return -EINVAL;
-	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+
+	vma_size = vma->vm_end - vma->vm_start;
+	nr_pages = (vma_size / PAGE_SIZE) - 1;
+
+	if (nr_pages == 0 || !is_power_of_2(nr_pages))
 		return -EINVAL;
 
-	/*
-	 * For now, restrict to the case of a hardware counter
-	 * on the current task.
-	 */
-	if (is_software_counter(counter) || counter->task != current)
+	if (vma_size != PAGE_SIZE * (1 + nr_pages))
 		return -EINVAL;
 
-	userpg = counter->user_page;
-	if (!userpg) {
-		userpg = get_zeroed_page(GFP_KERNEL);
-		mutex_lock(&counter->mutex);
-		if (counter->user_page) {
-			free_page(userpg);
-			userpg = counter->user_page;
-		} else {
-			counter->user_page = userpg;
-		}
-		mutex_unlock(&counter->mutex);
-		if (!userpg)
-			return -ENOMEM;
-	}
+	if (vma->vm_pgoff != 0)
+		return -EINVAL;
+
+	locked = vma_size >>  PAGE_SHIFT;
+	locked += vma->vm_mm->locked_vm;
 
-	perf_counter_update_userpage(counter);
+	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+	lock_limit >>= PAGE_SHIFT;
+
+	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK))
+		return -EPERM;
+
+	mutex_lock(&counter->mmap_mutex);
+	if (atomic_inc_not_zero(&counter->mmap_count))
+		goto out;
+
+	WARN_ON(counter->data);
+	ret = perf_mmap_data_alloc(counter, nr_pages);
+	if (!ret)
+		atomic_set(&counter->mmap_count, 1);
+out:
+	mutex_unlock(&counter->mmap_mutex);
 
 	vma->vm_flags &= ~VM_MAYWRITE;
 	vma->vm_flags |= VM_RESERVED;
 	vma->vm_ops = &perf_mmap_vmops;
-	return 0;
+
+	return ret;
 }
 
 static const struct file_operations perf_fops = {
@@ -1434,30 +1402,94 @@ static const struct file_operations perf_fops = {
  * Output
  */
 
-static void perf_counter_store_irq(struct perf_counter *counter, u64 data)
+static int perf_output_write(struct perf_counter *counter, int nmi,
+			     void *buf, ssize_t size)
 {
-	struct perf_data *irqdata = counter->irqdata;
+	struct perf_mmap_data *data;
+	unsigned int offset, head, nr;
+	unsigned int len;
+	int ret, wakeup;
 
-	if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
-		irqdata->overrun++;
-	} else {
-		u64 *p = (u64 *) &irqdata->data[irqdata->len];
+	rcu_read_lock();
+	ret = -ENOSPC;
+	data = rcu_dereference(counter->data);
+	if (!data)
+		goto out;
+
+	if (!data->nr_pages)
+		goto out;
+
+	ret = -EINVAL;
+	if (size > PAGE_SIZE)
+		goto out;
+
+	do {
+		offset = head = atomic_read(&data->head);
+		head += sizeof(u64);
+	} while (atomic_cmpxchg(&data->head, offset, head) != offset);
+
+	wakeup = (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT);
 
-		*p = data;
-		irqdata->len += sizeof(u64);
+	nr = (offset >> PAGE_SHIFT) & (data->nr_pages - 1);
+	offset &= PAGE_SIZE - 1;
+
+	len = min_t(unsigned int, PAGE_SIZE - offset, size);
+	memcpy(data->data_pages[nr] + offset, buf, len);
+	size -= len;
+
+	if (size) {
+		nr = (nr + 1) & (data->nr_pages - 1);
+		memcpy(data->data_pages[nr], buf + len, size);
+	}
+
+	/*
+	 * generate a poll() wakeup for every page boundary crossed
+	 */
+	if (wakeup) {
+		__perf_counter_update_userpage(counter, data);
+		if (nmi) {
+			counter->wakeup_pending = 1;
+			set_perf_counter_pending();
+		} else
+			wake_up(&counter->waitq);
 	}
+	ret = 0;
+out:
+	rcu_read_unlock();
+
+	return ret;
 }
 
-static void perf_counter_handle_group(struct perf_counter *counter)
+static void perf_output_simple(struct perf_counter *counter,
+			       int nmi, struct pt_regs *regs)
+{
+	u64 entry;
+
+	entry = instruction_pointer(regs);
+
+	perf_output_write(counter, nmi, &entry, sizeof(entry));
+}
+
+struct group_entry {
+	u64 event;
+	u64 counter;
+};
+
+static void perf_output_group(struct perf_counter *counter, int nmi)
 {
 	struct perf_counter *leader, *sub;
 
 	leader = counter->group_leader;
 	list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+		struct group_entry entry;
+
 		if (sub != counter)
 			sub->hw_ops->read(sub);
-		perf_counter_store_irq(counter, sub->hw_event.config);
-		perf_counter_store_irq(counter, atomic64_read(&sub->count));
+
+		entry.event = sub->hw_event.config;
+		entry.counter = atomic64_read(&sub->count);
+
+		perf_output_write(counter, nmi, &entry, sizeof(entry));
 	}
 }
 
@@ -1469,19 +1501,13 @@ void perf_counter_output(struct perf_counter *counter,
 		return;
 
 	case PERF_RECORD_IRQ:
-		perf_counter_store_irq(counter, instruction_pointer(regs));
+		perf_output_simple(counter, nmi, regs);
 		break;
 
 	case PERF_RECORD_GROUP:
-		perf_counter_handle_group(counter);
+		perf_output_group(counter, nmi);
 		break;
 	}
-
-	if (nmi) {
-		counter->wakeup_pending = 1;
-		set_perf_counter_pending();
-	} else
-		wake_up(&counter->waitq);
 }
 
 /*
@@ -1967,10 +1993,10 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	INIT_LIST_HEAD(&counter->sibling_list);
 	init_waitqueue_head(&counter->waitq);
 
+	mutex_init(&counter->mmap_mutex);
+
 	INIT_LIST_HEAD(&counter->child_list);
 
-	counter->irqdata		= &counter->data[0];
-	counter->usrdata		= &counter->data[1];
 	counter->cpu			= cpu;
 	counter->hw_event		= *hw_event;
 	counter->wakeup_pending		= 0;
-- 
cgit v1.2.3-70-g09d2


From c7138f37f905bb7987b1f9f5a8ee73667db39f25 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 24 Mar 2009 13:18:16 +0100
Subject: perf_counter: fix perf_poll()

Impact: fix kerneltop 100% CPU usage

Only return a poll event when there's actually been one, poll_wait()
doesn't actually wait for the waitq you pass it, it only enqueues
you on it.

Only once all FDs have been iterated and none of thm returned a
poll-event will it schedule().

Also make it return POLL_HUP when there's not mmap() area to read from.

Further, fix a silly bug in the write code.

Reported-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arjan van de Ven <arjan@infradead.org>
Orig-LKML-Reference: <1237897096.24918.181.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  1 +
 kernel/perf_counter.c        | 14 ++++++++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 2b5e66d5ebd..48212c15b7d 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -246,6 +246,7 @@ struct file;
 struct perf_mmap_data {
 	struct rcu_head			rcu_head;
 	int				nr_pages;
+	atomic_t			wakeup;
 	atomic_t			head;
 	struct perf_counter_mmap_page   *user_page;
 	void 				*data_pages[0];
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0dfe91094fd..affe227d56a 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1161,7 +1161,16 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 static unsigned int perf_poll(struct file *file, poll_table *wait)
 {
 	struct perf_counter *counter = file->private_data;
-	unsigned int events = POLLIN;
+	struct perf_mmap_data *data;
+	unsigned int events;
+
+	rcu_read_lock();
+	data = rcu_dereference(counter->data);
+	if (data)
+		events = atomic_xchg(&data->wakeup, 0);
+	else
+		events = POLL_HUP;
+	rcu_read_unlock();
 
 	poll_wait(file, &counter->waitq, wait);
 
@@ -1425,7 +1434,7 @@ static int perf_output_write(struct perf_counter *counter, int nmi,
 
 	do {
 		offset = head = atomic_read(&data->head);
-		head += sizeof(u64);
+		head += size;
 	} while (atomic_cmpxchg(&data->head, offset, head) != offset);
 
 	wakeup = (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT);
@@ -1446,6 +1455,7 @@ static int perf_output_write(struct perf_counter *counter, int nmi,
 	 * generate a poll() wakeup for every page boundary crossed
 	 */
 	if (wakeup) {
+		atomic_xchg(&data->wakeup, POLL_IN);
 		__perf_counter_update_userpage(counter, data);
 		if (nmi) {
 			counter->wakeup_pending = 1;
-- 
cgit v1.2.3-70-g09d2


From 5c1481943250ab65fa5130e05ec479c93216e9f7 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 25 Mar 2009 12:30:23 +0100
Subject: perf_counter: output objects

Provide a {type,size} header for each output entry.

This should provide extensible output, and the ability to mix multiple streams.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Orig-LKML-Reference: <20090325113316.831607932@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 11 +++++++++
 kernel/perf_counter.c        | 53 ++++++++++++++++++++++++++++++++++----------
 2 files changed, 52 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 48212c15b7d..c256635377d 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -156,6 +156,16 @@ struct perf_counter_mmap_page {
 	__u32   data_head;		/* head in the data section */
 };
 
+struct perf_event_header {
+	__u32	type;
+	__u32	size;
+};
+
+enum perf_event_type {
+	PERF_EVENT_IP		= 0,
+	PERF_EVENT_GROUP	= 1,
+};
+
 #ifdef __KERNEL__
 /*
  * Kernel-internal data types and definitions:
@@ -260,6 +270,7 @@ struct perf_counter {
 	struct list_head		list_entry;
 	struct list_head		event_entry;
 	struct list_head		sibling_list;
+	int 				nr_siblings;
 	struct perf_counter		*group_leader;
 	const struct hw_perf_counter_ops *hw_ops;
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0422fd9bf62..d76e3112d38 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -75,8 +75,10 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	 */
 	if (counter->group_leader == counter)
 		list_add_tail(&counter->list_entry, &ctx->counter_list);
-	else
+	else {
 		list_add_tail(&counter->list_entry, &group_leader->sibling_list);
+		group_leader->nr_siblings++;
+	}
 
 	list_add_rcu(&counter->event_entry, &ctx->event_list);
 }
@@ -89,6 +91,9 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	list_del_init(&counter->list_entry);
 	list_del_rcu(&counter->event_entry);
 
+	if (counter->group_leader != counter)
+		counter->group_leader->nr_siblings--;
+
 	/*
 	 * If this was a group counter with sibling counters then
 	 * upgrade the siblings to singleton counters by adding them
@@ -381,9 +386,11 @@ static int is_software_only_group(struct perf_counter *leader)
 
 	if (!is_software_counter(leader))
 		return 0;
+
 	list_for_each_entry(counter, &leader->sibling_list, list_entry)
 		if (!is_software_counter(counter))
 			return 0;
+
 	return 1;
 }
 
@@ -1480,6 +1487,9 @@ static void perf_output_copy(struct perf_output_handle *handle,
 	handle->offset = offset;
 }
 
+#define perf_output_put(handle, x) \
+	perf_output_copy((handle), &(x), sizeof(x))
+
 static void perf_output_end(struct perf_output_handle *handle, int nmi)
 {
 	if (handle->wakeup) {
@@ -1514,34 +1524,53 @@ out:
 static void perf_output_simple(struct perf_counter *counter,
 			       int nmi, struct pt_regs *regs)
 {
-	u64 entry;
+	struct {
+		struct perf_event_header header;
+		u64 ip;
+	} event;
 
-	entry = instruction_pointer(regs);
+	event.header.type = PERF_EVENT_IP;
+	event.header.size = sizeof(event);
+	event.ip = instruction_pointer(regs);
 
-	perf_output_write(counter, nmi, &entry, sizeof(entry));
+	perf_output_write(counter, nmi, &event, sizeof(event));
 }
 
-struct group_entry {
-	u64 event;
-	u64 counter;
-};
-
 static void perf_output_group(struct perf_counter *counter, int nmi)
 {
+	struct perf_output_handle handle;
+	struct perf_event_header header;
 	struct perf_counter *leader, *sub;
+	unsigned int size;
+	struct {
+		u64 event;
+		u64 counter;
+	} entry;
+	int ret;
+
+	size = sizeof(header) + counter->nr_siblings * sizeof(entry);
+
+	ret = perf_output_begin(&handle, counter, size);
+	if (ret)
+		return;
+
+	header.type = PERF_EVENT_GROUP;
+	header.size = size;
+
+	perf_output_put(&handle, header);
 
 	leader = counter->group_leader;
 	list_for_each_entry(sub, &leader->sibling_list, list_entry) {
-		struct group_entry entry;
-
 		if (sub != counter)
 			sub->hw_ops->read(sub);
 
 		entry.event = sub->hw_event.config;
 		entry.counter = atomic64_read(&sub->count);
 
-		perf_output_write(counter, nmi, &entry, sizeof(entry));
+		perf_output_put(&handle, entry);
 	}
+
+	perf_output_end(&handle, nmi);
 }
 
 void perf_counter_output(struct perf_counter *counter,
-- 
cgit v1.2.3-70-g09d2


From ea5d20cf99db5d26d43b6d322d3ace17e08a6552 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 25 Mar 2009 12:30:25 +0100
Subject: perf_counter: optionally provide the pid/tid of the sampled task

Allow cpu wide counters to profile userspace by providing what process
the sample belongs to.

This raises the first issue with the output type, lots of these
options: group, tid, callchain, etc.. are non-exclusive and could be
combined, suggesting a bitfield.

However, things like the mmap() data stream doesn't fit in that.

How to split the type field...

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Orig-LKML-Reference: <20090325113317.013775235@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  5 ++++-
 kernel/perf_counter.c        | 18 ++++++++++++++++--
 2 files changed, 20 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index c256635377d..7fdbdf8be77 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -127,8 +127,9 @@ struct perf_counter_hw_event {
 				exclude_kernel :  1, /* ditto kernel          */
 				exclude_hv     :  1, /* ditto hypervisor      */
 				exclude_idle   :  1, /* don't count when idle */
+				include_tid    :  1, /* include the tid */
 
-				__reserved_1   : 55;
+				__reserved_1   : 54;
 
 	__u32			extra_config_len;
 	__u32			__reserved_4;
@@ -164,6 +165,8 @@ struct perf_event_header {
 enum perf_event_type {
 	PERF_EVENT_IP		= 0,
 	PERF_EVENT_GROUP	= 1,
+
+	__PERF_EVENT_TID	= 0x100,
 };
 
 #ifdef __KERNEL__
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 7669afe82cc..f3e1b27bc1b 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1528,16 +1528,30 @@ out:
 static void perf_output_simple(struct perf_counter *counter,
 			       int nmi, struct pt_regs *regs)
 {
+	unsigned int size;
 	struct {
 		struct perf_event_header header;
 		u64 ip;
+		u32 pid, tid;
 	} event;
 
 	event.header.type = PERF_EVENT_IP;
-	event.header.size = sizeof(event);
 	event.ip = instruction_pointer(regs);
 
-	perf_output_write(counter, nmi, &event, sizeof(event));
+	size = sizeof(event);
+
+	if (counter->hw_event.include_tid) {
+		/* namespace issues */
+		event.pid = current->group_leader->pid;
+		event.tid = current->pid;
+
+		event.header.type |= __PERF_EVENT_TID;
+	} else
+		size -= sizeof(u64);
+
+	event.header.size = size;
+
+	perf_output_write(counter, nmi, &event, size);
 }
 
 static void perf_output_group(struct perf_counter *counter, int nmi)
-- 
cgit v1.2.3-70-g09d2


From 53cfbf593758916aac41db728f029986a62f1254 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 25 Mar 2009 22:46:58 +1100
Subject: perf_counter: record time running and time enabled for each counter

Impact: new functionality

Currently, if there are more counters enabled than can fit on the CPU,
the kernel will multiplex the counters on to the hardware using
round-robin scheduling.  That isn't too bad for sampling counters, but
for counting counters it means that the value read from a counter
represents some unknown fraction of the true count of events that
occurred while the counter was enabled.

This remedies the situation by keeping track of how long each counter
is enabled for, and how long it is actually on the cpu and counting
events.  These times are recorded in nanoseconds using the task clock
for per-task counters and the cpu clock for per-cpu counters.

These values can be supplied to userspace on a read from the counter.
Userspace requests that they be supplied after the counter value by
setting the PERF_FORMAT_TOTAL_TIME_ENABLED and/or
PERF_FORMAT_TOTAL_TIME_RUNNING bits in the hw_event.read_format field
when creating the counter.  (There is no way to change the read format
after the counter is created, though it would be possible to add some
way to do that.)

Using this information it is possible for userspace to scale the count
it reads from the counter to get an estimate of the true count:

true_count_estimate = count * total_time_enabled / total_time_running

This also lets userspace detect the situation where the counter never
got to go on the cpu: total_time_running == 0.

This functionality has been requested by the PAPI developers, and will
be generally needed for interpreting the count values from counting
counters correctly.

In the implementation, this keeps 5 time values (in nanoseconds) for
each counter: total_time_enabled and total_time_running are used when
the counter is in state OFF or ERROR and for reporting back to
userspace.  When the counter is in state INACTIVE or ACTIVE, it is the
tstamp_enabled, tstamp_running and tstamp_stopped values that are
relevant, and total_time_enabled and total_time_running are determined
from them.  (tstamp_stopped is only used in INACTIVE state.)  The
reason for doing it like this is that it means that only counters
being enabled or disabled at sched-in and sched-out time need to be
updated.  There are no new loops that iterate over all counters to
update total_time_enabled or total_time_running.

This also keeps separate child_total_time_running and
child_total_time_enabled fields that get added in when reporting the
totals to userspace.  They are separate fields so that they can be
atomic.  We don't want to use atomics for total_time_running,
total_time_enabled etc., because then we would have to use atomic
sequences to update them, which are slower than regular arithmetic and
memory accesses.

It is possible to measure total_time_running by adding a task_clock
counter to each group of counters, and total_time_enabled can be
measured approximately with a top-level task_clock counter (though
inaccuracies will creep in if you need to disable and enable groups
since it is not possible in general to disable/enable the top-level
task_clock counter simultaneously with another group).  However, that
adds extra overhead - I measured around 15% increase in the context
switch latency reported by lat_ctx (from lmbench) when a task_clock
counter was added to each of 2 groups, and around 25% increase when a
task_clock counter was added to each of 4 groups.  (In both cases a
top-level task-clock counter was also added.)

In contrast, the code added in this commit gives better information
with no overhead that I could measure (in fact in some cases I
measured lower times with this code, but the differences were all less
than one standard deviation).

[ v2: address review comments by Andrew Morton. ]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrew Morton <akpm@linux-foundation.org>
Orig-LKML-Reference: <18890.6578.728637.139402@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/perf_counter.c |   2 +
 include/linux/perf_counter.h       |  53 +++++++++++++
 kernel/perf_counter.c              | 157 ++++++++++++++++++++++++++++++++-----
 3 files changed, 191 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index d48596ab655..df007fe0cc0 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -455,6 +455,8 @@ static void counter_sched_in(struct perf_counter *counter, int cpu)
 {
 	counter->state = PERF_COUNTER_STATE_ACTIVE;
 	counter->oncpu = cpu;
+	counter->tstamp_running += counter->ctx->time_now -
+		counter->tstamp_stopped;
 	if (is_software_counter(counter))
 		counter->hw_ops->enable(counter);
 }
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 7fdbdf8be77..6bf67ce1762 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -102,6 +102,16 @@ enum perf_counter_record_type {
 #define PERF_COUNTER_EVENT_SHIFT	0
 #define PERF_COUNTER_EVENT_MASK		__PERF_COUNTER_MASK(EVENT)
 
+/*
+ * Bits that can be set in hw_event.read_format to request that
+ * reads on the counter should return the indicated quantities,
+ * in increasing order of bit value, after the counter value.
+ */
+enum perf_counter_read_format {
+	PERF_FORMAT_TOTAL_TIME_ENABLED	=  1,
+	PERF_FORMAT_TOTAL_TIME_RUNNING	=  2,
+};
+
 /*
  * Hardware event to monitor via a performance monitoring counter:
  */
@@ -281,6 +291,32 @@ struct perf_counter {
 	enum perf_counter_active_state	prev_state;
 	atomic64_t			count;
 
+	/*
+	 * These are the total time in nanoseconds that the counter
+	 * has been enabled (i.e. eligible to run, and the task has
+	 * been scheduled in, if this is a per-task counter)
+	 * and running (scheduled onto the CPU), respectively.
+	 *
+	 * They are computed from tstamp_enabled, tstamp_running and
+	 * tstamp_stopped when the counter is in INACTIVE or ACTIVE state.
+	 */
+	u64				total_time_enabled;
+	u64				total_time_running;
+
+	/*
+	 * These are timestamps used for computing total_time_enabled
+	 * and total_time_running when the counter is in INACTIVE or
+	 * ACTIVE state, measured in nanoseconds from an arbitrary point
+	 * in time.
+	 * tstamp_enabled: the notional time when the counter was enabled
+	 * tstamp_running: the notional time when the counter was scheduled on
+	 * tstamp_stopped: in INACTIVE state, the notional time when the
+	 *	counter was scheduled off.
+	 */
+	u64				tstamp_enabled;
+	u64				tstamp_running;
+	u64				tstamp_stopped;
+
 	struct perf_counter_hw_event	hw_event;
 	struct hw_perf_counter		hw;
 
@@ -291,6 +327,13 @@ struct perf_counter {
 	struct perf_counter		*parent;
 	struct list_head		child_list;
 
+	/*
+	 * These accumulate total time (in nanoseconds) that children
+	 * counters have been enabled and running, respectively.
+	 */
+	atomic64_t			child_total_time_enabled;
+	atomic64_t			child_total_time_running;
+
 	/*
 	 * Protect attach/detach and child_list:
 	 */
@@ -339,6 +382,16 @@ struct perf_counter_context {
 	int			nr_active;
 	int			is_active;
 	struct task_struct	*task;
+
+	/*
+	 * time_now is the current time in nanoseconds since an arbitrary
+	 * point in the past.  For per-task counters, this is based on the
+	 * task clock, and for per-cpu counters it is based on the cpu clock.
+	 * time_lost is an offset from the task/cpu clock, used to make it
+	 * appear that time only passes while the context is scheduled in.
+	 */
+	u64			time_now;
+	u64			time_lost;
 #endif
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 95e02575546..3b862a7988c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -116,6 +116,7 @@ counter_sched_out(struct perf_counter *counter,
 		return;
 
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
+	counter->tstamp_stopped = ctx->time_now;
 	counter->hw_ops->disable(counter);
 	counter->oncpu = -1;
 
@@ -251,6 +252,60 @@ retry:
 	spin_unlock_irq(&ctx->lock);
 }
 
+/*
+ * Get the current time for this context.
+ * If this is a task context, we use the task's task clock,
+ * or for a per-cpu context, we use the cpu clock.
+ */
+static u64 get_context_time(struct perf_counter_context *ctx, int update)
+{
+	struct task_struct *curr = ctx->task;
+
+	if (!curr)
+		return cpu_clock(smp_processor_id());
+
+	return __task_delta_exec(curr, update) + curr->se.sum_exec_runtime;
+}
+
+/*
+ * Update the record of the current time in a context.
+ */
+static void update_context_time(struct perf_counter_context *ctx, int update)
+{
+	ctx->time_now = get_context_time(ctx, update) - ctx->time_lost;
+}
+
+/*
+ * Update the total_time_enabled and total_time_running fields for a counter.
+ */
+static void update_counter_times(struct perf_counter *counter)
+{
+	struct perf_counter_context *ctx = counter->ctx;
+	u64 run_end;
+
+	if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
+		counter->total_time_enabled = ctx->time_now -
+			counter->tstamp_enabled;
+		if (counter->state == PERF_COUNTER_STATE_INACTIVE)
+			run_end = counter->tstamp_stopped;
+		else
+			run_end = ctx->time_now;
+		counter->total_time_running = run_end - counter->tstamp_running;
+	}
+}
+
+/*
+ * Update total_time_enabled and total_time_running for all counters in a group.
+ */
+static void update_group_times(struct perf_counter *leader)
+{
+	struct perf_counter *counter;
+
+	update_counter_times(leader);
+	list_for_each_entry(counter, &leader->sibling_list, list_entry)
+		update_counter_times(counter);
+}
+
 /*
  * Cross CPU call to disable a performance counter
  */
@@ -276,6 +331,8 @@ static void __perf_counter_disable(void *info)
 	 * If it is in error state, leave it in error state.
 	 */
 	if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
+		update_context_time(ctx, 1);
+		update_counter_times(counter);
 		if (counter == counter->group_leader)
 			group_sched_out(counter, cpuctx, ctx);
 		else
@@ -320,8 +377,10 @@ static void perf_counter_disable(struct perf_counter *counter)
 	 * Since we have the lock this context can't be scheduled
 	 * in, so we can change the state safely.
 	 */
-	if (counter->state == PERF_COUNTER_STATE_INACTIVE)
+	if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
+		update_counter_times(counter);
 		counter->state = PERF_COUNTER_STATE_OFF;
+	}
 
 	spin_unlock_irq(&ctx->lock);
 }
@@ -366,6 +425,8 @@ counter_sched_in(struct perf_counter *counter,
 		return -EAGAIN;
 	}
 
+	counter->tstamp_running += ctx->time_now - counter->tstamp_stopped;
+
 	if (!is_software_counter(counter))
 		cpuctx->active_oncpu++;
 	ctx->nr_active++;
@@ -425,6 +486,17 @@ static int group_can_go_on(struct perf_counter *counter,
 	return can_add_hw;
 }
 
+static void add_counter_to_ctx(struct perf_counter *counter,
+			       struct perf_counter_context *ctx)
+{
+	list_add_counter(counter, ctx);
+	ctx->nr_counters++;
+	counter->prev_state = PERF_COUNTER_STATE_OFF;
+	counter->tstamp_enabled = ctx->time_now;
+	counter->tstamp_running = ctx->time_now;
+	counter->tstamp_stopped = ctx->time_now;
+}
+
 /*
  * Cross CPU call to install and enable a performance counter
  */
@@ -449,6 +521,7 @@ static void __perf_install_in_context(void *info)
 
 	curr_rq_lock_irq_save(&flags);
 	spin_lock(&ctx->lock);
+	update_context_time(ctx, 1);
 
 	/*
 	 * Protect the list operation against NMI by disabling the
@@ -456,9 +529,7 @@ static void __perf_install_in_context(void *info)
 	 */
 	perf_flags = hw_perf_save_disable();
 
-	list_add_counter(counter, ctx);
-	ctx->nr_counters++;
-	counter->prev_state = PERF_COUNTER_STATE_OFF;
+	add_counter_to_ctx(counter, ctx);
 
 	/*
 	 * Don't put the counter on if it is disabled or if
@@ -486,8 +557,10 @@ static void __perf_install_in_context(void *info)
 		 */
 		if (leader != counter)
 			group_sched_out(leader, cpuctx, ctx);
-		if (leader->hw_event.pinned)
+		if (leader->hw_event.pinned) {
+			update_group_times(leader);
 			leader->state = PERF_COUNTER_STATE_ERROR;
+		}
 	}
 
 	if (!err && !ctx->task && cpuctx->max_pertask)
@@ -548,10 +621,8 @@ retry:
 	 * can add the counter safely, if it the call above did not
 	 * succeed.
 	 */
-	if (list_empty(&counter->list_entry)) {
-		list_add_counter(counter, ctx);
-		ctx->nr_counters++;
-	}
+	if (list_empty(&counter->list_entry))
+		add_counter_to_ctx(counter, ctx);
 	spin_unlock_irq(&ctx->lock);
 }
 
@@ -576,11 +647,13 @@ static void __perf_counter_enable(void *info)
 
 	curr_rq_lock_irq_save(&flags);
 	spin_lock(&ctx->lock);
+	update_context_time(ctx, 1);
 
 	counter->prev_state = counter->state;
 	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
 		goto unlock;
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
+	counter->tstamp_enabled = ctx->time_now - counter->total_time_enabled;
 
 	/*
 	 * If the counter is in a group and isn't the group leader,
@@ -602,8 +675,10 @@ static void __perf_counter_enable(void *info)
 		 */
 		if (leader != counter)
 			group_sched_out(leader, cpuctx, ctx);
-		if (leader->hw_event.pinned)
+		if (leader->hw_event.pinned) {
+			update_group_times(leader);
 			leader->state = PERF_COUNTER_STATE_ERROR;
+		}
 	}
 
  unlock:
@@ -659,8 +734,11 @@ static void perf_counter_enable(struct perf_counter *counter)
 	 * Since we have the lock this context can't be scheduled
 	 * in, so we can change the state safely.
 	 */
-	if (counter->state == PERF_COUNTER_STATE_OFF)
+	if (counter->state == PERF_COUNTER_STATE_OFF) {
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->tstamp_enabled = ctx->time_now -
+			counter->total_time_enabled;
+	}
  out:
 	spin_unlock_irq(&ctx->lock);
 }
@@ -693,6 +771,7 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 	ctx->is_active = 0;
 	if (likely(!ctx->nr_counters))
 		goto out;
+	update_context_time(ctx, 0);
 
 	flags = hw_perf_save_disable();
 	if (ctx->nr_active) {
@@ -797,6 +876,13 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 	if (likely(!ctx->nr_counters))
 		goto out;
 
+	/*
+	 * Add any time since the last sched_out to the lost time
+	 * so it doesn't get included in the total_time_enabled and
+	 * total_time_running measures for counters in the context.
+	 */
+	ctx->time_lost = get_context_time(ctx, 0) - ctx->time_now;
+
 	flags = hw_perf_save_disable();
 
 	/*
@@ -817,8 +903,10 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 		 * If this pinned group hasn't been scheduled,
 		 * put it in error state.
 		 */
-		if (counter->state == PERF_COUNTER_STATE_INACTIVE)
+		if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
+			update_group_times(counter);
 			counter->state = PERF_COUNTER_STATE_ERROR;
+		}
 	}
 
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
@@ -902,8 +990,10 @@ int perf_counter_task_disable(void)
 	perf_flags = hw_perf_save_disable();
 
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		if (counter->state != PERF_COUNTER_STATE_ERROR)
+		if (counter->state != PERF_COUNTER_STATE_ERROR) {
+			update_group_times(counter);
 			counter->state = PERF_COUNTER_STATE_OFF;
+		}
 	}
 
 	hw_perf_restore(perf_flags);
@@ -946,6 +1036,8 @@ int perf_counter_task_enable(void)
 		if (counter->state > PERF_COUNTER_STATE_OFF)
 			continue;
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->tstamp_enabled = ctx->time_now -
+			counter->total_time_enabled;
 		counter->hw_event.disabled = 0;
 	}
 	hw_perf_restore(perf_flags);
@@ -1009,10 +1101,14 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 static void __read(void *info)
 {
 	struct perf_counter *counter = info;
+	struct perf_counter_context *ctx = counter->ctx;
 	unsigned long flags;
 
 	curr_rq_lock_irq_save(&flags);
+	if (ctx->is_active)
+		update_context_time(ctx, 1);
 	counter->hw_ops->read(counter);
+	update_counter_times(counter);
 	curr_rq_unlock_irq_restore(&flags);
 }
 
@@ -1025,6 +1121,8 @@ static u64 perf_counter_read(struct perf_counter *counter)
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
 		smp_call_function_single(counter->oncpu,
 					 __read, counter, 1);
+	} else if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
+		update_counter_times(counter);
 	}
 
 	return atomic64_read(&counter->count);
@@ -1137,10 +1235,8 @@ static int perf_release(struct inode *inode, struct file *file)
 static ssize_t
 perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 {
-	u64 cntval;
-
-	if (count < sizeof(cntval))
-		return -EINVAL;
+	u64 values[3];
+	int n;
 
 	/*
 	 * Return end-of-file for a read on a counter that is in
@@ -1151,10 +1247,24 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 		return 0;
 
 	mutex_lock(&counter->mutex);
-	cntval = perf_counter_read(counter);
+	values[0] = perf_counter_read(counter);
+	n = 1;
+	if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		values[n++] = counter->total_time_enabled +
+			atomic64_read(&counter->child_total_time_enabled);
+	if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		values[n++] = counter->total_time_running +
+			atomic64_read(&counter->child_total_time_running);
 	mutex_unlock(&counter->mutex);
 
-	return put_user(cntval, (u64 __user *) buf) ? -EFAULT : sizeof(cntval);
+	if (count < n * sizeof(u64))
+		return -EINVAL;
+	count = n * sizeof(u64);
+
+	if (copy_to_user(buf, values, count))
+		return -EFAULT;
+
+	return count;
 }
 
 static ssize_t
@@ -2290,8 +2400,7 @@ inherit_counter(struct perf_counter *parent_counter,
 	 * Link it up in the child's context:
 	 */
 	child_counter->task = child;
-	list_add_counter(child_counter, child_ctx);
-	child_ctx->nr_counters++;
+	add_counter_to_ctx(child_counter, child_ctx);
 
 	child_counter->parent = parent_counter;
 	/*
@@ -2361,6 +2470,10 @@ static void sync_child_counter(struct perf_counter *child_counter,
 	 * Add back the child's count to the parent's count:
 	 */
 	atomic64_add(child_val, &parent_counter->count);
+	atomic64_add(child_counter->total_time_enabled,
+		     &parent_counter->child_total_time_enabled);
+	atomic64_add(child_counter->total_time_running,
+		     &parent_counter->child_total_time_running);
 
 	/*
 	 * Remove this counter from the parent's list
@@ -2395,6 +2508,7 @@ __perf_counter_exit_task(struct task_struct *child,
 	if (child != current) {
 		wait_task_inactive(child, 0);
 		list_del_init(&child_counter->list_entry);
+		update_counter_times(child_counter);
 	} else {
 		struct perf_cpu_context *cpuctx;
 		unsigned long flags;
@@ -2412,6 +2526,7 @@ __perf_counter_exit_task(struct task_struct *child,
 		cpuctx = &__get_cpu_var(perf_cpu_context);
 
 		group_sched_out(child_counter, cpuctx, child_ctx);
+		update_counter_times(child_counter);
 
 		list_del_init(&child_counter->list_entry);
 
-- 
cgit v1.2.3-70-g09d2


From 925d519ab82b6dd7aca9420d809ee83819c08db2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 30 Mar 2009 19:07:02 +0200
Subject: perf_counter: unify and fix delayed counter wakeup

While going over the wakeup code I noticed delayed wakeups only work
for hardware counters but basically all software counters rely on
them.

This patch unifies and generalizes the delayed wakeup to fix this
issue.

Since we're dealing with NMI context bits here, use a cmpxchg() based
single link list implementation to track counters that have pending
wakeups.

[ This should really be generic code for delayed wakeups, but since we
  cannot use cmpxchg()/xchg() in generic code, I've let it live in the
  perf_counter code. -- Eric Dumazet could use it to aggregate the
  network wakeups. ]

Furthermore, the x86 method of using TIF flags was flawed in that its
quite possible to end up setting the bit on the idle task, loosing the
wakeup.

The powerpc method uses per-cpu storage and does appear to be
sufficient.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090330171023.153932974@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/include/asm/hw_irq.h   |   4 +-
 arch/powerpc/kernel/irq.c           |   2 +-
 arch/powerpc/kernel/perf_counter.c  |  22 +------
 arch/x86/include/asm/perf_counter.h |   5 +-
 arch/x86/include/asm/thread_info.h  |   4 +-
 arch/x86/kernel/cpu/perf_counter.c  |  29 --------
 arch/x86/kernel/signal.c            |   6 --
 include/linux/perf_counter.h        |  15 +++--
 kernel/perf_counter.c               | 128 +++++++++++++++++++++++++++++++++---
 kernel/timer.c                      |   3 +
 10 files changed, 142 insertions(+), 76 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index cb32d571c9c..20a44d0c9fd 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -132,7 +132,7 @@ static inline int irqs_disabled_flags(unsigned long flags)
 struct irq_chip;
 
 #ifdef CONFIG_PERF_COUNTERS
-static inline unsigned long get_perf_counter_pending(void)
+static inline unsigned long test_perf_counter_pending(void)
 {
 	unsigned long x;
 
@@ -160,7 +160,7 @@ extern void perf_counter_do_pending(void);
 
 #else
 
-static inline unsigned long get_perf_counter_pending(void)
+static inline unsigned long test_perf_counter_pending(void)
 {
 	return 0;
 }
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 469e9635ff0..2cd471f92fe 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -135,7 +135,7 @@ notrace void raw_local_irq_restore(unsigned long en)
 			iseries_handle_interrupts();
 	}
 
-	if (get_perf_counter_pending()) {
+	if (test_perf_counter_pending()) {
 		clear_perf_counter_pending();
 		perf_counter_do_pending();
 	}
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index df007fe0cc0..cde720fc495 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -649,24 +649,6 @@ hw_perf_counter_init(struct perf_counter *counter)
 	return &power_perf_ops;
 }
 
-/*
- * Handle wakeups.
- */
-void perf_counter_do_pending(void)
-{
-	int i;
-	struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
-	struct perf_counter *counter;
-
-	for (i = 0; i < cpuhw->n_counters; ++i) {
-		counter = cpuhw->counter[i];
-		if (counter && counter->wakeup_pending) {
-			counter->wakeup_pending = 0;
-			wake_up(&counter->waitq);
-		}
-	}
-}
-
 /*
  * A counter has overflowed; update its count and record
  * things if requested.  Note that interrupts are hard-disabled
@@ -720,7 +702,7 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 	struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
 	struct perf_counter *counter;
 	long val;
-	int need_wakeup = 0, found = 0;
+	int found = 0;
 
 	for (i = 0; i < cpuhw->n_counters; ++i) {
 		counter = cpuhw->counter[i];
@@ -761,7 +743,7 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 	 * immediately; otherwise we'll have do the wakeup when interrupts
 	 * get soft-enabled.
 	 */
-	if (get_perf_counter_pending() && regs->softe) {
+	if (test_perf_counter_pending() && regs->softe) {
 		irq_enter();
 		clear_perf_counter_pending();
 		perf_counter_do_pending();
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
index 1662043b340..e2b0e66b235 100644
--- a/arch/x86/include/asm/perf_counter.h
+++ b/arch/x86/include/asm/perf_counter.h
@@ -84,8 +84,9 @@ union cpuid10_edx {
 #define MSR_ARCH_PERFMON_FIXED_CTR2			0x30b
 #define X86_PMC_IDX_FIXED_BUS_CYCLES			(X86_PMC_IDX_FIXED + 2)
 
-#define set_perf_counter_pending()	\
-		set_tsk_thread_flag(current, TIF_PERF_COUNTERS);
+#define set_perf_counter_pending()	do { } while (0)
+#define clear_perf_counter_pending()	do { } while (0)
+#define test_perf_counter_pending()	(0)
 
 #ifdef CONFIG_PERF_COUNTERS
 extern void init_hw_perf_counters(void);
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 3ffd5d2a367..8820a73ae09 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -83,7 +83,6 @@ struct thread_info {
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
 #define TIF_MCE_NOTIFY		10	/* notify userspace of an MCE */
-#define TIF_PERF_COUNTERS	11	/* notify perf counter work */
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_IA32		17	/* 32bit process */
 #define TIF_FORK		18	/* ret_from_fork */
@@ -107,7 +106,6 @@ struct thread_info {
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 #define _TIF_MCE_NOTIFY		(1 << TIF_MCE_NOTIFY)
-#define _TIF_PERF_COUNTERS	(1 << TIF_PERF_COUNTERS)
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_IA32		(1 << TIF_IA32)
 #define _TIF_FORK		(1 << TIF_FORK)
@@ -141,7 +139,7 @@ struct thread_info {
 
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK						\
-	(_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_PERF_COUNTERS|_TIF_NOTIFY_RESUME)
+	(_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 3f95b0cdc55..7aab177fb56 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -227,7 +227,6 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 		 */
 		hwc->config |= pmc_ops->event_map(perf_event_id(hw_event));
 	}
-	counter->wakeup_pending = 0;
 
 	return 0;
 }
@@ -773,34 +772,6 @@ void smp_perf_counter_interrupt(struct pt_regs *regs)
 	irq_exit();
 }
 
-/*
- * This handler is triggered by NMI contexts:
- */
-void perf_counter_notify(struct pt_regs *regs)
-{
-	struct cpu_hw_counters *cpuc;
-	unsigned long flags;
-	int bit, cpu;
-
-	local_irq_save(flags);
-	cpu = smp_processor_id();
-	cpuc = &per_cpu(cpu_hw_counters, cpu);
-
-	for_each_bit(bit, cpuc->used, X86_PMC_IDX_MAX) {
-		struct perf_counter *counter = cpuc->counters[bit];
-
-		if (!counter)
-			continue;
-
-		if (counter->wakeup_pending) {
-			counter->wakeup_pending = 0;
-			wake_up(&counter->waitq);
-		}
-	}
-
-	local_irq_restore(flags);
-}
-
 void perf_counters_lapic_init(int nmi)
 {
 	u32 apic_val;
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 611615a92c9..0a813b17b17 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -6,7 +6,6 @@
  *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes
  *  2000-2002   x86-64 support by Andi Kleen
  */
-#include <linux/perf_counter.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
@@ -872,11 +871,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 		tracehook_notify_resume(regs);
 	}
 
-	if (thread_info_flags & _TIF_PERF_COUNTERS) {
-		clear_thread_flag(TIF_PERF_COUNTERS);
-		perf_counter_notify(regs);
-	}
-
 #ifdef CONFIG_X86_32
 	clear_thread_flag(TIF_IRET);
 #endif /* CONFIG_X86_32 */
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 6bf67ce1762..0d833228eee 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -275,6 +275,10 @@ struct perf_mmap_data {
 	void 				*data_pages[0];
 };
 
+struct perf_wakeup_entry {
+	struct perf_wakeup_entry *next;
+};
+
 /**
  * struct perf_counter - performance counter kernel representation:
  */
@@ -350,7 +354,7 @@ struct perf_counter {
 	/* poll related */
 	wait_queue_head_t		waitq;
 	/* optional: for NMIs */
-	int				wakeup_pending;
+	struct perf_wakeup_entry	wakeup;
 
 	void (*destroy)(struct perf_counter *);
 	struct rcu_head			rcu_head;
@@ -427,7 +431,7 @@ extern void perf_counter_task_sched_out(struct task_struct *task, int cpu);
 extern void perf_counter_task_tick(struct task_struct *task, int cpu);
 extern void perf_counter_init_task(struct task_struct *child);
 extern void perf_counter_exit_task(struct task_struct *child);
-extern void perf_counter_notify(struct pt_regs *regs);
+extern void perf_counter_do_pending(void);
 extern void perf_counter_print_debug(void);
 extern void perf_counter_unthrottle(void);
 extern u64 hw_perf_save_disable(void);
@@ -461,7 +465,7 @@ static inline void
 perf_counter_task_tick(struct task_struct *task, int cpu)		{ }
 static inline void perf_counter_init_task(struct task_struct *child)	{ }
 static inline void perf_counter_exit_task(struct task_struct *child)	{ }
-static inline void perf_counter_notify(struct pt_regs *regs)		{ }
+static inline void perf_counter_do_pending(void)			{ }
 static inline void perf_counter_print_debug(void)			{ }
 static inline void perf_counter_unthrottle(void)			{ }
 static inline void hw_perf_restore(u64 ctrl)				{ }
@@ -469,8 +473,9 @@ static inline u64 hw_perf_save_disable(void)		      { return 0; }
 static inline int perf_counter_task_disable(void)	{ return -EINVAL; }
 static inline int perf_counter_task_enable(void)	{ return -EINVAL; }
 
-static inline void perf_swcounter_event(u32 event, u64 nr,
-					int nmi, struct pt_regs *regs)	{ }
+static inline void
+perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs)	{ }
+
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 3b862a7988c..f70ff80e79d 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1197,8 +1197,12 @@ static void free_counter_rcu(struct rcu_head *head)
 	kfree(counter);
 }
 
+static void perf_pending_sync(struct perf_counter *counter);
+
 static void free_counter(struct perf_counter *counter)
 {
+	perf_pending_sync(counter);
+
 	if (counter->destroy)
 		counter->destroy(counter);
 
@@ -1528,6 +1532,118 @@ static const struct file_operations perf_fops = {
 	.mmap			= perf_mmap,
 };
 
+/*
+ * Perf counter wakeup
+ *
+ * If there's data, ensure we set the poll() state and publish everything
+ * to user-space before waking everybody up.
+ */
+
+void perf_counter_wakeup(struct perf_counter *counter)
+{
+	struct perf_mmap_data *data;
+
+	rcu_read_lock();
+	data = rcu_dereference(counter->data);
+	if (data) {
+		(void)atomic_xchg(&data->wakeup, POLL_IN);
+		__perf_counter_update_userpage(counter, data);
+	}
+	rcu_read_unlock();
+
+	wake_up_all(&counter->waitq);
+}
+
+/*
+ * Pending wakeups
+ *
+ * Handle the case where we need to wakeup up from NMI (or rq->lock) context.
+ *
+ * The NMI bit means we cannot possibly take locks. Therefore, maintain a
+ * single linked list and use cmpxchg() to add entries lockless.
+ */
+
+#define PENDING_TAIL ((struct perf_wakeup_entry *)-1UL)
+
+static DEFINE_PER_CPU(struct perf_wakeup_entry *, perf_wakeup_head) = {
+	PENDING_TAIL,
+};
+
+static void perf_pending_queue(struct perf_counter *counter)
+{
+	struct perf_wakeup_entry **head;
+	struct perf_wakeup_entry *prev, *next;
+
+	if (cmpxchg(&counter->wakeup.next, NULL, PENDING_TAIL) != NULL)
+		return;
+
+	head = &get_cpu_var(perf_wakeup_head);
+
+	do {
+		prev = counter->wakeup.next = *head;
+		next = &counter->wakeup;
+	} while (cmpxchg(head, prev, next) != prev);
+
+	set_perf_counter_pending();
+
+	put_cpu_var(perf_wakeup_head);
+}
+
+static int __perf_pending_run(void)
+{
+	struct perf_wakeup_entry *list;
+	int nr = 0;
+
+	list = xchg(&__get_cpu_var(perf_wakeup_head), PENDING_TAIL);
+	while (list != PENDING_TAIL) {
+		struct perf_counter *counter = container_of(list,
+				struct perf_counter, wakeup);
+
+		list = list->next;
+
+		counter->wakeup.next = NULL;
+		/*
+		 * Ensure we observe the unqueue before we issue the wakeup,
+		 * so that we won't be waiting forever.
+		 * -- see perf_not_pending().
+		 */
+		smp_wmb();
+
+		perf_counter_wakeup(counter);
+		nr++;
+	}
+
+	return nr;
+}
+
+static inline int perf_not_pending(struct perf_counter *counter)
+{
+	/*
+	 * If we flush on whatever cpu we run, there is a chance we don't
+	 * need to wait.
+	 */
+	get_cpu();
+	__perf_pending_run();
+	put_cpu();
+
+	/*
+	 * Ensure we see the proper queue state before going to sleep
+	 * so that we do not miss the wakeup. -- see perf_pending_handle()
+	 */
+	smp_rmb();
+	return counter->wakeup.next == NULL;
+}
+
+static void perf_pending_sync(struct perf_counter *counter)
+{
+	wait_event(counter->waitq, perf_not_pending(counter));
+}
+
+void perf_counter_do_pending(void)
+{
+	__perf_pending_run();
+}
+
 /*
  * Output
  */
@@ -1611,13 +1727,10 @@ static void perf_output_copy(struct perf_output_handle *handle,
 static void perf_output_end(struct perf_output_handle *handle, int nmi)
 {
 	if (handle->wakeup) {
-		(void)atomic_xchg(&handle->data->wakeup, POLL_IN);
-		__perf_counter_update_userpage(handle->counter, handle->data);
-		if (nmi) {
-			handle->counter->wakeup_pending = 1;
-			set_perf_counter_pending();
-		} else
-			wake_up(&handle->counter->waitq);
+		if (nmi)
+			perf_pending_queue(handle->counter);
+		else
+			perf_counter_wakeup(handle->counter);
 	}
 	rcu_read_unlock();
 }
@@ -2211,7 +2324,6 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 
 	counter->cpu			= cpu;
 	counter->hw_event		= *hw_event;
-	counter->wakeup_pending		= 0;
 	counter->group_leader		= group_leader;
 	counter->hw_ops			= NULL;
 	counter->ctx			= ctx;
diff --git a/kernel/timer.c b/kernel/timer.c
index b4555568b4e..672ca25fbc4 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -37,6 +37,7 @@
 #include <linux/delay.h>
 #include <linux/tick.h>
 #include <linux/kallsyms.h>
+#include <linux/perf_counter.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -1167,6 +1168,8 @@ static void run_timer_softirq(struct softirq_action *h)
 {
 	struct tvec_base *base = __get_cpu_var(tvec_bases);
 
+	perf_counter_do_pending();
+
 	hrtimer_run_pending();
 
 	if (time_after_eq(jiffies, base->timer_jiffies))
-- 
cgit v1.2.3-70-g09d2


From 38ff667b321b00f5e6830e93fb4ab11a653a2920 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 30 Mar 2009 19:07:03 +0200
Subject: perf_counter: fix update_userpage()

It just occured to me it is possible to have multiple contending
updates of the userpage (mmap information vs overflow vs counter).
This would break the seqlock logic.

It appear the arch code uses this from NMI context, so we cannot
possibly serialize its use, therefore separate the data_head update
from it and let it return to its original use.

The arch code needs to make sure there are no contending callers by
disabling the counter before using it -- powerpc appears to do this
nicely.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090330171023.241410660@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 35 +++++++++++++++++++++++++++++++++++
 kernel/perf_counter.c        | 38 +++++++++++++++++++++++---------------
 2 files changed, 58 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 0d833228eee..8ac18852dcf 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -160,10 +160,45 @@ struct perf_counter_hw_event {
 struct perf_counter_mmap_page {
 	__u32	version;		/* version number of this structure */
 	__u32	compat_version;		/* lowest version this is compat with */
+
+	/*
+	 * Bits needed to read the hw counters in user-space.
+	 *
+	 * The index and offset should be read atomically using the seqlock:
+	 *
+	 *   __u32 seq, index;
+	 *   __s64 offset;
+	 *
+	 * again:
+	 *   rmb();
+	 *   seq = pc->lock;
+	 *
+	 *   if (unlikely(seq & 1)) {
+	 *     cpu_relax();
+	 *     goto again;
+	 *   }
+	 *
+	 *   index = pc->index;
+	 *   offset = pc->offset;
+	 *
+	 *   rmb();
+	 *   if (pc->lock != seq)
+	 *     goto again;
+	 *
+	 * After this, index contains architecture specific counter index + 1,
+	 * so that 0 means unavailable, offset contains the value to be added
+	 * to the result of the raw timer read to obtain this counter's value.
+	 */
 	__u32	lock;			/* seqlock for synchronization */
 	__u32	index;			/* hardware counter identifier */
 	__s64	offset;			/* add to hardware counter value */
 
+	/*
+	 * Control data for the mmap() data buffer.
+	 *
+	 * User-space reading this value should issue an rmb(), on SMP capable
+	 * platforms, after reading this value -- see perf_counter_wakeup().
+	 */
 	__u32   data_head;		/* head in the data section */
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f70ff80e79d..c95e92329b9 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1316,10 +1316,22 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	return err;
 }
 
-static void __perf_counter_update_userpage(struct perf_counter *counter,
-					   struct perf_mmap_data *data)
+/*
+ * Callers need to ensure there can be no nesting of this function, otherwise
+ * the seqlock logic goes bad. We can not serialize this because the arch
+ * code calls this from NMI context.
+ */
+void perf_counter_update_userpage(struct perf_counter *counter)
 {
-	struct perf_counter_mmap_page *userpg = data->user_page;
+	struct perf_mmap_data *data;
+	struct perf_counter_mmap_page *userpg;
+
+	rcu_read_lock();
+	data = rcu_dereference(counter->data);
+	if (!data)
+		goto unlock;
+
+	userpg = data->user_page;
 
 	/*
 	 * Disable preemption so as to not let the corresponding user-space
@@ -1333,20 +1345,10 @@ static void __perf_counter_update_userpage(struct perf_counter *counter,
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
 		userpg->offset -= atomic64_read(&counter->hw.prev_count);
 
-	userpg->data_head = atomic_read(&data->head);
 	smp_wmb();
 	++userpg->lock;
 	preempt_enable();
-}
-
-void perf_counter_update_userpage(struct perf_counter *counter)
-{
-	struct perf_mmap_data *data;
-
-	rcu_read_lock();
-	data = rcu_dereference(counter->data);
-	if (data)
-		__perf_counter_update_userpage(counter, data);
+unlock:
 	rcu_read_unlock();
 }
 
@@ -1547,7 +1549,13 @@ void perf_counter_wakeup(struct perf_counter *counter)
 	data = rcu_dereference(counter->data);
 	if (data) {
 		(void)atomic_xchg(&data->wakeup, POLL_IN);
-		__perf_counter_update_userpage(counter, data);
+		/*
+		 * Ensure all data writes are issued before updating the
+		 * user-space data head information. The matching rmb()
+		 * will be in userspace after reading this value.
+		 */
+		smp_wmb();
+		data->user_page->data_head = atomic_read(&data->head);
 	}
 	rcu_read_unlock();
 
-- 
cgit v1.2.3-70-g09d2


From 0a4a93919bdc5cee48fe4367591e8e0449c1086c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 30 Mar 2009 19:07:05 +0200
Subject: perf_counter: executable mmap() information

Currently the profiling information returns userspace IPs but no way
to correlate them to userspace code. Userspace could look into
/proc/$pid/maps but that might not be current or even present anymore
at the time of analyzing the IPs.

Therefore provide means to track the mmap information and provide it
in the output stream.

XXX: only covers mmap()/munmap(), mremap() and mprotect() are missing.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Orig-LKML-Reference: <20090330171023.417259499@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  24 ++++++-
 kernel/perf_counter.c        | 145 +++++++++++++++++++++++++++++++++++++++++++
 mm/mmap.c                    |  10 +++
 3 files changed, 177 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 8ac18852dcf..037a81145ac 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -137,9 +137,11 @@ struct perf_counter_hw_event {
 				exclude_kernel :  1, /* ditto kernel          */
 				exclude_hv     :  1, /* ditto hypervisor      */
 				exclude_idle   :  1, /* don't count when idle */
-				include_tid    :  1, /* include the tid */
+				include_tid    :  1, /* include the tid       */
+				mmap           :  1, /* include mmap data     */
+				munmap         :  1, /* include munmap data   */
 
-				__reserved_1   : 54;
+				__reserved_1   : 52;
 
 	__u32			extra_config_len;
 	__u32			__reserved_4;
@@ -211,6 +213,9 @@ enum perf_event_type {
 	PERF_EVENT_IP		= 0,
 	PERF_EVENT_GROUP	= 1,
 
+	PERF_EVENT_MMAP		= 2,
+	PERF_EVENT_MUNMAP	= 3,
+
 	__PERF_EVENT_TID	= 0x100,
 };
 
@@ -491,6 +496,12 @@ static inline int is_software_counter(struct perf_counter *counter)
 
 extern void perf_swcounter_event(u32, u64, int, struct pt_regs *);
 
+extern void perf_counter_mmap(unsigned long addr, unsigned long len,
+			      unsigned long pgoff, struct file *file);
+
+extern void perf_counter_munmap(unsigned long addr, unsigned long len,
+				unsigned long pgoff, struct file *file);
+
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
@@ -511,6 +522,15 @@ static inline int perf_counter_task_enable(void)	{ return -EINVAL; }
 static inline void
 perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs)	{ }
 
+
+static inline void
+perf_counter_mmap(unsigned long addr, unsigned long len,
+		  unsigned long pgoff, struct file *file)		{ }
+
+static inline void
+perf_counter_munmap(unsigned long addr, unsigned long len,
+		    unsigned long pgoff, struct file *file) 		{ }
+
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index c95e92329b9..f35e89e3d6a 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -25,6 +25,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/kernel_stat.h>
 #include <linux/perf_counter.h>
+#include <linux/dcache.h>
 
 #include <asm/irq_regs.h>
 
@@ -1843,6 +1844,150 @@ void perf_counter_output(struct perf_counter *counter,
 	}
 }
 
+/*
+ * mmap tracking
+ */
+
+struct perf_mmap_event {
+	struct file	*file;
+	char		*file_name;
+	int		file_size;
+
+	struct {
+		struct perf_event_header	header;
+
+		u32				pid;
+		u32				tid;
+		u64				start;
+		u64				len;
+		u64				pgoff;
+	} event;
+};
+
+static void perf_counter_mmap_output(struct perf_counter *counter,
+				     struct perf_mmap_event *mmap_event)
+{
+	struct perf_output_handle handle;
+	int size = mmap_event->event.header.size;
+	int ret = perf_output_begin(&handle, counter, size);
+
+	if (ret)
+		return;
+
+	perf_output_put(&handle, mmap_event->event);
+	perf_output_copy(&handle, mmap_event->file_name,
+				   mmap_event->file_size);
+	perf_output_end(&handle, 0);
+}
+
+static int perf_counter_mmap_match(struct perf_counter *counter,
+				   struct perf_mmap_event *mmap_event)
+{
+	if (counter->hw_event.mmap &&
+	    mmap_event->event.header.type == PERF_EVENT_MMAP)
+		return 1;
+
+	if (counter->hw_event.munmap &&
+	    mmap_event->event.header.type == PERF_EVENT_MUNMAP)
+		return 1;
+
+	return 0;
+}
+
+static void perf_counter_mmap_ctx(struct perf_counter_context *ctx,
+				  struct perf_mmap_event *mmap_event)
+{
+	struct perf_counter *counter;
+
+	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
+		return;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
+		if (perf_counter_mmap_match(counter, mmap_event))
+			perf_counter_mmap_output(counter, mmap_event);
+	}
+	rcu_read_unlock();
+}
+
+static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event)
+{
+	struct perf_cpu_context *cpuctx;
+	struct file *file = mmap_event->file;
+	unsigned int size;
+	char tmp[16];
+	char *buf = NULL;
+	char *name;
+
+	if (file) {
+		buf = kzalloc(PATH_MAX, GFP_KERNEL);
+		if (!buf) {
+			name = strncpy(tmp, "//enomem", sizeof(tmp));
+			goto got_name;
+		}
+		name = dentry_path(file->f_dentry, buf, PATH_MAX);
+		if (IS_ERR(name)) {
+			name = strncpy(tmp, "//toolong", sizeof(tmp));
+			goto got_name;
+		}
+	} else {
+		name = strncpy(tmp, "//anon", sizeof(tmp));
+		goto got_name;
+	}
+
+got_name:
+	size = ALIGN(strlen(name), sizeof(u64));
+
+	mmap_event->file_name = name;
+	mmap_event->file_size = size;
+
+	mmap_event->event.header.size = sizeof(mmap_event->event) + size;
+
+	cpuctx = &get_cpu_var(perf_cpu_context);
+	perf_counter_mmap_ctx(&cpuctx->ctx, mmap_event);
+	put_cpu_var(perf_cpu_context);
+
+	perf_counter_mmap_ctx(&current->perf_counter_ctx, mmap_event);
+
+	kfree(buf);
+}
+
+void perf_counter_mmap(unsigned long addr, unsigned long len,
+		       unsigned long pgoff, struct file *file)
+{
+	struct perf_mmap_event mmap_event = {
+		.file   = file,
+		.event  = {
+			.header = { .type = PERF_EVENT_MMAP, },
+			.pid	= current->group_leader->pid,
+			.tid	= current->pid,
+			.start  = addr,
+			.len    = len,
+			.pgoff  = pgoff,
+		},
+	};
+
+	perf_counter_mmap_event(&mmap_event);
+}
+
+void perf_counter_munmap(unsigned long addr, unsigned long len,
+			 unsigned long pgoff, struct file *file)
+{
+	struct perf_mmap_event mmap_event = {
+		.file   = file,
+		.event  = {
+			.header = { .type = PERF_EVENT_MUNMAP, },
+			.pid	= current->group_leader->pid,
+			.tid	= current->pid,
+			.start  = addr,
+			.len    = len,
+			.pgoff  = pgoff,
+		},
+	};
+
+	perf_counter_mmap_event(&mmap_event);
+}
+
 /*
  * Generic software counter infrastructure
  */
diff --git a/mm/mmap.c b/mm/mmap.c
index 4a3841186c1..1df63f614f9 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -28,6 +28,7 @@
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
 #include <linux/mmu_notifier.h>
+#include <linux/perf_counter.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -1223,6 +1224,9 @@ munmap_back:
 	if (correct_wcount)
 		atomic_inc(&inode->i_writecount);
 out:
+	if (vm_flags & VM_EXEC)
+		perf_counter_mmap(addr, len, pgoff, file);
+
 	mm->total_vm += len >> PAGE_SHIFT;
 	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
 	if (vm_flags & VM_LOCKED) {
@@ -1756,6 +1760,12 @@ static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
 	do {
 		long nrpages = vma_pages(vma);
 
+		if (vma->vm_flags & VM_EXEC) {
+			perf_counter_munmap(vma->vm_start,
+					nrpages << PAGE_SHIFT,
+					vma->vm_pgoff, vma->vm_file);
+		}
+
 		mm->total_vm -= nrpages;
 		vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
 		vma = remove_vma(vma);
-- 
cgit v1.2.3-70-g09d2


From 5ed00415e304203a0a9dcaef226d6d3f1106070e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 30 Mar 2009 19:07:12 +0200
Subject: perf_counter: re-arrange the perf_event_type

Breaks ABI yet again :-)

Change the event type so that [0, 2^31-1] are regular event types, but
[2^31, 2^32-1] forms a bitmask for overflow events.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090330171024.047961770@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  6 +++--
 kernel/perf_counter.c        | 56 ++++++++++++++++++++------------------------
 2 files changed, 29 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 037a81145ac..edf5bfb7ff5 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -210,13 +210,15 @@ struct perf_event_header {
 };
 
 enum perf_event_type {
-	PERF_EVENT_IP		= 0,
+
 	PERF_EVENT_GROUP	= 1,
 
 	PERF_EVENT_MMAP		= 2,
 	PERF_EVENT_MUNMAP	= 3,
 
-	__PERF_EVENT_TID	= 0x100,
+	PERF_EVENT_OVERFLOW	= 1UL << 31,
+	__PERF_EVENT_IP		= 1UL << 30,
+	__PERF_EVENT_TID	= 1UL << 29,
 };
 
 #ifdef __KERNEL__
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 4471e7e2c10..d93e9ddf784 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1754,50 +1754,44 @@ static void perf_output_end(struct perf_output_handle *handle)
 	rcu_read_unlock();
 }
 
-static int perf_output_write(struct perf_counter *counter, int nmi,
-			     void *buf, ssize_t size)
-{
-	struct perf_output_handle handle;
-	int ret;
-
-	ret = perf_output_begin(&handle, counter, size, nmi);
-	if (ret)
-		goto out;
-
-	perf_output_copy(&handle, buf, size);
-	perf_output_end(&handle);
-
-out:
-	return ret;
-}
-
 static void perf_output_simple(struct perf_counter *counter,
 			       int nmi, struct pt_regs *regs)
 {
-	unsigned int size;
+	int ret;
+	struct perf_output_handle handle;
+	struct perf_event_header header;
+	u64 ip;
 	struct {
-		struct perf_event_header header;
-		u64 ip;
 		u32 pid, tid;
-	} event;
+	} tid_entry;
 
-	event.header.type = PERF_EVENT_IP;
-	event.ip = instruction_pointer(regs);
+	header.type = PERF_EVENT_OVERFLOW;
+	header.size = sizeof(header);
 
-	size = sizeof(event);
+	ip = instruction_pointer(regs);
+	header.type |= __PERF_EVENT_IP;
+	header.size += sizeof(ip);
 
 	if (counter->hw_event.include_tid) {
 		/* namespace issues */
-		event.pid = current->group_leader->pid;
-		event.tid = current->pid;
+		tid_entry.pid = current->group_leader->pid;
+		tid_entry.tid = current->pid;
 
-		event.header.type |= __PERF_EVENT_TID;
-	} else
-		size -= sizeof(u64);
+		header.type |= __PERF_EVENT_TID;
+		header.size += sizeof(tid_entry);
+	}
 
-	event.header.size = size;
+	ret = perf_output_begin(&handle, counter, header.size, nmi);
+	if (ret)
+		return;
+
+	perf_output_put(&handle, header);
+	perf_output_put(&handle, ip);
+
+	if (counter->hw_event.include_tid)
+		perf_output_put(&handle, tid_entry);
 
-	perf_output_write(counter, nmi, &event, size);
+	perf_output_end(&handle);
 }
 
 static void perf_output_group(struct perf_counter *counter, int nmi)
-- 
cgit v1.2.3-70-g09d2


From 394ee07623cf556c8daae2b3c00cf5fea47f0811 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 30 Mar 2009 19:07:14 +0200
Subject: perf_counter: provide generic callchain bits

Provide the generic callchain support bits. If hw_event->callchain is
set the arch specific perf_callchain() function is called upon to
provide a perf_callchain_entry structure filled with the current
callchain.

If it does so, it is added to the overflow output event.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090330171024.254266860@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 13 ++++++++++++-
 kernel/perf_counter.c        | 27 +++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index edf5bfb7ff5..43083afffe0 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -140,8 +140,9 @@ struct perf_counter_hw_event {
 				include_tid    :  1, /* include the tid       */
 				mmap           :  1, /* include mmap data     */
 				munmap         :  1, /* include munmap data   */
+				callchain      :  1, /* add callchain data    */
 
-				__reserved_1   : 52;
+				__reserved_1   : 51;
 
 	__u32			extra_config_len;
 	__u32			__reserved_4;
@@ -219,6 +220,7 @@ enum perf_event_type {
 	PERF_EVENT_OVERFLOW	= 1UL << 31,
 	__PERF_EVENT_IP		= 1UL << 30,
 	__PERF_EVENT_TID	= 1UL << 29,
+	__PERF_EVENT_CALLCHAIN  = 1UL << 28,
 };
 
 #ifdef __KERNEL__
@@ -504,6 +506,15 @@ extern void perf_counter_mmap(unsigned long addr, unsigned long len,
 extern void perf_counter_munmap(unsigned long addr, unsigned long len,
 				unsigned long pgoff, struct file *file);
 
+#define MAX_STACK_DEPTH		255
+
+struct perf_callchain_entry {
+	u64	nr;
+	u64	ip[MAX_STACK_DEPTH];
+};
+
+extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
+
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index d93e9ddf784..860cdc26bd7 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1653,6 +1653,17 @@ void perf_counter_do_pending(void)
 	__perf_pending_run();
 }
 
+/*
+ * Callchain support -- arch specific
+ */
+
+struct perf_callchain_entry *
+__attribute__((weak))
+perf_callchain(struct pt_regs *regs)
+{
+	return NULL;
+}
+
 /*
  * Output
  */
@@ -1764,6 +1775,8 @@ static void perf_output_simple(struct perf_counter *counter,
 	struct {
 		u32 pid, tid;
 	} tid_entry;
+	struct perf_callchain_entry *callchain = NULL;
+	int callchain_size = 0;
 
 	header.type = PERF_EVENT_OVERFLOW;
 	header.size = sizeof(header);
@@ -1781,6 +1794,17 @@ static void perf_output_simple(struct perf_counter *counter,
 		header.size += sizeof(tid_entry);
 	}
 
+	if (counter->hw_event.callchain) {
+		callchain = perf_callchain(regs);
+
+		if (callchain) {
+			callchain_size = (1 + callchain->nr) * sizeof(u64);
+
+			header.type |= __PERF_EVENT_CALLCHAIN;
+			header.size += callchain_size;
+		}
+	}
+
 	ret = perf_output_begin(&handle, counter, header.size, nmi);
 	if (ret)
 		return;
@@ -1791,6 +1815,9 @@ static void perf_output_simple(struct perf_counter *counter,
 	if (counter->hw_event.include_tid)
 		perf_output_put(&handle, tid_entry);
 
+	if (callchain)
+		perf_output_copy(&handle, callchain, callchain_size);
+
 	perf_output_end(&handle);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 8a057d84912f36e53f970c4d177cb4bb6b2f9e08 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 2 Apr 2009 11:11:59 +0200
Subject: perf_counter: move the event overflow output bits to record_type

Per suggestion from Paul, move the event overflow bits to record_type
and sanitize the enums a bit.

Breaks the ABI -- again ;-)

Suggested-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Orig-LKML-Reference: <20090402091319.151921176@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  50 +++++++++++----------
 kernel/perf_counter.c        | 101 +++++++++++++++++--------------------------
 2 files changed, 68 insertions(+), 83 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 43083afffe0..06a6fba9f53 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -73,15 +73,6 @@ enum sw_event_ids {
 	PERF_SW_EVENTS_MAX		= 7,
 };
 
-/*
- * IRQ-notification data record type:
- */
-enum perf_counter_record_type {
-	PERF_RECORD_SIMPLE		= 0,
-	PERF_RECORD_IRQ			= 1,
-	PERF_RECORD_GROUP		= 2,
-};
-
 #define __PERF_COUNTER_MASK(name) 			\
 	(((1ULL << PERF_COUNTER_##name##_BITS) - 1) <<	\
 	 PERF_COUNTER_##name##_SHIFT)
@@ -102,6 +93,17 @@ enum perf_counter_record_type {
 #define PERF_COUNTER_EVENT_SHIFT	0
 #define PERF_COUNTER_EVENT_MASK		__PERF_COUNTER_MASK(EVENT)
 
+/*
+ * Bits that can be set in hw_event.record_type to request information
+ * in the overflow packets.
+ */
+enum perf_counter_record_format {
+	PERF_RECORD_IP		= 1U << 0,
+	PERF_RECORD_TID		= 1U << 1,
+	PERF_RECORD_GROUP	= 1U << 2,
+	PERF_RECORD_CALLCHAIN	= 1U << 3,
+};
+
 /*
  * Bits that can be set in hw_event.read_format to request that
  * reads on the counter should return the indicated quantities,
@@ -125,8 +127,8 @@ struct perf_counter_hw_event {
 	__u64			config;
 
 	__u64			irq_period;
-	__u64			record_type;
-	__u64			read_format;
+	__u32			record_type;
+	__u32			read_format;
 
 	__u64			disabled       :  1, /* off by default        */
 				nmi	       :  1, /* NMI sampling          */
@@ -137,12 +139,10 @@ struct perf_counter_hw_event {
 				exclude_kernel :  1, /* ditto kernel          */
 				exclude_hv     :  1, /* ditto hypervisor      */
 				exclude_idle   :  1, /* don't count when idle */
-				include_tid    :  1, /* include the tid       */
 				mmap           :  1, /* include mmap data     */
 				munmap         :  1, /* include munmap data   */
-				callchain      :  1, /* add callchain data    */
 
-				__reserved_1   : 51;
+				__reserved_1   : 53;
 
 	__u32			extra_config_len;
 	__u32			__reserved_4;
@@ -212,15 +212,21 @@ struct perf_event_header {
 
 enum perf_event_type {
 
-	PERF_EVENT_GROUP	= 1,
-
-	PERF_EVENT_MMAP		= 2,
-	PERF_EVENT_MUNMAP	= 3,
+	PERF_EVENT_MMAP			= 1,
+	PERF_EVENT_MUNMAP		= 2,
 
-	PERF_EVENT_OVERFLOW	= 1UL << 31,
-	__PERF_EVENT_IP		= 1UL << 30,
-	__PERF_EVENT_TID	= 1UL << 29,
-	__PERF_EVENT_CALLCHAIN  = 1UL << 28,
+	/*
+	 * Half the event type space is reserved for the counter overflow
+	 * bitfields, as found in hw_event.record_type.
+	 *
+	 * These events will have types of the form:
+	 *   PERF_EVENT_COUNTER_OVERFLOW { | __PERF_EVENT_* } *
+	 */
+	PERF_EVENT_COUNTER_OVERFLOW	= 1UL << 31,
+	__PERF_EVENT_IP			= PERF_RECORD_IP,
+	__PERF_EVENT_TID		= PERF_RECORD_TID,
+	__PERF_EVENT_GROUP		= PERF_RECORD_GROUP,
+	__PERF_EVENT_CALLCHAIN		= PERF_RECORD_CALLCHAIN,
 };
 
 #ifdef __KERNEL__
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 860cdc26bd7..995063df910 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1765,27 +1765,34 @@ static void perf_output_end(struct perf_output_handle *handle)
 	rcu_read_unlock();
 }
 
-static void perf_output_simple(struct perf_counter *counter,
-			       int nmi, struct pt_regs *regs)
+void perf_counter_output(struct perf_counter *counter,
+			 int nmi, struct pt_regs *regs)
 {
 	int ret;
+	u64 record_type = counter->hw_event.record_type;
 	struct perf_output_handle handle;
 	struct perf_event_header header;
 	u64 ip;
 	struct {
 		u32 pid, tid;
 	} tid_entry;
+	struct {
+		u64 event;
+		u64 counter;
+	} group_entry;
 	struct perf_callchain_entry *callchain = NULL;
 	int callchain_size = 0;
 
-	header.type = PERF_EVENT_OVERFLOW;
+	header.type = PERF_EVENT_COUNTER_OVERFLOW;
 	header.size = sizeof(header);
 
-	ip = instruction_pointer(regs);
-	header.type |= __PERF_EVENT_IP;
-	header.size += sizeof(ip);
+	if (record_type & PERF_RECORD_IP) {
+		ip = instruction_pointer(regs);
+		header.type |= __PERF_EVENT_IP;
+		header.size += sizeof(ip);
+	}
 
-	if (counter->hw_event.include_tid) {
+	if (record_type & PERF_RECORD_TID) {
 		/* namespace issues */
 		tid_entry.pid = current->group_leader->pid;
 		tid_entry.tid = current->pid;
@@ -1794,7 +1801,13 @@ static void perf_output_simple(struct perf_counter *counter,
 		header.size += sizeof(tid_entry);
 	}
 
-	if (counter->hw_event.callchain) {
+	if (record_type & PERF_RECORD_GROUP) {
+		header.type |= __PERF_EVENT_GROUP;
+		header.size += sizeof(u64) +
+			counter->nr_siblings * sizeof(group_entry);
+	}
+
+	if (record_type & PERF_RECORD_CALLCHAIN) {
 		callchain = perf_callchain(regs);
 
 		if (callchain) {
@@ -1810,69 +1823,35 @@ static void perf_output_simple(struct perf_counter *counter,
 		return;
 
 	perf_output_put(&handle, header);
-	perf_output_put(&handle, ip);
 
-	if (counter->hw_event.include_tid)
-		perf_output_put(&handle, tid_entry);
+	if (record_type & PERF_RECORD_IP)
+		perf_output_put(&handle, ip);
 
-	if (callchain)
-		perf_output_copy(&handle, callchain, callchain_size);
-
-	perf_output_end(&handle);
-}
-
-static void perf_output_group(struct perf_counter *counter, int nmi)
-{
-	struct perf_output_handle handle;
-	struct perf_event_header header;
-	struct perf_counter *leader, *sub;
-	unsigned int size;
-	struct {
-		u64 event;
-		u64 counter;
-	} entry;
-	int ret;
-
-	size = sizeof(header) + counter->nr_siblings * sizeof(entry);
+	if (record_type & PERF_RECORD_TID)
+		perf_output_put(&handle, tid_entry);
 
-	ret = perf_output_begin(&handle, counter, size, nmi);
-	if (ret)
-		return;
+	if (record_type & PERF_RECORD_GROUP) {
+		struct perf_counter *leader, *sub;
+		u64 nr = counter->nr_siblings;
 
-	header.type = PERF_EVENT_GROUP;
-	header.size = size;
+		perf_output_put(&handle, nr);
 
-	perf_output_put(&handle, header);
+		leader = counter->group_leader;
+		list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+			if (sub != counter)
+				sub->hw_ops->read(sub);
 
-	leader = counter->group_leader;
-	list_for_each_entry(sub, &leader->sibling_list, list_entry) {
-		if (sub != counter)
-			sub->hw_ops->read(sub);
+			group_entry.event = sub->hw_event.config;
+			group_entry.counter = atomic64_read(&sub->count);
 
-		entry.event = sub->hw_event.config;
-		entry.counter = atomic64_read(&sub->count);
-
-		perf_output_put(&handle, entry);
+			perf_output_put(&handle, group_entry);
+		}
 	}
 
-	perf_output_end(&handle);
-}
-
-void perf_counter_output(struct perf_counter *counter,
-			 int nmi, struct pt_regs *regs)
-{
-	switch (counter->hw_event.record_type) {
-	case PERF_RECORD_SIMPLE:
-		return;
-
-	case PERF_RECORD_IRQ:
-		perf_output_simple(counter, nmi, regs);
-		break;
+	if (callchain)
+		perf_output_copy(&handle, callchain, callchain_size);
 
-	case PERF_RECORD_GROUP:
-		perf_output_group(counter, nmi);
-		break;
-	}
+	perf_output_end(&handle);
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From c457810ab4a825161aec6ef71b581e1bc8febd1a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 2 Apr 2009 11:12:01 +0200
Subject: perf_counter: per event wakeups

By request, provide a way to request a wakeup every 'n' events instead
of every page of output.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Orig-LKML-Reference: <20090402091319.323309784@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  3 ++-
 kernel/perf_counter.c        | 10 +++++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 06a6fba9f53..5428ba120d7 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -145,7 +145,7 @@ struct perf_counter_hw_event {
 				__reserved_1   : 53;
 
 	__u32			extra_config_len;
-	__u32			__reserved_4;
+	__u32			wakeup_events;	/* wakeup every n events */
 
 	__u64			__reserved_2;
 	__u64			__reserved_3;
@@ -321,6 +321,7 @@ struct perf_mmap_data {
 	int				nr_pages;
 	atomic_t			wakeup;
 	atomic_t			head;
+	atomic_t			events;
 	struct perf_counter_mmap_page   *user_page;
 	void 				*data_pages[0];
 };
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 995063df910..9bcab10e735 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1760,7 +1760,15 @@ static void perf_output_copy(struct perf_output_handle *handle,
 
 static void perf_output_end(struct perf_output_handle *handle)
 {
-	if (handle->wakeup)
+	int wakeup_events = handle->counter->hw_event.wakeup_events;
+
+	if (wakeup_events) {
+		int events = atomic_inc_return(&handle->data->events);
+		if (events >= wakeup_events) {
+			atomic_sub(wakeup_events, &handle->data->events);
+			__perf_output_wakeup(handle);
+		}
+	} else if (handle->wakeup)
 		__perf_output_wakeup(handle);
 	rcu_read_unlock();
 }
-- 
cgit v1.2.3-70-g09d2


From 5872bdb88a35fae7d224bd6b21e5f377e854ccfc Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 2 Apr 2009 11:12:03 +0200
Subject: perf_counter: add more context information

Put in counts to tell which ips belong to what context.

  -----
   | |  hv
   | --
nr | |  kernel
   | --
   | |  user
  -----

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Orig-LKML-Reference: <20090402091319.493101305@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_counter.c | 9 +++++++++
 include/linux/perf_counter.h       | 4 ++--
 kernel/perf_counter.c              | 2 +-
 3 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 2a946a160ca..c74e20d593a 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -1088,6 +1088,7 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
 	unsigned long bp;
 	char *stack;
+	int nr = entry->nr;
 
 	callchain_store(entry, instruction_pointer(regs));
 
@@ -1099,6 +1100,8 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 #endif
 
 	dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry);
+
+	entry->kernel = entry->nr - nr;
 }
 
 
@@ -1128,6 +1131,7 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
 	struct stack_frame frame;
 	const void __user *fp;
+	int nr = entry->nr;
 
 	regs = (struct pt_regs *)current->thread.sp0 - 1;
 	fp   = (void __user *)regs->bp;
@@ -1147,6 +1151,8 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
 		callchain_store(entry, frame.return_address);
 		fp = frame.next_fp;
 	}
+
+	entry->user = entry->nr - nr;
 }
 
 static void
@@ -1182,6 +1188,9 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 		entry = &__get_cpu_var(irq_entry);
 
 	entry->nr = 0;
+	entry->hv = 0;
+	entry->kernel = 0;
+	entry->user = 0;
 
 	perf_do_callchain(regs, entry);
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 5428ba120d7..90cce0c74a0 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -513,10 +513,10 @@ extern void perf_counter_mmap(unsigned long addr, unsigned long len,
 extern void perf_counter_munmap(unsigned long addr, unsigned long len,
 				unsigned long pgoff, struct file *file);
 
-#define MAX_STACK_DEPTH		255
+#define MAX_STACK_DEPTH		254
 
 struct perf_callchain_entry {
-	u64	nr;
+	u32	nr, hv, kernel, user;
 	u64	ip[MAX_STACK_DEPTH];
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 9bcab10e735..f105a6e696c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1819,7 +1819,7 @@ void perf_counter_output(struct perf_counter *counter,
 		callchain = perf_callchain(regs);
 
 		if (callchain) {
-			callchain_size = (1 + callchain->nr) * sizeof(u64);
+			callchain_size = (2 + callchain->nr) * sizeof(u64);
 
 			header.type |= __PERF_EVENT_CALLCHAIN;
 			header.size += callchain_size;
-- 
cgit v1.2.3-70-g09d2


From 92f22a3865abe87eea2609a6f8e5be5123f7ce4f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 2 Apr 2009 11:12:04 +0200
Subject: perf_counter: update mmap() counter read

Paul noted that we don't need SMP barriers for the mmap() counter read
because its always on the same cpu (otherwise you can't access the hw
counter anyway).

So remove the SMP barriers and replace them with regular compiler
barriers.

Further, update the comment to include a race free method of reading
said hardware counter. The primary change is putting the pmc_read
inside the seq-loop, otherwise we can still race and read rubbish.

Noticed-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Orig-LKML-Reference: <20090402091319.577951445@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 22 ++++++++++------------
 kernel/perf_counter.c        |  4 ++--
 2 files changed, 12 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 90cce0c74a0..f2b914de3f0 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -167,30 +167,28 @@ struct perf_counter_mmap_page {
 	/*
 	 * Bits needed to read the hw counters in user-space.
 	 *
-	 * The index and offset should be read atomically using the seqlock:
-	 *
-	 *   __u32 seq, index;
-	 *   __s64 offset;
+	 *   u32 seq;
+	 *   s64 count;
 	 *
 	 * again:
-	 *   rmb();
 	 *   seq = pc->lock;
-	 *
 	 *   if (unlikely(seq & 1)) {
 	 *     cpu_relax();
 	 *     goto again;
 	 *   }
 	 *
-	 *   index = pc->index;
-	 *   offset = pc->offset;
+	 *   if (pc->index) {
+	 *     count = pmc_read(pc->index - 1);
+	 *     count += pc->offset;
+	 *   } else
+	 *     goto regular_read;
 	 *
-	 *   rmb();
+	 *   barrier();
 	 *   if (pc->lock != seq)
 	 *     goto again;
 	 *
-	 * After this, index contains architecture specific counter index + 1,
-	 * so that 0 means unavailable, offset contains the value to be added
-	 * to the result of the raw timer read to obtain this counter's value.
+	 * NOTE: for obvious reason this only works on self-monitoring
+	 *       processes.
 	 */
 	__u32	lock;			/* seqlock for synchronization */
 	__u32	index;			/* hardware counter identifier */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f105a6e696c..2a5d4f52556 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1340,13 +1340,13 @@ void perf_counter_update_userpage(struct perf_counter *counter)
 	 */
 	preempt_disable();
 	++userpg->lock;
-	smp_wmb();
+	barrier();
 	userpg->index = counter->hw.idx;
 	userpg->offset = atomic64_read(&counter->count);
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
 		userpg->offset -= atomic64_read(&counter->hw.prev_count);
 
-	smp_wmb();
+	barrier();
 	++userpg->lock;
 	preempt_enable();
 unlock:
-- 
cgit v1.2.3-70-g09d2


From 52400ba946759af28442dee6265c5c0180ac7122 Mon Sep 17 00:00:00 2001
From: Darren Hart <dvhltc@us.ibm.com>
Date: Fri, 3 Apr 2009 13:40:49 -0700
Subject: futex: add requeue_pi functionality

PI Futexes and their underlying rt_mutex cannot be left ownerless if
there are pending waiters as this will break the PI boosting logic, so
the standard requeue commands aren't sufficient.  The new commands
properly manage pi futex ownership by ensuring a futex with waiters
has an owner at all times.  This will allow glibc to properly handle
pi mutexes with pthread_condvars.

The approach taken here is to create two new futex op codes:

FUTEX_WAIT_REQUEUE_PI:
Tasks will use this op code to wait on a futex (such as a non-pi waitqueue)
and wake after they have been requeued to a pi futex.  Prior to returning to
userspace, they will acquire this pi futex (and the underlying rt_mutex).

futex_wait_requeue_pi() is the result of a high speed collision between
futex_wait() and futex_lock_pi() (with the first part of futex_lock_pi() being
done by futex_proxy_trylock_atomic() on behalf of the top_waiter).

FUTEX_REQUEUE_PI (and FUTEX_CMP_REQUEUE_PI):
This call must be used to wake tasks waiting with FUTEX_WAIT_REQUEUE_PI,
regardless of how many tasks the caller intends to wake or requeue.
pthread_cond_broadcast() should call this with nr_wake=1 and
nr_requeue=INT_MAX.  pthread_cond_signal() should call this with nr_wake=1 and
nr_requeue=0.  The reason being we need both callers to get the benefit of the
futex_proxy_trylock_atomic() routine.  futex_requeue() also enqueues the
top_waiter on the rt_mutex via rt_mutex_start_proxy_lock().

Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/futex.h       |   8 +
 include/linux/thread_info.h |   3 +-
 kernel/futex.c              | 519 ++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 510 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/futex.h b/include/linux/futex.h
index 3bf5bb5a34f..b05519ca9e5 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -23,6 +23,9 @@ union ktime;
 #define FUTEX_TRYLOCK_PI	8
 #define FUTEX_WAIT_BITSET	9
 #define FUTEX_WAKE_BITSET	10
+#define FUTEX_WAIT_REQUEUE_PI	11
+#define FUTEX_REQUEUE_PI	12
+#define FUTEX_CMP_REQUEUE_PI	13
 
 #define FUTEX_PRIVATE_FLAG	128
 #define FUTEX_CLOCK_REALTIME	256
@@ -38,6 +41,11 @@ union ktime;
 #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG)
 #define FUTEX_WAIT_BITSET_PRIVATE	(FUTEX_WAIT_BITS | FUTEX_PRIVATE_FLAG)
 #define FUTEX_WAKE_BITSET_PRIVATE	(FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG)
+#define FUTEX_WAIT_REQUEUE_PI_PRIVATE	(FUTEX_WAIT_REQUEUE_PI | \
+					 FUTEX_PRIVATE_FLAG)
+#define FUTEX_REQUEUE_PI_PRIVATE	(FUTEX_REQUEUE_PI | FUTEX_PRIVATE_FLAG)
+#define FUTEX_CMP_REQUEUE_PI_PRIVATE	(FUTEX_CMP_REQUEUE_PI | \
+					 FUTEX_PRIVATE_FLAG)
 
 /*
  * Support for robust futexes: the kernel cleans up held futexes at
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index e6b820f8b56..a8cc4e13434 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -21,13 +21,14 @@ struct restart_block {
 		struct {
 			unsigned long arg0, arg1, arg2, arg3;
 		};
-		/* For futex_wait */
+		/* For futex_wait and futex_wait_requeue_pi */
 		struct {
 			u32 *uaddr;
 			u32 val;
 			u32 flags;
 			u32 bitset;
 			u64 time;
+			u32 *uaddr2;
 		} futex;
 		/* For nanosleep */
 		struct {
diff --git a/kernel/futex.c b/kernel/futex.c
index dbe857aa438..185c981d89e 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -19,6 +19,10 @@
  *  PRIVATE futexes by Eric Dumazet
  *  Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
  *
+ *  Requeue-PI support by Darren Hart <dvhltc@us.ibm.com>
+ *  Copyright (C) IBM Corporation, 2009
+ *  Thanks to Thomas Gleixner for conceptual design and careful reviews.
+ *
  *  Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
  *  enough at me, Linus for the original (flawed) idea, Matthew
  *  Kirkwood for proof-of-concept implementation.
@@ -109,6 +113,9 @@ struct futex_q {
 	struct futex_pi_state *pi_state;
 	struct task_struct *task;
 
+	/* rt_waiter storage for requeue_pi: */
+	struct rt_mutex_waiter *rt_waiter;
+
 	/* Bitset for the optional bitmasked wakeup */
 	u32 bitset;
 };
@@ -827,7 +834,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
 
 	plist_for_each_entry_safe(this, next, head, list) {
 		if (match_futex (&this->key, &key)) {
-			if (this->pi_state) {
+			if (this->pi_state || this->rt_waiter) {
 				ret = -EINVAL;
 				break;
 			}
@@ -968,20 +975,138 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
 	q->key = *key2;
 }
 
-/*
- * Requeue all waiters hashed on one physical page to another
- * physical page.
+/**
+ * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
+ * q:	the futex_q
+ * key:	the key of the requeue target futex
+ *
+ * During futex_requeue, with requeue_pi=1, it is possible to acquire the
+ * target futex if it is uncontended or via a lock steal.  Set the futex_q key
+ * to the requeue target futex so the waiter can detect the wakeup on the right
+ * futex, but remove it from the hb and NULL the rt_waiter so it can detect
+ * atomic lock acquisition.  Must be called with the q->lock_ptr held.
+ */
+static inline
+void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
+{
+	drop_futex_key_refs(&q->key);
+	get_futex_key_refs(key);
+	q->key = *key;
+
+	WARN_ON(plist_node_empty(&q->list));
+	plist_del(&q->list, &q->list.plist);
+
+	WARN_ON(!q->rt_waiter);
+	q->rt_waiter = NULL;
+
+	wake_up(&q->waiter);
+}
+
+/**
+ * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
+ * @pifutex:	the user address of the to futex
+ * @hb1:	the from futex hash bucket, must be locked by the caller
+ * @hb2:	the to futex hash bucket, must be locked by the caller
+ * @key1:	the from futex key
+ * @key2:	the to futex key
+ *
+ * Try and get the lock on behalf of the top waiter if we can do it atomically.
+ * Wake the top waiter if we succeed.  hb1 and hb2 must be held by the caller.
+ *
+ * Returns:
+ *  0 - failed to acquire the lock atomicly
+ *  1 - acquired the lock
+ * <0 - error
+ */
+static int futex_proxy_trylock_atomic(u32 __user *pifutex,
+				 struct futex_hash_bucket *hb1,
+				 struct futex_hash_bucket *hb2,
+				 union futex_key *key1, union futex_key *key2,
+				 struct futex_pi_state **ps)
+{
+	struct futex_q *top_waiter;
+	u32 curval;
+	int ret;
+
+	if (get_futex_value_locked(&curval, pifutex))
+		return -EFAULT;
+
+	top_waiter = futex_top_waiter(hb1, key1);
+
+	/* There are no waiters, nothing for us to do. */
+	if (!top_waiter)
+		return 0;
+
+	/*
+	 * Either take the lock for top_waiter or set the FUTEX_WAITERS bit.
+	 * The pi_state is returned in ps in contended cases.
+	 */
+	ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task);
+	if (ret == 1)
+		requeue_pi_wake_futex(top_waiter, key2);
+
+	return ret;
+}
+
+/**
+ * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
+ * uaddr1:	source futex user address
+ * uaddr2:	target futex user address
+ * nr_wake:	number of waiters to wake (must be 1 for requeue_pi)
+ * nr_requeue:	number of waiters to requeue (0-INT_MAX)
+ * requeue_pi:	if we are attempting to requeue from a non-pi futex to a
+ * 		pi futex (pi to pi requeue is not supported)
+ *
+ * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
+ * uaddr2 atomically on behalf of the top waiter.
+ *
+ * Returns:
+ * >=0 - on success, the number of tasks requeued or woken
+ *  <0 - on error
  */
 static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
-			 int nr_wake, int nr_requeue, u32 *cmpval)
+			 int nr_wake, int nr_requeue, u32 *cmpval,
+			 int requeue_pi)
 {
 	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
+	int drop_count = 0, task_count = 0, ret;
+	struct futex_pi_state *pi_state = NULL;
 	struct futex_hash_bucket *hb1, *hb2;
 	struct plist_head *head1;
 	struct futex_q *this, *next;
-	int ret, drop_count = 0;
+	u32 curval2;
+
+	if (requeue_pi) {
+		/*
+		 * requeue_pi requires a pi_state, try to allocate it now
+		 * without any locks in case it fails.
+		 */
+		if (refill_pi_state_cache())
+			return -ENOMEM;
+		/*
+		 * requeue_pi must wake as many tasks as it can, up to nr_wake
+		 * + nr_requeue, since it acquires the rt_mutex prior to
+		 * returning to userspace, so as to not leave the rt_mutex with
+		 * waiters and no owner.  However, second and third wake-ups
+		 * cannot be predicted as they involve race conditions with the
+		 * first wake and a fault while looking up the pi_state.  Both
+		 * pthread_cond_signal() and pthread_cond_broadcast() should
+		 * use nr_wake=1.
+		 */
+		if (nr_wake != 1)
+			return -EINVAL;
+	}
 
 retry:
+	if (pi_state != NULL) {
+		/*
+		 * We will have to lookup the pi_state again, so free this one
+		 * to keep the accounting correct.
+		 */
+		free_pi_state(pi_state);
+		pi_state = NULL;
+	}
+
 	ret = get_futex_key(uaddr1, fshared, &key1);
 	if (unlikely(ret != 0))
 		goto out;
@@ -1020,19 +1145,94 @@ retry_private:
 		}
 	}
 
+	if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
+		/* Attempt to acquire uaddr2 and wake the top_waiter. */
+		ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
+						 &key2, &pi_state);
+
+		/*
+		 * At this point the top_waiter has either taken uaddr2 or is
+		 * waiting on it.  If the former, then the pi_state will not
+		 * exist yet, look it up one more time to ensure we have a
+		 * reference to it.
+		 */
+		if (ret == 1) {
+			WARN_ON(pi_state);
+			task_count++;
+			ret = get_futex_value_locked(&curval2, uaddr2);
+			if (!ret)
+				ret = lookup_pi_state(curval2, hb2, &key2,
+						      &pi_state);
+		}
+
+		switch (ret) {
+		case 0:
+			break;
+		case -EFAULT:
+			double_unlock_hb(hb1, hb2);
+			put_futex_key(fshared, &key2);
+			put_futex_key(fshared, &key1);
+			ret = get_user(curval2, uaddr2);
+			if (!ret)
+				goto retry;
+			goto out;
+		case -EAGAIN:
+			/* The owner was exiting, try again. */
+			double_unlock_hb(hb1, hb2);
+			put_futex_key(fshared, &key2);
+			put_futex_key(fshared, &key1);
+			cond_resched();
+			goto retry;
+		default:
+			goto out_unlock;
+		}
+	}
+
 	head1 = &hb1->chain;
 	plist_for_each_entry_safe(this, next, head1, list) {
-		if (!match_futex (&this->key, &key1))
+		if (task_count - nr_wake >= nr_requeue)
+			break;
+
+		if (!match_futex(&this->key, &key1))
 			continue;
-		if (++ret <= nr_wake) {
+
+		WARN_ON(!requeue_pi && this->rt_waiter);
+		WARN_ON(requeue_pi && !this->rt_waiter);
+
+		/*
+		 * Wake nr_wake waiters.  For requeue_pi, if we acquired the
+		 * lock, we already woke the top_waiter.  If not, it will be
+		 * woken by futex_unlock_pi().
+		 */
+		if (++task_count <= nr_wake && !requeue_pi) {
 			wake_futex(this);
-		} else {
-			requeue_futex(this, hb1, hb2, &key2);
-			drop_count++;
+			continue;
+		}
 
-			if (ret - nr_wake >= nr_requeue)
-				break;
+		/*
+		 * Requeue nr_requeue waiters and possibly one more in the case
+		 * of requeue_pi if we couldn't acquire the lock atomically.
+		 */
+		if (requeue_pi) {
+			/* Prepare the waiter to take the rt_mutex. */
+			atomic_inc(&pi_state->refcount);
+			this->pi_state = pi_state;
+			ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
+							this->rt_waiter,
+							this->task, 1);
+			if (ret == 1) {
+				/* We got the lock. */
+				requeue_pi_wake_futex(this, &key2);
+				continue;
+			} else if (ret) {
+				/* -EDEADLK */
+				this->pi_state = NULL;
+				free_pi_state(pi_state);
+				goto out_unlock;
+			}
 		}
+		requeue_futex(this, hb1, hb2, &key2);
+		drop_count++;
 	}
 
 out_unlock:
@@ -1047,7 +1247,9 @@ out_put_keys:
 out_put_key1:
 	put_futex_key(fshared, &key1);
 out:
-	return ret;
+	if (pi_state != NULL)
+		free_pi_state(pi_state);
+	return ret ? ret : task_count;
 }
 
 /* The key must be already stored in q->key. */
@@ -1270,6 +1472,7 @@ handle_fault:
 #define FLAGS_HAS_TIMEOUT	0x04
 
 static long futex_wait_restart(struct restart_block *restart);
+static long futex_lock_pi_restart(struct restart_block *restart);
 
 /**
  * fixup_owner() - Post lock pi_state and corner case management
@@ -1489,6 +1692,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
 
 	q.pi_state = NULL;
 	q.bitset = bitset;
+	q.rt_waiter = NULL;
 
 	if (abs_time) {
 		to = &timeout;
@@ -1596,6 +1800,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
 	}
 
 	q.pi_state = NULL;
+	q.rt_waiter = NULL;
 retry:
 	q.key = FUTEX_KEY_INIT;
 	ret = get_futex_key(uaddr, fshared, &q.key);
@@ -1701,6 +1906,20 @@ uaddr_faulted:
 	goto retry;
 }
 
+static long futex_lock_pi_restart(struct restart_block *restart)
+{
+	u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
+	ktime_t t, *tp = NULL;
+	int fshared = restart->futex.flags & FLAGS_SHARED;
+
+	if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
+		t.tv64 = restart->futex.time;
+		tp = &t;
+	}
+	restart->fn = do_no_restart_syscall;
+
+	return (long)futex_lock_pi(uaddr, fshared, restart->futex.val, tp, 0);
+}
 
 /*
  * Userspace attempted a TID -> 0 atomic transition, and failed.
@@ -1803,6 +2022,253 @@ pi_faulted:
 	return ret;
 }
 
+/**
+ * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
+ * @hb:		the hash_bucket futex_q was original enqueued on
+ * @q:		the futex_q woken while waiting to be requeued
+ * @key2:	the futex_key of the requeue target futex
+ * @timeout:	the timeout associated with the wait (NULL if none)
+ *
+ * Detect if the task was woken on the initial futex as opposed to the requeue
+ * target futex.  If so, determine if it was a timeout or a signal that caused
+ * the wakeup and return the appropriate error code to the caller.  Must be
+ * called with the hb lock held.
+ *
+ * Returns
+ *  0 - no early wakeup detected
+ * <0 - -ETIMEDOUT or -ERESTARTSYS (FIXME: or ERESTARTNOINTR?)
+ */
+static inline
+int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
+				   struct futex_q *q, union futex_key *key2,
+				   struct hrtimer_sleeper *timeout)
+{
+	int ret = 0;
+
+	/*
+	 * With the hb lock held, we avoid races while we process the wakeup.
+	 * We only need to hold hb (and not hb2) to ensure atomicity as the
+	 * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
+	 * It can't be requeued from uaddr2 to something else since we don't
+	 * support a PI aware source futex for requeue.
+	 */
+	if (!match_futex(&q->key, key2)) {
+		WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
+		/*
+		 * We were woken prior to requeue by a timeout or a signal.
+		 * Unqueue the futex_q and determine which it was.
+		 */
+		plist_del(&q->list, &q->list.plist);
+		drop_futex_key_refs(&q->key);
+
+		if (timeout && !timeout->task)
+			ret = -ETIMEDOUT;
+		else {
+			/*
+			 * We expect signal_pending(current), but another
+			 * thread may have handled it for us already.
+			 */
+			/* FIXME: ERESTARTSYS or ERESTARTNOINTR?  Do we care if
+			 * the user specified SA_RESTART or not? */
+			ret = -ERESTARTSYS;
+		}
+	}
+	return ret;
+}
+
+/**
+ * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
+ * @uaddr:	the futex we initialyl wait on (non-pi)
+ * @fshared:	whether the futexes are shared (1) or not (0).  They must be
+ * 		the same type, no requeueing from private to shared, etc.
+ * @val:	the expected value of uaddr
+ * @abs_time:	absolute timeout
+ * @bitset:	32 bit wakeup bitset set by userspace, defaults to all.
+ * @clockrt:	whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0)
+ * @uaddr2:	the pi futex we will take prior to returning to user-space
+ *
+ * The caller will wait on uaddr and will be requeued by futex_requeue() to
+ * uaddr2 which must be PI aware.  Normal wakeup will wake on uaddr2 and
+ * complete the acquisition of the rt_mutex prior to returning to userspace.
+ * This ensures the rt_mutex maintains an owner when it has waiters; without
+ * one, the pi logic wouldn't know which task to boost/deboost, if there was a
+ * need to.
+ *
+ * We call schedule in futex_wait_queue_me() when we enqueue and return there
+ * via the following:
+ * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
+ * 2) wakeup on uaddr2 after a requeue and subsequent unlock
+ * 3) signal (before or after requeue)
+ * 4) timeout (before or after requeue)
+ *
+ * If 3, we setup a restart_block with futex_wait_requeue_pi() as the function.
+ *
+ * If 2, we may then block on trying to take the rt_mutex and return via:
+ * 5) successful lock
+ * 6) signal
+ * 7) timeout
+ * 8) other lock acquisition failure
+ *
+ * If 6, we setup a restart_block with futex_lock_pi() as the function.
+ *
+ * If 4 or 7, we cleanup and return with -ETIMEDOUT.
+ *
+ * Returns:
+ *  0 - On success
+ * <0 - On error
+ */
+static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
+				 u32 val, ktime_t *abs_time, u32 bitset,
+				 int clockrt, u32 __user *uaddr2)
+{
+	struct hrtimer_sleeper timeout, *to = NULL;
+	struct rt_mutex_waiter rt_waiter;
+	struct rt_mutex *pi_mutex = NULL;
+	DECLARE_WAITQUEUE(wait, current);
+	struct restart_block *restart;
+	struct futex_hash_bucket *hb;
+	union futex_key key2;
+	struct futex_q q;
+	int res, ret;
+	u32 uval;
+
+	if (!bitset)
+		return -EINVAL;
+
+	if (abs_time) {
+		to = &timeout;
+		hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME :
+				      CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+		hrtimer_init_sleeper(to, current);
+		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
+					     current->timer_slack_ns);
+	}
+
+	/*
+	 * The waiter is allocated on our stack, manipulated by the requeue
+	 * code while we sleep on uaddr.
+	 */
+	debug_rt_mutex_init_waiter(&rt_waiter);
+	rt_waiter.task = NULL;
+
+	q.pi_state = NULL;
+	q.bitset = bitset;
+	q.rt_waiter = &rt_waiter;
+
+	key2 = FUTEX_KEY_INIT;
+	ret = get_futex_key(uaddr2, fshared, &key2);
+	if (unlikely(ret != 0))
+		goto out;
+
+	/* Prepare to wait on uaddr. */
+	ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
+	if (ret) {
+		put_futex_key(fshared, &key2);
+		goto out;
+	}
+
+	/* Queue the futex_q, drop the hb lock, wait for wakeup. */
+	futex_wait_queue_me(hb, &q, to, &wait);
+
+	spin_lock(&hb->lock);
+	ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
+	spin_unlock(&hb->lock);
+	if (ret)
+		goto out_put_keys;
+
+	/*
+	 * In order for us to be here, we know our q.key == key2, and since
+	 * we took the hb->lock above, we also know that futex_requeue() has
+	 * completed and we no longer have to concern ourselves with a wakeup
+	 * race with the atomic proxy lock acquition by the requeue code.
+	 */
+
+	/* Check if the requeue code acquired the second futex for us. */
+	if (!q.rt_waiter) {
+		/*
+		 * Got the lock. We might not be the anticipated owner if we
+		 * did a lock-steal - fix up the PI-state in that case.
+		 */
+		if (q.pi_state && (q.pi_state->owner != current)) {
+			spin_lock(q.lock_ptr);
+			ret = fixup_pi_state_owner(uaddr2, &q, current,
+						   fshared);
+			spin_unlock(q.lock_ptr);
+		}
+	} else {
+		/*
+		 * We have been woken up by futex_unlock_pi(), a timeout, or a
+		 * signal.  futex_unlock_pi() will not destroy the lock_ptr nor
+		 * the pi_state.
+		 */
+		WARN_ON(!&q.pi_state);
+		pi_mutex = &q.pi_state->pi_mutex;
+		ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
+		debug_rt_mutex_free_waiter(&rt_waiter);
+
+		spin_lock(q.lock_ptr);
+		/*
+		 * Fixup the pi_state owner and possibly acquire the lock if we
+		 * haven't already.
+		 */
+		res = fixup_owner(uaddr2, fshared, &q, !ret);
+		/*
+		 * If fixup_owner() returned an error, proprogate that.  If it
+		 * acquired the lock, clear our -ETIMEDOUT or -EINTR.
+		 */
+		if (res)
+			ret = (res < 0) ? res : 0;
+
+		/* Unqueue and drop the lock. */
+		unqueue_me_pi(&q);
+	}
+
+	/*
+	 * If fixup_pi_state_owner() faulted and was unable to handle the
+	 * fault, unlock the rt_mutex and return the fault to userspace.
+	 */
+	if (ret == -EFAULT) {
+		if (rt_mutex_owner(pi_mutex) == current)
+			rt_mutex_unlock(pi_mutex);
+	} else if (ret == -EINTR) {
+		ret = -EFAULT;
+		if (get_user(uval, uaddr2))
+			goto out_put_keys;
+
+		/*
+		 * We've already been requeued, so restart by calling
+		 * futex_lock_pi() directly, rather then returning to this
+		 * function.
+		 */
+		ret = -ERESTART_RESTARTBLOCK;
+		restart = &current_thread_info()->restart_block;
+		restart->fn = futex_lock_pi_restart;
+		restart->futex.uaddr = (u32 *)uaddr2;
+		restart->futex.val = uval;
+		restart->futex.flags = 0;
+		if (abs_time) {
+			restart->futex.flags |= FLAGS_HAS_TIMEOUT;
+			restart->futex.time = abs_time->tv64;
+		}
+
+		if (fshared)
+			restart->futex.flags |= FLAGS_SHARED;
+		if (clockrt)
+			restart->futex.flags |= FLAGS_CLOCKRT;
+	}
+
+out_put_keys:
+	put_futex_key(fshared, &q.key);
+	put_futex_key(fshared, &key2);
+
+out:
+	if (to) {
+		hrtimer_cancel(&to->timer);
+		destroy_hrtimer_on_stack(&to->timer);
+	}
+	return ret;
+}
+
 /*
  * Support for robust futexes: the kernel cleans up held futexes at
  * thread exit time.
@@ -2025,7 +2491,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 		fshared = 1;
 
 	clockrt = op & FUTEX_CLOCK_REALTIME;
-	if (clockrt && cmd != FUTEX_WAIT_BITSET)
+	if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
 		return -ENOSYS;
 
 	switch (cmd) {
@@ -2040,10 +2506,11 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 		ret = futex_wake(uaddr, fshared, val, val3);
 		break;
 	case FUTEX_REQUEUE:
-		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL);
+		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0);
 		break;
 	case FUTEX_CMP_REQUEUE:
-		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3);
+		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
+				    0);
 		break;
 	case FUTEX_WAKE_OP:
 		ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
@@ -2060,6 +2527,18 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 		if (futex_cmpxchg_enabled)
 			ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
 		break;
+	case FUTEX_WAIT_REQUEUE_PI:
+		val3 = FUTEX_BITSET_MATCH_ANY;
+		ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3,
+					    clockrt, uaddr2);
+		break;
+	case FUTEX_REQUEUE_PI:
+		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 1);
+		break;
+	case FUTEX_CMP_REQUEUE_PI:
+		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
+				    1);
+		break;
 	default:
 		ret = -ENOSYS;
 	}
@@ -2077,7 +2556,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
 	int cmd = op & FUTEX_CMD_MASK;
 
 	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
-		      cmd == FUTEX_WAIT_BITSET)) {
+		      cmd == FUTEX_WAIT_BITSET ||
+		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
 		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
 			return -EFAULT;
 		if (!timespec_valid(&ts))
@@ -2089,10 +2569,11 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
 		tp = &t;
 	}
 	/*
-	 * requeue parameter in 'utime' if cmd == FUTEX_REQUEUE.
+	 * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
 	 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
 	 */
 	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
+	    cmd == FUTEX_REQUEUE_PI || cmd == FUTEX_CMP_REQUEUE_PI ||
 	    cmd == FUTEX_WAKE_OP)
 		val2 = (u32) (unsigned long) utime;
 
-- 
cgit v1.2.3-70-g09d2


From a2e87d06ddbe6e6fdb8d6d2e5e985efe4efb07dd Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:44:59 +0200
Subject: perf_counter: update mmap() counter read, take 2

Update the userspace read method.

Paul noted that:
 - userspace cannot observe ->lock & 1 on the same cpu.
 - we need a barrier() between reading ->lock and ->index
   to ensure we read them in that prticular order.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094517.368446033@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index f2b914de3f0..e22ab47a2f4 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -170,22 +170,18 @@ struct perf_counter_mmap_page {
 	 *   u32 seq;
 	 *   s64 count;
 	 *
-	 * again:
-	 *   seq = pc->lock;
-	 *   if (unlikely(seq & 1)) {
-	 *     cpu_relax();
-	 *     goto again;
-	 *   }
+	 *   do {
+	 *     seq = pc->lock;
 	 *
-	 *   if (pc->index) {
-	 *     count = pmc_read(pc->index - 1);
-	 *     count += pc->offset;
-	 *   } else
-	 *     goto regular_read;
+	 *     barrier()
+	 *     if (pc->index) {
+	 *       count = pmc_read(pc->index - 1);
+	 *       count += pc->offset;
+	 *     } else
+	 *       goto regular_read;
 	 *
-	 *   barrier();
-	 *   if (pc->lock != seq)
-	 *     goto again;
+	 *     barrier();
+	 *   } while (pc->lock != seq);
 	 *
 	 * NOTE: for obvious reason this only works on self-monitoring
 	 *       processes.
-- 
cgit v1.2.3-70-g09d2


From 9c03d88e328d5f28f13191622c2ea1349c36b799 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:00 +0200
Subject: perf_counter: add more context information

Change the callchain context entries to u16, so as to gain some space.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094517.457320003@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 4 ++--
 kernel/perf_counter.c        | 6 ++----
 2 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index e22ab47a2f4..f9d5cf0bfbd 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -507,10 +507,10 @@ extern void perf_counter_mmap(unsigned long addr, unsigned long len,
 extern void perf_counter_munmap(unsigned long addr, unsigned long len,
 				unsigned long pgoff, struct file *file);
 
-#define MAX_STACK_DEPTH		254
+#define MAX_STACK_DEPTH		255
 
 struct perf_callchain_entry {
-	u32	nr, hv, kernel, user;
+	u16	nr, hv, kernel, user;
 	u64	ip[MAX_STACK_DEPTH];
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 2a5d4f52556..727624db507 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1657,9 +1657,7 @@ void perf_counter_do_pending(void)
  * Callchain support -- arch specific
  */
 
-struct perf_callchain_entry *
-__attribute__((weak))
-perf_callchain(struct pt_regs *regs)
+__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 {
 	return NULL;
 }
@@ -1819,7 +1817,7 @@ void perf_counter_output(struct perf_counter *counter,
 		callchain = perf_callchain(regs);
 
 		if (callchain) {
-			callchain_size = (2 + callchain->nr) * sizeof(u64);
+			callchain_size = (1 + callchain->nr) * sizeof(u64);
 
 			header.type |= __PERF_EVENT_CALLCHAIN;
 			header.size += callchain_size;
-- 
cgit v1.2.3-70-g09d2


From 3c446b3d3b38f991f97e9d2df0ad26a60a94dcff Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:01 +0200
Subject: perf_counter: SIGIO support

Provide support for fcntl() I/O availability signals.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094517.579788800@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  2 ++
 kernel/perf_counter.c        | 20 +++++++++++++++++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index f9d5cf0bfbd..8d5d11b8d01 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -238,6 +238,7 @@ enum perf_event_type {
 #include <linux/rcupdate.h>
 #include <linux/spinlock.h>
 #include <linux/hrtimer.h>
+#include <linux/fs.h>
 #include <asm/atomic.h>
 
 struct task_struct;
@@ -398,6 +399,7 @@ struct perf_counter {
 
 	/* poll related */
 	wait_queue_head_t		waitq;
+	struct fasync_struct		*fasync;
 	/* optional: for NMIs */
 	struct perf_wakeup_entry	wakeup;
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 727624db507..c58cc64319e 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1526,6 +1526,22 @@ out:
 	return ret;
 }
 
+static int perf_fasync(int fd, struct file *filp, int on)
+{
+	struct perf_counter *counter = filp->private_data;
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	int retval;
+
+	mutex_lock(&inode->i_mutex);
+	retval = fasync_helper(fd, filp, on, &counter->fasync);
+	mutex_unlock(&inode->i_mutex);
+
+	if (retval < 0)
+		return retval;
+
+	return 0;
+}
+
 static const struct file_operations perf_fops = {
 	.release		= perf_release,
 	.read			= perf_read,
@@ -1533,6 +1549,7 @@ static const struct file_operations perf_fops = {
 	.unlocked_ioctl		= perf_ioctl,
 	.compat_ioctl		= perf_ioctl,
 	.mmap			= perf_mmap,
+	.fasync			= perf_fasync,
 };
 
 /*
@@ -1549,7 +1566,7 @@ void perf_counter_wakeup(struct perf_counter *counter)
 	rcu_read_lock();
 	data = rcu_dereference(counter->data);
 	if (data) {
-		(void)atomic_xchg(&data->wakeup, POLL_IN);
+		atomic_set(&data->wakeup, POLL_IN);
 		/*
 		 * Ensure all data writes are issued before updating the
 		 * user-space data head information. The matching rmb()
@@ -1561,6 +1578,7 @@ void perf_counter_wakeup(struct perf_counter *counter)
 	rcu_read_unlock();
 
 	wake_up_all(&counter->waitq);
+	kill_fasync(&counter->fasync, SIGIO, POLL_IN);
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 671dec5daf3b3c43c5777be282f00120a44cf37f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:02 +0200
Subject: perf_counter: generalize pending infrastructure

Prepare the pending infrastructure to do more than wakeups.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094517.634732847@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  7 +++---
 kernel/perf_counter.c        | 53 ++++++++++++++++++++++++++------------------
 2 files changed, 36 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 8d5d11b8d01..977fb15a53f 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -321,8 +321,9 @@ struct perf_mmap_data {
 	void 				*data_pages[0];
 };
 
-struct perf_wakeup_entry {
-	struct perf_wakeup_entry *next;
+struct perf_pending_entry {
+	struct perf_pending_entry *next;
+	void (*func)(struct perf_pending_entry *);
 };
 
 /**
@@ -401,7 +402,7 @@ struct perf_counter {
 	wait_queue_head_t		waitq;
 	struct fasync_struct		*fasync;
 	/* optional: for NMIs */
-	struct perf_wakeup_entry	wakeup;
+	struct perf_pending_entry	pending;
 
 	void (*destroy)(struct perf_counter *);
 	struct rcu_head			rcu_head;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index c58cc64319e..0a2ade2e4f1 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1581,6 +1581,14 @@ void perf_counter_wakeup(struct perf_counter *counter)
 	kill_fasync(&counter->fasync, SIGIO, POLL_IN);
 }
 
+static void perf_pending_wakeup(struct perf_pending_entry *entry)
+{
+	struct perf_counter *counter = container_of(entry,
+			struct perf_counter, pending);
+
+	perf_counter_wakeup(counter);
+}
+
 /*
  * Pending wakeups
  *
@@ -1590,45 +1598,47 @@ void perf_counter_wakeup(struct perf_counter *counter)
  * single linked list and use cmpxchg() to add entries lockless.
  */
 
-#define PENDING_TAIL ((struct perf_wakeup_entry *)-1UL)
+#define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
 
-static DEFINE_PER_CPU(struct perf_wakeup_entry *, perf_wakeup_head) = {
+static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
 	PENDING_TAIL,
 };
 
-static void perf_pending_queue(struct perf_counter *counter)
+static void perf_pending_queue(struct perf_pending_entry *entry,
+			       void (*func)(struct perf_pending_entry *))
 {
-	struct perf_wakeup_entry **head;
-	struct perf_wakeup_entry *prev, *next;
+	struct perf_pending_entry **head;
 
-	if (cmpxchg(&counter->wakeup.next, NULL, PENDING_TAIL) != NULL)
+	if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
 		return;
 
-	head = &get_cpu_var(perf_wakeup_head);
+	entry->func = func;
+
+	head = &get_cpu_var(perf_pending_head);
 
 	do {
-		prev = counter->wakeup.next = *head;
-		next = &counter->wakeup;
-	} while (cmpxchg(head, prev, next) != prev);
+		entry->next = *head;
+	} while (cmpxchg(head, entry->next, entry) != entry->next);
 
 	set_perf_counter_pending();
 
-	put_cpu_var(perf_wakeup_head);
+	put_cpu_var(perf_pending_head);
 }
 
 static int __perf_pending_run(void)
 {
-	struct perf_wakeup_entry *list;
+	struct perf_pending_entry *list;
 	int nr = 0;
 
-	list = xchg(&__get_cpu_var(perf_wakeup_head), PENDING_TAIL);
+	list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
 	while (list != PENDING_TAIL) {
-		struct perf_counter *counter = container_of(list,
-				struct perf_counter, wakeup);
+		void (*func)(struct perf_pending_entry *);
+		struct perf_pending_entry *entry = list;
 
 		list = list->next;
 
-		counter->wakeup.next = NULL;
+		func = entry->func;
+		entry->next = NULL;
 		/*
 		 * Ensure we observe the unqueue before we issue the wakeup,
 		 * so that we won't be waiting forever.
@@ -1636,7 +1646,7 @@ static int __perf_pending_run(void)
 		 */
 		smp_wmb();
 
-		perf_counter_wakeup(counter);
+		func(entry);
 		nr++;
 	}
 
@@ -1658,7 +1668,7 @@ static inline int perf_not_pending(struct perf_counter *counter)
 	 * so that we do not miss the wakeup. -- see perf_pending_handle()
 	 */
 	smp_rmb();
-	return counter->wakeup.next == NULL;
+	return counter->pending.next == NULL;
 }
 
 static void perf_pending_sync(struct perf_counter *counter)
@@ -1695,9 +1705,10 @@ struct perf_output_handle {
 
 static inline void __perf_output_wakeup(struct perf_output_handle *handle)
 {
-	if (handle->nmi)
-		perf_pending_queue(handle->counter);
-	else
+	if (handle->nmi) {
+		perf_pending_queue(&handle->counter->pending,
+				   perf_pending_wakeup);
+	} else
 		perf_counter_wakeup(handle->counter);
 }
 
-- 
cgit v1.2.3-70-g09d2


From f6c7d5fe58b4846ee0cb4b98b6042489705eced4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:04 +0200
Subject: perf_counter: theres more to overflow than writing events

Prepare for more generic overflow handling. The new perf_counter_overflow()
method will handle the generic bits of the counter overflow, and can return
a !0 return value, in which case the counter should be (soft) disabled, so
that it won't count until it's properly disabled.

XXX: do powerpc and swcounter

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094517.812109629@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/perf_counter.c |  2 +-
 arch/x86/kernel/cpu/perf_counter.c |  3 ++-
 include/linux/perf_counter.h       |  4 ++--
 kernel/perf_counter.c              | 29 +++++++++++++++++++++++------
 4 files changed, 28 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 0a4d14f279a..f88c35d0710 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -732,7 +732,7 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	 * Finally record data if requested.
 	 */
 	if (record)
-		perf_counter_output(counter, 1, regs);
+		perf_counter_overflow(counter, 1, regs);
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 438415866fe..1116a41bc7b 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -800,7 +800,8 @@ again:
 			continue;
 
 		perf_save_and_restart(counter);
-		perf_counter_output(counter, nmi, regs);
+		if (perf_counter_overflow(counter, nmi, regs))
+			__pmc_generic_disable(counter, &counter->hw, bit);
 	}
 
 	hw_perf_ack_status(ack);
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 977fb15a53f..ca2d4df29e0 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -491,8 +491,8 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
 	       struct perf_counter_context *ctx, int cpu);
 extern void perf_counter_update_userpage(struct perf_counter *counter);
 
-extern void perf_counter_output(struct perf_counter *counter,
-				int nmi, struct pt_regs *regs);
+extern int perf_counter_overflow(struct perf_counter *counter,
+				 int nmi, struct pt_regs *regs);
 /*
  * Return 1 for a software counter, 0 for a hardware counter
  */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0a2ade2e4f1..195e976eb07 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1800,8 +1800,8 @@ static void perf_output_end(struct perf_output_handle *handle)
 	rcu_read_unlock();
 }
 
-void perf_counter_output(struct perf_counter *counter,
-			 int nmi, struct pt_regs *regs)
+static void perf_counter_output(struct perf_counter *counter,
+				int nmi, struct pt_regs *regs)
 {
 	int ret;
 	u64 record_type = counter->hw_event.record_type;
@@ -2033,6 +2033,17 @@ void perf_counter_munmap(unsigned long addr, unsigned long len,
 	perf_counter_mmap_event(&mmap_event);
 }
 
+/*
+ * Generic counter overflow handling.
+ */
+
+int perf_counter_overflow(struct perf_counter *counter,
+			  int nmi, struct pt_regs *regs)
+{
+	perf_counter_output(counter, nmi, regs);
+	return 0;
+}
+
 /*
  * Generic software counter infrastructure
  */
@@ -2077,6 +2088,7 @@ static void perf_swcounter_set_period(struct perf_counter *counter)
 
 static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 {
+	enum hrtimer_restart ret = HRTIMER_RESTART;
 	struct perf_counter *counter;
 	struct pt_regs *regs;
 
@@ -2092,12 +2104,14 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 			!counter->hw_event.exclude_user)
 		regs = task_pt_regs(current);
 
-	if (regs)
-		perf_counter_output(counter, 0, regs);
+	if (regs) {
+		if (perf_counter_overflow(counter, 0, regs))
+			ret = HRTIMER_NORESTART;
+	}
 
 	hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period));
 
-	return HRTIMER_RESTART;
+	return ret;
 }
 
 static void perf_swcounter_overflow(struct perf_counter *counter,
@@ -2105,7 +2119,10 @@ static void perf_swcounter_overflow(struct perf_counter *counter,
 {
 	perf_swcounter_update(counter);
 	perf_swcounter_set_period(counter);
-	perf_counter_output(counter, nmi, regs);
+	if (perf_counter_overflow(counter, nmi, regs))
+		/* soft-disable the counter */
+		;
+
 }
 
 static int perf_swcounter_match(struct perf_counter *counter,
-- 
cgit v1.2.3-70-g09d2


From 339f7c90b8a2f3aa2dd4267e79f797999e8a3c59 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:06 +0200
Subject: perf_counter: PERF_RECORD_TIME

By popular request, provide means to log a timestamp along with the
counter overflow event.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094518.024173282@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  2 ++
 kernel/perf_counter.c        | 14 ++++++++++++++
 2 files changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index ca2d4df29e0..928a7fae096 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -102,6 +102,7 @@ enum perf_counter_record_format {
 	PERF_RECORD_TID		= 1U << 1,
 	PERF_RECORD_GROUP	= 1U << 2,
 	PERF_RECORD_CALLCHAIN	= 1U << 3,
+	PERF_RECORD_TIME	= 1U << 4,
 };
 
 /*
@@ -221,6 +222,7 @@ enum perf_event_type {
 	__PERF_EVENT_TID		= PERF_RECORD_TID,
 	__PERF_EVENT_GROUP		= PERF_RECORD_GROUP,
 	__PERF_EVENT_CALLCHAIN		= PERF_RECORD_CALLCHAIN,
+	__PERF_EVENT_TIME		= PERF_RECORD_TIME,
 };
 
 #ifdef __KERNEL__
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index c841563de04..19990d1f021 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1826,6 +1826,7 @@ static void perf_counter_output(struct perf_counter *counter,
 	} group_entry;
 	struct perf_callchain_entry *callchain = NULL;
 	int callchain_size = 0;
+	u64 time;
 
 	header.type = PERF_EVENT_COUNTER_OVERFLOW;
 	header.size = sizeof(header);
@@ -1862,6 +1863,16 @@ static void perf_counter_output(struct perf_counter *counter,
 		}
 	}
 
+	if (record_type & PERF_RECORD_TIME) {
+		/*
+		 * Maybe do better on x86 and provide cpu_clock_nmi()
+		 */
+		time = sched_clock();
+
+		header.type |= __PERF_EVENT_TIME;
+		header.size += sizeof(u64);
+	}
+
 	ret = perf_output_begin(&handle, counter, header.size, nmi);
 	if (ret)
 		return;
@@ -1895,6 +1906,9 @@ static void perf_counter_output(struct perf_counter *counter,
 	if (callchain)
 		perf_output_copy(&handle, callchain, callchain_size);
 
+	if (record_type & PERF_RECORD_TIME)
+		perf_output_put(&handle, time);
+
 	perf_output_end(&handle);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 79f146415623fe74f39af67c0f6adc208939a410 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:07 +0200
Subject: perf_counter: counter overflow limit

Provide means to auto-disable the counter after 'n' overflow events.

Create the counter with hw_event.disabled = 1, and then issue an
ioctl(fd, PREF_COUNTER_IOC_REFRESH, n); to set the limit and enable
the counter.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094518.083139737@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 12 ++++++++---
 kernel/perf_counter.c        | 51 +++++++++++++++++++++++++++++++++++---------
 2 files changed, 50 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 928a7fae096..ef4dcbff75a 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -155,8 +155,9 @@ struct perf_counter_hw_event {
 /*
  * Ioctls that can be done on a perf counter fd:
  */
-#define PERF_COUNTER_IOC_ENABLE		_IO('$', 0)
-#define PERF_COUNTER_IOC_DISABLE	_IO('$', 1)
+#define PERF_COUNTER_IOC_ENABLE		_IO ('$', 0)
+#define PERF_COUNTER_IOC_DISABLE	_IO ('$', 1)
+#define PERF_COUNTER_IOC_REFRESH	_IOW('$', 2, u32)
 
 /*
  * Structure of the page that can be mapped via mmap
@@ -403,9 +404,14 @@ struct perf_counter {
 	/* poll related */
 	wait_queue_head_t		waitq;
 	struct fasync_struct		*fasync;
-	/* optional: for NMIs */
+
+	/* delayed work for NMIs and such */
+	int				pending_wakeup;
+	int				pending_disable;
 	struct perf_pending_entry	pending;
 
+	atomic_t			event_limit;
+
 	void (*destroy)(struct perf_counter *);
 	struct rcu_head			rcu_head;
 #endif
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 19990d1f021..c05e10354bc 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -744,6 +744,12 @@ static void perf_counter_enable(struct perf_counter *counter)
 	spin_unlock_irq(&ctx->lock);
 }
 
+static void perf_counter_refresh(struct perf_counter *counter, int refresh)
+{
+	atomic_add(refresh, &counter->event_limit);
+	perf_counter_enable(counter);
+}
+
 /*
  * Enable a counter and all its children.
  */
@@ -1311,6 +1317,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case PERF_COUNTER_IOC_DISABLE:
 		perf_counter_disable_family(counter);
 		break;
+	case PERF_COUNTER_IOC_REFRESH:
+		perf_counter_refresh(counter, arg);
+		break;
 	default:
 		err = -ENOTTY;
 	}
@@ -1590,14 +1599,6 @@ void perf_counter_wakeup(struct perf_counter *counter)
 	kill_fasync(&counter->fasync, SIGIO, POLL_IN);
 }
 
-static void perf_pending_wakeup(struct perf_pending_entry *entry)
-{
-	struct perf_counter *counter = container_of(entry,
-			struct perf_counter, pending);
-
-	perf_counter_wakeup(counter);
-}
-
 /*
  * Pending wakeups
  *
@@ -1607,6 +1608,22 @@ static void perf_pending_wakeup(struct perf_pending_entry *entry)
  * single linked list and use cmpxchg() to add entries lockless.
  */
 
+static void perf_pending_counter(struct perf_pending_entry *entry)
+{
+	struct perf_counter *counter = container_of(entry,
+			struct perf_counter, pending);
+
+	if (counter->pending_disable) {
+		counter->pending_disable = 0;
+		perf_counter_disable(counter);
+	}
+
+	if (counter->pending_wakeup) {
+		counter->pending_wakeup = 0;
+		perf_counter_wakeup(counter);
+	}
+}
+
 #define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
 
 static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
@@ -1715,8 +1732,9 @@ struct perf_output_handle {
 static inline void __perf_output_wakeup(struct perf_output_handle *handle)
 {
 	if (handle->nmi) {
+		handle->counter->pending_wakeup = 1;
 		perf_pending_queue(&handle->counter->pending,
-				   perf_pending_wakeup);
+				   perf_pending_counter);
 	} else
 		perf_counter_wakeup(handle->counter);
 }
@@ -2063,8 +2081,21 @@ void perf_counter_munmap(unsigned long addr, unsigned long len,
 int perf_counter_overflow(struct perf_counter *counter,
 			  int nmi, struct pt_regs *regs)
 {
+	int events = atomic_read(&counter->event_limit);
+	int ret = 0;
+
+	if (events && atomic_dec_and_test(&counter->event_limit)) {
+		ret = 1;
+		if (nmi) {
+			counter->pending_disable = 1;
+			perf_pending_queue(&counter->pending,
+					   perf_pending_counter);
+		} else
+			perf_counter_disable(counter);
+	}
+
 	perf_counter_output(counter, nmi, regs);
-	return 0;
+	return ret;
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 0c593b3411341e3a05a61f5527df36ab02bd11e8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:08 +0200
Subject: perf_counter: comment the perf_event_type stuff

Describe the event format.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094518.211174347@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index ef4dcbff75a..81220188d05 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -208,6 +208,20 @@ struct perf_event_header {
 
 enum perf_event_type {
 
+	/*
+	 * The MMAP events record the PROT_EXEC mappings so that we can
+	 * correlate userspace IPs to code. They have the following structure:
+	 *
+	 * struct {
+	 * 	struct perf_event_header	header;
+	 *
+	 * 	u32				pid, tid;
+	 * 	u64				addr;
+	 * 	u64				len;
+	 * 	u64				pgoff;
+	 * 	char				filename[];
+	 * };
+	 */
 	PERF_EVENT_MMAP			= 1,
 	PERF_EVENT_MUNMAP		= 2,
 
@@ -217,6 +231,24 @@ enum perf_event_type {
 	 *
 	 * These events will have types of the form:
 	 *   PERF_EVENT_COUNTER_OVERFLOW { | __PERF_EVENT_* } *
+	 *
+	 * struct {
+	 * 	struct perf_event_header	header;
+	 *
+	 * 	{ u64			ip;	  } && __PERF_EVENT_IP
+	 * 	{ u32			pid, tid; } && __PERF_EVENT_TID
+	 *
+	 * 	{ u64			nr;
+	 * 	  { u64 event, val; } 	cnt[nr];  } && __PERF_EVENT_GROUP
+	 *
+	 * 	{ u16			nr,
+	 * 				hv,
+	 * 				kernel,
+	 * 				user;
+	 * 	  u64			ips[nr];  } && __PERF_EVENT_CALLCHAIN
+	 *
+	 * 	{ u64			time;     } && __PERF_EVENT_TIME
+	 * };
 	 */
 	PERF_EVENT_COUNTER_OVERFLOW	= 1UL << 31,
 	__PERF_EVENT_IP			= PERF_RECORD_IP,
-- 
cgit v1.2.3-70-g09d2


From 4c9e25428ff46b968a30f1dfafdba550cb6e4141 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:09 +0200
Subject: perf_counter: change event definition

Currently the definition of an event is slightly ambiguous. We have
wakeup events, for poll() and SIGIO, which are either generated
when a record crosses a page boundary (hw_events.wakeup_events == 0),
or every wakeup_events new records.

Now a record can be either a counter overflow record, or a number of
different things, like the mmap PROT_EXEC region notifications.

Then there is the PERF_COUNTER_IOC_REFRESH event limit, which only
considers counter overflows.

This patch changes then wakeup_events and SIGIO notification to only
consider overflow events. Furthermore it changes the SIGIO notification
to report SIGHUP when the event limit is reached and the counter will
be disabled.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094518.266679874@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  1 +
 kernel/perf_counter.c        | 22 +++++++++++++++-------
 2 files changed, 16 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 81220188d05..0f5a4005048 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -439,6 +439,7 @@ struct perf_counter {
 
 	/* delayed work for NMIs and such */
 	int				pending_wakeup;
+	int				pending_kill;
 	int				pending_disable;
 	struct perf_pending_entry	pending;
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index c05e10354bc..8c8eaf0625f 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1596,7 +1596,11 @@ void perf_counter_wakeup(struct perf_counter *counter)
 	rcu_read_unlock();
 
 	wake_up_all(&counter->waitq);
-	kill_fasync(&counter->fasync, SIGIO, POLL_IN);
+
+	if (counter->pending_kill) {
+		kill_fasync(&counter->fasync, SIGIO, counter->pending_kill);
+		counter->pending_kill = 0;
+	}
 }
 
 /*
@@ -1727,6 +1731,7 @@ struct perf_output_handle {
 	unsigned int		head;
 	int			wakeup;
 	int			nmi;
+	int			overflow;
 };
 
 static inline void __perf_output_wakeup(struct perf_output_handle *handle)
@@ -1741,7 +1746,7 @@ static inline void __perf_output_wakeup(struct perf_output_handle *handle)
 
 static int perf_output_begin(struct perf_output_handle *handle,
 			     struct perf_counter *counter, unsigned int size,
-			     int nmi)
+			     int nmi, int overflow)
 {
 	struct perf_mmap_data *data;
 	unsigned int offset, head;
@@ -1751,8 +1756,9 @@ static int perf_output_begin(struct perf_output_handle *handle,
 	if (!data)
 		goto out;
 
-	handle->counter	= counter;
-	handle->nmi	= nmi;
+	handle->counter	 = counter;
+	handle->nmi	 = nmi;
+	handle->overflow = overflow;
 
 	if (!data->nr_pages)
 		goto fail;
@@ -1816,7 +1822,7 @@ static void perf_output_end(struct perf_output_handle *handle)
 {
 	int wakeup_events = handle->counter->hw_event.wakeup_events;
 
-	if (wakeup_events) {
+	if (handle->overflow && wakeup_events) {
 		int events = atomic_inc_return(&handle->data->events);
 		if (events >= wakeup_events) {
 			atomic_sub(wakeup_events, &handle->data->events);
@@ -1891,7 +1897,7 @@ static void perf_counter_output(struct perf_counter *counter,
 		header.size += sizeof(u64);
 	}
 
-	ret = perf_output_begin(&handle, counter, header.size, nmi);
+	ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
 	if (ret)
 		return;
 
@@ -1955,7 +1961,7 @@ static void perf_counter_mmap_output(struct perf_counter *counter,
 {
 	struct perf_output_handle handle;
 	int size = mmap_event->event.header.size;
-	int ret = perf_output_begin(&handle, counter, size, 0);
+	int ret = perf_output_begin(&handle, counter, size, 0, 0);
 
 	if (ret)
 		return;
@@ -2084,8 +2090,10 @@ int perf_counter_overflow(struct perf_counter *counter,
 	int events = atomic_read(&counter->event_limit);
 	int ret = 0;
 
+	counter->pending_kill = POLL_IN;
 	if (events && atomic_dec_and_test(&counter->event_limit)) {
 		ret = 1;
+		counter->pending_kill = POLL_HUP;
 		if (nmi) {
 			counter->pending_disable = 1;
 			perf_pending_queue(&counter->pending,
-- 
cgit v1.2.3-70-g09d2


From 4af4998b8aa35600f4c4a4f3c3a23baca6081d02 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:10 +0200
Subject: perf_counter: rework context time

Since perf_counter_context is switched along with tasks, we can
maintain the context time without using the task runtime clock.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094518.353552838@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 10 ++----
 kernel/perf_counter.c        | 78 +++++++++++++++++++-------------------------
 2 files changed, 37 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 0f5a4005048..7f5d353d78a 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -477,14 +477,10 @@ struct perf_counter_context {
 	struct task_struct	*task;
 
 	/*
-	 * time_now is the current time in nanoseconds since an arbitrary
-	 * point in the past.  For per-task counters, this is based on the
-	 * task clock, and for per-cpu counters it is based on the cpu clock.
-	 * time_lost is an offset from the task/cpu clock, used to make it
-	 * appear that time only passes while the context is scheduled in.
+	 * Context clock, runs when context enabled.
 	 */
-	u64			time_now;
-	u64			time_lost;
+	u64			time;
+	u64			timestamp;
 #endif
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 8c8eaf0625f..84d85ab4e16 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -117,7 +117,7 @@ counter_sched_out(struct perf_counter *counter,
 		return;
 
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
-	counter->tstamp_stopped = ctx->time_now;
+	counter->tstamp_stopped = ctx->time;
 	counter->hw_ops->disable(counter);
 	counter->oncpu = -1;
 
@@ -253,27 +253,20 @@ retry:
 	spin_unlock_irq(&ctx->lock);
 }
 
-/*
- * Get the current time for this context.
- * If this is a task context, we use the task's task clock,
- * or for a per-cpu context, we use the cpu clock.
- */
-static u64 get_context_time(struct perf_counter_context *ctx, int update)
+static inline u64 perf_clock(void)
 {
-	struct task_struct *curr = ctx->task;
-
-	if (!curr)
-		return cpu_clock(smp_processor_id());
-
-	return __task_delta_exec(curr, update) + curr->se.sum_exec_runtime;
+	return cpu_clock(smp_processor_id());
 }
 
 /*
  * Update the record of the current time in a context.
  */
-static void update_context_time(struct perf_counter_context *ctx, int update)
+static void update_context_time(struct perf_counter_context *ctx)
 {
-	ctx->time_now = get_context_time(ctx, update) - ctx->time_lost;
+	u64 now = perf_clock();
+
+	ctx->time += now - ctx->timestamp;
+	ctx->timestamp = now;
 }
 
 /*
@@ -284,15 +277,17 @@ static void update_counter_times(struct perf_counter *counter)
 	struct perf_counter_context *ctx = counter->ctx;
 	u64 run_end;
 
-	if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
-		counter->total_time_enabled = ctx->time_now -
-			counter->tstamp_enabled;
-		if (counter->state == PERF_COUNTER_STATE_INACTIVE)
-			run_end = counter->tstamp_stopped;
-		else
-			run_end = ctx->time_now;
-		counter->total_time_running = run_end - counter->tstamp_running;
-	}
+	if (counter->state < PERF_COUNTER_STATE_INACTIVE)
+		return;
+
+	counter->total_time_enabled = ctx->time - counter->tstamp_enabled;
+
+	if (counter->state == PERF_COUNTER_STATE_INACTIVE)
+		run_end = counter->tstamp_stopped;
+	else
+		run_end = ctx->time;
+
+	counter->total_time_running = run_end - counter->tstamp_running;
 }
 
 /*
@@ -332,7 +327,7 @@ static void __perf_counter_disable(void *info)
 	 * If it is in error state, leave it in error state.
 	 */
 	if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
-		update_context_time(ctx, 1);
+		update_context_time(ctx);
 		update_counter_times(counter);
 		if (counter == counter->group_leader)
 			group_sched_out(counter, cpuctx, ctx);
@@ -426,7 +421,7 @@ counter_sched_in(struct perf_counter *counter,
 		return -EAGAIN;
 	}
 
-	counter->tstamp_running += ctx->time_now - counter->tstamp_stopped;
+	counter->tstamp_running += ctx->time - counter->tstamp_stopped;
 
 	if (!is_software_counter(counter))
 		cpuctx->active_oncpu++;
@@ -493,9 +488,9 @@ static void add_counter_to_ctx(struct perf_counter *counter,
 	list_add_counter(counter, ctx);
 	ctx->nr_counters++;
 	counter->prev_state = PERF_COUNTER_STATE_OFF;
-	counter->tstamp_enabled = ctx->time_now;
-	counter->tstamp_running = ctx->time_now;
-	counter->tstamp_stopped = ctx->time_now;
+	counter->tstamp_enabled = ctx->time;
+	counter->tstamp_running = ctx->time;
+	counter->tstamp_stopped = ctx->time;
 }
 
 /*
@@ -522,7 +517,7 @@ static void __perf_install_in_context(void *info)
 
 	curr_rq_lock_irq_save(&flags);
 	spin_lock(&ctx->lock);
-	update_context_time(ctx, 1);
+	update_context_time(ctx);
 
 	/*
 	 * Protect the list operation against NMI by disabling the
@@ -648,13 +643,13 @@ static void __perf_counter_enable(void *info)
 
 	curr_rq_lock_irq_save(&flags);
 	spin_lock(&ctx->lock);
-	update_context_time(ctx, 1);
+	update_context_time(ctx);
 
 	counter->prev_state = counter->state;
 	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
 		goto unlock;
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
-	counter->tstamp_enabled = ctx->time_now - counter->total_time_enabled;
+	counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
 
 	/*
 	 * If the counter is in a group and isn't the group leader,
@@ -737,8 +732,8 @@ static void perf_counter_enable(struct perf_counter *counter)
 	 */
 	if (counter->state == PERF_COUNTER_STATE_OFF) {
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
-		counter->tstamp_enabled = ctx->time_now -
-			counter->total_time_enabled;
+		counter->tstamp_enabled =
+			ctx->time - counter->total_time_enabled;
 	}
  out:
 	spin_unlock_irq(&ctx->lock);
@@ -778,7 +773,7 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 	ctx->is_active = 0;
 	if (likely(!ctx->nr_counters))
 		goto out;
-	update_context_time(ctx, 0);
+	update_context_time(ctx);
 
 	flags = hw_perf_save_disable();
 	if (ctx->nr_active) {
@@ -883,12 +878,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 	if (likely(!ctx->nr_counters))
 		goto out;
 
-	/*
-	 * Add any time since the last sched_out to the lost time
-	 * so it doesn't get included in the total_time_enabled and
-	 * total_time_running measures for counters in the context.
-	 */
-	ctx->time_lost = get_context_time(ctx, 0) - ctx->time_now;
+	ctx->timestamp = perf_clock();
 
 	flags = hw_perf_save_disable();
 
@@ -1043,8 +1033,8 @@ int perf_counter_task_enable(void)
 		if (counter->state > PERF_COUNTER_STATE_OFF)
 			continue;
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
-		counter->tstamp_enabled = ctx->time_now -
-			counter->total_time_enabled;
+		counter->tstamp_enabled =
+			ctx->time - counter->total_time_enabled;
 		counter->hw_event.disabled = 0;
 	}
 	hw_perf_restore(perf_flags);
@@ -1113,7 +1103,7 @@ static void __read(void *info)
 
 	curr_rq_lock_irq_save(&flags);
 	if (ctx->is_active)
-		update_context_time(ctx, 1);
+		update_context_time(ctx);
 	counter->hw_ops->read(counter);
 	update_counter_times(counter);
 	curr_rq_unlock_irq_restore(&flags);
-- 
cgit v1.2.3-70-g09d2


From 849691a6cd40270ff5f4a8846d5f6bf8df663ffc Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 6 Apr 2009 11:45:12 +0200
Subject: perf_counter: remove rq->lock usage

Now that all the task runtime clock users are gone, remove the ugly
rq->lock usage from perf counters, which solves the nasty deadlock
seen when a software task clock counter was read from an NMI overflow
context.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090406094518.531137582@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel_stat.h |  2 --
 kernel/perf_counter.c       | 42 ++++++++++++++++--------------------------
 kernel/sched.c              | 20 --------------------
 3 files changed, 16 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index b6d2887a5d8..080d1fd461d 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -85,8 +85,6 @@ static inline unsigned int kstat_irqs(unsigned int irq)
 /*
  * Lock/unlock the current runqueue - to extract task statistics:
  */
-extern void curr_rq_lock_irq_save(unsigned long *flags);
-extern void curr_rq_unlock_irq_restore(unsigned long *flags);
 extern unsigned long long __task_delta_exec(struct task_struct *tsk, int update);
 extern unsigned long long task_delta_exec(struct task_struct *);
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 56b7eb53d67..f4f7596f784 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -172,8 +172,7 @@ static void __perf_counter_remove_from_context(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	curr_rq_lock_irq_save(&flags);
-	spin_lock(&ctx->lock);
+	spin_lock_irqsave(&ctx->lock, flags);
 
 	counter_sched_out(counter, cpuctx, ctx);
 
@@ -198,8 +197,7 @@ static void __perf_counter_remove_from_context(void *info)
 			    perf_max_counters - perf_reserved_percpu);
 	}
 
-	spin_unlock(&ctx->lock);
-	curr_rq_unlock_irq_restore(&flags);
+	spin_unlock_irqrestore(&ctx->lock, flags);
 }
 
 
@@ -319,8 +317,7 @@ static void __perf_counter_disable(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	curr_rq_lock_irq_save(&flags);
-	spin_lock(&ctx->lock);
+	spin_lock_irqsave(&ctx->lock, flags);
 
 	/*
 	 * If the counter is on, turn it off.
@@ -336,8 +333,7 @@ static void __perf_counter_disable(void *info)
 		counter->state = PERF_COUNTER_STATE_OFF;
 	}
 
-	spin_unlock(&ctx->lock);
-	curr_rq_unlock_irq_restore(&flags);
+	spin_unlock_irqrestore(&ctx->lock, flags);
 }
 
 /*
@@ -515,8 +511,7 @@ static void __perf_install_in_context(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	curr_rq_lock_irq_save(&flags);
-	spin_lock(&ctx->lock);
+	spin_lock_irqsave(&ctx->lock, flags);
 	update_context_time(ctx);
 
 	/*
@@ -565,8 +560,7 @@ static void __perf_install_in_context(void *info)
  unlock:
 	hw_perf_restore(perf_flags);
 
-	spin_unlock(&ctx->lock);
-	curr_rq_unlock_irq_restore(&flags);
+	spin_unlock_irqrestore(&ctx->lock, flags);
 }
 
 /*
@@ -641,8 +635,7 @@ static void __perf_counter_enable(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	curr_rq_lock_irq_save(&flags);
-	spin_lock(&ctx->lock);
+	spin_lock_irqsave(&ctx->lock, flags);
 	update_context_time(ctx);
 
 	counter->prev_state = counter->state;
@@ -678,8 +671,7 @@ static void __perf_counter_enable(void *info)
 	}
 
  unlock:
-	spin_unlock(&ctx->lock);
-	curr_rq_unlock_irq_restore(&flags);
+	spin_unlock_irqrestore(&ctx->lock, flags);
 }
 
 /*
@@ -971,7 +963,7 @@ int perf_counter_task_disable(void)
 	if (likely(!ctx->nr_counters))
 		return 0;
 
-	curr_rq_lock_irq_save(&flags);
+	local_irq_save(flags);
 	cpu = smp_processor_id();
 
 	perf_counter_task_sched_out(curr, cpu);
@@ -992,9 +984,7 @@ int perf_counter_task_disable(void)
 
 	hw_perf_restore(perf_flags);
 
-	spin_unlock(&ctx->lock);
-
-	curr_rq_unlock_irq_restore(&flags);
+	spin_unlock_irqrestore(&ctx->lock, flags);
 
 	return 0;
 }
@@ -1011,7 +1001,7 @@ int perf_counter_task_enable(void)
 	if (likely(!ctx->nr_counters))
 		return 0;
 
-	curr_rq_lock_irq_save(&flags);
+	local_irq_save(flags);
 	cpu = smp_processor_id();
 
 	perf_counter_task_sched_out(curr, cpu);
@@ -1037,7 +1027,7 @@ int perf_counter_task_enable(void)
 
 	perf_counter_task_sched_in(curr, cpu);
 
-	curr_rq_unlock_irq_restore(&flags);
+	local_irq_restore(flags);
 
 	return 0;
 }
@@ -1095,12 +1085,12 @@ static void __read(void *info)
 	struct perf_counter_context *ctx = counter->ctx;
 	unsigned long flags;
 
-	curr_rq_lock_irq_save(&flags);
+	local_irq_save(flags);
 	if (ctx->is_active)
 		update_context_time(ctx);
 	counter->hw_ops->read(counter);
 	update_counter_times(counter);
-	curr_rq_unlock_irq_restore(&flags);
+	local_irq_restore(flags);
 }
 
 static u64 perf_counter_read(struct perf_counter *counter)
@@ -2890,7 +2880,7 @@ __perf_counter_exit_task(struct task_struct *child,
 		 * Be careful about zapping the list - IRQ/NMI context
 		 * could still be processing it:
 		 */
-		curr_rq_lock_irq_save(&flags);
+		local_irq_save(flags);
 		perf_flags = hw_perf_save_disable();
 
 		cpuctx = &__get_cpu_var(perf_cpu_context);
@@ -2903,7 +2893,7 @@ __perf_counter_exit_task(struct task_struct *child,
 		child_ctx->nr_counters--;
 
 		hw_perf_restore(perf_flags);
-		curr_rq_unlock_irq_restore(&flags);
+		local_irq_restore(flags);
 	}
 
 	parent_counter = child_counter->parent;
diff --git a/kernel/sched.c b/kernel/sched.c
index f76e3c0188a..0de2f814fb1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -997,26 +997,6 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
 	}
 }
 
-void curr_rq_lock_irq_save(unsigned long *flags)
-	__acquires(rq->lock)
-{
-	struct rq *rq;
-
-	local_irq_save(*flags);
-	rq = cpu_rq(smp_processor_id());
-	spin_lock(&rq->lock);
-}
-
-void curr_rq_unlock_irq_restore(unsigned long *flags)
-	__releases(rq->lock)
-{
-	struct rq *rq;
-
-	rq = cpu_rq(smp_processor_id());
-	spin_unlock(&rq->lock);
-	local_irq_restore(*flags);
-}
-
 void task_rq_unlock_wait(struct task_struct *p)
 {
 	struct rq *rq = task_rq(p);
-- 
cgit v1.2.3-70-g09d2


From a26b89f05d194413c7238e0bea071054f6b5d3c8 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:34 +0200
Subject: sched, hw-branch-tracer: add wait_task_context_switch() function to
 sched.h

Add a function to wait until some other task has been
switched out at least once.

This differs from wait_task_inactive() subtly, in that the
latter will wait until the task has left the CPU.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: markus.t.metzger@gmail.com
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: oleg@redhat.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144549.794157000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  2 ++
 kernel/sched.c        | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b94f3541f67..a5b9a83065f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1993,8 +1993,10 @@ extern void set_task_comm(struct task_struct *tsk, char *from);
 extern char *get_task_comm(char *to, struct task_struct *tsk);
 
 #ifdef CONFIG_SMP
+extern void wait_task_context_switch(struct task_struct *p);
 extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
 #else
+static inline void wait_task_context_switch(struct task_struct *p) {}
 static inline unsigned long wait_task_inactive(struct task_struct *p,
 					       long match_state)
 {
diff --git a/kernel/sched.c b/kernel/sched.c
index 6cc1fd5d507..f91bc8141dc 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2002,6 +2002,49 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
 	return 1;
 }
 
+/*
+ * wait_task_context_switch -	wait for a thread to complete at least one
+ *				context switch.
+ *
+ * @p must not be current.
+ */
+void wait_task_context_switch(struct task_struct *p)
+{
+	unsigned long nvcsw, nivcsw, flags;
+	int running;
+	struct rq *rq;
+
+	nvcsw	= p->nvcsw;
+	nivcsw	= p->nivcsw;
+	for (;;) {
+		/*
+		 * The runqueue is assigned before the actual context
+		 * switch. We need to take the runqueue lock.
+		 *
+		 * We could check initially without the lock but it is
+		 * very likely that we need to take the lock in every
+		 * iteration.
+		 */
+		rq = task_rq_lock(p, &flags);
+		running = task_running(rq, p);
+		task_rq_unlock(rq, &flags);
+
+		if (likely(!running))
+			break;
+		/*
+		 * The switch count is incremented before the actual
+		 * context switch. We thus wait for two switches to be
+		 * sure at least one completed.
+		 */
+		if ((p->nvcsw - nvcsw) > 1)
+			break;
+		if ((p->nivcsw - nivcsw) > 1)
+			break;
+
+		cpu_relax();
+	}
+}
+
 /*
  * wait_task_inactive - wait for a thread to unschedule.
  *
-- 
cgit v1.2.3-70-g09d2


From e2b371f00a6f529f6362654239bdec8dcd510760 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:35 +0200
Subject: mm, x86, ptrace, bts: defer branch trace stopping

When a ptraced task is unlinked, we need to stop branch tracing for
that task.

Since the unlink is called with interrupts disabled, and we need
interrupts enabled to stop branch tracing, we defer the work.

Collect all branch tracing related stuff in a branch tracing context.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144550.712401000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/processor.h |   4 -
 arch/x86/kernel/ptrace.c         | 254 ++++++++++++++++++++++++++-------------
 include/linux/mm.h               |   3 +-
 include/linux/sched.h            |   9 +-
 mm/mlock.c                       |  13 +-
 5 files changed, 179 insertions(+), 104 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 34c52370f2f..2483807e06e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -458,10 +458,6 @@ struct thread_struct {
 /* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
 	struct ds_context	*ds_ctx;
 #endif /* CONFIG_X86_DS */
-#ifdef CONFIG_X86_PTRACE_BTS
-/* the signal to send on a bts buffer overflow */
-	unsigned int	bts_ovfl_signal;
-#endif /* CONFIG_X86_PTRACE_BTS */
 };
 
 static inline unsigned long native_get_debugreg(int regno)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index fe9345c967d..7c21d1e8cae 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -22,6 +22,7 @@
 #include <linux/seccomp.h>
 #include <linux/signal.h>
 #include <linux/ftrace.h>
+#include <linux/workqueue.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -577,17 +578,119 @@ static int ioperm_get(struct task_struct *target,
 }
 
 #ifdef CONFIG_X86_PTRACE_BTS
+/*
+ * A branch trace store context.
+ *
+ * Contexts may only be installed by ptrace_bts_config() and only for
+ * ptraced tasks.
+ *
+ * Contexts are destroyed when the tracee is detached from the tracer.
+ * The actual destruction work requires interrupts enabled, so the
+ * work is deferred and will be scheduled during __ptrace_unlink().
+ *
+ * Contexts hold an additional task_struct reference on the traced
+ * task, as well as a reference on the tracer's mm.
+ *
+ * Ptrace already holds a task_struct for the duration of ptrace operations,
+ * but since destruction is deferred, it may be executed after both
+ * tracer and tracee exited.
+ */
+struct bts_context {
+	/* The branch trace handle. */
+	struct bts_tracer	*tracer;
+
+	/* The buffer used to store the branch trace and its size. */
+	void			*buffer;
+	unsigned int		size;
+
+	/* The mm that paid for the above buffer. */
+	struct mm_struct	*mm;
+
+	/* The task this context belongs to. */
+	struct task_struct	*task;
+
+	/* The signal to send on a bts buffer overflow. */
+	unsigned int		bts_ovfl_signal;
+
+	/* The work struct to destroy a context. */
+	struct work_struct	work;
+};
+
+static inline void alloc_bts_buffer(struct bts_context *context,
+				    unsigned int size)
+{
+	void *buffer;
+
+	buffer = alloc_locked_buffer(size);
+	if (buffer) {
+		context->buffer = buffer;
+		context->size = size;
+		context->mm = get_task_mm(current);
+	}
+}
+
+static inline void free_bts_buffer(struct bts_context *context)
+{
+	if (!context->buffer)
+		return;
+
+	kfree(context->buffer);
+	context->buffer = NULL;
+
+	refund_locked_buffer_memory(context->mm, context->size);
+	context->size = 0;
+
+	mmput(context->mm);
+	context->mm = NULL;
+}
+
+static void free_bts_context_work(struct work_struct *w)
+{
+	struct bts_context *context;
+
+	context = container_of(w, struct bts_context, work);
+
+	ds_release_bts(context->tracer);
+	put_task_struct(context->task);
+	free_bts_buffer(context);
+	kfree(context);
+}
+
+static inline void free_bts_context(struct bts_context *context)
+{
+	INIT_WORK(&context->work, free_bts_context_work);
+	schedule_work(&context->work);
+}
+
+static inline struct bts_context *alloc_bts_context(struct task_struct *task)
+{
+	struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL);
+	if (context) {
+		context->task = task;
+		task->bts = context;
+
+		get_task_struct(task);
+	}
+
+	return context;
+}
+
 static int ptrace_bts_read_record(struct task_struct *child, size_t index,
 				  struct bts_struct __user *out)
 {
+	struct bts_context *context;
 	const struct bts_trace *trace;
 	struct bts_struct bts;
 	const unsigned char *at;
 	int error;
 
-	trace = ds_read_bts(child->bts);
+	context = child->bts;
+	if (!context)
+		return -ESRCH;
+
+	trace = ds_read_bts(context->tracer);
 	if (!trace)
-		return -EPERM;
+		return -ESRCH;
 
 	at = trace->ds.top - ((index + 1) * trace->ds.size);
 	if ((void *)at < trace->ds.begin)
@@ -596,7 +699,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
 	if (!trace->read)
 		return -EOPNOTSUPP;
 
-	error = trace->read(child->bts, at, &bts);
+	error = trace->read(context->tracer, at, &bts);
 	if (error < 0)
 		return error;
 
@@ -610,13 +713,18 @@ static int ptrace_bts_drain(struct task_struct *child,
 			    long size,
 			    struct bts_struct __user *out)
 {
+	struct bts_context *context;
 	const struct bts_trace *trace;
 	const unsigned char *at;
 	int error, drained = 0;
 
-	trace = ds_read_bts(child->bts);
+	context = child->bts;
+	if (!context)
+		return -ESRCH;
+
+	trace = ds_read_bts(context->tracer);
 	if (!trace)
-		return -EPERM;
+		return -ESRCH;
 
 	if (!trace->read)
 		return -EOPNOTSUPP;
@@ -627,9 +735,8 @@ static int ptrace_bts_drain(struct task_struct *child,
 	for (at = trace->ds.begin; (void *)at < trace->ds.top;
 	     out++, drained++, at += trace->ds.size) {
 		struct bts_struct bts;
-		int error;
 
-		error = trace->read(child->bts, at, &bts);
+		error = trace->read(context->tracer, at, &bts);
 		if (error < 0)
 			return error;
 
@@ -639,35 +746,18 @@ static int ptrace_bts_drain(struct task_struct *child,
 
 	memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
 
-	error = ds_reset_bts(child->bts);
+	error = ds_reset_bts(context->tracer);
 	if (error < 0)
 		return error;
 
 	return drained;
 }
 
-static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size)
-{
-	child->bts_buffer = alloc_locked_buffer(size);
-	if (!child->bts_buffer)
-		return -ENOMEM;
-
-	child->bts_size = size;
-
-	return 0;
-}
-
-static void ptrace_bts_free_buffer(struct task_struct *child)
-{
-	free_locked_buffer(child->bts_buffer, child->bts_size);
-	child->bts_buffer = NULL;
-	child->bts_size = 0;
-}
-
 static int ptrace_bts_config(struct task_struct *child,
 			     long cfg_size,
 			     const struct ptrace_bts_config __user *ucfg)
 {
+	struct bts_context *context;
 	struct ptrace_bts_config cfg;
 	unsigned int flags = 0;
 
@@ -677,28 +767,31 @@ static int ptrace_bts_config(struct task_struct *child,
 	if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
 		return -EFAULT;
 
-	if (child->bts) {
-		ds_release_bts(child->bts);
-		child->bts = NULL;
-	}
+	context = child->bts;
+	if (!context)
+		context = alloc_bts_context(child);
+	if (!context)
+		return -ENOMEM;
 
 	if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
 		if (!cfg.signal)
 			return -EINVAL;
 
-		child->thread.bts_ovfl_signal = cfg.signal;
 		return -EOPNOTSUPP;
+		context->bts_ovfl_signal = cfg.signal;
 	}
 
-	if ((cfg.flags & PTRACE_BTS_O_ALLOC) &&
-	    (cfg.size != child->bts_size)) {
-		int error;
+	ds_release_bts(context->tracer);
+	context->tracer = NULL;
 
-		ptrace_bts_free_buffer(child);
+	if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) {
+		free_bts_buffer(context);
+		if (!cfg.size)
+			return 0;
 
-		error = ptrace_bts_allocate_buffer(child, cfg.size);
-		if (error < 0)
-			return error;
+		alloc_bts_buffer(context, cfg.size);
+		if (!context->buffer)
+			return -ENOMEM;
 	}
 
 	if (cfg.flags & PTRACE_BTS_O_TRACE)
@@ -707,15 +800,13 @@ static int ptrace_bts_config(struct task_struct *child,
 	if (cfg.flags & PTRACE_BTS_O_SCHED)
 		flags |= BTS_TIMESTAMPS;
 
-	child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size,
-				    /* ovfl = */ NULL, /* th = */ (size_t)-1,
-				    flags);
-	if (IS_ERR(child->bts)) {
-		int error = PTR_ERR(child->bts);
-
-		ptrace_bts_free_buffer(child);
-		child->bts = NULL;
+	context->tracer = ds_request_bts(child, context->buffer, context->size,
+					 NULL, (size_t)-1, flags);
+	if (unlikely(IS_ERR(context->tracer))) {
+		int error = PTR_ERR(context->tracer);
 
+		free_bts_buffer(context);
+		context->tracer = NULL;
 		return error;
 	}
 
@@ -726,20 +817,25 @@ static int ptrace_bts_status(struct task_struct *child,
 			     long cfg_size,
 			     struct ptrace_bts_config __user *ucfg)
 {
+	struct bts_context *context;
 	const struct bts_trace *trace;
 	struct ptrace_bts_config cfg;
 
+	context = child->bts;
+	if (!context)
+		return -ESRCH;
+
 	if (cfg_size < sizeof(cfg))
 		return -EIO;
 
-	trace = ds_read_bts(child->bts);
+	trace = ds_read_bts(context->tracer);
 	if (!trace)
-		return -EPERM;
+		return -ESRCH;
 
 	memset(&cfg, 0, sizeof(cfg));
-	cfg.size = trace->ds.end - trace->ds.begin;
-	cfg.signal = child->thread.bts_ovfl_signal;
-	cfg.bts_size = sizeof(struct bts_struct);
+	cfg.size	= trace->ds.end - trace->ds.begin;
+	cfg.signal	= context->bts_ovfl_signal;
+	cfg.bts_size	= sizeof(struct bts_struct);
 
 	if (cfg.signal)
 		cfg.flags |= PTRACE_BTS_O_SIGNAL;
@@ -758,67 +854,56 @@ static int ptrace_bts_status(struct task_struct *child,
 
 static int ptrace_bts_clear(struct task_struct *child)
 {
+	struct bts_context *context;
 	const struct bts_trace *trace;
 
-	trace = ds_read_bts(child->bts);
+	context = child->bts;
+	if (!context)
+		return -ESRCH;
+
+	trace = ds_read_bts(context->tracer);
 	if (!trace)
-		return -EPERM;
+		return -ESRCH;
 
 	memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
 
-	return ds_reset_bts(child->bts);
+	return ds_reset_bts(context->tracer);
 }
 
 static int ptrace_bts_size(struct task_struct *child)
 {
+	struct bts_context *context;
 	const struct bts_trace *trace;
 
-	trace = ds_read_bts(child->bts);
+	context = child->bts;
+	if (!context)
+		return -ESRCH;
+
+	trace = ds_read_bts(context->tracer);
 	if (!trace)
-		return -EPERM;
+		return -ESRCH;
 
 	return (trace->ds.top - trace->ds.begin) / trace->ds.size;
 }
 
-static void ptrace_bts_fork(struct task_struct *tsk)
+static inline void ptrace_bts_fork(struct task_struct *tsk)
 {
 	tsk->bts = NULL;
-	tsk->bts_buffer = NULL;
-	tsk->bts_size = 0;
-	tsk->thread.bts_ovfl_signal = 0;
 }
 
-static void ptrace_bts_untrace(struct task_struct *child)
+/*
+ * Called from __ptrace_unlink() after the child has been moved back
+ * to its original parent.
+ */
+static inline void ptrace_bts_untrace(struct task_struct *child)
 {
 	if (unlikely(child->bts)) {
-		ds_release_bts(child->bts);
+		free_bts_context(child->bts);
 		child->bts = NULL;
-
-		/* We cannot update total_vm and locked_vm since
-		   child's mm is already gone. But we can reclaim the
-		   memory. */
-		kfree(child->bts_buffer);
-		child->bts_buffer = NULL;
-		child->bts_size = 0;
 	}
 }
-
-static void ptrace_bts_detach(struct task_struct *child)
-{
-	/*
-	 * Ptrace_detach() races with ptrace_untrace() in case
-	 * the child dies and is reaped by another thread.
-	 *
-	 * We only do the memory accounting at this point and
-	 * leave the buffer deallocation and the bts tracer
-	 * release to ptrace_bts_untrace() which will be called
-	 * later on with tasklist_lock held.
-	 */
-	release_locked_buffer(child->bts_buffer, child->bts_size);
-}
 #else
 static inline void ptrace_bts_fork(struct task_struct *tsk) {}
-static inline void ptrace_bts_detach(struct task_struct *child) {}
 static inline void ptrace_bts_untrace(struct task_struct *child) {}
 #endif /* CONFIG_X86_PTRACE_BTS */
 
@@ -843,7 +928,6 @@ void ptrace_disable(struct task_struct *child)
 #ifdef TIF_SYSCALL_EMU
 	clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 #endif
-	ptrace_bts_detach(child);
 }
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bff1f0d475c..64d8ed2538a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -13,6 +13,7 @@
 #include <linux/prio_tree.h>
 #include <linux/debug_locks.h>
 #include <linux/mm_types.h>
+#include <linux/sched.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -1321,6 +1322,6 @@ void vmemmap_populate_print_last(void);
 
 extern void *alloc_locked_buffer(size_t size);
 extern void free_locked_buffer(void *buffer, size_t size);
-extern void release_locked_buffer(void *buffer, size_t size);
+extern void refund_locked_buffer_memory(struct mm_struct *mm, size_t size);
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a5b9a83065f..52b8cd049c2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -96,8 +96,8 @@ struct exec_domain;
 struct futex_pi_state;
 struct robust_list_head;
 struct bio;
-struct bts_tracer;
 struct fs_struct;
+struct bts_context;
 
 /*
  * List of flags we want to share for kernel threads,
@@ -1210,12 +1210,7 @@ struct task_struct {
 	 * This is the tracer handle for the ptrace BTS extension.
 	 * This field actually belongs to the ptracer task.
 	 */
-	struct bts_tracer *bts;
-	/*
-	 * The buffer to hold the BTS data.
-	 */
-	void *bts_buffer;
-	size_t bts_size;
+	struct bts_context *bts;
 #endif /* CONFIG_X86_PTRACE_BTS */
 
 	/* PID/PID hash table linkage. */
diff --git a/mm/mlock.c b/mm/mlock.c
index cbe9e0581b7..749383b442c 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -660,21 +660,20 @@ void *alloc_locked_buffer(size_t size)
 	return buffer;
 }
 
-void release_locked_buffer(void *buffer, size_t size)
+void refund_locked_buffer_memory(struct mm_struct *mm, size_t size)
 {
 	unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
 
-	down_write(&current->mm->mmap_sem);
+	down_write(&mm->mmap_sem);
 
-	current->mm->total_vm  -= pgsz;
-	current->mm->locked_vm -= pgsz;
+	mm->total_vm  -= pgsz;
+	mm->locked_vm -= pgsz;
 
-	up_write(&current->mm->mmap_sem);
+	up_write(&mm->mmap_sem);
 }
 
 void free_locked_buffer(void *buffer, size_t size)
 {
-	release_locked_buffer(buffer, size);
-
+	refund_locked_buffer_memory(current->mm, size);
 	kfree(buffer);
 }
-- 
cgit v1.2.3-70-g09d2


From 0f4814065ff8c24ca8bfd75c9b73502be152c287 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 3 Apr 2009 16:43:48 +0200
Subject: x86, ptrace: add bts context unconditionally

Add the ptrace bts context field to task_struct unconditionally.

Initialize the field directly in copy_process().
Remove all the unneeded functionality used to initialize that field.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: oleg@redhat.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144603.292754000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/ptrace.h |  9 ++++-----
 arch/x86/kernel/ptrace.c      | 20 +-------------------
 include/linux/ptrace.h        | 10 ----------
 include/linux/sched.h         |  2 --
 kernel/fork.c                 |  4 ++--
 kernel/ptrace.c               | 10 ----------
 6 files changed, 7 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index e304b66abee..5cdd19f20b5 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -235,12 +235,11 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
 extern int do_set_thread_area(struct task_struct *p, int idx,
 			      struct user_desc __user *info, int can_allocate);
 
-extern void x86_ptrace_untrace(struct task_struct *);
-extern void x86_ptrace_fork(struct task_struct *child,
-			    unsigned long clone_flags);
+#ifdef CONFIG_X86_PTRACE_BTS
+extern void ptrace_bts_untrace(struct task_struct *tsk);
 
-#define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk)
-#define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags)
+#define arch_ptrace_untrace(tsk)	ptrace_bts_untrace(tsk)
+#endif /* CONFIG_X86_PTRACE_BTS */
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index adbb24322d8..b32a8ee5338 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -887,37 +887,19 @@ static int ptrace_bts_size(struct task_struct *child)
 	return (trace->ds.top - trace->ds.begin) / trace->ds.size;
 }
 
-static inline void ptrace_bts_fork(struct task_struct *tsk)
-{
-	tsk->bts = NULL;
-}
-
 /*
  * Called from __ptrace_unlink() after the child has been moved back
  * to its original parent.
  */
-static inline void ptrace_bts_untrace(struct task_struct *child)
+void ptrace_bts_untrace(struct task_struct *child)
 {
 	if (unlikely(child->bts)) {
 		free_bts_context(child->bts);
 		child->bts = NULL;
 	}
 }
-#else
-static inline void ptrace_bts_fork(struct task_struct *tsk) {}
-static inline void ptrace_bts_untrace(struct task_struct *child) {}
 #endif /* CONFIG_X86_PTRACE_BTS */
 
-void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags)
-{
-	ptrace_bts_fork(child);
-}
-
-void x86_ptrace_untrace(struct task_struct *child)
-{
-	ptrace_bts_untrace(child);
-}
-
 /*
  * Called by kernel/ptrace.c when detaching..
  *
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 67c15653fc2..59e133d39d5 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -95,7 +95,6 @@ extern void __ptrace_link(struct task_struct *child,
 			  struct task_struct *new_parent);
 extern void __ptrace_unlink(struct task_struct *child);
 extern void exit_ptrace(struct task_struct *tracer);
-extern void ptrace_fork(struct task_struct *task, unsigned long clone_flags);
 #define PTRACE_MODE_READ   1
 #define PTRACE_MODE_ATTACH 2
 /* Returns 0 on success, -errno on denial. */
@@ -327,15 +326,6 @@ static inline void user_enable_block_step(struct task_struct *task)
 #define arch_ptrace_untrace(task)		do { } while (0)
 #endif
 
-#ifndef arch_ptrace_fork
-/*
- * Do machine-specific work to initialize a new task.
- *
- * This is called from copy_process().
- */
-#define arch_ptrace_fork(child, clone_flags)	do { } while (0)
-#endif
-
 extern int task_current_syscall(struct task_struct *target, long *callno,
 				unsigned long args[6], unsigned int maxargs,
 				unsigned long *sp, unsigned long *pc);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 52b8cd049c2..451186a22ef 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1205,13 +1205,11 @@ struct task_struct {
 	struct list_head ptraced;
 	struct list_head ptrace_entry;
 
-#ifdef CONFIG_X86_PTRACE_BTS
 	/*
 	 * This is the tracer handle for the ptrace BTS extension.
 	 * This field actually belongs to the ptracer task.
 	 */
 	struct bts_context *bts;
-#endif /* CONFIG_X86_PTRACE_BTS */
 
 	/* PID/PID hash table linkage. */
 	struct pid_link pids[PIDTYPE_MAX];
diff --git a/kernel/fork.c b/kernel/fork.c
index 660c2b8765b..69bde7a22e9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1086,8 +1086,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
 	p->blocked_on = NULL; /* not blocked yet */
 #endif
-	if (unlikely(current->ptrace))
-		ptrace_fork(p, clone_flags);
+
+	p->bts = NULL;
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p, clone_flags);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index aaad0ec3419..321127d965c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -26,16 +26,6 @@
 #include <asm/uaccess.h>
 
 
-/*
- * Initialize a new task whose father had been ptraced.
- *
- * Called from copy_process().
- */
-void ptrace_fork(struct task_struct *child, unsigned long clone_flags)
-{
-	arch_ptrace_fork(child, clone_flags);
-}
-
 /*
  * ptrace a task: make the debugger its new parent and
  * move it to the ptrace list.
-- 
cgit v1.2.3-70-g09d2


From 06f409d76f1d382167eb1cadde2e23a73272865d Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 7 Apr 2009 18:10:13 +0100
Subject: ASoC: Provide core support for symmetric sample rates

Many devices require symmetric configurations of capture and playback
data formats, often due to shared clocking but sometimes also due to
other shared playback and record configuration in the device. Start
providing core support for this by allowing the DAIs or the machine
to specify that the sample rates used should be kept symmetric.

A flag symmetric_rates is provided in the snd_soc_dai and
snd_soc_dai_link structures. If this is set in either of the DAIs or in
the machine then a constraint will be applied when a stream is already
open preventing any changes in sample rate.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc-dai.h |  1 +
 include/sound/soc.h     |  6 ++++++
 sound/soc/soc-core.c    | 38 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 45 insertions(+)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 13676472ddf..22b729fbbf8 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -208,6 +208,7 @@ struct snd_soc_dai {
 	/* DAI capabilities */
 	struct snd_soc_pcm_stream capture;
 	struct snd_soc_pcm_stream playback;
+	unsigned int symmetric_rates:1;
 
 	/* DAI runtime info */
 	struct snd_pcm_runtime *runtime;
diff --git a/include/sound/soc.h b/include/sound/soc.h
index a40bc6f316f..b1f2f8819fe 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -417,6 +417,12 @@ struct snd_soc_dai_link  {
 	/* codec/machine specific init - e.g. add machine controls */
 	int (*init)(struct snd_soc_codec *codec);
 
+	/* Symmetry requirements */
+	unsigned int symmetric_rates:1;
+
+	/* Symmetry data - only valid if symmetry is being enforced */
+	unsigned int rate;
+
 	/* DAI pcm */
 	struct snd_pcm *pcm;
 };
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 99712f652d0..dd28009f896 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -113,6 +113,35 @@ static int soc_ac97_dev_register(struct snd_soc_codec *codec)
 }
 #endif
 
+static int soc_pcm_apply_symmetry(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_device *socdev = rtd->socdev;
+	struct snd_soc_card *card = socdev->card;
+	struct snd_soc_dai_link *machine = rtd->dai;
+	struct snd_soc_dai *cpu_dai = machine->cpu_dai;
+	struct snd_soc_dai *codec_dai = machine->codec_dai;
+	int ret;
+
+	if (codec_dai->symmetric_rates || cpu_dai->symmetric_rates ||
+	    machine->symmetric_rates) {
+		dev_dbg(card->dev, "Symmetry forces %dHz rate\n", 
+			machine->rate);
+
+		ret = snd_pcm_hw_constraint_minmax(substream->runtime,
+						   SNDRV_PCM_HW_PARAM_RATE,
+						   machine->rate,
+						   machine->rate);
+		if (ret < 0) {
+			dev_err(card->dev,
+				"Unable to apply rate symmetry constraint: %d\n", ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
 /*
  * Called by ALSA when a PCM substream is opened, the runtime->hw record is
  * then initialized and any private data can be allocated. This also calls
@@ -221,6 +250,13 @@ static int soc_pcm_open(struct snd_pcm_substream *substream)
 		goto machine_err;
 	}
 
+	/* Symmetry only applies if we've already got an active stream. */
+	if (cpu_dai->active || codec_dai->active) {
+		ret = soc_pcm_apply_symmetry(substream);
+		if (ret != 0)
+			goto machine_err;
+	}
+
 	pr_debug("asoc: %s <-> %s info:\n", codec_dai->name, cpu_dai->name);
 	pr_debug("asoc: rate mask 0x%x\n", runtime->hw.rates);
 	pr_debug("asoc: min ch %d max ch %d\n", runtime->hw.channels_min,
@@ -521,6 +557,8 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream,
 		}
 	}
 
+	machine->rate = params_rate(params);
+
 out:
 	mutex_unlock(&pcm_mutex);
 	return ret;
-- 
cgit v1.2.3-70-g09d2


From 44bc9dc729e33a4ec6ebed4d0b6c08e8d20b42cf Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 8 Apr 2009 10:47:17 +0200
Subject: mm, x86, ptrace, bts: defer branch trace stopping, cleanup

Andrew Morton noticed that mm.h needlessly includes sched.h - remove it.

Reported-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/mm.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 64d8ed2538a..776b641f37e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -13,7 +13,6 @@
 #include <linux/prio_tree.h>
 #include <linux/debug_locks.h>
 #include <linux/mm_types.h>
-#include <linux/sched.h>
 
 struct mempolicy;
 struct anon_vma;
-- 
cgit v1.2.3-70-g09d2


From a34b50ddc265bae058c66661b096ef6384c5a8b1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 8 Apr 2009 10:56:54 +0200
Subject: mm, x86, ptrace, bts: defer branch trace stopping, remove dead code

Remove the unused free_locked_buffer() API.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/mm.h | 1 -
 mm/mlock.c         | 6 ------
 2 files changed, 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 776b641f37e..a3963ba23a6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1320,7 +1320,6 @@ int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
 void vmemmap_populate_print_last(void);
 
 extern void *alloc_locked_buffer(size_t size);
-extern void free_locked_buffer(void *buffer, size_t size);
 extern void refund_locked_buffer_memory(struct mm_struct *mm, size_t size);
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/mm/mlock.c b/mm/mlock.c
index 749383b442c..28be15ead9c 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -671,9 +671,3 @@ void refund_locked_buffer_memory(struct mm_struct *mm, size_t size)
 
 	up_write(&mm->mmap_sem);
 }
-
-void free_locked_buffer(void *buffer, size_t size)
-{
-	refund_locked_buffer_memory(current->mm, size);
-	kfree(buffer);
-}
-- 
cgit v1.2.3-70-g09d2


From 42d7c5e353cef9062129b0de3ec9ddf10567b9ca Mon Sep 17 00:00:00 2001
From: Becky Bruce <beckyb@kernel.crashing.org>
Date: Wed, 8 Apr 2009 09:09:21 -0500
Subject: swiotlb: change swiotlb_bus_to[phys,virt] prototypes

Add a hwdev argument that is needed on some architectures
in order to access a per-device offset that is taken into
account when producing a physical address (also needed to
get from bus address to virtual address because the physical
address is an intermediate step).

Also make swiotlb_bus_to_virt weak so architectures can
override it.

Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
Acked-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
Cc: jeremy@goop.org
Cc: ian.campbell@citrix.com
LKML-Reference: <1239199761-22886-8-git-send-email-galak@kernel.crashing.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-swiotlb.c |  2 +-
 include/linux/swiotlb.h       |  3 ++-
 lib/swiotlb.c                 | 10 +++++-----
 3 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 34f12e9996e..887388a1c57 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -28,7 +28,7 @@ dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
 	return paddr;
 }
 
-phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr)
+phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
 {
 	return baddr;
 }
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index ac9ff54f7cb..cb1a6631b8f 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -29,7 +29,8 @@ extern void *swiotlb_alloc(unsigned order, unsigned long nslabs);
 
 extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev,
 				      phys_addr_t address);
-extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address);
+extern phys_addr_t swiotlb_bus_to_phys(struct device *hwdev,
+				       dma_addr_t address);
 
 extern int swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size);
 
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index d912f068145..bffe6d7ef9d 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -129,7 +129,7 @@ dma_addr_t __weak swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
 	return paddr;
 }
 
-phys_addr_t __weak swiotlb_bus_to_phys(dma_addr_t baddr)
+phys_addr_t __weak swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
 {
 	return baddr;
 }
@@ -140,9 +140,9 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
 	return swiotlb_phys_to_bus(hwdev, virt_to_phys(address));
 }
 
-static void *swiotlb_bus_to_virt(dma_addr_t address)
+void * __weak swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t address)
 {
-	return phys_to_virt(swiotlb_bus_to_phys(address));
+	return phys_to_virt(swiotlb_bus_to_phys(hwdev, address));
 }
 
 int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev,
@@ -691,7 +691,7 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
 static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
 			 size_t size, int dir)
 {
-	char *dma_addr = swiotlb_bus_to_virt(dev_addr);
+	char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
 
@@ -728,7 +728,7 @@ static void
 swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
 		    size_t size, int dir, int target)
 {
-	char *dma_addr = swiotlb_bus_to_virt(dev_addr);
+	char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
 
-- 
cgit v1.2.3-70-g09d2


From 6fab01927e8bdbbc77bafba2abb4810c5591ad52 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 8 Apr 2009 15:01:26 +0200
Subject: perf_counter: provide misc bits in the event header

Limit the size of each record to 64k (or should we count in multiples
of u64 and have a 512K limit?), this gives 16 bits or spare room in the
header, which we can use for misc bits, so as to not have to grow the
record with u64 every time we have a few bits to report.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090408130408.769271806@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 6 +++++-
 kernel/perf_counter.c        | 3 +++
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 7f5d353d78a..5bd8817b12d 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -201,9 +201,13 @@ struct perf_counter_mmap_page {
 	__u32   data_head;		/* head in the data section */
 };
 
+#define PERF_EVENT_MISC_KERNEL	(1 << 0)
+#define PERF_EVENT_MISC_USER	(1 << 1)
+
 struct perf_event_header {
 	__u32	type;
-	__u32	size;
+	__u16	misc;
+	__u16	size;
 };
 
 enum perf_event_type {
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 84a39081344..4af98f943d3 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1831,6 +1831,9 @@ static void perf_counter_output(struct perf_counter *counter,
 	header.type = PERF_EVENT_COUNTER_OVERFLOW;
 	header.size = sizeof(header);
 
+	header.misc = user_mode(regs) ?
+		PERF_EVENT_MISC_USER : PERF_EVENT_MISC_KERNEL;
+
 	if (record_type & PERF_RECORD_IP) {
 		ip = instruction_pointer(regs);
 		header.type |= __PERF_EVENT_IP;
-- 
cgit v1.2.3-70-g09d2


From 6b6e5486b3a168f0328c82a8d4376caf901472b1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 8 Apr 2009 15:01:27 +0200
Subject: perf_counter: use misc field to widen type

Push the PERF_EVENT_COUNTER_OVERFLOW bit into the misc field so that
we can have the full 32bit for PERF_RECORD_ bits.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090408130408.891867663@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 28 ++++++++++------------------
 kernel/perf_counter.c        | 15 ++++++++-------
 2 files changed, 18 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 5bd8817b12d..4809ae18a94 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -201,8 +201,9 @@ struct perf_counter_mmap_page {
 	__u32   data_head;		/* head in the data section */
 };
 
-#define PERF_EVENT_MISC_KERNEL	(1 << 0)
-#define PERF_EVENT_MISC_USER	(1 << 1)
+#define PERF_EVENT_MISC_KERNEL		(1 << 0)
+#define PERF_EVENT_MISC_USER		(1 << 1)
+#define PERF_EVENT_MISC_OVERFLOW	(1 << 2)
 
 struct perf_event_header {
 	__u32	type;
@@ -230,36 +231,27 @@ enum perf_event_type {
 	PERF_EVENT_MUNMAP		= 2,
 
 	/*
-	 * Half the event type space is reserved for the counter overflow
-	 * bitfields, as found in hw_event.record_type.
-	 *
-	 * These events will have types of the form:
-	 *   PERF_EVENT_COUNTER_OVERFLOW { | __PERF_EVENT_* } *
+	 * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
+	 * will be PERF_RECORD_*
 	 *
 	 * struct {
 	 * 	struct perf_event_header	header;
 	 *
-	 * 	{ u64			ip;	  } && __PERF_EVENT_IP
-	 * 	{ u32			pid, tid; } && __PERF_EVENT_TID
+	 * 	{ u64			ip;	  } && PERF_RECORD_IP
+	 * 	{ u32			pid, tid; } && PERF_RECORD_TID
 	 *
 	 * 	{ u64			nr;
-	 * 	  { u64 event, val; } 	cnt[nr];  } && __PERF_EVENT_GROUP
+	 * 	  { u64 event, val; } 	cnt[nr];  } && PERF_RECORD_GROUP
 	 *
 	 * 	{ u16			nr,
 	 * 				hv,
 	 * 				kernel,
 	 * 				user;
-	 * 	  u64			ips[nr];  } && __PERF_EVENT_CALLCHAIN
+	 * 	  u64			ips[nr];  } && PERF_RECORD_CALLCHAIN
 	 *
-	 * 	{ u64			time;     } && __PERF_EVENT_TIME
+	 * 	{ u64			time;     } && PERF_RECORD_TIME
 	 * };
 	 */
-	PERF_EVENT_COUNTER_OVERFLOW	= 1UL << 31,
-	__PERF_EVENT_IP			= PERF_RECORD_IP,
-	__PERF_EVENT_TID		= PERF_RECORD_TID,
-	__PERF_EVENT_GROUP		= PERF_RECORD_GROUP,
-	__PERF_EVENT_CALLCHAIN		= PERF_RECORD_CALLCHAIN,
-	__PERF_EVENT_TIME		= PERF_RECORD_TIME,
 };
 
 #ifdef __KERNEL__
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 4af98f943d3..bf12df6f353 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1828,15 +1828,16 @@ static void perf_counter_output(struct perf_counter *counter,
 	int callchain_size = 0;
 	u64 time;
 
-	header.type = PERF_EVENT_COUNTER_OVERFLOW;
+	header.type = 0;
 	header.size = sizeof(header);
 
-	header.misc = user_mode(regs) ?
+	header.misc = PERF_EVENT_MISC_OVERFLOW;
+	header.misc |= user_mode(regs) ?
 		PERF_EVENT_MISC_USER : PERF_EVENT_MISC_KERNEL;
 
 	if (record_type & PERF_RECORD_IP) {
 		ip = instruction_pointer(regs);
-		header.type |= __PERF_EVENT_IP;
+		header.type |= PERF_RECORD_IP;
 		header.size += sizeof(ip);
 	}
 
@@ -1845,12 +1846,12 @@ static void perf_counter_output(struct perf_counter *counter,
 		tid_entry.pid = current->group_leader->pid;
 		tid_entry.tid = current->pid;
 
-		header.type |= __PERF_EVENT_TID;
+		header.type |= PERF_RECORD_TID;
 		header.size += sizeof(tid_entry);
 	}
 
 	if (record_type & PERF_RECORD_GROUP) {
-		header.type |= __PERF_EVENT_GROUP;
+		header.type |= PERF_RECORD_GROUP;
 		header.size += sizeof(u64) +
 			counter->nr_siblings * sizeof(group_entry);
 	}
@@ -1861,7 +1862,7 @@ static void perf_counter_output(struct perf_counter *counter,
 		if (callchain) {
 			callchain_size = (1 + callchain->nr) * sizeof(u64);
 
-			header.type |= __PERF_EVENT_CALLCHAIN;
+			header.type |= PERF_RECORD_CALLCHAIN;
 			header.size += callchain_size;
 		}
 	}
@@ -1872,7 +1873,7 @@ static void perf_counter_output(struct perf_counter *counter,
 		 */
 		time = sched_clock();
 
-		header.type |= __PERF_EVENT_TIME;
+		header.type |= PERF_RECORD_TIME;
 		header.size += sizeof(u64);
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 8740f9418c78dcad694b46ab25d1645d5aef1f5e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 8 Apr 2009 15:01:29 +0200
Subject: perf_counter: add some comments

Add a few comments because I was forgetting what field what for what
functionality.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090408130409.036984214@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 4809ae18a94..8bf764fc622 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -344,10 +344,12 @@ struct file;
 
 struct perf_mmap_data {
 	struct rcu_head			rcu_head;
-	int				nr_pages;
-	atomic_t			wakeup;
-	atomic_t			head;
-	atomic_t			events;
+	int				nr_pages;	/* nr of data pages  */
+
+	atomic_t			wakeup;		/* POLL_ for wakeups */
+	atomic_t			head;		/* write position    */
+	atomic_t			events;		/* event limit       */
+
 	struct perf_counter_mmap_page   *user_page;
 	void 				*data_pages[0];
 };
-- 
cgit v1.2.3-70-g09d2


From 8d1b2d9361b494bfc761700c348c65ebbe3deb5b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 8 Apr 2009 15:01:30 +0200
Subject: perf_counter: track task-comm data

Similar to the mmap data stream, add one that tracks the task COMM field,
so that the userspace reporting knows what to call a task.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090408130409.127422406@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/exec.c                    |  1 +
 include/linux/perf_counter.h | 16 +++++++-
 kernel/perf_counter.c        | 93 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 109 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/exec.c b/fs/exec.c
index e015c0b5a08..bf47ed0278f 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -951,6 +951,7 @@ void set_task_comm(struct task_struct *tsk, char *buf)
 	task_lock(tsk);
 	strlcpy(tsk->comm, buf, sizeof(tsk->comm));
 	task_unlock(tsk);
+	perf_counter_comm(tsk);
 }
 
 int flush_old_exec(struct linux_binprm * bprm)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 8bf764fc622..a70a55f2759 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -142,8 +142,9 @@ struct perf_counter_hw_event {
 				exclude_idle   :  1, /* don't count when idle */
 				mmap           :  1, /* include mmap data     */
 				munmap         :  1, /* include munmap data   */
+				comm	       :  1, /* include comm data     */
 
-				__reserved_1   : 53;
+				__reserved_1   : 52;
 
 	__u32			extra_config_len;
 	__u32			wakeup_events;	/* wakeup every n events */
@@ -230,6 +231,16 @@ enum perf_event_type {
 	PERF_EVENT_MMAP			= 1,
 	PERF_EVENT_MUNMAP		= 2,
 
+	/*
+	 * struct {
+	 * 	struct perf_event_header	header;
+	 *
+	 * 	u32				pid, tid;
+	 * 	char				comm[];
+	 * };
+	 */
+	PERF_EVENT_COMM			= 3,
+
 	/*
 	 * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
 	 * will be PERF_RECORD_*
@@ -545,6 +556,8 @@ extern void perf_counter_mmap(unsigned long addr, unsigned long len,
 extern void perf_counter_munmap(unsigned long addr, unsigned long len,
 				unsigned long pgoff, struct file *file);
 
+extern void perf_counter_comm(struct task_struct *tsk);
+
 #define MAX_STACK_DEPTH		255
 
 struct perf_callchain_entry {
@@ -583,6 +596,7 @@ static inline void
 perf_counter_munmap(unsigned long addr, unsigned long len,
 		    unsigned long pgoff, struct file *file) 		{ }
 
+static inline void perf_counter_comm(struct task_struct *tsk)		{ }
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index bf12df6f353..2d4aebb2982 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1916,6 +1916,99 @@ static void perf_counter_output(struct perf_counter *counter,
 	perf_output_end(&handle);
 }
 
+/*
+ * comm tracking
+ */
+
+struct perf_comm_event {
+	struct task_struct 	*task;
+	char 			*comm;
+	int			comm_size;
+
+	struct {
+		struct perf_event_header	header;
+
+		u32				pid;
+		u32				tid;
+	} event;
+};
+
+static void perf_counter_comm_output(struct perf_counter *counter,
+				     struct perf_comm_event *comm_event)
+{
+	struct perf_output_handle handle;
+	int size = comm_event->event.header.size;
+	int ret = perf_output_begin(&handle, counter, size, 0, 0);
+
+	if (ret)
+		return;
+
+	perf_output_put(&handle, comm_event->event);
+	perf_output_copy(&handle, comm_event->comm,
+				   comm_event->comm_size);
+	perf_output_end(&handle);
+}
+
+static int perf_counter_comm_match(struct perf_counter *counter,
+				   struct perf_comm_event *comm_event)
+{
+	if (counter->hw_event.comm &&
+	    comm_event->event.header.type == PERF_EVENT_COMM)
+		return 1;
+
+	return 0;
+}
+
+static void perf_counter_comm_ctx(struct perf_counter_context *ctx,
+				  struct perf_comm_event *comm_event)
+{
+	struct perf_counter *counter;
+
+	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
+		return;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
+		if (perf_counter_comm_match(counter, comm_event))
+			perf_counter_comm_output(counter, comm_event);
+	}
+	rcu_read_unlock();
+}
+
+static void perf_counter_comm_event(struct perf_comm_event *comm_event)
+{
+	struct perf_cpu_context *cpuctx;
+	unsigned int size;
+	char *comm = comm_event->task->comm;
+
+	size = ALIGN(strlen(comm), sizeof(u64));
+
+	comm_event->comm = comm;
+	comm_event->comm_size = size;
+
+	comm_event->event.header.size = sizeof(comm_event->event) + size;
+
+	cpuctx = &get_cpu_var(perf_cpu_context);
+	perf_counter_comm_ctx(&cpuctx->ctx, comm_event);
+	put_cpu_var(perf_cpu_context);
+
+	perf_counter_comm_ctx(&current->perf_counter_ctx, comm_event);
+}
+
+void perf_counter_comm(struct task_struct *task)
+{
+	struct perf_comm_event comm_event = {
+		.task	= task,
+		.event  = {
+			.header = { .type = PERF_EVENT_COMM, },
+			.pid	= task->group_leader->pid,
+			.tid	= task->pid,
+		},
+	};
+
+	perf_counter_comm_event(&comm_event);
+}
+
 /*
  * mmap tracking
  */
-- 
cgit v1.2.3-70-g09d2


From 4d855457d84b819fefcd1cd1b0a2a0a0ec475c07 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 8 Apr 2009 15:01:32 +0200
Subject: perf_counter: move PERF_RECORD_TIME

Move PERF_RECORD_TIME so that all the fixed length items come before
the variable length ones.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090408130409.307926436@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  9 ++++-----
 kernel/perf_counter.c        | 26 +++++++++++++-------------
 2 files changed, 17 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index a70a55f2759..8bd1be58c93 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -100,9 +100,9 @@ enum sw_event_ids {
 enum perf_counter_record_format {
 	PERF_RECORD_IP		= 1U << 0,
 	PERF_RECORD_TID		= 1U << 1,
-	PERF_RECORD_GROUP	= 1U << 2,
-	PERF_RECORD_CALLCHAIN	= 1U << 3,
-	PERF_RECORD_TIME	= 1U << 4,
+	PERF_RECORD_TIME	= 1U << 2,
+	PERF_RECORD_GROUP	= 1U << 3,
+	PERF_RECORD_CALLCHAIN	= 1U << 4,
 };
 
 /*
@@ -250,6 +250,7 @@ enum perf_event_type {
 	 *
 	 * 	{ u64			ip;	  } && PERF_RECORD_IP
 	 * 	{ u32			pid, tid; } && PERF_RECORD_TID
+	 * 	{ u64			time;     } && PERF_RECORD_TIME
 	 *
 	 * 	{ u64			nr;
 	 * 	  { u64 event, val; } 	cnt[nr];  } && PERF_RECORD_GROUP
@@ -259,8 +260,6 @@ enum perf_event_type {
 	 * 				kernel,
 	 * 				user;
 	 * 	  u64			ips[nr];  } && PERF_RECORD_CALLCHAIN
-	 *
-	 * 	{ u64			time;     } && PERF_RECORD_TIME
 	 * };
 	 */
 };
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 2d4aebb2982..4dc8600d282 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1850,6 +1850,16 @@ static void perf_counter_output(struct perf_counter *counter,
 		header.size += sizeof(tid_entry);
 	}
 
+	if (record_type & PERF_RECORD_TIME) {
+		/*
+		 * Maybe do better on x86 and provide cpu_clock_nmi()
+		 */
+		time = sched_clock();
+
+		header.type |= PERF_RECORD_TIME;
+		header.size += sizeof(u64);
+	}
+
 	if (record_type & PERF_RECORD_GROUP) {
 		header.type |= PERF_RECORD_GROUP;
 		header.size += sizeof(u64) +
@@ -1867,16 +1877,6 @@ static void perf_counter_output(struct perf_counter *counter,
 		}
 	}
 
-	if (record_type & PERF_RECORD_TIME) {
-		/*
-		 * Maybe do better on x86 and provide cpu_clock_nmi()
-		 */
-		time = sched_clock();
-
-		header.type |= PERF_RECORD_TIME;
-		header.size += sizeof(u64);
-	}
-
 	ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
 	if (ret)
 		return;
@@ -1889,6 +1889,9 @@ static void perf_counter_output(struct perf_counter *counter,
 	if (record_type & PERF_RECORD_TID)
 		perf_output_put(&handle, tid_entry);
 
+	if (record_type & PERF_RECORD_TIME)
+		perf_output_put(&handle, time);
+
 	if (record_type & PERF_RECORD_GROUP) {
 		struct perf_counter *leader, *sub;
 		u64 nr = counter->nr_siblings;
@@ -1910,9 +1913,6 @@ static void perf_counter_output(struct perf_counter *counter,
 	if (callchain)
 		perf_output_copy(&handle, callchain, callchain_size);
 
-	if (record_type & PERF_RECORD_TIME)
-		perf_output_put(&handle, time);
-
 	perf_output_end(&handle);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 78f13e9525ba777da25c4ddab89f28e9366a8b7c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 8 Apr 2009 15:01:33 +0200
Subject: perf_counter: allow for data addresses to be recorded

Paul suggested we allow for data addresses to be recorded along with
the traditional IPs as power can provide these.

For now, only the software pagefault events provide data addresses,
but in the future power might as well for some events.

x86 doesn't seem capable of providing this atm.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090408130409.394816925@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/perf_counter.c |  2 +-
 arch/powerpc/mm/fault.c            |  8 ++++---
 arch/x86/kernel/cpu/perf_counter.c |  2 +-
 arch/x86/mm/fault.c                |  8 ++++---
 include/linux/perf_counter.h       | 14 +++++++-----
 kernel/perf_counter.c              | 46 ++++++++++++++++++++++++--------------
 6 files changed, 49 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 0697ade84dd..c9d019f1907 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -749,7 +749,7 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	 * Finally record data if requested.
 	 */
 	if (record)
-		perf_counter_overflow(counter, 1, regs);
+		perf_counter_overflow(counter, 1, regs, 0);
 }
 
 /*
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 17bbf6f91fb..ac0e112031b 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -171,7 +171,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 		die("Weird page fault", regs, SIGSEGV);
 	}
 
-	perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs);
+	perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address);
 
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
@@ -312,7 +312,8 @@ good_area:
 	}
 	if (ret & VM_FAULT_MAJOR) {
 		current->maj_flt++;
-		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs);
+		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0,
+				     regs, address);
 #ifdef CONFIG_PPC_SMLPAR
 		if (firmware_has_feature(FW_FEATURE_CMO)) {
 			preempt_disable();
@@ -322,7 +323,8 @@ good_area:
 #endif
 	} else {
 		current->min_flt++;
-		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs);
+		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0,
+				     regs, address);
 	}
 	up_read(&mm->mmap_sem);
 	return 0;
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 1116a41bc7b..0fcbaab83f9 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -800,7 +800,7 @@ again:
 			continue;
 
 		perf_save_and_restart(counter);
-		if (perf_counter_overflow(counter, nmi, regs))
+		if (perf_counter_overflow(counter, nmi, regs, 0))
 			__pmc_generic_disable(counter, &counter->hw, bit);
 	}
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index f2d3324d921..6f9df2babe4 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1045,7 +1045,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(regs, error_code, address);
 
-	perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs);
+	perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address);
 
 	/*
 	 * If we're in an interrupt, have no user context or are running
@@ -1142,10 +1142,12 @@ good_area:
 
 	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs);
+		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0,
+				     regs, address);
 	} else {
 		tsk->min_flt++;
-		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs);
+		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0,
+				     regs, address);
 	}
 
 	check_v8086_mode(regs, address, tsk);
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 8bd1be58c93..c22363a4f74 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -101,8 +101,9 @@ enum perf_counter_record_format {
 	PERF_RECORD_IP		= 1U << 0,
 	PERF_RECORD_TID		= 1U << 1,
 	PERF_RECORD_TIME	= 1U << 2,
-	PERF_RECORD_GROUP	= 1U << 3,
-	PERF_RECORD_CALLCHAIN	= 1U << 4,
+	PERF_RECORD_ADDR	= 1U << 3,
+	PERF_RECORD_GROUP	= 1U << 4,
+	PERF_RECORD_CALLCHAIN	= 1U << 5,
 };
 
 /*
@@ -251,6 +252,7 @@ enum perf_event_type {
 	 * 	{ u64			ip;	  } && PERF_RECORD_IP
 	 * 	{ u32			pid, tid; } && PERF_RECORD_TID
 	 * 	{ u64			time;     } && PERF_RECORD_TIME
+	 * 	{ u64			addr;     } && PERF_RECORD_ADDR
 	 *
 	 * 	{ u64			nr;
 	 * 	  { u64 event, val; } 	cnt[nr];  } && PERF_RECORD_GROUP
@@ -537,7 +539,7 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
 extern void perf_counter_update_userpage(struct perf_counter *counter);
 
 extern int perf_counter_overflow(struct perf_counter *counter,
-				 int nmi, struct pt_regs *regs);
+				 int nmi, struct pt_regs *regs, u64 addr);
 /*
  * Return 1 for a software counter, 0 for a hardware counter
  */
@@ -547,7 +549,7 @@ static inline int is_software_counter(struct perf_counter *counter)
 		perf_event_type(&counter->hw_event) != PERF_TYPE_HARDWARE;
 }
 
-extern void perf_swcounter_event(u32, u64, int, struct pt_regs *);
+extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
 
 extern void perf_counter_mmap(unsigned long addr, unsigned long len,
 			      unsigned long pgoff, struct file *file);
@@ -584,8 +586,8 @@ static inline int perf_counter_task_disable(void)	{ return -EINVAL; }
 static inline int perf_counter_task_enable(void)	{ return -EINVAL; }
 
 static inline void
-perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs)	{ }
-
+perf_swcounter_event(u32 event, u64 nr, int nmi,
+		     struct pt_regs *regs, u64 addr)			{ }
 
 static inline void
 perf_counter_mmap(unsigned long addr, unsigned long len,
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 4dc8600d282..321c57e3556 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -800,7 +800,7 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
 	update_context_time(ctx);
 
 	regs = task_pt_regs(task);
-	perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs);
+	perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0);
 	__perf_counter_sched_out(ctx, cpuctx);
 
 	cpuctx->task_ctx = NULL;
@@ -1810,7 +1810,7 @@ static void perf_output_end(struct perf_output_handle *handle)
 }
 
 static void perf_counter_output(struct perf_counter *counter,
-				int nmi, struct pt_regs *regs)
+				int nmi, struct pt_regs *regs, u64 addr)
 {
 	int ret;
 	u64 record_type = counter->hw_event.record_type;
@@ -1860,6 +1860,11 @@ static void perf_counter_output(struct perf_counter *counter,
 		header.size += sizeof(u64);
 	}
 
+	if (record_type & PERF_RECORD_ADDR) {
+		header.type |= PERF_RECORD_ADDR;
+		header.size += sizeof(u64);
+	}
+
 	if (record_type & PERF_RECORD_GROUP) {
 		header.type |= PERF_RECORD_GROUP;
 		header.size += sizeof(u64) +
@@ -1892,6 +1897,9 @@ static void perf_counter_output(struct perf_counter *counter,
 	if (record_type & PERF_RECORD_TIME)
 		perf_output_put(&handle, time);
 
+	if (record_type & PERF_RECORD_ADDR)
+		perf_output_put(&handle, addr);
+
 	if (record_type & PERF_RECORD_GROUP) {
 		struct perf_counter *leader, *sub;
 		u64 nr = counter->nr_siblings;
@@ -2158,7 +2166,7 @@ void perf_counter_munmap(unsigned long addr, unsigned long len,
  */
 
 int perf_counter_overflow(struct perf_counter *counter,
-			  int nmi, struct pt_regs *regs)
+			  int nmi, struct pt_regs *regs, u64 addr)
 {
 	int events = atomic_read(&counter->event_limit);
 	int ret = 0;
@@ -2175,7 +2183,7 @@ int perf_counter_overflow(struct perf_counter *counter,
 			perf_counter_disable(counter);
 	}
 
-	perf_counter_output(counter, nmi, regs);
+	perf_counter_output(counter, nmi, regs, addr);
 	return ret;
 }
 
@@ -2240,7 +2248,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 		regs = task_pt_regs(current);
 
 	if (regs) {
-		if (perf_counter_overflow(counter, 0, regs))
+		if (perf_counter_overflow(counter, 0, regs, 0))
 			ret = HRTIMER_NORESTART;
 	}
 
@@ -2250,11 +2258,11 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 }
 
 static void perf_swcounter_overflow(struct perf_counter *counter,
-				    int nmi, struct pt_regs *regs)
+				    int nmi, struct pt_regs *regs, u64 addr)
 {
 	perf_swcounter_update(counter);
 	perf_swcounter_set_period(counter);
-	if (perf_counter_overflow(counter, nmi, regs))
+	if (perf_counter_overflow(counter, nmi, regs, addr))
 		/* soft-disable the counter */
 		;
 
@@ -2286,16 +2294,17 @@ static int perf_swcounter_match(struct perf_counter *counter,
 }
 
 static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
-			       int nmi, struct pt_regs *regs)
+			       int nmi, struct pt_regs *regs, u64 addr)
 {
 	int neg = atomic64_add_negative(nr, &counter->hw.count);
 	if (counter->hw.irq_period && !neg)
-		perf_swcounter_overflow(counter, nmi, regs);
+		perf_swcounter_overflow(counter, nmi, regs, addr);
 }
 
 static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
 				     enum perf_event_types type, u32 event,
-				     u64 nr, int nmi, struct pt_regs *regs)
+				     u64 nr, int nmi, struct pt_regs *regs,
+				     u64 addr)
 {
 	struct perf_counter *counter;
 
@@ -2305,7 +2314,7 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
 	rcu_read_lock();
 	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
 		if (perf_swcounter_match(counter, type, event, regs))
-			perf_swcounter_add(counter, nr, nmi, regs);
+			perf_swcounter_add(counter, nr, nmi, regs, addr);
 	}
 	rcu_read_unlock();
 }
@@ -2325,7 +2334,8 @@ static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx)
 }
 
 static void __perf_swcounter_event(enum perf_event_types type, u32 event,
-				   u64 nr, int nmi, struct pt_regs *regs)
+				   u64 nr, int nmi, struct pt_regs *regs,
+				   u64 addr)
 {
 	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
 	int *recursion = perf_swcounter_recursion_context(cpuctx);
@@ -2336,10 +2346,11 @@ static void __perf_swcounter_event(enum perf_event_types type, u32 event,
 	(*recursion)++;
 	barrier();
 
-	perf_swcounter_ctx_event(&cpuctx->ctx, type, event, nr, nmi, regs);
+	perf_swcounter_ctx_event(&cpuctx->ctx, type, event,
+				 nr, nmi, regs, addr);
 	if (cpuctx->task_ctx) {
 		perf_swcounter_ctx_event(cpuctx->task_ctx, type, event,
-				nr, nmi, regs);
+					 nr, nmi, regs, addr);
 	}
 
 	barrier();
@@ -2349,9 +2360,10 @@ out:
 	put_cpu_var(perf_cpu_context);
 }
 
-void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs)
+void
+perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
 {
-	__perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs);
+	__perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs, addr);
 }
 
 static void perf_swcounter_read(struct perf_counter *counter)
@@ -2548,7 +2560,7 @@ void perf_tpcounter_event(int event_id)
 	if (!regs)
 		regs = task_pt_regs(current);
 
-	__perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs);
+	__perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs, 0);
 }
 
 extern int ftrace_profile_enable(int);
-- 
cgit v1.2.3-70-g09d2


From 1c1452be2e9ae282a7316c3b23987811bd7acda6 Mon Sep 17 00:00:00 2001
From: Jonas Larsson <jonas.larsson@martinsson.se>
Date: Tue, 31 Mar 2009 11:16:48 +0200
Subject: atmel-mci: Add support for inverted detect pin

Same patch as before, modified to use bool. Also adds description of
the new field in struct atmel_mci that I missed in the first patch.

This patch adds Atmel MCI support for inverted detect pins.

Signed-off-by: Jonas Larsson <jonas.larsson@martinsson.se>
Acked-by: Pierre Ossman <pierre@ossman.eu>
Signed-off-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
---
 drivers/mmc/host/atmel-mci.c | 12 +++++++++---
 include/linux/atmel-mci.h    |  2 ++
 2 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index cf6a100bb38..7b603e4b41d 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -177,6 +177,7 @@ struct atmel_mci {
  *	available.
  * @wp_pin: GPIO pin used for card write protect sending, or negative
  *	if not available.
+ * @detect_is_active_high: The state of the detect pin when it is active.
  * @detect_timer: Timer used for debouncing @detect_pin interrupts.
  */
 struct atmel_mci_slot {
@@ -196,6 +197,7 @@ struct atmel_mci_slot {
 
 	int			detect_pin;
 	int			wp_pin;
+	bool			detect_is_active_high;
 
 	struct timer_list	detect_timer;
 };
@@ -924,7 +926,8 @@ static int atmci_get_cd(struct mmc_host *mmc)
 	struct atmel_mci_slot	*slot = mmc_priv(mmc);
 
 	if (gpio_is_valid(slot->detect_pin)) {
-		present = !gpio_get_value(slot->detect_pin);
+		present = !(gpio_get_value(slot->detect_pin) ^
+			    slot->detect_is_active_high);
 		dev_dbg(&mmc->class_dev, "card is %spresent\n",
 				present ? "" : "not ");
 	}
@@ -1028,7 +1031,8 @@ static void atmci_detect_change(unsigned long data)
 		return;
 
 	enable_irq(gpio_to_irq(slot->detect_pin));
-	present = !gpio_get_value(slot->detect_pin);
+	present = !(gpio_get_value(slot->detect_pin) ^
+		    slot->detect_is_active_high);
 	present_old = test_bit(ATMCI_CARD_PRESENT, &slot->flags);
 
 	dev_vdbg(&slot->mmc->class_dev, "detect change: %d (was %d)\n",
@@ -1456,6 +1460,7 @@ static int __init atmci_init_slot(struct atmel_mci *host,
 	slot->host = host;
 	slot->detect_pin = slot_data->detect_pin;
 	slot->wp_pin = slot_data->wp_pin;
+	slot->detect_is_active_high = slot_data->detect_is_active_high;
 	slot->sdc_reg = sdc_reg;
 
 	mmc->ops = &atmci_ops;
@@ -1477,7 +1482,8 @@ static int __init atmci_init_slot(struct atmel_mci *host,
 		if (gpio_request(slot->detect_pin, "mmc_detect")) {
 			dev_dbg(&mmc->class_dev, "no detect pin available\n");
 			slot->detect_pin = -EBUSY;
-		} else if (gpio_get_value(slot->detect_pin)) {
+		} else if (gpio_get_value(slot->detect_pin) ^
+				slot->detect_is_active_high) {
 			clear_bit(ATMCI_CARD_PRESENT, &slot->flags);
 		}
 	}
diff --git a/include/linux/atmel-mci.h b/include/linux/atmel-mci.h
index 2f1f95737ac..57b1846a3c8 100644
--- a/include/linux/atmel-mci.h
+++ b/include/linux/atmel-mci.h
@@ -10,6 +10,7 @@
  * @bus_width: Number of data lines wired up the slot
  * @detect_pin: GPIO pin wired to the card detect switch
  * @wp_pin: GPIO pin wired to the write protect sensor
+ * @detect_is_active_high: The state of the detect pin when it is active
  *
  * If a given slot is not present on the board, @bus_width should be
  * set to 0. The other fields are ignored in this case.
@@ -24,6 +25,7 @@ struct mci_slot_pdata {
 	unsigned int		bus_width;
 	int			detect_pin;
 	int			wp_pin;
+	bool			detect_is_active_high;
 };
 
 /**
-- 
cgit v1.2.3-70-g09d2


From 1ccd15497869f3ed83b5225d410df53a96e52757 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 9 Apr 2009 10:53:45 +0200
Subject: perf_counter: sysctl for system wide perf counters

Impact: add sysctl for paranoid/relaxed perfcounters policy

Allow the use of system wide perf counters to everybody, but provide
a sysctl to disable it for the paranoid security minded.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090409085524.514046352@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  2 ++
 kernel/perf_counter.c        |  4 +++-
 kernel/sysctl.c              | 11 +++++++++++
 3 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index c22363a4f74..98143288530 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -568,6 +568,8 @@ struct perf_callchain_entry {
 
 extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
 
+extern int sysctl_perf_counter_priv;
+
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 76376ecb23b..7efb7ebaaae 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -42,6 +42,8 @@ static atomic_t nr_mmap_tracking __read_mostly;
 static atomic_t nr_munmap_tracking __read_mostly;
 static atomic_t nr_comm_tracking __read_mostly;
 
+int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
+
 /*
  * Mutex for (sysadmin-configurable) counter reservations:
  */
@@ -1132,7 +1134,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 	 */
 	if (cpu != -1) {
 		/* Must be root to operate on a CPU counter: */
-		if (!capable(CAP_SYS_ADMIN))
+		if (sysctl_perf_counter_priv && !capable(CAP_SYS_ADMIN))
 			return ERR_PTR(-EACCES);
 
 		if (cpu < 0 || cpu > num_possible_cpus())
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4286b62b34a..8ba457838d9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -49,6 +49,7 @@
 #include <linux/reboot.h>
 #include <linux/ftrace.h>
 #include <linux/slow-work.h>
+#include <linux/perf_counter.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
@@ -920,6 +921,16 @@ static struct ctl_table kern_table[] = {
 		.child		= slow_work_sysctls,
 	},
 #endif
+#ifdef CONFIG_PERF_COUNTERS
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "perf_counter_privileged",
+		.data		= &sysctl_perf_counter_priv,
+		.maxlen		= sizeof(sysctl_perf_counter_priv),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
 /*
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt
-- 
cgit v1.2.3-70-g09d2


From 2062501ae6505dbc5bff3a792246c2661d114050 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 6 Apr 2009 01:49:33 +0200
Subject: tracing/lockdep: report the time waited for a lock

While trying to optimize the new lock on reiserfs to replace
the bkl, I find the lock tracing very useful though it lacks
something important for performance (and latency) instrumentation:
the time a task waits for a lock.

That's what this patch implements:

  bash-4816  [000]   202.652815: lock_contended: lock_contended: &sb->s_type->i_mutex_key
  bash-4816  [000]   202.652819: lock_acquired: &rq->lock (0.000 us)
 <...>-4787  [000]   202.652825: lock_acquired: &rq->lock (0.000 us)
 <...>-4787  [000]   202.652829: lock_acquired: &rq->lock (0.000 us)
  bash-4816  [000]   202.652833: lock_acquired: &sb->s_type->i_mutex_key (16.005 us)

As shown above, the "lock acquired" field is followed by the time
it has been waiting for the lock. Usually, a lock contended entry
is followed by a near lock_acquired entry with a non-zero time waited.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <1238975373-15739-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/lockdep_event_types.h | 23 ++++++++++++++++++-----
 kernel/lockdep.c                    |  8 ++++----
 2 files changed, 22 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/trace/lockdep_event_types.h b/include/trace/lockdep_event_types.h
index adccfcd2ec8..863f1e4583a 100644
--- a/include/trace/lockdep_event_types.h
+++ b/include/trace/lockdep_event_types.h
@@ -32,11 +32,24 @@ TRACE_FORMAT(lock_contended,
 	TP_FMT("%s", lock->name)
 	);
 
-TRACE_FORMAT(lock_acquired,
-	TP_PROTO(struct lockdep_map *lock, unsigned long ip),
-	TP_ARGS(lock, ip),
-	TP_FMT("%s", lock->name)
-	);
+TRACE_EVENT(lock_acquired,
+	TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime),
+
+	TP_ARGS(lock, ip, waittime),
+
+	TP_STRUCT__entry(
+		__field(const char *, name)
+		__field(unsigned long, wait_usec)
+		__field(unsigned long, wait_nsec_rem)
+	),
+	TP_fast_assign(
+		__entry->name = lock->name;
+		__entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC);
+		__entry->wait_usec = (unsigned long) waittime;
+	),
+	TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec,
+				       __entry->wait_nsec_rem)
+);
 
 #endif
 #endif
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index b0f01186696..c4582a6ea95 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3061,6 +3061,8 @@ found_it:
 	put_lock_stats(stats);
 }
 
+DEFINE_TRACE(lock_acquired);
+
 static void
 __lock_acquired(struct lockdep_map *lock, unsigned long ip)
 {
@@ -3099,6 +3101,8 @@ found_it:
 		hlock->holdtime_stamp = now;
 	}
 
+	trace_lock_acquired(lock, ip, waittime);
+
 	stats = get_lock_stats(hlock_class(hlock));
 	if (waittime) {
 		if (hlock->read)
@@ -3137,14 +3141,10 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
 }
 EXPORT_SYMBOL_GPL(lock_contended);
 
-DEFINE_TRACE(lock_acquired);
-
 void lock_acquired(struct lockdep_map *lock, unsigned long ip)
 {
 	unsigned long flags;
 
-	trace_lock_acquired(lock, ip);
-
 	if (unlikely(!lock_stat))
 		return;
 
-- 
cgit v1.2.3-70-g09d2


From 5cb3d1d9d34ac04bcaa2034139345b2a5fea54c1 Mon Sep 17 00:00:00 2001
From: Zhaolei <zhaolei@cn.fujitsu.com>
Date: Thu, 9 Apr 2009 14:08:18 +0800
Subject: tracing, net, skb tracepoint: make skb tracepoint use the
 TRACE_EVENT() macro

TRACE_EVENT is a more generic way to define a tracepoint.
Doing so adds these new capabilities to this tracepoint:

  - zero-copy and per-cpu splice() tracing
  - binary tracing without printf overhead
  - structured logging records exposed under /debug/tracing/events
  - trace events embedded in function tracer output and other plugins
  - user-defined, per tracepoint filter expressions

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: "Steven Rostedt ;" <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <49DD90D2.5020604@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/skb.h               |  4 +---
 include/trace/skb_event_types.h   | 38 ++++++++++++++++++++++++++++++++++++++
 include/trace/trace_event_types.h |  1 +
 include/trace/trace_events.h      |  1 +
 4 files changed, 41 insertions(+), 3 deletions(-)
 create mode 100644 include/trace/skb_event_types.h

(limited to 'include')

diff --git a/include/trace/skb.h b/include/trace/skb.h
index b66206d9be7..d2de7174a6e 100644
--- a/include/trace/skb.h
+++ b/include/trace/skb.h
@@ -4,8 +4,6 @@
 #include <linux/skbuff.h>
 #include <linux/tracepoint.h>
 
-DECLARE_TRACE(kfree_skb,
-	TP_PROTO(struct sk_buff *skb, void *location),
-	TP_ARGS(skb, location));
+#include <trace/skb_event_types.h>
 
 #endif
diff --git a/include/trace/skb_event_types.h b/include/trace/skb_event_types.h
new file mode 100644
index 00000000000..4a1c504c0e1
--- /dev/null
+++ b/include/trace/skb_event_types.h
@@ -0,0 +1,38 @@
+
+/* use <trace/skb.h> instead */
+#ifndef TRACE_EVENT
+# error Do not include this file directly.
+# error Unless you know what you are doing.
+#endif
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM skb
+
+/*
+ * Tracepoint for free an sk_buff:
+ */
+TRACE_EVENT(kfree_skb,
+
+	TP_PROTO(struct sk_buff *skb, void *location),
+
+	TP_ARGS(skb, location),
+
+	TP_STRUCT__entry(
+		__field(	void *,		skbaddr		)
+		__field(	unsigned short,	protocol	)
+		__field(	void *,		location	)
+	),
+
+	TP_fast_assign(
+		__entry->skbaddr = skb;
+		if (skb) {
+			__entry->protocol = ntohs(skb->protocol);
+		}
+		__entry->location = location;
+	),
+
+	TP_printk("skbaddr=%p protocol=%u location=%p",
+		__entry->skbaddr, __entry->protocol, __entry->location)
+);
+
+#undef TRACE_SYSTEM
diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h
index df56f5694be..33b6bfcba93 100644
--- a/include/trace/trace_event_types.h
+++ b/include/trace/trace_event_types.h
@@ -3,3 +3,4 @@
 #include <trace/sched_event_types.h>
 #include <trace/irq_event_types.h>
 #include <trace/lockdep_event_types.h>
+#include <trace/skb_event_types.h>
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index fd13750ca4b..0e2aa80076d 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -3,3 +3,4 @@
 #include <trace/sched.h>
 #include <trace/irq.h>
 #include <trace/lockdep.h>
+#include <trace/skb.h>
-- 
cgit v1.2.3-70-g09d2


From 02af61bb50f5d5f0322dbe5ab2a0d75808d25c7b Mon Sep 17 00:00:00 2001
From: Zhaolei <zhaolei@cn.fujitsu.com>
Date: Fri, 10 Apr 2009 14:26:18 +0800
Subject: tracing, kmemtrace: Separate include/trace/kmemtrace.h to kmemtrace
 part and tracepoint part

Impact: refactor code for future changes

Current kmemtrace.h is used both as header file of kmemtrace and kmem's
tracepoints definition.

Tracepoints' definition file may be used by other code, and should only have
definition of tracepoint.

We can separate include/trace/kmemtrace.h into 2 files:

  include/linux/kmemtrace.h: header file for kmemtrace
  include/trace/kmem.h:      definition of kmem tracepoints

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Acked-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <49DEE68A.5040902@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kmemtrace.h | 25 +++++++++++++++++++
 include/linux/slab_def.h  |  2 +-
 include/linux/slub_def.h  |  2 +-
 include/trace/kmem.h      | 44 +++++++++++++++++++++++++++++++++
 include/trace/kmemtrace.h | 63 -----------------------------------------------
 init/main.c               |  2 +-
 kernel/trace/kmemtrace.c  |  2 +-
 kernel/trace/trace.h      |  2 +-
 mm/slab.c                 |  2 +-
 mm/slob.c                 |  2 +-
 mm/slub.c                 |  2 +-
 11 files changed, 77 insertions(+), 71 deletions(-)
 create mode 100644 include/linux/kmemtrace.h
 create mode 100644 include/trace/kmem.h
 delete mode 100644 include/trace/kmemtrace.h

(limited to 'include')

diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h
new file mode 100644
index 00000000000..15c45a27a92
--- /dev/null
+++ b/include/linux/kmemtrace.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2008 Eduard - Gabriel Munteanu
+ *
+ * This file is released under GPL version 2.
+ */
+
+#ifndef _LINUX_KMEMTRACE_H
+#define _LINUX_KMEMTRACE_H
+
+#ifdef __KERNEL__
+
+#include <trace/kmem.h>
+
+#ifdef CONFIG_KMEMTRACE
+extern void kmemtrace_init(void);
+#else
+static inline void kmemtrace_init(void)
+{
+}
+#endif
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_KMEMTRACE_H */
+
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 5ac9b0bcaf9..713f841ecaa 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -14,7 +14,7 @@
 #include <asm/page.h>		/* kmalloc_sizes.h needs PAGE_SIZE */
 #include <asm/cache.h>		/* kmalloc_sizes.h needs L1_CACHE_BYTES */
 #include <linux/compiler.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 
 /* Size description struct for general caches. */
 struct cache_sizes {
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 5046f90c117..be5d40c43bd 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,7 +10,7 @@
 #include <linux/gfp.h>
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 
 enum stat_item {
 	ALLOC_FASTPATH,		/* Allocation from cpu slab */
diff --git a/include/trace/kmem.h b/include/trace/kmem.h
new file mode 100644
index 00000000000..24d25192818
--- /dev/null
+++ b/include/trace/kmem.h
@@ -0,0 +1,44 @@
+#ifndef _TRACE_KMEM_H
+#define _TRACE_KMEM_H
+
+#include <linux/tracepoint.h>
+#include <linux/types.h>
+
+DECLARE_TRACE(kmalloc,
+	      TP_PROTO(unsigned long call_site,
+		      const void *ptr,
+		      size_t bytes_req,
+		      size_t bytes_alloc,
+		      gfp_t gfp_flags),
+	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags));
+DECLARE_TRACE(kmem_cache_alloc,
+	      TP_PROTO(unsigned long call_site,
+		      const void *ptr,
+		      size_t bytes_req,
+		      size_t bytes_alloc,
+		      gfp_t gfp_flags),
+	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags));
+DECLARE_TRACE(kmalloc_node,
+	      TP_PROTO(unsigned long call_site,
+		      const void *ptr,
+		      size_t bytes_req,
+		      size_t bytes_alloc,
+		      gfp_t gfp_flags,
+		      int node),
+	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node));
+DECLARE_TRACE(kmem_cache_alloc_node,
+	      TP_PROTO(unsigned long call_site,
+		      const void *ptr,
+		      size_t bytes_req,
+		      size_t bytes_alloc,
+		      gfp_t gfp_flags,
+		      int node),
+	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node));
+DECLARE_TRACE(kfree,
+	      TP_PROTO(unsigned long call_site, const void *ptr),
+	      TP_ARGS(call_site, ptr));
+DECLARE_TRACE(kmem_cache_free,
+	      TP_PROTO(unsigned long call_site, const void *ptr),
+	      TP_ARGS(call_site, ptr));
+
+#endif /* _TRACE_KMEM_H */
diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h
deleted file mode 100644
index 28ee69f9cd4..00000000000
--- a/include/trace/kmemtrace.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (C) 2008 Eduard - Gabriel Munteanu
- *
- * This file is released under GPL version 2.
- */
-
-#ifndef _LINUX_KMEMTRACE_H
-#define _LINUX_KMEMTRACE_H
-
-#ifdef __KERNEL__
-
-#include <linux/tracepoint.h>
-#include <linux/types.h>
-
-#ifdef CONFIG_KMEMTRACE
-extern void kmemtrace_init(void);
-#else
-static inline void kmemtrace_init(void)
-{
-}
-#endif
-
-DECLARE_TRACE(kmalloc,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags));
-DECLARE_TRACE(kmem_cache_alloc,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags));
-DECLARE_TRACE(kmalloc_node,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags,
-		      int node),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node));
-DECLARE_TRACE(kmem_cache_alloc_node,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags,
-		      int node),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node));
-DECLARE_TRACE(kfree,
-	      TP_PROTO(unsigned long call_site, const void *ptr),
-	      TP_ARGS(call_site, ptr));
-DECLARE_TRACE(kmem_cache_free,
-	      TP_PROTO(unsigned long call_site, const void *ptr),
-	      TP_ARGS(call_site, ptr));
-
-#endif /* __KERNEL__ */
-
-#endif /* _LINUX_KMEMTRACE_H */
-
diff --git a/init/main.c b/init/main.c
index 3585f073d63..eece40cd8a6 100644
--- a/init/main.c
+++ b/init/main.c
@@ -64,6 +64,7 @@
 #include <linux/idr.h>
 #include <linux/ftrace.h>
 #include <linux/async.h>
+#include <linux/kmemtrace.h>
 #include <trace/boot.h>
 
 #include <asm/io.h>
@@ -71,7 +72,6 @@
 #include <asm/setup.h>
 #include <asm/sections.h>
 #include <asm/cacheflush.h>
-#include <trace/kmemtrace.h>
 
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/smp.h>
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index 5011f4d91e3..7a0aa0e260d 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -12,7 +12,7 @@
 #include <linux/dcache.h>
 #include <linux/fs.h>
 
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 
 #include "trace_output.h"
 #include "trace.h"
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f76a8f8689d..34b94c3f40a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -9,7 +9,7 @@
 #include <linux/mmiotrace.h>
 #include <linux/ftrace.h>
 #include <trace/boot.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 #include <trace/power.h>
 
 enum trace_type {
diff --git a/mm/slab.c b/mm/slab.c
index 9a90b00d2f9..f85831da908 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -102,7 +102,7 @@
 #include	<linux/cpu.h>
 #include	<linux/sysctl.h>
 #include	<linux/module.h>
-#include	<trace/kmemtrace.h>
+#include	<linux/kmemtrace.h>
 #include	<linux/rcupdate.h>
 #include	<linux/string.h>
 #include	<linux/uaccess.h>
diff --git a/mm/slob.c b/mm/slob.c
index a2d4ab32198..494f05f1941 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -65,7 +65,7 @@
 #include <linux/module.h>
 #include <linux/rcupdate.h>
 #include <linux/list.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 #include <asm/atomic.h>
 
 /*
diff --git a/mm/slub.c b/mm/slub.c
index 7ab54ecbd3f..ea9e7160e2e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -16,7 +16,7 @@
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
 #include <linux/mempolicy.h>
-- 
cgit v1.2.3-70-g09d2


From fc182a4330fc22ea1b68fa3d5064dd85a73a4c4a Mon Sep 17 00:00:00 2001
From: Zhaolei <zhaolei@cn.fujitsu.com>
Date: Fri, 10 Apr 2009 14:27:38 +0800
Subject: tracing, kmemtrace: Make kmem tracepoints use TRACE_EVENT macro

TRACE_EVENT is a more generic way to define tracepoints.
Doing so adds these new capabilities to this tracepoint:

  - zero-copy and per-cpu splice() tracing
  - binary tracing without printf overhead
  - structured logging records exposed under /debug/tracing/events
  - trace events embedded in function tracer output and other plugins
  - user-defined, per tracepoint filter expressions

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Acked-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <49DEE6DA.80600@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/kmem.h              |  39 +-------
 include/trace/kmem_event_types.h  | 193 ++++++++++++++++++++++++++++++++++++++
 include/trace/trace_event_types.h |   1 +
 include/trace/trace_events.h      |   1 +
 4 files changed, 197 insertions(+), 37 deletions(-)
 create mode 100644 include/trace/kmem_event_types.h

(limited to 'include')

diff --git a/include/trace/kmem.h b/include/trace/kmem.h
index 24d25192818..46efc2423f0 100644
--- a/include/trace/kmem.h
+++ b/include/trace/kmem.h
@@ -1,44 +1,9 @@
 #ifndef _TRACE_KMEM_H
 #define _TRACE_KMEM_H
 
-#include <linux/tracepoint.h>
 #include <linux/types.h>
+#include <linux/tracepoint.h>
 
-DECLARE_TRACE(kmalloc,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags));
-DECLARE_TRACE(kmem_cache_alloc,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags));
-DECLARE_TRACE(kmalloc_node,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags,
-		      int node),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node));
-DECLARE_TRACE(kmem_cache_alloc_node,
-	      TP_PROTO(unsigned long call_site,
-		      const void *ptr,
-		      size_t bytes_req,
-		      size_t bytes_alloc,
-		      gfp_t gfp_flags,
-		      int node),
-	      TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node));
-DECLARE_TRACE(kfree,
-	      TP_PROTO(unsigned long call_site, const void *ptr),
-	      TP_ARGS(call_site, ptr));
-DECLARE_TRACE(kmem_cache_free,
-	      TP_PROTO(unsigned long call_site, const void *ptr),
-	      TP_ARGS(call_site, ptr));
+#include <trace/kmem_event_types.h>
 
 #endif /* _TRACE_KMEM_H */
diff --git a/include/trace/kmem_event_types.h b/include/trace/kmem_event_types.h
new file mode 100644
index 00000000000..4ff420fe467
--- /dev/null
+++ b/include/trace/kmem_event_types.h
@@ -0,0 +1,193 @@
+
+/* use <trace/kmem.h> instead */
+#ifndef TRACE_EVENT
+# error Do not include this file directly.
+# error Unless you know what you are doing.
+#endif
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kmem
+
+TRACE_EVENT(kmalloc,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags)
+);
+
+TRACE_EVENT(kmem_cache_alloc,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags)
+);
+
+TRACE_EVENT(kmalloc_node,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags,
+		 int node),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+		__field(	int,		node		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+		__entry->node		= node;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags,
+		__entry->node)
+);
+
+TRACE_EVENT(kmem_cache_alloc_node,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags,
+		 int node),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+		__field(	int,		node		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+		__entry->node		= node;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags,
+		__entry->node)
+);
+
+TRACE_EVENT(kfree,
+
+	TP_PROTO(unsigned long call_site, const void *ptr),
+
+	TP_ARGS(call_site, ptr),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+	),
+
+	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+);
+
+TRACE_EVENT(kmem_cache_free,
+
+	TP_PROTO(unsigned long call_site, const void *ptr),
+
+	TP_ARGS(call_site, ptr),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+	),
+
+	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+);
+
+#undef TRACE_SYSTEM
diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h
index 33b6bfcba93..552a50e169a 100644
--- a/include/trace/trace_event_types.h
+++ b/include/trace/trace_event_types.h
@@ -4,3 +4,4 @@
 #include <trace/irq_event_types.h>
 #include <trace/lockdep_event_types.h>
 #include <trace/skb_event_types.h>
+#include <trace/kmem_event_types.h>
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index 0e2aa80076d..13d6b85668c 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -4,3 +4,4 @@
 #include <trace/irq.h>
 #include <trace/lockdep.h>
 #include <trace/skb.h>
+#include <trace/kmem.h>
-- 
cgit v1.2.3-70-g09d2


From f6d655a6e6974e474a11b25052c29d10b80814b3 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 13 Apr 2009 11:27:03 +0100
Subject: ASoC: Support DAPM events for DACs and ADCs

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc-dapm.h | 10 ++++++++++
 sound/soc/soc-dapm.c     | 16 ++++++++++------
 2 files changed, 20 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index a7def6a9a03..fcc929da033 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -140,9 +140,19 @@
 #define SND_SOC_DAPM_DAC(wname, stname, wreg, wshift, winvert) \
 {	.id = snd_soc_dapm_dac, .name = wname, .sname = stname, .reg = wreg, \
 	.shift = wshift, .invert = winvert}
+#define SND_SOC_DAPM_DAC_E(wname, stname, wreg, wshift, winvert, \
+			   wevent, wflags)				\
+{	.id = snd_soc_dapm_dac, .name = wname, .sname = stname, .reg = wreg, \
+	.shift = wshift, .invert = winvert, \
+	.event = wevent, .event_flags = wflags}
 #define SND_SOC_DAPM_ADC(wname, stname, wreg, wshift, winvert) \
 {	.id = snd_soc_dapm_adc, .name = wname, .sname = stname, .reg = wreg, \
 	.shift = wshift, .invert = winvert}
+#define SND_SOC_DAPM_ADC_E(wname, stname, wreg, wshift, winvert, \
+			   wevent, wflags)				\
+{	.id = snd_soc_dapm_adc, .name = wname, .sname = stname, .reg = wreg, \
+	.shift = wshift, .invert = winvert, \
+	.event = wevent, .event_flags = wflags}
 
 /* generic register modifier widget */
 #define SND_SOC_DAPM_REG(wid, wname, wreg, wshift, wmask, won_val, woff_val) \
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 713d1258670..a6d73379ab3 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -598,18 +598,22 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event,
 	if (w->id == snd_soc_dapm_adc && w->active) {
 		in = is_connected_input_ep(w);
 		dapm_clear_walk(w->codec);
-		w->power = (in != 0) ? 1 : 0;
-		dapm_update_bits(w);
-		return 0;
+		power = (in != 0) ? 1 : 0;
+		if (power == w->power)
+			return 0;
+		w->power = power;
+		return dapm_generic_apply_power(w);
 	}
 
 	/* active DAC */
 	if (w->id == snd_soc_dapm_dac && w->active) {
 		out = is_connected_output_ep(w);
 		dapm_clear_walk(w->codec);
-		w->power = (out != 0) ? 1 : 0;
-		dapm_update_bits(w);
-		return 0;
+		power = (out != 0) ? 1 : 0;
+		if (power == w->power)
+			return 0;
+		w->power = power;
+		return dapm_generic_apply_power(w);
 	}
 
 	/* pre and post event widgets */
-- 
cgit v1.2.3-70-g09d2


From fa1b47dd85453ec7d4bcfe4aa4a2d172ba452fc3 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 2 Apr 2009 00:09:41 -0400
Subject: ring-buffer: add ring_buffer_discard_commit

The ring_buffer_discard_commit is similar to ring_buffer_event_discard
but it can only be done on an event that has yet to be commited.
Unpredictable results can happen otherwise.

The main difference between ring_buffer_discard_commit and
ring_buffer_event_discard is that ring_buffer_discard_commit will try
to free the data in the ring buffer if nothing has addded data
after the reserved event. If something did, then it acts almost the
same as ring_buffer_event_discard followed by a
ring_buffer_unlock_commit.

Note, either ring_buffer_commit_discard and ring_buffer_unlock_commit
can be called on an event, not both.

This commit also exports both discard functions to be usable by
GPL modules.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ring_buffer.h |  29 ++++++++++
 kernel/trace/ring_buffer.c  | 125 ++++++++++++++++++++++++++++++++++++--------
 2 files changed, 133 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index e1b7b217388..f0aa486d131 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -68,8 +68,37 @@ ring_buffer_event_time_delta(struct ring_buffer_event *event)
 	return event->time_delta;
 }
 
+/*
+ * ring_buffer_event_discard can discard any event in the ring buffer.
+ *   it is up to the caller to protect against a reader from
+ *   consuming it or a writer from wrapping and replacing it.
+ *
+ * No external protection is needed if this is called before
+ * the event is commited. But in that case it would be better to
+ * use ring_buffer_discard_commit.
+ *
+ * Note, if an event that has not been committed is discarded
+ * with ring_buffer_event_discard, it must still be committed.
+ */
 void ring_buffer_event_discard(struct ring_buffer_event *event);
 
+/*
+ * ring_buffer_discard_commit will remove an event that has not
+ *   ben committed yet. If this is used, then ring_buffer_unlock_commit
+ *   must not be called on the discarded event. This function
+ *   will try to remove the event from the ring buffer completely
+ *   if another event has not been written after it.
+ *
+ * Example use:
+ *
+ *  if (some_condition)
+ *    ring_buffer_discard_commit(buffer, event);
+ *  else
+ *    ring_buffer_unlock_commit(buffer, event);
+ */
+void ring_buffer_discard_commit(struct ring_buffer *buffer,
+				struct ring_buffer_event *event);
+
 /*
  * size is in bytes for each per CPU buffer.
  */
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 74a11808c28..f935bd5ec3e 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -205,27 +205,6 @@ static void rb_event_set_padding(struct ring_buffer_event *event)
 	event->time_delta = 0;
 }
 
-/**
- * ring_buffer_event_discard - discard an event in the ring buffer
- * @buffer: the ring buffer
- * @event: the event to discard
- *
- * Sometimes a event that is in the ring buffer needs to be ignored.
- * This function lets the user discard an event in the ring buffer
- * and then that event will not be read later.
- *
- * Note, it is up to the user to be careful with this, and protect
- * against races. If the user discards an event that has been consumed
- * it is possible that it could corrupt the ring buffer.
- */
-void ring_buffer_event_discard(struct ring_buffer_event *event)
-{
-	event->type = RINGBUF_TYPE_PADDING;
-	/* time delta must be non zero */
-	if (!event->time_delta)
-		event->time_delta = 1;
-}
-
 static unsigned
 rb_event_data_length(struct ring_buffer_event *event)
 {
@@ -1570,6 +1549,110 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 }
 EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
 
+/**
+ * ring_buffer_event_discard - discard any event in the ring buffer
+ * @event: the event to discard
+ *
+ * Sometimes a event that is in the ring buffer needs to be ignored.
+ * This function lets the user discard an event in the ring buffer
+ * and then that event will not be read later.
+ *
+ * Note, it is up to the user to be careful with this, and protect
+ * against races. If the user discards an event that has been consumed
+ * it is possible that it could corrupt the ring buffer.
+ */
+void ring_buffer_event_discard(struct ring_buffer_event *event)
+{
+	event->type = RINGBUF_TYPE_PADDING;
+	/* time delta must be non zero */
+	if (!event->time_delta)
+		event->time_delta = 1;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_event_discard);
+
+/**
+ * ring_buffer_commit_discard - discard an event that has not been committed
+ * @buffer: the ring buffer
+ * @event: non committed event to discard
+ *
+ * This is similar to ring_buffer_event_discard but must only be
+ * performed on an event that has not been committed yet. The difference
+ * is that this will also try to free the event from the ring buffer
+ * if another event has not been added behind it.
+ *
+ * If another event has been added behind it, it will set the event
+ * up as discarded, and perform the commit.
+ *
+ * If this function is called, do not call ring_buffer_unlock_commit on
+ * the event.
+ */
+void ring_buffer_discard_commit(struct ring_buffer *buffer,
+				struct ring_buffer_event *event)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long new_index, old_index;
+	struct buffer_page *bpage;
+	unsigned long index;
+	unsigned long addr;
+	int cpu;
+
+	/* The event is discarded regardless */
+	ring_buffer_event_discard(event);
+
+	/*
+	 * This must only be called if the event has not been
+	 * committed yet. Thus we can assume that preemption
+	 * is still disabled.
+	 */
+	RB_WARN_ON(buffer, !preempt_count());
+
+	cpu = smp_processor_id();
+	cpu_buffer = buffer->buffers[cpu];
+
+	new_index = rb_event_index(event);
+	old_index = new_index + rb_event_length(event);
+	addr = (unsigned long)event;
+	addr &= PAGE_MASK;
+
+	bpage = cpu_buffer->tail_page;
+
+	if (bpage == (void *)addr && rb_page_write(bpage) == old_index) {
+		/*
+		 * This is on the tail page. It is possible that
+		 * a write could come in and move the tail page
+		 * and write to the next page. That is fine
+		 * because we just shorten what is on this page.
+		 */
+		index = local_cmpxchg(&bpage->write, old_index, new_index);
+		if (index == old_index)
+			goto out;
+	}
+
+	/*
+	 * The commit is still visible by the reader, so we
+	 * must increment entries.
+	 */
+	cpu_buffer->entries++;
+ out:
+	/*
+	 * If a write came in and pushed the tail page
+	 * we still need to update the commit pointer
+	 * if we were the commit.
+	 */
+	if (rb_is_commit(cpu_buffer, event))
+		rb_set_commit_to_write(cpu_buffer);
+
+	/*
+	 * Only the last preempt count needs to restore preemption.
+	 */
+	if (preempt_count() == 1)
+		ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
+	else
+		preempt_enable_no_resched_notrace();
+
+}
+EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
+
 /**
  * ring_buffer_write - write data to the buffer without reserving
  * @buffer: The ring buffer to write to.
-- 
cgit v1.2.3-70-g09d2


From 5f77a88b3f8268b11940b51d2e03d26a663ceb90 Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Wed, 8 Apr 2009 03:14:01 -0500
Subject: tracing/infrastructure: separate event tracer from event support

Add a new config option, CONFIG_EVENT_TRACING that gets selected
when CONFIG_TRACING is selected and adds everything needed by the stuff
in trace_export - basically all the event tracing support needed by e.g.
bprint, minus the actual events, which are only included if
CONFIG_EVENT_TRACER is selected.

So CONFIG_EVENT_TRACER can be used to turn on or off the generated events
(what I think of as the 'event tracer'), while CONFIG_EVENT_TRACING turns
on or off the base event tracing support used by both the event tracer and
the other things such as bprint that can't be configured out.

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: fweisbec@gmail.com
LKML-Reference: <1239178441.10295.34.camel@tropicana>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-generic/vmlinux.lds.h | 2 +-
 kernel/trace/Kconfig              | 4 ++++
 kernel/trace/Makefile             | 6 +++---
 3 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 7fa660fd449..7e9b1e9f711 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -61,7 +61,7 @@
 #define BRANCH_PROFILE()
 #endif
 
-#ifdef CONFIG_EVENT_TRACER
+#ifdef CONFIG_EVENT_TRACING
 #define FTRACE_EVENTS()	VMLINUX_SYMBOL(__start_ftrace_events) = .;	\
 			*(_ftrace_events)				\
 			VMLINUX_SYMBOL(__stop_ftrace_events) = .;
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 23b96ebbf89..644606e899f 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -48,6 +48,9 @@ config FTRACE_NMI_ENTER
        depends on HAVE_FTRACE_NMI_ENTER
        default y
 
+config EVENT_TRACING
+	bool
+
 config TRACING
 	bool
 	select DEBUG_FS
@@ -56,6 +59,7 @@ config TRACING
 	select TRACEPOINTS
 	select NOP_TRACER
 	select BINARY_PRINTF
+	select EVENT_TRACING
 
 #
 # Minimum requirements an architecture has to meet for us to
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 2630f5121ec..3ad367e7c97 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -40,11 +40,11 @@ obj-$(CONFIG_POWER_TRACER) += trace_power.o
 obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
 obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
 obj-$(CONFIG_BLK_DEV_IO_TRACE)	+= blktrace.o
-obj-$(CONFIG_EVENT_TRACER) += trace_events.o
+obj-$(CONFIG_EVENT_TRACING) += trace_events.o
 obj-$(CONFIG_EVENT_TRACER) += events.o
-obj-$(CONFIG_EVENT_TRACER) += trace_export.o
+obj-$(CONFIG_EVENT_TRACING) += trace_export.o
 obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
 obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
-obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o
+obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 
 libftrace-y := ftrace.o
-- 
cgit v1.2.3-70-g09d2


From 6e837fb152410e571a81aaadbd9884f0bc46a55e Mon Sep 17 00:00:00 2001
From: Etienne Basset <etienne.basset@numericable.fr>
Date: Wed, 8 Apr 2009 20:39:40 +0200
Subject: smack: implement logging V3

This patch creates auditing functions usable by LSM to audit security
events. It provides standard dumping of FS, NET, task etc ... events
(code borrowed from SELinux)
and provides 2 callbacks to define LSM specific auditing, which should be
flexible enough to convert SELinux too.

Signed-off-by: Etienne Basset <etienne.basset@numericable.fr>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
cked-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/lsm_audit.h | 111 +++++++++++++
 security/lsm_audit.c      | 386 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 497 insertions(+)
 create mode 100644 include/linux/lsm_audit.h
 create mode 100644 security/lsm_audit.c

(limited to 'include')

diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
new file mode 100644
index 00000000000..e461b2c3d71
--- /dev/null
+++ b/include/linux/lsm_audit.h
@@ -0,0 +1,111 @@
+/*
+ * Common LSM logging functions
+ * Heavily borrowed from selinux/avc.h
+ *
+ * Author : Etienne BASSET  <etienne.basset@ensta.org>
+ *
+ * All credits to : Stephen Smalley, <sds@epoch.ncsc.mil>
+ * All BUGS to : Etienne BASSET  <etienne.basset@ensta.org>
+ */
+#ifndef _LSM_COMMON_LOGGING_
+#define _LSM_COMMON_LOGGING_
+
+#include <linux/stddef.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/kdev_t.h>
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <linux/audit.h>
+#include <linux/in6.h>
+#include <linux/path.h>
+#include <linux/key.h>
+#include <linux/skbuff.h>
+#include <asm/system.h>
+
+
+/* Auxiliary data to use in generating the audit record. */
+struct common_audit_data {
+	char    type;
+#define LSM_AUDIT_DATA_FS      1
+#define LSM_AUDIT_DATA_NET     2
+#define LSM_AUDIT_DATA_CAP     3
+#define LSM_AUDIT_DATA_IPC     4
+#define LSM_AUDIT_DATA_TASK    5
+#define LSM_AUDIT_DATA_KEY     6
+	struct task_struct *tsk;
+	union 	{
+		struct {
+			struct path path;
+			struct inode *inode;
+		} fs;
+		struct {
+			int netif;
+			struct sock *sk;
+			u16 family;
+			__be16 dport;
+			__be16 sport;
+			union {
+				struct {
+					__be32 daddr;
+					__be32 saddr;
+				} v4;
+				struct {
+					struct in6_addr daddr;
+					struct in6_addr saddr;
+				} v6;
+			} fam;
+		} net;
+		int cap;
+		int ipc_id;
+		struct task_struct *tsk;
+#ifdef CONFIG_KEYS
+		struct {
+			key_serial_t key;
+			char *key_desc;
+		} key_struct;
+#endif
+	} u;
+	const char *function;
+	/* this union contains LSM specific data */
+	union {
+		/* SMACK data */
+		struct smack_audit_data {
+			char *subject;
+			char *object;
+			char *request;
+			int result;
+		} smack_audit_data;
+		/* SELinux data */
+		struct {
+			u32 ssid;
+			u32 tsid;
+			u16 tclass;
+			u32 requested;
+			u32 audited;
+			struct av_decision *avd;
+			int result;
+		} selinux_audit_data;
+	} lsm_priv;
+	/* these callback will be implemented by a specific LSM */
+	void (*lsm_pre_audit)(struct audit_buffer *, void *);
+	void (*lsm_post_audit)(struct audit_buffer *, void *);
+};
+
+#define v4info fam.v4
+#define v6info fam.v6
+
+int ipv4_skb_to_auditdata(struct sk_buff *skb,
+		struct common_audit_data *ad, u8 *proto);
+
+int ipv6_skb_to_auditdata(struct sk_buff *skb,
+		struct common_audit_data *ad, u8 *proto);
+
+/* Initialize an LSM audit data structure. */
+#define COMMON_AUDIT_DATA_INIT(_d, _t) \
+	{ memset((_d), 0, sizeof(struct common_audit_data)); \
+	 (_d)->type = LSM_AUDIT_DATA_##_t; (_d)->function = __func__; }
+
+void common_lsm_audit(struct common_audit_data *a);
+
+#endif
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
new file mode 100644
index 00000000000..94b868494b3
--- /dev/null
+++ b/security/lsm_audit.c
@@ -0,0 +1,386 @@
+/*
+ * common LSM auditing functions
+ *
+ * Based on code written for SELinux by :
+ *			Stephen Smalley, <sds@epoch.ncsc.mil>
+ * 			James Morris <jmorris@redhat.com>
+ * Author : Etienne Basset, <etienne.basset@ensta.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <net/sock.h>
+#include <linux/un.h>
+#include <net/af_unix.h>
+#include <linux/audit.h>
+#include <linux/ipv6.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/dccp.h>
+#include <linux/sctp.h>
+#include <linux/lsm_audit.h>
+
+/**
+ * ipv4_skb_to_auditdata : fill auditdata from skb
+ * @skb : the skb
+ * @ad : the audit data to fill
+ * @proto : the layer 4 protocol
+ *
+ * return  0 on success
+ */
+int ipv4_skb_to_auditdata(struct sk_buff *skb,
+		struct common_audit_data *ad, u8 *proto)
+{
+	int ret = 0;
+	struct iphdr *ih;
+
+	ih = ip_hdr(skb);
+	if (ih == NULL)
+		return -EINVAL;
+
+	ad->u.net.v4info.saddr = ih->saddr;
+	ad->u.net.v4info.daddr = ih->daddr;
+
+	if (proto)
+		*proto = ih->protocol;
+	/* non initial fragment */
+	if (ntohs(ih->frag_off) & IP_OFFSET)
+		return 0;
+
+	switch (ih->protocol) {
+	case IPPROTO_TCP: {
+		struct tcphdr *th = tcp_hdr(skb);
+		if (th == NULL)
+			break;
+
+		ad->u.net.sport = th->source;
+		ad->u.net.dport = th->dest;
+		break;
+	}
+	case IPPROTO_UDP: {
+		struct udphdr *uh = udp_hdr(skb);
+		if (uh == NULL)
+			break;
+
+		ad->u.net.sport = uh->source;
+		ad->u.net.dport = uh->dest;
+		break;
+	}
+	case IPPROTO_DCCP: {
+		struct dccp_hdr *dh = dccp_hdr(skb);
+		if (dh == NULL)
+			break;
+
+		ad->u.net.sport = dh->dccph_sport;
+		ad->u.net.dport = dh->dccph_dport;
+		break;
+	}
+	case IPPROTO_SCTP: {
+		struct sctphdr *sh = sctp_hdr(skb);
+		if (sh == NULL)
+			break;
+		ad->u.net.sport = sh->source;
+		ad->u.net.dport = sh->dest;
+		break;
+	}
+	default:
+		ret = -EINVAL;
+	}
+	return ret;
+}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+/**
+ * ipv6_skb_to_auditdata : fill auditdata from skb
+ * @skb : the skb
+ * @ad : the audit data to fill
+ * @proto : the layer 4 protocol
+ *
+ * return  0 on success
+ */
+int ipv6_skb_to_auditdata(struct sk_buff *skb,
+		struct common_audit_data *ad, u8 *proto)
+{
+	int offset, ret = 0;
+	struct ipv6hdr *ip6;
+	u8 nexthdr;
+
+	ip6 = ipv6_hdr(skb);
+	if (ip6 == NULL)
+		return -EINVAL;
+	ipv6_addr_copy(&ad->u.net.v6info.saddr, &ip6->saddr);
+	ipv6_addr_copy(&ad->u.net.v6info.daddr, &ip6->daddr);
+	ret = 0;
+	/* IPv6 can have several extension header before the Transport header
+	 * skip them */
+	offset = skb_network_offset(skb);
+	offset += sizeof(*ip6);
+	nexthdr = ip6->nexthdr;
+	offset = ipv6_skip_exthdr(skb, offset, &nexthdr);
+	if (offset < 0)
+		return 0;
+	if (proto)
+		*proto = nexthdr;
+	switch (nexthdr) {
+	case IPPROTO_TCP: {
+		struct tcphdr _tcph, *th;
+
+		th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
+		if (th == NULL)
+			break;
+
+		ad->u.net.sport = th->source;
+		ad->u.net.dport = th->dest;
+		break;
+	}
+	case IPPROTO_UDP: {
+		struct udphdr _udph, *uh;
+
+		uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph);
+		if (uh == NULL)
+			break;
+
+		ad->u.net.sport = uh->source;
+		ad->u.net.dport = uh->dest;
+		break;
+	}
+	case IPPROTO_DCCP: {
+		struct dccp_hdr _dccph, *dh;
+
+		dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph);
+		if (dh == NULL)
+			break;
+
+		ad->u.net.sport = dh->dccph_sport;
+		ad->u.net.dport = dh->dccph_dport;
+		break;
+	}
+	case IPPROTO_SCTP: {
+		struct sctphdr _sctph, *sh;
+
+		sh = skb_header_pointer(skb, offset, sizeof(_sctph), &_sctph);
+		if (sh == NULL)
+			break;
+		ad->u.net.sport = sh->source;
+		ad->u.net.dport = sh->dest;
+		break;
+	}
+	default:
+		ret = -EINVAL;
+	}
+	return ret;
+}
+#endif
+
+
+static inline void print_ipv6_addr(struct audit_buffer *ab,
+				   struct in6_addr *addr, __be16 port,
+				   char *name1, char *name2)
+{
+	if (!ipv6_addr_any(addr))
+		audit_log_format(ab, " %s=%pI6", name1, addr);
+	if (port)
+		audit_log_format(ab, " %s=%d", name2, ntohs(port));
+}
+
+static inline void print_ipv4_addr(struct audit_buffer *ab, __be32 addr,
+				   __be16 port, char *name1, char *name2)
+{
+	if (addr)
+		audit_log_format(ab, " %s=%pI4", name1, &addr);
+	if (port)
+		audit_log_format(ab, " %s=%d", name2, ntohs(port));
+}
+
+/**
+ * dump_common_audit_data - helper to dump common audit data
+ * @a : common audit data
+ *
+ */
+static void dump_common_audit_data(struct audit_buffer *ab,
+				   struct common_audit_data *a)
+{
+	struct inode *inode = NULL;
+	struct task_struct *tsk = current;
+
+	if (a->tsk)
+		tsk = a->tsk;
+	if (tsk && tsk->pid) {
+		audit_log_format(ab, " pid=%d comm=", tsk->pid);
+		audit_log_untrustedstring(ab, tsk->comm);
+	}
+
+	switch (a->type) {
+	case LSM_AUDIT_DATA_IPC:
+		audit_log_format(ab, " key=%d ", a->u.ipc_id);
+		break;
+	case LSM_AUDIT_DATA_CAP:
+		audit_log_format(ab, " capability=%d ", a->u.cap);
+		break;
+	case LSM_AUDIT_DATA_FS:
+		if (a->u.fs.path.dentry) {
+			struct dentry *dentry = a->u.fs.path.dentry;
+			if (a->u.fs.path.mnt) {
+				audit_log_d_path(ab, "path=", &a->u.fs.path);
+			} else {
+				audit_log_format(ab, " name=");
+				audit_log_untrustedstring(ab,
+						 dentry->d_name.name);
+			}
+			inode = dentry->d_inode;
+		} else if (a->u.fs.inode) {
+			struct dentry *dentry;
+			inode = a->u.fs.inode;
+			dentry = d_find_alias(inode);
+			if (dentry) {
+				audit_log_format(ab, " name=");
+				audit_log_untrustedstring(ab,
+						 dentry->d_name.name);
+				dput(dentry);
+			}
+		}
+		if (inode)
+			audit_log_format(ab, " dev=%s ino=%lu",
+					inode->i_sb->s_id,
+					inode->i_ino);
+		break;
+	case LSM_AUDIT_DATA_TASK:
+		tsk = a->u.tsk;
+		if (tsk && tsk->pid) {
+			audit_log_format(ab, " pid=%d comm=", tsk->pid);
+			audit_log_untrustedstring(ab, tsk->comm);
+		}
+		break;
+	case LSM_AUDIT_DATA_NET:
+		if (a->u.net.sk) {
+			struct sock *sk = a->u.net.sk;
+			struct unix_sock *u;
+			int len = 0;
+			char *p = NULL;
+
+			switch (sk->sk_family) {
+			case AF_INET: {
+				struct inet_sock *inet = inet_sk(sk);
+
+				print_ipv4_addr(ab, inet->rcv_saddr,
+						inet->sport,
+						"laddr", "lport");
+				print_ipv4_addr(ab, inet->daddr,
+						inet->dport,
+						"faddr", "fport");
+				break;
+			}
+			case AF_INET6: {
+				struct inet_sock *inet = inet_sk(sk);
+				struct ipv6_pinfo *inet6 = inet6_sk(sk);
+
+				print_ipv6_addr(ab, &inet6->rcv_saddr,
+						inet->sport,
+						"laddr", "lport");
+				print_ipv6_addr(ab, &inet6->daddr,
+						inet->dport,
+						"faddr", "fport");
+				break;
+			}
+			case AF_UNIX:
+				u = unix_sk(sk);
+				if (u->dentry) {
+					struct path path = {
+						.dentry = u->dentry,
+						.mnt = u->mnt
+					};
+					audit_log_d_path(ab, "path=", &path);
+					break;
+				}
+				if (!u->addr)
+					break;
+				len = u->addr->len-sizeof(short);
+				p = &u->addr->name->sun_path[0];
+				audit_log_format(ab, " path=");
+				if (*p)
+					audit_log_untrustedstring(ab, p);
+				else
+					audit_log_n_hex(ab, p, len);
+				break;
+			}
+		}
+
+		switch (a->u.net.family) {
+		case AF_INET:
+			print_ipv4_addr(ab, a->u.net.v4info.saddr,
+					a->u.net.sport,
+					"saddr", "src");
+			print_ipv4_addr(ab, a->u.net.v4info.daddr,
+					a->u.net.dport,
+					"daddr", "dest");
+			break;
+		case AF_INET6:
+			print_ipv6_addr(ab, &a->u.net.v6info.saddr,
+					a->u.net.sport,
+					"saddr", "src");
+			print_ipv6_addr(ab, &a->u.net.v6info.daddr,
+					a->u.net.dport,
+					"daddr", "dest");
+			break;
+		}
+		if (a->u.net.netif > 0) {
+			struct net_device *dev;
+
+			/* NOTE: we always use init's namespace */
+			dev = dev_get_by_index(&init_net, a->u.net.netif);
+			if (dev) {
+				audit_log_format(ab, " netif=%s", dev->name);
+				dev_put(dev);
+			}
+		}
+		break;
+#ifdef CONFIG_KEYS
+	case LSM_AUDIT_DATA_KEY:
+		audit_log_format(ab, " key_serial=%u", a->u.key_struct.key);
+		if (a->u.key_struct.key_desc) {
+			audit_log_format(ab, " key_desc=");
+			audit_log_untrustedstring(ab, a->u.key_struct.key_desc);
+		}
+		break;
+#endif
+	} /* switch (a->type) */
+}
+
+/**
+ * common_lsm_audit - generic LSM auditing function
+ * @a:  auxiliary audit data
+ *
+ * setup the audit buffer for common security information
+ * uses callback to print LSM specific information
+ */
+void common_lsm_audit(struct common_audit_data *a)
+{
+	struct audit_buffer *ab;
+
+	if (a == NULL)
+		return;
+	/* we use GFP_ATOMIC so we won't sleep */
+	ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_AVC);
+
+	if (ab == NULL)
+		return;
+
+	if (a->lsm_pre_audit)
+		a->lsm_pre_audit(ab, a);
+
+	dump_common_audit_data(ab, a);
+
+	if (a->lsm_post_audit)
+		a->lsm_post_audit(ab, a);
+
+	audit_log_end(ab);
+}
-- 
cgit v1.2.3-70-g09d2


From 7ba5c840e64d4a967379f1ae3eca73278180b11d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 13 Apr 2009 21:31:17 -0700
Subject: rcu: Add __rcu_pending tracing to hierarchical RCU

Add tracing to __rcu_pending() to provide information on why RCU
processing was kicked off.  This is helpful for debugging hierarchical
RCU, and might also be helpful in learning how hierarchical RCU operates.

Located-by: Anton Blanchard <anton@au1.ibm.com>
Tested-by: Anton Blanchard <anton@au1.ibm.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: anton@samba.org
Cc: akpm@linux-foundation.org
Cc: dipankar@in.ibm.com
Cc: manfred@colorfullife.com
Cc: cl@linux-foundation.org
Cc: josht@linux.vnet.ibm.com
Cc: schamp@sgi.com
Cc: niv@us.ibm.com
Cc: dvhltc@us.ibm.com
Cc: ego@in.ibm.com
Cc: laijs@cn.fujitsu.com
Cc: rostedt@goodmis.org
Cc: peterz@infradead.org
Cc: penberg@cs.helsinki.fi
Cc: andi@firstfloor.org
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
LKML-Reference: <1239683479943-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcutree.h |  9 ++++++-
 kernel/rcutree.c        | 25 ++++++++++++++-----
 kernel/rcutree_trace.c  | 64 ++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 90 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 58b2aa5312b..5a5153806c4 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -161,8 +161,15 @@ struct rcu_data {
 	unsigned long offline_fqs;	/* Kicked due to being offline. */
 	unsigned long resched_ipi;	/* Sent a resched IPI. */
 
-	/* 5) For future __rcu_pending statistics. */
+	/* 5) __rcu_pending() statistics. */
 	long n_rcu_pending;		/* rcu_pending() calls since boot. */
+	long n_rp_qs_pending;
+	long n_rp_cb_ready;
+	long n_rp_cpu_needs_gp;
+	long n_rp_gp_completed;
+	long n_rp_gp_started;
+	long n_rp_need_fqs;
+	long n_rp_need_nothing;
 
 	int cpu;
 };
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index d2a372fb0b9..0dccfbba6d2 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1259,31 +1259,44 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 	check_cpu_stall(rsp, rdp);
 
 	/* Is the RCU core waiting for a quiescent state from this CPU? */
-	if (rdp->qs_pending)
+	if (rdp->qs_pending) {
+		rdp->n_rp_qs_pending++;
 		return 1;
+	}
 
 	/* Does this CPU have callbacks ready to invoke? */
-	if (cpu_has_callbacks_ready_to_invoke(rdp))
+	if (cpu_has_callbacks_ready_to_invoke(rdp)) {
+		rdp->n_rp_cb_ready++;
 		return 1;
+	}
 
 	/* Has RCU gone idle with this CPU needing another grace period? */
-	if (cpu_needs_another_gp(rsp, rdp))
+	if (cpu_needs_another_gp(rsp, rdp)) {
+		rdp->n_rp_cpu_needs_gp++;
 		return 1;
+	}
 
 	/* Has another RCU grace period completed?  */
-	if (ACCESS_ONCE(rsp->completed) != rdp->completed) /* outside of lock */
+	if (ACCESS_ONCE(rsp->completed) != rdp->completed) { /* outside lock */
+		rdp->n_rp_gp_completed++;
 		return 1;
+	}
 
 	/* Has a new RCU grace period started? */
-	if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) /* outside of lock */
+	if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) { /* outside lock */
+		rdp->n_rp_gp_started++;
 		return 1;
+	}
 
 	/* Has an RCU GP gone long enough to send resched IPIs &c? */
 	if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) &&
-	    ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0))
+	    ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) {
+		rdp->n_rp_need_fqs++;
 		return 1;
+	}
 
 	/* nothing to do */
+	rdp->n_rp_need_nothing++;
 	return 0;
 }
 
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 4b1875ba940..fe1dcdbf1ca 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -213,7 +213,63 @@ static struct file_operations rcugp_fops = {
 	.release = single_release,
 };
 
-static struct dentry *rcudir, *datadir, *datadir_csv, *hierdir, *gpdir;
+static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
+{
+	seq_printf(m, "%3d%cnp=%ld "
+		   "qsp=%ld cbr=%ld cng=%ld gpc=%ld gps=%ld nf=%ld nn=%ld\n",
+		   rdp->cpu,
+		   cpu_is_offline(rdp->cpu) ? '!' : ' ',
+		   rdp->n_rcu_pending,
+		   rdp->n_rp_qs_pending,
+		   rdp->n_rp_cb_ready,
+		   rdp->n_rp_cpu_needs_gp,
+		   rdp->n_rp_gp_completed,
+		   rdp->n_rp_gp_started,
+		   rdp->n_rp_need_fqs,
+		   rdp->n_rp_need_nothing);
+}
+
+static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp)
+{
+	int cpu;
+	struct rcu_data *rdp;
+
+	for_each_possible_cpu(cpu) {
+		rdp = rsp->rda[cpu];
+		if (rdp->beenonline)
+			print_one_rcu_pending(m, rdp);
+	}
+}
+
+static int show_rcu_pending(struct seq_file *m, void *unused)
+{
+	seq_puts(m, "rcu:\n");
+	print_rcu_pendings(m, &rcu_state);
+	seq_puts(m, "rcu_bh:\n");
+	print_rcu_pendings(m, &rcu_bh_state);
+	return 0;
+}
+
+static int rcu_pending_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, show_rcu_pending, NULL);
+}
+
+static struct file_operations rcu_pending_fops = {
+	.owner = THIS_MODULE,
+	.open = rcu_pending_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static struct dentry *rcudir;
+static struct dentry *datadir;
+static struct dentry *datadir_csv;
+static struct dentry *gpdir;
+static struct dentry *hierdir;
+static struct dentry *rcu_pendingdir;
+
 static int __init rcuclassic_trace_init(void)
 {
 	rcudir = debugfs_create_dir("rcu", NULL);
@@ -238,6 +294,11 @@ static int __init rcuclassic_trace_init(void)
 						NULL, &rcuhier_fops);
 	if (!hierdir)
 		goto free_out;
+
+	rcu_pendingdir = debugfs_create_file("rcu_pending", 0444, rcudir,
+						NULL, &rcu_pending_fops);
+	if (!rcu_pendingdir)
+		goto free_out;
 	return 0;
 free_out:
 	if (datadir)
@@ -257,6 +318,7 @@ static void __exit rcuclassic_trace_cleanup(void)
 	debugfs_remove(datadir_csv);
 	debugfs_remove(gpdir);
 	debugfs_remove(hierdir);
+	debugfs_remove(rcu_pendingdir);
 	debugfs_remove(rcudir);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 02bec490450836ebbd628e97ec03f10b57def8ce Mon Sep 17 00:00:00 2001
From: Tim Blechmann <tim@klingt.org>
Date: Tue, 24 Mar 2009 12:24:35 +0100
Subject: ALSA: lx6464es - driver for the digigram lx6464es interface

prototype of a driver for the digigram lx6464es 64 channel ethersound
interface.

Signed-off-by: Tim Blechmann <tim@klingt.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/pci_ids.h       |    5 +
 sound/pci/Kconfig             |   10 +
 sound/pci/Makefile            |    1 +
 sound/pci/lx6464es/Makefile   |    2 +
 sound/pci/lx6464es/lx6464es.c | 1152 ++++++++++++++++++++++++++++++++
 sound/pci/lx6464es/lx6464es.h |  114 ++++
 sound/pci/lx6464es/lx_core.c  | 1442 +++++++++++++++++++++++++++++++++++++++++
 sound/pci/lx6464es/lx_core.h  |  242 +++++++
 sound/pci/lx6464es/lx_defs.h  |  376 +++++++++++
 9 files changed, 3344 insertions(+)
 create mode 100644 sound/pci/lx6464es/Makefile
 create mode 100644 sound/pci/lx6464es/lx6464es.c
 create mode 100644 sound/pci/lx6464es/lx6464es.h
 create mode 100644 sound/pci/lx6464es/lx_core.c
 create mode 100644 sound/pci/lx6464es/lx_core.h
 create mode 100644 sound/pci/lx6464es/lx_defs.h

(limited to 'include')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index ee98cd57088..2b1a69598e7 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1005,6 +1005,7 @@
 #define PCI_DEVICE_ID_PLX_PCI200SYN	0x3196
 #define PCI_DEVICE_ID_PLX_9030          0x9030
 #define PCI_DEVICE_ID_PLX_9050		0x9050
+#define PCI_DEVICE_ID_PLX_9056		0x9056
 #define PCI_DEVICE_ID_PLX_9080		0x9080
 #define PCI_DEVICE_ID_PLX_GTEK_SERIAL2	0xa001
 
@@ -1847,6 +1848,10 @@
 #define PCI_SUBDEVICE_ID_HYPERCOPE_METRO	0x0107
 #define PCI_SUBDEVICE_ID_HYPERCOPE_CHAMP2	0x0108
 
+#define PCI_VENDOR_ID_DIGIGRAM		0x1369
+#define PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_SERIAL_SUBSYSTEM	0xc001
+#define PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_CAE_SERIAL_SUBSYSTEM	0xc002
+
 #define PCI_VENDOR_ID_KAWASAKI		0x136b
 #define PCI_DEVICE_ID_MCHIP_KL5A72002	0xff01
 
diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
index 93422e3a3f0..e912b70b6f9 100644
--- a/sound/pci/Kconfig
+++ b/sound/pci/Kconfig
@@ -622,6 +622,16 @@ config SND_KORG1212
 	  To compile this driver as a module, choose M here: the module
 	  will be called snd-korg1212.
 
+config SND_LX6464ES
+	tristate "Digigram LX6464ES"
+	select SND_PCM
+	help
+	  Say Y here to include support for Digigram LX6464ES boards.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called snd-lx6464es.
+
+
 config SND_MAESTRO3
 	tristate "ESS Allegro/Maestro3"
 	select SND_AC97_CODEC
diff --git a/sound/pci/Makefile b/sound/pci/Makefile
index 65b25d221cd..7d83e084dcf 100644
--- a/sound/pci/Makefile
+++ b/sound/pci/Makefile
@@ -62,6 +62,7 @@ obj-$(CONFIG_SND) += \
 	ca0106/ \
 	cs46xx/ \
 	cs5535audio/ \
+	lx6464es/ \
 	echoaudio/ \
 	emu10k1/ \
 	hda/ \
diff --git a/sound/pci/lx6464es/Makefile b/sound/pci/lx6464es/Makefile
new file mode 100644
index 00000000000..eb04a6c73d8
--- /dev/null
+++ b/sound/pci/lx6464es/Makefile
@@ -0,0 +1,2 @@
+snd-lx6464es-objs := lx6464es.o lx_core.o
+obj-$(CONFIG_SND_LX6464ES) += snd-lx6464es.o
diff --git a/sound/pci/lx6464es/lx6464es.c b/sound/pci/lx6464es/lx6464es.c
new file mode 100644
index 00000000000..7bc8b8caa99
--- /dev/null
+++ b/sound/pci/lx6464es/lx6464es.c
@@ -0,0 +1,1152 @@
+/* -*- linux-c -*- *
+ *
+ * ALSA driver for the digigram lx6464es interface
+ *
+ * Copyright (c) 2008, 2009 Tim Blechmann <tim@klingt.org>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include <sound/initval.h>
+#include <sound/control.h>
+#include <sound/info.h>
+
+#include "lx6464es.h"
+
+MODULE_AUTHOR("Tim Blechmann");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("digigram lx6464es");
+MODULE_SUPPORTED_DEVICE("{digigram lx6464es{}}");
+
+
+static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
+static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;
+static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
+
+static const char card_name[] = "LX6464ES";
+
+
+#define PCI_DEVICE_ID_PLX_LX6464ES		PCI_DEVICE_ID_PLX_9056
+
+static struct pci_device_id snd_lx6464es_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_LX6464ES),
+	  .subvendor = PCI_VENDOR_ID_DIGIGRAM,
+	  .subdevice = PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_SERIAL_SUBSYSTEM
+	},			/* LX6464ES */
+	{ PCI_DEVICE(PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_LX6464ES),
+	  .subvendor = PCI_VENDOR_ID_DIGIGRAM,
+	  .subdevice = PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_CAE_SERIAL_SUBSYSTEM
+	},			/* LX6464ES-CAE */
+	{ 0, },
+};
+
+MODULE_DEVICE_TABLE(pci, snd_lx6464es_ids);
+
+
+
+/* PGO pour USERo dans le registre pci_0x06/loc_0xEC */
+#define CHIPSC_RESET_XILINX (1L<<16)
+
+
+/* alsa callbacks */
+static struct snd_pcm_hardware lx_caps = {
+	.info             = (SNDRV_PCM_INFO_MMAP |
+			     SNDRV_PCM_INFO_INTERLEAVED |
+			     SNDRV_PCM_INFO_MMAP_VALID |
+			     SNDRV_PCM_INFO_SYNC_START),
+	.formats	  = (SNDRV_PCM_FMTBIT_S16_LE |
+			     SNDRV_PCM_FMTBIT_S16_BE |
+			     SNDRV_PCM_FMTBIT_S24_3LE |
+			     SNDRV_PCM_FMTBIT_S24_3BE),
+	.rates            = (SNDRV_PCM_RATE_CONTINUOUS |
+			     SNDRV_PCM_RATE_8000_192000),
+	.rate_min         = 8000,
+	.rate_max         = 192000,
+	.channels_min     = 2,
+	.channels_max     = 64,
+	.buffer_bytes_max = 64*2*3*MICROBLAZE_IBL_MAX*MAX_STREAM_BUFFER,
+	.period_bytes_min = (2*2*MICROBLAZE_IBL_MIN*2),
+	.period_bytes_max = (4*64*MICROBLAZE_IBL_MAX*MAX_STREAM_BUFFER),
+	.periods_min      = 2,
+	.periods_max      = MAX_STREAM_BUFFER,
+};
+
+static int lx_set_granularity(struct lx6464es *chip, u32 gran);
+
+
+static int lx_hardware_open(struct lx6464es *chip,
+			    struct snd_pcm_substream *substream)
+{
+	int err = 0;
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	int channels = runtime->channels;
+	int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+
+	snd_pcm_uframes_t period_size = runtime->period_size;
+
+	snd_printd(LXP "allocating pipe for %d channels\n", channels);
+	err = lx_pipe_allocate(chip, 0, is_capture, channels);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "allocating pipe failed\n");
+		return err;
+	}
+
+	err = lx_set_granularity(chip, period_size);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "setting granularity to %ld failed\n",
+			   period_size);
+		return err;
+	}
+
+	return 0;
+}
+
+static int lx_hardware_start(struct lx6464es *chip,
+			     struct snd_pcm_substream *substream)
+{
+	int err = 0;
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+
+	snd_printd(LXP "setting stream format\n");
+	err = lx_stream_set_format(chip, runtime, 0, is_capture);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "setting stream format failed\n");
+		return err;
+	}
+
+	snd_printd(LXP "starting pipe\n");
+	err = lx_pipe_start(chip, 0, is_capture);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "starting pipe failed\n");
+		return err;
+	}
+
+	snd_printd(LXP "waiting for pipe to start\n");
+	err = lx_pipe_wait_for_start(chip, 0, is_capture);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "waiting for pipe failed\n");
+		return err;
+	}
+
+	return err;
+}
+
+
+static int lx_hardware_stop(struct lx6464es *chip,
+			    struct snd_pcm_substream *substream)
+{
+	int err = 0;
+	int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+
+	snd_printd(LXP "pausing pipe\n");
+	err = lx_pipe_pause(chip, 0, is_capture);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "pausing pipe failed\n");
+		return err;
+	}
+
+	snd_printd(LXP "waiting for pipe to become idle\n");
+	err = lx_pipe_wait_for_idle(chip, 0, is_capture);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "waiting for pipe failed\n");
+		return err;
+	}
+
+	snd_printd(LXP "stopping pipe\n");
+	err = lx_pipe_stop(chip, 0, is_capture);
+	if (err < 0) {
+		snd_printk(LXP "stopping pipe failed\n");
+		return err;
+	}
+
+	return err;
+}
+
+
+static int lx_hardware_close(struct lx6464es *chip,
+			     struct snd_pcm_substream *substream)
+{
+	int err = 0;
+	int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+
+	snd_printd(LXP "releasing pipe\n");
+	err = lx_pipe_release(chip, 0, is_capture);
+	if (err < 0) {
+		snd_printk(LXP "releasing pipe failed\n");
+		return err;
+	}
+
+	return err;
+}
+
+
+static int lx_pcm_open(struct snd_pcm_substream *substream)
+{
+	struct lx6464es *chip = snd_pcm_substream_chip(substream);
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	int err = 0;
+	int board_rate;
+
+	snd_printdd("->lx_pcm_open\n");
+	mutex_lock(&chip->setup_mutex);
+
+	/* copy the struct snd_pcm_hardware struct */
+	runtime->hw = lx_caps;
+
+#if 0
+	/* buffer-size should better be multiple of period-size */
+	err = snd_pcm_hw_constraint_integer(runtime,
+					    SNDRV_PCM_HW_PARAM_PERIODS);
+	if (err < 0) {
+		snd_printk(KERN_WARNING LXP "could not constrain periods\n");
+		goto exit;
+	}
+#endif
+
+	/* the clock rate cannot be changed */
+	board_rate = chip->board_sample_rate;
+	err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_RATE,
+					   board_rate, board_rate);
+
+	if (err < 0) {
+		snd_printk(KERN_WARNING LXP "could not constrain periods\n");
+		goto exit;
+	}
+
+	/* constrain period size */
+	err = snd_pcm_hw_constraint_minmax(runtime,
+					   SNDRV_PCM_HW_PARAM_PERIOD_SIZE,
+					   MICROBLAZE_IBL_MIN,
+					   MICROBLAZE_IBL_MAX);
+	if (err < 0) {
+		snd_printk(KERN_WARNING LXP
+			   "could not constrain period size\n");
+		goto exit;
+	}
+
+	snd_pcm_hw_constraint_step(runtime, 0,
+				   SNDRV_PCM_HW_PARAM_BUFFER_SIZE, 32);
+
+	snd_pcm_set_sync(substream);
+	err = 0;
+
+exit:
+	runtime->private_data = chip;
+
+	mutex_unlock(&chip->setup_mutex);
+	snd_printdd("<-lx_pcm_open, %d\n", err);
+	return err;
+}
+
+static int lx_pcm_close(struct snd_pcm_substream *substream)
+{
+	int err = 0;
+	snd_printdd("->lx_pcm_close\n");
+	return err;
+}
+
+static snd_pcm_uframes_t lx_pcm_stream_pointer(struct snd_pcm_substream
+					       *substream)
+{
+	struct lx6464es *chip = snd_pcm_substream_chip(substream);
+	snd_pcm_uframes_t pos;
+	unsigned long flags;
+	int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+
+	struct lx_stream *lx_stream = is_capture ? &chip->capture_stream :
+		&chip->playback_stream;
+
+	snd_printdd("->lx_pcm_stream_pointer\n");
+
+	spin_lock_irqsave(&chip->lock, flags);
+	pos = lx_stream->frame_pos * substream->runtime->period_size;
+	spin_unlock_irqrestore(&chip->lock, flags);
+
+	snd_printdd(LXP "stream_pointer at %ld\n", pos);
+	return pos;
+}
+
+static int lx_pcm_prepare(struct snd_pcm_substream *substream)
+{
+	struct lx6464es *chip = snd_pcm_substream_chip(substream);
+	int err = 0;
+	const int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+
+	snd_printdd("->lx_pcm_prepare\n");
+
+	mutex_lock(&chip->setup_mutex);
+
+	if (chip->hardware_running[is_capture]) {
+		err = lx_hardware_stop(chip, substream);
+		if (err < 0) {
+			snd_printk(KERN_ERR LXP "failed to stop hardware. "
+				   "Error code %d\n", err);
+			goto exit;
+		}
+
+		err = lx_hardware_close(chip, substream);
+		if (err < 0) {
+			snd_printk(KERN_ERR LXP "failed to close hardware. "
+				   "Error code %d\n", err);
+			goto exit;
+		}
+	}
+
+	snd_printd(LXP "opening hardware\n");
+	err = lx_hardware_open(chip, substream);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "failed to open hardware. "
+			   "Error code %d\n", err);
+		goto exit;
+	}
+
+	err = lx_hardware_start(chip, substream);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "failed to start hardware. "
+			   "Error code %d\n", err);
+		goto exit;
+	}
+
+	chip->hardware_running[is_capture] = 1;
+
+	if (chip->board_sample_rate != substream->runtime->rate) {
+		if (!err)
+			chip->board_sample_rate = substream->runtime->rate;
+	}
+
+exit:
+	mutex_unlock(&chip->setup_mutex);
+	return err;
+}
+
+static int lx_pcm_hw_params(struct snd_pcm_substream *substream,
+			    struct snd_pcm_hw_params *hw_params, int is_capture)
+{
+	struct lx6464es *chip = snd_pcm_substream_chip(substream);
+	int err = 0;
+
+	snd_printdd("->lx_pcm_hw_params\n");
+
+	mutex_lock(&chip->setup_mutex);
+
+	/* set dma buffer */
+	err = snd_pcm_lib_malloc_pages(substream,
+				       params_buffer_bytes(hw_params));
+
+	if (is_capture)
+		chip->capture_stream.stream = substream;
+	else
+		chip->playback_stream.stream = substream;
+
+	mutex_unlock(&chip->setup_mutex);
+	return err;
+}
+
+static int lx_pcm_hw_params_playback(struct snd_pcm_substream *substream,
+				 struct snd_pcm_hw_params *hw_params)
+{
+	return lx_pcm_hw_params(substream, hw_params, 0);
+}
+
+static int lx_pcm_hw_params_capture(struct snd_pcm_substream *substream,
+				 struct snd_pcm_hw_params *hw_params)
+{
+	return lx_pcm_hw_params(substream, hw_params, 1);
+}
+
+static int lx_pcm_hw_free(struct snd_pcm_substream *substream)
+{
+	struct lx6464es *chip = snd_pcm_substream_chip(substream);
+	int err = 0;
+	int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+
+	snd_printdd("->lx_pcm_hw_free\n");
+	mutex_lock(&chip->setup_mutex);
+
+	if (chip->hardware_running[is_capture]) {
+		err = lx_hardware_stop(chip, substream);
+		if (err < 0) {
+			snd_printk(KERN_ERR LXP "failed to stop hardware. "
+				   "Error code %d\n", err);
+			goto exit;
+		}
+
+		err = lx_hardware_close(chip, substream);
+		if (err < 0) {
+			snd_printk(KERN_ERR LXP "failed to close hardware. "
+				   "Error code %d\n", err);
+			goto exit;
+		}
+
+		chip->hardware_running[is_capture] = 0;
+	}
+
+	err = snd_pcm_lib_free_pages(substream);
+
+	if (is_capture)
+		chip->capture_stream.stream = 0;
+	else
+		chip->playback_stream.stream = 0;
+
+exit:
+	mutex_unlock(&chip->setup_mutex);
+	return err;
+}
+
+static void lx_trigger_start(struct lx6464es *chip, struct lx_stream *lx_stream)
+{
+	struct snd_pcm_substream *substream = lx_stream->stream;
+	const int is_capture = lx_stream->is_capture;
+
+	int err;
+
+	const u32 channels = substream->runtime->channels;
+	const u32 bytes_per_frame = channels * 3;
+	const u32 period_size = substream->runtime->period_size;
+	const u32 periods = substream->runtime->periods;
+	const u32 period_bytes = period_size * bytes_per_frame;
+
+	dma_addr_t buf = substream->dma_buffer.addr;
+	int i;
+
+	u32 needed, freed;
+	u32 size_array[5];
+
+	for (i = 0; i != periods; ++i) {
+		u32 buffer_index = 0;
+
+		err = lx_buffer_ask(chip, 0, is_capture, &needed, &freed,
+				    size_array);
+		snd_printdd(LXP "starting: needed %d, freed %d\n",
+			    needed, freed);
+
+		err = lx_buffer_give(chip, 0, is_capture, period_bytes,
+				     lower_32_bits(buf), upper_32_bits(buf),
+				     &buffer_index);
+
+		snd_printdd(LXP "starting: buffer index %x on %p (%d bytes)\n",
+			    buffer_index, (void *)buf, period_bytes);
+		buf += period_bytes;
+	}
+
+	err = lx_buffer_ask(chip, 0, is_capture, &needed, &freed, size_array);
+	snd_printdd(LXP "starting: needed %d, freed %d\n", needed, freed);
+
+	snd_printd(LXP "starting: starting stream\n");
+	err = lx_stream_start(chip, 0, is_capture);
+	if (err < 0)
+		snd_printk(KERN_ERR LXP "couldn't start stream\n");
+	else
+		lx_stream->status = LX_STREAM_STATUS_RUNNING;
+
+	lx_stream->frame_pos = 0;
+}
+
+static void lx_trigger_stop(struct lx6464es *chip, struct lx_stream *lx_stream)
+{
+	const int is_capture = lx_stream->is_capture;
+	int err;
+
+	snd_printd(LXP "stopping: stopping stream\n");
+	err = lx_stream_stop(chip, 0, is_capture);
+	if (err < 0)
+		snd_printk(KERN_ERR LXP "couldn't stop stream\n");
+	else
+		lx_stream->status = LX_STREAM_STATUS_FREE;
+
+}
+
+static void lx_trigger_tasklet_dispatch_stream(struct lx6464es *chip,
+					       struct lx_stream *lx_stream)
+{
+	switch (lx_stream->status) {
+	case LX_STREAM_STATUS_SCHEDULE_RUN:
+		lx_trigger_start(chip, lx_stream);
+		break;
+
+	case LX_STREAM_STATUS_SCHEDULE_STOP:
+		lx_trigger_stop(chip, lx_stream);
+		break;
+
+	default:
+		break;
+	}
+}
+
+static void lx_trigger_tasklet(unsigned long data)
+{
+	struct lx6464es *chip = (struct lx6464es *)data;
+	unsigned long flags;
+
+	snd_printdd("->lx_trigger_tasklet\n");
+
+	spin_lock_irqsave(&chip->lock, flags);
+	lx_trigger_tasklet_dispatch_stream(chip, &chip->capture_stream);
+	lx_trigger_tasklet_dispatch_stream(chip, &chip->playback_stream);
+	spin_unlock_irqrestore(&chip->lock, flags);
+}
+
+static int lx_pcm_trigger_dispatch(struct lx6464es *chip,
+				   struct lx_stream *lx_stream, int cmd)
+{
+	int err = 0;
+
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+		lx_stream->status = LX_STREAM_STATUS_SCHEDULE_RUN;
+		break;
+
+	case SNDRV_PCM_TRIGGER_STOP:
+		lx_stream->status = LX_STREAM_STATUS_SCHEDULE_STOP;
+		break;
+
+	default:
+		err = -EINVAL;
+		goto exit;
+	}
+	tasklet_schedule(&chip->trigger_tasklet);
+
+exit:
+	return err;
+}
+
+
+static int lx_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
+{
+	struct lx6464es *chip = snd_pcm_substream_chip(substream);
+	const int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE);
+	struct lx_stream *stream = is_capture ? &chip->capture_stream :
+		&chip->playback_stream;
+
+	snd_printdd("->lx_pcm_trigger\n");
+
+	return lx_pcm_trigger_dispatch(chip, stream, cmd);
+}
+
+static int snd_lx6464es_free(struct lx6464es *chip)
+{
+	snd_printdd("->snd_lx6464es_free\n");
+
+	lx_irq_disable(chip);
+
+	if (chip->irq >= 0)
+		free_irq(chip->irq, chip);
+
+	iounmap(chip->port_dsp_bar);
+	ioport_unmap(chip->port_plx_remapped);
+
+	pci_release_regions(chip->pci);
+	pci_disable_device(chip->pci);
+
+	kfree(chip);
+
+	return 0;
+}
+
+static int snd_lx6464es_dev_free(struct snd_device *device)
+{
+	return snd_lx6464es_free(device->device_data);
+}
+
+/* reset the dsp during initialization */
+static int __devinit lx_init_xilinx_reset(struct lx6464es *chip)
+{
+	int i;
+	u32 plx_reg = lx_plx_reg_read(chip, ePLX_CHIPSC);
+
+	snd_printdd("->lx_init_xilinx_reset\n");
+
+	/* activate reset of xilinx */
+	plx_reg &= ~CHIPSC_RESET_XILINX;
+
+	lx_plx_reg_write(chip, ePLX_CHIPSC, plx_reg);
+	msleep(1);
+
+	lx_plx_reg_write(chip, ePLX_MBOX3, 0);
+	msleep(1);
+
+	plx_reg |= CHIPSC_RESET_XILINX;
+	lx_plx_reg_write(chip, ePLX_CHIPSC, plx_reg);
+
+	/* deactivate reset of xilinx */
+	for (i = 0; i != 100; ++i) {
+		u32 reg_mbox3;
+		msleep(10);
+		reg_mbox3 = lx_plx_reg_read(chip, ePLX_MBOX3);
+		if (reg_mbox3) {
+			snd_printd(LXP "xilinx reset done\n");
+			snd_printdd(LXP "xilinx took %d loops\n", i);
+			break;
+		}
+	}
+
+	/* todo: add some error handling? */
+
+	/* clear mr */
+	lx_dsp_reg_write(chip, eReg_CSM, 0);
+
+	/* le xilinx ES peut ne pas etre encore pret, on attend. */
+	msleep(600);
+
+	return 0;
+}
+
+static int __devinit lx_init_xilinx_test(struct lx6464es *chip)
+{
+	u32 reg;
+
+	snd_printdd("->lx_init_xilinx_test\n");
+
+	/* TEST if we have access to Xilinx/MicroBlaze */
+	lx_dsp_reg_write(chip, eReg_CSM, 0);
+
+	reg = lx_dsp_reg_read(chip, eReg_CSM);
+
+	if (reg) {
+		snd_printk(KERN_ERR LXP "Problem: Reg_CSM %x.\n", reg);
+
+		/* PCI9056_SPACE0_REMAP */
+		lx_plx_reg_write(chip, ePLX_PCICR, 1);
+
+		reg = lx_dsp_reg_read(chip, eReg_CSM);
+		if (reg) {
+			snd_printk(KERN_ERR LXP "Error: Reg_CSM %x.\n", reg);
+			return -EAGAIN; /* seems to be appropriate */
+		}
+	}
+
+	snd_printd(LXP "Xilinx/MicroBlaze access test successful\n");
+
+	return 0;
+}
+
+/* initialize ethersound */
+static int __devinit lx_init_ethersound_config(struct lx6464es *chip)
+{
+	int i;
+	u32 orig_conf_es = lx_dsp_reg_read(chip, eReg_CONFES);
+
+	u32 default_conf_es = (64 << IOCR_OUTPUTS_OFFSET) |
+		(64 << IOCR_INPUTS_OFFSET) |
+		(FREQ_RATIO_SINGLE_MODE << FREQ_RATIO_OFFSET);
+
+	u32 conf_es = (orig_conf_es & CONFES_READ_PART_MASK)
+		| (default_conf_es & CONFES_WRITE_PART_MASK);
+
+	snd_printdd("->lx_init_ethersound\n");
+
+	chip->freq_ratio = FREQ_RATIO_SINGLE_MODE;
+
+	/*
+	 * write it to the card !
+	 * this actually kicks the ES xilinx, the first time since poweron.
+	 * the MAC address in the Reg_ADMACESMSB Reg_ADMACESLSB registers
+	 * is not ready before this is done, and the bit 2 in Reg_CSES is set.
+	 * */
+	lx_dsp_reg_write(chip, eReg_CONFES, conf_es);
+
+	for (i = 0; i != 1000; ++i) {
+		if (lx_dsp_reg_read(chip, eReg_CSES) & 4) {
+			snd_printd(LXP "ethersound initialized after %dms\n",
+				   i);
+			goto ethersound_initialized;
+		}
+		msleep(1);
+	}
+	snd_printk(KERN_WARNING LXP
+		   "ethersound could not be initialized after %dms\n", i);
+	return -ETIMEDOUT;
+
+ ethersound_initialized:
+	snd_printd(LXP "ethersound initialized\n");
+	return 0;
+}
+
+static int __devinit lx_init_get_version_features(struct lx6464es *chip)
+{
+	u32 dsp_version;
+
+	int err;
+
+	snd_printdd("->lx_init_get_version_features\n");
+
+	err = lx_dsp_get_version(chip, &dsp_version);
+
+	if (err == 0) {
+		u32 freq;
+
+		snd_printk(LXP "DSP version: V%02d.%02d #%d\n",
+			   (dsp_version>>16) & 0xff, (dsp_version>>8) & 0xff,
+			   dsp_version & 0xff);
+
+		/* later: what firmware version do we expect? */
+
+		/* retrieve Play/Rec features */
+		/* done here because we may have to handle alternate
+		 * DSP files. */
+		/* later */
+
+		/* init the EtherSound sample rate */
+		err = lx_dsp_get_clock_frequency(chip, &freq);
+		if (err == 0)
+			chip->board_sample_rate = freq;
+		snd_printd(LXP "actual clock frequency %d\n", freq);
+	} else {
+		snd_printk(KERN_ERR LXP "DSP corrupted \n");
+		err = -EAGAIN;
+	}
+
+	return err;
+}
+
+static int lx_set_granularity(struct lx6464es *chip, u32 gran)
+{
+	int err = 0;
+	u32 snapped_gran = MICROBLAZE_IBL_MIN;
+
+	snd_printdd("->lx_set_granularity\n");
+
+	/* blocksize is a power of 2 */
+	while ((snapped_gran < gran) &&
+	       (snapped_gran < MICROBLAZE_IBL_MAX)) {
+		snapped_gran *= 2;
+	}
+
+	if (snapped_gran == chip->pcm_granularity)
+		return 0;
+
+	err = lx_dsp_set_granularity(chip, snapped_gran);
+	if (err < 0) {
+		snd_printk(KERN_WARNING LXP "could not set granularity\n");
+		err = -EAGAIN;
+	}
+
+	if (snapped_gran != gran)
+		snd_printk(LXP "snapped blocksize to %d\n", snapped_gran);
+
+	snd_printd(LXP "set blocksize on board %d\n", snapped_gran);
+	chip->pcm_granularity = snapped_gran;
+
+	return err;
+}
+
+/* initialize and test the xilinx dsp chip */
+static int __devinit lx_init_dsp(struct lx6464es *chip)
+{
+	int err;
+	u8 mac_address[6];
+	int i;
+
+	snd_printdd("->lx_init_dsp\n");
+
+	snd_printd(LXP "initialize board\n");
+	err = lx_init_xilinx_reset(chip);
+	if (err)
+		return err;
+
+	snd_printd(LXP "testing board\n");
+	err = lx_init_xilinx_test(chip);
+	if (err)
+		return err;
+
+	snd_printd(LXP "initialize ethersound configuration\n");
+	err = lx_init_ethersound_config(chip);
+	if (err)
+		return err;
+
+	lx_irq_enable(chip);
+
+	/** \todo the mac address should be ready by not, but it isn't,
+	 *  so we wait for it */
+	for (i = 0; i != 1000; ++i) {
+		err = lx_dsp_get_mac(chip, mac_address);
+		if (err)
+			return err;
+		if (mac_address[0] || mac_address[1] || mac_address[2] ||
+		    mac_address[3] || mac_address[4] || mac_address[5])
+			goto mac_ready;
+		msleep(1);
+	}
+	return -ETIMEDOUT;
+
+mac_ready:
+	snd_printd(LXP "mac address ready read after: %dms\n", i);
+	snd_printk(LXP "mac address: %02X.%02X.%02X.%02X.%02X.%02X\n",
+		   mac_address[0], mac_address[1], mac_address[2],
+		   mac_address[3], mac_address[4], mac_address[5]);
+
+	err = lx_init_get_version_features(chip);
+	if (err)
+		return err;
+
+	lx_set_granularity(chip, MICROBLAZE_IBL_DEFAULT);
+
+	chip->playback_mute = 0;
+
+	return err;
+}
+
+static struct snd_pcm_ops lx_ops_playback = {
+	.open      = lx_pcm_open,
+	.close     = lx_pcm_close,
+	.ioctl     = snd_pcm_lib_ioctl,
+	.prepare   = lx_pcm_prepare,
+	.hw_params = lx_pcm_hw_params_playback,
+	.hw_free   = lx_pcm_hw_free,
+	.trigger   = lx_pcm_trigger,
+	.pointer   = lx_pcm_stream_pointer,
+};
+
+static struct snd_pcm_ops lx_ops_capture = {
+	.open      = lx_pcm_open,
+	.close     = lx_pcm_close,
+	.ioctl     = snd_pcm_lib_ioctl,
+	.prepare   = lx_pcm_prepare,
+	.hw_params = lx_pcm_hw_params_capture,
+	.hw_free   = lx_pcm_hw_free,
+	.trigger   = lx_pcm_trigger,
+	.pointer   = lx_pcm_stream_pointer,
+};
+
+static int __devinit lx_pcm_create(struct lx6464es *chip)
+{
+	int err;
+	struct snd_pcm *pcm;
+
+	u32 size = 64 *		     /* channels */
+		3 *		     /* 24 bit samples */
+		MAX_STREAM_BUFFER *  /* periods */
+		MICROBLAZE_IBL_MAX * /* frames per period */
+		2;		     /* duplex */
+
+	size = PAGE_ALIGN(size);
+
+	/* hardcoded device name & channel count */
+	err = snd_pcm_new(chip->card, (char *)card_name, 0,
+			  1, 1, &pcm);
+
+	pcm->private_data = chip;
+
+	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &lx_ops_playback);
+	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &lx_ops_capture);
+
+	pcm->info_flags = 0;
+	strcpy(pcm->name, card_name);
+
+	err = snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
+						    snd_dma_pci_data(chip->pci),
+						    size, size);
+	if (err < 0)
+		return err;
+
+	chip->pcm = pcm;
+	chip->capture_stream.is_capture = 1;
+
+	return 0;
+}
+
+static int lx_control_playback_info(struct snd_kcontrol *kcontrol,
+				    struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN;
+	uinfo->count = 1;
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = 1;
+	return 0;
+}
+
+static int lx_control_playback_get(struct snd_kcontrol *kcontrol,
+				   struct snd_ctl_elem_value *ucontrol)
+{
+	struct lx6464es *chip = snd_kcontrol_chip(kcontrol);
+	ucontrol->value.integer.value[0] = chip->playback_mute;
+	return 0;
+}
+
+static int lx_control_playback_put(struct snd_kcontrol *kcontrol,
+				   struct snd_ctl_elem_value *ucontrol)
+{
+	struct lx6464es *chip = snd_kcontrol_chip(kcontrol);
+	int changed = 0;
+	int current_value = chip->playback_mute;
+
+	if (current_value != ucontrol->value.integer.value[0]) {
+		lx_level_unmute(chip, 0, !current_value);
+		chip->playback_mute = !current_value;
+		changed = 1;
+	}
+	return changed;
+}
+
+static struct snd_kcontrol_new lx_control_playback_switch __devinitdata = {
+	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+	.name = "PCM Playback Switch",
+	.index = 0,
+	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.private_value = 0,
+	.info = lx_control_playback_info,
+	.get = lx_control_playback_get,
+	.put = lx_control_playback_put
+};
+
+
+
+static void lx_proc_levels_read(struct snd_info_entry *entry,
+				struct snd_info_buffer *buffer)
+{
+	u32 levels[64];
+	int err;
+	int i, j;
+	struct lx6464es *chip = entry->private_data;
+
+	snd_iprintf(buffer, "capture levels:\n");
+	err = lx_level_peaks(chip, 1, 64, levels);
+	if (err < 0)
+		return;
+
+	for (i = 0; i != 8; ++i) {
+		for (j = 0; j != 8; ++j)
+			snd_iprintf(buffer, "%08x ", levels[i*8+j]);
+		snd_iprintf(buffer, "\n");
+	}
+
+	snd_iprintf(buffer, "\nplayback levels:\n");
+
+	err = lx_level_peaks(chip, 0, 64, levels);
+	if (err < 0)
+		return;
+
+	for (i = 0; i != 8; ++i) {
+		for (j = 0; j != 8; ++j)
+			snd_iprintf(buffer, "%08x ", levels[i*8+j]);
+		snd_iprintf(buffer, "\n");
+	}
+
+	snd_iprintf(buffer, "\n");
+}
+
+static int __devinit lx_proc_create(struct snd_card *card, struct lx6464es *chip)
+{
+	struct snd_info_entry *entry;
+	int err = snd_card_proc_new(card, "levels", &entry);
+	if (err < 0)
+		return err;
+
+	snd_info_set_text_ops(entry, chip, lx_proc_levels_read);
+	return 0;
+}
+
+
+static int __devinit snd_lx6464es_create(struct snd_card *card,
+					 struct pci_dev *pci,
+					 struct lx6464es **rchip)
+{
+	struct lx6464es *chip;
+	int err;
+
+	static struct snd_device_ops ops = {
+		.dev_free = snd_lx6464es_dev_free,
+	};
+
+	snd_printdd("->snd_lx6464es_create\n");
+
+	*rchip = NULL;
+
+	/* enable PCI device */
+	err = pci_enable_device(pci);
+	if (err < 0)
+		return err;
+
+	pci_set_master(pci);
+
+	/* check if we can restrict PCI DMA transfers to 32 bits */
+	err = pci_set_dma_mask(pci, DMA_32BIT_MASK);
+	if (err < 0) {
+		snd_printk(KERN_ERR "architecture does not support "
+			   "32bit PCI busmaster DMA\n");
+		pci_disable_device(pci);
+		return -ENXIO;
+	}
+
+	chip = kzalloc(sizeof(*chip), GFP_KERNEL);
+	if (chip == NULL) {
+		err = -ENOMEM;
+		goto alloc_failed;
+	}
+
+	chip->card = card;
+	chip->pci = pci;
+	chip->irq = -1;
+
+	/* initialize synchronization structs */
+	spin_lock_init(&chip->lock);
+	spin_lock_init(&chip->msg_lock);
+	mutex_init(&chip->setup_mutex);
+	tasklet_init(&chip->trigger_tasklet, lx_trigger_tasklet,
+		     (unsigned long)chip);
+	tasklet_init(&chip->tasklet_capture, lx_tasklet_capture,
+		     (unsigned long)chip);
+	tasklet_init(&chip->tasklet_playback, lx_tasklet_playback,
+		     (unsigned long)chip);
+
+	/* request resources */
+	err = pci_request_regions(pci, card_name);
+	if (err < 0)
+		goto request_regions_failed;
+
+	/* plx port */
+	chip->port_plx = pci_resource_start(pci, 1);
+	chip->port_plx_remapped = ioport_map(chip->port_plx,
+					     pci_resource_len(pci, 1));
+
+	/* dsp port */
+	chip->port_dsp_bar = pci_ioremap_bar(pci, 2);
+
+	err = request_irq(pci->irq, lx_interrupt, IRQF_SHARED,
+			  card_name, chip);
+	if (err) {
+		snd_printk(KERN_ERR LXP "unable to grab IRQ %d\n", pci->irq);
+		goto request_irq_failed;
+	}
+	chip->irq = pci->irq;
+
+	err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops);
+	if (err < 0)
+		goto device_new_failed;
+
+	err = lx_init_dsp(chip);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "error during DSP initialization\n");
+		return err;
+	}
+
+	err = lx_pcm_create(chip);
+	if (err < 0)
+		return err;
+
+	err = lx_proc_create(card, chip);
+	if (err < 0)
+		return err;
+
+	err = snd_ctl_add(card, snd_ctl_new1(&lx_control_playback_switch,
+					     chip));
+	if (err < 0)
+		return err;
+
+	snd_card_set_dev(card, &pci->dev);
+
+	*rchip = chip;
+	return 0;
+
+device_new_failed:
+	free_irq(pci->irq, chip);
+
+request_irq_failed:
+	pci_release_regions(pci);
+
+request_regions_failed:
+	kfree(chip);
+
+alloc_failed:
+	pci_disable_device(pci);
+
+	return err;
+}
+
+static int __devinit snd_lx6464es_probe(struct pci_dev *pci,
+					const struct pci_device_id *pci_id)
+{
+	static int dev;
+	struct snd_card *card;
+	struct lx6464es *chip;
+	int err;
+
+	snd_printdd("->snd_lx6464es_probe\n");
+
+	if (dev >= SNDRV_CARDS)
+		return -ENODEV;
+	if (!enable[dev]) {
+		dev++;
+		return -ENOENT;
+	}
+
+	card = snd_card_new(index[dev], id[dev], THIS_MODULE, 0);
+	if (card == NULL)
+		return -ENOMEM;
+
+	err = snd_lx6464es_create(card, pci, &chip);
+	if (err < 0) {
+		snd_printk(KERN_ERR LXP "error during snd_lx6464es_create\n");
+		goto out_free;
+	}
+
+	strcpy(card->driver, "lx6464es");
+	strcpy(card->shortname, "Digigram LX6464ES");
+	sprintf(card->longname, "%s at 0x%lx, 0x%p, irq %i",
+		card->shortname, chip->port_plx,
+		chip->port_dsp_bar, chip->irq);
+
+	err = snd_card_register(card);
+	if (err < 0)
+		goto out_free;
+
+	snd_printdd(LXP "initialization successful\n");
+	pci_set_drvdata(pci, card);
+	dev++;
+	return 0;
+
+out_free:
+	snd_card_free(card);
+	return err;
+
+}
+
+static void __devexit snd_lx6464es_remove(struct pci_dev *pci)
+{
+	snd_card_free(pci_get_drvdata(pci));
+	pci_set_drvdata(pci, NULL);
+}
+
+
+static struct pci_driver driver = {
+	.name =     "Digigram LX6464ES",
+	.id_table = snd_lx6464es_ids,
+	.probe =    snd_lx6464es_probe,
+	.remove = __devexit_p(snd_lx6464es_remove),
+};
+
+
+/* module initialization */
+static int __init mod_init(void)
+{
+	return pci_register_driver(&driver);
+}
+
+static void __exit mod_exit(void)
+{
+	pci_unregister_driver(&driver);
+}
+
+module_init(mod_init);
+module_exit(mod_exit);
diff --git a/sound/pci/lx6464es/lx6464es.h b/sound/pci/lx6464es/lx6464es.h
new file mode 100644
index 00000000000..012c010c8c8
--- /dev/null
+++ b/sound/pci/lx6464es/lx6464es.h
@@ -0,0 +1,114 @@
+/* -*- linux-c -*- *
+ *
+ * ALSA driver for the digigram lx6464es interface
+ *
+ * Copyright (c) 2009 Tim Blechmann <tim@klingt.org>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+#ifndef LX6464ES_H
+#define LX6464ES_H
+
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+
+#include <sound/core.h>
+#include <sound/pcm.h>
+
+#include "lx_core.h"
+
+#define LXP "LX6464ES: "
+
+enum {
+    ES_cmd_free         = 0,    /* no command executing */
+    ES_cmd_processing   = 1,	/* execution of a read/write command */
+    ES_read_pending     = 2,    /* a asynchron read command is pending */
+    ES_read_finishing   = 3,    /* a read command has finished waiting (set by
+				 * Interrupt or CancelIrp) */
+};
+
+enum lx_stream_status {
+	LX_STREAM_STATUS_FREE,
+/* 	LX_STREAM_STATUS_OPEN, */
+	LX_STREAM_STATUS_SCHEDULE_RUN,
+/* 	LX_STREAM_STATUS_STARTED, */
+	LX_STREAM_STATUS_RUNNING,
+	LX_STREAM_STATUS_SCHEDULE_STOP,
+/* 	LX_STREAM_STATUS_STOPPED, */
+/* 	LX_STREAM_STATUS_PAUSED */
+};
+
+
+struct lx_stream {
+	struct snd_pcm_substream  *stream;
+	snd_pcm_uframes_t          frame_pos;
+	enum lx_stream_status      status; /* free, open, running, draining
+					    * pause */
+	int                        is_capture:1;
+};
+
+
+struct lx6464es {
+	struct snd_card        *card;
+	struct pci_dev         *pci;
+	int			irq;
+
+	spinlock_t		lock;        /* interrupt spinlock */
+	struct mutex            setup_mutex; /* mutex used in hw_params, open
+					      * and close */
+
+	struct tasklet_struct   trigger_tasklet; /* trigger tasklet */
+	struct tasklet_struct   tasklet_capture;
+	struct tasklet_struct   tasklet_playback;
+
+	/* ports */
+	unsigned long		port_plx;	   /* io port (size=256) */
+	void __iomem           *port_plx_remapped; /* remapped plx port */
+	void __iomem           *port_dsp_bar;      /* memory port (32-bit,
+						    * non-prefetchable,
+						    * size=8K) */
+
+	/* messaging */
+	spinlock_t		msg_lock;          /* message spinlock */
+	atomic_t	        send_message_locked;
+	struct lx_rmh           rmh;
+
+	/* configuration */
+	uint			freq_ratio : 2;
+	uint                    playback_mute : 1;
+	uint                    hardware_running[2];
+	u32                     board_sample_rate; /* sample rate read from
+						    * board */
+	u32                     sample_rate;	   /* our sample rate */
+	u16                     pcm_granularity;   /* board blocksize */
+
+	/* dma */
+	struct snd_dma_buffer   capture_dma_buf;
+	struct snd_dma_buffer   playback_dma_buf;
+
+	/* pcm */
+	struct snd_pcm         *pcm;
+
+	/* streams */
+	struct lx_stream        capture_stream;
+	struct lx_stream        playback_stream;
+};
+
+
+#endif /* LX6464ES_H */
diff --git a/sound/pci/lx6464es/lx_core.c b/sound/pci/lx6464es/lx_core.c
new file mode 100644
index 00000000000..a9f8f882b10
--- /dev/null
+++ b/sound/pci/lx6464es/lx_core.c
@@ -0,0 +1,1442 @@
+/* -*- linux-c -*- *
+ *
+ * ALSA driver for the digigram lx6464es interface
+ * low-level interface
+ *
+ * Copyright (c) 2009 Tim Blechmann <tim@klingt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+/* #define RMH_DEBUG 1 */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "lx6464es.h"
+#include "lx_core.h"
+
+/* low-level register access */
+
+static const unsigned long dsp_port_offsets[] = {
+	0,
+	0x400,
+	0x401,
+	0x402,
+	0x403,
+	0x404,
+	0x405,
+	0x406,
+	0x407,
+	0x408,
+	0x409,
+	0x40a,
+	0x40b,
+	0x40c,
+
+	0x410,
+	0x411,
+	0x412,
+	0x413,
+	0x414,
+	0x415,
+	0x416,
+
+	0x420,
+	0x430,
+	0x431,
+	0x432,
+	0x433,
+	0x434,
+	0x440
+};
+
+static void __iomem *lx_dsp_register(struct lx6464es *chip, int port)
+{
+	void __iomem *base_address = chip->port_dsp_bar;
+	return base_address + dsp_port_offsets[port]*4;
+}
+
+unsigned long lx_dsp_reg_read(struct lx6464es *chip, int port)
+{
+	void __iomem *address = lx_dsp_register(chip, port);
+	return ioread32(address);
+}
+
+void lx_dsp_reg_readbuf(struct lx6464es *chip, int port, u32 *data, u32 len)
+{
+	void __iomem *address = lx_dsp_register(chip, port);
+	memcpy_fromio(data, address, len*sizeof(u32));
+}
+
+
+void lx_dsp_reg_write(struct lx6464es *chip, int port, unsigned data)
+{
+	void __iomem *address = lx_dsp_register(chip, port);
+	iowrite32(data, address);
+}
+
+void lx_dsp_reg_writebuf(struct lx6464es *chip, int port, const u32 *data,
+			 u32 len)
+{
+	void __iomem *address = lx_dsp_register(chip, port);
+	memcpy_toio(address, data, len*sizeof(u32));
+}
+
+
+static const unsigned long plx_port_offsets[] = {
+	0x04,
+	0x40,
+	0x44,
+	0x48,
+	0x4c,
+	0x50,
+	0x54,
+	0x58,
+	0x5c,
+	0x64,
+	0x68,
+	0x6C
+};
+
+static void __iomem *lx_plx_register(struct lx6464es *chip, int port)
+{
+	void __iomem *base_address = chip->port_plx_remapped;
+	return base_address + plx_port_offsets[port];
+}
+
+unsigned long lx_plx_reg_read(struct lx6464es *chip, int port)
+{
+	void __iomem *address = lx_plx_register(chip, port);
+	return ioread32(address);
+}
+
+void lx_plx_reg_write(struct lx6464es *chip, int port, u32 data)
+{
+	void __iomem *address = lx_plx_register(chip, port);
+	iowrite32(data, address);
+}
+
+u32 lx_plx_mbox_read(struct lx6464es *chip, int mbox_nr)
+{
+	int index;
+
+	switch (mbox_nr) {
+	case 1:
+		index = ePLX_MBOX1;    break;
+	case 2:
+		index = ePLX_MBOX2;    break;
+	case 3:
+		index = ePLX_MBOX3;    break;
+	case 4:
+		index = ePLX_MBOX4;    break;
+	case 5:
+		index = ePLX_MBOX5;    break;
+	case 6:
+		index = ePLX_MBOX6;    break;
+	case 7:
+		index = ePLX_MBOX7;    break;
+	case 0:			/* reserved for HF flags */
+		snd_BUG();
+	default:
+		return 0xdeadbeef;
+	}
+
+	return lx_plx_reg_read(chip, index);
+}
+
+int lx_plx_mbox_write(struct lx6464es *chip, int mbox_nr, u32 value)
+{
+	int index = -1;
+
+	switch (mbox_nr) {
+	case 1:
+		index = ePLX_MBOX1;    break;
+	case 3:
+		index = ePLX_MBOX3;    break;
+	case 4:
+		index = ePLX_MBOX4;    break;
+	case 5:
+		index = ePLX_MBOX5;    break;
+	case 6:
+		index = ePLX_MBOX6;    break;
+	case 7:
+		index = ePLX_MBOX7;    break;
+	case 0:			/* reserved for HF flags */
+	case 2:			/* reserved for Pipe States
+				 * the DSP keeps an image of it */
+		snd_BUG();
+		return -EBADRQC;
+	}
+
+	lx_plx_reg_write(chip, index, value);
+	return 0;
+}
+
+
+/* rmh */
+
+#ifdef CONFIG_SND_DEBUG
+#define CMD_NAME(a) a
+#else
+#define CMD_NAME(a) NULL
+#endif
+
+#define Reg_CSM_MR			0x00000002
+#define Reg_CSM_MC			0x00000001
+
+struct dsp_cmd_info {
+	u32    dcCodeOp;	/* Op Code of the command (usually 1st 24-bits
+				 * word).*/
+	u16    dcCmdLength;	/* Command length in words of 24 bits.*/
+	u16    dcStatusType;	/* Status type: 0 for fixed length, 1 for
+				 * random. */
+	u16    dcStatusLength;	/* Status length (if fixed).*/
+	char  *dcOpName;
+};
+
+/*
+  Initialization and control data for the Microblaze interface
+  - OpCode:
+    the opcode field of the command set at the proper offset
+  - CmdLength
+    the number of command words
+  - StatusType
+    offset in the status registers: 0 means that the return value may be
+    different from 0, and must be read
+  - StatusLength
+    the number of status words (in addition to the return value)
+*/
+
+static struct dsp_cmd_info dsp_commands[] =
+{
+	{ (CMD_00_INFO_DEBUG << OPCODE_OFFSET)			, 1 /*custom*/
+	  , 1	, 0 /**/		    , CMD_NAME("INFO_DEBUG") },
+	{ (CMD_01_GET_SYS_CFG << OPCODE_OFFSET) 		, 1 /**/
+	  , 1      , 2 /**/		    , CMD_NAME("GET_SYS_CFG") },
+	{ (CMD_02_SET_GRANULARITY << OPCODE_OFFSET)	        , 1 /**/
+	  , 1      , 0 /**/		    , CMD_NAME("SET_GRANULARITY") },
+	{ (CMD_03_SET_TIMER_IRQ << OPCODE_OFFSET)		, 1 /**/
+	  , 1      , 0 /**/		    , CMD_NAME("SET_TIMER_IRQ") },
+	{ (CMD_04_GET_EVENT << OPCODE_OFFSET)			, 1 /**/
+	  , 1      , 0 /*up to 10*/     , CMD_NAME("GET_EVENT") },
+	{ (CMD_05_GET_PIPES << OPCODE_OFFSET)			, 1 /**/
+	  , 1      , 2 /*up to 4*/      , CMD_NAME("GET_PIPES") },
+	{ (CMD_06_ALLOCATE_PIPE << OPCODE_OFFSET)		, 1 /**/
+	  , 0      , 0 /**/		    , CMD_NAME("ALLOCATE_PIPE") },
+	{ (CMD_07_RELEASE_PIPE << OPCODE_OFFSET)		, 1 /**/
+	  , 0      , 0 /**/		    , CMD_NAME("RELEASE_PIPE") },
+	{ (CMD_08_ASK_BUFFERS << OPCODE_OFFSET) 		, 1 /**/
+	  , 1      , MAX_STREAM_BUFFER  , CMD_NAME("ASK_BUFFERS") },
+	{ (CMD_09_STOP_PIPE << OPCODE_OFFSET)			, 1 /**/
+	  , 0      , 0 /*up to 2*/      , CMD_NAME("STOP_PIPE") },
+	{ (CMD_0A_GET_PIPE_SPL_COUNT << OPCODE_OFFSET)	        , 1 /**/
+	  , 1      , 1 /*up to 2*/      , CMD_NAME("GET_PIPE_SPL_COUNT") },
+	{ (CMD_0B_TOGGLE_PIPE_STATE << OPCODE_OFFSET)           , 1 /*up to 5*/
+	  , 1      , 0 /**/		    , CMD_NAME("TOGGLE_PIPE_STATE") },
+	{ (CMD_0C_DEF_STREAM << OPCODE_OFFSET)			, 1 /*up to 4*/
+	  , 1      , 0 /**/		    , CMD_NAME("DEF_STREAM") },
+	{ (CMD_0D_SET_MUTE  << OPCODE_OFFSET)			, 3 /**/
+	  , 1      , 0 /**/		    , CMD_NAME("SET_MUTE") },
+	{ (CMD_0E_GET_STREAM_SPL_COUNT << OPCODE_OFFSET)        , 1/**/
+	  , 1      , 2 /**/		    , CMD_NAME("GET_STREAM_SPL_COUNT") },
+	{ (CMD_0F_UPDATE_BUFFER << OPCODE_OFFSET)		, 3 /*up to 4*/
+	  , 0      , 1 /**/		    , CMD_NAME("UPDATE_BUFFER") },
+	{ (CMD_10_GET_BUFFER << OPCODE_OFFSET)			, 1 /**/
+	  , 1      , 4 /**/		    , CMD_NAME("GET_BUFFER") },
+	{ (CMD_11_CANCEL_BUFFER << OPCODE_OFFSET)		, 1 /**/
+	  , 1      , 1 /*up to 4*/      , CMD_NAME("CANCEL_BUFFER") },
+	{ (CMD_12_GET_PEAK << OPCODE_OFFSET)			, 1 /**/
+	  , 1      , 1 /**/		    , CMD_NAME("GET_PEAK") },
+	{ (CMD_13_SET_STREAM_STATE << OPCODE_OFFSET)	        , 1 /**/
+	  , 1      , 0 /**/		    , CMD_NAME("SET_STREAM_STATE") },
+};
+
+static void lx_message_init(struct lx_rmh *rmh, enum cmd_mb_opcodes cmd)
+{
+	snd_BUG_ON(cmd >= CMD_14_INVALID);
+
+	rmh->cmd[0] = dsp_commands[cmd].dcCodeOp;
+	rmh->cmd_len = dsp_commands[cmd].dcCmdLength;
+	rmh->stat_len = dsp_commands[cmd].dcStatusLength;
+	rmh->dsp_stat = dsp_commands[cmd].dcStatusType;
+	rmh->cmd_idx = cmd;
+	memset(&rmh->cmd[1], 0, (REG_CRM_NUMBER - 1) * sizeof(u32));
+
+#ifdef CONFIG_SND_DEBUG
+	memset(rmh->stat, 0, REG_CRM_NUMBER * sizeof(u32));
+#endif
+#ifdef RMH_DEBUG
+	rmh->cmd_idx = cmd;
+#endif
+}
+
+#ifdef RMH_DEBUG
+#define LXRMH "lx6464es rmh: "
+static void lx_message_dump(struct lx_rmh *rmh)
+{
+	u8 idx = rmh->cmd_idx;
+	int i;
+
+	snd_printk(LXRMH "command %s\n", dsp_commands[idx].dcOpName);
+
+	for (i = 0; i != rmh->cmd_len; ++i)
+		snd_printk(LXRMH "\tcmd[%d] %08x\n", i, rmh->cmd[i]);
+
+	for (i = 0; i != rmh->stat_len; ++i)
+		snd_printk(LXRMH "\tstat[%d]: %08x\n", i, rmh->stat[i]);
+	snd_printk("\n");
+}
+#else
+static inline void lx_message_dump(struct lx_rmh *rmh)
+{}
+#endif
+
+
+
+/* sleep 500 - 100 = 400 times 100us -> the timeout is >= 40 ms */
+#define XILINX_TIMEOUT_MS       40
+#define XILINX_POLL_NO_SLEEP    100
+#define XILINX_POLL_ITERATIONS  150
+
+static int lx_message_send(struct lx6464es *chip, struct lx_rmh *rmh)
+{
+	u32 reg = ED_DSP_TIMED_OUT;
+	int dwloop;
+	int answer_received;
+
+	if (lx_dsp_reg_read(chip, eReg_CSM) & (Reg_CSM_MC | Reg_CSM_MR)) {
+		snd_printk(KERN_ERR LXP "PIOSendMessage eReg_CSM %x\n", reg);
+		return -EBUSY;
+	}
+
+	/* write command */
+	lx_dsp_reg_writebuf(chip, eReg_CRM1, rmh->cmd, rmh->cmd_len);
+
+	snd_BUG_ON(atomic_read(&chip->send_message_locked) != 0);
+	atomic_set(&chip->send_message_locked, 1);
+
+	/* MicoBlaze gogogo */
+	lx_dsp_reg_write(chip, eReg_CSM, Reg_CSM_MC);
+
+	/* wait for interrupt to answer */
+	for (dwloop = 0; dwloop != XILINX_TIMEOUT_MS; ++dwloop) {
+		answer_received = atomic_read(&chip->send_message_locked);
+		if (answer_received == 0)
+			break;
+		msleep(1);
+	}
+
+	if (answer_received == 0) {
+		/* in Debug mode verify Reg_CSM_MR */
+		snd_BUG_ON(!(lx_dsp_reg_read(chip, eReg_CSM) & Reg_CSM_MR));
+
+		/* command finished, read status */
+		if (rmh->dsp_stat == 0)
+			reg = lx_dsp_reg_read(chip, eReg_CRM1);
+		else
+			reg = 0;
+	} else {
+		int i;
+		snd_printk(KERN_WARNING LXP "TIMEOUT lx_message_send! "
+			   "Interrupts disabled?\n");
+
+		/* attente bit Reg_CSM_MR */
+		for (i = 0; i != XILINX_POLL_ITERATIONS; i++) {
+			if ((lx_dsp_reg_read(chip, eReg_CSM) & Reg_CSM_MR)) {
+				if (rmh->dsp_stat == 0)
+					reg = lx_dsp_reg_read(chip, eReg_CRM1);
+				else
+					reg = 0;
+				goto polling_successful;
+			}
+
+			if (i > XILINX_POLL_NO_SLEEP)
+				msleep(1);
+		}
+		snd_printk(KERN_WARNING LXP "TIMEOUT lx_message_send! "
+			   "polling failed\n");
+
+polling_successful:
+		atomic_set(&chip->send_message_locked, 0);
+	}
+
+	if ((reg & ERROR_VALUE) == 0) {
+		/* read response */
+		if (rmh->stat_len) {
+			snd_BUG_ON(rmh->stat_len >= (REG_CRM_NUMBER-1));
+
+			lx_dsp_reg_readbuf(chip, eReg_CRM2, rmh->stat,
+					   rmh->stat_len);
+		}
+	} else
+		snd_printk(KERN_WARNING LXP "lx_message_send: error_value %x\n",
+			   reg);
+
+	/* clear Reg_CSM_MR */
+	lx_dsp_reg_write(chip, eReg_CSM, 0);
+
+	switch (reg) {
+	case ED_DSP_TIMED_OUT:
+		snd_printk(KERN_WARNING LXP "lx_message_send: dsp timeout\n");
+		return -ETIMEDOUT;
+
+	case ED_DSP_CRASHED:
+		snd_printk(KERN_WARNING LXP "lx_message_send: dsp crashed\n");
+		return -EAGAIN;
+	}
+
+	lx_message_dump(rmh);
+	return 0;
+}
+
+static int lx_message_send_atomic(struct lx6464es *chip, struct lx_rmh *rmh)
+{
+	u32 reg = ED_DSP_TIMED_OUT;
+	int dwloop;
+
+	if (lx_dsp_reg_read(chip, eReg_CSM) & (Reg_CSM_MC | Reg_CSM_MR)) {
+		snd_printk(KERN_ERR LXP "PIOSendMessage eReg_CSM %x\n", reg);
+		return -EBUSY;
+	}
+
+	/* write command */
+	lx_dsp_reg_writebuf(chip, eReg_CRM1, rmh->cmd, rmh->cmd_len);
+
+	/* MicoBlaze gogogo */
+	lx_dsp_reg_write(chip, eReg_CSM, Reg_CSM_MC);
+
+	/* wait for interrupt to answer */
+	for (dwloop = 0; dwloop != XILINX_TIMEOUT_MS * 1000; ++dwloop) {
+		if (lx_dsp_reg_read(chip, eReg_CSM) & Reg_CSM_MR) {
+			if (rmh->dsp_stat == 0)
+				reg = lx_dsp_reg_read(chip, eReg_CRM1);
+			else
+				reg = 0;
+			goto polling_successful;
+		} else
+			udelay(1);
+	}
+	snd_printk(KERN_WARNING LXP "TIMEOUT lx_message_send_atomic! "
+		   "polling failed\n");
+
+polling_successful:
+	if ((reg & ERROR_VALUE) == 0) {
+		/* read response */
+		if (rmh->stat_len) {
+			snd_BUG_ON(rmh->stat_len >= (REG_CRM_NUMBER-1));
+			lx_dsp_reg_readbuf(chip, eReg_CRM2, rmh->stat,
+					   rmh->stat_len);
+		}
+	} else
+		snd_printk(LXP "rmh error: %08x\n", reg);
+
+	/* clear Reg_CSM_MR */
+	lx_dsp_reg_write(chip, eReg_CSM, 0);
+
+	switch (reg) {
+	case ED_DSP_TIMED_OUT:
+		snd_printk(KERN_WARNING LXP "lx_message_send: dsp timeout\n");
+		return -ETIMEDOUT;
+
+	case ED_DSP_CRASHED:
+		snd_printk(KERN_WARNING LXP "lx_message_send: dsp crashed\n");
+		return -EAGAIN;
+	}
+
+	lx_message_dump(rmh);
+
+	return reg;
+}
+
+
+/* low-level dsp access */
+int __devinit lx_dsp_get_version(struct lx6464es *chip, u32 *rdsp_version)
+{
+	u16 ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+
+	lx_message_init(&chip->rmh, CMD_01_GET_SYS_CFG);
+	ret = lx_message_send_atomic(chip, &chip->rmh);
+
+	*rdsp_version = chip->rmh.stat[1];
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return ret;
+}
+
+int lx_dsp_get_clock_frequency(struct lx6464es *chip, u32 *rfreq)
+{
+	u16 ret = 0;
+	unsigned long flags;
+	u32 freq_raw = 0;
+	u32 freq = 0;
+	u32 frequency = 0;
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+
+	lx_message_init(&chip->rmh, CMD_01_GET_SYS_CFG);
+	ret = lx_message_send_atomic(chip, &chip->rmh);
+
+	if (ret == 0) {
+		freq_raw = chip->rmh.stat[0] >> FREQ_FIELD_OFFSET;
+		freq = freq_raw & XES_FREQ_COUNT8_MASK;
+
+		if ((freq < XES_FREQ_COUNT8_48_MAX) ||
+		    (freq > XES_FREQ_COUNT8_44_MIN))
+			frequency = 0; /* unknown */
+		else if (freq >= XES_FREQ_COUNT8_44_MAX)
+			frequency = 44100;
+		else
+			frequency = 48000;
+	}
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+
+	*rfreq = frequency * chip->freq_ratio;
+
+	return ret;
+}
+
+int lx_dsp_get_mac(struct lx6464es *chip, u8 *mac_address)
+{
+	u32 macmsb, maclsb;
+
+	macmsb = lx_dsp_reg_read(chip, eReg_ADMACESMSB) & 0x00FFFFFF;
+	maclsb = lx_dsp_reg_read(chip, eReg_ADMACESLSB) & 0x00FFFFFF;
+
+	/* todo: endianess handling */
+	mac_address[5] = ((u8 *)(&maclsb))[0];
+	mac_address[4] = ((u8 *)(&maclsb))[1];
+	mac_address[3] = ((u8 *)(&maclsb))[2];
+	mac_address[2] = ((u8 *)(&macmsb))[0];
+	mac_address[1] = ((u8 *)(&macmsb))[1];
+	mac_address[0] = ((u8 *)(&macmsb))[2];
+
+	return 0;
+}
+
+
+int lx_dsp_set_granularity(struct lx6464es *chip, u32 gran)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+
+	lx_message_init(&chip->rmh, CMD_02_SET_GRANULARITY);
+	chip->rmh.cmd[0] |= gran;
+
+	ret = lx_message_send_atomic(chip, &chip->rmh);
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return ret;
+}
+
+int lx_dsp_read_async_events(struct lx6464es *chip, u32 *data)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+
+	lx_message_init(&chip->rmh, CMD_04_GET_EVENT);
+	chip->rmh.stat_len = 9;	/* we don't necessarily need the full length */
+
+	ret = lx_message_send_atomic(chip, &chip->rmh);
+
+	if (!ret)
+		memcpy(data, chip->rmh.stat, chip->rmh.stat_len * sizeof(u32));
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return ret;
+}
+
+#define CSES_TIMEOUT        100     /* microseconds */
+#define CSES_CE             0x0001
+#define CSES_BROADCAST      0x0002
+#define CSES_UPDATE_LDSV    0x0004
+
+int lx_dsp_es_check_pipeline(struct lx6464es *chip)
+{
+	int i;
+
+	for (i = 0; i != CSES_TIMEOUT; ++i) {
+		/*
+		 * le bit CSES_UPDATE_LDSV est Ã  1 dÃ©s que le macprog
+		 * est pret. il re-passe Ã  0 lorsque le premier read a
+		 * Ã©tÃ© fait. pour l'instant on retire le test car ce bit
+		 * passe a 1 environ 200 Ã  400 ms aprÃ©s que le registre
+		 * confES Ã  Ã©tÃ© Ã©crit (kick du xilinx ES).
+		 *
+		 * On ne teste que le bit CE.
+		 * */
+
+		u32 cses = lx_dsp_reg_read(chip, eReg_CSES);
+
+		if ((cses & CSES_CE) == 0)
+			return 0;
+
+		udelay(1);
+	}
+
+	return -ETIMEDOUT;
+}
+
+
+#define PIPE_INFO_TO_CMD(capture, pipe)					\
+	((u32)((u32)(pipe) | ((capture) ? ID_IS_CAPTURE : 0L)) << ID_OFFSET)
+
+
+
+/* low-level pipe handling */
+int lx_pipe_allocate(struct lx6464es *chip, u32 pipe, int is_capture,
+		     int channels)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_06_ALLOCATE_PIPE);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+	chip->rmh.cmd[0] |= channels;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+
+	if (err != 0)
+		snd_printk(KERN_ERR "lx6464es: could not allocate pipe\n");
+
+	return err;
+}
+
+int lx_pipe_release(struct lx6464es *chip, u32 pipe, int is_capture)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_07_RELEASE_PIPE);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+
+	return err;
+}
+
+int lx_buffer_ask(struct lx6464es *chip, u32 pipe, int is_capture,
+		  u32 *r_needed, u32 *r_freed, u32 *size_array)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+#ifdef CONFIG_SND_DEBUG
+	if (size_array)
+		memset(size_array, 0, sizeof(u32)*MAX_STREAM_BUFFER);
+#endif
+
+	*r_needed = 0;
+	*r_freed = 0;
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_08_ASK_BUFFERS);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	if (!err) {
+		int i;
+		for (i = 0; i < MAX_STREAM_BUFFER; ++i) {
+			u32 stat = chip->rmh.stat[i];
+			if (stat & (BF_EOB << BUFF_FLAGS_OFFSET)) {
+				/* finished */
+				*r_freed += 1;
+				if (size_array)
+					size_array[i] = stat & MASK_DATA_SIZE;
+			} else if ((stat & (BF_VALID << BUFF_FLAGS_OFFSET))
+				   == 0)
+				/* free */
+				*r_needed += 1;
+		}
+
+#if 0
+		snd_printdd(LXP "CMD_08_ASK_BUFFERS: needed %d, freed %d\n",
+			    *r_needed, *r_freed);
+		for (i = 0; i < MAX_STREAM_BUFFER; ++i) {
+			for (i = 0; i != chip->rmh.stat_len; ++i)
+				snd_printdd("  stat[%d]: %x, %x\n", i,
+					    chip->rmh.stat[i],
+					    chip->rmh.stat[i] & MASK_DATA_SIZE);
+		}
+#endif
+	}
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+
+int lx_pipe_stop(struct lx6464es *chip, u32 pipe, int is_capture)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_09_STOP_PIPE);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+static int lx_pipe_toggle_state(struct lx6464es *chip, u32 pipe, int is_capture)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0B_TOGGLE_PIPE_STATE);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+
+int lx_pipe_start(struct lx6464es *chip, u32 pipe, int is_capture)
+{
+	int err;
+
+	err = lx_pipe_wait_for_idle(chip, pipe, is_capture);
+	if (err < 0)
+		return err;
+
+	err = lx_pipe_toggle_state(chip, pipe, is_capture);
+
+	return err;
+}
+
+int lx_pipe_pause(struct lx6464es *chip, u32 pipe, int is_capture)
+{
+	int err = 0;
+
+	err = lx_pipe_wait_for_start(chip, pipe, is_capture);
+	if (err < 0)
+		return err;
+
+	err = lx_pipe_toggle_state(chip, pipe, is_capture);
+
+	return err;
+}
+
+
+int lx_pipe_sample_count(struct lx6464es *chip, u32 pipe, int is_capture,
+			 u64 *rsample_count)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0A_GET_PIPE_SPL_COUNT);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+	chip->rmh.stat_len = 2;	/* need all words here! */
+
+	err = lx_message_send_atomic(chip, &chip->rmh); /* don't sleep! */
+
+	if (err != 0)
+		snd_printk(KERN_ERR
+			   "lx6464es: could not query pipe's sample count\n");
+	else {
+		*rsample_count = ((u64)(chip->rmh.stat[0] & MASK_SPL_COUNT_HI)
+				  << 24)     /* hi part */
+			+ chip->rmh.stat[1]; /* lo part */
+	}
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+int lx_pipe_state(struct lx6464es *chip, u32 pipe, int is_capture, u16 *rstate)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0A_GET_PIPE_SPL_COUNT);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	if (err != 0)
+		snd_printk(KERN_ERR "lx6464es: could not query pipe's state\n");
+	else
+		*rstate = (chip->rmh.stat[0] >> PSTATE_OFFSET) & 0x0F;
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+static int lx_pipe_wait_for_state(struct lx6464es *chip, u32 pipe,
+				  int is_capture, u16 state)
+{
+	int i;
+
+	/* max 2*PCMOnlyGranularity = 2*1024 at 44100 = < 50 ms:
+	 * timeout 50 ms */
+	for (i = 0; i != 50; ++i) {
+		u16 current_state;
+		int err = lx_pipe_state(chip, pipe, is_capture, &current_state);
+
+		if (err < 0)
+			return err;
+
+		if (current_state == state)
+			return 0;
+
+		mdelay(1);
+	}
+
+	return -ETIMEDOUT;
+}
+
+int lx_pipe_wait_for_start(struct lx6464es *chip, u32 pipe, int is_capture)
+{
+	return lx_pipe_wait_for_state(chip, pipe, is_capture, PSTATE_RUN);
+}
+
+int lx_pipe_wait_for_idle(struct lx6464es *chip, u32 pipe, int is_capture)
+{
+	return lx_pipe_wait_for_state(chip, pipe, is_capture, PSTATE_IDLE);
+}
+
+/* low-level stream handling */
+int lx_stream_set_state(struct lx6464es *chip, u32 pipe,
+			       int is_capture, enum stream_state_t state)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_13_SET_STREAM_STATE);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+	chip->rmh.cmd[0] |= state;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+
+	return err;
+}
+
+int lx_stream_set_format(struct lx6464es *chip, struct snd_pcm_runtime *runtime,
+			 u32 pipe, int is_capture)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	u32 channels = runtime->channels;
+
+	if (runtime->channels != channels)
+		snd_printk(KERN_ERR LXP "channel count mismatch: %d vs %d",
+			   runtime->channels, channels);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0C_DEF_STREAM);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	if (runtime->sample_bits == 16)
+		/* 16 bit format */
+		chip->rmh.cmd[0] |= (STREAM_FMT_16b << STREAM_FMT_OFFSET);
+
+	if (snd_pcm_format_little_endian(runtime->format))
+		/* little endian/intel format */
+		chip->rmh.cmd[0] |= (STREAM_FMT_intel << STREAM_FMT_OFFSET);
+
+	chip->rmh.cmd[0] |= channels-1;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+
+	return err;
+}
+
+int lx_stream_state(struct lx6464es *chip, u32 pipe, int is_capture,
+		    int *rstate)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0E_GET_STREAM_SPL_COUNT);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	*rstate = (chip->rmh.stat[0] & SF_START) ? START_STATE : PAUSE_STATE;
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+int lx_stream_sample_position(struct lx6464es *chip, u32 pipe, int is_capture,
+			      u64 *r_bytepos)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0E_GET_STREAM_SPL_COUNT);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	*r_bytepos = ((u64) (chip->rmh.stat[0] & MASK_SPL_COUNT_HI)
+		      << 32)	     /* hi part */
+		+ chip->rmh.stat[1]; /* lo part */
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+/* low-level buffer handling */
+int lx_buffer_give(struct lx6464es *chip, u32 pipe, int is_capture,
+		   u32 buffer_size, u32 buf_address_lo, u32 buf_address_hi,
+		   u32 *r_buffer_index)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0F_UPDATE_BUFFER);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+	chip->rmh.cmd[0] |= BF_NOTIFY_EOB; /* request interrupt notification */
+
+	/* todo: pause request, circular buffer */
+
+	chip->rmh.cmd[1] = buffer_size & MASK_DATA_SIZE;
+	chip->rmh.cmd[2] = buf_address_lo;
+
+	if (buf_address_hi) {
+		chip->rmh.cmd_len = 4;
+		chip->rmh.cmd[3] = buf_address_hi;
+		chip->rmh.cmd[0] |= BF_64BITS_ADR;
+	}
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	if (err == 0) {
+		*r_buffer_index = chip->rmh.stat[0];
+		goto done;
+	}
+
+	if (err == EB_RBUFFERS_TABLE_OVERFLOW)
+		snd_printk(LXP "lx_buffer_give EB_RBUFFERS_TABLE_OVERFLOW\n");
+
+	if (err == EB_INVALID_STREAM)
+		snd_printk(LXP "lx_buffer_give EB_INVALID_STREAM\n");
+
+	if (err == EB_CMD_REFUSED)
+		snd_printk(LXP "lx_buffer_give EB_CMD_REFUSED\n");
+
+ done:
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+int lx_buffer_free(struct lx6464es *chip, u32 pipe, int is_capture,
+		   u32 *r_buffer_size)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_11_CANCEL_BUFFER);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+	chip->rmh.cmd[0] |= MASK_BUFFER_ID; /* ask for the current buffer: the
+					     * microblaze will seek for it */
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	if (err == 0)
+		*r_buffer_size = chip->rmh.stat[0]  & MASK_DATA_SIZE;
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+int lx_buffer_cancel(struct lx6464es *chip, u32 pipe, int is_capture,
+		     u32 buffer_index)
+{
+	int err;
+	unsigned long flags;
+
+	u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe);
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_11_CANCEL_BUFFER);
+
+	chip->rmh.cmd[0] |= pipe_cmd;
+	chip->rmh.cmd[0] |= buffer_index;
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+
+/* low-level gain/peak handling
+ *
+ * \todo: can we unmute capture/playback channels independently?
+ *
+ * */
+int lx_level_unmute(struct lx6464es *chip, int is_capture, int unmute)
+{
+	int err;
+	unsigned long flags;
+
+	/* bit set to 1: channel muted */
+	u64 mute_mask = unmute ? 0 : 0xFFFFFFFFFFFFFFFFLLU;
+
+	spin_lock_irqsave(&chip->msg_lock, flags);
+	lx_message_init(&chip->rmh, CMD_0D_SET_MUTE);
+
+	chip->rmh.cmd[0] |= PIPE_INFO_TO_CMD(is_capture, 0);
+
+	chip->rmh.cmd[1] = (u32)(mute_mask >> (u64)32);	       /* hi part */
+	chip->rmh.cmd[2] = (u32)(mute_mask & (u64)0xFFFFFFFF); /* lo part */
+
+	snd_printk("mute %x %x %x\n", chip->rmh.cmd[0], chip->rmh.cmd[1],
+		   chip->rmh.cmd[2]);
+
+	err = lx_message_send_atomic(chip, &chip->rmh);
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+static u32 peak_map[] = {
+	0x00000109, /* -90.308dB */
+	0x0000083B, /* -72.247dB */
+	0x000020C4, /* -60.205dB */
+	0x00008273, /* -48.030dB */
+	0x00020756, /* -36.005dB */
+	0x00040C37, /* -30.001dB */
+	0x00081385, /* -24.002dB */
+	0x00101D3F, /* -18.000dB */
+	0x0016C310, /* -15.000dB */
+	0x002026F2, /* -12.001dB */
+	0x002D6A86, /* -9.000dB */
+	0x004026E6, /* -6.004dB */
+	0x005A9DF6, /* -3.000dB */
+	0x0065AC8B, /* -2.000dB */
+	0x00721481, /* -1.000dB */
+	0x007FFFFF, /* FS */
+};
+
+int lx_level_peaks(struct lx6464es *chip, int is_capture, int channels,
+		   u32 *r_levels)
+{
+	int err = 0;
+	unsigned long flags;
+	int i;
+	spin_lock_irqsave(&chip->msg_lock, flags);
+
+	for (i = 0; i < channels; i += 4) {
+		u32 s0, s1, s2, s3;
+
+		lx_message_init(&chip->rmh, CMD_12_GET_PEAK);
+		chip->rmh.cmd[0] |= PIPE_INFO_TO_CMD(is_capture, i);
+
+		err = lx_message_send_atomic(chip, &chip->rmh);
+
+		if (err == 0) {
+			s0 = peak_map[chip->rmh.stat[0] & 0x0F];
+			s1 = peak_map[(chip->rmh.stat[0] >>  4) & 0xf];
+			s2 = peak_map[(chip->rmh.stat[0] >>  8) & 0xf];
+			s3 = peak_map[(chip->rmh.stat[0] >>  12) & 0xf];
+		} else
+			s0 = s1 = s2 = s3 = 0;
+
+		r_levels[0] = s0;
+		r_levels[1] = s1;
+		r_levels[2] = s2;
+		r_levels[3] = s3;
+
+		r_levels += 4;
+	}
+
+	spin_unlock_irqrestore(&chip->msg_lock, flags);
+	return err;
+}
+
+/* interrupt handling */
+#define PCX_IRQ_NONE 0
+#define IRQCS_ACTIVE_PCIDB  0x00002000L         /* Bit nÃÂ¸ 13 */
+#define IRQCS_ENABLE_PCIIRQ 0x00000100L         /* Bit nÃÂ¸ 08 */
+#define IRQCS_ENABLE_PCIDB  0x00000200L         /* Bit nÃÂ¸ 09 */
+
+static u32 lx_interrupt_test_ack(struct lx6464es *chip)
+{
+	u32 irqcs = lx_plx_reg_read(chip, ePLX_IRQCS);
+
+	/* Test if PCI Doorbell interrupt is active */
+	if (irqcs & IRQCS_ACTIVE_PCIDB)	{
+		u32 temp;
+		irqcs = PCX_IRQ_NONE;
+
+		while ((temp = lx_plx_reg_read(chip, ePLX_L2PCIDB))) {
+			/* RAZ interrupt */
+			irqcs |= temp;
+			lx_plx_reg_write(chip, ePLX_L2PCIDB, temp);
+		}
+
+		return irqcs;
+	}
+	return PCX_IRQ_NONE;
+}
+
+static int lx_interrupt_ack(struct lx6464es *chip, u32 *r_irqsrc,
+			    int *r_async_pending, int *r_async_escmd)
+{
+	u32 irq_async;
+	u32 irqsrc = lx_interrupt_test_ack(chip);
+
+	if (irqsrc == PCX_IRQ_NONE)
+		return 0;
+
+	*r_irqsrc = irqsrc;
+
+	irq_async = irqsrc & MASK_SYS_ASYNC_EVENTS; /* + EtherSound response
+						     * (set by xilinx) + EOB */
+
+	if (irq_async & MASK_SYS_STATUS_ESA) {
+		irq_async &= ~MASK_SYS_STATUS_ESA;
+		*r_async_escmd = 1;
+	}
+
+	if (irqsrc & MASK_SYS_STATUS_CMD_DONE)
+		/* xilinx command notification */
+		atomic_set(&chip->send_message_locked, 0);
+
+	if (irq_async) {
+		/* snd_printd("interrupt: async event pending\n"); */
+		*r_async_pending = 1;
+	}
+
+	return 1;
+}
+
+static int lx_interrupt_handle_async_events(struct lx6464es *chip, u32 irqsrc,
+					    int *r_freq_changed,
+					    u64 *r_notified_in_pipe_mask,
+					    u64 *r_notified_out_pipe_mask)
+{
+	int err;
+	u32 stat[9];		/* answer from CMD_04_GET_EVENT */
+
+	/* On peut optimiser pour ne pas lire les evenements vides
+	 * les mots de rÃÂ©ponse sont dans l'ordre suivant :
+	 * Stat[0]	mot de status gÃÂ©nÃÂ©ral
+	 * Stat[1]	fin de buffer OUT pF
+	 * Stat[2]	fin de buffer OUT pf
+	 * Stat[3]	fin de buffer IN pF
+	 * Stat[4]	fin de buffer IN pf
+	 * Stat[5]	underrun poid fort
+	 * Stat[6]	underrun poid faible
+	 * Stat[7]	overrun poid fort
+	 * Stat[8]	overrun poid faible
+	 * */
+
+	u64 orun_mask;
+	u64 urun_mask;
+#if 0
+	int has_underrun   = (irqsrc & MASK_SYS_STATUS_URUN) ? 1 : 0;
+	int has_overrun    = (irqsrc & MASK_SYS_STATUS_ORUN) ? 1 : 0;
+#endif
+	int eb_pending_out = (irqsrc & MASK_SYS_STATUS_EOBO) ? 1 : 0;
+	int eb_pending_in  = (irqsrc & MASK_SYS_STATUS_EOBI) ? 1 : 0;
+
+	*r_freq_changed = (irqsrc & MASK_SYS_STATUS_FREQ) ? 1 : 0;
+
+	err = lx_dsp_read_async_events(chip, stat);
+	if (err < 0)
+		return err;
+
+	if (eb_pending_in) {
+		*r_notified_in_pipe_mask = ((u64)stat[3] << 32)
+			+ stat[4];
+		snd_printdd(LXP "interrupt: EOBI pending %llx\n",
+			    *r_notified_in_pipe_mask);
+	}
+	if (eb_pending_out) {
+		*r_notified_out_pipe_mask = ((u64)stat[1] << 32)
+			+ stat[2];
+		snd_printdd(LXP "interrupt: EOBO pending %llx\n",
+			    *r_notified_out_pipe_mask);
+	}
+
+	orun_mask = ((u64)stat[7] << 32) + stat[8];
+	urun_mask = ((u64)stat[5] << 32) + stat[6];
+
+	/* todo: handle xrun notification */
+
+	return err;
+}
+
+static int lx_interrupt_request_new_buffer(struct lx6464es *chip,
+					   struct lx_stream *lx_stream)
+{
+	struct snd_pcm_substream *substream = lx_stream->stream;
+	int is_capture = lx_stream->is_capture;
+	int err;
+	unsigned long flags;
+
+	const u32 channels = substream->runtime->channels;
+	const u32 bytes_per_frame = channels * 3;
+	const u32 period_size = substream->runtime->period_size;
+	const u32 period_bytes = period_size * bytes_per_frame;
+	const u32 pos = lx_stream->frame_pos;
+	const u32 next_pos = ((pos+1) == substream->runtime->periods) ?
+		0 : pos + 1;
+
+	dma_addr_t buf = substream->dma_buffer.addr + pos * period_bytes;
+	u32 buf_hi = 0;
+	u32 buf_lo = 0;
+	u32 buffer_index = 0;
+
+	u32 needed, freed;
+	u32 size_array[MAX_STREAM_BUFFER];
+
+	snd_printdd("->lx_interrupt_request_new_buffer\n");
+
+	spin_lock_irqsave(&chip->lock, flags);
+
+	err = lx_buffer_ask(chip, 0, is_capture, &needed, &freed, size_array);
+	snd_printdd(LXP "interrupt: needed %d, freed %d\n", needed, freed);
+
+	unpack_pointer(buf, &buf_lo, &buf_hi);
+	err = lx_buffer_give(chip, 0, is_capture, period_bytes, buf_lo, buf_hi,
+			     &buffer_index);
+	snd_printdd(LXP "interrupt: gave buffer index %x on %p (%d bytes)\n",
+		    buffer_index, (void *)buf, period_bytes);
+
+	lx_stream->frame_pos = next_pos;
+	spin_unlock_irqrestore(&chip->lock, flags);
+
+	return err;
+}
+
+void lx_tasklet_playback(unsigned long data)
+{
+	struct lx6464es *chip = (struct lx6464es *)data;
+	struct lx_stream *lx_stream = &chip->playback_stream;
+	int err;
+
+	snd_printdd("->lx_tasklet_playback\n");
+
+	err = lx_interrupt_request_new_buffer(chip, lx_stream);
+	if (err < 0)
+		snd_printk(KERN_ERR LXP
+			   "cannot request new buffer for playback\n");
+
+	snd_pcm_period_elapsed(lx_stream->stream);
+}
+
+void lx_tasklet_capture(unsigned long data)
+{
+	struct lx6464es *chip = (struct lx6464es *)data;
+	struct lx_stream *lx_stream = &chip->capture_stream;
+	int err;
+
+	snd_printdd("->lx_tasklet_capture\n");
+	err = lx_interrupt_request_new_buffer(chip, lx_stream);
+	if (err < 0)
+		snd_printk(KERN_ERR LXP
+			   "cannot request new buffer for capture\n");
+
+	snd_pcm_period_elapsed(lx_stream->stream);
+}
+
+
+
+static int lx_interrupt_handle_audio_transfer(struct lx6464es *chip,
+					      u64 notified_in_pipe_mask,
+					      u64 notified_out_pipe_mask)
+{
+	int err = 0;
+
+	if (notified_in_pipe_mask) {
+		snd_printdd(LXP "requesting audio transfer for capture\n");
+		tasklet_hi_schedule(&chip->tasklet_capture);
+	}
+
+	if (notified_out_pipe_mask) {
+		snd_printdd(LXP "requesting audio transfer for playback\n");
+		tasklet_hi_schedule(&chip->tasklet_playback);
+	}
+
+	return err;
+}
+
+
+irqreturn_t lx_interrupt(int irq, void *dev_id)
+{
+	struct lx6464es *chip = dev_id;
+	int async_pending, async_escmd;
+	u32 irqsrc;
+
+	spin_lock(&chip->lock);
+
+	snd_printdd("**************************************************\n");
+
+	if (!lx_interrupt_ack(chip, &irqsrc, &async_pending, &async_escmd)) {
+		spin_unlock(&chip->lock);
+		snd_printdd("IRQ_NONE\n");
+		return IRQ_NONE; /* this device did not cause the interrupt */
+	}
+
+	if (irqsrc & MASK_SYS_STATUS_CMD_DONE)
+		goto exit;
+
+#if 0
+	if (irqsrc & MASK_SYS_STATUS_EOBI)
+		snd_printdd(LXP "interrupt: EOBI\n");
+
+	if (irqsrc & MASK_SYS_STATUS_EOBO)
+		snd_printdd(LXP "interrupt: EOBO\n");
+
+	if (irqsrc & MASK_SYS_STATUS_URUN)
+		snd_printdd(LXP "interrupt: URUN\n");
+
+	if (irqsrc & MASK_SYS_STATUS_ORUN)
+		snd_printdd(LXP "interrupt: ORUN\n");
+#endif
+
+	if (async_pending) {
+		u64 notified_in_pipe_mask = 0;
+		u64 notified_out_pipe_mask = 0;
+		int freq_changed;
+		int err;
+
+		/* handle async events */
+		err = lx_interrupt_handle_async_events(chip, irqsrc,
+						       &freq_changed,
+						       &notified_in_pipe_mask,
+						       &notified_out_pipe_mask);
+		if (err)
+			snd_printk(KERN_ERR LXP
+				   "error handling async events\n");
+
+		err = lx_interrupt_handle_audio_transfer(chip,
+							 notified_in_pipe_mask,
+							 notified_out_pipe_mask
+			);
+		if (err)
+			snd_printk(KERN_ERR LXP
+				   "error during audio transfer\n");
+	}
+
+	if (async_escmd) {
+#if 0
+		/* backdoor for ethersound commands
+		 *
+		 * for now, we do not need this
+		 *
+		 * */
+
+		snd_printdd("lx6464es: interrupt requests escmd handling\n");
+#endif
+	}
+
+exit:
+	spin_unlock(&chip->lock);
+	return IRQ_HANDLED;	/* this device caused the interrupt */
+}
+
+
+static void lx_irq_set(struct lx6464es *chip, int enable)
+{
+	u32 reg = lx_plx_reg_read(chip, ePLX_IRQCS);
+
+	/* enable/disable interrupts
+	 *
+	 * Set the Doorbell and PCI interrupt enable bits
+	 *
+	 * */
+	if (enable)
+		reg |=  (IRQCS_ENABLE_PCIIRQ | IRQCS_ENABLE_PCIDB);
+	else
+		reg &= ~(IRQCS_ENABLE_PCIIRQ | IRQCS_ENABLE_PCIDB);
+	lx_plx_reg_write(chip, ePLX_IRQCS, reg);
+}
+
+void lx_irq_enable(struct lx6464es *chip)
+{
+	snd_printdd("->lx_irq_enable\n");
+	lx_irq_set(chip, 1);
+}
+
+void lx_irq_disable(struct lx6464es *chip)
+{
+	snd_printdd("->lx_irq_disable\n");
+	lx_irq_set(chip, 0);
+}
diff --git a/sound/pci/lx6464es/lx_core.h b/sound/pci/lx6464es/lx_core.h
new file mode 100644
index 00000000000..6bd9cbbbc68
--- /dev/null
+++ b/sound/pci/lx6464es/lx_core.h
@@ -0,0 +1,242 @@
+/* -*- linux-c -*- *
+ *
+ * ALSA driver for the digigram lx6464es interface
+ * low-level interface
+ *
+ * Copyright (c) 2009 Tim Blechmann <tim@klingt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+#ifndef LX_CORE_H
+#define LX_CORE_H
+
+#include <linux/interrupt.h>
+
+#include "lx_defs.h"
+
+#define REG_CRM_NUMBER		12
+
+struct lx6464es;
+
+/* low-level register access */
+
+/* dsp register access */
+enum {
+	eReg_BASE,
+	eReg_CSM,
+	eReg_CRM1,
+	eReg_CRM2,
+	eReg_CRM3,
+	eReg_CRM4,
+	eReg_CRM5,
+	eReg_CRM6,
+	eReg_CRM7,
+	eReg_CRM8,
+	eReg_CRM9,
+	eReg_CRM10,
+	eReg_CRM11,
+	eReg_CRM12,
+
+	eReg_ICR,
+	eReg_CVR,
+	eReg_ISR,
+	eReg_RXHTXH,
+	eReg_RXMTXM,
+	eReg_RHLTXL,
+	eReg_RESETDSP,
+
+	eReg_CSUF,
+	eReg_CSES,
+	eReg_CRESMSB,
+	eReg_CRESLSB,
+	eReg_ADMACESMSB,
+	eReg_ADMACESLSB,
+	eReg_CONFES,
+
+	eMaxPortLx
+};
+
+unsigned long lx_dsp_reg_read(struct lx6464es *chip, int port);
+void lx_dsp_reg_readbuf(struct lx6464es *chip, int port, u32 *data, u32 len);
+void lx_dsp_reg_write(struct lx6464es *chip, int port, unsigned data);
+void lx_dsp_reg_writebuf(struct lx6464es *chip, int port, const u32 *data,
+			 u32 len);
+
+/* plx register access */
+enum {
+    ePLX_PCICR,
+
+    ePLX_MBOX0,
+    ePLX_MBOX1,
+    ePLX_MBOX2,
+    ePLX_MBOX3,
+    ePLX_MBOX4,
+    ePLX_MBOX5,
+    ePLX_MBOX6,
+    ePLX_MBOX7,
+
+    ePLX_L2PCIDB,
+    ePLX_IRQCS,
+    ePLX_CHIPSC,
+
+    eMaxPort
+};
+
+unsigned long lx_plx_reg_read(struct lx6464es *chip, int port);
+void lx_plx_reg_write(struct lx6464es *chip, int port, u32 data);
+
+/* rhm */
+struct lx_rmh {
+	u16	cmd_len;	/* length of the command to send (WORDs) */
+	u16	stat_len;	/* length of the status received (WORDs) */
+	u16	dsp_stat;	/* status type, RMP_SSIZE_XXX */
+	u16	cmd_idx;	/* index of the command */
+	u32	cmd[REG_CRM_NUMBER];
+	u32	stat[REG_CRM_NUMBER];
+};
+
+
+/* low-level dsp access */
+int __devinit lx_dsp_get_version(struct lx6464es *chip, u32 *rdsp_version);
+int lx_dsp_get_clock_frequency(struct lx6464es *chip, u32 *rfreq);
+int lx_dsp_set_granularity(struct lx6464es *chip, u32 gran);
+int lx_dsp_read_async_events(struct lx6464es *chip, u32 *data);
+int lx_dsp_get_mac(struct lx6464es *chip, u8 *mac_address);
+
+
+/* low-level pipe handling */
+int lx_pipe_allocate(struct lx6464es *chip, u32 pipe, int is_capture,
+		     int channels);
+int lx_pipe_release(struct lx6464es *chip, u32 pipe, int is_capture);
+int lx_pipe_sample_count(struct lx6464es *chip, u32 pipe, int is_capture,
+			 u64 *rsample_count);
+int lx_pipe_state(struct lx6464es *chip, u32 pipe, int is_capture, u16 *rstate);
+int lx_pipe_stop(struct lx6464es *chip, u32 pipe, int is_capture);
+int lx_pipe_start(struct lx6464es *chip, u32 pipe, int is_capture);
+int lx_pipe_pause(struct lx6464es *chip, u32 pipe, int is_capture);
+
+int lx_pipe_wait_for_start(struct lx6464es *chip, u32 pipe, int is_capture);
+int lx_pipe_wait_for_idle(struct lx6464es *chip, u32 pipe, int is_capture);
+
+/* low-level stream handling */
+int lx_stream_set_format(struct lx6464es *chip, struct snd_pcm_runtime *runtime,
+			 u32 pipe, int is_capture);
+int lx_stream_state(struct lx6464es *chip, u32 pipe, int is_capture,
+		    int *rstate);
+int lx_stream_sample_position(struct lx6464es *chip, u32 pipe, int is_capture,
+			      u64 *r_bytepos);
+
+int lx_stream_set_state(struct lx6464es *chip, u32 pipe,
+			int is_capture, enum stream_state_t state);
+
+static inline int lx_stream_start(struct lx6464es *chip, u32 pipe,
+				  int is_capture)
+{
+	snd_printdd("->lx_stream_start\n");
+	return lx_stream_set_state(chip, pipe, is_capture, SSTATE_RUN);
+}
+
+static inline int lx_stream_pause(struct lx6464es *chip, u32 pipe,
+				  int is_capture)
+{
+	snd_printdd("->lx_stream_pause\n");
+	return lx_stream_set_state(chip, pipe, is_capture, SSTATE_PAUSE);
+}
+
+static inline int lx_stream_stop(struct lx6464es *chip, u32 pipe,
+				 int is_capture)
+{
+	snd_printdd("->lx_stream_stop\n");
+	return lx_stream_set_state(chip, pipe, is_capture, SSTATE_STOP);
+}
+
+/* low-level buffer handling */
+int lx_buffer_ask(struct lx6464es *chip, u32 pipe, int is_capture,
+		  u32 *r_needed, u32 *r_freed, u32 *size_array);
+int lx_buffer_give(struct lx6464es *chip, u32 pipe, int is_capture,
+		   u32 buffer_size, u32 buf_address_lo, u32 buf_address_hi,
+		   u32 *r_buffer_index);
+int lx_buffer_free(struct lx6464es *chip, u32 pipe, int is_capture,
+		   u32 *r_buffer_size);
+int lx_buffer_cancel(struct lx6464es *chip, u32 pipe, int is_capture,
+		     u32 buffer_index);
+
+/* low-level gain/peak handling */
+int lx_level_unmute(struct lx6464es *chip, int is_capture, int unmute);
+int lx_level_peaks(struct lx6464es *chip, int is_capture, int channels,
+		   u32 *r_levels);
+
+
+/* interrupt handling */
+irqreturn_t lx_interrupt(int irq, void *dev_id);
+void lx_irq_enable(struct lx6464es *chip);
+void lx_irq_disable(struct lx6464es *chip);
+
+void lx_tasklet_capture(unsigned long data);
+void lx_tasklet_playback(unsigned long data);
+
+
+/* Stream Format Header Defines (for LIN and IEEE754) */
+#define HEADER_FMT_BASE		HEADER_FMT_BASE_LIN
+#define HEADER_FMT_BASE_LIN	0xFED00000
+#define HEADER_FMT_BASE_FLOAT	0xFAD00000
+#define HEADER_FMT_MONO		0x00000080 /* bit 23 in header_lo. WARNING: old
+					    * bit 22 is ignored in float
+					    * format */
+#define HEADER_FMT_INTEL	0x00008000
+#define HEADER_FMT_16BITS	0x00002000
+#define HEADER_FMT_24BITS	0x00004000
+#define HEADER_FMT_UPTO11	0x00000200 /* frequency is less or equ. to 11k.
+					    * */
+#define HEADER_FMT_UPTO32	0x00000100 /* frequency is over 11k and less
+					    * then 32k.*/
+
+
+#define BIT_FMP_HEADER          23
+#define BIT_FMP_SD              22
+#define BIT_FMP_MULTICHANNEL    19
+
+#define START_STATE             1
+#define PAUSE_STATE             0
+
+
+
+
+
+/* from PcxAll_e.h */
+/* Start/Pause condition for pipes (PCXStartPipe, PCXPausePipe) */
+#define START_PAUSE_IMMEDIATE           0
+#define START_PAUSE_ON_SYNCHRO          1
+#define START_PAUSE_ON_TIME_CODE        2
+
+
+/* Pipe / Stream state */
+#define START_STATE             1
+#define PAUSE_STATE             0
+
+static inline void unpack_pointer(dma_addr_t ptr, u32 *r_low, u32 *r_high)
+{
+	*r_low = (u32)(ptr & 0xffffffff);
+#if BITS_PER_LONG == 32
+	*r_high = 0;
+#else
+	*r_high = (u32)((u64)ptr>>32);
+#endif
+}
+
+#endif /* LX_CORE_H */
diff --git a/sound/pci/lx6464es/lx_defs.h b/sound/pci/lx6464es/lx_defs.h
new file mode 100644
index 00000000000..49d36bdd512
--- /dev/null
+++ b/sound/pci/lx6464es/lx_defs.h
@@ -0,0 +1,376 @@
+/* -*- linux-c -*- *
+ *
+ * ALSA driver for the digigram lx6464es interface
+ * adapted upstream headers
+ *
+ * Copyright (c) 2009 Tim Blechmann <tim@klingt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+#ifndef LX_DEFS_H
+#define LX_DEFS_H
+
+/* code adapted from ethersound.h */
+#define	XES_FREQ_COUNT8_MASK    0x00001FFF /* compteur 25MHz entre 8 ech. */
+#define	XES_FREQ_COUNT8_44_MIN  0x00001288 /* 25M /
+					    * [ 44k - ( 44.1k + 48k ) / 2 ]
+					    * * 8 */
+#define	XES_FREQ_COUNT8_44_MAX	0x000010F0 /* 25M / [ ( 44.1k + 48k ) / 2 ]
+					    * * 8 */
+#define	XES_FREQ_COUNT8_48_MAX	0x00000F08 /* 25M /
+					    * [ 48k + ( 44.1k + 48k ) / 2 ]
+					    * * 8 */
+
+/* code adapted from LXES_registers.h */
+
+#define IOCR_OUTPUTS_OFFSET 0	/* (rw) offset for the number of OUTs in the
+				 * ConfES register. */
+#define IOCR_INPUTS_OFFSET  8	/* (rw) offset for the number of INs in the
+				 * ConfES register. */
+#define FREQ_RATIO_OFFSET  19	/* (rw) offset for frequency ratio in the
+				 * ConfES register. */
+#define	FREQ_RATIO_SINGLE_MODE 0x01 /* value for single mode frequency ratio:
+				     * sample rate = frequency rate. */
+
+#define CONFES_READ_PART_MASK	0x00070000
+#define CONFES_WRITE_PART_MASK	0x00F80000
+
+/* code adapted from if_drv_mb.h */
+
+#define MASK_SYS_STATUS_ERROR	(1L << 31) /* events that lead to a PCI irq if
+					    * not yet pending */
+#define MASK_SYS_STATUS_URUN	(1L << 30)
+#define MASK_SYS_STATUS_ORUN	(1L << 29)
+#define MASK_SYS_STATUS_EOBO	(1L << 28)
+#define MASK_SYS_STATUS_EOBI	(1L << 27)
+#define MASK_SYS_STATUS_FREQ	(1L << 26)
+#define MASK_SYS_STATUS_ESA	(1L << 25) /* reserved, this is set by the
+					    * XES */
+#define MASK_SYS_STATUS_TIMER	(1L << 24)
+
+#define MASK_SYS_ASYNC_EVENTS	(MASK_SYS_STATUS_ERROR |		\
+				 MASK_SYS_STATUS_URUN  |		\
+				 MASK_SYS_STATUS_ORUN  |		\
+				 MASK_SYS_STATUS_EOBO  |		\
+				 MASK_SYS_STATUS_EOBI  |		\
+				 MASK_SYS_STATUS_FREQ  |		\
+				 MASK_SYS_STATUS_ESA)
+
+#define MASK_SYS_PCI_EVENTS		(MASK_SYS_ASYNC_EVENTS |	\
+					 MASK_SYS_STATUS_TIMER)
+
+#define MASK_SYS_TIMER_COUNT	0x0000FFFF
+
+#define MASK_SYS_STATUS_EOT_PLX		(1L << 22) /* event that remains
+						    * internal: reserved fo end
+						    * of plx dma */
+#define MASK_SYS_STATUS_XES		(1L << 21) /* event that remains
+						    * internal: pending XES
+						    * IRQ */
+#define MASK_SYS_STATUS_CMD_DONE	(1L << 20) /* alternate command
+						    * management: notify driver
+						    * instead of polling */
+
+
+#define MAX_STREAM_BUFFER 5	/* max amount of stream buffers. */
+
+#define MICROBLAZE_IBL_MIN		 32
+#define MICROBLAZE_IBL_DEFAULT	        128
+#define MICROBLAZE_IBL_MAX		512
+/* #define MASK_GRANULARITY		(2*MICROBLAZE_IBL_MAX-1) */
+
+
+
+/* command opcodes, see reference for details */
+
+/*
+ the capture bit position in the object_id field in driver commands
+ depends upon the number of managed channels. For now, 64 IN + 64 OUT are
+ supported. HOwever, the communication protocol forsees 1024 channels, hence
+ bit 10 indicates a capture (input) object).
+*/
+#define ID_IS_CAPTURE (1L << 10)
+#define ID_OFFSET	13	/* object ID is at the 13th bit in the
+				 * 1st command word.*/
+#define ID_CH_MASK    0x3F
+#define OPCODE_OFFSET	24	/* offset of the command opcode in the first
+				 * command word.*/
+
+enum cmd_mb_opcodes {
+	CMD_00_INFO_DEBUG	        = 0x00,
+	CMD_01_GET_SYS_CFG		= 0x01,
+	CMD_02_SET_GRANULARITY		= 0x02,
+	CMD_03_SET_TIMER_IRQ		= 0x03,
+	CMD_04_GET_EVENT		= 0x04,
+	CMD_05_GET_PIPES		= 0x05,
+
+	CMD_06_ALLOCATE_PIPE            = 0x06,
+	CMD_07_RELEASE_PIPE		= 0x07,
+	CMD_08_ASK_BUFFERS		= 0x08,
+	CMD_09_STOP_PIPE		= 0x09,
+	CMD_0A_GET_PIPE_SPL_COUNT	= 0x0a,
+	CMD_0B_TOGGLE_PIPE_STATE	= 0x0b,
+
+	CMD_0C_DEF_STREAM		= 0x0c,
+	CMD_0D_SET_MUTE			= 0x0d,
+	CMD_0E_GET_STREAM_SPL_COUNT     = 0x0e,
+	CMD_0F_UPDATE_BUFFER		= 0x0f,
+	CMD_10_GET_BUFFER		= 0x10,
+	CMD_11_CANCEL_BUFFER		= 0x11,
+	CMD_12_GET_PEAK			= 0x12,
+	CMD_13_SET_STREAM_STATE		= 0x13,
+	CMD_14_INVALID			= 0x14,
+};
+
+/* pipe states */
+enum pipe_state_t {
+	PSTATE_IDLE	= 0,	/* the pipe is not processed in the XES_IRQ
+				 * (free or stopped, or paused). */
+	PSTATE_RUN	= 1,	/* sustained play/record state. */
+	PSTATE_PURGE	= 2,	/* the ES channels are now off, render pipes do
+				 * not DMA, record pipe do a last DMA. */
+	PSTATE_ACQUIRE	= 3,	/* the ES channels are now on, render pipes do
+				 * not yet increase their sample count, record
+				 * pipes do not DMA. */
+	PSTATE_CLOSING	= 4,	/* the pipe is releasing, and may not yet
+				 * receive an "alloc" command. */
+};
+
+/* stream states */
+enum stream_state_t {
+	SSTATE_STOP	=  0x00,       /* setting to stop resets the stream spl
+					* count.*/
+	SSTATE_RUN	= (0x01 << 0), /* start DMA and spl count handling. */
+	SSTATE_PAUSE	= (0x01 << 1), /* pause DMA and spl count handling. */
+};
+
+/* buffer flags */
+enum buffer_flags {
+	BF_VALID	= 0x80,	/* set if the buffer is valid, clear if free.*/
+	BF_CURRENT	= 0x40,	/* set if this is the current buffer (there is
+				 * always a current buffer).*/
+	BF_NOTIFY_EOB	= 0x20,	/* set if this buffer must cause a PCI event
+				 * when finished.*/
+	BF_CIRCULAR	= 0x10,	/* set if buffer[1] must be copied to buffer[0]
+				 * by the end of this buffer.*/
+	BF_64BITS_ADR	= 0x08,	/* set if the hi part of the address is valid.*/
+	BF_xx		= 0x04,	/* future extension.*/
+	BF_EOB		= 0x02,	/* set if finished, but not yet free.*/
+	BF_PAUSE	= 0x01,	/* pause stream at buffer end.*/
+	BF_ZERO		= 0x00,	/* no flags (init).*/
+};
+
+/**
+*	Stream Flags definitions
+*/
+enum stream_flags {
+	SF_ZERO		= 0x00000000, /* no flags (stream invalid). */
+	SF_VALID	= 0x10000000, /* the stream has a valid DMA_conf
+				       * info (setstreamformat). */
+	SF_XRUN		= 0x20000000, /* the stream is un x-run state. */
+	SF_START	= 0x40000000, /* the DMA is running.*/
+	SF_ASIO		= 0x80000000, /* ASIO.*/
+};
+
+
+#define MASK_SPL_COUNT_HI 0x00FFFFFF /* 4 MSBits are status bits */
+#define PSTATE_OFFSET             28 /* 4 MSBits are status bits */
+
+
+#define MASK_STREAM_HAS_MAPPING	(1L << 12)
+#define MASK_STREAM_IS_ASIO	(1L <<  9)
+#define STREAM_FMT_OFFSET	10   /* the stream fmt bits start at the 10th
+				      * bit in the command word. */
+
+#define STREAM_FMT_16b          0x02
+#define STREAM_FMT_intel        0x01
+
+#define FREQ_FIELD_OFFSET	15  /* offset of the freq field in the response
+				     * word */
+
+#define BUFF_FLAGS_OFFSET	  24 /*  offset of the buffer flags in the
+				      *  response word. */
+#define MASK_DATA_SIZE	  0x00FFFFFF /* this must match the field size of
+				      * datasize in the buffer_t structure. */
+
+#define MASK_BUFFER_ID	        0xFF /* the cancel command awaits a buffer ID,
+				      * may be 0xFF for "current". */
+
+
+/* code adapted from PcxErr_e.h */
+
+/* Bits masks */
+
+#define ERROR_MASK              0x8000
+
+#define SOURCE_MASK             0x7800
+
+#define E_SOURCE_BOARD          0x4000 /* 8 >> 1 */
+#define E_SOURCE_DRV            0x2000 /* 4 >> 1 */
+#define E_SOURCE_API            0x1000 /* 2 >> 1 */
+/* Error tools */
+#define E_SOURCE_TOOLS          0x0800 /* 1 >> 1 */
+/* Error pcxaudio */
+#define E_SOURCE_AUDIO          0x1800 /* 3 >> 1 */
+/* Error virtual pcx */
+#define E_SOURCE_VPCX           0x2800 /* 5 >> 1 */
+/* Error dispatcher */
+#define E_SOURCE_DISPATCHER     0x3000 /* 6 >> 1 */
+/* Error from CobraNet firmware */
+#define E_SOURCE_COBRANET       0x3800 /* 7 >> 1 */
+
+#define E_SOURCE_USER           0x7800
+
+#define CLASS_MASK              0x0700
+
+#define CODE_MASK               0x00FF
+
+/* Bits values */
+
+/* Values for the error/warning bit */
+#define ERROR_VALUE             0x8000
+#define WARNING_VALUE           0x0000
+
+/* Class values */
+#define E_CLASS_GENERAL                  0x0000
+#define E_CLASS_INVALID_CMD              0x0100
+#define E_CLASS_INVALID_STD_OBJECT       0x0200
+#define E_CLASS_RSRC_IMPOSSIBLE          0x0300
+#define E_CLASS_WRONG_CONTEXT            0x0400
+#define E_CLASS_BAD_SPECIFIC_PARAMETER   0x0500
+#define E_CLASS_REAL_TIME_ERROR          0x0600
+#define E_CLASS_DIRECTSHOW               0x0700
+#define E_CLASS_FREE                     0x0700
+
+
+/* Complete DRV error code for the general class */
+#define ED_GN           (ERROR_VALUE | E_SOURCE_DRV | E_CLASS_GENERAL)
+#define ED_CONCURRENCY                  (ED_GN | 0x01)
+#define ED_DSP_CRASHED                  (ED_GN | 0x02)
+#define ED_UNKNOWN_BOARD                (ED_GN | 0x03)
+#define ED_NOT_INSTALLED                (ED_GN | 0x04)
+#define ED_CANNOT_OPEN_SVC_MANAGER      (ED_GN | 0x05)
+#define ED_CANNOT_READ_REGISTRY         (ED_GN | 0x06)
+#define ED_DSP_VERSION_MISMATCH         (ED_GN | 0x07)
+#define ED_UNAVAILABLE_FEATURE          (ED_GN | 0x08)
+#define ED_CANCELLED                    (ED_GN | 0x09)
+#define ED_NO_RESPONSE_AT_IRQA          (ED_GN | 0x10)
+#define ED_INVALID_ADDRESS              (ED_GN | 0x11)
+#define ED_DSP_CORRUPTED                (ED_GN | 0x12)
+#define ED_PENDING_OPERATION            (ED_GN | 0x13)
+#define ED_NET_ALLOCATE_MEMORY_IMPOSSIBLE   (ED_GN | 0x14)
+#define ED_NET_REGISTER_ERROR               (ED_GN | 0x15)
+#define ED_NET_THREAD_ERROR                 (ED_GN | 0x16)
+#define ED_NET_OPEN_ERROR                   (ED_GN | 0x17)
+#define ED_NET_CLOSE_ERROR                  (ED_GN | 0x18)
+#define ED_NET_NO_MORE_PACKET               (ED_GN | 0x19)
+#define ED_NET_NO_MORE_BUFFER               (ED_GN | 0x1A)
+#define ED_NET_SEND_ERROR                   (ED_GN | 0x1B)
+#define ED_NET_RECEIVE_ERROR                (ED_GN | 0x1C)
+#define ED_NET_WRONG_MSG_SIZE               (ED_GN | 0x1D)
+#define ED_NET_WAIT_ERROR                   (ED_GN | 0x1E)
+#define ED_NET_EEPROM_ERROR                 (ED_GN | 0x1F)
+#define ED_INVALID_RS232_COM_NUMBER         (ED_GN | 0x20)
+#define ED_INVALID_RS232_INIT               (ED_GN | 0x21)
+#define ED_FILE_ERROR                       (ED_GN | 0x22)
+#define ED_INVALID_GPIO_CMD                 (ED_GN | 0x23)
+#define ED_RS232_ALREADY_OPENED             (ED_GN | 0x24)
+#define ED_RS232_NOT_OPENED                 (ED_GN | 0x25)
+#define ED_GPIO_ALREADY_OPENED              (ED_GN | 0x26)
+#define ED_GPIO_NOT_OPENED                  (ED_GN | 0x27)
+#define ED_REGISTRY_ERROR                   (ED_GN | 0x28) /* <- NCX */
+#define ED_INVALID_SERVICE                  (ED_GN | 0x29) /* <- NCX */
+
+#define ED_READ_FILE_ALREADY_OPENED	    (ED_GN | 0x2a) /* <- Decalage
+							    * pour RCX
+							    * (old 0x28)
+							    * */
+#define ED_READ_FILE_INVALID_COMMAND	    (ED_GN | 0x2b) /* ~ */
+#define ED_READ_FILE_INVALID_PARAMETER	    (ED_GN | 0x2c) /* ~ */
+#define ED_READ_FILE_ALREADY_CLOSED	    (ED_GN | 0x2d) /* ~ */
+#define ED_READ_FILE_NO_INFORMATION	    (ED_GN | 0x2e) /* ~ */
+#define ED_READ_FILE_INVALID_HANDLE	    (ED_GN | 0x2f) /* ~ */
+#define ED_READ_FILE_END_OF_FILE	    (ED_GN | 0x30) /* ~ */
+#define ED_READ_FILE_ERROR	            (ED_GN | 0x31) /* ~ */
+
+#define ED_DSP_CRASHED_EXC_DSPSTACK_OVERFLOW (ED_GN | 0x32) /* <- Decalage pour
+							     * PCX (old 0x14) */
+#define ED_DSP_CRASHED_EXC_SYSSTACK_OVERFLOW (ED_GN | 0x33) /* ~ */
+#define ED_DSP_CRASHED_EXC_ILLEGAL           (ED_GN | 0x34) /* ~ */
+#define ED_DSP_CRASHED_EXC_TIMER_REENTRY     (ED_GN | 0x35) /* ~ */
+#define ED_DSP_CRASHED_EXC_FATAL_ERROR       (ED_GN | 0x36) /* ~ */
+
+#define ED_FLASH_PCCARD_NOT_PRESENT          (ED_GN | 0x37)
+
+#define ED_NO_CURRENT_CLOCK                  (ED_GN | 0x38)
+
+/* Complete DRV error code for real time class */
+#define ED_RT           (ERROR_VALUE | E_SOURCE_DRV | E_CLASS_REAL_TIME_ERROR)
+#define ED_DSP_TIMED_OUT                (ED_RT | 0x01)
+#define ED_DSP_CHK_TIMED_OUT            (ED_RT | 0x02)
+#define ED_STREAM_OVERRUN               (ED_RT | 0x03)
+#define ED_DSP_BUSY                     (ED_RT | 0x04)
+#define ED_DSP_SEMAPHORE_TIME_OUT       (ED_RT | 0x05)
+#define ED_BOARD_TIME_OUT               (ED_RT | 0x06)
+#define ED_XILINX_ERROR                 (ED_RT | 0x07)
+#define ED_COBRANET_ITF_NOT_RESPONDING  (ED_RT | 0x08)
+
+/* Complete BOARD error code for the invaid standard object class */
+#define EB_ISO          (ERROR_VALUE | E_SOURCE_BOARD | \
+			 E_CLASS_INVALID_STD_OBJECT)
+#define EB_INVALID_EFFECT               (EB_ISO | 0x00)
+#define EB_INVALID_PIPE                 (EB_ISO | 0x40)
+#define EB_INVALID_STREAM               (EB_ISO | 0x80)
+#define EB_INVALID_AUDIO                (EB_ISO | 0xC0)
+
+/* Complete BOARD error code for impossible resource allocation class */
+#define EB_RI           (ERROR_VALUE | E_SOURCE_BOARD | E_CLASS_RSRC_IMPOSSIBLE)
+#define EB_ALLOCATE_ALL_STREAM_TRANSFERT_BUFFERS_IMPOSSIBLE (EB_RI | 0x01)
+#define EB_ALLOCATE_PIPE_SAMPLE_BUFFER_IMPOSSIBLE           (EB_RI | 0x02)
+
+#define EB_ALLOCATE_MEM_STREAM_IMPOSSIBLE		\
+	EB_ALLOCATE_ALL_STREAM_TRANSFERT_BUFFERS_IMPOSSIBLE
+#define EB_ALLOCATE_MEM_PIPE_IMPOSSIBLE			\
+	EB_ALLOCATE_PIPE_SAMPLE_BUFFER_IMPOSSIBLE
+
+#define EB_ALLOCATE_DIFFERED_CMD_IMPOSSIBLE     (EB_RI | 0x03)
+#define EB_TOO_MANY_DIFFERED_CMD                (EB_RI | 0x04)
+#define EB_RBUFFERS_TABLE_OVERFLOW              (EB_RI | 0x05)
+#define EB_ALLOCATE_EFFECTS_IMPOSSIBLE          (EB_RI | 0x08)
+#define EB_ALLOCATE_EFFECT_POS_IMPOSSIBLE       (EB_RI | 0x09)
+#define EB_RBUFFER_NOT_AVAILABLE                (EB_RI | 0x0A)
+#define EB_ALLOCATE_CONTEXT_LIII_IMPOSSIBLE     (EB_RI | 0x0B)
+#define EB_STATUS_DIALOG_IMPOSSIBLE             (EB_RI | 0x1D)
+#define EB_CONTROL_CMD_IMPOSSIBLE               (EB_RI | 0x1E)
+#define EB_STATUS_SEND_IMPOSSIBLE               (EB_RI | 0x1F)
+#define EB_ALLOCATE_PIPE_IMPOSSIBLE             (EB_RI | 0x40)
+#define EB_ALLOCATE_STREAM_IMPOSSIBLE           (EB_RI | 0x80)
+#define EB_ALLOCATE_AUDIO_IMPOSSIBLE            (EB_RI | 0xC0)
+
+/* Complete BOARD error code for wrong call context class */
+#define EB_WCC          (ERROR_VALUE | E_SOURCE_BOARD | E_CLASS_WRONG_CONTEXT)
+#define EB_CMD_REFUSED                  (EB_WCC | 0x00)
+#define EB_START_STREAM_REFUSED         (EB_WCC | 0xFC)
+#define EB_SPC_REFUSED                  (EB_WCC | 0xFD)
+#define EB_CSN_REFUSED                  (EB_WCC | 0xFE)
+#define EB_CSE_REFUSED                  (EB_WCC | 0xFF)
+
+
+
+
+#endif /* LX_DEFS_H */
-- 
cgit v1.2.3-70-g09d2


From ea20d9293ce423a39717ed4375393129a2e701f9 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 10 Apr 2009 08:54:16 -0400
Subject: tracing: consolidate trace and trace_event headers

Impact: clean up

Neil Horman (et. al.) criticized the way the trace events were broken up
into two files. The reason for that was that ftrace needed to separate out
the declarations from where the #include <linux/tracepoint.h> was used.
It then dawned on me that the tracepoint.h header only needs to define the
TRACE_EVENT macro if it is not already defined.

The solution is simply to test if TRACE_EVENT is defined, and if it is not
then the linux/tracepoint.h header can define it. This change consolidates
all the <traces>.h and <traces>_event_types.h into the <traces>.h file.

Reported-by: Neil Horman <nhorman@tuxdriver.com>
Reported-by: Theodore Tso <tytso@mit.edu>
Reported-by: Jiaying Zhang <jiayingz@google.com>
Cc: Zhaolei <zhaolei@cn.fujitsu.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/tracepoint.h          |   9 +-
 include/trace/irq.h                 |  51 +++++-
 include/trace/irq_event_types.h     |  55 ------
 include/trace/kmem.h                | 189 +++++++++++++++++++-
 include/trace/lockdep.h             |  52 +++++-
 include/trace/lockdep_event_types.h |  57 ------
 include/trace/sched.h               | 333 ++++++++++++++++++++++++++++++++++-
 include/trace/sched_event_types.h   | 337 ------------------------------------
 include/trace/skb.h                 |  36 +++-
 include/trace/skb_event_types.h     |  38 ----
 include/trace/trace_event_types.h   |   7 -
 kernel/trace/events.c               |   1 +
 kernel/trace/trace_events_stage_1.h |   4 +-
 kernel/trace/trace_events_stage_2.h |   8 +-
 kernel/trace/trace_events_stage_3.h |   4 +-
 15 files changed, 663 insertions(+), 518 deletions(-)
 delete mode 100644 include/trace/irq_event_types.h
 delete mode 100644 include/trace/lockdep_event_types.h
 delete mode 100644 include/trace/sched_event_types.h
 delete mode 100644 include/trace/skb_event_types.h
 delete mode 100644 include/trace/trace_event_types.h

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index d35a7ee7611..4353f3f7e62 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -31,6 +31,8 @@ struct tracepoint {
 					 * Keep in sync with vmlinux.lds.h.
 					 */
 
+#ifndef DECLARE_TRACE
+
 #define TP_PROTO(args...)	args
 #define TP_ARGS(args...)		args
 
@@ -114,6 +116,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
 	struct tracepoint *end)
 { }
 #endif /* CONFIG_TRACEPOINTS */
+#endif /* DECLARE_TRACE */
 
 /*
  * Connect a probe to a tracepoint.
@@ -154,10 +157,13 @@ static inline void tracepoint_synchronize_unregister(void)
 }
 
 #define PARAMS(args...) args
+
+#ifndef TRACE_FORMAT
 #define TRACE_FORMAT(name, proto, args, fmt)		\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#endif
 
-
+#ifndef TRACE_EVENT
 /*
  * For use with the TRACE_EVENT macro:
  *
@@ -262,5 +268,6 @@ static inline void tracepoint_synchronize_unregister(void)
 
 #define TRACE_EVENT(name, proto, args, struct, assign, print)	\
 	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#endif
 
 #endif
diff --git a/include/trace/irq.h b/include/trace/irq.h
index ff5d4495dc3..04ab4c65222 100644
--- a/include/trace/irq.h
+++ b/include/trace/irq.h
@@ -1,9 +1,54 @@
-#ifndef _TRACE_IRQ_H
+#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_IRQ_H
 
-#include <linux/interrupt.h>
 #include <linux/tracepoint.h>
+#include <linux/interrupt.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM irq
+
+/*
+ * Tracepoint for entry of interrupt handler:
+ */
+TRACE_FORMAT(irq_handler_entry,
+	TP_PROTO(int irq, struct irqaction *action),
+	TP_ARGS(irq, action),
+	TP_FMT("irq=%d handler=%s", irq, action->name)
+	);
+
+/*
+ * Tracepoint for return of an interrupt handler:
+ */
+TRACE_EVENT(irq_handler_exit,
+
+	TP_PROTO(int irq, struct irqaction *action, int ret),
+
+	TP_ARGS(irq, action, ret),
+
+	TP_STRUCT__entry(
+		__field(	int,	irq	)
+		__field(	int,	ret	)
+	),
+
+	TP_fast_assign(
+		__entry->irq	= irq;
+		__entry->ret	= ret;
+	),
+
+	TP_printk("irq=%d return=%s",
+		  __entry->irq, __entry->ret ? "handled" : "unhandled")
+);
+
+TRACE_FORMAT(softirq_entry,
+	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+	TP_ARGS(h, vec),
+	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
+	);
 
-#include <trace/irq_event_types.h>
+TRACE_FORMAT(softirq_exit,
+	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+	TP_ARGS(h, vec),
+	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
+	);
 
 #endif
diff --git a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h
deleted file mode 100644
index 85964ebd47e..00000000000
--- a/include/trace/irq_event_types.h
+++ /dev/null
@@ -1,55 +0,0 @@
-
-/* use <trace/irq.h> instead */
-#ifndef TRACE_FORMAT
-# error Do not include this file directly.
-# error Unless you know what you are doing.
-#endif
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM irq
-
-/*
- * Tracepoint for entry of interrupt handler:
- */
-TRACE_FORMAT(irq_handler_entry,
-	TP_PROTO(int irq, struct irqaction *action),
-	TP_ARGS(irq, action),
-	TP_FMT("irq=%d handler=%s", irq, action->name)
-	);
-
-/*
- * Tracepoint for return of an interrupt handler:
- */
-TRACE_EVENT(irq_handler_exit,
-
-	TP_PROTO(int irq, struct irqaction *action, int ret),
-
-	TP_ARGS(irq, action, ret),
-
-	TP_STRUCT__entry(
-		__field(	int,	irq	)
-		__field(	int,	ret	)
-	),
-
-	TP_fast_assign(
-		__entry->irq	= irq;
-		__entry->ret	= ret;
-	),
-
-	TP_printk("irq=%d return=%s",
-		  __entry->irq, __entry->ret ? "handled" : "unhandled")
-);
-
-TRACE_FORMAT(softirq_entry,
-	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
-	TP_ARGS(h, vec),
-	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
-	);
-
-TRACE_FORMAT(softirq_exit,
-	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
-	TP_ARGS(h, vec),
-	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
-	);
-
-#undef TRACE_SYSTEM
diff --git a/include/trace/kmem.h b/include/trace/kmem.h
index 46efc2423f0..d7d12189e5c 100644
--- a/include/trace/kmem.h
+++ b/include/trace/kmem.h
@@ -1,9 +1,192 @@
-#ifndef _TRACE_KMEM_H
+#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_KMEM_H
 
 #include <linux/types.h>
 #include <linux/tracepoint.h>
 
-#include <trace/kmem_event_types.h>
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kmem
 
-#endif /* _TRACE_KMEM_H */
+TRACE_EVENT(kmalloc,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags)
+);
+
+TRACE_EVENT(kmem_cache_alloc,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags)
+);
+
+TRACE_EVENT(kmalloc_node,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags,
+		 int node),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+		__field(	int,		node		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+		__entry->node		= node;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags,
+		__entry->node)
+);
+
+TRACE_EVENT(kmem_cache_alloc_node,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags,
+		 int node),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+		__field(	int,		node		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+		__entry->node		= node;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags,
+		__entry->node)
+);
+
+TRACE_EVENT(kfree,
+
+	TP_PROTO(unsigned long call_site, const void *ptr),
+
+	TP_ARGS(call_site, ptr),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+	),
+
+	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+);
+
+TRACE_EVENT(kmem_cache_free,
+
+	TP_PROTO(unsigned long call_site, const void *ptr),
+
+	TP_ARGS(call_site, ptr),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+	),
+
+	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+);
+
+#endif
diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h
index 5ca67df87f2..8ee7900b38c 100644
--- a/include/trace/lockdep.h
+++ b/include/trace/lockdep.h
@@ -1,9 +1,57 @@
-#ifndef _TRACE_LOCKDEP_H
+#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_LOCKDEP_H
 
 #include <linux/lockdep.h>
 #include <linux/tracepoint.h>
 
-#include <trace/lockdep_event_types.h>
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM lock
+
+#ifdef CONFIG_LOCKDEP
+
+TRACE_FORMAT(lock_acquire,
+	TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
+		int trylock, int read, int check,
+		struct lockdep_map *next_lock, unsigned long ip),
+	TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
+	TP_FMT("%s%s%s", trylock ? "try " : "",
+		read ? "read " : "", lock->name)
+	);
+
+TRACE_FORMAT(lock_release,
+	TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
+	TP_ARGS(lock, nested, ip),
+	TP_FMT("%s", lock->name)
+	);
+
+#ifdef CONFIG_LOCK_STAT
+
+TRACE_FORMAT(lock_contended,
+	TP_PROTO(struct lockdep_map *lock, unsigned long ip),
+	TP_ARGS(lock, ip),
+	TP_FMT("%s", lock->name)
+	);
+
+TRACE_EVENT(lock_acquired,
+	TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime),
+
+	TP_ARGS(lock, ip, waittime),
+
+	TP_STRUCT__entry(
+		__field(const char *, name)
+		__field(unsigned long, wait_usec)
+		__field(unsigned long, wait_nsec_rem)
+	),
+	TP_fast_assign(
+		__entry->name = lock->name;
+		__entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC);
+		__entry->wait_usec = (unsigned long) waittime;
+	),
+	TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec,
+				       __entry->wait_nsec_rem)
+);
 
 #endif
+#endif
+
+#endif /* _TRACE_LOCKDEP_H */
diff --git a/include/trace/lockdep_event_types.h b/include/trace/lockdep_event_types.h
deleted file mode 100644
index 863f1e4583a..00000000000
--- a/include/trace/lockdep_event_types.h
+++ /dev/null
@@ -1,57 +0,0 @@
-
-#ifndef TRACE_FORMAT
-# error Do not include this file directly.
-# error Unless you know what you are doing.
-#endif
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM lock
-
-#ifdef CONFIG_LOCKDEP
-
-TRACE_FORMAT(lock_acquire,
-	TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
-		int trylock, int read, int check,
-		struct lockdep_map *next_lock, unsigned long ip),
-	TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
-	TP_FMT("%s%s%s", trylock ? "try " : "",
-		read ? "read " : "", lock->name)
-	);
-
-TRACE_FORMAT(lock_release,
-	TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
-	TP_ARGS(lock, nested, ip),
-	TP_FMT("%s", lock->name)
-	);
-
-#ifdef CONFIG_LOCK_STAT
-
-TRACE_FORMAT(lock_contended,
-	TP_PROTO(struct lockdep_map *lock, unsigned long ip),
-	TP_ARGS(lock, ip),
-	TP_FMT("%s", lock->name)
-	);
-
-TRACE_EVENT(lock_acquired,
-	TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime),
-
-	TP_ARGS(lock, ip, waittime),
-
-	TP_STRUCT__entry(
-		__field(const char *, name)
-		__field(unsigned long, wait_usec)
-		__field(unsigned long, wait_nsec_rem)
-	),
-	TP_fast_assign(
-		__entry->name = lock->name;
-		__entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC);
-		__entry->wait_usec = (unsigned long) waittime;
-	),
-	TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec,
-				       __entry->wait_nsec_rem)
-);
-
-#endif
-#endif
-
-#undef TRACE_SYSTEM
diff --git a/include/trace/sched.h b/include/trace/sched.h
index 4e372a1a29b..5b1cf4a2846 100644
--- a/include/trace/sched.h
+++ b/include/trace/sched.h
@@ -1,9 +1,336 @@
-#ifndef _TRACE_SCHED_H
+#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_SCHED_H
 
 #include <linux/sched.h>
 #include <linux/tracepoint.h>
 
-#include <trace/sched_event_types.h>
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM sched
 
-#endif
+/*
+ * Tracepoint for calling kthread_stop, performed to end a kthread:
+ */
+TRACE_EVENT(sched_kthread_stop,
+
+	TP_PROTO(struct task_struct *t),
+
+	TP_ARGS(t),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
+		__entry->pid	= t->pid;
+	),
+
+	TP_printk("task %s:%d", __entry->comm, __entry->pid)
+);
+
+/*
+ * Tracepoint for the return value of the kthread stopping:
+ */
+TRACE_EVENT(sched_kthread_stop_ret,
+
+	TP_PROTO(int ret),
+
+	TP_ARGS(ret),
+
+	TP_STRUCT__entry(
+		__field(	int,	ret	)
+	),
+
+	TP_fast_assign(
+		__entry->ret	= ret;
+	),
+
+	TP_printk("ret %d", __entry->ret)
+);
+
+/*
+ * Tracepoint for waiting on task to unschedule:
+ *
+ * (NOTE: the 'rq' argument is not used by generic trace events,
+ *        but used by the latency tracer plugin. )
+ */
+TRACE_EVENT(sched_wait_task,
+
+	TP_PROTO(struct rq *rq, struct task_struct *p),
+
+	TP_ARGS(rq, p),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid	= p->pid;
+		__entry->prio	= p->prio;
+	),
+
+	TP_printk("task %s:%d [%d]",
+		  __entry->comm, __entry->pid, __entry->prio)
+);
+
+/*
+ * Tracepoint for waking up a task:
+ *
+ * (NOTE: the 'rq' argument is not used by generic trace events,
+ *        but used by the latency tracer plugin. )
+ */
+TRACE_EVENT(sched_wakeup,
+
+	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
+
+	TP_ARGS(rq, p, success),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+		__field(	int,	success			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		__entry->prio		= p->prio;
+		__entry->success	= success;
+	),
+
+	TP_printk("task %s:%d [%d] success=%d",
+		  __entry->comm, __entry->pid, __entry->prio,
+		  __entry->success)
+);
+
+/*
+ * Tracepoint for waking up a new task:
+ *
+ * (NOTE: the 'rq' argument is not used by generic trace events,
+ *        but used by the latency tracer plugin. )
+ */
+TRACE_EVENT(sched_wakeup_new,
+
+	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
+
+	TP_ARGS(rq, p, success),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+		__field(	int,	success			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		__entry->prio		= p->prio;
+		__entry->success	= success;
+	),
+
+	TP_printk("task %s:%d [%d] success=%d",
+		  __entry->comm, __entry->pid, __entry->prio,
+		  __entry->success)
+);
+
+/*
+ * Tracepoint for task switches, performed by the scheduler:
+ *
+ * (NOTE: the 'rq' argument is not used by generic trace events,
+ *        but used by the latency tracer plugin. )
+ */
+TRACE_EVENT(sched_switch,
+
+	TP_PROTO(struct rq *rq, struct task_struct *prev,
+		 struct task_struct *next),
+
+	TP_ARGS(rq, prev, next),
+
+	TP_STRUCT__entry(
+		__array(	char,	prev_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	prev_pid			)
+		__field(	int,	prev_prio			)
+		__array(	char,	next_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	next_pid			)
+		__field(	int,	next_prio			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
+		__entry->prev_pid	= prev->pid;
+		__entry->prev_prio	= prev->prio;
+		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
+		__entry->next_pid	= next->pid;
+		__entry->next_prio	= next->prio;
+	),
+
+	TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
+		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
+		__entry->next_comm, __entry->next_pid, __entry->next_prio)
+);
+
+/*
+ * Tracepoint for a task being migrated:
+ */
+TRACE_EVENT(sched_migrate_task,
+
+	TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu),
+
+	TP_ARGS(p, orig_cpu, dest_cpu),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+		__field(	int,	orig_cpu		)
+		__field(	int,	dest_cpu		)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		__entry->prio		= p->prio;
+		__entry->orig_cpu	= orig_cpu;
+		__entry->dest_cpu	= dest_cpu;
+	),
+
+	TP_printk("task %s:%d [%d] from: %d  to: %d",
+		  __entry->comm, __entry->pid, __entry->prio,
+		  __entry->orig_cpu, __entry->dest_cpu)
+);
+
+/*
+ * Tracepoint for freeing a task:
+ */
+TRACE_EVENT(sched_process_free,
+
+	TP_PROTO(struct task_struct *p),
+
+	TP_ARGS(p),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		__entry->prio		= p->prio;
+	),
+
+	TP_printk("task %s:%d [%d]",
+		  __entry->comm, __entry->pid, __entry->prio)
+);
+
+/*
+ * Tracepoint for a task exiting:
+ */
+TRACE_EVENT(sched_process_exit,
+
+	TP_PROTO(struct task_struct *p),
+
+	TP_ARGS(p),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		__entry->prio		= p->prio;
+	),
+
+	TP_printk("task %s:%d [%d]",
+		  __entry->comm, __entry->pid, __entry->prio)
+);
+
+/*
+ * Tracepoint for a waiting task:
+ */
+TRACE_EVENT(sched_process_wait,
+
+	TP_PROTO(struct pid *pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+		__entry->pid		= pid_nr(pid);
+		__entry->prio		= current->prio;
+	),
+
+	TP_printk("task %s:%d [%d]",
+		  __entry->comm, __entry->pid, __entry->prio)
+);
+
+/*
+ * Tracepoint for do_fork:
+ */
+TRACE_EVENT(sched_process_fork,
+
+	TP_PROTO(struct task_struct *parent, struct task_struct *child),
+
+	TP_ARGS(parent, child),
+
+	TP_STRUCT__entry(
+		__array(	char,	parent_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	parent_pid			)
+		__array(	char,	child_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	child_pid			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
+		__entry->parent_pid	= parent->pid;
+		memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
+		__entry->child_pid	= child->pid;
+	),
+
+	TP_printk("parent %s:%d  child %s:%d",
+		__entry->parent_comm, __entry->parent_pid,
+		__entry->child_comm, __entry->child_pid)
+);
+
+/*
+ * Tracepoint for sending a signal:
+ */
+TRACE_EVENT(sched_signal_send,
+
+	TP_PROTO(int sig, struct task_struct *p),
+
+	TP_ARGS(sig, p),
+
+	TP_STRUCT__entry(
+		__field(	int,	sig			)
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid	= p->pid;
+		__entry->sig	= sig;
+	),
+
+	TP_printk("sig: %d  task %s:%d",
+		  __entry->sig, __entry->comm, __entry->pid)
+);
+
+#endif /* _TRACE_SCHED_H */
diff --git a/include/trace/sched_event_types.h b/include/trace/sched_event_types.h
deleted file mode 100644
index 63547dc1125..00000000000
--- a/include/trace/sched_event_types.h
+++ /dev/null
@@ -1,337 +0,0 @@
-
-/* use <trace/sched.h> instead */
-#ifndef TRACE_EVENT
-# error Do not include this file directly.
-# error Unless you know what you are doing.
-#endif
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM sched
-
-/*
- * Tracepoint for calling kthread_stop, performed to end a kthread:
- */
-TRACE_EVENT(sched_kthread_stop,
-
-	TP_PROTO(struct task_struct *t),
-
-	TP_ARGS(t),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
-		__entry->pid	= t->pid;
-	),
-
-	TP_printk("task %s:%d", __entry->comm, __entry->pid)
-);
-
-/*
- * Tracepoint for the return value of the kthread stopping:
- */
-TRACE_EVENT(sched_kthread_stop_ret,
-
-	TP_PROTO(int ret),
-
-	TP_ARGS(ret),
-
-	TP_STRUCT__entry(
-		__field(	int,	ret	)
-	),
-
-	TP_fast_assign(
-		__entry->ret	= ret;
-	),
-
-	TP_printk("ret %d", __entry->ret)
-);
-
-/*
- * Tracepoint for waiting on task to unschedule:
- *
- * (NOTE: the 'rq' argument is not used by generic trace events,
- *        but used by the latency tracer plugin. )
- */
-TRACE_EVENT(sched_wait_task,
-
-	TP_PROTO(struct rq *rq, struct task_struct *p),
-
-	TP_ARGS(rq, p),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid	= p->pid;
-		__entry->prio	= p->prio;
-	),
-
-	TP_printk("task %s:%d [%d]",
-		  __entry->comm, __entry->pid, __entry->prio)
-);
-
-/*
- * Tracepoint for waking up a task:
- *
- * (NOTE: the 'rq' argument is not used by generic trace events,
- *        but used by the latency tracer plugin. )
- */
-TRACE_EVENT(sched_wakeup,
-
-	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
-
-	TP_ARGS(rq, p, success),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-		__field(	int,	success			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-		__entry->success	= success;
-	),
-
-	TP_printk("task %s:%d [%d] success=%d",
-		  __entry->comm, __entry->pid, __entry->prio,
-		  __entry->success)
-);
-
-/*
- * Tracepoint for waking up a new task:
- *
- * (NOTE: the 'rq' argument is not used by generic trace events,
- *        but used by the latency tracer plugin. )
- */
-TRACE_EVENT(sched_wakeup_new,
-
-	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
-
-	TP_ARGS(rq, p, success),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-		__field(	int,	success			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-		__entry->success	= success;
-	),
-
-	TP_printk("task %s:%d [%d] success=%d",
-		  __entry->comm, __entry->pid, __entry->prio,
-		  __entry->success)
-);
-
-/*
- * Tracepoint for task switches, performed by the scheduler:
- *
- * (NOTE: the 'rq' argument is not used by generic trace events,
- *        but used by the latency tracer plugin. )
- */
-TRACE_EVENT(sched_switch,
-
-	TP_PROTO(struct rq *rq, struct task_struct *prev,
-		 struct task_struct *next),
-
-	TP_ARGS(rq, prev, next),
-
-	TP_STRUCT__entry(
-		__array(	char,	prev_comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	prev_pid			)
-		__field(	int,	prev_prio			)
-		__array(	char,	next_comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	next_pid			)
-		__field(	int,	next_prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
-		__entry->prev_pid	= prev->pid;
-		__entry->prev_prio	= prev->prio;
-		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
-		__entry->next_pid	= next->pid;
-		__entry->next_prio	= next->prio;
-	),
-
-	TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
-		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
-		__entry->next_comm, __entry->next_pid, __entry->next_prio)
-);
-
-/*
- * Tracepoint for a task being migrated:
- */
-TRACE_EVENT(sched_migrate_task,
-
-	TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu),
-
-	TP_ARGS(p, orig_cpu, dest_cpu),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-		__field(	int,	orig_cpu		)
-		__field(	int,	dest_cpu		)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-		__entry->orig_cpu	= orig_cpu;
-		__entry->dest_cpu	= dest_cpu;
-	),
-
-	TP_printk("task %s:%d [%d] from: %d  to: %d",
-		  __entry->comm, __entry->pid, __entry->prio,
-		  __entry->orig_cpu, __entry->dest_cpu)
-);
-
-/*
- * Tracepoint for freeing a task:
- */
-TRACE_EVENT(sched_process_free,
-
-	TP_PROTO(struct task_struct *p),
-
-	TP_ARGS(p),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-	),
-
-	TP_printk("task %s:%d [%d]",
-		  __entry->comm, __entry->pid, __entry->prio)
-);
-
-/*
- * Tracepoint for a task exiting:
- */
-TRACE_EVENT(sched_process_exit,
-
-	TP_PROTO(struct task_struct *p),
-
-	TP_ARGS(p),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-	),
-
-	TP_printk("task %s:%d [%d]",
-		  __entry->comm, __entry->pid, __entry->prio)
-);
-
-/*
- * Tracepoint for a waiting task:
- */
-TRACE_EVENT(sched_process_wait,
-
-	TP_PROTO(struct pid *pid),
-
-	TP_ARGS(pid),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
-		__entry->pid		= pid_nr(pid);
-		__entry->prio		= current->prio;
-	),
-
-	TP_printk("task %s:%d [%d]",
-		  __entry->comm, __entry->pid, __entry->prio)
-);
-
-/*
- * Tracepoint for do_fork:
- */
-TRACE_EVENT(sched_process_fork,
-
-	TP_PROTO(struct task_struct *parent, struct task_struct *child),
-
-	TP_ARGS(parent, child),
-
-	TP_STRUCT__entry(
-		__array(	char,	parent_comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	parent_pid			)
-		__array(	char,	child_comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	child_pid			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
-		__entry->parent_pid	= parent->pid;
-		memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
-		__entry->child_pid	= child->pid;
-	),
-
-	TP_printk("parent %s:%d  child %s:%d",
-		__entry->parent_comm, __entry->parent_pid,
-		__entry->child_comm, __entry->child_pid)
-);
-
-/*
- * Tracepoint for sending a signal:
- */
-TRACE_EVENT(sched_signal_send,
-
-	TP_PROTO(int sig, struct task_struct *p),
-
-	TP_ARGS(sig, p),
-
-	TP_STRUCT__entry(
-		__field(	int,	sig			)
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid	= p->pid;
-		__entry->sig	= sig;
-	),
-
-	TP_printk("sig: %d  task %s:%d",
-		  __entry->sig, __entry->comm, __entry->pid)
-);
-
-#undef TRACE_SYSTEM
diff --git a/include/trace/skb.h b/include/trace/skb.h
index d2de7174a6e..e6fd281f7f8 100644
--- a/include/trace/skb.h
+++ b/include/trace/skb.h
@@ -1,9 +1,37 @@
-#ifndef _TRACE_SKB_H_
-#define _TRACE_SKB_H_
+#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SKB_H
 
 #include <linux/skbuff.h>
 #include <linux/tracepoint.h>
 
-#include <trace/skb_event_types.h>
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM skb
 
-#endif
+/*
+ * Tracepoint for free an sk_buff:
+ */
+TRACE_EVENT(kfree_skb,
+
+	TP_PROTO(struct sk_buff *skb, void *location),
+
+	TP_ARGS(skb, location),
+
+	TP_STRUCT__entry(
+		__field(	void *,		skbaddr		)
+		__field(	unsigned short,	protocol	)
+		__field(	void *,		location	)
+	),
+
+	TP_fast_assign(
+		__entry->skbaddr = skb;
+		if (skb) {
+			__entry->protocol = ntohs(skb->protocol);
+		}
+		__entry->location = location;
+	),
+
+	TP_printk("skbaddr=%p protocol=%u location=%p",
+		__entry->skbaddr, __entry->protocol, __entry->location)
+);
+
+#endif /* _TRACE_SKB_H */
diff --git a/include/trace/skb_event_types.h b/include/trace/skb_event_types.h
deleted file mode 100644
index 4a1c504c0e1..00000000000
--- a/include/trace/skb_event_types.h
+++ /dev/null
@@ -1,38 +0,0 @@
-
-/* use <trace/skb.h> instead */
-#ifndef TRACE_EVENT
-# error Do not include this file directly.
-# error Unless you know what you are doing.
-#endif
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM skb
-
-/*
- * Tracepoint for free an sk_buff:
- */
-TRACE_EVENT(kfree_skb,
-
-	TP_PROTO(struct sk_buff *skb, void *location),
-
-	TP_ARGS(skb, location),
-
-	TP_STRUCT__entry(
-		__field(	void *,		skbaddr		)
-		__field(	unsigned short,	protocol	)
-		__field(	void *,		location	)
-	),
-
-	TP_fast_assign(
-		__entry->skbaddr = skb;
-		if (skb) {
-			__entry->protocol = ntohs(skb->protocol);
-		}
-		__entry->location = location;
-	),
-
-	TP_printk("skbaddr=%p protocol=%u location=%p",
-		__entry->skbaddr, __entry->protocol, __entry->location)
-);
-
-#undef TRACE_SYSTEM
diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h
deleted file mode 100644
index 552a50e169a..00000000000
--- a/include/trace/trace_event_types.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* trace/<type>_event_types.h here */
-
-#include <trace/sched_event_types.h>
-#include <trace/irq_event_types.h>
-#include <trace/lockdep_event_types.h>
-#include <trace/skb_event_types.h>
-#include <trace/kmem_event_types.h>
diff --git a/kernel/trace/events.c b/kernel/trace/events.c
index 246f2aa6dc4..5a35a914f0e 100644
--- a/kernel/trace/events.c
+++ b/kernel/trace/events.c
@@ -8,6 +8,7 @@
 
 #include "trace_output.h"
 
+#define TRACE_HEADER_MULTI_READ
 #include "trace_events_stage_1.h"
 #include "trace_events_stage_2.h"
 #include "trace_events_stage_3.h"
diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h
index 38985f9b379..475f46a047a 100644
--- a/kernel/trace/trace_events_stage_1.h
+++ b/kernel/trace/trace_events_stage_1.h
@@ -1,7 +1,7 @@
 /*
  * Stage 1 of the trace events.
  *
- * Override the macros in <trace/trace_event_types.h> to include the following:
+ * Override the macros in <trace/trace_events.h> to include the following:
  *
  * struct ftrace_raw_<call> {
  *	struct trace_entry		ent;
@@ -36,4 +36,4 @@
 	};							\
 	static struct ftrace_event_call event_##name
 
-#include <trace/trace_event_types.h>
+#include <trace/trace_events.h>
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h
index 59cfd7dfe68..aa4a67a0656 100644
--- a/kernel/trace/trace_events_stage_2.h
+++ b/kernel/trace/trace_events_stage_2.h
@@ -1,7 +1,7 @@
 /*
  * Stage 2 of the trace events.
  *
- * Override the macros in <trace/trace_event_types.h> to include the following:
+ * Override the macros in <trace/trace_events.h> to include the following:
  *
  * enum print_line_t
  * ftrace_raw_output_<call>(struct trace_iterator *iter, int flags)
@@ -64,7 +64,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 	return TRACE_TYPE_HANDLED;					\
 }
 	
-#include <trace/trace_event_types.h>
+#include <trace/trace_events.h>
 
 /*
  * Setup the showing format of trace point.
@@ -128,7 +128,7 @@ ftrace_format_##call(struct trace_seq *s)				\
 	return ret;							\
 }
 
-#include <trace/trace_event_types.h>
+#include <trace/trace_events.h>
 
 #undef __field
 #define __field(type, item)						\
@@ -167,4 +167,4 @@ ftrace_define_fields_##call(void)					\
 	return ret;							\
 }
 
-#include <trace/trace_event_types.h>
+#include <trace/trace_events.h>
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h
index 5bb1b7ffbdb..45c04e1f38d 100644
--- a/kernel/trace/trace_events_stage_3.h
+++ b/kernel/trace/trace_events_stage_3.h
@@ -1,7 +1,7 @@
 /*
  * Stage 3 of the trace events.
  *
- * Override the macros in <trace/trace_event_types.h> to include the following:
+ * Override the macros in <trace/trace_events.h> to include the following:
  *
  * static void ftrace_event_<call>(proto)
  * {
@@ -272,7 +272,7 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 	_TRACE_PROFILE_INIT(call)					\
 }
 
-#include <trace/trace_event_types.h>
+#include <trace/trace_events.h>
 
 #undef _TRACE_PROFILE
 #undef _TRACE_PROFILE_INIT
-- 
cgit v1.2.3-70-g09d2


From 78ddb08feb7d4fbe3c0a9931804c51ee58be4023 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue, 14 Apr 2009 16:53:05 +0200
Subject: wait: don't use __wake_up_common()

'777c6c5 wait: prevent exclusive waiter starvation' made
__wake_up_common() global to be used from abort_exclusive_wait().

It was needed to do a wake-up with the waitqueue lock held while
passing down a key to the wake-up function.

Since '4ede816 epoll keyed wakeups: add __wake_up_locked_key() and
__wake_up_sync_key()' there is an appropriate wrapper for this case:
__wake_up_locked_key().

Use it here and make __wake_up_common() private to the scheduler
again.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1239720785-19661-1-git-send-email-hannes@cmpxchg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/wait.h | 2 --
 kernel/sched.c       | 2 +-
 kernel/wait.c        | 2 +-
 3 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 5d631c17eae..67d4e89b62d 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -132,8 +132,6 @@ static inline void __remove_wait_queue(wait_queue_head_t *head,
 	list_del(&old->task_list);
 }
 
-void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
-			int nr_exclusive, int sync, void *key);
 void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
 void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr,
diff --git a/kernel/sched.c b/kernel/sched.c
index 36d213bca47..92b4b56ad09 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5345,7 +5345,7 @@ EXPORT_SYMBOL(default_wake_function);
  * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
  * zero in this (rare) case, and we handle it by continuing to scan the queue.
  */
-void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
 			int nr_exclusive, int sync, void *key)
 {
 	wait_queue_t *curr, *next;
diff --git a/kernel/wait.c b/kernel/wait.c
index 42a2dbc181c..ea7c3b4275c 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -154,7 +154,7 @@ void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
 	if (!list_empty(&wait->task_list))
 		list_del_init(&wait->task_list);
 	else if (waitqueue_active(q))
-		__wake_up_common(q, mode, 1, 0, key);
+		__wake_up_locked_key(q, mode, key);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(abort_exclusive_wait);
-- 
cgit v1.2.3-70-g09d2


From 72c6a9870f901045f2464c3dc6ee8914bfdc07aa Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Tue, 14 Apr 2009 17:33:57 +0200
Subject: rculist.h: introduce list_entry_rcu() and list_first_entry_rcu()

I've run into the situation where I need to use list_first_entry with
rcu-guarded list. This patch introduces this.

Also simplify list_for_each_entry_rcu() to use new list_entry_rcu()
instead of list_entry().

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: dipankar@in.ibm.com
LKML-Reference: <20090414153356.GC3999@psychotron.englab.brq.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rculist.h | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index e649bd3f2c9..5710f43bbc9 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -198,6 +198,32 @@ static inline void list_splice_init_rcu(struct list_head *list,
 	at->prev = last;
 }
 
+/**
+ * list_entry_rcu - get the struct for this entry
+ * @ptr:        the &struct list_head pointer.
+ * @type:       the type of the struct this is embedded in.
+ * @member:     the name of the list_struct within the struct.
+ *
+ * This primitive may safely run concurrently with the _rcu list-mutation
+ * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
+ */
+#define list_entry_rcu(ptr, type, member) \
+	container_of(rcu_dereference(ptr), type, member)
+
+/**
+ * list_first_entry_rcu - get the first element from a list
+ * @ptr:        the list head to take the element from.
+ * @type:       the type of the struct this is embedded in.
+ * @member:     the name of the list_struct within the struct.
+ *
+ * Note, that list is expected to be not empty.
+ *
+ * This primitive may safely run concurrently with the _rcu list-mutation
+ * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
+ */
+#define list_first_entry_rcu(ptr, type, member) \
+	list_entry_rcu((ptr)->next, type, member)
+
 #define __list_for_each_rcu(pos, head) \
 	for (pos = rcu_dereference((head)->next); \
 		pos != (head); \
@@ -214,9 +240,9 @@ static inline void list_splice_init_rcu(struct list_head *list,
  * as long as the traversal is guarded by rcu_read_lock().
  */
 #define list_for_each_entry_rcu(pos, head, member) \
-	for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \
+	for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \
 		prefetch(pos->member.next), &pos->member != (head); \
-		pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member))
+		pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
 
 
 /**
-- 
cgit v1.2.3-70-g09d2


From a8d154b009168337494fbf345671bab74d3e4b8b Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 10 Apr 2009 09:36:00 -0400
Subject: tracing: create automated trace defines

This patch lowers the number of places a developer must modify to add
new tracepoints. The current method to add a new tracepoint
into an existing system is to write the trace point macro in the
trace header with one of the macros TRACE_EVENT, TRACE_FORMAT or
DECLARE_TRACE, then they must add the same named item into the C file
with the macro DEFINE_TRACE(name) and then add the trace point.

This change cuts out the needing to add the DEFINE_TRACE(name).
Every file that uses the tracepoint must still include the trace/<type>.h
file, but the one C file must also add a define before the including
of that file.

 #define CREATE_TRACE_POINTS
 #include <trace/mytrace.h>

This will cause the trace/mytrace.h file to also produce the C code
necessary to implement the trace point.

Note, if more than one trace/<type>.h is used to create the C code
it is best to list them all together.

 #define CREATE_TRACE_POINTS
 #include <trace/foo.h>
 #include <trace/bar.h>
 #include <trace/fido.h>

Thanks to Mathieu Desnoyers and Christoph Hellwig for coming up with
the cleaner solution of the define above the includes over my first
design to have the C code include a "special" header.

This patch converts sched, irq and lockdep and skb to use this new
method.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Zhao Lei <zhaolei@cn.fujitsu.com>
Cc: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/define_trace.h | 75 ++++++++++++++++++++++++++++++++++++++++++++
 include/trace/irq.h          |  5 ++-
 include/trace/kmem.h         |  4 ++-
 include/trace/lockdep.h      |  3 ++
 include/trace/sched.h        |  3 ++
 include/trace/skb.h          |  3 ++
 kernel/exit.c                |  4 ---
 kernel/fork.c                |  2 --
 kernel/irq/handle.c          |  7 ++---
 kernel/kthread.c             |  3 --
 kernel/lockdep.c             | 12 ++-----
 kernel/sched.c               | 10 ++----
 kernel/signal.c              |  2 --
 kernel/softirq.c             |  3 --
 mm/util.c                    | 11 ++-----
 net/core/net-traces.c        |  4 +--
 16 files changed, 105 insertions(+), 46 deletions(-)
 create mode 100644 include/trace/define_trace.h

(limited to 'include')

diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
new file mode 100644
index 00000000000..de9dc7d8508
--- /dev/null
+++ b/include/trace/define_trace.h
@@ -0,0 +1,75 @@
+/*
+ * Trace files that want to automate creationg of all tracepoints defined
+ * in their file should include this file. The following are macros that the
+ * trace file may define:
+ *
+ * TRACE_SYSTEM defines the system the tracepoint is for
+ *
+ * TRACE_INCLUDE_FILE if the file name is something other than TRACE_SYSTEM.h
+ *     This macro may be defined to tell define_trace.h what file to include.
+ *     Note, leave off the ".h".
+ *
+ * TRACE_INCLUDE_PATH if the path is something other than core kernel include/trace
+ *     then this macro can define the path to use. Note, the path is relative to
+ *     define_trace.h, not the file including it. Full path names for out of tree
+ *     modules must be used.
+ */
+
+#ifdef CREATE_TRACE_POINTS
+
+/* Prevent recursion */
+#undef CREATE_TRACE_POINTS
+
+#include <linux/stringify.h>
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(name, proto, args, tstruct, assign, print)	\
+	DEFINE_TRACE(name)
+
+#undef TRACE_FORMAT
+#define TRACE_FORMAT(name, proto, args, print)	\
+	DEFINE_TRACE(name)
+
+#undef DECLARE_TRACE
+#define DECLARE_TRACE(name, proto, args)	\
+	DEFINE_TRACE(name)
+
+#undef TRACE_INCLUDE
+#undef __TRACE_INCLUDE
+
+#ifndef TRACE_INCLUDE_FILE
+# define TRACE_INCLUDE_FILE TRACE_SYSTEM
+# define UNDEF_TRACE_INCLUDE_FILE
+#endif
+
+#ifndef TRACE_INCLUDE_PATH
+# define __TRACE_INCLUDE(system) <trace/system.h>
+# define UNDEF_TRACE_INCLUDE_FILE
+#else
+# define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h)
+#endif
+
+# define TRACE_INCLUDE(system) __TRACE_INCLUDE(system)
+
+/* Let the trace headers be reread */
+#define TRACE_HEADER_MULTI_READ
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#undef TRACE_HEADER_MULTI_READ
+
+/* Only undef what we defined in this file */
+#ifdef UNDEF_TRACE_INCLUDE_FILE
+# undef TRACE_INCLUDE_PATH
+# undef UNDEF_TRACE_INCLUDE_FILE
+#endif
+
+#ifdef UNDEF_TRACE_INCLUDE_FILE
+# undef TRACE_INCLUDE_PATH
+# undef UNDEF_TRACE_INCLUDE_FILE
+#endif
+
+/* We may be processing more files */
+#define CREATE_TRACE_POINTS
+
+#endif /* CREATE_TRACE_POINTS */
diff --git a/include/trace/irq.h b/include/trace/irq.h
index 04ab4c65222..75e3468e449 100644
--- a/include/trace/irq.h
+++ b/include/trace/irq.h
@@ -51,4 +51,7 @@ TRACE_FORMAT(softirq_exit,
 	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
 	);
 
-#endif
+#endif /*  _TRACE_IRQ_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/kmem.h b/include/trace/kmem.h
index d7d12189e5c..c22c42f980b 100644
--- a/include/trace/kmem.h
+++ b/include/trace/kmem.h
@@ -188,5 +188,7 @@ TRACE_EVENT(kmem_cache_free,
 
 	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
 );
+#endif /* _TRACE_KMEM_H */
 
-#endif
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h
index 8ee7900b38c..4d301e758de 100644
--- a/include/trace/lockdep.h
+++ b/include/trace/lockdep.h
@@ -55,3 +55,6 @@ TRACE_EVENT(lock_acquired,
 #endif
 
 #endif /* _TRACE_LOCKDEP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/sched.h b/include/trace/sched.h
index 5b1cf4a2846..ffa1cab586b 100644
--- a/include/trace/sched.h
+++ b/include/trace/sched.h
@@ -334,3 +334,6 @@ TRACE_EVENT(sched_signal_send,
 );
 
 #endif /* _TRACE_SCHED_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/skb.h b/include/trace/skb.h
index e6fd281f7f8..1e8fabb57c0 100644
--- a/include/trace/skb.h
+++ b/include/trace/skb.h
@@ -35,3 +35,6 @@ TRACE_EVENT(kfree_skb,
 );
 
 #endif /* _TRACE_SKB_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/kernel/exit.c b/kernel/exit.c
index abf9cf3b95c..2fe9d2c7eee 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -56,10 +56,6 @@
 #include <asm/mmu_context.h>
 #include "cred-internals.h"
 
-DEFINE_TRACE(sched_process_free);
-DEFINE_TRACE(sched_process_exit);
-DEFINE_TRACE(sched_process_wait);
-
 static void exit_mm(struct task_struct * tsk);
 
 static void __unhash_process(struct task_struct *p)
diff --git a/kernel/fork.c b/kernel/fork.c
index b9e2edd0072..4bebf263923 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -83,8 +83,6 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
 
-DEFINE_TRACE(sched_process_fork);
-
 int nr_processes(void)
 {
 	int cpu;
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index d82142be8dd..983d8be8dff 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -17,9 +17,11 @@
 #include <linux/kernel_stat.h>
 #include <linux/rculist.h>
 #include <linux/hash.h>
-#include <trace/irq.h>
 #include <linux/bootmem.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/irq.h>
+
 #include "internals.h"
 
 /*
@@ -348,9 +350,6 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action)
 	       "but no thread function available.", irq, action->name);
 }
 
-DEFINE_TRACE(irq_handler_entry);
-DEFINE_TRACE(irq_handler_exit);
-
 /**
  * handle_IRQ_event - irq action chain handler
  * @irq:	the interrupt number
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 4ebaf8519ab..e1c76924545 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -21,9 +21,6 @@ static DEFINE_SPINLOCK(kthread_create_lock);
 static LIST_HEAD(kthread_create_list);
 struct task_struct *kthreadd_task;
 
-DEFINE_TRACE(sched_kthread_stop);
-DEFINE_TRACE(sched_kthread_stop_ret);
-
 struct kthread_create_info
 {
 	/* Information passed to kthread() from kthreadd. */
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index c4582a6ea95..257f21a76c5 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -42,12 +42,14 @@
 #include <linux/hash.h>
 #include <linux/ftrace.h>
 #include <linux/stringify.h>
-#include <trace/lockdep.h>
 
 #include <asm/sections.h>
 
 #include "lockdep_internals.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/lockdep.h>
+
 #ifdef CONFIG_PROVE_LOCKING
 int prove_locking = 1;
 module_param(prove_locking, int, 0644);
@@ -2929,8 +2931,6 @@ void lock_set_class(struct lockdep_map *lock, const char *name,
 }
 EXPORT_SYMBOL_GPL(lock_set_class);
 
-DEFINE_TRACE(lock_acquire);
-
 /*
  * We are not always called with irqs disabled - do that here,
  * and also avoid lockdep recursion:
@@ -2957,8 +2957,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 }
 EXPORT_SYMBOL_GPL(lock_acquire);
 
-DEFINE_TRACE(lock_release);
-
 void lock_release(struct lockdep_map *lock, int nested,
 			  unsigned long ip)
 {
@@ -3061,8 +3059,6 @@ found_it:
 	put_lock_stats(stats);
 }
 
-DEFINE_TRACE(lock_acquired);
-
 static void
 __lock_acquired(struct lockdep_map *lock, unsigned long ip)
 {
@@ -3118,8 +3114,6 @@ found_it:
 	lock->ip = ip;
 }
 
-DEFINE_TRACE(lock_contended);
-
 void lock_contended(struct lockdep_map *lock, unsigned long ip)
 {
 	unsigned long flags;
diff --git a/kernel/sched.c b/kernel/sched.c
index 5724508c3b6..e6d4518d47e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -72,13 +72,15 @@
 #include <linux/debugfs.h>
 #include <linux/ctype.h>
 #include <linux/ftrace.h>
-#include <trace/sched.h>
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
 
 #include "sched_cpupri.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/sched.h>
+
 /*
  * Convert user-nice values [ -20 ... 0 ... 19 ]
  * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
@@ -118,12 +120,6 @@
  */
 #define RUNTIME_INF	((u64)~0ULL)
 
-DEFINE_TRACE(sched_wait_task);
-DEFINE_TRACE(sched_wakeup);
-DEFINE_TRACE(sched_wakeup_new);
-DEFINE_TRACE(sched_switch);
-DEFINE_TRACE(sched_migrate_task);
-
 #ifdef CONFIG_SMP
 
 static void double_rq_lock(struct rq *rq1, struct rq *rq2);
diff --git a/kernel/signal.c b/kernel/signal.c
index d8034737db4..1d5703ff003 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -41,8 +41,6 @@
 
 static struct kmem_cache *sigqueue_cachep;
 
-DEFINE_TRACE(sched_signal_send);
-
 static void __user *sig_handler(struct task_struct *t, int sig)
 {
 	return t->sighand->action[sig - 1].sa.sa_handler;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 2fecefacdc5..a2d9b458ac2 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -186,9 +186,6 @@ EXPORT_SYMBOL(local_bh_enable_ip);
  */
 #define MAX_SOFTIRQ_RESTART 10
 
-DEFINE_TRACE(softirq_entry);
-DEFINE_TRACE(softirq_exit);
-
 asmlinkage void __do_softirq(void)
 {
 	struct softirq_action *h;
diff --git a/mm/util.c b/mm/util.c
index 2599e83eea1..0e74a22791c 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -4,9 +4,11 @@
 #include <linux/module.h>
 #include <linux/err.h>
 #include <linux/sched.h>
-#include <linux/tracepoint.h>
 #include <asm/uaccess.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/kmem.h>
+
 /**
  * kstrdup - allocate space for and copy an existing string
  * @s: the string to duplicate
@@ -239,13 +241,6 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start,
 EXPORT_SYMBOL_GPL(get_user_pages_fast);
 
 /* Tracepoints definitions. */
-DEFINE_TRACE(kmalloc);
-DEFINE_TRACE(kmem_cache_alloc);
-DEFINE_TRACE(kmalloc_node);
-DEFINE_TRACE(kmem_cache_alloc_node);
-DEFINE_TRACE(kfree);
-DEFINE_TRACE(kmem_cache_free);
-
 EXPORT_TRACEPOINT_SYMBOL(kmalloc);
 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
 EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index c8fb45665e4..80177205947 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -19,11 +19,11 @@
 #include <linux/workqueue.h>
 #include <linux/netlink.h>
 #include <linux/net_dropmon.h>
-#include <trace/skb.h>
 
 #include <asm/unaligned.h>
 #include <asm/bitops.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/skb.h>
 
-DEFINE_TRACE(kfree_skb);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
-- 
cgit v1.2.3-70-g09d2


From 9504504cbab29ecb694186b1c5b15d3579c43c51 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Sat, 11 Apr 2009 12:59:57 -0400
Subject: tracing: make trace_seq operations available for core kernel

In the process to make TRACE_EVENT macro work for modules, the trace_seq
operations must be available for core kernel code.

These operations are quite useful and can be used for other implementations.

The main idea is that we create a trace_seq handle that acts very much
like the seq_file handle.

	struct trace_seq *s = kmalloc(sizeof(*s, GFP_KERNEL);

	trace_seq_init(s);
	trace_seq_printf(s, "some data %d\n", variable);

	printk("%s", s->buffer);

The main use is to allow a top level function call several other functions
that may store printf like data into the buffer. Then at the end, the top
level function can process all the data with any method it would like to.
It could be passed to userspace, output via printk or even use seq_file:

	trace_seq_to_user(s, ubuf, cnt);
	seq_puts(m, s->buffer);

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/trace_seq.h   | 89 +++++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace.h        | 15 +-------
 kernel/trace/trace_output.h | 16 +-------
 3 files changed, 92 insertions(+), 28 deletions(-)
 create mode 100644 include/linux/trace_seq.h

(limited to 'include')

diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
new file mode 100644
index 00000000000..28051da876d
--- /dev/null
+++ b/include/linux/trace_seq.h
@@ -0,0 +1,89 @@
+#ifndef _LINUX_TRACE_SEQ_H
+#define _LINUX_TRACE_SEQ_H
+
+/*
+ * Trace sequences are used to allow a function to call several other functions
+ * to create a string of data to use (up to a max of PAGE_SIZE.
+ */
+
+struct trace_seq {
+	unsigned char		buffer[PAGE_SIZE];
+	unsigned int		len;
+	unsigned int		readpos;
+};
+
+static inline void
+trace_seq_init(struct trace_seq *s)
+{
+	s->len = 0;
+	s->readpos = 0;
+}
+
+/*
+ * Currently only defined when tracing is enabled.
+ */
+#ifdef CONFIG_TRACING
+extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int
+trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
+extern void trace_print_seq(struct seq_file *m, struct trace_seq *s);
+extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
+				 size_t cnt);
+extern int trace_seq_puts(struct trace_seq *s, const char *str);
+extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
+extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len);
+extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
+				size_t len);
+extern void *trace_seq_reserve(struct trace_seq *s, size_t len);
+extern int trace_seq_path(struct trace_seq *s, struct path *path);
+
+#else /* CONFIG_TRACING */
+static inline int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)))
+{
+	return 0;
+}
+static inline int
+trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
+{
+	return 0;
+}
+
+static inline void trace_print_seq(struct seq_file *m, struct trace_seq *s)
+{
+}
+static inline ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
+				 size_t cnt)
+{
+	return 0;
+}
+static inline int trace_seq_puts(struct trace_seq *s, const char *str)
+{
+	return 0;
+}
+static inline int trace_seq_putc(struct trace_seq *s, unsigned char c);
+{
+	return 0;
+}
+static inline int
+trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
+{
+	return 0;
+}
+static inline int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
+				       size_t len)
+{
+	return 0;
+}
+static inline void *trace_seq_reserve(struct trace_seq *s, size_t len)
+{
+	return NULL;
+}
+static inline int trace_seq_path(struct trace_seq *s, struct path *path)
+{
+	return 0;
+}
+#endif /* CONFIG_TRACING */
+
+#endif /* _LINUX_TRACE_SEQ_H */
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b05b6ac982a..1882846b738 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -12,6 +12,8 @@
 #include <linux/kmemtrace.h>
 #include <trace/power.h>
 
+#include <linux/trace_seq.h>
+
 enum trace_type {
 	__TRACE_FIRST_TYPE = 0,
 
@@ -423,19 +425,6 @@ struct tracer {
 	struct tracer_stat	*stats;
 };
 
-struct trace_seq {
-	unsigned char		buffer[PAGE_SIZE];
-	unsigned int		len;
-	unsigned int		readpos;
-};
-
-static inline void
-trace_seq_init(struct trace_seq *s)
-{
-	s->len = 0;
-	s->readpos = 0;
-}
-
 
 #define TRACE_PIPE_ALL_CPU	-1
 
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index 91630217fb4..5c7cbfb65c7 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -1,6 +1,7 @@
 #ifndef __TRACE_EVENTS_H
 #define __TRACE_EVENTS_H
 
+#include <linux/trace_seq.h>
 #include "trace.h"
 
 typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
@@ -20,24 +21,9 @@ trace_print_bprintk_msg_only(struct trace_iterator *iter);
 extern enum print_line_t
 trace_print_printk_msg_only(struct trace_iterator *iter);
 
-extern void trace_print_seq(struct seq_file *m, struct trace_seq *s);
-
-extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
-	__attribute__ ((format (printf, 2, 3)));
-extern int
-trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
 extern int
 seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
 		unsigned long sym_flags);
-extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
-				 size_t cnt);
-extern int trace_seq_puts(struct trace_seq *s, const char *str);
-extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
-extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len);
-extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
-				size_t len);
-extern void *trace_seq_reserve(struct trace_seq *s, size_t len);
-extern int trace_seq_path(struct trace_seq *s, struct path *path);
 extern int seq_print_userip_objs(const struct userstack_entry *entry,
 				 struct trace_seq *s, unsigned long sym_flags);
 extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
-- 
cgit v1.2.3-70-g09d2


From 97f2025153499faa17267a0d4e18c7afaf73f39d Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 13 Apr 2009 11:20:49 -0400
Subject: tracing/events: move declarations from trace directory to core
 include

In preparation to allowing trace events to happen in modules, we need
to move some of the local declarations in the kernel/trace directory
into include/linux.

This patch simply moves the declarations and performs no context changes.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h | 146 +++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace.h         | 120 +----------------------------------
 kernel/trace/trace_output.h  |  14 -----
 3 files changed, 147 insertions(+), 133 deletions(-)
 create mode 100644 include/linux/ftrace_event.h

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
new file mode 100644
index 00000000000..496b76d9f9d
--- /dev/null
+++ b/include/linux/ftrace_event.h
@@ -0,0 +1,146 @@
+#ifndef _LINUX_FTRACE_EVENT_H
+#define _LINUX_FTRACE_EVENT_H
+
+#include <linux/trace_seq.h>
+#include <linux/ring_buffer.h>
+
+
+struct trace_array;
+struct tracer;
+
+/*
+ * The trace entry - the most basic unit of tracing. This is what
+ * is printed in the end as a single line in the trace output, such as:
+ *
+ *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
+ */
+struct trace_entry {
+	unsigned char		type;
+	unsigned char		flags;
+	unsigned char		preempt_count;
+	int			pid;
+	int			tgid;
+};
+
+/*
+ * Trace iterator - used by printout routines who present trace
+ * results to users and which routines might sleep, etc:
+ */
+struct trace_iterator {
+	struct trace_array	*tr;
+	struct tracer		*trace;
+	void			*private;
+	int			cpu_file;
+	struct mutex		mutex;
+	struct ring_buffer_iter	*buffer_iter[NR_CPUS];
+
+	/* The below is zeroed out in pipe_read */
+	struct trace_seq	seq;
+	struct trace_entry	*ent;
+	int			cpu;
+	u64			ts;
+
+	unsigned long		iter_flags;
+	loff_t			pos;
+	long			idx;
+
+	cpumask_var_t		started;
+};
+
+
+typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
+					      int flags);
+struct trace_event {
+	struct hlist_node	node;
+	int			type;
+	trace_print_func	trace;
+	trace_print_func	raw;
+	trace_print_func	hex;
+	trace_print_func	binary;
+};
+
+extern int register_ftrace_event(struct trace_event *event);
+extern int unregister_ftrace_event(struct trace_event *event);
+
+/* Return values for print_line callback */
+enum print_line_t {
+	TRACE_TYPE_PARTIAL_LINE	= 0,	/* Retry after flushing the seq */
+	TRACE_TYPE_HANDLED	= 1,
+	TRACE_TYPE_UNHANDLED	= 2,	/* Relay to other output functions */
+	TRACE_TYPE_NO_CONSUME	= 3	/* Handled but ask to not consume */
+};
+
+
+struct ring_buffer_event *
+trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
+				  unsigned long flags, int pc);
+void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
+					unsigned long flags, int pc);
+void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
+					unsigned long flags, int pc);
+void trace_current_buffer_discard_commit(struct ring_buffer_event *event);
+
+void tracing_record_cmdline(struct task_struct *tsk);
+
+struct ftrace_event_call {
+	char			*name;
+	char			*system;
+	struct dentry		*dir;
+	int			enabled;
+	int			(*regfunc)(void);
+	void			(*unregfunc)(void);
+	int			id;
+	int			(*raw_init)(void);
+	int			(*show_format)(struct trace_seq *s);
+	int			(*define_fields)(void);
+	struct list_head	fields;
+	int			n_preds;
+	struct filter_pred	**preds;
+
+#ifdef CONFIG_EVENT_PROFILE
+	atomic_t	profile_count;
+	int		(*profile_enable)(struct ftrace_event_call *);
+	void		(*profile_disable)(struct ftrace_event_call *);
+#endif
+};
+
+#define MAX_FILTER_PRED		8
+#define MAX_FILTER_STR_VAL	128
+
+extern int init_preds(struct ftrace_event_call *call);
+extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
+extern int filter_current_check_discard(struct ftrace_event_call *call,
+					void *rec,
+					struct ring_buffer_event *event);
+
+extern int trace_define_field(struct ftrace_event_call *call, char *type,
+			      char *name, int offset, int size);
+
+
+/*
+ * The double __builtin_constant_p is because gcc will give us an error
+ * if we try to allocate the static variable to fmt if it is not a
+ * constant. Even with the outer if statement optimizing out.
+ */
+#define event_trace_printk(ip, fmt, args...)				\
+do {									\
+	__trace_printk_check_format(fmt, ##args);			\
+	tracing_record_cmdline(current);				\
+	if (__builtin_constant_p(fmt)) {				\
+		static const char *trace_printk_fmt			\
+		  __attribute__((section("__trace_printk_fmt"))) =	\
+			__builtin_constant_p(fmt) ? fmt : NULL;		\
+									\
+		__trace_bprintk(ip, trace_printk_fmt, ##args);		\
+	} else								\
+		__trace_printk(ip, fmt, ##args);			\
+} while (0)
+
+#define __common_field(type, item)					\
+	ret = trace_define_field(event_call, #type, "common_" #item,	\
+				 offsetof(typeof(field.ent), item),	\
+				 sizeof(field.ent.item));		\
+	if (ret)							\
+		return ret;
+
+#endif /* _LINUX_FTRACE_EVENT_H */
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 1882846b738..6bcdf4af9b2 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -13,6 +13,7 @@
 #include <trace/power.h>
 
 #include <linux/trace_seq.h>
+#include <linux/ftrace_event.h>
 
 enum trace_type {
 	__TRACE_FIRST_TYPE = 0,
@@ -43,20 +44,6 @@ enum trace_type {
 	__TRACE_LAST_TYPE,
 };
 
-/*
- * The trace entry - the most basic unit of tracing. This is what
- * is printed in the end as a single line in the trace output, such as:
- *
- *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
- */
-struct trace_entry {
-	unsigned char		type;
-	unsigned char		flags;
-	unsigned char		preempt_count;
-	int			pid;
-	int			tgid;
-};
-
 /*
  * Function trace entry - function address and parent function addres:
  */
@@ -265,8 +252,6 @@ struct trace_array_cpu {
 	char			comm[TASK_COMM_LEN];
 };
 
-struct trace_iterator;
-
 /*
  * The trace array - an array of per-CPU trace arrays. This is the
  * highest level data structure that individual tracers deal with.
@@ -341,15 +326,6 @@ extern void __ftrace_bad_type(void);
 		__ftrace_bad_type();					\
 	} while (0)
 
-/* Return values for print_line callback */
-enum print_line_t {
-	TRACE_TYPE_PARTIAL_LINE	= 0,	/* Retry after flushing the seq */
-	TRACE_TYPE_HANDLED	= 1,
-	TRACE_TYPE_UNHANDLED	= 2,	/* Relay to other output functions */
-	TRACE_TYPE_NO_CONSUME	= 3	/* Handled but ask to not consume */
-};
-
-
 /*
  * An option specific to a tracer. This is a boolean value.
  * The bit is the bit index that sets its value on the
@@ -428,31 +404,6 @@ struct tracer {
 
 #define TRACE_PIPE_ALL_CPU	-1
 
-/*
- * Trace iterator - used by printout routines who present trace
- * results to users and which routines might sleep, etc:
- */
-struct trace_iterator {
-	struct trace_array	*tr;
-	struct tracer		*trace;
-	void			*private;
-	int			cpu_file;
-	struct mutex		mutex;
-	struct ring_buffer_iter	*buffer_iter[NR_CPUS];
-
-	/* The below is zeroed out in pipe_read */
-	struct trace_seq	seq;
-	struct trace_entry	*ent;
-	int			cpu;
-	u64			ts;
-
-	unsigned long		iter_flags;
-	loff_t			pos;
-	long			idx;
-
-	cpumask_var_t		started;
-};
-
 int tracer_init(struct tracer *t, struct trace_array *tr);
 int tracing_is_enabled(void);
 void trace_wake_up(void);
@@ -479,15 +430,6 @@ void trace_buffer_unlock_commit(struct trace_array *tr,
 				struct ring_buffer_event *event,
 				unsigned long flags, int pc);
 
-struct ring_buffer_event *
-trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
-				  unsigned long flags, int pc);
-void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
-					unsigned long flags, int pc);
-void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
-					unsigned long flags, int pc);
-void trace_current_buffer_discard_commit(struct ring_buffer_event *event);
-
 struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
 						struct trace_array_cpu *data);
 
@@ -510,7 +452,6 @@ void tracing_sched_switch_trace(struct trace_array *tr,
 				struct task_struct *prev,
 				struct task_struct *next,
 				unsigned long flags, int pc);
-void tracing_record_cmdline(struct task_struct *tsk);
 
 void tracing_sched_wakeup_trace(struct trace_array *tr,
 				struct task_struct *wakee,
@@ -790,28 +731,6 @@ struct ftrace_event_field {
 	int			size;
 };
 
-struct ftrace_event_call {
-	char			*name;
-	char			*system;
-	struct dentry		*dir;
-	int			enabled;
-	int			(*regfunc)(void);
-	void			(*unregfunc)(void);
-	int			id;
-	int			(*raw_init)(void);
-	int			(*show_format)(struct trace_seq *s);
-	int			(*define_fields)(void);
-	struct list_head	fields;
-	int			n_preds;
-	struct filter_pred	**preds;
-
-#ifdef CONFIG_EVENT_PROFILE
-	atomic_t	profile_count;
-	int		(*profile_enable)(struct ftrace_event_call *);
-	void		(*profile_disable)(struct ftrace_event_call *);
-#endif
-};
-
 struct event_subsystem {
 	struct list_head	list;
 	const char		*name;
@@ -825,9 +744,6 @@ struct event_subsystem {
 	     (unsigned long)event < (unsigned long)__stop_ftrace_events; \
 	     event++)
 
-#define MAX_FILTER_PRED		8
-#define MAX_FILTER_STR_VAL	128
-
 struct filter_pred;
 
 typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
@@ -845,9 +761,6 @@ struct filter_pred {
 	int clear;
 };
 
-int trace_define_field(struct ftrace_event_call *call, char *type,
-		       char *name, int offset, int size);
-extern int init_preds(struct ftrace_event_call *call);
 extern void filter_free_pred(struct filter_pred *pred);
 extern void filter_print_preds(struct filter_pred **preds, int n_preds,
 			       struct trace_seq *s);
@@ -855,13 +768,9 @@ extern int filter_parse(char **pbuf, struct filter_pred *pred);
 extern int filter_add_pred(struct ftrace_event_call *call,
 			   struct filter_pred *pred);
 extern void filter_disable_preds(struct ftrace_event_call *call);
-extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
 extern void filter_free_subsystem_preds(struct event_subsystem *system);
 extern int filter_add_subsystem_pred(struct event_subsystem *system,
 				     struct filter_pred *pred);
-extern int filter_current_check_discard(struct ftrace_event_call *call,
-					void *rec,
-					struct ring_buffer_event *event);
 
 static inline int
 filter_check_discard(struct ftrace_event_call *call, void *rec,
@@ -876,14 +785,6 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
 	return 0;
 }
 
-#define __common_field(type, item)					\
-	ret = trace_define_field(event_call, #type, "common_" #item,	\
-				 offsetof(typeof(field.ent), item),	\
-				 sizeof(field.ent.item));		\
-	if (ret)							\
-		return ret;
-
-void event_trace_printk(unsigned long ip, const char *fmt, ...);
 extern struct ftrace_event_call __start_ftrace_events[];
 extern struct ftrace_event_call __stop_ftrace_events[];
 
@@ -895,25 +796,6 @@ extern struct ftrace_event_call __stop_ftrace_events[];
 extern const char *__start___trace_bprintk_fmt[];
 extern const char *__stop___trace_bprintk_fmt[];
 
-/*
- * The double __builtin_constant_p is because gcc will give us an error
- * if we try to allocate the static variable to fmt if it is not a
- * constant. Even with the outer if statement optimizing out.
- */
-#define event_trace_printk(ip, fmt, args...)				\
-do {									\
-	__trace_printk_check_format(fmt, ##args);			\
-	tracing_record_cmdline(current);				\
-	if (__builtin_constant_p(fmt)) {				\
-		static const char *trace_printk_fmt			\
-		  __attribute__((section("__trace_printk_fmt"))) =	\
-			__builtin_constant_p(fmt) ? fmt : NULL;		\
-									\
-		__trace_bprintk(ip, trace_printk_fmt, ##args);		\
-	} else								\
-		__trace_printk(ip, fmt, ##args);			\
-} while (0)
-
 #undef TRACE_EVENT_FORMAT
 #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)	\
 	extern struct ftrace_event_call event_##call;
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index 5c7cbfb65c7..6e220a8e570 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -4,18 +4,6 @@
 #include <linux/trace_seq.h>
 #include "trace.h"
 
-typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
-					      int flags);
-
-struct trace_event {
-	struct hlist_node	node;
-	int			type;
-	trace_print_func	trace;
-	trace_print_func	raw;
-	trace_print_func	hex;
-	trace_print_func	binary;
-};
-
 extern enum print_line_t
 trace_print_bprintk_msg_only(struct trace_iterator *iter);
 extern enum print_line_t
@@ -33,8 +21,6 @@ extern int trace_print_context(struct trace_iterator *iter);
 extern int trace_print_lat_context(struct trace_iterator *iter);
 
 extern struct trace_event *ftrace_find_event(int type);
-extern int register_ftrace_event(struct trace_event *event);
-extern int unregister_ftrace_event(struct trace_event *event);
 
 extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
 					 int flags);
-- 
cgit v1.2.3-70-g09d2


From f42c85e74faa422cf0bc747ed808681145448f88 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 13 Apr 2009 12:25:37 -0400
Subject: tracing/events: move the ftrace event tracing code to core

This patch moves the ftrace creation into include/trace/ftrace.h and
simplifies the work of developers in adding new tracepoints.
Just the act of creating the trace points in include/trace and including
define_trace.h will create the events in the debugfs/tracing/events
directory.

This patch removes the need of include/trace/trace_events.h

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/define_trace.h        |   4 +
 include/trace/ftrace.h              | 492 ++++++++++++++++++++++++++++++++++++
 include/trace/trace_events.h        |   7 -
 kernel/trace/Makefile               |   1 -
 kernel/trace/events.c               |  15 --
 kernel/trace/trace_events_stage_1.h |  39 ---
 kernel/trace/trace_events_stage_2.h | 170 -------------
 kernel/trace/trace_events_stage_3.h | 279 --------------------
 8 files changed, 496 insertions(+), 511 deletions(-)
 create mode 100644 include/trace/ftrace.h
 delete mode 100644 include/trace/trace_events.h
 delete mode 100644 kernel/trace/events.c
 delete mode 100644 kernel/trace/trace_events_stage_1.h
 delete mode 100644 kernel/trace/trace_events_stage_2.h
 delete mode 100644 kernel/trace/trace_events_stage_3.h

(limited to 'include')

diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index de9dc7d8508..980eb66a6e3 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -56,6 +56,10 @@
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
+#ifdef CONFIG_EVENT_TRACER
+#include <trace/ftrace.h>
+#endif
+
 #undef TRACE_HEADER_MULTI_READ
 
 /* Only undef what we defined in this file */
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
new file mode 100644
index 00000000000..955b967acd7
--- /dev/null
+++ b/include/trace/ftrace.h
@@ -0,0 +1,492 @@
+/*
+ * Stage 1 of the trace events.
+ *
+ * Override the macros in <trace/trace_events.h> to include the following:
+ *
+ * struct ftrace_raw_<call> {
+ *	struct trace_entry		ent;
+ *	<type>				<item>;
+ *	<type2>				<item2>[<len>];
+ *	[...]
+ * };
+ *
+ * The <type> <item> is created by the __field(type, item) macro or
+ * the __array(type2, item2, len) macro.
+ * We simply do "type item;", and that will create the fields
+ * in the structure.
+ */
+
+#include <linux/ftrace_event.h>
+
+#undef TRACE_FORMAT
+#define TRACE_FORMAT(call, proto, args, fmt)
+
+#undef __array
+#define __array(type, item, len)	type	item[len];
+
+#undef __field
+#define __field(type, item)		type	item;
+
+#undef TP_STRUCT__entry
+#define TP_STRUCT__entry(args...) args
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(name, proto, args, tstruct, assign, print)	\
+	struct ftrace_raw_##name {				\
+		struct trace_entry	ent;			\
+		tstruct						\
+	};							\
+	static struct ftrace_event_call event_##name
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+/*
+ * Stage 2 of the trace events.
+ *
+ * Override the macros in <trace/trace_events.h> to include the following:
+ *
+ * enum print_line_t
+ * ftrace_raw_output_<call>(struct trace_iterator *iter, int flags)
+ * {
+ *	struct trace_seq *s = &iter->seq;
+ *	struct ftrace_raw_<call> *field; <-- defined in stage 1
+ *	struct trace_entry *entry;
+ *	int ret;
+ *
+ *	entry = iter->ent;
+ *
+ *	if (entry->type != event_<call>.id) {
+ *		WARN_ON_ONCE(1);
+ *		return TRACE_TYPE_UNHANDLED;
+ *	}
+ *
+ *	field = (typeof(field))entry;
+ *
+ *	ret = trace_seq_printf(s, <TP_printk> "\n");
+ *	if (!ret)
+ *		return TRACE_TYPE_PARTIAL_LINE;
+ *
+ *	return TRACE_TYPE_HANDLED;
+ * }
+ *
+ * This is the method used to print the raw event to the trace
+ * output format. Note, this is not needed if the data is read
+ * in binary.
+ */
+
+#undef __entry
+#define __entry field
+
+#undef TP_printk
+#define TP_printk(fmt, args...) fmt "\n", args
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+enum print_line_t							\
+ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
+{									\
+	struct trace_seq *s = &iter->seq;				\
+	struct ftrace_raw_##call *field;				\
+	struct trace_entry *entry;					\
+	int ret;							\
+									\
+	entry = iter->ent;						\
+									\
+	if (entry->type != event_##call.id) {				\
+		WARN_ON_ONCE(1);					\
+		return TRACE_TYPE_UNHANDLED;				\
+	}								\
+									\
+	field = (typeof(field))entry;					\
+									\
+	ret = trace_seq_printf(s, #call ": " print);			\
+	if (!ret)							\
+		return TRACE_TYPE_PARTIAL_LINE;				\
+									\
+	return TRACE_TYPE_HANDLED;					\
+}
+	
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+/*
+ * Setup the showing format of trace point.
+ *
+ * int
+ * ftrace_format_##call(struct trace_seq *s)
+ * {
+ *	struct ftrace_raw_##call field;
+ *	int ret;
+ *
+ *	ret = trace_seq_printf(s, #type " " #item ";"
+ *			       " offset:%u; size:%u;\n",
+ *			       offsetof(struct ftrace_raw_##call, item),
+ *			       sizeof(field.type));
+ *
+ * }
+ */
+
+#undef TP_STRUCT__entry
+#define TP_STRUCT__entry(args...) args
+
+#undef __field
+#define __field(type, item)					\
+	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
+			       "offset:%u;\tsize:%u;\n",		\
+			       (unsigned int)offsetof(typeof(field), item), \
+			       (unsigned int)sizeof(field.item));	\
+	if (!ret)							\
+		return 0;
+
+#undef __array
+#define __array(type, item, len)						\
+	ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t"	\
+			       "offset:%u;\tsize:%u;\n",		\
+			       (unsigned int)offsetof(typeof(field), item), \
+			       (unsigned int)sizeof(field.item));	\
+	if (!ret)							\
+		return 0;
+
+#undef __entry
+#define __entry REC
+
+#undef TP_printk
+#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
+
+#undef TP_fast_assign
+#define TP_fast_assign(args...) args
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
+static int								\
+ftrace_format_##call(struct trace_seq *s)				\
+{									\
+	struct ftrace_raw_##call field;					\
+	int ret;							\
+									\
+	tstruct;							\
+									\
+	trace_seq_printf(s, "\nprint fmt: " print);			\
+									\
+	return ret;							\
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#undef __field
+#define __field(type, item)						\
+	ret = trace_define_field(event_call, #type, #item,		\
+				 offsetof(typeof(field), item),		\
+				 sizeof(field.item));			\
+	if (ret)							\
+		return ret;
+
+#undef __array
+#define __array(type, item, len)					\
+	BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);				\
+	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
+				 offsetof(typeof(field), item),		\
+				 sizeof(field.item));			\
+	if (ret)							\
+		return ret;
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
+int									\
+ftrace_define_fields_##call(void)					\
+{									\
+	struct ftrace_raw_##call field;					\
+	struct ftrace_event_call *event_call = &event_##call;		\
+	int ret;							\
+									\
+	__common_field(unsigned char, type);				\
+	__common_field(unsigned char, flags);				\
+	__common_field(unsigned char, preempt_count);			\
+	__common_field(int, pid);					\
+	__common_field(int, tgid);					\
+									\
+	tstruct;							\
+									\
+	return ret;							\
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+/*
+ * Stage 3 of the trace events.
+ *
+ * Override the macros in <trace/trace_events.h> to include the following:
+ *
+ * static void ftrace_event_<call>(proto)
+ * {
+ *	event_trace_printk(_RET_IP_, "<call>: " <fmt>);
+ * }
+ *
+ * static int ftrace_reg_event_<call>(void)
+ * {
+ *	int ret;
+ *
+ *	ret = register_trace_<call>(ftrace_event_<call>);
+ *	if (!ret)
+ *		pr_info("event trace: Could not activate trace point "
+ *			"probe to  <call>");
+ *	return ret;
+ * }
+ *
+ * static void ftrace_unreg_event_<call>(void)
+ * {
+ *	unregister_trace_<call>(ftrace_event_<call>);
+ * }
+ *
+ * For those macros defined with TRACE_FORMAT:
+ *
+ * static struct ftrace_event_call __used
+ * __attribute__((__aligned__(4)))
+ * __attribute__((section("_ftrace_events"))) event_<call> = {
+ *	.name			= "<call>",
+ *	.regfunc		= ftrace_reg_event_<call>,
+ *	.unregfunc		= ftrace_unreg_event_<call>,
+ * }
+ *
+ *
+ * For those macros defined with TRACE_EVENT:
+ *
+ * static struct ftrace_event_call event_<call>;
+ *
+ * static void ftrace_raw_event_<call>(proto)
+ * {
+ *	struct ring_buffer_event *event;
+ *	struct ftrace_raw_<call> *entry; <-- defined in stage 1
+ *	unsigned long irq_flags;
+ *	int pc;
+ *
+ *	local_save_flags(irq_flags);
+ *	pc = preempt_count();
+ *
+ *	event = trace_current_buffer_lock_reserve(event_<call>.id,
+ *				  sizeof(struct ftrace_raw_<call>),
+ *				  irq_flags, pc);
+ *	if (!event)
+ *		return;
+ *	entry	= ring_buffer_event_data(event);
+ *
+ *	<assign>;  <-- Here we assign the entries by the __field and
+ *			__array macros.
+ *
+ *	trace_current_buffer_unlock_commit(event, irq_flags, pc);
+ * }
+ *
+ * static int ftrace_raw_reg_event_<call>(void)
+ * {
+ *	int ret;
+ *
+ *	ret = register_trace_<call>(ftrace_raw_event_<call>);
+ *	if (!ret)
+ *		pr_info("event trace: Could not activate trace point "
+ *			"probe to <call>");
+ *	return ret;
+ * }
+ *
+ * static void ftrace_unreg_event_<call>(void)
+ * {
+ *	unregister_trace_<call>(ftrace_raw_event_<call>);
+ * }
+ *
+ * static struct trace_event ftrace_event_type_<call> = {
+ *	.trace			= ftrace_raw_output_<call>, <-- stage 2
+ * };
+ *
+ * static int ftrace_raw_init_event_<call>(void)
+ * {
+ *	int id;
+ *
+ *	id = register_ftrace_event(&ftrace_event_type_<call>);
+ *	if (!id)
+ *		return -ENODEV;
+ *	event_<call>.id = id;
+ *	return 0;
+ * }
+ *
+ * static struct ftrace_event_call __used
+ * __attribute__((__aligned__(4)))
+ * __attribute__((section("_ftrace_events"))) event_<call> = {
+ *	.name			= "<call>",
+ *	.system			= "<system>",
+ *	.raw_init		= ftrace_raw_init_event_<call>,
+ *	.regfunc		= ftrace_reg_event_<call>,
+ *	.unregfunc		= ftrace_unreg_event_<call>,
+ *	.show_format		= ftrace_format_<call>,
+ * }
+ *
+ */
+
+#undef TP_FMT
+#define TP_FMT(fmt, args...)	fmt "\n", ##args
+
+#ifdef CONFIG_EVENT_PROFILE
+#define _TRACE_PROFILE(call, proto, args)				\
+static void ftrace_profile_##call(proto)				\
+{									\
+	extern void perf_tpcounter_event(int);				\
+	perf_tpcounter_event(event_##call.id);				\
+}									\
+									\
+static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \
+{									\
+	int ret = 0;							\
+									\
+	if (!atomic_inc_return(&call->profile_count))			\
+		ret = register_trace_##call(ftrace_profile_##call);	\
+									\
+	return ret;							\
+}									\
+									\
+static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \
+{									\
+	if (atomic_add_negative(-1, &call->profile_count))		\
+		unregister_trace_##call(ftrace_profile_##call);		\
+}
+
+#define _TRACE_PROFILE_INIT(call)					\
+	.profile_count = ATOMIC_INIT(-1),				\
+	.profile_enable = ftrace_profile_enable_##call,			\
+	.profile_disable = ftrace_profile_disable_##call,
+
+#else
+#define _TRACE_PROFILE(call, proto, args)
+#define _TRACE_PROFILE_INIT(call)
+#endif
+
+#define _TRACE_FORMAT(call, proto, args, fmt)				\
+static void ftrace_event_##call(proto)					\
+{									\
+	event_trace_printk(_RET_IP_, #call ": " fmt);			\
+}									\
+									\
+static int ftrace_reg_event_##call(void)				\
+{									\
+	int ret;							\
+									\
+	ret = register_trace_##call(ftrace_event_##call);		\
+	if (ret)							\
+		pr_info("event trace: Could not activate trace point "	\
+			"probe to " #call "\n");			\
+	return ret;							\
+}									\
+									\
+static void ftrace_unreg_event_##call(void)				\
+{									\
+	unregister_trace_##call(ftrace_event_##call);			\
+}									\
+									\
+static struct ftrace_event_call event_##call;				\
+									\
+static int ftrace_init_event_##call(void)				\
+{									\
+	int id;								\
+									\
+	id = register_ftrace_event(NULL);				\
+	if (!id)							\
+		return -ENODEV;						\
+	event_##call.id = id;						\
+	return 0;							\
+}
+
+#undef TRACE_FORMAT
+#define TRACE_FORMAT(call, proto, args, fmt)				\
+_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt))		\
+_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args))			\
+static struct ftrace_event_call __used					\
+__attribute__((__aligned__(4)))						\
+__attribute__((section("_ftrace_events"))) event_##call = {		\
+	.name			= #call,				\
+	.system			= __stringify(TRACE_SYSTEM),		\
+	.raw_init		= ftrace_init_event_##call,		\
+	.regfunc		= ftrace_reg_event_##call,		\
+	.unregfunc		= ftrace_unreg_event_##call,		\
+	_TRACE_PROFILE_INIT(call)					\
+}
+
+#undef __entry
+#define __entry entry
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args))			\
+									\
+static struct ftrace_event_call event_##call;				\
+									\
+static void ftrace_raw_event_##call(proto)				\
+{									\
+	struct ftrace_event_call *call = &event_##call;			\
+	struct ring_buffer_event *event;				\
+	struct ftrace_raw_##call *entry;				\
+	unsigned long irq_flags;					\
+	int pc;								\
+									\
+	local_save_flags(irq_flags);					\
+	pc = preempt_count();						\
+									\
+	event = trace_current_buffer_lock_reserve(event_##call.id,	\
+				  sizeof(struct ftrace_raw_##call),	\
+				  irq_flags, pc);			\
+	if (!event)							\
+		return;							\
+	entry	= ring_buffer_event_data(event);			\
+									\
+	assign;								\
+									\
+	if (!filter_current_check_discard(call, entry, event))		\
+		trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
+}									\
+									\
+static int ftrace_raw_reg_event_##call(void)				\
+{									\
+	int ret;							\
+									\
+	ret = register_trace_##call(ftrace_raw_event_##call);		\
+	if (ret)							\
+		pr_info("event trace: Could not activate trace point "	\
+			"probe to " #call "\n");			\
+	return ret;							\
+}									\
+									\
+static void ftrace_raw_unreg_event_##call(void)				\
+{									\
+	unregister_trace_##call(ftrace_raw_event_##call);		\
+}									\
+									\
+static struct trace_event ftrace_event_type_##call = {			\
+	.trace			= ftrace_raw_output_##call,		\
+};									\
+									\
+static int ftrace_raw_init_event_##call(void)				\
+{									\
+	int id;								\
+									\
+	id = register_ftrace_event(&ftrace_event_type_##call);		\
+	if (!id)							\
+		return -ENODEV;						\
+	event_##call.id = id;						\
+	INIT_LIST_HEAD(&event_##call.fields);				\
+	init_preds(&event_##call);					\
+	return 0;							\
+}									\
+									\
+static struct ftrace_event_call __used					\
+__attribute__((__aligned__(4)))						\
+__attribute__((section("_ftrace_events"))) event_##call = {		\
+	.name			= #call,				\
+	.system			= __stringify(TRACE_SYSTEM),		\
+	.raw_init		= ftrace_raw_init_event_##call,		\
+	.regfunc		= ftrace_raw_reg_event_##call,		\
+	.unregfunc		= ftrace_raw_unreg_event_##call,	\
+	.show_format		= ftrace_format_##call,			\
+	.define_fields		= ftrace_define_fields_##call,		\
+	_TRACE_PROFILE_INIT(call)					\
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#undef _TRACE_PROFILE
+#undef _TRACE_PROFILE_INIT
+
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
deleted file mode 100644
index 13d6b85668c..00000000000
--- a/include/trace/trace_events.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* trace/<type>.h here */
-
-#include <trace/sched.h>
-#include <trace/irq.h>
-#include <trace/lockdep.h>
-#include <trace/skb.h>
-#include <trace/kmem.h>
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 3ad367e7c97..fb9d7f96489 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -41,7 +41,6 @@ obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
 obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
 obj-$(CONFIG_BLK_DEV_IO_TRACE)	+= blktrace.o
 obj-$(CONFIG_EVENT_TRACING) += trace_events.o
-obj-$(CONFIG_EVENT_TRACER) += events.o
 obj-$(CONFIG_EVENT_TRACING) += trace_export.o
 obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
 obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
diff --git a/kernel/trace/events.c b/kernel/trace/events.c
deleted file mode 100644
index 5a35a914f0e..00000000000
--- a/kernel/trace/events.c
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * This is the place to register all trace points as events.
- */
-
-#include <linux/stringify.h>
-
-#include <trace/trace_events.h>
-
-#include "trace_output.h"
-
-#define TRACE_HEADER_MULTI_READ
-#include "trace_events_stage_1.h"
-#include "trace_events_stage_2.h"
-#include "trace_events_stage_3.h"
-
diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h
deleted file mode 100644
index 475f46a047a..00000000000
--- a/kernel/trace/trace_events_stage_1.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Stage 1 of the trace events.
- *
- * Override the macros in <trace/trace_events.h> to include the following:
- *
- * struct ftrace_raw_<call> {
- *	struct trace_entry		ent;
- *	<type>				<item>;
- *	<type2>				<item2>[<len>];
- *	[...]
- * };
- *
- * The <type> <item> is created by the __field(type, item) macro or
- * the __array(type2, item2, len) macro.
- * We simply do "type item;", and that will create the fields
- * in the structure.
- */
-
-#undef TRACE_FORMAT
-#define TRACE_FORMAT(call, proto, args, fmt)
-
-#undef __array
-#define __array(type, item, len)	type	item[len];
-
-#undef __field
-#define __field(type, item)		type	item;
-
-#undef TP_STRUCT__entry
-#define TP_STRUCT__entry(args...) args
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(name, proto, args, tstruct, assign, print)	\
-	struct ftrace_raw_##name {				\
-		struct trace_entry	ent;			\
-		tstruct						\
-	};							\
-	static struct ftrace_event_call event_##name
-
-#include <trace/trace_events.h>
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h
deleted file mode 100644
index aa4a67a0656..00000000000
--- a/kernel/trace/trace_events_stage_2.h
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * Stage 2 of the trace events.
- *
- * Override the macros in <trace/trace_events.h> to include the following:
- *
- * enum print_line_t
- * ftrace_raw_output_<call>(struct trace_iterator *iter, int flags)
- * {
- *	struct trace_seq *s = &iter->seq;
- *	struct ftrace_raw_<call> *field; <-- defined in stage 1
- *	struct trace_entry *entry;
- *	int ret;
- *
- *	entry = iter->ent;
- *
- *	if (entry->type != event_<call>.id) {
- *		WARN_ON_ONCE(1);
- *		return TRACE_TYPE_UNHANDLED;
- *	}
- *
- *	field = (typeof(field))entry;
- *
- *	ret = trace_seq_printf(s, <TP_printk> "\n");
- *	if (!ret)
- *		return TRACE_TYPE_PARTIAL_LINE;
- *
- *	return TRACE_TYPE_HANDLED;
- * }
- *
- * This is the method used to print the raw event to the trace
- * output format. Note, this is not needed if the data is read
- * in binary.
- */
-
-#undef __entry
-#define __entry field
-
-#undef TP_printk
-#define TP_printk(fmt, args...) fmt "\n", args
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
-enum print_line_t							\
-ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
-{									\
-	struct trace_seq *s = &iter->seq;				\
-	struct ftrace_raw_##call *field;				\
-	struct trace_entry *entry;					\
-	int ret;							\
-									\
-	entry = iter->ent;						\
-									\
-	if (entry->type != event_##call.id) {				\
-		WARN_ON_ONCE(1);					\
-		return TRACE_TYPE_UNHANDLED;				\
-	}								\
-									\
-	field = (typeof(field))entry;					\
-									\
-	ret = trace_seq_printf(s, #call ": " print);			\
-	if (!ret)							\
-		return TRACE_TYPE_PARTIAL_LINE;				\
-									\
-	return TRACE_TYPE_HANDLED;					\
-}
-	
-#include <trace/trace_events.h>
-
-/*
- * Setup the showing format of trace point.
- *
- * int
- * ftrace_format_##call(struct trace_seq *s)
- * {
- *	struct ftrace_raw_##call field;
- *	int ret;
- *
- *	ret = trace_seq_printf(s, #type " " #item ";"
- *			       " offset:%u; size:%u;\n",
- *			       offsetof(struct ftrace_raw_##call, item),
- *			       sizeof(field.type));
- *
- * }
- */
-
-#undef TP_STRUCT__entry
-#define TP_STRUCT__entry(args...) args
-
-#undef __field
-#define __field(type, item)					\
-	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
-			       "offset:%u;\tsize:%u;\n",		\
-			       (unsigned int)offsetof(typeof(field), item), \
-			       (unsigned int)sizeof(field.item));	\
-	if (!ret)							\
-		return 0;
-
-#undef __array
-#define __array(type, item, len)						\
-	ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t"	\
-			       "offset:%u;\tsize:%u;\n",		\
-			       (unsigned int)offsetof(typeof(field), item), \
-			       (unsigned int)sizeof(field.item));	\
-	if (!ret)							\
-		return 0;
-
-#undef __entry
-#define __entry REC
-
-#undef TP_printk
-#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
-
-#undef TP_fast_assign
-#define TP_fast_assign(args...) args
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
-static int								\
-ftrace_format_##call(struct trace_seq *s)				\
-{									\
-	struct ftrace_raw_##call field;					\
-	int ret;							\
-									\
-	tstruct;							\
-									\
-	trace_seq_printf(s, "\nprint fmt: " print);			\
-									\
-	return ret;							\
-}
-
-#include <trace/trace_events.h>
-
-#undef __field
-#define __field(type, item)						\
-	ret = trace_define_field(event_call, #type, #item,		\
-				 offsetof(typeof(field), item),		\
-				 sizeof(field.item));			\
-	if (ret)							\
-		return ret;
-
-#undef __array
-#define __array(type, item, len)					\
-	BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);				\
-	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
-				 offsetof(typeof(field), item),		\
-				 sizeof(field.item));			\
-	if (ret)							\
-		return ret;
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
-int									\
-ftrace_define_fields_##call(void)					\
-{									\
-	struct ftrace_raw_##call field;					\
-	struct ftrace_event_call *event_call = &event_##call;		\
-	int ret;							\
-									\
-	__common_field(unsigned char, type);				\
-	__common_field(unsigned char, flags);				\
-	__common_field(unsigned char, preempt_count);			\
-	__common_field(int, pid);					\
-	__common_field(int, tgid);					\
-									\
-	tstruct;							\
-									\
-	return ret;							\
-}
-
-#include <trace/trace_events.h>
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h
deleted file mode 100644
index 45c04e1f38d..00000000000
--- a/kernel/trace/trace_events_stage_3.h
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- * Stage 3 of the trace events.
- *
- * Override the macros in <trace/trace_events.h> to include the following:
- *
- * static void ftrace_event_<call>(proto)
- * {
- *	event_trace_printk(_RET_IP_, "<call>: " <fmt>);
- * }
- *
- * static int ftrace_reg_event_<call>(void)
- * {
- *	int ret;
- *
- *	ret = register_trace_<call>(ftrace_event_<call>);
- *	if (!ret)
- *		pr_info("event trace: Could not activate trace point "
- *			"probe to  <call>");
- *	return ret;
- * }
- *
- * static void ftrace_unreg_event_<call>(void)
- * {
- *	unregister_trace_<call>(ftrace_event_<call>);
- * }
- *
- * For those macros defined with TRACE_FORMAT:
- *
- * static struct ftrace_event_call __used
- * __attribute__((__aligned__(4)))
- * __attribute__((section("_ftrace_events"))) event_<call> = {
- *	.name			= "<call>",
- *	.regfunc		= ftrace_reg_event_<call>,
- *	.unregfunc		= ftrace_unreg_event_<call>,
- * }
- *
- *
- * For those macros defined with TRACE_EVENT:
- *
- * static struct ftrace_event_call event_<call>;
- *
- * static void ftrace_raw_event_<call>(proto)
- * {
- *	struct ring_buffer_event *event;
- *	struct ftrace_raw_<call> *entry; <-- defined in stage 1
- *	unsigned long irq_flags;
- *	int pc;
- *
- *	local_save_flags(irq_flags);
- *	pc = preempt_count();
- *
- *	event = trace_current_buffer_lock_reserve(event_<call>.id,
- *				  sizeof(struct ftrace_raw_<call>),
- *				  irq_flags, pc);
- *	if (!event)
- *		return;
- *	entry	= ring_buffer_event_data(event);
- *
- *	<assign>;  <-- Here we assign the entries by the __field and
- *			__array macros.
- *
- *	trace_current_buffer_unlock_commit(event, irq_flags, pc);
- * }
- *
- * static int ftrace_raw_reg_event_<call>(void)
- * {
- *	int ret;
- *
- *	ret = register_trace_<call>(ftrace_raw_event_<call>);
- *	if (!ret)
- *		pr_info("event trace: Could not activate trace point "
- *			"probe to <call>");
- *	return ret;
- * }
- *
- * static void ftrace_unreg_event_<call>(void)
- * {
- *	unregister_trace_<call>(ftrace_raw_event_<call>);
- * }
- *
- * static struct trace_event ftrace_event_type_<call> = {
- *	.trace			= ftrace_raw_output_<call>, <-- stage 2
- * };
- *
- * static int ftrace_raw_init_event_<call>(void)
- * {
- *	int id;
- *
- *	id = register_ftrace_event(&ftrace_event_type_<call>);
- *	if (!id)
- *		return -ENODEV;
- *	event_<call>.id = id;
- *	return 0;
- * }
- *
- * static struct ftrace_event_call __used
- * __attribute__((__aligned__(4)))
- * __attribute__((section("_ftrace_events"))) event_<call> = {
- *	.name			= "<call>",
- *	.system			= "<system>",
- *	.raw_init		= ftrace_raw_init_event_<call>,
- *	.regfunc		= ftrace_reg_event_<call>,
- *	.unregfunc		= ftrace_unreg_event_<call>,
- *	.show_format		= ftrace_format_<call>,
- * }
- *
- */
-
-#undef TP_FMT
-#define TP_FMT(fmt, args...)	fmt "\n", ##args
-
-#ifdef CONFIG_EVENT_PROFILE
-#define _TRACE_PROFILE(call, proto, args)				\
-static void ftrace_profile_##call(proto)				\
-{									\
-	extern void perf_tpcounter_event(int);				\
-	perf_tpcounter_event(event_##call.id);				\
-}									\
-									\
-static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \
-{									\
-	int ret = 0;							\
-									\
-	if (!atomic_inc_return(&call->profile_count))			\
-		ret = register_trace_##call(ftrace_profile_##call);	\
-									\
-	return ret;							\
-}									\
-									\
-static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \
-{									\
-	if (atomic_add_negative(-1, &call->profile_count))		\
-		unregister_trace_##call(ftrace_profile_##call);		\
-}
-
-#define _TRACE_PROFILE_INIT(call)					\
-	.profile_count = ATOMIC_INIT(-1),				\
-	.profile_enable = ftrace_profile_enable_##call,			\
-	.profile_disable = ftrace_profile_disable_##call,
-
-#else
-#define _TRACE_PROFILE(call, proto, args)
-#define _TRACE_PROFILE_INIT(call)
-#endif
-
-#define _TRACE_FORMAT(call, proto, args, fmt)				\
-static void ftrace_event_##call(proto)					\
-{									\
-	event_trace_printk(_RET_IP_, #call ": " fmt);			\
-}									\
-									\
-static int ftrace_reg_event_##call(void)				\
-{									\
-	int ret;							\
-									\
-	ret = register_trace_##call(ftrace_event_##call);		\
-	if (ret)							\
-		pr_info("event trace: Could not activate trace point "	\
-			"probe to " #call "\n");			\
-	return ret;							\
-}									\
-									\
-static void ftrace_unreg_event_##call(void)				\
-{									\
-	unregister_trace_##call(ftrace_event_##call);			\
-}									\
-									\
-static struct ftrace_event_call event_##call;				\
-									\
-static int ftrace_init_event_##call(void)				\
-{									\
-	int id;								\
-									\
-	id = register_ftrace_event(NULL);				\
-	if (!id)							\
-		return -ENODEV;						\
-	event_##call.id = id;						\
-	return 0;							\
-}
-
-#undef TRACE_FORMAT
-#define TRACE_FORMAT(call, proto, args, fmt)				\
-_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt))		\
-_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args))			\
-static struct ftrace_event_call __used					\
-__attribute__((__aligned__(4)))						\
-__attribute__((section("_ftrace_events"))) event_##call = {		\
-	.name			= #call,				\
-	.system			= __stringify(TRACE_SYSTEM),		\
-	.raw_init		= ftrace_init_event_##call,		\
-	.regfunc		= ftrace_reg_event_##call,		\
-	.unregfunc		= ftrace_unreg_event_##call,		\
-	_TRACE_PROFILE_INIT(call)					\
-}
-
-#undef __entry
-#define __entry entry
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
-_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args))			\
-									\
-static struct ftrace_event_call event_##call;				\
-									\
-static void ftrace_raw_event_##call(proto)				\
-{									\
-	struct ftrace_event_call *call = &event_##call;			\
-	struct ring_buffer_event *event;				\
-	struct ftrace_raw_##call *entry;				\
-	unsigned long irq_flags;					\
-	int pc;								\
-									\
-	local_save_flags(irq_flags);					\
-	pc = preempt_count();						\
-									\
-	event = trace_current_buffer_lock_reserve(event_##call.id,	\
-				  sizeof(struct ftrace_raw_##call),	\
-				  irq_flags, pc);			\
-	if (!event)							\
-		return;							\
-	entry	= ring_buffer_event_data(event);			\
-									\
-	assign;								\
-									\
-	if (!filter_current_check_discard(call, entry, event))		\
-		trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
-}									\
-									\
-static int ftrace_raw_reg_event_##call(void)				\
-{									\
-	int ret;							\
-									\
-	ret = register_trace_##call(ftrace_raw_event_##call);		\
-	if (ret)							\
-		pr_info("event trace: Could not activate trace point "	\
-			"probe to " #call "\n");			\
-	return ret;							\
-}									\
-									\
-static void ftrace_raw_unreg_event_##call(void)				\
-{									\
-	unregister_trace_##call(ftrace_raw_event_##call);		\
-}									\
-									\
-static struct trace_event ftrace_event_type_##call = {			\
-	.trace			= ftrace_raw_output_##call,		\
-};									\
-									\
-static int ftrace_raw_init_event_##call(void)				\
-{									\
-	int id;								\
-									\
-	id = register_ftrace_event(&ftrace_event_type_##call);		\
-	if (!id)							\
-		return -ENODEV;						\
-	event_##call.id = id;						\
-	INIT_LIST_HEAD(&event_##call.fields);				\
-	init_preds(&event_##call);					\
-	return 0;							\
-}									\
-									\
-static struct ftrace_event_call __used					\
-__attribute__((__aligned__(4)))						\
-__attribute__((section("_ftrace_events"))) event_##call = {		\
-	.name			= #call,				\
-	.system			= __stringify(TRACE_SYSTEM),		\
-	.raw_init		= ftrace_raw_init_event_##call,		\
-	.regfunc		= ftrace_raw_reg_event_##call,		\
-	.unregfunc		= ftrace_raw_unreg_event_##call,	\
-	.show_format		= ftrace_format_##call,			\
-	.define_fields		= ftrace_define_fields_##call,		\
-	_TRACE_PROFILE_INIT(call)					\
-}
-
-#include <trace/trace_events.h>
-
-#undef _TRACE_PROFILE
-#undef _TRACE_PROFILE_INIT
-
-- 
cgit v1.2.3-70-g09d2


From a59fd6027218bd7c994e39d14afe0242f895144f Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 10 Apr 2009 13:52:20 -0400
Subject: tracing/events: convert event call sites to use a link list

Impact: makes it possible to define events in modules

The events are created by reading down the section that they are linked
in by the macros. But this is not scalable to modules. This patch converts
the manipulations to use a global link list, and on boot up it adds
the items in the section to the list.

This change will allow modules to add their tracing events to the list as
well.

Note, this change alone does not permit modules to use the TRACE_EVENT macros,
but the change is needed for them to eventually do so.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h       |  1 +
 kernel/trace/trace.h               | 13 +---------
 kernel/trace/trace_event_profile.c |  4 +--
 kernel/trace/trace_events.c        | 51 +++++++++++++++++++++++---------------
 kernel/trace/trace_events_filter.c |  8 +++---
 5 files changed, 39 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 496b76d9f9d..17810853b4f 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -83,6 +83,7 @@ void trace_current_buffer_discard_commit(struct ring_buffer_event *event);
 void tracing_record_cmdline(struct task_struct *tsk);
 
 struct ftrace_event_call {
+	struct list_head	list;
 	char			*name;
 	char			*system;
 	struct dentry		*dir;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6bcdf4af9b2..8817c18ef97 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -739,11 +739,6 @@ struct event_subsystem {
 	struct filter_pred	**preds;
 };
 
-#define events_for_each(event)						\
-	for (event = __start_ftrace_events;				\
-	     (unsigned long)event < (unsigned long)__stop_ftrace_events; \
-	     event++)
-
 struct filter_pred;
 
 typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
@@ -785,13 +780,7 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
 	return 0;
 }
 
-extern struct ftrace_event_call __start_ftrace_events[];
-extern struct ftrace_event_call __stop_ftrace_events[];
-
-#define for_each_event(event)						\
-	for (event = __start_ftrace_events;				\
-	     (unsigned long)event < (unsigned long)__stop_ftrace_events; \
-	     event++)
+extern struct list_head ftrace_events;
 
 extern const char *__start___trace_bprintk_fmt[];
 extern const char *__stop___trace_bprintk_fmt[];
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 199de9c7422..7bf2ad65eee 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -11,7 +11,7 @@ int ftrace_profile_enable(int event_id)
 {
 	struct ftrace_event_call *event;
 
-	for_each_event(event) {
+	list_for_each_entry(event, &ftrace_events, list) {
 		if (event->id == event_id)
 			return event->profile_enable(event);
 	}
@@ -23,7 +23,7 @@ void ftrace_profile_disable(int event_id)
 {
 	struct ftrace_event_call *event;
 
-	for_each_event(event) {
+	list_for_each_entry(event, &ftrace_events, list) {
 		if (event->id == event_id)
 			return event->profile_disable(event);
 	}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index ead68ac9919..5c66aaff07c 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -19,6 +19,8 @@
 
 static DEFINE_MUTEX(event_mutex);
 
+LIST_HEAD(ftrace_events);
+
 int trace_define_field(struct ftrace_event_call *call, char *type,
 		       char *name, int offset, int size)
 {
@@ -54,16 +56,14 @@ err:
 
 static void ftrace_clear_events(void)
 {
-	struct ftrace_event_call *call = (void *)__start_ftrace_events;
-
+	struct ftrace_event_call *call;
 
-	while ((unsigned long)call < (unsigned long)__stop_ftrace_events) {
+	list_for_each_entry(call, &ftrace_events, list) {
 
 		if (call->enabled) {
 			call->enabled = 0;
 			call->unregfunc();
 		}
-		call++;
 	}
 }
 
@@ -89,7 +89,7 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call,
 
 static int ftrace_set_clr_event(char *buf, int set)
 {
-	struct ftrace_event_call *call = __start_ftrace_events;
+	struct ftrace_event_call *call;
 	char *event = NULL, *sub = NULL, *match;
 	int ret = -EINVAL;
 
@@ -118,7 +118,7 @@ static int ftrace_set_clr_event(char *buf, int set)
 	}
 
 	mutex_lock(&event_mutex);
-	for_each_event(call) {
+	list_for_each_entry(call, &ftrace_events, list) {
 
 		if (!call->name || !call->regfunc)
 			continue;
@@ -224,15 +224,17 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
 static void *
 t_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct ftrace_event_call *call = m->private;
-	struct ftrace_event_call *next = call;
+	struct list_head *list = m->private;
+	struct ftrace_event_call *call;
 
 	(*pos)++;
 
 	for (;;) {
-		if ((unsigned long)call >= (unsigned long)__stop_ftrace_events)
+		if (list == &ftrace_events)
 			return NULL;
 
+		call = list_entry(list, struct ftrace_event_call, list);
+
 		/*
 		 * The ftrace subsystem is for showing formats only.
 		 * They can not be enabled or disabled via the event files.
@@ -240,11 +242,10 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 		if (call->regfunc)
 			break;
 
-		call++;
-		next = call;
+		list = list->next;
 	}
 
-	m->private = ++next;
+	m->private = list->next;
 
 	return call;
 }
@@ -257,22 +258,23 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 static void *
 s_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct ftrace_event_call *call = m->private;
-	struct ftrace_event_call *next;
+	struct list_head *list = m->private;
+	struct ftrace_event_call *call;
 
 	(*pos)++;
 
  retry:
-	if ((unsigned long)call >= (unsigned long)__stop_ftrace_events)
+	if (list == &ftrace_events)
 		return NULL;
 
+	call = list_entry(list, struct ftrace_event_call, list);
+
 	if (!call->enabled) {
-		call++;
+		list = list->next;
 		goto retry;
 	}
 
-	next = call;
-	m->private = ++next;
+	m->private = list->next;
 
 	return call;
 }
@@ -312,7 +314,7 @@ ftrace_event_seq_open(struct inode *inode, struct file *file)
 	if (!ret) {
 		struct seq_file *m = file->private_data;
 
-		m->private = __start_ftrace_events;
+		m->private = ftrace_events.next;
 	}
 	return ret;
 }
@@ -797,9 +799,17 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
 	return 0;
 }
 
+extern struct ftrace_event_call __start_ftrace_events[];
+extern struct ftrace_event_call __stop_ftrace_events[];
+
+#define for_each_event(event)						\
+	for (event = __start_ftrace_events;				\
+	     (unsigned long)event < (unsigned long)__stop_ftrace_events; \
+	     event++)
+
 static __init int event_trace_init(void)
 {
-	struct ftrace_event_call *call = __start_ftrace_events;
+	struct ftrace_event_call *call;
 	struct dentry *d_tracer;
 	struct dentry *entry;
 	struct dentry *d_events;
@@ -830,6 +840,7 @@ static __init int event_trace_init(void)
 		/* The linker may leave blanks */
 		if (!call->name)
 			continue;
+		list_add(&call->list, &ftrace_events);
 		event_create_dir(call, d_events);
 	}
 
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index de42dad42a8..d30b06b02b4 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -223,7 +223,7 @@ oom:
 
 void filter_free_subsystem_preds(struct event_subsystem *system)
 {
-	struct ftrace_event_call *call = __start_ftrace_events;
+	struct ftrace_event_call *call;
 	int i;
 
 	if (system->n_preds) {
@@ -234,7 +234,7 @@ void filter_free_subsystem_preds(struct event_subsystem *system)
 		system->n_preds = 0;
 	}
 
-	events_for_each(call) {
+	list_for_each_entry(call, &ftrace_events, list) {
 		if (!call->define_fields)
 			continue;
 
@@ -320,7 +320,7 @@ int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred)
 int filter_add_subsystem_pred(struct event_subsystem *system,
 			      struct filter_pred *pred)
 {
-	struct ftrace_event_call *call = __start_ftrace_events;
+	struct ftrace_event_call *call;
 
 	if (system->n_preds && !pred->compound)
 		filter_free_subsystem_preds(system);
@@ -337,7 +337,7 @@ int filter_add_subsystem_pred(struct event_subsystem *system,
 
 	system->preds[system->n_preds] = pred;
 
-	events_for_each(call) {
+	list_for_each_entry(call, &ftrace_events, list) {
 		int err;
 
 		if (!call->define_fields)
-- 
cgit v1.2.3-70-g09d2


From 6d723736e472f7a0cd5b62c84152fceead241328 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 10 Apr 2009 14:53:50 -0400
Subject: tracing/events: add support for modules to TRACE_EVENT

Impact: allow modules to add TRACE_EVENTS on load

This patch adds the final hooks to allow modules to use the TRACE_EVENT
macro. A notifier and a data structure are used to link the TRACE_EVENTs
defined in the module to connect them with the ftrace event tracing system.

It also adds the necessary automated clean ups to the trace events when a
module is removed.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h |   3 +
 include/linux/module.h       |   4 ++
 include/linux/trace_seq.h    |   2 +
 include/trace/ftrace.h       |   1 +
 kernel/module.c              |   7 +++
 kernel/trace/trace_events.c  | 128 ++++++++++++++++++++++++++++++++-----------
 6 files changed, 113 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 17810853b4f..75f3ac01a87 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -7,6 +7,7 @@
 
 struct trace_array;
 struct tracer;
+struct dentry;
 
 /*
  * The trace entry - the most basic unit of tracing. This is what
@@ -87,6 +88,7 @@ struct ftrace_event_call {
 	char			*name;
 	char			*system;
 	struct dentry		*dir;
+	struct trace_event	*event;
 	int			enabled;
 	int			(*regfunc)(void);
 	void			(*unregfunc)(void);
@@ -97,6 +99,7 @@ struct ftrace_event_call {
 	struct list_head	fields;
 	int			n_preds;
 	struct filter_pred	**preds;
+	void			*mod;
 
 #ifdef CONFIG_EVENT_PROFILE
 	atomic_t	profile_count;
diff --git a/include/linux/module.h b/include/linux/module.h
index 627ac082e2a..6155fa44168 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -337,6 +337,10 @@ struct module
 	const char **trace_bprintk_fmt_start;
 	unsigned int num_trace_bprintk_fmt;
 #endif
+#ifdef CONFIG_EVENT_TRACING
+	struct ftrace_event_call *trace_events;
+	unsigned int num_trace_events;
+#endif
 
 #ifdef CONFIG_MODULE_UNLOAD
 	/* What modules depend on me? */
diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index 28051da876d..15ca2c71af1 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -1,6 +1,8 @@
 #ifndef _LINUX_TRACE_SEQ_H
 #define _LINUX_TRACE_SEQ_H
 
+#include <linux/fs.h>
+
 /*
  * Trace sequences are used to allow a function to call several other functions
  * to create a string of data to use (up to a max of PAGE_SIZE.
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 955b967acd7..60c5323bee6 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -477,6 +477,7 @@ __attribute__((__aligned__(4)))						\
 __attribute__((section("_ftrace_events"))) event_##call = {		\
 	.name			= #call,				\
 	.system			= __stringify(TRACE_SYSTEM),		\
+	.event			= &ftrace_event_type_##call,		\
 	.raw_init		= ftrace_raw_init_event_##call,		\
 	.regfunc		= ftrace_raw_reg_event_##call,		\
 	.unregfunc		= ftrace_raw_unreg_event_##call,	\
diff --git a/kernel/module.c b/kernel/module.c
index e797812a4d9..a0394706f10 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -18,6 +18,7 @@
 */
 #include <linux/module.h>
 #include <linux/moduleloader.h>
+#include <linux/ftrace_event.h>
 #include <linux/init.h>
 #include <linux/kallsyms.h>
 #include <linux/fs.h>
@@ -2172,6 +2173,12 @@ static noinline struct module *load_module(void __user *umod,
 					sizeof(*mod->tracepoints),
 					&mod->num_tracepoints);
 #endif
+#ifdef CONFIG_EVENT_TRACING
+	mod->trace_events = section_objs(hdr, sechdrs, secstrings,
+					 "_ftrace_events",
+					 sizeof(*mod->trace_events),
+					 &mod->num_trace_events);
+#endif
 
 #ifdef CONFIG_MODVERSIONS
 	if ((mod->num_syms && !mod->crcs)
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 8b9e621b80b..a4b177720a6 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -713,7 +713,13 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
 		return d_events;
 	}
 
-	system->name = name;
+	system->name = kstrdup(name, GFP_KERNEL);
+	if (!system->name) {
+		debugfs_remove(system->entry);
+		kfree(system);
+		return d_events;
+	}
+
 	list_add(&system->list, &event_subsystems);
 
 	system->preds = NULL;
@@ -738,7 +744,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
 	 * If the trace point header did not define TRACE_SYSTEM
 	 * then the system would be called "TRACE_SYSTEM".
 	 */
-	if (strcmp(call->system, "TRACE_SYSTEM") != 0)
+	if (strcmp(call->system, TRACE_SYSTEM) != 0)
 		d_events = event_subsystem_dir(call->system, d_events);
 
 	if (call->raw_init) {
@@ -757,21 +763,13 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
 		return -1;
 	}
 
-	if (call->regfunc) {
-		entry = debugfs_create_file("enable", 0644, call->dir, call,
-					    &ftrace_enable_fops);
-		if (!entry)
-			pr_warning("Could not create debugfs "
-				   "'%s/enable' entry\n", call->name);
-	}
+	if (call->regfunc)
+		entry = trace_create_file("enable", 0644, call->dir, call,
+					  &ftrace_enable_fops);
 
-	if (call->id) {
-		entry = debugfs_create_file("id", 0444, call->dir, call,
-				&ftrace_event_id_fops);
-		if (!entry)
-			pr_warning("Could not create debugfs '%s/id' entry\n",
-					call->name);
-	}
+	if (call->id)
+		entry = trace_create_file("id", 0444, call->dir, call,
+					  &ftrace_event_id_fops);
 
 	if (call->define_fields) {
 		ret = call->define_fields();
@@ -780,40 +778,102 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
 				   " events/%s\n", call->name);
 			return ret;
 		}
-		entry = debugfs_create_file("filter", 0644, call->dir, call,
-					    &ftrace_event_filter_fops);
-		if (!entry)
-			pr_warning("Could not create debugfs "
-				   "'%s/filter' entry\n", call->name);
+		entry = trace_create_file("filter", 0644, call->dir, call,
+					  &ftrace_event_filter_fops);
 	}
 
 	/* A trace may not want to export its format */
 	if (!call->show_format)
 		return 0;
 
-	entry = debugfs_create_file("format", 0444, call->dir, call,
-				    &ftrace_event_format_fops);
-	if (!entry)
-		pr_warning("Could not create debugfs "
-			   "'%s/format' entry\n", call->name);
+	entry = trace_create_file("format", 0444, call->dir, call,
+				  &ftrace_event_format_fops);
+
+	return 0;
+}
+
+#define for_each_event(event, start, end)			\
+	for (event = start;					\
+	     (unsigned long)event < (unsigned long)end;		\
+	     event++)
+
+static void trace_module_add_events(struct module *mod)
+{
+	struct ftrace_event_call *call, *start, *end;
+	struct dentry *d_events;
+
+	start = mod->trace_events;
+	end = mod->trace_events + mod->num_trace_events;
+
+	if (start == end)
+		return;
+
+	d_events = event_trace_events_dir();
+	if (!d_events)
+		return;
+
+	for_each_event(call, start, end) {
+		/* The linker may leave blanks */
+		if (!call->name)
+			continue;
+		call->mod = mod;
+		list_add(&call->list, &ftrace_events);
+		event_create_dir(call, d_events);
+	}
+}
+
+static void trace_module_remove_events(struct module *mod)
+{
+	struct ftrace_event_call *call, *p;
+
+	list_for_each_entry_safe(call, p, &ftrace_events, list) {
+		if (call->mod == mod) {
+			if (call->enabled) {
+				call->enabled = 0;
+				call->unregfunc();
+			}
+			if (call->event)
+				unregister_ftrace_event(call->event);
+			debugfs_remove_recursive(call->dir);
+			list_del(&call->list);
+		}
+	}
+}
+
+int trace_module_notify(struct notifier_block *self,
+			unsigned long val, void *data)
+{
+	struct module *mod = data;
+
+	mutex_lock(&event_mutex);
+	switch (val) {
+	case MODULE_STATE_COMING:
+		trace_module_add_events(mod);
+		break;
+	case MODULE_STATE_GOING:
+		trace_module_remove_events(mod);
+		break;
+	}
+	mutex_unlock(&event_mutex);
 
 	return 0;
 }
 
+struct notifier_block trace_module_nb = {
+	.notifier_call = trace_module_notify,
+	.priority = 0,
+};
+
 extern struct ftrace_event_call __start_ftrace_events[];
 extern struct ftrace_event_call __stop_ftrace_events[];
 
-#define for_each_event(event)						\
-	for (event = __start_ftrace_events;				\
-	     (unsigned long)event < (unsigned long)__stop_ftrace_events; \
-	     event++)
-
 static __init int event_trace_init(void)
 {
 	struct ftrace_event_call *call;
 	struct dentry *d_tracer;
 	struct dentry *entry;
 	struct dentry *d_events;
+	int ret;
 
 	d_tracer = tracing_init_dentry();
 	if (!d_tracer)
@@ -837,7 +897,7 @@ static __init int event_trace_init(void)
 	if (!d_events)
 		return 0;
 
-	for_each_event(call) {
+	for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
 		/* The linker may leave blanks */
 		if (!call->name)
 			continue;
@@ -845,6 +905,10 @@ static __init int event_trace_init(void)
 		event_create_dir(call, d_events);
 	}
 
+	ret = register_module_notifier(&trace_module_nb);
+	if (!ret)
+		pr_warning("Failed to register trace events module notifier\n");
+
 	return 0;
 }
 fs_initcall(event_trace_init);
-- 
cgit v1.2.3-70-g09d2


From ecda8ae02a08ef065ff387f5cb2a2d4999da2408 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 14 Apr 2009 18:49:38 -0400
Subject: tracing/events: fix lockdep system name

Impact: fix compile error of lockdep event tracer

Ingo Molnar pointed out that the system name for the lockdep tracer was "lock"
which is used to include the event trace file name. It should be "lockdep"

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/lockdep.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h
index 4d301e758de..45e326b5c7f 100644
--- a/include/trace/lockdep.h
+++ b/include/trace/lockdep.h
@@ -5,7 +5,7 @@
 #include <linux/tracepoint.h>
 
 #undef TRACE_SYSTEM
-#define TRACE_SYSTEM lock
+#define TRACE_SYSTEM lockdep
 
 #ifdef CONFIG_LOCKDEP
 
-- 
cgit v1.2.3-70-g09d2


From ad8d75fff811a6a230f7f43b05a6483099349533 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 14 Apr 2009 19:39:12 -0400
Subject: tracing/events: move trace point headers into include/trace/events

Impact: clean up

Create a sub directory in include/trace called events to keep the
trace point headers in their own separate directory. Only headers that
declare trace points should be defined in this directory.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Zhao Lei <zhaolei@cn.fujitsu.com>
Cc: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/kmemtrace.h         |   2 +-
 include/trace/define_trace.h      |   2 +-
 include/trace/events/irq.h        |  57 +++++++
 include/trace/events/kmem.h       | 194 ++++++++++++++++++++++
 include/trace/events/lockdep.h    |  60 +++++++
 include/trace/events/sched.h      | 339 ++++++++++++++++++++++++++++++++++++++
 include/trace/events/skb.h        |  40 +++++
 include/trace/irq.h               |  57 -------
 include/trace/kmem.h              | 194 ----------------------
 include/trace/lockdep.h           |  60 -------
 include/trace/sched.h             | 339 --------------------------------------
 include/trace/skb.h               |  40 -----
 kernel/exit.c                     |   2 +-
 kernel/fork.c                     |   3 +-
 kernel/irq/handle.c               |   2 +-
 kernel/kthread.c                  |   2 +-
 kernel/lockdep.c                  |   2 +-
 kernel/sched.c                    |   2 +-
 kernel/signal.c                   |   2 +-
 kernel/softirq.c                  |   2 +-
 kernel/trace/ftrace.c             |   2 +-
 kernel/trace/trace_sched_switch.c |   2 +-
 kernel/trace/trace_sched_wakeup.c |   2 +-
 mm/util.c                         |   2 +-
 net/core/drop_monitor.c           |   2 +-
 net/core/net-traces.c             |   2 +-
 net/core/skbuff.c                 |   2 +-
 27 files changed, 708 insertions(+), 707 deletions(-)
 create mode 100644 include/trace/events/irq.h
 create mode 100644 include/trace/events/kmem.h
 create mode 100644 include/trace/events/lockdep.h
 create mode 100644 include/trace/events/sched.h
 create mode 100644 include/trace/events/skb.h
 delete mode 100644 include/trace/irq.h
 delete mode 100644 include/trace/kmem.h
 delete mode 100644 include/trace/lockdep.h
 delete mode 100644 include/trace/sched.h
 delete mode 100644 include/trace/skb.h

(limited to 'include')

diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h
index 15c45a27a92..b616d3930c3 100644
--- a/include/linux/kmemtrace.h
+++ b/include/linux/kmemtrace.h
@@ -9,7 +9,7 @@
 
 #ifdef __KERNEL__
 
-#include <trace/kmem.h>
+#include <trace/events/kmem.h>
 
 #ifdef CONFIG_KMEMTRACE
 extern void kmemtrace_init(void);
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index 980eb66a6e3..18869417109 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -43,7 +43,7 @@
 #endif
 
 #ifndef TRACE_INCLUDE_PATH
-# define __TRACE_INCLUDE(system) <trace/system.h>
+# define __TRACE_INCLUDE(system) <trace/events/system.h>
 # define UNDEF_TRACE_INCLUDE_FILE
 #else
 # define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h)
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
new file mode 100644
index 00000000000..75e3468e449
--- /dev/null
+++ b/include/trace/events/irq.h
@@ -0,0 +1,57 @@
+#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_IRQ_H
+
+#include <linux/tracepoint.h>
+#include <linux/interrupt.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM irq
+
+/*
+ * Tracepoint for entry of interrupt handler:
+ */
+TRACE_FORMAT(irq_handler_entry,
+	TP_PROTO(int irq, struct irqaction *action),
+	TP_ARGS(irq, action),
+	TP_FMT("irq=%d handler=%s", irq, action->name)
+	);
+
+/*
+ * Tracepoint for return of an interrupt handler:
+ */
+TRACE_EVENT(irq_handler_exit,
+
+	TP_PROTO(int irq, struct irqaction *action, int ret),
+
+	TP_ARGS(irq, action, ret),
+
+	TP_STRUCT__entry(
+		__field(	int,	irq	)
+		__field(	int,	ret	)
+	),
+
+	TP_fast_assign(
+		__entry->irq	= irq;
+		__entry->ret	= ret;
+	),
+
+	TP_printk("irq=%d return=%s",
+		  __entry->irq, __entry->ret ? "handled" : "unhandled")
+);
+
+TRACE_FORMAT(softirq_entry,
+	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+	TP_ARGS(h, vec),
+	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
+	);
+
+TRACE_FORMAT(softirq_exit,
+	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+	TP_ARGS(h, vec),
+	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
+	);
+
+#endif /*  _TRACE_IRQ_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
new file mode 100644
index 00000000000..c22c42f980b
--- /dev/null
+++ b/include/trace/events/kmem.h
@@ -0,0 +1,194 @@
+#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KMEM_H
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kmem
+
+TRACE_EVENT(kmalloc,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags)
+);
+
+TRACE_EVENT(kmem_cache_alloc,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags)
+);
+
+TRACE_EVENT(kmalloc_node,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags,
+		 int node),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+		__field(	int,		node		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+		__entry->node		= node;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags,
+		__entry->node)
+);
+
+TRACE_EVENT(kmem_cache_alloc_node,
+
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 size_t bytes_req,
+		 size_t bytes_alloc,
+		 gfp_t gfp_flags,
+		 int node),
+
+	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	gfp_t,		gfp_flags	)
+		__field(	int,		node		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= bytes_req;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
+		__entry->node		= node;
+	),
+
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
+		__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		__entry->gfp_flags,
+		__entry->node)
+);
+
+TRACE_EVENT(kfree,
+
+	TP_PROTO(unsigned long call_site, const void *ptr),
+
+	TP_ARGS(call_site, ptr),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+	),
+
+	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+);
+
+TRACE_EVENT(kmem_cache_free,
+
+	TP_PROTO(unsigned long call_site, const void *ptr),
+
+	TP_ARGS(call_site, ptr),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+	),
+
+	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+);
+#endif /* _TRACE_KMEM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h
new file mode 100644
index 00000000000..45e326b5c7f
--- /dev/null
+++ b/include/trace/events/lockdep.h
@@ -0,0 +1,60 @@
+#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_LOCKDEP_H
+
+#include <linux/lockdep.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM lockdep
+
+#ifdef CONFIG_LOCKDEP
+
+TRACE_FORMAT(lock_acquire,
+	TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
+		int trylock, int read, int check,
+		struct lockdep_map *next_lock, unsigned long ip),
+	TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
+	TP_FMT("%s%s%s", trylock ? "try " : "",
+		read ? "read " : "", lock->name)
+	);
+
+TRACE_FORMAT(lock_release,
+	TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
+	TP_ARGS(lock, nested, ip),
+	TP_FMT("%s", lock->name)
+	);
+
+#ifdef CONFIG_LOCK_STAT
+
+TRACE_FORMAT(lock_contended,
+	TP_PROTO(struct lockdep_map *lock, unsigned long ip),
+	TP_ARGS(lock, ip),
+	TP_FMT("%s", lock->name)
+	);
+
+TRACE_EVENT(lock_acquired,
+	TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime),
+
+	TP_ARGS(lock, ip, waittime),
+
+	TP_STRUCT__entry(
+		__field(const char *, name)
+		__field(unsigned long, wait_usec)
+		__field(unsigned long, wait_nsec_rem)
+	),
+	TP_fast_assign(
+		__entry->name = lock->name;
+		__entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC);
+		__entry->wait_usec = (unsigned long) waittime;
+	),
+	TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec,
+				       __entry->wait_nsec_rem)
+);
+
+#endif
+#endif
+
+#endif /* _TRACE_LOCKDEP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
new file mode 100644
index 00000000000..ffa1cab586b
--- /dev/null
+++ b/include/trace/events/sched.h
@@ -0,0 +1,339 @@
+#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SCHED_H
+
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM sched
+
+/*
+ * Tracepoint for calling kthread_stop, performed to end a kthread:
+ */
+TRACE_EVENT(sched_kthread_stop,
+
+	TP_PROTO(struct task_struct *t),
+
+	TP_ARGS(t),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
+		__entry->pid	= t->pid;
+	),
+
+	TP_printk("task %s:%d", __entry->comm, __entry->pid)
+);
+
+/*
+ * Tracepoint for the return value of the kthread stopping:
+ */
+TRACE_EVENT(sched_kthread_stop_ret,
+
+	TP_PROTO(int ret),
+
+	TP_ARGS(ret),
+
+	TP_STRUCT__entry(
+		__field(	int,	ret	)
+	),
+
+	TP_fast_assign(
+		__entry->ret	= ret;
+	),
+
+	TP_printk("ret %d", __entry->ret)
+);
+
+/*
+ * Tracepoint for waiting on task to unschedule:
+ *
+ * (NOTE: the 'rq' argument is not used by generic trace events,
+ *        but used by the latency tracer plugin. )
+ */
+TRACE_EVENT(sched_wait_task,
+
+	TP_PROTO(struct rq *rq, struct task_struct *p),
+
+	TP_ARGS(rq, p),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid	= p->pid;
+		__entry->prio	= p->prio;
+	),
+
+	TP_printk("task %s:%d [%d]",
+		  __entry->comm, __entry->pid, __entry->prio)
+);
+
+/*
+ * Tracepoint for waking up a task:
+ *
+ * (NOTE: the 'rq' argument is not used by generic trace events,
+ *        but used by the latency tracer plugin. )
+ */
+TRACE_EVENT(sched_wakeup,
+
+	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
+
+	TP_ARGS(rq, p, success),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+		__field(	int,	success			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		__entry->prio		= p->prio;
+		__entry->success	= success;
+	),
+
+	TP_printk("task %s:%d [%d] success=%d",
+		  __entry->comm, __entry->pid, __entry->prio,
+		  __entry->success)
+);
+
+/*
+ * Tracepoint for waking up a new task:
+ *
+ * (NOTE: the 'rq' argument is not used by generic trace events,
+ *        but used by the latency tracer plugin. )
+ */
+TRACE_EVENT(sched_wakeup_new,
+
+	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
+
+	TP_ARGS(rq, p, success),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+		__field(	int,	success			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		__entry->prio		= p->prio;
+		__entry->success	= success;
+	),
+
+	TP_printk("task %s:%d [%d] success=%d",
+		  __entry->comm, __entry->pid, __entry->prio,
+		  __entry->success)
+);
+
+/*
+ * Tracepoint for task switches, performed by the scheduler:
+ *
+ * (NOTE: the 'rq' argument is not used by generic trace events,
+ *        but used by the latency tracer plugin. )
+ */
+TRACE_EVENT(sched_switch,
+
+	TP_PROTO(struct rq *rq, struct task_struct *prev,
+		 struct task_struct *next),
+
+	TP_ARGS(rq, prev, next),
+
+	TP_STRUCT__entry(
+		__array(	char,	prev_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	prev_pid			)
+		__field(	int,	prev_prio			)
+		__array(	char,	next_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	next_pid			)
+		__field(	int,	next_prio			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
+		__entry->prev_pid	= prev->pid;
+		__entry->prev_prio	= prev->prio;
+		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
+		__entry->next_pid	= next->pid;
+		__entry->next_prio	= next->prio;
+	),
+
+	TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
+		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
+		__entry->next_comm, __entry->next_pid, __entry->next_prio)
+);
+
+/*
+ * Tracepoint for a task being migrated:
+ */
+TRACE_EVENT(sched_migrate_task,
+
+	TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu),
+
+	TP_ARGS(p, orig_cpu, dest_cpu),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+		__field(	int,	orig_cpu		)
+		__field(	int,	dest_cpu		)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		__entry->prio		= p->prio;
+		__entry->orig_cpu	= orig_cpu;
+		__entry->dest_cpu	= dest_cpu;
+	),
+
+	TP_printk("task %s:%d [%d] from: %d  to: %d",
+		  __entry->comm, __entry->pid, __entry->prio,
+		  __entry->orig_cpu, __entry->dest_cpu)
+);
+
+/*
+ * Tracepoint for freeing a task:
+ */
+TRACE_EVENT(sched_process_free,
+
+	TP_PROTO(struct task_struct *p),
+
+	TP_ARGS(p),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		__entry->prio		= p->prio;
+	),
+
+	TP_printk("task %s:%d [%d]",
+		  __entry->comm, __entry->pid, __entry->prio)
+);
+
+/*
+ * Tracepoint for a task exiting:
+ */
+TRACE_EVENT(sched_process_exit,
+
+	TP_PROTO(struct task_struct *p),
+
+	TP_ARGS(p),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		__entry->prio		= p->prio;
+	),
+
+	TP_printk("task %s:%d [%d]",
+		  __entry->comm, __entry->pid, __entry->prio)
+);
+
+/*
+ * Tracepoint for a waiting task:
+ */
+TRACE_EVENT(sched_process_wait,
+
+	TP_PROTO(struct pid *pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+		__field(	int,	prio			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+		__entry->pid		= pid_nr(pid);
+		__entry->prio		= current->prio;
+	),
+
+	TP_printk("task %s:%d [%d]",
+		  __entry->comm, __entry->pid, __entry->prio)
+);
+
+/*
+ * Tracepoint for do_fork:
+ */
+TRACE_EVENT(sched_process_fork,
+
+	TP_PROTO(struct task_struct *parent, struct task_struct *child),
+
+	TP_ARGS(parent, child),
+
+	TP_STRUCT__entry(
+		__array(	char,	parent_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	parent_pid			)
+		__array(	char,	child_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	child_pid			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
+		__entry->parent_pid	= parent->pid;
+		memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
+		__entry->child_pid	= child->pid;
+	),
+
+	TP_printk("parent %s:%d  child %s:%d",
+		__entry->parent_comm, __entry->parent_pid,
+		__entry->child_comm, __entry->child_pid)
+);
+
+/*
+ * Tracepoint for sending a signal:
+ */
+TRACE_EVENT(sched_signal_send,
+
+	TP_PROTO(int sig, struct task_struct *p),
+
+	TP_ARGS(sig, p),
+
+	TP_STRUCT__entry(
+		__field(	int,	sig			)
+		__array(	char,	comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	pid			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid	= p->pid;
+		__entry->sig	= sig;
+	),
+
+	TP_printk("sig: %d  task %s:%d",
+		  __entry->sig, __entry->comm, __entry->pid)
+);
+
+#endif /* _TRACE_SCHED_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
new file mode 100644
index 00000000000..1e8fabb57c0
--- /dev/null
+++ b/include/trace/events/skb.h
@@ -0,0 +1,40 @@
+#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SKB_H
+
+#include <linux/skbuff.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM skb
+
+/*
+ * Tracepoint for free an sk_buff:
+ */
+TRACE_EVENT(kfree_skb,
+
+	TP_PROTO(struct sk_buff *skb, void *location),
+
+	TP_ARGS(skb, location),
+
+	TP_STRUCT__entry(
+		__field(	void *,		skbaddr		)
+		__field(	unsigned short,	protocol	)
+		__field(	void *,		location	)
+	),
+
+	TP_fast_assign(
+		__entry->skbaddr = skb;
+		if (skb) {
+			__entry->protocol = ntohs(skb->protocol);
+		}
+		__entry->location = location;
+	),
+
+	TP_printk("skbaddr=%p protocol=%u location=%p",
+		__entry->skbaddr, __entry->protocol, __entry->location)
+);
+
+#endif /* _TRACE_SKB_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/irq.h b/include/trace/irq.h
deleted file mode 100644
index 75e3468e449..00000000000
--- a/include/trace/irq.h
+++ /dev/null
@@ -1,57 +0,0 @@
-#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_IRQ_H
-
-#include <linux/tracepoint.h>
-#include <linux/interrupt.h>
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM irq
-
-/*
- * Tracepoint for entry of interrupt handler:
- */
-TRACE_FORMAT(irq_handler_entry,
-	TP_PROTO(int irq, struct irqaction *action),
-	TP_ARGS(irq, action),
-	TP_FMT("irq=%d handler=%s", irq, action->name)
-	);
-
-/*
- * Tracepoint for return of an interrupt handler:
- */
-TRACE_EVENT(irq_handler_exit,
-
-	TP_PROTO(int irq, struct irqaction *action, int ret),
-
-	TP_ARGS(irq, action, ret),
-
-	TP_STRUCT__entry(
-		__field(	int,	irq	)
-		__field(	int,	ret	)
-	),
-
-	TP_fast_assign(
-		__entry->irq	= irq;
-		__entry->ret	= ret;
-	),
-
-	TP_printk("irq=%d return=%s",
-		  __entry->irq, __entry->ret ? "handled" : "unhandled")
-);
-
-TRACE_FORMAT(softirq_entry,
-	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
-	TP_ARGS(h, vec),
-	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
-	);
-
-TRACE_FORMAT(softirq_exit,
-	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
-	TP_ARGS(h, vec),
-	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
-	);
-
-#endif /*  _TRACE_IRQ_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/include/trace/kmem.h b/include/trace/kmem.h
deleted file mode 100644
index c22c42f980b..00000000000
--- a/include/trace/kmem.h
+++ /dev/null
@@ -1,194 +0,0 @@
-#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_KMEM_H
-
-#include <linux/types.h>
-#include <linux/tracepoint.h>
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM kmem
-
-TRACE_EVENT(kmalloc,
-
-	TP_PROTO(unsigned long call_site,
-		 const void *ptr,
-		 size_t bytes_req,
-		 size_t bytes_alloc,
-		 gfp_t gfp_flags),
-
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-		__field(	size_t,		bytes_req	)
-		__field(	size_t,		bytes_alloc	)
-		__field(	gfp_t,		gfp_flags	)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-		__entry->bytes_req	= bytes_req;
-		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= gfp_flags;
-	),
-
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
-		__entry->call_site,
-		__entry->ptr,
-		__entry->bytes_req,
-		__entry->bytes_alloc,
-		__entry->gfp_flags)
-);
-
-TRACE_EVENT(kmem_cache_alloc,
-
-	TP_PROTO(unsigned long call_site,
-		 const void *ptr,
-		 size_t bytes_req,
-		 size_t bytes_alloc,
-		 gfp_t gfp_flags),
-
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-		__field(	size_t,		bytes_req	)
-		__field(	size_t,		bytes_alloc	)
-		__field(	gfp_t,		gfp_flags	)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-		__entry->bytes_req	= bytes_req;
-		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= gfp_flags;
-	),
-
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
-		__entry->call_site,
-		__entry->ptr,
-		__entry->bytes_req,
-		__entry->bytes_alloc,
-		__entry->gfp_flags)
-);
-
-TRACE_EVENT(kmalloc_node,
-
-	TP_PROTO(unsigned long call_site,
-		 const void *ptr,
-		 size_t bytes_req,
-		 size_t bytes_alloc,
-		 gfp_t gfp_flags,
-		 int node),
-
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-		__field(	size_t,		bytes_req	)
-		__field(	size_t,		bytes_alloc	)
-		__field(	gfp_t,		gfp_flags	)
-		__field(	int,		node		)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-		__entry->bytes_req	= bytes_req;
-		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= gfp_flags;
-		__entry->node		= node;
-	),
-
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
-		__entry->call_site,
-		__entry->ptr,
-		__entry->bytes_req,
-		__entry->bytes_alloc,
-		__entry->gfp_flags,
-		__entry->node)
-);
-
-TRACE_EVENT(kmem_cache_alloc_node,
-
-	TP_PROTO(unsigned long call_site,
-		 const void *ptr,
-		 size_t bytes_req,
-		 size_t bytes_alloc,
-		 gfp_t gfp_flags,
-		 int node),
-
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-		__field(	size_t,		bytes_req	)
-		__field(	size_t,		bytes_alloc	)
-		__field(	gfp_t,		gfp_flags	)
-		__field(	int,		node		)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-		__entry->bytes_req	= bytes_req;
-		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= gfp_flags;
-		__entry->node		= node;
-	),
-
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
-		__entry->call_site,
-		__entry->ptr,
-		__entry->bytes_req,
-		__entry->bytes_alloc,
-		__entry->gfp_flags,
-		__entry->node)
-);
-
-TRACE_EVENT(kfree,
-
-	TP_PROTO(unsigned long call_site, const void *ptr),
-
-	TP_ARGS(call_site, ptr),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-	),
-
-	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
-);
-
-TRACE_EVENT(kmem_cache_free,
-
-	TP_PROTO(unsigned long call_site, const void *ptr),
-
-	TP_ARGS(call_site, ptr),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-	),
-
-	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
-);
-#endif /* _TRACE_KMEM_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h
deleted file mode 100644
index 45e326b5c7f..00000000000
--- a/include/trace/lockdep.h
+++ /dev/null
@@ -1,60 +0,0 @@
-#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_LOCKDEP_H
-
-#include <linux/lockdep.h>
-#include <linux/tracepoint.h>
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM lockdep
-
-#ifdef CONFIG_LOCKDEP
-
-TRACE_FORMAT(lock_acquire,
-	TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
-		int trylock, int read, int check,
-		struct lockdep_map *next_lock, unsigned long ip),
-	TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
-	TP_FMT("%s%s%s", trylock ? "try " : "",
-		read ? "read " : "", lock->name)
-	);
-
-TRACE_FORMAT(lock_release,
-	TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
-	TP_ARGS(lock, nested, ip),
-	TP_FMT("%s", lock->name)
-	);
-
-#ifdef CONFIG_LOCK_STAT
-
-TRACE_FORMAT(lock_contended,
-	TP_PROTO(struct lockdep_map *lock, unsigned long ip),
-	TP_ARGS(lock, ip),
-	TP_FMT("%s", lock->name)
-	);
-
-TRACE_EVENT(lock_acquired,
-	TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime),
-
-	TP_ARGS(lock, ip, waittime),
-
-	TP_STRUCT__entry(
-		__field(const char *, name)
-		__field(unsigned long, wait_usec)
-		__field(unsigned long, wait_nsec_rem)
-	),
-	TP_fast_assign(
-		__entry->name = lock->name;
-		__entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC);
-		__entry->wait_usec = (unsigned long) waittime;
-	),
-	TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec,
-				       __entry->wait_nsec_rem)
-);
-
-#endif
-#endif
-
-#endif /* _TRACE_LOCKDEP_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/include/trace/sched.h b/include/trace/sched.h
deleted file mode 100644
index ffa1cab586b..00000000000
--- a/include/trace/sched.h
+++ /dev/null
@@ -1,339 +0,0 @@
-#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_SCHED_H
-
-#include <linux/sched.h>
-#include <linux/tracepoint.h>
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM sched
-
-/*
- * Tracepoint for calling kthread_stop, performed to end a kthread:
- */
-TRACE_EVENT(sched_kthread_stop,
-
-	TP_PROTO(struct task_struct *t),
-
-	TP_ARGS(t),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
-		__entry->pid	= t->pid;
-	),
-
-	TP_printk("task %s:%d", __entry->comm, __entry->pid)
-);
-
-/*
- * Tracepoint for the return value of the kthread stopping:
- */
-TRACE_EVENT(sched_kthread_stop_ret,
-
-	TP_PROTO(int ret),
-
-	TP_ARGS(ret),
-
-	TP_STRUCT__entry(
-		__field(	int,	ret	)
-	),
-
-	TP_fast_assign(
-		__entry->ret	= ret;
-	),
-
-	TP_printk("ret %d", __entry->ret)
-);
-
-/*
- * Tracepoint for waiting on task to unschedule:
- *
- * (NOTE: the 'rq' argument is not used by generic trace events,
- *        but used by the latency tracer plugin. )
- */
-TRACE_EVENT(sched_wait_task,
-
-	TP_PROTO(struct rq *rq, struct task_struct *p),
-
-	TP_ARGS(rq, p),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid	= p->pid;
-		__entry->prio	= p->prio;
-	),
-
-	TP_printk("task %s:%d [%d]",
-		  __entry->comm, __entry->pid, __entry->prio)
-);
-
-/*
- * Tracepoint for waking up a task:
- *
- * (NOTE: the 'rq' argument is not used by generic trace events,
- *        but used by the latency tracer plugin. )
- */
-TRACE_EVENT(sched_wakeup,
-
-	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
-
-	TP_ARGS(rq, p, success),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-		__field(	int,	success			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-		__entry->success	= success;
-	),
-
-	TP_printk("task %s:%d [%d] success=%d",
-		  __entry->comm, __entry->pid, __entry->prio,
-		  __entry->success)
-);
-
-/*
- * Tracepoint for waking up a new task:
- *
- * (NOTE: the 'rq' argument is not used by generic trace events,
- *        but used by the latency tracer plugin. )
- */
-TRACE_EVENT(sched_wakeup_new,
-
-	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
-
-	TP_ARGS(rq, p, success),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-		__field(	int,	success			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-		__entry->success	= success;
-	),
-
-	TP_printk("task %s:%d [%d] success=%d",
-		  __entry->comm, __entry->pid, __entry->prio,
-		  __entry->success)
-);
-
-/*
- * Tracepoint for task switches, performed by the scheduler:
- *
- * (NOTE: the 'rq' argument is not used by generic trace events,
- *        but used by the latency tracer plugin. )
- */
-TRACE_EVENT(sched_switch,
-
-	TP_PROTO(struct rq *rq, struct task_struct *prev,
-		 struct task_struct *next),
-
-	TP_ARGS(rq, prev, next),
-
-	TP_STRUCT__entry(
-		__array(	char,	prev_comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	prev_pid			)
-		__field(	int,	prev_prio			)
-		__array(	char,	next_comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	next_pid			)
-		__field(	int,	next_prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
-		__entry->prev_pid	= prev->pid;
-		__entry->prev_prio	= prev->prio;
-		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
-		__entry->next_pid	= next->pid;
-		__entry->next_prio	= next->prio;
-	),
-
-	TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
-		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
-		__entry->next_comm, __entry->next_pid, __entry->next_prio)
-);
-
-/*
- * Tracepoint for a task being migrated:
- */
-TRACE_EVENT(sched_migrate_task,
-
-	TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu),
-
-	TP_ARGS(p, orig_cpu, dest_cpu),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-		__field(	int,	orig_cpu		)
-		__field(	int,	dest_cpu		)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-		__entry->orig_cpu	= orig_cpu;
-		__entry->dest_cpu	= dest_cpu;
-	),
-
-	TP_printk("task %s:%d [%d] from: %d  to: %d",
-		  __entry->comm, __entry->pid, __entry->prio,
-		  __entry->orig_cpu, __entry->dest_cpu)
-);
-
-/*
- * Tracepoint for freeing a task:
- */
-TRACE_EVENT(sched_process_free,
-
-	TP_PROTO(struct task_struct *p),
-
-	TP_ARGS(p),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-	),
-
-	TP_printk("task %s:%d [%d]",
-		  __entry->comm, __entry->pid, __entry->prio)
-);
-
-/*
- * Tracepoint for a task exiting:
- */
-TRACE_EVENT(sched_process_exit,
-
-	TP_PROTO(struct task_struct *p),
-
-	TP_ARGS(p),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->prio		= p->prio;
-	),
-
-	TP_printk("task %s:%d [%d]",
-		  __entry->comm, __entry->pid, __entry->prio)
-);
-
-/*
- * Tracepoint for a waiting task:
- */
-TRACE_EVENT(sched_process_wait,
-
-	TP_PROTO(struct pid *pid),
-
-	TP_ARGS(pid),
-
-	TP_STRUCT__entry(
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-		__field(	int,	prio			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
-		__entry->pid		= pid_nr(pid);
-		__entry->prio		= current->prio;
-	),
-
-	TP_printk("task %s:%d [%d]",
-		  __entry->comm, __entry->pid, __entry->prio)
-);
-
-/*
- * Tracepoint for do_fork:
- */
-TRACE_EVENT(sched_process_fork,
-
-	TP_PROTO(struct task_struct *parent, struct task_struct *child),
-
-	TP_ARGS(parent, child),
-
-	TP_STRUCT__entry(
-		__array(	char,	parent_comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	parent_pid			)
-		__array(	char,	child_comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	child_pid			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
-		__entry->parent_pid	= parent->pid;
-		memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
-		__entry->child_pid	= child->pid;
-	),
-
-	TP_printk("parent %s:%d  child %s:%d",
-		__entry->parent_comm, __entry->parent_pid,
-		__entry->child_comm, __entry->child_pid)
-);
-
-/*
- * Tracepoint for sending a signal:
- */
-TRACE_EVENT(sched_signal_send,
-
-	TP_PROTO(int sig, struct task_struct *p),
-
-	TP_ARGS(sig, p),
-
-	TP_STRUCT__entry(
-		__field(	int,	sig			)
-		__array(	char,	comm,	TASK_COMM_LEN	)
-		__field(	pid_t,	pid			)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid	= p->pid;
-		__entry->sig	= sig;
-	),
-
-	TP_printk("sig: %d  task %s:%d",
-		  __entry->sig, __entry->comm, __entry->pid)
-);
-
-#endif /* _TRACE_SCHED_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/include/trace/skb.h b/include/trace/skb.h
deleted file mode 100644
index 1e8fabb57c0..00000000000
--- a/include/trace/skb.h
+++ /dev/null
@@ -1,40 +0,0 @@
-#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_SKB_H
-
-#include <linux/skbuff.h>
-#include <linux/tracepoint.h>
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM skb
-
-/*
- * Tracepoint for free an sk_buff:
- */
-TRACE_EVENT(kfree_skb,
-
-	TP_PROTO(struct sk_buff *skb, void *location),
-
-	TP_ARGS(skb, location),
-
-	TP_STRUCT__entry(
-		__field(	void *,		skbaddr		)
-		__field(	unsigned short,	protocol	)
-		__field(	void *,		location	)
-	),
-
-	TP_fast_assign(
-		__entry->skbaddr = skb;
-		if (skb) {
-			__entry->protocol = ntohs(skb->protocol);
-		}
-		__entry->location = location;
-	),
-
-	TP_printk("skbaddr=%p protocol=%u location=%p",
-		__entry->skbaddr, __entry->protocol, __entry->location)
-);
-
-#endif /* _TRACE_SKB_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/kernel/exit.c b/kernel/exit.c
index 2fe9d2c7eee..cab535c427b 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -48,7 +48,7 @@
 #include <linux/tracehook.h>
 #include <linux/fs_struct.h>
 #include <linux/init_task.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
diff --git a/kernel/fork.c b/kernel/fork.c
index 4bebf263923..085f73ebcea 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -61,7 +61,6 @@
 #include <linux/proc_fs.h>
 #include <linux/blkdev.h>
 #include <linux/fs_struct.h>
-#include <trace/sched.h>
 #include <linux/magic.h>
 
 #include <asm/pgtable.h>
@@ -71,6 +70,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <trace/events/sched.h>
+
 /*
  * Protected counters by write_lock_irq(&tasklist_lock)
  */
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 983d8be8dff..37c63633e78 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -20,7 +20,7 @@
 #include <linux/bootmem.h>
 
 #define CREATE_TRACE_POINTS
-#include <trace/irq.h>
+#include <trace/events/irq.h>
 
 #include "internals.h"
 
diff --git a/kernel/kthread.c b/kernel/kthread.c
index e1c76924545..41c88fe4050 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -13,7 +13,7 @@
 #include <linux/file.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #define KTHREAD_NICE_LEVEL (-5)
 
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 257f21a76c5..47b201ecc6d 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -48,7 +48,7 @@
 #include "lockdep_internals.h"
 
 #define CREATE_TRACE_POINTS
-#include <trace/lockdep.h>
+#include <trace/events/lockdep.h>
 
 #ifdef CONFIG_PROVE_LOCKING
 int prove_locking = 1;
diff --git a/kernel/sched.c b/kernel/sched.c
index e6d4518d47e..9f7ffd00b6e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -79,7 +79,7 @@
 #include "sched_cpupri.h"
 
 #define CREATE_TRACE_POINTS
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 /*
  * Convert user-nice values [ -20 ... 0 ... 19 ]
diff --git a/kernel/signal.c b/kernel/signal.c
index 1d5703ff003..94ec0a4dde0 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -27,7 +27,7 @@
 #include <linux/freezer.h>
 #include <linux/pid_namespace.h>
 #include <linux/nsproxy.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include <asm/param.h>
 #include <asm/uaccess.h>
diff --git a/kernel/softirq.c b/kernel/softirq.c
index a2d9b458ac2..7ab9dfd8d08 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -24,7 +24,7 @@
 #include <linux/ftrace.h>
 #include <linux/smp.h>
 #include <linux/tick.h>
-#include <trace/irq.h>
+#include <trace/events/irq.h>
 
 #include <asm/irq.h>
 /*
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 8e6a0b5c994..a2348898858 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -29,7 +29,7 @@
 #include <linux/list.h>
 #include <linux/hash.h>
 
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include <asm/ftrace.h>
 
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 9d8cccdfaa0..a98106dd979 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -10,7 +10,7 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include "trace.h"
 
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 5bc00e8f153..b8b13c5540f 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -15,7 +15,7 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include "trace.h"
 
diff --git a/mm/util.c b/mm/util.c
index 0e74a22791c..6794a336e9a 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -7,7 +7,7 @@
 #include <asm/uaccess.h>
 
 #define CREATE_TRACE_POINTS
-#include <trace/kmem.h>
+#include <trace/events/kmem.h>
 
 /**
  * kstrdup - allocate space for and copy an existing string
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 9fd0dc3cca9..b75b6cea49d 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -23,7 +23,7 @@
 #include <linux/bitops.h>
 #include <net/genetlink.h>
 
-#include <trace/skb.h>
+#include <trace/events/skb.h>
 
 #include <asm/unaligned.h>
 
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 80177205947..499a67eaf3a 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -24,6 +24,6 @@
 #include <asm/bitops.h>
 
 #define CREATE_TRACE_POINTS
-#include <trace/skb.h>
+#include <trace/events/skb.h>
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ce6356cd9f7..12806b84445 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -65,7 +65,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
-#include <trace/skb.h>
+#include <trace/events/skb.h>
 
 #include "kmap_skb.h"
 
-- 
cgit v1.2.3-70-g09d2


From 05725f7eb4b8acb147c5fc7b91397b1f6bcab00d Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Tue, 14 Apr 2009 20:17:16 +0200
Subject: rculist: use list_entry_rcu in places where it's appropriate

Use previously introduced list_entry_rcu instead of an open-coded
list_entry + rcu_dereference combination.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: dipankar@in.ibm.com
LKML-Reference: <20090414181715.GA3634@psychotron.englab.brq.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h           | 8 +++++---
 ipc/sem.c                       | 4 ++--
 security/integrity/ima/ima_fs.c | 4 ++--
 security/smack/smackfs.c        | 8 ++++----
 4 files changed, 13 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b4c38bc8049..886df41e745 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -77,6 +77,7 @@ struct sched_param {
 #include <linux/proportions.h>
 #include <linux/seccomp.h>
 #include <linux/rcupdate.h>
+#include <linux/rculist.h>
 #include <linux/rtmutex.h>
 
 #include <linux/time.h>
@@ -2010,7 +2011,8 @@ static inline unsigned long wait_task_inactive(struct task_struct *p,
 }
 #endif
 
-#define next_task(p)	list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks)
+#define next_task(p) \
+	list_entry_rcu((p)->tasks.next, struct task_struct, tasks)
 
 #define for_each_process(p) \
 	for (p = &init_task ; (p = next_task(p)) != &init_task ; )
@@ -2049,8 +2051,8 @@ int same_thread_group(struct task_struct *p1, struct task_struct *p2)
 
 static inline struct task_struct *next_thread(const struct task_struct *p)
 {
-	return list_entry(rcu_dereference(p->thread_group.next),
-			  struct task_struct, thread_group);
+	return list_entry_rcu(p->thread_group.next,
+			      struct task_struct, thread_group);
 }
 
 static inline int thread_group_empty(struct task_struct *p)
diff --git a/ipc/sem.c b/ipc/sem.c
index 16a2189e96f..87c2b641fd7 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1290,8 +1290,8 @@ void exit_sem(struct task_struct *tsk)
 		int i;
 
 		rcu_read_lock();
-		un = list_entry(rcu_dereference(ulp->list_proc.next),
-					struct sem_undo, list_proc);
+		un = list_entry_rcu(ulp->list_proc.next,
+				    struct sem_undo, list_proc);
 		if (&un->list_proc == &ulp->list_proc)
 			semid = -1;
 		 else
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index ffbe259700b..510186f0b72 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -84,8 +84,8 @@ static void *ima_measurements_next(struct seq_file *m, void *v, loff_t *pos)
 	 * against concurrent list-extension
 	 */
 	rcu_read_lock();
-	qe = list_entry(rcu_dereference(qe->later.next),
-			struct ima_queue_entry, later);
+	qe = list_entry_rcu(qe->later.next,
+			    struct ima_queue_entry, later);
 	rcu_read_unlock();
 	(*pos)++;
 
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index e03a7e19c73..11d2cb19d7a 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -734,8 +734,8 @@ static void smk_netlbladdr_insert(struct smk_netlbladdr *new)
 		return;
 	}
 
-	m = list_entry(rcu_dereference(smk_netlbladdr_list.next),
-			 struct smk_netlbladdr, list);
+	m = list_entry_rcu(smk_netlbladdr_list.next,
+			   struct smk_netlbladdr, list);
 
 	/* the comparison '>' is a bit hacky, but works */
 	if (new->smk_mask.s_addr > m->smk_mask.s_addr) {
@@ -748,8 +748,8 @@ static void smk_netlbladdr_insert(struct smk_netlbladdr *new)
 			list_add_rcu(&new->list, &m->list);
 			return;
 		}
-		m_next = list_entry(rcu_dereference(m->list.next),
-				 struct smk_netlbladdr, list);
+		m_next = list_entry_rcu(m->list.next,
+					struct smk_netlbladdr, list);
 		if (new->smk_mask.s_addr > m_next->smk_mask.s_addr) {
 			list_add_rcu(&new->list, &m->list);
 			return;
-- 
cgit v1.2.3-70-g09d2


From e6a1a89d572c31b62d6dcf11a371c7323852d9b2 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Wed, 15 Apr 2009 18:22:41 +0900
Subject: dma-debug: add dma_debug_resize_entries() to adjust the number of
 dma_debug_entries

We use a static value for the number of dma_debug_entries. It can be
overwritten by a kernel command line option.

Some IOMMUs (e.g. GART) can't set an appropriate value by a kernel
command line option because they can't know such value until they
finish initializing up their hardware.

This patch adds dma_debug_resize_entries() enables IOMMUs to adjust
the number of dma_debug_entries anytime.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: fujita.tomonori@lab.ntt.co.jp
Cc: akpm@linux-foundation.org
LKML-Reference: <20090415182234R.fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/dma-debug.h |  7 +++++
 lib/dma-debug.c           | 72 +++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 73 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index 28d53cb7b5a..171ad8aedc8 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -32,6 +32,8 @@ extern void dma_debug_add_bus(struct bus_type *bus);
 
 extern void dma_debug_init(u32 num_entries);
 
+extern int dma_debug_resize_entries(u32 num_entries);
+
 extern void debug_dma_map_page(struct device *dev, struct page *page,
 			       size_t offset, size_t size,
 			       int direction, dma_addr_t dma_addr,
@@ -91,6 +93,11 @@ static inline void dma_debug_init(u32 num_entries)
 {
 }
 
+static inline int dma_debug_resize_entries(u32 num_entries)
+{
+	return 0;
+}
+
 static inline void debug_dma_map_page(struct device *dev, struct page *page,
 				      size_t offset, size_t size,
 				      int direction, dma_addr_t dma_addr,
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index d3da7edc034..5d61019330c 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -85,6 +85,7 @@ static u32 show_num_errors = 1;
 
 static u32 num_free_entries;
 static u32 min_free_entries;
+static u32 nr_total_entries;
 
 /* number of preallocated entries requested by kernel cmdline */
 static u32 req_entries;
@@ -257,6 +258,21 @@ static void add_dma_entry(struct dma_debug_entry *entry)
 	put_hash_bucket(bucket, &flags);
 }
 
+static struct dma_debug_entry *__dma_entry_alloc(void)
+{
+	struct dma_debug_entry *entry;
+
+	entry = list_entry(free_entries.next, struct dma_debug_entry, list);
+	list_del(&entry->list);
+	memset(entry, 0, sizeof(*entry));
+
+	num_free_entries -= 1;
+	if (num_free_entries < min_free_entries)
+		min_free_entries = num_free_entries;
+
+	return entry;
+}
+
 /* struct dma_entry allocator
  *
  * The next two functions implement the allocator for
@@ -276,9 +292,7 @@ static struct dma_debug_entry *dma_entry_alloc(void)
 		goto out;
 	}
 
-	entry = list_entry(free_entries.next, struct dma_debug_entry, list);
-	list_del(&entry->list);
-	memset(entry, 0, sizeof(*entry));
+	entry = __dma_entry_alloc();
 
 #ifdef CONFIG_STACKTRACE
 	entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES;
@@ -286,9 +300,6 @@ static struct dma_debug_entry *dma_entry_alloc(void)
 	entry->stacktrace.skip = 2;
 	save_stack_trace(&entry->stacktrace);
 #endif
-	num_free_entries -= 1;
-	if (num_free_entries < min_free_entries)
-		min_free_entries = num_free_entries;
 
 out:
 	spin_unlock_irqrestore(&free_entries_lock, flags);
@@ -310,6 +321,53 @@ static void dma_entry_free(struct dma_debug_entry *entry)
 	spin_unlock_irqrestore(&free_entries_lock, flags);
 }
 
+int dma_debug_resize_entries(u32 num_entries)
+{
+	int i, delta, ret = 0;
+	unsigned long flags;
+	struct dma_debug_entry *entry;
+	LIST_HEAD(tmp);
+
+	spin_lock_irqsave(&free_entries_lock, flags);
+
+	if (nr_total_entries < num_entries) {
+		delta = num_entries - nr_total_entries;
+
+		spin_unlock_irqrestore(&free_entries_lock, flags);
+
+		for (i = 0; i < delta; i++) {
+			entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+			if (!entry)
+				break;
+
+			list_add_tail(&entry->list, &tmp);
+		}
+
+		spin_lock_irqsave(&free_entries_lock, flags);
+
+		list_splice(&tmp, &free_entries);
+		nr_total_entries += i;
+		num_free_entries += i;
+	} else {
+		delta = nr_total_entries - num_entries;
+
+		for (i = 0; i < delta && !list_empty(&free_entries); i++) {
+			entry = __dma_entry_alloc();
+			kfree(entry);
+		}
+
+		nr_total_entries -= i;
+	}
+
+	if (nr_total_entries != num_entries)
+		ret = 1;
+
+	spin_unlock_irqrestore(&free_entries_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(dma_debug_resize_entries);
+
 /*
  * DMA-API debugging init code
  *
@@ -490,6 +548,8 @@ void dma_debug_init(u32 num_entries)
 		return;
 	}
 
+	nr_total_entries = num_free_entries;
+
 	printk(KERN_INFO "DMA-API: debugging enabled by kernel config\n");
 }
 
-- 
cgit v1.2.3-70-g09d2


From d0deef5b14af7d5bbd0003a0a2a1a32326e20a6d Mon Sep 17 00:00:00 2001
From: Shawn Du <duyuyang@gmail.com>
Date: Tue, 14 Apr 2009 13:58:56 +0800
Subject: blktrace: support per-partition tracing

Though one can specify '-d /dev/sda1' when using blktrace, it still
traces the whole sda.

To support per-partition tracing, when we start tracing, we initialize
bt->start_lba and bt->end_lba to the start and end sector of that
partition.

Note some actions are per device, thus we don't filter 0-sector events.

The original patch and discussion can be found here:
	http://marc.info/?l=linux-btrace&m=122949374214540&w=2

Signed-off-by: Shawn Du <duyuyang@gmail.com>
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
LKML-Reference: <49E42620.4050701@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 block/compat_ioctl.c         |  2 +-
 drivers/scsi/sg.c            |  1 +
 include/linux/blktrace_api.h | 24 +++++++++++++-----------
 kernel/trace/blktrace.c      | 29 +++++++++++++++++++++--------
 4 files changed, 36 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index f87615dea46..f8c218cd08e 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -568,7 +568,7 @@ static int compat_blk_trace_setup(struct block_device *bdev, char __user *arg)
 	memcpy(&buts.name, &cbuts.name, 32);
 
 	mutex_lock(&bdev->bd_mutex);
-	ret = do_blk_trace_setup(q, b, bdev->bd_dev, &buts);
+	ret = do_blk_trace_setup(q, b, bdev->bd_dev, bdev, &buts);
 	mutex_unlock(&bdev->bd_mutex);
 	if (ret)
 		return ret;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 82312df9b0b..49c98730bb8 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1065,6 +1065,7 @@ sg_ioctl(struct inode *inode, struct file *filp,
 		return blk_trace_setup(sdp->device->request_queue,
 				       sdp->disk->disk_name,
 				       MKDEV(SCSI_GENERIC_MAJOR, sdp->index),
+				       NULL,
 				       (char *)arg);
 	case BLKTRACESTART:
 		return blk_trace_startstop(sdp->device->request_queue, 1);
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index d960889e92e..267edc4017e 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -165,8 +165,9 @@ struct blk_trace {
 
 extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
 extern void blk_trace_shutdown(struct request_queue *);
-extern int do_blk_trace_setup(struct request_queue *q,
-	char *name, dev_t dev, struct blk_user_trace_setup *buts);
+extern int do_blk_trace_setup(struct request_queue *q, char *name,
+			      dev_t dev, struct block_device *bdev,
+			      struct blk_user_trace_setup *buts);
 extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
 
 /**
@@ -193,6 +194,7 @@ extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
 extern void blk_add_driver_data(struct request_queue *q, struct request *rq,
 				void *data, size_t len);
 extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
+			   struct block_device *bdev,
 			   char __user *arg);
 extern int blk_trace_startstop(struct request_queue *q, int start);
 extern int blk_trace_remove(struct request_queue *q);
@@ -200,15 +202,15 @@ extern int blk_trace_remove(struct request_queue *q);
 extern struct attribute_group blk_trace_attr_group;
 
 #else /* !CONFIG_BLK_DEV_IO_TRACE */
-#define blk_trace_ioctl(bdev, cmd, arg)		(-ENOTTY)
-#define blk_trace_shutdown(q)			do { } while (0)
-#define do_blk_trace_setup(q, name, dev, buts)	(-ENOTTY)
-#define blk_add_driver_data(q, rq, data, len)	do {} while (0)
-#define blk_trace_setup(q, name, dev, arg)	(-ENOTTY)
-#define blk_trace_startstop(q, start)		(-ENOTTY)
-#define blk_trace_remove(q)			(-ENOTTY)
-#define blk_add_trace_msg(q, fmt, ...)		do { } while (0)
-
+# define blk_trace_ioctl(bdev, cmd, arg)		(-ENOTTY)
+# define blk_trace_shutdown(q)				do { } while (0)
+# define do_blk_trace_setup(q, name, dev, bdev, buts)	(-ENOTTY)
+# define blk_add_driver_data(q, rq, data, len)		do {} while (0)
+# define blk_trace_setup(q, name, dev, bdev, arg)	(-ENOTTY)
+# define blk_trace_startstop(q, start)			(-ENOTTY)
+# define blk_trace_remove(q)				(-ENOTTY)
+# define blk_add_trace_msg(q, fmt, ...)			do { } while (0)
 #endif /* CONFIG_BLK_DEV_IO_TRACE */
+
 #endif /* __KERNEL__ */
 #endif
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 2b98195b338..e932654cf59 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -147,7 +147,7 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
 {
 	if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
 		return 1;
-	if (sector < bt->start_lba || sector > bt->end_lba)
+	if (sector && (sector < bt->start_lba || sector > bt->end_lba))
 		return 1;
 	if (bt->pid && pid != bt->pid)
 		return 1;
@@ -192,7 +192,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
 	what |= MASK_TC_BIT(rw, DISCARD);
 
 	pid = tsk->pid;
-	if (unlikely(act_log_check(bt, what, sector, pid)))
+	if (act_log_check(bt, what, sector, pid))
 		return;
 	cpu = raw_smp_processor_id();
 
@@ -407,11 +407,13 @@ static struct rchan_callbacks blk_relay_callbacks = {
  * Setup everything required to start tracing
  */
 int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
-			struct blk_user_trace_setup *buts)
+		       struct block_device *bdev,
+		       struct blk_user_trace_setup *buts)
 {
 	struct blk_trace *old_bt, *bt = NULL;
 	struct dentry *dir = NULL;
 	int ret, i;
+	struct hd_struct *part = NULL;
 
 	if (!buts->buf_size || !buts->buf_nr)
 		return -EINVAL;
@@ -480,11 +482,21 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 	if (!bt->act_mask)
 		bt->act_mask = (u16) -1;
 
-	bt->start_lba = buts->start_lba;
-	bt->end_lba = buts->end_lba;
-	if (!bt->end_lba)
+	if (bdev)
+		part = bdev->bd_part;
+
+	if (part) {
+		bt->start_lba = part->start_sect;
+		bt->end_lba = part->start_sect + part->nr_sects;
+	} else
 		bt->end_lba = -1ULL;
 
+	/* overwrite with user settings */
+	if (buts->start_lba)
+		bt->start_lba = buts->start_lba;
+	if (buts->end_lba)
+		bt->end_lba = buts->end_lba;
+
 	bt->pid = buts->pid;
 	bt->trace_state = Blktrace_setup;
 
@@ -505,6 +517,7 @@ err:
 }
 
 int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
+		    struct block_device *bdev,
 		    char __user *arg)
 {
 	struct blk_user_trace_setup buts;
@@ -514,7 +527,7 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 	if (ret)
 		return -EFAULT;
 
-	ret = do_blk_trace_setup(q, name, dev, &buts);
+	ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
 	if (ret)
 		return ret;
 
@@ -582,7 +595,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
 	switch (cmd) {
 	case BLKTRACESETUP:
 		bdevname(bdev, b);
-		ret = blk_trace_setup(q, b, bdev->bd_dev, arg);
+		ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
 		break;
 	case BLKTRACESTART:
 		start = 1;
-- 
cgit v1.2.3-70-g09d2


From 1d54ad6da9192fed5dd3b60224d9f2dfea0dcd82 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Tue, 14 Apr 2009 14:00:05 +0800
Subject: blktrace: add trace/ to /sys/block/sda

Impact: allow ftrace-plugin blktrace to trace device-mapper devices

To trace a single partition:
  # echo 1 > /sys/block/sda/sda1/enable

To trace the whole sda instead:
  # echo 1 > /sys/block/sda/enable

Thus we also fix an issue reported by Ted, that ftrace-plugin blktrace
can't be used to trace device-mapper devices.

Now:

  # echo 1 > /sys/block/dm-0/trace/enable
  echo: write error: No such device or address
  # mount -t ext4 /dev/dm-0 /mnt
  # echo 1 > /sys/block/dm-0/trace/enable
  # echo blk > /debug/tracing/current_tracer

Reported-by: Theodore Tso <tytso@mit.edu>
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Shawn Du <duyuyang@gmail.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
LKML-Reference: <49E42665.6020506@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 block/blk-sysfs.c            | 7 ++++++-
 include/linux/blktrace_api.h | 6 ++++++
 kernel/trace/blktrace.c      | 5 +++++
 3 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 73f36beff5c..8653d710b39 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -387,16 +387,21 @@ struct kobj_type blk_queue_ktype = {
 int blk_register_queue(struct gendisk *disk)
 {
 	int ret;
+	struct device *dev = disk_to_dev(disk);
 
 	struct request_queue *q = disk->queue;
 
 	if (WARN_ON(!q))
 		return -ENXIO;
 
+	ret = blk_trace_init_sysfs(dev);
+	if (ret)
+		return ret;
+
 	if (!q->request_fn)
 		return 0;
 
-	ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj),
+	ret = kobject_add(&q->kobj, kobject_get(&dev->kobj),
 			  "%s", "queue");
 	if (ret < 0)
 		return ret;
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 267edc4017e..62763c95285 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -198,6 +198,7 @@ extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 			   char __user *arg);
 extern int blk_trace_startstop(struct request_queue *q, int start);
 extern int blk_trace_remove(struct request_queue *q);
+extern int blk_trace_init_sysfs(struct device *dev);
 
 extern struct attribute_group blk_trace_attr_group;
 
@@ -210,6 +211,11 @@ extern struct attribute_group blk_trace_attr_group;
 # define blk_trace_startstop(q, start)			(-ENOTTY)
 # define blk_trace_remove(q)				(-ENOTTY)
 # define blk_add_trace_msg(q, fmt, ...)			do { } while (0)
+static inline int blk_trace_init_sysfs(struct device *dev)
+{
+	return 0;
+}
+
 #endif /* CONFIG_BLK_DEV_IO_TRACE */
 
 #endif /* __KERNEL__ */
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index d1098988052..8e7c5da3a3e 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1644,3 +1644,8 @@ out:
 	return ret ? ret : count;
 }
 
+int blk_trace_init_sysfs(struct device *dev)
+{
+	return sysfs_create_group(&dev->kobj, &blk_trace_attr_group);
+}
+
-- 
cgit v1.2.3-70-g09d2


From 76620aafd66f0004829764940c5466144969cffc Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 16 Apr 2009 02:02:07 -0700
Subject: gro: New frags interface to avoid copying shinfo

It turns out that copying a 16-byte area at ~800k times a second
can be really expensive :) This patch redesigns the frags GRO
interface to avoid copying that area twice.

The two disciples of the frags interface have been converted.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/cxgb3/adapter.h |  2 +-
 drivers/net/cxgb3/sge.c     | 53 +++++++++++++++++------------
 drivers/net/sfc/rx.c        | 26 ++++++++++-----
 include/linux/if_vlan.h     |  6 ++--
 include/linux/netdevice.h   | 22 ++++++------
 net/8021q/vlan_core.c       |  4 +--
 net/core/dev.c              | 81 ++++++++++++++++++++-------------------------
 7 files changed, 101 insertions(+), 93 deletions(-)

(limited to 'include')

diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h
index 714df2b675e..322434ac42f 100644
--- a/drivers/net/cxgb3/adapter.h
+++ b/drivers/net/cxgb3/adapter.h
@@ -195,7 +195,7 @@ struct sge_qset {		/* an SGE queue set */
 	struct sge_rspq rspq;
 	struct sge_fl fl[SGE_RXQ_PER_SET];
 	struct sge_txq txq[SGE_TXQ_PER_SET];
-	struct napi_gro_fraginfo lro_frag_tbl;
+	int nomem;
 	int lro_enabled;
 	void *lro_va;
 	struct net_device *netdev;
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 26d3587f339..73d569e758e 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -654,7 +654,8 @@ static void t3_reset_qset(struct sge_qset *q)
 	q->txq_stopped = 0;
 	q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */
 	q->rx_reclaim_timer.function = NULL;
-	q->lro_frag_tbl.nr_frags = q->lro_frag_tbl.len = 0;
+	q->nomem = 0;
+	napi_free_frags(&q->napi);
 }
 
 
@@ -2074,20 +2075,19 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
 			 struct sge_fl *fl, int len, int complete)
 {
 	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
+	struct sk_buff *skb = NULL;
 	struct cpl_rx_pkt *cpl;
-	struct skb_frag_struct *rx_frag = qs->lro_frag_tbl.frags;
-	int nr_frags = qs->lro_frag_tbl.nr_frags;
-	int frag_len = qs->lro_frag_tbl.len;
+	struct skb_frag_struct *rx_frag;
+	int nr_frags;
 	int offset = 0;
 
-	if (!nr_frags) {
-		offset = 2 + sizeof(struct cpl_rx_pkt);
-		qs->lro_va = cpl = sd->pg_chunk.va + 2;
+	if (!qs->nomem) {
+		skb = napi_get_frags(&qs->napi);
+		qs->nomem = !skb;
 	}
 
 	fl->credits--;
 
-	len -= offset;
 	pci_dma_sync_single_for_cpu(adap->pdev,
 				    pci_unmap_addr(sd, dma_addr),
 				    fl->buf_size - SGE_PG_RSVD,
@@ -2100,21 +2100,38 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
 			       fl->alloc_size,
 			       PCI_DMA_FROMDEVICE);
 
+	if (!skb) {
+		put_page(sd->pg_chunk.page);
+		if (complete)
+			qs->nomem = 0;
+		return;
+	}
+
+	rx_frag = skb_shinfo(skb)->frags;
+	nr_frags = skb_shinfo(skb)->nr_frags;
+
+	if (!nr_frags) {
+		offset = 2 + sizeof(struct cpl_rx_pkt);
+		qs->lro_va = sd->pg_chunk.va + 2;
+	}
+	len -= offset;
+
 	prefetch(qs->lro_va);
 
 	rx_frag += nr_frags;
 	rx_frag->page = sd->pg_chunk.page;
 	rx_frag->page_offset = sd->pg_chunk.offset + offset;
 	rx_frag->size = len;
-	frag_len += len;
-	qs->lro_frag_tbl.nr_frags++;
-	qs->lro_frag_tbl.len = frag_len;
 
+	skb->len += len;
+	skb->data_len += len;
+	skb->truesize += len;
+	skb_shinfo(skb)->nr_frags++;
 
 	if (!complete)
 		return;
 
-	qs->lro_frag_tbl.ip_summed = CHECKSUM_UNNECESSARY;
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
 	cpl = qs->lro_va;
 
 	if (unlikely(cpl->vlan_valid)) {
@@ -2123,15 +2140,11 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
 		struct vlan_group *grp = pi->vlan_grp;
 
 		if (likely(grp != NULL)) {
-			vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan),
-				       &qs->lro_frag_tbl);
-			goto out;
+			vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan));
+			return;
 		}
 	}
-	napi_gro_frags(&qs->napi, &qs->lro_frag_tbl);
-
-out:
-	qs->lro_frag_tbl.nr_frags = qs->lro_frag_tbl.len = 0;
+	napi_gro_frags(&qs->napi);
 }
 
 /**
@@ -2300,8 +2313,6 @@ no_mem:
 			if (fl->use_pages) {
 				void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
 
-				prefetch(&qs->lro_frag_tbl);
-
 				prefetch(addr);
 #if L1_CACHE_BYTES < 128
 				prefetch(addr + L1_CACHE_BYTES);
diff --git a/drivers/net/sfc/rx.c b/drivers/net/sfc/rx.c
index 66d7fe3db3e..01f9432c31e 100644
--- a/drivers/net/sfc/rx.c
+++ b/drivers/net/sfc/rx.c
@@ -450,17 +450,27 @@ static void efx_rx_packet_lro(struct efx_channel *channel,
 
 	/* Pass the skb/page into the LRO engine */
 	if (rx_buf->page) {
-		struct napi_gro_fraginfo info;
+		struct sk_buff *skb = napi_get_frags(napi);
 
-		info.frags[0].page = rx_buf->page;
-		info.frags[0].page_offset = efx_rx_buf_offset(rx_buf);
-		info.frags[0].size = rx_buf->len;
-		info.nr_frags = 1;
-		info.ip_summed = CHECKSUM_UNNECESSARY;
-		info.len = rx_buf->len;
+		if (!skb) {
+			put_page(rx_buf->page);
+			goto out;
+		}
+
+		skb_shinfo(skb)->frags[0].page = rx_buf->page;
+		skb_shinfo(skb)->frags[0].page_offset =
+			efx_rx_buf_offset(rx_buf);
+		skb_shinfo(skb)->frags[0].size = rx_buf->len;
+		skb_shinfo(skb)->nr_frags = 1;
+
+		skb->len = rx_buf->len;
+		skb->data_len = rx_buf->len;
+		skb->truesize += rx_buf->len;
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-		napi_gro_frags(napi, &info);
+		napi_gro_frags(napi);
 
+out:
 		EFX_BUG_ON_PARANOID(rx_buf->skb);
 		rx_buf->page = NULL;
 	} else {
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index e1ff5b14310..7ff9af1d0f0 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -118,8 +118,7 @@ extern int vlan_hwaccel_do_receive(struct sk_buff *skb);
 extern int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
 			    unsigned int vlan_tci, struct sk_buff *skb);
 extern int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
-			  unsigned int vlan_tci,
-			  struct napi_gro_fraginfo *info);
+			  unsigned int vlan_tci);
 
 #else
 static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev)
@@ -154,8 +153,7 @@ static inline int vlan_gro_receive(struct napi_struct *napi,
 }
 
 static inline int vlan_gro_frags(struct napi_struct *napi,
-				 struct vlan_group *grp, unsigned int vlan_tci,
-				 struct napi_gro_fraginfo *info)
+				 struct vlan_group *grp, unsigned int vlan_tci)
 {
 	return NET_RX_DROP;
 }
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2e7783f4a75..54db3ebf219 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1047,14 +1047,6 @@ struct packet_type {
 	struct list_head	list;
 };
 
-struct napi_gro_fraginfo {
-	skb_frag_t frags[MAX_SKB_FRAGS];
-	unsigned int nr_frags;
-	unsigned int ip_summed;
-	unsigned int len;
-	__wsum csum;
-};
-
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
 
@@ -1442,12 +1434,18 @@ extern int		napi_gro_receive(struct napi_struct *napi,
 					 struct sk_buff *skb);
 extern void		napi_reuse_skb(struct napi_struct *napi,
 				       struct sk_buff *skb);
-extern struct sk_buff *	napi_fraginfo_skb(struct napi_struct *napi,
-					  struct napi_gro_fraginfo *info);
+extern struct sk_buff *	napi_get_frags(struct napi_struct *napi);
 extern int		napi_frags_finish(struct napi_struct *napi,
 					  struct sk_buff *skb, int ret);
-extern int		napi_gro_frags(struct napi_struct *napi,
-				       struct napi_gro_fraginfo *info);
+extern struct sk_buff *	napi_frags_skb(struct napi_struct *napi);
+extern int		napi_gro_frags(struct napi_struct *napi);
+
+static inline void napi_free_frags(struct napi_struct *napi)
+{
+	kfree_skb(napi->skb);
+	napi->skb = NULL;
+}
+
 extern void		netif_nit_deliver(struct sk_buff *skb);
 extern int		dev_valid_name(const char *name);
 extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 654e45f5719..c1f51e4a01b 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -114,9 +114,9 @@ int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
 EXPORT_SYMBOL(vlan_gro_receive);
 
 int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
-		   unsigned int vlan_tci, struct napi_gro_fraginfo *info)
+		   unsigned int vlan_tci)
 {
-	struct sk_buff *skb = napi_fraginfo_skb(napi, info);
+	struct sk_buff *skb = napi_frags_skb(napi);
 
 	if (!skb)
 		return NET_RX_DROP;
diff --git a/net/core/dev.c b/net/core/dev.c
index 91d792d17e0..619fa141b8f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2519,16 +2519,10 @@ void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(napi_reuse_skb);
 
-struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
-				  struct napi_gro_fraginfo *info)
+struct sk_buff *napi_get_frags(struct napi_struct *napi)
 {
 	struct net_device *dev = napi->dev;
 	struct sk_buff *skb = napi->skb;
-	struct ethhdr *eth;
-	skb_frag_t *frag;
-	int i;
-
-	napi->skb = NULL;
 
 	if (!skb) {
 		skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
@@ -2536,47 +2530,14 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
 			goto out;
 
 		skb_reserve(skb, NET_IP_ALIGN);
-	}
-
-	BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
-	frag = &info->frags[info->nr_frags - 1];
 
-	for (i = skb_shinfo(skb)->nr_frags; i < info->nr_frags; i++) {
-		skb_fill_page_desc(skb, i, frag->page, frag->page_offset,
-				   frag->size);
-		frag++;
+		napi->skb = skb;
 	}
-	skb_shinfo(skb)->nr_frags = info->nr_frags;
-
-	skb->data_len = info->len;
-	skb->len += info->len;
-	skb->truesize += info->len;
-
-	skb_reset_mac_header(skb);
-	skb_gro_reset_offset(skb);
-
-	eth = skb_gro_header(skb, sizeof(*eth));
-	if (!eth) {
-		napi_reuse_skb(napi, skb);
-		skb = NULL;
-		goto out;
-	}
-
-	skb_gro_pull(skb, sizeof(*eth));
-
-	/*
-	 * This works because the only protocols we care about don't require
-	 * special handling.  We'll fix it up properly at the end.
-	 */
-	skb->protocol = eth->h_proto;
-
-	skb->ip_summed = info->ip_summed;
-	skb->csum = info->csum;
 
 out:
 	return skb;
 }
-EXPORT_SYMBOL(napi_fraginfo_skb);
+EXPORT_SYMBOL(napi_get_frags);
 
 int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
 {
@@ -2606,9 +2567,39 @@ int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
 }
 EXPORT_SYMBOL(napi_frags_finish);
 
-int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
+struct sk_buff *napi_frags_skb(struct napi_struct *napi)
+{
+	struct sk_buff *skb = napi->skb;
+	struct ethhdr *eth;
+
+	napi->skb = NULL;
+
+	skb_reset_mac_header(skb);
+	skb_gro_reset_offset(skb);
+
+	eth = skb_gro_header(skb, sizeof(*eth));
+	if (!eth) {
+		napi_reuse_skb(napi, skb);
+		skb = NULL;
+		goto out;
+	}
+
+	skb_gro_pull(skb, sizeof(*eth));
+
+	/*
+	 * This works because the only protocols we care about don't require
+	 * special handling.  We'll fix it up properly at the end.
+	 */
+	skb->protocol = eth->h_proto;
+
+out:
+	return skb;
+}
+EXPORT_SYMBOL(napi_frags_skb);
+
+int napi_gro_frags(struct napi_struct *napi)
 {
-	struct sk_buff *skb = napi_fraginfo_skb(napi, info);
+	struct sk_buff *skb = napi_frags_skb(napi);
 
 	if (!skb)
 		return NET_RX_DROP;
@@ -2712,7 +2703,7 @@ void netif_napi_del(struct napi_struct *napi)
 	struct sk_buff *skb, *next;
 
 	list_del_init(&napi->dev_list);
-	kfree_skb(napi->skb);
+	napi_free_frags(napi);
 
 	for (skb = napi->gro_list; skb; skb = next) {
 		next = skb->next;
-- 
cgit v1.2.3-70-g09d2


From 93eb677d74a4f7d3edfb678c94f6c0544d9fbad2 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 15 Apr 2009 13:24:06 -0400
Subject: ftrace: use module notifier for function tracer

The hooks in the module code for the function tracer must be called
before any of that module code runs. The function tracer hooks
modify the module (replacing calls to mcount to nops). If the code
is executed while the change occurs, then the CPU can take a GPF.

To handle the above with a bit of paranoia, I originally implemented
the hooks as calls directly from the module code.

After examining the notifier calls, it looks as though the start up
notify is called before any of the module's code is executed. This makes
the use of the notify safe with ftrace.

Only the startup notify is required to be "safe". The shutdown simply
removes the entries from the ftrace function list, and does not modify
any code.

This change has another benefit. It removes a issue with a reverse dependency
in the mutexes of ftrace_lock and module_mutex.

[ Impact: fix lock dependency bug, cleanup ]

Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h |  7 ----
 include/linux/module.h |  4 +++
 kernel/module.c        | 19 ++++-------
 kernel/trace/ftrace.c  | 90 +++++++++++++++++++++++++++++++++++---------------
 4 files changed, 75 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 53869bef610..97c83e1bc58 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -233,8 +233,6 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size);
 
 extern int skip_trace(unsigned long ip);
 
-extern void ftrace_release(void *start, unsigned long size);
-
 extern void ftrace_disable_daemon(void);
 extern void ftrace_enable_daemon(void);
 #else
@@ -325,13 +323,8 @@ static inline void __ftrace_enabled_restore(int enabled)
 
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 extern void ftrace_init(void);
-extern void ftrace_init_module(struct module *mod,
-			       unsigned long *start, unsigned long *end);
 #else
 static inline void ftrace_init(void) { }
-static inline void
-ftrace_init_module(struct module *mod,
-		   unsigned long *start, unsigned long *end) { }
 #endif
 
 /*
diff --git a/include/linux/module.h b/include/linux/module.h
index 6155fa44168..a8f2c0aa4c3 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -341,6 +341,10 @@ struct module
 	struct ftrace_event_call *trace_events;
 	unsigned int num_trace_events;
 #endif
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+	unsigned long *ftrace_callsites;
+	unsigned int num_ftrace_callsites;
+#endif
 
 #ifdef CONFIG_MODULE_UNLOAD
 	/* What modules depend on me? */
diff --git a/kernel/module.c b/kernel/module.c
index a0394706f10..2383e60fcf3 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1490,9 +1490,6 @@ static void free_module(struct module *mod)
 	/* Free any allocated parameters. */
 	destroy_params(mod->kp, mod->num_kp);
 
-	/* release any pointers to mcount in this module */
-	ftrace_release(mod->module_core, mod->core_size);
-
 	/* This may be NULL, but that's OK */
 	module_free(mod, mod->module_init);
 	kfree(mod->args);
@@ -1893,11 +1890,9 @@ static noinline struct module *load_module(void __user *umod,
 	unsigned int symindex = 0;
 	unsigned int strindex = 0;
 	unsigned int modindex, versindex, infoindex, pcpuindex;
-	unsigned int num_mcount;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
-	unsigned long *mseg;
 	mm_segment_t old_fs;
 
 	DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
@@ -2179,7 +2174,13 @@ static noinline struct module *load_module(void __user *umod,
 					 sizeof(*mod->trace_events),
 					 &mod->num_trace_events);
 #endif
-
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+	/* sechdrs[0].sh_size is always zero */
+	mod->ftrace_callsites = section_objs(hdr, sechdrs, secstrings,
+					     "__mcount_loc",
+					     sizeof(*mod->ftrace_callsites),
+					     &mod->num_ftrace_callsites);
+#endif
 #ifdef CONFIG_MODVERSIONS
 	if ((mod->num_syms && !mod->crcs)
 	    || (mod->num_gpl_syms && !mod->gpl_crcs)
@@ -2244,11 +2245,6 @@ static noinline struct module *load_module(void __user *umod,
 			dynamic_debug_setup(debug, num_debug);
 	}
 
-	/* sechdrs[0].sh_size is always zero */
-	mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc",
-			    sizeof(*mseg), &num_mcount);
-	ftrace_init_module(mod, mseg, mseg + num_mcount);
-
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
 		goto cleanup;
@@ -2309,7 +2305,6 @@ static noinline struct module *load_module(void __user *umod,
  cleanup:
 	kobject_del(&mod->mkobj.kobj);
 	kobject_put(&mod->mkobj.kobj);
-	ftrace_release(mod->module_core, mod->core_size);
  free_unload:
 	module_unload_free(mod);
 #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a2348898858..5b606f45b6c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -916,30 +916,6 @@ static void ftrace_free_rec(struct dyn_ftrace *rec)
 	rec->flags |= FTRACE_FL_FREE;
 }
 
-void ftrace_release(void *start, unsigned long size)
-{
-	struct dyn_ftrace *rec;
-	struct ftrace_page *pg;
-	unsigned long s = (unsigned long)start;
-	unsigned long e = s + size;
-
-	if (ftrace_disabled || !start)
-		return;
-
-	mutex_lock(&ftrace_lock);
-	do_for_each_ftrace_rec(pg, rec) {
-		if ((rec->ip >= s) && (rec->ip < e)) {
-			/*
-			 * rec->ip is changed in ftrace_free_rec()
-			 * It should not between s and e if record was freed.
-			 */
-			FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
-			ftrace_free_rec(rec);
-		}
-	} while_for_each_ftrace_rec();
-	mutex_unlock(&ftrace_lock);
-}
-
 static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
 {
 	struct dyn_ftrace *rec;
@@ -2752,14 +2728,72 @@ static int ftrace_convert_nops(struct module *mod,
 	return 0;
 }
 
-void ftrace_init_module(struct module *mod,
-			unsigned long *start, unsigned long *end)
+#ifdef CONFIG_MODULES
+void ftrace_release(void *start, void *end)
+{
+	struct dyn_ftrace *rec;
+	struct ftrace_page *pg;
+	unsigned long s = (unsigned long)start;
+	unsigned long e = (unsigned long)end;
+
+	if (ftrace_disabled || !start || start == end)
+		return;
+
+	mutex_lock(&ftrace_lock);
+	do_for_each_ftrace_rec(pg, rec) {
+		if ((rec->ip >= s) && (rec->ip < e)) {
+			/*
+			 * rec->ip is changed in ftrace_free_rec()
+			 * It should not between s and e if record was freed.
+			 */
+			FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
+			ftrace_free_rec(rec);
+		}
+	} while_for_each_ftrace_rec();
+	mutex_unlock(&ftrace_lock);
+}
+
+static void ftrace_init_module(struct module *mod,
+			       unsigned long *start, unsigned long *end)
 {
 	if (ftrace_disabled || start == end)
 		return;
 	ftrace_convert_nops(mod, start, end);
 }
 
+static int ftrace_module_notify(struct notifier_block *self,
+				unsigned long val, void *data)
+{
+	struct module *mod = data;
+
+	switch (val) {
+	case MODULE_STATE_COMING:
+		ftrace_init_module(mod, mod->ftrace_callsites,
+				   mod->ftrace_callsites +
+				   mod->num_ftrace_callsites);
+		break;
+	case MODULE_STATE_GOING:
+		ftrace_release(mod->ftrace_callsites,
+			       mod->ftrace_callsites +
+			       mod->num_ftrace_callsites);
+		break;
+	}
+
+	return 0;
+}
+#else
+static int ftrace_module_notify(struct notifier_block *self,
+				unsigned long val, void *data)
+{
+	return 0;
+}
+#endif /* CONFIG_MODULES */
+
+struct notifier_block ftrace_module_nb = {
+	.notifier_call = ftrace_module_notify,
+	.priority = 0,
+};
+
 extern unsigned long __start_mcount_loc[];
 extern unsigned long __stop_mcount_loc[];
 
@@ -2791,6 +2825,10 @@ void __init ftrace_init(void)
 				  __start_mcount_loc,
 				  __stop_mcount_loc);
 
+	ret = register_module_notifier(&ftrace_module_nb);
+	if (!ret)
+		pr_warning("Failed to register trace ftrace module notifier\n");
+
 	return;
  failed:
 	ftrace_disabled = 1;
-- 
cgit v1.2.3-70-g09d2


From d1b182a8d49ed6416325b4e0a1cb0f17cd4e702a Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 15 Apr 2009 16:53:47 -0400
Subject: tracing/events/ring-buffer: expose format of ring buffer headers to
 users

Currently, every thing needed to read the binary output from the
ring buffers is available, with the exception of the way the ring
buffers handles itself internally.

This patch creates two special files in the debugfs/tracing/events
directory:

 # cat /debug/tracing/events/header_page
        field: u64 timestamp;   offset:0;       size:8;
        field: local_t commit;  offset:8;       size:8;
        field: char data;       offset:16;      size:4080;

 # cat /debug/tracing/events/header_event
        type        :    2 bits
        len         :    3 bits
        time_delta  :   27 bits
        array       :   32 bits

        padding     : type == 0
        time_extend : type == 1
        data        : type == 3

This is to allow a userspace app to see if the ring buffer format changes
or not.

[ Impact: allow userspace apps to know of ringbuffer format changes ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h |  5 +++++
 kernel/trace/ring_buffer.c  | 44 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace_events.c | 38 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 87 insertions(+)

(limited to 'include')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index f0aa486d131..fac8f1ac6f4 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -166,6 +166,11 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data);
 int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page,
 			  size_t len, int cpu, int full);
 
+struct trace_seq;
+
+int ring_buffer_print_entry_header(struct trace_seq *s);
+int ring_buffer_print_page_header(struct trace_seq *s);
+
 enum ring_buffer_flags {
 	RB_FL_OVERWRITE		= 1 << 0,
 };
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index f935bd5ec3e..84a6055f37c 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -21,6 +21,28 @@
 
 #include "trace.h"
 
+/*
+ * The ring buffer header is special. We must manually up keep it.
+ */
+int ring_buffer_print_entry_header(struct trace_seq *s)
+{
+	int ret;
+
+	ret = trace_seq_printf(s, "\ttype        :    2 bits\n");
+	ret = trace_seq_printf(s, "\tlen         :    3 bits\n");
+	ret = trace_seq_printf(s, "\ttime_delta  :   27 bits\n");
+	ret = trace_seq_printf(s, "\tarray       :   32 bits\n");
+	ret = trace_seq_printf(s, "\n");
+	ret = trace_seq_printf(s, "\tpadding     : type == %d\n",
+			       RINGBUF_TYPE_PADDING);
+	ret = trace_seq_printf(s, "\ttime_extend : type == %d\n",
+			       RINGBUF_TYPE_TIME_EXTEND);
+	ret = trace_seq_printf(s, "\tdata        : type == %d\n",
+			       RINGBUF_TYPE_DATA);
+
+	return ret;
+}
+
 /*
  * The ring buffer is made up of a list of pages. A separate list of pages is
  * allocated for each CPU. A writer may only write to a buffer that is
@@ -340,6 +362,28 @@ static inline int test_time_stamp(u64 delta)
 
 #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)
 
+int ring_buffer_print_page_header(struct trace_seq *s)
+{
+	struct buffer_data_page field;
+	int ret;
+
+	ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
+			       "offset:0;\tsize:%u;\n",
+			       (unsigned int)sizeof(field.time_stamp));
+
+	ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
+			       "offset:%u;\tsize:%u;\n",
+			       (unsigned int)offsetof(typeof(field), commit),
+			       (unsigned int)sizeof(field.commit));
+
+	ret = trace_seq_printf(s, "\tfield: char data;\t"
+			       "offset:%u;\tsize:%u;\n",
+			       (unsigned int)offsetof(typeof(field), data),
+			       (unsigned int)BUF_PAGE_SIZE);
+
+	return ret;
+}
+
 /*
  * head_page == tail_page && head == tail then buffer is empty.
  */
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index f81d6eec4e4..7163a2bb021 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -610,6 +610,30 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
 	return cnt;
 }
 
+static ssize_t
+show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	int (*func)(struct trace_seq *s) = filp->private_data;
+	struct trace_seq *s;
+	int r;
+
+	if (*ppos)
+		return 0;
+
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		return -ENOMEM;
+
+	trace_seq_init(s);
+
+	func(s);
+	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
+
+	kfree(s);
+
+	return r;
+}
+
 static const struct seq_operations show_event_seq_ops = {
 	.start = t_start,
 	.next = t_next,
@@ -667,6 +691,11 @@ static const struct file_operations ftrace_subsystem_filter_fops = {
 	.write = subsystem_filter_write,
 };
 
+static const struct file_operations ftrace_show_header_fops = {
+	.open = tracing_open_generic,
+	.read = show_header,
+};
+
 static struct dentry *event_trace_events_dir(void)
 {
 	static struct dentry *d_tracer;
@@ -909,6 +938,15 @@ static __init int event_trace_init(void)
 	if (!d_events)
 		return 0;
 
+	/* ring buffer internal formats */
+	trace_create_file("header_page", 0444, d_events,
+			  ring_buffer_print_page_header,
+			  &ftrace_show_header_fops);
+
+	trace_create_file("header_event", 0444, d_events,
+			  ring_buffer_print_entry_header,
+			  &ftrace_show_header_fops);
+
 	for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
 		/* The linker may leave blanks */
 		if (!call->name)
-- 
cgit v1.2.3-70-g09d2


From 76aa81118ddfbb3dc31533030cf3ec329dd067a6 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 16 Apr 2009 23:35:39 -0700
Subject: tracing: avoid warnings from zero-arg tracepoints

Tracepoints with no arguments can issue two warnings:

	"field" defined by not used
	"ret" is uninitialized in this function

Mark field as being OK to leave unused, and initialize ret.

[ Impact: fix false positive compiler warnings. ]

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: mathieu.desnoyers@polymtl.ca
LKML-Reference: <1239950139-1119-5-git-send-email-jeremy@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/ftrace.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 60c5323bee6..39a3351f2e7 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -160,8 +160,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 static int								\
 ftrace_format_##call(struct trace_seq *s)				\
 {									\
-	struct ftrace_raw_##call field;					\
-	int ret;							\
+	struct ftrace_raw_##call field __attribute__((unused));		\
+	int ret = 0;							\
 									\
 	tstruct;							\
 									\
-- 
cgit v1.2.3-70-g09d2


From 46de405f25f1d9fa73b657ffbb752aa0cc87a91d Mon Sep 17 00:00:00 2001
From: Zhaolei <zhaolei@cn.fujitsu.com>
Date: Fri, 17 Apr 2009 10:53:43 +0800
Subject: tracing: Remove include/trace/kmem_event_types.h

kmem_event_types.h is no longer necessary since tracepoint definitions
are put into include/trace/events/kmem.h

[ Impact: remove now-unused file. ]

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <49E7EF37.2080205@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/kmem_event_types.h | 193 ---------------------------------------
 1 file changed, 193 deletions(-)
 delete mode 100644 include/trace/kmem_event_types.h

(limited to 'include')

diff --git a/include/trace/kmem_event_types.h b/include/trace/kmem_event_types.h
deleted file mode 100644
index 4ff420fe467..00000000000
--- a/include/trace/kmem_event_types.h
+++ /dev/null
@@ -1,193 +0,0 @@
-
-/* use <trace/kmem.h> instead */
-#ifndef TRACE_EVENT
-# error Do not include this file directly.
-# error Unless you know what you are doing.
-#endif
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM kmem
-
-TRACE_EVENT(kmalloc,
-
-	TP_PROTO(unsigned long call_site,
-		 const void *ptr,
-		 size_t bytes_req,
-		 size_t bytes_alloc,
-		 gfp_t gfp_flags),
-
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-		__field(	size_t,		bytes_req	)
-		__field(	size_t,		bytes_alloc	)
-		__field(	gfp_t,		gfp_flags	)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-		__entry->bytes_req	= bytes_req;
-		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= gfp_flags;
-	),
-
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
-		__entry->call_site,
-		__entry->ptr,
-		__entry->bytes_req,
-		__entry->bytes_alloc,
-		__entry->gfp_flags)
-);
-
-TRACE_EVENT(kmem_cache_alloc,
-
-	TP_PROTO(unsigned long call_site,
-		 const void *ptr,
-		 size_t bytes_req,
-		 size_t bytes_alloc,
-		 gfp_t gfp_flags),
-
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-		__field(	size_t,		bytes_req	)
-		__field(	size_t,		bytes_alloc	)
-		__field(	gfp_t,		gfp_flags	)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-		__entry->bytes_req	= bytes_req;
-		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= gfp_flags;
-	),
-
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
-		__entry->call_site,
-		__entry->ptr,
-		__entry->bytes_req,
-		__entry->bytes_alloc,
-		__entry->gfp_flags)
-);
-
-TRACE_EVENT(kmalloc_node,
-
-	TP_PROTO(unsigned long call_site,
-		 const void *ptr,
-		 size_t bytes_req,
-		 size_t bytes_alloc,
-		 gfp_t gfp_flags,
-		 int node),
-
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-		__field(	size_t,		bytes_req	)
-		__field(	size_t,		bytes_alloc	)
-		__field(	gfp_t,		gfp_flags	)
-		__field(	int,		node		)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-		__entry->bytes_req	= bytes_req;
-		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= gfp_flags;
-		__entry->node		= node;
-	),
-
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
-		__entry->call_site,
-		__entry->ptr,
-		__entry->bytes_req,
-		__entry->bytes_alloc,
-		__entry->gfp_flags,
-		__entry->node)
-);
-
-TRACE_EVENT(kmem_cache_alloc_node,
-
-	TP_PROTO(unsigned long call_site,
-		 const void *ptr,
-		 size_t bytes_req,
-		 size_t bytes_alloc,
-		 gfp_t gfp_flags,
-		 int node),
-
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-		__field(	size_t,		bytes_req	)
-		__field(	size_t,		bytes_alloc	)
-		__field(	gfp_t,		gfp_flags	)
-		__field(	int,		node		)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-		__entry->bytes_req	= bytes_req;
-		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= gfp_flags;
-		__entry->node		= node;
-	),
-
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
-		__entry->call_site,
-		__entry->ptr,
-		__entry->bytes_req,
-		__entry->bytes_alloc,
-		__entry->gfp_flags,
-		__entry->node)
-);
-
-TRACE_EVENT(kfree,
-
-	TP_PROTO(unsigned long call_site, const void *ptr),
-
-	TP_ARGS(call_site, ptr),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-	),
-
-	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
-);
-
-TRACE_EVENT(kmem_cache_free,
-
-	TP_PROTO(unsigned long call_site, const void *ptr),
-
-	TP_ARGS(call_site, ptr),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	call_site	)
-		__field(	const void *,	ptr		)
-	),
-
-	TP_fast_assign(
-		__entry->call_site	= call_site;
-		__entry->ptr		= ptr;
-	),
-
-	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
-);
-
-#undef TRACE_SYSTEM
-- 
cgit v1.2.3-70-g09d2


From b0afdc126d0515e76890f0a5f26b28501cfa298e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 17 Apr 2009 13:02:22 -0400
Subject: tracing/events: enable code with EVENT_TRACING not EVENT_TRACER

The CONFIG_EVENT_TRACER is the way to turn on event tracing when no
other tracing has been configured. All code to get enabled should
depend on CONFIG_EVENT_TRACING. That is what is enabled when TRACING
(or CONFIG_EVENT_TRACER) is selected.

This patch enables the include/trace/ftrace.h file when
CONFIG_EVENT_TRACING is enabled.

[ Impact: fix warning in event tracer selftest ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/define_trace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index 18869417109..7f1f23d601e 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -56,7 +56,7 @@
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
-#ifdef CONFIG_EVENT_TRACER
+#ifdef CONFIG_EVENT_TRACING
 #include <trace/ftrace.h>
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From 261842b7c9099f56de2eb969c8ad65402d68e00e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 16 Apr 2009 21:41:52 -0400
Subject: tracing: add same level recursion detection

The tracing infrastructure allows for recursion. That is, an interrupt
may interrupt the act of tracing an event, and that interrupt may very well
perform its own trace. This is a recursive trace, and is fine to do.

The problem arises when there is a bug, and the utility doing the trace
calls something that recurses back into the tracer. This recursion is not
caused by an external event like an interrupt, but by code that is not
expected to recurse. The result could be a lockup.

This patch adds a bitmask to the task structure that keeps track
of the trace recursion. To find the interrupt depth, the following
algorithm is used:

  level = hardirq_count() + softirq_count() + in_nmi;

Here, level will be the depth of interrutps and softirqs, and even handles
the nmi. Then the corresponding bit is set in the recursion bitmask.
If the bit was already set, we know we had a recursion at the same level
and we warn about it and fail the writing to the buffer.

After the data has been committed to the buffer, we clear the bit.
No atomics are needed. The only races are with interrupts and they reset
the bitmask before returning anywy.

[ Impact: detect same irq level trace recursion ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h     |  7 +++++++
 include/linux/init_task.h  |  1 +
 include/linux/sched.h      |  4 +++-
 kernel/trace/ring_buffer.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 53 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 97c83e1bc58..39b95c56587 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -488,8 +488,15 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk)
 
 extern int ftrace_dump_on_oops;
 
+#ifdef CONFIG_PREEMPT
+#define INIT_TRACE_RECURSION		.trace_recursion = 0,
+#endif
+
 #endif /* CONFIG_TRACING */
 
+#ifndef INIT_TRACE_RECURSION
+#define INIT_TRACE_RECURSION
+#endif
 
 #ifdef CONFIG_HW_BRANCH_TRACER
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index dcfb93337e9..6fc21852986 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -187,6 +187,7 @@ extern struct cred init_cred;
 	INIT_TRACE_IRQFLAGS						\
 	INIT_LOCKDEP							\
 	INIT_FTRACE_GRAPH						\
+	INIT_TRACE_RECURSION						\
 }
 
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b4c38bc8049..7ede5e49091 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1428,7 +1428,9 @@ struct task_struct {
 #ifdef CONFIG_TRACING
 	/* state flags for use by tracers */
 	unsigned long trace;
-#endif
+	/* bitmask of trace recursion */
+	unsigned long trace_recursion;
+#endif /* CONFIG_TRACING */
 };
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 84a6055f37c..b421b0ea911 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1481,6 +1481,40 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 	return event;
 }
 
+static int trace_irq_level(void)
+{
+	return hardirq_count() + softirq_count() + in_nmi();
+}
+
+static int trace_recursive_lock(void)
+{
+	int level;
+
+	level = trace_irq_level();
+
+	if (unlikely(current->trace_recursion & (1 << level))) {
+		/* Disable all tracing before we do anything else */
+		tracing_off_permanent();
+		WARN_ON_ONCE(1);
+		return -1;
+	}
+
+	current->trace_recursion |= 1 << level;
+
+	return 0;
+}
+
+static void trace_recursive_unlock(void)
+{
+	int level;
+
+	level = trace_irq_level();
+
+	WARN_ON_ONCE(!current->trace_recursion & (1 << level));
+
+	current->trace_recursion &= ~(1 << level);
+}
+
 static DEFINE_PER_CPU(int, rb_need_resched);
 
 /**
@@ -1514,6 +1548,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 	/* If we are tracing schedule, we don't want to recurse */
 	resched = ftrace_preempt_disable();
 
+	if (trace_recursive_lock())
+		goto out_nocheck;
+
 	cpu = raw_smp_processor_id();
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
@@ -1543,6 +1580,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 	return event;
 
  out:
+	trace_recursive_unlock();
+
+ out_nocheck:
 	ftrace_preempt_enable(resched);
 	return NULL;
 }
@@ -1581,6 +1621,8 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 
 	rb_commit(cpu_buffer, event);
 
+	trace_recursive_unlock();
+
 	/*
 	 * Only the last preempt count needs to restore preemption.
 	 */
-- 
cgit v1.2.3-70-g09d2


From bd3ce6556072bdc8ea66dfd5448e184f189bdc7f Mon Sep 17 00:00:00 2001
From: H Hartley Sweeten <hartleys@visionengravers.com>
Date: Fri, 17 Apr 2009 20:12:35 -0700
Subject: Input: rotary_encoder - add support for REL_* axes

The rotary encoder driver only supports returning input events
for ABS_* axes, this adds support for REL_* axes.  The relative
axis input event is reported as -1 for each counter-clockwise
step and +1 for each clockwise step.

The ability to clamp the position of ABS_* axes between 0 and
a maximum of "steps" has also been added.

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 Documentation/input/rotary-encoder.txt |  9 ++++-
 drivers/input/misc/rotary_encoder.c    | 61 +++++++++++++++++++++++-----------
 include/linux/rotary_encoder.h         |  2 ++
 3 files changed, 51 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/Documentation/input/rotary-encoder.txt b/Documentation/input/rotary-encoder.txt
index 435102a26d9..3a6aec40c0b 100644
--- a/Documentation/input/rotary-encoder.txt
+++ b/Documentation/input/rotary-encoder.txt
@@ -67,7 +67,12 @@ data with it.
 struct rotary_encoder_platform_data is declared in
 include/linux/rotary-encoder.h and needs to be filled with the number of
 steps the encoder has and can carry information about externally inverted
-signals (because of used invertig buffer or other reasons).
+signals (because of an inverting buffer or other reasons). The encoder
+can be set up to deliver input information as either an absolute or relative
+axes. For relative axes the input event returns +/-1 for each step. For
+absolute axes the position of the encoder can either roll over between zero
+and the number of steps or will clamp at the maximum and zero depending on
+the configuration.
 
 Because GPIO to IRQ mapping is platform specific, this information must
 be given in seperately to the driver. See the example below.
@@ -85,6 +90,8 @@ be given in seperately to the driver. See the example below.
 static struct rotary_encoder_platform_data my_rotary_encoder_info = {
 	.steps		= 24,
 	.axis		= ABS_X,
+	.relative_axis	= false,
+	.rollover	= false,
 	.gpio_a		= GPIO_ROTARY_A,
 	.gpio_b		= GPIO_ROTARY_B,
 	.inverted_a	= 0,
diff --git a/drivers/input/misc/rotary_encoder.c b/drivers/input/misc/rotary_encoder.c
index 5bb3ab51b8c..c806fbf1e17 100644
--- a/drivers/input/misc/rotary_encoder.c
+++ b/drivers/input/misc/rotary_encoder.c
@@ -26,13 +26,17 @@
 #define DRV_NAME "rotary-encoder"
 
 struct rotary_encoder {
-	unsigned int irq_a;
-	unsigned int irq_b;
-	unsigned int pos;
-	unsigned int armed;
-	unsigned int dir;
 	struct input_dev *input;
 	struct rotary_encoder_platform_data *pdata;
+
+	unsigned int axis;
+	unsigned int pos;
+
+	unsigned int irq_a;
+	unsigned int irq_b;
+
+	bool armed;
+	unsigned char dir;	/* 0 - clockwise, 1 - CCW */
 };
 
 static irqreturn_t rotary_encoder_irq(int irq, void *dev_id)
@@ -53,21 +57,32 @@ static irqreturn_t rotary_encoder_irq(int irq, void *dev_id)
 		if (!encoder->armed)
 			break;
 
-		if (encoder->dir) {
-			/* turning counter-clockwise */
-			encoder->pos += pdata->steps;
-			encoder->pos--;
-			encoder->pos %= pdata->steps;
+		if (pdata->relative_axis) {
+			input_report_rel(encoder->input, pdata->axis,
+					 encoder->dir ? -1 : 1);
 		} else {
-			/* turning clockwise */
-			encoder->pos++;
-			encoder->pos %= pdata->steps;
+			unsigned int pos = encoder->pos;
+
+			if (encoder->dir) {
+				/* turning counter-clockwise */
+				if (pdata->rollover)
+					pos += pdata->steps;
+				if (pos)
+					pos--;
+			} else {
+				/* turning clockwise */
+				if (pdata->rollover || pos < pdata->steps)
+					pos++;
+			}
+			if (pdata->rollover)
+				pos %= pdata->steps;
+			encoder->pos = pos;
+			input_report_abs(encoder->input, pdata->axis,
+					 encoder->pos);
 		}
-
-		input_report_abs(encoder->input, pdata->axis, encoder->pos);
 		input_sync(encoder->input);
 
-		encoder->armed = 0;
+		encoder->armed = false;
 		break;
 
 	case 0x1:
@@ -77,7 +92,7 @@ static irqreturn_t rotary_encoder_irq(int irq, void *dev_id)
 		break;
 
 	case 0x3:
-		encoder->armed = 1;
+		encoder->armed = true;
 		break;
 	}
 
@@ -113,9 +128,15 @@ static int __devinit rotary_encoder_probe(struct platform_device *pdev)
 	input->name = pdev->name;
 	input->id.bustype = BUS_HOST;
 	input->dev.parent = &pdev->dev;
-	input->evbit[0] = BIT_MASK(EV_ABS);
-	input_set_abs_params(encoder->input,
-			     pdata->axis, 0, pdata->steps, 0, 1);
+
+	if (pdata->relative_axis) {
+		input->evbit[0] = BIT_MASK(EV_REL);
+		input->relbit[0] = BIT_MASK(pdata->axis);
+	} else {
+		input->evbit[0] = BIT_MASK(EV_ABS);
+		input_set_abs_params(encoder->input,
+				     pdata->axis, 0, pdata->steps, 0, 1);
+	}
 
 	err = input_register_device(input);
 	if (err) {
diff --git a/include/linux/rotary_encoder.h b/include/linux/rotary_encoder.h
index 12d63a30c34..215278b8df2 100644
--- a/include/linux/rotary_encoder.h
+++ b/include/linux/rotary_encoder.h
@@ -8,6 +8,8 @@ struct rotary_encoder_platform_data {
 	unsigned int gpio_b;
 	unsigned int inverted_a;
 	unsigned int inverted_b;
+	bool relative_axis;
+	bool rollover;
 };
 
 #endif /* __ROTARY_ENCODER_H__ */
-- 
cgit v1.2.3-70-g09d2


From 8e668b5b3455207e4540fc7ccab9ecf70142f288 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 17 Apr 2009 17:17:55 -0400
Subject: tracing: remove format attribute of inline function

Due to a cut and paste error, I added the gcc attribute for printf
format to the static inline stub of trace_seq_printf.

This will cause a compile failure.

[ Impact: fix compiler error when CONFIG_TRACING is off ]

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: =?ISO-8859-15?Q?Fr=E9d=E9ric_Weisbecker?= <fweisbec@gmail.com>
LKML-Reference: <alpine.DEB.2.00.0904171717080.1016@gandalf.stny.rr.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/trace_seq.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index 15ca2c71af1..37db9bdfbc1 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -42,7 +42,6 @@ extern int trace_seq_path(struct trace_seq *s, struct path *path);
 
 #else /* CONFIG_TRACING */
 static inline int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
-	__attribute__ ((format (printf, 2, 3)))
 {
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 46a802e852c2106d755f697819ea80cd3ffdc222 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:41 +0900
Subject: ide kill unused ide_cmd->special

Impact: removal of unused field

No one uses ide_cmd->special anymore.  Kill it.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/ide.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ide.h b/include/linux/ide.h
index ff65fffb078..846a1e13240 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -324,7 +324,6 @@ struct ide_cmd {
 	unsigned int		cursg_ofs;
 
 	struct request		*rq;		/* copy of request */
-	void			*special;	/* valid_t generally */
 };
 
 /* ATAPI packet command flags */
-- 
cgit v1.2.3-70-g09d2


From a1df5169f9bf08f6067029bfb840a05e282b1b97 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Sun, 19 Apr 2009 07:00:42 +0900
Subject: ide: add helpers for preparing sense requests

This is in preparation of removing the queueing of a sense request out
of the IRQ handler path.

Use struct request_sense as a general sense buffer for all ATAPI
devices ide-{floppy,tape,cd}.

tj: * blk_get_request(__GFP_WAIT) can't be called from do_request() as
      it can cause deadlock.  Converted to use inline struct request
      and blk_rq_init().

    * Added xfer / cdb len selection depending on device type.

    * All sense prep logics folded into ide_prep_sense() which never
      fails.

    * hwif->rq clearing and sense_rq used handling moved into
      ide_queue_sense_rq().

    * blk_rq_map_kern() conversion is moved to later patch.

CC: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
CC: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ide/ide-atapi.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/ide.h     | 11 +++++++++
 2 files changed, 72 insertions(+)

(limited to 'include')

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 2894577237b..c6e03485a63 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -191,6 +191,67 @@ void ide_create_request_sense_cmd(ide_drive_t *drive, struct ide_atapi_pc *pc)
 }
 EXPORT_SYMBOL_GPL(ide_create_request_sense_cmd);
 
+void ide_prep_sense(ide_drive_t *drive, struct request *rq)
+{
+	struct request_sense *sense = &drive->sense_data;
+	struct request *sense_rq = &drive->sense_rq;
+	unsigned int cmd_len, sense_len;
+
+	debug_log("%s: enter\n", __func__);
+
+	switch (drive->media) {
+	case ide_floppy:
+		cmd_len = 255;
+		sense_len = 18;
+		break;
+	case ide_tape:
+		cmd_len = 20;
+		sense_len = 20;
+		break;
+	default:
+		cmd_len = 18;
+		sense_len = 18;
+	}
+
+	BUG_ON(sense_len > sizeof(*sense));
+
+	if (blk_sense_request(rq) || drive->sense_rq_armed)
+		return;
+
+	memset(sense, 0, sizeof(*sense));
+
+	blk_rq_init(rq->q, sense_rq);
+	sense_rq->rq_disk = rq->rq_disk;
+
+	sense_rq->data = sense;
+	sense_rq->cmd[0] = GPCMD_REQUEST_SENSE;
+	sense_rq->cmd[4] = cmd_len;
+	sense_rq->data_len = sense_len;
+
+	sense_rq->cmd_type = REQ_TYPE_SENSE;
+	sense_rq->cmd_flags |= REQ_PREEMPT;
+
+	if (drive->media == ide_tape)
+		sense_rq->cmd[13] = REQ_IDETAPE_PC1;
+
+	drive->sense_rq_armed = true;
+}
+EXPORT_SYMBOL_GPL(ide_prep_sense);
+
+void ide_queue_sense_rq(ide_drive_t *drive, void *special)
+{
+	BUG_ON(!drive->sense_rq_armed);
+
+	drive->sense_rq.special = special;
+	drive->sense_rq_armed = false;
+
+	drive->hwif->rq = NULL;
+
+	elv_add_request(drive->queue, &drive->sense_rq,
+			ELEVATOR_INSERT_FRONT, 0);
+}
+EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
+
 /*
  * Called when an error was detected during the last packet command.
  * We queue a request sense packet command in the head of the request list.
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 846a1e13240..a69ccac5641 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -26,6 +26,9 @@
 #include <asm/io.h>
 #include <asm/mutex.h>
 
+/* for request_sense */
+#include <linux/cdrom.h>
+
 #if defined(CONFIG_CRIS) || defined(CONFIG_FRV) || defined(CONFIG_MN10300)
 # define SUPPORT_VLB_SYNC 0
 #else
@@ -602,6 +605,11 @@ struct ide_drive_s {
 
 	struct ide_atapi_pc request_sense_pc;
 	struct request request_sense_rq;
+
+	/* current sense rq and buffer */
+	bool sense_rq_armed;
+	struct request sense_rq;
+	struct request_sense sense_data;
 };
 
 typedef struct ide_drive_s ide_drive_t;
@@ -1175,6 +1183,9 @@ int ide_set_media_lock(ide_drive_t *, struct gendisk *, int);
 void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *);
 void ide_retry_pc(ide_drive_t *, struct gendisk *);
 
+void ide_prep_sense(ide_drive_t *drive, struct request *rq);
+void ide_queue_sense_rq(ide_drive_t *drive, void *special);
+
 int ide_cd_expiry(ide_drive_t *);
 
 int ide_cd_get_xferlen(struct request *);
-- 
cgit v1.2.3-70-g09d2


From 6b544fcc8cd0a04eb42de9d1ecdd345e979d6ada Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Sun, 19 Apr 2009 07:00:42 +0900
Subject: ide-atapi: convert ide-{floppy,tape} to using preallocated sense
 buffer

Since we're issuing REQ_TYPE_SENSE now we need to allow those types of
rqs in the ->do_request callbacks. As a future improvement, sense_len
assignment might be unified across all ATAPI devices. Borislav to
check with specs and test.

As a result, get rid of ide_queue_pc_head() and
drive->request_sense_rq.

tj: * Init request sense ide_atapi_pc from sense request.  In the
      longer timer, it would probably better to fold
      ide_create_request_sense_cmd() into its only current user -
      ide_floppy_get_format_progress().

    * ide_retry_pc() no longer takes @disk.

CC: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
CC: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ide/ide-atapi.c  | 48 ++++++++++++++----------------------------------
 drivers/ide/ide-floppy.c |  4 +++-
 drivers/ide/ide-tape.c   |  7 +++++--
 include/linux/ide.h      |  3 +--
 4 files changed, 23 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index c6e03485a63..972c522516f 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -79,34 +79,6 @@ void ide_init_pc(struct ide_atapi_pc *pc)
 }
 EXPORT_SYMBOL_GPL(ide_init_pc);
 
-/*
- * Generate a new packet command request in front of the request queue, before
- * the current request, so that it will be processed immediately, on the next
- * pass through the driver.
- */
-static void ide_queue_pc_head(ide_drive_t *drive, struct gendisk *disk,
-			      struct ide_atapi_pc *pc, struct request *rq)
-{
-	blk_rq_init(NULL, rq);
-	rq->cmd_type = REQ_TYPE_SPECIAL;
-	rq->cmd_flags |= REQ_PREEMPT;
-	rq->special = (char *)pc;
-	rq->rq_disk = disk;
-
-	if (pc->req_xfer) {
-		rq->data = pc->buf;
-		rq->data_len = pc->req_xfer;
-	}
-
-	memcpy(rq->cmd, pc->c, 12);
-	if (drive->media == ide_tape)
-		rq->cmd[13] = REQ_IDETAPE_PC1;
-
-	drive->hwif->rq = NULL;
-
-	elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0);
-}
-
 /*
  * Add a special packet command request to the tail of the request queue,
  * and wait for it to be serviced.
@@ -254,18 +226,26 @@ EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
 
 /*
  * Called when an error was detected during the last packet command.
- * We queue a request sense packet command in the head of the request list.
+ * We queue a request sense packet command at the head of the request
+ * queue.
  */
-void ide_retry_pc(ide_drive_t *drive, struct gendisk *disk)
+void ide_retry_pc(ide_drive_t *drive)
 {
-	struct request *rq = &drive->request_sense_rq;
+	struct request *sense_rq = &drive->sense_rq;
 	struct ide_atapi_pc *pc = &drive->request_sense_pc;
 
 	(void)ide_read_error(drive);
-	ide_create_request_sense_cmd(drive, pc);
+
+	/* init pc from sense_rq */
+	ide_init_pc(pc);
+	memcpy(pc->c, sense_rq->cmd, 12);
+	pc->buf = sense_rq->data;
+	pc->req_xfer = sense_rq->data_len;
+
 	if (drive->media == ide_tape)
 		set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
-	ide_queue_pc_head(drive, disk, pc, rq);
+
+	ide_queue_sense_rq(drive, pc);
 }
 EXPORT_SYMBOL_GPL(ide_retry_pc);
 
@@ -404,7 +384,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 			debug_log("[cmd %x]: check condition\n", rq->cmd[0]);
 
 			/* Retry operation */
-			ide_retry_pc(drive, rq->rq_disk);
+			ide_retry_pc(drive);
 
 			/* queued, but not started */
 			return ide_stopped;
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 94600331a27..d3302cc891e 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -263,7 +263,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
 		}
 		pc = &floppy->queued_pc;
 		idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block);
-	} else if (blk_special_request(rq)) {
+	} else if (blk_special_request(rq) || blk_sense_request(rq)) {
 		pc = (struct ide_atapi_pc *)rq->special;
 	} else if (blk_pc_request(rq)) {
 		pc = &floppy->queued_pc;
@@ -273,6 +273,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
 		goto out_end;
 	}
 
+	ide_prep_sense(drive, rq);
+
 	memset(&cmd, 0, sizeof(cmd));
 
 	if (rq_data_dir(rq))
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index aadf53cfac6..8324dfa78a3 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -695,7 +695,7 @@ static ide_startstop_t idetape_media_access_finished(ide_drive_t *drive)
 				printk(KERN_ERR "ide-tape: %s: I/O error, ",
 						tape->name);
 			/* Retry operation */
-			ide_retry_pc(drive, tape->disk);
+			ide_retry_pc(drive);
 			return ide_stopped;
 		}
 		pc->error = 0;
@@ -752,7 +752,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 			(unsigned long long)rq->sector, rq->nr_sectors,
 			rq->current_nr_sectors);
 
-	if (!blk_special_request(rq)) {
+	if (!(blk_special_request(rq) || blk_sense_request(rq))) {
 		/* We do not support buffer cache originated requests. */
 		printk(KERN_NOTICE "ide-tape: %s: Unsupported request in "
 			"request queue (%d)\n", drive->name, rq->cmd_type);
@@ -840,6 +840,9 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 	BUG();
 
 out:
+	/* prepare sense request for this command */
+	ide_prep_sense(drive, rq);
+
 	memset(&cmd, 0, sizeof(cmd));
 
 	if (rq_data_dir(rq))
diff --git a/include/linux/ide.h b/include/linux/ide.h
index a69ccac5641..9e67ccac3c1 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -604,7 +604,6 @@ struct ide_drive_s {
 	unsigned long atapi_flags;
 
 	struct ide_atapi_pc request_sense_pc;
-	struct request request_sense_rq;
 
 	/* current sense rq and buffer */
 	bool sense_rq_armed;
@@ -1181,7 +1180,7 @@ int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *);
 int ide_do_start_stop(ide_drive_t *, struct gendisk *, int);
 int ide_set_media_lock(ide_drive_t *, struct gendisk *, int);
 void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *);
-void ide_retry_pc(ide_drive_t *, struct gendisk *);
+void ide_retry_pc(ide_drive_t *drive);
 
 void ide_prep_sense(ide_drive_t *drive, struct request *rq);
 void ide_queue_sense_rq(ide_drive_t *drive, void *special);
-- 
cgit v1.2.3-70-g09d2


From 5c4be57249e2e09136446597d2fe2a967c6ffef0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:42 +0900
Subject: ide-cd,atapi: use bio for internal commands

Impact: unify request data buffer handling

rq->data is used mostly to pass kernel buffer through request queue
without using bio.  There are only a couple of places which still do
this in kernel and converting to bio isn't difficult.

This patch converts ide-cd and atapi to use bio instead of rq->data
for request sense and internal pc commands.  With previous change to
unify sense request handling, this is relatively easily achieved by
adding blk_rq_map_kern() during sense_rq prep and PC issue.

If blk_rq_map_kern() fails for sense, the error is deferred till sense
issue and aborts the failed command which triggered the sense.  Note
that this is a slim possibility as sense prep is done on each command
issue, so for the above condition to actually trigger, all preps since
the last sense issue till the issue of the request which would require
a sense should fail.

* do_request functions might sleep now.  This should be okay as ide
  request_fn - do_ide_request() - is invoked only from make_request
  and plug work.  Make sure this is the case by adding might_sleep()
  to do_ide_request().

* Functions which access the read sense data before the sense request
  is complete now should access bio_data(sense_rq->bio) as the sense
  buffer might have been copied during blk_rq_map_kern().

* ide-tape updated to map sg.

* cdrom_do_block_pc() now doesn't have to deal with REQ_TYPE_ATA_PC
  special case.  Simplified.

* tp_ops->output/input_data path dropped from ide_pc_intr().

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ide/ide-atapi.c | 52 +++++++++++++++++++++++++++++--------------------
 drivers/ide/ide-cd.c    | 28 +++++++++++++-------------
 drivers/ide/ide-io.c    |  3 +++
 drivers/ide/ide-tape.c  |  3 +++
 include/linux/ide.h     |  2 +-
 5 files changed, 52 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 972c522516f..5cefe12f562 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -94,16 +94,18 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
 	rq->special = (char *)pc;
 
 	if (pc->req_xfer) {
-		rq->data = pc->buf;
-		rq->data_len = pc->req_xfer;
+		error = blk_rq_map_kern(drive->queue, rq, pc->buf, pc->req_xfer,
+					GFP_NOIO);
+		if (error)
+			goto put_req;
 	}
 
 	memcpy(rq->cmd, pc->c, 12);
 	if (drive->media == ide_tape)
 		rq->cmd[13] = REQ_IDETAPE_PC1;
 	error = blk_execute_rq(drive->queue, disk, rq, 0);
+put_req:
 	blk_put_request(rq);
-
 	return error;
 }
 EXPORT_SYMBOL_GPL(ide_queue_pc_tail);
@@ -168,6 +170,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
 	struct request_sense *sense = &drive->sense_data;
 	struct request *sense_rq = &drive->sense_rq;
 	unsigned int cmd_len, sense_len;
+	int err;
 
 	debug_log("%s: enter\n", __func__);
 
@@ -193,13 +196,19 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
 	memset(sense, 0, sizeof(*sense));
 
 	blk_rq_init(rq->q, sense_rq);
-	sense_rq->rq_disk = rq->rq_disk;
 
-	sense_rq->data = sense;
+	err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len,
+			      GFP_NOIO);
+	if (unlikely(err)) {
+		if (printk_ratelimit())
+			printk(KERN_WARNING "%s: failed to map sense buffer\n",
+			       drive->name);
+		return;
+	}
+
+	sense_rq->rq_disk = rq->rq_disk;
 	sense_rq->cmd[0] = GPCMD_REQUEST_SENSE;
 	sense_rq->cmd[4] = cmd_len;
-	sense_rq->data_len = sense_len;
-
 	sense_rq->cmd_type = REQ_TYPE_SENSE;
 	sense_rq->cmd_flags |= REQ_PREEMPT;
 
@@ -210,9 +219,14 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
 }
 EXPORT_SYMBOL_GPL(ide_prep_sense);
 
-void ide_queue_sense_rq(ide_drive_t *drive, void *special)
+int ide_queue_sense_rq(ide_drive_t *drive, void *special)
 {
-	BUG_ON(!drive->sense_rq_armed);
+	/* deferred failure from ide_prep_sense() */
+	if (!drive->sense_rq_armed) {
+		printk(KERN_WARNING "%s: failed queue sense request\n",
+		       drive->name);
+		return -ENOMEM;
+	}
 
 	drive->sense_rq.special = special;
 	drive->sense_rq_armed = false;
@@ -221,6 +235,7 @@ void ide_queue_sense_rq(ide_drive_t *drive, void *special)
 
 	elv_add_request(drive->queue, &drive->sense_rq,
 			ELEVATOR_INSERT_FRONT, 0);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
 
@@ -239,13 +254,14 @@ void ide_retry_pc(ide_drive_t *drive)
 	/* init pc from sense_rq */
 	ide_init_pc(pc);
 	memcpy(pc->c, sense_rq->cmd, 12);
-	pc->buf = sense_rq->data;
+	pc->buf = bio_data(sense_rq->bio);	/* pointer to mapped address */
 	pc->req_xfer = sense_rq->data_len;
 
 	if (drive->media == ide_tape)
 		set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
 
-	ide_queue_sense_rq(drive, pc);
+	if (ide_queue_sense_rq(drive, pc))
+		ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq));
 }
 EXPORT_SYMBOL_GPL(ide_retry_pc);
 
@@ -317,7 +333,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 	struct ide_cmd *cmd = &hwif->cmd;
 	struct request *rq = hwif->rq;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
-	xfer_func_t *xferfunc;
 	unsigned int timeout, done;
 	u16 bcount;
 	u8 stat, ireason, dsc = 0;
@@ -411,7 +426,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 					rq->errors = -EIO;
 			}
 
-			if (drive->media == ide_tape)
+			if (drive->media == ide_tape && !rq->bio)
 				done = ide_rq_bytes(rq); /* FIXME */
 			else
 				done = blk_rq_bytes(rq);
@@ -448,16 +463,11 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 		return ide_do_reset(drive);
 	}
 
-	xferfunc = write ? tp_ops->output_data : tp_ops->input_data;
-
-	if (drive->media == ide_floppy && pc->buf == NULL) {
+	if (drive->media == ide_tape && pc->bh)
+		done = drive->pc_io_buffers(drive, pc, bcount, write);
+	else {
 		done = min_t(unsigned int, bcount, cmd->nleft);
 		ide_pio_bytes(drive, cmd, write, done);
-	} else if (drive->media == ide_tape && pc->bh) {
-		done = drive->pc_io_buffers(drive, pc, bcount, write);
-	} else {
-		done = min_t(unsigned int, bcount, pc->req_xfer - pc->xferred);
-		xferfunc(drive, NULL, pc->cur_pos, done);
 	}
 
 	/* Update the current position */
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 7b21c7eac5b..392a5bdf2fd 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -210,10 +210,12 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
 {
 	/*
 	 * For REQ_TYPE_SENSE, "rq->special" points to the original
-	 * failed request
+	 * failed request.  Also, the sense data should be read
+	 * directly from rq which might be different from the original
+	 * sense buffer if it got copied during mapping.
 	 */
 	struct request *failed = (struct request *)rq->special;
-	struct request_sense *sense = &drive->sense_data;
+	void *sense = bio_data(rq->bio);
 
 	if (failed) {
 		if (failed->sense) {
@@ -398,7 +400,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
 
 	/* if we got a CHECK_CONDITION status, queue a request sense command */
 	if (stat & ATA_ERR)
-		ide_queue_sense_rq(drive, NULL);
+		return ide_queue_sense_rq(drive, NULL) ? 2 : 1;
 	return 1;
 
 end_request:
@@ -412,8 +414,7 @@ end_request:
 
 		hwif->rq = NULL;
 
-		ide_queue_sense_rq(drive, rq);
-		return 1;
+		return ide_queue_sense_rq(drive, rq) ? 2 : 1;
 	} else
 		return 2;
 }
@@ -507,8 +508,12 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 		rq->cmd_flags |= cmd_flags;
 		rq->timeout = timeout;
 		if (buffer) {
-			rq->data = buffer;
-			rq->data_len = *bufflen;
+			error = blk_rq_map_kern(drive->queue, rq, buffer,
+						*bufflen, GFP_NOIO);
+			if (error) {
+				blk_put_request(rq);
+				return error;
+			}
 		}
 
 		error = blk_execute_rq(drive->queue, info->disk, rq, 0);
@@ -802,15 +807,10 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
 	drive->dma = 0;
 
 	/* sg request */
-	if (rq->bio || ((rq->cmd_type == REQ_TYPE_ATA_PC) && rq->data_len)) {
+	if (rq->bio) {
 		struct request_queue *q = drive->queue;
+		char *buf = bio_data(rq->bio);
 		unsigned int alignment;
-		char *buf;
-
-		if (rq->bio)
-			buf = bio_data(rq->bio);
-		else
-			buf = rq->data;
 
 		drive->dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA);
 
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 9b9e8b1aae5..3245c2dbda3 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -481,6 +481,9 @@ void do_ide_request(struct request_queue *q)
 
 	spin_unlock_irq(q->queue_lock);
 
+	/* HLD do_request() callback might sleep, make sure it's okay */
+	might_sleep();
+
 	if (ide_lock_host(host, hwif))
 		goto plug_device_2;
 
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 8324dfa78a3..9b762a2d5d9 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -850,6 +850,9 @@ out:
 
 	cmd.rq = rq;
 
+	ide_init_sg_cmd(&cmd, pc->req_xfer);
+	ide_map_sg(drive, &cmd);
+
 	return ide_tape_issue_pc(drive, &cmd, pc);
 }
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 9e67ccac3c1..1957461ac76 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1183,7 +1183,7 @@ void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *);
 void ide_retry_pc(ide_drive_t *drive);
 
 void ide_prep_sense(ide_drive_t *drive, struct request *rq);
-void ide_queue_sense_rq(ide_drive_t *drive, void *special);
+int ide_queue_sense_rq(ide_drive_t *drive, void *special);
 
 int ide_cd_expiry(ide_drive_t *);
 
-- 
cgit v1.2.3-70-g09d2


From 6d7003877c2f0578f1c08f66d05c3f72ef4ae596 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 08:46:03 +0900
Subject: ide-atapi: kill unused fields and callbacks

Impact: remove fields and code paths which are no longer necessary

Now that ide-tape uses standard mechanisms to transfer data, special
case handling for bh handling can be dropped from ide-atapi.  Drop the
followings.

* pc->cur_pos, b_count, bh and b_data
* drive->pc_update_buffers() and pc_io_buffers().

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ide/ide-atapi.c | 17 ++++-------------
 drivers/ide/ide-tape.c  |  1 -
 include/linux/ide.h     | 12 ------------
 3 files changed, 4 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index b9dd4503cbc..afe5a432387 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -359,11 +359,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 					drive->name, rq_data_dir(pc->rq)
 						     ? "write" : "read");
 			pc->flags |= PC_FLAG_DMA_ERROR;
-		} else {
+		} else
 			pc->xferred = pc->req_xfer;
-			if (drive->pc_update_buffers)
-				drive->pc_update_buffers(drive, pc);
-		}
 		debug_log("%s: DMA finished\n", drive->name);
 	}
 
@@ -463,16 +460,11 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 		return ide_do_reset(drive);
 	}
 
-	if (drive->media == ide_tape && pc->bh)
-		done = drive->pc_io_buffers(drive, pc, bcount, write);
-	else {
-		done = min_t(unsigned int, bcount, cmd->nleft);
-		ide_pio_bytes(drive, cmd, write, done);
-	}
+	done = min_t(unsigned int, bcount, cmd->nleft);
+	ide_pio_bytes(drive, cmd, write, done);
 
-	/* Update the current position */
+	/* Update transferred byte count */
 	pc->xferred += done;
-	pc->cur_pos += done;
 
 	bcount -= done;
 
@@ -650,7 +642,6 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd)
 
 		/* We haven't transferred any data yet */
 		pc->xferred = 0;
-		pc->cur_pos = pc->buf;
 
 		valid_tf = IDE_VALID_DEVICE;
 		bcount = ((drive->media == ide_tape) ?
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 2599579e417..8dfc68892d6 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -591,7 +591,6 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape,
 	ide_init_pc(pc);
 	put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]);
 	pc->c[1] = 1;
-	pc->bh = NULL;
 	pc->buf = NULL;
 	pc->buf_size = length * tape->blk_size;
 	pc->req_xfer = pc->buf_size;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 1957461ac76..34c128f0a33 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -362,11 +362,7 @@ struct ide_atapi_pc {
 
 	/* data buffer */
 	u8 *buf;
-	/* current buffer position */
-	u8 *cur_pos;
 	int buf_size;
-	/* missing/available data on the current buffer */
-	int b_count;
 
 	/* the corresponding request */
 	struct request *rq;
@@ -379,10 +375,6 @@ struct ide_atapi_pc {
 	 */
 	u8 pc_buf[IDE_PC_BUFFER_SIZE];
 
-	/* idetape only */
-	struct idetape_bh *bh;
-	char *b_data;
-
 	unsigned long timeout;
 };
 
@@ -595,10 +587,6 @@ struct ide_drive_s {
 	/* callback for packet commands */
 	int  (*pc_callback)(struct ide_drive_s *, int);
 
-	void (*pc_update_buffers)(struct ide_drive_s *, struct ide_atapi_pc *);
-	int  (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *,
-			      unsigned int, int);
-
 	ide_startstop_t (*irq_handler)(struct ide_drive_s *);
 
 	unsigned long atapi_flags;
-- 
cgit v1.2.3-70-g09d2


From 937582382c71b75b29fbb92615629494e1a05ac0 Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Fri, 17 Apr 2009 16:42:14 +0800
Subject: x86, intr-remap: enable interrupt remapping early

Currently, when x2apic is not enabled, interrupt remapping
will be enabled in init_dmars(), where it is too late to remap
ioapic interrupts, that is, ioapic interrupts are really in
compatibility mode, not remappable mode.

This patch always enables interrupt remapping before ioapic
setup, it guarantees all interrupts will be remapped when
interrupt remapping is enabled. Thus it doesn't need to set
the compatibility interrupt bit.

[ Impact: refactor intr-remap init sequence, enable fuller remap mode ]

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Acked-by: David Woodhouse <David.Woodhouse@intel.com>
Cc: iommu@lists.linux-foundation.org
Cc: allen.m.kay@intel.com
Cc: fenghua.yu@intel.com
LKML-Reference: <1239957736-6161-4-git-send-email-weidong.han@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/apic.h  |  7 ++--
 arch/x86/kernel/apic/apic.c  | 76 +++++++++++++++++++++-----------------------
 drivers/pci/intel-iommu.c    |  9 ------
 drivers/pci/intr_remapping.c | 28 ++++++++--------
 include/linux/dmar.h         |  1 +
 5 files changed, 54 insertions(+), 67 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index d4cb7e590c0..fbdd65446c7 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -169,7 +169,6 @@ static inline u64 native_x2apic_icr_read(void)
 extern int x2apic, x2apic_phys;
 extern void check_x2apic(void);
 extern void enable_x2apic(void);
-extern void enable_IR_x2apic(void);
 extern void x2apic_icr_write(u32 low, u32 id);
 static inline int x2apic_enabled(void)
 {
@@ -190,18 +189,18 @@ static inline void check_x2apic(void)
 static inline void enable_x2apic(void)
 {
 }
-static inline void enable_IR_x2apic(void)
-{
-}
 static inline int x2apic_enabled(void)
 {
 	return 0;
 }
 
 #define	x2apic	0
+#define	x2apic_preenabled 0
 
 #endif
 
+extern void enable_IR_x2apic(void);
+
 extern int get_physical_broadcast(void);
 
 extern void apic_disable(void);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 83e47febcc8..0cf1eea750c 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -141,6 +141,8 @@ static int x2apic_preenabled;
 static int disable_x2apic;
 static __init int setup_nox2apic(char *str)
 {
+	if (x2apic_enabled())
+		panic("Bios already enabled x2apic, can't enforce nox2apic");
 	disable_x2apic = 1;
 	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
 	return 0;
@@ -1345,6 +1347,7 @@ void enable_x2apic(void)
 		wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
 	}
 }
+#endif /* CONFIG_X86_X2APIC */
 
 void __init enable_IR_x2apic(void)
 {
@@ -1353,32 +1356,21 @@ void __init enable_IR_x2apic(void)
 	unsigned long flags;
 	struct IO_APIC_route_entry **ioapic_entries = NULL;
 
-	if (!cpu_has_x2apic)
-		return;
-
-	if (!x2apic_preenabled && disable_x2apic) {
-		pr_info("Skipped enabling x2apic and Interrupt-remapping "
-			"because of nox2apic\n");
-		return;
+	ret = dmar_table_init();
+	if (ret) {
+		pr_debug("dmar_table_init() failed with %d:\n", ret);
+		goto ir_failed;
 	}
 
-	if (x2apic_preenabled && disable_x2apic)
-		panic("Bios already enabled x2apic, can't enforce nox2apic");
-
-	if (!x2apic_preenabled && skip_ioapic_setup) {
-		pr_info("Skipped enabling x2apic and Interrupt-remapping "
-			"because of skipping io-apic setup\n");
-		return;
+	if (!intr_remapping_supported()) {
+		pr_debug("intr-remapping not supported\n");
+		goto ir_failed;
 	}
 
-	ret = dmar_table_init();
-	if (ret) {
-		pr_info("dmar_table_init() failed with %d:\n", ret);
 
-		if (x2apic_preenabled)
-			panic("x2apic enabled by bios. But IR enabling failed");
-		else
-			pr_info("Not enabling x2apic,Intr-remapping\n");
+	if (!x2apic_preenabled && skip_ioapic_setup) {
+		pr_info("Skipped enabling intr-remap because of skipping "
+			"io-apic setup\n");
 		return;
 	}
 
@@ -1398,20 +1390,25 @@ void __init enable_IR_x2apic(void)
 	mask_IO_APIC_setup(ioapic_entries);
 	mask_8259A();
 
-	ret = enable_intr_remapping(EIM_32BIT_APIC_ID);
-
-	if (ret && x2apic_preenabled) {
-		local_irq_restore(flags);
-		panic("x2apic enabled by bios. But IR enabling failed");
-	}
+#ifdef CONFIG_X86_X2APIC
+	if (cpu_has_x2apic)
+		ret = enable_intr_remapping(EIM_32BIT_APIC_ID);
+	else
+#endif
+		ret = enable_intr_remapping(EIM_8BIT_APIC_ID);
 
 	if (ret)
 		goto end_restore;
 
-	if (!x2apic) {
+	pr_info("Enabled Interrupt-remapping\n");
+
+#ifdef CONFIG_X86_X2APIC
+	if (cpu_has_x2apic && !x2apic) {
 		x2apic = 1;
 		enable_x2apic();
+		pr_info("Enabled x2apic\n");
 	}
+#endif
 
 end_restore:
 	if (ret)
@@ -1426,30 +1423,29 @@ end_restore:
 	local_irq_restore(flags);
 
 end:
-	if (!ret) {
-		if (!x2apic_preenabled)
-			pr_info("Enabled x2apic and interrupt-remapping\n");
-		else
-			pr_info("Enabled Interrupt-remapping\n");
-	} else
-		pr_err("Failed to enable Interrupt-remapping and x2apic\n");
 	if (ioapic_entries)
 		free_ioapic_entries(ioapic_entries);
+
+	if (!ret)
+		return;
+
+ir_failed:
+	if (x2apic_preenabled)
+		panic("x2apic enabled by bios. But IR enabling failed");
+	else if (cpu_has_x2apic)
+		pr_info("Not enabling x2apic,Intr-remapping\n");
 #else
 	if (!cpu_has_x2apic)
 		return;
 
 	if (x2apic_preenabled)
 		panic("x2apic enabled prior OS handover,"
-		      " enable CONFIG_INTR_REMAP");
-
-	pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping "
-		" and x2apic\n");
+		      " enable CONFIG_X86_X2APIC, CONFIG_INTR_REMAP");
 #endif
 
 	return;
 }
-#endif /* CONFIG_X86_X2APIC */
+
 
 #ifdef CONFIG_X86_64
 /*
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 001b328adf8..9ce8f0764be 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1968,15 +1968,6 @@ static int __init init_dmars(void)
 		}
 	}
 
-#ifdef CONFIG_INTR_REMAP
-	if (!intr_remapping_enabled) {
-		ret = enable_intr_remapping(0);
-		if (ret)
-			printk(KERN_ERR
-			       "IOMMU: enable interrupt remapping failed\n");
-	}
-#endif
-
 	/*
 	 * For each rmrr
 	 *   for each dev attached to rmrr
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index f5e0ea724a6..5c2142656e9 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -423,20 +423,6 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
 		      readl, (sts & DMA_GSTS_IRTPS), sts);
 	spin_unlock_irqrestore(&iommu->register_lock, flags);
 
-	if (mode == 0) {
-		spin_lock_irqsave(&iommu->register_lock, flags);
-
-		/* enable comaptiblity format interrupt pass through */
-		cmd = iommu->gcmd | DMA_GCMD_CFI;
-		iommu->gcmd |= DMA_GCMD_CFI;
-		writel(cmd, iommu->reg + DMAR_GCMD_REG);
-
-		IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
-			      readl, (sts & DMA_GSTS_CFIS), sts);
-
-		spin_unlock_irqrestore(&iommu->register_lock, flags);
-	}
-
 	/*
 	 * global invalidation of interrupt entry cache before enabling
 	 * interrupt-remapping.
@@ -516,6 +502,20 @@ end:
 	spin_unlock_irqrestore(&iommu->register_lock, flags);
 }
 
+int __init intr_remapping_supported(void)
+{
+	struct dmar_drhd_unit *drhd;
+
+	for_each_drhd_unit(drhd) {
+		struct intel_iommu *iommu = drhd->iommu;
+
+		if (!ecap_ir_support(iommu->ecap))
+			return 0;
+	}
+
+	return 1;
+}
+
 int __init enable_intr_remapping(int eim)
 {
 	struct dmar_drhd_unit *drhd;
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index e397dc342cd..06f592a7f73 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -108,6 +108,7 @@ struct irte {
 };
 #ifdef CONFIG_INTR_REMAP
 extern int intr_remapping_enabled;
+extern int intr_remapping_supported(void);
 extern int enable_intr_remapping(int);
 extern void disable_intr_remapping(void);
 extern int reenable_intr_remapping(int);
-- 
cgit v1.2.3-70-g09d2


From a0f82f64e26929776c58a5c93c2ecb38e3d82815 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 19 Apr 2009 09:43:48 +0000
Subject: syncookies: remove last_synq_overflow from struct tcp_sock

last_synq_overflow eats 4 or 8 bytes in struct tcp_sock, even
though it is only used when a listening sockets syn queue
is full.

We can (ab)use rx_opt.ts_recent_stamp to store the same information;
it is not used otherwise as long as a socket is in listen state.

Move linger2 around to avoid splitting struct mtu_probe
across cacheline boundary on 32 bit arches.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   |  4 +---
 include/net/tcp.h     | 13 +++++++++++++
 net/ipv4/syncookies.c |  5 ++---
 net/ipv6/syncookies.c |  4 ++--
 4 files changed, 18 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 9d5078bd23a..8afac76cd74 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -377,7 +377,7 @@ struct tcp_sock {
 	unsigned int		keepalive_time;	  /* time before keep alive takes place */
 	unsigned int		keepalive_intvl;  /* time interval between keep alive probes */
 
-	unsigned long last_synq_overflow; 
+	int			linger2;
 
 /* Receiver side RTT estimation */
 	struct {
@@ -406,8 +406,6 @@ struct tcp_sock {
 /* TCP MD5 Signagure Option information */
 	struct tcp_md5sig_info	*md5sig_info;
 #endif
-
-	int			linger2;
 };
 
 static inline struct tcp_sock *tcp_sk(const struct sock *sk)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1b94b9bfe2d..b55b4891029 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -265,6 +265,19 @@ static inline int tcp_too_many_orphans(struct sock *sk, int num)
 		 atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]);
 }
 
+/* syncookies: remember time of last synqueue overflow */
+static inline void tcp_synq_overflow(struct sock *sk)
+{
+	tcp_sk(sk)->rx_opt.ts_recent_stamp = jiffies;
+}
+
+/* syncookies: no recent synqueue overflow on this listening socket? */
+static inline int tcp_synq_no_recent_overflow(const struct sock *sk)
+{
+	unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+	return time_after(jiffies, last_overflow + TCP_TIMEOUT_INIT);
+}
+
 extern struct proto tcp_prot;
 
 #define TCP_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.tcp_statistics, field)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index b35a950d2e0..cd2b97f1b6e 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -161,13 +161,12 @@ static __u16 const msstab[] = {
  */
 __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	const struct iphdr *iph = ip_hdr(skb);
 	const struct tcphdr *th = tcp_hdr(skb);
 	int mssind;
 	const __u16 mss = *mssp;
 
-	tp->last_synq_overflow = jiffies;
+	tcp_synq_overflow(sk);
 
 	/* XXX sort msstab[] by probability?  Binary search? */
 	for (mssind = 0; mss > msstab[mssind + 1]; mssind++)
@@ -268,7 +267,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	if (!sysctl_tcp_syncookies || !th->ack)
 		goto out;
 
-	if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) ||
+	if (tcp_synq_no_recent_overflow(sk) ||
 	    (mss = cookie_check(skb, cookie)) == 0) {
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
 		goto out;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 711175e0571..8c2513982b6 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -131,7 +131,7 @@ __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
 	int mssind;
 	const __u16 mss = *mssp;
 
-	tcp_sk(sk)->last_synq_overflow = jiffies;
+	tcp_synq_overflow(sk);
 
 	for (mssind = 0; mss > msstab[mssind + 1]; mssind++)
 		;
@@ -175,7 +175,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	if (!sysctl_tcp_syncookies || !th->ack)
 		goto out;
 
-	if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) ||
+	if (tcp_synq_no_recent_overflow(sk) ||
 		(mss = cookie_check(skb, cookie)) == 0) {
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
 		goto out;
-- 
cgit v1.2.3-70-g09d2


From 23de29de2d8b227943be191d59fb6d983996d55e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 20 Apr 2009 12:59:29 -0400
Subject: tracing: remove dangling semicolon

Due to a cut and paste error, the trace_seq_putc had a semicolon
after the prototype but before the stub function when tracing is
disabled.

[Impact: fix compile error ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/trace_seq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index 37db9bdfbc1..ba9627f00d3 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -63,7 +63,7 @@ static inline int trace_seq_puts(struct trace_seq *s, const char *str)
 {
 	return 0;
 }
-static inline int trace_seq_putc(struct trace_seq *s, unsigned char c);
+static inline int trace_seq_putc(struct trace_seq *s, unsigned char c)
 {
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From b75576d76d4be50196773f36709cb7a4f5ac2ab7 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 20 Apr 2009 17:56:13 +0100
Subject: ASoC: Make the DAPM power check an operation on the widget

Rather than having switch statements at point of use make the DAPM
power check a member of the widget structure and set it when we
instantiate the widget.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc-dapm.h |  2 ++
 sound/soc/soc-dapm.c     | 27 +++++++++++++--------------
 2 files changed, 15 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index fcc929da033..839a97b6326 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -367,6 +367,8 @@ struct snd_soc_dapm_widget {
 	unsigned char suspend:1;		/* was active before suspend */
 	unsigned char pmdown:1;			/* waiting for timeout */
 
+	int (*power_check)(struct snd_soc_dapm_widget *w);
+
 	/* external events */
 	unsigned short event_flags;		/* flags to specify event types */
 	int (*event)(struct snd_soc_dapm_widget*, struct snd_kcontrol *, int);
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 22522e2d83a..d3d17354e76 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -631,20 +631,7 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event,
 {
 	int power, ret;
 
-	/* Work out the new power state */
 	switch (w->id) {
-	case snd_soc_dapm_vmid:
-		/* No action required */
-		return 0;
-
-	case snd_soc_dapm_adc:
-		power = dapm_adc_check_power(w);
-		break;
-
-	case snd_soc_dapm_dac:
-		power = dapm_dac_check_power(w);
-		break;
-
 	case snd_soc_dapm_pre:
 		if (!w->event)
 			return 0;
@@ -680,10 +667,13 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event,
 		return 0;
 
 	default:
-		power = dapm_generic_check_power(w);
 		break;
 	}
 
+	if (!w->power_check)
+		return 0;
+
+	power = w->power_check(w);
 	if (w->power == power)
 		return 0;
 	w->power = power;
@@ -1147,15 +1137,22 @@ int snd_soc_dapm_new_widgets(struct snd_soc_codec *codec)
 		case snd_soc_dapm_switch:
 		case snd_soc_dapm_mixer:
 		case snd_soc_dapm_mixer_named_ctl:
+			w->power_check = dapm_generic_check_power;
 			dapm_new_mixer(codec, w);
 			break;
 		case snd_soc_dapm_mux:
 		case snd_soc_dapm_value_mux:
+			w->power_check = dapm_generic_check_power;
 			dapm_new_mux(codec, w);
 			break;
 		case snd_soc_dapm_adc:
+			w->power_check = dapm_adc_check_power;
+			break;
 		case snd_soc_dapm_dac:
+			w->power_check = dapm_dac_check_power;
+			break;
 		case snd_soc_dapm_pga:
+			w->power_check = dapm_generic_check_power;
 			dapm_new_pga(codec, w);
 			break;
 		case snd_soc_dapm_input:
@@ -1165,6 +1162,8 @@ int snd_soc_dapm_new_widgets(struct snd_soc_codec *codec)
 		case snd_soc_dapm_hp:
 		case snd_soc_dapm_mic:
 		case snd_soc_dapm_line:
+			w->power_check = dapm_generic_check_power;
+			break;
 		case snd_soc_dapm_vmid:
 		case snd_soc_dapm_pre:
 		case snd_soc_dapm_post:
-- 
cgit v1.2.3-70-g09d2


From f1f9b3b1795da8625e0e6096813c9d18d4a344ce Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 20 Apr 2009 20:38:21 +0200
Subject: perfcounters, sched: remove __task_delta_exec()

This function was left orphan by the latest round of sw-counter
cleanups.

[ Impact: remove unused kernel function ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel_stat.h |  1 -
 kernel/sched.c              | 23 -----------------------
 2 files changed, 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 080d1fd461d..a77c6007dc9 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -85,7 +85,6 @@ static inline unsigned int kstat_irqs(unsigned int irq)
 /*
  * Lock/unlock the current runqueue - to extract task statistics:
  */
-extern unsigned long long __task_delta_exec(struct task_struct *tsk, int update);
 extern unsigned long long task_delta_exec(struct task_struct *);
 
 extern void account_user_time(struct task_struct *, cputime_t, cputime_t);
diff --git a/kernel/sched.c b/kernel/sched.c
index b66a08c2480..a69278eef42 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4546,29 +4546,6 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
 
 EXPORT_PER_CPU_SYMBOL(kstat);
 
-/*
- * Return any ns on the sched_clock that have not yet been banked in
- * @p in case that task is currently running.
- */
-unsigned long long __task_delta_exec(struct task_struct *p, int update)
-{
-	s64 delta_exec;
-	struct rq *rq;
-
-	rq = task_rq(p);
-	WARN_ON_ONCE(!runqueue_is_locked());
-	WARN_ON_ONCE(!task_current(rq, p));
-
-	if (update)
-		update_rq_clock(rq);
-
-	delta_exec = rq->clock - p->se.exec_start;
-
-	WARN_ON_ONCE(delta_exec < 0);
-
-	return delta_exec;
-}
-
 /*
  * Return any ns on the sched_clock that have not yet been banked in
  * @p in case that task is currently running.
-- 
cgit v1.2.3-70-g09d2


From cd474f2d548af3c0eb932d9d47ec11483861aa6f Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 21 Apr 2009 08:53:08 +0200
Subject: ALSA: Remove deprecated snd_card_new()

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/core.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include')

diff --git a/include/sound/core.h b/include/sound/core.h
index 3dea79829ac..a26bbdcc676 100644
--- a/include/sound/core.h
+++ b/include/sound/core.h
@@ -300,16 +300,6 @@ int snd_card_create(int idx, const char *id,
 		    struct module *module, int extra_size,
 		    struct snd_card **card_ret);
 
-static inline __deprecated
-struct snd_card *snd_card_new(int idx, const char *id,
-			      struct module *module, int extra_size)
-{
-	struct snd_card *card;
-	if (snd_card_create(idx, id, module, extra_size, &card) < 0)
-		return NULL;
-	return card;
-}
-
 int snd_card_disconnect(struct snd_card *card);
 int snd_card_free(struct snd_card *card);
 int snd_card_free_when_closed(struct snd_card *card);
-- 
cgit v1.2.3-70-g09d2


From ef9dfa4b1052af23a603de382d4665b2d1fccc61 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 21 Apr 2009 08:53:41 +0200
Subject: ALSA: Remove deprecated include/sound/driver.h

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/driver.h | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 include/sound/driver.h

(limited to 'include')

diff --git a/include/sound/driver.h b/include/sound/driver.h
deleted file mode 100644
index f0359437d01..00000000000
--- a/include/sound/driver.h
+++ /dev/null
@@ -1 +0,0 @@
-#warning "This file is deprecated"
-- 
cgit v1.2.3-70-g09d2


From fc1edaf9e7cc4d4696f83dee495b8f158d01c4eb Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 20 Apr 2009 13:02:27 -0700
Subject: x86: x2apic, IR: Clean up X86_X2APIC and INTR_REMAP config checks

Add x2apic_supported() to clean up CONFIG_X86_X2APIC checks.

Fix CONFIG_INTR_REMAP checks.

[ Impact: cleanup ]

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: dwmw2@infradead.org
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Weidong Han <weidong.han@intel.com>
LKML-Reference: <20090420200450.128993000@linux-os.sc.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/apic.h          | 10 ++++----
 arch/x86/include/asm/io_apic.h       |  2 --
 arch/x86/include/asm/irq_remapping.h |  2 +-
 arch/x86/kernel/apic/apic.c          | 49 +++++++++---------------------------
 arch/x86/kernel/apic/io_apic.c       |  2 --
 arch/x86/kernel/apic/probe_64.c      |  2 +-
 include/linux/dmar.h                 |  2 ++
 7 files changed, 21 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index fbdd65446c7..3738438a91f 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -107,8 +107,7 @@ extern u32 native_safe_apic_wait_icr_idle(void);
 extern void native_apic_icr_write(u32 low, u32 id);
 extern u64 native_apic_icr_read(void);
 
-#define EIM_8BIT_APIC_ID	0
-#define EIM_32BIT_APIC_ID	1
+extern int x2apic_mode;
 
 #ifdef CONFIG_X86_X2APIC
 /*
@@ -166,7 +165,7 @@ static inline u64 native_x2apic_icr_read(void)
 	return val;
 }
 
-extern int x2apic, x2apic_phys;
+extern int x2apic_phys;
 extern void check_x2apic(void);
 extern void enable_x2apic(void);
 extern void x2apic_icr_write(u32 low, u32 id);
@@ -182,6 +181,8 @@ static inline int x2apic_enabled(void)
 		return 1;
 	return 0;
 }
+
+#define x2apic_supported()	(cpu_has_x2apic)
 #else
 static inline void check_x2apic(void)
 {
@@ -194,9 +195,8 @@ static inline int x2apic_enabled(void)
 	return 0;
 }
 
-#define	x2apic	0
 #define	x2apic_preenabled 0
-
+#define	x2apic_supported()	0
 #endif
 
 extern void enable_IR_x2apic(void);
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 9d826e43601..34eaa37f7ad 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -161,7 +161,6 @@ extern int io_apic_set_pci_routing(int ioapic, int pin, int irq,
 extern int (*ioapic_renumber_irq)(int ioapic, int irq);
 extern void ioapic_init_mappings(void);
 
-#ifdef CONFIG_X86_64
 extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
 extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
 extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
@@ -169,7 +168,6 @@ extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 extern void reinit_intr_remapped_IO_APIC(int intr_remapping,
 	struct IO_APIC_route_entry **ioapic_entries);
-#endif
 
 extern void probe_nr_irqs_gsi(void);
 
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 0396760fccb..f275e224450 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -1,6 +1,6 @@
 #ifndef _ASM_X86_IRQ_REMAPPING_H
 #define _ASM_X86_IRQ_REMAPPING_H
 
-#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8)
+#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8)
 
 #endif	/* _ASM_X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 7b41a32339e..2b30e520dce 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -134,8 +134,8 @@ static __init int setup_apicpmtimer(char *s)
 __setup("apicpmtimer", setup_apicpmtimer);
 #endif
 
+int x2apic_mode;
 #ifdef CONFIG_X86_X2APIC
-int x2apic;
 /* x2apic enabled before OS handover */
 static int x2apic_preenabled;
 static int disable_x2apic;
@@ -858,7 +858,7 @@ void clear_local_APIC(void)
 	u32 v;
 
 	/* APIC hasn't been mapped yet */
-	if (!x2apic && !apic_phys)
+	if (!x2apic_mode && !apic_phys)
 		return;
 
 	maxlvt = lapic_get_maxlvt();
@@ -1330,7 +1330,7 @@ void check_x2apic(void)
 {
 	if (x2apic_enabled()) {
 		pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
-		x2apic_preenabled = x2apic = 1;
+		x2apic_preenabled = x2apic_mode = 1;
 	}
 }
 
@@ -1338,7 +1338,7 @@ void enable_x2apic(void)
 {
 	int msr, msr2;
 
-	if (!x2apic)
+	if (!x2apic_mode)
 		return;
 
 	rdmsr(MSR_IA32_APICBASE, msr, msr2);
@@ -1390,25 +1390,17 @@ void __init enable_IR_x2apic(void)
 	mask_IO_APIC_setup(ioapic_entries);
 	mask_8259A();
 
-#ifdef CONFIG_X86_X2APIC
-	if (cpu_has_x2apic)
-		ret = enable_intr_remapping(EIM_32BIT_APIC_ID);
-	else
-#endif
-		ret = enable_intr_remapping(EIM_8BIT_APIC_ID);
-
+	ret = enable_intr_remapping(x2apic_supported());
 	if (ret)
 		goto end_restore;
 
 	pr_info("Enabled Interrupt-remapping\n");
 
-#ifdef CONFIG_X86_X2APIC
-	if (cpu_has_x2apic && !x2apic) {
-		x2apic = 1;
+	if (x2apic_supported() && !x2apic_mode) {
+		x2apic_mode = 1;
 		enable_x2apic();
 		pr_info("Enabled x2apic\n");
 	}
-#endif
 
 end_restore:
 	if (ret)
@@ -1576,7 +1568,7 @@ void __init early_init_lapic_mapping(void)
  */
 void __init init_apic_mappings(void)
 {
-	if (x2apic) {
+	if (x2apic_mode) {
 		boot_cpu_physical_apicid = read_apic_id();
 		return;
 	}
@@ -2010,10 +2002,10 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
 
 	local_irq_save(flags);
 	disable_local_APIC();
-#ifdef CONFIG_INTR_REMAP
+
 	if (intr_remapping_enabled)
 		disable_intr_remapping();
-#endif
+
 	local_irq_restore(flags);
 	return 0;
 }
@@ -2023,8 +2015,6 @@ static int lapic_resume(struct sys_device *dev)
 	unsigned int l, h;
 	unsigned long flags;
 	int maxlvt;
-
-#ifdef CONFIG_INTR_REMAP
 	int ret;
 	struct IO_APIC_route_entry **ioapic_entries = NULL;
 
@@ -2050,17 +2040,8 @@ static int lapic_resume(struct sys_device *dev)
 		mask_8259A();
 	}
 
-	if (x2apic)
+	if (x2apic_mode)
 		enable_x2apic();
-#else
-	if (!apic_pm_state.active)
-		return 0;
-
-	local_irq_save(flags);
-	if (x2apic)
-		enable_x2apic();
-#endif
-
 	else {
 		/*
 		 * Make sure the APICBASE points to the right address
@@ -2098,18 +2079,12 @@ static int lapic_resume(struct sys_device *dev)
 	apic_write(APIC_ESR, 0);
 	apic_read(APIC_ESR);
 
-#ifdef CONFIG_INTR_REMAP
 	if (intr_remapping_enabled) {
-		if (x2apic)
-			reenable_intr_remapping(EIM_32BIT_APIC_ID);
-		else
-			reenable_intr_remapping(EIM_8BIT_APIC_ID);
-
+		reenable_intr_remapping(x2apic_mode);
 		unmask_8259A();
 		restore_IO_APIC_setup(ioapic_entries);
 		free_ioapic_entries(ioapic_entries);
 	}
-#endif
 
 	local_irq_restore(flags);
 
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ea22a86e3cd..3a45d2ec974 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -736,7 +736,6 @@ static int __init ioapic_pirq_setup(char *str)
 __setup("pirq=", ioapic_pirq_setup);
 #endif /* CONFIG_X86_32 */
 
-#ifdef CONFIG_INTR_REMAP
 struct IO_APIC_route_entry **alloc_ioapic_entries(void)
 {
 	int apic;
@@ -857,7 +856,6 @@ void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
 
 	kfree(ioapic_entries);
 }
-#endif
 
 /*
  * Find the IRQ entry number of a certain pin.
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 1783652bb0e..bc3e880f9b8 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -50,7 +50,7 @@ static struct apic *apic_probe[] __initdata = {
 void __init default_setup_apic_routing(void)
 {
 #ifdef CONFIG_X86_X2APIC
-	if (x2apic && (apic != &apic_x2apic_phys &&
+	if (x2apic_mode && (apic != &apic_x2apic_phys &&
 #ifdef CONFIG_X86_UV
 		       apic != &apic_x2apic_uv_x &&
 #endif
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 06f592a7f73..10ff5c49882 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -158,6 +158,8 @@ static inline struct intel_iommu *map_ioapic_to_ir(int apic)
 }
 #define irq_remapped(irq)		(0)
 #define enable_intr_remapping(mode)	(-1)
+#define disable_intr_remapping()	(0)
+#define reenable_intr_remapping(mode)	(0)
 #define intr_remapping_enabled		(0)
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From 03ad032bb78b2732b607ed198e951240e1d21e59 Mon Sep 17 00:00:00 2001
From: Peter Holik <peter@holik.at>
Date: Sat, 18 Apr 2009 07:24:17 +0000
Subject: export usbnet_get_ethernet_addr from usbnet and fixed cdc_ether.c

because of using the same function get_ethernet_addr as cdc_ether.c
i export usbnet_get_ethernet_addr from usbnet and fixed cdc_ether
(suggested by Oliver Neukum).

Signed-off-by: Peter Holik <peter@holik.at>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ether.c | 33 +--------------------------------
 drivers/net/usb/usbnet.c    | 31 +++++++++++++++++++++++++++++++
 include/linux/usb/usbnet.h  |  1 +
 3 files changed, 33 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index 55e8ecc3a9e..01fd528306e 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -25,7 +25,6 @@
 #include <linux/init.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
-#include <linux/ctype.h>
 #include <linux/ethtool.h>
 #include <linux/workqueue.h>
 #include <linux/mii.h>
@@ -389,36 +388,6 @@ static void cdc_status(struct usbnet *dev, struct urb *urb)
 	}
 }
 
-static u8 nibble(unsigned char c)
-{
-	if (likely(isdigit(c)))
-		return c - '0';
-	c = toupper(c);
-	if (likely(isxdigit(c)))
-		return 10 + c - 'A';
-	return 0;
-}
-
-static inline int
-get_ethernet_addr(struct usbnet *dev, struct usb_cdc_ether_desc *e)
-{
-	int 		tmp, i;
-	unsigned char	buf [13];
-
-	tmp = usb_string(dev->udev, e->iMACAddress, buf, sizeof buf);
-	if (tmp != 12) {
-		dev_dbg(&dev->udev->dev,
-			"bad MAC string %d fetch, %d\n", e->iMACAddress, tmp);
-		if (tmp >= 0)
-			tmp = -EINVAL;
-		return tmp;
-	}
-	for (i = tmp = 0; i < 6; i++, tmp += 2)
-		dev->net->dev_addr [i] =
-			(nibble(buf [tmp]) << 4) + nibble(buf [tmp + 1]);
-	return 0;
-}
-
 static int cdc_bind(struct usbnet *dev, struct usb_interface *intf)
 {
 	int				status;
@@ -428,7 +397,7 @@ static int cdc_bind(struct usbnet *dev, struct usb_interface *intf)
 	if (status < 0)
 		return status;
 
-	status = get_ethernet_addr(dev, info->ether);
+	status = usbnet_get_ethernet_addr(dev, info->ether->iMACAddress);
 	if (status < 0) {
 		usb_set_intfdata(info->data, NULL);
 		usb_driver_release_interface(driver_of(intf), info->data);
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 2b8b9036aff..c94de624314 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -37,6 +37,7 @@
 #include <linux/init.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
+#include <linux/ctype.h>
 #include <linux/ethtool.h>
 #include <linux/workqueue.h>
 #include <linux/mii.h>
@@ -156,6 +157,36 @@ int usbnet_get_endpoints(struct usbnet *dev, struct usb_interface *intf)
 }
 EXPORT_SYMBOL_GPL(usbnet_get_endpoints);
 
+static u8 nibble(unsigned char c)
+{
+	if (likely(isdigit(c)))
+		return c - '0';
+	c = toupper(c);
+	if (likely(isxdigit(c)))
+		return 10 + c - 'A';
+	return 0;
+}
+
+int usbnet_get_ethernet_addr(struct usbnet *dev, int iMACAddress)
+{
+	int 		tmp, i;
+	unsigned char	buf [13];
+
+	tmp = usb_string(dev->udev, iMACAddress, buf, sizeof buf);
+	if (tmp != 12) {
+		dev_dbg(&dev->udev->dev,
+			"bad MAC string %d fetch, %d\n", iMACAddress, tmp);
+		if (tmp >= 0)
+			tmp = -EINVAL;
+		return tmp;
+	}
+	for (i = tmp = 0; i < 6; i++, tmp += 2)
+		dev->net->dev_addr [i] =
+			(nibble(buf [tmp]) << 4) + nibble(buf [tmp + 1]);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(usbnet_get_ethernet_addr);
+
 static void intr_complete (struct urb *urb);
 
 static int init_status (struct usbnet *dev, struct usb_interface *intf)
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 36fabb95c7d..5d44059f6d6 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -183,6 +183,7 @@ extern void usbnet_tx_timeout (struct net_device *net);
 extern int usbnet_change_mtu (struct net_device *net, int new_mtu);
 
 extern int usbnet_get_endpoints(struct usbnet *, struct usb_interface *);
+extern int usbnet_get_ethernet_addr(struct usbnet *, int);
 extern void usbnet_defer_kevent (struct usbnet *, int);
 extern void usbnet_skb_return (struct usbnet *, struct sk_buff *);
 extern void usbnet_unlink_rx_urbs(struct usbnet *);
-- 
cgit v1.2.3-70-g09d2


From b1b67dd45a6b629eb41553856805aaa1614fbb83 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 20 Apr 2009 04:49:28 +0000
Subject: net: factor out ethtool invocation of vlan/macvlan drivers

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c     | 22 +++-------------------
 include/linux/netdevice.h | 24 +++++++++++++++++++++++-
 net/8021q/vlan_dev.c      | 23 +++--------------------
 3 files changed, 29 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 214a8cf2b70..329cd50d0e2 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -374,36 +374,20 @@ static void macvlan_ethtool_get_drvinfo(struct net_device *dev,
 static u32 macvlan_ethtool_get_rx_csum(struct net_device *dev)
 {
 	const struct macvlan_dev *vlan = netdev_priv(dev);
-	struct net_device *lowerdev = vlan->lowerdev;
-
-	if (lowerdev->ethtool_ops == NULL ||
-	    lowerdev->ethtool_ops->get_rx_csum == NULL)
-		return 0;
-	return lowerdev->ethtool_ops->get_rx_csum(lowerdev);
+	return dev_ethtool_get_rx_csum(vlan->lowerdev);
 }
 
 static int macvlan_ethtool_get_settings(struct net_device *dev,
 					struct ethtool_cmd *cmd)
 {
 	const struct macvlan_dev *vlan = netdev_priv(dev);
-	struct net_device *lowerdev = vlan->lowerdev;
-
-	if (!lowerdev->ethtool_ops ||
-	    !lowerdev->ethtool_ops->get_settings)
-		return -EOPNOTSUPP;
-
-	return lowerdev->ethtool_ops->get_settings(lowerdev, cmd);
+	return dev_ethtool_get_settings(vlan->lowerdev, cmd);
 }
 
 static u32 macvlan_ethtool_get_flags(struct net_device *dev)
 {
 	const struct macvlan_dev *vlan = netdev_priv(dev);
-	struct net_device *lowerdev = vlan->lowerdev;
-
-	if (!lowerdev->ethtool_ops ||
-	    !lowerdev->ethtool_ops->get_flags)
-		return 0;
-	return lowerdev->ethtool_ops->get_flags(lowerdev);
+	return dev_ethtool_get_flags(vlan->lowerdev);
 }
 
 static const struct ethtool_ops macvlan_ethtool_ops = {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 54db3ebf219..31167451d08 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -42,6 +42,7 @@
 #include <linux/dmaengine.h>
 #include <linux/workqueue.h>
 
+#include <linux/ethtool.h>
 #include <net/net_namespace.h>
 #include <net/dsa.h>
 #ifdef CONFIG_DCB
@@ -49,7 +50,6 @@
 #endif
 
 struct vlan_group;
-struct ethtool_ops;
 struct netpoll_info;
 /* 802.11 specific */
 struct wireless_dev;
@@ -1906,6 +1906,28 @@ static inline int skb_bond_should_drop(struct sk_buff *skb)
 }
 
 extern struct pernet_operations __net_initdata loopback_net_ops;
+
+static inline int dev_ethtool_get_settings(struct net_device *dev,
+					   struct ethtool_cmd *cmd)
+{
+	if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings)
+		return -EOPNOTSUPP;
+	return dev->ethtool_ops->get_settings(dev, cmd);
+}
+
+static inline u32 dev_ethtool_get_rx_csum(struct net_device *dev)
+{
+	if (!dev->ethtool_ops || !dev->ethtool_ops->get_rx_csum)
+		return 0;
+	return dev->ethtool_ops->get_rx_csum(dev);
+}
+
+static inline u32 dev_ethtool_get_flags(struct net_device *dev)
+{
+	if (!dev->ethtool_ops || !dev->ethtool_ops->get_flags)
+		return 0;
+	return dev->ethtool_ops->get_flags(dev);
+}
 #endif /* __KERNEL__ */
 
 #endif	/* _LINUX_DEV_H */
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 6b092136401..04dc8c8a685 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -666,13 +666,7 @@ static int vlan_ethtool_get_settings(struct net_device *dev,
 				     struct ethtool_cmd *cmd)
 {
 	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
-	struct net_device *real_dev = vlan->real_dev;
-
-	if (!real_dev->ethtool_ops ||
-	    !real_dev->ethtool_ops->get_settings)
-		return -EOPNOTSUPP;
-
-	return real_dev->ethtool_ops->get_settings(real_dev, cmd);
+	return dev_ethtool_get_settings(vlan->real_dev, cmd);
 }
 
 static void vlan_ethtool_get_drvinfo(struct net_device *dev,
@@ -686,24 +680,13 @@ static void vlan_ethtool_get_drvinfo(struct net_device *dev,
 static u32 vlan_ethtool_get_rx_csum(struct net_device *dev)
 {
 	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
-	struct net_device *real_dev = vlan->real_dev;
-
-	if (real_dev->ethtool_ops == NULL ||
-	    real_dev->ethtool_ops->get_rx_csum == NULL)
-		return 0;
-	return real_dev->ethtool_ops->get_rx_csum(real_dev);
+	return dev_ethtool_get_rx_csum(vlan->real_dev);
 }
 
 static u32 vlan_ethtool_get_flags(struct net_device *dev)
 {
 	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
-	struct net_device *real_dev = vlan->real_dev;
-
-	if (!(real_dev->features & NETIF_F_HW_VLAN_RX) ||
-	    real_dev->ethtool_ops == NULL ||
-	    real_dev->ethtool_ops->get_flags == NULL)
-		return 0;
-	return real_dev->ethtool_ops->get_flags(real_dev);
+	return dev_ethtool_get_flags(vlan->real_dev);
 }
 
 static const struct ethtool_ops vlan_ethtool_ops = {
-- 
cgit v1.2.3-70-g09d2


From 0a1ec07a67bd8b0033dace237249654d015efa21 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 20 Apr 2009 01:25:46 +0000
Subject: net: skb_copy_datagram_const_iovec()

There's an skb_copy_datagram_iovec() to copy out of a paged skb,
but it modifies the iovec, and does not support starting
at an offset in the destination. We want both in tun.c, so let's
add the function.

It's a carbon copy of skb_copy_datagram_iovec() with enough changes to
be annoying.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  5 +++
 include/linux/socket.h |  2 ++
 net/core/datagram.c    | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++
 net/core/iovec.c       | 26 ++++++++++++++
 4 files changed, 125 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5fd389162f0..af2b21bdda8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1717,6 +1717,11 @@ extern int	       skb_copy_datagram_from_iovec(struct sk_buff *skb,
 						    int offset,
 						    struct iovec *from,
 						    int len);
+extern int	       skb_copy_datagram_const_iovec(const struct sk_buff *from,
+						     int offset,
+						     const struct iovec *to,
+						     int to_offset,
+						     int size);
 extern void	       skb_free_datagram(struct sock *sk, struct sk_buff *skb);
 extern int	       skb_kill_datagram(struct sock *sk, struct sk_buff *skb,
 					 unsigned int flags);
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 421afb4d29b..171b08db9c4 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -318,6 +318,8 @@ extern int csum_partial_copy_fromiovecend(unsigned char *kdata,
 
 extern int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode);
 extern int memcpy_toiovec(struct iovec *v, unsigned char *kdata, int len);
+extern int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata,
+			     int offset, int len);
 extern int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr, int __user *ulen);
 extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr);
 extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index d0de644b378..4dbb05cd572 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -338,6 +338,98 @@ fault:
 	return -EFAULT;
 }
 
+/**
+ *	skb_copy_datagram_const_iovec - Copy a datagram to an iovec.
+ *	@skb: buffer to copy
+ *	@offset: offset in the buffer to start copying from
+ *	@to: io vector to copy to
+ *	@to_offset: offset in the io vector to start copying to
+ *	@len: amount of data to copy from buffer to iovec
+ *
+ *	Returns 0 or -EFAULT.
+ *	Note: the iovec is not modified during the copy.
+ */
+int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset,
+				  const struct iovec *to, int to_offset,
+				  int len)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+
+	/* Copy header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		if (memcpy_toiovecend(to, skb->data + offset, to_offset, copy))
+			goto fault;
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+		to_offset += copy;
+	}
+
+	/* Copy paged appendix. Hmm... why does this look so complicated? */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			int err;
+			u8  *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			struct page *page = frag->page;
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap(page);
+			err = memcpy_toiovecend(to, vaddr + frag->page_offset +
+						offset - start, to_offset, copy);
+			kunmap(page);
+			if (err)
+				goto fault;
+			if (!(len -= copy))
+				return 0;
+			offset += copy;
+			to_offset += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			WARN_ON(start > offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				if (skb_copy_datagram_const_iovec(list,
+							    offset - start,
+							    to, to_offset,
+							    copy))
+					goto fault;
+				if ((len -= copy) == 0)
+					return 0;
+				offset += copy;
+				to_offset += copy;
+			}
+			start = end;
+		}
+	}
+	if (!len)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+EXPORT_SYMBOL(skb_copy_datagram_const_iovec);
+
 /**
  *	skb_copy_datagram_from_iovec - Copy a datagram from an iovec.
  *	@skb: buffer to copy
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 4c9c0121c9d..a215545c0a3 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -97,6 +97,31 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
 	return 0;
 }
 
+/*
+ *	Copy kernel to iovec. Returns -EFAULT on error.
+ */
+
+int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
+		      int offset, int len)
+{
+	int copy;
+	for (; len > 0; ++iov) {
+		/* Skip over the finished iovecs */
+		if (unlikely(offset >= iov->iov_len)) {
+			offset -= iov->iov_len;
+			continue;
+		}
+		copy = min_t(unsigned int, iov->iov_len - offset, len);
+		offset = 0;
+		if (copy_to_user(iov->iov_base, kdata, copy))
+			return -EFAULT;
+		kdata += copy;
+		len -= copy;
+	}
+
+	return 0;
+}
+
 /*
  *	Copy iovec to kernel. Returns -EFAULT on error.
  *
@@ -236,3 +261,4 @@ EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
 EXPORT_SYMBOL(memcpy_fromiovec);
 EXPORT_SYMBOL(memcpy_fromiovecend);
 EXPORT_SYMBOL(memcpy_toiovec);
+EXPORT_SYMBOL(memcpy_toiovecend);
-- 
cgit v1.2.3-70-g09d2


From 6f26c9a7555e5bcca3560919db9b852015077dae Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 20 Apr 2009 01:26:11 +0000
Subject: tun: fix tun_chr_aio_write so that aio works

aio_write gets const struct iovec * but tun_chr_aio_write casts this to struct
iovec * and modifies the iovec. As a result, attempts to use io_submit
to send packets to a tun device fail with weird errors such as EINVAL.

Since tun is the only user of skb_copy_datagram_from_iovec, we can
fix this simply by changing the later so that it does not
touch the iovec passed to it.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c      | 13 ++++++++-----
 include/linux/skbuff.h |  3 ++-
 include/linux/socket.h |  4 ++--
 net/core/datagram.c    | 20 ++++++++++++++------
 net/core/iovec.c       |  7 ++++---
 5 files changed, 30 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 3b513e29d39..589f0ca668d 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -540,31 +540,34 @@ static inline struct sk_buff *tun_alloc_skb(struct tun_struct *tun,
 
 /* Get packet from user space buffer */
 static __inline__ ssize_t tun_get_user(struct tun_struct *tun,
-				       struct iovec *iv, size_t count,
+				       const struct iovec *iv, size_t count,
 				       int noblock)
 {
 	struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) };
 	struct sk_buff *skb;
 	size_t len = count, align = 0;
 	struct virtio_net_hdr gso = { 0 };
+	int offset = 0;
 
 	if (!(tun->flags & TUN_NO_PI)) {
 		if ((len -= sizeof(pi)) > count)
 			return -EINVAL;
 
-		if(memcpy_fromiovec((void *)&pi, iv, sizeof(pi)))
+		if (memcpy_fromiovecend((void *)&pi, iv, 0, sizeof(pi)))
 			return -EFAULT;
+		offset += sizeof(pi);
 	}
 
 	if (tun->flags & TUN_VNET_HDR) {
 		if ((len -= sizeof(gso)) > count)
 			return -EINVAL;
 
-		if (memcpy_fromiovec((void *)&gso, iv, sizeof(gso)))
+		if (memcpy_fromiovecend((void *)&gso, iv, offset, sizeof(gso)))
 			return -EFAULT;
 
 		if (gso.hdr_len > len)
 			return -EINVAL;
+		offset += sizeof(pi);
 	}
 
 	if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV) {
@@ -581,7 +584,7 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun,
 		return PTR_ERR(skb);
 	}
 
-	if (skb_copy_datagram_from_iovec(skb, 0, iv, len)) {
+	if (skb_copy_datagram_from_iovec(skb, 0, iv, offset, len)) {
 		tun->dev->stats.rx_dropped++;
 		kfree_skb(skb);
 		return -EFAULT;
@@ -673,7 +676,7 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
 
 	DBG(KERN_INFO "%s: tun_chr_write %ld\n", tun->dev->name, count);
 
-	result = tun_get_user(tun, (struct iovec *)iv, iov_length(iv, count),
+	result = tun_get_user(tun, iv, iov_length(iv, count),
 			      file->f_flags & O_NONBLOCK);
 
 	tun_put(tun);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index af2b21bdda8..1b5c3d298f4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1715,7 +1715,8 @@ extern int	       skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
 							struct iovec *iov);
 extern int	       skb_copy_datagram_from_iovec(struct sk_buff *skb,
 						    int offset,
-						    struct iovec *from,
+						    const struct iovec *from,
+						    int from_offset,
 						    int len);
 extern int	       skb_copy_datagram_const_iovec(const struct sk_buff *from,
 						     int offset,
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 171b08db9c4..42a0396f2c5 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -309,8 +309,8 @@ struct ucred {
 
 #ifdef __KERNEL__
 extern int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
-extern int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, 
-				int offset, int len);
+extern int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
+			       int offset, int len);
 extern int csum_partial_copy_fromiovecend(unsigned char *kdata, 
 					  struct iovec *iov, 
 					  int offset, 
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 4dbb05cd572..914d5fa773b 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -435,13 +435,15 @@ EXPORT_SYMBOL(skb_copy_datagram_const_iovec);
  *	@skb: buffer to copy
  *	@offset: offset in the buffer to start copying to
  *	@from: io vector to copy to
+ *	@from_offset: offset in the io vector to start copying from
  *	@len: amount of data to copy to buffer from iovec
  *
  *	Returns 0 or -EFAULT.
- *	Note: the iovec is modified during the copy.
+ *	Note: the iovec is not modified during the copy.
  */
 int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
-				 struct iovec *from, int len)
+				 const struct iovec *from, int from_offset,
+				 int len)
 {
 	int start = skb_headlen(skb);
 	int i, copy = start - offset;
@@ -450,11 +452,12 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
 	if (copy > 0) {
 		if (copy > len)
 			copy = len;
-		if (memcpy_fromiovec(skb->data + offset, from, copy))
+		if (memcpy_fromiovecend(skb->data + offset, from, 0, copy))
 			goto fault;
 		if ((len -= copy) == 0)
 			return 0;
 		offset += copy;
+		from_offset += copy;
 	}
 
 	/* Copy paged appendix. Hmm... why does this look so complicated? */
@@ -473,8 +476,9 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
 			if (copy > len)
 				copy = len;
 			vaddr = kmap(page);
-			err = memcpy_fromiovec(vaddr + frag->page_offset +
-					       offset - start, from, copy);
+			err = memcpy_fromiovecend(vaddr + frag->page_offset +
+						  offset - start,
+						  from, from_offset, copy);
 			kunmap(page);
 			if (err)
 				goto fault;
@@ -482,6 +486,7 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
 			if (!(len -= copy))
 				return 0;
 			offset += copy;
+			from_offset += copy;
 		}
 		start = end;
 	}
@@ -500,11 +505,14 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
 					copy = len;
 				if (skb_copy_datagram_from_iovec(list,
 								 offset - start,
-								 from, copy))
+								 from,
+								 from_offset,
+								 copy))
 					goto fault;
 				if ((len -= copy) == 0)
 					return 0;
 				offset += copy;
+				from_offset += copy;
 			}
 			start = end;
 		}
diff --git a/net/core/iovec.c b/net/core/iovec.c
index a215545c0a3..40a76ce19d9 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -147,10 +147,11 @@ int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
 }
 
 /*
- *	For use with ip_build_xmit
+ *	Copy iovec from kernel. Returns -EFAULT on error.
  */
-int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset,
-			int len)
+
+int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
+			int offset, int len)
 {
 	/* Skip over the finished iovecs */
 	while (offset >= iov->iov_len) {
-- 
cgit v1.2.3-70-g09d2


From 7a4f453b6d7379a7c380825949977c5a838aa012 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 22 Apr 2009 16:53:34 +0800
Subject: tracing/events: make struct trace_entry->type to be int type

struct trace_entry->type is unsigned char, while trace event's id is
int type, thus for a event with id >= 256, it's entry->type is cast
to (id % 256), and then we can't see the trace output of this event.

 # insmod trace-events-sample.ko
 # echo foo_bar > /mnt/tracing/set_event
 # cat /debug/tracing/events/trace-events-sample/foo_bar/id
 256
 # cat /mnt/tracing/trace_pipe
           <...>-3548  [001]   215.091142: Unknown type 0
           <...>-3548  [001]   216.089207: Unknown type 0
           <...>-3548  [001]   217.087271: Unknown type 0
           <...>-3548  [001]   218.085332: Unknown type 0

[ Impact: fix output for trace events with id >= 256 ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <49EEDB0E.5070207@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace_event.h | 4 ++--
 include/trace/ftrace.h       | 2 +-
 kernel/trace/trace.c         | 4 ++--
 kernel/trace/trace.h         | 2 +-
 kernel/trace/trace_events.c  | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 75f3ac01a87..2a4a4074991 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -16,7 +16,7 @@ struct dentry;
  *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
  */
 struct trace_entry {
-	unsigned char		type;
+	int			type;
 	unsigned char		flags;
 	unsigned char		preempt_count;
 	int			pid;
@@ -73,7 +73,7 @@ enum print_line_t {
 
 
 struct ring_buffer_event *
-trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
+trace_current_buffer_lock_reserve(int type, unsigned long len,
 				  unsigned long flags, int pc);
 void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
 					unsigned long flags, int pc);
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 39a3351f2e7..15ef08d9add 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -198,7 +198,7 @@ ftrace_define_fields_##call(void)					\
 	struct ftrace_event_call *event_call = &event_##call;		\
 	int ret;							\
 									\
-	__common_field(unsigned char, type);				\
+	__common_field(int, type);					\
 	__common_field(unsigned char, flags);				\
 	__common_field(unsigned char, preempt_count);			\
 	__common_field(int, pid);					\
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b9a3adce922..b6183bc9eca 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -838,7 +838,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
 }
 
 struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
-						    unsigned char type,
+						    int type,
 						    unsigned long len,
 						    unsigned long flags, int pc)
 {
@@ -881,7 +881,7 @@ void trace_buffer_unlock_commit(struct trace_array *tr,
 }
 
 struct ring_buffer_event *
-trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
+trace_current_buffer_lock_reserve(int type, unsigned long len,
 				  unsigned long flags, int pc)
 {
 	return trace_buffer_lock_reserve(&global_trace,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 247948e81b0..7d55bcf50e4 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -422,7 +422,7 @@ void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
 struct ring_buffer_event;
 
 struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
-						    unsigned char type,
+						    int type,
 						    unsigned long len,
 						    unsigned long flags,
 						    int pc);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 9ea55a7dfde..5d6e879cf87 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -398,7 +398,7 @@ static int trace_write_header(struct trace_seq *s)
 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
 				"\n",
-				FIELD(unsigned char, type),
+				FIELD(int, type),
 				FIELD(unsigned char, flags),
 				FIELD(unsigned char, preempt_count),
 				FIELD(int, pid),
-- 
cgit v1.2.3-70-g09d2


From 9cbf117662e24c6d33245666804487f92c21b59d Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 19 Apr 2009 04:51:29 +0200
Subject: tracing/events: provide string with undefined size support

This patch provides the support for dynamic size strings on
event tracing.

The key concept is to use a structure with an ending char array field of
undefined size and use such ability to allocate the minimal size on the
ring buffer to make one or more string entries fit inside, as opposite
to a fixed length strings with upper bound.

The strings themselves are represented using fields which have an offset
value from the beginning of the entry.

This patch provides three new macros:

__string(item, src)

This one declares a string to the structure inside TP_STRUCT__entry.
You need to provide the name of the string field and the source that will
be copied inside.
This will also add the dynamic size of the string needed for the ring
buffer entry allocation.
A stack allocated structure is used to temporarily store the offset
of each strings, avoiding double calls to strlen() on each event
insertion.

__get_str(field)

This one will give you a pointer to the string you have created. This
is an abstract helper to resolve the absolute address given the field
name which is a relative address from the beginning of the trace_structure.

__assign_str(dst, src)

Use this macro to automatically perform the string copy from src to
dst. src must be a variable to assign and dst is the name of a __string
field.

Example on how to use it:

TRACE_EVENT(my_event,
	TP_PROTO(char *src1, char *src2),

	TP_ARGS(src1, src2),
	TP_STRUCT__entry(
		__string(str1, src1)
		__string(str2, src2)
	),
	TP_fast_assign(
		__assign_str(str1, src1);
		__assign_str(str2, src2);
	),
	TP_printk("%s %s", __get_str(src1), __get_str(src2))
)

Of course you can mix-up any __field or __array inside this
TRACE_EVENT. The position of the __string or __assign_str
doesn't matter.

Changes in v2:

Address the suggestion of Steven Rostedt: drop the opening_string() macro
and redefine __ending_string() to get the size of the string to be copied
instead of overwritting the whole ring buffer allocation.

Changes in v3:

Address other suggestions of Steven Rostedt and Peter Zijlstra with
some changes: drop the __ending_string and the need to have only one
string field.
Use offsets instead of absolute addresses.

[ Impact: allow more compact memory usage for string tracing ]

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/trace/ftrace.h | 88 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 85 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 15ef08d9add..5a7d18c4363 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -27,6 +27,9 @@
 #undef __field
 #define __field(type, item)		type	item;
 
+#undef __string
+#define __string(item, src)		int	__str_loc_##item;
+
 #undef TP_STRUCT__entry
 #define TP_STRUCT__entry(args...) args
 
@@ -35,14 +38,53 @@
 	struct ftrace_raw_##name {				\
 		struct trace_entry	ent;			\
 		tstruct						\
+		char			__str_data[0];		\
 	};							\
 	static struct ftrace_event_call event_##name
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
+
 /*
  * Stage 2 of the trace events.
  *
+ * Include the following:
+ *
+ * struct ftrace_str_offsets_<call> {
+ *	int				<str1>;
+ *	int				<str2>;
+ *	[...]
+ * };
+ *
+ * The __string() macro will create each int <str>, this is to
+ * keep the offset of each string from the beggining of the event
+ * once we perform the strlen() of the src strings.
+ *
+ */
+
+#undef TRACE_FORMAT
+#define TRACE_FORMAT(call, proto, args, fmt)
+
+#undef __array
+#define __array(type, item, len)
+
+#undef __field
+#define __field(type, item);
+
+#undef __string
+#define __string(item, src)	int item;
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+	struct ftrace_str_offsets_##call {				\
+		tstruct;						\
+	};
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+/*
+ * Stage 3 of the trace events.
+ *
  * Override the macros in <trace/trace_events.h> to include the following:
  *
  * enum print_line_t
@@ -80,6 +122,9 @@
 #undef TP_printk
 #define TP_printk(fmt, args...) fmt "\n", args
 
+#undef __get_str
+#define __get_str(field)	(char *)__entry + __entry->__str_loc_##field
+
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
 enum print_line_t							\
@@ -146,6 +191,16 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 	if (!ret)							\
 		return 0;
 
+#undef __string
+#define __string(item, src)						       \
+	ret = trace_seq_printf(s, "\tfield: __str_loc " #item ";\t"	       \
+			       "offset:%u;tsize:%u;\n",			       \
+			       (unsigned int)offsetof(typeof(field),	       \
+					__str_loc_##item),		       \
+			       (unsigned int)sizeof(field.__str_loc_##item));  \
+	if (!ret)							       \
+		return 0;
+
 #undef __entry
 #define __entry REC
 
@@ -189,6 +244,12 @@ ftrace_format_##call(struct trace_seq *s)				\
 	if (ret)							\
 		return ret;
 
+#undef __string
+#define __string(item, src)						       \
+	ret = trace_define_field(event_call, "__str_loc", #item,	       \
+				offsetof(typeof(field), __str_loc_##item),     \
+				sizeof(field.__str_loc_##item));
+
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
 int									\
@@ -212,7 +273,7 @@ ftrace_define_fields_##call(void)					\
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
- * Stage 3 of the trace events.
+ * Stage 4 of the trace events.
  *
  * Override the macros in <trace/trace_events.h> to include the following:
  *
@@ -409,6 +470,23 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 #undef __entry
 #define __entry entry
 
+#undef __field
+#define __field(type, item)
+
+#undef __array
+#define __array(type, item, len)
+
+#undef __string
+#define __string(item, src)						       \
+	__str_offsets.item = __str_size +				       \
+			     offsetof(typeof(*entry), __str_data);	       \
+	__str_size += strlen(src) + 1;
+
+#undef __assign_str
+#define __assign_str(dst, src)						\
+	__entry->__str_loc_##dst = __str_offsets.dst;			\
+	strcpy(__get_str(dst), src);
+
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
 _TRACE_PROFILE(call, PARAMS(proto), PARAMS(args))			\
@@ -417,18 +495,22 @@ static struct ftrace_event_call event_##call;				\
 									\
 static void ftrace_raw_event_##call(proto)				\
 {									\
+	struct ftrace_str_offsets_##call __maybe_unused __str_offsets;  \
 	struct ftrace_event_call *call = &event_##call;			\
 	struct ring_buffer_event *event;				\
 	struct ftrace_raw_##call *entry;				\
 	unsigned long irq_flags;					\
+	int __str_size = 0;						\
 	int pc;								\
 									\
 	local_save_flags(irq_flags);					\
 	pc = preempt_count();						\
 									\
+	tstruct;							\
+									\
 	event = trace_current_buffer_lock_reserve(event_##call.id,	\
-				  sizeof(struct ftrace_raw_##call),	\
-				  irq_flags, pc);			\
+				 sizeof(struct ftrace_raw_##call) + __str_size,\
+				 irq_flags, pc);			\
 	if (!event)							\
 		return;							\
 	entry	= ring_buffer_event_data(event);			\
-- 
cgit v1.2.3-70-g09d2


From 7e7ca9a22dbbc5c91763cd16923c7509918709b6 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 19 Apr 2009 04:54:49 +0200
Subject: tracing/lock: provide lock_acquired event support for dynamic size
 string

Now that we can support the dynamic sized string, make the lock tracing
able to use it, making it safe against modules removal and consuming
the right amount of memory needed for each lock name

Changes in v2:
adapt to the __ending_string() updates and the opening_string() removal.

[ Impact: protect lock tracer against module removal ]

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/events/lockdep.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h
index 45e326b5c7f..3ca315c1429 100644
--- a/include/trace/events/lockdep.h
+++ b/include/trace/events/lockdep.h
@@ -38,16 +38,16 @@ TRACE_EVENT(lock_acquired,
 	TP_ARGS(lock, ip, waittime),
 
 	TP_STRUCT__entry(
-		__field(const char *, name)
+		__string(name, lock->name)
 		__field(unsigned long, wait_usec)
 		__field(unsigned long, wait_nsec_rem)
 	),
 	TP_fast_assign(
-		__entry->name = lock->name;
+		__assign_str(name, lock->name);
 		__entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC);
 		__entry->wait_usec = (unsigned long) waittime;
 	),
-	TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec,
+	TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec,
 				       __entry->wait_nsec_rem)
 );
 
-- 
cgit v1.2.3-70-g09d2


From 6a74aa40907757ec98d8710ff66cd4cfe064e7d8 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 22 Apr 2009 00:41:09 +0200
Subject: tracing/events: protect __get_str()

The __get_str() macro is used in a code part then its content should be
protected with parenthesis.

[ Impact: make macro definition more robust ]

Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/trace/ftrace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 5a7d18c4363..a77f71a46db 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -123,7 +123,7 @@
 #define TP_printk(fmt, args...) fmt "\n", args
 
 #undef __get_str
-#define __get_str(field)	(char *)__entry + __entry->__str_loc_##field
+#define __get_str(field)	((char *)__entry + __entry->__str_loc_##field)
 
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
-- 
cgit v1.2.3-70-g09d2


From 246d0a17f5e09af0794960164269fc8988a8811c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 22 Apr 2009 18:24:55 +0100
Subject: ASoC: Add power supply widget to DAPM

Many modern CODECs have shared resources on chip which must be enabled
for portions of the chip to work but which can be disabled at other times
in order to achieve power savings. Examples of such resources include
power supplies and some internal clocks.

Since these widgets are dependencies for the audio path but do not carry
audio signals they require slightly different handling to most widgets -
they do not contribute to the audio path and so should not be counted as
either inputs or outputs during path walks.

Cases where one supply provides a supply for another will require
additional work. There is also room for more optimisation of the graph
walking to avoid repeated checks for the same thing.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 Documentation/sound/alsa/soc/dapm.txt |  1 +
 include/sound/soc-dapm.h              |  7 +++++-
 sound/soc/soc-dapm.c                  | 44 +++++++++++++++++++++++++++++++----
 3 files changed, 46 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/Documentation/sound/alsa/soc/dapm.txt b/Documentation/sound/alsa/soc/dapm.txt
index 9e6763264a2..9ac842be9b4 100644
--- a/Documentation/sound/alsa/soc/dapm.txt
+++ b/Documentation/sound/alsa/soc/dapm.txt
@@ -62,6 +62,7 @@ Audio DAPM widgets fall into a number of types:-
  o Mic        - Mic (and optional Jack)
  o Line       - Line Input/Output (and optional Jack)
  o Speaker    - Speaker
+ o Supply     - Power or clock supply widget used by other widgets.
  o Pre        - Special PRE widget (exec before all others)
  o Post       - Special POST widget (exec after all others)
 
diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index 839a97b6326..533f9f25649 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -154,12 +154,16 @@
 	.shift = wshift, .invert = winvert, \
 	.event = wevent, .event_flags = wflags}
 
-/* generic register modifier widget */
+/* generic widgets */
 #define SND_SOC_DAPM_REG(wid, wname, wreg, wshift, wmask, won_val, woff_val) \
 {	.id = wid, .name = wname, .kcontrols = NULL, .num_kcontrols = 0, \
 	.reg = -((wreg) + 1), .shift = wshift, .mask = wmask, \
 	.on_val = won_val, .off_val = woff_val, .event = dapm_reg_event, \
 	.event_flags = SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD}
+#define SND_SOC_DAPM_SUPPLY(wname, wreg, wshift, winvert, wevent, wflags) \
+{	.id = snd_soc_dapm_supply, .name = wname, .reg = wreg,	\
+	.shift = wshift, .invert = winvert, .event = wevent, \
+	.event_flags = wflags}
 
 /* dapm kcontrol types */
 #define SOC_DAPM_SINGLE(xname, reg, shift, max, invert) \
@@ -308,6 +312,7 @@ enum snd_soc_dapm_type {
 	snd_soc_dapm_vmid,			/* codec bias/vmid - to minimise pops */
 	snd_soc_dapm_pre,			/* machine specific pre widget - exec first */
 	snd_soc_dapm_post,			/* machine specific post widget - exec last */
+	snd_soc_dapm_supply,		/* power/clock supply */
 };
 
 /*
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index d3d17354e76..7847f80e96d 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -52,17 +52,19 @@
 
 /* dapm power sequences - make this per codec in the future */
 static int dapm_up_seq[] = {
-	snd_soc_dapm_pre, snd_soc_dapm_micbias, snd_soc_dapm_mic,
-	snd_soc_dapm_mux, snd_soc_dapm_value_mux, snd_soc_dapm_dac,
-	snd_soc_dapm_mixer, snd_soc_dapm_mixer_named_ctl, snd_soc_dapm_pga,
-	snd_soc_dapm_adc, snd_soc_dapm_hp, snd_soc_dapm_spk, snd_soc_dapm_post
+	snd_soc_dapm_pre, snd_soc_dapm_supply, snd_soc_dapm_micbias,
+	snd_soc_dapm_mic, snd_soc_dapm_mux, snd_soc_dapm_value_mux,
+	snd_soc_dapm_dac, snd_soc_dapm_mixer, snd_soc_dapm_mixer_named_ctl,
+	snd_soc_dapm_pga, snd_soc_dapm_adc, snd_soc_dapm_hp, snd_soc_dapm_spk,
+	snd_soc_dapm_post
 };
 
 static int dapm_down_seq[] = {
 	snd_soc_dapm_pre, snd_soc_dapm_adc, snd_soc_dapm_hp, snd_soc_dapm_spk,
 	snd_soc_dapm_pga, snd_soc_dapm_mixer_named_ctl, snd_soc_dapm_mixer,
 	snd_soc_dapm_dac, snd_soc_dapm_mic, snd_soc_dapm_micbias,
-	snd_soc_dapm_mux, snd_soc_dapm_value_mux, snd_soc_dapm_post
+	snd_soc_dapm_mux, snd_soc_dapm_value_mux, snd_soc_dapm_supply,
+	snd_soc_dapm_post
 };
 
 static int dapm_status = 1;
@@ -165,6 +167,7 @@ static void dapm_set_path_status(struct snd_soc_dapm_widget *w,
 	case snd_soc_dapm_dac:
 	case snd_soc_dapm_micbias:
 	case snd_soc_dapm_vmid:
+	case snd_soc_dapm_supply:
 		p->connect = 1;
 	break;
 	/* does effect routing - dynamically connected */
@@ -435,6 +438,9 @@ static int is_connected_output_ep(struct snd_soc_dapm_widget *widget)
 	struct snd_soc_dapm_path *path;
 	int con = 0;
 
+	if (widget->id == snd_soc_dapm_supply)
+		return 0;
+
 	if (widget->id == snd_soc_dapm_adc && widget->active)
 		return 1;
 
@@ -471,6 +477,9 @@ static int is_connected_input_ep(struct snd_soc_dapm_widget *widget)
 	struct snd_soc_dapm_path *path;
 	int con = 0;
 
+	if (widget->id == snd_soc_dapm_supply)
+		return 0;
+
 	/* active stream ? */
 	if (widget->id == snd_soc_dapm_dac && widget->active)
 		return 1;
@@ -622,6 +631,26 @@ static int dapm_dac_check_power(struct snd_soc_dapm_widget *w)
 	}
 }
 
+/* Check to see if a power supply is needed */
+static int dapm_supply_check_power(struct snd_soc_dapm_widget *w)
+{
+	struct snd_soc_dapm_path *path;
+	int power = 0;
+
+	/* Check if one of our outputs is connected */
+	list_for_each_entry(path, &w->sinks, list_source) {
+		if (path->sink && path->sink->power_check &&
+		    path->sink->power_check(path->sink)) {
+			power = 1;
+			break;
+		}
+	}
+
+	dapm_clear_walk(w->codec);
+
+	return power;
+}
+
 /*
  * Scan a single DAPM widget for a complete audio path and update the
  * power status appropriately.
@@ -752,6 +781,7 @@ static void dbg_dump_dapm(struct snd_soc_codec* codec, const char *action)
 		case snd_soc_dapm_pga:
 		case snd_soc_dapm_mixer:
 		case snd_soc_dapm_mixer_named_ctl:
+		case snd_soc_dapm_supply:
 			if (w->name) {
 				in = is_connected_input_ep(w);
 				dapm_clear_walk(w->codec);
@@ -880,6 +910,7 @@ static ssize_t dapm_widget_show(struct device *dev,
 		case snd_soc_dapm_pga:
 		case snd_soc_dapm_mixer:
 		case snd_soc_dapm_mixer_named_ctl:
+		case snd_soc_dapm_supply:
 			if (w->name)
 				count += sprintf(buf + count, "%s: %s\n",
 					w->name, w->power ? "On":"Off");
@@ -1044,6 +1075,7 @@ static int snd_soc_dapm_add_route(struct snd_soc_codec *codec,
 	case snd_soc_dapm_vmid:
 	case snd_soc_dapm_pre:
 	case snd_soc_dapm_post:
+	case snd_soc_dapm_supply:
 		list_add(&path->list, &codec->dapm_paths);
 		list_add(&path->list_sink, &wsink->sources);
 		list_add(&path->list_source, &wsource->sinks);
@@ -1164,6 +1196,8 @@ int snd_soc_dapm_new_widgets(struct snd_soc_codec *codec)
 		case snd_soc_dapm_line:
 			w->power_check = dapm_generic_check_power;
 			break;
+		case snd_soc_dapm_supply:
+			w->power_check = dapm_supply_check_power;
 		case snd_soc_dapm_vmid:
 		case snd_soc_dapm_pre:
 		case snd_soc_dapm_post:
-- 
cgit v1.2.3-70-g09d2


From c1c6b14b22af0f85d05a70405dc3fba5de840c7b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 30 Mar 2009 11:32:46 +0200
Subject: rfkill: remove deprecated state constants

I only did superficial review, but these constants are stupid
to have and without proper warnings nobody will review the
code anyway, no amount of shouting will help.

Also fix wimax to use correct states.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 arch/arm/mach-pxa/tosa-bt.c | 4 ++--
 drivers/net/usb/hso.c       | 4 ++--
 include/linux/rfkill.h      | 8 --------
 net/wimax/op-rfkill.c       | 8 ++++----
 4 files changed, 8 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-pxa/tosa-bt.c b/arch/arm/mach-pxa/tosa-bt.c
index fb0294bd431..bde42aa2937 100644
--- a/arch/arm/mach-pxa/tosa-bt.c
+++ b/arch/arm/mach-pxa/tosa-bt.c
@@ -38,9 +38,9 @@ static void tosa_bt_off(struct tosa_bt_data *data)
 static int tosa_bt_toggle_radio(void *data, enum rfkill_state state)
 {
 	pr_info("BT_RADIO going: %s\n",
-			state == RFKILL_STATE_ON ? "on" : "off");
+			state == RFKILL_STATE_UNBLOCKED ? "on" : "off");
 
-	if (state == RFKILL_STATE_ON) {
+	if (state == RFKILL_STATE_UNBLOCKED) {
 		pr_info("TOSA_BT: going ON\n");
 		tosa_bt_on(data);
 	} else {
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index f84b78d94c4..d696e5fbc17 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2484,7 +2484,7 @@ static int add_net_device(struct hso_device *hso_dev)
 static int hso_radio_toggle(void *data, enum rfkill_state state)
 {
 	struct hso_device *hso_dev = data;
-	int enabled = (state == RFKILL_STATE_ON);
+	int enabled = (state == RFKILL_STATE_UNBLOCKED);
 	int rv;
 
 	mutex_lock(&hso_dev->mutex);
@@ -2522,7 +2522,7 @@ static void hso_create_rfkill(struct hso_device *hso_dev,
 	snprintf(rfkn, 20, "hso-%d",
 		 interface->altsetting->desc.bInterfaceNumber);
 	hso_net->rfkill->name = rfkn;
-	hso_net->rfkill->state = RFKILL_STATE_ON;
+	hso_net->rfkill->state = RFKILL_STATE_UNBLOCKED;
 	hso_net->rfkill->data = hso_dev;
 	hso_net->rfkill->toggle_radio = hso_radio_toggle;
 	if (rfkill_register(hso_net->rfkill) < 0) {
diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index 164332cbb77..e1ec7d9aa49 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -52,14 +52,6 @@ enum rfkill_state {
 	RFKILL_STATE_MAX,		/* marker for last valid state */
 };
 
-/*
- * These are DEPRECATED, drivers using them should be verified to
- * comply with the rfkill usage guidelines in Documentation/rfkill.txt
- * and then converted to use the new names for rfkill_state
- */
-#define RFKILL_STATE_OFF RFKILL_STATE_SOFT_BLOCKED
-#define RFKILL_STATE_ON  RFKILL_STATE_UNBLOCKED
-
 /**
  * struct rfkill - rfkill control structure.
  * @name: Name of the switch.
diff --git a/net/wimax/op-rfkill.c b/net/wimax/op-rfkill.c
index 2b75aee0421..870032faece 100644
--- a/net/wimax/op-rfkill.c
+++ b/net/wimax/op-rfkill.c
@@ -113,7 +113,7 @@ void wimax_report_rfkill_hw(struct wimax_dev *wimax_dev,
 	if (state != wimax_dev->rf_hw) {
 		wimax_dev->rf_hw = state;
 		rfkill_state = state == WIMAX_RF_ON ?
-			RFKILL_STATE_OFF : RFKILL_STATE_ON;
+			RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED;
 		if (wimax_dev->rf_hw == WIMAX_RF_ON
 		    && wimax_dev->rf_sw == WIMAX_RF_ON)
 			wimax_state = WIMAX_ST_READY;
@@ -259,10 +259,10 @@ int wimax_rfkill_toggle_radio(void *data, enum rfkill_state state)
 
 	d_fnstart(3, dev, "(wimax_dev %p state %u)\n", wimax_dev, state);
 	switch (state) {
-	case RFKILL_STATE_ON:
+	case RFKILL_STATE_SOFT_BLOCKED:
 		rf_state = WIMAX_RF_OFF;
 		break;
-	case RFKILL_STATE_OFF:
+	case RFKILL_STATE_UNBLOCKED:
 		rf_state = WIMAX_RF_ON;
 		break;
 	default:
@@ -361,7 +361,7 @@ int wimax_rfkill_add(struct wimax_dev *wimax_dev)
 	wimax_dev->rfkill = rfkill;
 
 	rfkill->name = wimax_dev->name;
-	rfkill->state = RFKILL_STATE_OFF;
+	rfkill->state = RFKILL_STATE_UNBLOCKED;
 	rfkill->data = wimax_dev;
 	rfkill->toggle_radio = wimax_rfkill_toggle_radio;
 	rfkill->user_claim_unsupported = 1;
-- 
cgit v1.2.3-70-g09d2


From 621cac85297de5ba655e3430b007dd2e0da91da6 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 27 Mar 2009 14:14:31 +0100
Subject: rfkill: remove user_claim stuff

Almost all drivers do not support user_claim, so remove it
completely and always report -EOPNOTSUPP to userspace. Since
userspace cannot really drive rfkill _anyway_ (due to the
odd restrictions imposed by the documentation) having this
code is just pointless.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 Documentation/rfkill.txt                  | 16 +++++------
 drivers/net/wireless/ath9k/main.c         |  1 -
 drivers/net/wireless/b43/rfkill.c         |  1 -
 drivers/net/wireless/b43legacy/rfkill.c   |  1 -
 drivers/net/wireless/iwlwifi/iwl-rfkill.c |  1 -
 drivers/platform/x86/acer-wmi.c           |  1 -
 drivers/platform/x86/hp-wmi.c             |  3 ---
 drivers/platform/x86/sony-laptop.c        |  4 ---
 drivers/platform/x86/toshiba_acpi.c       |  1 -
 include/linux/rfkill.h                    |  6 -----
 net/rfkill/rfkill.c                       | 45 +++----------------------------
 net/wimax/op-rfkill.c                     |  1 -
 12 files changed, 9 insertions(+), 72 deletions(-)

(limited to 'include')

diff --git a/Documentation/rfkill.txt b/Documentation/rfkill.txt
index 4d3ee317a4a..40c3a3f1081 100644
--- a/Documentation/rfkill.txt
+++ b/Documentation/rfkill.txt
@@ -521,16 +521,12 @@ status of the system.
 Input devices may issue events that are related to rfkill.  These are the
 various KEY_* events and SW_* events supported by rfkill-input.c.
 
-******IMPORTANT******
-When rfkill-input is ACTIVE, userspace is NOT TO CHANGE THE STATE OF AN RFKILL
-SWITCH IN RESPONSE TO AN INPUT EVENT also handled by rfkill-input, unless it
-has set to true the user_claim attribute for that particular switch.  This rule
-is *absolute*; do NOT violate it.
-******IMPORTANT******
-
-Userspace must not assume it is the only source of control for rfkill switches.
-Their state CAN and WILL change due to firmware actions, direct user actions,
-and the rfkill-input EPO override for *_RFKILL_ALL.
+Userspace may not change the state of an rfkill switch in response to an
+input event, it should refrain from changing states entirely.
+
+Userspace cannot assume it is the only source of control for rfkill switches.
+Their state can change due to firmware actions, direct user actions, and the
+rfkill-input EPO override for *_RFKILL_ALL.
 
 When rfkill-input is not active, userspace must initiate a rfkill status
 change by writing to the "state" attribute in order for anything to happen.
diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c
index 13d4e6756c9..0607df20e49 100644
--- a/drivers/net/wireless/ath9k/main.c
+++ b/drivers/net/wireless/ath9k/main.c
@@ -1267,7 +1267,6 @@ static int ath_init_sw_rfkill(struct ath_softc *sc)
 	sc->rf_kill.rfkill->data = sc;
 	sc->rf_kill.rfkill->toggle_radio = ath_sw_toggle_radio;
 	sc->rf_kill.rfkill->state = RFKILL_STATE_UNBLOCKED;
-	sc->rf_kill.rfkill->user_claim_unsupported = 1;
 
 	return 0;
 }
diff --git a/drivers/net/wireless/b43/rfkill.c b/drivers/net/wireless/b43/rfkill.c
index afad4235869..9e1d00bc24d 100644
--- a/drivers/net/wireless/b43/rfkill.c
+++ b/drivers/net/wireless/b43/rfkill.c
@@ -139,7 +139,6 @@ void b43_rfkill_init(struct b43_wldev *dev)
 	rfk->rfkill->state = RFKILL_STATE_UNBLOCKED;
 	rfk->rfkill->data = dev;
 	rfk->rfkill->toggle_radio = b43_rfkill_soft_toggle;
-	rfk->rfkill->user_claim_unsupported = 1;
 
 	rfk->poll_dev = input_allocate_polled_device();
 	if (!rfk->poll_dev) {
diff --git a/drivers/net/wireless/b43legacy/rfkill.c b/drivers/net/wireless/b43legacy/rfkill.c
index b32bf6a94f1..4b0c7d27a51 100644
--- a/drivers/net/wireless/b43legacy/rfkill.c
+++ b/drivers/net/wireless/b43legacy/rfkill.c
@@ -142,7 +142,6 @@ void b43legacy_rfkill_init(struct b43legacy_wldev *dev)
 	rfk->rfkill->state = RFKILL_STATE_UNBLOCKED;
 	rfk->rfkill->data = dev;
 	rfk->rfkill->toggle_radio = b43legacy_rfkill_soft_toggle;
-	rfk->rfkill->user_claim_unsupported = 1;
 
 	rfk->poll_dev = input_allocate_polled_device();
 	if (!rfk->poll_dev) {
diff --git a/drivers/net/wireless/iwlwifi/iwl-rfkill.c b/drivers/net/wireless/iwlwifi/iwl-rfkill.c
index 2ad9faf1508..65605ad44e4 100644
--- a/drivers/net/wireless/iwlwifi/iwl-rfkill.c
+++ b/drivers/net/wireless/iwlwifi/iwl-rfkill.c
@@ -91,7 +91,6 @@ int iwl_rfkill_init(struct iwl_priv *priv)
 	priv->rfkill->data = priv;
 	priv->rfkill->state = RFKILL_STATE_UNBLOCKED;
 	priv->rfkill->toggle_radio = iwl_rfkill_soft_rf_kill;
-	priv->rfkill->user_claim_unsupported = 1;
 
 	priv->rfkill->dev.class->suspend = NULL;
 	priv->rfkill->dev.class->resume = NULL;
diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index 0f6e43bf4fc..62d02b3c998 100644
--- a/drivers/platform/x86/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c
@@ -1005,7 +1005,6 @@ enum rfkill_type type, char *name, u32 cap)
 	*data = cap;
 	rfkill_dev->data = data;
 	rfkill_dev->toggle_radio = acer_rfkill_set;
-	rfkill_dev->user_claim_unsupported = 1;
 
 	err = rfkill_register(rfkill_dev);
 	if (err) {
diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c
index 50d9019de2b..fe171fad12c 100644
--- a/drivers/platform/x86/hp-wmi.c
+++ b/drivers/platform/x86/hp-wmi.c
@@ -434,7 +434,6 @@ static int __init hp_wmi_bios_setup(struct platform_device *device)
 		wifi_rfkill->name = "hp-wifi";
 		wifi_rfkill->state = hp_wmi_wifi_state();
 		wifi_rfkill->toggle_radio = hp_wmi_wifi_set;
-		wifi_rfkill->user_claim_unsupported = 1;
 		err = rfkill_register(wifi_rfkill);
 		if (err)
 			goto add_sysfs_error;
@@ -446,7 +445,6 @@ static int __init hp_wmi_bios_setup(struct platform_device *device)
 		bluetooth_rfkill->name = "hp-bluetooth";
 		bluetooth_rfkill->state = hp_wmi_bluetooth_state();
 		bluetooth_rfkill->toggle_radio = hp_wmi_bluetooth_set;
-		bluetooth_rfkill->user_claim_unsupported = 1;
 		err = rfkill_register(bluetooth_rfkill);
 		if (err)
 			goto register_bluetooth_error;
@@ -457,7 +455,6 @@ static int __init hp_wmi_bios_setup(struct platform_device *device)
 		wwan_rfkill->name = "hp-wwan";
 		wwan_rfkill->state = hp_wmi_wwan_state();
 		wwan_rfkill->toggle_radio = hp_wmi_wwan_set;
-		wwan_rfkill->user_claim_unsupported = 1;
 		err = rfkill_register(wwan_rfkill);
 		if (err)
 			goto register_wwan_err;
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index d3c92d777bd..184e99e7268 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -1097,7 +1097,6 @@ static int sony_nc_setup_wifi_rfkill(struct acpi_device *device)
 	sony_wifi_rfkill->name = "sony-wifi";
 	sony_wifi_rfkill->toggle_radio = sony_nc_rfkill_set;
 	sony_wifi_rfkill->get_state = sony_nc_rfkill_get;
-	sony_wifi_rfkill->user_claim_unsupported = 1;
 	sony_wifi_rfkill->data = (void *)SONY_WIFI;
 	err = rfkill_register(sony_wifi_rfkill);
 	if (err)
@@ -1119,7 +1118,6 @@ static int sony_nc_setup_bluetooth_rfkill(struct acpi_device *device)
 	sony_bluetooth_rfkill->name = "sony-bluetooth";
 	sony_bluetooth_rfkill->toggle_radio = sony_nc_rfkill_set;
 	sony_bluetooth_rfkill->get_state = sony_nc_rfkill_get;
-	sony_bluetooth_rfkill->user_claim_unsupported = 1;
 	sony_bluetooth_rfkill->data = (void *)SONY_BLUETOOTH;
 	err = rfkill_register(sony_bluetooth_rfkill);
 	if (err)
@@ -1140,7 +1138,6 @@ static int sony_nc_setup_wwan_rfkill(struct acpi_device *device)
 	sony_wwan_rfkill->name = "sony-wwan";
 	sony_wwan_rfkill->toggle_radio = sony_nc_rfkill_set;
 	sony_wwan_rfkill->get_state = sony_nc_rfkill_get;
-	sony_wwan_rfkill->user_claim_unsupported = 1;
 	sony_wwan_rfkill->data = (void *)SONY_WWAN;
 	err = rfkill_register(sony_wwan_rfkill);
 	if (err)
@@ -1161,7 +1158,6 @@ static int sony_nc_setup_wimax_rfkill(struct acpi_device *device)
 	sony_wimax_rfkill->name = "sony-wimax";
 	sony_wimax_rfkill->toggle_radio = sony_nc_rfkill_set;
 	sony_wimax_rfkill->get_state = sony_nc_rfkill_get;
-	sony_wimax_rfkill->user_claim_unsupported = 1;
 	sony_wimax_rfkill->data = (void *)SONY_WIMAX;
 	err = rfkill_register(sony_wimax_rfkill);
 	if (err)
diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index 9f187265db8..4345089f517 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -803,7 +803,6 @@ static int __init toshiba_acpi_init(void)
 
 		toshiba_acpi.rfk_dev->name = toshiba_acpi.bt_name;
 		toshiba_acpi.rfk_dev->toggle_radio = bt_rfkill_toggle_radio;
-		toshiba_acpi.rfk_dev->user_claim_unsupported = 1;
 		toshiba_acpi.rfk_dev->data = &toshiba_acpi;
 
 		if (hci_get_bt_on(&bt_on) == HCI_SUCCESS && bt_on) {
diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index e1ec7d9aa49..de18ef227e0 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -58,9 +58,6 @@ enum rfkill_state {
  * @type: Radio type which the button controls, the value stored
  *	here should be a value from enum rfkill_type.
  * @state: State of the switch, "UNBLOCKED" means radio can operate.
- * @user_claim_unsupported: Whether the hardware supports exclusive
- *	RF-kill control by userspace. Set this before registering.
- * @user_claim: Set when the switch is controlled exlusively by userspace.
  * @mutex: Guards switch state transitions.  It serializes callbacks
  *	and also protects the state.
  * @data: Pointer to the RF button drivers private data which will be
@@ -83,9 +80,6 @@ struct rfkill {
 	const char *name;
 	enum rfkill_type type;
 
-	bool user_claim_unsupported;
-	bool user_claim;
-
 	/* the mutex serializes callbacks and also protects
 	 * the state */
 	struct mutex mutex;
diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index 3eaa39403c1..df1269c5ca7 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -200,7 +200,7 @@ static void __rfkill_switch_all(const enum rfkill_type type,
 
 	rfkill_global_states[type].current_state = state;
 	list_for_each_entry(rfkill, &rfkill_list, node) {
-		if ((!rfkill->user_claim) && (rfkill->type == type)) {
+		if (rfkill->type == type) {
 			mutex_lock(&rfkill->mutex);
 			rfkill_toggle_radio(rfkill, state, 0);
 			mutex_unlock(&rfkill->mutex);
@@ -447,53 +447,14 @@ static ssize_t rfkill_claim_show(struct device *dev,
 				 struct device_attribute *attr,
 				 char *buf)
 {
-	struct rfkill *rfkill = to_rfkill(dev);
-
-	return sprintf(buf, "%d\n", rfkill->user_claim);
+	return sprintf(buf, "%d\n", 0);
 }
 
 static ssize_t rfkill_claim_store(struct device *dev,
 				  struct device_attribute *attr,
 				  const char *buf, size_t count)
 {
-	struct rfkill *rfkill = to_rfkill(dev);
-	unsigned long claim_tmp;
-	bool claim;
-	int error;
-
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
-	if (rfkill->user_claim_unsupported)
-		return -EOPNOTSUPP;
-
-	error = strict_strtoul(buf, 0, &claim_tmp);
-	if (error)
-		return error;
-	claim = !!claim_tmp;
-
-	/*
-	 * Take the global lock to make sure the kernel is not in
-	 * the middle of rfkill_switch_all
-	 */
-	error = mutex_lock_killable(&rfkill_global_mutex);
-	if (error)
-		return error;
-
-	if (rfkill->user_claim != claim) {
-		if (!claim && !rfkill_epo_lock_active) {
-			mutex_lock(&rfkill->mutex);
-			rfkill_toggle_radio(rfkill,
-					rfkill_global_states[rfkill->type].current_state,
-					0);
-			mutex_unlock(&rfkill->mutex);
-		}
-		rfkill->user_claim = claim;
-	}
-
-	mutex_unlock(&rfkill_global_mutex);
-
-	return error ? error : count;
+	return -EOPNOTSUPP;
 }
 
 static struct device_attribute rfkill_dev_attrs[] = {
diff --git a/net/wimax/op-rfkill.c b/net/wimax/op-rfkill.c
index 870032faece..a3616e2ccb8 100644
--- a/net/wimax/op-rfkill.c
+++ b/net/wimax/op-rfkill.c
@@ -364,7 +364,6 @@ int wimax_rfkill_add(struct wimax_dev *wimax_dev)
 	rfkill->state = RFKILL_STATE_UNBLOCKED;
 	rfkill->data = wimax_dev;
 	rfkill->toggle_radio = wimax_rfkill_toggle_radio;
-	rfkill->user_claim_unsupported = 1;
 
 	/* Initialize the input device for the hw key */
 	input_dev = input_allocate_device();
-- 
cgit v1.2.3-70-g09d2


From 07f62d01c1f476a3b328a44faafdfd103a4dedc3 Mon Sep 17 00:00:00 2001
From: Huang Weiyi <weiyi.huang@gmail.com>
Date: Fri, 27 Mar 2009 22:20:27 +0800
Subject: cfg80211: remove duplicated #include

Remove duplicated #include in include/net/cfg80211.h.

Signed-off-by: Huang Weiyi <weiyi.huang@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 5389afdc129..d1b8f0d53c5 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -10,7 +10,6 @@
 #include <net/iw_handler.h>
 #include <net/genetlink.h>
 /* remove once we remove the wext stuff */
-#include <net/iw_handler.h>
 
 /*
  * 802.11 configuration in-kernel interface
-- 
cgit v1.2.3-70-g09d2


From 53b46b8444f600cc1744521ea096ea0c5d494dd0 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Fri, 27 Mar 2009 20:53:56 +0200
Subject: nl80211: Generate deauth/disassoc event for locally generated frames

Previously, nl80211 mlme events were generated only for received
deauthentication and disassociation frames. We need to do the same for
locally generated ones in order to let applications know that we
disconnected (e.g., when AP does not reply to a probe). Rename the
nl80211 and cfg80211 functions (s/rx_//) to make it clearer that they
are used for both received and locally generated frames.

Signed-off-by: Jouni Malinen <j@w1.fi>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h | 16 ++++++++--------
 net/mac80211/mlme.c    |  8 ++++++--
 net/wireless/mlme.c    | 13 ++++++-------
 net/wireless/nl80211.c | 11 +++++------
 net/wireless/nl80211.h | 12 ++++++------
 5 files changed, 31 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index d1b8f0d53c5..ec33096fc65 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -917,28 +917,28 @@ void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len);
 void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len);
 
 /**
- * cfg80211_send_rx_deauth - notification of processed deauthentication
+ * cfg80211_send_deauth - notification of processed deauthentication
  * @dev: network device
  * @buf: deauthentication frame (header + body)
  * @len: length of the frame data
  *
  * This function is called whenever deauthentication has been processed in
- * station mode.
+ * station mode. This includes both received deauthentication frames and
+ * locally generated ones.
  */
-void cfg80211_send_rx_deauth(struct net_device *dev, const u8 *buf,
-			     size_t len);
+void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len);
 
 /**
- * cfg80211_send_rx_disassoc - notification of processed disassociation
+ * cfg80211_send_disassoc - notification of processed disassociation
  * @dev: network device
  * @buf: disassociation response frame (header + body)
  * @len: length of the frame data
  *
  * This function is called whenever disassociation has been processed in
- * station mode.
+ * station mode. This includes both received disassociation frames and locally
+ * generated ones.
  */
-void cfg80211_send_rx_disassoc(struct net_device *dev, const u8 *buf,
-			       size_t len);
+void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len);
 
 /**
  * cfg80211_hold_bss - exclude bss from expiration
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 132938b073d..08db02c237c 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -325,6 +325,10 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
 	/* u.deauth.reason_code == u.disassoc.reason_code */
 	mgmt->u.deauth.reason_code = cpu_to_le16(reason);
 
+	if (stype == IEEE80211_STYPE_DEAUTH)
+		cfg80211_send_deauth(sdata->dev, (u8 *) mgmt, skb->len);
+	else
+		cfg80211_send_disassoc(sdata->dev, (u8 *) mgmt, skb->len);
 	ieee80211_tx_skb(sdata, skb, ifmgd->flags & IEEE80211_STA_MFP_ENABLED);
 }
 
@@ -1187,7 +1191,7 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
 
 	ieee80211_set_disassoc(sdata, true, false, 0);
 	ifmgd->flags &= ~IEEE80211_STA_AUTHENTICATED;
-	cfg80211_send_rx_deauth(sdata->dev, (u8 *) mgmt, len);
+	cfg80211_send_deauth(sdata->dev, (u8 *) mgmt, len);
 }
 
 
@@ -1218,7 +1222,7 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
 	}
 
 	ieee80211_set_disassoc(sdata, false, false, reason_code);
-	cfg80211_send_rx_disassoc(sdata->dev, (u8 *) mgmt, len);
+	cfg80211_send_disassoc(sdata->dev, (u8 *) mgmt, len);
 }
 
 
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index bec5721b6f9..33ff7cca496 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -28,19 +28,18 @@ void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len)
 }
 EXPORT_SYMBOL(cfg80211_send_rx_assoc);
 
-void cfg80211_send_rx_deauth(struct net_device *dev, const u8 *buf, size_t len)
+void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len)
 {
 	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
 	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-	nl80211_send_rx_deauth(rdev, dev, buf, len);
+	nl80211_send_deauth(rdev, dev, buf, len);
 }
-EXPORT_SYMBOL(cfg80211_send_rx_deauth);
+EXPORT_SYMBOL(cfg80211_send_deauth);
 
-void cfg80211_send_rx_disassoc(struct net_device *dev, const u8 *buf,
-			       size_t len)
+void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len)
 {
 	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
 	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-	nl80211_send_rx_disassoc(rdev, dev, buf, len);
+	nl80211_send_disassoc(rdev, dev, buf, len);
 }
-EXPORT_SYMBOL(cfg80211_send_rx_disassoc);
+EXPORT_SYMBOL(cfg80211_send_disassoc);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c04df6a6af7..195424eee77 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3415,17 +3415,16 @@ void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev,
 	nl80211_send_mlme_event(rdev, netdev, buf, len, NL80211_CMD_ASSOCIATE);
 }
 
-void nl80211_send_rx_deauth(struct cfg80211_registered_device *rdev,
-			    struct net_device *netdev, const u8 *buf,
-			    size_t len)
+void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
+			 struct net_device *netdev, const u8 *buf, size_t len)
 {
 	nl80211_send_mlme_event(rdev, netdev, buf, len,
 				NL80211_CMD_DEAUTHENTICATE);
 }
 
-void nl80211_send_rx_disassoc(struct cfg80211_registered_device *rdev,
-			      struct net_device *netdev, const u8 *buf,
-			      size_t len)
+void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
+			   struct net_device *netdev, const u8 *buf,
+			   size_t len)
 {
 	nl80211_send_mlme_event(rdev, netdev, buf, len,
 				NL80211_CMD_DISASSOCIATE);
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index b77af4ab80b..55686fc264f 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -17,11 +17,11 @@ extern void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev,
 extern void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev,
 				  struct net_device *netdev,
 				  const u8 *buf, size_t len);
-extern void nl80211_send_rx_deauth(struct cfg80211_registered_device *rdev,
-				   struct net_device *netdev,
-				   const u8 *buf, size_t len);
-extern void nl80211_send_rx_disassoc(struct cfg80211_registered_device *rdev,
-				     struct net_device *netdev,
-				     const u8 *buf, size_t len);
+extern void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
+				struct net_device *netdev,
+				const u8 *buf, size_t len);
+extern void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
+				  struct net_device *netdev,
+				  const u8 *buf, size_t len);
 
 #endif /* __NET_WIRELESS_NL80211_H */
-- 
cgit v1.2.3-70-g09d2


From a3b8b0569fbef725597f05278ec58083321f6e9d Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Fri, 27 Mar 2009 21:59:49 +0200
Subject: nl80211: Add Michael MIC failure event

Define a new nl80211 event, NL80211_CMD_MICHAEL_MIC_FAILURE, to be
used to notify user space about locally detected Michael MIC failures.
This matches with the MLME-MICHAELMICFAILURE.indication() primitive.

Since we do not actually have TSC in the skb anymore when
mac80211_ev_michael_mic_failure() is called, that function is changed
to take in the TSC as an optional parameter instead of as a
requirement to include the TSC after the hdr field (which we did not
really follow). For now, TSC is not included in the events from
mac80211, but it could be added at some point.

Signed-off-by: Jouni Malinen <j@w1.fi>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h    | 28 ++++++++++++++++++++++++++++
 include/net/cfg80211.h     | 16 ++++++++++++++++
 net/mac80211/event.c       | 17 +++++++++--------
 net/mac80211/ieee80211_i.h |  2 +-
 net/mac80211/rx.c          |  2 +-
 net/mac80211/wpa.c         |  2 +-
 net/wireless/mlme.c        | 10 ++++++++++
 net/wireless/nl80211.c     | 40 ++++++++++++++++++++++++++++++++++++++++
 net/wireless/nl80211.h     |  5 +++++
 9 files changed, 111 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index cbe8ce3bf48..27f230f063b 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -199,6 +199,14 @@
  *	NL80211_CMD_AUTHENTICATE but for Disassociation frames (similar to
  *	MLME-DISASSOCIATE.request and MLME-DISASSOCIATE.indication primitives).
  *
+ * @NL80211_CMD_MICHAEL_MIC_FAILURE: notification of a locally detected Michael
+ *	MIC (part of TKIP) failure; sent on the "mlme" multicast group; the
+ *	event includes %NL80211_ATTR_MAC to describe the source MAC address of
+ *	the frame with invalid MIC, %NL80211_ATTR_KEY_TYPE to show the key
+ *	type, %NL80211_ATTR_KEY_IDX to indicate the key identifier, and
+ *	%NL80211_ATTR_KEY_SEQ to indicate the TSC value of the frame; this
+ *	event matches with MLME-MICHAELMICFAILURE.indication() primitive
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -260,6 +268,8 @@ enum nl80211_commands {
 	NL80211_CMD_DEAUTHENTICATE,
 	NL80211_CMD_DISASSOCIATE,
 
+	NL80211_CMD_MICHAEL_MIC_FAILURE,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -408,6 +418,9 @@ enum nl80211_commands {
  * @NL80211_ATTR_REASON_CODE: ReasonCode for %NL80211_CMD_DEAUTHENTICATE and
  *	%NL80211_CMD_DISASSOCIATE, u16
  *
+ * @NL80211_ATTR_KEY_TYPE: Key Type, see &enum nl80211_key_type, represented as
+ *	a u32
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -492,6 +505,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_AUTH_TYPE,
 	NL80211_ATTR_REASON_CODE,
 
+	NL80211_ATTR_KEY_TYPE,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -1062,4 +1077,17 @@ enum nl80211_auth_type {
 	NL80211_AUTHTYPE_FT,
 	NL80211_AUTHTYPE_NETWORK_EAP,
 };
+
+/**
+ * enum nl80211_key_type - Key Type
+ * @NL80211_KEYTYPE_GROUP: Group (broadcast/multicast) key
+ * @NL80211_KEYTYPE_PAIRWISE: Pairwise (unicast/individual) key
+ * @NL80211_KEYTYPE_PEERKEY: PeerKey (DLS)
+ */
+enum nl80211_key_type {
+	NL80211_KEYTYPE_GROUP,
+	NL80211_KEYTYPE_PAIRWISE,
+	NL80211_KEYTYPE_PEERKEY,
+};
+
 #endif /* __LINUX_NL80211_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index ec33096fc65..f8bf0c86650 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -957,4 +957,20 @@ void cfg80211_hold_bss(struct cfg80211_bss *bss);
  */
 void cfg80211_unhold_bss(struct cfg80211_bss *bss);
 
+/**
+ * cfg80211_michael_mic_failure - notification of Michael MIC failure (TKIP)
+ * @dev: network device
+ * @addr: The source MAC address of the frame
+ * @key_type: The key type that the received frame used
+ * @key_id: Key identifier (0..3)
+ * @tsc: The TSC value of the frame that generated the MIC failure (6 octets)
+ *
+ * This function is called whenever the local MAC detects a MIC failure in a
+ * received frame. This matches with MLME-MICHAELMICFAILURE.indication()
+ * primitive.
+ */
+void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr,
+				  enum nl80211_key_type key_type, int key_id,
+				  const u8 *tsc);
+
 #endif /* __NET_CFG80211_H */
diff --git a/net/mac80211/event.c b/net/mac80211/event.c
index 0d95561c0ee..f288d01a634 100644
--- a/net/mac80211/event.c
+++ b/net/mac80211/event.c
@@ -12,12 +12,12 @@
 #include "ieee80211_i.h"
 
 /*
- * indicate a failed Michael MIC to userspace; the passed packet
- * (in the variable hdr) must be long enough to extract the TKIP
- * fields like TSC
+ * Indicate a failed Michael MIC to userspace. If the caller knows the TSC of
+ * the frame that generated the MIC failure (i.e., if it was provided by the
+ * driver or is still in the frame), it should provide that information.
  */
 void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx,
-				     struct ieee80211_hdr *hdr)
+				     struct ieee80211_hdr *hdr, const u8 *tsc)
 {
 	union iwreq_data wrqu;
 	char *buf = kmalloc(128, GFP_ATOMIC);
@@ -34,8 +34,9 @@ void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int ke
 		kfree(buf);
 	}
 
-	/*
-	 * TODO: re-add support for sending MIC failure indication
-	 * with all info via nl80211
-	 */
+	cfg80211_michael_mic_failure(sdata->dev, hdr->addr2,
+				     (hdr->addr1[0] & 0x01) ?
+				     NL80211_KEYTYPE_GROUP :
+				     NL80211_KEYTYPE_PAIRWISE,
+				     keyidx, tsc);
 }
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index e6ed78cb16b..312347d102c 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1060,7 +1060,7 @@ u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
 int ieee80211_frame_duration(struct ieee80211_local *local, size_t len,
 			     int rate, int erp, int short_preamble);
 void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx,
-				     struct ieee80211_hdr *hdr);
+				     struct ieee80211_hdr *hdr, const u8 *tsc);
 void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata);
 void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
 		      int encrypt);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 5fa7aedd90e..19c4b4589fe 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1932,7 +1932,7 @@ static void ieee80211_rx_michael_mic_report(struct net_device *dev,
 	    !ieee80211_is_auth(hdr->frame_control))
 		goto ignore;
 
-	mac80211_ev_michael_mic_failure(rx->sdata, keyidx, hdr);
+	mac80211_ev_michael_mic_failure(rx->sdata, keyidx, hdr, NULL);
  ignore:
 	dev_kfree_skb(rx->skb);
 	rx->skb = NULL;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 4f8bfea278f..dcfae8884b8 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -122,7 +122,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
 			return RX_DROP_UNUSABLE;
 
 		mac80211_ev_michael_mic_failure(rx->sdata, rx->key->conf.keyidx,
-						(void *) skb->data);
+						(void *) skb->data, NULL);
 		return RX_DROP_UNUSABLE;
 	}
 
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 33ff7cca496..1407244a647 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -43,3 +43,13 @@ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len)
 	nl80211_send_disassoc(rdev, dev, buf, len);
 }
 EXPORT_SYMBOL(cfg80211_send_disassoc);
+
+void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr,
+				  enum nl80211_key_type key_type, int key_id,
+				  const u8 *tsc)
+{
+	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	nl80211_michael_mic_failure(rdev, dev, addr, key_type, key_id, tsc);
+}
+EXPORT_SYMBOL(cfg80211_michael_mic_failure);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 195424eee77..1394115cde9 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3430,6 +3430,46 @@ void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
 				NL80211_CMD_DISASSOCIATE);
 }
 
+void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
+				 struct net_device *netdev, const u8 *addr,
+				 enum nl80211_key_type key_type, int key_id,
+				 const u8 *tsc)
+{
+	struct sk_buff *msg;
+	void *hdr;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_MICHAEL_MIC_FAILURE);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
+	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
+	if (addr)
+		NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, addr);
+	NLA_PUT_U32(msg, NL80211_ATTR_KEY_TYPE, key_type);
+	NLA_PUT_U8(msg, NL80211_ATTR_KEY_IDX, key_id);
+	if (tsc)
+		NLA_PUT(msg, NL80211_ATTR_KEY_SEQ, 6, tsc);
+
+	if (genlmsg_end(msg, hdr) < 0) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, GFP_KERNEL);
+	return;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+}
+
 /* initialisation/exit functions */
 
 int nl80211_init(void)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 55686fc264f..e4b92cccd15 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -23,5 +23,10 @@ extern void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
 extern void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
 				  struct net_device *netdev,
 				  const u8 *buf, size_t len);
+extern void
+nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
+			    struct net_device *netdev, const u8 *addr,
+			    enum nl80211_key_type key_type,
+			    int key_id, const u8 *tsc);
 
 #endif /* __NET_WIRELESS_NL80211_H */
-- 
cgit v1.2.3-70-g09d2


From 06aa7afaaa21a4e7f1bcb196bd3f29193924a603 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Thu, 26 Mar 2009 23:40:09 +0200
Subject: cfg80211: add cfg80211_inform_bss

Added cfg80211_inform_bss() for full-mac devices to use.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h |  8 ++++++++
 net/wireless/scan.c    | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f8bf0c86650..2b1f6c69773 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -863,6 +863,14 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy,
 			  struct ieee80211_mgmt *mgmt, size_t len,
 			  s32 signal, gfp_t gfp);
 
+struct cfg80211_bss*
+cfg80211_inform_bss(struct wiphy *wiphy,
+		    struct ieee80211_channel *channel,
+		    const u8 *bssid,
+		    u64 timestamp, u16 capability, u16 beacon_interval,
+		    const u8 *ie, size_t ielen,
+		    s32 signal, gfp_t gfp);
+
 struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy,
 				      struct ieee80211_channel *channel,
 				      const u8 *bssid,
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 2ae65b39b52..1396248dc5c 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -414,6 +414,55 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev,
 	return found;
 }
 
+struct cfg80211_bss*
+cfg80211_inform_bss(struct wiphy *wiphy,
+		    struct ieee80211_channel *channel,
+		    const u8 *bssid,
+		    u64 timestamp, u16 capability, u16 beacon_interval,
+		    const u8 *ie, size_t ielen,
+		    s32 signal, gfp_t gfp)
+{
+	struct cfg80211_internal_bss *res;
+	size_t privsz;
+
+	if (WARN_ON(!wiphy))
+		return NULL;
+
+	privsz = wiphy->bss_priv_size;
+
+	if (WARN_ON(wiphy->signal_type == NL80211_BSS_SIGNAL_UNSPEC &&
+			(signal < 0 || signal > 100)))
+		return NULL;
+
+	res = kzalloc(sizeof(*res) + privsz + ielen, gfp);
+	if (!res)
+		return NULL;
+
+	memcpy(res->pub.bssid, bssid, ETH_ALEN);
+	res->pub.channel = channel;
+	res->pub.signal = signal;
+	res->pub.tsf = timestamp;
+	res->pub.beacon_interval = beacon_interval;
+	res->pub.capability = capability;
+	/* point to after the private area */
+	res->pub.information_elements = (u8 *)res + sizeof(*res) + privsz;
+	memcpy(res->pub.information_elements, ie, ielen);
+	res->pub.len_information_elements = ielen;
+
+	kref_init(&res->ref);
+
+	res = cfg80211_bss_update(wiphy_to_dev(wiphy), res, 0);
+	if (!res)
+		return NULL;
+
+	if (res->pub.capability & WLAN_CAPABILITY_ESS)
+		regulatory_hint_found_beacon(wiphy, channel, gfp);
+
+	/* cfg80211_bss_update gives us a referenced result */
+	return &res->pub;
+}
+EXPORT_SYMBOL(cfg80211_inform_bss);
+
 struct cfg80211_bss *
 cfg80211_inform_bss_frame(struct wiphy *wiphy,
 			  struct ieee80211_channel *channel,
-- 
cgit v1.2.3-70-g09d2


From 18a8365992a8041aa178ae9ad5f0d951d0457230 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 31 Mar 2009 12:12:05 +0200
Subject: cfg80211: introduce scan IE limit attribute

This patch introduces a new attribute for a wiphy that tells
userspace how long the information elements added to a probe
request frame can be at most. It also updates the at76 to
advertise that it cannot support that, and, for now until I
can fix that, iwlwifi too.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/at76c50x-usb.c     |  1 +
 drivers/net/wireless/iwlwifi/iwl-core.c |  1 +
 include/linux/nl80211.h                 |  4 ++++
 include/net/wireless.h                  |  1 +
 net/mac80211/main.c                     | 13 ++++++++++++-
 net/mac80211/util.c                     |  2 ++
 net/wireless/nl80211.c                  |  7 +++++++
 7 files changed, 28 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/net/wireless/at76c50x-usb.c b/drivers/net/wireless/at76c50x-usb.c
index 69248ded510..55f947ac56d 100644
--- a/drivers/net/wireless/at76c50x-usb.c
+++ b/drivers/net/wireless/at76c50x-usb.c
@@ -2250,6 +2250,7 @@ static int at76_init_new_device(struct at76_priv *priv,
 
 	/* mac80211 initialisation */
 	priv->hw->wiphy->max_scan_ssids = 1;
+	priv->hw->wiphy->max_scan_ie_len = 0;
 	priv->hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION);
 	priv->hw->wiphy->bands[IEEE80211_BAND_2GHZ] = &at76_supported_band;
 	priv->hw->flags = IEEE80211_HW_RX_INCLUDES_FCS |
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c
index 82abb1f9087..ef55f91374a 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.c
+++ b/drivers/net/wireless/iwlwifi/iwl-core.c
@@ -1306,6 +1306,7 @@ int iwl_setup_mac(struct iwl_priv *priv)
 
 	hw->wiphy->custom_regulatory = true;
 	hw->wiphy->max_scan_ssids = 1;
+	hw->wiphy->max_scan_ie_len = 0; /* XXX for now */
 
 	/* Default value; 4 EDCA QOS priorities */
 	hw->queues = 4;
diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 27f230f063b..209cacee528 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -390,6 +390,8 @@ enum nl80211_commands {
  *
  * @NL80211_ATTR_MAX_NUM_SCAN_SSIDS: number of SSIDs you can scan with
  *	a single scan request, a wiphy attribute.
+ * @NL80211_ATTR_MAX_SCAN_IE_LEN: maximum length of information elements
+ *	that can be added to a scan request
  *
  * @NL80211_ATTR_SCAN_FREQUENCIES: nested attribute with frequencies (in MHz)
  * @NL80211_ATTR_SCAN_SSIDS: nested attribute with SSIDs, leave out for passive
@@ -507,6 +509,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_KEY_TYPE,
 
+	NL80211_ATTR_MAX_SCAN_IE_LEN,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/wireless.h b/include/net/wireless.h
index 64a76208580..2bcdeda46d8 100644
--- a/include/net/wireless.h
+++ b/include/net/wireless.h
@@ -222,6 +222,7 @@ struct wiphy {
 
 	int bss_priv_size;
 	u8 max_scan_ssids;
+	u16 max_scan_ie_len;
 
 	/* If multiple wiphys are registered and you're handed e.g.
 	 * a regular netdev with assigned ieee80211_ptr, you won't
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index fbcbed6cad0..ee58a787369 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -728,7 +728,18 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 		return NULL;
 
 	wiphy->privid = mac80211_wiphy_privid;
-	wiphy->max_scan_ssids = 4;
+
+	if (!ops->hw_scan) {
+		/* For hw_scan, driver needs to set these up. */
+		wiphy->max_scan_ssids = 4;
+
+		/* we support a maximum of 32 rates in cfg80211 */
+		wiphy->max_scan_ie_len = IEEE80211_MAX_DATA_LEN
+					 - 2 - 32 /* SSID */
+					 - 4 - 32 /* (ext) supp rates */;
+
+	}
+
 	/* Yes, putting cfg80211_bss into ieee80211_bss is a hack */
 	wiphy->bss_priv_size = sizeof(struct ieee80211_bss) -
 			       sizeof(struct cfg80211_bss);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index fdf432f1455..05caf34f31d 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -890,6 +890,8 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
 		*pos = rate->bitrate / 5;
 	}
 
+	/* if adding more here, adjust max_scan_ie_len */
+
 	if (ie)
 		memcpy(skb_put(skb, ie_len), ie, ie_len);
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 1394115cde9..447fa1790b4 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -181,6 +181,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	NLA_PUT_STRING(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy));
 	NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS,
 		   dev->wiphy.max_scan_ssids);
+	NLA_PUT_U16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN,
+		    dev->wiphy.max_scan_ie_len);
 
 	nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES);
 	if (!nl_modes)
@@ -2528,6 +2530,11 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
 	else
 		ie_len = 0;
 
+	if (ie_len > wiphy->max_scan_ie_len) {
+		err = -EINVAL;
+		goto out;
+	}
+
 	request = kzalloc(sizeof(*request)
 			+ sizeof(*ssid) * n_ssids
 			+ sizeof(channel) * n_channels
-- 
cgit v1.2.3-70-g09d2


From de95a54b1aebe5592cae971ca5e5d9ec6a381a17 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 1 Apr 2009 11:58:36 +0200
Subject: mac80211: pass all probe request IEs to driver

Instead of just passing the cfg80211-requested IEs, pass
the locally generated ones as well.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h     |  2 +-
 include/net/mac80211.h     | 13 +++++---
 net/mac80211/ieee80211_i.h |  9 ++++--
 net/mac80211/main.c        | 49 +++++++++++++++++++---------
 net/mac80211/scan.c        | 24 +++++++++++++-
 net/mac80211/util.c        | 79 ++++++++++++++++++++++++++++------------------
 net/wireless/nl80211.c     |  3 +-
 7 files changed, 123 insertions(+), 56 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 2b1f6c69773..d303c269a69 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -503,7 +503,7 @@ struct cfg80211_scan_request {
 	int n_ssids;
 	struct ieee80211_channel **channels;
 	u32 n_channels;
-	u8 *ie;
+	const u8 *ie;
 	size_t ie_len;
 
 	/* internal */
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 3b83a80e3fe..2c6f976831b 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1330,11 +1330,14 @@ enum ieee80211_ampdu_mlme_action {
  *	the scan state machine in stack. The scan must honour the channel
  *	configuration done by the regulatory agent in the wiphy's
  *	registered bands. The hardware (or the driver) needs to make sure
- *	that power save is disabled. When the scan finishes,
- *	ieee80211_scan_completed() must be called; note that it also must
- *	be called when the scan cannot finish because the hardware is
- *	turned off! Anything else is a bug! Returns a negative error code
- *	which will be seen in userspace.
+ *	that power save is disabled.
+ *	The @req ie/ie_len members are rewritten by mac80211 to contain the
+ *	entire IEs after the SSID, so that drivers need not look at these
+ *	at all but just send them after the SSID -- mac80211 includes the
+ *	(extended) supported rates and HT information (where applicable).
+ *	When the scan finishes, ieee80211_scan_completed() must be called;
+ *	note that it also must be called when the scan cannot finish due to
+ *	any error unless this callback returned a negative error code.
  *
  * @sw_scan_start: Notifier function that is called just before a software scan
  *	is started. Can be NULL, if the driver doesn't need this notification.
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 73d9f894ed5..cb80a80504e 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -671,7 +671,10 @@ struct ieee80211_local {
 	struct cfg80211_scan_request int_scan_req;
 	struct cfg80211_scan_request *scan_req;
 	struct ieee80211_channel *scan_channel;
+	const u8 *orig_ies;
+	int orig_ies_len;
 	int scan_channel_idx;
+	int scan_ies_len;
 
 	enum { SCAN_SET_CHANNEL, SCAN_SEND_PROBE } scan_state;
 	unsigned long last_scan_completed;
@@ -1090,9 +1093,11 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 			 u16 transaction, u16 auth_alg,
 			 u8 *extra, size_t extra_len,
 			 const u8 *bssid, int encrypt);
+int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
+			     const u8 *ie, size_t ie_len);
 void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
-			      u8 *ssid, size_t ssid_len,
-			      u8 *ie, size_t ie_len);
+			      const u8 *ssid, size_t ssid_len,
+			      const u8 *ie, size_t ie_len);
 
 void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
 				  const size_t supp_rates_len,
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index ee58a787369..b3bbe78821d 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -729,22 +729,12 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 
 	wiphy->privid = mac80211_wiphy_privid;
 
-	if (!ops->hw_scan) {
-		/* For hw_scan, driver needs to set these up. */
-		wiphy->max_scan_ssids = 4;
-
-		/* we support a maximum of 32 rates in cfg80211 */
-		wiphy->max_scan_ie_len = IEEE80211_MAX_DATA_LEN
-					 - 2 - 32 /* SSID */
-					 - 4 - 32 /* (ext) supp rates */;
-
-	}
-
 	/* Yes, putting cfg80211_bss into ieee80211_bss is a hack */
 	wiphy->bss_priv_size = sizeof(struct ieee80211_bss) -
 			       sizeof(struct cfg80211_bss);
 
 	local = wiphy_priv(wiphy);
+
 	local->hw.wiphy = wiphy;
 
 	local->hw.priv = (char *)local +
@@ -831,7 +821,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	enum ieee80211_band band;
 	struct net_device *mdev;
 	struct ieee80211_master_priv *mpriv;
-	int channels, i, j;
+	int channels, i, j, max_bitrates;
 
 	/*
 	 * generic code guarantees at least one band,
@@ -839,18 +829,23 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	 * that hw.conf.channel is assigned
 	 */
 	channels = 0;
+	max_bitrates = 0;
 	for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
 		struct ieee80211_supported_band *sband;
 
 		sband = local->hw.wiphy->bands[band];
-		if (sband && !local->oper_channel) {
+		if (!sband)
+			continue;
+		if (!local->oper_channel) {
 			/* init channel we're on */
 			local->hw.conf.channel =
 			local->oper_channel =
 			local->scan_channel = &sband->channels[0];
 		}
-		if (sband)
-			channels += sband->n_channels;
+		channels += sband->n_channels;
+
+		if (max_bitrates < sband->n_bitrates)
+			max_bitrates = sband->n_bitrates;
 	}
 
 	local->int_scan_req.n_channels = channels;
@@ -870,6 +865,30 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)
 		local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC;
 
+	/*
+	 * Calculate scan IE length -- we need this to alloc
+	 * memory and to subtract from the driver limit. It
+	 * includes the (extended) supported rates and HT
+	 * information -- SSID is the driver's responsibility.
+	 */
+	local->scan_ies_len = 4 + max_bitrates; /* (ext) supp rates */
+
+	if (!local->ops->hw_scan) {
+		/* For hw_scan, driver needs to set these up. */
+		local->hw.wiphy->max_scan_ssids = 4;
+		local->hw.wiphy->max_scan_ie_len = IEEE80211_MAX_DATA_LEN;
+	}
+
+	/*
+	 * If the driver supports any scan IEs, then assume the
+	 * limit includes the IEs mac80211 will add, otherwise
+	 * leave it at zero and let the driver sort it out; we
+	 * still pass our IEs to the driver but userspace will
+	 * not be allowed to in that case.
+	 */
+	if (local->hw.wiphy->max_scan_ie_len)
+		local->hw.wiphy->max_scan_ie_len -= local->scan_ies_len;
+
 	result = wiphy_register(local->hw.wiphy);
 	if (result < 0)
 		goto fail_wiphy_register;
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 3bf9839f591..4ec1bfc7f6a 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -285,6 +285,12 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
 	if (WARN_ON(!local->scan_req))
 		return;
 
+	if (local->hw_scanning) {
+		kfree(local->scan_req->ie);
+		local->scan_req->ie = local->orig_ies;
+		local->scan_req->ie_len = local->orig_ies_len;
+	}
+
 	if (local->scan_req != &local->int_scan_req)
 		cfg80211_scan_done(local->scan_req, aborted);
 	local->scan_req = NULL;
@@ -457,12 +463,28 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata,
 	}
 
 	if (local->ops->hw_scan) {
-		int rc;
+		u8 *ies;
+		int rc, ielen;
+
+		ies = kmalloc(2 + IEEE80211_MAX_SSID_LEN +
+			      local->scan_ies_len + req->ie_len, GFP_KERNEL);
+		if (!ies)
+			return -ENOMEM;
+
+		ielen = ieee80211_build_preq_ies(local, ies,
+						 req->ie, req->ie_len);
+		local->orig_ies = req->ie;
+		local->orig_ies_len = req->ie_len;
+		req->ie = ies;
+		req->ie_len = ielen;
 
 		local->hw_scanning = true;
 		rc = local->ops->hw_scan(local_to_hw(local), req);
 		if (rc) {
 			local->hw_scanning = false;
+			kfree(ies);
+			req->ie_len = local->orig_ies_len;
+			req->ie = local->orig_ies;
 			return rc;
 		}
 		local->scan_sdata = scan_sdata;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 05caf34f31d..72b091317a7 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -831,16 +831,57 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 	ieee80211_tx_skb(sdata, skb, encrypt);
 }
 
+int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
+			     const u8 *ie, size_t ie_len)
+{
+	struct ieee80211_supported_band *sband;
+	u8 *pos, *supp_rates_len, *esupp_rates_len = NULL;
+	int i;
+
+	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
+
+	pos = buffer;
+
+	*pos++ = WLAN_EID_SUPP_RATES;
+	supp_rates_len = pos;
+	*pos++ = 0;
+
+	for (i = 0; i < sband->n_bitrates; i++) {
+		struct ieee80211_rate *rate = &sband->bitrates[i];
+
+		if (esupp_rates_len) {
+			*esupp_rates_len += 1;
+		} else if (*supp_rates_len == 8) {
+			*pos++ = WLAN_EID_EXT_SUPP_RATES;
+			esupp_rates_len = pos;
+			*pos++ = 1;
+		} else
+			*supp_rates_len += 1;
+
+		*pos++ = rate->bitrate / 5;
+	}
+
+	/*
+	 * If adding more here, adjust code in main.c
+	 * that calculates local->scan_ies_len.
+	 */
+
+	if (ie) {
+		memcpy(pos, ie, ie_len);
+		pos += ie_len;
+	}
+
+	return pos - buffer;
+}
+
 void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
-			      u8 *ssid, size_t ssid_len,
-			      u8 *ie, size_t ie_len)
+			      const u8 *ssid, size_t ssid_len,
+			      const u8 *ie, size_t ie_len)
 {
 	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_supported_band *sband;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
-	u8 *pos, *supp_rates, *esupp_rates = NULL;
-	int i;
+	u8 *pos;
 
 	skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200 +
 			    ie_len);
@@ -867,33 +908,9 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
 	*pos++ = WLAN_EID_SSID;
 	*pos++ = ssid_len;
 	memcpy(pos, ssid, ssid_len);
+	pos += ssid_len;
 
-	supp_rates = skb_put(skb, 2);
-	supp_rates[0] = WLAN_EID_SUPP_RATES;
-	supp_rates[1] = 0;
-	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
-
-	for (i = 0; i < sband->n_bitrates; i++) {
-		struct ieee80211_rate *rate = &sband->bitrates[i];
-		if (esupp_rates) {
-			pos = skb_put(skb, 1);
-			esupp_rates[1]++;
-		} else if (supp_rates[1] == 8) {
-			esupp_rates = skb_put(skb, 3);
-			esupp_rates[0] = WLAN_EID_EXT_SUPP_RATES;
-			esupp_rates[1] = 1;
-			pos = &esupp_rates[2];
-		} else {
-			pos = skb_put(skb, 1);
-			supp_rates[1]++;
-		}
-		*pos = rate->bitrate / 5;
-	}
-
-	/* if adding more here, adjust max_scan_ie_len */
-
-	if (ie)
-		memcpy(skb_put(skb, ie_len), ie, ie_len);
+	skb_put(skb, ieee80211_build_preq_ies(local, pos, ie, ie_len));
 
 	ieee80211_tx_skb(sdata, skb, 0);
 }
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 447fa1790b4..68c51022e9d 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2597,7 +2597,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
 
 	if (info->attrs[NL80211_ATTR_IE]) {
 		request->ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
-		memcpy(request->ie, nla_data(info->attrs[NL80211_ATTR_IE]),
+		memcpy((void *)request->ie,
+		       nla_data(info->attrs[NL80211_ATTR_IE]),
 		       request->ie_len);
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 6bad8766620a3c8b64afa981502fdb543e3cfd6c Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Thu, 2 Apr 2009 14:08:09 -0400
Subject: cfg80211: send regulatory beacon hint events to userspace

This informs userspace when a change has occured on a world
roaming wiphy's channel which has lifted some restrictions
due to a regulatory beacon hint.

Because this is now sent to userspace through the regulatory
multicast group we remove the debug prints we used to use as
they are no longer necessary.

Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 34 ++++++++++++++++++++++++++++++-
 net/wireless/nl80211.c  | 54 +++++++++++++++++++++++++++++++++++++++++++++++++
 net/wireless/nl80211.h  |  5 +++++
 net/wireless/reg.c      | 28 ++++++++++++-------------
 4 files changed, 106 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 209cacee528..05ba3539b77 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -7,7 +7,7 @@
  * Copyright 2008 Michael Wu <flamingice@sourmilk.net>
  * Copyright 2008 Luis Carlos Cobo <luisca@cozybit.com>
  * Copyright 2008 Michael Buesch <mb@bu3sch.de>
- * Copyright 2008 Luis R. Rodriguez <lrodriguez@atheros.com>
+ * Copyright 2008, 2009 Luis R. Rodriguez <lrodriguez@atheros.com>
  * Copyright 2008 Jouni Malinen <jouni.malinen@atheros.com>
  * Copyright 2008 Colin McCabe <colin@cozybit.com>
  *
@@ -166,6 +166,22 @@
  * 	set (%NL80211_ATTR_REG_TYPE), if the type of regulatory domain is
  * 	%NL80211_REG_TYPE_COUNTRY the alpha2 to which we have moved on
  * 	to (%NL80211_ATTR_REG_ALPHA2).
+ * @NL80211_CMD_REG_BEACON_HINT: indicates to userspace that an AP beacon
+ * 	has been found while world roaming thus enabling active scan or
+ * 	any mode of operation that initiates TX (beacons) on a channel
+ * 	where we would not have been able to do either before. As an example
+ * 	if you are world roaming (regulatory domain set to world or if your
+ * 	driver is using a custom world roaming regulatory domain) and while
+ * 	doing a passive scan on the 5 GHz band you find an AP there (if not
+ * 	on a DFS channel) you will now be able to actively scan for that AP
+ * 	or use AP mode on your card on that same channel. Note that this will
+ * 	never be used for channels 1-11 on the 2 GHz band as they are always
+ * 	enabled world wide. This beacon hint is only sent if your device had
+ * 	either disabled active scanning or beaconing on a channel. We send to
+ * 	userspace the wiphy on which we removed a restriction from
+ * 	(%NL80211_ATTR_WIPHY) and the channel on which this occurred
+ * 	before (%NL80211_ATTR_FREQ_BEFORE) and after (%NL80211_ATTR_FREQ_AFTER)
+ * 	the beacon hint was processed.
  *
  * @NL80211_CMD_AUTHENTICATE: authentication request and notification.
  *	This command is used both as a command (request to authenticate) and
@@ -270,6 +286,8 @@ enum nl80211_commands {
 
 	NL80211_CMD_MICHAEL_MIC_FAILURE,
 
+	NL80211_CMD_REG_BEACON_HINT,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -288,6 +306,7 @@ enum nl80211_commands {
 #define NL80211_CMD_ASSOCIATE NL80211_CMD_ASSOCIATE
 #define NL80211_CMD_DEAUTHENTICATE NL80211_CMD_DEAUTHENTICATE
 #define NL80211_CMD_DISASSOCIATE NL80211_CMD_DISASSOCIATE
+#define NL80211_CMD_REG_BEACON_HINT NL80211_CMD_REG_BEACON_HINT
 
 /**
  * enum nl80211_attrs - nl80211 netlink attributes
@@ -423,6 +442,17 @@ enum nl80211_commands {
  * @NL80211_ATTR_KEY_TYPE: Key Type, see &enum nl80211_key_type, represented as
  *	a u32
  *
+ * @NL80211_ATTR_FREQ_BEFORE: A channel which has suffered a regulatory change
+ * 	due to considerations from a beacon hint. This attribute reflects
+ * 	the state of the channel _before_ the beacon hint processing. This
+ * 	attributes consists of a nested attribute containing
+ * 	NL80211_FREQUENCY_ATTR_*
+ * @NL80211_ATTR_FREQ_AFTER: A channel which has suffered a regulatory change
+ * 	due to considerations from a beacon hint. This attribute reflects
+ * 	the state of the channel _after_ the beacon hint processing. This
+ * 	attributes consists of a nested attribute containing
+ * 	NL80211_FREQUENCY_ATTR_*
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -511,6 +541,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_MAX_SCAN_IE_LEN,
 
+	NL80211_ATTR_FREQ_BEFORE,
+	NL80211_ATTR_FREQ_AFTER,
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 7285bdc4e59..85b5aa3c76f 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3491,6 +3491,60 @@ void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
 	nlmsg_free(msg);
 }
 
+void nl80211_send_beacon_hint_event(struct wiphy *wiphy,
+				    struct ieee80211_channel *channel_before,
+				    struct ieee80211_channel *channel_after)
+{
+	struct sk_buff *msg;
+	void *hdr;
+	struct nlattr *nl_freq;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_REG_BEACON_HINT);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	/*
+	 * Since we are applying the beacon hint to a wiphy we know its
+	 * wiphy_idx is valid
+	 */
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, get_wiphy_idx(wiphy));
+
+	/* Before */
+	nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_BEFORE);
+	if (!nl_freq)
+		goto nla_put_failure;
+	if (nl80211_msg_put_channel(msg, channel_before))
+		goto nla_put_failure;
+	nla_nest_end(msg, nl_freq);
+
+	/* After */
+	nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_AFTER);
+	if (!nl_freq)
+		goto nla_put_failure;
+	if (nl80211_msg_put_channel(msg, channel_after))
+		goto nla_put_failure;
+	nla_nest_end(msg, nl_freq);
+
+	if (genlmsg_end(msg, hdr) < 0) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast(msg, 0, nl80211_regulatory_mcgrp.id, GFP_ATOMIC);
+
+	return;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+}
+
 /* initialisation/exit functions */
 
 int nl80211_init(void)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index e4b92cccd15..b3aaa59afa0 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -29,4 +29,9 @@ nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
 			    enum nl80211_key_type key_type,
 			    int key_id, const u8 *tsc);
 
+extern void
+nl80211_send_beacon_hint_event(struct wiphy *wiphy,
+			       struct ieee80211_channel *channel_before,
+			       struct ieee80211_channel *channel_after);
+
 #endif /* __NET_WIRELESS_NL80211_H */
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 4af4304cec3..574e217bcc8 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1049,18 +1049,10 @@ static void handle_reg_beacon(struct wiphy *wiphy,
 			      unsigned int chan_idx,
 			      struct reg_beacon *reg_beacon)
 {
-#ifdef CONFIG_CFG80211_REG_DEBUG
-#define REG_DEBUG_BEACON_FLAG(desc) \
-	printk(KERN_DEBUG "cfg80211: Enabling " desc " on " \
-		"frequency: %d MHz (Ch %d) on %s\n", \
-		reg_beacon->chan.center_freq, \
-		ieee80211_frequency_to_channel(reg_beacon->chan.center_freq), \
-		wiphy_name(wiphy));
-#else
-#define REG_DEBUG_BEACON_FLAG(desc) do {} while (0)
-#endif
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_channel *chan;
+	bool channel_changed = false;
+	struct ieee80211_channel chan_before;
 
 	assert_cfg80211_lock();
 
@@ -1070,20 +1062,28 @@ static void handle_reg_beacon(struct wiphy *wiphy,
 	if (likely(chan->center_freq != reg_beacon->chan.center_freq))
 		return;
 
+	if (chan->beacon_found)
+		return;
+
+	chan->beacon_found = true;
+
+	chan_before.center_freq = chan->center_freq;
+	chan_before.flags = chan->flags;
+
 	if ((chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) &&
 	    !(chan->orig_flags & IEEE80211_CHAN_PASSIVE_SCAN)) {
 		chan->flags &= ~IEEE80211_CHAN_PASSIVE_SCAN;
-		REG_DEBUG_BEACON_FLAG("active scanning");
+		channel_changed = true;
 	}
 
 	if ((chan->flags & IEEE80211_CHAN_NO_IBSS) &&
 	    !(chan->orig_flags & IEEE80211_CHAN_NO_IBSS)) {
 		chan->flags &= ~IEEE80211_CHAN_NO_IBSS;
-		REG_DEBUG_BEACON_FLAG("beaconing");
+		channel_changed = true;
 	}
 
-	chan->beacon_found = true;
-#undef REG_DEBUG_BEACON_FLAG
+	if (channel_changed)
+		nl80211_send_beacon_hint_event(wiphy, &chan_before, chan);
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 25e47c18ac4d8ad09c2ed4b99c1dbbcb7e3d2c51 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 2 Apr 2009 20:14:06 +0200
Subject: cfg80211: add cipher capabilities

This adds the necessary code and fields to let drivers specify
their cipher capabilities and exports them to userspace. Also
update mac80211 to export the ciphers it has.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  4 ++++
 include/net/wireless.h  |  5 +++++
 net/mac80211/main.c     | 14 ++++++++++++++
 net/wireless/nl80211.c  | 14 +++++++++++++-
 4 files changed, 36 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 05ba3539b77..c01423888db 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -453,6 +453,9 @@ enum nl80211_commands {
  * 	attributes consists of a nested attribute containing
  * 	NL80211_FREQUENCY_ATTR_*
  *
+ * @NL80211_ATTR_CIPHER_SUITES: a set of u32 values indicating the supported
+ *	cipher suites
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -540,6 +543,7 @@ enum nl80211_attrs {
 	NL80211_ATTR_KEY_TYPE,
 
 	NL80211_ATTR_MAX_SCAN_IE_LEN,
+	NL80211_ATTR_CIPHER_SUITES,
 
 	NL80211_ATTR_FREQ_BEFORE,
 	NL80211_ATTR_FREQ_AFTER,
diff --git a/include/net/wireless.h b/include/net/wireless.h
index 2bcdeda46d8..44c2642d3c0 100644
--- a/include/net/wireless.h
+++ b/include/net/wireless.h
@@ -205,6 +205,8 @@ struct ieee80211_supported_band {
  *	on the reg_notifier() if it chooses to ignore future
  *	regulatory domain changes caused by other drivers.
  * @signal_type: signal type reported in &struct cfg80211_bss.
+ * @cipher_suites: supported cipher suites
+ * @n_cipher_suites: number of supported cipher suites
  */
 struct wiphy {
 	/* assign these fields before you register the wiphy */
@@ -224,6 +226,9 @@ struct wiphy {
 	u8 max_scan_ssids;
 	u16 max_scan_ie_len;
 
+	int n_cipher_suites;
+	const u32 *cipher_suites;
+
 	/* If multiple wiphys are registered and you're handed e.g.
 	 * a regular netdev with assigned ieee80211_ptr, you won't
 	 * know whether it points to a wiphy your driver has registered
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 679b3a14f11..c1145be72da 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -823,6 +823,15 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	struct ieee80211_master_priv *mpriv;
 	int channels, i, j, max_bitrates;
 	bool supp_ht;
+	static const u32 cipher_suites[] = {
+		WLAN_CIPHER_SUITE_WEP40,
+		WLAN_CIPHER_SUITE_WEP104,
+		WLAN_CIPHER_SUITE_TKIP,
+		WLAN_CIPHER_SUITE_CCMP,
+
+		/* keep last -- depends on hw flags! */
+		WLAN_CIPHER_SUITE_AES_CMAC
+	};
 
 	/*
 	 * generic code guarantees at least one band,
@@ -894,6 +903,11 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	if (local->hw.wiphy->max_scan_ie_len)
 		local->hw.wiphy->max_scan_ie_len -= local->scan_ies_len;
 
+	local->hw.wiphy->cipher_suites = cipher_suites;
+	local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites);
+	if (!(local->hw.flags & IEEE80211_HW_MFP_CAPABLE))
+		local->hw.wiphy->n_cipher_suites--;
+
 	result = wiphy_register(local->hw.wiphy);
 	if (result < 0)
 		goto fail_wiphy_register;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 85b5aa3c76f..d33cab0e0fb 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -208,6 +208,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	NLA_PUT_U16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN,
 		    dev->wiphy.max_scan_ie_len);
 
+	NLA_PUT(msg, NL80211_ATTR_CIPHER_SUITES,
+		sizeof(u32) * dev->wiphy.n_cipher_suites,
+		dev->wiphy.cipher_suites);
+
 	nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES);
 	if (!nl_modes)
 		goto nla_put_failure;
@@ -979,7 +983,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
 static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *drv;
-	int err;
+	int err, i;
 	struct net_device *dev;
 	struct key_params params;
 	u8 key_idx = 0;
@@ -1048,6 +1052,14 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
 	if (err)
 		goto unlock_rtnl;
 
+	for (i = 0; i < drv->wiphy.n_cipher_suites; i++)
+		if (params.cipher == drv->wiphy.cipher_suites[i])
+			break;
+	if (i == drv->wiphy.n_cipher_suites) {
+		err = -EINVAL;
+		goto out;
+	}
+
 	if (!drv->ops->add_key) {
 		err = -EOPNOTSUPP;
 		goto out;
-- 
cgit v1.2.3-70-g09d2


From 6a362bb1c9f900f7e6daeee52ff2d538badae49b Mon Sep 17 00:00:00 2001
From: Bob Copeland <me@bobcopeland.com>
Date: Sun, 5 Apr 2009 11:01:20 -0400
Subject: ath5k: add support for Fukato Datacask Jupiter LEDs

This adds support for the LEDs on the Jupiter netbook.

Reported-by: Martin Bammer <mrb74@gmx.at>

Signed-off-by: Bob Copeland <me@bobcopeland.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath/ath5k/led.c | 2 ++
 include/linux/pci_ids.h              | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/drivers/net/wireless/ath/ath5k/led.c b/drivers/net/wireless/ath/ath5k/led.c
index 006f7716168..cbdc0b30842 100644
--- a/drivers/net/wireless/ath/ath5k/led.c
+++ b/drivers/net/wireless/ath/ath5k/led.c
@@ -67,6 +67,8 @@ static const struct pci_device_id ath5k_led_devices[] = {
 	{ ATH_SDEVICE(PCI_VENDOR_ID_AMBIT, 0x0428), ATH_LED(3, 0) },
 	/* Acer Extensa 5620z (nekoreeve@gmail.com) */
 	{ ATH_SDEVICE(PCI_VENDOR_ID_QMI, 0x0105), ATH_LED(3, 0) },
+	/* Fukato Datacask Jupiter 1014a (mrb74@gmx.at) */
+	{ ATH_SDEVICE(PCI_VENDOR_ID_AZWAVE, 0x1026), ATH_LED(3, 0) },
 	{ }
 };
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index ee98cd57088..ea061e290d0 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2313,6 +2313,8 @@
 
 #define PCI_VENDOR_ID_QMI		0x1a32
 
+#define PCI_VENDOR_ID_AZWAVE		0x1a3b
+
 #define PCI_VENDOR_ID_TEKRAM		0x1de1
 #define PCI_DEVICE_ID_TEKRAM_DC290	0xdc29
 
-- 
cgit v1.2.3-70-g09d2


From e45d8e534b67580eedd9b4910ccc16d6dd3cceff Mon Sep 17 00:00:00 2001
From: Bing Zhao <bzhao@marvell.com>
Date: Mon, 6 Apr 2009 15:50:56 -0700
Subject: libertas: add support for Marvell SD8688 chip

libertas: add support for Marvell SD8688 chip

Use RxPD->pkt_ptr to locate eth803 header in the packet
received since SD8688/v10 firmware allows a gap between
RxPD and eth803 header.

Set SDIO block size to 256 for CMD53.
The maximum block size for SD8688 WLAN function is set
to 512 in TPLFE_MAX_BLK_SIZE. But using 512 as block size
results upto 2K bytes data (4 blocks) being transferred
and causes buffer overflow in firmware.

Both changes above are backward compatible with earlier
firmware versions for SD8385/SD8686.

The SDIO_DEVICE_IDs for SD8688 chip are added in
include/linux/mmc/sdio_ids.h

Signed-off-by: Kiran Divekar <dkiran@marvell.com>
Signed-off-by: Bing Zhao <bzhao@marvell.com>
Acked-by: Dan Williams <dcbw@redhat.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/Kconfig            |  4 ++--
 drivers/net/wireless/libertas/if_sdio.c | 17 +++++++++++++----
 drivers/net/wireless/libertas/if_sdio.h |  2 ++
 drivers/net/wireless/libertas/rx.c      | 15 ++++++++-------
 include/linux/mmc/sdio_ids.h            |  2 ++
 5 files changed, 27 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig
index 8f279e7bd04..ad99470ae92 100644
--- a/drivers/net/wireless/Kconfig
+++ b/drivers/net/wireless/Kconfig
@@ -146,10 +146,10 @@ config LIBERTAS_CS
 	  A driver for Marvell Libertas 8385 CompactFlash devices.
 
 config LIBERTAS_SDIO
-	tristate "Marvell Libertas 8385 and 8686 SDIO 802.11b/g cards"
+	tristate "Marvell Libertas 8385/8686/8688 SDIO 802.11b/g cards"
 	depends on LIBERTAS && MMC
 	---help---
-	  A driver for Marvell Libertas 8385 and 8686 SDIO devices.
+	  A driver for Marvell Libertas 8385/8686/8688 SDIO devices.
 
 config LIBERTAS_SPI
 	tristate "Marvell Libertas 8686 SPI 802.11b/g cards"
diff --git a/drivers/net/wireless/libertas/if_sdio.c b/drivers/net/wireless/libertas/if_sdio.c
index 76f4c653d64..55864c10f9f 100644
--- a/drivers/net/wireless/libertas/if_sdio.c
+++ b/drivers/net/wireless/libertas/if_sdio.c
@@ -48,8 +48,11 @@ static char *lbs_fw_name = NULL;
 module_param_named(fw_name, lbs_fw_name, charp, 0644);
 
 static const struct sdio_device_id if_sdio_ids[] = {
-	{ SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_LIBERTAS) },
-	{ /* end: all zeroes */						},
+	{ SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL,
+			SDIO_DEVICE_ID_MARVELL_LIBERTAS) },
+	{ SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL,
+			SDIO_DEVICE_ID_MARVELL_8688WLAN) },
+	{ /* end: all zeroes */				},
 };
 
 MODULE_DEVICE_TABLE(sdio, if_sdio_ids);
@@ -73,6 +76,12 @@ static struct if_sdio_model if_sdio_models[] = {
 		.helper = "sd8686_helper.bin",
 		.firmware = "sd8686.bin",
 	},
+	{
+		/* 8688 */
+		.model = 0x10,
+		.helper = "sd8688_helper.bin",
+		.firmware = "sd8688.bin",
+	},
 };
 
 struct if_sdio_packet {
@@ -488,7 +497,7 @@ static int if_sdio_prog_helper(struct if_sdio_card *card)
 	ret = 0;
 
 release:
-	sdio_set_block_size(card->func, 0);
+	sdio_set_block_size(card->func, IF_SDIO_BLOCK_SIZE);
 	sdio_release_host(card->func);
 	kfree(chunk_buffer);
 release_fw:
@@ -624,7 +633,7 @@ static int if_sdio_prog_real(struct if_sdio_card *card)
 	ret = 0;
 
 release:
-	sdio_set_block_size(card->func, 0);
+	sdio_set_block_size(card->func, IF_SDIO_BLOCK_SIZE);
 	sdio_release_host(card->func);
 	kfree(chunk_buffer);
 release_fw:
diff --git a/drivers/net/wireless/libertas/if_sdio.h b/drivers/net/wireless/libertas/if_sdio.h
index 533bdfbf5d2..37ada2c29aa 100644
--- a/drivers/net/wireless/libertas/if_sdio.h
+++ b/drivers/net/wireless/libertas/if_sdio.h
@@ -42,4 +42,6 @@
 
 #define IF_SDIO_EVENT           0x80fc
 
+#define IF_SDIO_BLOCK_SIZE	256
+
 #endif
diff --git a/drivers/net/wireless/libertas/rx.c b/drivers/net/wireless/libertas/rx.c
index 820c22d9220..bd845d09f17 100644
--- a/drivers/net/wireless/libertas/rx.c
+++ b/drivers/net/wireless/libertas/rx.c
@@ -25,7 +25,6 @@ struct rfc1042hdr {
 } __attribute__ ((packed));
 
 struct rxpackethdr {
-	struct rxpd rx_pd;
 	struct eth803hdr eth803_hdr;
 	struct rfc1042hdr rfc1042_hdr;
 } __attribute__ ((packed));
@@ -158,8 +157,9 @@ int lbs_process_rxed_packet(struct lbs_private *priv, struct sk_buff *skb)
 	if (priv->monitormode)
 		return process_rxed_802_11_packet(priv, skb);
 
-	p_rx_pkt = (struct rxpackethdr *) skb->data;
-	p_rx_pd = &p_rx_pkt->rx_pd;
+	p_rx_pd = (struct rxpd *) skb->data;
+	p_rx_pkt = (struct rxpackethdr *) ((u8 *)p_rx_pd +
+		le32_to_cpu(p_rx_pd->pkt_ptr));
 	if (priv->mesh_dev) {
 		if (priv->mesh_fw_ver == MESH_FW_OLD) {
 			if (p_rx_pd->rx_control & RxPD_MESH_FRAME)
@@ -181,8 +181,9 @@ int lbs_process_rxed_packet(struct lbs_private *priv, struct sk_buff *skb)
 		goto done;
 	}
 
-	lbs_deb_rx("rx data: skb->len-sizeof(RxPd) = %d-%zd = %zd\n",
-	       skb->len, sizeof(struct rxpd), skb->len - sizeof(struct rxpd));
+	lbs_deb_rx("rx data: skb->len - pkt_ptr = %d-%zd = %zd\n",
+		skb->len, le32_to_cpu(p_rx_pd->pkt_ptr),
+		skb->len - le32_to_cpu(p_rx_pd->pkt_ptr));
 
 	lbs_deb_hex(LBS_DEB_RX, "RX Data: Dest", p_rx_pkt->eth803_hdr.dest_addr,
 		sizeof(p_rx_pkt->eth803_hdr.dest_addr));
@@ -216,14 +217,14 @@ int lbs_process_rxed_packet(struct lbs_private *priv, struct sk_buff *skb)
 		/* Chop off the rxpd + the excess memory from the 802.2/llc/snap header
 		 *   that was removed
 		 */
-		hdrchop = (u8 *) p_ethhdr - (u8 *) p_rx_pkt;
+		hdrchop = (u8 *)p_ethhdr - (u8 *)p_rx_pd;
 	} else {
 		lbs_deb_hex(LBS_DEB_RX, "RX Data: LLC/SNAP",
 			(u8 *) & p_rx_pkt->rfc1042_hdr,
 			sizeof(p_rx_pkt->rfc1042_hdr));
 
 		/* Chop off the rxpd */
-		hdrchop = (u8 *) & p_rx_pkt->eth803_hdr - (u8 *) p_rx_pkt;
+		hdrchop = (u8 *)&p_rx_pkt->eth803_hdr - (u8 *)p_rx_pd;
 	}
 
 	/* Chop off the leading header bytes so the skb points to the start of
diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h
index ea1bf5ba092..c7211ab6dd4 100644
--- a/include/linux/mmc/sdio_ids.h
+++ b/include/linux/mmc/sdio_ids.h
@@ -25,5 +25,7 @@
 
 #define SDIO_VENDOR_ID_MARVELL			0x02df
 #define SDIO_DEVICE_ID_MARVELL_LIBERTAS		0x9103
+#define SDIO_DEVICE_ID_MARVELL_8688WLAN		0x9104
+#define SDIO_DEVICE_ID_MARVELL_8688BT		0x9105
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From f2753ddbadb0873a98421415882318251bbd9eaa Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 14 Apr 2009 10:09:24 +0200
Subject: mac80211: add hardware restart function

Some hardware defects may require the hardware to be re-initialised
completely from scratch. Drivers would need much information (for
instance the current MAC address, crypto keys, beaconing information,
etc.) stored duplicated from mac80211 to be able to do this, so let
mac80211 help them.

The new ieee80211_restart_hw() function requires the same code as
resuming, so move that code into a new ieee80211_reconfig() function
in util.c and leave only the suspend code in pm.c.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h     |  14 ++++++
 net/mac80211/ieee80211_i.h |  13 ++++-
 net/mac80211/main.c        |  24 +++++++++
 net/mac80211/pm.c          | 110 ++----------------------------------------
 net/mac80211/util.c        | 118 +++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 172 insertions(+), 107 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 2c6f976831b..a593bedcfed 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1575,6 +1575,20 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw);
  */
 void ieee80211_free_hw(struct ieee80211_hw *hw);
 
+/**
+ * ieee80211_restart_hw - restart hardware completely
+ *
+ * Call this function when the hardware was restarted for some reason
+ * (hardware error, ...) and the driver is unable to restore its state
+ * by itself. mac80211 assumes that at this point the driver/hardware
+ * is completely uninitialised and stopped, it starts the process by
+ * calling the ->start() operation. The driver will need to reset all
+ * internal state that it has prior to calling this function.
+ *
+ * @hw: the hardware to restart
+ */
+void ieee80211_restart_hw(struct ieee80211_hw *hw);
+
 /* trick to avoid symbol clashes with the ieee80211 subsystem */
 void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb,
 		    struct ieee80211_rx_status *status);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index cb80a80504e..13d6f890ced 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -748,6 +748,8 @@ struct ieee80211_local {
 	int user_power_level; /* in dBm */
 	int power_constr_level; /* in dBm */
 
+	struct work_struct restart_work;
+
 #ifdef CONFIG_MAC80211_DEBUGFS
 	struct local_debugfsdentries {
 		struct dentry *rcdir;
@@ -1036,15 +1038,22 @@ void ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata,
 				 u16 capab_info, u8 *pwr_constr_elem,
 				 u8 pwr_constr_elem_len);
 
-/* Suspend/resume */
+/* Suspend/resume and hw reconfiguration */
+int ieee80211_reconfig(struct ieee80211_local *local);
+
 #ifdef CONFIG_PM
 int __ieee80211_suspend(struct ieee80211_hw *hw);
-int __ieee80211_resume(struct ieee80211_hw *hw);
+
+static inline int __ieee80211_resume(struct ieee80211_hw *hw)
+{
+	return ieee80211_reconfig(hw_to_local(hw));
+}
 #else
 static inline int __ieee80211_suspend(struct ieee80211_hw *hw)
 {
 	return 0;
 }
+
 static inline int __ieee80211_resume(struct ieee80211_hw *hw)
 {
 	return 0;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index c1145be72da..80c0e28bf54 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -696,6 +696,28 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(ieee80211_tx_status);
 
+static void ieee80211_restart_work(struct work_struct *work)
+{
+	struct ieee80211_local *local =
+		container_of(work, struct ieee80211_local, restart_work);
+
+	rtnl_lock();
+	ieee80211_reconfig(local);
+	rtnl_unlock();
+}
+
+void ieee80211_restart_hw(struct ieee80211_hw *hw)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+
+	/* use this reason, __ieee80211_resume will unblock it */
+	ieee80211_stop_queues_by_reason(hw,
+		IEEE80211_QUEUE_STOP_REASON_SUSPEND);
+
+	schedule_work(&local->restart_work);
+}
+EXPORT_SYMBOL(ieee80211_restart_hw);
+
 struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 					const struct ieee80211_ops *ops)
 {
@@ -768,6 +790,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 
 	INIT_DELAYED_WORK(&local->scan_work, ieee80211_scan_work);
 
+	INIT_WORK(&local->restart_work, ieee80211_restart_work);
+
 	INIT_WORK(&local->dynamic_ps_enable_work,
 		  ieee80211_dynamic_ps_enable_work);
 	INIT_WORK(&local->dynamic_ps_disable_work,
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index 2b4c95cd9da..b38986c9dee 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -72,108 +72,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw)
 	return 0;
 }
 
-int __ieee80211_resume(struct ieee80211_hw *hw)
-{
-	struct ieee80211_local *local = hw_to_local(hw);
-	struct ieee80211_sub_if_data *sdata;
-	struct ieee80211_if_init_conf conf;
-	struct sta_info *sta;
-	unsigned long flags;
-	int res;
-
-	/* restart hardware */
-	if (local->open_count) {
-		res = local->ops->start(hw);
-
-		ieee80211_led_radio(local, hw->conf.radio_enabled);
-	}
-
-	/* add interfaces */
-	list_for_each_entry(sdata, &local->interfaces, list) {
-		if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
-		    sdata->vif.type != NL80211_IFTYPE_MONITOR &&
-		    netif_running(sdata->dev)) {
-			conf.vif = &sdata->vif;
-			conf.type = sdata->vif.type;
-			conf.mac_addr = sdata->dev->dev_addr;
-			res = local->ops->add_interface(hw, &conf);
-		}
-	}
-
-	/* add STAs back */
-	if (local->ops->sta_notify) {
-		spin_lock_irqsave(&local->sta_lock, flags);
-		list_for_each_entry(sta, &local->sta_list, list) {
-			if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
-				sdata = container_of(sdata->bss,
-					     struct ieee80211_sub_if_data,
-					     u.ap);
-
-			local->ops->sta_notify(hw, &sdata->vif,
-				STA_NOTIFY_ADD, &sta->sta);
-		}
-		spin_unlock_irqrestore(&local->sta_lock, flags);
-	}
-
-	/* Clear Suspend state so that ADDBA requests can be processed */
-
-	rcu_read_lock();
-
-	if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) {
-		list_for_each_entry_rcu(sta, &local->sta_list, list) {
-			clear_sta_flags(sta, WLAN_STA_SUSPEND);
-		}
-	}
-
-	rcu_read_unlock();
-
-	/* setup RTS threshold */
-	if (local->ops->set_rts_threshold)
-		local->ops->set_rts_threshold(hw, local->rts_threshold);
-
-	/* reconfigure hardware */
-	ieee80211_hw_config(local, ~0);
-
-	netif_addr_lock_bh(local->mdev);
-	ieee80211_configure_filter(local);
-	netif_addr_unlock_bh(local->mdev);
-
-	/* Finally also reconfigure all the BSS information */
-	list_for_each_entry(sdata, &local->interfaces, list) {
-		u32 changed = ~0;
-		if (!netif_running(sdata->dev))
-			continue;
-		switch (sdata->vif.type) {
-		case NL80211_IFTYPE_STATION:
-			/* disable beacon change bits */
-			changed &= ~IEEE80211_IFCC_BEACON;
-			/* fall through */
-		case NL80211_IFTYPE_ADHOC:
-		case NL80211_IFTYPE_AP:
-		case NL80211_IFTYPE_MESH_POINT:
-			WARN_ON(ieee80211_if_config(sdata, changed));
-			ieee80211_bss_info_change_notify(sdata, ~0);
-			break;
-		case NL80211_IFTYPE_WDS:
-			break;
-		case NL80211_IFTYPE_AP_VLAN:
-		case NL80211_IFTYPE_MONITOR:
-			/* ignore virtual */
-			break;
-		case NL80211_IFTYPE_UNSPECIFIED:
-		case __NL80211_IFTYPE_AFTER_LAST:
-			WARN_ON(1);
-			break;
-		}
-	}
-
-	/* add back keys */
-	list_for_each_entry(sdata, &local->interfaces, list)
-		if (netif_running(sdata->dev))
-			ieee80211_enable_keys(sdata);
-
-	ieee80211_wake_queues_by_reason(hw,
-			IEEE80211_QUEUE_STOP_REASON_SUSPEND);
-
-	return 0;
-}
+/*
+ * __ieee80211_resume() is a static inline which just calls
+ * ieee80211_reconfig(), which is also needed for hardware
+ * hang/firmware failure/etc. recovery.
+ */
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 1ff83532120..b361e2acfce 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -28,6 +28,7 @@
 #include "rate.h"
 #include "mesh.h"
 #include "wme.h"
+#include "led.h"
 
 /* privid for wiphys to determine whether they belong to us or not */
 void *mac80211_wiphy_privid = &mac80211_wiphy_privid;
@@ -966,3 +967,120 @@ u32 ieee80211_sta_get_rates(struct ieee80211_local *local,
 	}
 	return supp_rates;
 }
+
+int ieee80211_reconfig(struct ieee80211_local *local)
+{
+	struct ieee80211_hw *hw = &local->hw;
+	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_if_init_conf conf;
+	struct sta_info *sta;
+	unsigned long flags;
+	int res;
+
+	/* restart hardware */
+	if (local->open_count) {
+		res = local->ops->start(hw);
+
+		ieee80211_led_radio(local, hw->conf.radio_enabled);
+	}
+
+	/* add interfaces */
+	list_for_each_entry(sdata, &local->interfaces, list) {
+		if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
+		    sdata->vif.type != NL80211_IFTYPE_MONITOR &&
+		    netif_running(sdata->dev)) {
+			conf.vif = &sdata->vif;
+			conf.type = sdata->vif.type;
+			conf.mac_addr = sdata->dev->dev_addr;
+			res = local->ops->add_interface(hw, &conf);
+		}
+	}
+
+	/* add STAs back */
+	if (local->ops->sta_notify) {
+		spin_lock_irqsave(&local->sta_lock, flags);
+		list_for_each_entry(sta, &local->sta_list, list) {
+			if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+				sdata = container_of(sdata->bss,
+					     struct ieee80211_sub_if_data,
+					     u.ap);
+
+			local->ops->sta_notify(hw, &sdata->vif,
+				STA_NOTIFY_ADD, &sta->sta);
+		}
+		spin_unlock_irqrestore(&local->sta_lock, flags);
+	}
+
+	/* Clear Suspend state so that ADDBA requests can be processed */
+
+	rcu_read_lock();
+
+	if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) {
+		list_for_each_entry_rcu(sta, &local->sta_list, list) {
+			clear_sta_flags(sta, WLAN_STA_SUSPEND);
+		}
+	}
+
+	rcu_read_unlock();
+
+	/* setup RTS threshold */
+	if (local->ops->set_rts_threshold)
+		local->ops->set_rts_threshold(hw, local->rts_threshold);
+
+	/* reconfigure hardware */
+	ieee80211_hw_config(local, ~0);
+
+	netif_addr_lock_bh(local->mdev);
+	ieee80211_configure_filter(local);
+	netif_addr_unlock_bh(local->mdev);
+
+	/* Finally also reconfigure all the BSS information */
+	list_for_each_entry(sdata, &local->interfaces, list) {
+		u32 changed = ~0;
+		if (!netif_running(sdata->dev))
+			continue;
+		switch (sdata->vif.type) {
+		case NL80211_IFTYPE_STATION:
+			/* disable beacon change bits */
+			changed &= ~IEEE80211_IFCC_BEACON;
+			/* fall through */
+		case NL80211_IFTYPE_ADHOC:
+		case NL80211_IFTYPE_AP:
+		case NL80211_IFTYPE_MESH_POINT:
+			/*
+			 * Driver's config_interface can fail if rfkill is
+			 * enabled. Accommodate this return code.
+			 * FIXME: When mac80211 has knowledge of rfkill
+			 * state the code below can change back to:
+			 *   WARN(ieee80211_if_config(sdata, changed));
+			 *   ieee80211_bss_info_change_notify(sdata, ~0);
+			 */
+			if (ieee80211_if_config(sdata, changed))
+				printk(KERN_DEBUG "%s: failed to configure interface during resume\n",
+				       sdata->dev->name);
+			else
+				ieee80211_bss_info_change_notify(sdata, ~0);
+			break;
+		case NL80211_IFTYPE_WDS:
+			break;
+		case NL80211_IFTYPE_AP_VLAN:
+		case NL80211_IFTYPE_MONITOR:
+			/* ignore virtual */
+			break;
+		case NL80211_IFTYPE_UNSPECIFIED:
+		case __NL80211_IFTYPE_AFTER_LAST:
+			WARN_ON(1);
+			break;
+		}
+	}
+
+	/* add back keys */
+	list_for_each_entry(sdata, &local->interfaces, list)
+		if (netif_running(sdata->dev))
+			ieee80211_enable_keys(sdata);
+
+	ieee80211_wake_queues_by_reason(hw,
+			IEEE80211_QUEUE_STOP_REASON_SUSPEND);
+
+	return 0;
+}
-- 
cgit v1.2.3-70-g09d2


From 10f644a47b76d3e61b98f2d02ce9690b94c51ee5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 16 Apr 2009 13:17:25 +0200
Subject: mac80211: disable powersave if pm_qos asks for low latency

When an application asks for a latency lower than the beacon interval
there's nothing we can do -- we need to stay awake and not have the
AP buffer frames for us. Add code to automatically calculate this
constraint in mac80211 so drivers need not concern themselves with it.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h  |  9 +++++++++
 net/mac80211/ieee80211_i.h |  5 ++++-
 net/mac80211/iface.c       |  4 ++--
 net/mac80211/main.c        | 31 ++++++++++++++++++++++++-------
 net/mac80211/mlme.c        | 36 ++++++++++++++++++++++++++++++++----
 net/mac80211/wext.c        |  2 +-
 6 files changed, 72 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 4b501b48ce8..53563d53b5a 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1383,4 +1383,13 @@ static inline int ieee80211_freq_to_ofdm_chan(int s_freq, int freq)
 		return -1;
 }
 
+/**
+ * ieee80211_tu_to_usec - convert time units (TU) to microseconds
+ * @tu: the TUs
+ */
+static inline unsigned long ieee80211_tu_to_usec(unsigned long tu)
+{
+	return 1024 * tu;
+}
+
 #endif /* LINUX_IEEE80211_H */
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index ff40dd7b523..b1d18d967d8 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -750,6 +750,7 @@ struct ieee80211_local {
 	struct work_struct dynamic_ps_enable_work;
 	struct work_struct dynamic_ps_disable_work;
 	struct timer_list dynamic_ps_timer;
+	struct notifier_block network_latency_notifier;
 
 	int user_power_level; /* in dBm */
 	int power_constr_level; /* in dBm */
@@ -938,7 +939,9 @@ int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason
 int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason);
 void ieee80211_send_pspoll(struct ieee80211_local *local,
 			   struct ieee80211_sub_if_data *sdata);
-void ieee80211_recalc_ps(struct ieee80211_local *local);
+void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency);
+int ieee80211_max_network_latency(struct notifier_block *nb,
+				  unsigned long data, void *dummy);
 
 /* IBSS code */
 int ieee80211_ibss_commit(struct ieee80211_sub_if_data *sdata);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 6240f76e2a4..5d60deb219d 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -317,7 +317,7 @@ static int ieee80211_open(struct net_device *dev)
 		ieee80211_set_wmm_default(sdata);
 	}
 
-	ieee80211_recalc_ps(local);
+	ieee80211_recalc_ps(local, -1);
 
 	/*
 	 * ieee80211_sta_work is disabled while network interface
@@ -574,7 +574,7 @@ static int ieee80211_stop(struct net_device *dev)
 		hw_reconf_flags = 0;
 	}
 
-	ieee80211_recalc_ps(local);
+	ieee80211_recalc_ps(local, -1);
 
 	/* do after stop to avoid reconfiguring when we stop anyway */
 	if (hw_reconf_flags)
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 80c0e28bf54..049ce863980 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -21,6 +21,7 @@
 #include <linux/wireless.h>
 #include <linux/rtnetlink.h>
 #include <linux/bitmap.h>
+#include <linux/pm_qos_params.h>
 #include <net/net_namespace.h>
 #include <net/cfg80211.h>
 
@@ -1038,25 +1039,38 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		}
 	}
 
+	local->network_latency_notifier.notifier_call =
+		ieee80211_max_network_latency;
+	result = pm_qos_add_notifier(PM_QOS_NETWORK_LATENCY,
+				     &local->network_latency_notifier);
+
+	if (result) {
+		rtnl_lock();
+		goto fail_pm_qos;
+	}
+
 	return 0;
 
-fail_wep:
+ fail_pm_qos:
+	ieee80211_led_exit(local);
+	ieee80211_remove_interfaces(local);
+ fail_wep:
 	rate_control_deinitialize(local);
-fail_rate:
+ fail_rate:
 	unregister_netdevice(local->mdev);
 	local->mdev = NULL;
-fail_dev:
+ fail_dev:
 	rtnl_unlock();
 	sta_info_stop(local);
-fail_sta_info:
+ fail_sta_info:
 	debugfs_hw_del(local);
 	destroy_workqueue(local->hw.workqueue);
-fail_workqueue:
+ fail_workqueue:
 	if (local->mdev)
 		free_netdev(local->mdev);
-fail_mdev_alloc:
+ fail_mdev_alloc:
 	wiphy_unregister(local->hw.wiphy);
-fail_wiphy_register:
+ fail_wiphy_register:
 	kfree(local->int_scan_req.channels);
 	return result;
 }
@@ -1069,6 +1083,9 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
 	tasklet_kill(&local->tx_pending_tasklet);
 	tasklet_kill(&local->tasklet);
 
+	pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY,
+			       &local->network_latency_notifier);
+
 	rtnl_lock();
 
 	/*
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 06d9a1d2325..c39a214e7ad 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -17,6 +17,7 @@
 #include <linux/if_arp.h>
 #include <linux/etherdevice.h>
 #include <linux/rtnetlink.h>
+#include <linux/pm_qos_params.h>
 #include <net/mac80211.h>
 #include <asm/unaligned.h>
 
@@ -515,7 +516,7 @@ static void ieee80211_change_ps(struct ieee80211_local *local)
 }
 
 /* need to hold RTNL or interface lock */
-void ieee80211_recalc_ps(struct ieee80211_local *local)
+void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency)
 {
 	struct ieee80211_sub_if_data *sdata, *found = NULL;
 	int count = 0;
@@ -534,10 +535,22 @@ void ieee80211_recalc_ps(struct ieee80211_local *local)
 		count++;
 	}
 
-	if (count == 1 && found->u.mgd.powersave)
-		local->ps_sdata = found;
-	else
+	if (count == 1 && found->u.mgd.powersave) {
+		s32 beaconint_us;
+
+		if (latency < 0)
+			latency = pm_qos_requirement(PM_QOS_NETWORK_LATENCY);
+
+		beaconint_us = ieee80211_tu_to_usec(
+					found->vif.bss_conf.beacon_int);
+
+		if (beaconint_us > latency)
+			local->ps_sdata = NULL;
+		else
+			local->ps_sdata = found;
+	} else {
 		local->ps_sdata = NULL;
+	}
 
 	ieee80211_change_ps(local);
 }
@@ -2324,3 +2337,18 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local)
 		ieee80211_restart_sta_timer(sdata);
 	rcu_read_unlock();
 }
+
+int ieee80211_max_network_latency(struct notifier_block *nb,
+				  unsigned long data, void *dummy)
+{
+	s32 latency_usec = (s32) data;
+	struct ieee80211_local *local =
+		container_of(nb, struct ieee80211_local,
+			     network_latency_notifier);
+
+	mutex_lock(&local->iflist_mtx);
+	ieee80211_recalc_ps(local, latency_usec);
+	mutex_unlock(&local->iflist_mtx);
+
+	return 0;
+}
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 81f63e57027..1c4664b8b1a 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -789,7 +789,7 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev,
 		ieee80211_hw_config(local,
 				    IEEE80211_CONF_CHANGE_DYNPS_TIMEOUT);
 
-	ieee80211_recalc_ps(local);
+	ieee80211_recalc_ps(local, -1);
 
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 955394c98c9cb79bdb3e6b479695af3a90ea0623 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 16 Apr 2009 17:04:25 +0200
Subject: mac80211: document powersaving/beacon filter future

Document what mac80211 will do in the future to help save power.
We're not quite there yet, but a plan helps. Also, while at it,
fix the docs wrt. multicast traffic.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Reviewed-by: Kalle Valo <kalle.valo@iki.fi>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 60 +++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 50 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index a593bedcfed..52808bdcc6c 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1109,11 +1109,9 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw,
  * need software support for parsing the TIM bitmap. This is also supported
  * by mac80211 by combining the %IEEE80211_HW_SUPPORTS_PS and
  * %IEEE80211_HW_PS_NULLFUNC_STACK flags. The hardware is of course still
- * required to pass up beacons. Additionally, in this case, mac80211 will
- * wake up the hardware when multicast traffic is announced in the beacon.
- *
- * FIXME: I don't think we can be fast enough in software when we want to
- *	  receive multicast traffic?
+ * required to pass up beacons. The hardware is still required to handle
+ * waking up for multicast traffic; if it cannot the driver must handle that
+ * as best as it can, mac80211 is too slow.
  *
  * Dynamic powersave mode is an extension to normal powersave mode in which
  * the hardware stays awake for a user-specified period of time after sending
@@ -1134,11 +1132,53 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw,
  * way the host will only receive beacons where some relevant information
  * (for example ERP protection or WMM settings) have changed.
  *
- * Beacon filter support is informed with %IEEE80211_HW_BEACON_FILTER flag.
- * The driver needs to enable beacon filter support whenever power save is
- * enabled, that is %IEEE80211_CONF_PS is set. When power save is enabled,
- * the stack will not check for beacon miss at all and the driver needs to
- * notify about complete loss of beacons with ieee80211_beacon_loss().
+ * Beacon filter support is advertised with the %IEEE80211_HW_BEACON_FILTER
+ * hardware capability. The driver needs to enable beacon filter support
+ * whenever power save is enabled, that is %IEEE80211_CONF_PS is set. When
+ * power save is enabled, the stack will not check for beacon loss and the
+ * driver needs to notify about loss of beacons with ieee80211_beacon_loss().
+ *
+ * The time (or number of beacons missed) until the firmware notifies the
+ * driver of a beacon loss event (which in turn causes the driver to call
+ * ieee80211_beacon_loss()) should be configurable and will be controlled
+ * by mac80211 and the roaming algorithm in the future.
+ *
+ * Since there may be constantly changing information elements that nothing
+ * in the software stack cares about, we will, in the future, have mac80211
+ * tell the driver which information elements are interesting in the sense
+ * that we want to see changes in them. This will include
+ *  - a list of information element IDs
+ *  - a list of OUIs for the vendor information element
+ *
+ * Ideally, the hardware would filter out any beacons without changes in the
+ * requested elements, but if it cannot support that it may, at the expense
+ * of some efficiency, filter out only a subset. For example, if the device
+ * doesn't support checking for OUIs it should pass up all changes in all
+ * vendor information elements.
+ *
+ * Note that change, for the sake of simplification, also includes information
+ * elements appearing or disappearing from the beacon.
+ *
+ * Some hardware supports an "ignore list" instead, just make sure nothing
+ * that was requested is on the ignore list, and include commonly changing
+ * information element IDs in the ignore list, for example 11 (BSS load) and
+ * the various vendor-assigned IEs with unknown contents (128, 129, 133-136,
+ * 149, 150, 155, 156, 173, 176, 178, 179, 219); for forward compatibility
+ * it could also include some currently unused IDs.
+ *
+ *
+ * In addition to these capabilities, hardware should support notifying the
+ * host of changes in the beacon RSSI. This is relevant to implement roaming
+ * when no traffic is flowing (when traffic is flowing we see the RSSI of
+ * the received data packets). This can consist in notifying the host when
+ * the RSSI changes significantly or when it drops below or rises above
+ * configurable thresholds. In the future these thresholds will also be
+ * configured by mac80211 (which gets them from userspace) to implement
+ * them as the roaming algorithm requires.
+ *
+ * If the hardware cannot implement this, the driver should ask it to
+ * periodically pass beacon frames to the host so that software can do the
+ * signal strength threshold checking.
  */
 
 /**
-- 
cgit v1.2.3-70-g09d2


From 691597cb26f236ac7471f1adf925a134c86799d6 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sun, 19 Apr 2009 19:57:45 +0200
Subject: cfg80211/mac80211: move wext SIWMLME into cfg80211

Since we have ->deauth and ->disassoc we can support the
wext SIWMLME call directly without driver wext handlers.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h     |  3 +++
 net/mac80211/cfg.c         | 12 ++++--------
 net/mac80211/mlme.c        |  6 ------
 net/mac80211/wext.c        | 25 +------------------------
 net/wireless/wext-compat.c | 46 +++++++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 53 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index d303c269a69..019a41efa0b 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -833,6 +833,9 @@ int cfg80211_wext_siwscan(struct net_device *dev,
 int cfg80211_wext_giwscan(struct net_device *dev,
 			  struct iw_request_info *info,
 			  struct iw_point *data, char *extra);
+int cfg80211_wext_siwmlme(struct net_device *dev,
+			  struct iw_request_info *info,
+			  struct iw_point *data, char *extra);
 int cfg80211_wext_giwrange(struct net_device *dev,
 			   struct iw_request_info *info,
 			   struct iw_point *data, char *extra);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 520efa8a079..daf75287e92 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1268,22 +1268,18 @@ static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev,
 static int ieee80211_deauth(struct wiphy *wiphy, struct net_device *dev,
 			    struct cfg80211_deauth_request *req)
 {
-	struct ieee80211_sub_if_data *sdata;
-
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	/* TODO: req->ie */
+	/* TODO: req->ie, req->peer_addr */
 	return ieee80211_sta_deauthenticate(sdata, req->reason_code);
 }
 
 static int ieee80211_disassoc(struct wiphy *wiphy, struct net_device *dev,
 			      struct cfg80211_disassoc_request *req)
 {
-	struct ieee80211_sub_if_data *sdata;
-
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	/* TODO: req->ie */
+	/* TODO: req->ie, req->peer_addr */
 	return ieee80211_sta_disassociate(sdata, req->reason_code);
 }
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a16c9d724be..428742d7f44 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2338,9 +2338,6 @@ int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason
 	printk(KERN_DEBUG "%s: deauthenticating by local choice (reason=%d)\n",
 	       sdata->dev->name, reason);
 
-	if (sdata->vif.type != NL80211_IFTYPE_STATION)
-		return -EINVAL;
-
 	ieee80211_set_disassoc(sdata, true, true, reason);
 	return 0;
 }
@@ -2352,9 +2349,6 @@ int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason)
 	printk(KERN_DEBUG "%s: disassociating by local choice (reason=%d)\n",
 	       sdata->dev->name, reason);
 
-	if (sdata->vif.type != NL80211_IFTYPE_STATION)
-		return -EINVAL;
-
 	if (!(ifmgd->flags & IEEE80211_STA_ASSOCIATED))
 		return -ENOLINK;
 
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 1c4664b8b1a..896704cf94e 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -612,29 +612,6 @@ static int ieee80211_ioctl_giwretry(struct net_device *dev,
 	return 0;
 }
 
-static int ieee80211_ioctl_siwmlme(struct net_device *dev,
-				   struct iw_request_info *info,
-				   struct iw_point *data, char *extra)
-{
-	struct ieee80211_sub_if_data *sdata;
-	struct iw_mlme *mlme = (struct iw_mlme *) extra;
-
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	if (!(sdata->vif.type == NL80211_IFTYPE_STATION))
-		return -EINVAL;
-
-	switch (mlme->cmd) {
-	case IW_MLME_DEAUTH:
-		/* TODO: mlme->addr.sa_data */
-		return ieee80211_sta_deauthenticate(sdata, mlme->reason_code);
-	case IW_MLME_DISASSOC:
-		/* TODO: mlme->addr.sa_data */
-		return ieee80211_sta_disassociate(sdata, mlme->reason_code);
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
 
 static int ieee80211_ioctl_siwencode(struct net_device *dev,
 				     struct iw_request_info *info,
@@ -1076,7 +1053,7 @@ static const iw_handler ieee80211_handler[] =
 	(iw_handler) NULL,				/* SIOCGIWTHRSPY */
 	(iw_handler) ieee80211_ioctl_siwap,		/* SIOCSIWAP */
 	(iw_handler) ieee80211_ioctl_giwap,		/* SIOCGIWAP */
-	(iw_handler) ieee80211_ioctl_siwmlme,		/* SIOCSIWMLME */
+	(iw_handler) cfg80211_wext_siwmlme,		/* SIOCSIWMLME */
 	(iw_handler) NULL,				/* SIOCGIWAPLIST */
 	(iw_handler) cfg80211_wext_siwscan,		/* SIOCSIWSCAN */
 	(iw_handler) cfg80211_wext_giwscan,		/* SIOCGIWSCAN */
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 0fd1db6e95b..6fd7bf7b448 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -10,6 +10,7 @@
 
 #include <linux/wireless.h>
 #include <linux/nl80211.h>
+#include <linux/if_arp.h>
 #include <net/iw_handler.h>
 #include <net/wireless.h>
 #include <net/cfg80211.h>
@@ -206,7 +207,6 @@ int cfg80211_wext_giwrange(struct net_device *dev,
 	range->enc_capa = IW_ENC_CAPA_WPA | IW_ENC_CAPA_WPA2 |
 			  IW_ENC_CAPA_CIPHER_TKIP | IW_ENC_CAPA_CIPHER_CCMP;
 
-
 	for (band = 0; band < IEEE80211_NUM_BANDS; band ++) {
 		int i;
 		struct ieee80211_supported_band *sband;
@@ -241,3 +241,47 @@ int cfg80211_wext_giwrange(struct net_device *dev,
 	return 0;
 }
 EXPORT_SYMBOL(cfg80211_wext_giwrange);
+
+int cfg80211_wext_siwmlme(struct net_device *dev,
+			  struct iw_request_info *info,
+			  struct iw_point *data, char *extra)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct iw_mlme *mlme = (struct iw_mlme *)extra;
+	struct cfg80211_registered_device *rdev;
+	union {
+		struct cfg80211_disassoc_request disassoc;
+		struct cfg80211_deauth_request deauth;
+	} cmd;
+
+	if (!wdev)
+		return -EOPNOTSUPP;
+
+	rdev = wiphy_to_dev(wdev->wiphy);
+
+	if (wdev->iftype != NL80211_IFTYPE_STATION)
+		return -EINVAL;
+
+	if (mlme->addr.sa_family != ARPHRD_ETHER)
+		return -EINVAL;
+
+	memset(&cmd, 0, sizeof(cmd));
+
+	switch (mlme->cmd) {
+	case IW_MLME_DEAUTH:
+		if (!rdev->ops->deauth)
+			return -EOPNOTSUPP;
+		cmd.deauth.peer_addr = mlme->addr.sa_data;
+		cmd.deauth.reason_code = mlme->reason_code;
+		return rdev->ops->deauth(wdev->wiphy, dev, &cmd.deauth);
+	case IW_MLME_DISASSOC:
+		if (!rdev->ops->disassoc)
+			return -EOPNOTSUPP;
+		cmd.disassoc.peer_addr = mlme->addr.sa_data;
+		cmd.disassoc.reason_code = mlme->reason_code;
+		return rdev->ops->disassoc(wdev->wiphy, dev, &cmd.disassoc);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+EXPORT_SYMBOL(cfg80211_wext_siwmlme);
-- 
cgit v1.2.3-70-g09d2


From 04a773ade0680d862b479d7219973df60f7a3834 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sun, 19 Apr 2009 21:24:32 +0200
Subject: cfg80211/nl80211: add IBSS API

This adds IBSS API along with (preliminary) wext handlers.
The wext handlers can only do IBSS so you need to call them
from your own wext handlers if the mode is IBSS.

The nl80211 API requires
 * an SSID
 * a channel (frequency) for the case that a new IBSS
   has to be created

It optionally supports
 * a flag to fix the channel
 * a fixed BSSID

The cfg80211 code also takes care to leave the IBSS before
the netdev is set down. If wireless extensions are used, it
also caches values when the interface is down and instructs
the driver to join when the interface is set up.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h    |  18 +++
 include/net/cfg80211.h     |  72 +++++++++
 include/net/wireless.h     |  14 ++
 net/wireless/Makefile      |   2 +-
 net/wireless/core.c        |  16 ++
 net/wireless/core.h        |   8 +
 net/wireless/ibss.c        | 360 +++++++++++++++++++++++++++++++++++++++++++++
 net/wireless/nl80211.c     | 182 +++++++++++++++++++++--
 net/wireless/nl80211.h     |   4 +
 net/wireless/wext-compat.c |  30 ++++
 10 files changed, 693 insertions(+), 13 deletions(-)
 create mode 100644 net/wireless/ibss.c

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index c01423888db..25ce3e42bd1 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -223,6 +223,15 @@
  *	%NL80211_ATTR_KEY_SEQ to indicate the TSC value of the frame; this
  *	event matches with MLME-MICHAELMICFAILURE.indication() primitive
  *
+ * @NL80211_CMD_JOIN_IBSS: Join a new IBSS -- given at least an SSID and a
+ *	FREQ attribute (for the initial frequency if no peer can be found)
+ *	and optionally a MAC (as BSSID) and FREQ_FIXED attribute if those
+ *	should be fixed rather than automatically determined. Can only be
+ *	executed on a network interface that is UP, and fixed BSSID/FREQ
+ *	may be rejected.
+ * @NL80211_CMD_LEAVE_IBSS: Leave the IBSS -- no special arguments, the IBSS is
+ *	determined by the network interface.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -288,6 +297,9 @@ enum nl80211_commands {
 
 	NL80211_CMD_REG_BEACON_HINT,
 
+	NL80211_CMD_JOIN_IBSS,
+	NL80211_CMD_LEAVE_IBSS,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -456,6 +468,9 @@ enum nl80211_commands {
  * @NL80211_ATTR_CIPHER_SUITES: a set of u32 values indicating the supported
  *	cipher suites
  *
+ * @NL80211_ATTR_FREQ_FIXED: a flag indicating the IBSS should not try to look
+ *	for other networks on different channels
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -547,6 +562,9 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_FREQ_BEFORE,
 	NL80211_ATTR_FREQ_AFTER,
+
+	NL80211_ATTR_FREQ_FIXED,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 019a41efa0b..5287a3e56e7 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -657,6 +657,31 @@ struct cfg80211_disassoc_request {
 	size_t ie_len;
 };
 
+/**
+ * struct cfg80211_ibss_params - IBSS parameters
+ *
+ * This structure defines the IBSS parameters for the join_ibss()
+ * method.
+ *
+ * @ssid: The SSID, will always be non-null.
+ * @ssid_len: The length of the SSID, will always be non-zero.
+ * @bssid: Fixed BSSID requested, maybe be %NULL, if set do not
+ *	search for IBSSs with a different BSSID.
+ * @channel: The channel to use if no IBSS can be found to join.
+ * @channel_fixed: The channel should be fixed -- do not search for
+ *	IBSSs to join on other channels.
+ * @ie: information element(s) to include in the beacon
+ * @ie_len: length of that
+ */
+struct cfg80211_ibss_params {
+	u8 *ssid;
+	u8 *bssid;
+	struct ieee80211_channel *channel;
+	u8 *ie;
+	u8 ssid_len, ie_len;
+	bool channel_fixed;
+};
+
 /**
  * struct cfg80211_ops - backend description for wireless configuration
  *
@@ -732,6 +757,11 @@ struct cfg80211_disassoc_request {
  * @assoc: Request to (re)associate with the specified peer
  * @deauth: Request to deauthenticate from the specified peer
  * @disassoc: Request to disassociate from the specified peer
+ *
+ * @join_ibss: Join the specified IBSS (or create if necessary). Once done, call
+ *	cfg80211_ibss_joined(), also call that function when changing BSSID due
+ *	to a merge.
+ * @leave_ibss: Leave the IBSS.
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy);
@@ -817,6 +847,10 @@ struct cfg80211_ops {
 			  struct cfg80211_deauth_request *req);
 	int	(*disassoc)(struct wiphy *wiphy, struct net_device *dev,
 			    struct cfg80211_disassoc_request *req);
+
+	int	(*join_ibss)(struct wiphy *wiphy, struct net_device *dev,
+			     struct cfg80211_ibss_params *params);
+	int	(*leave_ibss)(struct wiphy *wiphy, struct net_device *dev);
 };
 
 /* temporary wext handlers */
@@ -839,6 +873,28 @@ int cfg80211_wext_siwmlme(struct net_device *dev,
 int cfg80211_wext_giwrange(struct net_device *dev,
 			   struct iw_request_info *info,
 			   struct iw_point *data, char *extra);
+int cfg80211_ibss_wext_siwfreq(struct net_device *dev,
+			       struct iw_request_info *info,
+			       struct iw_freq *freq, char *extra);
+int cfg80211_ibss_wext_giwfreq(struct net_device *dev,
+			       struct iw_request_info *info,
+			       struct iw_freq *freq, char *extra);
+int cfg80211_ibss_wext_siwessid(struct net_device *dev,
+				struct iw_request_info *info,
+				struct iw_point *data, char *ssid);
+int cfg80211_ibss_wext_giwessid(struct net_device *dev,
+				struct iw_request_info *info,
+				struct iw_point *data, char *ssid);
+int cfg80211_ibss_wext_siwap(struct net_device *dev,
+			     struct iw_request_info *info,
+			     struct sockaddr *ap_addr, char *extra);
+int cfg80211_ibss_wext_giwap(struct net_device *dev,
+			     struct iw_request_info *info,
+			     struct sockaddr *ap_addr, char *extra);
+
+/* wext helper for now (to be removed) */
+struct ieee80211_channel *cfg80211_wext_freq(struct wiphy *wiphy,
+					     struct iw_freq *freq);
 
 /**
  * cfg80211_scan_done - notify that scan finished
@@ -984,4 +1040,20 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr,
 				  enum nl80211_key_type key_type, int key_id,
 				  const u8 *tsc);
 
+/**
+ * cfg80211_ibss_joined - notify cfg80211 that device joined an IBSS
+ *
+ * @dev: network device
+ * @bssid: the BSSID of the IBSS joined
+ * @gfp: allocation flags
+ *
+ * This function notifies cfg80211 that the device joined an IBSS or
+ * switched to a different BSSID. Before this function can be called,
+ * either a beacon has to have been received from the IBSS, or one of
+ * the cfg80211_inform_bss{,_frame} functions must have been called
+ * with the locally generated beacon -- this guarantees that there is
+ * always a scan result for this IBSS. cfg80211 will handle the rest.
+ */
+void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp);
+
 #endif /* __NET_CFG80211_H */
diff --git a/include/net/wireless.h b/include/net/wireless.h
index 44c2642d3c0..abd27b03333 100644
--- a/include/net/wireless.h
+++ b/include/net/wireless.h
@@ -265,6 +265,8 @@ struct wiphy {
  *
  * @wiphy: pointer to hardware description
  * @iftype: interface type
+ * @list: (private)
+ * @netdev (private)
  */
 struct wireless_dev {
 	struct wiphy *wiphy;
@@ -273,6 +275,18 @@ struct wireless_dev {
 	/* private to the generic wireless code */
 	struct list_head list;
 	struct net_device *netdev;
+
+	/* currently used for IBSS - might be rearranged in the future */
+	struct cfg80211_bss *current_bss;
+	u8 bssid[ETH_ALEN];
+	u8 ssid[IEEE80211_MAX_SSID_LEN];
+	u8 ssid_len;
+
+#ifdef CONFIG_WIRELESS_EXT
+	/* wext data */
+	struct cfg80211_ibss_params wext;
+	u8 wext_bssid[ETH_ALEN];
+#endif
 };
 
 /**
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 6d1e7b27b75..14ea01c4a10 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -5,7 +5,7 @@ obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o
 obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o
 obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o
 
-cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o mlme.o
+cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o mlme.o ibss.o
 cfg80211-$(CONFIG_WIRELESS_EXT) += wext-compat.o
 
 ccflags-y += -D__CHECK_ENDIAN__
diff --git a/net/wireless/core.c b/net/wireless/core.c
index d1f556535f6..de1ac51ae4e 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -450,6 +450,22 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 		dev->ieee80211_ptr->netdev = dev;
 		mutex_unlock(&rdev->devlist_mtx);
 		break;
+	case NETDEV_GOING_DOWN:
+		if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC)
+			break;
+		if (!dev->ieee80211_ptr->ssid_len)
+			break;
+		cfg80211_leave_ibss(rdev, dev);
+		break;
+	case NETDEV_UP:
+#ifdef CONFIG_WIRELESS_EXT
+		if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC)
+			break;
+		if (!dev->ieee80211_ptr->wext.ssid_len)
+			break;
+		cfg80211_join_ibss(rdev, dev, &dev->ieee80211_ptr->wext);
+		break;
+#endif
 	case NETDEV_UNREGISTER:
 		mutex_lock(&rdev->devlist_mtx);
 		if (!list_empty(&dev->ieee80211_ptr->list)) {
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 02668b02e33..2ef3595fd6e 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -144,4 +144,12 @@ void cfg80211_bss_expire(struct cfg80211_registered_device *dev);
 void cfg80211_bss_age(struct cfg80211_registered_device *dev,
                       unsigned long age_secs);
 
+/* IBSS */
+int cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
+		       struct net_device *dev,
+		       struct cfg80211_ibss_params *params);
+void cfg80211_clear_ibss(struct net_device *dev);
+int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev,
+			struct net_device *dev);
+
 #endif /* __NET_WIRELESS_CORE_H */
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
new file mode 100644
index 00000000000..2bf42fdef3a
--- /dev/null
+++ b/net/wireless/ibss.c
@@ -0,0 +1,360 @@
+/*
+ * Some IBSS support code for cfg80211.
+ *
+ * Copyright 2009	Johannes Berg <johannes@sipsolutions.net>
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/if_arp.h>
+#include <net/cfg80211.h>
+#include <net/wireless.h>
+#include "nl80211.h"
+
+
+void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_bss *bss;
+#ifdef CONFIG_WIRELESS_EXT
+	union iwreq_data wrqu;
+#endif
+
+	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC))
+		return;
+
+	if (WARN_ON(!wdev->ssid_len))
+		return;
+
+	if (memcmp(bssid, wdev->bssid, ETH_ALEN) == 0)
+		return;
+
+	bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid,
+			       wdev->ssid, wdev->ssid_len,
+			       WLAN_CAPABILITY_IBSS, WLAN_CAPABILITY_IBSS);
+
+	if (WARN_ON(!bss))
+		return;
+
+	if (wdev->current_bss) {
+		cfg80211_unhold_bss(wdev->current_bss);
+		cfg80211_put_bss(wdev->current_bss);
+	}
+
+	cfg80211_hold_bss(bss);
+	wdev->current_bss = bss;
+	memcpy(wdev->bssid, bssid, ETH_ALEN);
+
+	nl80211_send_ibss_bssid(wiphy_to_dev(wdev->wiphy), dev, bssid, gfp);
+#ifdef CONFIG_WIRELESS_EXT
+	memset(&wrqu, 0, sizeof(wrqu));
+	memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN);
+	wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL);
+#endif
+}
+EXPORT_SYMBOL(cfg80211_ibss_joined);
+
+int cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
+		       struct net_device *dev,
+		       struct cfg80211_ibss_params *params)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	int err;
+
+	if (wdev->ssid_len)
+		return -EALREADY;
+
+#ifdef CONFIG_WIRELESS_EXT
+	wdev->wext.channel = params->channel;
+#endif
+	err = rdev->ops->join_ibss(&rdev->wiphy, dev, params);
+
+	if (err)
+		return err;
+
+	memcpy(wdev->ssid, params->ssid, params->ssid_len);
+	wdev->ssid_len = params->ssid_len;
+
+	return 0;
+}
+
+void cfg80211_clear_ibss(struct net_device *dev)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+
+	if (wdev->current_bss) {
+		cfg80211_unhold_bss(wdev->current_bss);
+		cfg80211_put_bss(wdev->current_bss);
+	}
+
+	wdev->current_bss = NULL;
+	wdev->ssid_len = 0;
+	memset(wdev->bssid, 0, ETH_ALEN);
+}
+
+int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev,
+			struct net_device *dev)
+{
+	int err;
+
+	err = rdev->ops->leave_ibss(&rdev->wiphy, dev);
+
+	if (err)
+		return err;
+
+	cfg80211_clear_ibss(dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_WIRELESS_EXT
+static int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev,
+				   struct wireless_dev *wdev)
+{
+	enum ieee80211_band band;
+	int i;
+
+	/* try to find an IBSS channel if none requested ... */
+	if (!wdev->wext.channel) {
+		for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
+			struct ieee80211_supported_band *sband;
+			struct ieee80211_channel *chan;
+
+			sband = rdev->wiphy.bands[band];
+			if (!sband)
+				continue;
+
+			for (i = 0; i < sband->n_channels; i++) {
+				chan = &sband->channels[i];
+				if (chan->flags & IEEE80211_CHAN_NO_IBSS)
+					continue;
+				if (chan->flags & IEEE80211_CHAN_DISABLED)
+					continue;
+				wdev->wext.channel = chan;
+				break;
+			}
+
+			if (wdev->wext.channel)
+				break;
+		}
+
+		if (!wdev->wext.channel)
+			return -EINVAL;
+	}
+
+	/* don't join -- SSID is not there */
+	if (!wdev->wext.ssid_len)
+		return 0;
+
+	if (!netif_running(wdev->netdev))
+		return 0;
+
+	return cfg80211_join_ibss(wiphy_to_dev(wdev->wiphy),
+				  wdev->netdev, &wdev->wext);
+}
+
+int cfg80211_ibss_wext_siwfreq(struct net_device *dev,
+			       struct iw_request_info *info,
+			       struct iw_freq *freq, char *extra)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct ieee80211_channel *chan;
+	int err;
+
+	/* call only for ibss! */
+	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC))
+		return -EINVAL;
+
+	if (!wiphy_to_dev(wdev->wiphy)->ops->join_ibss)
+		return -EOPNOTSUPP;
+
+	chan = cfg80211_wext_freq(wdev->wiphy, freq);
+	if (chan && IS_ERR(chan))
+		return PTR_ERR(chan);
+
+	if (chan &&
+	    (chan->flags & IEEE80211_CHAN_NO_IBSS ||
+	     chan->flags & IEEE80211_CHAN_DISABLED))
+		return -EINVAL;
+
+	if (wdev->wext.channel == chan)
+		return 0;
+
+	if (wdev->ssid_len) {
+		err = cfg80211_leave_ibss(wiphy_to_dev(wdev->wiphy), dev);
+		if (err)
+			return err;
+	}
+
+	if (chan) {
+		wdev->wext.channel = chan;
+		wdev->wext.channel_fixed = true;
+	} else {
+		/* cfg80211_ibss_wext_join will pick one if needed */
+		wdev->wext.channel_fixed = false;
+	}
+
+	return cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev);
+}
+/* temporary symbol - mark GPL - in the future the handler won't be */
+EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_siwfreq);
+
+int cfg80211_ibss_wext_giwfreq(struct net_device *dev,
+			       struct iw_request_info *info,
+			       struct iw_freq *freq, char *extra)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct ieee80211_channel *chan = NULL;
+
+	/* call only for ibss! */
+	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC))
+		return -EINVAL;
+
+	if (wdev->current_bss)
+		chan = wdev->current_bss->channel;
+	else if (wdev->wext.channel)
+		chan = wdev->wext.channel;
+
+	if (chan) {
+		freq->m = chan->center_freq;
+		freq->e = 6;
+		return 0;
+	}
+
+	/* no channel if not joining */
+	return -EINVAL;
+}
+/* temporary symbol - mark GPL - in the future the handler won't be */
+EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_giwfreq);
+
+int cfg80211_ibss_wext_siwessid(struct net_device *dev,
+				struct iw_request_info *info,
+				struct iw_point *data, char *ssid)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	size_t len = data->length;
+	int err;
+
+	/* call only for ibss! */
+	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC))
+		return -EINVAL;
+
+	if (!wiphy_to_dev(wdev->wiphy)->ops->join_ibss)
+		return -EOPNOTSUPP;
+
+	if (wdev->ssid_len) {
+		err = cfg80211_leave_ibss(wiphy_to_dev(wdev->wiphy), dev);
+		if (err)
+			return err;
+	}
+
+	/* iwconfig uses nul termination in SSID.. */
+	if (len > 0 && ssid[len - 1] == '\0')
+		len--;
+
+	wdev->wext.ssid = wdev->ssid;
+	memcpy(wdev->wext.ssid, ssid, len);
+	wdev->wext.ssid_len = len;
+
+	return cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev);
+}
+/* temporary symbol - mark GPL - in the future the handler won't be */
+EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_siwessid);
+
+int cfg80211_ibss_wext_giwessid(struct net_device *dev,
+				struct iw_request_info *info,
+				struct iw_point *data, char *ssid)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+
+	/* call only for ibss! */
+	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC))
+		return -EINVAL;
+
+	data->flags = 0;
+
+	if (wdev->ssid_len) {
+		data->flags = 1;
+		data->length = wdev->ssid_len;
+		memcpy(ssid, wdev->ssid, data->length);
+	} else if (wdev->wext.ssid) {
+		data->flags = 1;
+		data->length = wdev->wext.ssid_len;
+		memcpy(ssid, wdev->wext.ssid, data->length);
+	}
+
+	return 0;
+}
+/* temporary symbol - mark GPL - in the future the handler won't be */
+EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_giwessid);
+
+int cfg80211_ibss_wext_siwap(struct net_device *dev,
+			     struct iw_request_info *info,
+			     struct sockaddr *ap_addr, char *extra)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	u8 *bssid = ap_addr->sa_data;
+	int err;
+
+	/* call only for ibss! */
+	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC))
+		return -EINVAL;
+
+	if (!wiphy_to_dev(wdev->wiphy)->ops->join_ibss)
+		return -EOPNOTSUPP;
+
+	if (ap_addr->sa_family != ARPHRD_ETHER)
+		return -EINVAL;
+
+	/* automatic mode */
+	if (is_zero_ether_addr(bssid) || is_broadcast_ether_addr(bssid))
+		bssid = NULL;
+
+	/* both automatic */
+	if (!bssid && !wdev->wext.bssid)
+		return 0;
+
+	/* fixed already - and no change */
+	if (wdev->wext.bssid && bssid &&
+	    compare_ether_addr(bssid, wdev->wext.bssid) == 0)
+		return 0;
+
+	if (wdev->ssid_len) {
+		err = cfg80211_leave_ibss(wiphy_to_dev(wdev->wiphy), dev);
+		if (err)
+			return err;
+	}
+
+	if (bssid) {
+		memcpy(wdev->wext_bssid, bssid, ETH_ALEN);
+		wdev->wext.bssid = wdev->wext_bssid;
+	} else
+		wdev->wext.bssid = NULL;
+
+	return cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev);
+}
+/* temporary symbol - mark GPL - in the future the handler won't be */
+EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_siwap);
+
+int cfg80211_ibss_wext_giwap(struct net_device *dev,
+			     struct iw_request_info *info,
+			     struct sockaddr *ap_addr, char *extra)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+
+	/* call only for ibss! */
+	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC))
+		return -EINVAL;
+
+	ap_addr->sa_family = ARPHRD_ETHER;
+
+	if (wdev->wext.bssid) {
+		memcpy(ap_addr->sa_data, wdev->wext.bssid, ETH_ALEN);
+		return 0;
+	}
+
+	memcpy(ap_addr->sa_data, wdev->bssid, ETH_ALEN);
+	return 0;
+}
+/* temporary symbol - mark GPL - in the future the handler won't be */
+EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_giwap);
+#endif
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d2cfde659e7..16f86356ac9 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -116,6 +116,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 				.len = IEEE80211_MAX_SSID_LEN },
 	[NL80211_ATTR_AUTH_TYPE] = { .type = NLA_U32 },
 	[NL80211_ATTR_REASON_CODE] = { .type = NLA_U16 },
+	[NL80211_ATTR_FREQ_FIXED] = { .type = NLA_FLAG },
 };
 
 /* IE validation */
@@ -322,6 +323,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	CMD(assoc, ASSOCIATE);
 	CMD(deauth, DEAUTHENTICATE);
 	CMD(disassoc, DISASSOCIATE);
+	CMD(join_ibss, JOIN_IBSS);
 
 #undef CMD
 	nla_nest_end(msg, nl_cmds);
@@ -668,7 +670,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
 	struct cfg80211_registered_device *drv;
 	struct vif_params params;
 	int err, ifindex;
-	enum nl80211_iftype type;
+	enum nl80211_iftype otype, ntype;
 	struct net_device *dev;
 	u32 _flags, *flags = NULL;
 	bool change = false;
@@ -682,30 +684,27 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
 		goto unlock_rtnl;
 
 	ifindex = dev->ifindex;
-	type = dev->ieee80211_ptr->iftype;
+	otype = ntype = dev->ieee80211_ptr->iftype;
 	dev_put(dev);
 
 	if (info->attrs[NL80211_ATTR_IFTYPE]) {
-		enum nl80211_iftype ntype;
-
 		ntype = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]);
-		if (type != ntype)
+		if (otype != ntype)
 			change = true;
-		type = ntype;
-		if (type > NL80211_IFTYPE_MAX) {
+		if (ntype > NL80211_IFTYPE_MAX) {
 			err = -EINVAL;
 			goto unlock;
 		}
 	}
 
 	if (!drv->ops->change_virtual_intf ||
-	    !(drv->wiphy.interface_modes & (1 << type))) {
+	    !(drv->wiphy.interface_modes & (1 << ntype))) {
 		err = -EOPNOTSUPP;
 		goto unlock;
 	}
 
 	if (info->attrs[NL80211_ATTR_MESH_ID]) {
-		if (type != NL80211_IFTYPE_MESH_POINT) {
+		if (ntype != NL80211_IFTYPE_MESH_POINT) {
 			err = -EINVAL;
 			goto unlock;
 		}
@@ -715,7 +714,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	if (info->attrs[NL80211_ATTR_MNTR_FLAGS]) {
-		if (type != NL80211_IFTYPE_MONITOR) {
+		if (ntype != NL80211_IFTYPE_MONITOR) {
 			err = -EINVAL;
 			goto unlock;
 		}
@@ -730,12 +729,17 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
 
 	if (change)
 		err = drv->ops->change_virtual_intf(&drv->wiphy, ifindex,
-						    type, flags, &params);
+						    ntype, flags, &params);
 	else
 		err = 0;
 
 	dev = __dev_get_by_index(&init_net, ifindex);
-	WARN_ON(!dev || (!err && dev->ieee80211_ptr->iftype != type));
+	WARN_ON(!dev || (!err && dev->ieee80211_ptr->iftype != ntype));
+
+	if (dev && !err && (ntype != otype)) {
+		if (otype == NL80211_IFTYPE_ADHOC)
+			cfg80211_clear_ibss(dev);
+	}
 
  unlock:
 	cfg80211_put_dev(drv);
@@ -3052,6 +3056,114 @@ unlock_rtnl:
 	return err;
 }
 
+static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *drv;
+	struct net_device *dev;
+	struct cfg80211_ibss_params ibss;
+	struct wiphy *wiphy;
+	int err;
+
+	if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
+		return -EINVAL;
+
+	if (!info->attrs[NL80211_ATTR_WIPHY_FREQ] ||
+	    !info->attrs[NL80211_ATTR_SSID] ||
+	    !nla_len(info->attrs[NL80211_ATTR_SSID]))
+		return -EINVAL;
+
+	rtnl_lock();
+
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
+	if (err)
+		goto unlock_rtnl;
+
+	if (!drv->ops->join_ibss) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (!netif_running(dev)) {
+		err = -ENETDOWN;
+		goto out;
+	}
+
+	wiphy = &drv->wiphy;
+	memset(&ibss, 0, sizeof(ibss));
+
+	if (info->attrs[NL80211_ATTR_MAC])
+		ibss.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
+	ibss.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
+	ibss.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
+
+	if (info->attrs[NL80211_ATTR_IE]) {
+		ibss.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
+		ibss.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
+	}
+
+	ibss.channel = ieee80211_get_channel(wiphy,
+		nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
+	if (!ibss.channel ||
+	    ibss.channel->flags & IEEE80211_CHAN_NO_IBSS ||
+	    ibss.channel->flags & IEEE80211_CHAN_DISABLED) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	ibss.channel_fixed = !!info->attrs[NL80211_ATTR_FREQ_FIXED];
+
+	err = cfg80211_join_ibss(drv, dev, &ibss);
+
+out:
+	cfg80211_put_dev(drv);
+	dev_put(dev);
+unlock_rtnl:
+	rtnl_unlock();
+	return err;
+}
+
+static int nl80211_leave_ibss(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *drv;
+	struct net_device *dev;
+	int err;
+
+	rtnl_lock();
+
+	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
+	if (err)
+		goto unlock_rtnl;
+
+	if (!drv->ops->leave_ibss) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (!netif_running(dev)) {
+		err = -ENETDOWN;
+		goto out;
+	}
+
+	err = cfg80211_leave_ibss(drv, dev);
+
+out:
+	cfg80211_put_dev(drv);
+	dev_put(dev);
+unlock_rtnl:
+	rtnl_unlock();
+	return err;
+}
+
 static struct genl_ops nl80211_ops[] = {
 	{
 		.cmd = NL80211_CMD_GET_WIPHY,
@@ -3253,6 +3365,18 @@ static struct genl_ops nl80211_ops[] = {
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
+	{
+		.cmd = NL80211_CMD_JOIN_IBSS,
+		.doit = nl80211_join_ibss,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = NL80211_CMD_LEAVE_IBSS,
+		.doit = nl80211_leave_ibss,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
 };
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
 	.name = "mlme",
@@ -3466,6 +3590,40 @@ void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
 				NL80211_CMD_DISASSOCIATE);
 }
 
+void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
+			     struct net_device *netdev, const u8 *bssid,
+			     gfp_t gfp)
+{
+	struct sk_buff *msg;
+	void *hdr;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, gfp);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_JOIN_IBSS);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
+	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
+	NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid);
+
+	if (genlmsg_end(msg, hdr) < 0) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp);
+	return;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+}
+
 void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
 				 struct net_device *netdev, const u8 *addr,
 				 enum nl80211_key_type key_type, int key_id,
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index b3aaa59afa0..17d2d8bfaf7 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -34,4 +34,8 @@ nl80211_send_beacon_hint_event(struct wiphy *wiphy,
 			       struct ieee80211_channel *channel_before,
 			       struct ieee80211_channel *channel_after);
 
+void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
+			     struct net_device *netdev, const u8 *bssid,
+			     gfp_t gfp);
+
 #endif /* __NET_WIRELESS_NL80211_H */
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 6fd7bf7b448..57eaea26b67 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -285,3 +285,33 @@ int cfg80211_wext_siwmlme(struct net_device *dev,
 	}
 }
 EXPORT_SYMBOL(cfg80211_wext_siwmlme);
+
+
+/**
+ * cfg80211_wext_freq - get wext frequency for non-"auto"
+ * @wiphy: the wiphy
+ * @freq: the wext freq encoding
+ *
+ * Returns a channel, %NULL for auto, or an ERR_PTR for errors!
+ */
+struct ieee80211_channel *cfg80211_wext_freq(struct wiphy *wiphy,
+					     struct iw_freq *freq)
+{
+	if (freq->e == 0) {
+		if (freq->m < 0)
+			return NULL;
+		else
+			return ieee80211_get_channel(wiphy,
+				ieee80211_channel_to_frequency(freq->m));
+	} else {
+		int i, div = 1000000;
+		for (i = 0; i < freq->e; i++)
+			div /= 10;
+		if (div > 0)
+			return ieee80211_get_channel(wiphy, freq->m / div);
+		else
+			return ERR_PTR(-EINVAL);
+	}
+
+}
+EXPORT_SYMBOL(cfg80211_wext_freq);
-- 
cgit v1.2.3-70-g09d2


From d323655372590c533c275b1d798f9d1221efb5c6 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 20 Apr 2009 14:31:42 +0200
Subject: cfg80211: clean up includes

Trying to separate header files into net/wireless.h and
net/cfg80211.h has been a source of confusion. Remove
net/wireless.h (because there also is the linux/wireless.h)
and subsume everything into net/cfg80211.h -- except the
definitions for regulatory structures which get moved to
a new header net/regulatory.h.

The "new" net/cfg80211.h is now divided into sections.

There are no real changes in this patch but code shuffling
and some very minor documentation fixes.

I have also, to make things reflect reality, put in a
copyright line for Luis to net/regulatory.h since that
is probably exclusively written by him but was formerly
in a file that only had my copyright line.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Cc: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath/ar9170/ar9170.h |   2 +-
 drivers/net/wireless/ath/ar9170/usb.h    |   2 +-
 drivers/net/wireless/ath/ath9k/eeprom.h  |   2 +-
 drivers/net/wireless/ath/regd.c          |   1 -
 drivers/net/wireless/ath/regd.h          |   1 -
 drivers/net/wireless/rndis_wlan.c        |   1 -
 include/net/cfg80211.h                   | 709 +++++++++++++++++++++++++------
 include/net/mac80211.h                   |   1 -
 include/net/regulatory.h                 | 101 +++++
 include/net/wireless.h                   | 492 ---------------------
 net/mac80211/ht.c                        |   1 -
 net/mac80211/ieee80211_i.h               |   1 -
 net/mac80211/spectmgmt.c                 |   2 +-
 net/wireless/core.c                      |   1 -
 net/wireless/core.h                      |   1 -
 net/wireless/ibss.c                      |   1 -
 net/wireless/reg.c                       |   1 -
 net/wireless/util.c                      |   6 +-
 net/wireless/wext-compat.c               |   1 -
 19 files changed, 690 insertions(+), 637 deletions(-)
 create mode 100644 include/net/regulatory.h
 delete mode 100644 include/net/wireless.h

(limited to 'include')

diff --git a/drivers/net/wireless/ath/ar9170/ar9170.h b/drivers/net/wireless/ath/ar9170/ar9170.h
index f797495faa5..2522a190fdf 100644
--- a/drivers/net/wireless/ath/ar9170/ar9170.h
+++ b/drivers/net/wireless/ath/ar9170/ar9170.h
@@ -40,7 +40,7 @@
 
 #include <linux/completion.h>
 #include <linux/spinlock.h>
-#include <net/wireless.h>
+#include <net/cfg80211.h>
 #include <net/mac80211.h>
 #ifdef CONFIG_AR9170_LEDS
 #include <linux/leds.h>
diff --git a/drivers/net/wireless/ath/ar9170/usb.h b/drivers/net/wireless/ath/ar9170/usb.h
index f5852924cd6..ac42586495d 100644
--- a/drivers/net/wireless/ath/ar9170/usb.h
+++ b/drivers/net/wireless/ath/ar9170/usb.h
@@ -43,7 +43,7 @@
 #include <linux/completion.h>
 #include <linux/spinlock.h>
 #include <linux/leds.h>
-#include <net/wireless.h>
+#include <net/cfg80211.h>
 #include <net/mac80211.h>
 #include <linux/firmware.h>
 #include "eeprom.h"
diff --git a/drivers/net/wireless/ath/ath9k/eeprom.h b/drivers/net/wireless/ath/ath9k/eeprom.h
index 9a7715df5cf..7c59dc47f91 100644
--- a/drivers/net/wireless/ath/ath9k/eeprom.h
+++ b/drivers/net/wireless/ath/ath9k/eeprom.h
@@ -17,7 +17,7 @@
 #ifndef EEPROM_H
 #define EEPROM_H
 
-#include <net/wireless.h>
+#include <net/cfg80211.h>
 
 #define AH_USE_EEPROM   0x1
 
diff --git a/drivers/net/wireless/ath/regd.c b/drivers/net/wireless/ath/regd.c
index 526c7f1308d..fdf07c82208 100644
--- a/drivers/net/wireless/ath/regd.c
+++ b/drivers/net/wireless/ath/regd.c
@@ -18,7 +18,6 @@
 #include <linux/slab.h>
 #include <net/cfg80211.h>
 #include <net/mac80211.h>
-#include <net/wireless.h>
 #include "regd.h"
 #include "regd_common.h"
 
diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h
index eba7c7123b5..07291ccb23f 100644
--- a/drivers/net/wireless/ath/regd.h
+++ b/drivers/net/wireless/ath/regd.h
@@ -20,7 +20,6 @@
 #include <linux/nl80211.h>
 
 #include <net/cfg80211.h>
-#include <net/wireless.h>
 
 #define NO_CTL 0xff
 #define SD_NO_CTL               0xE0
diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index 239e6a14eae..43e0ba61df2 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c
@@ -42,7 +42,6 @@
 #include <linux/ctype.h>
 #include <linux/spinlock.h>
 #include <net/iw_handler.h>
-#include <net/wireless.h>
 #include <net/cfg80211.h>
 #include <linux/usb/usbnet.h>
 #include <linux/usb/rndis_host.h>
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 5287a3e56e7..601eac64b02 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1,70 +1,217 @@
 #ifndef __NET_CFG80211_H
 #define __NET_CFG80211_H
+/*
+ * 802.11 device and configuration interface
+ *
+ * Copyright 2006-2009	Johannes Berg <johannes@sipsolutions.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
 
+#include <linux/netdevice.h>
+#include <linux/debugfs.h>
+#include <linux/list.h>
 #include <linux/netlink.h>
 #include <linux/skbuff.h>
 #include <linux/nl80211.h>
 #include <linux/if_ether.h>
 #include <linux/ieee80211.h>
-#include <linux/wireless.h>
-#include <net/iw_handler.h>
-#include <net/genetlink.h>
+#include <net/regulatory.h>
+
 /* remove once we remove the wext stuff */
+#include <net/iw_handler.h>
+#include <linux/wireless.h>
+
 
 /*
- * 802.11 configuration in-kernel interface
+ * wireless hardware capability structures
+ */
+
+/**
+ * enum ieee80211_band - supported frequency bands
+ *
+ * The bands are assigned this way because the supported
+ * bitrates differ in these bands.
  *
- * Copyright 2006, 2007	Johannes Berg <johannes@sipsolutions.net>
+ * @IEEE80211_BAND_2GHZ: 2.4GHz ISM band
+ * @IEEE80211_BAND_5GHZ: around 5GHz band (4.9-5.7)
  */
+enum ieee80211_band {
+	IEEE80211_BAND_2GHZ,
+	IEEE80211_BAND_5GHZ,
+
+	/* keep last */
+	IEEE80211_NUM_BANDS
+};
 
 /**
- * struct vif_params - describes virtual interface parameters
- * @mesh_id: mesh ID to use
- * @mesh_id_len: length of the mesh ID
+ * enum ieee80211_channel_flags - channel flags
+ *
+ * Channel flags set by the regulatory control code.
+ *
+ * @IEEE80211_CHAN_DISABLED: This channel is disabled.
+ * @IEEE80211_CHAN_PASSIVE_SCAN: Only passive scanning is permitted
+ *	on this channel.
+ * @IEEE80211_CHAN_NO_IBSS: IBSS is not allowed on this channel.
+ * @IEEE80211_CHAN_RADAR: Radar detection is required on this channel.
+ * @IEEE80211_CHAN_NO_FAT_ABOVE: extension channel above this channel
+ * 	is not permitted.
+ * @IEEE80211_CHAN_NO_FAT_BELOW: extension channel below this channel
+ * 	is not permitted.
  */
-struct vif_params {
-       u8 *mesh_id;
-       int mesh_id_len;
+enum ieee80211_channel_flags {
+	IEEE80211_CHAN_DISABLED		= 1<<0,
+	IEEE80211_CHAN_PASSIVE_SCAN	= 1<<1,
+	IEEE80211_CHAN_NO_IBSS		= 1<<2,
+	IEEE80211_CHAN_RADAR		= 1<<3,
+	IEEE80211_CHAN_NO_FAT_ABOVE	= 1<<4,
+	IEEE80211_CHAN_NO_FAT_BELOW	= 1<<5,
 };
 
-/* Radiotap header iteration
- *   implemented in net/wireless/radiotap.c
- *   docs in Documentation/networking/radiotap-headers.txt
+/**
+ * struct ieee80211_channel - channel definition
+ *
+ * This structure describes a single channel for use
+ * with cfg80211.
+ *
+ * @center_freq: center frequency in MHz
+ * @max_bandwidth: maximum allowed bandwidth for this channel, in MHz
+ * @hw_value: hardware-specific value for the channel
+ * @flags: channel flags from &enum ieee80211_channel_flags.
+ * @orig_flags: channel flags at registration time, used by regulatory
+ *	code to support devices with additional restrictions
+ * @band: band this channel belongs to.
+ * @max_antenna_gain: maximum antenna gain in dBi
+ * @max_power: maximum transmission power (in dBm)
+ * @beacon_found: helper to regulatory code to indicate when a beacon
+ *	has been found on this channel. Use regulatory_hint_found_beacon()
+ *	to enable this, this is is useful only on 5 GHz band.
+ * @orig_mag: internal use
+ * @orig_mpwr: internal use
  */
+struct ieee80211_channel {
+	enum ieee80211_band band;
+	u16 center_freq;
+	u8 max_bandwidth;
+	u16 hw_value;
+	u32 flags;
+	int max_antenna_gain;
+	int max_power;
+	bool beacon_found;
+	u32 orig_flags;
+	int orig_mag, orig_mpwr;
+};
+
 /**
- * struct ieee80211_radiotap_iterator - tracks walk thru present radiotap args
- * @rtheader: pointer to the radiotap header we are walking through
- * @max_length: length of radiotap header in cpu byte ordering
- * @this_arg_index: IEEE80211_RADIOTAP_... index of current arg
- * @this_arg: pointer to current radiotap arg
- * @arg_index: internal next argument index
- * @arg: internal next argument pointer
- * @next_bitmap: internal pointer to next present u32
- * @bitmap_shifter: internal shifter for curr u32 bitmap, b0 set == arg present
+ * enum ieee80211_rate_flags - rate flags
+ *
+ * Hardware/specification flags for rates. These are structured
+ * in a way that allows using the same bitrate structure for
+ * different bands/PHY modes.
+ *
+ * @IEEE80211_RATE_SHORT_PREAMBLE: Hardware can send with short
+ *	preamble on this bitrate; only relevant in 2.4GHz band and
+ *	with CCK rates.
+ * @IEEE80211_RATE_MANDATORY_A: This bitrate is a mandatory rate
+ *	when used with 802.11a (on the 5 GHz band); filled by the
+ *	core code when registering the wiphy.
+ * @IEEE80211_RATE_MANDATORY_B: This bitrate is a mandatory rate
+ *	when used with 802.11b (on the 2.4 GHz band); filled by the
+ *	core code when registering the wiphy.
+ * @IEEE80211_RATE_MANDATORY_G: This bitrate is a mandatory rate
+ *	when used with 802.11g (on the 2.4 GHz band); filled by the
+ *	core code when registering the wiphy.
+ * @IEEE80211_RATE_ERP_G: This is an ERP rate in 802.11g mode.
  */
+enum ieee80211_rate_flags {
+	IEEE80211_RATE_SHORT_PREAMBLE	= 1<<0,
+	IEEE80211_RATE_MANDATORY_A	= 1<<1,
+	IEEE80211_RATE_MANDATORY_B	= 1<<2,
+	IEEE80211_RATE_MANDATORY_G	= 1<<3,
+	IEEE80211_RATE_ERP_G		= 1<<4,
+};
 
-struct ieee80211_radiotap_iterator {
-	struct ieee80211_radiotap_header *rtheader;
-	int max_length;
-	int this_arg_index;
-	u8 *this_arg;
+/**
+ * struct ieee80211_rate - bitrate definition
+ *
+ * This structure describes a bitrate that an 802.11 PHY can
+ * operate with. The two values @hw_value and @hw_value_short
+ * are only for driver use when pointers to this structure are
+ * passed around.
+ *
+ * @flags: rate-specific flags
+ * @bitrate: bitrate in units of 100 Kbps
+ * @hw_value: driver/hardware value for this rate
+ * @hw_value_short: driver/hardware value for this rate when
+ *	short preamble is used
+ */
+struct ieee80211_rate {
+	u32 flags;
+	u16 bitrate;
+	u16 hw_value, hw_value_short;
+};
 
-	int arg_index;
-	u8 *arg;
-	__le32 *next_bitmap;
-	u32 bitmap_shifter;
+/**
+ * struct ieee80211_sta_ht_cap - STA's HT capabilities
+ *
+ * This structure describes most essential parameters needed
+ * to describe 802.11n HT capabilities for an STA.
+ *
+ * @ht_supported: is HT supported by the STA
+ * @cap: HT capabilities map as described in 802.11n spec
+ * @ampdu_factor: Maximum A-MPDU length factor
+ * @ampdu_density: Minimum A-MPDU spacing
+ * @mcs: Supported MCS rates
+ */
+struct ieee80211_sta_ht_cap {
+	u16 cap; /* use IEEE80211_HT_CAP_ */
+	bool ht_supported;
+	u8 ampdu_factor;
+	u8 ampdu_density;
+	struct ieee80211_mcs_info mcs;
 };
 
-extern int ieee80211_radiotap_iterator_init(
-   struct ieee80211_radiotap_iterator *iterator,
-   struct ieee80211_radiotap_header *radiotap_header,
-   int max_length);
+/**
+ * struct ieee80211_supported_band - frequency band definition
+ *
+ * This structure describes a frequency band a wiphy
+ * is able to operate in.
+ *
+ * @channels: Array of channels the hardware can operate in
+ *	in this band.
+ * @band: the band this structure represents
+ * @n_channels: Number of channels in @channels
+ * @bitrates: Array of bitrates the hardware can operate with
+ *	in this band. Must be sorted to give a valid "supported
+ *	rates" IE, i.e. CCK rates first, then OFDM.
+ * @n_bitrates: Number of bitrates in @bitrates
+ */
+struct ieee80211_supported_band {
+	struct ieee80211_channel *channels;
+	struct ieee80211_rate *bitrates;
+	enum ieee80211_band band;
+	int n_channels;
+	int n_bitrates;
+	struct ieee80211_sta_ht_cap ht_cap;
+};
 
-extern int ieee80211_radiotap_iterator_next(
-   struct ieee80211_radiotap_iterator *iterator);
+/*
+ * Wireless hardware/device configuration structures and methods
+ */
 
+/**
+ * struct vif_params - describes virtual interface parameters
+ * @mesh_id: mesh ID to use
+ * @mesh_id_len: length of the mesh ID
+ */
+struct vif_params {
+       u8 *mesh_id;
+       int mesh_id_len;
+};
 
- /**
+/**
  * struct key_params - key information
  *
  * Information about a key
@@ -347,92 +494,6 @@ struct bss_parameters {
 	u8 basic_rates_len;
 };
 
-/**
- * enum environment_cap - Environment parsed from country IE
- * @ENVIRON_ANY: indicates country IE applies to both indoor and
- *	outdoor operation.
- * @ENVIRON_INDOOR: indicates country IE applies only to indoor operation
- * @ENVIRON_OUTDOOR: indicates country IE applies only to outdoor operation
- */
-enum environment_cap {
-	ENVIRON_ANY,
-	ENVIRON_INDOOR,
-	ENVIRON_OUTDOOR,
-};
-
-/**
- * struct regulatory_request - used to keep track of regulatory requests
- *
- * @wiphy_idx: this is set if this request's initiator is
- * 	%REGDOM_SET_BY_COUNTRY_IE or %REGDOM_SET_BY_DRIVER. This
- * 	can be used by the wireless core to deal with conflicts
- * 	and potentially inform users of which devices specifically
- * 	cased the conflicts.
- * @initiator: indicates who sent this request, could be any of
- * 	of those set in nl80211_reg_initiator (%NL80211_REGDOM_SET_BY_*)
- * @alpha2: the ISO / IEC 3166 alpha2 country code of the requested
- * 	regulatory domain. We have a few special codes:
- * 	00 - World regulatory domain
- * 	99 - built by driver but a specific alpha2 cannot be determined
- * 	98 - result of an intersection between two regulatory domains
- * @intersect: indicates whether the wireless core should intersect
- * 	the requested regulatory domain with the presently set regulatory
- * 	domain.
- * @country_ie_checksum: checksum of the last processed and accepted
- * 	country IE
- * @country_ie_env: lets us know if the AP is telling us we are outdoor,
- * 	indoor, or if it doesn't matter
- * @list: used to insert into the reg_requests_list linked list
- */
-struct regulatory_request {
-	int wiphy_idx;
-	enum nl80211_reg_initiator initiator;
-	char alpha2[2];
-	bool intersect;
-	u32 country_ie_checksum;
-	enum environment_cap country_ie_env;
-	struct list_head list;
-};
-
-struct ieee80211_freq_range {
-	u32 start_freq_khz;
-	u32 end_freq_khz;
-	u32 max_bandwidth_khz;
-};
-
-struct ieee80211_power_rule {
-	u32 max_antenna_gain;
-	u32 max_eirp;
-};
-
-struct ieee80211_reg_rule {
-	struct ieee80211_freq_range freq_range;
-	struct ieee80211_power_rule power_rule;
-	u32 flags;
-};
-
-struct ieee80211_regdomain {
-	u32 n_reg_rules;
-	char alpha2[2];
-	struct ieee80211_reg_rule reg_rules[];
-};
-
-#define MHZ_TO_KHZ(freq) ((freq) * 1000)
-#define KHZ_TO_MHZ(freq) ((freq) / 1000)
-#define DBI_TO_MBI(gain) ((gain) * 100)
-#define MBI_TO_DBI(gain) ((gain) / 100)
-#define DBM_TO_MBM(gain) ((gain) * 100)
-#define MBM_TO_DBM(gain) ((gain) / 100)
-
-#define REG_RULE(start, end, bw, gain, eirp, reg_flags) { \
-	.freq_range.start_freq_khz = MHZ_TO_KHZ(start), \
-	.freq_range.end_freq_khz = MHZ_TO_KHZ(end), \
-	.freq_range.max_bandwidth_khz = MHZ_TO_KHZ(bw), \
-	.power_rule.max_antenna_gain = DBI_TO_MBI(gain), \
-	.power_rule.max_eirp = DBM_TO_MBM(eirp), \
-	.flags = reg_flags, \
-	}
-
 struct mesh_config {
 	/* Timeouts in ms */
 	/* Mesh plink management parameters */
@@ -853,7 +914,396 @@ struct cfg80211_ops {
 	int	(*leave_ibss)(struct wiphy *wiphy, struct net_device *dev);
 };
 
-/* temporary wext handlers */
+/*
+ * wireless hardware and networking interfaces structures
+ * and registration/helper functions
+ */
+
+/**
+ * struct wiphy - wireless hardware description
+ * @idx: the wiphy index assigned to this item
+ * @class_dev: the class device representing /sys/class/ieee80211/<wiphy-name>
+ * @custom_regulatory: tells us the driver for this device
+ * 	has its own custom regulatory domain and cannot identify the
+ * 	ISO / IEC 3166 alpha2 it belongs to. When this is enabled
+ * 	we will disregard the first regulatory hint (when the
+ * 	initiator is %REGDOM_SET_BY_CORE).
+ * @strict_regulatory: tells us the driver for this device will ignore
+ * 	regulatory domain settings until it gets its own regulatory domain
+ * 	via its regulatory_hint(). After its gets its own regulatory domain
+ * 	it will only allow further regulatory domain settings to further
+ * 	enhance compliance. For example if channel 13 and 14 are disabled
+ * 	by this regulatory domain no user regulatory domain can enable these
+ * 	channels at a later time. This can be used for devices which do not
+ * 	have calibration information gauranteed for frequencies or settings
+ * 	outside of its regulatory domain.
+ * @reg_notifier: the driver's regulatory notification callback
+ * @regd: the driver's regulatory domain, if one was requested via
+ * 	the regulatory_hint() API. This can be used by the driver
+ *	on the reg_notifier() if it chooses to ignore future
+ *	regulatory domain changes caused by other drivers.
+ * @signal_type: signal type reported in &struct cfg80211_bss.
+ * @cipher_suites: supported cipher suites
+ * @n_cipher_suites: number of supported cipher suites
+ */
+struct wiphy {
+	/* assign these fields before you register the wiphy */
+
+	/* permanent MAC address */
+	u8 perm_addr[ETH_ALEN];
+
+	/* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */
+	u16 interface_modes;
+
+	bool custom_regulatory;
+	bool strict_regulatory;
+
+	enum cfg80211_signal_type signal_type;
+
+	int bss_priv_size;
+	u8 max_scan_ssids;
+	u16 max_scan_ie_len;
+
+	int n_cipher_suites;
+	const u32 *cipher_suites;
+
+	/* If multiple wiphys are registered and you're handed e.g.
+	 * a regular netdev with assigned ieee80211_ptr, you won't
+	 * know whether it points to a wiphy your driver has registered
+	 * or not. Assign this to something global to your driver to
+	 * help determine whether you own this wiphy or not. */
+	void *privid;
+
+	struct ieee80211_supported_band *bands[IEEE80211_NUM_BANDS];
+
+	/* Lets us get back the wiphy on the callback */
+	int (*reg_notifier)(struct wiphy *wiphy,
+			    struct regulatory_request *request);
+
+	/* fields below are read-only, assigned by cfg80211 */
+
+	const struct ieee80211_regdomain *regd;
+
+	/* the item in /sys/class/ieee80211/ points to this,
+	 * you need use set_wiphy_dev() (see below) */
+	struct device dev;
+
+	/* dir in debugfs: ieee80211/<wiphyname> */
+	struct dentry *debugfsdir;
+
+	char priv[0] __attribute__((__aligned__(NETDEV_ALIGN)));
+};
+
+/**
+ * wiphy_priv - return priv from wiphy
+ *
+ * @wiphy: the wiphy whose priv pointer to return
+ */
+static inline void *wiphy_priv(struct wiphy *wiphy)
+{
+	BUG_ON(!wiphy);
+	return &wiphy->priv;
+}
+
+/**
+ * set_wiphy_dev - set device pointer for wiphy
+ *
+ * @wiphy: The wiphy whose device to bind
+ * @dev: The device to parent it to
+ */
+static inline void set_wiphy_dev(struct wiphy *wiphy, struct device *dev)
+{
+	wiphy->dev.parent = dev;
+}
+
+/**
+ * wiphy_dev - get wiphy dev pointer
+ *
+ * @wiphy: The wiphy whose device struct to look up
+ */
+static inline struct device *wiphy_dev(struct wiphy *wiphy)
+{
+	return wiphy->dev.parent;
+}
+
+/**
+ * wiphy_name - get wiphy name
+ *
+ * @wiphy: The wiphy whose name to return
+ */
+static inline const char *wiphy_name(struct wiphy *wiphy)
+{
+	return dev_name(&wiphy->dev);
+}
+
+/**
+ * wiphy_new - create a new wiphy for use with cfg80211
+ *
+ * @ops: The configuration operations for this device
+ * @sizeof_priv: The size of the private area to allocate
+ *
+ * Create a new wiphy and associate the given operations with it.
+ * @sizeof_priv bytes are allocated for private use.
+ *
+ * The returned pointer must be assigned to each netdev's
+ * ieee80211_ptr for proper operation.
+ */
+struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv);
+
+/**
+ * wiphy_register - register a wiphy with cfg80211
+ *
+ * @wiphy: The wiphy to register.
+ *
+ * Returns a non-negative wiphy index or a negative error code.
+ */
+extern int wiphy_register(struct wiphy *wiphy);
+
+/**
+ * wiphy_unregister - deregister a wiphy from cfg80211
+ *
+ * @wiphy: The wiphy to unregister.
+ *
+ * After this call, no more requests can be made with this priv
+ * pointer, but the call may sleep to wait for an outstanding
+ * request that is being handled.
+ */
+extern void wiphy_unregister(struct wiphy *wiphy);
+
+/**
+ * wiphy_free - free wiphy
+ *
+ * @wiphy: The wiphy to free
+ */
+extern void wiphy_free(struct wiphy *wiphy);
+
+/**
+ * struct wireless_dev - wireless per-netdev state
+ *
+ * This structure must be allocated by the driver/stack
+ * that uses the ieee80211_ptr field in struct net_device
+ * (this is intentional so it can be allocated along with
+ * the netdev.)
+ *
+ * @wiphy: pointer to hardware description
+ * @iftype: interface type
+ * @list: (private) Used to collect the interfaces
+ * @netdev: (private) Used to reference back to the netdev
+ * @current_bss: (private) Used by the internal configuration code
+ * @bssid: (private) Used by the internal configuration code
+ * @ssid: (private) Used by the internal configuration code
+ * @ssid_len: (private) Used by the internal configuration code
+ * @wext: (private) Used by the internal wireless extensions compat code
+ * @wext_bssid: (private) Used by the internal wireless extensions compat code
+ */
+struct wireless_dev {
+	struct wiphy *wiphy;
+	enum nl80211_iftype iftype;
+
+	/* private to the generic wireless code */
+	struct list_head list;
+	struct net_device *netdev;
+
+	/* currently used for IBSS - might be rearranged in the future */
+	struct cfg80211_bss *current_bss;
+	u8 bssid[ETH_ALEN];
+	u8 ssid[IEEE80211_MAX_SSID_LEN];
+	u8 ssid_len;
+
+#ifdef CONFIG_WIRELESS_EXT
+	/* wext data */
+	struct cfg80211_ibss_params wext;
+	u8 wext_bssid[ETH_ALEN];
+#endif
+};
+
+/**
+ * wdev_priv - return wiphy priv from wireless_dev
+ *
+ * @wdev: The wireless device whose wiphy's priv pointer to return
+ */
+static inline void *wdev_priv(struct wireless_dev *wdev)
+{
+	BUG_ON(!wdev);
+	return wiphy_priv(wdev->wiphy);
+}
+
+/*
+ * Utility functions
+ */
+
+/**
+ * ieee80211_channel_to_frequency - convert channel number to frequency
+ */
+extern int ieee80211_channel_to_frequency(int chan);
+
+/**
+ * ieee80211_frequency_to_channel - convert frequency to channel number
+ */
+extern int ieee80211_frequency_to_channel(int freq);
+
+/*
+ * Name indirection necessary because the ieee80211 code also has
+ * a function named "ieee80211_get_channel", so if you include
+ * cfg80211's header file you get cfg80211's version, if you try
+ * to include both header files you'll (rightfully!) get a symbol
+ * clash.
+ */
+extern struct ieee80211_channel *__ieee80211_get_channel(struct wiphy *wiphy,
+							 int freq);
+/**
+ * ieee80211_get_channel - get channel struct from wiphy for specified frequency
+ */
+static inline struct ieee80211_channel *
+ieee80211_get_channel(struct wiphy *wiphy, int freq)
+{
+	return __ieee80211_get_channel(wiphy, freq);
+}
+
+/**
+ * ieee80211_get_response_rate - get basic rate for a given rate
+ *
+ * @sband: the band to look for rates in
+ * @basic_rates: bitmap of basic rates
+ * @bitrate: the bitrate for which to find the basic rate
+ *
+ * This function returns the basic rate corresponding to a given
+ * bitrate, that is the next lower bitrate contained in the basic
+ * rate map, which is, for this function, given as a bitmap of
+ * indices of rates in the band's bitrate table.
+ */
+struct ieee80211_rate *
+ieee80211_get_response_rate(struct ieee80211_supported_band *sband,
+			    u32 basic_rates, int bitrate);
+
+/*
+ * Radiotap parsing functions -- for controlled injection support
+ *
+ * Implemented in net/wireless/radiotap.c
+ * Documentation in Documentation/networking/radiotap-headers.txt
+ */
+
+/**
+ * struct ieee80211_radiotap_iterator - tracks walk thru present radiotap args
+ * @rtheader: pointer to the radiotap header we are walking through
+ * @max_length: length of radiotap header in cpu byte ordering
+ * @this_arg_index: IEEE80211_RADIOTAP_... index of current arg
+ * @this_arg: pointer to current radiotap arg
+ * @arg_index: internal next argument index
+ * @arg: internal next argument pointer
+ * @next_bitmap: internal pointer to next present u32
+ * @bitmap_shifter: internal shifter for curr u32 bitmap, b0 set == arg present
+ */
+
+struct ieee80211_radiotap_iterator {
+	struct ieee80211_radiotap_header *rtheader;
+	int max_length;
+	int this_arg_index;
+	u8 *this_arg;
+
+	int arg_index;
+	u8 *arg;
+	__le32 *next_bitmap;
+	u32 bitmap_shifter;
+};
+
+extern int ieee80211_radiotap_iterator_init(
+   struct ieee80211_radiotap_iterator *iterator,
+   struct ieee80211_radiotap_header *radiotap_header,
+   int max_length);
+
+extern int ieee80211_radiotap_iterator_next(
+   struct ieee80211_radiotap_iterator *iterator);
+
+/*
+ * Regulatory helper functions for wiphys
+ */
+
+/**
+ * regulatory_hint - driver hint to the wireless core a regulatory domain
+ * @wiphy: the wireless device giving the hint (used only for reporting
+ *	conflicts)
+ * @alpha2: the ISO/IEC 3166 alpha2 the driver claims its regulatory domain
+ * 	should be in. If @rd is set this should be NULL. Note that if you
+ * 	set this to NULL you should still set rd->alpha2 to some accepted
+ * 	alpha2.
+ *
+ * Wireless drivers can use this function to hint to the wireless core
+ * what it believes should be the current regulatory domain by
+ * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory
+ * domain should be in or by providing a completely build regulatory domain.
+ * If the driver provides an ISO/IEC 3166 alpha2 userspace will be queried
+ * for a regulatory domain structure for the respective country.
+ *
+ * The wiphy must have been registered to cfg80211 prior to this call.
+ * For cfg80211 drivers this means you must first use wiphy_register(),
+ * for mac80211 drivers you must first use ieee80211_register_hw().
+ *
+ * Drivers should check the return value, its possible you can get
+ * an -ENOMEM.
+ */
+extern int regulatory_hint(struct wiphy *wiphy, const char *alpha2);
+
+/**
+ * regulatory_hint_11d - hints a country IE as a regulatory domain
+ * @wiphy: the wireless device giving the hint (used only for reporting
+ *	conflicts)
+ * @country_ie: pointer to the country IE
+ * @country_ie_len: length of the country IE
+ *
+ * We will intersect the rd with the what CRDA tells us should apply
+ * for the alpha2 this country IE belongs to, this prevents APs from
+ * sending us incorrect or outdated information against a country.
+ */
+extern void regulatory_hint_11d(struct wiphy *wiphy,
+				u8 *country_ie,
+				u8 country_ie_len);
+/**
+ * wiphy_apply_custom_regulatory - apply a custom driver regulatory domain
+ * @wiphy: the wireless device we want to process the regulatory domain on
+ * @regd: the custom regulatory domain to use for this wiphy
+ *
+ * Drivers can sometimes have custom regulatory domains which do not apply
+ * to a specific country. Drivers can use this to apply such custom regulatory
+ * domains. This routine must be called prior to wiphy registration. The
+ * custom regulatory domain will be trusted completely and as such previous
+ * default channel settings will be disregarded. If no rule is found for a
+ * channel on the regulatory domain the channel will be disabled.
+ */
+extern void wiphy_apply_custom_regulatory(
+	struct wiphy *wiphy,
+	const struct ieee80211_regdomain *regd);
+
+/**
+ * freq_reg_info - get regulatory information for the given frequency
+ * @wiphy: the wiphy for which we want to process this rule for
+ * @center_freq: Frequency in KHz for which we want regulatory information for
+ * @bandwidth: the bandwidth requirement you have in KHz, if you do not have one
+ * 	you can set this to 0. If this frequency is allowed we then set
+ * 	this value to the maximum allowed bandwidth.
+ * @reg_rule: the regulatory rule which we have for this frequency
+ *
+ * Use this function to get the regulatory rule for a specific frequency on
+ * a given wireless device. If the device has a specific regulatory domain
+ * it wants to follow we respect that unless a country IE has been received
+ * and processed already.
+ *
+ * Returns 0 if it was able to find a valid regulatory rule which does
+ * apply to the given center_freq otherwise it returns non-zero. It will
+ * also return -ERANGE if we determine the given center_freq does not even have
+ * a regulatory rule for a frequency range in the center_freq's band. See
+ * freq_in_rule_band() for our current definition of a band -- this is purely
+ * subjective and right now its 802.11 specific.
+ */
+extern int freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 *bandwidth,
+			 const struct ieee80211_reg_rule **reg_rule);
+
+/*
+ * Temporary wext handlers & helper functions
+ *
+ * In the future cfg80211 will simply assign the entire wext handler
+ * structure to netdevs it manages, but we're not there yet.
+ */
 int cfg80211_wext_giwname(struct net_device *dev,
 			  struct iw_request_info *info,
 			  char *name, char *extra);
@@ -892,10 +1342,14 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev,
 			     struct iw_request_info *info,
 			     struct sockaddr *ap_addr, char *extra);
 
-/* wext helper for now (to be removed) */
 struct ieee80211_channel *cfg80211_wext_freq(struct wiphy *wiphy,
 					     struct iw_freq *freq);
 
+/*
+ * callbacks for asynchronous cfg80211 methods, notification
+ * functions and BSS handling helpers
+ */
+
 /**
  * cfg80211_scan_done - notify that scan finished
  *
@@ -949,6 +1403,7 @@ struct cfg80211_bss *cfg80211_get_mesh(struct wiphy *wiphy,
 				       const u8 *meshid, size_t meshidlen,
 				       const u8 *meshcfg);
 void cfg80211_put_bss(struct cfg80211_bss *bss);
+
 /**
  * cfg80211_unlink_bss - unlink BSS from internal data structures
  * @wiphy: the wiphy
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 52808bdcc6c..d9686917252 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -19,7 +19,6 @@
 #include <linux/wireless.h>
 #include <linux/device.h>
 #include <linux/ieee80211.h>
-#include <net/wireless.h>
 #include <net/cfg80211.h>
 
 /**
diff --git a/include/net/regulatory.h b/include/net/regulatory.h
new file mode 100644
index 00000000000..47995b81c5d
--- /dev/null
+++ b/include/net/regulatory.h
@@ -0,0 +1,101 @@
+#ifndef __NET_REGULATORY_H
+#define __NET_REGULATORY_H
+/*
+ * regulatory support structures
+ *
+ * Copyright 2008-2009	Luis R. Rodriguez <lrodriguez@atheros.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+
+/**
+ * enum environment_cap - Environment parsed from country IE
+ * @ENVIRON_ANY: indicates country IE applies to both indoor and
+ *	outdoor operation.
+ * @ENVIRON_INDOOR: indicates country IE applies only to indoor operation
+ * @ENVIRON_OUTDOOR: indicates country IE applies only to outdoor operation
+ */
+enum environment_cap {
+	ENVIRON_ANY,
+	ENVIRON_INDOOR,
+	ENVIRON_OUTDOOR,
+};
+
+/**
+ * struct regulatory_request - used to keep track of regulatory requests
+ *
+ * @wiphy_idx: this is set if this request's initiator is
+ * 	%REGDOM_SET_BY_COUNTRY_IE or %REGDOM_SET_BY_DRIVER. This
+ * 	can be used by the wireless core to deal with conflicts
+ * 	and potentially inform users of which devices specifically
+ * 	cased the conflicts.
+ * @initiator: indicates who sent this request, could be any of
+ * 	of those set in nl80211_reg_initiator (%NL80211_REGDOM_SET_BY_*)
+ * @alpha2: the ISO / IEC 3166 alpha2 country code of the requested
+ * 	regulatory domain. We have a few special codes:
+ * 	00 - World regulatory domain
+ * 	99 - built by driver but a specific alpha2 cannot be determined
+ * 	98 - result of an intersection between two regulatory domains
+ * @intersect: indicates whether the wireless core should intersect
+ * 	the requested regulatory domain with the presently set regulatory
+ * 	domain.
+ * @country_ie_checksum: checksum of the last processed and accepted
+ * 	country IE
+ * @country_ie_env: lets us know if the AP is telling us we are outdoor,
+ * 	indoor, or if it doesn't matter
+ * @list: used to insert into the reg_requests_list linked list
+ */
+struct regulatory_request {
+	int wiphy_idx;
+	enum nl80211_reg_initiator initiator;
+	char alpha2[2];
+	bool intersect;
+	u32 country_ie_checksum;
+	enum environment_cap country_ie_env;
+	struct list_head list;
+};
+
+struct ieee80211_freq_range {
+	u32 start_freq_khz;
+	u32 end_freq_khz;
+	u32 max_bandwidth_khz;
+};
+
+struct ieee80211_power_rule {
+	u32 max_antenna_gain;
+	u32 max_eirp;
+};
+
+struct ieee80211_reg_rule {
+	struct ieee80211_freq_range freq_range;
+	struct ieee80211_power_rule power_rule;
+	u32 flags;
+};
+
+struct ieee80211_regdomain {
+	u32 n_reg_rules;
+	char alpha2[2];
+	struct ieee80211_reg_rule reg_rules[];
+};
+
+#define MHZ_TO_KHZ(freq) ((freq) * 1000)
+#define KHZ_TO_MHZ(freq) ((freq) / 1000)
+#define DBI_TO_MBI(gain) ((gain) * 100)
+#define MBI_TO_DBI(gain) ((gain) / 100)
+#define DBM_TO_MBM(gain) ((gain) * 100)
+#define MBM_TO_DBM(gain) ((gain) / 100)
+
+#define REG_RULE(start, end, bw, gain, eirp, reg_flags) \
+{							\
+	.freq_range.start_freq_khz = MHZ_TO_KHZ(start),	\
+	.freq_range.end_freq_khz = MHZ_TO_KHZ(end),	\
+	.freq_range.max_bandwidth_khz = MHZ_TO_KHZ(bw),	\
+	.power_rule.max_antenna_gain = DBI_TO_MBI(gain),\
+	.power_rule.max_eirp = DBM_TO_MBM(eirp),	\
+	.flags = reg_flags,				\
+}
+
+#endif
diff --git a/include/net/wireless.h b/include/net/wireless.h
deleted file mode 100644
index abd27b03333..00000000000
--- a/include/net/wireless.h
+++ /dev/null
@@ -1,492 +0,0 @@
-#ifndef __NET_WIRELESS_H
-#define __NET_WIRELESS_H
-
-/*
- * 802.11 device management
- *
- * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
- */
-
-#include <linux/netdevice.h>
-#include <linux/debugfs.h>
-#include <linux/list.h>
-#include <linux/ieee80211.h>
-#include <net/cfg80211.h>
-
-/**
- * enum ieee80211_band - supported frequency bands
- *
- * The bands are assigned this way because the supported
- * bitrates differ in these bands.
- *
- * @IEEE80211_BAND_2GHZ: 2.4GHz ISM band
- * @IEEE80211_BAND_5GHZ: around 5GHz band (4.9-5.7)
- */
-enum ieee80211_band {
-	IEEE80211_BAND_2GHZ,
-	IEEE80211_BAND_5GHZ,
-
-	/* keep last */
-	IEEE80211_NUM_BANDS
-};
-
-/**
- * enum ieee80211_channel_flags - channel flags
- *
- * Channel flags set by the regulatory control code.
- *
- * @IEEE80211_CHAN_DISABLED: This channel is disabled.
- * @IEEE80211_CHAN_PASSIVE_SCAN: Only passive scanning is permitted
- *	on this channel.
- * @IEEE80211_CHAN_NO_IBSS: IBSS is not allowed on this channel.
- * @IEEE80211_CHAN_RADAR: Radar detection is required on this channel.
- * @IEEE80211_CHAN_NO_FAT_ABOVE: extension channel above this channel
- * 	is not permitted.
- * @IEEE80211_CHAN_NO_FAT_BELOW: extension channel below this channel
- * 	is not permitted.
- */
-enum ieee80211_channel_flags {
-	IEEE80211_CHAN_DISABLED		= 1<<0,
-	IEEE80211_CHAN_PASSIVE_SCAN	= 1<<1,
-	IEEE80211_CHAN_NO_IBSS		= 1<<2,
-	IEEE80211_CHAN_RADAR		= 1<<3,
-	IEEE80211_CHAN_NO_FAT_ABOVE	= 1<<4,
-	IEEE80211_CHAN_NO_FAT_BELOW	= 1<<5,
-};
-
-/**
- * struct ieee80211_channel - channel definition
- *
- * This structure describes a single channel for use
- * with cfg80211.
- *
- * @center_freq: center frequency in MHz
- * @max_bandwidth: maximum allowed bandwidth for this channel, in MHz
- * @hw_value: hardware-specific value for the channel
- * @flags: channel flags from &enum ieee80211_channel_flags.
- * @orig_flags: channel flags at registration time, used by regulatory
- *	code to support devices with additional restrictions
- * @band: band this channel belongs to.
- * @max_antenna_gain: maximum antenna gain in dBi
- * @max_power: maximum transmission power (in dBm)
- * @beacon_found: helper to regulatory code to indicate when a beacon
- *	has been found on this channel. Use regulatory_hint_found_beacon()
- *	to enable this, this is is useful only on 5 GHz band.
- * @orig_mag: internal use
- * @orig_mpwr: internal use
- */
-struct ieee80211_channel {
-	enum ieee80211_band band;
-	u16 center_freq;
-	u8 max_bandwidth;
-	u16 hw_value;
-	u32 flags;
-	int max_antenna_gain;
-	int max_power;
-	bool beacon_found;
-	u32 orig_flags;
-	int orig_mag, orig_mpwr;
-};
-
-/**
- * enum ieee80211_rate_flags - rate flags
- *
- * Hardware/specification flags for rates. These are structured
- * in a way that allows using the same bitrate structure for
- * different bands/PHY modes.
- *
- * @IEEE80211_RATE_SHORT_PREAMBLE: Hardware can send with short
- *	preamble on this bitrate; only relevant in 2.4GHz band and
- *	with CCK rates.
- * @IEEE80211_RATE_MANDATORY_A: This bitrate is a mandatory rate
- *	when used with 802.11a (on the 5 GHz band); filled by the
- *	core code when registering the wiphy.
- * @IEEE80211_RATE_MANDATORY_B: This bitrate is a mandatory rate
- *	when used with 802.11b (on the 2.4 GHz band); filled by the
- *	core code when registering the wiphy.
- * @IEEE80211_RATE_MANDATORY_G: This bitrate is a mandatory rate
- *	when used with 802.11g (on the 2.4 GHz band); filled by the
- *	core code when registering the wiphy.
- * @IEEE80211_RATE_ERP_G: This is an ERP rate in 802.11g mode.
- */
-enum ieee80211_rate_flags {
-	IEEE80211_RATE_SHORT_PREAMBLE	= 1<<0,
-	IEEE80211_RATE_MANDATORY_A	= 1<<1,
-	IEEE80211_RATE_MANDATORY_B	= 1<<2,
-	IEEE80211_RATE_MANDATORY_G	= 1<<3,
-	IEEE80211_RATE_ERP_G		= 1<<4,
-};
-
-/**
- * struct ieee80211_rate - bitrate definition
- *
- * This structure describes a bitrate that an 802.11 PHY can
- * operate with. The two values @hw_value and @hw_value_short
- * are only for driver use when pointers to this structure are
- * passed around.
- *
- * @flags: rate-specific flags
- * @bitrate: bitrate in units of 100 Kbps
- * @hw_value: driver/hardware value for this rate
- * @hw_value_short: driver/hardware value for this rate when
- *	short preamble is used
- */
-struct ieee80211_rate {
-	u32 flags;
-	u16 bitrate;
-	u16 hw_value, hw_value_short;
-};
-
-/**
- * struct ieee80211_sta_ht_cap - STA's HT capabilities
- *
- * This structure describes most essential parameters needed
- * to describe 802.11n HT capabilities for an STA.
- *
- * @ht_supported: is HT supported by the STA
- * @cap: HT capabilities map as described in 802.11n spec
- * @ampdu_factor: Maximum A-MPDU length factor
- * @ampdu_density: Minimum A-MPDU spacing
- * @mcs: Supported MCS rates
- */
-struct ieee80211_sta_ht_cap {
-	u16 cap; /* use IEEE80211_HT_CAP_ */
-	bool ht_supported;
-	u8 ampdu_factor;
-	u8 ampdu_density;
-	struct ieee80211_mcs_info mcs;
-};
-
-/**
- * struct ieee80211_supported_band - frequency band definition
- *
- * This structure describes a frequency band a wiphy
- * is able to operate in.
- *
- * @channels: Array of channels the hardware can operate in
- *	in this band.
- * @band: the band this structure represents
- * @n_channels: Number of channels in @channels
- * @bitrates: Array of bitrates the hardware can operate with
- *	in this band. Must be sorted to give a valid "supported
- *	rates" IE, i.e. CCK rates first, then OFDM.
- * @n_bitrates: Number of bitrates in @bitrates
- */
-struct ieee80211_supported_band {
-	struct ieee80211_channel *channels;
-	struct ieee80211_rate *bitrates;
-	enum ieee80211_band band;
-	int n_channels;
-	int n_bitrates;
-	struct ieee80211_sta_ht_cap ht_cap;
-};
-
-/**
- * struct wiphy - wireless hardware description
- * @idx: the wiphy index assigned to this item
- * @class_dev: the class device representing /sys/class/ieee80211/<wiphy-name>
- * @custom_regulatory: tells us the driver for this device
- * 	has its own custom regulatory domain and cannot identify the
- * 	ISO / IEC 3166 alpha2 it belongs to. When this is enabled
- * 	we will disregard the first regulatory hint (when the
- * 	initiator is %REGDOM_SET_BY_CORE).
- * @strict_regulatory: tells us the driver for this device will ignore
- * 	regulatory domain settings until it gets its own regulatory domain
- * 	via its regulatory_hint(). After its gets its own regulatory domain
- * 	it will only allow further regulatory domain settings to further
- * 	enhance compliance. For example if channel 13 and 14 are disabled
- * 	by this regulatory domain no user regulatory domain can enable these
- * 	channels at a later time. This can be used for devices which do not
- * 	have calibration information gauranteed for frequencies or settings
- * 	outside of its regulatory domain.
- * @reg_notifier: the driver's regulatory notification callback
- * @regd: the driver's regulatory domain, if one was requested via
- * 	the regulatory_hint() API. This can be used by the driver
- *	on the reg_notifier() if it chooses to ignore future
- *	regulatory domain changes caused by other drivers.
- * @signal_type: signal type reported in &struct cfg80211_bss.
- * @cipher_suites: supported cipher suites
- * @n_cipher_suites: number of supported cipher suites
- */
-struct wiphy {
-	/* assign these fields before you register the wiphy */
-
-	/* permanent MAC address */
-	u8 perm_addr[ETH_ALEN];
-
-	/* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */
-	u16 interface_modes;
-
-	bool custom_regulatory;
-	bool strict_regulatory;
-
-	enum cfg80211_signal_type signal_type;
-
-	int bss_priv_size;
-	u8 max_scan_ssids;
-	u16 max_scan_ie_len;
-
-	int n_cipher_suites;
-	const u32 *cipher_suites;
-
-	/* If multiple wiphys are registered and you're handed e.g.
-	 * a regular netdev with assigned ieee80211_ptr, you won't
-	 * know whether it points to a wiphy your driver has registered
-	 * or not. Assign this to something global to your driver to
-	 * help determine whether you own this wiphy or not. */
-	void *privid;
-
-	struct ieee80211_supported_band *bands[IEEE80211_NUM_BANDS];
-
-	/* Lets us get back the wiphy on the callback */
-	int (*reg_notifier)(struct wiphy *wiphy,
-			    struct regulatory_request *request);
-
-	/* fields below are read-only, assigned by cfg80211 */
-
-	const struct ieee80211_regdomain *regd;
-
-	/* the item in /sys/class/ieee80211/ points to this,
-	 * you need use set_wiphy_dev() (see below) */
-	struct device dev;
-
-	/* dir in debugfs: ieee80211/<wiphyname> */
-	struct dentry *debugfsdir;
-
-	char priv[0] __attribute__((__aligned__(NETDEV_ALIGN)));
-};
-
-/** struct wireless_dev - wireless per-netdev state
- *
- * This structure must be allocated by the driver/stack
- * that uses the ieee80211_ptr field in struct net_device
- * (this is intentional so it can be allocated along with
- * the netdev.)
- *
- * @wiphy: pointer to hardware description
- * @iftype: interface type
- * @list: (private)
- * @netdev (private)
- */
-struct wireless_dev {
-	struct wiphy *wiphy;
-	enum nl80211_iftype iftype;
-
-	/* private to the generic wireless code */
-	struct list_head list;
-	struct net_device *netdev;
-
-	/* currently used for IBSS - might be rearranged in the future */
-	struct cfg80211_bss *current_bss;
-	u8 bssid[ETH_ALEN];
-	u8 ssid[IEEE80211_MAX_SSID_LEN];
-	u8 ssid_len;
-
-#ifdef CONFIG_WIRELESS_EXT
-	/* wext data */
-	struct cfg80211_ibss_params wext;
-	u8 wext_bssid[ETH_ALEN];
-#endif
-};
-
-/**
- * wiphy_priv - return priv from wiphy
- */
-static inline void *wiphy_priv(struct wiphy *wiphy)
-{
-	BUG_ON(!wiphy);
-	return &wiphy->priv;
-}
-
-/**
- * set_wiphy_dev - set device pointer for wiphy
- */
-static inline void set_wiphy_dev(struct wiphy *wiphy, struct device *dev)
-{
-	wiphy->dev.parent = dev;
-}
-
-/**
- * wiphy_dev - get wiphy dev pointer
- */
-static inline struct device *wiphy_dev(struct wiphy *wiphy)
-{
-	return wiphy->dev.parent;
-}
-
-/**
- * wiphy_name - get wiphy name
- */
-static inline const char *wiphy_name(struct wiphy *wiphy)
-{
-	return dev_name(&wiphy->dev);
-}
-
-/**
- * wdev_priv - return wiphy priv from wireless_dev
- */
-static inline void *wdev_priv(struct wireless_dev *wdev)
-{
-	BUG_ON(!wdev);
-	return wiphy_priv(wdev->wiphy);
-}
-
-/**
- * wiphy_new - create a new wiphy for use with cfg80211
- *
- * create a new wiphy and associate the given operations with it.
- * @sizeof_priv bytes are allocated for private use.
- *
- * the returned pointer must be assigned to each netdev's
- * ieee80211_ptr for proper operation.
- */
-struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv);
-
-/**
- * wiphy_register - register a wiphy with cfg80211
- *
- * register the given wiphy
- *
- * Returns a non-negative wiphy index or a negative error code.
- */
-extern int wiphy_register(struct wiphy *wiphy);
-
-/**
- * wiphy_unregister - deregister a wiphy from cfg80211
- *
- * unregister a device with the given priv pointer.
- * After this call, no more requests can be made with this priv
- * pointer, but the call may sleep to wait for an outstanding
- * request that is being handled.
- */
-extern void wiphy_unregister(struct wiphy *wiphy);
-
-/**
- * wiphy_free - free wiphy
- */
-extern void wiphy_free(struct wiphy *wiphy);
-
-/**
- * ieee80211_channel_to_frequency - convert channel number to frequency
- */
-extern int ieee80211_channel_to_frequency(int chan);
-
-/**
- * ieee80211_frequency_to_channel - convert frequency to channel number
- */
-extern int ieee80211_frequency_to_channel(int freq);
-
-/*
- * Name indirection necessary because the ieee80211 code also has
- * a function named "ieee80211_get_channel", so if you include
- * cfg80211's header file you get cfg80211's version, if you try
- * to include both header files you'll (rightfully!) get a symbol
- * clash.
- */
-extern struct ieee80211_channel *__ieee80211_get_channel(struct wiphy *wiphy,
-							 int freq);
-/**
- * ieee80211_get_channel - get channel struct from wiphy for specified frequency
- */
-static inline struct ieee80211_channel *
-ieee80211_get_channel(struct wiphy *wiphy, int freq)
-{
-	return __ieee80211_get_channel(wiphy, freq);
-}
-
-/**
- * ieee80211_get_response_rate - get basic rate for a given rate
- *
- * @sband: the band to look for rates in
- * @basic_rates: bitmap of basic rates
- * @bitrate: the bitrate for which to find the basic rate
- *
- * This function returns the basic rate corresponding to a given
- * bitrate, that is the next lower bitrate contained in the basic
- * rate map, which is, for this function, given as a bitmap of
- * indices of rates in the band's bitrate table.
- */
-struct ieee80211_rate *
-ieee80211_get_response_rate(struct ieee80211_supported_band *sband,
-			    u32 basic_rates, int bitrate);
-
-/**
- * regulatory_hint - driver hint to the wireless core a regulatory domain
- * @wiphy: the wireless device giving the hint (used only for reporting
- *	conflicts)
- * @alpha2: the ISO/IEC 3166 alpha2 the driver claims its regulatory domain
- * 	should be in. If @rd is set this should be NULL. Note that if you
- * 	set this to NULL you should still set rd->alpha2 to some accepted
- * 	alpha2.
- *
- * Wireless drivers can use this function to hint to the wireless core
- * what it believes should be the current regulatory domain by
- * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory
- * domain should be in or by providing a completely build regulatory domain.
- * If the driver provides an ISO/IEC 3166 alpha2 userspace will be queried
- * for a regulatory domain structure for the respective country.
- *
- * The wiphy must have been registered to cfg80211 prior to this call.
- * For cfg80211 drivers this means you must first use wiphy_register(),
- * for mac80211 drivers you must first use ieee80211_register_hw().
- *
- * Drivers should check the return value, its possible you can get
- * an -ENOMEM.
- */
-extern int regulatory_hint(struct wiphy *wiphy, const char *alpha2);
-
-/**
- * regulatory_hint_11d - hints a country IE as a regulatory domain
- * @wiphy: the wireless device giving the hint (used only for reporting
- *	conflicts)
- * @country_ie: pointer to the country IE
- * @country_ie_len: length of the country IE
- *
- * We will intersect the rd with the what CRDA tells us should apply
- * for the alpha2 this country IE belongs to, this prevents APs from
- * sending us incorrect or outdated information against a country.
- */
-extern void regulatory_hint_11d(struct wiphy *wiphy,
-				u8 *country_ie,
-				u8 country_ie_len);
-/**
- * wiphy_apply_custom_regulatory - apply a custom driver regulatory domain
- * @wiphy: the wireless device we want to process the regulatory domain on
- * @regd: the custom regulatory domain to use for this wiphy
- *
- * Drivers can sometimes have custom regulatory domains which do not apply
- * to a specific country. Drivers can use this to apply such custom regulatory
- * domains. This routine must be called prior to wiphy registration. The
- * custom regulatory domain will be trusted completely and as such previous
- * default channel settings will be disregarded. If no rule is found for a
- * channel on the regulatory domain the channel will be disabled.
- */
-extern void wiphy_apply_custom_regulatory(
-	struct wiphy *wiphy,
-	const struct ieee80211_regdomain *regd);
-
-/**
- * freq_reg_info - get regulatory information for the given frequency
- * @wiphy: the wiphy for which we want to process this rule for
- * @center_freq: Frequency in KHz for which we want regulatory information for
- * @bandwidth: the bandwidth requirement you have in KHz, if you do not have one
- * 	you can set this to 0. If this frequency is allowed we then set
- * 	this value to the maximum allowed bandwidth.
- * @reg_rule: the regulatory rule which we have for this frequency
- *
- * Use this function to get the regulatory rule for a specific frequency on
- * a given wireless device. If the device has a specific regulatory domain
- * it wants to follow we respect that unless a country IE has been received
- * and processed already.
- *
- * Returns 0 if it was able to find a valid regulatory rule which does
- * apply to the given center_freq otherwise it returns non-zero. It will
- * also return -ERANGE if we determine the given center_freq does not even have
- * a regulatory rule for a frequency range in the center_freq's band. See
- * freq_in_rule_band() for our current definition of a band -- this is purely
- * subjective and right now its 802.11 specific.
- */
-extern int freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 *bandwidth,
-			 const struct ieee80211_reg_rule **reg_rule);
-
-#endif /* __NET_WIRELESS_H */
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 73bd427750e..0891bfb0699 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -14,7 +14,6 @@
  */
 
 #include <linux/ieee80211.h>
-#include <net/wireless.h>
 #include <net/mac80211.h>
 #include "ieee80211_i.h"
 #include "rate.h"
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index e770c1e8e08..92a573bf103 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -24,7 +24,6 @@
 #include <linux/spinlock.h>
 #include <linux/etherdevice.h>
 #include <net/cfg80211.h>
-#include <net/wireless.h>
 #include <net/iw_handler.h>
 #include <net/mac80211.h>
 #include "key.h"
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index 5f7a2624ed7..48bf78e7fa7 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -15,7 +15,7 @@
  */
 
 #include <linux/ieee80211.h>
-#include <net/wireless.h>
+#include <net/cfg80211.h>
 #include <net/mac80211.h>
 #include "ieee80211_i.h"
 #include "sta_info.h"
diff --git a/net/wireless/core.c b/net/wireless/core.c
index de1ac51ae4e..827a5626355 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -14,7 +14,6 @@
 #include <linux/device.h>
 #include <net/genetlink.h>
 #include <net/cfg80211.h>
-#include <net/wireless.h>
 #include "nl80211.h"
 #include "core.h"
 #include "sysfs.h"
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 2ef3595fd6e..89a8159ef0b 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -11,7 +11,6 @@
 #include <linux/kref.h>
 #include <linux/rbtree.h>
 #include <net/genetlink.h>
-#include <net/wireless.h>
 #include <net/cfg80211.h>
 #include "reg.h"
 
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 2bf42fdef3a..4e1fcb0c9e4 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -7,7 +7,6 @@
 #include <linux/etherdevice.h>
 #include <linux/if_arp.h>
 #include <net/cfg80211.h>
-#include <net/wireless.h>
 #include "nl80211.h"
 
 
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 574e217bcc8..f38cc39fa79 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -37,7 +37,6 @@
 #include <linux/random.h>
 #include <linux/nl80211.h>
 #include <linux/platform_device.h>
-#include <net/wireless.h>
 #include <net/cfg80211.h>
 #include "core.h"
 #include "reg.h"
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 487cdd9bcff..5f7e997195c 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1,10 +1,10 @@
 /*
  * Wireless utility functions
  *
- * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2007-2009	Johannes Berg <johannes@sipsolutions.net>
  */
-#include <net/wireless.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
+#include <net/cfg80211.h>
 #include "core.h"
 
 struct ieee80211_rate *
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 57eaea26b67..4e054ea9c0a 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -12,7 +12,6 @@
 #include <linux/nl80211.h>
 #include <linux/if_arp.h>
 #include <net/iw_handler.h>
-#include <net/wireless.h>
 #include <net/cfg80211.h>
 #include "core.h"
 
-- 
cgit v1.2.3-70-g09d2


From b9a5f8cab751d362f7c2d94899ca788c22fcd1ef Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Mon, 20 Apr 2009 18:39:05 +0200
Subject: nl80211: Add set/get for frag/rts threshold and retry limits

Add new nl80211 attributes that can be used with NL80211_CMD_SET_WIPHY
and NL80211_CMD_GET_WIPHY to manage fragmentation/RTS threshold and
retry limits.

Since these values are stored in struct wiphy, remove the local copy
from mac80211 where feasible (frag & rts threshold). The retry limits
are currently needed in struct ieee80211_conf, but these could be
eventually removed since the driver should have access to the values
in struct wiphy.

Signed-off-by: Jouni Malinen <j@w1.fi>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h    |  24 ++++++-
 include/net/cfg80211.h     |  50 +++++++++++++++
 net/mac80211/cfg.c         |  27 ++++++++
 net/mac80211/debugfs.c     |   8 +--
 net/mac80211/ieee80211_i.h |   3 -
 net/mac80211/main.c        |   6 +-
 net/mac80211/tx.c          |   9 ++-
 net/mac80211/util.c        |   2 +-
 net/mac80211/wext.c        | 138 ++---------------------------------------
 net/wireless/core.c        |  10 +++
 net/wireless/nl80211.c     |  95 ++++++++++++++++++++++++++++
 net/wireless/wext-compat.c | 151 +++++++++++++++++++++++++++++++++++++++++++++
 12 files changed, 372 insertions(+), 151 deletions(-)

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 25ce3e42bd1..dc9d9ec5d1a 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -46,8 +46,10 @@
  *	to get a list of all present wiphys.
  * @NL80211_CMD_SET_WIPHY: set wiphy parameters, needs %NL80211_ATTR_WIPHY or
  *	%NL80211_ATTR_IFINDEX; can be used to set %NL80211_ATTR_WIPHY_NAME,
- *	%NL80211_ATTR_WIPHY_TXQ_PARAMS, %NL80211_ATTR_WIPHY_FREQ, and/or
- *	%NL80211_ATTR_WIPHY_CHANNEL_TYPE.
+ *	%NL80211_ATTR_WIPHY_TXQ_PARAMS, %NL80211_ATTR_WIPHY_FREQ,
+ *	%NL80211_ATTR_WIPHY_CHANNEL_TYPE, %NL80211_ATTR_WIPHY_RETRY_SHORT,
+ *	%NL80211_ATTR_WIPHY_RETRY_LONG, %NL80211_ATTR_WIPHY_FRAG_THRESHOLD,
+ *	and/or %NL80211_ATTR_WIPHY_RTS_THRESHOLD.
  * @NL80211_CMD_NEW_WIPHY: Newly created wiphy, response to get request
  *	or rename notification. Has attributes %NL80211_ATTR_WIPHY and
  *	%NL80211_ATTR_WIPHY_NAME.
@@ -337,6 +339,18 @@ enum nl80211_commands {
  *	NL80211_CHAN_HT20 = HT20 only
  *	NL80211_CHAN_HT40MINUS = secondary channel is below the primary channel
  *	NL80211_CHAN_HT40PLUS = secondary channel is above the primary channel
+ * @NL80211_ATTR_WIPHY_RETRY_SHORT: TX retry limit for frames whose length is
+ *	less than or equal to the RTS threshold; allowed range: 1..255;
+ *	dot11ShortRetryLimit; u8
+ * @NL80211_ATTR_WIPHY_RETRY_LONG: TX retry limit for frames whose length is
+ *	greater than the RTS threshold; allowed range: 1..255;
+ *	dot11ShortLongLimit; u8
+ * @NL80211_ATTR_WIPHY_FRAG_THRESHOLD: fragmentation threshold, i.e., maximum
+ *	length in octets for frames; allowed range: 256..8000, disable
+ *	fragmentation with (u32)-1; dot11FragmentationThreshold; u32
+ * @NL80211_ATTR_WIPHY_RTS_THRESHOLD: RTS threshold (TX frames with length
+ *	larger than or equal to this use RTS/CTS handshake); allowed range:
+ *	0..65536, disable with (u32)-1; dot11RTSThreshold; u32
  *
  * @NL80211_ATTR_IFINDEX: network interface index of the device to operate on
  * @NL80211_ATTR_IFNAME: network interface name
@@ -565,6 +579,12 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_FREQ_FIXED,
 
+
+	NL80211_ATTR_WIPHY_RETRY_SHORT,
+	NL80211_ATTR_WIPHY_RETRY_LONG,
+	NL80211_ATTR_WIPHY_FRAG_THRESHOLD,
+	NL80211_ATTR_WIPHY_RTS_THRESHOLD,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 601eac64b02..54bc69c8369 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -743,6 +743,20 @@ struct cfg80211_ibss_params {
 	bool channel_fixed;
 };
 
+/**
+ * enum wiphy_params_flags - set_wiphy_params bitfield values
+ * WIPHY_PARAM_RETRY_SHORT: wiphy->retry_short has changed
+ * WIPHY_PARAM_RETRY_LONG: wiphy->retry_long has changed
+ * WIPHY_PARAM_FRAG_THRESHOLD: wiphy->frag_threshold has changed
+ * WIPHY_PARAM_RTS_THRESHOLD: wiphy->rts_threshold has changed
+ */
+enum wiphy_params_flags {
+	WIPHY_PARAM_RETRY_SHORT		= 1 << 0,
+	WIPHY_PARAM_RETRY_LONG		= 1 << 1,
+	WIPHY_PARAM_FRAG_THRESHOLD	= 1 << 2,
+	WIPHY_PARAM_RTS_THRESHOLD	= 1 << 3,
+};
+
 /**
  * struct cfg80211_ops - backend description for wireless configuration
  *
@@ -823,6 +837,11 @@ struct cfg80211_ibss_params {
  *	cfg80211_ibss_joined(), also call that function when changing BSSID due
  *	to a merge.
  * @leave_ibss: Leave the IBSS.
+ *
+ * @set_wiphy_params: Notify that wiphy parameters have changed;
+ *	@changed bitfield (see &enum wiphy_params_flags) describes which values
+ *	have changed. The actual parameter values are available in
+ *	struct wiphy. If returning an error, no value should be changed.
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy);
@@ -912,6 +931,8 @@ struct cfg80211_ops {
 	int	(*join_ibss)(struct wiphy *wiphy, struct net_device *dev,
 			     struct cfg80211_ibss_params *params);
 	int	(*leave_ibss)(struct wiphy *wiphy, struct net_device *dev);
+
+	int	(*set_wiphy_params)(struct wiphy *wiphy, u32 changed);
 };
 
 /*
@@ -945,6 +966,11 @@ struct cfg80211_ops {
  * @signal_type: signal type reported in &struct cfg80211_bss.
  * @cipher_suites: supported cipher suites
  * @n_cipher_suites: number of supported cipher suites
+ * @retry_short: Retry limit for short frames (dot11ShortRetryLimit)
+ * @retry_long: Retry limit for long frames (dot11LongRetryLimit)
+ * @frag_threshold: Fragmentation threshold (dot11FragmentationThreshold);
+ *	-1 = fragmentation disabled, only odd values >= 256 used
+ * @rts_threshold: RTS threshold (dot11RTSThreshold); -1 = RTS/CTS disabled
  */
 struct wiphy {
 	/* assign these fields before you register the wiphy */
@@ -967,6 +993,11 @@ struct wiphy {
 	int n_cipher_suites;
 	const u32 *cipher_suites;
 
+	u8 retry_short;
+	u8 retry_long;
+	u32 frag_threshold;
+	u32 rts_threshold;
+
 	/* If multiple wiphys are registered and you're handed e.g.
 	 * a regular netdev with assigned ieee80211_ptr, you won't
 	 * know whether it points to a wiphy your driver has registered
@@ -1345,6 +1376,25 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev,
 struct ieee80211_channel *cfg80211_wext_freq(struct wiphy *wiphy,
 					     struct iw_freq *freq);
 
+int cfg80211_wext_siwrts(struct net_device *dev,
+			 struct iw_request_info *info,
+			 struct iw_param *rts, char *extra);
+int cfg80211_wext_giwrts(struct net_device *dev,
+			 struct iw_request_info *info,
+			 struct iw_param *rts, char *extra);
+int cfg80211_wext_siwfrag(struct net_device *dev,
+			  struct iw_request_info *info,
+			  struct iw_param *frag, char *extra);
+int cfg80211_wext_giwfrag(struct net_device *dev,
+			  struct iw_request_info *info,
+			  struct iw_param *frag, char *extra);
+int cfg80211_wext_siwretry(struct net_device *dev,
+			   struct iw_request_info *info,
+			   struct iw_param *retry, char *extra);
+int cfg80211_wext_giwretry(struct net_device *dev,
+			   struct iw_request_info *info,
+			   struct iw_param *retry, char *extra);
+
 /*
  * callbacks for asynchronous cfg80211 methods, notification
  * functions and BSS handling helpers
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 14013dc6447..5e1c230744b 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1298,6 +1298,32 @@ static int ieee80211_leave_ibss(struct wiphy *wiphy, struct net_device *dev)
 	return ieee80211_ibss_leave(sdata);
 }
 
+static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed)
+{
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+
+	if (changed & WIPHY_PARAM_RTS_THRESHOLD) {
+		int err;
+
+		if (local->ops->set_rts_threshold) {
+			err = local->ops->set_rts_threshold(
+				local_to_hw(local), wiphy->rts_threshold);
+			if (err)
+				return err;
+		}
+	}
+
+	if (changed & WIPHY_PARAM_RETRY_SHORT)
+		local->hw.conf.short_frame_max_tx_count = wiphy->retry_short;
+	if (changed & WIPHY_PARAM_RETRY_LONG)
+		local->hw.conf.long_frame_max_tx_count = wiphy->retry_long;
+	if (changed &
+	    (WIPHY_PARAM_RETRY_SHORT | WIPHY_PARAM_RETRY_LONG))
+		ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_RETRY_LIMITS);
+
+	return 0;
+}
+
 struct cfg80211_ops mac80211_config_ops = {
 	.add_virtual_intf = ieee80211_add_iface,
 	.del_virtual_intf = ieee80211_del_iface,
@@ -1336,4 +1362,5 @@ struct cfg80211_ops mac80211_config_ops = {
 	.disassoc = ieee80211_disassoc,
 	.join_ibss = ieee80211_join_ibss,
 	.leave_ibss = ieee80211_leave_ibss,
+	.set_wiphy_params = ieee80211_set_wiphy_params,
 };
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 210b9b6fecd..5001328be46 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -52,13 +52,13 @@ static const struct file_operations name## _ops = {			\
 DEBUGFS_READONLY_FILE(frequency, 20, "%d",
 		      local->hw.conf.channel->center_freq);
 DEBUGFS_READONLY_FILE(rts_threshold, 20, "%d",
-		      local->rts_threshold);
+		      local->hw.wiphy->rts_threshold);
 DEBUGFS_READONLY_FILE(fragmentation_threshold, 20, "%d",
-		      local->fragmentation_threshold);
+		      local->hw.wiphy->frag_threshold);
 DEBUGFS_READONLY_FILE(short_retry_limit, 20, "%d",
-		      local->hw.conf.short_frame_max_tx_count);
+		      local->hw.wiphy->retry_short);
 DEBUGFS_READONLY_FILE(long_retry_limit, 20, "%d",
-		      local->hw.conf.long_frame_max_tx_count);
+		      local->hw.wiphy->retry_long);
 DEBUGFS_READONLY_FILE(total_ps_buffered, 20, "%d",
 		      local->total_ps_buffered);
 DEBUGFS_READONLY_FILE(wep_iv, 20, "%#08x",
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 92a573bf103..dba78d89a10 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -643,9 +643,6 @@ struct ieee80211_local {
 
 	struct rate_control_ref *rate_ctrl;
 
-	int rts_threshold;
-	int fragmentation_threshold;
-
 	struct crypto_blkcipher *wep_tx_tfm;
 	struct crypto_blkcipher *wep_rx_tfm;
 	u32 wep_iv;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index d26fc399285..5320e08434a 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -776,10 +776,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 	/* set up some defaults */
 	local->hw.queues = 1;
 	local->hw.max_rates = 1;
-	local->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD;
-	local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD;
-	local->hw.conf.long_frame_max_tx_count = 4;
-	local->hw.conf.short_frame_max_tx_count = 7;
+	local->hw.conf.long_frame_max_tx_count = wiphy->retry_long;
+	local->hw.conf.short_frame_max_tx_count = wiphy->retry_short;
 	local->hw.conf.radio_enabled = true;
 
 	INIT_LIST_HEAD(&local->interfaces);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index c53d77db3e4..9ab49826c15 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -516,7 +516,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 	sband = tx->local->hw.wiphy->bands[tx->channel->band];
 
 	len = min_t(int, tx->skb->len + FCS_LEN,
-			 tx->local->fragmentation_threshold);
+			 tx->local->hw.wiphy->frag_threshold);
 
 	/* set up the tx rate control struct we give the RC algo */
 	txrc.hw = local_to_hw(tx->local);
@@ -527,8 +527,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 	txrc.max_rate_idx = tx->sdata->max_ratectrl_rateidx;
 
 	/* set up RTS protection if desired */
-	if (tx->local->rts_threshold < IEEE80211_MAX_RTS_THRESHOLD &&
-	    len > tx->local->rts_threshold) {
+	if (len > tx->local->hw.wiphy->rts_threshold) {
 		txrc.rts = rts = true;
 	}
 
@@ -770,7 +769,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
 	struct sk_buff *skb = tx->skb;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_hdr *hdr = (void *)skb->data;
-	int frag_threshold = tx->local->fragmentation_threshold;
+	int frag_threshold = tx->local->hw.wiphy->frag_threshold;
 	int hdrlen;
 	int fragnum;
 
@@ -1088,7 +1087,7 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
 
 	if (tx->flags & IEEE80211_TX_FRAGMENTED) {
 		if ((tx->flags & IEEE80211_TX_UNICAST) &&
-		    skb->len + FCS_LEN > local->fragmentation_threshold &&
+		    skb->len + FCS_LEN > local->hw.wiphy->frag_threshold &&
 		    !(info->flags & IEEE80211_TX_CTL_AMPDU))
 			tx->flags |= IEEE80211_TX_FRAGMENTED;
 		else
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 3dd490fa4b6..11244212f41 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1044,7 +1044,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 
 	/* setup RTS threshold */
 	if (local->ops->set_rts_threshold)
-		local->ops->set_rts_threshold(hw, local->rts_threshold);
+		local->ops->set_rts_threshold(hw, hw->wiphy->rts_threshold);
 
 	/* reconfigure hardware */
 	ieee80211_hw_config(local, ~0);
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index eb63fc14801..1eb6d8642a7 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -472,132 +472,6 @@ static int ieee80211_ioctl_giwtxpower(struct net_device *dev,
 	return 0;
 }
 
-static int ieee80211_ioctl_siwrts(struct net_device *dev,
-				  struct iw_request_info *info,
-				  struct iw_param *rts, char *extra)
-{
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-
-	if (rts->disabled)
-		local->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD;
-	else if (!rts->fixed)
-		/* if the rts value is not fixed, then take default */
-		local->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD;
-	else if (rts->value < 0 || rts->value > IEEE80211_MAX_RTS_THRESHOLD)
-		return -EINVAL;
-	else
-		local->rts_threshold = rts->value;
-
-	/* If the wlan card performs RTS/CTS in hardware/firmware,
-	 * configure it here */
-
-	if (local->ops->set_rts_threshold)
-		local->ops->set_rts_threshold(local_to_hw(local),
-					     local->rts_threshold);
-
-	return 0;
-}
-
-static int ieee80211_ioctl_giwrts(struct net_device *dev,
-				  struct iw_request_info *info,
-				  struct iw_param *rts, char *extra)
-{
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-
-	rts->value = local->rts_threshold;
-	rts->disabled = (rts->value >= IEEE80211_MAX_RTS_THRESHOLD);
-	rts->fixed = 1;
-
-	return 0;
-}
-
-
-static int ieee80211_ioctl_siwfrag(struct net_device *dev,
-				   struct iw_request_info *info,
-				   struct iw_param *frag, char *extra)
-{
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-
-	if (frag->disabled)
-		local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD;
-	else if (!frag->fixed)
-		local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD;
-	else if (frag->value < 256 ||
-		 frag->value > IEEE80211_MAX_FRAG_THRESHOLD)
-		return -EINVAL;
-	else {
-		/* Fragment length must be even, so strip LSB. */
-		local->fragmentation_threshold = frag->value & ~0x1;
-	}
-
-	return 0;
-}
-
-static int ieee80211_ioctl_giwfrag(struct net_device *dev,
-				   struct iw_request_info *info,
-				   struct iw_param *frag, char *extra)
-{
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-
-	frag->value = local->fragmentation_threshold;
-	frag->disabled = (frag->value >= IEEE80211_MAX_FRAG_THRESHOLD);
-	frag->fixed = 1;
-
-	return 0;
-}
-
-
-static int ieee80211_ioctl_siwretry(struct net_device *dev,
-				    struct iw_request_info *info,
-				    struct iw_param *retry, char *extra)
-{
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-
-	if (retry->disabled ||
-	    (retry->flags & IW_RETRY_TYPE) != IW_RETRY_LIMIT)
-		return -EINVAL;
-
-	if (retry->flags & IW_RETRY_MAX) {
-		local->hw.conf.long_frame_max_tx_count = retry->value;
-	} else if (retry->flags & IW_RETRY_MIN) {
-		local->hw.conf.short_frame_max_tx_count = retry->value;
-	} else {
-		local->hw.conf.long_frame_max_tx_count = retry->value;
-		local->hw.conf.short_frame_max_tx_count = retry->value;
-	}
-
-	ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_RETRY_LIMITS);
-
-	return 0;
-}
-
-
-static int ieee80211_ioctl_giwretry(struct net_device *dev,
-				    struct iw_request_info *info,
-				    struct iw_param *retry, char *extra)
-{
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-
-	retry->disabled = 0;
-	if (retry->flags == 0 || retry->flags & IW_RETRY_MIN) {
-		/* first return min value, iwconfig will ask max value
-		 * later if needed */
-		retry->flags |= IW_RETRY_LIMIT;
-		retry->value = local->hw.conf.short_frame_max_tx_count;
-		if (local->hw.conf.long_frame_max_tx_count !=
-		    local->hw.conf.short_frame_max_tx_count)
-			retry->flags |= IW_RETRY_MIN;
-		return 0;
-	}
-	if (retry->flags & IW_RETRY_MAX) {
-		retry->flags = IW_RETRY_LIMIT | IW_RETRY_MAX;
-		retry->value = local->hw.conf.long_frame_max_tx_count;
-	}
-
-	return 0;
-}
-
-
 static int ieee80211_ioctl_siwencode(struct net_device *dev,
 				     struct iw_request_info *info,
 				     struct iw_point *erq, char *keybuf)
@@ -1050,14 +924,14 @@ static const iw_handler ieee80211_handler[] =
 	(iw_handler) NULL,				/* -- hole -- */
 	(iw_handler) ieee80211_ioctl_siwrate,		/* SIOCSIWRATE */
 	(iw_handler) ieee80211_ioctl_giwrate,		/* SIOCGIWRATE */
-	(iw_handler) ieee80211_ioctl_siwrts,		/* SIOCSIWRTS */
-	(iw_handler) ieee80211_ioctl_giwrts,		/* SIOCGIWRTS */
-	(iw_handler) ieee80211_ioctl_siwfrag,		/* SIOCSIWFRAG */
-	(iw_handler) ieee80211_ioctl_giwfrag,		/* SIOCGIWFRAG */
+	(iw_handler) cfg80211_wext_siwrts,		/* SIOCSIWRTS */
+	(iw_handler) cfg80211_wext_giwrts,		/* SIOCGIWRTS */
+	(iw_handler) cfg80211_wext_siwfrag,		/* SIOCSIWFRAG */
+	(iw_handler) cfg80211_wext_giwfrag,		/* SIOCGIWFRAG */
 	(iw_handler) ieee80211_ioctl_siwtxpower,	/* SIOCSIWTXPOW */
 	(iw_handler) ieee80211_ioctl_giwtxpower,	/* SIOCGIWTXPOW */
-	(iw_handler) ieee80211_ioctl_siwretry,		/* SIOCSIWRETRY */
-	(iw_handler) ieee80211_ioctl_giwretry,		/* SIOCGIWRETRY */
+	(iw_handler) cfg80211_wext_siwretry,		/* SIOCSIWRETRY */
+	(iw_handler) cfg80211_wext_giwretry,		/* SIOCGIWRETRY */
 	(iw_handler) ieee80211_ioctl_siwencode,		/* SIOCSIWENCODE */
 	(iw_handler) ieee80211_ioctl_giwencode,		/* SIOCGIWENCODE */
 	(iw_handler) ieee80211_ioctl_siwpower,		/* SIOCSIWPOWER */
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 827a5626355..f256b4f7e83 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -273,6 +273,16 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv)
 	drv->wiphy.dev.class = &ieee80211_class;
 	drv->wiphy.dev.platform_data = drv;
 
+	/*
+	 * Initialize wiphy parameters to IEEE 802.11 MIB default values.
+	 * Fragmentation and RTS threshold are disabled by default with the
+	 * special -1 value.
+	 */
+	drv->wiphy.retry_short = 7;
+	drv->wiphy.retry_long = 4;
+	drv->wiphy.frag_threshold = (u32) -1;
+	drv->wiphy.rts_threshold = (u32) -1;
+
 	return &drv->wiphy;
 }
 EXPORT_SYMBOL(wiphy_new);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 16f86356ac9..5a9a5c6c71d 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -61,6 +61,10 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 	[NL80211_ATTR_WIPHY_TXQ_PARAMS] = { .type = NLA_NESTED },
 	[NL80211_ATTR_WIPHY_FREQ] = { .type = NLA_U32 },
 	[NL80211_ATTR_WIPHY_CHANNEL_TYPE] = { .type = NLA_U32 },
+	[NL80211_ATTR_WIPHY_RETRY_SHORT] = { .type = NLA_U8 },
+	[NL80211_ATTR_WIPHY_RETRY_LONG] = { .type = NLA_U8 },
+	[NL80211_ATTR_WIPHY_FRAG_THRESHOLD] = { .type = NLA_U32 },
+	[NL80211_ATTR_WIPHY_RTS_THRESHOLD] = { .type = NLA_U32 },
 
 	[NL80211_ATTR_IFTYPE] = { .type = NLA_U32 },
 	[NL80211_ATTR_IFINDEX] = { .type = NLA_U32 },
@@ -204,6 +208,16 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 
 	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx);
 	NLA_PUT_STRING(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy));
+
+	NLA_PUT_U8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT,
+		   dev->wiphy.retry_short);
+	NLA_PUT_U8(msg, NL80211_ATTR_WIPHY_RETRY_LONG,
+		   dev->wiphy.retry_long);
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD,
+		    dev->wiphy.frag_threshold);
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD,
+		    dev->wiphy.rts_threshold);
+
 	NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS,
 		   dev->wiphy.max_scan_ssids);
 	NLA_PUT_U16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN,
@@ -416,6 +430,9 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 	struct cfg80211_registered_device *rdev;
 	int result = 0, rem_txq_params = 0;
 	struct nlattr *nl_txq_params;
+	u32 changed;
+	u8 retry_short = 0, retry_long = 0;
+	u32 frag_threshold = 0, rts_threshold = 0;
 
 	rtnl_lock();
 
@@ -530,6 +547,84 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 			goto bad_res;
 	}
 
+	changed = 0;
+
+	if (info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]) {
+		retry_short = nla_get_u8(
+			info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]);
+		if (retry_short == 0) {
+			result = -EINVAL;
+			goto bad_res;
+		}
+		changed |= WIPHY_PARAM_RETRY_SHORT;
+	}
+
+	if (info->attrs[NL80211_ATTR_WIPHY_RETRY_LONG]) {
+		retry_long = nla_get_u8(
+			info->attrs[NL80211_ATTR_WIPHY_RETRY_LONG]);
+		if (retry_long == 0) {
+			result = -EINVAL;
+			goto bad_res;
+		}
+		changed |= WIPHY_PARAM_RETRY_LONG;
+	}
+
+	if (info->attrs[NL80211_ATTR_WIPHY_FRAG_THRESHOLD]) {
+		frag_threshold = nla_get_u32(
+			info->attrs[NL80211_ATTR_WIPHY_FRAG_THRESHOLD]);
+		if (frag_threshold < 256) {
+			result = -EINVAL;
+			goto bad_res;
+		}
+		if (frag_threshold != (u32) -1) {
+			/*
+			 * Fragments (apart from the last one) are required to
+			 * have even length. Make the fragmentation code
+			 * simpler by stripping LSB should someone try to use
+			 * odd threshold value.
+			 */
+			frag_threshold &= ~0x1;
+		}
+		changed |= WIPHY_PARAM_FRAG_THRESHOLD;
+	}
+
+	if (info->attrs[NL80211_ATTR_WIPHY_RTS_THRESHOLD]) {
+		rts_threshold = nla_get_u32(
+			info->attrs[NL80211_ATTR_WIPHY_RTS_THRESHOLD]);
+		changed |= WIPHY_PARAM_RTS_THRESHOLD;
+	}
+
+	if (changed) {
+		u8 old_retry_short, old_retry_long;
+		u32 old_frag_threshold, old_rts_threshold;
+
+		if (!rdev->ops->set_wiphy_params) {
+			result = -EOPNOTSUPP;
+			goto bad_res;
+		}
+
+		old_retry_short = rdev->wiphy.retry_short;
+		old_retry_long = rdev->wiphy.retry_long;
+		old_frag_threshold = rdev->wiphy.frag_threshold;
+		old_rts_threshold = rdev->wiphy.rts_threshold;
+
+		if (changed & WIPHY_PARAM_RETRY_SHORT)
+			rdev->wiphy.retry_short = retry_short;
+		if (changed & WIPHY_PARAM_RETRY_LONG)
+			rdev->wiphy.retry_long = retry_long;
+		if (changed & WIPHY_PARAM_FRAG_THRESHOLD)
+			rdev->wiphy.frag_threshold = frag_threshold;
+		if (changed & WIPHY_PARAM_RTS_THRESHOLD)
+			rdev->wiphy.rts_threshold = rts_threshold;
+
+		result = rdev->ops->set_wiphy_params(&rdev->wiphy, changed);
+		if (result) {
+			rdev->wiphy.retry_short = old_retry_short;
+			rdev->wiphy.retry_long = old_retry_long;
+			rdev->wiphy.frag_threshold = old_frag_threshold;
+			rdev->wiphy.rts_threshold = old_rts_threshold;
+		}
+	}
 
  bad_res:
 	mutex_unlock(&rdev->mtx);
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 4e054ea9c0a..3279e7f038d 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -314,3 +314,154 @@ struct ieee80211_channel *cfg80211_wext_freq(struct wiphy *wiphy,
 
 }
 EXPORT_SYMBOL(cfg80211_wext_freq);
+
+int cfg80211_wext_siwrts(struct net_device *dev,
+			 struct iw_request_info *info,
+			 struct iw_param *rts, char *extra)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	u32 orts = wdev->wiphy->rts_threshold;
+	int err;
+
+	if (rts->disabled || !rts->fixed)
+		wdev->wiphy->rts_threshold = (u32) -1;
+	else if (rts->value < 0)
+		return -EINVAL;
+	else
+		wdev->wiphy->rts_threshold = rts->value;
+
+	err = rdev->ops->set_wiphy_params(wdev->wiphy,
+					  WIPHY_PARAM_RTS_THRESHOLD);
+	if (err)
+		wdev->wiphy->rts_threshold = orts;
+
+	return err;
+}
+EXPORT_SYMBOL(cfg80211_wext_siwrts);
+
+int cfg80211_wext_giwrts(struct net_device *dev,
+			 struct iw_request_info *info,
+			 struct iw_param *rts, char *extra)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+
+	rts->value = wdev->wiphy->rts_threshold;
+	rts->disabled = rts->value == (u32) -1;
+	rts->fixed = 1;
+
+	return 0;
+}
+EXPORT_SYMBOL(cfg80211_wext_giwrts);
+
+int cfg80211_wext_siwfrag(struct net_device *dev,
+			  struct iw_request_info *info,
+			  struct iw_param *frag, char *extra)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	u32 ofrag = wdev->wiphy->frag_threshold;
+	int err;
+
+	if (frag->disabled || !frag->fixed)
+		wdev->wiphy->frag_threshold = (u32) -1;
+	else if (frag->value < 256)
+		return -EINVAL;
+	else {
+		/* Fragment length must be even, so strip LSB. */
+		wdev->wiphy->frag_threshold = frag->value & ~0x1;
+	}
+
+	err = rdev->ops->set_wiphy_params(wdev->wiphy,
+					  WIPHY_PARAM_FRAG_THRESHOLD);
+	if (err)
+		wdev->wiphy->frag_threshold = ofrag;
+
+	return err;
+}
+EXPORT_SYMBOL(cfg80211_wext_siwfrag);
+
+int cfg80211_wext_giwfrag(struct net_device *dev,
+			  struct iw_request_info *info,
+			  struct iw_param *frag, char *extra)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+
+	frag->value = wdev->wiphy->frag_threshold;
+	frag->disabled = frag->value == (u32) -1;
+	frag->fixed = 1;
+
+	return 0;
+}
+EXPORT_SYMBOL(cfg80211_wext_giwfrag);
+
+int cfg80211_wext_siwretry(struct net_device *dev,
+			   struct iw_request_info *info,
+			   struct iw_param *retry, char *extra)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	u32 changed = 0;
+	u8 olong = wdev->wiphy->retry_long;
+	u8 oshort = wdev->wiphy->retry_short;
+	int err;
+
+	if (retry->disabled ||
+	    (retry->flags & IW_RETRY_TYPE) != IW_RETRY_LIMIT)
+		return -EINVAL;
+
+	if (retry->flags & IW_RETRY_LONG) {
+		wdev->wiphy->retry_long = retry->value;
+		changed |= WIPHY_PARAM_RETRY_LONG;
+	} else if (retry->flags & IW_RETRY_SHORT) {
+		wdev->wiphy->retry_short = retry->value;
+		changed |= WIPHY_PARAM_RETRY_SHORT;
+	} else {
+		wdev->wiphy->retry_short = retry->value;
+		wdev->wiphy->retry_long = retry->value;
+		changed |= WIPHY_PARAM_RETRY_LONG;
+		changed |= WIPHY_PARAM_RETRY_SHORT;
+	}
+
+	if (!changed)
+		return 0;
+
+	err = rdev->ops->set_wiphy_params(wdev->wiphy, changed);
+	if (err) {
+		wdev->wiphy->retry_short = oshort;
+		wdev->wiphy->retry_long = olong;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL(cfg80211_wext_siwretry);
+
+int cfg80211_wext_giwretry(struct net_device *dev,
+			   struct iw_request_info *info,
+			   struct iw_param *retry, char *extra)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+
+	retry->disabled = 0;
+
+	if (retry->flags == 0 || (retry->flags & IW_RETRY_SHORT)) {
+		/*
+		 * First return short value, iwconfig will ask long value
+		 * later if needed
+		 */
+		retry->flags |= IW_RETRY_LIMIT;
+		retry->value = wdev->wiphy->retry_short;
+		if (wdev->wiphy->retry_long != wdev->wiphy->retry_short)
+			retry->flags |= IW_RETRY_LONG;
+
+		return 0;
+	}
+
+	if (retry->flags & IW_RETRY_LONG) {
+		retry->flags = IW_RETRY_LIMIT | IW_RETRY_LONG;
+		retry->value = wdev->wiphy->retry_long;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(cfg80211_wext_giwretry);
-- 
cgit v1.2.3-70-g09d2


From e7ec86f54e519e8e86f1cf328db13263f3ef8bd4 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sat, 18 Apr 2009 17:33:24 +0200
Subject: mac80211: validate TIM IE length (redux)

The TIM IE must not be shorter than 4 bytes, so verify that
when parsing it and use the proper type. To ease that adjust
struct ieee80211_tim_ie to have a virtual bitmap of size
at least 1.

Also check that the TIM IE is actually present before trying
to parse it!

Because other people may need the function, make it a static
inline in ieee80211.h.

(The original "mac80211: validate TIM IE length" was a minimal fix for
2.6.30.  This purports to be the full, correct fix. -- JWL)

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h  | 32 +++++++++++++++++++++++++++++++-
 net/mac80211/ieee80211_i.h |  2 +-
 net/mac80211/mlme.c        | 27 ++-------------------------
 net/mac80211/util.c        |  6 ++++--
 4 files changed, 38 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 53563d53b5a..c52e7fba4e4 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -540,7 +540,7 @@ struct ieee80211_tim_ie {
 	u8 dtim_period;
 	u8 bitmap_ctrl;
 	/* variable size: 1 - 251 bytes */
-	u8 virtual_map[0];
+	u8 virtual_map[1];
 } __attribute__ ((packed));
 
 #define WLAN_SA_QUERY_TR_ID_LEN 16
@@ -1392,4 +1392,34 @@ static inline unsigned long ieee80211_tu_to_usec(unsigned long tu)
 	return 1024 * tu;
 }
 
+/**
+ * ieee80211_check_tim - check if AID bit is set in TIM
+ * @tim: the TIM IE
+ * @tim_len: length of the TIM IE
+ * @aid: the AID to look for
+ */
+static inline bool ieee80211_check_tim(struct ieee80211_tim_ie *tim,
+				       u8 tim_len, u16 aid)
+{
+	u8 mask;
+	u8 index, indexn1, indexn2;
+
+	if (unlikely(!tim || tim_len < sizeof(*tim)))
+		return false;
+
+	aid &= 0x3fff;
+	index = aid / 8;
+	mask  = 1 << (aid & 7);
+
+	indexn1 = tim->bitmap_ctrl & 0xfe;
+	indexn2 = tim_len + indexn1 - 4;
+
+	if (index < indexn1 || index > indexn2)
+		return false;
+
+	index -= indexn1;
+
+	return !!(tim->virtual_map[index] & mask);
+}
+
 #endif /* LINUX_IEEE80211_H */
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index dba78d89a10..1579bc92c88 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -832,7 +832,7 @@ struct ieee802_11_elems {
 	u8 *fh_params;
 	u8 *ds_params;
 	u8 *cf_params;
-	u8 *tim;
+	struct ieee80211_tim_ie *tim;
 	u8 *ibss_params;
 	u8 *challenge;
 	u8 *wpa;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 428742d7f44..1b0b7aa387e 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -675,30 +675,6 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
 	}
 }
 
-static bool ieee80211_check_tim(struct ieee802_11_elems *elems, u16 aid)
-{
-	u8 mask;
-	u8 index, indexn1, indexn2;
-	struct ieee80211_tim_ie *tim = (struct ieee80211_tim_ie *) elems->tim;
-
-	if (unlikely(!tim || elems->tim_len < 4))
-		return false;
-
-	aid &= 0x3fff;
-	index = aid / 8;
-	mask  = 1 << (aid & 7);
-
-	indexn1 = tim->bitmap_ctrl & 0xfe;
-	indexn2 = elems->tim_len + indexn1 - 4;
-
-	if (index < indexn1 || index > indexn2)
-		return false;
-
-	index -= indexn1;
-
-	return !!(tim->virtual_map[index] & mask);
-}
-
 static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
 					   u16 capab, bool erp_valid, u8 erp)
 {
@@ -1806,7 +1782,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 					  care_about_ies, ncrc);
 
 	if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)
-		directed_tim = ieee80211_check_tim(&elems, ifmgd->aid);
+		directed_tim = ieee80211_check_tim(elems.tim, elems.tim_len,
+						   ifmgd->aid);
 
 	ncrc = crc32_be(ncrc, (void *)&directed_tim, sizeof(directed_tim));
 
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 11244212f41..61876eb50b4 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -588,8 +588,10 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
 			elems->cf_params_len = elen;
 			break;
 		case WLAN_EID_TIM:
-			elems->tim = pos;
-			elems->tim_len = elen;
+			if (elen >= sizeof(struct ieee80211_tim_ie)) {
+				elems->tim = (void *)pos;
+				elems->tim_len = elen;
+			}
 			break;
 		case WLAN_EID_IBSS_PARAMS:
 			elems->ibss_params = pos;
-- 
cgit v1.2.3-70-g09d2


From e255d5eb2b478eec1416b46aea03798b64355402 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 22 Apr 2009 12:40:07 +0200
Subject: mac80211: remove IEEE80211_CONF_CHANGE_DYNPS_TIMEOUT

Just setting IEEE80211_CONF_CHANGE_PS should be sufficient
for changes in the power saving things. The driver already
tells us whether it wants notification of dynps via the
"have dynps support" hw flag.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Reviewed-by: Kalle Valo <kalle.valo@iki.fi>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 10 ++++------
 net/mac80211/wext.c    |  3 +--
 2 files changed, 5 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index d9686917252..183956e4930 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -532,8 +532,7 @@ enum ieee80211_conf_flags {
  * @IEEE80211_CONF_CHANGE_BEACON_INTERVAL: the beacon interval changed
  * @IEEE80211_CONF_CHANGE_LISTEN_INTERVAL: the listen interval changed
  * @IEEE80211_CONF_CHANGE_RADIOTAP: the radiotap flag changed
- * @IEEE80211_CONF_CHANGE_PS: the PS flag changed
- * @IEEE80211_CONF_CHANGE_DYNPS_TIMEOUT: the dynamic PS timeout changed
+ * @IEEE80211_CONF_CHANGE_PS: the PS flag or dynamic PS timeout changed
  * @IEEE80211_CONF_CHANGE_POWER: the TX power changed
  * @IEEE80211_CONF_CHANGE_CHANNEL: the channel/channel_type changed
  * @IEEE80211_CONF_CHANGE_RETRY_LIMITS: retry limits changed
@@ -544,10 +543,9 @@ enum ieee80211_conf_changed {
 	IEEE80211_CONF_CHANGE_LISTEN_INTERVAL	= BIT(2),
 	IEEE80211_CONF_CHANGE_RADIOTAP		= BIT(3),
 	IEEE80211_CONF_CHANGE_PS		= BIT(4),
-	IEEE80211_CONF_CHANGE_DYNPS_TIMEOUT	= BIT(5),
-	IEEE80211_CONF_CHANGE_POWER		= BIT(6),
-	IEEE80211_CONF_CHANGE_CHANNEL		= BIT(7),
-	IEEE80211_CONF_CHANGE_RETRY_LIMITS	= BIT(8),
+	IEEE80211_CONF_CHANGE_POWER		= BIT(5),
+	IEEE80211_CONF_CHANGE_CHANNEL		= BIT(6),
+	IEEE80211_CONF_CHANGE_RETRY_LIMITS	= BIT(7),
 };
 
 /**
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 1eb6d8642a7..1a649da42c4 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -622,8 +622,7 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev,
 	conf->dynamic_ps_timeout = timeout;
 
 	if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)
-		ieee80211_hw_config(local,
-				    IEEE80211_CONF_CHANGE_DYNPS_TIMEOUT);
+		ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
 
 	ieee80211_recalc_ps(local, -1);
 
-- 
cgit v1.2.3-70-g09d2


From 8e30bc55de98c000b0b836cb42525c82f605f191 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 22 Apr 2009 17:45:38 +0200
Subject: nl80211: allow configuring IBSS beacon interval

Make the JOIN_IBSS command look at the beacon interval
attribute to see if the user requested a specific beacon
interval, if not default to 100 TU (wext too).

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  4 +++-
 include/net/cfg80211.h  |  2 ++
 net/wireless/ibss.c     |  3 +++
 net/wireless/nl80211.c  | 12 +++++++++++-
 4 files changed, 19 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index dc9d9ec5d1a..b6a48dd502c 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -230,7 +230,9 @@
  *	and optionally a MAC (as BSSID) and FREQ_FIXED attribute if those
  *	should be fixed rather than automatically determined. Can only be
  *	executed on a network interface that is UP, and fixed BSSID/FREQ
- *	may be rejected.
+ *	may be rejected. Another optional parameter is the beacon interval,
+ *	given in the %NL80211_ATTR_BEACON_INTERVAL attribute, which if not
+ *	given defaults to 100 TU (102.4ms).
  * @NL80211_CMD_LEAVE_IBSS: Leave the IBSS -- no special arguments, the IBSS is
  *	determined by the network interface.
  *
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 54bc69c8369..7f7b53b69cb 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -733,6 +733,7 @@ struct cfg80211_disassoc_request {
  *	IBSSs to join on other channels.
  * @ie: information element(s) to include in the beacon
  * @ie_len: length of that
+ * @beacon_interval: beacon interval to use
  */
 struct cfg80211_ibss_params {
 	u8 *ssid;
@@ -740,6 +741,7 @@ struct cfg80211_ibss_params {
 	struct ieee80211_channel *channel;
 	u8 *ie;
 	u8 ssid_len, ie_len;
+	u16 beacon_interval;
 	bool channel_fixed;
 };
 
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index b5c601e1b1b..3c38afaed28 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -116,6 +116,9 @@ static int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev,
 	enum ieee80211_band band;
 	int i;
 
+	if (!wdev->wext.beacon_interval)
+		wdev->wext.beacon_interval = 100;
+
 	/* try to find an IBSS channel if none requested ... */
 	if (!wdev->wext.channel) {
 		for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 97bb5c80125..3b21b3e89e9 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3159,6 +3159,8 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
 	struct wiphy *wiphy;
 	int err;
 
+	memset(&ibss, 0, sizeof(ibss));
+
 	if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
 		return -EINVAL;
 
@@ -3167,6 +3169,15 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
 	    !nla_len(info->attrs[NL80211_ATTR_SSID]))
 		return -EINVAL;
 
+	ibss.beacon_interval = 100;
+
+	if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) {
+		ibss.beacon_interval =
+			nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
+		if (ibss.beacon_interval < 1 || ibss.beacon_interval > 10000)
+			return -EINVAL;
+	}
+
 	rtnl_lock();
 
 	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
@@ -3189,7 +3200,6 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	wiphy = &drv->wiphy;
-	memset(&ibss, 0, sizeof(ibss));
 
 	if (info->attrs[NL80211_ATTR_MAC])
 		ibss.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
-- 
cgit v1.2.3-70-g09d2


From 04fe20372e70685d9f15966216cdffd3795fe590 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 22 Apr 2009 18:44:37 +0200
Subject: mac80211: calculate maximum sleep interval

The maximum sleep interval, for powersave purposes, is
determined by the DTIM period (it may not be larger)
and the required networking latency (it must be small
enough to fulfil those constraints).

This makes mac80211 calculate the maximum sleep interval
based on those constraints, and pass it to the driver.
Then the driver should instruct the device to sleep at
most that long.

Note that the device is responsible for aligning the
maximum sleep interval between DTIMs, we make sure it's
not longer but it needs to make sure it's between them.

Also, group some powersave documentation together and
make it more explicit that we support managed mode only,
and no IBSS powersaving (yet).

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 19 ++++++++++++++++---
 net/mac80211/mlme.c    | 20 ++++++++++++++++----
 2 files changed, 32 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 183956e4930..446dbf75a1c 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -517,7 +517,7 @@ struct ieee80211_rx_status {
  * Flags to define PHY configuration options
  *
  * @IEEE80211_CONF_RADIOTAP: add radiotap header at receive time (if supported)
- * @IEEE80211_CONF_PS: Enable 802.11 power save mode
+ * @IEEE80211_CONF_PS: Enable 802.11 power save mode (managed mode only)
  */
 enum ieee80211_conf_flags {
 	IEEE80211_CONF_RADIOTAP		= (1<<0),
@@ -553,14 +553,26 @@ enum ieee80211_conf_changed {
  *
  * This struct indicates how the driver shall configure the hardware.
  *
+ * @flags: configuration flags defined above
+ *
  * @radio_enabled: when zero, driver is required to switch off the radio.
  * @beacon_int: beacon interval (TODO make interface config)
+ *
  * @listen_interval: listen interval in units of beacon interval
- * @flags: configuration flags defined above
+ * @max_sleep_interval: the maximum number of beacon intervals to sleep for
+ *	before checking the beacon for a TIM bit (managed mode only); this
+ *	value will be only achievable between DTIM frames, the hardware
+ *	needs to check for the multicast traffic bit in DTIM beacons.
+ *	This variable is valid only when the CONF_PS flag is set.
+ * @dynamic_ps_timeout: The dynamic powersave timeout (in ms), see the
+ *	powersave documentation below. This variable is valid only when
+ *	the CONF_PS flag is set.
+ *
  * @power_level: requested transmit power (in dBm)
- * @dynamic_ps_timeout: dynamic powersave timeout (in ms)
+ *
  * @channel: the channel to tune to
  * @channel_type: the channel (HT) type
+ *
  * @long_frame_max_tx_count: Maximum number of transmissions for a "long" frame
  *    (a frame not RTS protected), called "dot11LongRetryLimit" in 802.11,
  *    but actually means the number of transmissions not the number of retries
@@ -572,6 +584,7 @@ struct ieee80211_conf {
 	int beacon_int;
 	u32 flags;
 	int power_level, dynamic_ps_timeout;
+	int max_sleep_interval;
 
 	u16 listen_interval;
 	bool radio_enabled;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index e819c02d13f..df27c68620c 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -545,10 +545,19 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency)
 		beaconint_us = ieee80211_tu_to_usec(
 					found->vif.bss_conf.beacon_int);
 
-		if (beaconint_us > latency)
+		if (beaconint_us > latency) {
 			local->ps_sdata = NULL;
-		else
+		} else {
+			u8 dtimper = found->vif.bss_conf.dtim_period;
+			int maxslp = 1;
+
+			if (dtimper > 1)
+				maxslp = min_t(int, dtimper,
+						    latency / beaconint_us);
+
+			local->hw.conf.max_sleep_interval = maxslp;
 			local->ps_sdata = found;
+		}
 	} else {
 		local->ps_sdata = NULL;
 	}
@@ -851,8 +860,11 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 	ieee80211_bss_info_change_notify(sdata, bss_info_changed);
 
 	/* will be same as sdata */
-	if (local->ps_sdata)
-		ieee80211_enable_ps(local, sdata);
+	if (local->ps_sdata) {
+		mutex_lock(&local->iflist_mtx);
+		ieee80211_recalc_ps(local, -1);
+		mutex_unlock(&local->iflist_mtx);
+	}
 
 	netif_tx_start_all_queues(sdata->dev);
 	netif_carrier_on(sdata->dev);
-- 
cgit v1.2.3-70-g09d2


From 1965c85331ed29dc4fd32479ff31663e3e9a518f Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni.malinen@atheros.com>
Date: Wed, 22 Apr 2009 21:38:25 +0300
Subject: nl80211: Add event for authentication/association timeout

SME needs to be notified when the authentication or association
attempt times out and MLME has stopped processing in order to allow
the SME to decide what to do next.

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 13 +++++++++++--
 include/net/cfg80211.h  | 22 ++++++++++++++++++++--
 net/mac80211/mlme.c     |  4 ++--
 net/wireless/mlme.c     | 27 +++++++++++++++++++++++++++
 net/wireless/nl80211.c  | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 net/wireless/nl80211.h  |  6 ++++++
 6 files changed, 115 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index b6a48dd502c..e9fd13aa79f 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -203,8 +203,12 @@
  *	frame, i.e., it was for the local STA and was received in correct
  *	state. This is similar to MLME-AUTHENTICATE.confirm primitive in the
  *	MLME SAP interface (kernel providing MLME, userspace SME). The
- *	included NL80211_ATTR_FRAME attribute contains the management frame
- *	(including both the header and frame body, but not FCS).
+ *	included %NL80211_ATTR_FRAME attribute contains the management frame
+ *	(including both the header and frame body, but not FCS). This event is
+ *	also used to indicate if the authentication attempt timed out. In that
+ *	case the %NL80211_ATTR_FRAME attribute is replaced with a
+ *	%NL80211_ATTR_TIMED_OUT flag (and %NL80211_ATTR_MAC to indicate which
+ *	pending authentication timed out).
  * @NL80211_CMD_ASSOCIATE: association request and notification; like
  *	NL80211_CMD_AUTHENTICATE but for Association and Reassociation
  *	(similar to MLME-ASSOCIATE.request, MLME-REASSOCIATE.request,
@@ -487,6 +491,9 @@ enum nl80211_commands {
  * @NL80211_ATTR_FREQ_FIXED: a flag indicating the IBSS should not try to look
  *	for other networks on different channels
  *
+ * @NL80211_ATTR_TIMED_OUT: a flag indicating than an operation timed out; this
+ *	is used, e.g., with %NL80211_CMD_AUTHENTICATE event
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -587,6 +594,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_WIPHY_FRAG_THRESHOLD,
 	NL80211_ATTR_WIPHY_RTS_THRESHOLD,
 
+	NL80211_ATTR_TIMED_OUT,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7f7b53b69cb..b8a76764e1c 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1475,10 +1475,19 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *bss);
  * @len: length of the frame data
  *
  * This function is called whenever an authentication has been processed in
- * station mode.
+ * station mode. The driver is required to call either this function or
+ * cfg80211_send_auth_timeout() to indicate the result of cfg80211_ops::auth()
+ * call.
  */
 void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len);
 
+/**
+ * cfg80211_send_auth_timeout - notification of timed out authentication
+ * @dev: network device
+ * @addr: The MAC address of the device with which the authentication timed out
+ */
+void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr);
+
 /**
  * cfg80211_send_rx_assoc - notification of processed association
  * @dev: network device
@@ -1486,10 +1495,19 @@ void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len);
  * @len: length of the frame data
  *
  * This function is called whenever a (re)association response has been
- * processed in station mode.
+ * processed in station mode. The driver is required to call either this
+ * function or cfg80211_send_assoc_timeout() to indicate the result of
+ * cfg80211_ops::assoc() call.
  */
 void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len);
 
+/**
+ * cfg80211_send_assoc_timeout - notification of timed out association
+ * @dev: network device
+ * @addr: The MAC address of the device with which the association timed out
+ */
+void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr);
+
 /**
  * cfg80211_send_deauth - notification of processed deauthentication
  * @dev: network device
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index df27c68620c..3610c11286b 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -932,7 +932,7 @@ static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata)
 		       " timed out\n",
 		       sdata->dev->name, ifmgd->bssid);
 		ifmgd->state = IEEE80211_STA_MLME_DISABLED;
-		ieee80211_sta_send_apinfo(sdata);
+		cfg80211_send_auth_timeout(sdata->dev, ifmgd->bssid);
 		ieee80211_rx_bss_remove(sdata, ifmgd->bssid,
 				sdata->local->hw.conf.channel->center_freq,
 				ifmgd->ssid, ifmgd->ssid_len);
@@ -1115,7 +1115,7 @@ static void ieee80211_associate(struct ieee80211_sub_if_data *sdata)
 		       " timed out\n",
 		       sdata->dev->name, ifmgd->bssid);
 		ifmgd->state = IEEE80211_STA_MLME_DISABLED;
-		ieee80211_sta_send_apinfo(sdata);
+		cfg80211_send_assoc_timeout(sdata->dev, ifmgd->bssid);
 		ieee80211_rx_bss_remove(sdata, ifmgd->bssid,
 				sdata->local->hw.conf.channel->center_freq,
 				ifmgd->ssid, ifmgd->ssid_len);
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 1407244a647..42184361a10 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -44,6 +44,33 @@ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len)
 }
 EXPORT_SYMBOL(cfg80211_send_disassoc);
 
+static void cfg80211_wext_disconnected(struct net_device *dev)
+{
+#ifdef CONFIG_WIRELESS_EXT
+	union iwreq_data wrqu;
+	memset(&wrqu, 0, sizeof(wrqu));
+	wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL);
+#endif
+}
+
+void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr)
+{
+	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	nl80211_send_auth_timeout(rdev, dev, addr);
+	cfg80211_wext_disconnected(dev);
+}
+EXPORT_SYMBOL(cfg80211_send_auth_timeout);
+
+void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr)
+{
+	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	nl80211_send_assoc_timeout(rdev, dev, addr);
+	cfg80211_wext_disconnected(dev);
+}
+EXPORT_SYMBOL(cfg80211_send_assoc_timeout);
+
 void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr,
 				  enum nl80211_key_type key_type, int key_id,
 				  const u8 *tsc)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 3b21b3e89e9..b1fc98225fd 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -121,6 +121,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 	[NL80211_ATTR_AUTH_TYPE] = { .type = NLA_U32 },
 	[NL80211_ATTR_REASON_CODE] = { .type = NLA_U16 },
 	[NL80211_ATTR_FREQ_FIXED] = { .type = NLA_FLAG },
+	[NL80211_ATTR_TIMED_OUT] = { .type = NLA_FLAG },
 };
 
 /* IE validation */
@@ -3695,6 +3696,54 @@ void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
 				NL80211_CMD_DISASSOCIATE);
 }
 
+void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev,
+			       struct net_device *netdev, int cmd,
+			       const u8 *addr)
+{
+	struct sk_buff *msg;
+	void *hdr;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, cmd);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
+	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
+	NLA_PUT_FLAG(msg, NL80211_ATTR_TIMED_OUT);
+	NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, addr);
+
+	if (genlmsg_end(msg, hdr) < 0) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, GFP_ATOMIC);
+	return;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+}
+
+void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev,
+			       struct net_device *netdev, const u8 *addr)
+{
+	nl80211_send_mlme_timeout(rdev, netdev, NL80211_CMD_AUTHENTICATE,
+				  addr);
+}
+
+void nl80211_send_assoc_timeout(struct cfg80211_registered_device *rdev,
+				struct net_device *netdev, const u8 *addr)
+{
+	nl80211_send_mlme_timeout(rdev, netdev, NL80211_CMD_ASSOCIATE, addr);
+}
+
 void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
 			     struct net_device *netdev, const u8 *bssid,
 			     gfp_t gfp)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 17d2d8bfaf7..5c12ad13499 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -23,6 +23,12 @@ extern void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
 extern void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
 				  struct net_device *netdev,
 				  const u8 *buf, size_t len);
+extern void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev,
+				      struct net_device *netdev,
+				      const u8 *addr);
+extern void nl80211_send_assoc_timeout(struct cfg80211_registered_device *rdev,
+				       struct net_device *netdev,
+				       const u8 *addr);
 extern void
 nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
 			    struct net_device *netdev, const u8 *addr,
-- 
cgit v1.2.3-70-g09d2


From 9d5c5d8f4105dc56ec10864b195dd1714f282c22 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Tue, 21 Apr 2009 23:26:22 +0000
Subject: af_iucv: add sockopt() to enable/disable use of IPRM_DATA msgs

Provide the socket operations getsocktopt() and setsockopt() to enable/disable
sending of data in the parameter list of IUCV messages.
The patch sets respective flag only.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ursula.braun@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h     |  1 +
 include/net/iucv/af_iucv.h |  4 +++
 net/iucv/af_iucv.c         | 79 ++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 81 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 42a0396f2c5..d2310cb45d2 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -303,6 +303,7 @@ struct ucred {
 #define SOL_BLUETOOTH	274
 #define SOL_PNPIPE	275
 #define SOL_RDS		276
+#define SOL_IUCV	277
 
 /* IPX options */
 #define IPX_TYPE	1
diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h
index 85f80eadfa3..78a72764aef 100644
--- a/include/net/iucv/af_iucv.h
+++ b/include/net/iucv/af_iucv.h
@@ -73,8 +73,12 @@ struct iucv_sock {
 	struct sk_buff_head	backlog_skb_q;
 	struct sock_msg_q	message_q;
 	unsigned int		send_tag;
+	u8			flags;
 };
 
+/* iucv socket options (SOL_IUCV) */
+#define SO_IPRMDATA_MSG	0x0080		/* send/recv IPRM_DATA msgs */
+
 struct iucv_sock_list {
 	struct hlist_head head;
 	rwlock_t	  lock;
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 6cf02b41ef9..b7c40c97992 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -32,7 +32,7 @@
 #define CONFIG_IUCV_SOCK_DEBUG 1
 
 #define IPRMDATA 0x80
-#define VERSION "1.0"
+#define VERSION "1.1"
 
 static char iucv_userid[80];
 
@@ -226,6 +226,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio)
 	spin_lock_init(&iucv_sk(sk)->message_q.lock);
 	skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q);
 	iucv_sk(sk)->send_tag = 0;
+	iucv_sk(sk)->flags = 0;
 
 	sk->sk_destruct = iucv_sock_destruct;
 	sk->sk_sndtimeo = IUCV_CONN_TIMEOUT;
@@ -1003,6 +1004,78 @@ static int iucv_sock_release(struct socket *sock)
 	return err;
 }
 
+/* getsockopt and setsockopt */
+static int iucv_sock_setsockopt(struct socket *sock, int level, int optname,
+				char __user *optval, int optlen)
+{
+	struct sock *sk = sock->sk;
+	struct iucv_sock *iucv = iucv_sk(sk);
+	int val;
+	int rc;
+
+	if (level != SOL_IUCV)
+		return -ENOPROTOOPT;
+
+	if (optlen < sizeof(int))
+		return -EINVAL;
+
+	if (get_user(val, (int __user *) optval))
+		return -EFAULT;
+
+	rc = 0;
+
+	lock_sock(sk);
+	switch (optname) {
+	case SO_IPRMDATA_MSG:
+		if (val)
+			iucv->flags |= IUCV_IPRMDATA;
+		else
+			iucv->flags &= ~IUCV_IPRMDATA;
+		break;
+	default:
+		rc = -ENOPROTOOPT;
+		break;
+	}
+	release_sock(sk);
+
+	return rc;
+}
+
+static int iucv_sock_getsockopt(struct socket *sock, int level, int optname,
+				char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	struct iucv_sock *iucv = iucv_sk(sk);
+	int val, len;
+
+	if (level != SOL_IUCV)
+		return -ENOPROTOOPT;
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+
+	if (len < 0)
+		return -EINVAL;
+
+	len = min_t(unsigned int, len, sizeof(int));
+
+	switch (optname) {
+	case SO_IPRMDATA_MSG:
+		val = (iucv->flags & IUCV_IPRMDATA) ? 1 : 0;
+		break;
+	default:
+		return -ENOPROTOOPT;
+	}
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &val, len))
+		return -EFAULT;
+
+	return 0;
+}
+
+
 /* Callback wrappers - called from iucv base support */
 static int iucv_callback_connreq(struct iucv_path *path,
 				 u8 ipvmid[8], u8 ipuser[16])
@@ -1229,8 +1302,8 @@ static struct proto_ops iucv_sock_ops = {
 	.mmap		= sock_no_mmap,
 	.socketpair	= sock_no_socketpair,
 	.shutdown	= iucv_sock_shutdown,
-	.setsockopt	= sock_no_setsockopt,
-	.getsockopt	= sock_no_getsockopt
+	.setsockopt	= iucv_sock_setsockopt,
+	.getsockopt	= iucv_sock_getsockopt,
 };
 
 static struct net_proto_family iucv_sock_family_ops = {
-- 
cgit v1.2.3-70-g09d2


From 44b1e6b5f9a93cc2ba024e09cf137d5f1b5f8426 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Tue, 21 Apr 2009 23:26:24 +0000
Subject: af_iucv: Modify iucv msg target class using control msghdr

Allow 'classification' of socket data that is sent or received over
an af_iucv socket. For classification of data, the target class of an
(native) iucv message is used.

This patch provides the cmsg interface for iucv_sock_recvmsg() and
iucv_sock_sendmsg().  Applications can use the msg_control field of
struct msghdr to set or get the target class as a
"socket control message" (SCM/CMSG).

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ursula.braun@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/iucv/af_iucv.h |  3 ++
 net/iucv/af_iucv.c         | 79 +++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 78 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h
index 78a72764aef..b57739e49dc 100644
--- a/include/net/iucv/af_iucv.h
+++ b/include/net/iucv/af_iucv.h
@@ -79,6 +79,9 @@ struct iucv_sock {
 /* iucv socket options (SOL_IUCV) */
 #define SO_IPRMDATA_MSG	0x0080		/* send/recv IPRM_DATA msgs */
 
+/* iucv related control messages (scm) */
+#define SCM_IUCV_TRGCLS	0x0001		/* target class control message */
+
 struct iucv_sock_list {
 	struct hlist_head head;
 	rwlock_t	  lock;
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 5fc077ee583..47c5c8d3703 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -45,6 +45,15 @@ static struct proto iucv_proto = {
 static const u8 iprm_shutdown[8] =
 	{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01};
 
+#define TRGCLS_SIZE	(sizeof(((struct iucv_message *)0)->class))
+
+/* macros to set/get socket control buffer at correct offset */
+#define CB_TAG(skb)	((skb)->cb)		/* iucv message tag */
+#define CB_TAG_LEN	(sizeof(((struct iucv_message *) 0)->tag))
+#define CB_TRGCLS(skb)	((skb)->cb + CB_TAG_LEN) /* iucv msg target class */
+#define CB_TRGCLS_LEN	(TRGCLS_SIZE)
+
+
 static void iucv_sock_kill(struct sock *sk);
 static void iucv_sock_close(struct sock *sk);
 
@@ -698,6 +707,8 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 	struct iucv_sock *iucv = iucv_sk(sk);
 	struct sk_buff *skb;
 	struct iucv_message txmsg;
+	struct cmsghdr *cmsg;
+	int cmsg_done;
 	char user_id[9];
 	char appl_id[9];
 	int err;
@@ -717,6 +728,48 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 	}
 
 	if (sk->sk_state == IUCV_CONNECTED) {
+		/* initialize defaults */
+		cmsg_done   = 0;	/* check for duplicate headers */
+		txmsg.class = 0;
+
+		/* iterate over control messages */
+		for (cmsg = CMSG_FIRSTHDR(msg); cmsg;
+		     cmsg = CMSG_NXTHDR(msg, cmsg)) {
+
+			if (!CMSG_OK(msg, cmsg)) {
+				err = -EINVAL;
+				goto out;
+			}
+
+			if (cmsg->cmsg_level != SOL_IUCV)
+				continue;
+
+			if (cmsg->cmsg_type & cmsg_done) {
+				err = -EINVAL;
+				goto out;
+			}
+			cmsg_done |= cmsg->cmsg_type;
+
+			switch (cmsg->cmsg_type) {
+			case SCM_IUCV_TRGCLS:
+				if (cmsg->cmsg_len != CMSG_LEN(TRGCLS_SIZE)) {
+					err = -EINVAL;
+					goto out;
+				}
+
+				/* set iucv message target class */
+				memcpy(&txmsg.class,
+					(void *) CMSG_DATA(cmsg), TRGCLS_SIZE);
+
+				break;
+
+			default:
+				err = -EINVAL;
+				goto out;
+				break;
+			}
+		}
+
 		if (!(skb = sock_alloc_send_skb(sk, len,
 						msg->msg_flags & MSG_DONTWAIT,
 						&err)))
@@ -727,10 +780,9 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 			goto fail;
 		}
 
-		txmsg.class = 0;
-		memcpy(&txmsg.class, skb->data, skb->len >= 4 ? 4 : skb->len);
+		/* increment and save iucv message tag for msg_completion cbk */
 		txmsg.tag = iucv->send_tag++;
-		memcpy(skb->cb, &txmsg.tag, 4);
+		memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN);
 		skb_queue_tail(&iucv->send_skb_q, skb);
 
 		if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags)
@@ -801,6 +853,10 @@ static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len)
 		if (!nskb)
 			return -ENOMEM;
 
+		/* copy target class to control buffer of new skb */
+		memcpy(CB_TRGCLS(nskb), CB_TRGCLS(skb), CB_TRGCLS_LEN);
+
+		/* copy data fragment */
 		memcpy(nskb->data, skb->data + copied, size);
 		copied += size;
 		dataleft -= size;
@@ -824,6 +880,10 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb,
 
 	len = iucv_msg_length(msg);
 
+	/* store msg target class in the second 4 bytes of skb ctrl buffer */
+	/* Note: the first 4 bytes are reserved for msg tag */
+	memcpy(CB_TRGCLS(skb), &msg->class, CB_TRGCLS_LEN);
+
 	/* check for special IPRM messages (e.g. iucv_sock_shutdown) */
 	if ((msg->flags & IUCV_IPRMDATA) && len > 7) {
 		if (memcmp(msg->rmmsg, iprm_shutdown, 8) == 0) {
@@ -915,6 +975,17 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 	len -= copied;
 
+	/* create control message to store iucv msg target class:
+	 * get the trgcls from the control buffer of the skb due to
+	 * fragmentation of original iucv message. */
+	err = put_cmsg(msg, SOL_IUCV, SCM_IUCV_TRGCLS,
+			CB_TRGCLS_LEN, CB_TRGCLS(skb));
+	if (err) {
+		if (!(flags & MSG_PEEK))
+			skb_queue_head(&sk->sk_receive_queue, skb);
+		return err;
+	}
+
 	/* Mark read part of skb as used */
 	if (!(flags & MSG_PEEK)) {
 		skb_pull(skb, copied);
@@ -1316,7 +1387,7 @@ static void iucv_callback_txdone(struct iucv_path *path,
 		spin_lock_irqsave(&list->lock, flags);
 
 		while (list_skb != (struct sk_buff *)list) {
-			if (!memcmp(&msg->tag, list_skb->cb, 4)) {
+			if (!memcmp(&msg->tag, CB_TAG(list_skb), CB_TAG_LEN)) {
 				this = list_skb;
 				break;
 			}
-- 
cgit v1.2.3-70-g09d2


From 09488e2e0fab14ebe41135f0d066cfe2c56ba9e5 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Tue, 21 Apr 2009 23:26:27 +0000
Subject: af_iucv: New socket option for setting IUCV MSGLIMITs

The SO_MSGLIMIT socket option modifies the message limit for new
IUCV communication paths.

The message limit specifies the maximum number of outstanding messages
that are allowed for connections. This setting can be lowered by z/VM
when an IUCV connection is established.

Expects an integer value in the range of 1 to 65535.
The default value is 65535.

The message limit must be set before calling connect() or listen()
for sockets.

If sockets are already connected or in state listen, changing the message
limit is not supported.
For reading the message limit value, unconnected sockets return the limit
that has been set or the default limit. For connected sockets, the actual
message limit is returned. The actual message limit is assigned by z/VM
for each connection and it depends on IUCV MSGLIMIT authorizations
specified for the z/VM guest virtual machine.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ursula.braun@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/iucv/af_iucv.h |  2 ++
 net/iucv/af_iucv.c         | 27 +++++++++++++++++++++++++--
 2 files changed, 27 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h
index b57739e49dc..21ee49ffcba 100644
--- a/include/net/iucv/af_iucv.h
+++ b/include/net/iucv/af_iucv.h
@@ -74,10 +74,12 @@ struct iucv_sock {
 	struct sock_msg_q	message_q;
 	unsigned int		send_tag;
 	u8			flags;
+	u16			msglimit;
 };
 
 /* iucv socket options (SOL_IUCV) */
 #define SO_IPRMDATA_MSG	0x0080		/* send/recv IPRM_DATA msgs */
+#define SO_MSGLIMIT	0x1000		/* get/set IUCV MSGLIMIT */
 
 /* iucv related control messages (scm) */
 #define SCM_IUCV_TRGCLS	0x0001		/* target class control message */
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index f0dea1b8ed4..264c6b36931 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -268,6 +268,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio)
 	skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q);
 	iucv_sk(sk)->send_tag = 0;
 	iucv_sk(sk)->flags = 0;
+	iucv_sk(sk)->msglimit = IUCV_QUEUELEN_DEFAULT;
 
 	sk->sk_destruct = iucv_sock_destruct;
 	sk->sk_sndtimeo = IUCV_CONN_TIMEOUT;
@@ -536,7 +537,7 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr,
 
 	iucv = iucv_sk(sk);
 	/* Create path. */
-	iucv->path = iucv_path_alloc(IUCV_QUEUELEN_DEFAULT,
+	iucv->path = iucv_path_alloc(iucv->msglimit,
 				     IUCV_IPRMDATA, GFP_KERNEL);
 	if (!iucv->path) {
 		err = -ENOMEM;
@@ -1219,6 +1220,20 @@ static int iucv_sock_setsockopt(struct socket *sock, int level, int optname,
 		else
 			iucv->flags &= ~IUCV_IPRMDATA;
 		break;
+	case SO_MSGLIMIT:
+		switch (sk->sk_state) {
+		case IUCV_OPEN:
+		case IUCV_BOUND:
+			if (val < 1 || val > (u16)(~0))
+				rc = -EINVAL;
+			else
+				iucv->msglimit = val;
+			break;
+		default:
+			rc = -EINVAL;
+			break;
+		}
+		break;
 	default:
 		rc = -ENOPROTOOPT;
 		break;
@@ -1250,6 +1265,12 @@ static int iucv_sock_getsockopt(struct socket *sock, int level, int optname,
 	case SO_IPRMDATA_MSG:
 		val = (iucv->flags & IUCV_IPRMDATA) ? 1 : 0;
 		break;
+	case SO_MSGLIMIT:
+		lock_sock(sk);
+		val = (iucv->path != NULL) ? iucv->path->msglim	/* connected */
+					   : iucv->msglimit;	/* default */
+		release_sock(sk);
+		break;
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -1339,7 +1360,9 @@ static int iucv_callback_connreq(struct iucv_path *path,
 	memcpy(nuser_data + 8, niucv->src_name, 8);
 	ASCEBC(nuser_data + 8, 8);
 
-	path->msglim = IUCV_QUEUELEN_DEFAULT;
+	/* set message limit for path based on msglimit of accepting socket */
+	niucv->msglimit = iucv->msglimit;
+	path->msglim = iucv->msglimit;
 	err = iucv_path_accept(path, &af_iucv_handler, nuser_data, nsk);
 	if (err) {
 		err = iucv_path_sever(path, user_data);
-- 
cgit v1.2.3-70-g09d2


From 89ec0dee9eba6275d47be0b878cf5f6d5c2fb6eb Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 26 Mar 2009 11:03:29 -0400
Subject: tracing: increase size of number of possible events

With the new event tracing registration, we must increase the number
of events that can be registered. Currently the type field is only
one byte, which leaves us only 256 possible events.

Since we do not save the CPU number in the tracer anymore (it is determined
by the per cpu ring buffer that is used) we have an extra byte to use.

This patch increases the size of type from 1 byte (256 events) to
2 bytes (65,536 events).

It also adds a WARN_ON_ONCE if we exceed that limit.

[ Impact: allow more than 255 events ]

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 include/linux/ftrace_event.h | 5 ++++-
 kernel/trace/trace_events.c  | 2 +-
 kernel/trace/trace_output.c  | 2 ++
 3 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 2a4a4074991..07e0a6d64a2 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -16,13 +16,16 @@ struct dentry;
  *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
  */
 struct trace_entry {
-	int			type;
+	unsigned short		type;
 	unsigned char		flags;
 	unsigned char		preempt_count;
 	int			pid;
 	int			tgid;
 };
 
+#define FTRACE_MAX_EVENT						\
+	((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1)
+
 /*
  * Trace iterator - used by printout routines who present trace
  * results to users and which routines might sleep, etc:
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 5d6e879cf87..9887131afa0 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -398,7 +398,7 @@ static int trace_write_header(struct trace_seq *s)
 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
 				"\n",
-				FIELD(int, type),
+				FIELD(unsigned short, type),
 				FIELD(unsigned char, flags),
 				FIELD(unsigned char, preempt_count),
 				FIELD(int, pid),
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 83a8abb9640..06997e75114 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -537,6 +537,8 @@ int register_ftrace_event(struct trace_event *event)
  out:
 	mutex_unlock(&trace_event_mutex);
 
+	WARN_ON_ONCE(next_event_type > FTRACE_MAX_EVENT);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(register_ftrace_event);
-- 
cgit v1.2.3-70-g09d2


From c2518c4366f087ebc10b3919cb2461bbe4f42d0c Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 23 Apr 2009 23:26:18 -0400
Subject: tracing: fix cut and paste macro error

In case a module uses the TRACE_EVENT macro for creating automated
events in ftrace, it may choose to use a different file name
than the defined system name, or choose to use a different path than
the default "include/trace/events" include path.

If this is done, then before including trace/define_trace.h the
header would define either "TRACE_INCLUDE_FILE" for the file
name or "TRACE_INCLUDE_PATH" for the include path.

If it does not define these, then the define_trace.h defines them
instead. If define trace defines them, then define_trace.h should
also undefine them before exiting. To do this a macro is used
to note this:

 #ifndef TRACE_INCLUDE_FILE
 # define TRACE_INCLUDE_FILE TRACE_SYSTEM
 # define UNDEF_TRACE_INCLUDE_FILE
 #endif

[...]

 #ifdef UNDEF_TRACE_INCLUDE_FILE
 # undef TRACE_INCLUDE_FILE
 # undef UNDEF_TRACE_INCLUDE_FILE
 #endif

The UNDEF_TRACE_INCLUDE_FILE acts as a CPP variable to know to undef
the TRACE_INCLUDE_FILE before leaving define_trace.h.

Unfortunately, due to cut and paste errors, the macros between
FILE and PATH got mixed up.

[ Impact: undef TRACE_INCLUDE_FILE and/or TRACE_INCLUDE_PATH when needed ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/define_trace.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index 7f1f23d601e..abc611feeb8 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -44,7 +44,7 @@
 
 #ifndef TRACE_INCLUDE_PATH
 # define __TRACE_INCLUDE(system) <trace/events/system.h>
-# define UNDEF_TRACE_INCLUDE_FILE
+# define UNDEF_TRACE_INCLUDE_PATH
 #else
 # define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h)
 #endif
@@ -64,13 +64,13 @@
 
 /* Only undef what we defined in this file */
 #ifdef UNDEF_TRACE_INCLUDE_FILE
-# undef TRACE_INCLUDE_PATH
+# undef TRACE_INCLUDE_FILE
 # undef UNDEF_TRACE_INCLUDE_FILE
 #endif
 
-#ifdef UNDEF_TRACE_INCLUDE_FILE
+#ifdef UNDEF_TRACE_INCLUDE_PATH
 # undef TRACE_INCLUDE_PATH
-# undef UNDEF_TRACE_INCLUDE_FILE
+# undef UNDEF_TRACE_INCLUDE_PATH
 #endif
 
 /* We may be processing more files */
-- 
cgit v1.2.3-70-g09d2


From 334d4169a6592d3fcd863bbe822a8f6985ffa9af Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Fri, 24 Apr 2009 11:27:05 +0800
Subject: ring_buffer: compressed event header

RB_MAX_SMALL_DATA = 28bytes is too small for most tracers, it wastes
an 'u32' to save the actually length for events which data size > 28.

This fix uses compressed event header and enlarges RB_MAX_SMALL_DATA.

[ Impact: saves about 0%-12.5%(depends on tracer) memory in ring_buffer ]

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
LKML-Reference: <49F13189.3090000@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h | 16 +++++----
 kernel/trace/ring_buffer.c  | 83 ++++++++++++++++++++++-----------------------
 2 files changed, 50 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index fac8f1ac6f4..1c2f80911fb 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -11,7 +11,7 @@ struct ring_buffer_iter;
  * Don't refer to this struct directly, use functions below.
  */
 struct ring_buffer_event {
-	u32		type:2, len:3, time_delta:27;
+	u32		type_len:5, time_delta:27;
 	u32		array[];
 };
 
@@ -24,7 +24,8 @@ struct ring_buffer_event {
  *				  size is variable depending on how much
  *				  padding is needed
  *				 If time_delta is non zero:
- *				  everything else same as RINGBUF_TYPE_DATA
+ *				  array[0] holds the actual length
+ *				  size = 4 + length (bytes)
  *
  * @RINGBUF_TYPE_TIME_EXTEND:	Extend the time delta
  *				 array[0] = time delta (28 .. 59)
@@ -35,22 +36,23 @@ struct ring_buffer_event {
  *				 array[1..2] = tv_sec
  *				 size = 16 bytes
  *
- * @RINGBUF_TYPE_DATA:		Data record
- *				 If len is zero:
+ * <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX:
+ *				Data record
+ *				 If type_len is zero:
  *				  array[0] holds the actual length
  *				  array[1..(length+3)/4] holds data
- *				  size = 4 + 4 + length (bytes)
+ *				  size = 4 + length (bytes)
  *				 else
- *				  length = len << 2
+ *				  length = type_len << 2
  *				  array[0..(length+3)/4-1] holds data
  *				  size = 4 + length (bytes)
  */
 enum ring_buffer_type {
+	RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28,
 	RINGBUF_TYPE_PADDING,
 	RINGBUF_TYPE_TIME_EXTEND,
 	/* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */
 	RINGBUF_TYPE_TIME_STAMP,
-	RINGBUF_TYPE_DATA,
 };
 
 unsigned ring_buffer_event_length(struct ring_buffer_event *event);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 61dbdf21cd3..9692f100ec1 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -28,8 +28,8 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
 {
 	int ret;
 
-	ret = trace_seq_printf(s, "\ttype        :    2 bits\n");
-	ret = trace_seq_printf(s, "\tlen         :    3 bits\n");
+	ret = trace_seq_printf(s, "# compressed entry header\n");
+	ret = trace_seq_printf(s, "\ttype_len    :    5 bits\n");
 	ret = trace_seq_printf(s, "\ttime_delta  :   27 bits\n");
 	ret = trace_seq_printf(s, "\tarray       :   32 bits\n");
 	ret = trace_seq_printf(s, "\n");
@@ -37,8 +37,8 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
 			       RINGBUF_TYPE_PADDING);
 	ret = trace_seq_printf(s, "\ttime_extend : type == %d\n",
 			       RINGBUF_TYPE_TIME_EXTEND);
-	ret = trace_seq_printf(s, "\tdata        : type == %d\n",
-			       RINGBUF_TYPE_DATA);
+	ret = trace_seq_printf(s, "\tdata max type_len  == %d\n",
+			       RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
 
 	return ret;
 }
@@ -204,7 +204,10 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
 
 #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
 #define RB_ALIGNMENT		4U
-#define RB_MAX_SMALL_DATA	28
+#define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
+
+/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
+#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
 
 enum {
 	RB_LEN_TIME_EXTEND = 8,
@@ -213,17 +216,18 @@ enum {
 
 static inline int rb_null_event(struct ring_buffer_event *event)
 {
-	return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0;
+	return event->type_len == RINGBUF_TYPE_PADDING
+			&& event->time_delta == 0;
 }
 
 static inline int rb_discarded_event(struct ring_buffer_event *event)
 {
-	return event->type == RINGBUF_TYPE_PADDING && event->time_delta;
+	return event->type_len == RINGBUF_TYPE_PADDING && event->time_delta;
 }
 
 static void rb_event_set_padding(struct ring_buffer_event *event)
 {
-	event->type = RINGBUF_TYPE_PADDING;
+	event->type_len = RINGBUF_TYPE_PADDING;
 	event->time_delta = 0;
 }
 
@@ -232,8 +236,8 @@ rb_event_data_length(struct ring_buffer_event *event)
 {
 	unsigned length;
 
-	if (event->len)
-		length = event->len * RB_ALIGNMENT;
+	if (event->type_len)
+		length = event->type_len * RB_ALIGNMENT;
 	else
 		length = event->array[0];
 	return length + RB_EVNT_HDR_SIZE;
@@ -243,12 +247,12 @@ rb_event_data_length(struct ring_buffer_event *event)
 static unsigned
 rb_event_length(struct ring_buffer_event *event)
 {
-	switch (event->type) {
+	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
 		if (rb_null_event(event))
 			/* undefined */
 			return -1;
-		return rb_event_data_length(event);
+		return  event->array[0] + RB_EVNT_HDR_SIZE;
 
 	case RINGBUF_TYPE_TIME_EXTEND:
 		return RB_LEN_TIME_EXTEND;
@@ -272,7 +276,7 @@ rb_event_length(struct ring_buffer_event *event)
 unsigned ring_buffer_event_length(struct ring_buffer_event *event)
 {
 	unsigned length = rb_event_length(event);
-	if (event->type != RINGBUF_TYPE_DATA)
+	if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 		return length;
 	length -= RB_EVNT_HDR_SIZE;
 	if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
@@ -285,9 +289,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
 static void *
 rb_event_data(struct ring_buffer_event *event)
 {
-	BUG_ON(event->type != RINGBUF_TYPE_DATA);
+	BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
 	/* If length is in len field, then array[0] has the data */
-	if (event->len)
+	if (event->type_len)
 		return (void *)&event->array[0];
 	/* Otherwise length is in array[0] and array[1] has the data */
 	return (void *)&event->array[1];
@@ -988,7 +992,7 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
 		if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
 			return;
 		/* Only count data entries */
-		if (event->type != RINGBUF_TYPE_DATA)
+		if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 			continue;
 		cpu_buffer->overrun++;
 		cpu_buffer->entries--;
@@ -1133,28 +1137,21 @@ static void
 rb_update_event(struct ring_buffer_event *event,
 			 unsigned type, unsigned length)
 {
-	event->type = type;
+	event->type_len = type;
 
 	switch (type) {
 
 	case RINGBUF_TYPE_PADDING:
-		break;
-
 	case RINGBUF_TYPE_TIME_EXTEND:
-		event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT);
-		break;
-
 	case RINGBUF_TYPE_TIME_STAMP:
-		event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT);
 		break;
 
-	case RINGBUF_TYPE_DATA:
+	case 0:
 		length -= RB_EVNT_HDR_SIZE;
-		if (length > RB_MAX_SMALL_DATA) {
-			event->len = 0;
+		if (length > RB_MAX_SMALL_DATA)
 			event->array[0] = length;
-		} else
-			event->len = DIV_ROUND_UP(length, RB_ALIGNMENT);
+		else
+			event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
 		break;
 	default:
 		BUG();
@@ -1562,7 +1559,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 	if (length > BUF_PAGE_SIZE)
 		goto out;
 
-	event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
+	event = rb_reserve_next_event(cpu_buffer, 0, length);
 	if (!event)
 		goto out;
 
@@ -1634,7 +1631,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
 
 static inline void rb_event_discard(struct ring_buffer_event *event)
 {
-	event->type = RINGBUF_TYPE_PADDING;
+	/* array[0] holds the actual length for the discarded event */
+	event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
+	event->type_len = RINGBUF_TYPE_PADDING;
 	/* time delta must be non zero */
 	if (!event->time_delta)
 		event->time_delta = 1;
@@ -1786,8 +1785,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
 		goto out;
 
 	event_length = rb_calculate_event_length(length);
-	event = rb_reserve_next_event(cpu_buffer,
-				      RINGBUF_TYPE_DATA, event_length);
+	event = rb_reserve_next_event(cpu_buffer, 0, event_length);
 	if (!event)
 		goto out;
 
@@ -2035,7 +2033,7 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 {
 	u64 delta;
 
-	switch (event->type) {
+	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
 		return;
 
@@ -2066,7 +2064,7 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
 {
 	u64 delta;
 
-	switch (event->type) {
+	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
 		return;
 
@@ -2181,7 +2179,8 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
 
 	event = rb_reader_event(cpu_buffer);
 
-	if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event))
+	if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX
+			|| rb_discarded_event(event))
 		cpu_buffer->entries--;
 
 	rb_update_read_stamp(cpu_buffer, event);
@@ -2262,7 +2261,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 
 	event = rb_reader_event(cpu_buffer);
 
-	switch (event->type) {
+	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
 		if (rb_null_event(event))
 			RB_WARN_ON(cpu_buffer, 1);
@@ -2334,7 +2333,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 
 	event = rb_iter_head_event(iter);
 
-	switch (event->type) {
+	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
 		if (rb_null_event(event)) {
 			rb_inc_iter(iter);
@@ -2393,7 +2392,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	event = rb_buffer_peek(buffer, cpu, ts);
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
-	if (event && event->type == RINGBUF_TYPE_PADDING) {
+	if (event && event->type_len == RINGBUF_TYPE_PADDING) {
 		cpu_relax();
 		goto again;
 	}
@@ -2421,7 +2420,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 	event = rb_iter_peek(iter, ts);
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
-	if (event && event->type == RINGBUF_TYPE_PADDING) {
+	if (event && event->type_len == RINGBUF_TYPE_PADDING) {
 		cpu_relax();
 		goto again;
 	}
@@ -2466,7 +2465,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
  out:
 	preempt_enable();
 
-	if (event && event->type == RINGBUF_TYPE_PADDING) {
+	if (event && event->type_len == RINGBUF_TYPE_PADDING) {
 		cpu_relax();
 		goto again;
 	}
@@ -2559,7 +2558,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
  out:
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
-	if (event && event->type == RINGBUF_TYPE_PADDING) {
+	if (event && event->type_len == RINGBUF_TYPE_PADDING) {
 		cpu_relax();
 		goto again;
 	}
@@ -2766,7 +2765,7 @@ static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
 		if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
 			return;
 		/* Only count data entries */
-		if (event->type != RINGBUF_TYPE_DATA)
+		if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 			continue;
 		cpu_buffer->entries--;
 	}
-- 
cgit v1.2.3-70-g09d2


From 1cb81b143fa8f0e4629f10690862e2e52ca792ff Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 24 Apr 2009 09:51:43 +0200
Subject: x86, bts, mm: clean up buffer allocation

The current mm interface is asymetric. One function allocates a locked
buffer, another function only refunds the memory.

Change this to have two functions for accounting and refunding locked
memory, respectively; and do the actual buffer allocation in ptrace.

[ Impact: refactor BTS buffer allocation code ]

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090424095143.A30265@sedona.ch.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ptrace.c | 39 ++++++++++++++++++++++++++-------------
 include/linux/mm.h       |  6 ++++--
 mm/mlock.c               | 36 +++++++++++++++++-------------------
 3 files changed, 47 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index d5252ae6c52..09ecbde91c1 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -617,17 +617,28 @@ struct bts_context {
 	struct work_struct	work;
 };
 
-static inline void alloc_bts_buffer(struct bts_context *context,
-				    unsigned int size)
+static int alloc_bts_buffer(struct bts_context *context, unsigned int size)
 {
-	void *buffer;
+	void *buffer = NULL;
+	int err = -ENOMEM;
 
-	buffer = alloc_locked_buffer(size);
-	if (buffer) {
-		context->buffer = buffer;
-		context->size = size;
-		context->mm = get_task_mm(current);
-	}
+	err = account_locked_memory(current->mm, current->signal->rlim, size);
+	if (err < 0)
+		return err;
+
+	buffer = kzalloc(size, GFP_KERNEL);
+	if (!buffer)
+		goto out_refund;
+
+	context->buffer = buffer;
+	context->size = size;
+	context->mm = get_task_mm(current);
+
+	return 0;
+
+ out_refund:
+	refund_locked_memory(current->mm, size);
+	return err;
 }
 
 static inline void free_bts_buffer(struct bts_context *context)
@@ -638,7 +649,7 @@ static inline void free_bts_buffer(struct bts_context *context)
 	kfree(context->buffer);
 	context->buffer = NULL;
 
-	refund_locked_buffer_memory(context->mm, context->size);
+	refund_locked_memory(context->mm, context->size);
 	context->size = 0;
 
 	mmput(context->mm);
@@ -786,13 +797,15 @@ static int ptrace_bts_config(struct task_struct *child,
 	context->tracer = NULL;
 
 	if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) {
+		int err;
+
 		free_bts_buffer(context);
 		if (!cfg.size)
 			return 0;
 
-		alloc_bts_buffer(context, cfg.size);
-		if (!context->buffer)
-			return -ENOMEM;
+		err = alloc_bts_buffer(context, cfg.size);
+		if (err < 0)
+			return err;
 	}
 
 	if (cfg.flags & PTRACE_BTS_O_TRACE)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a3963ba23a6..009eabd3c21 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -19,6 +19,7 @@ struct anon_vma;
 struct file_ra_state;
 struct user_struct;
 struct writeback_control;
+struct rlimit;
 
 #ifndef CONFIG_DISCONTIGMEM          /* Don't use mapnrs, do it properly */
 extern unsigned long max_mapnr;
@@ -1319,7 +1320,8 @@ int vmemmap_populate_basepages(struct page *start_page,
 int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
 void vmemmap_populate_print_last(void);
 
-extern void *alloc_locked_buffer(size_t size);
-extern void refund_locked_buffer_memory(struct mm_struct *mm, size_t size);
+extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
+				 size_t size);
+extern void refund_locked_memory(struct mm_struct *mm, size_t size);
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/mm/mlock.c b/mm/mlock.c
index 28be15ead9c..ac130433c7d 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -629,38 +629,36 @@ void user_shm_unlock(size_t size, struct user_struct *user)
 	free_uid(user);
 }
 
-void *alloc_locked_buffer(size_t size)
+int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
+			  size_t size)
 {
-	unsigned long rlim, vm, pgsz;
-	void *buffer = NULL;
+	unsigned long lim, vm, pgsz;
+	int error = -ENOMEM;
 
 	pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
 
-	down_write(&current->mm->mmap_sem);
-
-	rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
-	vm   = current->mm->total_vm + pgsz;
-	if (rlim < vm)
-		goto out;
+	down_write(&mm->mmap_sem);
 
-	rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
-	vm   = current->mm->locked_vm + pgsz;
-	if (rlim < vm)
+	lim = rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+	vm   = mm->total_vm + pgsz;
+	if (lim < vm)
 		goto out;
 
-	buffer = kzalloc(size, GFP_KERNEL);
-	if (!buffer)
+	lim = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+	vm   = mm->locked_vm + pgsz;
+	if (lim < vm)
 		goto out;
 
-	current->mm->total_vm  += pgsz;
-	current->mm->locked_vm += pgsz;
+	mm->total_vm  += pgsz;
+	mm->locked_vm += pgsz;
 
+	error = 0;
  out:
-	up_write(&current->mm->mmap_sem);
-	return buffer;
+	up_write(&mm->mmap_sem);
+	return error;
 }
 
-void refund_locked_buffer_memory(struct mm_struct *mm, size_t size)
+void refund_locked_memory(struct mm_struct *mm, size_t size)
 {
 	unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
 
-- 
cgit v1.2.3-70-g09d2


From 39517091f88fae32b52254b561ced78da1eaf0a7 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 24 Apr 2009 11:05:52 -0400
Subject: tracing/lockdep: convert lockdep to use TRACE_EVENT macro

The TRACE_FORMAT will soon be deprecated. This patch converts it to
the TRACE_EVENT macro.

Note, this change should also speed up the tracing.

[ Impact: remove a user of deprecated TRACE_FORMAT ]

Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/events/lockdep.h | 56 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 46 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h
index 3ca315c1429..0e956c9dfd7 100644
--- a/include/trace/events/lockdep.h
+++ b/include/trace/events/lockdep.h
@@ -9,28 +9,64 @@
 
 #ifdef CONFIG_LOCKDEP
 
-TRACE_FORMAT(lock_acquire,
+TRACE_EVENT(lock_acquire,
+
 	TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
 		int trylock, int read, int check,
 		struct lockdep_map *next_lock, unsigned long ip),
+
 	TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
-	TP_FMT("%s%s%s", trylock ? "try " : "",
-		read ? "read " : "", lock->name)
-	);
 
-TRACE_FORMAT(lock_release,
+	TP_STRUCT__entry(
+		__field(unsigned int, flags)
+		__string(name, lock->name)
+	),
+
+	TP_fast_assign(
+		__entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0);
+		__assign_str(name, lock->name);
+	),
+
+	TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "",
+		  (__entry->flags & 2) ? "read " : "",
+		  __get_str(name))
+);
+
+TRACE_EVENT(lock_release,
+
 	TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
+
 	TP_ARGS(lock, nested, ip),
-	TP_FMT("%s", lock->name)
-	);
+
+	TP_STRUCT__entry(
+		__string(name, lock->name)
+	),
+
+	TP_fast_assign(
+		__assign_str(name, lock->name);
+	),
+
+	TP_printk("%s", __get_str(name))
+);
 
 #ifdef CONFIG_LOCK_STAT
 
-TRACE_FORMAT(lock_contended,
+TRACE_EVENT(lock_contended,
+
 	TP_PROTO(struct lockdep_map *lock, unsigned long ip),
+
 	TP_ARGS(lock, ip),
-	TP_FMT("%s", lock->name)
-	);
+
+	TP_STRUCT__entry(
+		__string(name, lock->name)
+	),
+
+	TP_fast_assign(
+		__assign_str(name, lock->name);
+	),
+
+	TP_printk("%s", __get_str(name))
+);
 
 TRACE_EVENT(lock_acquired,
 	TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime),
-- 
cgit v1.2.3-70-g09d2


From 160031b556e93590fa8635210d73d93c3d3853a9 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 24 Apr 2009 11:26:55 -0400
Subject: tracing/irq: convert irq traces to use TRACE_EVENT macro

The TRACE_FORMAT will soon be deprecated. This patch converts it to
the TRACE_EVENT macro.

Note, this change should also speed up the tracing.

[ Impact: remove a user of deprecated TRACE_FORMAT ]

Cc: Jason Baron <jbaron@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/events/irq.h | 57 ++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 48 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index 75e3468e449..76868646751 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -10,11 +10,24 @@
 /*
  * Tracepoint for entry of interrupt handler:
  */
-TRACE_FORMAT(irq_handler_entry,
+TRACE_EVENT(irq_handler_entry,
+
 	TP_PROTO(int irq, struct irqaction *action),
+
 	TP_ARGS(irq, action),
-	TP_FMT("irq=%d handler=%s", irq, action->name)
-	);
+
+	TP_STRUCT__entry(
+		__field(	int,	irq		)
+		__string(	name,	action->name	)
+	),
+
+	TP_fast_assign(
+		__entry->irq = irq;
+		__assign_str(name, action->name);
+	),
+
+	TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name))
+);
 
 /*
  * Tracepoint for return of an interrupt handler:
@@ -39,17 +52,43 @@ TRACE_EVENT(irq_handler_exit,
 		  __entry->irq, __entry->ret ? "handled" : "unhandled")
 );
 
-TRACE_FORMAT(softirq_entry,
+TRACE_EVENT(softirq_entry,
+
 	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+
 	TP_ARGS(h, vec),
-	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
-	);
 
-TRACE_FORMAT(softirq_exit,
+	TP_STRUCT__entry(
+		__field(	int,	vec			)
+		__string(	name,	softirq_to_name[h-vec]	)
+	),
+
+	TP_fast_assign(
+		__entry->vec = (int)(h - vec);
+		__assign_str(name, softirq_to_name[h-vec]);
+	),
+
+	TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name))
+);
+
+TRACE_EVENT(softirq_exit,
+
 	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+
 	TP_ARGS(h, vec),
-	TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
-	);
+
+	TP_STRUCT__entry(
+		__field(	int,	vec			)
+		__string(	name,	softirq_to_name[h-vec]	)
+	),
+
+	TP_fast_assign(
+		__entry->vec = (int)(h - vec);
+		__assign_str(name, softirq_to_name[h-vec]);
+	),
+
+	TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name))
+);
 
 #endif /*  _TRACE_IRQ_H */
 
-- 
cgit v1.2.3-70-g09d2


From b8e65554d80b4c560d201362d0e8fa02109d89fd Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 24 Apr 2009 11:50:39 -0400
Subject: tracing: remove deprecated TRACE_FORMAT

The TRACE_FORMAT macro has been deprecated by the TRACE_EVENT macro.
There are no more users. All new users must use the TRACE_EVENT macro.

[ Impact: remove old functionality ]

Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/tracepoint.h   |  5 ----
 include/trace/define_trace.h |  4 ---
 include/trace/ftrace.h       | 66 --------------------------------------------
 3 files changed, 75 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 4353f3f7e62..14df7e635d4 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -158,11 +158,6 @@ static inline void tracepoint_synchronize_unregister(void)
 
 #define PARAMS(args...) args
 
-#ifndef TRACE_FORMAT
-#define TRACE_FORMAT(name, proto, args, fmt)		\
-	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
-#endif
-
 #ifndef TRACE_EVENT
 /*
  * For use with the TRACE_EVENT macro:
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index abc611feeb8..f7a7ae1e8f9 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -26,10 +26,6 @@
 #define TRACE_EVENT(name, proto, args, tstruct, assign, print)	\
 	DEFINE_TRACE(name)
 
-#undef TRACE_FORMAT
-#define TRACE_FORMAT(name, proto, args, print)	\
-	DEFINE_TRACE(name)
-
 #undef DECLARE_TRACE
 #define DECLARE_TRACE(name, proto, args)	\
 	DEFINE_TRACE(name)
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index a77f71a46db..1e681142f1d 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -18,9 +18,6 @@
 
 #include <linux/ftrace_event.h>
 
-#undef TRACE_FORMAT
-#define TRACE_FORMAT(call, proto, args, fmt)
-
 #undef __array
 #define __array(type, item, len)	type	item[len];
 
@@ -62,9 +59,6 @@
  *
  */
 
-#undef TRACE_FORMAT
-#define TRACE_FORMAT(call, proto, args, fmt)
-
 #undef __array
 #define __array(type, item, len)
 
@@ -298,16 +292,6 @@ ftrace_define_fields_##call(void)					\
  *	unregister_trace_<call>(ftrace_event_<call>);
  * }
  *
- * For those macros defined with TRACE_FORMAT:
- *
- * static struct ftrace_event_call __used
- * __attribute__((__aligned__(4)))
- * __attribute__((section("_ftrace_events"))) event_<call> = {
- *	.name			= "<call>",
- *	.regfunc		= ftrace_reg_event_<call>,
- *	.unregfunc		= ftrace_unreg_event_<call>,
- * }
- *
  *
  * For those macros defined with TRACE_EVENT:
  *
@@ -417,56 +401,6 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \
 #define _TRACE_PROFILE_INIT(call)
 #endif
 
-#define _TRACE_FORMAT(call, proto, args, fmt)				\
-static void ftrace_event_##call(proto)					\
-{									\
-	event_trace_printk(_RET_IP_, #call ": " fmt);			\
-}									\
-									\
-static int ftrace_reg_event_##call(void)				\
-{									\
-	int ret;							\
-									\
-	ret = register_trace_##call(ftrace_event_##call);		\
-	if (ret)							\
-		pr_info("event trace: Could not activate trace point "	\
-			"probe to " #call "\n");			\
-	return ret;							\
-}									\
-									\
-static void ftrace_unreg_event_##call(void)				\
-{									\
-	unregister_trace_##call(ftrace_event_##call);			\
-}									\
-									\
-static struct ftrace_event_call event_##call;				\
-									\
-static int ftrace_init_event_##call(void)				\
-{									\
-	int id;								\
-									\
-	id = register_ftrace_event(NULL);				\
-	if (!id)							\
-		return -ENODEV;						\
-	event_##call.id = id;						\
-	return 0;							\
-}
-
-#undef TRACE_FORMAT
-#define TRACE_FORMAT(call, proto, args, fmt)				\
-_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt))		\
-_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args))			\
-static struct ftrace_event_call __used					\
-__attribute__((__aligned__(4)))						\
-__attribute__((section("_ftrace_events"))) event_##call = {		\
-	.name			= #call,				\
-	.system			= __stringify(TRACE_SYSTEM),		\
-	.raw_init		= ftrace_init_event_##call,		\
-	.regfunc		= ftrace_reg_event_##call,		\
-	.unregfunc		= ftrace_unreg_event_##call,		\
-	_TRACE_PROFILE_INIT(call)					\
-}
-
 #undef __entry
 #define __entry entry
 
-- 
cgit v1.2.3-70-g09d2


From 7629ad24f2b3df95c8b4cd8869e3c04e1df6c442 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Fri, 24 Apr 2009 16:37:44 +0200
Subject: ASoC: add SOC_DOUBLE_EXT macro

Add a macro for double controls with special callback functions.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index b1f2f8819fe..6ab80bf7abd 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -118,6 +118,14 @@
 	.info = snd_soc_info_volsw, \
 	.get = xhandler_get, .put = xhandler_put, \
 	.private_value = SOC_SINGLE_VALUE(xreg, xshift, xmax, xinvert) }
+#define SOC_DOUBLE_EXT(xname, xreg, shift_left, shift_right, xmax, xinvert,\
+	 xhandler_get, xhandler_put) \
+{	.iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = (xname),\
+	.info = snd_soc_info_volsw, \
+	.get = xhandler_get, .put = xhandler_put, \
+	.private_value = (unsigned long)&(struct soc_mixer_control) \
+		{.reg = xreg, .shift = shift_left, .rshift = shift_right, \
+		 .max = xmax, .invert = xinvert} }
 #define SOC_SINGLE_EXT_TLV(xname, xreg, xshift, xmax, xinvert,\
 	 xhandler_get, xhandler_put, tlv_array) \
 {	.iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \
-- 
cgit v1.2.3-70-g09d2


From a9e61e25f9d2e7e43bf17625f5cb56c9e0a89b17 Mon Sep 17 00:00:00 2001
From: Felix Blyakher <felixb@sgi.com>
Date: Tue, 31 Mar 2009 15:12:56 -0500
Subject: lockd: call locks_release_private to cleanup per-filesystem state

For every lock request lockd creates a new file_lock object
in nlmsvc_setgrantargs() by copying the passed in file_lock with
locks_copy_lock(). A filesystem can attach it's own lock_operations
vector to the file_lock. It has to be cleaned up at the end of the
file_lock's life. However, lockd doesn't do it today, yet it
asserts in nlmclnt_release_lockargs() that the per-filesystem
state is clean.
This patch fixes it by exporting locks_release_private() and adding
it to nlmsvc_freegrantargs(), to be symmetrical to creating a
file_lock in nlmsvc_setgrantargs().

Signed-off-by: Felix Blyakher <felixb@sgi.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/lockd/svclock.c | 2 ++
 fs/locks.c         | 3 ++-
 include/linux/fs.h | 1 +
 3 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 83ee34203bd..e577a78d7ba 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -326,6 +326,8 @@ static void nlmsvc_freegrantargs(struct nlm_rqst *call)
 {
 	if (call->a_args.lock.oh.data != call->a_owner)
 		kfree(call->a_args.lock.oh.data);
+
+	locks_release_private(&call->a_args.lock.fl);
 }
 
 /*
diff --git a/fs/locks.c b/fs/locks.c
index ec3deea29e3..b6440f52178 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -151,7 +151,7 @@ static struct file_lock *locks_alloc_lock(void)
 	return kmem_cache_alloc(filelock_cache, GFP_KERNEL);
 }
 
-static void locks_release_private(struct file_lock *fl)
+void locks_release_private(struct file_lock *fl)
 {
 	if (fl->fl_ops) {
 		if (fl->fl_ops->fl_release_private)
@@ -165,6 +165,7 @@ static void locks_release_private(struct file_lock *fl)
 	}
 
 }
+EXPORT_SYMBOL_GPL(locks_release_private);
 
 /* Free a lock which is not in use. */
 static void locks_free_lock(struct file_lock *fl)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5bed436f435..5ba615e8f53 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1108,6 +1108,7 @@ extern void locks_copy_lock(struct file_lock *, struct file_lock *);
 extern void __locks_copy_lock(struct file_lock *, const struct file_lock *);
 extern void locks_remove_posix(struct file *, fl_owner_t);
 extern void locks_remove_flock(struct file *);
+extern void locks_release_private(struct file_lock *);
 extern void posix_test_lock(struct file *, struct file_lock *);
 extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
 extern int posix_lock_file_wait(struct file *, struct file_lock *);
-- 
cgit v1.2.3-70-g09d2


From 060fa5c83e67901ba47ab484cfcdb32737d630ba Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 24 Apr 2009 12:20:52 -0400
Subject: tracing/events: reuse trace event ids after overflow

With modules being able to add trace events, and the max trace event
counter is 16 bits (65536) we can overflow the counter easily
with a simple while loop adding and removing modules that contain
trace events.

This patch links together the registered trace events and on overflow
searches for available trace event ids. It will still fail if
over 65536 events are registered, but considering that a typical
kernel only has 22000 functions, 65000 events should be sufficient.

Reported-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h |  1 +
 kernel/trace/trace_output.c  | 71 +++++++++++++++++++++++++++++++++++++-------
 2 files changed, 61 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 07e0a6d64a2..78a9ba24cbf 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -56,6 +56,7 @@ typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
 					      int flags);
 struct trace_event {
 	struct hlist_node	node;
+	struct list_head	list;
 	int			type;
 	trace_print_func	trace;
 	trace_print_func	raw;
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 06997e75114..5fc51f0f75f 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -483,6 +483,36 @@ struct trace_event *ftrace_find_event(int type)
 	return NULL;
 }
 
+static LIST_HEAD(ftrace_event_list);
+
+static int trace_search_list(struct list_head **list)
+{
+	struct trace_event *e;
+	int last = __TRACE_LAST_TYPE;
+
+	if (list_empty(&ftrace_event_list)) {
+		*list = &ftrace_event_list;
+		return last + 1;
+	}
+
+	/*
+	 * We used up all possible max events,
+	 * lets see if somebody freed one.
+	 */
+	list_for_each_entry(e, &ftrace_event_list, list) {
+		if (e->type != last + 1)
+			break;
+		last++;
+	}
+
+	/* Did we used up all 65 thousand events??? */
+	if ((last + 1) > FTRACE_MAX_EVENT)
+		return 0;
+
+	*list = &e->list;
+	return last + 1;
+}
+
 /**
  * register_ftrace_event - register output for an event type
  * @event: the event type to register
@@ -505,20 +535,40 @@ int register_ftrace_event(struct trace_event *event)
 
 	mutex_lock(&trace_event_mutex);
 
-	if (!event) {
-		ret = next_event_type++;
+	if (WARN_ON(!event))
 		goto out;
-	}
 
-	if (!event->type)
-		event->type = next_event_type++;
-	else if (event->type > __TRACE_LAST_TYPE) {
+	INIT_LIST_HEAD(&event->list);
+
+	if (!event->type) {
+		struct list_head *list;
+
+		if (next_event_type > FTRACE_MAX_EVENT) {
+
+			event->type = trace_search_list(&list);
+			if (!event->type)
+				goto out;
+
+		} else {
+			
+			event->type = next_event_type++;
+			list = &ftrace_event_list;
+		}
+
+		if (WARN_ON(ftrace_find_event(event->type)))
+			goto out;
+
+		list_add_tail(&event->list, list);
+
+	} else if (event->type > __TRACE_LAST_TYPE) {
 		printk(KERN_WARNING "Need to add type to trace.h\n");
 		WARN_ON(1);
-	}
-
-	if (ftrace_find_event(event->type))
 		goto out;
+	} else {
+		/* Is this event already used */
+		if (ftrace_find_event(event->type))
+			goto out;
+	}
 
 	if (event->trace == NULL)
 		event->trace = trace_nop_print;
@@ -537,8 +587,6 @@ int register_ftrace_event(struct trace_event *event)
  out:
 	mutex_unlock(&trace_event_mutex);
 
-	WARN_ON_ONCE(next_event_type > FTRACE_MAX_EVENT);
-
 	return ret;
 }
 EXPORT_SYMBOL_GPL(register_ftrace_event);
@@ -551,6 +599,7 @@ int unregister_ftrace_event(struct trace_event *event)
 {
 	mutex_lock(&trace_event_mutex);
 	hlist_del(&event->node);
+	list_del(&event->list);
 	mutex_unlock(&trace_event_mutex);
 
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 92ae3efa53b481ad669d8b85c6cfa37d774bb21e Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Mon, 27 Apr 2009 02:35:32 -0700
Subject: ipv4: remove unused member in fib_table.

This patch removes an unused parameter (tb_stamp) from fib_table
structure in include/net/ip_fib.h.

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 8b12667f7a2..34855eede5f 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -145,7 +145,6 @@ struct fib_result_nl {
 struct fib_table {
 	struct hlist_node tb_hlist;
 	u32		tb_id;
-	unsigned	tb_stamp;
 	int		tb_default;
 	int		(*tb_lookup)(struct fib_table *tb, const struct flowi *flp, struct fib_result *res);
 	int		(*tb_insert)(struct fib_table *, struct fib_config *);
-- 
cgit v1.2.3-70-g09d2


From edf391ff17232f097d72441c9ad467bcb3b5db18 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Mon, 27 Apr 2009 02:45:02 -0700
Subject: snmp: add missing counters for RFC 4293

The IP MIB (RFC 4293) defines stats for InOctets, OutOctets, InMcastOctets and
OutMcastOctets:
http://tools.ietf.org/html/rfc4293
But it seems we don't track those in any way that easy to separate from other
protocols.  This patch adds those missing counters to the stats file. Tested
successfully by me

With help from Eric Dumazet.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/snmp.h  | 10 ++++++++--
 include/net/ip.h      |  3 +++
 include/net/ipv6.h    | 15 ++++++++++++++-
 include/net/snmp.h    | 19 ++++++++++++++++++-
 net/ipv4/ip_input.c   | 13 ++++++++-----
 net/ipv4/ip_output.c  | 12 ++++++------
 net/ipv4/proc.c       | 10 ++++++++--
 net/ipv6/ip6_input.c  |  7 ++++---
 net/ipv6/ip6_output.c |  9 +++++----
 net/ipv6/mcast.c      | 19 ++++++++++++-------
 net/ipv6/ndisc.c      |  4 ++--
 net/ipv6/proc.c       | 10 ++++++++--
 net/ipv6/raw.c        |  2 +-
 13 files changed, 97 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index aee3f1e1d1c..0f953fe4041 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -18,7 +18,7 @@
 enum
 {
 	IPSTATS_MIB_NUM = 0,
-	IPSTATS_MIB_INRECEIVES,			/* InReceives */
+	IPSTATS_MIB_INPKTS,			/* InReceives */
 	IPSTATS_MIB_INHDRERRORS,		/* InHdrErrors */
 	IPSTATS_MIB_INTOOBIGERRORS,		/* InTooBigErrors */
 	IPSTATS_MIB_INNOROUTES,			/* InNoRoutes */
@@ -28,7 +28,7 @@ enum
 	IPSTATS_MIB_INDISCARDS,			/* InDiscards */
 	IPSTATS_MIB_INDELIVERS,			/* InDelivers */
 	IPSTATS_MIB_OUTFORWDATAGRAMS,		/* OutForwDatagrams */
-	IPSTATS_MIB_OUTREQUESTS,		/* OutRequests */
+	IPSTATS_MIB_OUTPKTS,			/* OutRequests */
 	IPSTATS_MIB_OUTDISCARDS,		/* OutDiscards */
 	IPSTATS_MIB_OUTNOROUTES,		/* OutNoRoutes */
 	IPSTATS_MIB_REASMTIMEOUT,		/* ReasmTimeout */
@@ -42,6 +42,12 @@ enum
 	IPSTATS_MIB_OUTMCASTPKTS,		/* OutMcastPkts */
 	IPSTATS_MIB_INBCASTPKTS,		/* InBcastPkts */
 	IPSTATS_MIB_OUTBCASTPKTS,		/* OutBcastPkts */
+	IPSTATS_MIB_INOCTETS,			/* InOctets */
+	IPSTATS_MIB_OUTOCTETS,			/* OutOctets */
+	IPSTATS_MIB_INMCASTOCTETS,		/* InMcastOctets */
+	IPSTATS_MIB_OUTMCASTOCTETS,		/* OutMcastOctets */
+	IPSTATS_MIB_INBCASTOCTETS,		/* InBcastOctets */
+	IPSTATS_MIB_OUTBCASTOCTETS,		/* OutBcastOctets */
 	__IPSTATS_MIB_MAX
 };
 
diff --git a/include/net/ip.h b/include/net/ip.h
index 4ac7577f98d..72c36926c26 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -168,7 +168,10 @@ struct ipv4_config
 extern struct ipv4_config ipv4_config;
 #define IP_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.ip_statistics, field)
 #define IP_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH((net)->mib.ip_statistics, field)
+#define IP_ADD_STATS(net, field, val)	SNMP_ADD_STATS((net)->mib.ip_statistics, field, val)
 #define IP_ADD_STATS_BH(net, field, val) SNMP_ADD_STATS_BH((net)->mib.ip_statistics, field, val)
+#define IP_UPD_PO_STATS(net, field, val) SNMP_UPD_PO_STATS((net)->mib.ip_statistics, field, val)
+#define IP_UPD_PO_STATS_BH(net, field, val) SNMP_UPD_PO_STATS_BH((net)->mib.ip_statistics, field, val)
 #define NET_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.net_statistics, field)
 #define NET_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH((net)->mib.net_statistics, field)
 #define NET_INC_STATS_USER(net, field) 	SNMP_INC_STATS_USER((net)->mib.net_statistics, field)
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index c1f16fc49ad..f27fd83d67d 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -126,15 +126,28 @@ extern struct ctl_path net_ipv6_ctl_path[];
 	SNMP_ADD_STATS##modifier((net)->mib.statname##_statistics, (field), (val));\
 })
 
+#define _DEVUPD(net, statname, modifier, idev, field, val)		\
+({									\
+	struct inet6_dev *_idev = (idev);				\
+	if (likely(_idev != NULL))					\
+		SNMP_UPD_PO_STATS##modifier((_idev)->stats.statname, field, (val)); \
+	SNMP_UPD_PO_STATS##modifier((net)->mib.statname##_statistics, field, (val));\
+})
+
 /* MIBs */
 
 #define IP6_INC_STATS(net, idev,field)		\
 		_DEVINC(net, ipv6, , idev, field)
 #define IP6_INC_STATS_BH(net, idev,field)	\
 		_DEVINC(net, ipv6, _BH, idev, field)
+#define IP6_ADD_STATS(net, idev,field,val)	\
+		_DEVADD(net, ipv6, , idev, field, val)
 #define IP6_ADD_STATS_BH(net, idev,field,val)	\
 		_DEVADD(net, ipv6, _BH, idev, field, val)
-
+#define IP6_UPD_PO_STATS(net, idev,field,val)   \
+		_DEVUPD(net, ipv6, , idev, field, val)
+#define IP6_UPD_PO_STATS_BH(net, idev,field,val)   \
+		_DEVUPD(net, ipv6, _BH, idev, field, val)
 #define ICMP6_INC_STATS(net, idev, field)	\
 		_DEVINC(net, icmpv6, , idev, field)
 #define ICMP6_INC_STATS_BH(net, idev, field)	\
diff --git a/include/net/snmp.h b/include/net/snmp.h
index 57c93628695..8c842e06bec 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -153,6 +153,11 @@ struct linux_xfrm_mib {
 		per_cpu_ptr(mib[!in_softirq()], get_cpu())->mibs[field]--; \
 		put_cpu(); \
 	} while (0)
+#define SNMP_ADD_STATS(mib, field, addend) 	\
+	do { \
+		per_cpu_ptr(mib[!in_softirq()], get_cpu())->mibs[field] += addend; \
+		put_cpu(); \
+	} while (0)
 #define SNMP_ADD_STATS_BH(mib, field, addend) 	\
 	(per_cpu_ptr(mib[0], raw_smp_processor_id())->mibs[field] += addend)
 #define SNMP_ADD_STATS_USER(mib, field, addend) 	\
@@ -160,5 +165,17 @@ struct linux_xfrm_mib {
 		per_cpu_ptr(mib[1], get_cpu())->mibs[field] += addend; \
 		put_cpu(); \
 	} while (0)
-
+#define SNMP_UPD_PO_STATS(mib, basefield, addend)	\
+	do { \
+		__typeof__(mib[0]) ptr = per_cpu_ptr(mib[!in_softirq()], get_cpu());\
+		ptr->mibs[basefield##PKTS]++; \
+		ptr->mibs[basefield##OCTETS] += addend;\
+		put_cpu(); \
+	} while (0)
+#define SNMP_UPD_PO_STATS_BH(mib, basefield, addend)	\
+	do { \
+		__typeof__(mib[0]) ptr = per_cpu_ptr(mib[!in_softirq()], raw_smp_processor_id());\
+		ptr->mibs[basefield##PKTS]++; \
+		ptr->mibs[basefield##OCTETS] += addend;\
+	} while (0)
 #endif
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 1a58a6fa1dc..40f6206b2aa 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -358,10 +358,12 @@ static int ip_rcv_finish(struct sk_buff *skb)
 		goto drop;
 
 	rt = skb->rtable;
-	if (rt->rt_type == RTN_MULTICAST)
-		IP_INC_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCASTPKTS);
-	else if (rt->rt_type == RTN_BROADCAST)
-		IP_INC_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCASTPKTS);
+	if (rt->rt_type == RTN_MULTICAST) {
+		IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCAST,
+				skb->len);
+	} else if (rt->rt_type == RTN_BROADCAST)
+		IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCAST,
+				skb->len);
 
 	return dst_input(skb);
 
@@ -384,7 +386,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	if (skb->pkt_type == PACKET_OTHERHOST)
 		goto drop;
 
-	IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INRECEIVES);
+
+	IP_UPD_PO_STATS_BH(dev_net(dev), IPSTATS_MIB_IN, skb->len);
 
 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
 		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 3e7e910c7c0..ea19c37ccc0 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -181,10 +181,10 @@ static inline int ip_finish_output2(struct sk_buff *skb)
 	struct net_device *dev = dst->dev;
 	unsigned int hh_len = LL_RESERVED_SPACE(dev);
 
-	if (rt->rt_type == RTN_MULTICAST)
-		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTMCASTPKTS);
-	else if (rt->rt_type == RTN_BROADCAST)
-		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTBCASTPKTS);
+	if (rt->rt_type == RTN_MULTICAST) {
+		IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len);
+	} else if (rt->rt_type == RTN_BROADCAST)
+		IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTBCAST, skb->len);
 
 	/* Be paranoid, rather than too clever. */
 	if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
@@ -244,7 +244,7 @@ int ip_mc_output(struct sk_buff *skb)
 	/*
 	 *	If the indicated interface is up and running, send the packet.
 	 */
-	IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTREQUESTS);
+	IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);
 
 	skb->dev = dev;
 	skb->protocol = htons(ETH_P_IP);
@@ -298,7 +298,7 @@ int ip_output(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dst->dev;
 
-	IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTREQUESTS);
+	IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);
 
 	skb->dev = dev;
 	skb->protocol = htons(ETH_P_IP);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index cf0cdeeb1db..f25542c48b7 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -90,14 +90,14 @@ static const struct file_operations sockstat_seq_fops = {
 
 /* snmp items */
 static const struct snmp_mib snmp4_ipstats_list[] = {
-	SNMP_MIB_ITEM("InReceives", IPSTATS_MIB_INRECEIVES),
+	SNMP_MIB_ITEM("InReceives", IPSTATS_MIB_INPKTS),
 	SNMP_MIB_ITEM("InHdrErrors", IPSTATS_MIB_INHDRERRORS),
 	SNMP_MIB_ITEM("InAddrErrors", IPSTATS_MIB_INADDRERRORS),
 	SNMP_MIB_ITEM("ForwDatagrams", IPSTATS_MIB_OUTFORWDATAGRAMS),
 	SNMP_MIB_ITEM("InUnknownProtos", IPSTATS_MIB_INUNKNOWNPROTOS),
 	SNMP_MIB_ITEM("InDiscards", IPSTATS_MIB_INDISCARDS),
 	SNMP_MIB_ITEM("InDelivers", IPSTATS_MIB_INDELIVERS),
-	SNMP_MIB_ITEM("OutRequests", IPSTATS_MIB_OUTREQUESTS),
+	SNMP_MIB_ITEM("OutRequests", IPSTATS_MIB_OUTPKTS),
 	SNMP_MIB_ITEM("OutDiscards", IPSTATS_MIB_OUTDISCARDS),
 	SNMP_MIB_ITEM("OutNoRoutes", IPSTATS_MIB_OUTNOROUTES),
 	SNMP_MIB_ITEM("ReasmTimeout", IPSTATS_MIB_REASMTIMEOUT),
@@ -118,6 +118,12 @@ static const struct snmp_mib snmp4_ipextstats_list[] = {
 	SNMP_MIB_ITEM("OutMcastPkts", IPSTATS_MIB_OUTMCASTPKTS),
 	SNMP_MIB_ITEM("InBcastPkts", IPSTATS_MIB_INBCASTPKTS),
 	SNMP_MIB_ITEM("OutBcastPkts", IPSTATS_MIB_OUTBCASTPKTS),
+	SNMP_MIB_ITEM("InOctets", IPSTATS_MIB_INOCTETS),
+	SNMP_MIB_ITEM("OutOctets", IPSTATS_MIB_OUTOCTETS),
+	SNMP_MIB_ITEM("InMcastOctets", IPSTATS_MIB_INMCASTOCTETS),
+	SNMP_MIB_ITEM("OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS),
+	SNMP_MIB_ITEM("InBcastOctets", IPSTATS_MIB_INBCASTOCTETS),
+	SNMP_MIB_ITEM("OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 8f04bd9da27..bc1a920c34a 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -70,7 +70,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 
 	idev = __in6_dev_get(skb->dev);
 
-	IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INRECEIVES);
+	IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_IN, skb->len);
 
 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL ||
 	    !idev || unlikely(idev->cnf.disable_ipv6)) {
@@ -242,8 +242,9 @@ int ip6_mc_input(struct sk_buff *skb)
 	struct ipv6hdr *hdr;
 	int deliver;
 
-	IP6_INC_STATS_BH(dev_net(skb->dst->dev),
-			 ip6_dst_idev(skb->dst), IPSTATS_MIB_INMCASTPKTS);
+	IP6_UPD_PO_STATS_BH(dev_net(skb->dst->dev),
+			 ip6_dst_idev(skb->dst), IPSTATS_MIB_INMCAST,
+			 skb->len);
 
 	hdr = ipv6_hdr(skb);
 	deliver = ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 9fb49c3b518..735a2bf4b5f 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -159,7 +159,8 @@ static int ip6_output2(struct sk_buff *skb)
 			}
 		}
 
-		IP6_INC_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCASTPKTS);
+		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
+				skb->len);
 	}
 
 	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
@@ -275,8 +276,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 
 	mtu = dst_mtu(dst);
 	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
-		IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
-			      IPSTATS_MIB_OUTREQUESTS);
+		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb->dst),
+			      IPSTATS_MIB_OUT, skb->len);
 		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
 				dst_output);
 	}
@@ -1516,7 +1517,7 @@ int ip6_push_pending_frames(struct sock *sk)
 	skb->mark = sk->sk_mark;
 
 	skb->dst = dst_clone(&rt->u.dst);
-	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
+	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
 	if (proto == IPPROTO_ICMPV6) {
 		struct inet6_dev *idev = ip6_dst_idev(skb->dst);
 
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index a51fb33e686..4b48819a5b8 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1449,7 +1449,8 @@ static void mld_sendpack(struct sk_buff *skb)
 	int err;
 	struct flowi fl;
 
-	IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
+	IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
+
 	payload_len = (skb->tail - skb->network_header) - sizeof(*pip6);
 	mldlen = skb->tail - skb->transport_header;
 	pip6->payload_len = htons(payload_len);
@@ -1473,13 +1474,15 @@ static void mld_sendpack(struct sk_buff *skb)
 	if (err)
 		goto err_out;
 
+	payload_len = skb->len;
+
 	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
 		      dst_output);
 out:
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS_BH(net, idev, ICMPV6_MLD2_REPORT);
 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
-		IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTMCASTPKTS);
+		IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
 	} else
 		IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS);
 
@@ -1773,10 +1776,6 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 		     IPV6_TLV_PADN, 0 };
 	struct flowi fl;
 
-	rcu_read_lock();
-	IP6_INC_STATS(net, __in6_dev_get(dev),
-		      IPSTATS_MIB_OUTREQUESTS);
-	rcu_read_unlock();
 	if (type == ICMPV6_MGM_REDUCTION)
 		snd_addr = &in6addr_linklocal_allrouters;
 	else
@@ -1786,6 +1785,11 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 	payload_len = len + sizeof(ra);
 	full_len = sizeof(struct ipv6hdr) + payload_len;
 
+	rcu_read_lock();
+	IP6_UPD_PO_STATS(net, __in6_dev_get(dev),
+		      IPSTATS_MIB_OUT, full_len);
+	rcu_read_unlock();
+
 	skb = sock_alloc_send_skb(sk, LL_ALLOCATED_SPACE(dev) + full_len, 1, &err);
 
 	if (skb == NULL) {
@@ -1838,13 +1842,14 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 	if (err)
 		goto err_out;
 
+
 	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
 		      dst_output);
 out:
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS(net, idev, type);
 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
-		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTMCASTPKTS);
+		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, full_len);
 	} else
 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
 
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 9f061d1adbc..ab65cc51b00 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -533,7 +533,7 @@ void ndisc_send_skb(struct sk_buff *skb,
 	skb->dst = dst;
 
 	idev = in6_dev_get(dst->dev);
-	IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
+	IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
 
 	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
 		      dst_output);
@@ -1613,7 +1613,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 
 	buff->dst = dst;
 	idev = in6_dev_get(dst->dev);
-	IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
+	IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
 	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
 		      dst_output);
 	if (!err) {
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 97c17fdd6f7..590ddefb7ff 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -61,7 +61,7 @@ static const struct file_operations sockstat6_seq_fops = {
 
 static struct snmp_mib snmp6_ipstats_list[] = {
 /* ipv6 mib according to RFC 2465 */
-	SNMP_MIB_ITEM("Ip6InReceives", IPSTATS_MIB_INRECEIVES),
+	SNMP_MIB_ITEM("Ip6InReceives", IPSTATS_MIB_INPKTS),
 	SNMP_MIB_ITEM("Ip6InHdrErrors", IPSTATS_MIB_INHDRERRORS),
 	SNMP_MIB_ITEM("Ip6InTooBigErrors", IPSTATS_MIB_INTOOBIGERRORS),
 	SNMP_MIB_ITEM("Ip6InNoRoutes", IPSTATS_MIB_INNOROUTES),
@@ -71,7 +71,7 @@ static struct snmp_mib snmp6_ipstats_list[] = {
 	SNMP_MIB_ITEM("Ip6InDiscards", IPSTATS_MIB_INDISCARDS),
 	SNMP_MIB_ITEM("Ip6InDelivers", IPSTATS_MIB_INDELIVERS),
 	SNMP_MIB_ITEM("Ip6OutForwDatagrams", IPSTATS_MIB_OUTFORWDATAGRAMS),
-	SNMP_MIB_ITEM("Ip6OutRequests", IPSTATS_MIB_OUTREQUESTS),
+	SNMP_MIB_ITEM("Ip6OutRequests", IPSTATS_MIB_OUTPKTS),
 	SNMP_MIB_ITEM("Ip6OutDiscards", IPSTATS_MIB_OUTDISCARDS),
 	SNMP_MIB_ITEM("Ip6OutNoRoutes", IPSTATS_MIB_OUTNOROUTES),
 	SNMP_MIB_ITEM("Ip6ReasmTimeout", IPSTATS_MIB_REASMTIMEOUT),
@@ -83,6 +83,12 @@ static struct snmp_mib snmp6_ipstats_list[] = {
 	SNMP_MIB_ITEM("Ip6FragCreates", IPSTATS_MIB_FRAGCREATES),
 	SNMP_MIB_ITEM("Ip6InMcastPkts", IPSTATS_MIB_INMCASTPKTS),
 	SNMP_MIB_ITEM("Ip6OutMcastPkts", IPSTATS_MIB_OUTMCASTPKTS),
+	SNMP_MIB_ITEM("Ip6InOctets", IPSTATS_MIB_INOCTETS),
+	SNMP_MIB_ITEM("Ip6OutOctets", IPSTATS_MIB_OUTOCTETS),
+	SNMP_MIB_ITEM("Ip6InMcastOctets", IPSTATS_MIB_INMCASTOCTETS),
+	SNMP_MIB_ITEM("Ip6OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS),
+	SNMP_MIB_ITEM("Ip6InBcastOctets", IPSTATS_MIB_INBCASTOCTETS),
+	SNMP_MIB_ITEM("Ip6OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 61f6827e590..e99307fba0b 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -638,7 +638,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
 	if (err)
 		goto error_fault;
 
-	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
+	IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
 	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
 		      dst_output);
 	if (err > 0)
-- 
cgit v1.2.3-70-g09d2


From 739649c53d7f78f5bf41bdfd1a858ee90c7a687a Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Sat, 25 Apr 2009 12:52:40 +0000
Subject: of: add of_parse_phandle() helper for parsing phandle properties

of_parse_phandle() is a helper function to read and parse a phandle
property and return a pointer to the resulting device_node.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Andy Fleming <afleming@freescale.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/of/base.c  | 24 ++++++++++++++++++++++++
 include/linux/of.h |  3 +++
 2 files changed, 27 insertions(+)

(limited to 'include')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index 41c5dfd8535..ddf224d456b 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -494,6 +494,30 @@ int of_modalias_node(struct device_node *node, char *modalias, int len)
 }
 EXPORT_SYMBOL_GPL(of_modalias_node);
 
+/**
+ * of_parse_phandle - Resolve a phandle property to a device_node pointer
+ * @np: Pointer to device node holding phandle property
+ * @phandle_name: Name of property holding a phandle value
+ * @index: For properties holding a table of phandles, this is the index into
+ *         the table
+ *
+ * Returns the device_node pointer with refcount incremented.  Use
+ * of_node_put() on it when done.
+ */
+struct device_node *
+of_parse_phandle(struct device_node *np, const char *phandle_name, int index)
+{
+	const phandle *phandle;
+	int size;
+
+	phandle = of_get_property(np, phandle_name, &size);
+	if ((!phandle) || (size < sizeof(*phandle) * (index + 1)))
+		return NULL;
+
+	return of_find_node_by_phandle(phandle[index]);
+}
+EXPORT_SYMBOL(of_parse_phandle);
+
 /**
  * of_parse_phandles_with_args - Find a node pointed by phandle in a list
  * @np:		pointer to a device tree node containing a list
diff --git a/include/linux/of.h b/include/linux/of.h
index 6a7efa242f5..7be2d1043c1 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -77,6 +77,9 @@ extern int of_n_size_cells(struct device_node *np);
 extern const struct of_device_id *of_match_node(
 	const struct of_device_id *matches, const struct device_node *node);
 extern int of_modalias_node(struct device_node *node, char *modalias, int len);
+extern struct device_node *of_parse_phandle(struct device_node *np,
+					    const char *phandle_name,
+					    int index);
 extern int of_parse_phandles_with_args(struct device_node *np,
 	const char *list_name, const char *cells_name, int index,
 	struct device_node **out_node, const void **out_args);
-- 
cgit v1.2.3-70-g09d2


From 4dea547fef1ba23f9d23f5e7f5218187a7dcf1b3 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Sat, 25 Apr 2009 12:52:46 +0000
Subject: phylib: rework to prepare for OF registration of PHYs

This patch makes changes in preparation for supporting open firmware
device tree descriptions of MDIO busses.  Changes include:
- Cleanup handling of phy_map[] entries; they are already NULLed when
  registering and so don't need to be re-cleared, and it is good practice
  to clear them out when unregistering.
- Split phy_device registration out into a new function so that the
  OF helpers can do two stage registration (separate allocation and
  registration steps).

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Andy Fleming <afleming@freescale.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio_bus.c   | 29 +++-------------------------
 drivers/net/phy/phy_device.c | 45 ++++++++++++++++++++++++++++++++++++++++----
 include/linux/phy.h          |  1 +
 3 files changed, 45 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index b754020cbe7..bd4e8d72dc0 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -113,7 +113,6 @@ int mdiobus_register(struct mii_bus *bus)
 		bus->reset(bus);
 
 	for (i = 0; i < PHY_MAX_ADDR; i++) {
-		bus->phy_map[i] = NULL;
 		if ((bus->phy_mask & (1 << i)) == 0) {
 			struct phy_device *phydev;
 
@@ -150,6 +149,7 @@ void mdiobus_unregister(struct mii_bus *bus)
 	for (i = 0; i < PHY_MAX_ADDR; i++) {
 		if (bus->phy_map[i])
 			device_unregister(&bus->phy_map[i]->dev);
+		bus->phy_map[i] = NULL;
 	}
 }
 EXPORT_SYMBOL(mdiobus_unregister);
@@ -188,35 +188,12 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr)
 	if (IS_ERR(phydev) || phydev == NULL)
 		return phydev;
 
-	/* There's a PHY at this address
-	 * We need to set:
-	 * 1) IRQ
-	 * 2) bus_id
-	 * 3) parent
-	 * 4) bus
-	 * 5) mii_bus
-	 * And, we need to register it */
-
-	phydev->irq = bus->irq != NULL ? bus->irq[addr] : PHY_POLL;
-
-	phydev->dev.parent = bus->parent;
-	phydev->dev.bus = &mdio_bus_type;
-	dev_set_name(&phydev->dev, PHY_ID_FMT, bus->id, addr);
-
-	phydev->bus = bus;
-
-	/* Run all of the fixups for this PHY */
-	phy_scan_fixups(phydev);
-
-	err = device_register(&phydev->dev);
+	err = phy_device_register(phydev);
 	if (err) {
-		printk(KERN_ERR "phy %d failed to register\n", addr);
 		phy_device_free(phydev);
-		phydev = NULL;
+		return NULL;
 	}
 
-	bus->phy_map[addr] = phydev;
-
 	return phydev;
 }
 EXPORT_SYMBOL(mdiobus_scan);
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 0a06e4fd37d..9352ca8fa2c 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -39,20 +39,21 @@ MODULE_DESCRIPTION("PHY library");
 MODULE_AUTHOR("Andy Fleming");
 MODULE_LICENSE("GPL");
 
-static struct phy_driver genphy_driver;
-extern int mdio_bus_init(void);
-extern void mdio_bus_exit(void);
-
 void phy_device_free(struct phy_device *phydev)
 {
 	kfree(phydev);
 }
+EXPORT_SYMBOL(phy_device_free);
 
 static void phy_device_release(struct device *dev)
 {
 	phy_device_free(to_phy_device(dev));
 }
 
+static struct phy_driver genphy_driver;
+extern int mdio_bus_init(void);
+extern void mdio_bus_exit(void);
+
 static LIST_HEAD(phy_fixup_list);
 static DEFINE_MUTEX(phy_fixup_lock);
 
@@ -166,6 +167,10 @@ struct phy_device* phy_device_create(struct mii_bus *bus, int addr, int phy_id)
 	dev->addr = addr;
 	dev->phy_id = phy_id;
 	dev->bus = bus;
+	dev->dev.parent = bus->parent;
+	dev->dev.bus = &mdio_bus_type;
+	dev->irq = bus->irq != NULL ? bus->irq[addr] : PHY_POLL;
+	dev_set_name(&dev->dev, PHY_ID_FMT, bus->id, addr);
 
 	dev->state = PHY_DOWN;
 
@@ -235,6 +240,38 @@ struct phy_device * get_phy_device(struct mii_bus *bus, int addr)
 
 	return dev;
 }
+EXPORT_SYMBOL(get_phy_device);
+
+/**
+ * phy_device_register - Register the phy device on the MDIO bus
+ * @phy_device: phy_device structure to be added to the MDIO bus
+ */
+int phy_device_register(struct phy_device *phydev)
+{
+	int err;
+
+	/* Don't register a phy if one is already registered at this
+	 * address */
+	if (phydev->bus->phy_map[phydev->addr])
+		return -EINVAL;
+	phydev->bus->phy_map[phydev->addr] = phydev;
+
+	/* Run all of the fixups for this PHY */
+	phy_scan_fixups(phydev);
+
+	err = device_register(&phydev->dev);
+	if (err) {
+		pr_err("phy %d failed to register\n", phydev->addr);
+		goto out;
+	}
+
+	return 0;
+
+ out:
+	phydev->bus->phy_map[phydev->addr] = NULL;
+	return err;
+}
+EXPORT_SYMBOL(phy_device_register);
 
 /**
  * phy_prepare_link - prepares the PHY layer to monitor link status
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 97e40cb6b58..bf0b5f112dc 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -444,6 +444,7 @@ static inline int phy_write(struct phy_device *phydev, u16 regnum, u16 val)
 
 int get_phy_id(struct mii_bus *bus, int addr, u32 *phy_id);
 struct phy_device* get_phy_device(struct mii_bus *bus, int addr);
+int phy_device_register(struct phy_device *phy);
 int phy_clear_interrupt(struct phy_device *phydev);
 int phy_config_interrupt(struct phy_device *phydev, u32 interrupts);
 struct phy_device * phy_attach(struct net_device *dev,
-- 
cgit v1.2.3-70-g09d2


From fa94f6d93c5382810ff41f010f12ca8698fc775e Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Sat, 25 Apr 2009 12:52:51 +0000
Subject: phylib: add *_direct() variants of phy_connect and phy_attach
 functions

Add phy_connect_direct() and phy_attach_direct() functions so that
drivers can use a pointer to the phy_device instead of trying to determine
the phy's bus_id string.

This patch is useful for OF device tree descriptions of phy devices where
the driver doesn't need or know what the bus_id value in order to get a
phy_device pointer.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Andy Fleming <afleming@freescale.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 118 +++++++++++++++++++++++++++++++------------
 include/linux/phy.h          |   5 ++
 2 files changed, 90 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 9352ca8fa2c..a2ece89622d 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -291,6 +291,33 @@ void phy_prepare_link(struct phy_device *phydev,
 	phydev->adjust_link = handler;
 }
 
+/**
+ * phy_connect_direct - connect an ethernet device to a specific phy_device
+ * @dev: the network device to connect
+ * @phydev: the pointer to the phy device
+ * @handler: callback function for state change notifications
+ * @flags: PHY device's dev_flags
+ * @interface: PHY device's interface
+ */
+int phy_connect_direct(struct net_device *dev, struct phy_device *phydev,
+		       void (*handler)(struct net_device *), u32 flags,
+		       phy_interface_t interface)
+{
+	int rc;
+
+	rc = phy_attach_direct(dev, phydev, flags, interface);
+	if (rc)
+		return rc;
+
+	phy_prepare_link(phydev, handler);
+	phy_start_machine(phydev, NULL);
+	if (phydev->irq > 0)
+		phy_start_interrupts(phydev);
+
+	return 0;
+}
+EXPORT_SYMBOL(phy_connect_direct);
+
 /**
  * phy_connect - connect an ethernet device to a PHY device
  * @dev: the network device to connect
@@ -312,18 +339,21 @@ struct phy_device * phy_connect(struct net_device *dev, const char *bus_id,
 		phy_interface_t interface)
 {
 	struct phy_device *phydev;
+	struct device *d;
+	int rc;
 
-	phydev = phy_attach(dev, bus_id, flags, interface);
-
-	if (IS_ERR(phydev))
-		return phydev;
-
-	phy_prepare_link(phydev, handler);
-
-	phy_start_machine(phydev, NULL);
+	/* Search the list of PHY devices on the mdio bus for the
+	 * PHY with the requested name */
+	d = bus_find_device_by_name(&mdio_bus_type, NULL, bus_id);
+	if (!d) {
+		pr_err("PHY %s not found\n", bus_id);
+		return ERR_PTR(-ENODEV);
+	}
+	phydev = to_phy_device(d);
 
-	if (phydev->irq > 0)
-		phy_start_interrupts(phydev);
+	rc = phy_connect_direct(dev, phydev, handler, flags, interface);
+	if (rc)
+		return ERR_PTR(rc);
 
 	return phydev;
 }
@@ -347,9 +377,9 @@ void phy_disconnect(struct phy_device *phydev)
 EXPORT_SYMBOL(phy_disconnect);
 
 /**
- * phy_attach - attach a network device to a particular PHY device
+ * phy_attach_direct - attach a network device to a given PHY device pointer
  * @dev: network device to attach
- * @bus_id: PHY device to attach
+ * @phydev: Pointer to phy_device to attach
  * @flags: PHY device's dev_flags
  * @interface: PHY device's interface
  *
@@ -360,22 +390,10 @@ EXPORT_SYMBOL(phy_disconnect);
  *     the attaching device, and given a callback for link status
  *     change.  The phy_device is returned to the attaching driver.
  */
-struct phy_device *phy_attach(struct net_device *dev,
-		const char *bus_id, u32 flags, phy_interface_t interface)
+int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
+		      u32 flags, phy_interface_t interface)
 {
-	struct bus_type *bus = &mdio_bus_type;
-	struct phy_device *phydev;
-	struct device *d;
-
-	/* Search the list of PHY devices on the mdio bus for the
-	 * PHY with the requested name */
-	d = bus_find_device_by_name(bus, NULL, bus_id);
-	if (d) {
-		phydev = to_phy_device(d);
-	} else {
-		printk(KERN_ERR "%s not found\n", bus_id);
-		return ERR_PTR(-ENODEV);
-	}
+	struct device *d = &phydev->dev;
 
 	/* Assume that if there is no driver, that it doesn't
 	 * exist, and we should use the genphy driver. */
@@ -388,13 +406,12 @@ struct phy_device *phy_attach(struct net_device *dev,
 			err = device_bind_driver(d);
 
 		if (err)
-			return ERR_PTR(err);
+			return err;
 	}
 
 	if (phydev->attached_dev) {
-		printk(KERN_ERR "%s: %s already attached\n",
-				dev->name, bus_id);
-		return ERR_PTR(-EBUSY);
+		dev_err(&dev->dev, "PHY already attached\n");
+		return -EBUSY;
 	}
 
 	phydev->attached_dev = dev;
@@ -412,13 +429,48 @@ struct phy_device *phy_attach(struct net_device *dev,
 		err = phy_scan_fixups(phydev);
 
 		if (err < 0)
-			return ERR_PTR(err);
+			return err;
 
 		err = phydev->drv->config_init(phydev);
 
 		if (err < 0)
-			return ERR_PTR(err);
+			return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(phy_attach_direct);
+
+/**
+ * phy_attach - attach a network device to a particular PHY device
+ * @dev: network device to attach
+ * @bus_id: Bus ID of PHY device to attach
+ * @flags: PHY device's dev_flags
+ * @interface: PHY device's interface
+ *
+ * Description: Same as phy_attach_direct() except that a PHY bus_id
+ *     string is passed instead of a pointer to a struct phy_device.
+ */
+struct phy_device *phy_attach(struct net_device *dev,
+		const char *bus_id, u32 flags, phy_interface_t interface)
+{
+	struct bus_type *bus = &mdio_bus_type;
+	struct phy_device *phydev;
+	struct device *d;
+	int rc;
+
+	/* Search the list of PHY devices on the mdio bus for the
+	 * PHY with the requested name */
+	d = bus_find_device_by_name(bus, NULL, bus_id);
+	if (!d) {
+		pr_err("PHY %s not found\n", bus_id);
+		return ERR_PTR(-ENODEV);
 	}
+	phydev = to_phy_device(d);
+
+	rc = phy_attach_direct(dev, phydev, flags, interface);
+	if (rc)
+		return ERR_PTR(rc);
 
 	return phydev;
 }
diff --git a/include/linux/phy.h b/include/linux/phy.h
index bf0b5f112dc..c216e4e503b 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -447,8 +447,13 @@ struct phy_device* get_phy_device(struct mii_bus *bus, int addr);
 int phy_device_register(struct phy_device *phy);
 int phy_clear_interrupt(struct phy_device *phydev);
 int phy_config_interrupt(struct phy_device *phydev, u32 interrupts);
+int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
+		u32 flags, phy_interface_t interface);
 struct phy_device * phy_attach(struct net_device *dev,
 		const char *bus_id, u32 flags, phy_interface_t interface);
+int phy_connect_direct(struct net_device *dev, struct phy_device *phydev,
+		void (*handler)(struct net_device *), u32 flags,
+		phy_interface_t interface);
 struct phy_device * phy_connect(struct net_device *dev, const char *bus_id,
 		void (*handler)(struct net_device *), u32 flags,
 		phy_interface_t interface);
-- 
cgit v1.2.3-70-g09d2


From 8bc487d150b939e69830c39322df4ee486efe381 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Sat, 25 Apr 2009 12:52:56 +0000
Subject: openfirmware: Add OF phylib support code

Add support for parsing the device tree for PHY devices on an MDIO bus.
Currently many of the PowerPC ethernet drivers are open coding a solution
for reading data out of the device tree to find the correct PHY device.
This patch implements a set of common routines to:

a) let MDIO bus drivers register phy_devices described in the tree, and
b) let MAC drivers find the correct phy_device via the tree.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Andy Fleming <afleming@freescale.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/of/Kconfig      |   6 +++
 drivers/of/Makefile     |   1 +
 drivers/of/of_mdio.c    | 139 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/of_mdio.h |  22 ++++++++
 4 files changed, 168 insertions(+)
 create mode 100644 drivers/of/of_mdio.c
 create mode 100644 include/linux/of_mdio.h

(limited to 'include')

diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index f821dbc952a..6fe043bd377 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -19,3 +19,9 @@ config OF_SPI
 	depends on OF && PPC_OF && SPI
 	help
 	  OpenFirmware SPI accessors
+
+config OF_MDIO
+	def_tristate PHYLIB
+	depends on OF && PHYLIB
+	help
+	  OpenFirmware MDIO bus (Ethernet PHY) accessors
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index 4c3c6f8e36f..bdfb5f5d4b0 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_OF_DEVICE) += device.o platform.o
 obj-$(CONFIG_OF_GPIO)   += gpio.o
 obj-$(CONFIG_OF_I2C)	+= of_i2c.o
 obj-$(CONFIG_OF_SPI)	+= of_spi.o
+obj-$(CONFIG_OF_MDIO)	+= of_mdio.o
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
new file mode 100644
index 00000000000..aee967d7f76
--- /dev/null
+++ b/drivers/of/of_mdio.c
@@ -0,0 +1,139 @@
+/*
+ * OF helpers for the MDIO (Ethernet PHY) API
+ *
+ * Copyright (c) 2009 Secret Lab Technologies, Ltd.
+ *
+ * This file is released under the GPLv2
+ *
+ * This file provides helper functions for extracting PHY device information
+ * out of the OpenFirmware device tree and using it to populate an mii_bus.
+ */
+
+#include <linux/phy.h>
+#include <linux/of.h>
+#include <linux/of_mdio.h>
+#include <linux/module.h>
+
+MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
+MODULE_LICENSE("GPL");
+
+/**
+ * of_mdiobus_register - Register mii_bus and create PHYs from the device tree
+ * @mdio: pointer to mii_bus structure
+ * @np: pointer to device_node of MDIO bus.
+ *
+ * This function registers the mii_bus structure and registers a phy_device
+ * for each child node of @np.
+ */
+int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
+{
+	struct phy_device *phy;
+	struct device_node *child;
+	int rc, i;
+
+	/* Mask out all PHYs from auto probing.  Instead the PHYs listed in
+	 * the device tree are populated after the bus has been registered */
+	mdio->phy_mask = ~0;
+
+	/* Clear all the IRQ properties */
+	if (mdio->irq)
+		for (i=0; i<PHY_MAX_ADDR; i++)
+			mdio->irq[i] = PHY_POLL;
+
+	/* Register the MDIO bus */
+	rc = mdiobus_register(mdio);
+	if (rc)
+		return rc;
+
+	/* Loop over the child nodes and register a phy_device for each one */
+	for_each_child_of_node(np, child) {
+		const u32 *addr;
+		int len;
+
+		/* A PHY must have a reg property in the range [0-31] */
+		addr = of_get_property(child, "reg", &len);
+		if (!addr || len < sizeof(*addr) || *addr >= 32 || *addr < 0) {
+			dev_err(&mdio->dev, "%s has invalid PHY address\n",
+				child->full_name);
+			continue;
+		}
+
+		if (mdio->irq) {
+			mdio->irq[*addr] = irq_of_parse_and_map(child, 0);
+			if (!mdio->irq[*addr])
+				mdio->irq[*addr] = PHY_POLL;
+		}
+
+		phy = get_phy_device(mdio, *addr);
+		if (!phy) {
+			dev_err(&mdio->dev, "error probing PHY at address %i\n",
+				*addr);
+			continue;
+		}
+		phy_scan_fixups(phy);
+
+		/* Associate the OF node with the device structure so it
+		 * can be looked up later */
+		of_node_get(child);
+		dev_archdata_set_node(&phy->dev.archdata, child);
+
+		/* All data is now stored in the phy struct; register it */
+		rc = phy_device_register(phy);
+		if (rc) {
+			phy_device_free(phy);
+			of_node_put(child);
+			continue;
+		}
+
+		dev_dbg(&mdio->dev, "registered phy %s at address %i\n",
+			child->name, *addr);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(of_mdiobus_register);
+
+/**
+ * of_phy_find_device - Give a PHY node, find the phy_device
+ * @phy_np: Pointer to the phy's device tree node
+ *
+ * Returns a pointer to the phy_device.
+ */
+struct phy_device *of_phy_find_device(struct device_node *phy_np)
+{
+	struct device *d;
+	int match(struct device *dev, void *phy_np)
+	{
+		return dev_archdata_get_node(&dev->archdata) == phy_np;
+	}
+
+	if (!phy_np)
+		return NULL;
+
+	d = bus_find_device(&mdio_bus_type, NULL, phy_np, match);
+	return d ? to_phy_device(d) : NULL;
+}
+EXPORT_SYMBOL(of_phy_find_device);
+
+/**
+ * of_phy_connect - Connect to the phy described in the device tree
+ * @dev: pointer to net_device claiming the phy
+ * @phy_np: Pointer to device tree node for the PHY
+ * @hndlr: Link state callback for the network device
+ * @iface: PHY data interface type
+ *
+ * Returns a pointer to the phy_device if successfull.  NULL otherwise
+ */
+struct phy_device *of_phy_connect(struct net_device *dev,
+				  struct device_node *phy_np,
+				  void (*hndlr)(struct net_device *), u32 flags,
+				  phy_interface_t iface)
+{
+	struct phy_device *phy = of_phy_find_device(phy_np);
+
+	if (!phy)
+		return NULL;
+
+	return phy_connect_direct(dev, phy, hndlr, flags, iface) ? NULL : phy;
+}
+EXPORT_SYMBOL(of_phy_connect);
diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h
new file mode 100644
index 00000000000..c9663c69030
--- /dev/null
+++ b/include/linux/of_mdio.h
@@ -0,0 +1,22 @@
+/*
+ * OF helpers for the MDIO (Ethernet PHY) API
+ *
+ * Copyright (c) 2009 Secret Lab Technologies, Ltd.
+ *
+ * This file is released under the GPLv2
+ */
+
+#ifndef __LINUX_OF_MDIO_H
+#define __LINUX_OF_MDIO_H
+
+#include <linux/phy.h>
+#include <linux/of.h>
+
+extern int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np);
+extern struct phy_device *of_phy_find_device(struct device_node *phy_np);
+extern struct phy_device *of_phy_connect(struct net_device *dev,
+					 struct device_node *phy_np,
+					 void (*hndlr)(struct net_device *),
+					 u32 flags, phy_interface_t iface);
+
+#endif /* __LINUX_OF_MDIO_H */
-- 
cgit v1.2.3-70-g09d2


From aa73832c5a80d6c52c69b18af858d88fa595dd3c Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Sat, 25 Apr 2009 12:53:33 +0000
Subject: net: Rework fs_enet driver to use of_mdio infrastructure

This patch simplifies the driver by making use of more common code.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Andy Fleming <afleming@freescale.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/fs_enet/fs_enet-main.c | 69 ++++++--------------------------------
 drivers/net/fs_enet/mii-bitbang.c  | 29 ++--------------
 drivers/net/fs_enet/mii-fec.c      | 26 +-------------
 include/linux/fs_enet_pd.h         |  6 ++--
 4 files changed, 16 insertions(+), 114 deletions(-)

(limited to 'include')

diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c
index a9cbc3191a2..9604aaed61d 100644
--- a/drivers/net/fs_enet/fs_enet-main.c
+++ b/drivers/net/fs_enet/fs_enet-main.c
@@ -36,6 +36,8 @@
 #include <linux/fs.h>
 #include <linux/platform_device.h>
 #include <linux/phy.h>
+#include <linux/of.h>
+#include <linux/of_mdio.h>
 #include <linux/of_platform.h>
 #include <linux/of_gpio.h>
 
@@ -752,9 +754,10 @@ static int fs_init_phy(struct net_device *dev)
 	fep->oldlink = 0;
 	fep->oldspeed = 0;
 	fep->oldduplex = -1;
-	if(fep->fpi->bus_id)
-		phydev = phy_connect(dev, fep->fpi->bus_id, &fs_adjust_link, 0,
-				PHY_INTERFACE_MODE_MII);
+	if(fep->fpi->phy_node)
+		phydev = of_phy_connect(dev, fep->fpi->phy_node,
+					&fs_adjust_link, 0,
+					PHY_INTERFACE_MODE_MII);
 	else {
 		printk("No phy bus ID specified in BSP code\n");
 		return -EINVAL;
@@ -962,57 +965,6 @@ static void cleanup_immap(void)
 
 /**************************************************************************************/
 
-static int __devinit find_phy(struct device_node *np,
-                              struct fs_platform_info *fpi)
-{
-	struct device_node *phynode, *mdionode;
-	int ret = 0, len, bus_id;
-	const u32 *data;
-
-	data  = of_get_property(np, "fixed-link", NULL);
-	if (data) {
-		snprintf(fpi->bus_id, 16, "%x:%02x", 0, *data);
-		return 0;
-	}
-
-	data = of_get_property(np, "phy-handle", &len);
-	if (!data || len != 4)
-		return -EINVAL;
-
-	phynode = of_find_node_by_phandle(*data);
-	if (!phynode)
-		return -EINVAL;
-
-	data = of_get_property(phynode, "reg", &len);
-	if (!data || len != 4) {
-		ret = -EINVAL;
-		goto out_put_phy;
-	}
-
-	mdionode = of_get_parent(phynode);
-	if (!mdionode) {
-		ret = -EINVAL;
-		goto out_put_phy;
-	}
-
-	bus_id = of_get_gpio(mdionode, 0);
-	if (bus_id < 0) {
-		struct resource res;
-		ret = of_address_to_resource(mdionode, 0, &res);
-		if (ret)
-			goto out_put_mdio;
-		bus_id = res.start;
-	}
-
-	snprintf(fpi->bus_id, 16, "%x:%02x", bus_id, *data);
-
-out_put_mdio:
-	of_node_put(mdionode);
-out_put_phy:
-	of_node_put(phynode);
-	return ret;
-}
-
 #ifdef CONFIG_FS_ENET_HAS_FEC
 #define IS_FEC(match) ((match)->data == &fs_fec_ops)
 #else
@@ -1062,9 +1014,9 @@ static int __devinit fs_enet_probe(struct of_device *ofdev,
 	fpi->rx_copybreak = 240;
 	fpi->use_napi = 1;
 	fpi->napi_weight = 17;
-
-	ret = find_phy(ofdev->node, fpi);
-	if (ret)
+	fpi->phy_node = of_parse_phandle(ofdev->node, "phy-handle", 0);
+	if ((!fpi->phy_node) && (!of_get_property(ofdev->node, "fixed-link",
+						  NULL)))
 		goto out_free_fpi;
 
 	privsize = sizeof(*fep) +
@@ -1136,6 +1088,7 @@ out_cleanup_data:
 out_free_dev:
 	free_netdev(ndev);
 	dev_set_drvdata(&ofdev->dev, NULL);
+	of_node_put(fpi->phy_node);
 out_free_fpi:
 	kfree(fpi);
 	return ret;
@@ -1151,7 +1104,7 @@ static int fs_enet_remove(struct of_device *ofdev)
 	fep->ops->free_bd(ndev);
 	fep->ops->cleanup_data(ndev);
 	dev_set_drvdata(fep->dev, NULL);
-
+	of_node_put(fep->fpi->phy_node);
 	free_netdev(ndev);
 	return 0;
 }
diff --git a/drivers/net/fs_enet/mii-bitbang.c b/drivers/net/fs_enet/mii-bitbang.c
index 49b6645d7e0..93b481b0e3c 100644
--- a/drivers/net/fs_enet/mii-bitbang.c
+++ b/drivers/net/fs_enet/mii-bitbang.c
@@ -22,6 +22,7 @@
 #include <linux/mii.h>
 #include <linux/platform_device.h>
 #include <linux/mdio-bitbang.h>
+#include <linux/of_mdio.h>
 #include <linux/of_platform.h>
 
 #include "fs_enet.h"
@@ -149,31 +150,12 @@ static int __devinit fs_mii_bitbang_init(struct mii_bus *bus,
 	return 0;
 }
 
-static void __devinit add_phy(struct mii_bus *bus, struct device_node *np)
-{
-	const u32 *data;
-	int len, id, irq;
-
-	data = of_get_property(np, "reg", &len);
-	if (!data || len != 4)
-		return;
-
-	id = *data;
-	bus->phy_mask &= ~(1 << id);
-
-	irq = of_irq_to_resource(np, 0, NULL);
-	if (irq != NO_IRQ)
-		bus->irq[id] = irq;
-}
-
 static int __devinit fs_enet_mdio_probe(struct of_device *ofdev,
                                         const struct of_device_id *match)
 {
-	struct device_node *np = NULL;
 	struct mii_bus *new_bus;
 	struct bb_info *bitbang;
 	int ret = -ENOMEM;
-	int i;
 
 	bitbang = kzalloc(sizeof(struct bb_info), GFP_KERNEL);
 	if (!bitbang)
@@ -196,17 +178,10 @@ static int __devinit fs_enet_mdio_probe(struct of_device *ofdev,
 	if (!new_bus->irq)
 		goto out_unmap_regs;
 
-	for (i = 0; i < PHY_MAX_ADDR; i++)
-		new_bus->irq[i] = -1;
-
-	while ((np = of_get_next_child(ofdev->node, np)))
-		if (!strcmp(np->type, "ethernet-phy"))
-			add_phy(new_bus, np);
-
 	new_bus->parent = &ofdev->dev;
 	dev_set_drvdata(&ofdev->dev, new_bus);
 
-	ret = mdiobus_register(new_bus);
+	ret = of_mdiobus_register(new_bus, ofdev->node);
 	if (ret)
 		goto out_free_irqs;
 
diff --git a/drivers/net/fs_enet/mii-fec.c b/drivers/net/fs_enet/mii-fec.c
index 61aaae444b4..75a09994d66 100644
--- a/drivers/net/fs_enet/mii-fec.c
+++ b/drivers/net/fs_enet/mii-fec.c
@@ -100,23 +100,6 @@ static int fs_enet_fec_mii_reset(struct mii_bus *bus)
 	return 0;
 }
 
-static void __devinit add_phy(struct mii_bus *bus, struct device_node *np)
-{
-	const u32 *data;
-	int len, id, irq;
-
-	data = of_get_property(np, "reg", &len);
-	if (!data || len != 4)
-		return;
-
-	id = *data;
-	bus->phy_mask &= ~(1 << id);
-
-	irq = of_irq_to_resource(np, 0, NULL);
-	if (irq != NO_IRQ)
-		bus->irq[id] = irq;
-}
-
 static int __devinit fs_enet_mdio_probe(struct of_device *ofdev,
                                         const struct of_device_id *match)
 {
@@ -163,17 +146,10 @@ static int __devinit fs_enet_mdio_probe(struct of_device *ofdev,
 	if (!new_bus->irq)
 		goto out_unmap_regs;
 
-	for (i = 0; i < PHY_MAX_ADDR; i++)
-		new_bus->irq[i] = -1;
-
-	while ((np = of_get_next_child(ofdev->node, np)))
-		if (!strcmp(np->type, "ethernet-phy"))
-			add_phy(new_bus, np);
-
 	new_bus->parent = &ofdev->dev;
 	dev_set_drvdata(&ofdev->dev, new_bus);
 
-	ret = mdiobus_register(new_bus);
+	ret = of_mdiobus_register(new_bus, ofdev->node);
 	if (ret)
 		goto out_free_irqs;
 
diff --git a/include/linux/fs_enet_pd.h b/include/linux/fs_enet_pd.h
index 8300cab30f9..51b793466ff 100644
--- a/include/linux/fs_enet_pd.h
+++ b/include/linux/fs_enet_pd.h
@@ -17,6 +17,7 @@
 #define FS_ENET_PD_H
 
 #include <linux/string.h>
+#include <linux/of_mdio.h>
 #include <asm/types.h>
 
 #define FS_ENET_NAME	"fs_enet"
@@ -130,10 +131,7 @@ struct fs_platform_info {
 	
 	u32 device_flags;
 
-	int phy_addr;		/* the phy address (-1 no phy) */
-	char bus_id[16];
-	int phy_irq;		/* the phy irq (if it exists)  */
-
+	struct device_node *phy_node;
 	const struct fs_mii_bus_info *bus_info;
 
 	int rx_ring, tx_ring;	/* number of buffers on rx     */
-- 
cgit v1.2.3-70-g09d2


From f85ba78068ac137fe9c1f50d25405d2783d75c77 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Mon, 27 Apr 2009 03:23:54 -0700
Subject: tun: add IFF_TUN_EXCL flag to avoid opening a persistent device.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When creating a certain types of VPN, NetworkManager will first attempt
to find an available tun device by iterating through 'vpn%d' until it
finds one that isn't already busy. Then it'll set that to be persistent
and owned by the otherwise unprivileged user that the VPN dæmon itself
runs as.

There's a race condition here -- during the period where the vpn%d
device is created and we're waiting for the VPN dæmon to actually
connect and use it, if we try to create _another_ device we could end up
re-using the same one -- because trying to open it again doesn't get
-EBUSY as it would while it's _actually_ busy.

So solve this, we add an IFF_TUN_EXCL flag which causes tun_set_iff() to
fail if it would be opening an existing persistent tundevice -- so that
we can make sure we're getting an entirely _new_ device.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c      | 2 ++
 include/linux/if_tun.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 589f0ca668d..94622e5fb93 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -874,6 +874,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
 	dev = __dev_get_by_name(net, ifr->ifr_name);
 	if (dev) {
+		if (ifr->ifr_flags & IFF_TUN_EXCL)
+			return -EBUSY;
 		if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops)
 			tun = netdev_priv(dev);
 		else if ((ifr->ifr_flags & IFF_TAP) && dev->netdev_ops == &tap_netdev_ops)
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 049d6c9428d..915ba5789f0 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -55,6 +55,7 @@
 #define IFF_NO_PI	0x1000
 #define IFF_ONE_QUEUE	0x2000
 #define IFF_VNET_HDR	0x4000
+#define IFF_TUN_EXCL	0x8000
 
 /* Features for GSO (TUNSETOFFLOAD). */
 #define TUN_F_CSUM	0x01	/* You can hand me unchecksummed packets. */
-- 
cgit v1.2.3-70-g09d2


From edbd9e30306067c3a45c035eb95a6f49daaa2337 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 27 Apr 2009 05:44:29 -0700
Subject: gro: Fix handling of headers that extend over the tail

The skb_gro_* code fails to handle the case where a header starts
in the linear area but ends in the frags area.  Since the goal
of skb_gro_* is to optimise the case of completely non-linear
packets, we can simply bail out if we have anything in the linear
area.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 +-
 net/core/dev.c            | 11 +++--------
 2 files changed, 4 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 31167451d08..c9ef4191607 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1137,7 +1137,7 @@ static inline void skb_gro_reset_offset(struct sk_buff *skb)
 
 static inline void *skb_gro_mac_header(struct sk_buff *skb)
 {
-	return skb_mac_header(skb) < skb->data ? skb_mac_header(skb) :
+	return skb_headlen(skb) ? skb_mac_header(skb) :
 	       page_address(skb_shinfo(skb)->frags[0].page) +
 	       skb_shinfo(skb)->frags[0].page_offset;
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index e48c08af76a..6785b067ad5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2378,18 +2378,13 @@ void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
 	unsigned int offset = skb_gro_offset(skb);
 
 	hlen += offset;
-	if (hlen <= skb_headlen(skb))
-		return skb->data + offset;
-
-	if (unlikely(!skb_shinfo(skb)->nr_frags ||
-		     skb_shinfo(skb)->frags[0].size <=
-		     hlen - skb_headlen(skb) ||
+	if (unlikely(skb_headlen(skb) ||
+		     skb_shinfo(skb)->frags[0].size < hlen ||
 		     PageHighMem(skb_shinfo(skb)->frags[0].page)))
 		return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
 
 	return page_address(skb_shinfo(skb)->frags[0].page) +
-	       skb_shinfo(skb)->frags[0].page_offset +
-	       offset - skb_headlen(skb);
+	       skb_shinfo(skb)->frags[0].page_offset + offset;
 }
 EXPORT_SYMBOL(skb_gro_header);
 
-- 
cgit v1.2.3-70-g09d2


From 36e7b1b8dac1a785abca3a121b6b0b79f1a8d7df Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 27 Apr 2009 05:44:45 -0700
Subject: gro: Fix COMPLETE checksum handling

On a brand new GRO skb, we cannot call ip_hdr since the header
may lie in the non-linear area.  This patch adds the helper
skb_gro_network_header to handle this.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 7 +++++++
 net/ipv4/tcp_ipv4.c       | 2 +-
 net/ipv6/tcp_ipv6.c       | 2 +-
 3 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c9ef4191607..fe20d171acf 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1142,6 +1142,13 @@ static inline void *skb_gro_mac_header(struct sk_buff *skb)
 	       skb_shinfo(skb)->frags[0].page_offset;
 }
 
+static inline void *skb_gro_network_header(struct sk_buff *skb)
+{
+	return skb_headlen(skb) ? skb_network_header(skb) :
+	       page_address(skb_shinfo(skb)->frags[0].page) +
+	       skb_shinfo(skb)->frags[0].page_offset + skb_network_offset(skb);
+}
+
 static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 				  unsigned short type,
 				  const void *daddr, const void *saddr,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5d427f86b41..bda74e8aed7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2343,7 +2343,7 @@ void tcp4_proc_exit(void)
 
 struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 {
-	struct iphdr *iph = ip_hdr(skb);
+	struct iphdr *iph = skb_gro_network_header(skb);
 
 	switch (skb->ip_summed) {
 	case CHECKSUM_COMPLETE:
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4b5aa185426..d9dd94b6bf6 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -943,7 +943,7 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb)
 
 struct sk_buff **tcp6_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 {
-	struct ipv6hdr *iph = ipv6_hdr(skb);
+	struct ipv6hdr *iph = skb_gro_network_header(skb);
 
 	switch (skb->ip_summed) {
 	case CHECKSUM_COMPLETE:
-- 
cgit v1.2.3-70-g09d2


From 879c5e6b7cb4c689d08ca9b2e353d8ab3dc425d5 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 17 Jun 2009 11:47:48 -0400
Subject: jbd2: convert instrumentation from markers to tracepoints

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/checkpoint.c        |   5 +-
 fs/jbd2/commit.c            |  13 ++--
 fs/jbd2/journal.c           |  69 ++++++++++++++++++
 include/linux/jbd2.h        |   6 ++
 include/trace/events/jbd2.h | 168 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 252 insertions(+), 9 deletions(-)
 create mode 100644 include/trace/events/jbd2.h

(limited to 'include')

diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 17159cacbd9..5d70b3e6d49 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -20,9 +20,9 @@
 #include <linux/time.h>
 #include <linux/fs.h>
 #include <linux/jbd2.h>
-#include <linux/marker.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
+#include <trace/events/jbd2.h>
 
 /*
  * Unlink a buffer from a transaction checkpoint list.
@@ -358,8 +358,7 @@ int jbd2_log_do_checkpoint(journal_t *journal)
 	 * journal straight away.
 	 */
 	result = jbd2_cleanup_journal_tail(journal);
-	trace_mark(jbd2_checkpoint, "dev %s need_checkpoint %d",
-		   journal->j_devname, result);
+	trace_jbd2_checkpoint(journal, result);
 	jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
 	if (result <= 0)
 		return result;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 0b7d3b8226f..7b4088b2364 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -16,7 +16,6 @@
 #include <linux/time.h>
 #include <linux/fs.h>
 #include <linux/jbd2.h>
-#include <linux/marker.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
@@ -26,6 +25,7 @@
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
 #include <linux/bio.h>
+#include <trace/events/jbd2.h>
 
 /*
  * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -253,6 +253,7 @@ static int journal_submit_data_buffers(journal_t *journal,
 		 * block allocation  with delalloc. We need to write
 		 * only allocated blocks here.
 		 */
+		trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
 		err = journal_submit_inode_data_buffers(mapping);
 		if (!ret)
 			ret = err;
@@ -394,8 +395,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	commit_transaction = journal->j_running_transaction;
 	J_ASSERT(commit_transaction->t_state == T_RUNNING);
 
-	trace_mark(jbd2_start_commit, "dev %s transaction %d",
-		   journal->j_devname, commit_transaction->t_tid);
+	trace_jbd2_start_commit(journal, commit_transaction);
 	jbd_debug(1, "JBD: starting commit of transaction %d\n",
 			commit_transaction->t_tid);
 
@@ -409,6 +409,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	 */
 	if (commit_transaction->t_synchronous_commit)
 		write_op = WRITE_SYNC_PLUG;
+	trace_jbd2_commit_locking(journal, commit_transaction);
 	stats.u.run.rs_wait = commit_transaction->t_max_wait;
 	stats.u.run.rs_locked = jiffies;
 	stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
@@ -484,6 +485,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	 */
 	jbd2_journal_switch_revoke_table(journal);
 
+	trace_jbd2_commit_flushing(journal, commit_transaction);
 	stats.u.run.rs_flushing = jiffies;
 	stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked,
 					       stats.u.run.rs_flushing);
@@ -520,6 +522,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	commit_transaction->t_state = T_COMMIT;
 	spin_unlock(&journal->j_state_lock);
 
+	trace_jbd2_commit_logging(journal, commit_transaction);
 	stats.u.run.rs_logging = jiffies;
 	stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing,
 						 stats.u.run.rs_logging);
@@ -1054,9 +1057,7 @@ restart_loop:
 	if (journal->j_commit_callback)
 		journal->j_commit_callback(journal, commit_transaction);
 
-	trace_mark(jbd2_end_commit, "dev %s transaction %d head %d",
-		   journal->j_devname, commit_transaction->t_tid,
-		   journal->j_tail_sequence);
+	trace_jbd2_end_commit(journal, commit_transaction);
 	jbd_debug(1, "JBD: commit %d complete, head %d\n",
 		  journal->j_commit_sequence, journal->j_tail_sequence);
 	if (to_free)
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 62be7d294ec..18bfd5dab64 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -38,6 +38,10 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/math64.h>
+#include <linux/hash.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/jbd2.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -2377,6 +2381,71 @@ static void __exit journal_exit(void)
 	jbd2_journal_destroy_caches();
 }
 
+/* 
+ * jbd2_dev_to_name is a utility function used by the jbd2 and ext4 
+ * tracing infrastructure to map a dev_t to a device name.
+ *
+ * The caller should use rcu_read_lock() in order to make sure the
+ * device name stays valid until its done with it.  We use
+ * rcu_read_lock() as well to make sure we're safe in case the caller
+ * gets sloppy, and because rcu_read_lock() is cheap and can be safely
+ * nested.
+ */
+struct devname_cache {
+	struct rcu_head	rcu;
+	dev_t		device;
+	char		devname[BDEVNAME_SIZE];
+};
+#define CACHE_SIZE_BITS 6
+static struct devname_cache *devcache[1 << CACHE_SIZE_BITS];
+static DEFINE_SPINLOCK(devname_cache_lock);
+
+static void free_devcache(struct rcu_head *rcu)
+{
+	kfree(rcu);
+}
+
+const char *jbd2_dev_to_name(dev_t device)
+{
+	int	i = hash_32(device, CACHE_SIZE_BITS);
+	char	*ret;
+	struct block_device *bd;
+
+	rcu_read_lock();
+	if (devcache[i] && devcache[i]->device == device) {
+		ret = devcache[i]->devname;
+		rcu_read_unlock();
+		return ret;
+	}
+	rcu_read_unlock();
+
+	spin_lock(&devname_cache_lock);
+	if (devcache[i]) {
+		if (devcache[i]->device == device) {
+			ret = devcache[i]->devname;
+			spin_unlock(&devname_cache_lock);
+			return ret;
+		}
+		call_rcu(&devcache[i]->rcu, free_devcache);
+	}
+	devcache[i] = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
+	if (!devcache[i]) {
+		spin_unlock(&devname_cache_lock);
+		return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
+	}
+	devcache[i]->device = device;
+	bd = bdget(device);
+	if (bd) {
+		bdevname(bd, devcache[i]->devname);
+		bdput(bd);
+	} else
+		__bdevname(device, devcache[i]->devname);
+	ret = devcache[i]->devname;
+	spin_unlock(&devname_cache_lock);
+	return ret;
+}
+EXPORT_SYMBOL(jbd2_dev_to_name);
+
 MODULE_LICENSE("GPL");
 module_init(journal_init);
 module_exit(journal_exit);
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index cc02393bfce..d97eb652d6c 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1315,6 +1315,12 @@ extern int jbd_blocks_per_page(struct inode *inode);
 #define BUFFER_TRACE2(bh, bh2, info)	do {} while (0)
 #define JBUFFER_TRACE(jh, info)	do {} while (0)
 
+/* 
+ * jbd2_dev_to_name is a utility function used by the jbd2 and ext4 
+ * tracing infrastructure to map a dev_t to a device name.
+ */
+extern const char *jbd2_dev_to_name(dev_t device);
+
 #endif	/* __KERNEL__ */
 
 #endif	/* _LINUX_JBD2_H */
diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
new file mode 100644
index 00000000000..845b0b4b48f
--- /dev/null
+++ b/include/trace/events/jbd2.h
@@ -0,0 +1,168 @@
+#if !defined(_TRACE_JBD2_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_JBD2_H
+
+#include <linux/jbd2.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM jbd2
+
+TRACE_EVENT(jbd2_checkpoint,
+
+	TP_PROTO(journal_t *journal, int result),
+
+	TP_ARGS(journal, result),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	int,	result			)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= journal->j_fs_dev->bd_dev;
+		__entry->result		= result;
+	),
+
+	TP_printk("dev %s result %d",
+		  jbd2_dev_to_name(__entry->dev), __entry->result)
+);
+
+TRACE_EVENT(jbd2_start_commit,
+
+	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+	TP_ARGS(journal, commit_transaction),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	char,	sync_commit		  )
+		__field(	int,	transaction		  )
+	),
+
+	TP_fast_assign(
+		__entry->dev		= journal->j_fs_dev->bd_dev;
+		__entry->sync_commit = commit_transaction->t_synchronous_commit;
+		__entry->transaction	= commit_transaction->t_tid;
+	),
+
+	TP_printk("dev %s transaction %d sync %d",
+		  jbd2_dev_to_name(__entry->dev), __entry->transaction,
+		  __entry->sync_commit)
+);
+
+TRACE_EVENT(jbd2_commit_locking,
+
+	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+	TP_ARGS(journal, commit_transaction),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	char,	sync_commit		  )
+		__field(	int,	transaction		  )
+	),
+
+	TP_fast_assign(
+		__entry->dev		= journal->j_fs_dev->bd_dev;
+		__entry->sync_commit = commit_transaction->t_synchronous_commit;
+		__entry->transaction	= commit_transaction->t_tid;
+	),
+
+	TP_printk("dev %s transaction %d sync %d",
+		  jbd2_dev_to_name(__entry->dev), __entry->transaction,
+		  __entry->sync_commit)
+);
+
+TRACE_EVENT(jbd2_commit_flushing,
+
+	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+	TP_ARGS(journal, commit_transaction),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	char,	sync_commit		  )
+		__field(	int,	transaction		  )
+	),
+
+	TP_fast_assign(
+		__entry->dev		= journal->j_fs_dev->bd_dev;
+		__entry->sync_commit = commit_transaction->t_synchronous_commit;
+		__entry->transaction	= commit_transaction->t_tid;
+	),
+
+	TP_printk("dev %s transaction %d sync %d",
+		  jbd2_dev_to_name(__entry->dev), __entry->transaction,
+		  __entry->sync_commit)
+);
+
+TRACE_EVENT(jbd2_commit_logging,
+
+	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+	TP_ARGS(journal, commit_transaction),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	char,	sync_commit		  )
+		__field(	int,	transaction		  )
+	),
+
+	TP_fast_assign(
+		__entry->dev		= journal->j_fs_dev->bd_dev;
+		__entry->sync_commit = commit_transaction->t_synchronous_commit;
+		__entry->transaction	= commit_transaction->t_tid;
+	),
+
+	TP_printk("dev %s transaction %d sync %d",
+		  jbd2_dev_to_name(__entry->dev), __entry->transaction,
+		  __entry->sync_commit)
+);
+
+TRACE_EVENT(jbd2_end_commit,
+	TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+	TP_ARGS(journal, commit_transaction),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	char,	sync_commit		  )
+		__field(	int,	transaction		  )
+		__field(	int,	head		  	  )
+	),
+
+	TP_fast_assign(
+		__entry->dev		= journal->j_fs_dev->bd_dev;
+		__entry->sync_commit = commit_transaction->t_synchronous_commit;
+		__entry->transaction	= commit_transaction->t_tid;
+		__entry->head		= journal->j_tail_sequence;
+	),
+
+	TP_printk("dev %s transaction %d sync %d head %d",
+		  jbd2_dev_to_name(__entry->dev), __entry->transaction,
+		  __entry->sync_commit, __entry->head)
+);
+
+TRACE_EVENT(jbd2_submit_inode_data,
+	TP_PROTO(struct inode *inode),
+
+	TP_ARGS(inode),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+	),
+
+	TP_printk("dev %s ino %lu",
+		  jbd2_dev_to_name(__entry->dev), __entry->ino)
+);
+
+#endif /* _TRACE_JBD2_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
-- 
cgit v1.2.3-70-g09d2


From 9bffad1ed2a003a355ed1b42424a0ae3575275ed Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 17 Jun 2009 11:48:11 -0400
Subject: ext4: convert instrumentation from markers to tracepoints

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/fsync.c             |   8 +-
 fs/ext4/ialloc.c            |  15 +-
 fs/ext4/inode.c             |  69 +----
 fs/ext4/mballoc.c           |  77 ++---
 fs/ext4/mballoc.h           |   1 -
 fs/ext4/super.c             |   6 +-
 include/trace/events/ext4.h | 719 ++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 774 insertions(+), 121 deletions(-)
 create mode 100644 include/trace/events/ext4.h

(limited to 'include')

diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 5afe4370840..83cf6415f59 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -28,10 +28,12 @@
 #include <linux/writeback.h>
 #include <linux/jbd2.h>
 #include <linux/blkdev.h>
-#include <linux/marker.h>
+
 #include "ext4.h"
 #include "ext4_jbd2.h"
 
+#include <trace/events/ext4.h>
+
 /*
  * akpm: A new design for ext4_sync_file().
  *
@@ -52,9 +54,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
 
 	J_ASSERT(ext4_journal_current_handle() == NULL);
 
-	trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld",
-		   inode->i_sb->s_id, datasync, inode->i_ino,
-		   dentry->d_parent->d_inode->i_ino);
+	trace_ext4_sync_file(file, dentry, datasync);
 
 	/*
 	 * data=writeback:
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 3743bd849bc..7d502f3be91 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -23,11 +23,14 @@
 #include <linux/bitops.h>
 #include <linux/blkdev.h>
 #include <asm/byteorder.h>
+
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
 
+#include <trace/events/ext4.h>
+
 /*
  * ialloc.c contains the inodes allocation and deallocation routines
  */
@@ -208,11 +211,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 
 	ino = inode->i_ino;
 	ext4_debug("freeing inode %lu\n", ino);
-	trace_mark(ext4_free_inode,
-		   "dev %s ino %lu mode %d uid %lu gid %lu bocks %llu",
-		   sb->s_id, inode->i_ino, inode->i_mode,
-		   (unsigned long) inode->i_uid, (unsigned long) inode->i_gid,
-		   (unsigned long long) inode->i_blocks);
+	trace_ext4_free_inode(inode);
 
 	/*
 	 * Note: we must free any quota before locking the superblock,
@@ -815,8 +814,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 
 	sb = dir->i_sb;
 	ngroups = ext4_get_groups_count(sb);
-	trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id,
-		   dir->i_ino, mode);
+	trace_ext4_request_inode(dir, mode);
 	inode = new_inode(sb);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
@@ -1047,8 +1045,7 @@ got:
 	}
 
 	ext4_debug("allocating inode %lu\n", inode->i_ino);
-	trace_mark(ext4_allocate_inode, "dev %s ino %lu dir %lu mode %d",
-		   sb->s_id, inode->i_ino, dir->i_ino, mode);
+	trace_ext4_allocate_inode(inode, dir, mode);
 	goto really_out;
 fail:
 	ext4_std_error(sb, err);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 875db944b22..2418ad36eab 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -37,11 +37,14 @@
 #include <linux/namei.h>
 #include <linux/uio.h>
 #include <linux/bio.h>
+
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
 #include "ext4_extents.h"
 
+#include <trace/events/ext4.h>
+
 #define MPAGE_DA_EXTENT_TAIL 0x01
 
 static inline int ext4_begin_ordered_truncate(struct inode *inode,
@@ -1466,10 +1469,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
  	pgoff_t index;
 	unsigned from, to;
 
-	trace_mark(ext4_write_begin,
-		   "dev %s ino %lu pos %llu len %u flags %u",
-		   inode->i_sb->s_id, inode->i_ino,
-		   (unsigned long long) pos, len, flags);
+	trace_ext4_write_begin(inode, pos, len, flags);
 	/*
 	 * Reserve one block more for addition to orphan list in case
 	 * we allocate blocks but write fails for some reason
@@ -1611,10 +1611,7 @@ static int ext4_ordered_write_end(struct file *file,
 	struct inode *inode = mapping->host;
 	int ret = 0, ret2;
 
-	trace_mark(ext4_ordered_write_end,
-		   "dev %s ino %lu pos %llu len %u copied %u",
-		   inode->i_sb->s_id, inode->i_ino,
-		   (unsigned long long) pos, len, copied);
+	trace_ext4_ordered_write_end(inode, pos, len, copied);
 	ret = ext4_jbd2_file_inode(handle, inode);
 
 	if (ret == 0) {
@@ -1658,10 +1655,7 @@ static int ext4_writeback_write_end(struct file *file,
 	struct inode *inode = mapping->host;
 	int ret = 0, ret2;
 
-	trace_mark(ext4_writeback_write_end,
-		   "dev %s ino %lu pos %llu len %u copied %u",
-		   inode->i_sb->s_id, inode->i_ino,
-		   (unsigned long long) pos, len, copied);
+	trace_ext4_writeback_write_end(inode, pos, len, copied);
 	ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
 							page, fsdata);
 	copied = ret2;
@@ -1705,10 +1699,7 @@ static int ext4_journalled_write_end(struct file *file,
 	unsigned from, to;
 	loff_t new_i_size;
 
-	trace_mark(ext4_journalled_write_end,
-		   "dev %s ino %lu pos %llu len %u copied %u",
-		   inode->i_sb->s_id, inode->i_ino,
-		   (unsigned long long) pos, len, copied);
+	trace_ext4_journalled_write_end(inode, pos, len, copied);
 	from = pos & (PAGE_CACHE_SIZE - 1);
 	to = from + len;
 
@@ -2554,9 +2545,7 @@ static int ext4_da_writepage(struct page *page,
 	struct buffer_head *page_bufs;
 	struct inode *inode = page->mapping->host;
 
-	trace_mark(ext4_da_writepage,
-		   "dev %s ino %lu page_index %lu",
-		   inode->i_sb->s_id, inode->i_ino, page->index);
+	trace_ext4_da_writepage(inode, page);
 	size = i_size_read(inode);
 	if (page->index == size >> PAGE_CACHE_SHIFT)
 		len = size & ~PAGE_CACHE_MASK;
@@ -2667,19 +2656,7 @@ static int ext4_da_writepages(struct address_space *mapping,
 	int needed_blocks, ret = 0, nr_to_writebump = 0;
 	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
 
-	trace_mark(ext4_da_writepages,
-		   "dev %s ino %lu nr_t_write %ld "
-		   "pages_skipped %ld range_start %llu "
-		   "range_end %llu nonblocking %d "
-		   "for_kupdate %d for_reclaim %d "
-		   "for_writepages %d range_cyclic %d",
-		   inode->i_sb->s_id, inode->i_ino,
-		   wbc->nr_to_write, wbc->pages_skipped,
-		   (unsigned long long) wbc->range_start,
-		   (unsigned long long) wbc->range_end,
-		   wbc->nonblocking, wbc->for_kupdate,
-		   wbc->for_reclaim, wbc->for_writepages,
-		   wbc->range_cyclic);
+	trace_ext4_da_writepages(inode, wbc);
 
 	/*
 	 * No pages to write? This is mainly a kludge to avoid starting
@@ -2845,14 +2822,7 @@ out_writepages:
 	if (!no_nrwrite_index_update)
 		wbc->no_nrwrite_index_update = 0;
 	wbc->nr_to_write -= nr_to_writebump;
-	trace_mark(ext4_da_writepage_result,
-		   "dev %s ino %lu ret %d pages_written %d "
-		   "pages_skipped %ld congestion %d "
-		   "more_io %d no_nrwrite_index_update %d",
-		   inode->i_sb->s_id, inode->i_ino, ret,
-		   pages_written, wbc->pages_skipped,
-		   wbc->encountered_congestion, wbc->more_io,
-		   wbc->no_nrwrite_index_update);
+	trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
 	return ret;
 }
 
@@ -2904,11 +2874,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
 					len, flags, pagep, fsdata);
 	}
 	*fsdata = (void *)0;
-
-	trace_mark(ext4_da_write_begin,
-		   "dev %s ino %lu pos %llu len %u flags %u",
-		   inode->i_sb->s_id, inode->i_ino,
-		   (unsigned long long) pos, len, flags);
+	trace_ext4_da_write_begin(inode, pos, len, flags);
 retry:
 	/*
 	 * With delayed allocation, we don't log the i_disksize update
@@ -3001,10 +2967,7 @@ static int ext4_da_write_end(struct file *file,
 		}
 	}
 
-	trace_mark(ext4_da_write_end,
-		   "dev %s ino %lu pos %llu len %u copied %u",
-		   inode->i_sb->s_id, inode->i_ino,
-		   (unsigned long long) pos, len, copied);
+	trace_ext4_da_write_end(inode, pos, len, copied);
 	start = pos & (PAGE_CACHE_SIZE - 1);
 	end = start + copied - 1;
 
@@ -3255,9 +3218,7 @@ static int ext4_normal_writepage(struct page *page,
 	loff_t size = i_size_read(inode);
 	loff_t len;
 
-	trace_mark(ext4_normal_writepage,
-		   "dev %s ino %lu page_index %lu",
-		   inode->i_sb->s_id, inode->i_ino, page->index);
+	trace_ext4_normal_writepage(inode, page);
 	J_ASSERT(PageLocked(page));
 	if (page->index == size >> PAGE_CACHE_SHIFT)
 		len = size & ~PAGE_CACHE_MASK;
@@ -3343,9 +3304,7 @@ static int ext4_journalled_writepage(struct page *page,
 	loff_t size = i_size_read(inode);
 	loff_t len;
 
-	trace_mark(ext4_journalled_writepage,
-		   "dev %s ino %lu page_index %lu",
-		   inode->i_sb->s_id, inode->i_ino, page->index);
+	trace_ext4_journalled_writepage(inode, page);
 	J_ASSERT(PageLocked(page));
 	if (page->index == size >> PAGE_CACHE_SHIFT)
 		len = size & ~PAGE_CACHE_MASK;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index ed8482e22c0..8d98070b48f 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -22,6 +22,8 @@
  */
 
 #include "mballoc.h"
+#include <trace/events/ext4.h>
+
 /*
  * MUSTDO:
  *   - test ext4_ext_search_left() and ext4_ext_search_right()
@@ -340,8 +342,6 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
 						ext4_group_t group);
 static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
 
-
-
 static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
 {
 #if BITS_PER_LONG == 64
@@ -2859,9 +2859,8 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
 		discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
 			+ entry->start_blk
 			+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
-		trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u",
-			   sb->s_id, (unsigned long long) discard_block,
-			   entry->count);
+		trace_ext4_discard_blocks(sb, (unsigned long long)discard_block,
+					  entry->count);
 		sb_issue_discard(sb, discard_block, entry->count);
 
 		kmem_cache_free(ext4_free_ext_cachep, entry);
@@ -3629,10 +3628,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
 
 	mb_debug("new inode pa %p: %llu/%u for %u\n", pa,
 			pa->pa_pstart, pa->pa_len, pa->pa_lstart);
-	trace_mark(ext4_mb_new_inode_pa,
-		   "dev %s ino %lu pstart %llu len %u lstart %u",
-		   sb->s_id, ac->ac_inode->i_ino,
-		   pa->pa_pstart, pa->pa_len, pa->pa_lstart);
+	trace_ext4_mb_new_inode_pa(ac, pa);
 
 	ext4_mb_use_inode_pa(ac, pa);
 	atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
@@ -3691,9 +3687,8 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
 	pa->pa_type = MB_GROUP_PA;
 
 	mb_debug("new group pa %p: %llu/%u for %u\n", pa,
-		 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
-	trace_mark(ext4_mb_new_group_pa, "dev %s pstart %llu len %u lstart %u",
-		   sb->s_id, pa->pa_pstart, pa->pa_len, pa->pa_lstart);
+			pa->pa_pstart, pa->pa_len, pa->pa_lstart);
+	trace_ext4_mb_new_group_pa(ac, pa);
 
 	ext4_mb_use_group_pa(ac, pa);
 	atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
@@ -3783,10 +3778,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
 			ext4_mb_store_history(ac);
 		}
 
-		trace_mark(ext4_mb_release_inode_pa,
-			   "dev %s ino %lu block %llu count %u",
-			   sb->s_id, pa->pa_inode->i_ino, grp_blk_start + bit,
-			   next - bit);
+		trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit,
+					       next - bit);
 		mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
 		bit = next + 1;
 	}
@@ -3820,8 +3813,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
 	if (ac)
 		ac->ac_op = EXT4_MB_HISTORY_DISCARD;
 
-	trace_mark(ext4_mb_release_group_pa, "dev %s pstart %llu len %d",
-		   sb->s_id, pa->pa_pstart, pa->pa_len);
+	trace_ext4_mb_release_group_pa(ac, pa);
 	BUG_ON(pa->pa_deleted == 0);
 	ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
 	BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
@@ -3889,6 +3881,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
 
 	INIT_LIST_HEAD(&list);
 	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
+	if (ac)
+		ac->ac_sb = sb;
 repeat:
 	ext4_lock_group(sb, group);
 	list_for_each_entry_safe(pa, tmp,
@@ -3987,12 +3981,15 @@ void ext4_discard_preallocations(struct inode *inode)
 	}
 
 	mb_debug("discard preallocation for inode %lu\n", inode->i_ino);
-	trace_mark(ext4_discard_preallocations, "dev %s ino %lu", sb->s_id,
-		   inode->i_ino);
+	trace_ext4_discard_preallocations(inode);
 
 	INIT_LIST_HEAD(&list);
 
 	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
+	if (ac) {
+		ac->ac_sb = sb;
+		ac->ac_inode = inode;
+	}
 repeat:
 	/* first, collect all pa's in the inode */
 	spin_lock(&ei->i_prealloc_lock);
@@ -4276,6 +4273,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
 
 	INIT_LIST_HEAD(&discard_list);
 	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
+	if (ac)
+		ac->ac_sb = sb;
 
 	spin_lock(&lg->lg_prealloc_lock);
 	list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
@@ -4445,8 +4444,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
 	int ret;
 	int freed = 0;
 
-	trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d",
-		   sb->s_id, needed);
+	trace_ext4_mb_discard_preallocations(sb, needed);
 	for (i = 0; i < ngroups && needed > 0; i++) {
 		ret = ext4_mb_discard_group_preallocations(sb, i, needed);
 		freed += ret;
@@ -4475,17 +4473,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 	sb = ar->inode->i_sb;
 	sbi = EXT4_SB(sb);
 
-	trace_mark(ext4_request_blocks, "dev %s flags %u len %u ino %lu "
-		   "lblk %llu goal %llu lleft %llu lright %llu "
-		   "pleft %llu pright %llu ",
-		   sb->s_id, ar->flags, ar->len,
-		   ar->inode ? ar->inode->i_ino : 0,
-		   (unsigned long long) ar->logical,
-		   (unsigned long long) ar->goal,
-		   (unsigned long long) ar->lleft,
-		   (unsigned long long) ar->lright,
-		   (unsigned long long) ar->pleft,
-		   (unsigned long long) ar->pright);
+	trace_ext4_request_blocks(ar);
 
 	/*
 	 * For delayed allocation, we could skip the ENOSPC and
@@ -4521,7 +4509,10 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 	}
 
 	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
-	if (!ac) {
+	if (ac) {
+		ac->ac_sb = sb;
+		ac->ac_inode = ar->inode;
+	} else {
 		ar->len = 0;
 		*errp = -ENOMEM;
 		goto out1;
@@ -4594,18 +4585,7 @@ out3:
 						reserv_blks);
 	}
 
-	trace_mark(ext4_allocate_blocks,
-		   "dev %s block %llu flags %u len %u ino %lu "
-		   "logical %llu goal %llu lleft %llu lright %llu "
-		   "pleft %llu pright %llu ",
-		   sb->s_id, (unsigned long long) block,
-		   ar->flags, ar->len, ar->inode ? ar->inode->i_ino : 0,
-		   (unsigned long long) ar->logical,
-		   (unsigned long long) ar->goal,
-		   (unsigned long long) ar->lleft,
-		   (unsigned long long) ar->lright,
-		   (unsigned long long) ar->pleft,
-		   (unsigned long long) ar->pright);
+	trace_ext4_allocate_blocks(ar, (unsigned long long)block);
 
 	return block;
 }
@@ -4740,10 +4720,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
 	}
 
 	ext4_debug("freeing block %lu\n", block);
-	trace_mark(ext4_free_blocks,
-		   "dev %s block %llu count %lu metadata %d ino %lu",
-		   sb->s_id, (unsigned long long) block, count, metadata,
-		   inode ? inode->i_ino : 0);
+	trace_ext4_free_blocks(inode, block, count, metadata);
 
 	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
 	if (ac) {
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 75e34f69215..c96bb19f58f 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -19,7 +19,6 @@
 #include <linux/seq_file.h>
 #include <linux/version.h>
 #include <linux/blkdev.h>
-#include <linux/marker.h>
 #include <linux/mutex.h>
 #include "ext4_jbd2.h"
 #include "ext4.h"
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 012c4251397..e8f0b2af460 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -37,7 +37,6 @@
 #include <linux/seq_file.h>
 #include <linux/proc_fs.h>
 #include <linux/ctype.h>
-#include <linux/marker.h>
 #include <linux/log2.h>
 #include <linux/crc16.h>
 #include <asm/uaccess.h>
@@ -47,6 +46,9 @@
 #include "xattr.h"
 #include "acl.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/ext4.h>
+
 static int default_mb_history_length = 1000;
 
 module_param_named(default_mb_history_length, default_mb_history_length,
@@ -3346,7 +3348,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
 	int ret = 0;
 	tid_t target;
 
-	trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
+	trace_ext4_sync_fs(sb, wait);
 	if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
 		if (wait)
 			jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
new file mode 100644
index 00000000000..acf4cc9cd36
--- /dev/null
+++ b/include/trace/events/ext4.h
@@ -0,0 +1,719 @@
+#if !defined(_TRACE_EXT4_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_EXT4_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ext4
+
+#include <linux/writeback.h>
+#include "../../../fs/ext4/ext4.h"
+#include "../../../fs/ext4/mballoc.h"
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(ext4_free_inode,
+	TP_PROTO(struct inode *inode),
+
+	TP_ARGS(inode),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	umode_t, mode			)
+		__field(	uid_t,	uid			)
+		__field(	gid_t,	gid			)
+		__field(	blkcnt_t, blocks		)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->mode	= inode->i_mode;
+		__entry->uid	= inode->i_uid;
+		__entry->gid	= inode->i_gid;
+		__entry->blocks	= inode->i_blocks;
+	),
+
+	TP_printk("dev %s ino %lu mode %d uid %u gid %u blocks %llu",
+		  jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->mode,
+		  __entry->uid, __entry->gid, __entry->blocks)
+);
+
+TRACE_EVENT(ext4_request_inode,
+	TP_PROTO(struct inode *dir, int mode),
+
+	TP_ARGS(dir, mode),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	dir			)
+		__field(	umode_t, mode			)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= dir->i_sb->s_dev;
+		__entry->dir	= dir->i_ino;
+		__entry->mode	= mode;
+	),
+
+	TP_printk("dev %s dir %lu mode %d",
+		  jbd2_dev_to_name(__entry->dev), __entry->dir, __entry->mode)
+);
+
+TRACE_EVENT(ext4_allocate_inode,
+	TP_PROTO(struct inode *inode, struct inode *dir, int mode),
+
+	TP_ARGS(inode, dir, mode),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	ino_t,	dir			)
+		__field(	umode_t, mode			)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->dir	= dir->i_ino;
+		__entry->mode	= mode;
+	),
+
+	TP_printk("dev %s ino %lu dir %lu mode %d",
+		  jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->dir, __entry->mode)
+);
+
+TRACE_EVENT(ext4_write_begin,
+
+	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+		 unsigned int flags),
+
+	TP_ARGS(inode, pos, len, flags),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	loff_t,	pos			)
+		__field(	unsigned int, len		)
+		__field(	unsigned int, flags		)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->pos	= pos;
+		__entry->len	= len;
+		__entry->flags	= flags;
+	),
+
+	TP_printk("dev %s ino %lu pos %llu len %u flags %u",
+		  jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
+		  __entry->flags)
+);
+
+TRACE_EVENT(ext4_ordered_write_end,
+	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+			unsigned int copied),
+
+	TP_ARGS(inode, pos, len, copied),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	loff_t,	pos			)
+		__field(	unsigned int, len		)
+		__field(	unsigned int, copied		)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->pos	= pos;
+		__entry->len	= len;
+		__entry->copied	= copied;
+	),
+
+	TP_printk("dev %s ino %lu pos %llu len %u copied %u",
+		  jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
+		  __entry->copied)
+);
+
+TRACE_EVENT(ext4_writeback_write_end,
+	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+		 unsigned int copied),
+
+	TP_ARGS(inode, pos, len, copied),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	loff_t,	pos			)
+		__field(	unsigned int, len		)
+		__field(	unsigned int, copied		)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->pos	= pos;
+		__entry->len	= len;
+		__entry->copied	= copied;
+	),
+
+	TP_printk("dev %s ino %lu pos %llu len %u copied %u",
+		  jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
+		  __entry->copied)
+);
+
+TRACE_EVENT(ext4_journalled_write_end,
+	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+		 unsigned int copied),
+	TP_ARGS(inode, pos, len, copied),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	loff_t,	pos			)
+		__field(	unsigned int, len		)
+		__field(	unsigned int, copied		)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->pos	= pos;
+		__entry->len	= len;
+		__entry->copied	= copied;
+	),
+
+	TP_printk("dev %s ino %lu pos %llu len %u copied %u",
+		  jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
+		  __entry->copied)
+);
+
+TRACE_EVENT(ext4_da_writepage,
+	TP_PROTO(struct inode *inode, struct page *page),
+
+	TP_ARGS(inode, page),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	pgoff_t, index			)
+
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->index	= page->index;
+	),
+
+	TP_printk("dev %s ino %lu page_index %lu",
+		  jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->index)
+);
+
+TRACE_EVENT(ext4_da_writepages,
+	TP_PROTO(struct inode *inode, struct writeback_control *wbc),
+
+	TP_ARGS(inode, wbc),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	long,	nr_to_write		)
+		__field(	long,	pages_skipped		)
+		__field(	loff_t,	range_start		)
+		__field(	loff_t,	range_end		)
+		__field(	char,	nonblocking		)
+		__field(	char,	for_kupdate		)
+		__field(	char,	for_reclaim		)
+		__field(	char,	for_writepages		)
+		__field(	char,	range_cyclic		)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= inode->i_sb->s_dev;
+		__entry->ino		= inode->i_ino;
+		__entry->nr_to_write	= wbc->nr_to_write;
+		__entry->pages_skipped	= wbc->pages_skipped;
+		__entry->range_start	= wbc->range_start;
+		__entry->range_end	= wbc->range_end;
+		__entry->nonblocking	= wbc->nonblocking;
+		__entry->for_kupdate	= wbc->for_kupdate;
+		__entry->for_reclaim	= wbc->for_reclaim;
+		__entry->for_writepages	= wbc->for_writepages;
+		__entry->range_cyclic	= wbc->range_cyclic;
+	),
+
+	TP_printk("dev %s ino %lu nr_t_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d for_writepages %d range_cyclic %d",
+		  jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->nr_to_write,
+		  __entry->pages_skipped, __entry->range_start,
+		  __entry->range_end, __entry->nonblocking,
+		  __entry->for_kupdate, __entry->for_reclaim,
+		  __entry->for_writepages, __entry->range_cyclic)
+);
+
+TRACE_EVENT(ext4_da_writepages_result,
+	TP_PROTO(struct inode *inode, struct writeback_control *wbc,
+			int ret, int pages_written),
+
+	TP_ARGS(inode, wbc, ret, pages_written),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	int,	ret			)
+		__field(	int,	pages_written		)
+		__field(	long,	pages_skipped		)
+		__field(	char,	encountered_congestion	)
+		__field(	char,	more_io			)	
+		__field(	char,	no_nrwrite_index_update )
+	),
+
+	TP_fast_assign(
+		__entry->dev		= inode->i_sb->s_dev;
+		__entry->ino		= inode->i_ino;
+		__entry->ret		= ret;
+		__entry->pages_written	= pages_written;
+		__entry->pages_skipped	= wbc->pages_skipped;
+		__entry->encountered_congestion	= wbc->encountered_congestion;
+		__entry->more_io	= wbc->more_io;
+		__entry->no_nrwrite_index_update = wbc->no_nrwrite_index_update;
+	),
+
+	TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld congestion %d more_io %d no_nrwrite_index_update %d",
+		  jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->ret,
+		  __entry->pages_written, __entry->pages_skipped,
+		  __entry->encountered_congestion, __entry->more_io,
+		  __entry->no_nrwrite_index_update)
+);
+
+TRACE_EVENT(ext4_da_write_begin,
+	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+			unsigned int flags),
+
+	TP_ARGS(inode, pos, len, flags),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	loff_t,	pos			)
+		__field(	unsigned int, len		)
+		__field(	unsigned int, flags		)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->pos	= pos;
+		__entry->len	= len;
+		__entry->flags	= flags;
+	),
+
+	TP_printk("dev %s ino %lu pos %llu len %u flags %u",
+		  jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
+		  __entry->flags)
+);
+
+TRACE_EVENT(ext4_da_write_end,
+	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+			unsigned int copied),
+
+	TP_ARGS(inode, pos, len, copied),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	loff_t,	pos			)
+		__field(	unsigned int, len		)
+		__field(	unsigned int, copied		)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->pos	= pos;
+		__entry->len	= len;
+		__entry->copied	= copied;
+	),
+
+	TP_printk("dev %s ino %lu pos %llu len %u copied %u",
+		  jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len,
+		  __entry->copied)
+);
+
+TRACE_EVENT(ext4_normal_writepage,
+	TP_PROTO(struct inode *inode, struct page *page),
+
+	TP_ARGS(inode, page),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	pgoff_t, index			)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->index	= page->index;
+	),
+
+	TP_printk("dev %s ino %lu page_index %lu",
+		  jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->index)
+);
+
+TRACE_EVENT(ext4_journalled_writepage,
+	TP_PROTO(struct inode *inode, struct page *page),
+
+	TP_ARGS(inode, page),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	pgoff_t, index			)
+
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->index	= page->index;
+	),
+
+	TP_printk("dev %s ino %lu page_index %lu",
+		  jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->index)
+);
+
+TRACE_EVENT(ext4_discard_blocks,
+	TP_PROTO(struct super_block *sb, unsigned long long blk,
+			unsigned long long count),
+
+	TP_ARGS(sb, blk, count),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	__u64,	blk			)
+		__field(	__u64,	count			)
+
+	),
+
+	TP_fast_assign(
+		__entry->dev	= sb->s_dev;
+		__entry->blk	= blk;
+		__entry->count	= count;
+	),
+
+	TP_printk("dev %s blk %llu count %llu",
+		  jbd2_dev_to_name(__entry->dev), __entry->blk, __entry->count)
+);
+
+TRACE_EVENT(ext4_mb_new_inode_pa,
+	TP_PROTO(struct ext4_allocation_context *ac,
+		 struct ext4_prealloc_space *pa),
+
+	TP_ARGS(ac, pa),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	__u64,	pa_pstart		)
+		__field(	__u32,	pa_len			)
+		__field(	__u64,	pa_lstart		)
+
+	),
+
+	TP_fast_assign(
+		__entry->dev		= ac->ac_sb->s_dev;
+		__entry->ino		= ac->ac_inode->i_ino;
+		__entry->pa_pstart	= pa->pa_pstart;
+		__entry->pa_len		= pa->pa_len;
+		__entry->pa_lstart	= pa->pa_lstart;
+	),
+
+	TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu",
+		  jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pa_pstart,
+		  __entry->pa_len, __entry->pa_lstart)
+);
+
+TRACE_EVENT(ext4_mb_new_group_pa,
+	TP_PROTO(struct ext4_allocation_context *ac,
+		 struct ext4_prealloc_space *pa),
+
+	TP_ARGS(ac, pa),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	__u64,	pa_pstart		)
+		__field(	__u32,	pa_len			)
+		__field(	__u64,	pa_lstart		)
+
+	),
+
+	TP_fast_assign(
+		__entry->dev		= ac->ac_sb->s_dev;
+		__entry->ino		= ac->ac_inode->i_ino;
+		__entry->pa_pstart	= pa->pa_pstart;
+		__entry->pa_len		= pa->pa_len;
+		__entry->pa_lstart	= pa->pa_lstart;
+	),
+
+	TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu",
+		  jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pa_pstart,
+		  __entry->pa_len, __entry->pa_lstart)
+);
+
+TRACE_EVENT(ext4_mb_release_inode_pa,
+	TP_PROTO(struct ext4_allocation_context *ac,
+		 struct ext4_prealloc_space *pa,
+		 unsigned long long block, unsigned int count),
+
+	TP_ARGS(ac, pa, block, count),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	__u64,	block			)
+		__field(	__u32,	count			)
+
+	),
+
+	TP_fast_assign(
+		__entry->dev		= ac->ac_sb->s_dev;
+		__entry->ino		= ac->ac_inode->i_ino;
+		__entry->block		= block;
+		__entry->count		= count;
+	),
+
+	TP_printk("dev %s ino %lu block %llu count %u",
+		  jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->block,
+		  __entry->count)
+);
+
+TRACE_EVENT(ext4_mb_release_group_pa,
+	TP_PROTO(struct ext4_allocation_context *ac,
+		 struct ext4_prealloc_space *pa),
+
+	TP_ARGS(ac, pa),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	__u64,	pa_pstart		)
+		__field(	__u32,	pa_len			)
+
+	),
+
+	TP_fast_assign(
+		__entry->dev		= ac->ac_sb->s_dev;
+		__entry->ino		= ac->ac_inode->i_ino;
+		__entry->pa_pstart	= pa->pa_pstart;
+		__entry->pa_len		= pa->pa_len;
+	),
+
+	TP_printk("dev %s pstart %llu len %u",
+		  jbd2_dev_to_name(__entry->dev), __entry->pa_pstart, __entry->pa_len)
+);
+
+TRACE_EVENT(ext4_discard_preallocations,
+	TP_PROTO(struct inode *inode),
+
+	TP_ARGS(inode),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+	),
+
+	TP_printk("dev %s ino %lu",
+		  jbd2_dev_to_name(__entry->dev), __entry->ino)
+);
+
+TRACE_EVENT(ext4_mb_discard_preallocations,
+	TP_PROTO(struct super_block *sb, int needed),
+
+	TP_ARGS(sb, needed),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	int,	needed			)
+
+	),
+
+	TP_fast_assign(
+		__entry->dev	= sb->s_dev;
+		__entry->needed	= needed;
+	),
+
+	TP_printk("dev %s needed %d",
+		  jbd2_dev_to_name(__entry->dev), __entry->needed)
+);
+
+TRACE_EVENT(ext4_request_blocks,
+	TP_PROTO(struct ext4_allocation_request *ar),
+
+	TP_ARGS(ar),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	unsigned int, flags		)
+		__field(	unsigned int, len		)
+		__field(	__u64,  logical			)
+		__field(	__u64,	goal			)
+		__field(	__u64,	lleft			)
+		__field(	__u64,	lright			)
+		__field(	__u64,	pleft			)
+		__field(	__u64,	pright			)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= ar->inode->i_sb->s_dev;
+		__entry->ino	= ar->inode->i_ino;
+		__entry->flags	= ar->flags;
+		__entry->len	= ar->len;
+		__entry->logical = ar->logical;
+		__entry->goal	= ar->goal;
+		__entry->lleft	= ar->lleft;
+		__entry->lright	= ar->lright;
+		__entry->pleft	= ar->pleft;
+		__entry->pright	= ar->pright;
+	),
+
+	TP_printk("dev %s ino %lu flags %u len %u lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
+		  jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->flags,
+		  __entry->len,
+		  (unsigned long long) __entry->logical,
+		  (unsigned long long) __entry->goal,
+		  (unsigned long long) __entry->lleft,
+		  (unsigned long long) __entry->lright,
+		  (unsigned long long) __entry->pleft,
+		  (unsigned long long) __entry->pright)
+);
+
+TRACE_EVENT(ext4_allocate_blocks,
+	TP_PROTO(struct ext4_allocation_request *ar, unsigned long long block),
+
+	TP_ARGS(ar, block),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	__u64,	block			)
+		__field(	unsigned int, flags		)
+		__field(	unsigned int, len		)
+		__field(	__u64,  logical			)
+		__field(	__u64,	goal			)
+		__field(	__u64,	lleft			)
+		__field(	__u64,	lright			)
+		__field(	__u64,	pleft			)
+		__field(	__u64,	pright			)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= ar->inode->i_sb->s_dev;
+		__entry->ino	= ar->inode->i_ino;
+		__entry->block	= block;
+		__entry->flags	= ar->flags;
+		__entry->len	= ar->len;
+		__entry->logical = ar->logical;
+		__entry->goal	= ar->goal;
+		__entry->lleft	= ar->lleft;
+		__entry->lright	= ar->lright;
+		__entry->pleft	= ar->pleft;
+		__entry->pright	= ar->pright;
+	),
+
+	TP_printk("dev %s ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
+		  jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->flags,
+		  __entry->len, __entry->block,
+		  (unsigned long long) __entry->logical,
+		  (unsigned long long) __entry->goal,
+		  (unsigned long long) __entry->lleft,
+		  (unsigned long long) __entry->lright,
+		  (unsigned long long) __entry->pleft,
+		  (unsigned long long) __entry->pright)
+);
+
+TRACE_EVENT(ext4_free_blocks,
+	TP_PROTO(struct inode *inode, __u64 block, unsigned long count,
+			int metadata),
+
+	TP_ARGS(inode, block, count, metadata),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	__u64,	block			)
+		__field(	unsigned long,	count		)
+		__field(	int,	metadata		)
+
+	),
+
+	TP_fast_assign(
+		__entry->dev		= inode->i_sb->s_dev;
+		__entry->ino		= inode->i_ino;
+		__entry->block		= block;
+		__entry->count		= count;
+		__entry->metadata	= metadata;
+	),
+
+	TP_printk("dev %s ino %lu block %llu count %lu metadata %d",
+		  jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->block,
+		  __entry->count, __entry->metadata)
+);
+
+TRACE_EVENT(ext4_sync_file,
+	TP_PROTO(struct file *file, struct dentry *dentry, int datasync),
+
+	TP_ARGS(file, dentry, datasync),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	ino_t,	parent			)
+		__field(	int,	datasync		)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= dentry->d_inode->i_sb->s_dev;
+		__entry->ino		= dentry->d_inode->i_ino;
+		__entry->datasync	= datasync;
+		__entry->parent		= dentry->d_parent->d_inode->i_ino;
+	),
+
+	TP_printk("dev %s ino %ld parent %ld datasync %d ",
+		  jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->parent,
+		  __entry->datasync)
+);
+
+TRACE_EVENT(ext4_sync_fs,
+	TP_PROTO(struct super_block *sb, int wait),
+
+	TP_ARGS(sb, wait),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	int,	wait			)
+
+	),
+
+	TP_fast_assign(
+		__entry->dev	= sb->s_dev;
+		__entry->wait	= wait;
+	),
+
+	TP_printk("dev %s wait %d", jbd2_dev_to_name(__entry->dev),
+		  __entry->wait)
+);
+
+#endif /* _TRACE_EXT4_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
-- 
cgit v1.2.3-70-g09d2


From e686307fdc84f249490e6c9da92fcb2424491f14 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Fri, 17 Apr 2009 08:41:21 +0200
Subject: loop: use BIO list management functions

Now that the bio list management stuff is generic, convert loop to use
bio lists instead of its own private bio list implementation.

Cc:  Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 drivers/block/loop.c | 26 +++++++-------------------
 include/linux/bio.h  |  2 +-
 include/linux/loop.h |  3 +--
 3 files changed, 9 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ddae8082589..9ca4bb01465 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -511,11 +511,7 @@ out:
  */
 static void loop_add_bio(struct loop_device *lo, struct bio *bio)
 {
-	if (lo->lo_biotail) {
-		lo->lo_biotail->bi_next = bio;
-		lo->lo_biotail = bio;
-	} else
-		lo->lo_bio = lo->lo_biotail = bio;
+	bio_list_add(&lo->lo_bio_list, bio);
 }
 
 /*
@@ -523,16 +519,7 @@ static void loop_add_bio(struct loop_device *lo, struct bio *bio)
  */
 static struct bio *loop_get_bio(struct loop_device *lo)
 {
-	struct bio *bio;
-
-	if ((bio = lo->lo_bio)) {
-		if (bio == lo->lo_biotail)
-			lo->lo_biotail = NULL;
-		lo->lo_bio = bio->bi_next;
-		bio->bi_next = NULL;
-	}
-
-	return bio;
+	return bio_list_pop(&lo->lo_bio_list);
 }
 
 static int loop_make_request(struct request_queue *q, struct bio *old_bio)
@@ -609,12 +596,13 @@ static int loop_thread(void *data)
 
 	set_user_nice(current, -20);
 
-	while (!kthread_should_stop() || lo->lo_bio) {
+	while (!kthread_should_stop() || !bio_list_empty(&lo->lo_bio_list)) {
 
 		wait_event_interruptible(lo->lo_event,
-				lo->lo_bio || kthread_should_stop());
+				!bio_list_empty(&lo->lo_bio_list) ||
+				kthread_should_stop());
 
-		if (!lo->lo_bio)
+		if (bio_list_empty(&lo->lo_bio_list))
 			continue;
 		spin_lock_irq(&lo->lo_lock);
 		bio = loop_get_bio(lo);
@@ -841,7 +829,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	lo->old_gfp_mask = mapping_gfp_mask(mapping);
 	mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
 
-	lo->lo_bio = lo->lo_biotail = NULL;
+	bio_list_init(&lo->lo_bio_list);
 
 	/*
 	 * set queue make_request_fn, and add limits based on lower level
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 7b214fd672a..f37ca8c726b 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -506,7 +506,7 @@ static inline int bio_has_data(struct bio *bio)
 }
 
 /*
- * BIO list managment for use by remapping drivers (e.g. DM or MD).
+ * BIO list management for use by remapping drivers (e.g. DM or MD) and loop.
  *
  * A bio_list anchors a singly-linked list of bios chained through the bi_next
  * member of the bio.  The bio_list also caches the last list member to allow
diff --git a/include/linux/loop.h b/include/linux/loop.h
index 40725447f5e..66c194e2d9b 100644
--- a/include/linux/loop.h
+++ b/include/linux/loop.h
@@ -56,8 +56,7 @@ struct loop_device {
 	gfp_t		old_gfp_mask;
 
 	spinlock_t		lo_lock;
-	struct bio 		*lo_bio;
-	struct bio		*lo_biotail;
+	struct bio_list		lo_bio_list;
 	int			lo_state;
 	struct mutex		lo_ctl_mutex;
 	struct task_struct	*lo_thread;
-- 
cgit v1.2.3-70-g09d2


From 214ae19104141404f18ad6651752eb38e3dd584e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:41 +0900
Subject: ide kill unused ide_cmd->special

Impact: removal of unused field

No one uses ide_cmd->special anymore.  Kill it.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/ide.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ide.h b/include/linux/ide.h
index ff65fffb078..846a1e13240 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -324,7 +324,6 @@ struct ide_cmd {
 	unsigned int		cursg_ofs;
 
 	struct request		*rq;		/* copy of request */
-	void			*special;	/* valid_t generally */
 };
 
 /* ATAPI packet command flags */
-- 
cgit v1.2.3-70-g09d2


From e69d800f7e8797a8e3423380ee9d8ca1cb90c388 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Sun, 19 Apr 2009 07:00:42 +0900
Subject: ide: add helpers for preparing sense requests

This is in preparation of removing the queueing of a sense request out
of the IRQ handler path.

Use struct request_sense as a general sense buffer for all ATAPI
devices ide-{floppy,tape,cd}.

tj: * blk_get_request(__GFP_WAIT) can't be called from do_request() as
      it can cause deadlock.  Converted to use inline struct request
      and blk_rq_init().

    * Added xfer / cdb len selection depending on device type.

    * All sense prep logics folded into ide_prep_sense() which never
      fails.

    * hwif->rq clearing and sense_rq used handling moved into
      ide_queue_sense_rq().

    * blk_rq_map_kern() conversion is moved to later patch.

CC: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
CC: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ide/ide-atapi.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/ide.h     | 11 +++++++++
 2 files changed, 72 insertions(+)

(limited to 'include')

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 2894577237b..c6e03485a63 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -191,6 +191,67 @@ void ide_create_request_sense_cmd(ide_drive_t *drive, struct ide_atapi_pc *pc)
 }
 EXPORT_SYMBOL_GPL(ide_create_request_sense_cmd);
 
+void ide_prep_sense(ide_drive_t *drive, struct request *rq)
+{
+	struct request_sense *sense = &drive->sense_data;
+	struct request *sense_rq = &drive->sense_rq;
+	unsigned int cmd_len, sense_len;
+
+	debug_log("%s: enter\n", __func__);
+
+	switch (drive->media) {
+	case ide_floppy:
+		cmd_len = 255;
+		sense_len = 18;
+		break;
+	case ide_tape:
+		cmd_len = 20;
+		sense_len = 20;
+		break;
+	default:
+		cmd_len = 18;
+		sense_len = 18;
+	}
+
+	BUG_ON(sense_len > sizeof(*sense));
+
+	if (blk_sense_request(rq) || drive->sense_rq_armed)
+		return;
+
+	memset(sense, 0, sizeof(*sense));
+
+	blk_rq_init(rq->q, sense_rq);
+	sense_rq->rq_disk = rq->rq_disk;
+
+	sense_rq->data = sense;
+	sense_rq->cmd[0] = GPCMD_REQUEST_SENSE;
+	sense_rq->cmd[4] = cmd_len;
+	sense_rq->data_len = sense_len;
+
+	sense_rq->cmd_type = REQ_TYPE_SENSE;
+	sense_rq->cmd_flags |= REQ_PREEMPT;
+
+	if (drive->media == ide_tape)
+		sense_rq->cmd[13] = REQ_IDETAPE_PC1;
+
+	drive->sense_rq_armed = true;
+}
+EXPORT_SYMBOL_GPL(ide_prep_sense);
+
+void ide_queue_sense_rq(ide_drive_t *drive, void *special)
+{
+	BUG_ON(!drive->sense_rq_armed);
+
+	drive->sense_rq.special = special;
+	drive->sense_rq_armed = false;
+
+	drive->hwif->rq = NULL;
+
+	elv_add_request(drive->queue, &drive->sense_rq,
+			ELEVATOR_INSERT_FRONT, 0);
+}
+EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
+
 /*
  * Called when an error was detected during the last packet command.
  * We queue a request sense packet command in the head of the request list.
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 846a1e13240..a69ccac5641 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -26,6 +26,9 @@
 #include <asm/io.h>
 #include <asm/mutex.h>
 
+/* for request_sense */
+#include <linux/cdrom.h>
+
 #if defined(CONFIG_CRIS) || defined(CONFIG_FRV) || defined(CONFIG_MN10300)
 # define SUPPORT_VLB_SYNC 0
 #else
@@ -602,6 +605,11 @@ struct ide_drive_s {
 
 	struct ide_atapi_pc request_sense_pc;
 	struct request request_sense_rq;
+
+	/* current sense rq and buffer */
+	bool sense_rq_armed;
+	struct request sense_rq;
+	struct request_sense sense_data;
 };
 
 typedef struct ide_drive_s ide_drive_t;
@@ -1175,6 +1183,9 @@ int ide_set_media_lock(ide_drive_t *, struct gendisk *, int);
 void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *);
 void ide_retry_pc(ide_drive_t *, struct gendisk *);
 
+void ide_prep_sense(ide_drive_t *drive, struct request *rq);
+void ide_queue_sense_rq(ide_drive_t *drive, void *special);
+
 int ide_cd_expiry(ide_drive_t *);
 
 int ide_cd_get_xferlen(struct request *);
-- 
cgit v1.2.3-70-g09d2


From 068753203e6cd085664a62e0fc0636e19b148a12 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Sun, 19 Apr 2009 07:00:42 +0900
Subject: ide-atapi: convert ide-{floppy,tape} to using preallocated sense
 buffer

Since we're issuing REQ_TYPE_SENSE now we need to allow those types of
rqs in the ->do_request callbacks. As a future improvement, sense_len
assignment might be unified across all ATAPI devices. Borislav to
check with specs and test.

As a result, get rid of ide_queue_pc_head() and
drive->request_sense_rq.

tj: * Init request sense ide_atapi_pc from sense request.  In the
      longer timer, it would probably better to fold
      ide_create_request_sense_cmd() into its only current user -
      ide_floppy_get_format_progress().

    * ide_retry_pc() no longer takes @disk.

CC: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
CC: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ide/ide-atapi.c  | 48 ++++++++++++++----------------------------------
 drivers/ide/ide-floppy.c |  4 +++-
 drivers/ide/ide-tape.c   |  7 +++++--
 include/linux/ide.h      |  3 +--
 4 files changed, 23 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index c6e03485a63..972c522516f 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -79,34 +79,6 @@ void ide_init_pc(struct ide_atapi_pc *pc)
 }
 EXPORT_SYMBOL_GPL(ide_init_pc);
 
-/*
- * Generate a new packet command request in front of the request queue, before
- * the current request, so that it will be processed immediately, on the next
- * pass through the driver.
- */
-static void ide_queue_pc_head(ide_drive_t *drive, struct gendisk *disk,
-			      struct ide_atapi_pc *pc, struct request *rq)
-{
-	blk_rq_init(NULL, rq);
-	rq->cmd_type = REQ_TYPE_SPECIAL;
-	rq->cmd_flags |= REQ_PREEMPT;
-	rq->special = (char *)pc;
-	rq->rq_disk = disk;
-
-	if (pc->req_xfer) {
-		rq->data = pc->buf;
-		rq->data_len = pc->req_xfer;
-	}
-
-	memcpy(rq->cmd, pc->c, 12);
-	if (drive->media == ide_tape)
-		rq->cmd[13] = REQ_IDETAPE_PC1;
-
-	drive->hwif->rq = NULL;
-
-	elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0);
-}
-
 /*
  * Add a special packet command request to the tail of the request queue,
  * and wait for it to be serviced.
@@ -254,18 +226,26 @@ EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
 
 /*
  * Called when an error was detected during the last packet command.
- * We queue a request sense packet command in the head of the request list.
+ * We queue a request sense packet command at the head of the request
+ * queue.
  */
-void ide_retry_pc(ide_drive_t *drive, struct gendisk *disk)
+void ide_retry_pc(ide_drive_t *drive)
 {
-	struct request *rq = &drive->request_sense_rq;
+	struct request *sense_rq = &drive->sense_rq;
 	struct ide_atapi_pc *pc = &drive->request_sense_pc;
 
 	(void)ide_read_error(drive);
-	ide_create_request_sense_cmd(drive, pc);
+
+	/* init pc from sense_rq */
+	ide_init_pc(pc);
+	memcpy(pc->c, sense_rq->cmd, 12);
+	pc->buf = sense_rq->data;
+	pc->req_xfer = sense_rq->data_len;
+
 	if (drive->media == ide_tape)
 		set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
-	ide_queue_pc_head(drive, disk, pc, rq);
+
+	ide_queue_sense_rq(drive, pc);
 }
 EXPORT_SYMBOL_GPL(ide_retry_pc);
 
@@ -404,7 +384,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 			debug_log("[cmd %x]: check condition\n", rq->cmd[0]);
 
 			/* Retry operation */
-			ide_retry_pc(drive, rq->rq_disk);
+			ide_retry_pc(drive);
 
 			/* queued, but not started */
 			return ide_stopped;
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 94600331a27..d3302cc891e 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -263,7 +263,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
 		}
 		pc = &floppy->queued_pc;
 		idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block);
-	} else if (blk_special_request(rq)) {
+	} else if (blk_special_request(rq) || blk_sense_request(rq)) {
 		pc = (struct ide_atapi_pc *)rq->special;
 	} else if (blk_pc_request(rq)) {
 		pc = &floppy->queued_pc;
@@ -273,6 +273,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
 		goto out_end;
 	}
 
+	ide_prep_sense(drive, rq);
+
 	memset(&cmd, 0, sizeof(cmd));
 
 	if (rq_data_dir(rq))
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index aadf53cfac6..8324dfa78a3 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -695,7 +695,7 @@ static ide_startstop_t idetape_media_access_finished(ide_drive_t *drive)
 				printk(KERN_ERR "ide-tape: %s: I/O error, ",
 						tape->name);
 			/* Retry operation */
-			ide_retry_pc(drive, tape->disk);
+			ide_retry_pc(drive);
 			return ide_stopped;
 		}
 		pc->error = 0;
@@ -752,7 +752,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 			(unsigned long long)rq->sector, rq->nr_sectors,
 			rq->current_nr_sectors);
 
-	if (!blk_special_request(rq)) {
+	if (!(blk_special_request(rq) || blk_sense_request(rq))) {
 		/* We do not support buffer cache originated requests. */
 		printk(KERN_NOTICE "ide-tape: %s: Unsupported request in "
 			"request queue (%d)\n", drive->name, rq->cmd_type);
@@ -840,6 +840,9 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 	BUG();
 
 out:
+	/* prepare sense request for this command */
+	ide_prep_sense(drive, rq);
+
 	memset(&cmd, 0, sizeof(cmd));
 
 	if (rq_data_dir(rq))
diff --git a/include/linux/ide.h b/include/linux/ide.h
index a69ccac5641..9e67ccac3c1 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -604,7 +604,6 @@ struct ide_drive_s {
 	unsigned long atapi_flags;
 
 	struct ide_atapi_pc request_sense_pc;
-	struct request request_sense_rq;
 
 	/* current sense rq and buffer */
 	bool sense_rq_armed;
@@ -1181,7 +1180,7 @@ int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *);
 int ide_do_start_stop(ide_drive_t *, struct gendisk *, int);
 int ide_set_media_lock(ide_drive_t *, struct gendisk *, int);
 void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *);
-void ide_retry_pc(ide_drive_t *, struct gendisk *);
+void ide_retry_pc(ide_drive_t *drive);
 
 void ide_prep_sense(ide_drive_t *drive, struct request *rq);
 void ide_queue_sense_rq(ide_drive_t *drive, void *special);
-- 
cgit v1.2.3-70-g09d2


From 02e7cf8f848841ca21864ccd019e480b73c323b7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 07:00:42 +0900
Subject: ide-cd,atapi: use bio for internal commands

Impact: unify request data buffer handling

rq->data is used mostly to pass kernel buffer through request queue
without using bio.  There are only a couple of places which still do
this in kernel and converting to bio isn't difficult.

This patch converts ide-cd and atapi to use bio instead of rq->data
for request sense and internal pc commands.  With previous change to
unify sense request handling, this is relatively easily achieved by
adding blk_rq_map_kern() during sense_rq prep and PC issue.

If blk_rq_map_kern() fails for sense, the error is deferred till sense
issue and aborts the failed command which triggered the sense.  Note
that this is a slim possibility as sense prep is done on each command
issue, so for the above condition to actually trigger, all preps since
the last sense issue till the issue of the request which would require
a sense should fail.

* do_request functions might sleep now.  This should be okay as ide
  request_fn - do_ide_request() - is invoked only from make_request
  and plug work.  Make sure this is the case by adding might_sleep()
  to do_ide_request().

* Functions which access the read sense data before the sense request
  is complete now should access bio_data(sense_rq->bio) as the sense
  buffer might have been copied during blk_rq_map_kern().

* ide-tape updated to map sg.

* cdrom_do_block_pc() now doesn't have to deal with REQ_TYPE_ATA_PC
  special case.  Simplified.

* tp_ops->output/input_data path dropped from ide_pc_intr().

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ide/ide-atapi.c | 52 +++++++++++++++++++++++++++++--------------------
 drivers/ide/ide-cd.c    | 28 +++++++++++++-------------
 drivers/ide/ide-io.c    |  3 +++
 drivers/ide/ide-tape.c  |  3 +++
 include/linux/ide.h     |  2 +-
 5 files changed, 52 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 972c522516f..5cefe12f562 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -94,16 +94,18 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
 	rq->special = (char *)pc;
 
 	if (pc->req_xfer) {
-		rq->data = pc->buf;
-		rq->data_len = pc->req_xfer;
+		error = blk_rq_map_kern(drive->queue, rq, pc->buf, pc->req_xfer,
+					GFP_NOIO);
+		if (error)
+			goto put_req;
 	}
 
 	memcpy(rq->cmd, pc->c, 12);
 	if (drive->media == ide_tape)
 		rq->cmd[13] = REQ_IDETAPE_PC1;
 	error = blk_execute_rq(drive->queue, disk, rq, 0);
+put_req:
 	blk_put_request(rq);
-
 	return error;
 }
 EXPORT_SYMBOL_GPL(ide_queue_pc_tail);
@@ -168,6 +170,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
 	struct request_sense *sense = &drive->sense_data;
 	struct request *sense_rq = &drive->sense_rq;
 	unsigned int cmd_len, sense_len;
+	int err;
 
 	debug_log("%s: enter\n", __func__);
 
@@ -193,13 +196,19 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
 	memset(sense, 0, sizeof(*sense));
 
 	blk_rq_init(rq->q, sense_rq);
-	sense_rq->rq_disk = rq->rq_disk;
 
-	sense_rq->data = sense;
+	err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len,
+			      GFP_NOIO);
+	if (unlikely(err)) {
+		if (printk_ratelimit())
+			printk(KERN_WARNING "%s: failed to map sense buffer\n",
+			       drive->name);
+		return;
+	}
+
+	sense_rq->rq_disk = rq->rq_disk;
 	sense_rq->cmd[0] = GPCMD_REQUEST_SENSE;
 	sense_rq->cmd[4] = cmd_len;
-	sense_rq->data_len = sense_len;
-
 	sense_rq->cmd_type = REQ_TYPE_SENSE;
 	sense_rq->cmd_flags |= REQ_PREEMPT;
 
@@ -210,9 +219,14 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
 }
 EXPORT_SYMBOL_GPL(ide_prep_sense);
 
-void ide_queue_sense_rq(ide_drive_t *drive, void *special)
+int ide_queue_sense_rq(ide_drive_t *drive, void *special)
 {
-	BUG_ON(!drive->sense_rq_armed);
+	/* deferred failure from ide_prep_sense() */
+	if (!drive->sense_rq_armed) {
+		printk(KERN_WARNING "%s: failed queue sense request\n",
+		       drive->name);
+		return -ENOMEM;
+	}
 
 	drive->sense_rq.special = special;
 	drive->sense_rq_armed = false;
@@ -221,6 +235,7 @@ void ide_queue_sense_rq(ide_drive_t *drive, void *special)
 
 	elv_add_request(drive->queue, &drive->sense_rq,
 			ELEVATOR_INSERT_FRONT, 0);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
 
@@ -239,13 +254,14 @@ void ide_retry_pc(ide_drive_t *drive)
 	/* init pc from sense_rq */
 	ide_init_pc(pc);
 	memcpy(pc->c, sense_rq->cmd, 12);
-	pc->buf = sense_rq->data;
+	pc->buf = bio_data(sense_rq->bio);	/* pointer to mapped address */
 	pc->req_xfer = sense_rq->data_len;
 
 	if (drive->media == ide_tape)
 		set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
 
-	ide_queue_sense_rq(drive, pc);
+	if (ide_queue_sense_rq(drive, pc))
+		ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq));
 }
 EXPORT_SYMBOL_GPL(ide_retry_pc);
 
@@ -317,7 +333,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 	struct ide_cmd *cmd = &hwif->cmd;
 	struct request *rq = hwif->rq;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
-	xfer_func_t *xferfunc;
 	unsigned int timeout, done;
 	u16 bcount;
 	u8 stat, ireason, dsc = 0;
@@ -411,7 +426,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 					rq->errors = -EIO;
 			}
 
-			if (drive->media == ide_tape)
+			if (drive->media == ide_tape && !rq->bio)
 				done = ide_rq_bytes(rq); /* FIXME */
 			else
 				done = blk_rq_bytes(rq);
@@ -448,16 +463,11 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 		return ide_do_reset(drive);
 	}
 
-	xferfunc = write ? tp_ops->output_data : tp_ops->input_data;
-
-	if (drive->media == ide_floppy && pc->buf == NULL) {
+	if (drive->media == ide_tape && pc->bh)
+		done = drive->pc_io_buffers(drive, pc, bcount, write);
+	else {
 		done = min_t(unsigned int, bcount, cmd->nleft);
 		ide_pio_bytes(drive, cmd, write, done);
-	} else if (drive->media == ide_tape && pc->bh) {
-		done = drive->pc_io_buffers(drive, pc, bcount, write);
-	} else {
-		done = min_t(unsigned int, bcount, pc->req_xfer - pc->xferred);
-		xferfunc(drive, NULL, pc->cur_pos, done);
 	}
 
 	/* Update the current position */
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 061d7bbcd34..673628790f1 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -210,10 +210,12 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
 {
 	/*
 	 * For REQ_TYPE_SENSE, "rq->special" points to the original
-	 * failed request
+	 * failed request.  Also, the sense data should be read
+	 * directly from rq which might be different from the original
+	 * sense buffer if it got copied during mapping.
 	 */
 	struct request *failed = (struct request *)rq->special;
-	struct request_sense *sense = &drive->sense_data;
+	void *sense = bio_data(rq->bio);
 
 	if (failed) {
 		if (failed->sense) {
@@ -398,7 +400,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
 
 	/* if we got a CHECK_CONDITION status, queue a request sense command */
 	if (stat & ATA_ERR)
-		ide_queue_sense_rq(drive, NULL);
+		return ide_queue_sense_rq(drive, NULL) ? 2 : 1;
 	return 1;
 
 end_request:
@@ -412,8 +414,7 @@ end_request:
 
 		hwif->rq = NULL;
 
-		ide_queue_sense_rq(drive, rq);
-		return 1;
+		return ide_queue_sense_rq(drive, rq) ? 2 : 1;
 	} else
 		return 2;
 }
@@ -507,8 +508,12 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 		rq->cmd_flags |= cmd_flags;
 		rq->timeout = timeout;
 		if (buffer) {
-			rq->data = buffer;
-			rq->data_len = *bufflen;
+			error = blk_rq_map_kern(drive->queue, rq, buffer,
+						*bufflen, GFP_NOIO);
+			if (error) {
+				blk_put_request(rq);
+				return error;
+			}
 		}
 
 		error = blk_execute_rq(drive->queue, info->disk, rq, 0);
@@ -802,15 +807,10 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
 	drive->dma = 0;
 
 	/* sg request */
-	if (rq->bio || ((rq->cmd_type == REQ_TYPE_ATA_PC) && rq->data_len)) {
+	if (rq->bio) {
 		struct request_queue *q = drive->queue;
+		char *buf = bio_data(rq->bio);
 		unsigned int alignment;
-		char *buf;
-
-		if (rq->bio)
-			buf = bio_data(rq->bio);
-		else
-			buf = rq->data;
 
 		drive->dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA);
 
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 9b9e8b1aae5..3245c2dbda3 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -481,6 +481,9 @@ void do_ide_request(struct request_queue *q)
 
 	spin_unlock_irq(q->queue_lock);
 
+	/* HLD do_request() callback might sleep, make sure it's okay */
+	might_sleep();
+
 	if (ide_lock_host(host, hwif))
 		goto plug_device_2;
 
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 8324dfa78a3..9b762a2d5d9 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -850,6 +850,9 @@ out:
 
 	cmd.rq = rq;
 
+	ide_init_sg_cmd(&cmd, pc->req_xfer);
+	ide_map_sg(drive, &cmd);
+
 	return ide_tape_issue_pc(drive, &cmd, pc);
 }
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 9e67ccac3c1..1957461ac76 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1183,7 +1183,7 @@ void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *);
 void ide_retry_pc(ide_drive_t *drive);
 
 void ide_prep_sense(ide_drive_t *drive, struct request *rq);
-void ide_queue_sense_rq(ide_drive_t *drive, void *special);
+int ide_queue_sense_rq(ide_drive_t *drive, void *special);
 
 int ide_cd_expiry(ide_drive_t *);
 
-- 
cgit v1.2.3-70-g09d2


From 29d1a4371035e01b0d079bc5aa88b50f5af7a566 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 19 Apr 2009 08:46:03 +0900
Subject: ide-atapi: kill unused fields and callbacks

Impact: remove fields and code paths which are no longer necessary

Now that ide-tape uses standard mechanisms to transfer data, special
case handling for bh handling can be dropped from ide-atapi.  Drop the
followings.

* pc->cur_pos, b_count, bh and b_data
* drive->pc_update_buffers() and pc_io_buffers().

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ide/ide-atapi.c | 17 ++++-------------
 drivers/ide/ide-tape.c  |  1 -
 include/linux/ide.h     | 12 ------------
 3 files changed, 4 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index b9dd4503cbc..afe5a432387 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -359,11 +359,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 					drive->name, rq_data_dir(pc->rq)
 						     ? "write" : "read");
 			pc->flags |= PC_FLAG_DMA_ERROR;
-		} else {
+		} else
 			pc->xferred = pc->req_xfer;
-			if (drive->pc_update_buffers)
-				drive->pc_update_buffers(drive, pc);
-		}
 		debug_log("%s: DMA finished\n", drive->name);
 	}
 
@@ -463,16 +460,11 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 		return ide_do_reset(drive);
 	}
 
-	if (drive->media == ide_tape && pc->bh)
-		done = drive->pc_io_buffers(drive, pc, bcount, write);
-	else {
-		done = min_t(unsigned int, bcount, cmd->nleft);
-		ide_pio_bytes(drive, cmd, write, done);
-	}
+	done = min_t(unsigned int, bcount, cmd->nleft);
+	ide_pio_bytes(drive, cmd, write, done);
 
-	/* Update the current position */
+	/* Update transferred byte count */
 	pc->xferred += done;
-	pc->cur_pos += done;
 
 	bcount -= done;
 
@@ -650,7 +642,6 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd)
 
 		/* We haven't transferred any data yet */
 		pc->xferred = 0;
-		pc->cur_pos = pc->buf;
 
 		valid_tf = IDE_VALID_DEVICE;
 		bcount = ((drive->media == ide_tape) ?
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 2599579e417..8dfc68892d6 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -591,7 +591,6 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape,
 	ide_init_pc(pc);
 	put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]);
 	pc->c[1] = 1;
-	pc->bh = NULL;
 	pc->buf = NULL;
 	pc->buf_size = length * tape->blk_size;
 	pc->req_xfer = pc->buf_size;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 1957461ac76..34c128f0a33 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -362,11 +362,7 @@ struct ide_atapi_pc {
 
 	/* data buffer */
 	u8 *buf;
-	/* current buffer position */
-	u8 *cur_pos;
 	int buf_size;
-	/* missing/available data on the current buffer */
-	int b_count;
 
 	/* the corresponding request */
 	struct request *rq;
@@ -379,10 +375,6 @@ struct ide_atapi_pc {
 	 */
 	u8 pc_buf[IDE_PC_BUFFER_SIZE];
 
-	/* idetape only */
-	struct idetape_bh *bh;
-	char *b_data;
-
 	unsigned long timeout;
 };
 
@@ -595,10 +587,6 @@ struct ide_drive_s {
 	/* callback for packet commands */
 	int  (*pc_callback)(struct ide_drive_s *, int);
 
-	void (*pc_update_buffers)(struct ide_drive_s *, struct ide_atapi_pc *);
-	int  (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *,
-			      unsigned int, int);
-
 	ide_startstop_t (*irq_handler)(struct ide_drive_s *);
 
 	unsigned long atapi_flags;
-- 
cgit v1.2.3-70-g09d2


From a7f557923441186a3cdbabc54f1bcacf42b63bf5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 23 Apr 2009 11:05:17 +0900
Subject: block: kill blk_start_queueing()

blk_start_queueing() is identical to __blk_run_queue() except that it
doesn't check for recursion.  None of the current users depends on
blk_start_queueing() running request_fn directly.  Replace usages of
blk_start_queueing() with [__]blk_run_queue() and kill it.

[ Impact: removal of mostly duplicate interface function ]

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 block/as-iosched.c     |  6 +-----
 block/blk-core.c       | 28 ++--------------------------
 block/cfq-iosched.c    |  6 +++---
 block/elevator.c       |  7 +++----
 include/linux/blkdev.h |  1 -
 5 files changed, 9 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/block/as-iosched.c b/block/as-iosched.c
index c48fa670d22..45bd07059c2 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1312,12 +1312,8 @@ static void as_merged_requests(struct request_queue *q, struct request *req,
 static void as_work_handler(struct work_struct *work)
 {
 	struct as_data *ad = container_of(work, struct as_data, antic_work);
-	struct request_queue *q = ad->q;
-	unsigned long flags;
 
-	spin_lock_irqsave(q->queue_lock, flags);
-	blk_start_queueing(q);
-	spin_unlock_irqrestore(q->queue_lock, flags);
+	blk_run_queue(ad->q);
 }
 
 static int as_may_queue(struct request_queue *q, int rw)
diff --git a/block/blk-core.c b/block/blk-core.c
index 02f53bc00e4..8b4a0af7d69 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -433,9 +433,7 @@ EXPORT_SYMBOL(__blk_run_queue);
  *
  * Description:
  *    Invoke request handling on this queue, if it has pending work to do.
- *    May be used to restart queueing when a request has completed. Also
- *    See @blk_start_queueing.
- *
+ *    May be used to restart queueing when a request has completed.
  */
 void blk_run_queue(struct request_queue *q)
 {
@@ -894,28 +892,6 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(blk_get_request);
 
-/**
- * blk_start_queueing - initiate dispatch of requests to device
- * @q:		request queue to kick into gear
- *
- * This is basically a helper to remove the need to know whether a queue
- * is plugged or not if someone just wants to initiate dispatch of requests
- * for this queue. Should be used to start queueing on a device outside
- * of ->request_fn() context. Also see @blk_run_queue.
- *
- * The queue lock must be held with interrupts disabled.
- */
-void blk_start_queueing(struct request_queue *q)
-{
-	if (!blk_queue_plugged(q)) {
-		if (unlikely(blk_queue_stopped(q)))
-			return;
-		q->request_fn(q);
-	} else
-		__generic_unplug_device(q);
-}
-EXPORT_SYMBOL(blk_start_queueing);
-
 /**
  * blk_requeue_request - put a request back on queue
  * @q:		request queue where request should be inserted
@@ -984,7 +960,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
 
 	drive_stat_acct(rq, 1);
 	__elv_add_request(q, rq, where, 0);
-	blk_start_queueing(q);
+	__blk_run_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(blk_insert_request);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index a55a9bd75bd..def0c698a4b 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2088,7 +2088,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 			if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE ||
 			    cfqd->busy_queues > 1) {
 				del_timer(&cfqd->idle_slice_timer);
-				blk_start_queueing(cfqd->queue);
+			__blk_run_queue(cfqd->queue);
 			}
 			cfq_mark_cfqq_must_dispatch(cfqq);
 		}
@@ -2100,7 +2100,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		 * this new queue is RT and the current one is BE
 		 */
 		cfq_preempt_queue(cfqd, cfqq);
-		blk_start_queueing(cfqd->queue);
+		__blk_run_queue(cfqd->queue);
 	}
 }
 
@@ -2345,7 +2345,7 @@ static void cfq_kick_queue(struct work_struct *work)
 	struct request_queue *q = cfqd->queue;
 
 	spin_lock_irq(q->queue_lock);
-	blk_start_queueing(q);
+	__blk_run_queue(cfqd->queue);
 	spin_unlock_irq(q->queue_lock);
 }
 
diff --git a/block/elevator.c b/block/elevator.c
index 7073a907257..2e0fb21485b 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -599,7 +599,7 @@ void elv_quiesce_start(struct request_queue *q)
 	 */
 	elv_drain_elevator(q);
 	while (q->rq.elvpriv) {
-		blk_start_queueing(q);
+		__blk_run_queue(q);
 		spin_unlock_irq(q->queue_lock);
 		msleep(10);
 		spin_lock_irq(q->queue_lock);
@@ -643,8 +643,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
 		 *   with anything.  There's no point in delaying queue
 		 *   processing.
 		 */
-		blk_remove_plug(q);
-		blk_start_queueing(q);
+		__blk_run_queue(q);
 		break;
 
 	case ELEVATOR_INSERT_SORT:
@@ -971,7 +970,7 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
 		    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
 		    (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) {
 			blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
-			blk_start_queueing(q);
+			__blk_run_queue(q);
 		}
 	}
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2755d5c6da2..12e20de44b6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -797,7 +797,6 @@ extern void blk_sync_queue(struct request_queue *q);
 extern void __blk_stop_queue(struct request_queue *q);
 extern void __blk_run_queue(struct request_queue *);
 extern void blk_run_queue(struct request_queue *);
-extern void blk_start_queueing(struct request_queue *);
 extern int blk_rq_map_user(struct request_queue *, struct request *,
 			   struct rq_map_data *, void __user *, unsigned long,
 			   gfp_t);
-- 
cgit v1.2.3-70-g09d2


From 5efccd17ceb0fc43837a331297c2c407969d7201 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 23 Apr 2009 11:05:18 +0900
Subject: block: reorder request completion functions

Reorder request completion functions such that

* All request completion functions are located together.

* Functions which are used by only one caller is put right above the
  caller.

* end_request() is put after other completion functions but before
  blk_update_request().

This change is for completion function cleanup which will follow.

[ Impact: cleanup, code reorganization ]

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 block/blk-core.c       | 144 ++++++++++++++++++++++++-------------------------
 include/linux/blkdev.h |  16 +++---
 2 files changed, 80 insertions(+), 80 deletions(-)

(limited to 'include')

diff --git a/block/blk-core.c b/block/blk-core.c
index cf10dfcda99..406a93e526b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1683,6 +1683,35 @@ static void blk_account_io_done(struct request *req)
 	}
 }
 
+/**
+ * blk_rq_bytes - Returns bytes left to complete in the entire request
+ * @rq: the request being processed
+ **/
+unsigned int blk_rq_bytes(struct request *rq)
+{
+	if (blk_fs_request(rq))
+		return rq->hard_nr_sectors << 9;
+
+	return rq->data_len;
+}
+EXPORT_SYMBOL_GPL(blk_rq_bytes);
+
+/**
+ * blk_rq_cur_bytes - Returns bytes left to complete in the current segment
+ * @rq: the request being processed
+ **/
+unsigned int blk_rq_cur_bytes(struct request *rq)
+{
+	if (blk_fs_request(rq))
+		return rq->current_nr_sectors << 9;
+
+	if (rq->bio)
+		return rq->bio->bi_size;
+
+	return rq->data_len;
+}
+EXPORT_SYMBOL_GPL(blk_rq_cur_bytes);
+
 /**
  * __end_that_request_first - end I/O on a request
  * @req:      the request being processed
@@ -1797,6 +1826,22 @@ static int __end_that_request_first(struct request *req, int error,
 	return 1;
 }
 
+static int end_that_request_data(struct request *rq, int error,
+				 unsigned int nr_bytes, unsigned int bidi_bytes)
+{
+	if (rq->bio) {
+		if (__end_that_request_first(rq, error, nr_bytes))
+			return 1;
+
+		/* Bidi request must be completed as a whole */
+		if (blk_bidi_rq(rq) &&
+		    __end_that_request_first(rq->next_rq, error, bidi_bytes))
+			return 1;
+	}
+
+	return 0;
+}
+
 /*
  * queue lock must be held
  */
@@ -1825,78 +1870,6 @@ static void end_that_request_last(struct request *req, int error)
 	}
 }
 
-/**
- * blk_rq_bytes - Returns bytes left to complete in the entire request
- * @rq: the request being processed
- **/
-unsigned int blk_rq_bytes(struct request *rq)
-{
-	if (blk_fs_request(rq))
-		return rq->hard_nr_sectors << 9;
-
-	return rq->data_len;
-}
-EXPORT_SYMBOL_GPL(blk_rq_bytes);
-
-/**
- * blk_rq_cur_bytes - Returns bytes left to complete in the current segment
- * @rq: the request being processed
- **/
-unsigned int blk_rq_cur_bytes(struct request *rq)
-{
-	if (blk_fs_request(rq))
-		return rq->current_nr_sectors << 9;
-
-	if (rq->bio)
-		return rq->bio->bi_size;
-
-	return rq->data_len;
-}
-EXPORT_SYMBOL_GPL(blk_rq_cur_bytes);
-
-/**
- * end_request - end I/O on the current segment of the request
- * @req:	the request being processed
- * @uptodate:	error value or %0/%1 uptodate flag
- *
- * Description:
- *     Ends I/O on the current segment of a request. If that is the only
- *     remaining segment, the request is also completed and freed.
- *
- *     This is a remnant of how older block drivers handled I/O completions.
- *     Modern drivers typically end I/O on the full request in one go, unless
- *     they have a residual value to account for. For that case this function
- *     isn't really useful, unless the residual just happens to be the
- *     full current segment. In other words, don't use this function in new
- *     code. Use blk_end_request() or __blk_end_request() to end a request.
- **/
-void end_request(struct request *req, int uptodate)
-{
-	int error = 0;
-
-	if (uptodate <= 0)
-		error = uptodate ? uptodate : -EIO;
-
-	__blk_end_request(req, error, req->hard_cur_sectors << 9);
-}
-EXPORT_SYMBOL(end_request);
-
-static int end_that_request_data(struct request *rq, int error,
-				 unsigned int nr_bytes, unsigned int bidi_bytes)
-{
-	if (rq->bio) {
-		if (__end_that_request_first(rq, error, nr_bytes))
-			return 1;
-
-		/* Bidi request must be completed as a whole */
-		if (blk_bidi_rq(rq) &&
-		    __end_that_request_first(rq->next_rq, error, bidi_bytes))
-			return 1;
-	}
-
-	return 0;
-}
-
 /**
  * blk_end_io - Generic end_io function to complete a request.
  * @rq:           the request being processed
@@ -2006,6 +1979,33 @@ int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes,
 }
 EXPORT_SYMBOL_GPL(blk_end_bidi_request);
 
+/**
+ * end_request - end I/O on the current segment of the request
+ * @req:	the request being processed
+ * @uptodate:	error value or %0/%1 uptodate flag
+ *
+ * Description:
+ *     Ends I/O on the current segment of a request. If that is the only
+ *     remaining segment, the request is also completed and freed.
+ *
+ *     This is a remnant of how older block drivers handled I/O completions.
+ *     Modern drivers typically end I/O on the full request in one go, unless
+ *     they have a residual value to account for. For that case this function
+ *     isn't really useful, unless the residual just happens to be the
+ *     full current segment. In other words, don't use this function in new
+ *     code. Use blk_end_request() or __blk_end_request() to end a request.
+ **/
+void end_request(struct request *req, int uptodate)
+{
+	int error = 0;
+
+	if (uptodate <= 0)
+		error = uptodate ? uptodate : -EIO;
+
+	__blk_end_request(req, error, req->hard_cur_sectors << 9);
+}
+EXPORT_SYMBOL(end_request);
+
 /**
  * blk_update_request - Special helper function for request stacking drivers
  * @rq:           the request being processed
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 12e20de44b6..156ffd9de96 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -831,6 +831,14 @@ static inline void blk_run_address_space(struct address_space *mapping)
 
 extern void blkdev_dequeue_request(struct request *req);
 
+/*
+ * blk_end_request() takes bytes instead of sectors as a complete size.
+ * blk_rq_bytes() returns bytes left to complete in the entire request.
+ * blk_rq_cur_bytes() returns bytes left to complete in the current segment.
+ */
+extern unsigned int blk_rq_bytes(struct request *rq);
+extern unsigned int blk_rq_cur_bytes(struct request *rq);
+
 /*
  * blk_end_request() and friends.
  * __blk_end_request() and end_request() must be called with
@@ -857,14 +865,6 @@ extern void blk_abort_queue(struct request_queue *);
 extern void blk_update_request(struct request *rq, int error,
 			       unsigned int nr_bytes);
 
-/*
- * blk_end_request() takes bytes instead of sectors as a complete size.
- * blk_rq_bytes() returns bytes left to complete in the entire request.
- * blk_rq_cur_bytes() returns bytes left to complete in the current segment.
- */
-extern unsigned int blk_rq_bytes(struct request *rq);
-extern unsigned int blk_rq_cur_bytes(struct request *rq);
-
 /*
  * Access functions for manipulating queue properties
  */
-- 
cgit v1.2.3-70-g09d2


From 0b302d5aa7975006fa2ec3d66386610b9b36c669 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 23 Apr 2009 11:05:18 +0900
Subject: block: kill blk_end_request_callback()

With recent IDE updates, blk_end_request_callback() doesn't have any
user now.  Kill it.

[ Impact: removal of unused convoluted interface ]

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 block/blk-core.c       | 48 +++---------------------------------------------
 include/linux/blkdev.h |  3 ---
 2 files changed, 3 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/block/blk-core.c b/block/blk-core.c
index 678ede23ed0..2f277ea0e59 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1971,10 +1971,6 @@ static void end_that_request_last(struct request *req, int error)
  * @error:        %0 for success, < %0 for error
  * @nr_bytes:     number of bytes to complete @rq
  * @bidi_bytes:   number of bytes to complete @rq->next_rq
- * @drv_callback: function called between completion of bios in the request
- *                and completion of the request.
- *                If the callback returns non %0, this helper returns without
- *                completion of the request.
  *
  * Description:
  *     Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
@@ -1985,8 +1981,7 @@ static void end_that_request_last(struct request *req, int error)
  *     %1 - this request is not freed yet, it still has pending buffers.
  **/
 static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
-		      unsigned int bidi_bytes,
-		      int (drv_callback)(struct request *))
+		      unsigned int bidi_bytes)
 {
 	struct request_queue *q = rq->q;
 	unsigned long flags = 0UL;
@@ -1994,10 +1989,6 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
 	if (end_that_request_data(rq, error, nr_bytes, bidi_bytes))
 		return 1;
 
-	/* Special feature for tricky drivers */
-	if (drv_callback && drv_callback(rq))
-		return 1;
-
 	add_disk_randomness(rq->rq_disk);
 
 	spin_lock_irqsave(q->queue_lock, flags);
@@ -2023,7 +2014,7 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
  **/
 int blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
 {
-	return blk_end_io(rq, error, nr_bytes, 0, NULL);
+	return blk_end_io(rq, error, nr_bytes, 0);
 }
 EXPORT_SYMBOL_GPL(blk_end_request);
 
@@ -2070,7 +2061,7 @@ EXPORT_SYMBOL_GPL(__blk_end_request);
 int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes,
 			 unsigned int bidi_bytes)
 {
-	return blk_end_io(rq, error, nr_bytes, bidi_bytes, NULL);
+	return blk_end_io(rq, error, nr_bytes, bidi_bytes);
 }
 EXPORT_SYMBOL_GPL(blk_end_bidi_request);
 
@@ -2131,39 +2122,6 @@ void blk_update_request(struct request *rq, int error, unsigned int nr_bytes)
 }
 EXPORT_SYMBOL_GPL(blk_update_request);
 
-/**
- * blk_end_request_callback - Special helper function for tricky drivers
- * @rq:           the request being processed
- * @error:        %0 for success, < %0 for error
- * @nr_bytes:     number of bytes to complete
- * @drv_callback: function called between completion of bios in the request
- *                and completion of the request.
- *                If the callback returns non %0, this helper returns without
- *                completion of the request.
- *
- * Description:
- *     Ends I/O on a number of bytes attached to @rq.
- *     If @rq has leftover, sets it up for the next range of segments.
- *
- *     This special helper function is used only for existing tricky drivers.
- *     (e.g. cdrom_newpc_intr() of ide-cd)
- *     This interface will be removed when such drivers are rewritten.
- *     Don't use this interface in other places anymore.
- *
- * Return:
- *     %0 - we are done with this request
- *     %1 - this request is not freed yet.
- *          this request still has pending buffers or
- *          the driver doesn't want to finish this request yet.
- **/
-int blk_end_request_callback(struct request *rq, int error,
-			     unsigned int nr_bytes,
-			     int (drv_callback)(struct request *))
-{
-	return blk_end_io(rq, error, nr_bytes, 0, drv_callback);
-}
-EXPORT_SYMBOL_GPL(blk_end_request_callback);
-
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 		     struct bio *bio)
 {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 156ffd9de96..1fa9dcf9aa6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -855,9 +855,6 @@ extern int __blk_end_request(struct request *rq, int error,
 extern int blk_end_bidi_request(struct request *rq, int error,
 				unsigned int nr_bytes, unsigned int bidi_bytes);
 extern void end_request(struct request *, int);
-extern int blk_end_request_callback(struct request *rq, int error,
-				unsigned int nr_bytes,
-				int (drv_callback)(struct request *));
 extern void blk_complete_request(struct request *);
 extern void __blk_complete_request(struct request *);
 extern void blk_abort_request(struct request *);
-- 
cgit v1.2.3-70-g09d2


From 2e60e02297cf54e367567f2d85b2ca56b1c4a906 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 23 Apr 2009 11:05:18 +0900
Subject: block: clean up request completion API

Request completion has gone through several changes and became a bit
messy over the time.  Clean it up.

1. end_that_request_data() is a thin wrapper around
   end_that_request_data_first() which checks whether bio is NULL
   before doing anything and handles bidi completion.
   blk_update_request() is a thin wrapper around
   end_that_request_data() which clears nr_sectors on the last
   iteration but doesn't use the bidi completion.

   Clean it up by moving the initial bio NULL check and nr_sectors
   clearing on the last iteration into end_that_request_data() and
   renaming it to blk_update_request(), which makes blk_end_io() the
   only user of end_that_request_data().  Collapse
   end_that_request_data() into blk_end_io().

2. There are four visible completion variants - blk_end_request(),
   __blk_end_request(), blk_end_bidi_request() and end_request().
   blk_end_request() and blk_end_bidi_request() uses blk_end_request()
   as the backend but __blk_end_request() and end_request() use
   separate implementation in __blk_end_request() due to different
   locking rules.

   blk_end_bidi_request() is identical to blk_end_io().  Collapse
   blk_end_io() into blk_end_bidi_request(), separate out request
   update into internal helper blk_update_bidi_request() and add
   __blk_end_bidi_request().  Redefine [__]blk_end_request() as thin
   inline wrappers around [__]blk_end_bidi_request().

3. As the whole request issue/completion usages are about to be
   modified and audited, it's a good chance to convert completion
   functions return bool which better indicates the intended meaning
   of return values.

4. The function name end_that_request_last() is from the days when it
   was a public interface and slighly confusing.  Give it a proper
   internal name - blk_finish_request().

5. Add description explaning that blk_end_bidi_request() can be safely
   used for uni requests as suggested by Boaz Harrosh.

The only visible behavior change is from #1.  nr_sectors counts are
cleared after the final iteration no matter which function is used to
complete the request.  I couldn't find any place where the code
assumes those nr_sectors counters contain the values for the last
segment and this change is good as it makes the API much more
consistent as the end result is now same whether a request is
completed using [__]blk_end_request() alone or in combination with
blk_update_request().

API further cleaned up per Christoph's suggestion.

[ Impact: cleanup, rq->*nr_sectors always updated after req completion ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Boaz Harrosh <bharrosh@panasas.com>
Cc: Christoph Hellwig <hch@infradead.org>
---
 block/blk-core.c       | 226 ++++++++++++++++---------------------------------
 include/linux/blkdev.h |  94 +++++++++++++++++---
 2 files changed, 157 insertions(+), 163 deletions(-)

(limited to 'include')

diff --git a/block/blk-core.c b/block/blk-core.c
index 2f277ea0e59..89cc05d9a7a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1808,25 +1808,35 @@ void elv_dequeue_request(struct request_queue *q, struct request *rq)
 }
 
 /**
- * __end_that_request_first - end I/O on a request
- * @req:      the request being processed
+ * blk_update_request - Special helper function for request stacking drivers
+ * @rq:	      the request being processed
  * @error:    %0 for success, < %0 for error
- * @nr_bytes: number of bytes to complete
+ * @nr_bytes: number of bytes to complete @rq
  *
  * Description:
- *     Ends I/O on a number of bytes attached to @req, and sets it up
- *     for the next range of segments (if any) in the cluster.
+ *     Ends I/O on a number of bytes attached to @rq, but doesn't complete
+ *     the request structure even if @rq doesn't have leftover.
+ *     If @rq has leftover, sets it up for the next range of segments.
+ *
+ *     This special helper function is only for request stacking drivers
+ *     (e.g. request-based dm) so that they can handle partial completion.
+ *     Actual device drivers should use blk_end_request instead.
+ *
+ *     Passing the result of blk_rq_bytes() as @nr_bytes guarantees
+ *     %false return from this function.
  *
  * Return:
- *     %0 - we are done with this request, call end_that_request_last()
- *     %1 - still buffers pending for this request
+ *     %false - this request doesn't have any more data
+ *     %true  - this request has more data
  **/
-static int __end_that_request_first(struct request *req, int error,
-				    int nr_bytes)
+bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 {
 	int total_bytes, bio_nbytes, next_idx = 0;
 	struct bio *bio;
 
+	if (!req->bio)
+		return false;
+
 	trace_block_rq_complete(req->q, req);
 
 	/*
@@ -1903,8 +1913,16 @@ static int __end_that_request_first(struct request *req, int error,
 	/*
 	 * completely done
 	 */
-	if (!req->bio)
-		return 0;
+	if (!req->bio) {
+		/*
+		 * Reset counters so that the request stacking driver
+		 * can find how many bytes remain in the request
+		 * later.
+		 */
+		req->nr_sectors = req->hard_nr_sectors = 0;
+		req->current_nr_sectors = req->hard_cur_sectors = 0;
+		return false;
+	}
 
 	/*
 	 * if the request wasn't completed, update state
@@ -1918,29 +1936,31 @@ static int __end_that_request_first(struct request *req, int error,
 
 	blk_recalc_rq_sectors(req, total_bytes >> 9);
 	blk_recalc_rq_segments(req);
-	return 1;
+	return true;
 }
+EXPORT_SYMBOL_GPL(blk_update_request);
 
-static int end_that_request_data(struct request *rq, int error,
-				 unsigned int nr_bytes, unsigned int bidi_bytes)
+static bool blk_update_bidi_request(struct request *rq, int error,
+				    unsigned int nr_bytes,
+				    unsigned int bidi_bytes)
 {
-	if (rq->bio) {
-		if (__end_that_request_first(rq, error, nr_bytes))
-			return 1;
+	if (blk_update_request(rq, error, nr_bytes))
+		return true;
 
-		/* Bidi request must be completed as a whole */
-		if (blk_bidi_rq(rq) &&
-		    __end_that_request_first(rq->next_rq, error, bidi_bytes))
-			return 1;
-	}
+	/* Bidi request must be completed as a whole */
+	if (unlikely(blk_bidi_rq(rq)) &&
+	    blk_update_request(rq->next_rq, error, bidi_bytes))
+		return true;
 
-	return 0;
+	add_disk_randomness(rq->rq_disk);
+
+	return false;
 }
 
 /*
  * queue lock must be held
  */
-static void end_that_request_last(struct request *req, int error)
+static void blk_finish_request(struct request *req, int error)
 {
 	if (blk_rq_tagged(req))
 		blk_queue_end_tag(req->q, req);
@@ -1966,161 +1986,65 @@ static void end_that_request_last(struct request *req, int error)
 }
 
 /**
- * blk_end_io - Generic end_io function to complete a request.
- * @rq:           the request being processed
- * @error:        %0 for success, < %0 for error
- * @nr_bytes:     number of bytes to complete @rq
- * @bidi_bytes:   number of bytes to complete @rq->next_rq
+ * blk_end_bidi_request - Complete a bidi request
+ * @rq:         the request to complete
+ * @error:      %0 for success, < %0 for error
+ * @nr_bytes:   number of bytes to complete @rq
+ * @bidi_bytes: number of bytes to complete @rq->next_rq
  *
  * Description:
  *     Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
- *     If @rq has leftover, sets it up for the next range of segments.
+ *     Drivers that supports bidi can safely call this member for any
+ *     type of request, bidi or uni.  In the later case @bidi_bytes is
+ *     just ignored.
  *
  * Return:
- *     %0 - we are done with this request
- *     %1 - this request is not freed yet, it still has pending buffers.
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
  **/
-static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
-		      unsigned int bidi_bytes)
+bool blk_end_bidi_request(struct request *rq, int error,
+			  unsigned int nr_bytes, unsigned int bidi_bytes)
 {
 	struct request_queue *q = rq->q;
-	unsigned long flags = 0UL;
-
-	if (end_that_request_data(rq, error, nr_bytes, bidi_bytes))
-		return 1;
+	unsigned long flags;
 
-	add_disk_randomness(rq->rq_disk);
+	if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
+		return true;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	end_that_request_last(rq, error);
+	blk_finish_request(rq, error);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
-	return 0;
-}
-
-/**
- * blk_end_request - Helper function for drivers to complete the request.
- * @rq:       the request being processed
- * @error:    %0 for success, < %0 for error
- * @nr_bytes: number of bytes to complete
- *
- * Description:
- *     Ends I/O on a number of bytes attached to @rq.
- *     If @rq has leftover, sets it up for the next range of segments.
- *
- * Return:
- *     %0 - we are done with this request
- *     %1 - still buffers pending for this request
- **/
-int blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
-{
-	return blk_end_io(rq, error, nr_bytes, 0);
-}
-EXPORT_SYMBOL_GPL(blk_end_request);
-
-/**
- * __blk_end_request - Helper function for drivers to complete the request.
- * @rq:       the request being processed
- * @error:    %0 for success, < %0 for error
- * @nr_bytes: number of bytes to complete
- *
- * Description:
- *     Must be called with queue lock held unlike blk_end_request().
- *
- * Return:
- *     %0 - we are done with this request
- *     %1 - still buffers pending for this request
- **/
-int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
-{
-	if (rq->bio && __end_that_request_first(rq, error, nr_bytes))
-		return 1;
-
-	add_disk_randomness(rq->rq_disk);
-
-	end_that_request_last(rq, error);
-
-	return 0;
+	return false;
 }
-EXPORT_SYMBOL_GPL(__blk_end_request);
+EXPORT_SYMBOL_GPL(blk_end_bidi_request);
 
 /**
- * blk_end_bidi_request - Helper function for drivers to complete bidi request.
- * @rq:         the bidi request being processed
+ * __blk_end_bidi_request - Complete a bidi request with queue lock held
+ * @rq:         the request to complete
  * @error:      %0 for success, < %0 for error
  * @nr_bytes:   number of bytes to complete @rq
  * @bidi_bytes: number of bytes to complete @rq->next_rq
  *
  * Description:
- *     Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
+ *     Identical to blk_end_bidi_request() except that queue lock is
+ *     assumed to be locked on entry and remains so on return.
  *
  * Return:
- *     %0 - we are done with this request
- *     %1 - still buffers pending for this request
- **/
-int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes,
-			 unsigned int bidi_bytes)
-{
-	return blk_end_io(rq, error, nr_bytes, bidi_bytes);
-}
-EXPORT_SYMBOL_GPL(blk_end_bidi_request);
-
-/**
- * end_request - end I/O on the current segment of the request
- * @req:	the request being processed
- * @uptodate:	error value or %0/%1 uptodate flag
- *
- * Description:
- *     Ends I/O on the current segment of a request. If that is the only
- *     remaining segment, the request is also completed and freed.
- *
- *     This is a remnant of how older block drivers handled I/O completions.
- *     Modern drivers typically end I/O on the full request in one go, unless
- *     they have a residual value to account for. For that case this function
- *     isn't really useful, unless the residual just happens to be the
- *     full current segment. In other words, don't use this function in new
- *     code. Use blk_end_request() or __blk_end_request() to end a request.
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
  **/
-void end_request(struct request *req, int uptodate)
+bool __blk_end_bidi_request(struct request *rq, int error,
+			    unsigned int nr_bytes, unsigned int bidi_bytes)
 {
-	int error = 0;
+	if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
+		return true;
 
-	if (uptodate <= 0)
-		error = uptodate ? uptodate : -EIO;
+	blk_finish_request(rq, error);
 
-	__blk_end_request(req, error, req->hard_cur_sectors << 9);
-}
-EXPORT_SYMBOL(end_request);
-
-/**
- * blk_update_request - Special helper function for request stacking drivers
- * @rq:           the request being processed
- * @error:        %0 for success, < %0 for error
- * @nr_bytes:     number of bytes to complete @rq
- *
- * Description:
- *     Ends I/O on a number of bytes attached to @rq, but doesn't complete
- *     the request structure even if @rq doesn't have leftover.
- *     If @rq has leftover, sets it up for the next range of segments.
- *
- *     This special helper function is only for request stacking drivers
- *     (e.g. request-based dm) so that they can handle partial completion.
- *     Actual device drivers should use blk_end_request instead.
- */
-void blk_update_request(struct request *rq, int error, unsigned int nr_bytes)
-{
-	if (!end_that_request_data(rq, error, nr_bytes, 0)) {
-		/*
-		 * These members are not updated in end_that_request_data()
-		 * when all bios are completed.
-		 * Update them so that the request stacking driver can find
-		 * how many bytes remain in the request later.
-		 */
-		rq->nr_sectors = rq->hard_nr_sectors = 0;
-		rq->current_nr_sectors = rq->hard_cur_sectors = 0;
-	}
+	return false;
 }
-EXPORT_SYMBOL_GPL(blk_update_request);
+EXPORT_SYMBOL_GPL(__blk_end_bidi_request);
 
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 		     struct bio *bio)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1fa9dcf9aa6..501f6845cc7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -840,27 +840,97 @@ extern unsigned int blk_rq_bytes(struct request *rq);
 extern unsigned int blk_rq_cur_bytes(struct request *rq);
 
 /*
- * blk_end_request() and friends.
- * __blk_end_request() and end_request() must be called with
- * the request queue spinlock acquired.
+ * Request completion related functions.
+ *
+ * blk_update_request() completes given number of bytes and updates
+ * the request without completing it.
+ *
+ * blk_end_request() and friends.  __blk_end_request() and
+ * end_request() must be called with the request queue spinlock
+ * acquired.
  *
  * Several drivers define their own end_request and call
  * blk_end_request() for parts of the original function.
  * This prevents code duplication in drivers.
  */
-extern int blk_end_request(struct request *rq, int error,
-				unsigned int nr_bytes);
-extern int __blk_end_request(struct request *rq, int error,
-				unsigned int nr_bytes);
-extern int blk_end_bidi_request(struct request *rq, int error,
-				unsigned int nr_bytes, unsigned int bidi_bytes);
-extern void end_request(struct request *, int);
+extern bool blk_update_request(struct request *rq, int error,
+			       unsigned int nr_bytes);
+extern bool blk_end_bidi_request(struct request *rq, int error,
+				 unsigned int nr_bytes,
+				 unsigned int bidi_bytes);
+extern bool __blk_end_bidi_request(struct request *rq, int error,
+				   unsigned int nr_bytes,
+				   unsigned int bidi_bytes);
+
+/**
+ * blk_end_request - Helper function for drivers to complete the request.
+ * @rq:       the request being processed
+ * @error:    %0 for success, < %0 for error
+ * @nr_bytes: number of bytes to complete
+ *
+ * Description:
+ *     Ends I/O on a number of bytes attached to @rq.
+ *     If @rq has leftover, sets it up for the next range of segments.
+ *
+ * Return:
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
+ **/
+static inline bool blk_end_request(struct request *rq, int error,
+				   unsigned int nr_bytes)
+{
+	return blk_end_bidi_request(rq, error, nr_bytes, 0);
+}
+
+/**
+ * __blk_end_request - Helper function for drivers to complete the request.
+ * @rq:       the request being processed
+ * @error:    %0 for success, < %0 for error
+ * @nr_bytes: number of bytes to complete
+ *
+ * Description:
+ *     Must be called with queue lock held unlike blk_end_request().
+ *
+ * Return:
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
+ **/
+static inline bool __blk_end_request(struct request *rq, int error,
+				     unsigned int nr_bytes)
+{
+	return __blk_end_bidi_request(rq, error, nr_bytes, 0);
+}
+
+/**
+ * end_request - end I/O on the current segment of the request
+ * @rq:		the request being processed
+ * @uptodate:	error value or %0/%1 uptodate flag
+ *
+ * Description:
+ *     Ends I/O on the current segment of a request. If that is the only
+ *     remaining segment, the request is also completed and freed.
+ *
+ *     This is a remnant of how older block drivers handled I/O completions.
+ *     Modern drivers typically end I/O on the full request in one go, unless
+ *     they have a residual value to account for. For that case this function
+ *     isn't really useful, unless the residual just happens to be the
+ *     full current segment. In other words, don't use this function in new
+ *     code. Use blk_end_request() or __blk_end_request() to end a request.
+ **/
+static inline void end_request(struct request *rq, int uptodate)
+{
+	int error = 0;
+
+	if (uptodate <= 0)
+		error = uptodate ? uptodate : -EIO;
+
+	__blk_end_bidi_request(rq, error, rq->hard_cur_sectors << 9, 0);
+}
+
 extern void blk_complete_request(struct request *);
 extern void __blk_complete_request(struct request *);
 extern void blk_abort_request(struct request *);
 extern void blk_abort_queue(struct request_queue *);
-extern void blk_update_request(struct request *rq, int error,
-			       unsigned int nr_bytes);
 
 /*
  * Access functions for manipulating queue properties
-- 
cgit v1.2.3-70-g09d2


From 40cbbb781d3eba5d6ac0860db078af490e5c7c6b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 23 Apr 2009 11:05:19 +0900
Subject: block: implement and use [__]blk_end_request_all()

There are many [__]blk_end_request() call sites which call it with
full request length and expect full completion.  Many of them ensure
that the request actually completes by doing BUG_ON() the return
value, which is awkward and error-prone.

This patch adds [__]blk_end_request_all() which takes @rq and @error
and fully completes the request.  BUG_ON() is added to to ensure that
this actually happens.

Most conversions are simple but there are a few noteworthy ones.

* cdrom/viocd: viocd_end_request() replaced with direct calls to
  __blk_end_request_all().

* s390/block/dasd: dasd_end_request() replaced with direct calls to
  __blk_end_request_all().

* s390/char/tape_block: tapeblock_end_request() replaced with direct
  calls to blk_end_request_all().

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Mike Miller <mike.miller@hp.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Jeff Garzik <jgarzik@pobox.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Alex Dubov <oakad@yahoo.com>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 arch/arm/plat-omap/mailbox.c        | 11 +++--------
 block/blk-barrier.c                 |  9 ++-------
 block/blk-core.c                    |  2 +-
 block/elevator.c                    |  2 +-
 drivers/block/cpqarray.c            |  3 +--
 drivers/block/sx8.c                 |  3 +--
 drivers/block/virtio_blk.c          |  2 +-
 drivers/block/xen-blkfront.c        |  4 +---
 drivers/cdrom/gdrom.c               |  2 +-
 drivers/cdrom/viocd.c               | 25 ++++---------------------
 drivers/memstick/core/mspro_block.c |  2 +-
 drivers/s390/block/dasd.c           | 17 ++++-------------
 drivers/s390/char/tape_block.c      | 15 ++++-----------
 drivers/scsi/scsi_lib.c             |  2 +-
 include/linux/blkdev.h              | 32 ++++++++++++++++++++++++++++++++
 15 files changed, 58 insertions(+), 73 deletions(-)

(limited to 'include')

diff --git a/arch/arm/plat-omap/mailbox.c b/arch/arm/plat-omap/mailbox.c
index 0abfbaa5987..cf81bad8aec 100644
--- a/arch/arm/plat-omap/mailbox.c
+++ b/arch/arm/plat-omap/mailbox.c
@@ -192,8 +192,7 @@ static void mbox_tx_work(struct work_struct *work)
 		}
 
 		spin_lock(q->queue_lock);
-		if (__blk_end_request(rq, 0, 0))
-			BUG();
+		__blk_end_request_all(rq, 0);
 		spin_unlock(q->queue_lock);
 	}
 }
@@ -224,10 +223,7 @@ static void mbox_rx_work(struct work_struct *work)
 			break;
 
 		msg = (mbox_msg_t) rq->data;
-
-		if (blk_end_request(rq, 0, 0))
-			BUG();
-
+		blk_end_request_all(rq, 0);
 		mbox->rxq->callback((void *)msg);
 	}
 }
@@ -337,8 +333,7 @@ omap_mbox_read(struct device *dev, struct device_attribute *attr, char *buf)
 
 		*p = (mbox_msg_t) rq->data;
 
-		if (blk_end_request(rq, 0, 0))
-			BUG();
+		blk_end_request_all(rq, 0);
 
 		if (unlikely(mbox_seq_test(mbox, *p))) {
 			pr_info("mbox: Illegal seq bit!(%08x) ignored\n", *p);
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 20b4111fa05..c8d087655ef 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -106,10 +106,7 @@ bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
 	 */
 	q->ordseq = 0;
 	rq = q->orig_bar_rq;
-
-	if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq)))
-		BUG();
-
+	__blk_end_request_all(rq, q->orderr);
 	return true;
 }
 
@@ -252,9 +249,7 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp)
 			 * with prejudice.
 			 */
 			elv_dequeue_request(q, rq);
-			if (__blk_end_request(rq, -EOPNOTSUPP,
-					      blk_rq_bytes(rq)))
-				BUG();
+			__blk_end_request_all(rq, -EOPNOTSUPP);
 			*rqp = NULL;
 			return false;
 		}
diff --git a/block/blk-core.c b/block/blk-core.c
index b84250d3019..0520cc70458 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1780,7 +1780,7 @@ struct request *elv_next_request(struct request_queue *q)
 			break;
 		} else if (ret == BLKPREP_KILL) {
 			rq->cmd_flags |= REQ_QUIET;
-			__blk_end_request(rq, -EIO, blk_rq_bytes(rq));
+			__blk_end_request_all(rq, -EIO);
 		} else {
 			printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
 			break;
diff --git a/block/elevator.c b/block/elevator.c
index b03b8752e18..1af5d9f04af 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -810,7 +810,7 @@ void elv_abort_queue(struct request_queue *q)
 		rq = list_entry_rq(q->queue_head.next);
 		rq->cmd_flags |= REQ_QUIET;
 		trace_block_rq_abort(q, rq);
-		__blk_end_request(rq, -EIO, blk_rq_bytes(rq));
+		__blk_end_request_all(rq, -EIO);
 	}
 }
 EXPORT_SYMBOL(elv_abort_queue);
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index ca268ca1115..488a8f4a60a 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -1024,8 +1024,7 @@ static inline void complete_command(cmdlist_t *cmd, int timeout)
 				cmd->req.sg[i].size, ddir);
 
 	DBGPX(printk("Done with %p\n", rq););
-	if (__blk_end_request(rq, error, blk_rq_bytes(rq)))
-		BUG();
+	__blk_end_request_all(rq, error);
 }
 
 /*
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index ff0448e4bf0..60e85bb6f79 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -749,8 +749,7 @@ static inline void carm_end_request_queued(struct carm_host *host,
 	struct request *req = crq->rq;
 	int rc;
 
-	rc = __blk_end_request(req, error, blk_rq_bytes(req));
-	assert(rc == 0);
+	__blk_end_request_all(req, error);
 
 	rc = carm_put_request(host, crq);
 	assert(rc == 0);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 5d34764c8a8..50745e64414 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -62,7 +62,7 @@ static void blk_done(struct virtqueue *vq)
 			break;
 		}
 
-		__blk_end_request(vbr->req, error, blk_rq_bytes(vbr->req));
+		__blk_end_request_all(vbr->req, error);
 		list_del(&vbr->list);
 		mempool_free(vbr, vblk->pool);
 	}
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 8f905089b72..cd6cfe3b51e 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -551,7 +551,6 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 
 	for (i = info->ring.rsp_cons; i != rp; i++) {
 		unsigned long id;
-		int ret;
 
 		bret = RING_GET_RESPONSE(&info->ring, i);
 		id   = bret->id;
@@ -578,8 +577,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 				dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
 					"request: %x\n", bret->status);
 
-			ret = __blk_end_request(req, error, blk_rq_bytes(req));
-			BUG_ON(ret);
+			__blk_end_request_all(req, error);
 			break;
 		default:
 			BUG();
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 2eecb779437..fee9a9e83fc 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -632,7 +632,7 @@ static void gdrom_readdisk_dma(struct work_struct *work)
 		* before handling ending the request */
 		spin_lock(&gdrom_lock);
 		list_del_init(&req->queuelist);
-		__blk_end_request(req, err, blk_rq_bytes(req));
+		__blk_end_request_all(req, err);
 	}
 	spin_unlock(&gdrom_lock);
 	kfree(read_command);
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index 13929356135..cc3efa096e1 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -291,23 +291,6 @@ static int send_request(struct request *req)
 	return 0;
 }
 
-static void viocd_end_request(struct request *req, int error)
-{
-	int nsectors = req->hard_nr_sectors;
-
-	/*
-	 * Make sure it's fully ended, and ensure that we process
-	 * at least one sector.
-	 */
-	if (blk_pc_request(req))
-		nsectors = (req->data_len + 511) >> 9;
-	if (!nsectors)
-		nsectors = 1;
-
-	if (__blk_end_request(req, error, nsectors << 9))
-		BUG();
-}
-
 static int rwreq;
 
 static void do_viocd_request(struct request_queue *q)
@@ -316,11 +299,11 @@ static void do_viocd_request(struct request_queue *q)
 
 	while ((rwreq == 0) && ((req = elv_next_request(q)) != NULL)) {
 		if (!blk_fs_request(req))
-			viocd_end_request(req, -EIO);
+			__blk_end_request_all(req, -EIO);
 		else if (send_request(req) < 0) {
 			printk(VIOCD_KERN_WARNING
 					"unable to send message to OS/400!");
-			viocd_end_request(req, -EIO);
+			__blk_end_request_all(req, -EIO);
 		} else
 			rwreq++;
 	}
@@ -531,9 +514,9 @@ return_complete:
 					"with rc %d:0x%04X: %s\n",
 					req, event->xRc,
 					bevent->sub_result, err->msg);
-			viocd_end_request(req, -EIO);
+			__blk_end_request_all(req, -EIO);
 		} else
-			viocd_end_request(req, 0);
+			__blk_end_request_all(req, 0);
 
 		/* restart handling of incoming requests */
 		spin_unlock_irqrestore(&viocd_reqlock, flags);
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index de143deb06f..a41634699f8 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -826,7 +826,7 @@ static void mspro_block_submit_req(struct request_queue *q)
 
 	if (msb->eject) {
 		while ((req = elv_next_request(q)) != NULL)
-			__blk_end_request(req, -ENODEV, blk_rq_bytes(req));
+			__blk_end_request_all(req, -ENODEV);
 
 		return;
 	}
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index d1815272c43..fabec95686b 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -1613,15 +1613,6 @@ void dasd_block_clear_timer(struct dasd_block *block)
 	del_timer(&block->timer);
 }
 
-/*
- * posts the buffer_cache about a finalized request
- */
-static inline void dasd_end_request(struct request *req, int error)
-{
-	if (__blk_end_request(req, error, blk_rq_bytes(req)))
-		BUG();
-}
-
 /*
  * Process finished error recovery ccw.
  */
@@ -1676,7 +1667,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
 				      "Rejecting write request %p",
 				      req);
 			blkdev_dequeue_request(req);
-			dasd_end_request(req, -EIO);
+			__blk_end_request_all(req, -EIO);
 			continue;
 		}
 		cqr = basedev->discipline->build_cp(basedev, block, req);
@@ -1705,7 +1696,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
 				      "on request %p",
 				      PTR_ERR(cqr), req);
 			blkdev_dequeue_request(req);
-			dasd_end_request(req, -EIO);
+			__blk_end_request_all(req, -EIO);
 			continue;
 		}
 		/*
@@ -1731,7 +1722,7 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
 	status = cqr->block->base->discipline->free_cp(cqr, req);
 	if (status <= 0)
 		error = status ? status : -EIO;
-	dasd_end_request(req, error);
+	__blk_end_request_all(req, error);
 }
 
 /*
@@ -2040,7 +2031,7 @@ static void dasd_flush_request_queue(struct dasd_block *block)
 	spin_lock_irq(&block->request_queue_lock);
 	while ((req = elv_next_request(block->request_queue))) {
 		blkdev_dequeue_request(req);
-		dasd_end_request(req, -EIO);
+		__blk_end_request_all(req, -EIO);
 	}
 	spin_unlock_irq(&block->request_queue_lock);
 }
diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c
index f32e89e7c4f..86596d3813b 100644
--- a/drivers/s390/char/tape_block.c
+++ b/drivers/s390/char/tape_block.c
@@ -73,13 +73,6 @@ tapeblock_trigger_requeue(struct tape_device *device)
 /*
  * Post finished request.
  */
-static void
-tapeblock_end_request(struct request *req, int error)
-{
-	if (blk_end_request(req, error, blk_rq_bytes(req)))
-		BUG();
-}
-
 static void
 __tapeblock_end_request(struct tape_request *ccw_req, void *data)
 {
@@ -90,7 +83,7 @@ __tapeblock_end_request(struct tape_request *ccw_req, void *data)
 
 	device = ccw_req->device;
 	req = (struct request *) data;
-	tapeblock_end_request(req, (ccw_req->rc == 0) ? 0 : -EIO);
+	blk_end_request_all(req, (ccw_req->rc == 0) ? 0 : -EIO);
 	if (ccw_req->rc == 0)
 		/* Update position. */
 		device->blk_data.block_position =
@@ -118,7 +111,7 @@ tapeblock_start_request(struct tape_device *device, struct request *req)
 	ccw_req = device->discipline->bread(device, req);
 	if (IS_ERR(ccw_req)) {
 		DBF_EVENT(1, "TBLOCK: bread failed\n");
-		tapeblock_end_request(req, -EIO);
+		blk_end_request_all(req, -EIO);
 		return PTR_ERR(ccw_req);
 	}
 	ccw_req->callback = __tapeblock_end_request;
@@ -131,7 +124,7 @@ tapeblock_start_request(struct tape_device *device, struct request *req)
 		 * Start/enqueueing failed. No retries in
 		 * this case.
 		 */
-		tapeblock_end_request(req, -EIO);
+		blk_end_request_all(req, -EIO);
 		device->discipline->free_bread(ccw_req);
 	}
 
@@ -177,7 +170,7 @@ tapeblock_requeue(struct work_struct *work) {
 			DBF_EVENT(1, "TBLOCK: Rejecting write request\n");
 			blkdev_dequeue_request(req);
 			spin_unlock_irq(&device->blk_data.request_queue_lock);
-			tapeblock_end_request(req, -EIO);
+			blk_end_request_all(req, -EIO);
 			spin_lock_irq(&device->blk_data.request_queue_lock);
 			continue;
 		}
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index d1cb64ad1a3..756ac7c93de 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -922,7 +922,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 			if (driver_byte(result) & DRIVER_SENSE)
 				scsi_print_sense("", cmd);
 		}
-		blk_end_request(req, -EIO, blk_rq_bytes(req));
+		blk_end_request_all(req, -EIO);
 		scsi_next_command(cmd);
 		break;
 	case ACTION_REPREP:
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 501f6845cc7..e33c8356b3d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -882,6 +882,22 @@ static inline bool blk_end_request(struct request *rq, int error,
 	return blk_end_bidi_request(rq, error, nr_bytes, 0);
 }
 
+/**
+ * blk_end_request_all - Helper function for drives to finish the request.
+ * @rq: the request to finish
+ * @err: %0 for success, < %0 for error
+ *
+ * Description:
+ *     Completely finish @rq.
+ */
+static inline void blk_end_request_all(struct request *rq, int error)
+{
+	bool pending;
+
+	pending = blk_end_request(rq, error, blk_rq_bytes(rq));
+	BUG_ON(pending);
+}
+
 /**
  * __blk_end_request - Helper function for drivers to complete the request.
  * @rq:       the request being processed
@@ -901,6 +917,22 @@ static inline bool __blk_end_request(struct request *rq, int error,
 	return __blk_end_bidi_request(rq, error, nr_bytes, 0);
 }
 
+/**
+ * __blk_end_request_all - Helper function for drives to finish the request.
+ * @rq: the request to finish
+ * @err: %0 for success, < %0 for error
+ *
+ * Description:
+ *     Completely finish @rq.  Must be called with queue lock held.
+ */
+static inline void __blk_end_request_all(struct request *rq, int error)
+{
+	bool pending;
+
+	pending = __blk_end_request(rq, error, blk_rq_bytes(rq));
+	BUG_ON(pending);
+}
+
 /**
  * end_request - end I/O on the current segment of the request
  * @rq:		the request being processed
-- 
cgit v1.2.3-70-g09d2


From f06d9a2b52e246a66b606130cea3f0d7b7be17a7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 23 Apr 2009 11:05:19 +0900
Subject: block: replace end_request() with [__]blk_end_request_cur()

end_request() has been kept around for backward compatibility;
however, it's about time for it to go away.

* There aren't too many users left.

* Its use of @updtodate is pretty confusing.

* In some cases, newer code ends up using mixture of end_request() and
  [__]blk_end_request[_all](), which is way too confusing.

So, add [__]blk_end_request_cur() and replace end_request() with it.
Most conversions are straightforward.  Noteworthy ones are...

* paride/pcd: next_request() updated to take 0/-errno instead of 1/0.

* paride/pf: pf_end_request() and next_request() updated to take
  0/-errno instead of 1/0.

* xd: xd_readwrite() updated to return 0/-errno instead of 1/0.

* mtd/mtd_blkdevs: blktrans_discard_request() updated to return
  0/-errno instead of 1/0.  Unnecessary local variable res
  initialization removed from mtd_blktrans_thread().

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Joerg Dorchain <joerg@dorchain.net>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Laurent Vivier <Laurent@lvivier.info>
Cc: Tim Waugh <tim@cyberelk.net>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Markus Lidel <Markus.Lidel@shadowconnect.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Pete Zaitcev <zaitcev@redhat.com>
Cc: unsik Kim <donari75@gmail.com>
---
 drivers/block/amiflop.c         | 10 ++++-----
 drivers/block/ataflop.c         | 14 ++++++-------
 drivers/block/hd.c              | 14 ++++++-------
 drivers/block/mg_disk.c         | 16 +++++++-------
 drivers/block/paride/pcd.c      | 12 +++++------
 drivers/block/paride/pd.c       |  5 +++--
 drivers/block/paride/pf.c       | 28 ++++++++++++-------------
 drivers/block/ps3disk.c         |  6 +++---
 drivers/block/swim.c            | 14 ++++++-------
 drivers/block/swim3.c           | 26 +++++++++++------------
 drivers/block/xd.c              | 15 +++++++-------
 drivers/block/xen-blkfront.c    |  2 +-
 drivers/block/xsysace.c         |  4 ++--
 drivers/block/z2ram.c           |  4 ++--
 drivers/cdrom/gdrom.c           |  6 +++---
 drivers/message/i2o/i2o_block.c |  2 +-
 drivers/mtd/mtd_blkdevs.c       | 22 ++++++++++----------
 drivers/sbus/char/jsflash.c     |  8 +++----
 include/linux/blkdev.h          | 46 ++++++++++++++++++++---------------------
 19 files changed, 128 insertions(+), 126 deletions(-)

(limited to 'include')

diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 8df436ff706..b99a2a606d0 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1359,7 +1359,7 @@ static void redo_fd_request(void)
 #endif
 		block = CURRENT->sector + cnt;
 		if ((int)block > floppy->blocks) {
-			end_request(CURRENT, 0);
+			__blk_end_request_cur(CURRENT, -EIO);
 			goto repeat;
 		}
 
@@ -1373,11 +1373,11 @@ static void redo_fd_request(void)
 
 		if ((rq_data_dir(CURRENT) != READ) && (rq_data_dir(CURRENT) != WRITE)) {
 			printk(KERN_WARNING "do_fd_request: unknown command\n");
-			end_request(CURRENT, 0);
+			__blk_end_request_cur(CURRENT, -EIO);
 			goto repeat;
 		}
 		if (get_track(drive, track) == -1) {
-			end_request(CURRENT, 0);
+			__blk_end_request_cur(CURRENT, -EIO);
 			goto repeat;
 		}
 
@@ -1391,7 +1391,7 @@ static void redo_fd_request(void)
 
 			/* keep the drive spinning while writes are scheduled */
 			if (!fd_motor_on(drive)) {
-				end_request(CURRENT, 0);
+				__blk_end_request_cur(CURRENT, -EIO);
 				goto repeat;
 			}
 			/*
@@ -1410,7 +1410,7 @@ static void redo_fd_request(void)
 	CURRENT->nr_sectors -= CURRENT->current_nr_sectors;
 	CURRENT->sector += CURRENT->current_nr_sectors;
 
-	end_request(CURRENT, 1);
+	__blk_end_request_cur(CURRENT, 0);
 	goto repeat;
 }
 
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 4234c11c1e4..44a8702136a 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -612,7 +612,7 @@ static void fd_error( void )
 	CURRENT->errors++;
 	if (CURRENT->errors >= MAX_ERRORS) {
 		printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive );
-		end_request(CURRENT, 0);
+		__blk_end_request_cur(CURRENT, -EIO);
 	}
 	else if (CURRENT->errors == RECALIBRATE_ERRORS) {
 		printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive );
@@ -734,7 +734,7 @@ static void do_fd_action( int drive )
 			/* all sectors finished */
 			CURRENT->nr_sectors -= CURRENT->current_nr_sectors;
 			CURRENT->sector += CURRENT->current_nr_sectors;
-			end_request(CURRENT, 1);
+			__blk_end_request_cur(CURRENT, 0);
 			redo_fd_request();
 			return;
 		    }
@@ -1141,7 +1141,7 @@ static void fd_rwsec_done1(int status)
 		/* all sectors finished */
 		CURRENT->nr_sectors -= CURRENT->current_nr_sectors;
 		CURRENT->sector += CURRENT->current_nr_sectors;
-		end_request(CURRENT, 1);
+		__blk_end_request_cur(CURRENT, 0);
 		redo_fd_request();
 	}
 	return;
@@ -1414,7 +1414,7 @@ repeat:
 	if (!UD.connected) {
 		/* drive not connected */
 		printk(KERN_ERR "Unknown Device: fd%d\n", drive );
-		end_request(CURRENT, 0);
+		__blk_end_request_cur(CURRENT, -EIO);
 		goto repeat;
 	}
 		
@@ -1430,12 +1430,12 @@ repeat:
 		/* user supplied disk type */
 		if (--type >= NUM_DISK_MINORS) {
 			printk(KERN_WARNING "fd%d: invalid disk format", drive );
-			end_request(CURRENT, 0);
+			__blk_end_request_cur(CURRENT, -EIO);
 			goto repeat;
 		}
 		if (minor2disktype[type].drive_types > DriveType)  {
 			printk(KERN_WARNING "fd%d: unsupported disk format", drive );
-			end_request(CURRENT, 0);
+			__blk_end_request_cur(CURRENT, -EIO);
 			goto repeat;
 		}
 		type = minor2disktype[type].index;
@@ -1445,7 +1445,7 @@ repeat:
 	}
 	
 	if (CURRENT->sector + 1 > UDT->blocks) {
-		end_request(CURRENT, 0);
+		__blk_end_request_cur(CURRENT, -EIO);
 		goto repeat;
 	}
 
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index baaa9e486e5..5cb300b81c6 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -410,7 +410,7 @@ static void bad_rw_intr(void)
 	if (req != NULL) {
 		struct hd_i_struct *disk = req->rq_disk->private_data;
 		if (++req->errors >= MAX_ERRORS || (hd_error & BBD_ERR)) {
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			disk->special_op = disk->recalibrate = 1;
 		} else if (req->errors % RESET_FREQ == 0)
 			reset = 1;
@@ -466,7 +466,7 @@ ok_to_read:
 		req->buffer+512);
 #endif
 	if (req->current_nr_sectors <= 0)
-		end_request(req, 1);
+		__blk_end_request_cur(req, 0);
 	if (i > 0) {
 		SET_HANDLER(&read_intr);
 		return;
@@ -505,7 +505,7 @@ ok_to_write:
 	--req->current_nr_sectors;
 	req->buffer += 512;
 	if (!i || (req->bio && req->current_nr_sectors <= 0))
-		end_request(req, 1);
+		__blk_end_request_cur(req, 0);
 	if (i > 0) {
 		SET_HANDLER(&write_intr);
 		outsw(HD_DATA, req->buffer, 256);
@@ -548,7 +548,7 @@ static void hd_times_out(unsigned long dummy)
 #ifdef DEBUG
 		printk("%s: too many errors\n", name);
 #endif
-		end_request(CURRENT, 0);
+		__blk_end_request_cur(CURRENT, -EIO);
 	}
 	hd_request();
 	spin_unlock_irq(hd_queue->queue_lock);
@@ -563,7 +563,7 @@ static int do_special_op(struct hd_i_struct *disk, struct request *req)
 	}
 	if (disk->head > 16) {
 		printk("%s: cannot handle device with more than 16 heads - giving up\n", req->rq_disk->disk_name);
-		end_request(req, 0);
+		__blk_end_request_cur(req, -EIO);
 	}
 	disk->special_op = 0;
 	return 1;
@@ -607,7 +607,7 @@ repeat:
 	    ((block+nsect) > get_capacity(req->rq_disk))) {
 		printk("%s: bad access: block=%d, count=%d\n",
 			req->rq_disk->disk_name, block, nsect);
-		end_request(req, 0);
+		__blk_end_request_cur(req, -EIO);
 		goto repeat;
 	}
 
@@ -647,7 +647,7 @@ repeat:
 			break;
 		default:
 			printk("unknown hd-command\n");
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			break;
 		}
 	}
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index f3898353d0a..408c2bd8a43 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -285,7 +285,7 @@ static void mg_bad_rw_intr(struct mg_host *host)
 	if (req != NULL)
 		if (++req->errors >= MG_MAX_ERRORS ||
 				host->error == MG_ERR_TIMEOUT)
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 }
 
 static unsigned int mg_out(struct mg_host *host,
@@ -351,7 +351,7 @@ static void mg_read(struct request *req)
 
 		if (req->current_nr_sectors <= 0) {
 			MG_DBG("remain : %d sects\n", remains);
-			end_request(req, 1);
+			__blk_end_request_cur(req, 0);
 			if (remains > 0)
 				req = elv_next_request(host->breq);
 		}
@@ -395,7 +395,7 @@ static void mg_write(struct request *req)
 
 		if (req->current_nr_sectors <= 0) {
 			MG_DBG("remain : %d sects\n", remains);
-			end_request(req, 1);
+			__blk_end_request_cur(req, 0);
 			if (remains > 0)
 				req = elv_next_request(host->breq);
 		}
@@ -448,7 +448,7 @@ ok_to_read:
 
 	/* let know if current segment done */
 	if (req->current_nr_sectors <= 0)
-		end_request(req, 1);
+		__blk_end_request_cur(req, 0);
 
 	/* set handler if read remains */
 	if (i > 0) {
@@ -497,7 +497,7 @@ ok_to_write:
 
 	/* let know if current segment or all done */
 	if (!i || (req->bio && req->current_nr_sectors <= 0))
-		end_request(req, 1);
+		__blk_end_request_cur(req, 0);
 
 	/* write 1 sector and set handler if remains */
 	if (i > 0) {
@@ -563,7 +563,7 @@ static void mg_request_poll(struct request_queue *q)
 			default:
 				printk(KERN_WARNING "%s:%d unknown command\n",
 						__func__, __LINE__);
-				end_request(req, 0);
+				__blk_end_request_cur(req, -EIO);
 				break;
 			}
 		}
@@ -617,7 +617,7 @@ static unsigned int mg_issue_req(struct request *req,
 	default:
 		printk(KERN_WARNING "%s:%d unknown command\n",
 				__func__, __LINE__);
-		end_request(req, 0);
+		__blk_end_request_cur(req, -EIO);
 		break;
 	}
 	return MG_ERR_NONE;
@@ -655,7 +655,7 @@ static void mg_request(struct request_queue *q)
 					"%s: bad access: sector=%d, count=%d\n",
 					req->rq_disk->disk_name,
 					sect_num, sect_cnt);
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index e91d4b4b014..9fd57c2aa46 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -735,16 +735,16 @@ static void do_pcd_request(struct request_queue * q)
 			ps_set_intr(do_pcd_read, NULL, 0, nice);
 			return;
 		} else
-			end_request(pcd_req, 0);
+			__blk_end_request_cur(pcd_req, -EIO);
 	}
 }
 
-static inline void next_request(int success)
+static inline void next_request(int err)
 {
 	unsigned long saved_flags;
 
 	spin_lock_irqsave(&pcd_lock, saved_flags);
-	end_request(pcd_req, success);
+	__blk_end_request_cur(pcd_req, err);
 	pcd_busy = 0;
 	do_pcd_request(pcd_queue);
 	spin_unlock_irqrestore(&pcd_lock, saved_flags);
@@ -781,7 +781,7 @@ static void pcd_start(void)
 
 	if (pcd_command(pcd_current, rd_cmd, 2048, "read block")) {
 		pcd_bufblk = -1;
-		next_request(0);
+		next_request(-EIO);
 		return;
 	}
 
@@ -796,7 +796,7 @@ static void do_pcd_read(void)
 	pcd_retries = 0;
 	pcd_transfer();
 	if (!pcd_count) {
-		next_request(1);
+		next_request(0);
 		return;
 	}
 
@@ -815,7 +815,7 @@ static void do_pcd_read_drq(void)
 			return;
 		}
 		pcd_bufblk = -1;
-		next_request(0);
+		next_request(-EIO);
 		return;
 	}
 
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 9299455b0af..0732df4e901 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -410,7 +410,8 @@ static void run_fsm(void)
 				pd_claimed = 0;
 				phase = NULL;
 				spin_lock_irqsave(&pd_lock, saved_flags);
-				end_request(pd_req, res);
+				__blk_end_request_cur(pd_req,
+						      res == Ok ? 0 : -EIO);
 				pd_req = elv_next_request(pd_queue);
 				if (!pd_req)
 					stop = 1;
@@ -477,7 +478,7 @@ static int pd_next_buf(void)
 	if (pd_count)
 		return 0;
 	spin_lock_irqsave(&pd_lock, saved_flags);
-	end_request(pd_req, 1);
+	__blk_end_request_cur(pd_req, 0);
 	pd_count = pd_req->current_nr_sectors;
 	pd_buf = pd_req->buffer;
 	spin_unlock_irqrestore(&pd_lock, saved_flags);
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index bef3b997ba3..3871e3586d6 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -750,10 +750,10 @@ static int pf_ready(void)
 
 static struct request_queue *pf_queue;
 
-static void pf_end_request(int uptodate)
+static void pf_end_request(int err)
 {
 	if (pf_req) {
-		end_request(pf_req, uptodate);
+		__blk_end_request_cur(pf_req, err);
 		pf_req = NULL;
 	}
 }
@@ -773,7 +773,7 @@ repeat:
 	pf_count = pf_req->current_nr_sectors;
 
 	if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) {
-		pf_end_request(0);
+		pf_end_request(-EIO);
 		goto repeat;
 	}
 
@@ -788,7 +788,7 @@ repeat:
 		pi_do_claimed(pf_current->pi, do_pf_write);
 	else {
 		pf_busy = 0;
-		pf_end_request(0);
+		pf_end_request(-EIO);
 		goto repeat;
 	}
 }
@@ -805,7 +805,7 @@ static int pf_next_buf(void)
 		return 1;
 	if (!pf_count) {
 		spin_lock_irqsave(&pf_spin_lock, saved_flags);
-		pf_end_request(1);
+		pf_end_request(0);
 		pf_req = elv_next_request(pf_queue);
 		spin_unlock_irqrestore(&pf_spin_lock, saved_flags);
 		if (!pf_req)
@@ -816,12 +816,12 @@ static int pf_next_buf(void)
 	return 0;
 }
 
-static inline void next_request(int success)
+static inline void next_request(int err)
 {
 	unsigned long saved_flags;
 
 	spin_lock_irqsave(&pf_spin_lock, saved_flags);
-	pf_end_request(success);
+	pf_end_request(err);
 	pf_busy = 0;
 	do_pf_request(pf_queue);
 	spin_unlock_irqrestore(&pf_spin_lock, saved_flags);
@@ -844,7 +844,7 @@ static void do_pf_read_start(void)
 			pi_do_claimed(pf_current->pi, do_pf_read_start);
 			return;
 		}
-		next_request(0);
+		next_request(-EIO);
 		return;
 	}
 	pf_mask = STAT_DRQ;
@@ -863,7 +863,7 @@ static void do_pf_read_drq(void)
 				pi_do_claimed(pf_current->pi, do_pf_read_start);
 				return;
 			}
-			next_request(0);
+			next_request(-EIO);
 			return;
 		}
 		pi_read_block(pf_current->pi, pf_buf, 512);
@@ -871,7 +871,7 @@ static void do_pf_read_drq(void)
 			break;
 	}
 	pi_disconnect(pf_current->pi);
-	next_request(1);
+	next_request(0);
 }
 
 static void do_pf_write(void)
@@ -890,7 +890,7 @@ static void do_pf_write_start(void)
 			pi_do_claimed(pf_current->pi, do_pf_write_start);
 			return;
 		}
-		next_request(0);
+		next_request(-EIO);
 		return;
 	}
 
@@ -903,7 +903,7 @@ static void do_pf_write_start(void)
 				pi_do_claimed(pf_current->pi, do_pf_write_start);
 				return;
 			}
-			next_request(0);
+			next_request(-EIO);
 			return;
 		}
 		pi_write_block(pf_current->pi, pf_buf, 512);
@@ -923,11 +923,11 @@ static void do_pf_write_done(void)
 			pi_do_claimed(pf_current->pi, do_pf_write_start);
 			return;
 		}
-		next_request(0);
+		next_request(-EIO);
 		return;
 	}
 	pi_disconnect(pf_current->pi);
-	next_request(1);
+	next_request(0);
 }
 
 static int __init pf_init(void)
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index bccc42bb921..d23b54bc2f5 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -158,7 +158,7 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev,
 	if (res) {
 		dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__,
 			__LINE__, op, res);
-		end_request(req, 0);
+		__blk_end_request_cur(req, -EIO);
 		return 0;
 	}
 
@@ -180,7 +180,7 @@ static int ps3disk_submit_flush_request(struct ps3_storage_device *dev,
 	if (res) {
 		dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n",
 			__func__, __LINE__, res);
-		end_request(req, 0);
+		__blk_end_request_cur(req, -EIO);
 		return 0;
 	}
 
@@ -205,7 +205,7 @@ static void ps3disk_do_request(struct ps3_storage_device *dev,
 				break;
 		} else {
 			blk_dump_rq_flags(req, DEVICE_NAME " bad request");
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 	}
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index d22cc385693..6544a7b06bf 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -532,39 +532,39 @@ static void redo_fd_request(struct request_queue *q)
 
 		fs = req->rq_disk->private_data;
 		if (req->sector < 0 || req->sector >= fs->total_secs) {
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 		if (req->current_nr_sectors == 0) {
-			end_request(req, 1);
+			__blk_end_request_cur(req, 0);
 			continue;
 		}
 		if (!fs->disk_in) {
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 		if (rq_data_dir(req) == WRITE) {
 			if (fs->write_protected) {
-				end_request(req, 0);
+				__blk_end_request_cur(req, -EIO);
 				continue;
 			}
 		}
 		switch (rq_data_dir(req)) {
 		case WRITE:
 			/* NOT IMPLEMENTED */
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			break;
 		case READ:
 			if (floppy_read_sectors(fs, req->sector,
 						req->current_nr_sectors,
 						req->buffer)) {
-				end_request(req, 0);
+				__blk_end_request_cur(req, -EIO);
 				continue;
 			}
 			req->nr_sectors -= req->current_nr_sectors;
 			req->sector += req->current_nr_sectors;
 			req->buffer += req->current_nr_sectors * 512;
-			end_request(req, 1);
+			__blk_end_request_cur(req, 0);
 			break;
 		}
 	}
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 612965307ba..5904f7b73c6 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -320,15 +320,15 @@ static void start_request(struct floppy_state *fs)
 #endif
 
 		if (req->sector < 0 || req->sector >= fs->total_secs) {
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 		if (req->current_nr_sectors == 0) {
-			end_request(req, 1);
+			__blk_end_request_cur(req, 0);
 			continue;
 		}
 		if (fs->ejected) {
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 
@@ -336,7 +336,7 @@ static void start_request(struct floppy_state *fs)
 			if (fs->write_prot < 0)
 				fs->write_prot = swim3_readbit(fs, WRITE_PROT);
 			if (fs->write_prot) {
-				end_request(req, 0);
+				__blk_end_request_cur(req, -EIO);
 				continue;
 			}
 		}
@@ -508,7 +508,7 @@ static void act(struct floppy_state *fs)
 		case do_transfer:
 			if (fs->cur_cyl != fs->req_cyl) {
 				if (fs->retries > 5) {
-					end_request(fd_req, 0);
+					__blk_end_request_cur(fd_req, -EIO);
 					fs->state = idle;
 					return;
 				}
@@ -540,7 +540,7 @@ static void scan_timeout(unsigned long data)
 	out_8(&sw->intr_enable, 0);
 	fs->cur_cyl = -1;
 	if (fs->retries > 5) {
-		end_request(fd_req, 0);
+		__blk_end_request_cur(fd_req, -EIO);
 		fs->state = idle;
 		start_request(fs);
 	} else {
@@ -559,7 +559,7 @@ static void seek_timeout(unsigned long data)
 	out_8(&sw->select, RELAX);
 	out_8(&sw->intr_enable, 0);
 	printk(KERN_ERR "swim3: seek timeout\n");
-	end_request(fd_req, 0);
+	__blk_end_request_cur(fd_req, -EIO);
 	fs->state = idle;
 	start_request(fs);
 }
@@ -583,7 +583,7 @@ static void settle_timeout(unsigned long data)
 		return;
 	}
 	printk(KERN_ERR "swim3: seek settle timeout\n");
-	end_request(fd_req, 0);
+	__blk_end_request_cur(fd_req, -EIO);
 	fs->state = idle;
 	start_request(fs);
 }
@@ -615,7 +615,7 @@ static void xfer_timeout(unsigned long data)
 	fd_req->current_nr_sectors -= s;
 	printk(KERN_ERR "swim3: timeout %sing sector %ld\n",
 	       (rq_data_dir(fd_req)==WRITE? "writ": "read"), (long)fd_req->sector);
-	end_request(fd_req, 0);
+	__blk_end_request_cur(fd_req, -EIO);
 	fs->state = idle;
 	start_request(fs);
 }
@@ -646,7 +646,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 				printk(KERN_ERR "swim3: seen sector but cyl=ff?\n");
 				fs->cur_cyl = -1;
 				if (fs->retries > 5) {
-					end_request(fd_req, 0);
+					__blk_end_request_cur(fd_req, -EIO);
 					fs->state = idle;
 					start_request(fs);
 				} else {
@@ -731,7 +731,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 				printk("swim3: error %sing block %ld (err=%x)\n",
 				       rq_data_dir(fd_req) == WRITE? "writ": "read",
 				       (long)fd_req->sector, err);
-				end_request(fd_req, 0);
+				__blk_end_request_cur(fd_req, -EIO);
 				fs->state = idle;
 			}
 		} else {
@@ -740,7 +740,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 				printk(KERN_ERR "swim3: fd dma: stat=%x resid=%d\n", stat, resid);
 				printk(KERN_ERR "  state=%d, dir=%x, intr=%x, err=%x\n",
 				       fs->state, rq_data_dir(fd_req), intr, err);
-				end_request(fd_req, 0);
+				__blk_end_request_cur(fd_req, -EIO);
 				fs->state = idle;
 				start_request(fs);
 				break;
@@ -749,7 +749,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 			fd_req->current_nr_sectors -= fs->scount;
 			fd_req->buffer += fs->scount * 512;
 			if (fd_req->current_nr_sectors <= 0) {
-				end_request(fd_req, 1);
+				__blk_end_request_cur(fd_req, 0);
 				fs->state = idle;
 			} else {
 				fs->req_sector += fs->scount;
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index 64b496fce98..6f6ad82ec0c 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -314,21 +314,22 @@ static void do_xd_request (struct request_queue * q)
 		int retry;
 
 		if (!blk_fs_request(req)) {
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 		if (block + count > get_capacity(req->rq_disk)) {
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 		if (rw != READ && rw != WRITE) {
 			printk("do_xd_request: unknown request\n");
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 		for (retry = 0; (retry < XD_RETRIES) && !res; retry++)
 			res = xd_readwrite(rw, disk, req->buffer, block, count);
-		end_request(req, res);	/* wrap up, 0 = fail, 1 = success */
+		/* wrap up, 0 = success, -errno = fail */
+		__blk_end_request_cur(req, res);
 	}
 }
 
@@ -418,7 +419,7 @@ static int xd_readwrite (u_char operation,XD_INFO *p,char *buffer,u_int block,u_
 				printk("xd%c: %s timeout, recalibrating drive\n",'a'+drive,(operation == READ ? "read" : "write"));
 				xd_recalibrate(drive);
 				spin_lock_irq(&xd_lock);
-				return (0);
+				return -EIO;
 			case 2:
 				if (sense[0] & 0x30) {
 					printk("xd%c: %s - ",'a'+drive,(operation == READ ? "reading" : "writing"));
@@ -439,7 +440,7 @@ static int xd_readwrite (u_char operation,XD_INFO *p,char *buffer,u_int block,u_
 				else
 					printk(" - no valid disk address\n");
 				spin_lock_irq(&xd_lock);
-				return (0);
+				return -EIO;
 		}
 		if (xd_dma_buffer)
 			for (i=0; i < (temp * 0x200); i++)
@@ -448,7 +449,7 @@ static int xd_readwrite (u_char operation,XD_INFO *p,char *buffer,u_int block,u_
 		count -= temp, buffer += temp * 0x200, block += temp;
 	}
 	spin_lock_irq(&xd_lock);
-	return (1);
+	return 0;
 }
 
 /* xd_recalibrate: recalibrate a given drive and reset controller if necessary */
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index cd6cfe3b51e..b4564479f64 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -302,7 +302,7 @@ static void do_blkif_request(struct request_queue *rq)
 	while ((req = elv_next_request(rq)) != NULL) {
 		info = req->rq_disk->private_data;
 		if (!blk_fs_request(req)) {
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 4aecf5dc6a9..b1e1d7e5ab1 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -466,7 +466,7 @@ struct request *ace_get_next_request(struct request_queue * q)
 	while ((req = elv_next_request(q)) != NULL) {
 		if (blk_fs_request(req))
 			break;
-		end_request(req, 0);
+		__blk_end_request_cur(req, -EIO);
 	}
 	return req;
 }
@@ -494,7 +494,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
 
 		/* Drop all pending requests */
 		while ((req = elv_next_request(ace->queue)) != NULL)
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 
 		/* Drop back to IDLE state and notify waiters */
 		ace->fsm_state = ACE_FSM_STATE_IDLE;
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 80754cdd311..b66ad58a3c3 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -77,7 +77,7 @@ static void do_z2_request(struct request_queue *q)
 		if (start + len > z2ram_size) {
 			printk( KERN_ERR DEVICE_NAME ": bad access: block=%lu, count=%u\n",
 				req->sector, req->current_nr_sectors);
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 		while (len) {
@@ -93,7 +93,7 @@ static void do_z2_request(struct request_queue *q)
 			start += size;
 			len -= size;
 		}
-		end_request(req, 1);
+		__blk_end_request_cur(req, 0);
 	}
 }
 
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index fee9a9e83fc..cab2b1fb2fe 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -654,17 +654,17 @@ static void gdrom_request(struct request_queue *rq)
 	while ((req = elv_next_request(rq)) != NULL) {
 		if (!blk_fs_request(req)) {
 			printk(KERN_DEBUG "GDROM: Non-fs request ignored\n");
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 		}
 		if (rq_data_dir(req) != READ) {
 			printk(KERN_NOTICE "GDROM: Read only device -");
 			printk(" write request ignored\n");
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 		}
 		if (req->nr_sectors)
 			gdrom_request_handler_dma(req);
 		else
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 	}
 }
 
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index a443e136dc4..221317e6a00 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -923,7 +923,7 @@ static void i2o_block_request_fn(struct request_queue *q)
 				break;
 			}
 		} else
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 	}
 };
 
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index a49a9c8f2cb..76c4c8d1307 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -54,33 +54,33 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 
 	if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
 	    req->cmd[0] == REQ_LB_OP_DISCARD)
-		return !tr->discard(dev, block, nsect);
+		return tr->discard(dev, block, nsect);
 
 	if (!blk_fs_request(req))
-		return 0;
+		return -EIO;
 
 	if (req->sector + req->current_nr_sectors > get_capacity(req->rq_disk))
-		return 0;
+		return -EIO;
 
 	switch(rq_data_dir(req)) {
 	case READ:
 		for (; nsect > 0; nsect--, block++, buf += tr->blksize)
 			if (tr->readsect(dev, block, buf))
-				return 0;
-		return 1;
+				return -EIO;
+		return 0;
 
 	case WRITE:
 		if (!tr->writesect)
-			return 0;
+			return -EIO;
 
 		for (; nsect > 0; nsect--, block++, buf += tr->blksize)
 			if (tr->writesect(dev, block, buf))
-				return 0;
-		return 1;
+				return -EIO;
+		return 0;
 
 	default:
 		printk(KERN_NOTICE "Unknown request %u\n", rq_data_dir(req));
-		return 0;
+		return -EIO;
 	}
 }
 
@@ -96,7 +96,7 @@ static int mtd_blktrans_thread(void *arg)
 	while (!kthread_should_stop()) {
 		struct request *req;
 		struct mtd_blktrans_dev *dev;
-		int res = 0;
+		int res;
 
 		req = elv_next_request(rq);
 
@@ -119,7 +119,7 @@ static int mtd_blktrans_thread(void *arg)
 
 		spin_lock_irq(rq->queue_lock);
 
-		end_request(req, res);
+		__blk_end_request_cur(req, res);
 	}
 	spin_unlock_irq(rq->queue_lock);
 
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
index a85ad05e854..09617884a50 100644
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -192,25 +192,25 @@ static void jsfd_do_request(struct request_queue *q)
 		size_t len = req->current_nr_sectors << 9;
 
 		if ((offset + len) > jdp->dsize) {
-               		end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 
 		if (rq_data_dir(req) != READ) {
 			printk(KERN_ERR "jsfd: write\n");
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 
 		if ((jdp->dbase & 0xff000000) != 0x20000000) {
 			printk(KERN_ERR "jsfd: bad base %x\n", (int)jdp->dbase);
-			end_request(req, 0);
+			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
 
 		jsfd_read(req->buffer, jdp->dbase + offset, len);
 
-		end_request(req, 1);
+		__blk_end_request_cur(req, 0);
 	}
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e33c8356b3d..cfeb3c2feb2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -845,9 +845,8 @@ extern unsigned int blk_rq_cur_bytes(struct request *rq);
  * blk_update_request() completes given number of bytes and updates
  * the request without completing it.
  *
- * blk_end_request() and friends.  __blk_end_request() and
- * end_request() must be called with the request queue spinlock
- * acquired.
+ * blk_end_request() and friends.  __blk_end_request() must be called
+ * with the request queue spinlock acquired.
  *
  * Several drivers define their own end_request and call
  * blk_end_request() for parts of the original function.
@@ -898,6 +897,19 @@ static inline void blk_end_request_all(struct request *rq, int error)
 	BUG_ON(pending);
 }
 
+/**
+ * blk_end_request_cur - Helper function to finish the current request chunk.
+ * @rq: the request to finish the current chunk for
+ * @err: %0 for success, < %0 for error
+ *
+ * Description:
+ *     Complete the current consecutively mapped chunk from @rq.
+ */
+static inline void blk_end_request_cur(struct request *rq, int error)
+{
+	blk_end_request(rq, error, rq->hard_cur_sectors << 9);
+}
+
 /**
  * __blk_end_request - Helper function for drivers to complete the request.
  * @rq:       the request being processed
@@ -934,29 +946,17 @@ static inline void __blk_end_request_all(struct request *rq, int error)
 }
 
 /**
- * end_request - end I/O on the current segment of the request
- * @rq:		the request being processed
- * @uptodate:	error value or %0/%1 uptodate flag
+ * __blk_end_request_cur - Helper function to finish the current request chunk.
+ * @rq: the request to finish the current chunk for
+ * @err: %0 for success, < %0 for error
  *
  * Description:
- *     Ends I/O on the current segment of a request. If that is the only
- *     remaining segment, the request is also completed and freed.
- *
- *     This is a remnant of how older block drivers handled I/O completions.
- *     Modern drivers typically end I/O on the full request in one go, unless
- *     they have a residual value to account for. For that case this function
- *     isn't really useful, unless the residual just happens to be the
- *     full current segment. In other words, don't use this function in new
- *     code. Use blk_end_request() or __blk_end_request() to end a request.
- **/
-static inline void end_request(struct request *rq, int uptodate)
+ *     Complete the current consecutively mapped chunk from @rq.  Must
+ *     be called with queue lock held.
+ */
+static inline void __blk_end_request_cur(struct request *rq, int error)
 {
-	int error = 0;
-
-	if (uptodate <= 0)
-		error = uptodate ? uptodate : -EIO;
-
-	__blk_end_bidi_request(rq, error, rq->hard_cur_sectors << 9, 0);
+	__blk_end_request(rq, error, rq->hard_cur_sectors << 9);
 }
 
 extern void blk_complete_request(struct request *);
-- 
cgit v1.2.3-70-g09d2


From 731ec497e5888c6792ad62613ae9be97eebcd7ca Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 23 Apr 2009 11:05:20 +0900
Subject: block: kill rq->data

Now that all block request data transfer is done via bio, rq->data
isn't used.  Kill it.

While at it, make the roles of rq->special and buffer clear.

[ Impact: drop now unncessary field from struct request ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Boaz Harrosh <bharrosh@panasas.com>
---
 block/blk-core.c        | 5 ++---
 block/blk-map.c         | 6 +++---
 block/scsi_ioctl.c      | 1 -
 drivers/scsi/scsi_lib.c | 1 -
 include/linux/blkdev.h  | 5 ++---
 5 files changed, 7 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/block/blk-core.c b/block/blk-core.c
index 0520cc70458..6dd180cf15d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -189,10 +189,9 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
 						(unsigned long long)rq->sector,
 						rq->nr_sectors,
 						rq->current_nr_sectors);
-	printk(KERN_INFO "  bio %p, biotail %p, buffer %p, data %p, len %u\n",
+	printk(KERN_INFO "  bio %p, biotail %p, buffer %p, len %u\n",
 						rq->bio, rq->biotail,
-						rq->buffer, rq->data,
-						rq->data_len);
+						rq->buffer, rq->data_len);
 
 	if (blk_pc_request(rq)) {
 		printk(KERN_INFO "  cdb: ");
diff --git a/block/blk-map.c b/block/blk-map.c
index f103729b462..694fefad34e 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -156,7 +156,7 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
 	if (!bio_flagged(bio, BIO_USER_MAPPED))
 		rq->cmd_flags |= REQ_COPY_USER;
 
-	rq->buffer = rq->data = NULL;
+	rq->buffer = NULL;
 	return 0;
 unmap_rq:
 	blk_rq_unmap_user(bio);
@@ -235,7 +235,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
 	blk_queue_bounce(q, &bio);
 	bio_get(bio);
 	blk_rq_bio_prep(q, rq, bio);
-	rq->buffer = rq->data = NULL;
+	rq->buffer = NULL;
 	return 0;
 }
 EXPORT_SYMBOL(blk_rq_map_user_iov);
@@ -313,7 +313,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 
 	blk_rq_bio_prep(q, rq, bio);
 	blk_queue_bounce(q, &rq->bio);
-	rq->buffer = rq->data = NULL;
+	rq->buffer = NULL;
 	return 0;
 }
 EXPORT_SYMBOL(blk_rq_map_kern);
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 82a0ca2f672..bb9aa43551b 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -500,7 +500,6 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
 
 	rq = blk_get_request(q, WRITE, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_BLOCK_PC;
-	rq->data = NULL;
 	rq->data_len = 0;
 	rq->extra_len = 0;
 	rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 756ac7c93de..aa9fc572e45 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1088,7 +1088,6 @@ int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
 			return ret;
 	} else {
 		BUG_ON(req->data_len);
-		BUG_ON(req->data);
 
 		memset(&cmd->sdb, 0, sizeof(cmd->sdb));
 		req->buffer = NULL;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index cfeb3c2feb2..12c545e2737 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -211,8 +211,8 @@ struct request {
 
 	unsigned short ioprio;
 
-	void *special;
-	char *buffer;
+	void *special;		/* opaque pointer available for LLD use */
+	char *buffer;		/* kaddr of the current segment if available */
 
 	int tag;
 	int errors;
@@ -229,7 +229,6 @@ struct request {
 	unsigned int data_len;
 	unsigned int extra_len;	/* length of alignment and padding */
 	unsigned int sense_len;
-	void *data;
 	void *sense;
 
 	unsigned long deadline;
-- 
cgit v1.2.3-70-g09d2


From 9fd8d0e1bcb848257968d9a7d73ca4d890ea8bd1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 28 Apr 2009 13:06:04 +0900
Subject: block: make blk_end_request_cur() return bool

In the process of mindlessly copying [__]blk_end_request_all(),
[__]blk_end_request_cur() ended up returning void even though they're
partial completion functions.  Fix it.

[ Impact: fix braindead API ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/blkdev.h | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 12c545e2737..3a5b1bd6582 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -903,10 +903,14 @@ static inline void blk_end_request_all(struct request *rq, int error)
  *
  * Description:
  *     Complete the current consecutively mapped chunk from @rq.
+ *
+ * Return:
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
  */
-static inline void blk_end_request_cur(struct request *rq, int error)
+static inline bool blk_end_request_cur(struct request *rq, int error)
 {
-	blk_end_request(rq, error, rq->hard_cur_sectors << 9);
+	return blk_end_request(rq, error, rq->hard_cur_sectors << 9);
 }
 
 /**
@@ -952,10 +956,14 @@ static inline void __blk_end_request_all(struct request *rq, int error)
  * Description:
  *     Complete the current consecutively mapped chunk from @rq.  Must
  *     be called with queue lock held.
+ *
+ * Return:
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
  */
-static inline void __blk_end_request_cur(struct request *rq, int error)
+static inline bool __blk_end_request_cur(struct request *rq, int error)
 {
-	__blk_end_request(rq, error, rq->hard_cur_sectors << 9);
+	return __blk_end_request(rq, error, rq->hard_cur_sectors << 9);
 }
 
 extern void blk_complete_request(struct request *);
-- 
cgit v1.2.3-70-g09d2


From eec9462088a26c046d4db3100796a340a50890b8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 28 Apr 2009 13:06:14 +0900
Subject: mg_disk: fold mg_disk.h into mg_disk.c

include/linux/mg_disk.h is used only by drivers/block/mg_disk.c.  No
reason to put it in a separate header.  Fold it into mg_disk.c.

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: unsik Kim <donari75@gmail.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 drivers/block/mg_disk.c | 186 ++++++++++++++++++++++++++++++++++++++++++-
 include/linux/mg_disk.h | 206 ------------------------------------------------
 2 files changed, 185 insertions(+), 207 deletions(-)
 delete mode 100644 include/linux/mg_disk.h

(limited to 'include')

diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index 2c00ad90cd6..2c6127ee834 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -22,10 +22,194 @@
 #include <linux/delay.h>
 #include <linux/platform_device.h>
 #include <linux/gpio.h>
-#include <linux/mg_disk.h>
 
 #define MG_RES_SEC (CONFIG_MG_DISK_RES << 1)
 
+/* name for block device */
+#define MG_DISK_NAME "mgd"
+/* name for platform device */
+#define MG_DEV_NAME "mg_disk"
+
+#define MG_DISK_MAJ 0
+#define MG_DISK_MAX_PART 16
+#define MG_SECTOR_SIZE 512
+#define MG_MAX_SECTS 256
+
+/* Register offsets */
+#define MG_BUFF_OFFSET			0x8000
+#define MG_STORAGE_BUFFER_SIZE		0x200
+#define MG_REG_OFFSET			0xC000
+#define MG_REG_FEATURE			(MG_REG_OFFSET + 2)	/* write case */
+#define MG_REG_ERROR			(MG_REG_OFFSET + 2)	/* read case */
+#define MG_REG_SECT_CNT			(MG_REG_OFFSET + 4)
+#define MG_REG_SECT_NUM			(MG_REG_OFFSET + 6)
+#define MG_REG_CYL_LOW			(MG_REG_OFFSET + 8)
+#define MG_REG_CYL_HIGH			(MG_REG_OFFSET + 0xA)
+#define MG_REG_DRV_HEAD			(MG_REG_OFFSET + 0xC)
+#define MG_REG_COMMAND			(MG_REG_OFFSET + 0xE)	/* write case */
+#define MG_REG_STATUS			(MG_REG_OFFSET + 0xE)	/* read  case */
+#define MG_REG_DRV_CTRL			(MG_REG_OFFSET + 0x10)
+#define MG_REG_BURST_CTRL		(MG_REG_OFFSET + 0x12)
+
+/* "Drive Select/Head Register" bit values */
+#define MG_REG_HEAD_MUST_BE_ON		0xA0 /* These 2 bits are always on */
+#define MG_REG_HEAD_DRIVE_MASTER	(0x00 | MG_REG_HEAD_MUST_BE_ON)
+#define MG_REG_HEAD_DRIVE_SLAVE		(0x10 | MG_REG_HEAD_MUST_BE_ON)
+#define MG_REG_HEAD_LBA_MODE		(0x40 | MG_REG_HEAD_MUST_BE_ON)
+
+
+/* "Device Control Register" bit values */
+#define MG_REG_CTRL_INTR_ENABLE			0x0
+#define MG_REG_CTRL_INTR_DISABLE		(0x1<<1)
+#define MG_REG_CTRL_RESET			(0x1<<2)
+#define MG_REG_CTRL_INTR_POLA_ACTIVE_HIGH	0x0
+#define MG_REG_CTRL_INTR_POLA_ACTIVE_LOW	(0x1<<4)
+#define MG_REG_CTRL_DPD_POLA_ACTIVE_LOW		0x0
+#define MG_REG_CTRL_DPD_POLA_ACTIVE_HIGH	(0x1<<5)
+#define MG_REG_CTRL_DPD_DISABLE			0x0
+#define MG_REG_CTRL_DPD_ENABLE			(0x1<<6)
+
+/* Status register bit */
+/* error bit in status register */
+#define MG_REG_STATUS_BIT_ERROR			0x01
+/* corrected error in status register */
+#define MG_REG_STATUS_BIT_CORRECTED_ERROR	0x04
+/* data request bit in status register */
+#define MG_REG_STATUS_BIT_DATA_REQ		0x08
+/* DSC - Drive Seek Complete */
+#define MG_REG_STATUS_BIT_SEEK_DONE		0x10
+/* DWF - Drive Write Fault */
+#define MG_REG_STATUS_BIT_WRITE_FAULT		0x20
+#define MG_REG_STATUS_BIT_READY			0x40
+#define MG_REG_STATUS_BIT_BUSY			0x80
+
+/* handy status */
+#define MG_STAT_READY	(MG_REG_STATUS_BIT_READY | MG_REG_STATUS_BIT_SEEK_DONE)
+#define MG_READY_OK(s)	(((s) & (MG_STAT_READY | \
+				(MG_REG_STATUS_BIT_BUSY | \
+				 MG_REG_STATUS_BIT_WRITE_FAULT | \
+				 MG_REG_STATUS_BIT_ERROR))) == MG_STAT_READY)
+
+/* Error register */
+#define MG_REG_ERR_AMNF		0x01
+#define MG_REG_ERR_ABRT		0x04
+#define MG_REG_ERR_IDNF		0x10
+#define MG_REG_ERR_UNC		0x40
+#define MG_REG_ERR_BBK		0x80
+
+/* error code for others */
+#define MG_ERR_NONE		0
+#define MG_ERR_TIMEOUT		0x100
+#define MG_ERR_INIT_STAT	0x101
+#define MG_ERR_TRANSLATION	0x102
+#define MG_ERR_CTRL_RST		0x103
+#define MG_ERR_INV_STAT		0x104
+#define MG_ERR_RSTOUT		0x105
+
+#define MG_MAX_ERRORS	6	/* Max read/write errors */
+
+/* command */
+#define MG_CMD_RD 0x20
+#define MG_CMD_WR 0x30
+#define MG_CMD_SLEEP 0x99
+#define MG_CMD_WAKEUP 0xC3
+#define MG_CMD_ID 0xEC
+#define MG_CMD_WR_CONF 0x3C
+#define MG_CMD_RD_CONF 0x40
+
+/* operation mode */
+#define MG_OP_CASCADE (1 << 0)
+#define MG_OP_CASCADE_SYNC_RD (1 << 1)
+#define MG_OP_CASCADE_SYNC_WR (1 << 2)
+#define MG_OP_INTERLEAVE (1 << 3)
+
+/* synchronous */
+#define MG_BURST_LAT_4 (3 << 4)
+#define MG_BURST_LAT_5 (4 << 4)
+#define MG_BURST_LAT_6 (5 << 4)
+#define MG_BURST_LAT_7 (6 << 4)
+#define MG_BURST_LAT_8 (7 << 4)
+#define MG_BURST_LEN_4 (1 << 1)
+#define MG_BURST_LEN_8 (2 << 1)
+#define MG_BURST_LEN_16 (3 << 1)
+#define MG_BURST_LEN_32 (4 << 1)
+#define MG_BURST_LEN_CONT (0 << 1)
+
+/* timeout value (unit: ms) */
+#define MG_TMAX_CONF_TO_CMD	1
+#define MG_TMAX_WAIT_RD_DRQ	10
+#define MG_TMAX_WAIT_WR_DRQ	500
+#define MG_TMAX_RST_TO_BUSY	10
+#define MG_TMAX_HDRST_TO_RDY	500
+#define MG_TMAX_SWRST_TO_RDY	500
+#define MG_TMAX_RSTOUT		3000
+
+/* device attribution */
+/* use mflash as boot device */
+#define MG_BOOT_DEV		(1 << 0)
+/* use mflash as storage device */
+#define MG_STORAGE_DEV		(1 << 1)
+/* same as MG_STORAGE_DEV, but bootloader already done reset sequence */
+#define MG_STORAGE_DEV_SKIP_RST	(1 << 2)
+
+#define MG_DEV_MASK (MG_BOOT_DEV | MG_STORAGE_DEV | MG_STORAGE_DEV_SKIP_RST)
+
+/* names of GPIO resource */
+#define MG_RST_PIN	"mg_rst"
+/* except MG_BOOT_DEV, reset-out pin should be assigned */
+#define MG_RSTOUT_PIN	"mg_rstout"
+
+/* private driver data */
+struct mg_drv_data {
+	/* disk resource */
+	u32 use_polling;
+
+	/* device attribution */
+	u32 dev_attr;
+
+	/* internally used */
+	struct mg_host *host;
+};
+
+/* main structure for mflash driver */
+struct mg_host {
+	struct device *dev;
+
+	struct request_queue *breq;
+	spinlock_t lock;
+	struct gendisk *gd;
+
+	struct timer_list timer;
+	void (*mg_do_intr) (struct mg_host *);
+
+	u16 id[ATA_ID_WORDS];
+
+	u16 cyls;
+	u16 heads;
+	u16 sectors;
+	u32 n_sectors;
+	u32 nres_sectors;
+
+	void __iomem *dev_base;
+	unsigned int irq;
+	unsigned int rst;
+	unsigned int rstout;
+
+	u32 major;
+	u32 error;
+};
+
+/*
+ * Debugging macro and defines
+ */
+#undef DO_MG_DEBUG
+#ifdef DO_MG_DEBUG
+#  define MG_DBG(fmt, args...) \
+	printk(KERN_DEBUG "%s:%d "fmt, __func__, __LINE__, ##args)
+#else /* CONFIG_MG_DEBUG */
+#  define MG_DBG(fmt, args...) do { } while (0)
+#endif /* CONFIG_MG_DEBUG */
+
 static void mg_request(struct request_queue *);
 
 static void mg_dump_status(const char *msg, unsigned int stat,
diff --git a/include/linux/mg_disk.h b/include/linux/mg_disk.h
deleted file mode 100644
index 1f76b1ebf62..00000000000
--- a/include/linux/mg_disk.h
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- *  include/linux/mg_disk.c
- *
- *  Support for the mGine m[g]flash IO mode.
- *  Based on legacy hd.c
- *
- * (c) 2008 mGine Co.,LTD
- * (c) 2008 unsik Kim <donari75@gmail.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- */
-
-#ifndef __MG_DISK_H__
-#define __MG_DISK_H__
-
-#include <linux/blkdev.h>
-#include <linux/ata.h>
-
-/* name for block device */
-#define MG_DISK_NAME "mgd"
-/* name for platform device */
-#define MG_DEV_NAME "mg_disk"
-
-#define MG_DISK_MAJ 0
-#define MG_DISK_MAX_PART 16
-#define MG_SECTOR_SIZE 512
-#define MG_MAX_SECTS 256
-
-/* Register offsets */
-#define MG_BUFF_OFFSET			0x8000
-#define MG_STORAGE_BUFFER_SIZE		0x200
-#define MG_REG_OFFSET			0xC000
-#define MG_REG_FEATURE			(MG_REG_OFFSET + 2)	/* write case */
-#define MG_REG_ERROR			(MG_REG_OFFSET + 2)	/* read case */
-#define MG_REG_SECT_CNT			(MG_REG_OFFSET + 4)
-#define MG_REG_SECT_NUM			(MG_REG_OFFSET + 6)
-#define MG_REG_CYL_LOW			(MG_REG_OFFSET + 8)
-#define MG_REG_CYL_HIGH			(MG_REG_OFFSET + 0xA)
-#define MG_REG_DRV_HEAD			(MG_REG_OFFSET + 0xC)
-#define MG_REG_COMMAND			(MG_REG_OFFSET + 0xE)	/* write case */
-#define MG_REG_STATUS			(MG_REG_OFFSET + 0xE)	/* read  case */
-#define MG_REG_DRV_CTRL			(MG_REG_OFFSET + 0x10)
-#define MG_REG_BURST_CTRL		(MG_REG_OFFSET + 0x12)
-
-/* "Drive Select/Head Register" bit values */
-#define MG_REG_HEAD_MUST_BE_ON		0xA0 /* These 2 bits are always on */
-#define MG_REG_HEAD_DRIVE_MASTER	(0x00 | MG_REG_HEAD_MUST_BE_ON)
-#define MG_REG_HEAD_DRIVE_SLAVE		(0x10 | MG_REG_HEAD_MUST_BE_ON)
-#define MG_REG_HEAD_LBA_MODE		(0x40 | MG_REG_HEAD_MUST_BE_ON)
-
-
-/* "Device Control Register" bit values */
-#define MG_REG_CTRL_INTR_ENABLE			0x0
-#define MG_REG_CTRL_INTR_DISABLE		(0x1<<1)
-#define MG_REG_CTRL_RESET			(0x1<<2)
-#define MG_REG_CTRL_INTR_POLA_ACTIVE_HIGH	0x0
-#define MG_REG_CTRL_INTR_POLA_ACTIVE_LOW	(0x1<<4)
-#define MG_REG_CTRL_DPD_POLA_ACTIVE_LOW		0x0
-#define MG_REG_CTRL_DPD_POLA_ACTIVE_HIGH	(0x1<<5)
-#define MG_REG_CTRL_DPD_DISABLE			0x0
-#define MG_REG_CTRL_DPD_ENABLE			(0x1<<6)
-
-/* Status register bit */
-/* error bit in status register */
-#define MG_REG_STATUS_BIT_ERROR			0x01
-/* corrected error in status register */
-#define MG_REG_STATUS_BIT_CORRECTED_ERROR	0x04
-/* data request bit in status register */
-#define MG_REG_STATUS_BIT_DATA_REQ		0x08
-/* DSC - Drive Seek Complete */
-#define MG_REG_STATUS_BIT_SEEK_DONE		0x10
-/* DWF - Drive Write Fault */
-#define MG_REG_STATUS_BIT_WRITE_FAULT		0x20
-#define MG_REG_STATUS_BIT_READY			0x40
-#define MG_REG_STATUS_BIT_BUSY			0x80
-
-/* handy status */
-#define MG_STAT_READY	(MG_REG_STATUS_BIT_READY | MG_REG_STATUS_BIT_SEEK_DONE)
-#define MG_READY_OK(s)	(((s) & (MG_STAT_READY | \
-				(MG_REG_STATUS_BIT_BUSY | \
-				 MG_REG_STATUS_BIT_WRITE_FAULT | \
-				 MG_REG_STATUS_BIT_ERROR))) == MG_STAT_READY)
-
-/* Error register */
-#define MG_REG_ERR_AMNF		0x01
-#define MG_REG_ERR_ABRT		0x04
-#define MG_REG_ERR_IDNF		0x10
-#define MG_REG_ERR_UNC		0x40
-#define MG_REG_ERR_BBK		0x80
-
-/* error code for others */
-#define MG_ERR_NONE		0
-#define MG_ERR_TIMEOUT		0x100
-#define MG_ERR_INIT_STAT	0x101
-#define MG_ERR_TRANSLATION	0x102
-#define MG_ERR_CTRL_RST		0x103
-#define MG_ERR_INV_STAT		0x104
-#define MG_ERR_RSTOUT		0x105
-
-#define MG_MAX_ERRORS	6	/* Max read/write errors */
-
-/* command */
-#define MG_CMD_RD 0x20
-#define MG_CMD_WR 0x30
-#define MG_CMD_SLEEP 0x99
-#define MG_CMD_WAKEUP 0xC3
-#define MG_CMD_ID 0xEC
-#define MG_CMD_WR_CONF 0x3C
-#define MG_CMD_RD_CONF 0x40
-
-/* operation mode */
-#define MG_OP_CASCADE (1 << 0)
-#define MG_OP_CASCADE_SYNC_RD (1 << 1)
-#define MG_OP_CASCADE_SYNC_WR (1 << 2)
-#define MG_OP_INTERLEAVE (1 << 3)
-
-/* synchronous */
-#define MG_BURST_LAT_4 (3 << 4)
-#define MG_BURST_LAT_5 (4 << 4)
-#define MG_BURST_LAT_6 (5 << 4)
-#define MG_BURST_LAT_7 (6 << 4)
-#define MG_BURST_LAT_8 (7 << 4)
-#define MG_BURST_LEN_4 (1 << 1)
-#define MG_BURST_LEN_8 (2 << 1)
-#define MG_BURST_LEN_16 (3 << 1)
-#define MG_BURST_LEN_32 (4 << 1)
-#define MG_BURST_LEN_CONT (0 << 1)
-
-/* timeout value (unit: ms) */
-#define MG_TMAX_CONF_TO_CMD	1
-#define MG_TMAX_WAIT_RD_DRQ	10
-#define MG_TMAX_WAIT_WR_DRQ	500
-#define MG_TMAX_RST_TO_BUSY	10
-#define MG_TMAX_HDRST_TO_RDY	500
-#define MG_TMAX_SWRST_TO_RDY	500
-#define MG_TMAX_RSTOUT		3000
-
-/* device attribution */
-/* use mflash as boot device */
-#define MG_BOOT_DEV		(1 << 0)
-/* use mflash as storage device */
-#define MG_STORAGE_DEV		(1 << 1)
-/* same as MG_STORAGE_DEV, but bootloader already done reset sequence */
-#define MG_STORAGE_DEV_SKIP_RST	(1 << 2)
-
-#define MG_DEV_MASK (MG_BOOT_DEV | MG_STORAGE_DEV | MG_STORAGE_DEV_SKIP_RST)
-
-/* names of GPIO resource */
-#define MG_RST_PIN	"mg_rst"
-/* except MG_BOOT_DEV, reset-out pin should be assigned */
-#define MG_RSTOUT_PIN	"mg_rstout"
-
-/* private driver data */
-struct mg_drv_data {
-	/* disk resource */
-	u32 use_polling;
-
-	/* device attribution */
-	u32 dev_attr;
-
-	/* internally used */
-	struct mg_host *host;
-};
-
-/* main structure for mflash driver */
-struct mg_host {
-	struct device *dev;
-
-	struct request_queue *breq;
-	spinlock_t lock;
-	struct gendisk *gd;
-
-	struct timer_list timer;
-	void (*mg_do_intr) (struct mg_host *);
-
-	u16 id[ATA_ID_WORDS];
-
-	u16 cyls;
-	u16 heads;
-	u16 sectors;
-	u32 n_sectors;
-	u32 nres_sectors;
-
-	void __iomem *dev_base;
-	unsigned int irq;
-	unsigned int rst;
-	unsigned int rstout;
-
-	u32 major;
-	u32 error;
-};
-
-/*
- * Debugging macro and defines
- */
-#undef DO_MG_DEBUG
-#ifdef DO_MG_DEBUG
-#  define MG_DBG(fmt, args...) \
-	printk(KERN_DEBUG "%s:%d "fmt, __func__, __LINE__, ##args)
-#else /* CONFIG_MG_DEBUG */
-#  define MG_DBG(fmt, args...) do { } while (0)
-#endif /* CONFIG_MG_DEBUG */
-
-#endif
-- 
cgit v1.2.3-70-g09d2


From 833cc67c7722e35863c6aaee9df56b442ef957ae Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Mon, 27 Apr 2009 21:32:16 +0000
Subject: smsc911x: add fifo byteswap support V2

This is V2 of the smsc911x fifo byteswap patch.

The smsc911x hardware supports both big and little and endian
hardware configurations, and the linux smsc911x driver currently
detects word order.

For correct operation on big endian platforms lacking swapped
byte lanes the following patch is needed. Only fifo data is
swapped, register data does not require any swapping.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Acked-by: Steve Glendinning <steve.glendinning@smsc.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/smsc911x.c   | 13 +++++++++++++
 include/linux/smsc911x.h | 10 ++++++++++
 2 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
index eb7db032a78..5113b26fc2d 100644
--- a/drivers/net/smsc911x.c
+++ b/drivers/net/smsc911x.c
@@ -47,6 +47,7 @@
 #include <linux/bitops.h>
 #include <linux/irq.h>
 #include <linux/io.h>
+#include <linux/swab.h>
 #include <linux/phy.h>
 #include <linux/smsc911x.h>
 #include "smsc911x.h"
@@ -175,6 +176,12 @@ static inline void
 smsc911x_tx_writefifo(struct smsc911x_data *pdata, unsigned int *buf,
 		      unsigned int wordcount)
 {
+	if (pdata->config.flags & SMSC911X_SWAP_FIFO) {
+		while (wordcount--)
+			smsc911x_reg_write(pdata, TX_DATA_FIFO, swab32(*buf++));
+		return;
+	}
+
 	if (pdata->config.flags & SMSC911X_USE_32BIT) {
 		writesl(pdata->ioaddr + TX_DATA_FIFO, buf, wordcount);
 		return;
@@ -194,6 +201,12 @@ static inline void
 smsc911x_rx_readfifo(struct smsc911x_data *pdata, unsigned int *buf,
 		     unsigned int wordcount)
 {
+	if (pdata->config.flags & SMSC911X_SWAP_FIFO) {
+		while (wordcount--)
+			*buf++ = swab32(smsc911x_reg_read(pdata, RX_DATA_FIFO));
+		return;
+	}
+
 	if (pdata->config.flags & SMSC911X_USE_32BIT) {
 		readsl(pdata->ioaddr + RX_DATA_FIFO, buf, wordcount);
 		return;
diff --git a/include/linux/smsc911x.h b/include/linux/smsc911x.h
index b32725075d7..5241e4fb4ec 100644
--- a/include/linux/smsc911x.h
+++ b/include/linux/smsc911x.h
@@ -47,4 +47,14 @@ struct smsc911x_platform_config {
 #define SMSC911X_FORCE_EXTERNAL_PHY 		(BIT(3))
 #define SMSC911X_SAVE_MAC_ADDRESS		(BIT(4))
 
+/*
+ * SMSC911X_SWAP_FIFO:
+ * Enables software byte swap for fifo data. Should only be used as a
+ * "last resort" in the case of big endian mode on boards with incorrectly
+ * routed data bus to older devices such as LAN9118. Newer devices such as
+ * LAN9221 can handle this in hardware, there are registers to control
+ * this swapping but the driver doesn't currently use them.
+ */
+#define SMSC911X_SWAP_FIFO			(BIT(5))
+
 #endif /* __LINUX_SMSC911X_H__ */
-- 
cgit v1.2.3-70-g09d2


From 8dc92f7e2ecfd93f5c57da78594a7a5482e2c15e Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Mon, 27 Apr 2009 22:35:52 +0000
Subject: sctp: add feature bit for SCTP offload in hardware

this is the sctp code to enable hardware crc32c offload for
adapters that support it.

Originally by: Vlad Yasevich <vladislav.yasevich@hp.com>

modified by Jesse Brandeburg <jesse.brandeburg@intel.com>

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 ++
 net/sctp/output.c         | 17 ++++++++++++++---
 2 files changed, 16 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fe20d171acf..f8c3619d551 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -670,7 +670,9 @@ struct net_device
 #define NETIF_F_GRO		16384	/* Generic receive offload */
 #define NETIF_F_LRO		32768	/* large receive offload */
 
+/* the GSO_MASK reserves bits 16 through 23 */
 #define NETIF_F_FCOE_CRC	(1 << 24) /* FCoE CRC32 */
+#define NETIF_F_SCTP_CSUM	(1 << 25) /* SCTP checksum offload */
 
 	/* Segmentation offload features */
 #define NETIF_F_GSO_SHIFT	16
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 7d08f522ec8..f0c91df59d4 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -412,6 +412,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 
 	/* Build the SCTP header.  */
 	sh = (struct sctphdr *)skb_push(nskb, sizeof(struct sctphdr));
+	skb_reset_transport_header(nskb);
 	sh->source = htons(packet->source_port);
 	sh->dest   = htons(packet->destination_port);
 
@@ -527,15 +528,25 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 	 * Note: Adler-32 is no longer applicable, as has been replaced
 	 * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>.
 	 */
-	if (!sctp_checksum_disable && !(dst->dev->features & NETIF_F_NO_CSUM)) {
+	if (!sctp_checksum_disable &&
+	    !(dst->dev->features & (NETIF_F_NO_CSUM | NETIF_F_SCTP_CSUM))) {
 		__u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len);
 
 		/* 3) Put the resultant value into the checksum field in the
 		 *    common header, and leave the rest of the bits unchanged.
 		 */
 		sh->checksum = sctp_end_cksum(crc32);
-	} else
-		nskb->ip_summed = CHECKSUM_UNNECESSARY;
+	} else {
+		if (dst->dev->features & NETIF_F_SCTP_CSUM) {
+			/* no need to seed psuedo checksum for SCTP */
+			nskb->ip_summed = CHECKSUM_PARTIAL;
+			nskb->csum_start = (skb_transport_header(nskb) -
+			                    nskb->head);
+			nskb->csum_offset = offsetof(struct sctphdr, checksum);
+		} else {
+			nskb->ip_summed = CHECKSUM_UNNECESSARY;
+		}
+	}
 
 	/* IP layer ECN support
 	 * From RFC 2481
-- 
cgit v1.2.3-70-g09d2


From 9ec4fa271faf2db3b8e1419c998da1ca6b094eb6 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 27 Apr 2009 17:57:18 -0700
Subject: irq, cpumask: correct CPUMASKS_OFFSTACK typo and fix fallout

CPUMASKS_OFFSTACK is not defined anywhere (it is CPUMASK_OFFSTACK).
It is a typo and init_allocate_desc_masks() is called before it set
affinity to all cpus...

Split init_alloc_desc_masks() into all_desc_masks() and init_desc_masks().

Also use CPUMASK_OFFSTACK in alloc_desc_masks().

[ Impact: fix smp_affinity copying/setup when moving irq_desc between CPUs ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
LKML-Reference: <49F6546E.3040406@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irq.h       | 27 ++++++++++++++++++---------
 kernel/irq/handle.c       |  9 ++++++---
 kernel/irq/numa_migrate.c |  2 +-
 3 files changed, 25 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index b7cbeed972e..c4953cf27e5 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -424,27 +424,25 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
 
 #ifdef CONFIG_SMP
 /**
- * init_alloc_desc_masks - allocate cpumasks for irq_desc
+ * alloc_desc_masks - allocate cpumasks for irq_desc
  * @desc:	pointer to irq_desc struct
  * @cpu:	cpu which will be handling the cpumasks
  * @boot:	true if need bootmem
  *
  * Allocates affinity and pending_mask cpumask if required.
  * Returns true if successful (or not required).
- * Side effect: affinity has all bits set, pending_mask has all bits clear.
  */
-static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu,
+static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu,
 								bool boot)
 {
+#ifdef CONFIG_CPUMASK_OFFSTACK
 	int node;
 
 	if (boot) {
 		alloc_bootmem_cpumask_var(&desc->affinity);
-		cpumask_setall(desc->affinity);
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 		alloc_bootmem_cpumask_var(&desc->pending_mask);
-		cpumask_clear(desc->pending_mask);
 #endif
 		return true;
 	}
@@ -453,18 +451,25 @@ static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu,
 
 	if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node))
 		return false;
-	cpumask_setall(desc->affinity);
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) {
 		free_cpumask_var(desc->affinity);
 		return false;
 	}
-	cpumask_clear(desc->pending_mask);
+#endif
 #endif
 	return true;
 }
 
+static inline void init_desc_masks(struct irq_desc *desc)
+{
+	cpumask_setall(desc->affinity);
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+	cpumask_clear(desc->pending_mask);
+#endif
+}
+
 /**
  * init_copy_desc_masks - copy cpumasks for irq_desc
  * @old_desc:	pointer to old irq_desc struct
@@ -478,7 +483,7 @@ static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu,
 static inline void init_copy_desc_masks(struct irq_desc *old_desc,
 					struct irq_desc *new_desc)
 {
-#ifdef CONFIG_CPUMASKS_OFFSTACK
+#ifdef CONFIG_CPUMASK_OFFSTACK
 	cpumask_copy(new_desc->affinity, old_desc->affinity);
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
@@ -499,12 +504,16 @@ static inline void free_desc_masks(struct irq_desc *old_desc,
 
 #else /* !CONFIG_SMP */
 
-static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu,
+static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu,
 								bool boot)
 {
 	return true;
 }
 
+static inline void init_desc_masks(struct irq_desc *desc)
+{
+}
+
 static inline void init_copy_desc_masks(struct irq_desc *old_desc,
 					struct irq_desc *new_desc)
 {
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index d82142be8dd..882c7980010 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -115,10 +115,11 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
 		printk(KERN_ERR "can not alloc kstat_irqs\n");
 		BUG_ON(1);
 	}
-	if (!init_alloc_desc_masks(desc, cpu, false)) {
+	if (!alloc_desc_masks(desc, cpu, false)) {
 		printk(KERN_ERR "can not alloc irq_desc cpumasks\n");
 		BUG_ON(1);
 	}
+	init_desc_masks(desc);
 	arch_init_chip_data(desc, cpu);
 }
 
@@ -169,7 +170,8 @@ int __init early_irq_init(void)
 		desc[i].irq = i;
 		desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids;
 		lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
-		init_alloc_desc_masks(&desc[i], 0, true);
+		alloc_desc_masks(&desc[i], 0, true);
+		init_desc_masks(&desc[i]);
 		irq_desc_ptrs[i] = desc + i;
 	}
 
@@ -256,7 +258,8 @@ int __init early_irq_init(void)
 
 	for (i = 0; i < count; i++) {
 		desc[i].irq = i;
-		init_alloc_desc_masks(&desc[i], 0, true);
+		alloc_desc_masks(&desc[i], 0, true);
+		init_desc_masks(&desc[i]);
 		desc[i].kstat_irqs = kstat_irqs_all[i];
 	}
 	return arch_early_irq_init();
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 44bbdcbaf8d..5760d725162 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -37,7 +37,7 @@ static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
 		 struct irq_desc *desc, int cpu)
 {
 	memcpy(desc, old_desc, sizeof(struct irq_desc));
-	if (!init_alloc_desc_masks(desc, cpu, false)) {
+	if (!alloc_desc_masks(desc, cpu, false)) {
 		printk(KERN_ERR "irq %d: can not get new irq_desc cpumask "
 				"for migration.\n", irq);
 		return false;
-- 
cgit v1.2.3-70-g09d2


From fcef5911c7ea89b80d5bfc727f402f37c9eefd57 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 27 Apr 2009 17:58:23 -0700
Subject: x86/irq: remove leftover code from NUMA_MIGRATE_IRQ_DESC

The original feature of migrating irq_desc dynamic was too fragile
and was causing problems: it caused crashes on systems with lots of
cards with MSI-X when user-space irq-balancer was enabled.

We now have new patches that create irq_desc according to device
numa node. This patch removes the leftover bits of the dynamic balancer.

[ Impact: remove dead code ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
LKML-Reference: <49F654AF.8000808@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig                  | 10 -------
 arch/x86/configs/x86_64_defconfig |  1 -
 arch/x86/kernel/apic/io_apic.c    | 56 +++------------------------------------
 include/linux/irq.h               | 10 -------
 kernel/irq/Makefile               |  2 +-
 kernel/irq/chip.c                 | 12 ++-------
 kernel/irq/handle.c               |  9 ++-----
 kernel/irq/numa_migrate.c         |  2 --
 8 files changed, 9 insertions(+), 93 deletions(-)

(limited to 'include')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index df9e885eee1..e1b2543f8ed 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -274,16 +274,6 @@ config SPARSE_IRQ
 
 	  If you don't know what to do here, say N.
 
-config NUMA_MIGRATE_IRQ_DESC
-	bool "Move irq desc when changing irq smp_affinity"
-	depends on SPARSE_IRQ && NUMA
-	depends on BROKEN
-	default n
-	---help---
-	  This enables moving irq_desc to cpu/node that irq will use handled.
-
-	  If you don't know what to do here, say N.
-
 config X86_MPPARSE
 	bool "Enable MPS table" if ACPI
 	default y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 9fe5d212ab4..27b8ce0f590 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -195,7 +195,6 @@ CONFIG_HIGH_RES_TIMERS=y
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
 CONFIG_SMP=y
 CONFIG_SPARSE_IRQ=y
-# CONFIG_NUMA_MIGRATE_IRQ_DESC is not set
 CONFIG_X86_FIND_SMP_CONFIG=y
 CONFIG_X86_MPPARSE=y
 # CONFIG_X86_ELAN is not set
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 30da617d18e..9fbf0f7ec7e 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -148,9 +148,6 @@ struct irq_cfg {
 	unsigned move_cleanup_count;
 	u8 vector;
 	u8 move_in_progress : 1;
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-	u8 move_desc_pending : 1;
-#endif
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -254,8 +251,7 @@ int arch_init_chip_data(struct irq_desc *desc, int cpu)
 	return 0;
 }
 
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-
+/* for move_irq_desc */
 static void
 init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
 {
@@ -356,19 +352,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
 		old_desc->chip_data = NULL;
 	}
 }
-
-static void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-	struct irq_cfg *cfg = desc->chip_data;
-
-	if (!cfg->move_in_progress) {
-		/* it means that domain is not changed */
-		if (!cpumask_intersects(desc->affinity, mask))
-			cfg->move_desc_pending = 1;
-	}
-}
-#endif
+/* end for move_irq_desc */
 
 #else
 static struct irq_cfg *irq_cfg(unsigned int irq)
@@ -378,13 +362,6 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
 
 #endif
 
-#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
-static inline void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-}
-#endif
-
 struct io_apic {
 	unsigned int index;
 	unsigned int unused[3];
@@ -592,9 +569,6 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
 	if (assign_irq_vector(irq, cfg, mask))
 		return BAD_APICID;
 
-	/* check that before desc->addinity get updated */
-	set_extra_move_desc(desc, mask);
-
 	cpumask_copy(desc->affinity, mask);
 
 	return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
@@ -2393,8 +2367,6 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	set_extra_move_desc(desc, mask);
-
 	dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
 
 	irte.vector = cfg->vector;
@@ -2491,34 +2463,14 @@ static void irq_complete_move(struct irq_desc **descp)
 	struct irq_cfg *cfg = desc->chip_data;
 	unsigned vector, me;
 
-	if (likely(!cfg->move_in_progress)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-		if (likely(!cfg->move_desc_pending))
-			return;
-
-		/* domain has not changed, but affinity did */
-		me = smp_processor_id();
-		if (cpumask_test_cpu(me, desc->affinity)) {
-			*descp = desc = move_irq_desc(desc, me);
-			/* get the new one */
-			cfg = desc->chip_data;
-			cfg->move_desc_pending = 0;
-		}
-#endif
+	if (likely(!cfg->move_in_progress))
 		return;
-	}
 
 	vector = ~get_irq_regs()->orig_ax;
 	me = smp_processor_id();
 
-	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-		*descp = desc = move_irq_desc(desc, me);
-		/* get the new one */
-		cfg = desc->chip_data;
-#endif
+	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
 		send_cleanup_vector(cfg);
-	}
 }
 #else
 static inline void irq_complete_move(struct irq_desc **descp) {}
diff --git a/include/linux/irq.h b/include/linux/irq.h
index c4953cf27e5..2a34cd6281d 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -212,16 +212,6 @@ extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);
 
 extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);
 
-static inline struct irq_desc *
-irq_remap_to_desc(unsigned int irq, struct irq_desc *desc)
-{
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-	return irq_to_desc(irq);
-#else
-	return desc;
-#endif
-}
-
 /*
  * Migration helpers for obsolete names, they will go away:
  */
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 3394f8f5296..2f065277f8e 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -3,5 +3,5 @@ obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o
 obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
-obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o
+obj-$(CONFIG_SPARSE_IRQ) += numa_migrate.o
 obj-$(CONFIG_PM_SLEEP) += pm.o
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index c687ba4363f..13c68e71b72 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -359,7 +359,6 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 
 	spin_lock(&desc->lock);
 	mask_ack_irq(desc, irq);
-	desc = irq_remap_to_desc(irq, desc);
 
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
@@ -438,7 +437,6 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 	desc->status &= ~IRQ_INPROGRESS;
 out:
 	desc->chip->eoi(irq);
-	desc = irq_remap_to_desc(irq, desc);
 
 	spin_unlock(&desc->lock);
 }
@@ -475,7 +473,6 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 		    !desc->action)) {
 		desc->status |= (IRQ_PENDING | IRQ_MASKED);
 		mask_ack_irq(desc, irq);
-		desc = irq_remap_to_desc(irq, desc);
 		goto out_unlock;
 	}
 	kstat_incr_irqs_this_cpu(irq, desc);
@@ -483,7 +480,6 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 	/* Start handling the irq */
 	if (desc->chip->ack)
 		desc->chip->ack(irq);
-	desc = irq_remap_to_desc(irq, desc);
 
 	/* Mark the IRQ currently in progress.*/
 	desc->status |= IRQ_INPROGRESS;
@@ -544,10 +540,8 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
 	if (!noirqdebug)
 		note_interrupt(irq, desc, action_ret);
 
-	if (desc->chip->eoi) {
+	if (desc->chip->eoi)
 		desc->chip->eoi(irq);
-		desc = irq_remap_to_desc(irq, desc);
-	}
 }
 
 void
@@ -582,10 +576,8 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 
 	/* Uninstall? */
 	if (handle == handle_bad_irq) {
-		if (desc->chip != &no_irq_chip) {
+		if (desc->chip != &no_irq_chip)
 			mask_ack_irq(desc, irq);
-			desc = irq_remap_to_desc(irq, desc);
-		}
 		desc->status |= IRQ_DISABLED;
 		desc->depth = 1;
 	}
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 882c7980010..3e0cbc44bd7 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -458,11 +458,8 @@ unsigned int __do_IRQ(unsigned int irq)
 		/*
 		 * No locking required for CPU-local interrupts:
 		 */
-		if (desc->chip->ack) {
+		if (desc->chip->ack)
 			desc->chip->ack(irq);
-			/* get new one */
-			desc = irq_remap_to_desc(irq, desc);
-		}
 		if (likely(!(desc->status & IRQ_DISABLED))) {
 			action_ret = handle_IRQ_event(irq, desc->action);
 			if (!noirqdebug)
@@ -473,10 +470,8 @@ unsigned int __do_IRQ(unsigned int irq)
 	}
 
 	spin_lock(&desc->lock);
-	if (desc->chip->ack) {
+	if (desc->chip->ack)
 		desc->chip->ack(irq);
-		desc = irq_remap_to_desc(irq, desc);
-	}
 	/*
 	 * REPLAY is when Linux resends an IRQ that was dropped earlier
 	 * WAITING is used by probe to mark irqs that are being tested
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 5760d725162..ce72bc3f4ce 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -97,9 +97,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
 
 	/* free the old one */
 	free_one_irq_desc(old_desc, desc);
-	spin_unlock(&old_desc->lock);
 	kfree(old_desc);
-	spin_lock(&desc->lock);
 
 	return desc;
 
-- 
cgit v1.2.3-70-g09d2


From d5dedd4507d307eb3f35f21b6e16f336fdc0d82a Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 27 Apr 2009 17:59:21 -0700
Subject: irq: change ->set_affinity() to return status

according to Ingo, change set_affinity() in irq_chip should return int,
because that way we can handle failure cases in a much cleaner way, in
the genirq layer.

v2: fix two typos

[ Impact: extend API ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: linux-arch@vger.kernel.org
LKML-Reference: <49F654E9.4070809@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/alpha/kernel/sys_dp264.c         |  8 +++--
 arch/alpha/kernel/sys_titan.c         |  4 ++-
 arch/arm/common/gic.c                 |  4 ++-
 arch/cris/arch-v32/kernel/irq.c       |  4 ++-
 arch/ia64/hp/sim/hpsim_irq.c          |  3 +-
 arch/ia64/kernel/iosapic.c            | 10 +++---
 arch/ia64/kernel/msi_ia64.c           | 16 +++++----
 arch/ia64/sn/kernel/irq.c             |  4 ++-
 arch/ia64/sn/kernel/msi_sn.c          |  8 +++--
 arch/mips/cavium-octeon/octeon-irq.c  |  8 +++--
 arch/mips/include/asm/irq.h           |  2 +-
 arch/mips/kernel/irq-gic.c            |  5 +--
 arch/mips/mti-malta/malta-smtc.c      |  4 ++-
 arch/mips/sibyte/bcm1480/irq.c        |  8 +++--
 arch/mips/sibyte/sb1250/irq.c         |  8 +++--
 arch/parisc/kernel/irq.c              |  6 ++--
 arch/powerpc/platforms/pseries/xics.c | 12 ++++---
 arch/powerpc/sysdev/mpic.c            |  4 ++-
 arch/sparc/kernel/irq_64.c            | 12 +++++--
 arch/x86/kernel/apic/io_apic.c        | 64 ++++++++++++++++++++++-------------
 drivers/parisc/iosapic.c              |  6 ++--
 drivers/xen/events.c                  | 12 ++++---
 include/linux/irq.h                   |  2 +-
 23 files changed, 140 insertions(+), 74 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
index 9c9d1fd4155..5bd5259324b 100644
--- a/arch/alpha/kernel/sys_dp264.c
+++ b/arch/alpha/kernel/sys_dp264.c
@@ -176,22 +176,26 @@ cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
 	}
 }
 
-static void
+static int
 dp264_set_affinity(unsigned int irq, const struct cpumask *affinity)
 { 
 	spin_lock(&dp264_irq_lock);
 	cpu_set_irq_affinity(irq, *affinity);
 	tsunami_update_irq_hw(cached_irq_mask);
 	spin_unlock(&dp264_irq_lock);
+
+	return 0;
 }
 
-static void
+static int
 clipper_set_affinity(unsigned int irq, const struct cpumask *affinity)
 { 
 	spin_lock(&dp264_irq_lock);
 	cpu_set_irq_affinity(irq - 16, *affinity);
 	tsunami_update_irq_hw(cached_irq_mask);
 	spin_unlock(&dp264_irq_lock);
+
+	return 0;
 }
 
 static struct hw_interrupt_type dp264_irq_type = {
diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c
index 27f840a4ad3..8dd239ebdb9 100644
--- a/arch/alpha/kernel/sys_titan.c
+++ b/arch/alpha/kernel/sys_titan.c
@@ -157,13 +157,15 @@ titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
 
 }
 
-static void
+static int
 titan_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
 { 
 	spin_lock(&titan_irq_lock);
 	titan_cpu_set_irq_affinity(irq - 16, *affinity);
 	titan_update_irq_hw(titan_cached_irq_mask);
 	spin_unlock(&titan_irq_lock);
+
+	return 0;
 }
 
 static void
diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c
index c6884ba1d5e..90f6b7f52d4 100644
--- a/arch/arm/common/gic.c
+++ b/arch/arm/common/gic.c
@@ -109,7 +109,7 @@ static void gic_unmask_irq(unsigned int irq)
 }
 
 #ifdef CONFIG_SMP
-static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
+static int gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
 {
 	void __iomem *reg = gic_dist_base(irq) + GIC_DIST_TARGET + (gic_irq(irq) & ~3);
 	unsigned int shift = (irq % 4) * 8;
@@ -122,6 +122,8 @@ static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
 	val |= 1 << (cpu + shift);
 	writel(val, reg);
 	spin_unlock(&irq_controller_lock);
+
+	return 0;
 }
 #endif
 
diff --git a/arch/cris/arch-v32/kernel/irq.c b/arch/cris/arch-v32/kernel/irq.c
index df3925cb1c7..d70b445f4a8 100644
--- a/arch/cris/arch-v32/kernel/irq.c
+++ b/arch/cris/arch-v32/kernel/irq.c
@@ -325,12 +325,14 @@ static void end_crisv32_irq(unsigned int irq)
 {
 }
 
-void set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest)
+int set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest)
 {
 	unsigned long flags;
 	spin_lock_irqsave(&irq_lock, flags);
 	irq_allocations[irq - FIRST_IRQ].mask = *dest;
 	spin_unlock_irqrestore(&irq_lock, flags);
+
+	return 0;
 }
 
 static struct irq_chip crisv32_irq_type = {
diff --git a/arch/ia64/hp/sim/hpsim_irq.c b/arch/ia64/hp/sim/hpsim_irq.c
index cc0a3182db3..acb5047ab57 100644
--- a/arch/ia64/hp/sim/hpsim_irq.c
+++ b/arch/ia64/hp/sim/hpsim_irq.c
@@ -21,9 +21,10 @@ hpsim_irq_noop (unsigned int irq)
 {
 }
 
-static void
+static int
 hpsim_set_affinity_noop(unsigned int a, const struct cpumask *b)
 {
+	return 0;
 }
 
 static struct hw_interrupt_type irq_type_hp_sim = {
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 166e0d839fa..f92cef47bf8 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -329,7 +329,7 @@ unmask_irq (unsigned int irq)
 }
 
 
-static void
+static int
 iosapic_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 #ifdef CONFIG_SMP
@@ -343,15 +343,15 @@ iosapic_set_affinity(unsigned int irq, const struct cpumask *mask)
 
 	cpu = cpumask_first_and(cpu_online_mask, mask);
 	if (cpu >= nr_cpu_ids)
-		return;
+		return -1;
 
 	if (irq_prepare_move(irq, cpu))
-		return;
+		return -1;
 
 	dest = cpu_physical_id(cpu);
 
 	if (!iosapic_intr_info[irq].count)
-		return;			/* not an IOSAPIC interrupt */
+		return -1;			/* not an IOSAPIC interrupt */
 
 	set_irq_affinity_info(irq, dest, redir);
 
@@ -376,7 +376,9 @@ iosapic_set_affinity(unsigned int irq, const struct cpumask *mask)
 		iosapic_write(iosapic, IOSAPIC_RTE_HIGH(rte_index), high32);
 		iosapic_write(iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
 	}
+
 #endif
+	return 0;
 }
 
 /*
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 2b15e233f7f..0f8ade9331b 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -12,7 +12,7 @@
 static struct irq_chip	ia64_msi_chip;
 
 #ifdef CONFIG_SMP
-static void ia64_set_msi_irq_affinity(unsigned int irq,
+static int ia64_set_msi_irq_affinity(unsigned int irq,
 				      const cpumask_t *cpu_mask)
 {
 	struct msi_msg msg;
@@ -20,10 +20,10 @@ static void ia64_set_msi_irq_affinity(unsigned int irq,
 	int cpu = first_cpu(*cpu_mask);
 
 	if (!cpu_online(cpu))
-		return;
+		return -1;
 
 	if (irq_prepare_move(irq, cpu))
-		return;
+		return -1;
 
 	read_msi_msg(irq, &msg);
 
@@ -39,6 +39,8 @@ static void ia64_set_msi_irq_affinity(unsigned int irq,
 
 	write_msi_msg(irq, &msg);
 	cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu));
+
+	return 0;
 }
 #endif /* CONFIG_SMP */
 
@@ -130,17 +132,17 @@ void arch_teardown_msi_irq(unsigned int irq)
 
 #ifdef CONFIG_DMAR
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg = irq_cfg + irq;
 	struct msi_msg msg;
 	int cpu = cpumask_first(mask);
 
 	if (!cpu_online(cpu))
-		return;
+		return -1;
 
 	if (irq_prepare_move(irq, cpu))
-		return;
+		return -1;
 
 	dmar_msi_read(irq, &msg);
 
@@ -151,6 +153,8 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 
 	dmar_msi_write(irq, &msg);
 	cpumask_copy(irq_desc[irq].affinity, mask);
+
+	return 0;
 }
 #endif /* CONFIG_SMP */
 
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index 66fd705e82c..764f26abac0 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -227,7 +227,7 @@ finish_up:
 	return new_irq_info;
 }
 
-static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask)
+static int sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask)
 {
 	struct sn_irq_info *sn_irq_info, *sn_irq_info_safe;
 	nasid_t nasid;
@@ -239,6 +239,8 @@ static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask)
 	list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe,
 				 sn_irq_lh[irq], list)
 		(void)sn_retarget_vector(sn_irq_info, nasid, slice);
+
+	return 0;
 }
 
 #ifdef CONFIG_SMP
diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c
index 81e428943d7..fbbfb970120 100644
--- a/arch/ia64/sn/kernel/msi_sn.c
+++ b/arch/ia64/sn/kernel/msi_sn.c
@@ -151,7 +151,7 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry)
 }
 
 #ifdef CONFIG_SMP
-static void sn_set_msi_irq_affinity(unsigned int irq,
+static int sn_set_msi_irq_affinity(unsigned int irq,
 				    const struct cpumask *cpu_mask)
 {
 	struct msi_msg msg;
@@ -168,7 +168,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq,
 	cpu = cpumask_first(cpu_mask);
 	sn_irq_info = sn_msi_info[irq].sn_irq_info;
 	if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0)
-		return;
+		return -1;
 
 	/*
 	 * Release XIO resources for the old MSI PCI address
@@ -189,7 +189,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq,
 	new_irq_info = sn_retarget_vector(sn_irq_info, nasid, slice);
 	sn_msi_info[irq].sn_irq_info = new_irq_info;
 	if (new_irq_info == NULL)
-		return;
+		return -1;
 
 	/*
 	 * Map the xio address into bus space
@@ -206,6 +206,8 @@ static void sn_set_msi_irq_affinity(unsigned int irq,
 
 	write_msi_msg(irq, &msg);
 	cpumask_copy(irq_desc[irq].affinity, cpu_mask);
+
+	return 0;
 }
 #endif /* CONFIG_SMP */
 
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index 1c19af8daa6..d3a0c8154be 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -177,7 +177,7 @@ static void octeon_irq_ciu0_disable(unsigned int irq)
 }
 
 #ifdef CONFIG_SMP
-static void octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask *dest)
+static int octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask *dest)
 {
 	int cpu;
 	int bit = irq - OCTEON_IRQ_WORKQ0;	/* Bit 0-63 of EN0 */
@@ -199,6 +199,8 @@ static void octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask
 	 */
 	cvmx_read_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2));
 	write_unlock(&octeon_irq_ciu0_rwlock);
+
+	return 0;
 }
 #endif
 
@@ -292,7 +294,7 @@ static void octeon_irq_ciu1_disable(unsigned int irq)
 }
 
 #ifdef CONFIG_SMP
-static void octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask *dest)
+static int octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask *dest)
 {
 	int cpu;
 	int bit = irq - OCTEON_IRQ_WDOG0;	/* Bit 0-63 of EN1 */
@@ -315,6 +317,8 @@ static void octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask
 	 */
 	cvmx_read_csr(CVMX_CIU_INTX_EN1(cvmx_get_core_num() * 2 + 1));
 	write_unlock(&octeon_irq_ciu1_rwlock);
+
+	return 0;
 }
 #endif
 
diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index 3214ade02d1..4f1eed107b0 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -49,7 +49,7 @@ static inline void smtc_im_ack_irq(unsigned int irq)
 #ifdef CONFIG_MIPS_MT_SMTC_IRQAFF
 #include <linux/cpumask.h>
 
-extern void plat_set_irq_affinity(unsigned int irq,
+extern int plat_set_irq_affinity(unsigned int irq,
 				  const struct cpumask *affinity);
 extern void smtc_forward_irq(unsigned int irq);
 
diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c
index 87deb8f6c45..3f43c2e3aa5 100644
--- a/arch/mips/kernel/irq-gic.c
+++ b/arch/mips/kernel/irq-gic.c
@@ -155,7 +155,7 @@ static void gic_unmask_irq(unsigned int irq)
 
 static DEFINE_SPINLOCK(gic_lock);
 
-static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
+static int gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 {
 	cpumask_t	tmp = CPU_MASK_NONE;
 	unsigned long	flags;
@@ -166,7 +166,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 
 	cpumask_and(&tmp, cpumask, cpu_online_mask);
 	if (cpus_empty(tmp))
-		return;
+		return -1;
 
 	/* Assumption : cpumask refers to a single CPU */
 	spin_lock_irqsave(&gic_lock, flags);
@@ -190,6 +190,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 	cpumask_copy(irq_desc[irq].affinity, cpumask);
 	spin_unlock_irqrestore(&gic_lock, flags);
 
+	return 0;
 }
 #endif
 
diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c
index 5ba31888fef..499ffe5475d 100644
--- a/arch/mips/mti-malta/malta-smtc.c
+++ b/arch/mips/mti-malta/malta-smtc.c
@@ -114,7 +114,7 @@ struct plat_smp_ops msmtc_smp_ops = {
  */
 
 
-void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
+int plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
 {
 	cpumask_t tmask;
 	int cpu = 0;
@@ -156,5 +156,7 @@ void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
 
 	/* Do any generic SMTC IRQ affinity setup */
 	smtc_set_irq_affinity(irq, tmask);
+
+	return 0;
 }
 #endif /* CONFIG_MIPS_MT_SMTC_IRQAFF */
diff --git a/arch/mips/sibyte/bcm1480/irq.c b/arch/mips/sibyte/bcm1480/irq.c
index 352352b3cb2..4f256a131bf 100644
--- a/arch/mips/sibyte/bcm1480/irq.c
+++ b/arch/mips/sibyte/bcm1480/irq.c
@@ -50,7 +50,7 @@ static void enable_bcm1480_irq(unsigned int irq);
 static void disable_bcm1480_irq(unsigned int irq);
 static void ack_bcm1480_irq(unsigned int irq);
 #ifdef CONFIG_SMP
-static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask);
+static int bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask);
 #endif
 
 #ifdef CONFIG_PCI
@@ -109,7 +109,7 @@ void bcm1480_unmask_irq(int cpu, int irq)
 }
 
 #ifdef CONFIG_SMP
-static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	int i = 0, old_cpu, cpu, int_on, k;
 	u64 cur_ints;
@@ -119,7 +119,7 @@ static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
 
 	if (cpumask_weight(mask) != 1) {
 		printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
-		return;
+		return -1;
 	}
 	i = cpumask_first(mask);
 
@@ -155,6 +155,8 @@ static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
 	}
 	spin_unlock(&bcm1480_imr_lock);
 	spin_unlock_irqrestore(&desc->lock, flags);
+
+	return 0;
 }
 #endif
 
diff --git a/arch/mips/sibyte/sb1250/irq.c b/arch/mips/sibyte/sb1250/irq.c
index c08ff582da6..e389507f1f9 100644
--- a/arch/mips/sibyte/sb1250/irq.c
+++ b/arch/mips/sibyte/sb1250/irq.c
@@ -50,7 +50,7 @@ static void enable_sb1250_irq(unsigned int irq);
 static void disable_sb1250_irq(unsigned int irq);
 static void ack_sb1250_irq(unsigned int irq);
 #ifdef CONFIG_SMP
-static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask);
+static int sb1250_set_affinity(unsigned int irq, const struct cpumask *mask);
 #endif
 
 #ifdef CONFIG_SIBYTE_HAS_LDT
@@ -103,7 +103,7 @@ void sb1250_unmask_irq(int cpu, int irq)
 }
 
 #ifdef CONFIG_SMP
-static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	int i = 0, old_cpu, cpu, int_on;
 	u64 cur_ints;
@@ -114,7 +114,7 @@ static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
 
 	if (cpumask_weight(mask) > 1) {
 		printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
-		return;
+		return -1;
 	}
 
 	/* Convert logical CPU to physical CPU */
@@ -146,6 +146,8 @@ static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
 	}
 	spin_unlock(&sb1250_imr_lock);
 	spin_unlock_irqrestore(&desc->lock, flags);
+
+	return 0;
 }
 #endif
 
diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c
index 4ea4229d765..8007f1e6572 100644
--- a/arch/parisc/kernel/irq.c
+++ b/arch/parisc/kernel/irq.c
@@ -130,15 +130,17 @@ int cpu_check_affinity(unsigned int irq, const struct cpumask *dest)
 	return cpu_dest;
 }
 
-static void cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest)
+static int cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest)
 {
 	int cpu_dest;
 
 	cpu_dest = cpu_check_affinity(irq, dest);
 	if (cpu_dest < 0)
-		return;
+		return -1;
 
 	cpumask_copy(&irq_desc[irq].affinity, dest);
+
+	return 0;
 }
 #endif
 
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index 80b513449f4..be3581a8c29 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -333,7 +333,7 @@ static void xics_eoi_lpar(unsigned int virq)
 	lpar_xirr_info_set((0xff << 24) | irq);
 }
 
-static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
+static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
 {
 	unsigned int irq;
 	int status;
@@ -342,14 +342,14 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
 
 	irq = (unsigned int)irq_map[virq].hwirq;
 	if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
-		return;
+		return -1;
 
 	status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq);
 
 	if (status) {
 		printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
 			__func__, irq, status);
-		return;
+		return -1;
 	}
 
 	/*
@@ -363,7 +363,7 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
 		printk(KERN_WARNING
 			"%s: No online cpus in the mask %s for irq %d\n",
 			__func__, cpulist, virq);
-		return;
+		return -1;
 	}
 
 	status = rtas_call(ibm_set_xive, 3, 1, NULL,
@@ -372,8 +372,10 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
 	if (status) {
 		printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n",
 			__func__, irq, status);
-		return;
+		return -1;
 	}
+
+	return 0;
 }
 
 static struct irq_chip xics_pic_direct = {
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 21b95670159..f4cbd15cf22 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -807,7 +807,7 @@ static void mpic_end_ipi(unsigned int irq)
 
 #endif /* CONFIG_SMP */
 
-void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
+int mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 {
 	struct mpic *mpic = mpic_from_irq(irq);
 	unsigned int src = mpic_irq_to_hw(irq);
@@ -824,6 +824,8 @@ void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 		mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION),
 			       mpic_physmask(cpus_addr(tmp)[0]));
 	}
+
+	return 0;
 }
 
 static unsigned int mpic_type_to_vecpri(struct mpic *mpic, unsigned int type)
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index 5deabe921a4..e5e78f9cfc9 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -318,10 +318,12 @@ static void sun4u_irq_enable(unsigned int virt_irq)
 	}
 }
 
-static void sun4u_set_affinity(unsigned int virt_irq,
+static int sun4u_set_affinity(unsigned int virt_irq,
 			       const struct cpumask *mask)
 {
 	sun4u_irq_enable(virt_irq);
+
+	return 0;
 }
 
 /* Don't do anything.  The desc->status check for IRQ_DISABLED in
@@ -377,7 +379,7 @@ static void sun4v_irq_enable(unsigned int virt_irq)
 		       ino, err);
 }
 
-static void sun4v_set_affinity(unsigned int virt_irq,
+static int sun4v_set_affinity(unsigned int virt_irq,
 			       const struct cpumask *mask)
 {
 	unsigned int ino = virt_irq_table[virt_irq].dev_ino;
@@ -388,6 +390,8 @@ static void sun4v_set_affinity(unsigned int virt_irq,
 	if (err != HV_EOK)
 		printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): "
 		       "err(%d)\n", ino, cpuid, err);
+
+	return 0;
 }
 
 static void sun4v_irq_disable(unsigned int virt_irq)
@@ -445,7 +449,7 @@ static void sun4v_virq_enable(unsigned int virt_irq)
 		       dev_handle, dev_ino, err);
 }
 
-static void sun4v_virt_set_affinity(unsigned int virt_irq,
+static int sun4v_virt_set_affinity(unsigned int virt_irq,
 				    const struct cpumask *mask)
 {
 	unsigned long cpuid, dev_handle, dev_ino;
@@ -461,6 +465,8 @@ static void sun4v_virt_set_affinity(unsigned int virt_irq,
 		printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
 		       "err(%d)\n",
 		       dev_handle, dev_ino, cpuid, err);
+
+	return 0;
 }
 
 static void sun4v_virq_disable(unsigned int virt_irq)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 9fbf0f7ec7e..5c7630b40a5 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -574,13 +574,14 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
 	return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
 }
 
-static void
+static int
 set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	unsigned long flags;
 	unsigned int dest;
 	unsigned int irq;
+	int ret = -1;
 
 	irq = desc->irq;
 	cfg = desc->chip_data;
@@ -591,18 +592,21 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 		/* Only the high 8 bits are valid. */
 		dest = SET_APIC_LOGICAL_ID(dest);
 		__target_IO_APIC_irq(irq, dest, cfg);
+		ret = 0;
 	}
 	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return ret;
 }
 
-static void
+static int
 set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc;
 
 	desc = irq_to_desc(irq);
 
-	set_ioapic_affinity_irq_desc(desc, mask);
+	return set_ioapic_affinity_irq_desc(desc, mask);
 }
 #endif /* CONFIG_SMP */
 
@@ -2348,24 +2352,25 @@ static int ioapic_retrigger_irq(unsigned int irq)
  * Real vector that is used for interrupting cpu will be coming from
  * the interrupt-remapping table entry.
  */
-static void
+static int
 migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	struct irte irte;
 	unsigned int dest;
 	unsigned int irq;
+	int ret = -1;
 
 	if (!cpumask_intersects(mask, cpu_online_mask))
-		return;
+		return ret;
 
 	irq = desc->irq;
 	if (get_irte(irq, &irte))
-		return;
+		return ret;
 
 	cfg = desc->chip_data;
 	if (assign_irq_vector(irq, cfg, mask))
-		return;
+		return ret;
 
 	dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
 
@@ -2381,27 +2386,30 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 		send_cleanup_vector(cfg);
 
 	cpumask_copy(desc->affinity, mask);
+
+	return 0;
 }
 
 /*
  * Migrates the IRQ destination in the process context.
  */
-static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
 					    const struct cpumask *mask)
 {
-	migrate_ioapic_irq_desc(desc, mask);
+	return migrate_ioapic_irq_desc(desc, mask);
 }
-static void set_ir_ioapic_affinity_irq(unsigned int irq,
+static int set_ir_ioapic_affinity_irq(unsigned int irq,
 				       const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 
-	set_ir_ioapic_affinity_irq_desc(desc, mask);
+	return set_ir_ioapic_affinity_irq_desc(desc, mask);
 }
 #else
-static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
 						   const struct cpumask *mask)
 {
+	return 0;
 }
 #endif
 
@@ -3318,7 +3326,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 }
 
 #ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3327,7 +3335,7 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	cfg = desc->chip_data;
 
@@ -3339,13 +3347,15 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	write_msi_msg_desc(desc, &msg);
+
+	return 0;
 }
 #ifdef CONFIG_INTR_REMAP
 /*
  * Migrate the MSI irq to another cpumask. This migration is
  * done in the process context using interrupt-remapping hardware.
  */
-static void
+static int
 ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
@@ -3354,11 +3364,11 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 	struct irte irte;
 
 	if (get_irte(irq, &irte))
-		return;
+		return -1;
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	irte.vector = cfg->vector;
 	irte.dest_id = IRTE_DEST(dest);
@@ -3375,6 +3385,8 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 	 */
 	if (cfg->move_in_progress)
 		send_cleanup_vector(cfg);
+
+	return 0;
 }
 
 #endif
@@ -3528,7 +3540,7 @@ void arch_teardown_msi_irq(unsigned int irq)
 
 #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3537,7 +3549,7 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	cfg = desc->chip_data;
 
@@ -3549,6 +3561,8 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	dmar_msi_write(irq, &msg);
+
+	return 0;
 }
 
 #endif /* CONFIG_SMP */
@@ -3582,7 +3596,7 @@ int arch_setup_dmar_msi(unsigned int irq)
 #ifdef CONFIG_HPET_TIMER
 
 #ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3591,7 +3605,7 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	cfg = desc->chip_data;
 
@@ -3603,6 +3617,8 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	hpet_msi_write(irq, &msg);
+
+	return 0;
 }
 
 #endif /* CONFIG_SMP */
@@ -3659,7 +3675,7 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 	write_ht_irq_msg(irq, &msg);
 }
 
-static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3667,11 +3683,13 @@ static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	cfg = desc->chip_data;
 
 	target_ht_irq(irq, dest, cfg->vector);
+
+	return 0;
 }
 
 #endif
diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c
index 73348c4047e..4a9cc92d4d1 100644
--- a/drivers/parisc/iosapic.c
+++ b/drivers/parisc/iosapic.c
@@ -702,7 +702,7 @@ static unsigned int iosapic_startup_irq(unsigned int irq)
 }
 
 #ifdef CONFIG_SMP
-static void iosapic_set_affinity_irq(unsigned int irq,
+static int iosapic_set_affinity_irq(unsigned int irq,
 				     const struct cpumask *dest)
 {
 	struct vector_info *vi = iosapic_get_vector(irq);
@@ -712,7 +712,7 @@ static void iosapic_set_affinity_irq(unsigned int irq,
 
 	dest_cpu = cpu_check_affinity(irq, dest);
 	if (dest_cpu < 0)
-		return;
+		return -1;
 
 	cpumask_copy(irq_desc[irq].affinity, cpumask_of(dest_cpu));
 	vi->txn_addr = txn_affinity_addr(irq, dest_cpu);
@@ -724,6 +724,8 @@ static void iosapic_set_affinity_irq(unsigned int irq,
 	iosapic_set_irt_data(vi, &dummy_d0, &d1);
 	iosapic_wr_irt_entry(vi, d0, d1);
 	spin_unlock_irqrestore(&iosapic_lock, flags);
+
+	return 0;
 }
 #endif
 
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 30963af5dba..33389880279 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -688,13 +688,13 @@ void rebind_evtchn_irq(int evtchn, int irq)
 }
 
 /* Rebind an evtchn so that it gets delivered to a specific cpu */
-static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
+static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
 {
 	struct evtchn_bind_vcpu bind_vcpu;
 	int evtchn = evtchn_from_irq(irq);
 
 	if (!VALID_EVTCHN(evtchn))
-		return;
+		return -1;
 
 	/* Send future instances of this interrupt to other vcpu. */
 	bind_vcpu.port = evtchn;
@@ -707,13 +707,15 @@ static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
 	 */
 	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
 		bind_evtchn_to_cpu(evtchn, tcpu);
-}
 
+	return 0;
+}
 
-static void set_affinity_irq(unsigned irq, const struct cpumask *dest)
+static int set_affinity_irq(unsigned irq, const struct cpumask *dest)
 {
 	unsigned tcpu = cpumask_first(dest);
-	rebind_irq_to_cpu(irq, tcpu);
+
+	return rebind_irq_to_cpu(irq, tcpu);
 }
 
 int resend_irq_on_evtchn(unsigned int irq)
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 2a34cd6281d..8e4c18b2915 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -117,7 +117,7 @@ struct irq_chip {
 	void		(*eoi)(unsigned int irq);
 
 	void		(*end)(unsigned int irq);
-	void		(*set_affinity)(unsigned int irq,
+	int		(*set_affinity)(unsigned int irq,
 					const struct cpumask *dest);
 	int		(*retrigger)(unsigned int irq);
 	int		(*set_type)(unsigned int irq, unsigned int flow_type);
-- 
cgit v1.2.3-70-g09d2


From 85ac16d033370caf6f48d743c8dc8103700f5cc5 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 27 Apr 2009 18:00:38 -0700
Subject: x86/irq: change irq_desc_alloc() to take node instead of cpu

This simplifies the node awareness of the code. All our allocators
only deal with a NUMA node ID locality not with CPU ids anyway - so
there's no need to maintain (and transform) a CPU id all across the
IRq layer.

v2: keep move_irq_desc related

[ Impact: cleanup, prepare IRQ code to be NUMA-aware ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
LKML-Reference: <49F65536.2020300@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic/io_apic.c | 58 +++++++++++++++++++-----------------------
 arch/x86/lguest/boot.c         |  2 +-
 drivers/pci/intr_remapping.c   | 15 +++++------
 drivers/xen/events.c           |  2 +-
 include/linux/interrupt.h      |  2 +-
 include/linux/irq.h            | 16 +++++-------
 kernel/irq/handle.c            | 28 ++++++++------------
 kernel/irq/internals.h         |  2 +-
 kernel/irq/numa_migrate.c      | 36 +++++++++-----------------
 kernel/softirq.c               |  2 +-
 10 files changed, 66 insertions(+), 97 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 5c7630b40a5..560b887ba27 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -129,12 +129,9 @@ struct irq_pin_list {
 	struct irq_pin_list *next;
 };
 
-static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+static struct irq_pin_list *get_one_free_irq_2_pin(int node)
 {
 	struct irq_pin_list *pin;
-	int node;
-
-	node = cpu_to_node(cpu);
 
 	pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
 
@@ -209,12 +206,9 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
 	return cfg;
 }
 
-static struct irq_cfg *get_one_free_irq_cfg(int cpu)
+static struct irq_cfg *get_one_free_irq_cfg(int node)
 {
 	struct irq_cfg *cfg;
-	int node;
-
-	node = cpu_to_node(cpu);
 
 	cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
 	if (cfg) {
@@ -235,13 +229,13 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
 	return cfg;
 }
 
-int arch_init_chip_data(struct irq_desc *desc, int cpu)
+int arch_init_chip_data(struct irq_desc *desc, int node)
 {
 	struct irq_cfg *cfg;
 
 	cfg = desc->chip_data;
 	if (!cfg) {
-		desc->chip_data = get_one_free_irq_cfg(cpu);
+		desc->chip_data = get_one_free_irq_cfg(node);
 		if (!desc->chip_data) {
 			printk(KERN_ERR "can not alloc irq_cfg\n");
 			BUG_ON(1);
@@ -253,7 +247,7 @@ int arch_init_chip_data(struct irq_desc *desc, int cpu)
 
 /* for move_irq_desc */
 static void
-init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
+init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node)
 {
 	struct irq_pin_list *old_entry, *head, *tail, *entry;
 
@@ -262,7 +256,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
 	if (!old_entry)
 		return;
 
-	entry = get_one_free_irq_2_pin(cpu);
+	entry = get_one_free_irq_2_pin(node);
 	if (!entry)
 		return;
 
@@ -272,7 +266,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
 	tail		= entry;
 	old_entry	= old_entry->next;
 	while (old_entry) {
-		entry = get_one_free_irq_2_pin(cpu);
+		entry = get_one_free_irq_2_pin(node);
 		if (!entry) {
 			entry = head;
 			while (entry) {
@@ -312,12 +306,12 @@ static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
 }
 
 void arch_init_copy_chip_data(struct irq_desc *old_desc,
-				 struct irq_desc *desc, int cpu)
+				 struct irq_desc *desc, int node)
 {
 	struct irq_cfg *cfg;
 	struct irq_cfg *old_cfg;
 
-	cfg = get_one_free_irq_cfg(cpu);
+	cfg = get_one_free_irq_cfg(node);
 
 	if (!cfg)
 		return;
@@ -328,7 +322,7 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc,
 
 	memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
 
-	init_copy_irq_2_pin(old_cfg, cfg, cpu);
+	init_copy_irq_2_pin(old_cfg, cfg, node);
 }
 
 static void free_irq_cfg(struct irq_cfg *old_cfg)
@@ -615,13 +609,13 @@ set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
+static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
 {
 	struct irq_pin_list *entry;
 
 	entry = cfg->irq_2_pin;
 	if (!entry) {
-		entry = get_one_free_irq_2_pin(cpu);
+		entry = get_one_free_irq_2_pin(node);
 		if (!entry) {
 			printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
 					apic, pin);
@@ -641,7 +635,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
 		entry = entry->next;
 	}
 
-	entry->next = get_one_free_irq_2_pin(cpu);
+	entry->next = get_one_free_irq_2_pin(node);
 	entry = entry->next;
 	entry->apic = apic;
 	entry->pin = pin;
@@ -650,7 +644,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
 /*
  * Reroute an IRQ to a different pin.
  */
-static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
+static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
 				      int oldapic, int oldpin,
 				      int newapic, int newpin)
 {
@@ -670,7 +664,7 @@ static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
 
 	/* why? call replace before add? */
 	if (!replaced)
-		add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
+		add_pin_to_irq_node(cfg, node, newapic, newpin);
 }
 
 static inline void io_apic_modify_irq(struct irq_cfg *cfg,
@@ -1612,7 +1606,7 @@ static void __init setup_IO_APIC_irqs(void)
 	int notcon = 0;
 	struct irq_desc *desc;
 	struct irq_cfg *cfg;
-	int cpu = boot_cpu_id;
+	int node = cpu_to_node(boot_cpu_id);
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
@@ -1647,13 +1641,13 @@ static void __init setup_IO_APIC_irqs(void)
 					apic->multi_timer_check(apic_id, irq))
 				continue;
 
-			desc = irq_to_desc_alloc_cpu(irq, cpu);
+			desc = irq_to_desc_alloc_node(irq, node);
 			if (!desc) {
 				printk(KERN_INFO "can not get irq_desc for %d\n", irq);
 				continue;
 			}
 			cfg = desc->chip_data;
-			add_pin_to_irq_cpu(cfg, cpu, apic_id, pin);
+			add_pin_to_irq_node(cfg, node, apic_id, pin);
 
 			setup_IO_APIC_irq(apic_id, pin, irq, desc,
 					irq_trigger(idx), irq_polarity(idx));
@@ -2863,7 +2857,7 @@ static inline void __init check_timer(void)
 {
 	struct irq_desc *desc = irq_to_desc(0);
 	struct irq_cfg *cfg = desc->chip_data;
-	int cpu = boot_cpu_id;
+	int node = cpu_to_node(boot_cpu_id);
 	int apic1, pin1, apic2, pin2;
 	unsigned long flags;
 	int no_pin1 = 0;
@@ -2929,7 +2923,7 @@ static inline void __init check_timer(void)
 		 * Ok, does IRQ0 through the IOAPIC work?
 		 */
 		if (no_pin1) {
-			add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
+			add_pin_to_irq_node(cfg, node, apic1, pin1);
 			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
 		} else {
 			/* for edge trigger, setup_IO_APIC_irq already
@@ -2966,7 +2960,7 @@ static inline void __init check_timer(void)
 		/*
 		 * legacy devices should be connected to IO APIC #0
 		 */
-		replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
+		replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
 		setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
 		enable_8259A_irq(0);
 		if (timer_irq_works()) {
@@ -3185,7 +3179,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
 	unsigned int new;
 	unsigned long flags;
 	struct irq_cfg *cfg_new = NULL;
-	int cpu = boot_cpu_id;
+	int node = cpu_to_node(boot_cpu_id);
 	struct irq_desc *desc_new = NULL;
 
 	irq = 0;
@@ -3194,7 +3188,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
 	spin_lock_irqsave(&vector_lock, flags);
 	for (new = irq_want; new < nr_irqs; new++) {
-		desc_new = irq_to_desc_alloc_cpu(new, cpu);
+		desc_new = irq_to_desc_alloc_node(new, node);
 		if (!desc_new) {
 			printk(KERN_INFO "can not get irq_desc for %d\n", new);
 			continue;
@@ -3968,7 +3962,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p
 {
 	struct irq_desc *desc;
 	struct irq_cfg *cfg;
-	int cpu = boot_cpu_id;
+	int node = cpu_to_node(boot_cpu_id);
 
 	if (!IO_APIC_IRQ(irq)) {
 		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
@@ -3976,7 +3970,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p
 		return -EINVAL;
 	}
 
-	desc = irq_to_desc_alloc_cpu(irq, cpu);
+	desc = irq_to_desc_alloc_node(irq, node);
 	if (!desc) {
 		printk(KERN_INFO "can not get irq_desc %d\n", irq);
 		return 0;
@@ -3987,7 +3981,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p
 	 */
 	if (irq >= NR_IRQS_LEGACY) {
 		cfg = desc->chip_data;
-		add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
+		add_pin_to_irq_node(cfg, node, ioapic, pin);
 	}
 
 	setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index ca7ec44bafc..45acbcf2568 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -636,7 +636,7 @@ static void __init lguest_init_IRQ(void)
 
 void lguest_setup_irq(unsigned int irq)
 {
-	irq_to_desc_alloc_cpu(irq, 0);
+	irq_to_desc_alloc_node(irq, 0);
 	set_irq_chip_and_handler_name(irq, &lguest_irq_controller,
 				      handle_level_irq, "level");
 }
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index f5e0ea724a6..9eff36a293e 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -23,15 +23,12 @@ struct irq_2_iommu {
 };
 
 #ifdef CONFIG_GENERIC_HARDIRQS
-static struct irq_2_iommu *get_one_free_irq_2_iommu(int cpu)
+static struct irq_2_iommu *get_one_free_irq_2_iommu(int node)
 {
 	struct irq_2_iommu *iommu;
-	int node;
-
-	node = cpu_to_node(cpu);
 
 	iommu = kzalloc_node(sizeof(*iommu), GFP_ATOMIC, node);
-	printk(KERN_DEBUG "alloc irq_2_iommu on cpu %d node %d\n", cpu, node);
+	printk(KERN_DEBUG "alloc irq_2_iommu on node %d\n", node);
 
 	return iommu;
 }
@@ -48,7 +45,7 @@ static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
 	return desc->irq_2_iommu;
 }
 
-static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu)
+static struct irq_2_iommu *irq_2_iommu_alloc_node(unsigned int irq, int node)
 {
 	struct irq_desc *desc;
 	struct irq_2_iommu *irq_iommu;
@@ -56,7 +53,7 @@ static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu)
 	/*
 	 * alloc irq desc if not allocated already.
 	 */
-	desc = irq_to_desc_alloc_cpu(irq, cpu);
+	desc = irq_to_desc_alloc_node(irq, node);
 	if (!desc) {
 		printk(KERN_INFO "can not get irq_desc for %d\n", irq);
 		return NULL;
@@ -65,14 +62,14 @@ static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu)
 	irq_iommu = desc->irq_2_iommu;
 
 	if (!irq_iommu)
-		desc->irq_2_iommu = get_one_free_irq_2_iommu(cpu);
+		desc->irq_2_iommu = get_one_free_irq_2_iommu(node);
 
 	return desc->irq_2_iommu;
 }
 
 static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
 {
-	return irq_2_iommu_alloc_cpu(irq, boot_cpu_id);
+	return irq_2_iommu_alloc_node(irq, cpu_to_node(boot_cpu_id));
 }
 
 #else /* !CONFIG_SPARSE_IRQ */
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 33389880279..be437c2bc94 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -335,7 +335,7 @@ static int find_unbound_irq(void)
 	if (irq == nr_irqs)
 		panic("No available IRQ to bind to: increase nr_irqs!\n");
 
-	desc = irq_to_desc_alloc_cpu(irq, 0);
+	desc = irq_to_desc_alloc_node(irq, 0);
 	if (WARN_ON(desc == NULL))
 		return -1;
 
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 91bb76f44f1..ff374ceface 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -566,6 +566,6 @@ struct irq_desc;
 extern int early_irq_init(void);
 extern int arch_probe_nr_irqs(void);
 extern int arch_early_irq_init(void);
-extern int arch_init_chip_data(struct irq_desc *desc, int cpu);
+extern int arch_init_chip_data(struct irq_desc *desc, int node);
 
 #endif
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 8e4c18b2915..a09baf8f9d9 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -187,7 +187,7 @@ struct irq_desc {
 	spinlock_t		lock;
 #ifdef CONFIG_SMP
 	cpumask_var_t		affinity;
-	unsigned int		cpu;
+	unsigned int		node;
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	cpumask_var_t		pending_mask;
 #endif
@@ -201,16 +201,16 @@ struct irq_desc {
 } ____cacheline_internodealigned_in_smp;
 
 extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
-					struct irq_desc *desc, int cpu);
+					struct irq_desc *desc, int node);
 extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc);
 
 #ifndef CONFIG_SPARSE_IRQ
 extern struct irq_desc irq_desc[NR_IRQS];
 #else /* CONFIG_SPARSE_IRQ */
-extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);
+extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int node);
 #endif /* CONFIG_SPARSE_IRQ */
 
-extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);
+extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node);
 
 /*
  * Migration helpers for obsolete names, they will go away:
@@ -422,12 +422,10 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
  * Allocates affinity and pending_mask cpumask if required.
  * Returns true if successful (or not required).
  */
-static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu,
+static inline bool alloc_desc_masks(struct irq_desc *desc, int node,
 								bool boot)
 {
 #ifdef CONFIG_CPUMASK_OFFSTACK
-	int node;
-
 	if (boot) {
 		alloc_bootmem_cpumask_var(&desc->affinity);
 
@@ -437,8 +435,6 @@ static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu,
 		return true;
 	}
 
-	node = cpu_to_node(cpu);
-
 	if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node))
 		return false;
 
@@ -494,7 +490,7 @@ static inline void free_desc_masks(struct irq_desc *old_desc,
 
 #else /* !CONFIG_SMP */
 
-static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu,
+static inline bool alloc_desc_masks(struct irq_desc *desc, int node,
 								bool boot)
 {
 	return true;
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 3e0cbc44bd7..a6368db2618 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -81,12 +81,10 @@ static struct irq_desc irq_desc_init = {
 	.lock       = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
 };
 
-void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
+void init_kstat_irqs(struct irq_desc *desc, int node, int nr)
 {
-	int node;
 	void *ptr;
 
-	node = cpu_to_node(cpu);
 	ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs), GFP_ATOMIC, node);
 
 	/*
@@ -94,33 +92,32 @@ void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
 	 * init_copy_kstat_irqs() could still use old one
 	 */
 	if (ptr) {
-		printk(KERN_DEBUG "  alloc kstat_irqs on cpu %d node %d\n",
-			 cpu, node);
+		printk(KERN_DEBUG "  alloc kstat_irqs on node %d\n", node);
 		desc->kstat_irqs = ptr;
 	}
 }
 
-static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
+static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
 {
 	memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
 
 	spin_lock_init(&desc->lock);
 	desc->irq = irq;
 #ifdef CONFIG_SMP
-	desc->cpu = cpu;
+	desc->node = node;
 #endif
 	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
-	init_kstat_irqs(desc, cpu, nr_cpu_ids);
+	init_kstat_irqs(desc, node, nr_cpu_ids);
 	if (!desc->kstat_irqs) {
 		printk(KERN_ERR "can not alloc kstat_irqs\n");
 		BUG_ON(1);
 	}
-	if (!alloc_desc_masks(desc, cpu, false)) {
+	if (!alloc_desc_masks(desc, node, false)) {
 		printk(KERN_ERR "can not alloc irq_desc cpumasks\n");
 		BUG_ON(1);
 	}
 	init_desc_masks(desc);
-	arch_init_chip_data(desc, cpu);
+	arch_init_chip_data(desc, node);
 }
 
 /*
@@ -189,11 +186,10 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 	return NULL;
 }
 
-struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
+struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node)
 {
 	struct irq_desc *desc;
 	unsigned long flags;
-	int node;
 
 	if (irq >= nr_irqs) {
 		WARN(1, "irq (%d) >= nr_irqs (%d) in irq_to_desc_alloc\n",
@@ -212,15 +208,13 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
 	if (desc)
 		goto out_unlock;
 
-	node = cpu_to_node(cpu);
 	desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
-	printk(KERN_DEBUG "  alloc irq_desc for %d on cpu %d node %d\n",
-		 irq, cpu, node);
+	printk(KERN_DEBUG "  alloc irq_desc for %d on node %d\n", irq, node);
 	if (!desc) {
 		printk(KERN_ERR "can not alloc irq_desc\n");
 		BUG_ON(1);
 	}
-	init_one_irq_desc(irq, desc, cpu);
+	init_one_irq_desc(irq, desc, node);
 
 	irq_desc_ptrs[irq] = desc;
 
@@ -270,7 +264,7 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 	return (irq < NR_IRQS) ? irq_desc + irq : NULL;
 }
 
-struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
+struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node)
 {
 	return irq_to_desc(irq);
 }
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index de5f412f6a9..73468253143 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -16,7 +16,7 @@ extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
 extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
 
 extern struct lock_class_key irq_desc_lock_class;
-extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr);
+extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
 extern void clear_kstat_irqs(struct irq_desc *desc);
 extern spinlock_t sparse_irq_lock;
 
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index ce72bc3f4ce..2f69bee57bf 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -15,9 +15,9 @@
 
 static void init_copy_kstat_irqs(struct irq_desc *old_desc,
 				 struct irq_desc *desc,
-				 int cpu, int nr)
+				 int node, int nr)
 {
-	init_kstat_irqs(desc, cpu, nr);
+	init_kstat_irqs(desc, node, nr);
 
 	if (desc->kstat_irqs != old_desc->kstat_irqs)
 		memcpy(desc->kstat_irqs, old_desc->kstat_irqs,
@@ -34,20 +34,20 @@ static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc)
 }
 
 static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
-		 struct irq_desc *desc, int cpu)
+		 struct irq_desc *desc, int node)
 {
 	memcpy(desc, old_desc, sizeof(struct irq_desc));
-	if (!alloc_desc_masks(desc, cpu, false)) {
+	if (!alloc_desc_masks(desc, node, false)) {
 		printk(KERN_ERR "irq %d: can not get new irq_desc cpumask "
 				"for migration.\n", irq);
 		return false;
 	}
 	spin_lock_init(&desc->lock);
-	desc->cpu = cpu;
+	desc->node = node;
 	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
-	init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids);
+	init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids);
 	init_copy_desc_masks(old_desc, desc);
-	arch_init_copy_chip_data(old_desc, desc, cpu);
+	arch_init_copy_chip_data(old_desc, desc, node);
 	return true;
 }
 
@@ -59,12 +59,11 @@ static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
 }
 
 static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
-						int cpu)
+						int node)
 {
 	struct irq_desc *desc;
 	unsigned int irq;
 	unsigned long flags;
-	int node;
 
 	irq = old_desc->irq;
 
@@ -76,7 +75,6 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
 	if (desc && old_desc != desc)
 		goto out_unlock;
 
-	node = cpu_to_node(cpu);
 	desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
 	if (!desc) {
 		printk(KERN_ERR "irq %d: can not get new irq_desc "
@@ -85,7 +83,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
 		desc = old_desc;
 		goto out_unlock;
 	}
-	if (!init_copy_one_irq_desc(irq, old_desc, desc, cpu)) {
+	if (!init_copy_one_irq_desc(irq, old_desc, desc, node)) {
 		/* still use old one */
 		kfree(desc);
 		desc = old_desc;
@@ -107,24 +105,14 @@ out_unlock:
 	return desc;
 }
 
-struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu)
+struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
 {
-	int old_cpu;
-	int node, old_node;
-
 	/* those all static, do move them */
 	if (desc->irq < NR_IRQS_LEGACY)
 		return desc;
 
-	old_cpu = desc->cpu;
-	if (old_cpu != cpu) {
-		node = cpu_to_node(cpu);
-		old_node = cpu_to_node(old_cpu);
-		if (old_node != node)
-			desc = __real_move_irq_desc(desc, cpu);
-		else
-			desc->cpu = cpu;
-	}
+	if (desc->node != node)
+		desc = __real_move_irq_desc(desc, node);
 
 	return desc;
 }
diff --git a/kernel/softirq.c b/kernel/softirq.c
index b525dd34851..f674f332a02 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -828,7 +828,7 @@ int __init __weak arch_early_irq_init(void)
 	return 0;
 }
 
-int __weak arch_init_chip_data(struct irq_desc *desc, int cpu)
+int __weak arch_init_chip_data(struct irq_desc *desc, int node)
 {
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From a2f809b08ae4dddc1015c7dcd8659e5729e45b3e Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 27 Apr 2009 18:01:20 -0700
Subject: irq: change ACPI GSI APIs to also take a device argument

We want to use dev_to_node() later on, to be aware of the 'home node'
of the GSI in question.

[ Impact: cleanup, prepare the IRQ code to be more NUMA aware ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Len Brown <lenb@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Len Brown <lenb@kernel.org>
Cc: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-acpi@vger.kernel.org
Cc: linux-ia64@vger.kernel.org
LKML-Reference: <49F65560.20904@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/ia64/kernel/acpi.c        | 5 +++--
 arch/x86/include/asm/io_apic.h | 4 ++--
 arch/x86/include/asm/mpspec.h  | 4 +++-
 arch/x86/kernel/acpi/boot.c    | 8 ++++----
 arch/x86/kernel/apic/io_apic.c | 3 ++-
 drivers/acpi/pci_irq.c         | 5 +++--
 drivers/char/hpet.c            | 4 ++--
 drivers/pnp/pnpacpi/rsparser.c | 2 +-
 include/linux/acpi.h           | 2 +-
 9 files changed, 21 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 5510317db37..baec6f00f7f 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -636,7 +636,7 @@ void __init acpi_numa_arch_fixup(void)
  * success: return IRQ number (>=0)
  * failure: return < 0
  */
-int acpi_register_gsi(u32 gsi, int triggering, int polarity)
+int acpi_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
 {
 	if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM)
 		return gsi;
@@ -678,7 +678,8 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
 
 	fadt = (struct acpi_table_fadt *)fadt_header;
 
-	acpi_register_gsi(fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW);
+	acpi_register_gsi(NULL, fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE,
+				 ACPI_ACTIVE_LOW);
 	return 0;
 }
 
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 9d826e43601..07f2913ba5d 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -154,8 +154,8 @@ extern int timer_through_8259;
 extern int io_apic_get_unique_id(int ioapic, int apic_id);
 extern int io_apic_get_version(int ioapic);
 extern int io_apic_get_redir_entries(int ioapic);
-extern int io_apic_set_pci_routing(int ioapic, int pin, int irq,
-				   int edge_level, int active_high_low);
+extern int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin,
+				  int irq, int edge_level, int active_high_low);
 #endif /* CONFIG_ACPI */
 
 extern int (*ioapic_renumber_irq)(int ioapic, int irq);
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 642fc7fc8cd..3ea1f531f53 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -72,7 +72,9 @@ extern void mp_register_ioapic(int id, u32 address, u32 gsi_base);
 extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
 				   u32 gsi);
 extern void mp_config_acpi_legacy_irqs(void);
-extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low);
+struct device;
+extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level,
+				 int active_high_low);
 extern int acpi_probe_gsi(void);
 #ifdef CONFIG_X86_IO_APIC
 extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 723989d7f80..6ee96b5530f 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -522,7 +522,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
  * success: return IRQ number (>=0)
  * failure: return < 0
  */
-int acpi_register_gsi(u32 gsi, int triggering, int polarity)
+int acpi_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
 {
 	unsigned int irq;
 	unsigned int plat_gsi = gsi;
@@ -539,7 +539,7 @@ int acpi_register_gsi(u32 gsi, int triggering, int polarity)
 
 #ifdef CONFIG_X86_IO_APIC
 	if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
-		plat_gsi = mp_register_gsi(gsi, triggering, polarity);
+		plat_gsi = mp_register_gsi(dev, gsi, triggering, polarity);
 	}
 #endif
 	acpi_gsi_to_irq(plat_gsi, &irq);
@@ -1158,7 +1158,7 @@ void __init mp_config_acpi_legacy_irqs(void)
 	}
 }
 
-int mp_register_gsi(u32 gsi, int triggering, int polarity)
+int mp_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
 {
 	int ioapic;
 	int ioapic_pin;
@@ -1253,7 +1253,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
 		}
 	}
 #endif
-	io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
+	io_apic_set_pci_routing(dev, ioapic, ioapic_pin, gsi,
 				triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
 				polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
 	return gsi;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 560b887ba27..d9346622601 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3958,7 +3958,8 @@ int __init io_apic_get_version(int ioapic)
 }
 #endif
 
-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
+int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, int irq,
+				 int triggering, int polarity)
 {
 	struct irq_desc *desc;
 	struct irq_cfg *cfg;
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index 51b9f8280f8..2faa9e2ac89 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -401,7 +401,8 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
 		/* Interrupt Line values above 0xF are forbidden */
 		if (dev->irq > 0 && (dev->irq <= 0xF)) {
 			printk(" - using IRQ %d\n", dev->irq);
-			acpi_register_gsi(dev->irq, ACPI_LEVEL_SENSITIVE,
+			acpi_register_gsi(&dev->dev, dev->irq,
+					  ACPI_LEVEL_SENSITIVE,
 					  ACPI_ACTIVE_LOW);
 			return 0;
 		} else {
@@ -410,7 +411,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
 		}
 	}
 
-	rc = acpi_register_gsi(gsi, triggering, polarity);
+	rc = acpi_register_gsi(&dev->dev, gsi, triggering, polarity);
 	if (rc < 0) {
 		dev_warn(&dev->dev, "PCI INT %c: failed to register GSI\n",
 			 pin_name(pin));
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 340ba4f9dc5..4a9f3492b92 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -224,7 +224,7 @@ static void hpet_timer_set_irq(struct hpet_dev *devp)
 			break;
 		}
 
-		gsi = acpi_register_gsi(irq, ACPI_LEVEL_SENSITIVE,
+		gsi = acpi_register_gsi(NULL, irq, ACPI_LEVEL_SENSITIVE,
 					ACPI_ACTIVE_LOW);
 		if (gsi > 0)
 			break;
@@ -939,7 +939,7 @@ static acpi_status hpet_resources(struct acpi_resource *res, void *data)
 		irqp = &res->data.extended_irq;
 
 		for (i = 0; i < irqp->interrupt_count; i++) {
-			irq = acpi_register_gsi(irqp->interrupts[i],
+			irq = acpi_register_gsi(NULL, irqp->interrupts[i],
 				      irqp->triggering, irqp->polarity);
 			if (irq < 0)
 				return AE_ERROR;
diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c
index adf17856bac..7f207f335be 100644
--- a/drivers/pnp/pnpacpi/rsparser.c
+++ b/drivers/pnp/pnpacpi/rsparser.c
@@ -123,7 +123,7 @@ static void pnpacpi_parse_allocated_irqresource(struct pnp_dev *dev,
 	}
 
 	flags = irq_flags(triggering, polarity, shareable);
-	irq = acpi_register_gsi(gsi, triggering, polarity);
+	irq = acpi_register_gsi(&dev->dev, gsi, triggering, polarity);
 	if (irq >= 0)
 		pcibios_penalize_isa_irq(irq, 1);
 	else
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 88be890ee3c..51b4b0a5ce8 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -119,7 +119,7 @@ extern int pci_mmcfg_config_num;
 extern int sbf_port;
 extern unsigned long acpi_realmode_flags;
 
-int acpi_register_gsi (u32 gsi, int triggering, int polarity);
+int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity);
 int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
 
 #ifdef CONFIG_X86_IO_APIC
-- 
cgit v1.2.3-70-g09d2


From d047f53a2ecce37e3bdf79eac5a326fbaadb3628 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 27 Apr 2009 18:02:23 -0700
Subject: x86/irq: change MSI irq_desc to be more numa aware

Try to get irq_desc on the home node in create_irq_nr().

v2: don't check if we can move it when sparse_irq is not used
v3: use move_irq_des, if that node is not what we want

[ Impact: optimization, make MSI IRQ descriptors more NUMA aware ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
LKML-Reference: <49F6559F.7070005@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic/io_apic.c | 17 +++++++++++++----
 include/linux/irq.h            |  2 +-
 2 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 82376e021b5..9cd4806cdf5 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3172,14 +3172,13 @@ static int nr_irqs_gsi = NR_IRQS_LEGACY;
 /*
  * Dynamic irq allocate and deallocation
  */
-unsigned int create_irq_nr(unsigned int irq_want)
+unsigned int create_irq_nr(unsigned int irq_want, int node)
 {
 	/* Allocate an unused irq */
 	unsigned int irq;
 	unsigned int new;
 	unsigned long flags;
 	struct irq_cfg *cfg_new = NULL;
-	int node = cpu_to_node(boot_cpu_id);
 	struct irq_desc *desc_new = NULL;
 
 	irq = 0;
@@ -3197,6 +3196,13 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
 		if (cfg_new->vector != 0)
 			continue;
+
+#ifdef CONFIG_NUMA_IRQ_DESC
+		/* different node ?*/
+		if (desc_new->node != node)
+			desc = move_irq_desc(desc, node);
+#endif
+
 		if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
 			irq = new;
 		break;
@@ -3214,11 +3220,12 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
 int create_irq(void)
 {
+	int node = cpu_to_node(boot_cpu_id);
 	unsigned int irq_want;
 	int irq;
 
 	irq_want = nr_irqs_gsi;
-	irq = create_irq_nr(irq_want);
+	irq = create_irq_nr(irq_want, node);
 
 	if (irq == 0)
 		irq = -1;
@@ -3476,15 +3483,17 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 	unsigned int irq_want;
 	struct intel_iommu *iommu = NULL;
 	int index = 0;
+	int node;
 
 	/* x86 doesn't support multiple MSI yet */
 	if (type == PCI_CAP_ID_MSI && nvec > 1)
 		return 1;
 
+	node = dev_to_node(&dev->dev);
 	irq_want = nr_irqs_gsi;
 	sub_handle = 0;
 	list_for_each_entry(msidesc, &dev->msi_list, list) {
-		irq = create_irq_nr(irq_want);
+		irq = create_irq_nr(irq_want, node);
 		if (irq == 0)
 			return -1;
 		irq_want = irq + 1;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index a09baf8f9d9..4b95ddb5304 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -376,7 +376,7 @@ extern void set_irq_noprobe(unsigned int irq);
 extern void set_irq_probe(unsigned int irq);
 
 /* Handle dynamic irq creation and destruction */
-extern unsigned int create_irq_nr(unsigned int irq_want);
+extern unsigned int create_irq_nr(unsigned int irq_want, int node);
 extern int create_irq(void);
 extern void destroy_irq(unsigned int irq);
 
-- 
cgit v1.2.3-70-g09d2


From 6a321cb370ad3db4ba6e405e638b3a42c41089b0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 28 Apr 2009 04:43:42 -0700
Subject: net: netif_tx_queue_stopped too expensive

netif_tx_queue_stopped(txq) is most of the time false.

Yet its cost is very expensive on SMP.

static inline int netif_tx_queue_stopped(const struct netdev_queue *dev_queue)
{
	return test_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
}

I saw this on oprofile hunting and bnx2 driver bnx2_tx_int().

We probably should split "struct netdev_queue" in two parts, one
being read mostly.

__netif_tx_lock() touches _xmit_lock & xmit_lock_owner, these
deserve a separate cache line.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f8c3619d551..505a3c6cb12 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -447,12 +447,18 @@ enum netdev_queue_state_t
 };
 
 struct netdev_queue {
+/*
+ * read mostly part
+ */
 	struct net_device	*dev;
 	struct Qdisc		*qdisc;
 	unsigned long		state;
-	spinlock_t		_xmit_lock;
-	int			xmit_lock_owner;
 	struct Qdisc		*qdisc_sleeping;
+/*
+ * write mostly part
+ */
+	spinlock_t		_xmit_lock ____cacheline_aligned_in_smp;
+	int			xmit_lock_owner;
 } ____cacheline_aligned_in_smp;
 
 
-- 
cgit v1.2.3-70-g09d2


From abc5c44d6284fab8fb21bcfc52c0f16f980637df Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 23 Apr 2009 19:31:25 -0400
Subject: SUNRPC: Fix error return value of svc_addr_len()

The svc_addr_len() helper function returns -EAFNOSUPPORT if it doesn't
recognize the address family of the passed-in socket address.  However,
the return type of this function is size_t, which means -EAFNOSUPPORT
is turned into a very large positive value in this case.

The check in svc_udp_recvfrom() to see if the return value is less
than zero therefore won't work at all.

Additionally, handle_connect_req() passes this value directly to
memset().  This could cause memset() to clobber a large chunk of memory
if svc_addr_len() has returned an error.  Currently the address family
of these addresses, however, is known to be supported long before
handle_connect_req() is called, so this isn't a real risk.

Change the error return value of svc_addr_len() to zero, which fits in
the range of size_t, and is safer to pass to memset() directly.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/sunrpc/svc_xprt.h | 5 +++--
 net/sunrpc/svcsock.c            | 7 ++++---
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 0d9cb6ef28b..d790c52525c 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -118,7 +118,7 @@ static inline unsigned short svc_addr_port(const struct sockaddr *sa)
 	return 0;
 }
 
-static inline size_t svc_addr_len(struct sockaddr *sa)
+static inline size_t svc_addr_len(const struct sockaddr *sa)
 {
 	switch (sa->sa_family) {
 	case AF_INET:
@@ -126,7 +126,8 @@ static inline size_t svc_addr_len(struct sockaddr *sa)
 	case AF_INET6:
 		return sizeof(struct sockaddr_in6);
 	}
-	return -EAFNOSUPPORT;
+
+	return 0;
 }
 
 static inline unsigned short svc_xprt_local_port(const struct svc_xprt *xprt)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index af3198814c1..8b083283413 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -426,13 +426,14 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
 		long		all[SVC_PKTINFO_SPACE / sizeof(long)];
 	} buffer;
 	struct cmsghdr *cmh = &buffer.hdr;
-	int		err, len;
 	struct msghdr msg = {
 		.msg_name = svc_addr(rqstp),
 		.msg_control = cmh,
 		.msg_controllen = sizeof(buffer),
 		.msg_flags = MSG_DONTWAIT,
 	};
+	size_t len;
+	int err;
 
 	if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
 	    /* udp sockets need large rcvbuf as all pending
@@ -464,8 +465,8 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
 		return -EAGAIN;
 	}
 	len = svc_addr_len(svc_addr(rqstp));
-	if (len < 0)
-		return len;
+	if (len == 0)
+		return -EAFNOSUPPORT;
 	rqstp->rq_addrlen = len;
 	if (skb->tstamp.tv64 == 0) {
 		skb->tstamp = ktime_get_real();
-- 
cgit v1.2.3-70-g09d2


From 335c54bdc4d3bacdbd619ec95cd0b352435bd37f Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 23 Apr 2009 19:32:25 -0400
Subject: NFSD: Prevent a buffer overflow in svc_xprt_names()

The svc_xprt_names() function can overflow its buffer if it's so near
the end of the passed in buffer that the "name too long" string still
doesn't fit.  Of course, it could never tell if it was near the end
of the passed in buffer, since its only caller passes in zero as the
buffer length.

Let's make this API a little safer.

Change svc_xprt_names() so it *always* checks for a buffer overflow,
and change its only caller to pass in the correct buffer length.

If svc_xprt_names() does overflow its buffer, it now fails with an
ENAMETOOLONG errno, instead of trying to write a message at the end
of the buffer.  I don't like this much, but I can't figure out a clean
way that's always safe to return some of the names, *and* an
indication that the buffer was not long enough.

The displayed error when doing a 'cat /proc/fs/nfsd/portlist' is
"File name too long".

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfsctl.c                |  2 +-
 include/linux/sunrpc/svc_xprt.h |  2 +-
 net/sunrpc/svc_xprt.c           | 56 ++++++++++++++++++++++++++++-------------
 3 files changed, 41 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index e051847b93f..6a1cd908e6b 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -918,7 +918,7 @@ static ssize_t __write_ports_names(char *buf)
 {
 	if (nfsd_serv == NULL)
 		return 0;
-	return svc_xprt_names(nfsd_serv, buf, 0);
+	return svc_xprt_names(nfsd_serv, buf, SIMPLE_TRANSACTION_LIMIT);
 }
 
 /*
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index d790c52525c..2223ae0b5ed 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -83,7 +83,7 @@ int	svc_port_is_privileged(struct sockaddr *sin);
 int	svc_print_xprts(char *buf, int maxlen);
 struct	svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
 			const sa_family_t af, const unsigned short port);
-int	svc_xprt_names(struct svc_serv *serv, char *buf, int buflen);
+int	svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen);
 
 static inline void svc_xprt_get(struct svc_xprt *xprt)
 {
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index f200393ac87..6f33d33cc06 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -1098,36 +1098,58 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
 }
 EXPORT_SYMBOL_GPL(svc_find_xprt);
 
-/*
- * Format a buffer with a list of the active transports. A zero for
- * the buflen parameter disables target buffer overflow checking.
+static int svc_one_xprt_name(const struct svc_xprt *xprt,
+			     char *pos, int remaining)
+{
+	int len;
+
+	len = snprintf(pos, remaining, "%s %u\n",
+			xprt->xpt_class->xcl_name,
+			svc_xprt_local_port(xprt));
+	if (len >= remaining)
+		return -ENAMETOOLONG;
+	return len;
+}
+
+/**
+ * svc_xprt_names - format a buffer with a list of transport names
+ * @serv: pointer to an RPC service
+ * @buf: pointer to a buffer to be filled in
+ * @buflen: length of buffer to be filled in
+ *
+ * Fills in @buf with a string containing a list of transport names,
+ * each name terminated with '\n'.
+ *
+ * Returns positive length of the filled-in string on success; otherwise
+ * a negative errno value is returned if an error occurs.
  */
-int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen)
+int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen)
 {
 	struct svc_xprt *xprt;
-	char xprt_str[64];
-	int totlen = 0;
-	int len;
+	int len, totlen;
+	char *pos;
 
 	/* Sanity check args */
 	if (!serv)
 		return 0;
 
 	spin_lock_bh(&serv->sv_lock);
+
+	pos = buf;
+	totlen = 0;
 	list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) {
-		len = snprintf(xprt_str, sizeof(xprt_str),
-			       "%s %d\n", xprt->xpt_class->xcl_name,
-			       svc_xprt_local_port(xprt));
-		/* If the string was truncated, replace with error string */
-		if (len >= sizeof(xprt_str))
-			strcpy(xprt_str, "name-too-long\n");
-		/* Don't overflow buffer */
-		len = strlen(xprt_str);
-		if (buflen && (len + totlen >= buflen))
+		len = svc_one_xprt_name(xprt, pos, buflen - totlen);
+		if (len < 0) {
+			*buf = '\0';
+			totlen = len;
+		}
+		if (len <= 0)
 			break;
-		strcpy(buf+totlen, xprt_str);
+
+		pos += len;
 		totlen += len;
 	}
+
 	spin_unlock_bh(&serv->sv_lock);
 	return totlen;
 }
-- 
cgit v1.2.3-70-g09d2


From bfba9ab4c64f0e5c33930711e6c073c285e01fcf Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 23 Apr 2009 19:32:33 -0400
Subject: SUNRPC: pass buffer size to svc_addsock()

Adjust the synopsis of svc_addsock() to pass in the size of the output
buffer.  Add a documenting comment.

This is a cosmetic change for now.  A subsequent patch will make sure
the buffer length is passed to one_sock_name(), where the length will
actually be useful.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfsctl.c               |  2 +-
 include/linux/sunrpc/svcsock.h |  3 ++-
 net/sunrpc/svcsock.c           | 16 +++++++++++++---
 3 files changed, 16 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 6a1cd908e6b..1f1c2159b80 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -943,7 +943,7 @@ static ssize_t __write_ports_addfd(char *buf)
 	if (err != 0)
 		goto out;
 
-	err = svc_addsock(nfsd_serv, fd, buf);
+	err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT);
 	if (err < 0)
 		lockd_down();
 
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 483e10380aa..e23241c53f4 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -39,7 +39,8 @@ int		svc_send(struct svc_rqst *);
 void		svc_drop(struct svc_rqst *);
 void		svc_sock_update_bufs(struct svc_serv *serv);
 int		svc_sock_names(char *buf, struct svc_serv *serv, char *toclose);
-int		svc_addsock(struct svc_serv *serv, int fd, char *name_return);
+int		svc_addsock(struct svc_serv *serv, const int fd,
+					char *name_return, const size_t len);
 void		svc_init_xprt_sock(void);
 void		svc_cleanup_xprt_sock(void);
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 8b083283413..6bec1e25b54 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1128,9 +1128,19 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	return svsk;
 }
 
-int svc_addsock(struct svc_serv *serv,
-		int fd,
-		char *name_return)
+/**
+ * svc_addsock - add a listener socket to an RPC service
+ * @serv: pointer to RPC service to which to add a new listener
+ * @fd: file descriptor of the new listener
+ * @name_return: pointer to buffer to fill in with name of listener
+ * @len: size of the buffer
+ *
+ * Fills in socket name and returns positive length of name if successful.
+ * Name is terminated with '\n'.  On error, returns a negative errno
+ * value.
+ */
+int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
+		const size_t len)
 {
 	int err = 0;
 	struct socket *so = sockfd_lookup(fd, &err);
-- 
cgit v1.2.3-70-g09d2


From 8435d34dbbe75678c3cdad3d53b1e7996a79b3bf Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 23 Apr 2009 19:32:40 -0400
Subject: SUNRPC: pass buffer size to svc_sock_names()

Adjust the synopsis of svc_sock_names() to pass in the size of the
output buffer.  Add a documenting comment.

This is a cosmetic change for now.  A subsequent patch will make sure
the buffer length is passed to one_sock_name(), where the length will
actually be useful.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfsctl.c               |  3 ++-
 include/linux/sunrpc/svcsock.h |  4 +++-
 net/sunrpc/svcsock.c           | 19 +++++++++++++++++--
 3 files changed, 22 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 1f1c2159b80..b64a7fbfccf 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -966,7 +966,8 @@ static ssize_t __write_ports_delfd(char *buf)
 		return -ENOMEM;
 
 	if (nfsd_serv != NULL)
-		len = svc_sock_names(buf, nfsd_serv, toclose);
+		len = svc_sock_names(nfsd_serv, buf,
+					SIMPLE_TRANSACTION_LIMIT, toclose);
 	if (len >= 0)
 		lockd_down();
 
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index e23241c53f4..82716313894 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -38,7 +38,9 @@ int		svc_recv(struct svc_rqst *, long);
 int		svc_send(struct svc_rqst *);
 void		svc_drop(struct svc_rqst *);
 void		svc_sock_update_bufs(struct svc_serv *serv);
-int		svc_sock_names(char *buf, struct svc_serv *serv, char *toclose);
+int		svc_sock_names(struct svc_serv *serv, char *buf,
+					const size_t buflen,
+					const char *toclose);
 int		svc_addsock(struct svc_serv *serv, const int fd,
 					char *name_return, const size_t len);
 void		svc_init_xprt_sock(void);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 6bec1e25b54..032b52ea954 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -259,8 +259,23 @@ static int one_sock_name(char *buf, struct svc_sock *svsk)
 	return len;
 }
 
-int
-svc_sock_names(char *buf, struct svc_serv *serv, char *toclose)
+/**
+ * svc_sock_names - construct a list of listener names in a string
+ * @serv: pointer to RPC service
+ * @buf: pointer to a buffer to fill in with socket names
+ * @buflen: size of the buffer to be filled
+ * @toclose: pointer to '\0'-terminated C string containing the name
+ *		of a listener to be closed
+ *
+ * Fills in @buf with a '\n'-separated list of names of listener
+ * sockets.  If @toclose is not NULL, the socket named by @toclose
+ * is closed, and is not included in the output list.
+ *
+ * Returns positive length of the socket name string, or a negative
+ * errno value on error.
+ */
+int svc_sock_names(struct svc_serv *serv, char *buf, const size_t buflen,
+		   const char *toclose)
 {
 	struct svc_sock *svsk, *closesk = NULL;
 	int len = 0;
-- 
cgit v1.2.3-70-g09d2


From 4ed0d3e6c64cfd9ba4ceb2099b10d1cf8ece4320 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Fri, 24 Apr 2009 17:30:20 -0700
Subject: Intel IOMMU Pass Through Support

The patch adds kernel parameter intel_iommu=pt to set up pass through
mode in context mapping entry. This disables DMAR in linux kernel; but
KVM still runs on VT-d and interrupt remapping still works.

In this mode, kernel uses swiotlb for DMA API functions but other VT-d
functionalities are enabled for KVM. KVM always uses multi level
translation page table in VT-d. By default, pass though mode is disabled
in kernel.

This is useful when people don't want to enable VT-d DMAR in kernel but
still want to use KVM and interrupt remapping for reasons like DMAR
performance concern or debug purpose.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Acked-by: Weidong Han <weidong@intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 Documentation/kernel-parameters.txt |   1 +
 arch/ia64/include/asm/iommu.h       |   1 +
 arch/ia64/kernel/pci-swiotlb.c      |   2 +-
 arch/x86/include/asm/iommu.h        |   1 +
 arch/x86/kernel/pci-dma.c           |   6 ++
 arch/x86/kernel/pci-swiotlb.c       |   3 +-
 drivers/pci/dmar.c                  |  11 ++-
 drivers/pci/intel-iommu.c           | 180 ++++++++++++++++++++++++++----------
 include/linux/dma_remapping.h       |   8 ++
 include/linux/intel-iommu.h         |   2 +
 10 files changed, 165 insertions(+), 50 deletions(-)

(limited to 'include')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 600cdd72900..fa4faeb7597 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -965,6 +965,7 @@ and is between 256 and 4096 characters. It is defined in the file
 		nomerge
 		forcesac
 		soft
+		pt	[x86, IA64]
 
 	io7=		[HW] IO7 for Marvel based alpha systems
 			See comment before marvel_specify_io7 in
diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h
index 0490794fe4a..37d41ca5645 100644
--- a/arch/ia64/include/asm/iommu.h
+++ b/arch/ia64/include/asm/iommu.h
@@ -9,6 +9,7 @@ extern void pci_iommu_shutdown(void);
 extern void no_iommu_init(void);
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
+extern int iommu_pass_through;
 extern void iommu_dma_init(void);
 extern void machvec_init(const char *name);
 
diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c
index 285aae8431c..223abb13410 100644
--- a/arch/ia64/kernel/pci-swiotlb.c
+++ b/arch/ia64/kernel/pci-swiotlb.c
@@ -46,7 +46,7 @@ void __init swiotlb_dma_init(void)
 
 void __init pci_swiotlb_init(void)
 {
-	if (!iommu_detected) {
+	if (!iommu_detected || iommu_pass_through) {
 #ifdef CONFIG_IA64_GENERIC
 		swiotlb = 1;
 		printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n");
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index af326a2975b..fd6d21bbee6 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -6,6 +6,7 @@ extern void no_iommu_init(void);
 extern struct dma_map_ops nommu_dma_ops;
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
+extern int iommu_pass_through;
 
 /* 10 seconds */
 #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 745579bc825..8cad0d85424 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -160,6 +160,8 @@ again:
 	return page_address(page);
 }
 
+extern int iommu_pass_through;
+
 /*
  * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
  * documentation.
@@ -209,6 +211,10 @@ static __init int iommu_setup(char *p)
 #ifdef CONFIG_SWIOTLB
 		if (!strncmp(p, "soft", 4))
 			swiotlb = 1;
+		if (!strncmp(p, "pt", 2)) {
+			iommu_pass_through = 1;
+			return 1;
+		}
 #endif
 
 		gart_parse_options(p);
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 221a3853e26..3a0c51e0ba6 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -71,7 +71,8 @@ void __init pci_swiotlb_init(void)
 {
 	/* don't initialize swiotlb if iommu=off (no_iommu=1) */
 #ifdef CONFIG_X86_64
-	if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)
+	if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) ||
+		iommu_pass_through)
 	       swiotlb = 1;
 #endif
 	if (swiotlb_force)
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index fa3a11365ec..d3d86b749ee 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -515,6 +515,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
 	u32 ver;
 	static int iommu_allocated = 0;
 	int agaw = 0;
+	int msagaw = 0;
 
 	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
 	if (!iommu)
@@ -535,12 +536,20 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
 	agaw = iommu_calculate_agaw(iommu);
 	if (agaw < 0) {
 		printk(KERN_ERR
-			"Cannot get a valid agaw for iommu (seq_id = %d)\n",
+		       "Cannot get a valid agaw for iommu (seq_id = %d)\n",
+		       iommu->seq_id);
+		goto error;
+	}
+	msagaw = iommu_calculate_max_sagaw(iommu);
+	if (msagaw < 0) {
+		printk(KERN_ERR
+			"Cannot get a valid max agaw for iommu (seq_id = %d)\n",
 			iommu->seq_id);
 		goto error;
 	}
 #endif
 	iommu->agaw = agaw;
+	iommu->msagaw = msagaw;
 
 	/* the registers might be more than one page */
 	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 001b328adf8..13121821db7 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -53,6 +53,8 @@
 
 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
 
+#define MAX_AGAW_WIDTH 64
+
 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
 
 #define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
@@ -127,8 +129,6 @@ static inline void context_set_fault_enable(struct context_entry *context)
 	context->lo &= (((u64)-1) << 2) | 1;
 }
 
-#define CONTEXT_TT_MULTI_LEVEL 0
-
 static inline void context_set_translation_type(struct context_entry *context,
 						unsigned long value)
 {
@@ -288,6 +288,7 @@ int dmar_disabled = 1;
 static int __initdata dmar_map_gfx = 1;
 static int dmar_forcedac;
 static int intel_iommu_strict;
+int iommu_pass_through;
 
 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 static DEFINE_SPINLOCK(device_domain_lock);
@@ -397,17 +398,13 @@ void free_iova_mem(struct iova *iova)
 
 static inline int width_to_agaw(int width);
 
-/* calculate agaw for each iommu.
- * "SAGAW" may be different across iommus, use a default agaw, and
- * get a supported less agaw for iommus that don't support the default agaw.
- */
-int iommu_calculate_agaw(struct intel_iommu *iommu)
+static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
 {
 	unsigned long sagaw;
 	int agaw = -1;
 
 	sagaw = cap_sagaw(iommu->cap);
-	for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
+	for (agaw = width_to_agaw(max_gaw);
 	     agaw >= 0; agaw--) {
 		if (test_bit(agaw, &sagaw))
 			break;
@@ -416,6 +413,24 @@ int iommu_calculate_agaw(struct intel_iommu *iommu)
 	return agaw;
 }
 
+/*
+ * Calculate max SAGAW for each iommu.
+ */
+int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
+{
+	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
+}
+
+/*
+ * calculate agaw for each iommu.
+ * "SAGAW" may be different across iommus, use a default agaw, and
+ * get a supported less agaw for iommus that don't support the default agaw.
+ */
+int iommu_calculate_agaw(struct intel_iommu *iommu)
+{
+	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
+}
+
 /* in native case, each domain is related to only one iommu */
 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
 {
@@ -1321,8 +1336,8 @@ static void domain_exit(struct dmar_domain *domain)
 	free_domain_mem(domain);
 }
 
-static int domain_context_mapping_one(struct dmar_domain *domain,
-				      int segment, u8 bus, u8 devfn)
+static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
+				 u8 bus, u8 devfn, int translation)
 {
 	struct context_entry *context;
 	unsigned long flags;
@@ -1335,7 +1350,10 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 
 	pr_debug("Set context mapping for %02x:%02x.%d\n",
 		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+
 	BUG_ON(!domain->pgd);
+	BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
+	       translation != CONTEXT_TT_MULTI_LEVEL);
 
 	iommu = device_to_iommu(segment, bus, devfn);
 	if (!iommu)
@@ -1395,9 +1413,18 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 	}
 
 	context_set_domain_id(context, id);
-	context_set_address_width(context, iommu->agaw);
-	context_set_address_root(context, virt_to_phys(pgd));
-	context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
+
+	/*
+	 * In pass through mode, AW must be programmed to indicate the largest
+	 * AGAW value supported by hardware. And ASR is ignored by hardware.
+	 */
+	if (likely(translation == CONTEXT_TT_MULTI_LEVEL)) {
+		context_set_address_width(context, iommu->agaw);
+		context_set_address_root(context, virt_to_phys(pgd));
+	} else
+		context_set_address_width(context, iommu->msagaw);
+
+	context_set_translation_type(context, translation);
 	context_set_fault_enable(context);
 	context_set_present(context);
 	domain_flush_cache(domain, context, sizeof(*context));
@@ -1422,13 +1449,15 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 }
 
 static int
-domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
+domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
+			int translation)
 {
 	int ret;
 	struct pci_dev *tmp, *parent;
 
 	ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
-					 pdev->bus->number, pdev->devfn);
+					 pdev->bus->number, pdev->devfn,
+					 translation);
 	if (ret)
 		return ret;
 
@@ -1442,7 +1471,7 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
 		ret = domain_context_mapping_one(domain,
 						 pci_domain_nr(parent->bus),
 						 parent->bus->number,
-						 parent->devfn);
+						 parent->devfn, translation);
 		if (ret)
 			return ret;
 		parent = parent->bus->self;
@@ -1450,12 +1479,14 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
 	if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
 		return domain_context_mapping_one(domain,
 					pci_domain_nr(tmp->subordinate),
-					tmp->subordinate->number, 0);
+					tmp->subordinate->number, 0,
+					translation);
 	else /* this is a legacy PCI bridge */
 		return domain_context_mapping_one(domain,
 						  pci_domain_nr(tmp->bus),
 						  tmp->bus->number,
-						  tmp->devfn);
+						  tmp->devfn,
+						  translation);
 }
 
 static int domain_context_mapped(struct pci_dev *pdev)
@@ -1752,7 +1783,7 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev,
 		goto error;
 
 	/* context entry init */
-	ret = domain_context_mapping(domain, pdev);
+	ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
 	if (!ret)
 		return 0;
 error:
@@ -1853,6 +1884,23 @@ static inline void iommu_prepare_isa(void)
 }
 #endif /* !CONFIG_DMAR_FLPY_WA */
 
+/* Initialize each context entry as pass through.*/
+static int __init init_context_pass_through(void)
+{
+	struct pci_dev *pdev = NULL;
+	struct dmar_domain *domain;
+	int ret;
+
+	for_each_pci_dev(pdev) {
+		domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
+		ret = domain_context_mapping(domain, pdev,
+					     CONTEXT_TT_PASS_THROUGH);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
 static int __init init_dmars(void)
 {
 	struct dmar_drhd_unit *drhd;
@@ -1860,6 +1908,7 @@ static int __init init_dmars(void)
 	struct pci_dev *pdev;
 	struct intel_iommu *iommu;
 	int i, ret;
+	int pass_through = 1;
 
 	/*
 	 * for each drhd
@@ -1913,7 +1962,15 @@ static int __init init_dmars(void)
 			printk(KERN_ERR "IOMMU: allocate root entry failed\n");
 			goto error;
 		}
+		if (!ecap_pass_through(iommu->ecap))
+			pass_through = 0;
 	}
+	if (iommu_pass_through)
+		if (!pass_through) {
+			printk(KERN_INFO
+			       "Pass Through is not supported by hardware.\n");
+			iommu_pass_through = 0;
+		}
 
 	/*
 	 * Start from the sane iommu hardware state.
@@ -1976,37 +2033,57 @@ static int __init init_dmars(void)
 			       "IOMMU: enable interrupt remapping failed\n");
 	}
 #endif
+	/*
+	 * If pass through is set and enabled, context entries of all pci
+	 * devices are intialized by pass through translation type.
+	 */
+	if (iommu_pass_through) {
+		ret = init_context_pass_through();
+		if (ret) {
+			printk(KERN_ERR "IOMMU: Pass through init failed.\n");
+			iommu_pass_through = 0;
+		}
+	}
 
 	/*
-	 * For each rmrr
-	 *   for each dev attached to rmrr
-	 *   do
-	 *     locate drhd for dev, alloc domain for dev
-	 *     allocate free domain
-	 *     allocate page table entries for rmrr
-	 *     if context not allocated for bus
-	 *           allocate and init context
-	 *           set present in root table for this bus
-	 *     init context with domain, translation etc
-	 *    endfor
-	 * endfor
+	 * If pass through is not set or not enabled, setup context entries for
+	 * identity mappings for rmrr, gfx, and isa.
 	 */
-	for_each_rmrr_units(rmrr) {
-		for (i = 0; i < rmrr->devices_cnt; i++) {
-			pdev = rmrr->devices[i];
-			/* some BIOS lists non-exist devices in DMAR table */
-			if (!pdev)
-				continue;
-			ret = iommu_prepare_rmrr_dev(rmrr, pdev);
-			if (ret)
-				printk(KERN_ERR
+	if (!iommu_pass_through) {
+		/*
+		 * For each rmrr
+		 *   for each dev attached to rmrr
+		 *   do
+		 *     locate drhd for dev, alloc domain for dev
+		 *     allocate free domain
+		 *     allocate page table entries for rmrr
+		 *     if context not allocated for bus
+		 *           allocate and init context
+		 *           set present in root table for this bus
+		 *     init context with domain, translation etc
+		 *    endfor
+		 * endfor
+		 */
+		for_each_rmrr_units(rmrr) {
+			for (i = 0; i < rmrr->devices_cnt; i++) {
+				pdev = rmrr->devices[i];
+				/*
+				 * some BIOS lists non-exist devices in DMAR
+				 * table.
+				 */
+				if (!pdev)
+					continue;
+				ret = iommu_prepare_rmrr_dev(rmrr, pdev);
+				if (ret)
+					printk(KERN_ERR
 				 "IOMMU: mapping reserved region failed\n");
+			}
 		}
-	}
 
-	iommu_prepare_gfx_mapping();
+		iommu_prepare_gfx_mapping();
 
-	iommu_prepare_isa();
+		iommu_prepare_isa();
+	}
 
 	/*
 	 * for each drhd
@@ -2117,7 +2194,8 @@ get_valid_domain_for_dev(struct pci_dev *pdev)
 
 	/* make sure context mapping is ok */
 	if (unlikely(!domain_context_mapped(pdev))) {
-		ret = domain_context_mapping(domain, pdev);
+		ret = domain_context_mapping(domain, pdev,
+					     CONTEXT_TT_MULTI_LEVEL);
 		if (ret) {
 			printk(KERN_ERR
 				"Domain context map for %s failed",
@@ -2786,7 +2864,7 @@ int __init intel_iommu_init(void)
 	 * Check the need for DMA-remapping initialization now.
 	 * Above initialization will also be used by Interrupt-remapping.
 	 */
-	if (no_iommu || swiotlb || dmar_disabled)
+	if (no_iommu || (swiotlb && !iommu_pass_through) || dmar_disabled)
 		return -ENODEV;
 
 	iommu_init_mempool();
@@ -2806,7 +2884,15 @@ int __init intel_iommu_init(void)
 
 	init_timer(&unmap_timer);
 	force_iommu = 1;
-	dma_ops = &intel_dma_ops;
+
+	if (!iommu_pass_through) {
+		printk(KERN_INFO
+		       "Multi-level page-table translation for DMAR.\n");
+		dma_ops = &intel_dma_ops;
+	} else
+		printk(KERN_INFO
+		       "DMAR: Pass through translation for DMAR.\n");
+
 	init_iommu_sysfs();
 
 	register_iommu(&intel_iommu_ops);
@@ -3146,7 +3232,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 		return -EFAULT;
 	}
 
-	ret = domain_context_mapping(dmar_domain, pdev);
+	ret = domain_context_mapping(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
 	if (ret)
 		return ret;
 
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 1a455f1f86d..e0a03aff63d 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -13,6 +13,9 @@
 #define DMA_PTE_WRITE (2)
 #define DMA_PTE_SNP (1 << 11)
 
+#define CONTEXT_TT_MULTI_LEVEL	0
+#define CONTEXT_TT_PASS_THROUGH 2
+
 struct intel_iommu;
 struct dmar_domain;
 struct root_entry;
@@ -21,11 +24,16 @@ extern void free_dmar_iommu(struct intel_iommu *iommu);
 
 #ifdef CONFIG_DMAR
 extern int iommu_calculate_agaw(struct intel_iommu *iommu);
+extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu);
 #else
 static inline int iommu_calculate_agaw(struct intel_iommu *iommu)
 {
 	return 0;
 }
+static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
+{
+	return 0;
+}
 #endif
 
 extern int dmar_disabled;
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index aa8c5317123..7246971a7fe 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -120,6 +120,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 	(ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
 #define ecap_coherent(e)	((e) & 0x1)
 #define ecap_qis(e)		((e) & 0x2)
+#define ecap_pass_through(e)	((e >> 6) & 0x1)
 #define ecap_eim_support(e)	((e >> 4) & 0x1)
 #define ecap_ir_support(e)	((e >> 3) & 0x1)
 #define ecap_max_handle_mask(e) ((e >> 20) & 0xf)
@@ -302,6 +303,7 @@ struct intel_iommu {
 	spinlock_t	register_lock; /* protect register handling */
 	int		seq_id;	/* sequence id of the iommu */
 	int		agaw; /* agaw of this iommu */
+	int		msagaw; /* max sagaw of this iommu */
 	unsigned int 	irq;
 	unsigned char 	name[13];    /* Device Name */
 
-- 
cgit v1.2.3-70-g09d2


From 0f9a623dd6c9b5b4dd00c232f29525bfc7a8ecf2 Mon Sep 17 00:00:00 2001
From: Stuart Bennett <stuart@freedesktop.org>
Date: Tue, 28 Apr 2009 20:17:51 +0100
Subject: tracing: x86, mmiotrace: only register for die notifier when tracer
 active

Follow up to afcfe024aebd74b0984a41af9a34e009cf5badaf in Linus' tree
("x86: mmiotrace: quieten spurious warning message")

Signed-off-by: Stuart Bennett <stuart@freedesktop.org>
Acked-by: Pekka Paalanen <pq@iki.fi>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <1240946271-7083-5-git-send-email-stuart@freedesktop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/kmmio.c       | 27 ++++++++++++++++++++++-----
 arch/x86/mm/mmio-mod.c    |  2 ++
 include/linux/mmiotrace.h |  2 ++
 3 files changed, 26 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index a769d1a2d93..256ce643b0b 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -311,7 +311,12 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
 	struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
 
 	if (!ctx->active) {
-		pr_debug("kmmio: spurious debug trap on CPU %d.\n",
+		/*
+		 * debug traps without an active context are due to either
+		 * something external causing them (f.e. using a debugger while
+		 * mmio tracing enabled), or erroneous behaviour
+		 */
+		pr_warning("kmmio: unexpected debug trap on CPU %d.\n",
 							smp_processor_id());
 		goto out;
 	}
@@ -529,8 +534,8 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
 }
 EXPORT_SYMBOL(unregister_kmmio_probe);
 
-static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
-								void *args)
+static int
+kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
 {
 	struct die_args *arg = args;
 
@@ -545,11 +550,23 @@ static struct notifier_block nb_die = {
 	.notifier_call = kmmio_die_notifier
 };
 
-static int __init init_kmmio(void)
+int kmmio_init(void)
 {
 	int i;
+
 	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
 		INIT_LIST_HEAD(&kmmio_page_table[i]);
+
 	return register_die_notifier(&nb_die);
 }
-fs_initcall(init_kmmio); /* should be before device_initcall() */
+
+void kmmio_cleanup(void)
+{
+	int i;
+
+	unregister_die_notifier(&nb_die);
+	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) {
+		WARN_ONCE(!list_empty(&kmmio_page_table[i]),
+			KERN_ERR "kmmio_page_table not empty at cleanup, any further tracing will leak memory.\n");
+	}
+}
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index c9342ed8b40..132772a8ec5 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -451,6 +451,7 @@ void enable_mmiotrace(void)
 
 	if (nommiotrace)
 		pr_info(NAME "MMIO tracing disabled.\n");
+	kmmio_init();
 	enter_uniprocessor();
 	spin_lock_irq(&trace_lock);
 	atomic_inc(&mmiotrace_enabled);
@@ -473,6 +474,7 @@ void disable_mmiotrace(void)
 
 	clear_trace_list(); /* guarantees: no more kmmio callbacks */
 	leave_uniprocessor();
+	kmmio_cleanup();
 	pr_info(NAME "disabled.\n");
 out:
 	mutex_unlock(&mmiotrace_mutex);
diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h
index 3d1b7bde128..97491f78b08 100644
--- a/include/linux/mmiotrace.h
+++ b/include/linux/mmiotrace.h
@@ -30,6 +30,8 @@ extern unsigned int kmmio_count;
 
 extern int register_kmmio_probe(struct kmmio_probe *p);
 extern void unregister_kmmio_probe(struct kmmio_probe *p);
+extern int kmmio_init(void);
+extern void kmmio_cleanup(void);
 
 #ifdef CONFIG_MMIOTRACE
 /* kmmio is active by some kmmio_probes? */
-- 
cgit v1.2.3-70-g09d2


From 30e673b230f9d556eb81ef68a7b1a08c8b3b142c Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Tue, 28 Apr 2009 03:04:47 -0500
Subject: tracing/filters: move preds into event_filter object

Create a new event_filter object, and move the pred-related members
out of the call and subsystem objects and into the filter object - the
details of the filter implementation don't need to be exposed in the
call and subsystem in any case, and it will also help make the new
parser implementation a little cleaner.

[ Impact: refactor trace-filter code to prepare for new features ]

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: fweisbec@gmail.com
Cc: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <1240905887.6416.119.camel@tropicana>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace_event.h       |   4 +-
 kernel/trace/trace.h               |  10 ++--
 kernel/trace/trace_events.c        |   3 +-
 kernel/trace/trace_events_filter.c | 107 +++++++++++++++++++++++--------------
 4 files changed, 76 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 78a9ba24cbf..46a27f2695a 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -101,8 +101,8 @@ struct ftrace_event_call {
 	int			(*show_format)(struct trace_seq *s);
 	int			(*define_fields)(void);
 	struct list_head	fields;
-	int			n_preds;
-	struct filter_pred	**preds;
+	int			filter_active;
+	void			*filter;
 	void			*mod;
 
 #ifdef CONFIG_EVENT_PROFILE
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 7d55bcf50e4..1fb7d6ccadf 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -731,12 +731,16 @@ struct ftrace_event_field {
 	int			size;
 };
 
+struct event_filter {
+	int			n_preds;
+	struct filter_pred	**preds;
+};
+
 struct event_subsystem {
 	struct list_head	list;
 	const char		*name;
 	struct dentry		*entry;
-	int			n_preds;
-	struct filter_pred	**preds;
+	void			*filter;
 };
 
 struct filter_pred;
@@ -774,7 +778,7 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
 		     struct ring_buffer *buffer,
 		     struct ring_buffer_event *event)
 {
-	if (unlikely(call->n_preds) && !filter_match_preds(call, rec)) {
+	if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) {
 		ring_buffer_discard_commit(buffer, event);
 		return 1;
 	}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index be4d3a437c1..1cd1f37373d 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -757,8 +757,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
 
 	list_add(&system->list, &event_subsystems);
 
-	system->preds = NULL;
-	system->n_preds = 0;
+	system->filter = NULL;
 
 	entry = debugfs_create_file("filter", 0644, system->entry, system,
 				    &ftrace_subsystem_filter_fops);
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 65418288f95..1e861eca3d0 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -93,11 +93,12 @@ static int filter_pred_none(struct filter_pred *pred, void *event)
 /* return 1 if event matches, 0 otherwise (discard) */
 int filter_match_preds(struct ftrace_event_call *call, void *rec)
 {
+	struct event_filter *filter = call->filter;
 	int i, matched, and_failed = 0;
 	struct filter_pred *pred;
 
-	for (i = 0; i < call->n_preds; i++) {
-		pred = call->preds[i];
+	for (i = 0; i < filter->n_preds; i++) {
+		pred = filter->preds[i];
 		if (and_failed && !pred->or)
 			continue;
 		matched = pred->fn(pred, rec);
@@ -115,20 +116,20 @@ int filter_match_preds(struct ftrace_event_call *call, void *rec)
 }
 EXPORT_SYMBOL_GPL(filter_match_preds);
 
-static void __filter_print_preds(struct filter_pred **preds, int n_preds,
+static void __filter_print_preds(struct event_filter *filter,
 				 struct trace_seq *s)
 {
-	char *field_name;
 	struct filter_pred *pred;
+	char *field_name;
 	int i;
 
-	if (!n_preds) {
+	if (!filter || !filter->n_preds) {
 		trace_seq_printf(s, "none\n");
 		return;
 	}
 
-	for (i = 0; i < n_preds; i++) {
-		pred = preds[i];
+	for (i = 0; i < filter->n_preds; i++) {
+		pred = filter->preds[i];
 		field_name = pred->field_name;
 		if (i)
 			trace_seq_printf(s, pred->or ? "|| " : "&& ");
@@ -144,7 +145,7 @@ static void __filter_print_preds(struct filter_pred **preds, int n_preds,
 void filter_print_preds(struct ftrace_event_call *call, struct trace_seq *s)
 {
 	mutex_lock(&filter_mutex);
-	__filter_print_preds(call->preds, call->n_preds, s);
+	__filter_print_preds(call->filter, s);
 	mutex_unlock(&filter_mutex);
 }
 
@@ -152,7 +153,7 @@ void filter_print_subsystem_preds(struct event_subsystem *system,
 				  struct trace_seq *s)
 {
 	mutex_lock(&filter_mutex);
-	__filter_print_preds(system->preds, system->n_preds, s);
+	__filter_print_preds(system->filter, s);
 	mutex_unlock(&filter_mutex);
 }
 
@@ -200,12 +201,14 @@ static int filter_set_pred(struct filter_pred *dest,
 
 static void __filter_disable_preds(struct ftrace_event_call *call)
 {
+	struct event_filter *filter = call->filter;
 	int i;
 
-	call->n_preds = 0;
+	call->filter_active = 0;
+	filter->n_preds = 0;
 
 	for (i = 0; i < MAX_FILTER_PRED; i++)
-		call->preds[i]->fn = filter_pred_none;
+		filter->preds[i]->fn = filter_pred_none;
 }
 
 void filter_disable_preds(struct ftrace_event_call *call)
@@ -217,32 +220,39 @@ void filter_disable_preds(struct ftrace_event_call *call)
 
 int init_preds(struct ftrace_event_call *call)
 {
+	struct event_filter *filter;
 	struct filter_pred *pred;
 	int i;
 
-	call->n_preds = 0;
-
-	call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL);
-	if (!call->preds)
+	filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (!call->filter)
 		return -ENOMEM;
 
+	call->filter_active = 0;
+	filter->n_preds = 0;
+
+	filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL);
+	if (!filter->preds)
+		goto oom;
+
 	for (i = 0; i < MAX_FILTER_PRED; i++) {
 		pred = kzalloc(sizeof(*pred), GFP_KERNEL);
 		if (!pred)
 			goto oom;
 		pred->fn = filter_pred_none;
-		call->preds[i] = pred;
+		filter->preds[i] = pred;
 	}
 
 	return 0;
 
 oom:
 	for (i = 0; i < MAX_FILTER_PRED; i++) {
-		if (call->preds[i])
-			filter_free_pred(call->preds[i]);
+		if (filter->preds[i])
+			filter_free_pred(filter->preds[i]);
 	}
-	kfree(call->preds);
-	call->preds = NULL;
+	kfree(filter->preds);
+	kfree(call->filter);
+	call->filter = NULL;
 
 	return -ENOMEM;
 }
@@ -250,15 +260,16 @@ EXPORT_SYMBOL_GPL(init_preds);
 
 static void __filter_free_subsystem_preds(struct event_subsystem *system)
 {
+	struct event_filter *filter = system->filter;
 	struct ftrace_event_call *call;
 	int i;
 
-	if (system->n_preds) {
-		for (i = 0; i < system->n_preds; i++)
-			filter_free_pred(system->preds[i]);
-		kfree(system->preds);
-		system->preds = NULL;
-		system->n_preds = 0;
+	if (filter && filter->n_preds) {
+		for (i = 0; i < filter->n_preds; i++)
+			filter_free_pred(filter->preds[i]);
+		kfree(filter->preds);
+		kfree(filter);
+		system->filter = NULL;
 	}
 
 	list_for_each_entry(call, &ftrace_events, list) {
@@ -281,21 +292,23 @@ static int filter_add_pred_fn(struct ftrace_event_call *call,
 			      struct filter_pred *pred,
 			      filter_pred_fn_t fn)
 {
+	struct event_filter *filter = call->filter;
 	int idx, err;
 
-	if (call->n_preds && !pred->compound)
+	if (filter->n_preds && !pred->compound)
 		__filter_disable_preds(call);
 
-	if (call->n_preds == MAX_FILTER_PRED)
+	if (filter->n_preds == MAX_FILTER_PRED)
 		return -ENOSPC;
 
-	idx = call->n_preds;
-	filter_clear_pred(call->preds[idx]);
-	err = filter_set_pred(call->preds[idx], pred, fn);
+	idx = filter->n_preds;
+	filter_clear_pred(filter->preds[idx]);
+	err = filter_set_pred(filter->preds[idx], pred, fn);
 	if (err)
 		return err;
 
-	call->n_preds++;
+	filter->n_preds++;
+	call->filter_active = 1;
 
 	return 0;
 }
@@ -366,29 +379,41 @@ int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred)
 int filter_add_subsystem_pred(struct event_subsystem *system,
 			      struct filter_pred *pred)
 {
+	struct event_filter *filter = system->filter;
 	struct ftrace_event_call *call;
 
 	mutex_lock(&filter_mutex);
 
-	if (system->n_preds && !pred->compound)
+	if (filter && filter->n_preds && !pred->compound) {
 		__filter_free_subsystem_preds(system);
+		filter = NULL;
+	}
 
-	if (!system->n_preds) {
-		system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
+	if (!filter) {
+		system->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+		if (!system->filter) {
+			mutex_unlock(&filter_mutex);
+			return -ENOMEM;
+		}
+		filter = system->filter;
+		filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
 					GFP_KERNEL);
-		if (!system->preds) {
+
+		if (!filter->preds) {
+			kfree(system->filter);
+			system->filter = NULL;
 			mutex_unlock(&filter_mutex);
 			return -ENOMEM;
 		}
 	}
 
-	if (system->n_preds == MAX_FILTER_PRED) {
+	if (filter->n_preds == MAX_FILTER_PRED) {
 		mutex_unlock(&filter_mutex);
 		return -ENOSPC;
 	}
 
-	system->preds[system->n_preds] = pred;
-	system->n_preds++;
+	filter->preds[filter->n_preds] = pred;
+	filter->n_preds++;
 
 	list_for_each_entry(call, &ftrace_events, list) {
 		int err;
@@ -401,8 +426,8 @@ int filter_add_subsystem_pred(struct event_subsystem *system,
 
 		err = __filter_add_pred(call, pred);
 		if (err == -ENOMEM) {
-			system->preds[system->n_preds] = NULL;
-			system->n_preds--;
+			filter->preds[filter->n_preds] = NULL;
+			filter->n_preds--;
 			mutex_unlock(&filter_mutex);
 			return err;
 		}
-- 
cgit v1.2.3-70-g09d2


From a118e4d1402f1349fe3d953493e4168a300a752d Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Tue, 28 Apr 2009 03:04:53 -0500
Subject: tracing/filters: distinguish between signed and unsigned fields

The new filter comparison ops need to be able to distinguish between
signed and unsigned field types, so add an is_signed flag/param to the
event field struct/trace_define_fields().  Also define a simple macro,
is_signed_type() to determine the signedness at compile time, used in the
trace macros.  If the is_signed_type() macro won't work with a specific
type, a new slightly modified version of TRACE_FIELD() called
TRACE_FIELD_SIGN(), allows the signedness to be set explicitly.

[ Impact: extend trace-filter code for new feature ]

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: fweisbec@gmail.com
Cc: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <1240905893.6416.120.camel@tropicana>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace_event.h     |  7 ++++---
 include/trace/ftrace.h           | 16 ++++++++--------
 kernel/trace/trace.h             |  1 +
 kernel/trace/trace_event_types.h |  4 ++--
 kernel/trace/trace_events.c      |  3 ++-
 kernel/trace/trace_export.c      | 29 ++++++++++++++++++++++-------
 6 files changed, 39 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 46a27f2695a..e61a7403f3d 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -122,8 +122,9 @@ extern int filter_current_check_discard(struct ftrace_event_call *call,
 					struct ring_buffer_event *event);
 
 extern int trace_define_field(struct ftrace_event_call *call, char *type,
-			      char *name, int offset, int size);
+			      char *name, int offset, int size, int is_signed);
 
+#define is_signed_type(type)	(((type)(-1)) < 0)
 
 /*
  * The double __builtin_constant_p is because gcc will give us an error
@@ -144,10 +145,10 @@ do {									\
 		__trace_printk(ip, fmt, ##args);			\
 } while (0)
 
-#define __common_field(type, item)					\
+#define __common_field(type, item, is_signed)				\
 	ret = trace_define_field(event_call, #type, "common_" #item,	\
 				 offsetof(typeof(field.ent), item),	\
-				 sizeof(field.ent.item));		\
+				 sizeof(field.ent.item), is_signed);	\
 	if (ret)							\
 		return ret;
 
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 1e681142f1d..edb02bc9f8f 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -225,7 +225,7 @@ ftrace_format_##call(struct trace_seq *s)				\
 #define __field(type, item)						\
 	ret = trace_define_field(event_call, #type, #item,		\
 				 offsetof(typeof(field), item),		\
-				 sizeof(field.item));			\
+				 sizeof(field.item), is_signed_type(type));	\
 	if (ret)							\
 		return ret;
 
@@ -234,7 +234,7 @@ ftrace_format_##call(struct trace_seq *s)				\
 	BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);				\
 	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
 				 offsetof(typeof(field), item),		\
-				 sizeof(field.item));			\
+				 sizeof(field.item), 0);		\
 	if (ret)							\
 		return ret;
 
@@ -242,7 +242,7 @@ ftrace_format_##call(struct trace_seq *s)				\
 #define __string(item, src)						       \
 	ret = trace_define_field(event_call, "__str_loc", #item,	       \
 				offsetof(typeof(field), __str_loc_##item),     \
-				sizeof(field.__str_loc_##item));
+				 sizeof(field.__str_loc_##item), 0);
 
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
@@ -253,11 +253,11 @@ ftrace_define_fields_##call(void)					\
 	struct ftrace_event_call *event_call = &event_##call;		\
 	int ret;							\
 									\
-	__common_field(int, type);					\
-	__common_field(unsigned char, flags);				\
-	__common_field(unsigned char, preempt_count);			\
-	__common_field(int, pid);					\
-	__common_field(int, tgid);					\
+	__common_field(int, type, 1);					\
+	__common_field(unsigned char, flags, 0);			\
+	__common_field(unsigned char, preempt_count, 0);		\
+	__common_field(int, pid, 1);					\
+	__common_field(int, tgid, 1);					\
 									\
 	tstruct;							\
 									\
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 1fb7d6ccadf..866d0108fd2 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -729,6 +729,7 @@ struct ftrace_event_field {
 	char			*type;
 	int			offset;
 	int			size;
+	int			is_signed;
 };
 
 struct event_filter {
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
index cfcecc4fd86..5e32e375134 100644
--- a/kernel/trace/trace_event_types.h
+++ b/kernel/trace/trace_event_types.h
@@ -141,8 +141,8 @@ TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore,
 
 TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore,
 	TRACE_STRUCT(
-		TRACE_FIELD(ktime_t, state_data.stamp, stamp)
-		TRACE_FIELD(ktime_t, state_data.end, end)
+		TRACE_FIELD_SIGN(ktime_t, state_data.stamp, stamp, 1)
+		TRACE_FIELD_SIGN(ktime_t, state_data.end, end, 1)
 		TRACE_FIELD(int, state_data.type, type)
 		TRACE_FIELD(int, state_data.state, state)
 	),
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 1cd1f37373d..bbbea747937 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -26,7 +26,7 @@ static DEFINE_MUTEX(event_mutex);
 LIST_HEAD(ftrace_events);
 
 int trace_define_field(struct ftrace_event_call *call, char *type,
-		       char *name, int offset, int size)
+		       char *name, int offset, int size, int is_signed)
 {
 	struct ftrace_event_field *field;
 
@@ -44,6 +44,7 @@ int trace_define_field(struct ftrace_event_call *call, char *type,
 
 	field->offset = offset;
 	field->size = size;
+	field->is_signed = is_signed;
 	list_add(&field->link, &call->fields);
 
 	return 0;
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 0cb1a142c74..d06cf898dc8 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -50,6 +50,9 @@ extern void __bad_type_size(void);
 	if (!ret)							\
 		return 0;
 
+#undef TRACE_FIELD_SIGN
+#define TRACE_FIELD_SIGN(type, item, assign, is_signed)	\
+	TRACE_FIELD(type, item, assign)
 
 #undef TP_RAW_FMT
 #define TP_RAW_FMT(args...) args
@@ -98,6 +101,10 @@ ftrace_format_##call(struct trace_seq *s)				\
 #define TRACE_FIELD(type, item, assign)\
 	entry->item = assign;
 
+#undef TRACE_FIELD_SIGN
+#define TRACE_FIELD_SIGN(type, item, assign, is_signed)	\
+	TRACE_FIELD(type, item, assign)
+
 #undef TP_CMD
 #define TP_CMD(cmd...)	cmd
 
@@ -149,7 +156,7 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 #define TRACE_FIELD(type, item, assign)					\
 	ret = trace_define_field(event_call, #type, #item,		\
 				 offsetof(typeof(field), item),		\
-				 sizeof(field.item));			\
+				 sizeof(field.item), is_signed_type(type));	\
 	if (ret)							\
 		return ret;
 
@@ -157,7 +164,15 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 #define TRACE_FIELD_SPECIAL(type, item, len, cmd)			\
 	ret = trace_define_field(event_call, #type "[" #len "]", #item,	\
 				 offsetof(typeof(field), item),		\
-				 sizeof(field.item));			\
+				 sizeof(field.item), 0);		\
+	if (ret)							\
+		return ret;
+
+#undef TRACE_FIELD_SIGN
+#define TRACE_FIELD_SIGN(type, item, assign, is_signed)			\
+	ret = trace_define_field(event_call, #type, #item,		\
+				 offsetof(typeof(field), item),		\
+				 sizeof(field.item), is_signed);	\
 	if (ret)							\
 		return ret;
 
@@ -173,11 +188,11 @@ ftrace_define_fields_##call(void)					\
 	struct args field;						\
 	int ret;							\
 									\
-	__common_field(unsigned char, type);				\
-	__common_field(unsigned char, flags);				\
-	__common_field(unsigned char, preempt_count);			\
-	__common_field(int, pid);					\
-	__common_field(int, tgid);					\
+	__common_field(unsigned char, type, 0);				\
+	__common_field(unsigned char, flags, 0);			\
+	__common_field(unsigned char, preempt_count, 0);		\
+	__common_field(int, pid, 1);					\
+	__common_field(int, tgid, 1);					\
 									\
 	tstruct;							\
 									\
-- 
cgit v1.2.3-70-g09d2


From 8b3725621074040d380664964ffbc40610aef8c6 Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Tue, 28 Apr 2009 03:04:59 -0500
Subject: tracing/filters: a better event parser

Replace the current event parser hack with a better one.  Filters are
no longer specified predicate by predicate, but all at once and can
use parens and any of the following operators:

numeric fields:

==, !=, <, <=, >, >=

string fields:

==, !=

predicates can be combined with the logical operators:

&&, ||

examples:

"common_preempt_count > 4" > filter

"((sig >= 10 && sig < 15) || sig == 17) && comm != bash" > filter

If there was an error, the erroneous string along with an error
message can be seen by looking at the filter e.g.:

((sig >= 10 && sig < 15) || dsig == 17) && comm != bash
^
parse_error: Field not found

Currently the caret for an error always appears at the beginning of
the filter; a real position should be used, but the error message
should be useful even without it.

To clear a filter, '0' can be written to the filter file.

Filters can also be set or cleared for a complete subsystem by writing
the same filter as would be written to an individual event to the
filter file at the root of the subsytem.  Note however, that if any
event in the subsystem lacks a field specified in the filter being
set, the set will fail and all filters in the subsytem are
automatically cleared.  This change from the previous version was made
because using only the fields that happen to exist for a given event
would most likely result in a meaningless filter.

Because the logical operators are now implemented as predicates, the
maximum number of predicates in a filter was increased from 8 to 16.

[ Impact: add new, extended trace-filter implementation ]

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: fweisbec@gmail.com
Cc: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <1240905899.6416.121.camel@tropicana>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace_event.h       |    2 +-
 kernel/trace/trace.h               |   66 ++-
 kernel/trace/trace_events.c        |   86 ++-
 kernel/trace/trace_events_filter.c | 1020 ++++++++++++++++++++++++++++--------
 4 files changed, 884 insertions(+), 290 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index e61a7403f3d..5fff40c9ff5 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -112,7 +112,7 @@ struct ftrace_event_call {
 #endif
 };
 
-#define MAX_FILTER_PRED		8
+#define MAX_FILTER_PRED		32
 #define MAX_FILTER_STR_VAL	128
 
 extern int init_preds(struct ftrace_event_call *call);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 866d0108fd2..7736fe8c1b7 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -735,6 +735,7 @@ struct ftrace_event_field {
 struct event_filter {
 	int			n_preds;
 	struct filter_pred	**preds;
+	char			*filter_string;
 };
 
 struct event_subsystem {
@@ -746,7 +747,8 @@ struct event_subsystem {
 
 struct filter_pred;
 
-typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
+typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
+				 int val1, int val2);
 
 struct filter_pred {
 	filter_pred_fn_t fn;
@@ -756,23 +758,18 @@ struct filter_pred {
 	char *field_name;
 	int offset;
 	int not;
-	int or;
-	int compound;
-	int clear;
+	int op;
+	int pop_n;
 };
 
-extern void filter_free_pred(struct filter_pred *pred);
-extern void filter_print_preds(struct ftrace_event_call *call,
+extern void print_event_filter(struct ftrace_event_call *call,
 			       struct trace_seq *s);
-extern int filter_parse(char **pbuf, struct filter_pred *pred);
-extern int filter_add_pred(struct ftrace_event_call *call,
-			   struct filter_pred *pred);
-extern void filter_disable_preds(struct ftrace_event_call *call);
-extern void filter_free_subsystem_preds(struct event_subsystem *system);
-extern void filter_print_subsystem_preds(struct event_subsystem *system,
+extern int apply_event_filter(struct ftrace_event_call *call,
+			      char *filter_string);
+extern int apply_subsystem_event_filter(struct event_subsystem *system,
+					char *filter_string);
+extern void print_subsystem_event_filter(struct event_subsystem *system,
 					 struct trace_seq *s);
-extern int filter_add_subsystem_pred(struct event_subsystem *system,
-				     struct filter_pred *pred);
 
 static inline int
 filter_check_discard(struct ftrace_event_call *call, void *rec,
@@ -787,6 +784,47 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
 	return 0;
 }
 
+#define DEFINE_COMPARISON_PRED(type)					\
+static int filter_pred_##type(struct filter_pred *pred, void *event,	\
+			      int val1, int val2)			\
+{									\
+	type *addr = (type *)(event + pred->offset);			\
+	type val = (type)pred->val;					\
+	int match = 0;							\
+									\
+	switch (pred->op) {						\
+	case OP_LT:							\
+		match = (*addr < val);					\
+		break;							\
+	case OP_LE:							\
+		match = (*addr <= val);					\
+		break;							\
+	case OP_GT:							\
+		match = (*addr > val);					\
+		break;							\
+	case OP_GE:							\
+		match = (*addr >= val);					\
+		break;							\
+	default:							\
+		break;							\
+	}								\
+									\
+	return match;							\
+}
+
+#define DEFINE_EQUALITY_PRED(size)					\
+static int filter_pred_##size(struct filter_pred *pred, void *event,	\
+			      int val1, int val2)			\
+{									\
+	u##size *addr = (u##size *)(event + pred->offset);		\
+	u##size val = (u##size)pred->val;				\
+	int match;							\
+									\
+	match = (val == *addr) ^ pred->not;				\
+									\
+	return match;							\
+}
+
 extern struct list_head ftrace_events;
 
 extern const char *__start___trace_bprintk_fmt[];
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index bbbea747937..f789ca540fe 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -492,7 +492,7 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
 
 	trace_seq_init(s);
 
-	filter_print_preds(call, s);
+	print_event_filter(call, s);
 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
 
 	kfree(s);
@@ -505,40 +505,26 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
 		   loff_t *ppos)
 {
 	struct ftrace_event_call *call = filp->private_data;
-	char buf[64], *pbuf = buf;
-	struct filter_pred *pred;
+	char *buf;
 	int err;
 
-	if (cnt >= sizeof(buf))
+	if (cnt >= PAGE_SIZE)
 		return -EINVAL;
 
-	if (copy_from_user(&buf, ubuf, cnt))
-		return -EFAULT;
-	buf[cnt] = '\0';
-
-	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
-	if (!pred)
+	buf = (char *)__get_free_page(GFP_TEMPORARY);
+	if (!buf)
 		return -ENOMEM;
 
-	err = filter_parse(&pbuf, pred);
-	if (err < 0) {
-		filter_free_pred(pred);
-		return err;
-	}
-
-	if (pred->clear) {
-		filter_disable_preds(call);
-		filter_free_pred(pred);
-		return cnt;
+	if (copy_from_user(buf, ubuf, cnt)) {
+		free_page((unsigned long) buf);
+		return -EFAULT;
 	}
+	buf[cnt] = '\0';
 
-	err = filter_add_pred(call, pred);
-	if (err < 0) {
-		filter_free_pred(pred);
+	err = apply_event_filter(call, buf);
+	free_page((unsigned long) buf);
+	if (err < 0)
 		return err;
-	}
-
-	filter_free_pred(pred);
 
 	*ppos += cnt;
 
@@ -562,7 +548,7 @@ subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
 
 	trace_seq_init(s);
 
-	filter_print_subsystem_preds(system, s);
+	print_subsystem_event_filter(system, s);
 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
 
 	kfree(s);
@@ -575,38 +561,26 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
 		       loff_t *ppos)
 {
 	struct event_subsystem *system = filp->private_data;
-	char buf[64], *pbuf = buf;
-	struct filter_pred *pred;
+	char *buf;
 	int err;
 
-	if (cnt >= sizeof(buf))
+	if (cnt >= PAGE_SIZE)
 		return -EINVAL;
 
-	if (copy_from_user(&buf, ubuf, cnt))
-		return -EFAULT;
-	buf[cnt] = '\0';
-
-	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
-	if (!pred)
+	buf = (char *)__get_free_page(GFP_TEMPORARY);
+	if (!buf)
 		return -ENOMEM;
 
-	err = filter_parse(&pbuf, pred);
-	if (err < 0) {
-		filter_free_pred(pred);
-		return err;
-	}
-
-	if (pred->clear) {
-		filter_free_subsystem_preds(system);
-		filter_free_pred(pred);
-		return cnt;
+	if (copy_from_user(buf, ubuf, cnt)) {
+		free_page((unsigned long) buf);
+		return -EFAULT;
 	}
+	buf[cnt] = '\0';
 
-	err = filter_add_subsystem_pred(system, pred);
-	if (err < 0) {
-		filter_free_pred(pred);
+	err = apply_subsystem_event_filter(system, buf);
+	free_page((unsigned long) buf);
+	if (err < 0)
 		return err;
-	}
 
 	*ppos += cnt;
 
@@ -760,11 +734,21 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
 
 	system->filter = NULL;
 
+	system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
+	if (!system->filter) {
+		pr_warning("Could not allocate filter for subsystem "
+			   "'%s'\n", name);
+		return system->entry;
+	}
+
 	entry = debugfs_create_file("filter", 0644, system->entry, system,
 				    &ftrace_subsystem_filter_fops);
-	if (!entry)
+	if (!entry) {
+		kfree(system->filter);
+		system->filter = NULL;
 		pr_warning("Could not create debugfs "
 			   "'%s/filter' entry\n", name);
+	}
 
 	return system->entry;
 }
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 1e861eca3d0..f49486687ee 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -29,51 +29,130 @@
 
 static DEFINE_MUTEX(filter_mutex);
 
-static int filter_pred_64(struct filter_pred *pred, void *event)
+enum filter_op_ids
 {
-	u64 *addr = (u64 *)(event + pred->offset);
-	u64 val = (u64)pred->val;
-	int match;
-
-	match = (val == *addr) ^ pred->not;
-
-	return match;
-}
-
-static int filter_pred_32(struct filter_pred *pred, void *event)
-{
-	u32 *addr = (u32 *)(event + pred->offset);
-	u32 val = (u32)pred->val;
-	int match;
-
-	match = (val == *addr) ^ pred->not;
-
-	return match;
-}
-
-static int filter_pred_16(struct filter_pred *pred, void *event)
+	OP_OR,
+	OP_AND,
+	OP_NE,
+	OP_EQ,
+	OP_LT,
+	OP_LE,
+	OP_GT,
+	OP_GE,
+	OP_NONE,
+	OP_OPEN_PAREN,
+};
+
+struct filter_op {
+	int id;
+	char *string;
+	int precedence;
+};
+
+static struct filter_op filter_ops[] = {
+	{ OP_OR, "||", 1 },
+	{ OP_AND, "&&", 2 },
+	{ OP_NE, "!=", 4 },
+	{ OP_EQ, "==", 4 },
+	{ OP_LT, "<", 5 },
+	{ OP_LE, "<=", 5 },
+	{ OP_GT, ">", 5 },
+	{ OP_GE, ">=", 5 },
+	{ OP_NONE, "OP_NONE", 0 },
+	{ OP_OPEN_PAREN, "(", 0 },
+};
+
+enum {
+	FILT_ERR_NONE,
+	FILT_ERR_INVALID_OP,
+	FILT_ERR_UNBALANCED_PAREN,
+	FILT_ERR_TOO_MANY_OPERANDS,
+	FILT_ERR_OPERAND_TOO_LONG,
+	FILT_ERR_FIELD_NOT_FOUND,
+	FILT_ERR_ILLEGAL_FIELD_OP,
+	FILT_ERR_ILLEGAL_INTVAL,
+	FILT_ERR_BAD_SUBSYS_FILTER,
+	FILT_ERR_TOO_MANY_PREDS,
+	FILT_ERR_MISSING_FIELD,
+	FILT_ERR_INVALID_FILTER,
+};
+
+static char *err_text[] = {
+	"No error",
+	"Invalid operator",
+	"Unbalanced parens",
+	"Too many operands",
+	"Operand too long",
+	"Field not found",
+	"Illegal operation for field type",
+	"Illegal integer value",
+	"Couldn't find or set field in one of a subsystem's events",
+	"Too many terms in predicate expression",
+	"Missing field name and/or value",
+	"Meaningless filter expression",
+};
+
+struct opstack_op {
+	int op;
+	struct list_head list;
+};
+
+struct postfix_elt {
+	int op;
+	char *operand;
+	struct list_head list;
+};
+
+struct filter_parse_state {
+	struct filter_op *ops;
+	struct list_head opstack;
+	struct list_head postfix;
+	int lasterr;
+	int lasterr_pos;
+
+	struct {
+		char *string;
+		unsigned int cnt;
+		unsigned int tail;
+	} infix;
+
+	struct {
+		char string[MAX_FILTER_STR_VAL];
+		int pos;
+		unsigned int tail;
+	} operand;
+};
+
+DEFINE_COMPARISON_PRED(s64);
+DEFINE_COMPARISON_PRED(u64);
+DEFINE_COMPARISON_PRED(s32);
+DEFINE_COMPARISON_PRED(u32);
+DEFINE_COMPARISON_PRED(s16);
+DEFINE_COMPARISON_PRED(u16);
+DEFINE_COMPARISON_PRED(s8);
+DEFINE_COMPARISON_PRED(u8);
+
+DEFINE_EQUALITY_PRED(64);
+DEFINE_EQUALITY_PRED(32);
+DEFINE_EQUALITY_PRED(16);
+DEFINE_EQUALITY_PRED(8);
+
+static int filter_pred_and(struct filter_pred *pred __attribute((unused)),
+			   void *event __attribute((unused)),
+			   int val1, int val2)
 {
-	u16 *addr = (u16 *)(event + pred->offset);
-	u16 val = (u16)pred->val;
-	int match;
-
-	match = (val == *addr) ^ pred->not;
-
-	return match;
+	return val1 && val2;
 }
 
-static int filter_pred_8(struct filter_pred *pred, void *event)
+static int filter_pred_or(struct filter_pred *pred __attribute((unused)),
+			  void *event __attribute((unused)),
+			  int val1, int val2)
 {
-	u8 *addr = (u8 *)(event + pred->offset);
-	u8 val = (u8)pred->val;
-	int match;
-
-	match = (val == *addr) ^ pred->not;
-
-	return match;
+	return val1 || val2;
 }
 
-static int filter_pred_string(struct filter_pred *pred, void *event)
+static int filter_pred_string(struct filter_pred *pred, void *event,
+			      int val1, int val2)
 {
 	char *addr = (char *)(event + pred->offset);
 	int cmp, match;
@@ -85,7 +164,8 @@ static int filter_pred_string(struct filter_pred *pred, void *event)
 	return match;
 }
 
-static int filter_pred_none(struct filter_pred *pred, void *event)
+static int filter_pred_none(struct filter_pred *pred, void *event,
+			    int val1, int val2)
 {
 	return 0;
 }
@@ -94,66 +174,119 @@ static int filter_pred_none(struct filter_pred *pred, void *event)
 int filter_match_preds(struct ftrace_event_call *call, void *rec)
 {
 	struct event_filter *filter = call->filter;
-	int i, matched, and_failed = 0;
+	int match, top = 0, val1 = 0, val2 = 0;
+	int stack[MAX_FILTER_PRED];
 	struct filter_pred *pred;
+	int i;
 
 	for (i = 0; i < filter->n_preds; i++) {
 		pred = filter->preds[i];
-		if (and_failed && !pred->or)
+		if (!pred->pop_n) {
+			match = pred->fn(pred, rec, val1, val2);
+			stack[top++] = match;
 			continue;
-		matched = pred->fn(pred, rec);
-		if (!matched && !pred->or) {
-			and_failed = 1;
-			continue;
-		} else if (matched && pred->or)
-			return 1;
+		}
+		if (pred->pop_n > top) {
+			WARN_ON_ONCE(1);
+			return 0;
+		}
+		val1 = stack[--top];
+		val2 = stack[--top];
+		match = pred->fn(pred, rec, val1, val2);
+		stack[top++] = match;
 	}
 
-	if (and_failed)
-		return 0;
-
-	return 1;
+	return stack[--top];
 }
 EXPORT_SYMBOL_GPL(filter_match_preds);
 
-static void __filter_print_preds(struct event_filter *filter,
-				 struct trace_seq *s)
+static void parse_error(struct filter_parse_state *ps, int err, int pos)
 {
-	struct filter_pred *pred;
-	char *field_name;
-	int i;
+	ps->lasterr = err;
+	ps->lasterr_pos = pos;
+}
 
-	if (!filter || !filter->n_preds) {
-		trace_seq_printf(s, "none\n");
+static void remove_filter_string(struct event_filter *filter)
+{
+	kfree(filter->filter_string);
+	filter->filter_string = NULL;
+}
+
+static int replace_filter_string(struct event_filter *filter,
+				 char *filter_string)
+{
+	kfree(filter->filter_string);
+	filter->filter_string = kstrdup(filter_string, GFP_KERNEL);
+	if (!filter->filter_string)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int append_filter_string(struct event_filter *filter,
+				char *string)
+{
+	int newlen;
+	char *new_filter_string;
+
+	BUG_ON(!filter->filter_string);
+	newlen = strlen(filter->filter_string) + strlen(string) + 1;
+	new_filter_string = kmalloc(newlen, GFP_KERNEL);
+	if (!new_filter_string)
+		return -ENOMEM;
+
+	strcpy(new_filter_string, filter->filter_string);
+	strcat(new_filter_string, string);
+	kfree(filter->filter_string);
+	filter->filter_string = new_filter_string;
+
+	return 0;
+}
+
+static void append_filter_err(struct filter_parse_state *ps,
+			      struct event_filter *filter)
+{
+	int pos = ps->lasterr_pos;
+	char *buf, *pbuf;
+
+	buf = (char *)__get_free_page(GFP_TEMPORARY);
+	if (!buf)
 		return;
-	}
 
-	for (i = 0; i < filter->n_preds; i++) {
-		pred = filter->preds[i];
-		field_name = pred->field_name;
-		if (i)
-			trace_seq_printf(s, pred->or ? "|| " : "&& ");
-		trace_seq_printf(s, "%s ", field_name);
-		trace_seq_printf(s, pred->not ? "!= " : "== ");
-		if (pred->str_len)
-			trace_seq_printf(s, "%s\n", pred->str_val);
-		else
-			trace_seq_printf(s, "%llu\n", pred->val);
-	}
+	append_filter_string(filter, "\n");
+	memset(buf, ' ', PAGE_SIZE);
+	if (pos > PAGE_SIZE - 128)
+		pos = 0;
+	buf[pos] = '^';
+	pbuf = &buf[pos] + 1;
+
+	sprintf(pbuf, "\nparse_error: %s\n", err_text[ps->lasterr]);
+	append_filter_string(filter, buf);
+	free_page((unsigned long) buf);
 }
 
-void filter_print_preds(struct ftrace_event_call *call, struct trace_seq *s)
+void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
 {
+	struct event_filter *filter = call->filter;
+
 	mutex_lock(&filter_mutex);
-	__filter_print_preds(call->filter, s);
+	if (filter->filter_string)
+		trace_seq_printf(s, "%s\n", filter->filter_string);
+	else
+		trace_seq_printf(s, "none\n");
 	mutex_unlock(&filter_mutex);
 }
 
-void filter_print_subsystem_preds(struct event_subsystem *system,
+void print_subsystem_event_filter(struct event_subsystem *system,
 				  struct trace_seq *s)
 {
+	struct event_filter *filter = system->filter;
+
 	mutex_lock(&filter_mutex);
-	__filter_print_preds(system->filter, s);
+	if (filter->filter_string)
+		trace_seq_printf(s, "%s\n", filter->filter_string);
+	else
+		trace_seq_printf(s, "none\n");
 	mutex_unlock(&filter_mutex);
 }
 
@@ -170,7 +303,7 @@ find_event_field(struct ftrace_event_call *call, char *name)
 	return NULL;
 }
 
-void filter_free_pred(struct filter_pred *pred)
+static void filter_free_pred(struct filter_pred *pred)
 {
 	if (!pred)
 		return;
@@ -191,15 +324,17 @@ static int filter_set_pred(struct filter_pred *dest,
 			   filter_pred_fn_t fn)
 {
 	*dest = *src;
-	dest->field_name = kstrdup(src->field_name, GFP_KERNEL);
-	if (!dest->field_name)
-		return -ENOMEM;
+	if (src->field_name) {
+		dest->field_name = kstrdup(src->field_name, GFP_KERNEL);
+		if (!dest->field_name)
+			return -ENOMEM;
+	}
 	dest->fn = fn;
 
 	return 0;
 }
 
-static void __filter_disable_preds(struct ftrace_event_call *call)
+static void filter_disable_preds(struct ftrace_event_call *call)
 {
 	struct event_filter *filter = call->filter;
 	int i;
@@ -211,13 +346,6 @@ static void __filter_disable_preds(struct ftrace_event_call *call)
 		filter->preds[i]->fn = filter_pred_none;
 }
 
-void filter_disable_preds(struct ftrace_event_call *call)
-{
-	mutex_lock(&filter_mutex);
-	__filter_disable_preds(call);
-	mutex_unlock(&filter_mutex);
-}
-
 int init_preds(struct ftrace_event_call *call)
 {
 	struct event_filter *filter;
@@ -258,48 +386,43 @@ oom:
 }
 EXPORT_SYMBOL_GPL(init_preds);
 
-static void __filter_free_subsystem_preds(struct event_subsystem *system)
+static void filter_free_subsystem_preds(struct event_subsystem *system)
 {
 	struct event_filter *filter = system->filter;
 	struct ftrace_event_call *call;
 	int i;
 
-	if (filter && filter->n_preds) {
+	if (filter->n_preds) {
 		for (i = 0; i < filter->n_preds; i++)
 			filter_free_pred(filter->preds[i]);
 		kfree(filter->preds);
-		kfree(filter);
-		system->filter = NULL;
+		filter->preds = NULL;
+		filter->n_preds = 0;
 	}
 
 	list_for_each_entry(call, &ftrace_events, list) {
 		if (!call->define_fields)
 			continue;
 
-		if (!strcmp(call->system, system->name))
-			__filter_disable_preds(call);
+		if (!strcmp(call->system, system->name)) {
+			filter_disable_preds(call);
+			remove_filter_string(call->filter);
+		}
 	}
 }
 
-void filter_free_subsystem_preds(struct event_subsystem *system)
-{
-	mutex_lock(&filter_mutex);
-	__filter_free_subsystem_preds(system);
-	mutex_unlock(&filter_mutex);
-}
-
-static int filter_add_pred_fn(struct ftrace_event_call *call,
+static int filter_add_pred_fn(struct filter_parse_state *ps,
+			      struct ftrace_event_call *call,
 			      struct filter_pred *pred,
 			      filter_pred_fn_t fn)
 {
 	struct event_filter *filter = call->filter;
 	int idx, err;
 
-	if (filter->n_preds && !pred->compound)
-		__filter_disable_preds(call);
-
-	if (filter->n_preds == MAX_FILTER_PRED)
+	if (filter->n_preds == MAX_FILTER_PRED) {
+		parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
 		return -ENOSPC;
+	}
 
 	idx = filter->n_preds;
 	filter_clear_pred(filter->preds[idx]);
@@ -321,94 +444,132 @@ static int is_string_field(const char *type)
 	return 0;
 }
 
-static int __filter_add_pred(struct ftrace_event_call *call,
-			     struct filter_pred *pred)
+static int is_legal_op(struct ftrace_event_field *field, int op)
+{
+	if (is_string_field(field->type) && (op != OP_EQ && op != OP_NE))
+		return 0;
+
+	return 1;
+}
+
+static filter_pred_fn_t select_comparison_fn(int op, int field_size,
+					     int field_is_signed)
+{
+	filter_pred_fn_t fn = NULL;
+
+	switch (field_size) {
+	case 8:
+		if (op == OP_EQ || op == OP_NE)
+			fn = filter_pred_64;
+		else if (field_is_signed)
+			fn = filter_pred_s64;
+		else
+			fn = filter_pred_u64;
+		break;
+	case 4:
+		if (op == OP_EQ || op == OP_NE)
+			fn = filter_pred_32;
+		else if (field_is_signed)
+			fn = filter_pred_s32;
+		else
+			fn = filter_pred_u32;
+		break;
+	case 2:
+		if (op == OP_EQ || op == OP_NE)
+			fn = filter_pred_16;
+		else if (field_is_signed)
+			fn = filter_pred_s16;
+		else
+			fn = filter_pred_u16;
+		break;
+	case 1:
+		if (op == OP_EQ || op == OP_NE)
+			fn = filter_pred_8;
+		else if (field_is_signed)
+			fn = filter_pred_s8;
+		else
+			fn = filter_pred_u8;
+		break;
+	}
+
+	return fn;
+}
+
+static int filter_add_pred(struct filter_parse_state *ps,
+			   struct ftrace_event_call *call,
+			   struct filter_pred *pred)
 {
 	struct ftrace_event_field *field;
 	filter_pred_fn_t fn;
 	unsigned long long val;
 
+	pred->fn = filter_pred_none;
+
+	if (pred->op == OP_AND) {
+		pred->pop_n = 2;
+		return filter_add_pred_fn(ps, call, pred, filter_pred_and);
+	} else if (pred->op == OP_OR) {
+		pred->pop_n = 2;
+		return filter_add_pred_fn(ps, call, pred, filter_pred_or);
+	}
+
 	field = find_event_field(call, pred->field_name);
-	if (!field)
+	if (!field) {
+		parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0);
 		return -EINVAL;
+	}
 
-	pred->fn = filter_pred_none;
 	pred->offset = field->offset;
 
+	if (!is_legal_op(field, pred->op)) {
+		parse_error(ps, FILT_ERR_ILLEGAL_FIELD_OP, 0);
+		return -EINVAL;
+	}
+
 	if (is_string_field(field->type)) {
 		fn = filter_pred_string;
 		pred->str_len = field->size;
-		return filter_add_pred_fn(call, pred, fn);
+		if (pred->op == OP_NE)
+			pred->not = 1;
+		return filter_add_pred_fn(ps, call, pred, fn);
 	} else {
-		if (strict_strtoull(pred->str_val, 0, &val))
+		if (strict_strtoull(pred->str_val, 0, &val)) {
+			parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
 			return -EINVAL;
+		}
 		pred->val = val;
 	}
 
-	switch (field->size) {
-	case 8:
-		fn = filter_pred_64;
-		break;
-	case 4:
-		fn = filter_pred_32;
-		break;
-	case 2:
-		fn = filter_pred_16;
-		break;
-	case 1:
-		fn = filter_pred_8;
-		break;
-	default:
+	fn = select_comparison_fn(pred->op, field->size, field->is_signed);
+	if (!fn) {
+		parse_error(ps, FILT_ERR_INVALID_OP, 0);
 		return -EINVAL;
 	}
 
-	return filter_add_pred_fn(call, pred, fn);
-}
-
-int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred)
-{
-	int err;
-
-	mutex_lock(&filter_mutex);
-	err = __filter_add_pred(call, pred);
-	mutex_unlock(&filter_mutex);
+	if (pred->op == OP_NE)
+		pred->not = 1;
 
-	return err;
+	return filter_add_pred_fn(ps, call, pred, fn);
 }
 
-int filter_add_subsystem_pred(struct event_subsystem *system,
-			      struct filter_pred *pred)
+static int filter_add_subsystem_pred(struct filter_parse_state *ps,
+				     struct event_subsystem *system,
+				     struct filter_pred *pred,
+				     char *filter_string)
 {
 	struct event_filter *filter = system->filter;
 	struct ftrace_event_call *call;
 
-	mutex_lock(&filter_mutex);
-
-	if (filter && filter->n_preds && !pred->compound) {
-		__filter_free_subsystem_preds(system);
-		filter = NULL;
-	}
-
-	if (!filter) {
-		system->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
-		if (!system->filter) {
-			mutex_unlock(&filter_mutex);
-			return -ENOMEM;
-		}
-		filter = system->filter;
+	if (!filter->preds) {
 		filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
 					GFP_KERNEL);
 
-		if (!filter->preds) {
-			kfree(system->filter);
-			system->filter = NULL;
-			mutex_unlock(&filter_mutex);
+		if (!filter->preds)
 			return -ENOMEM;
-		}
 	}
 
 	if (filter->n_preds == MAX_FILTER_PRED) {
-		mutex_unlock(&filter_mutex);
+		parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
 		return -ENOSPC;
 	}
 
@@ -424,97 +585,508 @@ int filter_add_subsystem_pred(struct event_subsystem *system,
 		if (strcmp(call->system, system->name))
 			continue;
 
-		err = __filter_add_pred(call, pred);
-		if (err == -ENOMEM) {
-			filter->preds[filter->n_preds] = NULL;
-			filter->n_preds--;
-			mutex_unlock(&filter_mutex);
+		err = filter_add_pred(ps, call, pred);
+		if (err) {
+			filter_free_subsystem_preds(system);
+			parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
 			return err;
 		}
+		replace_filter_string(call->filter, filter_string);
 	}
 
-	mutex_unlock(&filter_mutex);
+	return 0;
+}
+
+static void parse_init(struct filter_parse_state *ps,
+		       struct filter_op *ops,
+		       char *infix_string)
+{
+	memset(ps, '\0', sizeof(*ps));
+
+	ps->infix.string = infix_string;
+	ps->infix.cnt = strlen(infix_string);
+	ps->ops = ops;
+
+	INIT_LIST_HEAD(&ps->opstack);
+	INIT_LIST_HEAD(&ps->postfix);
+}
+
+static char infix_next(struct filter_parse_state *ps)
+{
+	ps->infix.cnt--;
+
+	return ps->infix.string[ps->infix.tail++];
+}
+
+static char infix_peek(struct filter_parse_state *ps)
+{
+	if (ps->infix.tail == strlen(ps->infix.string))
+		return 0;
+
+	return ps->infix.string[ps->infix.tail];
+}
+
+static void infix_advance(struct filter_parse_state *ps)
+{
+	ps->infix.cnt--;
+	ps->infix.tail++;
+}
+
+static inline int is_precedence_lower(struct filter_parse_state *ps,
+				      int a, int b)
+{
+	return ps->ops[a].precedence < ps->ops[b].precedence;
+}
+
+static inline int is_op_char(struct filter_parse_state *ps, char c)
+{
+	int i;
+
+	for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
+		if (ps->ops[i].string[0] == c)
+			return 1;
+	}
 
 	return 0;
 }
 
-/*
- * The filter format can be
- *   - 0, which means remove all filter preds
- *   - [||/&&] <field> ==/!= <val>
- */
-int filter_parse(char **pbuf, struct filter_pred *pred)
-{
-	char *tok, *val_str = NULL;
-	int tok_n = 0;
-
-	while ((tok = strsep(pbuf, " \n"))) {
-		if (tok_n == 0) {
-			if (!strcmp(tok, "0")) {
-				pred->clear = 1;
-				return 0;
-			} else if (!strcmp(tok, "&&")) {
-				pred->or = 0;
-				pred->compound = 1;
-			} else if (!strcmp(tok, "||")) {
-				pred->or = 1;
-				pred->compound = 1;
-			} else
-				pred->field_name = tok;
-			tok_n = 1;
-			continue;
+static int infix_get_op(struct filter_parse_state *ps, char firstc)
+{
+	char nextc = infix_peek(ps);
+	char opstr[3];
+	int i;
+
+	opstr[0] = firstc;
+	opstr[1] = nextc;
+	opstr[2] = '\0';
+
+	for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
+		if (!strcmp(opstr, ps->ops[i].string)) {
+			infix_advance(ps);
+			return ps->ops[i].id;
 		}
-		if (tok_n == 1) {
-			if (!pred->field_name)
-				pred->field_name = tok;
-			else if (!strcmp(tok, "!="))
-				pred->not = 1;
-			else if (!strcmp(tok, "=="))
-				pred->not = 0;
-			else {
-				pred->field_name = NULL;
+	}
+
+	opstr[1] = '\0';
+
+	for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
+		if (!strcmp(opstr, ps->ops[i].string))
+			return ps->ops[i].id;
+	}
+
+	return OP_NONE;
+}
+
+static inline void clear_operand_string(struct filter_parse_state *ps)
+{
+	memset(ps->operand.string, '\0', MAX_FILTER_STR_VAL);
+	ps->operand.tail = 0;
+}
+
+static inline int append_operand_char(struct filter_parse_state *ps, char c)
+{
+	if (ps->operand.tail == MAX_FILTER_STR_VAL)
+		return -EINVAL;
+
+	ps->operand.string[ps->operand.tail++] = c;
+
+	return 0;
+}
+
+static int filter_opstack_push(struct filter_parse_state *ps, int op)
+{
+	struct opstack_op *opstack_op;
+
+	opstack_op = kmalloc(sizeof(*opstack_op), GFP_KERNEL);
+	if (!opstack_op)
+		return -ENOMEM;
+
+	opstack_op->op = op;
+	list_add(&opstack_op->list, &ps->opstack);
+
+	return 0;
+}
+
+static int filter_opstack_empty(struct filter_parse_state *ps)
+{
+	return list_empty(&ps->opstack);
+}
+
+static int filter_opstack_top(struct filter_parse_state *ps)
+{
+	struct opstack_op *opstack_op;
+
+	if (filter_opstack_empty(ps))
+		return OP_NONE;
+
+	opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list);
+
+	return opstack_op->op;
+}
+
+static int filter_opstack_pop(struct filter_parse_state *ps)
+{
+	struct opstack_op *opstack_op;
+	int op;
+
+	if (filter_opstack_empty(ps))
+		return OP_NONE;
+
+	opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list);
+	op = opstack_op->op;
+	list_del(&opstack_op->list);
+
+	kfree(opstack_op);
+
+	return op;
+}
+
+static void filter_opstack_clear(struct filter_parse_state *ps)
+{
+	while (!filter_opstack_empty(ps))
+		filter_opstack_pop(ps);
+}
+
+static char *curr_operand(struct filter_parse_state *ps)
+{
+	return ps->operand.string;
+}
+
+static int postfix_append_operand(struct filter_parse_state *ps, char *operand)
+{
+	struct postfix_elt *elt;
+
+	elt = kmalloc(sizeof(*elt), GFP_KERNEL);
+	if (!elt)
+		return -ENOMEM;
+
+	elt->op = OP_NONE;
+	elt->operand = kstrdup(operand, GFP_KERNEL);
+	if (!elt->operand) {
+		kfree(elt);
+		return -ENOMEM;
+	}
+
+	list_add_tail(&elt->list, &ps->postfix);
+
+	return 0;
+}
+
+static int postfix_append_op(struct filter_parse_state *ps, int op)
+{
+	struct postfix_elt *elt;
+
+	elt = kmalloc(sizeof(*elt), GFP_KERNEL);
+	if (!elt)
+		return -ENOMEM;
+
+	elt->op = op;
+	elt->operand = NULL;
+
+	list_add_tail(&elt->list, &ps->postfix);
+
+	return 0;
+}
+
+static void postfix_clear(struct filter_parse_state *ps)
+{
+	struct postfix_elt *elt;
+
+	while (!list_empty(&ps->postfix)) {
+		elt = list_first_entry(&ps->postfix, struct postfix_elt, list);
+		kfree(elt->operand);
+		list_del(&elt->list);
+	}
+}
+
+static int filter_parse(struct filter_parse_state *ps)
+{
+	int op, top_op;
+	char ch;
+
+	while ((ch = infix_next(ps))) {
+		if (isspace(ch))
+			continue;
+
+		if (is_op_char(ps, ch)) {
+			op = infix_get_op(ps, ch);
+			if (op == OP_NONE) {
+				parse_error(ps, FILT_ERR_INVALID_OP, 0);
 				return -EINVAL;
 			}
-			tok_n = 2;
+
+			if (strlen(curr_operand(ps))) {
+				postfix_append_operand(ps, curr_operand(ps));
+				clear_operand_string(ps);
+			}
+
+			while (!filter_opstack_empty(ps)) {
+				top_op = filter_opstack_top(ps);
+				if (!is_precedence_lower(ps, top_op, op)) {
+					top_op = filter_opstack_pop(ps);
+					postfix_append_op(ps, top_op);
+					continue;
+				}
+				break;
+			}
+
+			filter_opstack_push(ps, op);
 			continue;
 		}
-		if (tok_n == 2) {
-			if (pred->compound) {
-				if (!strcmp(tok, "!="))
-					pred->not = 1;
-				else if (!strcmp(tok, "=="))
-					pred->not = 0;
-				else {
-					pred->field_name = NULL;
-					return -EINVAL;
-				}
-			} else {
-				val_str = tok;
-				break; /* done */
+
+		if (ch == '(') {
+			filter_opstack_push(ps, OP_OPEN_PAREN);
+			continue;
+		}
+
+		if (ch == ')') {
+			if (strlen(curr_operand(ps))) {
+				postfix_append_operand(ps, curr_operand(ps));
+				clear_operand_string(ps);
+			}
+
+			top_op = filter_opstack_pop(ps);
+			while (top_op != OP_NONE) {
+				if (top_op == OP_OPEN_PAREN)
+					break;
+				postfix_append_op(ps, top_op);
+				top_op = filter_opstack_pop(ps);
+			}
+			if (top_op == OP_NONE) {
+				parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0);
+				return -EINVAL;
 			}
-			tok_n = 3;
 			continue;
 		}
-		if (tok_n == 3) {
-			val_str = tok;
-			break; /* done */
+		if (append_operand_char(ps, ch)) {
+			parse_error(ps, FILT_ERR_OPERAND_TOO_LONG, 0);
+			return -EINVAL;
+		}
+	}
+
+	if (strlen(curr_operand(ps)))
+		postfix_append_operand(ps, curr_operand(ps));
+
+	while (!filter_opstack_empty(ps)) {
+		top_op = filter_opstack_pop(ps);
+		if (top_op == OP_NONE)
+			break;
+		if (top_op == OP_OPEN_PAREN) {
+			parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0);
+			return -EINVAL;
+		}
+		postfix_append_op(ps, top_op);
+	}
+
+	return 0;
+}
+
+static struct filter_pred *create_pred(int op, char *operand1, char *operand2)
+{
+	struct filter_pred *pred;
+
+	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
+	if (!pred)
+		return NULL;
+
+	pred->field_name = kstrdup(operand1, GFP_KERNEL);
+	if (!pred->field_name) {
+		kfree(pred);
+		return NULL;
+	}
+
+	strcpy(pred->str_val, operand2);
+	pred->str_len = strlen(operand2);
+
+	pred->op = op;
+
+	return pred;
+}
+
+static struct filter_pred *create_logical_pred(int op)
+{
+	struct filter_pred *pred;
+
+	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
+	if (!pred)
+		return NULL;
+
+	pred->op = op;
+
+	return pred;
+}
+
+static int check_preds(struct filter_parse_state *ps)
+{
+	int n_normal_preds = 0, n_logical_preds = 0;
+	struct postfix_elt *elt;
+
+	list_for_each_entry(elt, &ps->postfix, list) {
+		if (elt->op == OP_NONE)
+			continue;
+
+		if (elt->op == OP_AND || elt->op == OP_OR) {
+			n_logical_preds++;
+			continue;
 		}
+		n_normal_preds++;
 	}
 
-	if (!val_str || !strlen(val_str)
-	    || strlen(val_str) >= MAX_FILTER_STR_VAL) {
-		pred->field_name = NULL;
+	if (!n_normal_preds || n_logical_preds >= n_normal_preds) {
+		parse_error(ps, FILT_ERR_INVALID_FILTER, 0);
 		return -EINVAL;
 	}
 
-	strcpy(pred->str_val, val_str);
-	pred->str_len = strlen(val_str);
+	return 0;
+}
 
-	pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
-	if (!pred->field_name)
-		return -ENOMEM;
+static int replace_preds(struct event_subsystem *system,
+			 struct ftrace_event_call *call,
+			 struct filter_parse_state *ps,
+			 char *filter_string)
+{
+	char *operand1 = NULL, *operand2 = NULL;
+	struct filter_pred *pred;
+	struct postfix_elt *elt;
+	int err;
+
+	err = check_preds(ps);
+	if (err)
+		return err;
+
+	list_for_each_entry(elt, &ps->postfix, list) {
+		if (elt->op == OP_NONE) {
+			if (!operand1)
+				operand1 = elt->operand;
+			else if (!operand2)
+				operand2 = elt->operand;
+			else {
+				parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0);
+				return -EINVAL;
+			}
+			continue;
+		}
+
+		if (elt->op == OP_AND || elt->op == OP_OR) {
+			pred = create_logical_pred(elt->op);
+			if (call) {
+				err = filter_add_pred(ps, call, pred);
+				filter_free_pred(pred);
+			} else
+				err = filter_add_subsystem_pred(ps, system,
+							pred, filter_string);
+			if (err)
+				return err;
+
+			operand1 = operand2 = NULL;
+			continue;
+		}
+
+		if (!operand1 || !operand2) {
+			parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
+			return -EINVAL;
+		}
+
+		pred = create_pred(elt->op, operand1, operand2);
+		if (call) {
+			err = filter_add_pred(ps, call, pred);
+			filter_free_pred(pred);
+		} else
+			err = filter_add_subsystem_pred(ps, system, pred,
+							filter_string);
+		if (err)
+			return err;
+
+		operand1 = operand2 = NULL;
+	}
 
 	return 0;
 }
 
+int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
+{
+	int err;
+
+	struct filter_parse_state *ps;
+
+	mutex_lock(&filter_mutex);
+
+	if (!strcmp(strstrip(filter_string), "0")) {
+		filter_disable_preds(call);
+		remove_filter_string(call->filter);
+		mutex_unlock(&filter_mutex);
+		return 0;
+	}
+
+	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+	if (!ps)
+		return -ENOMEM;
+
+	filter_disable_preds(call);
+	replace_filter_string(call->filter, filter_string);
+
+	parse_init(ps, filter_ops, filter_string);
+	err = filter_parse(ps);
+	if (err) {
+		append_filter_err(ps, call->filter);
+		goto out;
+	}
+
+	err = replace_preds(NULL, call, ps, filter_string);
+	if (err)
+		append_filter_err(ps, call->filter);
+
+out:
+	filter_opstack_clear(ps);
+	postfix_clear(ps);
+	kfree(ps);
+
+	mutex_unlock(&filter_mutex);
+
+	return err;
+}
+
+int apply_subsystem_event_filter(struct event_subsystem *system,
+				 char *filter_string)
+{
+	int err;
+
+	struct filter_parse_state *ps;
+
+	mutex_lock(&filter_mutex);
+
+	if (!strcmp(strstrip(filter_string), "0")) {
+		filter_free_subsystem_preds(system);
+		remove_filter_string(system->filter);
+		mutex_unlock(&filter_mutex);
+		return 0;
+	}
+
+	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+	if (!ps)
+		return -ENOMEM;
+
+	filter_free_subsystem_preds(system);
+	replace_filter_string(system->filter, filter_string);
+
+	parse_init(ps, filter_ops, filter_string);
+	err = filter_parse(ps);
+	if (err) {
+		append_filter_err(ps, system->filter);
+		goto out;
+	}
+
+	err = replace_preds(system, NULL, ps, filter_string);
+	if (err)
+		append_filter_err(ps, system->filter);
+
+out:
+	filter_opstack_clear(ps);
+	postfix_clear(ps);
+	kfree(ps);
+
+	mutex_unlock(&filter_mutex);
+
+	return err;
+}
 
-- 
cgit v1.2.3-70-g09d2


From 829b42dd395c5801f6ae87da87ecbdcfd5ef1a6c Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:46:59 +0200
Subject: perf_counter, x86: declare perf_max_counters only for
 CONFIG_PERF_COUNTERS

This is only needed for CONFIG_PERF_COUNTERS enabled.

[ Impact: cleanup ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1241002046-8832-3-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 98143288530..be10b3ffe32 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -512,12 +512,13 @@ struct perf_cpu_context {
 	int			recursion[4];
 };
 
+#ifdef CONFIG_PERF_COUNTERS
+
 /*
  * Set by architecture code:
  */
 extern int perf_max_counters;
 
-#ifdef CONFIG_PERF_COUNTERS
 extern const struct hw_perf_counter_ops *
 hw_perf_counter_init(struct perf_counter *counter);
 
-- 
cgit v1.2.3-70-g09d2


From 4aeb0b4239bb3b67ed402cb9cef3e000c892cadf Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:03 +0200
Subject: perfcounters: rename struct hw_perf_counter_ops into struct pmu

This patch renames struct hw_perf_counter_ops into struct pmu. It
introduces a structure to describe a cpu specific pmu (performance
monitoring unit). It may contain ops and data. The new name of the
structure fits better, is shorter, and thus better to handle. Where it
was appropriate, names of function and variable have been changed too.

[ Impact: cleanup ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-7-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/perf_counter.c | 25 +++++++-------
 arch/x86/kernel/cpu/perf_counter.c | 37 ++++++++++-----------
 include/linux/perf_counter.h       |  9 +++--
 kernel/perf_counter.c              | 68 ++++++++++++++++++--------------------
 4 files changed, 66 insertions(+), 73 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index bd76d0fa2c3..d9bbe5efc64 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -256,7 +256,7 @@ static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new)
 	return 0;
 }
 
-static void power_perf_read(struct perf_counter *counter)
+static void power_pmu_read(struct perf_counter *counter)
 {
 	long val, delta, prev;
 
@@ -405,7 +405,7 @@ void hw_perf_restore(u64 disable)
 	for (i = 0; i < cpuhw->n_counters; ++i) {
 		counter = cpuhw->counter[i];
 		if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) {
-			power_perf_read(counter);
+			power_pmu_read(counter);
 			write_pmc(counter->hw.idx, 0);
 			counter->hw.idx = 0;
 		}
@@ -477,7 +477,7 @@ static void counter_sched_in(struct perf_counter *counter, int cpu)
 	counter->oncpu = cpu;
 	counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped;
 	if (is_software_counter(counter))
-		counter->hw_ops->enable(counter);
+		counter->pmu->enable(counter);
 }
 
 /*
@@ -533,7 +533,7 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
  * re-enable the PMU in order to get hw_perf_restore to do the
  * actual work of reconfiguring the PMU.
  */
-static int power_perf_enable(struct perf_counter *counter)
+static int power_pmu_enable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuhw;
 	unsigned long flags;
@@ -573,7 +573,7 @@ static int power_perf_enable(struct perf_counter *counter)
 /*
  * Remove a counter from the PMU.
  */
-static void power_perf_disable(struct perf_counter *counter)
+static void power_pmu_disable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuhw;
 	long i;
@@ -583,7 +583,7 @@ static void power_perf_disable(struct perf_counter *counter)
 	local_irq_save(flags);
 	pmudis = hw_perf_save_disable();
 
-	power_perf_read(counter);
+	power_pmu_read(counter);
 
 	cpuhw = &__get_cpu_var(cpu_hw_counters);
 	for (i = 0; i < cpuhw->n_counters; ++i) {
@@ -607,10 +607,10 @@ static void power_perf_disable(struct perf_counter *counter)
 	local_irq_restore(flags);
 }
 
-struct hw_perf_counter_ops power_perf_ops = {
-	.enable = power_perf_enable,
-	.disable = power_perf_disable,
-	.read = power_perf_read
+struct pmu power_pmu = {
+	.enable		= power_pmu_enable,
+	.disable	= power_pmu_disable,
+	.read		= power_pmu_read,
 };
 
 /* Number of perf_counters counting hardware events */
@@ -631,8 +631,7 @@ static void hw_perf_counter_destroy(struct perf_counter *counter)
 	}
 }
 
-const struct hw_perf_counter_ops *
-hw_perf_counter_init(struct perf_counter *counter)
+const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 {
 	unsigned long ev;
 	struct perf_counter *ctrs[MAX_HWCOUNTERS];
@@ -705,7 +704,7 @@ hw_perf_counter_init(struct perf_counter *counter)
 
 	if (err)
 		return ERR_PTR(err);
-	return &power_perf_ops;
+	return &power_pmu;
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index ad663d5ad2d..95de980c74a 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -515,8 +515,8 @@ __pmc_fixed_disable(struct perf_counter *counter,
 }
 
 static inline void
-__pmc_generic_disable(struct perf_counter *counter,
-			   struct hw_perf_counter *hwc, unsigned int idx)
+__x86_pmu_disable(struct perf_counter *counter,
+		  struct hw_perf_counter *hwc, unsigned int idx)
 {
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
 		__pmc_fixed_disable(counter, hwc, idx);
@@ -591,8 +591,8 @@ __pmc_fixed_enable(struct perf_counter *counter,
 }
 
 static void
-__pmc_generic_enable(struct perf_counter *counter,
-			  struct hw_perf_counter *hwc, int idx)
+__x86_pmu_enable(struct perf_counter *counter,
+		 struct hw_perf_counter *hwc, int idx)
 {
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
 		__pmc_fixed_enable(counter, hwc, idx);
@@ -626,7 +626,7 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
 /*
  * Find a PMC slot for the freshly enabled / scheduled in counter:
  */
-static int pmc_generic_enable(struct perf_counter *counter)
+static int x86_pmu_enable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 	struct hw_perf_counter *hwc = &counter->hw;
@@ -667,7 +667,7 @@ try_generic:
 
 	perf_counters_lapic_init(hwc->nmi);
 
-	__pmc_generic_disable(counter, hwc, idx);
+	__x86_pmu_disable(counter, hwc, idx);
 
 	cpuc->counters[idx] = counter;
 	/*
@@ -676,7 +676,7 @@ try_generic:
 	barrier();
 
 	__hw_perf_counter_set_period(counter, hwc, idx);
-	__pmc_generic_enable(counter, hwc, idx);
+	__x86_pmu_enable(counter, hwc, idx);
 
 	return 0;
 }
@@ -731,13 +731,13 @@ void perf_counter_print_debug(void)
 	local_irq_enable();
 }
 
-static void pmc_generic_disable(struct perf_counter *counter)
+static void x86_pmu_disable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 	struct hw_perf_counter *hwc = &counter->hw;
 	unsigned int idx = hwc->idx;
 
-	__pmc_generic_disable(counter, hwc, idx);
+	__x86_pmu_disable(counter, hwc, idx);
 
 	clear_bit(idx, cpuc->used);
 	cpuc->counters[idx] = NULL;
@@ -767,7 +767,7 @@ static void perf_save_and_restart(struct perf_counter *counter)
 	__hw_perf_counter_set_period(counter, hwc, idx);
 
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
-		__pmc_generic_enable(counter, hwc, idx);
+		__x86_pmu_enable(counter, hwc, idx);
 }
 
 /*
@@ -805,7 +805,7 @@ again:
 
 		perf_save_and_restart(counter);
 		if (perf_counter_overflow(counter, nmi, regs, 0))
-			__pmc_generic_disable(counter, &counter->hw, bit);
+			__x86_pmu_disable(counter, &counter->hw, bit);
 	}
 
 	hw_perf_ack_status(ack);
@@ -1034,19 +1034,18 @@ void __init init_hw_perf_counters(void)
 	register_die_notifier(&perf_counter_nmi_notifier);
 }
 
-static void pmc_generic_read(struct perf_counter *counter)
+static void x86_pmu_read(struct perf_counter *counter)
 {
 	x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
 }
 
-static const struct hw_perf_counter_ops x86_perf_counter_ops = {
-	.enable		= pmc_generic_enable,
-	.disable	= pmc_generic_disable,
-	.read		= pmc_generic_read,
+static const struct pmu pmu = {
+	.enable		= x86_pmu_enable,
+	.disable	= x86_pmu_disable,
+	.read		= x86_pmu_read,
 };
 
-const struct hw_perf_counter_ops *
-hw_perf_counter_init(struct perf_counter *counter)
+const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 {
 	int err;
 
@@ -1054,7 +1053,7 @@ hw_perf_counter_init(struct perf_counter *counter)
 	if (err)
 		return ERR_PTR(err);
 
-	return &x86_perf_counter_ops;
+	return &pmu;
 }
 
 /*
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index be10b3ffe32..c3db52dc876 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -334,9 +334,9 @@ struct hw_perf_counter {
 struct perf_counter;
 
 /**
- * struct hw_perf_counter_ops - performance counter hw ops
+ * struct pmu - generic performance monitoring unit
  */
-struct hw_perf_counter_ops {
+struct pmu {
 	int (*enable)			(struct perf_counter *counter);
 	void (*disable)			(struct perf_counter *counter);
 	void (*read)			(struct perf_counter *counter);
@@ -381,7 +381,7 @@ struct perf_counter {
 	struct list_head		sibling_list;
 	int 				nr_siblings;
 	struct perf_counter		*group_leader;
-	const struct hw_perf_counter_ops *hw_ops;
+	const struct pmu		*pmu;
 
 	enum perf_counter_active_state	state;
 	enum perf_counter_active_state	prev_state;
@@ -519,8 +519,7 @@ struct perf_cpu_context {
  */
 extern int perf_max_counters;
 
-extern const struct hw_perf_counter_ops *
-hw_perf_counter_init(struct perf_counter *counter);
+extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter);
 
 extern void perf_counter_task_sched_in(struct task_struct *task, int cpu);
 extern void perf_counter_task_sched_out(struct task_struct *task, int cpu);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 09396098dd0..582108addef 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -52,8 +52,7 @@ static DEFINE_MUTEX(perf_resource_mutex);
 /*
  * Architecture provided APIs - weak aliases:
  */
-extern __weak const struct hw_perf_counter_ops *
-hw_perf_counter_init(struct perf_counter *counter)
+extern __weak const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 {
 	return NULL;
 }
@@ -124,7 +123,7 @@ counter_sched_out(struct perf_counter *counter,
 
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	counter->tstamp_stopped = ctx->time;
-	counter->hw_ops->disable(counter);
+	counter->pmu->disable(counter);
 	counter->oncpu = -1;
 
 	if (!is_software_counter(counter))
@@ -417,7 +416,7 @@ counter_sched_in(struct perf_counter *counter,
 	 */
 	smp_wmb();
 
-	if (counter->hw_ops->enable(counter)) {
+	if (counter->pmu->enable(counter)) {
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
 		counter->oncpu = -1;
 		return -EAGAIN;
@@ -1096,7 +1095,7 @@ static void __read(void *info)
 	local_irq_save(flags);
 	if (ctx->is_active)
 		update_context_time(ctx);
-	counter->hw_ops->read(counter);
+	counter->pmu->read(counter);
 	update_counter_times(counter);
 	local_irq_restore(flags);
 }
@@ -1922,7 +1921,7 @@ static void perf_counter_output(struct perf_counter *counter,
 		leader = counter->group_leader;
 		list_for_each_entry(sub, &leader->sibling_list, list_entry) {
 			if (sub != counter)
-				sub->hw_ops->read(sub);
+				sub->pmu->read(sub);
 
 			group_entry.event = sub->hw_event.config;
 			group_entry.counter = atomic64_read(&sub->count);
@@ -2264,7 +2263,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 	struct pt_regs *regs;
 
 	counter	= container_of(hrtimer, struct perf_counter, hw.hrtimer);
-	counter->hw_ops->read(counter);
+	counter->pmu->read(counter);
 
 	regs = get_irq_regs();
 	/*
@@ -2410,7 +2409,7 @@ static void perf_swcounter_disable(struct perf_counter *counter)
 	perf_swcounter_update(counter);
 }
 
-static const struct hw_perf_counter_ops perf_ops_generic = {
+static const struct pmu perf_ops_generic = {
 	.enable		= perf_swcounter_enable,
 	.disable	= perf_swcounter_disable,
 	.read		= perf_swcounter_read,
@@ -2460,7 +2459,7 @@ static void cpu_clock_perf_counter_read(struct perf_counter *counter)
 	cpu_clock_perf_counter_update(counter);
 }
 
-static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
+static const struct pmu perf_ops_cpu_clock = {
 	.enable		= cpu_clock_perf_counter_enable,
 	.disable	= cpu_clock_perf_counter_disable,
 	.read		= cpu_clock_perf_counter_read,
@@ -2522,7 +2521,7 @@ static void task_clock_perf_counter_read(struct perf_counter *counter)
 	task_clock_perf_counter_update(counter, time);
 }
 
-static const struct hw_perf_counter_ops perf_ops_task_clock = {
+static const struct pmu perf_ops_task_clock = {
 	.enable		= task_clock_perf_counter_enable,
 	.disable	= task_clock_perf_counter_disable,
 	.read		= task_clock_perf_counter_read,
@@ -2574,7 +2573,7 @@ static void cpu_migrations_perf_counter_disable(struct perf_counter *counter)
 	cpu_migrations_perf_counter_update(counter);
 }
 
-static const struct hw_perf_counter_ops perf_ops_cpu_migrations = {
+static const struct pmu perf_ops_cpu_migrations = {
 	.enable		= cpu_migrations_perf_counter_enable,
 	.disable	= cpu_migrations_perf_counter_disable,
 	.read		= cpu_migrations_perf_counter_read,
@@ -2600,8 +2599,7 @@ static void tp_perf_counter_destroy(struct perf_counter *counter)
 	ftrace_profile_disable(perf_event_id(&counter->hw_event));
 }
 
-static const struct hw_perf_counter_ops *
-tp_perf_counter_init(struct perf_counter *counter)
+static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 {
 	int event_id = perf_event_id(&counter->hw_event);
 	int ret;
@@ -2616,18 +2614,16 @@ tp_perf_counter_init(struct perf_counter *counter)
 	return &perf_ops_generic;
 }
 #else
-static const struct hw_perf_counter_ops *
-tp_perf_counter_init(struct perf_counter *counter)
+static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 {
 	return NULL;
 }
 #endif
 
-static const struct hw_perf_counter_ops *
-sw_perf_counter_init(struct perf_counter *counter)
+static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 {
 	struct perf_counter_hw_event *hw_event = &counter->hw_event;
-	const struct hw_perf_counter_ops *hw_ops = NULL;
+	const struct pmu *pmu = NULL;
 	struct hw_perf_counter *hwc = &counter->hw;
 
 	/*
@@ -2639,7 +2635,7 @@ sw_perf_counter_init(struct perf_counter *counter)
 	 */
 	switch (perf_event_id(&counter->hw_event)) {
 	case PERF_COUNT_CPU_CLOCK:
-		hw_ops = &perf_ops_cpu_clock;
+		pmu = &perf_ops_cpu_clock;
 
 		if (hw_event->irq_period && hw_event->irq_period < 10000)
 			hw_event->irq_period = 10000;
@@ -2650,9 +2646,9 @@ sw_perf_counter_init(struct perf_counter *counter)
 		 * use the cpu_clock counter instead.
 		 */
 		if (counter->ctx->task)
-			hw_ops = &perf_ops_task_clock;
+			pmu = &perf_ops_task_clock;
 		else
-			hw_ops = &perf_ops_cpu_clock;
+			pmu = &perf_ops_cpu_clock;
 
 		if (hw_event->irq_period && hw_event->irq_period < 10000)
 			hw_event->irq_period = 10000;
@@ -2661,18 +2657,18 @@ sw_perf_counter_init(struct perf_counter *counter)
 	case PERF_COUNT_PAGE_FAULTS_MIN:
 	case PERF_COUNT_PAGE_FAULTS_MAJ:
 	case PERF_COUNT_CONTEXT_SWITCHES:
-		hw_ops = &perf_ops_generic;
+		pmu = &perf_ops_generic;
 		break;
 	case PERF_COUNT_CPU_MIGRATIONS:
 		if (!counter->hw_event.exclude_kernel)
-			hw_ops = &perf_ops_cpu_migrations;
+			pmu = &perf_ops_cpu_migrations;
 		break;
 	}
 
-	if (hw_ops)
+	if (pmu)
 		hwc->irq_period = hw_event->irq_period;
 
-	return hw_ops;
+	return pmu;
 }
 
 /*
@@ -2685,7 +2681,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 		   struct perf_counter *group_leader,
 		   gfp_t gfpflags)
 {
-	const struct hw_perf_counter_ops *hw_ops;
+	const struct pmu *pmu;
 	struct perf_counter *counter;
 	long err;
 
@@ -2713,46 +2709,46 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	counter->cpu			= cpu;
 	counter->hw_event		= *hw_event;
 	counter->group_leader		= group_leader;
-	counter->hw_ops			= NULL;
+	counter->pmu			= NULL;
 	counter->ctx			= ctx;
 
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	if (hw_event->disabled)
 		counter->state = PERF_COUNTER_STATE_OFF;
 
-	hw_ops = NULL;
+	pmu = NULL;
 
 	if (perf_event_raw(hw_event)) {
-		hw_ops = hw_perf_counter_init(counter);
+		pmu = hw_perf_counter_init(counter);
 		goto done;
 	}
 
 	switch (perf_event_type(hw_event)) {
 	case PERF_TYPE_HARDWARE:
-		hw_ops = hw_perf_counter_init(counter);
+		pmu = hw_perf_counter_init(counter);
 		break;
 
 	case PERF_TYPE_SOFTWARE:
-		hw_ops = sw_perf_counter_init(counter);
+		pmu = sw_perf_counter_init(counter);
 		break;
 
 	case PERF_TYPE_TRACEPOINT:
-		hw_ops = tp_perf_counter_init(counter);
+		pmu = tp_perf_counter_init(counter);
 		break;
 	}
 done:
 	err = 0;
-	if (!hw_ops)
+	if (!pmu)
 		err = -EINVAL;
-	else if (IS_ERR(hw_ops))
-		err = PTR_ERR(hw_ops);
+	else if (IS_ERR(pmu))
+		err = PTR_ERR(pmu);
 
 	if (err) {
 		kfree(counter);
 		return ERR_PTR(err);
 	}
 
-	counter->hw_ops = hw_ops;
+	counter->pmu = pmu;
 
 	if (counter->hw_event.mmap)
 		atomic_inc(&nr_mmap_tracking);
-- 
cgit v1.2.3-70-g09d2


From 6f00cada07bb5da7f751929d3173494dcc5446cc Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 29 Apr 2009 12:47:17 +0200
Subject: perf_counter, x86: consistent use of type int for counter index

The type of counter index is sometimes implemented as unsigned
int. This patch changes this to have a consistent usage of int.

[ Impact: cleanup ]

Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1241002046-8832-21-git-send-email-robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_counter.c | 8 ++++----
 include/linux/perf_counter.h       | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index f7fd4a35515..d8beebeb270 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -459,7 +459,7 @@ static void hw_perf_disable(int idx, u64 config)
 
 static inline void
 __pmc_fixed_disable(struct perf_counter *counter,
-		    struct hw_perf_counter *hwc, unsigned int __idx)
+		    struct hw_perf_counter *hwc, int __idx)
 {
 	int idx = __idx - X86_PMC_IDX_FIXED;
 	u64 ctrl_val, mask;
@@ -474,7 +474,7 @@ __pmc_fixed_disable(struct perf_counter *counter,
 
 static inline void
 __x86_pmu_disable(struct perf_counter *counter,
-		  struct hw_perf_counter *hwc, unsigned int idx)
+		  struct hw_perf_counter *hwc, int idx)
 {
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
 		__pmc_fixed_disable(counter, hwc, idx);
@@ -523,7 +523,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
 
 static inline void
 __pmc_fixed_enable(struct perf_counter *counter,
-		   struct hw_perf_counter *hwc, unsigned int __idx)
+		   struct hw_perf_counter *hwc, int __idx)
 {
 	int idx = __idx - X86_PMC_IDX_FIXED;
 	u64 ctrl_val, bits, mask;
@@ -691,7 +691,7 @@ static void x86_pmu_disable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 	struct hw_perf_counter *hwc = &counter->hw;
-	unsigned int idx = hwc->idx;
+	int idx = hwc->idx;
 
 	/*
 	 * Must be done before we disable, otherwise the nmi handler
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index c3db52dc876..41aed427005 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -318,7 +318,7 @@ struct hw_perf_counter {
 			unsigned long			config_base;
 			unsigned long			counter_base;
 			int				nmi;
-			unsigned int			idx;
+			int				idx;
 		};
 		union { /* software */
 			atomic64_t			count;
-- 
cgit v1.2.3-70-g09d2


From b1fca26631f76a5e8b18435a43f5d82b8734da4b Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Mon, 23 Mar 2009 18:22:09 +0100
Subject: mutex: add atomic_dec_and_mutex_lock()

Much like the atomic_dec_and_lock() function in which we take an hold a
spin_lock if we drop the atomic to 0 this function takes and holds the
mutex if we dec the atomic to 0.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Orig-LKML-Reference: <20090323172417.410913479@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/mutex.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 3069ec7e0ab..93054fc3635 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -151,4 +151,27 @@ extern int __must_check mutex_lock_killable(struct mutex *lock);
 extern int mutex_trylock(struct mutex *lock);
 extern void mutex_unlock(struct mutex *lock);
 
+/**
+ * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
+ * @cnt: the atomic which we are to dec
+ * @lock: the mutex to return holding if we dec to 0
+ *
+ * return true and hold lock if we dec to 0, return false otherwise
+ */
+static inline int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
+{
+	/* dec if we can't possibly hit 0 */
+	if (atomic_add_unless(cnt, -1, 1))
+		return 0;
+	/* we might hit 0, so take the lock */
+	mutex_lock(lock);
+	if (!atomic_dec_and_test(cnt)) {
+		/* when we actually did the dec, we didn't hit 0 */
+		mutex_unlock(lock);
+		return 0;
+	}
+	/* we hit 0, and we hold the lock */
+	return 1;
+}
+
 #endif
-- 
cgit v1.2.3-70-g09d2


From c654b8a9cba6002aad1c01919e4928a79a4a6dcf Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Thu, 16 Apr 2009 17:33:25 -0400
Subject: nfsd: support ext4 i_version

ext4 supports a real NFSv4 change attribute, which is bumped whenever
the ctime would be updated, including times when two updates arrive
within a jiffy of each other.  (Note that although ext4 has space for
nanosecond-precision ctime, the real resolution is lower: it actually
uses jiffies as the time-source.)  This ensures clients will invalidate
their caches when they need to.

There is some fear that keeping the i_version up-to-date could have
performance drawbacks, so for now it's turned on only by a mount option.
We hope to do something better eventually.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Cc: Theodore Tso <tytso@mit.edu>
---
 fs/nfsd/nfs3xdr.c          |  1 +
 fs/nfsd/nfs4xdr.c          | 63 ++++++++++++++++++++++++++++++----------------
 include/linux/nfsd/nfsfh.h |  7 ++++++
 include/linux/nfsd/xdr4.h  | 17 ++++++++++---
 4 files changed, 63 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 17d0dd99720..01d4ec1c88e 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -272,6 +272,7 @@ void fill_post_wcc(struct svc_fh *fhp)
 
 	err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry,
 			&fhp->fh_post_attr);
+	fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version;
 	if (err)
 		fhp->fh_post_saved = 0;
 	else
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 4a71fcd3f03..12d36a7361c 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1490,13 +1490,41 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 	memcpy(p, ptr, nbytes);					\
 	p += XDR_QUADLEN(nbytes);				\
 }} while (0)
-#define WRITECINFO(c)		do {				\
-	*p++ = htonl(c.atomic);					\
-	*p++ = htonl(c.before_ctime_sec);				\
-	*p++ = htonl(c.before_ctime_nsec);				\
-	*p++ = htonl(c.after_ctime_sec);				\
-	*p++ = htonl(c.after_ctime_nsec);				\
-} while (0)
+
+static void write32(__be32 **p, u32 n)
+{
+	*(*p)++ = n;
+}
+
+static void write64(__be32 **p, u64 n)
+{
+	write32(p, (u32)(n >> 32));
+	write32(p, (u32)n);
+}
+
+static void write_change(__be32 **p, struct kstat *stat, struct inode *inode)
+{
+	if (IS_I_VERSION(inode)) {
+		write64(p, inode->i_version);
+	} else {
+		write32(p, stat->ctime.tv_sec);
+		write32(p, stat->ctime.tv_nsec);
+	}
+}
+
+static void write_cinfo(__be32 **p, struct nfsd4_change_info *c)
+{
+	write32(p, c->atomic);
+	if (c->change_supported) {
+		write64(p, c->before_change);
+		write64(p, c->after_change);
+	} else {
+		write32(p, c->before_ctime_sec);
+		write32(p, c->before_ctime_nsec);
+		write32(p, c->after_ctime_sec);
+		write32(p, c->after_ctime_nsec);
+	}
+}
 
 #define RESERVE_SPACE(nbytes)	do {				\
 	p = resp->p;						\
@@ -1849,16 +1877,9 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 			WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME);
 	}
 	if (bmval0 & FATTR4_WORD0_CHANGE) {
-		/*
-		 * Note: This _must_ be consistent with the scheme for writing
-		 * change_info, so any changes made here must be reflected there
-		 * as well.  (See xdr4.h:set_change_info() and the WRITECINFO()
-		 * macro above.)
-		 */
 		if ((buflen -= 8) < 0)
 			goto out_resource;
-		WRITE32(stat.ctime.tv_sec);
-		WRITE32(stat.ctime.tv_nsec);
+		write_change(&p, &stat, dentry->d_inode);
 	}
 	if (bmval0 & FATTR4_WORD0_SIZE) {
 		if ((buflen -= 8) < 0)
@@ -2364,7 +2385,7 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
 
 	if (!nfserr) {
 		RESERVE_SPACE(32);
-		WRITECINFO(create->cr_cinfo);
+		write_cinfo(&p, &create->cr_cinfo);
 		WRITE32(2);
 		WRITE32(create->cr_bmval[0]);
 		WRITE32(create->cr_bmval[1]);
@@ -2475,7 +2496,7 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li
 
 	if (!nfserr) {
 		RESERVE_SPACE(20);
-		WRITECINFO(link->li_cinfo);
+		write_cinfo(&p, &link->li_cinfo);
 		ADJUST_ARGS();
 	}
 	return nfserr;
@@ -2493,7 +2514,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
 
 	nfsd4_encode_stateid(resp, &open->op_stateid);
 	RESERVE_SPACE(40);
-	WRITECINFO(open->op_cinfo);
+	write_cinfo(&p, &open->op_cinfo);
 	WRITE32(open->op_rflags);
 	WRITE32(2);
 	WRITE32(open->op_bmval[0]);
@@ -2771,7 +2792,7 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
 
 	if (!nfserr) {
 		RESERVE_SPACE(20);
-		WRITECINFO(remove->rm_cinfo);
+		write_cinfo(&p, &remove->rm_cinfo);
 		ADJUST_ARGS();
 	}
 	return nfserr;
@@ -2784,8 +2805,8 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
 
 	if (!nfserr) {
 		RESERVE_SPACE(40);
-		WRITECINFO(rename->rn_sinfo);
-		WRITECINFO(rename->rn_tinfo);
+		write_cinfo(&p, &rename->rn_sinfo);
+		write_cinfo(&p, &rename->rn_tinfo);
 		ADJUST_ARGS();
 	}
 	return nfserr;
diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
index afa19016c4a..8f641c90845 100644
--- a/include/linux/nfsd/nfsfh.h
+++ b/include/linux/nfsd/nfsfh.h
@@ -151,9 +151,15 @@ typedef struct svc_fh {
 	__u64			fh_pre_size;	/* size before operation */
 	struct timespec		fh_pre_mtime;	/* mtime before oper */
 	struct timespec		fh_pre_ctime;	/* ctime before oper */
+	/*
+	 * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode)
+	 *  to find out if it is valid.
+	 */
+	u64			fh_pre_change;
 
 	/* Post-op attributes saved in fh_unlock */
 	struct kstat		fh_post_attr;	/* full attrs after operation */
+	u64			fh_post_change; /* nfsv4 change; see above */
 #endif /* CONFIG_NFSD_V3 */
 
 } svc_fh;
@@ -298,6 +304,7 @@ fill_pre_wcc(struct svc_fh *fhp)
 		fhp->fh_pre_mtime = inode->i_mtime;
 		fhp->fh_pre_ctime = inode->i_ctime;
 		fhp->fh_pre_size  = inode->i_size;
+		fhp->fh_pre_change = inode->i_version;
 		fhp->fh_pre_saved = 1;
 	}
 }
diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h
index f80d6013fdc..d0f050f01ec 100644
--- a/include/linux/nfsd/xdr4.h
+++ b/include/linux/nfsd/xdr4.h
@@ -64,10 +64,13 @@ static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs)
 
 struct nfsd4_change_info {
 	u32		atomic;
+	bool		change_supported;
 	u32		before_ctime_sec;
 	u32		before_ctime_nsec;
+	u64		before_change;
 	u32		after_ctime_sec;
 	u32		after_ctime_nsec;
+	u64		after_change;
 };
 
 struct nfsd4_access {
@@ -503,10 +506,16 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
 {
 	BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved);
 	cinfo->atomic = 1;
-	cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
-	cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
-	cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
-	cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
+	cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode);
+	if (cinfo->change_supported) {
+		cinfo->before_change = fhp->fh_pre_change;
+		cinfo->after_change = fhp->fh_post_change;
+	} else {
+		cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
+		cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
+		cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
+		cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
+	}
 }
 
 int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
-- 
cgit v1.2.3-70-g09d2


From 3cef9ab266a932899e756f7e1ea7a988a97bf3b2 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Mon, 23 Feb 2009 21:42:10 -0800
Subject: nfsd4: lookup up callback cred only once

Lookup the callback cred once and then use it for all subsequent
callbacks.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4callback.c     | 26 ++++++++++++++++++++++++++
 fs/nfsd/nfs4state.c        |  4 ++++
 include/linux/nfsd/state.h |  1 +
 3 files changed, 31 insertions(+)

(limited to 'include')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 711c6282151..cc10ed35ac8 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -415,6 +415,22 @@ static void warn_no_callback_path(struct nfs4_client *clp, int reason)
 		(int)clp->cl_name.len, clp->cl_name.data, reason);
 }
 
+static struct rpc_cred *lookup_cb_cred(struct nfs4_callback *cb)
+{
+	struct auth_cred acred = {
+		.machine_cred = 1
+	};
+
+	/*
+	 * Note in the gss case this doesn't actually have to wait for a
+	 * gss upcall (or any calls to the client); this just creates a
+	 * non-uptodate cred which the rpc state machine will fill in with
+	 * a refresh_upcall later.
+	 */
+	return rpcauth_lookup_credcache(cb->cb_client->cl_auth, &acred,
+							RPCAUTH_LOOKUP_NEW);
+}
+
 static int do_probe_callback(void *data)
 {
 	struct nfs4_client *clp = data;
@@ -423,9 +439,18 @@ static int do_probe_callback(void *data)
 		.rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
 		.rpc_argp       = clp,
 	};
+	struct rpc_cred *cred;
 	int status;
 
+	cred = lookup_cb_cred(cb);
+	if (IS_ERR(cred)) {
+		status = PTR_ERR(cred);
+		goto out;
+	}
+	cb->cb_cred = cred;
+	msg.rpc_cred = cb->cb_cred;
 	status = rpc_call_sync(cb->cb_client, &msg, RPC_TASK_SOFT);
+out:
 	if (status)
 		warn_no_callback_path(clp, status);
 	else
@@ -475,6 +500,7 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
 		.rpc_argp = cbr,
+		.rpc_cred = clp->cl_callback.cb_cred
 	};
 	int retries = 1;
 	int status = 0;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 7e1fcc3aade..b205c7d7bc6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -643,6 +643,10 @@ shutdown_callback_client(struct nfs4_client *clp)
 		clp->cl_callback.cb_client = NULL;
 		rpc_shutdown_client(clnt);
 	}
+	if (clp->cl_callback.cb_cred) {
+		put_rpccred(clp->cl_callback.cb_cred);
+		clp->cl_callback.cb_cred = NULL;
+	}
 }
 
 static inline void
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 4d61c873fee..8d882a3eb4b 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -97,6 +97,7 @@ struct nfs4_callback {
 	/* RPC client info */
 	atomic_t		cb_set;     /* successful CB_NULL call */
 	struct rpc_clnt *       cb_client;
+	struct rpc_cred	*	cb_cred;
 };
 
 /* Maximum number of slots per session. 128 is useful for long haul TCP */
-- 
cgit v1.2.3-70-g09d2


From 3bcac0263f0b45e67a64034ebcb69eb9abb742f4 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 29 Apr 2009 13:45:05 +0100
Subject: SELinux: Don't flush inherited SIGKILL during execve()

Don't flush inherited SIGKILL during execve() in SELinux's post cred commit
hook.  This isn't really a security problem: if the SIGKILL came before the
credentials were changed, then we were right to receive it at the time, and
should honour it; if it came after the creds were changed, then we definitely
should honour it; and in any case, all that will happen is that the process
will be scrapped before it ever returns to userspace.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/sched.h    |  1 +
 kernel/signal.c          | 11 ++++++++---
 security/selinux/hooks.c |  9 +++++----
 3 files changed, 14 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1d19c025f9d..d3b787c7aef 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1875,6 +1875,7 @@ extern void sched_dead(struct task_struct *p);
 
 extern void proc_caches_init(void);
 extern void flush_signals(struct task_struct *);
+extern void __flush_signals(struct task_struct *);
 extern void ignore_signals(struct task_struct *);
 extern void flush_signal_handlers(struct task_struct *, int force_default);
 extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info);
diff --git a/kernel/signal.c b/kernel/signal.c
index 1c8814481a1..f93efec14ff 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -238,14 +238,19 @@ void flush_sigqueue(struct sigpending *queue)
 /*
  * Flush all pending signals for a task.
  */
+void __flush_signals(struct task_struct *t)
+{
+	clear_tsk_thread_flag(t, TIF_SIGPENDING);
+	flush_sigqueue(&t->pending);
+	flush_sigqueue(&t->signal->shared_pending);
+}
+
 void flush_signals(struct task_struct *t)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&t->sighand->siglock, flags);
-	clear_tsk_thread_flag(t, TIF_SIGPENDING);
-	flush_sigqueue(&t->pending);
-	flush_sigqueue(&t->signal->shared_pending);
+	__flush_signals(t);
 	spin_unlock_irqrestore(&t->sighand->siglock, flags);
 }
 
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index dd19ba81201..5a345115036 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2394,11 +2394,12 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm)
 		memset(&itimer, 0, sizeof itimer);
 		for (i = 0; i < 3; i++)
 			do_setitimer(i, &itimer, NULL);
-		flush_signals(current);
 		spin_lock_irq(&current->sighand->siglock);
-		flush_signal_handlers(current, 1);
-		sigemptyset(&current->blocked);
-		recalc_sigpending();
+		if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) {
+			__flush_signals(current);
+			flush_signal_handlers(current, 1);
+			sigemptyset(&current->blocked);
+		}
 		spin_unlock_irq(&current->sighand->siglock);
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 0821c71751ef88f4251d7206e76ce497ee267a2d Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 29 Apr 2009 08:02:59 +0000
Subject: ethtool: Add port type PORT_OTHER

Add a PORT_OTHER to represent all other physical port types.  Current
NICs generally do not allow switching between multiple port types in
software so specific types should not be needed.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 131b127b70f..5ccb6bd660c 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -605,6 +605,7 @@ struct ethtool_ops {
 #define PORT_MII		0x02
 #define PORT_FIBRE		0x03
 #define PORT_BNC		0x04
+#define PORT_OTHER		0xff
 
 /* Which transceiver to use. */
 #define XCVR_INTERNAL		0x00
-- 
cgit v1.2.3-70-g09d2


From 52c94dfae11d9ffd70b7bd003a36a4e210f2866a Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 29 Apr 2009 08:04:14 +0000
Subject: mdio: Add register definitions for MDIO (clause 45)

IEEE 802.3 clause 45 specifies the MDIO interface and registers for
use in 10G and other PHYs, similar to the MII management interface.

PHYs may have up to 32 MMDs corresponding to different sub-layers and
functions, each with up to 65536 registers.  These are addressed by
PRTAD (similar to the MII PHY address) and DEVAD.  Define a mapping
for specifying PRTAD and DEVAD through the existing MII ioctls.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mdio.h | 237 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 237 insertions(+)
 create mode 100644 include/linux/mdio.h

(limited to 'include')

diff --git a/include/linux/mdio.h b/include/linux/mdio.h
new file mode 100644
index 00000000000..d57ddb08a1b
--- /dev/null
+++ b/include/linux/mdio.h
@@ -0,0 +1,237 @@
+/*
+ * linux/mdio.h: definitions for MDIO (clause 45) transceivers
+ * Copyright 2006-2009 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef __LINUX_MDIO_H__
+#define __LINUX_MDIO_H__
+
+#include <linux/mii.h>
+
+/* MDIO Manageable Devices (MMDs). */
+#define MDIO_MMD_PMAPMD		1	/* Physical Medium Attachment/
+					 * Physical Medium Dependent */
+#define MDIO_MMD_WIS		2	/* WAN Interface Sublayer */
+#define MDIO_MMD_PCS		3	/* Physical Coding Sublayer */
+#define MDIO_MMD_PHYXS		4	/* PHY Extender Sublayer */
+#define MDIO_MMD_DTEXS		5	/* DTE Extender Sublayer */
+#define MDIO_MMD_TC		6	/* Transmission Convergence */
+#define MDIO_MMD_AN		7	/* Auto-Negotiation */
+#define MDIO_MMD_C22EXT		29	/* Clause 22 extension */
+#define MDIO_MMD_VEND1		30	/* Vendor specific 1 */
+#define MDIO_MMD_VEND2		31	/* Vendor specific 2 */
+
+/* Generic MDIO registers. */
+#define MDIO_CTRL1		MII_BMCR
+#define MDIO_STAT1		MII_BMSR
+#define MDIO_DEVID1		MII_PHYSID1
+#define MDIO_DEVID2		MII_PHYSID2
+#define MDIO_SPEED		4	/* Speed ability */
+#define MDIO_DEVS1		5	/* Devices in package */
+#define MDIO_DEVS2		6
+#define MDIO_CTRL2		7	/* 10G control 2 */
+#define MDIO_STAT2		8	/* 10G status 2 */
+#define MDIO_PMA_TXDIS		9	/* 10G PMA/PMD transmit disable */
+#define MDIO_PMA_RXDET		10	/* 10G PMA/PMD receive signal detect */
+#define MDIO_PMA_EXTABLE	11	/* 10G PMA/PMD extended ability */
+#define MDIO_PKGID1		14	/* Package identifier */
+#define MDIO_PKGID2		15
+#define MDIO_AN_ADVERTISE	16	/* AN advertising (base page) */
+#define MDIO_AN_LPA		19	/* AN LP abilities (base page) */
+#define MDIO_PHYXS_LNSTAT	24	/* PHY XGXS lane state */
+
+/* Media-dependent registers. */
+#define MDIO_PMA_10GBT_TXPWR	131	/* 10GBASE-T TX power control */
+#define MDIO_PCS_10GBX_STAT1	24	/* 10GBASE-X PCS status 1 */
+#define MDIO_PCS_10GBRT_STAT1	32	/* 10GBASE-R/-T PCS status 1 */
+#define MDIO_PCS_10GBRT_STAT2	33	/* 10GBASE-R/-T PCS status 2 */
+#define MDIO_AN_10GBT_CTRL	32	/* 10GBASE-T auto-negotiation control */
+#define MDIO_AN_10GBT_STAT	33	/* 10GBASE-T auto-negotiation status */
+
+/* Control register 1. */
+/* Enable extended speed selection */
+#define MDIO_CTRL1_SPEEDSELEXT		(BMCR_SPEED1000 | BMCR_SPEED100)
+/* All speed selection bits */
+#define MDIO_CTRL1_SPEEDSEL		(MDIO_CTRL1_SPEEDSELEXT | 0x003c)
+#define MDIO_CTRL1_FULLDPLX		BMCR_FULLDPLX
+#define MDIO_CTRL1_LPOWER		BMCR_PDOWN
+#define MDIO_CTRL1_RESET		BMCR_RESET
+#define MDIO_PMA_CTRL1_LOOPBACK		0x0001
+#define MDIO_PMA_CTRL1_SPEED1000	BMCR_SPEED1000
+#define MDIO_PMA_CTRL1_SPEED100		BMCR_SPEED100
+#define MDIO_PCS_CTRL1_LOOPBACK		BMCR_LOOPBACK
+#define MDIO_PHYXS_CTRL1_LOOPBACK	BMCR_LOOPBACK
+#define MDIO_AN_CTRL1_RESTART		BMCR_ANRESTART
+#define MDIO_AN_CTRL1_ENABLE		BMCR_ANENABLE
+#define MDIO_AN_CTRL1_XNP		0x2000	/* Enable extended next page */
+
+/* 10 Gb/s */
+#define MDIO_CTRL1_SPEED10G		(MDIO_CTRL1_SPEEDSELEXT | 0x00)
+/* 10PASS-TS/2BASE-TL */
+#define MDIO_CTRL1_SPEED10P2B		(MDIO_CTRL1_SPEEDSELEXT | 0x04)
+
+/* Status register 1. */
+#define MDIO_STAT1_LPOWERABLE		0x0002	/* Low-power ability */
+#define MDIO_STAT1_LSTATUS		BMSR_LSTATUS
+#define MDIO_STAT1_FAULT		0x0080	/* Fault */
+#define MDIO_AN_STAT1_LPABLE		0x0001	/* Link partner AN ability */
+#define MDIO_AN_STAT1_ABLE		BMSR_ANEGCAPABLE
+#define MDIO_AN_STAT1_RFAULT		BMSR_RFAULT
+#define MDIO_AN_STAT1_COMPLETE		BMSR_ANEGCOMPLETE
+#define MDIO_AN_STAT1_PAGE		0x0040	/* Page received */
+#define MDIO_AN_STAT1_XNP		0x0080	/* Extended next page status */
+
+/* Speed register. */
+#define MDIO_SPEED_10G			0x0001	/* 10G capable */
+#define MDIO_PMA_SPEED_2B		0x0002	/* 2BASE-TL capable */
+#define MDIO_PMA_SPEED_10P		0x0004	/* 10PASS-TS capable */
+#define MDIO_PMA_SPEED_1000		0x0010	/* 1000M capable */
+#define MDIO_PMA_SPEED_100		0x0020	/* 100M capable */
+#define MDIO_PMA_SPEED_10		0x0040	/* 10M capable */
+#define MDIO_PCS_SPEED_10P2B		0x0002	/* 10PASS-TS/2BASE-TL capable */
+
+/* Device present registers. */
+#define MDIO_DEVS_PRESENT(devad)	(1 << (devad))
+#define MDIO_DEVS_PMAPMD		MDIO_DEVS_PRESENT(MDIO_MMD_PMAPMD)
+#define MDIO_DEVS_WIS			MDIO_DEVS_PRESENT(MDIO_MMD_WIS)
+#define MDIO_DEVS_PCS			MDIO_DEVS_PRESENT(MDIO_MMD_PCS)
+#define MDIO_DEVS_PHYXS			MDIO_DEVS_PRESENT(MDIO_MMD_PHYXS)
+#define MDIO_DEVS_DTEXS			MDIO_DEVS_PRESENT(MDIO_MMD_DTEXS)
+#define MDIO_DEVS_TC			MDIO_DEVS_PRESENT(MDIO_MMD_TC)
+#define MDIO_DEVS_AN			MDIO_DEVS_PRESENT(MDIO_MMD_AN)
+#define MDIO_DEVS_C22EXT		MDIO_DEVS_PRESENT(MDIO_MMD_C22EXT)
+
+/* Control register 2. */
+#define MDIO_PMA_CTRL2_TYPE		0x000f	/* PMA/PMD type selection */
+#define MDIO_PMA_CTRL2_10GBCX4		0x0000	/* 10GBASE-CX4 type */
+#define MDIO_PMA_CTRL2_10GBEW		0x0001	/* 10GBASE-EW type */
+#define MDIO_PMA_CTRL2_10GBLW		0x0002	/* 10GBASE-LW type */
+#define MDIO_PMA_CTRL2_10GBSW		0x0003	/* 10GBASE-SW type */
+#define MDIO_PMA_CTRL2_10GBLX4		0x0004	/* 10GBASE-LX4 type */
+#define MDIO_PMA_CTRL2_10GBER		0x0005	/* 10GBASE-ER type */
+#define MDIO_PMA_CTRL2_10GBLR		0x0006	/* 10GBASE-LR type */
+#define MDIO_PMA_CTRL2_10GBSR		0x0007	/* 10GBASE-SR type */
+#define MDIO_PMA_CTRL2_10GBLRM		0x0008	/* 10GBASE-LRM type */
+#define MDIO_PMA_CTRL2_10GBT		0x0009	/* 10GBASE-T type */
+#define MDIO_PMA_CTRL2_10GBKX4		0x000a	/* 10GBASE-KX4 type */
+#define MDIO_PMA_CTRL2_10GBKR		0x000b	/* 10GBASE-KR type */
+#define MDIO_PMA_CTRL2_1000BT		0x000c	/* 1000BASE-T type */
+#define MDIO_PMA_CTRL2_1000BKX		0x000d	/* 1000BASE-KX type */
+#define MDIO_PMA_CTRL2_100BTX		0x000e	/* 100BASE-TX type */
+#define MDIO_PMA_CTRL2_10BT		0x000f	/* 10BASE-T type */
+#define MDIO_PCS_CTRL2_TYPE		0x0003	/* PCS type selection */
+#define MDIO_PCS_CTRL2_10GBR		0x0000	/* 10GBASE-R type */
+#define MDIO_PCS_CTRL2_10GBX		0x0001	/* 10GBASE-X type */
+#define MDIO_PCS_CTRL2_10GBW		0x0002	/* 10GBASE-W type */
+#define MDIO_PCS_CTRL2_10GBT		0x0003	/* 10GBASE-T type */
+
+/* Status register 2. */
+#define MDIO_STAT2_RXFAULT		0x0400	/* Receive fault */
+#define MDIO_STAT2_TXFAULT		0x0800	/* Transmit fault */
+#define MDIO_STAT2_DEVPRST		0xc000	/* Device present */
+#define MDIO_STAT2_DEVPRST_VAL		0x8000	/* Device present value */
+#define MDIO_PMA_STAT2_LBABLE		0x0001	/* PMA loopback ability */
+#define MDIO_PMA_STAT2_10GBEW		0x0002	/* 10GBASE-EW ability */
+#define MDIO_PMA_STAT2_10GBLW		0x0004	/* 10GBASE-LW ability */
+#define MDIO_PMA_STAT2_10GBSW		0x0008	/* 10GBASE-SW ability */
+#define MDIO_PMA_STAT2_10GBLX4		0x0010	/* 10GBASE-LX4 ability */
+#define MDIO_PMA_STAT2_10GBER		0x0020	/* 10GBASE-ER ability */
+#define MDIO_PMA_STAT2_10GBLR		0x0040	/* 10GBASE-LR ability */
+#define MDIO_PMA_STAT2_10GBSR		0x0080	/* 10GBASE-SR ability */
+#define MDIO_PMD_STAT2_TXDISAB		0x0100	/* PMD TX disable ability */
+#define MDIO_PMA_STAT2_EXTABLE		0x0200	/* Extended abilities */
+#define MDIO_PMA_STAT2_RXFLTABLE	0x1000	/* Receive fault ability */
+#define MDIO_PMA_STAT2_TXFLTABLE	0x2000	/* Transmit fault ability */
+#define MDIO_PCS_STAT2_10GBR		0x0001	/* 10GBASE-R capable */
+#define MDIO_PCS_STAT2_10GBX		0x0002	/* 10GBASE-X capable */
+#define MDIO_PCS_STAT2_10GBW		0x0004	/* 10GBASE-W capable */
+#define MDIO_PCS_STAT2_RXFLTABLE	0x1000	/* Receive fault ability */
+#define MDIO_PCS_STAT2_TXFLTABLE	0x2000	/* Transmit fault ability */
+
+/* Transmit disable register. */
+#define MDIO_PMD_TXDIS_GLOBAL		0x0001	/* Global PMD TX disable */
+#define MDIO_PMD_TXDIS_0		0x0002	/* PMD TX disable 0 */
+#define MDIO_PMD_TXDIS_1		0x0004	/* PMD TX disable 1 */
+#define MDIO_PMD_TXDIS_2		0x0008	/* PMD TX disable 2 */
+#define MDIO_PMD_TXDIS_3		0x0010	/* PMD TX disable 3 */
+
+/* Receive signal detect register. */
+#define MDIO_PMD_RXDET_GLOBAL		0x0001	/* Global PMD RX signal detect */
+#define MDIO_PMD_RXDET_0		0x0002	/* PMD RX signal detect 0 */
+#define MDIO_PMD_RXDET_1		0x0004	/* PMD RX signal detect 1 */
+#define MDIO_PMD_RXDET_2		0x0008	/* PMD RX signal detect 2 */
+#define MDIO_PMD_RXDET_3		0x0010	/* PMD RX signal detect 3 */
+
+/* Extended abilities register. */
+#define MDIO_PMA_EXTABLE_10GCX4		0x0001	/* 10GBASE-CX4 ability */
+#define MDIO_PMA_EXTABLE_10GBLRM	0x0002	/* 10GBASE-LRM ability */
+#define MDIO_PMA_EXTABLE_10GBT		0x0004	/* 10GBASE-T ability */
+#define MDIO_PMA_EXTABLE_10GBKX4	0x0008	/* 10GBASE-KX4 ability */
+#define MDIO_PMA_EXTABLE_10GBKR		0x0010	/* 10GBASE-KR ability */
+#define MDIO_PMA_EXTABLE_1000BT		0x0020	/* 1000BASE-T ability */
+#define MDIO_PMA_EXTABLE_1000BKX	0x0040	/* 1000BASE-KX ability */
+#define MDIO_PMA_EXTABLE_100BTX		0x0080	/* 100BASE-TX ability */
+#define MDIO_PMA_EXTABLE_10BT		0x0100	/* 10BASE-T ability */
+
+/* PHY XGXS lane state register. */
+#define MDIO_PHYXS_LNSTAT_SYNC0		0x0001
+#define MDIO_PHYXS_LNSTAT_SYNC1		0x0002
+#define MDIO_PHYXS_LNSTAT_SYNC2		0x0004
+#define MDIO_PHYXS_LNSTAT_SYNC3		0x0008
+#define MDIO_PHYXS_LNSTAT_ALIGN		0x1000
+
+/* PMA 10GBASE-T TX power register. */
+#define MDIO_PMA_10GBT_TXPWR_SHORT	0x0001	/* Short-reach mode */
+
+/* PCS 10GBASE-R/-T status register 1. */
+#define MDIO_PCS_10GBRT_STAT1_BLKLK	0x0001	/* Block lock attained */
+
+/* PCS 10GBASE-R/-T status register 2. */
+#define MDIO_PCS_10GBRT_STAT2_ERR	0x00ff
+#define MDIO_PCS_10GBRT_STAT2_BER	0x3f00
+
+/* AN 10GBASE-T control register. */
+#define MDIO_AN_10GBT_CTRL_ADV10G	0x1000	/* Advertise 10GBASE-T */
+
+/* AN 10GBASE-T status register. */
+#define MDIO_AN_10GBT_STAT_LPTRR	0x0200	/* LP training reset req. */
+#define MDIO_AN_10GBT_STAT_LPLTABLE	0x0400	/* LP loop timing ability */
+#define MDIO_AN_10GBT_STAT_LP10G	0x0800	/* LP is 10GBT capable */
+#define MDIO_AN_10GBT_STAT_REMOK	0x1000	/* Remote OK */
+#define MDIO_AN_10GBT_STAT_LOCOK	0x2000	/* Local OK */
+#define MDIO_AN_10GBT_STAT_MS		0x4000	/* Master/slave config */
+#define MDIO_AN_10GBT_STAT_MSFLT	0x8000	/* Master/slave config fault */
+
+/* Mapping between MDIO PRTAD/DEVAD and mii_ioctl_data::phy_id */
+
+#define MDIO_PHY_ID_C45			0x8000
+#define MDIO_PHY_ID_PRTAD		0x03e0
+#define MDIO_PHY_ID_DEVAD		0x001f
+#define MDIO_PHY_ID_C45_MASK						\
+	(MDIO_PHY_ID_C45 | MDIO_PHY_ID_PRTAD | MDIO_PHY_ID_DEVAD)
+
+static inline __u16 mdio_phy_id_c45(int prtad, int devad)
+{
+	return MDIO_PHY_ID_C45 | (prtad << 5) | devad;
+}
+
+static inline bool mdio_phy_id_is_c45(int phy_id)
+{
+	return (phy_id & MDIO_PHY_ID_C45) && !(phy_id & ~MDIO_PHY_ID_C45_MASK);
+}
+
+static inline __u16 mdio_phy_id_prtad(int phy_id)
+{
+	return (phy_id & MDIO_PHY_ID_PRTAD) >> 5;
+}
+
+static inline __u16 mdio_phy_id_devad(int phy_id)
+{
+	return phy_id & MDIO_PHY_ID_DEVAD;
+}
+
+#endif /* __LINUX_MDIO_H__ */
-- 
cgit v1.2.3-70-g09d2


From 1b1c2e95103ce391c2ea39a9460968fcb73deb30 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 29 Apr 2009 08:04:46 +0000
Subject: mdio: Add generic MDIO (clause 45) support functions

These roughly mirror many of the MII library functions and are based
on code from the sfc driver.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Kconfig  |   3 +
 drivers/net/Makefile |   1 +
 drivers/net/mdio.c   | 359 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mdio.h |  66 ++++++++++
 4 files changed, 429 insertions(+)
 create mode 100644 drivers/net/mdio.c

(limited to 'include')

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index bc7eef12d95..2f81db51b45 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2443,6 +2443,9 @@ menuconfig NETDEV_10000
 
 if NETDEV_10000
 
+config MDIO
+	tristate
+
 config CHELSIO_T1
         tristate "Chelsio 10Gb Ethernet support"
         depends on PCI
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 80420f6d079..dcd5f15dd9e 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -95,6 +95,7 @@ obj-$(CONFIG_SH_ETH) += sh_eth.o
 #
 
 obj-$(CONFIG_MII) += mii.o
+obj-$(CONFIG_MDIO) += mdio.o
 obj-$(CONFIG_PHYLIB) += phy/
 
 obj-$(CONFIG_SUNDANCE) += sundance.o
diff --git a/drivers/net/mdio.c b/drivers/net/mdio.c
new file mode 100644
index 00000000000..0604a25748e
--- /dev/null
+++ b/drivers/net/mdio.c
@@ -0,0 +1,359 @@
+/*
+ * mdio.c: Generic support for MDIO-compatible transceivers
+ * Copyright 2006-2009 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/kernel.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/ethtool.h>
+#include <linux/mdio.h>
+#include <linux/module.h>
+
+/**
+ * mdio45_probe - probe for an MDIO (clause 45) device
+ * @mdio: MDIO interface
+ * @prtad: Expected PHY address
+ *
+ * This sets @prtad and @mmds in the MDIO interface if successful.
+ * Returns 0 on success, negative on error.
+ */
+int mdio45_probe(struct mdio_if_info *mdio, int prtad)
+{
+	int mmd, stat2, devs1, devs2;
+
+	/* Assume PHY must have at least one of PMA/PMD, WIS, PCS, PHY
+	 * XS or DTE XS; give up if none is present. */
+	for (mmd = 1; mmd <= 5; mmd++) {
+		/* Is this MMD present? */
+		stat2 = mdio->mdio_read(mdio->dev, prtad, mmd, MDIO_STAT2);
+		if (stat2 < 0 ||
+		    (stat2 & MDIO_STAT2_DEVPRST) != MDIO_STAT2_DEVPRST_VAL)
+			continue;
+
+		/* It should tell us about all the other MMDs */
+		devs1 = mdio->mdio_read(mdio->dev, prtad, mmd, MDIO_DEVS1);
+		devs2 = mdio->mdio_read(mdio->dev, prtad, mmd, MDIO_DEVS2);
+		if (devs1 < 0 || devs2 < 0)
+			continue;
+
+		mdio->prtad = prtad;
+		mdio->mmds = devs1 | (devs2 << 16);
+		return 0;
+	}
+
+	return -ENODEV;
+}
+EXPORT_SYMBOL(mdio45_probe);
+
+/**
+ * mdio_set_flag - set or clear flag in an MDIO register
+ * @mdio: MDIO interface
+ * @prtad: PHY address
+ * @devad: MMD address
+ * @addr: Register address
+ * @mask: Mask for flag (single bit set)
+ * @sense: New value of flag
+ *
+ * This debounces changes: it does not write the register if the flag
+ * already has the proper value.  Returns 0 on success, negative on error.
+ */
+int mdio_set_flag(const struct mdio_if_info *mdio,
+		  int prtad, int devad, u16 addr, int mask,
+		  bool sense)
+{
+	int old_val = mdio->mdio_read(mdio->dev, prtad, devad, addr);
+	int new_val;
+
+	if (old_val < 0)
+		return old_val;
+	if (sense)
+		new_val = old_val | mask;
+	else
+		new_val = old_val & ~mask;
+	if (old_val == new_val)
+		return 0;
+	return mdio->mdio_write(mdio->dev, prtad, devad, addr, new_val);
+}
+EXPORT_SYMBOL(mdio_set_flag);
+
+/**
+ * mdio_link_ok - is link status up/OK
+ * @mdio: MDIO interface
+ * @mmd_mask: Mask for MMDs to check
+ *
+ * Returns 1 if the PHY reports link status up/OK, 0 otherwise.
+ * @mmd_mask is normally @mdio->mmds, but if loopback is enabled
+ * the MMDs being bypassed should be excluded from the mask.
+ */
+int mdio45_links_ok(const struct mdio_if_info *mdio, u32 mmd_mask)
+{
+	int devad, reg;
+
+	if (!mmd_mask) {
+		/* Use absence of XGMII faults in lieu of link state */
+		reg = mdio->mdio_read(mdio->dev, mdio->prtad,
+				      MDIO_MMD_PHYXS, MDIO_STAT2);
+		return reg >= 0 && !(reg & MDIO_STAT2_RXFAULT);
+	}
+
+	for (devad = 0; mmd_mask; devad++) {
+		if (mmd_mask & (1 << devad)) {
+			mmd_mask &= ~(1 << devad);
+
+			/* Read twice because link state is latched and a
+			 * read moves the current state into the register */
+			mdio->mdio_read(mdio->dev, mdio->prtad,
+					devad, MDIO_STAT1);
+			reg = mdio->mdio_read(mdio->dev, mdio->prtad,
+					      devad, MDIO_STAT1);
+			if (reg < 0 || !(reg & MDIO_STAT1_LSTATUS))
+				return false;
+		}
+	}
+
+	return true;
+}
+EXPORT_SYMBOL(mdio45_links_ok);
+
+/**
+ * mdio45_nway_restart - restart auto-negotiation for this interface
+ * @mdio: MDIO interface
+ *
+ * Returns 0 on success, negative on error.
+ */
+int mdio45_nway_restart(const struct mdio_if_info *mdio)
+{
+	if (!(mdio->mmds & MDIO_DEVS_AN))
+		return -EOPNOTSUPP;
+
+	mdio_set_flag(mdio, mdio->prtad, MDIO_MMD_AN, MDIO_CTRL1,
+		      MDIO_AN_CTRL1_RESTART, true);
+	return 0;
+}
+EXPORT_SYMBOL(mdio45_nway_restart);
+
+static u32 mdio45_get_an(const struct mdio_if_info *mdio, u16 addr)
+{
+	u32 result = 0;
+	int reg;
+
+	reg = mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_AN, addr);
+	if (reg & ADVERTISE_10HALF)
+		result |= ADVERTISED_10baseT_Half;
+	if (reg & ADVERTISE_10FULL)
+		result |= ADVERTISED_10baseT_Full;
+	if (reg & ADVERTISE_100HALF)
+		result |= ADVERTISED_100baseT_Half;
+	if (reg & ADVERTISE_100FULL)
+		result |= ADVERTISED_100baseT_Full;
+	return result;
+}
+
+/**
+ * mdio45_ethtool_gset_npage - get settings for ETHTOOL_GSET
+ * @mdio: MDIO interface
+ * @ecmd: Ethtool request structure
+ * @npage_adv: Modes currently advertised on next pages
+ * @npage_lpa: Modes advertised by link partner on next pages
+ *
+ * Since the CSRs for auto-negotiation using next pages are not fully
+ * standardised, this function does not attempt to decode them.  The
+ * caller must pass them in.
+ */
+void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio,
+			       struct ethtool_cmd *ecmd,
+			       u32 npage_adv, u32 npage_lpa)
+{
+	int reg;
+
+	ecmd->transceiver = XCVR_INTERNAL;
+	ecmd->phy_address = mdio->prtad;
+
+	reg = mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_PMAPMD,
+			      MDIO_CTRL2);
+	switch (reg & MDIO_PMA_CTRL2_TYPE) {
+	case MDIO_PMA_CTRL2_10GBT:
+	case MDIO_PMA_CTRL2_1000BT:
+	case MDIO_PMA_CTRL2_100BTX:
+	case MDIO_PMA_CTRL2_10BT:
+		ecmd->port = PORT_TP;
+		ecmd->supported = SUPPORTED_TP;
+		reg = mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_PMAPMD,
+				      MDIO_SPEED);
+		if (reg & MDIO_SPEED_10G)
+			ecmd->supported |= SUPPORTED_10000baseT_Full;
+		if (reg & MDIO_PMA_SPEED_1000)
+			ecmd->supported |= (SUPPORTED_1000baseT_Full |
+					    SUPPORTED_1000baseT_Half);
+		if (reg & MDIO_PMA_SPEED_100)
+			ecmd->supported |= (SUPPORTED_100baseT_Full |
+					    SUPPORTED_100baseT_Half);
+		if (reg & MDIO_PMA_SPEED_10)
+			ecmd->supported |= (SUPPORTED_10baseT_Full |
+					    SUPPORTED_10baseT_Half);
+		ecmd->advertising = ADVERTISED_TP;
+		break;
+
+	case MDIO_PMA_CTRL2_10GBCX4:
+	case MDIO_PMA_CTRL2_10GBKX4:
+	case MDIO_PMA_CTRL2_10GBKR:
+	case MDIO_PMA_CTRL2_1000BKX:
+		ecmd->port = PORT_OTHER;
+		ecmd->supported = 0;
+		ecmd->advertising = 0;
+		break;
+
+	/* All the other defined modes are flavours of optical */
+	default:
+		ecmd->port = PORT_FIBRE;
+		ecmd->supported = SUPPORTED_FIBRE;
+		ecmd->advertising = ADVERTISED_FIBRE;
+		break;
+	}
+
+	if (mdio->mmds & MDIO_DEVS_AN) {
+		ecmd->supported |= SUPPORTED_Autoneg;
+		reg = mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_AN,
+				      MDIO_CTRL1);
+		if (reg & MDIO_AN_CTRL1_ENABLE) {
+			ecmd->autoneg = AUTONEG_ENABLE;
+			ecmd->advertising |=
+				ADVERTISED_Autoneg |
+				mdio45_get_an(mdio, MDIO_AN_ADVERTISE) |
+				npage_adv;
+		} else {
+			ecmd->autoneg = AUTONEG_DISABLE;
+		}
+	} else {
+		ecmd->autoneg = AUTONEG_DISABLE;
+	}
+
+	if (ecmd->autoneg) {
+		u32 modes = 0;
+
+		/* If AN is complete and successful, report best common
+		 * mode, otherwise report best advertised mode. */
+		if (mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_AN,
+				    MDIO_STAT1) &
+		    MDIO_AN_STAT1_COMPLETE)
+			modes = (ecmd->advertising &
+				 (mdio45_get_an(mdio, MDIO_AN_LPA) |
+				  npage_lpa));
+		if (modes == 0)
+			modes = ecmd->advertising;
+
+		if (modes & ADVERTISED_10000baseT_Full) {
+			ecmd->speed = SPEED_10000;
+			ecmd->duplex = DUPLEX_FULL;
+		} else if (modes & (ADVERTISED_1000baseT_Full |
+				    ADVERTISED_1000baseT_Half)) {
+			ecmd->speed = SPEED_1000;
+			ecmd->duplex = !!(modes & ADVERTISED_1000baseT_Full);
+		} else if (modes & (ADVERTISED_100baseT_Full |
+				    ADVERTISED_100baseT_Half)) {
+			ecmd->speed = SPEED_100;
+			ecmd->duplex = !!(modes & ADVERTISED_100baseT_Full);
+		} else {
+			ecmd->speed = SPEED_10;
+			ecmd->duplex = !!(modes & ADVERTISED_10baseT_Full);
+		}
+	} else {
+		/* Report forced settings */
+		reg = mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_PMAPMD,
+				      MDIO_CTRL1);
+		ecmd->speed = (((reg & MDIO_PMA_CTRL1_SPEED1000) ? 100 : 1) *
+			       ((reg & MDIO_PMA_CTRL1_SPEED100) ? 100 : 10));
+		ecmd->duplex = (reg & MDIO_CTRL1_FULLDPLX ||
+				ecmd->speed == SPEED_10000);
+	}
+}
+EXPORT_SYMBOL(mdio45_ethtool_gset_npage);
+
+/**
+ * mdio_mii_ioctl - MII ioctl interface for MDIO (clause 22 or 45) PHYs
+ * @mdio: MDIO interface
+ * @mii_data: MII ioctl data structure
+ * @cmd: MII ioctl command
+ *
+ * Returns 0 on success, negative on error.
+ */
+int mdio_mii_ioctl(const struct mdio_if_info *mdio,
+		   struct mii_ioctl_data *mii_data, int cmd)
+{
+	int prtad, devad;
+	u16 addr = mii_data->reg_num;
+
+	/* Validate/convert cmd to one of SIOC{G,S}MIIREG */
+	switch (cmd) {
+	case SIOCGMIIPHY:
+		if (mdio->prtad == MDIO_PRTAD_NONE)
+			return -EOPNOTSUPP;
+		mii_data->phy_id = mdio->prtad;
+		cmd = SIOCGMIIREG;
+		break;
+	case SIOCGMIIREG:
+		break;
+	case SIOCSMIIREG:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	/* Validate/convert phy_id */
+	if ((mdio->mode_support & MDIO_SUPPORTS_C45) &&
+	    mdio_phy_id_is_c45(mii_data->phy_id)) {
+		prtad = mdio_phy_id_prtad(mii_data->phy_id);
+		devad = mdio_phy_id_devad(mii_data->phy_id);
+	} else if ((mdio->mode_support & MDIO_SUPPORTS_C22) &&
+		   mii_data->phy_id < 0x20) {
+		prtad = mii_data->phy_id;
+		devad = MDIO_DEVAD_NONE;
+		addr &= 0x1f;
+	} else if ((mdio->mode_support & MDIO_EMULATE_C22) &&
+		   mdio->prtad != MDIO_PRTAD_NONE &&
+		   mii_data->phy_id == mdio->prtad) {
+		/* Remap commonly-used MII registers. */
+		prtad = mdio->prtad;
+		switch (addr) {
+		case MII_BMCR:
+		case MII_BMSR:
+		case MII_PHYSID1:
+		case MII_PHYSID2:
+			devad = __ffs(mdio->mmds);
+			break;
+		case MII_ADVERTISE:
+		case MII_LPA:
+			if (!(mdio->mmds & MDIO_DEVS_AN))
+				return -EINVAL;
+			devad = MDIO_MMD_AN;
+			if (addr == MII_ADVERTISE)
+				addr = MDIO_AN_ADVERTISE;
+			else
+				addr = MDIO_AN_LPA;
+			break;
+		default:
+			return -EINVAL;
+		}
+	} else {
+		return -EINVAL;
+	}
+
+	if (cmd == SIOCGMIIREG) {
+		int rc = mdio->mdio_read(mdio->dev, prtad, devad, addr);
+		if (rc < 0)
+			return rc;
+		mii_data->val_out = rc;
+		return 0;
+	} else {
+		return mdio->mdio_write(mdio->dev, prtad, devad, addr,
+					mii_data->val_in);
+	}
+}
+EXPORT_SYMBOL(mdio_mii_ioctl);
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index d57ddb08a1b..ba41537eaa8 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -234,4 +234,70 @@ static inline __u16 mdio_phy_id_devad(int phy_id)
 	return phy_id & MDIO_PHY_ID_DEVAD;
 }
 
+#ifdef __KERNEL__ 
+
+/**
+ * struct mdio_if_info - Ethernet controller MDIO interface
+ * @prtad: PRTAD of the PHY (%MDIO_PRTAD_NONE if not present/unknown)
+ * @mmds: Mask of MMDs expected to be present in the PHY.  This must be
+ *	non-zero unless @prtad = %MDIO_PRTAD_NONE.
+ * @mode_support: MDIO modes supported.  If %MDIO_SUPPORTS_C22 is set then
+ *	MII register access will be passed through with @devad =
+ *	%MDIO_DEVAD_NONE.  If %MDIO_EMULATE_C22 is set then access to
+ *	commonly used clause 22 registers will be translated into
+ *	clause 45 registers.
+ * @dev: Net device structure
+ * @mdio_read: Register read function; returns value or negative error code
+ * @mdio_write: Register write function; returns 0 or negative error code
+ */
+struct mdio_if_info {
+	int prtad;
+	u32 __bitwise mmds;
+	unsigned mode_support;
+
+	struct net_device *dev;
+	int (*mdio_read)(struct net_device *dev, int prtad, int devad,
+			 u16 addr);
+	int (*mdio_write)(struct net_device *dev, int prtad, int devad,
+			  u16 addr, u16 val);
+};
+
+#define MDIO_PRTAD_NONE			(-1)
+#define MDIO_DEVAD_NONE			(-1)
+#define MDIO_SUPPORTS_C22		1
+#define MDIO_SUPPORTS_C45		2
+#define MDIO_EMULATE_C22		4
+
+struct ethtool_cmd;
+struct ethtool_pauseparam;
+extern int mdio45_probe(struct mdio_if_info *mdio, int prtad);
+extern int mdio_set_flag(const struct mdio_if_info *mdio,
+			 int prtad, int devad, u16 addr, int mask,
+			 bool sense);
+extern int mdio45_links_ok(const struct mdio_if_info *mdio, u32 mmds);
+extern int mdio45_nway_restart(const struct mdio_if_info *mdio);
+extern void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio,
+				      struct ethtool_cmd *ecmd,
+				      u32 npage_adv, u32 npage_lpa);
+
+/**
+ * mdio45_ethtool_gset - get settings for ETHTOOL_GSET
+ * @mdio: MDIO interface
+ * @ecmd: Ethtool request structure
+ *
+ * Since the CSRs for auto-negotiation using next pages are not fully
+ * standardised, this function does not attempt to decode them.  Use
+ * mdio45_ethtool_gset_npage() to specify advertisement bits from next
+ * pages.
+ */
+static inline void mdio45_ethtool_gset(const struct mdio_if_info *mdio,
+				       struct ethtool_cmd *ecmd)
+{
+	mdio45_ethtool_gset_npage(mdio, ecmd, 0, 0);
+}
+
+extern int mdio_mii_ioctl(const struct mdio_if_info *mdio,
+			  struct mii_ioctl_data *mii_data, int cmd);
+
+#endif /* __KERNEL__ */
 #endif /* __LINUX_MDIO_H__ */
-- 
cgit v1.2.3-70-g09d2


From 44c22ee91b56d7cad3b48c439dd96aad2e910fbc Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 29 Apr 2009 08:15:05 +0000
Subject: mii: Simplify mii_resolve_flowctrl_fdx()

This is a shorter and more comprehensible formulation of the
conditions for each flow control mode.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mii.h | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/mii.h b/include/linux/mii.h
index ad748588faf..14ecb2e114f 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -250,18 +250,12 @@ static inline u8 mii_resolve_flowctrl_fdx(u16 lcladv, u16 rmtadv)
 {
 	u8 cap = 0;
 
-	if (lcladv & ADVERTISE_PAUSE_CAP) {
-		if (lcladv & ADVERTISE_PAUSE_ASYM) {
-			if (rmtadv & LPA_PAUSE_CAP)
-				cap = FLOW_CTRL_TX | FLOW_CTRL_RX;
-			else if (rmtadv & LPA_PAUSE_ASYM)
-				cap = FLOW_CTRL_RX;
-		} else {
-			if (rmtadv & LPA_PAUSE_CAP)
-				cap = FLOW_CTRL_TX | FLOW_CTRL_RX;
-		}
-	} else if (lcladv & ADVERTISE_PAUSE_ASYM) {
-		if ((rmtadv & LPA_PAUSE_CAP) && (rmtadv & LPA_PAUSE_ASYM))
+	if (lcladv & rmtadv & ADVERTISE_PAUSE_CAP) {
+		cap = FLOW_CTRL_TX | FLOW_CTRL_RX;
+	} else if (lcladv & rmtadv & ADVERTISE_PAUSE_ASYM) {
+		if (lcladv & ADVERTISE_PAUSE_CAP)
+			cap = FLOW_CTRL_RX;
+		else if (rmtadv & ADVERTISE_PAUSE_CAP)
 			cap = FLOW_CTRL_TX;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From a8c30832b5b12e5d4e9d1c20cdac3cc2880e08b8 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 29 Apr 2009 08:19:03 +0000
Subject: mii: Add mii_advertise_flowctrl()

This converts flow control capabilites to an advertising mask and can
be useful in combination with mii_resolve_flowctrl_fdx().

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mii.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/linux/mii.h b/include/linux/mii.h
index 14ecb2e114f..359fba88027 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -239,6 +239,22 @@ static inline unsigned int mii_duplex (unsigned int duplex_lock,
 	return 0;
 }
 
+/**
+ * mii_advertise_flowctrl - get flow control advertisement flags
+ * @cap: Flow control capabilities (FLOW_CTRL_RX, FLOW_CTRL_TX or both)
+ */
+static inline u16 mii_advertise_flowctrl(int cap)
+{
+	u16 adv = 0;
+
+	if (cap & FLOW_CTRL_RX)
+		adv = ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
+	if (cap & FLOW_CTRL_TX)
+		adv ^= ADVERTISE_PAUSE_ASYM;
+
+	return adv;
+}
+
 /**
  * mii_resolve_flowctrl_fdx
  * @lcladv: value of MII ADVERTISE register
-- 
cgit v1.2.3-70-g09d2


From af2a3eac2fe6a6d8e9fdf6927284b34466a7d808 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 29 Apr 2009 08:19:36 +0000
Subject: mdio: Add mdio45_ethtool_spauseparam_an()

This implements the ETHTOOL_SPAUSEPARAM operation for MDIO (clause 45)
PHYs with auto-negotiation MMDs.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio.c   | 30 ++++++++++++++++++++++++++++++
 include/linux/mdio.h |  3 +++
 2 files changed, 33 insertions(+)

(limited to 'include')

diff --git a/drivers/net/mdio.c b/drivers/net/mdio.c
index 0604a25748e..2fb0d16b61f 100644
--- a/drivers/net/mdio.c
+++ b/drivers/net/mdio.c
@@ -274,6 +274,36 @@ void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio,
 }
 EXPORT_SYMBOL(mdio45_ethtool_gset_npage);
 
+/**
+ * mdio45_ethtool_spauseparam_an - set auto-negotiated pause parameters
+ * @mdio: MDIO interface
+ * @ecmd: Ethtool request structure
+ *
+ * This function assumes that the PHY has an auto-negotiation MMD.  It
+ * will enable and disable advertising of flow control as appropriate.
+ */
+void mdio45_ethtool_spauseparam_an(const struct mdio_if_info *mdio,
+				   const struct ethtool_pauseparam *ecmd)
+{
+	int adv, old_adv;
+
+	WARN_ON(!(mdio->mmds & MDIO_DEVS_AN));
+
+	old_adv = mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_AN,
+				  MDIO_AN_ADVERTISE);
+	adv = old_adv & ~(ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM);
+	if (ecmd->autoneg)
+		adv |= mii_advertise_flowctrl(
+			(ecmd->rx_pause ? FLOW_CTRL_RX : 0) |
+			(ecmd->tx_pause ? FLOW_CTRL_TX : 0));
+	if (adv != old_adv) {
+		mdio->mdio_write(mdio->dev, mdio->prtad, MDIO_MMD_AN,
+				 MDIO_AN_ADVERTISE, adv);
+		mdio45_nway_restart(mdio);
+	}
+}
+EXPORT_SYMBOL(mdio45_ethtool_spauseparam_an);
+
 /**
  * mdio_mii_ioctl - MII ioctl interface for MDIO (clause 22 or 45) PHYs
  * @mdio: MDIO interface
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index ba41537eaa8..5645c0f863d 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -279,6 +279,9 @@ extern int mdio45_nway_restart(const struct mdio_if_info *mdio);
 extern void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio,
 				      struct ethtool_cmd *ecmd,
 				      u32 npage_adv, u32 npage_lpa);
+extern void
+mdio45_ethtool_spauseparam_an(const struct mdio_if_info *mdio,
+			      const struct ethtool_pauseparam *ecmd);
 
 /**
  * mdio45_ethtool_gset - get settings for ETHTOOL_GSET
-- 
cgit v1.2.3-70-g09d2


From 0c09c1a49cc7b819b33566a49d9901f7cfdd6889 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 29 Apr 2009 08:21:53 +0000
Subject: ethtool/mdio: Report MDIO mode support and link partner advertising

Add mdio_support and lp_advertising fields to ethtool_cmd.  Set these
in mdio45_ethtool_gset{,_npage}().

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio.c      | 19 ++++++++++++-------
 include/linux/ethtool.h |  4 +++-
 include/linux/mdio.h    |  5 +++--
 3 files changed, 18 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/net/mdio.c b/drivers/net/mdio.c
index 2fb0d16b61f..ee383c25693 100644
--- a/drivers/net/mdio.c
+++ b/drivers/net/mdio.c
@@ -173,6 +173,8 @@ void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio,
 
 	ecmd->transceiver = XCVR_INTERNAL;
 	ecmd->phy_address = mdio->prtad;
+	ecmd->mdio_support =
+		mdio->mode_support & (MDIO_SUPPORTS_C45 | MDIO_SUPPORTS_C22);
 
 	reg = mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_PMAPMD,
 			      MDIO_CTRL2);
@@ -235,16 +237,19 @@ void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio,
 
 	if (ecmd->autoneg) {
 		u32 modes = 0;
+		int an_stat = mdio->mdio_read(mdio->dev, mdio->prtad,
+					      MDIO_MMD_AN, MDIO_STAT1);
 
 		/* If AN is complete and successful, report best common
 		 * mode, otherwise report best advertised mode. */
-		if (mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_AN,
-				    MDIO_STAT1) &
-		    MDIO_AN_STAT1_COMPLETE)
-			modes = (ecmd->advertising &
-				 (mdio45_get_an(mdio, MDIO_AN_LPA) |
-				  npage_lpa));
-		if (modes == 0)
+		if (an_stat & MDIO_AN_STAT1_COMPLETE) {
+			ecmd->lp_advertising =
+				mdio45_get_an(mdio, MDIO_AN_LPA) | npage_lpa;
+			if (an_stat & MDIO_AN_STAT1_LPABLE)
+				ecmd->lp_advertising |= ADVERTISED_Autoneg;
+			modes = ecmd->advertising & ecmd->lp_advertising;
+		}
+		if ((modes & ~ADVERTISED_Autoneg) == 0)
 			modes = ecmd->advertising;
 
 		if (modes & ADVERTISED_10000baseT_Full) {
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 5ccb6bd660c..14e6bc86011 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -26,11 +26,13 @@ struct ethtool_cmd {
 	__u8	phy_address;
 	__u8	transceiver;	/* Which transceiver to use */
 	__u8	autoneg;	/* Enable or disable autonegotiation */
+	__u8	mdio_support;
 	__u32	maxtxpkt;	/* Tx pkts before generating tx int */
 	__u32	maxrxpkt;	/* Rx pkts before generating rx int */
 	__u16	speed_hi;
 	__u16	reserved2;
-	__u32	reserved[3];
+	__u32	lp_advertising;	/* Features the link partner advertises */
+	__u32	reserved[2];
 };
 
 static inline void ethtool_cmd_speed_set(struct ethtool_cmd *ep,
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index 5645c0f863d..1bff2f2d0e1 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -234,6 +234,9 @@ static inline __u16 mdio_phy_id_devad(int phy_id)
 	return phy_id & MDIO_PHY_ID_DEVAD;
 }
 
+#define MDIO_SUPPORTS_C22		1
+#define MDIO_SUPPORTS_C45		2
+
 #ifdef __KERNEL__ 
 
 /**
@@ -264,8 +267,6 @@ struct mdio_if_info {
 
 #define MDIO_PRTAD_NONE			(-1)
 #define MDIO_DEVAD_NONE			(-1)
-#define MDIO_SUPPORTS_C22		1
-#define MDIO_SUPPORTS_C45		2
 #define MDIO_EMULATE_C22		4
 
 struct ethtool_cmd;
-- 
cgit v1.2.3-70-g09d2


From 894b19a6b343ce3589237167a56e6df0fe72ef0d Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 29 Apr 2009 08:25:57 +0000
Subject: ethtool/mdio: Support backplane mode negotiation

Compile-tested only.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio.c      | 30 +++++++++++++++++++++++++-----
 include/linux/ethtool.h | 10 ++++++++++
 include/linux/mdio.h    |  5 +++++
 3 files changed, 40 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/net/mdio.c b/drivers/net/mdio.c
index ee383c25693..66483035f68 100644
--- a/drivers/net/mdio.c
+++ b/drivers/net/mdio.c
@@ -202,12 +202,29 @@ void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio,
 		break;
 
 	case MDIO_PMA_CTRL2_10GBCX4:
+		ecmd->port = PORT_OTHER;
+		ecmd->supported = 0;
+		ecmd->advertising = 0;
+		break;
+
 	case MDIO_PMA_CTRL2_10GBKX4:
 	case MDIO_PMA_CTRL2_10GBKR:
 	case MDIO_PMA_CTRL2_1000BKX:
 		ecmd->port = PORT_OTHER;
-		ecmd->supported = 0;
-		ecmd->advertising = 0;
+		ecmd->supported = SUPPORTED_Backplane;
+		reg = mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_PMAPMD,
+				      MDIO_PMA_EXTABLE);
+		if (reg & MDIO_PMA_EXTABLE_10GBKX4)
+			ecmd->supported |= SUPPORTED_10000baseKX4_Full;
+		if (reg & MDIO_PMA_EXTABLE_10GBKR)
+			ecmd->supported |= SUPPORTED_10000baseKR_Full;
+		if (reg & MDIO_PMA_EXTABLE_1000BKX)
+			ecmd->supported |= SUPPORTED_1000baseKX_Full;
+		reg = mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_PMAPMD,
+				      MDIO_PMA_10GBR_FECABLE);
+		if (reg & MDIO_PMA_10GBR_FECABLE_ABLE)
+			ecmd->supported |= SUPPORTED_10000baseR_FEC;
+		ecmd->advertising = ADVERTISED_Backplane;
 		break;
 
 	/* All the other defined modes are flavours of optical */
@@ -252,13 +269,16 @@ void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio,
 		if ((modes & ~ADVERTISED_Autoneg) == 0)
 			modes = ecmd->advertising;
 
-		if (modes & ADVERTISED_10000baseT_Full) {
+		if (modes & (ADVERTISED_10000baseT_Full |
+			     ADVERTISED_10000baseKX4_Full |
+			     ADVERTISED_10000baseKR_Full)) {
 			ecmd->speed = SPEED_10000;
 			ecmd->duplex = DUPLEX_FULL;
 		} else if (modes & (ADVERTISED_1000baseT_Full |
-				    ADVERTISED_1000baseT_Half)) {
+				    ADVERTISED_1000baseT_Half |
+				    ADVERTISED_1000baseKX_Full)) {
 			ecmd->speed = SPEED_1000;
-			ecmd->duplex = !!(modes & ADVERTISED_1000baseT_Full);
+			ecmd->duplex = !(modes & ADVERTISED_1000baseT_Half);
 		} else if (modes & (ADVERTISED_100baseT_Full |
 				    ADVERTISED_100baseT_Half)) {
 			ecmd->speed = SPEED_100;
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 14e6bc86011..380b04272bf 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -565,6 +565,11 @@ struct ethtool_ops {
 #define SUPPORTED_Pause			(1 << 13)
 #define SUPPORTED_Asym_Pause		(1 << 14)
 #define SUPPORTED_2500baseX_Full	(1 << 15)
+#define SUPPORTED_Backplane		(1 << 16)
+#define SUPPORTED_1000baseKX_Full	(1 << 17)
+#define SUPPORTED_10000baseKX4_Full	(1 << 18)
+#define SUPPORTED_10000baseKR_Full	(1 << 19)
+#define SUPPORTED_10000baseR_FEC	(1 << 20)
 
 /* Indicates what features are advertised by the interface. */
 #define ADVERTISED_10baseT_Half		(1 << 0)
@@ -583,6 +588,11 @@ struct ethtool_ops {
 #define ADVERTISED_Pause		(1 << 13)
 #define ADVERTISED_Asym_Pause		(1 << 14)
 #define ADVERTISED_2500baseX_Full	(1 << 15)
+#define ADVERTISED_Backplane		(1 << 16)
+#define ADVERTISED_1000baseKX_Full	(1 << 17)
+#define ADVERTISED_10000baseKX4_Full	(1 << 18)
+#define ADVERTISED_10000baseKR_Full	(1 << 19)
+#define ADVERTISED_10000baseR_FEC	(1 << 20)
 
 /* The following are all involved in forcing a particular link
  * mode for the device for setting things.  When getting the
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index 1bff2f2d0e1..26b4eb3bbee 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -46,6 +46,7 @@
 
 /* Media-dependent registers. */
 #define MDIO_PMA_10GBT_TXPWR	131	/* 10GBASE-T TX power control */
+#define MDIO_PMA_10GBR_FECABLE	170	/* 10GBASE-R FEC ability */
 #define MDIO_PCS_10GBX_STAT1	24	/* 10GBASE-X PCS status 1 */
 #define MDIO_PCS_10GBRT_STAT1	32	/* 10GBASE-R/-T PCS status 1 */
 #define MDIO_PCS_10GBRT_STAT2	33	/* 10GBASE-R/-T PCS status 2 */
@@ -187,6 +188,10 @@
 /* PMA 10GBASE-T TX power register. */
 #define MDIO_PMA_10GBT_TXPWR_SHORT	0x0001	/* Short-reach mode */
 
+/* PMA 10GBASE-R FEC ability register. */
+#define MDIO_PMA_10GBR_FECABLE_ABLE	0x0001	/* FEC ability */
+#define MDIO_PMA_10GBR_FECABLE_ERRABLE	0x0002	/* FEC error indic. ability */
+
 /* PCS 10GBASE-R/-T status register 1. */
 #define MDIO_PCS_10GBRT_STAT1_BLKLK	0x0001	/* Block lock attained */
 
-- 
cgit v1.2.3-70-g09d2


From a511e3f968c462a55ef58697257f5347c73d306e Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 29 Apr 2009 15:59:58 -0700
Subject: mutex: add atomic_dec_and_mutex_lock(), fix

include/linux/mutex.h:136: warning: 'mutex_lock' declared inline after being called
 include/linux/mutex.h:136: warning: previous declaration of 'mutex_lock' was here

uninline it.

[ Impact: clean up and uninline, address compiler warning ]

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Eric Paris <eparis@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <200904292318.n3TNIsi6028340@imap1.linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/mutex.h | 24 +-----------------------
 kernel/mutex.c        | 25 ++++++++++++++++++++++++-
 2 files changed, 25 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 93054fc3635..878cab4f5fc 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -150,28 +150,6 @@ extern int __must_check mutex_lock_killable(struct mutex *lock);
  */
 extern int mutex_trylock(struct mutex *lock);
 extern void mutex_unlock(struct mutex *lock);
-
-/**
- * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
- * @cnt: the atomic which we are to dec
- * @lock: the mutex to return holding if we dec to 0
- *
- * return true and hold lock if we dec to 0, return false otherwise
- */
-static inline int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
-{
-	/* dec if we can't possibly hit 0 */
-	if (atomic_add_unless(cnt, -1, 1))
-		return 0;
-	/* we might hit 0, so take the lock */
-	mutex_lock(lock);
-	if (!atomic_dec_and_test(cnt)) {
-		/* when we actually did the dec, we didn't hit 0 */
-		mutex_unlock(lock);
-		return 0;
-	}
-	/* we hit 0, and we hold the lock */
-	return 1;
-}
+extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
 
 #endif
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 507cf2b5e9f..e2d25e9e62a 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -471,5 +471,28 @@ int __sched mutex_trylock(struct mutex *lock)
 
 	return ret;
 }
-
 EXPORT_SYMBOL(mutex_trylock);
+
+/**
+ * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
+ * @cnt: the atomic which we are to dec
+ * @lock: the mutex to return holding if we dec to 0
+ *
+ * return true and hold lock if we dec to 0, return false otherwise
+ */
+int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
+{
+	/* dec if we can't possibly hit 0 */
+	if (atomic_add_unless(cnt, -1, 1))
+		return 0;
+	/* we might hit 0, so take the lock */
+	mutex_lock(lock);
+	if (!atomic_dec_and_test(cnt)) {
+		/* when we actually did the dec, we didn't hit 0 */
+		mutex_unlock(lock);
+		return 0;
+	}
+	/* we hit 0, and we hold the lock */
+	return 1;
+}
+EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
-- 
cgit v1.2.3-70-g09d2


From ba9c22f2c01cf5c88beed5a6b9e07d42e10bd358 Mon Sep 17 00:00:00 2001
From: Darren Hart <dvhltc@us.ibm.com>
Date: Mon, 20 Apr 2009 22:22:22 -0700
Subject: futex: remove FUTEX_REQUEUE_PI (non CMP)

The new requeue PI futex op codes were modeled after the existing
FUTEX_REQUEUE and FUTEX_CMP_REQUEUE calls.  I was unaware at the time
that FUTEX_REQUEUE was only around for compatibility reasons and
shouldn't be used in new code.  Ulrich Drepper elaborates on this in his
Futexes are Tricky paper: http://people.redhat.com/drepper/futex.pdf.
The deprecated call doesn't catch changes to the futex corresponding to
the destination futex which can lead to deadlock.

Therefor, I feel it best to remove FUTEX_REQUEUE_PI and leave only
FUTEX_CMP_REQUEUE_PI as there are not yet any existing users of the API.
This patch does change the OP code value of FUTEX_CMP_REQUEUE_PI to 12
from 13.  Since my test case is the only known user of this API, I felt
this was the right thing to do, rather than leave a hole in the
enumeration.

I chose to continue using the _CMP_ modifier in the OP code to make it
explicit to the user that the test is being done.

Builds, boots, and ran several hundred iterations requeue_pi.c.

Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
LKML-Reference: <49ED580E.1050502@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/futex.h | 4 +---
 kernel/futex.c        | 6 +-----
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/futex.h b/include/linux/futex.h
index b05519ca9e5..34956c8fdeb 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -24,8 +24,7 @@ union ktime;
 #define FUTEX_WAIT_BITSET	9
 #define FUTEX_WAKE_BITSET	10
 #define FUTEX_WAIT_REQUEUE_PI	11
-#define FUTEX_REQUEUE_PI	12
-#define FUTEX_CMP_REQUEUE_PI	13
+#define FUTEX_CMP_REQUEUE_PI	12
 
 #define FUTEX_PRIVATE_FLAG	128
 #define FUTEX_CLOCK_REALTIME	256
@@ -43,7 +42,6 @@ union ktime;
 #define FUTEX_WAKE_BITSET_PRIVATE	(FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG)
 #define FUTEX_WAIT_REQUEUE_PI_PRIVATE	(FUTEX_WAIT_REQUEUE_PI | \
 					 FUTEX_PRIVATE_FLAG)
-#define FUTEX_REQUEUE_PI_PRIVATE	(FUTEX_REQUEUE_PI | FUTEX_PRIVATE_FLAG)
 #define FUTEX_CMP_REQUEUE_PI_PRIVATE	(FUTEX_CMP_REQUEUE_PI | \
 					 FUTEX_PRIVATE_FLAG)
 
diff --git a/kernel/futex.c b/kernel/futex.c
index 6d2daa46f9f..aec8bf89bf4 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2555,9 +2555,6 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 		ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3,
 					    clockrt, uaddr2);
 		break;
-	case FUTEX_REQUEUE_PI:
-		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 1);
-		break;
 	case FUTEX_CMP_REQUEUE_PI:
 		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
 				    1);
@@ -2596,8 +2593,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
 	 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
 	 */
 	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
-	    cmd == FUTEX_REQUEUE_PI || cmd == FUTEX_CMP_REQUEUE_PI ||
-	    cmd == FUTEX_WAKE_OP)
+	    cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
 		val2 = (u32) (unsigned long) utime;
 
 	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
-- 
cgit v1.2.3-70-g09d2


From 62ab4505e3efaf67784f84059e0fb9cedb1728ea Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 4 Apr 2009 21:01:06 +0000
Subject: signals: implement sys_rt_tgsigqueueinfo

sys_kill has the per thread counterpart sys_tgkill. sigqueueinfo is
missing a thread directed counterpart. Such an interface is important
for migrating applications from other OSes which have the per thread
delivery implemented.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Roland McGrath <roland@redhat.com>
Acked-by: Ulrich Drepper <drepper@redhat.com>
---
 include/linux/compat.h |  2 ++
 include/linux/signal.h |  2 ++
 kernel/compat.c        | 11 +++++++++++
 kernel/signal.c        | 26 ++++++++++++++++++++++++++
 4 files changed, 41 insertions(+)

(limited to 'include')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index f2ded21f9a3..af931ee43dd 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -222,6 +222,8 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from);
 int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from);
 int get_compat_sigevent(struct sigevent *event,
 		const struct compat_sigevent __user *u_event);
+long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig,
+				  struct compat_siginfo __user *uinfo);
 
 static inline int compat_timeval_compare(struct compat_timeval *lhs,
 					struct compat_timeval *rhs)
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 84f997f8aa5..c7552836bd9 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -235,6 +235,8 @@ static inline int valid_signal(unsigned long sig)
 extern int next_signal(struct sigpending *pending, sigset_t *mask);
 extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p);
 extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
+extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig,
+				 siginfo_t *info);
 extern long do_sigpending(void __user *, unsigned long);
 extern int sigprocmask(int, sigset_t *, sigset_t *);
 extern int show_unhandled_signals;
diff --git a/kernel/compat.c b/kernel/compat.c
index 42d56544460..f6c204f07ea 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -882,6 +882,17 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
 
 }
 
+asmlinkage long
+compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig,
+			     struct compat_siginfo __user *uinfo)
+{
+	siginfo_t info;
+
+	if (copy_siginfo_from_user32(&info, uinfo))
+		return -EFAULT;
+	return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
+}
+
 #ifdef __ARCH_WANT_COMPAT_SYS_TIME
 
 /* compat_time_t is a 32 bit "long" and needs to get converted. */
diff --git a/kernel/signal.c b/kernel/signal.c
index 56d27acad87..f79b3b9f837 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2369,6 +2369,32 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
 	return kill_proc_info(sig, &info, pid);
 }
 
+long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
+{
+	/* This is only valid for single tasks */
+	if (pid <= 0 || tgid <= 0)
+		return -EINVAL;
+
+	/* Not even root can pretend to send signals from the kernel.
+	   Nor can they impersonate a kill(), which adds source info.  */
+	if (info->si_code >= 0)
+		return -EPERM;
+	info->si_signo = sig;
+
+	return do_send_specific(tgid, pid, sig, info);
+}
+
+SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig,
+		siginfo_t __user *, uinfo)
+{
+	siginfo_t info;
+
+	if (copy_from_user(&info, uinfo, sizeof(siginfo_t)))
+		return -EFAULT;
+
+	return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
+}
+
 int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
 {
 	struct task_struct *t = current;
-- 
cgit v1.2.3-70-g09d2


From c33a0bc4e41ef169d6e807d8abb9502544b518e5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 1 May 2009 12:23:16 +0200
Subject: perf_counter: fix race in perf_output_*

When two (or more) contexts output to the same buffer, it is possible
to observe half written output.

Suppose we have CPU0 doing perf_counter_mmap(), CPU1 doing
perf_counter_overflow(). If CPU1 does a wakeup and exposes head to
user-space, then CPU2 can observe the data CPU0 is still writing.

[ Impact: fix occasionally corrupted profiling records ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090501102533.007821627@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |   5 +-
 kernel/perf_counter.c        | 130 +++++++++++++++++++++++++++++++++----------
 2 files changed, 105 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 41aed427005..f776851f8c4 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -358,10 +358,13 @@ struct perf_mmap_data {
 	struct rcu_head			rcu_head;
 	int				nr_pages;	/* nr of data pages  */
 
-	atomic_t			wakeup;		/* POLL_ for wakeups */
+	atomic_t			poll;		/* POLL_ for wakeups */
 	atomic_t			head;		/* write position    */
 	atomic_t			events;		/* event limit       */
 
+	atomic_t			wakeup_head;	/* completed head    */
+	atomic_t			lock;		/* concurrent writes */
+
 	struct perf_counter_mmap_page   *user_page;
 	void 				*data_pages[0];
 };
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 75f2b6c8239..8660ae57953 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1279,14 +1279,12 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 {
 	struct perf_counter *counter = file->private_data;
 	struct perf_mmap_data *data;
-	unsigned int events;
+	unsigned int events = POLL_HUP;
 
 	rcu_read_lock();
 	data = rcu_dereference(counter->data);
 	if (data)
-		events = atomic_xchg(&data->wakeup, 0);
-	else
-		events = POLL_HUP;
+		events = atomic_xchg(&data->poll, 0);
 	rcu_read_unlock();
 
 	poll_wait(file, &counter->waitq, wait);
@@ -1568,22 +1566,6 @@ static const struct file_operations perf_fops = {
 
 void perf_counter_wakeup(struct perf_counter *counter)
 {
-	struct perf_mmap_data *data;
-
-	rcu_read_lock();
-	data = rcu_dereference(counter->data);
-	if (data) {
-		atomic_set(&data->wakeup, POLL_IN);
-		/*
-		 * Ensure all data writes are issued before updating the
-		 * user-space data head information. The matching rmb()
-		 * will be in userspace after reading this value.
-		 */
-		smp_wmb();
-		data->user_page->data_head = atomic_read(&data->head);
-	}
-	rcu_read_unlock();
-
 	wake_up_all(&counter->waitq);
 
 	if (counter->pending_kill) {
@@ -1721,10 +1703,14 @@ struct perf_output_handle {
 	int			wakeup;
 	int			nmi;
 	int			overflow;
+	int			locked;
+	unsigned long		flags;
 };
 
-static inline void __perf_output_wakeup(struct perf_output_handle *handle)
+static void perf_output_wakeup(struct perf_output_handle *handle)
 {
+	atomic_set(&handle->data->poll, POLL_IN);
+
 	if (handle->nmi) {
 		handle->counter->pending_wakeup = 1;
 		perf_pending_queue(&handle->counter->pending,
@@ -1733,6 +1719,86 @@ static inline void __perf_output_wakeup(struct perf_output_handle *handle)
 		perf_counter_wakeup(handle->counter);
 }
 
+/*
+ * Curious locking construct.
+ *
+ * We need to ensure a later event doesn't publish a head when a former
+ * event isn't done writing. However since we need to deal with NMIs we
+ * cannot fully serialize things.
+ *
+ * What we do is serialize between CPUs so we only have to deal with NMI
+ * nesting on a single CPU.
+ *
+ * We only publish the head (and generate a wakeup) when the outer-most
+ * event completes.
+ */
+static void perf_output_lock(struct perf_output_handle *handle)
+{
+	struct perf_mmap_data *data = handle->data;
+	int cpu;
+
+	handle->locked = 0;
+
+	local_irq_save(handle->flags);
+	cpu = smp_processor_id();
+
+	if (in_nmi() && atomic_read(&data->lock) == cpu)
+		return;
+
+	while (atomic_cmpxchg(&data->lock, 0, cpu) != 0)
+		cpu_relax();
+
+	handle->locked = 1;
+}
+
+static void perf_output_unlock(struct perf_output_handle *handle)
+{
+	struct perf_mmap_data *data = handle->data;
+	int head, cpu;
+
+	if (handle->wakeup)
+		data->wakeup_head = data->head;
+
+	if (!handle->locked)
+		goto out;
+
+again:
+	/*
+	 * The xchg implies a full barrier that ensures all writes are done
+	 * before we publish the new head, matched by a rmb() in userspace when
+	 * reading this position.
+	 */
+	while ((head = atomic_xchg(&data->wakeup_head, 0))) {
+		data->user_page->data_head = head;
+		handle->wakeup = 1;
+	}
+
+	/*
+	 * NMI can happen here, which means we can miss a wakeup_head update.
+	 */
+
+	cpu = atomic_xchg(&data->lock, 0);
+	WARN_ON_ONCE(cpu != smp_processor_id());
+
+	/*
+	 * Therefore we have to validate we did not indeed do so.
+	 */
+	if (unlikely(atomic_read(&data->wakeup_head))) {
+		/*
+		 * Since we had it locked, we can lock it again.
+		 */
+		while (atomic_cmpxchg(&data->lock, 0, cpu) != 0)
+			cpu_relax();
+
+		goto again;
+	}
+
+	if (handle->wakeup)
+		perf_output_wakeup(handle);
+out:
+	local_irq_restore(handle->flags);
+}
+
 static int perf_output_begin(struct perf_output_handle *handle,
 			     struct perf_counter *counter, unsigned int size,
 			     int nmi, int overflow)
@@ -1745,6 +1811,7 @@ static int perf_output_begin(struct perf_output_handle *handle,
 	if (!data)
 		goto out;
 
+	handle->data	 = data;
 	handle->counter	 = counter;
 	handle->nmi	 = nmi;
 	handle->overflow = overflow;
@@ -1752,12 +1819,13 @@ static int perf_output_begin(struct perf_output_handle *handle,
 	if (!data->nr_pages)
 		goto fail;
 
+	perf_output_lock(handle);
+
 	do {
 		offset = head = atomic_read(&data->head);
 		head += size;
 	} while (atomic_cmpxchg(&data->head, offset, head) != offset);
 
-	handle->data	= data;
 	handle->offset	= offset;
 	handle->head	= head;
 	handle->wakeup	= (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT);
@@ -1765,7 +1833,7 @@ static int perf_output_begin(struct perf_output_handle *handle,
 	return 0;
 
 fail:
-	__perf_output_wakeup(handle);
+	perf_output_wakeup(handle);
 out:
 	rcu_read_unlock();
 
@@ -1809,16 +1877,20 @@ static void perf_output_copy(struct perf_output_handle *handle,
 
 static void perf_output_end(struct perf_output_handle *handle)
 {
-	int wakeup_events = handle->counter->hw_event.wakeup_events;
+	struct perf_counter *counter = handle->counter;
+	struct perf_mmap_data *data = handle->data;
+
+	int wakeup_events = counter->hw_event.wakeup_events;
 
 	if (handle->overflow && wakeup_events) {
-		int events = atomic_inc_return(&handle->data->events);
+		int events = atomic_inc_return(&data->events);
 		if (events >= wakeup_events) {
-			atomic_sub(wakeup_events, &handle->data->events);
-			__perf_output_wakeup(handle);
+			atomic_sub(wakeup_events, &data->events);
+			handle->wakeup = 1;
 		}
-	} else if (handle->wakeup)
-		__perf_output_wakeup(handle);
+	}
+
+	perf_output_unlock(handle);
 	rcu_read_unlock();
 }
 
-- 
cgit v1.2.3-70-g09d2


From 9ee1983c9aa18f12388ef660d0c76a23dc112959 Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@redhat.com>
Date: Thu, 30 Apr 2009 13:29:47 -0400
Subject: tracing: add irq tracepoint documentation

Document irqs for the newly created docbook.

[ Impact: add documentation ]

Signed-off-by: Jason Baron <jbaron@redhat.com>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: akpm@linux-foundation.org
Cc: rostedt@goodmis.org
Cc: fweisbec@gmail.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: wcohen@redhat.com
LKML-Reference: <73ff42be3420157667ec548e9b0e409c3cfad05f.1241107197.git.jbaron@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/DocBook/tracepoint.tmpl |  5 ++++
 include/trace/events/irq.h            | 46 ++++++++++++++++++++++++++++++++---
 2 files changed, 47 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl
index 70891bc6849..b0756d0fd57 100644
--- a/Documentation/DocBook/tracepoint.tmpl
+++ b/Documentation/DocBook/tracepoint.tmpl
@@ -81,4 +81,9 @@
    </para>
   </chapter>
 
+  <chapter id="irq">
+   <title>IRQ</title>
+!Iinclude/trace/events/irq.h
+  </chapter>
+
 </book>
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index 76868646751..32a9f7ef432 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -7,8 +7,16 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM irq
 
-/*
- * Tracepoint for entry of interrupt handler:
+/**
+ * irq_handler_entry - called immediately before the irq action handler
+ * @irq: irq number
+ * @action: pointer to struct irqaction
+ *
+ * The struct irqaction pointed to by @action contains various
+ * information about the handler, including the device name,
+ * @action->name, and the device id, @action->dev_id. When used in
+ * conjunction with the irq_handler_exit tracepoint, we can figure
+ * out irq handler latencies.
  */
 TRACE_EVENT(irq_handler_entry,
 
@@ -29,8 +37,16 @@ TRACE_EVENT(irq_handler_entry,
 	TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name))
 );
 
-/*
- * Tracepoint for return of an interrupt handler:
+/**
+ * irq_handler_exit - called immediately after the irq action handler returns
+ * @irq: irq number
+ * @action: pointer to struct irqaction
+ * @ret: return value
+ *
+ * If the @ret value is set to IRQ_HANDLED, then we know that the corresponding
+ * @action->handler scuccessully handled this irq. Otherwise, the irq might be
+ * a shared irq line, or the irq was not handled successfully. Can be used in
+ * conjunction with the irq_handler_entry to understand irq handler latencies.
  */
 TRACE_EVENT(irq_handler_exit,
 
@@ -52,6 +68,17 @@ TRACE_EVENT(irq_handler_exit,
 		  __entry->irq, __entry->ret ? "handled" : "unhandled")
 );
 
+/**
+ * softirq_entry - called immediately before the softirq handler
+ * @h: pointer to struct softirq_action
+ * @vec: pointer to first struct softirq_action in softirq_vec array
+ *
+ * The @h parameter, contains a pointer to the struct softirq_action
+ * which has a pointer to the action handler that is called. By subtracting
+ * the @vec pointer from the @h pointer, we can determine the softirq
+ * number. Also, when used in combination with the softirq_exit tracepoint
+ * we can determine the softirq latency.
+ */
 TRACE_EVENT(softirq_entry,
 
 	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
@@ -71,6 +98,17 @@ TRACE_EVENT(softirq_entry,
 	TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name))
 );
 
+/**
+ * softirq_exit - called immediately after the softirq handler returns
+ * @h: pointer to struct softirq_action
+ * @vec: pointer to first struct softirq_action in softirq_vec array
+ *
+ * The @h parameter contains a pointer to the struct softirq_action
+ * that has handled the softirq. By subtracting the @vec pointer from
+ * the @h pointer, we can determine the softirq number. Also, when used in
+ * combination with the softirq_entry tracepoint we can determine the softirq
+ * latency.
+ */
 TRACE_EVENT(softirq_exit,
 
 	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
-- 
cgit v1.2.3-70-g09d2


From 15e957d08dd4a841359cfec59ecb74041e0097aa Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Thu, 30 Apr 2009 01:17:50 -0700
Subject: x86/irq: use move_irq_desc() in create_irq_nr()

move_irq_desc() will try to move irq_desc to the home node if
the allocated one is not correct, in create_irq_nr().

( This can happen on devices that are on different nodes that
  are using MSI, when drivers are loaded and unloaded randomly. )

v2: fix non-smp build
v3: add NUMA_IRQ_DESC to eliminate #ifdefs

[ Impact: improve irq descriptor locality on NUMA systems ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
LKML-Reference: <49F95EAE.2050903@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig               |  4 ++++
 arch/x86/kernel/apic/io_apic.c |  6 +-----
 include/linux/irq.h            | 11 +++++++++--
 kernel/irq/Makefile            |  2 +-
 4 files changed, 15 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e1b2543f8ed..674e21e9f0a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -274,6 +274,10 @@ config SPARSE_IRQ
 
 	  If you don't know what to do here, say N.
 
+config NUMA_IRQ_DESC
+	def_bool y
+	depends on SPARSE_IRQ && NUMA
+
 config X86_MPPARSE
 	bool "Enable MPS table" if ACPI
 	default y
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 9cd4806cdf5..e583291fe6c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3197,11 +3197,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
 		if (cfg_new->vector != 0)
 			continue;
 
-#ifdef CONFIG_NUMA_IRQ_DESC
-		/* different node ?*/
-		if (desc_new->node != node)
-			desc = move_irq_desc(desc, node);
-#endif
+		desc_new = move_irq_desc(desc_new, node);
 
 		if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
 			irq = new;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 4b95ddb5304..eedbb8e5e0c 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -206,9 +206,16 @@ extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc
 
 #ifndef CONFIG_SPARSE_IRQ
 extern struct irq_desc irq_desc[NR_IRQS];
-#else /* CONFIG_SPARSE_IRQ */
+#endif
+
+#ifdef CONFIG_NUMA_IRQ_DESC
 extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int node);
-#endif /* CONFIG_SPARSE_IRQ */
+#else
+static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
+{
+	return desc;
+}
+#endif
 
 extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node);
 
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 2f065277f8e..7d047808419 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -3,5 +3,5 @@ obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o
 obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
-obj-$(CONFIG_SPARSE_IRQ) += numa_migrate.o
+obj-$(CONFIG_NUMA_IRQ_DESC) += numa_migrate.o
 obj-$(CONFIG_PM_SLEEP) += pm.o
-- 
cgit v1.2.3-70-g09d2


From c237dc0303bcf6f4cc2e0efe4fe4e341c6f34dac Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 29 Apr 2009 19:09:19 -0400
Subject: nfsd4: rename callback struct to cb_conn

I want to use the name for a struct that actually does represent a
single callback.

(Actually, I've never been sure it helps to a separate struct for the
callback information.  Some day maybe those fields could just be dumped
into struct nfs4_client.  I don't know.)

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4callback.c     | 16 ++++++++--------
 fs/nfsd/nfs4state.c        | 22 +++++++++++-----------
 include/linux/nfsd/state.h |  4 ++--
 3 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 0aaf68beedb..ed860d7ddd1 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -369,7 +369,7 @@ static int max_cb_time(void)
 int setup_callback_client(struct nfs4_client *clp)
 {
 	struct sockaddr_in	addr;
-	struct nfs4_callback    *cb = &clp->cl_callback;
+	struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
 	struct rpc_timeout	timeparms = {
 		.to_initval	= max_cb_time(),
 		.to_retries	= 0,
@@ -422,7 +422,7 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
 	if (task->tk_status)
 		warn_no_callback_path(clp, task->tk_status);
 	else
-		atomic_set(&clp->cl_callback.cb_set, 1);
+		atomic_set(&clp->cl_cb_conn.cb_set, 1);
 	put_nfs4_client(clp);
 }
 
@@ -430,7 +430,7 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = {
 	.rpc_call_done = nfsd4_cb_probe_done,
 };
 
-static struct rpc_cred *lookup_cb_cred(struct nfs4_callback *cb)
+static struct rpc_cred *lookup_cb_cred(struct nfs4_cb_conn *cb)
 {
 	struct auth_cred acred = {
 		.machine_cred = 1
@@ -448,7 +448,7 @@ static struct rpc_cred *lookup_cb_cred(struct nfs4_callback *cb)
 
 void do_probe_callback(struct nfs4_client *clp)
 {
-	struct nfs4_callback    *cb = &clp->cl_callback;
+	struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
 	struct rpc_message msg = {
 		.rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
 		.rpc_argp       = clp,
@@ -480,7 +480,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 {
 	int status;
 
-	BUG_ON(atomic_read(&clp->cl_callback.cb_set));
+	BUG_ON(atomic_read(&clp->cl_cb_conn.cb_set));
 
 	status = setup_callback_client(clp);
 	if (status) {
@@ -501,12 +501,12 @@ void
 nfsd4_cb_recall(struct nfs4_delegation *dp)
 {
 	struct nfs4_client *clp = dp->dl_client;
-	struct rpc_clnt *clnt = clp->cl_callback.cb_client;
+	struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
 	struct nfs4_cb_recall *cbr = &dp->dl_recall;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
 		.rpc_argp = cbr,
-		.rpc_cred = clp->cl_callback.cb_cred
+		.rpc_cred = clp->cl_cb_conn.cb_cred
 	};
 	int retries = 1;
 	int status = 0;
@@ -519,7 +519,7 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
 		switch (status) {
 			case -EIO:
 				/* Network partition? */
-				atomic_set(&clp->cl_callback.cb_set, 0);
+				atomic_set(&clp->cl_cb_conn.cb_set, 0);
 			case -EBADHANDLE:
 			case -NFS4ERR_BAD_STATEID:
 				/* Race: client probably got cb_recall
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b205c7d7bc6..d7b5e6b8920 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -182,7 +182,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
 {
 	struct nfs4_delegation *dp;
 	struct nfs4_file *fp = stp->st_file;
-	struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback;
+	struct nfs4_cb_conn *cb = &stp->st_stateowner->so_client->cl_cb_conn;
 
 	dprintk("NFSD alloc_init_deleg\n");
 	if (fp->fi_had_conflict)
@@ -633,19 +633,19 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
 static void
 shutdown_callback_client(struct nfs4_client *clp)
 {
-	struct rpc_clnt *clnt = clp->cl_callback.cb_client;
+	struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
 
 	if (clnt) {
 		/*
 		 * Callback threads take a reference on the client, so there
 		 * should be no outstanding callbacks at this point.
 		 */
-		clp->cl_callback.cb_client = NULL;
+		clp->cl_cb_conn.cb_client = NULL;
 		rpc_shutdown_client(clnt);
 	}
-	if (clp->cl_callback.cb_cred) {
-		put_rpccred(clp->cl_callback.cb_cred);
-		clp->cl_callback.cb_cred = NULL;
+	if (clp->cl_cb_conn.cb_cred) {
+		put_rpccred(clp->cl_cb_conn.cb_cred);
+		clp->cl_cb_conn.cb_cred = NULL;
 	}
 }
 
@@ -719,7 +719,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir)
 		return NULL;
 	memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
 	atomic_set(&clp->cl_count, 1);
-	atomic_set(&clp->cl_callback.cb_set, 0);
+	atomic_set(&clp->cl_cb_conn.cb_set, 0);
 	INIT_LIST_HEAD(&clp->cl_idhash);
 	INIT_LIST_HEAD(&clp->cl_strhash);
 	INIT_LIST_HEAD(&clp->cl_openowners);
@@ -971,7 +971,7 @@ parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigne
 static void
 gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
 {
-	struct nfs4_callback *cb = &clp->cl_callback;
+	struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
 
 	/* Currently, we only support tcp for the callback channel */
 	if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3))
@@ -1691,7 +1691,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
 		else {
 			/* XXX: We just turn off callbacks until we can handle
 			  * change request correctly. */
-			atomic_set(&conf->cl_callback.cb_set, 0);
+			atomic_set(&conf->cl_cb_conn.cb_set, 0);
 			expire_client(unconf);
 			status = nfs_ok;
 
@@ -2425,7 +2425,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
 {
 	struct nfs4_delegation *dp;
 	struct nfs4_stateowner *sop = stp->st_stateowner;
-	struct nfs4_callback *cb = &sop->so_client->cl_callback;
+	struct nfs4_cb_conn *cb = &sop->so_client->cl_cb_conn;
 	struct file_lock fl, *flp = &fl;
 	int status, flag = 0;
 
@@ -2617,7 +2617,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	renew_client(clp);
 	status = nfserr_cb_path_down;
 	if (!list_empty(&clp->cl_delegations)
-			&& !atomic_read(&clp->cl_callback.cb_set))
+			&& !atomic_read(&clp->cl_cb_conn.cb_set))
 		goto out;
 	status = nfs_ok;
 out:
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 8d882a3eb4b..563c367a301 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -88,7 +88,7 @@ struct nfs4_delegation {
 #define dl_fh           dl_recall.cbr_fh
 
 /* client delegation callback info */
-struct nfs4_callback {
+struct nfs4_cb_conn {
 	/* SETCLIENTID info */
 	u32                     cb_addr;
 	unsigned short          cb_port;
@@ -186,7 +186,7 @@ struct nfs4_client {
 	struct svc_cred		cl_cred; 	/* setclientid principal */
 	clientid_t		cl_clientid;	/* generated by server */
 	nfs4_verifier		cl_confirm;	/* generated by server */
-	struct nfs4_callback	cl_callback;    /* callback info */
+	struct nfs4_cb_conn	cl_cb_conn;     /* callback info */
 	atomic_t		cl_count;	/* ref count */
 	u32			cl_firststate;	/* recovery dir creation */
 
-- 
cgit v1.2.3-70-g09d2


From b53d40c5070bffde1b2bcaf848412a50d8894794 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Fri, 1 May 2009 19:50:00 -0400
Subject: nfsd4: eliminate struct nfs4_cb_recall

The nfs4_cb_recall struct is used only in nfs4_delegation, so its
pointer to the containing delegation is unnecessary--we could just use
container_of().

But there's no real reason to have this a separate struct at all--just
move these fields to nfs4_delegation.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4callback.c     | 24 +++++++++++-------------
 fs/nfsd/nfs4state.c        |  5 ++---
 include/linux/nfsd/state.h | 18 +++++-------------
 3 files changed, 18 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index ed860d7ddd1..2509305f6f5 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -215,18 +215,18 @@ encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
 }
 
 static int
-encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec)
+encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp)
 {
 	__be32 *p;
-	int len = cb_rec->cbr_fh.fh_size;
+	int len = dp->dl_fh.fh_size;
 
-	RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len);
+	RESERVE_SPACE(12+sizeof(dp->dl_stateid) + len);
 	WRITE32(OP_CB_RECALL);
-	WRITE32(cb_rec->cbr_stateid.si_generation);
-	WRITEMEM(&cb_rec->cbr_stateid.si_opaque, sizeof(stateid_opaque_t));
-	WRITE32(cb_rec->cbr_trunc);
+	WRITE32(dp->dl_stateid.si_generation);
+	WRITEMEM(&dp->dl_stateid.si_opaque, sizeof(stateid_opaque_t));
+	WRITE32(dp->dl_trunc);
 	WRITE32(len);
-	WRITEMEM(&cb_rec->cbr_fh.fh_base, len);
+	WRITEMEM(&dp->dl_fh.fh_base, len);
 	return 0;
 }
 
@@ -241,11 +241,11 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
 }
 
 static int
-nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_cb_recall *args)
+nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_delegation *args)
 {
 	struct xdr_stream xdr;
 	struct nfs4_cb_compound_hdr hdr = {
-		.ident = args->cbr_ident,
+		.ident = args->dl_ident,
 		.nops   = 1,
 	};
 
@@ -502,17 +502,15 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
 {
 	struct nfs4_client *clp = dp->dl_client;
 	struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
-	struct nfs4_cb_recall *cbr = &dp->dl_recall;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
-		.rpc_argp = cbr,
+		.rpc_argp = dp,
 		.rpc_cred = clp->cl_cb_conn.cb_cred
 	};
 	int retries = 1;
 	int status = 0;
 
-	cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */
-	cbr->cbr_dp = dp;
+	dp->dl_trunc = 0; /* XXX need to implement truncate optimization */
 
 	status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT);
 	while (retries--) {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d7b5e6b8920..3e5345e01b1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -203,9 +203,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
 	get_file(stp->st_vfs_file);
 	dp->dl_vfs_file = stp->st_vfs_file;
 	dp->dl_type = type;
-	dp->dl_recall.cbr_dp = NULL;
-	dp->dl_recall.cbr_ident = cb->cb_ident;
-	dp->dl_recall.cbr_trunc = 0;
+	dp->dl_ident = cb->cb_ident;
+	dp->dl_trunc = 0;
 	dp->dl_stateid.si_boot = get_seconds();
 	dp->dl_stateid.si_stateownerid = current_delegid++;
 	dp->dl_stateid.si_fileid = 0;
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 563c367a301..233b60d39b8 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -61,15 +61,6 @@ typedef struct {
 #define si_stateownerid   si_opaque.so_stateownerid
 #define si_fileid         si_opaque.so_fileid
 
-
-struct nfs4_cb_recall {
-	u32			cbr_ident;
-	int			cbr_trunc;
-	stateid_t		cbr_stateid;
-	struct knfsd_fh		cbr_fh;
-	struct nfs4_delegation	*cbr_dp;
-};
-
 struct nfs4_delegation {
 	struct list_head	dl_perfile;
 	struct list_head	dl_perclnt;
@@ -81,12 +72,13 @@ struct nfs4_delegation {
 	struct file		*dl_vfs_file;
 	u32			dl_type;
 	time_t			dl_time;
-	struct nfs4_cb_recall	dl_recall;
+/* For recall: */
+	u32			dl_ident;
+	int			dl_trunc;
+	stateid_t		dl_stateid;
+	struct knfsd_fh		dl_fh;
 };
 
-#define dl_stateid      dl_recall.cbr_stateid
-#define dl_fh           dl_recall.cbr_fh
-
 /* client delegation callback info */
 struct nfs4_cb_conn {
 	/* SETCLIENTID info */
-- 
cgit v1.2.3-70-g09d2


From 6707bd3d420f53ae8f090dac871843f6f43c9980 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Fri, 1 May 2009 19:57:46 -0400
Subject: nfsd4: remove unused dl_trunc

There's no point in keeping this field around--it's always zero.

(Background: the protocol allows you to tell the client that the file is
about to be truncated, as an optimization to save the client from
writing back dirty pages that will just be discarded.  We don't
implement this hint.  If we do some day, adding this field back in will
be the least of the work involved.)

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4callback.c     | 4 +---
 fs/nfsd/nfs4state.c        | 1 -
 include/linux/nfsd/state.h | 1 -
 3 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 2509305f6f5..0420b5e6e20 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -224,7 +224,7 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp)
 	WRITE32(OP_CB_RECALL);
 	WRITE32(dp->dl_stateid.si_generation);
 	WRITEMEM(&dp->dl_stateid.si_opaque, sizeof(stateid_opaque_t));
-	WRITE32(dp->dl_trunc);
+	WRITE32(0); /* truncate optimization not implemented */
 	WRITE32(len);
 	WRITEMEM(&dp->dl_fh.fh_base, len);
 	return 0;
@@ -510,8 +510,6 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
 	int retries = 1;
 	int status = 0;
 
-	dp->dl_trunc = 0; /* XXX need to implement truncate optimization */
-
 	status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT);
 	while (retries--) {
 		switch (status) {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3e5345e01b1..cbb16e191d5 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -204,7 +204,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
 	dp->dl_vfs_file = stp->st_vfs_file;
 	dp->dl_type = type;
 	dp->dl_ident = cb->cb_ident;
-	dp->dl_trunc = 0;
 	dp->dl_stateid.si_boot = get_seconds();
 	dp->dl_stateid.si_stateownerid = current_delegid++;
 	dp->dl_stateid.si_fileid = 0;
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 233b60d39b8..346b603072c 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -74,7 +74,6 @@ struct nfs4_delegation {
 	time_t			dl_time;
 /* For recall: */
 	u32			dl_ident;
-	int			dl_trunc;
 	stateid_t		dl_stateid;
 	struct knfsd_fh		dl_fh;
 };
-- 
cgit v1.2.3-70-g09d2


From 3aea09dc9106407d8bc18e593fbffda9ad632844 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Fri, 1 May 2009 20:11:12 -0400
Subject: nfsd4: track recall retries in nfs4_delegation

Move this out of a local variable into the nfs4_delegation object in
preparation for making this an async rpc call (at which point we'll need
any state like this in a common object that's preserved across function
calls).

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4callback.c     | 4 ++--
 include/linux/nfsd/state.h | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 0420b5e6e20..b88b207d75d 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -507,11 +507,11 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
 		.rpc_argp = dp,
 		.rpc_cred = clp->cl_cb_conn.cb_cred
 	};
-	int retries = 1;
 	int status = 0;
 
+	dp->dl_retries = 1;
 	status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT);
-	while (retries--) {
+	while (dp->dl_retries--) {
 		switch (status) {
 			case -EIO:
 				/* Network partition? */
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 346b603072c..c0c49215ddc 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -76,6 +76,7 @@ struct nfs4_delegation {
 	u32			dl_ident;
 	stateid_t		dl_stateid;
 	struct knfsd_fh		dl_fh;
+	int			dl_retries;
 };
 
 /* client delegation callback info */
-- 
cgit v1.2.3-70-g09d2


From a25cbd045a2ffc42787d4dbcbb9c7118f5f42732 Mon Sep 17 00:00:00 2001
From: Magnus Damm <magnus.damm@gmail.com>
Date: Fri, 1 May 2009 14:45:46 +0900
Subject: clocksource: setup mult_orig in clocksource_enable()

Setup clocksource mult_orig in clocksource_enable().

Clocksource drivers can save power by using keeping the
device clock disabled while the clocksource is unused.

In practice this means that the enable() and disable()
callbacks perform clk_enable() and clk_disable().

The enable() callback may also use clk_get_rate() to get
the clock rate from the clock framework. This information
can then be used to calculate the shift and mult variables.

Currently the mult_orig variable is setup from mult at
registration time only. This is conflicting with the above
case since the clock is disabled and the mult variable is
not yet calculated at the time of registration.

Moving the mult_orig setup code to clocksource_enable()
allows us to both handle the common case with no enable()
callback and the mult-changed-after-enable() case.

[ Impact: allow dynamic clock source usage ]

Signed-off-by: Magnus Damm <damm@igel.co.jp>
LKML-Reference: <20090501054546.8193.10688.sendpatchset@rx1.opensource.se>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clocksource.h | 10 +++++++++-
 kernel/time/clocksource.c   |  3 ---
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 5a40d14daa9..c56457c8334 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -288,7 +288,15 @@ static inline cycle_t clocksource_read(struct clocksource *cs)
  */
 static inline int clocksource_enable(struct clocksource *cs)
 {
-	return cs->enable ? cs->enable(cs) : 0;
+	int ret = 0;
+
+	if (cs->enable)
+		ret = cs->enable(cs);
+
+	/* save mult_orig on enable */
+	cs->mult_orig = cs->mult;
+
+	return ret;
 }
 
 /**
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index ecfd7b5187e..80189f6f1c5 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -402,9 +402,6 @@ int clocksource_register(struct clocksource *c)
 	unsigned long flags;
 	int ret;
 
-	/* save mult_orig on registration */
-	c->mult_orig = c->mult;
-
 	spin_lock_irqsave(&clocksource_lock, flags);
 	ret = clocksource_enqueue(c);
 	if (!ret)
-- 
cgit v1.2.3-70-g09d2


From 7d27558c4138ac6b3684dea35c2f4379b940a7dd Mon Sep 17 00:00:00 2001
From: john stultz <johnstul@us.ibm.com>
Date: Fri, 1 May 2009 13:10:26 -0700
Subject: timekeeping: create arch_gettimeoffset infrastructure

Some arches don't supply their own clocksource. This is mainly the
case in architectures that get their inter-tick times by reading the
counter on their interval timer.  Since these timers wrap every tick,
they're not really useful as clocksources.  Wrapping them to act like
one is possible but not very efficient. So we provide a callout these
arches can implement for use with the jiffies clocksource to provide
finer then tick granular time.

[ Impact: ease the migration to generic time keeping ]

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/time.h      | 15 +++++++++++++++
 kernel/time/timekeeping.c |  7 +++++++
 2 files changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/linux/time.h b/include/linux/time.h
index 242f62499bb..ea16c1a01d5 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -113,6 +113,21 @@ struct timespec current_kernel_time(void);
 #define CURRENT_TIME		(current_kernel_time())
 #define CURRENT_TIME_SEC	((struct timespec) { get_seconds(), 0 })
 
+/* Some architectures do not supply their own clocksource.
+ * This is mainly the case in architectures that get their
+ * inter-tick times by reading the counter on their interval
+ * timer. Since these timers wrap every tick, they're not really
+ * useful as clocksources. Wrapping them to act like one is possible
+ * but not very efficient. So we provide a callout these arches
+ * can implement for use with the jiffies clocksource to provide
+ * finer then tick granular time.
+ */
+#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
+extern u32 arch_gettimeoffset(void);
+#else
+static inline u32 arch_gettimeoffset(void) { return 0; }
+#endif
+
 extern void do_gettimeofday(struct timeval *tv);
 extern int do_settimeofday(struct timespec *tv);
 extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 687dff49f6e..e97c50f8458 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -77,6 +77,10 @@ static void clocksource_forward_now(void)
 	clock->cycle_last = cycle_now;
 
 	nsec = cyc2ns(clock, cycle_delta);
+
+	/* If arch requires, add in gettimeoffset() */
+	nsec += arch_gettimeoffset();
+
 	timespec_add_ns(&xtime, nsec);
 
 	nsec = ((s64)cycle_delta * clock->mult_orig) >> clock->shift;
@@ -111,6 +115,9 @@ void getnstimeofday(struct timespec *ts)
 		/* convert to nanoseconds: */
 		nsecs = cyc2ns(clock, cycle_delta);
 
+		/* If arch requires, add in gettimeoffset() */
+		nsecs += arch_gettimeoffset();
+
 	} while (read_seqretry(&xtime_lock, seq));
 
 	timespec_add_ns(ts, nsecs);
-- 
cgit v1.2.3-70-g09d2


From 33f503c96c976fd585dedb76514ca6cb286e60d9 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sat, 2 May 2009 12:24:55 +0100
Subject: ASoC: Use a shared define for AC97 CODEC data formats

The AC97 wire format is completely fixed so CODECs don't have any choice
about the formats they accept but controllers accept a variety of data
formats and render them down onto the bus.  Have a shared define so all
the CODEC drivers will interoperate with any of our controller drivers.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc-dai.h   | 3 +++
 sound/soc/codecs/ac97.c   | 4 ++--
 sound/soc/codecs/ad1980.c | 4 ++--
 sound/soc/codecs/wm9705.c | 4 ++--
 sound/soc/codecs/wm9712.c | 6 +++---
 sound/soc/codecs/wm9713.c | 6 +++---
 6 files changed, 15 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 22b729fbbf8..ea07b4bd516 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -96,6 +96,9 @@ struct snd_pcm_substream;
 #define SND_SOC_CLOCK_IN		0
 #define SND_SOC_CLOCK_OUT		1
 
+#define SND_SOC_STD_AC97_FMTS (SNDRV_PCM_FMTBIT_S16_LE |\
+                               SNDRV_PCM_FMTBIT_S32_LE)
+
 struct snd_soc_dai_ops;
 struct snd_soc_dai;
 struct snd_ac97_bus_ops;
diff --git a/sound/soc/codecs/ac97.c b/sound/soc/codecs/ac97.c
index b0d4af145b8..932299bb5d1 100644
--- a/sound/soc/codecs/ac97.c
+++ b/sound/soc/codecs/ac97.c
@@ -53,13 +53,13 @@ struct snd_soc_dai ac97_dai = {
 		.channels_min = 1,
 		.channels_max = 2,
 		.rates = STD_AC97_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.capture = {
 		.stream_name = "AC97 Capture",
 		.channels_min = 1,
 		.channels_max = 2,
 		.rates = STD_AC97_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.ops = &ac97_dai_ops,
 };
 EXPORT_SYMBOL_GPL(ac97_dai);
diff --git a/sound/soc/codecs/ad1980.c b/sound/soc/codecs/ad1980.c
index ddb3b08ac23..d7440a982d2 100644
--- a/sound/soc/codecs/ad1980.c
+++ b/sound/soc/codecs/ad1980.c
@@ -137,13 +137,13 @@ struct snd_soc_dai ad1980_dai = {
 		.channels_min = 2,
 		.channels_max = 6,
 		.rates = SNDRV_PCM_RATE_48000,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE, },
+		.formats = SND_SOC_STD_AC97_FMTS, },
 	.capture = {
 		.stream_name = "Capture",
 		.channels_min = 2,
 		.channels_max = 2,
 		.rates = SNDRV_PCM_RATE_48000,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE, },
+		.formats = SND_SOC_STD_AC97_FMTS, },
 };
 EXPORT_SYMBOL_GPL(ad1980_dai);
 
diff --git a/sound/soc/codecs/wm9705.c b/sound/soc/codecs/wm9705.c
index c2d1a7a18fa..fa88b463e71 100644
--- a/sound/soc/codecs/wm9705.c
+++ b/sound/soc/codecs/wm9705.c
@@ -282,14 +282,14 @@ struct snd_soc_dai wm9705_dai[] = {
 			.channels_min = 1,
 			.channels_max = 2,
 			.rates = WM9705_AC97_RATES,
-			.formats = SNDRV_PCM_FMTBIT_S16_LE,
+			.formats = SND_SOC_STD_AC97_FMTS,
 		},
 		.capture = {
 			.stream_name = "HiFi Capture",
 			.channels_min = 1,
 			.channels_max = 2,
 			.rates = WM9705_AC97_RATES,
-			.formats = SNDRV_PCM_FMTBIT_S16_LE,
+			.formats = SND_SOC_STD_AC97_FMTS,
 		},
 		.ops = &wm9705_dai_ops,
 	},
diff --git a/sound/soc/codecs/wm9712.c b/sound/soc/codecs/wm9712.c
index 765cf1e7369..550c903f23b 100644
--- a/sound/soc/codecs/wm9712.c
+++ b/sound/soc/codecs/wm9712.c
@@ -534,13 +534,13 @@ struct snd_soc_dai wm9712_dai[] = {
 		.channels_min = 1,
 		.channels_max = 2,
 		.rates = WM9712_AC97_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.capture = {
 		.stream_name = "HiFi Capture",
 		.channels_min = 1,
 		.channels_max = 2,
 		.rates = WM9712_AC97_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.ops = &wm9712_dai_ops_hifi,
 },
 {
@@ -550,7 +550,7 @@ struct snd_soc_dai wm9712_dai[] = {
 		.channels_min = 1,
 		.channels_max = 1,
 		.rates = WM9712_AC97_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.ops = &wm9712_dai_ops_aux,
 }
 };
diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c
index a6feb784231..d1744e96f30 100644
--- a/sound/soc/codecs/wm9713.c
+++ b/sound/soc/codecs/wm9713.c
@@ -1040,13 +1040,13 @@ struct snd_soc_dai wm9713_dai[] = {
 		.channels_min = 1,
 		.channels_max = 2,
 		.rates = WM9713_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.capture = {
 		.stream_name = "HiFi Capture",
 		.channels_min = 1,
 		.channels_max = 2,
 		.rates = WM9713_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.ops = &wm9713_dai_ops_hifi,
 	},
 	{
@@ -1056,7 +1056,7 @@ struct snd_soc_dai wm9713_dai[] = {
 		.channels_min = 1,
 		.channels_max = 1,
 		.rates = WM9713_RATES,
-		.formats = SNDRV_PCM_FMTBIT_S16_LE,},
+		.formats = SND_SOC_STD_AC97_FMTS,},
 	.ops = &wm9713_dai_ops_aux,
 	},
 	{
-- 
cgit v1.2.3-70-g09d2


From 4072604b9dd18f25a98cc0f4d3d4553ed1ad4152 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sat, 2 May 2009 12:28:25 +0100
Subject: ASoC: Remove unused DAI format defines

The defines for TDM and synchronous clocks are not used - they are
mostly a legacy of the automatic clocking configuration.  TDM will
require configuration of the number of timeslots and which ones to use
so can't be fit into the DAI format and synchronous mode is handled by
symmetric_rates (and needs to be done by constraints rather than when
the DAI format is being configured).

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc-dai.h | 18 ------------------
 1 file changed, 18 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index ea07b4bd516..a997c2cac63 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -44,24 +44,6 @@ struct snd_pcm_substream;
 #define SND_SOC_DAIFMT_CONT		(0 << 4) /* continuous clock */
 #define SND_SOC_DAIFMT_GATED		(1 << 4) /* clock is gated */
 
-/*
- * DAI Left/Right Clocks.
- *
- * Specifies whether the DAI can support different samples for similtanious
- * playback and capture. This usually requires a seperate physical frame
- * clock for playback and capture.
- */
-#define SND_SOC_DAIFMT_SYNC		(0 << 5) /* Tx FRM = Rx FRM */
-#define SND_SOC_DAIFMT_ASYNC		(1 << 5) /* Tx FRM ~ Rx FRM */
-
-/*
- * TDM
- *
- * Time Division Multiplexing. Allows PCM data to be multplexed with other
- * data on the DAI.
- */
-#define SND_SOC_DAIFMT_TDM		(1 << 6)
-
 /*
  * DAI hardware signal inversions.
  *
-- 
cgit v1.2.3-70-g09d2


From d5ed4c2e5ce9f5f6fd6a5a39ee1196a1f8a46eed Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 30 Apr 2009 07:02:49 +0000
Subject: clocksource: SuperH MTU2 Timer driver

This patch adds a MTU2 driver for the SuperH architecture.

The MTU2 driver is a platform driver with early platform
support to allow using a MTU2 channel as only clockevent
during system bootup.

Clocksource on sh2a is currently unsupported due to code
generation issues with 64-bit math, so at this point only
periodic clockevent support is in place.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/Kconfig               |  11 ++
 drivers/clocksource/Makefile  |   1 +
 drivers/clocksource/sh_mtu2.c | 357 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/sh_mtu2.h       |  12 ++
 4 files changed, 381 insertions(+)
 create mode 100644 drivers/clocksource/sh_mtu2.c
 create mode 100644 include/linux/sh_mtu2.h

(limited to 'include')

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 6f91478826d..6d0dd378eca 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -113,6 +113,9 @@ config SYS_SUPPORTS_PCI
 config SYS_SUPPORTS_CMT
 	bool
 
+config SYS_SUPPORTS_MTU2
+	bool
+
 config STACKTRACE_SUPPORT
 	def_bool y
 
@@ -478,6 +481,14 @@ config SH_MTU2
 	help
 	  This enables the use of the MTU2 as the system timer.
 
+config SH_TIMER_MTU2
+	bool "MTU2 timer driver"
+	depends on SYS_SUPPORTS_MTU2 && !SH_MTU2
+	default y
+	select GENERIC_CLOCKEVENTS
+	help
+	  This enables build of the MTU2 timer driver.
+
 config SH_TIMER_IRQ
 	int
 	default "28" if CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785 || \
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 1efb2879a94..9785586dc8c 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_X86_CYCLONE_TIMER)	+= cyclone.o
 obj-$(CONFIG_X86_PM_TIMER)	+= acpi_pm.o
 obj-$(CONFIG_SCx200HR_TIMER)	+= scx200_hrt.o
 obj-$(CONFIG_SH_TIMER_CMT)	+= sh_cmt.o
+obj-$(CONFIG_SH_TIMER_MTU2)	+= sh_mtu2.o
diff --git a/drivers/clocksource/sh_mtu2.c b/drivers/clocksource/sh_mtu2.c
new file mode 100644
index 00000000000..420566f4c50
--- /dev/null
+++ b/drivers/clocksource/sh_mtu2.c
@@ -0,0 +1,357 @@
+/*
+ * SuperH Timer Support - MTU2
+ *
+ *  Copyright (C) 2009 Magnus Damm
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/clk.h>
+#include <linux/irq.h>
+#include <linux/err.h>
+#include <linux/clockchips.h>
+#include <linux/sh_mtu2.h>
+
+struct sh_mtu2_priv {
+	void __iomem *mapbase;
+	struct clk *clk;
+	struct irqaction irqaction;
+	struct platform_device *pdev;
+	unsigned long rate;
+	unsigned long periodic;
+	struct clock_event_device ced;
+};
+
+static DEFINE_SPINLOCK(sh_mtu2_lock);
+
+#define TSTR -1 /* shared register */
+#define TCR  0 /* channel register */
+#define TMDR 1 /* channel register */
+#define TIOR 2 /* channel register */
+#define TIER 3 /* channel register */
+#define TSR  4 /* channel register */
+#define TCNT 5 /* channel register */
+#define TGR  6 /* channel register */
+
+static unsigned long mtu2_reg_offs[] = {
+	[TCR] = 0,
+	[TMDR] = 1,
+	[TIOR] = 2,
+	[TIER] = 4,
+	[TSR] = 5,
+	[TCNT] = 6,
+	[TGR] = 8,
+};
+
+static inline unsigned long sh_mtu2_read(struct sh_mtu2_priv *p, int reg_nr)
+{
+	struct sh_mtu2_config *cfg = p->pdev->dev.platform_data;
+	void __iomem *base = p->mapbase;
+	unsigned long offs;
+
+	if (reg_nr == TSTR)
+		return ioread8(base + cfg->channel_offset);
+
+	offs = mtu2_reg_offs[reg_nr];
+
+	if ((reg_nr == TCNT) || (reg_nr == TGR))
+		return ioread16(base + offs);
+	else
+		return ioread8(base + offs);
+}
+
+static inline void sh_mtu2_write(struct sh_mtu2_priv *p, int reg_nr,
+				unsigned long value)
+{
+	struct sh_mtu2_config *cfg = p->pdev->dev.platform_data;
+	void __iomem *base = p->mapbase;
+	unsigned long offs;
+
+	if (reg_nr == TSTR) {
+		iowrite8(value, base + cfg->channel_offset);
+		return;
+	}
+
+	offs = mtu2_reg_offs[reg_nr];
+
+	if ((reg_nr == TCNT) || (reg_nr == TGR))
+		iowrite16(value, base + offs);
+	else
+		iowrite8(value, base + offs);
+}
+
+static void sh_mtu2_start_stop_ch(struct sh_mtu2_priv *p, int start)
+{
+	struct sh_mtu2_config *cfg = p->pdev->dev.platform_data;
+	unsigned long flags, value;
+
+	/* start stop register shared by multiple timer channels */
+	spin_lock_irqsave(&sh_mtu2_lock, flags);
+	value = sh_mtu2_read(p, TSTR);
+
+	if (start)
+		value |= 1 << cfg->timer_bit;
+	else
+		value &= ~(1 << cfg->timer_bit);
+
+	sh_mtu2_write(p, TSTR, value);
+	spin_unlock_irqrestore(&sh_mtu2_lock, flags);
+}
+
+static int sh_mtu2_enable(struct sh_mtu2_priv *p)
+{
+	struct sh_mtu2_config *cfg = p->pdev->dev.platform_data;
+	int ret;
+
+	/* enable clock */
+	ret = clk_enable(p->clk);
+	if (ret) {
+		pr_err("sh_mtu2: cannot enable clock \"%s\"\n", cfg->clk);
+		return ret;
+	}
+
+	/* make sure channel is disabled */
+	sh_mtu2_start_stop_ch(p, 0);
+
+	p->rate = clk_get_rate(p->clk) / 64;
+	p->periodic = (p->rate + HZ/2) / HZ;
+
+	/* "Periodic Counter Operation" */
+	sh_mtu2_write(p, TCR, 0x23); /* TGRA clear, divide clock by 64 */
+	sh_mtu2_write(p, TIOR, 0);
+	sh_mtu2_write(p, TGR, p->periodic);
+	sh_mtu2_write(p, TCNT, 0);
+	sh_mtu2_write(p, TMDR, 0);
+	sh_mtu2_write(p, TIER, 0x01);
+
+	/* enable channel */
+	sh_mtu2_start_stop_ch(p, 1);
+
+	return 0;
+}
+
+static void sh_mtu2_disable(struct sh_mtu2_priv *p)
+{
+	/* disable channel */
+	sh_mtu2_start_stop_ch(p, 0);
+
+	/* stop clock */
+	clk_disable(p->clk);
+}
+
+static irqreturn_t sh_mtu2_interrupt(int irq, void *dev_id)
+{
+	struct sh_mtu2_priv *p = dev_id;
+
+	/* acknowledge interrupt */
+	sh_mtu2_read(p, TSR);
+	sh_mtu2_write(p, TSR, 0xfe);
+
+	/* notify clockevent layer */
+	p->ced.event_handler(&p->ced);
+	return IRQ_HANDLED;
+}
+
+static struct sh_mtu2_priv *ced_to_sh_mtu2(struct clock_event_device *ced)
+{
+	return container_of(ced, struct sh_mtu2_priv, ced);
+}
+
+static void sh_mtu2_clock_event_mode(enum clock_event_mode mode,
+				    struct clock_event_device *ced)
+{
+	struct sh_mtu2_priv *p = ced_to_sh_mtu2(ced);
+	int disabled = 0;
+
+	/* deal with old setting first */
+	switch (ced->mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		sh_mtu2_disable(p);
+		disabled = 1;
+		break;
+	default:
+		break;
+	}
+
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		pr_info("sh_mtu2: %s used for periodic clock events\n",
+			ced->name);
+		sh_mtu2_enable(p);
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+		if (!disabled)
+			sh_mtu2_disable(p);
+		break;
+	case CLOCK_EVT_MODE_SHUTDOWN:
+	default:
+		break;
+	}
+}
+
+static void sh_mtu2_register_clockevent(struct sh_mtu2_priv *p,
+				       char *name, unsigned long rating)
+{
+	struct clock_event_device *ced = &p->ced;
+	int ret;
+
+	memset(ced, 0, sizeof(*ced));
+
+	ced->name = name;
+	ced->features = CLOCK_EVT_FEAT_PERIODIC;
+	ced->rating = rating;
+	ced->cpumask = cpumask_of(0);
+	ced->set_mode = sh_mtu2_clock_event_mode;
+
+	ret = setup_irq(p->irqaction.irq, &p->irqaction);
+	if (ret) {
+		pr_err("sh_mtu2: failed to request irq %d\n",
+		       p->irqaction.irq);
+		return;
+	}
+
+	pr_info("sh_mtu2: %s used for clock events\n", ced->name);
+	clockevents_register_device(ced);
+}
+
+int sh_mtu2_register(struct sh_mtu2_priv *p, char *name,
+		     unsigned long clockevent_rating)
+{
+	if (clockevent_rating)
+		sh_mtu2_register_clockevent(p, name, clockevent_rating);
+
+	return 0;
+}
+
+static int sh_mtu2_setup(struct sh_mtu2_priv *p, struct platform_device *pdev)
+{
+	struct sh_mtu2_config *cfg = pdev->dev.platform_data;
+	struct resource *res;
+	int irq, ret;
+	ret = -ENXIO;
+
+	memset(p, 0, sizeof(*p));
+	p->pdev = pdev;
+
+	if (!cfg) {
+		dev_err(&p->pdev->dev, "missing platform data\n");
+		goto err0;
+	}
+
+	platform_set_drvdata(pdev, p);
+
+	res = platform_get_resource(p->pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&p->pdev->dev, "failed to get I/O memory\n");
+		goto err0;
+	}
+
+	irq = platform_get_irq(p->pdev, 0);
+	if (irq < 0) {
+		dev_err(&p->pdev->dev, "failed to get irq\n");
+		goto err0;
+	}
+
+	/* map memory, let mapbase point to our channel */
+	p->mapbase = ioremap_nocache(res->start, resource_size(res));
+	if (p->mapbase == NULL) {
+		pr_err("sh_mtu2: failed to remap I/O memory\n");
+		goto err0;
+	}
+
+	/* setup data for setup_irq() (too early for request_irq()) */
+	p->irqaction.name = cfg->name;
+	p->irqaction.handler = sh_mtu2_interrupt;
+	p->irqaction.dev_id = p;
+	p->irqaction.irq = irq;
+	p->irqaction.flags = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL;
+	p->irqaction.mask = CPU_MASK_NONE;
+
+	/* get hold of clock */
+	p->clk = clk_get(&p->pdev->dev, cfg->clk);
+	if (IS_ERR(p->clk)) {
+		pr_err("sh_mtu2: cannot get clock \"%s\"\n", cfg->clk);
+		ret = PTR_ERR(p->clk);
+		goto err1;
+	}
+
+	return sh_mtu2_register(p, cfg->name, cfg->clockevent_rating);
+ err1:
+	iounmap(p->mapbase);
+ err0:
+	return ret;
+}
+
+static int __devinit sh_mtu2_probe(struct platform_device *pdev)
+{
+	struct sh_mtu2_priv *p = platform_get_drvdata(pdev);
+	struct sh_mtu2_config *cfg = pdev->dev.platform_data;
+	int ret;
+
+	if (p) {
+		pr_info("sh_mtu2: %s kept as earlytimer\n", cfg->name);
+		return 0;
+	}
+
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (p == NULL) {
+		dev_err(&pdev->dev, "failed to allocate driver data\n");
+		return -ENOMEM;
+	}
+
+	ret = sh_mtu2_setup(p, pdev);
+	if (ret) {
+		kfree(p);
+		platform_set_drvdata(pdev, NULL);
+	}
+	return ret;
+}
+
+static int __devexit sh_mtu2_remove(struct platform_device *pdev)
+{
+	return -EBUSY; /* cannot unregister clockevent */
+}
+
+static struct platform_driver sh_mtu2_device_driver = {
+	.probe		= sh_mtu2_probe,
+	.remove		= __devexit_p(sh_mtu2_remove),
+	.driver		= {
+		.name	= "sh_mtu2",
+	}
+};
+
+static int __init sh_mtu2_init(void)
+{
+	return platform_driver_register(&sh_mtu2_device_driver);
+}
+
+static void __exit sh_mtu2_exit(void)
+{
+	platform_driver_unregister(&sh_mtu2_device_driver);
+}
+
+early_platform_init("earlytimer", &sh_mtu2_device_driver);
+module_init(sh_mtu2_init);
+module_exit(sh_mtu2_exit);
+
+MODULE_AUTHOR("Magnus Damm");
+MODULE_DESCRIPTION("SuperH MTU2 Timer Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/sh_mtu2.h b/include/linux/sh_mtu2.h
new file mode 100644
index 00000000000..a98876340ff
--- /dev/null
+++ b/include/linux/sh_mtu2.h
@@ -0,0 +1,12 @@
+#ifndef __SH_MTU2_H__
+#define __SH_MTU2_H__
+
+struct sh_mtu2_config {
+	char *name;
+	int channel_offset;
+	int timer_bit;
+	char *clk;
+	unsigned long clockevent_rating;
+};
+
+#endif /* __SH_MTU2_H__ */
-- 
cgit v1.2.3-70-g09d2


From 9570ef20423b549757aa484ad388f9a7d5bdc4d9 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Fri, 1 May 2009 06:51:00 +0000
Subject: clocksource: SuperH TMU Timer driver

This patch adds a TMU driver for the SuperH architecture.

The TMU driver is a platform driver with early platform
support to allow using a TMU channel as clockevent or
clocksource during system bootup or later.

Clocksource or clockevent can be selected.
Both periodic and oneshot clockevents are supported.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/Kconfig              |  12 ++
 drivers/clocksource/Makefile |   1 +
 drivers/clocksource/sh_tmu.c | 461 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/sh_tmu.h       |  13 ++
 4 files changed, 487 insertions(+)
 create mode 100644 drivers/clocksource/sh_tmu.c
 create mode 100644 include/linux/sh_tmu.h

(limited to 'include')

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index bda3dc1b0c2..1f74737c4f2 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -116,6 +116,9 @@ config SYS_SUPPORTS_CMT
 config SYS_SUPPORTS_MTU2
 	bool
 
+config SYS_SUPPORTS_TMU
+	bool
+
 config STACKTRACE_SUPPORT
 	def_bool y
 
@@ -470,6 +473,15 @@ config SH_TMU
 	help
 	  This enables the use of the TMU as the system timer.
 
+config SH_TIMER_TMU
+	bool "TMU timer driver"
+	depends on !SH_TMU && SYS_SUPPORTS_TMU
+	default y
+	select GENERIC_TIME
+	select GENERIC_CLOCKEVENTS
+	help
+	  This enables the build of the TMU timer driver.
+
 config SH_TIMER_CMT
 	bool "CMT timer driver"
 	depends on SYS_SUPPORTS_CMT
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 9785586dc8c..eef216f7f61 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -4,3 +4,4 @@ obj-$(CONFIG_X86_PM_TIMER)	+= acpi_pm.o
 obj-$(CONFIG_SCx200HR_TIMER)	+= scx200_hrt.o
 obj-$(CONFIG_SH_TIMER_CMT)	+= sh_cmt.o
 obj-$(CONFIG_SH_TIMER_MTU2)	+= sh_mtu2.o
+obj-$(CONFIG_SH_TIMER_TMU)	+= sh_tmu.o
diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c
new file mode 100644
index 00000000000..21bd77aa6a3
--- /dev/null
+++ b/drivers/clocksource/sh_tmu.c
@@ -0,0 +1,461 @@
+/*
+ * SuperH Timer Support - TMU
+ *
+ *  Copyright (C) 2009 Magnus Damm
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/clk.h>
+#include <linux/irq.h>
+#include <linux/err.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/sh_tmu.h>
+
+struct sh_tmu_priv {
+	void __iomem *mapbase;
+	struct clk *clk;
+	struct irqaction irqaction;
+	struct platform_device *pdev;
+	unsigned long rate;
+	unsigned long periodic;
+	struct clock_event_device ced;
+	struct clocksource cs;
+};
+
+static DEFINE_SPINLOCK(sh_tmu_lock);
+
+#define TSTR -1 /* shared register */
+#define TCOR  0 /* channel register */
+#define TCNT 1 /* channel register */
+#define TCR 2 /* channel register */
+
+static inline unsigned long sh_tmu_read(struct sh_tmu_priv *p, int reg_nr)
+{
+	struct sh_tmu_config *cfg = p->pdev->dev.platform_data;
+	void __iomem *base = p->mapbase;
+	unsigned long offs;
+
+	if (reg_nr == TSTR)
+		return ioread8(base - cfg->channel_offset);
+
+	offs = reg_nr << 2;
+
+	if (reg_nr == TCR)
+		return ioread16(base + offs);
+	else
+		return ioread32(base + offs);
+}
+
+static inline void sh_tmu_write(struct sh_tmu_priv *p, int reg_nr,
+				unsigned long value)
+{
+	struct sh_tmu_config *cfg = p->pdev->dev.platform_data;
+	void __iomem *base = p->mapbase;
+	unsigned long offs;
+
+	if (reg_nr == TSTR) {
+		iowrite8(value, base - cfg->channel_offset);
+		return;
+	}
+
+	offs = reg_nr << 2;
+
+	if (reg_nr == TCR)
+		iowrite16(value, base + offs);
+	else
+		iowrite32(value, base + offs);
+}
+
+static void sh_tmu_start_stop_ch(struct sh_tmu_priv *p, int start)
+{
+	struct sh_tmu_config *cfg = p->pdev->dev.platform_data;
+	unsigned long flags, value;
+
+	/* start stop register shared by multiple timer channels */
+	spin_lock_irqsave(&sh_tmu_lock, flags);
+	value = sh_tmu_read(p, TSTR);
+
+	if (start)
+		value |= 1 << cfg->timer_bit;
+	else
+		value &= ~(1 << cfg->timer_bit);
+
+	sh_tmu_write(p, TSTR, value);
+	spin_unlock_irqrestore(&sh_tmu_lock, flags);
+}
+
+static int sh_tmu_enable(struct sh_tmu_priv *p)
+{
+	struct sh_tmu_config *cfg = p->pdev->dev.platform_data;
+	int ret;
+
+	/* enable clock */
+	ret = clk_enable(p->clk);
+	if (ret) {
+		pr_err("sh_tmu: cannot enable clock \"%s\"\n", cfg->clk);
+		return ret;
+	}
+
+	/* make sure channel is disabled */
+	sh_tmu_start_stop_ch(p, 0);
+
+	/* maximum timeout */
+	sh_tmu_write(p, TCOR, 0xffffffff);
+	sh_tmu_write(p, TCNT, 0xffffffff);
+
+	/* configure channel to parent clock / 4, irq off */
+	p->rate = clk_get_rate(p->clk) / 4;
+	sh_tmu_write(p, TCR, 0x0000);
+
+	/* enable channel */
+	sh_tmu_start_stop_ch(p, 1);
+
+	return 0;
+}
+
+static void sh_tmu_disable(struct sh_tmu_priv *p)
+{
+	/* disable channel */
+	sh_tmu_start_stop_ch(p, 0);
+
+	/* stop clock */
+	clk_disable(p->clk);
+}
+
+static void sh_tmu_set_next(struct sh_tmu_priv *p, unsigned long delta,
+			    int periodic)
+{
+	/* stop timer */
+	sh_tmu_start_stop_ch(p, 0);
+
+	/* acknowledge interrupt */
+	sh_tmu_read(p, TCR);
+
+	/* enable interrupt */
+	sh_tmu_write(p, TCR, 0x0020);
+
+	/* reload delta value in case of periodic timer */
+	if (periodic)
+		sh_tmu_write(p, TCOR, delta);
+	else
+		sh_tmu_write(p, TCOR, 0);
+
+	sh_tmu_write(p, TCNT, delta);
+
+	/* start timer */
+	sh_tmu_start_stop_ch(p, 1);
+}
+
+static irqreturn_t sh_tmu_interrupt(int irq, void *dev_id)
+{
+	struct sh_tmu_priv *p = dev_id;
+
+	/* disable or acknowledge interrupt */
+	if (p->ced.mode == CLOCK_EVT_MODE_ONESHOT)
+		sh_tmu_write(p, TCR, 0x0000);
+	else
+		sh_tmu_write(p, TCR, 0x0020);
+
+	/* notify clockevent layer */
+	p->ced.event_handler(&p->ced);
+	return IRQ_HANDLED;
+}
+
+static struct sh_tmu_priv *cs_to_sh_tmu(struct clocksource *cs)
+{
+	return container_of(cs, struct sh_tmu_priv, cs);
+}
+
+static cycle_t sh_tmu_clocksource_read(struct clocksource *cs)
+{
+	struct sh_tmu_priv *p = cs_to_sh_tmu(cs);
+
+	return sh_tmu_read(p, TCNT) ^ 0xffffffff;
+}
+
+static int sh_tmu_clocksource_enable(struct clocksource *cs)
+{
+	struct sh_tmu_priv *p = cs_to_sh_tmu(cs);
+	int ret;
+
+	ret = sh_tmu_enable(p);
+	if (ret)
+		return ret;
+
+	/* TODO: calculate good shift from rate and counter bit width */
+	cs->shift = 10;
+	cs->mult = clocksource_hz2mult(p->rate, cs->shift);
+	return 0;
+}
+
+static void sh_tmu_clocksource_disable(struct clocksource *cs)
+{
+	sh_tmu_disable(cs_to_sh_tmu(cs));
+}
+
+static int sh_tmu_register_clocksource(struct sh_tmu_priv *p,
+				       char *name, unsigned long rating)
+{
+	struct clocksource *cs = &p->cs;
+
+	cs->name = name;
+	cs->rating = rating;
+	cs->read = sh_tmu_clocksource_read;
+	cs->enable = sh_tmu_clocksource_enable;
+	cs->disable = sh_tmu_clocksource_disable;
+	cs->mask = CLOCKSOURCE_MASK(32);
+	cs->flags = CLOCK_SOURCE_IS_CONTINUOUS;
+	pr_info("sh_tmu: %s used as clock source\n", cs->name);
+	clocksource_register(cs);
+	return 0;
+}
+
+static struct sh_tmu_priv *ced_to_sh_tmu(struct clock_event_device *ced)
+{
+	return container_of(ced, struct sh_tmu_priv, ced);
+}
+
+static void sh_tmu_clock_event_start(struct sh_tmu_priv *p, int periodic)
+{
+	struct clock_event_device *ced = &p->ced;
+
+	sh_tmu_enable(p);
+
+	/* TODO: calculate good shift from rate and counter bit width */
+
+	ced->shift = 32;
+	ced->mult = div_sc(p->rate, NSEC_PER_SEC, ced->shift);
+	ced->max_delta_ns = clockevent_delta2ns(0xffffffff, ced);
+	ced->min_delta_ns = 5000;
+
+	if (periodic) {
+		p->periodic = (p->rate + HZ/2) / HZ;
+		sh_tmu_set_next(p, p->periodic, 1);
+	}
+}
+
+static void sh_tmu_clock_event_mode(enum clock_event_mode mode,
+				    struct clock_event_device *ced)
+{
+	struct sh_tmu_priv *p = ced_to_sh_tmu(ced);
+	int disabled = 0;
+
+	/* deal with old setting first */
+	switch (ced->mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+	case CLOCK_EVT_MODE_ONESHOT:
+		sh_tmu_disable(p);
+		disabled = 1;
+		break;
+	default:
+		break;
+	}
+
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		pr_info("sh_tmu: %s used for periodic clock events\n",
+			ced->name);
+		sh_tmu_clock_event_start(p, 1);
+		break;
+	case CLOCK_EVT_MODE_ONESHOT:
+		pr_info("sh_tmu: %s used for oneshot clock events\n",
+			ced->name);
+		sh_tmu_clock_event_start(p, 0);
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+		if (!disabled)
+			sh_tmu_disable(p);
+		break;
+	case CLOCK_EVT_MODE_SHUTDOWN:
+	default:
+		break;
+	}
+}
+
+static int sh_tmu_clock_event_next(unsigned long delta,
+				   struct clock_event_device *ced)
+{
+	struct sh_tmu_priv *p = ced_to_sh_tmu(ced);
+
+	BUG_ON(ced->mode != CLOCK_EVT_MODE_ONESHOT);
+
+	/* program new delta value */
+	sh_tmu_set_next(p, delta, 0);
+	return 0;
+}
+
+static void sh_tmu_register_clockevent(struct sh_tmu_priv *p,
+				       char *name, unsigned long rating)
+{
+	struct clock_event_device *ced = &p->ced;
+	int ret;
+
+	memset(ced, 0, sizeof(*ced));
+
+	ced->name = name;
+	ced->features = CLOCK_EVT_FEAT_PERIODIC;
+	ced->features |= CLOCK_EVT_FEAT_ONESHOT;
+	ced->rating = rating;
+	ced->cpumask = cpumask_of(0);
+	ced->set_next_event = sh_tmu_clock_event_next;
+	ced->set_mode = sh_tmu_clock_event_mode;
+
+	ret = setup_irq(p->irqaction.irq, &p->irqaction);
+	if (ret) {
+		pr_err("sh_tmu: failed to request irq %d\n",
+		       p->irqaction.irq);
+		return;
+	}
+
+	pr_info("sh_tmu: %s used for clock events\n", ced->name);
+	clockevents_register_device(ced);
+}
+
+static int sh_tmu_register(struct sh_tmu_priv *p, char *name,
+		    unsigned long clockevent_rating,
+		    unsigned long clocksource_rating)
+{
+	if (clockevent_rating)
+		sh_tmu_register_clockevent(p, name, clockevent_rating);
+	else if (clocksource_rating)
+		sh_tmu_register_clocksource(p, name, clocksource_rating);
+
+	return 0;
+}
+
+static int sh_tmu_setup(struct sh_tmu_priv *p, struct platform_device *pdev)
+{
+	struct sh_tmu_config *cfg = pdev->dev.platform_data;
+	struct resource *res;
+	int irq, ret;
+	ret = -ENXIO;
+
+	memset(p, 0, sizeof(*p));
+	p->pdev = pdev;
+
+	if (!cfg) {
+		dev_err(&p->pdev->dev, "missing platform data\n");
+		goto err0;
+	}
+
+	platform_set_drvdata(pdev, p);
+
+	res = platform_get_resource(p->pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&p->pdev->dev, "failed to get I/O memory\n");
+		goto err0;
+	}
+
+	irq = platform_get_irq(p->pdev, 0);
+	if (irq < 0) {
+		dev_err(&p->pdev->dev, "failed to get irq\n");
+		goto err0;
+	}
+
+	/* map memory, let mapbase point to our channel */
+	p->mapbase = ioremap_nocache(res->start, resource_size(res));
+	if (p->mapbase == NULL) {
+		pr_err("sh_tmu: failed to remap I/O memory\n");
+		goto err0;
+	}
+
+	/* setup data for setup_irq() (too early for request_irq()) */
+	p->irqaction.name = cfg->name;
+	p->irqaction.handler = sh_tmu_interrupt;
+	p->irqaction.dev_id = p;
+	p->irqaction.irq = irq;
+	p->irqaction.flags = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL;
+	p->irqaction.mask = CPU_MASK_NONE;
+
+	/* get hold of clock */
+	p->clk = clk_get(&p->pdev->dev, cfg->clk);
+	if (IS_ERR(p->clk)) {
+		pr_err("sh_tmu: cannot get clock \"%s\"\n", cfg->clk);
+		ret = PTR_ERR(p->clk);
+		goto err1;
+	}
+
+	return sh_tmu_register(p, cfg->name,
+			       cfg->clockevent_rating,
+			       cfg->clocksource_rating);
+ err1:
+	iounmap(p->mapbase);
+ err0:
+	return ret;
+}
+
+static int __devinit sh_tmu_probe(struct platform_device *pdev)
+{
+	struct sh_tmu_priv *p = platform_get_drvdata(pdev);
+	struct sh_tmu_config *cfg = pdev->dev.platform_data;
+	int ret;
+
+	if (p) {
+		pr_info("sh_tmu: %s kept as earlytimer\n", cfg->name);
+		return 0;
+	}
+
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (p == NULL) {
+		dev_err(&pdev->dev, "failed to allocate driver data\n");
+		return -ENOMEM;
+	}
+
+	ret = sh_tmu_setup(p, pdev);
+	if (ret) {
+		kfree(p);
+		platform_set_drvdata(pdev, NULL);
+	}
+	return ret;
+}
+
+static int __devexit sh_tmu_remove(struct platform_device *pdev)
+{
+	return -EBUSY; /* cannot unregister clockevent and clocksource */
+}
+
+static struct platform_driver sh_tmu_device_driver = {
+	.probe		= sh_tmu_probe,
+	.remove		= __devexit_p(sh_tmu_remove),
+	.driver		= {
+		.name	= "sh_tmu",
+	}
+};
+
+static int __init sh_tmu_init(void)
+{
+	return platform_driver_register(&sh_tmu_device_driver);
+}
+
+static void __exit sh_tmu_exit(void)
+{
+	platform_driver_unregister(&sh_tmu_device_driver);
+}
+
+early_platform_init("earlytimer", &sh_tmu_device_driver);
+module_init(sh_tmu_init);
+module_exit(sh_tmu_exit);
+
+MODULE_AUTHOR("Magnus Damm");
+MODULE_DESCRIPTION("SuperH TMU Timer Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/sh_tmu.h b/include/linux/sh_tmu.h
new file mode 100644
index 00000000000..b1195e8a37d
--- /dev/null
+++ b/include/linux/sh_tmu.h
@@ -0,0 +1,13 @@
+#ifndef __SH_TMU_H__
+#define __SH_TMU_H__
+
+struct sh_tmu_config {
+	char *name;
+	unsigned long channel_offset;
+	int timer_bit;
+	char *clk;
+	unsigned long clockevent_rating;
+	unsigned long clocksource_rating;
+};
+
+#endif /* __SH_TMU_H__ */
-- 
cgit v1.2.3-70-g09d2


From 46a12f7426d71cabc08972cf8d3ffdd441d26a3a Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Sun, 3 May 2009 17:57:17 +0900
Subject: sh: Consolidate MTU2/CMT/TMU timer platform data.

All of the SH timers use a roughly identical structure for platform data,
which presently is broken out for each block. Consolidate all of these
definitions, as there is no reason for them to be broken out in the first
place.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/cpu/sh2/setup-sh7619.c  |  6 +++---
 arch/sh/kernel/cpu/sh2a/setup-mxg.c    |  8 ++++----
 arch/sh/kernel/cpu/sh2a/setup-sh7201.c |  8 ++++----
 arch/sh/kernel/cpu/sh2a/setup-sh7203.c | 11 +++++------
 arch/sh/kernel/cpu/sh2a/setup-sh7206.c | 13 ++++++-------
 arch/sh/kernel/cpu/sh4a/setup-sh7343.c |  4 ++--
 arch/sh/kernel/cpu/sh4a/setup-sh7366.c |  4 ++--
 arch/sh/kernel/cpu/sh4a/setup-sh7722.c | 11 +++++------
 arch/sh/kernel/cpu/sh4a/setup-sh7723.c |  4 ++--
 arch/sh/kernel/cpu/sh4a/setup-sh7724.c |  4 ++--
 drivers/clocksource/sh_cmt.c           | 14 +++++++-------
 drivers/clocksource/sh_mtu2.c          | 14 +++++++-------
 drivers/clocksource/sh_tmu.c           | 14 +++++++-------
 include/linux/sh_cmt.h                 | 13 -------------
 include/linux/sh_mtu2.h                | 12 ------------
 include/linux/sh_timer.h               | 13 +++++++++++++
 include/linux/sh_tmu.h                 | 13 -------------
 17 files changed, 69 insertions(+), 97 deletions(-)
 delete mode 100644 include/linux/sh_cmt.h
 delete mode 100644 include/linux/sh_mtu2.h
 create mode 100644 include/linux/sh_timer.h
 delete mode 100644 include/linux/sh_tmu.h

(limited to 'include')

diff --git a/arch/sh/kernel/cpu/sh2/setup-sh7619.c b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
index d70c263eb07..94ac27fc223 100644
--- a/arch/sh/kernel/cpu/sh2/setup-sh7619.c
+++ b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
-#include <linux/sh_cmt.h>
+#include <linux/sh_timer.h>
 #include <linux/io.h>
 
 enum {
@@ -111,7 +111,7 @@ static struct platform_device eth_device = {
 	.resource = eth_resources,
 };
 
-static struct sh_cmt_config cmt0_platform_data = {
+static struct sh_timer_config cmt0_platform_data = {
 	.name = "CMT0",
 	.channel_offset = 0x02,
 	.timer_bit = 0,
@@ -143,7 +143,7 @@ static struct platform_device cmt0_device = {
 	.num_resources	= ARRAY_SIZE(cmt0_resources),
 };
 
-static struct sh_cmt_config cmt1_platform_data = {
+static struct sh_timer_config cmt1_platform_data = {
 	.name = "CMT1",
 	.channel_offset = 0x08,
 	.timer_bit = 1,
diff --git a/arch/sh/kernel/cpu/sh2a/setup-mxg.c b/arch/sh/kernel/cpu/sh2a/setup-mxg.c
index 870030aa05b..a452d964906 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-mxg.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-mxg.c
@@ -11,7 +11,7 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
-#include <linux/sh_mtu2.h>
+#include <linux/sh_timer.h>
 
 enum {
 	UNUSED = 0,
@@ -114,7 +114,7 @@ static struct intc_mask_reg mask_registers[] __initdata = {
 static DECLARE_INTC_DESC(intc_desc, "mxg", vectors, groups,
 			 mask_registers, prio_registers, NULL);
 
-static struct sh_mtu2_config mtu2_0_platform_data = {
+static struct sh_timer_config mtu2_0_platform_data = {
 	.name = "MTU2_0",
 	.channel_offset = -0x80,
 	.timer_bit = 0,
@@ -145,7 +145,7 @@ static struct platform_device mtu2_0_device = {
 	.num_resources	= ARRAY_SIZE(mtu2_0_resources),
 };
 
-static struct sh_mtu2_config mtu2_1_platform_data = {
+static struct sh_timer_config mtu2_1_platform_data = {
 	.name = "MTU2_1",
 	.channel_offset = -0x100,
 	.timer_bit = 1,
@@ -176,7 +176,7 @@ static struct platform_device mtu2_1_device = {
 	.num_resources	= ARRAY_SIZE(mtu2_1_resources),
 };
 
-static struct sh_mtu2_config mtu2_2_platform_data = {
+static struct sh_timer_config mtu2_2_platform_data = {
 	.name = "MTU2_2",
 	.channel_offset = 0x80,
 	.timer_bit = 2,
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7201.c b/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
index 074aa9062e4..772358b7685 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
-#include <linux/sh_mtu2.h>
+#include <linux/sh_timer.h>
 #include <linux/io.h>
 
 enum {
@@ -251,7 +251,7 @@ static struct platform_device rtc_device = {
 	.resource	= rtc_resources,
 };
 
-static struct sh_mtu2_config mtu2_0_platform_data = {
+static struct sh_timer_config mtu2_0_platform_data = {
 	.name = "MTU2_0",
 	.channel_offset = -0x80,
 	.timer_bit = 0,
@@ -282,7 +282,7 @@ static struct platform_device mtu2_0_device = {
 	.num_resources	= ARRAY_SIZE(mtu2_0_resources),
 };
 
-static struct sh_mtu2_config mtu2_1_platform_data = {
+static struct sh_timer_config mtu2_1_platform_data = {
 	.name = "MTU2_1",
 	.channel_offset = -0x100,
 	.timer_bit = 1,
@@ -313,7 +313,7 @@ static struct platform_device mtu2_1_device = {
 	.num_resources	= ARRAY_SIZE(mtu2_1_resources),
 };
 
-static struct sh_mtu2_config mtu2_2_platform_data = {
+static struct sh_timer_config mtu2_2_platform_data = {
 	.name = "MTU2_2",
 	.channel_offset = 0x80,
 	.timer_bit = 2,
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
index 3448164b573..d7493418ba6 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
@@ -11,8 +11,7 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
-#include <linux/sh_cmt.h>
-#include <linux/sh_mtu2.h>
+#include <linux/sh_timer.h>
 #include <linux/io.h>
 
 enum {
@@ -208,7 +207,7 @@ static struct platform_device sci_device = {
 	},
 };
 
-static struct sh_cmt_config cmt0_platform_data = {
+static struct sh_timer_config cmt0_platform_data = {
 	.name = "CMT0",
 	.channel_offset = 0x02,
 	.timer_bit = 0,
@@ -240,7 +239,7 @@ static struct platform_device cmt0_device = {
 	.num_resources	= ARRAY_SIZE(cmt0_resources),
 };
 
-static struct sh_cmt_config cmt1_platform_data = {
+static struct sh_timer_config cmt1_platform_data = {
 	.name = "CMT1",
 	.channel_offset = 0x08,
 	.timer_bit = 1,
@@ -272,7 +271,7 @@ static struct platform_device cmt1_device = {
 	.num_resources	= ARRAY_SIZE(cmt1_resources),
 };
 
-static struct sh_mtu2_config mtu2_0_platform_data = {
+static struct sh_timer_config mtu2_0_platform_data = {
 	.name = "MTU2_0",
 	.channel_offset = -0x80,
 	.timer_bit = 0,
@@ -303,7 +302,7 @@ static struct platform_device mtu2_0_device = {
 	.num_resources	= ARRAY_SIZE(mtu2_0_resources),
 };
 
-static struct sh_mtu2_config mtu2_1_platform_data = {
+static struct sh_timer_config mtu2_1_platform_data = {
 	.name = "MTU2_1",
 	.channel_offset = -0x100,
 	.timer_bit = 1,
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
index e700559b6b8..2fc6bff5c5f 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
@@ -12,8 +12,7 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
-#include <linux/sh_cmt.h>
-#include <linux/sh_mtu2.h>
+#include <linux/sh_timer.h>
 #include <linux/io.h>
 
 enum {
@@ -168,7 +167,7 @@ static struct platform_device sci_device = {
 	},
 };
 
-static struct sh_cmt_config cmt0_platform_data = {
+static struct sh_timer_config cmt0_platform_data = {
 	.name = "CMT0",
 	.channel_offset = 0x02,
 	.timer_bit = 0,
@@ -200,7 +199,7 @@ static struct platform_device cmt0_device = {
 	.num_resources	= ARRAY_SIZE(cmt0_resources),
 };
 
-static struct sh_cmt_config cmt1_platform_data = {
+static struct sh_timer_config cmt1_platform_data = {
 	.name = "CMT1",
 	.channel_offset = 0x08,
 	.timer_bit = 1,
@@ -232,7 +231,7 @@ static struct platform_device cmt1_device = {
 	.num_resources	= ARRAY_SIZE(cmt1_resources),
 };
 
-static struct sh_mtu2_config mtu2_0_platform_data = {
+static struct sh_timer_config mtu2_0_platform_data = {
 	.name = "MTU2_0",
 	.channel_offset = -0x80,
 	.timer_bit = 0,
@@ -263,7 +262,7 @@ static struct platform_device mtu2_0_device = {
 	.num_resources	= ARRAY_SIZE(mtu2_0_resources),
 };
 
-static struct sh_mtu2_config mtu2_1_platform_data = {
+static struct sh_timer_config mtu2_1_platform_data = {
 	.name = "MTU2_1",
 	.channel_offset = -0x100,
 	.timer_bit = 1,
@@ -294,7 +293,7 @@ static struct platform_device mtu2_1_device = {
 	.num_resources	= ARRAY_SIZE(mtu2_1_resources),
 };
 
-static struct sh_mtu2_config mtu2_2_platform_data = {
+static struct sh_timer_config mtu2_2_platform_data = {
 	.name = "MTU2_2",
 	.channel_offset = 0x80,
 	.timer_bit = 2,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
index cb5b4db1ca2..f327b7eaf47 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
@@ -12,7 +12,7 @@
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
 #include <linux/uio_driver.h>
-#include <linux/sh_cmt.h>
+#include <linux/sh_timer.h>
 #include <asm/clock.h>
 
 static struct resource iic0_resources[] = {
@@ -141,7 +141,7 @@ static struct platform_device jpu_device = {
 	.num_resources	= ARRAY_SIZE(jpu_resources),
 };
 
-static struct sh_cmt_config cmt_platform_data = {
+static struct sh_timer_config cmt_platform_data = {
 	.name = "CMT",
 	.channel_offset = 0x60,
 	.timer_bit = 5,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
index 2a771f48e9e..7361ea989b9 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
@@ -14,7 +14,7 @@
 #include <linux/serial.h>
 #include <linux/serial_sci.h>
 #include <linux/uio_driver.h>
-#include <linux/sh_cmt.h>
+#include <linux/sh_timer.h>
 #include <asm/clock.h>
 
 static struct resource iic_resources[] = {
@@ -148,7 +148,7 @@ static struct platform_device veu1_device = {
 	.num_resources	= ARRAY_SIZE(veu1_resources),
 };
 
-static struct sh_cmt_config cmt_platform_data = {
+static struct sh_timer_config cmt_platform_data = {
 	.name = "CMT",
 	.channel_offset = 0x60,
 	.timer_bit = 5,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
index 512735c5cc8..624e8e5a605 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
@@ -13,8 +13,7 @@
 #include <linux/serial_sci.h>
 #include <linux/mm.h>
 #include <linux/uio_driver.h>
-#include <linux/sh_cmt.h>
-#include <linux/sh_tmu.h>
+#include <linux/sh_timer.h>
 #include <asm/clock.h>
 #include <asm/mmzone.h>
 
@@ -178,7 +177,7 @@ static struct platform_device jpu_device = {
 	.num_resources	= ARRAY_SIZE(jpu_resources),
 };
 
-static struct sh_cmt_config cmt_platform_data = {
+static struct sh_timer_config cmt_platform_data = {
 	.name = "CMT",
 	.channel_offset = 0x60,
 	.timer_bit = 5,
@@ -210,7 +209,7 @@ static struct platform_device cmt_device = {
 	.num_resources	= ARRAY_SIZE(cmt_resources),
 };
 
-static struct sh_tmu_config tmu0_platform_data = {
+static struct sh_timer_config tmu0_platform_data = {
 	.name = "TMU0",
 	.channel_offset = 0x04,
 	.timer_bit = 0,
@@ -241,7 +240,7 @@ static struct platform_device tmu0_device = {
 	.num_resources	= ARRAY_SIZE(tmu0_resources),
 };
 
-static struct sh_tmu_config tmu1_platform_data = {
+static struct sh_timer_config tmu1_platform_data = {
 	.name = "TMU1",
 	.channel_offset = 0x10,
 	.timer_bit = 1,
@@ -272,7 +271,7 @@ static struct platform_device tmu1_device = {
 	.num_resources	= ARRAY_SIZE(tmu1_resources),
 };
 
-static struct sh_tmu_config tmu2_platform_data = {
+static struct sh_timer_config tmu2_platform_data = {
 	.name = "TMU2",
 	.channel_offset = 0x1c,
 	.timer_bit = 2,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
index dbb44949ed1..fb9b5427a06 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
@@ -13,7 +13,7 @@
 #include <linux/mm.h>
 #include <linux/serial_sci.h>
 #include <linux/uio_driver.h>
-#include <linux/sh_cmt.h>
+#include <linux/sh_timer.h>
 #include <asm/clock.h>
 #include <asm/mmzone.h>
 
@@ -101,7 +101,7 @@ static struct platform_device veu1_device = {
 	.num_resources	= ARRAY_SIZE(veu1_resources),
 };
 
-static struct sh_cmt_config cmt_platform_data = {
+static struct sh_timer_config cmt_platform_data = {
 	.name = "CMT",
 	.channel_offset = 0x60,
 	.timer_bit = 5,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
index 8429396acb5..e074951b88b 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -18,7 +18,7 @@
 #include <linux/mm.h>
 #include <linux/serial_sci.h>
 #include <linux/uio_driver.h>
-#include <linux/sh_cmt.h>
+#include <linux/sh_timer.h>
 #include <linux/io.h>
 #include <asm/clock.h>
 #include <asm/mmzone.h>
@@ -230,7 +230,7 @@ static struct platform_device veu1_device = {
 	.num_resources	= ARRAY_SIZE(veu1_resources),
 };
 
-static struct sh_cmt_config cmt_platform_data = {
+static struct sh_timer_config cmt_platform_data = {
 	.name = "CMT",
 	.channel_offset = 0x60,
 	.timer_bit = 5,
diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index 4ff1508e5ab..aeb8c9b27b5 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -28,7 +28,7 @@
 #include <linux/err.h>
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
-#include <linux/sh_cmt.h>
+#include <linux/sh_timer.h>
 
 struct sh_cmt_priv {
 	void __iomem *mapbase;
@@ -59,7 +59,7 @@ static DEFINE_SPINLOCK(sh_cmt_lock);
 
 static inline unsigned long sh_cmt_read(struct sh_cmt_priv *p, int reg_nr)
 {
-	struct sh_cmt_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	void __iomem *base = p->mapbase;
 	unsigned long offs;
 
@@ -83,7 +83,7 @@ static inline unsigned long sh_cmt_read(struct sh_cmt_priv *p, int reg_nr)
 static inline void sh_cmt_write(struct sh_cmt_priv *p, int reg_nr,
 				unsigned long value)
 {
-	struct sh_cmt_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	void __iomem *base = p->mapbase;
 	unsigned long offs;
 
@@ -131,7 +131,7 @@ static unsigned long sh_cmt_get_counter(struct sh_cmt_priv *p,
 
 static void sh_cmt_start_stop_ch(struct sh_cmt_priv *p, int start)
 {
-	struct sh_cmt_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	unsigned long flags, value;
 
 	/* start stop register shared by multiple timer channels */
@@ -149,7 +149,7 @@ static void sh_cmt_start_stop_ch(struct sh_cmt_priv *p, int start)
 
 static int sh_cmt_enable(struct sh_cmt_priv *p, unsigned long *rate)
 {
-	struct sh_cmt_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	int ret;
 
 	/* enable clock */
@@ -560,7 +560,7 @@ int sh_cmt_register(struct sh_cmt_priv *p, char *name,
 
 static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev)
 {
-	struct sh_cmt_config *cfg = pdev->dev.platform_data;
+	struct sh_timer_config *cfg = pdev->dev.platform_data;
 	struct resource *res;
 	int irq, ret;
 	ret = -ENXIO;
@@ -638,7 +638,7 @@ static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev)
 static int __devinit sh_cmt_probe(struct platform_device *pdev)
 {
 	struct sh_cmt_priv *p = platform_get_drvdata(pdev);
-	struct sh_cmt_config *cfg = pdev->dev.platform_data;
+	struct sh_timer_config *cfg = pdev->dev.platform_data;
 	int ret;
 
 	if (p) {
diff --git a/drivers/clocksource/sh_mtu2.c b/drivers/clocksource/sh_mtu2.c
index 420566f4c50..ef02185a827 100644
--- a/drivers/clocksource/sh_mtu2.c
+++ b/drivers/clocksource/sh_mtu2.c
@@ -28,7 +28,7 @@
 #include <linux/irq.h>
 #include <linux/err.h>
 #include <linux/clockchips.h>
-#include <linux/sh_mtu2.h>
+#include <linux/sh_timer.h>
 
 struct sh_mtu2_priv {
 	void __iomem *mapbase;
@@ -63,7 +63,7 @@ static unsigned long mtu2_reg_offs[] = {
 
 static inline unsigned long sh_mtu2_read(struct sh_mtu2_priv *p, int reg_nr)
 {
-	struct sh_mtu2_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	void __iomem *base = p->mapbase;
 	unsigned long offs;
 
@@ -81,7 +81,7 @@ static inline unsigned long sh_mtu2_read(struct sh_mtu2_priv *p, int reg_nr)
 static inline void sh_mtu2_write(struct sh_mtu2_priv *p, int reg_nr,
 				unsigned long value)
 {
-	struct sh_mtu2_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	void __iomem *base = p->mapbase;
 	unsigned long offs;
 
@@ -100,7 +100,7 @@ static inline void sh_mtu2_write(struct sh_mtu2_priv *p, int reg_nr,
 
 static void sh_mtu2_start_stop_ch(struct sh_mtu2_priv *p, int start)
 {
-	struct sh_mtu2_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	unsigned long flags, value;
 
 	/* start stop register shared by multiple timer channels */
@@ -118,7 +118,7 @@ static void sh_mtu2_start_stop_ch(struct sh_mtu2_priv *p, int start)
 
 static int sh_mtu2_enable(struct sh_mtu2_priv *p)
 {
-	struct sh_mtu2_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	int ret;
 
 	/* enable clock */
@@ -243,7 +243,7 @@ int sh_mtu2_register(struct sh_mtu2_priv *p, char *name,
 
 static int sh_mtu2_setup(struct sh_mtu2_priv *p, struct platform_device *pdev)
 {
-	struct sh_mtu2_config *cfg = pdev->dev.platform_data;
+	struct sh_timer_config *cfg = pdev->dev.platform_data;
 	struct resource *res;
 	int irq, ret;
 	ret = -ENXIO;
@@ -303,7 +303,7 @@ static int sh_mtu2_setup(struct sh_mtu2_priv *p, struct platform_device *pdev)
 static int __devinit sh_mtu2_probe(struct platform_device *pdev)
 {
 	struct sh_mtu2_priv *p = platform_get_drvdata(pdev);
-	struct sh_mtu2_config *cfg = pdev->dev.platform_data;
+	struct sh_timer_config *cfg = pdev->dev.platform_data;
 	int ret;
 
 	if (p) {
diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c
index 21bd77aa6a3..d6ea4398bf6 100644
--- a/drivers/clocksource/sh_tmu.c
+++ b/drivers/clocksource/sh_tmu.c
@@ -29,7 +29,7 @@
 #include <linux/err.h>
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
-#include <linux/sh_tmu.h>
+#include <linux/sh_timer.h>
 
 struct sh_tmu_priv {
 	void __iomem *mapbase;
@@ -51,7 +51,7 @@ static DEFINE_SPINLOCK(sh_tmu_lock);
 
 static inline unsigned long sh_tmu_read(struct sh_tmu_priv *p, int reg_nr)
 {
-	struct sh_tmu_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	void __iomem *base = p->mapbase;
 	unsigned long offs;
 
@@ -69,7 +69,7 @@ static inline unsigned long sh_tmu_read(struct sh_tmu_priv *p, int reg_nr)
 static inline void sh_tmu_write(struct sh_tmu_priv *p, int reg_nr,
 				unsigned long value)
 {
-	struct sh_tmu_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	void __iomem *base = p->mapbase;
 	unsigned long offs;
 
@@ -88,7 +88,7 @@ static inline void sh_tmu_write(struct sh_tmu_priv *p, int reg_nr,
 
 static void sh_tmu_start_stop_ch(struct sh_tmu_priv *p, int start)
 {
-	struct sh_tmu_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	unsigned long flags, value;
 
 	/* start stop register shared by multiple timer channels */
@@ -106,7 +106,7 @@ static void sh_tmu_start_stop_ch(struct sh_tmu_priv *p, int start)
 
 static int sh_tmu_enable(struct sh_tmu_priv *p)
 {
-	struct sh_tmu_config *cfg = p->pdev->dev.platform_data;
+	struct sh_timer_config *cfg = p->pdev->dev.platform_data;
 	int ret;
 
 	/* enable clock */
@@ -345,7 +345,7 @@ static int sh_tmu_register(struct sh_tmu_priv *p, char *name,
 
 static int sh_tmu_setup(struct sh_tmu_priv *p, struct platform_device *pdev)
 {
-	struct sh_tmu_config *cfg = pdev->dev.platform_data;
+	struct sh_timer_config *cfg = pdev->dev.platform_data;
 	struct resource *res;
 	int irq, ret;
 	ret = -ENXIO;
@@ -407,7 +407,7 @@ static int sh_tmu_setup(struct sh_tmu_priv *p, struct platform_device *pdev)
 static int __devinit sh_tmu_probe(struct platform_device *pdev)
 {
 	struct sh_tmu_priv *p = platform_get_drvdata(pdev);
-	struct sh_tmu_config *cfg = pdev->dev.platform_data;
+	struct sh_timer_config *cfg = pdev->dev.platform_data;
 	int ret;
 
 	if (p) {
diff --git a/include/linux/sh_cmt.h b/include/linux/sh_cmt.h
deleted file mode 100644
index 68cacde5954..00000000000
--- a/include/linux/sh_cmt.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __SH_CMT_H__
-#define __SH_CMT_H__
-
-struct sh_cmt_config {
-	char *name;
-	unsigned long channel_offset;
-	int timer_bit;
-	char *clk;
-	unsigned long clockevent_rating;
-	unsigned long clocksource_rating;
-};
-
-#endif /* __SH_CMT_H__ */
diff --git a/include/linux/sh_mtu2.h b/include/linux/sh_mtu2.h
deleted file mode 100644
index a98876340ff..00000000000
--- a/include/linux/sh_mtu2.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef __SH_MTU2_H__
-#define __SH_MTU2_H__
-
-struct sh_mtu2_config {
-	char *name;
-	int channel_offset;
-	int timer_bit;
-	char *clk;
-	unsigned long clockevent_rating;
-};
-
-#endif /* __SH_MTU2_H__ */
diff --git a/include/linux/sh_timer.h b/include/linux/sh_timer.h
new file mode 100644
index 00000000000..864bd56bd3b
--- /dev/null
+++ b/include/linux/sh_timer.h
@@ -0,0 +1,13 @@
+#ifndef __SH_TIMER_H__
+#define __SH_TIMER_H__
+
+struct sh_timer_config {
+	char *name;
+	long channel_offset;
+	int timer_bit;
+	char *clk;
+	unsigned long clockevent_rating;
+	unsigned long clocksource_rating;
+};
+
+#endif /* __SH_TIMER_H__ */
diff --git a/include/linux/sh_tmu.h b/include/linux/sh_tmu.h
deleted file mode 100644
index b1195e8a37d..00000000000
--- a/include/linux/sh_tmu.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __SH_TMU_H__
-#define __SH_TMU_H__
-
-struct sh_tmu_config {
-	char *name;
-	unsigned long channel_offset;
-	int timer_bit;
-	char *clk;
-	unsigned long clockevent_rating;
-	unsigned long clocksource_rating;
-};
-
-#endif /* __SH_TMU_H__ */
-- 
cgit v1.2.3-70-g09d2


From accc5b4f902b0ba83b2c6c48f2d9e7c204cef4a8 Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Sun, 3 May 2009 08:58:48 +0000
Subject: ipv4: remove unused macro (FIB_RES_RESET) from ip_fib.h.

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 34855eede5f..ef91fe924ba 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -124,14 +124,12 @@ struct fib_result_nl {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 
 #define FIB_RES_NH(res)		((res).fi->fib_nh[(res).nh_sel])
-#define FIB_RES_RESET(res)	((res).nh_sel = 0)
 
 #define FIB_TABLE_HASHSZ 2
 
 #else /* CONFIG_IP_ROUTE_MULTIPATH */
 
 #define FIB_RES_NH(res)		((res).fi->fib_nh[0])
-#define FIB_RES_RESET(res)
 
 #define FIB_TABLE_HASHSZ 256
 
-- 
cgit v1.2.3-70-g09d2


From 0d905bca23aca5c86a10ee101bcd3b1abbd40b25 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 4 May 2009 19:13:30 +0200
Subject: perf_counter: initialize the per-cpu context earlier

percpu scheduling for perfcounters wants to take the context lock,
but that lock first needs to be initialized. Currently it is an
early_initcall() - but that is too late, the task tick runs much
sooner than that.

Call it explicitly from the scheduler init sequence instead.

[ Impact: fix access-before-init crash ]

LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 5 ++++-
 kernel/perf_counter.c        | 5 +----
 kernel/sched.c               | 5 ++++-
 3 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index f776851f8c4..a356fa69796 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -573,6 +573,8 @@ extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
 
 extern int sysctl_perf_counter_priv;
 
+extern void perf_counter_init(void);
+
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
@@ -600,9 +602,10 @@ perf_counter_mmap(unsigned long addr, unsigned long len,
 
 static inline void
 perf_counter_munmap(unsigned long addr, unsigned long len,
-		    unsigned long pgoff, struct file *file) 		{ }
+		    unsigned long pgoff, struct file *file)		{ }
 
 static inline void perf_counter_comm(struct task_struct *tsk)		{ }
+static inline void perf_counter_init(void)				{ }
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index b9679c36bcc..fcdafa234a5 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3265,15 +3265,12 @@ static struct notifier_block __cpuinitdata perf_cpu_nb = {
 	.notifier_call		= perf_cpu_notify,
 };
 
-static int __init perf_counter_init(void)
+void __init perf_counter_init(void)
 {
 	perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
 			(void *)(long)smp_processor_id());
 	register_cpu_notifier(&perf_cpu_nb);
-
-	return 0;
 }
-early_initcall(perf_counter_init);
 
 static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, char *buf)
 {
diff --git a/kernel/sched.c b/kernel/sched.c
index 2f600e30dcf..a728976a3a6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -39,6 +39,7 @@
 #include <linux/completion.h>
 #include <linux/kernel_stat.h>
 #include <linux/debug_locks.h>
+#include <linux/perf_counter.h>
 #include <linux/security.h>
 #include <linux/notifier.h>
 #include <linux/profile.h>
@@ -8996,7 +8997,7 @@ void __init sched_init(void)
 		 * 1024) and two child groups A0 and A1 (of weight 1024 each),
 		 * then A0's share of the cpu resource is:
 		 *
-		 * 	A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
+		 *	A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
 		 *
 		 * We achieve this by letting init_task_group's tasks sit
 		 * directly in rq->cfs (i.e init_task_group->se[] = NULL).
@@ -9097,6 +9098,8 @@ void __init sched_init(void)
 	alloc_bootmem_cpumask_var(&cpu_isolated_map);
 #endif /* SMP */
 
+	perf_counter_init();
+
 	scheduler_running = 1;
 }
 
-- 
cgit v1.2.3-70-g09d2


From bbd993077d788589a86a718ba7a7895ba5e71a17 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 5 May 2009 10:27:38 +0100
Subject: ASoC: Remove redundant codec pointer from DAIs

The DAI structure has two pointers to the codec, one in the body of the
DAI and one in a union for a parent pointer.  Drop the parent pointer
version.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc-dai.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index a997c2cac63..496dc30457b 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -205,11 +205,8 @@ struct snd_soc_dai {
 	/* DAI private data */
 	void *private_data;
 
-	/* parent codec/platform */
-	union {
-		struct snd_soc_codec *codec;
-		struct snd_soc_platform *platform;
-	};
+	/* parent platform */
+	struct snd_soc_platform *platform;
 
 	struct list_head list;
 };
-- 
cgit v1.2.3-70-g09d2


From 4bbe1ddf89a5ba3ec30fe5980912d8bda3a3cbb2 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 13 Oct 2008 03:07:14 +0200
Subject: ALSA: Add extra delay count in PCM

Added runtime->delay field to adjust the delayed samples for snd_pcm_delay().
Typically a hardware FIFO length is stored in this field, so that the
extra delay between hwptr and applptr can be computed.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/pcm.h     | 1 +
 sound/core/pcm_native.c | 8 +++++---
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index c1729689161..267effddb07 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -270,6 +270,7 @@ struct snd_pcm_runtime {
 	snd_pcm_uframes_t hw_ptr_base;	/* Position at buffer restart */
 	snd_pcm_uframes_t hw_ptr_interrupt; /* Position at interrupt time */
 	unsigned long hw_ptr_jiffies;	/* Time when hw_ptr is updated */
+	snd_pcm_sframes_t delay;	/* extra delay; typically FIFO size */
 
 	/* -- HW params -- */
 	snd_pcm_access_t access;	/* access mode */
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index fc6f98e257d..cb769d415db 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -587,14 +587,15 @@ int snd_pcm_status(struct snd_pcm_substream *substream,
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
 		status->avail = snd_pcm_playback_avail(runtime);
 		if (runtime->status->state == SNDRV_PCM_STATE_RUNNING ||
-		    runtime->status->state == SNDRV_PCM_STATE_DRAINING)
+		    runtime->status->state == SNDRV_PCM_STATE_DRAINING) {
 			status->delay = runtime->buffer_size - status->avail;
-		else
+			status->delay += runtime->delay;
+		} else
 			status->delay = 0;
 	} else {
 		status->avail = snd_pcm_capture_avail(runtime);
 		if (runtime->status->state == SNDRV_PCM_STATE_RUNNING)
-			status->delay = status->avail;
+			status->delay = status->avail + runtime->delay;
 		else
 			status->delay = 0;
 	}
@@ -2404,6 +2405,7 @@ static int snd_pcm_delay(struct snd_pcm_substream *substream,
 			n = snd_pcm_playback_hw_avail(runtime);
 		else
 			n = snd_pcm_capture_avail(runtime);
+		n += runtime->delay;
 		break;
 	case SNDRV_PCM_STATE_XRUN:
 		err = -EPIPE;
-- 
cgit v1.2.3-70-g09d2


From f0d2c681ac0a85142fc8abe65fc33fcad35cb9b7 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 29 Apr 2009 13:43:37 -0400
Subject: ring-buffer: add counters for commit overrun and nmi dropped entries

The WARN_ON in the ring buffer when a commit is preempted and the
buffer is filled by preceding writes can happen in normal operations.
The WARN_ON makes it look like a bug, not to mention, because
it does not stop tracing and calls printk which can also recurse, this
is prone to deadlock (the WARN_ON is not in a position to recurse).

This patch removes the WARN_ON and replaces it with a counter that
can be retrieved by a tracer. This counter is called commit_overrun.

While at it, I added a nmi_dropped counter to count any time an NMI entry
is dropped because the NMI could not take the spinlock.

[ Impact: prevent deadlock by printing normal case warning ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h |  2 ++
 kernel/trace/ring_buffer.c  | 52 ++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 51 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 1c2f80911fb..f1345828c7c 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -153,6 +153,8 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer);
 unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
 unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu);
 unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
+unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu);
+unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu);
 
 u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu);
 void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3e86da9b2a0..26e1359fe19 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -402,6 +402,8 @@ struct ring_buffer_per_cpu {
 	struct buffer_page		*tail_page;	/* write to tail */
 	struct buffer_page		*commit_page;	/* committed pages */
 	struct buffer_page		*reader_page;
+	unsigned long			nmi_dropped;
+	unsigned long			commit_overrun;
 	unsigned long			overrun;
 	unsigned long			entries;
 	u64				write_stamp;
@@ -1216,8 +1218,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		 * simply fail.
 		 */
 		if (unlikely(in_nmi())) {
-			if (!__raw_spin_trylock(&cpu_buffer->lock))
+			if (!__raw_spin_trylock(&cpu_buffer->lock)) {
+				cpu_buffer->nmi_dropped++;
 				goto out_reset;
+			}
 		} else
 			__raw_spin_lock(&cpu_buffer->lock);
 
@@ -1238,8 +1242,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		 * about it.
 		 */
 		if (unlikely(next_page == commit_page)) {
-			/* This can easily happen on small ring buffers */
-			WARN_ON_ONCE(buffer->pages > 2);
+			cpu_buffer->commit_overrun++;
 			goto out_reset;
 		}
 
@@ -1925,6 +1928,47 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
 
+/**
+ * ring_buffer_nmi_dropped_cpu - get the number of nmis that were dropped
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of overruns from
+ */
+unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long ret;
+
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	ret = cpu_buffer->nmi_dropped;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu);
+
+/**
+ * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of overruns from
+ */
+unsigned long
+ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long ret;
+
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	ret = cpu_buffer->commit_overrun;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
+
 /**
  * ring_buffer_entries - get the number of entries in a buffer
  * @buffer: The ring buffer
@@ -2595,6 +2639,8 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	local_set(&cpu_buffer->reader_page->page->commit, 0);
 	cpu_buffer->reader_page->read = 0;
 
+	cpu_buffer->nmi_dropped = 0;
+	cpu_buffer->commit_overrun = 0;
 	cpu_buffer->overrun = 0;
 	cpu_buffer->entries = 0;
 
-- 
cgit v1.2.3-70-g09d2


From c66de4a5be7913247bd83d79168f8e4420c9cfbc Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 5 May 2009 17:50:22 +0200
Subject: perf_counter: uncouple data_head updates from wakeups

Keep data_head up-to-date irrespective of notifications. This fixes
the case where you disable a counter and don't get a notification for
the last few pending events, and it also allows polling usage.

[ Impact: increase precision of perfcounter mmap-ed fields ]

Suggested-by: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <20090505155436.925084300@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  4 +++-
 kernel/perf_counter.c        | 20 +++++++++-----------
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index a356fa69796..17b63105f2a 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -362,9 +362,11 @@ struct perf_mmap_data {
 	atomic_t			head;		/* write position    */
 	atomic_t			events;		/* event limit       */
 
-	atomic_t			wakeup_head;	/* completed head    */
+	atomic_t			done_head;	/* completed head    */
 	atomic_t			lock;		/* concurrent writes */
 
+	atomic_t			wakeup;		/* needs a wakeup    */
+
 	struct perf_counter_mmap_page   *user_page;
 	void 				*data_pages[0];
 };
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 5f86a1156c9..ba5e921e1f3 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1696,7 +1696,6 @@ struct perf_output_handle {
 	struct perf_mmap_data	*data;
 	unsigned int		offset;
 	unsigned int		head;
-	int			wakeup;
 	int			nmi;
 	int			overflow;
 	int			locked;
@@ -1752,8 +1751,7 @@ static void perf_output_unlock(struct perf_output_handle *handle)
 	struct perf_mmap_data *data = handle->data;
 	int head, cpu;
 
-	if (handle->wakeup)
-		data->wakeup_head = data->head;
+	data->done_head = data->head;
 
 	if (!handle->locked)
 		goto out;
@@ -1764,13 +1762,11 @@ again:
 	 * before we publish the new head, matched by a rmb() in userspace when
 	 * reading this position.
 	 */
-	while ((head = atomic_xchg(&data->wakeup_head, 0))) {
+	while ((head = atomic_xchg(&data->done_head, 0)))
 		data->user_page->data_head = head;
-		handle->wakeup = 1;
-	}
 
 	/*
-	 * NMI can happen here, which means we can miss a wakeup_head update.
+	 * NMI can happen here, which means we can miss a done_head update.
 	 */
 
 	cpu = atomic_xchg(&data->lock, 0);
@@ -1779,7 +1775,7 @@ again:
 	/*
 	 * Therefore we have to validate we did not indeed do so.
 	 */
-	if (unlikely(atomic_read(&data->wakeup_head))) {
+	if (unlikely(atomic_read(&data->done_head))) {
 		/*
 		 * Since we had it locked, we can lock it again.
 		 */
@@ -1789,7 +1785,7 @@ again:
 		goto again;
 	}
 
-	if (handle->wakeup)
+	if (atomic_xchg(&data->wakeup, 0))
 		perf_output_wakeup(handle);
 out:
 	local_irq_restore(handle->flags);
@@ -1824,7 +1820,9 @@ static int perf_output_begin(struct perf_output_handle *handle,
 
 	handle->offset	= offset;
 	handle->head	= head;
-	handle->wakeup	= (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT);
+
+	if ((offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT))
+		atomic_set(&data->wakeup, 1);
 
 	return 0;
 
@@ -1882,7 +1880,7 @@ static void perf_output_end(struct perf_output_handle *handle)
 		int events = atomic_inc_return(&data->events);
 		if (events >= wakeup_events) {
 			atomic_sub(wakeup_events, &data->events);
-			handle->wakeup = 1;
+			atomic_set(&data->wakeup, 1);
 		}
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 6de6a7b95705b859b61430fa3afa1403034eb3e6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 5 May 2009 17:50:23 +0200
Subject: perf_counter: add ioctl(PERF_COUNTER_IOC_RESET)

Provide a way to reset an existing counter - this eases PAPI
libraries around perfcounters.

Similar to read() it doesn't collapse pending child counters.

[ Impact: new perfcounter fd ioctl method to reset counters ]

Suggested-by: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <20090505155437.022272933@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 1 +
 kernel/perf_counter.c        | 8 ++++++++
 2 files changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 17b63105f2a..0fcbf34a4f7 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -160,6 +160,7 @@ struct perf_counter_hw_event {
 #define PERF_COUNTER_IOC_ENABLE		_IO ('$', 0)
 #define PERF_COUNTER_IOC_DISABLE	_IO ('$', 1)
 #define PERF_COUNTER_IOC_REFRESH	_IOW('$', 2, u32)
+#define PERF_COUNTER_IOC_RESET		_IO ('$', 3)
 
 /*
  * Structure of the page that can be mapped via mmap
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index ba5e921e1f3..6e6834e0587 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1288,6 +1288,11 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 	return events;
 }
 
+static void perf_counter_reset(struct perf_counter *counter)
+{
+	atomic_set(&counter->count, 0);
+}
+
 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct perf_counter *counter = file->private_data;
@@ -1303,6 +1308,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case PERF_COUNTER_IOC_REFRESH:
 		perf_counter_refresh(counter, arg);
 		break;
+	case PERF_COUNTER_IOC_RESET:
+		perf_counter_reset(counter);
+		break;
 	default:
 		err = -ENOTTY;
 	}
-- 
cgit v1.2.3-70-g09d2


From c5078f78b455fbf67ea71442c7e7ca8acf9ff095 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 5 May 2009 17:50:24 +0200
Subject: perf_counter: provide an mlock threshold

Provide a threshold to relax the mlock accounting, increasing usability.

Each counter gets perf_counter_mlock_kb for free.

[ Impact: allow more mmap buffering ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090505155437.112113632@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  2 ++
 kernel/perf_counter.c        | 15 +++++++++++----
 kernel/sysctl.c              |  8 ++++++++
 3 files changed, 21 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 0fcbf34a4f7..00081d84169 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -358,6 +358,7 @@ struct file;
 struct perf_mmap_data {
 	struct rcu_head			rcu_head;
 	int				nr_pages;	/* nr of data pages  */
+	int				nr_locked;	/* nr pages mlocked  */
 
 	atomic_t			poll;		/* POLL_ for wakeups */
 	atomic_t			head;		/* write position    */
@@ -575,6 +576,7 @@ struct perf_callchain_entry {
 extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
 
 extern int sysctl_perf_counter_priv;
+extern int sysctl_perf_counter_mlock;
 
 extern void perf_counter_init(void);
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 6e6834e0587..2d134273830 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -44,6 +44,7 @@ static atomic_t nr_munmap_tracking __read_mostly;
 static atomic_t nr_comm_tracking __read_mostly;
 
 int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
+int sysctl_perf_counter_mlock __read_mostly = 128; /* 'free' kb per counter */
 
 /*
  * Lock for (sysadmin-configurable) counter reservations:
@@ -1461,7 +1462,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 
 	if (atomic_dec_and_mutex_lock(&counter->mmap_count,
 				      &counter->mmap_mutex)) {
-		vma->vm_mm->locked_vm -= counter->data->nr_pages + 1;
+		vma->vm_mm->locked_vm -= counter->data->nr_locked;
 		perf_mmap_data_free(counter);
 		mutex_unlock(&counter->mmap_mutex);
 	}
@@ -1480,6 +1481,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	unsigned long nr_pages;
 	unsigned long locked, lock_limit;
 	int ret = 0;
+	long extra;
 
 	if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
 		return -EINVAL;
@@ -1507,8 +1509,12 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		goto unlock;
 	}
 
-	locked = vma->vm_mm->locked_vm;
-	locked += nr_pages + 1;
+	extra = nr_pages /* + 1 only account the data pages */;
+	extra -= sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10);
+	if (extra < 0)
+		extra = 0;
+
+	locked = vma->vm_mm->locked_vm + extra;
 
 	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
 	lock_limit >>= PAGE_SHIFT;
@@ -1524,7 +1530,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		goto unlock;
 
 	atomic_set(&counter->mmap_count, 1);
-	vma->vm_mm->locked_vm += nr_pages + 1;
+	vma->vm_mm->locked_vm += extra;
+	counter->data->nr_locked = extra;
 unlock:
 	mutex_unlock(&counter->mmap_mutex);
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8203d70928d..3b05c2b088d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -920,6 +920,14 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "perf_counter_mlock_kb",
+		.data		= &sysctl_perf_counter_mlock,
+		.maxlen		= sizeof(sysctl_perf_counter_mlock),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #endif
 /*
  * NOTE: do not add new entries to this table unless you have read
-- 
cgit v1.2.3-70-g09d2


From f001fde5eadd915f4858d22ed70d7040f48767cf Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Tue, 5 May 2009 02:48:28 +0000
Subject: net: introduce a list of device addresses dev_addr_list (v6)

v5 -> v6 (current):
-removed so far unused static functions
-corrected dev_addr_del_multiple to call del instead of add

v4 -> v5:
-added device address type (suggested by davem)
-removed refcounting (better to have simplier code then safe potentially few
 bytes)

v3 -> v4:
-changed kzalloc to kmalloc in __hw_addr_add_ii()
-ASSERT_RTNL() avoided in dev_addr_flush() and dev_addr_init()

v2 -> v3:
-removed unnecessary rcu read locking
-moved dev_addr_flush() calling to ensure no null dereference of dev_addr

v1 -> v2:
-added forgotten ASSERT_RTNL to dev_addr_init and dev_addr_flush
-removed unnecessary rcu_read locking in dev_addr_init
-use compare_ether_addr_64bits instead of compare_ether_addr
-use L1_CACHE_BYTES as size for allocating struct netdev_hw_addr
-use call_rcu instead of rcu_synchronize
-moved is_etherdev_addr into __KERNEL__ ifdef

This patch introduces a new list in struct net_device and brings a set of
functions to handle the work with device address list. The list is a replacement
for the original dev_addr field and because in some situations there is need to
carry several device addresses with the net device. To be backward compatible,
dev_addr is made to point to the first member of the list so original drivers
sees no difference.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h |  27 +++++
 include/linux/netdevice.h   |  37 ++++++-
 net/core/dev.c              | 250 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 312 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index a1f17abba7d..3d7a6687d24 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -182,6 +182,33 @@ static inline unsigned compare_ether_addr_64bits(const u8 addr1[6+2],
 	return compare_ether_addr(addr1, addr2);
 #endif
 }
+
+/**
+ * is_etherdev_addr - Tell if given Ethernet address belongs to the device.
+ * @dev: Pointer to a device structure
+ * @addr: Pointer to a six-byte array containing the Ethernet address
+ *
+ * Compare passed address with all addresses of the device. Return true if the
+ * address if one of the device addresses.
+ *
+ * Note that this function calls compare_ether_addr_64bits() so take care of
+ * the right padding.
+ */
+static inline bool is_etherdev_addr(const struct net_device *dev,
+				    const u8 addr[6 + 2])
+{
+	struct netdev_hw_addr *ha;
+	int res = 1;
+
+	rcu_read_lock();
+	for_each_dev_addr(dev, ha) {
+		res = compare_ether_addr_64bits(addr, ha->addr);
+		if (!res)
+			break;
+	}
+	rcu_read_unlock();
+	return !res;
+}
 #endif	/* __KERNEL__ */
 
 /**
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ff42aba403c..02882e2ebd4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -210,6 +210,16 @@ struct dev_addr_list
 #define dmi_users	da_users
 #define dmi_gusers	da_gusers
 
+struct netdev_hw_addr {
+	struct list_head	list;
+	unsigned char		addr[MAX_ADDR_LEN];
+	unsigned char		type;
+#define NETDEV_HW_ADDR_T_LAN	1
+#define NETDEV_HW_ADDR_T_SAN	2
+#define NETDEV_HW_ADDR_T_SLAVE	3
+	struct rcu_head		rcu_head;
+};
+
 struct hh_cache
 {
 	struct hh_cache *hh_next;	/* Next entry			     */
@@ -784,8 +794,11 @@ struct net_device
  */
 	unsigned long		last_rx;	/* Time of last Rx	*/
 	/* Interface address info used in eth_type_trans() */
-	unsigned char		dev_addr[MAX_ADDR_LEN];	/* hw address, (before bcast
-							   because most packets are unicast) */
+	unsigned char		*dev_addr;	/* hw address, (before bcast
+						   because most packets are
+						   unicast) */
+
+	struct list_head	dev_addr_list; /* list of device hw addresses */
 
 	unsigned char		broadcast[MAX_ADDR_LEN];	/* hw bcast add	*/
 
@@ -1791,6 +1804,13 @@ static inline void netif_addr_unlock_bh(struct net_device *dev)
 	spin_unlock_bh(&dev->addr_list_lock);
 }
 
+/*
+ * dev_addr_list walker. Should be used only for read access. Call with
+ * rcu_read_lock held.
+ */
+#define for_each_dev_addr(dev, ha) \
+		list_for_each_entry_rcu(ha, &dev->dev_addr_list, list)
+
 /* These functions live elsewhere (drivers/net/net_init.c, but related) */
 
 extern void		ether_setup(struct net_device *dev);
@@ -1803,6 +1823,19 @@ extern struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	alloc_netdev_mq(sizeof_priv, name, setup, 1)
 extern int		register_netdev(struct net_device *dev);
 extern void		unregister_netdev(struct net_device *dev);
+
+/* Functions used for device addresses handling */
+extern int dev_addr_add(struct net_device *dev, unsigned char *addr,
+			unsigned char addr_type);
+extern int dev_addr_del(struct net_device *dev, unsigned char *addr,
+			unsigned char addr_type);
+extern int dev_addr_add_multiple(struct net_device *to_dev,
+				 struct net_device *from_dev,
+				 unsigned char addr_type);
+extern int dev_addr_del_multiple(struct net_device *to_dev,
+				 struct net_device *from_dev,
+				 unsigned char addr_type);
+
 /* Functions used for secondary unicast and multicast support */
 extern void		dev_set_rx_mode(struct net_device *dev);
 extern void		__dev_set_rx_mode(struct net_device *dev);
diff --git a/net/core/dev.c b/net/core/dev.c
index 3c8073fe970..637ea71b0a0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3434,6 +3434,252 @@ void dev_set_rx_mode(struct net_device *dev)
 	netif_addr_unlock_bh(dev);
 }
 
+/* hw addresses list handling functions */
+
+static int __hw_addr_add(struct list_head *list, unsigned char *addr,
+			 int addr_len, unsigned char addr_type)
+{
+	struct netdev_hw_addr *ha;
+	int alloc_size;
+
+	if (addr_len > MAX_ADDR_LEN)
+		return -EINVAL;
+
+	alloc_size = sizeof(*ha);
+	if (alloc_size < L1_CACHE_BYTES)
+		alloc_size = L1_CACHE_BYTES;
+	ha = kmalloc(alloc_size, GFP_ATOMIC);
+	if (!ha)
+		return -ENOMEM;
+	memcpy(ha->addr, addr, addr_len);
+	ha->type = addr_type;
+	list_add_tail_rcu(&ha->list, list);
+	return 0;
+}
+
+static void ha_rcu_free(struct rcu_head *head)
+{
+	struct netdev_hw_addr *ha;
+
+	ha = container_of(head, struct netdev_hw_addr, rcu_head);
+	kfree(ha);
+}
+
+static int __hw_addr_del_ii(struct list_head *list, unsigned char *addr,
+			    int addr_len, unsigned char addr_type,
+			    int ignore_index)
+{
+	struct netdev_hw_addr *ha;
+	int i = 0;
+
+	list_for_each_entry(ha, list, list) {
+		if (i++ != ignore_index &&
+		    !memcmp(ha->addr, addr, addr_len) &&
+		    (ha->type == addr_type || !addr_type)) {
+			list_del_rcu(&ha->list);
+			call_rcu(&ha->rcu_head, ha_rcu_free);
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
+static int __hw_addr_add_multiple_ii(struct list_head *to_list,
+				     struct list_head *from_list,
+				     int addr_len, unsigned char addr_type,
+				     int ignore_index)
+{
+	int err;
+	struct netdev_hw_addr *ha, *ha2;
+	unsigned char type;
+
+	list_for_each_entry(ha, from_list, list) {
+		type = addr_type ? addr_type : ha->type;
+		err = __hw_addr_add(to_list, ha->addr, addr_len, type);
+		if (err)
+			goto unroll;
+	}
+	return 0;
+
+unroll:
+	list_for_each_entry(ha2, from_list, list) {
+		if (ha2 == ha)
+			break;
+		type = addr_type ? addr_type : ha2->type;
+		__hw_addr_del_ii(to_list, ha2->addr, addr_len, type,
+				 ignore_index);
+	}
+	return err;
+}
+
+static void __hw_addr_del_multiple_ii(struct list_head *to_list,
+				      struct list_head *from_list,
+				      int addr_len, unsigned char addr_type,
+				      int ignore_index)
+{
+	struct netdev_hw_addr *ha;
+	unsigned char type;
+
+	list_for_each_entry(ha, from_list, list) {
+		type = addr_type ? addr_type : ha->type;
+		__hw_addr_del_ii(to_list, ha->addr, addr_len, addr_type,
+				 ignore_index);
+	}
+}
+
+static void __hw_addr_flush(struct list_head *list)
+{
+	struct netdev_hw_addr *ha, *tmp;
+
+	list_for_each_entry_safe(ha, tmp, list, list) {
+		list_del_rcu(&ha->list);
+		call_rcu(&ha->rcu_head, ha_rcu_free);
+	}
+}
+
+/* Device addresses handling functions */
+
+static void dev_addr_flush(struct net_device *dev)
+{
+	/* rtnl_mutex must be held here */
+
+	__hw_addr_flush(&dev->dev_addr_list);
+	dev->dev_addr = NULL;
+}
+
+static int dev_addr_init(struct net_device *dev)
+{
+	unsigned char addr[MAX_ADDR_LEN];
+	struct netdev_hw_addr *ha;
+	int err;
+
+	/* rtnl_mutex must be held here */
+
+	INIT_LIST_HEAD(&dev->dev_addr_list);
+	memset(addr, 0, sizeof(*addr));
+	err = __hw_addr_add(&dev->dev_addr_list, addr, sizeof(*addr),
+			    NETDEV_HW_ADDR_T_LAN);
+	if (!err) {
+		/*
+		 * Get the first (previously created) address from the list
+		 * and set dev_addr pointer to this location.
+		 */
+		ha = list_first_entry(&dev->dev_addr_list,
+				      struct netdev_hw_addr, list);
+		dev->dev_addr = ha->addr;
+	}
+	return err;
+}
+
+/**
+ *	dev_addr_add	- Add a device address
+ *	@dev: device
+ *	@addr: address to add
+ *	@addr_type: address type
+ *
+ *	Add a device address to the device or increase the reference count if
+ *	it already exists.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add(struct net_device *dev, unsigned char *addr,
+		 unsigned char addr_type)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	err = __hw_addr_add(&dev->dev_addr_list, addr, dev->addr_len,
+			    addr_type);
+	if (!err)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_add);
+
+/**
+ *	dev_addr_del	- Release a device address.
+ *	@dev: device
+ *	@addr: address to delete
+ *	@addr_type: address type
+ *
+ *	Release reference to a device address and remove it from the device
+ *	if the reference count drops to zero.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del(struct net_device *dev, unsigned char *addr,
+		 unsigned char addr_type)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	err = __hw_addr_del_ii(&dev->dev_addr_list, addr, dev->addr_len,
+			       addr_type, 0);
+	if (!err)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_del);
+
+/**
+ *	dev_addr_add_multiple	- Add device addresses from another device
+ *	@to_dev: device to which addresses will be added
+ *	@from_dev: device from which addresses will be added
+ *	@addr_type: address type - 0 means type will be used from from_dev
+ *
+ *	Add device addresses of the one device to another.
+ **
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add_multiple(struct net_device *to_dev,
+			  struct net_device *from_dev,
+			  unsigned char addr_type)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	if (from_dev->addr_len != to_dev->addr_len)
+		return -EINVAL;
+	err = __hw_addr_add_multiple_ii(&to_dev->dev_addr_list,
+					&from_dev->dev_addr_list,
+					to_dev->addr_len, addr_type, 0);
+	if (!err)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_add_multiple);
+
+/**
+ *	dev_addr_del_multiple	- Delete device addresses by another device
+ *	@to_dev: device where the addresses will be deleted
+ *	@from_dev: device by which addresses the addresses will be deleted
+ *	@addr_type: address type - 0 means type will used from from_dev
+ *
+ *	Deletes addresses in to device by the list of addresses in from device.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del_multiple(struct net_device *to_dev,
+			  struct net_device *from_dev,
+			  unsigned char addr_type)
+{
+	ASSERT_RTNL();
+
+	if (from_dev->addr_len != to_dev->addr_len)
+		return -EINVAL;
+	__hw_addr_del_multiple_ii(&to_dev->dev_addr_list,
+				  &from_dev->dev_addr_list,
+				  to_dev->addr_len, addr_type, 0);
+	call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+	return 0;
+}
+EXPORT_SYMBOL(dev_addr_del_multiple);
+
+/* unicast and multicast addresses handling functions */
+
 int __dev_addr_delete(struct dev_addr_list **list, int *count,
 		      void *addr, int alen, int glbl)
 {
@@ -4776,6 +5022,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
 	dev->gso_max_size = GSO_MAX_SIZE;
 
+	dev_addr_init(dev);
 	netdev_init_queues(dev);
 
 	INIT_LIST_HEAD(&dev->napi_list);
@@ -4801,6 +5048,9 @@ void free_netdev(struct net_device *dev)
 
 	kfree(dev->_tx);
 
+	/* Flush device addresses */
+	dev_addr_flush(dev);
+
 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
 		netif_napi_del(p);
 
-- 
cgit v1.2.3-70-g09d2


From 2df75e415709ad12862028916c772c1f377f6a7c Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 6 May 2009 10:33:04 +0800
Subject: tracing/events: fix memory leak when unloading module

When unloading a module, memory allocated by init_preds() and
trace_define_field() is not freed.

[ Impact: fix memory leak ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
LKML-Reference: <4A00F6E0.3040503@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace_event.h       |  1 +
 kernel/trace/trace_events.c        | 18 ++++++++++++++++++
 kernel/trace/trace_events_filter.c | 22 +++++++++++++++-------
 3 files changed, 34 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 5fff40c9ff5..662c1becf36 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -116,6 +116,7 @@ struct ftrace_event_call {
 #define MAX_FILTER_STR_VAL	128
 
 extern int init_preds(struct ftrace_event_call *call);
+extern void destroy_preds(struct ftrace_event_call *call);
 extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
 extern int filter_current_check_discard(struct ftrace_event_call *call,
 					void *rec,
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index f789ca540fe..f251a150e75 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -60,6 +60,22 @@ err:
 }
 EXPORT_SYMBOL_GPL(trace_define_field);
 
+#ifdef CONFIG_MODULES
+
+static void trace_destroy_fields(struct ftrace_event_call *call)
+{
+	struct ftrace_event_field *field, *next;
+
+	list_for_each_entry_safe(field, next, &call->fields, link) {
+		list_del(&field->link);
+		kfree(field->type);
+		kfree(field->name);
+		kfree(field);
+	}
+}
+
+#endif /* CONFIG_MODULES */
+
 static void ftrace_clear_events(void)
 {
 	struct ftrace_event_call *call;
@@ -925,6 +941,8 @@ static void trace_module_remove_events(struct module *mod)
 				unregister_ftrace_event(call->event);
 			debugfs_remove_recursive(call->dir);
 			list_del(&call->list);
+			trace_destroy_fields(call);
+			destroy_preds(call);
 		}
 	}
 
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index f49486687ee..ce07b818671 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -346,6 +346,20 @@ static void filter_disable_preds(struct ftrace_event_call *call)
 		filter->preds[i]->fn = filter_pred_none;
 }
 
+void destroy_preds(struct ftrace_event_call *call)
+{
+	struct event_filter *filter = call->filter;
+	int i;
+
+	for (i = 0; i < MAX_FILTER_PRED; i++) {
+		if (filter->preds[i])
+			filter_free_pred(filter->preds[i]);
+	}
+	kfree(filter->preds);
+	kfree(filter);
+	call->filter = NULL;
+}
+
 int init_preds(struct ftrace_event_call *call)
 {
 	struct event_filter *filter;
@@ -374,13 +388,7 @@ int init_preds(struct ftrace_event_call *call)
 	return 0;
 
 oom:
-	for (i = 0; i < MAX_FILTER_PRED; i++) {
-		if (filter->preds[i])
-			filter_free_pred(filter->preds[i]);
-	}
-	kfree(filter->preds);
-	kfree(call->filter);
-	call->filter = NULL;
+	destroy_preds(call);
 
 	return -ENOMEM;
 }
-- 
cgit v1.2.3-70-g09d2


From 22cfbbfd9f67b67fe073010f51cb71d3632387d5 Mon Sep 17 00:00:00 2001
From: john stultz <johnstul@us.ibm.com>
Date: Wed, 6 May 2009 11:43:57 +0200
Subject: ntp: adjust SHIFT_PLL to improve NTP convergence

The conversion to the ntpv4 reference model
f19923937321244e7dc334767eb4b67e0e3d5c74 ("ntp: convert to the NTP4
reference model") in 2.6.19 added nanosecond resolution the adjtimex
interface, but also changed the "stiffness" of the frequency adjustments,
causing NTP convergence time to greatly increase.

SHIFT_PLL, which reduces the stiffness of the freq adjustments, was
designed to be inversely linked to HZ, and the reference value of 4 was
designed for Unix systems using HZ=100.  However Linux's clock steering
code mostly independent of HZ.

So this patch reduces the SHIFT_PLL value from 4 to 2, which causes NTPd
behavior to match kernels prior to 2.6.19, greatly reducing convergence
times, and improving close synchronization through environmental thermal
changes.

The patch also changes some l's to L's in nearby code to avoid misreading
50l as 501.

[ Impact: tweak NTP algorithm for faster convergence ]

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: zippel@linux-m68k.org
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <200905051956.n45JuVo9025575@imap1.linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/timex.h | 42 +++++++++++++++++++++++++++++++-----------
 1 file changed, 31 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/timex.h b/include/linux/timex.h
index aa3475fcff6..0daf9611ef4 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -170,17 +170,37 @@ struct timex {
 #include <asm/timex.h>
 
 /*
- * SHIFT_KG and SHIFT_KF establish the damping of the PLL and are chosen
- * for a slightly underdamped convergence characteristic. SHIFT_KH
- * establishes the damping of the FLL and is chosen by wisdom and black
- * art.
+ * SHIFT_PLL is used as a dampening factor to define how much we
+ * adjust the frequency correction for a given offset in PLL mode.
+ * It also used in dampening the offset correction, to define how
+ * much of the current value in time_offset we correct for each
+ * second. Changing this value changes the stiffness of the ntp
+ * adjustment code. A lower value makes it more flexible, reducing
+ * NTP convergence time. A higher value makes it stiffer, increasing
+ * convergence time, but making the clock more stable.
  *
- * MAXTC establishes the maximum time constant of the PLL. With the
- * SHIFT_KG and SHIFT_KF values given and a time constant range from
- * zero to MAXTC, the PLL will converge in 15 minutes to 16 hours,
- * respectively.
+ * In David Mills' nanokenrel reference implmentation SHIFT_PLL is 4.
+ * However this seems to increase convergence time much too long.
+ *
+ * https://lists.ntp.org/pipermail/hackers/2008-January/003487.html
+ *
+ * In the above mailing list discussion, it seems the value of 4
+ * was appropriate for other Unix systems with HZ=100, and that
+ * SHIFT_PLL should be decreased as HZ increases. However, Linux's
+ * clock steering implementation is HZ independent.
+ *
+ * Through experimentation, a SHIFT_PLL value of 2 was found to allow
+ * for fast convergence (very similar to the NTPv3 code used prior to
+ * v2.6.19), with good clock stability.
+ *
+ *
+ * SHIFT_FLL is used as a dampening factor to define how much we
+ * adjust the frequency correction for a given offset in FLL mode.
+ * In David Mills' nanokenrel reference implmentation SHIFT_PLL is 2.
+ *
+ * MAXTC establishes the maximum time constant of the PLL.
  */
-#define SHIFT_PLL	4	/* PLL frequency factor (shift) */
+#define SHIFT_PLL	2	/* PLL frequency factor (shift) */
 #define SHIFT_FLL	2	/* FLL frequency factor (shift) */
 #define MAXTC		10	/* maximum time constant (shift) */
 
@@ -192,10 +212,10 @@ struct timex {
 #define SHIFT_USEC 16		/* frequency offset scale (shift) */
 #define PPM_SCALE ((s64)NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC))
 #define PPM_SCALE_INV_SHIFT 19
-#define PPM_SCALE_INV ((1ll << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \
+#define PPM_SCALE_INV ((1LL << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \
 		       PPM_SCALE + 1)
 
-#define MAXPHASE 500000000l	/* max phase error (ns) */
+#define MAXPHASE 500000000L	/* max phase error (ns) */
 #define MAXFREQ 500000		/* max frequency error (ns/s) */
 #define MAXFREQ_SCALED ((s64)MAXFREQ << NTP_SCALE_SHIFT)
 #define MINSEC 256		/* min interval between updates (s) */
-- 
cgit v1.2.3-70-g09d2


From de1d7286060430e79a1d50ad6e5fee8fe863c5f6 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Tue, 5 May 2009 16:49:59 +0800
Subject: tracepoint: trace_sched_migrate_task(): remove parameter

The orig_cpu parameter in trace_sched_migrate_task() is not necessary,
it can be got by using task_cpu(p) in the probe.

[ Impact: micro-optimization ]

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
[ modified from Mathieu's patch. The original patch is at:
  http://marc.info/?l=linux-kernel&m=123791201716239&w=2 ]
Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Cc: fweisbec@gmail.com
Cc: rostedt@goodmis.org
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: zhaolei@cn.fujitsu.com
Cc: laijs@cn.fujitsu.com
LKML-Reference: <49FFFDB7.1050402@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/events/sched.h | 6 +++---
 kernel/sched.c               | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index ffa1cab586b..dd4033cf5b0 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -180,9 +180,9 @@ TRACE_EVENT(sched_switch,
  */
 TRACE_EVENT(sched_migrate_task,
 
-	TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu),
+	TP_PROTO(struct task_struct *p, int dest_cpu),
 
-	TP_ARGS(p, orig_cpu, dest_cpu),
+	TP_ARGS(p, dest_cpu),
 
 	TP_STRUCT__entry(
 		__array(	char,	comm,	TASK_COMM_LEN	)
@@ -196,7 +196,7 @@ TRACE_EVENT(sched_migrate_task,
 		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
 		__entry->pid		= p->pid;
 		__entry->prio		= p->prio;
-		__entry->orig_cpu	= orig_cpu;
+		__entry->orig_cpu	= task_cpu(p);
 		__entry->dest_cpu	= dest_cpu;
 	),
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 9f7ffd00b6e..9cdedbd181c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1954,7 +1954,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 
 	clock_offset = old_rq->clock - new_rq->clock;
 
-	trace_sched_migrate_task(p, task_cpu(p), new_cpu);
+	trace_sched_migrate_task(p, new_cpu);
 
 #ifdef CONFIG_SCHEDSTATS
 	if (p->se.wait_start)
-- 
cgit v1.2.3-70-g09d2


From a42aaa3bbce85ac487ad4fad5db99e8e91b7aac1 Mon Sep 17 00:00:00 2001
From: "Alan D. Brunelle" <Alan.Brunelle@hp.com>
Date: Mon, 4 May 2009 16:27:26 -0400
Subject: blktrace: correct remap names

This attempts to clarify names utilized during block I/O remap
operations (partition, volume manager). It correctly matches up the
/from/ information for both device & sector. This takes in the concept
from Kosaki Motohiro and extends it to include better naming for the
"device_from" field.

[ Impact: cleanup ]

Signed-off-by: Alan D. Brunelle <alan.brunelle@hp.com>
Reviewed-by: Li Zefan <lizf@cn.fujitsu.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <49FF4FAE.3000301@hp.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/blktrace_api.h |  4 ++--
 include/trace/block.h        |  4 ++--
 kernel/trace/blktrace.c      | 24 ++++++++++++------------
 3 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 62763c95285..82b4636030e 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -116,9 +116,9 @@ struct blk_io_trace {
  * The remap event
  */
 struct blk_io_trace_remap {
-	__be32 device;
 	__be32 device_from;
-	__be64 sector;
+	__be32 device_to;
+	__be64 sector_from;
 };
 
 enum {
diff --git a/include/trace/block.h b/include/trace/block.h
index 25b7068b819..87f6456fd32 100644
--- a/include/trace/block.h
+++ b/include/trace/block.h
@@ -70,7 +70,7 @@ DECLARE_TRACE(block_split,
 
 DECLARE_TRACE(block_remap,
 	TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
-		 sector_t from, sector_t to),
-	      TP_ARGS(q, bio, dev, from, to));
+		 sector_t to, sector_t from),
+	      TP_ARGS(q, bio, dev, to, from));
 
 #endif
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index c32062bd10b..f8d46d6f5d3 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -830,8 +830,8 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
  * @q:		queue the io is for
  * @bio:	the source bio
  * @dev:	target device
- * @from:	source sector
  * @to:		target sector
+ * @from:	source sector
  *
  * Description:
  *     Device mapper or raid target sometimes need to split a bio because
@@ -839,7 +839,7 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
  *
  **/
 static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
-				       dev_t dev, sector_t from, sector_t to)
+				       dev_t dev, sector_t to, sector_t from)
 {
 	struct blk_trace *bt = q->blk_trace;
 	struct blk_io_trace_remap r;
@@ -847,9 +847,9 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
 	if (likely(!bt))
 		return;
 
-	r.device = cpu_to_be32(dev);
-	r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
-	r.sector = cpu_to_be64(to);
+	r.device_from = cpu_to_be32(dev);
+	r.device_to   = cpu_to_be32(bio->bi_bdev->bd_dev);
+	r.sector_from = cpu_to_be64(from);
 
 	__blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP,
 			!bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
@@ -1028,11 +1028,11 @@ static void get_pdu_remap(const struct trace_entry *ent,
 			  struct blk_io_trace_remap *r)
 {
 	const struct blk_io_trace_remap *__r = pdu_start(ent);
-	__u64 sector = __r->sector;
+	__u64 sector_from = __r->sector_from;
 
-	r->device = be32_to_cpu(__r->device);
 	r->device_from = be32_to_cpu(__r->device_from);
-	r->sector = be64_to_cpu(sector);
+	r->device_to   = be32_to_cpu(__r->device_to);
+	r->sector_from = be64_to_cpu(sector_from);
 }
 
 typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);
@@ -1148,13 +1148,13 @@ static int blk_log_with_error(struct trace_seq *s,
 
 static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent)
 {
-	struct blk_io_trace_remap r = { .device = 0, };
+	struct blk_io_trace_remap r = { .device_from = 0, };
 
 	get_pdu_remap(ent, &r);
 	return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n",
-			       t_sector(ent),
-			       t_sec(ent), MAJOR(r.device), MINOR(r.device),
-			       (unsigned long long)r.sector);
+				t_sector(ent), t_sec(ent),
+				MAJOR(r.device_from), MINOR(r.device_from),
+				(unsigned long long)r.sector_from);
 }
 
 static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent)
-- 
cgit v1.2.3-70-g09d2


From 22a7c31a9659deaddafbbcec6562d44141e84474 Mon Sep 17 00:00:00 2001
From: "Alan D. Brunelle" <Alan.Brunelle@hp.com>
Date: Mon, 4 May 2009 16:35:08 -0400
Subject: blktrace: from-sector redundant in trace_block_remap

Remove redundant from-sector parameter: it's /always/ the bio's sector
passed in.

[ Impact: cleanup ]

Signed-off-by: Alan D. Brunelle <alan.brunelle@hp.com>
Reviewed-by: Li Zefan <lizf@cn.fujitsu.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <49FF517C.7000503@hp.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 block/blk-core.c        | 5 ++---
 drivers/md/dm.c         | 3 +--
 include/trace/block.h   | 4 ++--
 kernel/trace/blktrace.c | 8 ++++----
 4 files changed, 9 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/block/blk-core.c b/block/blk-core.c
index 07ab75403e1..a5f747a8312 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1275,7 +1275,7 @@ static inline void blk_partition_remap(struct bio *bio)
 		bio->bi_bdev = bdev->bd_contains;
 
 		trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
-				    bdev->bd_dev, bio->bi_sector,
+				    bdev->bd_dev,
 				    bio->bi_sector - p->start_sect);
 	}
 }
@@ -1444,8 +1444,7 @@ static inline void __generic_make_request(struct bio *bio)
 			goto end_io;
 
 		if (old_sector != -1)
-			trace_block_remap(q, bio, old_dev, bio->bi_sector,
-					    old_sector);
+			trace_block_remap(q, bio, old_dev, old_sector);
 
 		trace_block_bio_queue(q, bio);
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 8a994be035b..b01514afb6b 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -657,8 +657,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
 		/* the bio has been remapped so dispatch it */
 
 		trace_block_remap(bdev_get_queue(clone->bi_bdev), clone,
-				    tio->io->bio->bi_bdev->bd_dev,
-				    clone->bi_sector, sector);
+				    tio->io->bio->bi_bdev->bd_dev, sector);
 
 		generic_make_request(clone);
 	} else if (r < 0 || r == DM_MAPIO_REQUEUE) {
diff --git a/include/trace/block.h b/include/trace/block.h
index 87f6456fd32..8ac945b7746 100644
--- a/include/trace/block.h
+++ b/include/trace/block.h
@@ -70,7 +70,7 @@ DECLARE_TRACE(block_split,
 
 DECLARE_TRACE(block_remap,
 	TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
-		 sector_t to, sector_t from),
-	      TP_ARGS(q, bio, dev, to, from));
+		 sector_t to),
+	      TP_ARGS(q, bio, dev, to));
 
 #endif
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index f8d46d6f5d3..e099f8cc1d1 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -830,7 +830,6 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
  * @q:		queue the io is for
  * @bio:	the source bio
  * @dev:	target device
- * @to:		target sector
  * @from:	source sector
  *
  * Description:
@@ -839,7 +838,7 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
  *
  **/
 static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
-				       dev_t dev, sector_t to, sector_t from)
+				       dev_t dev, sector_t from)
 {
 	struct blk_trace *bt = q->blk_trace;
 	struct blk_io_trace_remap r;
@@ -851,8 +850,9 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
 	r.device_to   = cpu_to_be32(bio->bi_bdev->bd_dev);
 	r.sector_from = cpu_to_be64(from);
 
-	__blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP,
-			!bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
+	__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
+			BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE),
+			sizeof(r), &r);
 }
 
 /**
-- 
cgit v1.2.3-70-g09d2


From 9ccebe6148bcb0aba2d89743df2ff182ced505ec Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 23 Apr 2009 10:32:36 +0200
Subject: mac80211: rename max_sleep_interval to max_sleep_period

Kalle points out that max_sleep_interval is somewhat confusing
because the value is measured in beacon intervals, and not in
TU. Rename it to max_sleep_period to be consistent with things
like DTIM period that are also measured in beacon intervals.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 4 ++--
 net/mac80211/mlme.c    | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 446dbf75a1c..81d706d8522 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -559,7 +559,7 @@ enum ieee80211_conf_changed {
  * @beacon_int: beacon interval (TODO make interface config)
  *
  * @listen_interval: listen interval in units of beacon interval
- * @max_sleep_interval: the maximum number of beacon intervals to sleep for
+ * @max_sleep_period: the maximum number of beacon intervals to sleep for
  *	before checking the beacon for a TIM bit (managed mode only); this
  *	value will be only achievable between DTIM frames, the hardware
  *	needs to check for the multicast traffic bit in DTIM beacons.
@@ -584,7 +584,7 @@ struct ieee80211_conf {
 	int beacon_int;
 	u32 flags;
 	int power_level, dynamic_ps_timeout;
-	int max_sleep_interval;
+	int max_sleep_period;
 
 	u16 listen_interval;
 	bool radio_enabled;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 2029b71eb87..f8925ca7c8f 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -562,7 +562,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency)
 				maxslp = min_t(int, dtimper,
 						    latency / beaconint_us);
 
-			local->hw.conf.max_sleep_interval = maxslp;
+			local->hw.conf.max_sleep_period = maxslp;
 			local->ps_sdata = found;
 		}
 	} else {
-- 
cgit v1.2.3-70-g09d2


From 57c4d7b4c4986037be51476b8e3025d5ba18d8b8 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 23 Apr 2009 16:10:04 +0200
Subject: mac80211: clean up beacon interval settings

We currently have two beacon interval configuration knobs:
hw.conf.beacon_int and vif.bss_info.beacon_int. This is
rather confusing, even though the former is used when we
beacon ourselves and the latter when we are associated to
an AP.

This just deprecates the hw.conf.beacon_int setting in favour
of always using vif.bss_info.beacon_int. Since it touches all
the beaconing IBSS code anyway, we can also add support for
the cfg80211 IBSS beacon interval configuration easily.

NOTE: The hw.conf.beacon_int setting is retained for now due
      to drivers still using it -- I couldn't untangle all
      drivers, some are updated in this patch.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath/ar9170/mac.c       |  6 +++---
 drivers/net/wireless/ath/ar9170/main.c      |  5 ++++-
 drivers/net/wireless/ath/ath5k/base.c       |  5 ++++-
 drivers/net/wireless/ath/ath9k/ath9k.h      |  2 ++
 drivers/net/wireless/ath/ath9k/beacon.c     |  9 +++------
 drivers/net/wireless/ath/ath9k/main.c       | 22 ++++++++++++----------
 drivers/net/wireless/ath/ath9k/xmit.c       |  2 +-
 drivers/net/wireless/b43/main.c             | 12 +++++++-----
 drivers/net/wireless/b43legacy/main.c       | 10 +++++-----
 drivers/net/wireless/iwlwifi/iwl-agn.c      |  3 ++-
 drivers/net/wireless/iwlwifi/iwl-core.c     |  3 +--
 drivers/net/wireless/iwlwifi/iwl3945-base.c |  5 ++---
 drivers/net/wireless/libertas_tf/main.c     |  2 +-
 drivers/net/wireless/mac80211_hwsim.c       | 20 ++++++++++++--------
 include/net/mac80211.h                      | 14 ++++++++++++--
 net/mac80211/cfg.c                          | 17 +++++------------
 net/mac80211/ibss.c                         | 25 +++++++++++++++++--------
 net/mac80211/main.c                         | 17 +++++++++++------
 net/mac80211/mlme.c                         |  1 +
 net/mac80211/sta_info.c                     | 12 ++++++------
 net/mac80211/tx.c                           |  2 +-
 21 files changed, 112 insertions(+), 82 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath/ar9170/mac.c b/drivers/net/wireless/ath/ar9170/mac.c
index c8fa3073169..0e5967dd119 100644
--- a/drivers/net/wireless/ath/ar9170/mac.c
+++ b/drivers/net/wireless/ath/ar9170/mac.c
@@ -316,9 +316,9 @@ int ar9170_set_beacon_timers(struct ar9170 *ar)
 	u32 v = 0;
 	u32 pretbtt = 0;
 
-	v |= ar->hw->conf.beacon_int;
-
 	if (ar->vif) {
+		v |= ar->vif->bss_conf.beacon_int;
+
 		switch (ar->vif->type) {
 		case NL80211_IFTYPE_MESH_POINT:
 		case NL80211_IFTYPE_ADHOC:
@@ -326,7 +326,7 @@ int ar9170_set_beacon_timers(struct ar9170 *ar)
 			break;
 		case NL80211_IFTYPE_AP:
 			v |= BIT(24);
-			pretbtt = (ar->hw->conf.beacon_int - 6) << 16;
+			pretbtt = (ar->vif->bss_conf.beacon_int - 6) << 16;
 			break;
 		default:
 			break;
diff --git a/drivers/net/wireless/ath/ar9170/main.c b/drivers/net/wireless/ath/ar9170/main.c
index 1b60906b80c..49c729bc714 100644
--- a/drivers/net/wireless/ath/ar9170/main.c
+++ b/drivers/net/wireless/ath/ar9170/main.c
@@ -1337,7 +1337,7 @@ static int ar9170_op_config(struct ieee80211_hw *hw, u32 changed)
 			goto out;
 	}
 
-	if (changed & IEEE80211_CONF_CHANGE_BEACON_INTERVAL) {
+	if (changed & BSS_CHANGED_BEACON_INT) {
 		err = ar9170_set_beacon_timers(ar);
 		if (err)
 			goto out;
@@ -1499,6 +1499,9 @@ static void ar9170_op_bss_info_changed(struct ieee80211_hw *hw,
 #endif /* CONFIG_AR9170_LEDS */
 	}
 
+	if (changed & BSS_CHANGED_BEACON_INT)
+		err = ar9170_set_beacon_timers(ar);
+
 	if (changed & BSS_CHANGED_HT) {
 		/* TODO */
 		err = 0;
diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c
index c8c658bfcf9..e4b1d9efa42 100644
--- a/drivers/net/wireless/ath/ath5k/base.c
+++ b/drivers/net/wireless/ath/ath5k/base.c
@@ -2756,7 +2756,6 @@ ath5k_config(struct ieee80211_hw *hw, u32 changed)
 
 	mutex_lock(&sc->lock);
 
-	sc->bintval = conf->beacon_int;
 	sc->power_level = conf->power_level;
 
 	ret = ath5k_chan_set(sc, conf->channel);
@@ -3083,6 +3082,10 @@ static void ath5k_bss_info_changed(struct ieee80211_hw *hw,
 				    u32 changes)
 {
 	struct ath5k_softc *sc = hw->priv;
+
+	if (changes & BSS_CHANGED_BEACON_INT)
+		sc->bintval = bss_conf->beacon_int;
+
 	if (changes & BSS_CHANGED_ASSOC) {
 		mutex_lock(&sc->lock);
 		sc->assoc = bss_conf->assoc;
diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h
index c92d46fa9d5..d5084ddf44f 100644
--- a/drivers/net/wireless/ath/ath9k/ath9k.h
+++ b/drivers/net/wireless/ath/ath9k/ath9k.h
@@ -590,6 +590,8 @@ struct ath_softc {
 	int led_on_cnt;
 	int led_off_cnt;
 
+	int beacon_interval;
+
 	struct ath_rfkill rf_kill;
 	struct ath_ani ani;
 	struct ath9k_node_stats nodestats;
diff --git a/drivers/net/wireless/ath/ath9k/beacon.c b/drivers/net/wireless/ath/ath9k/beacon.c
index eb4759fc6a0..c17b8382e32 100644
--- a/drivers/net/wireless/ath/ath9k/beacon.c
+++ b/drivers/net/wireless/ath/ath9k/beacon.c
@@ -320,8 +320,7 @@ int ath_beacon_alloc(struct ath_wiphy *aphy, struct ieee80211_vif *vif)
 		u64 tsfadjust;
 		int intval;
 
-		intval = sc->hw->conf.beacon_int ?
-			sc->hw->conf.beacon_int : ATH_DEFAULT_BINTVAL;
+		intval = sc->beacon_interval ? : ATH_DEFAULT_BINTVAL;
 
 		/*
 		 * Calculate the TSF offset for this beacon slot, i.e., the
@@ -431,8 +430,7 @@ void ath_beacon_tasklet(unsigned long data)
 	 * on the tsf to safeguard against missing an swba.
 	 */
 
-	intval = sc->hw->conf.beacon_int ?
-		sc->hw->conf.beacon_int : ATH_DEFAULT_BINTVAL;
+	intval = sc->beacon_interval ? : ATH_DEFAULT_BINTVAL;
 
 	tsf = ath9k_hw_gettsf64(ah);
 	tsftu = TSF_TO_TU(tsf>>32, tsf);
@@ -711,8 +709,7 @@ void ath_beacon_config(struct ath_softc *sc, struct ieee80211_vif *vif)
 	/* Setup the beacon configuration parameters */
 
 	memset(&conf, 0, sizeof(struct ath_beacon_config));
-	conf.beacon_interval = sc->hw->conf.beacon_int ?
-		sc->hw->conf.beacon_int : ATH_DEFAULT_BINTVAL;
+	conf.beacon_interval = sc->beacon_interval ? : ATH_DEFAULT_BINTVAL;
 	conf.listen_interval = 1;
 	conf.dtim_period = conf.beacon_interval;
 	conf.dtim_count = 1;
diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index 2398d4f45f2..28cc9cd52f3 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -2358,16 +2358,6 @@ skip_chan_change:
 	if (changed & IEEE80211_CONF_CHANGE_POWER)
 		sc->config.txpowlimit = 2 * conf->power_level;
 
-	/*
-	 * The HW TSF has to be reset when the beacon interval changes.
-	 * We set the flag here, and ath_beacon_config_ap() would take this
-	 * into account when it gets called through the subsequent
-	 * config_interface() call - with IFCC_BEACON in the changed field.
-	 */
-
-	if (changed & IEEE80211_CONF_CHANGE_BEACON_INTERVAL)
-		sc->sc_flags |= SC_OP_TSF_RESET;
-
 	mutex_unlock(&sc->mutex);
 
 	return 0;
@@ -2635,6 +2625,18 @@ static void ath9k_bss_info_changed(struct ieee80211_hw *hw,
 		ath9k_bss_assoc_info(sc, vif, bss_conf);
 	}
 
+	/*
+	 * The HW TSF has to be reset when the beacon interval changes.
+	 * We set the flag here, and ath_beacon_config_ap() would take this
+	 * into account when it gets called through the subsequent
+	 * config_interface() call - with IFCC_BEACON in the changed field.
+	 */
+
+	if (changed & BSS_CHANGED_BEACON_INT) {
+		sc->sc_flags |= SC_OP_TSF_RESET;
+		sc->beacon_interval = bss_conf->beacon_int;
+	}
+
 	mutex_unlock(&sc->mutex);
 }
 
diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
index faf2cab49ea..501493ffc3a 100644
--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c
@@ -971,7 +971,7 @@ int ath_cabq_update(struct ath_softc *sc)
 	else if (sc->config.cabqReadytime > ATH9K_READY_TIME_HI_BOUND)
 		sc->config.cabqReadytime = ATH9K_READY_TIME_HI_BOUND;
 
-	qi.tqi_readyTime = (sc->hw->conf.beacon_int *
+	qi.tqi_readyTime = (sc->beacon_interval *
 			    sc->config.cabqReadytime) / 100;
 	ath_txq_update(sc, qnum, &qi);
 
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index a97c6ff0f12..3c693e6ec90 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -3468,11 +3468,6 @@ static int b43_op_config(struct ieee80211_hw *hw, u32 changed)
 	if (phy->ops->set_rx_antenna)
 		phy->ops->set_rx_antenna(dev, antenna);
 
-	/* Update templates for AP/mesh mode. */
-	if (b43_is_mode(wl, NL80211_IFTYPE_AP) ||
-	    b43_is_mode(wl, NL80211_IFTYPE_MESH_POINT))
-		b43_set_beacon_int(dev, conf->beacon_int);
-
 	if (!!conf->radio_enabled != phy->radio_on) {
 		if (conf->radio_enabled) {
 			b43_software_rfkill(dev, RFKILL_STATE_UNBLOCKED);
@@ -3556,6 +3551,13 @@ static void b43_op_bss_info_changed(struct ieee80211_hw *hw,
 		goto out_unlock_mutex;
 	b43_mac_suspend(dev);
 
+	/* Update templates for AP/mesh mode. */
+	if (changed & BSS_CHANGED_BEACON_INT &&
+	    (b43_is_mode(wl, NL80211_IFTYPE_AP) ||
+	     b43_is_mode(wl, NL80211_IFTYPE_MESH_POINT) ||
+	     b43_is_mode(wl, NL80211_IFTYPE_ADHOC)))
+		b43_set_beacon_int(dev, conf->beacon_int);
+
 	if (changed & BSS_CHANGED_BASIC_RATES)
 		b43_update_basic_rates(dev, conf->basic_rates);
 
diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c
index ee202b4f77b..276f314688f 100644
--- a/drivers/net/wireless/b43legacy/main.c
+++ b/drivers/net/wireless/b43legacy/main.c
@@ -2721,11 +2721,6 @@ static int b43legacy_op_dev_config(struct ieee80211_hw *hw,
 	/* Antennas for RX and management frame TX. */
 	b43legacy_mgmtframe_txantenna(dev, antenna_tx);
 
-	/* Update templates for AP mode. */
-	if (b43legacy_is_mode(wl, NL80211_IFTYPE_AP))
-		b43legacy_set_beacon_int(dev, conf->beacon_int);
-
-
 	if (!!conf->radio_enabled != phy->radio_on) {
 		if (conf->radio_enabled) {
 			b43legacy_radio_turn_on(dev);
@@ -2827,6 +2822,11 @@ static void b43legacy_op_bss_info_changed(struct ieee80211_hw *hw,
 
 	b43legacy_mac_suspend(dev);
 
+	if (changed & BSS_CHANGED_BEACON_INT &&
+	    (b43legacy_is_mode(wl, NL80211_IFTYPE_AP) ||
+	     b43legacy_is_mode(wl, NL80211_IFTYPE_ADHOC)))
+		b43legacy_set_beacon_int(dev, conf->beacon_int);
+
 	if (changed & BSS_CHANGED_BASIC_RATES)
 		b43legacy_update_basic_rates(dev, conf->basic_rates);
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c
index 277dfc57fde..4920dcf7d7b 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn.c
@@ -567,7 +567,8 @@ static void iwl_setup_rxon_timing(struct iwl_priv *priv)
 		beacon_int = iwl_adjust_beacon_interval(priv->beacon_int);
 		priv->rxon_timing.atim_window = 0;
 	} else {
-		beacon_int = iwl_adjust_beacon_interval(conf->beacon_int);
+		beacon_int = iwl_adjust_beacon_interval(
+			priv->vif->bss_conf.beacon_int);
 
 		/* TODO: we need to get atim_window from upper stack
 		 * for now we set to 0 */
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c
index 3dec2d25fa3..cd8a5ed97c4 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.c
+++ b/drivers/net/wireless/iwlwifi/iwl-core.c
@@ -1313,7 +1313,6 @@ int iwl_setup_mac(struct iwl_priv *priv)
 	/* Default value; 4 EDCA QOS priorities */
 	hw->queues = 4;
 
-	hw->conf.beacon_int = 100;
 	hw->max_listen_interval = IWL_CONN_MAX_LISTEN_INTERVAL;
 
 	if (priv->bands[IEEE80211_BAND_2GHZ].n_channels)
@@ -2751,7 +2750,7 @@ void iwl_mac_reset_tsf(struct ieee80211_hw *hw)
 
 	priv->ibss_beacon = NULL;
 
-	priv->beacon_int = priv->hw->conf.beacon_int;
+	priv->beacon_int = priv->vif->bss_conf.beacon_int;
 	priv->timestamp = 0;
 	if ((priv->iw_mode == NL80211_IFTYPE_STATION))
 		priv->beacon_int = 0;
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index a782292ed43..f9d9b65ef30 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -551,7 +551,8 @@ static void iwl3945_setup_rxon_timing(struct iwl_priv *priv)
 		priv->rxon_timing.atim_window = 0;
 	} else {
 		priv->rxon_timing.beacon_interval =
-			iwl3945_adjust_beacon_interval(conf->beacon_int);
+			iwl3945_adjust_beacon_interval(
+				priv->vif->bss_conf.beacon_int);
 		/* TODO: we need to get atim_window from upper stack
 		 * for now we set to 0 */
 		priv->rxon_timing.atim_window = 0;
@@ -4211,8 +4212,6 @@ static int iwl3945_setup_mac(struct iwl_priv *priv)
 	/* Default value; 4 EDCA QOS priorities */
 	hw->queues = 4;
 
-	hw->conf.beacon_int = 100;
-
 	if (priv->bands[IEEE80211_BAND_2GHZ].n_channels)
 		priv->hw->wiphy->bands[IEEE80211_BAND_2GHZ] =
 			&priv->bands[IEEE80211_BAND_2GHZ];
diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c
index e7289e2e7f1..68e47fb47a1 100644
--- a/drivers/net/wireless/libertas_tf/main.c
+++ b/drivers/net/wireless/libertas_tf/main.c
@@ -380,7 +380,7 @@ static int lbtf_op_config_interface(struct ieee80211_hw *hw,
 		if (beacon) {
 			lbtf_beacon_set(priv, beacon);
 			kfree_skb(beacon);
-			lbtf_beacon_ctrl(priv, 1, hw->conf.beacon_int);
+			lbtf_beacon_ctrl(priv, 1, vif->bss_conf.beacon_int);
 		}
 		break;
 	default:
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index d4fdc8b7d7d..24c95a619e4 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -553,18 +553,13 @@ static int mac80211_hwsim_config(struct ieee80211_hw *hw, u32 changed)
 	struct mac80211_hwsim_data *data = hw->priv;
 	struct ieee80211_conf *conf = &hw->conf;
 
-	printk(KERN_DEBUG "%s:%s (freq=%d radio_enabled=%d beacon_int=%d)\n",
+	printk(KERN_DEBUG "%s:%s (freq=%d radio_enabled=%d)\n",
 	       wiphy_name(hw->wiphy), __func__,
-	       conf->channel->center_freq, conf->radio_enabled,
-	       conf->beacon_int);
+	       conf->channel->center_freq, conf->radio_enabled);
 
 	data->channel = conf->channel;
 	data->radio_enabled = conf->radio_enabled;
-	data->beacon_int = 1024 * conf->beacon_int / 1000 * HZ / 1000;
-	if (data->beacon_int < 1)
-		data->beacon_int = 1;
-
-	if (!data->started || !data->radio_enabled)
+	if (!data->started || !data->radio_enabled || !data->beacon_int)
 		del_timer(&data->beacon_timer);
 	else
 		mod_timer(&data->beacon_timer, jiffies + data->beacon_int);
@@ -615,6 +610,7 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw,
 					    u32 changed)
 {
 	struct hwsim_vif_priv *vp = (void *)vif->drv_priv;
+	struct mac80211_hwsim_data *data = hw->priv;
 
 	hwsim_check_magic(vif);
 
@@ -628,6 +624,14 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw,
 		vp->aid = info->aid;
 	}
 
+	if (changed & BSS_CHANGED_BEACON_INT) {
+		printk(KERN_DEBUG "  %s: BCNINT: %d\n",
+		       wiphy_name(hw->wiphy), info->beacon_int);
+		data->beacon_int = 1024 * info->beacon_int / 1000 * HZ / 1000;
+		if (WARN_ON(data->beacon_int))
+			data->beacon_int = 1;
+	}
+
 	if (changed & BSS_CHANGED_ERP_CTS_PROT) {
 		printk(KERN_DEBUG "  %s: ERP_CTS_PROT: %d\n",
 		       wiphy_name(hw->wiphy), info->use_cts_prot);
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 81d706d8522..22c65e8cbb7 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -149,6 +149,7 @@ struct ieee80211_low_level_stats {
  * @BSS_CHANGED_ERP_SLOT: slot timing changed
  * @BSS_CHANGED_HT: 802.11n parameters changed
  * @BSS_CHANGED_BASIC_RATES: Basic rateset changed
+ * @BSS_CHANGED_BEACON_INT: Beacon interval changed
  */
 enum ieee80211_bss_change {
 	BSS_CHANGED_ASSOC		= 1<<0,
@@ -157,6 +158,7 @@ enum ieee80211_bss_change {
 	BSS_CHANGED_ERP_SLOT		= 1<<3,
 	BSS_CHANGED_HT                  = 1<<4,
 	BSS_CHANGED_BASIC_RATES		= 1<<5,
+	BSS_CHANGED_BEACON_INT		= 1<<6,
 };
 
 /**
@@ -529,7 +531,7 @@ enum ieee80211_conf_flags {
  * enum ieee80211_conf_changed - denotes which configuration changed
  *
  * @IEEE80211_CONF_CHANGE_RADIO_ENABLED: the value of radio_enabled changed
- * @IEEE80211_CONF_CHANGE_BEACON_INTERVAL: the beacon interval changed
+ * @_IEEE80211_CONF_CHANGE_BEACON_INTERVAL: DEPRECATED
  * @IEEE80211_CONF_CHANGE_LISTEN_INTERVAL: the listen interval changed
  * @IEEE80211_CONF_CHANGE_RADIOTAP: the radiotap flag changed
  * @IEEE80211_CONF_CHANGE_PS: the PS flag or dynamic PS timeout changed
@@ -539,7 +541,7 @@ enum ieee80211_conf_flags {
  */
 enum ieee80211_conf_changed {
 	IEEE80211_CONF_CHANGE_RADIO_ENABLED	= BIT(0),
-	IEEE80211_CONF_CHANGE_BEACON_INTERVAL	= BIT(1),
+	_IEEE80211_CONF_CHANGE_BEACON_INTERVAL	= BIT(1),
 	IEEE80211_CONF_CHANGE_LISTEN_INTERVAL	= BIT(2),
 	IEEE80211_CONF_CHANGE_RADIOTAP		= BIT(3),
 	IEEE80211_CONF_CHANGE_PS		= BIT(4),
@@ -548,6 +550,14 @@ enum ieee80211_conf_changed {
 	IEEE80211_CONF_CHANGE_RETRY_LIMITS	= BIT(7),
 };
 
+static inline __deprecated enum ieee80211_conf_changed
+__IEEE80211_CONF_CHANGE_BEACON_INTERVAL(void)
+{
+	return _IEEE80211_CONF_CHANGE_BEACON_INTERVAL;
+}
+#define IEEE80211_CONF_CHANGE_BEACON_INTERVAL \
+	__IEEE80211_CONF_CHANGE_BEACON_INTERVAL()
+
 /**
  * struct ieee80211_conf - configuration of the device
  *
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 5e1c230744b..a898ccd3f2c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -451,18 +451,11 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata,
 	 * This is a kludge. beacon interval should really be part
 	 * of the beacon information.
 	 */
-	if (params->interval && (sdata->local->hw.conf.beacon_int !=
-				 params->interval)) {
-		sdata->local->hw.conf.beacon_int = params->interval;
-		err = ieee80211_hw_config(sdata->local,
-					IEEE80211_CONF_CHANGE_BEACON_INTERVAL);
-		if (err < 0)
-			return err;
-		/*
-		 * We updated some parameter so if below bails out
-		 * it's not an error.
-		 */
-		err = 0;
+	if (params->interval &&
+	    (sdata->vif.bss_conf.beacon_int != params->interval)) {
+		sdata->vif.bss_conf.beacon_int = params->interval;
+		ieee80211_bss_info_change_notify(sdata,
+						 BSS_CHANGED_BEACON_INT);
 	}
 
 	/* Need to have a beacon head if we don't have one yet */
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 25ff583612e..f4879dad3cd 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -73,6 +73,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_mgmt *mgmt;
 	u8 *pos;
 	struct ieee80211_supported_band *sband;
+	u32 bss_change;
 
 	if (local->ops->reset_tsf) {
 		/* Reset own TSF to allow time synchronization work. */
@@ -92,8 +93,6 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 
 	memcpy(ifibss->bssid, bssid, ETH_ALEN);
 
-	local->hw.conf.beacon_int = beacon_int >= 10 ? beacon_int : 10;
-
 	sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0;
 
 	ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID);
@@ -101,6 +100,12 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	local->oper_channel = chan;
 	local->oper_channel_type = NL80211_CHAN_NO_HT;
 	ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
+
+	sdata->vif.bss_conf.beacon_int = beacon_int;
+	bss_change = BSS_CHANGED_BEACON_INT;
+	bss_change |= ieee80211_reset_erp_info(sdata);
+	ieee80211_bss_info_change_notify(sdata, bss_change);
+
 	sband = local->hw.wiphy->bands[chan->band];
 
 	/* Build IBSS probe response */
@@ -111,7 +116,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	memset(mgmt->da, 0xff, ETH_ALEN);
 	memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
 	memcpy(mgmt->bssid, ifibss->bssid, ETH_ALEN);
-	mgmt->u.beacon.beacon_int = cpu_to_le16(local->hw.conf.beacon_int);
+	mgmt->u.beacon.beacon_int = cpu_to_le16(beacon_int);
 	mgmt->u.beacon.timestamp = cpu_to_le64(tsf);
 	mgmt->u.beacon.capab_info = cpu_to_le16(capability);
 
@@ -181,8 +186,13 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 				    struct ieee80211_bss *bss)
 {
+	u16 beacon_int = bss->cbss.beacon_interval;
+
+	if (beacon_int < 10)
+		beacon_int = 10;
+
 	__ieee80211_sta_join_ibss(sdata, bss->cbss.bssid,
-				  bss->cbss.beacon_interval,
+				  beacon_int,
 				  bss->cbss.channel,
 				  bss->supp_rates_len, bss->supp_rates,
 				  bss->cbss.capability,
@@ -464,9 +474,6 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata)
 
 	sband = local->hw.wiphy->bands[ifibss->channel->band];
 
-	if (local->hw.conf.beacon_int == 0)
-		local->hw.conf.beacon_int = 100;
-
 	capability = WLAN_CAPABILITY_IBSS;
 
 	if (sdata->default_key)
@@ -480,7 +487,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata)
 		*pos++ = (u8) (rate / 5);
 	}
 
-	__ieee80211_sta_join_ibss(sdata, bssid, local->hw.conf.beacon_int,
+	__ieee80211_sta_join_ibss(sdata, bssid, sdata->vif.bss_conf.beacon_int,
 				  ifibss->channel, sband->n_bitrates,
 				  supp_rates, capability, 0);
 }
@@ -823,6 +830,8 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
 	} else
 		sdata->u.ibss.fixed_bssid = false;
 
+	sdata->vif.bss_conf.beacon_int = params->beacon_interval;
+
 	sdata->u.ibss.channel = params->channel;
 	sdata->u.ibss.fixed_channel = params->channel_fixed;
 
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index e00d124e4ef..b254879d863 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -294,9 +294,6 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 {
 	struct ieee80211_local *local = sdata->local;
 
-	if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN))
-		return;
-
 	if (!changed)
 		return;
 
@@ -305,6 +302,17 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 					     &sdata->vif,
 					     &sdata->vif.bss_conf,
 					     changed);
+
+	/*
+	 * DEPRECATED
+	 *
+	 * ~changed is just there to not do this at resume time
+	 */
+	if (changed & BSS_CHANGED_BEACON_INT && ~changed) {
+		local->hw.conf.beacon_int = sdata->vif.bss_conf.beacon_int;
+		ieee80211_hw_config(local,
+				    _IEEE80211_CONF_CHANGE_BEACON_INTERVAL);
+	}
 }
 
 u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata)
@@ -971,9 +979,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 
 	debugfs_hw_add(local);
 
-	if (local->hw.conf.beacon_int < 10)
-		local->hw.conf.beacon_int = 100;
-
 	if (local->hw.max_listen_interval == 0)
 		local->hw.max_listen_interval = 1;
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a2f5e622305..bfd571e6f22 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -842,6 +842,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 		sdata->vif.bss_conf.timestamp = bss->cbss.tsf;
 		sdata->vif.bss_conf.dtim_period = bss->dtim_period;
 
+		bss_info_changed |= BSS_CHANGED_BEACON_INT;
 		bss_info_changed |= ieee80211_handle_bss_capability(sdata,
 			bss->cbss.capability, bss->has_erp_value, bss->erp_value);
 
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 654a8e963cc..7116220d06b 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -543,9 +543,8 @@ void sta_info_unlink(struct sta_info **sta)
 	spin_unlock_irqrestore(&local->sta_lock, flags);
 }
 
-static inline int sta_info_buffer_expired(struct ieee80211_local *local,
-					  struct sta_info *sta,
-					  struct sk_buff *skb)
+static int sta_info_buffer_expired(struct sta_info *sta,
+				   struct sk_buff *skb)
 {
 	struct ieee80211_tx_info *info;
 	int timeout;
@@ -556,8 +555,9 @@ static inline int sta_info_buffer_expired(struct ieee80211_local *local,
 	info = IEEE80211_SKB_CB(skb);
 
 	/* Timeout: (2 * listen_interval * beacon_int * 1024 / 1000000) sec */
-	timeout = (sta->listen_interval * local->hw.conf.beacon_int * 32 /
-		   15625) * HZ;
+	timeout = (sta->listen_interval *
+		   sta->sdata->vif.bss_conf.beacon_int *
+		   32 / 15625) * HZ;
 	if (timeout < STA_TX_BUFFER_EXPIRE)
 		timeout = STA_TX_BUFFER_EXPIRE;
 	return time_after(jiffies, info->control.jiffies + timeout);
@@ -577,7 +577,7 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local,
 	for (;;) {
 		spin_lock_irqsave(&sta->ps_tx_buf.lock, flags);
 		skb = skb_peek(&sta->ps_tx_buf);
-		if (sta_info_buffer_expired(local, sta, skb))
+		if (sta_info_buffer_expired(sta, skb))
 			skb = __skb_dequeue(&sta->ps_tx_buf);
 		else
 			skb = NULL;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1865622003c..29df65045fc 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2132,7 +2132,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 		memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
 		/* BSSID is left zeroed, wildcard value */
 		mgmt->u.beacon.beacon_int =
-			cpu_to_le16(local->hw.conf.beacon_int);
+			cpu_to_le16(sdata->vif.bss_conf.beacon_int);
 		mgmt->u.beacon.capab_info = 0x0; /* 0x0 for MPs */
 
 		pos = skb_put(skb, 2);
-- 
cgit v1.2.3-70-g09d2


From 2d0ddec5b2b859f06116f631fc0ffe94fbceb556 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 23 Apr 2009 16:13:26 +0200
Subject: mac80211: unify config_interface and bss_info_changed

The config_interface method is a little strange, it contains the
BSSID and beacon updates, while bss_info_changed contains most
other BSS information for each interface. This patch removes
config_interface and rolls all the information it previously
passed to drivers into bss_info_changed.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/adm8211.c              |  14 ++-
 drivers/net/wireless/at76c50x-usb.c         |  15 ++-
 drivers/net/wireless/ath/ar9170/main.c      |  39 ++----
 drivers/net/wireless/ath/ath5k/base.c       |  71 +++++------
 drivers/net/wireless/ath/ath9k/main.c       | 182 +++++++++++++---------------
 drivers/net/wireless/b43/main.c             |  64 ++++------
 drivers/net/wireless/b43legacy/main.c       |  63 ++++------
 drivers/net/wireless/iwlwifi/iwl-agn.c      |   1 -
 drivers/net/wireless/iwlwifi/iwl-core.c     | 162 +++++++++----------------
 drivers/net/wireless/iwlwifi/iwl-core.h     |   3 -
 drivers/net/wireless/iwlwifi/iwl3945-base.c |   1 -
 drivers/net/wireless/libertas_tf/main.c     |  56 ++++-----
 drivers/net/wireless/mac80211_hwsim.c       |  25 ++--
 drivers/net/wireless/mwl8k.c                |  18 +--
 drivers/net/wireless/p54/p54common.c        |  63 ++++------
 drivers/net/wireless/rt2x00/rt2400pci.c     |   1 -
 drivers/net/wireless/rt2x00/rt2500pci.c     |   1 -
 drivers/net/wireless/rt2x00/rt2500usb.c     |   1 -
 drivers/net/wireless/rt2x00/rt2x00.h        |   3 -
 drivers/net/wireless/rt2x00/rt2x00mac.c     |  88 ++++++--------
 drivers/net/wireless/rt2x00/rt61pci.c       |   1 -
 drivers/net/wireless/rt2x00/rt73usb.c       |   1 -
 drivers/net/wireless/rtl818x/rtl8180_dev.c  |  33 +++--
 drivers/net/wireless/rtl818x/rtl8187_dev.c  |  48 ++++----
 drivers/net/wireless/zd1211rw/zd_mac.c      |  80 +++++-------
 include/net/mac80211.h                      |  51 ++------
 net/mac80211/cfg.c                          |   8 +-
 net/mac80211/ibss.c                         |  18 ++-
 net/mac80211/ieee80211_i.h                  |   1 -
 net/mac80211/main.c                         | 131 +++++++++-----------
 net/mac80211/mesh.c                         |   6 +-
 net/mac80211/mlme.c                         |   8 +-
 net/mac80211/scan.c                         |   8 +-
 net/mac80211/util.c                         |  17 +--
 34 files changed, 504 insertions(+), 778 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c
index f7182179501..2b9e379994a 100644
--- a/drivers/net/wireless/adm8211.c
+++ b/drivers/net/wireless/adm8211.c
@@ -1311,18 +1311,20 @@ static int adm8211_config(struct ieee80211_hw *dev, u32 changed)
 	return 0;
 }
 
-static int adm8211_config_interface(struct ieee80211_hw *dev,
-				    struct ieee80211_vif *vif,
-				    struct ieee80211_if_conf *conf)
+static void adm8211_bss_info_changed(struct ieee80211_hw *dev,
+				     struct ieee80211_vif *vif,
+				     struct ieee80211_bss_conf *conf,
+				     u32 changes)
 {
 	struct adm8211_priv *priv = dev->priv;
 
+	if (!(changes & BSS_CHANGED_BSSID))
+		return;
+
 	if (memcmp(conf->bssid, priv->bssid, ETH_ALEN)) {
 		adm8211_set_bssid(dev, conf->bssid);
 		memcpy(priv->bssid, conf->bssid, ETH_ALEN);
 	}
-
-	return 0;
 }
 
 static void adm8211_configure_filter(struct ieee80211_hw *dev,
@@ -1753,7 +1755,7 @@ static const struct ieee80211_ops adm8211_ops = {
 	.add_interface		= adm8211_add_interface,
 	.remove_interface	= adm8211_remove_interface,
 	.config			= adm8211_config,
-	.config_interface	= adm8211_config_interface,
+	.bss_info_changed	= adm8211_bss_info_changed,
 	.configure_filter	= adm8211_configure_filter,
 	.get_stats		= adm8211_get_stats,
 	.get_tx_stats		= adm8211_get_tx_stats,
diff --git a/drivers/net/wireless/at76c50x-usb.c b/drivers/net/wireless/at76c50x-usb.c
index 55f947ac56d..cea7f1466c5 100644
--- a/drivers/net/wireless/at76c50x-usb.c
+++ b/drivers/net/wireless/at76c50x-usb.c
@@ -1965,13 +1965,18 @@ static int at76_config(struct ieee80211_hw *hw, u32 changed)
 	return 0;
 }
 
-static int at76_config_interface(struct ieee80211_hw *hw,
-				 struct ieee80211_vif *vif,
-				 struct ieee80211_if_conf *conf)
+static void at76_bss_info_changed(struct ieee80211_hw *hw,
+				  struct ieee80211_vif *vif,
+				  struct ieee80211_bss_conf *conf,
+				  u32 changed)
 {
 	struct at76_priv *priv = hw->priv;
 
 	at76_dbg(DBG_MAC80211, "%s():", __func__);
+
+	if (!(changed & BSS_CHANGED_BSSID))
+		return;
+
 	at76_dbg_dump(DBG_MAC80211, conf->bssid, ETH_ALEN, "bssid:");
 
 	mutex_lock(&priv->mtx);
@@ -1983,8 +1988,6 @@ static int at76_config_interface(struct ieee80211_hw *hw,
 		at76_join(priv);
 
 	mutex_unlock(&priv->mtx);
-
-	return 0;
 }
 
 /* must be atomic */
@@ -2076,7 +2079,7 @@ static const struct ieee80211_ops at76_ops = {
 	.add_interface = at76_add_interface,
 	.remove_interface = at76_remove_interface,
 	.config = at76_config,
-	.config_interface = at76_config_interface,
+	.bss_info_changed = at76_bss_info_changed,
 	.configure_filter = at76_configure_filter,
 	.start = at76_mac80211_start,
 	.stop = at76_mac80211_stop,
diff --git a/drivers/net/wireless/ath/ar9170/main.c b/drivers/net/wireless/ath/ar9170/main.c
index 49c729bc714..3bd3a61225c 100644
--- a/drivers/net/wireless/ath/ar9170/main.c
+++ b/drivers/net/wireless/ath/ar9170/main.c
@@ -1360,33 +1360,6 @@ out:
 	return err;
 }
 
-static int ar9170_op_config_interface(struct ieee80211_hw *hw,
-				      struct ieee80211_vif *vif,
-				      struct ieee80211_if_conf *conf)
-{
-	struct ar9170 *ar = hw->priv;
-	int err = 0;
-
-	mutex_lock(&ar->mutex);
-
-	if (conf->changed & IEEE80211_IFCC_BSSID) {
-		memcpy(ar->bssid, conf->bssid, ETH_ALEN);
-		err = ar9170_set_operating_mode(ar);
-	}
-
-	if (conf->changed & IEEE80211_IFCC_BEACON) {
-		err = ar9170_update_beacon(ar);
-
-		if (err)
-			goto out;
-		err = ar9170_set_beacon_timers(ar);
-	}
-
-out:
-	mutex_unlock(&ar->mutex);
-	return err;
-}
-
 static void ar9170_set_filters(struct work_struct *work)
 {
 	struct ar9170 *ar = container_of(work, struct ar9170,
@@ -1488,6 +1461,17 @@ static void ar9170_op_bss_info_changed(struct ieee80211_hw *hw,
 
 	mutex_lock(&ar->mutex);
 
+	if (changed & BSS_CHANGED_BSSID) {
+		memcpy(ar->bssid, bss_conf->bssid, ETH_ALEN);
+		err = ar9170_set_operating_mode(ar);
+	}
+
+	if (changed & (BSS_CHANGED_BEACON | BSS_CHANGED_BEACON_ENABLED)) {
+		err = ar9170_update_beacon(ar);
+		if (!err)
+			ar9170_set_beacon_timers(ar);
+	}
+
 	ar9170_regwrite_begin(ar);
 
 	if (changed & BSS_CHANGED_ASSOC) {
@@ -1796,7 +1780,6 @@ static const struct ieee80211_ops ar9170_ops = {
 	.add_interface		= ar9170_op_add_interface,
 	.remove_interface	= ar9170_op_remove_interface,
 	.config			= ar9170_op_config,
-	.config_interface	= ar9170_op_config_interface,
 	.configure_filter	= ar9170_op_configure_filter,
 	.conf_tx		= ar9170_conf_tx,
 	.bss_info_changed	= ar9170_op_bss_info_changed,
diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c
index e4b1d9efa42..410fc4cfc4c 100644
--- a/drivers/net/wireless/ath/ath5k/base.c
+++ b/drivers/net/wireless/ath/ath5k/base.c
@@ -227,9 +227,6 @@ static int ath5k_add_interface(struct ieee80211_hw *hw,
 static void ath5k_remove_interface(struct ieee80211_hw *hw,
 		struct ieee80211_if_init_conf *conf);
 static int ath5k_config(struct ieee80211_hw *hw, u32 changed);
-static int ath5k_config_interface(struct ieee80211_hw *hw,
-		struct ieee80211_vif *vif,
-		struct ieee80211_if_conf *conf);
 static void ath5k_configure_filter(struct ieee80211_hw *hw,
 		unsigned int changed_flags,
 		unsigned int *new_flags,
@@ -259,7 +256,6 @@ static const struct ieee80211_ops ath5k_hw_ops = {
 	.add_interface 	= ath5k_add_interface,
 	.remove_interface = ath5k_remove_interface,
 	.config 	= ath5k_config,
-	.config_interface = ath5k_config_interface,
 	.configure_filter = ath5k_configure_filter,
 	.set_key 	= ath5k_set_key,
 	.get_stats 	= ath5k_get_stats,
@@ -2764,44 +2760,6 @@ ath5k_config(struct ieee80211_hw *hw, u32 changed)
 	return ret;
 }
 
-static int
-ath5k_config_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
-			struct ieee80211_if_conf *conf)
-{
-	struct ath5k_softc *sc = hw->priv;
-	struct ath5k_hw *ah = sc->ah;
-	int ret = 0;
-
-	mutex_lock(&sc->lock);
-	if (sc->vif != vif) {
-		ret = -EIO;
-		goto unlock;
-	}
-	if (conf->changed & IEEE80211_IFCC_BSSID && conf->bssid) {
-		/* Cache for later use during resets */
-		memcpy(ah->ah_bssid, conf->bssid, ETH_ALEN);
-		/* XXX: assoc id is set to 0 for now, mac80211 doesn't have
-		 * a clean way of letting us retrieve this yet. */
-		ath5k_hw_set_associd(ah, ah->ah_bssid, 0);
-		mmiowb();
-	}
-	if (conf->changed & IEEE80211_IFCC_BEACON &&
-			(vif->type == NL80211_IFTYPE_ADHOC ||
-			 vif->type == NL80211_IFTYPE_MESH_POINT ||
-			 vif->type == NL80211_IFTYPE_AP)) {
-		struct sk_buff *beacon = ieee80211_beacon_get(hw, vif);
-		if (!beacon) {
-			ret = -ENOMEM;
-			goto unlock;
-		}
-		ath5k_beacon_update(sc, beacon);
-	}
-
-unlock:
-	mutex_unlock(&sc->lock);
-	return ret;
-}
-
 #define SUPPORTED_FIF_FLAGS \
 	FIF_PROMISC_IN_BSS |  FIF_ALLMULTI | FIF_FCSFAIL | \
 	FIF_PLCPFAIL | FIF_CONTROL | FIF_OTHER_BSS | \
@@ -3082,15 +3040,40 @@ static void ath5k_bss_info_changed(struct ieee80211_hw *hw,
 				    u32 changes)
 {
 	struct ath5k_softc *sc = hw->priv;
+	struct ath5k_hw *ah = sc->ah;
+
+	mutex_lock(&sc->lock);
+	if (WARN_ON(sc->vif != vif))
+		goto unlock;
+
+	if (changes & BSS_CHANGED_BSSID) {
+		/* Cache for later use during resets */
+		memcpy(ah->ah_bssid, bss_conf->bssid, ETH_ALEN);
+		/* XXX: assoc id is set to 0 for now, mac80211 doesn't have
+		 * a clean way of letting us retrieve this yet. */
+		ath5k_hw_set_associd(ah, ah->ah_bssid, 0);
+		mmiowb();
+	}
 
 	if (changes & BSS_CHANGED_BEACON_INT)
 		sc->bintval = bss_conf->beacon_int;
 
 	if (changes & BSS_CHANGED_ASSOC) {
-		mutex_lock(&sc->lock);
 		sc->assoc = bss_conf->assoc;
 		if (sc->opmode == NL80211_IFTYPE_STATION)
 			set_beacon_filter(hw, sc->assoc);
-		mutex_unlock(&sc->lock);
 	}
+
+	if (changes & BSS_CHANGED_BEACON &&
+	    (vif->type == NL80211_IFTYPE_ADHOC ||
+	     vif->type == NL80211_IFTYPE_MESH_POINT ||
+	     vif->type == NL80211_IFTYPE_AP)) {
+		struct sk_buff *beacon = ieee80211_beacon_get(hw, vif);
+
+		if (beacon)
+			ath5k_beacon_update(sc, beacon);
+	}
+
+ unlock:
+	mutex_unlock(&sc->lock);
 }
diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index 28cc9cd52f3..d3dc8e2c77b 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -2363,104 +2363,6 @@ skip_chan_change:
 	return 0;
 }
 
-static int ath9k_config_interface(struct ieee80211_hw *hw,
-				  struct ieee80211_vif *vif,
-				  struct ieee80211_if_conf *conf)
-{
-	struct ath_wiphy *aphy = hw->priv;
-	struct ath_softc *sc = aphy->sc;
-	struct ath_hw *ah = sc->sc_ah;
-	struct ath_vif *avp = (void *)vif->drv_priv;
-	u32 rfilt = 0;
-	int error, i;
-
-	mutex_lock(&sc->mutex);
-
-	/* TODO: Need to decide which hw opmode to use for multi-interface
-	 * cases */
-	if (vif->type == NL80211_IFTYPE_AP &&
-	    ah->opmode != NL80211_IFTYPE_AP) {
-		ah->opmode = NL80211_IFTYPE_STATION;
-		ath9k_hw_setopmode(ah);
-		memcpy(sc->curbssid, sc->sc_ah->macaddr, ETH_ALEN);
-		sc->curaid = 0;
-		ath9k_hw_write_associd(sc);
-		/* Request full reset to get hw opmode changed properly */
-		sc->sc_flags |= SC_OP_FULL_RESET;
-	}
-
-	if ((conf->changed & IEEE80211_IFCC_BSSID) &&
-	    !is_zero_ether_addr(conf->bssid)) {
-		switch (vif->type) {
-		case NL80211_IFTYPE_STATION:
-		case NL80211_IFTYPE_ADHOC:
-		case NL80211_IFTYPE_MESH_POINT:
-			/* Set BSSID */
-			memcpy(sc->curbssid, conf->bssid, ETH_ALEN);
-			memcpy(avp->bssid, conf->bssid, ETH_ALEN);
-			sc->curaid = 0;
-			ath9k_hw_write_associd(sc);
-
-			/* Set aggregation protection mode parameters */
-			sc->config.ath_aggr_prot = 0;
-
-			DPRINTF(sc, ATH_DBG_CONFIG,
-				"RX filter 0x%x bssid %pM aid 0x%x\n",
-				rfilt, sc->curbssid, sc->curaid);
-
-			/* need to reconfigure the beacon */
-			sc->sc_flags &= ~SC_OP_BEACONS ;
-
-			break;
-		default:
-			break;
-		}
-	}
-
-	if ((vif->type == NL80211_IFTYPE_ADHOC) ||
-	    (vif->type == NL80211_IFTYPE_AP) ||
-	    (vif->type == NL80211_IFTYPE_MESH_POINT)) {
-		if ((conf->changed & IEEE80211_IFCC_BEACON) ||
-		    (conf->changed & IEEE80211_IFCC_BEACON_ENABLED &&
-		     conf->enable_beacon)) {
-			/*
-			 * Allocate and setup the beacon frame.
-			 *
-			 * Stop any previous beacon DMA.  This may be
-			 * necessary, for example, when an ibss merge
-			 * causes reconfiguration; we may be called
-			 * with beacon transmission active.
-			 */
-			ath9k_hw_stoptxdma(sc->sc_ah, sc->beacon.beaconq);
-
-			error = ath_beacon_alloc(aphy, vif);
-			if (error != 0) {
-				mutex_unlock(&sc->mutex);
-				return error;
-			}
-
-			ath_beacon_config(sc, vif);
-		}
-	}
-
-	/* Check for WLAN_CAPABILITY_PRIVACY ? */
-	if ((avp->av_opmode != NL80211_IFTYPE_STATION)) {
-		for (i = 0; i < IEEE80211_WEP_NKID; i++)
-			if (ath9k_hw_keyisvalid(sc->sc_ah, (u16)i))
-				ath9k_hw_keysetmac(sc->sc_ah,
-						   (u16)i,
-						   sc->curbssid);
-	}
-
-	/* Only legacy IBSS for now */
-	if (vif->type == NL80211_IFTYPE_ADHOC)
-		ath_update_chainmask(sc, 0);
-
-	mutex_unlock(&sc->mutex);
-
-	return 0;
-}
-
 #define SUPPORTED_FILTERS			\
 	(FIF_PROMISC_IN_BSS |			\
 	FIF_ALLMULTI |				\
@@ -2597,9 +2499,92 @@ static void ath9k_bss_info_changed(struct ieee80211_hw *hw,
 {
 	struct ath_wiphy *aphy = hw->priv;
 	struct ath_softc *sc = aphy->sc;
+	struct ath_hw *ah = sc->sc_ah;
+	struct ath_vif *avp = (void *)vif->drv_priv;
+	u32 rfilt = 0;
+	int error, i;
 
 	mutex_lock(&sc->mutex);
 
+	/*
+	 * TODO: Need to decide which hw opmode to use for
+	 *       multi-interface cases
+	 * XXX: This belongs into add_interface!
+	 */
+	if (vif->type == NL80211_IFTYPE_AP &&
+	    ah->opmode != NL80211_IFTYPE_AP) {
+		ah->opmode = NL80211_IFTYPE_STATION;
+		ath9k_hw_setopmode(ah);
+		memcpy(sc->curbssid, sc->sc_ah->macaddr, ETH_ALEN);
+		sc->curaid = 0;
+		ath9k_hw_write_associd(sc);
+		/* Request full reset to get hw opmode changed properly */
+		sc->sc_flags |= SC_OP_FULL_RESET;
+	}
+
+	if ((changed & BSS_CHANGED_BSSID) &&
+	    !is_zero_ether_addr(bss_conf->bssid)) {
+		switch (vif->type) {
+		case NL80211_IFTYPE_STATION:
+		case NL80211_IFTYPE_ADHOC:
+		case NL80211_IFTYPE_MESH_POINT:
+			/* Set BSSID */
+			memcpy(sc->curbssid, bss_conf->bssid, ETH_ALEN);
+			memcpy(avp->bssid, bss_conf->bssid, ETH_ALEN);
+			sc->curaid = 0;
+			ath9k_hw_write_associd(sc);
+
+			/* Set aggregation protection mode parameters */
+			sc->config.ath_aggr_prot = 0;
+
+			DPRINTF(sc, ATH_DBG_CONFIG,
+				"RX filter 0x%x bssid %pM aid 0x%x\n",
+				rfilt, sc->curbssid, sc->curaid);
+
+			/* need to reconfigure the beacon */
+			sc->sc_flags &= ~SC_OP_BEACONS ;
+
+			break;
+		default:
+			break;
+		}
+	}
+
+	if ((vif->type == NL80211_IFTYPE_ADHOC) ||
+	    (vif->type == NL80211_IFTYPE_AP) ||
+	    (vif->type == NL80211_IFTYPE_MESH_POINT)) {
+		if ((changed & BSS_CHANGED_BEACON) ||
+		    (changed & BSS_CHANGED_BEACON_ENABLED &&
+		     bss_conf->enable_beacon)) {
+			/*
+			 * Allocate and setup the beacon frame.
+			 *
+			 * Stop any previous beacon DMA.  This may be
+			 * necessary, for example, when an ibss merge
+			 * causes reconfiguration; we may be called
+			 * with beacon transmission active.
+			 */
+			ath9k_hw_stoptxdma(sc->sc_ah, sc->beacon.beaconq);
+
+			error = ath_beacon_alloc(aphy, vif);
+			if (!error)
+				ath_beacon_config(sc, vif);
+		}
+	}
+
+	/* Check for WLAN_CAPABILITY_PRIVACY ? */
+	if ((avp->av_opmode != NL80211_IFTYPE_STATION)) {
+		for (i = 0; i < IEEE80211_WEP_NKID; i++)
+			if (ath9k_hw_keyisvalid(sc->sc_ah, (u16)i))
+				ath9k_hw_keysetmac(sc->sc_ah,
+						   (u16)i,
+						   sc->curbssid);
+	}
+
+	/* Only legacy IBSS for now */
+	if (vif->type == NL80211_IFTYPE_ADHOC)
+		ath_update_chainmask(sc, 0);
+
 	if (changed & BSS_CHANGED_ERP_PREAMBLE) {
 		DPRINTF(sc, ATH_DBG_CONFIG, "BSS Changed PREAMBLE %d\n",
 			bss_conf->use_short_preamble);
@@ -2757,7 +2742,6 @@ struct ieee80211_ops ath9k_ops = {
 	.add_interface 	    = ath9k_add_interface,
 	.remove_interface   = ath9k_remove_interface,
 	.config 	    = ath9k_config,
-	.config_interface   = ath9k_config_interface,
 	.configure_filter   = ath9k_configure_filter,
 	.sta_notify         = ath9k_sta_notify,
 	.conf_tx 	    = ath9k_conf_tx,
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 3c693e6ec90..2615aaf7df6 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -3543,12 +3543,38 @@ static void b43_op_bss_info_changed(struct ieee80211_hw *hw,
 {
 	struct b43_wl *wl = hw_to_b43_wl(hw);
 	struct b43_wldev *dev;
+	unsigned long flags;
 
 	mutex_lock(&wl->mutex);
 
 	dev = wl->current_dev;
 	if (!dev || b43_status(dev) < B43_STAT_STARTED)
 		goto out_unlock_mutex;
+
+	B43_WARN_ON(wl->vif != vif);
+
+	if (changed & BSS_CHANGED_BSSID) {
+		spin_lock_irqsave(&wl->irq_lock, flags);
+		if (conf->bssid)
+			memcpy(wl->bssid, conf->bssid, ETH_ALEN);
+		else
+			memset(wl->bssid, 0, ETH_ALEN);
+
+		if (b43_status(dev) >= B43_STAT_INITIALIZED) {
+			if (b43_is_mode(wl, NL80211_IFTYPE_AP) ||
+			    b43_is_mode(wl, NL80211_IFTYPE_MESH_POINT)) {
+				B43_WARN_ON(vif->type != wl->if_type);
+				if (changed & BSS_CHANGED_BEACON)
+					b43_update_templates(wl);
+			} else if (b43_is_mode(wl, NL80211_IFTYPE_ADHOC)) {
+				if (changed & BSS_CHANGED_BEACON)
+					b43_update_templates(wl);
+			}
+			b43_write_mac_bssid_templates(dev);
+		}
+		spin_unlock_irqrestore(&wl->irq_lock, flags);
+	}
+
 	b43_mac_suspend(dev);
 
 	/* Update templates for AP/mesh mode. */
@@ -3571,8 +3597,6 @@ static void b43_op_bss_info_changed(struct ieee80211_hw *hw,
 	b43_mac_enable(dev);
 out_unlock_mutex:
 	mutex_unlock(&wl->mutex);
-
-	return;
 }
 
 static int b43_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
@@ -3730,41 +3754,6 @@ static void b43_op_configure_filter(struct ieee80211_hw *hw,
 	spin_unlock_irqrestore(&wl->irq_lock, flags);
 }
 
-static int b43_op_config_interface(struct ieee80211_hw *hw,
-				   struct ieee80211_vif *vif,
-				   struct ieee80211_if_conf *conf)
-{
-	struct b43_wl *wl = hw_to_b43_wl(hw);
-	struct b43_wldev *dev = wl->current_dev;
-	unsigned long flags;
-
-	if (!dev)
-		return -ENODEV;
-	mutex_lock(&wl->mutex);
-	spin_lock_irqsave(&wl->irq_lock, flags);
-	B43_WARN_ON(wl->vif != vif);
-	if (conf->bssid)
-		memcpy(wl->bssid, conf->bssid, ETH_ALEN);
-	else
-		memset(wl->bssid, 0, ETH_ALEN);
-	if (b43_status(dev) >= B43_STAT_INITIALIZED) {
-		if (b43_is_mode(wl, NL80211_IFTYPE_AP) ||
-		    b43_is_mode(wl, NL80211_IFTYPE_MESH_POINT)) {
-			B43_WARN_ON(vif->type != wl->if_type);
-			if (conf->changed & IEEE80211_IFCC_BEACON)
-				b43_update_templates(wl);
-		} else if (b43_is_mode(wl, NL80211_IFTYPE_ADHOC)) {
-			if (conf->changed & IEEE80211_IFCC_BEACON)
-				b43_update_templates(wl);
-		}
-		b43_write_mac_bssid_templates(dev);
-	}
-	spin_unlock_irqrestore(&wl->irq_lock, flags);
-	mutex_unlock(&wl->mutex);
-
-	return 0;
-}
-
 /* Locking: wl->mutex */
 static void b43_wireless_core_stop(struct b43_wldev *dev)
 {
@@ -4434,7 +4423,6 @@ static const struct ieee80211_ops b43_hw_ops = {
 	.remove_interface	= b43_op_remove_interface,
 	.config			= b43_op_config,
 	.bss_info_changed	= b43_op_bss_info_changed,
-	.config_interface	= b43_op_config_interface,
 	.configure_filter	= b43_op_configure_filter,
 	.set_key		= b43_op_set_key,
 	.get_stats		= b43_op_get_stats,
diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c
index 276f314688f..07c7898c87a 100644
--- a/drivers/net/wireless/b43legacy/main.c
+++ b/drivers/net/wireless/b43legacy/main.c
@@ -2804,6 +2804,7 @@ static void b43legacy_op_bss_info_changed(struct ieee80211_hw *hw,
 	u32 savedirqs;
 
 	mutex_lock(&wl->mutex);
+	B43legacy_WARN_ON(wl->vif != vif);
 
 	dev = wl->current_dev;
 	phy = &dev->phy;
@@ -2817,8 +2818,29 @@ static void b43legacy_op_bss_info_changed(struct ieee80211_hw *hw,
 		goto out_unlock_mutex;
 	}
 	savedirqs = b43legacy_interrupt_disable(dev, B43legacy_IRQ_ALL);
-	spin_unlock_irqrestore(&wl->irq_lock, flags);
-	b43legacy_synchronize_irq(dev);
+
+	if (changed & BSS_CHANGED_BSSID) {
+		spin_unlock_irqrestore(&wl->irq_lock, flags);
+		b43legacy_synchronize_irq(dev);
+
+		if (conf->bssid)
+			memcpy(wl->bssid, conf->bssid, ETH_ALEN);
+		else
+			memset(wl->bssid, 0, ETH_ALEN);
+
+		if (b43legacy_status(dev) >= B43legacy_STAT_INITIALIZED) {
+			if (b43legacy_is_mode(wl, NL80211_IFTYPE_AP)) {
+				B43legacy_WARN_ON(vif->type != NL80211_IFTYPE_AP);
+				if (changed & BSS_CHANGED_BEACON)
+					b43legacy_update_templates(wl);
+			} else if (b43legacy_is_mode(wl, NL80211_IFTYPE_ADHOC)) {
+				if (changed & BSS_CHANGED_BEACON)
+					b43legacy_update_templates(wl);
+			}
+			b43legacy_write_mac_bssid_templates(dev);
+		}
+		spin_unlock_irqrestore(&wl->irq_lock, flags);
+	}
 
 	b43legacy_mac_suspend(dev);
 
@@ -2846,8 +2868,6 @@ static void b43legacy_op_bss_info_changed(struct ieee80211_hw *hw,
 	spin_unlock_irqrestore(&wl->irq_lock, flags);
  out_unlock_mutex:
 	mutex_unlock(&wl->mutex);
-
-	return;
 }
 
 static void b43legacy_op_configure_filter(struct ieee80211_hw *hw,
@@ -2889,40 +2909,6 @@ static void b43legacy_op_configure_filter(struct ieee80211_hw *hw,
 	spin_unlock_irqrestore(&wl->irq_lock, flags);
 }
 
-static int b43legacy_op_config_interface(struct ieee80211_hw *hw,
-					 struct ieee80211_vif *vif,
-					 struct ieee80211_if_conf *conf)
-{
-	struct b43legacy_wl *wl = hw_to_b43legacy_wl(hw);
-	struct b43legacy_wldev *dev = wl->current_dev;
-	unsigned long flags;
-
-	if (!dev)
-		return -ENODEV;
-	mutex_lock(&wl->mutex);
-	spin_lock_irqsave(&wl->irq_lock, flags);
-	B43legacy_WARN_ON(wl->vif != vif);
-	if (conf->bssid)
-		memcpy(wl->bssid, conf->bssid, ETH_ALEN);
-	else
-		memset(wl->bssid, 0, ETH_ALEN);
-	if (b43legacy_status(dev) >= B43legacy_STAT_INITIALIZED) {
-		if (b43legacy_is_mode(wl, NL80211_IFTYPE_AP)) {
-			B43legacy_WARN_ON(vif->type != NL80211_IFTYPE_AP);
-			if (conf->changed & IEEE80211_IFCC_BEACON)
-				b43legacy_update_templates(wl);
-		} else if (b43legacy_is_mode(wl, NL80211_IFTYPE_ADHOC)) {
-			if (conf->changed & IEEE80211_IFCC_BEACON)
-				b43legacy_update_templates(wl);
-		}
-		b43legacy_write_mac_bssid_templates(dev);
-	}
-	spin_unlock_irqrestore(&wl->irq_lock, flags);
-	mutex_unlock(&wl->mutex);
-
-	return 0;
-}
-
 /* Locking: wl->mutex */
 static void b43legacy_wireless_core_stop(struct b43legacy_wldev *dev)
 {
@@ -3563,7 +3549,6 @@ static const struct ieee80211_ops b43legacy_hw_ops = {
 	.remove_interface	= b43legacy_op_remove_interface,
 	.config			= b43legacy_op_dev_config,
 	.bss_info_changed	= b43legacy_op_bss_info_changed,
-	.config_interface	= b43legacy_op_config_interface,
 	.configure_filter	= b43legacy_op_configure_filter,
 	.get_stats		= b43legacy_op_get_stats,
 	.get_tx_stats		= b43legacy_op_get_tx_stats,
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c
index 4920dcf7d7b..dd8b15ed829 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn.c
@@ -2623,7 +2623,6 @@ static struct ieee80211_ops iwl_hw_ops = {
 	.add_interface = iwl_mac_add_interface,
 	.remove_interface = iwl_mac_remove_interface,
 	.config = iwl_mac_config,
-	.config_interface = iwl_mac_config_interface,
 	.configure_filter = iwl_configure_filter,
 	.set_key = iwl_mac_set_key,
 	.update_tkip_key = iwl_mac_update_tkip_key,
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c
index cd8a5ed97c4..0f21fc136ca 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.c
+++ b/drivers/net/wireless/iwlwifi/iwl-core.c
@@ -2238,15 +2238,69 @@ static void iwl_ht_conf(struct iwl_priv *priv,
 
 #define IWL_DELAY_NEXT_SCAN_AFTER_ASSOC (HZ*6)
 void iwl_bss_info_changed(struct ieee80211_hw *hw,
-				     struct ieee80211_vif *vif,
-				     struct ieee80211_bss_conf *bss_conf,
-				     u32 changes)
+			  struct ieee80211_vif *vif,
+			  struct ieee80211_bss_conf *bss_conf,
+			  u32 changes)
 {
 	struct iwl_priv *priv = hw->priv;
 	int ret;
 
 	IWL_DEBUG_MAC80211(priv, "changes = 0x%X\n", changes);
 
+	if (!iwl_is_alive(priv))
+		return;
+
+	mutex_lock(&priv->mutex);
+
+	if (changes & BSS_CHANGED_BEACON &&
+	    priv->iw_mode == NL80211_IFTYPE_AP) {
+		dev_kfree_skb(priv->ibss_beacon);
+		priv->ibss_beacon = ieee80211_beacon_get(hw, vif);
+	}
+
+	if ((changes & BSS_CHANGED_BSSID) && !iwl_is_rfkill(priv)) {
+		/* If there is currently a HW scan going on in the background
+		 * then we need to cancel it else the RXON below will fail. */
+		if (iwl_scan_cancel_timeout(priv, 100)) {
+			IWL_WARN(priv, "Aborted scan still in progress "
+				    "after 100ms\n");
+			IWL_DEBUG_MAC80211(priv, "leaving - scan abort failed.\n");
+			mutex_unlock(&priv->mutex);
+			return;
+		}
+		memcpy(priv->staging_rxon.bssid_addr,
+		       bss_conf->bssid, ETH_ALEN);
+
+		/* TODO: Audit driver for usage of these members and see
+		 * if mac80211 deprecates them (priv->bssid looks like it
+		 * shouldn't be there, but I haven't scanned the IBSS code
+		 * to verify) - jpk */
+		memcpy(priv->bssid, bss_conf->bssid, ETH_ALEN);
+
+		if (priv->iw_mode == NL80211_IFTYPE_AP)
+			iwlcore_config_ap(priv);
+		else {
+			int rc = iwlcore_commit_rxon(priv);
+			if ((priv->iw_mode == NL80211_IFTYPE_STATION) && rc)
+				iwl_rxon_add_station(
+					priv, priv->active_rxon.bssid_addr, 1);
+		}
+	} else if (!iwl_is_rfkill(priv)) {
+		iwl_scan_cancel_timeout(priv, 100);
+		priv->staging_rxon.filter_flags &= ~RXON_FILTER_ASSOC_MSK;
+		iwlcore_commit_rxon(priv);
+	}
+
+	if (priv->iw_mode == NL80211_IFTYPE_ADHOC &&
+	    changes & BSS_CHANGED_BEACON) {
+		struct sk_buff *beacon = ieee80211_beacon_get(hw, vif);
+
+		if (beacon)
+			iwl_mac_beacon_update(hw, beacon);
+	}
+
+	mutex_unlock(&priv->mutex);
+
 	if (changes & BSS_CHANGED_ERP_PREAMBLE) {
 		IWL_DEBUG_MAC80211(priv, "ERP_PREAMBLE %d\n",
 				   bss_conf->use_short_preamble);
@@ -2305,7 +2359,7 @@ void iwl_bss_info_changed(struct ieee80211_hw *hw,
 					&priv->staging_rxon,
 					sizeof(struct iwl_rxon_cmd));
 	}
-
+	IWL_DEBUG_MAC80211(priv, "leave\n");
 }
 EXPORT_SYMBOL(iwl_bss_info_changed);
 
@@ -2589,106 +2643,6 @@ out:
 }
 EXPORT_SYMBOL(iwl_mac_config);
 
-int iwl_mac_config_interface(struct ieee80211_hw *hw,
-					struct ieee80211_vif *vif,
-				    struct ieee80211_if_conf *conf)
-{
-	struct iwl_priv *priv = hw->priv;
-	int rc;
-
-	if (conf == NULL)
-		return -EIO;
-
-	if (priv->vif != vif) {
-		IWL_DEBUG_MAC80211(priv, "leave - priv->vif != vif\n");
-		return 0;
-	}
-
-	if (priv->iw_mode == NL80211_IFTYPE_ADHOC &&
-	    conf->changed & IEEE80211_IFCC_BEACON) {
-		struct sk_buff *beacon = ieee80211_beacon_get(hw, vif);
-		if (!beacon)
-			return -ENOMEM;
-		mutex_lock(&priv->mutex);
-		rc = iwl_mac_beacon_update(hw, beacon);
-		mutex_unlock(&priv->mutex);
-		if (rc)
-			return rc;
-	}
-
-	if (!iwl_is_alive(priv))
-		return -EAGAIN;
-
-	mutex_lock(&priv->mutex);
-
-	if (conf->bssid)
-		IWL_DEBUG_MAC80211(priv, "bssid: %pM\n", conf->bssid);
-
-/*
- * very dubious code was here; the probe filtering flag is never set:
- *
-	if (unlikely(test_bit(STATUS_SCANNING, &priv->status)) &&
-	    !(priv->hw->flags & IEEE80211_HW_NO_PROBE_FILTERING)) {
- */
-
-	if (priv->iw_mode == NL80211_IFTYPE_AP) {
-		if (!conf->bssid) {
-			conf->bssid = priv->mac_addr;
-			memcpy(priv->bssid, priv->mac_addr, ETH_ALEN);
-			IWL_DEBUG_MAC80211(priv, "bssid was set to: %pM\n",
-					   conf->bssid);
-		}
-		if (priv->ibss_beacon)
-			dev_kfree_skb(priv->ibss_beacon);
-
-		priv->ibss_beacon = ieee80211_beacon_get(hw, vif);
-	}
-
-	if (iwl_is_rfkill(priv))
-		goto done;
-
-	if (conf->bssid && !is_zero_ether_addr(conf->bssid) &&
-	    !is_multicast_ether_addr(conf->bssid)) {
-		/* If there is currently a HW scan going on in the background
-		 * then we need to cancel it else the RXON below will fail. */
-		if (iwl_scan_cancel_timeout(priv, 100)) {
-			IWL_WARN(priv, "Aborted scan still in progress "
-				    "after 100ms\n");
-			IWL_DEBUG_MAC80211(priv, "leaving - scan abort failed.\n");
-			mutex_unlock(&priv->mutex);
-			return -EAGAIN;
-		}
-		memcpy(priv->staging_rxon.bssid_addr, conf->bssid, ETH_ALEN);
-
-		/* TODO: Audit driver for usage of these members and see
-		 * if mac80211 deprecates them (priv->bssid looks like it
-		 * shouldn't be there, but I haven't scanned the IBSS code
-		 * to verify) - jpk */
-		memcpy(priv->bssid, conf->bssid, ETH_ALEN);
-
-		if (priv->iw_mode == NL80211_IFTYPE_AP)
-			iwlcore_config_ap(priv);
-		else {
-			rc = iwlcore_commit_rxon(priv);
-			if ((priv->iw_mode == NL80211_IFTYPE_STATION) && rc)
-				iwl_rxon_add_station(
-					priv, priv->active_rxon.bssid_addr, 1);
-		}
-
-	} else {
-		iwl_scan_cancel_timeout(priv, 100);
-		priv->staging_rxon.filter_flags &= ~RXON_FILTER_ASSOC_MSK;
-		iwlcore_commit_rxon(priv);
-	}
-
- done:
-	IWL_DEBUG_MAC80211(priv, "leave\n");
-	mutex_unlock(&priv->mutex);
-
-	return 0;
-}
-EXPORT_SYMBOL(iwl_mac_config_interface);
-
 int iwl_mac_get_tx_stats(struct ieee80211_hw *hw,
 			 struct ieee80211_tx_queue_stats *stats)
 {
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.h b/drivers/net/wireless/iwlwifi/iwl-core.h
index d4c60afa289..bd7f9d9616b 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.h
+++ b/drivers/net/wireless/iwlwifi/iwl-core.h
@@ -281,9 +281,6 @@ void iwl_mac_remove_interface(struct ieee80211_hw *hw,
 				 struct ieee80211_if_init_conf *conf);
 int iwl_mac_config(struct ieee80211_hw *hw, u32 changed);
 void iwl_config_ap(struct iwl_priv *priv);
-int iwl_mac_config_interface(struct ieee80211_hw *hw,
-				struct ieee80211_vif *vif,
-				struct ieee80211_if_conf *conf);
 int iwl_mac_get_tx_stats(struct ieee80211_hw *hw,
 			 struct ieee80211_tx_queue_stats *stats);
 void iwl_mac_reset_tsf(struct ieee80211_hw *hw);
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index f9d9b65ef30..5cd4321d7cf 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -4106,7 +4106,6 @@ static struct ieee80211_ops iwl3945_hw_ops = {
 	.add_interface = iwl_mac_add_interface,
 	.remove_interface = iwl_mac_remove_interface,
 	.config = iwl_mac_config,
-	.config_interface = iwl_mac_config_interface,
 	.configure_filter = iwl_configure_filter,
 	.set_key = iwl3945_mac_set_key,
 	.get_tx_stats = iwl_mac_get_tx_stats,
diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c
index 68e47fb47a1..10a99e26d39 100644
--- a/drivers/net/wireless/libertas_tf/main.c
+++ b/drivers/net/wireless/libertas_tf/main.c
@@ -366,36 +366,6 @@ static int lbtf_op_config(struct ieee80211_hw *hw, u32 changed)
 	return 0;
 }
 
-static int lbtf_op_config_interface(struct ieee80211_hw *hw,
-			struct ieee80211_vif *vif,
-			struct ieee80211_if_conf *conf)
-{
-	struct lbtf_private *priv = hw->priv;
-	struct sk_buff *beacon;
-
-	switch (priv->vif->type) {
-	case NL80211_IFTYPE_AP:
-	case NL80211_IFTYPE_MESH_POINT:
-		beacon = ieee80211_beacon_get(hw, vif);
-		if (beacon) {
-			lbtf_beacon_set(priv, beacon);
-			kfree_skb(beacon);
-			lbtf_beacon_ctrl(priv, 1, vif->bss_conf.beacon_int);
-		}
-		break;
-	default:
-		break;
-	}
-
-	if (conf->bssid) {
-		u8 null_bssid[ETH_ALEN] = {0};
-		bool activate = compare_ether_addr(conf->bssid, null_bssid);
-		lbtf_set_bssid(priv, activate, conf->bssid);
-	}
-
-	return 0;
-}
-
 #define SUPPORTED_FIF_FLAGS  (FIF_PROMISC_IN_BSS | FIF_ALLMULTI)
 static void lbtf_op_configure_filter(struct ieee80211_hw *hw,
 			unsigned int changed_flags,
@@ -451,6 +421,29 @@ static void lbtf_op_bss_info_changed(struct ieee80211_hw *hw,
 			u32 changes)
 {
 	struct lbtf_private *priv = hw->priv;
+	struct sk_buff *beacon;
+
+	if (changes & (BSS_CHANGED_BEACON | BSS_CHANGED_BEACON_INT)) {
+		switch (priv->vif->type) {
+		case NL80211_IFTYPE_AP:
+		case NL80211_IFTYPE_MESH_POINT:
+			beacon = ieee80211_beacon_get(hw, vif);
+			if (beacon) {
+				lbtf_beacon_set(priv, beacon);
+				kfree_skb(beacon);
+				lbtf_beacon_ctrl(priv, 1,
+						 bss_conf->beacon_int);
+			}
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (changes & BSS_CHANGED_BSSID) {
+		bool activate = !is_zero_ether_addr(bss_conf->bssid);
+		lbtf_set_bssid(priv, activate, bss_conf->bssid);
+	}
 
 	if (changes & BSS_CHANGED_ERP_PREAMBLE) {
 		if (bss_conf->use_short_preamble)
@@ -459,8 +452,6 @@ static void lbtf_op_bss_info_changed(struct ieee80211_hw *hw,
 			priv->preamble = CMD_TYPE_LONG_PREAMBLE;
 		lbtf_set_radio_control(priv);
 	}
-
-	return;
 }
 
 static const struct ieee80211_ops lbtf_ops = {
@@ -470,7 +461,6 @@ static const struct ieee80211_ops lbtf_ops = {
 	.add_interface		= lbtf_op_add_interface,
 	.remove_interface	= lbtf_op_remove_interface,
 	.config			= lbtf_op_config,
-	.config_interface	= lbtf_op_config_interface,
 	.configure_filter	= lbtf_op_configure_filter,
 	.bss_info_changed	= lbtf_op_bss_info_changed,
 };
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 24c95a619e4..e67b424f925 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -587,23 +587,6 @@ static void mac80211_hwsim_configure_filter(struct ieee80211_hw *hw,
 	*total_flags = data->rx_filter;
 }
 
-static int mac80211_hwsim_config_interface(struct ieee80211_hw *hw,
-					   struct ieee80211_vif *vif,
-					   struct ieee80211_if_conf *conf)
-{
-	struct hwsim_vif_priv *vp = (void *)vif->drv_priv;
-
-	hwsim_check_magic(vif);
-	if (conf->changed & IEEE80211_IFCC_BSSID) {
-		DECLARE_MAC_BUF(mac);
-		printk(KERN_DEBUG "%s:%s: BSSID changed: %pM\n",
-		       wiphy_name(hw->wiphy), __func__,
-		       conf->bssid);
-		memcpy(vp->bssid, conf->bssid, ETH_ALEN);
-	}
-	return 0;
-}
-
 static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw,
 					    struct ieee80211_vif *vif,
 					    struct ieee80211_bss_conf *info,
@@ -617,6 +600,13 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw,
 	printk(KERN_DEBUG "%s:%s(changed=0x%x)\n",
 	       wiphy_name(hw->wiphy), __func__, changed);
 
+	if (changed & BSS_CHANGED_BSSID) {
+		printk(KERN_DEBUG "%s:%s: BSSID changed: %pM\n",
+		       wiphy_name(hw->wiphy), __func__,
+		       info->bssid);
+		memcpy(vp->bssid, info->bssid, ETH_ALEN);
+	}
+
 	if (changed & BSS_CHANGED_ASSOC) {
 		printk(KERN_DEBUG "  %s: ASSOC: assoc=%d aid=%d\n",
 		       wiphy_name(hw->wiphy), info->assoc, info->aid);
@@ -708,7 +698,6 @@ static const struct ieee80211_ops mac80211_hwsim_ops =
 	.remove_interface = mac80211_hwsim_remove_interface,
 	.config = mac80211_hwsim_config,
 	.configure_filter = mac80211_hwsim_configure_filter,
-	.config_interface = mac80211_hwsim_config_interface,
 	.bss_info_changed = mac80211_hwsim_bss_info_changed,
 	.sta_notify = mac80211_hwsim_sta_notify,
 	.set_tim = mac80211_hwsim_set_tim,
diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c
index a9a970469c2..46b288dc8f4 100644
--- a/drivers/net/wireless/mwl8k.c
+++ b/drivers/net/wireless/mwl8k.c
@@ -3089,19 +3089,6 @@ static int mwl8k_config(struct ieee80211_hw *hw, u32 changed)
 	return rc ? -EINVAL : 0;
 }
 
-static int mwl8k_config_interface(struct ieee80211_hw *hw,
-				  struct ieee80211_vif *vif,
-				  struct ieee80211_if_conf *conf)
-{
-	struct mwl8k_vif *mv_vif = MWL8K_VIF(vif);
-	u32 changed = conf->changed;
-
-	if (changed & IEEE80211_IFCC_BSSID)
-		memcpy(mv_vif->bssid, conf->bssid, IEEE80211_ADDR_LEN);
-
-	return 0;
-}
-
 struct mwl8k_bss_info_changed_worker {
 	struct mwl8k_work_struct header;
 	struct ieee80211_vif *vif;
@@ -3183,8 +3170,12 @@ static void mwl8k_bss_info_changed(struct ieee80211_hw *hw,
 {
 	struct mwl8k_bss_info_changed_worker *worker;
 	struct mwl8k_priv *priv = hw->priv;
+	struct mwl8k_vif *mv_vif = MWL8K_VIF(vif);
 	int rc;
 
+	if (changed & BSS_CHANGED_BSSID)
+		memcpy(mv_vif->bssid, info->bssid, IEEE80211_ADDR_LEN);
+
 	if ((changed & BSS_CHANGED_ASSOC) == 0)
 		return;
 
@@ -3442,7 +3433,6 @@ static const struct ieee80211_ops mwl8k_ops = {
 	.add_interface		= mwl8k_add_interface,
 	.remove_interface	= mwl8k_remove_interface,
 	.config			= mwl8k_config,
-	.config_interface	= mwl8k_config_interface,
 	.bss_info_changed	= mwl8k_bss_info_changed,
 	.configure_filter	= mwl8k_configure_filter,
 	.set_rts_threshold	= mwl8k_set_rts_threshold,
diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c
index 71394968d45..b8578529180 100644
--- a/drivers/net/wireless/p54/p54common.c
+++ b/drivers/net/wireless/p54/p54common.c
@@ -2204,41 +2204,6 @@ out:
 	return ret;
 }
 
-static int p54_config_interface(struct ieee80211_hw *dev,
-				struct ieee80211_vif *vif,
-				struct ieee80211_if_conf *conf)
-{
-	struct p54_common *priv = dev->priv;
-	int ret = 0;
-
-	mutex_lock(&priv->conf_mutex);
-	if (conf->changed & IEEE80211_IFCC_BSSID) {
-		memcpy(priv->bssid, conf->bssid, ETH_ALEN);
-		ret = p54_setup_mac(dev);
-		if (ret)
-			goto out;
-	}
-
-	if (conf->changed & IEEE80211_IFCC_BEACON) {
-		ret = p54_scan(dev, P54_SCAN_EXIT, 0);
-		if (ret)
-			goto out;
-		ret = p54_setup_mac(dev);
-		if (ret)
-			goto out;
-		ret = p54_beacon_update(dev, vif);
-		if (ret)
-			goto out;
-		ret = p54_set_edcf(dev);
-		if (ret)
-			goto out;
-	}
-
-out:
-	mutex_unlock(&priv->conf_mutex);
-	return ret;
-}
-
 static void p54_configure_filter(struct ieee80211_hw *dev,
 				 unsigned int changed_flags,
 				 unsigned int *total_flags,
@@ -2342,8 +2307,32 @@ static void p54_bss_info_changed(struct ieee80211_hw *dev,
 				 u32 changed)
 {
 	struct p54_common *priv = dev->priv;
+	int ret;
+
+	mutex_lock(&priv->conf_mutex);
+	if (changed & BSS_CHANGED_BSSID) {
+		memcpy(priv->bssid, info->bssid, ETH_ALEN);
+		ret = p54_setup_mac(dev);
+		if (ret)
+			goto out;
+	}
+
+	if (changed & BSS_CHANGED_BEACON) {
+		ret = p54_scan(dev, P54_SCAN_EXIT, 0);
+		if (ret)
+			goto out;
+		ret = p54_setup_mac(dev);
+		if (ret)
+			goto out;
+		ret = p54_beacon_update(dev, vif);
+		if (ret)
+			goto out;
+	}
+	/* XXX: this mimics having two callbacks... clean up */
+ out:
+	mutex_unlock(&priv->conf_mutex);
 
-	if (changed & BSS_CHANGED_ERP_SLOT) {
+	if (changed & (BSS_CHANGED_ERP_SLOT | BSS_CHANGED_BEACON)) {
 		priv->use_short_slot = info->use_short_slot;
 		p54_set_edcf(dev);
 	}
@@ -2364,7 +2353,6 @@ static void p54_bss_info_changed(struct ieee80211_hw *dev,
 			p54_setup_mac(dev);
 		}
 	}
-
 }
 
 static int p54_set_key(struct ieee80211_hw *dev, enum set_key_cmd cmd,
@@ -2619,7 +2607,6 @@ static const struct ieee80211_ops p54_ops = {
 	.sta_notify		= p54_sta_notify,
 	.set_key		= p54_set_key,
 	.config			= p54_config,
-	.config_interface	= p54_config_interface,
 	.bss_info_changed	= p54_bss_info_changed,
 	.configure_filter	= p54_configure_filter,
 	.conf_tx		= p54_conf_tx,
diff --git a/drivers/net/wireless/rt2x00/rt2400pci.c b/drivers/net/wireless/rt2x00/rt2400pci.c
index 411eb9cbb4e..6f39ba66218 100644
--- a/drivers/net/wireless/rt2x00/rt2400pci.c
+++ b/drivers/net/wireless/rt2x00/rt2400pci.c
@@ -1580,7 +1580,6 @@ static const struct ieee80211_ops rt2400pci_mac80211_ops = {
 	.add_interface		= rt2x00mac_add_interface,
 	.remove_interface	= rt2x00mac_remove_interface,
 	.config			= rt2x00mac_config,
-	.config_interface	= rt2x00mac_config_interface,
 	.configure_filter	= rt2x00mac_configure_filter,
 	.get_stats		= rt2x00mac_get_stats,
 	.bss_info_changed	= rt2x00mac_bss_info_changed,
diff --git a/drivers/net/wireless/rt2x00/rt2500pci.c b/drivers/net/wireless/rt2x00/rt2500pci.c
index e1be67ca23d..906960f67b6 100644
--- a/drivers/net/wireless/rt2x00/rt2500pci.c
+++ b/drivers/net/wireless/rt2x00/rt2500pci.c
@@ -1879,7 +1879,6 @@ static const struct ieee80211_ops rt2500pci_mac80211_ops = {
 	.add_interface		= rt2x00mac_add_interface,
 	.remove_interface	= rt2x00mac_remove_interface,
 	.config			= rt2x00mac_config,
-	.config_interface	= rt2x00mac_config_interface,
 	.configure_filter	= rt2x00mac_configure_filter,
 	.get_stats		= rt2x00mac_get_stats,
 	.bss_info_changed	= rt2x00mac_bss_info_changed,
diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c
index 9e630e70fc9..08e88333b0f 100644
--- a/drivers/net/wireless/rt2x00/rt2500usb.c
+++ b/drivers/net/wireless/rt2x00/rt2500usb.c
@@ -1908,7 +1908,6 @@ static const struct ieee80211_ops rt2500usb_mac80211_ops = {
 	.add_interface		= rt2x00mac_add_interface,
 	.remove_interface	= rt2x00mac_remove_interface,
 	.config			= rt2x00mac_config,
-	.config_interface	= rt2x00mac_config_interface,
 	.configure_filter	= rt2x00mac_configure_filter,
 	.set_key		= rt2x00mac_set_key,
 	.get_stats		= rt2x00mac_get_stats,
diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h
index e03d69975ea..0be39552826 100644
--- a/drivers/net/wireless/rt2x00/rt2x00.h
+++ b/drivers/net/wireless/rt2x00/rt2x00.h
@@ -943,9 +943,6 @@ int rt2x00mac_add_interface(struct ieee80211_hw *hw,
 void rt2x00mac_remove_interface(struct ieee80211_hw *hw,
 				struct ieee80211_if_init_conf *conf);
 int rt2x00mac_config(struct ieee80211_hw *hw, u32 changed);
-int rt2x00mac_config_interface(struct ieee80211_hw *hw,
-			       struct ieee80211_vif *vif,
-			       struct ieee80211_if_conf *conf);
 void rt2x00mac_configure_filter(struct ieee80211_hw *hw,
 				unsigned int changed_flags,
 				unsigned int *total_flags,
diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c
index c41a0b9e473..c4c06b4e1f0 100644
--- a/drivers/net/wireless/rt2x00/rt2x00mac.c
+++ b/drivers/net/wireless/rt2x00/rt2x00mac.c
@@ -390,56 +390,6 @@ int rt2x00mac_config(struct ieee80211_hw *hw, u32 changed)
 }
 EXPORT_SYMBOL_GPL(rt2x00mac_config);
 
-int rt2x00mac_config_interface(struct ieee80211_hw *hw,
-			       struct ieee80211_vif *vif,
-			       struct ieee80211_if_conf *conf)
-{
-	struct rt2x00_dev *rt2x00dev = hw->priv;
-	struct rt2x00_intf *intf = vif_to_intf(vif);
-	int update_bssid = 0;
-	int status = 0;
-
-	/*
-	 * Mac80211 might be calling this function while we are trying
-	 * to remove the device or perhaps suspending it.
-	 */
-	if (!test_bit(DEVICE_STATE_PRESENT, &rt2x00dev->flags))
-		return 0;
-
-	spin_lock(&intf->lock);
-
-	/*
-	 * conf->bssid can be NULL if coming from the internal
-	 * beacon update routine.
-	 */
-	if (conf->changed & IEEE80211_IFCC_BSSID && conf->bssid) {
-		update_bssid = 1;
-		memcpy(&intf->bssid, conf->bssid, ETH_ALEN);
-	}
-
-	spin_unlock(&intf->lock);
-
-	/*
-	 * Call rt2x00_config_intf() outside of the spinlock context since
-	 * the call will sleep for USB drivers. By using the ieee80211_if_conf
-	 * values as arguments we make keep access to rt2x00_intf thread safe
-	 * even without the lock.
-	 */
-	rt2x00lib_config_intf(rt2x00dev, intf, vif->type, NULL,
-			      update_bssid ? conf->bssid : NULL);
-
-	/*
-	 * Update the beacon.
-	 */
-	if (conf->changed & (IEEE80211_IFCC_BEACON |
-			     IEEE80211_IFCC_BEACON_ENABLED))
-		status = rt2x00queue_update_beacon(rt2x00dev, vif,
-						   conf->enable_beacon);
-
-	return status;
-}
-EXPORT_SYMBOL_GPL(rt2x00mac_config_interface);
-
 void rt2x00mac_configure_filter(struct ieee80211_hw *hw,
 				unsigned int changed_flags,
 				unsigned int *total_flags,
@@ -623,6 +573,44 @@ void rt2x00mac_bss_info_changed(struct ieee80211_hw *hw,
 	struct rt2x00_dev *rt2x00dev = hw->priv;
 	struct rt2x00_intf *intf = vif_to_intf(vif);
 	unsigned int delayed = 0;
+	int update_bssid = 0;
+
+	/*
+	 * Mac80211 might be calling this function while we are trying
+	 * to remove the device or perhaps suspending it.
+	 */
+	if (!test_bit(DEVICE_STATE_PRESENT, &rt2x00dev->flags))
+		return;
+
+	spin_lock(&intf->lock);
+
+	/*
+	 * conf->bssid can be NULL if coming from the internal
+	 * beacon update routine.
+	 */
+	if (changes & BSS_CHANGED_BSSID) {
+		update_bssid = 1;
+		memcpy(&intf->bssid, bss_conf->bssid, ETH_ALEN);
+	}
+
+	spin_unlock(&intf->lock);
+
+	/*
+	 * Call rt2x00_config_intf() outside of the spinlock context since
+	 * the call will sleep for USB drivers. By using the ieee80211_if_conf
+	 * values as arguments we make keep access to rt2x00_intf thread safe
+	 * even without the lock.
+	 */
+	if (changes & BSS_CHANGED_BSSID)
+		rt2x00lib_config_intf(rt2x00dev, intf, vif->type, NULL,
+				      update_bssid ? bss_conf->bssid : NULL);
+
+	/*
+	 * Update the beacon.
+	 */
+	if (changes & (BSS_CHANGED_BEACON | BSS_CHANGED_BEACON_ENABLED))
+		rt2x00queue_update_beacon(rt2x00dev, vif,
+					  bss_conf->enable_beacon);
 
 	/*
 	 * When the association status has changed we must reset the link
diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c
index 4346cd1494b..cb521ee7a8f 100644
--- a/drivers/net/wireless/rt2x00/rt61pci.c
+++ b/drivers/net/wireless/rt2x00/rt61pci.c
@@ -2735,7 +2735,6 @@ static const struct ieee80211_ops rt61pci_mac80211_ops = {
 	.add_interface		= rt2x00mac_add_interface,
 	.remove_interface	= rt2x00mac_remove_interface,
 	.config			= rt2x00mac_config,
-	.config_interface	= rt2x00mac_config_interface,
 	.configure_filter	= rt2x00mac_configure_filter,
 	.set_key		= rt2x00mac_set_key,
 	.get_stats		= rt2x00mac_get_stats,
diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c
index 853b2b279b6..6bd28a6bcef 100644
--- a/drivers/net/wireless/rt2x00/rt73usb.c
+++ b/drivers/net/wireless/rt2x00/rt73usb.c
@@ -2259,7 +2259,6 @@ static const struct ieee80211_ops rt73usb_mac80211_ops = {
 	.add_interface		= rt2x00mac_add_interface,
 	.remove_interface	= rt2x00mac_remove_interface,
 	.config			= rt2x00mac_config,
-	.config_interface	= rt2x00mac_config_interface,
 	.configure_filter	= rt2x00mac_configure_filter,
 	.set_key		= rt2x00mac_set_key,
 	.get_stats		= rt2x00mac_get_stats,
diff --git a/drivers/net/wireless/rtl818x/rtl8180_dev.c b/drivers/net/wireless/rtl818x/rtl8180_dev.c
index 387c133ec0f..7e65d7c3180 100644
--- a/drivers/net/wireless/rtl818x/rtl8180_dev.c
+++ b/drivers/net/wireless/rtl818x/rtl8180_dev.c
@@ -702,30 +702,26 @@ static int rtl8180_config(struct ieee80211_hw *dev, u32 changed)
 	return 0;
 }
 
-static int rtl8180_config_interface(struct ieee80211_hw *dev,
-				    struct ieee80211_vif *vif,
-				    struct ieee80211_if_conf *conf)
-{
-	struct rtl8180_priv *priv = dev->priv;
-	int i;
-
-	for (i = 0; i < ETH_ALEN; i++)
-		rtl818x_iowrite8(priv, &priv->map->BSSID[i], conf->bssid[i]);
-
-	if (is_valid_ether_addr(conf->bssid))
-		rtl818x_iowrite8(priv, &priv->map->MSR, RTL818X_MSR_INFRA);
-	else
-		rtl818x_iowrite8(priv, &priv->map->MSR, RTL818X_MSR_NO_LINK);
-
-	return 0;
-}
-
 static void rtl8180_bss_info_changed(struct ieee80211_hw *dev,
 				     struct ieee80211_vif *vif,
 				     struct ieee80211_bss_conf *info,
 				     u32 changed)
 {
 	struct rtl8180_priv *priv = dev->priv;
+	int i;
+
+	if (changed & BSS_CHANGED_BSSID) {
+		for (i = 0; i < ETH_ALEN; i++)
+			rtl818x_iowrite8(priv, &priv->map->BSSID[i],
+					 info->bssid[i]);
+
+		if (is_valid_ether_addr(info->bssid))
+			rtl818x_iowrite8(priv, &priv->map->MSR,
+					 RTL818X_MSR_INFRA);
+		else
+			rtl818x_iowrite8(priv, &priv->map->MSR,
+					 RTL818X_MSR_NO_LINK);
+	}
 
 	if (changed & BSS_CHANGED_ERP_SLOT && priv->rf->conf_erp)
 	        priv->rf->conf_erp(dev, info);
@@ -770,7 +766,6 @@ static const struct ieee80211_ops rtl8180_ops = {
 	.add_interface		= rtl8180_add_interface,
 	.remove_interface	= rtl8180_remove_interface,
 	.config			= rtl8180_config,
-	.config_interface	= rtl8180_config_interface,
 	.bss_info_changed	= rtl8180_bss_info_changed,
 	.configure_filter	= rtl8180_configure_filter,
 };
diff --git a/drivers/net/wireless/rtl818x/rtl8187_dev.c b/drivers/net/wireless/rtl818x/rtl8187_dev.c
index ac558da92aa..158827e50c5 100644
--- a/drivers/net/wireless/rtl818x/rtl8187_dev.c
+++ b/drivers/net/wireless/rtl818x/rtl8187_dev.c
@@ -1090,32 +1090,6 @@ static int rtl8187_config(struct ieee80211_hw *dev, u32 changed)
 	return 0;
 }
 
-static int rtl8187_config_interface(struct ieee80211_hw *dev,
-				    struct ieee80211_vif *vif,
-				    struct ieee80211_if_conf *conf)
-{
-	struct rtl8187_priv *priv = dev->priv;
-	int i;
-	u8 reg;
-
-	mutex_lock(&priv->conf_mutex);
-	for (i = 0; i < ETH_ALEN; i++)
-		rtl818x_iowrite8(priv, &priv->map->BSSID[i], conf->bssid[i]);
-
-	if (is_valid_ether_addr(conf->bssid)) {
-		reg = RTL818X_MSR_INFRA;
-		if (priv->is_rtl8187b)
-			reg |= RTL818X_MSR_ENEDCA;
-		rtl818x_iowrite8(priv, &priv->map->MSR, reg);
-	} else {
-		reg = RTL818X_MSR_NO_LINK;
-		rtl818x_iowrite8(priv, &priv->map->MSR, reg);
-	}
-
-	mutex_unlock(&priv->conf_mutex);
-	return 0;
-}
-
 /*
  * With 8187B, AC_*_PARAM clashes with FEMR definition in struct rtl818x_csr for
  * example. Thus we have to use raw values for AC_*_PARAM register addresses.
@@ -1193,6 +1167,27 @@ static void rtl8187_bss_info_changed(struct ieee80211_hw *dev,
 				     u32 changed)
 {
 	struct rtl8187_priv *priv = dev->priv;
+	int i;
+	u8 reg;
+
+	if (changed & BSS_CHANGED_BSSID) {
+		mutex_lock(&priv->conf_mutex);
+		for (i = 0; i < ETH_ALEN; i++)
+			rtl818x_iowrite8(priv, &priv->map->BSSID[i],
+					 info->bssid[i]);
+
+		if (is_valid_ether_addr(info->bssid)) {
+			reg = RTL818X_MSR_INFRA;
+			if (priv->is_rtl8187b)
+				reg |= RTL818X_MSR_ENEDCA;
+			rtl818x_iowrite8(priv, &priv->map->MSR, reg);
+		} else {
+			reg = RTL818X_MSR_NO_LINK;
+			rtl818x_iowrite8(priv, &priv->map->MSR, reg);
+		}
+
+		mutex_unlock(&priv->conf_mutex);
+	}
 
 	if (changed & (BSS_CHANGED_ERP_SLOT | BSS_CHANGED_ERP_PREAMBLE))
 		rtl8187_conf_erp(priv, info->use_short_slot,
@@ -1274,7 +1269,6 @@ static const struct ieee80211_ops rtl8187_ops = {
 	.add_interface		= rtl8187_add_interface,
 	.remove_interface	= rtl8187_remove_interface,
 	.config			= rtl8187_config,
-	.config_interface	= rtl8187_config_interface,
 	.bss_info_changed	= rtl8187_bss_info_changed,
 	.configure_filter	= rtl8187_configure_filter,
 	.conf_tx		= rtl8187_conf_tx
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index c3a51266de2..6bdb1704083 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -755,52 +755,6 @@ static int zd_op_config(struct ieee80211_hw *hw, u32 changed)
 	return zd_chip_set_channel(&mac->chip, conf->channel->hw_value);
 }
 
-static int zd_op_config_interface(struct ieee80211_hw *hw,
-				  struct ieee80211_vif *vif,
-				   struct ieee80211_if_conf *conf)
-{
-	struct zd_mac *mac = zd_hw_mac(hw);
-	int associated;
-	int r;
-
-	if (mac->type == NL80211_IFTYPE_MESH_POINT ||
-	    mac->type == NL80211_IFTYPE_ADHOC) {
-		associated = true;
-		if (conf->changed & IEEE80211_IFCC_BEACON) {
-			struct sk_buff *beacon = ieee80211_beacon_get(hw, vif);
-
-			if (!beacon)
-				return -ENOMEM;
-			r = zd_mac_config_beacon(hw, beacon);
-			kfree_skb(beacon);
-
-			if (r < 0)
-				return r;
-		}
-
-		if (conf->changed & IEEE80211_IFCC_BEACON_ENABLED) {
-			u32 interval;
-
-			if (conf->enable_beacon)
-				interval = BCN_MODE_IBSS | hw->conf.beacon_int;
-			else
-				interval = 0;
-
-			r = zd_set_beacon_interval(&mac->chip, interval);
-			if (r < 0)
-				return r;
-		}
-	} else
-		associated = is_valid_ether_addr(conf->bssid);
-
-	spin_lock_irq(&mac->lock);
-	mac->associated = associated;
-	spin_unlock_irq(&mac->lock);
-
-	/* TODO: do hardware bssid filtering */
-	return 0;
-}
-
 static void zd_process_intr(struct work_struct *work)
 {
 	u16 int_status;
@@ -923,9 +877,42 @@ static void zd_op_bss_info_changed(struct ieee80211_hw *hw,
 {
 	struct zd_mac *mac = zd_hw_mac(hw);
 	unsigned long flags;
+	int associated;
 
 	dev_dbg_f(zd_mac_dev(mac), "changes: %x\n", changes);
 
+	if (mac->type == NL80211_IFTYPE_MESH_POINT ||
+	    mac->type == NL80211_IFTYPE_ADHOC) {
+		associated = true;
+		if (changes & BSS_CHANGED_BEACON) {
+			struct sk_buff *beacon = ieee80211_beacon_get(hw, vif);
+
+			if (beacon) {
+				zd_mac_config_beacon(hw, beacon);
+				kfree_skb(beacon);
+			}
+		}
+
+		if (changes & BSS_CHANGED_BEACON_ENABLED) {
+			u32 interval;
+
+			if (bss_conf->enable_beacon)
+				interval = BCN_MODE_IBSS |
+						bss_conf->beacon_int;
+			else
+				interval = 0;
+
+			zd_set_beacon_interval(&mac->chip, interval);
+		}
+	} else
+		associated = is_valid_ether_addr(bss_conf->bssid);
+
+	spin_lock_irq(&mac->lock);
+	mac->associated = associated;
+	spin_unlock_irq(&mac->lock);
+
+	/* TODO: do hardware bssid filtering */
+
 	if (changes & BSS_CHANGED_ERP_PREAMBLE) {
 		spin_lock_irqsave(&mac->lock, flags);
 		mac->short_preamble = bss_conf->use_short_preamble;
@@ -952,7 +939,6 @@ static const struct ieee80211_ops zd_ops = {
 	.add_interface		= zd_op_add_interface,
 	.remove_interface	= zd_op_remove_interface,
 	.config			= zd_op_config,
-	.config_interface	= zd_op_config_interface,
 	.configure_filter	= zd_op_configure_filter,
 	.bss_info_changed	= zd_op_bss_info_changed,
 	.get_tsf		= zd_op_get_tsf,
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 22c65e8cbb7..7806e22f4ac 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -150,6 +150,12 @@ struct ieee80211_low_level_stats {
  * @BSS_CHANGED_HT: 802.11n parameters changed
  * @BSS_CHANGED_BASIC_RATES: Basic rateset changed
  * @BSS_CHANGED_BEACON_INT: Beacon interval changed
+ * @BSS_CHANGED_BSSID: BSSID changed, for whatever
+ *	reason (IBSS and managed mode)
+ * @BSS_CHANGED_BEACON: Beacon data changed, retrieve
+ *	new beacon (beaconing modes)
+ * @BSS_CHANGED_BEACON_ENABLED: Beaconing should be
+ *	enabled/disabled (beaconing modes)
  */
 enum ieee80211_bss_change {
 	BSS_CHANGED_ASSOC		= 1<<0,
@@ -159,6 +165,9 @@ enum ieee80211_bss_change {
 	BSS_CHANGED_HT                  = 1<<4,
 	BSS_CHANGED_BASIC_RATES		= 1<<5,
 	BSS_CHANGED_BEACON_INT		= 1<<6,
+	BSS_CHANGED_BSSID		= 1<<7,
+	BSS_CHANGED_BEACON		= 1<<8,
+	BSS_CHANGED_BEACON_ENABLED	= 1<<9,
 };
 
 /**
@@ -192,8 +201,11 @@ struct ieee80211_bss_ht_conf {
  * @basic_rates: bitmap of basic rates, each bit stands for an
  *	index into the rate table configured by the driver in
  *	the current band.
+ * @bssid: The BSSID for this BSS
+ * @enable_beacon: whether beaconing should be enabled or not
  */
 struct ieee80211_bss_conf {
+	const u8 *bssid;
 	/* association related data */
 	bool assoc;
 	u16 aid;
@@ -201,6 +213,7 @@ struct ieee80211_bss_conf {
 	bool use_cts_prot;
 	bool use_short_preamble;
 	bool use_short_slot;
+	bool enable_beacon;
 	u8 dtim_period;
 	u16 beacon_int;
 	u16 assoc_capability;
@@ -659,37 +672,6 @@ struct ieee80211_if_init_conf {
 	void *mac_addr;
 };
 
-/**
- * enum ieee80211_if_conf_change - interface config change flags
- *
- * @IEEE80211_IFCC_BSSID: The BSSID changed.
- * @IEEE80211_IFCC_BEACON: The beacon for this interface changed
- *	(currently AP and MESH only), use ieee80211_beacon_get().
- * @IEEE80211_IFCC_BEACON_ENABLED: The enable_beacon value changed.
- */
-enum ieee80211_if_conf_change {
-	IEEE80211_IFCC_BSSID		= BIT(0),
-	IEEE80211_IFCC_BEACON		= BIT(1),
-	IEEE80211_IFCC_BEACON_ENABLED	= BIT(2),
-};
-
-/**
- * struct ieee80211_if_conf - configuration of an interface
- *
- * @changed: parameters that have changed, see &enum ieee80211_if_conf_change.
- * @bssid: BSSID of the network we are associated to/creating.
- * @enable_beacon: Indicates whether beacons can be sent.
- *	This is valid only for AP/IBSS/MESH modes.
- *
- * This structure is passed to the config_interface() callback of
- * &struct ieee80211_hw.
- */
-struct ieee80211_if_conf {
-	u32 changed;
-	const u8 *bssid;
-	bool enable_beacon;
-};
-
 /**
  * enum ieee80211_key_alg - key algorithm
  * @ALG_WEP: WEP40 or WEP104
@@ -1358,10 +1340,6 @@ enum ieee80211_ampdu_mlme_action {
  *	This function should never fail but returns a negative error code
  *	if it does.
  *
- * @config_interface: Handler for configuration requests related to interfaces
- *	(e.g. BSSID changes.)
- *	Returns a negative error code which will be seen in userspace.
- *
  * @bss_info_changed: Handler for configuration requests related to BSS
  *	parameters that may vary during BSS's lifespan, and may affect low
  *	level driver (e.g. assoc/disassoc status, erp parameters).
@@ -1463,9 +1441,6 @@ struct ieee80211_ops {
 	void (*remove_interface)(struct ieee80211_hw *hw,
 				 struct ieee80211_if_init_conf *conf);
 	int (*config)(struct ieee80211_hw *hw, u32 changed);
-	int (*config_interface)(struct ieee80211_hw *hw,
-				struct ieee80211_vif *vif,
-				struct ieee80211_if_conf *conf);
 	void (*bss_info_changed)(struct ieee80211_hw *hw,
 				 struct ieee80211_vif *vif,
 				 struct ieee80211_bss_conf *info,
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index a898ccd3f2c..648bac1c850 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -521,8 +521,9 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata,
 
 	kfree(old);
 
-	return ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON |
-					  IEEE80211_IFCC_BEACON_ENABLED);
+	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED |
+						BSS_CHANGED_BEACON);
+	return 0;
 }
 
 static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev,
@@ -573,7 +574,8 @@ static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev)
 	synchronize_rcu();
 	kfree(old);
 
-	return ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON_ENABLED);
+	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED);
+	return 0;
 }
 
 /* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index f4879dad3cd..c87caad383f 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -95,17 +95,10 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 
 	sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0;
 
-	ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID);
-
 	local->oper_channel = chan;
 	local->oper_channel_type = NL80211_CHAN_NO_HT;
 	ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
 
-	sdata->vif.bss_conf.beacon_int = beacon_int;
-	bss_change = BSS_CHANGED_BEACON_INT;
-	bss_change |= ieee80211_reset_erp_info(sdata);
-	ieee80211_bss_info_change_notify(sdata, bss_change);
-
 	sband = local->hw.wiphy->bands[chan->band];
 
 	/* Build IBSS probe response */
@@ -161,8 +154,13 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 
 	rcu_assign_pointer(ifibss->presp, skb);
 
-	ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON |
-				   IEEE80211_IFCC_BEACON_ENABLED);
+	sdata->vif.bss_conf.beacon_int = beacon_int;
+	bss_change = BSS_CHANGED_BEACON_INT;
+	bss_change |= ieee80211_reset_erp_info(sdata);
+	bss_change |= BSS_CHANGED_BSSID;
+	bss_change |= BSS_CHANGED_BEACON;
+	bss_change |= BSS_CHANGED_BEACON_ENABLED;
+	ieee80211_bss_info_change_notify(sdata, bss_change);
 
 	rates = 0;
 	for (i = 0; i < supp_rates_len; i++) {
@@ -887,7 +885,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
 	kfree(sdata->u.ibss.ie);
 	skb = sdata->u.ibss.presp;
 	rcu_assign_pointer(sdata->u.ibss.presp, NULL);
-	ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON_ENABLED);
+	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED);
 	synchronize_rcu();
 	kfree_skb(skb);
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index e6fd4bcf1c3..d8de1e159ee 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -906,7 +906,6 @@ static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr)
 
 
 int ieee80211_hw_config(struct ieee80211_local *local, u32 changed);
-int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed);
 void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx);
 void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 				      u32 changed);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index b254879d863..c817c9ef215 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -152,82 +152,6 @@ static void ieee80211_master_set_multicast_list(struct net_device *dev)
 	ieee80211_configure_filter(local);
 }
 
-/* everything else */
-
-int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed)
-{
-	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_if_conf conf;
-
-	if (WARN_ON(!netif_running(sdata->dev)))
-		return 0;
-
-	memset(&conf, 0, sizeof(conf));
-
-	if (sdata->vif.type == NL80211_IFTYPE_STATION)
-		conf.bssid = sdata->u.mgd.bssid;
-	else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
-		conf.bssid = sdata->u.ibss.bssid;
-	else if (sdata->vif.type == NL80211_IFTYPE_AP)
-		conf.bssid = sdata->dev->dev_addr;
-	else if (ieee80211_vif_is_mesh(&sdata->vif)) {
-		static const u8 zero[ETH_ALEN] = { 0 };
-		conf.bssid = zero;
-	} else {
-		WARN_ON(1);
-		return -EINVAL;
-	}
-
-	if (!local->ops->config_interface)
-		return 0;
-
-	switch (sdata->vif.type) {
-	case NL80211_IFTYPE_AP:
-	case NL80211_IFTYPE_ADHOC:
-	case NL80211_IFTYPE_MESH_POINT:
-		break;
-	default:
-		/* do not warn to simplify caller in scan.c */
-		changed &= ~IEEE80211_IFCC_BEACON_ENABLED;
-		if (WARN_ON(changed & IEEE80211_IFCC_BEACON))
-			return -EINVAL;
-		changed &= ~IEEE80211_IFCC_BEACON;
-		break;
-	}
-
-	if (changed & IEEE80211_IFCC_BEACON_ENABLED) {
-		if (local->sw_scanning) {
-			conf.enable_beacon = false;
-		} else {
-			/*
-			 * Beacon should be enabled, but AP mode must
-			 * check whether there is a beacon configured.
-			 */
-			switch (sdata->vif.type) {
-			case NL80211_IFTYPE_AP:
-				conf.enable_beacon =
-					!!rcu_dereference(sdata->u.ap.beacon);
-				break;
-			case NL80211_IFTYPE_ADHOC:
-				conf.enable_beacon = !!sdata->u.ibss.presp;
-				break;
-			case NL80211_IFTYPE_MESH_POINT:
-				conf.enable_beacon = true;
-				break;
-			default:
-				/* not reached */
-				WARN_ON(1);
-				break;
-			}
-		}
-	}
-
-	conf.changed = changed;
-
-	return local->ops->config_interface(local_to_hw(local),
-					    &sdata->vif, &conf);
-}
-
 int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
 {
 	struct ieee80211_channel *chan;
@@ -297,6 +221,61 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 	if (!changed)
 		return;
 
+	if (sdata->vif.type == NL80211_IFTYPE_STATION)
+		sdata->vif.bss_conf.bssid = sdata->u.mgd.bssid;
+	else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
+		sdata->vif.bss_conf.bssid = sdata->u.ibss.bssid;
+	else if (sdata->vif.type == NL80211_IFTYPE_AP)
+		sdata->vif.bss_conf.bssid = sdata->dev->dev_addr;
+	else if (ieee80211_vif_is_mesh(&sdata->vif)) {
+		static const u8 zero[ETH_ALEN] = { 0 };
+		sdata->vif.bss_conf.bssid = zero;
+	} else {
+		WARN_ON(1);
+		return;
+	}
+
+	switch (sdata->vif.type) {
+	case NL80211_IFTYPE_AP:
+	case NL80211_IFTYPE_ADHOC:
+	case NL80211_IFTYPE_MESH_POINT:
+		break;
+	default:
+		/* do not warn to simplify caller in scan.c */
+		changed &= ~BSS_CHANGED_BEACON_ENABLED;
+		if (WARN_ON(changed & BSS_CHANGED_BEACON))
+			return;
+		break;
+	}
+
+	if (changed & BSS_CHANGED_BEACON_ENABLED) {
+		if (local->sw_scanning) {
+			sdata->vif.bss_conf.enable_beacon = false;
+		} else {
+			/*
+			 * Beacon should be enabled, but AP mode must
+			 * check whether there is a beacon configured.
+			 */
+			switch (sdata->vif.type) {
+			case NL80211_IFTYPE_AP:
+				sdata->vif.bss_conf.enable_beacon =
+					!!rcu_dereference(sdata->u.ap.beacon);
+				break;
+			case NL80211_IFTYPE_ADHOC:
+				sdata->vif.bss_conf.enable_beacon =
+					!!rcu_dereference(sdata->u.ibss.presp);
+				break;
+			case NL80211_IFTYPE_MESH_POINT:
+				sdata->vif.bss_conf.enable_beacon = true;
+				break;
+			default:
+				/* not reached */
+				WARN_ON(1);
+				break;
+			}
+		}
+	}
+
 	if (local->ops->bss_info_changed)
 		local->ops->bss_info_changed(local_to_hw(local),
 					     &sdata->vif,
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 9a3e5de0410..9000b01a167 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -417,7 +417,7 @@ static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata,
 
 	free_plinks = mesh_plink_availables(sdata);
 	if (free_plinks != sdata->u.mesh.accepting_plinks)
-		ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON);
+		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON);
 
 	ifmsh->housekeeping = false;
 	mod_timer(&ifmsh->housekeeping_timer,
@@ -432,8 +432,8 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
 
 	ifmsh->housekeeping = true;
 	queue_work(local->hw.workqueue, &ifmsh->work);
-	ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON |
-				   IEEE80211_IFCC_BEACON_ENABLED);
+	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON |
+						BSS_CHANGED_BEACON_ENABLED);
 }
 
 void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index bfd571e6f22..c7971196d9d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2289,12 +2289,8 @@ int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid)
 		ifmgd->flags &= ~IEEE80211_STA_BSSID_SET;
 	}
 
-	if (netif_running(sdata->dev)) {
-		if (ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID)) {
-			printk(KERN_DEBUG "%s: Failed to config new BSSID to "
-			       "the low-level driver\n", sdata->dev->name);
-		}
-	}
+	if (netif_running(sdata->dev))
+		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID);
 
 	return ieee80211_sta_commit(sdata);
 }
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 086d21645cb..04e270abdd2 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -346,8 +346,8 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
 		if (sdata->vif.type == NL80211_IFTYPE_AP ||
 		    sdata->vif.type == NL80211_IFTYPE_ADHOC ||
 		    sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
-			ieee80211_if_config(sdata,
-					    IEEE80211_IFCC_BEACON_ENABLED);
+			ieee80211_bss_info_change_notify(
+				sdata, BSS_CHANGED_BEACON_ENABLED);
 	}
 	mutex_unlock(&local->iflist_mtx);
 
@@ -387,8 +387,8 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local)
 		if (sdata->vif.type == NL80211_IFTYPE_AP ||
 		    sdata->vif.type == NL80211_IFTYPE_ADHOC ||
 		    sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
-			ieee80211_if_config(sdata,
-					    IEEE80211_IFCC_BEACON_ENABLED);
+			ieee80211_bss_info_change_notify(
+				sdata, BSS_CHANGED_BEACON_ENABLED);
 
 		if (sdata->vif.type == NL80211_IFTYPE_STATION) {
 			if (sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED) {
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 61876eb50b4..2cde9bbfe7d 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1063,24 +1063,13 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 		switch (sdata->vif.type) {
 		case NL80211_IFTYPE_STATION:
 			/* disable beacon change bits */
-			changed &= ~IEEE80211_IFCC_BEACON;
+			changed &= ~(BSS_CHANGED_BEACON |
+				     BSS_CHANGED_BEACON_ENABLED);
 			/* fall through */
 		case NL80211_IFTYPE_ADHOC:
 		case NL80211_IFTYPE_AP:
 		case NL80211_IFTYPE_MESH_POINT:
-			/*
-			 * Driver's config_interface can fail if rfkill is
-			 * enabled. Accommodate this return code.
-			 * FIXME: When mac80211 has knowledge of rfkill
-			 * state the code below can change back to:
-			 *   WARN(ieee80211_if_config(sdata, changed));
-			 *   ieee80211_bss_info_change_notify(sdata, ~0);
-			 */
-			if (ieee80211_if_config(sdata, changed))
-				printk(KERN_DEBUG "%s: failed to configure interface during resume\n",
-				       sdata->dev->name);
-			else
-				ieee80211_bss_info_change_notify(sdata, ~0);
+			ieee80211_bss_info_change_notify(sdata, changed);
 			break;
 		case NL80211_IFTYPE_WDS:
 			break;
-- 
cgit v1.2.3-70-g09d2


From 5cff20e6c5a6591a79d3b027af222870f52bb550 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 29 Apr 2009 12:26:17 +0200
Subject: mac80211: tell driver when idle

When we aren't doing anything in mac80211, we can turn off
much of the hardware, depending on the driver/hw. Not doing
anything, aka being idle, means:

 * no monitor interfaces
 * no AP/mesh/wds interfaces
 * any station interfaces are in DISABLED state
 * any IBSS interfaces aren't trying to be in a network
 * we aren't trying to scan

By creating a new function that verifies these conditions and calling
it at strategic points where the states of those conditions change,
we can easily make mac80211 tell the driver when we are idle to save
power.

Additionally, this fixes a small quirk where a recalculated powersave
state is passed to the driver even if the hardware is about to stopped
completely.

This patch intentionally doesn't touch radio_enabled because that is
currently implemented to be a soft rfkill which is inappropriate here
when we need to be able to wake up with low latency.

One thing I'm not entirely sure about is this:

  phy0: device no longer idle - in use
  wlan0: direct probe to AP 00:11:24:91:07:4d try 1
  wlan0 direct probe responded
  wlan0: authenticate with AP 00:11:24:91:07:4d
  wlan0: authenticated
> phy0: device now idle
> phy0: device no longer idle - in use
  wlan0: associate with AP 00:11:24:91:07:4d
  wlan0: RX AssocResp from 00:11:24:91:07:4d (capab=0x401 status=0 aid=1)
  wlan0: associated

Is it appropriate to go into idle state for a short time when we have
just authenticated, but not associated yet? This happens only with the
userspace SME, because we cannot really know how long it will wait
before asking us to associate. Would going idle after a short timeout
be more appropriate? We may need to revisit this, depending on what
happens.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/mac80211_hwsim.c |  6 ++-
 include/net/mac80211.h                |  8 ++++
 net/mac80211/ibss.c                   |  5 +++
 net/mac80211/ieee80211_i.h            |  2 +
 net/mac80211/iface.c                  | 78 ++++++++++++++++++++++++++++++++++-
 net/mac80211/mlme.c                   | 11 +++++
 net/mac80211/scan.c                   | 17 ++++----
 7 files changed, 113 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 8fa6e6ce570..b1213b6a6b9 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -553,9 +553,11 @@ static int mac80211_hwsim_config(struct ieee80211_hw *hw, u32 changed)
 	struct mac80211_hwsim_data *data = hw->priv;
 	struct ieee80211_conf *conf = &hw->conf;
 
-	printk(KERN_DEBUG "%s:%s (freq=%d radio_enabled=%d)\n",
+	printk(KERN_DEBUG "%s:%s (freq=%d radio_enabled=%d idle=%d ps=%d)\n",
 	       wiphy_name(hw->wiphy), __func__,
-	       conf->channel->center_freq, conf->radio_enabled);
+	       conf->channel->center_freq, conf->radio_enabled,
+	       !!(conf->flags & IEEE80211_CONF_IDLE),
+	       !!(conf->flags & IEEE80211_CONF_PS));
 
 	data->channel = conf->channel;
 	data->radio_enabled = conf->radio_enabled;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 7806e22f4ac..38dc1cd1027 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -533,10 +533,16 @@ struct ieee80211_rx_status {
  *
  * @IEEE80211_CONF_RADIOTAP: add radiotap header at receive time (if supported)
  * @IEEE80211_CONF_PS: Enable 802.11 power save mode (managed mode only)
+ * @IEEE80211_CONF_IDLE: The device is running, but idle; if the flag is set
+ *	the driver should be prepared to handle configuration requests but
+ *	may turn the device off as much as possible. Typically, this flag will
+ *	be set when an interface is set UP but not associated or scanning, but
+ *	it can also be unset in that case when monitor interfaces are active.
  */
 enum ieee80211_conf_flags {
 	IEEE80211_CONF_RADIOTAP		= (1<<0),
 	IEEE80211_CONF_PS		= (1<<1),
+	IEEE80211_CONF_IDLE		= (1<<2),
 };
 
 
@@ -551,6 +557,7 @@ enum ieee80211_conf_flags {
  * @IEEE80211_CONF_CHANGE_POWER: the TX power changed
  * @IEEE80211_CONF_CHANGE_CHANNEL: the channel/channel_type changed
  * @IEEE80211_CONF_CHANGE_RETRY_LIMITS: retry limits changed
+ * @IEEE80211_CONF_CHANGE_IDLE: Idle flag changed
  */
 enum ieee80211_conf_changed {
 	IEEE80211_CONF_CHANGE_RADIO_ENABLED	= BIT(0),
@@ -561,6 +568,7 @@ enum ieee80211_conf_changed {
 	IEEE80211_CONF_CHANGE_POWER		= BIT(5),
 	IEEE80211_CONF_CHANGE_CHANNEL		= BIT(6),
 	IEEE80211_CONF_CHANGE_RETRY_LIMITS	= BIT(7),
+	IEEE80211_CONF_CHANGE_IDLE		= BIT(8),
 };
 
 static inline __deprecated enum ieee80211_conf_changed
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index a8e23232267..aa537681f87 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -862,6 +862,8 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
 
 	sdata->u.ibss.ssid_len = params->ssid_len;
 
+	ieee80211_recalc_idle(sdata->local);
+
 	set_bit(IEEE80211_IBSS_REQ_RUN, &sdata->u.ibss.request);
 	queue_work(sdata->local->hw.workqueue, &sdata->u.ibss.work);
 
@@ -889,6 +891,9 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
 
 	skb_queue_purge(&sdata->u.ibss.skb_queue);
 	memset(sdata->u.ibss.bssid, 0, ETH_ALEN);
+	sdata->u.ibss.ssid_len = 0;
+
+	ieee80211_recalc_idle(sdata->local);
 
 	return 0;
 }
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 236ea098bb6..03e0d22603c 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -985,6 +985,8 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
 			     enum nl80211_iftype type);
 void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata);
 void ieee80211_remove_interfaces(struct ieee80211_local *local);
+u32 __ieee80211_recalc_idle(struct ieee80211_local *local);
+void ieee80211_recalc_idle(struct ieee80211_local *local);
 
 /* tx handling */
 void ieee80211_clear_tx_pending(struct ieee80211_local *local);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 256fa19e14e..8b6daf0219f 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -301,6 +301,8 @@ static int ieee80211_open(struct net_device *dev)
 	if (sdata->flags & IEEE80211_SDATA_PROMISC)
 		atomic_inc(&local->iff_promiscs);
 
+	hw_reconf_flags |= __ieee80211_recalc_idle(local);
+
 	local->open_count++;
 	if (hw_reconf_flags) {
 		ieee80211_hw_config(local, hw_reconf_flags);
@@ -548,6 +550,10 @@ static int ieee80211_stop(struct net_device *dev)
 
 	sdata->bss = NULL;
 
+	hw_reconf_flags |= __ieee80211_recalc_idle(local);
+
+	ieee80211_recalc_ps(local, -1);
+
 	if (local->open_count == 0) {
 		if (netif_running(local->mdev))
 			dev_close(local->mdev);
@@ -565,8 +571,6 @@ static int ieee80211_stop(struct net_device *dev)
 		hw_reconf_flags = 0;
 	}
 
-	ieee80211_recalc_ps(local, -1);
-
 	/* do after stop to avoid reconfiguring when we stop anyway */
 	if (hw_reconf_flags)
 		ieee80211_hw_config(local, hw_reconf_flags);
@@ -892,3 +896,73 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
 		unregister_netdevice(sdata->dev);
 	}
 }
+
+static u32 ieee80211_idle_off(struct ieee80211_local *local,
+			      const char *reason)
+{
+	if (!(local->hw.conf.flags & IEEE80211_CONF_IDLE))
+		return 0;
+
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+	printk(KERN_DEBUG "%s: device no longer idle - %s\n",
+	       wiphy_name(local->hw.wiphy), reason);
+#endif
+
+	local->hw.conf.flags &= ~IEEE80211_CONF_IDLE;
+	return IEEE80211_CONF_CHANGE_IDLE;
+}
+
+static u32 ieee80211_idle_on(struct ieee80211_local *local)
+{
+	if (local->hw.conf.flags & IEEE80211_CONF_IDLE)
+		return 0;
+
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+	printk(KERN_DEBUG "%s: device now idle\n",
+	       wiphy_name(local->hw.wiphy));
+#endif
+
+	local->hw.conf.flags |= IEEE80211_CONF_IDLE;
+	return IEEE80211_CONF_CHANGE_IDLE;
+}
+
+u32 __ieee80211_recalc_idle(struct ieee80211_local *local)
+{
+	struct ieee80211_sub_if_data *sdata;
+	int count = 0;
+
+	if (local->hw_scanning || local->sw_scanning)
+		return ieee80211_idle_off(local, "scanning");
+
+	list_for_each_entry(sdata, &local->interfaces, list) {
+		if (!netif_running(sdata->dev))
+			continue;
+		/* do not count disabled managed interfaces */
+		if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+		    sdata->u.mgd.state == IEEE80211_STA_MLME_DISABLED)
+			continue;
+		/* do not count unused IBSS interfaces */
+		if (sdata->vif.type == NL80211_IFTYPE_ADHOC &&
+		    !sdata->u.ibss.ssid_len)
+			continue;
+		/* count everything else */
+		count++;
+	}
+
+	if (!count)
+		return ieee80211_idle_on(local);
+	else
+		return ieee80211_idle_off(local, "in use");
+
+	return 0;
+}
+
+void ieee80211_recalc_idle(struct ieee80211_local *local)
+{
+	u32 chg;
+
+	mutex_lock(&local->iflist_mtx);
+	chg = __ieee80211_recalc_idle(local);
+	mutex_unlock(&local->iflist_mtx);
+	ieee80211_hw_config(local, chg);
+}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 2ded4766d01..5509c5aa6be 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -890,6 +890,7 @@ static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata)
 		printk(KERN_DEBUG "%s: direct probe to AP %pM timed out\n",
 		       sdata->dev->name, ifmgd->bssid);
 		ifmgd->state = IEEE80211_STA_MLME_DISABLED;
+		ieee80211_recalc_idle(local);
 		cfg80211_send_auth_timeout(sdata->dev, ifmgd->bssid);
 
 		/*
@@ -938,6 +939,7 @@ static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata)
 		       " timed out\n",
 		       sdata->dev->name, ifmgd->bssid);
 		ifmgd->state = IEEE80211_STA_MLME_DISABLED;
+		ieee80211_recalc_idle(local);
 		cfg80211_send_auth_timeout(sdata->dev, ifmgd->bssid);
 		ieee80211_rx_bss_remove(sdata, ifmgd->bssid,
 				sdata->local->hw.conf.channel->center_freq,
@@ -1041,6 +1043,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 
 	rcu_read_unlock();
 
+	ieee80211_recalc_idle(local);
+
 	/* channel(_type) changes are handled by ieee80211_hw_config */
 	local->oper_channel_type = NL80211_CHAN_NO_HT;
 
@@ -1121,6 +1125,7 @@ static void ieee80211_associate(struct ieee80211_sub_if_data *sdata)
 		       " timed out\n",
 		       sdata->dev->name, ifmgd->bssid);
 		ifmgd->state = IEEE80211_STA_MLME_DISABLED;
+		ieee80211_recalc_idle(local);
 		cfg80211_send_assoc_timeout(sdata->dev, ifmgd->bssid);
 		ieee80211_rx_bss_remove(sdata, ifmgd->bssid,
 				sdata->local->hw.conf.channel->center_freq,
@@ -1141,6 +1146,7 @@ static void ieee80211_associate(struct ieee80211_sub_if_data *sdata)
 		printk(KERN_DEBUG "%s: mismatch in privacy configuration and "
 		       "mixed-cell disabled - abort association\n", sdata->dev->name);
 		ifmgd->state = IEEE80211_STA_MLME_DISABLED;
+		ieee80211_recalc_idle(local);
 		return;
 	}
 
@@ -1279,6 +1285,7 @@ static void ieee80211_auth_completed(struct ieee80211_sub_if_data *sdata)
 	if (ifmgd->flags & IEEE80211_STA_EXT_SME) {
 		/* Wait for SME to request association */
 		ifmgd->state = IEEE80211_STA_MLME_DISABLED;
+		ieee80211_recalc_idle(sdata->local);
 	} else
 		ieee80211_associate(sdata);
 }
@@ -1515,6 +1522,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 		if (ifmgd->flags & IEEE80211_STA_EXT_SME) {
 			/* Wait for SME to decide what to do next */
 			ifmgd->state = IEEE80211_STA_MLME_DISABLED;
+			ieee80211_recalc_idle(local);
 		}
 		return;
 	}
@@ -2083,6 +2091,7 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata)
 		} else {
 			ifmgd->assoc_scan_tries = 0;
 			ifmgd->state = IEEE80211_STA_MLME_DISABLED;
+			ieee80211_recalc_idle(local);
 		}
 	}
 	return -1;
@@ -2126,6 +2135,8 @@ static void ieee80211_sta_work(struct work_struct *work)
 	} else if (!test_and_clear_bit(IEEE80211_STA_REQ_RUN, &ifmgd->request))
 		return;
 
+	ieee80211_recalc_idle(local);
+
 	switch (ifmgd->state) {
 	case IEEE80211_STA_MLME_DISABLED:
 		break;
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 127bd54e0e3..c99ef8d04d3 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -302,17 +302,9 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
 	/* we only have to protect scan_req and hw/sw scan */
 	mutex_unlock(&local->scan_mtx);
 
-	if (was_hw_scan) {
-		/*
-		 * Somebody might have requested channel change during scan
-		 * that we won't have acted upon, try now. ieee80211_hw_config
-		 * will set the flag based on actual changes.
-		 */
-		ieee80211_hw_config(local, 0);
-		goto done;
-	}
-
 	ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
+	if (was_hw_scan)
+		goto done;
 
 	netif_tx_lock_bh(local->mdev);
 	netif_addr_lock(local->mdev);
@@ -351,6 +343,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
 	mutex_unlock(&local->iflist_mtx);
 
  done:
+	ieee80211_recalc_idle(local);
 	ieee80211_mlme_notify_scan_completed(local);
 	ieee80211_ibss_notify_scan_completed(local);
 	ieee80211_mesh_notify_scan_completed(local);
@@ -471,6 +464,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
 	 * dependency, so that the scan completed calls
 	 * have more locking freedom.
 	 */
+
+	ieee80211_recalc_idle(local);
 	mutex_unlock(&local->scan_mtx);
 
 	if (local->ops->hw_scan)
@@ -487,6 +482,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
 		} else
 			local->sw_scanning = false;
 
+		ieee80211_recalc_idle(local);
+
 		local->scan_req = NULL;
 		local->scan_sdata = NULL;
 	}
-- 
cgit v1.2.3-70-g09d2


From 2f01a1f58889fbfeb68b1bc1b52e4197f3333490 Mon Sep 17 00:00:00 2001
From: Kalle Valo <kalle.valo@nokia.com>
Date: Wed, 29 Apr 2009 23:33:31 +0300
Subject: wl12xx: add driver

wl12xx is a driver for TI wl1251 802.11 chipset designed for embedded
devices, supporting both SDIO and SPI busses. Currently the driver
supports only SPI. Adding support 1253 (the 5 GHz version) should be
relatively easy. More information here:

http://focus.ti.com/general/docs/wtbu/wtbuproductcontent.tsp?contentId=4711&navigationId=12494&templateId=6123

(Collapsed original sequence of pre-merge patches into single commit for
initial merge. -- JWL)

Signed-off-by: Kalle Valo <kalle.valo@nokia.com>
Signed-off-by: Bob Copeland <me@bobcopeland.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/Kconfig               |    1 +
 drivers/net/wireless/Makefile              |    2 +
 drivers/net/wireless/wl12xx/Kconfig        |   11 +
 drivers/net/wireless/wl12xx/Makefile       |    4 +
 drivers/net/wireless/wl12xx/acx.c          |  689 ++++++++++++++
 drivers/net/wireless/wl12xx/acx.h          | 1245 +++++++++++++++++++++++++
 drivers/net/wireless/wl12xx/boot.c         |  295 ++++++
 drivers/net/wireless/wl12xx/boot.h         |   40 +
 drivers/net/wireless/wl12xx/cmd.c          |  353 ++++++++
 drivers/net/wireless/wl12xx/cmd.h          |  265 ++++++
 drivers/net/wireless/wl12xx/debugfs.c      |  508 +++++++++++
 drivers/net/wireless/wl12xx/debugfs.h      |   33 +
 drivers/net/wireless/wl12xx/event.c        |  127 +++
 drivers/net/wireless/wl12xx/event.h        |  121 +++
 drivers/net/wireless/wl12xx/init.c         |  200 ++++
 drivers/net/wireless/wl12xx/init.h         |   40 +
 drivers/net/wireless/wl12xx/main.c         | 1358 ++++++++++++++++++++++++++++
 drivers/net/wireless/wl12xx/ps.c           |  151 ++++
 drivers/net/wireless/wl12xx/ps.h           |   36 +
 drivers/net/wireless/wl12xx/reg.h          |  745 +++++++++++++++
 drivers/net/wireless/wl12xx/rx.c           |  208 +++++
 drivers/net/wireless/wl12xx/rx.h           |  122 +++
 drivers/net/wireless/wl12xx/spi.c          |  358 ++++++++
 drivers/net/wireless/wl12xx/spi.h          |  109 +++
 drivers/net/wireless/wl12xx/tx.c           |  557 ++++++++++++
 drivers/net/wireless/wl12xx/tx.h           |  215 +++++
 drivers/net/wireless/wl12xx/wl1251.c       |  709 +++++++++++++++
 drivers/net/wireless/wl12xx/wl1251.h       |  165 ++++
 drivers/net/wireless/wl12xx/wl12xx.h       |  409 +++++++++
 drivers/net/wireless/wl12xx/wl12xx_80211.h |  156 ++++
 include/linux/spi/wl12xx.h                 |   31 +
 31 files changed, 9263 insertions(+)
 create mode 100644 drivers/net/wireless/wl12xx/Kconfig
 create mode 100644 drivers/net/wireless/wl12xx/Makefile
 create mode 100644 drivers/net/wireless/wl12xx/acx.c
 create mode 100644 drivers/net/wireless/wl12xx/acx.h
 create mode 100644 drivers/net/wireless/wl12xx/boot.c
 create mode 100644 drivers/net/wireless/wl12xx/boot.h
 create mode 100644 drivers/net/wireless/wl12xx/cmd.c
 create mode 100644 drivers/net/wireless/wl12xx/cmd.h
 create mode 100644 drivers/net/wireless/wl12xx/debugfs.c
 create mode 100644 drivers/net/wireless/wl12xx/debugfs.h
 create mode 100644 drivers/net/wireless/wl12xx/event.c
 create mode 100644 drivers/net/wireless/wl12xx/event.h
 create mode 100644 drivers/net/wireless/wl12xx/init.c
 create mode 100644 drivers/net/wireless/wl12xx/init.h
 create mode 100644 drivers/net/wireless/wl12xx/main.c
 create mode 100644 drivers/net/wireless/wl12xx/ps.c
 create mode 100644 drivers/net/wireless/wl12xx/ps.h
 create mode 100644 drivers/net/wireless/wl12xx/reg.h
 create mode 100644 drivers/net/wireless/wl12xx/rx.c
 create mode 100644 drivers/net/wireless/wl12xx/rx.h
 create mode 100644 drivers/net/wireless/wl12xx/spi.c
 create mode 100644 drivers/net/wireless/wl12xx/spi.h
 create mode 100644 drivers/net/wireless/wl12xx/tx.c
 create mode 100644 drivers/net/wireless/wl12xx/tx.h
 create mode 100644 drivers/net/wireless/wl12xx/wl1251.c
 create mode 100644 drivers/net/wireless/wl12xx/wl1251.h
 create mode 100644 drivers/net/wireless/wl12xx/wl12xx.h
 create mode 100644 drivers/net/wireless/wl12xx/wl12xx_80211.h
 create mode 100644 include/linux/spi/wl12xx.h

(limited to 'include')

diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig
index 2d8434f409b..91be3e7bf13 100644
--- a/drivers/net/wireless/Kconfig
+++ b/drivers/net/wireless/Kconfig
@@ -500,5 +500,6 @@ source "drivers/net/wireless/b43legacy/Kconfig"
 source "drivers/net/wireless/zd1211rw/Kconfig"
 source "drivers/net/wireless/rt2x00/Kconfig"
 source "drivers/net/wireless/orinoco/Kconfig"
+source "drivers/net/wireless/wl12xx/Kconfig"
 
 endmenu
diff --git a/drivers/net/wireless/Makefile b/drivers/net/wireless/Makefile
index 0625e91b599..f2b1861e6bc 100644
--- a/drivers/net/wireless/Makefile
+++ b/drivers/net/wireless/Makefile
@@ -58,3 +58,5 @@ obj-$(CONFIG_P54_COMMON)	+= p54/
 obj-$(CONFIG_ATH_COMMON)	+= ath/
 
 obj-$(CONFIG_MAC80211_HWSIM)	+= mac80211_hwsim.o
+
+obj-$(CONFIG_WL12XX)	+= wl12xx/
diff --git a/drivers/net/wireless/wl12xx/Kconfig b/drivers/net/wireless/wl12xx/Kconfig
new file mode 100644
index 00000000000..20a9633569f
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/Kconfig
@@ -0,0 +1,11 @@
+config WL12XX
+	tristate "TI wl1251/wl1271 support"
+	depends on MAC80211 && WLAN_80211 && SPI_MASTER && EXPERIMENTAL
+	select FW_LOADER
+	select CRC7
+	---help---
+	  This module adds support for wireless adapters based on
+	  TI wl1251/wl1271 chipsets.
+
+	  If you choose to build a module, it'll be called wl12xx. Say N if
+	  unsure.
diff --git a/drivers/net/wireless/wl12xx/Makefile b/drivers/net/wireless/wl12xx/Makefile
new file mode 100644
index 00000000000..d43de27dc54
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/Makefile
@@ -0,0 +1,4 @@
+wl12xx-objs		= main.o spi.o event.o tx.o rx.o \
+			  ps.o cmd.o acx.o boot.o init.o wl1251.o \
+			  debugfs.o
+obj-$(CONFIG_WL12XX)	+= wl12xx.o
diff --git a/drivers/net/wireless/wl12xx/acx.c b/drivers/net/wireless/wl12xx/acx.c
new file mode 100644
index 00000000000..1cfd458ad5a
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/acx.c
@@ -0,0 +1,689 @@
+#include "acx.h"
+
+#include <linux/module.h>
+#include <linux/crc7.h>
+#include <linux/spi/spi.h>
+
+#include "wl12xx.h"
+#include "wl12xx_80211.h"
+#include "reg.h"
+#include "spi.h"
+#include "ps.h"
+
+int wl12xx_acx_frame_rates(struct wl12xx *wl, u8 ctrl_rate, u8 ctrl_mod,
+			   u8 mgt_rate, u8 mgt_mod)
+{
+	int ret;
+	struct acx_fw_gen_frame_rates rates;
+
+	wl12xx_debug(DEBUG_ACX, "acx frame rates");
+
+	rates.header.id = ACX_FW_GEN_FRAME_RATES;
+	rates.header.len = sizeof(struct acx_fw_gen_frame_rates) -
+		sizeof(struct acx_header);
+
+	rates.tx_ctrl_frame_rate = ctrl_rate;
+	rates.tx_ctrl_frame_mod = ctrl_mod;
+	rates.tx_mgt_frame_rate = mgt_rate;
+	rates.tx_mgt_frame_mod = mgt_mod;
+
+	ret = wl12xx_cmd_configure(wl, &rates, sizeof(rates));
+	if (ret < 0) {
+		wl12xx_error("Failed to set FW rates and modulation");
+		return ret;
+	}
+
+	return 0;
+}
+
+
+int wl12xx_acx_station_id(struct wl12xx *wl)
+{
+	int ret, i;
+	struct dot11_station_id mac;
+
+	wl12xx_debug(DEBUG_ACX, "acx dot11_station_id");
+
+	mac.header.id = DOT11_STATION_ID;
+	mac.header.len = sizeof(mac) - sizeof(struct acx_header);
+
+	for (i = 0; i < ETH_ALEN; i++)
+		mac.mac[i] = wl->mac_addr[ETH_ALEN - 1 - i];
+
+	ret = wl12xx_cmd_configure(wl, &mac, sizeof(mac));
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+int wl12xx_acx_default_key(struct wl12xx *wl, u8 key_id)
+{
+	struct acx_dot11_default_key default_key;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx dot11_default_key (%d)", key_id);
+
+	default_key.header.id = DOT11_DEFAULT_KEY;
+	default_key.header.len = sizeof(default_key) -
+		sizeof(struct acx_header);
+
+	default_key.id = key_id;
+
+	ret = wl12xx_cmd_configure(wl, &default_key, sizeof(default_key));
+	if (ret < 0) {
+		wl12xx_error("Couldnt set default key");
+		return ret;
+	}
+
+	wl->default_key = key_id;
+
+	return 0;
+}
+
+int wl12xx_acx_wake_up_conditions(struct wl12xx *wl, u8 listen_interval)
+{
+	struct acx_wake_up_condition wake_up;
+
+	wl12xx_debug(DEBUG_ACX, "acx wake up conditions");
+
+	wake_up.header.id = ACX_WAKE_UP_CONDITIONS;
+	wake_up.header.len = sizeof(wake_up) - sizeof(struct acx_header);
+
+	wake_up.wake_up_event = WAKE_UP_EVENT_DTIM_BITMAP;
+	wake_up.listen_interval = listen_interval;
+
+	return wl12xx_cmd_configure(wl, &wake_up, sizeof(wake_up));
+}
+
+int wl12xx_acx_sleep_auth(struct wl12xx *wl, u8 sleep_auth)
+{
+	int ret;
+	struct acx_sleep_auth auth;
+
+	wl12xx_debug(DEBUG_ACX, "acx sleep auth");
+
+	auth.header.id = ACX_SLEEP_AUTH;
+	auth.header.len = sizeof(auth) - sizeof(struct acx_header);
+
+	auth.sleep_auth = sleep_auth;
+
+	ret = wl12xx_cmd_configure(wl, &auth, sizeof(auth));
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+int wl12xx_acx_fw_version(struct wl12xx *wl, char *buf, size_t len)
+{
+	struct wl12xx_command cmd;
+	struct acx_revision *rev;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx fw rev");
+
+	memset(&cmd, 0, sizeof(cmd));
+
+	ret = wl12xx_cmd_interrogate(wl, ACX_FW_REV, sizeof(*rev), &cmd);
+	if (ret < 0) {
+		wl12xx_warning("ACX_FW_REV interrogate failed");
+		return ret;
+	}
+
+	rev = (struct acx_revision *) &cmd.parameters;
+
+	/* be careful with the buffer sizes */
+	strncpy(buf, rev->fw_version, min(len, sizeof(rev->fw_version)));
+
+	/*
+	 * if the firmware version string is exactly
+	 * sizeof(rev->fw_version) long or fw_len is less than
+	 * sizeof(rev->fw_version) it won't be null terminated
+	 */
+	buf[min(len, sizeof(rev->fw_version)) - 1] = '\0';
+
+	return 0;
+}
+
+int wl12xx_acx_tx_power(struct wl12xx *wl, int power)
+{
+	struct acx_current_tx_power ie;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx dot11_cur_tx_pwr");
+
+	if (power < 0 || power > 25)
+		return -EINVAL;
+
+	memset(&ie, 0, sizeof(ie));
+
+	ie.header.id = DOT11_CUR_TX_PWR;
+	ie.header.len = sizeof(ie) - sizeof(struct acx_header);
+	ie.current_tx_power = power * 10;
+
+	ret = wl12xx_cmd_configure(wl, &ie, sizeof(ie));
+	if (ret < 0) {
+		wl12xx_warning("configure of tx power failed: %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int wl12xx_acx_feature_cfg(struct wl12xx *wl)
+{
+	struct acx_feature_config feature;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx feature cfg");
+
+	memset(&feature, 0, sizeof(feature));
+
+	feature.header.id = ACX_FEATURE_CFG;
+	feature.header.len = sizeof(feature) - sizeof(struct acx_header);
+
+	/* DF_ENCRYPTION_DISABLE and DF_SNIFF_MODE_ENABLE are disabled */
+	feature.data_flow_options = 0;
+	feature.options = 0;
+
+	ret = wl12xx_cmd_configure(wl, &feature, sizeof(feature));
+	if (ret < 0)
+		wl12xx_error("Couldnt set HW encryption");
+
+	return ret;
+}
+
+int wl12xx_acx_mem_map(struct wl12xx *wl, void *mem_map, size_t len)
+{
+	struct wl12xx_command cmd;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx mem map");
+
+	ret = wl12xx_cmd_interrogate(wl, ACX_MEM_MAP, len, &cmd);
+	if (ret < 0)
+		return ret;
+	else if (cmd.status != CMD_STATUS_SUCCESS)
+		return -EIO;
+
+	memcpy(mem_map, &cmd.parameters, len);
+
+	return 0;
+}
+
+int wl12xx_acx_data_path_params(struct wl12xx *wl,
+				struct acx_data_path_params_resp *data_path)
+{
+	struct acx_data_path_params params;
+	struct wl12xx_command cmd;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx data path params");
+
+	params.rx_packet_ring_chunk_size = DP_RX_PACKET_RING_CHUNK_SIZE;
+	params.tx_packet_ring_chunk_size = DP_TX_PACKET_RING_CHUNK_SIZE;
+
+	params.rx_packet_ring_chunk_num = DP_RX_PACKET_RING_CHUNK_NUM;
+	params.tx_packet_ring_chunk_num = DP_TX_PACKET_RING_CHUNK_NUM;
+
+	params.tx_complete_threshold = 1;
+
+	params.tx_complete_ring_depth = FW_TX_CMPLT_BLOCK_SIZE;
+
+	params.tx_complete_timeout = DP_TX_COMPLETE_TIME_OUT;
+
+	params.header.id = ACX_DATA_PATH_PARAMS;
+	params.header.len = sizeof(params) - sizeof(struct acx_header);
+
+	ret = wl12xx_cmd_configure(wl, &params, sizeof(params));
+	if (ret < 0)
+		return ret;
+
+
+	ret = wl12xx_cmd_interrogate(wl, ACX_DATA_PATH_PARAMS,
+				     sizeof(struct acx_data_path_params_resp),
+				     &cmd);
+
+	if (ret < 0) {
+		wl12xx_warning("failed to read data path parameters: %d", ret);
+		return ret;
+	} else if (cmd.status != CMD_STATUS_SUCCESS) {
+		wl12xx_warning("data path parameter acx status failed");
+		return -EIO;
+	}
+
+	memcpy(data_path, &cmd.parameters, sizeof(*data_path));
+
+	return 0;
+}
+
+int wl12xx_acx_rx_msdu_life_time(struct wl12xx *wl, u32 life_time)
+{
+	struct rx_msdu_lifetime msdu_lifetime;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx rx msdu life time");
+
+	msdu_lifetime.header.id = DOT11_RX_MSDU_LIFE_TIME;
+	msdu_lifetime.header.len = sizeof(msdu_lifetime) -
+		sizeof(struct acx_header);
+	msdu_lifetime.lifetime = life_time;
+
+	ret = wl12xx_cmd_configure(wl, &msdu_lifetime, sizeof(msdu_lifetime));
+	if (ret < 0) {
+		wl12xx_warning("failed to set rx msdu life time: %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int wl12xx_acx_rx_config(struct wl12xx *wl, u32 config, u32 filter)
+{
+	struct acx_rx_config rx_config;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx rx config");
+
+	rx_config.header.id = ACX_RX_CFG;
+	rx_config.header.len = sizeof(rx_config) - sizeof(struct acx_header);
+	rx_config.config_options = config;
+	rx_config.filter_options = filter;
+
+	ret = wl12xx_cmd_configure(wl, &rx_config, sizeof(rx_config));
+	if (ret < 0) {
+		wl12xx_warning("failed to set rx config: %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int wl12xx_acx_pd_threshold(struct wl12xx *wl)
+{
+	struct acx_packet_detection packet_detection;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx data pd threshold");
+
+	/* FIXME: threshold value not set */
+	packet_detection.header.id = ACX_PD_THRESHOLD;
+	packet_detection.header.len = sizeof(packet_detection) -
+		sizeof(struct acx_header);
+
+	ret = wl12xx_cmd_configure(wl, &packet_detection,
+				   sizeof(packet_detection));
+	if (ret < 0) {
+		wl12xx_warning("failed to set pd threshold: %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int wl12xx_acx_slot(struct wl12xx *wl, enum acx_slot_type slot_time)
+{
+	struct acx_slot slot;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx slot");
+
+	slot.header.id = ACX_SLOT;
+	slot.header.len = sizeof(slot) - sizeof(struct acx_header);
+
+	slot.wone_index = STATION_WONE_INDEX;
+	slot.slot_time = slot_time;
+
+	ret = wl12xx_cmd_configure(wl, &slot, sizeof(slot));
+	if (ret < 0) {
+		wl12xx_warning("failed to set slot time: %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int wl12xx_acx_group_address_tbl(struct wl12xx *wl)
+{
+	struct multicast_grp_addr_start multicast;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx group address tbl");
+
+	/* MAC filtering */
+	multicast.header.id = DOT11_GROUP_ADDRESS_TBL;
+	multicast.header.len = sizeof(multicast) - sizeof(struct acx_header);
+
+	multicast.enabled = 0;
+	multicast.num_groups = 0;
+	memset(multicast.mac_table, 0, ADDRESS_GROUP_MAX_LEN);
+
+	ret = wl12xx_cmd_configure(wl, &multicast, sizeof(multicast));
+	if (ret < 0) {
+		wl12xx_warning("failed to set group addr table: %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int wl12xx_acx_service_period_timeout(struct wl12xx *wl)
+{
+	struct acx_rx_timeout rx_timeout;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx service period timeout");
+
+	/* RX timeout */
+	rx_timeout.header.id = ACX_SERVICE_PERIOD_TIMEOUT;
+	rx_timeout.header.len = sizeof(rx_timeout) - sizeof(struct acx_header);
+
+	rx_timeout.ps_poll_timeout = RX_TIMEOUT_PS_POLL_DEF;
+	rx_timeout.upsd_timeout = RX_TIMEOUT_UPSD_DEF;
+
+	ret = wl12xx_cmd_configure(wl, &rx_timeout, sizeof(rx_timeout));
+	if (ret < 0) {
+		wl12xx_warning("failed to set service period timeout: %d",
+			       ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int wl12xx_acx_rts_threshold(struct wl12xx *wl, u16 rts_threshold)
+{
+	struct acx_rts_threshold rts;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx rts threshold");
+
+	rts.header.id = DOT11_RTS_THRESHOLD;
+	rts.header.len = sizeof(rts) - sizeof(struct acx_header);
+
+	rts.threshold = rts_threshold;
+
+	ret = wl12xx_cmd_configure(wl, &rts, sizeof(rts));
+	if (ret < 0) {
+		wl12xx_warning("failed to set rts threshold: %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int wl12xx_acx_beacon_filter_opt(struct wl12xx *wl)
+{
+	struct acx_beacon_filter_option beacon_filter;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx beacon filter opt");
+
+	beacon_filter.header.id = ACX_BEACON_FILTER_OPT;
+	beacon_filter.header.len = sizeof(beacon_filter) -
+		sizeof(struct acx_header);
+
+	beacon_filter.enable = 0;
+	beacon_filter.max_num_beacons = 0;
+
+	ret = wl12xx_cmd_configure(wl, &beacon_filter, sizeof(beacon_filter));
+	if (ret < 0) {
+		wl12xx_warning("failed to set beacon filter opt: %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int wl12xx_acx_beacon_filter_table(struct wl12xx *wl)
+{
+	struct acx_beacon_filter_ie_table ie_table;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx beacon filter table");
+
+	ie_table.header.id = ACX_BEACON_FILTER_TABLE;
+	ie_table.header.len = sizeof(ie_table) - sizeof(struct acx_header);
+
+	ie_table.num_ie = 0;
+	memset(ie_table.table, 0, BEACON_FILTER_TABLE_MAX_SIZE);
+
+	ret = wl12xx_cmd_configure(wl, &ie_table, sizeof(ie_table));
+	if (ret < 0) {
+		wl12xx_warning("failed to set beacon filter table: %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int wl12xx_acx_sg_enable(struct wl12xx *wl)
+{
+	struct acx_bt_wlan_coex pta;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx sg enable");
+
+	pta.header.id = ACX_SG_ENABLE;
+	pta.header.len = sizeof(pta) - sizeof(struct acx_header);
+
+	pta.enable = SG_ENABLE;
+
+	ret = wl12xx_cmd_configure(wl, &pta, sizeof(pta));
+	if (ret < 0) {
+		wl12xx_warning("failed to set softgemini enable: %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int wl12xx_acx_sg_cfg(struct wl12xx *wl)
+{
+	struct acx_bt_wlan_coex_param param;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx sg cfg");
+
+	/* BT-WLAN coext parameters */
+	param.header.id = ACX_SG_CFG;
+	param.header.len = sizeof(param) - sizeof(struct acx_header);
+
+	param.min_rate = RATE_INDEX_24MBPS;
+	param.bt_hp_max_time = PTA_BT_HP_MAXTIME_DEF;
+	param.wlan_hp_max_time = PTA_WLAN_HP_MAX_TIME_DEF;
+	param.sense_disable_timer = PTA_SENSE_DISABLE_TIMER_DEF;
+	param.rx_time_bt_hp = PTA_PROTECTIVE_RX_TIME_DEF;
+	param.tx_time_bt_hp = PTA_PROTECTIVE_TX_TIME_DEF;
+	param.rx_time_bt_hp_fast = PTA_PROTECTIVE_RX_TIME_FAST_DEF;
+	param.tx_time_bt_hp_fast = PTA_PROTECTIVE_TX_TIME_FAST_DEF;
+	param.wlan_cycle_fast = PTA_CYCLE_TIME_FAST_DEF;
+	param.bt_anti_starvation_period = PTA_ANTI_STARVE_PERIOD_DEF;
+	param.next_bt_lp_packet = PTA_TIMEOUT_NEXT_BT_LP_PACKET_DEF;
+	param.wake_up_beacon = PTA_TIME_BEFORE_BEACON_DEF;
+	param.hp_dm_max_guard_time = PTA_HPDM_MAX_TIME_DEF;
+	param.next_wlan_packet = PTA_TIME_OUT_NEXT_WLAN_DEF;
+	param.antenna_type = PTA_ANTENNA_TYPE_DEF;
+	param.signal_type = PTA_SIGNALING_TYPE_DEF;
+	param.afh_leverage_on = PTA_AFH_LEVERAGE_ON_DEF;
+	param.quiet_cycle_num = PTA_NUMBER_QUIET_CYCLE_DEF;
+	param.max_cts = PTA_MAX_NUM_CTS_DEF;
+	param.wlan_packets_num = PTA_NUMBER_OF_WLAN_PACKETS_DEF;
+	param.bt_packets_num = PTA_NUMBER_OF_BT_PACKETS_DEF;
+	param.missed_rx_avalanche = PTA_RX_FOR_AVALANCHE_DEF;
+	param.wlan_elp_hp = PTA_ELP_HP_DEF;
+	param.bt_anti_starvation_cycles = PTA_ANTI_STARVE_NUM_CYCLE_DEF;
+	param.ack_mode_dual_ant = PTA_ACK_MODE_DEF;
+	param.pa_sd_enable = PTA_ALLOW_PA_SD_DEF;
+	param.pta_auto_mode_enable = PTA_AUTO_MODE_NO_CTS_DEF;
+	param.bt_hp_respected_num = PTA_BT_HP_RESPECTED_DEF;
+
+	ret = wl12xx_cmd_configure(wl, &param, sizeof(param));
+	if (ret < 0) {
+		wl12xx_warning("failed to set sg config: %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int wl12xx_acx_cca_threshold(struct wl12xx *wl)
+{
+	struct acx_energy_detection detection;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx cca threshold");
+
+	detection.header.id = ACX_CCA_THRESHOLD;
+	detection.header.len = sizeof(detection) - sizeof(struct acx_header);
+
+	detection.rx_cca_threshold = CCA_THRSH_DISABLE_ENERGY_D;
+	detection.tx_energy_detection = 0;
+
+	ret = wl12xx_cmd_configure(wl, &detection, sizeof(detection));
+	if (ret < 0) {
+		wl12xx_warning("failed to set cca threshold: %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int wl12xx_acx_bcn_dtim_options(struct wl12xx *wl)
+{
+	struct acx_beacon_broadcast bb;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx bcn dtim options");
+
+	bb.header.id = ACX_BCN_DTIM_OPTIONS;
+	bb.header.len = sizeof(bb) - sizeof(struct acx_header);
+
+	bb.beacon_rx_timeout = BCN_RX_TIMEOUT_DEF_VALUE;
+	bb.broadcast_timeout = BROADCAST_RX_TIMEOUT_DEF_VALUE;
+	bb.rx_broadcast_in_ps = RX_BROADCAST_IN_PS_DEF_VALUE;
+	bb.ps_poll_threshold = CONSECUTIVE_PS_POLL_FAILURE_DEF;
+
+	ret = wl12xx_cmd_configure(wl, &bb, sizeof(bb));
+	if (ret < 0) {
+		wl12xx_warning("failed to set rx config: %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int wl12xx_acx_aid(struct wl12xx *wl, u16 aid)
+{
+	struct acx_aid acx_aid;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx aid");
+
+	acx_aid.header.id = ACX_AID;
+	acx_aid.header.len = sizeof(acx_aid) - sizeof(struct acx_header);
+
+	acx_aid.aid = aid;
+
+	ret = wl12xx_cmd_configure(wl, &acx_aid, sizeof(acx_aid));
+	if (ret < 0) {
+		wl12xx_warning("failed to set aid: %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int wl12xx_acx_event_mbox_mask(struct wl12xx *wl, u32 event_mask)
+{
+	struct acx_event_mask mask;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx event mbox mask");
+
+	mask.header.id = ACX_EVENT_MBOX_MASK;
+	mask.header.len = sizeof(mask) - sizeof(struct acx_header);
+
+	/* high event mask is unused */
+	mask.high_event_mask = 0xffffffff;
+
+	mask.event_mask = event_mask;
+
+	ret = wl12xx_cmd_configure(wl, &mask, sizeof(mask));
+	if (ret < 0) {
+		wl12xx_warning("failed to set aid: %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int wl12xx_acx_set_preamble(struct wl12xx *wl, enum acx_preamble_type preamble)
+{
+	struct acx_preamble ie;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx_set_preamble");
+
+	memset(&ie, 0, sizeof(ie));
+
+	ie.header.id = ACX_PREAMBLE_TYPE;
+	ie.header.len = sizeof(ie) - sizeof(struct acx_header);
+	ie.preamble = preamble;
+	ret = wl12xx_cmd_configure(wl, &ie, sizeof(ie));
+	if (ret < 0) {
+		wl12xx_warning("Setting of preamble failed: %d", ret);
+		return ret;
+	}
+	return 0;
+}
+
+int wl12xx_acx_cts_protect(struct wl12xx *wl,
+			   enum acx_ctsprotect_type ctsprotect)
+{
+	struct acx_ctsprotect ie;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx_set_ctsprotect");
+
+	memset(&ie, 0, sizeof(ie));
+
+	ie.header.id = ACX_CTS_PROTECTION;
+	ie.header.len = sizeof(ie) - sizeof(struct acx_header);
+	ie.ctsprotect = ctsprotect;
+	ret = wl12xx_cmd_configure(wl, &ie, sizeof(ie));
+	if (ret < 0) {
+		wl12xx_warning("Setting of ctsprotect failed: %d", ret);
+		return ret;
+	}
+	return 0;
+}
+
+int wl12xx_acx_statistics(struct wl12xx *wl, struct acx_statistics *stats)
+{
+	struct wl12xx_command *answer;
+	int ret;
+
+	wl12xx_debug(DEBUG_ACX, "acx statistics");
+
+	answer = kmalloc(sizeof(*answer), GFP_KERNEL);
+	if (!answer) {
+		wl12xx_warning("could not allocate memory for acx statistics");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = wl12xx_cmd_interrogate(wl, ACX_STATISTICS, sizeof(*answer),
+				     answer);
+	if (ret < 0) {
+		wl12xx_warning("acx statistics failed: %d", ret);
+		goto out;
+	}
+
+	memcpy(stats, answer->parameters, sizeof(*stats));
+
+out:
+	kfree(answer);
+	return ret;
+}
diff --git a/drivers/net/wireless/wl12xx/acx.h b/drivers/net/wireless/wl12xx/acx.h
new file mode 100644
index 00000000000..fb2d2340993
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/acx.h
@@ -0,0 +1,1245 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (c) 1998-2007 Texas Instruments Incorporated
+ * Copyright (C) 2008 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#ifndef __WL12XX_ACX_H__
+#define __WL12XX_ACX_H__
+
+#include "wl12xx.h"
+
+/* Target's information element */
+struct acx_header {
+	u16 id;
+	u16 len;
+};
+
+struct acx_error_counter {
+	struct acx_header header;
+
+	/* The number of PLCP errors since the last time this */
+	/* information element was interrogated. This field is */
+	/* automatically cleared when it is interrogated.*/
+	u32 PLCP_error;
+
+	/* The number of FCS errors since the last time this */
+	/* information element was interrogated. This field is */
+	/* automatically cleared when it is interrogated.*/
+	u32 FCS_error;
+
+	/* The number of MPDUs without PLCP header errors received*/
+	/* since the last time this information element was interrogated. */
+	/* This field is automatically cleared when it is interrogated.*/
+	u32 valid_frame;
+
+	/* the number of missed sequence numbers in the squentially */
+	/* values of frames seq numbers */
+	u32 seq_num_miss;
+} __attribute__ ((packed));
+
+struct acx_revision {
+	struct acx_header header;
+
+	/*
+	 * The WiLink firmware version, an ASCII string x.x.x.x,
+	 * that uniquely identifies the current firmware.
+	 * The left most digit is incremented each time a
+	 * significant change is made to the firmware, such as
+	 * code redesign or new platform support.
+	 * The second digit is incremented when major enhancements
+	 * are added or major fixes are made.
+	 * The third digit is incremented for each GA release.
+	 * The fourth digit is incremented for each build.
+	 * The first two digits identify a firmware release version,
+	 * in other words, a unique set of features.
+	 * The first three digits identify a GA release.
+	 */
+	char fw_version[20];
+
+	/*
+	 * This 4 byte field specifies the WiLink hardware version.
+	 * bits 0  - 15: Reserved.
+	 * bits 16 - 23: Version ID - The WiLink version ID
+	 *              (1 = first spin, 2 = second spin, and so on).
+	 * bits 24 - 31: Chip ID - The WiLink chip ID.
+	 */
+	u32 hw_version;
+} __attribute__ ((packed));
+
+enum wl12xx_psm_mode {
+	/* Active mode */
+	WL12XX_PSM_CAM = 0,
+
+	/* Power save mode */
+	WL12XX_PSM_PS = 1,
+
+	/* Extreme low power */
+	WL12XX_PSM_ELP = 2,
+};
+
+struct acx_sleep_auth {
+	struct acx_header header;
+
+	/* The sleep level authorization of the device. */
+	/* 0 - Always active*/
+	/* 1 - Power down mode: light / fast sleep*/
+	/* 2 - ELP mode: Deep / Max sleep*/
+	u8  sleep_auth;
+	u8  padding[3];
+} __attribute__ ((packed));
+
+#define TIM_ELE_ID    5
+#define PARTIAL_VBM_MAX    251
+
+struct tim {
+	u8 identity;
+	u8 length;
+	u8 dtim_count;
+	u8 dtim_period;
+	u8 bitmap_ctrl;
+	u8 pvb_field[PARTIAL_VBM_MAX]; /* Partial Virtual Bitmap */
+} __attribute__ ((packed));
+
+/* Virtual Bit Map update */
+struct vbm_update_request {
+	__le16 len;
+	u8  padding[2];
+	struct tim tim;
+} __attribute__ ((packed));
+
+enum {
+	HOSTIF_PCI_MASTER_HOST_INDIRECT,
+	HOSTIF_PCI_MASTER_HOST_DIRECT,
+	HOSTIF_SLAVE,
+	HOSTIF_PKT_RING,
+	HOSTIF_DONTCARE = 0xFF
+};
+
+#define DEFAULT_UCAST_PRIORITY          0
+#define DEFAULT_RX_Q_PRIORITY           0
+#define DEFAULT_NUM_STATIONS            1
+#define DEFAULT_RXQ_PRIORITY            0 /* low 0 .. 15 high  */
+#define DEFAULT_RXQ_TYPE                0x07    /* All frames, Data/Ctrl/Mgmt */
+#define TRACE_BUFFER_MAX_SIZE           256
+
+#define  DP_RX_PACKET_RING_CHUNK_SIZE 1600
+#define  DP_TX_PACKET_RING_CHUNK_SIZE 1600
+#define  DP_RX_PACKET_RING_CHUNK_NUM 2
+#define  DP_TX_PACKET_RING_CHUNK_NUM 2
+#define  DP_TX_COMPLETE_TIME_OUT 20
+#define  FW_TX_CMPLT_BLOCK_SIZE 16
+
+struct acx_data_path_params {
+	struct acx_header header;
+
+	u16 rx_packet_ring_chunk_size;
+	u16 tx_packet_ring_chunk_size;
+
+	u8 rx_packet_ring_chunk_num;
+	u8 tx_packet_ring_chunk_num;
+
+	/*
+	 * Maximum number of packets that can be gathered
+	 * in the TX complete ring before an interrupt
+	 * is generated.
+	 */
+	u8 tx_complete_threshold;
+
+	/* Number of pending TX complete entries in cyclic ring.*/
+	u8 tx_complete_ring_depth;
+
+	/*
+	 * Max num microseconds since a packet enters the TX
+	 * complete ring until an interrupt is generated.
+	 */
+	u32 tx_complete_timeout;
+} __attribute__ ((packed));
+
+
+struct acx_data_path_params_resp {
+	struct acx_header header;
+
+	u16 rx_packet_ring_chunk_size;
+	u16 tx_packet_ring_chunk_size;
+
+	u8 rx_packet_ring_chunk_num;
+	u8 tx_packet_ring_chunk_num;
+
+	u8 pad[2];
+
+	u32 rx_packet_ring_addr;
+	u32 tx_packet_ring_addr;
+
+	u32 rx_control_addr;
+	u32 tx_control_addr;
+
+	u32 tx_complete_addr;
+} __attribute__ ((packed));
+
+#define TX_MSDU_LIFETIME_MIN       0
+#define TX_MSDU_LIFETIME_MAX       3000
+#define TX_MSDU_LIFETIME_DEF       512
+#define RX_MSDU_LIFETIME_MIN       0
+#define RX_MSDU_LIFETIME_MAX       0xFFFFFFFF
+#define RX_MSDU_LIFETIME_DEF       512000
+
+struct rx_msdu_lifetime {
+	struct acx_header header;
+
+	/*
+	 * The maximum amount of time, in TU, before the
+	 * firmware discards the MSDU.
+	 */
+	u32 lifetime;
+} __attribute__ ((packed));
+
+/*
+ * RX Config Options Table
+ * Bit		Definition
+ * ===		==========
+ * 31:14		Reserved
+ * 13		Copy RX Status - when set, write three receive status words
+ * 	 	to top of rx'd MPDUs.
+ * 		When cleared, do not write three status words (added rev 1.5)
+ * 12		Reserved
+ * 11		RX Complete upon FCS error - when set, give rx complete
+ *	 	interrupt for FCS errors, after the rx filtering, e.g. unicast
+ *	 	frames not to us with FCS error will not generate an interrupt.
+ * 10		SSID Filter Enable - When set, the WiLink discards all beacon,
+ *	        probe request, and probe response frames with an SSID that does
+ *		not match the SSID specified by the host in the START/JOIN
+ *		command.
+ *		When clear, the WiLink receives frames with any SSID.
+ * 9		Broadcast Filter Enable - When set, the WiLink discards all
+ * 	 	broadcast frames. When clear, the WiLink receives all received
+ *		broadcast frames.
+ * 8:6		Reserved
+ * 5		BSSID Filter Enable - When set, the WiLink discards any frames
+ * 	 	with a BSSID that does not match the BSSID specified by the
+ *		host.
+ *		When clear, the WiLink receives frames from any BSSID.
+ * 4		MAC Addr Filter - When set, the WiLink discards any frames
+ * 	 	with a destination address that does not match the MAC address
+ *		of the adaptor.
+ *		When clear, the WiLink receives frames destined to any MAC
+ *		address.
+ * 3		Promiscuous - When set, the WiLink receives all valid frames
+ * 	 	(i.e., all frames that pass the FCS check).
+ *		When clear, only frames that pass the other filters specified
+ *		are received.
+ * 2		FCS - When set, the WiLink includes the FCS with the received
+ *	 	frame.
+ *		When cleared, the FCS is discarded.
+ * 1		PLCP header - When set, write all data from baseband to frame
+ * 	 	buffer including PHY header.
+ * 0		Reserved - Always equal to 0.
+ *
+ * RX Filter Options Table
+ * Bit		Definition
+ * ===		==========
+ * 31:12		Reserved - Always equal to 0.
+ * 11		Association - When set, the WiLink receives all association
+ * 	 	related frames (association request/response, reassocation
+ *		request/response, and disassociation). When clear, these frames
+ *		are discarded.
+ * 10		Auth/De auth - When set, the WiLink receives all authentication
+ * 	 	and de-authentication frames. When clear, these frames are
+ *		discarded.
+ * 9		Beacon - When set, the WiLink receives all beacon frames.
+ * 	 	When clear, these frames are discarded.
+ * 8		Contention Free - When set, the WiLink receives all contention
+ * 	 	free frames.
+ *		When clear, these frames are discarded.
+ * 7		Control - When set, the WiLink receives all control frames.
+ * 	 	When clear, these frames are discarded.
+ * 6		Data - When set, the WiLink receives all data frames.
+ * 	 	When clear, these frames are discarded.
+ * 5		FCS Error - When set, the WiLink receives frames that have FCS
+ *	 	errors.
+ *		When clear, these frames are discarded.
+ * 4		Management - When set, the WiLink receives all management
+ *		frames.
+ * 	 	When clear, these frames are discarded.
+ * 3		Probe Request - When set, the WiLink receives all probe request
+ * 	 	frames.
+ *		When clear, these frames are discarded.
+ * 2		Probe Response - When set, the WiLink receives all probe
+ * 		response frames.
+ *		When clear, these frames are discarded.
+ * 1		RTS/CTS/ACK - When set, the WiLink receives all RTS, CTS and ACK
+ * 	 	frames.
+ *		When clear, these frames are discarded.
+ * 0		Rsvd Type/Sub Type - When set, the WiLink receives all frames
+ * 	 	that have reserved frame types and sub types as defined by the
+ *		802.11 specification.
+ *		When clear, these frames are discarded.
+ */
+struct acx_rx_config {
+	struct acx_header header;
+
+	u32 config_options;
+	u32 filter_options;
+} __attribute__ ((packed));
+
+enum {
+	QOS_AC_BE = 0,
+	QOS_AC_BK,
+	QOS_AC_VI,
+	QOS_AC_VO,
+	QOS_HIGHEST_AC_INDEX = QOS_AC_VO,
+};
+
+#define MAX_NUM_OF_AC             (QOS_HIGHEST_AC_INDEX+1)
+#define FIRST_AC_INDEX            QOS_AC_BE
+#define MAX_NUM_OF_802_1d_TAGS    8
+#define AC_PARAMS_MAX_TSID        15
+#define MAX_APSD_CONF             0xffff
+
+#define  QOS_TX_HIGH_MIN      (0)
+#define  QOS_TX_HIGH_MAX      (100)
+
+#define  QOS_TX_HIGH_BK_DEF   (25)
+#define  QOS_TX_HIGH_BE_DEF   (35)
+#define  QOS_TX_HIGH_VI_DEF   (35)
+#define  QOS_TX_HIGH_VO_DEF   (35)
+
+#define  QOS_TX_LOW_BK_DEF    (15)
+#define  QOS_TX_LOW_BE_DEF    (25)
+#define  QOS_TX_LOW_VI_DEF    (25)
+#define  QOS_TX_LOW_VO_DEF    (25)
+
+struct acx_tx_queue_qos_config {
+	struct acx_header header;
+
+	u8 qid;
+	u8 pad[3];
+
+	/* Max number of blocks allowd in the queue */
+	u16 high_threshold;
+
+	/* Lowest memory blocks guaranteed for this queue */
+	u16 low_threshold;
+} __attribute__ ((packed));
+
+struct acx_packet_detection {
+	struct acx_header header;
+
+	u32 threshold;
+} __attribute__ ((packed));
+
+
+enum acx_slot_type {
+	SLOT_TIME_LONG = 0,
+	SLOT_TIME_SHORT = 1,
+	DEFAULT_SLOT_TIME = SLOT_TIME_SHORT,
+	MAX_SLOT_TIMES = 0xFF
+};
+
+#define STATION_WONE_INDEX 0
+
+struct acx_slot {
+	struct acx_header header;
+
+	u8 wone_index; /* Reserved */
+	u8 slot_time;
+	u8 reserved[6];
+} __attribute__ ((packed));
+
+
+#define ADDRESS_GROUP_MAX	(8)
+#define ADDRESS_GROUP_MAX_LEN	(ETH_ALEN * ADDRESS_GROUP_MAX)
+
+struct multicast_grp_addr_start {
+	struct acx_header header;
+
+	u8 enabled;
+	u8 num_groups;
+	u8 pad[2];
+	u8 mac_table[ADDRESS_GROUP_MAX_LEN];
+} __attribute__ ((packed));
+
+
+#define  RX_TIMEOUT_PS_POLL_MIN    0
+#define  RX_TIMEOUT_PS_POLL_MAX    (200000)
+#define  RX_TIMEOUT_PS_POLL_DEF    (15)
+#define  RX_TIMEOUT_UPSD_MIN       0
+#define  RX_TIMEOUT_UPSD_MAX       (200000)
+#define  RX_TIMEOUT_UPSD_DEF       (15)
+
+struct acx_rx_timeout {
+	struct acx_header header;
+
+	/*
+	 * The longest time the STA will wait to receive
+	 * traffic from the AP after a PS-poll has been
+	 * transmitted.
+	 */
+	u16 ps_poll_timeout;
+
+	/*
+	 * The longest time the STA will wait to receive
+	 * traffic from the AP after a frame has been sent
+	 * from an UPSD enabled queue.
+	 */
+	u16 upsd_timeout;
+} __attribute__ ((packed));
+
+#define RTS_THRESHOLD_MIN              0
+#define RTS_THRESHOLD_MAX              4096
+#define RTS_THRESHOLD_DEF              2347
+
+struct acx_rts_threshold {
+	struct acx_header header;
+
+	u16 threshold;
+	u8 pad[2];
+} __attribute__ ((packed));
+
+struct acx_beacon_filter_option {
+	struct acx_header header;
+
+	u8 enable;
+
+	/*
+	 * The number of beacons without the unicast TIM
+	 * bit set that the firmware buffers before
+	 * signaling the host about ready frames.
+	 * When set to 0 and the filter is enabled, beacons
+	 * without the unicast TIM bit set are dropped.
+	 */
+	u8 max_num_beacons;
+	u8 pad[2];
+} __attribute__ ((packed));
+
+/*
+ * ACXBeaconFilterEntry (not 221)
+ * Byte Offset     Size (Bytes)    Definition
+ * ===========     ============    ==========
+ * 0				1               IE identifier
+ * 1               1               Treatment bit mask
+ *
+ * ACXBeaconFilterEntry (221)
+ * Byte Offset     Size (Bytes)    Definition
+ * ===========     ============    ==========
+ * 0               1               IE identifier
+ * 1               1               Treatment bit mask
+ * 2               3               OUI
+ * 5               1               Type
+ * 6               2               Version
+ *
+ *
+ * Treatment bit mask - The information element handling:
+ * bit 0 - The information element is compared and transferred
+ * in case of change.
+ * bit 1 - The information element is transferred to the host
+ * with each appearance or disappearance.
+ * Note that both bits can be set at the same time.
+ */
+#define	BEACON_FILTER_TABLE_MAX_IE_NUM		       (32)
+#define BEACON_FILTER_TABLE_MAX_VENDOR_SPECIFIC_IE_NUM (6)
+#define BEACON_FILTER_TABLE_IE_ENTRY_SIZE	       (2)
+#define BEACON_FILTER_TABLE_EXTRA_VENDOR_SPECIFIC_IE_SIZE (6)
+#define BEACON_FILTER_TABLE_MAX_SIZE ((BEACON_FILTER_TABLE_MAX_IE_NUM * \
+			    BEACON_FILTER_TABLE_IE_ENTRY_SIZE) + \
+			   (BEACON_FILTER_TABLE_MAX_VENDOR_SPECIFIC_IE_NUM * \
+			    BEACON_FILTER_TABLE_EXTRA_VENDOR_SPECIFIC_IE_SIZE))
+
+struct acx_beacon_filter_ie_table {
+	struct acx_header header;
+
+	u8 num_ie;
+	u8 table[BEACON_FILTER_TABLE_MAX_SIZE];
+	u8 pad[3];
+} __attribute__ ((packed));
+
+enum {
+	SG_ENABLE = 0,
+	SG_DISABLE,
+	SG_SENSE_NO_ACTIVITY,
+	SG_SENSE_ACTIVE
+};
+
+struct acx_bt_wlan_coex {
+	struct acx_header header;
+
+	/*
+	 * 0 -> PTA enabled
+	 * 1 -> PTA disabled
+	 * 2 -> sense no active mode, i.e.
+	 *      an interrupt is sent upon
+	 *      BT activity.
+	 * 3 -> PTA is switched on in response
+	 *      to the interrupt sending.
+	 */
+	u8 enable;
+	u8 pad[3];
+} __attribute__ ((packed));
+
+#define PTA_ANTENNA_TYPE_DEF		  (0)
+#define PTA_BT_HP_MAXTIME_DEF		  (2000)
+#define PTA_WLAN_HP_MAX_TIME_DEF	  (5000)
+#define PTA_SENSE_DISABLE_TIMER_DEF	  (1350)
+#define PTA_PROTECTIVE_RX_TIME_DEF	  (1500)
+#define PTA_PROTECTIVE_TX_TIME_DEF	  (1500)
+#define PTA_TIMEOUT_NEXT_BT_LP_PACKET_DEF (3000)
+#define PTA_SIGNALING_TYPE_DEF		  (1)
+#define PTA_AFH_LEVERAGE_ON_DEF		  (0)
+#define PTA_NUMBER_QUIET_CYCLE_DEF	  (0)
+#define PTA_MAX_NUM_CTS_DEF		  (3)
+#define PTA_NUMBER_OF_WLAN_PACKETS_DEF	  (2)
+#define PTA_NUMBER_OF_BT_PACKETS_DEF	  (2)
+#define PTA_PROTECTIVE_RX_TIME_FAST_DEF	  (1500)
+#define PTA_PROTECTIVE_TX_TIME_FAST_DEF	  (3000)
+#define PTA_CYCLE_TIME_FAST_DEF		  (8700)
+#define PTA_RX_FOR_AVALANCHE_DEF	  (5)
+#define PTA_ELP_HP_DEF			  (0)
+#define PTA_ANTI_STARVE_PERIOD_DEF	  (500)
+#define PTA_ANTI_STARVE_NUM_CYCLE_DEF	  (4)
+#define PTA_ALLOW_PA_SD_DEF		  (1)
+#define PTA_TIME_BEFORE_BEACON_DEF	  (6300)
+#define PTA_HPDM_MAX_TIME_DEF		  (1600)
+#define PTA_TIME_OUT_NEXT_WLAN_DEF	  (2550)
+#define PTA_AUTO_MODE_NO_CTS_DEF	  (0)
+#define PTA_BT_HP_RESPECTED_DEF		  (3)
+#define PTA_WLAN_RX_MIN_RATE_DEF	  (24)
+#define PTA_ACK_MODE_DEF		  (1)
+
+struct acx_bt_wlan_coex_param {
+	struct acx_header header;
+
+	/*
+	 * The minimum rate of a received WLAN packet in the STA,
+	 * during protective mode, of which a new BT-HP request
+	 * during this Rx will always be respected and gain the antenna.
+	 */
+	u32 min_rate;
+
+	/* Max time the BT HP will be respected. */
+	u16 bt_hp_max_time;
+
+	/* Max time the WLAN HP will be respected. */
+	u16 wlan_hp_max_time;
+
+	/*
+	 * The time between the last BT activity
+	 * and the moment when the sense mode returns
+	 * to SENSE_INACTIVE.
+	 */
+	u16 sense_disable_timer;
+
+	/* Time before the next BT HP instance */
+	u16 rx_time_bt_hp;
+	u16 tx_time_bt_hp;
+
+	/* range: 10-20000    default: 1500 */
+	u16 rx_time_bt_hp_fast;
+	u16 tx_time_bt_hp_fast;
+
+	/* range: 2000-65535  default: 8700 */
+	u16 wlan_cycle_fast;
+
+	/* range: 0 - 15000 (Msec) default: 1000 */
+	u16 bt_anti_starvation_period;
+
+	/* range 400-10000(Usec) default: 3000 */
+	u16 next_bt_lp_packet;
+
+	/* Deafult: worst case for BT DH5 traffic */
+	u16 wake_up_beacon;
+
+	/* range: 0-50000(Usec) default: 1050 */
+	u16 hp_dm_max_guard_time;
+
+	/*
+	 * This is to prevent both BT & WLAN antenna
+	 * starvation.
+	 * Range: 100-50000(Usec) default:2550
+	 */
+	u16 next_wlan_packet;
+
+	/* 0 -> shared antenna */
+	u8 antenna_type;
+
+	/*
+	 * 0 -> TI legacy
+	 * 1 -> Palau
+	 */
+	u8 signal_type;
+
+	/*
+	 * BT AFH status
+	 * 0 -> no AFH
+	 * 1 -> from dedicated GPIO
+	 * 2 -> AFH on (from host)
+	 */
+	u8 afh_leverage_on;
+
+	/*
+	 * The number of cycles during which no
+	 * TX will be sent after 1 cycle of RX
+	 * transaction in protective mode
+	 */
+	u8 quiet_cycle_num;
+
+	/*
+	 * The maximum number of CTSs that will
+	 * be sent for receiving RX packet in
+	 * protective mode
+	 */
+	u8 max_cts;
+
+	/*
+	 * The number of WLAN packets
+	 * transferred in common mode before
+	 * switching to BT.
+	 */
+	u8 wlan_packets_num;
+
+	/*
+	 * The number of BT packets
+	 * transferred in common mode before
+	 * switching to WLAN.
+	 */
+	u8 bt_packets_num;
+
+	/* range: 1-255  default: 5 */
+	u8 missed_rx_avalanche;
+
+	/* range: 0-1    default: 1 */
+	u8 wlan_elp_hp;
+
+	/* range: 0 - 15  default: 4 */
+	u8 bt_anti_starvation_cycles;
+
+	u8 ack_mode_dual_ant;
+
+	/*
+	 * Allow PA_SD assertion/de-assertion
+	 * during enabled BT activity.
+	 */
+	u8 pa_sd_enable;
+
+	/*
+	 * Enable/Disable PTA in auto mode:
+	 * Support Both Active & P.S modes
+	 */
+	u8 pta_auto_mode_enable;
+
+	/* range: 0 - 20  default: 1 */
+	u8 bt_hp_respected_num;
+} __attribute__ ((packed));
+
+#define CCA_THRSH_ENABLE_ENERGY_D       0x140A
+#define CCA_THRSH_DISABLE_ENERGY_D      0xFFEF
+
+struct acx_energy_detection {
+	struct acx_header header;
+
+	/* The RX Clear Channel Assessment threshold in the PHY */
+	u16 rx_cca_threshold;
+	u8 tx_energy_detection;
+	u8 pad;
+} __attribute__ ((packed));
+
+#define BCN_RX_TIMEOUT_DEF_VALUE        10000
+#define BROADCAST_RX_TIMEOUT_DEF_VALUE  20000
+#define RX_BROADCAST_IN_PS_DEF_VALUE    1
+#define CONSECUTIVE_PS_POLL_FAILURE_DEF 4
+
+struct acx_beacon_broadcast {
+	struct acx_header header;
+
+	u16 beacon_rx_timeout;
+	u16 broadcast_timeout;
+
+	/* Enables receiving of broadcast packets in PS mode */
+	u8 rx_broadcast_in_ps;
+
+	/* Consecutive PS Poll failures before updating the host */
+	u8 ps_poll_threshold;
+	u8 pad[2];
+} __attribute__ ((packed));
+
+struct acx_event_mask {
+	struct acx_header header;
+
+	u32 event_mask;
+	u32 high_event_mask; /* Unused */
+} __attribute__ ((packed));
+
+#define CFG_RX_FCS		BIT(2)
+#define CFG_RX_ALL_GOOD		BIT(3)
+#define CFG_UNI_FILTER_EN	BIT(4)
+#define CFG_BSSID_FILTER_EN	BIT(5)
+#define CFG_MC_FILTER_EN	BIT(6)
+#define CFG_MC_ADDR0_EN		BIT(7)
+#define CFG_MC_ADDR1_EN		BIT(8)
+#define CFG_BC_REJECT_EN	BIT(9)
+#define CFG_SSID_FILTER_EN	BIT(10)
+#define CFG_RX_INT_FCS_ERROR	BIT(11)
+#define CFG_RX_INT_ENCRYPTED	BIT(12)
+#define CFG_RX_WR_RX_STATUS	BIT(13)
+#define CFG_RX_FILTER_NULTI	BIT(14)
+#define CFG_RX_RESERVE		BIT(15)
+#define CFG_RX_TIMESTAMP_TSF	BIT(16)
+
+#define CFG_RX_RSV_EN		BIT(0)
+#define CFG_RX_RCTS_ACK		BIT(1)
+#define CFG_RX_PRSP_EN		BIT(2)
+#define CFG_RX_PREQ_EN		BIT(3)
+#define CFG_RX_MGMT_EN		BIT(4)
+#define CFG_RX_FCS_ERROR	BIT(5)
+#define CFG_RX_DATA_EN		BIT(6)
+#define CFG_RX_CTL_EN		BIT(7)
+#define CFG_RX_CF_EN		BIT(8)
+#define CFG_RX_BCN_EN		BIT(9)
+#define CFG_RX_AUTH_EN		BIT(10)
+#define CFG_RX_ASSOC_EN		BIT(11)
+
+#define SCAN_PASSIVE		BIT(0)
+#define SCAN_5GHZ_BAND		BIT(1)
+#define SCAN_TRIGGERED		BIT(2)
+#define SCAN_PRIORITY_HIGH	BIT(3)
+
+struct acx_fw_gen_frame_rates {
+	struct acx_header header;
+
+	u8 tx_ctrl_frame_rate; /* RATE_* */
+	u8 tx_ctrl_frame_mod; /* CCK_* or PBCC_* */
+	u8 tx_mgt_frame_rate;
+	u8 tx_mgt_frame_mod;
+} __attribute__ ((packed));
+
+/* STA MAC */
+struct dot11_station_id {
+	struct acx_header header;
+
+	u8 mac[ETH_ALEN];
+	u8 pad[2];
+} __attribute__ ((packed));
+
+/* HW encryption keys */
+#define NUM_ACCESS_CATEGORIES_COPY 4
+#define MAX_KEY_SIZE 32
+
+/* When set, disable HW encryption */
+#define DF_ENCRYPTION_DISABLE      0x01
+/* When set, disable HW decryption */
+#define DF_SNIFF_MODE_ENABLE       0x80
+
+struct acx_feature_config {
+	struct acx_header header;
+
+	u32 options;
+	u32 data_flow_options;
+} __attribute__ ((packed));
+
+enum acx_key_action {
+	KEY_ADD_OR_REPLACE = 1,
+	KEY_REMOVE         = 2,
+	KEY_SET_ID         = 3,
+	MAX_KEY_ACTION     = 0xffff,
+};
+
+enum acx_key_type {
+	KEY_WEP_DEFAULT       = 0,
+	KEY_WEP_ADDR          = 1,
+	KEY_AES_GROUP         = 4,
+	KEY_AES_PAIRWISE      = 5,
+	KEY_WEP_GROUP         = 6,
+	KEY_TKIP_MIC_GROUP    = 10,
+	KEY_TKIP_MIC_PAIRWISE = 11,
+};
+
+/*
+ *
+ * key_type_e   key size    key format
+ * ----------   ---------   ----------
+ * 0x00         5, 13, 29   Key data
+ * 0x01         5, 13, 29   Key data
+ * 0x04         16          16 bytes of key data
+ * 0x05         16          16 bytes of key data
+ * 0x0a         32          16 bytes of TKIP key data
+ *                          8 bytes of RX MIC key data
+ *                          8 bytes of TX MIC key data
+ * 0x0b         32          16 bytes of TKIP key data
+ *                          8 bytes of RX MIC key data
+ *                          8 bytes of TX MIC key data
+ *
+ */
+
+struct acx_set_key {
+	/* Ignored for default WEP key */
+	u8 addr[ETH_ALEN];
+
+	/* key_action_e */
+	u16 key_action;
+
+	u16 reserved_1;
+
+	/* key size in bytes */
+	u8 key_size;
+
+	/* key_type_e */
+	u8 key_type;
+	u8 ssid_profile;
+
+	/*
+	 * TKIP, AES: frame's key id field.
+	 * For WEP default key: key id;
+	 */
+	u8 id;
+	u8 reserved_2[6];
+	u8 key[MAX_KEY_SIZE];
+	u16 ac_seq_num16[NUM_ACCESS_CATEGORIES_COPY];
+	u32 ac_seq_num32[NUM_ACCESS_CATEGORIES_COPY];
+} __attribute__ ((packed));
+
+struct acx_current_tx_power {
+	struct acx_header header;
+
+	u8  current_tx_power;
+	u8  padding[3];
+} __attribute__ ((packed));
+
+struct acx_dot11_default_key {
+	struct acx_header header;
+
+	u8 id;
+	u8 pad[3];
+} __attribute__ ((packed));
+
+struct acx_tsf_info {
+	struct acx_header header;
+
+	u32 current_tsf_msb;
+	u32 current_tsf_lsb;
+	u32 last_TBTT_msb;
+	u32 last_TBTT_lsb;
+	u8 last_dtim_count;
+	u8 pad[3];
+} __attribute__ ((packed));
+
+/* 802.11 PS */
+enum acx_ps_mode {
+	STATION_ACTIVE_MODE,
+	STATION_POWER_SAVE_MODE
+};
+
+struct acx_ps_params {
+	u8 ps_mode; /* STATION_* */
+	u8 send_null_data; /* Do we have to send NULL data packet ? */
+	u8 retries; /* Number of retires for the initial NULL data packet */
+
+	 /*
+	  * TUs during which the target stays awake after switching
+	  * to power save mode.
+	  */
+	u8 hang_over_period;
+	u16 null_data_rate;
+	u8 pad[2];
+} __attribute__ ((packed));
+
+enum acx_wake_up_event {
+	WAKE_UP_EVENT_BEACON_BITMAP	= 0x01, /* Wake on every Beacon*/
+	WAKE_UP_EVENT_DTIM_BITMAP	= 0x02,	/* Wake on every DTIM*/
+	WAKE_UP_EVENT_N_DTIM_BITMAP	= 0x04, /* Wake on every Nth DTIM */
+	WAKE_UP_EVENT_N_BEACONS_BITMAP	= 0x08, /* Wake on every Nth Beacon */
+	WAKE_UP_EVENT_BITS_MASK		= 0x0F
+};
+
+struct acx_wake_up_condition {
+	struct acx_header header;
+
+	u8 wake_up_event; /* Only one bit can be set */
+	u8 listen_interval;
+	u8 pad[2];
+} __attribute__ ((packed));
+
+struct acx_aid {
+	struct acx_header header;
+
+	/*
+	 * To be set when associated with an AP.
+	 */
+	u16 aid;
+	u8 pad[2];
+} __attribute__ ((packed));
+
+enum acx_preamble_type {
+	ACX_PREAMBLE_LONG = 0,
+	ACX_PREAMBLE_SHORT = 1
+};
+
+struct acx_preamble {
+	struct acx_header header;
+	/*
+	 * When set, the WiLink transmits the frames with a short preamble and
+	 * when cleared, the WiLink transmits the frames with a long preamble.
+	 */
+	u8 preamble;
+	u8 padding[3];
+} __attribute__ ((packed));
+
+enum acx_ctsprotect_type {
+	CTSPROTECT_DISABLE = 0,
+	CTSPROTECT_ENABLE = 1
+};
+
+struct acx_ctsprotect {
+	struct acx_header header;
+	u8 ctsprotect;
+	u8 padding[3];
+} __attribute__ ((packed));
+
+struct acx_tx_statistics {
+	u32 internal_desc_overflow;
+}  __attribute__ ((packed));
+
+struct acx_rx_statistics {
+	u32 out_of_mem;
+	u32 hdr_overflow;
+	u32 hw_stuck;
+	u32 dropped;
+	u32 fcs_err;
+	u32 xfr_hint_trig;
+	u32 path_reset;
+	u32 reset_counter;
+} __attribute__ ((packed));
+
+struct acx_dma_statistics {
+	u32 rx_requested;
+	u32 rx_errors;
+	u32 tx_requested;
+	u32 tx_errors;
+}  __attribute__ ((packed));
+
+struct acx_isr_statistics {
+	/* host command complete */
+	u32 cmd_cmplt;
+
+	/* fiqisr() */
+	u32 fiqs;
+
+	/* (INT_STS_ND & INT_TRIG_RX_HEADER) */
+	u32 rx_headers;
+
+	/* (INT_STS_ND & INT_TRIG_RX_CMPLT) */
+	u32 rx_completes;
+
+	/* (INT_STS_ND & INT_TRIG_NO_RX_BUF) */
+	u32 rx_mem_overflow;
+
+	/* (INT_STS_ND & INT_TRIG_S_RX_RDY) */
+	u32 rx_rdys;
+
+	/* irqisr() */
+	u32 irqs;
+
+	/* (INT_STS_ND & INT_TRIG_TX_PROC) */
+	u32 tx_procs;
+
+	/* (INT_STS_ND & INT_TRIG_DECRYPT_DONE) */
+	u32 decrypt_done;
+
+	/* (INT_STS_ND & INT_TRIG_DMA0) */
+	u32 dma0_done;
+
+	/* (INT_STS_ND & INT_TRIG_DMA1) */
+	u32 dma1_done;
+
+	/* (INT_STS_ND & INT_TRIG_TX_EXC_CMPLT) */
+	u32 tx_exch_complete;
+
+	/* (INT_STS_ND & INT_TRIG_COMMAND) */
+	u32 commands;
+
+	/* (INT_STS_ND & INT_TRIG_RX_PROC) */
+	u32 rx_procs;
+
+	/* (INT_STS_ND & INT_TRIG_PM_802) */
+	u32 hw_pm_mode_changes;
+
+	/* (INT_STS_ND & INT_TRIG_ACKNOWLEDGE) */
+	u32 host_acknowledges;
+
+	/* (INT_STS_ND & INT_TRIG_PM_PCI) */
+	u32 pci_pm;
+
+	/* (INT_STS_ND & INT_TRIG_ACM_WAKEUP) */
+	u32 wakeups;
+
+	/* (INT_STS_ND & INT_TRIG_LOW_RSSI) */
+	u32 low_rssi;
+} __attribute__ ((packed));
+
+struct acx_wep_statistics {
+	/* WEP address keys configured */
+	u32 addr_key_count;
+
+	/* default keys configured */
+	u32 default_key_count;
+
+	u32 reserved;
+
+	/* number of times that WEP key not found on lookup */
+	u32 key_not_found;
+
+	/* number of times that WEP key decryption failed */
+	u32 decrypt_fail;
+
+	/* WEP packets decrypted */
+	u32 packets;
+
+	/* WEP decrypt interrupts */
+	u32 interrupt;
+} __attribute__ ((packed));
+
+#define ACX_MISSED_BEACONS_SPREAD 10
+
+struct acx_pwr_statistics {
+	/* the amount of enters into power save mode (both PD & ELP) */
+	u32 ps_enter;
+
+	/* the amount of enters into ELP mode */
+	u32 elp_enter;
+
+	/* the amount of missing beacon interrupts to the host */
+	u32 missing_bcns;
+
+	/* the amount of wake on host-access times */
+	u32 wake_on_host;
+
+	/* the amount of wake on timer-expire */
+	u32 wake_on_timer_exp;
+
+	/* the number of packets that were transmitted with PS bit set */
+	u32 tx_with_ps;
+
+	/* the number of packets that were transmitted with PS bit clear */
+	u32 tx_without_ps;
+
+	/* the number of received beacons */
+	u32 rcvd_beacons;
+
+	/* the number of entering into PowerOn (power save off) */
+	u32 power_save_off;
+
+	/* the number of entries into power save mode */
+	u16 enable_ps;
+
+	/*
+	 * the number of exits from power save, not including failed PS
+	 * transitions
+	 */
+	u16 disable_ps;
+
+	/*
+	 * the number of times the TSF counter was adjusted because
+	 * of drift
+	 */
+	u32 fix_tsf_ps;
+
+	/* Gives statistics about the spread continuous missed beacons.
+	 * The 16 LSB are dedicated for the PS mode.
+	 * The 16 MSB are dedicated for the PS mode.
+	 * cont_miss_bcns_spread[0] - single missed beacon.
+	 * cont_miss_bcns_spread[1] - two continuous missed beacons.
+	 * cont_miss_bcns_spread[2] - three continuous missed beacons.
+	 * ...
+	 * cont_miss_bcns_spread[9] - ten and more continuous missed beacons.
+	*/
+	u32 cont_miss_bcns_spread[ACX_MISSED_BEACONS_SPREAD];
+
+	/* the number of beacons in awake mode */
+	u32 rcvd_awake_beacons;
+} __attribute__ ((packed));
+
+struct acx_mic_statistics {
+	u32 rx_pkts;
+	u32 calc_failure;
+} __attribute__ ((packed));
+
+struct acx_aes_statistics {
+	u32 encrypt_fail;
+	u32 decrypt_fail;
+	u32 encrypt_packets;
+	u32 decrypt_packets;
+	u32 encrypt_interrupt;
+	u32 decrypt_interrupt;
+} __attribute__ ((packed));
+
+struct acx_event_statistics {
+	u32 heart_beat;
+	u32 calibration;
+	u32 rx_mismatch;
+	u32 rx_mem_empty;
+	u32 rx_pool;
+	u32 oom_late;
+	u32 phy_transmit_error;
+	u32 tx_stuck;
+} __attribute__ ((packed));
+
+struct acx_ps_statistics {
+	u32 pspoll_timeouts;
+	u32 upsd_timeouts;
+	u32 upsd_max_sptime;
+	u32 upsd_max_apturn;
+	u32 pspoll_max_apturn;
+	u32 pspoll_utilization;
+	u32 upsd_utilization;
+} __attribute__ ((packed));
+
+struct acx_rxpipe_statistics {
+	u32 rx_prep_beacon_drop;
+	u32 descr_host_int_trig_rx_data;
+	u32 beacon_buffer_thres_host_int_trig_rx_data;
+	u32 missed_beacon_host_int_trig_rx_data;
+	u32 tx_xfr_host_int_trig_rx_data;
+} __attribute__ ((packed));
+
+struct acx_statistics {
+	struct acx_header header;
+
+	struct acx_tx_statistics tx;
+	struct acx_rx_statistics rx;
+	struct acx_dma_statistics dma;
+	struct acx_isr_statistics isr;
+	struct acx_wep_statistics wep;
+	struct acx_pwr_statistics pwr;
+	struct acx_aes_statistics aes;
+	struct acx_mic_statistics mic;
+	struct acx_event_statistics event;
+	struct acx_ps_statistics ps;
+	struct acx_rxpipe_statistics rxpipe;
+} __attribute__ ((packed));
+
+enum {
+	ACX_WAKE_UP_CONDITIONS      = 0x0002,
+	ACX_MEM_CFG                 = 0x0003,
+	ACX_SLOT                    = 0x0004,
+	ACX_QUEUE_HEAD              = 0x0005, /* for MASTER mode only */
+	ACX_AC_CFG                  = 0x0007,
+	ACX_MEM_MAP                 = 0x0008,
+	ACX_AID                     = 0x000A,
+	ACX_RADIO_PARAM             = 0x000B, /* Not used */
+	ACX_CFG                     = 0x000C, /* Not used */
+	ACX_FW_REV                  = 0x000D,
+	ACX_MEDIUM_USAGE            = 0x000F,
+	ACX_RX_CFG                  = 0x0010,
+	ACX_TX_QUEUE_CFG            = 0x0011, /* FIXME: only used by wl1251 */
+	ACX_BSS_IN_PS               = 0x0012, /* for AP only */
+	ACX_STATISTICS              = 0x0013, /* Debug API */
+	ACX_FEATURE_CFG             = 0x0015,
+	ACX_MISC_CFG                = 0x0017, /* Not used */
+	ACX_TID_CFG                 = 0x001A,
+	ACX_BEACON_FILTER_OPT       = 0x001F,
+	ACX_LOW_RSSI                = 0x0020,
+	ACX_NOISE_HIST              = 0x0021,
+	ACX_HDK_VERSION             = 0x0022, /* ??? */
+	ACX_PD_THRESHOLD            = 0x0023,
+	ACX_DATA_PATH_PARAMS        = 0x0024, /* WO */
+	ACX_DATA_PATH_RESP_PARAMS   = 0x0024, /* RO */
+	ACX_CCA_THRESHOLD           = 0x0025,
+	ACX_EVENT_MBOX_MASK         = 0x0026,
+#ifdef FW_RUNNING_AS_AP
+	ACX_DTIM_PERIOD             = 0x0027, /* for AP only */
+#else
+	ACX_WR_TBTT_AND_DTIM        = 0x0027, /* STA only */
+#endif
+	ACX_ACI_OPTION_CFG          = 0x0029, /* OBSOLETE (for 1251)*/
+	ACX_GPIO_CFG                = 0x002A, /* Not used */
+	ACX_GPIO_SET                = 0x002B, /* Not used */
+	ACX_PM_CFG                  = 0x002C, /* To Be Documented */
+	ACX_CONN_MONIT_PARAMS       = 0x002D,
+	ACX_AVERAGE_RSSI            = 0x002E, /* Not used */
+	ACX_CONS_TX_FAILURE         = 0x002F,
+	ACX_BCN_DTIM_OPTIONS        = 0x0031,
+	ACX_SG_ENABLE               = 0x0032,
+	ACX_SG_CFG                  = 0x0033,
+	ACX_ANTENNA_DIVERSITY_CFG   = 0x0035, /* To Be Documented */
+	ACX_LOW_SNR		    = 0x0037, /* To Be Documented */
+	ACX_BEACON_FILTER_TABLE     = 0x0038,
+	ACX_ARP_IP_FILTER           = 0x0039,
+	ACX_ROAMING_STATISTICS_TBL  = 0x003B,
+	ACX_RATE_POLICY             = 0x003D,
+	ACX_CTS_PROTECTION          = 0x003E,
+	ACX_SLEEP_AUTH              = 0x003F,
+	ACX_PREAMBLE_TYPE	    = 0x0040,
+	ACX_ERROR_CNT               = 0x0041,
+	ACX_FW_GEN_FRAME_RATES      = 0x0042,
+	ACX_IBSS_FILTER		    = 0x0044,
+	ACX_SERVICE_PERIOD_TIMEOUT  = 0x0045,
+	ACX_TSF_INFO                = 0x0046,
+	ACX_CONFIG_PS_WMM           = 0x0049,
+	ACX_ENABLE_RX_DATA_FILTER   = 0x004A,
+	ACX_SET_RX_DATA_FILTER      = 0x004B,
+	ACX_GET_DATA_FILTER_STATISTICS = 0x004C,
+	ACX_POWER_LEVEL_TABLE       = 0x004D,
+	ACX_BET_ENABLE              = 0x0050,
+	DOT11_STATION_ID            = 0x1001,
+	DOT11_RX_MSDU_LIFE_TIME     = 0x1004,
+	DOT11_CUR_TX_PWR            = 0x100D,
+	DOT11_DEFAULT_KEY           = 0x1010,
+	DOT11_RX_DOT11_MODE         = 0x1012,
+	DOT11_RTS_THRESHOLD         = 0x1013,
+	DOT11_GROUP_ADDRESS_TBL     = 0x1014,
+
+	MAX_DOT11_IE = DOT11_GROUP_ADDRESS_TBL,
+
+	MAX_IE = 0xFFFF
+};
+
+
+int wl12xx_acx_frame_rates(struct wl12xx *wl, u8 ctrl_rate, u8 ctrl_mod,
+			   u8 mgt_rate, u8 mgt_mod);
+int wl12xx_acx_station_id(struct wl12xx *wl);
+int wl12xx_acx_default_key(struct wl12xx *wl, u8 key_id);
+int wl12xx_acx_wake_up_conditions(struct wl12xx *wl, u8 listen_interval);
+int wl12xx_acx_sleep_auth(struct wl12xx *wl, u8 sleep_auth);
+int wl12xx_acx_fw_version(struct wl12xx *wl, char *buf, size_t len);
+int wl12xx_acx_tx_power(struct wl12xx *wl, int power);
+int wl12xx_acx_feature_cfg(struct wl12xx *wl);
+int wl12xx_acx_mem_map(struct wl12xx *wl, void *mem_map, size_t len);
+int wl12xx_acx_data_path_params(struct wl12xx *wl,
+				struct acx_data_path_params_resp *data_path);
+int wl12xx_acx_rx_msdu_life_time(struct wl12xx *wl, u32 life_time);
+int wl12xx_acx_rx_config(struct wl12xx *wl, u32 config, u32 filter);
+int wl12xx_acx_pd_threshold(struct wl12xx *wl);
+int wl12xx_acx_slot(struct wl12xx *wl, enum acx_slot_type slot_time);
+int wl12xx_acx_group_address_tbl(struct wl12xx *wl);
+int wl12xx_acx_service_period_timeout(struct wl12xx *wl);
+int wl12xx_acx_rts_threshold(struct wl12xx *wl, u16 rts_threshold);
+int wl12xx_acx_beacon_filter_opt(struct wl12xx *wl);
+int wl12xx_acx_beacon_filter_table(struct wl12xx *wl);
+int wl12xx_acx_sg_enable(struct wl12xx *wl);
+int wl12xx_acx_sg_cfg(struct wl12xx *wl);
+int wl12xx_acx_cca_threshold(struct wl12xx *wl);
+int wl12xx_acx_bcn_dtim_options(struct wl12xx *wl);
+int wl12xx_acx_aid(struct wl12xx *wl, u16 aid);
+int wl12xx_acx_event_mbox_mask(struct wl12xx *wl, u32 event_mask);
+int wl12xx_acx_set_preamble(struct wl12xx *wl, enum acx_preamble_type preamble);
+int wl12xx_acx_cts_protect(struct wl12xx *wl,
+			    enum acx_ctsprotect_type ctsprotect);
+int wl12xx_acx_statistics(struct wl12xx *wl, struct acx_statistics *stats);
+
+#endif /* __WL12XX_ACX_H__ */
diff --git a/drivers/net/wireless/wl12xx/boot.c b/drivers/net/wireless/wl12xx/boot.c
new file mode 100644
index 00000000000..48ac08c429b
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/boot.c
@@ -0,0 +1,295 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (C) 2008 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/gpio.h>
+
+#include "reg.h"
+#include "boot.h"
+#include "spi.h"
+#include "event.h"
+
+static void wl12xx_boot_enable_interrupts(struct wl12xx *wl)
+{
+	enable_irq(wl->irq);
+}
+
+void wl12xx_boot_target_enable_interrupts(struct wl12xx *wl)
+{
+	wl12xx_reg_write32(wl, ACX_REG_INTERRUPT_MASK, ~(wl->intr_mask));
+	wl12xx_reg_write32(wl, HI_CFG, HI_CFG_DEF_VAL);
+}
+
+int wl12xx_boot_soft_reset(struct wl12xx *wl)
+{
+	unsigned long timeout;
+	u32 boot_data;
+
+	/* perform soft reset */
+	wl12xx_reg_write32(wl, ACX_REG_SLV_SOFT_RESET, ACX_SLV_SOFT_RESET_BIT);
+
+	/* SOFT_RESET is self clearing */
+	timeout = jiffies + usecs_to_jiffies(SOFT_RESET_MAX_TIME);
+	while (1) {
+		boot_data = wl12xx_reg_read32(wl, ACX_REG_SLV_SOFT_RESET);
+		wl12xx_debug(DEBUG_BOOT, "soft reset bootdata 0x%x", boot_data);
+		if ((boot_data & ACX_SLV_SOFT_RESET_BIT) == 0)
+			break;
+
+		if (time_after(jiffies, timeout)) {
+			/* 1.2 check pWhalBus->uSelfClearTime if the
+			 * timeout was reached */
+			wl12xx_error("soft reset timeout");
+			return -1;
+		}
+
+		udelay(SOFT_RESET_STALL_TIME);
+	}
+
+	/* disable Rx/Tx */
+	wl12xx_reg_write32(wl, ENABLE, 0x0);
+
+	/* disable auto calibration on start*/
+	wl12xx_reg_write32(wl, SPARE_A2, 0xffff);
+
+	return 0;
+}
+
+int wl12xx_boot_init_seq(struct wl12xx *wl)
+{
+	u32 scr_pad6, init_data, tmp, elp_cmd, ref_freq;
+
+	/*
+	 * col #1: INTEGER_DIVIDER
+	 * col #2: FRACTIONAL_DIVIDER
+	 * col #3: ATTN_BB
+	 * col #4: ALPHA_BB
+	 * col #5: STOP_TIME_BB
+	 * col #6: BB_PLL_LOOP_FILTER
+	 */
+	static const u32 LUT[REF_FREQ_NUM][LUT_PARAM_NUM] = {
+
+		{   83, 87381,  0xB, 5, 0xF00,  3}, /* REF_FREQ_19_2*/
+		{   61, 141154, 0xB, 5, 0x1450, 2}, /* REF_FREQ_26_0*/
+		{   41, 174763, 0xC, 6, 0x2D00, 1}, /* REF_FREQ_38_4*/
+		{   40, 0,      0xC, 6, 0x2EE0, 1}, /* REF_FREQ_40_0*/
+		{   47, 162280, 0xC, 6, 0x2760, 1}  /* REF_FREQ_33_6        */
+	};
+
+	/* read NVS params */
+	scr_pad6 = wl12xx_reg_read32(wl, SCR_PAD6);
+	wl12xx_debug(DEBUG_BOOT, "scr_pad6 0x%x", scr_pad6);
+
+	/* read ELP_CMD */
+	elp_cmd = wl12xx_reg_read32(wl, ELP_CMD);
+	wl12xx_debug(DEBUG_BOOT, "elp_cmd 0x%x", elp_cmd);
+
+	/* set the BB calibration time to be 300 usec (PLL_CAL_TIME) */
+	ref_freq = scr_pad6 & 0x000000FF;
+	wl12xx_debug(DEBUG_BOOT, "ref_freq 0x%x", ref_freq);
+
+	wl12xx_reg_write32(wl, PLL_CAL_TIME, 0x9);
+
+	/*
+	 * PG 1.2: set the clock buffer time to be 210 usec (CLK_BUF_TIME)
+	 */
+	wl12xx_reg_write32(wl, CLK_BUF_TIME, 0x6);
+
+	/*
+	 * set the clock detect feature to work in the restart wu procedure
+	 * (ELP_CFG_MODE[14]) and Select the clock source type
+	 * (ELP_CFG_MODE[13:12])
+	 */
+	tmp = ((scr_pad6 & 0x0000FF00) << 4) | 0x00004000;
+	wl12xx_reg_write32(wl, ELP_CFG_MODE, tmp);
+
+	/* PG 1.2: enable the BB PLL fix. Enable the PLL_LIMP_CLK_EN_CMD */
+	elp_cmd |= 0x00000040;
+	wl12xx_reg_write32(wl, ELP_CMD, elp_cmd);
+
+	/* PG 1.2: Set the BB PLL stable time to be 1000usec
+	 * (PLL_STABLE_TIME) */
+	wl12xx_reg_write32(wl, CFG_PLL_SYNC_CNT, 0x20);
+
+	/* PG 1.2: read clock request time */
+	init_data = wl12xx_reg_read32(wl, CLK_REQ_TIME);
+
+	/*
+	 * PG 1.2: set the clock request time to be ref_clk_settling_time -
+	 * 1ms = 4ms
+	 */
+	if (init_data > 0x21)
+		tmp = init_data - 0x21;
+	else
+		tmp = 0;
+	wl12xx_reg_write32(wl, CLK_REQ_TIME, tmp);
+
+	/* set BB PLL configurations in RF AFE */
+	wl12xx_reg_write32(wl, 0x003058cc, 0x4B5);
+
+	/* set RF_AFE_REG_5 */
+	wl12xx_reg_write32(wl, 0x003058d4, 0x50);
+
+	/* set RF_AFE_CTRL_REG_2 */
+	wl12xx_reg_write32(wl, 0x00305948, 0x11c001);
+
+	/*
+	 * change RF PLL and BB PLL divider for VCO clock and adjust VCO
+	 * bais current(RF_AFE_REG_13)
+	 */
+	wl12xx_reg_write32(wl, 0x003058f4, 0x1e);
+
+	/* set BB PLL configurations */
+	tmp = LUT[ref_freq][LUT_PARAM_INTEGER_DIVIDER] | 0x00017000;
+	wl12xx_reg_write32(wl, 0x00305840, tmp);
+
+	/* set fractional divider according to Appendix C-BB PLL
+	 * Calculations
+	 */
+	tmp = LUT[ref_freq][LUT_PARAM_FRACTIONAL_DIVIDER];
+	wl12xx_reg_write32(wl, 0x00305844, tmp);
+
+	/* set the initial data for the sigma delta */
+	wl12xx_reg_write32(wl, 0x00305848, 0x3039);
+
+	/*
+	 * set the accumulator attenuation value, calibration loop1
+	 * (alpha), calibration loop2 (beta), calibration loop3 (gamma) and
+	 * the VCO gain
+	 */
+	tmp = (LUT[ref_freq][LUT_PARAM_ATTN_BB] << 16) |
+		(LUT[ref_freq][LUT_PARAM_ALPHA_BB] << 12) | 0x1;
+	wl12xx_reg_write32(wl, 0x00305854, tmp);
+
+	/*
+	 * set the calibration stop time after holdoff time expires and set
+	 * settling time HOLD_OFF_TIME_BB
+	 */
+	tmp = LUT[ref_freq][LUT_PARAM_STOP_TIME_BB] | 0x000A0000;
+	wl12xx_reg_write32(wl, 0x00305858, tmp);
+
+	/*
+	 * set BB PLL Loop filter capacitor3- BB_C3[2:0] and set BB PLL
+	 * constant leakage current to linearize PFD to 0uA -
+	 * BB_ILOOPF[7:3]
+	 */
+	tmp = LUT[ref_freq][LUT_PARAM_BB_PLL_LOOP_FILTER] | 0x00000030;
+	wl12xx_reg_write32(wl, 0x003058f8, tmp);
+
+	/*
+	 * set regulator output voltage for n divider to
+	 * 1.35-BB_REFDIV[1:0], set charge pump current- BB_CPGAIN[4:2],
+	 * set BB PLL Loop filter capacitor2- BB_C2[7:5], set gain of BB
+	 * PLL auto-call to normal mode- BB_CALGAIN_3DB[8]
+	 */
+	wl12xx_reg_write32(wl, 0x003058f0, 0x29);
+
+	/* enable restart wakeup sequence (ELP_CMD[0]) */
+	wl12xx_reg_write32(wl, ELP_CMD, elp_cmd | 0x1);
+
+	/* restart sequence completed */
+	udelay(2000);
+
+	return 0;
+}
+
+int wl12xx_boot_run_firmware(struct wl12xx *wl)
+{
+	int loop, ret;
+	u32 chip_id, interrupt;
+
+	wl->chip.op_set_ecpu_ctrl(wl, ECPU_CONTROL_HALT);
+
+	chip_id = wl12xx_reg_read32(wl, CHIP_ID_B);
+
+	wl12xx_debug(DEBUG_BOOT, "chip id after firmware boot: 0x%x", chip_id);
+
+	if (chip_id != wl->chip.id) {
+		wl12xx_error("chip id doesn't match after firmware boot");
+		return -EIO;
+	}
+
+	/* wait for init to complete */
+	loop = 0;
+	while (loop++ < INIT_LOOP) {
+		udelay(INIT_LOOP_DELAY);
+		interrupt = wl12xx_reg_read32(wl, ACX_REG_INTERRUPT_NO_CLEAR);
+
+		if (interrupt == 0xffffffff) {
+			wl12xx_error("error reading hardware complete "
+				     "init indication");
+			return -EIO;
+		}
+		/* check that ACX_INTR_INIT_COMPLETE is enabled */
+		else if (interrupt & wl->chip.intr_init_complete) {
+			wl12xx_reg_write32(wl, ACX_REG_INTERRUPT_ACK,
+					   wl->chip.intr_init_complete);
+			break;
+		}
+	}
+
+	if (loop >= INIT_LOOP) {
+		wl12xx_error("timeout waiting for the hardware to "
+			     "complete initialization");
+		return -EIO;
+	}
+
+	/* get hardware config command mail box */
+	wl->cmd_box_addr = wl12xx_reg_read32(wl, REG_COMMAND_MAILBOX_PTR);
+
+	/* get hardware config event mail box */
+	wl->event_box_addr = wl12xx_reg_read32(wl, REG_EVENT_MAILBOX_PTR);
+
+	/* set the working partition to its "running" mode offset */
+	wl12xx_set_partition(wl,
+			     wl->chip.p_table[PART_WORK].mem.start,
+			     wl->chip.p_table[PART_WORK].mem.size,
+			     wl->chip.p_table[PART_WORK].reg.start,
+			     wl->chip.p_table[PART_WORK].reg.size);
+
+	wl12xx_debug(DEBUG_MAILBOX, "cmd_box_addr 0x%x event_box_addr 0x%x",
+		     wl->cmd_box_addr, wl->event_box_addr);
+
+	/*
+	 * in case of full asynchronous mode the firmware event must be
+	 * ready to receive event from the command mailbox
+	 */
+
+	/* enable gpio interrupts */
+	wl12xx_boot_enable_interrupts(wl);
+
+	wl->chip.op_target_enable_interrupts(wl);
+
+	/* unmask all mbox events  */
+	wl->event_mask = 0xffffffff;
+
+	ret = wl12xx_event_unmask(wl);
+	if (ret < 0) {
+		wl12xx_error("EVENT mask setting failed");
+		return ret;
+	}
+
+	wl12xx_event_mbox_config(wl);
+
+	/* firmware startup completed */
+	return 0;
+}
diff --git a/drivers/net/wireless/wl12xx/boot.h b/drivers/net/wireless/wl12xx/boot.h
new file mode 100644
index 00000000000..4fa73132baa
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/boot.h
@@ -0,0 +1,40 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (C) 2008 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#ifndef __BOOT_H__
+#define __BOOT_H__
+
+#include "wl12xx.h"
+
+int wl12xx_boot_soft_reset(struct wl12xx *wl);
+int wl12xx_boot_init_seq(struct wl12xx *wl);
+int wl12xx_boot_run_firmware(struct wl12xx *wl);
+void wl12xx_boot_target_enable_interrupts(struct wl12xx *wl);
+
+/* number of times we try to read the INIT interrupt */
+#define INIT_LOOP 20000
+
+/* delay between retries */
+#define INIT_LOOP_DELAY 50
+
+#endif
diff --git a/drivers/net/wireless/wl12xx/cmd.c b/drivers/net/wireless/wl12xx/cmd.c
new file mode 100644
index 00000000000..f73ab602b7a
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/cmd.c
@@ -0,0 +1,353 @@
+#include "cmd.h"
+
+#include <linux/module.h>
+#include <linux/crc7.h>
+#include <linux/spi/spi.h>
+
+#include "wl12xx.h"
+#include "wl12xx_80211.h"
+#include "reg.h"
+#include "spi.h"
+#include "ps.h"
+
+int wl12xx_cmd_send(struct wl12xx *wl, u16 type, void *buf, size_t buf_len)
+{
+	struct wl12xx_command cmd;
+	unsigned long timeout;
+	size_t cmd_len;
+	u32 intr;
+	int ret = 0;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.id = type;
+	cmd.status = 0;
+	memcpy(cmd.parameters, buf, buf_len);
+	cmd_len = ALIGN(buf_len, 4) + CMDMBOX_HEADER_LEN;
+
+	wl12xx_ps_elp_wakeup(wl);
+
+	wl12xx_spi_mem_write(wl, wl->cmd_box_addr, &cmd, cmd_len);
+
+	wl12xx_reg_write32(wl, ACX_REG_INTERRUPT_TRIG, INTR_TRIG_CMD);
+
+	timeout = jiffies + msecs_to_jiffies(WL12XX_COMMAND_TIMEOUT);
+
+	intr = wl12xx_reg_read32(wl, ACX_REG_INTERRUPT_NO_CLEAR);
+	while (!(intr & wl->chip.intr_cmd_complete)) {
+		if (time_after(jiffies, timeout)) {
+			wl12xx_error("command complete timeout");
+			ret = -ETIMEDOUT;
+			goto out;
+		}
+
+		msleep(1);
+
+		intr = wl12xx_reg_read32(wl, ACX_REG_INTERRUPT_NO_CLEAR);
+	}
+
+	wl12xx_reg_write32(wl, ACX_REG_INTERRUPT_ACK,
+			   wl->chip.intr_cmd_complete);
+
+out:
+	wl12xx_ps_elp_sleep(wl);
+
+	return ret;
+}
+
+int wl12xx_cmd_test(struct wl12xx *wl, void *buf, size_t buf_len, u8 answer)
+{
+	int ret;
+
+	wl12xx_debug(DEBUG_CMD, "cmd test");
+
+	ret = wl12xx_cmd_send(wl, CMD_TEST, buf, buf_len);
+	if (ret < 0) {
+		wl12xx_warning("TEST command failed");
+		return ret;
+	}
+
+	if (answer) {
+		struct wl12xx_command *cmd_answer;
+
+		/*
+		 * The test command got in, we can read the answer.
+		 * The answer would be a wl12xx_command, where the
+		 * parameter array contains the actual answer.
+		 */
+
+		wl12xx_ps_elp_wakeup(wl);
+
+		wl12xx_spi_mem_read(wl, wl->cmd_box_addr, buf, buf_len);
+
+		wl12xx_ps_elp_sleep(wl);
+
+		cmd_answer = buf;
+		if (cmd_answer->status != CMD_STATUS_SUCCESS)
+			wl12xx_error("TEST command answer error: %d",
+				     cmd_answer->status);
+	}
+
+	return 0;
+}
+
+
+int wl12xx_cmd_interrogate(struct wl12xx *wl, u16 ie_id, u16 ie_len,
+			   void *answer)
+{
+	struct wl12xx_command *cmd;
+	struct acx_header header;
+	int ret;
+
+	wl12xx_debug(DEBUG_CMD, "cmd interrogate");
+
+	header.id = ie_id;
+	header.len = ie_len - sizeof(header);
+
+	ret = wl12xx_cmd_send(wl, CMD_INTERROGATE, &header, sizeof(header));
+	if (ret < 0) {
+		wl12xx_error("INTERROGATE command failed");
+		return ret;
+	}
+
+	wl12xx_ps_elp_wakeup(wl);
+
+	/* the interrogate command got in, we can read the answer */
+	wl12xx_spi_mem_read(wl, wl->cmd_box_addr, answer,
+			    CMDMBOX_HEADER_LEN + ie_len);
+
+	wl12xx_ps_elp_sleep(wl);
+
+	cmd = answer;
+	if (cmd->status != CMD_STATUS_SUCCESS)
+		wl12xx_error("INTERROGATE command error: %d",
+			     cmd->status);
+
+	return 0;
+
+}
+
+int wl12xx_cmd_configure(struct wl12xx *wl, void *ie, int ie_len)
+{
+	int ret;
+
+	wl12xx_debug(DEBUG_CMD, "cmd configure");
+
+	ret = wl12xx_cmd_send(wl, CMD_CONFIGURE, ie,
+			      ie_len);
+	if (ret < 0) {
+		wl12xx_warning("CONFIGURE command NOK");
+		return ret;
+	}
+
+	return 0;
+
+}
+
+int wl12xx_cmd_vbm(struct wl12xx *wl, u8 identity,
+		   void *bitmap, u16 bitmap_len, u8 bitmap_control)
+{
+	struct vbm_update_request vbm;
+	int ret;
+
+	wl12xx_debug(DEBUG_CMD, "cmd vbm");
+
+	/* Count and period will be filled by the target */
+	vbm.tim.bitmap_ctrl = bitmap_control;
+	if (bitmap_len > PARTIAL_VBM_MAX) {
+		wl12xx_warning("cmd vbm len is %d B, truncating to %d",
+			       bitmap_len, PARTIAL_VBM_MAX);
+		bitmap_len = PARTIAL_VBM_MAX;
+	}
+	memcpy(vbm.tim.pvb_field, bitmap, bitmap_len);
+	vbm.tim.identity = identity;
+	vbm.tim.length = bitmap_len + 3;
+
+	vbm.len = cpu_to_le16(bitmap_len + 5);
+
+	ret = wl12xx_cmd_send(wl, CMD_VBM, &vbm, sizeof(vbm));
+	if (ret < 0) {
+		wl12xx_error("VBM command failed");
+		return ret;
+	}
+
+	return 0;
+}
+
+int wl12xx_cmd_data_path(struct wl12xx *wl, u8 channel, u8 enable)
+{
+	int ret;
+	u16 cmd_rx, cmd_tx;
+
+	wl12xx_debug(DEBUG_CMD, "cmd data path");
+
+	if (enable) {
+		cmd_rx = CMD_ENABLE_RX;
+		cmd_tx = CMD_ENABLE_TX;
+	} else {
+		cmd_rx = CMD_DISABLE_RX;
+		cmd_tx = CMD_DISABLE_TX;
+	}
+
+	ret = wl12xx_cmd_send(wl, cmd_rx, &channel, sizeof(channel));
+	if (ret < 0) {
+		wl12xx_error("rx %s cmd for channel %d failed",
+			     enable ? "start" : "stop", channel);
+		return ret;
+	}
+
+	wl12xx_debug(DEBUG_BOOT, "rx %s cmd channel %d",
+		     enable ? "start" : "stop", channel);
+
+	ret = wl12xx_cmd_send(wl, cmd_tx, &channel, sizeof(channel));
+	if (ret < 0) {
+		wl12xx_error("tx %s cmd for channel %d failed",
+			     enable ? "start" : "stop", channel);
+		return ret;
+	}
+
+	wl12xx_debug(DEBUG_BOOT, "tx %s cmd channel %d",
+		     enable ? "start" : "stop", channel);
+
+	return 0;
+}
+
+int wl12xx_cmd_join(struct wl12xx *wl, u8 bss_type, u8 dtim_interval,
+		    u16 beacon_interval, u8 wait)
+{
+	unsigned long timeout;
+	struct cmd_join join = {};
+	int ret, i;
+	u8 *bssid;
+
+	/* FIXME: this should be in main.c */
+	ret = wl12xx_acx_frame_rates(wl, DEFAULT_HW_GEN_TX_RATE,
+				     DEFAULT_HW_GEN_MODULATION_TYPE,
+				     wl->tx_mgmt_frm_rate,
+				     wl->tx_mgmt_frm_mod);
+	if (ret < 0)
+		return ret;
+
+	wl12xx_debug(DEBUG_CMD, "cmd join");
+
+	/* Reverse order BSSID */
+	bssid = (u8 *)&join.bssid_lsb;
+	for (i = 0; i < ETH_ALEN; i++)
+		bssid[i] = wl->bssid[ETH_ALEN - i - 1];
+
+	join.rx_config_options = wl->rx_config;
+	join.rx_filter_options = wl->rx_filter;
+
+	join.basic_rate_set = RATE_MASK_1MBPS | RATE_MASK_2MBPS |
+		RATE_MASK_5_5MBPS | RATE_MASK_11MBPS;
+
+	join.beacon_interval = beacon_interval;
+	join.dtim_interval = dtim_interval;
+	join.bss_type = bss_type;
+	join.channel = wl->channel;
+	join.ctrl = JOIN_CMD_CTRL_TX_FLUSH;
+
+	ret = wl12xx_cmd_send(wl, CMD_START_JOIN, &join, sizeof(join));
+	if (ret < 0) {
+		wl12xx_error("failed to initiate cmd join");
+		return ret;
+	}
+
+	timeout = msecs_to_jiffies(JOIN_TIMEOUT);
+
+	/*
+	 * ugly hack: we should wait for JOIN_EVENT_COMPLETE_ID but to
+	 * simplify locking we just sleep instead, for now
+	 */
+	if (wait)
+		msleep(10);
+
+	return 0;
+}
+
+int wl12xx_cmd_ps_mode(struct wl12xx *wl, u8 ps_mode)
+{
+	int ret;
+	struct acx_ps_params ps_params;
+
+	/* FIXME: this should be in ps.c */
+	ret = wl12xx_acx_wake_up_conditions(wl, wl->listen_int);
+	if (ret < 0) {
+		wl12xx_error("Couldnt set wake up conditions");
+		return ret;
+	}
+
+	wl12xx_debug(DEBUG_CMD, "cmd set ps mode");
+
+	ps_params.ps_mode = ps_mode;
+	ps_params.send_null_data = 1;
+	ps_params.retries = 5;
+	ps_params.hang_over_period = 128;
+	ps_params.null_data_rate = 1; /* 1 Mbps */
+
+	ret = wl12xx_cmd_send(wl, CMD_SET_PS_MODE, &ps_params,
+			      sizeof(ps_params));
+	if (ret < 0) {
+		wl12xx_error("cmd set_ps_mode failed");
+		return ret;
+	}
+
+	return 0;
+}
+
+int wl12xx_cmd_read_memory(struct wl12xx *wl, u32 addr, u32 len, void *answer)
+{
+	struct cmd_read_write_memory mem_cmd, *mem_answer;
+	struct wl12xx_command cmd;
+	int ret;
+
+	wl12xx_debug(DEBUG_CMD, "cmd read memory");
+
+	memset(&mem_cmd, 0, sizeof(mem_cmd));
+	mem_cmd.addr = addr;
+	mem_cmd.size = len;
+
+	ret = wl12xx_cmd_send(wl, CMD_READ_MEMORY, &mem_cmd, sizeof(mem_cmd));
+	if (ret < 0) {
+		wl12xx_error("read memory command failed: %d", ret);
+		return ret;
+	}
+
+	/* the read command got in, we can now read the answer */
+	wl12xx_spi_mem_read(wl, wl->cmd_box_addr, &cmd,
+			    CMDMBOX_HEADER_LEN + sizeof(mem_cmd));
+
+	if (cmd.status != CMD_STATUS_SUCCESS)
+		wl12xx_error("error in read command result: %d", cmd.status);
+
+	mem_answer = (struct cmd_read_write_memory *) cmd.parameters;
+	memcpy(answer, mem_answer->value, len);
+
+	return 0;
+}
+
+int wl12xx_cmd_template_set(struct wl12xx *wl, u16 cmd_id,
+			    void *buf, size_t buf_len)
+{
+	struct wl12xx_cmd_packet_template template;
+	int ret;
+
+	wl12xx_debug(DEBUG_CMD, "cmd template %d", cmd_id);
+
+	memset(&template, 0, sizeof(template));
+
+	WARN_ON(buf_len > WL12XX_MAX_TEMPLATE_SIZE);
+	buf_len = min_t(size_t, buf_len, WL12XX_MAX_TEMPLATE_SIZE);
+	template.size = cpu_to_le16(buf_len);
+
+	if (buf)
+		memcpy(template.template, buf, buf_len);
+
+	ret = wl12xx_cmd_send(wl, cmd_id, &template,
+			      sizeof(template.size) + buf_len);
+	if (ret < 0) {
+		wl12xx_warning("cmd set_template failed: %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
diff --git a/drivers/net/wireless/wl12xx/cmd.h b/drivers/net/wireless/wl12xx/cmd.h
new file mode 100644
index 00000000000..aa307dcd081
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/cmd.h
@@ -0,0 +1,265 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (c) 1998-2007 Texas Instruments Incorporated
+ * Copyright (C) 2008 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#ifndef __WL12XX_CMD_H__
+#define __WL12XX_CMD_H__
+
+#include "wl12xx.h"
+
+int wl12xx_cmd_send(struct wl12xx *wl, u16 type, void *buf, size_t buf_len);
+int wl12xx_cmd_test(struct wl12xx *wl, void *buf, size_t buf_len, u8 answer);
+int wl12xx_cmd_interrogate(struct wl12xx *wl, u16 ie_id, u16 ie_len,
+			   void *answer);
+int wl12xx_cmd_configure(struct wl12xx *wl, void *ie, int ie_len);
+int wl12xx_cmd_vbm(struct wl12xx *wl, u8 identity,
+		   void *bitmap, u16 bitmap_len, u8 bitmap_control);
+int wl12xx_cmd_data_path(struct wl12xx *wl, u8 channel, u8 enable);
+int wl12xx_cmd_join(struct wl12xx *wl, u8 bss_type, u8 dtim_interval,
+		    u16 beacon_interval, u8 wait);
+int wl12xx_cmd_ps_mode(struct wl12xx *wl, u8 ps_mode);
+int wl12xx_cmd_read_memory(struct wl12xx *wl, u32 addr, u32 len, void *answer);
+int wl12xx_cmd_template_set(struct wl12xx *wl, u16 cmd_id,
+			    void *buf, size_t buf_len);
+
+/* unit ms */
+#define WL12XX_COMMAND_TIMEOUT 2000
+
+#define WL12XX_MAX_TEMPLATE_SIZE 300
+
+struct wl12xx_cmd_packet_template {
+	__le16 size;
+	u8 template[WL12XX_MAX_TEMPLATE_SIZE];
+} __attribute__ ((packed));
+
+enum wl12xx_commands {
+	CMD_RESET           = 0,
+	CMD_INTERROGATE     = 1,    /*use this to read information elements*/
+	CMD_CONFIGURE       = 2,    /*use this to write information elements*/
+	CMD_ENABLE_RX       = 3,
+	CMD_ENABLE_TX       = 4,
+	CMD_DISABLE_RX      = 5,
+	CMD_DISABLE_TX      = 6,
+	CMD_SCAN            = 8,
+	CMD_STOP_SCAN       = 9,
+	CMD_VBM             = 10,
+	CMD_START_JOIN      = 11,
+	CMD_SET_KEYS        = 12,
+	CMD_READ_MEMORY     = 13,
+	CMD_WRITE_MEMORY    = 14,
+	CMD_BEACON          = 19,
+	CMD_PROBE_RESP      = 20,
+	CMD_NULL_DATA       = 21,
+	CMD_PROBE_REQ       = 22,
+	CMD_TEST            = 23,
+	CMD_RADIO_CALIBRATE     = 25,   /* OBSOLETE */
+	CMD_ENABLE_RX_PATH      = 27,   /* OBSOLETE */
+	CMD_NOISE_HIST      = 28,
+	CMD_RX_RESET        = 29,
+	CMD_PS_POLL         = 30,
+	CMD_QOS_NULL_DATA   = 31,
+	CMD_LNA_CONTROL     = 32,
+	CMD_SET_BCN_MODE    = 33,
+	CMD_MEASUREMENT      = 34,
+	CMD_STOP_MEASUREMENT = 35,
+	CMD_DISCONNECT       = 36,
+	CMD_SET_PS_MODE      = 37,
+	CMD_CHANNEL_SWITCH   = 38,
+	CMD_STOP_CHANNEL_SWICTH = 39,
+	CMD_AP_DISCOVERY     = 40,
+	CMD_STOP_AP_DISCOVERY = 41,
+	CMD_SPS_SCAN = 42,
+	CMD_STOP_SPS_SCAN = 43,
+	CMD_HEALTH_CHECK     = 45,
+	CMD_DEBUG            = 46,
+	CMD_TRIGGER_SCAN_TO  = 47,
+
+	NUM_COMMANDS,
+	MAX_COMMAND_ID = 0xFFFF,
+};
+
+#define MAX_CMD_PARAMS 572
+
+struct  wl12xx_command {
+	u16 id;
+	u16 status;
+	u8  parameters[MAX_CMD_PARAMS];
+};
+
+enum {
+	CMD_MAILBOX_IDLE              		=  0,
+	CMD_STATUS_SUCCESS            		=  1,
+	CMD_STATUS_UNKNOWN_CMD        		=  2,
+	CMD_STATUS_UNKNOWN_IE         		=  3,
+	CMD_STATUS_REJECT_MEAS_SG_ACTIVE 	= 11,
+	CMD_STATUS_RX_BUSY            		= 13,
+	CMD_STATUS_INVALID_PARAM      		= 14,
+	CMD_STATUS_TEMPLATE_TOO_LARGE 		= 15,
+	CMD_STATUS_OUT_OF_MEMORY      		= 16,
+	CMD_STATUS_STA_TABLE_FULL     		= 17,
+	CMD_STATUS_RADIO_ERROR        		= 18,
+	CMD_STATUS_WRONG_NESTING      		= 19,
+	CMD_STATUS_TIMEOUT            		= 21, /* Driver internal use.*/
+	CMD_STATUS_FW_RESET           		= 22, /* Driver internal use.*/
+	MAX_COMMAND_STATUS            		= 0xff
+};
+
+
+/*
+ * CMD_READ_MEMORY
+ *
+ * The host issues this command to read the WiLink device memory/registers.
+ *
+ * Note: The Base Band address has special handling (16 bits registers and
+ * addresses). For more information, see the hardware specification.
+ */
+/*
+ * CMD_WRITE_MEMORY
+ *
+ * The host issues this command to write the WiLink device memory/registers.
+ *
+ * The Base Band address has special handling (16 bits registers and
+ * addresses). For more information, see the hardware specification.
+ */
+#define MAX_READ_SIZE 256
+
+struct cmd_read_write_memory {
+	/* The address of the memory to read from or write to.*/
+	u32 addr;
+
+	/* The amount of data in bytes to read from or write to the WiLink
+	 * device.*/
+	u32 size;
+
+	/* The actual value read from or written to the Wilink. The source
+	   of this field is the Host in WRITE command or the Wilink in READ
+	   command. */
+	u8 value[MAX_READ_SIZE];
+};
+
+#define CMDMBOX_HEADER_LEN 4
+#define CMDMBOX_INFO_ELEM_HEADER_LEN 4
+
+
+struct basic_scan_parameters {
+	u32 rx_config_options;
+	u32 rx_filter_options;
+
+	/*
+	 * Scan options:
+	 * bit 0: When this bit is set, passive scan.
+	 * bit 1: Band, when this bit is set we scan
+	 * in the 5Ghz band.
+	 * bit 2: voice mode, 0 for normal scan.
+	 * bit 3: scan priority, 1 for high priority.
+	 */
+	u16 scan_options;
+
+	/* Number of channels to scan */
+	u8 num_channels;
+
+	/* Number opf probe requests to send, per channel */
+	u8 num_probe_requests;
+
+	/* Rate and modulation for probe requests */
+	u16 tx_rate;
+
+	u8 tid_trigger;
+	u8 ssid_len;
+	u32 ssid[8];
+
+} __attribute__ ((packed));
+
+struct basic_scan_channel_parameters {
+	u32 min_duration; /* in TU */
+	u32 max_duration; /* in TU */
+	u32 bssid_lsb;
+	u16 bssid_msb;
+
+	/*
+	 * bits 0-3: Early termination count.
+	 * bits 4-5: Early termination condition.
+	 */
+	u8 early_termination;
+
+	u8 tx_power_att;
+	u8 channel;
+	u8 pad[3];
+} __attribute__ ((packed));
+
+/* SCAN parameters */
+#define SCAN_MAX_NUM_OF_CHANNELS 16
+
+struct cmd_scan {
+	struct basic_scan_parameters params;
+	struct basic_scan_channel_parameters channels[SCAN_MAX_NUM_OF_CHANNELS];
+} __attribute__ ((packed));
+
+enum {
+	BSS_TYPE_IBSS = 0,
+	BSS_TYPE_STA_BSS = 2,
+	BSS_TYPE_AP_BSS = 3,
+	MAX_BSS_TYPE = 0xFF
+};
+
+#define JOIN_CMD_CTRL_TX_FLUSH             0x80 /* Firmware flushes all Tx */
+#define JOIN_CMD_CTRL_EARLY_WAKEUP_ENABLE  0x01 /* Early wakeup time */
+
+
+struct cmd_join {
+	u32 bssid_lsb;
+	u16 bssid_msb;
+	u16 beacon_interval; /* in TBTTs */
+	u32 rx_config_options;
+	u32 rx_filter_options;
+
+	/*
+	 * The target uses this field to determine the rate at
+	 * which to transmit control frame responses (such as
+	 * ACK or CTS frames).
+	 */
+	u16 basic_rate_set;
+	u8 dtim_interval;
+	u8 tx_ctrl_frame_rate; /* OBSOLETE */
+	u8 tx_ctrl_frame_mod;  /* OBSOLETE */
+	/*
+	 * bits 0-2: This bitwise field specifies the type
+	 * of BSS to start or join (BSS_TYPE_*).
+	 * bit 4: Band - The radio band in which to join
+	 * or start.
+	 *  0 - 2.4GHz band
+	 *  1 - 5GHz band
+	 * bits 3, 5-7: Reserved
+	 */
+	u8 bss_type;
+	u8 channel;
+	u8 ssid_len;
+	u8 ssid[IW_ESSID_MAX_SIZE];
+	u8 ctrl; /* JOIN_CMD_CTRL_* */
+	u8 tx_mgt_frame_rate; /* OBSOLETE */
+	u8 tx_mgt_frame_mod;  /* OBSOLETE */
+	u8 reserved;
+} __attribute__ ((packed));
+
+
+#endif /* __WL12XX_CMD_H__ */
diff --git a/drivers/net/wireless/wl12xx/debugfs.c b/drivers/net/wireless/wl12xx/debugfs.c
new file mode 100644
index 00000000000..cdb368ce4da
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/debugfs.c
@@ -0,0 +1,508 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (C) 2009 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include "debugfs.h"
+
+#include <linux/skbuff.h>
+
+#include "wl12xx.h"
+#include "acx.h"
+
+/* ms */
+#define WL12XX_DEBUGFS_STATS_LIFETIME 1000
+
+/* debugfs macros idea from mac80211 */
+
+#define DEBUGFS_READONLY_FILE(name, buflen, fmt, value...)		\
+static ssize_t name## _read(struct file *file, char __user *userbuf,	\
+			    size_t count, loff_t *ppos)			\
+{									\
+	struct wl12xx *wl = file->private_data;				\
+	char buf[buflen];						\
+	int res;							\
+									\
+	res = scnprintf(buf, buflen, fmt "\n", ##value);		\
+	return simple_read_from_buffer(userbuf, count, ppos, buf, res);	\
+}									\
+									\
+static const struct file_operations name## _ops = {			\
+	.read = name## _read,						\
+	.open = wl12xx_open_file_generic,				\
+};
+
+#define DEBUGFS_ADD(name, parent)					\
+	wl->debugfs.name = debugfs_create_file(#name, 0400, parent,	\
+					       wl, &name## _ops);	\
+	if (IS_ERR(wl->debugfs.name)) {					\
+		ret = PTR_ERR(wl->debugfs.name);			\
+		wl->debugfs.name = NULL;				\
+		goto out;						\
+	}
+
+#define DEBUGFS_DEL(name)						\
+	do {								\
+		debugfs_remove(wl->debugfs.name);			\
+		wl->debugfs.name = NULL;				\
+	} while (0)
+
+#define DEBUGFS_FWSTATS_FILE(sub, name, buflen, fmt)			\
+static ssize_t sub## _ ##name## _read(struct file *file,		\
+				      char __user *userbuf,		\
+				      size_t count, loff_t *ppos)	\
+{									\
+	struct wl12xx *wl = file->private_data;				\
+	char buf[buflen];						\
+	int res;							\
+									\
+	wl12xx_debugfs_update_stats(wl);				\
+									\
+	res = scnprintf(buf, buflen, fmt "\n",				\
+			wl->stats.fw_stats->sub.name);			\
+	return simple_read_from_buffer(userbuf, count, ppos, buf, res);	\
+}									\
+									\
+static const struct file_operations sub## _ ##name## _ops = {		\
+	.read = sub## _ ##name## _read,					\
+	.open = wl12xx_open_file_generic,				\
+};
+
+#define DEBUGFS_FWSTATS_ADD(sub, name)				\
+	DEBUGFS_ADD(sub## _ ##name, wl->debugfs.fw_statistics)
+
+#define DEBUGFS_FWSTATS_DEL(sub, name)				\
+	DEBUGFS_DEL(sub## _ ##name)
+
+static void wl12xx_debugfs_update_stats(struct wl12xx *wl)
+{
+	mutex_lock(&wl->mutex);
+
+	if (wl->state == WL12XX_STATE_ON &&
+	    time_after(jiffies, wl->stats.fw_stats_update +
+		       msecs_to_jiffies(WL12XX_DEBUGFS_STATS_LIFETIME))) {
+		wl12xx_acx_statistics(wl, wl->stats.fw_stats);
+		wl->stats.fw_stats_update = jiffies;
+	}
+
+	mutex_unlock(&wl->mutex);
+}
+
+static int wl12xx_open_file_generic(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+	return 0;
+}
+
+DEBUGFS_FWSTATS_FILE(tx, internal_desc_overflow, 20, "%u");
+
+DEBUGFS_FWSTATS_FILE(rx, out_of_mem, 20, "%u");
+DEBUGFS_FWSTATS_FILE(rx, hdr_overflow, 20, "%u");
+DEBUGFS_FWSTATS_FILE(rx, hw_stuck, 20, "%u");
+DEBUGFS_FWSTATS_FILE(rx, dropped, 20, "%u");
+DEBUGFS_FWSTATS_FILE(rx, fcs_err, 20, "%u");
+DEBUGFS_FWSTATS_FILE(rx, xfr_hint_trig, 20, "%u");
+DEBUGFS_FWSTATS_FILE(rx, path_reset, 20, "%u");
+DEBUGFS_FWSTATS_FILE(rx, reset_counter, 20, "%u");
+
+DEBUGFS_FWSTATS_FILE(dma, rx_requested, 20, "%u");
+DEBUGFS_FWSTATS_FILE(dma, rx_errors, 20, "%u");
+DEBUGFS_FWSTATS_FILE(dma, tx_requested, 20, "%u");
+DEBUGFS_FWSTATS_FILE(dma, tx_errors, 20, "%u");
+
+DEBUGFS_FWSTATS_FILE(isr, cmd_cmplt, 20, "%u");
+DEBUGFS_FWSTATS_FILE(isr, fiqs, 20, "%u");
+DEBUGFS_FWSTATS_FILE(isr, rx_headers, 20, "%u");
+DEBUGFS_FWSTATS_FILE(isr, rx_mem_overflow, 20, "%u");
+DEBUGFS_FWSTATS_FILE(isr, rx_rdys, 20, "%u");
+DEBUGFS_FWSTATS_FILE(isr, irqs, 20, "%u");
+DEBUGFS_FWSTATS_FILE(isr, tx_procs, 20, "%u");
+DEBUGFS_FWSTATS_FILE(isr, decrypt_done, 20, "%u");
+DEBUGFS_FWSTATS_FILE(isr, dma0_done, 20, "%u");
+DEBUGFS_FWSTATS_FILE(isr, dma1_done, 20, "%u");
+DEBUGFS_FWSTATS_FILE(isr, tx_exch_complete, 20, "%u");
+DEBUGFS_FWSTATS_FILE(isr, commands, 20, "%u");
+DEBUGFS_FWSTATS_FILE(isr, rx_procs, 20, "%u");
+DEBUGFS_FWSTATS_FILE(isr, hw_pm_mode_changes, 20, "%u");
+DEBUGFS_FWSTATS_FILE(isr, host_acknowledges, 20, "%u");
+DEBUGFS_FWSTATS_FILE(isr, pci_pm, 20, "%u");
+DEBUGFS_FWSTATS_FILE(isr, wakeups, 20, "%u");
+DEBUGFS_FWSTATS_FILE(isr, low_rssi, 20, "%u");
+
+DEBUGFS_FWSTATS_FILE(wep, addr_key_count, 20, "%u");
+DEBUGFS_FWSTATS_FILE(wep, default_key_count, 20, "%u");
+/* skipping wep.reserved */
+DEBUGFS_FWSTATS_FILE(wep, key_not_found, 20, "%u");
+DEBUGFS_FWSTATS_FILE(wep, decrypt_fail, 20, "%u");
+DEBUGFS_FWSTATS_FILE(wep, packets, 20, "%u");
+DEBUGFS_FWSTATS_FILE(wep, interrupt, 20, "%u");
+
+DEBUGFS_FWSTATS_FILE(pwr, ps_enter, 20, "%u");
+DEBUGFS_FWSTATS_FILE(pwr, elp_enter, 20, "%u");
+DEBUGFS_FWSTATS_FILE(pwr, missing_bcns, 20, "%u");
+DEBUGFS_FWSTATS_FILE(pwr, wake_on_host, 20, "%u");
+DEBUGFS_FWSTATS_FILE(pwr, wake_on_timer_exp, 20, "%u");
+DEBUGFS_FWSTATS_FILE(pwr, tx_with_ps, 20, "%u");
+DEBUGFS_FWSTATS_FILE(pwr, tx_without_ps, 20, "%u");
+DEBUGFS_FWSTATS_FILE(pwr, rcvd_beacons, 20, "%u");
+DEBUGFS_FWSTATS_FILE(pwr, power_save_off, 20, "%u");
+DEBUGFS_FWSTATS_FILE(pwr, enable_ps, 20, "%u");
+DEBUGFS_FWSTATS_FILE(pwr, disable_ps, 20, "%u");
+DEBUGFS_FWSTATS_FILE(pwr, fix_tsf_ps, 20, "%u");
+/* skipping cont_miss_bcns_spread for now */
+DEBUGFS_FWSTATS_FILE(pwr, rcvd_awake_beacons, 20, "%u");
+
+DEBUGFS_FWSTATS_FILE(mic, rx_pkts, 20, "%u");
+DEBUGFS_FWSTATS_FILE(mic, calc_failure, 20, "%u");
+
+DEBUGFS_FWSTATS_FILE(aes, encrypt_fail, 20, "%u");
+DEBUGFS_FWSTATS_FILE(aes, decrypt_fail, 20, "%u");
+DEBUGFS_FWSTATS_FILE(aes, encrypt_packets, 20, "%u");
+DEBUGFS_FWSTATS_FILE(aes, decrypt_packets, 20, "%u");
+DEBUGFS_FWSTATS_FILE(aes, encrypt_interrupt, 20, "%u");
+DEBUGFS_FWSTATS_FILE(aes, decrypt_interrupt, 20, "%u");
+
+DEBUGFS_FWSTATS_FILE(event, heart_beat, 20, "%u");
+DEBUGFS_FWSTATS_FILE(event, calibration, 20, "%u");
+DEBUGFS_FWSTATS_FILE(event, rx_mismatch, 20, "%u");
+DEBUGFS_FWSTATS_FILE(event, rx_mem_empty, 20, "%u");
+DEBUGFS_FWSTATS_FILE(event, rx_pool, 20, "%u");
+DEBUGFS_FWSTATS_FILE(event, oom_late, 20, "%u");
+DEBUGFS_FWSTATS_FILE(event, phy_transmit_error, 20, "%u");
+DEBUGFS_FWSTATS_FILE(event, tx_stuck, 20, "%u");
+
+DEBUGFS_FWSTATS_FILE(ps, pspoll_timeouts, 20, "%u");
+DEBUGFS_FWSTATS_FILE(ps, upsd_timeouts, 20, "%u");
+DEBUGFS_FWSTATS_FILE(ps, upsd_max_sptime, 20, "%u");
+DEBUGFS_FWSTATS_FILE(ps, upsd_max_apturn, 20, "%u");
+DEBUGFS_FWSTATS_FILE(ps, pspoll_max_apturn, 20, "%u");
+DEBUGFS_FWSTATS_FILE(ps, pspoll_utilization, 20, "%u");
+DEBUGFS_FWSTATS_FILE(ps, upsd_utilization, 20, "%u");
+
+DEBUGFS_FWSTATS_FILE(rxpipe, rx_prep_beacon_drop, 20, "%u");
+DEBUGFS_FWSTATS_FILE(rxpipe, descr_host_int_trig_rx_data, 20, "%u");
+DEBUGFS_FWSTATS_FILE(rxpipe, beacon_buffer_thres_host_int_trig_rx_data,
+		     20, "%u");
+DEBUGFS_FWSTATS_FILE(rxpipe, missed_beacon_host_int_trig_rx_data, 20, "%u");
+DEBUGFS_FWSTATS_FILE(rxpipe, tx_xfr_host_int_trig_rx_data, 20, "%u");
+
+DEBUGFS_READONLY_FILE(retry_count, 20, "%u", wl->stats.retry_count);
+DEBUGFS_READONLY_FILE(excessive_retries, 20, "%u",
+		      wl->stats.excessive_retries);
+
+static ssize_t tx_queue_len_read(struct file *file, char __user *userbuf,
+				 size_t count, loff_t *ppos)
+{
+	struct wl12xx *wl = file->private_data;
+	u32 queue_len;
+	char buf[20];
+	int res;
+
+	queue_len = skb_queue_len(&wl->tx_queue);
+
+	res = scnprintf(buf, sizeof(buf), "%u\n", queue_len);
+	return simple_read_from_buffer(userbuf, count, ppos, buf, res);
+}
+
+static const struct file_operations tx_queue_len_ops = {
+	.read = tx_queue_len_read,
+	.open = wl12xx_open_file_generic,
+};
+
+static void wl12xx_debugfs_delete_files(struct wl12xx *wl)
+{
+	DEBUGFS_FWSTATS_DEL(tx, internal_desc_overflow);
+
+	DEBUGFS_FWSTATS_DEL(rx, out_of_mem);
+	DEBUGFS_FWSTATS_DEL(rx, hdr_overflow);
+	DEBUGFS_FWSTATS_DEL(rx, hw_stuck);
+	DEBUGFS_FWSTATS_DEL(rx, dropped);
+	DEBUGFS_FWSTATS_DEL(rx, fcs_err);
+	DEBUGFS_FWSTATS_DEL(rx, xfr_hint_trig);
+	DEBUGFS_FWSTATS_DEL(rx, path_reset);
+	DEBUGFS_FWSTATS_DEL(rx, reset_counter);
+
+	DEBUGFS_FWSTATS_DEL(dma, rx_requested);
+	DEBUGFS_FWSTATS_DEL(dma, rx_errors);
+	DEBUGFS_FWSTATS_DEL(dma, tx_requested);
+	DEBUGFS_FWSTATS_DEL(dma, tx_errors);
+
+	DEBUGFS_FWSTATS_DEL(isr, cmd_cmplt);
+	DEBUGFS_FWSTATS_DEL(isr, fiqs);
+	DEBUGFS_FWSTATS_DEL(isr, rx_headers);
+	DEBUGFS_FWSTATS_DEL(isr, rx_mem_overflow);
+	DEBUGFS_FWSTATS_DEL(isr, rx_rdys);
+	DEBUGFS_FWSTATS_DEL(isr, irqs);
+	DEBUGFS_FWSTATS_DEL(isr, tx_procs);
+	DEBUGFS_FWSTATS_DEL(isr, decrypt_done);
+	DEBUGFS_FWSTATS_DEL(isr, dma0_done);
+	DEBUGFS_FWSTATS_DEL(isr, dma1_done);
+	DEBUGFS_FWSTATS_DEL(isr, tx_exch_complete);
+	DEBUGFS_FWSTATS_DEL(isr, commands);
+	DEBUGFS_FWSTATS_DEL(isr, rx_procs);
+	DEBUGFS_FWSTATS_DEL(isr, hw_pm_mode_changes);
+	DEBUGFS_FWSTATS_DEL(isr, host_acknowledges);
+	DEBUGFS_FWSTATS_DEL(isr, pci_pm);
+	DEBUGFS_FWSTATS_DEL(isr, wakeups);
+	DEBUGFS_FWSTATS_DEL(isr, low_rssi);
+
+	DEBUGFS_FWSTATS_DEL(wep, addr_key_count);
+	DEBUGFS_FWSTATS_DEL(wep, default_key_count);
+	/* skipping wep.reserved */
+	DEBUGFS_FWSTATS_DEL(wep, key_not_found);
+	DEBUGFS_FWSTATS_DEL(wep, decrypt_fail);
+	DEBUGFS_FWSTATS_DEL(wep, packets);
+	DEBUGFS_FWSTATS_DEL(wep, interrupt);
+
+	DEBUGFS_FWSTATS_DEL(pwr, ps_enter);
+	DEBUGFS_FWSTATS_DEL(pwr, elp_enter);
+	DEBUGFS_FWSTATS_DEL(pwr, missing_bcns);
+	DEBUGFS_FWSTATS_DEL(pwr, wake_on_host);
+	DEBUGFS_FWSTATS_DEL(pwr, wake_on_timer_exp);
+	DEBUGFS_FWSTATS_DEL(pwr, tx_with_ps);
+	DEBUGFS_FWSTATS_DEL(pwr, tx_without_ps);
+	DEBUGFS_FWSTATS_DEL(pwr, rcvd_beacons);
+	DEBUGFS_FWSTATS_DEL(pwr, power_save_off);
+	DEBUGFS_FWSTATS_DEL(pwr, enable_ps);
+	DEBUGFS_FWSTATS_DEL(pwr, disable_ps);
+	DEBUGFS_FWSTATS_DEL(pwr, fix_tsf_ps);
+	/* skipping cont_miss_bcns_spread for now */
+	DEBUGFS_FWSTATS_DEL(pwr, rcvd_awake_beacons);
+
+	DEBUGFS_FWSTATS_DEL(mic, rx_pkts);
+	DEBUGFS_FWSTATS_DEL(mic, calc_failure);
+
+	DEBUGFS_FWSTATS_DEL(aes, encrypt_fail);
+	DEBUGFS_FWSTATS_DEL(aes, decrypt_fail);
+	DEBUGFS_FWSTATS_DEL(aes, encrypt_packets);
+	DEBUGFS_FWSTATS_DEL(aes, decrypt_packets);
+	DEBUGFS_FWSTATS_DEL(aes, encrypt_interrupt);
+	DEBUGFS_FWSTATS_DEL(aes, decrypt_interrupt);
+
+	DEBUGFS_FWSTATS_DEL(event, heart_beat);
+	DEBUGFS_FWSTATS_DEL(event, calibration);
+	DEBUGFS_FWSTATS_DEL(event, rx_mismatch);
+	DEBUGFS_FWSTATS_DEL(event, rx_mem_empty);
+	DEBUGFS_FWSTATS_DEL(event, rx_pool);
+	DEBUGFS_FWSTATS_DEL(event, oom_late);
+	DEBUGFS_FWSTATS_DEL(event, phy_transmit_error);
+	DEBUGFS_FWSTATS_DEL(event, tx_stuck);
+
+	DEBUGFS_FWSTATS_DEL(ps, pspoll_timeouts);
+	DEBUGFS_FWSTATS_DEL(ps, upsd_timeouts);
+	DEBUGFS_FWSTATS_DEL(ps, upsd_max_sptime);
+	DEBUGFS_FWSTATS_DEL(ps, upsd_max_apturn);
+	DEBUGFS_FWSTATS_DEL(ps, pspoll_max_apturn);
+	DEBUGFS_FWSTATS_DEL(ps, pspoll_utilization);
+	DEBUGFS_FWSTATS_DEL(ps, upsd_utilization);
+
+	DEBUGFS_FWSTATS_DEL(rxpipe, rx_prep_beacon_drop);
+	DEBUGFS_FWSTATS_DEL(rxpipe, descr_host_int_trig_rx_data);
+	DEBUGFS_FWSTATS_DEL(rxpipe, beacon_buffer_thres_host_int_trig_rx_data);
+	DEBUGFS_FWSTATS_DEL(rxpipe, missed_beacon_host_int_trig_rx_data);
+	DEBUGFS_FWSTATS_DEL(rxpipe, tx_xfr_host_int_trig_rx_data);
+
+	DEBUGFS_DEL(tx_queue_len);
+	DEBUGFS_DEL(retry_count);
+	DEBUGFS_DEL(excessive_retries);
+}
+
+static int wl12xx_debugfs_add_files(struct wl12xx *wl)
+{
+	int ret = 0;
+
+	DEBUGFS_FWSTATS_ADD(tx, internal_desc_overflow);
+
+	DEBUGFS_FWSTATS_ADD(rx, out_of_mem);
+	DEBUGFS_FWSTATS_ADD(rx, hdr_overflow);
+	DEBUGFS_FWSTATS_ADD(rx, hw_stuck);
+	DEBUGFS_FWSTATS_ADD(rx, dropped);
+	DEBUGFS_FWSTATS_ADD(rx, fcs_err);
+	DEBUGFS_FWSTATS_ADD(rx, xfr_hint_trig);
+	DEBUGFS_FWSTATS_ADD(rx, path_reset);
+	DEBUGFS_FWSTATS_ADD(rx, reset_counter);
+
+	DEBUGFS_FWSTATS_ADD(dma, rx_requested);
+	DEBUGFS_FWSTATS_ADD(dma, rx_errors);
+	DEBUGFS_FWSTATS_ADD(dma, tx_requested);
+	DEBUGFS_FWSTATS_ADD(dma, tx_errors);
+
+	DEBUGFS_FWSTATS_ADD(isr, cmd_cmplt);
+	DEBUGFS_FWSTATS_ADD(isr, fiqs);
+	DEBUGFS_FWSTATS_ADD(isr, rx_headers);
+	DEBUGFS_FWSTATS_ADD(isr, rx_mem_overflow);
+	DEBUGFS_FWSTATS_ADD(isr, rx_rdys);
+	DEBUGFS_FWSTATS_ADD(isr, irqs);
+	DEBUGFS_FWSTATS_ADD(isr, tx_procs);
+	DEBUGFS_FWSTATS_ADD(isr, decrypt_done);
+	DEBUGFS_FWSTATS_ADD(isr, dma0_done);
+	DEBUGFS_FWSTATS_ADD(isr, dma1_done);
+	DEBUGFS_FWSTATS_ADD(isr, tx_exch_complete);
+	DEBUGFS_FWSTATS_ADD(isr, commands);
+	DEBUGFS_FWSTATS_ADD(isr, rx_procs);
+	DEBUGFS_FWSTATS_ADD(isr, hw_pm_mode_changes);
+	DEBUGFS_FWSTATS_ADD(isr, host_acknowledges);
+	DEBUGFS_FWSTATS_ADD(isr, pci_pm);
+	DEBUGFS_FWSTATS_ADD(isr, wakeups);
+	DEBUGFS_FWSTATS_ADD(isr, low_rssi);
+
+	DEBUGFS_FWSTATS_ADD(wep, addr_key_count);
+	DEBUGFS_FWSTATS_ADD(wep, default_key_count);
+	/* skipping wep.reserved */
+	DEBUGFS_FWSTATS_ADD(wep, key_not_found);
+	DEBUGFS_FWSTATS_ADD(wep, decrypt_fail);
+	DEBUGFS_FWSTATS_ADD(wep, packets);
+	DEBUGFS_FWSTATS_ADD(wep, interrupt);
+
+	DEBUGFS_FWSTATS_ADD(pwr, ps_enter);
+	DEBUGFS_FWSTATS_ADD(pwr, elp_enter);
+	DEBUGFS_FWSTATS_ADD(pwr, missing_bcns);
+	DEBUGFS_FWSTATS_ADD(pwr, wake_on_host);
+	DEBUGFS_FWSTATS_ADD(pwr, wake_on_timer_exp);
+	DEBUGFS_FWSTATS_ADD(pwr, tx_with_ps);
+	DEBUGFS_FWSTATS_ADD(pwr, tx_without_ps);
+	DEBUGFS_FWSTATS_ADD(pwr, rcvd_beacons);
+	DEBUGFS_FWSTATS_ADD(pwr, power_save_off);
+	DEBUGFS_FWSTATS_ADD(pwr, enable_ps);
+	DEBUGFS_FWSTATS_ADD(pwr, disable_ps);
+	DEBUGFS_FWSTATS_ADD(pwr, fix_tsf_ps);
+	/* skipping cont_miss_bcns_spread for now */
+	DEBUGFS_FWSTATS_ADD(pwr, rcvd_awake_beacons);
+
+	DEBUGFS_FWSTATS_ADD(mic, rx_pkts);
+	DEBUGFS_FWSTATS_ADD(mic, calc_failure);
+
+	DEBUGFS_FWSTATS_ADD(aes, encrypt_fail);
+	DEBUGFS_FWSTATS_ADD(aes, decrypt_fail);
+	DEBUGFS_FWSTATS_ADD(aes, encrypt_packets);
+	DEBUGFS_FWSTATS_ADD(aes, decrypt_packets);
+	DEBUGFS_FWSTATS_ADD(aes, encrypt_interrupt);
+	DEBUGFS_FWSTATS_ADD(aes, decrypt_interrupt);
+
+	DEBUGFS_FWSTATS_ADD(event, heart_beat);
+	DEBUGFS_FWSTATS_ADD(event, calibration);
+	DEBUGFS_FWSTATS_ADD(event, rx_mismatch);
+	DEBUGFS_FWSTATS_ADD(event, rx_mem_empty);
+	DEBUGFS_FWSTATS_ADD(event, rx_pool);
+	DEBUGFS_FWSTATS_ADD(event, oom_late);
+	DEBUGFS_FWSTATS_ADD(event, phy_transmit_error);
+	DEBUGFS_FWSTATS_ADD(event, tx_stuck);
+
+	DEBUGFS_FWSTATS_ADD(ps, pspoll_timeouts);
+	DEBUGFS_FWSTATS_ADD(ps, upsd_timeouts);
+	DEBUGFS_FWSTATS_ADD(ps, upsd_max_sptime);
+	DEBUGFS_FWSTATS_ADD(ps, upsd_max_apturn);
+	DEBUGFS_FWSTATS_ADD(ps, pspoll_max_apturn);
+	DEBUGFS_FWSTATS_ADD(ps, pspoll_utilization);
+	DEBUGFS_FWSTATS_ADD(ps, upsd_utilization);
+
+	DEBUGFS_FWSTATS_ADD(rxpipe, rx_prep_beacon_drop);
+	DEBUGFS_FWSTATS_ADD(rxpipe, descr_host_int_trig_rx_data);
+	DEBUGFS_FWSTATS_ADD(rxpipe, beacon_buffer_thres_host_int_trig_rx_data);
+	DEBUGFS_FWSTATS_ADD(rxpipe, missed_beacon_host_int_trig_rx_data);
+	DEBUGFS_FWSTATS_ADD(rxpipe, tx_xfr_host_int_trig_rx_data);
+
+	DEBUGFS_ADD(tx_queue_len, wl->debugfs.rootdir);
+	DEBUGFS_ADD(retry_count, wl->debugfs.rootdir);
+	DEBUGFS_ADD(excessive_retries, wl->debugfs.rootdir);
+
+out:
+	if (ret < 0)
+		wl12xx_debugfs_delete_files(wl);
+
+	return ret;
+}
+
+void wl12xx_debugfs_reset(struct wl12xx *wl)
+{
+	memset(wl->stats.fw_stats, 0, sizeof(*wl->stats.fw_stats));
+	wl->stats.retry_count = 0;
+	wl->stats.excessive_retries = 0;
+}
+
+int wl12xx_debugfs_init(struct wl12xx *wl)
+{
+	int ret;
+
+	wl->debugfs.rootdir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+	if (IS_ERR(wl->debugfs.rootdir)) {
+		ret = PTR_ERR(wl->debugfs.rootdir);
+		wl->debugfs.rootdir = NULL;
+		goto err;
+	}
+
+	wl->debugfs.fw_statistics = debugfs_create_dir("fw-statistics",
+						       wl->debugfs.rootdir);
+
+	if (IS_ERR(wl->debugfs.fw_statistics)) {
+		ret = PTR_ERR(wl->debugfs.fw_statistics);
+		wl->debugfs.fw_statistics = NULL;
+		goto err_root;
+	}
+
+	wl->stats.fw_stats = kzalloc(sizeof(*wl->stats.fw_stats),
+				      GFP_KERNEL);
+
+	if (!wl->stats.fw_stats) {
+		ret = -ENOMEM;
+		goto err_fw;
+	}
+
+	wl->stats.fw_stats_update = jiffies;
+
+	ret = wl12xx_debugfs_add_files(wl);
+
+	if (ret < 0)
+		goto err_file;
+
+	return 0;
+
+err_file:
+	kfree(wl->stats.fw_stats);
+	wl->stats.fw_stats = NULL;
+
+err_fw:
+	debugfs_remove(wl->debugfs.fw_statistics);
+	wl->debugfs.fw_statistics = NULL;
+
+err_root:
+	debugfs_remove(wl->debugfs.rootdir);
+	wl->debugfs.rootdir = NULL;
+
+err:
+	return ret;
+}
+
+void wl12xx_debugfs_exit(struct wl12xx *wl)
+{
+	wl12xx_debugfs_delete_files(wl);
+
+	kfree(wl->stats.fw_stats);
+	wl->stats.fw_stats = NULL;
+
+	debugfs_remove(wl->debugfs.fw_statistics);
+	wl->debugfs.fw_statistics = NULL;
+
+	debugfs_remove(wl->debugfs.rootdir);
+	wl->debugfs.rootdir = NULL;
+
+}
diff --git a/drivers/net/wireless/wl12xx/debugfs.h b/drivers/net/wireless/wl12xx/debugfs.h
new file mode 100644
index 00000000000..562cdcbcc87
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/debugfs.h
@@ -0,0 +1,33 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (C) 2009 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#ifndef WL12XX_DEBUGFS_H
+#define WL12XX_DEBUGFS_H
+
+#include "wl12xx.h"
+
+int wl12xx_debugfs_init(struct wl12xx *wl);
+void wl12xx_debugfs_exit(struct wl12xx *wl);
+void wl12xx_debugfs_reset(struct wl12xx *wl);
+
+#endif /* WL12XX_DEBUGFS_H */
diff --git a/drivers/net/wireless/wl12xx/event.c b/drivers/net/wireless/wl12xx/event.c
new file mode 100644
index 00000000000..99529ca89a7
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/event.c
@@ -0,0 +1,127 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (c) 1998-2007 Texas Instruments Incorporated
+ * Copyright (C) 2008 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include "wl12xx.h"
+#include "reg.h"
+#include "spi.h"
+#include "event.h"
+#include "ps.h"
+
+static int wl12xx_event_scan_complete(struct wl12xx *wl,
+				      struct event_mailbox *mbox)
+{
+	wl12xx_debug(DEBUG_EVENT, "status: 0x%x, channels: %d",
+		     mbox->scheduled_scan_status,
+		     mbox->scheduled_scan_channels);
+
+	if (wl->scanning) {
+		mutex_unlock(&wl->mutex);
+		ieee80211_scan_completed(wl->hw, false);
+		mutex_lock(&wl->mutex);
+		wl->scanning = false;
+	}
+
+	return 0;
+}
+
+static void wl12xx_event_mbox_dump(struct event_mailbox *mbox)
+{
+	wl12xx_debug(DEBUG_EVENT, "MBOX DUMP:");
+	wl12xx_debug(DEBUG_EVENT, "\tvector: 0x%x", mbox->events_vector);
+	wl12xx_debug(DEBUG_EVENT, "\tmask: 0x%x", mbox->events_mask);
+}
+
+static int wl12xx_event_process(struct wl12xx *wl, struct event_mailbox *mbox)
+{
+	int ret;
+	u32 vector;
+
+	wl12xx_event_mbox_dump(mbox);
+
+	vector = mbox->events_vector & ~(mbox->events_mask);
+	wl12xx_debug(DEBUG_EVENT, "vector: 0x%x", vector);
+
+	if (vector & SCAN_COMPLETE_EVENT_ID) {
+		ret = wl12xx_event_scan_complete(wl, mbox);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (vector & BSS_LOSE_EVENT_ID) {
+		wl12xx_debug(DEBUG_EVENT, "BSS_LOSE_EVENT");
+
+		if (wl->psm_requested && wl->psm) {
+			ret = wl12xx_ps_set_mode(wl, STATION_ACTIVE_MODE);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+int wl12xx_event_unmask(struct wl12xx *wl)
+{
+	int ret;
+
+	ret = wl12xx_acx_event_mbox_mask(wl, ~(wl->event_mask));
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+void wl12xx_event_mbox_config(struct wl12xx *wl)
+{
+	wl->mbox_ptr[0] = wl12xx_reg_read32(wl, REG_EVENT_MAILBOX_PTR);
+	wl->mbox_ptr[1] = wl->mbox_ptr[0] + sizeof(struct event_mailbox);
+
+	wl12xx_debug(DEBUG_EVENT, "MBOX ptrs: 0x%x 0x%x",
+		     wl->mbox_ptr[0], wl->mbox_ptr[1]);
+}
+
+int wl12xx_event_handle(struct wl12xx *wl, u8 mbox_num)
+{
+	struct event_mailbox mbox;
+	int ret;
+
+	wl12xx_debug(DEBUG_EVENT, "EVENT on mbox %d", mbox_num);
+
+	if (mbox_num > 1)
+		return -EINVAL;
+
+	/* first we read the mbox descriptor */
+	wl12xx_spi_mem_read(wl, wl->mbox_ptr[mbox_num], &mbox,
+			    sizeof(struct event_mailbox));
+
+	/* process the descriptor */
+	ret = wl12xx_event_process(wl, &mbox);
+	if (ret < 0)
+		return ret;
+
+	/* then we let the firmware know it can go on...*/
+	wl12xx_reg_write32(wl, ACX_REG_INTERRUPT_TRIG, INTR_TRIG_EVENT_ACK);
+
+	return 0;
+}
diff --git a/drivers/net/wireless/wl12xx/event.h b/drivers/net/wireless/wl12xx/event.h
new file mode 100644
index 00000000000..1f4c2f7438a
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/event.h
@@ -0,0 +1,121 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (c) 1998-2007 Texas Instruments Incorporated
+ * Copyright (C) 2008 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#ifndef __WL12XX_EVENT_H__
+#define __WL12XX_EVENT_H__
+
+/*
+ * Mbox events
+ *
+ * The event mechanism is based on a pair of event buffers (buffers A and
+ * B) at fixed locations in the target's memory. The host processes one
+ * buffer while the other buffer continues to collect events. If the host
+ * is not processing events, an interrupt is issued to signal that a buffer
+ * is ready. Once the host is done with processing events from one buffer,
+ * it signals the target (with an ACK interrupt) that the event buffer is
+ * free.
+ */
+
+enum {
+	RESERVED1_EVENT_ID                       = BIT(0),
+	RESERVED2_EVENT_ID                       = BIT(1),
+	MEASUREMENT_START_EVENT_ID               = BIT(2),
+	SCAN_COMPLETE_EVENT_ID                   = BIT(3),
+	CALIBRATION_COMPLETE_EVENT_ID            = BIT(4),
+	ROAMING_TRIGGER_LOW_RSSI_EVENT_ID        = BIT(5),
+	PS_REPORT_EVENT_ID                       = BIT(6),
+	SYNCHRONIZATION_TIMEOUT_EVENT_ID         = BIT(7),
+	HEALTH_REPORT_EVENT_ID                   = BIT(8),
+	ACI_DETECTION_EVENT_ID                   = BIT(9),
+	DEBUG_REPORT_EVENT_ID                    = BIT(10),
+	MAC_STATUS_EVENT_ID                      = BIT(11),
+	DISCONNECT_EVENT_COMPLETE_ID             = BIT(12),
+	JOIN_EVENT_COMPLETE_ID                   = BIT(13),
+	CHANNEL_SWITCH_COMPLETE_EVENT_ID         = BIT(14),
+	BSS_LOSE_EVENT_ID                        = BIT(15),
+	ROAMING_TRIGGER_MAX_TX_RETRY_EVENT_ID    = BIT(16),
+	MEASUREMENT_COMPLETE_EVENT_ID            = BIT(17),
+	AP_DISCOVERY_COMPLETE_EVENT_ID           = BIT(18),
+	SCHEDULED_SCAN_COMPLETE_EVENT_ID         = BIT(19),
+	PSPOLL_DELIVERY_FAILURE_EVENT_ID 	 = BIT(20),
+	RESET_BSS_EVENT_ID                       = BIT(21),
+	REGAINED_BSS_EVENT_ID                    = BIT(22),
+	ROAMING_TRIGGER_REGAINED_RSSI_EVENT_ID   = BIT(23),
+	ROAMING_TRIGGER_LOW_SNR_EVENT_ID         = BIT(24),
+	ROAMING_TRIGGER_REGAINED_SNR_EVENT_ID    = BIT(25),
+
+	DBG_EVENT_ID                             = BIT(26),
+	BT_PTA_SENSE_EVENT_ID                    = BIT(27),
+	BT_PTA_PREDICTION_EVENT_ID               = BIT(28),
+	BT_PTA_AVALANCHE_EVENT_ID                = BIT(29),
+
+	PLT_RX_CALIBRATION_COMPLETE_EVENT_ID     = BIT(30),
+
+	EVENT_MBOX_ALL_EVENT_ID                  = 0x7fffffff,
+};
+
+struct event_debug_report {
+	u8 debug_event_id;
+	u8 num_params;
+	u16 pad;
+	u32 report_1;
+	u32 report_2;
+	u32 report_3;
+} __attribute__ ((packed));
+
+struct event_mailbox {
+	u32 events_vector;
+	u32 events_mask;
+	u32 reserved_1;
+	u32 reserved_2;
+
+	char average_rssi_level;
+	u8 ps_status;
+	u8 channel_switch_status;
+	u8 scheduled_scan_status;
+
+	/* Channels scanned by the scheduled scan */
+	u16 scheduled_scan_channels;
+
+	/* If bit 0 is set -> target's fatal error */
+	u16 health_report;
+	u16 bad_fft_counter;
+	u8 bt_pta_sense_info;
+	u8 bt_pta_protective_info;
+	u32 reserved;
+	u32 debug_report[2];
+
+	/* Number of FCS errors since last event */
+	u32 fcs_err_counter;
+
+	struct event_debug_report report;
+	u8 average_snr_level;
+	u8 padding[19];
+} __attribute__ ((packed));
+
+int wl12xx_event_unmask(struct wl12xx *wl);
+void wl12xx_event_mbox_config(struct wl12xx *wl);
+int wl12xx_event_handle(struct wl12xx *wl, u8 mbox);
+
+#endif
diff --git a/drivers/net/wireless/wl12xx/init.c b/drivers/net/wireless/wl12xx/init.c
new file mode 100644
index 00000000000..2a573a6010b
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/init.c
@@ -0,0 +1,200 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (C) 2009 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include "init.h"
+#include "wl12xx_80211.h"
+#include "acx.h"
+#include "cmd.h"
+
+int wl12xx_hw_init_hwenc_config(struct wl12xx *wl)
+{
+	int ret;
+
+	ret = wl12xx_acx_feature_cfg(wl);
+	if (ret < 0) {
+		wl12xx_warning("couldn't set feature config");
+		return ret;
+	}
+
+	ret = wl12xx_acx_default_key(wl, wl->default_key);
+	if (ret < 0) {
+		wl12xx_warning("couldn't set default key");
+		return ret;
+	}
+
+	return 0;
+}
+
+int wl12xx_hw_init_templates_config(struct wl12xx *wl)
+{
+	int ret;
+	u8 partial_vbm[PARTIAL_VBM_MAX];
+
+	/* send empty templates for fw memory reservation */
+	ret = wl12xx_cmd_template_set(wl, CMD_PROBE_REQ, NULL,
+				      sizeof(struct wl12xx_probe_req_template));
+	if (ret < 0)
+		return ret;
+
+	ret = wl12xx_cmd_template_set(wl, CMD_NULL_DATA, NULL,
+				      sizeof(struct wl12xx_null_data_template));
+	if (ret < 0)
+		return ret;
+
+	ret = wl12xx_cmd_template_set(wl, CMD_PS_POLL, NULL,
+				      sizeof(struct wl12xx_ps_poll_template));
+	if (ret < 0)
+		return ret;
+
+	ret = wl12xx_cmd_template_set(wl, CMD_QOS_NULL_DATA, NULL,
+				      sizeof
+				      (struct wl12xx_qos_null_data_template));
+	if (ret < 0)
+		return ret;
+
+	ret = wl12xx_cmd_template_set(wl, CMD_PROBE_RESP, NULL,
+				      sizeof
+				      (struct wl12xx_probe_resp_template));
+	if (ret < 0)
+		return ret;
+
+	ret = wl12xx_cmd_template_set(wl, CMD_BEACON, NULL,
+				      sizeof
+				      (struct wl12xx_beacon_template));
+	if (ret < 0)
+		return ret;
+
+	/* tim templates, first reserve space then allocate an empty one */
+	memset(partial_vbm, 0, PARTIAL_VBM_MAX);
+	ret = wl12xx_cmd_vbm(wl, TIM_ELE_ID, partial_vbm, PARTIAL_VBM_MAX, 0);
+	if (ret < 0)
+		return ret;
+
+	ret = wl12xx_cmd_vbm(wl, TIM_ELE_ID, partial_vbm, 1, 0);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+int wl12xx_hw_init_rx_config(struct wl12xx *wl, u32 config, u32 filter)
+{
+	int ret;
+
+	ret = wl12xx_acx_rx_msdu_life_time(wl, RX_MSDU_LIFETIME_DEF);
+	if (ret < 0)
+		return ret;
+
+	ret = wl12xx_acx_rx_config(wl, config, filter);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+int wl12xx_hw_init_phy_config(struct wl12xx *wl)
+{
+	int ret;
+
+	ret = wl12xx_acx_pd_threshold(wl);
+	if (ret < 0)
+		return ret;
+
+	ret = wl12xx_acx_slot(wl, DEFAULT_SLOT_TIME);
+	if (ret < 0)
+		return ret;
+
+	ret = wl12xx_acx_group_address_tbl(wl);
+	if (ret < 0)
+		return ret;
+
+	ret = wl12xx_acx_service_period_timeout(wl);
+	if (ret < 0)
+		return ret;
+
+	ret = wl12xx_acx_rts_threshold(wl, RTS_THRESHOLD_DEF);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+int wl12xx_hw_init_beacon_filter(struct wl12xx *wl)
+{
+	int ret;
+
+	ret = wl12xx_acx_beacon_filter_opt(wl);
+	if (ret < 0)
+		return ret;
+
+	ret = wl12xx_acx_beacon_filter_table(wl);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+int wl12xx_hw_init_pta(struct wl12xx *wl)
+{
+	int ret;
+
+	ret = wl12xx_acx_sg_enable(wl);
+	if (ret < 0)
+		return ret;
+
+	ret = wl12xx_acx_sg_cfg(wl);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+int wl12xx_hw_init_energy_detection(struct wl12xx *wl)
+{
+	int ret;
+
+	ret = wl12xx_acx_cca_threshold(wl);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+int wl12xx_hw_init_beacon_broadcast(struct wl12xx *wl)
+{
+	int ret;
+
+	ret = wl12xx_acx_bcn_dtim_options(wl);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+int wl12xx_hw_init_power_auth(struct wl12xx *wl)
+{
+	return wl12xx_acx_sleep_auth(wl, WL12XX_PSM_CAM);
+}
diff --git a/drivers/net/wireless/wl12xx/init.h b/drivers/net/wireless/wl12xx/init.h
new file mode 100644
index 00000000000..c8b6cd0b7c3
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/init.h
@@ -0,0 +1,40 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (C) 2009 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#ifndef __WL12XX_INIT_H__
+#define __WL12XX_INIT_H__
+
+#include "wl12xx.h"
+
+int wl12xx_hw_init_hwenc_config(struct wl12xx *wl);
+int wl12xx_hw_init_templates_config(struct wl12xx *wl);
+int wl12xx_hw_init_mem_config(struct wl12xx *wl);
+int wl12xx_hw_init_rx_config(struct wl12xx *wl, u32 config, u32 filter);
+int wl12xx_hw_init_phy_config(struct wl12xx *wl);
+int wl12xx_hw_init_beacon_filter(struct wl12xx *wl);
+int wl12xx_hw_init_pta(struct wl12xx *wl);
+int wl12xx_hw_init_energy_detection(struct wl12xx *wl);
+int wl12xx_hw_init_beacon_broadcast(struct wl12xx *wl);
+int wl12xx_hw_init_power_auth(struct wl12xx *wl);
+
+#endif
diff --git a/drivers/net/wireless/wl12xx/main.c b/drivers/net/wireless/wl12xx/main.c
new file mode 100644
index 00000000000..744f4ce5993
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/main.c
@@ -0,0 +1,1358 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (C) 2008-2009 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/firmware.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/spi/spi.h>
+#include <linux/crc32.h>
+#include <linux/etherdevice.h>
+#include <linux/spi/wl12xx.h>
+
+#include "wl12xx.h"
+#include "wl12xx_80211.h"
+#include "reg.h"
+#include "wl1251.h"
+#include "spi.h"
+#include "event.h"
+#include "tx.h"
+#include "rx.h"
+#include "ps.h"
+#include "init.h"
+#include "debugfs.h"
+
+static void wl12xx_disable_interrupts(struct wl12xx *wl)
+{
+	disable_irq(wl->irq);
+}
+
+static void wl12xx_power_off(struct wl12xx *wl)
+{
+	wl->set_power(false);
+}
+
+static void wl12xx_power_on(struct wl12xx *wl)
+{
+	wl->set_power(true);
+}
+
+static irqreturn_t wl12xx_irq(int irq, void *cookie)
+{
+	struct wl12xx *wl;
+
+	wl12xx_debug(DEBUG_IRQ, "IRQ");
+
+	wl = cookie;
+
+	schedule_work(&wl->irq_work);
+
+	return IRQ_HANDLED;
+}
+
+static int wl12xx_fetch_firmware(struct wl12xx *wl)
+{
+	const struct firmware *fw;
+	int ret;
+
+	ret = request_firmware(&fw, wl->chip.fw_filename, &wl->spi->dev);
+
+	if (ret < 0) {
+		wl12xx_error("could not get firmware: %d", ret);
+		return ret;
+	}
+
+	if (fw->size % 4) {
+		wl12xx_error("firmware size is not multiple of 32 bits: %d",
+			     fw->size);
+		ret = -EILSEQ;
+		goto out;
+	}
+
+	wl->fw_len = fw->size;
+	wl->fw = kmalloc(wl->fw_len, GFP_KERNEL);
+
+	if (!wl->fw) {
+		wl12xx_error("could not allocate memory for the firmware");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	memcpy(wl->fw, fw->data, wl->fw_len);
+
+	ret = 0;
+
+out:
+	release_firmware(fw);
+
+	return ret;
+}
+
+static int wl12xx_fetch_nvs(struct wl12xx *wl)
+{
+	const struct firmware *fw;
+	int ret;
+
+	ret = request_firmware(&fw, wl->chip.nvs_filename, &wl->spi->dev);
+
+	if (ret < 0) {
+		wl12xx_error("could not get nvs file: %d", ret);
+		return ret;
+	}
+
+	if (fw->size % 4) {
+		wl12xx_error("nvs size is not multiple of 32 bits: %d",
+			     fw->size);
+		ret = -EILSEQ;
+		goto out;
+	}
+
+	wl->nvs_len = fw->size;
+	wl->nvs = kmalloc(wl->nvs_len, GFP_KERNEL);
+
+	if (!wl->nvs) {
+		wl12xx_error("could not allocate memory for the nvs file");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	memcpy(wl->nvs, fw->data, wl->nvs_len);
+
+	ret = 0;
+
+out:
+	release_firmware(fw);
+
+	return ret;
+}
+
+static void wl12xx_fw_wakeup(struct wl12xx *wl)
+{
+	u32 elp_reg;
+
+	elp_reg = ELPCTRL_WAKE_UP;
+	wl12xx_write32(wl, HW_ACCESS_ELP_CTRL_REG_ADDR, elp_reg);
+	elp_reg = wl12xx_read32(wl, HW_ACCESS_ELP_CTRL_REG_ADDR);
+
+	if (!(elp_reg & ELPCTRL_WLAN_READY)) {
+		wl12xx_warning("WLAN not ready");
+		elp_reg = ELPCTRL_WAKE_UP_WLAN_READY;
+		wl12xx_write32(wl, HW_ACCESS_ELP_CTRL_REG_ADDR, elp_reg);
+	}
+}
+
+static int wl12xx_chip_wakeup(struct wl12xx *wl)
+{
+	int ret = 0;
+
+	wl12xx_power_on(wl);
+	msleep(wl->chip.power_on_sleep);
+	wl12xx_spi_reset(wl);
+	wl12xx_spi_init(wl);
+
+	/* We don't need a real memory partition here, because we only want
+	 * to use the registers at this point. */
+	wl12xx_set_partition(wl,
+			     0x00000000,
+			     0x00000000,
+			     REGISTERS_BASE,
+			     REGISTERS_DOWN_SIZE);
+
+	/* ELP module wake up */
+	wl12xx_fw_wakeup(wl);
+
+	/* whal_FwCtrl_BootSm() */
+
+	/* 0. read chip id from CHIP_ID */
+	wl->chip.id = wl12xx_reg_read32(wl, CHIP_ID_B);
+
+	/* 1. check if chip id is valid */
+
+	switch (wl->chip.id) {
+	case CHIP_ID_1251_PG12:
+		wl12xx_debug(DEBUG_BOOT, "chip id 0x%x (1251 PG12)",
+			     wl->chip.id);
+
+		wl1251_setup(wl);
+
+		break;
+	case CHIP_ID_1271_PG10:
+	case CHIP_ID_1251_PG10:
+	case CHIP_ID_1251_PG11:
+	default:
+		wl12xx_error("unsupported chip id: 0x%x", wl->chip.id);
+		ret = -ENODEV;
+		goto out;
+	}
+
+	if (wl->fw == NULL) {
+		ret = wl12xx_fetch_firmware(wl);
+		if (ret < 0)
+			goto out;
+	}
+
+	/* No NVS from netlink, try to get it from the filesystem */
+	if (wl->nvs == NULL) {
+		ret = wl12xx_fetch_nvs(wl);
+		if (ret < 0)
+			goto out;
+	}
+
+out:
+	return ret;
+}
+
+static void wl12xx_filter_work(struct work_struct *work)
+{
+	struct wl12xx *wl =
+		container_of(work, struct wl12xx, filter_work);
+	int ret;
+
+	mutex_lock(&wl->mutex);
+
+	if (wl->state == WL12XX_STATE_OFF)
+		goto out;
+
+	ret = wl12xx_cmd_join(wl, wl->bss_type, 1, 100, 0);
+	if (ret < 0)
+		goto out;
+
+out:
+	mutex_unlock(&wl->mutex);
+}
+
+int wl12xx_plt_start(struct wl12xx *wl)
+{
+	int ret;
+
+	wl12xx_notice("power up");
+
+	if (wl->state != WL12XX_STATE_OFF) {
+		wl12xx_error("cannot go into PLT state because not "
+			     "in off state: %d", wl->state);
+		return -EBUSY;
+	}
+
+	wl->state = WL12XX_STATE_PLT;
+
+	ret = wl12xx_chip_wakeup(wl);
+	if (ret < 0)
+		return ret;
+
+	ret = wl->chip.op_boot(wl);
+	if (ret < 0)
+		return ret;
+
+	wl12xx_notice("firmware booted in PLT mode (%s)", wl->chip.fw_ver);
+
+	ret = wl->chip.op_plt_init(wl);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+int wl12xx_plt_stop(struct wl12xx *wl)
+{
+	wl12xx_notice("power down");
+
+	if (wl->state != WL12XX_STATE_PLT) {
+		wl12xx_error("cannot power down because not in PLT "
+			     "state: %d", wl->state);
+		return -EBUSY;
+	}
+
+	wl12xx_disable_interrupts(wl);
+	wl12xx_power_off(wl);
+
+	wl->state = WL12XX_STATE_OFF;
+
+	return 0;
+}
+
+
+static int wl12xx_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
+{
+	struct wl12xx *wl = hw->priv;
+
+	skb_queue_tail(&wl->tx_queue, skb);
+
+	schedule_work(&wl->tx_work);
+
+	/*
+	 * The workqueue is slow to process the tx_queue and we need stop
+	 * the queue here, otherwise the queue will get too long.
+	 */
+	if (skb_queue_len(&wl->tx_queue) >= WL12XX_TX_QUEUE_MAX_LENGTH) {
+		ieee80211_stop_queues(wl->hw);
+
+		/*
+		 * FIXME: this is racy, the variable is not properly
+		 * protected. Maybe fix this by removing the stupid
+		 * variable altogether and checking the real queue state?
+		 */
+		wl->tx_queue_stopped = true;
+	}
+
+	return NETDEV_TX_OK;
+}
+
+static int wl12xx_op_start(struct ieee80211_hw *hw)
+{
+	struct wl12xx *wl = hw->priv;
+	int ret = 0;
+
+	wl12xx_debug(DEBUG_MAC80211, "mac80211 start");
+
+	mutex_lock(&wl->mutex);
+
+	if (wl->state != WL12XX_STATE_OFF) {
+		wl12xx_error("cannot start because not in off state: %d",
+			     wl->state);
+		ret = -EBUSY;
+		goto out;
+	}
+
+	ret = wl12xx_chip_wakeup(wl);
+	if (ret < 0)
+		return ret;
+
+	ret = wl->chip.op_boot(wl);
+	if (ret < 0)
+		goto out;
+
+	ret = wl->chip.op_hw_init(wl);
+	if (ret < 0)
+		goto out;
+
+	ret = wl12xx_acx_station_id(wl);
+	if (ret < 0)
+		goto out;
+
+	wl->state = WL12XX_STATE_ON;
+
+	wl12xx_info("firmware booted (%s)", wl->chip.fw_ver);
+
+out:
+	if (ret < 0)
+		wl12xx_power_off(wl);
+
+	mutex_unlock(&wl->mutex);
+
+	return ret;
+}
+
+static void wl12xx_op_stop(struct ieee80211_hw *hw)
+{
+	struct wl12xx *wl = hw->priv;
+
+	wl12xx_info("down");
+
+	wl12xx_debug(DEBUG_MAC80211, "mac80211 stop");
+
+	mutex_lock(&wl->mutex);
+
+	WARN_ON(wl->state != WL12XX_STATE_ON);
+
+	if (wl->scanning) {
+		mutex_unlock(&wl->mutex);
+		ieee80211_scan_completed(wl->hw, true);
+		mutex_lock(&wl->mutex);
+		wl->scanning = false;
+	}
+
+	wl->state = WL12XX_STATE_OFF;
+
+	wl12xx_disable_interrupts(wl);
+
+	mutex_unlock(&wl->mutex);
+
+	cancel_work_sync(&wl->irq_work);
+	cancel_work_sync(&wl->tx_work);
+	cancel_work_sync(&wl->filter_work);
+
+	mutex_lock(&wl->mutex);
+
+	/* let's notify MAC80211 about the remaining pending TX frames */
+	wl12xx_tx_flush(wl);
+
+	wl12xx_power_off(wl);
+
+	memset(wl->bssid, 0, ETH_ALEN);
+	wl->listen_int = 1;
+	wl->bss_type = MAX_BSS_TYPE;
+
+	wl->data_in_count = 0;
+	wl->rx_counter = 0;
+	wl->rx_handled = 0;
+	wl->rx_current_buffer = 0;
+	wl->rx_last_id = 0;
+	wl->next_tx_complete = 0;
+	wl->elp = false;
+	wl->psm = 0;
+	wl->tx_queue_stopped = false;
+	wl->power_level = WL12XX_DEFAULT_POWER_LEVEL;
+
+	wl12xx_debugfs_reset(wl);
+
+	mutex_unlock(&wl->mutex);
+}
+
+static int wl12xx_op_add_interface(struct ieee80211_hw *hw,
+				   struct ieee80211_if_init_conf *conf)
+{
+	struct wl12xx *wl = hw->priv;
+	DECLARE_MAC_BUF(mac);
+	int ret = 0;
+
+	wl12xx_debug(DEBUG_MAC80211, "mac80211 add interface type %d mac %s",
+		     conf->type, print_mac(mac, conf->mac_addr));
+
+	mutex_lock(&wl->mutex);
+
+	switch (conf->type) {
+	case NL80211_IFTYPE_STATION:
+		wl->bss_type = BSS_TYPE_STA_BSS;
+		break;
+	case NL80211_IFTYPE_ADHOC:
+		wl->bss_type = BSS_TYPE_IBSS;
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (memcmp(wl->mac_addr, conf->mac_addr, ETH_ALEN)) {
+		memcpy(wl->mac_addr, conf->mac_addr, ETH_ALEN);
+		SET_IEEE80211_PERM_ADDR(wl->hw, wl->mac_addr);
+		ret = wl12xx_acx_station_id(wl);
+		if (ret < 0)
+			goto out;
+	}
+
+out:
+	mutex_unlock(&wl->mutex);
+	return ret;
+}
+
+static void wl12xx_op_remove_interface(struct ieee80211_hw *hw,
+					 struct ieee80211_if_init_conf *conf)
+{
+	wl12xx_debug(DEBUG_MAC80211, "mac80211 remove interface");
+}
+
+static int wl12xx_build_null_data(struct wl12xx *wl)
+{
+	struct wl12xx_null_data_template template;
+
+	if (!is_zero_ether_addr(wl->bssid)) {
+		memcpy(template.header.da, wl->bssid, ETH_ALEN);
+		memcpy(template.header.bssid, wl->bssid, ETH_ALEN);
+	} else {
+		memset(template.header.da, 0xff, ETH_ALEN);
+		memset(template.header.bssid, 0xff, ETH_ALEN);
+	}
+
+	memcpy(template.header.sa, wl->mac_addr, ETH_ALEN);
+	template.header.frame_ctl = cpu_to_le16(IEEE80211_FTYPE_DATA |
+						IEEE80211_STYPE_NULLFUNC);
+
+	return wl12xx_cmd_template_set(wl, CMD_NULL_DATA, &template,
+				       sizeof(template));
+
+}
+
+static int wl12xx_build_ps_poll(struct wl12xx *wl, u16 aid)
+{
+	struct wl12xx_ps_poll_template template;
+
+	memcpy(template.bssid, wl->bssid, ETH_ALEN);
+	memcpy(template.ta, wl->mac_addr, ETH_ALEN);
+	template.aid = aid;
+	template.fc = cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_PSPOLL);
+
+	return wl12xx_cmd_template_set(wl, CMD_PS_POLL, &template,
+				       sizeof(template));
+
+}
+
+static int wl12xx_op_config(struct ieee80211_hw *hw, u32 changed)
+{
+	struct wl12xx *wl = hw->priv;
+	struct ieee80211_conf *conf = &hw->conf;
+	int channel, ret = 0;
+
+	channel = ieee80211_frequency_to_channel(conf->channel->center_freq);
+
+	wl12xx_debug(DEBUG_MAC80211, "mac80211 config ch %d psm %s power %d",
+		     channel,
+		     conf->flags & IEEE80211_CONF_PS ? "on" : "off",
+		     conf->power_level);
+
+	mutex_lock(&wl->mutex);
+
+	if (channel != wl->channel) {
+		/* FIXME: use beacon interval provided by mac80211 */
+		ret = wl12xx_cmd_join(wl, wl->bss_type, 1, 100, 0);
+		if (ret < 0)
+			goto out;
+
+		wl->channel = channel;
+	}
+
+	ret = wl12xx_build_null_data(wl);
+	if (ret < 0)
+		goto out;
+
+	if (conf->flags & IEEE80211_CONF_PS && !wl->psm_requested) {
+		wl12xx_info("psm enabled");
+
+		wl->psm_requested = true;
+
+		/*
+		 * We enter PSM only if we're already associated.
+		 * If we're not, we'll enter it when joining an SSID,
+		 * through the bss_info_changed() hook.
+		 */
+		ret = wl12xx_ps_set_mode(wl, STATION_POWER_SAVE_MODE);
+	} else if (!(conf->flags & IEEE80211_CONF_PS) &&
+		   wl->psm_requested) {
+		wl12xx_info("psm disabled");
+
+		wl->psm_requested = false;
+
+		if (wl->psm)
+			ret = wl12xx_ps_set_mode(wl, STATION_ACTIVE_MODE);
+	}
+
+	if (conf->power_level != wl->power_level) {
+		ret = wl12xx_acx_tx_power(wl, conf->power_level);
+		if (ret < 0)
+			goto out;
+
+		wl->power_level = conf->power_level;
+	}
+
+out:
+	mutex_unlock(&wl->mutex);
+	return ret;
+}
+
+#define WL12XX_SUPPORTED_FILTERS (FIF_PROMISC_IN_BSS | \
+				  FIF_ALLMULTI | \
+				  FIF_FCSFAIL | \
+				  FIF_BCN_PRBRESP_PROMISC | \
+				  FIF_CONTROL | \
+				  FIF_OTHER_BSS)
+
+static void wl12xx_op_configure_filter(struct ieee80211_hw *hw,
+				       unsigned int changed,
+				       unsigned int *total,
+				       int mc_count,
+				       struct dev_addr_list *mc_list)
+{
+	struct wl12xx *wl = hw->priv;
+
+	wl12xx_debug(DEBUG_MAC80211, "mac80211 configure filter");
+
+	*total &= WL12XX_SUPPORTED_FILTERS;
+	changed &= WL12XX_SUPPORTED_FILTERS;
+
+	if (changed == 0)
+		/* no filters which we support changed */
+		return;
+
+	/* FIXME: wl->rx_config and wl->rx_filter are not protected */
+
+	wl->rx_config = WL12XX_DEFAULT_RX_CONFIG;
+	wl->rx_filter = WL12XX_DEFAULT_RX_FILTER;
+
+	if (*total & FIF_PROMISC_IN_BSS) {
+		wl->rx_config |= CFG_BSSID_FILTER_EN;
+		wl->rx_config |= CFG_RX_ALL_GOOD;
+	}
+	if (*total & FIF_ALLMULTI)
+		/*
+		 * CFG_MC_FILTER_EN in rx_config needs to be 0 to receive
+		 * all multicast frames
+		 */
+		wl->rx_config &= ~CFG_MC_FILTER_EN;
+	if (*total & FIF_FCSFAIL)
+		wl->rx_filter |= CFG_RX_FCS_ERROR;
+	if (*total & FIF_BCN_PRBRESP_PROMISC) {
+		wl->rx_config &= ~CFG_BSSID_FILTER_EN;
+		wl->rx_config &= ~CFG_SSID_FILTER_EN;
+	}
+	if (*total & FIF_CONTROL)
+		wl->rx_filter |= CFG_RX_CTL_EN;
+	if (*total & FIF_OTHER_BSS)
+		wl->rx_filter &= ~CFG_BSSID_FILTER_EN;
+
+	/*
+	 * FIXME: workqueues need to be properly cancelled on stop(), for
+	 * now let's just disable changing the filter settings. They will
+	 * be updated any on config().
+	 */
+	/* schedule_work(&wl->filter_work); */
+}
+
+/* HW encryption */
+static int wl12xx_set_key_type(struct wl12xx *wl, struct acx_set_key *key,
+			       enum set_key_cmd cmd,
+			       struct ieee80211_key_conf *mac80211_key,
+			       const u8 *addr)
+{
+	switch (mac80211_key->alg) {
+	case ALG_WEP:
+		if (is_broadcast_ether_addr(addr))
+			key->key_type = KEY_WEP_DEFAULT;
+		else
+			key->key_type = KEY_WEP_ADDR;
+
+		mac80211_key->hw_key_idx = mac80211_key->keyidx;
+		break;
+	case ALG_TKIP:
+		if (is_broadcast_ether_addr(addr))
+			key->key_type = KEY_TKIP_MIC_GROUP;
+		else
+			key->key_type = KEY_TKIP_MIC_PAIRWISE;
+
+		mac80211_key->hw_key_idx = mac80211_key->keyidx;
+		break;
+	case ALG_CCMP:
+		if (is_broadcast_ether_addr(addr))
+			key->key_type = KEY_AES_GROUP;
+		else
+			key->key_type = KEY_AES_PAIRWISE;
+		mac80211_key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV;
+		break;
+	default:
+		wl12xx_error("Unknown key algo 0x%x", mac80211_key->alg);
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int wl12xx_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
+			     struct ieee80211_vif *vif,
+			     struct ieee80211_sta *sta,
+			     struct ieee80211_key_conf *key)
+{
+	struct wl12xx *wl = hw->priv;
+	struct acx_set_key wl_key;
+	const u8 *addr;
+	int ret;
+
+	static const u8 bcast_addr[ETH_ALEN] =
+		{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+	wl12xx_debug(DEBUG_MAC80211, "mac80211 set key");
+
+	memset(&wl_key, 0, sizeof(wl_key));
+
+	addr = sta ? sta->addr : bcast_addr;
+
+	wl12xx_debug(DEBUG_CRYPT, "CMD: 0x%x", cmd);
+	wl12xx_dump(DEBUG_CRYPT, "ADDR: ", addr, ETH_ALEN);
+	wl12xx_debug(DEBUG_CRYPT, "Key: algo:0x%x, id:%d, len:%d flags 0x%x",
+		     key->alg, key->keyidx, key->keylen, key->flags);
+	wl12xx_dump(DEBUG_CRYPT, "KEY: ", key->key, key->keylen);
+
+	mutex_lock(&wl->mutex);
+
+	switch (cmd) {
+	case SET_KEY:
+		wl_key.key_action = KEY_ADD_OR_REPLACE;
+		break;
+	case DISABLE_KEY:
+		wl_key.key_action = KEY_REMOVE;
+		break;
+	default:
+		wl12xx_error("Unsupported key cmd 0x%x", cmd);
+		break;
+	}
+
+	ret = wl12xx_set_key_type(wl, &wl_key, cmd, key, addr);
+	if (ret < 0) {
+		wl12xx_error("Set KEY type failed");
+		goto out;
+	}
+
+	if (wl_key.key_type != KEY_WEP_DEFAULT)
+		memcpy(wl_key.addr, addr, ETH_ALEN);
+
+	if ((wl_key.key_type == KEY_TKIP_MIC_GROUP) ||
+	    (wl_key.key_type == KEY_TKIP_MIC_PAIRWISE)) {
+		/*
+		 * We get the key in the following form:
+		 * TKIP (16 bytes) - TX MIC (8 bytes) - RX MIC (8 bytes)
+		 * but the target is expecting:
+		 * TKIP - RX MIC - TX MIC
+		 */
+		memcpy(wl_key.key, key->key, 16);
+		memcpy(wl_key.key + 16, key->key + 24, 8);
+		memcpy(wl_key.key + 24, key->key + 16, 8);
+
+	} else {
+		memcpy(wl_key.key, key->key, key->keylen);
+	}
+	wl_key.key_size = key->keylen;
+
+	wl_key.id = key->keyidx;
+	wl_key.ssid_profile = 0;
+
+	wl12xx_dump(DEBUG_CRYPT, "TARGET KEY: ", &wl_key, sizeof(wl_key));
+
+	if (wl12xx_cmd_send(wl, CMD_SET_KEYS, &wl_key, sizeof(wl_key)) < 0) {
+		wl12xx_error("Set KEY failed");
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+out:
+	mutex_unlock(&wl->mutex);
+	return ret;
+}
+
+static int wl12xx_build_basic_rates(char *rates)
+{
+	u8 index = 0;
+
+	rates[index++] = IEEE80211_BASIC_RATE_MASK | IEEE80211_CCK_RATE_1MB;
+	rates[index++] = IEEE80211_BASIC_RATE_MASK | IEEE80211_CCK_RATE_2MB;
+	rates[index++] = IEEE80211_BASIC_RATE_MASK | IEEE80211_CCK_RATE_5MB;
+	rates[index++] = IEEE80211_BASIC_RATE_MASK | IEEE80211_CCK_RATE_11MB;
+
+	return index;
+}
+
+static int wl12xx_build_extended_rates(char *rates)
+{
+	u8 index = 0;
+
+	rates[index++] = IEEE80211_OFDM_RATE_6MB;
+	rates[index++] = IEEE80211_OFDM_RATE_9MB;
+	rates[index++] = IEEE80211_OFDM_RATE_12MB;
+	rates[index++] = IEEE80211_OFDM_RATE_18MB;
+	rates[index++] = IEEE80211_OFDM_RATE_24MB;
+	rates[index++] = IEEE80211_OFDM_RATE_36MB;
+	rates[index++] = IEEE80211_OFDM_RATE_48MB;
+	rates[index++] = IEEE80211_OFDM_RATE_54MB;
+
+	return index;
+}
+
+
+static int wl12xx_build_probe_req(struct wl12xx *wl, u8 *ssid, size_t ssid_len)
+{
+	struct wl12xx_probe_req_template template;
+	struct wl12xx_ie_rates *rates;
+	char *ptr;
+	u16 size;
+
+	ptr = (char *)&template;
+	size = sizeof(struct ieee80211_header);
+
+	memset(template.header.da, 0xff, ETH_ALEN);
+	memset(template.header.bssid, 0xff, ETH_ALEN);
+	memcpy(template.header.sa, wl->mac_addr, ETH_ALEN);
+	template.header.frame_ctl = cpu_to_le16(IEEE80211_STYPE_PROBE_REQ);
+
+	/* IEs */
+	/* SSID */
+	template.ssid.header.id = WLAN_EID_SSID;
+	template.ssid.header.len = ssid_len;
+	if (ssid_len && ssid)
+		memcpy(template.ssid.ssid, ssid, ssid_len);
+	size += sizeof(struct wl12xx_ie_header) + ssid_len;
+	ptr += size;
+
+	/* Basic Rates */
+	rates = (struct wl12xx_ie_rates *)ptr;
+	rates->header.id = WLAN_EID_SUPP_RATES;
+	rates->header.len = wl12xx_build_basic_rates(rates->rates);
+	size += sizeof(struct wl12xx_ie_header) + rates->header.len;
+	ptr += sizeof(struct wl12xx_ie_header) + rates->header.len;
+
+	/* Extended rates */
+	rates = (struct wl12xx_ie_rates *)ptr;
+	rates->header.id = WLAN_EID_EXT_SUPP_RATES;
+	rates->header.len = wl12xx_build_extended_rates(rates->rates);
+	size += sizeof(struct wl12xx_ie_header) + rates->header.len;
+
+	wl12xx_dump(DEBUG_SCAN, "PROBE REQ: ", &template, size);
+
+	return wl12xx_cmd_template_set(wl, CMD_PROBE_REQ, &template,
+				      size);
+}
+
+static int wl12xx_hw_scan(struct wl12xx *wl, u8 *ssid, size_t len,
+			  u8 active_scan, u8 high_prio, u8 num_channels,
+			  u8 probe_requests)
+{
+	int i, ret;
+	u32 split_scan = 0;
+	u16 scan_options = 0;
+	struct cmd_scan *params;
+	struct wl12xx_command *cmd_answer;
+
+	if (wl->scanning)
+		return -EINVAL;
+
+	params = kzalloc(sizeof(*params), GFP_KERNEL);
+	if (!params)
+		return -ENOMEM;
+
+	params->params.rx_config_options = cpu_to_le32(CFG_RX_ALL_GOOD);
+	params->params.rx_filter_options =
+		cpu_to_le32(CFG_RX_PRSP_EN | CFG_RX_MGMT_EN | CFG_RX_BCN_EN);
+
+	/* High priority scan */
+	if (!active_scan)
+		scan_options |= SCAN_PASSIVE;
+	if (high_prio)
+		scan_options |= SCAN_PRIORITY_HIGH;
+	params->params.scan_options = scan_options;
+
+	params->params.num_channels = num_channels;
+	params->params.num_probe_requests = probe_requests;
+	params->params.tx_rate = cpu_to_le16(1 << 1); /* 2 Mbps */
+	params->params.tid_trigger = 0;
+
+	for (i = 0; i < num_channels; i++) {
+		params->channels[i].min_duration = cpu_to_le32(30000);
+		params->channels[i].max_duration = cpu_to_le32(60000);
+		memset(&params->channels[i].bssid_lsb, 0xff, 4);
+		memset(&params->channels[i].bssid_msb, 0xff, 2);
+		params->channels[i].early_termination = 0;
+		params->channels[i].tx_power_att = 0;
+		params->channels[i].channel = i + 1;
+		memset(params->channels[i].pad, 0, 3);
+	}
+
+	for (i = num_channels; i < SCAN_MAX_NUM_OF_CHANNELS; i++)
+		memset(&params->channels[i], 0,
+		       sizeof(struct basic_scan_channel_parameters));
+
+	if (len && ssid) {
+		params->params.ssid_len = len;
+		memcpy(params->params.ssid, ssid, len);
+	} else {
+		params->params.ssid_len = 0;
+		memset(params->params.ssid, 0, 32);
+	}
+
+	ret = wl12xx_build_probe_req(wl, ssid, len);
+	if (ret < 0) {
+		wl12xx_error("PROBE request template failed");
+		goto out;
+	}
+
+	ret = wl12xx_cmd_send(wl, CMD_TRIGGER_SCAN_TO, &split_scan,
+			      sizeof(u32));
+	if (ret < 0) {
+		wl12xx_error("Split SCAN failed");
+		goto out;
+	}
+
+	wl12xx_dump(DEBUG_SCAN, "SCAN: ", params, sizeof(*params));
+
+	wl->scanning = true;
+
+	ret = wl12xx_cmd_send(wl, CMD_SCAN, params, sizeof(*params));
+	if (ret < 0)
+		wl12xx_error("SCAN failed");
+
+	wl12xx_spi_mem_read(wl, wl->cmd_box_addr, params, sizeof(*params));
+
+	cmd_answer = (struct wl12xx_command *) params;
+	if (cmd_answer->status != CMD_STATUS_SUCCESS) {
+		wl12xx_error("TEST command answer error: %d",
+			     cmd_answer->status);
+		wl->scanning = false;
+		ret = -EIO;
+		goto out;
+	}
+
+out:
+	kfree(params);
+	return ret;
+
+}
+
+static int wl12xx_op_hw_scan(struct ieee80211_hw *hw,
+			     struct cfg80211_scan_request *req)
+{
+	struct wl12xx *wl = hw->priv;
+	int ret;
+	u8 *ssid = NULL;
+	size_t ssid_len = 0;
+
+	wl12xx_debug(DEBUG_MAC80211, "mac80211 hw scan");
+
+	if (req->n_ssids) {
+		ssid = req->ssids[0].ssid;
+		ssid_len = req->ssids[0].ssid_len;
+	}
+
+	mutex_lock(&wl->mutex);
+	ret = wl12xx_hw_scan(hw->priv, ssid, ssid_len, 1, 0, 13, 3);
+	mutex_unlock(&wl->mutex);
+
+	return ret;
+}
+
+static int wl12xx_op_set_rts_threshold(struct ieee80211_hw *hw, u32 value)
+{
+	struct wl12xx *wl = hw->priv;
+	int ret;
+
+	ret = wl12xx_acx_rts_threshold(wl, (u16) value);
+
+	if (ret < 0)
+		wl12xx_warning("wl12xx_op_set_rts_threshold failed: %d", ret);
+
+	return ret;
+}
+
+static void wl12xx_op_bss_info_changed(struct ieee80211_hw *hw,
+				       struct ieee80211_vif *vif,
+				       struct ieee80211_bss_conf *bss_conf,
+				       u32 changed)
+{
+	enum acx_ps_mode mode;
+	struct wl12xx *wl = hw->priv;
+	struct sk_buff *beacon;
+	int ret;
+
+	wl12xx_debug(DEBUG_MAC80211, "mac80211 bss info changed");
+
+	mutex_lock(&wl->mutex);
+
+	if (changed & BSS_CHANGED_ASSOC) {
+		if (bss_conf->assoc) {
+			wl->aid = bss_conf->aid;
+
+			ret = wl12xx_build_ps_poll(wl, wl->aid);
+			if (ret < 0)
+				goto out;
+
+			ret = wl12xx_acx_aid(wl, wl->aid);
+			if (ret < 0)
+				goto out;
+
+			/* If we want to go in PSM but we're not there yet */
+			if (wl->psm_requested && !wl->psm) {
+				mode = STATION_POWER_SAVE_MODE;
+				ret = wl12xx_ps_set_mode(wl, mode);
+				if (ret < 0)
+					goto out;
+			}
+		}
+	}
+	if (changed & BSS_CHANGED_ERP_SLOT) {
+		if (bss_conf->use_short_slot)
+			ret = wl12xx_acx_slot(wl, SLOT_TIME_SHORT);
+		else
+			ret = wl12xx_acx_slot(wl, SLOT_TIME_LONG);
+		if (ret < 0) {
+			wl12xx_warning("Set slot time failed %d", ret);
+			goto out;
+		}
+	}
+
+	if (changed & BSS_CHANGED_ERP_PREAMBLE) {
+		if (bss_conf->use_short_preamble)
+			wl12xx_acx_set_preamble(wl, ACX_PREAMBLE_SHORT);
+		else
+			wl12xx_acx_set_preamble(wl, ACX_PREAMBLE_LONG);
+	}
+
+	if (changed & BSS_CHANGED_ERP_CTS_PROT) {
+		if (bss_conf->use_cts_prot)
+			ret = wl12xx_acx_cts_protect(wl, CTSPROTECT_ENABLE);
+		else
+			ret = wl12xx_acx_cts_protect(wl, CTSPROTECT_DISABLE);
+		if (ret < 0) {
+			wl12xx_warning("Set ctsprotect failed %d", ret);
+			goto out;
+		}
+	}
+
+	if (changed & BSS_CHANGED_BSSID) {
+		memcpy(wl->bssid, bss_conf->bssid, ETH_ALEN);
+
+		ret = wl12xx_build_null_data(wl);
+		if (ret < 0)
+			goto out;
+
+		if (wl->bss_type != BSS_TYPE_IBSS) {
+			ret = wl12xx_cmd_join(wl, wl->bss_type, 5, 100, 1);
+			if (ret < 0)
+				goto out;
+		}
+	}
+
+	if (changed & BSS_CHANGED_BEACON) {
+		beacon = ieee80211_beacon_get(hw, vif);
+		ret = wl12xx_cmd_template_set(wl, CMD_BEACON, beacon->data,
+					      beacon->len);
+
+		if (ret < 0) {
+			dev_kfree_skb(beacon);
+			goto out;
+		}
+
+		ret = wl12xx_cmd_template_set(wl, CMD_PROBE_RESP, beacon->data,
+					      beacon->len);
+
+		dev_kfree_skb(beacon);
+
+		if (ret < 0)
+			goto out;
+
+		ret = wl12xx_cmd_join(wl, wl->bss_type, 1, 100, 0);
+
+		if (ret < 0)
+			goto out;
+	}
+
+out:
+	mutex_unlock(&wl->mutex);
+}
+
+
+/* can't be const, mac80211 writes to this */
+static struct ieee80211_rate wl12xx_rates[] = {
+	{ .bitrate = 10,
+	  .hw_value = 0x1,
+	  .hw_value_short = 0x1, },
+	{ .bitrate = 20,
+	  .hw_value = 0x2,
+	  .hw_value_short = 0x2,
+	  .flags = IEEE80211_RATE_SHORT_PREAMBLE },
+	{ .bitrate = 55,
+	  .hw_value = 0x4,
+	  .hw_value_short = 0x4,
+	  .flags = IEEE80211_RATE_SHORT_PREAMBLE },
+	{ .bitrate = 110,
+	  .hw_value = 0x20,
+	  .hw_value_short = 0x20,
+	  .flags = IEEE80211_RATE_SHORT_PREAMBLE },
+	{ .bitrate = 60,
+	  .hw_value = 0x8,
+	  .hw_value_short = 0x8, },
+	{ .bitrate = 90,
+	  .hw_value = 0x10,
+	  .hw_value_short = 0x10, },
+	{ .bitrate = 120,
+	  .hw_value = 0x40,
+	  .hw_value_short = 0x40, },
+	{ .bitrate = 180,
+	  .hw_value = 0x80,
+	  .hw_value_short = 0x80, },
+	{ .bitrate = 240,
+	  .hw_value = 0x200,
+	  .hw_value_short = 0x200, },
+	{ .bitrate = 360,
+	 .hw_value = 0x400,
+	 .hw_value_short = 0x400, },
+	{ .bitrate = 480,
+	  .hw_value = 0x800,
+	  .hw_value_short = 0x800, },
+	{ .bitrate = 540,
+	  .hw_value = 0x1000,
+	  .hw_value_short = 0x1000, },
+};
+
+/* can't be const, mac80211 writes to this */
+static struct ieee80211_channel wl12xx_channels[] = {
+	{ .hw_value = 1, .center_freq = 2412},
+	{ .hw_value = 2, .center_freq = 2417},
+	{ .hw_value = 3, .center_freq = 2422},
+	{ .hw_value = 4, .center_freq = 2427},
+	{ .hw_value = 5, .center_freq = 2432},
+	{ .hw_value = 6, .center_freq = 2437},
+	{ .hw_value = 7, .center_freq = 2442},
+	{ .hw_value = 8, .center_freq = 2447},
+	{ .hw_value = 9, .center_freq = 2452},
+	{ .hw_value = 10, .center_freq = 2457},
+	{ .hw_value = 11, .center_freq = 2462},
+	{ .hw_value = 12, .center_freq = 2467},
+	{ .hw_value = 13, .center_freq = 2472},
+};
+
+/* can't be const, mac80211 writes to this */
+static struct ieee80211_supported_band wl12xx_band_2ghz = {
+	.channels = wl12xx_channels,
+	.n_channels = ARRAY_SIZE(wl12xx_channels),
+	.bitrates = wl12xx_rates,
+	.n_bitrates = ARRAY_SIZE(wl12xx_rates),
+};
+
+static const struct ieee80211_ops wl12xx_ops = {
+	.start = wl12xx_op_start,
+	.stop = wl12xx_op_stop,
+	.add_interface = wl12xx_op_add_interface,
+	.remove_interface = wl12xx_op_remove_interface,
+	.config = wl12xx_op_config,
+	.configure_filter = wl12xx_op_configure_filter,
+	.tx = wl12xx_op_tx,
+	.set_key = wl12xx_op_set_key,
+	.hw_scan = wl12xx_op_hw_scan,
+	.bss_info_changed = wl12xx_op_bss_info_changed,
+	.set_rts_threshold = wl12xx_op_set_rts_threshold,
+};
+
+static int wl12xx_register_hw(struct wl12xx *wl)
+{
+	int ret;
+
+	if (wl->mac80211_registered)
+		return 0;
+
+	SET_IEEE80211_PERM_ADDR(wl->hw, wl->mac_addr);
+
+	ret = ieee80211_register_hw(wl->hw);
+	if (ret < 0) {
+		wl12xx_error("unable to register mac80211 hw: %d", ret);
+		return ret;
+	}
+
+	wl->mac80211_registered = true;
+
+	wl12xx_notice("loaded");
+
+	return 0;
+}
+
+static int wl12xx_init_ieee80211(struct wl12xx *wl)
+{
+	/* The tx descriptor buffer and the TKIP space */
+	wl->hw->extra_tx_headroom = sizeof(struct tx_double_buffer_desc)
+		+ WL12XX_TKIP_IV_SPACE;
+
+	/* unit us */
+	/* FIXME: find a proper value */
+	wl->hw->channel_change_time = 10000;
+
+	wl->hw->flags = IEEE80211_HW_SIGNAL_DBM |
+		IEEE80211_HW_NOISE_DBM;
+
+	wl->hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION);
+	wl->hw->wiphy->max_scan_ssids = 1;
+	wl->hw->wiphy->bands[IEEE80211_BAND_2GHZ] = &wl12xx_band_2ghz;
+
+	SET_IEEE80211_DEV(wl->hw, &wl->spi->dev);
+
+	return 0;
+}
+
+#define WL12XX_DEFAULT_CHANNEL 1
+static int __devinit wl12xx_probe(struct spi_device *spi)
+{
+	struct wl12xx_platform_data *pdata;
+	struct ieee80211_hw *hw;
+	struct wl12xx *wl;
+	int ret, i;
+	static const u8 nokia_oui[3] = {0x00, 0x1f, 0xdf};
+
+	pdata = spi->dev.platform_data;
+	if (!pdata) {
+		wl12xx_error("no platform data");
+		return -ENODEV;
+	}
+
+	hw = ieee80211_alloc_hw(sizeof(*wl), &wl12xx_ops);
+	if (!hw) {
+		wl12xx_error("could not alloc ieee80211_hw");
+		return -ENOMEM;
+	}
+
+	wl = hw->priv;
+	memset(wl, 0, sizeof(*wl));
+
+	wl->hw = hw;
+	dev_set_drvdata(&spi->dev, wl);
+	wl->spi = spi;
+
+	wl->data_in_count = 0;
+
+	skb_queue_head_init(&wl->tx_queue);
+
+	INIT_WORK(&wl->tx_work, wl12xx_tx_work);
+	INIT_WORK(&wl->filter_work, wl12xx_filter_work);
+	wl->channel = WL12XX_DEFAULT_CHANNEL;
+	wl->scanning = false;
+	wl->default_key = 0;
+	wl->listen_int = 1;
+	wl->rx_counter = 0;
+	wl->rx_handled = 0;
+	wl->rx_current_buffer = 0;
+	wl->rx_last_id = 0;
+	wl->rx_config = WL12XX_DEFAULT_RX_CONFIG;
+	wl->rx_filter = WL12XX_DEFAULT_RX_FILTER;
+	wl->elp = false;
+	wl->psm = 0;
+	wl->psm_requested = false;
+	wl->tx_queue_stopped = false;
+	wl->power_level = WL12XX_DEFAULT_POWER_LEVEL;
+
+	/* We use the default power on sleep time until we know which chip
+	 * we're using */
+	wl->chip.power_on_sleep = WL12XX_DEFAULT_POWER_ON_SLEEP;
+
+	for (i = 0; i < FW_TX_CMPLT_BLOCK_SIZE; i++)
+		wl->tx_frames[i] = NULL;
+
+	wl->next_tx_complete = 0;
+
+	/*
+	 * In case our MAC address is not correctly set,
+	 * we use a random but Nokia MAC.
+	 */
+	memcpy(wl->mac_addr, nokia_oui, 3);
+	get_random_bytes(wl->mac_addr + 3, 3);
+
+	wl->state = WL12XX_STATE_OFF;
+	mutex_init(&wl->mutex);
+
+	wl->tx_mgmt_frm_rate = DEFAULT_HW_GEN_TX_RATE;
+	wl->tx_mgmt_frm_mod = DEFAULT_HW_GEN_MODULATION_TYPE;
+
+	/* This is the only SPI value that we need to set here, the rest
+	 * comes from the board-peripherals file */
+	spi->bits_per_word = 32;
+
+	ret = spi_setup(spi);
+	if (ret < 0) {
+		wl12xx_error("spi_setup failed");
+		goto out_free;
+	}
+
+	wl->set_power = pdata->set_power;
+	if (!wl->set_power) {
+		wl12xx_error("set power function missing in platform data");
+		return -ENODEV;
+	}
+
+	wl->irq = spi->irq;
+	if (wl->irq < 0) {
+		wl12xx_error("irq missing in platform data");
+		return -ENODEV;
+	}
+
+	ret = request_irq(wl->irq, wl12xx_irq, 0, DRIVER_NAME, wl);
+	if (ret < 0) {
+		wl12xx_error("request_irq() failed: %d", ret);
+		goto out_free;
+	}
+
+	set_irq_type(wl->irq, IRQ_TYPE_EDGE_RISING);
+
+	disable_irq(wl->irq);
+
+	ret = wl12xx_init_ieee80211(wl);
+	if (ret)
+		goto out_irq;
+
+	ret = wl12xx_register_hw(wl);
+	if (ret)
+		goto out_irq;
+
+	wl12xx_debugfs_init(wl);
+
+	wl12xx_notice("initialized");
+
+	return 0;
+
+ out_irq:
+	free_irq(wl->irq, wl);
+
+ out_free:
+	ieee80211_free_hw(hw);
+
+	return ret;
+}
+
+static int __devexit wl12xx_remove(struct spi_device *spi)
+{
+	struct wl12xx *wl = dev_get_drvdata(&spi->dev);
+
+	ieee80211_unregister_hw(wl->hw);
+
+	wl12xx_debugfs_exit(wl);
+
+	free_irq(wl->irq, wl);
+	kfree(wl->target_mem_map);
+	kfree(wl->data_path);
+	kfree(wl->fw);
+	wl->fw = NULL;
+	kfree(wl->nvs);
+	wl->nvs = NULL;
+	ieee80211_free_hw(wl->hw);
+
+	return 0;
+}
+
+
+static struct spi_driver wl12xx_spi_driver = {
+	.driver = {
+		.name		= "wl12xx",
+		.bus		= &spi_bus_type,
+		.owner		= THIS_MODULE,
+	},
+
+	.probe		= wl12xx_probe,
+	.remove		= __devexit_p(wl12xx_remove),
+};
+
+static int __init wl12xx_init(void)
+{
+	int ret;
+
+	ret = spi_register_driver(&wl12xx_spi_driver);
+	if (ret < 0) {
+		wl12xx_error("failed to register spi driver: %d", ret);
+		goto out;
+	}
+
+out:
+	return ret;
+}
+
+static void __exit wl12xx_exit(void)
+{
+	spi_unregister_driver(&wl12xx_spi_driver);
+
+	wl12xx_notice("unloaded");
+}
+
+module_init(wl12xx_init);
+module_exit(wl12xx_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Kalle Valo <Kalle.Valo@nokia.com>, "
+		"Luciano Coelho <luciano.coelho@nokia.com>");
diff --git a/drivers/net/wireless/wl12xx/ps.c b/drivers/net/wireless/wl12xx/ps.c
new file mode 100644
index 00000000000..83a10117330
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/ps.c
@@ -0,0 +1,151 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (C) 2008 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include "reg.h"
+#include "ps.h"
+#include "spi.h"
+
+#define WL12XX_WAKEUP_TIMEOUT 2000
+
+/* Routines to toggle sleep mode while in ELP */
+void wl12xx_ps_elp_sleep(struct wl12xx *wl)
+{
+	if (wl->elp || !wl->psm)
+		return;
+
+	wl12xx_debug(DEBUG_PSM, "chip to elp");
+
+	wl12xx_write32(wl, HW_ACCESS_ELP_CTRL_REG_ADDR, ELPCTRL_SLEEP);
+
+	wl->elp = true;
+}
+
+int wl12xx_ps_elp_wakeup(struct wl12xx *wl)
+{
+	unsigned long timeout;
+	u32 elp_reg;
+
+	if (!wl->elp)
+		return 0;
+
+	wl12xx_debug(DEBUG_PSM, "waking up chip from elp");
+
+	timeout = jiffies + msecs_to_jiffies(WL12XX_WAKEUP_TIMEOUT);
+
+	wl12xx_write32(wl, HW_ACCESS_ELP_CTRL_REG_ADDR, ELPCTRL_WAKE_UP);
+
+	elp_reg = wl12xx_read32(wl, HW_ACCESS_ELP_CTRL_REG_ADDR);
+
+	/*
+	 * FIXME: we should wait for irq from chip but, as a temporary
+	 * solution to simplify locking, let's poll instead
+	 */
+	while (!(elp_reg & ELPCTRL_WLAN_READY)) {
+		if (time_after(jiffies, timeout)) {
+			wl12xx_error("elp wakeup timeout");
+			return -ETIMEDOUT;
+		}
+		msleep(1);
+		elp_reg = wl12xx_read32(wl, HW_ACCESS_ELP_CTRL_REG_ADDR);
+	}
+
+	wl12xx_debug(DEBUG_PSM, "wakeup time: %u ms",
+		     jiffies_to_msecs(jiffies) -
+		     (jiffies_to_msecs(timeout) - WL12XX_WAKEUP_TIMEOUT));
+
+	wl->elp = false;
+
+	return 0;
+}
+
+static int wl12xx_ps_set_elp(struct wl12xx *wl, bool enable)
+{
+	int ret;
+
+	if (enable) {
+		wl12xx_debug(DEBUG_PSM, "sleep auth psm/elp");
+
+		/*
+		 * FIXME: we should PSM_ELP, but because of firmware wakeup
+		 * problems let's use only PSM_PS
+		 */
+		ret = wl12xx_acx_sleep_auth(wl, WL12XX_PSM_PS);
+		if (ret < 0)
+			return ret;
+
+		wl12xx_ps_elp_sleep(wl);
+	} else {
+		wl12xx_debug(DEBUG_PSM, "sleep auth cam");
+
+		/*
+		 * When the target is in ELP, we can only
+		 * access the ELP control register. Thus,
+		 * we have to wake the target up before
+		 * changing the power authorization.
+		 */
+
+		wl12xx_ps_elp_wakeup(wl);
+
+		ret = wl12xx_acx_sleep_auth(wl, WL12XX_PSM_CAM);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+int wl12xx_ps_set_mode(struct wl12xx *wl, enum acx_ps_mode mode)
+{
+	int ret;
+
+	switch (mode) {
+	case STATION_POWER_SAVE_MODE:
+		wl12xx_debug(DEBUG_PSM, "entering psm");
+		ret = wl12xx_cmd_ps_mode(wl, STATION_POWER_SAVE_MODE);
+		if (ret < 0)
+			return ret;
+
+		ret = wl12xx_ps_set_elp(wl, true);
+		if (ret < 0)
+			return ret;
+
+		wl->psm = 1;
+		break;
+	case STATION_ACTIVE_MODE:
+	default:
+		wl12xx_debug(DEBUG_PSM, "leaving psm");
+		ret = wl12xx_ps_set_elp(wl, false);
+		if (ret < 0)
+			return ret;
+
+		ret = wl12xx_cmd_ps_mode(wl, STATION_ACTIVE_MODE);
+		if (ret < 0)
+			return ret;
+
+		wl->psm = 0;
+		break;
+	}
+
+	return ret;
+}
+
diff --git a/drivers/net/wireless/wl12xx/ps.h b/drivers/net/wireless/wl12xx/ps.h
new file mode 100644
index 00000000000..5d7c5255383
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/ps.h
@@ -0,0 +1,36 @@
+#ifndef __WL12XX_PS_H__
+#define __WL12XX_PS_H__
+
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (c) 1998-2007 Texas Instruments Incorporated
+ * Copyright (C) 2008 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include "wl12xx.h"
+#include "acx.h"
+
+int wl12xx_ps_set_mode(struct wl12xx *wl, enum acx_ps_mode mode);
+void wl12xx_ps_elp_sleep(struct wl12xx *wl);
+int wl12xx_ps_elp_wakeup(struct wl12xx *wl);
+
+
+#endif /* __WL12XX_PS_H__ */
diff --git a/drivers/net/wireless/wl12xx/reg.h b/drivers/net/wireless/wl12xx/reg.h
new file mode 100644
index 00000000000..e421643215c
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/reg.h
@@ -0,0 +1,745 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (c) 1998-2007 Texas Instruments Incorporated
+ * Copyright (C) 2008 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#ifndef __REG_H__
+#define __REG_H__
+
+#include <linux/bitops.h>
+#include "wl12xx.h"
+
+#define REGISTERS_BASE 0x00300000
+#define DRPW_BASE      0x00310000
+
+#define REGISTERS_DOWN_SIZE 0x00008800
+#define REGISTERS_WORK_SIZE 0x0000b000
+
+#define HW_ACCESS_ELP_CTRL_REG_ADDR         0x1FFFC
+
+/* ELP register commands */
+#define ELPCTRL_WAKE_UP             0x1
+#define ELPCTRL_WAKE_UP_WLAN_READY  0x5
+#define ELPCTRL_SLEEP               0x0
+/* ELP WLAN_READY bit */
+#define ELPCTRL_WLAN_READY          0x2
+
+/*
+ * Interrupt registers.
+ * 64 bit interrupt sources registers ws ced.
+ * sme interupts were removed and new ones were added.
+ * Order was changed.
+ */
+#define FIQ_MASK                       (REGISTERS_BASE + 0x0400)
+#define FIQ_MASK_L                     (REGISTERS_BASE + 0x0400)
+#define FIQ_MASK_H                     (REGISTERS_BASE + 0x0404)
+#define FIQ_MASK_SET                   (REGISTERS_BASE + 0x0408)
+#define FIQ_MASK_SET_L                 (REGISTERS_BASE + 0x0408)
+#define FIQ_MASK_SET_H                 (REGISTERS_BASE + 0x040C)
+#define FIQ_MASK_CLR                   (REGISTERS_BASE + 0x0410)
+#define FIQ_MASK_CLR_L                 (REGISTERS_BASE + 0x0410)
+#define FIQ_MASK_CLR_H                 (REGISTERS_BASE + 0x0414)
+#define IRQ_MASK                       (REGISTERS_BASE + 0x0418)
+#define IRQ_MASK_L                     (REGISTERS_BASE + 0x0418)
+#define IRQ_MASK_H                     (REGISTERS_BASE + 0x041C)
+#define IRQ_MASK_SET                   (REGISTERS_BASE + 0x0420)
+#define IRQ_MASK_SET_L                 (REGISTERS_BASE + 0x0420)
+#define IRQ_MASK_SET_H                 (REGISTERS_BASE + 0x0424)
+#define IRQ_MASK_CLR                   (REGISTERS_BASE + 0x0428)
+#define IRQ_MASK_CLR_L                 (REGISTERS_BASE + 0x0428)
+#define IRQ_MASK_CLR_H                 (REGISTERS_BASE + 0x042C)
+#define ECPU_MASK                      (REGISTERS_BASE + 0x0448)
+#define FIQ_STS_L                      (REGISTERS_BASE + 0x044C)
+#define FIQ_STS_H                      (REGISTERS_BASE + 0x0450)
+#define IRQ_STS_L                      (REGISTERS_BASE + 0x0454)
+#define IRQ_STS_H                      (REGISTERS_BASE + 0x0458)
+#define INT_STS_ND                     (REGISTERS_BASE + 0x0464)
+#define INT_STS_RAW_L                  (REGISTERS_BASE + 0x0464)
+#define INT_STS_RAW_H                  (REGISTERS_BASE + 0x0468)
+#define INT_STS_CLR                    (REGISTERS_BASE + 0x04B4)
+#define INT_STS_CLR_L                  (REGISTERS_BASE + 0x04B4)
+#define INT_STS_CLR_H                  (REGISTERS_BASE + 0x04B8)
+#define INT_ACK                        (REGISTERS_BASE + 0x046C)
+#define INT_ACK_L                      (REGISTERS_BASE + 0x046C)
+#define INT_ACK_H                      (REGISTERS_BASE + 0x0470)
+#define INT_TRIG                       (REGISTERS_BASE + 0x0474)
+#define INT_TRIG_L                     (REGISTERS_BASE + 0x0474)
+#define INT_TRIG_H                     (REGISTERS_BASE + 0x0478)
+#define HOST_STS_L                     (REGISTERS_BASE + 0x045C)
+#define HOST_STS_H                     (REGISTERS_BASE + 0x0460)
+#define HOST_MASK                      (REGISTERS_BASE + 0x0430)
+#define HOST_MASK_L                    (REGISTERS_BASE + 0x0430)
+#define HOST_MASK_H                    (REGISTERS_BASE + 0x0434)
+#define HOST_MASK_SET                  (REGISTERS_BASE + 0x0438)
+#define HOST_MASK_SET_L                (REGISTERS_BASE + 0x0438)
+#define HOST_MASK_SET_H                (REGISTERS_BASE + 0x043C)
+#define HOST_MASK_CLR                  (REGISTERS_BASE + 0x0440)
+#define HOST_MASK_CLR_L                (REGISTERS_BASE + 0x0440)
+#define HOST_MASK_CLR_H                (REGISTERS_BASE + 0x0444)
+
+/* Host Interrupts*/
+#define HINT_MASK                      (REGISTERS_BASE + 0x0494)
+#define HINT_MASK_SET                  (REGISTERS_BASE + 0x0498)
+#define HINT_MASK_CLR                  (REGISTERS_BASE + 0x049C)
+#define HINT_STS_ND_MASKED             (REGISTERS_BASE + 0x04A0)
+/*1150 spec calls this HINT_STS_RAW*/
+#define HINT_STS_ND		       (REGISTERS_BASE + 0x04B0)
+#define HINT_STS_CLR                   (REGISTERS_BASE + 0x04A4)
+#define HINT_ACK                       (REGISTERS_BASE + 0x04A8)
+#define HINT_TRIG                      (REGISTERS_BASE + 0x04AC)
+
+/* Device Configuration registers*/
+#define SOR_CFG                        (REGISTERS_BASE + 0x0800)
+#define ECPU_CTRL                      (REGISTERS_BASE + 0x0804)
+#define HI_CFG                         (REGISTERS_BASE + 0x0808)
+#define EE_START                       (REGISTERS_BASE + 0x080C)
+
+#define CHIP_ID_B                      (REGISTERS_BASE + 0x5674)
+
+#define CHIP_ID_1251_PG10	           (0x7010101)
+#define CHIP_ID_1251_PG11	           (0x7020101)
+#define CHIP_ID_1251_PG12	           (0x7030101)
+
+#define ENABLE                         (REGISTERS_BASE + 0x5450)
+
+/* Power Management registers */
+#define ELP_CFG_MODE                   (REGISTERS_BASE + 0x5804)
+#define ELP_CMD                        (REGISTERS_BASE + 0x5808)
+#define PLL_CAL_TIME                   (REGISTERS_BASE + 0x5810)
+#define CLK_REQ_TIME                   (REGISTERS_BASE + 0x5814)
+#define CLK_BUF_TIME                   (REGISTERS_BASE + 0x5818)
+
+#define CFG_PLL_SYNC_CNT               (REGISTERS_BASE + 0x5820)
+
+/* Scratch Pad registers*/
+#define SCR_PAD0                       (REGISTERS_BASE + 0x5608)
+#define SCR_PAD1                       (REGISTERS_BASE + 0x560C)
+#define SCR_PAD2                       (REGISTERS_BASE + 0x5610)
+#define SCR_PAD3                       (REGISTERS_BASE + 0x5614)
+#define SCR_PAD4                       (REGISTERS_BASE + 0x5618)
+#define SCR_PAD4_SET                   (REGISTERS_BASE + 0x561C)
+#define SCR_PAD4_CLR                   (REGISTERS_BASE + 0x5620)
+#define SCR_PAD5                       (REGISTERS_BASE + 0x5624)
+#define SCR_PAD5_SET                   (REGISTERS_BASE + 0x5628)
+#define SCR_PAD5_CLR                   (REGISTERS_BASE + 0x562C)
+#define SCR_PAD6                       (REGISTERS_BASE + 0x5630)
+#define SCR_PAD7                       (REGISTERS_BASE + 0x5634)
+#define SCR_PAD8                       (REGISTERS_BASE + 0x5638)
+#define SCR_PAD9                       (REGISTERS_BASE + 0x563C)
+
+/* Spare registers*/
+#define SPARE_A1                       (REGISTERS_BASE + 0x0994)
+#define SPARE_A2                       (REGISTERS_BASE + 0x0998)
+#define SPARE_A3                       (REGISTERS_BASE + 0x099C)
+#define SPARE_A4                       (REGISTERS_BASE + 0x09A0)
+#define SPARE_A5                       (REGISTERS_BASE + 0x09A4)
+#define SPARE_A6                       (REGISTERS_BASE + 0x09A8)
+#define SPARE_A7                       (REGISTERS_BASE + 0x09AC)
+#define SPARE_A8                       (REGISTERS_BASE + 0x09B0)
+#define SPARE_B1                       (REGISTERS_BASE + 0x5420)
+#define SPARE_B2                       (REGISTERS_BASE + 0x5424)
+#define SPARE_B3                       (REGISTERS_BASE + 0x5428)
+#define SPARE_B4                       (REGISTERS_BASE + 0x542C)
+#define SPARE_B5                       (REGISTERS_BASE + 0x5430)
+#define SPARE_B6                       (REGISTERS_BASE + 0x5434)
+#define SPARE_B7                       (REGISTERS_BASE + 0x5438)
+#define SPARE_B8                       (REGISTERS_BASE + 0x543C)
+
+enum wl12xx_acx_int_reg {
+	ACX_REG_INTERRUPT_TRIG,
+	ACX_REG_INTERRUPT_TRIG_H,
+
+/*=============================================
+  Host Interrupt Mask Register - 32bit (RW)
+  ------------------------------------------
+  Setting a bit in this register masks the
+  corresponding interrupt to the host.
+  0 - RX0		- Rx first dubble buffer Data Interrupt
+  1 - TXD		- Tx Data Interrupt
+  2 - TXXFR		- Tx Transfer Interrupt
+  3 - RX1		- Rx second dubble buffer Data Interrupt
+  4 - RXXFR		- Rx Transfer Interrupt
+  5 - EVENT_A	- Event Mailbox interrupt
+  6 - EVENT_B	- Event Mailbox interrupt
+  7 - WNONHST	- Wake On Host Interrupt
+  8 - TRACE_A	- Debug Trace interrupt
+  9 - TRACE_B	- Debug Trace interrupt
+ 10 - CDCMP		- Command Complete Interrupt
+ 11 -
+ 12 -
+ 13 -
+ 14 - ICOMP		- Initialization Complete Interrupt
+ 16 - SG SE		- Soft Gemini - Sense enable interrupt
+ 17 - SG SD		- Soft Gemini - Sense disable interrupt
+ 18 -			-
+ 19 -			-
+ 20 -			-
+ 21-			-
+ Default: 0x0001
+*==============================================*/
+	ACX_REG_INTERRUPT_MASK,
+
+/*=============================================
+  Host Interrupt Mask Set 16bit, (Write only)
+  ------------------------------------------
+ Setting a bit in this register sets
+ the corresponding bin in ACX_HINT_MASK register
+ without effecting the mask
+ state of other bits (0 = no effect).
+==============================================*/
+	ACX_REG_HINT_MASK_SET,
+
+/*=============================================
+  Host Interrupt Mask Clear 16bit,(Write only)
+  ------------------------------------------
+ Setting a bit in this register clears
+ the corresponding bin in ACX_HINT_MASK register
+ without effecting the mask
+ state of other bits (0 = no effect).
+=============================================*/
+	ACX_REG_HINT_MASK_CLR,
+
+/*=============================================
+  Host Interrupt Status Nondestructive Read
+  16bit,(Read only)
+  ------------------------------------------
+ The host can read this register to determine
+ which interrupts are active.
+ Reading this register doesn't
+ effect its content.
+=============================================*/
+	ACX_REG_INTERRUPT_NO_CLEAR,
+
+/*=============================================
+  Host Interrupt Status Clear on Read  Register
+  16bit,(Read only)
+  ------------------------------------------
+ The host can read this register to determine
+ which interrupts are active.
+ Reading this register clears it,
+ thus making all interrupts inactive.
+==============================================*/
+	ACX_REG_INTERRUPT_CLEAR,
+
+/*=============================================
+  Host Interrupt Acknowledge Register
+  16bit,(Write only)
+  ------------------------------------------
+ The host can set individual bits in this
+ register to clear (acknowledge) the corresp.
+ interrupt status bits in the HINT_STS_CLR and
+ HINT_STS_ND registers, thus making the
+ assotiated interrupt inactive. (0-no effect)
+==============================================*/
+	ACX_REG_INTERRUPT_ACK,
+
+/*===============================================
+   Host Software Reset - 32bit RW
+ ------------------------------------------
+    [31:1] Reserved
+    0  SOFT_RESET Soft Reset  - When this bit is set,
+    it holds the Wlan hardware in a soft reset state.
+    This reset disables all MAC and baseband processor
+    clocks except the CardBus/PCI interface clock.
+    It also initializes all MAC state machines except
+    the host interface. It does not reload the
+    contents of the EEPROM. When this bit is cleared
+    (not self-clearing), the Wlan hardware
+    exits the software reset state.
+===============================================*/
+	ACX_REG_SLV_SOFT_RESET,
+
+/*===============================================
+ EEPROM Burst Read Start  - 32bit RW
+ ------------------------------------------
+ [31:1] Reserved
+ 0  ACX_EE_START -  EEPROM Burst Read Start 0
+ Setting this bit starts a burst read from
+ the external EEPROM.
+ If this bit is set (after reset) before an EEPROM read/write,
+ the burst read starts at EEPROM address 0.
+ Otherwise, it starts at the address
+ following the address of the previous access.
+ TheWlan hardware hardware clears this bit automatically.
+
+ Default: 0x00000000
+*================================================*/
+	ACX_REG_EE_START,
+
+/* Embedded ARM CPU Control */
+
+/*===============================================
+ Halt eCPU   - 32bit RW
+ ------------------------------------------
+ 0 HALT_ECPU Halt Embedded CPU - This bit is the
+ compliment of bit 1 (MDATA2) in the SOR_CFG register.
+ During a hardware reset, this bit holds
+ the inverse of MDATA2.
+ When downloading firmware from the host,
+ set this bit (pull down MDATA2).
+ The host clears this bit after downloading the firmware into
+ zero-wait-state SSRAM.
+ When loading firmware from Flash, clear this bit (pull up MDATA2)
+ so that the eCPU can run the bootloader code in Flash
+ HALT_ECPU eCPU State
+ --------------------
+ 1 halt eCPU
+ 0 enable eCPU
+ ===============================================*/
+	ACX_REG_ECPU_CONTROL,
+
+	ACX_REG_TABLE_LEN
+};
+
+#define ACX_SLV_SOFT_RESET_BIT   BIT(1)
+#define ACX_REG_EEPROM_START_BIT BIT(1)
+
+/* Command/Information Mailbox Pointers */
+
+/*===============================================
+  Command Mailbox Pointer - 32bit RW
+ ------------------------------------------
+ This register holds the start address of
+ the command mailbox located in the Wlan hardware memory.
+ The host must read this pointer after a reset to
+ find the location of the command mailbox.
+ The Wlan hardware initializes the command mailbox
+ pointer with the default address of the command mailbox.
+ The command mailbox pointer is not valid until after
+ the host receives the Init Complete interrupt from
+ the Wlan hardware.
+ ===============================================*/
+#define REG_COMMAND_MAILBOX_PTR				(SCR_PAD0)
+
+/*===============================================
+  Information Mailbox Pointer - 32bit RW
+ ------------------------------------------
+ This register holds the start address of
+ the information mailbox located in the Wlan hardware memory.
+ The host must read this pointer after a reset to find
+ the location of the information mailbox.
+ The Wlan hardware initializes the information mailbox pointer
+ with the default address of the information mailbox.
+ The information mailbox pointer is not valid
+ until after the host receives the Init Complete interrupt from
+ the Wlan hardware.
+ ===============================================*/
+#define REG_EVENT_MAILBOX_PTR				(SCR_PAD1)
+
+
+/* Misc */
+
+#define REG_ENABLE_TX_RX				(ENABLE)
+/*
+ * Rx configuration (filter) information element
+ * ---------------------------------------------
+ */
+#define REG_RX_CONFIG				(RX_CFG)
+#define REG_RX_FILTER				(RX_FILTER_CFG)
+
+
+#define RX_CFG_ENABLE_PHY_HEADER_PLCP	 0x0002
+
+/* promiscuous - receives all valid frames */
+#define RX_CFG_PROMISCUOUS		 0x0008
+
+/* receives frames from any BSSID */
+#define RX_CFG_BSSID			 0x0020
+
+/* receives frames destined to any MAC address */
+#define RX_CFG_MAC			 0x0010
+
+#define RX_CFG_ENABLE_ONLY_MY_DEST_MAC	 0x0010
+#define RX_CFG_ENABLE_ANY_DEST_MAC	 0x0000
+#define RX_CFG_ENABLE_ONLY_MY_BSSID	 0x0020
+#define RX_CFG_ENABLE_ANY_BSSID		 0x0000
+
+/* discards all broadcast frames */
+#define RX_CFG_DISABLE_BCAST		 0x0200
+
+#define RX_CFG_ENABLE_ONLY_MY_SSID	 0x0400
+#define RX_CFG_ENABLE_RX_CMPLT_FCS_ERROR 0x0800
+#define RX_CFG_COPY_RX_STATUS		 0x2000
+#define RX_CFG_TSF			 0x10000
+
+#define RX_CONFIG_OPTION_ANY_DST_MY_BSS	 (RX_CFG_ENABLE_ANY_DEST_MAC | \
+					  RX_CFG_ENABLE_ONLY_MY_BSSID)
+
+#define RX_CONFIG_OPTION_MY_DST_ANY_BSS	 (RX_CFG_ENABLE_ONLY_MY_DEST_MAC\
+					  | RX_CFG_ENABLE_ANY_BSSID)
+
+#define RX_CONFIG_OPTION_ANY_DST_ANY_BSS (RX_CFG_ENABLE_ANY_DEST_MAC | \
+					  RX_CFG_ENABLE_ANY_BSSID)
+
+#define RX_CONFIG_OPTION_MY_DST_MY_BSS	 (RX_CFG_ENABLE_ONLY_MY_DEST_MAC\
+					  | RX_CFG_ENABLE_ONLY_MY_BSSID)
+
+#define RX_CONFIG_OPTION_FOR_SCAN  (RX_CFG_ENABLE_PHY_HEADER_PLCP \
+				    | RX_CFG_ENABLE_RX_CMPLT_FCS_ERROR \
+				    | RX_CFG_COPY_RX_STATUS | RX_CFG_TSF)
+
+#define RX_CONFIG_OPTION_FOR_MEASUREMENT (RX_CFG_ENABLE_ANY_DEST_MAC)
+
+#define RX_CONFIG_OPTION_FOR_JOIN	 (RX_CFG_ENABLE_ONLY_MY_BSSID | \
+					  RX_CFG_ENABLE_ONLY_MY_DEST_MAC)
+
+#define RX_CONFIG_OPTION_FOR_IBSS_JOIN   (RX_CFG_ENABLE_ONLY_MY_SSID | \
+					  RX_CFG_ENABLE_ONLY_MY_DEST_MAC)
+
+#define RX_FILTER_OPTION_DEF	      (CFG_RX_MGMT_EN | CFG_RX_DATA_EN\
+				       | CFG_RX_CTL_EN | CFG_RX_BCN_EN\
+				       | CFG_RX_AUTH_EN | CFG_RX_ASSOC_EN)
+
+#define RX_FILTER_OPTION_FILTER_ALL	 0
+
+#define RX_FILTER_OPTION_DEF_PRSP_BCN  (CFG_RX_PRSP_EN | CFG_RX_MGMT_EN\
+					| CFG_RX_RCTS_ACK | CFG_RX_BCN_EN)
+
+#define RX_FILTER_OPTION_JOIN	     (CFG_RX_MGMT_EN | CFG_RX_DATA_EN\
+				      | CFG_RX_BCN_EN | CFG_RX_AUTH_EN\
+				      | CFG_RX_ASSOC_EN | CFG_RX_RCTS_ACK\
+				      | CFG_RX_PRSP_EN)
+
+
+/*===============================================
+  Phy regs
+ ===============================================*/
+#define ACX_PHY_ADDR_REG                SBB_ADDR
+#define ACX_PHY_DATA_REG                SBB_DATA
+#define ACX_PHY_CTRL_REG                SBB_CTL
+#define ACX_PHY_REG_WR_MASK             0x00000001ul
+#define ACX_PHY_REG_RD_MASK             0x00000002ul
+
+
+/*===============================================
+ EEPROM Read/Write Request 32bit RW
+ ------------------------------------------
+ 1 EE_READ - EEPROM Read Request 1 - Setting this bit
+ loads a single byte of data into the EE_DATA
+ register from the EEPROM location specified in
+ the EE_ADDR register.
+ The Wlan hardware hardware clears this bit automatically.
+ EE_DATA is valid when this bit is cleared.
+
+ 0 EE_WRITE  - EEPROM Write Request  - Setting this bit
+ writes a single byte of data from the EE_DATA register into the
+ EEPROM location specified in the EE_ADDR register.
+ The Wlan hardware hardware clears this bit automatically.
+*===============================================*/
+#define ACX_EE_CTL_REG                      EE_CTL
+#define EE_WRITE                            0x00000001ul
+#define EE_READ                             0x00000002ul
+
+/*===============================================
+  EEPROM Address  - 32bit RW
+  ------------------------------------------
+  This register specifies the address
+  within the EEPROM from/to which to read/write data.
+  ===============================================*/
+#define ACX_EE_ADDR_REG                     EE_ADDR
+
+/*===============================================
+  EEPROM Data  - 32bit RW
+  ------------------------------------------
+  This register either holds the read 8 bits of
+  data from the EEPROM or the write data
+  to be written to the EEPROM.
+  ===============================================*/
+#define ACX_EE_DATA_REG                     EE_DATA
+
+/*===============================================
+  EEPROM Base Address  - 32bit RW
+  ------------------------------------------
+  This register holds the upper nine bits
+  [23:15] of the 24-bit Wlan hardware memory
+  address for burst reads from EEPROM accesses.
+  The EEPROM provides the lower 15 bits of this address.
+  The MSB of the address from the EEPROM is ignored.
+  ===============================================*/
+#define ACX_EE_CFG                          EE_CFG
+
+/*===============================================
+  GPIO Output Values  -32bit, RW
+  ------------------------------------------
+  [31:16]  Reserved
+  [15: 0]  Specify the output values (at the output driver inputs) for
+  GPIO[15:0], respectively.
+  ===============================================*/
+#define ACX_GPIO_OUT_REG            GPIO_OUT
+#define ACX_MAX_GPIO_LINES          15
+
+/*===============================================
+  Contention window  -32bit, RW
+  ------------------------------------------
+  [31:26]  Reserved
+  [25:16]  Max (0x3ff)
+  [15:07]  Reserved
+  [06:00]  Current contention window value - default is 0x1F
+  ===============================================*/
+#define ACX_CONT_WIND_CFG_REG    CONT_WIND_CFG
+#define ACX_CONT_WIND_MIN_MASK   0x0000007f
+#define ACX_CONT_WIND_MAX        0x03ff0000
+
+/*
+ * Indirect slave register/memory registers
+ * ----------------------------------------
+ */
+#define HW_SLAVE_REG_ADDR_REG		0x00000004
+#define HW_SLAVE_REG_DATA_REG		0x00000008
+#define HW_SLAVE_REG_CTRL_REG		0x0000000c
+
+#define SLAVE_AUTO_INC				0x00010000
+#define SLAVE_NO_AUTO_INC			0x00000000
+#define SLAVE_HOST_LITTLE_ENDIAN	0x00000000
+
+#define HW_SLAVE_MEM_ADDR_REG		SLV_MEM_ADDR
+#define HW_SLAVE_MEM_DATA_REG		SLV_MEM_DATA
+#define HW_SLAVE_MEM_CTRL_REG		SLV_MEM_CTL
+#define HW_SLAVE_MEM_ENDIAN_REG		SLV_END_CTL
+
+#define HW_FUNC_EVENT_INT_EN		0x8000
+#define HW_FUNC_EVENT_MASK_REG		0x00000034
+
+#define ACX_MAC_TIMESTAMP_REG	(MAC_TIMESTAMP)
+
+/*===============================================
+  HI_CFG Interface Configuration Register Values
+  ------------------------------------------
+  ===============================================*/
+#define HI_CFG_UART_ENABLE          0x00000004
+#define HI_CFG_RST232_ENABLE        0x00000008
+#define HI_CFG_CLOCK_REQ_SELECT     0x00000010
+#define HI_CFG_HOST_INT_ENABLE      0x00000020
+#define HI_CFG_VLYNQ_OUTPUT_ENABLE  0x00000040
+#define HI_CFG_HOST_INT_ACTIVE_LOW  0x00000080
+#define HI_CFG_UART_TX_OUT_GPIO_15  0x00000100
+#define HI_CFG_UART_TX_OUT_GPIO_14  0x00000200
+#define HI_CFG_UART_TX_OUT_GPIO_7   0x00000400
+
+/*
+ * NOTE: USE_ACTIVE_HIGH compilation flag should be defined in makefile
+ *       for platforms using active high interrupt level
+ */
+#ifdef USE_ACTIVE_HIGH
+#define HI_CFG_DEF_VAL              \
+	(HI_CFG_UART_ENABLE |        \
+	HI_CFG_RST232_ENABLE |      \
+	HI_CFG_CLOCK_REQ_SELECT |   \
+	HI_CFG_HOST_INT_ENABLE)
+#else
+#define HI_CFG_DEF_VAL              \
+	(HI_CFG_UART_ENABLE |        \
+	HI_CFG_RST232_ENABLE |      \
+	HI_CFG_CLOCK_REQ_SELECT |   \
+	HI_CFG_HOST_INT_ENABLE)
+
+#endif
+
+#define REF_FREQ_19_2                       0
+#define REF_FREQ_26_0                       1
+#define REF_FREQ_38_4                       2
+#define REF_FREQ_40_0                       3
+#define REF_FREQ_33_6                       4
+#define REF_FREQ_NUM                        5
+
+#define LUT_PARAM_INTEGER_DIVIDER           0
+#define LUT_PARAM_FRACTIONAL_DIVIDER        1
+#define LUT_PARAM_ATTN_BB                   2
+#define LUT_PARAM_ALPHA_BB                  3
+#define LUT_PARAM_STOP_TIME_BB              4
+#define LUT_PARAM_BB_PLL_LOOP_FILTER        5
+#define LUT_PARAM_NUM                       6
+
+#define ACX_EEPROMLESS_IND_REG              (SCR_PAD4)
+#define USE_EEPROM                          0
+#define SOFT_RESET_MAX_TIME                 1000000
+#define SOFT_RESET_STALL_TIME               1000
+#define NVS_DATA_BUNDARY_ALIGNMENT          4
+
+
+/* Firmware image load chunk size */
+#define CHUNK_SIZE          512
+
+/* Firmware image header size */
+#define FW_HDR_SIZE 8
+
+#define ECPU_CONTROL_HALT					0x00000101
+
+
+/******************************************************************************
+
+    CHANNELS, BAND & REG DOMAINS definitions
+
+******************************************************************************/
+
+
+enum {
+	RADIO_BAND_2_4GHZ = 0,  /* 2.4 Ghz band */
+	RADIO_BAND_5GHZ = 1,    /* 5 Ghz band */
+	RADIO_BAND_JAPAN_4_9_GHZ = 2,
+	DEFAULT_BAND = RADIO_BAND_2_4GHZ,
+	INVALID_BAND = 0xFE,
+	MAX_RADIO_BANDS = 0xFF
+};
+
+enum {
+	NO_RATE      = 0,
+	RATE_1MBPS   = 0x0A,
+	RATE_2MBPS   = 0x14,
+	RATE_5_5MBPS = 0x37,
+	RATE_6MBPS   = 0x0B,
+	RATE_9MBPS   = 0x0F,
+	RATE_11MBPS  = 0x6E,
+	RATE_12MBPS  = 0x0A,
+	RATE_18MBPS  = 0x0E,
+	RATE_22MBPS  = 0xDC,
+	RATE_24MBPS  = 0x09,
+	RATE_36MBPS  = 0x0D,
+	RATE_48MBPS  = 0x08,
+	RATE_54MBPS  = 0x0C
+};
+
+enum {
+	RATE_INDEX_1MBPS   =  0,
+	RATE_INDEX_2MBPS   =  1,
+	RATE_INDEX_5_5MBPS =  2,
+	RATE_INDEX_6MBPS   =  3,
+	RATE_INDEX_9MBPS   =  4,
+	RATE_INDEX_11MBPS  =  5,
+	RATE_INDEX_12MBPS  =  6,
+	RATE_INDEX_18MBPS  =  7,
+	RATE_INDEX_22MBPS  =  8,
+	RATE_INDEX_24MBPS  =  9,
+	RATE_INDEX_36MBPS  =  10,
+	RATE_INDEX_48MBPS  =  11,
+	RATE_INDEX_54MBPS  =  12,
+	RATE_INDEX_MAX     =  RATE_INDEX_54MBPS,
+	MAX_RATE_INDEX,
+	INVALID_RATE_INDEX = MAX_RATE_INDEX,
+	RATE_INDEX_ENUM_MAX_SIZE = 0x7FFFFFFF
+};
+
+enum {
+	RATE_MASK_1MBPS = 0x1,
+	RATE_MASK_2MBPS = 0x2,
+	RATE_MASK_5_5MBPS = 0x4,
+	RATE_MASK_11MBPS = 0x20,
+};
+
+#define SHORT_PREAMBLE_BIT   BIT(0) /* CCK or Barker depending on the rate */
+#define OFDM_RATE_BIT        BIT(6)
+#define PBCC_RATE_BIT        BIT(7)
+
+enum {
+	CCK_LONG = 0,
+	CCK_SHORT = SHORT_PREAMBLE_BIT,
+	PBCC_LONG = PBCC_RATE_BIT,
+	PBCC_SHORT = PBCC_RATE_BIT | SHORT_PREAMBLE_BIT,
+	OFDM = OFDM_RATE_BIT
+};
+
+/******************************************************************************
+
+Transmit-Descriptor RATE-SET field definitions...
+
+Define a new "Rate-Set" for TX path that incorporates the
+Rate & Modulation info into a single 16-bit field.
+
+TxdRateSet_t:
+b15   - Indicates Preamble type (1=SHORT, 0=LONG).
+	Notes:
+	Must be LONG (0) for 1Mbps rate.
+	Does not apply (set to 0) for RevG-OFDM rates.
+b14   - Indicates PBCC encoding (1=PBCC, 0=not).
+	Notes:
+	Does not apply (set to 0) for rates 1 and 2 Mbps.
+	Does not apply (set to 0) for RevG-OFDM rates.
+b13    - Unused (set to 0).
+b12-b0 - Supported Rate indicator bits as defined below.
+
+******************************************************************************/
+
+
+#define TNETW1251_CHIP_ID_PG1_0         0x07010101
+#define TNETW1251_CHIP_ID_PG1_1         0x07020101
+#define TNETW1251_CHIP_ID_PG1_2	        0x07030101
+
+/*************************************************************************
+
+    Interrupt Trigger Register (Host -> WiLink)
+
+**************************************************************************/
+
+/* Hardware to Embedded CPU Interrupts - first 32-bit register set */
+
+/*
+ * Host Command Interrupt. Setting this bit masks
+ * the interrupt that the host issues to inform
+ * the FW that it has sent a command
+ * to the Wlan hardware Command Mailbox.
+ */
+#define INTR_TRIG_CMD       BIT(0)
+
+/*
+ * Host Event Acknowlegde Interrupt. The host
+ * sets this bit to acknowledge that it received
+ * the unsolicited information from the event
+ * mailbox.
+ */
+#define INTR_TRIG_EVENT_ACK BIT(1)
+
+/*
+ * The host sets this bit to inform the Wlan
+ * FW that a TX packet is in the XFER
+ * Buffer #0.
+ */
+#define INTR_TRIG_TX_PROC0 BIT(2)
+
+/*
+ * The host sets this bit to inform the FW
+ * that it read a packet from RX XFER
+ * Buffer #0.
+ */
+#define INTR_TRIG_RX_PROC0 BIT(3)
+
+#define INTR_TRIG_DEBUG_ACK BIT(4)
+
+#define INTR_TRIG_STATE_CHANGED BIT(5)
+
+
+/* Hardware to Embedded CPU Interrupts - second 32-bit register set */
+
+/*
+ * The host sets this bit to inform the FW
+ * that it read a packet from RX XFER
+ * Buffer #1.
+ */
+#define INTR_TRIG_RX_PROC1 BIT(17)
+
+/*
+ * The host sets this bit to inform the Wlan
+ * hardware that a TX packet is in the XFER
+ * Buffer #1.
+ */
+#define INTR_TRIG_TX_PROC1 BIT(18)
+
+#endif
diff --git a/drivers/net/wireless/wl12xx/rx.c b/drivers/net/wireless/wl12xx/rx.c
new file mode 100644
index 00000000000..981ea259eb8
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/rx.c
@@ -0,0 +1,208 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (c) 1998-2007 Texas Instruments Incorporated
+ * Copyright (C) 2008 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/skbuff.h>
+#include <net/mac80211.h>
+
+#include "wl12xx.h"
+#include "reg.h"
+#include "spi.h"
+#include "rx.h"
+
+static void wl12xx_rx_header(struct wl12xx *wl,
+			     struct wl12xx_rx_descriptor *desc)
+{
+	u32 rx_packet_ring_addr;
+
+	rx_packet_ring_addr = wl->data_path->rx_packet_ring_addr;
+	if (wl->rx_current_buffer)
+		rx_packet_ring_addr += wl->data_path->rx_packet_ring_chunk_size;
+
+	wl12xx_spi_mem_read(wl, rx_packet_ring_addr, desc,
+			    sizeof(struct wl12xx_rx_descriptor));
+}
+
+static void wl12xx_rx_status(struct wl12xx *wl,
+			     struct wl12xx_rx_descriptor *desc,
+			     struct ieee80211_rx_status *status,
+			     u8 beacon)
+{
+	memset(status, 0, sizeof(struct ieee80211_rx_status));
+
+	status->band = IEEE80211_BAND_2GHZ;
+	status->mactime = desc->timestamp;
+
+	/*
+	 * The rx status timestamp is a 32 bits value while the TSF is a
+	 * 64 bits one.
+	 * For IBSS merging, TSF is mandatory, so we have to get it
+	 * somehow, so we ask for ACX_TSF_INFO.
+	 * That could be moved to the get_tsf() hook, but unfortunately,
+	 * this one must be atomic, while our SPI routines can sleep.
+	 */
+	if ((wl->bss_type == BSS_TYPE_IBSS) && beacon) {
+		u64 mactime;
+		int ret;
+		struct wl12xx_command cmd;
+		struct acx_tsf_info *tsf_info;
+
+		memset(&cmd, 0, sizeof(cmd));
+
+		ret = wl12xx_cmd_interrogate(wl, ACX_TSF_INFO,
+					     sizeof(struct acx_tsf_info),
+					     &cmd);
+		if (ret < 0) {
+			wl12xx_warning("ACX_FW_REV interrogate failed");
+			return;
+		}
+
+		tsf_info = (struct acx_tsf_info *)&(cmd.parameters);
+
+		mactime = tsf_info->current_tsf_lsb |
+			(tsf_info->current_tsf_msb << 31);
+
+		status->mactime = mactime;
+	}
+
+	status->signal = desc->rssi;
+	status->qual = (desc->rssi - WL12XX_RX_MIN_RSSI) * 100 /
+		(WL12XX_RX_MAX_RSSI - WL12XX_RX_MIN_RSSI);
+	status->qual = min(status->qual, 100);
+	status->qual = max(status->qual, 0);
+
+	/*
+	 * FIXME: guessing that snr needs to be divided by two, otherwise
+	 * the values don't make any sense
+	 */
+	status->noise = desc->rssi - desc->snr / 2;
+
+	status->freq = ieee80211_channel_to_frequency(desc->channel);
+
+	status->flag |= RX_FLAG_TSFT;
+
+	if (desc->flags & RX_DESC_ENCRYPTION_MASK) {
+		status->flag |= RX_FLAG_IV_STRIPPED | RX_FLAG_MMIC_STRIPPED;
+
+		if (likely(!(desc->flags & RX_DESC_DECRYPT_FAIL)))
+			status->flag |= RX_FLAG_DECRYPTED;
+
+		if (unlikely(desc->flags & RX_DESC_MIC_FAIL))
+			status->flag |= RX_FLAG_MMIC_ERROR;
+	}
+
+	if (unlikely(!(desc->flags & RX_DESC_VALID_FCS)))
+		status->flag |= RX_FLAG_FAILED_FCS_CRC;
+
+
+	/* FIXME: set status->rate_idx */
+}
+
+static void wl12xx_rx_body(struct wl12xx *wl,
+			   struct wl12xx_rx_descriptor *desc)
+{
+	struct sk_buff *skb;
+	struct ieee80211_rx_status status;
+	u8 *rx_buffer, beacon = 0;
+	u16 length, *fc;
+	u32 curr_id, last_id_inc, rx_packet_ring_addr;
+
+	length = WL12XX_RX_ALIGN(desc->length  - PLCP_HEADER_LENGTH);
+	curr_id = (desc->flags & RX_DESC_SEQNUM_MASK) >> RX_DESC_PACKETID_SHIFT;
+	last_id_inc = (wl->rx_last_id + 1) % (RX_MAX_PACKET_ID + 1);
+
+	if (last_id_inc != curr_id) {
+		wl12xx_warning("curr ID:%d, last ID inc:%d",
+			       curr_id, last_id_inc);
+		wl->rx_last_id = curr_id;
+	} else {
+		wl->rx_last_id = last_id_inc;
+	}
+
+	rx_packet_ring_addr = wl->data_path->rx_packet_ring_addr +
+		sizeof(struct wl12xx_rx_descriptor) + 20;
+	if (wl->rx_current_buffer)
+		rx_packet_ring_addr += wl->data_path->rx_packet_ring_chunk_size;
+
+	skb = dev_alloc_skb(length);
+	if (!skb) {
+		wl12xx_error("Couldn't allocate RX frame");
+		return;
+	}
+
+	rx_buffer = skb_put(skb, length);
+	wl12xx_spi_mem_read(wl, rx_packet_ring_addr, rx_buffer, length);
+
+	/* The actual lenght doesn't include the target's alignment */
+	skb->len = desc->length  - PLCP_HEADER_LENGTH;
+
+	fc = (u16 *)skb->data;
+
+	if ((*fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_BEACON)
+		beacon = 1;
+
+	wl12xx_rx_status(wl, desc, &status, beacon);
+
+	wl12xx_debug(DEBUG_RX, "rx skb 0x%p: %d B %s", skb, skb->len,
+		     beacon ? "beacon" : "");
+
+	ieee80211_rx(wl->hw, skb, &status);
+}
+
+static void wl12xx_rx_ack(struct wl12xx *wl)
+{
+	u32 data, addr;
+
+	if (wl->rx_current_buffer) {
+		addr = ACX_REG_INTERRUPT_TRIG_H;
+		data = INTR_TRIG_RX_PROC1;
+	} else {
+		addr = ACX_REG_INTERRUPT_TRIG;
+		data = INTR_TRIG_RX_PROC0;
+	}
+
+	wl12xx_reg_write32(wl, addr, data);
+
+	/* Toggle buffer ring */
+	wl->rx_current_buffer = !wl->rx_current_buffer;
+}
+
+
+void wl12xx_rx(struct wl12xx *wl)
+{
+	struct wl12xx_rx_descriptor rx_desc;
+
+	if (wl->state != WL12XX_STATE_ON)
+		return;
+
+	/* We first read the frame's header */
+	wl12xx_rx_header(wl, &rx_desc);
+
+	/* Now we can read the body */
+	wl12xx_rx_body(wl, &rx_desc);
+
+	/* Finally, we need to ACK the RX */
+	wl12xx_rx_ack(wl);
+
+	return;
+}
diff --git a/drivers/net/wireless/wl12xx/rx.h b/drivers/net/wireless/wl12xx/rx.h
new file mode 100644
index 00000000000..8a23fdea501
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/rx.h
@@ -0,0 +1,122 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (c) 1998-2007 Texas Instruments Incorporated
+ * Copyright (C) 2008 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#ifndef __WL12XX_RX_H__
+#define __WL12XX_RX_H__
+
+#include <linux/bitops.h>
+
+/*
+ * RX PATH
+ *
+ * The Rx path uses a double buffer and an rx_contro structure, each located
+ * at a fixed address in the device memory. The host keeps track of which
+ * buffer is available and alternates between them on a per packet basis.
+ * The size of each of the two buffers is large enough to hold the longest
+ * 802.3 packet.
+ * The RX path goes like that:
+ * 1) The target generates an interrupt each time a new packet is received.
+ *   There are 2 RX interrupts, one for each buffer.
+ * 2) The host reads the received packet from one of the double buffers.
+ * 3) The host triggers a target interrupt.
+ * 4) The target prepares the next RX packet.
+ */
+
+#define WL12XX_RX_MAX_RSSI -30
+#define WL12XX_RX_MIN_RSSI -95
+
+#define WL12XX_RX_ALIGN_TO 4
+#define WL12XX_RX_ALIGN(len) (((len) + WL12XX_RX_ALIGN_TO - 1) & \
+			     ~(WL12XX_RX_ALIGN_TO - 1))
+
+#define SHORT_PREAMBLE_BIT   BIT(0)
+#define OFDM_RATE_BIT        BIT(6)
+#define PBCC_RATE_BIT        BIT(7)
+
+#define PLCP_HEADER_LENGTH 8
+#define RX_DESC_PACKETID_SHIFT 11
+#define RX_MAX_PACKET_ID 3
+
+#define RX_DESC_VALID_FCS         0x0001
+#define RX_DESC_MATCH_RXADDR1     0x0002
+#define RX_DESC_MCAST             0x0004
+#define RX_DESC_STAINTIM          0x0008
+#define RX_DESC_VIRTUAL_BM        0x0010
+#define RX_DESC_BCAST             0x0020
+#define RX_DESC_MATCH_SSID        0x0040
+#define RX_DESC_MATCH_BSSID       0x0080
+#define RX_DESC_ENCRYPTION_MASK   0x0300
+#define RX_DESC_MEASURMENT        0x0400
+#define RX_DESC_SEQNUM_MASK       0x1800
+#define	RX_DESC_MIC_FAIL	  0x2000
+#define	RX_DESC_DECRYPT_FAIL	  0x4000
+
+struct wl12xx_rx_descriptor {
+	u32 timestamp; /* In microseconds */
+	u16 length; /* Paylod length, including headers */
+	u16 flags;
+
+	/*
+	 * 0 - 802.11
+	 * 1 - 802.3
+	 * 2 - IP
+	 * 3 - Raw Codec
+	 */
+	u8 type;
+
+	/*
+	 * Recevied Rate:
+	 * 0x0A - 1MBPS
+	 * 0x14 - 2MBPS
+	 * 0x37 - 5_5MBPS
+	 * 0x0B - 6MBPS
+	 * 0x0F - 9MBPS
+	 * 0x6E - 11MBPS
+	 * 0x0A - 12MBPS
+	 * 0x0E - 18MBPS
+	 * 0xDC - 22MBPS
+	 * 0x09 - 24MBPS
+	 * 0x0D - 36MBPS
+	 * 0x08 - 48MBPS
+	 * 0x0C - 54MBPS
+	 */
+	u8 rate;
+
+	u8 mod_pre; /* Modulation and preamble */
+	u8 channel;
+
+	/*
+	 * 0 - 2.4 Ghz
+	 * 1 - 5 Ghz
+	 */
+	u8 band;
+
+	s8 rssi; /* in dB */
+	u8 rcpi; /* in dB */
+	u8 snr; /* in dB */
+} __attribute__ ((packed));
+
+void wl12xx_rx(struct wl12xx *wl);
+
+#endif
diff --git a/drivers/net/wireless/wl12xx/spi.c b/drivers/net/wireless/wl12xx/spi.c
new file mode 100644
index 00000000000..abdf171a47e
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/spi.c
@@ -0,0 +1,358 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (C) 2008 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/crc7.h>
+#include <linux/spi/spi.h>
+
+#include "wl12xx.h"
+#include "wl12xx_80211.h"
+#include "reg.h"
+#include "spi.h"
+#include "ps.h"
+
+static int wl12xx_translate_reg_addr(struct wl12xx *wl, int addr)
+{
+	/* If the address is lower than REGISTERS_BASE, it means that this is
+	 * a chip-specific register address, so look it up in the registers
+	 * table */
+	if (addr < REGISTERS_BASE) {
+		/* Make sure we don't go over the table */
+		if (addr >= ACX_REG_TABLE_LEN) {
+			wl12xx_error("address out of range (%d)", addr);
+			return -EINVAL;
+		}
+		addr = wl->chip.acx_reg_table[addr];
+	}
+
+	return addr - wl->physical_reg_addr + wl->virtual_reg_addr;
+}
+
+static int wl12xx_translate_mem_addr(struct wl12xx *wl, int addr)
+{
+	return addr - wl->physical_mem_addr + wl->virtual_mem_addr;
+}
+
+
+void wl12xx_spi_reset(struct wl12xx *wl)
+{
+	u8 *cmd;
+	struct spi_transfer t;
+	struct spi_message m;
+
+	cmd = kzalloc(WSPI_INIT_CMD_LEN, GFP_KERNEL);
+	if (!cmd) {
+		wl12xx_error("could not allocate cmd for spi reset");
+		return;
+	}
+
+	memset(&t, 0, sizeof(t));
+	spi_message_init(&m);
+
+	memset(cmd, 0xff, WSPI_INIT_CMD_LEN);
+
+	t.tx_buf = cmd;
+	t.len = WSPI_INIT_CMD_LEN;
+	spi_message_add_tail(&t, &m);
+
+	spi_sync(wl->spi, &m);
+
+	wl12xx_dump(DEBUG_SPI, "spi reset -> ", cmd, WSPI_INIT_CMD_LEN);
+}
+
+void wl12xx_spi_init(struct wl12xx *wl)
+{
+	u8 crc[WSPI_INIT_CMD_CRC_LEN], *cmd;
+	struct spi_transfer t;
+	struct spi_message m;
+
+	cmd = kzalloc(WSPI_INIT_CMD_LEN, GFP_KERNEL);
+	if (!cmd) {
+		wl12xx_error("could not allocate cmd for spi init");
+		return;
+	}
+
+	memset(crc, 0, sizeof(crc));
+	memset(&t, 0, sizeof(t));
+	spi_message_init(&m);
+
+	/*
+	 * Set WSPI_INIT_COMMAND
+	 * the data is being send from the MSB to LSB
+	 */
+	cmd[2] = 0xff;
+	cmd[3] = 0xff;
+	cmd[1] = WSPI_INIT_CMD_START | WSPI_INIT_CMD_TX;
+	cmd[0] = 0;
+	cmd[7] = 0;
+	cmd[6] |= HW_ACCESS_WSPI_INIT_CMD_MASK << 3;
+	cmd[6] |= HW_ACCESS_WSPI_FIXED_BUSY_LEN & WSPI_INIT_CMD_FIXEDBUSY_LEN;
+
+	if (HW_ACCESS_WSPI_FIXED_BUSY_LEN == 0)
+		cmd[5] |=  WSPI_INIT_CMD_DIS_FIXEDBUSY;
+	else
+		cmd[5] |= WSPI_INIT_CMD_EN_FIXEDBUSY;
+
+	cmd[5] |= WSPI_INIT_CMD_IOD | WSPI_INIT_CMD_IP | WSPI_INIT_CMD_CS
+		| WSPI_INIT_CMD_WSPI | WSPI_INIT_CMD_WS;
+
+	crc[0] = cmd[1];
+	crc[1] = cmd[0];
+	crc[2] = cmd[7];
+	crc[3] = cmd[6];
+	crc[4] = cmd[5];
+
+	cmd[4] |= crc7(0, crc, WSPI_INIT_CMD_CRC_LEN) << 1;
+	cmd[4] |= WSPI_INIT_CMD_END;
+
+	t.tx_buf = cmd;
+	t.len = WSPI_INIT_CMD_LEN;
+	spi_message_add_tail(&t, &m);
+
+	spi_sync(wl->spi, &m);
+
+	wl12xx_dump(DEBUG_SPI, "spi init -> ", cmd, WSPI_INIT_CMD_LEN);
+}
+
+/* Set the SPI partitions to access the chip addresses
+ *
+ * There are two VIRTUAL (SPI) partitions (the memory partition and the
+ * registers partition), which are mapped to two different areas of the
+ * PHYSICAL (hardware) memory.  This function also makes other checks to
+ * ensure that the partitions are not overlapping.  In the diagram below, the
+ * memory partition comes before the register partition, but the opposite is
+ * also supported.
+ *
+ *                               PHYSICAL address
+ *                                     space
+ *
+ *                                    |    |
+ *                                 ...+----+--> mem_start
+ *          VIRTUAL address     ...   |    |
+ *               space       ...      |    | [PART_0]
+ *                        ...         |    |
+ * 0x00000000 <--+----+...         ...+----+--> mem_start + mem_size
+ *               |    |         ...   |    |
+ *               |MEM |      ...      |    |
+ *               |    |   ...         |    |
+ *  part_size <--+----+...            |    | {unused area)
+ *               |    |   ...         |    |
+ *               |REG |      ...      |    |
+ *  part_size    |    |         ...   |    |
+ *      +     <--+----+...         ...+----+--> reg_start
+ *  reg_size              ...         |    |
+ *                           ...      |    | [PART_1]
+ *                              ...   |    |
+ *                                 ...+----+--> reg_start + reg_size
+ *                                    |    |
+ *
+ */
+void wl12xx_set_partition(struct wl12xx *wl,
+			  u32 mem_start, u32 mem_size,
+			  u32 reg_start, u32 reg_size)
+{
+	u8 tx_buf[sizeof(u32) + 2 * sizeof(struct wl12xx_partition)];
+	struct wl12xx_partition *partition;
+	struct spi_transfer t;
+	struct spi_message m;
+	u32 *cmd;
+	size_t len;
+	int addr;
+
+	spi_message_init(&m);
+	memset(&t, 0, sizeof(t));
+	memset(tx_buf, 0, sizeof(tx_buf));
+
+	cmd = (u32 *) tx_buf;
+	partition = (struct wl12xx_partition *) (tx_buf + sizeof(u32));
+	addr = HW_ACCESS_PART0_SIZE_ADDR;
+	len = 2 * sizeof(struct wl12xx_partition);
+
+	*cmd |= WSPI_CMD_WRITE;
+	*cmd |= (len << WSPI_CMD_BYTE_LENGTH_OFFSET) & WSPI_CMD_BYTE_LENGTH;
+	*cmd |= addr & WSPI_CMD_BYTE_ADDR;
+
+	wl12xx_debug(DEBUG_SPI, "mem_start %08X mem_size %08X",
+		     mem_start, mem_size);
+	wl12xx_debug(DEBUG_SPI, "reg_start %08X reg_size %08X",
+		     reg_start, reg_size);
+
+	/* Make sure that the two partitions together don't exceed the
+	 * address range */
+	if ((mem_size + reg_size) > HW_ACCESS_MEMORY_MAX_RANGE) {
+		wl12xx_debug(DEBUG_SPI, "Total size exceeds maximum virtual"
+			     " address range.  Truncating partition[0].");
+		mem_size = HW_ACCESS_MEMORY_MAX_RANGE - reg_size;
+		wl12xx_debug(DEBUG_SPI, "mem_start %08X mem_size %08X",
+			     mem_start, mem_size);
+		wl12xx_debug(DEBUG_SPI, "reg_start %08X reg_size %08X",
+			     reg_start, reg_size);
+	}
+
+	if ((mem_start < reg_start) &&
+	    ((mem_start + mem_size) > reg_start)) {
+		/* Guarantee that the memory partition doesn't overlap the
+		 * registers partition */
+		wl12xx_debug(DEBUG_SPI, "End of partition[0] is "
+			     "overlapping partition[1].  Adjusted.");
+		mem_size = reg_start - mem_start;
+		wl12xx_debug(DEBUG_SPI, "mem_start %08X mem_size %08X",
+			     mem_start, mem_size);
+		wl12xx_debug(DEBUG_SPI, "reg_start %08X reg_size %08X",
+			     reg_start, reg_size);
+	} else if ((reg_start < mem_start) &&
+		   ((reg_start + reg_size) > mem_start)) {
+		/* Guarantee that the register partition doesn't overlap the
+		 * memory partition */
+		wl12xx_debug(DEBUG_SPI, "End of partition[1] is"
+			     " overlapping partition[0].  Adjusted.");
+		reg_size = mem_start - reg_start;
+		wl12xx_debug(DEBUG_SPI, "mem_start %08X mem_size %08X",
+			     mem_start, mem_size);
+		wl12xx_debug(DEBUG_SPI, "reg_start %08X reg_size %08X",
+			     reg_start, reg_size);
+	}
+
+	partition[0].start = mem_start;
+	partition[0].size  = mem_size;
+	partition[1].start = reg_start;
+	partition[1].size  = reg_size;
+
+	wl->physical_mem_addr = mem_start;
+	wl->physical_reg_addr = reg_start;
+
+	wl->virtual_mem_addr = 0;
+	wl->virtual_reg_addr = mem_size;
+
+	t.tx_buf = tx_buf;
+	t.len = sizeof(tx_buf);
+	spi_message_add_tail(&t, &m);
+
+	spi_sync(wl->spi, &m);
+}
+
+void wl12xx_spi_read(struct wl12xx *wl, int addr, void *buf,
+		     size_t len)
+{
+	struct spi_transfer t[3];
+	struct spi_message m;
+	char busy_buf[TNETWIF_READ_OFFSET_BYTES];
+	u32 cmd;
+
+	cmd = 0;
+	cmd |= WSPI_CMD_READ;
+	cmd |= (len << WSPI_CMD_BYTE_LENGTH_OFFSET) & WSPI_CMD_BYTE_LENGTH;
+	cmd |= addr & WSPI_CMD_BYTE_ADDR;
+
+	spi_message_init(&m);
+	memset(t, 0, sizeof(t));
+
+	t[0].tx_buf = &cmd;
+	t[0].len = 4;
+	spi_message_add_tail(&t[0], &m);
+
+	/* Busy and non busy words read */
+	t[1].rx_buf = busy_buf;
+	t[1].len = TNETWIF_READ_OFFSET_BYTES;
+	spi_message_add_tail(&t[1], &m);
+
+	t[2].rx_buf = buf;
+	t[2].len = len;
+	spi_message_add_tail(&t[2], &m);
+
+	spi_sync(wl->spi, &m);
+
+	/* FIXME: check busy words */
+
+	wl12xx_dump(DEBUG_SPI, "spi_read cmd -> ", &cmd, sizeof(cmd));
+	wl12xx_dump(DEBUG_SPI, "spi_read buf <- ", buf, len);
+}
+
+void wl12xx_spi_write(struct wl12xx *wl, int addr, void *buf,
+		      size_t len)
+{
+	struct spi_transfer t[2];
+	struct spi_message m;
+	u32 cmd;
+
+	cmd = 0;
+	cmd |= WSPI_CMD_WRITE;
+	cmd |= (len << WSPI_CMD_BYTE_LENGTH_OFFSET) & WSPI_CMD_BYTE_LENGTH;
+	cmd |= addr & WSPI_CMD_BYTE_ADDR;
+
+	spi_message_init(&m);
+	memset(t, 0, sizeof(t));
+
+	t[0].tx_buf = &cmd;
+	t[0].len = sizeof(cmd);
+	spi_message_add_tail(&t[0], &m);
+
+	t[1].tx_buf = buf;
+	t[1].len = len;
+	spi_message_add_tail(&t[1], &m);
+
+	spi_sync(wl->spi, &m);
+
+	wl12xx_dump(DEBUG_SPI, "spi_write cmd -> ", &cmd, sizeof(cmd));
+	wl12xx_dump(DEBUG_SPI, "spi_write buf -> ", buf, len);
+}
+
+void wl12xx_spi_mem_read(struct wl12xx *wl, int addr, void *buf,
+			 size_t len)
+{
+	int physical;
+
+	physical = wl12xx_translate_mem_addr(wl, addr);
+
+	wl12xx_spi_read(wl, physical, buf, len);
+}
+
+void wl12xx_spi_mem_write(struct wl12xx *wl, int addr, void *buf,
+			  size_t len)
+{
+	int physical;
+
+	physical = wl12xx_translate_mem_addr(wl, addr);
+
+	wl12xx_spi_write(wl, physical, buf, len);
+}
+
+u32 wl12xx_mem_read32(struct wl12xx *wl, int addr)
+{
+	return wl12xx_read32(wl, wl12xx_translate_mem_addr(wl, addr));
+}
+
+void wl12xx_mem_write32(struct wl12xx *wl, int addr, u32 val)
+{
+	wl12xx_write32(wl, wl12xx_translate_mem_addr(wl, addr), val);
+}
+
+u32 wl12xx_reg_read32(struct wl12xx *wl, int addr)
+{
+	return wl12xx_read32(wl, wl12xx_translate_reg_addr(wl, addr));
+}
+
+void wl12xx_reg_write32(struct wl12xx *wl, int addr, u32 val)
+{
+	wl12xx_write32(wl, wl12xx_translate_reg_addr(wl, addr), val);
+}
diff --git a/drivers/net/wireless/wl12xx/spi.h b/drivers/net/wireless/wl12xx/spi.h
new file mode 100644
index 00000000000..fd3227e904a
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/spi.h
@@ -0,0 +1,109 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (c) 1998-2007 Texas Instruments Incorporated
+ * Copyright (C) 2008 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#ifndef __WL12XX_SPI_H__
+#define __WL12XX_SPI_H__
+
+#include "cmd.h"
+#include "acx.h"
+#include "reg.h"
+
+#define HW_ACCESS_MEMORY_MAX_RANGE		0x1FFC0
+
+#define HW_ACCESS_PART0_SIZE_ADDR           0x1FFC0
+#define HW_ACCESS_PART0_START_ADDR          0x1FFC4
+#define HW_ACCESS_PART1_SIZE_ADDR           0x1FFC8
+#define HW_ACCESS_PART1_START_ADDR          0x1FFCC
+
+#define HW_ACCESS_REGISTER_SIZE             4
+
+#define HW_ACCESS_PRAM_MAX_RANGE		0x3c000
+
+#define WSPI_CMD_READ                 0x40000000
+#define WSPI_CMD_WRITE                0x00000000
+#define WSPI_CMD_FIXED                0x20000000
+#define WSPI_CMD_BYTE_LENGTH          0x1FFE0000
+#define WSPI_CMD_BYTE_LENGTH_OFFSET   17
+#define WSPI_CMD_BYTE_ADDR            0x0001FFFF
+
+#define WSPI_INIT_CMD_CRC_LEN       5
+
+#define WSPI_INIT_CMD_START         0x00
+#define WSPI_INIT_CMD_TX            0x40
+/* the extra bypass bit is sampled by the TNET as '1' */
+#define WSPI_INIT_CMD_BYPASS_BIT    0x80
+#define WSPI_INIT_CMD_FIXEDBUSY_LEN 0x07
+#define WSPI_INIT_CMD_EN_FIXEDBUSY  0x80
+#define WSPI_INIT_CMD_DIS_FIXEDBUSY 0x00
+#define WSPI_INIT_CMD_IOD           0x40
+#define WSPI_INIT_CMD_IP            0x20
+#define WSPI_INIT_CMD_CS            0x10
+#define WSPI_INIT_CMD_WS            0x08
+#define WSPI_INIT_CMD_WSPI          0x01
+#define WSPI_INIT_CMD_END           0x01
+
+#define WSPI_INIT_CMD_LEN           8
+
+#define TNETWIF_READ_OFFSET_BYTES  8
+#define HW_ACCESS_WSPI_FIXED_BUSY_LEN \
+		((TNETWIF_READ_OFFSET_BYTES - 4) / sizeof(u32))
+#define HW_ACCESS_WSPI_INIT_CMD_MASK  0
+
+
+/* Raw target IO, address is not translated */
+void wl12xx_spi_read(struct wl12xx *wl, int addr, void *buf, size_t len);
+void wl12xx_spi_write(struct wl12xx *wl, int addr, void *buf, size_t len);
+
+/* Memory target IO, address is tranlated to partition 0 */
+void wl12xx_spi_mem_read(struct wl12xx *wl, int addr, void *buf, size_t len);
+void wl12xx_spi_mem_write(struct wl12xx *wl, int addr, void *buf, size_t len);
+u32 wl12xx_mem_read32(struct wl12xx *wl, int addr);
+void wl12xx_mem_write32(struct wl12xx *wl, int addr, u32 val);
+
+/* Registers IO */
+u32 wl12xx_reg_read32(struct wl12xx *wl, int addr);
+void wl12xx_reg_write32(struct wl12xx *wl, int addr, u32 val);
+
+/* INIT and RESET words */
+void wl12xx_spi_reset(struct wl12xx *wl);
+void wl12xx_spi_init(struct wl12xx *wl);
+void wl12xx_set_partition(struct wl12xx *wl,
+			  u32 part_start, u32 part_size,
+			  u32 reg_start,  u32 reg_size);
+
+static inline u32 wl12xx_read32(struct wl12xx *wl, int addr)
+{
+	u32 response;
+
+	wl12xx_spi_read(wl, addr, &response, sizeof(u32));
+
+	return response;
+}
+
+static inline void wl12xx_write32(struct wl12xx *wl, int addr, u32 val)
+{
+	wl12xx_spi_write(wl, addr, &val, sizeof(u32));
+}
+
+#endif /* __WL12XX_SPI_H__ */
diff --git a/drivers/net/wireless/wl12xx/tx.c b/drivers/net/wireless/wl12xx/tx.c
new file mode 100644
index 00000000000..62145e205a8
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/tx.c
@@ -0,0 +1,557 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (c) 1998-2007 Texas Instruments Incorporated
+ * Copyright (C) 2008 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include "wl12xx.h"
+#include "reg.h"
+#include "spi.h"
+#include "tx.h"
+#include "ps.h"
+
+static bool wl12xx_tx_double_buffer_busy(struct wl12xx *wl, u32 data_out_count)
+{
+	int used, data_in_count;
+
+	data_in_count = wl->data_in_count;
+
+	if (data_in_count < data_out_count)
+		/* data_in_count has wrapped */
+		data_in_count += TX_STATUS_DATA_OUT_COUNT_MASK + 1;
+
+	used = data_in_count - data_out_count;
+
+	WARN_ON(used < 0);
+	WARN_ON(used > DP_TX_PACKET_RING_CHUNK_NUM);
+
+	if (used >= DP_TX_PACKET_RING_CHUNK_NUM)
+		return true;
+	else
+		return false;
+}
+
+static int wl12xx_tx_path_status(struct wl12xx *wl)
+{
+	u32 status, addr, data_out_count;
+	bool busy;
+
+	addr = wl->data_path->tx_control_addr;
+	status = wl12xx_mem_read32(wl, addr);
+	data_out_count = status & TX_STATUS_DATA_OUT_COUNT_MASK;
+	busy = wl12xx_tx_double_buffer_busy(wl, data_out_count);
+
+	if (busy)
+		return -EBUSY;
+
+	return 0;
+}
+
+static int wl12xx_tx_id(struct wl12xx *wl, struct sk_buff *skb)
+{
+	int i;
+
+	for (i = 0; i < FW_TX_CMPLT_BLOCK_SIZE; i++)
+		if (wl->tx_frames[i] == NULL) {
+			wl->tx_frames[i] = skb;
+			return i;
+		}
+
+	return -EBUSY;
+}
+
+static void wl12xx_tx_control(struct tx_double_buffer_desc *tx_hdr,
+			      struct ieee80211_tx_info *control, u16 fc)
+{
+	*(u16 *)&tx_hdr->control = 0;
+
+	tx_hdr->control.rate_policy = 0;
+
+	/* 802.11 packets */
+	tx_hdr->control.packet_type = 0;
+
+	if (control->flags & IEEE80211_TX_CTL_NO_ACK)
+		tx_hdr->control.ack_policy = 1;
+
+	tx_hdr->control.tx_complete = 1;
+
+	if ((fc & IEEE80211_FTYPE_DATA) &&
+	    ((fc & IEEE80211_STYPE_QOS_DATA) ||
+	     (fc & IEEE80211_STYPE_QOS_NULLFUNC)))
+		tx_hdr->control.qos = 1;
+}
+
+/* RSN + MIC = 8 + 8 = 16 bytes (worst case - AES). */
+#define MAX_MSDU_SECURITY_LENGTH      16
+#define MAX_MPDU_SECURITY_LENGTH      16
+#define WLAN_QOS_HDR_LEN              26
+#define MAX_MPDU_HEADER_AND_SECURITY  (MAX_MPDU_SECURITY_LENGTH + \
+				       WLAN_QOS_HDR_LEN)
+#define HW_BLOCK_SIZE                 252
+static void wl12xx_tx_frag_block_num(struct tx_double_buffer_desc *tx_hdr)
+{
+	u16 payload_len, frag_threshold, mem_blocks;
+	u16 num_mpdus, mem_blocks_per_frag;
+
+	frag_threshold = IEEE80211_MAX_FRAG_THRESHOLD;
+	tx_hdr->frag_threshold = cpu_to_le16(frag_threshold);
+
+	payload_len = tx_hdr->length + MAX_MSDU_SECURITY_LENGTH;
+
+	if (payload_len > frag_threshold) {
+		mem_blocks_per_frag =
+			((frag_threshold + MAX_MPDU_HEADER_AND_SECURITY) /
+			 HW_BLOCK_SIZE) + 1;
+		num_mpdus = payload_len / frag_threshold;
+		mem_blocks = num_mpdus * mem_blocks_per_frag;
+		payload_len -= num_mpdus * frag_threshold;
+		num_mpdus++;
+
+	} else {
+		mem_blocks_per_frag = 0;
+		mem_blocks = 0;
+		num_mpdus = 1;
+	}
+
+	mem_blocks += (payload_len / HW_BLOCK_SIZE) + 1;
+
+	if (num_mpdus > 1)
+		mem_blocks += min(num_mpdus, mem_blocks_per_frag);
+
+	tx_hdr->num_mem_blocks = mem_blocks;
+}
+
+static int wl12xx_tx_fill_hdr(struct wl12xx *wl, struct sk_buff *skb,
+			      struct ieee80211_tx_info *control)
+{
+	struct tx_double_buffer_desc *tx_hdr;
+	struct ieee80211_rate *rate;
+	int id;
+	u16 fc;
+
+	if (!skb)
+		return -EINVAL;
+
+	id = wl12xx_tx_id(wl, skb);
+	if (id < 0)
+		return id;
+
+	fc = *(u16 *)skb->data;
+	tx_hdr = (struct tx_double_buffer_desc *) skb_push(skb,
+							   sizeof(*tx_hdr));
+
+	tx_hdr->length = cpu_to_le16(skb->len - sizeof(*tx_hdr));
+	rate = ieee80211_get_tx_rate(wl->hw, control);
+	tx_hdr->rate = cpu_to_le16(rate->hw_value);
+	tx_hdr->expiry_time = cpu_to_le32(1 << 16);
+	tx_hdr->id = id;
+
+	/* FIXME: how to get the correct queue id? */
+	tx_hdr->xmit_queue = 0;
+
+	wl12xx_tx_control(tx_hdr, control, fc);
+	wl12xx_tx_frag_block_num(tx_hdr);
+
+	return 0;
+}
+
+/* We copy the packet to the target */
+static int wl12xx_tx_send_packet(struct wl12xx *wl, struct sk_buff *skb,
+				 struct ieee80211_tx_info *control)
+{
+	struct tx_double_buffer_desc *tx_hdr;
+	int len;
+	u32 addr;
+
+	if (!skb)
+		return -EINVAL;
+
+	tx_hdr = (struct tx_double_buffer_desc *) skb->data;
+
+	if (control->control.hw_key &&
+	    control->control.hw_key->alg == ALG_TKIP) {
+		int hdrlen;
+		u16 fc;
+		u8 *pos;
+
+		fc = *(u16 *)(skb->data + sizeof(*tx_hdr));
+		tx_hdr->length += WL12XX_TKIP_IV_SPACE;
+
+		hdrlen = ieee80211_hdrlen(fc);
+
+		pos = skb_push(skb, WL12XX_TKIP_IV_SPACE);
+		memmove(pos, pos + WL12XX_TKIP_IV_SPACE,
+			sizeof(*tx_hdr) + hdrlen);
+	}
+
+	/* Revisit. This is a workaround for getting non-aligned packets.
+	   This happens at least with EAPOL packets from the user space.
+	   Our DMA requires packets to be aligned on a 4-byte boundary.
+	*/
+	if (unlikely((long)skb->data & 0x03)) {
+		int offset = (4 - (long)skb->data) & 0x03;
+		wl12xx_debug(DEBUG_TX, "skb offset %d", offset);
+
+		/* check whether the current skb can be used */
+		if (!skb_cloned(skb) && (skb_tailroom(skb) >= offset)) {
+			unsigned char *src = skb->data;
+
+			/* align the buffer on a 4-byte boundary */
+			skb_reserve(skb, offset);
+			memmove(skb->data, src, skb->len);
+		} else {
+			wl12xx_info("No handler, fixme!");
+			return -EINVAL;
+		}
+	}
+
+	/* Our skb->data at this point includes the HW header */
+	len = WL12XX_TX_ALIGN(skb->len);
+
+	if (wl->data_in_count & 0x1)
+		addr = wl->data_path->tx_packet_ring_addr +
+			wl->data_path->tx_packet_ring_chunk_size;
+	else
+		addr = wl->data_path->tx_packet_ring_addr;
+
+	wl12xx_spi_mem_write(wl, addr, skb->data, len);
+
+	wl12xx_debug(DEBUG_TX, "tx id %u skb 0x%p payload %u rate 0x%x",
+		     tx_hdr->id, skb, tx_hdr->length, tx_hdr->rate);
+
+	return 0;
+}
+
+static void wl12xx_tx_trigger(struct wl12xx *wl)
+{
+	u32 data, addr;
+
+	if (wl->data_in_count & 0x1) {
+		addr = ACX_REG_INTERRUPT_TRIG_H;
+		data = INTR_TRIG_TX_PROC1;
+	} else {
+		addr = ACX_REG_INTERRUPT_TRIG;
+		data = INTR_TRIG_TX_PROC0;
+	}
+
+	wl12xx_reg_write32(wl, addr, data);
+
+	/* Bumping data in */
+	wl->data_in_count = (wl->data_in_count + 1) &
+		TX_STATUS_DATA_OUT_COUNT_MASK;
+}
+
+/* caller must hold wl->mutex */
+static int wl12xx_tx_frame(struct wl12xx *wl, struct sk_buff *skb)
+{
+	struct ieee80211_tx_info *info;
+	int ret = 0;
+	u8 idx;
+
+	info = IEEE80211_SKB_CB(skb);
+
+	if (info->control.hw_key) {
+		idx = info->control.hw_key->hw_key_idx;
+		if (unlikely(wl->default_key != idx)) {
+			ret = wl12xx_acx_default_key(wl, idx);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	ret = wl12xx_tx_path_status(wl);
+	if (ret < 0)
+		return ret;
+
+	ret = wl12xx_tx_fill_hdr(wl, skb, info);
+	if (ret < 0)
+		return ret;
+
+	ret = wl12xx_tx_send_packet(wl, skb, info);
+	if (ret < 0)
+		return ret;
+
+	wl12xx_tx_trigger(wl);
+
+	return ret;
+}
+
+void wl12xx_tx_work(struct work_struct *work)
+{
+	struct wl12xx *wl = container_of(work, struct wl12xx, tx_work);
+	struct sk_buff *skb;
+	bool woken_up = false;
+	int ret;
+
+	mutex_lock(&wl->mutex);
+
+	if (unlikely(wl->state == WL12XX_STATE_OFF))
+		goto out;
+
+	while ((skb = skb_dequeue(&wl->tx_queue))) {
+		if (!woken_up) {
+			wl12xx_ps_elp_wakeup(wl);
+			woken_up = true;
+		}
+
+		ret = wl12xx_tx_frame(wl, skb);
+		if (ret == -EBUSY) {
+			/* firmware buffer is full, stop queues */
+			wl12xx_debug(DEBUG_TX, "tx_work: fw buffer full, "
+				     "stop queues");
+			ieee80211_stop_queues(wl->hw);
+			wl->tx_queue_stopped = true;
+			skb_queue_head(&wl->tx_queue, skb);
+			goto out;
+		} else if (ret < 0) {
+			dev_kfree_skb(skb);
+			goto out;
+		}
+	}
+
+out:
+	if (woken_up)
+		wl12xx_ps_elp_sleep(wl);
+
+	mutex_unlock(&wl->mutex);
+}
+
+static const char *wl12xx_tx_parse_status(u8 status)
+{
+	/* 8 bit status field, one character per bit plus null */
+	static char buf[9];
+	int i = 0;
+
+	memset(buf, 0, sizeof(buf));
+
+	if (status & TX_DMA_ERROR)
+		buf[i++] = 'm';
+	if (status & TX_DISABLED)
+		buf[i++] = 'd';
+	if (status & TX_RETRY_EXCEEDED)
+		buf[i++] = 'r';
+	if (status & TX_TIMEOUT)
+		buf[i++] = 't';
+	if (status & TX_KEY_NOT_FOUND)
+		buf[i++] = 'k';
+	if (status & TX_ENCRYPT_FAIL)
+		buf[i++] = 'e';
+	if (status & TX_UNAVAILABLE_PRIORITY)
+		buf[i++] = 'p';
+
+	/* bit 0 is unused apparently */
+
+	return buf;
+}
+
+static void wl12xx_tx_packet_cb(struct wl12xx *wl,
+				struct tx_result *result)
+{
+	struct ieee80211_tx_info *info;
+	struct sk_buff *skb;
+	int hdrlen, ret;
+	u8 *frame;
+
+	skb = wl->tx_frames[result->id];
+	if (skb == NULL) {
+		wl12xx_error("SKB for packet %d is NULL", result->id);
+		return;
+	}
+
+	info = IEEE80211_SKB_CB(skb);
+
+	if (!(info->flags & IEEE80211_TX_CTL_NO_ACK) &&
+	    (result->status == TX_SUCCESS))
+		info->flags |= IEEE80211_TX_STAT_ACK;
+
+	info->status.rates[0].count = result->ack_failures + 1;
+	wl->stats.retry_count += result->ack_failures;
+
+	/*
+	 * We have to remove our private TX header before pushing
+	 * the skb back to mac80211.
+	 */
+	frame = skb_pull(skb, sizeof(struct tx_double_buffer_desc));
+	if (info->control.hw_key &&
+	    info->control.hw_key->alg == ALG_TKIP) {
+		hdrlen = ieee80211_get_hdrlen_from_skb(skb);
+		memmove(frame + WL12XX_TKIP_IV_SPACE, frame, hdrlen);
+		skb_pull(skb, WL12XX_TKIP_IV_SPACE);
+	}
+
+	wl12xx_debug(DEBUG_TX, "tx status id %u skb 0x%p failures %u rate 0x%x"
+		     " status 0x%x (%s)",
+		     result->id, skb, result->ack_failures, result->rate,
+		     result->status, wl12xx_tx_parse_status(result->status));
+
+
+	ieee80211_tx_status(wl->hw, skb);
+
+	wl->tx_frames[result->id] = NULL;
+
+	if (wl->tx_queue_stopped) {
+		wl12xx_debug(DEBUG_TX, "cb: queue was stopped");
+
+		skb = skb_dequeue(&wl->tx_queue);
+
+		/* The skb can be NULL because tx_work might have been
+		   scheduled before the queue was stopped making the
+		   queue empty */
+
+		if (skb) {
+			ret = wl12xx_tx_frame(wl, skb);
+			if (ret == -EBUSY) {
+				/* firmware buffer is still full */
+				wl12xx_debug(DEBUG_TX, "cb: fw buffer "
+					     "still full");
+				skb_queue_head(&wl->tx_queue, skb);
+				return;
+			} else if (ret < 0) {
+				dev_kfree_skb(skb);
+				return;
+			}
+		}
+
+		wl12xx_debug(DEBUG_TX, "cb: waking queues");
+		ieee80211_wake_queues(wl->hw);
+		wl->tx_queue_stopped = false;
+	}
+}
+
+/* Called upon reception of a TX complete interrupt */
+void wl12xx_tx_complete(struct wl12xx *wl)
+{
+	int i, result_index, num_complete = 0;
+	struct tx_result result[FW_TX_CMPLT_BLOCK_SIZE], *result_ptr;
+
+	if (unlikely(wl->state != WL12XX_STATE_ON))
+		return;
+
+	/* First we read the result */
+	wl12xx_spi_mem_read(wl, wl->data_path->tx_complete_addr,
+			    result, sizeof(result));
+
+	result_index = wl->next_tx_complete;
+
+	for (i = 0; i < ARRAY_SIZE(result); i++) {
+		result_ptr = &result[result_index];
+
+		if (result_ptr->done_1 == 1 &&
+		    result_ptr->done_2 == 1) {
+			wl12xx_tx_packet_cb(wl, result_ptr);
+
+			result_ptr->done_1 = 0;
+			result_ptr->done_2 = 0;
+
+			result_index = (result_index + 1) &
+				(FW_TX_CMPLT_BLOCK_SIZE - 1);
+			num_complete++;
+		} else {
+			break;
+		}
+	}
+
+	/* Every completed frame needs to be acknowledged */
+	if (num_complete) {
+		/*
+		 * If we've wrapped, we have to clear
+		 * the results in 2 steps.
+		 */
+		if (result_index > wl->next_tx_complete) {
+			/* Only 1 write is needed */
+			wl12xx_spi_mem_write(wl,
+					     wl->data_path->tx_complete_addr +
+					     (wl->next_tx_complete *
+					      sizeof(struct tx_result)),
+					     &result[wl->next_tx_complete],
+					     num_complete *
+					     sizeof(struct tx_result));
+
+
+		} else if (result_index < wl->next_tx_complete) {
+			/* 2 writes are needed */
+			wl12xx_spi_mem_write(wl,
+					     wl->data_path->tx_complete_addr +
+					     (wl->next_tx_complete *
+					      sizeof(struct tx_result)),
+					     &result[wl->next_tx_complete],
+					     (FW_TX_CMPLT_BLOCK_SIZE -
+					      wl->next_tx_complete) *
+					     sizeof(struct tx_result));
+
+			wl12xx_spi_mem_write(wl,
+					     wl->data_path->tx_complete_addr,
+					     result,
+					     (num_complete -
+					      FW_TX_CMPLT_BLOCK_SIZE +
+					      wl->next_tx_complete) *
+					     sizeof(struct tx_result));
+
+		} else {
+			/* We have to write the whole array */
+			wl12xx_spi_mem_write(wl,
+					     wl->data_path->tx_complete_addr,
+					     result,
+					     FW_TX_CMPLT_BLOCK_SIZE *
+					     sizeof(struct tx_result));
+		}
+
+	}
+
+	wl->next_tx_complete = result_index;
+}
+
+/* caller must hold wl->mutex */
+void wl12xx_tx_flush(struct wl12xx *wl)
+{
+	int i;
+	struct sk_buff *skb;
+	struct ieee80211_tx_info *info;
+
+	/* TX failure */
+/* 	control->flags = 0; FIXME */
+
+	while ((skb = skb_dequeue(&wl->tx_queue))) {
+		info = IEEE80211_SKB_CB(skb);
+
+		wl12xx_debug(DEBUG_TX, "flushing skb 0x%p", skb);
+
+		if (!(info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS))
+				continue;
+
+		ieee80211_tx_status(wl->hw, skb);
+	}
+
+	for (i = 0; i < FW_TX_CMPLT_BLOCK_SIZE; i++)
+		if (wl->tx_frames[i] != NULL) {
+			skb = wl->tx_frames[i];
+			info = IEEE80211_SKB_CB(skb);
+
+			if (!(info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS))
+				continue;
+
+			ieee80211_tx_status(wl->hw, skb);
+			wl->tx_frames[i] = NULL;
+		}
+}
diff --git a/drivers/net/wireless/wl12xx/tx.h b/drivers/net/wireless/wl12xx/tx.h
new file mode 100644
index 00000000000..dc82691f4c1
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/tx.h
@@ -0,0 +1,215 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (c) 1998-2007 Texas Instruments Incorporated
+ * Copyright (C) 2008 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#ifndef __WL12XX_TX_H__
+#define __WL12XX_TX_H__
+
+#include <linux/bitops.h>
+
+/*
+ *
+ * TX PATH
+ *
+ * The Tx path uses a double buffer and a tx_control structure, each located
+ * at a fixed address in the device's memory. On startup, the host retrieves
+ * the pointers to these addresses. A double buffer allows for continuous data
+ * flow towards the device. The host keeps track of which buffer is available
+ * and alternates between these two buffers on a per packet basis.
+ *
+ * The size of each of the two buffers is large enough to hold the longest
+ * 802.3 packet - maximum size Ethernet packet + header + descriptor.
+ * TX complete indication will be received a-synchronously in a TX done cyclic
+ * buffer which is composed of 16 tx_result descriptors structures and is used
+ * in a cyclic manner.
+ *
+ * The TX (HOST) procedure is as follows:
+ * 1. Read the Tx path status, that will give the data_out_count.
+ * 2. goto 1, if not possible.
+ *    i.e. if data_in_count - data_out_count >= HwBuffer size (2 for double
+ *    buffer).
+ * 3. Copy the packet (preceded by double_buffer_desc), if possible.
+ *    i.e. if data_in_count - data_out_count < HwBuffer size (2 for double
+ *    buffer).
+ * 4. increment data_in_count.
+ * 5. Inform the firmware by generating a firmware internal interrupt.
+ * 6. FW will increment data_out_count after it reads the buffer.
+ *
+ * The TX Complete procedure:
+ * 1. To get a TX complete indication the host enables the tx_complete flag in
+ *    the TX descriptor Structure.
+ * 2. For each packet with a Tx Complete field set, the firmware adds the
+ *    transmit results to the cyclic buffer (txDoneRing) and sets both done_1
+ *    and done_2 to 1 to indicate driver ownership.
+ * 3. The firmware sends a Tx Complete interrupt to the host to trigger the
+ *    host to process the new data. Note: interrupt will be send per packet if
+ *    TX complete indication was requested in tx_control or per crossing
+ *    aggregation threshold.
+ * 4. After receiving the Tx Complete interrupt, the host reads the
+ *    TxDescriptorDone information in a cyclic manner and clears both done_1
+ *    and done_2 fields.
+ *
+ */
+
+#define TX_COMPLETE_REQUIRED_BIT	0x80
+#define TX_STATUS_DATA_OUT_COUNT_MASK   0xf
+#define WL12XX_TX_ALIGN_TO 4
+#define WL12XX_TX_ALIGN(len) (((len) + WL12XX_TX_ALIGN_TO - 1) & \
+			     ~(WL12XX_TX_ALIGN_TO - 1))
+#define WL12XX_TKIP_IV_SPACE 4
+
+struct tx_control {
+	/* Rate Policy (class) index */
+	unsigned rate_policy:3;
+
+	/* When set, no ack policy is expected */
+	unsigned ack_policy:1;
+
+	/*
+	 * Packet type:
+	 * 0 -> 802.11
+	 * 1 -> 802.3
+	 * 2 -> IP
+	 * 3 -> raw codec
+	 */
+	unsigned packet_type:2;
+
+	/* If set, this is a QoS-Null or QoS-Data frame */
+	unsigned qos:1;
+
+	/*
+	 * If set, the target triggers the tx complete INT
+	 * upon frame sending completion.
+	 */
+	unsigned tx_complete:1;
+
+	/* 2 bytes padding before packet header */
+	unsigned xfer_pad:1;
+
+	unsigned reserved:7;
+} __attribute__ ((packed));
+
+
+struct tx_double_buffer_desc {
+	/* Length of payload, including headers. */
+	u16 length;
+
+	/*
+	 * A bit mask that specifies the initial rate to be used
+	 * Possible values are:
+	 * 0x0001 - 1Mbits
+	 * 0x0002 - 2Mbits
+	 * 0x0004 - 5.5Mbits
+	 * 0x0008 - 6Mbits
+	 * 0x0010 - 9Mbits
+	 * 0x0020 - 11Mbits
+	 * 0x0040 - 12Mbits
+	 * 0x0080 - 18Mbits
+	 * 0x0100 - 22Mbits
+	 * 0x0200 - 24Mbits
+	 * 0x0400 - 36Mbits
+	 * 0x0800 - 48Mbits
+	 * 0x1000 - 54Mbits
+	 */
+	u16 rate;
+
+	/* Time in us that a packet can spend in the target */
+	u32 expiry_time;
+
+	/* index of the TX queue used for this packet */
+	u8 xmit_queue;
+
+	/* Used to identify a packet */
+	u8 id;
+
+	struct tx_control control;
+
+	/*
+	 * The FW should cut the packet into fragments
+	 * of this size.
+	 */
+	u16 frag_threshold;
+
+	/* Numbers of HW queue blocks to be allocated */
+	u8 num_mem_blocks;
+
+	u8 reserved;
+} __attribute__ ((packed));
+
+enum {
+	TX_SUCCESS              = 0,
+	TX_DMA_ERROR            = BIT(7),
+	TX_DISABLED             = BIT(6),
+	TX_RETRY_EXCEEDED       = BIT(5),
+	TX_TIMEOUT              = BIT(4),
+	TX_KEY_NOT_FOUND        = BIT(3),
+	TX_ENCRYPT_FAIL         = BIT(2),
+	TX_UNAVAILABLE_PRIORITY = BIT(1),
+};
+
+struct tx_result {
+	/*
+	 * Ownership synchronization between the host and
+	 * the firmware. If done_1 and done_2 are cleared,
+	 * owned by the FW (no info ready).
+	 */
+	u8 done_1;
+
+	/* same as double_buffer_desc->id */
+	u8 id;
+
+	/*
+	 * Total air access duration consumed by this
+	 * packet, including all retries and overheads.
+	 */
+	u16 medium_usage;
+
+	/* Total media delay (from 1st EDCA AIFS counter until TX Complete). */
+	u32 medium_delay;
+
+	/* Time between host xfer and tx complete */
+	u32 fw_hnadling_time;
+
+	/* The LS-byte of the last TKIP sequence number. */
+	u8 lsb_seq_num;
+
+	/* Retry count */
+	u8 ack_failures;
+
+	/* At which rate we got a ACK */
+	u16 rate;
+
+	u16 reserved;
+
+	/* TX_* */
+	u8 status;
+
+	/* See done_1 */
+	u8 done_2;
+} __attribute__ ((packed));
+
+void wl12xx_tx_work(struct work_struct *work);
+void wl12xx_tx_complete(struct wl12xx *wl);
+void wl12xx_tx_flush(struct wl12xx *wl);
+
+#endif
diff --git a/drivers/net/wireless/wl12xx/wl1251.c b/drivers/net/wireless/wl12xx/wl1251.c
new file mode 100644
index 00000000000..bd0decdcad2
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/wl1251.c
@@ -0,0 +1,709 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (C) 2008-2009 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include "wl1251.h"
+#include "reg.h"
+#include "spi.h"
+#include "boot.h"
+#include "event.h"
+#include "acx.h"
+#include "tx.h"
+#include "rx.h"
+#include "ps.h"
+#include "init.h"
+
+static struct wl12xx_partition_set wl1251_part_table[PART_TABLE_LEN] = {
+	[PART_DOWN] = {
+		.mem = {
+			.start = 0x00000000,
+			.size  = 0x00016800
+		},
+		.reg = {
+			.start = REGISTERS_BASE,
+			.size  = REGISTERS_DOWN_SIZE
+		},
+	},
+
+	[PART_WORK] = {
+		.mem = {
+			.start = 0x00028000,
+			.size  = 0x00014000
+		},
+		.reg = {
+			.start = REGISTERS_BASE,
+			.size  = REGISTERS_WORK_SIZE
+		},
+	},
+
+	/* WL1251 doesn't use the DRPW partition, so we don't set it here */
+};
+
+static enum wl12xx_acx_int_reg wl1251_acx_reg_table[ACX_REG_TABLE_LEN] = {
+	[ACX_REG_INTERRUPT_TRIG]     = (REGISTERS_BASE + 0x0474),
+	[ACX_REG_INTERRUPT_TRIG_H]   = (REGISTERS_BASE + 0x0478),
+	[ACX_REG_INTERRUPT_MASK]     = (REGISTERS_BASE + 0x0494),
+	[ACX_REG_HINT_MASK_SET]      = (REGISTERS_BASE + 0x0498),
+	[ACX_REG_HINT_MASK_CLR]      = (REGISTERS_BASE + 0x049C),
+	[ACX_REG_INTERRUPT_NO_CLEAR] = (REGISTERS_BASE + 0x04B0),
+	[ACX_REG_INTERRUPT_CLEAR]    = (REGISTERS_BASE + 0x04A4),
+	[ACX_REG_INTERRUPT_ACK]      = (REGISTERS_BASE + 0x04A8),
+	[ACX_REG_SLV_SOFT_RESET]     = (REGISTERS_BASE + 0x0000),
+	[ACX_REG_EE_START]           = (REGISTERS_BASE + 0x080C),
+	[ACX_REG_ECPU_CONTROL]       = (REGISTERS_BASE + 0x0804)
+};
+
+static int wl1251_upload_firmware(struct wl12xx *wl)
+{
+	struct wl12xx_partition_set *p_table = wl->chip.p_table;
+	int addr, chunk_num, partition_limit;
+	size_t fw_data_len;
+	u8 *p;
+
+	/* whal_FwCtrl_LoadFwImageSm() */
+
+	wl12xx_debug(DEBUG_BOOT, "chip id before fw upload: 0x%x",
+		     wl12xx_reg_read32(wl, CHIP_ID_B));
+
+	/* 10.0 check firmware length and set partition */
+	fw_data_len =  (wl->fw[4] << 24) | (wl->fw[5] << 16) |
+		(wl->fw[6] << 8) | (wl->fw[7]);
+
+	wl12xx_debug(DEBUG_BOOT, "fw_data_len %d chunk_size %d", fw_data_len,
+		CHUNK_SIZE);
+
+	if ((fw_data_len % 4) != 0) {
+		wl12xx_error("firmware length not multiple of four");
+		return -EIO;
+	}
+
+	wl12xx_set_partition(wl,
+			     p_table[PART_DOWN].mem.start,
+			     p_table[PART_DOWN].mem.size,
+			     p_table[PART_DOWN].reg.start,
+			     p_table[PART_DOWN].reg.size);
+
+	/* 10.1 set partition limit and chunk num */
+	chunk_num = 0;
+	partition_limit = p_table[PART_DOWN].mem.size;
+
+	while (chunk_num < fw_data_len / CHUNK_SIZE) {
+		/* 10.2 update partition, if needed */
+		addr = p_table[PART_DOWN].mem.start +
+			(chunk_num + 2) * CHUNK_SIZE;
+		if (addr > partition_limit) {
+			addr = p_table[PART_DOWN].mem.start +
+				chunk_num * CHUNK_SIZE;
+			partition_limit = chunk_num * CHUNK_SIZE +
+				p_table[PART_DOWN].mem.size;
+			wl12xx_set_partition(wl,
+					     addr,
+					     p_table[PART_DOWN].mem.size,
+					     p_table[PART_DOWN].reg.start,
+					     p_table[PART_DOWN].reg.size);
+		}
+
+		/* 10.3 upload the chunk */
+		addr = p_table[PART_DOWN].mem.start + chunk_num * CHUNK_SIZE;
+		p = wl->fw + FW_HDR_SIZE + chunk_num * CHUNK_SIZE;
+		wl12xx_debug(DEBUG_BOOT, "uploading fw chunk 0x%p to 0x%x",
+			     p, addr);
+		wl12xx_spi_mem_write(wl, addr, p, CHUNK_SIZE);
+
+		chunk_num++;
+	}
+
+	/* 10.4 upload the last chunk */
+	addr = p_table[PART_DOWN].mem.start + chunk_num * CHUNK_SIZE;
+	p = wl->fw + FW_HDR_SIZE + chunk_num * CHUNK_SIZE;
+	wl12xx_debug(DEBUG_BOOT, "uploading fw last chunk (%d B) 0x%p to 0x%x",
+		     fw_data_len % CHUNK_SIZE, p, addr);
+	wl12xx_spi_mem_write(wl, addr, p, fw_data_len % CHUNK_SIZE);
+
+	return 0;
+}
+
+static int wl1251_upload_nvs(struct wl12xx *wl)
+{
+	size_t nvs_len, nvs_bytes_written, burst_len;
+	int nvs_start, i;
+	u32 dest_addr, val;
+	u8 *nvs_ptr, *nvs;
+
+	nvs = wl->nvs;
+	if (nvs == NULL)
+		return -ENODEV;
+
+	nvs_ptr = nvs;
+
+	nvs_len = wl->nvs_len;
+	nvs_start = wl->fw_len;
+
+	/*
+	 * Layout before the actual NVS tables:
+	 * 1 byte : burst length.
+	 * 2 bytes: destination address.
+	 * n bytes: data to burst copy.
+	 *
+	 * This is ended by a 0 length, then the NVS tables.
+	 */
+
+	while (nvs_ptr[0]) {
+		burst_len = nvs_ptr[0];
+		dest_addr = (nvs_ptr[1] & 0xfe) | ((u32)(nvs_ptr[2] << 8));
+
+		/* We move our pointer to the data */
+		nvs_ptr += 3;
+
+		for (i = 0; i < burst_len; i++) {
+			val = (nvs_ptr[0] | (nvs_ptr[1] << 8)
+			       | (nvs_ptr[2] << 16) | (nvs_ptr[3] << 24));
+
+			wl12xx_debug(DEBUG_BOOT,
+				     "nvs burst write 0x%x: 0x%x",
+				     dest_addr, val);
+			wl12xx_mem_write32(wl, dest_addr, val);
+
+			nvs_ptr += 4;
+			dest_addr += 4;
+		}
+	}
+
+	/*
+	 * We've reached the first zero length, the first NVS table
+	 * is 7 bytes further.
+	 */
+	nvs_ptr += 7;
+	nvs_len -= nvs_ptr - nvs;
+	nvs_len = ALIGN(nvs_len, 4);
+
+	/* Now we must set the partition correctly */
+	wl12xx_set_partition(wl, nvs_start,
+			     wl->chip.p_table[PART_DOWN].mem.size,
+			     wl->chip.p_table[PART_DOWN].reg.start,
+			     wl->chip.p_table[PART_DOWN].reg.size);
+
+	/* And finally we upload the NVS tables */
+	nvs_bytes_written = 0;
+	while (nvs_bytes_written < nvs_len) {
+		val = (nvs_ptr[0] | (nvs_ptr[1] << 8)
+		       | (nvs_ptr[2] << 16) | (nvs_ptr[3] << 24));
+
+		val = cpu_to_le32(val);
+
+		wl12xx_debug(DEBUG_BOOT,
+			     "nvs write table 0x%x: 0x%x",
+			     nvs_start, val);
+		wl12xx_mem_write32(wl, nvs_start, val);
+
+		nvs_ptr += 4;
+		nvs_bytes_written += 4;
+		nvs_start += 4;
+	}
+
+	return 0;
+}
+
+static int wl1251_boot(struct wl12xx *wl)
+{
+	int ret = 0, minor_minor_e2_ver;
+	u32 tmp, boot_data;
+
+	ret = wl12xx_boot_soft_reset(wl);
+	if (ret < 0)
+		goto out;
+
+	/* 2. start processing NVS file */
+	ret = wl->chip.op_upload_nvs(wl);
+	if (ret < 0)
+		goto out;
+
+	/* write firmware's last address (ie. it's length) to
+	 * ACX_EEPROMLESS_IND_REG */
+	wl12xx_reg_write32(wl, ACX_EEPROMLESS_IND_REG, wl->fw_len);
+
+	/* 6. read the EEPROM parameters */
+	tmp = wl12xx_reg_read32(wl, SCR_PAD2);
+
+	/* 7. read bootdata */
+	wl->boot_attr.radio_type = (tmp & 0x0000FF00) >> 8;
+	wl->boot_attr.major = (tmp & 0x00FF0000) >> 16;
+	tmp = wl12xx_reg_read32(wl, SCR_PAD3);
+
+	/* 8. check bootdata and call restart sequence */
+	wl->boot_attr.minor = (tmp & 0x00FF0000) >> 16;
+	minor_minor_e2_ver = (tmp & 0xFF000000) >> 24;
+
+	wl12xx_debug(DEBUG_BOOT, "radioType 0x%x majorE2Ver 0x%x "
+		     "minorE2Ver 0x%x minor_minor_e2_ver 0x%x",
+		     wl->boot_attr.radio_type, wl->boot_attr.major,
+		     wl->boot_attr.minor, minor_minor_e2_ver);
+
+	ret = wl12xx_boot_init_seq(wl);
+	if (ret < 0)
+		goto out;
+
+	/* 9. NVS processing done */
+	boot_data = wl12xx_reg_read32(wl, ACX_REG_ECPU_CONTROL);
+
+	wl12xx_debug(DEBUG_BOOT, "halt boot_data 0x%x", boot_data);
+
+	/* 10. check that ECPU_CONTROL_HALT bits are set in
+	 * pWhalBus->uBootData and start uploading firmware
+	 */
+	if ((boot_data & ECPU_CONTROL_HALT) == 0) {
+		wl12xx_error("boot failed, ECPU_CONTROL_HALT not set");
+		ret = -EIO;
+		goto out;
+	}
+
+	ret = wl->chip.op_upload_fw(wl);
+	if (ret < 0)
+		goto out;
+
+	/* 10.5 start firmware */
+	ret = wl12xx_boot_run_firmware(wl);
+	if (ret < 0)
+		goto out;
+
+	/* Get and save the firmware version */
+	wl12xx_acx_fw_version(wl, wl->chip.fw_ver, sizeof(wl->chip.fw_ver));
+
+out:
+	return ret;
+}
+
+static int wl1251_mem_cfg(struct wl12xx *wl)
+{
+	struct wl1251_acx_config_memory mem_conf;
+	int ret, i;
+
+	wl12xx_debug(DEBUG_ACX, "wl1251 mem cfg");
+
+	/* memory config */
+	mem_conf.mem_config.num_stations = cpu_to_le16(DEFAULT_NUM_STATIONS);
+	mem_conf.mem_config.rx_mem_block_num = 35;
+	mem_conf.mem_config.tx_min_mem_block_num = 64;
+	mem_conf.mem_config.num_tx_queues = MAX_TX_QUEUES;
+	mem_conf.mem_config.host_if_options = HOSTIF_PKT_RING;
+	mem_conf.mem_config.num_ssid_profiles = 1;
+	mem_conf.mem_config.debug_buffer_size =
+		cpu_to_le16(TRACE_BUFFER_MAX_SIZE);
+
+	/* RX queue config */
+	mem_conf.rx_queue_config.dma_address = 0;
+	mem_conf.rx_queue_config.num_descs = ACX_RX_DESC_DEF;
+	mem_conf.rx_queue_config.priority = DEFAULT_RXQ_PRIORITY;
+	mem_conf.rx_queue_config.type = DEFAULT_RXQ_TYPE;
+
+	/* TX queue config */
+	for (i = 0; i < MAX_TX_QUEUES; i++) {
+		mem_conf.tx_queue_config[i].num_descs = ACX_TX_DESC_DEF;
+		mem_conf.tx_queue_config[i].attributes = i;
+	}
+
+	mem_conf.header.id = ACX_MEM_CFG;
+	mem_conf.header.len = sizeof(struct wl1251_acx_config_memory) -
+		sizeof(struct acx_header);
+	mem_conf.header.len -=
+		(MAX_TX_QUEUE_CONFIGS - mem_conf.mem_config.num_tx_queues) *
+		sizeof(struct wl1251_acx_tx_queue_config);
+
+	ret = wl12xx_cmd_configure(wl, &mem_conf,
+				   sizeof(struct wl1251_acx_config_memory));
+	if (ret < 0)
+		wl12xx_warning("wl1251 mem config failed: %d", ret);
+
+	return ret;
+}
+
+static int wl1251_hw_init_mem_config(struct wl12xx *wl)
+{
+	int ret;
+
+	ret = wl1251_mem_cfg(wl);
+	if (ret < 0)
+		return ret;
+
+	wl->target_mem_map = kzalloc(sizeof(struct wl1251_acx_mem_map),
+					  GFP_KERNEL);
+	if (!wl->target_mem_map) {
+		wl12xx_error("couldn't allocate target memory map");
+		return -ENOMEM;
+	}
+
+	/* we now ask for the firmware built memory map */
+	ret = wl12xx_acx_mem_map(wl, wl->target_mem_map,
+				 sizeof(struct wl1251_acx_mem_map));
+	if (ret < 0) {
+		wl12xx_error("couldn't retrieve firmware memory map");
+		kfree(wl->target_mem_map);
+		wl->target_mem_map = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+
+static void wl1251_set_ecpu_ctrl(struct wl12xx *wl, u32 flag)
+{
+	u32 cpu_ctrl;
+
+	/* 10.5.0 run the firmware (I) */
+	cpu_ctrl = wl12xx_reg_read32(wl, ACX_REG_ECPU_CONTROL);
+
+	/* 10.5.1 run the firmware (II) */
+	cpu_ctrl &= ~flag;
+	wl12xx_reg_write32(wl, ACX_REG_ECPU_CONTROL, cpu_ctrl);
+}
+
+static void wl1251_target_enable_interrupts(struct wl12xx *wl)
+{
+	/* Enable target's interrupts */
+	wl->intr_mask = WL1251_ACX_INTR_RX0_DATA |
+		WL1251_ACX_INTR_RX1_DATA |
+		WL1251_ACX_INTR_TX_RESULT |
+		WL1251_ACX_INTR_EVENT_A |
+		WL1251_ACX_INTR_EVENT_B |
+		WL1251_ACX_INTR_INIT_COMPLETE;
+	wl12xx_boot_target_enable_interrupts(wl);
+}
+
+static void wl1251_irq_work(struct work_struct *work)
+{
+	u32 intr;
+	struct wl12xx *wl =
+		container_of(work, struct wl12xx, irq_work);
+
+	mutex_lock(&wl->mutex);
+
+	wl12xx_debug(DEBUG_IRQ, "IRQ work");
+
+	if (wl->state == WL12XX_STATE_OFF)
+		goto out;
+
+	wl12xx_ps_elp_wakeup(wl);
+
+	wl12xx_reg_write32(wl, ACX_REG_INTERRUPT_MASK, WL1251_ACX_INTR_ALL);
+
+	intr = wl12xx_reg_read32(wl, ACX_REG_INTERRUPT_CLEAR);
+	wl12xx_debug(DEBUG_IRQ, "intr: 0x%x", intr);
+
+	if (wl->data_path) {
+		wl12xx_spi_mem_read(wl, wl->data_path->rx_control_addr,
+				    &wl->rx_counter, sizeof(u32));
+
+		/* We handle a frmware bug here */
+		switch ((wl->rx_counter - wl->rx_handled) & 0xf) {
+		case 0:
+			wl12xx_debug(DEBUG_IRQ, "RX: FW and host in sync");
+			intr &= ~WL1251_ACX_INTR_RX0_DATA;
+			intr &= ~WL1251_ACX_INTR_RX1_DATA;
+			break;
+		case 1:
+			wl12xx_debug(DEBUG_IRQ, "RX: FW +1");
+			intr |= WL1251_ACX_INTR_RX0_DATA;
+			intr &= ~WL1251_ACX_INTR_RX1_DATA;
+			break;
+		case 2:
+			wl12xx_debug(DEBUG_IRQ, "RX: FW +2");
+			intr |= WL1251_ACX_INTR_RX0_DATA;
+			intr |= WL1251_ACX_INTR_RX1_DATA;
+			break;
+		default:
+			wl12xx_warning("RX: FW and host out of sync: %d",
+				       wl->rx_counter - wl->rx_handled);
+			break;
+		}
+
+		wl->rx_handled = wl->rx_counter;
+
+
+		wl12xx_debug(DEBUG_IRQ, "RX counter: %d", wl->rx_counter);
+	}
+
+	intr &= wl->intr_mask;
+
+	if (intr == 0) {
+		wl12xx_debug(DEBUG_IRQ, "INTR is 0");
+		wl12xx_reg_write32(wl, ACX_REG_INTERRUPT_MASK,
+				   ~(wl->intr_mask));
+
+		goto out_sleep;
+	}
+
+	if (intr & WL1251_ACX_INTR_RX0_DATA) {
+		wl12xx_debug(DEBUG_IRQ, "WL1251_ACX_INTR_RX0_DATA");
+		wl12xx_rx(wl);
+	}
+
+	if (intr & WL1251_ACX_INTR_RX1_DATA) {
+		wl12xx_debug(DEBUG_IRQ, "WL1251_ACX_INTR_RX1_DATA");
+		wl12xx_rx(wl);
+	}
+
+	if (intr & WL1251_ACX_INTR_TX_RESULT) {
+		wl12xx_debug(DEBUG_IRQ, "WL1251_ACX_INTR_TX_RESULT");
+		wl12xx_tx_complete(wl);
+	}
+
+	if (intr & (WL1251_ACX_INTR_EVENT_A | WL1251_ACX_INTR_EVENT_B)) {
+		wl12xx_debug(DEBUG_IRQ, "WL1251_ACX_INTR_EVENT (0x%x)", intr);
+		if (intr & WL1251_ACX_INTR_EVENT_A)
+			wl12xx_event_handle(wl, 0);
+		else
+			wl12xx_event_handle(wl, 1);
+	}
+
+	if (intr & WL1251_ACX_INTR_INIT_COMPLETE)
+		wl12xx_debug(DEBUG_IRQ, "WL1251_ACX_INTR_INIT_COMPLETE");
+
+	wl12xx_reg_write32(wl, ACX_REG_INTERRUPT_MASK, ~(wl->intr_mask));
+
+out_sleep:
+	wl12xx_ps_elp_sleep(wl);
+out:
+	mutex_unlock(&wl->mutex);
+}
+
+static int wl1251_hw_init_txq_fill(u8 qid,
+				   struct acx_tx_queue_qos_config *config,
+				   u32 num_blocks)
+{
+	config->qid = qid;
+
+	switch (qid) {
+	case QOS_AC_BE:
+		config->high_threshold =
+			(QOS_TX_HIGH_BE_DEF * num_blocks) / 100;
+		config->low_threshold =
+			(QOS_TX_LOW_BE_DEF * num_blocks) / 100;
+		break;
+	case QOS_AC_BK:
+		config->high_threshold =
+			(QOS_TX_HIGH_BK_DEF * num_blocks) / 100;
+		config->low_threshold =
+			(QOS_TX_LOW_BK_DEF * num_blocks) / 100;
+		break;
+	case QOS_AC_VI:
+		config->high_threshold =
+			(QOS_TX_HIGH_VI_DEF * num_blocks) / 100;
+		config->low_threshold =
+			(QOS_TX_LOW_VI_DEF * num_blocks) / 100;
+		break;
+	case QOS_AC_VO:
+		config->high_threshold =
+			(QOS_TX_HIGH_VO_DEF * num_blocks) / 100;
+		config->low_threshold =
+			(QOS_TX_LOW_VO_DEF * num_blocks) / 100;
+		break;
+	default:
+		wl12xx_error("Invalid TX queue id: %d", qid);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int wl1251_hw_init_tx_queue_config(struct wl12xx *wl)
+{
+	struct acx_tx_queue_qos_config config;
+	struct wl1251_acx_mem_map *wl_mem_map = wl->target_mem_map;
+	int ret, i;
+
+	wl12xx_debug(DEBUG_ACX, "acx tx queue config");
+
+	config.header.id = ACX_TX_QUEUE_CFG;
+	config.header.len = sizeof(struct acx_tx_queue_qos_config) -
+		sizeof(struct acx_header);
+
+	for (i = 0; i < MAX_NUM_OF_AC; i++) {
+		ret = wl1251_hw_init_txq_fill(i, &config,
+					      wl_mem_map->num_tx_mem_blocks);
+		if (ret < 0)
+			return ret;
+
+		ret = wl12xx_cmd_configure(wl, &config, sizeof(config));
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int wl1251_hw_init_data_path_config(struct wl12xx *wl)
+{
+	int ret;
+
+	/* asking for the data path parameters */
+	wl->data_path = kzalloc(sizeof(struct acx_data_path_params_resp),
+				GFP_KERNEL);
+	if (!wl->data_path) {
+		wl12xx_error("Couldnt allocate data path parameters");
+		return -ENOMEM;
+	}
+
+	ret = wl12xx_acx_data_path_params(wl, wl->data_path);
+	if (ret < 0) {
+		kfree(wl->data_path);
+		wl->data_path = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+
+static int wl1251_hw_init(struct wl12xx *wl)
+{
+	struct wl1251_acx_mem_map *wl_mem_map;
+	int ret;
+
+	ret = wl12xx_hw_init_hwenc_config(wl);
+	if (ret < 0)
+		return ret;
+
+	/* Template settings */
+	ret = wl12xx_hw_init_templates_config(wl);
+	if (ret < 0)
+		return ret;
+
+	/* Default memory configuration */
+	ret = wl1251_hw_init_mem_config(wl);
+	if (ret < 0)
+		return ret;
+
+	/* Default data path configuration  */
+	ret = wl1251_hw_init_data_path_config(wl);
+	if (ret < 0)
+		goto out_free_memmap;
+
+	/* RX config */
+	ret = wl12xx_hw_init_rx_config(wl,
+				       RX_CFG_PROMISCUOUS | RX_CFG_TSF,
+				       RX_FILTER_OPTION_DEF);
+	/* RX_CONFIG_OPTION_ANY_DST_ANY_BSS,
+	   RX_FILTER_OPTION_FILTER_ALL); */
+	if (ret < 0)
+		goto out_free_data_path;
+
+	/* TX queues config */
+	ret = wl1251_hw_init_tx_queue_config(wl);
+	if (ret < 0)
+		goto out_free_data_path;
+
+	/* PHY layer config */
+	ret = wl12xx_hw_init_phy_config(wl);
+	if (ret < 0)
+		goto out_free_data_path;
+
+	/* Beacon filtering */
+	ret = wl12xx_hw_init_beacon_filter(wl);
+	if (ret < 0)
+		goto out_free_data_path;
+
+	/* Bluetooth WLAN coexistence */
+	ret = wl12xx_hw_init_pta(wl);
+	if (ret < 0)
+		goto out_free_data_path;
+
+	/* Energy detection */
+	ret = wl12xx_hw_init_energy_detection(wl);
+	if (ret < 0)
+		goto out_free_data_path;
+
+	/* Beacons and boradcast settings */
+	ret = wl12xx_hw_init_beacon_broadcast(wl);
+	if (ret < 0)
+		goto out_free_data_path;
+
+	/* Enable data path */
+	ret = wl12xx_cmd_data_path(wl, wl->channel, 1);
+	if (ret < 0)
+		goto out_free_data_path;
+
+	/* Default power state */
+	ret = wl12xx_hw_init_power_auth(wl);
+	if (ret < 0)
+		goto out_free_data_path;
+
+	wl_mem_map = wl->target_mem_map;
+	wl12xx_info("%d tx blocks at 0x%x, %d rx blocks at 0x%x",
+		    wl_mem_map->num_tx_mem_blocks,
+		    wl->data_path->tx_control_addr,
+		    wl_mem_map->num_rx_mem_blocks,
+		    wl->data_path->rx_control_addr);
+
+	return 0;
+
+ out_free_data_path:
+	kfree(wl->data_path);
+
+ out_free_memmap:
+	kfree(wl->target_mem_map);
+
+	return ret;
+}
+
+static int wl1251_plt_init(struct wl12xx *wl)
+{
+	int ret;
+
+	ret = wl1251_hw_init_mem_config(wl);
+	if (ret < 0)
+		return ret;
+
+	ret = wl12xx_cmd_data_path(wl, wl->channel, 1);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+void wl1251_setup(struct wl12xx *wl)
+{
+	/* FIXME: Is it better to use strncpy here or is this ok? */
+	wl->chip.fw_filename = WL1251_FW_NAME;
+	wl->chip.nvs_filename = WL1251_NVS_NAME;
+
+	/* Now we know what chip we're using, so adjust the power on sleep
+	 * time accordingly */
+	wl->chip.power_on_sleep = WL1251_POWER_ON_SLEEP;
+
+	wl->chip.intr_cmd_complete = WL1251_ACX_INTR_CMD_COMPLETE;
+	wl->chip.intr_init_complete = WL1251_ACX_INTR_INIT_COMPLETE;
+
+	wl->chip.op_upload_nvs = wl1251_upload_nvs;
+	wl->chip.op_upload_fw = wl1251_upload_firmware;
+	wl->chip.op_boot = wl1251_boot;
+	wl->chip.op_set_ecpu_ctrl = wl1251_set_ecpu_ctrl;
+	wl->chip.op_target_enable_interrupts = wl1251_target_enable_interrupts;
+	wl->chip.op_hw_init = wl1251_hw_init;
+	wl->chip.op_plt_init = wl1251_plt_init;
+
+	wl->chip.p_table = wl1251_part_table;
+	wl->chip.acx_reg_table = wl1251_acx_reg_table;
+
+	INIT_WORK(&wl->irq_work, wl1251_irq_work);
+}
diff --git a/drivers/net/wireless/wl12xx/wl1251.h b/drivers/net/wireless/wl12xx/wl1251.h
new file mode 100644
index 00000000000..1f4a4433039
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/wl1251.h
@@ -0,0 +1,165 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (C) 2008 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#ifndef __WL1251_H__
+#define __WL1251_H__
+
+#include <linux/bitops.h>
+
+#include "wl12xx.h"
+#include "acx.h"
+
+#define WL1251_FW_NAME "wl1251-fw.bin"
+#define WL1251_NVS_NAME "wl1251-nvs.bin"
+
+#define WL1251_POWER_ON_SLEEP 10 /* in miliseconds */
+
+void wl1251_setup(struct wl12xx *wl);
+
+
+struct wl1251_acx_memory {
+	__le16 num_stations; /* number of STAs to be supported. */
+	u16 reserved_1;
+
+	/*
+	 * Nmber of memory buffers for the RX mem pool.
+	 * The actual number may be less if there are
+	 * not enough blocks left for the minimum num
+	 * of TX ones.
+	 */
+	u8 rx_mem_block_num;
+	u8 reserved_2;
+	u8 num_tx_queues; /* From 1 to 16 */
+	u8 host_if_options; /* HOST_IF* */
+	u8 tx_min_mem_block_num;
+	u8 num_ssid_profiles;
+	__le16 debug_buffer_size;
+} __attribute__ ((packed));
+
+
+#define ACX_RX_DESC_MIN                1
+#define ACX_RX_DESC_MAX                127
+#define ACX_RX_DESC_DEF                32
+struct wl1251_acx_rx_queue_config {
+	u8 num_descs;
+	u8 pad;
+	u8 type;
+	u8 priority;
+	__le32 dma_address;
+} __attribute__ ((packed));
+
+#define ACX_TX_DESC_MIN                1
+#define ACX_TX_DESC_MAX                127
+#define ACX_TX_DESC_DEF                16
+struct wl1251_acx_tx_queue_config {
+    u8 num_descs;
+    u8 pad[2];
+    u8 attributes;
+} __attribute__ ((packed));
+
+#define MAX_TX_QUEUE_CONFIGS 5
+#define MAX_TX_QUEUES 4
+struct wl1251_acx_config_memory {
+	struct acx_header header;
+
+	struct wl1251_acx_memory mem_config;
+	struct wl1251_acx_rx_queue_config rx_queue_config;
+	struct wl1251_acx_tx_queue_config tx_queue_config[MAX_TX_QUEUE_CONFIGS];
+} __attribute__ ((packed));
+
+struct wl1251_acx_mem_map {
+	struct acx_header header;
+
+	void *code_start;
+	void *code_end;
+
+	void *wep_defkey_start;
+	void *wep_defkey_end;
+
+	void *sta_table_start;
+	void *sta_table_end;
+
+	void *packet_template_start;
+	void *packet_template_end;
+
+	void *queue_memory_start;
+	void *queue_memory_end;
+
+	void *packet_memory_pool_start;
+	void *packet_memory_pool_end;
+
+	void *debug_buffer1_start;
+	void *debug_buffer1_end;
+
+	void *debug_buffer2_start;
+	void *debug_buffer2_end;
+
+	/* Number of blocks FW allocated for TX packets */
+	u32 num_tx_mem_blocks;
+
+	/* Number of blocks FW allocated for RX packets */
+	u32 num_rx_mem_blocks;
+} __attribute__ ((packed));
+
+/*************************************************************************
+
+    Host Interrupt Register (WiLink -> Host)
+
+**************************************************************************/
+
+/* RX packet is ready in Xfer buffer #0 */
+#define WL1251_ACX_INTR_RX0_DATA      BIT(0)
+
+/* TX result(s) are in the TX complete buffer */
+#define WL1251_ACX_INTR_TX_RESULT	BIT(1)
+
+/* OBSOLETE */
+#define WL1251_ACX_INTR_TX_XFR		BIT(2)
+
+/* RX packet is ready in Xfer buffer #1 */
+#define WL1251_ACX_INTR_RX1_DATA	BIT(3)
+
+/* Event was entered to Event MBOX #A */
+#define WL1251_ACX_INTR_EVENT_A		BIT(4)
+
+/* Event was entered to Event MBOX #B */
+#define WL1251_ACX_INTR_EVENT_B		BIT(5)
+
+/* OBSOLETE */
+#define WL1251_ACX_INTR_WAKE_ON_HOST	BIT(6)
+
+/* Trace meassge on MBOX #A */
+#define WL1251_ACX_INTR_TRACE_A		BIT(7)
+
+/* Trace meassge on MBOX #B */
+#define WL1251_ACX_INTR_TRACE_B		BIT(8)
+
+/* Command processing completion */
+#define WL1251_ACX_INTR_CMD_COMPLETE	BIT(9)
+
+/* Init sequence is done */
+#define WL1251_ACX_INTR_INIT_COMPLETE	BIT(14)
+
+#define WL1251_ACX_INTR_ALL           0xFFFFFFFF
+
+#endif
diff --git a/drivers/net/wireless/wl12xx/wl12xx.h b/drivers/net/wireless/wl12xx/wl12xx.h
new file mode 100644
index 00000000000..48641437414
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/wl12xx.h
@@ -0,0 +1,409 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (c) 1998-2007 Texas Instruments Incorporated
+ * Copyright (C) 2008-2009 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#ifndef __WL12XX_H__
+#define __WL12XX_H__
+
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/bitops.h>
+#include <net/mac80211.h>
+
+#define DRIVER_NAME "wl12xx"
+#define DRIVER_PREFIX DRIVER_NAME ": "
+
+enum {
+	DEBUG_NONE	= 0,
+	DEBUG_IRQ	= BIT(0),
+	DEBUG_SPI	= BIT(1),
+	DEBUG_BOOT	= BIT(2),
+	DEBUG_MAILBOX	= BIT(3),
+	DEBUG_NETLINK	= BIT(4),
+	DEBUG_EVENT	= BIT(5),
+	DEBUG_TX	= BIT(6),
+	DEBUG_RX	= BIT(7),
+	DEBUG_SCAN	= BIT(8),
+	DEBUG_CRYPT	= BIT(9),
+	DEBUG_PSM	= BIT(10),
+	DEBUG_MAC80211	= BIT(11),
+	DEBUG_CMD	= BIT(12),
+	DEBUG_ACX	= BIT(13),
+	DEBUG_ALL	= ~0,
+};
+
+#define DEBUG_LEVEL (DEBUG_NONE)
+
+#define DEBUG_DUMP_LIMIT 1024
+
+#define wl12xx_error(fmt, arg...) \
+	printk(KERN_ERR DRIVER_PREFIX "ERROR " fmt "\n", ##arg)
+
+#define wl12xx_warning(fmt, arg...) \
+	printk(KERN_WARNING DRIVER_PREFIX "WARNING " fmt "\n", ##arg)
+
+#define wl12xx_notice(fmt, arg...) \
+	printk(KERN_INFO DRIVER_PREFIX fmt "\n", ##arg)
+
+#define wl12xx_info(fmt, arg...) \
+	printk(KERN_DEBUG DRIVER_PREFIX fmt "\n", ##arg)
+
+#define wl12xx_debug(level, fmt, arg...) \
+	do { \
+		if (level & DEBUG_LEVEL) \
+			printk(KERN_DEBUG DRIVER_PREFIX fmt "\n", ##arg); \
+	} while (0)
+
+#define wl12xx_dump(level, prefix, buf, len)	\
+	do { \
+		if (level & DEBUG_LEVEL) \
+			print_hex_dump(KERN_DEBUG, DRIVER_PREFIX prefix, \
+				       DUMP_PREFIX_OFFSET, 16, 1,	\
+				       buf,				\
+				       min_t(size_t, len, DEBUG_DUMP_LIMIT), \
+				       0);				\
+	} while (0)
+
+#define wl12xx_dump_ascii(level, prefix, buf, len)	\
+	do { \
+		if (level & DEBUG_LEVEL) \
+			print_hex_dump(KERN_DEBUG, DRIVER_PREFIX prefix, \
+				       DUMP_PREFIX_OFFSET, 16, 1,	\
+				       buf,				\
+				       min_t(size_t, len, DEBUG_DUMP_LIMIT), \
+				       true);				\
+	} while (0)
+
+#define WL12XX_DEFAULT_RX_CONFIG (CFG_UNI_FILTER_EN |	\
+				  CFG_BSSID_FILTER_EN)
+
+#define WL12XX_DEFAULT_RX_FILTER (CFG_RX_PRSP_EN |  \
+				  CFG_RX_MGMT_EN |  \
+				  CFG_RX_DATA_EN |  \
+				  CFG_RX_CTL_EN |   \
+				  CFG_RX_BCN_EN |   \
+				  CFG_RX_AUTH_EN |  \
+				  CFG_RX_ASSOC_EN)
+
+
+struct boot_attr {
+	u32 radio_type;
+	u8 mac_clock;
+	u8 arm_clock;
+	int firmware_debug;
+	u32 minor;
+	u32 major;
+	u32 bugfix;
+};
+
+enum wl12xx_state {
+	WL12XX_STATE_OFF,
+	WL12XX_STATE_ON,
+	WL12XX_STATE_PLT,
+};
+
+enum wl12xx_partition_type {
+	PART_DOWN,
+	PART_WORK,
+	PART_DRPW,
+
+	PART_TABLE_LEN
+};
+
+struct wl12xx_partition {
+	u32 size;
+	u32 start;
+};
+
+struct wl12xx_partition_set {
+	struct wl12xx_partition mem;
+	struct wl12xx_partition reg;
+};
+
+struct wl12xx;
+
+/* FIXME: I'm not sure about this structure name */
+struct wl12xx_chip {
+	u32 id;
+
+	const char *fw_filename;
+	const char *nvs_filename;
+
+	char fw_ver[21];
+
+	unsigned int power_on_sleep;
+	int intr_cmd_complete;
+	int intr_init_complete;
+
+	int (*op_upload_fw)(struct wl12xx *wl);
+	int (*op_upload_nvs)(struct wl12xx *wl);
+	int (*op_boot)(struct wl12xx *wl);
+	void (*op_set_ecpu_ctrl)(struct wl12xx *wl, u32 flag);
+	void (*op_target_enable_interrupts)(struct wl12xx *wl);
+	int (*op_hw_init)(struct wl12xx *wl);
+	int (*op_plt_init)(struct wl12xx *wl);
+
+	struct wl12xx_partition_set *p_table;
+	enum wl12xx_acx_int_reg *acx_reg_table;
+};
+
+struct wl12xx_stats {
+	struct acx_statistics *fw_stats;
+	unsigned long fw_stats_update;
+
+	unsigned int retry_count;
+	unsigned int excessive_retries;
+};
+
+struct wl12xx_debugfs {
+	struct dentry *rootdir;
+	struct dentry *fw_statistics;
+
+	struct dentry *tx_internal_desc_overflow;
+
+	struct dentry *rx_out_of_mem;
+	struct dentry *rx_hdr_overflow;
+	struct dentry *rx_hw_stuck;
+	struct dentry *rx_dropped;
+	struct dentry *rx_fcs_err;
+	struct dentry *rx_xfr_hint_trig;
+	struct dentry *rx_path_reset;
+	struct dentry *rx_reset_counter;
+
+	struct dentry *dma_rx_requested;
+	struct dentry *dma_rx_errors;
+	struct dentry *dma_tx_requested;
+	struct dentry *dma_tx_errors;
+
+	struct dentry *isr_cmd_cmplt;
+	struct dentry *isr_fiqs;
+	struct dentry *isr_rx_headers;
+	struct dentry *isr_rx_mem_overflow;
+	struct dentry *isr_rx_rdys;
+	struct dentry *isr_irqs;
+	struct dentry *isr_tx_procs;
+	struct dentry *isr_decrypt_done;
+	struct dentry *isr_dma0_done;
+	struct dentry *isr_dma1_done;
+	struct dentry *isr_tx_exch_complete;
+	struct dentry *isr_commands;
+	struct dentry *isr_rx_procs;
+	struct dentry *isr_hw_pm_mode_changes;
+	struct dentry *isr_host_acknowledges;
+	struct dentry *isr_pci_pm;
+	struct dentry *isr_wakeups;
+	struct dentry *isr_low_rssi;
+
+	struct dentry *wep_addr_key_count;
+	struct dentry *wep_default_key_count;
+	/* skipping wep.reserved */
+	struct dentry *wep_key_not_found;
+	struct dentry *wep_decrypt_fail;
+	struct dentry *wep_packets;
+	struct dentry *wep_interrupt;
+
+	struct dentry *pwr_ps_enter;
+	struct dentry *pwr_elp_enter;
+	struct dentry *pwr_missing_bcns;
+	struct dentry *pwr_wake_on_host;
+	struct dentry *pwr_wake_on_timer_exp;
+	struct dentry *pwr_tx_with_ps;
+	struct dentry *pwr_tx_without_ps;
+	struct dentry *pwr_rcvd_beacons;
+	struct dentry *pwr_power_save_off;
+	struct dentry *pwr_enable_ps;
+	struct dentry *pwr_disable_ps;
+	struct dentry *pwr_fix_tsf_ps;
+	/* skipping cont_miss_bcns_spread for now */
+	struct dentry *pwr_rcvd_awake_beacons;
+
+	struct dentry *mic_rx_pkts;
+	struct dentry *mic_calc_failure;
+
+	struct dentry *aes_encrypt_fail;
+	struct dentry *aes_decrypt_fail;
+	struct dentry *aes_encrypt_packets;
+	struct dentry *aes_decrypt_packets;
+	struct dentry *aes_encrypt_interrupt;
+	struct dentry *aes_decrypt_interrupt;
+
+	struct dentry *event_heart_beat;
+	struct dentry *event_calibration;
+	struct dentry *event_rx_mismatch;
+	struct dentry *event_rx_mem_empty;
+	struct dentry *event_rx_pool;
+	struct dentry *event_oom_late;
+	struct dentry *event_phy_transmit_error;
+	struct dentry *event_tx_stuck;
+
+	struct dentry *ps_pspoll_timeouts;
+	struct dentry *ps_upsd_timeouts;
+	struct dentry *ps_upsd_max_sptime;
+	struct dentry *ps_upsd_max_apturn;
+	struct dentry *ps_pspoll_max_apturn;
+	struct dentry *ps_pspoll_utilization;
+	struct dentry *ps_upsd_utilization;
+
+	struct dentry *rxpipe_rx_prep_beacon_drop;
+	struct dentry *rxpipe_descr_host_int_trig_rx_data;
+	struct dentry *rxpipe_beacon_buffer_thres_host_int_trig_rx_data;
+	struct dentry *rxpipe_missed_beacon_host_int_trig_rx_data;
+	struct dentry *rxpipe_tx_xfr_host_int_trig_rx_data;
+
+	struct dentry *tx_queue_len;
+
+	struct dentry *retry_count;
+	struct dentry *excessive_retries;
+};
+
+struct wl12xx {
+	struct ieee80211_hw *hw;
+	bool mac80211_registered;
+
+	struct spi_device *spi;
+
+	void (*set_power)(bool enable);
+	int irq;
+
+	enum wl12xx_state state;
+	struct mutex mutex;
+
+	int physical_mem_addr;
+	int physical_reg_addr;
+	int virtual_mem_addr;
+	int virtual_reg_addr;
+
+	struct wl12xx_chip chip;
+
+	int cmd_box_addr;
+	int event_box_addr;
+	struct boot_attr boot_attr;
+
+	u8 *fw;
+	size_t fw_len;
+	u8 *nvs;
+	size_t nvs_len;
+
+	u8 bssid[ETH_ALEN];
+	u8 mac_addr[ETH_ALEN];
+	u8 bss_type;
+	u8 listen_int;
+	int channel;
+
+	void *target_mem_map;
+	struct acx_data_path_params_resp *data_path;
+
+	/* Number of TX packets transferred to the FW, modulo 16 */
+	u32 data_in_count;
+
+	/* Frames scheduled for transmission, not handled yet */
+	struct sk_buff_head tx_queue;
+	bool tx_queue_stopped;
+
+	struct work_struct tx_work;
+	struct work_struct filter_work;
+
+	/* Pending TX frames */
+	struct sk_buff *tx_frames[16];
+
+	/*
+	 * Index pointing to the next TX complete entry
+	 * in the cyclic XT complete array we get from
+	 * the FW.
+	 */
+	u32 next_tx_complete;
+
+	/* FW Rx counter */
+	u32 rx_counter;
+
+	/* Rx frames handled */
+	u32 rx_handled;
+
+	/* Current double buffer */
+	u32 rx_current_buffer;
+	u32 rx_last_id;
+
+	/* The target interrupt mask */
+	u32 intr_mask;
+	struct work_struct irq_work;
+
+	/* The mbox event mask */
+	u32 event_mask;
+
+	/* Mailbox pointers */
+	u32 mbox_ptr[2];
+
+	/* Are we currently scanning */
+	bool scanning;
+
+	/* Our association ID */
+	u16 aid;
+
+	/* Default key (for WEP) */
+	u32 default_key;
+
+	unsigned int tx_mgmt_frm_rate;
+	unsigned int tx_mgmt_frm_mod;
+
+	unsigned int rx_config;
+	unsigned int rx_filter;
+
+	/* is firmware in elp mode */
+	bool elp;
+
+	/* we can be in psm, but not in elp, we have to differentiate */
+	bool psm;
+
+	/* PSM mode requested */
+	bool psm_requested;
+
+	/* in dBm */
+	int power_level;
+
+	struct wl12xx_stats stats;
+	struct wl12xx_debugfs debugfs;
+};
+
+int wl12xx_plt_start(struct wl12xx *wl);
+int wl12xx_plt_stop(struct wl12xx *wl);
+
+#define DEFAULT_HW_GEN_MODULATION_TYPE    CCK_LONG /* Long Preamble */
+#define DEFAULT_HW_GEN_TX_RATE          RATE_2MBPS
+#define JOIN_TIMEOUT 5000 /* 5000 milliseconds to join */
+
+#define WL12XX_DEFAULT_POWER_LEVEL 20
+
+#define WL12XX_TX_QUEUE_MAX_LENGTH 20
+
+/* Different chips need different sleep times after power on.  WL1271 needs
+ * 200ms, WL1251 needs only 10ms.  By default we use 200ms, but as soon as we
+ * know the chip ID, we change the sleep value in the wl12xx chip structure,
+ * so in subsequent power ons, we don't waste more time then needed.  */
+#define WL12XX_DEFAULT_POWER_ON_SLEEP 200
+
+#define CHIP_ID_1251_PG10	           (0x7010101)
+#define CHIP_ID_1251_PG11	           (0x7020101)
+#define CHIP_ID_1251_PG12	           (0x7030101)
+#define CHIP_ID_1271_PG10	           (0x4030101)
+
+#endif
diff --git a/drivers/net/wireless/wl12xx/wl12xx_80211.h b/drivers/net/wireless/wl12xx/wl12xx_80211.h
new file mode 100644
index 00000000000..657c2dbcb7d
--- /dev/null
+++ b/drivers/net/wireless/wl12xx/wl12xx_80211.h
@@ -0,0 +1,156 @@
+#ifndef __WL12XX_80211_H__
+#define __WL12XX_80211_H__
+
+#include <linux/if_ether.h>	/* ETH_ALEN */
+
+/* RATES */
+#define IEEE80211_CCK_RATE_1MB		        0x02
+#define IEEE80211_CCK_RATE_2MB		        0x04
+#define IEEE80211_CCK_RATE_5MB		        0x0B
+#define IEEE80211_CCK_RATE_11MB		        0x16
+#define IEEE80211_OFDM_RATE_6MB		        0x0C
+#define IEEE80211_OFDM_RATE_9MB		        0x12
+#define IEEE80211_OFDM_RATE_12MB		0x18
+#define IEEE80211_OFDM_RATE_18MB		0x24
+#define IEEE80211_OFDM_RATE_24MB		0x30
+#define IEEE80211_OFDM_RATE_36MB		0x48
+#define IEEE80211_OFDM_RATE_48MB		0x60
+#define IEEE80211_OFDM_RATE_54MB		0x6C
+#define IEEE80211_BASIC_RATE_MASK		0x80
+
+#define IEEE80211_CCK_RATE_1MB_MASK		(1<<0)
+#define IEEE80211_CCK_RATE_2MB_MASK		(1<<1)
+#define IEEE80211_CCK_RATE_5MB_MASK		(1<<2)
+#define IEEE80211_CCK_RATE_11MB_MASK		(1<<3)
+#define IEEE80211_OFDM_RATE_6MB_MASK		(1<<4)
+#define IEEE80211_OFDM_RATE_9MB_MASK		(1<<5)
+#define IEEE80211_OFDM_RATE_12MB_MASK		(1<<6)
+#define IEEE80211_OFDM_RATE_18MB_MASK		(1<<7)
+#define IEEE80211_OFDM_RATE_24MB_MASK		(1<<8)
+#define IEEE80211_OFDM_RATE_36MB_MASK		(1<<9)
+#define IEEE80211_OFDM_RATE_48MB_MASK		(1<<10)
+#define IEEE80211_OFDM_RATE_54MB_MASK		(1<<11)
+
+#define IEEE80211_CCK_RATES_MASK	  0x0000000F
+#define IEEE80211_CCK_BASIC_RATES_MASK	 (IEEE80211_CCK_RATE_1MB_MASK | \
+	IEEE80211_CCK_RATE_2MB_MASK)
+#define IEEE80211_CCK_DEFAULT_RATES_MASK (IEEE80211_CCK_BASIC_RATES_MASK | \
+	IEEE80211_CCK_RATE_5MB_MASK | \
+	IEEE80211_CCK_RATE_11MB_MASK)
+
+#define IEEE80211_OFDM_RATES_MASK	  0x00000FF0
+#define IEEE80211_OFDM_BASIC_RATES_MASK	  (IEEE80211_OFDM_RATE_6MB_MASK | \
+	IEEE80211_OFDM_RATE_12MB_MASK | \
+	IEEE80211_OFDM_RATE_24MB_MASK)
+#define IEEE80211_OFDM_DEFAULT_RATES_MASK (IEEE80211_OFDM_BASIC_RATES_MASK | \
+	IEEE80211_OFDM_RATE_9MB_MASK  | \
+	IEEE80211_OFDM_RATE_18MB_MASK | \
+	IEEE80211_OFDM_RATE_36MB_MASK | \
+	IEEE80211_OFDM_RATE_48MB_MASK | \
+	IEEE80211_OFDM_RATE_54MB_MASK)
+#define IEEE80211_DEFAULT_RATES_MASK (IEEE80211_OFDM_DEFAULT_RATES_MASK | \
+				      IEEE80211_CCK_DEFAULT_RATES_MASK)
+
+
+/* This really should be 8, but not for our firmware */
+#define MAX_SUPPORTED_RATES 32
+#define COUNTRY_STRING_LEN 3
+#define MAX_COUNTRY_TRIPLETS 32
+
+/* Headers */
+struct ieee80211_header {
+	__le16 frame_ctl;
+	__le16 duration_id;
+	u8 da[ETH_ALEN];
+	u8 sa[ETH_ALEN];
+	u8 bssid[ETH_ALEN];
+	__le16 seq_ctl;
+	u8 payload[0];
+} __attribute__ ((packed));
+
+struct wl12xx_ie_header {
+	u8 id;
+	u8 len;
+} __attribute__ ((packed));
+
+/* IEs */
+
+struct wl12xx_ie_ssid {
+	struct wl12xx_ie_header header;
+	char ssid[IW_ESSID_MAX_SIZE];
+} __attribute__ ((packed));
+
+struct wl12xx_ie_rates {
+	struct wl12xx_ie_header header;
+	u8 rates[MAX_SUPPORTED_RATES];
+} __attribute__ ((packed));
+
+struct wl12xx_ie_ds_params {
+	struct wl12xx_ie_header header;
+	u8 channel;
+} __attribute__ ((packed));
+
+struct country_triplet {
+	u8 channel;
+	u8 num_channels;
+	u8 max_tx_power;
+} __attribute__ ((packed));
+
+struct wl12xx_ie_country {
+	struct wl12xx_ie_header header;
+	u8 country_string[COUNTRY_STRING_LEN];
+	struct country_triplet triplets[MAX_COUNTRY_TRIPLETS];
+} __attribute__ ((packed));
+
+
+/* Templates */
+
+struct wl12xx_beacon_template {
+	struct ieee80211_header header;
+	__le32 time_stamp[2];
+	__le16 beacon_interval;
+	__le16 capability;
+	struct wl12xx_ie_ssid ssid;
+	struct wl12xx_ie_rates rates;
+	struct wl12xx_ie_rates ext_rates;
+	struct wl12xx_ie_ds_params ds_params;
+	struct wl12xx_ie_country country;
+} __attribute__ ((packed));
+
+struct wl12xx_null_data_template {
+	struct ieee80211_header header;
+} __attribute__ ((packed));
+
+struct wl12xx_ps_poll_template {
+	u16 fc;
+	u16 aid;
+	u8 bssid[ETH_ALEN];
+	u8 ta[ETH_ALEN];
+} __attribute__ ((packed));
+
+struct wl12xx_qos_null_data_template {
+	struct ieee80211_header header;
+	__le16 qos_ctl;
+} __attribute__ ((packed));
+
+struct wl12xx_probe_req_template {
+	struct ieee80211_header header;
+	struct wl12xx_ie_ssid ssid;
+	struct wl12xx_ie_rates rates;
+	struct wl12xx_ie_rates ext_rates;
+} __attribute__ ((packed));
+
+
+struct wl12xx_probe_resp_template {
+	struct ieee80211_header header;
+	__le32 time_stamp[2];
+	__le16 beacon_interval;
+	__le16 capability;
+	struct wl12xx_ie_ssid ssid;
+	struct wl12xx_ie_rates rates;
+	struct wl12xx_ie_rates ext_rates;
+	struct wl12xx_ie_ds_params ds_params;
+	struct wl12xx_ie_country country;
+} __attribute__ ((packed));
+
+#endif
diff --git a/include/linux/spi/wl12xx.h b/include/linux/spi/wl12xx.h
new file mode 100644
index 00000000000..11430cab2aa
--- /dev/null
+++ b/include/linux/spi/wl12xx.h
@@ -0,0 +1,31 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (C) 2009 Nokia Corporation
+ *
+ * Contact: Kalle Valo <kalle.valo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#ifndef _LINUX_SPI_WL12XX_H
+#define _LINUX_SPI_WL12XX_H
+
+struct wl12xx_platform_data {
+	void (*set_power)(bool enable);
+};
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From 9dfd6ba353b993d648dcda72480c7ce92cd27c7e Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni.malinen@atheros.com>
Date: Wed, 6 May 2009 20:34:10 +0300
Subject: mac80211: Update SA Query transaction id length

IEEE 802.11w/D8.0 changed the length of the SA Query transaction
identifier from 16 to 2 octets.

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index c52e7fba4e4..dc92359f37e 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -543,7 +543,7 @@ struct ieee80211_tim_ie {
 	u8 virtual_map[1];
 } __attribute__ ((packed));
 
-#define WLAN_SA_QUERY_TR_ID_LEN 16
+#define WLAN_SA_QUERY_TR_ID_LEN 2
 
 struct ieee80211_mgmt {
 	__le16 frame_control;
-- 
cgit v1.2.3-70-g09d2


From 4d5b78c055c76bb563c4a43d2adb92735b3b9405 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 6 May 2009 16:52:51 -0700
Subject: net: Add missing rculist.h include to netdevice.h

Otherwise list_for_each_entry_rcu() et al. aren't visible
and we get build failures in some configurations.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 02882e2ebd4..2af89b662ca 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -39,6 +39,7 @@
 
 #include <linux/device.h>
 #include <linux/percpu.h>
+#include <linux/rculist.h>
 #include <linux/dmaengine.h>
 #include <linux/workqueue.h>
 
-- 
cgit v1.2.3-70-g09d2


From 08ce4c91e44d51bb6c946f2756825a462d53c545 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 7 Apr 2009 23:40:39 +0200
Subject: dlm: Make name input parameter of {,dlm_}new_lockspace() const

| fs/gfs2/lock_dlm.c:207: warning: passing argument 1 of 'dlm_new_lockspace' discards qualifiers from pointer target type

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lockspace.c  | 4 ++--
 include/linux/dlm.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index cd8e2df3c29..82528d9b72e 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -384,7 +384,7 @@ static void threads_stop(void)
 	dlm_astd_stop();
 }
 
-static int new_lockspace(char *name, int namelen, void **lockspace,
+static int new_lockspace(const char *name, int namelen, void **lockspace,
 			 uint32_t flags, int lvblen)
 {
 	struct dlm_ls *ls;
@@ -614,7 +614,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 	return error;
 }
 
-int dlm_new_lockspace(char *name, int namelen, void **lockspace,
+int dlm_new_lockspace(const char *name, int namelen, void **lockspace,
 		      uint32_t flags, int lvblen)
 {
 	int error = 0;
diff --git a/include/linux/dlm.h b/include/linux/dlm.h
index b9cd38603fd..0b3518c4235 100644
--- a/include/linux/dlm.h
+++ b/include/linux/dlm.h
@@ -81,8 +81,8 @@ struct dlm_lksb {
  * the cluster, the calling node joins it.
  */
 
-int dlm_new_lockspace(char *name, int namelen, dlm_lockspace_t **lockspace,
-		      uint32_t flags, int lvblen);
+int dlm_new_lockspace(const char *name, int namelen,
+		      dlm_lockspace_t **lockspace, uint32_t flags, int lvblen);
 
 /*
  * dlm_release_lockspace
-- 
cgit v1.2.3-70-g09d2


From f5f8d86b231e0489c33542c42afbb14d32411ee8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Thu, 7 May 2009 07:08:38 +0000
Subject: tcp: tcp_prequeue() cleanup

Small cleanup patch to reduce line lengths, before a change in
tcp_prequeue().

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 45 +++++++++++++++++++++++----------------------
 1 file changed, 23 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index b55b4891029..ac37228b700 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -890,30 +890,31 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (!sysctl_tcp_low_latency && tp->ucopy.task) {
-		__skb_queue_tail(&tp->ucopy.prequeue, skb);
-		tp->ucopy.memory += skb->truesize;
-		if (tp->ucopy.memory > sk->sk_rcvbuf) {
-			struct sk_buff *skb1;
-
-			BUG_ON(sock_owned_by_user(sk));
-
-			while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
-				sk_backlog_rcv(sk, skb1);
-				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED);
-			}
-
-			tp->ucopy.memory = 0;
-		} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
-			wake_up_interruptible(sk->sk_sleep);
-			if (!inet_csk_ack_scheduled(sk))
-				inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
-						          (3 * TCP_RTO_MIN) / 4,
-							  TCP_RTO_MAX);
+	if (sysctl_tcp_low_latency || !tp->ucopy.task)
+		return 0;
+
+	__skb_queue_tail(&tp->ucopy.prequeue, skb);
+	tp->ucopy.memory += skb->truesize;
+	if (tp->ucopy.memory > sk->sk_rcvbuf) {
+		struct sk_buff *skb1;
+
+		BUG_ON(sock_owned_by_user(sk));
+
+		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
+			sk_backlog_rcv(sk, skb1);
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPPREQUEUEDROPPED);
 		}
-		return 1;
+
+		tp->ucopy.memory = 0;
+	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
+		wake_up_interruptible(sk->sk_sleep);
+		if (!inet_csk_ack_scheduled(sk))
+			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+						  (3 * TCP_RTO_MIN) / 4,
+						  TCP_RTO_MAX);
 	}
-	return 0;
+	return 1;
 }
 
 
-- 
cgit v1.2.3-70-g09d2


From 7aedec2ad5314b173e78ca3f4edb4ceaa02248bb Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Thu, 7 May 2009 07:20:39 +0000
Subject: tcp: tcp_prequeue() can use keyed wakeups

We can avoid waking up tasks not interested in receive notifications,
using wake_up_interruptible_poll() instead of wake_up_interruptible()

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index ac37228b700..87d210bb12a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -908,7 +908,8 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
 
 		tp->ucopy.memory = 0;
 	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
-		wake_up_interruptible(sk->sk_sleep);
+		wake_up_interruptible_poll(sk->sk_sleep,
+					   POLLIN | POLLRDNORM | POLLRDBAND);
 		if (!inet_csk_ack_scheduled(sk))
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
 						  (3 * TCP_RTO_MIN) / 4,
-- 
cgit v1.2.3-70-g09d2


From 9080b72819650c3a757d173a19bc930d603b79d6 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 21 Jan 2009 15:13:58 +0000
Subject: sh-sci: remove early_sci_setup()

Remove unused early_sci_setup() function from sh-sci.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/serial/sh-sci.c    | 14 --------------
 include/linux/serial_sci.h |  2 --
 2 files changed, 16 deletions(-)

(limited to 'include')

diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index af5da110d52..5f37f7d3166 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -1082,20 +1082,6 @@ static void __init sci_init_ports(void)
 	}
 }
 
-int __init early_sci_setup(struct uart_port *port)
-{
-	if (unlikely(port->line > SCI_NPORTS))
-		return -ENODEV;
-
-	sci_init_ports();
-
-	sci_ports[port->line].port.membase	= port->membase;
-	sci_ports[port->line].port.mapbase	= port->mapbase;
-	sci_ports[port->line].port.type		= port->type;
-
-	return 0;
-}
-
 #ifdef CONFIG_SERIAL_SH_SCI_CONSOLE
 /*
  *	Print a string to the serial port trying not to disturb
diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index 893cc53486b..717059d6791 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -27,6 +27,4 @@ struct plat_sci_port {
 	upf_t		flags;			/* UPF_* flags */
 };
 
-int early_sci_setup(struct uart_port *port);
-
 #endif /* __LINUX_SERIAL_SCI_H */
-- 
cgit v1.2.3-70-g09d2


From 501b825d01efb93766c87d29f299851152cf4eb0 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 21 Jan 2009 15:14:30 +0000
Subject: sh-sci: improve clock framework support

Use enable/disable hooks for clock framework integration.
Make sure we control the clock for the serial console as well.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/serial/sh-sci.c    | 77 +++++++++++++++++++++++++++-------------------
 include/linux/serial_sci.h |  1 +
 2 files changed, 47 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index 408624ae7fe..3daf76725ac 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -76,8 +76,10 @@ struct sci_port {
 	int			break_flag;
 
 #ifdef CONFIG_HAVE_CLK
-	/* Port clock */
-	struct clk		*clk;
+	/* Interface clock */
+	struct clk		*iclk;
+	/* Data clock */
+	struct clk		*dclk;
 #endif
 	struct list_head	node;
 };
@@ -166,12 +168,12 @@ static void h8300_sci_config(struct uart_port *port, unsigned int ctrl)
 		*mstpcrl &= ~mask;
 }
 
-static inline void h8300_sci_enable(struct uart_port *port)
+static void h8300_sci_enable(struct uart_port *port)
 {
 	h8300_sci_config(port, sci_enable);
 }
 
-static inline void h8300_sci_disable(struct uart_port *port)
+static void h8300_sci_disable(struct uart_port *port)
 {
 	h8300_sci_config(port, sci_disable);
 }
@@ -742,13 +744,34 @@ static int sci_notifier(struct notifier_block *self,
 	    (phase == CPUFREQ_RESUMECHANGE)) {
 		spin_lock_irqsave(&priv->lock, flags);
 		list_for_each_entry(sci_port, &priv->ports, node)
-			sci_port->port.uartclk = clk_get_rate(sci_port->clk);
+			sci_port->port.uartclk = clk_get_rate(sci_port->dclk);
 
 		spin_unlock_irqrestore(&priv->lock, flags);
 	}
 
 	return NOTIFY_OK;
 }
+
+static void sci_clk_enable(struct uart_port *port)
+{
+	struct sci_port *sci_port = to_sci_port(port);
+
+	clk_enable(sci_port->dclk);
+	sci_port->port.uartclk = clk_get_rate(sci_port->dclk);
+
+	if (sci_port->iclk)
+		clk_enable(sci_port->iclk);
+}
+
+static void sci_clk_disable(struct uart_port *port)
+{
+	struct sci_port *sci_port = to_sci_port(port);
+
+	if (sci_port->iclk)
+		clk_disable(sci_port->iclk);
+
+	clk_disable(sci_port->dclk);
+}
 #endif
 
 static int sci_request_irq(struct sci_port *port)
@@ -880,10 +903,6 @@ static int sci_startup(struct uart_port *port)
 	if (s->enable)
 		s->enable(port);
 
-#ifdef CONFIG_HAVE_CLK
-	s->clk = clk_get(NULL, "module_clk");
-#endif
-
 	sci_request_irq(s);
 	sci_start_tx(port);
 	sci_start_rx(port, 1);
@@ -901,11 +920,6 @@ static void sci_shutdown(struct uart_port *port)
 
 	if (s->disable)
 		s->disable(port);
-
-#ifdef CONFIG_HAVE_CLK
-	clk_put(s->clk);
-	s->clk = NULL;
-#endif
 }
 
 static void sci_set_termios(struct uart_port *port, struct ktermios *termios,
@@ -1048,7 +1062,8 @@ static struct uart_ops sci_uart_ops = {
 #endif
 };
 
-static void __devinit sci_init_single(struct sci_port *sci_port,
+static void __devinit sci_init_single(struct platform_device *dev,
+				      struct sci_port *sci_port,
 				      unsigned int index,
 				      struct plat_sci_port *p)
 {
@@ -1064,14 +1079,10 @@ static void __devinit sci_init_single(struct sci_port *sci_port,
 #endif
 	sci_port->port.uartclk	= CONFIG_CPU_CLOCK;
 #elif defined(CONFIG_HAVE_CLK)
-	/*
-	 * XXX: We should use a proper SCI/SCIF clock
-	 */
-	{
-		struct clk *clk = clk_get(NULL, "module_clk");
-		sci_port->port.uartclk = clk_get_rate(clk);
-		clk_put(clk);
-	}
+	sci_port->iclk		= p->clk ? clk_get(&dev->dev, p->clk) : NULL;
+	sci_port->dclk		= clk_get(&dev->dev, "module_clk");
+	sci_port->enable	= sci_clk_enable;
+	sci_port->disable	= sci_clk_disable;
 #else
 #error "Need a valid uartclk"
 #endif
@@ -1085,9 +1096,11 @@ static void __devinit sci_init_single(struct sci_port *sci_port,
 
 	sci_port->port.irq	= p->irqs[SCIx_TXI_IRQ];
 	sci_port->port.flags	= p->flags;
+	sci_port->port.dev	= &dev->dev;
 	sci_port->type		= sci_port->port.type = p->type;
 
 	memcpy(&sci_port->irqs, &p->irqs, sizeof(p->irqs));
+
 }
 
 #ifdef CONFIG_SERIAL_SH_SCI_CONSOLE
@@ -1111,14 +1124,21 @@ static void serial_console_write(struct console *co, const char *s,
 				 unsigned count)
 {
 	struct uart_port *port = co->data;
+	struct sci_port *sci_port = to_sci_port(port);
 	unsigned short bits;
 
-	uart_console_write(co->data, s, count, serial_console_putchar);
+	if (sci_port->enable)
+		sci_port->enable(port);
+
+	uart_console_write(port, s, count, serial_console_putchar);
 
 	/* wait until fifo is empty and last bit has been transmitted */
 	bits = SCxSR_TDxE(port) | SCxSR_TEND(port);
 	while ((sci_in(port, SCxSR) & bits) != bits)
 		cpu_relax();
+
+	if (sci_port->disable);
+		sci_port->disable(port);
 }
 
 static int __init serial_console_setup(struct console *co, char *options)
@@ -1152,11 +1172,6 @@ static int __init serial_console_setup(struct console *co, char *options)
 	if (!port->type)
 		return -ENODEV;
 
-#ifdef CONFIG_HAVE_CLK
-	if (!sci_port->clk)
-		sci_port->clk = clk_get(NULL, "module_clk");
-#endif
-
 	sci_config_port(port, 0);
 
 	if (sci_port->enable)
@@ -1171,6 +1186,7 @@ static int __init serial_console_setup(struct console *co, char *options)
 	if (ret == 0)
 		sci_stop_rx(port);
 #endif
+	/* TODO: disable clock */
 	return ret;
 }
 
@@ -1250,8 +1266,7 @@ static int __devinit sci_probe_single(struct platform_device *dev,
 		return 0;
 	}
 
-	sciport->port.dev = &dev->dev;
-	sci_init_single(sciport, index, p);
+	sci_init_single(dev, sciport, index, p);
 
 	ret = uart_add_one_port(&sci_uart_driver, &sciport->port);
 	if (ret)
diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index 717059d6791..1c297ddc9d5 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -25,6 +25,7 @@ struct plat_sci_port {
 	unsigned int	irqs[SCIx_NR_IRQS];	/* ERI, RXI, TXI, BRI */
 	unsigned int	type;			/* SCI / SCIF / IRDA */
 	upf_t		flags;			/* UPF_* flags */
+	char		*clk;			/* clock string */
 };
 
 #endif /* __LINUX_SERIAL_SCI_H */
-- 
cgit v1.2.3-70-g09d2


From 3df5edad87a998273aa5a9a8c728c05d855ad00e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 8 May 2009 18:52:22 +0200
Subject: perf_counter: rework ioctl()s

Corey noticed that ioctl()s on grouped counters didn't work on
the whole group. This extends the ioctl() interface to take a
second argument that is interpreted as a flags field. We then
provide PERF_IOC_FLAG_GROUP to toggle the behaviour.

Having this flag gives the greatest flexibility, allowing you
to individually enable/disable/reset counters in a group, or
all together.

[ Impact: fix group counter enable/disable semantics ]

Reported-by: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <20090508170028.837558214@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  10 +++--
 kernel/perf_counter.c        | 104 ++++++++++++++++++++++++-------------------
 2 files changed, 65 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 00081d84169..88f863ec274 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -157,10 +157,14 @@ struct perf_counter_hw_event {
 /*
  * Ioctls that can be done on a perf counter fd:
  */
-#define PERF_COUNTER_IOC_ENABLE		_IO ('$', 0)
-#define PERF_COUNTER_IOC_DISABLE	_IO ('$', 1)
+#define PERF_COUNTER_IOC_ENABLE		_IOW('$', 0, u32)
+#define PERF_COUNTER_IOC_DISABLE	_IOW('$', 1, u32)
 #define PERF_COUNTER_IOC_REFRESH	_IOW('$', 2, u32)
-#define PERF_COUNTER_IOC_RESET		_IO ('$', 3)
+#define PERF_COUNTER_IOC_RESET		_IOW('$', 3, u32)
+
+enum perf_counter_ioc_flags {
+	PERF_IOC_FLAG_GROUP		= 1U << 0,
+};
 
 /*
  * Structure of the page that can be mapped via mmap
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index fdb0d242127..f4883f1f47e 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -82,7 +82,7 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	 * add it straight to the context's counter list, or to the group
 	 * leader's sibling list:
 	 */
-	if (counter->group_leader == counter)
+	if (group_leader == counter)
 		list_add_tail(&counter->list_entry, &ctx->counter_list);
 	else {
 		list_add_tail(&counter->list_entry, &group_leader->sibling_list);
@@ -385,24 +385,6 @@ static void perf_counter_disable(struct perf_counter *counter)
 	spin_unlock_irq(&ctx->lock);
 }
 
-/*
- * Disable a counter and all its children.
- */
-static void perf_counter_disable_family(struct perf_counter *counter)
-{
-	struct perf_counter *child;
-
-	perf_counter_disable(counter);
-
-	/*
-	 * Lock the mutex to protect the list of children
-	 */
-	mutex_lock(&counter->mutex);
-	list_for_each_entry(child, &counter->child_list, child_list)
-		perf_counter_disable(child);
-	mutex_unlock(&counter->mutex);
-}
-
 static int
 counter_sched_in(struct perf_counter *counter,
 		 struct perf_cpu_context *cpuctx,
@@ -753,24 +735,6 @@ static int perf_counter_refresh(struct perf_counter *counter, int refresh)
 	return 0;
 }
 
-/*
- * Enable a counter and all its children.
- */
-static void perf_counter_enable_family(struct perf_counter *counter)
-{
-	struct perf_counter *child;
-
-	perf_counter_enable(counter);
-
-	/*
-	 * Lock the mutex to protect the list of children
-	 */
-	mutex_lock(&counter->mutex);
-	list_for_each_entry(child, &counter->child_list, child_list)
-		perf_counter_enable(child);
-	mutex_unlock(&counter->mutex);
-}
-
 void __perf_counter_sched_out(struct perf_counter_context *ctx,
 			      struct perf_cpu_context *cpuctx)
 {
@@ -1307,31 +1271,79 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 
 static void perf_counter_reset(struct perf_counter *counter)
 {
+	(void)perf_counter_read(counter);
 	atomic_set(&counter->count, 0);
+	perf_counter_update_userpage(counter);
+}
+
+static void perf_counter_for_each_sibling(struct perf_counter *counter,
+					  void (*func)(struct perf_counter *))
+{
+	struct perf_counter_context *ctx = counter->ctx;
+	struct perf_counter *sibling;
+
+	spin_lock_irq(&ctx->lock);
+	counter = counter->group_leader;
+
+	func(counter);
+	list_for_each_entry(sibling, &counter->sibling_list, list_entry)
+		func(sibling);
+	spin_unlock_irq(&ctx->lock);
+}
+
+static void perf_counter_for_each_child(struct perf_counter *counter,
+					void (*func)(struct perf_counter *))
+{
+	struct perf_counter *child;
+
+	mutex_lock(&counter->mutex);
+	func(counter);
+	list_for_each_entry(child, &counter->child_list, child_list)
+		func(child);
+	mutex_unlock(&counter->mutex);
+}
+
+static void perf_counter_for_each(struct perf_counter *counter,
+				  void (*func)(struct perf_counter *))
+{
+	struct perf_counter *child;
+
+	mutex_lock(&counter->mutex);
+	perf_counter_for_each_sibling(counter, func);
+	list_for_each_entry(child, &counter->child_list, child_list)
+		perf_counter_for_each_sibling(child, func);
+	mutex_unlock(&counter->mutex);
 }
 
 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct perf_counter *counter = file->private_data;
-	int err = 0;
+	void (*func)(struct perf_counter *);
+	u32 flags = arg;
 
 	switch (cmd) {
 	case PERF_COUNTER_IOC_ENABLE:
-		perf_counter_enable_family(counter);
+		func = perf_counter_enable;
 		break;
 	case PERF_COUNTER_IOC_DISABLE:
-		perf_counter_disable_family(counter);
-		break;
-	case PERF_COUNTER_IOC_REFRESH:
-		err = perf_counter_refresh(counter, arg);
+		func = perf_counter_disable;
 		break;
 	case PERF_COUNTER_IOC_RESET:
-		perf_counter_reset(counter);
+		func = perf_counter_reset;
 		break;
+
+	case PERF_COUNTER_IOC_REFRESH:
+		return perf_counter_refresh(counter, arg);
 	default:
-		err = -ENOTTY;
+		return -ENOTTY;
 	}
-	return err;
+
+	if (flags & PERF_IOC_FLAG_GROUP)
+		perf_counter_for_each(counter, func);
+	else
+		perf_counter_for_each_child(counter, func);
+
+	return 0;
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From a85f61abe11a46553c4562e74edb27ebc782aeb7 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 8 May 2009 18:52:23 +0200
Subject: perf_counter: add PERF_RECORD_CONFIG

Much like CONFIG_RECORD_GROUP records the hw_event.config to
identify the values, allow to record this for all counters.

[ Impact: extend perfcounter output record format ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090508170028.923228280@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 2 ++
 kernel/perf_counter.c        | 8 ++++++++
 2 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 88f863ec274..0e6303d36c6 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -104,6 +104,7 @@ enum perf_counter_record_format {
 	PERF_RECORD_ADDR	= 1U << 3,
 	PERF_RECORD_GROUP	= 1U << 4,
 	PERF_RECORD_CALLCHAIN	= 1U << 5,
+	PERF_RECORD_CONFIG	= 1U << 6,
 };
 
 /*
@@ -258,6 +259,7 @@ enum perf_event_type {
 	 * 	{ u32			pid, tid; } && PERF_RECORD_TID
 	 * 	{ u64			time;     } && PERF_RECORD_TIME
 	 * 	{ u64			addr;     } && PERF_RECORD_ADDR
+	 * 	{ u64			config;   } && PERF_RECORD_CONFIG
 	 *
 	 * 	{ u64			nr;
 	 * 	  { u64 event, val; } 	cnt[nr];  } && PERF_RECORD_GROUP
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f4883f1f47e..c615f52aa40 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1994,6 +1994,11 @@ static void perf_counter_output(struct perf_counter *counter,
 		header.size += sizeof(u64);
 	}
 
+	if (record_type & PERF_RECORD_CONFIG) {
+		header.type |= PERF_RECORD_CONFIG;
+		header.size += sizeof(u64);
+	}
+
 	if (record_type & PERF_RECORD_GROUP) {
 		header.type |= PERF_RECORD_GROUP;
 		header.size += sizeof(u64) +
@@ -2029,6 +2034,9 @@ static void perf_counter_output(struct perf_counter *counter,
 	if (record_type & PERF_RECORD_ADDR)
 		perf_output_put(&handle, addr);
 
+	if (record_type & PERF_RECORD_CONFIG)
+		perf_output_put(&handle, counter->hw_event.config);
+
 	/*
 	 * XXX PERF_RECORD_GROUP vs inherited counters seems difficult.
 	 */
-- 
cgit v1.2.3-70-g09d2


From f370e1e2f195ec1e6420e26fc83e0319595db578 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 8 May 2009 18:52:24 +0200
Subject: perf_counter: add PERF_RECORD_CPU

Allow recording the CPU number the event was generated on.

RFC: this leaves a u32 as reserved, should we fill in the
     node_id() there, or leave this open for future extention,
     as userspace can already easily do the cpu->node mapping
     if needed.

[ Impact: extend perfcounter output record format ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090508170029.008627711@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  2 ++
 kernel/perf_counter.c        | 13 +++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 0e6303d36c6..614f921d616 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -105,6 +105,7 @@ enum perf_counter_record_format {
 	PERF_RECORD_GROUP	= 1U << 4,
 	PERF_RECORD_CALLCHAIN	= 1U << 5,
 	PERF_RECORD_CONFIG	= 1U << 6,
+	PERF_RECORD_CPU		= 1U << 7,
 };
 
 /*
@@ -260,6 +261,7 @@ enum perf_event_type {
 	 * 	{ u64			time;     } && PERF_RECORD_TIME
 	 * 	{ u64			addr;     } && PERF_RECORD_ADDR
 	 * 	{ u64			config;   } && PERF_RECORD_CONFIG
+	 * 	{ u32			cpu, res; } && PERF_RECORD_CPU
 	 *
 	 * 	{ u64			nr;
 	 * 	  { u64 event, val; } 	cnt[nr];  } && PERF_RECORD_GROUP
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index c615f52aa40..d850a1fb8d4 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1956,6 +1956,9 @@ static void perf_counter_output(struct perf_counter *counter,
 	struct perf_callchain_entry *callchain = NULL;
 	int callchain_size = 0;
 	u64 time;
+	struct {
+		u32 cpu, reserved;
+	} cpu_entry;
 
 	header.type = 0;
 	header.size = sizeof(header);
@@ -1999,6 +2002,13 @@ static void perf_counter_output(struct perf_counter *counter,
 		header.size += sizeof(u64);
 	}
 
+	if (record_type & PERF_RECORD_CPU) {
+		header.type |= PERF_RECORD_CPU;
+		header.size += sizeof(cpu_entry);
+
+		cpu_entry.cpu = raw_smp_processor_id();
+	}
+
 	if (record_type & PERF_RECORD_GROUP) {
 		header.type |= PERF_RECORD_GROUP;
 		header.size += sizeof(u64) +
@@ -2037,6 +2047,9 @@ static void perf_counter_output(struct perf_counter *counter,
 	if (record_type & PERF_RECORD_CONFIG)
 		perf_output_put(&handle, counter->hw_event.config);
 
+	if (record_type & PERF_RECORD_CPU)
+		perf_output_put(&handle, cpu_entry);
+
 	/*
 	 * XXX PERF_RECORD_GROUP vs inherited counters seems difficult.
 	 */
-- 
cgit v1.2.3-70-g09d2


From 4671c79408a3f8a5a6a45e39c4c164dada3a5678 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 8 May 2009 16:27:41 -0400
Subject: tracing: add trace_set_clr_event to export event enabling function

Other parts of the kernel may need to be able to enable or disable
specific events. Especially parts that create trace events.

[ Impact: allow enabling of trace events by those that create the event ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h |  2 ++
 kernel/trace/trace_events.c  | 17 +++++++++++++++++
 2 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 662c1becf36..bae51ddfabd 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -127,6 +127,8 @@ extern int trace_define_field(struct ftrace_event_call *call, char *type,
 
 #define is_signed_type(type)	(((type)(-1)) < 0)
 
+int trace_set_clr_event(const char *system, const char *event, int set);
+
 /*
  * The double __builtin_constant_p is because gcc will give us an error
  * if we try to allocate the static variable to fmt if it is not a
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 2eecb87e42d..0eec0c55dd8 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -177,6 +177,23 @@ static int ftrace_set_clr_event(char *buf, int set)
 	return __ftrace_set_clr_event(match, sub, event, set);
 }
 
+/**
+ * trace_set_clr_event - enable or disable an event
+ * @system: system name to match (NULL for any system)
+ * @event: event name to match (NULL for all events, within system)
+ * @set: 1 to enable, 0 to disable
+ *
+ * This is a way for other parts of the kernel to enable or disable
+ * event recording.
+ *
+ * Returns 0 on success, -EINVAL if the parameters do not match any
+ * registered events.
+ */
+int trace_set_clr_event(const char *system, const char *event, int set)
+{
+	return __ftrace_set_clr_event(NULL, system, event, set);
+}
+
 /* 128 should be much more than enough */
 #define EVENT_BUF_SIZE		127
 
-- 
cgit v1.2.3-70-g09d2


From 7e044e056a6aa0dc695db50461d7b326fde15e8b Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Sat, 9 May 2009 16:08:05 -0700
Subject: Input: serio - do not use deprecated dev.power.power_state

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/gameport/gameport.c |  6 +++---
 drivers/input/serio/i8042.c       | 17 +++++++++--------
 drivers/input/serio/serio.c       | 37 +++++++++++++++++++------------------
 include/linux/gameport.h          |  3 ++-
 include/linux/serio.h             |  9 ++++++---
 5 files changed, 39 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c
index 0279d6983cc..ac11be08585 100644
--- a/drivers/input/gameport/gameport.c
+++ b/drivers/input/gameport/gameport.c
@@ -723,7 +723,7 @@ int __gameport_register_driver(struct gameport_driver *drv, struct module *owner
 	 * Temporarily disable automatic binding because probing
 	 * takes long time and we are better off doing it in kgameportd
 	 */
-	drv->ignore = 1;
+	drv->ignore = true;
 
 	error = driver_register(&drv->driver);
 	if (error) {
@@ -736,7 +736,7 @@ int __gameport_register_driver(struct gameport_driver *drv, struct module *owner
 	/*
 	 * Reset ignore flag and let kgameportd bind the driver to free ports
 	 */
-	drv->ignore = 0;
+	drv->ignore = false;
 	error = gameport_queue_event(drv, NULL, GAMEPORT_ATTACH_DRIVER);
 	if (error) {
 		driver_unregister(&drv->driver);
@@ -753,7 +753,7 @@ void gameport_unregister_driver(struct gameport_driver *drv)
 
 	mutex_lock(&gameport_mutex);
 
-	drv->ignore = 1;	/* so gameport_find_driver ignores it */
+	drv->ignore = true;	/* so gameport_find_driver ignores it */
 	gameport_remove_pending_events(drv);
 
 start_over:
diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index 3cffb704e37..f919bf57293 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -10,6 +10,7 @@
  * the Free Software Foundation.
  */
 
+#include <linux/types.h>
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
@@ -921,6 +922,9 @@ static void i8042_dritek_enable(void)
 #endif
 
 #ifdef CONFIG_PM
+
+static bool i8042_suspended;
+
 /*
  * Here we try to restore the original BIOS settings. We only want to
  * do that once, when we really suspend, not when we taking memory
@@ -930,11 +934,9 @@ static void i8042_dritek_enable(void)
 
 static int i8042_suspend(struct platform_device *dev, pm_message_t state)
 {
-	if (dev->dev.power.power_state.event != state.event) {
-		if (state.event == PM_EVENT_SUSPEND)
-			i8042_controller_reset();
-
-		dev->dev.power.power_state = state;
+	if (!i8042_suspended && state.event == PM_EVENT_SUSPEND) {
+		i8042_controller_reset();
+		i8042_suspended = true;
 	}
 
 	return 0;
@@ -952,7 +954,7 @@ static int i8042_resume(struct platform_device *dev)
 /*
  * Do not bother with restoring state if we haven't suspened yet
  */
-	if (dev->dev.power.power_state.event == PM_EVENT_ON)
+	if (!i8042_suspended)
 		return 0;
 
 	error = i8042_controller_check();
@@ -998,10 +1000,9 @@ static int i8042_resume(struct platform_device *dev)
 	if (i8042_ports[I8042_KBD_PORT_NO].serio)
 		i8042_enable_kbd_port();
 
+	i8042_suspended = false;
 	i8042_interrupt(0, NULL);
 
-	dev->dev.power.power_state = PMSG_ON;
-
 	return 0;
 }
 #endif /* CONFIG_PM */
diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c
index 8d2df5d2d5a..fb17573f8f2 100644
--- a/drivers/input/serio/serio.c
+++ b/drivers/input/serio/serio.c
@@ -495,9 +495,9 @@ static ssize_t serio_set_bind_mode(struct device *dev, struct device_attribute *
 
 	retval = count;
 	if (!strncmp(buf, "manual", count)) {
-		serio->manual_bind = 1;
+		serio->manual_bind = true;
 	} else if (!strncmp(buf, "auto", count)) {
-		serio->manual_bind = 0;
+		serio->manual_bind = false;
 	} else {
 		retval = -EINVAL;
 	}
@@ -570,7 +570,7 @@ static void serio_add_port(struct serio *serio)
 			"serio: device_add() failed for %s (%s), error: %d\n",
 			serio->phys, serio->name, error);
 	else {
-		serio->registered = 1;
+		serio->registered = true;
 		error = sysfs_create_group(&serio->dev.kobj, &serio_id_attr_group);
 		if (error)
 			printk(KERN_ERR
@@ -606,7 +606,7 @@ static void serio_destroy_port(struct serio *serio)
 	if (serio->registered) {
 		sysfs_remove_group(&serio->dev.kobj, &serio_id_attr_group);
 		device_del(&serio->dev);
-		serio->registered = 0;
+		serio->registered = false;
 	}
 
 	list_del_init(&serio->node);
@@ -750,9 +750,9 @@ static ssize_t serio_driver_set_bind_mode(struct device_driver *drv, const char
 
 	retval = count;
 	if (!strncmp(buf, "manual", count)) {
-		serio_drv->manual_bind = 1;
+		serio_drv->manual_bind = true;
 	} else if (!strncmp(buf, "auto", count)) {
-		serio_drv->manual_bind = 0;
+		serio_drv->manual_bind = false;
 	} else {
 		retval = -EINVAL;
 	}
@@ -812,7 +812,7 @@ static void serio_attach_driver(struct serio_driver *drv)
 
 int __serio_register_driver(struct serio_driver *drv, struct module *owner, const char *mod_name)
 {
-	int manual_bind = drv->manual_bind;
+	bool manual_bind = drv->manual_bind;
 	int error;
 
 	drv->driver.bus = &serio_bus;
@@ -823,7 +823,7 @@ int __serio_register_driver(struct serio_driver *drv, struct module *owner, cons
 	 * Temporarily disable automatic binding because probing
 	 * takes long time and we are better off doing it in kseriod
 	 */
-	drv->manual_bind = 1;
+	drv->manual_bind = true;
 
 	error = driver_register(&drv->driver);
 	if (error) {
@@ -838,7 +838,7 @@ int __serio_register_driver(struct serio_driver *drv, struct module *owner, cons
 	 * driver to free ports
 	 */
 	if (!manual_bind) {
-		drv->manual_bind = 0;
+		drv->manual_bind = false;
 		error = serio_queue_event(drv, NULL, SERIO_ATTACH_DRIVER);
 		if (error) {
 			driver_unregister(&drv->driver);
@@ -856,7 +856,7 @@ void serio_unregister_driver(struct serio_driver *drv)
 
 	mutex_lock(&serio_mutex);
 
-	drv->manual_bind = 1;	/* so serio_find_driver ignores it */
+	drv->manual_bind = true;	/* so serio_find_driver ignores it */
 	serio_remove_pending_events(drv);
 
 start_over:
@@ -933,11 +933,11 @@ static int serio_uevent(struct device *dev, struct kobj_uevent_env *env)
 #ifdef CONFIG_PM
 static int serio_suspend(struct device *dev, pm_message_t state)
 {
-	if (dev->power.power_state.event != state.event) {
-		if (state.event == PM_EVENT_SUSPEND)
-			serio_cleanup(to_serio_port(dev));
+	struct serio *serio = to_serio_port(dev);
 
-		dev->power.power_state = state;
+	if (!serio->suspended && state.event == PM_EVENT_SUSPEND) {
+		serio_cleanup(serio);
+		serio->suspended = true;
 	}
 
 	return 0;
@@ -945,14 +945,15 @@ static int serio_suspend(struct device *dev, pm_message_t state)
 
 static int serio_resume(struct device *dev)
 {
+	struct serio *serio = to_serio_port(dev);
+
 	/*
 	 * Driver reconnect can take a while, so better let kseriod
 	 * deal with it.
 	 */
-	if (dev->power.power_state.event != PM_EVENT_ON) {
-		dev->power.power_state = PMSG_ON;
-		serio_queue_event(to_serio_port(dev), NULL,
-				  SERIO_RECONNECT_PORT);
+	if (serio->suspended) {
+		serio->suspended = false;
+		serio_queue_event(serio, NULL, SERIO_RECONNECT_PORT);
 	}
 
 	return 0;
diff --git a/include/linux/gameport.h b/include/linux/gameport.h
index 0cd825f7363..1bc08541c2b 100644
--- a/include/linux/gameport.h
+++ b/include/linux/gameport.h
@@ -11,6 +11,7 @@
 
 #ifdef __KERNEL__
 #include <asm/io.h>
+#include <linux/types.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/device.h>
@@ -62,7 +63,7 @@ struct gameport_driver {
 
 	struct device_driver driver;
 
-	unsigned int ignore;
+	bool ignore;
 };
 #define to_gameport_driver(d)	container_of(d, struct gameport_driver, driver)
 
diff --git a/include/linux/serio.h b/include/linux/serio.h
index e0417e4d3f1..126d24c9eaa 100644
--- a/include/linux/serio.h
+++ b/include/linux/serio.h
@@ -15,6 +15,7 @@
 
 #ifdef __KERNEL__
 
+#include <linux/types.h>
 #include <linux/interrupt.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
@@ -28,7 +29,10 @@ struct serio {
 	char name[32];
 	char phys[32];
 
-	unsigned int manual_bind;
+	bool manual_bind;
+	bool registered;	/* port has been fully registered with driver core */
+	bool suspended;		/* port is suspended */
+
 
 	struct serio_device_id id;
 
@@ -47,7 +51,6 @@ struct serio {
 	struct mutex drv_mutex;		/* protects serio->drv so attributes can pin driver */
 
 	struct device dev;
-	unsigned int registered;	/* port has been fully registered with driver core */
 
 	struct list_head node;
 };
@@ -58,7 +61,7 @@ struct serio_driver {
 	char *description;
 
 	struct serio_device_id *id_table;
-	unsigned int manual_bind;
+	bool manual_bind;
 
 	void (*write_wakeup)(struct serio *);
 	irqreturn_t (*interrupt)(struct serio *, unsigned char, unsigned int);
-- 
cgit v1.2.3-70-g09d2


From 4c25a2c1b90bf785fc2e2f0f0c74a80b3e070d39 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sun, 10 May 2009 17:16:06 +0100
Subject: intel-iommu: Clean up handling of "caching mode" vs. context
 flushing.

It really doesn't make a lot of sense to have some of the logic to
handle caching vs. non-caching mode duplicated in qi_flush_context() and
__iommu_flush_context(), while the return value indicates whether the
caller should take other action which depends on the same thing.

Especially since qi_flush_context() thought it was returning something
entirely different anyway.

This patch makes qi_flush_context() and __iommu_flush_context() both
return void, removes the 'non_present_entry_flush' argument and makes
the only call site which _set_ that argument to 1 do the right thing.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/dmar.c          | 13 +++---------
 drivers/pci/intel-iommu.c   | 52 ++++++++++++++++++---------------------------
 include/linux/intel-iommu.h |  8 +++----
 3 files changed, 28 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index d3d86b749ee..10a071ba323 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -723,23 +723,16 @@ void qi_global_iec(struct intel_iommu *iommu)
 	qi_submit_sync(&desc, iommu);
 }
 
-int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
-		     u64 type, int non_present_entry_flush)
+void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
+		      u64 type)
 {
 	struct qi_desc desc;
 
-	if (non_present_entry_flush) {
-		if (!cap_caching_mode(iommu->cap))
-			return 1;
-		else
-			did = 0;
-	}
-
 	desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
 			| QI_CC_GRAN(type) | QI_CC_TYPE;
 	desc.high = 0;
 
-	return qi_submit_sync(&desc, iommu);
+	qi_submit_sync(&desc, iommu);
 }
 
 int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index d6f4ee50924..9f5d9151edc 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -857,26 +857,13 @@ static void iommu_flush_write_buffer(struct intel_iommu *iommu)
 }
 
 /* return value determine if we need a write buffer flush */
-static int __iommu_flush_context(struct intel_iommu *iommu,
-	u16 did, u16 source_id, u8 function_mask, u64 type,
-	int non_present_entry_flush)
+static void __iommu_flush_context(struct intel_iommu *iommu,
+				  u16 did, u16 source_id, u8 function_mask,
+				  u64 type)
 {
 	u64 val = 0;
 	unsigned long flag;
 
-	/*
-	 * In the non-present entry flush case, if hardware doesn't cache
-	 * non-present entry we do nothing and if hardware cache non-present
-	 * entry, we flush entries of domain 0 (the domain id is used to cache
-	 * any non-present entries)
-	 */
-	if (non_present_entry_flush) {
-		if (!cap_caching_mode(iommu->cap))
-			return 1;
-		else
-			did = 0;
-	}
-
 	switch (type) {
 	case DMA_CCMD_GLOBAL_INVL:
 		val = DMA_CCMD_GLOBAL_INVL;
@@ -901,9 +888,6 @@ static int __iommu_flush_context(struct intel_iommu *iommu,
 		dmar_readq, (!(val & DMA_CCMD_ICC)), val);
 
 	spin_unlock_irqrestore(&iommu->register_lock, flag);
-
-	/* flush context entry will implicitly flush write buffer */
-	return 0;
 }
 
 /* return value determine if we need a write buffer flush */
@@ -1428,14 +1412,21 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
 	context_set_present(context);
 	domain_flush_cache(domain, context, sizeof(*context));
 
-	/* it's a non-present to present mapping */
-	if (iommu->flush.flush_context(iommu, id,
-		(((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
-		DMA_CCMD_DEVICE_INVL, 1))
-		iommu_flush_write_buffer(iommu);
-	else
+	/*
+	 * It's a non-present to present mapping. If hardware doesn't cache
+	 * non-present entry we only need to flush the write-buffer. If the
+	 * _does_ cache non-present entries, then it does so in the special
+	 * domain #0, which we have to flush:
+	 */
+	if (cap_caching_mode(iommu->cap)) {
+		iommu->flush.flush_context(iommu, 0,
+					   (((u16)bus) << 8) | devfn,
+					   DMA_CCMD_MASK_NOBIT,
+					   DMA_CCMD_DEVICE_INVL);
 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
-
+	} else {
+		iommu_flush_write_buffer(iommu);
+	}
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	spin_lock_irqsave(&domain->iommu_lock, flags);
@@ -1566,7 +1557,7 @@ static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
 
 	clear_context_table(iommu, bus, devfn);
 	iommu->flush.flush_context(iommu, 0, 0, 0,
-					   DMA_CCMD_GLOBAL_INVL, 0);
+					   DMA_CCMD_GLOBAL_INVL);
 	iommu->flush.flush_iotlb(iommu, 0, 0, 0,
 					 DMA_TLB_GLOBAL_FLUSH, 0);
 }
@@ -2104,8 +2095,7 @@ static int __init init_dmars(void)
 
 		iommu_set_root_entry(iommu);
 
-		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
-					   0);
+		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
 					 0);
 		iommu_disable_protect_mem_regions(iommu);
@@ -2721,7 +2711,7 @@ static int init_iommu_hw(void)
 		iommu_set_root_entry(iommu);
 
 		iommu->flush.flush_context(iommu, 0, 0, 0,
-						DMA_CCMD_GLOBAL_INVL, 0);
+						DMA_CCMD_GLOBAL_INVL);
 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
 						DMA_TLB_GLOBAL_FLUSH, 0);
 		iommu_disable_protect_mem_regions(iommu);
@@ -2738,7 +2728,7 @@ static void iommu_flush_all(void)
 
 	for_each_active_iommu(iommu, drhd) {
 		iommu->flush.flush_context(iommu, 0, 0, 0,
-						DMA_CCMD_GLOBAL_INVL, 0);
+						DMA_CCMD_GLOBAL_INVL);
 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
 						DMA_TLB_GLOBAL_FLUSH, 0);
 	}
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 7246971a7fe..f2b94dafbf3 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -281,8 +281,8 @@ struct ir_table {
 #endif
 
 struct iommu_flush {
-	int (*flush_context)(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
-		u64 type, int non_present_entry_flush);
+	void (*flush_context)(struct intel_iommu *iommu, u16 did, u16 sid,
+			      u8 fm, u64 type);
 	int (*flush_iotlb)(struct intel_iommu *iommu, u16 did, u64 addr,
 		unsigned int size_order, u64 type, int non_present_entry_flush);
 };
@@ -339,8 +339,8 @@ extern void dmar_disable_qi(struct intel_iommu *iommu);
 extern int dmar_reenable_qi(struct intel_iommu *iommu);
 extern void qi_global_iec(struct intel_iommu *iommu);
 
-extern int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid,
-			        u8 fm, u64 type, int non_present_entry_flush);
+extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid,
+			     u8 fm, u64 type);
 extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 			  unsigned int size_order, u64 type,
 			  int non_present_entry_flush);
-- 
cgit v1.2.3-70-g09d2


From 1f0ef2aa18802a8ce7eb5a5164aaaf4d59073801 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sun, 10 May 2009 19:58:49 +0100
Subject: intel-iommu: Clean up handling of "caching mode" vs. IOTLB flushing.

As we just did for context cache flushing, clean up the logic around
whether we need to flush the iotlb or just the write-buffer, depending
on caching mode.

Fix the same bug in qi_flush_iotlb() that qi_flush_context() had -- it
isn't supposed to be returning an error; it's supposed to be returning a
flag which triggers a write-buffer flush.

Remove some superfluous conditional write-buffer flushes which could
never have happened because they weren't for non-present-to-present
mapping changes anyway.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/dmar.c          | 14 ++------
 drivers/pci/intel-iommu.c   | 78 +++++++++++++++++----------------------------
 include/linux/intel-iommu.h |  9 +++---
 3 files changed, 37 insertions(+), 64 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 10a071ba323..df6af0d4ec0 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -735,22 +735,14 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
 	qi_submit_sync(&desc, iommu);
 }
 
-int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
-		   unsigned int size_order, u64 type,
-		   int non_present_entry_flush)
+void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
+		    unsigned int size_order, u64 type)
 {
 	u8 dw = 0, dr = 0;
 
 	struct qi_desc desc;
 	int ih = 0;
 
-	if (non_present_entry_flush) {
-		if (!cap_caching_mode(iommu->cap))
-			return 1;
-		else
-			did = 0;
-	}
-
 	if (cap_write_drain(iommu->cap))
 		dw = 1;
 
@@ -762,7 +754,7 @@ int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 	desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
 		| QI_IOTLB_AM(size_order);
 
-	return qi_submit_sync(&desc, iommu);
+	qi_submit_sync(&desc, iommu);
 }
 
 /*
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 9f5d9151edc..f47d04aced8 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -891,27 +891,13 @@ static void __iommu_flush_context(struct intel_iommu *iommu,
 }
 
 /* return value determine if we need a write buffer flush */
-static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
-	u64 addr, unsigned int size_order, u64 type,
-	int non_present_entry_flush)
+static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
+				u64 addr, unsigned int size_order, u64 type)
 {
 	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
 	u64 val = 0, val_iva = 0;
 	unsigned long flag;
 
-	/*
-	 * In the non-present entry flush case, if hardware doesn't cache
-	 * non-present entry we do nothing and if hardware cache non-present
-	 * entry, we flush entries of domain 0 (the domain id is used to cache
-	 * any non-present entries)
-	 */
-	if (non_present_entry_flush) {
-		if (!cap_caching_mode(iommu->cap))
-			return 1;
-		else
-			did = 0;
-	}
-
 	switch (type) {
 	case DMA_TLB_GLOBAL_FLUSH:
 		/* global flush doesn't need set IVA_REG */
@@ -959,12 +945,10 @@ static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
 		pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
 			(unsigned long long)DMA_TLB_IIRG(type),
 			(unsigned long long)DMA_TLB_IAIG(val));
-	/* flush iotlb entry will implicitly flush write buffer */
-	return 0;
 }
 
-static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
-	u64 addr, unsigned int pages, int non_present_entry_flush)
+static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
+				  u64 addr, unsigned int pages)
 {
 	unsigned int mask;
 
@@ -974,8 +958,7 @@ static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
 	/* Fallback to domain selective flush if no PSI support */
 	if (!cap_pgsel_inv(iommu->cap))
 		return iommu->flush.flush_iotlb(iommu, did, 0, 0,
-						DMA_TLB_DSI_FLUSH,
-						non_present_entry_flush);
+						DMA_TLB_DSI_FLUSH);
 
 	/*
 	 * PSI requires page size to be 2 ^ x, and the base address is naturally
@@ -985,11 +968,10 @@ static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
 	/* Fallback to domain selective flush if size is too big */
 	if (mask > cap_max_amask_val(iommu->cap))
 		return iommu->flush.flush_iotlb(iommu, did, 0, 0,
-			DMA_TLB_DSI_FLUSH, non_present_entry_flush);
+						DMA_TLB_DSI_FLUSH);
 
 	return iommu->flush.flush_iotlb(iommu, did, addr, mask,
-					DMA_TLB_PSI_FLUSH,
-					non_present_entry_flush);
+					DMA_TLB_PSI_FLUSH);
 }
 
 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
@@ -1423,7 +1405,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
 					   (((u16)bus) << 8) | devfn,
 					   DMA_CCMD_MASK_NOBIT,
 					   DMA_CCMD_DEVICE_INVL);
-		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
+		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH);
 	} else {
 		iommu_flush_write_buffer(iommu);
 	}
@@ -1558,8 +1540,7 @@ static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
 	clear_context_table(iommu, bus, devfn);
 	iommu->flush.flush_context(iommu, 0, 0, 0,
 					   DMA_CCMD_GLOBAL_INVL);
-	iommu->flush.flush_iotlb(iommu, 0, 0, 0,
-					 DMA_TLB_GLOBAL_FLUSH, 0);
+	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
 }
 
 static void domain_remove_dev_info(struct dmar_domain *domain)
@@ -2096,8 +2077,7 @@ static int __init init_dmars(void)
 		iommu_set_root_entry(iommu);
 
 		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
-		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
-					 0);
+		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
 		iommu_disable_protect_mem_regions(iommu);
 
 		ret = iommu_enable_translation(iommu);
@@ -2244,10 +2224,11 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
 	if (ret)
 		goto error;
 
-	/* it's a non-present to present mapping */
-	ret = iommu_flush_iotlb_psi(iommu, domain->id,
-			start_paddr, size >> VTD_PAGE_SHIFT, 1);
-	if (ret)
+	/* it's a non-present to present mapping. Only flush if caching mode */
+	if (cap_caching_mode(iommu->cap))
+		iommu_flush_iotlb_psi(iommu, 0, start_paddr,
+				      size >> VTD_PAGE_SHIFT);
+	else
 		iommu_flush_write_buffer(iommu);
 
 	return start_paddr + ((u64)paddr & (~PAGE_MASK));
@@ -2283,7 +2264,7 @@ static void flush_unmaps(void)
 
 		if (deferred_flush[i].next) {
 			iommu->flush.flush_iotlb(iommu, 0, 0, 0,
-						 DMA_TLB_GLOBAL_FLUSH, 0);
+						 DMA_TLB_GLOBAL_FLUSH);
 			for (j = 0; j < deferred_flush[i].next; j++) {
 				__free_iova(&deferred_flush[i].domain[j]->iovad,
 						deferred_flush[i].iova[j]);
@@ -2362,9 +2343,8 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
 	/* free page tables */
 	dma_pte_free_pagetable(domain, start_addr, start_addr + size);
 	if (intel_iommu_strict) {
-		if (iommu_flush_iotlb_psi(iommu,
-			domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
-			iommu_flush_write_buffer(iommu);
+		iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
+				      size >> VTD_PAGE_SHIFT);
 		/* free iova */
 		__free_iova(&domain->iovad, iova);
 	} else {
@@ -2455,9 +2435,8 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
 	/* free page tables */
 	dma_pte_free_pagetable(domain, start_addr, start_addr + size);
 
-	if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
-			size >> VTD_PAGE_SHIFT, 0))
-		iommu_flush_write_buffer(iommu);
+	iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
+			      size >> VTD_PAGE_SHIFT);
 
 	/* free iova */
 	__free_iova(&domain->iovad, iova);
@@ -2549,10 +2528,13 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
 		offset += size;
 	}
 
-	/* it's a non-present to present mapping */
-	if (iommu_flush_iotlb_psi(iommu, domain->id,
-			start_addr, offset >> VTD_PAGE_SHIFT, 1))
+	/* it's a non-present to present mapping. Only flush if caching mode */
+	if (cap_caching_mode(iommu->cap))
+		iommu_flush_iotlb_psi(iommu, 0, start_addr,
+				      offset >> VTD_PAGE_SHIFT);
+	else
 		iommu_flush_write_buffer(iommu);
+
 	return nelems;
 }
 
@@ -2711,9 +2693,9 @@ static int init_iommu_hw(void)
 		iommu_set_root_entry(iommu);
 
 		iommu->flush.flush_context(iommu, 0, 0, 0,
-						DMA_CCMD_GLOBAL_INVL);
+					   DMA_CCMD_GLOBAL_INVL);
 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
-						DMA_TLB_GLOBAL_FLUSH, 0);
+					 DMA_TLB_GLOBAL_FLUSH);
 		iommu_disable_protect_mem_regions(iommu);
 		iommu_enable_translation(iommu);
 	}
@@ -2728,9 +2710,9 @@ static void iommu_flush_all(void)
 
 	for_each_active_iommu(iommu, drhd) {
 		iommu->flush.flush_context(iommu, 0, 0, 0,
-						DMA_CCMD_GLOBAL_INVL);
+					   DMA_CCMD_GLOBAL_INVL);
 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
-						DMA_TLB_GLOBAL_FLUSH, 0);
+					 DMA_TLB_GLOBAL_FLUSH);
 	}
 }
 
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index f2b94dafbf3..29e05a034c0 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -283,8 +283,8 @@ struct ir_table {
 struct iommu_flush {
 	void (*flush_context)(struct intel_iommu *iommu, u16 did, u16 sid,
 			      u8 fm, u64 type);
-	int (*flush_iotlb)(struct intel_iommu *iommu, u16 did, u64 addr,
-		unsigned int size_order, u64 type, int non_present_entry_flush);
+	void (*flush_iotlb)(struct intel_iommu *iommu, u16 did, u64 addr,
+			    unsigned int size_order, u64 type);
 };
 
 enum {
@@ -341,9 +341,8 @@ extern void qi_global_iec(struct intel_iommu *iommu);
 
 extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid,
 			     u8 fm, u64 type);
-extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
-			  unsigned int size_order, u64 type,
-			  int non_present_entry_flush);
+extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
+			  unsigned int size_order, u64 type);
 
 extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
-- 
cgit v1.2.3-70-g09d2


From 5e751e992f3fb08ba35e1ca8095ec8fbf9eda523 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 8 May 2009 13:55:22 +0100
Subject: CRED: Rename cred_exec_mutex to reflect that it's a guard against
 ptrace

Rename cred_exec_mutex to reflect that it's a guard against foreign
intervention on a process's credential state, such as is made by ptrace().  The
attachment of a debugger to a process affects execve()'s calculation of the new
credential state - _and_ also setprocattr()'s calculation of that state.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/compat.c               |  6 +++---
 fs/exec.c                 | 10 +++++-----
 include/linux/init_task.h |  4 ++--
 include/linux/sched.h     |  4 +++-
 kernel/cred.c             |  4 ++--
 kernel/ptrace.c           |  9 +++++----
 6 files changed, 20 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/fs/compat.c b/fs/compat.c
index 681ed81e6be..bb2a9b2e817 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1488,7 +1488,7 @@ int compat_do_execve(char * filename,
 	if (!bprm)
 		goto out_files;
 
-	retval = mutex_lock_interruptible(&current->cred_exec_mutex);
+	retval = mutex_lock_interruptible(&current->cred_guard_mutex);
 	if (retval < 0)
 		goto out_free;
 	current->in_execve = 1;
@@ -1550,7 +1550,7 @@ int compat_do_execve(char * filename,
 	/* execve succeeded */
 	current->fs->in_exec = 0;
 	current->in_execve = 0;
-	mutex_unlock(&current->cred_exec_mutex);
+	mutex_unlock(&current->cred_guard_mutex);
 	acct_update_integrals(current);
 	free_bprm(bprm);
 	if (displaced)
@@ -1573,7 +1573,7 @@ out_unmark:
 
 out_unlock:
 	current->in_execve = 0;
-	mutex_unlock(&current->cred_exec_mutex);
+	mutex_unlock(&current->cred_guard_mutex);
 
 out_free:
 	free_bprm(bprm);
diff --git a/fs/exec.c b/fs/exec.c
index 639177b0eea..998e856c307 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1045,7 +1045,7 @@ void install_exec_creds(struct linux_binprm *bprm)
 	commit_creds(bprm->cred);
 	bprm->cred = NULL;
 
-	/* cred_exec_mutex must be held at least to this point to prevent
+	/* cred_guard_mutex must be held at least to this point to prevent
 	 * ptrace_attach() from altering our determination of the task's
 	 * credentials; any time after this it may be unlocked */
 
@@ -1055,7 +1055,7 @@ EXPORT_SYMBOL(install_exec_creds);
 
 /*
  * determine how safe it is to execute the proposed program
- * - the caller must hold current->cred_exec_mutex to protect against
+ * - the caller must hold current->cred_guard_mutex to protect against
  *   PTRACE_ATTACH
  */
 int check_unsafe_exec(struct linux_binprm *bprm)
@@ -1297,7 +1297,7 @@ int do_execve(char * filename,
 	if (!bprm)
 		goto out_files;
 
-	retval = mutex_lock_interruptible(&current->cred_exec_mutex);
+	retval = mutex_lock_interruptible(&current->cred_guard_mutex);
 	if (retval < 0)
 		goto out_free;
 	current->in_execve = 1;
@@ -1360,7 +1360,7 @@ int do_execve(char * filename,
 	/* execve succeeded */
 	current->fs->in_exec = 0;
 	current->in_execve = 0;
-	mutex_unlock(&current->cred_exec_mutex);
+	mutex_unlock(&current->cred_guard_mutex);
 	acct_update_integrals(current);
 	free_bprm(bprm);
 	if (displaced)
@@ -1383,7 +1383,7 @@ out_unmark:
 
 out_unlock:
 	current->in_execve = 0;
-	mutex_unlock(&current->cred_exec_mutex);
+	mutex_unlock(&current->cred_guard_mutex);
 
 out_free:
 	free_bprm(bprm);
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index d87247d2641..7f54ba94242 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -145,8 +145,8 @@ extern struct cred init_cred;
 	.group_leader	= &tsk,						\
 	.real_cred	= &init_cred,					\
 	.cred		= &init_cred,					\
-	.cred_exec_mutex =						\
-		 __MUTEX_INITIALIZER(tsk.cred_exec_mutex),		\
+	.cred_guard_mutex =						\
+		 __MUTEX_INITIALIZER(tsk.cred_guard_mutex),		\
 	.comm		= "swapper",					\
 	.thread		= INIT_THREAD,					\
 	.fs		= &init_fs,					\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3fa82b353c9..5932ace2240 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1247,7 +1247,9 @@ struct task_struct {
 					 * credentials (COW) */
 	const struct cred *cred;	/* effective (overridable) subjective task
 					 * credentials (COW) */
-	struct mutex cred_exec_mutex;	/* execve vs ptrace cred calculation mutex */
+	struct mutex cred_guard_mutex;	/* guard against foreign influences on
+					 * credential calculations
+					 * (notably. ptrace) */
 
 	char comm[TASK_COMM_LEN]; /* executable name excluding path
 				     - access with [gs]et_task_comm (which lock
diff --git a/kernel/cred.c b/kernel/cred.c
index 3a039189d70..1bb4d7e5d61 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -167,7 +167,7 @@ EXPORT_SYMBOL(prepare_creds);
 
 /*
  * Prepare credentials for current to perform an execve()
- * - The caller must hold current->cred_exec_mutex
+ * - The caller must hold current->cred_guard_mutex
  */
 struct cred *prepare_exec_creds(void)
 {
@@ -276,7 +276,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 	struct cred *new;
 	int ret;
 
-	mutex_init(&p->cred_exec_mutex);
+	mutex_init(&p->cred_guard_mutex);
 
 	if (
 #ifdef CONFIG_KEYS
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 0692ab5a0d6..27ac80298bf 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -185,10 +185,11 @@ int ptrace_attach(struct task_struct *task)
 	if (same_thread_group(task, current))
 		goto out;
 
-	/* Protect exec's credential calculations against our interference;
-	 * SUID, SGID and LSM creds get determined differently under ptrace.
+	/* Protect the target's credential calculations against our
+	 * interference; SUID, SGID and LSM creds get determined differently
+	 * under ptrace.
 	 */
-	retval = mutex_lock_interruptible(&task->cred_exec_mutex);
+	retval = mutex_lock_interruptible(&task->cred_guard_mutex);
 	if (retval  < 0)
 		goto out;
 
@@ -232,7 +233,7 @@ repeat:
 bad:
 	write_unlock_irqrestore(&tasklist_lock, flags);
 	task_unlock(task);
-	mutex_unlock(&task->cred_exec_mutex);
+	mutex_unlock(&task->cred_guard_mutex);
 out:
 	return retval;
 }
-- 
cgit v1.2.3-70-g09d2


From 7303f240981888884412a97ac742772527356880 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Mon, 11 May 2009 09:59:34 +0300
Subject: slob: use PG_slab for identifying SLOB pages

For the sake of consistency.

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Andi Kleen <andi@firstfloor.org>
Acked-by: Matt Mackall <mpm@selenic.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
 include/linux/page-flags.h | 2 --
 mm/slob.c                  | 6 +++---
 2 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 62214c7d2d9..f036dfbdad5 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -120,7 +120,6 @@ enum pageflags {
 	PG_savepinned = PG_dirty,
 
 	/* SLOB */
-	PG_slob_page = PG_active,
 	PG_slob_free = PG_private,
 
 	/* SLUB */
@@ -203,7 +202,6 @@ PAGEFLAG(SavePinned, savepinned);			/* Xen */
 PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
 PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
 
-__PAGEFLAG(SlobPage, slob_page)
 __PAGEFLAG(SlobFree, slob_free)
 
 __PAGEFLAG(SlubFrozen, slub_frozen)
diff --git a/mm/slob.c b/mm/slob.c
index a2d4ab32198..aad9dad2e82 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -131,17 +131,17 @@ static LIST_HEAD(free_slob_large);
  */
 static inline int is_slob_page(struct slob_page *sp)
 {
-	return PageSlobPage((struct page *)sp);
+	return PageSlab((struct page *)sp);
 }
 
 static inline void set_slob_page(struct slob_page *sp)
 {
-	__SetPageSlobPage((struct page *)sp);
+	__SetPageSlab((struct page *)sp);
 }
 
 static inline void clear_slob_page(struct slob_page *sp)
 {
-	__ClearPageSlobPage((struct page *)sp);
+	__ClearPageSlab((struct page *)sp);
 }
 
 static inline struct slob_page *slob_page(const void *addr)
-- 
cgit v1.2.3-70-g09d2


From c3a4d78c580de4edc9ef0f7c59812fb02ceb037f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 7 May 2009 22:24:37 +0900
Subject: block: add rq->resid_len

rq->data_len served two purposes - the length of data buffer on issue
and the residual count on completion.  This duality creates some
headaches.

First of all, block layer and low level drivers can't really determine
what rq->data_len contains while a request is executing.  It could be
the total request length or it coulde be anything else one of the
lower layers is using to keep track of residual count.  This
complicates things because blk_rq_bytes() and thus
[__]blk_end_request_all() relies on rq->data_len for PC commands.
Drivers which want to report residual count should first cache the
total request length, update rq->data_len and then complete the
request with the cached data length.

Secondly, it makes requests default to reporting full residual count,
ie. reporting that no data transfer occurred.  The residual count is
an exception not the norm; however, the driver should clear
rq->data_len to zero to signify the normal cases while leaving it
alone means no data transfer occurred at all.  This reverse default
behavior complicates code unnecessarily and renders block PC on some
drivers (ide-tape/floppy) unuseable.

This patch adds rq->resid_len which is used only for residual count.

While at it, remove now unnecessasry blk_rq_bytes() caching in
ide_pc_intr() as rq->data_len is not changed anymore.

Boaz	: spotted missing conversion in osd
Sergei	: spotted too early conversion to blk_rq_bytes() in ide-tape

[ Impact: cleanup residual count handling, report 0 resid by default ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Cc: Borislav Petkov <petkovbb@googlemail.com>
Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: Mike Miller <mike.miller@hp.com>
Cc: Eric Moore <Eric.Moore@lsi.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Doug Gilbert <dgilbert@interlog.com>
Cc: Mike Miller <mike.miller@hp.com>
Cc: Eric Moore <Eric.Moore@lsi.com>
Cc: Darrick J. Wong <djwong@us.ibm.com>
Cc: Pete Zaitcev <zaitcev@redhat.com>
Cc: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/bsg.c                              |  8 +++----
 block/scsi_ioctl.c                       |  2 +-
 drivers/block/cciss.c                    | 13 ++++-------
 drivers/block/ub.c                       |  6 ++---
 drivers/ide/ide-atapi.c                  |  9 +-------
 drivers/ide/ide-cd.c                     | 13 +++++------
 drivers/ide/ide-tape.c                   |  4 ++--
 drivers/message/fusion/mptsas.c          |  3 +--
 drivers/scsi/libsas/sas_expander.c       |  6 +----
 drivers/scsi/libsas/sas_host_smp.c       | 38 +++++++++++++++++---------------
 drivers/scsi/mpt2sas/mpt2sas_transport.c |  4 +---
 drivers/scsi/scsi_lib.c                  | 24 ++++++++++----------
 drivers/scsi/sg.c                        |  2 +-
 drivers/scsi/st.c                        |  2 +-
 fs/exofs/osd.c                           |  4 ++--
 include/linux/blkdev.h                   |  1 +
 16 files changed, 59 insertions(+), 80 deletions(-)

(limited to 'include')

diff --git a/block/bsg.c b/block/bsg.c
index 206060e795d..2d746e34f4c 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -445,14 +445,14 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
 	}
 
 	if (rq->next_rq) {
-		hdr->dout_resid = rq->data_len;
-		hdr->din_resid = rq->next_rq->data_len;
+		hdr->dout_resid = rq->resid_len;
+		hdr->din_resid = rq->next_rq->resid_len;
 		blk_rq_unmap_user(bidi_bio);
 		blk_put_request(rq->next_rq);
 	} else if (rq_data_dir(rq) == READ)
-		hdr->din_resid = rq->data_len;
+		hdr->din_resid = rq->resid_len;
 	else
-		hdr->dout_resid = rq->data_len;
+		hdr->dout_resid = rq->resid_len;
 
 	/*
 	 * If the request generated a negative error number, return it
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 58cf4560f74..a9670dd4b5d 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -230,7 +230,7 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
 	hdr->info = 0;
 	if (hdr->masked_status || hdr->host_status || hdr->driver_status)
 		hdr->info |= SG_INFO_CHECK;
-	hdr->resid = rq->data_len;
+	hdr->resid = rq->resid_len;
 	hdr->sb_len_wr = 0;
 
 	if (rq->sense_len && hdr->sbp) {
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 4d4d5e0d3fa..f22d4932433 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1299,7 +1299,6 @@ static void cciss_softirq_done(struct request *rq)
 {
 	CommandList_struct *cmd = rq->completion_data;
 	ctlr_info_t *h = hba[cmd->ctlr];
-	unsigned int nr_bytes;
 	unsigned long flags;
 	u64bit temp64;
 	int i, ddir;
@@ -1321,15 +1320,11 @@ static void cciss_softirq_done(struct request *rq)
 	printk("Done with %p\n", rq);
 #endif				/* CCISS_DEBUG */
 
-	/*
-	 * Store the full size and set the residual count for pc requests
-	 */
-	nr_bytes = blk_rq_bytes(rq);
+	/* set the residual count for pc requests */
 	if (blk_pc_request(rq))
-		rq->data_len = cmd->err_info->ResidualCnt;
+		rq->resid_len = cmd->err_info->ResidualCnt;
 
-	if (blk_end_request(rq, (rq->errors == 0) ? 0 : -EIO, nr_bytes))
-		BUG();
+	blk_end_request_all(rq, (rq->errors == 0) ? 0 : -EIO);
 
 	spin_lock_irqsave(&h->lock, flags);
 	cmd_free(h, cmd, 1);
@@ -2691,7 +2686,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 			printk(KERN_WARNING "cciss: cmd %p has"
 			       " completed with data underrun "
 			       "reported\n", cmd);
-			cmd->rq->data_len = cmd->err_info->ResidualCnt;
+			cmd->rq->resid_len = cmd->err_info->ResidualCnt;
 		}
 		break;
 	case CMD_DATA_OVERRUN:
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 689cd27ac89..8c2cc71327e 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -783,10 +783,8 @@ static void ub_rw_cmd_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
 
 	if (cmd->error == 0) {
 		if (blk_pc_request(rq)) {
-			if (cmd->act_len >= rq->data_len)
-				rq->data_len = 0;
-			else
-				rq->data_len -= cmd->act_len;
+			if (cmd->act_len < rq->data_len)
+				rq->resid_len = rq->data_len - cmd->act_len;
 			scsi_status = 0;
 		} else {
 			if (cmd->act_len != cmd->len) {
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index afe5a432387..e4a02a05fc8 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -367,7 +367,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 	/* No more interrupts */
 	if ((stat & ATA_DRQ) == 0) {
 		int uptodate, error;
-		unsigned int done;
 
 		debug_log("Packet command completed, %d bytes transferred\n",
 			  pc->xferred);
@@ -406,12 +405,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 		if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) && (stat & ATA_DSC) == 0)
 			dsc = 1;
 
-		/*
-		 * ->pc_callback() might change rq->data_len for
-		 * residual count, cache total length.
-		 */
-		done = blk_rq_bytes(rq);
-
 		/* Command finished - Call the callback function */
 		uptodate = drive->pc_callback(drive, dsc);
 
@@ -431,7 +424,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 			error = uptodate ? 0 : -EIO;
 		}
 
-		ide_complete_rq(drive, error, done);
+		ide_complete_rq(drive, error, blk_rq_bytes(rq));
 		return ide_stopped;
 	}
 
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 673628790f1..8bbe222c5e4 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -519,7 +519,7 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 		error = blk_execute_rq(drive->queue, info->disk, rq, 0);
 
 		if (buffer)
-			*bufflen = rq->data_len;
+			*bufflen = rq->resid_len;
 
 		flags = rq->cmd_flags;
 		blk_put_request(rq);
@@ -707,11 +707,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
 
 out_end:
 	if (blk_pc_request(rq) && rc == 0) {
-		unsigned int dlen = rq->data_len;
-
-		rq->data_len = 0;
-
-		if (blk_end_request(rq, 0, dlen))
+		if (blk_end_request(rq, 0, rq->data_len))
 			BUG();
 
 		hwif->rq = NULL;
@@ -740,9 +736,10 @@ out_end:
 			nsectors = 1;
 
 		if (blk_fs_request(rq) == 0) {
-			rq->data_len -= (cmd->nbytes - cmd->nleft);
+			rq->resid_len = rq->data_len -
+				(cmd->nbytes - cmd->nleft);
 			if (uptodate == 0 && (cmd->tf_flags & IDE_TFLAG_WRITE))
-				rq->data_len += cmd->last_xfer_len;
+				rq->resid_len += cmd->last_xfer_len;
 		}
 
 		ide_complete_rq(drive, uptodate ? 0 : -EIO, nsectors << 9);
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 7149224d1fe..65c5b705883 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -380,7 +380,7 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc)
 		}
 
 		tape->first_frame += blocks;
-		rq->data_len -= blocks * tape->blk_size;
+		rq->resid_len = rq->data_len - blocks * tape->blk_size;
 
 		if (pc->error) {
 			uptodate = 0;
@@ -903,7 +903,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
 	blk_execute_rq(drive->queue, tape->disk, rq, 0);
 
 	/* calculate the number of transferred bytes and update buffer state */
-	size -= rq->data_len;
+	size -= rq->resid_len;
 	tape->cur = tape->buf;
 	if (cmd == REQ_IDETAPE_READ)
 		tape->valid = size;
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index a9019f081b9..5d5f34715de 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -1357,8 +1357,7 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 		smprep = (SmpPassthroughReply_t *)ioc->sas_mgmt.reply;
 		memcpy(req->sense, smprep, sizeof(*smprep));
 		req->sense_len = sizeof(*smprep);
-		req->data_len = 0;
-		rsp->data_len -= smprep->ResponseDataLength;
+		rsp->resid_len = rsp->data_len - smprep->ResponseDataLength;
 	} else {
 		printk(MYIOC_s_ERR_FMT "%s: smp passthru reply failed to be returned\n",
 		    ioc->name, __func__);
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 3da02e43678..6605ec905cc 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -1936,12 +1936,8 @@ int sas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 			       bio_data(rsp->bio), rsp->data_len);
 	if (ret > 0) {
 		/* positive number is the untransferred residual */
-		rsp->data_len = ret;
-		req->data_len = 0;
+		rsp->resid_len = ret;
 		ret = 0;
-	} else if (ret == 0) {
-		rsp->data_len = 0;
-		req->data_len = 0;
 	}
 
 	return ret;
diff --git a/drivers/scsi/libsas/sas_host_smp.c b/drivers/scsi/libsas/sas_host_smp.c
index d110a366c48..89952edd0be 100644
--- a/drivers/scsi/libsas/sas_host_smp.c
+++ b/drivers/scsi/libsas/sas_host_smp.c
@@ -134,7 +134,7 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
 {
 	u8 *req_data = NULL, *resp_data = NULL, *buf;
 	struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(shost);
-	int error = -EINVAL, resp_data_len = rsp->data_len;
+	int error = -EINVAL;
 
 	/* eight is the minimum size for request and response frames */
 	if (req->data_len < 8 || rsp->data_len < 8)
@@ -176,17 +176,20 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
 	resp_data[1] = req_data[1];
 	resp_data[2] = SMP_RESP_FUNC_UNK;
 
+	req->resid_len = req->data_len;
+	rsp->resid_len = rsp->data_len;
+
 	switch (req_data[1]) {
 	case SMP_REPORT_GENERAL:
-		req->data_len -= 8;
-		resp_data_len -= 32;
+		req->resid_len -= 8;
+		rsp->resid_len -= 32;
 		resp_data[2] = SMP_RESP_FUNC_ACC;
 		resp_data[9] = sas_ha->num_phys;
 		break;
 
 	case SMP_REPORT_MANUF_INFO:
-		req->data_len -= 8;
-		resp_data_len -= 64;
+		req->resid_len -= 8;
+		rsp->resid_len -= 64;
 		resp_data[2] = SMP_RESP_FUNC_ACC;
 		memcpy(resp_data + 12, shost->hostt->name,
 		       SAS_EXPANDER_VENDOR_ID_LEN);
@@ -199,13 +202,13 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
 		break;
 
 	case SMP_DISCOVER:
-		req->data_len -= 16;
-		if ((int)req->data_len < 0) {
-			req->data_len = 0;
+		req->resid_len -= 16;
+		if ((int)req->resid_len < 0) {
+			req->resid_len = 0;
 			error = -EINVAL;
 			goto out;
 		}
-		resp_data_len -= 56;
+		rsp->resid_len -= 56;
 		sas_host_smp_discover(sas_ha, resp_data, req_data[9]);
 		break;
 
@@ -215,13 +218,13 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
 		break;
 
 	case SMP_REPORT_PHY_SATA:
-		req->data_len -= 16;
-		if ((int)req->data_len < 0) {
-			req->data_len = 0;
+		req->resid_len -= 16;
+		if ((int)req->resid_len < 0) {
+			req->resid_len = 0;
 			error = -EINVAL;
 			goto out;
 		}
-		resp_data_len -= 60;
+		rsp->resid_len -= 60;
 		sas_report_phy_sata(sas_ha, resp_data, req_data[9]);
 		break;
 
@@ -238,13 +241,13 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
 		break;
 
 	case SMP_PHY_CONTROL:
-		req->data_len -= 44;
-		if ((int)req->data_len < 0) {
-			req->data_len = 0;
+		req->resid_len -= 44;
+		if ((int)req->resid_len < 0) {
+			req->resid_len = 0;
 			error = -EINVAL;
 			goto out;
 		}
-		resp_data_len -= 8;
+		rsp->resid_len -= 8;
 		sas_phy_control(sas_ha, req_data[9], req_data[10],
 				req_data[32] >> 4, req_data[33] >> 4,
 				resp_data);
@@ -265,7 +268,6 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
 	flush_kernel_dcache_page(bio_page(rsp->bio));
 	kunmap_atomic(buf - bio_offset(rsp->bio), KM_USER0);
 	local_irq_enable();
-	rsp->data_len = resp_data_len;
 
  out:
 	kfree(req_data);
diff --git a/drivers/scsi/mpt2sas/mpt2sas_transport.c b/drivers/scsi/mpt2sas/mpt2sas_transport.c
index e03dc0b1e1a..53759c566bf 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_transport.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c
@@ -1170,9 +1170,7 @@ transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 
 		memcpy(req->sense, mpi_reply, sizeof(*mpi_reply));
 		req->sense_len = sizeof(*mpi_reply);
-		req->data_len = 0;
-		rsp->data_len -= mpi_reply->ResponseDataLength;
-
+		rsp->resid_len = rsp->data_len - mpi_reply->ResponseDataLength;
 	} else {
 		dtransportprintk(ioc, printk(MPT2SAS_DEBUG_FMT
 		    "%s - no reply\n", ioc->name, __func__));
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index aa9fc572e45..7d49ef589f3 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -240,11 +240,11 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 	 * is invalid.  Prevent the garbage from being misinterpreted
 	 * and prevent security leaks by zeroing out the excess data.
 	 */
-	if (unlikely(req->data_len > 0 && req->data_len <= bufflen))
-		memset(buffer + (bufflen - req->data_len), 0, req->data_len);
+	if (unlikely(req->resid_len > 0 && req->resid_len <= bufflen))
+		memset(buffer + (bufflen - req->resid_len), 0, req->resid_len);
 
 	if (resid)
-		*resid = req->data_len;
+		*resid = req->resid_len;
 	ret = req->errors;
  out:
 	blk_put_request(req);
@@ -549,7 +549,7 @@ static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int error,
 		int leftover = (req->hard_nr_sectors << 9);
 
 		if (blk_pc_request(req))
-			leftover = req->data_len;
+			leftover = req->resid_len;
 
 		/* kill remainder if no retrys */
 		if (error && scsi_noretry_cmd(cmd))
@@ -673,11 +673,11 @@ void scsi_release_buffers(struct scsi_cmnd *cmd)
 EXPORT_SYMBOL(scsi_release_buffers);
 
 /*
- * Bidi commands Must be complete as a whole, both sides at once.
- * If part of the bytes were written and lld returned
- * scsi_in()->resid and/or scsi_out()->resid this information will be left
- * in req->data_len and req->next_rq->data_len. The upper-layer driver can
- * decide what to do with this information.
+ * Bidi commands Must be complete as a whole, both sides at once.  If
+ * part of the bytes were written and lld returned scsi_in()->resid
+ * and/or scsi_out()->resid this information will be left in
+ * req->resid_len and req->next_rq->resid_len. The upper-layer driver
+ * can decide what to do with this information.
  */
 static void scsi_end_bidi_request(struct scsi_cmnd *cmd)
 {
@@ -685,8 +685,8 @@ static void scsi_end_bidi_request(struct scsi_cmnd *cmd)
 	unsigned int dlen = req->data_len;
 	unsigned int next_dlen = req->next_rq->data_len;
 
-	req->data_len = scsi_out(cmd)->resid;
-	req->next_rq->data_len = scsi_in(cmd)->resid;
+	req->resid_len = scsi_out(cmd)->resid;
+	req->next_rq->resid_len = scsi_in(cmd)->resid;
 
 	/* The req and req->next_rq have not been completed */
 	BUG_ON(blk_end_bidi_request(req, 0, dlen, next_dlen));
@@ -778,7 +778,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 			scsi_end_bidi_request(cmd);
 			return;
 		}
-		req->data_len = scsi_get_resid(cmd);
+		req->resid_len = scsi_get_resid(cmd);
 	}
 
 	BUG_ON(blk_bidi_rq(req)); /* bidi not support for !blk_pc_request yet */
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 82312df9b0b..dec4c70677d 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1260,7 +1260,7 @@ static void sg_rq_end_io(struct request *rq, int uptodate)
 
 	sense = rq->sense;
 	result = rq->errors;
-	resid = rq->data_len;
+	resid = rq->resid_len;
 
 	SCSI_LOG_TIMEOUT(4, printk("sg_cmd_done: %s, pack_id=%d, res=0x%x\n",
 		sdp->disk->disk_name, srp->header.pack_id, result));
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index eb24efea8f1..8681b708344 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -463,7 +463,7 @@ static void st_scsi_execute_end(struct request *req, int uptodate)
 	struct scsi_tape *STp = SRpnt->stp;
 
 	STp->buffer->cmdstat.midlevel_result = SRpnt->result = req->errors;
-	STp->buffer->cmdstat.residual = req->data_len;
+	STp->buffer->cmdstat.residual = req->resid_len;
 
 	if (SRpnt->waiting)
 		complete(SRpnt->waiting);
diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c
index b249ae97fb1..06ca92672eb 100644
--- a/fs/exofs/osd.c
+++ b/fs/exofs/osd.c
@@ -50,10 +50,10 @@ int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid)
 
 	/* FIXME: should be include in osd_sense_info */
 	if (in_resid)
-		*in_resid = or->in.req ? or->in.req->data_len : 0;
+		*in_resid = or->in.req ? or->in.req->resid_len : 0;
 
 	if (out_resid)
-		*out_resid = or->out.req ? or->out.req->data_len : 0;
+		*out_resid = or->out.req ? or->out.req->resid_len : 0;
 
 	return ret;
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3a5b1bd6582..6a967cad89f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -229,6 +229,7 @@ struct request {
 	unsigned int data_len;
 	unsigned int extra_len;	/* length of alignment and padding */
 	unsigned int sense_len;
+	unsigned int resid_len;	/* residual count */
 	void *sense;
 
 	unsigned long deadline;
-- 
cgit v1.2.3-70-g09d2


From 5b93629b4509c03ffa87a9316412fedf6f58cb37 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 7 May 2009 22:24:38 +0900
Subject: block: implement blk_rq_pos/[cur_]sectors() and convert obvious ones

Implement accessors - blk_rq_pos(), blk_rq_sectors() and
blk_rq_cur_sectors() which return rq->hard_sector, rq->hard_nr_sectors
and rq->hard_cur_sectors respectively and convert direct references of
the said fields to the accessors.

This is in preparation of request data length handling cleanup.

Geert	: suggested adding const to struct request * parameter to accessors
Sergei	: spotted error in patch description

[ Impact: cleanup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Acked-by: Stephen Rothwell <sfr@canb.auug.org.au>
Tested-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Ackec-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Cc: Borislav Petkov <petkovbb@googlemail.com>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-barrier.c             |  2 +-
 block/blk-core.c                |  2 +-
 block/cfq-iosched.c             |  2 +-
 drivers/block/ps3disk.c         |  2 +-
 drivers/block/viodasd.c         |  6 +++---
 drivers/block/xsysace.c         | 10 +++++-----
 drivers/ide/ide-cd.c            |  8 ++++----
 drivers/ide/ide-io.c            |  4 ++--
 drivers/message/i2o/i2o_block.c |  2 +-
 drivers/scsi/scsi_lib.c         |  2 +-
 include/linux/blkdev.h          | 23 ++++++++++++++++++++---
 kernel/trace/blktrace.c         |  4 ++--
 12 files changed, 42 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index c8d087655ef..c167de5b9ef 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -163,7 +163,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp)
 	 * For an empty barrier, there's no actual BAR request, which
 	 * in turn makes POSTFLUSH unnecessary.  Mask them off.
 	 */
-	if (!rq->hard_nr_sectors) {
+	if (!blk_rq_sectors(rq)) {
 		q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
 				QUEUE_ORDERED_DO_POSTFLUSH);
 		/*
diff --git a/block/blk-core.c b/block/blk-core.c
index 394c5bd8127..895e55b74a4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1683,7 +1683,7 @@ static void blk_account_io_done(struct request *req)
 unsigned int blk_rq_bytes(struct request *rq)
 {
 	if (blk_fs_request(rq))
-		return rq->hard_nr_sectors << 9;
+		return blk_rq_sectors(rq) << 9;
 
 	return rq->data_len;
 }
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index def0c698a4b..575083a9ffe 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -760,7 +760,7 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
 	cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
 						cfqd->rq_in_driver);
 
-	cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors;
+	cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
 }
 
 static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index f6586e4d351..c2388673684 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -136,7 +136,7 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev,
 	dev_dbg(&dev->sbd.core,
 		"%s:%u: %s req has %u bvecs for %lu sectors %lu hard sectors\n",
 		__func__, __LINE__, op, n, req->nr_sectors,
-		req->hard_nr_sectors);
+		blk_rq_sectors(req));
 #endif
 
 	start_sector = req->sector * priv->blocking_factor;
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
index ecccf65dce2..e821eed7132 100644
--- a/drivers/block/viodasd.c
+++ b/drivers/block/viodasd.c
@@ -368,12 +368,12 @@ static void do_viodasd_request(struct request_queue *q)
 		blkdev_dequeue_request(req);
 		/* check that request contains a valid command */
 		if (!blk_fs_request(req)) {
-			viodasd_end_request(req, -EIO, req->hard_nr_sectors);
+			viodasd_end_request(req, -EIO, blk_rq_sectors(req));
 			continue;
 		}
 		/* Try sending the request */
 		if (send_request(req) != 0)
-			viodasd_end_request(req, -EIO, req->hard_nr_sectors);
+			viodasd_end_request(req, -EIO, blk_rq_sectors(req));
 	}
 }
 
@@ -590,7 +590,7 @@ static int viodasd_handle_read_write(struct vioblocklpevent *bevent)
 		err = vio_lookup_rc(viodasd_err_table, bevent->sub_result);
 		printk(VIOD_KERN_WARNING "read/write error %d:0x%04x (%s)\n",
 				event->xRc, bevent->sub_result, err->msg);
-		num_sect = req->hard_nr_sectors;
+		num_sect = blk_rq_sectors(req);
 	}
 	qlock = req->q->queue_lock;
 	spin_lock_irqsave(qlock, irq_flags);
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index b1e1d7e5ab1..5722931d14c 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -645,8 +645,8 @@ static void ace_fsm_dostate(struct ace_device *ace)
 
 		/* Okay, it's a data request, set it up for transfer */
 		dev_dbg(ace->dev,
-			"request: sec=%llx hcnt=%lx, ccnt=%x, dir=%i\n",
-			(unsigned long long) req->sector, req->hard_nr_sectors,
+			"request: sec=%llx hcnt=%x, ccnt=%x, dir=%i\n",
+			(unsigned long long) req->sector, blk_rq_sectors(req),
 			req->current_nr_sectors, rq_data_dir(req));
 
 		ace->req = req;
@@ -654,7 +654,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
 		ace->data_count = req->current_nr_sectors * ACE_BUF_PER_SECTOR;
 		ace_out32(ace, ACE_MPULBA, req->sector & 0x0FFFFFFF);
 
-		count = req->hard_nr_sectors;
+		count = blk_rq_sectors(req);
 		if (rq_data_dir(req)) {
 			/* Kick off write request */
 			dev_dbg(ace->dev, "write data\n");
@@ -719,8 +719,8 @@ static void ace_fsm_dostate(struct ace_device *ace)
 		/* bio finished; is there another one? */
 		if (__blk_end_request(ace->req, 0,
 					blk_rq_cur_bytes(ace->req))) {
-			/* dev_dbg(ace->dev, "next block; h=%li c=%i\n",
-			 *      ace->req->hard_nr_sectors,
+			/* dev_dbg(ace->dev, "next block; h=%u c=%u\n",
+			 *      blk_rq_sectors(ace->req),
 			 *      ace->req->current_nr_sectors);
 			 */
 			ace->data_ptr = ace->req->buffer;
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 8bbe222c5e4..182320dd6ea 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -730,7 +730,7 @@ out_end:
 		if (blk_pc_request(rq))
 			nsectors = (rq->data_len + 511) >> 9;
 		else
-			nsectors = rq->hard_nr_sectors;
+			nsectors = blk_rq_sectors(rq);
 
 		if (nsectors == 0)
 			nsectors = 1;
@@ -875,7 +875,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 
 	return ide_issue_pc(drive, &cmd);
 out_end:
-	nsectors = rq->hard_nr_sectors;
+	nsectors = blk_rq_sectors(rq);
 
 	if (nsectors == 0)
 		nsectors = 1;
@@ -1359,8 +1359,8 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive)
 static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq)
 {
 	int hard_sect = queue_hardsect_size(q);
-	long block = (long)rq->hard_sector / (hard_sect >> 9);
-	unsigned long blocks = rq->hard_nr_sectors / (hard_sect >> 9);
+	long block = (long)blk_rq_pos(rq) / (hard_sect >> 9);
+	unsigned long blocks = blk_rq_sectors(rq) / (hard_sect >> 9);
 
 	memset(rq->cmd, 0, BLK_MAX_CDB);
 
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index a0309ea661a..df23bcbd94b 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -118,7 +118,7 @@ unsigned int ide_rq_bytes(struct request *rq)
 	if (blk_pc_request(rq))
 		return rq->data_len;
 	else
-		return rq->hard_cur_sectors << 9;
+		return blk_rq_cur_sectors(rq) << 9;
 }
 EXPORT_SYMBOL_GPL(ide_rq_bytes);
 
@@ -133,7 +133,7 @@ int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes)
 	 * and complete the whole request right now
 	 */
 	if (blk_noretry_request(rq) && error <= 0)
-		nr_bytes = rq->hard_nr_sectors << 9;
+		nr_bytes = blk_rq_sectors(rq) << 9;
 
 	rc = ide_end_rq(drive, rq, error, nr_bytes);
 	if (rc == 0)
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index 221317e6a00..56e60f0d531 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -427,7 +427,7 @@ static void i2o_block_end_request(struct request *req, int error,
 	unsigned long flags;
 
 	if (blk_end_request(req, error, nr_bytes)) {
-		int leftover = (req->hard_nr_sectors << KERNEL_SECTOR_SHIFT);
+		int leftover = (blk_rq_sectors(req) << KERNEL_SECTOR_SHIFT);
 
 		if (blk_pc_request(req))
 			leftover = req->data_len;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 7d49ef589f3..9ff0ca9988a 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -546,7 +546,7 @@ static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int error,
 	 * to queue the remainder of them.
 	 */
 	if (blk_end_request(req, error, bytes)) {
-		int leftover = (req->hard_nr_sectors << 9);
+		int leftover = blk_rq_sectors(req) << 9;
 
 		if (blk_pc_request(req))
 			leftover = req->resid_len;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6a967cad89f..4e5f8559872 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -832,13 +832,30 @@ static inline void blk_run_address_space(struct address_space *mapping)
 extern void blkdev_dequeue_request(struct request *req);
 
 /*
- * blk_end_request() takes bytes instead of sectors as a complete size.
- * blk_rq_bytes() returns bytes left to complete in the entire request.
- * blk_rq_cur_bytes() returns bytes left to complete in the current segment.
+ * blk_rq_pos()		: the current sector
+ * blk_rq_bytes()	: bytes left in the entire request
+ * blk_rq_cur_bytes()	: bytes left in the current segment
+ * blk_rq_sectors()	: sectors left in the entire request
+ * blk_rq_cur_sectors()	: sectors left in the current segment
  */
+static inline sector_t blk_rq_pos(const struct request *rq)
+{
+	return rq->hard_sector;
+}
+
 extern unsigned int blk_rq_bytes(struct request *rq);
 extern unsigned int blk_rq_cur_bytes(struct request *rq);
 
+static inline unsigned int blk_rq_sectors(const struct request *rq)
+{
+	return rq->hard_nr_sectors;
+}
+
+static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
+{
+	return rq->hard_cur_sectors;
+}
+
 /*
  * Request completion related functions.
  *
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 921ef5d1f0b..42f1c11e754 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -646,7 +646,7 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
 				rq->cmd_len, rq->cmd);
 	} else  {
 		what |= BLK_TC_ACT(BLK_TC_FS);
-		__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
+		__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_sectors(rq) << 9,
 				rw, what, rq->errors, 0, NULL);
 	}
 }
@@ -857,7 +857,7 @@ void blk_add_driver_data(struct request_queue *q,
 		__blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
 				rq->errors, len, data);
 	else
-		__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
+		__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_sectors(rq) << 9,
 				0, BLK_TA_DRV_DATA, rq->errors, len, data);
 }
 EXPORT_SYMBOL_GPL(blk_add_driver_data);
-- 
cgit v1.2.3-70-g09d2


From 83096ebf1263b2c1ee5e653ba37d993d02e3eb7b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 7 May 2009 22:24:39 +0900
Subject: block: convert to pos and nr_sectors accessors

With recent cleanups, there is no place where low level driver
directly manipulates request fields.  This means that the 'hard'
request fields always equal the !hard fields.  Convert all
rq->sectors, nr_sectors and current_nr_sectors references to
accessors.

While at it, drop superflous blk_rq_pos() < 0 test in swim.c.

[ Impact: use pos and nr_sectors accessors ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Tested-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Tested-by: Adrian McMenamin <adrian@mcmen.demon.co.uk>
Acked-by: Adrian McMenamin <adrian@mcmen.demon.co.uk>
Acked-by: Mike Miller <mike.miller@hp.com>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Cc: Borislav Petkov <petkovbb@googlemail.com>
Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: Eric Moore <Eric.Moore@lsi.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Pete Zaitcev <zaitcev@redhat.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Paul Clements <paul.clements@steeleye.com>
Cc: Tim Waugh <tim@cyberelk.net>
Cc: Jeff Garzik <jgarzik@pobox.com>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Alex Dubov <oakad@yahoo.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Dario Ballabio <ballabio_dario@emc.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: unsik Kim <donari75@gmail.com>
Cc: Laurent Vivier <Laurent@lvivier.info>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 arch/um/drivers/ubd_kern.c          |  2 +-
 block/as-iosched.c                  | 18 +++++++------
 block/blk-barrier.c                 |  2 +-
 block/blk-core.c                    | 17 ++++++------
 block/blk-merge.c                   | 10 +++----
 block/cfq-iosched.c                 | 18 ++++++-------
 block/deadline-iosched.c            |  2 +-
 block/elevator.c                    | 22 +++++++--------
 drivers/block/DAC960.c              |  6 ++---
 drivers/block/amiflop.c             |  6 ++---
 drivers/block/ataflop.c             | 10 +++----
 drivers/block/cciss.c               | 22 +++++++--------
 drivers/block/cpqarray.c            |  9 ++++---
 drivers/block/floppy.c              | 53 +++++++++++++++++++------------------
 drivers/block/hd.c                  | 14 +++++-----
 drivers/block/mg_disk.c             | 25 ++++++++---------
 drivers/block/nbd.c                 | 12 ++++-----
 drivers/block/paride/pcd.c          |  4 +--
 drivers/block/paride/pd.c           |  8 +++---
 drivers/block/paride/pf.c           |  8 +++---
 drivers/block/ps3disk.c             |  9 +++----
 drivers/block/sunvdc.c              |  2 +-
 drivers/block/swim.c                |  6 ++---
 drivers/block/swim3.c               | 34 +++++++++++++-----------
 drivers/block/sx8.c                 |  6 ++---
 drivers/block/ub.c                  |  6 ++---
 drivers/block/viodasd.c             |  2 +-
 drivers/block/virtio_blk.c          |  2 +-
 drivers/block/xd.c                  |  4 +--
 drivers/block/xen-blkfront.c        | 11 ++++----
 drivers/block/xsysace.c             | 17 ++++++------
 drivers/block/z2ram.c               |  6 ++---
 drivers/cdrom/gdrom.c               |  6 ++---
 drivers/cdrom/viocd.c               |  2 +-
 drivers/memstick/core/mspro_block.c |  6 ++---
 drivers/message/i2o/i2o_block.c     | 20 +++++++-------
 drivers/mmc/card/block.c            | 10 +++----
 drivers/mtd/mtd_blkdevs.c           |  7 ++---
 drivers/s390/block/dasd.c           |  2 +-
 drivers/s390/block/dasd_diag.c      |  5 ++--
 drivers/s390/block/dasd_eckd.c      |  6 ++---
 drivers/s390/block/dasd_fba.c       |  7 ++---
 drivers/s390/char/tape_34xx.c       |  2 +-
 drivers/s390/char/tape_3590.c       |  2 +-
 drivers/s390/char/tape_block.c      |  2 +-
 drivers/sbus/char/jsflash.c         |  4 +--
 drivers/scsi/eata.c                 | 24 ++++++++---------
 drivers/scsi/lpfc/lpfc_scsi.c       | 22 +++++++--------
 drivers/scsi/scsi_lib.c             |  6 ++---
 drivers/scsi/sd.c                   | 24 ++++++++---------
 drivers/scsi/sd_dif.c               |  2 +-
 drivers/scsi/sr.c                   | 15 ++++++-----
 drivers/scsi/u14-34f.c              | 22 ++++++++-------
 include/scsi/scsi_cmnd.h            |  2 +-
 54 files changed, 292 insertions(+), 279 deletions(-)

(limited to 'include')

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 433012764a3..402ba8f70fc 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -1239,7 +1239,7 @@ static void do_ubd_request(struct request_queue *q)
 		}
 
 		req = dev->request;
-		sector = req->sector;
+		sector = blk_rq_pos(req);
 		while(dev->start_sg < dev->end_sg){
 			struct scatterlist *sg = &dev->sg[dev->start_sg];
 
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 45bd07059c2..7a12cf6ee1d 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -306,8 +306,8 @@ as_choose_req(struct as_data *ad, struct request *rq1, struct request *rq2)
 	data_dir = rq_is_sync(rq1);
 
 	last = ad->last_sector[data_dir];
-	s1 = rq1->sector;
-	s2 = rq2->sector;
+	s1 = blk_rq_pos(rq1);
+	s2 = blk_rq_pos(rq2);
 
 	BUG_ON(data_dir != rq_is_sync(rq2));
 
@@ -566,13 +566,15 @@ static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
 			as_update_thinktime(ad, aic, thinktime);
 
 			/* Calculate read -> read seek distance */
-			if (aic->last_request_pos < rq->sector)
-				seek_dist = rq->sector - aic->last_request_pos;
+			if (aic->last_request_pos < blk_rq_pos(rq))
+				seek_dist = blk_rq_pos(rq) -
+					    aic->last_request_pos;
 			else
-				seek_dist = aic->last_request_pos - rq->sector;
+				seek_dist = aic->last_request_pos -
+					    blk_rq_pos(rq);
 			as_update_seekdist(ad, aic, seek_dist);
 		}
-		aic->last_request_pos = rq->sector + rq->nr_sectors;
+		aic->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
 		set_bit(AS_TASK_IOSTARTED, &aic->state);
 		spin_unlock(&aic->lock);
 	}
@@ -587,7 +589,7 @@ static int as_close_req(struct as_data *ad, struct as_io_context *aic,
 {
 	unsigned long delay;	/* jiffies */
 	sector_t last = ad->last_sector[ad->batch_data_dir];
-	sector_t next = rq->sector;
+	sector_t next = blk_rq_pos(rq);
 	sector_t delta; /* acceptable close offset (in sectors) */
 	sector_t s;
 
@@ -981,7 +983,7 @@ static void as_move_to_dispatch(struct as_data *ad, struct request *rq)
 	 * This has to be set in order to be correctly updated by
 	 * as_find_next_rq
 	 */
-	ad->last_sector[data_dir] = rq->sector + rq->nr_sectors;
+	ad->last_sector[data_dir] = blk_rq_pos(rq) + blk_rq_sectors(rq);
 
 	if (data_dir == BLK_RW_SYNC) {
 		struct io_context *ioc = RQ_IOC(rq);
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index c167de5b9ef..8713c2fbc4f 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -324,7 +324,7 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
 	/*
 	 * The driver must store the error location in ->bi_sector, if
 	 * it supports it. For non-stacked drivers, this should be copied
-	 * from rq->sector.
+	 * from blk_rq_pos(rq).
 	 */
 	if (error_sector)
 		*error_sector = bio->bi_sector;
diff --git a/block/blk-core.c b/block/blk-core.c
index 895e55b74a4..82dc20621c0 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -72,7 +72,7 @@ static void drive_stat_acct(struct request *rq, int new_io)
 		return;
 
 	cpu = part_stat_lock();
-	part = disk_map_sector_rcu(rq->rq_disk, rq->sector);
+	part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
 
 	if (!new_io)
 		part_stat_inc(cpu, part, merges[rw]);
@@ -185,10 +185,9 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
 		rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
 		rq->cmd_flags);
 
-	printk(KERN_INFO "  sector %llu, nr/cnr %lu/%u\n",
-						(unsigned long long)rq->sector,
-						rq->nr_sectors,
-						rq->current_nr_sectors);
+	printk(KERN_INFO "  sector %llu, nr/cnr %u/%u\n",
+	       (unsigned long long)blk_rq_pos(rq),
+	       blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
 	printk(KERN_INFO "  bio %p, biotail %p, buffer %p, len %u\n",
 						rq->bio, rq->biotail,
 						rq->buffer, rq->data_len);
@@ -1557,7 +1556,7 @@ EXPORT_SYMBOL(submit_bio);
  */
 int blk_rq_check_limits(struct request_queue *q, struct request *rq)
 {
-	if (rq->nr_sectors > q->max_sectors ||
+	if (blk_rq_sectors(rq) > q->max_sectors ||
 	    rq->data_len > q->max_hw_sectors << 9) {
 		printk(KERN_ERR "%s: over max size limit.\n", __func__);
 		return -EIO;
@@ -1645,7 +1644,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
 		int cpu;
 
 		cpu = part_stat_lock();
-		part = disk_map_sector_rcu(req->rq_disk, req->sector);
+		part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
 		part_stat_add(cpu, part, sectors[rw], bytes >> 9);
 		part_stat_unlock();
 	}
@@ -1665,7 +1664,7 @@ static void blk_account_io_done(struct request *req)
 		int cpu;
 
 		cpu = part_stat_lock();
-		part = disk_map_sector_rcu(req->rq_disk, req->sector);
+		part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
 
 		part_stat_inc(cpu, part, ios[rw]);
 		part_stat_add(cpu, part, ticks[rw], duration);
@@ -1846,7 +1845,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 	if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) {
 		printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n",
 				req->rq_disk ? req->rq_disk->disk_name : "?",
-				(unsigned long long)req->sector);
+				(unsigned long long)blk_rq_pos(req));
 	}
 
 	blk_account_io_completion(req, nr_bytes);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 23d2a6fe34a..bf62a87a9da 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -259,7 +259,7 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
 	else
 		max_sectors = q->max_sectors;
 
-	if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
+	if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
 		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
@@ -284,7 +284,7 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
 		max_sectors = q->max_sectors;
 
 
-	if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
+	if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
 		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
@@ -315,7 +315,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
 	/*
 	 * Will it become too large?
 	 */
-	if ((req->nr_sectors + next->nr_sectors) > q->max_sectors)
+	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > q->max_sectors)
 		return 0;
 
 	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
@@ -345,7 +345,7 @@ static void blk_account_io_merge(struct request *req)
 		int cpu;
 
 		cpu = part_stat_lock();
-		part = disk_map_sector_rcu(req->rq_disk, req->sector);
+		part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
 
 		part_round_stats(cpu, part);
 		part_dec_in_flight(part);
@@ -366,7 +366,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
 	/*
 	 * not contiguous
 	 */
-	if (req->sector + req->nr_sectors != next->sector)
+	if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
 		return 0;
 
 	if (rq_data_dir(req) != rq_data_dir(next)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 575083a9ffe..db4d990a66b 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -349,8 +349,8 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2)
 	else if (rq_is_meta(rq2) && !rq_is_meta(rq1))
 		return rq2;
 
-	s1 = rq1->sector;
-	s2 = rq2->sector;
+	s1 = blk_rq_pos(rq1);
+	s2 = blk_rq_pos(rq2);
 
 	last = cfqd->last_position;
 
@@ -949,10 +949,10 @@ static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd,
 static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
 					  struct request *rq)
 {
-	if (rq->sector >= cfqd->last_position)
-		return rq->sector - cfqd->last_position;
+	if (blk_rq_pos(rq) >= cfqd->last_position)
+		return blk_rq_pos(rq) - cfqd->last_position;
 	else
-		return cfqd->last_position - rq->sector;
+		return cfqd->last_position - blk_rq_pos(rq);
 }
 
 #define CIC_SEEK_THR	8 * 1024
@@ -1918,10 +1918,10 @@ cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic,
 
 	if (!cic->last_request_pos)
 		sdist = 0;
-	else if (cic->last_request_pos < rq->sector)
-		sdist = rq->sector - cic->last_request_pos;
+	else if (cic->last_request_pos < blk_rq_pos(rq))
+		sdist = blk_rq_pos(rq) - cic->last_request_pos;
 	else
-		sdist = cic->last_request_pos - rq->sector;
+		sdist = cic->last_request_pos - blk_rq_pos(rq);
 
 	/*
 	 * Don't allow the seek distance to get too large from the
@@ -2071,7 +2071,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	cfq_update_io_seektime(cfqd, cic, rq);
 	cfq_update_idle_window(cfqd, cfqq, cic);
 
-	cic->last_request_pos = rq->sector + rq->nr_sectors;
+	cic->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
 
 	if (cfqq == cfqd->active_queue) {
 		/*
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index c4d991d4ade..b547cbca7b2 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -138,7 +138,7 @@ deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
 
 		__rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
 		if (__rq) {
-			BUG_ON(sector != __rq->sector);
+			BUG_ON(sector != blk_rq_pos(__rq));
 
 			if (elv_rq_merge_ok(__rq, bio)) {
 				ret = ELEVATOR_FRONT_MERGE;
diff --git a/block/elevator.c b/block/elevator.c
index 1af5d9f04af..918920056e4 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -52,7 +52,7 @@ static const int elv_hash_shift = 6;
 #define ELV_HASH_FN(sec)	\
 		(hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
 #define ELV_HASH_ENTRIES	(1 << elv_hash_shift)
-#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
+#define rq_hash_key(rq)		(blk_rq_pos(rq) + blk_rq_sectors(rq))
 
 DEFINE_TRACE(block_rq_insert);
 DEFINE_TRACE(block_rq_issue);
@@ -119,9 +119,9 @@ static inline int elv_try_merge(struct request *__rq, struct bio *bio)
 	 * we can merge and sequence is ok, check if it's possible
 	 */
 	if (elv_rq_merge_ok(__rq, bio)) {
-		if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
+		if (blk_rq_pos(__rq) + blk_rq_sectors(__rq) == bio->bi_sector)
 			ret = ELEVATOR_BACK_MERGE;
-		else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
+		else if (blk_rq_pos(__rq) - bio_sectors(bio) == bio->bi_sector)
 			ret = ELEVATOR_FRONT_MERGE;
 	}
 
@@ -370,9 +370,9 @@ struct request *elv_rb_add(struct rb_root *root, struct request *rq)
 		parent = *p;
 		__rq = rb_entry(parent, struct request, rb_node);
 
-		if (rq->sector < __rq->sector)
+		if (blk_rq_pos(rq) < blk_rq_pos(__rq))
 			p = &(*p)->rb_left;
-		else if (rq->sector > __rq->sector)
+		else if (blk_rq_pos(rq) > blk_rq_pos(__rq))
 			p = &(*p)->rb_right;
 		else
 			return __rq;
@@ -400,9 +400,9 @@ struct request *elv_rb_find(struct rb_root *root, sector_t sector)
 	while (n) {
 		rq = rb_entry(n, struct request, rb_node);
 
-		if (sector < rq->sector)
+		if (sector < blk_rq_pos(rq))
 			n = n->rb_left;
-		else if (sector > rq->sector)
+		else if (sector > blk_rq_pos(rq))
 			n = n->rb_right;
 		else
 			return rq;
@@ -441,14 +441,14 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
 			break;
 		if (pos->cmd_flags & stop_flags)
 			break;
-		if (rq->sector >= boundary) {
-			if (pos->sector < boundary)
+		if (blk_rq_pos(rq) >= boundary) {
+			if (blk_rq_pos(pos) < boundary)
 				continue;
 		} else {
-			if (pos->sector >= boundary)
+			if (blk_rq_pos(pos) >= boundary)
 				break;
 		}
-		if (rq->sector >= pos->sector)
+		if (blk_rq_pos(rq) >= blk_rq_pos(pos))
 			break;
 	}
 
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index f22ed6cc69f..774ab05973a 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -3338,8 +3338,8 @@ static int DAC960_process_queue(DAC960_Controller_T *Controller, struct request_
 	}
 	Command->Completion = Request->end_io_data;
 	Command->LogicalDriveNumber = (long)Request->rq_disk->private_data;
-	Command->BlockNumber = Request->sector;
-	Command->BlockCount = Request->nr_sectors;
+	Command->BlockNumber = blk_rq_pos(Request);
+	Command->BlockCount = blk_rq_sectors(Request);
 	Command->Request = Request;
 	blkdev_dequeue_request(Request);
 	Command->SegmentCount = blk_rq_map_sg(req_q,
@@ -3431,7 +3431,7 @@ static void DAC960_queue_partial_rw(DAC960_Command_T *Command)
    * successfully as possible.
    */
   Command->SegmentCount = 1;
-  Command->BlockNumber = Request->sector;
+  Command->BlockNumber = blk_rq_pos(Request);
   Command->BlockCount = 1;
   DAC960_QueueReadWriteCommand(Command);
   return;
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 8ff95f2c0ed..e4a14b94828 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1351,13 +1351,13 @@ static void redo_fd_request(void)
 	drive = floppy - unit;
 
 	/* Here someone could investigate to be more efficient */
-	for (cnt = 0; cnt < CURRENT->current_nr_sectors; cnt++) { 
+	for (cnt = 0; cnt < blk_rq_cur_sectors(CURRENT); cnt++) {
 #ifdef DEBUG
 		printk("fd: sector %ld + %d requested for %s\n",
-		       CURRENT->sector,cnt,
+		       blk_rq_pos(CURRENT), cnt,
 		       (rq_data_dir(CURRENT) == READ) ? "read" : "write");
 #endif
-		block = CURRENT->sector + cnt;
+		block = blk_rq_pos(CURRENT) + cnt;
 		if ((int)block > floppy->blocks) {
 			__blk_end_request_cur(CURRENT, -EIO);
 			goto repeat;
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 25067287211..234024cda5e 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -725,7 +725,7 @@ static void do_fd_action( int drive )
 	    if (IS_BUFFERED( drive, ReqSide, ReqTrack )) {
 		if (ReqCmd == READ) {
 		    copy_buffer( SECTOR_BUFFER(ReqSector), ReqData );
-		    if (++ReqCnt < CURRENT->current_nr_sectors) {
+		    if (++ReqCnt < blk_rq_cur_sectors(CURRENT)) {
 			/* read next sector */
 			setup_req_params( drive );
 			goto repeat;
@@ -1130,7 +1130,7 @@ static void fd_rwsec_done1(int status)
 		}
 	}
   
-	if (++ReqCnt < CURRENT->current_nr_sectors) {
+	if (++ReqCnt < blk_rq_cur_sectors(CURRENT)) {
 		/* read next sector */
 		setup_req_params( SelectedDrive );
 		do_fd_action( SelectedDrive );
@@ -1394,7 +1394,7 @@ static void redo_fd_request(void)
 
 	DPRINT(("redo_fd_request: CURRENT=%p dev=%s CURRENT->sector=%ld\n",
 		CURRENT, CURRENT ? CURRENT->rq_disk->disk_name : "",
-		CURRENT ? CURRENT->sector : 0 ));
+		CURRENT ? blk_rq_pos(CURRENT) : 0 ));
 
 	IsFormatting = 0;
 
@@ -1440,7 +1440,7 @@ repeat:
 		UD.autoprobe = 0;
 	}
 	
-	if (CURRENT->sector + 1 > UDT->blocks) {
+	if (blk_rq_pos(CURRENT) + 1 > UDT->blocks) {
 		__blk_end_request_cur(CURRENT, -EIO);
 		goto repeat;
 	}
@@ -1450,7 +1450,7 @@ repeat:
 		
 	ReqCnt = 0;
 	ReqCmd = rq_data_dir(CURRENT);
-	ReqBlock = CURRENT->sector;
+	ReqBlock = blk_rq_pos(CURRENT);
 	ReqBuffer = CURRENT->buffer;
 	setup_req_params( drive );
 	do_fd_action( drive );
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index f22d4932433..ab7b04c0db7 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -2835,10 +2835,10 @@ static void do_cciss_request(struct request_queue *q)
 	c->Request.Timeout = 0;	// Don't time out
 	c->Request.CDB[0] =
 	    (rq_data_dir(creq) == READ) ? h->cciss_read : h->cciss_write;
-	start_blk = creq->sector;
+	start_blk = blk_rq_pos(creq);
 #ifdef CCISS_DEBUG
-	printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n", (int)creq->sector,
-	       (int)creq->nr_sectors);
+	printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n",
+	       (int)blk_rq_pos(creq), (int)blk_rq_sectors(creq));
 #endif				/* CCISS_DEBUG */
 
 	sg_init_table(tmp_sg, MAXSGENTRIES);
@@ -2864,8 +2864,8 @@ static void do_cciss_request(struct request_queue *q)
 		h->maxSG = seg;
 
 #ifdef CCISS_DEBUG
-	printk(KERN_DEBUG "cciss: Submitting %lu sectors in %d segments\n",
-	       creq->nr_sectors, seg);
+	printk(KERN_DEBUG "cciss: Submitting %u sectors in %d segments\n",
+	       blk_rq_sectors(creq), seg);
 #endif				/* CCISS_DEBUG */
 
 	c->Header.SGList = c->Header.SGTotal = seg;
@@ -2877,8 +2877,8 @@ static void do_cciss_request(struct request_queue *q)
 			c->Request.CDB[4] = (start_blk >> 8) & 0xff;
 			c->Request.CDB[5] = start_blk & 0xff;
 			c->Request.CDB[6] = 0;	// (sect >> 24) & 0xff; MSB
-			c->Request.CDB[7] = (creq->nr_sectors >> 8) & 0xff;
-			c->Request.CDB[8] = creq->nr_sectors & 0xff;
+			c->Request.CDB[7] = (blk_rq_sectors(creq) >> 8) & 0xff;
+			c->Request.CDB[8] = blk_rq_sectors(creq) & 0xff;
 			c->Request.CDB[9] = c->Request.CDB[11] = c->Request.CDB[12] = 0;
 		} else {
 			u32 upper32 = upper_32_bits(start_blk);
@@ -2893,10 +2893,10 @@ static void do_cciss_request(struct request_queue *q)
 			c->Request.CDB[7]= (start_blk >> 16) & 0xff;
 			c->Request.CDB[8]= (start_blk >>  8) & 0xff;
 			c->Request.CDB[9]= start_blk & 0xff;
-			c->Request.CDB[10]= (creq->nr_sectors >>  24) & 0xff;
-			c->Request.CDB[11]= (creq->nr_sectors >>  16) & 0xff;
-			c->Request.CDB[12]= (creq->nr_sectors >>  8) & 0xff;
-			c->Request.CDB[13]= creq->nr_sectors & 0xff;
+			c->Request.CDB[10]= (blk_rq_sectors(creq) >> 24) & 0xff;
+			c->Request.CDB[11]= (blk_rq_sectors(creq) >> 16) & 0xff;
+			c->Request.CDB[12]= (blk_rq_sectors(creq) >>  8) & 0xff;
+			c->Request.CDB[13]= blk_rq_sectors(creq) & 0xff;
 			c->Request.CDB[14] = c->Request.CDB[15] = 0;
 		}
 	} else if (blk_pc_request(creq)) {
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 488a8f4a60a..a5caeff4718 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -919,10 +919,11 @@ queue_next:
 	c->hdr.size = sizeof(rblk_t) >> 2;
 	c->size += sizeof(rblk_t);
 
-	c->req.hdr.blk = creq->sector;
+	c->req.hdr.blk = blk_rq_pos(creq);
 	c->rq = creq;
 DBGPX(
-	printk("sector=%d, nr_sectors=%d\n", creq->sector, creq->nr_sectors);
+	printk("sector=%d, nr_sectors=%u\n",
+	       blk_rq_pos(creq), blk_rq_sectors(creq));
 );
 	sg_init_table(tmp_sg, SG_MAX);
 	seg = blk_rq_map_sg(q, creq, tmp_sg);
@@ -940,9 +941,9 @@ DBGPX(
 						 tmp_sg[i].offset,
 						 tmp_sg[i].length, dir);
 	}
-DBGPX(	printk("Submitting %d sectors in %d segments\n", creq->nr_sectors, seg); );
+DBGPX(	printk("Submitting %u sectors in %d segments\n", blk_rq_sectors(creq), seg); );
 	c->req.hdr.sg_cnt = seg;
-	c->req.hdr.blk_cnt = creq->nr_sectors;
+	c->req.hdr.blk_cnt = blk_rq_sectors(creq);
 	c->req.hdr.cmd = (rq_data_dir(creq) == READ) ? IDA_READ : IDA_WRITE;
 	c->type = CMD_RWREQ;
 
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 1300df6f164..45248628338 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2303,7 +2303,7 @@ static void floppy_end_request(struct request *req, int error)
 
 	/* current_count_sectors can be zero if transfer failed */
 	if (error)
-		nr_sectors = req->current_nr_sectors;
+		nr_sectors = blk_rq_cur_sectors(req);
 	if (__blk_end_request(req, error, nr_sectors << 9))
 		return;
 
@@ -2332,7 +2332,7 @@ static void request_done(int uptodate)
 	if (uptodate) {
 		/* maintain values for invalidation on geometry
 		 * change */
-		block = current_count_sectors + req->sector;
+		block = current_count_sectors + blk_rq_pos(req);
 		INFBOUND(DRS->maxblock, block);
 		if (block > _floppy->sect)
 			DRS->maxtrack = 1;
@@ -2346,10 +2346,10 @@ static void request_done(int uptodate)
 			/* record write error information */
 			DRWE->write_errors++;
 			if (DRWE->write_errors == 1) {
-				DRWE->first_error_sector = req->sector;
+				DRWE->first_error_sector = blk_rq_pos(req);
 				DRWE->first_error_generation = DRS->generation;
 			}
-			DRWE->last_error_sector = req->sector;
+			DRWE->last_error_sector = blk_rq_pos(req);
 			DRWE->last_error_generation = DRS->generation;
 		}
 		spin_lock_irqsave(q->queue_lock, flags);
@@ -2503,24 +2503,24 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2)
 
 	max_sector = transfer_size(ssize,
 				   min(max_sector, max_sector_2),
-				   current_req->nr_sectors);
+				   blk_rq_sectors(current_req));
 
 	if (current_count_sectors <= 0 && CT(COMMAND) == FD_WRITE &&
-	    buffer_max > fsector_t + current_req->nr_sectors)
+	    buffer_max > fsector_t + blk_rq_sectors(current_req))
 		current_count_sectors = min_t(int, buffer_max - fsector_t,
-					      current_req->nr_sectors);
+					      blk_rq_sectors(current_req));
 
 	remaining = current_count_sectors << 9;
 #ifdef FLOPPY_SANITY_CHECK
-	if ((remaining >> 9) > current_req->nr_sectors &&
+	if ((remaining >> 9) > blk_rq_sectors(current_req) &&
 	    CT(COMMAND) == FD_WRITE) {
 		DPRINT("in copy buffer\n");
 		printk("current_count_sectors=%ld\n", current_count_sectors);
 		printk("remaining=%d\n", remaining >> 9);
-		printk("current_req->nr_sectors=%ld\n",
-		       current_req->nr_sectors);
+		printk("current_req->nr_sectors=%u\n",
+		       blk_rq_sectors(current_req));
 		printk("current_req->current_nr_sectors=%u\n",
-		       current_req->current_nr_sectors);
+		       blk_rq_cur_sectors(current_req));
 		printk("max_sector=%d\n", max_sector);
 		printk("ssize=%d\n", ssize);
 	}
@@ -2530,7 +2530,7 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2)
 
 	dma_buffer = floppy_track_buffer + ((fsector_t - buffer_min) << 9);
 
-	size = current_req->current_nr_sectors << 9;
+	size = blk_rq_cur_sectors(current_req) << 9;
 
 	rq_for_each_segment(bv, current_req, iter) {
 		if (!remaining)
@@ -2648,10 +2648,10 @@ static int make_raw_rw_request(void)
 
 	max_sector = _floppy->sect * _floppy->head;
 
-	TRACK = (int)current_req->sector / max_sector;
-	fsector_t = (int)current_req->sector % max_sector;
+	TRACK = (int)blk_rq_pos(current_req) / max_sector;
+	fsector_t = (int)blk_rq_pos(current_req) % max_sector;
 	if (_floppy->track && TRACK >= _floppy->track) {
-		if (current_req->current_nr_sectors & 1) {
+		if (blk_rq_cur_sectors(current_req) & 1) {
 			current_count_sectors = 1;
 			return 1;
 		} else
@@ -2669,7 +2669,7 @@ static int make_raw_rw_request(void)
 		if (fsector_t >= max_sector) {
 			current_count_sectors =
 			    min_t(int, _floppy->sect - fsector_t,
-				  current_req->nr_sectors);
+				  blk_rq_sectors(current_req));
 			return 1;
 		}
 		SIZECODE = 2;
@@ -2720,7 +2720,7 @@ static int make_raw_rw_request(void)
 
 	in_sector_offset = (fsector_t % _floppy->sect) % ssize;
 	aligned_sector_t = fsector_t - in_sector_offset;
-	max_size = current_req->nr_sectors;
+	max_size = blk_rq_sectors(current_req);
 	if ((raw_cmd->track == buffer_track) &&
 	    (current_drive == buffer_drive) &&
 	    (fsector_t >= buffer_min) && (fsector_t < buffer_max)) {
@@ -2729,10 +2729,10 @@ static int make_raw_rw_request(void)
 			copy_buffer(1, max_sector, buffer_max);
 			return 1;
 		}
-	} else if (in_sector_offset || current_req->nr_sectors < ssize) {
+	} else if (in_sector_offset || blk_rq_sectors(current_req) < ssize) {
 		if (CT(COMMAND) == FD_WRITE) {
-			if (fsector_t + current_req->nr_sectors > ssize &&
-			    fsector_t + current_req->nr_sectors < ssize + ssize)
+			if (fsector_t + blk_rq_sectors(current_req) > ssize &&
+			    fsector_t + blk_rq_sectors(current_req) < ssize + ssize)
 				max_size = ssize + ssize;
 			else
 				max_size = ssize;
@@ -2776,7 +2776,7 @@ static int make_raw_rw_request(void)
 		    (indirect * 2 > direct * 3 &&
 		     *errors < DP->max_errors.read_track && ((!probing
 		       || (DP->read_track & (1 << DRS->probed_format)))))) {
-			max_size = current_req->nr_sectors;
+			max_size = blk_rq_sectors(current_req);
 		} else {
 			raw_cmd->kernel_data = current_req->buffer;
 			raw_cmd->length = current_count_sectors << 9;
@@ -2801,7 +2801,7 @@ static int make_raw_rw_request(void)
 	    fsector_t > buffer_max ||
 	    fsector_t < buffer_min ||
 	    ((CT(COMMAND) == FD_READ ||
-	      (!in_sector_offset && current_req->nr_sectors >= ssize)) &&
+	      (!in_sector_offset && blk_rq_sectors(current_req) >= ssize)) &&
 	     max_sector > 2 * max_buffer_sectors + buffer_min &&
 	     max_size + fsector_t > 2 * max_buffer_sectors + buffer_min)
 	    /* not enough space */
@@ -2879,8 +2879,8 @@ static int make_raw_rw_request(void)
 				printk("write\n");
 			return 0;
 		}
-	} else if (raw_cmd->length > current_req->nr_sectors << 9 ||
-		   current_count_sectors > current_req->nr_sectors) {
+	} else if (raw_cmd->length > blk_rq_sectors(current_req) << 9 ||
+		   current_count_sectors > blk_rq_sectors(current_req)) {
 		DPRINT("buffer overrun in direct transfer\n");
 		return 0;
 	} else if (raw_cmd->length < current_count_sectors << 9) {
@@ -2990,8 +2990,9 @@ static void do_fd_request(struct request_queue * q)
 	if (usage_count == 0) {
 		printk("warning: usage count=0, current_req=%p exiting\n",
 		       current_req);
-		printk("sect=%ld type=%x flags=%x\n", (long)current_req->sector,
-		       current_req->cmd_type, current_req->cmd_flags);
+		printk("sect=%ld type=%x flags=%x\n",
+		       (long)blk_rq_pos(current_req), current_req->cmd_type,
+		       current_req->cmd_flags);
 		return;
 	}
 	if (test_bit(0, &fdc_busy)) {
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index 75b9ca95c4e..a3b39940ce0 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -228,7 +228,7 @@ static void dump_status(const char *msg, unsigned int stat)
 			printk(", CHS=%d/%d/%d", (inb(HD_HCYL)<<8) + inb(HD_LCYL),
 				inb(HD_CURRENT) & 0xf, inb(HD_SECTOR));
 			if (CURRENT)
-				printk(", sector=%ld", CURRENT->sector);
+				printk(", sector=%ld", blk_rq_pos(CURRENT));
 		}
 		printk("\n");
 	}
@@ -457,9 +457,9 @@ ok_to_read:
 	req = CURRENT;
 	insw(HD_DATA, req->buffer, 256);
 #ifdef DEBUG
-	printk("%s: read: sector %ld, remaining = %ld, buffer=%p\n",
-		req->rq_disk->disk_name, req->sector + 1, req->nr_sectors - 1,
-		req->buffer+512);
+	printk("%s: read: sector %ld, remaining = %u, buffer=%p\n",
+	       req->rq_disk->disk_name, blk_rq_pos(req) + 1,
+	       blk_rq_sectors(req) - 1, req->buffer+512);
 #endif
 	if (__blk_end_request(req, 0, 512)) {
 		SET_HANDLER(&read_intr);
@@ -485,7 +485,7 @@ static void write_intr(void)
 			continue;
 		if (!OK_STATUS(i))
 			break;
-		if ((req->nr_sectors <= 1) || (i & DRQ_STAT))
+		if ((blk_rq_sectors(req) <= 1) || (i & DRQ_STAT))
 			goto ok_to_write;
 	} while (--retries > 0);
 	dump_status("write_intr", i);
@@ -589,8 +589,8 @@ repeat:
 		return;
 	}
 	disk = req->rq_disk->private_data;
-	block = req->sector;
-	nsect = req->nr_sectors;
+	block = blk_rq_pos(req);
+	nsect = blk_rq_sectors(req);
 	if (block >= get_capacity(req->rq_disk) ||
 	    ((block+nsect) > get_capacity(req->rq_disk))) {
 		printk("%s: bad access: block=%d, count=%d\n",
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index 71e56cc28ca..826c3492b9f 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -220,7 +220,8 @@ static void mg_dump_status(const char *msg, unsigned int stat,
 			if (host->breq) {
 				req = elv_next_request(host->breq);
 				if (req)
-					printk(", sector=%u", (u32)req->sector);
+					printk(", sector=%u",
+					       (unsigned int)blk_rq_pos(req));
 			}
 
 		}
@@ -493,12 +494,12 @@ static void mg_read(struct request *req)
 	u32 j;
 	struct mg_host *host = req->rq_disk->private_data;
 
-	if (mg_out(host, req->sector, req->nr_sectors, MG_CMD_RD, NULL) !=
-			MG_ERR_NONE)
+	if (mg_out(host, blk_rq_pos(req), blk_rq_sectors(req),
+		   MG_CMD_RD, NULL) != MG_ERR_NONE)
 		mg_bad_rw_intr(host);
 
 	MG_DBG("requested %d sects (from %ld), buffer=0x%p\n",
-	       req->nr_sectors, req->sector, req->buffer);
+	       blk_rq_sectors(req), blk_rq_pos(req), req->buffer);
 
 	do {
 		u16 *buff = (u16 *)req->buffer;
@@ -522,14 +523,14 @@ static void mg_write(struct request *req)
 	u32 j;
 	struct mg_host *host = req->rq_disk->private_data;
 
-	if (mg_out(host, req->sector, req->nr_sectors, MG_CMD_WR, NULL) !=
-			MG_ERR_NONE) {
+	if (mg_out(host, blk_rq_pos(req), blk_rq_sectors(req),
+		   MG_CMD_WR, NULL) != MG_ERR_NONE) {
 		mg_bad_rw_intr(host);
 		return;
 	}
 
 	MG_DBG("requested %d sects (from %ld), buffer=0x%p\n",
-	       req->nr_sectors, req->sector, req->buffer);
+	       blk_rq_sectors(req), blk_rq_pos(req), req->buffer);
 
 	do {
 		u16 *buff = (u16 *)req->buffer;
@@ -579,7 +580,7 @@ ok_to_read:
 			      (i << 1));
 
 	MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n",
-			req->sector, req->nr_sectors - 1, req->buffer);
+	       blk_rq_pos(req), blk_rq_sectors(req) - 1, req->buffer);
 
 	/* send read confirm */
 	outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND);
@@ -609,7 +610,7 @@ static void mg_write_intr(struct mg_host *host)
 			break;
 		if (!MG_READY_OK(i))
 			break;
-		if ((req->nr_sectors <= 1) || (i & ATA_DRQ))
+		if ((blk_rq_sectors(req) <= 1) || (i & ATA_DRQ))
 			goto ok_to_write;
 	} while (0);
 	mg_dump_status("mg_write_intr", i, host);
@@ -627,7 +628,7 @@ ok_to_write:
 			buff++;
 		}
 		MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n",
-				req->sector, req->nr_sectors, req->buffer);
+		       blk_rq_pos(req), blk_rq_sectors(req), req->buffer);
 		host->mg_do_intr = mg_write_intr;
 		mod_timer(&host->timer, jiffies + 3 * HZ);
 	}
@@ -749,9 +750,9 @@ static void mg_request(struct request_queue *q)
 
 		del_timer(&host->timer);
 
-		sect_num = req->sector;
+		sect_num = blk_rq_pos(req);
 		/* deal whole segments */
-		sect_cnt = req->nr_sectors;
+		sect_cnt = blk_rq_sectors(req);
 
 		/* sanity check */
 		if (sect_num >= get_capacity(req->rq_disk) ||
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index a9ab8be9d92..977a5737793 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -110,7 +110,7 @@ static void nbd_end_request(struct request *req)
 			req, error ? "failed" : "done");
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	__blk_end_request(req, error, req->nr_sectors << 9);
+	__blk_end_request(req, error, blk_rq_sectors(req) << 9);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
@@ -231,19 +231,19 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
 {
 	int result, flags;
 	struct nbd_request request;
-	unsigned long size = req->nr_sectors << 9;
+	unsigned long size = blk_rq_sectors(req) << 9;
 
 	request.magic = htonl(NBD_REQUEST_MAGIC);
 	request.type = htonl(nbd_cmd(req));
-	request.from = cpu_to_be64((u64) req->sector << 9);
+	request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
 	request.len = htonl(size);
 	memcpy(request.handle, &req, sizeof(req));
 
-	dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%luB)\n",
+	dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n",
 			lo->disk->disk_name, req,
 			nbdcmd_to_ascii(nbd_cmd(req)),
-			(unsigned long long)req->sector << 9,
-			req->nr_sectors << 9);
+			(unsigned long long)blk_rq_pos(req) << 9,
+			blk_rq_sectors(req) << 9);
 	result = sock_xmit(lo, 1, &request, sizeof(request),
 			(nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0);
 	if (result <= 0) {
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 9fd57c2aa46..2d5dc0af55e 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -728,8 +728,8 @@ static void do_pcd_request(struct request_queue * q)
 			if (cd != pcd_current)
 				pcd_bufblk = -1;
 			pcd_current = cd;
-			pcd_sector = pcd_req->sector;
-			pcd_count = pcd_req->current_nr_sectors;
+			pcd_sector = blk_rq_pos(pcd_req);
+			pcd_count = blk_rq_cur_sectors(pcd_req);
 			pcd_buf = pcd_req->buffer;
 			pcd_busy = 1;
 			ps_set_intr(do_pcd_read, NULL, 0, nice);
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 0732df4e901..9ec5d4ac0b6 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -444,11 +444,11 @@ static enum action do_pd_io_start(void)
 
 	pd_cmd = rq_data_dir(pd_req);
 	if (pd_cmd == READ || pd_cmd == WRITE) {
-		pd_block = pd_req->sector;
-		pd_count = pd_req->current_nr_sectors;
+		pd_block = blk_rq_pos(pd_req);
+		pd_count = blk_rq_cur_sectors(pd_req);
 		if (pd_block + pd_count > get_capacity(pd_req->rq_disk))
 			return Fail;
-		pd_run = pd_req->nr_sectors;
+		pd_run = blk_rq_sectors(pd_req);
 		pd_buf = pd_req->buffer;
 		pd_retries = 0;
 		if (pd_cmd == READ)
@@ -479,7 +479,7 @@ static int pd_next_buf(void)
 		return 0;
 	spin_lock_irqsave(&pd_lock, saved_flags);
 	__blk_end_request_cur(pd_req, 0);
-	pd_count = pd_req->current_nr_sectors;
+	pd_count = blk_rq_cur_sectors(pd_req);
 	pd_buf = pd_req->buffer;
 	spin_unlock_irqrestore(&pd_lock, saved_flags);
 	return 0;
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index 3871e3586d6..e88c889aa7f 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -768,9 +768,9 @@ repeat:
 		return;
 
 	pf_current = pf_req->rq_disk->private_data;
-	pf_block = pf_req->sector;
-	pf_run = pf_req->nr_sectors;
-	pf_count = pf_req->current_nr_sectors;
+	pf_block = blk_rq_pos(pf_req);
+	pf_run = blk_rq_sectors(pf_req);
+	pf_count = blk_rq_cur_sectors(pf_req);
 
 	if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) {
 		pf_end_request(-EIO);
@@ -810,7 +810,7 @@ static int pf_next_buf(void)
 		spin_unlock_irqrestore(&pf_spin_lock, saved_flags);
 		if (!pf_req)
 			return 1;
-		pf_count = pf_req->current_nr_sectors;
+		pf_count = blk_rq_cur_sectors(pf_req);
 		pf_buf = pf_req->buffer;
 	}
 	return 0;
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index c2388673684..8d583081b50 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -134,13 +134,12 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev,
 	rq_for_each_segment(bv, req, iter)
 		n++;
 	dev_dbg(&dev->sbd.core,
-		"%s:%u: %s req has %u bvecs for %lu sectors %lu hard sectors\n",
-		__func__, __LINE__, op, n, req->nr_sectors,
-		blk_rq_sectors(req));
+		"%s:%u: %s req has %u bvecs for %u sectors\n",
+		__func__, __LINE__, op, n, blk_rq_sectors(req));
 #endif
 
-	start_sector = req->sector * priv->blocking_factor;
-	sectors = req->nr_sectors * priv->blocking_factor;
+	start_sector = blk_rq_pos(req) * priv->blocking_factor;
+	sectors = blk_rq_sectors(req) * priv->blocking_factor;
 	dev_dbg(&dev->sbd.core, "%s:%u: %s %llu sectors starting at %llu\n",
 		__func__, __LINE__, op, sectors, start_sector);
 
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index f59887c5ffb..9f351bfa15e 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -416,7 +416,7 @@ static int __send_request(struct request *req)
 		desc->slice = 0;
 	}
 	desc->status = ~0;
-	desc->offset = (req->sector << 9) / port->vdisk_block_size;
+	desc->offset = (blk_rq_pos(req) << 9) / port->vdisk_block_size;
 	desc->size = len;
 	desc->ncookies = err;
 
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 97ef4266c4c..fc6a1c32293 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -531,7 +531,7 @@ static void redo_fd_request(struct request_queue *q)
 	while ((req = elv_next_request(q))) {
 
 		fs = req->rq_disk->private_data;
-		if (req->sector < 0 || req->sector >= fs->total_secs) {
+		if (blk_rq_pos(req) >= fs->total_secs) {
 			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
@@ -551,8 +551,8 @@ static void redo_fd_request(struct request_queue *q)
 			__blk_end_request_cur(req, -EIO);
 			break;
 		case READ:
-			if (floppy_read_sectors(fs, req->sector,
-						req->current_nr_sectors,
+			if (floppy_read_sectors(fs, blk_rq_pos(req),
+						blk_rq_cur_sectors(req),
 						req->buffer)) {
 				__blk_end_request_cur(req, -EIO);
 				continue;
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 424855945b9..c1b9a4dc11b 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -312,14 +312,14 @@ static void start_request(struct floppy_state *fs)
 	}
 	while (fs->state == idle && (req = elv_next_request(swim3_queue))) {
 #if 0
-		printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%ld buf=%p\n",
+		printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%u buf=%p\n",
 		       req->rq_disk->disk_name, req->cmd,
-		       (long)req->sector, req->nr_sectors, req->buffer);
-		printk("           errors=%d current_nr_sectors=%ld\n",
-		       req->errors, req->current_nr_sectors);
+		       (long)blk_rq_pos(req), blk_rq_sectors(req), req->buffer);
+		printk("           errors=%d current_nr_sectors=%u\n",
+		       req->errors, blk_rq_cur_sectors(req));
 #endif
 
-		if (req->sector >= fs->total_secs) {
+		if (blk_rq_pos(req) >= fs->total_secs) {
 			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
@@ -337,13 +337,14 @@ static void start_request(struct floppy_state *fs)
 			}
 		}
 
-		/* Do not remove the cast. req->sector is now a sector_t and
-		 * can be 64 bits, but it will never go past 32 bits for this
-		 * driver anyway, so we can safely cast it down and not have
-		 * to do a 64/32 division
+		/* Do not remove the cast. blk_rq_pos(req) is now a
+		 * sector_t and can be 64 bits, but it will never go
+		 * past 32 bits for this driver anyway, so we can
+		 * safely cast it down and not have to do a 64/32
+		 * division
 		 */
-		fs->req_cyl = ((long)req->sector) / fs->secpercyl;
-		x = ((long)req->sector) % fs->secpercyl;
+		fs->req_cyl = ((long)blk_rq_pos(req)) / fs->secpercyl;
+		x = ((long)blk_rq_pos(req)) % fs->secpercyl;
 		fs->head = x / fs->secpertrack;
 		fs->req_sector = x % fs->secpertrack + 1;
 		fd_req = req;
@@ -420,7 +421,7 @@ static inline void setup_transfer(struct floppy_state *fs)
 	struct dbdma_cmd *cp = fs->dma_cmd;
 	struct dbdma_regs __iomem *dr = fs->dma;
 
-	if (fd_req->current_nr_sectors <= 0) {
+	if (blk_rq_cur_sectors(fd_req) <= 0) {
 		printk(KERN_ERR "swim3: transfer 0 sectors?\n");
 		return;
 	}
@@ -428,8 +429,8 @@ static inline void setup_transfer(struct floppy_state *fs)
 		n = 1;
 	else {
 		n = fs->secpertrack - fs->req_sector + 1;
-		if (n > fd_req->current_nr_sectors)
-			n = fd_req->current_nr_sectors;
+		if (n > blk_rq_cur_sectors(fd_req))
+			n = blk_rq_cur_sectors(fd_req);
 	}
 	fs->scount = n;
 	swim3_select(fs, fs->head? READ_DATA_1: READ_DATA_0);
@@ -600,7 +601,8 @@ static void xfer_timeout(unsigned long data)
 	out_8(&sw->control_bic, WRITE_SECTORS | DO_ACTION);
 	out_8(&sw->select, RELAX);
 	printk(KERN_ERR "swim3: timeout %sing sector %ld\n",
-	       (rq_data_dir(fd_req)==WRITE? "writ": "read"), (long)fd_req->sector);
+	       (rq_data_dir(fd_req)==WRITE? "writ": "read"),
+	       (long)blk_rq_pos(fd_req));
 	__blk_end_request_cur(fd_req, -EIO);
 	fs->state = idle;
 	start_request(fs);
@@ -714,7 +716,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
 			} else {
 				printk("swim3: error %sing block %ld (err=%x)\n",
 				       rq_data_dir(fd_req) == WRITE? "writ": "read",
-				       (long)fd_req->sector, err);
+				       (long)blk_rq_pos(fd_req), err);
 				__blk_end_request_cur(fd_req, -EIO);
 				fs->state = idle;
 			}
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 60e85bb6f79..087c94c8b2d 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -903,10 +903,10 @@ queue_one_request:
 	msg->sg_count	= n_elem;
 	msg->sg_type	= SGT_32BIT;
 	msg->handle	= cpu_to_le32(TAG_ENCODE(crq->tag));
-	msg->lba	= cpu_to_le32(rq->sector & 0xffffffff);
-	tmp		= (rq->sector >> 16) >> 16;
+	msg->lba	= cpu_to_le32(blk_rq_pos(rq) & 0xffffffff);
+	tmp		= (blk_rq_pos(rq) >> 16) >> 16;
 	msg->lba_high	= cpu_to_le16( (u16) tmp );
-	msg->lba_count	= cpu_to_le16(rq->nr_sectors);
+	msg->lba_count	= cpu_to_le16(blk_rq_sectors(rq));
 
 	msg_size = sizeof(struct carm_msg_rw) - sizeof(msg->sg);
 	for (i = 0; i < n_elem; i++) {
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 8c2cc71327e..dc3b899ad2b 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -726,8 +726,8 @@ static void ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun,
 	 * The call to blk_queue_hardsect_size() guarantees that request
 	 * is aligned, but it is given in terms of 512 byte units, always.
 	 */
-	block = rq->sector >> lun->capacity.bshift;
-	nblks = rq->nr_sectors >> lun->capacity.bshift;
+	block = blk_rq_pos(rq) >> lun->capacity.bshift;
+	nblks = blk_rq_sectors(rq) >> lun->capacity.bshift;
 
 	cmd->cdb[0] = (cmd->dir == UB_DIR_READ)? READ_10: WRITE_10;
 	/* 10-byte uses 4 bytes of LBA: 2147483648KB, 2097152MB, 2048GB */
@@ -739,7 +739,7 @@ static void ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun,
 	cmd->cdb[8] = nblks;
 	cmd->cdb_len = 10;
 
-	cmd->len = rq->nr_sectors * 512;
+	cmd->len = blk_rq_sectors(rq) * 512;
 }
 
 static void ub_cmd_build_packet(struct ub_dev *sc, struct ub_lun *lun,
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
index e821eed7132..2086cb12d3e 100644
--- a/drivers/block/viodasd.c
+++ b/drivers/block/viodasd.c
@@ -252,7 +252,7 @@ static int send_request(struct request *req)
 	struct viodasd_device *d;
 	unsigned long flags;
 
-	start = (u64)req->sector << 9;
+	start = (u64)blk_rq_pos(req) << 9;
 
 	if (rq_data_dir(req) == READ) {
 		direction = DMA_FROM_DEVICE;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 50745e64414..1980ab45635 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -85,7 +85,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 	vbr->req = req;
 	if (blk_fs_request(vbr->req)) {
 		vbr->out_hdr.type = 0;
-		vbr->out_hdr.sector = vbr->req->sector;
+		vbr->out_hdr.sector = blk_rq_pos(vbr->req);
 		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
 	} else if (blk_pc_request(vbr->req)) {
 		vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index 14be4c1ed1a..4ef88018bcd 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -306,8 +306,8 @@ static void do_xd_request (struct request_queue * q)
 		return;
 
 	while ((req = elv_next_request(q)) != NULL) {
-		unsigned block = req->sector;
-		unsigned count = req->nr_sectors;
+		unsigned block = blk_rq_pos(req);
+		unsigned count = blk_rq_sectors(req);
 		XD_INFO *disk = req->rq_disk->private_data;
 		int res = 0;
 		int retry;
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index b4564479f64..91fc56597e9 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -231,7 +231,7 @@ static int blkif_queue_request(struct request *req)
 	info->shadow[id].request = (unsigned long)req;
 
 	ring_req->id = id;
-	ring_req->sector_number = (blkif_sector_t)req->sector;
+	ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req);
 	ring_req->handle = info->handle;
 
 	ring_req->operation = rq_data_dir(req) ?
@@ -310,11 +310,10 @@ static void do_blkif_request(struct request_queue *rq)
 			goto wait;
 
 		pr_debug("do_blk_req %p: cmd %p, sec %lx, "
-			 "(%u/%li) buffer:%p [%s]\n",
-			 req, req->cmd, (unsigned long)req->sector,
-			 req->current_nr_sectors,
-			 req->nr_sectors, req->buffer,
-			 rq_data_dir(req) ? "write" : "read");
+			 "(%u/%u) buffer:%p [%s]\n",
+			 req, req->cmd, (unsigned long)blk_rq_pos(req),
+			 blk_rq_cur_sectors(req), blk_rq_sectors(req),
+			 req->buffer, rq_data_dir(req) ? "write" : "read");
 
 
 		blkdev_dequeue_request(req);
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 5722931d14c..97c99b43f88 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -646,13 +646,14 @@ static void ace_fsm_dostate(struct ace_device *ace)
 		/* Okay, it's a data request, set it up for transfer */
 		dev_dbg(ace->dev,
 			"request: sec=%llx hcnt=%x, ccnt=%x, dir=%i\n",
-			(unsigned long long) req->sector, blk_rq_sectors(req),
-			req->current_nr_sectors, rq_data_dir(req));
+			(unsigned long long)blk_rq_pos(req),
+			blk_rq_sectors(req), blk_rq_cur_sectors(req),
+			rq_data_dir(req));
 
 		ace->req = req;
 		ace->data_ptr = req->buffer;
-		ace->data_count = req->current_nr_sectors * ACE_BUF_PER_SECTOR;
-		ace_out32(ace, ACE_MPULBA, req->sector & 0x0FFFFFFF);
+		ace->data_count = blk_rq_cur_sectors(req) * ACE_BUF_PER_SECTOR;
+		ace_out32(ace, ACE_MPULBA, blk_rq_pos(req) & 0x0FFFFFFF);
 
 		count = blk_rq_sectors(req);
 		if (rq_data_dir(req)) {
@@ -688,7 +689,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
 			dev_dbg(ace->dev,
 				"CFBSY set; t=%i iter=%i c=%i dc=%i irq=%i\n",
 				ace->fsm_task, ace->fsm_iter_num,
-				ace->req->current_nr_sectors * 16,
+				blk_rq_cur_sectors(ace->req) * 16,
 				ace->data_count, ace->in_irq);
 			ace_fsm_yield(ace);	/* need to poll CFBSY bit */
 			break;
@@ -697,7 +698,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
 			dev_dbg(ace->dev,
 				"DATABUF not set; t=%i iter=%i c=%i dc=%i irq=%i\n",
 				ace->fsm_task, ace->fsm_iter_num,
-				ace->req->current_nr_sectors * 16,
+				blk_rq_cur_sectors(ace->req) * 16,
 				ace->data_count, ace->in_irq);
 			ace_fsm_yieldirq(ace);
 			break;
@@ -721,10 +722,10 @@ static void ace_fsm_dostate(struct ace_device *ace)
 					blk_rq_cur_bytes(ace->req))) {
 			/* dev_dbg(ace->dev, "next block; h=%u c=%u\n",
 			 *      blk_rq_sectors(ace->req),
-			 *      ace->req->current_nr_sectors);
+			 *      blk_rq_cur_sectors(ace->req));
 			 */
 			ace->data_ptr = ace->req->buffer;
-			ace->data_count = ace->req->current_nr_sectors * 16;
+			ace->data_count = blk_rq_cur_sectors(ace->req) * 16;
 			ace_fsm_yieldirq(ace);
 			break;
 		}
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index b66ad58a3c3..d4e6b71f514 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -71,12 +71,12 @@ static void do_z2_request(struct request_queue *q)
 {
 	struct request *req;
 	while ((req = elv_next_request(q)) != NULL) {
-		unsigned long start = req->sector << 9;
-		unsigned long len  = req->current_nr_sectors << 9;
+		unsigned long start = blk_rq_pos(req) << 9;
+		unsigned long len  = blk_rq_cur_sectors(req) << 9;
 
 		if (start + len > z2ram_size) {
 			printk( KERN_ERR DEVICE_NAME ": bad access: block=%lu, count=%u\n",
-				req->sector, req->current_nr_sectors);
+				blk_rq_pos(req), blk_rq_cur_sectors(req));
 			__blk_end_request_cur(req, -EIO);
 			continue;
 		}
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index cab2b1fb2fe..488423cab51 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -584,8 +584,8 @@ static void gdrom_readdisk_dma(struct work_struct *work)
 	list_for_each_safe(elem, next, &gdrom_deferred) {
 		req = list_entry(elem, struct request, queuelist);
 		spin_unlock(&gdrom_lock);
-		block = req->sector/GD_TO_BLK + GD_SESSION_OFFSET;
-		block_cnt = req->nr_sectors/GD_TO_BLK;
+		block = blk_rq_pos(req)/GD_TO_BLK + GD_SESSION_OFFSET;
+		block_cnt = blk_rq_sectors(req)/GD_TO_BLK;
 		ctrl_outl(PHYSADDR(req->buffer), GDROM_DMA_STARTADDR_REG);
 		ctrl_outl(block_cnt * GDROM_HARD_SECTOR, GDROM_DMA_LENGTH_REG);
 		ctrl_outl(1, GDROM_DMA_DIRECTION_REG);
@@ -661,7 +661,7 @@ static void gdrom_request(struct request_queue *rq)
 			printk(" write request ignored\n");
 			__blk_end_request_cur(req, -EIO);
 		}
-		if (req->nr_sectors)
+		if (blk_rq_sectors(req))
 			gdrom_request_handler_dma(req);
 		else
 			__blk_end_request_cur(req, -EIO);
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index cc3efa096e1..6e190a93d8d 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -282,7 +282,7 @@ static int send_request(struct request *req)
 			viopath_targetinst(viopath_hostLp),
 			(u64)req, VIOVERSION << 16,
 			((u64)DEVICE_NR(diskinfo) << 48) | dmaaddr,
-			(u64)req->sector * 512, len, 0);
+			(u64)blk_rq_pos(req) * 512, len, 0);
 	if (hvrc != HvLpEvent_Rc_Good) {
 		printk(VIOCD_KERN_WARNING "hv error on op %d\n", (int)hvrc);
 		return -1;
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index a41634699f8..9e600d22f40 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -677,10 +677,10 @@ try_again:
 			continue;
 		}
 
-		t_sec = msb->block_req->sector << 9;
+		t_sec = blk_rq_pos(msb->block_req) << 9;
 		sector_div(t_sec, msb->page_size);
 
-		count = msb->block_req->nr_sectors << 9;
+		count = blk_rq_sectors(msb->block_req) << 9;
 		count /= msb->page_size;
 
 		param.system = msb->system;
@@ -745,7 +745,7 @@ static int mspro_block_complete_req(struct memstick_dev *card, int error)
 					t_len *= msb->page_size;
 			}
 		} else
-			t_len = msb->block_req->nr_sectors << 9;
+			t_len = blk_rq_sectors(msb->block_req) << 9;
 
 		dev_dbg(&card->dev, "transferred %x (%d)\n", t_len, error);
 
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index 56e60f0d531..510eb472637 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -761,7 +761,7 @@ static int i2o_block_transfer(struct request *req)
 			break;
 
 		case CACHE_SMARTFETCH:
-			if (req->nr_sectors > 16)
+			if (blk_rq_sectors(req) > 16)
 				ctl_flags = 0x201F0008;
 			else
 				ctl_flags = 0x001F0000;
@@ -781,13 +781,13 @@ static int i2o_block_transfer(struct request *req)
 			ctl_flags = 0x001F0010;
 			break;
 		case CACHE_SMARTBACK:
-			if (req->nr_sectors > 16)
+			if (blk_rq_sectors(req) > 16)
 				ctl_flags = 0x001F0004;
 			else
 				ctl_flags = 0x001F0010;
 			break;
 		case CACHE_SMARTTHROUGH:
-			if (req->nr_sectors > 16)
+			if (blk_rq_sectors(req) > 16)
 				ctl_flags = 0x001F0004;
 			else
 				ctl_flags = 0x001F0010;
@@ -827,22 +827,24 @@ static int i2o_block_transfer(struct request *req)
 
 		*mptr++ = cpu_to_le32(scsi_flags);
 
-		*((u32 *) & cmd[2]) = cpu_to_be32(req->sector * hwsec);
-		*((u16 *) & cmd[7]) = cpu_to_be16(req->nr_sectors * hwsec);
+		*((u32 *) & cmd[2]) = cpu_to_be32(blk_rq_pos(req) * hwsec);
+		*((u16 *) & cmd[7]) = cpu_to_be16(blk_rq_sectors(req) * hwsec);
 
 		memcpy(mptr, cmd, 10);
 		mptr += 4;
-		*mptr++ = cpu_to_le32(req->nr_sectors << KERNEL_SECTOR_SHIFT);
+		*mptr++ =
+		    cpu_to_le32(blk_rq_sectors(req) << KERNEL_SECTOR_SHIFT);
 	} else
 #endif
 	{
 		msg->u.head[1] = cpu_to_le32(cmd | HOST_TID << 12 | tid);
 		*mptr++ = cpu_to_le32(ctl_flags);
-		*mptr++ = cpu_to_le32(req->nr_sectors << KERNEL_SECTOR_SHIFT);
 		*mptr++ =
-		    cpu_to_le32((u32) (req->sector << KERNEL_SECTOR_SHIFT));
+		    cpu_to_le32(blk_rq_sectors(req) << KERNEL_SECTOR_SHIFT);
+		*mptr++ =
+		    cpu_to_le32((u32) (blk_rq_pos(req) << KERNEL_SECTOR_SHIFT));
 		*mptr++ =
-		    cpu_to_le32(req->sector >> (32 - KERNEL_SECTOR_SHIFT));
+		    cpu_to_le32(blk_rq_pos(req) >> (32 - KERNEL_SECTOR_SHIFT));
 	}
 
 	if (!i2o_block_sglist_alloc(c, ireq, &mptr)) {
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index fe8041e619e..949e99770ad 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -243,7 +243,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 		brq.mrq.cmd = &brq.cmd;
 		brq.mrq.data = &brq.data;
 
-		brq.cmd.arg = req->sector;
+		brq.cmd.arg = blk_rq_pos(req);
 		if (!mmc_card_blockaddr(card))
 			brq.cmd.arg <<= 9;
 		brq.cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
@@ -251,7 +251,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 		brq.stop.opcode = MMC_STOP_TRANSMISSION;
 		brq.stop.arg = 0;
 		brq.stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
-		brq.data.blocks = req->nr_sectors;
+		brq.data.blocks = blk_rq_sectors(req);
 
 		/*
 		 * After a read error, we redo the request one sector at a time
@@ -293,7 +293,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 		 * Adjust the sg list so it is the same size as the
 		 * request.
 		 */
-		if (brq.data.blocks != req->nr_sectors) {
+		if (brq.data.blocks != blk_rq_sectors(req)) {
 			int i, data_size = brq.data.blocks << 9;
 			struct scatterlist *sg;
 
@@ -344,8 +344,8 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 			printk(KERN_ERR "%s: error %d transferring data,"
 			       " sector %u, nr %u, card status %#x\n",
 			       req->rq_disk->disk_name, brq.data.error,
-			       (unsigned)req->sector,
-			       (unsigned)req->nr_sectors, status);
+			       (unsigned)blk_rq_pos(req),
+			       (unsigned)blk_rq_sectors(req), status);
 		}
 
 		if (brq.stop.error) {
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 76c4c8d1307..4ea2e67ac97 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -47,8 +47,8 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 	unsigned long block, nsect;
 	char *buf;
 
-	block = req->sector << 9 >> tr->blkshift;
-	nsect = req->current_nr_sectors << 9 >> tr->blkshift;
+	block = blk_rq_pos(req) << 9 >> tr->blkshift;
+	nsect = blk_rq_cur_sectors(req) << 9 >> tr->blkshift;
 
 	buf = req->buffer;
 
@@ -59,7 +59,8 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 	if (!blk_fs_request(req))
 		return -EIO;
 
-	if (req->sector + req->current_nr_sectors > get_capacity(req->rq_disk))
+	if (blk_rq_pos(req) + blk_rq_cur_sectors(req) >
+	    get_capacity(req->rq_disk))
 		return -EIO;
 
 	switch(rq_data_dir(req)) {
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index fabec95686b..7df03c7aea0 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -603,7 +603,7 @@ static void dasd_profile_end(struct dasd_block *block,
 	if (dasd_profile_level != DASD_PROFILE_ON)
 		return;
 
-	sectors = req->nr_sectors;
+	sectors = blk_rq_sectors(req);
 	if (!cqr->buildclk || !cqr->startclk ||
 	    !cqr->stopclk || !cqr->endclk ||
 	    !sectors)
diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index b9a7f773344..2efaddfae56 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -505,8 +505,9 @@ static struct dasd_ccw_req *dasd_diag_build_cp(struct dasd_device *memdev,
 		return ERR_PTR(-EINVAL);
 	blksize = block->bp_block;
 	/* Calculate record id of first and last block. */
-	first_rec = req->sector >> block->s2b_shift;
-	last_rec = (req->sector + req->nr_sectors - 1) >> block->s2b_shift;
+	first_rec = blk_rq_pos(req) >> block->s2b_shift;
+	last_rec =
+		(blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift;
 	/* Check struct bio and count the number of blocks for the request. */
 	count = 0;
 	rq_for_each_segment(bv, req, iter) {
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index cb52da033f0..a41c94053e6 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -2354,10 +2354,10 @@ static struct dasd_ccw_req *dasd_eckd_build_cp(struct dasd_device *startdev,
 	blksize = block->bp_block;
 	blk_per_trk = recs_per_track(&private->rdc_data, 0, blksize);
 	/* Calculate record id of first and last block. */
-	first_rec = first_trk = req->sector >> block->s2b_shift;
+	first_rec = first_trk = blk_rq_pos(req) >> block->s2b_shift;
 	first_offs = sector_div(first_trk, blk_per_trk);
 	last_rec = last_trk =
-		(req->sector + req->nr_sectors - 1) >> block->s2b_shift;
+		(blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift;
 	last_offs = sector_div(last_trk, blk_per_trk);
 	cdlspecial = (private->uses_cdl && first_rec < 2*blk_per_trk);
 
@@ -2420,7 +2420,7 @@ dasd_eckd_free_cp(struct dasd_ccw_req *cqr, struct request *req)
 	private = (struct dasd_eckd_private *) cqr->block->base->private;
 	blksize = cqr->block->bp_block;
 	blk_per_trk = recs_per_track(&private->rdc_data, 0, blksize);
-	recid = req->sector >> cqr->block->s2b_shift;
+	recid = blk_rq_pos(req) >> cqr->block->s2b_shift;
 	ccw = cqr->cpaddr;
 	/* Skip over define extent & locate record. */
 	ccw++;
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index a3eb6fd1467..8912358daa2 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -270,8 +270,9 @@ static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev,
 		return ERR_PTR(-EINVAL);
 	blksize = block->bp_block;
 	/* Calculate record id of first and last block. */
-	first_rec = req->sector >> block->s2b_shift;
-	last_rec = (req->sector + req->nr_sectors - 1) >> block->s2b_shift;
+	first_rec = blk_rq_pos(req) >> block->s2b_shift;
+	last_rec =
+		(blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift;
 	/* Check struct bio and count the number of blocks for the request. */
 	count = 0;
 	cidaw = 0;
@@ -309,7 +310,7 @@ static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev,
 	ccw = cqr->cpaddr;
 	/* First ccw is define extent. */
 	define_extent(ccw++, cqr->data, rq_data_dir(req),
-		      block->bp_block, req->sector, req->nr_sectors);
+		      block->bp_block, blk_rq_pos(req), blk_rq_sectors(req));
 	/* Build locate_record + read/write ccws. */
 	idaws = (unsigned long *) (cqr->data + sizeof(struct DE_fba_data));
 	LO_data = (struct LO_fba_data *) (idaws + cidaw);
diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c
index 5f8e8ef43dd..2d00a383a47 100644
--- a/drivers/s390/char/tape_34xx.c
+++ b/drivers/s390/char/tape_34xx.c
@@ -1134,7 +1134,7 @@ tape_34xx_bread(struct tape_device *device, struct request *req)
 	/* Setup ccws. */
 	request->op = TO_BLOCK;
 	start_block = (struct tape_34xx_block_id *) request->cpdata;
-	start_block->block = req->sector >> TAPEBLOCK_HSEC_S2B;
+	start_block->block = blk_rq_pos(req) >> TAPEBLOCK_HSEC_S2B;
 	DBF_EVENT(6, "start_block = %i\n", start_block->block);
 
 	ccw = request->cpaddr;
diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c
index 823b05bd0dd..c453b2f3e9f 100644
--- a/drivers/s390/char/tape_3590.c
+++ b/drivers/s390/char/tape_3590.c
@@ -633,7 +633,7 @@ tape_3590_bread(struct tape_device *device, struct request *req)
 	struct req_iterator iter;
 
 	DBF_EVENT(6, "xBREDid:");
-	start_block = req->sector >> TAPEBLOCK_HSEC_S2B;
+	start_block = blk_rq_pos(req) >> TAPEBLOCK_HSEC_S2B;
 	DBF_EVENT(6, "start_block = %i\n", start_block);
 
 	rq_for_each_segment(bv, req, iter)
diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c
index 86596d3813b..5d035e4939d 100644
--- a/drivers/s390/char/tape_block.c
+++ b/drivers/s390/char/tape_block.c
@@ -87,7 +87,7 @@ __tapeblock_end_request(struct tape_request *ccw_req, void *data)
 	if (ccw_req->rc == 0)
 		/* Update position. */
 		device->blk_data.block_position =
-			(req->sector + req->nr_sectors) >> TAPEBLOCK_HSEC_S2B;
+		  (blk_rq_pos(req) + blk_rq_sectors(req)) >> TAPEBLOCK_HSEC_S2B;
 	else
 		/* We lost the position information due to an error. */
 		device->blk_data.block_position = -1;
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
index 09617884a50..2132c906e53 100644
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -188,8 +188,8 @@ static void jsfd_do_request(struct request_queue *q)
 
 	while ((req = elv_next_request(q)) != NULL) {
 		struct jsfd_part *jdp = req->rq_disk->private_data;
-		unsigned long offset = req->sector << 9;
-		size_t len = req->current_nr_sectors << 9;
+		unsigned long offset = blk_rq_pos(req) << 9;
+		size_t len = blk_rq_cur_sectors(req) << 9;
 
 		if ((offset + len) > jdp->dsize) {
 			__blk_end_request_cur(req, -EIO);
diff --git a/drivers/scsi/eata.c b/drivers/scsi/eata.c
index be5099dd94b..c7076ce25e2 100644
--- a/drivers/scsi/eata.c
+++ b/drivers/scsi/eata.c
@@ -1825,7 +1825,7 @@ static int eata2x_queuecommand(struct scsi_cmnd *SCpnt,
 	if (linked_comm && SCpnt->device->queue_depth > 2
 	    && TLDEV(SCpnt->device->type)) {
 		ha->cp_stat[i] = READY;
-		flush_dev(SCpnt->device, SCpnt->request->sector, ha, 0);
+		flush_dev(SCpnt->device, blk_rq_pos(SCpnt->request), ha, 0);
 		return 0;
 	}
 
@@ -2144,13 +2144,13 @@ static int reorder(struct hostdata *ha, unsigned long cursec,
 		if (!cpp->din)
 			input_only = 0;
 
-		if (SCpnt->request->sector < minsec)
-			minsec = SCpnt->request->sector;
-		if (SCpnt->request->sector > maxsec)
-			maxsec = SCpnt->request->sector;
+		if (blk_rq_pos(SCpnt->request) < minsec)
+			minsec = blk_rq_pos(SCpnt->request);
+		if (blk_rq_pos(SCpnt->request) > maxsec)
+			maxsec = blk_rq_pos(SCpnt->request);
 
-		sl[n] = SCpnt->request->sector;
-		ioseek += SCpnt->request->nr_sectors;
+		sl[n] = blk_rq_pos(SCpnt->request);
+		ioseek += blk_rq_sectors(SCpnt->request);
 
 		if (!n)
 			continue;
@@ -2190,7 +2190,7 @@ static int reorder(struct hostdata *ha, unsigned long cursec,
 			k = il[n];
 			cpp = &ha->cp[k];
 			SCpnt = cpp->SCpnt;
-			ll[n] = SCpnt->request->nr_sectors;
+			ll[n] = blk_rq_sectors(SCpnt->request);
 			pl[n] = SCpnt->serial_number;
 
 			if (!n)
@@ -2236,12 +2236,12 @@ static int reorder(struct hostdata *ha, unsigned long cursec,
 			cpp = &ha->cp[k];
 			SCpnt = cpp->SCpnt;
 			scmd_printk(KERN_INFO, SCpnt,
-			    "%s pid %ld mb %d fc %d nr %d sec %ld ns %ld"
+			    "%s pid %ld mb %d fc %d nr %d sec %ld ns %u"
 			     " cur %ld s:%c r:%c rev:%c in:%c ov:%c xd %d.\n",
 			     (ihdlr ? "ihdlr" : "qcomm"),
 			     SCpnt->serial_number, k, flushcount,
-			     n_ready, SCpnt->request->sector,
-			     SCpnt->request->nr_sectors, cursec, YESNO(s),
+			     n_ready, blk_rq_pos(SCpnt->request),
+			     blk_rq_sectors(SCpnt->request), cursec, YESNO(s),
 			     YESNO(r), YESNO(rev), YESNO(input_only),
 			     YESNO(overlap), cpp->din);
 		}
@@ -2408,7 +2408,7 @@ static irqreturn_t ihdlr(struct Scsi_Host *shost)
 
 	if (linked_comm && SCpnt->device->queue_depth > 2
 	    && TLDEV(SCpnt->device->type))
-		flush_dev(SCpnt->device, SCpnt->request->sector, ha, 1);
+		flush_dev(SCpnt->device, blk_rq_pos(SCpnt->request), ha, 1);
 
 	tstatus = status_byte(spp->target_status);
 
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 36fd2e75da1..a8fab397711 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -1313,10 +1313,10 @@ lpfc_parse_bg_err(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd,
 	uint32_t bgstat = bgf->bgstat;
 	uint64_t failing_sector = 0;
 
-	printk(KERN_ERR "BG ERROR in cmd 0x%x lba 0x%llx blk cnt 0x%lx "
+	printk(KERN_ERR "BG ERROR in cmd 0x%x lba 0x%llx blk cnt 0x%x "
 			"bgstat=0x%x bghm=0x%x\n",
 			cmd->cmnd[0], (unsigned long long)scsi_get_lba(cmd),
-			cmd->request->nr_sectors, bgstat, bghm);
+			blk_rq_sectors(cmd->request), bgstat, bghm);
 
 	spin_lock(&_dump_buf_lock);
 	if (!_dump_buf_done) {
@@ -2375,15 +2375,15 @@ lpfc_queuecommand(struct scsi_cmnd *cmnd, void (*done) (struct scsi_cmnd *))
 		if (cmnd->cmnd[0] == READ_10)
 			lpfc_printf_vlog(vport, KERN_WARNING, LOG_BG,
 					"9035 BLKGRD: READ @ sector %llu, "
-					 "count %lu\n",
-					 (unsigned long long)scsi_get_lba(cmnd),
-					cmnd->request->nr_sectors);
+					"count %u\n",
+					(unsigned long long)scsi_get_lba(cmnd),
+					blk_rq_sectors(cmnd->request));
 		else if (cmnd->cmnd[0] == WRITE_10)
 			lpfc_printf_vlog(vport, KERN_WARNING, LOG_BG,
 					"9036 BLKGRD: WRITE @ sector %llu, "
-					"count %lu cmd=%p\n",
+					"count %u cmd=%p\n",
 					(unsigned long long)scsi_get_lba(cmnd),
-					cmnd->request->nr_sectors,
+					blk_rq_sectors(cmnd->request),
 					cmnd);
 
 		err = lpfc_bg_scsi_prep_dma_buf(phba, lpfc_cmd);
@@ -2403,15 +2403,15 @@ lpfc_queuecommand(struct scsi_cmnd *cmnd, void (*done) (struct scsi_cmnd *))
 		if (cmnd->cmnd[0] == READ_10)
 			lpfc_printf_vlog(vport, KERN_WARNING, LOG_BG,
 					 "9040 dbg: READ @ sector %llu, "
-					 "count %lu\n",
+					 "count %u\n",
 					 (unsigned long long)scsi_get_lba(cmnd),
-					 cmnd->request->nr_sectors);
+					 blk_rq_sectors(cmnd->request));
 		else if (cmnd->cmnd[0] == WRITE_10)
 			lpfc_printf_vlog(vport, KERN_WARNING, LOG_BG,
 					 "9041 dbg: WRITE @ sector %llu, "
-					 "count %lu cmd=%p\n",
+					 "count %u cmd=%p\n",
 					 (unsigned long long)scsi_get_lba(cmnd),
-					 cmnd->request->nr_sectors, cmnd);
+					 blk_rq_sectors(cmnd->request), cmnd);
 		else
 			lpfc_printf_vlog(vport, KERN_WARNING, LOG_BG,
 					 "9042 dbg: parser not implemented\n");
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 9ff0ca9988a..39b3acfc0dd 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -787,9 +787,9 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 	 * Next deal with any sectors which we were able to correctly
 	 * handle.
 	 */
-	SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, "
+	SCSI_LOG_HLCOMPLETE(1, printk("%u sectors total, "
 				      "%d bytes done.\n",
-				      req->nr_sectors, good_bytes));
+				      blk_rq_sectors(req), good_bytes));
 
 	/*
 	 * Recovered errors need reporting, but they're always treated
@@ -968,7 +968,7 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb,
 	if (blk_pc_request(req))
 		sdb->length = req->data_len;
 	else
-		sdb->length = req->nr_sectors << 9;
+		sdb->length = blk_rq_sectors(req) << 9;
 	return BLKPREP_OK;
 }
 
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 3fcb64b91c4..70c4dd99bbf 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -383,9 +383,9 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
 	struct scsi_device *sdp = q->queuedata;
 	struct gendisk *disk = rq->rq_disk;
 	struct scsi_disk *sdkp;
-	sector_t block = rq->sector;
+	sector_t block = blk_rq_pos(rq);
 	sector_t threshold;
-	unsigned int this_count = rq->nr_sectors;
+	unsigned int this_count = blk_rq_sectors(rq);
 	int ret, host_dif;
 
 	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
@@ -412,10 +412,10 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
 					this_count));
 
 	if (!sdp || !scsi_device_online(sdp) ||
- 	    block + rq->nr_sectors > get_capacity(disk)) {
+	    block + blk_rq_sectors(rq) > get_capacity(disk)) {
 		SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt,
-						"Finishing %ld sectors\n",
-						rq->nr_sectors));
+						"Finishing %u sectors\n",
+						blk_rq_sectors(rq)));
 		SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt,
 						"Retry with 0x%p\n", SCpnt));
 		goto out;
@@ -462,7 +462,7 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
 	 * for this.
 	 */
 	if (sdp->sector_size == 1024) {
-		if ((block & 1) || (rq->nr_sectors & 1)) {
+		if ((block & 1) || (blk_rq_sectors(rq) & 1)) {
 			scmd_printk(KERN_ERR, SCpnt,
 				    "Bad block number requested\n");
 			goto out;
@@ -472,7 +472,7 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
 		}
 	}
 	if (sdp->sector_size == 2048) {
-		if ((block & 3) || (rq->nr_sectors & 3)) {
+		if ((block & 3) || (blk_rq_sectors(rq) & 3)) {
 			scmd_printk(KERN_ERR, SCpnt,
 				    "Bad block number requested\n");
 			goto out;
@@ -482,7 +482,7 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
 		}
 	}
 	if (sdp->sector_size == 4096) {
-		if ((block & 7) || (rq->nr_sectors & 7)) {
+		if ((block & 7) || (blk_rq_sectors(rq) & 7)) {
 			scmd_printk(KERN_ERR, SCpnt,
 				    "Bad block number requested\n");
 			goto out;
@@ -511,10 +511,10 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
 	}
 
 	SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt,
-					"%s %d/%ld 512 byte blocks.\n",
+					"%s %d/%u 512 byte blocks.\n",
 					(rq_data_dir(rq) == WRITE) ?
 					"writing" : "reading", this_count,
-					rq->nr_sectors));
+					blk_rq_sectors(rq)));
 
 	/* Set RDPROTECT/WRPROTECT if disk is formatted with DIF */
 	host_dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type);
@@ -970,8 +970,8 @@ static struct block_device_operations sd_fops = {
 
 static unsigned int sd_completed_bytes(struct scsi_cmnd *scmd)
 {
-	u64 start_lba = scmd->request->sector;
-	u64 end_lba = scmd->request->sector + (scsi_bufflen(scmd) / 512);
+	u64 start_lba = blk_rq_pos(scmd->request);
+	u64 end_lba = blk_rq_pos(scmd->request) + (scsi_bufflen(scmd) / 512);
 	u64 bad_lba;
 	int info_valid;
 
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 184dff49279..82f14a9482d 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -507,7 +507,7 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
 	sector_sz = scmd->device->sector_size;
 	sectors = good_bytes / sector_sz;
 
-	phys = scmd->request->sector & 0xffffffff;
+	phys = blk_rq_pos(scmd->request) & 0xffffffff;
 	if (sector_sz == 4096)
 		phys >>= 3;
 
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 0e1a0f2d2ad..fddba53c7fe 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -292,7 +292,8 @@ static int sr_done(struct scsi_cmnd *SCpnt)
 			if (cd->device->sector_size == 2048)
 				error_sector <<= 2;
 			error_sector &= ~(block_sectors - 1);
-			good_bytes = (error_sector - SCpnt->request->sector) << 9;
+			good_bytes = (error_sector -
+				      blk_rq_pos(SCpnt->request)) << 9;
 			if (good_bytes < 0 || good_bytes >= this_count)
 				good_bytes = 0;
 			/*
@@ -349,8 +350,8 @@ static int sr_prep_fn(struct request_queue *q, struct request *rq)
 				cd->disk->disk_name, block));
 
 	if (!cd->device || !scsi_device_online(cd->device)) {
-		SCSI_LOG_HLQUEUE(2, printk("Finishing %ld sectors\n",
-					rq->nr_sectors));
+		SCSI_LOG_HLQUEUE(2, printk("Finishing %u sectors\n",
+					   blk_rq_sectors(rq)));
 		SCSI_LOG_HLQUEUE(2, printk("Retry with 0x%p\n", SCpnt));
 		goto out;
 	}
@@ -413,7 +414,7 @@ static int sr_prep_fn(struct request_queue *q, struct request *rq)
 	/*
 	 * request doesn't start on hw block boundary, add scatter pads
 	 */
-	if (((unsigned int)rq->sector % (s_size >> 9)) ||
+	if (((unsigned int)blk_rq_pos(rq) % (s_size >> 9)) ||
 	    (scsi_bufflen(SCpnt) % s_size)) {
 		scmd_printk(KERN_NOTICE, SCpnt, "unaligned transfer\n");
 		goto out;
@@ -422,14 +423,14 @@ static int sr_prep_fn(struct request_queue *q, struct request *rq)
 	this_count = (scsi_bufflen(SCpnt) >> 9) / (s_size >> 9);
 
 
-	SCSI_LOG_HLQUEUE(2, printk("%s : %s %d/%ld 512 byte blocks.\n",
+	SCSI_LOG_HLQUEUE(2, printk("%s : %s %d/%u 512 byte blocks.\n",
 				cd->cdi.name,
 				(rq_data_dir(rq) == WRITE) ?
 					"writing" : "reading",
-				this_count, rq->nr_sectors));
+				this_count, blk_rq_sectors(rq)));
 
 	SCpnt->cmnd[1] = 0;
-	block = (unsigned int)rq->sector / (s_size >> 9);
+	block = (unsigned int)blk_rq_pos(rq) / (s_size >> 9);
 
 	if (this_count > 0xffff) {
 		this_count = 0xffff;
diff --git a/drivers/scsi/u14-34f.c b/drivers/scsi/u14-34f.c
index 601e95141cb..54023d41fd1 100644
--- a/drivers/scsi/u14-34f.c
+++ b/drivers/scsi/u14-34f.c
@@ -1306,7 +1306,7 @@ static int u14_34f_queuecommand(struct scsi_cmnd *SCpnt, void (*done)(struct scs
    if (linked_comm && SCpnt->device->queue_depth > 2
                                      && TLDEV(SCpnt->device->type)) {
       HD(j)->cp_stat[i] = READY;
-      flush_dev(SCpnt->device, SCpnt->request->sector, j, FALSE);
+      flush_dev(SCpnt->device, blk_rq_pos(SCpnt->request), j, FALSE);
       return 0;
       }
 
@@ -1610,11 +1610,13 @@ static int reorder(unsigned int j, unsigned long cursec,
 
       if (!(cpp->xdir == DTD_IN)) input_only = FALSE;
 
-      if (SCpnt->request->sector < minsec) minsec = SCpnt->request->sector;
-      if (SCpnt->request->sector > maxsec) maxsec = SCpnt->request->sector;
+      if (blk_rq_pos(SCpnt->request) < minsec)
+	 minsec = blk_rq_pos(SCpnt->request);
+      if (blk_rq_pos(SCpnt->request) > maxsec)
+	 maxsec = blk_rq_pos(SCpnt->request);
 
-      sl[n] = SCpnt->request->sector;
-      ioseek += SCpnt->request->nr_sectors;
+      sl[n] = blk_rq_pos(SCpnt->request);
+      ioseek += blk_rq_sectors(SCpnt->request);
 
       if (!n) continue;
 
@@ -1642,7 +1644,7 @@ static int reorder(unsigned int j, unsigned long cursec,
 
    if (!input_only) for (n = 0; n < n_ready; n++) {
       k = il[n]; cpp = &HD(j)->cp[k]; SCpnt = cpp->SCpnt;
-      ll[n] = SCpnt->request->nr_sectors; pl[n] = SCpnt->serial_number;
+      ll[n] = blk_rq_sectors(SCpnt->request); pl[n] = SCpnt->serial_number;
 
       if (!n) continue;
 
@@ -1666,12 +1668,12 @@ static int reorder(unsigned int j, unsigned long cursec,
    if (link_statistics && (overlap || !(flushcount % link_statistics)))
       for (n = 0; n < n_ready; n++) {
          k = il[n]; cpp = &HD(j)->cp[k]; SCpnt = cpp->SCpnt;
-         printk("%s %d.%d:%d pid %ld mb %d fc %d nr %d sec %ld ns %ld"\
+         printk("%s %d.%d:%d pid %ld mb %d fc %d nr %d sec %ld ns %u"\
                 " cur %ld s:%c r:%c rev:%c in:%c ov:%c xd %d.\n",
                 (ihdlr ? "ihdlr" : "qcomm"), SCpnt->channel, SCpnt->target,
                 SCpnt->lun, SCpnt->serial_number, k, flushcount, n_ready,
-                SCpnt->request->sector, SCpnt->request->nr_sectors, cursec,
-                YESNO(s), YESNO(r), YESNO(rev), YESNO(input_only),
+                blk_rq_pos(SCpnt->request), blk_rq_sectors(SCpnt->request),
+		cursec, YESNO(s), YESNO(r), YESNO(rev), YESNO(input_only),
                 YESNO(overlap), cpp->xdir);
          }
 #endif
@@ -1799,7 +1801,7 @@ static irqreturn_t ihdlr(unsigned int j)
 
    if (linked_comm && SCpnt->device->queue_depth > 2
                                      && TLDEV(SCpnt->device->type))
-      flush_dev(SCpnt->device, SCpnt->request->sector, j, TRUE);
+      flush_dev(SCpnt->device, blk_rq_pos(SCpnt->request), j, TRUE);
 
    tstatus = status_byte(spp->target_status);
 
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 43b50d36925..3878d1dc7f5 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -270,7 +270,7 @@ static inline unsigned char scsi_get_prot_type(struct scsi_cmnd *scmd)
 
 static inline sector_t scsi_get_lba(struct scsi_cmnd *scmd)
 {
-	return scmd->request->sector;
+	return blk_rq_pos(scmd->request);
 }
 
 static inline unsigned scsi_prot_sg_count(struct scsi_cmnd *cmd)
-- 
cgit v1.2.3-70-g09d2


From 2e46e8b27aa57c6bd34b3102b40ee4d0144b4fab Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 7 May 2009 22:24:41 +0900
Subject: block: drop request->hard_* and *nr_sectors

struct request has had a few different ways to represent some
properties of a request.  ->hard_* represent block layer's view of the
request progress (completion cursor) and the ones without the prefix
are supposed to represent the issue cursor and allowed to be updated
as necessary by the low level drivers.  The thing is that as block
layer supports partial completion, the two cursors really aren't
necessary and only cause confusion.  In addition, manual management of
request detail from low level drivers is cumbersome and error-prone at
the very least.

Another interesting duplicate fields are rq->[hard_]nr_sectors and
rq->{hard_cur|current}_nr_sectors against rq->data_len and
rq->bio->bi_size.  This is more convoluted than the hard_ case.

rq->[hard_]nr_sectors are initialized for requests with bio but
blk_rq_bytes() uses it only for !pc requests.  rq->data_len is
initialized for all request but blk_rq_bytes() uses it only for pc
requests.  This causes good amount of confusion throughout block layer
and its drivers and determining the request length has been a bit of
black magic which may or may not work depending on circumstances and
what the specific LLD is actually doing.

rq->{hard_cur|current}_nr_sectors represent the number of sectors in
the contiguous data area at the front.  This is mainly used by drivers
which transfers data by walking request segment-by-segment.  This
value always equals rq->bio->bi_size >> 9.  However, data length for
pc requests may not be multiple of 512 bytes and using this field
becomes a bit confusing.

In general, having multiple fields to represent the same property
leads only to confusion and subtle bugs.  With recent block low level
driver cleanups, no driver is accessing or manipulating these
duplicate fields directly.  Drop all the duplicates.  Now rq->sector
means the current sector, rq->data_len the current total length and
rq->bio->bi_size the current segment length.  Everything else is
defined in terms of these three and available only through accessors.

* blk_recalc_rq_sectors() is collapsed into blk_update_request() and
  now handles pc and fs requests equally other than rq->sector update.
  This means that now pc requests can use partial completion too (no
  in-kernel user yet tho).

* bio_cur_sectors() is replaced with bio_cur_bytes() as block layer
  now uses byte count as the primary data length.

* blk_rq_pos() is now guranteed to be always correct.  In-block users
  converted.

* blk_rq_bytes() is now guaranteed to be always valid as is
  blk_rq_sectors().  In-block users converted.

* blk_rq_sectors() is now guaranteed to equal blk_rq_bytes() >> 9.
  More convenient one is used.

* blk_rq_bytes() and blk_rq_cur_bytes() are now inlined and take const
  pointer to request.

[ Impact: API cleanup, single way to represent one property of a request ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-core.c         | 81 ++++++++++++++++++------------------------------
 block/blk-merge.c        | 36 +++------------------
 block/blk.h              |  1 -
 block/cfq-iosched.c      | 10 +++---
 include/linux/bio.h      |  6 ++--
 include/linux/blkdev.h   | 37 ++++++++++------------
 include/linux/elevator.h |  2 +-
 kernel/trace/blktrace.c  | 16 +++++-----
 8 files changed, 67 insertions(+), 122 deletions(-)

(limited to 'include')

diff --git a/block/blk-core.c b/block/blk-core.c
index 82dc20621c0..3596ca71909 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -127,7 +127,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 	INIT_LIST_HEAD(&rq->timeout_list);
 	rq->cpu = -1;
 	rq->q = q;
-	rq->sector = rq->hard_sector = (sector_t) -1;
+	rq->sector = (sector_t) -1;
 	INIT_HLIST_NODE(&rq->hash);
 	RB_CLEAR_NODE(&rq->rb_node);
 	rq->cmd = rq->__cmd;
@@ -189,8 +189,7 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
 	       (unsigned long long)blk_rq_pos(rq),
 	       blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
 	printk(KERN_INFO "  bio %p, biotail %p, buffer %p, len %u\n",
-						rq->bio, rq->biotail,
-						rq->buffer, rq->data_len);
+	       rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));
 
 	if (blk_pc_request(rq)) {
 		printk(KERN_INFO "  cdb: ");
@@ -1096,7 +1095,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 		req->cmd_flags |= REQ_NOIDLE;
 
 	req->errors = 0;
-	req->hard_sector = req->sector = bio->bi_sector;
+	req->sector = bio->bi_sector;
 	req->ioprio = bio_prio(bio);
 	blk_rq_bio_prep(req->q, req, bio);
 }
@@ -1113,14 +1112,13 @@ static inline bool queue_should_plug(struct request_queue *q)
 static int __make_request(struct request_queue *q, struct bio *bio)
 {
 	struct request *req;
-	int el_ret, nr_sectors;
+	int el_ret;
+	unsigned int bytes = bio->bi_size;
 	const unsigned short prio = bio_prio(bio);
 	const int sync = bio_sync(bio);
 	const int unplug = bio_unplug(bio);
 	int rw_flags;
 
-	nr_sectors = bio_sectors(bio);
-
 	/*
 	 * low level driver can indicate that it wants pages above a
 	 * certain limit bounced to low memory (ie for highmem, or even
@@ -1145,7 +1143,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 
 		req->biotail->bi_next = bio;
 		req->biotail = bio;
-		req->nr_sectors = req->hard_nr_sectors += nr_sectors;
+		req->data_len += bytes;
 		req->ioprio = ioprio_best(req->ioprio, prio);
 		if (!blk_rq_cpu_valid(req))
 			req->cpu = bio->bi_comp_cpu;
@@ -1171,10 +1169,8 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 		 * not touch req->buffer either...
 		 */
 		req->buffer = bio_data(bio);
-		req->current_nr_sectors = bio_cur_sectors(bio);
-		req->hard_cur_sectors = req->current_nr_sectors;
-		req->sector = req->hard_sector = bio->bi_sector;
-		req->nr_sectors = req->hard_nr_sectors += nr_sectors;
+		req->sector = bio->bi_sector;
+		req->data_len += bytes;
 		req->ioprio = ioprio_best(req->ioprio, prio);
 		if (!blk_rq_cpu_valid(req))
 			req->cpu = bio->bi_comp_cpu;
@@ -1557,7 +1553,7 @@ EXPORT_SYMBOL(submit_bio);
 int blk_rq_check_limits(struct request_queue *q, struct request *rq)
 {
 	if (blk_rq_sectors(rq) > q->max_sectors ||
-	    rq->data_len > q->max_hw_sectors << 9) {
+	    blk_rq_bytes(rq) > q->max_hw_sectors << 9) {
 		printk(KERN_ERR "%s: over max size limit.\n", __func__);
 		return -EIO;
 	}
@@ -1675,35 +1671,6 @@ static void blk_account_io_done(struct request *req)
 	}
 }
 
-/**
- * blk_rq_bytes - Returns bytes left to complete in the entire request
- * @rq: the request being processed
- **/
-unsigned int blk_rq_bytes(struct request *rq)
-{
-	if (blk_fs_request(rq))
-		return blk_rq_sectors(rq) << 9;
-
-	return rq->data_len;
-}
-EXPORT_SYMBOL_GPL(blk_rq_bytes);
-
-/**
- * blk_rq_cur_bytes - Returns bytes left to complete in the current segment
- * @rq: the request being processed
- **/
-unsigned int blk_rq_cur_bytes(struct request *rq)
-{
-	if (blk_fs_request(rq))
-		return rq->current_nr_sectors << 9;
-
-	if (rq->bio)
-		return rq->bio->bi_size;
-
-	return rq->data_len;
-}
-EXPORT_SYMBOL_GPL(blk_rq_cur_bytes);
-
 struct request *elv_next_request(struct request_queue *q)
 {
 	struct request *rq;
@@ -1736,7 +1703,7 @@ struct request *elv_next_request(struct request_queue *q)
 		if (rq->cmd_flags & REQ_DONTPREP)
 			break;
 
-		if (q->dma_drain_size && rq->data_len) {
+		if (q->dma_drain_size && blk_rq_bytes(rq)) {
 			/*
 			 * make sure space for the drain appears we
 			 * know we can do this because max_hw_segments
@@ -1759,7 +1726,7 @@ struct request *elv_next_request(struct request_queue *q)
 			 * avoid resource deadlock.  REQ_STARTED will
 			 * prevent other fs requests from passing this one.
 			 */
-			if (q->dma_drain_size && rq->data_len &&
+			if (q->dma_drain_size && blk_rq_bytes(rq) &&
 			    !(rq->cmd_flags & REQ_DONTPREP)) {
 				/*
 				 * remove the space for the drain we added
@@ -1911,8 +1878,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 		 * can find how many bytes remain in the request
 		 * later.
 		 */
-		req->nr_sectors = req->hard_nr_sectors = 0;
-		req->current_nr_sectors = req->hard_cur_sectors = 0;
+		req->data_len = 0;
 		return false;
 	}
 
@@ -1926,8 +1892,25 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 		bio_iovec(bio)->bv_len -= nr_bytes;
 	}
 
-	blk_recalc_rq_sectors(req, total_bytes >> 9);
+	req->data_len -= total_bytes;
+	req->buffer = bio_data(req->bio);
+
+	/* update sector only for requests with clear definition of sector */
+	if (blk_fs_request(req) || blk_discard_rq(req))
+		req->sector += total_bytes >> 9;
+
+	/*
+	 * If total number of sectors is less than the first segment
+	 * size, something has gone terribly wrong.
+	 */
+	if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
+		printk(KERN_ERR "blk: request botched\n");
+		req->data_len = blk_rq_cur_bytes(req);
+	}
+
+	/* recalculate the number of segments */
 	blk_recalc_rq_segments(req);
+
 	return true;
 }
 EXPORT_SYMBOL_GPL(blk_update_request);
@@ -2049,11 +2032,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 		rq->nr_phys_segments = bio_phys_segments(q, bio);
 		rq->buffer = bio_data(bio);
 	}
-	rq->current_nr_sectors = bio_cur_sectors(bio);
-	rq->hard_cur_sectors = rq->current_nr_sectors;
-	rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
 	rq->data_len = bio->bi_size;
-
 	rq->bio = rq->biotail = bio;
 
 	if (bio->bi_bdev)
diff --git a/block/blk-merge.c b/block/blk-merge.c
index bf62a87a9da..b8df66aef0f 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -9,35 +9,6 @@
 
 #include "blk.h"
 
-void blk_recalc_rq_sectors(struct request *rq, int nsect)
-{
-	if (blk_fs_request(rq) || blk_discard_rq(rq)) {
-		rq->hard_sector += nsect;
-		rq->hard_nr_sectors -= nsect;
-
-		/*
-		 * Move the I/O submission pointers ahead if required.
-		 */
-		if ((rq->nr_sectors >= rq->hard_nr_sectors) &&
-		    (rq->sector <= rq->hard_sector)) {
-			rq->sector = rq->hard_sector;
-			rq->nr_sectors = rq->hard_nr_sectors;
-			rq->hard_cur_sectors = bio_cur_sectors(rq->bio);
-			rq->current_nr_sectors = rq->hard_cur_sectors;
-			rq->buffer = bio_data(rq->bio);
-		}
-
-		/*
-		 * if total number of sectors is less than the first segment
-		 * size, something has gone terribly wrong
-		 */
-		if (rq->nr_sectors < rq->current_nr_sectors) {
-			printk(KERN_ERR "blk: request botched\n");
-			rq->nr_sectors = rq->current_nr_sectors;
-		}
-	}
-}
-
 static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
 					     struct bio *bio)
 {
@@ -199,8 +170,9 @@ new_segment:
 
 
 	if (unlikely(rq->cmd_flags & REQ_COPY_USER) &&
-	    (rq->data_len & q->dma_pad_mask)) {
-		unsigned int pad_len = (q->dma_pad_mask & ~rq->data_len) + 1;
+	    (blk_rq_bytes(rq) & q->dma_pad_mask)) {
+		unsigned int pad_len =
+			(q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;
 
 		sg->length += pad_len;
 		rq->extra_len += pad_len;
@@ -398,7 +370,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
 	req->biotail->bi_next = next->bio;
 	req->biotail = next->biotail;
 
-	req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
+	req->data_len += blk_rq_bytes(next);
 
 	elv_merge_requests(q, req, next);
 
diff --git a/block/blk.h b/block/blk.h
index 51115599df9..ab54529103c 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -101,7 +101,6 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
 int attempt_back_merge(struct request_queue *q, struct request *rq);
 int attempt_front_merge(struct request_queue *q, struct request *rq);
 void blk_recalc_rq_segments(struct request *rq);
-void blk_recalc_rq_sectors(struct request *rq, int nsect);
 
 void blk_queue_congestion_threshold(struct request_queue *q);
 
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index db4d990a66b..99ac4304d71 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -579,9 +579,9 @@ cfq_prio_tree_lookup(struct cfq_data *cfqd, struct rb_root *root,
 		 * Sort strictly based on sector.  Smallest to the left,
 		 * largest to the right.
 		 */
-		if (sector > cfqq->next_rq->sector)
+		if (sector > blk_rq_pos(cfqq->next_rq))
 			n = &(*p)->rb_right;
-		else if (sector < cfqq->next_rq->sector)
+		else if (sector < blk_rq_pos(cfqq->next_rq))
 			n = &(*p)->rb_left;
 		else
 			break;
@@ -611,8 +611,8 @@ static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 		return;
 
 	cfqq->p_root = &cfqd->prio_trees[cfqq->org_ioprio];
-	__cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root, cfqq->next_rq->sector,
-					 &parent, &p);
+	__cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root,
+				      blk_rq_pos(cfqq->next_rq), &parent, &p);
 	if (!__cfqq) {
 		rb_link_node(&cfqq->p_node, parent, p);
 		rb_insert_color(&cfqq->p_node, cfqq->p_root);
@@ -996,7 +996,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
 	if (cfq_rq_close(cfqd, __cfqq->next_rq))
 		return __cfqq;
 
-	if (__cfqq->next_rq->sector < sector)
+	if (blk_rq_pos(__cfqq->next_rq) < sector)
 		node = rb_next(&__cfqq->p_node);
 	else
 		node = rb_prev(&__cfqq->p_node);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index f37ca8c726b..d30ec6f30dd 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -218,12 +218,12 @@ struct bio {
 #define bio_sectors(bio)	((bio)->bi_size >> 9)
 #define bio_empty_barrier(bio)	(bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio))
 
-static inline unsigned int bio_cur_sectors(struct bio *bio)
+static inline unsigned int bio_cur_bytes(struct bio *bio)
 {
 	if (bio->bi_vcnt)
-		return bio_iovec(bio)->bv_len >> 9;
+		return bio_iovec(bio)->bv_len;
 	else /* dataless requests such as discard */
-		return bio->bi_size >> 9;
+		return bio->bi_size;
 }
 
 static inline void *bio_data(struct bio *bio)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4e5f8559872..ce2bf5efa9b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -166,19 +166,8 @@ struct request {
 	enum rq_cmd_type_bits cmd_type;
 	unsigned long atomic_flags;
 
-	/* Maintain bio traversal state for part by part I/O submission.
-	 * hard_* are block layer internals, no driver should touch them!
-	 */
-
-	sector_t sector;		/* next sector to submit */
-	sector_t hard_sector;		/* next sector to complete */
-	unsigned long nr_sectors;	/* no. of sectors left to submit */
-	unsigned long hard_nr_sectors;	/* no. of sectors left to complete */
-	/* no. of sectors left to submit in the current segment */
-	unsigned int current_nr_sectors;
-
-	/* no. of sectors left to complete in the current segment */
-	unsigned int hard_cur_sectors;
+	sector_t sector;	/* sector cursor */
+	unsigned int data_len;	/* total data len, don't access directly */
 
 	struct bio *bio;
 	struct bio *biotail;
@@ -226,7 +215,6 @@ struct request {
 	unsigned char __cmd[BLK_MAX_CDB];
 	unsigned char *cmd;
 
-	unsigned int data_len;
 	unsigned int extra_len;	/* length of alignment and padding */
 	unsigned int sense_len;
 	unsigned int resid_len;	/* residual count */
@@ -840,20 +828,27 @@ extern void blkdev_dequeue_request(struct request *req);
  */
 static inline sector_t blk_rq_pos(const struct request *rq)
 {
-	return rq->hard_sector;
+	return rq->sector;
+}
+
+static inline unsigned int blk_rq_bytes(const struct request *rq)
+{
+	return rq->data_len;
 }
 
-extern unsigned int blk_rq_bytes(struct request *rq);
-extern unsigned int blk_rq_cur_bytes(struct request *rq);
+static inline int blk_rq_cur_bytes(const struct request *rq)
+{
+	return rq->bio ? bio_cur_bytes(rq->bio) : 0;
+}
 
 static inline unsigned int blk_rq_sectors(const struct request *rq)
 {
-	return rq->hard_nr_sectors;
+	return blk_rq_bytes(rq) >> 9;
 }
 
 static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
 {
-	return rq->hard_cur_sectors;
+	return blk_rq_cur_bytes(rq) >> 9;
 }
 
 /*
@@ -928,7 +923,7 @@ static inline void blk_end_request_all(struct request *rq, int error)
  */
 static inline bool blk_end_request_cur(struct request *rq, int error)
 {
-	return blk_end_request(rq, error, rq->hard_cur_sectors << 9);
+	return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
 }
 
 /**
@@ -981,7 +976,7 @@ static inline void __blk_end_request_all(struct request *rq, int error)
  */
 static inline bool __blk_end_request_cur(struct request *rq, int error)
 {
-	return __blk_end_request(rq, error, rq->hard_cur_sectors << 9);
+	return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
 }
 
 extern void blk_complete_request(struct request *);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index c59b769f62b..4e462878c9c 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -171,7 +171,7 @@ enum {
 	ELV_MQUEUE_MUST,
 };
 
-#define rq_end_sector(rq)	((rq)->sector + (rq)->nr_sectors)
+#define rq_end_sector(rq)	(blk_rq_pos(rq) + blk_rq_sectors(rq))
 #define rb_entry_rq(node)	rb_entry((node), struct request, rb_node)
 
 /*
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 42f1c11e754..5708a14bee5 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -642,12 +642,12 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
 
 	if (blk_pc_request(rq)) {
 		what |= BLK_TC_ACT(BLK_TC_PC);
-		__blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
-				rq->cmd_len, rq->cmd);
+		__blk_add_trace(bt, 0, blk_rq_bytes(rq), rw,
+				what, rq->errors, rq->cmd_len, rq->cmd);
 	} else  {
 		what |= BLK_TC_ACT(BLK_TC_FS);
-		__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_sectors(rq) << 9,
-				rw, what, rq->errors, 0, NULL);
+		__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rw,
+				what, rq->errors, 0, NULL);
 	}
 }
 
@@ -854,11 +854,11 @@ void blk_add_driver_data(struct request_queue *q,
 		return;
 
 	if (blk_pc_request(rq))
-		__blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
-				rq->errors, len, data);
+		__blk_add_trace(bt, 0, blk_rq_bytes(rq), 0,
+				BLK_TA_DRV_DATA, rq->errors, len, data);
 	else
-		__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_sectors(rq) << 9,
-				0, BLK_TA_DRV_DATA, rq->errors, len, data);
+		__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 0,
+				BLK_TA_DRV_DATA, rq->errors, len, data);
 }
 EXPORT_SYMBOL_GPL(blk_add_driver_data);
 
-- 
cgit v1.2.3-70-g09d2


From a2dec7b36364a5cc564c4d76cf16d2e7d33f5c05 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 7 May 2009 22:24:44 +0900
Subject: block: hide request sector and data_len

Block low level drivers for some reason have been pretty good at
abusing block layer API.  Especially struct request's fields tend to
get violated in all possible ways.  Make it clear that low level
drivers MUST NOT access or manipulate rq->sector and rq->data_len
directly by prefixing them with double underscores.

This change is also necessary to break build of out-of-tree codes
which assume the previous block API where internal fields can be
manipulated and rq->data_len carries residual count on completion.

[ Impact: hide internal fields, block API change ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-core.c       | 20 ++++++++++----------
 block/blk-map.c        |  2 +-
 block/blk-merge.c      |  2 +-
 include/linux/blkdev.h |  9 +++++----
 4 files changed, 17 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/block/blk-core.c b/block/blk-core.c
index 3596ca71909..6226a380fb6 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -127,7 +127,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 	INIT_LIST_HEAD(&rq->timeout_list);
 	rq->cpu = -1;
 	rq->q = q;
-	rq->sector = (sector_t) -1;
+	rq->__sector = (sector_t) -1;
 	INIT_HLIST_NODE(&rq->hash);
 	RB_CLEAR_NODE(&rq->rb_node);
 	rq->cmd = rq->__cmd;
@@ -1095,7 +1095,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 		req->cmd_flags |= REQ_NOIDLE;
 
 	req->errors = 0;
-	req->sector = bio->bi_sector;
+	req->__sector = bio->bi_sector;
 	req->ioprio = bio_prio(bio);
 	blk_rq_bio_prep(req->q, req, bio);
 }
@@ -1143,7 +1143,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 
 		req->biotail->bi_next = bio;
 		req->biotail = bio;
-		req->data_len += bytes;
+		req->__data_len += bytes;
 		req->ioprio = ioprio_best(req->ioprio, prio);
 		if (!blk_rq_cpu_valid(req))
 			req->cpu = bio->bi_comp_cpu;
@@ -1169,8 +1169,8 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 		 * not touch req->buffer either...
 		 */
 		req->buffer = bio_data(bio);
-		req->sector = bio->bi_sector;
-		req->data_len += bytes;
+		req->__sector = bio->bi_sector;
+		req->__data_len += bytes;
 		req->ioprio = ioprio_best(req->ioprio, prio);
 		if (!blk_rq_cpu_valid(req))
 			req->cpu = bio->bi_comp_cpu;
@@ -1878,7 +1878,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 		 * can find how many bytes remain in the request
 		 * later.
 		 */
-		req->data_len = 0;
+		req->__data_len = 0;
 		return false;
 	}
 
@@ -1892,12 +1892,12 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 		bio_iovec(bio)->bv_len -= nr_bytes;
 	}
 
-	req->data_len -= total_bytes;
+	req->__data_len -= total_bytes;
 	req->buffer = bio_data(req->bio);
 
 	/* update sector only for requests with clear definition of sector */
 	if (blk_fs_request(req) || blk_discard_rq(req))
-		req->sector += total_bytes >> 9;
+		req->__sector += total_bytes >> 9;
 
 	/*
 	 * If total number of sectors is less than the first segment
@@ -1905,7 +1905,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 	 */
 	if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
 		printk(KERN_ERR "blk: request botched\n");
-		req->data_len = blk_rq_cur_bytes(req);
+		req->__data_len = blk_rq_cur_bytes(req);
 	}
 
 	/* recalculate the number of segments */
@@ -2032,7 +2032,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 		rq->nr_phys_segments = bio_phys_segments(q, bio);
 		rq->buffer = bio_data(bio);
 	}
-	rq->data_len = bio->bi_size;
+	rq->__data_len = bio->bi_size;
 	rq->bio = rq->biotail = bio;
 
 	if (bio->bi_bdev)
diff --git a/block/blk-map.c b/block/blk-map.c
index 694fefad34e..56082bea450 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -20,7 +20,7 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
 		rq->biotail->bi_next = bio;
 		rq->biotail = bio;
 
-		rq->data_len += bio->bi_size;
+		rq->__data_len += bio->bi_size;
 	}
 	return 0;
 }
diff --git a/block/blk-merge.c b/block/blk-merge.c
index b8df66aef0f..4974dd5767e 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -370,7 +370,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
 	req->biotail->bi_next = next->bio;
 	req->biotail = next->biotail;
 
-	req->data_len += blk_rq_bytes(next);
+	req->__data_len += blk_rq_bytes(next);
 
 	elv_merge_requests(q, req, next);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ce2bf5efa9b..c7558034570 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -166,8 +166,9 @@ struct request {
 	enum rq_cmd_type_bits cmd_type;
 	unsigned long atomic_flags;
 
-	sector_t sector;	/* sector cursor */
-	unsigned int data_len;	/* total data len, don't access directly */
+	/* the following two fields are internal, NEVER access directly */
+	sector_t __sector;		/* sector cursor */
+	unsigned int __data_len;	/* total data len */
 
 	struct bio *bio;
 	struct bio *biotail;
@@ -828,12 +829,12 @@ extern void blkdev_dequeue_request(struct request *req);
  */
 static inline sector_t blk_rq_pos(const struct request *rq)
 {
-	return rq->sector;
+	return rq->__sector;
 }
 
 static inline unsigned int blk_rq_bytes(const struct request *rq)
 {
-	return rq->data_len;
+	return rq->__data_len;
 }
 
 static inline int blk_rq_cur_bytes(const struct request *rq)
-- 
cgit v1.2.3-70-g09d2


From 9934c8c04561413609d2bc38c6b9f268cba774a4 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 8 May 2009 11:54:16 +0900
Subject: block: implement and enforce request peek/start/fetch

Till now block layer allowed two separate modes of request execution.
A request is always acquired from the request queue via
elv_next_request().  After that, drivers are free to either dequeue it
or process it without dequeueing.  Dequeue allows elv_next_request()
to return the next request so that multiple requests can be in flight.

Executing requests without dequeueing has its merits mostly in
allowing drivers for simpler devices which can't do sg to deal with
segments only without considering request boundary.  However, the
benefit this brings is dubious and declining while the cost of the API
ambiguity is increasing.  Segment based drivers are usually for very
old or limited devices and as converting to dequeueing model isn't
difficult, it doesn't justify the API overhead it puts on block layer
and its more modern users.

Previous patches converted all block low level drivers to dequeueing
model.  This patch completes the API transition by...

* renaming elv_next_request() to blk_peek_request()

* renaming blkdev_dequeue_request() to blk_start_request()

* adding blk_fetch_request() which is combination of peek and start

* disallowing completion of queued (not started) requests

* applying new API to all LLDs

Renamings are for consistency and to break out of tree code so that
it's apparent that out of tree drivers need updating.

[ Impact: block request issue API cleanup, no functional change ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Mike Miller <mike.miller@hp.com>
Cc: unsik Kim <donari75@gmail.com>
Cc: Paul Clements <paul.clements@steeleye.com>
Cc: Tim Waugh <tim@cyberelk.net>
Cc: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Laurent Vivier <Laurent@lvivier.info>
Cc: Jeff Garzik <jgarzik@pobox.com>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: Adrian McMenamin <adrian@mcmen.demon.co.uk>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Cc: Borislav Petkov <petkovbb@googlemail.com>
Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: Alex Dubov <oakad@yahoo.com>
Cc: Pierre Ossman <drzeus@drzeus.cx>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Markus Lidel <Markus.Lidel@shadowconnect.com>
Cc: Stefan Weinhuber <wein@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Pete Zaitcev <zaitcev@redhat.com>
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 arch/arm/plat-omap/mailbox.c        |  12 ++---
 arch/um/drivers/ubd_kern.c          |   3 +-
 block/blk-barrier.c                 |   4 +-
 block/blk-core.c                    | 105 ++++++++++++++++++++++++++----------
 block/blk-tag.c                     |   2 +-
 block/blk.h                         |   1 +
 drivers/block/DAC960.c              |   4 +-
 drivers/block/amiflop.c             |   3 +-
 drivers/block/ataflop.c             |   3 +-
 drivers/block/cciss.c               |   4 +-
 drivers/block/cpqarray.c            |   4 +-
 drivers/block/floppy.c              |   6 +--
 drivers/block/hd.c                  |   3 +-
 drivers/block/mg_disk.c             |  12 ++---
 drivers/block/nbd.c                 |   4 +-
 drivers/block/paride/pcd.c          |   3 +-
 drivers/block/paride/pd.c           |   7 +--
 drivers/block/paride/pf.c           |   3 +-
 drivers/block/ps3disk.c             |   4 +-
 drivers/block/sunvdc.c              |   3 +-
 drivers/block/swim.c                |  12 ++---
 drivers/block/swim3.c               |   3 +-
 drivers/block/sx8.c                 |   8 ++-
 drivers/block/ub.c                  |   8 +--
 drivers/block/viodasd.c             |   4 +-
 drivers/block/virtio_blk.c          |   4 +-
 drivers/block/xd.c                  |  12 ++---
 drivers/block/xen-blkfront.c        |   4 +-
 drivers/block/xsysace.c             |  10 ++--
 drivers/block/z2ram.c               |  12 ++---
 drivers/cdrom/gdrom.c               |   4 +-
 drivers/cdrom/viocd.c               |   4 +-
 drivers/ide/ide-atapi.c             |   2 +-
 drivers/ide/ide-io.c                |   9 ++--
 drivers/memstick/core/mspro_block.c |   9 ++--
 drivers/message/i2o/i2o_block.c     |   6 +--
 drivers/mmc/card/queue.c            |  11 ++--
 drivers/mtd/mtd_blkdevs.c           |   7 +--
 drivers/s390/block/dasd.c           |  16 ++----
 drivers/s390/char/tape_block.c      |   7 +--
 drivers/sbus/char/jsflash.c         |  12 ++---
 drivers/scsi/scsi_lib.c             |  10 ++--
 drivers/scsi/scsi_transport_sas.c   |   4 +-
 include/linux/blkdev.h              |   9 +++-
 include/linux/elevator.h            |   2 -
 45 files changed, 172 insertions(+), 207 deletions(-)

(limited to 'include')

diff --git a/arch/arm/plat-omap/mailbox.c b/arch/arm/plat-omap/mailbox.c
index 7a1f5c25fd1..40424edae93 100644
--- a/arch/arm/plat-omap/mailbox.c
+++ b/arch/arm/plat-omap/mailbox.c
@@ -197,9 +197,7 @@ static void mbox_tx_work(struct work_struct *work)
 		struct omap_msg_tx_data *tx_data;
 
 		spin_lock(q->queue_lock);
-		rq = elv_next_request(q);
-		if (rq)
-			blkdev_dequeue_request(rq);
+		rq = blk_fetch_request(q);
 		spin_unlock(q->queue_lock);
 
 		if (!rq)
@@ -242,9 +240,7 @@ static void mbox_rx_work(struct work_struct *work)
 
 	while (1) {
 		spin_lock_irqsave(q->queue_lock, flags);
-		rq = elv_next_request(q);
-		if (rq)
-			blkdev_dequeue_request(rq);
+		rq = blk_fetch_request(q);
 		spin_unlock_irqrestore(q->queue_lock, flags);
 		if (!rq)
 			break;
@@ -351,9 +347,7 @@ omap_mbox_read(struct device *dev, struct device_attribute *attr, char *buf)
 
 	while (1) {
 		spin_lock_irqsave(q->queue_lock, flags);
-		rq = elv_next_request(q);
-		if (rq)
-			blkdev_dequeue_request(rq);
+		rq = blk_fetch_request(q);
 		spin_unlock_irqrestore(q->queue_lock, flags);
 
 		if (!rq)
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 402ba8f70fc..aa9e926e13d 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -1228,12 +1228,11 @@ static void do_ubd_request(struct request_queue *q)
 	while(1){
 		struct ubd *dev = q->queuedata;
 		if(dev->end_sg == 0){
-			struct request *req = elv_next_request(q);
+			struct request *req = blk_fetch_request(q);
 			if(req == NULL)
 				return;
 
 			dev->request = req;
-			blkdev_dequeue_request(req);
 			dev->start_sg = 0;
 			dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
 		}
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 8713c2fbc4f..0ab81a0a750 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -180,7 +180,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp)
 	}
 
 	/* stash away the original request */
-	elv_dequeue_request(q, rq);
+	blk_dequeue_request(rq);
 	q->orig_bar_rq = rq;
 	rq = NULL;
 
@@ -248,7 +248,7 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp)
 			 * Queue ordering not supported.  Terminate
 			 * with prejudice.
 			 */
-			elv_dequeue_request(q, rq);
+			blk_dequeue_request(rq);
 			__blk_end_request_all(rq, -EOPNOTSUPP);
 			*rqp = NULL;
 			return false;
diff --git a/block/blk-core.c b/block/blk-core.c
index 6226a380fb6..93691d2ac5a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -902,6 +902,8 @@ EXPORT_SYMBOL(blk_get_request);
  */
 void blk_requeue_request(struct request_queue *q, struct request *rq)
 {
+	BUG_ON(blk_queued_rq(rq));
+
 	blk_delete_timer(rq);
 	blk_clear_rq_complete(rq);
 	trace_block_rq_requeue(q, rq);
@@ -1610,28 +1612,6 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 }
 EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
 
-/**
- * blkdev_dequeue_request - dequeue request and start timeout timer
- * @req: request to dequeue
- *
- * Dequeue @req and start timeout timer on it.  This hands off the
- * request to the driver.
- *
- * Block internal functions which don't want to start timer should
- * call elv_dequeue_request().
- */
-void blkdev_dequeue_request(struct request *req)
-{
-	elv_dequeue_request(req->q, req);
-
-	/*
-	 * We are now handing the request to the hardware, add the
-	 * timeout handler.
-	 */
-	blk_add_timer(req);
-}
-EXPORT_SYMBOL(blkdev_dequeue_request);
-
 static void blk_account_io_completion(struct request *req, unsigned int bytes)
 {
 	if (blk_do_io_stat(req)) {
@@ -1671,7 +1651,23 @@ static void blk_account_io_done(struct request *req)
 	}
 }
 
-struct request *elv_next_request(struct request_queue *q)
+/**
+ * blk_peek_request - peek at the top of a request queue
+ * @q: request queue to peek at
+ *
+ * Description:
+ *     Return the request at the top of @q.  The returned request
+ *     should be started using blk_start_request() before LLD starts
+ *     processing it.
+ *
+ * Return:
+ *     Pointer to the request at the top of @q if available.  Null
+ *     otherwise.
+ *
+ * Context:
+ *     queue_lock must be held.
+ */
+struct request *blk_peek_request(struct request_queue *q)
 {
 	struct request *rq;
 	int ret;
@@ -1748,10 +1744,12 @@ struct request *elv_next_request(struct request_queue *q)
 
 	return rq;
 }
-EXPORT_SYMBOL(elv_next_request);
+EXPORT_SYMBOL(blk_peek_request);
 
-void elv_dequeue_request(struct request_queue *q, struct request *rq)
+void blk_dequeue_request(struct request *rq)
 {
+	struct request_queue *q = rq->q;
+
 	BUG_ON(list_empty(&rq->queuelist));
 	BUG_ON(ELV_ON_HASH(rq));
 
@@ -1766,6 +1764,58 @@ void elv_dequeue_request(struct request_queue *q, struct request *rq)
 		q->in_flight++;
 }
 
+/**
+ * blk_start_request - start request processing on the driver
+ * @req: request to dequeue
+ *
+ * Description:
+ *     Dequeue @req and start timeout timer on it.  This hands off the
+ *     request to the driver.
+ *
+ *     Block internal functions which don't want to start timer should
+ *     call blk_dequeue_request().
+ *
+ * Context:
+ *     queue_lock must be held.
+ */
+void blk_start_request(struct request *req)
+{
+	blk_dequeue_request(req);
+
+	/*
+	 * We are now handing the request to the hardware, add the
+	 * timeout handler.
+	 */
+	blk_add_timer(req);
+}
+EXPORT_SYMBOL(blk_start_request);
+
+/**
+ * blk_fetch_request - fetch a request from a request queue
+ * @q: request queue to fetch a request from
+ *
+ * Description:
+ *     Return the request at the top of @q.  The request is started on
+ *     return and LLD can start processing it immediately.
+ *
+ * Return:
+ *     Pointer to the request at the top of @q if available.  Null
+ *     otherwise.
+ *
+ * Context:
+ *     queue_lock must be held.
+ */
+struct request *blk_fetch_request(struct request_queue *q)
+{
+	struct request *rq;
+
+	rq = blk_peek_request(q);
+	if (rq)
+		blk_start_request(rq);
+	return rq;
+}
+EXPORT_SYMBOL(blk_fetch_request);
+
 /**
  * blk_update_request - Special helper function for request stacking drivers
  * @rq:	      the request being processed
@@ -1937,12 +1987,11 @@ static bool blk_update_bidi_request(struct request *rq, int error,
  */
 static void blk_finish_request(struct request *req, int error)
 {
+	BUG_ON(blk_queued_rq(req));
+
 	if (blk_rq_tagged(req))
 		blk_queue_end_tag(req->q, req);
 
-	if (blk_queued_rq(req))
-		elv_dequeue_request(req->q, req);
-
 	if (unlikely(laptop_mode) && blk_fs_request(req))
 		laptop_io_completion();
 
diff --git a/block/blk-tag.c b/block/blk-tag.c
index 3c518e3303a..c260f7c30dd 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -374,7 +374,7 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 	rq->cmd_flags |= REQ_QUEUED;
 	rq->tag = tag;
 	bqt->tag_index[tag] = rq;
-	blkdev_dequeue_request(rq);
+	blk_start_request(rq);
 	list_add(&rq->queuelist, &q->tag_busy_list);
 	return 0;
 }
diff --git a/block/blk.h b/block/blk.h
index ab54529103c..9e0042ca949 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -13,6 +13,7 @@ extern struct kobj_type blk_queue_ktype;
 void init_request_from_bio(struct request *req, struct bio *bio);
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 			struct bio *bio);
+void blk_dequeue_request(struct request *rq);
 void __blk_queue_free_tags(struct request_queue *q);
 
 void blk_unplug_work(struct work_struct *work);
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 774ab05973a..668dc234b8e 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -3321,7 +3321,7 @@ static int DAC960_process_queue(DAC960_Controller_T *Controller, struct request_
 	DAC960_Command_T *Command;
 
    while(1) {
-	Request = elv_next_request(req_q);
+	Request = blk_peek_request(req_q);
 	if (!Request)
 		return 1;
 
@@ -3341,7 +3341,7 @@ static int DAC960_process_queue(DAC960_Controller_T *Controller, struct request_
 	Command->BlockNumber = blk_rq_pos(Request);
 	Command->BlockCount = blk_rq_sectors(Request);
 	Command->Request = Request;
-	blkdev_dequeue_request(Request);
+	blk_start_request(Request);
 	Command->SegmentCount = blk_rq_map_sg(req_q,
 		  Command->Request, Command->cmd_sglist);
 	/* pci_map_sg MAY change the value of SegCount */
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 80a68b2e045..9c6e5b0fe89 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1342,12 +1342,11 @@ static void redo_fd_request(void)
 	int err;
 
 next_req:
-	rq = elv_next_request(floppy_queue);
+	rq = blk_fetch_request(floppy_queue);
 	if (!rq) {
 		/* Nothing left to do */
 		return;
 	}
-	blkdev_dequeue_request(rq);
 
 	floppy = rq->rq_disk->private_data;
 	drive = floppy - unit;
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 89a591d9c83..f5e7180d7f4 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1404,10 +1404,9 @@ static void redo_fd_request(void)
 
 repeat:
 	if (!fd_request) {
-		fd_request = elv_next_request(floppy_queue);
+		fd_request = blk_fetch_request(floppy_queue);
 		if (!fd_request)
 			goto the_end;
-		blkdev_dequeue_request(fd_request);
 	}
 
 	floppy = fd_request->rq_disk->private_data;
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index ab7b04c0db7..e714e7cce6f 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -2801,7 +2801,7 @@ static void do_cciss_request(struct request_queue *q)
 		goto startio;
 
       queue:
-	creq = elv_next_request(q);
+	creq = blk_peek_request(q);
 	if (!creq)
 		goto startio;
 
@@ -2810,7 +2810,7 @@ static void do_cciss_request(struct request_queue *q)
 	if ((c = cmd_alloc(h, 1)) == NULL)
 		goto full;
 
-	blkdev_dequeue_request(creq);
+	blk_start_request(creq);
 
 	spin_unlock_irq(q->queue_lock);
 
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index a5caeff4718..a02dcfc00f1 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -903,7 +903,7 @@ static void do_ida_request(struct request_queue *q)
 		goto startio;
 
 queue_next:
-	creq = elv_next_request(q);
+	creq = blk_peek_request(q);
 	if (!creq)
 		goto startio;
 
@@ -912,7 +912,7 @@ queue_next:
 	if ((c = cmd_alloc(h,1)) == NULL)
 		goto startio;
 
-	blkdev_dequeue_request(creq);
+	blk_start_request(creq);
 
 	c->ctlr = h->ctlr;
 	c->hdr.unit = (drv_info_t *)(creq->rq_disk->private_data) - h->drv;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index e2c70d2085a..90877fee0ee 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -931,7 +931,7 @@ static inline void unlock_fdc(void)
 	del_timer(&fd_timeout);
 	cont = NULL;
 	clear_bit(0, &fdc_busy);
-	if (current_req || elv_next_request(floppy_queue))
+	if (current_req || blk_peek_request(floppy_queue))
 		do_fd_request(floppy_queue);
 	spin_unlock_irqrestore(&floppy_lock, flags);
 	wake_up(&fdc_wait);
@@ -2912,9 +2912,7 @@ static void redo_fd_request(void)
 			struct request *req;
 
 			spin_lock_irq(floppy_queue->queue_lock);
-			req = elv_next_request(floppy_queue);
-			if (req)
-				blkdev_dequeue_request(req);
+			req = blk_fetch_request(floppy_queue);
 			spin_unlock_irq(floppy_queue->queue_lock);
 			if (!req) {
 				do_floppy = NULL;
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index 288ab63c102..961de56d00a 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -592,12 +592,11 @@ repeat:
 	del_timer(&device_timer);
 
 	if (!hd_req) {
-		hd_req = elv_next_request(hd_queue);
+		hd_req = blk_fetch_request(hd_queue);
 		if (!hd_req) {
 			do_hd = NULL;
 			return;
 		}
-		blkdev_dequeue_request(hd_req);
 	}
 	req = hd_req;
 
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index 1ca5d1423fa..c0cd0a03f69 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -671,10 +671,8 @@ static void mg_request_poll(struct request_queue *q)
 
 	while (1) {
 		if (!host->req) {
-			host->req = elv_next_request(q);
-			if (host->req)
-				blkdev_dequeue_request(host->req);
-			else
+			host->req = blk_fetch_request(q);
+			if (!host->req)
 				break;
 		}
 
@@ -744,10 +742,8 @@ static void mg_request(struct request_queue *q)
 
 	while (1) {
 		if (!host->req) {
-			host->req = elv_next_request(q);
-			if (host->req)
-				blkdev_dequeue_request(host->req);
-			else
+			host->req = blk_fetch_request(q);
+			if (!host->req)
 				break;
 		}
 		req = host->req;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index fad167de23b..5d23ffad7c7 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -533,11 +533,9 @@ static void do_nbd_request(struct request_queue *q)
 {
 	struct request *req;
 	
-	while ((req = elv_next_request(q)) != NULL) {
+	while ((req = blk_fetch_request(q)) != NULL) {
 		struct nbd_device *lo;
 
-		blkdev_dequeue_request(req);
-
 		spin_unlock_irq(q->queue_lock);
 
 		dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n",
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 425f81586a3..911dfd98d81 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -720,10 +720,9 @@ static void do_pcd_request(struct request_queue * q)
 		return;
 	while (1) {
 		if (!pcd_req) {
-			pcd_req = elv_next_request(q);
+			pcd_req = blk_fetch_request(q);
 			if (!pcd_req)
 				return;
-			blkdev_dequeue_request(pcd_req);
 		}
 
 		if (rq_data_dir(pcd_req) == READ) {
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index d2ca3f55206..bf5955b3d87 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -412,11 +412,9 @@ static void run_fsm(void)
 				spin_lock_irqsave(&pd_lock, saved_flags);
 				if (!__blk_end_request_cur(pd_req,
 						res == Ok ? 0 : -EIO)) {
-					pd_req = elv_next_request(pd_queue);
+					pd_req = blk_fetch_request(pd_queue);
 					if (!pd_req)
 						stop = 1;
-					else
-						blkdev_dequeue_request(pd_req);
 				}
 				spin_unlock_irqrestore(&pd_lock, saved_flags);
 				if (stop)
@@ -706,10 +704,9 @@ static void do_pd_request(struct request_queue * q)
 {
 	if (pd_req)
 		return;
-	pd_req = elv_next_request(q);
+	pd_req = blk_fetch_request(q);
 	if (!pd_req)
 		return;
-	blkdev_dequeue_request(pd_req);
 
 	schedule_fsm();
 }
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index d6f7bd84ed3..68a90834e99 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -762,10 +762,9 @@ static void do_pf_request(struct request_queue * q)
 		return;
 repeat:
 	if (!pf_req) {
-		pf_req = elv_next_request(q);
+		pf_req = blk_fetch_request(q);
 		if (!pf_req)
 			return;
-		blkdev_dequeue_request(pf_req);
 	}
 
 	pf_current = pf_req->rq_disk->private_data;
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index f4d8db944e7..338cee4cc0b 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -194,9 +194,7 @@ static void ps3disk_do_request(struct ps3_storage_device *dev,
 
 	dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__);
 
-	while ((req = elv_next_request(q))) {
-		blkdev_dequeue_request(req);
-
+	while ((req = blk_fetch_request(q))) {
 		if (blk_fs_request(req)) {
 			if (ps3disk_submit_request_sg(dev, req))
 				break;
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 9f351bfa15e..cbfd9c0aef0 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -441,12 +441,11 @@ out:
 static void do_vdc_request(struct request_queue *q)
 {
 	while (1) {
-		struct request *req = elv_next_request(q);
+		struct request *req = blk_fetch_request(q);
 
 		if (!req)
 			break;
 
-		blkdev_dequeue_request(req);
 		if (__send_request(req) < 0)
 			__blk_end_request_all(req, -EIO);
 	}
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index dedd4893f5e..cf7877fb8a7 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -528,10 +528,7 @@ static void redo_fd_request(struct request_queue *q)
 	struct request *req;
 	struct floppy_state *fs;
 
-	req = elv_next_request(q);
-	if (req)
-		blkdev_dequeue_request(req);
-
+	req = blk_fetch_request(q);
 	while (req) {
 		int err = -EIO;
 
@@ -554,11 +551,8 @@ static void redo_fd_request(struct request_queue *q)
 			break;
 		}
 	done:
-		if (!__blk_end_request_cur(req, err)) {
-			req = elv_next_request(q);
-			if (req)
-				blkdev_dequeue_request(req);
-		}
+		if (!__blk_end_request_cur(req, err))
+			req = blk_fetch_request(q);
 	}
 }
 
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index f48c6dd47e0..80df93e3cdd 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -326,10 +326,9 @@ static void start_request(struct floppy_state *fs)
 	}
 	while (fs->state == idle) {
 		if (!fd_req) {
-			fd_req = elv_next_request(swim3_queue);
+			fd_req = blk_fetch_request(swim3_queue);
 			if (!fd_req)
 				break;
-			blkdev_dequeue_request(fd_req);
 		}
 		req = fd_req;
 #if 0
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 087c94c8b2d..da403b6a7f4 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -810,12 +810,10 @@ static void carm_oob_rq_fn(struct request_queue *q)
 
 	while (1) {
 		DPRINTK("get req\n");
-		rq = elv_next_request(q);
+		rq = blk_fetch_request(q);
 		if (!rq)
 			break;
 
-		blkdev_dequeue_request(rq);
-
 		crq = rq->special;
 		assert(crq != NULL);
 		assert(crq->rq == rq);
@@ -846,7 +844,7 @@ static void carm_rq_fn(struct request_queue *q)
 
 queue_one_request:
 	VPRINTK("get req\n");
-	rq = elv_next_request(q);
+	rq = blk_peek_request(q);
 	if (!rq)
 		return;
 
@@ -857,7 +855,7 @@ queue_one_request:
 	}
 	crq->rq = rq;
 
-	blkdev_dequeue_request(rq);
+	blk_start_request(rq);
 
 	if (rq_data_dir(rq) == WRITE) {
 		writing = 1;
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 40d03cf63f2..178f459a50e 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -627,7 +627,7 @@ static void ub_request_fn(struct request_queue *q)
 	struct ub_lun *lun = q->queuedata;
 	struct request *rq;
 
-	while ((rq = elv_next_request(q)) != NULL) {
+	while ((rq = blk_peek_request(q)) != NULL) {
 		if (ub_request_fn_1(lun, rq) != 0) {
 			blk_stop_queue(q);
 			break;
@@ -643,13 +643,13 @@ static int ub_request_fn_1(struct ub_lun *lun, struct request *rq)
 	int n_elem;
 
 	if (atomic_read(&sc->poison)) {
-		blkdev_dequeue_request(rq);
+		blk_start_request(rq);
 		ub_end_rq(rq, DID_NO_CONNECT << 16, blk_rq_bytes(rq));
 		return 0;
 	}
 
 	if (lun->changed && !blk_pc_request(rq)) {
-		blkdev_dequeue_request(rq);
+		blk_start_request(rq);
 		ub_end_rq(rq, SAM_STAT_CHECK_CONDITION, blk_rq_bytes(rq));
 		return 0;
 	}
@@ -660,7 +660,7 @@ static int ub_request_fn_1(struct ub_lun *lun, struct request *rq)
 		return -1;
 	memset(cmd, 0, sizeof(struct ub_scsi_cmd));
 
-	blkdev_dequeue_request(rq);
+	blk_start_request(rq);
 
 	urq = &lun->urq;
 	memset(urq, 0, sizeof(struct ub_request));
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
index 2086cb12d3e..390d69bb7c4 100644
--- a/drivers/block/viodasd.c
+++ b/drivers/block/viodasd.c
@@ -361,11 +361,9 @@ static void do_viodasd_request(struct request_queue *q)
 	 * back later.
 	 */
 	while (num_req_outstanding < VIOMAXREQ) {
-		req = elv_next_request(q);
+		req = blk_fetch_request(q);
 		if (req == NULL)
 			return;
-		/* dequeue the current request from the queue */
-		blkdev_dequeue_request(req);
 		/* check that request contains a valid command */
 		if (!blk_fs_request(req)) {
 			viodasd_end_request(req, -EIO, blk_rq_sectors(req));
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 1980ab45635..29a9daf4862 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -128,7 +128,7 @@ static void do_virtblk_request(struct request_queue *q)
 	struct request *req;
 	unsigned int issued = 0;
 
-	while ((req = elv_next_request(q)) != NULL) {
+	while ((req = blk_peek_request(q)) != NULL) {
 		vblk = req->rq_disk->private_data;
 		BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
 
@@ -138,7 +138,7 @@ static void do_virtblk_request(struct request_queue *q)
 			blk_stop_queue(q);
 			break;
 		}
-		blkdev_dequeue_request(req);
+		blk_start_request(req);
 		issued++;
 	}
 
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index d4c4352354b..ce242921992 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -305,10 +305,7 @@ static void do_xd_request (struct request_queue * q)
 	if (xdc_busy)
 		return;
 
-	req = elv_next_request(q);
-	if (req)
-		blkdev_dequeue_request(req);
-
+	req = blk_fetch_request(q);
 	while (req) {
 		unsigned block = blk_rq_pos(req);
 		unsigned count = blk_rq_cur_sectors(req);
@@ -325,11 +322,8 @@ static void do_xd_request (struct request_queue * q)
 					   block, count);
 	done:
 		/* wrap up, 0 = success, -errno = fail */
-		if (!__blk_end_request_cur(req, res)) {
-			req = elv_next_request(q);
-			if (req)
-				blkdev_dequeue_request(req);
-		}
+		if (!__blk_end_request_cur(req, res))
+			req = blk_fetch_request(q);
 	}
 }
 
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 66f834571b8..6d4ac76c280 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -299,13 +299,13 @@ static void do_blkif_request(struct request_queue *rq)
 
 	queued = 0;
 
-	while ((req = elv_next_request(rq)) != NULL) {
+	while ((req = blk_peek_request(rq)) != NULL) {
 		info = req->rq_disk->private_data;
 
 		if (RING_FULL(&info->ring))
 			goto wait;
 
-		blkdev_dequeue_request(req);
+		blk_start_request(req);
 
 		if (!blk_fs_request(req)) {
 			__blk_end_request_all(req, -EIO);
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index edf137b6c37..3a4397edab7 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -463,10 +463,10 @@ struct request *ace_get_next_request(struct request_queue * q)
 {
 	struct request *req;
 
-	while ((req = elv_next_request(q)) != NULL) {
+	while ((req = blk_peek_request(q)) != NULL) {
 		if (blk_fs_request(req))
 			break;
-		blkdev_dequeue_request(req);
+		blk_start_request(req);
 		__blk_end_request_all(req, -EIO);
 	}
 	return req;
@@ -498,10 +498,8 @@ static void ace_fsm_dostate(struct ace_device *ace)
 			__blk_end_request_all(ace->req, -EIO);
 			ace->req = NULL;
 		}
-		while ((req = elv_next_request(ace->queue)) != NULL) {
-			blkdev_dequeue_request(req);
+		while ((req = blk_fetch_request(ace->queue)) != NULL)
 			__blk_end_request_all(req, -EIO);
-		}
 
 		/* Drop back to IDLE state and notify waiters */
 		ace->fsm_state = ACE_FSM_STATE_IDLE;
@@ -649,7 +647,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
 			ace->fsm_state = ACE_FSM_STATE_IDLE;
 			break;
 		}
-		blkdev_dequeue_request(req);
+		blk_start_request(req);
 
 		/* Okay, it's a data request, set it up for transfer */
 		dev_dbg(ace->dev,
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index c909c1a3f65..4575171e5be 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -71,10 +71,7 @@ static void do_z2_request(struct request_queue *q)
 {
 	struct request *req;
 
-	req = elv_next_request(q);
-	if (req)
-		blkdev_dequeue_request(req);
-
+	req = blk_fetch_request(q);
 	while (req) {
 		unsigned long start = blk_rq_pos(req) << 9;
 		unsigned long len  = blk_rq_cur_bytes(req);
@@ -100,11 +97,8 @@ static void do_z2_request(struct request_queue *q)
 			len -= size;
 		}
 	done:
-		if (!__blk_end_request_cur(req, err)) {
-			req = elv_next_request(q);
-			if (req)
-				blkdev_dequeue_request(req);
-		}
+		if (!__blk_end_request_cur(req, err))
+			req = blk_fetch_request(q);
 	}
 }
 
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 3cc02bfe828..1e366ad8f68 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -642,9 +642,7 @@ static void gdrom_request(struct request_queue *rq)
 {
 	struct request *req;
 
-	while ((req = elv_next_request(rq)) != NULL) {
-		blkdev_dequeue_request(req);
-
+	while ((req = blk_fetch_request(rq)) != NULL) {
 		if (!blk_fs_request(req)) {
 			printk(KERN_DEBUG "GDROM: Non-fs request ignored\n");
 			__blk_end_request_all(req, -EIO);
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index bbe9f086734..ca741c21e4a 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -297,9 +297,7 @@ static void do_viocd_request(struct request_queue *q)
 {
 	struct request *req;
 
-	while ((rwreq == 0) && ((req = elv_next_request(q)) != NULL)) {
-		blkdev_dequeue_request(req);
-
+	while ((rwreq == 0) && ((req = blk_fetch_request(q)) != NULL)) {
 		if (!blk_fs_request(req))
 			__blk_end_request_all(req, -EIO);
 		else if (send_request(req) < 0) {
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 2874c3d703a..8a894fa37b5 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -269,7 +269,7 @@ void ide_retry_pc(ide_drive_t *drive)
 	blk_requeue_request(failed_rq->q, failed_rq);
 	drive->hwif->rq = NULL;
 	if (ide_queue_sense_rq(drive, pc)) {
-		blkdev_dequeue_request(failed_rq);
+		blk_start_request(failed_rq);
 		ide_complete_rq(drive, -EIO, blk_rq_bytes(failed_rq));
 	}
 }
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index abda7337b3f..e4e3a0e3201 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -519,11 +519,8 @@ repeat:
 		 * we know that the queue isn't empty, but this can happen
 		 * if the q->prep_rq_fn() decides to kill a request
 		 */
-		if (!rq) {
-			rq = elv_next_request(drive->queue);
-			if (rq)
-				blkdev_dequeue_request(rq);
-		}
+		if (!rq)
+			rq = blk_fetch_request(drive->queue);
 
 		spin_unlock_irq(q->queue_lock);
 		spin_lock_irq(&hwif->lock);
@@ -536,7 +533,7 @@ repeat:
 		/*
 		 * Sanity: don't accept a request that isn't a PM request
 		 * if we are currently power managed. This is very important as
-		 * blk_stop_queue() doesn't prevent the elv_next_request()
+		 * blk_stop_queue() doesn't prevent the blk_fetch_request()
 		 * above to return us whatever is in the queue. Since we call
 		 * ide_do_request() ourselves, we end up taking requests while
 		 * the queue is blocked...
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index 58f5be8cd69..c0bebc6a2f2 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -704,13 +704,12 @@ try_again:
 		return 0;
 	}
 
-	dev_dbg(&card->dev, "elv_next\n");
-	msb->block_req = elv_next_request(msb->queue);
+	dev_dbg(&card->dev, "blk_fetch\n");
+	msb->block_req = blk_fetch_request(msb->queue);
 	if (!msb->block_req) {
 		dev_dbg(&card->dev, "issue end\n");
 		return -EAGAIN;
 	}
-	blkdev_dequeue_request(msb->block_req);
 
 	dev_dbg(&card->dev, "trying again\n");
 	chunk = 1;
@@ -825,10 +824,8 @@ static void mspro_block_submit_req(struct request_queue *q)
 		return;
 
 	if (msb->eject) {
-		while ((req = elv_next_request(q)) != NULL) {
-			blkdev_dequeue_request(req);
+		while ((req = blk_fetch_request(q)) != NULL)
 			__blk_end_request_all(req, -ENODEV);
-		}
 
 		return;
 	}
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index 8b5cbfc3ba9..6573ef4408f 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -877,7 +877,7 @@ static void i2o_block_request_fn(struct request_queue *q)
 	struct request *req;
 
 	while (!blk_queue_plugged(q)) {
-		req = elv_next_request(q);
+		req = blk_peek_request(q);
 		if (!req)
 			break;
 
@@ -890,7 +890,7 @@ static void i2o_block_request_fn(struct request_queue *q)
 
 			if (queue_depth < I2O_BLOCK_MAX_OPEN_REQUESTS) {
 				if (!i2o_block_transfer(req)) {
-					blkdev_dequeue_request(req);
+					blk_start_request(req);
 					continue;
 				} else
 					osm_info("transfer error\n");
@@ -917,7 +917,7 @@ static void i2o_block_request_fn(struct request_queue *q)
 				break;
 			}
 		} else {
-			blkdev_dequeue_request(req);
+			blk_start_request(req);
 			__blk_end_request_all(req, -EIO);
 		}
 	}
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index 4b70f1e2834..49e582356c6 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -54,11 +54,8 @@ static int mmc_queue_thread(void *d)
 
 		spin_lock_irq(q->queue_lock);
 		set_current_state(TASK_INTERRUPTIBLE);
-		if (!blk_queue_plugged(q)) {
-			req = elv_next_request(q);
-			if (req)
-				blkdev_dequeue_request(req);
-		}
+		if (!blk_queue_plugged(q))
+			req = blk_fetch_request(q);
 		mq->req = req;
 		spin_unlock_irq(q->queue_lock);
 
@@ -94,10 +91,8 @@ static void mmc_request(struct request_queue *q)
 
 	if (!mq) {
 		printk(KERN_ERR "MMC: killing requests for dead queue\n");
-		while ((req = elv_next_request(q)) != NULL) {
-			blkdev_dequeue_request(req);
+		while ((req = blk_fetch_request(q)) != NULL)
 			__blk_end_request_all(req, -EIO);
-		}
 		return;
 	}
 
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 3e10442615d..502622f628b 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -100,12 +100,7 @@ static int mtd_blktrans_thread(void *arg)
 		struct mtd_blktrans_dev *dev;
 		int res;
 
-		if (!req) {
-			req = elv_next_request(rq);
-			if (req)
-				blkdev_dequeue_request(req);
-		}
-		if (!req) {
+		if (!req && !(req = blk_fetch_request(rq))) {
 			set_current_state(TASK_INTERRUPTIBLE);
 			spin_unlock_irq(rq->queue_lock);
 			schedule();
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 7df03c7aea0..e64f62d5e0f 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -1656,17 +1656,13 @@ static void __dasd_process_request_queue(struct dasd_block *block)
 	if (basedev->state < DASD_STATE_READY)
 		return;
 	/* Now we try to fetch requests from the request queue */
-	while (!blk_queue_plugged(queue) &&
-	       elv_next_request(queue)) {
-
-		req = elv_next_request(queue);
-
+	while (!blk_queue_plugged(queue) && (req = blk_peek_request(queue))) {
 		if (basedev->features & DASD_FEATURE_READONLY &&
 		    rq_data_dir(req) == WRITE) {
 			DBF_DEV_EVENT(DBF_ERR, basedev,
 				      "Rejecting write request %p",
 				      req);
-			blkdev_dequeue_request(req);
+			blk_start_request(req);
 			__blk_end_request_all(req, -EIO);
 			continue;
 		}
@@ -1695,7 +1691,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
 				      "CCW creation failed (rc=%ld) "
 				      "on request %p",
 				      PTR_ERR(cqr), req);
-			blkdev_dequeue_request(req);
+			blk_start_request(req);
 			__blk_end_request_all(req, -EIO);
 			continue;
 		}
@@ -1705,7 +1701,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
 		 */
 		cqr->callback_data = (void *) req;
 		cqr->status = DASD_CQR_FILLED;
-		blkdev_dequeue_request(req);
+		blk_start_request(req);
 		list_add_tail(&cqr->blocklist, &block->ccw_queue);
 		dasd_profile_start(block, cqr, req);
 	}
@@ -2029,10 +2025,8 @@ static void dasd_flush_request_queue(struct dasd_block *block)
 		return;
 
 	spin_lock_irq(&block->request_queue_lock);
-	while ((req = elv_next_request(block->request_queue))) {
-		blkdev_dequeue_request(req);
+	while ((req = blk_fetch_request(block->request_queue)))
 		__blk_end_request_all(req, -EIO);
-	}
 	spin_unlock_irq(&block->request_queue_lock);
 }
 
diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c
index 5d035e4939d..1e796767598 100644
--- a/drivers/s390/char/tape_block.c
+++ b/drivers/s390/char/tape_block.c
@@ -93,7 +93,7 @@ __tapeblock_end_request(struct tape_request *ccw_req, void *data)
 		device->blk_data.block_position = -1;
 	device->discipline->free_bread(ccw_req);
 	if (!list_empty(&device->req_queue) ||
-	    elv_next_request(device->blk_data.request_queue))
+	    blk_peek_request(device->blk_data.request_queue))
 		tapeblock_trigger_requeue(device);
 }
 
@@ -162,19 +162,16 @@ tapeblock_requeue(struct work_struct *work) {
 	spin_lock_irq(&device->blk_data.request_queue_lock);
 	while (
 		!blk_queue_plugged(queue) &&
-		elv_next_request(queue)   &&
+		(req = blk_fetch_request(queue)) &&
 		nr_queued < TAPEBLOCK_MIN_REQUEUE
 	) {
-		req = elv_next_request(queue);
 		if (rq_data_dir(req) == WRITE) {
 			DBF_EVENT(1, "TBLOCK: Rejecting write request\n");
-			blkdev_dequeue_request(req);
 			spin_unlock_irq(&device->blk_data.request_queue_lock);
 			blk_end_request_all(req, -EIO);
 			spin_lock_irq(&device->blk_data.request_queue_lock);
 			continue;
 		}
-		blkdev_dequeue_request(req);
 		nr_queued++;
 		spin_unlock_irq(&device->blk_data.request_queue_lock);
 		rc = tapeblock_start_request(device, req);
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
index f572a4a1d14..6d465168468 100644
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -186,10 +186,7 @@ static void jsfd_do_request(struct request_queue *q)
 {
 	struct request *req;
 
-	req = elv_next_request(q);
-	if (req)
-		blkdev_dequeue_request(req);
-
+	req = blk_fetch_request(q);
 	while (req) {
 		struct jsfd_part *jdp = req->rq_disk->private_data;
 		unsigned long offset = blk_rq_pos(req) << 9;
@@ -212,11 +209,8 @@ static void jsfd_do_request(struct request_queue *q)
 		jsfd_read(req->buffer, jdp->dbase + offset, len);
 		err = 0;
 	end:
-		if (!__blk_end_request_cur(req, err)) {
-			req = elv_next_request(q);
-			if (req)
-				blkdev_dequeue_request(req);
-		}
+		if (!__blk_end_request_cur(req, err))
+			req = blk_fetch_request(q);
 	}
 }
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index ee308f6f798..b12750f8216 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1207,7 +1207,7 @@ int scsi_prep_return(struct request_queue *q, struct request *req, int ret)
 		break;
 	case BLKPREP_DEFER:
 		/*
-		 * If we defer, the elv_next_request() returns NULL, but the
+		 * If we defer, the blk_peek_request() returns NULL, but the
 		 * queue must be restarted, so we plug here if no returning
 		 * command will automatically do that.
 		 */
@@ -1385,7 +1385,7 @@ static void scsi_kill_request(struct request *req, struct request_queue *q)
 	struct scsi_target *starget = scsi_target(sdev);
 	struct Scsi_Host *shost = sdev->host;
 
-	blkdev_dequeue_request(req);
+	blk_start_request(req);
 
 	if (unlikely(cmd == NULL)) {
 		printk(KERN_CRIT "impossible request in %s.\n",
@@ -1477,7 +1477,7 @@ static void scsi_request_fn(struct request_queue *q)
 
 	if (!sdev) {
 		printk("scsi: killing requests for dead queue\n");
-		while ((req = elv_next_request(q)) != NULL)
+		while ((req = blk_peek_request(q)) != NULL)
 			scsi_kill_request(req, q);
 		return;
 	}
@@ -1498,7 +1498,7 @@ static void scsi_request_fn(struct request_queue *q)
 		 * that the request is fully prepared even if we cannot 
 		 * accept it.
 		 */
-		req = elv_next_request(q);
+		req = blk_peek_request(q);
 		if (!req || !scsi_dev_queue_ready(q, sdev))
 			break;
 
@@ -1514,7 +1514,7 @@ static void scsi_request_fn(struct request_queue *q)
 		 * Remove the request from the request list.
 		 */
 		if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req)))
-			blkdev_dequeue_request(req);
+			blk_start_request(req);
 		sdev->device_busy++;
 
 		spin_unlock(q->queue_lock);
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 50988cbf7b2..d606452297c 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -163,12 +163,10 @@ static void sas_smp_request(struct request_queue *q, struct Scsi_Host *shost,
 	int (*handler)(struct Scsi_Host *, struct sas_rphy *, struct request *);
 
 	while (!blk_queue_plugged(q)) {
-		req = elv_next_request(q);
+		req = blk_fetch_request(q);
 		if (!req)
 			break;
 
-		blkdev_dequeue_request(req);
-
 		spin_unlock_irq(q->queue_lock);
 
 		handler = to_sas_internal(shost->transportt)->f->smp_handler;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c7558034570..6e59d3b92ff 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -818,8 +818,6 @@ static inline void blk_run_address_space(struct address_space *mapping)
 		blk_run_backing_dev(mapping->backing_dev_info, NULL);
 }
 
-extern void blkdev_dequeue_request(struct request *req);
-
 /*
  * blk_rq_pos()		: the current sector
  * blk_rq_bytes()	: bytes left in the entire request
@@ -852,6 +850,13 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
 	return blk_rq_cur_bytes(rq) >> 9;
 }
 
+/*
+ * Request issue related functions.
+ */
+extern struct request *blk_peek_request(struct request_queue *q);
+extern void blk_start_request(struct request *rq);
+extern struct request *blk_fetch_request(struct request_queue *q);
+
 /*
  * Request completion related functions.
  *
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 4e462878c9c..1cb3372e65d 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -103,10 +103,8 @@ extern int elv_merge(struct request_queue *, struct request **, struct bio *);
 extern void elv_merge_requests(struct request_queue *, struct request *,
 			       struct request *);
 extern void elv_merged_request(struct request_queue *, struct request *, int);
-extern void elv_dequeue_request(struct request_queue *, struct request *);
 extern void elv_requeue_request(struct request_queue *, struct request *);
 extern int elv_queue_empty(struct request_queue *);
-extern struct request *elv_next_request(struct request_queue *q);
 extern struct request *elv_former_request(struct request_queue *, struct request *);
 extern struct request *elv_latter_request(struct request_queue *, struct request *);
 extern int elv_register_queue(struct request_queue *q);
-- 
cgit v1.2.3-70-g09d2


From 1822952ba2b9f22f79019d07ebbeca31dc14b718 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Mon, 11 May 2009 17:56:07 +0900
Subject: block: let blk_end_request_all handle bidi requests

blk_end_request_all() and __blk_end_request_all() should finish all
bytes including bidi, by definition. That's what all bidi users need ,
bidi requests must be complete as a whole (partial completion is
impossible).

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/blkdev.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6e59d3b92ff..1069f4483c6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -910,8 +910,12 @@ static inline bool blk_end_request(struct request *rq, int error,
 static inline void blk_end_request_all(struct request *rq, int error)
 {
 	bool pending;
+	unsigned int bidi_bytes = 0;
 
-	pending = blk_end_request(rq, error, blk_rq_bytes(rq));
+	if (unlikely(blk_bidi_rq(rq)))
+		bidi_bytes = blk_rq_bytes(rq->next_rq);
+
+	pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
 	BUG_ON(pending);
 }
 
@@ -962,8 +966,12 @@ static inline bool __blk_end_request(struct request *rq, int error,
 static inline void __blk_end_request_all(struct request *rq, int error)
 {
 	bool pending;
+	unsigned int bidi_bytes = 0;
+
+	if (unlikely(blk_bidi_rq(rq)))
+		bidi_bytes = blk_rq_bytes(rq->next_rq);
 
-	pending = __blk_end_request(rq, error, blk_rq_bytes(rq));
+	pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
 	BUG_ON(pending);
 }
 
-- 
cgit v1.2.3-70-g09d2


From b1f744937f1be3e6d3009382a755679133cf782d Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Mon, 11 May 2009 17:56:09 +0900
Subject: block: move completion related functions back to blk-core.c

Let's put the completion related functions back to block/blk-core.c
where they have lived. We can also unexport blk_end_bidi_request() and
__blk_end_bidi_request(), which nobody uses.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-core.c       | 128 ++++++++++++++++++++++++++++++++++++++++++++++---
 include/linux/blkdev.h | 128 ++++---------------------------------------------
 2 files changed, 130 insertions(+), 126 deletions(-)

(limited to 'include')

diff --git a/block/blk-core.c b/block/blk-core.c
index 93691d2ac5a..a2d97de1a12 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2026,8 +2026,8 @@ static void blk_finish_request(struct request *req, int error)
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-bool blk_end_bidi_request(struct request *rq, int error,
-			  unsigned int nr_bytes, unsigned int bidi_bytes)
+static bool blk_end_bidi_request(struct request *rq, int error,
+				 unsigned int nr_bytes, unsigned int bidi_bytes)
 {
 	struct request_queue *q = rq->q;
 	unsigned long flags;
@@ -2041,7 +2041,6 @@ bool blk_end_bidi_request(struct request *rq, int error,
 
 	return false;
 }
-EXPORT_SYMBOL_GPL(blk_end_bidi_request);
 
 /**
  * __blk_end_bidi_request - Complete a bidi request with queue lock held
@@ -2058,8 +2057,8 @@ EXPORT_SYMBOL_GPL(blk_end_bidi_request);
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-bool __blk_end_bidi_request(struct request *rq, int error,
-			    unsigned int nr_bytes, unsigned int bidi_bytes)
+static bool __blk_end_bidi_request(struct request *rq, int error,
+				   unsigned int nr_bytes, unsigned int bidi_bytes)
 {
 	if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
 		return true;
@@ -2068,7 +2067,124 @@ bool __blk_end_bidi_request(struct request *rq, int error,
 
 	return false;
 }
-EXPORT_SYMBOL_GPL(__blk_end_bidi_request);
+
+/**
+ * blk_end_request - Helper function for drivers to complete the request.
+ * @rq:       the request being processed
+ * @error:    %0 for success, < %0 for error
+ * @nr_bytes: number of bytes to complete
+ *
+ * Description:
+ *     Ends I/O on a number of bytes attached to @rq.
+ *     If @rq has leftover, sets it up for the next range of segments.
+ *
+ * Return:
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
+ **/
+bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
+{
+	return blk_end_bidi_request(rq, error, nr_bytes, 0);
+}
+EXPORT_SYMBOL_GPL(blk_end_request);
+
+/**
+ * blk_end_request_all - Helper function for drives to finish the request.
+ * @rq: the request to finish
+ * @err: %0 for success, < %0 for error
+ *
+ * Description:
+ *     Completely finish @rq.
+ */
+void blk_end_request_all(struct request *rq, int error)
+{
+	bool pending;
+	unsigned int bidi_bytes = 0;
+
+	if (unlikely(blk_bidi_rq(rq)))
+		bidi_bytes = blk_rq_bytes(rq->next_rq);
+
+	pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
+	BUG_ON(pending);
+}
+EXPORT_SYMBOL_GPL(blk_end_request_all);
+
+/**
+ * blk_end_request_cur - Helper function to finish the current request chunk.
+ * @rq: the request to finish the current chunk for
+ * @err: %0 for success, < %0 for error
+ *
+ * Description:
+ *     Complete the current consecutively mapped chunk from @rq.
+ *
+ * Return:
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
+ */
+bool blk_end_request_cur(struct request *rq, int error)
+{
+	return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
+}
+EXPORT_SYMBOL_GPL(blk_end_request_cur);
+
+/**
+ * __blk_end_request - Helper function for drivers to complete the request.
+ * @rq:       the request being processed
+ * @error:    %0 for success, < %0 for error
+ * @nr_bytes: number of bytes to complete
+ *
+ * Description:
+ *     Must be called with queue lock held unlike blk_end_request().
+ *
+ * Return:
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
+ **/
+bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
+{
+	return __blk_end_bidi_request(rq, error, nr_bytes, 0);
+}
+EXPORT_SYMBOL_GPL(__blk_end_request);
+
+/**
+ * __blk_end_request_all - Helper function for drives to finish the request.
+ * @rq: the request to finish
+ * @err: %0 for success, < %0 for error
+ *
+ * Description:
+ *     Completely finish @rq.  Must be called with queue lock held.
+ */
+void __blk_end_request_all(struct request *rq, int error)
+{
+	bool pending;
+	unsigned int bidi_bytes = 0;
+
+	if (unlikely(blk_bidi_rq(rq)))
+		bidi_bytes = blk_rq_bytes(rq->next_rq);
+
+	pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
+	BUG_ON(pending);
+}
+EXPORT_SYMBOL_GPL(__blk_end_request_all);
+
+/**
+ * __blk_end_request_cur - Helper function to finish the current request chunk.
+ * @rq: the request to finish the current chunk for
+ * @err: %0 for success, < %0 for error
+ *
+ * Description:
+ *     Complete the current consecutively mapped chunk from @rq.  Must
+ *     be called with queue lock held.
+ *
+ * Return:
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
+ */
+bool __blk_end_request_cur(struct request *rq, int error)
+{
+	return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
+}
+EXPORT_SYMBOL_GPL(__blk_end_request_cur);
 
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 		     struct bio *bio)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1069f4483c6..f9d60a78c08 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -872,126 +872,14 @@ extern struct request *blk_fetch_request(struct request_queue *q);
  */
 extern bool blk_update_request(struct request *rq, int error,
 			       unsigned int nr_bytes);
-extern bool blk_end_bidi_request(struct request *rq, int error,
-				 unsigned int nr_bytes,
-				 unsigned int bidi_bytes);
-extern bool __blk_end_bidi_request(struct request *rq, int error,
-				   unsigned int nr_bytes,
-				   unsigned int bidi_bytes);
-
-/**
- * blk_end_request - Helper function for drivers to complete the request.
- * @rq:       the request being processed
- * @error:    %0 for success, < %0 for error
- * @nr_bytes: number of bytes to complete
- *
- * Description:
- *     Ends I/O on a number of bytes attached to @rq.
- *     If @rq has leftover, sets it up for the next range of segments.
- *
- * Return:
- *     %false - we are done with this request
- *     %true  - still buffers pending for this request
- **/
-static inline bool blk_end_request(struct request *rq, int error,
-				   unsigned int nr_bytes)
-{
-	return blk_end_bidi_request(rq, error, nr_bytes, 0);
-}
-
-/**
- * blk_end_request_all - Helper function for drives to finish the request.
- * @rq: the request to finish
- * @err: %0 for success, < %0 for error
- *
- * Description:
- *     Completely finish @rq.
- */
-static inline void blk_end_request_all(struct request *rq, int error)
-{
-	bool pending;
-	unsigned int bidi_bytes = 0;
-
-	if (unlikely(blk_bidi_rq(rq)))
-		bidi_bytes = blk_rq_bytes(rq->next_rq);
-
-	pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
-	BUG_ON(pending);
-}
-
-/**
- * blk_end_request_cur - Helper function to finish the current request chunk.
- * @rq: the request to finish the current chunk for
- * @err: %0 for success, < %0 for error
- *
- * Description:
- *     Complete the current consecutively mapped chunk from @rq.
- *
- * Return:
- *     %false - we are done with this request
- *     %true  - still buffers pending for this request
- */
-static inline bool blk_end_request_cur(struct request *rq, int error)
-{
-	return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
-}
-
-/**
- * __blk_end_request - Helper function for drivers to complete the request.
- * @rq:       the request being processed
- * @error:    %0 for success, < %0 for error
- * @nr_bytes: number of bytes to complete
- *
- * Description:
- *     Must be called with queue lock held unlike blk_end_request().
- *
- * Return:
- *     %false - we are done with this request
- *     %true  - still buffers pending for this request
- **/
-static inline bool __blk_end_request(struct request *rq, int error,
-				     unsigned int nr_bytes)
-{
-	return __blk_end_bidi_request(rq, error, nr_bytes, 0);
-}
-
-/**
- * __blk_end_request_all - Helper function for drives to finish the request.
- * @rq: the request to finish
- * @err: %0 for success, < %0 for error
- *
- * Description:
- *     Completely finish @rq.  Must be called with queue lock held.
- */
-static inline void __blk_end_request_all(struct request *rq, int error)
-{
-	bool pending;
-	unsigned int bidi_bytes = 0;
-
-	if (unlikely(blk_bidi_rq(rq)))
-		bidi_bytes = blk_rq_bytes(rq->next_rq);
-
-	pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
-	BUG_ON(pending);
-}
-
-/**
- * __blk_end_request_cur - Helper function to finish the current request chunk.
- * @rq: the request to finish the current chunk for
- * @err: %0 for success, < %0 for error
- *
- * Description:
- *     Complete the current consecutively mapped chunk from @rq.  Must
- *     be called with queue lock held.
- *
- * Return:
- *     %false - we are done with this request
- *     %true  - still buffers pending for this request
- */
-static inline bool __blk_end_request_cur(struct request *rq, int error)
-{
-	return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
-}
+extern bool blk_end_request(struct request *rq, int error,
+			    unsigned int nr_bytes);
+extern void blk_end_request_all(struct request *rq, int error);
+extern bool blk_end_request_cur(struct request *rq, int error);
+extern bool __blk_end_request(struct request *rq, int error,
+			      unsigned int nr_bytes);
+extern void __blk_end_request_all(struct request *rq, int error);
+extern bool __blk_end_request_cur(struct request *rq, int error);
 
 extern void blk_complete_request(struct request *);
 extern void __blk_complete_request(struct request *);
-- 
cgit v1.2.3-70-g09d2


From 79c5d3ce614d8fe706545c7bca2158b63db6bb5e Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Mon, 11 May 2009 15:06:46 +0800
Subject: blktrace: from-sector redundant in trace_block_remap, cleanup

The last argument of block_remap prober is the original sector
before remap, so it should be 'from', not 'to'.

[ Impact: clean up ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: "Alan D. Brunelle" <Alan.Brunelle@hp.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
LKML-Reference: <4A07CE86.5090301@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/trace/block.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/trace/block.h b/include/trace/block.h
index 8ac945b7746..5b12efa096b 100644
--- a/include/trace/block.h
+++ b/include/trace/block.h
@@ -70,7 +70,7 @@ DECLARE_TRACE(block_split,
 
 DECLARE_TRACE(block_remap,
 	TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
-		 sector_t to),
-	      TP_ARGS(q, bio, dev, to));
+		 sector_t from),
+	      TP_ARGS(q, bio, dev, from));
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From 6818173bd658439b83896a2a7586f64ab51bf29c Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Thu, 7 May 2009 15:37:36 +0200
Subject: splice: implement default splice_read method

If f_op->splice_read() is not implemented, fall back to a plain read.
Use vfs_readv() to read into previously allocated pages.

This will allow splice and functions using splice, such as the loop
device, to work on all filesystems.  This includes "direct_io" files
in fuse which bypass the page cache.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 drivers/block/loop.c      |  11 +----
 fs/coda/file.c            |   9 ++--
 fs/pipe.c                 |  14 ++++++
 fs/read_write.c           |   7 +--
 fs/splice.c               | 120 ++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/fs.h        |   2 +
 include/linux/pipe_fs_i.h |   1 +
 7 files changed, 140 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 9ca4bb01465..801f4ab8330 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -709,10 +709,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
 		goto out_putf;
 
-	/* new backing store needs to support loop (eg splice_read) */
-	if (!inode->i_fop->splice_read)
-		goto out_putf;
-
 	/* size of the new backing store needs to be the same */
 	if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
 		goto out_putf;
@@ -788,12 +784,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	error = -EINVAL;
 	if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) {
 		const struct address_space_operations *aops = mapping->a_ops;
-		/*
-		 * If we can't read - sorry. If we only can't write - well,
-		 * it's going to be read-only.
-		 */
-		if (!file->f_op->splice_read)
-			goto out_putf;
+
 		if (aops->write_begin)
 			lo_flags |= LO_FLAGS_USE_AOPS;
 		if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 6a347fbc998..ffd42815fda 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -47,6 +47,8 @@ coda_file_splice_read(struct file *coda_file, loff_t *ppos,
 		      struct pipe_inode_info *pipe, size_t count,
 		      unsigned int flags)
 {
+	ssize_t (*splice_read)(struct file *, loff_t *,
+			       struct pipe_inode_info *, size_t, unsigned int);
 	struct coda_file_info *cfi;
 	struct file *host_file;
 
@@ -54,10 +56,11 @@ coda_file_splice_read(struct file *coda_file, loff_t *ppos,
 	BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
 	host_file = cfi->cfi_container;
 
-	if (!host_file->f_op || !host_file->f_op->splice_read)
-		return -EINVAL;
+	splice_read = host_file->f_op->splice_read;
+	if (!splice_read)
+		splice_read = default_file_splice_read;
 
-	return host_file->f_op->splice_read(host_file, ppos, pipe, count,flags);
+	return splice_read(host_file, ppos, pipe, count, flags);
 }
 
 static ssize_t
diff --git a/fs/pipe.c b/fs/pipe.c
index 13414ec45b8..f7dd21ad85a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -302,6 +302,20 @@ int generic_pipe_buf_confirm(struct pipe_inode_info *info,
 	return 0;
 }
 
+/**
+ * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
+ * @pipe:	the pipe that the buffer belongs to
+ * @buf:	the buffer to put a reference to
+ *
+ * Description:
+ *	This function releases a reference to @buf.
+ */
+void generic_pipe_buf_release(struct pipe_inode_info *pipe,
+			      struct pipe_buffer *buf)
+{
+	page_cache_release(buf->page);
+}
+
 static const struct pipe_buf_operations anon_pipe_buf_ops = {
 	.can_merge = 1,
 	.map = generic_pipe_buf_map,
diff --git a/fs/read_write.c b/fs/read_write.c
index 9d1e76bb9ee..6c8c55dec2b 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -805,12 +805,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 		goto out;
 	if (!(in_file->f_mode & FMODE_READ))
 		goto fput_in;
-	retval = -EINVAL;
-	in_inode = in_file->f_path.dentry->d_inode;
-	if (!in_inode)
-		goto fput_in;
-	if (!in_file->f_op || !in_file->f_op->splice_read)
-		goto fput_in;
 	retval = -ESPIPE;
 	if (!ppos)
 		ppos = &in_file->f_pos;
@@ -834,6 +828,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 	retval = -EINVAL;
 	if (!out_file->f_op || !out_file->f_op->sendpage)
 		goto fput_out;
+	in_inode = in_file->f_path.dentry->d_inode;
 	out_inode = out_file->f_path.dentry->d_inode;
 	retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
 	if (retval < 0)
diff --git a/fs/splice.c b/fs/splice.c
index e405cf552f5..3bd9cb21b38 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -507,9 +507,116 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
 
 	return ret;
 }
-
 EXPORT_SYMBOL(generic_file_splice_read);
 
+static const struct pipe_buf_operations default_pipe_buf_ops = {
+	.can_merge = 0,
+	.map = generic_pipe_buf_map,
+	.unmap = generic_pipe_buf_unmap,
+	.confirm = generic_pipe_buf_confirm,
+	.release = generic_pipe_buf_release,
+	.steal = generic_pipe_buf_steal,
+	.get = generic_pipe_buf_get,
+};
+
+static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
+			    unsigned long vlen, loff_t offset)
+{
+	mm_segment_t old_fs;
+	loff_t pos = offset;
+	ssize_t res;
+
+	old_fs = get_fs();
+	set_fs(get_ds());
+	/* The cast to a user pointer is valid due to the set_fs() */
+	res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos);
+	set_fs(old_fs);
+
+	return res;
+}
+
+ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
+				 struct pipe_inode_info *pipe, size_t len,
+				 unsigned int flags)
+{
+	unsigned int nr_pages;
+	unsigned int nr_freed;
+	size_t offset;
+	struct page *pages[PIPE_BUFFERS];
+	struct partial_page partial[PIPE_BUFFERS];
+	struct iovec vec[PIPE_BUFFERS];
+	pgoff_t index;
+	ssize_t res;
+	size_t this_len;
+	int error;
+	int i;
+	struct splice_pipe_desc spd = {
+		.pages = pages,
+		.partial = partial,
+		.flags = flags,
+		.ops = &default_pipe_buf_ops,
+		.spd_release = spd_release_page,
+	};
+
+	index = *ppos >> PAGE_CACHE_SHIFT;
+	offset = *ppos & ~PAGE_CACHE_MASK;
+	nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+	for (i = 0; i < nr_pages && i < PIPE_BUFFERS && len; i++) {
+		struct page *page;
+
+		page = alloc_page(GFP_HIGHUSER);
+		error = -ENOMEM;
+		if (!page)
+			goto err;
+
+		this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset);
+		vec[i].iov_base = (void __user *) kmap(page);
+		vec[i].iov_len = this_len;
+		pages[i] = page;
+		spd.nr_pages++;
+		len -= this_len;
+		offset = 0;
+	}
+
+	res = kernel_readv(in, vec, spd.nr_pages, *ppos);
+	if (res < 0)
+		goto err;
+
+	error = 0;
+	if (!res)
+		goto err;
+
+	nr_freed = 0;
+	for (i = 0; i < spd.nr_pages; i++) {
+		kunmap(pages[i]);
+		this_len = min_t(size_t, vec[i].iov_len, res);
+		partial[i].offset = 0;
+		partial[i].len = this_len;
+		if (!this_len) {
+			__free_page(pages[i]);
+			pages[i] = NULL;
+			nr_freed++;
+		}
+		res -= this_len;
+	}
+	spd.nr_pages -= nr_freed;
+
+	res = splice_to_pipe(pipe, &spd);
+	if (res > 0)
+		*ppos += res;
+
+	return res;
+
+err:
+	for (i = 0; i < spd.nr_pages; i++) {
+		kunmap(pages[i]);
+		__free_page(pages[i]);
+	}
+	return error;
+}
+EXPORT_SYMBOL(default_file_splice_read);
+
 /*
  * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
  * using sendpage(). Return the number of bytes sent.
@@ -933,11 +1040,10 @@ static long do_splice_to(struct file *in, loff_t *ppos,
 			 struct pipe_inode_info *pipe, size_t len,
 			 unsigned int flags)
 {
+	ssize_t (*splice_read)(struct file *, loff_t *,
+			       struct pipe_inode_info *, size_t, unsigned int);
 	int ret;
 
-	if (unlikely(!in->f_op || !in->f_op->splice_read))
-		return -EINVAL;
-
 	if (unlikely(!(in->f_mode & FMODE_READ)))
 		return -EBADF;
 
@@ -945,7 +1051,11 @@ static long do_splice_to(struct file *in, loff_t *ppos,
 	if (unlikely(ret < 0))
 		return ret;
 
-	return in->f_op->splice_read(in, ppos, pipe, len, flags);
+	splice_read = in->f_op->splice_read;
+	if (!splice_read)
+		splice_read = default_file_splice_read;
+
+	return splice_read(in, ppos, pipe, len, flags);
 }
 
 /**
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5bed436f435..d926c2bea16 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2204,6 +2204,8 @@ extern int generic_segment_checks(const struct iovec *iov,
 /* fs/splice.c */
 extern ssize_t generic_file_splice_read(struct file *, loff_t *,
 		struct pipe_inode_info *, size_t, unsigned int);
+extern ssize_t default_file_splice_read(struct file *, loff_t *,
+		struct pipe_inode_info *, size_t, unsigned int);
 extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
 		struct file *, loff_t *, size_t, unsigned int);
 extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index c8f038554e8..b43a9e03905 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -152,5 +152,6 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void
 void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
+void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From dc6382ced07d6bad61d0b591fb12ab5da7ca632c Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni.malinen@atheros.com>
Date: Wed, 6 May 2009 22:09:37 +0300
Subject: nl80211 : Add support for configuring MFP

NL80211_CMD_ASSOCIATE request must be able to indicate whether
management frame protection (IEEE 802.11w) is being used. mac80211 was
able to use MFP in client mode only with WEXT, but the new
NL80211_ATTR_USE_MFP attribute will allow this to be done with
nl80211, too.

Since we are currently using nl80211 for MFP only with drivers that
use user space SME, only MFP disabled and required values are
used. However, the NL80211_ATTR_USE_MFP attribute is an enum that can
be extended with MFP optional in the future, if that is needed with
some drivers (e.g., if the RSN IE is generated by the driver).

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 17 +++++++++++++++++
 include/net/cfg80211.h  |  2 ++
 net/mac80211/cfg.c      |  8 ++++++++
 net/wireless/nl80211.c  | 12 ++++++++++++
 4 files changed, 39 insertions(+)

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index e9fd13aa79f..58c4ee1822d 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -494,6 +494,11 @@ enum nl80211_commands {
  * @NL80211_ATTR_TIMED_OUT: a flag indicating than an operation timed out; this
  *	is used, e.g., with %NL80211_CMD_AUTHENTICATE event
  *
+ * @NL80211_ATTR_USE_MFP: Whether management frame protection (IEEE 802.11w) is
+ *	used for the association (&enum nl80211_mfp, represented as a u32);
+ *	this attribute can be used
+ *	with %NL80211_CMD_ASSOCIATE request
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -596,6 +601,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_TIMED_OUT,
 
+	NL80211_ATTR_USE_MFP,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -1179,4 +1186,14 @@ enum nl80211_key_type {
 	NL80211_KEYTYPE_PEERKEY,
 };
 
+/**
+ * enum nl80211_mfp - Management frame protection state
+ * @NL80211_MFP_NO: Management frame protection not used
+ * @NL80211_MFP_REQUIRED: Management frame protection required
+ */
+enum nl80211_mfp {
+	NL80211_MFP_NO,
+	NL80211_MFP_REQUIRED,
+};
+
 #endif /* __LINUX_NL80211_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index b8a76764e1c..47e30e1d91f 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -672,6 +672,7 @@ struct cfg80211_auth_request {
  * @ssid_len: Length of ssid in octets
  * @ie: Extra IEs to add to (Re)Association Request frame or %NULL
  * @ie_len: Length of ie buffer in octets
+ * @use_mfp: Use management frame protection (IEEE 802.11w) in this association
  */
 struct cfg80211_assoc_request {
 	struct ieee80211_channel *chan;
@@ -680,6 +681,7 @@ struct cfg80211_assoc_request {
 	size_t ssid_len;
 	const u8 *ie;
 	size_t ie_len;
+	bool use_mfp;
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index d0ca6da33ca..4e627cf2b8c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1253,6 +1253,14 @@ static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev,
 	if (ret)
 		return ret;
 
+	if (req->use_mfp) {
+		sdata->u.mgd.mfp = IEEE80211_MFP_REQUIRED;
+		sdata->u.mgd.flags |= IEEE80211_STA_MFP_ENABLED;
+	} else {
+		sdata->u.mgd.mfp = IEEE80211_MFP_DISABLED;
+		sdata->u.mgd.flags &= ~IEEE80211_STA_MFP_ENABLED;
+	}
+
 	sdata->u.mgd.flags |= IEEE80211_STA_EXT_SME;
 	sdata->u.mgd.state = IEEE80211_STA_MLME_ASSOCIATE;
 	ieee80211_sta_req_auth(sdata);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 3c53c5cbc3a..79927706937 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -122,6 +122,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 	[NL80211_ATTR_REASON_CODE] = { .type = NLA_U16 },
 	[NL80211_ATTR_FREQ_FIXED] = { .type = NLA_FLAG },
 	[NL80211_ATTR_TIMED_OUT] = { .type = NLA_FLAG },
+	[NL80211_ATTR_USE_MFP] = { .type = NLA_U32 },
 };
 
 /* IE validation */
@@ -3012,6 +3013,17 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
 		req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
 	}
 
+	if (info->attrs[NL80211_ATTR_USE_MFP]) {
+		enum nl80211_mfp use_mfp =
+			nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]);
+		if (use_mfp == NL80211_MFP_REQUIRED)
+			req.use_mfp = true;
+		else if (use_mfp != NL80211_MFP_NO) {
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
 	err = drv->ops->assoc(&drv->wiphy, dev, &req);
 
 out:
-- 
cgit v1.2.3-70-g09d2


From 9ed6bcce77f75d98af6ee07069deac6041948bee Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 8 May 2009 20:47:39 +0200
Subject: mac80211: move HT operation mode BSS info

There really is no need to have a separate struct for a
single variable. The fact that it exists is due to the
code legacy, but we can remove that now. Very simple.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwlwifi/iwl-core.c |  4 ++--
 drivers/net/wireless/mac80211_hwsim.c   |  2 +-
 drivers/net/wireless/mwl8k.c            |  2 +-
 include/net/mac80211.h                  | 12 +++---------
 net/mac80211/mlme.c                     | 11 ++++-------
 5 files changed, 11 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c
index 77f4b43b9b7..a9a4fcef7e4 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.c
+++ b/drivers/net/wireless/iwlwifi/iwl-core.c
@@ -2225,9 +2225,9 @@ static void iwl_ht_conf(struct iwl_priv *priv,
 
 	iwl_conf->tx_chan_width = iwl_conf->supported_chan_width != 0;
 	iwl_conf->ht_protection =
-		bss_conf->ht.operation_mode & IEEE80211_HT_OP_MODE_PROTECTION;
+		bss_conf->ht_operation_mode & IEEE80211_HT_OP_MODE_PROTECTION;
 	iwl_conf->non_GF_STA_present =
-		!!(bss_conf->ht.operation_mode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT);
+		!!(bss_conf->ht_operation_mode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT);
 
 	rcu_read_unlock();
 
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index b1213b6a6b9..61a4ad7cc1c 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -642,7 +642,7 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw,
 	if (changed & BSS_CHANGED_HT) {
 		printk(KERN_DEBUG "  %s: HT: op_mode=0x%x\n",
 		       wiphy_name(hw->wiphy),
-		       info->ht.operation_mode);
+		       info->ht_operation_mode);
 	}
 
 	if (changed & BSS_CHANGED_BASIC_RATES) {
diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c
index 46b288dc8f4..a263d5c84c0 100644
--- a/drivers/net/wireless/mwl8k.c
+++ b/drivers/net/wireless/mwl8k.c
@@ -2369,7 +2369,7 @@ static int mwl8k_cmd_set_aid(struct ieee80211_hw *hw,
 	if (info->use_cts_prot) {
 		prot_mode = MWL8K_FRAME_PROT_11G;
 	} else {
-		switch (info->ht.operation_mode &
+		switch (info->ht_operation_mode &
 			IEEE80211_HT_OP_MODE_PROTECTION) {
 		case IEEE80211_HT_OP_MODE_PROTECTION_20MHZ:
 			prot_mode = MWL8K_FRAME_PROT_11N_HT_40MHZ_ONLY;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 38dc1cd1027..03591fcf519 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -170,14 +170,6 @@ enum ieee80211_bss_change {
 	BSS_CHANGED_BEACON_ENABLED	= 1<<9,
 };
 
-/**
- * struct ieee80211_bss_ht_conf - BSS's changing HT configuration
- * @operation_mode: HT operation mode (like in &struct ieee80211_ht_info)
- */
-struct ieee80211_bss_ht_conf {
-	u16 operation_mode;
-};
-
 /**
  * struct ieee80211_bss_conf - holds the BSS's changing parameters
  *
@@ -203,6 +195,8 @@ struct ieee80211_bss_ht_conf {
  *	the current band.
  * @bssid: The BSSID for this BSS
  * @enable_beacon: whether beaconing should be enabled or not
+ * @ht_operation_mode: HT operation mode (like in &struct ieee80211_ht_info).
+ *	This field is only valid when the channel type is one of the HT types.
  */
 struct ieee80211_bss_conf {
 	const u8 *bssid;
@@ -219,7 +213,7 @@ struct ieee80211_bss_conf {
 	u16 assoc_capability;
 	u64 timestamp;
 	u32 basic_rates;
-	struct ieee80211_bss_ht_conf ht;
+	u16 ht_operation_mode;
 };
 
 /**
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index c5445bae9d6..a1f2332a6fe 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -95,16 +95,14 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-	struct ieee80211_bss_ht_conf ht;
 	struct sta_info *sta;
 	u32 changed = 0;
+	u16 ht_opmode;
 	bool enable_ht = true, ht_changed;
 	enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT;
 
 	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
 
-	memset(&ht, 0, sizeof(ht));
-
 	/* HT is not supported */
 	if (!sband->ht_cap.ht_supported)
 		enable_ht = false;
@@ -148,19 +146,18 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
 						 IEEE80211_RC_HT_CHANGED);
 
 		rcu_read_unlock();
-
         }
 
 	/* disable HT */
 	if (!enable_ht)
 		return 0;
 
-	ht.operation_mode = le16_to_cpu(hti->operation_mode);
+	ht_opmode = le16_to_cpu(hti->operation_mode);
 
 	/* if bss configuration changed store the new one */
-	if (memcmp(&sdata->vif.bss_conf.ht, &ht, sizeof(ht))) {
+	if (sdata->vif.bss_conf.ht_operation_mode != ht_opmode) {
 		changed |= BSS_CHANGED_HT;
-		sdata->vif.bss_conf.ht = ht;
+		sdata->vif.bss_conf.ht_operation_mode = ht_opmode;
 	}
 
 	return changed;
-- 
cgit v1.2.3-70-g09d2


From 44033f80cefd1d7b474efdabc412476d4bafb8f4 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 8 May 2009 21:21:41 +0200
Subject: mac80211: remove ieee80211_ht_bss_info

This struct is no longer used (and hasn't been for a while).

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 16 ----------------
 1 file changed, 16 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 03591fcf519..d10ed1776fc 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -72,22 +72,6 @@
  * not do so then mac80211 may add this under certain circumstances.
  */
 
-/**
- * struct ieee80211_ht_bss_info - describing BSS's HT characteristics
- *
- * This structure describes most essential parameters needed
- * to describe 802.11n HT characteristics in a BSS.
- *
- * @primary_channel: channel number of primery channel
- * @bss_cap: 802.11n's general BSS capabilities (e.g. channel width)
- * @bss_op_mode: 802.11n's BSS operation modes (e.g. HT protection)
- */
-struct ieee80211_ht_bss_info {
-	u8 primary_channel;
-	u8 bss_cap;  /* use IEEE80211_HT_IE_CHA_ */
-	u8 bss_op_mode; /* use IEEE80211_HT_IE_ */
-};
-
 /**
  * enum ieee80211_max_queues - maximum number of queues
  *
-- 
cgit v1.2.3-70-g09d2


From 4e943900fb9675d3a5ebdabc2cd4a9a54edace97 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sat, 9 May 2009 20:06:47 +0200
Subject: cfg80211: constify key mac address in ops

The address pointed to by mac_addr can be marked as const.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h | 6 +++---
 net/mac80211/cfg.c     | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 47e30e1d91f..35afe973d46 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -860,13 +860,13 @@ struct cfg80211_ops {
 				       struct vif_params *params);
 
 	int	(*add_key)(struct wiphy *wiphy, struct net_device *netdev,
-			   u8 key_index, u8 *mac_addr,
+			   u8 key_index, const u8 *mac_addr,
 			   struct key_params *params);
 	int	(*get_key)(struct wiphy *wiphy, struct net_device *netdev,
-			   u8 key_index, u8 *mac_addr, void *cookie,
+			   u8 key_index, const u8 *mac_addr, void *cookie,
 			   void (*callback)(void *cookie, struct key_params*));
 	int	(*del_key)(struct wiphy *wiphy, struct net_device *netdev,
-			   u8 key_index, u8 *mac_addr);
+			   u8 key_index, const u8 *mac_addr);
 	int	(*set_default_key)(struct wiphy *wiphy,
 				   struct net_device *netdev,
 				   u8 key_index);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 4e627cf2b8c..be86e159e6e 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -112,7 +112,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex,
 }
 
 static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
-			     u8 key_idx, u8 *mac_addr,
+			     u8 key_idx, const u8 *mac_addr,
 			     struct key_params *params)
 {
 	struct ieee80211_sub_if_data *sdata;
@@ -166,7 +166,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
 }
 
 static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
-			     u8 key_idx, u8 *mac_addr)
+			     u8 key_idx, const u8 *mac_addr)
 {
 	struct ieee80211_sub_if_data *sdata;
 	struct sta_info *sta;
@@ -208,7 +208,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
 }
 
 static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
-			     u8 key_idx, u8 *mac_addr, void *cookie,
+			     u8 key_idx, const u8 *mac_addr, void *cookie,
 			     void (*callback)(void *cookie,
 					      struct key_params *params))
 {
-- 
cgit v1.2.3-70-g09d2


From cbe8fa9c5e88fd5f554ebc519af3b0ed17dde0dd Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sat, 9 May 2009 20:09:03 +0200
Subject: cfg80211: put wext data into substructure

To make it more apparent in the code what is for wext
only (and needs to be #ifdef'ed) put all the info for
wext into a substruct in each wireless_dev.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h |  6 +++--
 net/wireless/core.c    |  4 ++--
 net/wireless/ibss.c    | 60 +++++++++++++++++++++++++-------------------------
 3 files changed, 36 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 35afe973d46..4c748935ce5 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1147,8 +1147,10 @@ struct wireless_dev {
 
 #ifdef CONFIG_WIRELESS_EXT
 	/* wext data */
-	struct cfg80211_ibss_params wext;
-	u8 wext_bssid[ETH_ALEN];
+	struct {
+		struct cfg80211_ibss_params ibss;
+		u8 bssid[ETH_ALEN];
+	} wext;
 #endif
 };
 
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 2006a4ee60e..15b2a179480 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -470,9 +470,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 #ifdef CONFIG_WIRELESS_EXT
 		if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC)
 			break;
-		if (!dev->ieee80211_ptr->wext.ssid_len)
+		if (!dev->ieee80211_ptr->wext.ibss.ssid_len)
 			break;
-		cfg80211_join_ibss(rdev, dev, &dev->ieee80211_ptr->wext);
+		cfg80211_join_ibss(rdev, dev, &dev->ieee80211_ptr->wext.ibss);
 		break;
 #endif
 	case NETDEV_UNREGISTER:
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 3c38afaed28..a4a1c3498ff 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -63,7 +63,7 @@ int cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
 		return -EALREADY;
 
 #ifdef CONFIG_WIRELESS_EXT
-	wdev->wext.channel = params->channel;
+	wdev->wext.ibss.channel = params->channel;
 #endif
 	err = rdev->ops->join_ibss(&rdev->wiphy, dev, params);
 
@@ -90,7 +90,7 @@ void cfg80211_clear_ibss(struct net_device *dev, bool nowext)
 	memset(wdev->bssid, 0, ETH_ALEN);
 #ifdef CONFIG_WIRELESS_EXT
 	if (!nowext)
-		wdev->wext.ssid_len = 0;
+		wdev->wext.ibss.ssid_len = 0;
 #endif
 }
 
@@ -116,11 +116,11 @@ static int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev,
 	enum ieee80211_band band;
 	int i;
 
-	if (!wdev->wext.beacon_interval)
-		wdev->wext.beacon_interval = 100;
+	if (!wdev->wext.ibss.beacon_interval)
+		wdev->wext.ibss.beacon_interval = 100;
 
 	/* try to find an IBSS channel if none requested ... */
-	if (!wdev->wext.channel) {
+	if (!wdev->wext.ibss.channel) {
 		for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
 			struct ieee80211_supported_band *sband;
 			struct ieee80211_channel *chan;
@@ -135,27 +135,27 @@ static int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev,
 					continue;
 				if (chan->flags & IEEE80211_CHAN_DISABLED)
 					continue;
-				wdev->wext.channel = chan;
+				wdev->wext.ibss.channel = chan;
 				break;
 			}
 
-			if (wdev->wext.channel)
+			if (wdev->wext.ibss.channel)
 				break;
 		}
 
-		if (!wdev->wext.channel)
+		if (!wdev->wext.ibss.channel)
 			return -EINVAL;
 	}
 
 	/* don't join -- SSID is not there */
-	if (!wdev->wext.ssid_len)
+	if (!wdev->wext.ibss.ssid_len)
 		return 0;
 
 	if (!netif_running(wdev->netdev))
 		return 0;
 
 	return cfg80211_join_ibss(wiphy_to_dev(wdev->wiphy),
-				  wdev->netdev, &wdev->wext);
+				  wdev->netdev, &wdev->wext.ibss);
 }
 
 int cfg80211_ibss_wext_siwfreq(struct net_device *dev,
@@ -182,7 +182,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev,
 	     chan->flags & IEEE80211_CHAN_DISABLED))
 		return -EINVAL;
 
-	if (wdev->wext.channel == chan)
+	if (wdev->wext.ibss.channel == chan)
 		return 0;
 
 	if (wdev->ssid_len) {
@@ -193,11 +193,11 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev,
 	}
 
 	if (chan) {
-		wdev->wext.channel = chan;
-		wdev->wext.channel_fixed = true;
+		wdev->wext.ibss.channel = chan;
+		wdev->wext.ibss.channel_fixed = true;
 	} else {
 		/* cfg80211_ibss_wext_join will pick one if needed */
-		wdev->wext.channel_fixed = false;
+		wdev->wext.ibss.channel_fixed = false;
 	}
 
 	return cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev);
@@ -218,8 +218,8 @@ int cfg80211_ibss_wext_giwfreq(struct net_device *dev,
 
 	if (wdev->current_bss)
 		chan = wdev->current_bss->channel;
-	else if (wdev->wext.channel)
-		chan = wdev->wext.channel;
+	else if (wdev->wext.ibss.channel)
+		chan = wdev->wext.ibss.channel;
 
 	if (chan) {
 		freq->m = chan->center_freq;
@@ -259,9 +259,9 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev,
 	if (len > 0 && ssid[len - 1] == '\0')
 		len--;
 
-	wdev->wext.ssid = wdev->ssid;
-	memcpy(wdev->wext.ssid, ssid, len);
-	wdev->wext.ssid_len = len;
+	wdev->wext.ibss.ssid = wdev->ssid;
+	memcpy(wdev->wext.ibss.ssid, ssid, len);
+	wdev->wext.ibss.ssid_len = len;
 
 	return cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev);
 }
@@ -284,10 +284,10 @@ int cfg80211_ibss_wext_giwessid(struct net_device *dev,
 		data->flags = 1;
 		data->length = wdev->ssid_len;
 		memcpy(ssid, wdev->ssid, data->length);
-	} else if (wdev->wext.ssid && wdev->wext.ssid_len) {
+	} else if (wdev->wext.ibss.ssid && wdev->wext.ibss.ssid_len) {
 		data->flags = 1;
-		data->length = wdev->wext.ssid_len;
-		memcpy(ssid, wdev->wext.ssid, data->length);
+		data->length = wdev->wext.ibss.ssid_len;
+		memcpy(ssid, wdev->wext.ibss.ssid, data->length);
 	}
 
 	return 0;
@@ -318,12 +318,12 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev,
 		bssid = NULL;
 
 	/* both automatic */
-	if (!bssid && !wdev->wext.bssid)
+	if (!bssid && !wdev->wext.ibss.bssid)
 		return 0;
 
 	/* fixed already - and no change */
-	if (wdev->wext.bssid && bssid &&
-	    compare_ether_addr(bssid, wdev->wext.bssid) == 0)
+	if (wdev->wext.ibss.bssid && bssid &&
+	    compare_ether_addr(bssid, wdev->wext.ibss.bssid) == 0)
 		return 0;
 
 	if (wdev->ssid_len) {
@@ -334,10 +334,10 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev,
 	}
 
 	if (bssid) {
-		memcpy(wdev->wext_bssid, bssid, ETH_ALEN);
-		wdev->wext.bssid = wdev->wext_bssid;
+		memcpy(wdev->wext.bssid, bssid, ETH_ALEN);
+		wdev->wext.ibss.bssid = wdev->wext.bssid;
 	} else
-		wdev->wext.bssid = NULL;
+		wdev->wext.ibss.bssid = NULL;
 
 	return cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev);
 }
@@ -356,8 +356,8 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev,
 
 	ap_addr->sa_family = ARPHRD_ETHER;
 
-	if (wdev->wext.bssid) {
-		memcpy(ap_addr->sa_data, wdev->wext.bssid, ETH_ALEN);
+	if (wdev->wext.ibss.bssid) {
+		memcpy(ap_addr->sa_data, wdev->wext.ibss.bssid, ETH_ALEN);
 		return 0;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 528769cf1e422d932052be1487459262f3d75333 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni.malinen@atheros.com>
Date: Mon, 11 May 2009 10:20:35 +0300
Subject: mac80211: Robust Action frame categories for MFP

IEEE 802.11w/D9.0 introduces a mechanism for Action field Category
values to be used to select which Action frames are Robust. Public and
Vendor-specific categories are marked as not Robust in IEEE 802.11w;
HT will be marked not Robust in IEEE 802.11n. A new Vendor-specific
Protected category is allocated for Robust vendor-specific Action
frames. Another new category, Protected Dual of Action, is introduced
for protecting some existing Public Action frames (e.g., IEEE 802.11y
protected enablement).

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index dc92359f37e..05c29c01174 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1068,8 +1068,12 @@ enum ieee80211_category {
 	WLAN_CATEGORY_DLS = 2,
 	WLAN_CATEGORY_BACK = 3,
 	WLAN_CATEGORY_PUBLIC = 4,
+	WLAN_CATEGORY_HT = 7,
 	WLAN_CATEGORY_SA_QUERY = 8,
+	WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION = 9,
 	WLAN_CATEGORY_WMM = 17,
+	WLAN_CATEGORY_VENDOR_SPECIFIC_PROTECTED = 126,
+	WLAN_CATEGORY_VENDOR_SPECIFIC = 127,
 };
 
 /* SPECTRUM_MGMT action code */
@@ -1261,7 +1265,9 @@ static inline bool ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr)
 		if (ieee80211_has_protected(hdr->frame_control))
 			return true;
 		category = ((u8 *) hdr) + 24;
-		return *category != WLAN_CATEGORY_PUBLIC;
+		return *category != WLAN_CATEGORY_PUBLIC &&
+			*category != WLAN_CATEGORY_HT &&
+			*category != WLAN_CATEGORY_VENDOR_SPECIFIC;
 	}
 
 	return false;
-- 
cgit v1.2.3-70-g09d2


From e13cf6e04ebc231653e03ebde4799dc55bf62849 Mon Sep 17 00:00:00 2001
From: john stultz <johnstul@us.ibm.com>
Date: Mon, 11 May 2009 18:13:13 -0700
Subject: ntp: fix comment typos

Bernhard Schiffner noticed I had a few comment typos in this patch,
(note: to save embarrassment, when making typos, avoid copying and
pasting them) so this patch corrects them.

[ Impact: cleanup ]

Reported-by: Bernhard Schiffner <bernhard@schiffner-limbach.de>
Signed-off-by: John Stultz <johnstul@us.ibm.com>
Cc: riel@redhat.com
Cc: akpm@linux-foundation.org
LKML-Reference: <1242090794.7214.131.camel@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/timex.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/timex.h b/include/linux/timex.h
index 0daf9611ef4..9910e3bd5b3 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -179,7 +179,7 @@ struct timex {
  * NTP convergence time. A higher value makes it stiffer, increasing
  * convergence time, but making the clock more stable.
  *
- * In David Mills' nanokenrel reference implmentation SHIFT_PLL is 4.
+ * In David Mills' nanokernel reference implementation SHIFT_PLL is 4.
  * However this seems to increase convergence time much too long.
  *
  * https://lists.ntp.org/pipermail/hackers/2008-January/003487.html
@@ -196,7 +196,7 @@ struct timex {
  *
  * SHIFT_FLL is used as a dampening factor to define how much we
  * adjust the frequency correction for a given offset in FLL mode.
- * In David Mills' nanokenrel reference implmentation SHIFT_PLL is 2.
+ * In David Mills' nanokernel reference implementation SHIFT_FLL is 2.
  *
  * MAXTC establishes the maximum time constant of the PLL.
  */
-- 
cgit v1.2.3-70-g09d2


From 2b1ccc0ee918a653d0483fdad9dd6112ce8e9043 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Tue, 12 May 2009 11:11:48 +0200
Subject: splice: fix misleading comment

Splice is tied to pipes by design, it'll not change. And now that
the splice stuff is in splice.h (and note pipe.h), the rest of the comment
is out-of-date as well.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/splice.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/splice.h b/include/linux/splice.h
index 5f3faa9d15a..18e7c7c0cae 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -11,8 +11,7 @@
 #include <linux/pipe_fs_i.h>
 
 /*
- * splice is tied to pipes as a transport (at least for now), so we'll just
- * add the splice flags here.
+ * Flags passed in from splice/tee/vmsplice
  */
 #define SPLICE_F_MOVE	(0x01)	/* move pages instead of copying */
 #define SPLICE_F_NONBLOCK (0x02) /* don't block on the pipe splicing (but */
-- 
cgit v1.2.3-70-g09d2


From 597d0275736dad9c3bda6f0a00a1c477dc0f37b1 Mon Sep 17 00:00:00 2001
From: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
Date: Thu, 16 Apr 2009 12:13:26 +0530
Subject: timers: Framework for identifying pinned timers

* Arun R Bharadwaj <arun@linux.vnet.ibm.com> [2009-04-16 12:11:36]:

This patch creates a new framework for identifying cpu-pinned timers
and hrtimers.

This framework is needed because pinned timers are expected to fire on
the same CPU on which they are queued. So it is essential to identify
these and not migrate them, in case there are any.

For regular timers, the currently existing add_timer_on() can be used
queue pinned timers and subsequently mod_timer_pinned() can be used
to modify the 'expires' field.

For hrtimers, new modes HRTIMER_ABS_PINNED and HRTIMER_REL_PINNED are
added to queue cpu-pinned hrtimer.

[ tglx: use .._PINNED mode argument instead of creating tons of new
functions ]

Signed-off-by: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h |  7 +++++--
 include/linux/timer.h   |  3 +++
 kernel/hrtimer.c        |  7 ++++---
 kernel/timer.c          | 31 +++++++++++++++++++++++++++----
 4 files changed, 39 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 0d2f7c8a33d..7400900de94 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -30,8 +30,11 @@ struct hrtimer_cpu_base;
  * Mode arguments of xxx_hrtimer functions:
  */
 enum hrtimer_mode {
-	HRTIMER_MODE_ABS,	/* Time value is absolute */
-	HRTIMER_MODE_REL,	/* Time value is relative to now */
+	HRTIMER_MODE_ABS = 0x0,		/* Time value is absolute */
+	HRTIMER_MODE_REL = 0x1,		/* Time value is relative to now */
+	HRTIMER_MODE_PINNED = 0x02,	/* Timer is bound to CPU */
+	HRTIMER_MODE_ABS_PINNED = 0x02,
+	HRTIMER_MODE_REL_PINNED = 0x03,
 };
 
 /*
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 6cdb6f3331f..ccf882eed8f 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -163,7 +163,10 @@ extern void add_timer_on(struct timer_list *timer, int cpu);
 extern int del_timer(struct timer_list * timer);
 extern int mod_timer(struct timer_list *timer, unsigned long expires);
 extern int mod_timer_pending(struct timer_list *timer, unsigned long expires);
+extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires);
 
+#define TIMER_NOT_PINNED	0
+#define TIMER_PINNED		1
 /*
  * The jiffies value which is added to now, when there is no timer
  * in the timer wheel:
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index cb8a15c1958..c71bcd54924 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -193,7 +193,8 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
  * Switch the timer base to the current CPU when possible.
  */
 static inline struct hrtimer_clock_base *
-switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
+switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
+		    int pinned)
 {
 	struct hrtimer_clock_base *new_base;
 	struct hrtimer_cpu_base *new_cpu_base;
@@ -907,9 +908,9 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 	ret = remove_hrtimer(timer, base);
 
 	/* Switch the timer base, if necessary: */
-	new_base = switch_hrtimer_base(timer, base);
+	new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
 
-	if (mode == HRTIMER_MODE_REL) {
+	if (mode & HRTIMER_MODE_REL) {
 		tim = ktime_add_safe(tim, new_base->get_time());
 		/*
 		 * CONFIG_TIME_LOW_RES is a temporary way for architectures
diff --git a/kernel/timer.c b/kernel/timer.c
index 5c1e84beaf4..3424dfd11d5 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -604,7 +604,8 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
 }
 
 static inline int
-__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
+__mod_timer(struct timer_list *timer, unsigned long expires,
+						bool pending_only, int pinned)
 {
 	struct tvec_base *base, *new_base;
 	unsigned long flags;
@@ -668,7 +669,7 @@ out_unlock:
  */
 int mod_timer_pending(struct timer_list *timer, unsigned long expires)
 {
-	return __mod_timer(timer, expires, true);
+	return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
 }
 EXPORT_SYMBOL(mod_timer_pending);
 
@@ -702,10 +703,32 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
 	if (timer->expires == expires && timer_pending(timer))
 		return 1;
 
-	return __mod_timer(timer, expires, false);
+	return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
 }
 EXPORT_SYMBOL(mod_timer);
 
+/**
+ * mod_timer_pinned - modify a timer's timeout
+ * @timer: the timer to be modified
+ * @expires: new timeout in jiffies
+ *
+ * mod_timer_pinned() is a way to update the expire field of an
+ * active timer (if the timer is inactive it will be activated)
+ * and not allow the timer to be migrated to a different CPU.
+ *
+ * mod_timer_pinned(timer, expires) is equivalent to:
+ *
+ *     del_timer(timer); timer->expires = expires; add_timer(timer);
+ */
+int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
+{
+	if (timer->expires == expires && timer_pending(timer))
+		return 1;
+
+	return __mod_timer(timer, expires, false, TIMER_PINNED);
+}
+EXPORT_SYMBOL(mod_timer_pinned);
+
 /**
  * add_timer - start a timer
  * @timer: the timer to be added
@@ -1356,7 +1379,7 @@ signed long __sched schedule_timeout(signed long timeout)
 	expire = timeout + jiffies;
 
 	setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
-	__mod_timer(&timer, expire, false);
+	__mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
 	schedule();
 	del_singleshot_timer_sync(&timer);
 
-- 
cgit v1.2.3-70-g09d2


From cd1bb94b4a0531e8211a3774f17de831f8285f76 Mon Sep 17 00:00:00 2001
From: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
Date: Thu, 16 Apr 2009 12:15:34 +0530
Subject: timers: /proc/sys sysctl hook to enable timer migration

* Arun R Bharadwaj <arun@linux.vnet.ibm.com> [2009-04-16 12:11:36]:

This patch creates the /proc/sys sysctl interface at
/proc/sys/kernel/timer_migration

Timer migration is enabled by default.

To disable timer migration, when CONFIG_SCHED_DEBUG = y,

echo 0 > /proc/sys/kernel/timer_migration

Signed-off-by: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/sched.h | 1 +
 kernel/sched.c        | 2 ++
 kernel/sysctl.c       | 8 ++++++++
 3 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b4c38bc8049..61850401040 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1766,6 +1766,7 @@ extern unsigned int sysctl_sched_child_runs_first;
 extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
+extern unsigned int sysctl_timer_migration;
 
 int sched_nr_latency_handler(struct ctl_table *table, int write,
 		struct file *file, void __user *buffer, size_t *length,
diff --git a/kernel/sched.c b/kernel/sched.c
index 9c5b4d3f97a..7f1dd56af86 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -8731,6 +8731,8 @@ void __init sched_init_smp(void)
 }
 #endif /* CONFIG_SMP */
 
+const_debug unsigned int sysctl_timer_migration = 1;
+
 int in_sched_functions(unsigned long addr)
 {
 	return in_lock_functions(addr) ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e3d2c7dd59b..b3ce5813730 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -324,6 +324,14 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "timer_migration",
+		.data		= &sysctl_timer_migration,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #endif
 	{
 		.ctl_name	= CTL_UNNUMBERED,
-- 
cgit v1.2.3-70-g09d2


From eea08f32adb3f97553d49a4f79a119833036000a Mon Sep 17 00:00:00 2001
From: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
Date: Thu, 16 Apr 2009 12:16:41 +0530
Subject: timers: Logic to move non pinned timers

* Arun R Bharadwaj <arun@linux.vnet.ibm.com> [2009-04-16 12:11:36]:

This patch migrates all non pinned timers and hrtimers to the current
idle load balancer, from all the idle CPUs. Timers firing on busy CPUs
are not migrated.

While migrating hrtimers, care should be taken to check if migrating
a hrtimer would result in a latency or not. So we compare the expiry of the
hrtimer with the next timer interrupt on the target cpu and migrate the
hrtimer only if it expires *after* the next interrupt on the target cpu.
So, added a clockevents_get_next_event() helper function to return the
next_event on the target cpu's clock_event_device.

[ tglx: cleanups and simplifications ]

Signed-off-by: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clockchips.h |  9 ++++++++
 include/linux/sched.h      | 12 +++++++++++
 kernel/hrtimer.c           | 51 ++++++++++++++++++++++++++++++++++++++++++++--
 kernel/sched.c             |  5 +++++
 kernel/time/clockevents.c  | 12 +++++++++++
 kernel/timer.c             | 17 +++++++++++++---
 6 files changed, 101 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 3a1dbba4d3a..20a100fe2b4 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -143,3 +143,12 @@ extern void clockevents_notify(unsigned long reason, void *arg);
 #endif
 
 #endif
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+extern ktime_t clockevents_get_next_event(int cpu);
+#else
+static inline ktime_t clockevents_get_next_event(int cpu)
+{
+	return (ktime_t) { .tv64 = KTIME_MAX };
+}
+#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 61850401040..311dec12397 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -257,6 +257,7 @@ extern void task_rq_unlock_wait(struct task_struct *p);
 extern cpumask_var_t nohz_cpu_mask;
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
 extern int select_nohz_load_balancer(int cpu);
+extern int get_nohz_load_balancer(void);
 #else
 static inline int select_nohz_load_balancer(int cpu)
 {
@@ -1772,6 +1773,17 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
 		struct file *file, void __user *buffer, size_t *length,
 		loff_t *ppos);
 #endif
+#ifdef CONFIG_SCHED_DEBUG
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+	return sysctl_timer_migration;
+}
+#else
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+	return 1;
+}
+#endif
 extern unsigned int sysctl_sched_rt_period;
 extern int sysctl_sched_rt_runtime;
 
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index c71bcd54924..b675a67c9ac 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -43,6 +43,8 @@
 #include <linux/seq_file.h>
 #include <linux/err.h>
 #include <linux/debugobjects.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
 
 #include <asm/uaccess.h>
 
@@ -198,8 +200,19 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
 {
 	struct hrtimer_clock_base *new_base;
 	struct hrtimer_cpu_base *new_cpu_base;
+	int cpu, preferred_cpu = -1;
+
+	cpu = smp_processor_id();
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
+		preferred_cpu = get_nohz_load_balancer();
+		if (preferred_cpu >= 0)
+			cpu = preferred_cpu;
+	}
+#endif
 
-	new_cpu_base = &__get_cpu_var(hrtimer_bases);
+again:
+	new_cpu_base = &per_cpu(hrtimer_bases, cpu);
 	new_base = &new_cpu_base->clock_base[base->index];
 
 	if (base != new_base) {
@@ -219,6 +232,40 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
 		timer->base = NULL;
 		spin_unlock(&base->cpu_base->lock);
 		spin_lock(&new_base->cpu_base->lock);
+
+		/* Optimized away for NOHZ=n SMP=n */
+		if (cpu == preferred_cpu) {
+			/* Calculate clock monotonic expiry time */
+#ifdef CONFIG_HIGH_RES_TIMERS
+			ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
+							new_base->offset);
+#else
+			ktime_t expires = hrtimer_get_expires(timer);
+#endif
+
+			/*
+			 * Get the next event on target cpu from the
+			 * clock events layer.
+			 * This covers the highres=off nohz=on case as well.
+			 */
+			ktime_t next = clockevents_get_next_event(cpu);
+
+			ktime_t delta = ktime_sub(expires, next);
+
+			/*
+			 * We do not migrate the timer when it is expiring
+			 * before the next event on the target cpu because
+			 * we cannot reprogram the target cpu hardware and
+			 * we would cause it to fire late.
+			 */
+			if (delta.tv64 < 0) {
+				cpu = smp_processor_id();
+				spin_unlock(&new_base->cpu_base->lock);
+				spin_lock(&base->cpu_base->lock);
+				timer->base = base;
+				goto again;
+			}
+		}
 		timer->base = new_base;
 	}
 	return new_base;
@@ -236,7 +283,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 	return base;
 }
 
-# define switch_hrtimer_base(t, b)	(b)
+# define switch_hrtimer_base(t, b, p)	(b)
 
 #endif	/* !CONFIG_SMP */
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 7f1dd56af86..9fe3774a0fd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4244,6 +4244,11 @@ static struct {
 	.load_balancer = ATOMIC_INIT(-1),
 };
 
+int get_nohz_load_balancer(void)
+{
+	return atomic_read(&nohz.load_balancer);
+}
+
 /*
  * This routine will try to nominate the ilb (idle load balancing)
  * owner among the cpus whose ticks are stopped. ilb owner will do the idle
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index d13be216a79..ab20ded013b 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -18,6 +18,7 @@
 #include <linux/notifier.h>
 #include <linux/smp.h>
 #include <linux/sysdev.h>
+#include <linux/tick.h>
 
 /* The registered clock event devices */
 static LIST_HEAD(clockevent_devices);
@@ -251,4 +252,15 @@ void clockevents_notify(unsigned long reason, void *arg)
 	spin_unlock(&clockevents_lock);
 }
 EXPORT_SYMBOL_GPL(clockevents_notify);
+
+ktime_t clockevents_get_next_event(int cpu)
+{
+	struct tick_device *td;
+	struct clock_event_device *dev;
+
+	td = &per_cpu(tick_cpu_device, cpu);
+	dev = td->evtdev;
+
+	return dev->next_event;
+}
 #endif
diff --git a/kernel/timer.c b/kernel/timer.c
index 3424dfd11d5..3f841db5edf 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -37,6 +37,7 @@
 #include <linux/delay.h>
 #include <linux/tick.h>
 #include <linux/kallsyms.h>
+#include <linux/sched.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -609,9 +610,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
 {
 	struct tvec_base *base, *new_base;
 	unsigned long flags;
-	int ret;
-
-	ret = 0;
+	int ret = 0 , cpu;
 
 	timer_stats_timer_set_start_info(timer);
 	BUG_ON(!timer->function);
@@ -630,6 +629,18 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
 
 	new_base = __get_cpu_var(tvec_bases);
 
+	cpu = smp_processor_id();
+
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
+		int preferred_cpu = get_nohz_load_balancer();
+
+		if (preferred_cpu >= 0)
+			cpu = preferred_cpu;
+	}
+#endif
+	new_base = per_cpu(tvec_bases, cpu);
+
 	if (base != new_base) {
 		/*
 		 * We are trying to schedule the timer on the local CPU.
-- 
cgit v1.2.3-70-g09d2


From 08645126dd24872c2e27014f93968f7312e29176 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 11 May 2009 13:54:58 +0200
Subject: cfg80211: implement wext key handling

Move key handling wireless extension ioctls from mac80211 to cfg80211
so that all drivers that implement the cfg80211 operations get wext
compatibility.

Note that this drops the SIOCGIWENCODE ioctl support for getting
IW_ENCODE_RESTRICTED/IW_ENCODE_OPEN. This means that iwconfig will
no longer report "Security mode:open" or "Security mode:restricted"
for mac80211. However, what we displayed there (the authentication
algo used) was actually wrong -- linux/wireless.h states that this
setting is meant to differentiate between "Refuse non-encoded packets"
and "Accept non-encoded packets".

(Combined with "cfg80211: fix a couple of bugs with key ioctls". -- JWL)

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h     |  10 ++
 net/mac80211/wext.c        | 279 +--------------------------------------------
 net/wireless/core.c        |   6 +-
 net/wireless/core.h        |   6 +-
 net/wireless/nl80211.c     |  57 +++------
 net/wireless/util.c        |  45 ++++++++
 net/wireless/wext-compat.c | 257 ++++++++++++++++++++++++++++++++++++++++-
 7 files changed, 343 insertions(+), 317 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 4c748935ce5..e69e6c66dd1 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1150,6 +1150,7 @@ struct wireless_dev {
 	struct {
 		struct cfg80211_ibss_params ibss;
 		u8 bssid[ETH_ALEN];
+		s8 default_key, default_mgmt_key;
 	} wext;
 #endif
 };
@@ -1400,6 +1401,15 @@ int cfg80211_wext_siwretry(struct net_device *dev,
 int cfg80211_wext_giwretry(struct net_device *dev,
 			   struct iw_request_info *info,
 			   struct iw_param *retry, char *extra);
+int cfg80211_wext_siwencodeext(struct net_device *dev,
+			       struct iw_request_info *info,
+			       struct iw_point *erq, char *extra);
+int cfg80211_wext_siwencode(struct net_device *dev,
+			    struct iw_request_info *info,
+			    struct iw_point *erq, char *keybuf);
+int cfg80211_wext_giwencode(struct net_device *dev,
+			    struct iw_request_info *info,
+			    struct iw_point *erq, char *keybuf);
 
 /*
  * callbacks for asynchronous cfg80211 methods, notification
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 6b4eb8d43a4..d8450264468 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -27,100 +27,6 @@
 #include "aes_ccm.h"
 
 
-static int ieee80211_set_encryption(struct ieee80211_sub_if_data *sdata, u8 *sta_addr,
-				    int idx, int alg, int remove,
-				    int set_tx_key, const u8 *_key,
-				    size_t key_len)
-{
-	struct ieee80211_local *local = sdata->local;
-	struct sta_info *sta;
-	struct ieee80211_key *key;
-	int err;
-
-	if (alg == ALG_AES_CMAC) {
-		if (idx < NUM_DEFAULT_KEYS ||
-		    idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) {
-			printk(KERN_DEBUG "%s: set_encrypt - invalid idx=%d "
-			       "(BIP)\n", sdata->dev->name, idx);
-			return -EINVAL;
-		}
-	} else if (idx < 0 || idx >= NUM_DEFAULT_KEYS) {
-		printk(KERN_DEBUG "%s: set_encrypt - invalid idx=%d\n",
-		       sdata->dev->name, idx);
-		return -EINVAL;
-	}
-
-	if (remove) {
-		rcu_read_lock();
-
-		err = 0;
-
-		if (is_broadcast_ether_addr(sta_addr)) {
-			key = sdata->keys[idx];
-		} else {
-			sta = sta_info_get(local, sta_addr);
-			if (!sta) {
-				err = -ENOENT;
-				goto out_unlock;
-			}
-			key = sta->key;
-		}
-
-		ieee80211_key_free(key);
-	} else {
-		key = ieee80211_key_alloc(alg, idx, key_len, _key);
-		if (!key)
-			return -ENOMEM;
-
-		sta = NULL;
-		err = 0;
-
-		rcu_read_lock();
-
-		if (!is_broadcast_ether_addr(sta_addr)) {
-			set_tx_key = 0;
-			/*
-			 * According to the standard, the key index of a
-			 * pairwise key must be zero. However, some AP are
-			 * broken when it comes to WEP key indices, so we
-			 * work around this.
-			 */
-			if (idx != 0 && alg != ALG_WEP) {
-				ieee80211_key_free(key);
-				err = -EINVAL;
-				goto out_unlock;
-			}
-
-			sta = sta_info_get(local, sta_addr);
-			if (!sta) {
-				ieee80211_key_free(key);
-				err = -ENOENT;
-				goto out_unlock;
-			}
-		}
-
-		if (alg == ALG_WEP &&
-			key_len != LEN_WEP40 && key_len != LEN_WEP104) {
-			ieee80211_key_free(key);
-			err = -EINVAL;
-			goto out_unlock;
-		}
-
-		ieee80211_key_link(key, sdata, sta);
-
-		if (set_tx_key || (!sta && !sdata->default_key && key))
-			ieee80211_set_default_key(sdata, idx);
-		if (alg == ALG_AES_CMAC &&
-		    (set_tx_key || (!sta && !sdata->default_mgmt_key && key)))
-			ieee80211_set_default_mgmt_key(sdata, idx);
-	}
-
- out_unlock:
-	rcu_read_unlock();
-
-	return err;
-}
-
 static int ieee80211_ioctl_siwgenie(struct net_device *dev,
 				    struct iw_request_info *info,
 				    struct iw_point *data, char *extra)
@@ -472,109 +378,6 @@ static int ieee80211_ioctl_giwtxpower(struct net_device *dev,
 	return 0;
 }
 
-static int ieee80211_ioctl_siwencode(struct net_device *dev,
-				     struct iw_request_info *info,
-				     struct iw_point *erq, char *keybuf)
-{
-	struct ieee80211_sub_if_data *sdata;
-	int idx, i, alg = ALG_WEP;
-	u8 bcaddr[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
-	int remove = 0, ret;
-
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-
-	idx = erq->flags & IW_ENCODE_INDEX;
-	if (idx == 0) {
-		if (sdata->default_key)
-			for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
-				if (sdata->default_key == sdata->keys[i]) {
-					idx = i;
-					break;
-				}
-			}
-	} else if (idx < 1 || idx > 4)
-		return -EINVAL;
-	else
-		idx--;
-
-	if (erq->flags & IW_ENCODE_DISABLED)
-		remove = 1;
-	else if (erq->length == 0) {
-		/* No key data - just set the default TX key index */
-		ieee80211_set_default_key(sdata, idx);
-		return 0;
-	}
-
-	ret = ieee80211_set_encryption(
-		sdata, bcaddr,
-		idx, alg, remove,
-		!sdata->default_key,
-		keybuf, erq->length);
-
-	if (!ret && sdata->vif.type == NL80211_IFTYPE_STATION) {
-		if (remove)
-			sdata->u.mgd.flags &= ~IEEE80211_STA_TKIP_WEP_USED;
-		else
-			sdata->u.mgd.flags |= IEEE80211_STA_TKIP_WEP_USED;
-	}
-
-	return ret;
-}
-
-
-static int ieee80211_ioctl_giwencode(struct net_device *dev,
-				     struct iw_request_info *info,
-				     struct iw_point *erq, char *key)
-{
-	struct ieee80211_sub_if_data *sdata;
-	int idx, i;
-
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-
-	idx = erq->flags & IW_ENCODE_INDEX;
-	if (idx < 1 || idx > 4) {
-		idx = -1;
-		if (!sdata->default_key)
-			idx = 0;
-		else for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
-			if (sdata->default_key == sdata->keys[i]) {
-				idx = i;
-				break;
-			}
-		}
-		if (idx < 0)
-			return -EINVAL;
-	} else
-		idx--;
-
-	erq->flags = idx + 1;
-
-	if (!sdata->keys[idx]) {
-		erq->length = 0;
-		erq->flags |= IW_ENCODE_DISABLED;
-		return 0;
-	}
-
-	memcpy(key, sdata->keys[idx]->conf.key,
-	       min_t(int, erq->length, sdata->keys[idx]->conf.keylen));
-	erq->length = sdata->keys[idx]->conf.keylen;
-	erq->flags |= IW_ENCODE_ENABLED;
-
-	if (sdata->vif.type == NL80211_IFTYPE_STATION) {
-		switch (sdata->u.mgd.auth_alg) {
-		case WLAN_AUTH_OPEN:
-		case WLAN_AUTH_LEAP:
-			erq->flags |= IW_ENCODE_OPEN;
-			break;
-		case WLAN_AUTH_SHARED_KEY:
-			erq->flags |= IW_ENCODE_RESTRICTED;
-			break;
-		}
-	}
-
-	return 0;
-}
-
 static int ieee80211_ioctl_siwpower(struct net_device *dev,
 				    struct iw_request_info *info,
 				    struct iw_param *wrq,
@@ -809,82 +612,6 @@ static int ieee80211_ioctl_giwauth(struct net_device *dev,
 }
 
 
-static int ieee80211_ioctl_siwencodeext(struct net_device *dev,
-					struct iw_request_info *info,
-					struct iw_point *erq, char *extra)
-{
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	struct iw_encode_ext *ext = (struct iw_encode_ext *) extra;
-	int uninitialized_var(alg), idx, i, remove = 0;
-
-	switch (ext->alg) {
-	case IW_ENCODE_ALG_NONE:
-		remove = 1;
-		break;
-	case IW_ENCODE_ALG_WEP:
-		alg = ALG_WEP;
-		break;
-	case IW_ENCODE_ALG_TKIP:
-		alg = ALG_TKIP;
-		break;
-	case IW_ENCODE_ALG_CCMP:
-		alg = ALG_CCMP;
-		break;
-	case IW_ENCODE_ALG_AES_CMAC:
-		alg = ALG_AES_CMAC;
-		break;
-	default:
-		return -EOPNOTSUPP;
-	}
-
-	if (erq->flags & IW_ENCODE_DISABLED)
-		remove = 1;
-
-	idx = erq->flags & IW_ENCODE_INDEX;
-	if (alg == ALG_AES_CMAC) {
-		if (idx < NUM_DEFAULT_KEYS + 1 ||
-		    idx > NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) {
-			idx = -1;
-			if (!sdata->default_mgmt_key)
-				idx = 0;
-			else for (i = NUM_DEFAULT_KEYS;
-				  i < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS;
-				  i++) {
-				if (sdata->default_mgmt_key == sdata->keys[i])
-				{
-					idx = i;
-					break;
-				}
-			}
-			if (idx < 0)
-				return -EINVAL;
-		} else
-			idx--;
-	} else {
-		if (idx < 1 || idx > 4) {
-			idx = -1;
-			if (!sdata->default_key)
-				idx = 0;
-			else for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
-				if (sdata->default_key == sdata->keys[i]) {
-					idx = i;
-					break;
-				}
-			}
-			if (idx < 0)
-				return -EINVAL;
-		} else
-			idx--;
-	}
-
-	return ieee80211_set_encryption(sdata, ext->addr.sa_data, idx, alg,
-					remove,
-					ext->ext_flags &
-					IW_ENCODE_EXT_SET_TX_KEY,
-					ext->key, ext->key_len);
-}
-
-
 /* Structures to export the Wireless Handlers */
 
 static const iw_handler ieee80211_handler[] =
@@ -931,8 +658,8 @@ static const iw_handler ieee80211_handler[] =
 	(iw_handler) ieee80211_ioctl_giwtxpower,	/* SIOCGIWTXPOW */
 	(iw_handler) cfg80211_wext_siwretry,		/* SIOCSIWRETRY */
 	(iw_handler) cfg80211_wext_giwretry,		/* SIOCGIWRETRY */
-	(iw_handler) ieee80211_ioctl_siwencode,		/* SIOCSIWENCODE */
-	(iw_handler) ieee80211_ioctl_giwencode,		/* SIOCGIWENCODE */
+	(iw_handler) cfg80211_wext_siwencode,		/* SIOCSIWENCODE */
+	(iw_handler) cfg80211_wext_giwencode,		/* SIOCGIWENCODE */
 	(iw_handler) ieee80211_ioctl_siwpower,		/* SIOCSIWPOWER */
 	(iw_handler) ieee80211_ioctl_giwpower,		/* SIOCGIWPOWER */
 	(iw_handler) NULL,				/* -- hole -- */
@@ -941,7 +668,7 @@ static const iw_handler ieee80211_handler[] =
 	(iw_handler) NULL,				/* SIOCGIWGENIE */
 	(iw_handler) ieee80211_ioctl_siwauth,		/* SIOCSIWAUTH */
 	(iw_handler) ieee80211_ioctl_giwauth,		/* SIOCGIWAUTH */
-	(iw_handler) ieee80211_ioctl_siwencodeext,	/* SIOCSIWENCODEEXT */
+	(iw_handler) cfg80211_wext_siwencodeext,	/* SIOCSIWENCODEEXT */
 	(iw_handler) NULL,				/* SIOCGIWENCODEEXT */
 	(iw_handler) NULL,				/* SIOCSIWPMKSA */
 	(iw_handler) NULL,				/* -- hole -- */
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 15b2a179480..47c20eb0c04 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1,7 +1,7 @@
 /*
  * This is the linux wireless configuration interface.
  *
- * Copyright 2006-2008		Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2006-2009		Johannes Berg <johannes@sipsolutions.net>
  */
 
 #include <linux/if.h>
@@ -457,6 +457,10 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 				"symlink to netdev!\n");
 		}
 		dev->ieee80211_ptr->netdev = dev;
+#ifdef CONFIG_WIRELESS_EXT
+		dev->ieee80211_ptr->wext.default_key = -1;
+		dev->ieee80211_ptr->wext.default_mgmt_key = -1;
+#endif
 		mutex_unlock(&rdev->devlist_mtx);
 		break;
 	case NETDEV_GOING_DOWN:
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 3e49d339931..f14b6c5f422 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -1,7 +1,7 @@
 /*
  * Wireless configuration interface internals.
  *
- * Copyright 2006, 2007 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2006-2009	Johannes Berg <johannes@sipsolutions.net>
  */
 #ifndef __NET_WIRELESS_CORE_H
 #define __NET_WIRELESS_CORE_H
@@ -151,4 +151,8 @@ void cfg80211_clear_ibss(struct net_device *dev, bool nowext);
 int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev,
 			struct net_device *dev, bool nowext);
 
+/* internal helpers */
+int cfg80211_validate_key_settings(struct key_params *params, int key_idx,
+				   const u8 *mac_addr);
+
 #endif /* __NET_WIRELESS_CORE_H */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index a39e4644778..f88dbbec752 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1,7 +1,7 @@
 /*
  * This is the new netlink-based wireless configuration interface.
  *
- * Copyright 2006, 2007	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2006-2009	Johannes Berg <johannes@sipsolutions.net>
  */
 
 #include <linux/if.h>
@@ -1073,6 +1073,14 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	err = func(&drv->wiphy, dev, key_idx);
+#ifdef CONFIG_WIRELESS_EXT
+	if (!err) {
+		if (func == drv->ops->set_default_key)
+			dev->ieee80211_ptr->wext.default_key = key_idx;
+		else
+			dev->ieee80211_ptr->wext.default_mgmt_key = key_idx;
+	}
+#endif
 
  out:
 	cfg80211_put_dev(drv);
@@ -1111,45 +1119,9 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
 	if (info->attrs[NL80211_ATTR_MAC])
 		mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
 
-	if (key_idx > 5)
+	if (cfg80211_validate_key_settings(&params, key_idx, mac_addr))
 		return -EINVAL;
 
-	/*
-	 * Disallow pairwise keys with non-zero index unless it's WEP
-	 * (because current deployments use pairwise WEP keys with
-	 * non-zero indizes but 802.11i clearly specifies to use zero)
-	 */
-	if (mac_addr && key_idx &&
-	    params.cipher != WLAN_CIPHER_SUITE_WEP40 &&
-	    params.cipher != WLAN_CIPHER_SUITE_WEP104)
-		return -EINVAL;
-
-	/* TODO: add definitions for the lengths to linux/ieee80211.h */
-	switch (params.cipher) {
-	case WLAN_CIPHER_SUITE_WEP40:
-		if (params.key_len != 5)
-			return -EINVAL;
-		break;
-	case WLAN_CIPHER_SUITE_TKIP:
-		if (params.key_len != 32)
-			return -EINVAL;
-		break;
-	case WLAN_CIPHER_SUITE_CCMP:
-		if (params.key_len != 16)
-			return -EINVAL;
-		break;
-	case WLAN_CIPHER_SUITE_WEP104:
-		if (params.key_len != 13)
-			return -EINVAL;
-		break;
-	case WLAN_CIPHER_SUITE_AES_CMAC:
-		if (params.key_len != 16)
-			return -EINVAL;
-		break;
-	default:
-		return -EINVAL;
-	}
-
 	rtnl_lock();
 
 	err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
@@ -1210,6 +1182,15 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info)
 
 	err = drv->ops->del_key(&drv->wiphy, dev, key_idx, mac_addr);
 
+#ifdef CONFIG_WIRELESS_EXT
+	if (!err) {
+		if (key_idx == dev->ieee80211_ptr->wext.default_key)
+			dev->ieee80211_ptr->wext.default_key = -1;
+		else if (key_idx == dev->ieee80211_ptr->wext.default_mgmt_key)
+			dev->ieee80211_ptr->wext.default_mgmt_key = -1;
+	}
+#endif
+
  out:
 	cfg80211_put_dev(drv);
 	dev_put(dev);
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 5f7e997195c..beb226e78cd 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -138,3 +138,48 @@ void ieee80211_set_bitrate_flags(struct wiphy *wiphy)
 		if (wiphy->bands[band])
 			set_mandatory_flags_band(wiphy->bands[band], band);
 }
+
+int cfg80211_validate_key_settings(struct key_params *params, int key_idx,
+				   const u8 *mac_addr)
+{
+	if (key_idx > 5)
+		return -EINVAL;
+
+	/*
+	 * Disallow pairwise keys with non-zero index unless it's WEP
+	 * (because current deployments use pairwise WEP keys with
+	 * non-zero indizes but 802.11i clearly specifies to use zero)
+	 */
+	if (mac_addr && key_idx &&
+	    params->cipher != WLAN_CIPHER_SUITE_WEP40 &&
+	    params->cipher != WLAN_CIPHER_SUITE_WEP104)
+		return -EINVAL;
+
+	/* TODO: add definitions for the lengths to linux/ieee80211.h */
+	switch (params->cipher) {
+	case WLAN_CIPHER_SUITE_WEP40:
+		if (params->key_len != 5)
+			return -EINVAL;
+		break;
+	case WLAN_CIPHER_SUITE_TKIP:
+		if (params->key_len != 32)
+			return -EINVAL;
+		break;
+	case WLAN_CIPHER_SUITE_CCMP:
+		if (params->key_len != 16)
+			return -EINVAL;
+		break;
+	case WLAN_CIPHER_SUITE_WEP104:
+		if (params->key_len != 13)
+			return -EINVAL;
+		break;
+	case WLAN_CIPHER_SUITE_AES_CMAC:
+		if (params->key_len != 16)
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index abf6b0a047d..ffc98a8d6e5 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -5,12 +5,13 @@
  * into cfg80211, when that happens all the exports here go away and
  * we directly assign the wireless handlers of wireless interfaces.
  *
- * Copyright 2008	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2008-2009	Johannes Berg <johannes@sipsolutions.net>
  */
 
 #include <linux/wireless.h>
 #include <linux/nl80211.h>
 #include <linux/if_arp.h>
+#include <linux/etherdevice.h>
 #include <net/iw_handler.h>
 #include <net/cfg80211.h>
 #include "core.h"
@@ -477,3 +478,257 @@ int cfg80211_wext_giwretry(struct net_device *dev,
 	return 0;
 }
 EXPORT_SYMBOL_GPL(cfg80211_wext_giwretry);
+
+static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
+				   struct net_device *dev, const u8 *addr,
+				   bool remove, bool tx_key, int idx,
+				   struct key_params *params)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	int err;
+
+	if (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC) {
+		if (!rdev->ops->set_default_mgmt_key)
+			return -EOPNOTSUPP;
+
+		if (idx < 4 || idx > 5)
+			return -EINVAL;
+	} else if (idx < 0 || idx > 3)
+		return -EINVAL;
+
+	if (remove) {
+		err = rdev->ops->del_key(&rdev->wiphy, dev, idx, addr);
+		if (!err) {
+			if (idx == wdev->wext.default_key)
+				wdev->wext.default_key = -1;
+			else if (idx == wdev->wext.default_mgmt_key)
+				wdev->wext.default_mgmt_key = -1;
+		}
+		return err;
+	} else {
+		if (addr)
+			tx_key = false;
+
+		if (cfg80211_validate_key_settings(params, idx, addr))
+			return -EINVAL;
+
+		err = rdev->ops->add_key(&rdev->wiphy, dev, idx, addr, params);
+		if (err)
+			return err;
+
+		if (tx_key || (!addr && wdev->wext.default_key == -1)) {
+			err = rdev->ops->set_default_key(&rdev->wiphy,
+							 dev, idx);
+			if (!err)
+				wdev->wext.default_key = idx;
+			return err;
+		}
+
+		if (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC &&
+		    (tx_key || (!addr && wdev->wext.default_mgmt_key == -1))) {
+			err = rdev->ops->set_default_mgmt_key(&rdev->wiphy,
+							      dev, idx);
+			if (!err)
+				wdev->wext.default_mgmt_key = idx;
+			return err;
+		}
+
+		return 0;
+	}
+}
+
+int cfg80211_wext_siwencode(struct net_device *dev,
+			    struct iw_request_info *info,
+			    struct iw_point *erq, char *keybuf)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	int idx, err;
+	bool remove = false;
+	struct key_params params;
+
+	/* no use -- only MFP (set_default_mgmt_key) is optional */
+	if (!rdev->ops->del_key ||
+	    !rdev->ops->add_key ||
+	    !rdev->ops->set_default_key)
+		return -EOPNOTSUPP;
+
+	idx = erq->flags & IW_ENCODE_INDEX;
+	if (idx == 0) {
+		idx = wdev->wext.default_key;
+		if (idx < 0)
+			idx = 0;
+	} else if (idx < 1 || idx > 4)
+		return -EINVAL;
+	else
+		idx--;
+
+	if (erq->flags & IW_ENCODE_DISABLED)
+		remove = true;
+	else if (erq->length == 0) {
+		/* No key data - just set the default TX key index */
+		err = rdev->ops->set_default_key(&rdev->wiphy, dev, idx);
+		if (!err)
+			wdev->wext.default_key = idx;
+		return err;
+	}
+
+	memset(&params, 0, sizeof(params));
+	params.key = keybuf;
+	params.key_len = erq->length;
+	if (erq->length == 5)
+		params.cipher = WLAN_CIPHER_SUITE_WEP40;
+	else if (erq->length == 13)
+		params.cipher = WLAN_CIPHER_SUITE_WEP104;
+	else if (!remove)
+		return -EINVAL;
+
+	return cfg80211_set_encryption(rdev, dev, NULL, remove,
+				       wdev->wext.default_key == -1,
+				       idx, &params);
+}
+EXPORT_SYMBOL_GPL(cfg80211_wext_siwencode);
+
+int cfg80211_wext_siwencodeext(struct net_device *dev,
+			       struct iw_request_info *info,
+			       struct iw_point *erq, char *extra)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct iw_encode_ext *ext = (struct iw_encode_ext *) extra;
+	const u8 *addr;
+	int idx;
+	bool remove = false;
+	struct key_params params;
+	u32 cipher;
+
+	/* no use -- only MFP (set_default_mgmt_key) is optional */
+	if (!rdev->ops->del_key ||
+	    !rdev->ops->add_key ||
+	    !rdev->ops->set_default_key)
+		return -EOPNOTSUPP;
+
+	switch (ext->alg) {
+	case IW_ENCODE_ALG_NONE:
+		remove = true;
+		cipher = 0;
+		break;
+	case IW_ENCODE_ALG_WEP:
+		if (ext->key_len == 5)
+			cipher = WLAN_CIPHER_SUITE_WEP40;
+		else if (ext->key_len == 13)
+			cipher = WLAN_CIPHER_SUITE_WEP104;
+		else
+			return -EINVAL;
+		break;
+	case IW_ENCODE_ALG_TKIP:
+		cipher = WLAN_CIPHER_SUITE_TKIP;
+		break;
+	case IW_ENCODE_ALG_CCMP:
+		cipher = WLAN_CIPHER_SUITE_CCMP;
+		break;
+	case IW_ENCODE_ALG_AES_CMAC:
+		cipher = WLAN_CIPHER_SUITE_AES_CMAC;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	if (erq->flags & IW_ENCODE_DISABLED)
+		remove = true;
+
+	idx = erq->flags & IW_ENCODE_INDEX;
+	if (cipher == WLAN_CIPHER_SUITE_AES_CMAC) {
+		if (idx < 4 || idx > 5) {
+			idx = wdev->wext.default_mgmt_key;
+			if (idx < 0)
+				return -EINVAL;
+		} else
+			idx--;
+	} else {
+		if (idx < 1 || idx > 4) {
+			idx = wdev->wext.default_key;
+			if (idx < 0)
+				return -EINVAL;
+		} else
+			idx--;
+	}
+
+	addr = ext->addr.sa_data;
+	if (is_broadcast_ether_addr(addr))
+		addr = NULL;
+
+	memset(&params, 0, sizeof(params));
+	params.key = ext->key;
+	params.key_len = ext->key_len;
+	params.cipher = cipher;
+
+	return cfg80211_set_encryption(
+			rdev, dev, addr, remove,
+			ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY,
+			idx, &params);
+}
+EXPORT_SYMBOL_GPL(cfg80211_wext_siwencodeext);
+
+struct giwencode_cookie {
+	size_t buflen;
+	char *keybuf;
+};
+
+static void giwencode_get_key_cb(void *cookie, struct key_params *params)
+{
+	struct giwencode_cookie *data = cookie;
+
+	if (!params->key) {
+		data->buflen = 0;
+		return;
+	}
+
+	data->buflen = min_t(size_t, data->buflen, params->key_len);
+	memcpy(data->keybuf, params->key, data->buflen);
+}
+
+int cfg80211_wext_giwencode(struct net_device *dev,
+			    struct iw_request_info *info,
+			    struct iw_point *erq, char *keybuf)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	int idx, err;
+	struct giwencode_cookie data = {
+		.keybuf = keybuf,
+		.buflen = erq->length,
+	};
+
+	if (!rdev->ops->get_key)
+		return -EOPNOTSUPP;
+
+	idx = erq->flags & IW_ENCODE_INDEX;
+	if (idx == 0) {
+		idx = wdev->wext.default_key;
+		if (idx < 0)
+			idx = 0;
+	} else if (idx < 1 || idx > 4)
+		return -EINVAL;
+	else
+		idx--;
+
+	erq->flags = idx + 1;
+
+	err = rdev->ops->get_key(&rdev->wiphy, dev, idx, NULL, &data,
+				 giwencode_get_key_cb);
+	if (!err) {
+		erq->length = data.buflen;
+		erq->flags |= IW_ENCODE_ENABLED;
+		return 0;
+	}
+
+	if (err == -ENOENT) {
+		erq->flags |= IW_ENCODE_DISABLED;
+		erq->length = 0;
+		return 0;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(cfg80211_wext_giwencode);
-- 
cgit v1.2.3-70-g09d2


From eccb8e8f0c3af47aeb6dbe4012eb8d4fc888767a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 11 May 2009 21:57:56 +0300
Subject: nl80211: improve station flags handling

It is currently not possible to modify station flags, but that
capability would be very useful. This patch introduces a new
nl80211 attribute that contains a set/mask for station flags,
and updates the internal API (and mac80211) to mirror that.

The new attribute is parsed before falling back to the old so
that userspace can specify both (if it can) to work on all
kernels.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 21 ++++++++++++++++++++-
 include/net/cfg80211.h  | 28 +++++-----------------------
 net/mac80211/cfg.c      | 28 ++++++++++++++++------------
 net/wireless/nl80211.c  | 38 ++++++++++++++++++++++++++++++--------
 4 files changed, 71 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 58c4ee1822d..aeefccfac0e 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -25,6 +25,8 @@
  *
  */
 
+#include <linux/types.h>
+
 /**
  * DOC: Station handling
  *
@@ -380,7 +382,7 @@ enum nl80211_commands {
  *
  * @NL80211_ATTR_STA_AID: Association ID for the station (u16)
  * @NL80211_ATTR_STA_FLAGS: flags, nested element with NLA_FLAG attributes of
- *	&enum nl80211_sta_flags.
+ *	&enum nl80211_sta_flags (deprecated, use %NL80211_ATTR_STA_FLAGS2)
  * @NL80211_ATTR_STA_LISTEN_INTERVAL: listen interval as defined by
  *	IEEE 802.11 7.3.1.6 (u16).
  * @NL80211_ATTR_STA_SUPPORTED_RATES: supported rates, array of supported
@@ -499,6 +501,9 @@ enum nl80211_commands {
  *	this attribute can be used
  *	with %NL80211_CMD_ASSOCIATE request
  *
+ * @NL80211_ATTR_STA_FLAGS2: Attribute containing a
+ *	&struct nl80211_sta_flag_update.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -603,6 +608,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_USE_MFP,
 
+	NL80211_ATTR_STA_FLAGS2,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -691,6 +698,18 @@ enum nl80211_sta_flags {
 	NL80211_STA_FLAG_MAX = __NL80211_STA_FLAG_AFTER_LAST - 1
 };
 
+/**
+ * struct nl80211_sta_flag_update - station flags mask/set
+ * @mask: mask of station flags to set
+ * @set: which values to set them to
+ *
+ * Both mask and set contain bits as per &enum nl80211_sta_flags.
+ */
+struct nl80211_sta_flag_update {
+	__u32 mask;
+	__u32 set;
+} __attribute__((packed));
+
 /**
  * enum nl80211_rate_info - bitrate information
  *
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index e69e6c66dd1..0dae6b38294 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -251,27 +251,6 @@ struct beacon_parameters {
 	int head_len, tail_len;
 };
 
-/**
- * enum station_flags - station flags
- *
- * Station capability flags. Note that these must be the bits
- * according to the nl80211 flags.
- *
- * @STATION_FLAG_CHANGED: station flags were changed
- * @STATION_FLAG_AUTHORIZED: station is authorized to send frames (802.1X)
- * @STATION_FLAG_SHORT_PREAMBLE: station is capable of receiving frames
- *	with short preambles
- * @STATION_FLAG_WME: station is WME/QoS capable
- * @STATION_FLAG_MFP: station uses management frame protection
- */
-enum station_flags {
-	STATION_FLAG_CHANGED		= 1<<0,
-	STATION_FLAG_AUTHORIZED		= 1<<NL80211_STA_FLAG_AUTHORIZED,
-	STATION_FLAG_SHORT_PREAMBLE	= 1<<NL80211_STA_FLAG_SHORT_PREAMBLE,
-	STATION_FLAG_WME		= 1<<NL80211_STA_FLAG_WME,
-	STATION_FLAG_MFP		= 1<<NL80211_STA_FLAG_MFP,
-};
-
 /**
  * enum plink_action - actions to perform in mesh peers
  *
@@ -294,14 +273,17 @@ enum plink_actions {
  * @supported_rates: supported rates in IEEE 802.11 format
  *	(or NULL for no change)
  * @supported_rates_len: number of supported rates
- * @station_flags: station flags (see &enum station_flags)
+ * @sta_flags_mask: station flags that changed
+ *	(bitmask of BIT(NL80211_STA_FLAG_...))
+ * @sta_flags_set: station flags values
+ *	(bitmask of BIT(NL80211_STA_FLAG_...))
  * @listen_interval: listen interval or -1 for no change
  * @aid: AID or zero for no change
  */
 struct station_parameters {
 	u8 *supported_rates;
 	struct net_device *vlan;
-	u32 station_flags;
+	u32 sta_flags_mask, sta_flags_set;
 	int listen_interval;
 	u16 aid;
 	u8 supported_rates_len;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index be86e159e6e..d591a936f5c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -629,34 +629,38 @@ static void sta_apply_parameters(struct ieee80211_local *local,
 	int i, j;
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
+	u32 mask, set;
 
 	sband = local->hw.wiphy->bands[local->oper_channel->band];
 
-	/*
-	 * FIXME: updating the flags is racy when this function is
-	 *	  called from ieee80211_change_station(), this will
-	 *	  be resolved in a future patch.
-	 */
+	spin_lock_bh(&sta->lock);
+	mask = params->sta_flags_mask;
+	set = params->sta_flags_set;
 
-	if (params->station_flags & STATION_FLAG_CHANGED) {
-		spin_lock_bh(&sta->lock);
+	if (mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) {
 		sta->flags &= ~WLAN_STA_AUTHORIZED;
-		if (params->station_flags & STATION_FLAG_AUTHORIZED)
+		if (set & BIT(NL80211_STA_FLAG_AUTHORIZED))
 			sta->flags |= WLAN_STA_AUTHORIZED;
+	}
 
+	if (mask & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) {
 		sta->flags &= ~WLAN_STA_SHORT_PREAMBLE;
-		if (params->station_flags & STATION_FLAG_SHORT_PREAMBLE)
+		if (set & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE))
 			sta->flags |= WLAN_STA_SHORT_PREAMBLE;
+	}
 
+	if (mask & BIT(NL80211_STA_FLAG_WME)) {
 		sta->flags &= ~WLAN_STA_WME;
-		if (params->station_flags & STATION_FLAG_WME)
+		if (set & BIT(NL80211_STA_FLAG_WME))
 			sta->flags |= WLAN_STA_WME;
+	}
 
+	if (mask & BIT(NL80211_STA_FLAG_MFP)) {
 		sta->flags &= ~WLAN_STA_MFP;
-		if (params->station_flags & STATION_FLAG_MFP)
+		if (set & BIT(NL80211_STA_FLAG_MFP))
 			sta->flags |= WLAN_STA_MFP;
-		spin_unlock_bh(&sta->lock);
 	}
+	spin_unlock_bh(&sta->lock);
 
 	/*
 	 * FIXME: updating the following information is racy when this
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 2353ddbf493..66024ef57ba 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -123,6 +123,9 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 	[NL80211_ATTR_FREQ_FIXED] = { .type = NLA_FLAG },
 	[NL80211_ATTR_TIMED_OUT] = { .type = NLA_FLAG },
 	[NL80211_ATTR_USE_MFP] = { .type = NLA_U32 },
+	[NL80211_ATTR_STA_FLAGS2] = {
+		.len = sizeof(struct nl80211_sta_flag_update),
+	},
 };
 
 /* IE validation */
@@ -1334,13 +1337,33 @@ static const struct nla_policy sta_flags_policy[NL80211_STA_FLAG_MAX + 1] = {
 	[NL80211_STA_FLAG_MFP] = { .type = NLA_FLAG },
 };
 
-static int parse_station_flags(struct nlattr *nla, u32 *staflags)
+static int parse_station_flags(struct genl_info *info,
+			       struct station_parameters *params)
 {
 	struct nlattr *flags[NL80211_STA_FLAG_MAX + 1];
+	struct nlattr *nla;
 	int flag;
 
-	*staflags = 0;
+	/*
+	 * Try parsing the new attribute first so userspace
+	 * can specify both for older kernels.
+	 */
+	nla = info->attrs[NL80211_ATTR_STA_FLAGS2];
+	if (nla) {
+		struct nl80211_sta_flag_update *sta_flags;
+
+		sta_flags = nla_data(nla);
+		params->sta_flags_mask = sta_flags->mask;
+		params->sta_flags_set = sta_flags->set;
+		if ((params->sta_flags_mask |
+		     params->sta_flags_set) & BIT(__NL80211_STA_FLAG_INVALID))
+			return -EINVAL;
+		return 0;
+	}
+
+	/* if present, parse the old attribute */
 
+	nla = info->attrs[NL80211_ATTR_STA_FLAGS];
 	if (!nla)
 		return 0;
 
@@ -1348,11 +1371,12 @@ static int parse_station_flags(struct nlattr *nla, u32 *staflags)
 			     nla, sta_flags_policy))
 		return -EINVAL;
 
-	*staflags = STATION_FLAG_CHANGED;
+	params->sta_flags_mask = (1 << __NL80211_STA_FLAG_AFTER_LAST) - 1;
+	params->sta_flags_mask &= ~1;
 
 	for (flag = 1; flag <= NL80211_STA_FLAG_MAX; flag++)
 		if (flags[flag])
-			*staflags |= (1<<flag);
+			params->sta_flags_set |= (1<<flag);
 
 	return 0;
 }
@@ -1648,8 +1672,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 		params.ht_capa =
 			nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
 
-	if (parse_station_flags(info->attrs[NL80211_ATTR_STA_FLAGS],
-				&params.station_flags))
+	if (parse_station_flags(info, &params))
 		return -EINVAL;
 
 	if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION])
@@ -1718,8 +1741,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 		params.ht_capa =
 			nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
 
-	if (parse_station_flags(info->attrs[NL80211_ATTR_STA_FLAGS],
-				&params.station_flags))
+	if (parse_station_flags(info, &params))
 		return -EINVAL;
 
 	rtnl_lock();
-- 
cgit v1.2.3-70-g09d2


From 3f77316c6b99f596bfbf72c0542f47f7230b702e Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni.malinen@atheros.com>
Date: Mon, 11 May 2009 21:57:57 +0300
Subject: nl80211: Add IEEE 802.1X PAE control for station mode

Add a new NL80211_ATTR_CONTROL_PORT flag for NL80211_CMD_ASSOCIATE to
allow user space to indicate that it will control the IEEE 802.1X port
in station mode. Previously, mac80211 was always marking the port
authorized in station mode. This was enough when drop_unencrypted flag
was set. However, drop_unencrypted can currently be controlled only
with WEXT and the current nl80211 design does not allow fully secure
configuration. Fix this by providing a mechanism for user space to
control the IEEE 802.1X port in station mode (i.e., do the same that
we are already doing in AP mode).

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h    | 9 +++++++++
 include/net/cfg80211.h     | 5 +++++
 net/mac80211/cfg.c         | 5 +++++
 net/mac80211/ieee80211_i.h | 2 +-
 net/mac80211/mlme.c        | 5 +++--
 net/mac80211/wext.c        | 3 +++
 net/wireless/nl80211.c     | 3 +++
 7 files changed, 29 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index aeefccfac0e..2781525b03d 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -504,6 +504,13 @@ enum nl80211_commands {
  * @NL80211_ATTR_STA_FLAGS2: Attribute containing a
  *	&struct nl80211_sta_flag_update.
  *
+ * @NL80211_ATTR_CONTROL_PORT: A flag indicating whether user space controls
+ *	IEEE 802.1X port, i.e., sets/clears %NL80211_STA_FLAG_AUTHORIZED, in
+ *	station mode. If the flag is included in %NL80211_CMD_ASSOCIATE
+ *	request, the driver will assume that the port is unauthorized until
+ *	authorized by user space. Otherwise, port is marked authorized by
+ *	default in station mode.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -610,6 +617,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_STA_FLAGS2,
 
+	NL80211_ATTR_CONTROL_PORT,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 0dae6b38294..9e17a83d343 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -655,6 +655,10 @@ struct cfg80211_auth_request {
  * @ie: Extra IEs to add to (Re)Association Request frame or %NULL
  * @ie_len: Length of ie buffer in octets
  * @use_mfp: Use management frame protection (IEEE 802.11w) in this association
+ * @control_port: Whether user space controls IEEE 802.1X port, i.e.,
+ *	sets/clears %NL80211_STA_FLAG_AUTHORIZED. If true, the driver is
+ *	required to assume that the port is unauthorized until authorized by
+ *	user space. Otherwise, port is marked authorized by default.
  */
 struct cfg80211_assoc_request {
 	struct ieee80211_channel *chan;
@@ -664,6 +668,7 @@ struct cfg80211_assoc_request {
 	const u8 *ie;
 	size_t ie_len;
 	bool use_mfp;
+	bool control_port;
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index d591a936f5c..6464bfd232c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1265,6 +1265,11 @@ static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev,
 		sdata->u.mgd.flags &= ~IEEE80211_STA_MFP_ENABLED;
 	}
 
+	if (req->control_port)
+		sdata->u.mgd.flags |= IEEE80211_STA_CONTROL_PORT;
+	else
+		sdata->u.mgd.flags &= ~IEEE80211_STA_CONTROL_PORT;
+
 	sdata->u.mgd.flags |= IEEE80211_STA_EXT_SME;
 	sdata->u.mgd.state = IEEE80211_STA_MLME_ASSOCIATE;
 	ieee80211_sta_req_auth(sdata);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 081c5742730..56a49ef446c 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -235,7 +235,7 @@ struct mesh_preq_queue {
 #define IEEE80211_STA_ASSOCIATED	BIT(4)
 #define IEEE80211_STA_PROBEREQ_POLL	BIT(5)
 #define IEEE80211_STA_CREATE_IBSS	BIT(6)
-/* hole at 7, please re-use */
+#define IEEE80211_STA_CONTROL_PORT	BIT(7)
 #define IEEE80211_STA_WMM_ENABLED	BIT(8)
 /* hole at 9, please re-use */
 #define IEEE80211_STA_AUTO_SSID_SEL	BIT(10)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 6d00e3f738c..2806f6af7ae 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1581,8 +1581,9 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	 *	  to between the sta_info_alloc() and sta_info_insert() above.
 	 */
 
-	set_sta_flags(sta, WLAN_STA_AUTH | WLAN_STA_ASSOC | WLAN_STA_ASSOC_AP |
-			   WLAN_STA_AUTHORIZED);
+	set_sta_flags(sta, WLAN_STA_AUTH | WLAN_STA_ASSOC | WLAN_STA_ASSOC_AP);
+	if (!(ifmgd->flags & IEEE80211_STA_CONTROL_PORT))
+		set_sta_flags(sta, WLAN_STA_AUTHORIZED);
 
 	rates = 0;
 	basic_rates = 0;
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index d8450264468..c14394744a9 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -41,6 +41,7 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev,
 			return ret;
 		sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL;
 		sdata->u.mgd.flags &= ~IEEE80211_STA_EXT_SME;
+		sdata->u.mgd.flags &= ~IEEE80211_STA_CONTROL_PORT;
 		ieee80211_sta_req_auth(sdata);
 		return 0;
 	}
@@ -124,6 +125,7 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev,
 			return ret;
 
 		sdata->u.mgd.flags &= ~IEEE80211_STA_EXT_SME;
+		sdata->u.mgd.flags &= ~IEEE80211_STA_CONTROL_PORT;
 		ieee80211_sta_req_auth(sdata);
 		return 0;
 	}
@@ -181,6 +183,7 @@ static int ieee80211_ioctl_siwap(struct net_device *dev,
 		if (ret)
 			return ret;
 		sdata->u.mgd.flags &= ~IEEE80211_STA_EXT_SME;
+		sdata->u.mgd.flags &= ~IEEE80211_STA_CONTROL_PORT;
 		ieee80211_sta_req_auth(sdata);
 		return 0;
 	} else if (sdata->vif.type == NL80211_IFTYPE_WDS) {
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 66024ef57ba..cad281390cf 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -126,6 +126,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 	[NL80211_ATTR_STA_FLAGS2] = {
 		.len = sizeof(struct nl80211_sta_flag_update),
 	},
+	[NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG },
 };
 
 /* IE validation */
@@ -3040,6 +3041,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
 		}
 	}
 
+	req.control_port = info->attrs[NL80211_ATTR_CONTROL_PORT];
+
 	err = drv->ops->assoc(&drv->wiphy, dev, &req);
 
 out:
-- 
cgit v1.2.3-70-g09d2


From faa8fdc85347cc76d87b43ea718785661c54f656 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni.malinen@atheros.com>
Date: Mon, 11 May 2009 21:57:58 +0300
Subject: nl80211: Add RSC configuration for new keys

When setting a key with NL80211_CMD_NEW_KEY, we should allow the key
sequence number (RSC) to be set in order to allow replay protection to
work correctly for group keys. This patch documents this use for
nl80211 and adds the couple of missing pieces in nl80211/cfg80211 and
mac80211 to support this. In addition, WEXT SIOCSIWENCODEEXT compat
processing in cfg80211 is extended to handle the RSC (this was already
specified in WEXT, but just not implemented in cfg80211/mac80211).

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h    |  4 ++--
 net/mac80211/cfg.c         |  3 ++-
 net/mac80211/key.c         | 21 ++++++++++++++++++++-
 net/mac80211/key.h         |  3 ++-
 net/wireless/nl80211.c     |  5 +++++
 net/wireless/wext-compat.c |  5 +++++
 6 files changed, 36 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 2781525b03d..dbea93b694e 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -79,8 +79,8 @@
  * @NL80211_CMD_SET_KEY: Set key attributes %NL80211_ATTR_KEY_DEFAULT,
  *	%NL80211_ATTR_KEY_DEFAULT_MGMT, or %NL80211_ATTR_KEY_THRESHOLD.
  * @NL80211_CMD_NEW_KEY: add a key with given %NL80211_ATTR_KEY_DATA,
- *	%NL80211_ATTR_KEY_IDX, %NL80211_ATTR_MAC and %NL80211_ATTR_KEY_CIPHER
- *	attributes.
+ *	%NL80211_ATTR_KEY_IDX, %NL80211_ATTR_MAC, %NL80211_ATTR_KEY_CIPHER,
+ *	and %NL80211_ATTR_KEY_SEQ attributes.
  * @NL80211_CMD_DEL_KEY: delete a key identified by %NL80211_ATTR_KEY_IDX
  *	or %NL80211_ATTR_MAC.
  *
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 6464bfd232c..77e9ff5ec4f 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -141,7 +141,8 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
 		return -EINVAL;
 	}
 
-	key = ieee80211_key_alloc(alg, key_idx, params->key_len, params->key);
+	key = ieee80211_key_alloc(alg, key_idx, params->key_len, params->key,
+				  params->seq_len, params->seq);
 	if (!key)
 		return -ENOMEM;
 
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index b7e1350273b..827ea8e6ee0 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -290,9 +290,11 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
 struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
 					  int idx,
 					  size_t key_len,
-					  const u8 *key_data)
+					  const u8 *key_data,
+					  size_t seq_len, const u8 *seq)
 {
 	struct ieee80211_key *key;
+	int i, j;
 
 	BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS);
 
@@ -318,14 +320,31 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
 	case ALG_TKIP:
 		key->conf.iv_len = TKIP_IV_LEN;
 		key->conf.icv_len = TKIP_ICV_LEN;
+		if (seq && seq_len == 6) {
+			for (i = 0; i < NUM_RX_DATA_QUEUES; i++) {
+				key->u.tkip.rx[i].iv32 =
+					get_unaligned_le32(&seq[2]);
+				key->u.tkip.rx[i].iv16 =
+					get_unaligned_le16(seq);
+			}
+		}
 		break;
 	case ALG_CCMP:
 		key->conf.iv_len = CCMP_HDR_LEN;
 		key->conf.icv_len = CCMP_MIC_LEN;
+		if (seq && seq_len == CCMP_PN_LEN) {
+			for (i = 0; i < NUM_RX_DATA_QUEUES; i++)
+				for (j = 0; j < CCMP_PN_LEN; j++)
+					key->u.ccmp.rx_pn[i][j] =
+						seq[CCMP_PN_LEN - j - 1];
+		}
 		break;
 	case ALG_AES_CMAC:
 		key->conf.iv_len = 0;
 		key->conf.icv_len = sizeof(struct ieee80211_mmie);
+		if (seq && seq_len == 6)
+			for (j = 0; j < 6; j++)
+				key->u.aes_cmac.rx_pn[j] = seq[6 - j - 1];
 		break;
 	}
 	memcpy(key->conf.key, key_data, key_len);
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 215d3ef42a4..9572e00f532 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -144,7 +144,8 @@ struct ieee80211_key {
 struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
 					  int idx,
 					  size_t key_len,
-					  const u8 *key_data);
+					  const u8 *key_data,
+					  size_t seq_len, const u8 *seq);
 /*
  * Insert a key into data structures (sdata, sta if necessary)
  * to make it used, free old key.
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index cad281390cf..f0fec2f4982 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1115,6 +1115,11 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
 		params.key_len = nla_len(info->attrs[NL80211_ATTR_KEY_DATA]);
 	}
 
+	if (info->attrs[NL80211_ATTR_KEY_SEQ]) {
+		params.seq = nla_data(info->attrs[NL80211_ATTR_KEY_SEQ]);
+		params.seq_len = nla_len(info->attrs[NL80211_ATTR_KEY_SEQ]);
+	}
+
 	if (info->attrs[NL80211_ATTR_KEY_IDX])
 		key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
 
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index ffc98a8d6e5..f98090b90fb 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -663,6 +663,11 @@ int cfg80211_wext_siwencodeext(struct net_device *dev,
 	params.key_len = ext->key_len;
 	params.cipher = cipher;
 
+	if (ext->ext_flags & IW_ENCODE_EXT_RX_SEQ_VALID) {
+		params.seq = ext->rx_seq;
+		params.seq_len = 6;
+	}
+
 	return cfg80211_set_encryption(
 			rdev, dev, addr, remove,
 			ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY,
-- 
cgit v1.2.3-70-g09d2


From d34c43078236b41146877c49af341ab85b5fb8db Mon Sep 17 00:00:00 2001
From: Jon Smirl <jonsmirl@gmail.com>
Date: Wed, 13 May 2009 21:59:14 -0400
Subject: ASoC: Add SNDRV_PCM_FMTBIT_S32_BE as a valid AC97 format

Signed-off-by: Jon Smirl <jonsmirl@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc-dai.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 496dc30457b..352d7eee9b6 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -79,7 +79,8 @@ struct snd_pcm_substream;
 #define SND_SOC_CLOCK_OUT		1
 
 #define SND_SOC_STD_AC97_FMTS (SNDRV_PCM_FMTBIT_S16_LE |\
-                               SNDRV_PCM_FMTBIT_S32_LE)
+                               SNDRV_PCM_FMTBIT_S32_LE |\
+                               SNDRV_PCM_FMTBIT_S32_BE)
 
 struct snd_soc_dai_ops;
 struct snd_soc_dai;
-- 
cgit v1.2.3-70-g09d2


From 9fc20f030ba457d20eb994e1def7e2ce7d5ae1a8 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 14 May 2009 15:14:18 +0200
Subject: ALSA: ctxfi - Move PCI ID definitions to linux/pci_ids.h

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/pci_ids.h |  7 +++++++
 sound/pci/ctxfi/ctatc.c | 17 +++++++++--------
 sound/pci/ctxfi/ctdrv.h | 30 ------------------------------
 sound/pci/ctxfi/xfi.c   |  6 +++---
 4 files changed, 19 insertions(+), 41 deletions(-)
 delete mode 100644 sound/pci/ctxfi/ctdrv.h

(limited to 'include')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 06ba90c211a..61915313898 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1314,6 +1314,13 @@
 
 #define PCI_VENDOR_ID_CREATIVE		0x1102 /* duplicate: ECTIVA */
 #define PCI_DEVICE_ID_CREATIVE_EMU10K1	0x0002
+#define PCI_DEVICE_ID_CREATIVE_20K1	0x0005
+#define PCI_DEVICE_ID_CREATIVE_20K2	0x000b
+#define PCI_SUBDEVICE_ID_CREATIVE_SB0760	0x0024
+#define PCI_SUBDEVICE_ID_CREATIVE_SB08801	0x0041
+#define PCI_SUBDEVICE_ID_CREATIVE_SB08802	0x0042
+#define PCI_SUBDEVICE_ID_CREATIVE_SB08803	0x0043
+#define PCI_SUBDEVICE_ID_CREATIVE_HENDRIX	0x6000
 
 #define PCI_VENDOR_ID_ECTIVA		0x1102 /* duplicate: CREATIVE */
 #define PCI_DEVICE_ID_ECTIVA_EV1938	0x8938
diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c
index 5f35374863f..073fe2a59da 100644
--- a/sound/pci/ctxfi/ctatc.c
+++ b/sound/pci/ctxfi/ctatc.c
@@ -18,7 +18,6 @@
 #include "ctatc.h"
 #include "ctpcm.h"
 #include "ctmixer.h"
-#include "ctdrv.h"
 #include "cthardware.h"
 #include "ctsrc.h"
 #include "ctamixer.h"
@@ -40,23 +39,25 @@
 			    | ((IEC958_AES3_CON_FS_48000) << 24))
 
 static const struct ct_atc_chip_sub_details atc_sub_details[NUM_CTCARDS] = {
-	[CTSB0760] = {.subsys = PCI_SUBSYS_CREATIVE_SB0760,
+	[CTSB0760] = {.subsys = PCI_SUBDEVICE_ID_CREATIVE_SB0760,
 		      .nm_model = "SB076x"},
-	[CTHENDRIX] = {.subsys = PCI_SUBSYS_CREATIVE_HENDRIX,
+	[CTHENDRIX] = {.subsys = PCI_SUBDEVICE_ID_CREATIVE_HENDRIX,
 		      .nm_model = "Hendrix"},
-	[CTSB08801] = {.subsys = PCI_SUBSYS_CREATIVE_SB08801,
+	[CTSB08801] = {.subsys = PCI_SUBDEVICE_ID_CREATIVE_SB08801,
 		      .nm_model = "SB0880"},
-	[CTSB08802] = {.subsys = PCI_SUBSYS_CREATIVE_SB08802,
+	[CTSB08802] = {.subsys = PCI_SUBDEVICE_ID_CREATIVE_SB08802,
 		      .nm_model = "SB0880"},
-	[CTSB08803] = {.subsys = PCI_SUBSYS_CREATIVE_SB08803,
+	[CTSB08803] = {.subsys = PCI_SUBDEVICE_ID_CREATIVE_SB08803,
 		      .nm_model = "SB0880"}
 };
 
 static struct ct_atc_chip_details atc_chip_details[] = {
-	{.vendor = PCI_VENDOR_CREATIVE, .device = PCI_DEVICE_CREATIVE_20K1,
+	{.vendor = PCI_VENDOR_ID_CREATIVE,
+	 .device = PCI_DEVICE_ID_CREATIVE_20K1,
 	 .sub_details = NULL,
 	 .nm_card = "X-Fi 20k1"},
-	{.vendor = PCI_VENDOR_CREATIVE, .device = PCI_DEVICE_CREATIVE_20K2,
+	{.vendor = PCI_VENDOR_ID_CREATIVE,
+	 .device = PCI_DEVICE_ID_CREATIVE_20K2,
 	 .sub_details = atc_sub_details,
 	 .nm_card = "X-Fi 20k2"},
 	{} /* terminator */
diff --git a/sound/pci/ctxfi/ctdrv.h b/sound/pci/ctxfi/ctdrv.h
deleted file mode 100644
index f776a44f52c..00000000000
--- a/sound/pci/ctxfi/ctdrv.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/**
- * Copyright (C) 2008, Creative Technology Ltd. All Rights Reserved.
- *
- * This source file is released under GPL v2 license (no other versions).
- * See the COPYING file included in the main directory of this source
- * distribution for the license terms and conditions.
- *
- * @file    ctdrv.h
- *
- * @breaf
- * This file contains the definition of card IDs supported by this driver.
- *
- * @author Liu Chun
- *
- */
-
-#ifndef CTDRV_H
-#define CTDRV_H
-
-#define PCI_VENDOR_CREATIVE		0x1102
-#define PCI_DEVICE_CREATIVE_20K1	0x0005
-#define PCI_DEVICE_CREATIVE_20K2	0x000B
-#define PCI_SUBVENDOR_CREATIVE		0x1102
-#define PCI_SUBSYS_CREATIVE_SB0760	0x0024
-#define PCI_SUBSYS_CREATIVE_SB08801	0x0041
-#define PCI_SUBSYS_CREATIVE_SB08802	0x0042
-#define PCI_SUBSYS_CREATIVE_SB08803	0x0043
-#define PCI_SUBSYS_CREATIVE_HENDRIX	0x6000
-
-#endif /* CTDRV_H */
diff --git a/sound/pci/ctxfi/xfi.c b/sound/pci/ctxfi/xfi.c
index f9114403651..b7368fded53 100644
--- a/sound/pci/ctxfi/xfi.c
+++ b/sound/pci/ctxfi/xfi.c
@@ -11,10 +11,10 @@
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/moduleparam.h>
+#include <linux/pci_ids.h>
 #include <sound/core.h>
 #include <sound/initval.h>
 #include "ctatc.h"
-#include "ctdrv.h"
 
 MODULE_AUTHOR("Creative Technology Ltd");
 MODULE_DESCRIPTION("X-Fi driver version 1.03");
@@ -32,8 +32,8 @@ static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
 
 static struct pci_device_id ct_pci_dev_ids[] = {
 	/* only X-Fi is supported, so... */
-	{ PCI_DEVICE(PCI_VENDOR_CREATIVE, PCI_DEVICE_CREATIVE_20K1) },
-	{ PCI_DEVICE(PCI_VENDOR_CREATIVE, PCI_DEVICE_CREATIVE_20K2) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_CREATIVE, PCI_DEVICE_ID_CREATIVE_20K1) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_CREATIVE, PCI_DEVICE_ID_CREATIVE_20K2) },
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, ct_pci_dev_ids);
-- 
cgit v1.2.3-70-g09d2


From 077e6dba20e74a455a0452379d2a965c7e1b01ad Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@gmail.com>
Date: Fri, 1 May 2009 21:21:02 +0200
Subject: ide-atapi: switch to rq->resid_len

Now that we have rq->resid_len, use it to account partial completion
amount during the lifetime of an rq, decrementing it on each successful
transfer. As a result, get rid of now unused pc->xferred.

While at it, remove noisy debug call in ide_prep_sense.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
---
 drivers/ide/ide-atapi.c | 19 ++++++++-----------
 drivers/ide/ide-tape.c  | 12 +++++-------
 include/linux/ide.h     |  2 --
 3 files changed, 13 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 7129495b3e4..1022e421abd 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -172,8 +172,6 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
 	unsigned int cmd_len, sense_len;
 	int err;
 
-	debug_log("%s: enter\n", __func__);
-
 	switch (drive->media) {
 	case ide_floppy:
 		cmd_len = 255;
@@ -370,7 +368,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 						     ? "write" : "read");
 			pc->flags |= PC_FLAG_DMA_ERROR;
 		} else
-			pc->xferred = blk_rq_bytes(rq);
+			rq->resid_len = 0;
 		debug_log("%s: DMA finished\n", drive->name);
 	}
 
@@ -379,7 +377,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 		int uptodate, error;
 
 		debug_log("Packet command completed, %d bytes transferred\n",
-			  pc->xferred);
+			  blk_rq_bytes(rq));
 
 		pc->flags &= ~PC_FLAG_DMA_IN_PROGRESS;
 
@@ -467,15 +465,15 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 	ide_pio_bytes(drive, cmd, write, done);
 
 	/* Update transferred byte count */
-	pc->xferred += done;
+	rq->resid_len -= done;
 
 	bcount -= done;
 
 	if (bcount)
 		ide_pad_transfer(drive, write, bcount);
 
-	debug_log("[cmd %x] transferred %d bytes, padded %d bytes\n",
-		  rq->cmd[0], done, bcount);
+	debug_log("[cmd %x] transferred %d bytes, padded %d bytes, resid: %u\n",
+		  rq->cmd[0], done, bcount, rq->resid_len);
 
 	/* And set the interrupt handler again */
 	ide_set_handler(drive, ide_pc_intr, timeout);
@@ -643,16 +641,15 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd)
 	} else {
 		pc = drive->pc;
 
-		/* We haven't transferred any data yet */
-		pc->xferred = 0;
-
 		valid_tf = IDE_VALID_DEVICE;
 		bytes = blk_rq_bytes(rq);
-
 		bcount = ((drive->media == ide_tape) ? bytes
 						     : min_t(unsigned int,
 							     bytes, 63 * 1024));
 
+		/* We haven't transferred any data yet */
+		rq->resid_len = bcount;
+
 		if (pc->flags & PC_FLAG_DMA_ERROR) {
 			pc->flags &= ~PC_FLAG_DMA_ERROR;
 			ide_dma_off(drive);
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index aaeef12f80a..c9337099797 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -301,11 +301,9 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense)
 	debug_log(DBG_ERR, "pc = %x, sense key = %x, asc = %x, ascq = %x\n",
 		 pc->c[0], tape->sense_key, tape->asc, tape->ascq);
 
-	/* Correct pc->xferred by asking the tape.	 */
+	/* correct remaining bytes to transfer */
 	if (pc->flags & PC_FLAG_DMA_ERROR)
-		pc->xferred = blk_rq_bytes(rq) -
-			tape->blk_size *
-			get_unaligned_be32(&sense[3]);
+		rq->resid_len = tape->blk_size * get_unaligned_be32(&sense[3]);
 
 	/*
 	 * If error was the result of a zero-length read or write command,
@@ -339,7 +337,7 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense)
 			pc->flags |= PC_FLAG_ABORT;
 		}
 		if (!(pc->flags & PC_FLAG_ABORT) &&
-		    pc->xferred)
+		    (blk_rq_bytes(rq) - rq->resid_len))
 			pc->retries = IDETAPE_MAX_PC_RETRIES + 1;
 	}
 }
@@ -369,7 +367,8 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc)
 			printk(KERN_ERR "ide-tape: Error in REQUEST SENSE "
 					"itself - Aborting request!\n");
 	} else if (pc->c[0] == READ_6 || pc->c[0] == WRITE_6) {
-		int blocks = pc->xferred / tape->blk_size;
+		unsigned int blocks =
+			(blk_rq_bytes(rq) - rq->resid_len) / tape->blk_size;
 
 		tape->avg_size += blocks * tape->blk_size;
 
@@ -381,7 +380,6 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc)
 		}
 
 		tape->first_frame += blocks;
-		rq->resid_len = blk_rq_bytes(rq) - blocks * tape->blk_size;
 
 		if (pc->error) {
 			uptodate = 0;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 34c128f0a33..745a393af27 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -357,8 +357,6 @@ struct ide_atapi_pc {
 
 	/* bytes to transfer */
 	int req_xfer;
-	/* bytes actually transferred */
-	int xferred;
 
 	/* data buffer */
 	u8 *buf;
-- 
cgit v1.2.3-70-g09d2


From 5a0e43b5e2ee9a295f864c38f0e853b1a4fc3892 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@gmail.com>
Date: Fri, 1 May 2009 23:27:11 +0200
Subject: ide-atapi: add a len-parameter to ide_queue_pc_tail

This is in preparation for removing ide_atapi_pc.

There should be no functional change resulting from this patch.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
---
 drivers/ide/ide-atapi.c        | 12 ++++++------
 drivers/ide/ide-floppy.c       |  4 ++--
 drivers/ide/ide-floppy_ioctl.c |  8 ++++----
 drivers/ide/ide-tape.c         | 26 +++++++++++++-------------
 include/linux/ide.h            |  3 ++-
 5 files changed, 27 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 1022e421abd..0d4da2c1adc 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(ide_init_pc);
  * and wait for it to be serviced.
  */
 int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
-		      struct ide_atapi_pc *pc)
+		      struct ide_atapi_pc *pc, unsigned int bufflen)
 {
 	struct request *rq;
 	int error;
@@ -93,8 +93,8 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
 	rq->cmd_type = REQ_TYPE_SPECIAL;
 	rq->special = (char *)pc;
 
-	if (pc->req_xfer) {
-		error = blk_rq_map_kern(drive->queue, rq, pc->buf, pc->req_xfer,
+	if (bufflen) {
+		error = blk_rq_map_kern(drive->queue, rq, pc->buf, bufflen,
 					GFP_NOIO);
 		if (error)
 			goto put_req;
@@ -117,7 +117,7 @@ int ide_do_test_unit_ready(ide_drive_t *drive, struct gendisk *disk)
 	ide_init_pc(&pc);
 	pc.c[0] = TEST_UNIT_READY;
 
-	return ide_queue_pc_tail(drive, disk, &pc);
+	return ide_queue_pc_tail(drive, disk, &pc, 0);
 }
 EXPORT_SYMBOL_GPL(ide_do_test_unit_ready);
 
@@ -132,7 +132,7 @@ int ide_do_start_stop(ide_drive_t *drive, struct gendisk *disk, int start)
 	if (drive->media == ide_tape)
 		pc.flags |= PC_FLAG_WAIT_FOR_DSC;
 
-	return ide_queue_pc_tail(drive, disk, &pc);
+	return ide_queue_pc_tail(drive, disk, &pc, 0);
 }
 EXPORT_SYMBOL_GPL(ide_do_start_stop);
 
@@ -147,7 +147,7 @@ int ide_set_media_lock(ide_drive_t *drive, struct gendisk *disk, int on)
 	pc.c[0] = ALLOW_MEDIUM_REMOVAL;
 	pc.c[4] = on;
 
-	return ide_queue_pc_tail(drive, disk, &pc);
+	return ide_queue_pc_tail(drive, disk, &pc, 0);
 }
 EXPORT_SYMBOL_GPL(ide_set_media_lock);
 
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index a1c55985d4a..5df00d4ab8d 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -318,7 +318,7 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive,
 
 	ide_floppy_create_mode_sense_cmd(pc, IDEFLOPPY_FLEXIBLE_DISK_PAGE);
 
-	if (ide_queue_pc_tail(drive, disk, pc)) {
+	if (ide_queue_pc_tail(drive, disk, pc, pc->req_xfer)) {
 		printk(KERN_ERR PFX "Can't get flexible disk page params\n");
 		return 1;
 	}
@@ -390,7 +390,7 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
 	pc.buf = &pc_buf[0];
 	pc.buf_size = sizeof(pc_buf);
 
-	if (ide_queue_pc_tail(drive, disk, &pc)) {
+	if (ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer)) {
 		printk(KERN_ERR PFX "Can't get floppy parameters\n");
 		return 1;
 	}
diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c
index cd8a42027ed..75f1d50276a 100644
--- a/drivers/ide/ide-floppy_ioctl.c
+++ b/drivers/ide/ide-floppy_ioctl.c
@@ -50,7 +50,7 @@ static int ide_floppy_get_format_capacities(ide_drive_t *drive,
 	pc->buf = &pc_buf[0];
 	pc->buf_size = sizeof(pc_buf);
 
-	if (ide_queue_pc_tail(drive, floppy->disk, pc)) {
+	if (ide_queue_pc_tail(drive, floppy->disk, pc, pc->req_xfer)) {
 		printk(KERN_ERR "ide-floppy: Can't get floppy parameters\n");
 		return -EIO;
 	}
@@ -124,7 +124,7 @@ static int ide_floppy_get_sfrp_bit(ide_drive_t *drive, struct ide_atapi_pc *pc)
 	ide_floppy_create_mode_sense_cmd(pc, IDEFLOPPY_CAPABILITIES_PAGE);
 	pc->flags |= PC_FLAG_SUPPRESS_ERROR;
 
-	if (ide_queue_pc_tail(drive, floppy->disk, pc))
+	if (ide_queue_pc_tail(drive, floppy->disk, pc, pc->req_xfer))
 		return 1;
 
 	if (pc->buf[8 + 2] & 0x40)
@@ -172,7 +172,7 @@ static int ide_floppy_format_unit(ide_drive_t *drive, struct ide_atapi_pc *pc,
 	ide_floppy_get_sfrp_bit(drive, pc);
 	ide_floppy_create_format_unit_cmd(pc, blocks, length, flags);
 
-	if (ide_queue_pc_tail(drive, floppy->disk, pc))
+	if (ide_queue_pc_tail(drive, floppy->disk, pc, pc->req_xfer))
 		err = -EIO;
 
 out:
@@ -200,7 +200,7 @@ static int ide_floppy_get_format_progress(ide_drive_t *drive,
 
 	if (drive->atapi_flags & IDE_AFLAG_SRFP) {
 		ide_create_request_sense_cmd(drive, pc);
-		if (ide_queue_pc_tail(drive, floppy->disk, pc))
+		if (ide_queue_pc_tail(drive, floppy->disk, pc, pc->req_xfer))
 			return -EIO;
 
 		if (floppy->sense_key == 2 &&
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index c9337099797..f09a263b72f 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -770,7 +770,7 @@ static int idetape_flush_tape_buffers(ide_drive_t *drive)
 	int rc;
 
 	idetape_create_write_filemark_cmd(drive, &pc, 0);
-	rc = ide_queue_pc_tail(drive, tape->disk, &pc);
+	rc = ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer);
 	if (rc)
 		return rc;
 	idetape_wait_ready(drive, 60 * 5 * HZ);
@@ -793,7 +793,7 @@ static int idetape_read_position(ide_drive_t *drive)
 	debug_log(DBG_PROCS, "Enter %s\n", __func__);
 
 	idetape_create_read_position_cmd(&pc);
-	if (ide_queue_pc_tail(drive, tape->disk, &pc))
+	if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer))
 		return -1;
 	position = tape->first_frame;
 	return position;
@@ -846,12 +846,12 @@ static int idetape_position_tape(ide_drive_t *drive, unsigned int block,
 		__ide_tape_discard_merge_buffer(drive);
 	idetape_wait_ready(drive, 60 * 5 * HZ);
 	idetape_create_locate_cmd(drive, &pc, block, partition, skip);
-	retval = ide_queue_pc_tail(drive, disk, &pc);
+	retval = ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer);
 	if (retval)
 		return (retval);
 
 	idetape_create_read_position_cmd(&pc);
-	return ide_queue_pc_tail(drive, disk, &pc);
+	return ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer);
 }
 
 static void ide_tape_discard_merge_buffer(ide_drive_t *drive,
@@ -1047,12 +1047,12 @@ static int idetape_rewind_tape(ide_drive_t *drive)
 	debug_log(DBG_SENSE, "Enter %s\n", __func__);
 
 	idetape_create_rewind_cmd(drive, &pc);
-	retval = ide_queue_pc_tail(drive, disk, &pc);
+	retval = ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer);
 	if (retval)
 		return retval;
 
 	idetape_create_read_position_cmd(&pc);
-	retval = ide_queue_pc_tail(drive, disk, &pc);
+	retval = ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer);
 	if (retval)
 		return retval;
 	return 0;
@@ -1120,7 +1120,7 @@ static int idetape_space_over_filemarks(ide_drive_t *drive, short mt_op,
 	case MTBSF:
 		idetape_create_space_cmd(&pc, mt_count - count,
 					 IDETAPE_SPACE_OVER_FILEMARK);
-		return ide_queue_pc_tail(drive, disk, &pc);
+		return ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer);
 	case MTFSFM:
 	case MTBSFM:
 		if (!sprev)
@@ -1259,7 +1259,7 @@ static int idetape_write_filemark(ide_drive_t *drive)
 
 	/* Write a filemark */
 	idetape_create_write_filemark_cmd(drive, &pc, 1);
-	if (ide_queue_pc_tail(drive, tape->disk, &pc)) {
+	if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer)) {
 		printk(KERN_ERR "ide-tape: Couldn't write a filemark\n");
 		return -EIO;
 	}
@@ -1344,11 +1344,11 @@ static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count)
 			IDETAPE_LU_RETENSION_MASK | IDETAPE_LU_LOAD_MASK);
 	case MTEOM:
 		idetape_create_space_cmd(&pc, 0, IDETAPE_SPACE_TO_EOD);
-		return ide_queue_pc_tail(drive, disk, &pc);
+		return ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer);
 	case MTERASE:
 		(void)idetape_rewind_tape(drive);
 		idetape_create_erase_cmd(&pc);
-		return ide_queue_pc_tail(drive, disk, &pc);
+		return ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer);
 	case MTSETBLK:
 		if (mt_count) {
 			if (mt_count < tape->blk_size ||
@@ -1457,7 +1457,7 @@ static void ide_tape_get_bsize_from_bdesc(ide_drive_t *drive)
 	struct ide_atapi_pc pc;
 
 	idetape_create_mode_sense_cmd(&pc, IDETAPE_BLOCK_DESCRIPTOR);
-	if (ide_queue_pc_tail(drive, tape->disk, &pc)) {
+	if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer)) {
 		printk(KERN_ERR "ide-tape: Can't get block descriptor\n");
 		if (tape->blk_size == 0) {
 			printk(KERN_WARNING "ide-tape: Cannot deal with zero "
@@ -1613,7 +1613,7 @@ static void idetape_get_inquiry_results(ide_drive_t *drive)
 	pc.buf = &pc_buf[0];
 	pc.buf_size = sizeof(pc_buf);
 
-	if (ide_queue_pc_tail(drive, tape->disk, &pc)) {
+	if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer)) {
 		printk(KERN_ERR "ide-tape: %s: can't get INQUIRY results\n",
 				tape->name);
 		return;
@@ -1642,7 +1642,7 @@ static void idetape_get_mode_sense_results(ide_drive_t *drive)
 	u8 speed, max_speed;
 
 	idetape_create_mode_sense_cmd(&pc, IDETAPE_CAPABILITIES_PAGE);
-	if (ide_queue_pc_tail(drive, tape->disk, &pc)) {
+	if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer)) {
 		printk(KERN_ERR "ide-tape: Can't get tape parameters - assuming"
 				" some default values\n");
 		tape->blk_size = 512;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 745a393af27..7e15bd1eaae 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1160,7 +1160,8 @@ enum {
 	REQ_IDETAPE_WRITE	= (1 << 3),
 };
 
-int ide_queue_pc_tail(ide_drive_t *, struct gendisk *, struct ide_atapi_pc *);
+int ide_queue_pc_tail(ide_drive_t *, struct gendisk *, struct ide_atapi_pc *,
+		      unsigned int);
 
 int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *);
 int ide_do_start_stop(ide_drive_t *, struct gendisk *, int);
-- 
cgit v1.2.3-70-g09d2


From b13345f39dadbabdabaf8819cf6df26913da9e8d Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@gmail.com>
Date: Sat, 2 May 2009 10:26:12 +0200
Subject: ide-atapi: add a buffer-arg to ide_queue_pc_tail

This is in preparation of removing ide_atapi_pc. Expose the buffer as an
argument to ide_queue_pc_tail with later replacing it with local buffer
or even kmalloc'ed one if needed due to stack usage constraints.

Also, add the possibility of passing a NULL-ptr buffer for cmds which
don't transfer data besides the cdb. While at it, switch to local buffer
in idetape_get_mode_sense_results().

There should be no functional change resulting from this patch.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
---
 drivers/ide/ide-atapi.c        | 12 ++++++------
 drivers/ide/ide-floppy.c       | 17 ++++++++---------
 drivers/ide/ide-floppy_ioctl.c | 16 ++++++++--------
 drivers/ide/ide-tape.c         | 37 ++++++++++++++++++-------------------
 include/linux/ide.h            |  2 +-
 5 files changed, 41 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 0d4da2c1adc..b12be1f17f1 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(ide_init_pc);
  * and wait for it to be serviced.
  */
 int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
-		      struct ide_atapi_pc *pc, unsigned int bufflen)
+		      struct ide_atapi_pc *pc, void *buf, unsigned int bufflen)
 {
 	struct request *rq;
 	int error;
@@ -93,8 +93,8 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
 	rq->cmd_type = REQ_TYPE_SPECIAL;
 	rq->special = (char *)pc;
 
-	if (bufflen) {
-		error = blk_rq_map_kern(drive->queue, rq, pc->buf, bufflen,
+	if (buf && bufflen) {
+		error = blk_rq_map_kern(drive->queue, rq, buf, bufflen,
 					GFP_NOIO);
 		if (error)
 			goto put_req;
@@ -117,7 +117,7 @@ int ide_do_test_unit_ready(ide_drive_t *drive, struct gendisk *disk)
 	ide_init_pc(&pc);
 	pc.c[0] = TEST_UNIT_READY;
 
-	return ide_queue_pc_tail(drive, disk, &pc, 0);
+	return ide_queue_pc_tail(drive, disk, &pc, NULL, 0);
 }
 EXPORT_SYMBOL_GPL(ide_do_test_unit_ready);
 
@@ -132,7 +132,7 @@ int ide_do_start_stop(ide_drive_t *drive, struct gendisk *disk, int start)
 	if (drive->media == ide_tape)
 		pc.flags |= PC_FLAG_WAIT_FOR_DSC;
 
-	return ide_queue_pc_tail(drive, disk, &pc, 0);
+	return ide_queue_pc_tail(drive, disk, &pc, NULL, 0);
 }
 EXPORT_SYMBOL_GPL(ide_do_start_stop);
 
@@ -147,7 +147,7 @@ int ide_set_media_lock(ide_drive_t *drive, struct gendisk *disk, int on)
 	pc.c[0] = ALLOW_MEDIUM_REMOVAL;
 	pc.c[4] = on;
 
-	return ide_queue_pc_tail(drive, disk, &pc, 0);
+	return ide_queue_pc_tail(drive, disk, &pc, NULL, 0);
 }
 EXPORT_SYMBOL_GPL(ide_set_media_lock);
 
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 5df00d4ab8d..be21cf23f8c 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -318,7 +318,7 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive,
 
 	ide_floppy_create_mode_sense_cmd(pc, IDEFLOPPY_FLEXIBLE_DISK_PAGE);
 
-	if (ide_queue_pc_tail(drive, disk, pc, pc->req_xfer)) {
+	if (ide_queue_pc_tail(drive, disk, pc, pc->buf, pc->req_xfer)) {
 		printk(KERN_ERR PFX "Can't get flexible disk page params\n");
 		return 1;
 	}
@@ -387,22 +387,21 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
 	drive->capacity64 = 0;
 
 	ide_floppy_create_read_capacity_cmd(&pc);
-	pc.buf = &pc_buf[0];
 	pc.buf_size = sizeof(pc_buf);
 
-	if (ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer)) {
+	if (ide_queue_pc_tail(drive, disk, &pc, pc_buf, pc.req_xfer)) {
 		printk(KERN_ERR PFX "Can't get floppy parameters\n");
 		return 1;
 	}
-	header_len = pc.buf[3];
-	cap_desc = &pc.buf[4];
+	header_len = pc_buf[3];
+	cap_desc = &pc_buf[4];
 	desc_cnt = header_len / 8; /* capacity descriptor of 8 bytes */
 
 	for (i = 0; i < desc_cnt; i++) {
 		unsigned int desc_start = 4 + i*8;
 
-		blocks = be32_to_cpup((__be32 *)&pc.buf[desc_start]);
-		length = be16_to_cpup((__be16 *)&pc.buf[desc_start + 6]);
+		blocks = be32_to_cpup((__be32 *)&pc_buf[desc_start]);
+		length = be16_to_cpup((__be16 *)&pc_buf[desc_start + 6]);
 
 		ide_debug_log(IDE_DBG_PROBE, "Descriptor %d: %dkB, %d blocks, "
 					     "%d sector size",
@@ -415,7 +414,7 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
 		 * the code below is valid only for the 1st descriptor, ie i=0
 		 */
 
-		switch (pc.buf[desc_start + 4] & 0x03) {
+		switch (pc_buf[desc_start + 4] & 0x03) {
 		/* Clik! drive returns this instead of CAPACITY_CURRENT */
 		case CAPACITY_UNFORMATTED:
 			if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE))
@@ -464,7 +463,7 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
 			break;
 		}
 		ide_debug_log(IDE_DBG_PROBE, "Descriptor 0 Code: %d",
-					     pc.buf[desc_start + 4] & 0x03);
+					     pc_buf[desc_start + 4] & 0x03);
 	}
 
 	/* Clik! disk does not support get_flexible_disk_page */
diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c
index 75f1d50276a..9c2518d7514 100644
--- a/drivers/ide/ide-floppy_ioctl.c
+++ b/drivers/ide/ide-floppy_ioctl.c
@@ -47,15 +47,14 @@ static int ide_floppy_get_format_capacities(ide_drive_t *drive,
 		return -EINVAL;
 
 	ide_floppy_create_read_capacity_cmd(pc);
-	pc->buf = &pc_buf[0];
 	pc->buf_size = sizeof(pc_buf);
 
-	if (ide_queue_pc_tail(drive, floppy->disk, pc, pc->req_xfer)) {
+	if (ide_queue_pc_tail(drive, floppy->disk, pc, pc_buf, pc->req_xfer)) {
 		printk(KERN_ERR "ide-floppy: Can't get floppy parameters\n");
 		return -EIO;
 	}
 
-	header_len = pc->buf[3];
+	header_len = pc_buf[3];
 	desc_cnt = header_len / 8; /* capacity descriptor of 8 bytes */
 
 	u_index = 0;
@@ -72,8 +71,8 @@ static int ide_floppy_get_format_capacities(ide_drive_t *drive,
 		if (u_index >= u_array_size)
 			break;	/* User-supplied buffer too small */
 
-		blocks = be32_to_cpup((__be32 *)&pc->buf[desc_start]);
-		length = be16_to_cpup((__be16 *)&pc->buf[desc_start + 6]);
+		blocks = be32_to_cpup((__be32 *)&pc_buf[desc_start]);
+		length = be16_to_cpup((__be16 *)&pc_buf[desc_start + 6]);
 
 		if (put_user(blocks, argp))
 			return -EFAULT;
@@ -124,7 +123,7 @@ static int ide_floppy_get_sfrp_bit(ide_drive_t *drive, struct ide_atapi_pc *pc)
 	ide_floppy_create_mode_sense_cmd(pc, IDEFLOPPY_CAPABILITIES_PAGE);
 	pc->flags |= PC_FLAG_SUPPRESS_ERROR;
 
-	if (ide_queue_pc_tail(drive, floppy->disk, pc, pc->req_xfer))
+	if (ide_queue_pc_tail(drive, floppy->disk, pc, pc->buf, pc->req_xfer))
 		return 1;
 
 	if (pc->buf[8 + 2] & 0x40)
@@ -172,7 +171,7 @@ static int ide_floppy_format_unit(ide_drive_t *drive, struct ide_atapi_pc *pc,
 	ide_floppy_get_sfrp_bit(drive, pc);
 	ide_floppy_create_format_unit_cmd(pc, blocks, length, flags);
 
-	if (ide_queue_pc_tail(drive, floppy->disk, pc, pc->req_xfer))
+	if (ide_queue_pc_tail(drive, floppy->disk, pc, pc->buf, pc->req_xfer))
 		err = -EIO;
 
 out:
@@ -200,7 +199,8 @@ static int ide_floppy_get_format_progress(ide_drive_t *drive,
 
 	if (drive->atapi_flags & IDE_AFLAG_SRFP) {
 		ide_create_request_sense_cmd(drive, pc);
-		if (ide_queue_pc_tail(drive, floppy->disk, pc, pc->req_xfer))
+		if (ide_queue_pc_tail(drive, floppy->disk, pc, pc->buf,
+				      pc->req_xfer))
 			return -EIO;
 
 		if (floppy->sense_key == 2 &&
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index f09a263b72f..1f7f50473a4 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -770,7 +770,7 @@ static int idetape_flush_tape_buffers(ide_drive_t *drive)
 	int rc;
 
 	idetape_create_write_filemark_cmd(drive, &pc, 0);
-	rc = ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer);
+	rc = ide_queue_pc_tail(drive, tape->disk, &pc, NULL, 0);
 	if (rc)
 		return rc;
 	idetape_wait_ready(drive, 60 * 5 * HZ);
@@ -793,7 +793,7 @@ static int idetape_read_position(ide_drive_t *drive)
 	debug_log(DBG_PROCS, "Enter %s\n", __func__);
 
 	idetape_create_read_position_cmd(&pc);
-	if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer))
+	if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.buf, pc.req_xfer))
 		return -1;
 	position = tape->first_frame;
 	return position;
@@ -846,12 +846,12 @@ static int idetape_position_tape(ide_drive_t *drive, unsigned int block,
 		__ide_tape_discard_merge_buffer(drive);
 	idetape_wait_ready(drive, 60 * 5 * HZ);
 	idetape_create_locate_cmd(drive, &pc, block, partition, skip);
-	retval = ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer);
+	retval = ide_queue_pc_tail(drive, disk, &pc, NULL, 0);
 	if (retval)
 		return (retval);
 
 	idetape_create_read_position_cmd(&pc);
-	return ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer);
+	return ide_queue_pc_tail(drive, disk, &pc, pc.buf, pc.req_xfer);
 }
 
 static void ide_tape_discard_merge_buffer(ide_drive_t *drive,
@@ -1047,12 +1047,12 @@ static int idetape_rewind_tape(ide_drive_t *drive)
 	debug_log(DBG_SENSE, "Enter %s\n", __func__);
 
 	idetape_create_rewind_cmd(drive, &pc);
-	retval = ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer);
+	retval = ide_queue_pc_tail(drive, disk, &pc, NULL, 0);
 	if (retval)
 		return retval;
 
 	idetape_create_read_position_cmd(&pc);
-	retval = ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer);
+	retval = ide_queue_pc_tail(drive, disk, &pc, pc.buf, pc.req_xfer);
 	if (retval)
 		return retval;
 	return 0;
@@ -1120,7 +1120,7 @@ static int idetape_space_over_filemarks(ide_drive_t *drive, short mt_op,
 	case MTBSF:
 		idetape_create_space_cmd(&pc, mt_count - count,
 					 IDETAPE_SPACE_OVER_FILEMARK);
-		return ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer);
+		return ide_queue_pc_tail(drive, disk, &pc, NULL, 0);
 	case MTFSFM:
 	case MTBSFM:
 		if (!sprev)
@@ -1259,7 +1259,7 @@ static int idetape_write_filemark(ide_drive_t *drive)
 
 	/* Write a filemark */
 	idetape_create_write_filemark_cmd(drive, &pc, 1);
-	if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer)) {
+	if (ide_queue_pc_tail(drive, tape->disk, &pc, NULL, 0)) {
 		printk(KERN_ERR "ide-tape: Couldn't write a filemark\n");
 		return -EIO;
 	}
@@ -1344,11 +1344,11 @@ static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count)
 			IDETAPE_LU_RETENSION_MASK | IDETAPE_LU_LOAD_MASK);
 	case MTEOM:
 		idetape_create_space_cmd(&pc, 0, IDETAPE_SPACE_TO_EOD);
-		return ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer);
+		return ide_queue_pc_tail(drive, disk, &pc, NULL, 0);
 	case MTERASE:
 		(void)idetape_rewind_tape(drive);
 		idetape_create_erase_cmd(&pc);
-		return ide_queue_pc_tail(drive, disk, &pc, pc.req_xfer);
+		return ide_queue_pc_tail(drive, disk, &pc, NULL, 0);
 	case MTSETBLK:
 		if (mt_count) {
 			if (mt_count < tape->blk_size ||
@@ -1457,7 +1457,7 @@ static void ide_tape_get_bsize_from_bdesc(ide_drive_t *drive)
 	struct ide_atapi_pc pc;
 
 	idetape_create_mode_sense_cmd(&pc, IDETAPE_BLOCK_DESCRIPTOR);
-	if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer)) {
+	if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.buf, pc.req_xfer)) {
 		printk(KERN_ERR "ide-tape: Can't get block descriptor\n");
 		if (tape->blk_size == 0) {
 			printk(KERN_WARNING "ide-tape: Cannot deal with zero "
@@ -1610,17 +1610,16 @@ static void idetape_get_inquiry_results(ide_drive_t *drive)
 	char fw_rev[4], vendor_id[8], product_id[16];
 
 	idetape_create_inquiry_cmd(&pc);
-	pc.buf = &pc_buf[0];
 	pc.buf_size = sizeof(pc_buf);
 
-	if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer)) {
+	if (ide_queue_pc_tail(drive, tape->disk, &pc, pc_buf, pc.req_xfer)) {
 		printk(KERN_ERR "ide-tape: %s: can't get INQUIRY results\n",
 				tape->name);
 		return;
 	}
-	memcpy(vendor_id, &pc.buf[8], 8);
-	memcpy(product_id, &pc.buf[16], 16);
-	memcpy(fw_rev, &pc.buf[32], 4);
+	memcpy(vendor_id, &pc_buf[8], 8);
+	memcpy(product_id, &pc_buf[16], 16);
+	memcpy(fw_rev, &pc_buf[32], 4);
 
 	ide_fixstring(vendor_id, 8, 0);
 	ide_fixstring(product_id, 16, 0);
@@ -1638,11 +1637,11 @@ static void idetape_get_mode_sense_results(ide_drive_t *drive)
 {
 	idetape_tape_t *tape = drive->driver_data;
 	struct ide_atapi_pc pc;
-	u8 *caps;
+	u8 buf[24], *caps;
 	u8 speed, max_speed;
 
 	idetape_create_mode_sense_cmd(&pc, IDETAPE_CAPABILITIES_PAGE);
-	if (ide_queue_pc_tail(drive, tape->disk, &pc, pc.req_xfer)) {
+	if (ide_queue_pc_tail(drive, tape->disk, &pc, buf, pc.req_xfer)) {
 		printk(KERN_ERR "ide-tape: Can't get tape parameters - assuming"
 				" some default values\n");
 		tape->blk_size = 512;
@@ -1651,7 +1650,7 @@ static void idetape_get_mode_sense_results(ide_drive_t *drive)
 		put_unaligned(6*52, (u16 *)&tape->caps[16]);
 		return;
 	}
-	caps = pc.buf + 4 + pc.buf[3];
+	caps = buf + 4 + buf[3];
 
 	/* convert to host order and save for later use */
 	speed = be16_to_cpup((__be16 *)&caps[14]);
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 7e15bd1eaae..4cd7157a403 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1161,7 +1161,7 @@ enum {
 };
 
 int ide_queue_pc_tail(ide_drive_t *, struct gendisk *, struct ide_atapi_pc *,
-		      unsigned int);
+		      void *, unsigned int);
 
 int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *);
 int ide_do_start_stop(ide_drive_t *, struct gendisk *, int);
-- 
cgit v1.2.3-70-g09d2


From 19f52a784f7ecb5b51cd73cc4514614b600b995a Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@gmail.com>
Date: Mon, 4 May 2009 09:53:03 +0200
Subject: ide-atapi: remove pc->buf

Now after all users of pc->buf have been converted, remove the 64B buffer
embedded in each packet command.

There should be no functional change resulting from this patch.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
---
 drivers/ide/ide-atapi.c        |  6 ------
 drivers/ide/ide-floppy.c       |  8 +-------
 drivers/ide/ide-floppy_ioctl.c |  1 -
 drivers/ide/ide-tape.c         |  7 ++-----
 include/linux/ide.h            | 11 -----------
 5 files changed, 3 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 66ea1e7774f..3075b041466 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -74,8 +74,6 @@ EXPORT_SYMBOL_GPL(ide_check_atapi_device);
 void ide_init_pc(struct ide_atapi_pc *pc)
 {
 	memset(pc, 0, sizeof(*pc));
-	pc->buf = pc->pc_buf;
-	pc->buf_size = IDE_PC_BUFFER_SIZE;
 }
 EXPORT_SYMBOL_GPL(ide_init_pc);
 
@@ -254,10 +252,6 @@ void ide_retry_pc(ide_drive_t *drive)
 	ide_init_pc(pc);
 	memcpy(pc->c, sense_rq->cmd, 12);
 
-	/* pointer to mapped address */
-	pc->buf = bio_data(sense_rq->bio);
-	pc->req_xfer = blk_rq_bytes(sense_rq);
-
 	if (drive->media == ide_tape)
 		set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
 
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 14e5e9ca2ad..800c83a9db8 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -210,8 +210,7 @@ static void idefloppy_create_rw_cmd(ide_drive_t *drive,
 	pc->rq = rq;
 	if (rq->cmd_flags & REQ_RW)
 		pc->flags |= PC_FLAG_WRITING;
-	pc->buf = NULL;
-	pc->buf_size = blk_rq_bytes(rq);
+
 	pc->flags |= PC_FLAG_DMA_OK;
 }
 
@@ -226,9 +225,6 @@ static void idefloppy_blockpc_cmd(struct ide_disk_obj *floppy,
 		if (rq_data_dir(rq) == WRITE)
 			pc->flags |= PC_FLAG_WRITING;
 	}
-	/* pio will be performed by ide_pio_bytes() which handles sg fine */
-	pc->buf = NULL;
-	pc->buf_size = blk_rq_bytes(rq);
 }
 
 static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
@@ -388,8 +384,6 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
 	drive->capacity64 = 0;
 
 	ide_floppy_create_read_capacity_cmd(&pc);
-	pc.buf_size = sizeof(pc_buf);
-
 	if (ide_queue_pc_tail(drive, disk, &pc, pc_buf, pc.req_xfer)) {
 		printk(KERN_ERR PFX "Can't get floppy parameters\n");
 		return 1;
diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c
index 3a1f9b50b3e..9c2288234de 100644
--- a/drivers/ide/ide-floppy_ioctl.c
+++ b/drivers/ide/ide-floppy_ioctl.c
@@ -47,7 +47,6 @@ static int ide_floppy_get_format_capacities(ide_drive_t *drive,
 		return -EINVAL;
 
 	ide_floppy_create_read_capacity_cmd(pc);
-	pc->buf_size = sizeof(pc_buf);
 
 	if (ide_queue_pc_tail(drive, floppy->disk, pc, pc_buf, pc->req_xfer)) {
 		printk(KERN_ERR "ide-floppy: Can't get floppy parameters\n");
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index ead2734bc71..9ca2665faf3 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -568,9 +568,8 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape,
 	ide_init_pc(pc);
 	put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]);
 	pc->c[1] = 1;
-	pc->buf = NULL;
-	pc->buf_size = blk_rq_bytes(rq);
-	if (pc->buf_size == tape->buffer_size)
+
+	if (blk_rq_bytes(rq) == tape->buffer_size)
 		pc->flags |= PC_FLAG_DMA_OK;
 
 	if (opcode == READ_6)
@@ -1608,8 +1607,6 @@ static void idetape_get_inquiry_results(ide_drive_t *drive)
 	char fw_rev[4], vendor_id[8], product_id[16];
 
 	idetape_create_inquiry_cmd(&pc);
-	pc.buf_size = sizeof(pc_buf);
-
 	if (ide_queue_pc_tail(drive, tape->disk, &pc, pc_buf, pc.req_xfer)) {
 		printk(KERN_ERR "ide-tape: %s: can't get INQUIRY results\n",
 				tape->name);
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 4cd7157a403..59aedcd7fae 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -341,11 +341,6 @@ enum {
 	PC_FLAG_WRITING			= (1 << 6),
 };
 
-/*
- * With each packet command, we allocate a buffer of IDE_PC_BUFFER_SIZE bytes.
- * This is used for several packet commands (not for READ/WRITE commands).
- */
-#define IDE_PC_BUFFER_SIZE	64
 #define ATAPI_WAIT_PC		(60 * HZ)
 
 struct ide_atapi_pc {
@@ -358,10 +353,6 @@ struct ide_atapi_pc {
 	/* bytes to transfer */
 	int req_xfer;
 
-	/* data buffer */
-	u8 *buf;
-	int buf_size;
-
 	/* the corresponding request */
 	struct request *rq;
 
@@ -371,8 +362,6 @@ struct ide_atapi_pc {
 	 * those are more or less driver-specific and some of them are subject
 	 * to change/removal later.
 	 */
-	u8 pc_buf[IDE_PC_BUFFER_SIZE];
-
 	unsigned long timeout;
 };
 
-- 
cgit v1.2.3-70-g09d2


From 103f7033bd0f7b65ff3e0a5ea72449d08010b031 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@gmail.com>
Date: Sun, 26 Apr 2009 10:39:07 +0200
Subject: ide: unify interrupt reason checking

Add ide_check_ireason() function that handles all ATAPI devices.
Reorganize all unlikely cases in ireason checking further down in the
code path.

In addition, add PFX for printks originating from ide-atapi. Finally,
remove ide_cd_check_ireason.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
---
 drivers/ide/ide-atapi.c | 95 +++++++++++++++++++++++++++++++++++--------------
 drivers/ide/ide-cd.c    | 47 +-----------------------
 include/linux/ide.h     |  2 ++
 3 files changed, 71 insertions(+), 73 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 3075b041466..1125ce29809 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -10,6 +10,9 @@
 
 #include <scsi/scsi.h>
 
+#define DRV_NAME "ide-atapi"
+#define PFX DRV_NAME ": "
+
 #ifdef DEBUG
 #define debug_log(fmt, args...) \
 	printk(KERN_INFO "ide: " fmt, ## args)
@@ -197,8 +200,8 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
 			      GFP_NOIO);
 	if (unlikely(err)) {
 		if (printk_ratelimit())
-			printk(KERN_WARNING "%s: failed to map sense buffer\n",
-			       drive->name);
+			printk(KERN_WARNING PFX "%s: failed to map sense "
+					    "buffer\n", drive->name);
 		return;
 	}
 
@@ -219,7 +222,7 @@ int ide_queue_sense_rq(ide_drive_t *drive, void *special)
 {
 	/* deferred failure from ide_prep_sense() */
 	if (!drive->sense_rq_armed) {
-		printk(KERN_WARNING "%s: failed queue sense request\n",
+		printk(KERN_WARNING PFX "%s: error queuing a sense request\n",
 		       drive->name);
 		return -ENOMEM;
 	}
@@ -292,7 +295,7 @@ int ide_cd_expiry(ide_drive_t *drive)
 		break;
 	default:
 		if (!(rq->cmd_flags & REQ_QUIET))
-			printk(KERN_INFO "cmd 0x%x timed out\n",
+			printk(KERN_INFO PFX "cmd 0x%x timed out\n",
 					 rq->cmd[0]);
 		wait = 0;
 		break;
@@ -325,6 +328,55 @@ void ide_read_bcount_and_ireason(ide_drive_t *drive, u16 *bcount, u8 *ireason)
 }
 EXPORT_SYMBOL_GPL(ide_read_bcount_and_ireason);
 
+/*
+ * Check the contents of the interrupt reason register and attempt to recover if
+ * there are problems.
+ *
+ * Returns:
+ * - 0 if everything's ok
+ * - 1 if the request has to be terminated.
+ */
+int ide_check_ireason(ide_drive_t *drive, struct request *rq, int len,
+		      int ireason, int rw)
+{
+	ide_hwif_t *hwif = drive->hwif;
+
+	debug_log("ireason: 0x%x, rw: 0x%x\n", ireason, rw);
+
+	if (ireason == (!rw << 1))
+		return 0;
+	else if (ireason == (rw << 1)) {
+		printk(KERN_ERR PFX "%s: %s: wrong transfer direction!\n",
+				drive->name, __func__);
+
+		if (dev_is_idecd(drive))
+			ide_pad_transfer(drive, rw, len);
+	} else if (!rw && ireason == ATAPI_COD) {
+		if (dev_is_idecd(drive)) {
+			/*
+			 * Some drives (ASUS) seem to tell us that status info
+			 * is available.  Just get it and ignore.
+			 */
+			(void)hwif->tp_ops->read_status(hwif);
+			return 0;
+		}
+	} else {
+		if (ireason & ATAPI_COD)
+			printk(KERN_ERR PFX "%s: CoD != 0 in %s\n", drive->name,
+					__func__);
+
+		/* drive wants a command packet, or invalid ireason... */
+		printk(KERN_ERR PFX "%s: %s: bad interrupt reason 0x%02x\n",
+				drive->name, __func__, ireason);
+	}
+
+	if (dev_is_idecd(drive) && rq->cmd_type == REQ_TYPE_ATA_PC)
+		rq->cmd_flags |= REQ_FAILED;
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(ide_check_ireason);
+
 /*
  * This is the usual interrupt handler which will be called during a packet
  * command.  We will transfer some of the data (as requested by the drive)
@@ -359,7 +411,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 
 		if (rc || (drive->media == ide_tape && (stat & ATA_ERR))) {
 			if (drive->media == ide_floppy)
-				printk(KERN_ERR "%s: DMA %s error\n",
+				printk(KERN_ERR PFX "%s: DMA %s error\n",
 					drive->name, rq_data_dir(pc->rq)
 						     ? "write" : "read");
 			pc->flags |= PC_FLAG_DMA_ERROR;
@@ -391,8 +443,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 				pc->rq->errors++;
 
 			if (rq->cmd[0] == REQUEST_SENSE) {
-				printk(KERN_ERR "%s: I/O error in request sense"
-						" command\n", drive->name);
+				printk(KERN_ERR PFX "%s: I/O error in request "
+						"sense command\n", drive->name);
 				return ide_do_reset(drive);
 			}
 
@@ -434,8 +486,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 
 	if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) {
 		pc->flags &= ~PC_FLAG_DMA_IN_PROGRESS;
-		printk(KERN_ERR "%s: The device wants to issue more interrupts "
-				"in DMA mode\n", drive->name);
+		printk(KERN_ERR PFX "%s: The device wants to issue more "
+				"interrupts in DMA mode\n", drive->name);
 		ide_dma_off(drive);
 		return ide_do_reset(drive);
 	}
@@ -443,19 +495,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 	/* Get the number of bytes to transfer on this interrupt. */
 	ide_read_bcount_and_ireason(drive, &bcount, &ireason);
 
-	if (ireason & ATAPI_COD) {
-		printk(KERN_ERR "%s: CoD != 0 in %s\n", drive->name, __func__);
+	if (ide_check_ireason(drive, rq, bcount, ireason, write))
 		return ide_do_reset(drive);
-	}
-
-	if (((ireason & ATAPI_IO) == ATAPI_IO) == write) {
-		/* Hopefully, we will never get here */
-		printk(KERN_ERR "%s: We wanted to %s, but the device wants us "
-				"to %s!\n", drive->name,
-				(ireason & ATAPI_IO) ? "Write" : "Read",
-				(ireason & ATAPI_IO) ? "Read" : "Write");
-		return ide_do_reset(drive);
-	}
 
 	done = min_t(unsigned int, bcount, cmd->nleft);
 	ide_pio_bytes(drive, cmd, write, done);
@@ -503,13 +544,13 @@ static u8 ide_wait_ireason(ide_drive_t *drive, u8 ireason)
 
 	while (retries-- && ((ireason & ATAPI_COD) == 0 ||
 		(ireason & ATAPI_IO))) {
-		printk(KERN_ERR "%s: (IO,CoD != (0,1) while issuing "
+		printk(KERN_ERR PFX "%s: (IO,CoD != (0,1) while issuing "
 				"a packet command, retrying\n", drive->name);
 		udelay(100);
 		ireason = ide_read_ireason(drive);
 		if (retries == 0) {
-			printk(KERN_ERR "%s: (IO,CoD != (0,1) while issuing "
-					"a packet command, ignoring\n",
+			printk(KERN_ERR PFX "%s: (IO,CoD != (0,1) while issuing"
+					" a packet command, ignoring\n",
 					drive->name);
 			ireason |= ATAPI_COD;
 			ireason &= ~ATAPI_IO;
@@ -540,7 +581,7 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 	u8 ireason;
 
 	if (ide_wait_stat(&startstop, drive, ATA_DRQ, ATA_BUSY, WAIT_READY)) {
-		printk(KERN_ERR "%s: Strange, packet command initiated yet "
+		printk(KERN_ERR PFX "%s: Strange, packet command initiated yet "
 				"DRQ isn't asserted\n", drive->name);
 		return startstop;
 	}
@@ -582,8 +623,8 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 			ireason = ide_wait_ireason(drive, ireason);
 
 		if ((ireason & ATAPI_COD) == 0 || (ireason & ATAPI_IO)) {
-			printk(KERN_ERR "%s: (IO,CoD) != (0,1) while issuing "
-					"a packet command\n", drive->name);
+			printk(KERN_ERR PFX "%s: (IO,CoD) != (0,1) while "
+				"issuing a packet command\n", drive->name);
 
 			return ide_do_reset(drive);
 		}
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index dca41ae0d04..d299713bfdc 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -410,50 +410,6 @@ end_request:
 		return 2;
 }
 
-/*
- * Check the contents of the interrupt reason register from the cdrom
- * and attempt to recover if there are problems.  Returns  0 if everything's
- * ok; nonzero if the request has been terminated.
- */
-static int ide_cd_check_ireason(ide_drive_t *drive, struct request *rq,
-				int len, int ireason, int rw)
-{
-	ide_hwif_t *hwif = drive->hwif;
-
-	ide_debug_log(IDE_DBG_FUNC, "ireason: 0x%x, rw: 0x%x", ireason, rw);
-
-	/*
-	 * ireason == 0: the drive wants to receive data from us
-	 * ireason == 2: the drive is expecting to transfer data to us
-	 */
-	if (ireason == (!rw << 1))
-		return 0;
-	else if (ireason == (rw << 1)) {
-
-		/* whoops... */
-		printk(KERN_ERR PFX "%s: %s: wrong transfer direction!\n",
-				drive->name, __func__);
-
-		ide_pad_transfer(drive, rw, len);
-	} else  if (rw == 0 && ireason == 1) {
-		/*
-		 * Some drives (ASUS) seem to tell us that status info is
-		 * available.  Just get it and ignore.
-		 */
-		(void)hwif->tp_ops->read_status(hwif);
-		return 0;
-	} else {
-		/* drive wants a command packet, or invalid ireason... */
-		printk(KERN_ERR PFX "%s: %s: bad interrupt reason 0x%02x\n",
-				drive->name, __func__, ireason);
-	}
-
-	if (rq->cmd_type == REQ_TYPE_ATA_PC)
-		rq->cmd_flags |= REQ_FAILED;
-
-	return -1;
-}
-
 static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct ide_cmd *cmd)
 {
 	struct request *rq = cmd->rq;
@@ -645,8 +601,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
 		goto out_end;
 	}
 
-	/* check which way to transfer data */
-	rc = ide_cd_check_ireason(drive, rq, len, ireason, write);
+	rc = ide_check_ireason(drive, rq, len, ireason, write);
 	if (rc)
 		goto out_end;
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 59aedcd7fae..70a2c94d668 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1125,6 +1125,8 @@ void SELECT_MASK(ide_drive_t *, int);
 u8 ide_read_error(ide_drive_t *);
 void ide_read_bcount_and_ireason(ide_drive_t *, u16 *, u8 *);
 
+int ide_check_ireason(ide_drive_t *, struct request *, int, int, int);
+
 int ide_check_atapi_device(ide_drive_t *, const char *);
 
 void ide_init_pc(struct ide_atapi_pc *);
-- 
cgit v1.2.3-70-g09d2


From 9e35ad388bea89f7d6f375af4c0ae98803688666 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 13 May 2009 16:21:38 +0200
Subject: perf_counter: Rework the perf counter disable/enable

The current disable/enable mechanism is:

	token = hw_perf_save_disable();
	...
	/* do bits */
	...
	hw_perf_restore(token);

This works well, provided that the use nests properly. Except we don't.

x86 NMI/INT throttling has non-nested use of this, breaking things. Therefore
provide a reference counter disable/enable interface, where the first disable
disables the hardware, and the last enable enables the hardware again.

[ Impact: refactor, simplify the PMU disable/enable logic ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/perf_counter.c |  24 ++++----
 arch/x86/kernel/cpu/perf_counter.c | 113 ++++++++++++++-----------------------
 drivers/acpi/processor_idle.c      |   6 +-
 include/linux/perf_counter.h       |  10 ++--
 kernel/perf_counter.c              |  76 +++++++++++++++----------
 5 files changed, 109 insertions(+), 120 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 15cdc8e6722..bb1b463c136 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -386,7 +386,7 @@ static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0)
  * Disable all counters to prevent PMU interrupts and to allow
  * counters to be added or removed.
  */
-u64 hw_perf_save_disable(void)
+void hw_perf_disable(void)
 {
 	struct cpu_hw_counters *cpuhw;
 	unsigned long ret;
@@ -428,7 +428,6 @@ u64 hw_perf_save_disable(void)
 		mb();
 	}
 	local_irq_restore(flags);
-	return ret;
 }
 
 /*
@@ -436,7 +435,7 @@ u64 hw_perf_save_disable(void)
  * If we were previously disabled and counters were added, then
  * put the new config on the PMU.
  */
-void hw_perf_restore(u64 disable)
+void hw_perf_enable(void)
 {
 	struct perf_counter *counter;
 	struct cpu_hw_counters *cpuhw;
@@ -448,9 +447,12 @@ void hw_perf_restore(u64 disable)
 	int n_lim;
 	int idx;
 
-	if (disable)
-		return;
 	local_irq_save(flags);
+	if (!cpuhw->disabled) {
+		local_irq_restore(flags);
+		return;
+	}
+
 	cpuhw = &__get_cpu_var(cpu_hw_counters);
 	cpuhw->disabled = 0;
 
@@ -649,19 +651,18 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
 /*
  * Add a counter to the PMU.
  * If all counters are not already frozen, then we disable and
- * re-enable the PMU in order to get hw_perf_restore to do the
+ * re-enable the PMU in order to get hw_perf_enable to do the
  * actual work of reconfiguring the PMU.
  */
 static int power_pmu_enable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuhw;
 	unsigned long flags;
-	u64 pmudis;
 	int n0;
 	int ret = -EAGAIN;
 
 	local_irq_save(flags);
-	pmudis = hw_perf_save_disable();
+	perf_disable();
 
 	/*
 	 * Add the counter to the list (if there is room)
@@ -685,7 +686,7 @@ static int power_pmu_enable(struct perf_counter *counter)
 
 	ret = 0;
  out:
-	hw_perf_restore(pmudis);
+	perf_enable();
 	local_irq_restore(flags);
 	return ret;
 }
@@ -697,11 +698,10 @@ static void power_pmu_disable(struct perf_counter *counter)
 {
 	struct cpu_hw_counters *cpuhw;
 	long i;
-	u64 pmudis;
 	unsigned long flags;
 
 	local_irq_save(flags);
-	pmudis = hw_perf_save_disable();
+	perf_disable();
 
 	power_pmu_read(counter);
 
@@ -735,7 +735,7 @@ static void power_pmu_disable(struct perf_counter *counter)
 		cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
 	}
 
-	hw_perf_restore(pmudis);
+	perf_enable();
 	local_irq_restore(flags);
 }
 
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 7601c014f8f..313638cecbb 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -31,7 +31,6 @@ struct cpu_hw_counters {
 	unsigned long		used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	unsigned long		interrupts;
-	u64			throttle_ctrl;
 	int			enabled;
 };
 
@@ -42,8 +41,8 @@ struct x86_pmu {
 	const char	*name;
 	int		version;
 	int		(*handle_irq)(struct pt_regs *, int);
-	u64		(*save_disable_all)(void);
-	void		(*restore_all)(u64);
+	void		(*disable_all)(void);
+	void		(*enable_all)(void);
 	void		(*enable)(struct hw_perf_counter *, int);
 	void		(*disable)(struct hw_perf_counter *, int);
 	unsigned	eventsel;
@@ -56,6 +55,7 @@ struct x86_pmu {
 	int		counter_bits;
 	u64		counter_mask;
 	u64		max_period;
+	u64		intel_ctrl;
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -311,22 +311,19 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	return 0;
 }
 
-static u64 intel_pmu_save_disable_all(void)
+static void intel_pmu_disable_all(void)
 {
-	u64 ctrl;
-
-	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-
-	return ctrl;
 }
 
-static u64 amd_pmu_save_disable_all(void)
+static void amd_pmu_disable_all(void)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-	int enabled, idx;
+	int idx;
+
+	if (!cpuc->enabled)
+		return;
 
-	enabled = cpuc->enabled;
 	cpuc->enabled = 0;
 	/*
 	 * ensure we write the disable before we start disabling the
@@ -334,8 +331,6 @@ static u64 amd_pmu_save_disable_all(void)
 	 * right thing.
 	 */
 	barrier();
-	if (!enabled)
-		goto out;
 
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		u64 val;
@@ -348,37 +343,31 @@ static u64 amd_pmu_save_disable_all(void)
 		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
 		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
 	}
-
-out:
-	return enabled;
 }
 
-u64 hw_perf_save_disable(void)
+void hw_perf_disable(void)
 {
 	if (!x86_pmu_initialized())
-		return 0;
-	return x86_pmu.save_disable_all();
+		return;
+	return x86_pmu.disable_all();
 }
-/*
- * Exported because of ACPI idle
- */
-EXPORT_SYMBOL_GPL(hw_perf_save_disable);
 
-static void intel_pmu_restore_all(u64 ctrl)
+static void intel_pmu_enable_all(void)
 {
-	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
 }
 
-static void amd_pmu_restore_all(u64 ctrl)
+static void amd_pmu_enable_all(void)
 {
 	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
 	int idx;
 
-	cpuc->enabled = ctrl;
-	barrier();
-	if (!ctrl)
+	if (cpuc->enabled)
 		return;
 
+	cpuc->enabled = 1;
+	barrier();
+
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		u64 val;
 
@@ -392,16 +381,12 @@ static void amd_pmu_restore_all(u64 ctrl)
 	}
 }
 
-void hw_perf_restore(u64 ctrl)
+void hw_perf_enable(void)
 {
 	if (!x86_pmu_initialized())
 		return;
-	x86_pmu.restore_all(ctrl);
+	x86_pmu.enable_all();
 }
-/*
- * Exported because of ACPI idle
- */
-EXPORT_SYMBOL_GPL(hw_perf_restore);
 
 static inline u64 intel_pmu_get_status(void)
 {
@@ -735,15 +720,14 @@ static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi)
 	int bit, cpu = smp_processor_id();
 	u64 ack, status;
 	struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
-	int ret = 0;
-
-	cpuc->throttle_ctrl = intel_pmu_save_disable_all();
 
+	perf_disable();
 	status = intel_pmu_get_status();
-	if (!status)
-		goto out;
+	if (!status) {
+		perf_enable();
+		return 0;
+	}
 
-	ret = 1;
 again:
 	inc_irq_stat(apic_perf_irqs);
 	ack = status;
@@ -767,19 +751,11 @@ again:
 	status = intel_pmu_get_status();
 	if (status)
 		goto again;
-out:
-	/*
-	 * Restore - do not reenable when global enable is off or throttled:
-	 */
-	if (cpuc->throttle_ctrl) {
-		if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS) {
-			intel_pmu_restore_all(cpuc->throttle_ctrl);
-		} else {
-			pr_info("CPU#%d: perfcounters: max interrupt rate exceeded! Throttle on.\n", smp_processor_id());
-		}
-	}
 
-	return ret;
+	if (++cpuc->interrupts != PERFMON_MAX_INTERRUPTS)
+		perf_enable();
+
+	return 1;
 }
 
 static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
@@ -792,13 +768,11 @@ static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
 	struct hw_perf_counter *hwc;
 	int idx, throttle = 0;
 
-	cpuc->throttle_ctrl = cpuc->enabled;
-	cpuc->enabled = 0;
-	barrier();
-
-	if (cpuc->throttle_ctrl) {
-		if (++cpuc->interrupts >= PERFMON_MAX_INTERRUPTS)
-			throttle = 1;
+	if (++cpuc->interrupts == PERFMON_MAX_INTERRUPTS) {
+		throttle = 1;
+		__perf_disable();
+		cpuc->enabled = 0;
+		barrier();
 	}
 
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
@@ -824,9 +798,6 @@ next:
 			amd_pmu_disable_counter(hwc, idx);
 	}
 
-	if (cpuc->throttle_ctrl && !throttle)
-		cpuc->enabled = 1;
-
 	return handled;
 }
 
@@ -839,13 +810,11 @@ void perf_counter_unthrottle(void)
 
 	cpuc = &__get_cpu_var(cpu_hw_counters);
 	if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
-		pr_info("CPU#%d: perfcounters: throttle off.\n", smp_processor_id());
-
 		/*
 		 * Clear them before re-enabling irqs/NMIs again:
 		 */
 		cpuc->interrupts = 0;
-		hw_perf_restore(cpuc->throttle_ctrl);
+		perf_enable();
 	} else {
 		cpuc->interrupts = 0;
 	}
@@ -931,8 +900,8 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
 static struct x86_pmu intel_pmu = {
 	.name			= "Intel",
 	.handle_irq		= intel_pmu_handle_irq,
-	.save_disable_all	= intel_pmu_save_disable_all,
-	.restore_all		= intel_pmu_restore_all,
+	.disable_all		= intel_pmu_disable_all,
+	.enable_all		= intel_pmu_enable_all,
 	.enable			= intel_pmu_enable_counter,
 	.disable		= intel_pmu_disable_counter,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
@@ -951,8 +920,8 @@ static struct x86_pmu intel_pmu = {
 static struct x86_pmu amd_pmu = {
 	.name			= "AMD",
 	.handle_irq		= amd_pmu_handle_irq,
-	.save_disable_all	= amd_pmu_save_disable_all,
-	.restore_all		= amd_pmu_restore_all,
+	.disable_all		= amd_pmu_disable_all,
+	.enable_all		= amd_pmu_enable_all,
 	.enable			= amd_pmu_enable_counter,
 	.disable		= amd_pmu_disable_counter,
 	.eventsel		= MSR_K7_EVNTSEL0,
@@ -1003,6 +972,8 @@ static int intel_pmu_init(void)
 	x86_pmu.counter_bits = eax.split.bit_width;
 	x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1;
 
+	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
+
 	return 0;
 }
 
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index d2830f39d46..9645758c047 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -763,11 +763,9 @@ static int acpi_idle_bm_check(void)
  */
 static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
 {
-	u64 perf_flags;
-
 	/* Don't trace irqs off for idle */
 	stop_critical_timings();
-	perf_flags = hw_perf_save_disable();
+	perf_disable();
 	if (cx->entry_method == ACPI_CSTATE_FFH) {
 		/* Call into architectural FFH based C-state */
 		acpi_processor_ffh_cstate_enter(cx);
@@ -782,7 +780,7 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
 		   gets asserted in time to freeze execution properly. */
 		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
 	}
-	hw_perf_restore(perf_flags);
+	perf_enable();
 	start_critical_timings();
 }
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 614f921d616..e543ecc129f 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -544,8 +544,10 @@ extern void perf_counter_exit_task(struct task_struct *child);
 extern void perf_counter_do_pending(void);
 extern void perf_counter_print_debug(void);
 extern void perf_counter_unthrottle(void);
-extern u64 hw_perf_save_disable(void);
-extern void hw_perf_restore(u64 ctrl);
+extern void __perf_disable(void);
+extern bool __perf_enable(void);
+extern void perf_disable(void);
+extern void perf_enable(void);
 extern int perf_counter_task_disable(void);
 extern int perf_counter_task_enable(void);
 extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
@@ -600,8 +602,8 @@ static inline void perf_counter_exit_task(struct task_struct *child)	{ }
 static inline void perf_counter_do_pending(void)			{ }
 static inline void perf_counter_print_debug(void)			{ }
 static inline void perf_counter_unthrottle(void)			{ }
-static inline void hw_perf_restore(u64 ctrl)				{ }
-static inline u64 hw_perf_save_disable(void)		      { return 0; }
+static inline void perf_disable(void)					{ }
+static inline void perf_enable(void)					{ }
 static inline int perf_counter_task_disable(void)	{ return -EINVAL; }
 static inline int perf_counter_task_enable(void)	{ return -EINVAL; }
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 985be0b662a..e814ff04d7c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -60,8 +60,9 @@ extern __weak const struct pmu *hw_perf_counter_init(struct perf_counter *counte
 	return NULL;
 }
 
-u64 __weak hw_perf_save_disable(void)		{ return 0; }
-void __weak hw_perf_restore(u64 ctrl)		{ barrier(); }
+void __weak hw_perf_disable(void)		{ barrier(); }
+void __weak hw_perf_enable(void)		{ barrier(); }
+
 void __weak hw_perf_counter_setup(int cpu)	{ barrier(); }
 int __weak hw_perf_group_sched_in(struct perf_counter *group_leader,
 	       struct perf_cpu_context *cpuctx,
@@ -72,6 +73,32 @@ int __weak hw_perf_group_sched_in(struct perf_counter *group_leader,
 
 void __weak perf_counter_print_debug(void)	{ }
 
+static DEFINE_PER_CPU(int, disable_count);
+
+void __perf_disable(void)
+{
+	__get_cpu_var(disable_count)++;
+}
+
+bool __perf_enable(void)
+{
+	return !--__get_cpu_var(disable_count);
+}
+
+void perf_disable(void)
+{
+	__perf_disable();
+	hw_perf_disable();
+}
+EXPORT_SYMBOL_GPL(perf_disable); /* ACPI idle */
+
+void perf_enable(void)
+{
+	if (__perf_enable())
+		hw_perf_enable();
+}
+EXPORT_SYMBOL_GPL(perf_enable); /* ACPI idle */
+
 static void
 list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 {
@@ -170,7 +197,6 @@ static void __perf_counter_remove_from_context(void *info)
 	struct perf_counter *counter = info;
 	struct perf_counter_context *ctx = counter->ctx;
 	unsigned long flags;
-	u64 perf_flags;
 
 	/*
 	 * If this is a task context, we need to check whether it is
@@ -191,9 +217,9 @@ static void __perf_counter_remove_from_context(void *info)
 	 * Protect the list operation against NMI by disabling the
 	 * counters on a global level. NOP for non NMI based counters.
 	 */
-	perf_flags = hw_perf_save_disable();
+	perf_disable();
 	list_del_counter(counter, ctx);
-	hw_perf_restore(perf_flags);
+	perf_enable();
 
 	if (!ctx->task) {
 		/*
@@ -538,7 +564,6 @@ static void __perf_install_in_context(void *info)
 	struct perf_counter *leader = counter->group_leader;
 	int cpu = smp_processor_id();
 	unsigned long flags;
-	u64 perf_flags;
 	int err;
 
 	/*
@@ -556,7 +581,7 @@ static void __perf_install_in_context(void *info)
 	 * Protect the list operation against NMI by disabling the
 	 * counters on a global level. NOP for non NMI based counters.
 	 */
-	perf_flags = hw_perf_save_disable();
+	perf_disable();
 
 	add_counter_to_ctx(counter, ctx);
 
@@ -596,7 +621,7 @@ static void __perf_install_in_context(void *info)
 		cpuctx->max_pertask--;
 
  unlock:
-	hw_perf_restore(perf_flags);
+	perf_enable();
 
 	spin_unlock_irqrestore(&ctx->lock, flags);
 }
@@ -663,7 +688,6 @@ static void __perf_counter_enable(void *info)
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_counter_context *ctx = counter->ctx;
 	struct perf_counter *leader = counter->group_leader;
-	unsigned long pmuflags;
 	unsigned long flags;
 	int err;
 
@@ -693,14 +717,14 @@ static void __perf_counter_enable(void *info)
 	if (!group_can_go_on(counter, cpuctx, 1)) {
 		err = -EEXIST;
 	} else {
-		pmuflags = hw_perf_save_disable();
+		perf_disable();
 		if (counter == leader)
 			err = group_sched_in(counter, cpuctx, ctx,
 					     smp_processor_id());
 		else
 			err = counter_sched_in(counter, cpuctx, ctx,
 					       smp_processor_id());
-		hw_perf_restore(pmuflags);
+		perf_enable();
 	}
 
 	if (err) {
@@ -795,7 +819,6 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 			      struct perf_cpu_context *cpuctx)
 {
 	struct perf_counter *counter;
-	u64 flags;
 
 	spin_lock(&ctx->lock);
 	ctx->is_active = 0;
@@ -803,12 +826,12 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 		goto out;
 	update_context_time(ctx);
 
-	flags = hw_perf_save_disable();
+	perf_disable();
 	if (ctx->nr_active) {
 		list_for_each_entry(counter, &ctx->counter_list, list_entry)
 			group_sched_out(counter, cpuctx, ctx);
 	}
-	hw_perf_restore(flags);
+	perf_enable();
  out:
 	spin_unlock(&ctx->lock);
 }
@@ -860,7 +883,6 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 			struct perf_cpu_context *cpuctx, int cpu)
 {
 	struct perf_counter *counter;
-	u64 flags;
 	int can_add_hw = 1;
 
 	spin_lock(&ctx->lock);
@@ -870,7 +892,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 
 	ctx->timestamp = perf_clock();
 
-	flags = hw_perf_save_disable();
+	perf_disable();
 
 	/*
 	 * First go through the list and put on any pinned groups
@@ -917,7 +939,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 				can_add_hw = 0;
 		}
 	}
-	hw_perf_restore(flags);
+	perf_enable();
  out:
 	spin_unlock(&ctx->lock);
 }
@@ -955,7 +977,6 @@ int perf_counter_task_disable(void)
 	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
 	struct perf_counter *counter;
 	unsigned long flags;
-	u64 perf_flags;
 
 	if (likely(!ctx->nr_counters))
 		return 0;
@@ -969,7 +990,7 @@ int perf_counter_task_disable(void)
 	/*
 	 * Disable all the counters:
 	 */
-	perf_flags = hw_perf_save_disable();
+	perf_disable();
 
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
 		if (counter->state != PERF_COUNTER_STATE_ERROR) {
@@ -978,7 +999,7 @@ int perf_counter_task_disable(void)
 		}
 	}
 
-	hw_perf_restore(perf_flags);
+	perf_enable();
 
 	spin_unlock_irqrestore(&ctx->lock, flags);
 
@@ -991,7 +1012,6 @@ int perf_counter_task_enable(void)
 	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
 	struct perf_counter *counter;
 	unsigned long flags;
-	u64 perf_flags;
 	int cpu;
 
 	if (likely(!ctx->nr_counters))
@@ -1007,7 +1027,7 @@ int perf_counter_task_enable(void)
 	/*
 	 * Disable all the counters:
 	 */
-	perf_flags = hw_perf_save_disable();
+	perf_disable();
 
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
 		if (counter->state > PERF_COUNTER_STATE_OFF)
@@ -1017,7 +1037,7 @@ int perf_counter_task_enable(void)
 			ctx->time - counter->total_time_enabled;
 		counter->hw_event.disabled = 0;
 	}
-	hw_perf_restore(perf_flags);
+	perf_enable();
 
 	spin_unlock(&ctx->lock);
 
@@ -1034,7 +1054,6 @@ int perf_counter_task_enable(void)
 static void rotate_ctx(struct perf_counter_context *ctx)
 {
 	struct perf_counter *counter;
-	u64 perf_flags;
 
 	if (!ctx->nr_counters)
 		return;
@@ -1043,12 +1062,12 @@ static void rotate_ctx(struct perf_counter_context *ctx)
 	/*
 	 * Rotate the first entry last (works just fine for group counters too):
 	 */
-	perf_flags = hw_perf_save_disable();
+	perf_disable();
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
 		list_move_tail(&counter->list_entry, &ctx->counter_list);
 		break;
 	}
-	hw_perf_restore(perf_flags);
+	perf_enable();
 
 	spin_unlock(&ctx->lock);
 }
@@ -3194,7 +3213,6 @@ __perf_counter_exit_task(struct task_struct *child,
 	} else {
 		struct perf_cpu_context *cpuctx;
 		unsigned long flags;
-		u64 perf_flags;
 
 		/*
 		 * Disable and unlink this counter.
@@ -3203,7 +3221,7 @@ __perf_counter_exit_task(struct task_struct *child,
 		 * could still be processing it:
 		 */
 		local_irq_save(flags);
-		perf_flags = hw_perf_save_disable();
+		perf_disable();
 
 		cpuctx = &__get_cpu_var(perf_cpu_context);
 
@@ -3214,7 +3232,7 @@ __perf_counter_exit_task(struct task_struct *child,
 
 		child_ctx->nr_counters--;
 
-		hw_perf_restore(perf_flags);
+		perf_enable();
 		local_irq_restore(flags);
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 789f90fcf6b0b54e655740e9396c954378542c79 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 15 May 2009 15:19:27 +0200
Subject: perf_counter: per user mlock gift

Instead of a per-process mlock gift for perf-counters, use a
per-user gift so that there is less of a DoS potential.

[ Impact: allow less worst-case unprivileged memory consumption ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <20090515132018.496182835@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  4 ++++
 kernel/perf_counter.c | 22 +++++++++++++++-------
 2 files changed, 19 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d1857580a13..ff59d123151 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -674,6 +674,10 @@ struct user_struct {
 	struct work_struct work;
 #endif
 #endif
+
+#ifdef CONFIG_PERF_COUNTERS
+	atomic_long_t locked_vm;
+#endif
 };
 
 extern int uids_sysfs_init(void);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0173738dd54..93f4a0e4b87 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -45,7 +45,7 @@ static atomic_t nr_munmap_tracking __read_mostly;
 static atomic_t nr_comm_tracking __read_mostly;
 
 int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
-int sysctl_perf_counter_mlock __read_mostly = 128; /* 'free' kb per counter */
+int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */
 
 /*
  * Lock for (sysadmin-configurable) counter reservations:
@@ -1522,6 +1522,9 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 
 	if (atomic_dec_and_mutex_lock(&counter->mmap_count,
 				      &counter->mmap_mutex)) {
+		struct user_struct *user = current_user();
+
+		atomic_long_sub(counter->data->nr_pages + 1, &user->locked_vm);
 		vma->vm_mm->locked_vm -= counter->data->nr_locked;
 		perf_mmap_data_free(counter);
 		mutex_unlock(&counter->mmap_mutex);
@@ -1537,11 +1540,13 @@ static struct vm_operations_struct perf_mmap_vmops = {
 static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct perf_counter *counter = file->private_data;
+	struct user_struct *user = current_user();
 	unsigned long vma_size;
 	unsigned long nr_pages;
+	unsigned long user_locked, user_lock_limit;
 	unsigned long locked, lock_limit;
+	long user_extra, extra;
 	int ret = 0;
-	long extra;
 
 	if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
 		return -EINVAL;
@@ -1569,15 +1574,17 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		goto unlock;
 	}
 
-	extra = nr_pages /* + 1 only account the data pages */;
-	extra -= sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10);
-	if (extra < 0)
-		extra = 0;
+	user_extra = nr_pages + 1;
+	user_lock_limit = sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10);
+	user_locked = atomic_long_read(&user->locked_vm) + user_extra;
 
-	locked = vma->vm_mm->locked_vm + extra;
+	extra = 0;
+	if (user_locked > user_lock_limit)
+		extra = user_locked - user_lock_limit;
 
 	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
 	lock_limit >>= PAGE_SHIFT;
+	locked = vma->vm_mm->locked_vm + extra;
 
 	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
 		ret = -EPERM;
@@ -1590,6 +1597,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		goto unlock;
 
 	atomic_set(&counter->mmap_count, 1);
+	atomic_long_add(user_extra, &user->locked_vm);
 	vma->vm_mm->locked_vm += extra;
 	counter->data->nr_locked = extra;
 unlock:
-- 
cgit v1.2.3-70-g09d2


From 60db5e09c13109b13830cc9dcae688003fd39e79 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 15 May 2009 15:19:28 +0200
Subject: perf_counter: frequency based adaptive irq_period

Instead of specifying the irq_period for a counter, provide a target interrupt
frequency and dynamically adapt the irq_period to match this frequency.

[ Impact: new perf-counter attribute/feature ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <20090515132018.646195868@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/perf_counter.c | 13 ++++----
 arch/x86/kernel/cpu/perf_counter.c |  9 ++----
 include/linux/perf_counter.h       | 10 ++++--
 kernel/perf_counter.c              | 63 ++++++++++++++++++++++++++++++--------
 4 files changed, 68 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index bb1b463c136..db8d5cafc15 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -534,7 +534,7 @@ void hw_perf_enable(void)
 			continue;
 		}
 		val = 0;
-		if (counter->hw_event.irq_period) {
+		if (counter->hw.irq_period) {
 			left = atomic64_read(&counter->hw.period_left);
 			if (left < 0x80000000L)
 				val = 0x80000000L - left;
@@ -829,8 +829,6 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 
 	if (!ppmu)
 		return ERR_PTR(-ENXIO);
-	if ((s64)counter->hw_event.irq_period < 0)
-		return ERR_PTR(-EINVAL);
 	if (!perf_event_raw(&counter->hw_event)) {
 		ev = perf_event_id(&counter->hw_event);
 		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
@@ -901,7 +899,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 
 	counter->hw.config = events[n];
 	counter->hw.counter_base = cflags[n];
-	atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
+	atomic64_set(&counter->hw.period_left, counter->hw.irq_period);
 
 	/*
 	 * See if we need to reserve the PMU.
@@ -934,6 +932,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 static void record_and_restart(struct perf_counter *counter, long val,
 			       struct pt_regs *regs, int nmi)
 {
+	u64 period = counter->hw.irq_period;
 	s64 prev, delta, left;
 	int record = 0;
 
@@ -948,11 +947,11 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	 */
 	val = 0;
 	left = atomic64_read(&counter->hw.period_left) - delta;
-	if (counter->hw_event.irq_period) {
+	if (period) {
 		if (left <= 0) {
-			left += counter->hw_event.irq_period;
+			left += period;
 			if (left <= 0)
-				left = counter->hw_event.irq_period;
+				left = period;
 			record = 1;
 		}
 		if (left < 0x80000000L)
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 5a7f718eb1e..886dcf334bc 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -286,11 +286,8 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 		hwc->nmi = 1;
 	}
 
-	hwc->irq_period	= hw_event->irq_period;
-	if ((s64)hwc->irq_period <= 0 || hwc->irq_period > x86_pmu.max_period)
-		hwc->irq_period = x86_pmu.max_period;
-
-	atomic64_set(&hwc->period_left, hwc->irq_period);
+	atomic64_set(&hwc->period_left,
+			min(x86_pmu.max_period, hwc->irq_period));
 
 	/*
 	 * Raw event type provide the config in the event structure
@@ -458,7 +455,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
 			     struct hw_perf_counter *hwc, int idx)
 {
 	s64 left = atomic64_read(&hwc->period_left);
-	s64 period = hwc->irq_period;
+	s64 period = min(x86_pmu.max_period, hwc->irq_period);
 	int err;
 
 	/*
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index e543ecc129f..004b6e162b9 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -130,7 +130,11 @@ struct perf_counter_hw_event {
 	 */
 	__u64			config;
 
-	__u64			irq_period;
+	union {
+		__u64		irq_period;
+		__u64		irq_freq;
+	};
+
 	__u32			record_type;
 	__u32			read_format;
 
@@ -146,8 +150,9 @@ struct perf_counter_hw_event {
 				mmap           :  1, /* include mmap data     */
 				munmap         :  1, /* include munmap data   */
 				comm	       :  1, /* include comm data     */
+				freq           :  1, /* use freq, not period  */
 
-				__reserved_1   : 52;
+				__reserved_1   : 51;
 
 	__u32			extra_config_len;
 	__u32			wakeup_events;	/* wakeup every n events */
@@ -337,6 +342,7 @@ struct hw_perf_counter {
 	atomic64_t			prev_count;
 	u64				irq_period;
 	atomic64_t			period_left;
+	u64				interrupts;
 #endif
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 93f4a0e4b87..0ad1db4f3d6 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1046,6 +1046,38 @@ int perf_counter_task_enable(void)
 	return 0;
 }
 
+void perf_adjust_freq(struct perf_counter_context *ctx)
+{
+	struct perf_counter *counter;
+	u64 irq_period;
+	u64 events, period;
+	s64 delta;
+
+	spin_lock(&ctx->lock);
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		if (counter->state != PERF_COUNTER_STATE_ACTIVE)
+			continue;
+
+		if (!counter->hw_event.freq || !counter->hw_event.irq_freq)
+			continue;
+
+		events = HZ * counter->hw.interrupts * counter->hw.irq_period;
+		period = div64_u64(events, counter->hw_event.irq_freq);
+
+		delta = (s64)(1 + period - counter->hw.irq_period);
+		delta >>= 1;
+
+		irq_period = counter->hw.irq_period + delta;
+
+		if (!irq_period)
+			irq_period = 1;
+
+		counter->hw.irq_period = irq_period;
+		counter->hw.interrupts = 0;
+	}
+	spin_unlock(&ctx->lock);
+}
+
 /*
  * Round-robin a context's counters:
  */
@@ -1081,6 +1113,9 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	cpuctx = &per_cpu(perf_cpu_context, cpu);
 	ctx = &curr->perf_counter_ctx;
 
+	perf_adjust_freq(&cpuctx->ctx);
+	perf_adjust_freq(ctx);
+
 	perf_counter_cpu_sched_out(cpuctx);
 	__perf_counter_task_sched_out(ctx);
 
@@ -2382,6 +2417,8 @@ int perf_counter_overflow(struct perf_counter *counter,
 	int events = atomic_read(&counter->event_limit);
 	int ret = 0;
 
+	counter->hw.interrupts++;
+
 	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * counters
@@ -2450,6 +2487,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 	enum hrtimer_restart ret = HRTIMER_RESTART;
 	struct perf_counter *counter;
 	struct pt_regs *regs;
+	u64 period;
 
 	counter	= container_of(hrtimer, struct perf_counter, hw.hrtimer);
 	counter->pmu->read(counter);
@@ -2468,7 +2506,8 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 			ret = HRTIMER_NORESTART;
 	}
 
-	hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period));
+	period = max_t(u64, 10000, counter->hw.irq_period);
+	hrtimer_forward_now(hrtimer, ns_to_ktime(period));
 
 	return ret;
 }
@@ -2629,8 +2668,9 @@ static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
 	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hwc->hrtimer.function = perf_swcounter_hrtimer;
 	if (hwc->irq_period) {
+		u64 period = max_t(u64, 10000, hwc->irq_period);
 		__hrtimer_start_range_ns(&hwc->hrtimer,
-				ns_to_ktime(hwc->irq_period), 0,
+				ns_to_ktime(period), 0,
 				HRTIMER_MODE_REL, 0);
 	}
 
@@ -2679,8 +2719,9 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter)
 	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hwc->hrtimer.function = perf_swcounter_hrtimer;
 	if (hwc->irq_period) {
+		u64 period = max_t(u64, 10000, hwc->irq_period);
 		__hrtimer_start_range_ns(&hwc->hrtimer,
-				ns_to_ktime(hwc->irq_period), 0,
+				ns_to_ktime(period), 0,
 				HRTIMER_MODE_REL, 0);
 	}
 
@@ -2811,9 +2852,7 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 
 static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 {
-	struct perf_counter_hw_event *hw_event = &counter->hw_event;
 	const struct pmu *pmu = NULL;
-	struct hw_perf_counter *hwc = &counter->hw;
 
 	/*
 	 * Software counters (currently) can't in general distinguish
@@ -2826,8 +2865,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 	case PERF_COUNT_CPU_CLOCK:
 		pmu = &perf_ops_cpu_clock;
 
-		if (hw_event->irq_period && hw_event->irq_period < 10000)
-			hw_event->irq_period = 10000;
 		break;
 	case PERF_COUNT_TASK_CLOCK:
 		/*
@@ -2839,8 +2876,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 		else
 			pmu = &perf_ops_cpu_clock;
 
-		if (hw_event->irq_period && hw_event->irq_period < 10000)
-			hw_event->irq_period = 10000;
 		break;
 	case PERF_COUNT_PAGE_FAULTS:
 	case PERF_COUNT_PAGE_FAULTS_MIN:
@@ -2854,9 +2889,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 		break;
 	}
 
-	if (pmu)
-		hwc->irq_period = hw_event->irq_period;
-
 	return pmu;
 }
 
@@ -2872,6 +2904,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 {
 	const struct pmu *pmu;
 	struct perf_counter *counter;
+	struct hw_perf_counter *hwc;
 	long err;
 
 	counter = kzalloc(sizeof(*counter), gfpflags);
@@ -2907,6 +2940,12 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 
 	pmu = NULL;
 
+	hwc = &counter->hw;
+	if (hw_event->freq && hw_event->irq_freq)
+		hwc->irq_period = TICK_NSEC / hw_event->irq_freq;
+	else
+		hwc->irq_period = hw_event->irq_period;
+
 	/*
 	 * we currently do not support PERF_RECORD_GROUP on inherited counters
 	 */
-- 
cgit v1.2.3-70-g09d2


From dce48a84adf1806676319f6f480e30a6daa012f9 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 11 Apr 2009 10:43:41 +0200
Subject: sched, timers: move calc_load() to scheduler

Dimitri Sivanich noticed that xtime_lock is held write locked across
calc_load() which iterates over all online CPUs. That can cause long
latencies for xtime_lock readers on large SMP systems.

The load average calculation is an rough estimate anyway so there is
no real need to protect the readers vs. the update. It's not a problem
when the avenrun array is updated while a reader copies the values.

Instead of iterating over all online CPUs let the scheduler_tick code
update the number of active tasks shortly before the avenrun update
happens. The avenrun update itself is handled by the CPU which calls
do_timer().

[ Impact: reduce xtime_lock write locked section ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
---
 include/linux/sched.h     |  2 +-
 kernel/sched.c            | 84 +++++++++++++++++++++++++++++++++++++++++------
 kernel/sched_idletask.c   |  3 +-
 kernel/time/timekeeping.c |  2 +-
 kernel/timer.c            | 54 ++----------------------------
 5 files changed, 80 insertions(+), 65 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b4c38bc8049..6eb4892efe4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -135,8 +135,8 @@ DECLARE_PER_CPU(unsigned long, process_counts);
 extern int nr_processes(void);
 extern unsigned long nr_running(void);
 extern unsigned long nr_uninterruptible(void);
-extern unsigned long nr_active(void);
 extern unsigned long nr_iowait(void);
+extern void calc_global_load(void);
 
 extern unsigned long get_parent_ip(unsigned long addr);
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 8908d190a34..f4eb88153bd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -630,6 +630,10 @@ struct rq {
 	struct list_head migration_queue;
 #endif
 
+	/* calc_load related fields */
+	unsigned long calc_load_update;
+	long calc_load_active;
+
 #ifdef CONFIG_SCHED_HRTICK
 #ifdef CONFIG_SMP
 	int hrtick_csd_pending;
@@ -1728,6 +1732,8 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
 }
 #endif
 
+static void calc_load_account_active(struct rq *this_rq);
+
 #include "sched_stats.h"
 #include "sched_idletask.c"
 #include "sched_fair.c"
@@ -2856,19 +2862,57 @@ unsigned long nr_iowait(void)
 	return sum;
 }
 
-unsigned long nr_active(void)
+/* Variables and functions for calc_load */
+static atomic_long_t calc_load_tasks;
+static unsigned long calc_load_update;
+unsigned long avenrun[3];
+EXPORT_SYMBOL(avenrun);
+
+static unsigned long
+calc_load(unsigned long load, unsigned long exp, unsigned long active)
 {
-	unsigned long i, running = 0, uninterruptible = 0;
+	load *= exp;
+	load += active * (FIXED_1 - exp);
+	return load >> FSHIFT;
+}
 
-	for_each_online_cpu(i) {
-		running += cpu_rq(i)->nr_running;
-		uninterruptible += cpu_rq(i)->nr_uninterruptible;
-	}
+/*
+ * calc_load - update the avenrun load estimates 10 ticks after the
+ * CPUs have updated calc_load_tasks.
+ */
+void calc_global_load(void)
+{
+	unsigned long upd = calc_load_update + 10;
+	long active;
+
+	if (time_before(jiffies, upd))
+		return;
 
-	if (unlikely((long)uninterruptible < 0))
-		uninterruptible = 0;
+	active = atomic_long_read(&calc_load_tasks);
+	active = active > 0 ? active * FIXED_1 : 0;
 
-	return running + uninterruptible;
+	avenrun[0] = calc_load(avenrun[0], EXP_1, active);
+	avenrun[1] = calc_load(avenrun[1], EXP_5, active);
+	avenrun[2] = calc_load(avenrun[2], EXP_15, active);
+
+	calc_load_update += LOAD_FREQ;
+}
+
+/*
+ * Either called from update_cpu_load() or from a cpu going idle
+ */
+static void calc_load_account_active(struct rq *this_rq)
+{
+	long nr_active, delta;
+
+	nr_active = this_rq->nr_running;
+	nr_active += (long) this_rq->nr_uninterruptible;
+
+	if (nr_active != this_rq->calc_load_active) {
+		delta = nr_active - this_rq->calc_load_active;
+		this_rq->calc_load_active = nr_active;
+		atomic_long_add(delta, &calc_load_tasks);
+	}
 }
 
 /*
@@ -2899,6 +2943,11 @@ static void update_cpu_load(struct rq *this_rq)
 			new_load += scale-1;
 		this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
 	}
+
+	if (time_after_eq(jiffies, this_rq->calc_load_update)) {
+		this_rq->calc_load_update += LOAD_FREQ;
+		calc_load_account_active(this_rq);
+	}
 }
 
 #ifdef CONFIG_SMP
@@ -7091,6 +7140,14 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
 
 	}
 }
+
+/*
+ * remove the tasks which were accounted by rq from calc_load_tasks.
+ */
+static void calc_global_load_remove(struct rq *rq)
+{
+	atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
+}
 #endif /* CONFIG_HOTPLUG_CPU */
 
 #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
@@ -7325,6 +7382,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		/* Update our root-domain */
 		rq = cpu_rq(cpu);
 		spin_lock_irqsave(&rq->lock, flags);
+		rq->calc_load_update = calc_load_update;
+		rq->calc_load_active = 0;
 		if (rq->rd) {
 			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
 
@@ -7364,7 +7423,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		cpuset_unlock();
 		migrate_nr_uninterruptible(rq);
 		BUG_ON(rq->nr_running != 0);
-
+		calc_global_load_remove(rq);
 		/*
 		 * No need to migrate the tasks: it was best-effort if
 		 * they didn't take sched_hotcpu_mutex. Just wake up
@@ -9059,6 +9118,8 @@ void __init sched_init(void)
 		rq = cpu_rq(i);
 		spin_lock_init(&rq->lock);
 		rq->nr_running = 0;
+		rq->calc_load_active = 0;
+		rq->calc_load_update = jiffies + LOAD_FREQ;
 		init_cfs_rq(&rq->cfs, rq);
 		init_rt_rq(&rq->rt, rq);
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -9166,6 +9227,9 @@ void __init sched_init(void)
 	 * when this runqueue becomes "idle".
 	 */
 	init_idle(current, smp_processor_id());
+
+	calc_load_update = jiffies + LOAD_FREQ;
+
 	/*
 	 * During early bootup we pretend to be a normal task:
 	 */
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 8a21a2e28c1..499672c10cb 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -22,7 +22,8 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int sy
 static struct task_struct *pick_next_task_idle(struct rq *rq)
 {
 	schedstat_inc(rq, sched_goidle);
-
+	/* adjust the active tasks as we might go into a long sleep */
+	calc_load_account_active(rq);
 	return rq->idle;
 }
 
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 687dff49f6e..52a8bf8931f 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -22,7 +22,7 @@
 
 /*
  * This read-write spinlock protects us from races in SMP while
- * playing with xtime and avenrun.
+ * playing with xtime.
  */
 __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
 
diff --git a/kernel/timer.c b/kernel/timer.c
index cffffad01c3..6a21d7af962 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1122,47 +1122,6 @@ void update_process_times(int user_tick)
 	run_posix_cpu_timers(p);
 }
 
-/*
- * Nr of active tasks - counted in fixed-point numbers
- */
-static unsigned long count_active_tasks(void)
-{
-	return nr_active() * FIXED_1;
-}
-
-/*
- * Hmm.. Changed this, as the GNU make sources (load.c) seems to
- * imply that avenrun[] is the standard name for this kind of thing.
- * Nothing else seems to be standardized: the fractional size etc
- * all seem to differ on different machines.
- *
- * Requires xtime_lock to access.
- */
-unsigned long avenrun[3];
-
-EXPORT_SYMBOL(avenrun);
-
-/*
- * calc_load - given tick count, update the avenrun load estimates.
- * This is called while holding a write_lock on xtime_lock.
- */
-static inline void calc_load(unsigned long ticks)
-{
-	unsigned long active_tasks; /* fixed-point */
-	static int count = LOAD_FREQ;
-
-	count -= ticks;
-	if (unlikely(count < 0)) {
-		active_tasks = count_active_tasks();
-		do {
-			CALC_LOAD(avenrun[0], EXP_1, active_tasks);
-			CALC_LOAD(avenrun[1], EXP_5, active_tasks);
-			CALC_LOAD(avenrun[2], EXP_15, active_tasks);
-			count += LOAD_FREQ;
-		} while (count < 0);
-	}
-}
-
 /*
  * This function runs timers and the timer-tq in bottom half context.
  */
@@ -1186,16 +1145,6 @@ void run_local_timers(void)
 	softlockup_tick();
 }
 
-/*
- * Called by the timer interrupt. xtime_lock must already be taken
- * by the timer IRQ!
- */
-static inline void update_times(unsigned long ticks)
-{
-	update_wall_time();
-	calc_load(ticks);
-}
-
 /*
  * The 64-bit jiffies value is not atomic - you MUST NOT read it
  * without sampling the sequence number in xtime_lock.
@@ -1205,7 +1154,8 @@ static inline void update_times(unsigned long ticks)
 void do_timer(unsigned long ticks)
 {
 	jiffies_64 += ticks;
-	update_times(ticks);
+	update_wall_time();
+	calc_global_load();
 }
 
 #ifdef __ARCH_WANT_SYS_ALARM
-- 
cgit v1.2.3-70-g09d2


From 2d02494f5a90f2e4b3c4c6acc85ec94674cdc431 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 2 May 2009 20:08:52 +0200
Subject: sched, timers: cleanup avenrun users

avenrun is an rough estimate so we don't have to worry about
consistency of the three avenrun values. Remove the xtime lock
dependency and provide a function to scale the values. Cleanup the
users.

[ Impact: cleanup ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
---
 fs/proc/loadavg.c     | 18 ++++++------------
 include/linux/sched.h |  1 +
 kernel/sched.c        | 15 +++++++++++++++
 kernel/timer.c        | 32 ++++++--------------------------
 4 files changed, 28 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index 9bca39cf99e..1afa4dd4cae 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -12,20 +12,14 @@
 
 static int loadavg_proc_show(struct seq_file *m, void *v)
 {
-	int a, b, c;
-	unsigned long seq;
+	unsigned long avnrun[3];
 
-	do {
-		seq = read_seqbegin(&xtime_lock);
-		a = avenrun[0] + (FIXED_1/200);
-		b = avenrun[1] + (FIXED_1/200);
-		c = avenrun[2] + (FIXED_1/200);
-	} while (read_seqretry(&xtime_lock, seq));
+	get_avenrun(avnrun, FIXED_1/200, 0);
 
-	seq_printf(m, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
-		LOAD_INT(a), LOAD_FRAC(a),
-		LOAD_INT(b), LOAD_FRAC(b),
-		LOAD_INT(c), LOAD_FRAC(c),
+	seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n",
+		LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
+		LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
+		LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
 		nr_running(), nr_threads,
 		task_active_pid_ns(current)->last_pid);
 	return 0;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6eb4892efe4..de7b3b21777 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -116,6 +116,7 @@ struct fs_struct;
  *    11 bit fractions.
  */
 extern unsigned long avenrun[];		/* Load averages */
+extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift);
 
 #define FSHIFT		11		/* nr of bits of precision */
 #define FIXED_1		(1<<FSHIFT)	/* 1.0 as fixed-point */
diff --git a/kernel/sched.c b/kernel/sched.c
index f4eb88153bd..497c09ba61e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2868,6 +2868,21 @@ static unsigned long calc_load_update;
 unsigned long avenrun[3];
 EXPORT_SYMBOL(avenrun);
 
+/**
+ * get_avenrun - get the load average array
+ * @loads:	pointer to dest load array
+ * @offset:	offset to add
+ * @shift:	shift count to shift the result left
+ *
+ * These values are estimates at best, so no need for locking.
+ */
+void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
+{
+	loads[0] = (avenrun[0] + offset) << shift;
+	loads[1] = (avenrun[1] + offset) << shift;
+	loads[2] = (avenrun[2] + offset) << shift;
+}
+
 static unsigned long
 calc_load(unsigned long load, unsigned long exp, unsigned long active)
 {
diff --git a/kernel/timer.c b/kernel/timer.c
index 6a21d7af962..a26ed294f93 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1356,37 +1356,17 @@ int do_sysinfo(struct sysinfo *info)
 {
 	unsigned long mem_total, sav_total;
 	unsigned int mem_unit, bitcount;
-	unsigned long seq;
+	struct timespec tp;
 
 	memset(info, 0, sizeof(struct sysinfo));
 
-	do {
-		struct timespec tp;
-		seq = read_seqbegin(&xtime_lock);
-
-		/*
-		 * This is annoying.  The below is the same thing
-		 * posix_get_clock_monotonic() does, but it wants to
-		 * take the lock which we want to cover the loads stuff
-		 * too.
-		 */
-
-		getnstimeofday(&tp);
-		tp.tv_sec += wall_to_monotonic.tv_sec;
-		tp.tv_nsec += wall_to_monotonic.tv_nsec;
-		monotonic_to_bootbased(&tp);
-		if (tp.tv_nsec - NSEC_PER_SEC >= 0) {
-			tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
-			tp.tv_sec++;
-		}
-		info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
+	ktime_get_ts(&tp);
+	monotonic_to_bootbased(&tp);
+	info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
 
-		info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
-		info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
-		info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
+	get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
 
-		info->procs = nr_threads;
-	} while (read_seqretry(&xtime_lock, seq));
+	info->procs = nr_threads;
 
 	si_meminfo(info);
 	si_swapinfo(info);
-- 
cgit v1.2.3-70-g09d2


From 9d23a90a67261e73b2fcac04d8ca963c6b496afb Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Thu, 14 May 2009 21:48:08 +1000
Subject: perf_counter: allow arch to supply event misc flags and instruction
 pointer

At present the values we put in overflow events for the misc
flags indicating processor mode and the instruction pointer are
obtained using the standard user_mode() and
instruction_pointer() functions. Those functions tell you where
the performance monitor interrupt was taken, which might not be
exactly where the counter overflow occurred, for example
because interrupts were disabled at the point where the
overflow occurred, or because the processor had many
instructions in flight and chose to complete some more
instructions beyond the one that caused the counter overflow.

Some architectures (e.g. powerpc) can supply more precise
information about where the counter overflow occurred and the
processor mode at that point.  This introduces new functions,
perf_misc_flags() and perf_instruction_pointer(), which arch
code can override to provide more precise information if
available.  They have default implementations which are
identical to the existing code.

This also adds a new misc flag value,
PERF_EVENT_MISC_HYPERVISOR, for the case where a counter
overflow occurred in the hypervisor.  We encode the processor
mode in the 2 bits previously used to indicate user or kernel
mode; the values for user and kernel mode are unchanged and
hypervisor mode is indicated by both bits being set.

[ Impact: generalize perfcounter core facilities ]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <18956.1272.818511.561835@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 11 ++++++++++-
 kernel/perf_counter.c        |  5 ++---
 2 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 004b6e162b9..c8c1dfc22c9 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -215,8 +215,11 @@ struct perf_counter_mmap_page {
 	__u32   data_head;		/* head in the data section */
 };
 
+#define PERF_EVENT_MISC_CPUMODE_MASK	(3 << 0)
+#define PERF_EVENT_MISC_CPUMODE_UNKNOWN	(0 << 0)
 #define PERF_EVENT_MISC_KERNEL		(1 << 0)
-#define PERF_EVENT_MISC_USER		(1 << 1)
+#define PERF_EVENT_MISC_USER		(2 << 0)
+#define PERF_EVENT_MISC_HYPERVISOR	(3 << 0)
 #define PERF_EVENT_MISC_OVERFLOW	(1 << 2)
 
 struct perf_event_header {
@@ -596,6 +599,12 @@ extern int sysctl_perf_counter_mlock;
 
 extern void perf_counter_init(void);
 
+#ifndef perf_misc_flags
+#define perf_misc_flags(regs)	(user_mode(regs) ? PERF_EVENT_MISC_USER : \
+				 PERF_EVENT_MISC_KERNEL)
+#define perf_instruction_pointer(regs)	instruction_pointer(regs)
+#endif
+
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 728a595399b..57840a94b16 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2042,11 +2042,10 @@ static void perf_counter_output(struct perf_counter *counter,
 	header.size = sizeof(header);
 
 	header.misc = PERF_EVENT_MISC_OVERFLOW;
-	header.misc |= user_mode(regs) ?
-		PERF_EVENT_MISC_USER : PERF_EVENT_MISC_KERNEL;
+	header.misc |= perf_misc_flags(regs);
 
 	if (record_type & PERF_RECORD_IP) {
-		ip = instruction_pointer(regs);
+		ip = perf_instruction_pointer(regs);
 		header.type |= PERF_RECORD_IP;
 		header.size += sizeof(ip);
 	}
-- 
cgit v1.2.3-70-g09d2


From a48b2d4a0091904b4cf57d667adc2faf689750d3 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <me@felipebalbi.com>
Date: Fri, 15 May 2009 20:12:47 -0700
Subject: Input: introduce lm8323 keypad driver

lm8323 is the keypad driver used in n810 device.

[akpm@linux-foundation.org: coding-style fixes]
[dtor@mail.ru: various cleanups]
Signed-off-by: Felipe Balbi <felipe.balbi@nokia.com>
Reviewed-by: Trilok Soni <soni.trilok@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/keyboard/Kconfig  |  13 +-
 drivers/input/keyboard/Makefile |   1 +
 drivers/input/keyboard/lm8323.c | 878 ++++++++++++++++++++++++++++++++++++++++
 include/linux/i2c/lm8323.h      |  46 +++
 4 files changed, 937 insertions(+), 1 deletion(-)
 create mode 100644 drivers/input/keyboard/lm8323.c
 create mode 100644 include/linux/i2c/lm8323.h

(limited to 'include')

diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index 54775aaa7be..9d8f796c674 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -250,6 +250,17 @@ config KEYBOARD_HP7XX
 	  To compile this driver as a module, choose M here: the
 	  module will be called jornada720_kbd.
 
+config KEYBOARD_LM8323
+	tristate "LM8323 keypad chip"
+	depends on I2C
+	depends on LEDS_CLASS
+	help
+	  If you say yes here you get support for the National Semiconductor
+	  LM8323 keypad controller.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called lm8323.
+
 config KEYBOARD_OMAP
 	tristate "TI OMAP keypad support"
 	depends on (ARCH_OMAP1 || ARCH_OMAP2)
@@ -332,7 +343,7 @@ config KEYBOARD_SH_KEYSC
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called sh_keysc.
-+
+
 config KEYBOARD_EP93XX
 	tristate "EP93xx Matrix Keypad support"
 	depends on ARCH_EP93XX
diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile
index 13ba9c95493..156b647a259 100644
--- a/drivers/input/keyboard/Makefile
+++ b/drivers/input/keyboard/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_KEYBOARD_SPITZ)		+= spitzkbd.o
 obj-$(CONFIG_KEYBOARD_TOSA)		+= tosakbd.o
 obj-$(CONFIG_KEYBOARD_HIL)		+= hil_kbd.o
 obj-$(CONFIG_KEYBOARD_HIL_OLD)		+= hilkbd.o
+obj-$(CONFIG_KEYBOARD_LM8323)		+= lm8323.o
 obj-$(CONFIG_KEYBOARD_OMAP)		+= omap-keypad.o
 obj-$(CONFIG_KEYBOARD_PXA27x)		+= pxa27x_keypad.o
 obj-$(CONFIG_KEYBOARD_PXA930_ROTARY)	+= pxa930_rotary.o
diff --git a/drivers/input/keyboard/lm8323.c b/drivers/input/keyboard/lm8323.c
new file mode 100644
index 00000000000..574eda2a495
--- /dev/null
+++ b/drivers/input/keyboard/lm8323.c
@@ -0,0 +1,878 @@
+/*
+ * drivers/i2c/chips/lm8323.c
+ *
+ * Copyright (C) 2007-2009 Nokia Corporation
+ *
+ * Written by Daniel Stone <daniel.stone@nokia.com>
+ *            Timo O. Karjalainen <timo.o.karjalainen@nokia.com>
+ *
+ * Updated by Felipe Balbi <felipe.balbi@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License only).
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/input.h>
+#include <linux/leds.h>
+#include <linux/i2c/lm8323.h>
+
+/* Commands to send to the chip. */
+#define LM8323_CMD_READ_ID		0x80 /* Read chip ID. */
+#define LM8323_CMD_WRITE_CFG		0x81 /* Set configuration item. */
+#define LM8323_CMD_READ_INT		0x82 /* Get interrupt status. */
+#define LM8323_CMD_RESET		0x83 /* Reset, same as external one */
+#define LM8323_CMD_WRITE_PORT_SEL	0x85 /* Set GPIO in/out. */
+#define LM8323_CMD_WRITE_PORT_STATE	0x86 /* Set GPIO pullup. */
+#define LM8323_CMD_READ_PORT_SEL	0x87 /* Get GPIO in/out. */
+#define LM8323_CMD_READ_PORT_STATE	0x88 /* Get GPIO pullup. */
+#define LM8323_CMD_READ_FIFO		0x89 /* Read byte from FIFO. */
+#define LM8323_CMD_RPT_READ_FIFO	0x8a /* Read FIFO (no increment). */
+#define LM8323_CMD_SET_ACTIVE		0x8b /* Set active time. */
+#define LM8323_CMD_READ_ERR		0x8c /* Get error status. */
+#define LM8323_CMD_READ_ROTATOR		0x8e /* Read rotator status. */
+#define LM8323_CMD_SET_DEBOUNCE		0x8f /* Set debouncing time. */
+#define LM8323_CMD_SET_KEY_SIZE		0x90 /* Set keypad size. */
+#define LM8323_CMD_READ_KEY_SIZE	0x91 /* Get keypad size. */
+#define LM8323_CMD_READ_CFG		0x92 /* Get configuration item. */
+#define LM8323_CMD_WRITE_CLOCK		0x93 /* Set clock config. */
+#define LM8323_CMD_READ_CLOCK		0x94 /* Get clock config. */
+#define LM8323_CMD_PWM_WRITE		0x95 /* Write PWM script. */
+#define LM8323_CMD_START_PWM		0x96 /* Start PWM engine. */
+#define LM8323_CMD_STOP_PWM		0x97 /* Stop PWM engine. */
+
+/* Interrupt status. */
+#define INT_KEYPAD			0x01 /* Key event. */
+#define INT_ROTATOR			0x02 /* Rotator event. */
+#define INT_ERROR			0x08 /* Error: use CMD_READ_ERR. */
+#define INT_NOINIT			0x10 /* Lost configuration. */
+#define INT_PWM1			0x20 /* PWM1 stopped. */
+#define INT_PWM2			0x40 /* PWM2 stopped. */
+#define INT_PWM3			0x80 /* PWM3 stopped. */
+
+/* Errors (signalled by INT_ERROR, read with CMD_READ_ERR). */
+#define ERR_BADPAR			0x01 /* Bad parameter. */
+#define ERR_CMDUNK			0x02 /* Unknown command. */
+#define ERR_KEYOVR			0x04 /* Too many keys pressed. */
+#define ERR_FIFOOVER			0x40 /* FIFO overflow. */
+
+/* Configuration keys (CMD_{WRITE,READ}_CFG). */
+#define CFG_MUX1SEL			0x01 /* Select MUX1_OUT input. */
+#define CFG_MUX1EN			0x02 /* Enable MUX1_OUT. */
+#define CFG_MUX2SEL			0x04 /* Select MUX2_OUT input. */
+#define CFG_MUX2EN			0x08 /* Enable MUX2_OUT. */
+#define CFG_PSIZE			0x20 /* Package size (must be 0). */
+#define CFG_ROTEN			0x40 /* Enable rotator. */
+
+/* Clock settings (CMD_{WRITE,READ}_CLOCK). */
+#define CLK_RCPWM_INTERNAL		0x00
+#define CLK_RCPWM_EXTERNAL		0x03
+#define CLK_SLOWCLKEN			0x08 /* Enable 32.768kHz clock. */
+#define CLK_SLOWCLKOUT			0x40 /* Enable slow pulse output. */
+
+/* The possible addresses corresponding to CONFIG1 and CONFIG2 pin wirings. */
+#define LM8323_I2C_ADDR00		(0x84 >> 1)	/* 1000 010x */
+#define LM8323_I2C_ADDR01		(0x86 >> 1)	/* 1000 011x */
+#define LM8323_I2C_ADDR10		(0x88 >> 1)	/* 1000 100x */
+#define LM8323_I2C_ADDR11		(0x8A >> 1)	/* 1000 101x */
+
+/* Key event fifo length */
+#define LM8323_FIFO_LEN			15
+
+/* Commands for PWM engine; feed in with PWM_WRITE. */
+/* Load ramp counter from duty cycle field (range 0 - 0xff). */
+#define PWM_SET(v)			(0x4000 | ((v) & 0xff))
+/* Go to start of script. */
+#define PWM_GOTOSTART			0x0000
+/*
+ * Stop engine (generates interrupt).  If reset is 1, clear the program
+ * counter, else leave it.
+ */
+#define PWM_END(reset)			(0xc000 | (!!(reset) << 11))
+/*
+ * Ramp.  If s is 1, divide clock by 512, else divide clock by 16.
+ * Take t clock scales (up to 63) per step, for n steps (up to 126).
+ * If u is set, ramp up, else ramp down.
+ */
+#define PWM_RAMP(s, t, n, u)		((!!(s) << 14) | ((t) & 0x3f) << 8 | \
+					 ((n) & 0x7f) | ((u) ? 0 : 0x80))
+/*
+ * Loop (i.e. jump back to pos) for a given number of iterations (up to 63).
+ * If cnt is zero, execute until PWM_END is encountered.
+ */
+#define PWM_LOOP(cnt, pos)		(0xa000 | (((cnt) & 0x3f) << 7) | \
+					 ((pos) & 0x3f))
+/*
+ * Wait for trigger.  Argument is a mask of channels, shifted by the channel
+ * number, e.g. 0xa for channels 3 and 1.  Note that channels are numbered
+ * from 1, not 0.
+ */
+#define PWM_WAIT_TRIG(chans)		(0xe000 | (((chans) & 0x7) << 6))
+/* Send trigger.  Argument is same as PWM_WAIT_TRIG. */
+#define PWM_SEND_TRIG(chans)		(0xe000 | ((chans) & 0x7))
+
+struct lm8323_pwm {
+	int			id;
+	int			fade_time;
+	int			brightness;
+	int			desired_brightness;
+	bool			enabled;
+	bool			running;
+	/* pwm lock */
+	struct mutex		lock;
+	struct work_struct	work;
+	struct led_classdev	cdev;
+	struct lm8323_chip	*chip;
+};
+
+struct lm8323_chip {
+	/* device lock */
+	struct mutex		lock;
+	struct i2c_client	*client;
+	struct work_struct	work;
+	struct input_dev	*idev;
+	bool			kp_enabled;
+	bool			pm_suspend;
+	unsigned		keys_down;
+	char			phys[32];
+	unsigned short		keymap[LM8323_KEYMAP_SIZE];
+	int			size_x;
+	int			size_y;
+	int			debounce_time;
+	int			active_time;
+	struct lm8323_pwm	pwm[LM8323_NUM_PWMS];
+};
+
+#define client_to_lm8323(c)	container_of(c, struct lm8323_chip, client)
+#define dev_to_lm8323(d)	container_of(d, struct lm8323_chip, client->dev)
+#define work_to_lm8323(w)	container_of(w, struct lm8323_chip, work)
+#define cdev_to_pwm(c)		container_of(c, struct lm8323_pwm, cdev)
+#define work_to_pwm(w)		container_of(w, struct lm8323_pwm, work)
+
+#define LM8323_MAX_DATA 8
+
+/*
+ * To write, we just access the chip's address in write mode, and dump the
+ * command and data out on the bus.  The command byte and data are taken as
+ * sequential u8s out of varargs, to a maximum of LM8323_MAX_DATA.
+ */
+static int lm8323_write(struct lm8323_chip *lm, int len, ...)
+{
+	int ret, i;
+	va_list ap;
+	u8 data[LM8323_MAX_DATA];
+
+	va_start(ap, len);
+
+	if (unlikely(len > LM8323_MAX_DATA)) {
+		dev_err(&lm->client->dev, "tried to send %d bytes\n", len);
+		va_end(ap);
+		return 0;
+	}
+
+	for (i = 0; i < len; i++)
+		data[i] = va_arg(ap, int);
+
+	va_end(ap);
+
+	/*
+	 * If the host is asleep while we send the data, we can get a NACK
+	 * back while it wakes up, so try again, once.
+	 */
+	ret = i2c_master_send(lm->client, data, len);
+	if (unlikely(ret == -EREMOTEIO))
+		ret = i2c_master_send(lm->client, data, len);
+	if (unlikely(ret != len))
+		dev_err(&lm->client->dev, "sent %d bytes of %d total\n",
+			len, ret);
+
+	return ret;
+}
+
+/*
+ * To read, we first send the command byte to the chip and end the transaction,
+ * then access the chip in read mode, at which point it will send the data.
+ */
+static int lm8323_read(struct lm8323_chip *lm, u8 cmd, u8 *buf, int len)
+{
+	int ret;
+
+	/*
+	 * If the host is asleep while we send the byte, we can get a NACK
+	 * back while it wakes up, so try again, once.
+	 */
+	ret = i2c_master_send(lm->client, &cmd, 1);
+	if (unlikely(ret == -EREMOTEIO))
+		ret = i2c_master_send(lm->client, &cmd, 1);
+	if (unlikely(ret != 1)) {
+		dev_err(&lm->client->dev, "sending read cmd 0x%02x failed\n",
+			cmd);
+		return 0;
+	}
+
+	ret = i2c_master_recv(lm->client, buf, len);
+	if (unlikely(ret != len))
+		dev_err(&lm->client->dev, "wanted %d bytes, got %d\n",
+			len, ret);
+
+	return ret;
+}
+
+/*
+ * Set the chip active time (idle time before it enters halt).
+ */
+static void lm8323_set_active_time(struct lm8323_chip *lm, int time)
+{
+	lm8323_write(lm, 2, LM8323_CMD_SET_ACTIVE, time >> 2);
+}
+
+/*
+ * The signals are AT-style: the low 7 bits are the keycode, and the top
+ * bit indicates the state (1 for down, 0 for up).
+ */
+static inline u8 lm8323_whichkey(u8 event)
+{
+	return event & 0x7f;
+}
+
+static inline int lm8323_ispress(u8 event)
+{
+	return (event & 0x80) ? 1 : 0;
+}
+
+static void process_keys(struct lm8323_chip *lm)
+{
+	u8 event;
+	u8 key_fifo[LM8323_FIFO_LEN + 1];
+	int old_keys_down = lm->keys_down;
+	int ret;
+	int i = 0;
+
+	/*
+	 * Read all key events from the FIFO at once. Next READ_FIFO clears the
+	 * FIFO even if we didn't read all events previously.
+	 */
+	ret = lm8323_read(lm, LM8323_CMD_READ_FIFO, key_fifo, LM8323_FIFO_LEN);
+
+	if (ret < 0) {
+		dev_err(&lm->client->dev, "Failed reading fifo \n");
+		return;
+	}
+	key_fifo[ret] = 0;
+
+	while ((event = key_fifo[i++])) {
+		u8 key = lm8323_whichkey(event);
+		int isdown = lm8323_ispress(event);
+		unsigned short keycode = lm->keymap[key];
+
+		dev_vdbg(&lm->client->dev, "key 0x%02x %s\n",
+			 key, isdown ? "down" : "up");
+
+		if (lm->kp_enabled) {
+			input_event(lm->idev, EV_MSC, MSC_SCAN, key);
+			input_report_key(lm->idev, keycode, isdown);
+			input_sync(lm->idev);
+		}
+
+		if (isdown)
+			lm->keys_down++;
+		else
+			lm->keys_down--;
+	}
+
+	/*
+	 * Errata: We need to ensure that the chip never enters halt mode
+	 * during a keypress, so set active time to 0.  When it's released,
+	 * we can enter halt again, so set the active time back to normal.
+	 */
+	if (!old_keys_down && lm->keys_down)
+		lm8323_set_active_time(lm, 0);
+	if (old_keys_down && !lm->keys_down)
+		lm8323_set_active_time(lm, lm->active_time);
+}
+
+static void lm8323_process_error(struct lm8323_chip *lm)
+{
+	u8 error;
+
+	if (lm8323_read(lm, LM8323_CMD_READ_ERR, &error, 1) == 1) {
+		if (error & ERR_FIFOOVER)
+			dev_vdbg(&lm->client->dev, "fifo overflow!\n");
+		if (error & ERR_KEYOVR)
+			dev_vdbg(&lm->client->dev,
+					"more than two keys pressed\n");
+		if (error & ERR_CMDUNK)
+			dev_vdbg(&lm->client->dev,
+					"unknown command submitted\n");
+		if (error & ERR_BADPAR)
+			dev_vdbg(&lm->client->dev, "bad command parameter\n");
+	}
+}
+
+static void lm8323_reset(struct lm8323_chip *lm)
+{
+	/* The docs say we must pass 0xAA as the data byte. */
+	lm8323_write(lm, 2, LM8323_CMD_RESET, 0xAA);
+}
+
+static int lm8323_configure(struct lm8323_chip *lm)
+{
+	int keysize = (lm->size_x << 4) | lm->size_y;
+	int clock = (CLK_SLOWCLKEN | CLK_RCPWM_EXTERNAL);
+	int debounce = lm->debounce_time >> 2;
+	int active = lm->active_time >> 2;
+
+	/*
+	 * Active time must be greater than the debounce time: if it's
+	 * a close-run thing, give ourselves a 12ms buffer.
+	 */
+	if (debounce >= active)
+		active = debounce + 3;
+
+	lm8323_write(lm, 2, LM8323_CMD_WRITE_CFG, 0);
+	lm8323_write(lm, 2, LM8323_CMD_WRITE_CLOCK, clock);
+	lm8323_write(lm, 2, LM8323_CMD_SET_KEY_SIZE, keysize);
+	lm8323_set_active_time(lm, lm->active_time);
+	lm8323_write(lm, 2, LM8323_CMD_SET_DEBOUNCE, debounce);
+	lm8323_write(lm, 3, LM8323_CMD_WRITE_PORT_STATE, 0xff, 0xff);
+	lm8323_write(lm, 3, LM8323_CMD_WRITE_PORT_SEL, 0, 0);
+
+	/*
+	 * Not much we can do about errors at this point, so just hope
+	 * for the best.
+	 */
+
+	return 0;
+}
+
+static void pwm_done(struct lm8323_pwm *pwm)
+{
+	mutex_lock(&pwm->lock);
+	pwm->running = false;
+	if (pwm->desired_brightness != pwm->brightness)
+		schedule_work(&pwm->work);
+	mutex_unlock(&pwm->lock);
+}
+
+/*
+ * Bottom half: handle the interrupt by posting key events, or dealing with
+ * errors appropriately.
+ */
+static void lm8323_work(struct work_struct *work)
+{
+	struct lm8323_chip *lm = work_to_lm8323(work);
+	u8 ints;
+	int i;
+
+	mutex_lock(&lm->lock);
+
+	while ((lm8323_read(lm, LM8323_CMD_READ_INT, &ints, 1) == 1) && ints) {
+		if (likely(ints & INT_KEYPAD))
+			process_keys(lm);
+		if (ints & INT_ROTATOR) {
+			/* We don't currently support the rotator. */
+			dev_vdbg(&lm->client->dev, "rotator fired\n");
+		}
+		if (ints & INT_ERROR) {
+			dev_vdbg(&lm->client->dev, "error!\n");
+			lm8323_process_error(lm);
+		}
+		if (ints & INT_NOINIT) {
+			dev_err(&lm->client->dev, "chip lost config; "
+						  "reinitialising\n");
+			lm8323_configure(lm);
+		}
+		for (i = 0; i < LM8323_NUM_PWMS; i++) {
+			if (ints & (1 << (INT_PWM1 + i))) {
+				dev_vdbg(&lm->client->dev,
+					 "pwm%d engine completed\n", i);
+				pwm_done(&lm->pwm[i]);
+			}
+		}
+	}
+
+	mutex_unlock(&lm->lock);
+}
+
+/*
+ * We cannot use I2C in interrupt context, so we just schedule work.
+ */
+static irqreturn_t lm8323_irq(int irq, void *data)
+{
+	struct lm8323_chip *lm = data;
+
+	schedule_work(&lm->work);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Read the chip ID.
+ */
+static int lm8323_read_id(struct lm8323_chip *lm, u8 *buf)
+{
+	int bytes;
+
+	bytes = lm8323_read(lm, LM8323_CMD_READ_ID, buf, 2);
+	if (unlikely(bytes != 2))
+		return -EIO;
+
+	return 0;
+}
+
+static void lm8323_write_pwm_one(struct lm8323_pwm *pwm, int pos, u16 cmd)
+{
+	lm8323_write(pwm->chip, 4, LM8323_CMD_PWM_WRITE, (pos << 2) | pwm->id,
+		     (cmd & 0xff00) >> 8, cmd & 0x00ff);
+}
+
+/*
+ * Write a script into a given PWM engine, concluding with PWM_END.
+ * If 'kill' is nonzero, the engine will be shut down at the end
+ * of the script, producing a zero output. Otherwise the engine
+ * will be kept running at the final PWM level indefinitely.
+ */
+static void lm8323_write_pwm(struct lm8323_pwm *pwm, int kill,
+			     int len, const u16 *cmds)
+{
+	int i;
+
+	for (i = 0; i < len; i++)
+		lm8323_write_pwm_one(pwm, i, cmds[i]);
+
+	lm8323_write_pwm_one(pwm, i++, PWM_END(kill));
+	lm8323_write(pwm->chip, 2, LM8323_CMD_START_PWM, pwm->id);
+	pwm->running = true;
+}
+
+static void lm8323_pwm_work(struct work_struct *work)
+{
+	struct lm8323_pwm *pwm = work_to_pwm(work);
+	int div512, perstep, steps, hz, up, kill;
+	u16 pwm_cmds[3];
+	int num_cmds = 0;
+
+	mutex_lock(&pwm->lock);
+
+	/*
+	 * Do nothing if we're already at the requested level,
+	 * or previous setting is not yet complete. In the latter
+	 * case we will be called again when the previous PWM script
+	 * finishes.
+	 */
+	if (pwm->running || pwm->desired_brightness == pwm->brightness)
+		goto out;
+
+	kill = (pwm->desired_brightness == 0);
+	up = (pwm->desired_brightness > pwm->brightness);
+	steps = abs(pwm->desired_brightness - pwm->brightness);
+
+	/*
+	 * Convert time (in ms) into a divisor (512 or 16 on a refclk of
+	 * 32768Hz), and number of ticks per step.
+	 */
+	if ((pwm->fade_time / steps) > (32768 / 512)) {
+		div512 = 1;
+		hz = 32768 / 512;
+	} else {
+		div512 = 0;
+		hz = 32768 / 16;
+	}
+
+	perstep = (hz * pwm->fade_time) / (steps * 1000);
+
+	if (perstep == 0)
+		perstep = 1;
+	else if (perstep > 63)
+		perstep = 63;
+
+	while (steps) {
+		int s;
+
+		s = min(126, steps);
+		pwm_cmds[num_cmds++] = PWM_RAMP(div512, perstep, s, up);
+		steps -= s;
+	}
+
+	lm8323_write_pwm(pwm, kill, num_cmds, pwm_cmds);
+	pwm->brightness = pwm->desired_brightness;
+
+ out:
+	mutex_unlock(&pwm->lock);
+}
+
+static void lm8323_pwm_set_brightness(struct led_classdev *led_cdev,
+				      enum led_brightness brightness)
+{
+	struct lm8323_pwm *pwm = cdev_to_pwm(led_cdev);
+	struct lm8323_chip *lm = pwm->chip;
+
+	mutex_lock(&pwm->lock);
+	pwm->desired_brightness = brightness;
+	mutex_unlock(&pwm->lock);
+
+	if (in_interrupt()) {
+		schedule_work(&pwm->work);
+	} else {
+		/*
+		 * Schedule PWM work as usual unless we are going into suspend
+		 */
+		mutex_lock(&lm->lock);
+		if (likely(!lm->pm_suspend))
+			schedule_work(&pwm->work);
+		else
+			lm8323_pwm_work(&pwm->work);
+		mutex_unlock(&lm->lock);
+	}
+}
+
+static ssize_t lm8323_pwm_show_time(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct led_classdev *led_cdev = dev_get_drvdata(dev);
+	struct lm8323_pwm *pwm = cdev_to_pwm(led_cdev);
+
+	return sprintf(buf, "%d\n", pwm->fade_time);
+}
+
+static ssize_t lm8323_pwm_store_time(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct led_classdev *led_cdev = dev_get_drvdata(dev);
+	struct lm8323_pwm *pwm = cdev_to_pwm(led_cdev);
+	int ret;
+	unsigned long time;
+
+	ret = strict_strtoul(buf, 10, &time);
+	/* Numbers only, please. */
+	if (ret)
+		return -EINVAL;
+
+	pwm->fade_time = time;
+
+	return strlen(buf);
+}
+static DEVICE_ATTR(time, 0644, lm8323_pwm_show_time, lm8323_pwm_store_time);
+
+static int init_pwm(struct lm8323_chip *lm, int id, struct device *dev,
+		    const char *name)
+{
+	struct lm8323_pwm *pwm;
+
+	BUG_ON(id > 3);
+
+	pwm = &lm->pwm[id - 1];
+
+	pwm->id = id;
+	pwm->fade_time = 0;
+	pwm->brightness = 0;
+	pwm->desired_brightness = 0;
+	pwm->running = false;
+	pwm->enabled = false;
+	INIT_WORK(&pwm->work, lm8323_pwm_work);
+	mutex_init(&pwm->lock);
+	pwm->chip = lm;
+
+	if (name) {
+		pwm->cdev.name = name;
+		pwm->cdev.brightness_set = lm8323_pwm_set_brightness;
+		if (led_classdev_register(dev, &pwm->cdev) < 0) {
+			dev_err(dev, "couldn't register PWM %d\n", id);
+			return -1;
+		}
+		if (device_create_file(pwm->cdev.dev,
+					&dev_attr_time) < 0) {
+			dev_err(dev, "couldn't register time attribute\n");
+			led_classdev_unregister(&pwm->cdev);
+			return -1;
+		}
+		pwm->enabled = true;
+	}
+
+	return 0;
+}
+
+static struct i2c_driver lm8323_i2c_driver;
+
+static ssize_t lm8323_show_disable(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct lm8323_chip *lm = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%u\n", !lm->kp_enabled);
+}
+
+static ssize_t lm8323_set_disable(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct lm8323_chip *lm = dev_get_drvdata(dev);
+	int ret;
+	unsigned long i;
+
+	ret = strict_strtoul(buf, 10, &i);
+
+	mutex_lock(&lm->lock);
+	lm->kp_enabled = !i;
+	mutex_unlock(&lm->lock);
+
+	return count;
+}
+static DEVICE_ATTR(disable_kp, 0644, lm8323_show_disable, lm8323_set_disable);
+
+static int __devinit lm8323_probe(struct i2c_client *client,
+				  const struct i2c_device_id *id)
+{
+	struct lm8323_platform_data *pdata = client->dev.platform_data;
+	struct input_dev *idev;
+	struct lm8323_chip *lm;
+	int i, err;
+	unsigned long tmo;
+	u8 data[2];
+
+	if (!pdata || !pdata->size_x || !pdata->size_y) {
+		dev_err(&client->dev, "missing platform_data\n");
+		return -EINVAL;
+	}
+
+	if (pdata->size_x > 8) {
+		dev_err(&client->dev, "invalid x size %d specified\n",
+			pdata->size_x);
+		return -EINVAL;
+	}
+
+	if (pdata->size_y > 12) {
+		dev_err(&client->dev, "invalid y size %d specified\n",
+			pdata->size_y);
+		return -EINVAL;
+	}
+
+	lm = kzalloc(sizeof *lm, GFP_KERNEL);
+	idev = input_allocate_device();
+	if (!lm || !idev) {
+		err = -ENOMEM;
+		goto fail1;
+	}
+
+	i2c_set_clientdata(client, lm);
+
+	lm->client = client;
+	lm->idev = idev;
+	mutex_init(&lm->lock);
+	INIT_WORK(&lm->work, lm8323_work);
+
+	lm->size_x = pdata->size_x;
+	lm->size_y = pdata->size_y;
+	dev_vdbg(&client->dev, "Keypad size: %d x %d\n",
+		 lm->size_x, lm->size_y);
+
+	lm->debounce_time = pdata->debounce_time;
+	lm->active_time = pdata->active_time;
+
+	lm8323_reset(lm);
+
+	/* Nothing's set up to service the IRQ yet, so just spin for max.
+	 * 100ms until we can configure. */
+	tmo = jiffies + msecs_to_jiffies(100);
+	while (lm8323_read(lm, LM8323_CMD_READ_INT, data, 1) == 1) {
+		if (data[0] & INT_NOINIT)
+			break;
+
+		if (time_after(jiffies, tmo)) {
+			dev_err(&client->dev,
+				"timeout waiting for initialisation\n");
+			break;
+		}
+
+		msleep(1);
+	}
+
+	lm8323_configure(lm);
+
+	/* If a true probe check the device */
+	if (lm8323_read_id(lm, data) != 0) {
+		dev_err(&client->dev, "device not found\n");
+		err = -ENODEV;
+		goto fail1;
+	}
+
+	for (i = 0; i < LM8323_NUM_PWMS; i++) {
+		err = init_pwm(lm, i + 1, &client->dev, pdata->pwm_names[i]);
+		if (err < 0)
+			goto fail2;
+	}
+
+	lm->kp_enabled = true;
+	err = device_create_file(&client->dev, &dev_attr_disable_kp);
+	if (err < 0)
+		goto fail2;
+
+	idev->name = pdata->name ? : "LM8323 keypad";
+	snprintf(lm->phys, sizeof(lm->phys),
+		 "%s/input-kp", dev_name(&client->dev));
+	idev->phys = lm->phys;
+
+	idev->evbit[0] = BIT(EV_KEY) | BIT(EV_MSC);
+	__set_bit(MSC_SCAN, idev->mscbit);
+	for (i = 0; i < LM8323_KEYMAP_SIZE; i++) {
+		__set_bit(pdata->keymap[i], idev->keybit);
+		lm->keymap[i] = pdata->keymap[i];
+	}
+	__clear_bit(KEY_RESERVED, idev->keybit);
+
+	if (pdata->repeat)
+		__set_bit(EV_REP, idev->evbit);
+
+	err = input_register_device(idev);
+	if (err) {
+		dev_dbg(&client->dev, "error registering input device\n");
+		goto fail3;
+	}
+
+	err = request_irq(client->irq, lm8323_irq,
+			  IRQF_TRIGGER_FALLING | IRQF_DISABLED,
+			  "lm8323", lm);
+	if (err) {
+		dev_err(&client->dev, "could not get IRQ %d\n", client->irq);
+		goto fail4;
+	}
+
+	device_init_wakeup(&client->dev, 1);
+	enable_irq_wake(client->irq);
+
+	return 0;
+
+fail4:
+	input_unregister_device(idev);
+	idev = NULL;
+fail3:
+	device_remove_file(&client->dev, &dev_attr_disable_kp);
+fail2:
+	while (--i >= 0)
+		if (lm->pwm[i].enabled)
+			led_classdev_unregister(&lm->pwm[i].cdev);
+fail1:
+	input_free_device(idev);
+	kfree(lm);
+	return err;
+}
+
+static int __devexit lm8323_remove(struct i2c_client *client)
+{
+	struct lm8323_chip *lm = i2c_get_clientdata(client);
+	int i;
+
+	disable_irq_wake(client->irq);
+	free_irq(client->irq, lm);
+	cancel_work_sync(&lm->work);
+
+	input_unregister_device(lm->idev);
+
+	device_remove_file(&lm->client->dev, &dev_attr_disable_kp);
+
+	for (i = 0; i < 3; i++)
+		if (lm->pwm[i].enabled)
+			led_classdev_unregister(&lm->pwm[i].cdev);
+
+	kfree(lm);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+/*
+ * We don't need to explicitly suspend the chip, as it already switches off
+ * when there's no activity.
+ */
+static int lm8323_suspend(struct i2c_client *client, pm_message_t mesg)
+{
+	struct lm8323_chip *lm = i2c_get_clientdata(client);
+	int i;
+
+	set_irq_wake(client->irq, 0);
+	disable_irq(client->irq);
+
+	mutex_lock(&lm->lock);
+	lm->pm_suspend = true;
+	mutex_unlock(&lm->lock);
+
+	for (i = 0; i < 3; i++)
+		if (lm->pwm[i].enabled)
+			led_classdev_suspend(&lm->pwm[i].cdev);
+
+	return 0;
+}
+
+static int lm8323_resume(struct i2c_client *client)
+{
+	struct lm8323_chip *lm = i2c_get_clientdata(client);
+	int i;
+
+	mutex_lock(&lm->lock);
+	lm->pm_suspend = false;
+	mutex_unlock(&lm->lock);
+
+	for (i = 0; i < 3; i++)
+		if (lm->pwm[i].enabled)
+			led_classdev_resume(&lm->pwm[i].cdev);
+
+	enable_irq(client->irq);
+	set_irq_wake(client->irq, 1);
+
+	return 0;
+}
+#else
+#define lm8323_suspend	NULL
+#define lm8323_resume	NULL
+#endif
+
+static const struct i2c_device_id lm8323_id[] = {
+	{ "lm8323", 0 },
+	{ }
+};
+
+static struct i2c_driver lm8323_i2c_driver = {
+	.driver = {
+		.name	= "lm8323",
+	},
+	.probe		= lm8323_probe,
+	.remove		= __devexit_p(lm8323_remove),
+	.suspend	= lm8323_suspend,
+	.resume		= lm8323_resume,
+	.id_table	= lm8323_id,
+};
+MODULE_DEVICE_TABLE(i2c, lm8323_id);
+
+static int __init lm8323_init(void)
+{
+	return i2c_add_driver(&lm8323_i2c_driver);
+}
+module_init(lm8323_init);
+
+static void __exit lm8323_exit(void)
+{
+	i2c_del_driver(&lm8323_i2c_driver);
+}
+module_exit(lm8323_exit);
+
+MODULE_AUTHOR("Timo O. Karjalainen <timo.o.karjalainen@nokia.com>");
+MODULE_AUTHOR("Daniel Stone");
+MODULE_AUTHOR("Felipe Balbi <felipe.balbi@nokia.com>");
+MODULE_DESCRIPTION("LM8323 keypad driver");
+MODULE_LICENSE("GPL");
+
diff --git a/include/linux/i2c/lm8323.h b/include/linux/i2c/lm8323.h
new file mode 100644
index 00000000000..478d668bc59
--- /dev/null
+++ b/include/linux/i2c/lm8323.h
@@ -0,0 +1,46 @@
+/*
+ * lm8323.h - Configuration for LM8323 keypad driver.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License only).
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __LINUX_LM8323_H
+#define __LINUX_LM8323_H
+
+#include <linux/types.h>
+
+/*
+ * Largest keycode that the chip can send, plus one,
+ * so keys can be mapped directly at the index of the
+ * LM8323 keycode instead of subtracting one.
+ */
+#define LM8323_KEYMAP_SIZE	(0x7f + 1)
+
+#define LM8323_NUM_PWMS		3
+
+struct lm8323_platform_data {
+	int debounce_time; /* Time to watch for key bouncing, in ms. */
+	int active_time; /* Idle time until sleep, in ms. */
+
+	int size_x;
+	int size_y;
+	bool repeat;
+	const unsigned short *keymap;
+
+	const char *pwm_names[LM8323_NUM_PWMS];
+
+	const char *name; /* Device name. */
+};
+
+#endif /* __LINUX_LM8323_H */
-- 
cgit v1.2.3-70-g09d2


From ca1b96e00ab5d1b0838965834469a0284c81a517 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 17 May 2009 19:12:21 +0200
Subject: ide: replace special_t typedef by IDE_SFLAG_* flags

Replace:
- special_t typedef by IDE_SFLAG_* flags
- 'special_t special' ide_drive_t's field by 'u8 special_flags' one

There should be no functional changes caused by this patch.

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-disk.c     |  4 ++--
 drivers/ide/ide-eh.c       |  9 ++++-----
 drivers/ide/ide-io.c       | 21 +++++++++++----------
 drivers/ide/ide-probe.c    |  6 +++---
 drivers/ide/ide-taskfile.c |  2 +-
 drivers/ide/siimage.c      |  4 ++--
 include/linux/ide.h        | 21 ++++++---------------
 7 files changed, 29 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index c2438804d3c..d345f5f23f0 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -428,14 +428,14 @@ static int set_multcount(ide_drive_t *drive, int arg)
 	if (arg < 0 || arg > (drive->id[ATA_ID_MAX_MULTSECT] & 0xff))
 		return -EINVAL;
 
-	if (drive->special.b.set_multmode)
+	if (drive->special_flags & IDE_SFLAG_SET_MULTMODE)
 		return -EBUSY;
 
 	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
 
 	drive->mult_req = arg;
-	drive->special.b.set_multmode = 1;
+	drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
 	error = blk_execute_rq(drive->queue, NULL, rq, 0);
 	blk_put_request(rq);
 
diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c
index 5d5fb961b5c..39d589254d4 100644
--- a/drivers/ide/ide-eh.c
+++ b/drivers/ide/ide-eh.c
@@ -52,7 +52,7 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq,
 	}
 
 	if ((rq->errors & ERROR_RECAL) == ERROR_RECAL)
-		drive->special.b.recalibrate = 1;
+		drive->special_flags |= IDE_SFLAG_RECALIBRATE;
 
 	++rq->errors;
 
@@ -268,9 +268,8 @@ static void ide_disk_pre_reset(ide_drive_t *drive)
 {
 	int legacy = (drive->id[ATA_ID_CFS_ENABLE_2] & 0x0400) ? 0 : 1;
 
-	drive->special.all = 0;
-	drive->special.b.set_geometry = legacy;
-	drive->special.b.recalibrate  = legacy;
+	drive->special_flags =
+		legacy ? (IDE_SFLAG_SET_GEOMETRY | IDE_SFLAG_RECALIBRATE) : 0;
 
 	drive->mult_count = 0;
 	drive->dev_flags &= ~IDE_DFLAG_PARKED;
@@ -280,7 +279,7 @@ static void ide_disk_pre_reset(ide_drive_t *drive)
 		drive->mult_req = 0;
 
 	if (drive->mult_req != drive->mult_count)
-		drive->special.b.set_multmode = 1;
+		drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
 }
 
 static void pre_reset(ide_drive_t *drive)
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 18557683ed5..644d7b4454a 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -194,14 +194,14 @@ static void ide_tf_set_setmult_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
 
 static ide_startstop_t do_special(ide_drive_t *drive)
 {
-	special_t *s = &drive->special;
 	struct ide_cmd cmd;
 
 #ifdef DEBUG
-	printk(KERN_DEBUG "%s: %s: 0x%02x\n", drive->name, __func__, s->all);
+	printk(KERN_DEBUG "%s: %s: 0x%02x\n", drive->name, __func__,
+		drive->special_flags);
 #endif
 	if (drive->media != ide_disk) {
-		s->all = 0;
+		drive->special_flags = 0;
 		drive->mult_req = 0;
 		return ide_stopped;
 	}
@@ -209,14 +209,14 @@ static ide_startstop_t do_special(ide_drive_t *drive)
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.protocol = ATA_PROT_NODATA;
 
-	if (s->b.set_geometry) {
-		s->b.set_geometry = 0;
+	if (drive->special_flags & IDE_SFLAG_SET_GEOMETRY) {
+		drive->special_flags &= ~IDE_SFLAG_SET_GEOMETRY;
 		ide_tf_set_specify_cmd(drive, &cmd.tf);
-	} else if (s->b.recalibrate) {
-		s->b.recalibrate = 0;
+	} else if (drive->special_flags & IDE_SFLAG_RECALIBRATE) {
+		drive->special_flags &= ~IDE_SFLAG_RECALIBRATE;
 		ide_tf_set_restore_cmd(drive, &cmd.tf);
-	} else if (s->b.set_multmode) {
-		s->b.set_multmode = 0;
+	} else if (drive->special_flags & IDE_SFLAG_SET_MULTMODE) {
+		drive->special_flags &= ~IDE_SFLAG_SET_MULTMODE;
 		ide_tf_set_setmult_cmd(drive, &cmd.tf);
 	} else
 		BUG();
@@ -339,7 +339,8 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
 		printk(KERN_ERR "%s: drive not ready for command\n", drive->name);
 		return startstop;
 	}
-	if (!drive->special.all) {
+
+	if (drive->special_flags == 0) {
 		struct ide_driver *drv;
 
 		/*
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index b609a581df4..727a67109ff 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -97,7 +97,7 @@ static void ide_disk_init_mult_count(ide_drive_t *drive)
 		drive->mult_req = id[ATA_ID_MULTSECT] & 0xff;
 
 		if (drive->mult_req)
-			drive->special.b.set_multmode = 1;
+			drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
 	}
 }
 
@@ -1138,8 +1138,8 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif)
 		drive->hwif			= hwif;
 		drive->ready_stat		= ATA_DRDY;
 		drive->bad_wstat		= BAD_W_STAT;
-		drive->special.b.recalibrate	= 1;
-		drive->special.b.set_geometry	= 1;
+		drive->special_flags		= IDE_SFLAG_RECALIBRATE |
+						  IDE_SFLAG_SET_GEOMETRY;
 		drive->name[0]			= 'h';
 		drive->name[1]			= 'd';
 		drive->name[2]			= 'a' + j;
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index f400eb4d4af..8cab3c26acd 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -166,7 +166,7 @@ static ide_startstop_t task_no_data_intr(ide_drive_t *drive)
 	if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) {
 		if (custom && tf->command == ATA_CMD_SET_MULTI) {
 			drive->mult_req = drive->mult_count = 0;
-			drive->special.b.recalibrate = 1;
+			drive->special_flags |= IDE_SFLAG_RECALIBRATE;
 			(void)ide_dump_status(drive, __func__, stat);
 			return ide_stopped;
 		} else if (custom && tf->command == ATA_CMD_INIT_DEV_PARAMS) {
diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c
index e4973cd1fba..bd82d228608 100644
--- a/drivers/ide/siimage.c
+++ b/drivers/ide/siimage.c
@@ -451,8 +451,8 @@ static int sil_sata_reset_poll(ide_drive_t *drive)
 static void sil_sata_pre_reset(ide_drive_t *drive)
 {
 	if (drive->media == ide_disk) {
-		drive->special.b.set_geometry = 0;
-		drive->special.b.recalibrate = 0;
+		drive->special_flags &=
+			~(IDE_SFLAG_SET_GEOMETRY | IDE_SFLAG_RECALIBRATE);
 	}
 }
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 34c128f0a33..fc61328a4cd 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -218,21 +218,12 @@ static inline void ide_std_init_ports(hw_regs_t *hw,
 
 /*
  * Special Driver Flags
- *
- * set_geometry	: respecify drive geometry
- * recalibrate	: seek to cyl 0
- * set_multmode	: set multmode count
- * reserved	: unused
  */
-typedef union {
-	unsigned all			: 8;
-	struct {
-		unsigned set_geometry	: 1;
-		unsigned recalibrate	: 1;
-		unsigned set_multmode	: 1;
-		unsigned reserved	: 5;
-	} b;
-} special_t;
+enum {
+	IDE_SFLAG_SET_GEOMETRY		= (1 << 0),
+	IDE_SFLAG_RECALIBRATE		= (1 << 1),
+	IDE_SFLAG_SET_MULTMODE		= (1 << 2),
+};
 
 /*
  * Status returned from various ide_ functions
@@ -530,7 +521,7 @@ struct ide_drive_s {
 	unsigned long sleep;		/* sleep until this time */
 	unsigned long timeout;		/* max time to wait for irq */
 
-	special_t	special;	/* special action flags */
+	u8	special_flags;		/* special action flags */
 
 	u8	select;			/* basic drive/head select reg value */
 	u8	retry_pio;		/* retrying dma capable host in pio */
-- 
cgit v1.2.3-70-g09d2


From 29e52cf793ded6bece50de50e738596f94f07d9f Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 17 May 2009 19:12:22 +0200
Subject: ide: remove chipset field from hw_regs_t

* Convert host drivers that still use hw_regs_t's chipset field to use
  the one in struct ide_port_info instead.

* Move special handling of ide_pci chipset type from ide_hw_configure()
  to ide_init_port().

* Remove chipset field from hw_regs_t.

While at it:
- remove stale comment in delkin_cb.c

There should be no functional changes caused by this patch.

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/at91_ide.c     | 2 +-
 drivers/ide/au1xxx-ide.c   | 2 +-
 drivers/ide/buddha.c       | 3 +--
 drivers/ide/cmd640.c       | 2 --
 drivers/ide/delkin_cb.c    | 2 +-
 drivers/ide/falconide.c    | 3 +--
 drivers/ide/gayle.c        | 3 +--
 drivers/ide/icside.c       | 3 ++-
 drivers/ide/ide-4drives.c  | 2 +-
 drivers/ide/ide-cs.c       | 2 +-
 drivers/ide/ide-generic.c  | 3 +--
 drivers/ide/ide-h8300.c    | 2 +-
 drivers/ide/ide-legacy.c   | 1 -
 drivers/ide/ide-pnp.c      | 2 +-
 drivers/ide/ide-probe.c    | 4 +---
 drivers/ide/ide_platform.c | 3 +--
 drivers/ide/macide.c       | 3 +--
 drivers/ide/palm_bk3710.c  | 2 +-
 drivers/ide/q40ide.c       | 3 +--
 drivers/ide/rapide.c       | 2 +-
 drivers/ide/scc_pata.c     | 2 +-
 drivers/ide/setup-pci.c    | 1 -
 drivers/ide/sgiioc4.c      | 1 -
 include/linux/ide.h        | 1 -
 24 files changed, 20 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/at91_ide.c b/drivers/ide/at91_ide.c
index 403d0e4265d..8d39cc9bdf9 100644
--- a/drivers/ide/at91_ide.c
+++ b/drivers/ide/at91_ide.c
@@ -216,6 +216,7 @@ static const struct ide_port_info at91_ide_port_info __initdata = {
 	.host_flags 	= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA | IDE_HFLAG_SINGLE |
 			  IDE_HFLAG_NO_IO_32BIT | IDE_HFLAG_UNMASK_IRQS,
 	.pio_mask 	= ATA_PIO6,
+	.chipset	= ide_generic,
 };
 
 /*
@@ -304,7 +305,6 @@ static int __init at91_ide_probe(struct platform_device *pdev)
 		ide_std_init_ports(&hw, tf_base, ctl_base + 6);
 
 	hw.irq = board->irq_pin;
-	hw.chipset = ide_generic;
 	hw.dev = &pdev->dev;
 
 	host = ide_host_alloc(&at91_ide_port_info, hws);
diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c
index 46013644c96..9b31f830e2f 100644
--- a/drivers/ide/au1xxx-ide.c
+++ b/drivers/ide/au1xxx-ide.c
@@ -499,6 +499,7 @@ static const struct ide_port_info au1xxx_port_info = {
 #ifdef CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA
 	.mwdma_mask		= ATA_MWDMA2,
 #endif
+	.chipset		= ide_au1xxx,
 };
 
 static int au_ide_probe(struct platform_device *dev)
@@ -548,7 +549,6 @@ static int au_ide_probe(struct platform_device *dev)
 	auide_setup_ports(&hw, ahwif);
 	hw.irq = ahwif->irq;
 	hw.dev = &dev->dev;
-	hw.chipset = ide_au1xxx;
 
 	ret = ide_host_add(&au1xxx_port_info, hws, &host);
 	if (ret)
diff --git a/drivers/ide/buddha.c b/drivers/ide/buddha.c
index d028f8864bc..9aa2cd9be31 100644
--- a/drivers/ide/buddha.c
+++ b/drivers/ide/buddha.c
@@ -139,13 +139,12 @@ static void __init buddha_setup_ports(hw_regs_t *hw, unsigned long base,
 
 	hw->irq = IRQ_AMIGA_PORTS;
 	hw->ack_intr = ack_intr;
-
-	hw->chipset = ide_generic;
 }
 
 static const struct ide_port_info buddha_port_info = {
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
+	.chipset		= ide_generic,
 };
 
     /*
diff --git a/drivers/ide/cmd640.c b/drivers/ide/cmd640.c
index 8890276fef7..e862a2503ab 100644
--- a/drivers/ide/cmd640.c
+++ b/drivers/ide/cmd640.c
@@ -762,11 +762,9 @@ static int __init cmd640x_init(void)
 
 	ide_std_init_ports(&hw[0], 0x1f0, 0x3f6);
 	hw[0].irq = 14;
-	hw[0].chipset = ide_cmd640;
 
 	ide_std_init_ports(&hw[1], 0x170, 0x376);
 	hw[1].irq = 15;
-	hw[1].chipset = ide_cmd640;
 
 	printk(KERN_INFO "cmd640: buggy cmd640%c interface on %s, config=0x%02x"
 			 "\n", 'a' + cmd640_chip_version - 1, bus_type, cfr);
diff --git a/drivers/ide/delkin_cb.c b/drivers/ide/delkin_cb.c
index f153b95619b..a0de834a81c 100644
--- a/drivers/ide/delkin_cb.c
+++ b/drivers/ide/delkin_cb.c
@@ -68,6 +68,7 @@ static const struct ide_port_info delkin_cb_port_info = {
 				  IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
 	.init_chipset		= delkin_cb_init_chipset,
+	.chipset		= ide_pci,
 };
 
 static int __devinit
@@ -97,7 +98,6 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
 	ide_std_init_ports(&hw, base + 0x10, base + 0x1e);
 	hw.irq = dev->irq;
 	hw.dev = &dev->dev;
-	hw.chipset = ide_pci;		/* this enables IRQ sharing */
 
 	rc = ide_host_add(&delkin_cb_port_info, hws, &host);
 	if (rc)
diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c
index 0e2df6755ec..770cfa67bdc 100644
--- a/drivers/ide/falconide.c
+++ b/drivers/ide/falconide.c
@@ -111,6 +111,7 @@ static const struct ide_port_info falconide_port_info = {
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_SERIALIZE |
 				  IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
+	.chipset		= ide_generic,
 };
 
 static void __init falconide_setup_ports(hw_regs_t *hw)
@@ -128,8 +129,6 @@ static void __init falconide_setup_ports(hw_regs_t *hw)
 
 	hw->irq = IRQ_MFP_IDE;
 	hw->ack_intr = NULL;
-
-	hw->chipset = ide_generic;
 }
 
     /*
diff --git a/drivers/ide/gayle.c b/drivers/ide/gayle.c
index c7119516c5a..71db2f9c336 100644
--- a/drivers/ide/gayle.c
+++ b/drivers/ide/gayle.c
@@ -106,14 +106,13 @@ static void __init gayle_setup_ports(hw_regs_t *hw, unsigned long base,
 
 	hw->irq = IRQ_AMIGA_PORTS;
 	hw->ack_intr = ack_intr;
-
-	hw->chipset = ide_generic;
 }
 
 static const struct ide_port_info gayle_port_info = {
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_SERIALIZE |
 				  IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
+	.chipset		= ide_generic,
 };
 
     /*
diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c
index 36da913cc55..6352a44ed17 100644
--- a/drivers/ide/icside.c
+++ b/drivers/ide/icside.c
@@ -398,11 +398,11 @@ static void icside_setup_ports(hw_regs_t *hw, void __iomem *base,
 
 	hw->irq = ec->irq;
 	hw->dev = &ec->dev;
-	hw->chipset = ide_acorn;
 }
 
 static const struct ide_port_info icside_v5_port_info = {
 	.host_flags		= IDE_HFLAG_NO_DMA,
+	.chipset		= ide_acorn,
 };
 
 static int __devinit
@@ -457,6 +457,7 @@ static const struct ide_port_info icside_v6_port_info __initdata = {
 	.host_flags		= IDE_HFLAG_SERIALIZE | IDE_HFLAG_MMIO,
 	.mwdma_mask		= ATA_MWDMA2,
 	.swdma_mask		= ATA_SWDMA2,
+	.chipset		= ide_acorn,
 };
 
 static int __devinit
diff --git a/drivers/ide/ide-4drives.c b/drivers/ide/ide-4drives.c
index 78aca75a2c4..617ca7a5ec8 100644
--- a/drivers/ide/ide-4drives.c
+++ b/drivers/ide/ide-4drives.c
@@ -25,6 +25,7 @@ static const struct ide_port_info ide_4drives_port_info = {
 	.port_ops		= &ide_4drives_port_ops,
 	.host_flags		= IDE_HFLAG_SERIALIZE | IDE_HFLAG_NO_DMA |
 				  IDE_HFLAG_4DRIVES,
+	.chipset		= ide_4drives,
 };
 
 static int __init ide_4drives_init(void)
@@ -52,7 +53,6 @@ static int __init ide_4drives_init(void)
 
 	ide_std_init_ports(&hw, base, ctl);
 	hw.irq = 14;
-	hw.chipset = ide_4drives;
 
 	return ide_host_add(&ide_4drives_port_info, hws, NULL);
 }
diff --git a/drivers/ide/ide-cs.c b/drivers/ide/ide-cs.c
index 9e47f3529d5..43d09dcae28 100644
--- a/drivers/ide/ide-cs.c
+++ b/drivers/ide/ide-cs.c
@@ -155,6 +155,7 @@ static const struct ide_port_info idecs_port_info = {
 	.port_ops		= &idecs_port_ops,
 	.host_flags		= IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
+	.chipset		= ide_pci,
 };
 
 static struct ide_host *idecs_register(unsigned long io, unsigned long ctl,
@@ -181,7 +182,6 @@ static struct ide_host *idecs_register(unsigned long io, unsigned long ctl,
     memset(&hw, 0, sizeof(hw));
     ide_std_init_ports(&hw, io, ctl);
     hw.irq = irq;
-    hw.chipset = ide_pci;
     hw.dev = &handle->dev;
 
     rc = ide_host_add(&idecs_port_info, hws, &host);
diff --git a/drivers/ide/ide-generic.c b/drivers/ide/ide-generic.c
index 7812ca0be13..0427759d018 100644
--- a/drivers/ide/ide-generic.c
+++ b/drivers/ide/ide-generic.c
@@ -29,6 +29,7 @@ MODULE_PARM_DESC(probe_mask, "probe mask for legacy ISA IDE ports");
 
 static const struct ide_port_info ide_generic_port_info = {
 	.host_flags		= IDE_HFLAG_NO_DMA,
+	.chipset		= ide_generic,
 };
 
 #ifdef CONFIG_ARM
@@ -132,8 +133,6 @@ static int __init ide_generic_init(void)
 #else
 			hw.irq = legacy_irqs[i];
 #endif
-			hw.chipset = ide_generic;
-
 			rc = ide_host_add(&ide_generic_port_info, hws, NULL);
 			if (rc) {
 				release_region(io_addr + 0x206, 1);
diff --git a/drivers/ide/ide-h8300.c b/drivers/ide/ide-h8300.c
index c06ebdc4a13..40eff6c9759 100644
--- a/drivers/ide/ide-h8300.c
+++ b/drivers/ide/ide-h8300.c
@@ -73,12 +73,12 @@ static inline void hw_setup(hw_regs_t *hw)
 		hw->io_ports_array[i] = CONFIG_H8300_IDE_BASE + H8300_IDE_GAP*i;
 	hw->io_ports.ctl_addr = CONFIG_H8300_IDE_ALT;
 	hw->irq = EXT_IRQ0 + CONFIG_H8300_IDE_IRQ;
-	hw->chipset = ide_generic;
 }
 
 static const struct ide_port_info h8300_port_info = {
 	.tp_ops			= &h8300_tp_ops,
 	.host_flags		= IDE_HFLAG_NO_IO_32BIT | IDE_HFLAG_NO_DMA,
+	.chipset		= ide_generic,
 };
 
 static int __init h8300_ide_init(void)
diff --git a/drivers/ide/ide-legacy.c b/drivers/ide/ide-legacy.c
index 8c5dcbf2254..0c5b29c56cb 100644
--- a/drivers/ide/ide-legacy.c
+++ b/drivers/ide/ide-legacy.c
@@ -33,7 +33,6 @@ static void ide_legacy_init_one(hw_regs_t **hws, hw_regs_t *hw,
 
 	ide_std_init_ports(hw, base, ctl);
 	hw->irq = irq;
-	hw->chipset = d->chipset;
 	hw->config = config;
 
 	hws[port_no] = hw;
diff --git a/drivers/ide/ide-pnp.c b/drivers/ide/ide-pnp.c
index 6e80b774e88..47043fda239 100644
--- a/drivers/ide/ide-pnp.c
+++ b/drivers/ide/ide-pnp.c
@@ -29,6 +29,7 @@ static struct pnp_device_id idepnp_devices[] = {
 
 static const struct ide_port_info ide_pnp_port_info = {
 	.host_flags		= IDE_HFLAG_NO_DMA,
+	.chipset		= ide_generic,
 };
 
 static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
@@ -62,7 +63,6 @@ static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
 	memset(&hw, 0, sizeof(hw));
 	ide_std_init_ports(&hw, base, ctl);
 	hw.irq = pnp_irq(dev, 0);
-	hw.chipset = ide_generic;
 
 	rc = ide_host_add(&ide_pnp_port_info, hws, &host);
 	if (rc)
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 727a67109ff..f17ba1932ad 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1048,8 +1048,7 @@ static void ide_init_port(ide_hwif_t *hwif, unsigned int port,
 {
 	hwif->channel = port;
 
-	if (d->chipset)
-		hwif->chipset = d->chipset;
+	hwif->chipset = d->chipset ? d->chipset : ide_pci;
 
 	if (d->init_iops)
 		d->init_iops(hwif);
@@ -1178,7 +1177,6 @@ static void ide_init_port_hw(ide_hwif_t *hwif, hw_regs_t *hw)
 {
 	memcpy(&hwif->io_ports, &hw->io_ports, sizeof(hwif->io_ports));
 	hwif->irq = hw->irq;
-	hwif->chipset = hw->chipset;
 	hwif->dev = hw->dev;
 	hwif->gendev.parent = hw->parent ? hw->parent : hw->dev;
 	hwif->ack_intr = hw->ack_intr;
diff --git a/drivers/ide/ide_platform.c b/drivers/ide/ide_platform.c
index 051b4ab0f35..813653362a2 100644
--- a/drivers/ide/ide_platform.c
+++ b/drivers/ide/ide_platform.c
@@ -40,12 +40,11 @@ static void __devinit plat_ide_setup_ports(hw_regs_t *hw,
 	hw->io_ports.ctl_addr = (unsigned long)ctrl;
 
 	hw->irq = irq;
-
-	hw->chipset = ide_generic;
 }
 
 static const struct ide_port_info platform_ide_port_info = {
 	.host_flags		= IDE_HFLAG_NO_DMA,
+	.chipset		= ide_generic,
 };
 
 static int __devinit plat_ide_probe(struct platform_device *pdev)
diff --git a/drivers/ide/macide.c b/drivers/ide/macide.c
index 4b1718e8328..3af9e96da61 100644
--- a/drivers/ide/macide.c
+++ b/drivers/ide/macide.c
@@ -76,13 +76,12 @@ static void __init macide_setup_ports(hw_regs_t *hw, unsigned long base,
 
 	hw->irq = irq;
 	hw->ack_intr = ack_intr;
-
-	hw->chipset = ide_generic;
 }
 
 static const struct ide_port_info macide_port_info = {
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
+	.chipset		= ide_generic,
 };
 
 static const char *mac_ide_name[] =
diff --git a/drivers/ide/palm_bk3710.c b/drivers/ide/palm_bk3710.c
index 09d813d313f..a455c25f43c 100644
--- a/drivers/ide/palm_bk3710.c
+++ b/drivers/ide/palm_bk3710.c
@@ -306,6 +306,7 @@ static struct ide_port_info __devinitdata palm_bk3710_port_info = {
 	.host_flags		= IDE_HFLAG_MMIO,
 	.pio_mask		= ATA_PIO4,
 	.mwdma_mask		= ATA_MWDMA2,
+	.chipset		= ide_palm3710,
 };
 
 static int __init palm_bk3710_probe(struct platform_device *pdev)
@@ -363,7 +364,6 @@ static int __init palm_bk3710_probe(struct platform_device *pdev)
 			(base + IDE_PALM_ATA_PRI_CTL_OFFSET);
 	hw.irq = irq->start;
 	hw.dev = &pdev->dev;
-	hw.chipset = ide_palm3710;
 
 	palm_bk3710_port_info.udma_mask = rate < 100000000 ? ATA_UDMA4 :
 							     ATA_UDMA5;
diff --git a/drivers/ide/q40ide.c b/drivers/ide/q40ide.c
index c7934667924..7488d4ff3d7 100644
--- a/drivers/ide/q40ide.c
+++ b/drivers/ide/q40ide.c
@@ -70,8 +70,6 @@ static void q40_ide_setup_ports(hw_regs_t *hw, unsigned long base,
 
 	hw->irq = irq;
 	hw->ack_intr = ack_intr;
-
-	hw->chipset = ide_generic;
 }
 
 static void q40ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd,
@@ -119,6 +117,7 @@ static const struct ide_port_info q40ide_port_info = {
 	.tp_ops			= &q40ide_tp_ops,
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
+	.chipset		= ide_generic,
 };
 
 /* 
diff --git a/drivers/ide/rapide.c b/drivers/ide/rapide.c
index d5003ca6980..bd4d7a8a666 100644
--- a/drivers/ide/rapide.c
+++ b/drivers/ide/rapide.c
@@ -13,6 +13,7 @@
 
 static const struct ide_port_info rapide_port_info = {
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
+	.chipset		= ide_generic,
 };
 
 static void rapide_setup_ports(hw_regs_t *hw, void __iomem *base,
@@ -49,7 +50,6 @@ rapide_probe(struct expansion_card *ec, const struct ecard_id *id)
 
 	memset(&hw, 0, sizeof(hw));
 	rapide_setup_ports(&hw, base, base + 0x818, 1 << 6, ec->irq);
-	hw.chipset = ide_generic;
 	hw.dev = &ec->dev;
 
 	ret = ide_host_add(&rapide_port_info, hws, &host);
diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c
index 5be41f25204..9e3aef31733 100644
--- a/drivers/ide/scc_pata.c
+++ b/drivers/ide/scc_pata.c
@@ -567,7 +567,6 @@ static int scc_ide_setup_pci_device(struct pci_dev *dev,
 		hw.io_ports_array[i] = ports->dma + 0x20 + i * 4;
 	hw.irq = dev->irq;
 	hw.dev = &dev->dev;
-	hw.chipset = ide_pci;
 
 	rc = ide_host_add(d, hws, &host);
 	if (rc)
@@ -823,6 +822,7 @@ static const struct ide_port_info scc_chipset __devinitdata = {
 	.host_flags	= IDE_HFLAG_SINGLE,
 	.irq_flags	= IRQF_SHARED,
 	.pio_mask	= ATA_PIO4,
+	.chipset	= ide_pci,
 };
 
 /**
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index 7a3a12d6e63..82519ddc910 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -344,7 +344,6 @@ static int ide_hw_configure(struct pci_dev *dev, const struct ide_port_info *d,
 
 	memset(hw, 0, sizeof(*hw));
 	hw->dev = &dev->dev;
-	hw->chipset = d->chipset ? d->chipset : ide_pci;
 	ide_std_init_ports(hw, base, ctl | 2);
 
 	return 0;
diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c
index e5d2a48a84d..676d41c7add 100644
--- a/drivers/ide/sgiioc4.c
+++ b/drivers/ide/sgiioc4.c
@@ -575,7 +575,6 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev)
 	memset(&hw, 0, sizeof(hw));
 	sgiioc4_init_hwif_ports(&hw, cmd_base, ctl, irqport);
 	hw.irq = dev->irq;
-	hw.chipset = ide_pci;
 	hw.dev = &dev->dev;
 
 	/* Initializing chipset IRQ Registers */
diff --git a/include/linux/ide.h b/include/linux/ide.h
index fc61328a4cd..9652edbd26a 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -186,7 +186,6 @@ typedef struct hw_regs_s {
 
 	int		irq;			/* our irq number */
 	ide_ack_intr_t	*ack_intr;		/* acknowledge interrupt */
-	hwif_chipset_t  chipset;
 	struct device	*dev, *parent;
 	unsigned long	config;
 } hw_regs_t;
-- 
cgit v1.2.3-70-g09d2


From dca3983059a4481e4ae97bbf0ac4b4c21429e1a5 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 17 May 2009 19:12:24 +0200
Subject: ide: pass number of ports to ide_host_{alloc,add}() (v2)

Pass number of ports to ide_host_{alloc,add}() and then update
all users accordingly.

v2:
- drop no longer needed NULL initializers in buddha.c, cmd640.c and gayle.c
  (noticed by Sergei)

There should be no functional changes caused by this patch.

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/at91_ide.c     | 5 ++---
 drivers/ide/au1xxx-ide.c   | 4 ++--
 drivers/ide/buddha.c       | 4 ++--
 drivers/ide/cmd640.c       | 5 +++--
 drivers/ide/cs5520.c       | 4 ++--
 drivers/ide/delkin_cb.c    | 4 ++--
 drivers/ide/falconide.c    | 4 ++--
 drivers/ide/gayle.c        | 4 ++--
 drivers/ide/icside.c       | 8 ++++----
 drivers/ide/ide-4drives.c  | 4 ++--
 drivers/ide/ide-cs.c       | 4 ++--
 drivers/ide/ide-generic.c  | 4 ++--
 drivers/ide/ide-h8300.c    | 4 ++--
 drivers/ide/ide-legacy.c   | 4 ++--
 drivers/ide/ide-pnp.c      | 4 ++--
 drivers/ide/ide-probe.c    | 9 +++++----
 drivers/ide/ide_platform.c | 4 ++--
 drivers/ide/macide.c       | 4 ++--
 drivers/ide/palm_bk3710.c  | 4 ++--
 drivers/ide/pmac.c         | 4 ++--
 drivers/ide/q40ide.c       | 4 ++--
 drivers/ide/rapide.c       | 4 ++--
 drivers/ide/scc_pata.c     | 4 ++--
 drivers/ide/setup-pci.c    | 6 +++---
 drivers/ide/sgiioc4.c      | 4 ++--
 drivers/ide/tx4938ide.c    | 5 ++---
 drivers/ide/tx4939ide.c    | 5 ++---
 include/linux/ide.h        | 5 +++--
 28 files changed, 64 insertions(+), 64 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/at91_ide.c b/drivers/ide/at91_ide.c
index 8d39cc9bdf9..11fe1ffdff7 100644
--- a/drivers/ide/at91_ide.c
+++ b/drivers/ide/at91_ide.c
@@ -247,8 +247,7 @@ irqreturn_t at91_irq_handler(int irq, void *dev_id)
 static int __init at91_ide_probe(struct platform_device *pdev)
 {
 	int ret;
-	hw_regs_t hw;
-	hw_regs_t *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 	struct ide_host *host;
 	struct resource *res;
 	unsigned long tf_base = 0, ctl_base = 0;
@@ -307,7 +306,7 @@ static int __init at91_ide_probe(struct platform_device *pdev)
 	hw.irq = board->irq_pin;
 	hw.dev = &pdev->dev;
 
-	host = ide_host_alloc(&at91_ide_port_info, hws);
+	host = ide_host_alloc(&at91_ide_port_info, hws, 1);
 	if (!host) {
 		perr("failed to allocate ide host\n");
 		return -ENOMEM;
diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c
index 9b31f830e2f..32f5be68601 100644
--- a/drivers/ide/au1xxx-ide.c
+++ b/drivers/ide/au1xxx-ide.c
@@ -508,7 +508,7 @@ static int au_ide_probe(struct platform_device *dev)
 	struct resource *res;
 	struct ide_host *host;
 	int ret = 0;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 
 #if defined(CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA)
 	char *mode = "MWDMA2";
@@ -550,7 +550,7 @@ static int au_ide_probe(struct platform_device *dev)
 	hw.irq = ahwif->irq;
 	hw.dev = &dev->dev;
 
-	ret = ide_host_add(&au1xxx_port_info, hws, &host);
+	ret = ide_host_add(&au1xxx_port_info, hws, 1, &host);
 	if (ret)
 		goto out;
 
diff --git a/drivers/ide/buddha.c b/drivers/ide/buddha.c
index 9aa2cd9be31..0450652cdab 100644
--- a/drivers/ide/buddha.c
+++ b/drivers/ide/buddha.c
@@ -160,7 +160,7 @@ static int __init buddha_init(void)
 
 	while ((z = zorro_find_device(ZORRO_WILDCARD, z))) {
 		unsigned long board;
-		hw_regs_t hw[MAX_NUM_HWIFS], *hws[] = { NULL, NULL, NULL, NULL };
+		hw_regs_t hw[MAX_NUM_HWIFS], *hws[MAX_NUM_HWIFS];
 
 		if (z->id == ZORRO_PROD_INDIVIDUAL_COMPUTERS_BUDDHA) {
 			buddha_num_hwifs = BUDDHA_NUM_HWIFS;
@@ -224,7 +224,7 @@ fail_base2:
 			hws[i] = &hw[i];
 		}
 
-		ide_host_add(&buddha_port_info, hws, NULL);
+		ide_host_add(&buddha_port_info, hws, i, NULL);
 	}
 
 	return 0;
diff --git a/drivers/ide/cmd640.c b/drivers/ide/cmd640.c
index e862a2503ab..edb3a7a35c8 100644
--- a/drivers/ide/cmd640.c
+++ b/drivers/ide/cmd640.c
@@ -708,7 +708,7 @@ static int __init cmd640x_init(void)
 	int second_port_cmd640 = 0, rc;
 	const char *bus_type, *port2;
 	u8 b, cfr;
-	hw_regs_t hw[2], *hws[] = { NULL, NULL, NULL, NULL };
+	hw_regs_t hw[2], *hws[2];
 
 	if (cmd640_vlb && probe_for_cmd640_vlb()) {
 		bus_type = "VLB";
@@ -822,7 +822,8 @@ static int __init cmd640x_init(void)
 	cmd640_dump_regs();
 #endif
 
-	return ide_host_add(&cmd640_port_info, hws, NULL);
+	return ide_host_add(&cmd640_port_info, hws, second_port_cmd640 ? 2 : 1,
+			    NULL);
 }
 
 module_param_named(probe_vlb, cmd640_vlb, bool, 0);
diff --git a/drivers/ide/cs5520.c b/drivers/ide/cs5520.c
index 87987a7d36c..a9023d7843f 100644
--- a/drivers/ide/cs5520.c
+++ b/drivers/ide/cs5520.c
@@ -110,7 +110,7 @@ static const struct ide_port_info cyrix_chipset __devinitdata = {
 static int __devinit cs5520_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	const struct ide_port_info *d = &cyrix_chipset;
-	hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL };
+	hw_regs_t hw[2], *hws[] = { NULL, NULL };
 
 	ide_setup_pci_noise(dev, d);
 
@@ -136,7 +136,7 @@ static int __devinit cs5520_init_one(struct pci_dev *dev, const struct pci_devic
 	ide_pci_setup_ports(dev, d, &hw[0], &hws[0]);
 	hw[0].irq = 14;
 
-	return ide_host_add(d, hws, NULL);
+	return ide_host_add(d, hws, 2, NULL);
 }
 
 static const struct pci_device_id cs5520_pci_tbl[] = {
diff --git a/drivers/ide/delkin_cb.c b/drivers/ide/delkin_cb.c
index a0de834a81c..d4a76f22ed1 100644
--- a/drivers/ide/delkin_cb.c
+++ b/drivers/ide/delkin_cb.c
@@ -77,7 +77,7 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
 	struct ide_host *host;
 	unsigned long base;
 	int rc;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 
 	rc = pci_enable_device(dev);
 	if (rc) {
@@ -99,7 +99,7 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
 	hw.irq = dev->irq;
 	hw.dev = &dev->dev;
 
-	rc = ide_host_add(&delkin_cb_port_info, hws, &host);
+	rc = ide_host_add(&delkin_cb_port_info, hws, 1, &host);
 	if (rc)
 		goto out_disable;
 
diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c
index 770cfa67bdc..adb5b0cf762 100644
--- a/drivers/ide/falconide.c
+++ b/drivers/ide/falconide.c
@@ -138,7 +138,7 @@ static void __init falconide_setup_ports(hw_regs_t *hw)
 static int __init falconide_init(void)
 {
 	struct ide_host *host;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 	int rc;
 
 	if (!MACH_IS_ATARI || !ATARIHW_PRESENT(IDE))
@@ -153,7 +153,7 @@ static int __init falconide_init(void)
 
 	falconide_setup_ports(&hw);
 
-	host = ide_host_alloc(&falconide_port_info, hws);
+	host = ide_host_alloc(&falconide_port_info, hws, 1);
 	if (host == NULL) {
 		rc = -ENOMEM;
 		goto err;
diff --git a/drivers/ide/gayle.c b/drivers/ide/gayle.c
index 71db2f9c336..253ff34afd8 100644
--- a/drivers/ide/gayle.c
+++ b/drivers/ide/gayle.c
@@ -125,7 +125,7 @@ static int __init gayle_init(void)
     unsigned long base, ctrlport, irqport;
     ide_ack_intr_t *ack_intr;
     int a4000, i, rc;
-    hw_regs_t hw[GAYLE_NUM_HWIFS], *hws[] = { NULL, NULL, NULL, NULL };
+    hw_regs_t hw[GAYLE_NUM_HWIFS], *hws[GAYLE_NUM_HWIFS];
 
     if (!MACH_IS_AMIGA)
 	return -ENODEV;
@@ -170,7 +170,7 @@ found:
 	hws[i] = &hw[i];
     }
 
-    rc = ide_host_add(&gayle_port_info, hws, NULL);
+    rc = ide_host_add(&gayle_port_info, hws, i, NULL);
     if (rc)
 	release_mem_region(res_start, res_n);
 
diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c
index 6352a44ed17..6223b80beb3 100644
--- a/drivers/ide/icside.c
+++ b/drivers/ide/icside.c
@@ -410,7 +410,7 @@ icside_register_v5(struct icside_state *state, struct expansion_card *ec)
 {
 	void __iomem *base;
 	struct ide_host *host;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 	int ret;
 
 	base = ecardm_iomap(ec, ECARD_RES_MEMC, 0, 0);
@@ -431,7 +431,7 @@ icside_register_v5(struct icside_state *state, struct expansion_card *ec)
 
 	icside_setup_ports(&hw, base, &icside_cardinfo_v5, ec);
 
-	host = ide_host_alloc(&icside_v5_port_info, hws);
+	host = ide_host_alloc(&icside_v5_port_info, hws, 1);
 	if (host == NULL)
 		return -ENODEV;
 
@@ -467,7 +467,7 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec)
 	struct ide_host *host;
 	unsigned int sel = 0;
 	int ret;
-	hw_regs_t hw[2], *hws[] = { &hw[0], &hw[1], NULL, NULL };
+	hw_regs_t hw[2], *hws[] = { &hw[0], &hw[1] };
 	struct ide_port_info d = icside_v6_port_info;
 
 	ioc_base = ecardm_iomap(ec, ECARD_RES_IOCFAST, 0, 0);
@@ -507,7 +507,7 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec)
 	icside_setup_ports(&hw[0], easi_base, &icside_cardinfo_v6_1, ec);
 	icside_setup_ports(&hw[1], easi_base, &icside_cardinfo_v6_2, ec);
 
-	host = ide_host_alloc(&d, hws);
+	host = ide_host_alloc(&d, hws, 2);
 	if (host == NULL)
 		return -ENODEV;
 
diff --git a/drivers/ide/ide-4drives.c b/drivers/ide/ide-4drives.c
index 617ca7a5ec8..189b8bd9957 100644
--- a/drivers/ide/ide-4drives.c
+++ b/drivers/ide/ide-4drives.c
@@ -31,7 +31,7 @@ static const struct ide_port_info ide_4drives_port_info = {
 static int __init ide_4drives_init(void)
 {
 	unsigned long base = 0x1f0, ctl = 0x3f6;
-	hw_regs_t hw, *hws[] = { &hw, &hw, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw, &hw };
 
 	if (probe_4drives == 0)
 		return -ENODEV;
@@ -54,7 +54,7 @@ static int __init ide_4drives_init(void)
 	ide_std_init_ports(&hw, base, ctl);
 	hw.irq = 14;
 
-	return ide_host_add(&ide_4drives_port_info, hws, NULL);
+	return ide_host_add(&ide_4drives_port_info, hws, 2, NULL);
 }
 
 module_init(ide_4drives_init);
diff --git a/drivers/ide/ide-cs.c b/drivers/ide/ide-cs.c
index 43d09dcae28..63309ad04cb 100644
--- a/drivers/ide/ide-cs.c
+++ b/drivers/ide/ide-cs.c
@@ -164,7 +164,7 @@ static struct ide_host *idecs_register(unsigned long io, unsigned long ctl,
     struct ide_host *host;
     ide_hwif_t *hwif;
     int i, rc;
-    hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+    hw_regs_t hw, *hws[] = { &hw };
 
     if (!request_region(io, 8, DRV_NAME)) {
 	printk(KERN_ERR "%s: I/O resource 0x%lX-0x%lX not free.\n",
@@ -184,7 +184,7 @@ static struct ide_host *idecs_register(unsigned long io, unsigned long ctl,
     hw.irq = irq;
     hw.dev = &handle->dev;
 
-    rc = ide_host_add(&idecs_port_info, hws, &host);
+    rc = ide_host_add(&idecs_port_info, hws, 1, &host);
     if (rc)
 	goto out_release;
 
diff --git a/drivers/ide/ide-generic.c b/drivers/ide/ide-generic.c
index 0427759d018..0d40848540d 100644
--- a/drivers/ide/ide-generic.c
+++ b/drivers/ide/ide-generic.c
@@ -86,7 +86,7 @@ static void ide_generic_check_pci_legacy_iobases(int *primary, int *secondary)
 
 static int __init ide_generic_init(void)
 {
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 	unsigned long io_addr;
 	int i, rc = 0, primary = 0, secondary = 0;
 
@@ -133,7 +133,7 @@ static int __init ide_generic_init(void)
 #else
 			hw.irq = legacy_irqs[i];
 #endif
-			rc = ide_host_add(&ide_generic_port_info, hws, NULL);
+			rc = ide_host_add(&ide_generic_port_info, hws, 1, NULL);
 			if (rc) {
 				release_region(io_addr + 0x206, 1);
 				release_region(io_addr, 8);
diff --git a/drivers/ide/ide-h8300.c b/drivers/ide/ide-h8300.c
index 40eff6c9759..0b5fabe2806 100644
--- a/drivers/ide/ide-h8300.c
+++ b/drivers/ide/ide-h8300.c
@@ -83,7 +83,7 @@ static const struct ide_port_info h8300_port_info = {
 
 static int __init h8300_ide_init(void)
 {
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 
 	printk(KERN_INFO DRV_NAME ": H8/300 generic IDE interface\n");
 
@@ -96,7 +96,7 @@ static int __init h8300_ide_init(void)
 
 	hw_setup(&hw);
 
-	return ide_host_add(&h8300_port_info, hws, NULL);
+	return ide_host_add(&h8300_port_info, hws, 1, NULL);
 
 out_busy:
 	printk(KERN_ERR "ide-h8300: IDE I/F resource already used.\n");
diff --git a/drivers/ide/ide-legacy.c b/drivers/ide/ide-legacy.c
index 0c5b29c56cb..98389e53990 100644
--- a/drivers/ide/ide-legacy.c
+++ b/drivers/ide/ide-legacy.c
@@ -40,7 +40,7 @@ static void ide_legacy_init_one(hw_regs_t **hws, hw_regs_t *hw,
 
 int ide_legacy_device_add(const struct ide_port_info *d, unsigned long config)
 {
-	hw_regs_t hw[2], *hws[] = { NULL, NULL, NULL, NULL };
+	hw_regs_t hw[2], *hws[] = { NULL, NULL };
 
 	memset(&hw, 0, sizeof(hw));
 
@@ -52,6 +52,6 @@ int ide_legacy_device_add(const struct ide_port_info *d, unsigned long config)
 	    (d->host_flags & IDE_HFLAG_SINGLE))
 		return -ENOENT;
 
-	return ide_host_add(d, hws, NULL);
+	return ide_host_add(d, hws, 2, NULL);
 }
 EXPORT_SYMBOL_GPL(ide_legacy_device_add);
diff --git a/drivers/ide/ide-pnp.c b/drivers/ide/ide-pnp.c
index 47043fda239..6bca0f05ee9 100644
--- a/drivers/ide/ide-pnp.c
+++ b/drivers/ide/ide-pnp.c
@@ -37,7 +37,7 @@ static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
 	struct ide_host *host;
 	unsigned long base, ctl;
 	int rc;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 
 	printk(KERN_INFO DRV_NAME ": generic PnP IDE interface\n");
 
@@ -64,7 +64,7 @@ static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
 	ide_std_init_ports(&hw, base, ctl);
 	hw.irq = pnp_irq(dev, 0);
 
-	rc = ide_host_add(&ide_pnp_port_info, hws, &host);
+	rc = ide_host_add(&ide_pnp_port_info, hws, 1, &host);
 	if (rc)
 		goto out;
 
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index f17ba1932ad..6c7451a6e60 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1261,7 +1261,8 @@ out_nomem:
 	return -ENOMEM;
 }
 
-struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws)
+struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws,
+				unsigned int n_ports)
 {
 	struct ide_host *host;
 	struct device *dev = hws[0] ? hws[0]->dev : NULL;
@@ -1272,7 +1273,7 @@ struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws)
 	if (host == NULL)
 		return NULL;
 
-	for (i = 0; i < MAX_HOST_PORTS; i++) {
+	for (i = 0; i < n_ports; i++) {
 		ide_hwif_t *hwif;
 		int idx;
 
@@ -1443,12 +1444,12 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
 EXPORT_SYMBOL_GPL(ide_host_register);
 
 int ide_host_add(const struct ide_port_info *d, hw_regs_t **hws,
-		 struct ide_host **hostp)
+		 unsigned int n_ports, struct ide_host **hostp)
 {
 	struct ide_host *host;
 	int rc;
 
-	host = ide_host_alloc(d, hws);
+	host = ide_host_alloc(d, hws, n_ports);
 	if (host == NULL)
 		return -ENOMEM;
 
diff --git a/drivers/ide/ide_platform.c b/drivers/ide/ide_platform.c
index 813653362a2..47413c2b5f8 100644
--- a/drivers/ide/ide_platform.c
+++ b/drivers/ide/ide_platform.c
@@ -54,7 +54,7 @@ static int __devinit plat_ide_probe(struct platform_device *pdev)
 	struct pata_platform_info *pdata;
 	struct ide_host *host;
 	int ret = 0, mmio = 0;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 	struct ide_port_info d = platform_ide_port_info;
 
 	pdata = pdev->dev.platform_data;
@@ -98,7 +98,7 @@ static int __devinit plat_ide_probe(struct platform_device *pdev)
 	if (mmio)
 		d.host_flags |= IDE_HFLAG_MMIO;
 
-	ret = ide_host_add(&d, hws, &host);
+	ret = ide_host_add(&d, hws, 1, &host);
 	if (ret)
 		goto out;
 
diff --git a/drivers/ide/macide.c b/drivers/ide/macide.c
index 3af9e96da61..31aa2781860 100644
--- a/drivers/ide/macide.c
+++ b/drivers/ide/macide.c
@@ -96,7 +96,7 @@ static int __init macide_init(void)
 	ide_ack_intr_t *ack_intr;
 	unsigned long base;
 	int irq;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 
 	if (!MACH_IS_MAC)
 		return -ENODEV;
@@ -126,7 +126,7 @@ static int __init macide_init(void)
 
 	macide_setup_ports(&hw, base, irq, ack_intr);
 
-	return ide_host_add(&macide_port_info, hws, NULL);
+	return ide_host_add(&macide_port_info, hws, 1, NULL);
 }
 
 module_init(macide_init);
diff --git a/drivers/ide/palm_bk3710.c b/drivers/ide/palm_bk3710.c
index a455c25f43c..4507a6d801b 100644
--- a/drivers/ide/palm_bk3710.c
+++ b/drivers/ide/palm_bk3710.c
@@ -316,7 +316,7 @@ static int __init palm_bk3710_probe(struct platform_device *pdev)
 	void __iomem *base;
 	unsigned long rate, mem_size;
 	int i, rc;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 
 	clk = clk_get(&pdev->dev, "IDECLK");
 	if (IS_ERR(clk))
@@ -369,7 +369,7 @@ static int __init palm_bk3710_probe(struct platform_device *pdev)
 							     ATA_UDMA5;
 
 	/* Register the IDE interface with Linux */
-	rc = ide_host_add(&palm_bk3710_port_info, hws, NULL);
+	rc = ide_host_add(&palm_bk3710_port_info, hws, 1, NULL);
 	if (rc)
 		goto out;
 
diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c
index f76e4e6b408..f4f806476e0 100644
--- a/drivers/ide/pmac.c
+++ b/drivers/ide/pmac.c
@@ -1029,7 +1029,7 @@ static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw)
 	const int *bidp;
 	struct ide_host *host;
 	ide_hwif_t *hwif;
-	hw_regs_t *hws[] = { hw, NULL, NULL, NULL };
+	hw_regs_t *hws[] = { hw };
 	struct ide_port_info d = pmac_port_info;
 	int rc;
 
@@ -1077,7 +1077,7 @@ static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw)
 	/* Make sure we have sane timings */
 	sanitize_timings(pmif);
 
-	host = ide_host_alloc(&d, hws);
+	host = ide_host_alloc(&d, hws, 1);
 	if (host == NULL)
 		return -ENOMEM;
 	hwif = host->ports[0];
diff --git a/drivers/ide/q40ide.c b/drivers/ide/q40ide.c
index 7488d4ff3d7..e46229fe5ea 100644
--- a/drivers/ide/q40ide.c
+++ b/drivers/ide/q40ide.c
@@ -135,7 +135,7 @@ static const char *q40_ide_names[Q40IDE_NUM_HWIFS]={
 static int __init q40ide_init(void)
 {
     int i;
-    hw_regs_t hw[Q40IDE_NUM_HWIFS], *hws[] = { NULL, NULL, NULL, NULL };
+    hw_regs_t hw[Q40IDE_NUM_HWIFS], *hws[] = { NULL, NULL };
 
     if (!MACH_IS_Q40)
       return -ENODEV;
@@ -162,7 +162,7 @@ static int __init q40ide_init(void)
 	hws[i] = &hw[i];
     }
 
-    return ide_host_add(&q40ide_port_info, hws, NULL);
+    return ide_host_add(&q40ide_port_info, hws, Q40IDE_NUM_HWIFS, NULL);
 }
 
 module_init(q40ide_init);
diff --git a/drivers/ide/rapide.c b/drivers/ide/rapide.c
index bd4d7a8a666..c4da3dd39f5 100644
--- a/drivers/ide/rapide.c
+++ b/drivers/ide/rapide.c
@@ -36,7 +36,7 @@ rapide_probe(struct expansion_card *ec, const struct ecard_id *id)
 	void __iomem *base;
 	struct ide_host *host;
 	int ret;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 
 	ret = ecard_request_resources(ec);
 	if (ret)
@@ -52,7 +52,7 @@ rapide_probe(struct expansion_card *ec, const struct ecard_id *id)
 	rapide_setup_ports(&hw, base, base + 0x818, 1 << 6, ec->irq);
 	hw.dev = &ec->dev;
 
-	ret = ide_host_add(&rapide_port_info, hws, &host);
+	ret = ide_host_add(&rapide_port_info, hws, 1, &host);
 	if (ret)
 		goto release;
 
diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c
index 9e3aef31733..9415f8c8a41 100644
--- a/drivers/ide/scc_pata.c
+++ b/drivers/ide/scc_pata.c
@@ -559,7 +559,7 @@ static int scc_ide_setup_pci_device(struct pci_dev *dev,
 {
 	struct scc_ports *ports = pci_get_drvdata(dev);
 	struct ide_host *host;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 	int i, rc;
 
 	memset(&hw, 0, sizeof(hw));
@@ -568,7 +568,7 @@ static int scc_ide_setup_pci_device(struct pci_dev *dev,
 	hw.irq = dev->irq;
 	hw.dev = &dev->dev;
 
-	rc = ide_host_add(d, hws, &host);
+	rc = ide_host_add(d, hws, 1, &host);
 	if (rc)
 		return rc;
 
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index 82519ddc910..d78f4c99451 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -538,7 +538,7 @@ int ide_pci_init_one(struct pci_dev *dev, const struct ide_port_info *d,
 		     void *priv)
 {
 	struct ide_host *host;
-	hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL };
+	hw_regs_t hw[2], *hws[] = { NULL, NULL };
 	int ret;
 
 	ret = ide_setup_pci_controller(dev, d, 1);
@@ -547,7 +547,7 @@ int ide_pci_init_one(struct pci_dev *dev, const struct ide_port_info *d,
 
 	ide_pci_setup_ports(dev, d, &hw[0], &hws[0]);
 
-	host = ide_host_alloc(d, hws);
+	host = ide_host_alloc(d, hws, 2);
 	if (host == NULL) {
 		ret = -ENOMEM;
 		goto out;
@@ -596,7 +596,7 @@ int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2,
 		ide_pci_setup_ports(pdev[i], d, &hw[i*2], &hws[i*2]);
 	}
 
-	host = ide_host_alloc(d, hws);
+	host = ide_host_alloc(d, hws, 4);
 	if (host == NULL) {
 		ret = -ENOMEM;
 		goto out;
diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c
index 676d41c7add..3f8ee357ffb 100644
--- a/drivers/ide/sgiioc4.c
+++ b/drivers/ide/sgiioc4.c
@@ -546,7 +546,7 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev)
 	unsigned long cmd_base, irqport;
 	unsigned long bar0, cmd_phys_base, ctl;
 	void __iomem *virt_base;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 	int rc;
 
 	/*  Get the CmdBlk and CtrlBlk Base Registers */
@@ -580,7 +580,7 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev)
 	/* Initializing chipset IRQ Registers */
 	writel(0x03, (void __iomem *)(irqport + IOC4_INTR_SET * 4));
 
-	rc = ide_host_add(&sgiioc4_port_info, hws, NULL);
+	rc = ide_host_add(&sgiioc4_port_info, hws, 1, NULL);
 	if (!rc)
 		return 0;
 
diff --git a/drivers/ide/tx4938ide.c b/drivers/ide/tx4938ide.c
index e33d764e294..16adc18499f 100644
--- a/drivers/ide/tx4938ide.c
+++ b/drivers/ide/tx4938ide.c
@@ -130,8 +130,7 @@ static const struct ide_port_info tx4938ide_port_info __initdata = {
 
 static int __init tx4938ide_probe(struct platform_device *pdev)
 {
-	hw_regs_t hw;
-	hw_regs_t *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 	struct ide_host *host;
 	struct resource *res;
 	struct tx4938ide_platform_info *pdata = pdev->dev.platform_data;
@@ -183,7 +182,7 @@ static int __init tx4938ide_probe(struct platform_device *pdev)
 		tx4938ide_tune_ebusc(pdata->ebus_ch, pdata->gbus_clock, 0);
 	else
 		d.port_ops = NULL;
-	ret = ide_host_add(&d, hws, &host);
+	ret = ide_host_add(&d, hws, 1, &host);
 	if (!ret)
 		platform_set_drvdata(pdev, host);
 	return ret;
diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c
index 564422d2397..fa57920d003 100644
--- a/drivers/ide/tx4939ide.c
+++ b/drivers/ide/tx4939ide.c
@@ -537,8 +537,7 @@ static const struct ide_port_info tx4939ide_port_info __initdata = {
 
 static int __init tx4939ide_probe(struct platform_device *pdev)
 {
-	hw_regs_t hw;
-	hw_regs_t *hws[] = { &hw, NULL, NULL, NULL };
+	hw_regs_t hw, *hws[] = { &hw };
 	struct ide_host *host;
 	struct resource *res;
 	int irq, ret;
@@ -581,7 +580,7 @@ static int __init tx4939ide_probe(struct platform_device *pdev)
 	hw.dev = &pdev->dev;
 
 	pr_info("TX4939 IDE interface (base %#lx, irq %d)\n", mapbase, irq);
-	host = ide_host_alloc(&tx4939ide_port_info, hws);
+	host = ide_host_alloc(&tx4939ide_port_info, hws, 1);
 	if (!host)
 		return -ENOMEM;
 	/* use extra_base for base address of the all registers */
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 9652edbd26a..a3cd568553d 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1456,11 +1456,12 @@ void ide_undecoded_slave(ide_drive_t *);
 void ide_port_apply_params(ide_hwif_t *);
 int ide_sysfs_register_port(ide_hwif_t *);
 
-struct ide_host *ide_host_alloc(const struct ide_port_info *, hw_regs_t **);
+struct ide_host *ide_host_alloc(const struct ide_port_info *, hw_regs_t **,
+				unsigned int);
 void ide_host_free(struct ide_host *);
 int ide_host_register(struct ide_host *, const struct ide_port_info *,
 		      hw_regs_t **);
-int ide_host_add(const struct ide_port_info *, hw_regs_t **,
+int ide_host_add(const struct ide_port_info *, hw_regs_t **, unsigned int,
 		 struct ide_host **);
 void ide_host_remove(struct ide_host *);
 int ide_legacy_device_add(const struct ide_port_info *, unsigned long);
-- 
cgit v1.2.3-70-g09d2


From 9f36d31437922354d104a2db407f397e79e4027e Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 17 May 2009 19:12:25 +0200
Subject: ide: remove hw_regs_t typedef

Remove hw_regs_t typedef and rename struct hw_regs_s to struct ide_hw.

There should be no functional changes caused by this patch.

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/at91_ide.c     |  2 +-
 drivers/ide/au1xxx-ide.c   |  4 ++--
 drivers/ide/buddha.c       |  4 ++--
 drivers/ide/cmd640.c       |  2 +-
 drivers/ide/cs5520.c       |  2 +-
 drivers/ide/delkin_cb.c    |  2 +-
 drivers/ide/falconide.c    |  4 ++--
 drivers/ide/gayle.c        |  4 ++--
 drivers/ide/icside.c       |  6 +++---
 drivers/ide/ide-4drives.c  |  2 +-
 drivers/ide/ide-cs.c       |  2 +-
 drivers/ide/ide-generic.c  |  2 +-
 drivers/ide/ide-h8300.c    |  6 +++---
 drivers/ide/ide-legacy.c   |  4 ++--
 drivers/ide/ide-pnp.c      |  2 +-
 drivers/ide/ide-probe.c    | 10 +++++-----
 drivers/ide/ide_platform.c |  4 ++--
 drivers/ide/macide.c       |  4 ++--
 drivers/ide/palm_bk3710.c  |  2 +-
 drivers/ide/pmac.c         | 11 ++++++-----
 drivers/ide/q40ide.c       |  6 +++---
 drivers/ide/rapide.c       |  4 ++--
 drivers/ide/scc_pata.c     |  2 +-
 drivers/ide/setup-pci.c    | 16 ++++++++--------
 drivers/ide/sgiioc4.c      |  4 ++--
 drivers/ide/tx4938ide.c    |  2 +-
 drivers/ide/tx4939ide.c    |  2 +-
 include/linux/ide.h        | 14 +++++++-------
 28 files changed, 65 insertions(+), 64 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/at91_ide.c b/drivers/ide/at91_ide.c
index 11fe1ffdff7..fc0949a8cfd 100644
--- a/drivers/ide/at91_ide.c
+++ b/drivers/ide/at91_ide.c
@@ -247,7 +247,7 @@ irqreturn_t at91_irq_handler(int irq, void *dev_id)
 static int __init at91_ide_probe(struct platform_device *pdev)
 {
 	int ret;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 	struct ide_host *host;
 	struct resource *res;
 	unsigned long tf_base = 0, ctl_base = 0;
diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c
index 32f5be68601..58121bd6c11 100644
--- a/drivers/ide/au1xxx-ide.c
+++ b/drivers/ide/au1xxx-ide.c
@@ -449,7 +449,7 @@ static int auide_ddma_init(ide_hwif_t *hwif, const struct ide_port_info *d)
 }
 #endif
 
-static void auide_setup_ports(hw_regs_t *hw, _auide_hwif *ahwif)
+static void auide_setup_ports(struct ide_hw *hw, _auide_hwif *ahwif)
 {
 	int i;
 	unsigned long *ata_regs = hw->io_ports_array;
@@ -508,7 +508,7 @@ static int au_ide_probe(struct platform_device *dev)
 	struct resource *res;
 	struct ide_host *host;
 	int ret = 0;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 
 #if defined(CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA)
 	char *mode = "MWDMA2";
diff --git a/drivers/ide/buddha.c b/drivers/ide/buddha.c
index 0450652cdab..e3c6a591330 100644
--- a/drivers/ide/buddha.c
+++ b/drivers/ide/buddha.c
@@ -121,7 +121,7 @@ static int xsurf_ack_intr(ide_hwif_t *hwif)
     return 1;
 }
 
-static void __init buddha_setup_ports(hw_regs_t *hw, unsigned long base,
+static void __init buddha_setup_ports(struct ide_hw *hw, unsigned long base,
 				      unsigned long ctl, unsigned long irq_port,
 				      ide_ack_intr_t *ack_intr)
 {
@@ -160,7 +160,7 @@ static int __init buddha_init(void)
 
 	while ((z = zorro_find_device(ZORRO_WILDCARD, z))) {
 		unsigned long board;
-		hw_regs_t hw[MAX_NUM_HWIFS], *hws[MAX_NUM_HWIFS];
+		struct ide_hw hw[MAX_NUM_HWIFS], *hws[MAX_NUM_HWIFS];
 
 		if (z->id == ZORRO_PROD_INDIVIDUAL_COMPUTERS_BUDDHA) {
 			buddha_num_hwifs = BUDDHA_NUM_HWIFS;
diff --git a/drivers/ide/cmd640.c b/drivers/ide/cmd640.c
index edb3a7a35c8..1683ed5c732 100644
--- a/drivers/ide/cmd640.c
+++ b/drivers/ide/cmd640.c
@@ -708,7 +708,7 @@ static int __init cmd640x_init(void)
 	int second_port_cmd640 = 0, rc;
 	const char *bus_type, *port2;
 	u8 b, cfr;
-	hw_regs_t hw[2], *hws[2];
+	struct ide_hw hw[2], *hws[2];
 
 	if (cmd640_vlb && probe_for_cmd640_vlb()) {
 		bus_type = "VLB";
diff --git a/drivers/ide/cs5520.c b/drivers/ide/cs5520.c
index a9023d7843f..bd066bb9d61 100644
--- a/drivers/ide/cs5520.c
+++ b/drivers/ide/cs5520.c
@@ -110,7 +110,7 @@ static const struct ide_port_info cyrix_chipset __devinitdata = {
 static int __devinit cs5520_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	const struct ide_port_info *d = &cyrix_chipset;
-	hw_regs_t hw[2], *hws[] = { NULL, NULL };
+	struct ide_hw hw[2], *hws[] = { NULL, NULL };
 
 	ide_setup_pci_noise(dev, d);
 
diff --git a/drivers/ide/delkin_cb.c b/drivers/ide/delkin_cb.c
index d4a76f22ed1..1e10eba62ce 100644
--- a/drivers/ide/delkin_cb.c
+++ b/drivers/ide/delkin_cb.c
@@ -77,7 +77,7 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
 	struct ide_host *host;
 	unsigned long base;
 	int rc;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 
 	rc = pci_enable_device(dev);
 	if (rc) {
diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c
index adb5b0cf762..22fa27389c3 100644
--- a/drivers/ide/falconide.c
+++ b/drivers/ide/falconide.c
@@ -114,7 +114,7 @@ static const struct ide_port_info falconide_port_info = {
 	.chipset		= ide_generic,
 };
 
-static void __init falconide_setup_ports(hw_regs_t *hw)
+static void __init falconide_setup_ports(struct ide_hw *hw)
 {
 	int i;
 
@@ -138,7 +138,7 @@ static void __init falconide_setup_ports(hw_regs_t *hw)
 static int __init falconide_init(void)
 {
 	struct ide_host *host;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 	int rc;
 
 	if (!MACH_IS_ATARI || !ATARIHW_PRESENT(IDE))
diff --git a/drivers/ide/gayle.c b/drivers/ide/gayle.c
index 253ff34afd8..4451a6a5dfe 100644
--- a/drivers/ide/gayle.c
+++ b/drivers/ide/gayle.c
@@ -88,7 +88,7 @@ static int gayle_ack_intr_a1200(ide_hwif_t *hwif)
     return 1;
 }
 
-static void __init gayle_setup_ports(hw_regs_t *hw, unsigned long base,
+static void __init gayle_setup_ports(struct ide_hw *hw, unsigned long base,
 				     unsigned long ctl, unsigned long irq_port,
 				     ide_ack_intr_t *ack_intr)
 {
@@ -125,7 +125,7 @@ static int __init gayle_init(void)
     unsigned long base, ctrlport, irqport;
     ide_ack_intr_t *ack_intr;
     int a4000, i, rc;
-    hw_regs_t hw[GAYLE_NUM_HWIFS], *hws[GAYLE_NUM_HWIFS];
+    struct ide_hw hw[GAYLE_NUM_HWIFS], *hws[GAYLE_NUM_HWIFS];
 
     if (!MACH_IS_AMIGA)
 	return -ENODEV;
diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c
index 6223b80beb3..c5269fa1f73 100644
--- a/drivers/ide/icside.c
+++ b/drivers/ide/icside.c
@@ -381,7 +381,7 @@ static int icside_dma_off_init(ide_hwif_t *hwif, const struct ide_port_info *d)
 	return -EOPNOTSUPP;
 }
 
-static void icside_setup_ports(hw_regs_t *hw, void __iomem *base,
+static void icside_setup_ports(struct ide_hw *hw, void __iomem *base,
 			       struct cardinfo *info, struct expansion_card *ec)
 {
 	unsigned long port = (unsigned long)base + info->dataoffset;
@@ -410,7 +410,7 @@ icside_register_v5(struct icside_state *state, struct expansion_card *ec)
 {
 	void __iomem *base;
 	struct ide_host *host;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 	int ret;
 
 	base = ecardm_iomap(ec, ECARD_RES_MEMC, 0, 0);
@@ -467,7 +467,7 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec)
 	struct ide_host *host;
 	unsigned int sel = 0;
 	int ret;
-	hw_regs_t hw[2], *hws[] = { &hw[0], &hw[1] };
+	struct ide_hw hw[2], *hws[] = { &hw[0], &hw[1] };
 	struct ide_port_info d = icside_v6_port_info;
 
 	ioc_base = ecardm_iomap(ec, ECARD_RES_IOCFAST, 0, 0);
diff --git a/drivers/ide/ide-4drives.c b/drivers/ide/ide-4drives.c
index 189b8bd9957..979d342c338 100644
--- a/drivers/ide/ide-4drives.c
+++ b/drivers/ide/ide-4drives.c
@@ -31,7 +31,7 @@ static const struct ide_port_info ide_4drives_port_info = {
 static int __init ide_4drives_init(void)
 {
 	unsigned long base = 0x1f0, ctl = 0x3f6;
-	hw_regs_t hw, *hws[] = { &hw, &hw };
+	struct ide_hw hw, *hws[] = { &hw, &hw };
 
 	if (probe_4drives == 0)
 		return -ENODEV;
diff --git a/drivers/ide/ide-cs.c b/drivers/ide/ide-cs.c
index 63309ad04cb..527908ff298 100644
--- a/drivers/ide/ide-cs.c
+++ b/drivers/ide/ide-cs.c
@@ -164,7 +164,7 @@ static struct ide_host *idecs_register(unsigned long io, unsigned long ctl,
     struct ide_host *host;
     ide_hwif_t *hwif;
     int i, rc;
-    hw_regs_t hw, *hws[] = { &hw };
+    struct ide_hw hw, *hws[] = { &hw };
 
     if (!request_region(io, 8, DRV_NAME)) {
 	printk(KERN_ERR "%s: I/O resource 0x%lX-0x%lX not free.\n",
diff --git a/drivers/ide/ide-generic.c b/drivers/ide/ide-generic.c
index 0d40848540d..54d7c4685d2 100644
--- a/drivers/ide/ide-generic.c
+++ b/drivers/ide/ide-generic.c
@@ -86,7 +86,7 @@ static void ide_generic_check_pci_legacy_iobases(int *primary, int *secondary)
 
 static int __init ide_generic_init(void)
 {
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 	unsigned long io_addr;
 	int i, rc = 0, primary = 0, secondary = 0;
 
diff --git a/drivers/ide/ide-h8300.c b/drivers/ide/ide-h8300.c
index 0b5fabe2806..520f42c5445 100644
--- a/drivers/ide/ide-h8300.c
+++ b/drivers/ide/ide-h8300.c
@@ -64,11 +64,11 @@ static const struct ide_tp_ops h8300_tp_ops = {
 
 #define H8300_IDE_GAP (2)
 
-static inline void hw_setup(hw_regs_t *hw)
+static inline void hw_setup(struct ide_hw *hw)
 {
 	int i;
 
-	memset(hw, 0, sizeof(hw_regs_t));
+	memset(hw, 0, sizeof(*hw));
 	for (i = 0; i <= 7; i++)
 		hw->io_ports_array[i] = CONFIG_H8300_IDE_BASE + H8300_IDE_GAP*i;
 	hw->io_ports.ctl_addr = CONFIG_H8300_IDE_ALT;
@@ -83,7 +83,7 @@ static const struct ide_port_info h8300_port_info = {
 
 static int __init h8300_ide_init(void)
 {
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 
 	printk(KERN_INFO DRV_NAME ": H8/300 generic IDE interface\n");
 
diff --git a/drivers/ide/ide-legacy.c b/drivers/ide/ide-legacy.c
index 98389e53990..b9654a7bb7b 100644
--- a/drivers/ide/ide-legacy.c
+++ b/drivers/ide/ide-legacy.c
@@ -1,7 +1,7 @@
 #include <linux/kernel.h>
 #include <linux/ide.h>
 
-static void ide_legacy_init_one(hw_regs_t **hws, hw_regs_t *hw,
+static void ide_legacy_init_one(struct ide_hw **hws, struct ide_hw *hw,
 				u8 port_no, const struct ide_port_info *d,
 				unsigned long config)
 {
@@ -40,7 +40,7 @@ static void ide_legacy_init_one(hw_regs_t **hws, hw_regs_t *hw,
 
 int ide_legacy_device_add(const struct ide_port_info *d, unsigned long config)
 {
-	hw_regs_t hw[2], *hws[] = { NULL, NULL };
+	struct ide_hw hw[2], *hws[] = { NULL, NULL };
 
 	memset(&hw, 0, sizeof(hw));
 
diff --git a/drivers/ide/ide-pnp.c b/drivers/ide/ide-pnp.c
index 6bca0f05ee9..017b1df3b80 100644
--- a/drivers/ide/ide-pnp.c
+++ b/drivers/ide/ide-pnp.c
@@ -37,7 +37,7 @@ static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
 	struct ide_host *host;
 	unsigned long base, ctl;
 	int rc;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 
 	printk(KERN_INFO DRV_NAME ": generic PnP IDE interface\n");
 
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 6c7451a6e60..29363829a3f 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1173,7 +1173,7 @@ static void ide_init_port_data(ide_hwif_t *hwif, unsigned int index)
 	ide_port_init_devices_data(hwif);
 }
 
-static void ide_init_port_hw(ide_hwif_t *hwif, hw_regs_t *hw)
+static void ide_init_port_hw(ide_hwif_t *hwif, struct ide_hw *hw)
 {
 	memcpy(&hwif->io_ports, &hw->io_ports, sizeof(hwif->io_ports));
 	hwif->irq = hw->irq;
@@ -1261,8 +1261,8 @@ out_nomem:
 	return -ENOMEM;
 }
 
-struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws,
-				unsigned int n_ports)
+struct ide_host *ide_host_alloc(const struct ide_port_info *d,
+				struct ide_hw **hws, unsigned int n_ports)
 {
 	struct ide_host *host;
 	struct device *dev = hws[0] ? hws[0]->dev : NULL;
@@ -1349,7 +1349,7 @@ static void ide_disable_port(ide_hwif_t *hwif)
 }
 
 int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
-		      hw_regs_t **hws)
+		      struct ide_hw **hws)
 {
 	ide_hwif_t *hwif, *mate = NULL;
 	int i, j = 0;
@@ -1443,7 +1443,7 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
 }
 EXPORT_SYMBOL_GPL(ide_host_register);
 
-int ide_host_add(const struct ide_port_info *d, hw_regs_t **hws,
+int ide_host_add(const struct ide_port_info *d, struct ide_hw **hws,
 		 unsigned int n_ports, struct ide_host **hostp)
 {
 	struct ide_host *host;
diff --git a/drivers/ide/ide_platform.c b/drivers/ide/ide_platform.c
index 47413c2b5f8..ee9b55ecc62 100644
--- a/drivers/ide/ide_platform.c
+++ b/drivers/ide/ide_platform.c
@@ -21,7 +21,7 @@
 #include <linux/platform_device.h>
 #include <linux/io.h>
 
-static void __devinit plat_ide_setup_ports(hw_regs_t *hw,
+static void __devinit plat_ide_setup_ports(struct ide_hw *hw,
 					   void __iomem *base,
 					   void __iomem *ctrl,
 					   struct pata_platform_info *pdata,
@@ -54,7 +54,7 @@ static int __devinit plat_ide_probe(struct platform_device *pdev)
 	struct pata_platform_info *pdata;
 	struct ide_host *host;
 	int ret = 0, mmio = 0;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 	struct ide_port_info d = platform_ide_port_info;
 
 	pdata = pdev->dev.platform_data;
diff --git a/drivers/ide/macide.c b/drivers/ide/macide.c
index 31aa2781860..1447c8c9056 100644
--- a/drivers/ide/macide.c
+++ b/drivers/ide/macide.c
@@ -62,7 +62,7 @@ int macide_ack_intr(ide_hwif_t* hwif)
 	return 0;
 }
 
-static void __init macide_setup_ports(hw_regs_t *hw, unsigned long base,
+static void __init macide_setup_ports(struct ide_hw *hw, unsigned long base,
 				      int irq, ide_ack_intr_t *ack_intr)
 {
 	int i;
@@ -96,7 +96,7 @@ static int __init macide_init(void)
 	ide_ack_intr_t *ack_intr;
 	unsigned long base;
 	int irq;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 
 	if (!MACH_IS_MAC)
 		return -ENODEV;
diff --git a/drivers/ide/palm_bk3710.c b/drivers/ide/palm_bk3710.c
index 4507a6d801b..3c1dc015215 100644
--- a/drivers/ide/palm_bk3710.c
+++ b/drivers/ide/palm_bk3710.c
@@ -316,7 +316,7 @@ static int __init palm_bk3710_probe(struct platform_device *pdev)
 	void __iomem *base;
 	unsigned long rate, mem_size;
 	int i, rc;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 
 	clk = clk_get(&pdev->dev, "IDECLK");
 	if (IS_ERR(clk))
diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c
index f4f806476e0..97642a7a79c 100644
--- a/drivers/ide/pmac.c
+++ b/drivers/ide/pmac.c
@@ -1023,13 +1023,14 @@ static const struct ide_port_info pmac_port_info = {
  * Setup, register & probe an IDE channel driven by this driver, this is
  * called by one of the 2 probe functions (macio or PCI).
  */
-static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw)
+static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif,
+					   struct ide_hw *hw)
 {
 	struct device_node *np = pmif->node;
 	const int *bidp;
 	struct ide_host *host;
 	ide_hwif_t *hwif;
-	hw_regs_t *hws[] = { hw };
+	struct ide_hw *hws[] = { hw };
 	struct ide_port_info d = pmac_port_info;
 	int rc;
 
@@ -1124,7 +1125,7 @@ static int __devinit pmac_ide_setup_device(pmac_ide_hwif_t *pmif, hw_regs_t *hw)
 	return 0;
 }
 
-static void __devinit pmac_ide_init_ports(hw_regs_t *hw, unsigned long base)
+static void __devinit pmac_ide_init_ports(struct ide_hw *hw, unsigned long base)
 {
 	int i;
 
@@ -1144,7 +1145,7 @@ pmac_ide_macio_attach(struct macio_dev *mdev, const struct of_device_id *match)
 	unsigned long regbase;
 	pmac_ide_hwif_t *pmif;
 	int irq, rc;
-	hw_regs_t hw;
+	struct ide_hw hw;
 
 	pmif = kzalloc(sizeof(*pmif), GFP_KERNEL);
 	if (pmif == NULL)
@@ -1268,7 +1269,7 @@ pmac_ide_pci_attach(struct pci_dev *pdev, const struct pci_device_id *id)
 	void __iomem *base;
 	unsigned long rbase, rlen;
 	int rc;
-	hw_regs_t hw;
+	struct ide_hw hw;
 
 	np = pci_device_to_OF_node(pdev);
 	if (np == NULL) {
diff --git a/drivers/ide/q40ide.c b/drivers/ide/q40ide.c
index e46229fe5ea..ab49a97023d 100644
--- a/drivers/ide/q40ide.c
+++ b/drivers/ide/q40ide.c
@@ -51,11 +51,11 @@ static int q40ide_default_irq(unsigned long base)
 /*
  * Addresses are pretranslated for Q40 ISA access.
  */
-static void q40_ide_setup_ports(hw_regs_t *hw, unsigned long base,
+static void q40_ide_setup_ports(struct ide_hw *hw, unsigned long base,
 			ide_ack_intr_t *ack_intr,
 			int irq)
 {
-	memset(hw, 0, sizeof(hw_regs_t));
+	memset(hw, 0, sizeof(*hw));
 	/* BIG FAT WARNING: 
 	   assumption: only DATA port is ever used in 16 bit mode */
 	hw->io_ports.data_addr = Q40_ISA_IO_W(base);
@@ -135,7 +135,7 @@ static const char *q40_ide_names[Q40IDE_NUM_HWIFS]={
 static int __init q40ide_init(void)
 {
     int i;
-    hw_regs_t hw[Q40IDE_NUM_HWIFS], *hws[] = { NULL, NULL };
+    struct ide_hw hw[Q40IDE_NUM_HWIFS], *hws[] = { NULL, NULL };
 
     if (!MACH_IS_Q40)
       return -ENODEV;
diff --git a/drivers/ide/rapide.c b/drivers/ide/rapide.c
index c4da3dd39f5..00f54248f41 100644
--- a/drivers/ide/rapide.c
+++ b/drivers/ide/rapide.c
@@ -16,7 +16,7 @@ static const struct ide_port_info rapide_port_info = {
 	.chipset		= ide_generic,
 };
 
-static void rapide_setup_ports(hw_regs_t *hw, void __iomem *base,
+static void rapide_setup_ports(struct ide_hw *hw, void __iomem *base,
 			       void __iomem *ctrl, unsigned int sz, int irq)
 {
 	unsigned long port = (unsigned long)base;
@@ -36,7 +36,7 @@ rapide_probe(struct expansion_card *ec, const struct ecard_id *id)
 	void __iomem *base;
 	struct ide_host *host;
 	int ret;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 
 	ret = ecard_request_resources(ec);
 	if (ret)
diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c
index 9415f8c8a41..1104bb301eb 100644
--- a/drivers/ide/scc_pata.c
+++ b/drivers/ide/scc_pata.c
@@ -559,7 +559,7 @@ static int scc_ide_setup_pci_device(struct pci_dev *dev,
 {
 	struct scc_ports *ports = pci_get_drvdata(dev);
 	struct ide_host *host;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 	int i, rc;
 
 	memset(&hw, 0, sizeof(hw));
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index d78f4c99451..5314edffc30 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -301,11 +301,11 @@ static int ide_pci_check_iomem(struct pci_dev *dev, const struct ide_port_info *
 }
 
 /**
- *	ide_hw_configure	-	configure a hw_regs_t instance
+ *	ide_hw_configure	-	configure a struct ide_hw instance
  *	@dev: PCI device holding interface
  *	@d: IDE port info
  *	@port: port number
- *	@hw: hw_regs_t instance corresponding to this port
+ *	@hw: struct ide_hw instance corresponding to this port
  *
  *	Perform the initial set up for the hardware interface structure. This
  *	is done per interface port rather than per PCI device. There may be
@@ -315,7 +315,7 @@ static int ide_pci_check_iomem(struct pci_dev *dev, const struct ide_port_info *
  */
 
 static int ide_hw_configure(struct pci_dev *dev, const struct ide_port_info *d,
-			    unsigned int port, hw_regs_t *hw)
+			    unsigned int port, struct ide_hw *hw)
 {
 	unsigned long ctl = 0, base = 0;
 
@@ -445,8 +445,8 @@ out:
  *	ide_pci_setup_ports	-	configure ports/devices on PCI IDE
  *	@dev: PCI device
  *	@d: IDE port info
- *	@hw: hw_regs_t instances corresponding to this PCI IDE device
- *	@hws: hw_regs_t pointers table to update
+ *	@hw: struct ide_hw instances corresponding to this PCI IDE device
+ *	@hws: struct ide_hw pointers table to update
  *
  *	Scan the interfaces attached to this device and do any
  *	necessary per port setup. Attach the devices and ask the
@@ -458,7 +458,7 @@ out:
  */
 
 void ide_pci_setup_ports(struct pci_dev *dev, const struct ide_port_info *d,
-			 hw_regs_t *hw, hw_regs_t **hws)
+			 struct ide_hw *hw, struct ide_hw **hws)
 {
 	int channels = (d->host_flags & IDE_HFLAG_SINGLE) ? 1 : 2, port;
 	u8 tmp;
@@ -538,7 +538,7 @@ int ide_pci_init_one(struct pci_dev *dev, const struct ide_port_info *d,
 		     void *priv)
 {
 	struct ide_host *host;
-	hw_regs_t hw[2], *hws[] = { NULL, NULL };
+	struct ide_hw hw[2], *hws[] = { NULL, NULL };
 	int ret;
 
 	ret = ide_setup_pci_controller(dev, d, 1);
@@ -586,7 +586,7 @@ int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2,
 	struct pci_dev *pdev[] = { dev1, dev2 };
 	struct ide_host *host;
 	int ret, i;
-	hw_regs_t hw[4], *hws[] = { NULL, NULL, NULL, NULL };
+	struct ide_hw hw[4], *hws[] = { NULL, NULL, NULL, NULL };
 
 	for (i = 0; i < 2; i++) {
 		ret = ide_setup_pci_controller(pdev[i], d, !i);
diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c
index 3f8ee357ffb..5f37f168f94 100644
--- a/drivers/ide/sgiioc4.c
+++ b/drivers/ide/sgiioc4.c
@@ -91,7 +91,7 @@ typedef struct {
 
 
 static void
-sgiioc4_init_hwif_ports(hw_regs_t * hw, unsigned long data_port,
+sgiioc4_init_hwif_ports(struct ide_hw *hw, unsigned long data_port,
 			unsigned long ctrl_port, unsigned long irq_port)
 {
 	unsigned long reg = data_port;
@@ -546,7 +546,7 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev)
 	unsigned long cmd_base, irqport;
 	unsigned long bar0, cmd_phys_base, ctl;
 	void __iomem *virt_base;
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 	int rc;
 
 	/*  Get the CmdBlk and CtrlBlk Base Registers */
diff --git a/drivers/ide/tx4938ide.c b/drivers/ide/tx4938ide.c
index 16adc18499f..ea89fddeed9 100644
--- a/drivers/ide/tx4938ide.c
+++ b/drivers/ide/tx4938ide.c
@@ -130,7 +130,7 @@ static const struct ide_port_info tx4938ide_port_info __initdata = {
 
 static int __init tx4938ide_probe(struct platform_device *pdev)
 {
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 	struct ide_host *host;
 	struct resource *res;
 	struct tx4938ide_platform_info *pdata = pdev->dev.platform_data;
diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c
index fa57920d003..9f73fd43d1f 100644
--- a/drivers/ide/tx4939ide.c
+++ b/drivers/ide/tx4939ide.c
@@ -537,7 +537,7 @@ static const struct ide_port_info tx4939ide_port_info __initdata = {
 
 static int __init tx4939ide_probe(struct platform_device *pdev)
 {
-	hw_regs_t hw, *hws[] = { &hw };
+	struct ide_hw hw, *hws[] = { &hw };
 	struct ide_host *host;
 	struct resource *res;
 	int irq, ret;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index a3cd568553d..b1b903a0dac 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -178,7 +178,7 @@ typedef u8 hwif_chipset_t;
 /*
  * Structure to hold all information about the location of this port
  */
-typedef struct hw_regs_s {
+struct ide_hw {
 	union {
 		struct ide_io_ports	io_ports;
 		unsigned long		io_ports_array[IDE_NR_PORTS];
@@ -188,9 +188,9 @@ typedef struct hw_regs_s {
 	ide_ack_intr_t	*ack_intr;		/* acknowledge interrupt */
 	struct device	*dev, *parent;
 	unsigned long	config;
-} hw_regs_t;
+};
 
-static inline void ide_std_init_ports(hw_regs_t *hw,
+static inline void ide_std_init_ports(struct ide_hw *hw,
 				      unsigned long io_addr,
 				      unsigned long ctl_addr)
 {
@@ -1212,7 +1212,7 @@ static inline int ide_pci_is_in_compatibility_mode(struct pci_dev *dev)
 }
 
 void ide_pci_setup_ports(struct pci_dev *, const struct ide_port_info *,
-			 hw_regs_t *, hw_regs_t **);
+			 struct ide_hw *, struct ide_hw **);
 void ide_setup_pci_noise(struct pci_dev *, const struct ide_port_info *);
 
 #ifdef CONFIG_BLK_DEV_IDEDMA_PCI
@@ -1456,12 +1456,12 @@ void ide_undecoded_slave(ide_drive_t *);
 void ide_port_apply_params(ide_hwif_t *);
 int ide_sysfs_register_port(ide_hwif_t *);
 
-struct ide_host *ide_host_alloc(const struct ide_port_info *, hw_regs_t **,
+struct ide_host *ide_host_alloc(const struct ide_port_info *, struct ide_hw **,
 				unsigned int);
 void ide_host_free(struct ide_host *);
 int ide_host_register(struct ide_host *, const struct ide_port_info *,
-		      hw_regs_t **);
-int ide_host_add(const struct ide_port_info *, hw_regs_t **, unsigned int,
+		      struct ide_hw **);
+int ide_host_add(const struct ide_port_info *, struct ide_hw **, unsigned int,
 		 struct ide_host **);
 void ide_host_remove(struct ide_host *);
 int ide_legacy_device_add(const struct ide_port_info *, unsigned long);
-- 
cgit v1.2.3-70-g09d2


From 8b3521eeb7598c3b10c7e14361a7974464527702 Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Mon, 11 May 2009 05:52:49 +0000
Subject: ipv4: remove an unused parameter from configure method of
 fib_rules_ops.

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/fib_rules.h | 1 -
 net/core/fib_rules.c    | 2 +-
 net/decnet/dn_rules.c   | 2 +-
 net/ipv4/fib_rules.c    | 2 +-
 net/ipv6/fib6_rules.c   | 2 +-
 5 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index c2bb5cae651..b9b63395d00 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -48,7 +48,6 @@ struct fib_rules_ops
 					 struct flowi *, int);
 	int			(*configure)(struct fib_rule *,
 					     struct sk_buff *,
-					     struct nlmsghdr *,
 					     struct fib_rule_hdr *,
 					     struct nlattr **);
 	int			(*compare)(struct fib_rule *,
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 98691e1466b..17d9f497b79 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -299,7 +299,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	} else if (rule->action == FR_ACT_GOTO)
 		goto errout_free;
 
-	err = ops->configure(rule, skb, nlh, frh, tb);
+	err = ops->configure(rule, skb, frh, tb);
 	if (err < 0)
 		goto errout_free;
 
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 14fbca55e90..a2690b12e03 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -115,7 +115,7 @@ static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 }
 
 static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
-				 struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
+				 struct fib_rule_hdr *frh,
 				 struct nlattr **tb)
 {
 	int err = -EINVAL;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 6080d712082..38904be4102 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -134,7 +134,7 @@ static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = {
 };
 
 static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
-			       struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
+			       struct fib_rule_hdr *frh,
 			       struct nlattr **tb)
 {
 	struct net *net = sock_net(skb->sk);
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index f5de3f9dc69..e1a36dbb5a2 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -151,7 +151,7 @@ static const struct nla_policy fib6_rule_policy[FRA_MAX+1] = {
 };
 
 static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
-			       struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
+			       struct fib_rule_hdr *frh,
 			       struct nlattr **tb)
 {
 	int err = -EINVAL;
-- 
cgit v1.2.3-70-g09d2


From 9d21493b4beb8f918ba248032fefa393074a5e2b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Sun, 17 May 2009 20:55:16 -0700
Subject: net: tx scalability works : trans_start

struct net_device trans_start field is a hot spot on SMP and high performance
devices, particularly multi queues ones, because every transmitter dirties
it. Is main use is tx watchdog and bonding alive checks.

But as most devices dont use NETIF_F_LLTX, we have to lock
a netdev_queue before calling their ndo_start_xmit(). So it makes
sense to move trans_start from net_device to netdev_queue. Its update
will occur on a already present (and in exclusive state) cache line, for
free.

We can do this transition smoothly. An old driver continue to
update dev->trans_start, while an updated one updates txq->trans_start.

Further patches could also put tx_bytes/tx_packets counters in
netdev_queue to avoid dirtying dev->stats (vlan device comes to mind)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c |  8 ++++----
 include/linux/netdevice.h       | 11 +++++++++++
 net/sched/sch_generic.c         | 40 +++++++++++++++++++++++++++++++---------
 3 files changed, 46 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 815191dd03c..30b9ea6d62b 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2795,7 +2795,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
 	 */
 	bond_for_each_slave(bond, slave, i) {
 		if (slave->link != BOND_LINK_UP) {
-			if (time_before_eq(jiffies, slave->dev->trans_start + delta_in_ticks) &&
+			if (time_before_eq(jiffies, dev_trans_start(slave->dev) + delta_in_ticks) &&
 			    time_before_eq(jiffies, slave->dev->last_rx + delta_in_ticks)) {
 
 				slave->link  = BOND_LINK_UP;
@@ -2827,7 +2827,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
 			 * when the source ip is 0, so don't take the link down
 			 * if we don't know our ip yet
 			 */
-			if (time_after_eq(jiffies, slave->dev->trans_start + 2*delta_in_ticks) ||
+			if (time_after_eq(jiffies, dev_trans_start(slave->dev) + 2*delta_in_ticks) ||
 			    (time_after_eq(jiffies, slave->dev->last_rx + 2*delta_in_ticks))) {
 
 				slave->link  = BOND_LINK_DOWN;
@@ -2938,7 +2938,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
 		 *    the bond has an IP address)
 		 */
 		if ((slave->state == BOND_STATE_ACTIVE) &&
-		    (time_after_eq(jiffies, slave->dev->trans_start +
+		    (time_after_eq(jiffies, dev_trans_start(slave->dev) +
 				    2 * delta_in_ticks) ||
 		      (time_after_eq(jiffies, slave_last_rx(bond, slave)
 				     + 2 * delta_in_ticks)))) {
@@ -2982,7 +2982,7 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
 			write_lock_bh(&bond->curr_slave_lock);
 
 			if (!bond->curr_active_slave &&
-			    time_before_eq(jiffies, slave->dev->trans_start +
+			    time_before_eq(jiffies, dev_trans_start(slave->dev) +
 					   delta_in_ticks)) {
 				slave->link = BOND_LINK_UP;
 				bond_change_active_slave(bond, slave);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2af89b662ca..cd547d04a8c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -470,6 +470,10 @@ struct netdev_queue {
  */
 	spinlock_t		_xmit_lock ____cacheline_aligned_in_smp;
 	int			xmit_lock_owner;
+	/*
+	 * please use this field instead of dev->trans_start
+	 */
+	unsigned long		trans_start;
 } ____cacheline_aligned_in_smp;
 
 
@@ -819,6 +823,11 @@ struct net_device
  * One part is mostly used on xmit path (device)
  */
 	/* These may be needed for future network-power-down code. */
+
+	/*
+	 * trans_start here is expensive for high speed devices on SMP,
+	 * please use netdev_queue->trans_start instead.
+	 */
 	unsigned long		trans_start;	/* Time (in jiffies) of last Tx	*/
 
 	int			watchdog_timeo; /* used by dev_watchdog() */
@@ -1541,6 +1550,8 @@ static inline int netif_carrier_ok(const struct net_device *dev)
 	return !test_bit(__LINK_STATE_NOCARRIER, &dev->state);
 }
 
+extern unsigned long dev_trans_start(struct net_device *dev);
+
 extern void __netdev_watchdog_up(struct net_device *dev);
 
 extern void netif_carrier_on(struct net_device *dev);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 5f5efe4e607..27d03816ec3 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -196,6 +196,21 @@ void __qdisc_run(struct Qdisc *q)
 	clear_bit(__QDISC_STATE_RUNNING, &q->state);
 }
 
+unsigned long dev_trans_start(struct net_device *dev)
+{
+	unsigned long val, res = dev->trans_start;
+	unsigned int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		val = netdev_get_tx_queue(dev, i)->trans_start;
+		if (val && time_after(val, res))
+			res = val;
+	}
+	dev->trans_start = res;
+	return res;
+}
+EXPORT_SYMBOL(dev_trans_start);
+
 static void dev_watchdog(unsigned long arg)
 {
 	struct net_device *dev = (struct net_device *)arg;
@@ -205,25 +220,30 @@ static void dev_watchdog(unsigned long arg)
 		if (netif_device_present(dev) &&
 		    netif_running(dev) &&
 		    netif_carrier_ok(dev)) {
-			int some_queue_stopped = 0;
+			int some_queue_timedout = 0;
 			unsigned int i;
+			unsigned long trans_start;
 
 			for (i = 0; i < dev->num_tx_queues; i++) {
 				struct netdev_queue *txq;
 
 				txq = netdev_get_tx_queue(dev, i);
-				if (netif_tx_queue_stopped(txq)) {
-					some_queue_stopped = 1;
+				/*
+				 * old device drivers set dev->trans_start
+				 */
+				trans_start = txq->trans_start ? : dev->trans_start;
+				if (netif_tx_queue_stopped(txq) &&
+				    time_after(jiffies, (trans_start +
+							 dev->watchdog_timeo))) {
+					some_queue_timedout = 1;
 					break;
 				}
 			}
 
-			if (some_queue_stopped &&
-			    time_after(jiffies, (dev->trans_start +
-						 dev->watchdog_timeo))) {
+			if (some_queue_timedout) {
 				char drivername[64];
-				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
-				       dev->name, netdev_drivername(dev, drivername, 64));
+				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
+				       dev->name, netdev_drivername(dev, drivername, 64), i);
 				dev->netdev_ops->ndo_tx_timeout(dev);
 			}
 			if (!mod_timer(&dev->watchdog_timer,
@@ -602,8 +622,10 @@ static void transition_one_qdisc(struct net_device *dev,
 		clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);
 
 	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
-	if (need_watchdog_p && new_qdisc != &noqueue_qdisc)
+	if (need_watchdog_p && new_qdisc != &noqueue_qdisc) {
+		dev_queue->trans_start = 0;
 		*need_watchdog_p = 1;
+	}
 }
 
 void dev_activate(struct net_device *dev)
-- 
cgit v1.2.3-70-g09d2


From f2a3e626202a87734a47153935ec9d15c7fcf761 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Fri, 15 May 2009 06:04:12 +0000
Subject: mdio: Add 10GBASE-T SNR register definition

These do not have an in-kernel user but may be useful to user-space.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mdio.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index 26b4eb3bbee..825f1e2e2ec 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -46,6 +46,8 @@
 
 /* Media-dependent registers. */
 #define MDIO_PMA_10GBT_TXPWR	131	/* 10GBASE-T TX power control */
+#define MDIO_PMA_10GBT_SNR	133	/* 10GBASE-T SNR margin, lane A.
+					 * Lanes B-D are numbered 134-136. */
 #define MDIO_PMA_10GBR_FECABLE	170	/* 10GBASE-R FEC ability */
 #define MDIO_PCS_10GBX_STAT1	24	/* 10GBASE-X PCS status 1 */
 #define MDIO_PCS_10GBRT_STAT1	32	/* 10GBASE-R/-T PCS status 1 */
@@ -188,6 +190,11 @@
 /* PMA 10GBASE-T TX power register. */
 #define MDIO_PMA_10GBT_TXPWR_SHORT	0x0001	/* Short-reach mode */
 
+/* PMA 10GBASE-T SNR registers. */
+/* Value is SNR margin in dB, clamped to range [-127, 127], plus 0x8000. */
+#define MDIO_PMA_10GBT_SNR_BIAS		0x8000
+#define MDIO_PMA_10GBT_SNR_MAX		127
+
 /* PMA 10GBASE-R FEC ability register. */
 #define MDIO_PMA_10GBR_FECABLE_ABLE	0x0001	/* FEC ability */
 #define MDIO_PMA_10GBR_FECABLE_ERRABLE	0x0002	/* FEC error indic. ability */
-- 
cgit v1.2.3-70-g09d2


From e0b221bf4e07edf2fda645e457dc3c35c2f2f3a9 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Fri, 15 May 2009 06:05:49 +0000
Subject: mdio: Add XENPAK LASI register definitions

These registers were originally defined for XENPAK modules, but are
also implemented by many other 10G PHYs.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mdio.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'include')

diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index 825f1e2e2ec..56851646529 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -55,6 +55,14 @@
 #define MDIO_AN_10GBT_CTRL	32	/* 10GBASE-T auto-negotiation control */
 #define MDIO_AN_10GBT_STAT	33	/* 10GBASE-T auto-negotiation status */
 
+/* LASI (Link Alarm Status Interrupt) registers, defined by XENPAK MSA. */
+#define MDIO_PMA_LASI_RXCTRL	0x9000	/* RX_ALARM control */
+#define MDIO_PMA_LASI_TXCTRL	0x9001	/* TX_ALARM control */
+#define MDIO_PMA_LASI_CTRL	0x9002	/* LASI control */
+#define MDIO_PMA_LASI_RXSTAT	0x9003	/* RX_ALARM status */
+#define MDIO_PMA_LASI_TXSTAT	0x9004	/* TX_ALARM status */
+#define MDIO_PMA_LASI_STAT	0x9005	/* LASI status */
+
 /* Control register 1. */
 /* Enable extended speed selection */
 #define MDIO_CTRL1_SPEEDSELEXT		(BMCR_SPEED1000 | BMCR_SPEED100)
@@ -218,6 +226,26 @@
 #define MDIO_AN_10GBT_STAT_MS		0x4000	/* Master/slave config */
 #define MDIO_AN_10GBT_STAT_MSFLT	0x8000	/* Master/slave config fault */
 
+/* LASI RX_ALARM control/status registers. */
+#define MDIO_PMA_LASI_RX_PHYXSLFLT	0x0001	/* PHY XS RX local fault */
+#define MDIO_PMA_LASI_RX_PCSLFLT	0x0008	/* PCS RX local fault */
+#define MDIO_PMA_LASI_RX_PMALFLT	0x0010	/* PMA/PMD RX local fault */
+#define MDIO_PMA_LASI_RX_OPTICPOWERFLT	0x0020	/* RX optical power fault */
+#define MDIO_PMA_LASI_RX_WISLFLT	0x0200	/* WIS local fault */
+
+/* LASI TX_ALARM control/status registers. */
+#define MDIO_PMA_LASI_TX_PHYXSLFLT	0x0001	/* PHY XS TX local fault */
+#define MDIO_PMA_LASI_TX_PCSLFLT	0x0008	/* PCS TX local fault */
+#define MDIO_PMA_LASI_TX_PMALFLT	0x0010	/* PMA/PMD TX local fault */
+#define MDIO_PMA_LASI_TX_LASERPOWERFLT	0x0080	/* Laser output power fault */
+#define MDIO_PMA_LASI_TX_LASERTEMPFLT	0x0100	/* Laser temperature fault */
+#define MDIO_PMA_LASI_TX_LASERBICURRFLT	0x0200	/* Laser bias current fault */
+
+/* LASI control/status registers. */
+#define MDIO_PMA_LASI_LSALARM		0x0001	/* LS_ALARM enable/status */
+#define MDIO_PMA_LASI_TXALARM		0x0002	/* TX_ALARM enable/status */
+#define MDIO_PMA_LASI_RXALARM		0x0004	/* RX_ALARM enable/status */
+
 /* Mapping between MDIO PRTAD/DEVAD and mii_ioctl_data::phy_id */
 
 #define MDIO_PHY_ID_C45			0x8000
-- 
cgit v1.2.3-70-g09d2


From 184dd3459bb334d9061b58faed3610d08d6c7ff8 Mon Sep 17 00:00:00 2001
From: Vasu Dev <vasu.dev@intel.com>
Date: Sun, 17 May 2009 12:33:28 +0000
Subject: fcoe: adds spma mode support

If we can find a type NETDEV_HW_ADDR_T_SAN mac address from the
corresponding netdev for a fcoe interface then sets up added the
fc->ctlr.spma flag and stores spma mode address in ctl_src_addr.

In case the spma flag is set then:-

 1. Adds spma mode MAC address in ctl_src_addr as secondary
    MAC address, the FLOGI for FIP and pre-FIP will go out
    using this address.
 2. Cleans up stored spma MAC address in ctl_src_addr in
    fcoe_netdev_cleanup.
 3. Sets up spma bit in fip_flags for FIP solicitations along
    with exiting FPMA bit setting.
 4. Initialize the FLOGI FIP MAC descriptor to stored spma
    MAC address in ctl_src_addr. This is used as proposed
    FCoE MAC address from initiator along with both SPMA
    and FPMA bit set in FIP solicitation, in response the
    switch may grant any FPMA or SPMA mode MAC address to
    initiator.

Removes FIP descriptor type checking against ELS type
ELS_FLOGI in fcoe_ctlr_encaps to update a FIP MAC descriptor,
instead now checks against FIP_DT_FLOGI.

I've tested this with available FPMA-only FCoE switch but
since data_src_addr is updated using same old code for
both FPMA and SPMA modes with FIP or pre-FIP links, so added
SPMA mode will work with SPMA-only switch also provided that
switch grants a valid MAC address.

Signed-off-by: Vasu Dev <vasu.dev@intel.com>
Signed-off-by: Yi Zou <yi.zou@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/scsi/fcoe/fcoe.c    | 24 ++++++++++++++++++++++--
 drivers/scsi/fcoe/libfcoe.c | 10 +++++++++-
 include/scsi/libfcoe.h      |  2 ++
 3 files changed, 33 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 04f500571d4..f2d16127bd0 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -238,6 +238,9 @@ void fcoe_netdev_cleanup(struct fcoe_softc *fc)
 	if (!is_zero_ether_addr(fc->ctlr.data_src_addr))
 		dev_unicast_delete(fc->real_dev,
 				   fc->ctlr.data_src_addr, ETH_ALEN);
+	if (fc->ctlr.spma)
+		dev_unicast_delete(fc->real_dev,
+				   fc->ctlr.ctl_src_addr, ETH_ALEN);
 	dev_mc_delete(fc->real_dev, FIP_ALL_ENODE_MACS, ETH_ALEN, 0);
 	rtnl_unlock();
 }
@@ -257,6 +260,7 @@ static int fcoe_netdev_config(struct fc_lport *lp, struct net_device *netdev)
 	u64 wwnn, wwpn;
 	struct fcoe_softc *fc;
 	u8 flogi_maddr[ETH_ALEN];
+	struct netdev_hw_addr *ha;
 
 	/* Setup lport private data to point to fcoe softc */
 	fc = lport_priv(lp);
@@ -313,9 +317,23 @@ static int fcoe_netdev_config(struct fc_lport *lp, struct net_device *netdev)
 	skb_queue_head_init(&fc->fcoe_pending_queue);
 	fc->fcoe_pending_queue_active = 0;
 
+	/* look for SAN MAC address, if multiple SAN MACs exist, only
+	 * use the first one for SPMA */
+	rcu_read_lock();
+	for_each_dev_addr(netdev, ha) {
+		if ((ha->type == NETDEV_HW_ADDR_T_SAN) &&
+		    (is_valid_ether_addr(fc->ctlr.ctl_src_addr))) {
+			memcpy(fc->ctlr.ctl_src_addr, ha->addr, ETH_ALEN);
+			fc->ctlr.spma = 1;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
 	/* setup Source Mac Address */
-	memcpy(fc->ctlr.ctl_src_addr, fc->real_dev->dev_addr,
-	       fc->real_dev->addr_len);
+	if (!fc->ctlr.spma)
+		memcpy(fc->ctlr.ctl_src_addr, fc->real_dev->dev_addr,
+		       fc->real_dev->addr_len);
 
 	wwnn = fcoe_wwn_from_mac(fc->real_dev->dev_addr, 1, 0);
 	fc_set_wwnn(lp, wwnn);
@@ -331,6 +349,8 @@ static int fcoe_netdev_config(struct fc_lport *lp, struct net_device *netdev)
 	rtnl_lock();
 	memcpy(flogi_maddr, (u8[6]) FC_FCOE_FLOGI_MAC, ETH_ALEN);
 	dev_unicast_add(fc->real_dev, flogi_maddr, ETH_ALEN);
+	if (fc->ctlr.spma)
+		dev_unicast_add(fc->real_dev, fc->ctlr.ctl_src_addr, ETH_ALEN);
 	rtnl_unlock();
 
 	/*
diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c
index f410f4abb54..444a06bdb70 100644
--- a/drivers/scsi/fcoe/libfcoe.c
+++ b/drivers/scsi/fcoe/libfcoe.c
@@ -198,6 +198,8 @@ static void fcoe_ctlr_solicit(struct fcoe_ctlr *fip, struct fcoe_fcf *fcf)
 	sol->fip.fip_subcode = FIP_SC_SOL;
 	sol->fip.fip_dl_len = htons(sizeof(sol->desc) / FIP_BPW);
 	sol->fip.fip_flags = htons(FIP_FL_FPMA);
+	if (fip->spma)
+		sol->fip.fip_flags |= htons(FIP_FL_SPMA);
 
 	sol->desc.mac.fd_desc.fip_dtype = FIP_DT_MAC;
 	sol->desc.mac.fd_desc.fip_dlen = sizeof(sol->desc.mac) / FIP_BPW;
@@ -350,6 +352,8 @@ static void fcoe_ctlr_send_keep_alive(struct fcoe_ctlr *fip, int ports, u8 *sa)
 	kal->fip.fip_dl_len = htons((sizeof(kal->mac) +
 				    ports * sizeof(*vn)) / FIP_BPW);
 	kal->fip.fip_flags = htons(FIP_FL_FPMA);
+	if (fip->spma)
+		kal->fip.fip_flags |= htons(FIP_FL_SPMA);
 
 	kal->mac.fd_desc.fip_dtype = FIP_DT_MAC;
 	kal->mac.fd_desc.fip_dlen = sizeof(kal->mac) / FIP_BPW;
@@ -413,6 +417,8 @@ static int fcoe_ctlr_encaps(struct fcoe_ctlr *fip,
 	cap->fip.fip_subcode = FIP_SC_REQ;
 	cap->fip.fip_dl_len = htons((dlen + sizeof(*mac)) / FIP_BPW);
 	cap->fip.fip_flags = htons(FIP_FL_FPMA);
+	if (fip->spma)
+		cap->fip.fip_flags |= htons(FIP_FL_SPMA);
 
 	cap->encaps.fd_desc.fip_dtype = dtype;
 	cap->encaps.fd_desc.fip_dlen = dlen / FIP_BPW;
@@ -421,8 +427,10 @@ static int fcoe_ctlr_encaps(struct fcoe_ctlr *fip,
 	memset(mac, 0, sizeof(mac));
 	mac->fd_desc.fip_dtype = FIP_DT_MAC;
 	mac->fd_desc.fip_dlen = sizeof(*mac) / FIP_BPW;
-	if (dtype != ELS_FLOGI)
+	if (dtype != FIP_DT_FLOGI)
 		memcpy(mac->fd_mac, fip->data_src_addr, ETH_ALEN);
+	else if (fip->spma)
+		memcpy(mac->fd_mac, fip->ctl_src_addr, ETH_ALEN);
 
 	skb->protocol = htons(ETH_P_802_3);
 	skb_reset_mac_header(skb);
diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h
index 666cc131732..b2410605b74 100644
--- a/include/scsi/libfcoe.h
+++ b/include/scsi/libfcoe.h
@@ -73,6 +73,7 @@ enum fip_state {
  * @link:	current link status for libfc.
  * @last_link:	last link state reported to libfc.
  * @map_dest:	use the FC_MAP mode for destination MAC addresses.
+ * @spma:	supports SPMA server-provided MACs mode
  * @dest_addr:	MAC address of the selected FC forwarder.
  * @ctl_src_addr: the native MAC address of our local port.
  * @data_src_addr: the assigned MAC address for the local port after FLOGI.
@@ -104,6 +105,7 @@ struct fcoe_ctlr {
 	u8 link;
 	u8 last_link;
 	u8 map_dest;
+	u8 spma;
 	u8 dest_addr[ETH_ALEN];
 	u8 ctl_src_addr[ETH_ALEN];
 	u8 data_src_addr[ETH_ALEN];
-- 
cgit v1.2.3-70-g09d2


From 888a589f6be07d624e21e2174d98375e9f95911b Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 15 May 2009 13:59:37 -0700
Subject: mm, x86: remove MEMORY_HOTPLUG_RESERVE related code

after:

 | commit b263295dbffd33b0fbff670720fa178c30e3392a
 | Author: Christoph Lameter <clameter@sgi.com>
 | Date:   Wed Jan 30 13:30:47 2008 +0100
 |
 |    x86: 64-bit, make sparsemem vmemmap the only memory model

we don't have MEMORY_HOTPLUG_RESERVE anymore.

Historically, x86-64 had an architecture-specific method for memory hotplug
whereby it scanned the SRAT for physical memory ranges that could be
potentially used for memory hot-add later. By reserving those ranges
without physical memory, the memmap would be allocated and left dormant
until needed. This depended on the DISCONTIG memory model which has been
removed so the code implementing HOTPLUG_RESERVE is now dead.

This patch removes the dead code used by MEMORY_HOTPLUG_RESERVE.

(Changelog authored by Mel.)

v2: updated changelog, and remove hotadd= in doc

[ Impact: remove dead code ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Reviewed-by: Mel Gorman <mel@csn.ul.ie>
Workflow-found-OK-by: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <4A0C4910.7090508@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/x86/x86_64/boot-options.txt |  5 ---
 arch/x86/include/asm/numa_64.h            |  3 --
 arch/x86/mm/numa_64.c                     |  5 ---
 arch/x86/mm/srat_64.c                     | 63 ++++++----------------------
 include/linux/mm.h                        |  2 -
 mm/page_alloc.c                           | 69 -------------------------------
 6 files changed, 12 insertions(+), 135 deletions(-)

(limited to 'include')

diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 34c13040a71..2db5893d6c9 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -150,11 +150,6 @@ NUMA
 		Otherwise, the remaining system RAM is allocated to an
 		additional node.
 
-  numa=hotadd=percent
-		Only allow hotadd memory to preallocate page structures upto
-		percent of already available memory.
-		numa=hotadd=0 will disable hotadd memory.
-
 ACPI
 
   acpi=off	Don't enable ACPI
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 064ed6df4cb..7feff0648d7 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -17,9 +17,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
 extern void numa_init_array(void);
 extern int numa_off;
 
-extern void srat_reserve_add_area(int nodeid);
-extern int hotadd_percent;
-
 extern s16 apicid_to_node[MAX_LOCAL_APIC];
 
 extern unsigned long numa_free_all_bootmem(void);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index fb61d81a656..a6a93c39523 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -272,9 +272,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 		reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
 				 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
 
-#ifdef CONFIG_ACPI_NUMA
-	srat_reserve_add_area(nodeid);
-#endif
 	node_set_online(nodeid);
 }
 
@@ -591,8 +588,6 @@ static __init int numa_setup(char *opt)
 #ifdef CONFIG_ACPI_NUMA
 	if (!strncmp(opt, "noacpi", 6))
 		acpi_numa = -1;
-	if (!strncmp(opt, "hotadd=", 7))
-		hotadd_percent = simple_strtoul(opt+7, NULL, 10);
 #endif
 	return 0;
 }
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 87b45bff250..b0dbbd48e58 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -31,8 +31,6 @@ static nodemask_t nodes_parsed __initdata;
 static nodemask_t cpu_nodes_parsed __initdata;
 static struct bootnode nodes[MAX_NUMNODES] __initdata;
 static struct bootnode nodes_add[MAX_NUMNODES];
-static int found_add_area __initdata;
-int hotadd_percent __initdata = 0;
 
 static int num_node_memblks __initdata;
 static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
@@ -66,9 +64,6 @@ static __init void cutoff_node(int i, unsigned long start, unsigned long end)
 {
 	struct bootnode *nd = &nodes[i];
 
-	if (found_add_area)
-		return;
-
 	if (nd->start < start) {
 		nd->start = start;
 		if (nd->end < nd->start)
@@ -86,7 +81,6 @@ static __init void bad_srat(void)
 	int i;
 	printk(KERN_ERR "SRAT: SRAT not used.\n");
 	acpi_numa = -1;
-	found_add_area = 0;
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
 		apicid_to_node[i] = NUMA_NO_NODE;
 	for (i = 0; i < MAX_NUMNODES; i++)
@@ -182,24 +176,21 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 	       pxm, apic_id, node);
 }
 
-static int update_end_of_memory(unsigned long end) {return -1;}
-static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 static inline int save_add_info(void) {return 1;}
 #else
 static inline int save_add_info(void) {return 0;}
 #endif
 /*
- * Update nodes_add and decide if to include add are in the zone.
- * Both SPARSE and RESERVE need nodes_add information.
- * This code supports one contiguous hot add area per node.
+ * Update nodes_add[]
+ * This code supports one contiguous hot add area per node
  */
-static int __init
-reserve_hotadd(int node, unsigned long start, unsigned long end)
+static void __init
+update_nodes_add(int node, unsigned long start, unsigned long end)
 {
 	unsigned long s_pfn = start >> PAGE_SHIFT;
 	unsigned long e_pfn = end >> PAGE_SHIFT;
-	int ret = 0, changed = 0;
+	int changed = 0;
 	struct bootnode *nd = &nodes_add[node];
 
 	/* I had some trouble with strange memory hotadd regions breaking
@@ -210,7 +201,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
 	   mistakes */
 	if ((signed long)(end - start) < NODE_MIN_SIZE) {
 		printk(KERN_ERR "SRAT: Hotplug area too small\n");
-		return -1;
+		return;
 	}
 
 	/* This check might be a bit too strict, but I'm keeping it for now. */
@@ -218,12 +209,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
 		printk(KERN_ERR
 			"SRAT: Hotplug area %lu -> %lu has existing memory\n",
 			s_pfn, e_pfn);
-		return -1;
-	}
-
-	if (!hotadd_enough_memory(&nodes_add[node]))  {
-		printk(KERN_ERR "SRAT: Hotplug area too large\n");
-		return -1;
+		return;
 	}
 
 	/* Looks good */
@@ -245,11 +231,9 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
 			printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
 	}
 
-	ret = update_end_of_memory(nd->end);
-
 	if (changed)
-	 	printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
-	return ret;
+		printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
+				 nd->start, nd->end);
 }
 
 /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
@@ -310,13 +294,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 	       start, end);
 	e820_register_active_regions(node, start >> PAGE_SHIFT,
 				     end >> PAGE_SHIFT);
-	push_node_boundaries(node, nd->start >> PAGE_SHIFT,
-						nd->end >> PAGE_SHIFT);
 
-	if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) &&
-	    (reserve_hotadd(node, start, end) < 0)) {
-		/* Ignore hotadd region. Undo damage */
-		printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
+	if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
+		update_nodes_add(node, start, end);
+		/* restore nodes[node] */
 		*nd = oldnode;
 		if ((nd->start | nd->end) == 0)
 			node_clear(node, nodes_parsed);
@@ -510,26 +491,6 @@ static int null_slit_node_compare(int a, int b)
 }
 #endif /* CONFIG_NUMA_EMU */
 
-void __init srat_reserve_add_area(int nodeid)
-{
-	if (found_add_area && nodes_add[nodeid].end) {
-		u64 total_mb;
-
-		printk(KERN_INFO "SRAT: Reserving hot-add memory space "
-				"for node %d at %Lx-%Lx\n",
-			nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end);
-		total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start)
-					>> PAGE_SHIFT;
-		total_mb *= sizeof(struct page);
-		total_mb >>= 20;
-		printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
-				"pre-allocated memory.\n", (unsigned long long)total_mb);
-		reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
-			       nodes_add[nodeid].end - nodes_add[nodeid].start,
-			       BOOTMEM_DEFAULT);
-	}
-}
-
 int __node_distance(int a, int b)
 {
 	int index;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bff1f0d475c..511b0986709 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1031,8 +1031,6 @@ extern void add_active_range(unsigned int nid, unsigned long start_pfn,
 					unsigned long end_pfn);
 extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
 					unsigned long end_pfn);
-extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn,
-					unsigned long end_pfn);
 extern void remove_all_active_ranges(void);
 extern unsigned long absent_pages_in_range(unsigned long start_pfn,
 						unsigned long end_pfn);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fe753ecf2aa..474c7e9dd51 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -149,10 +149,6 @@ static unsigned long __meminitdata dma_reserve;
   static int __meminitdata nr_nodemap_entries;
   static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
   static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-  static unsigned long __meminitdata node_boundary_start_pfn[MAX_NUMNODES];
-  static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES];
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
   static unsigned long __initdata required_kernelcore;
   static unsigned long __initdata required_movablecore;
   static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
@@ -3102,64 +3098,6 @@ void __init sparse_memory_present_with_active_regions(int nid)
 				early_node_map[i].end_pfn);
 }
 
-/**
- * push_node_boundaries - Push node boundaries to at least the requested boundary
- * @nid: The nid of the node to push the boundary for
- * @start_pfn: The start pfn of the node
- * @end_pfn: The end pfn of the node
- *
- * In reserve-based hot-add, mem_map is allocated that is unused until hotadd
- * time. Specifically, on x86_64, SRAT will report ranges that can potentially
- * be hotplugged even though no physical memory exists. This function allows
- * an arch to push out the node boundaries so mem_map is allocated that can
- * be used later.
- */
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-void __init push_node_boundaries(unsigned int nid,
-		unsigned long start_pfn, unsigned long end_pfn)
-{
-	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
-			"Entering push_node_boundaries(%u, %lu, %lu)\n",
-			nid, start_pfn, end_pfn);
-
-	/* Initialise the boundary for this node if necessary */
-	if (node_boundary_end_pfn[nid] == 0)
-		node_boundary_start_pfn[nid] = -1UL;
-
-	/* Update the boundaries */
-	if (node_boundary_start_pfn[nid] > start_pfn)
-		node_boundary_start_pfn[nid] = start_pfn;
-	if (node_boundary_end_pfn[nid] < end_pfn)
-		node_boundary_end_pfn[nid] = end_pfn;
-}
-
-/* If necessary, push the node boundary out for reserve hotadd */
-static void __meminit account_node_boundary(unsigned int nid,
-		unsigned long *start_pfn, unsigned long *end_pfn)
-{
-	mminit_dprintk(MMINIT_TRACE, "zoneboundary",
-			"Entering account_node_boundary(%u, %lu, %lu)\n",
-			nid, *start_pfn, *end_pfn);
-
-	/* Return if boundary information has not been provided */
-	if (node_boundary_end_pfn[nid] == 0)
-		return;
-
-	/* Check the boundaries and update if necessary */
-	if (node_boundary_start_pfn[nid] < *start_pfn)
-		*start_pfn = node_boundary_start_pfn[nid];
-	if (node_boundary_end_pfn[nid] > *end_pfn)
-		*end_pfn = node_boundary_end_pfn[nid];
-}
-#else
-void __init push_node_boundaries(unsigned int nid,
-		unsigned long start_pfn, unsigned long end_pfn) {}
-
-static void __meminit account_node_boundary(unsigned int nid,
-		unsigned long *start_pfn, unsigned long *end_pfn) {}
-#endif
-
-
 /**
  * get_pfn_range_for_nid - Return the start and end page frames for a node
  * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned.
@@ -3185,9 +3123,6 @@ void __meminit get_pfn_range_for_nid(unsigned int nid,
 
 	if (*start_pfn == -1UL)
 		*start_pfn = 0;
-
-	/* Push the node boundaries out if requested */
-	account_node_boundary(nid, start_pfn, end_pfn);
 }
 
 /*
@@ -3793,10 +3728,6 @@ void __init remove_all_active_ranges(void)
 {
 	memset(early_node_map, 0, sizeof(early_node_map));
 	nr_nodemap_entries = 0;
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-	memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn));
-	memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn));
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
 }
 
 /* Compare two active node_active_regions */
-- 
cgit v1.2.3-70-g09d2


From 302b4215daa0a704c843da40fd2529e5757a72da Mon Sep 17 00:00:00 2001
From: Yu Zhao <yu.zhao@intel.com>
Date: Mon, 18 May 2009 13:51:32 +0800
Subject: PCI: support the ATS capability

The PCIe ATS capability makes the Endpoint be able to request the
DMA address translation from the IOMMU and cache the translation
in the device side, thus alleviate IOMMU pressure and improve the
hardware performance in the I/O virtualization environment.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/iov.c        | 105 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/pci/pci.h        |  37 +++++++++++++++++
 include/linux/pci.h      |   2 +
 include/linux/pci_regs.h |  10 +++++
 4 files changed, 154 insertions(+)

(limited to 'include')

diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index b497daab3d4..0a7a1b40286 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -5,6 +5,7 @@
  *
  * PCI Express I/O Virtualization (IOV) support.
  *   Single Root IOV 1.0
+ *   Address Translation Service 1.0
  */
 
 #include <linux/pci.h>
@@ -679,3 +680,107 @@ irqreturn_t pci_sriov_migration(struct pci_dev *dev)
 	return sriov_migration(dev) ? IRQ_HANDLED : IRQ_NONE;
 }
 EXPORT_SYMBOL_GPL(pci_sriov_migration);
+
+static int ats_alloc_one(struct pci_dev *dev, int ps)
+{
+	int pos;
+	u16 cap;
+	struct pci_ats *ats;
+
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS);
+	if (!pos)
+		return -ENODEV;
+
+	ats = kzalloc(sizeof(*ats), GFP_KERNEL);
+	if (!ats)
+		return -ENOMEM;
+
+	ats->pos = pos;
+	ats->stu = ps;
+	pci_read_config_word(dev, pos + PCI_ATS_CAP, &cap);
+	ats->qdep = PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) :
+					    PCI_ATS_MAX_QDEP;
+	dev->ats = ats;
+
+	return 0;
+}
+
+static void ats_free_one(struct pci_dev *dev)
+{
+	kfree(dev->ats);
+	dev->ats = NULL;
+}
+
+/**
+ * pci_enable_ats - enable the ATS capability
+ * @dev: the PCI device
+ * @ps: the IOMMU page shift
+ *
+ * Returns 0 on success, or negative on failure.
+ */
+int pci_enable_ats(struct pci_dev *dev, int ps)
+{
+	int rc;
+	u16 ctrl;
+
+	BUG_ON(dev->ats);
+
+	if (ps < PCI_ATS_MIN_STU)
+		return -EINVAL;
+
+	rc = ats_alloc_one(dev, ps);
+	if (rc)
+		return rc;
+
+	ctrl = PCI_ATS_CTRL_ENABLE;
+	ctrl |= PCI_ATS_CTRL_STU(ps - PCI_ATS_MIN_STU);
+	pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl);
+
+	return 0;
+}
+
+/**
+ * pci_disable_ats - disable the ATS capability
+ * @dev: the PCI device
+ */
+void pci_disable_ats(struct pci_dev *dev)
+{
+	u16 ctrl;
+
+	BUG_ON(!dev->ats);
+
+	pci_read_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, &ctrl);
+	ctrl &= ~PCI_ATS_CTRL_ENABLE;
+	pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl);
+
+	ats_free_one(dev);
+}
+
+/**
+ * pci_ats_queue_depth - query the ATS Invalidate Queue Depth
+ * @dev: the PCI device
+ *
+ * Returns the queue depth on success, or negative on failure.
+ *
+ * The ATS spec uses 0 in the Invalidate Queue Depth field to
+ * indicate that the function can accept 32 Invalidate Request.
+ * But here we use the `real' values (i.e. 1~32) for the Queue
+ * Depth.
+ */
+int pci_ats_queue_depth(struct pci_dev *dev)
+{
+	int pos;
+	u16 cap;
+
+	if (dev->ats)
+		return dev->ats->qdep;
+
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS);
+	if (!pos)
+		return -ENODEV;
+
+	pci_read_config_word(dev, pos + PCI_ATS_CAP, &cap);
+
+	return PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) :
+				       PCI_ATS_MAX_QDEP;
+}
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index d03f6b99f29..3c2ec64f78e 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -229,6 +229,13 @@ struct pci_sriov {
 	u8 __iomem *mstate;	/* VF Migration State Array */
 };
 
+/* Address Translation Service */
+struct pci_ats {
+	int pos;	/* capability position */
+	int stu;	/* Smallest Translation Unit */
+	int qdep;	/* Invalidate Queue Depth */
+};
+
 #ifdef CONFIG_PCI_IOV
 extern int pci_iov_init(struct pci_dev *dev);
 extern void pci_iov_release(struct pci_dev *dev);
@@ -236,6 +243,20 @@ extern int pci_iov_resource_bar(struct pci_dev *dev, int resno,
 				enum pci_bar_type *type);
 extern void pci_restore_iov_state(struct pci_dev *dev);
 extern int pci_iov_bus_range(struct pci_bus *bus);
+
+extern int pci_enable_ats(struct pci_dev *dev, int ps);
+extern void pci_disable_ats(struct pci_dev *dev);
+extern int pci_ats_queue_depth(struct pci_dev *dev);
+/**
+ * pci_ats_enabled - query the ATS status
+ * @dev: the PCI device
+ *
+ * Returns 1 if ATS capability is enabled, or 0 if not.
+ */
+static inline int pci_ats_enabled(struct pci_dev *dev)
+{
+	return !!dev->ats;
+}
 #else
 static inline int pci_iov_init(struct pci_dev *dev)
 {
@@ -257,6 +278,22 @@ static inline int pci_iov_bus_range(struct pci_bus *bus)
 {
 	return 0;
 }
+
+static inline int pci_enable_ats(struct pci_dev *dev, int ps)
+{
+	return -ENODEV;
+}
+static inline void pci_disable_ats(struct pci_dev *dev)
+{
+}
+static inline int pci_ats_queue_depth(struct pci_dev *dev)
+{
+	return -ENODEV;
+}
+static inline int pci_ats_enabled(struct pci_dev *dev)
+{
+	return 0;
+}
 #endif /* CONFIG_PCI_IOV */
 
 #endif /* DRIVERS_PCI_H */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 72698d89e76..bd3e4a798c4 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -188,6 +188,7 @@ struct pci_cap_saved_state {
 struct pcie_link_state;
 struct pci_vpd;
 struct pci_sriov;
+struct pci_ats;
 
 /*
  * The pci_dev structure is used to describe PCI devices.
@@ -285,6 +286,7 @@ struct pci_dev {
 		struct pci_sriov *sriov;	/* SR-IOV capability related */
 		struct pci_dev *physfn;	/* the PF this VF is associated with */
 	};
+	struct pci_ats	*ats;	/* Address Translation Service */
 #endif
 };
 
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index e4d08c1b2e0..c03189c56c7 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -501,6 +501,7 @@
 #define PCI_EXT_CAP_ID_DSN	3
 #define PCI_EXT_CAP_ID_PWR	4
 #define PCI_EXT_CAP_ID_ARI	14
+#define PCI_EXT_CAP_ID_ATS	15
 #define PCI_EXT_CAP_ID_SRIOV	16
 
 /* Advanced Error Reporting */
@@ -619,6 +620,15 @@
 #define  PCI_ARI_CTRL_ACS	0x0002	/* ACS Function Groups Enable */
 #define  PCI_ARI_CTRL_FG(x)	(((x) >> 4) & 7) /* Function Group */
 
+/* Address Translation Service */
+#define PCI_ATS_CAP		0x04	/* ATS Capability Register */
+#define  PCI_ATS_CAP_QDEP(x)	((x) & 0x1f)	/* Invalidate Queue Depth */
+#define  PCI_ATS_MAX_QDEP	32	/* Max Invalidate Queue Depth */
+#define PCI_ATS_CTRL		0x06	/* ATS Control Register */
+#define  PCI_ATS_CTRL_ENABLE	0x8000	/* ATS Enable */
+#define  PCI_ATS_CTRL_STU(x)	((x) & 0x1f)	/* Smallest Translation Unit */
+#define  PCI_ATS_MIN_STU	12	/* shift of minimum STU block */
+
 /* Single Root I/O Virtualization */
 #define PCI_SRIOV_CAP		0x04	/* SR-IOV Capabilities */
 #define  PCI_SRIOV_CAP_VFM	0x01	/* VF Migration Capable */
-- 
cgit v1.2.3-70-g09d2


From 1cde26f928863d90e9e7c1217880c8450464d305 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Mon, 18 May 2009 14:41:30 +0200
Subject: virtio_blk: SG_IO passthru support

Add support for SG_IO passthru to virtio_blk.  We add the scsi command
block after the normal outhdr, and the scsi inhdr with full status
information aswell as the sense buffer before the regular inhdr.

[hch: forward ported, added the VIRTIO_BLK_F_SCSI flags, some comments
 and tested the whole beast]
[axboe: updated to use ->resid and not dual-path the byte count]

Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (+ checkpatch.pl tweak)
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 drivers/block/virtio_blk.c | 64 ++++++++++++++++++++++++++++++++++++----------
 include/linux/virtio_blk.h |  8 ++++++
 2 files changed, 58 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index dfe9ee5f169..62275dbdf2e 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -37,6 +37,7 @@ struct virtblk_req
 	struct list_head list;
 	struct request *req;
 	struct virtio_blk_outhdr out_hdr;
+	struct virtio_scsi_inhdr in_hdr;
 	u8 status;
 };
 
@@ -49,7 +50,9 @@ static void blk_done(struct virtqueue *vq)
 
 	spin_lock_irqsave(&vblk->lock, flags);
 	while ((vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len)) != NULL) {
+		unsigned int nr_bytes;
 		int error;
+
 		switch (vbr->status) {
 		case VIRTIO_BLK_S_OK:
 			error = 0;
@@ -62,6 +65,12 @@ static void blk_done(struct virtqueue *vq)
 			break;
 		}
 
+		if (blk_pc_request(vbr->req)) {
+			vbr->req->resid_len = vbr->in_hdr.residual;
+			vbr->req->sense_len = vbr->in_hdr.sense_len;
+			vbr->req->errors = vbr->in_hdr.errors;
+		}
+
 		__blk_end_request_all(vbr->req, error);
 		list_del(&vbr->list);
 		mempool_free(vbr, vblk->pool);
@@ -74,7 +83,7 @@ static void blk_done(struct virtqueue *vq)
 static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 		   struct request *req)
 {
-	unsigned long num, out, in;
+	unsigned long num, out = 0, in = 0;
 	struct virtblk_req *vbr;
 
 	vbr = mempool_alloc(vblk->pool, GFP_ATOMIC);
@@ -99,18 +108,36 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 	if (blk_barrier_rq(vbr->req))
 		vbr->out_hdr.type |= VIRTIO_BLK_T_BARRIER;
 
-	sg_set_buf(&vblk->sg[0], &vbr->out_hdr, sizeof(vbr->out_hdr));
-	num = blk_rq_map_sg(q, vbr->req, vblk->sg+1);
-	sg_set_buf(&vblk->sg[num+1], &vbr->status, sizeof(vbr->status));
+	sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
 
-	if (rq_data_dir(vbr->req) == WRITE) {
-		vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
-		out = 1 + num;
-		in = 1;
-	} else {
-		vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
-		out = 1;
-		in = 1 + num;
+	/*
+	 * If this is a packet command we need a couple of additional headers.
+	 * Behind the normal outhdr we put a segment with the scsi command
+	 * block, and before the normal inhdr we put the sense data and the
+	 * inhdr with additional status information before the normal inhdr.
+	 */
+	if (blk_pc_request(vbr->req))
+		sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len);
+
+	num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);
+
+	if (blk_pc_request(vbr->req)) {
+		sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, 96);
+		sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
+			   sizeof(vbr->in_hdr));
+	}
+
+	sg_set_buf(&vblk->sg[num + out + in++], &vbr->status,
+		   sizeof(vbr->status));
+
+	if (num) {
+		if (rq_data_dir(vbr->req) == WRITE) {
+			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
+			out += num;
+		} else {
+			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
+			in += num;
+		}
 	}
 
 	if (vblk->vq->vq_ops->add_buf(vblk->vq, vblk->sg, out, in, vbr)) {
@@ -148,8 +175,16 @@ static void do_virtblk_request(struct request_queue *q)
 static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
 			 unsigned cmd, unsigned long data)
 {
-	return scsi_cmd_ioctl(bdev->bd_disk->queue,
-			      bdev->bd_disk, mode, cmd,
+	struct gendisk *disk = bdev->bd_disk;
+	struct virtio_blk *vblk = disk->private_data;
+
+	/*
+	 * Only allow the generic SCSI ioctls if the host can support it.
+	 */
+	if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
+		return -ENOIOCTLCMD;
+
+	return scsi_cmd_ioctl(disk->queue, disk, mode, cmd,
 			      (void __user *)data);
 }
 
@@ -356,6 +391,7 @@ static struct virtio_device_id id_table[] = {
 static unsigned int features[] = {
 	VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
 	VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
+	VIRTIO_BLK_F_SCSI,
 };
 
 static struct virtio_driver virtio_blk = {
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index 94c56d29869..4dbcbc1c348 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -15,6 +15,7 @@
 #define VIRTIO_BLK_F_GEOMETRY	4	/* Legacy geometry available  */
 #define VIRTIO_BLK_F_RO		5	/* Disk is read-only */
 #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
+#define VIRTIO_BLK_F_SCSI	7	/* Supports scsi command passthru */
 
 struct virtio_blk_config
 {
@@ -55,6 +56,13 @@ struct virtio_blk_outhdr
 	__u64 sector;
 };
 
+struct virtio_scsi_inhdr {
+	__u32 errors;
+	__u32 data_len;
+	__u32 sense_len;
+	__u32 residual;
+};
+
 /* And this is the final byte of the write scatter-gather list. */
 #define VIRTIO_BLK_S_OK		0
 #define VIRTIO_BLK_S_IOERR	1
-- 
cgit v1.2.3-70-g09d2


From aa5d2b515b6fca5f8a56eac84f7fa0a68c1ce9b7 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yu.zhao@intel.com>
Date: Mon, 18 May 2009 13:51:34 +0800
Subject: VT-d: parse ATSR in DMA Remapping Reporting Structure

Parse the Root Port ATS Capability Reporting Structure in the DMA
Remapping Reporting Structure ACPI table.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/dmar.c          | 112 +++++++++++++++++++++++++++++++++++++++++---
 include/linux/dmar.h        |   9 ++++
 include/linux/intel-iommu.h |   1 +
 3 files changed, 116 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index f23460a5d10..6d7f9619b8a 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -267,6 +267,84 @@ rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
 	}
 	return ret;
 }
+
+static LIST_HEAD(dmar_atsr_units);
+
+static int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
+{
+	struct acpi_dmar_atsr *atsr;
+	struct dmar_atsr_unit *atsru;
+
+	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
+	atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
+	if (!atsru)
+		return -ENOMEM;
+
+	atsru->hdr = hdr;
+	atsru->include_all = atsr->flags & 0x1;
+
+	list_add(&atsru->list, &dmar_atsr_units);
+
+	return 0;
+}
+
+static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
+{
+	int rc;
+	struct acpi_dmar_atsr *atsr;
+
+	if (atsru->include_all)
+		return 0;
+
+	atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
+	rc = dmar_parse_dev_scope((void *)(atsr + 1),
+				(void *)atsr + atsr->header.length,
+				&atsru->devices_cnt, &atsru->devices,
+				atsr->segment);
+	if (rc || !atsru->devices_cnt) {
+		list_del(&atsru->list);
+		kfree(atsru);
+	}
+
+	return rc;
+}
+
+int dmar_find_matched_atsr_unit(struct pci_dev *dev)
+{
+	int i;
+	struct pci_bus *bus;
+	struct acpi_dmar_atsr *atsr;
+	struct dmar_atsr_unit *atsru;
+
+	list_for_each_entry(atsru, &dmar_atsr_units, list) {
+		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
+		if (atsr->segment == pci_domain_nr(dev->bus))
+			goto found;
+	}
+
+	return 0;
+
+found:
+	for (bus = dev->bus; bus; bus = bus->parent) {
+		struct pci_dev *bridge = bus->self;
+
+		if (!bridge || !bridge->is_pcie ||
+		    bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
+			return 0;
+
+		if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
+			for (i = 0; i < atsru->devices_cnt; i++)
+				if (atsru->devices[i] == bridge)
+					return 1;
+			break;
+		}
+	}
+
+	if (atsru->include_all)
+		return 1;
+
+	return 0;
+}
 #endif
 
 static void __init
@@ -274,22 +352,28 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
 {
 	struct acpi_dmar_hardware_unit *drhd;
 	struct acpi_dmar_reserved_memory *rmrr;
+	struct acpi_dmar_atsr *atsr;
 
 	switch (header->type) {
 	case ACPI_DMAR_TYPE_HARDWARE_UNIT:
-		drhd = (struct acpi_dmar_hardware_unit *)header;
+		drhd = container_of(header, struct acpi_dmar_hardware_unit,
+				    header);
 		printk (KERN_INFO PREFIX
-			"DRHD (flags: 0x%08x)base: 0x%016Lx\n",
-			drhd->flags, (unsigned long long)drhd->address);
+			"DRHD base: %#016Lx flags: %#x\n",
+			(unsigned long long)drhd->address, drhd->flags);
 		break;
 	case ACPI_DMAR_TYPE_RESERVED_MEMORY:
-		rmrr = (struct acpi_dmar_reserved_memory *)header;
-
+		rmrr = container_of(header, struct acpi_dmar_reserved_memory,
+				    header);
 		printk (KERN_INFO PREFIX
-			"RMRR base: 0x%016Lx end: 0x%016Lx\n",
+			"RMRR base: %#016Lx end: %#016Lx\n",
 			(unsigned long long)rmrr->base_address,
 			(unsigned long long)rmrr->end_address);
 		break;
+	case ACPI_DMAR_TYPE_ATSR:
+		atsr = container_of(header, struct acpi_dmar_atsr, header);
+		printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
+		break;
 	}
 }
 
@@ -361,6 +445,11 @@ parse_dmar_table(void)
 		case ACPI_DMAR_TYPE_RESERVED_MEMORY:
 #ifdef CONFIG_DMAR
 			ret = dmar_parse_one_rmrr(entry_header);
+#endif
+			break;
+		case ACPI_DMAR_TYPE_ATSR:
+#ifdef CONFIG_DMAR
+			ret = dmar_parse_one_atsr(entry_header);
 #endif
 			break;
 		default:
@@ -431,11 +520,19 @@ int __init dmar_dev_scope_init(void)
 #ifdef CONFIG_DMAR
 	{
 		struct dmar_rmrr_unit *rmrr, *rmrr_n;
+		struct dmar_atsr_unit *atsr, *atsr_n;
+
 		list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
 			ret = rmrr_parse_dev(rmrr);
 			if (ret)
 				return ret;
 		}
+
+		list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
+			ret = atsr_parse_dev(atsr);
+			if (ret)
+				return ret;
+		}
 	}
 #endif
 
@@ -468,6 +565,9 @@ int __init dmar_table_init(void)
 #ifdef CONFIG_DMAR
 	if (list_empty(&dmar_rmrr_units))
 		printk(KERN_INFO PREFIX "No RMRR found\n");
+
+	if (list_empty(&dmar_atsr_units))
+		printk(KERN_INFO PREFIX "No ATSR found\n");
 #endif
 
 #ifdef CONFIG_INTR_REMAP
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index e397dc342cd..7c9a207e5da 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -185,6 +185,15 @@ struct dmar_rmrr_unit {
 
 #define for_each_rmrr_units(rmrr) \
 	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
+
+struct dmar_atsr_unit {
+	struct list_head list;		/* list of ATSR units */
+	struct acpi_dmar_header *hdr;	/* ACPI header */
+	struct pci_dev **devices;	/* target devices */
+	int devices_cnt;		/* target device count */
+	u8 include_all:1;		/* include all ports */
+};
+
 /* Intel DMAR  initialization functions */
 extern int intel_iommu_init(void);
 #else
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 29e05a034c0..0a1939f200f 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -331,6 +331,7 @@ static inline void __iommu_flush_cache(
 }
 
 extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
+extern int dmar_find_matched_atsr_unit(struct pci_dev *dev);
 
 extern int alloc_iommu(struct dmar_drhd_unit *drhd);
 extern void free_iommu(struct intel_iommu *iommu);
-- 
cgit v1.2.3-70-g09d2


From 6ba6c3a4cacfd68bf970e3e04e2ff0d66fa0f695 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yu.zhao@intel.com>
Date: Mon, 18 May 2009 13:51:35 +0800
Subject: VT-d: add device IOTLB invalidation support

Support device IOTLB invalidation to flush the translation cached
in the Endpoint.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/dmar.c          | 77 ++++++++++++++++++++++++++++++++++++++++-----
 include/linux/intel-iommu.h | 14 ++++++++-
 2 files changed, 82 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 6d7f9619b8a..7b287cb38b7 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -699,7 +699,8 @@ void free_iommu(struct intel_iommu *iommu)
  */
 static inline void reclaim_free_desc(struct q_inval *qi)
 {
-	while (qi->desc_status[qi->free_tail] == QI_DONE) {
+	while (qi->desc_status[qi->free_tail] == QI_DONE ||
+	       qi->desc_status[qi->free_tail] == QI_ABORT) {
 		qi->desc_status[qi->free_tail] = QI_FREE;
 		qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
 		qi->free_cnt++;
@@ -709,10 +710,13 @@ static inline void reclaim_free_desc(struct q_inval *qi)
 static int qi_check_fault(struct intel_iommu *iommu, int index)
 {
 	u32 fault;
-	int head;
+	int head, tail;
 	struct q_inval *qi = iommu->qi;
 	int wait_index = (index + 1) % QI_LENGTH;
 
+	if (qi->desc_status[wait_index] == QI_ABORT)
+		return -EAGAIN;
+
 	fault = readl(iommu->reg + DMAR_FSTS_REG);
 
 	/*
@@ -722,7 +726,11 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
 	 */
 	if (fault & DMA_FSTS_IQE) {
 		head = readl(iommu->reg + DMAR_IQH_REG);
-		if ((head >> 4) == index) {
+		if ((head >> DMAR_IQ_SHIFT) == index) {
+			printk(KERN_ERR "VT-d detected invalid descriptor: "
+				"low=%llx, high=%llx\n",
+				(unsigned long long)qi->desc[index].low,
+				(unsigned long long)qi->desc[index].high);
 			memcpy(&qi->desc[index], &qi->desc[wait_index],
 					sizeof(struct qi_desc));
 			__iommu_flush_cache(iommu, &qi->desc[index],
@@ -732,6 +740,32 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
 		}
 	}
 
+	/*
+	 * If ITE happens, all pending wait_desc commands are aborted.
+	 * No new descriptors are fetched until the ITE is cleared.
+	 */
+	if (fault & DMA_FSTS_ITE) {
+		head = readl(iommu->reg + DMAR_IQH_REG);
+		head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
+		head |= 1;
+		tail = readl(iommu->reg + DMAR_IQT_REG);
+		tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
+
+		writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
+
+		do {
+			if (qi->desc_status[head] == QI_IN_USE)
+				qi->desc_status[head] = QI_ABORT;
+			head = (head - 2 + QI_LENGTH) % QI_LENGTH;
+		} while (head != tail);
+
+		if (qi->desc_status[wait_index] == QI_ABORT)
+			return -EAGAIN;
+	}
+
+	if (fault & DMA_FSTS_ICE)
+		writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
+
 	return 0;
 }
 
@@ -741,7 +775,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
  */
 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
 {
-	int rc = 0;
+	int rc;
 	struct q_inval *qi = iommu->qi;
 	struct qi_desc *hw, wait_desc;
 	int wait_index, index;
@@ -752,6 +786,9 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
 
 	hw = qi->desc;
 
+restart:
+	rc = 0;
+
 	spin_lock_irqsave(&qi->q_lock, flags);
 	while (qi->free_cnt < 3) {
 		spin_unlock_irqrestore(&qi->q_lock, flags);
@@ -782,7 +819,7 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
 	 * update the HW tail register indicating the presence of
 	 * new descriptors.
 	 */
-	writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
+	writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
 
 	while (qi->desc_status[wait_index] != QI_DONE) {
 		/*
@@ -794,18 +831,21 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
 		 */
 		rc = qi_check_fault(iommu, index);
 		if (rc)
-			goto out;
+			break;
 
 		spin_unlock(&qi->q_lock);
 		cpu_relax();
 		spin_lock(&qi->q_lock);
 	}
-out:
-	qi->desc_status[index] = qi->desc_status[wait_index] = QI_DONE;
+
+	qi->desc_status[index] = QI_DONE;
 
 	reclaim_free_desc(qi);
 	spin_unlock_irqrestore(&qi->q_lock, flags);
 
+	if (rc == -EAGAIN)
+		goto restart;
+
 	return rc;
 }
 
@@ -857,6 +897,27 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 	qi_submit_sync(&desc, iommu);
 }
 
+void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
+			u64 addr, unsigned mask)
+{
+	struct qi_desc desc;
+
+	if (mask) {
+		BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
+		addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
+		desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
+	} else
+		desc.high = QI_DEV_IOTLB_ADDR(addr);
+
+	if (qdep >= QI_DEV_IOTLB_MAX_INVS)
+		qdep = 0;
+
+	desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
+		   QI_DIOTLB_TYPE;
+
+	qi_submit_sync(&desc, iommu);
+}
+
 /*
  * Disable Queued Invalidation interface.
  */
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 0a1939f200f..40561b224a1 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -53,6 +53,7 @@
 #define	DMAR_PHMLIMIT_REG 0x78	/* pmrr high limit */
 #define DMAR_IQH_REG	0x80	/* Invalidation queue head register */
 #define DMAR_IQT_REG	0x88	/* Invalidation queue tail register */
+#define DMAR_IQ_SHIFT	4	/* Invalidation queue head/tail shift */
 #define DMAR_IQA_REG	0x90	/* Invalidation queue addr register */
 #define DMAR_ICS_REG	0x98	/* Invalidation complete status register */
 #define DMAR_IRTA_REG	0xb8    /* Interrupt remapping table addr register */
@@ -198,6 +199,8 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 #define DMA_FSTS_PPF ((u32)2)
 #define DMA_FSTS_PFO ((u32)1)
 #define DMA_FSTS_IQE (1 << 4)
+#define DMA_FSTS_ICE (1 << 5)
+#define DMA_FSTS_ITE (1 << 6)
 #define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)
 
 /* FRCD_REG, 32 bits access */
@@ -226,7 +229,8 @@ do {									\
 enum {
 	QI_FREE,
 	QI_IN_USE,
-	QI_DONE
+	QI_DONE,
+	QI_ABORT
 };
 
 #define QI_CC_TYPE		0x1
@@ -255,6 +259,12 @@ enum {
 #define QI_CC_DID(did)		(((u64)did) << 16)
 #define QI_CC_GRAN(gran)	(((u64)gran) >> (DMA_CCMD_INVL_GRANU_OFFSET-4))
 
+#define QI_DEV_IOTLB_SID(sid)	((u64)((sid) & 0xffff) << 32)
+#define QI_DEV_IOTLB_QDEP(qdep)	(((qdep) & 0x1f) << 16)
+#define QI_DEV_IOTLB_ADDR(addr)	((u64)(addr) & VTD_PAGE_MASK)
+#define QI_DEV_IOTLB_SIZE	1
+#define QI_DEV_IOTLB_MAX_INVS	32
+
 struct qi_desc {
 	u64 low, high;
 };
@@ -344,6 +354,8 @@ extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid,
 			     u8 fm, u64 type);
 extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 			  unsigned int size_order, u64 type);
+extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
+			       u64 addr, unsigned mask);
 
 extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
-- 
cgit v1.2.3-70-g09d2


From 93a23a7271dfb811b3adb72779054c3a24433112 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yu.zhao@intel.com>
Date: Mon, 18 May 2009 13:51:37 +0800
Subject: VT-d: support the device IOTLB

Enable the device IOTLB (i.e. ATS) for both the bare metal and KVM
environments.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/intel-iommu.c     | 109 ++++++++++++++++++++++++++++++++++++++----
 include/linux/dma_remapping.h |   1 +
 include/linux/intel-iommu.h   |   1 +
 3 files changed, 102 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 6d7cb84c63e..c3cdfc90c13 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -252,6 +252,7 @@ struct device_domain_info {
 	u8 bus;			/* PCI bus number */
 	u8 devfn;		/* PCI devfn number */
 	struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
+	struct intel_iommu *iommu; /* IOMMU used by this device */
 	struct dmar_domain *domain; /* pointer to domain */
 };
 
@@ -945,6 +946,77 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
 			(unsigned long long)DMA_TLB_IAIG(val));
 }
 
+static struct device_domain_info *iommu_support_dev_iotlb(
+	struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
+{
+	int found = 0;
+	unsigned long flags;
+	struct device_domain_info *info;
+	struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
+
+	if (!ecap_dev_iotlb_support(iommu->ecap))
+		return NULL;
+
+	if (!iommu->qi)
+		return NULL;
+
+	spin_lock_irqsave(&device_domain_lock, flags);
+	list_for_each_entry(info, &domain->devices, link)
+		if (info->bus == bus && info->devfn == devfn) {
+			found = 1;
+			break;
+		}
+	spin_unlock_irqrestore(&device_domain_lock, flags);
+
+	if (!found || !info->dev)
+		return NULL;
+
+	if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
+		return NULL;
+
+	if (!dmar_find_matched_atsr_unit(info->dev))
+		return NULL;
+
+	info->iommu = iommu;
+
+	return info;
+}
+
+static void iommu_enable_dev_iotlb(struct device_domain_info *info)
+{
+	if (!info)
+		return;
+
+	pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
+}
+
+static void iommu_disable_dev_iotlb(struct device_domain_info *info)
+{
+	if (!info->dev || !pci_ats_enabled(info->dev))
+		return;
+
+	pci_disable_ats(info->dev);
+}
+
+static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
+				  u64 addr, unsigned mask)
+{
+	u16 sid, qdep;
+	unsigned long flags;
+	struct device_domain_info *info;
+
+	spin_lock_irqsave(&device_domain_lock, flags);
+	list_for_each_entry(info, &domain->devices, link) {
+		if (!info->dev || !pci_ats_enabled(info->dev))
+			continue;
+
+		sid = info->bus << 8 | info->devfn;
+		qdep = pci_ats_queue_depth(info->dev);
+		qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
+	}
+	spin_unlock_irqrestore(&device_domain_lock, flags);
+}
+
 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
 				  u64 addr, unsigned int pages)
 {
@@ -965,6 +1037,8 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
 	else
 		iommu->flush.flush_iotlb(iommu, did, addr, mask,
 						DMA_TLB_PSI_FLUSH);
+	if (did)
+		iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
 }
 
 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
@@ -1305,6 +1379,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
 	unsigned long ndomains;
 	int id;
 	int agaw;
+	struct device_domain_info *info = NULL;
 
 	pr_debug("Set context mapping for %02x:%02x.%d\n",
 		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -1372,15 +1447,21 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
 
 	context_set_domain_id(context, id);
 
+	if (translation != CONTEXT_TT_PASS_THROUGH) {
+		info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
+		translation = info ? CONTEXT_TT_DEV_IOTLB :
+				     CONTEXT_TT_MULTI_LEVEL;
+	}
 	/*
 	 * In pass through mode, AW must be programmed to indicate the largest
 	 * AGAW value supported by hardware. And ASR is ignored by hardware.
 	 */
-	if (likely(translation == CONTEXT_TT_MULTI_LEVEL)) {
-		context_set_address_width(context, iommu->agaw);
-		context_set_address_root(context, virt_to_phys(pgd));
-	} else
+	if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
 		context_set_address_width(context, iommu->msagaw);
+	else {
+		context_set_address_root(context, virt_to_phys(pgd));
+		context_set_address_width(context, iommu->agaw);
+	}
 
 	context_set_translation_type(context, translation);
 	context_set_fault_enable(context);
@@ -1402,6 +1483,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
 	} else {
 		iommu_flush_write_buffer(iommu);
 	}
+	iommu_enable_dev_iotlb(info);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	spin_lock_irqsave(&domain->iommu_lock, flags);
@@ -1552,6 +1634,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
 			info->dev->dev.archdata.iommu = NULL;
 		spin_unlock_irqrestore(&device_domain_lock, flags);
 
+		iommu_disable_dev_iotlb(info);
 		iommu = device_to_iommu(info->segment, info->bus, info->devfn);
 		iommu_detach_dev(iommu, info->bus, info->devfn);
 		free_devinfo_mem(info);
@@ -2259,10 +2342,16 @@ static void flush_unmaps(void)
 			continue;
 
 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
-					 DMA_TLB_GLOBAL_FLUSH, 0);
+					 DMA_TLB_GLOBAL_FLUSH);
 		for (j = 0; j < deferred_flush[i].next; j++) {
-			__free_iova(&deferred_flush[i].domain[j]->iovad,
-					deferred_flush[i].iova[j]);
+			unsigned long mask;
+			struct iova *iova = deferred_flush[i].iova[j];
+
+			mask = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT;
+			mask = ilog2(mask >> VTD_PAGE_SHIFT);
+			iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
+					iova->pfn_lo << PAGE_SHIFT, mask);
+			__free_iova(&deferred_flush[i].domain[j]->iovad, iova);
 		}
 		deferred_flush[i].next = 0;
 	}
@@ -2943,6 +3032,7 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
 				info->dev->dev.archdata.iommu = NULL;
 			spin_unlock_irqrestore(&device_domain_lock, flags);
 
+			iommu_disable_dev_iotlb(info);
 			iommu_detach_dev(iommu, info->bus, info->devfn);
 			iommu_detach_dependent_devices(iommu, pdev);
 			free_devinfo_mem(info);
@@ -2993,6 +3083,7 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
 
 		spin_unlock_irqrestore(&device_domain_lock, flags1);
 
+		iommu_disable_dev_iotlb(info);
 		iommu = device_to_iommu(info->segment, info->bus, info->devfn);
 		iommu_detach_dev(iommu, info->bus, info->devfn);
 		iommu_detach_dependent_devices(iommu, info->dev);
@@ -3197,11 +3288,11 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 		return -EFAULT;
 	}
 
-	ret = domain_context_mapping(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
+	ret = vm_domain_add_dev_info(dmar_domain, pdev);
 	if (ret)
 		return ret;
 
-	ret = vm_domain_add_dev_info(dmar_domain, pdev);
+	ret = domain_context_mapping(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
 	return ret;
 }
 
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index e0a03aff63d..5619f852273 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -14,6 +14,7 @@
 #define DMA_PTE_SNP (1 << 11)
 
 #define CONTEXT_TT_MULTI_LEVEL	0
+#define CONTEXT_TT_DEV_IOTLB	1
 #define CONTEXT_TT_PASS_THROUGH 2
 
 struct intel_iommu;
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 40561b224a1..482dc91fd53 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -124,6 +124,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 #define ecap_pass_through(e)	((e >> 6) & 0x1)
 #define ecap_eim_support(e)	((e >> 4) & 0x1)
 #define ecap_ir_support(e)	((e >> 3) & 0x1)
+#define ecap_dev_iotlb_support(e)	(((e) >> 2) & 0x1)
 #define ecap_max_handle_mask(e) ((e >> 20) & 0xf)
 #define ecap_sc_support(e)	((e >> 7) & 0x1) /* Snooping Control */
 
-- 
cgit v1.2.3-70-g09d2


From 6d3ddc81f5762d54ce7d1db70eb757c6c12fabbc Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sat, 16 May 2009 17:47:29 +0100
Subject: ASoC: Split DAPM power checks from sequencing of power changes

DAPM has always applied any changes to the power state of widgets as soon
as it has determined that they are required. Instead of doing this store
all the changes that are required on lists of widgets to power up and
down, then iterate over those lists and apply the changes. This changes
the sequence in which changes are implemented, doing all power downs
before power ups and always using the up/down sequences (previously they
were only used when changes were due to DAC/ADC power events). The error
handling is also changed so that we continue attempting to power widgets
if some changes fail.

The main benefit of this is to allow future changes to do optimisations
over the whole power sequence and to reduce the number of walks of the
widget graph required to check the power status of widgets.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc-dapm.h |  3 ++
 include/sound/soc.h      |  2 ++
 sound/soc/soc-dapm.c     | 81 +++++++++++++++++++++++++++++++++---------------
 3 files changed, 61 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index 533f9f25649..b3f789d0cee 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -385,6 +385,9 @@ struct snd_soc_dapm_widget {
 	/* widget input and outputs */
 	struct list_head sources;
 	struct list_head sinks;
+
+	/* used during DAPM updates */
+	struct list_head power_list;
 };
 
 #endif
diff --git a/include/sound/soc.h b/include/sound/soc.h
index 6ab80bf7abd..8309ce81cf3 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -372,6 +372,8 @@ struct snd_soc_codec {
 	enum snd_soc_bias_level bias_level;
 	enum snd_soc_bias_level suspend_bias_level;
 	struct delayed_work delayed_work;
+	struct list_head up_list;
+	struct list_head down_list;
 
 	/* codec DAI's */
 	struct snd_soc_dai *dai;
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 7847f80e96d..04ef84106d7 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -658,7 +658,7 @@ static int dapm_supply_check_power(struct snd_soc_dapm_widget *w)
 static int dapm_power_widget(struct snd_soc_codec *codec, int event,
 			     struct snd_soc_dapm_widget *w)
 {
-	int power, ret;
+	int ret;
 
 	switch (w->id) {
 	case snd_soc_dapm_pre:
@@ -696,18 +696,8 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event,
 		return 0;
 
 	default:
-		break;
+		return dapm_generic_apply_power(w);
 	}
-
-	if (!w->power_check)
-		return 0;
-
-	power = w->power_check(w);
-	if (w->power == power)
-		return 0;
-	w->power = power;
-
-	return dapm_generic_apply_power(w);
 }
 
 /*
@@ -722,27 +712,68 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event,
 static int dapm_power_widgets(struct snd_soc_codec *codec, int event)
 {
 	struct snd_soc_dapm_widget *w;
-	int i, c = 1, *seq = NULL, ret = 0;
-
-	/* do we have a sequenced stream event */
-	if (event == SND_SOC_DAPM_STREAM_START) {
-		c = ARRAY_SIZE(dapm_up_seq);
-		seq = dapm_up_seq;
-	} else if (event == SND_SOC_DAPM_STREAM_STOP) {
-		c = ARRAY_SIZE(dapm_down_seq);
-		seq = dapm_down_seq;
+	int ret = 0;
+	int i, power;
+
+	INIT_LIST_HEAD(&codec->up_list);
+	INIT_LIST_HEAD(&codec->down_list);
+
+	/* Check which widgets we need to power and store them in
+	 * lists indicating if they should be powered up or down.
+	 */
+	list_for_each_entry(w, &codec->dapm_widgets, list) {
+		switch (w->id) {
+		case snd_soc_dapm_pre:
+			list_add_tail(&codec->down_list, &w->power_list);
+			break;
+		case snd_soc_dapm_post:
+			list_add_tail(&codec->up_list, &w->power_list);
+			break;
+
+		default:
+			if (!w->power_check)
+				continue;
+
+			power = w->power_check(w);
+			if (w->power == power)
+				continue;
+
+			if (power)
+				list_add_tail(&w->power_list, &codec->up_list);
+			else
+				list_add_tail(&w->power_list,
+					      &codec->down_list);
+
+			w->power = power;
+			break;
+		}
 	}
 
-	for (i = 0; i < c; i++) {
-		list_for_each_entry(w, &codec->dapm_widgets, list) {
+	/* Power down widgets first; try to avoid amplifying pops. */
+	for (i = 0; i < ARRAY_SIZE(dapm_down_seq); i++) {
+		list_for_each_entry(w, &codec->down_list, power_list) {
+			/* is widget in stream order */
+			if (w->id != dapm_down_seq[i])
+				continue;
+
+			ret = dapm_power_widget(codec, event, w);
+			if (ret != 0)
+				pr_err("Failed to power down %s: %d\n",
+				       w->name, ret);
+		}
+	}
 
+	/* Now power up. */
+	for (i = 0; i < ARRAY_SIZE(dapm_up_seq); i++) {
+		list_for_each_entry(w, &codec->up_list, power_list) {
 			/* is widget in stream order */
-			if (seq && seq[i] && w->id != seq[i])
+			if (w->id != dapm_up_seq[i])
 				continue;
 
 			ret = dapm_power_widget(codec, event, w);
 			if (ret != 0)
-				return ret;
+				pr_err("Failed to power up %s: %d\n",
+				       w->name, ret);
 		}
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 452c5eaa0d5162e02ffee742ea17540887bc2904 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sun, 17 May 2009 21:41:23 +0100
Subject: ASoC: Integrate bias management with DAPM power management

Rather than managing the bias level of the system based on if there is
an active audio stream manage it based on there being an active DAPM
widget. This simplifies the code a little, moving the power handling
into one place, and improves audio performance for bypass paths when no
playbacks or captures are active.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc-dapm.h |  2 --
 include/sound/soc.h      |  1 +
 sound/soc/soc-core.c     | 61 +++++++------------------------------
 sound/soc/soc-dapm.c     | 78 +++++++++++++++++++++++++++++++++---------------
 4 files changed, 65 insertions(+), 77 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index b3f789d0cee..ec8a45f9a06 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -279,8 +279,6 @@ int snd_soc_dapm_add_routes(struct snd_soc_codec *codec,
 /* dapm events */
 int snd_soc_dapm_stream_event(struct snd_soc_codec *codec, char *stream,
 	int event);
-int snd_soc_dapm_set_bias_level(struct snd_soc_device *socdev,
-	enum snd_soc_bias_level level);
 
 /* dapm sys fs - used by the core */
 int snd_soc_dapm_sys_add(struct device *dev);
diff --git a/include/sound/soc.h b/include/sound/soc.h
index 8309ce81cf3..2af3213df90 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -339,6 +339,7 @@ struct snd_soc_codec {
 	struct module *owner;
 	struct mutex mutex;
 	struct device *dev;
+	struct snd_soc_device *socdev;
 
 	struct list_head list;
 
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index c0e706645ec..4aa8e2d3506 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -299,7 +299,6 @@ static void close_delayed_work(struct work_struct *work)
 {
 	struct snd_soc_card *card = container_of(work, struct snd_soc_card,
 						 delayed_work.work);
-	struct snd_soc_device *socdev = card->socdev;
 	struct snd_soc_codec *codec = card->codec;
 	struct snd_soc_dai *codec_dai;
 	int i;
@@ -315,27 +314,10 @@ static void close_delayed_work(struct work_struct *work)
 
 		/* are we waiting on this codec DAI stream */
 		if (codec_dai->pop_wait == 1) {
-
-			/* Reduce power if no longer active */
-			if (codec->active == 0) {
-				pr_debug("pop wq D1 %s %s\n", codec->name,
-					 codec_dai->playback.stream_name);
-				snd_soc_dapm_set_bias_level(socdev,
-					SND_SOC_BIAS_PREPARE);
-			}
-
 			codec_dai->pop_wait = 0;
 			snd_soc_dapm_stream_event(codec,
 				codec_dai->playback.stream_name,
 				SND_SOC_DAPM_STREAM_STOP);
-
-			/* Fall into standby if no longer active */
-			if (codec->active == 0) {
-				pr_debug("pop wq D3 %s %s\n", codec->name,
-					 codec_dai->playback.stream_name);
-				snd_soc_dapm_set_bias_level(socdev,
-					SND_SOC_BIAS_STANDBY);
-			}
 		}
 	}
 	mutex_unlock(&pcm_mutex);
@@ -399,10 +381,6 @@ static int soc_codec_close(struct snd_pcm_substream *substream)
 		snd_soc_dapm_stream_event(codec,
 			codec_dai->capture.stream_name,
 			SND_SOC_DAPM_STREAM_STOP);
-
-		if (codec->active == 0 && codec_dai->pop_wait == 0)
-			snd_soc_dapm_set_bias_level(socdev,
-						SND_SOC_BIAS_STANDBY);
 	}
 
 	mutex_unlock(&pcm_mutex);
@@ -467,36 +445,16 @@ static int soc_pcm_prepare(struct snd_pcm_substream *substream)
 		cancel_delayed_work(&card->delayed_work);
 	}
 
-	/* do we need to power up codec */
-	if (codec->bias_level != SND_SOC_BIAS_ON) {
-		snd_soc_dapm_set_bias_level(socdev,
-					    SND_SOC_BIAS_PREPARE);
-
-		if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
-			snd_soc_dapm_stream_event(codec,
-					codec_dai->playback.stream_name,
-					SND_SOC_DAPM_STREAM_START);
-		else
-			snd_soc_dapm_stream_event(codec,
-					codec_dai->capture.stream_name,
-					SND_SOC_DAPM_STREAM_START);
-
-		snd_soc_dapm_set_bias_level(socdev, SND_SOC_BIAS_ON);
-		snd_soc_dai_digital_mute(codec_dai, 0);
-
-	} else {
-		/* codec already powered - power on widgets */
-		if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
-			snd_soc_dapm_stream_event(codec,
-					codec_dai->playback.stream_name,
-					SND_SOC_DAPM_STREAM_START);
-		else
-			snd_soc_dapm_stream_event(codec,
-					codec_dai->capture.stream_name,
-					SND_SOC_DAPM_STREAM_START);
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+		snd_soc_dapm_stream_event(codec,
+					  codec_dai->playback.stream_name,
+					  SND_SOC_DAPM_STREAM_START);
+	else
+		snd_soc_dapm_stream_event(codec,
+					  codec_dai->capture.stream_name,
+					  SND_SOC_DAPM_STREAM_START);
 
-		snd_soc_dai_digital_mute(codec_dai, 0);
-	}
+	snd_soc_dai_digital_mute(codec_dai, 0);
 
 out:
 	mutex_unlock(&pcm_mutex);
@@ -1372,6 +1330,7 @@ int snd_soc_new_pcms(struct snd_soc_device *socdev, int idx, const char *xid)
 		return ret;
 	}
 
+	codec->socdev = socdev;
 	codec->card->dev = socdev->dev;
 	codec->card->private_data = codec;
 	strncpy(codec->card->driver, codec->name, sizeof(codec->card->driver));
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index d130602b307..4ca5e56388a 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -94,6 +94,30 @@ static inline struct snd_soc_dapm_widget *dapm_cnew_widget(
 	return kmemdup(_widget, sizeof(*_widget), GFP_KERNEL);
 }
 
+/**
+ * snd_soc_dapm_set_bias_level - set the bias level for the system
+ * @socdev: audio device
+ * @level: level to configure
+ *
+ * Configure the bias (power) levels for the SoC audio device.
+ *
+ * Returns 0 for success else error.
+ */
+static int snd_soc_dapm_set_bias_level(struct snd_soc_device *socdev,
+				       enum snd_soc_bias_level level)
+{
+	struct snd_soc_card *card = socdev->card;
+	struct snd_soc_codec *codec = socdev->card->codec;
+	int ret = 0;
+
+	if (card->set_bias_level)
+		ret = card->set_bias_level(card, level);
+	if (ret == 0 && codec->set_bias_level)
+		ret = codec->set_bias_level(codec, level);
+
+	return ret;
+}
+
 /* set up initial codec paths */
 static void dapm_set_path_status(struct snd_soc_dapm_widget *w,
 	struct snd_soc_dapm_path *p, int i)
@@ -707,9 +731,11 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event,
  */
 static int dapm_power_widgets(struct snd_soc_codec *codec, int event)
 {
+	struct snd_soc_device *socdev = codec->socdev;
 	struct snd_soc_dapm_widget *w;
 	int ret = 0;
 	int i, power;
+	int sys_power = 0;
 
 	INIT_LIST_HEAD(&codec->up_list);
 	INIT_LIST_HEAD(&codec->down_list);
@@ -731,6 +757,9 @@ static int dapm_power_widgets(struct snd_soc_codec *codec, int event)
 				continue;
 
 			power = w->power_check(w);
+			if (power)
+				sys_power = 1;
+
 			if (w->power == power)
 				continue;
 
@@ -745,6 +774,15 @@ static int dapm_power_widgets(struct snd_soc_codec *codec, int event)
 		}
 	}
 
+	/* If we're changing to all on or all off then prepare */
+	if ((sys_power && codec->bias_level == SND_SOC_BIAS_STANDBY) ||
+	    (!sys_power && codec->bias_level == SND_SOC_BIAS_ON)) {
+		ret = snd_soc_dapm_set_bias_level(socdev,
+						  SND_SOC_BIAS_PREPARE);
+		if (ret != 0)
+			pr_err("Failed to prepare bias: %d\n", ret);
+	}
+
 	/* Power down widgets first; try to avoid amplifying pops. */
 	for (i = 0; i < ARRAY_SIZE(dapm_down_seq); i++) {
 		list_for_each_entry(w, &codec->down_list, power_list) {
@@ -773,6 +811,22 @@ static int dapm_power_widgets(struct snd_soc_codec *codec, int event)
 		}
 	}
 
+	/* If we just powered the last thing off drop to standby bias */
+	if (codec->bias_level == SND_SOC_BIAS_PREPARE && !sys_power) {
+		ret = snd_soc_dapm_set_bias_level(socdev,
+						  SND_SOC_BIAS_STANDBY);
+		if (ret != 0)
+			pr_err("Failed to apply standby bias: %d\n", ret);
+	}
+
+	/* If we just powered up then move to active bias */
+	if (codec->bias_level == SND_SOC_BIAS_PREPARE && sys_power) {
+		ret = snd_soc_dapm_set_bias_level(socdev,
+						  SND_SOC_BIAS_ON);
+		if (ret != 0)
+			pr_err("Failed to apply active bias: %d\n", ret);
+	}
+
 	return 0;
 }
 
@@ -1720,30 +1774,6 @@ int snd_soc_dapm_stream_event(struct snd_soc_codec *codec,
 }
 EXPORT_SYMBOL_GPL(snd_soc_dapm_stream_event);
 
-/**
- * snd_soc_dapm_set_bias_level - set the bias level for the system
- * @socdev: audio device
- * @level: level to configure
- *
- * Configure the bias (power) levels for the SoC audio device.
- *
- * Returns 0 for success else error.
- */
-int snd_soc_dapm_set_bias_level(struct snd_soc_device *socdev,
-				enum snd_soc_bias_level level)
-{
-	struct snd_soc_card *card = socdev->card;
-	struct snd_soc_codec *codec = socdev->card->codec;
-	int ret = 0;
-
-	if (card->set_bias_level)
-		ret = card->set_bias_level(card, level);
-	if (ret == 0 && codec->set_bias_level)
-		ret = codec->set_bias_level(codec, level);
-
-	return ret;
-}
-
 /**
  * snd_soc_dapm_enable_pin - enable pin.
  * @codec: SoC codec
-- 
cgit v1.2.3-70-g09d2


From 7004bf252c53da18f6b55103e0c92f777f846806 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Mon, 18 May 2009 00:34:33 +0000
Subject: net: add tx_packets/tx_bytes/tx_dropped counters in struct
 netdev_queue

offsetof(struct net_device, features)=0x44
offsetof(struct net_device, stats.tx_packets)=0x54
offsetof(struct net_device, stats.tx_bytes)=0x5c
offsetof(struct net_device, stats.tx_dropped)=0x6c

Network drivers that touch dev->stats.tx_packets/stats.tx_bytes in their
tx path can slow down SMP operations, since they dirty a cache line
that should stay shared (dev->features is needed in rx and tx paths)

We could move away stats field in net_device but it wont help that much.
(Two cache lines dirtied in tx path, we can do one only)

Better solution is to add tx_packets/tx_bytes/tx_dropped in struct
netdev_queue because this structure is already touched in tx path and
counters updates will then be free (no increase in size)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  3 +++
 net/core/dev.c            | 23 ++++++++++++++++++++---
 2 files changed, 23 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cd547d04a8c..f8574e76b74 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -474,6 +474,9 @@ struct netdev_queue {
 	 * please use this field instead of dev->trans_start
 	 */
 	unsigned long		trans_start;
+	unsigned long		tx_bytes;
+	unsigned long		tx_packets;
+	unsigned long		tx_dropped;
 } ____cacheline_aligned_in_smp;
 
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 14dd725aaab..6d3630d1627 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4943,13 +4943,30 @@ void netdev_run_todo(void)
  *	the internal statistics structure is used.
  */
 const struct net_device_stats *dev_get_stats(struct net_device *dev)
- {
+{
 	const struct net_device_ops *ops = dev->netdev_ops;
 
 	if (ops->ndo_get_stats)
 		return ops->ndo_get_stats(dev);
-	else
-		return &dev->stats;
+	else {
+		unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
+		struct net_device_stats *stats = &dev->stats;
+		unsigned int i;
+		struct netdev_queue *txq;
+
+		for (i = 0; i < dev->num_tx_queues; i++) {
+			txq = netdev_get_tx_queue(dev, i);
+			tx_bytes   += txq->tx_bytes;
+			tx_packets += txq->tx_packets;
+			tx_dropped += txq->tx_dropped;
+		}
+		if (tx_bytes || tx_packets || tx_dropped) {
+			stats->tx_bytes   = tx_bytes;
+			stats->tx_packets = tx_packets;
+			stats->tx_dropped = tx_dropped;
+		}
+		return stats;
+	}
 }
 EXPORT_SYMBOL(dev_get_stats);
 
-- 
cgit v1.2.3-70-g09d2


From 75834fc3b6fcff00327f5d2a18760c1e8e0179c5 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Mon, 18 May 2009 10:26:10 -0400
Subject: SELinux: move SELINUX_MAGIC into magic.h

The selinuxfs superblock magic is used inside the IMA code, but is being
defined in two places and could someday get out of sync.  This patch moves the
declaration into magic.h so it is only done once.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/magic.h               | 1 +
 security/integrity/ima/ima_policy.c | 8 +++-----
 security/selinux/include/security.h | 3 +--
 3 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/magic.h b/include/linux/magic.h
index 5b4e28bcb78..927138cf305 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -9,6 +9,7 @@
 #define DEBUGFS_MAGIC          0x64626720
 #define SYSFS_MAGIC		0x62656572
 #define SECURITYFS_MAGIC	0x73636673
+#define SELINUX_MAGIC		0xf97cff8c
 #define TMPFS_MAGIC		0x01021994
 #define SQUASHFS_MAGIC		0x73717368
 #define EFS_SUPER_MAGIC		0x414A53
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index dec6dcb1c8d..31d677f7c65 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -49,14 +49,12 @@ struct ima_measure_rule_entry {
  * written in terms of .action, .func, .mask, .fsmagic, and .uid
  */
 static struct ima_measure_rule_entry default_rules[] = {
-	{.action = DONT_MEASURE,.fsmagic = PROC_SUPER_MAGIC,
-	 .flags = IMA_FSMAGIC},
+	{.action = DONT_MEASURE,.fsmagic = PROC_SUPER_MAGIC,.flags = IMA_FSMAGIC},
 	{.action = DONT_MEASURE,.fsmagic = SYSFS_MAGIC,.flags = IMA_FSMAGIC},
 	{.action = DONT_MEASURE,.fsmagic = DEBUGFS_MAGIC,.flags = IMA_FSMAGIC},
 	{.action = DONT_MEASURE,.fsmagic = TMPFS_MAGIC,.flags = IMA_FSMAGIC},
-	{.action = DONT_MEASURE,.fsmagic = SECURITYFS_MAGIC,
-	 .flags = IMA_FSMAGIC},
-	{.action = DONT_MEASURE,.fsmagic = 0xF97CFF8C,.flags = IMA_FSMAGIC},
+	{.action = DONT_MEASURE,.fsmagic = SECURITYFS_MAGIC,.flags = IMA_FSMAGIC},
+	{.action = DONT_MEASURE,.fsmagic = SELINUX_MAGIC,.flags = IMA_FSMAGIC},
 	{.action = MEASURE,.func = FILE_MMAP,.mask = MAY_EXEC,
 	 .flags = IMA_FUNC | IMA_MASK},
 	{.action = MEASURE,.func = BPRM_CHECK,.mask = MAY_EXEC,
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index a7be3f01fb0..ca835795a8b 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -8,14 +8,13 @@
 #ifndef _SELINUX_SECURITY_H_
 #define _SELINUX_SECURITY_H_
 
+#include <linux/magic.h>
 #include "flask.h"
 
 #define SECSID_NULL			0x00000000 /* unspecified SID */
 #define SECSID_WILD			0xffffffff /* wildcard SID */
 #define SECCLASS_NULL			0x0000 /* no class */
 
-#define SELINUX_MAGIC 0xf97cff8c
-
 /* Identify specific policy version changes */
 #define POLICYDB_VERSION_BASE		15
 #define POLICYDB_VERSION_BOOL		16
-- 
cgit v1.2.3-70-g09d2


From 39549eef3587f1c1e8c65c88a2400d10fd30ea17 Mon Sep 17 00:00:00 2001
From: Wolfgang Grandegger <wg@grandegger.com>
Date: Fri, 15 May 2009 23:39:29 +0000
Subject: can: CAN Network device driver and Netlink interface

The CAN network device driver interface provides a generic interface to
setup, configure and monitor CAN network devices. It exports a set of
common data structures and functions, which all real CAN network device
drivers should use. Please have a look to the SJA1000 or MSCAN driver
to understand how to use them. The name of the module is can-dev.ko.

Furthermore, it adds a Netlink interface allowing to configure the CAN
device using the program "ip" from the iproute2 utility suite.

For further information please check "Documentation/networking/can.txt"

Signed-off-by: Wolfgang Grandegger <wg@grandegger.com>
Signed-off-by: Oliver Hartkopp <oliver.hartkopp@volkswagen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/Kconfig     |  23 ++
 drivers/net/can/Makefile    |   5 +
 drivers/net/can/dev.c       | 657 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/can/Kbuild    |   1 +
 include/linux/can/dev.h     |  70 +++++
 include/linux/can/netlink.h | 113 ++++++++
 6 files changed, 869 insertions(+)
 create mode 100644 drivers/net/can/dev.c
 create mode 100644 include/linux/can/dev.h
 create mode 100644 include/linux/can/netlink.h

(limited to 'include')

diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index 57def0d5737..77adb8ef6e4 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -12,6 +12,29 @@ config CAN_VCAN
 	  This driver can also be built as a module.  If so, the module
 	  will be called vcan.
 
+config CAN_DEV
+	tristate "Platform CAN drivers with Netlink support"
+	depends on CAN
+	default Y
+	---help---
+	  Enables the common framework for platform CAN drivers with Netlink
+	  support. This is the standard library for CAN drivers.
+	  If unsure, say Y.
+
+config CAN_CALC_BITTIMING
+	bool "CAN bit-timing calculation"
+	depends on CAN_DEV
+	default Y
+	---help---
+	  If enabled, CAN bit-timing parameters will be calculated for the
+	  bit-rate specified via Netlink argument "bitrate" when the device
+	  get started. This works fine for the most common CAN controllers
+	  with standard bit-rates but may fail for exotic bit-rates or CAN
+	  source clock frequencies. Disabling saves some space, but then the
+	  bit-timing parameters must be specified directly using the Netlink
+	  arguments "tq", "prop_seg", "phase_seg1", "phase_seg2" and "sjw".
+	  If unsure, say Y.
+
 config CAN_DEBUG_DEVICES
 	bool "CAN devices debugging messages"
 	depends on CAN
diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile
index c4bead705cd..6c865475cd8 100644
--- a/drivers/net/can/Makefile
+++ b/drivers/net/can/Makefile
@@ -3,3 +3,8 @@
 #
 
 obj-$(CONFIG_CAN_VCAN)		+= vcan.o
+
+obj-$(CONFIG_CAN_DEV)		+= can-dev.o
+can-dev-y			:= dev.o
+
+ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
new file mode 100644
index 00000000000..52b0e7d8901
--- /dev/null
+++ b/drivers/net/can/dev.c
@@ -0,0 +1,657 @@
+/*
+ * Copyright (C) 2005 Marc Kleine-Budde, Pengutronix
+ * Copyright (C) 2006 Andrey Volkov, Varma Electronics
+ * Copyright (C) 2008-2009 Wolfgang Grandegger <wg@grandegger.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the version 2 of the GNU General Public License
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/can.h>
+#include <linux/can/dev.h>
+#include <linux/can/netlink.h>
+#include <net/rtnetlink.h>
+
+#define MOD_DESC "CAN device driver interface"
+
+MODULE_DESCRIPTION(MOD_DESC);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Wolfgang Grandegger <wg@grandegger.com>");
+
+#ifdef CONFIG_CAN_CALC_BITTIMING
+#define CAN_CALC_MAX_ERROR 50 /* in one-tenth of a percent */
+
+/*
+ * Bit-timing calculation derived from:
+ *
+ * Code based on LinCAN sources and H8S2638 project
+ * Copyright 2004-2006 Pavel Pisa - DCE FELK CVUT cz
+ * Copyright 2005      Stanislav Marek
+ * email: pisa@cmp.felk.cvut.cz
+ *
+ * Calculates proper bit-timing parameters for a specified bit-rate
+ * and sample-point, which can then be used to set the bit-timing
+ * registers of the CAN controller. You can find more information
+ * in the header file linux/can/netlink.h.
+ */
+static int can_update_spt(const struct can_bittiming_const *btc,
+			  int sampl_pt, int tseg, int *tseg1, int *tseg2)
+{
+	*tseg2 = tseg + 1 - (sampl_pt * (tseg + 1)) / 1000;
+	if (*tseg2 < btc->tseg2_min)
+		*tseg2 = btc->tseg2_min;
+	if (*tseg2 > btc->tseg2_max)
+		*tseg2 = btc->tseg2_max;
+	*tseg1 = tseg - *tseg2;
+	if (*tseg1 > btc->tseg1_max) {
+		*tseg1 = btc->tseg1_max;
+		*tseg2 = tseg - *tseg1;
+	}
+	return 1000 * (tseg + 1 - *tseg2) / (tseg + 1);
+}
+
+static int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt)
+{
+	struct can_priv *priv = netdev_priv(dev);
+	const struct can_bittiming_const *btc = priv->bittiming_const;
+	long rate, best_rate = 0;
+	long best_error = 1000000000, error = 0;
+	int best_tseg = 0, best_brp = 0, brp = 0;
+	int tsegall, tseg = 0, tseg1 = 0, tseg2 = 0;
+	int spt_error = 1000, spt = 0, sampl_pt;
+	u64 v64;
+
+	if (!priv->bittiming_const)
+		return -ENOTSUPP;
+
+	/* Use CIA recommended sample points */
+	if (bt->sample_point) {
+		sampl_pt = bt->sample_point;
+	} else {
+		if (bt->bitrate > 800000)
+			sampl_pt = 750;
+		else if (bt->bitrate > 500000)
+			sampl_pt = 800;
+		else
+			sampl_pt = 875;
+	}
+
+	/* tseg even = round down, odd = round up */
+	for (tseg = (btc->tseg1_max + btc->tseg2_max) * 2 + 1;
+	     tseg >= (btc->tseg1_min + btc->tseg2_min) * 2; tseg--) {
+		tsegall = 1 + tseg / 2;
+		/* Compute all possible tseg choices (tseg=tseg1+tseg2) */
+		brp = priv->clock.freq / (tsegall * bt->bitrate) + tseg % 2;
+		/* chose brp step which is possible in system */
+		brp = (brp / btc->brp_inc) * btc->brp_inc;
+		if ((brp < btc->brp_min) || (brp > btc->brp_max))
+			continue;
+		rate = priv->clock.freq / (brp * tsegall);
+		error = bt->bitrate - rate;
+		/* tseg brp biterror */
+		if (error < 0)
+			error = -error;
+		if (error > best_error)
+			continue;
+		best_error = error;
+		if (error == 0) {
+			spt = can_update_spt(btc, sampl_pt, tseg / 2,
+					     &tseg1, &tseg2);
+			error = sampl_pt - spt;
+			if (error < 0)
+				error = -error;
+			if (error > spt_error)
+				continue;
+			spt_error = error;
+		}
+		best_tseg = tseg / 2;
+		best_brp = brp;
+		best_rate = rate;
+		if (error == 0)
+			break;
+	}
+
+	if (best_error) {
+		/* Error in one-tenth of a percent */
+		error = (best_error * 1000) / bt->bitrate;
+		if (error > CAN_CALC_MAX_ERROR) {
+			dev_err(dev->dev.parent,
+				"bitrate error %ld.%ld%% too high\n",
+				error / 10, error % 10);
+			return -EDOM;
+		} else {
+			dev_warn(dev->dev.parent, "bitrate error %ld.%ld%%\n",
+				 error / 10, error % 10);
+		}
+	}
+
+	/* real sample point */
+	bt->sample_point = can_update_spt(btc, sampl_pt, best_tseg,
+					  &tseg1, &tseg2);
+
+	v64 = (u64)best_brp * 1000000000UL;
+	do_div(v64, priv->clock.freq);
+	bt->tq = (u32)v64;
+	bt->prop_seg = tseg1 / 2;
+	bt->phase_seg1 = tseg1 - bt->prop_seg;
+	bt->phase_seg2 = tseg2;
+	bt->sjw = 1;
+	bt->brp = best_brp;
+	/* real bit-rate */
+	bt->bitrate = priv->clock.freq / (bt->brp * (tseg1 + tseg2 + 1));
+
+	return 0;
+}
+#else /* !CONFIG_CAN_CALC_BITTIMING */
+static int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt)
+{
+	dev_err(dev->dev.parent, "bit-timing calculation not available\n");
+	return -EINVAL;
+}
+#endif /* CONFIG_CAN_CALC_BITTIMING */
+
+/*
+ * Checks the validity of the specified bit-timing parameters prop_seg,
+ * phase_seg1, phase_seg2 and sjw and tries to determine the bitrate
+ * prescaler value brp. You can find more information in the header
+ * file linux/can/netlink.h.
+ */
+static int can_fixup_bittiming(struct net_device *dev, struct can_bittiming *bt)
+{
+	struct can_priv *priv = netdev_priv(dev);
+	const struct can_bittiming_const *btc = priv->bittiming_const;
+	int tseg1, alltseg;
+	u64 brp64;
+
+	if (!priv->bittiming_const)
+		return -ENOTSUPP;
+
+	tseg1 = bt->prop_seg + bt->phase_seg1;
+	if (!bt->sjw)
+		bt->sjw = 1;
+	if (bt->sjw > btc->sjw_max ||
+	    tseg1 < btc->tseg1_min || tseg1 > btc->tseg1_max ||
+	    bt->phase_seg2 < btc->tseg2_min || bt->phase_seg2 > btc->tseg2_max)
+		return -ERANGE;
+
+	brp64 = (u64)priv->clock.freq * (u64)bt->tq;
+	if (btc->brp_inc > 1)
+		do_div(brp64, btc->brp_inc);
+	brp64 += 500000000UL - 1;
+	do_div(brp64, 1000000000UL); /* the practicable BRP */
+	if (btc->brp_inc > 1)
+		brp64 *= btc->brp_inc;
+	bt->brp = (u32)brp64;
+
+	if (bt->brp < btc->brp_min || bt->brp > btc->brp_max)
+		return -EINVAL;
+
+	alltseg = bt->prop_seg + bt->phase_seg1 + bt->phase_seg2 + 1;
+	bt->bitrate = priv->clock.freq / (bt->brp * alltseg);
+	bt->sample_point = ((tseg1 + 1) * 1000) / alltseg;
+
+	return 0;
+}
+
+int can_get_bittiming(struct net_device *dev, struct can_bittiming *bt)
+{
+	struct can_priv *priv = netdev_priv(dev);
+	int err;
+
+	/* Check if the CAN device has bit-timing parameters */
+	if (priv->bittiming_const) {
+
+		/* Non-expert mode? Check if the bitrate has been pre-defined */
+		if (!bt->tq)
+			/* Determine bit-timing parameters */
+			err = can_calc_bittiming(dev, bt);
+		else
+			/* Check bit-timing params and calculate proper brp */
+			err = can_fixup_bittiming(dev, bt);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Local echo of CAN messages
+ *
+ * CAN network devices *should* support a local echo functionality
+ * (see Documentation/networking/can.txt). To test the handling of CAN
+ * interfaces that do not support the local echo both driver types are
+ * implemented. In the case that the driver does not support the echo
+ * the IFF_ECHO remains clear in dev->flags. This causes the PF_CAN core
+ * to perform the echo as a fallback solution.
+ */
+static void can_flush_echo_skb(struct net_device *dev)
+{
+	struct can_priv *priv = netdev_priv(dev);
+	struct net_device_stats *stats = &dev->stats;
+	int i;
+
+	for (i = 0; i < CAN_ECHO_SKB_MAX; i++) {
+		if (priv->echo_skb[i]) {
+			kfree_skb(priv->echo_skb[i]);
+			priv->echo_skb[i] = NULL;
+			stats->tx_dropped++;
+			stats->tx_aborted_errors++;
+		}
+	}
+}
+
+/*
+ * Put the skb on the stack to be looped backed locally lateron
+ *
+ * The function is typically called in the start_xmit function
+ * of the device driver. The driver must protect access to
+ * priv->echo_skb, if necessary.
+ */
+void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, int idx)
+{
+	struct can_priv *priv = netdev_priv(dev);
+
+	/* check flag whether this packet has to be looped back */
+	if (!(dev->flags & IFF_ECHO) || skb->pkt_type != PACKET_LOOPBACK) {
+		kfree_skb(skb);
+		return;
+	}
+
+	if (!priv->echo_skb[idx]) {
+		struct sock *srcsk = skb->sk;
+
+		if (atomic_read(&skb->users) != 1) {
+			struct sk_buff *old_skb = skb;
+
+			skb = skb_clone(old_skb, GFP_ATOMIC);
+			kfree_skb(old_skb);
+			if (!skb)
+				return;
+		} else
+			skb_orphan(skb);
+
+		skb->sk = srcsk;
+
+		/* make settings for echo to reduce code in irq context */
+		skb->protocol = htons(ETH_P_CAN);
+		skb->pkt_type = PACKET_BROADCAST;
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		skb->dev = dev;
+
+		/* save this skb for tx interrupt echo handling */
+		priv->echo_skb[idx] = skb;
+	} else {
+		/* locking problem with netif_stop_queue() ?? */
+		dev_err(dev->dev.parent, "%s: BUG! echo_skb is occupied!\n",
+			__func__);
+		kfree_skb(skb);
+	}
+}
+EXPORT_SYMBOL_GPL(can_put_echo_skb);
+
+/*
+ * Get the skb from the stack and loop it back locally
+ *
+ * The function is typically called when the TX done interrupt
+ * is handled in the device driver. The driver must protect
+ * access to priv->echo_skb, if necessary.
+ */
+void can_get_echo_skb(struct net_device *dev, int idx)
+{
+	struct can_priv *priv = netdev_priv(dev);
+
+	if ((dev->flags & IFF_ECHO) && priv->echo_skb[idx]) {
+		netif_rx(priv->echo_skb[idx]);
+		priv->echo_skb[idx] = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(can_get_echo_skb);
+
+/*
+ * CAN device restart for bus-off recovery
+ */
+void can_restart(unsigned long data)
+{
+	struct net_device *dev = (struct net_device *)data;
+	struct can_priv *priv = netdev_priv(dev);
+	struct net_device_stats *stats = &dev->stats;
+	struct sk_buff *skb;
+	struct can_frame *cf;
+	int err;
+
+	BUG_ON(netif_carrier_ok(dev));
+
+	/*
+	 * No synchronization needed because the device is bus-off and
+	 * no messages can come in or go out.
+	 */
+	can_flush_echo_skb(dev);
+
+	/* send restart message upstream */
+	skb = dev_alloc_skb(sizeof(struct can_frame));
+	if (skb == NULL) {
+		err = -ENOMEM;
+		goto out;
+	}
+	skb->dev = dev;
+	skb->protocol = htons(ETH_P_CAN);
+	cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
+	memset(cf, 0, sizeof(struct can_frame));
+	cf->can_id = CAN_ERR_FLAG | CAN_ERR_RESTARTED;
+	cf->can_dlc = CAN_ERR_DLC;
+
+	netif_rx(skb);
+
+	dev->last_rx = jiffies;
+	stats->rx_packets++;
+	stats->rx_bytes += cf->can_dlc;
+
+	dev_dbg(dev->dev.parent, "restarted\n");
+	priv->can_stats.restarts++;
+
+	/* Now restart the device */
+	err = priv->do_set_mode(dev, CAN_MODE_START);
+
+out:
+	netif_carrier_on(dev);
+	if (err)
+		dev_err(dev->dev.parent, "Error %d during restart", err);
+}
+
+int can_restart_now(struct net_device *dev)
+{
+	struct can_priv *priv = netdev_priv(dev);
+
+	/*
+	 * A manual restart is only permitted if automatic restart is
+	 * disabled and the device is in the bus-off state
+	 */
+	if (priv->restart_ms)
+		return -EINVAL;
+	if (priv->state != CAN_STATE_BUS_OFF)
+		return -EBUSY;
+
+	/* Runs as soon as possible in the timer context */
+	mod_timer(&priv->restart_timer, jiffies);
+
+	return 0;
+}
+
+/*
+ * CAN bus-off
+ *
+ * This functions should be called when the device goes bus-off to
+ * tell the netif layer that no more packets can be sent or received.
+ * If enabled, a timer is started to trigger bus-off recovery.
+ */
+void can_bus_off(struct net_device *dev)
+{
+	struct can_priv *priv = netdev_priv(dev);
+
+	dev_dbg(dev->dev.parent, "bus-off\n");
+
+	netif_carrier_off(dev);
+	priv->can_stats.bus_off++;
+
+	if (priv->restart_ms)
+		mod_timer(&priv->restart_timer,
+			  jiffies + (priv->restart_ms * HZ) / 1000);
+}
+EXPORT_SYMBOL_GPL(can_bus_off);
+
+static void can_setup(struct net_device *dev)
+{
+	dev->type = ARPHRD_CAN;
+	dev->mtu = sizeof(struct can_frame);
+	dev->hard_header_len = 0;
+	dev->addr_len = 0;
+	dev->tx_queue_len = 10;
+
+	/* New-style flags. */
+	dev->flags = IFF_NOARP;
+	dev->features = NETIF_F_NO_CSUM;
+}
+
+/*
+ * Allocate and setup space for the CAN network device
+ */
+struct net_device *alloc_candev(int sizeof_priv)
+{
+	struct net_device *dev;
+	struct can_priv *priv;
+
+	dev = alloc_netdev(sizeof_priv, "can%d", can_setup);
+	if (!dev)
+		return NULL;
+
+	priv = netdev_priv(dev);
+
+	priv->state = CAN_STATE_STOPPED;
+
+	init_timer(&priv->restart_timer);
+
+	return dev;
+}
+EXPORT_SYMBOL_GPL(alloc_candev);
+
+/*
+ * Free space of the CAN network device
+ */
+void free_candev(struct net_device *dev)
+{
+	free_netdev(dev);
+}
+EXPORT_SYMBOL_GPL(free_candev);
+
+/*
+ * Common open function when the device gets opened.
+ *
+ * This function should be called in the open function of the device
+ * driver.
+ */
+int open_candev(struct net_device *dev)
+{
+	struct can_priv *priv = netdev_priv(dev);
+
+	if (!priv->bittiming.tq && !priv->bittiming.bitrate) {
+		dev_err(dev->dev.parent, "bit-timing not yet defined\n");
+		return -EINVAL;
+	}
+
+	setup_timer(&priv->restart_timer, can_restart, (unsigned long)dev);
+
+	return 0;
+}
+EXPORT_SYMBOL(open_candev);
+
+/*
+ * Common close function for cleanup before the device gets closed.
+ *
+ * This function should be called in the close function of the device
+ * driver.
+ */
+void close_candev(struct net_device *dev)
+{
+	struct can_priv *priv = netdev_priv(dev);
+
+	if (del_timer_sync(&priv->restart_timer))
+		dev_put(dev);
+	can_flush_echo_skb(dev);
+}
+EXPORT_SYMBOL_GPL(close_candev);
+
+/*
+ * CAN netlink interface
+ */
+static const struct nla_policy can_policy[IFLA_CAN_MAX + 1] = {
+	[IFLA_CAN_STATE]	= { .type = NLA_U32 },
+	[IFLA_CAN_CTRLMODE]	= { .len = sizeof(struct can_ctrlmode) },
+	[IFLA_CAN_RESTART_MS]	= { .type = NLA_U32 },
+	[IFLA_CAN_RESTART]	= { .type = NLA_U32 },
+	[IFLA_CAN_BITTIMING]	= { .len = sizeof(struct can_bittiming) },
+	[IFLA_CAN_BITTIMING_CONST]
+				= { .len = sizeof(struct can_bittiming_const) },
+	[IFLA_CAN_CLOCK]	= { .len = sizeof(struct can_clock) },
+};
+
+static int can_changelink(struct net_device *dev,
+			  struct nlattr *tb[], struct nlattr *data[])
+{
+	struct can_priv *priv = netdev_priv(dev);
+	int err;
+
+	/* We need synchronization with dev->stop() */
+	ASSERT_RTNL();
+
+	if (data[IFLA_CAN_CTRLMODE]) {
+		struct can_ctrlmode *cm;
+
+		/* Do not allow changing controller mode while running */
+		if (dev->flags & IFF_UP)
+			return -EBUSY;
+		cm = nla_data(data[IFLA_CAN_CTRLMODE]);
+		priv->ctrlmode &= ~cm->mask;
+		priv->ctrlmode |= cm->flags;
+	}
+
+	if (data[IFLA_CAN_BITTIMING]) {
+		struct can_bittiming bt;
+
+		/* Do not allow changing bittiming while running */
+		if (dev->flags & IFF_UP)
+			return -EBUSY;
+		memcpy(&bt, nla_data(data[IFLA_CAN_BITTIMING]), sizeof(bt));
+		if ((!bt.bitrate && !bt.tq) || (bt.bitrate && bt.tq))
+			return -EINVAL;
+		err = can_get_bittiming(dev, &bt);
+		if (err)
+			return err;
+		memcpy(&priv->bittiming, &bt, sizeof(bt));
+
+		if (priv->do_set_bittiming) {
+			/* Finally, set the bit-timing registers */
+			err = priv->do_set_bittiming(dev);
+			if (err)
+				return err;
+		}
+	}
+
+	if (data[IFLA_CAN_RESTART_MS]) {
+		/* Do not allow changing restart delay while running */
+		if (dev->flags & IFF_UP)
+			return -EBUSY;
+		priv->restart_ms = nla_get_u32(data[IFLA_CAN_RESTART_MS]);
+	}
+
+	if (data[IFLA_CAN_RESTART]) {
+		/* Do not allow a restart while not running */
+		if (!(dev->flags & IFF_UP))
+			return -EINVAL;
+		err = can_restart_now(dev);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int can_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct can_priv *priv = netdev_priv(dev);
+	struct can_ctrlmode cm = {.flags = priv->ctrlmode};
+	enum can_state state = priv->state;
+
+	if (priv->do_get_state)
+		priv->do_get_state(dev, &state);
+	NLA_PUT_U32(skb, IFLA_CAN_STATE, state);
+	NLA_PUT(skb, IFLA_CAN_CTRLMODE, sizeof(cm), &cm);
+	NLA_PUT_U32(skb, IFLA_CAN_RESTART_MS, priv->restart_ms);
+	NLA_PUT(skb, IFLA_CAN_BITTIMING,
+		sizeof(priv->bittiming), &priv->bittiming);
+	NLA_PUT(skb, IFLA_CAN_CLOCK, sizeof(cm), &priv->clock);
+	if (priv->bittiming_const)
+		NLA_PUT(skb, IFLA_CAN_BITTIMING_CONST,
+			sizeof(*priv->bittiming_const), priv->bittiming_const);
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int can_fill_xstats(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct can_priv *priv = netdev_priv(dev);
+
+	NLA_PUT(skb, IFLA_INFO_XSTATS,
+		sizeof(priv->can_stats), &priv->can_stats);
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static struct rtnl_link_ops can_link_ops __read_mostly = {
+	.kind		= "can",
+	.maxtype	= IFLA_CAN_MAX,
+	.policy		= can_policy,
+	.setup		= can_setup,
+	.changelink	= can_changelink,
+	.fill_info	= can_fill_info,
+	.fill_xstats	= can_fill_xstats,
+};
+
+/*
+ * Register the CAN network device
+ */
+int register_candev(struct net_device *dev)
+{
+	dev->rtnl_link_ops = &can_link_ops;
+	return register_netdev(dev);
+}
+EXPORT_SYMBOL_GPL(register_candev);
+
+/*
+ * Unregister the CAN network device
+ */
+void unregister_candev(struct net_device *dev)
+{
+	unregister_netdev(dev);
+}
+EXPORT_SYMBOL_GPL(unregister_candev);
+
+static __init int can_dev_init(void)
+{
+	int err;
+
+	err = rtnl_link_register(&can_link_ops);
+	if (!err)
+		printk(KERN_INFO MOD_DESC "\n");
+
+	return err;
+}
+module_init(can_dev_init);
+
+static __exit void can_dev_exit(void)
+{
+	rtnl_link_unregister(&can_link_ops);
+}
+module_exit(can_dev_exit);
+
+MODULE_ALIAS_RTNL_LINK("can");
diff --git a/include/linux/can/Kbuild b/include/linux/can/Kbuild
index eff898aac02..8cb05aae661 100644
--- a/include/linux/can/Kbuild
+++ b/include/linux/can/Kbuild
@@ -1,3 +1,4 @@
 header-y += raw.h
 header-y += bcm.h
 header-y += error.h
+header-y += netlink.h
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
new file mode 100644
index 00000000000..4a37a56f6cd
--- /dev/null
+++ b/include/linux/can/dev.h
@@ -0,0 +1,70 @@
+/*
+ * linux/can/dev.h
+ *
+ * Definitions for the CAN network device driver interface
+ *
+ * Copyright (C) 2006 Andrey Volkov <avolkov@varma-el.com>
+ *               Varma Electronics Oy
+ *
+ * Copyright (C) 2008 Wolfgang Grandegger <wg@grandegger.com>
+ *
+ * Send feedback to <socketcan-users@lists.berlios.de>
+ */
+
+#ifndef CAN_DEV_H
+#define CAN_DEV_H
+
+#include <linux/can/netlink.h>
+#include <linux/can/error.h>
+
+/*
+ * CAN mode
+ */
+enum can_mode {
+	CAN_MODE_STOP = 0,
+	CAN_MODE_START,
+	CAN_MODE_SLEEP
+};
+
+/*
+ * CAN common private data
+ */
+#define CAN_ECHO_SKB_MAX  4
+
+struct can_priv {
+	struct can_device_stats can_stats;
+
+	struct can_bittiming bittiming;
+	struct can_bittiming_const *bittiming_const;
+	struct can_clock clock;
+
+	enum can_state state;
+	u32 ctrlmode;
+
+	int restart_ms;
+	struct timer_list restart_timer;
+
+	struct sk_buff *echo_skb[CAN_ECHO_SKB_MAX];
+
+	int (*do_set_bittiming)(struct net_device *dev);
+	int (*do_set_mode)(struct net_device *dev, enum can_mode mode);
+	int (*do_get_state)(const struct net_device *dev,
+			    enum can_state *state);
+};
+
+struct net_device *alloc_candev(int sizeof_priv);
+void free_candev(struct net_device *dev);
+
+int open_candev(struct net_device *dev);
+void close_candev(struct net_device *dev);
+
+int register_candev(struct net_device *dev);
+void unregister_candev(struct net_device *dev);
+
+int can_restart_now(struct net_device *dev);
+void can_bus_off(struct net_device *dev);
+
+void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, int idx);
+void can_get_echo_skb(struct net_device *dev, int idx);
+
+#endif /* CAN_DEV_H */
diff --git a/include/linux/can/netlink.h b/include/linux/can/netlink.h
new file mode 100644
index 00000000000..9ecbb7871c0
--- /dev/null
+++ b/include/linux/can/netlink.h
@@ -0,0 +1,113 @@
+/*
+ * linux/can/netlink.h
+ *
+ * Definitions for the CAN netlink interface
+ *
+ * Copyright (c) 2009 Wolfgang Grandegger <wg@grandegger.com>
+ *
+ * Send feedback to <socketcan-users@lists.berlios.de>
+ *
+ */
+
+#ifndef CAN_NETLINK_H
+#define CAN_NETLINK_H
+
+#include <linux/types.h>
+
+/*
+ * CAN bit-timing parameters
+ *
+ * For futher information, please read chapter "8 BIT TIMING
+ * REQUIREMENTS" of the "Bosch CAN Specification version 2.0"
+ * at http://www.semiconductors.bosch.de/pdf/can2spec.pdf.
+ */
+struct can_bittiming {
+	__u32 bitrate;		/* Bit-rate in bits/second */
+	__u32 sample_point;	/* Sample point in one-tenth of a percent */
+	__u32 tq;		/* Time quanta (TQ) in nanoseconds */
+	__u32 prop_seg;		/* Propagation segment in TQs */
+	__u32 phase_seg1;	/* Phase buffer segment 1 in TQs */
+	__u32 phase_seg2;	/* Phase buffer segment 2 in TQs */
+	__u32 sjw;		/* Synchronisation jump width in TQs */
+	__u32 brp;		/* Bit-rate prescaler */
+};
+
+/*
+ * CAN harware-dependent bit-timing constant
+ *
+ * Used for calculating and checking bit-timing parameters
+ */
+struct can_bittiming_const {
+	char name[16];		/* Name of the CAN controller hardware */
+	__u32 tseg1_min;	/* Time segement 1 = prop_seg + phase_seg1 */
+	__u32 tseg1_max;
+	__u32 tseg2_min;	/* Time segement 2 = phase_seg2 */
+	__u32 tseg2_max;
+	__u32 sjw_max;		/* Synchronisation jump width */
+	__u32 brp_min;		/* Bit-rate prescaler */
+	__u32 brp_max;
+	__u32 brp_inc;
+};
+
+/*
+ * CAN clock parameters
+ */
+struct can_clock {
+	__u32 freq;		/* CAN system clock frequency in Hz */
+};
+
+/*
+ * CAN operational and error states
+ */
+enum can_state {
+	CAN_STATE_ERROR_ACTIVE = 0,	/* RX/TX error count < 96 */
+	CAN_STATE_ERROR_WARNING,	/* RX/TX error count < 128 */
+	CAN_STATE_ERROR_PASSIVE,	/* RX/TX error count < 256 */
+	CAN_STATE_BUS_OFF,		/* RX/TX error count >= 256 */
+	CAN_STATE_STOPPED,		/* Device is stopped */
+	CAN_STATE_SLEEPING,		/* Device is sleeping */
+	CAN_STATE_MAX
+};
+
+/*
+ * CAN controller mode
+ */
+struct can_ctrlmode {
+	__u32 mask;
+	__u32 flags;
+};
+
+#define CAN_CTRLMODE_LOOPBACK	0x1	/* Loopback mode */
+#define CAN_CTRLMODE_LISTENONLY	0x2 	/* Listen-only mode */
+#define CAN_CTRLMODE_3_SAMPLES	0x4	/* Triple sampling mode */
+
+/*
+ * CAN device statistics
+ */
+struct can_device_stats {
+	__u32 bus_error;	/* Bus errors */
+	__u32 error_warning;	/* Changes to error warning state */
+	__u32 error_passive;	/* Changes to error passive state */
+	__u32 bus_off;		/* Changes to bus off state */
+	__u32 arbitration_lost; /* Arbitration lost errors */
+	__u32 restarts;		/* CAN controller re-starts */
+};
+
+/*
+ * CAN netlink interface
+ */
+enum {
+	IFLA_CAN_UNSPEC,
+	IFLA_CAN_BITTIMING,
+	IFLA_CAN_BITTIMING_CONST,
+	IFLA_CAN_CLOCK,
+	IFLA_CAN_STATE,
+	IFLA_CAN_CTRLMODE,
+	IFLA_CAN_RESTART_MS,
+	IFLA_CAN_RESTART,
+	__IFLA_CAN_MAX
+};
+
+#define IFLA_CAN_MAX	(__IFLA_CAN_MAX - 1)
+
+#endif /* CAN_NETLINK_H */
-- 
cgit v1.2.3-70-g09d2


From f534e52f091a7b9f51fb6726710bdf731b663e94 Mon Sep 17 00:00:00 2001
From: Wolfgang Grandegger <wg@grandegger.com>
Date: Fri, 15 May 2009 23:39:31 +0000
Subject: can: SJA1000 generic platform bus driver

This driver adds support for the SJA1000 chips connected to the
"platform bus", which can be found on various embedded systems.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Wolfgang Grandegger <wg@grandegger.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Oliver Hartkopp <oliver.hartkopp@volkswagen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/Kconfig                    |  10 ++
 drivers/net/can/sja1000/Makefile           |   1 +
 drivers/net/can/sja1000/sja1000_platform.c | 164 +++++++++++++++++++++++++++++
 include/linux/can/platform/sja1000.h       |  32 ++++++
 4 files changed, 207 insertions(+)
 create mode 100644 drivers/net/can/sja1000/sja1000_platform.c
 create mode 100644 include/linux/can/platform/sja1000.h

(limited to 'include')

diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index c7e5ce339ea..8fe79741d3c 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -41,6 +41,16 @@ config CAN_SJA1000
 	---help---
 	  Driver for the SJA1000 CAN controllers from Philips or NXP
 
+config CAN_SJA1000_PLATFORM
+	depends on CAN_SJA1000
+	tristate "Generic Platform Bus based SJA1000 driver"
+	---help---
+	  This driver adds support for the SJA1000 chips connected to
+	  the "platform bus" (Linux abstraction for directly to the
+	  processor attached devices).  Which can be found on various
+	  boards from Phytec (http://www.phytec.de) like the PCM027,
+	  PCM038.
+
 config CAN_DEBUG_DEVICES
 	bool "CAN devices debugging messages"
 	depends on CAN
diff --git a/drivers/net/can/sja1000/Makefile b/drivers/net/can/sja1000/Makefile
index 893ab2cfe24..5115cc90ab5 100644
--- a/drivers/net/can/sja1000/Makefile
+++ b/drivers/net/can/sja1000/Makefile
@@ -3,5 +3,6 @@
 #
 
 obj-$(CONFIG_CAN_SJA1000) += sja1000.o
+obj-$(CONFIG_CAN_SJA1000_PLATFORM) += sja1000_platform.o
 
 ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG
diff --git a/drivers/net/can/sja1000/sja1000_platform.c b/drivers/net/can/sja1000/sja1000_platform.c
new file mode 100644
index 00000000000..8017229d6fd
--- /dev/null
+++ b/drivers/net/can/sja1000/sja1000_platform.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2005 Sascha Hauer, Pengutronix
+ * Copyright (C) 2007 Wolfgang Grandegger <wg@grandegger.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the version 2 of the GNU General Public License
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/irq.h>
+#include <linux/can.h>
+#include <linux/can/dev.h>
+#include <linux/can/platform/sja1000.h>
+#include <linux/io.h>
+
+#include "sja1000.h"
+
+#define DRV_NAME "sja1000_platform"
+
+MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>");
+MODULE_DESCRIPTION("Socket-CAN driver for SJA1000 on the platform bus");
+MODULE_LICENSE("GPL v2");
+
+static u8 sp_read_reg(const struct net_device *dev, int reg)
+{
+	return ioread8((void __iomem *)(dev->base_addr + reg));
+}
+
+static void sp_write_reg(const struct net_device *dev, int reg, u8 val)
+{
+	iowrite8(val, (void __iomem *)(dev->base_addr + reg));
+}
+
+static int sp_probe(struct platform_device *pdev)
+{
+	int err;
+	void __iomem *addr;
+	struct net_device *dev;
+	struct sja1000_priv *priv;
+	struct resource *res_mem, *res_irq;
+	struct sja1000_platform_data *pdata;
+
+	pdata = pdev->dev.platform_data;
+	if (!pdata) {
+		dev_err(&pdev->dev, "No platform data provided!\n");
+		err = -ENODEV;
+		goto exit;
+	}
+
+	res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	res_irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!res_mem || !res_irq) {
+		err = -ENODEV;
+		goto exit;
+	}
+
+	if (!request_mem_region(res_mem->start, resource_size(res_mem),
+				DRV_NAME)) {
+		err = -EBUSY;
+		goto exit;
+	}
+
+	addr = ioremap_nocache(res_mem->start, resource_size(res_mem));
+	if (!addr) {
+		err = -ENOMEM;
+		goto exit_release;
+	}
+
+	dev = alloc_sja1000dev(0);
+	if (!dev) {
+		err = -ENOMEM;
+		goto exit_iounmap;
+	}
+	priv = netdev_priv(dev);
+
+	dev->base_addr = (unsigned long)addr;
+	dev->irq = res_irq->start;
+	priv->irq_flags = res_irq->flags & IRQF_TRIGGER_MASK;
+	priv->read_reg = sp_read_reg;
+	priv->write_reg = sp_write_reg;
+	priv->can.clock.freq = pdata->clock;
+	priv->ocr = pdata->ocr;
+	priv->cdr = pdata->cdr;
+
+	dev_set_drvdata(&pdev->dev, dev);
+	SET_NETDEV_DEV(dev, &pdev->dev);
+
+	err = register_sja1000dev(dev);
+	if (err) {
+		dev_err(&pdev->dev, "registering %s failed (err=%d)\n",
+			DRV_NAME, err);
+		goto exit_free;
+	}
+
+	dev_info(&pdev->dev, "%s device registered (base_addr=%#lx, irq=%d)\n",
+		 DRV_NAME, dev->base_addr, dev->irq);
+	return 0;
+
+ exit_free:
+	free_sja1000dev(dev);
+ exit_iounmap:
+	iounmap(addr);
+ exit_release:
+	release_mem_region(res_mem->start, resource_size(res_mem));
+ exit:
+	return err;
+}
+
+static int sp_remove(struct platform_device *pdev)
+{
+	struct net_device *dev = dev_get_drvdata(&pdev->dev);
+	struct resource *res;
+
+	unregister_sja1000dev(dev);
+	dev_set_drvdata(&pdev->dev, NULL);
+
+	if (dev->base_addr)
+		iounmap((void __iomem *)dev->base_addr);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	release_mem_region(res->start, resource_size(res));
+
+	free_sja1000dev(dev);
+
+	return 0;
+}
+
+static struct platform_driver sp_driver = {
+	.probe = sp_probe,
+	.remove = sp_remove,
+	.driver = {
+		.name = DRV_NAME,
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init sp_init(void)
+{
+	return platform_driver_register(&sp_driver);
+}
+
+static void __exit sp_exit(void)
+{
+	platform_driver_unregister(&sp_driver);
+}
+
+module_init(sp_init);
+module_exit(sp_exit);
diff --git a/include/linux/can/platform/sja1000.h b/include/linux/can/platform/sja1000.h
new file mode 100644
index 00000000000..37966e630ff
--- /dev/null
+++ b/include/linux/can/platform/sja1000.h
@@ -0,0 +1,32 @@
+#ifndef _CAN_PLATFORM_SJA1000_H_
+#define _CAN_PLATFORM_SJA1000_H_
+
+/* clock divider register */
+#define CDR_CLKOUT_MASK 0x07
+#define CDR_CLK_OFF	0x08 /* Clock off (CLKOUT pin) */
+#define CDR_RXINPEN	0x20 /* TX1 output is RX irq output */
+#define CDR_CBP		0x40 /* CAN input comparator bypass */
+#define CDR_PELICAN	0x80 /* PeliCAN mode */
+
+/* output control register */
+#define OCR_MODE_BIPHASE  0x00
+#define OCR_MODE_TEST     0x01
+#define OCR_MODE_NORMAL   0x02
+#define OCR_MODE_CLOCK    0x03
+#define OCR_TX0_INVERT    0x04
+#define OCR_TX0_PULLDOWN  0x08
+#define OCR_TX0_PULLUP    0x10
+#define OCR_TX0_PUSHPULL  0x18
+#define OCR_TX1_INVERT    0x20
+#define OCR_TX1_PULLDOWN  0x40
+#define OCR_TX1_PULLUP    0x80
+#define OCR_TX1_PUSHPULL  0xc0
+
+struct sja1000_platform_data {
+	u32 clock;	/* CAN bus oscillator frequency in Hz */
+
+	u8 ocr;		/* output control register */
+	u8 cdr;		/* clock divider register */
+};
+
+#endif	/* !_CAN_PLATFORM_SJA1000_H_ */
-- 
cgit v1.2.3-70-g09d2


From 69e3c75f4d541a6eb151b3ef91f34033cb3ad6e1 Mon Sep 17 00:00:00 2001
From: Johann Baudy <johann.baudy@gnu-log.net>
Date: Mon, 18 May 2009 22:11:22 -0700
Subject: net: TX_RING and packet mmap

New packet socket feature that makes packet socket more efficient for
transmission.

- It reduces number of system call through a PACKET_TX_RING mechanism,
  based on PACKET_RX_RING (Circular buffer allocated in kernel space
  which is mmapped from user space).

- It minimizes CPU copy using fragmented SKB (almost zero copy).

Signed-off-by: Johann Baudy <johann.baudy@gnu-log.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/packet_mmap.txt | 140 +++++++-
 include/linux/if_packet.h                |  20 +-
 include/linux/skbuff.h                   |   3 +
 net/packet/af_packet.c                   | 588 +++++++++++++++++++++++++------
 4 files changed, 616 insertions(+), 135 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt
index 07c53d59603..a22fd85e379 100644
--- a/Documentation/networking/packet_mmap.txt
+++ b/Documentation/networking/packet_mmap.txt
@@ -4,16 +4,18 @@
 
 This file documents the CONFIG_PACKET_MMAP option available with the PACKET
 socket interface on 2.4 and 2.6 kernels. This type of sockets is used for 
-capture network traffic with utilities like tcpdump or any other that uses 
-the libpcap library. 
-
-You can find the latest version of this document at
+capture network traffic with utilities like tcpdump or any other that needs
+raw access to network interface.
 
+You can find the latest version of this document at:
     http://pusa.uv.es/~ulisses/packet_mmap/
 
-Please send me your comments to
+Howto can be found at:
+    http://wiki.gnu-log.net (packet_mmap)
 
+Please send your comments to
     Ulisses Alonso Camaró <uaca@i.hate.spam.alumni.uv.es>
+    Johann Baudy <johann.baudy@gnu-log.net>
 
 -------------------------------------------------------------------------------
 + Why use PACKET_MMAP
@@ -25,19 +27,24 @@ to capture each packet, it requires two if you want to get packet's
 timestamp (like libpcap always does).
 
 In the other hand PACKET_MMAP is very efficient. PACKET_MMAP provides a size 
-configurable circular buffer mapped in user space. This way reading packets just 
-needs to wait for them, most of the time there is no need to issue a single 
-system call. By using a shared buffer between the kernel and the user 
-also has the benefit of minimizing packet copies.
-
-It's fine to use PACKET_MMAP to improve the performance of the capture process, 
-but it isn't everything. At least, if you are capturing at high speeds (this 
-is relative to the cpu speed), you should check if the device driver of your 
-network interface card supports some sort of interrupt load mitigation or 
-(even better) if it supports NAPI, also make sure it is enabled.
+configurable circular buffer mapped in user space that can be used to either
+send or receive packets. This way reading packets just needs to wait for them,
+most of the time there is no need to issue a single system call. Concerning
+transmission, multiple packets can be sent through one system call to get the
+highest bandwidth.
+By using a shared buffer between the kernel and the user also has the benefit
+of minimizing packet copies.
+
+It's fine to use PACKET_MMAP to improve the performance of the capture and
+transmission process, but it isn't everything. At least, if you are capturing
+at high speeds (this is relative to the cpu speed), you should check if the
+device driver of your network interface card supports some sort of interrupt
+load mitigation or (even better) if it supports NAPI, also make sure it is
+enabled. For transmission, check the MTU (Maximum Transmission Unit) used and
+supported by devices of your network.
 
 --------------------------------------------------------------------------------
-+ How to use CONFIG_PACKET_MMAP
++ How to use CONFIG_PACKET_MMAP to improve capture process
 --------------------------------------------------------------------------------
 
 From the user standpoint, you should use the higher level libpcap library, which
@@ -57,7 +64,7 @@ the low level details or want to improve libpcap by including PACKET_MMAP
 support.
 
 --------------------------------------------------------------------------------
-+ How to use CONFIG_PACKET_MMAP directly
++ How to use CONFIG_PACKET_MMAP directly to improve capture process
 --------------------------------------------------------------------------------
 
 From the system calls stand point, the use of PACKET_MMAP involves
@@ -66,6 +73,7 @@ the following process:
 
 [setup]     socket() -------> creation of the capture socket
             setsockopt() ---> allocation of the circular buffer (ring)
+                              option: PACKET_RX_RING
             mmap() ---------> mapping of the allocated buffer to the
                               user process
 
@@ -96,6 +104,65 @@ Next I will describe PACKET_MMAP settings and it's constraints,
 also the mapping of the circular buffer in the user process and 
 the use of this buffer.
 
+--------------------------------------------------------------------------------
++ How to use CONFIG_PACKET_MMAP directly to improve transmission process
+--------------------------------------------------------------------------------
+Transmission process is similar to capture as shown below.
+
+[setup]          socket() -------> creation of the transmission socket
+                 setsockopt() ---> allocation of the circular buffer (ring)
+                                   option: PACKET_TX_RING
+                 bind() ---------> bind transmission socket with a network interface
+                 mmap() ---------> mapping of the allocated buffer to the
+                                   user process
+
+[transmission]   poll() ---------> wait for free packets (optional)
+                 send() ---------> send all packets that are set as ready in
+                                   the ring
+                                   The flag MSG_DONTWAIT can be used to return
+                                   before end of transfer.
+
+[shutdown]  close() --------> destruction of the transmission socket and
+                              deallocation of all associated resources.
+
+Binding the socket to your network interface is mandatory (with zero copy) to
+know the header size of frames used in the circular buffer.
+
+As capture, each frame contains two parts:
+
+ --------------------
+| struct tpacket_hdr | Header. It contains the status of
+|                    | of this frame
+|--------------------|
+| data buffer        |
+.                    .  Data that will be sent over the network interface.
+.                    .
+ --------------------
+
+ bind() associates the socket to your network interface thanks to
+ sll_ifindex parameter of struct sockaddr_ll.
+
+ Initialization example:
+
+ struct sockaddr_ll my_addr;
+ struct ifreq s_ifr;
+ ...
+
+ strncpy (s_ifr.ifr_name, "eth0", sizeof(s_ifr.ifr_name));
+
+ /* get interface index of eth0 */
+ ioctl(this->socket, SIOCGIFINDEX, &s_ifr);
+
+ /* fill sockaddr_ll struct to prepare binding */
+ my_addr.sll_family = AF_PACKET;
+ my_addr.sll_protocol = ETH_P_ALL;
+ my_addr.sll_ifindex =  s_ifr.ifr_ifindex;
+
+ /* bind socket to eth0 */
+ bind(this->socket, (struct sockaddr *)&my_addr, sizeof(struct sockaddr_ll));
+
+ A complete tutorial is available at: http://wiki.gnu-log.net/
+
 --------------------------------------------------------------------------------
 + PACKET_MMAP settings
 --------------------------------------------------------------------------------
@@ -103,7 +170,10 @@ the use of this buffer.
 
 To setup PACKET_MMAP from user level code is done with a call like
 
+ - Capture process
      setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req, sizeof(req))
+ - Transmission process
+     setsockopt(fd, SOL_PACKET, PACKET_TX_RING, (void *) &req, sizeof(req))
 
 The most significant argument in the previous call is the req parameter, 
 this parameter must to have the following structure:
@@ -117,11 +187,11 @@ this parameter must to have the following structure:
     };
 
 This structure is defined in /usr/include/linux/if_packet.h and establishes a 
-circular buffer (ring) of unswappable memory mapped in the capture process. 
+circular buffer (ring) of unswappable memory.
 Being mapped in the capture process allows reading the captured frames and 
 related meta-information like timestamps without requiring a system call.
 
-Captured frames are grouped in blocks. Each block is a physically contiguous 
+Frames are grouped in blocks. Each block is a physically contiguous
 region of memory and holds tp_block_size/tp_frame_size frames. The total number 
 of blocks is tp_block_nr. Note that tp_frame_nr is a redundant parameter because
 
@@ -336,6 +406,7 @@ struct tpacket_hdr). If this field is 0 means that the frame is ready
 to be used for the kernel, If not, there is a frame the user can read 
 and the following flags apply:
 
++++ Capture process:
      from include/linux/if_packet.h
 
      #define TP_STATUS_COPY          2 
@@ -391,6 +462,37 @@ packets are in the ring:
 It doesn't incur in a race condition to first check the status value and 
 then poll for frames.
 
+
+++ Transmission process
+Those defines are also used for transmission:
+
+     #define TP_STATUS_AVAILABLE        0 // Frame is available
+     #define TP_STATUS_SEND_REQUEST     1 // Frame will be sent on next send()
+     #define TP_STATUS_SENDING          2 // Frame is currently in transmission
+     #define TP_STATUS_WRONG_FORMAT     4 // Frame format is not correct
+
+First, the kernel initializes all frames to TP_STATUS_AVAILABLE. To send a
+packet, the user fills a data buffer of an available frame, sets tp_len to
+current data buffer size and sets its status field to TP_STATUS_SEND_REQUEST.
+This can be done on multiple frames. Once the user is ready to transmit, it
+calls send(). Then all buffers with status equal to TP_STATUS_SEND_REQUEST are
+forwarded to the network device. The kernel updates each status of sent
+frames with TP_STATUS_SENDING until the end of transfer.
+At the end of each transfer, buffer status returns to TP_STATUS_AVAILABLE.
+
+    header->tp_len = in_i_size;
+    header->tp_status = TP_STATUS_SEND_REQUEST;
+    retval = send(this->socket, NULL, 0, 0);
+
+The user can also use poll() to check if a buffer is available:
+(status == TP_STATUS_SENDING)
+
+    struct pollfd pfd;
+    pfd.fd = fd;
+    pfd.revents = 0;
+    pfd.events = POLLOUT;
+    retval = poll(&pfd, 1, timeout);
+
 --------------------------------------------------------------------------------
 + THANKS
 --------------------------------------------------------------------------------
diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index 18db0668065..5b2badeb949 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -46,6 +46,8 @@ struct sockaddr_ll
 #define PACKET_VERSION			10
 #define PACKET_HDRLEN			11
 #define PACKET_RESERVE			12
+#define PACKET_TX_RING			13
+#define PACKET_LOSS			14
 
 struct tpacket_stats
 {
@@ -63,14 +65,22 @@ struct tpacket_auxdata
 	__u16		tp_vlan_tci;
 };
 
+/* Rx ring - header status */
+#define TP_STATUS_KERNEL	0x0
+#define TP_STATUS_USER		0x1
+#define TP_STATUS_COPY		0x2
+#define TP_STATUS_LOSING	0x4
+#define TP_STATUS_CSUMNOTREADY	0x8
+
+/* Tx ring - header status */
+#define TP_STATUS_AVAILABLE	0x0
+#define TP_STATUS_SEND_REQUEST	0x1
+#define TP_STATUS_SENDING	0x2
+#define TP_STATUS_WRONG_FORMAT	0x4
+
 struct tpacket_hdr
 {
 	unsigned long	tp_status;
-#define TP_STATUS_KERNEL	0
-#define TP_STATUS_USER		1
-#define TP_STATUS_COPY		2
-#define TP_STATUS_LOSING	4
-#define TP_STATUS_CSUMNOTREADY	8
 	unsigned int	tp_len;
 	unsigned int	tp_snaplen;
 	unsigned short	tp_mac;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1b5c3d298f4..aff494ba6a3 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -203,6 +203,9 @@ struct skb_shared_info {
 #ifdef CONFIG_HAS_DMA
 	dma_addr_t	dma_maps[MAX_SKB_FRAGS + 1];
 #endif
+	/* Intermediate layers must ensure that destructor_arg
+	 * remains valid until skb destructor */
+	void *		destructor_arg;
 };
 
 /* We divide dataref into two halves.  The higher 16 bits hold references
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f546e81acc4..766e6b41f7c 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -39,6 +39,7 @@
  *					will simply extend the hardware address
  *					byte arrays at the end of sockaddr_ll
  *					and packet_mreq.
+ *		Johann Baudy	:	Added TX RING.
  *
  *		This program is free software; you can redistribute it and/or
  *		modify it under the terms of the GNU General Public License
@@ -157,7 +158,25 @@ struct packet_mreq_max
 };
 
 #ifdef CONFIG_PACKET_MMAP
-static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
+static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
+		int closing, int tx_ring);
+
+struct packet_ring_buffer {
+	char *			*pg_vec;
+	unsigned int		head;
+	unsigned int		frames_per_block;
+	unsigned int		frame_size;
+	unsigned int		frame_max;
+
+	unsigned int		pg_vec_order;
+	unsigned int		pg_vec_pages;
+	unsigned int		pg_vec_len;
+
+	atomic_t		pending;
+};
+
+struct packet_sock;
+static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
 #endif
 
 static void packet_flush_mclist(struct sock *sk);
@@ -167,11 +186,8 @@ struct packet_sock {
 	struct sock		sk;
 	struct tpacket_stats	stats;
 #ifdef CONFIG_PACKET_MMAP
-	char *			*pg_vec;
-	unsigned int		head;
-	unsigned int            frames_per_block;
-	unsigned int		frame_size;
-	unsigned int		frame_max;
+	struct packet_ring_buffer	rx_ring;
+	struct packet_ring_buffer	tx_ring;
 	int			copy_thresh;
 #endif
 	struct packet_type	prot_hook;
@@ -185,12 +201,10 @@ struct packet_sock {
 	struct packet_mclist	*mclist;
 #ifdef CONFIG_PACKET_MMAP
 	atomic_t		mapped;
-	unsigned int            pg_vec_order;
-	unsigned int		pg_vec_pages;
-	unsigned int		pg_vec_len;
 	enum tpacket_versions	tp_version;
 	unsigned int		tp_hdrlen;
 	unsigned int		tp_reserve;
+	unsigned int		tp_loss:1;
 #endif
 };
 
@@ -206,36 +220,33 @@ struct packet_skb_cb {
 
 #ifdef CONFIG_PACKET_MMAP
 
-static void *packet_lookup_frame(struct packet_sock *po, unsigned int position,
-				 int status)
+static void __packet_set_status(struct packet_sock *po, void *frame, int status)
 {
-	unsigned int pg_vec_pos, frame_offset;
 	union {
 		struct tpacket_hdr *h1;
 		struct tpacket2_hdr *h2;
 		void *raw;
 	} h;
 
-	pg_vec_pos = position / po->frames_per_block;
-	frame_offset = position % po->frames_per_block;
-
-	h.raw = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size);
+	h.raw = frame;
 	switch (po->tp_version) {
 	case TPACKET_V1:
-		if (status != (h.h1->tp_status ? TP_STATUS_USER :
-						TP_STATUS_KERNEL))
-			return NULL;
+		h.h1->tp_status = status;
+		flush_dcache_page(virt_to_page(&h.h1->tp_status));
 		break;
 	case TPACKET_V2:
-		if (status != (h.h2->tp_status ? TP_STATUS_USER :
-						TP_STATUS_KERNEL))
-			return NULL;
+		h.h2->tp_status = status;
+		flush_dcache_page(virt_to_page(&h.h2->tp_status));
 		break;
+	default:
+		printk(KERN_ERR "TPACKET version not supported\n");
+		BUG();
 	}
-	return h.raw;
+
+	smp_wmb();
 }
 
-static void __packet_set_status(struct packet_sock *po, void *frame, int status)
+static int __packet_get_status(struct packet_sock *po, void *frame)
 {
 	union {
 		struct tpacket_hdr *h1;
@@ -243,16 +254,66 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
 		void *raw;
 	} h;
 
+	smp_rmb();
+
 	h.raw = frame;
 	switch (po->tp_version) {
 	case TPACKET_V1:
-		h.h1->tp_status = status;
-		break;
+		flush_dcache_page(virt_to_page(&h.h1->tp_status));
+		return h.h1->tp_status;
 	case TPACKET_V2:
-		h.h2->tp_status = status;
-		break;
+		flush_dcache_page(virt_to_page(&h.h2->tp_status));
+		return h.h2->tp_status;
+	default:
+		printk(KERN_ERR "TPACKET version not supported\n");
+		BUG();
+		return 0;
 	}
 }
+
+static void *packet_lookup_frame(struct packet_sock *po,
+		struct packet_ring_buffer *rb,
+		unsigned int position,
+		int status)
+{
+	unsigned int pg_vec_pos, frame_offset;
+	union {
+		struct tpacket_hdr *h1;
+		struct tpacket2_hdr *h2;
+		void *raw;
+	} h;
+
+	pg_vec_pos = position / rb->frames_per_block;
+	frame_offset = position % rb->frames_per_block;
+
+	h.raw = rb->pg_vec[pg_vec_pos] + (frame_offset * rb->frame_size);
+
+	if (status != __packet_get_status(po, h.raw))
+		return NULL;
+
+	return h.raw;
+}
+
+static inline void *packet_current_frame(struct packet_sock *po,
+		struct packet_ring_buffer *rb,
+		int status)
+{
+	return packet_lookup_frame(po, rb, rb->head, status);
+}
+
+static inline void *packet_previous_frame(struct packet_sock *po,
+		struct packet_ring_buffer *rb,
+		int status)
+{
+	unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
+	return packet_lookup_frame(po, rb, previous, status);
+}
+
+static inline void packet_increment_head(struct packet_ring_buffer *buff)
+{
+	buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
+}
+
 #endif
 
 static inline struct packet_sock *pkt_sk(struct sock *sk)
@@ -648,7 +709,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 		macoff = netoff - maclen;
 	}
 
-	if (macoff + snaplen > po->frame_size) {
+	if (macoff + snaplen > po->rx_ring.frame_size) {
 		if (po->copy_thresh &&
 		    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
 		    (unsigned)sk->sk_rcvbuf) {
@@ -661,16 +722,16 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 			if (copy_skb)
 				skb_set_owner_r(copy_skb, sk);
 		}
-		snaplen = po->frame_size - macoff;
+		snaplen = po->rx_ring.frame_size - macoff;
 		if ((int)snaplen < 0)
 			snaplen = 0;
 	}
 
 	spin_lock(&sk->sk_receive_queue.lock);
-	h.raw = packet_lookup_frame(po, po->head, TP_STATUS_KERNEL);
+	h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
 	if (!h.raw)
 		goto ring_is_full;
-	po->head = po->head != po->frame_max ? po->head+1 : 0;
+	packet_increment_head(&po->rx_ring);
 	po->stats.tp_packets++;
 	if (copy_skb) {
 		status |= TP_STATUS_COPY;
@@ -727,7 +788,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 
 	__packet_set_status(po, h.raw, status);
 	smp_mb();
-
 	{
 		struct page *p_start, *p_end;
 		u8 *h_end = h.raw + macoff + snaplen - 1;
@@ -760,10 +820,249 @@ ring_is_full:
 	goto drop_n_restore;
 }
 
-#endif
+static void tpacket_destruct_skb(struct sk_buff *skb)
+{
+	struct packet_sock *po = pkt_sk(skb->sk);
+	void * ph;
 
+	BUG_ON(skb == NULL);
 
-static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
+	if (likely(po->tx_ring.pg_vec)) {
+		ph = skb_shinfo(skb)->destructor_arg;
+		BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
+		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
+		atomic_dec(&po->tx_ring.pending);
+		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
+	}
+
+	sock_wfree(skb);
+}
+
+static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff * skb,
+		void * frame, struct net_device *dev, int size_max,
+		__be16 proto, unsigned char * addr)
+{
+	union {
+		struct tpacket_hdr *h1;
+		struct tpacket2_hdr *h2;
+		void *raw;
+	} ph;
+	int to_write, offset, len, tp_len, nr_frags, len_max;
+	struct socket *sock = po->sk.sk_socket;
+	struct page *page;
+	void *data;
+	int err;
+
+	ph.raw = frame;
+
+	skb->protocol = proto;
+	skb->dev = dev;
+	skb->priority = po->sk.sk_priority;
+	skb_shinfo(skb)->destructor_arg = ph.raw;
+
+	switch (po->tp_version) {
+	case TPACKET_V2:
+		tp_len = ph.h2->tp_len;
+		break;
+	default:
+		tp_len = ph.h1->tp_len;
+		break;
+	}
+	if (unlikely(tp_len > size_max)) {
+		printk(KERN_ERR "packet size is too long (%d > %d)\n",
+				tp_len, size_max);
+		return -EMSGSIZE;
+	}
+
+	skb_reserve(skb, LL_RESERVED_SPACE(dev));
+	skb_reset_network_header(skb);
+
+	data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
+	to_write = tp_len;
+
+	if (sock->type == SOCK_DGRAM) {
+		err = dev_hard_header(skb, dev, ntohs(proto), addr,
+				NULL, tp_len);
+		if (unlikely(err < 0))
+			return -EINVAL;
+	} else if (dev->hard_header_len ) {
+		/* net device doesn't like empty head */
+		if (unlikely(tp_len <= dev->hard_header_len)) {
+			printk(KERN_ERR "packet size is too short "
+					"(%d < %d)\n", tp_len,
+					dev->hard_header_len);
+			return -EINVAL;
+		}
+
+		skb_push(skb, dev->hard_header_len);
+		err = skb_store_bits(skb, 0, data,
+				dev->hard_header_len);
+		if (unlikely(err))
+			return err;
+
+		data += dev->hard_header_len;
+		to_write -= dev->hard_header_len;
+	}
+
+	err = -EFAULT;
+	page = virt_to_page(data);
+	offset = offset_in_page(data);
+	len_max = PAGE_SIZE - offset;
+	len = ((to_write > len_max) ? len_max : to_write);
+
+	skb->data_len = to_write;
+	skb->len += to_write;
+	skb->truesize += to_write;
+	atomic_add(to_write, &po->sk.sk_wmem_alloc);
+
+	while (likely(to_write)) {
+		nr_frags = skb_shinfo(skb)->nr_frags;
+
+		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
+			printk(KERN_ERR "Packet exceed the number "
+					"of skb frags(%lu)\n",
+					MAX_SKB_FRAGS);
+			return -EFAULT;
+		}
+
+		flush_dcache_page(page);
+		get_page(page);
+		skb_fill_page_desc(skb,
+				nr_frags,
+				page++, offset, len);
+		to_write -= len;
+		offset = 0;
+		len_max = PAGE_SIZE;
+		len = ((to_write > len_max) ? len_max : to_write);
+	}
+
+	return tp_len;
+}
+
+static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
+{
+	struct socket *sock;
+	struct sk_buff *skb;
+	struct net_device *dev;
+	__be16 proto;
+	int ifindex, err, reserve = 0;
+	void * ph;
+	struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
+	int tp_len, size_max;
+	unsigned char *addr;
+	int len_sum = 0;
+	int status = 0;
+
+	sock = po->sk.sk_socket;
+
+	mutex_lock(&po->pg_vec_lock);
+
+	err = -EBUSY;
+	if (saddr == NULL) {
+		ifindex	= po->ifindex;
+		proto	= po->num;
+		addr	= NULL;
+	} else {
+		err = -EINVAL;
+		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
+			goto out;
+		if (msg->msg_namelen < (saddr->sll_halen
+					+ offsetof(struct sockaddr_ll,
+						sll_addr)))
+			goto out;
+		ifindex	= saddr->sll_ifindex;
+		proto	= saddr->sll_protocol;
+		addr	= saddr->sll_addr;
+	}
+
+	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
+	err = -ENXIO;
+	if (unlikely(dev == NULL))
+		goto out;
+
+	reserve = dev->hard_header_len;
+
+	err = -ENETDOWN;
+	if (unlikely(!(dev->flags & IFF_UP)))
+		goto out_put;
+
+	size_max = po->tx_ring.frame_size
+		- sizeof(struct skb_shared_info)
+		- po->tp_hdrlen
+		- LL_ALLOCATED_SPACE(dev)
+		- sizeof(struct sockaddr_ll);
+
+	if (size_max > dev->mtu + reserve)
+		size_max = dev->mtu + reserve;
+
+	do {
+		ph = packet_current_frame(po, &po->tx_ring,
+				TP_STATUS_SEND_REQUEST);
+
+		if (unlikely(ph == NULL)) {
+			schedule();
+			continue;
+		}
+
+		status = TP_STATUS_SEND_REQUEST;
+		skb = sock_alloc_send_skb(&po->sk,
+				LL_ALLOCATED_SPACE(dev)
+				+ sizeof(struct sockaddr_ll),
+				0, &err);
+
+		if (unlikely(skb == NULL))
+			goto out_status;
+
+		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
+				addr);
+
+		if (unlikely(tp_len < 0)) {
+			if (po->tp_loss) {
+				__packet_set_status(po, ph,
+						TP_STATUS_AVAILABLE);
+				packet_increment_head(&po->tx_ring);
+				kfree_skb(skb);
+				continue;
+			} else {
+				status = TP_STATUS_WRONG_FORMAT;
+				err = tp_len;
+				goto out_status;
+			}
+		}
+
+		skb->destructor = tpacket_destruct_skb;
+		__packet_set_status(po, ph, TP_STATUS_SENDING);
+		atomic_inc(&po->tx_ring.pending);
+
+		status = TP_STATUS_SEND_REQUEST;
+		err = dev_queue_xmit(skb);
+		if (unlikely(err > 0 && (err = net_xmit_errno(err)) != 0))
+			goto out_xmit;
+		packet_increment_head(&po->tx_ring);
+		len_sum += tp_len;
+	}
+	while (likely((ph != NULL) || ((!(msg->msg_flags & MSG_DONTWAIT))
+					&& (atomic_read(&po->tx_ring.pending))))
+	      );
+
+	err = len_sum;
+	goto out_put;
+
+out_xmit:
+	skb->destructor = sock_wfree;
+	atomic_dec(&po->tx_ring.pending);
+out_status:
+	__packet_set_status(po, ph, status);
+	kfree_skb(skb);
+out_put:
+	dev_put(dev);
+out:
+	mutex_unlock(&po->pg_vec_lock);
+	return err;
+}
+#endif
+
+static int packet_snd(struct socket *sock,
 			  struct msghdr *msg, size_t len)
 {
 	struct sock *sk = sock->sk;
@@ -854,6 +1153,19 @@ out:
 	return err;
 }
 
+static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
+		struct msghdr *msg, size_t len)
+{
+#ifdef CONFIG_PACKET_MMAP
+	struct sock *sk = sock->sk;
+	struct packet_sock *po = pkt_sk(sk);
+	if (po->tx_ring.pg_vec)
+		return tpacket_snd(po, msg);
+	else
+#endif
+		return packet_snd(sock, msg, len);
+}
+
 /*
  *	Close a PACKET socket. This is fairly simple. We immediately go
  *	to 'closed' state and remove our protocol entry in the device list.
@@ -864,6 +1176,9 @@ static int packet_release(struct socket *sock)
 	struct sock *sk = sock->sk;
 	struct packet_sock *po;
 	struct net *net;
+#ifdef CONFIG_PACKET_MMAP
+	struct tpacket_req req;
+#endif
 
 	if (!sk)
 		return 0;
@@ -893,11 +1208,13 @@ static int packet_release(struct socket *sock)
 	packet_flush_mclist(sk);
 
 #ifdef CONFIG_PACKET_MMAP
-	if (po->pg_vec) {
-		struct tpacket_req req;
-		memset(&req, 0, sizeof(req));
-		packet_set_ring(sk, &req, 1);
-	}
+	memset(&req, 0, sizeof(req));
+
+	if (po->rx_ring.pg_vec)
+		packet_set_ring(sk, &req, 1, 0);
+
+	if (po->tx_ring.pg_vec)
+		packet_set_ring(sk, &req, 1, 1);
 #endif
 
 	/*
@@ -1391,7 +1708,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 	if (level != SOL_PACKET)
 		return -ENOPROTOOPT;
 
-	switch(optname)	{
+	switch (optname) {
 	case PACKET_ADD_MEMBERSHIP:
 	case PACKET_DROP_MEMBERSHIP:
 	{
@@ -1415,6 +1732,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 
 #ifdef CONFIG_PACKET_MMAP
 	case PACKET_RX_RING:
+	case PACKET_TX_RING:
 	{
 		struct tpacket_req req;
 
@@ -1422,7 +1740,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 			return -EINVAL;
 		if (copy_from_user(&req,optval,sizeof(req)))
 			return -EFAULT;
-		return packet_set_ring(sk, &req, 0);
+		return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
 	}
 	case PACKET_COPY_THRESH:
 	{
@@ -1442,7 +1760,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 
 		if (optlen != sizeof(val))
 			return -EINVAL;
-		if (po->pg_vec)
+		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
 			return -EBUSY;
 		if (copy_from_user(&val, optval, sizeof(val)))
 			return -EFAULT;
@@ -1461,13 +1779,26 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 
 		if (optlen != sizeof(val))
 			return -EINVAL;
-		if (po->pg_vec)
+		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
 			return -EBUSY;
 		if (copy_from_user(&val, optval, sizeof(val)))
 			return -EFAULT;
 		po->tp_reserve = val;
 		return 0;
 	}
+	case PACKET_LOSS:
+	{
+		unsigned int val;
+
+		if (optlen != sizeof(val))
+			return -EINVAL;
+		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
+			return -EBUSY;
+		if (copy_from_user(&val, optval, sizeof(val)))
+			return -EFAULT;
+		po->tp_loss = !!val;
+		return 0;
+	}
 #endif
 	case PACKET_AUXDATA:
 	{
@@ -1517,7 +1848,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 	if (len < 0)
 		return -EINVAL;
 
-	switch(optname)	{
+	switch (optname) {
 	case PACKET_STATISTICS:
 		if (len > sizeof(struct tpacket_stats))
 			len = sizeof(struct tpacket_stats);
@@ -1573,6 +1904,12 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 		val = po->tp_reserve;
 		data = &val;
 		break;
+	case PACKET_LOSS:
+		if (len > sizeof(unsigned int))
+			len = sizeof(unsigned int);
+		val = po->tp_loss;
+		data = &val;
+		break;
 #endif
 	default:
 		return -ENOPROTOOPT;
@@ -1643,7 +1980,7 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd,
 {
 	struct sock *sk = sock->sk;
 
-	switch(cmd) {
+	switch (cmd) {
 		case SIOCOUTQ:
 		{
 			int amount = atomic_read(&sk->sk_wmem_alloc);
@@ -1705,13 +2042,17 @@ static unsigned int packet_poll(struct file * file, struct socket *sock,
 	unsigned int mask = datagram_poll(file, sock, wait);
 
 	spin_lock_bh(&sk->sk_receive_queue.lock);
-	if (po->pg_vec) {
-		unsigned last = po->head ? po->head-1 : po->frame_max;
-
-		if (packet_lookup_frame(po, last, TP_STATUS_USER))
+	if (po->rx_ring.pg_vec) {
+		if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
 			mask |= POLLIN | POLLRDNORM;
 	}
 	spin_unlock_bh(&sk->sk_receive_queue.lock);
+	spin_lock_bh(&sk->sk_write_queue.lock);
+	if (po->tx_ring.pg_vec) {
+		if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
+			mask |= POLLOUT | POLLWRNORM;
+	}
+	spin_unlock_bh(&sk->sk_write_queue.lock);
 	return mask;
 }
 
@@ -1788,21 +2129,33 @@ out_free_pgvec:
 	goto out;
 }
 
-static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
+static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
+		int closing, int tx_ring)
 {
 	char **pg_vec = NULL;
 	struct packet_sock *po = pkt_sk(sk);
 	int was_running, order = 0;
+	struct packet_ring_buffer *rb;
+	struct sk_buff_head *rb_queue;
 	__be16 num;
-	int err = 0;
+	int err;
 
-	if (req->tp_block_nr) {
-		int i;
+	rb = tx_ring ? &po->tx_ring : &po->rx_ring;
+	rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
 
-		/* Sanity tests and some calculations */
+	err = -EBUSY;
+	if (!closing) {
+		if (atomic_read(&po->mapped))
+			goto out;
+		if (atomic_read(&rb->pending))
+			goto out;
+	}
 
-		if (unlikely(po->pg_vec))
-			return -EBUSY;
+	if (req->tp_block_nr) {
+		/* Sanity tests and some calculations */
+		err = -EBUSY;
+		if (unlikely(rb->pg_vec))
+			goto out;
 
 		switch (po->tp_version) {
 		case TPACKET_V1:
@@ -1813,42 +2166,35 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
 			break;
 		}
 
+		err = -EINVAL;
 		if (unlikely((int)req->tp_block_size <= 0))
-			return -EINVAL;
+			goto out;
 		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
-			return -EINVAL;
+			goto out;
 		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
-						  po->tp_reserve))
-			return -EINVAL;
+					po->tp_reserve))
+			goto out;
 		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
-			return -EINVAL;
+			goto out;
 
-		po->frames_per_block = req->tp_block_size/req->tp_frame_size;
-		if (unlikely(po->frames_per_block <= 0))
-			return -EINVAL;
-		if (unlikely((po->frames_per_block * req->tp_block_nr) !=
-			     req->tp_frame_nr))
-			return -EINVAL;
+		rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
+		if (unlikely(rb->frames_per_block <= 0))
+			goto out;
+		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
+					req->tp_frame_nr))
+			goto out;
 
 		err = -ENOMEM;
 		order = get_order(req->tp_block_size);
 		pg_vec = alloc_pg_vec(req, order);
 		if (unlikely(!pg_vec))
 			goto out;
-
-		for (i = 0; i < req->tp_block_nr; i++) {
-			void *ptr = pg_vec[i];
-			int k;
-
-			for (k = 0; k < po->frames_per_block; k++) {
-				__packet_set_status(po, ptr, TP_STATUS_KERNEL);
-				ptr += req->tp_frame_size;
-			}
-		}
-		/* Done */
-	} else {
+	}
+	/* Done */
+	else {
+		err = -EINVAL;
 		if (unlikely(req->tp_frame_nr))
-			return -EINVAL;
+			goto out;
 	}
 
 	lock_sock(sk);
@@ -1872,23 +2218,24 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
 	if (closing || atomic_read(&po->mapped) == 0) {
 		err = 0;
 #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
-
-		spin_lock_bh(&sk->sk_receive_queue.lock);
-		pg_vec = XC(po->pg_vec, pg_vec);
-		po->frame_max = (req->tp_frame_nr - 1);
-		po->head = 0;
-		po->frame_size = req->tp_frame_size;
-		spin_unlock_bh(&sk->sk_receive_queue.lock);
-
-		order = XC(po->pg_vec_order, order);
-		req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);
-
-		po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
-		po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
-		skb_queue_purge(&sk->sk_receive_queue);
+		spin_lock_bh(&rb_queue->lock);
+		pg_vec = XC(rb->pg_vec, pg_vec);
+		rb->frame_max = (req->tp_frame_nr - 1);
+		rb->head = 0;
+		rb->frame_size = req->tp_frame_size;
+		spin_unlock_bh(&rb_queue->lock);
+
+		order = XC(rb->pg_vec_order, order);
+		req->tp_block_nr = XC(rb->pg_vec_len, req->tp_block_nr);
+
+		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
+		po->prot_hook.func = (po->rx_ring.pg_vec) ?
+						tpacket_rcv : packet_rcv;
+		skb_queue_purge(rb_queue);
 #undef XC
 		if (atomic_read(&po->mapped))
-			printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
+			printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n",
+						atomic_read(&po->mapped));
 	}
 	mutex_unlock(&po->pg_vec_lock);
 
@@ -1909,11 +2256,13 @@ out:
 	return err;
 }
 
-static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
+static int packet_mmap(struct file *file, struct socket *sock,
+		struct vm_area_struct *vma)
 {
 	struct sock *sk = sock->sk;
 	struct packet_sock *po = pkt_sk(sk);
-	unsigned long size;
+	unsigned long size, expected_size;
+	struct packet_ring_buffer *rb;
 	unsigned long start;
 	int err = -EINVAL;
 	int i;
@@ -1921,26 +2270,43 @@ static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_st
 	if (vma->vm_pgoff)
 		return -EINVAL;
 
-	size = vma->vm_end - vma->vm_start;
-
 	mutex_lock(&po->pg_vec_lock);
-	if (po->pg_vec == NULL)
+
+	expected_size = 0;
+	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
+		if (rb->pg_vec) {
+			expected_size += rb->pg_vec_len
+						* rb->pg_vec_pages
+						* PAGE_SIZE;
+		}
+	}
+
+	if (expected_size == 0)
 		goto out;
-	if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
+
+	size = vma->vm_end - vma->vm_start;
+	if (size != expected_size)
 		goto out;
 
 	start = vma->vm_start;
-	for (i = 0; i < po->pg_vec_len; i++) {
-		struct page *page = virt_to_page(po->pg_vec[i]);
-		int pg_num;
-
-		for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
-			err = vm_insert_page(vma, start, page);
-			if (unlikely(err))
-				goto out;
-			start += PAGE_SIZE;
+	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
+		if (rb->pg_vec == NULL)
+			continue;
+
+		for (i = 0; i < rb->pg_vec_len; i++) {
+			struct page *page = virt_to_page(rb->pg_vec[i]);
+			int pg_num;
+
+			for (pg_num = 0; pg_num < rb->pg_vec_pages;
+					pg_num++,page++) {
+				err = vm_insert_page(vma, start, page);
+				if (unlikely(err))
+					goto out;
+				start += PAGE_SIZE;
+			}
 		}
 	}
+
 	atomic_inc(&po->mapped);
 	vma->vm_ops = &packet_mmap_ops;
 	err = 0;
-- 
cgit v1.2.3-70-g09d2


From 690cc3ffe33ac4a2857583c22d4c6244ae11684d Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 13 May 2009 16:55:10 +0000
Subject: syscall: Implement a convinience function restart_syscall

Currently when we have a signal pending we have the functionality
to restart that the current system call.  There are other cases
such as nasty lock ordering issues where it makes sense to have
a simple fix that uses try lock and restarts the system call.
Buying time to figure out how to rework the locking strategy.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sched.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b4c38bc8049..d853f6bb0ba 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2178,6 +2178,12 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
 	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
 }
 
+static inline int restart_syscall(void)
+{
+	set_tsk_thread_flag(current, TIF_SIGPENDING);
+	return -ERESTARTNOINTR;
+}
+
 static inline int signal_pending(struct task_struct *p)
 {
 	return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
-- 
cgit v1.2.3-70-g09d2


From 93f154b594fe47e4a7e5358b309add449a046cd3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Mon, 18 May 2009 22:19:19 -0700
Subject: net: release dst entry in dev_hard_start_xmit()

One point of contention in high network loads is the dst_release() performed
when a transmited skb is freed. This is because NIC tx completion calls
dev_kree_skb() long after original call to dev_queue_xmit(skb).

CPU cache is cold and the atomic op in dst_release() stalls. On SMP, this is
quite visible if one CPU is 100% handling softirqs for a network device,
since dst_clone() is done by other cpus, involving cache line ping pongs.

It seems right place to release dst is in dev_hard_start_xmit(), for most
devices but ones that are virtual, and some exceptions.

David Miller suggested to define a new device flag, set in alloc_netdev_mq()
(so that most devices set it at init time), and carefuly unset in devices
which dont want a NULL skb->dst in their ndo_start_xmit().

List of devices that must clear this flag is :

- loopback device, because it calls netif_rx() and quoting Patrick :
    "ip_route_input() doesn't accept loopback addresses, so loopback packets
     already need to have a dst_entry attached."
- appletalk/ipddp.c : needs skb->dst in its xmit function

- And all devices that call again dev_queue_xmit() from their xmit function
(as some classifiers need skb->dst) : bonding, vlan, macvlan, eql, ifb, hdlc_fr

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/appletalk/ipddp.c   | 1 +
 drivers/net/bonding/bond_main.c | 1 +
 drivers/net/eql.c               | 1 +
 drivers/net/ifb.c               | 1 +
 drivers/net/loopback.c          | 1 +
 drivers/net/macvlan.c           | 1 +
 drivers/net/wan/hdlc_fr.c       | 1 +
 include/linux/if.h              | 3 +++
 net/8021q/vlan_dev.c            | 1 +
 net/core/dev.c                  | 9 +++++++++
 10 files changed, 20 insertions(+)

(limited to 'include')

diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c
index da64ba88d7f..f939e92fcf8 100644
--- a/drivers/net/appletalk/ipddp.c
+++ b/drivers/net/appletalk/ipddp.c
@@ -71,6 +71,7 @@ static struct net_device * __init ipddp_init(void)
 	if (!dev)
 		return ERR_PTR(-ENOMEM);
 
+	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
 	strcpy(dev->name, "ipddp%d");
 
 	if (version_printed++ == 0)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 96d7689995c..92a9d69c565 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -5148,6 +5148,7 @@ int bond_create(char *name, struct bond_params *params)
 		goto out_rtnl;
 	}
 
+	bond_dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
 	if (!name) {
 		res = dev_alloc_name(bond_dev, "bond%d");
 		if (res < 0)
diff --git a/drivers/net/eql.c b/drivers/net/eql.c
index 5210bb1027c..19b7dd98394 100644
--- a/drivers/net/eql.c
+++ b/drivers/net/eql.c
@@ -194,6 +194,7 @@ static void __init eql_setup(struct net_device *dev)
 
 	dev->type       	= ARPHRD_SLIP;
 	dev->tx_queue_len 	= 5;		/* Hands them off fast */
+	dev->priv_flags	       &= ~IFF_XMIT_DST_RELEASE;
 }
 
 static int eql_open(struct net_device *dev)
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 60a26300193..96713ef0629 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -156,6 +156,7 @@ static void ifb_setup(struct net_device *dev)
 
 	dev->flags |= IFF_NOARP;
 	dev->flags &= ~IFF_MULTICAST;
+	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
 	random_ether_addr(dev->dev_addr);
 }
 
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 6f71157bea8..da472c68748 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -170,6 +170,7 @@ static void loopback_setup(struct net_device *dev)
 	dev->tx_queue_len	= 0;
 	dev->type		= ARPHRD_LOOPBACK;	/* 0x0001*/
 	dev->flags		= IFF_LOOPBACK;
+	dev->priv_flags	       &= ~IFF_XMIT_DST_RELEASE;
 	dev->features 		= NETIF_F_SG | NETIF_F_FRAGLIST
 		| NETIF_F_TSO
 		| NETIF_F_NO_CSUM
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 329cd50d0e2..d5334b41e4b 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -414,6 +414,7 @@ static void macvlan_setup(struct net_device *dev)
 {
 	ether_setup(dev);
 
+	dev->priv_flags	       &= ~IFF_XMIT_DST_RELEASE;
 	dev->netdev_ops		= &macvlan_netdev_ops;
 	dev->destructor		= free_netdev;
 	dev->header_ops		= &macvlan_hard_header_ops,
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index 80053010109..bfa0161a02d 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -1054,6 +1054,7 @@ static void pvc_setup(struct net_device *dev)
 	dev->flags = IFF_POINTOPOINT;
 	dev->hard_header_len = 10;
 	dev->addr_len = 2;
+	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
 }
 
 static const struct net_device_ops pvc_ops = {
diff --git a/include/linux/if.h b/include/linux/if.h
index 1108f3e099e..b9a6229f3be 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -67,6 +67,9 @@
 #define IFF_ISATAP	0x80		/* ISATAP interface (RFC4214)	*/
 #define IFF_MASTER_ARPMON 0x100		/* bonding master, ARP mon in use */
 #define IFF_WAN_HDLC	0x200		/* WAN HDLC device		*/
+#define IFF_XMIT_DST_RELEASE 0x400	/* dev_hard_start_xmit() is allowed to
+					 * release skb->dst
+					 */
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 8faacee6863..ff7572ac548 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -739,6 +739,7 @@ void vlan_setup(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->priv_flags		|= IFF_802_1Q_VLAN;
+	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
 	dev->tx_queue_len	= 0;
 
 	dev->netdev_ops		= &vlan_netdev_ops;
diff --git a/net/core/dev.c b/net/core/dev.c
index 6d3630d1627..92ebeca2990 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1688,6 +1688,14 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 				goto gso;
 		}
 
+		/*
+		 * If device doesnt need skb->dst, release it right now while
+		 * its hot in this cpu cache
+		 */
+		if ((dev->priv_flags & IFF_XMIT_DST_RELEASE) && skb->dst) {
+			dst_release(skb->dst);
+			skb->dst = NULL;
+		}
 		rc = ops->ndo_start_xmit(skb, dev);
 		/*
 		 * TODO: if skb_orphan() was called by
@@ -5045,6 +5053,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	netdev_init_queues(dev);
 
 	INIT_LIST_HEAD(&dev->napi_list);
+	dev->priv_flags = IFF_XMIT_DST_RELEASE;
 	setup(dev);
 	strcpy(dev->name, name);
 	return dev;
-- 
cgit v1.2.3-70-g09d2


From bfa568d19a3faed3b94978ad48ac15d1b0d7e5bc Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Sat, 2 May 2009 06:16:47 +0400
Subject: powerpc/85xx: Add PCI IDs for MPC8569 family processors

This patch adds PCI IDs for MPC8569 and MPC8569E processors,
plus adds appropriate quirks for these IDs, and thus makes
PCI-E actually work on MPC8569E-MDS boards.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/sysdev/fsl_pci.c | 2 ++
 include/linux/pci_ids.h       | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 376603df7c4..94d8b3feb1e 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -285,6 +285,8 @@ DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8543, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8547E, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8545E, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8545, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8569E, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8569, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8568E, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8568, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8567E, quirk_fsl_pcie_header);
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 06ba90c211a..7cc5b80327f 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2274,6 +2274,8 @@
 #define PCI_DEVICE_ID_MPC8547E		0x0018
 #define PCI_DEVICE_ID_MPC8545E		0x0019
 #define PCI_DEVICE_ID_MPC8545		0x001a
+#define PCI_DEVICE_ID_MPC8569E		0x0061
+#define PCI_DEVICE_ID_MPC8569		0x0060
 #define PCI_DEVICE_ID_MPC8568E		0x0020
 #define PCI_DEVICE_ID_MPC8568		0x0021
 #define PCI_DEVICE_ID_MPC8567E		0x0022
-- 
cgit v1.2.3-70-g09d2


From 01af9507ff36578dad89b1cc88ff37ac18e719cb Mon Sep 17 00:00:00 2001
From: Kumar Gala <galak@kernel.crashing.org>
Date: Wed, 15 Apr 2009 14:38:40 -0500
Subject: powerpc/85xx: Add P2020DS board support

The P2020 is a dual e500v2 core based SOC with:
* 3 PCIe controllers
* 2 General purpose DMA controllers
* 2 sRIO controllers
* 3 eTSECS
* USB 2.0
* SDHC
* SPI, I2C, DUART
* enhanced localbus
* and optional Security (P2020E) security w/XOR acceleration

The p2020 DS reference board is pretty similar to the existing MPC85xx
DS boards and has a ULI 1575 connected on one of the PCIe controllers.

Signed-off-by: Ted Peters <Ted.Peters@freescale.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/boot/dts/p2020ds.dts        | 704 +++++++++++++++++++++++++++++++
 arch/powerpc/platforms/85xx/mpc85xx_ds.c |  43 +-
 arch/powerpc/platforms/fsl_uli1575.c     |   1 +
 arch/powerpc/sysdev/fsl_pci.c            |   2 +
 include/linux/pci_ids.h                  |   2 +
 5 files changed, 747 insertions(+), 5 deletions(-)
 create mode 100644 arch/powerpc/boot/dts/p2020ds.dts

(limited to 'include')

diff --git a/arch/powerpc/boot/dts/p2020ds.dts b/arch/powerpc/boot/dts/p2020ds.dts
new file mode 100644
index 00000000000..11019142813
--- /dev/null
+++ b/arch/powerpc/boot/dts/p2020ds.dts
@@ -0,0 +1,704 @@
+/*
+ * P2020 DS Device Tree Source
+ *
+ * Copyright 2009 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+/dts-v1/;
+/ {
+	model = "fsl,P2020";
+	compatible = "fsl,P2020DS";
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	aliases {
+		ethernet0 = &enet0;
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,P2020@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;
+		};
+
+		PowerPC,P2020@1 {
+			device_type = "cpu";
+			reg = <0x1>;
+			next-level-cache = <&L2>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	localbus@ffe05000 {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		compatible = "fsl,elbc", "simple-bus";
+		reg = <0 0xffe05000 0 0x1000>;
+		interrupts = <19 2>;
+		interrupt-parent = <&mpic>;
+
+		ranges = <0x0 0x0 0x0 0xe8000000 0x08000000
+			  0x1 0x0 0x0 0xe0000000 0x08000000
+			  0x2 0x0 0x0 0xffa00000 0x00040000
+			  0x3 0x0 0x0 0xffdf0000 0x00008000
+			  0x4 0x0 0x0 0xffa40000 0x00040000
+			  0x5 0x0 0x0 0xffa80000 0x00040000
+			  0x6 0x0 0x0 0xffac0000 0x00040000>;
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x8000000>;
+			bank-width = <2>;
+			device-width = <1>;
+
+			ramdisk@0 {
+				reg = <0x0 0x03000000>;
+				read-only;
+			};
+
+			diagnostic@3000000 {
+				reg = <0x03000000 0x00e00000>;
+				read-only;
+			};
+
+			dink@3e00000 {
+				reg = <0x03e00000 0x00200000>;
+				read-only;
+			};
+
+			kernel@4000000 {
+				reg = <0x04000000 0x00400000>;
+				read-only;
+			};
+
+			jffs2@4400000 {
+				reg = <0x04400000 0x03b00000>;
+			};
+
+			dtb@7f00000 {
+				reg = <0x07f00000 0x00080000>;
+				read-only;
+			};
+
+			u-boot@7f80000 {
+				reg = <0x07f80000 0x00080000>;
+				read-only;
+			};
+		};
+
+		nand@2,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,elbc-fcm-nand";
+			reg = <0x2 0x0 0x40000>;
+
+			u-boot@0 {
+				reg = <0x0 0x02000000>;
+				read-only;
+			};
+
+			jffs2@2000000 {
+				reg = <0x02000000 0x10000000>;
+			};
+
+			ramdisk@12000000 {
+				reg = <0x12000000 0x08000000>;
+				read-only;
+			};
+
+			kernel@1a000000 {
+				reg = <0x1a000000 0x04000000>;
+			};
+
+			dtb@1e000000 {
+				reg = <0x1e000000 0x01000000>;
+				read-only;
+			};
+
+			empty@1f000000 {
+				reg = <0x1f000000 0x21000000>;
+			};
+		};
+
+		nand@4,0 {
+			compatible = "fsl,elbc-fcm-nand";
+			reg = <0x4 0x0 0x40000>;
+		};
+
+		nand@5,0 {
+			compatible = "fsl,elbc-fcm-nand";
+			reg = <0x5 0x0 0x40000>;
+		};
+
+		nand@6,0 {
+			compatible = "fsl,elbc-fcm-nand";
+			reg = <0x6 0x0 0x40000>;
+		};
+	};
+
+	soc@ffe00000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "fsl,p2020-immr", "simple-bus";
+		ranges = <0x0 0 0xffe00000 0x100000>;
+		bus-frequency = <0>;		// Filled out by uboot.
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;
+			fsl,num-laws = <12>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,p2020-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;
+			interrupts = <17 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {
+			compatible = "fsl,p2020-memory-controller";
+			reg = <0x2000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <0>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <0>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		spi@7000 {
+			compatible = "fsl,espi";
+			reg = <0x7000 0x1000>;
+			interrupts = <59 0x2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		dma@c300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,eloplus-dma";
+			reg = <0xc300 0x4>;
+			ranges = <0x0 0xc100 0x200>;
+			cell-index = <1>;
+			dma-channel@0 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <76 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <77 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <78 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <79 2>;
+			};
+		};
+
+		gpio: gpio-controller@f000 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8572-gpio";
+			reg = <0xf000 0x100>;
+			interrupts = <47 0x2>;
+			interrupt-parent = <&mpic>;
+			gpio-controller;
+		};
+
+		L2: l2-cache-controller@20000 {
+			compatible = "fsl,p2020-l2-cache-controller";
+			reg = <0x20000 0x1000>;
+			cache-line-size = <32>;	// 32 bytes
+			cache-size = <0x80000>; // L2, 512k
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		dma@21300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;
+			ranges = <0x0 0x21100 0x200>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		usb@22000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl-usb2-dr";
+			reg = <0x22000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <28 0x2>;
+			phy_type = "ulpi";
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <29 2 30 2 34 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy0>;
+			phy-connection-type = "rgmii-id";
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-mdio";
+				reg = <0x520 0x20>;
+
+				phy0: ethernet-phy@0 {
+					interrupt-parent = <&mpic>;
+					interrupts = <3 1>;
+					reg = <0x0>;
+				};
+				phy1: ethernet-phy@1 {
+					interrupt-parent = <&mpic>;
+					interrupts = <3 1>;
+					reg = <0x1>;
+				};
+				phy2: ethernet-phy@2 {
+					interrupt-parent = <&mpic>;
+					interrupts = <3 1>;
+					reg = <0x2>;
+				};
+				tbi0: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <35 2 36 2 40 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi1>;
+			phy-handle = <&phy1>;
+			phy-connection-type = "rgmii-id";
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi1: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		enet2: ethernet@26000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <2>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x26000 0x1000>;
+			ranges = <0x0 0x26000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <31 2 32 2 33 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi2>;
+			phy-handle = <&phy2>;
+			phy-connection-type = "rgmii-id";
+
+			mdio@520 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "fsl,gianfar-tbi";
+				reg = <0x520 0x20>;
+
+				tbi2: tbi-phy@11 {
+					reg = <0x11>;
+					device_type = "tbi-phy";
+				};
+			};
+		};
+
+		sdhci@2e000 {
+			compatible = "fsl,p2020-esdhc", "fsl,esdhc";
+			reg = <0x2e000 0x1000>;
+			interrupts = <72 0x2>;
+			interrupt-parent = <&mpic>;
+			/* Filled in by U-Boot */
+			clock-frequency = <0>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
+				     "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <45 2 58 2>;
+			interrupt-parent = <&mpic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0xbfe>;
+			fsl,descriptor-types-mask = <0x3ab0ebf>;
+		};
+
+		mpic: pic@40000 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x40000 0x40000>;
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+		};
+
+		msi@41600 {
+			compatible = "fsl,mpic-msi";
+			reg = <0x41600 0x80>;
+			msi-available-ranges = <0 0x100>;
+			interrupts = <
+				0xe0 0
+				0xe1 0
+				0xe2 0
+				0xe3 0
+				0xe4 0
+				0xe5 0
+				0xe6 0
+				0xe7 0>;
+			interrupt-parent = <&mpic>;
+		};
+
+		global-utilities@e0000 {	//global utilities block
+			compatible = "fsl,p2020-guts";
+			reg = <0xe0000 0x1000>;
+			fsl,has-rstcr;
+		};
+	};
+
+	pci0: pcie@ffe08000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xffe08000 0 0x1000>;
+		bus-range = <0 255>;
+		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <24 2>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x8 0x1
+			0000 0x0 0x0 0x2 &mpic 0x9 0x1
+			0000 0x0 0x0 0x3 &mpic 0xa 0x1
+			0000 0x0 0x0 0x4 &mpic 0xb 0x1
+			>;
+		pcie@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x2000000 0x0 0x80000000
+				  0x2000000 0x0 0x80000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x10000>;
+		};
+	};
+
+	pci1: pcie@ffe09000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xffe09000 0 0x1000>;
+		bus-range = <0 255>;
+		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <25 2>;
+		interrupt-map-mask = <0xff00 0x0 0x0 0x7>;
+		interrupt-map = <
+
+			// IDSEL 0x11 func 0 - PCI slot 1
+			0x8800 0x0 0x0 0x1 &i8259 0x9 0x2
+			0x8800 0x0 0x0 0x2 &i8259 0xa 0x2
+
+			// IDSEL 0x11 func 1 - PCI slot 1
+			0x8900 0x0 0x0 0x1 &i8259 0x9 0x2
+			0x8900 0x0 0x0 0x2 &i8259 0xa 0x2
+
+			// IDSEL 0x11 func 2 - PCI slot 1
+			0x8a00 0x0 0x0 0x1 &i8259 0x9 0x2
+			0x8a00 0x0 0x0 0x2 &i8259 0xa 0x2
+
+			// IDSEL 0x11 func 3 - PCI slot 1
+			0x8b00 0x0 0x0 0x1 &i8259 0x9 0x2
+			0x8b00 0x0 0x0 0x2 &i8259 0xa 0x2
+
+			// IDSEL 0x11 func 4 - PCI slot 1
+			0x8c00 0x0 0x0 0x1 &i8259 0x9 0x2
+			0x8c00 0x0 0x0 0x2 &i8259 0xa 0x2
+
+			// IDSEL 0x11 func 5 - PCI slot 1
+			0x8d00 0x0 0x0 0x1 &i8259 0x9 0x2
+			0x8d00 0x0 0x0 0x2 &i8259 0xa 0x2
+
+			// IDSEL 0x11 func 6 - PCI slot 1
+			0x8e00 0x0 0x0 0x1 &i8259 0x9 0x2
+			0x8e00 0x0 0x0 0x2 &i8259 0xa 0x2
+
+			// IDSEL 0x11 func 7 - PCI slot 1
+			0x8f00 0x0 0x0 0x1 &i8259 0x9 0x2
+			0x8f00 0x0 0x0 0x2 &i8259 0xa 0x2
+
+			// IDSEL 0x1d  Audio
+			0xe800 0x0 0x0 0x1 &i8259 0x6 0x2
+
+			// IDSEL 0x1e Legacy
+			0xf000 0x0 0x0 0x1 &i8259 0x7 0x2
+			0xf100 0x0 0x0 0x1 &i8259 0x7 0x2
+
+			// IDSEL 0x1f IDE/SATA
+			0xf800 0x0 0x0 0x1 &i8259 0xe 0x2
+			0xf900 0x0 0x0 0x1 &i8259 0x5 0x2
+			>;
+
+		pcie@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x2000000 0x0 0xa0000000
+				  0x2000000 0x0 0xa0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x10000>;
+			uli1575@0 {
+				reg = <0x0 0x0 0x0 0x0 0x0>;
+				#size-cells = <2>;
+				#address-cells = <3>;
+				ranges = <0x2000000 0x0 0xa0000000
+					  0x2000000 0x0 0xa0000000
+					  0x0 0x20000000
+
+					  0x1000000 0x0 0x0
+					  0x1000000 0x0 0x0
+					  0x0 0x10000>;
+				isa@1e {
+					device_type = "isa";
+					#interrupt-cells = <2>;
+					#size-cells = <1>;
+					#address-cells = <2>;
+					reg = <0xf000 0x0 0x0 0x0 0x0>;
+					ranges = <0x1 0x0 0x1000000 0x0 0x0
+						  0x1000>;
+					interrupt-parent = <&i8259>;
+
+					i8259: interrupt-controller@20 {
+						reg = <0x1 0x20 0x2
+						       0x1 0xa0 0x2
+						       0x1 0x4d0 0x2>;
+						interrupt-controller;
+						device_type = "interrupt-controller";
+						#address-cells = <0>;
+						#interrupt-cells = <2>;
+						compatible = "chrp,iic";
+						interrupts = <4 1>;
+						interrupt-parent = <&mpic>;
+					};
+
+					i8042@60 {
+						#size-cells = <0>;
+						#address-cells = <1>;
+						reg = <0x1 0x60 0x1 0x1 0x64 0x1>;
+						interrupts = <1 3 12 3>;
+						interrupt-parent =
+							<&i8259>;
+
+						keyboard@0 {
+							reg = <0x0>;
+							compatible = "pnpPNP,303";
+						};
+
+						mouse@1 {
+							reg = <0x1>;
+							compatible = "pnpPNP,f03";
+						};
+					};
+
+					rtc@70 {
+						compatible = "pnpPNP,b00";
+						reg = <0x1 0x70 0x2>;
+					};
+
+					gpio@400 {
+						reg = <0x1 0x400 0x80>;
+					};
+				};
+			};
+		};
+
+	};
+
+	pci2: pcie@ffe0a000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xffe0a000 0 0x1000>;
+		bus-range = <0 255>;
+		ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x10000>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <26 2>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1
+			>;
+		pcie@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x2000000 0x0 0xc0000000
+				  0x2000000 0x0 0xc0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x10000>;
+		};
+	};
+};
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
index de66de7a9ca..53d5851a6c9 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
@@ -163,7 +163,8 @@ static void __init mpc85xx_ds_setup_arch(void)
 #ifdef CONFIG_PCI
 	for_each_node_by_type(np, "pci") {
 		if (of_device_is_compatible(np, "fsl,mpc8540-pci") ||
-		    of_device_is_compatible(np, "fsl,mpc8548-pcie")) {
+		    of_device_is_compatible(np, "fsl,mpc8548-pcie") ||
+		    of_device_is_compatible(np, "fsl,p2020-pcie")) {
 			struct resource rsrc;
 			of_address_to_resource(np, 0, &rsrc);
 			if ((rsrc.start & 0xfffff) == primary_phb_addr)
@@ -195,9 +196,9 @@ static int __init mpc8544_ds_probe(void)
 		primary_phb_addr = 0xb000;
 #endif
 		return 1;
-	} else {
-		return 0;
 	}
+
+	return 0;
 }
 
 static struct of_device_id __initdata mpc85xxds_ids[] = {
@@ -214,6 +215,7 @@ static int __init mpc85xxds_publish_devices(void)
 }
 machine_device_initcall(mpc8544_ds, mpc85xxds_publish_devices);
 machine_device_initcall(mpc8572_ds, mpc85xxds_publish_devices);
+machine_device_initcall(p2020_ds, mpc85xxds_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
@@ -227,9 +229,26 @@ static int __init mpc8572_ds_probe(void)
 		primary_phb_addr = 0x8000;
 #endif
 		return 1;
-	} else {
-		return 0;
 	}
+
+	return 0;
+}
+
+/*
+ * Called very early, device-tree isn't unflattened
+ */
+static int __init p2020_ds_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (of_flat_dt_is_compatible(root, "fsl,P2020DS")) {
+#ifdef CONFIG_PCI
+		primary_phb_addr = 0x9000;
+#endif
+		return 1;
+	}
+
+	return 0;
 }
 
 define_machine(mpc8544_ds) {
@@ -259,3 +278,17 @@ define_machine(mpc8572_ds) {
 	.calibrate_decr		= generic_calibrate_decr,
 	.progress		= udbg_progress,
 };
+
+define_machine(p2020_ds) {
+	.name			= "P2020 DS",
+	.probe			= p2020_ds_probe,
+	.setup_arch		= mpc85xx_ds_setup_arch,
+	.init_IRQ		= mpc85xx_ds_pic_init,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif
+	.get_irq		= mpic_get_irq,
+	.restart		= fsl_rstcr_restart,
+	.calibrate_decr		= generic_calibrate_decr,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/fsl_uli1575.c b/arch/powerpc/platforms/fsl_uli1575.c
index efd41f40984..fd23a1d4b39 100644
--- a/arch/powerpc/platforms/fsl_uli1575.c
+++ b/arch/powerpc/platforms/fsl_uli1575.c
@@ -55,6 +55,7 @@ static inline bool is_quirk_valid(void)
 {
 	return (machine_is(mpc86xx_hpcn) ||
 		machine_is(mpc8544_ds) ||
+		machine_is(p2020_ds) ||
 		machine_is(mpc8572_ds));
 }
 
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 94d8b3feb1e..b20171d9df0 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -302,6 +302,8 @@ DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8536, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8641, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8641D, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8610, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2020E, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2020, quirk_fsl_pcie_header);
 #endif /* CONFIG_PPC_85xx || CONFIG_PPC_86xx */
 
 #if defined(CONFIG_PPC_83xx) || defined(CONFIG_PPC_MPC512x)
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 7cc5b80327f..bfa0402d343 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2288,6 +2288,8 @@
 #define PCI_DEVICE_ID_MPC8572		0x0041
 #define PCI_DEVICE_ID_MPC8536E		0x0050
 #define PCI_DEVICE_ID_MPC8536		0x0051
+#define PCI_DEVICE_ID_P2020E		0x0070
+#define PCI_DEVICE_ID_P2020		0x0071
 #define PCI_DEVICE_ID_MPC8641		0x7010
 #define PCI_DEVICE_ID_MPC8641D		0x7011
 #define PCI_DEVICE_ID_MPC8610		0x7018
-- 
cgit v1.2.3-70-g09d2


From 4200efd9acda4accf24640f1e77d24fdcdb524df Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 19 May 2009 09:22:19 +0200
Subject: sched: properly define the sched_group::cpumask and
 sched_domain::span fields

Properly document the variable-size structure tricks we are doing
wrt. struct sched_group and sched_domain, and use the field[0] GCC
extension instead of defining a vla array.

Dont use unions for this, as pointed out by Linus.

[ Impact: cleanup, un-confuse Sparse and LLVM ]

Reported-by: Jeff Garzik <jeff@garzik.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <alpine.LFD.2.01.0905180850110.3301@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 25 ++++++++++++++++++++++---
 kernel/sched.c        |  5 +++--
 2 files changed, 25 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index de7b3b21777..dbb1043e865 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -839,7 +839,17 @@ struct sched_group {
 	 */
 	u32 reciprocal_cpu_power;
 
-	unsigned long cpumask[];
+	/*
+	 * The CPUs this group covers.
+	 *
+	 * NOTE: this field is variable length. (Allocated dynamically
+	 * by attaching extra space to the end of the structure,
+	 * depending on how many CPUs the kernel has booted up with)
+	 *
+	 * It is also be embedded into static data structures at build
+	 * time. (See 'struct static_sched_group' in kernel/sched.c)
+	 */
+	unsigned long cpumask[0];
 };
 
 static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
@@ -925,8 +935,17 @@ struct sched_domain {
 	char *name;
 #endif
 
-	/* span of all CPUs in this domain */
-	unsigned long span[];
+	/*
+	 * Span of all CPUs in this domain.
+	 *
+	 * NOTE: this field is variable length. (Allocated dynamically
+	 * by attaching extra space to the end of the structure,
+	 * depending on how many CPUs the kernel has booted up with)
+	 *
+	 * It is also be embedded into static data structures at build
+	 * time. (See 'struct static_sched_domain' in kernel/sched.c)
+	 */
+	unsigned long span[0];
 };
 
 static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
diff --git a/kernel/sched.c b/kernel/sched.c
index 497c09ba61e..228acae8821 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7948,8 +7948,9 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
 
 /*
  * The cpus mask in sched_group and sched_domain hangs off the end.
- * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space
- * for nr_cpu_ids < CONFIG_NR_CPUS.
+ *
+ * ( See the the comments in include/linux/sched.h:struct sched_group
+ *   and struct sched_domain. )
  */
 struct static_sched_group {
 	struct sched_group sg;
-- 
cgit v1.2.3-70-g09d2


From 79eb63e9e5875b84341a3a05f8e6ae9cdb4bb6f6 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 17 May 2009 18:57:15 +0300
Subject: block: Add blk_make_request(), takes bio, returns a request

New block API:
given a struct bio allocates a new request. This is the parallel of
generic_make_request for BLOCK_PC commands users.

The passed bio may be a chained-bio. The bio is bounced if needed
inside the call to this member.

This is in the effort of un-exporting blk_rq_append_bio().

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
CC: Jeff Garzik <jeff@garzik.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-core.c       | 45 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/blkdev.h |  2 ++
 2 files changed, 47 insertions(+)

(limited to 'include')

diff --git a/block/blk-core.c b/block/blk-core.c
index e3f7e6a3a09..bec1d69952d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -890,6 +890,51 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(blk_get_request);
 
+/**
+ * blk_make_request - given a bio, allocate a corresponding struct request.
+ *
+ * @bio:  The bio describing the memory mappings that will be submitted for IO.
+ *        It may be a chained-bio properly constructed by block/bio layer.
+ *
+ * blk_make_request is the parallel of generic_make_request for BLOCK_PC
+ * type commands. Where the struct request needs to be farther initialized by
+ * the caller. It is passed a &struct bio, which describes the memory info of
+ * the I/O transfer.
+ *
+ * The caller of blk_make_request must make sure that bi_io_vec
+ * are set to describe the memory buffers. That bio_data_dir() will return
+ * the needed direction of the request. (And all bio's in the passed bio-chain
+ * are properly set accordingly)
+ *
+ * If called under none-sleepable conditions, mapped bio buffers must not
+ * need bouncing, by calling the appropriate masked or flagged allocator,
+ * suitable for the target device. Otherwise the call to blk_queue_bounce will
+ * BUG.
+ */
+struct request *blk_make_request(struct request_queue *q, struct bio *bio,
+				 gfp_t gfp_mask)
+{
+	struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);
+
+	if (unlikely(!rq))
+		return ERR_PTR(-ENOMEM);
+
+	for_each_bio(bio) {
+		struct bio *bounce_bio = bio;
+		int ret;
+
+		blk_queue_bounce(q, &bounce_bio);
+		ret = blk_rq_append_bio(q, rq, bounce_bio);
+		if (unlikely(ret)) {
+			blk_put_request(rq);
+			return ERR_PTR(ret);
+		}
+	}
+
+	return rq;
+}
+EXPORT_SYMBOL(blk_make_request);
+
 /**
  * blk_requeue_request - put a request back on queue
  * @q:		request queue where request should be inserted
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f9d60a78c08..88a83e112c9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -740,6 +740,8 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq);
 extern void blk_put_request(struct request *);
 extern void __blk_put_request(struct request_queue *, struct request *);
 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
+extern struct request *blk_make_request(struct request_queue *, struct bio *,
+					gfp_t);
 extern void blk_insert_request(struct request_queue *, struct request *, int, void *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
 extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
-- 
cgit v1.2.3-70-g09d2


From a411f4bbb89f1f08687b344064d6775bce1e4658 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 17 May 2009 19:00:01 +0300
Subject: block: Un-export blk_rq_append_bio

OSD was the last in-tree user of blk_rq_append_bio(). Now
that it is fixed blk_rq_append_bio is un-exported and
is only used internally by block layer.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-map.c        | 1 -
 block/blk.h            | 2 ++
 include/linux/blkdev.h | 6 ------
 3 files changed, 2 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/block/blk-map.c b/block/blk-map.c
index caa05a66774..ef2492adca7 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -24,7 +24,6 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
 	}
 	return 0;
 }
-EXPORT_SYMBOL(blk_rq_append_bio);
 
 static int __blk_rq_unmap_user(struct bio *bio)
 {
diff --git a/block/blk.h b/block/blk.h
index 9e0042ca949..c863ec2281e 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -13,6 +13,8 @@ extern struct kobj_type blk_queue_ktype;
 void init_request_from_bio(struct request *req, struct bio *bio);
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 			struct bio *bio);
+int blk_rq_append_bio(struct request_queue *q, struct request *rq,
+		      struct bio *bio);
 void blk_dequeue_request(struct request *rq);
 void __blk_queue_free_tags(struct request_queue *q);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 88a83e112c9..564445be7a6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -757,12 +757,6 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 			 struct scsi_ioctl_command __user *);
 
-/*
- * Temporary export, until SCSI gets fixed up.
- */
-extern int blk_rq_append_bio(struct request_queue *q, struct request *rq,
-			     struct bio *bio);
-
 /*
  * A queue has just exitted congestion.  Note this in the global counter of
  * congested queues, and wake up anyone who was waiting for requests to be
-- 
cgit v1.2.3-70-g09d2


From c44d70a340554a33071339064a303ac0f1a31623 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 17 May 2009 11:24:08 +0200
Subject: perf_counter: fix counter inheritance race

Context rotation should not occur when we are in the middle of
walking the counter list when inheriting counters ...

[ Impact: fix occasionally incorrect perf stat results ]

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  1 +
 kernel/perf_counter.c        | 10 +++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index c8c1dfc22c9..13cb2fbbf33 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -508,6 +508,7 @@ struct perf_counter_context {
 	int			nr_counters;
 	int			nr_active;
 	int			is_active;
+	int			rr_allowed;
 	struct task_struct	*task;
 
 	/*
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 7af16d1c480..4d8f97375f3 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1120,7 +1120,8 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	__perf_counter_task_sched_out(ctx);
 
 	rotate_ctx(&cpuctx->ctx);
-	rotate_ctx(ctx);
+	if (ctx->rr_allowed)
+		rotate_ctx(ctx);
 
 	perf_counter_cpu_sched_in(cpuctx, cpu);
 	perf_counter_task_sched_in(curr, cpu);
@@ -3108,6 +3109,7 @@ __perf_counter_init_context(struct perf_counter_context *ctx,
 	mutex_init(&ctx->mutex);
 	INIT_LIST_HEAD(&ctx->counter_list);
 	INIT_LIST_HEAD(&ctx->event_list);
+	ctx->rr_allowed = 1;
 	ctx->task = task;
 }
 
@@ -3348,6 +3350,9 @@ void perf_counter_init_task(struct task_struct *child)
 	 */
 	mutex_lock(&parent_ctx->mutex);
 
+	parent_ctx->rr_allowed = 0;
+	barrier(); /* irqs */
+
 	/*
 	 * We dont have to disable NMIs - we are only looking at
 	 * the list, not manipulating it:
@@ -3361,6 +3366,9 @@ void perf_counter_init_task(struct task_struct *child)
 			break;
 	}
 
+	barrier(); /* irqs */
+	parent_ctx->rr_allowed = 1;
+
 	mutex_unlock(&parent_ctx->mutex);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 645069299a1c7358cf7330afe293f07552f11a5d Mon Sep 17 00:00:00 2001
From: Sascha Hlusiak <contact@saschahlusiak.de>
Date: Tue, 19 May 2009 12:56:52 +0000
Subject: sit: stateless autoconf for isatap

be sent periodically. The rs_delay can be speficied when adding the
PRL entry and defaults to 15 minutes.

The RS is sent from every link local adress that's assigned to the
tunnel interface. It's directed to the (guessed) linklocal address
of the router and is sent through the tunnel.

Better: send to ff02::2 encapsuled in unicast directed to router-v4.

Signed-off-by: Sascha Hlusiak <contact@saschahlusiak.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_tunnel.h |  2 +-
 include/net/ipip.h        |  7 ++++++
 net/ipv6/ndisc.c          |  1 +
 net/ipv6/sit.c            | 58 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 67 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h
index 5a9aae4adb4..5eb9b0f857e 100644
--- a/include/linux/if_tunnel.h
+++ b/include/linux/if_tunnel.h
@@ -44,7 +44,7 @@ struct ip_tunnel_prl {
 	__u16			flags;
 	__u16			__reserved;
 	__u32			datalen;
-	__u32			__reserved2;
+	__u32			rs_delay;
 	/* data follows */
 };
 
diff --git a/include/net/ipip.h b/include/net/ipip.h
index fdf9bd74370..5d3036fa151 100644
--- a/include/net/ipip.h
+++ b/include/net/ipip.h
@@ -28,11 +28,18 @@ struct ip_tunnel
 	unsigned int			prl_count;	/* # of entries in PRL */
 };
 
+/* ISATAP: default interval between RS in secondy */
+#define IPTUNNEL_RS_DEFAULT_DELAY	(900)
+
 struct ip_tunnel_prl_entry
 {
 	struct ip_tunnel_prl_entry	*next;
 	__be32				addr;
 	u16				flags;
+	unsigned long			rs_delay;
+	struct timer_list		rs_timer;
+	struct ip_tunnel		*tunnel;
+	spinlock_t			lock;
 };
 
 #define IPTUNNEL_XMIT() do {						\
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index ab65cc51b00..e09f12ee57c 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -658,6 +658,7 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
 		     &icmp6h, NULL,
 		     send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0);
 }
+EXPORT_SYMBOL(ndisc_send_rs);
 
 
 static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 3fd060076e7..b3a59bd40f0 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -15,6 +15,7 @@
  * Roger Venning <r.venning@telstra.com>:	6to4 support
  * Nate Thompson <nate@thebog.net>:		6to4 support
  * Fred Templin <fred.l.templin@boeing.com>:	isatap support
+ * Sascha Hlusiak <mail@saschahlusiak.de>:	stateless autoconf for isatap
  */
 
 #include <linux/module.h>
@@ -222,6 +223,44 @@ failed:
 	return NULL;
 }
 
+static void ipip6_tunnel_rs_timer(unsigned long data)
+{
+	struct ip_tunnel_prl_entry *p = (struct ip_tunnel_prl_entry *) data;
+	struct inet6_dev *ifp;
+	struct inet6_ifaddr *addr;
+
+	spin_lock(&p->lock);
+	ifp = __in6_dev_get(p->tunnel->dev);
+
+	read_lock_bh(&ifp->lock);
+	for (addr = ifp->addr_list; addr; addr = addr->if_next) {
+		struct in6_addr rtr;
+
+		if (!(ipv6_addr_type(&addr->addr) & IPV6_ADDR_LINKLOCAL))
+			continue;
+
+		/* Send RS to guessed linklocal address of router
+		 *
+		 * Better: send to ff02::2 encapsuled in unicast directly
+		 * to router-v4 instead of guessing the v6 address.
+		 *
+		 * Cisco/Windows seem to not set the u/l bit correctly,
+		 * so we won't guess right.
+		 */
+		ipv6_addr_set(&rtr,  htonl(0xFE800000), 0, 0, 0);
+		if (!__ipv6_isatap_ifid(rtr.s6_addr + 8,
+					p->addr)) {
+			ndisc_send_rs(p->tunnel->dev, &addr->addr, &rtr);
+		}
+	}
+	read_unlock_bh(&ifp->lock);
+
+	mod_timer(&p->rs_timer, jiffies + HZ * p->rs_delay);
+	spin_unlock(&p->lock);
+
+	return;
+}
+
 static struct ip_tunnel_prl_entry *
 __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr)
 {
@@ -280,6 +319,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
 			continue;
 		kp[c].addr = prl->addr;
 		kp[c].flags = prl->flags;
+		kp[c].rs_delay = prl->rs_delay;
 		c++;
 		if (kprl.addr != htonl(INADDR_ANY))
 			break;
@@ -329,11 +369,23 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
 	}
 
 	p->next = t->prl;
+	p->tunnel = t;
 	t->prl = p;
 	t->prl_count++;
+
+	spin_lock_init(&p->lock);
+	setup_timer(&p->rs_timer, ipip6_tunnel_rs_timer, (unsigned long) p);
 update:
 	p->addr = a->addr;
 	p->flags = a->flags;
+	p->rs_delay = a->rs_delay;
+	if (p->rs_delay == 0)
+		p->rs_delay = IPTUNNEL_RS_DEFAULT_DELAY;
+	spin_lock(&p->lock);
+	del_timer(&p->rs_timer);
+	if (p->flags & PRL_DEFAULT)
+		mod_timer(&p->rs_timer, jiffies + 1);
+	spin_unlock(&p->lock);
 out:
 	write_unlock(&ipip6_lock);
 	return err;
@@ -352,6 +404,9 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
 			if ((*p)->addr == a->addr) {
 				x = *p;
 				*p = x->next;
+				spin_lock(&x->lock);
+				del_timer(&x->rs_timer);
+				spin_unlock(&x->lock);
 				kfree(x);
 				t->prl_count--;
 				goto out;
@@ -362,6 +417,9 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
 		while (t->prl) {
 			x = t->prl;
 			t->prl = t->prl->next;
+			spin_lock(&x->lock);
+			del_timer(&x->rs_timer);
+			spin_unlock(&x->lock);
 			kfree(x);
 			t->prl_count--;
 		}
-- 
cgit v1.2.3-70-g09d2


From 86579a4cccf18a2ddbf7de8fc5a0f5d9b94ed76d Mon Sep 17 00:00:00 2001
From: Michael Roth <mroth@nessie.de>
Date: Mon, 18 May 2009 16:04:44 -0700
Subject: Input: ads7846 - support swapping x and y axes

Signed-off-by: Michael Roth <mroth@nessie.de>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/ads7846.c | 7 +++++++
 include/linux/spi/ads7846.h         | 1 +
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index 2b01e56568f..b5ad252f5cf 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -97,6 +97,8 @@ struct ads7846 {
 	u16			x_plate_ohms;
 	u16			pressure_max;
 
+	bool			swap_xy;
+
 	struct ads7846_packet	*packet;
 
 	struct spi_transfer	xfer[18];
@@ -599,6 +601,10 @@ static void ads7846_rx(void *ads)
 			dev_dbg(&ts->spi->dev, "DOWN\n");
 #endif
 		}
+
+		if (ts->swap_xy)
+			swap(x, y);
+
 		input_report_abs(input, ABS_X, x);
 		input_report_abs(input, ABS_Y, y);
 		input_report_abs(input, ABS_PRESSURE, Rt);
@@ -917,6 +923,7 @@ static int __devinit ads7846_probe(struct spi_device *spi)
 	ts->spi = spi;
 	ts->input = input_dev;
 	ts->vref_mv = pdata->vref_mv;
+	ts->swap_xy = pdata->swap_xy;
 
 	hrtimer_init(&ts->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	ts->timer.function = ads7846_timer;
diff --git a/include/linux/spi/ads7846.h b/include/linux/spi/ads7846.h
index 2ea20320c09..51948eb6927 100644
--- a/include/linux/spi/ads7846.h
+++ b/include/linux/spi/ads7846.h
@@ -17,6 +17,7 @@ struct ads7846_platform_data {
 	u16	vref_mv;		/* external vref value, milliVolts */
 	bool	keep_vref_on;		/* set to keep vref on for differential
 					 * measurements as well */
+	bool	swap_xy;		/* swap x and y axes */
 
 	/* Settling time of the analog signals; a function of Vcc and the
 	 * capacitance on the X/Y drivers.  If set to non-zero, two samples
-- 
cgit v1.2.3-70-g09d2


From 0a7ae2ff0d29bb3b327edff4c8ab67b3834fa811 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Wed, 20 May 2009 08:54:31 +0200
Subject: block: change the tag sync vs async restriction logic

Make them fully share the tag space, but disallow async requests using
the last any two slots.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-barrier.c    |  2 +-
 block/blk-core.c       |  2 +-
 block/blk-tag.c        | 15 +++++++++------
 block/elevator.c       |  8 ++++----
 include/linux/blkdev.h |  7 ++++++-
 5 files changed, 21 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 0ab81a0a750..0d98054cdbd 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -218,7 +218,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp)
 	} else
 		skip |= QUEUE_ORDSEQ_PREFLUSH;
 
-	if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && q->in_flight)
+	if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q))
 		rq = NULL;
 	else
 		skip |= QUEUE_ORDSEQ_DRAIN;
diff --git a/block/blk-core.c b/block/blk-core.c
index 49065075d46..1c748403882 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1815,7 +1815,7 @@ void blk_dequeue_request(struct request *rq)
 	 * the driver side.
 	 */
 	if (blk_account_rq(rq))
-		q->in_flight++;
+		q->in_flight[rq_is_sync(rq)]++;
 }
 
 /**
diff --git a/block/blk-tag.c b/block/blk-tag.c
index c260f7c30dd..2e5cfeb5933 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -336,7 +336,7 @@ EXPORT_SYMBOL(blk_queue_end_tag);
 int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 {
 	struct blk_queue_tag *bqt = q->queue_tags;
-	unsigned max_depth, offset;
+	unsigned max_depth;
 	int tag;
 
 	if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
@@ -355,13 +355,16 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 	 * to starve sync IO on behalf of flooding async IO.
 	 */
 	max_depth = bqt->max_depth;
-	if (rq_is_sync(rq))
-		offset = 0;
-	else
-		offset = max_depth >> 2;
+	if (!rq_is_sync(rq) && max_depth > 1) {
+		max_depth -= 2;
+		if (!max_depth)
+			max_depth = 1;
+		if (q->in_flight[0] > max_depth)
+			return 1;
+	}
 
 	do {
-		tag = find_next_zero_bit(bqt->tag_map, max_depth, offset);
+		tag = find_first_zero_bit(bqt->tag_map, max_depth);
 		if (tag >= max_depth)
 			return 1;
 
diff --git a/block/elevator.c b/block/elevator.c
index 918920056e4..ebee948293e 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -546,7 +546,7 @@ void elv_requeue_request(struct request_queue *q, struct request *rq)
 	 * in_flight count again
 	 */
 	if (blk_account_rq(rq)) {
-		q->in_flight--;
+		q->in_flight[rq_is_sync(rq)]--;
 		if (blk_sorted_rq(rq))
 			elv_deactivate_rq(q, rq);
 	}
@@ -685,7 +685,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
 
 	if (unplug_it && blk_queue_plugged(q)) {
 		int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC]
-			- q->in_flight;
+				- queue_in_flight(q);
 
 		if (nrq >= q->unplug_thresh)
 			__generic_unplug_device(q);
@@ -823,7 +823,7 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
 	 * request is released from the driver, io must be done
 	 */
 	if (blk_account_rq(rq)) {
-		q->in_flight--;
+		q->in_flight[rq_is_sync(rq)]--;
 		if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
 			e->ops->elevator_completed_req_fn(q, rq);
 	}
@@ -838,7 +838,7 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
 		if (!list_empty(&q->queue_head))
 			next = list_entry_rq(q->queue_head.next);
 
-		if (!q->in_flight &&
+		if (!queue_in_flight(q) &&
 		    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
 		    (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) {
 			blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 564445be7a6..a967dd775db 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -404,7 +404,7 @@ struct request_queue
 	struct list_head	tag_busy_list;
 
 	unsigned int		nr_sorted;
-	unsigned int		in_flight;
+	unsigned int		in_flight[2];
 
 	unsigned int		rq_timeout;
 	struct timer_list	timeout;
@@ -511,6 +511,11 @@ static inline void queue_flag_clear_unlocked(unsigned int flag,
 	__clear_bit(flag, &q->queue_flags);
 }
 
+static inline int queue_in_flight(struct request_queue *q)
+{
+	return q->in_flight[0] + q->in_flight[1];
+}
+
 static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 {
 	WARN_ON_ONCE(!queue_is_locked(q));
-- 
cgit v1.2.3-70-g09d2


From d7b629a34fc4134a43c730b5f0197855dc4948d0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 20 May 2009 12:21:19 +0200
Subject: perf_counter: Solve the rotate_ctx vs inherit race differently

Instead of disabling RR scheduling of the counters, use a different list
that does not get rotated to iterate the counters on inheritance.

[ Impact: cleanup, optimization ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090520102553.237504544@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  1 -
 kernel/perf_counter.c        | 15 +++++----------
 2 files changed, 5 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 13cb2fbbf33..c8c1dfc22c9 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -508,7 +508,6 @@ struct perf_counter_context {
 	int			nr_counters;
 	int			nr_active;
 	int			is_active;
-	int			rr_allowed;
 	struct task_struct	*task;
 
 	/*
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 4d8f97375f3..64113e6d194 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1120,8 +1120,7 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	__perf_counter_task_sched_out(ctx);
 
 	rotate_ctx(&cpuctx->ctx);
-	if (ctx->rr_allowed)
-		rotate_ctx(ctx);
+	rotate_ctx(ctx);
 
 	perf_counter_cpu_sched_in(cpuctx, cpu);
 	perf_counter_task_sched_in(curr, cpu);
@@ -3109,7 +3108,6 @@ __perf_counter_init_context(struct perf_counter_context *ctx,
 	mutex_init(&ctx->mutex);
 	INIT_LIST_HEAD(&ctx->counter_list);
 	INIT_LIST_HEAD(&ctx->event_list);
-	ctx->rr_allowed = 1;
 	ctx->task = task;
 }
 
@@ -3350,14 +3348,14 @@ void perf_counter_init_task(struct task_struct *child)
 	 */
 	mutex_lock(&parent_ctx->mutex);
 
-	parent_ctx->rr_allowed = 0;
-	barrier(); /* irqs */
-
 	/*
 	 * We dont have to disable NMIs - we are only looking at
 	 * the list, not manipulating it:
 	 */
-	list_for_each_entry(counter, &parent_ctx->counter_list, list_entry) {
+	list_for_each_entry_rcu(counter, &parent_ctx->event_list, event_entry) {
+		if (counter != counter->group_leader)
+			continue;
+
 		if (!counter->hw_event.inherit)
 			continue;
 
@@ -3366,9 +3364,6 @@ void perf_counter_init_task(struct task_struct *child)
 			break;
 	}
 
-	barrier(); /* irqs */
-	parent_ctx->rr_allowed = 1;
-
 	mutex_unlock(&parent_ctx->mutex);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 26b119bc811a73bac6ecf95bdf284bf31c7955f0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 20 May 2009 12:21:20 +0200
Subject: perf_counter: Log irq_period changes

For the dynamic irq_period code, log whenever we change the period so that
analyzing code can normalize the event flow.

[ Impact: add new feature to allow more precise profiling ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090520102553.298769743@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  8 ++++++++
 kernel/perf_counter.c        | 40 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 47 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index c8c1dfc22c9..f612941ef46 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -257,6 +257,14 @@ enum perf_event_type {
 	 */
 	PERF_EVENT_COMM			= 3,
 
+	/*
+	 * struct {
+	 * 	struct perf_event_header	header;
+	 * 	u64				irq_period;
+	 * };
+	 */
+	PERF_EVENT_PERIOD		= 4,
+
 	/*
 	 * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
 	 * will be PERF_RECORD_*
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 64113e6d194..db02eb16c77 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1046,7 +1046,9 @@ int perf_counter_task_enable(void)
 	return 0;
 }
 
-void perf_adjust_freq(struct perf_counter_context *ctx)
+static void perf_log_period(struct perf_counter *counter, u64 period);
+
+static void perf_adjust_freq(struct perf_counter_context *ctx)
 {
 	struct perf_counter *counter;
 	u64 irq_period;
@@ -1072,6 +1074,8 @@ void perf_adjust_freq(struct perf_counter_context *ctx)
 		if (!irq_period)
 			irq_period = 1;
 
+		perf_log_period(counter, irq_period);
+
 		counter->hw.irq_period = irq_period;
 		counter->hw.interrupts = 0;
 	}
@@ -2406,6 +2410,40 @@ void perf_counter_munmap(unsigned long addr, unsigned long len,
 	perf_counter_mmap_event(&mmap_event);
 }
 
+/*
+ *
+ */
+
+static void perf_log_period(struct perf_counter *counter, u64 period)
+{
+	struct perf_output_handle handle;
+	int ret;
+
+	struct {
+		struct perf_event_header	header;
+		u64				time;
+		u64				period;
+	} freq_event = {
+		.header = {
+			.type = PERF_EVENT_PERIOD,
+			.misc = 0,
+			.size = sizeof(freq_event),
+		},
+		.time = sched_clock(),
+		.period = period,
+	};
+
+	if (counter->hw.irq_period == period)
+		return;
+
+	ret = perf_output_begin(&handle, counter, sizeof(freq_event), 0, 0);
+	if (ret)
+		return;
+
+	perf_output_put(&handle, freq_event);
+	perf_output_end(&handle);
+}
+
 /*
  * Generic counter overflow handling.
  */
-- 
cgit v1.2.3-70-g09d2


From 89f536ccfa8b370ff4d054f4061858ca9322c25a Mon Sep 17 00:00:00 2001
From: Stephane Chatty <chatty@enac.fr>
Date: Wed, 20 May 2009 15:41:24 +0200
Subject: HID: add new multitouch and digitizer contants

Added constants to hid.h for all digitizer usages (including the new multitouch
ones that are not yet in the official USB spec but are being pushed by Microsft
as described in their paper "Digitizer Drivers for Windows Touch and Pen-Based
Computers"). Updated hid-debug.c to support the new MT input constants such as
ABS_MT_POSITION_X.

Signed-off-by: Stephane Chatty <chatty@enac.fr>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-debug.c | 23 +++++++++++++++++++++--
 include/linux/hid.h     | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index 47ac1a7d66e..04359ed64b8 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -137,6 +137,14 @@ static const struct hid_usage_entry hid_usage_table[] = {
     {0, 0x44, "BarrelSwitch"},
     {0, 0x45, "Eraser"},
     {0, 0x46, "TabletPick"},
+    {0, 0x47, "Confidence"},
+    {0, 0x48, "Width"},
+    {0, 0x49, "Height"},
+    {0, 0x51, "ContactID"},
+    {0, 0x52, "InputMode"},
+    {0, 0x53, "DeviceIndex"},
+    {0, 0x54, "ContactCount"},
+    {0, 0x55, "ContactMaximumNumber"},
   { 15, 0, "PhysicalInterfaceDevice" },
     {0, 0x00, "Undefined"},
     {0, 0x01, "Physical_Interface_Device"},
@@ -514,9 +522,11 @@ static const char *events[EV_MAX + 1] = {
 	[EV_FF_STATUS] = "ForceFeedbackStatus",
 };
 
-static const char *syncs[2] = {
+static const char *syncs[3] = {
 	[SYN_REPORT] = "Report",		[SYN_CONFIG] = "Config",
+	[SYN_MT_REPORT] = "MT Report",
 };
+
 static const char *keys[KEY_MAX + 1] = {
 	[KEY_RESERVED] = "Reserved",		[KEY_ESC] = "Esc",
 	[KEY_1] = "1",				[KEY_2] = "2",
@@ -734,8 +744,17 @@ static const char *absolutes[ABS_MAX + 1] = {
 	[ABS_HAT2Y] = "Hat2Y",		[ABS_HAT3X] = "Hat3X",
 	[ABS_HAT3Y] = "Hat 3Y",		[ABS_PRESSURE] = "Pressure",
 	[ABS_DISTANCE] = "Distance",	[ABS_TILT_X] = "XTilt",
-	[ABS_TILT_Y] = "YTilt",		[ABS_TOOL_WIDTH] = "Tool Width",
+	[ABS_TILT_Y] = "YTilt",		[ABS_TOOL_WIDTH] = "ToolWidth",
 	[ABS_VOLUME] = "Volume",	[ABS_MISC] = "Misc",
+	[ABS_MT_TOUCH_MAJOR] = "MTMajor",
+	[ABS_MT_TOUCH_MINOR] = "MTMinor",
+	[ABS_MT_WIDTH_MAJOR] = "MTMajorW",
+	[ABS_MT_WIDTH_MINOR] = "MTMinorW",
+	[ABS_MT_ORIENTATION] = "MTOrientation",
+	[ABS_MT_POSITION_X] = "MTPositionX",
+	[ABS_MT_POSITION_Y] = "MTPositionY",
+	[ABS_MT_TOOL_TYPE] = "MTToolType",
+	[ABS_MT_BLOB_ID] = "MTBlobID",
 };
 
 static const char *misc[MSC_MAX + 1] = {
diff --git a/include/linux/hid.h b/include/linux/hid.h
index a72876e4358..53489fd4d70 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -238,6 +238,42 @@ struct hid_item {
 #define HID_GD_RIGHT		0x00010092
 #define HID_GD_LEFT		0x00010093
 
+#define HID_DG_DIGITIZER	0x000d0001
+#define HID_DG_PEN		0x000d0002
+#define HID_DG_LIGHTPEN		0x000d0003
+#define HID_DG_TOUCHSCREEN	0x000d0004
+#define HID_DG_TOUCHPAD		0x000d0005
+#define HID_DG_STYLUS		0x000d0020
+#define HID_DG_PUCK		0x000d0021
+#define HID_DG_FINGER		0x000d0022
+#define HID_DG_TIPPRESSURE	0x000d0030
+#define HID_DG_BARRELPRESSURE	0x000d0031
+#define HID_DG_INRANGE		0x000d0032
+#define HID_DG_TOUCH		0x000d0033
+#define HID_DG_UNTOUCH		0x000d0034
+#define HID_DG_TAP		0x000d0035
+#define HID_DG_TABLETFUNCTIONKEY	0x000d0039
+#define HID_DG_PROGRAMCHANGEKEY	0x000d003a
+#define HID_DG_INVERT		0x000d003c
+#define HID_DG_TIPSWITCH	0x000d0042
+#define HID_DG_TIPSWITCH2	0x000d0043
+#define HID_DG_BARRELSWITCH	0x000d0044
+#define HID_DG_ERASER		0x000d0045
+#define HID_DG_TABLETPICK	0x000d0046
+/*
+ * as of May 20, 2009 the usages below are not yet in the official USB spec
+ * but are being pushed by Microsft as described in their paper "Digitizer
+ * Drivers for Windows Touch and Pen-Based Computers"
+ */
+#define HID_DG_CONFIDENCE	0x000d0047
+#define HID_DG_WIDTH		0x000d0048
+#define HID_DG_HEIGHT		0x000d0049
+#define HID_DG_CONTACTID	0x000d0051
+#define HID_DG_INPUTMODE	0x000d0052
+#define HID_DG_DEVICEINDEX	0x000d0053
+#define HID_DG_CONTACTCOUNT	0x000d0054
+#define HID_DG_CONTACTMAX	0x000d0055
+
 /*
  * HID report types --- Ouch! HID spec says 1 2 3!
  */
-- 
cgit v1.2.3-70-g09d2


From 038659e7c6b385065cb223872771ac437ef70b62 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Sat, 2 May 2009 00:37:17 -0400
Subject: cfg80211: Process regulatory max bandwidth checks for HT40

We are not correctly listening to the regulatory max bandwidth
settings. To actually make use of it we need to redesign things
a bit. This patch does the work for that. We do this to so we
can obey to regulatory rules accordingly for use of HT40.

We end up dealing with HT40 by having two passes for each channel.

The first check will see if a 20 MHz channel fits into the channel's
center freq on a given frequency range. We check for a 20 MHz
banwidth channel as that is the maximum an individual channel
will use, at least for now. The first pass will go ahead and
check if the regulatory rule for that given center of frequency
allows 40 MHz bandwidths and we use this to determine whether
or not the channel supports HT40 or not. So to support HT40 you'll
need at a regulatory rule that allows you to use 40 MHz channels
but you're channel must also be enabled and support 20 MHz by itself.

The second pass is done after we do the regulatory checks over
an device's supported channel list. On each channel we'll check
if the control channel and the extension both:

 o exist
 o are enabled
 o regulatory allows 40 MHz bandwidth on its frequency range

This work allows allows us to idependently check for HT40- and
HT40+.

Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath/regd.c |  10 +-
 include/net/cfg80211.h          |  14 ++-
 net/wireless/reg.c              | 201 +++++++++++++++++++++++++++++++---------
 3 files changed, 175 insertions(+), 50 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath/regd.c b/drivers/net/wireless/ath/regd.c
index fdf07c82208..7a89f9fac7d 100644
--- a/drivers/net/wireless/ath/regd.c
+++ b/drivers/net/wireless/ath/regd.c
@@ -200,8 +200,10 @@ ath_reg_apply_beaconing_flags(struct wiphy *wiphy,
 				continue;
 
 			if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) {
-				r = freq_reg_info(wiphy, ch->center_freq,
-					&bandwidth, &reg_rule);
+				r = freq_reg_info(wiphy,
+						  ch->center_freq,
+						  bandwidth,
+						  &reg_rule);
 				if (r)
 					continue;
 				/*
@@ -265,7 +267,7 @@ ath_reg_apply_active_scan_flags(struct wiphy *wiphy,
 	 */
 
 	ch = &sband->channels[11]; /* CH 12 */
-	r = freq_reg_info(wiphy, ch->center_freq, &bandwidth, &reg_rule);
+	r = freq_reg_info(wiphy, ch->center_freq, bandwidth, &reg_rule);
 	if (!r) {
 		if (!(reg_rule->flags & NL80211_RRF_PASSIVE_SCAN))
 			if (ch->flags & IEEE80211_CHAN_PASSIVE_SCAN)
@@ -273,7 +275,7 @@ ath_reg_apply_active_scan_flags(struct wiphy *wiphy,
 	}
 
 	ch = &sband->channels[12]; /* CH 13 */
-	r = freq_reg_info(wiphy, ch->center_freq, &bandwidth, &reg_rule);
+	r = freq_reg_info(wiphy, ch->center_freq, bandwidth, &reg_rule);
 	if (!r) {
 		if (!(reg_rule->flags & NL80211_RRF_PASSIVE_SCAN))
 			if (ch->flags & IEEE80211_CHAN_PASSIVE_SCAN)
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 9e17a83d343..282d58d52fc 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -70,6 +70,9 @@ enum ieee80211_channel_flags {
 	IEEE80211_CHAN_NO_FAT_BELOW	= 1<<5,
 };
 
+#define IEEE80211_CHAN_NO_HT40 \
+	(IEEE80211_CHAN_NO_FAT_ABOVE | IEEE80211_CHAN_NO_FAT_BELOW)
+
 /**
  * struct ieee80211_channel - channel definition
  *
@@ -1303,9 +1306,10 @@ extern void wiphy_apply_custom_regulatory(
  * freq_reg_info - get regulatory information for the given frequency
  * @wiphy: the wiphy for which we want to process this rule for
  * @center_freq: Frequency in KHz for which we want regulatory information for
- * @bandwidth: the bandwidth requirement you have in KHz, if you do not have one
- * 	you can set this to 0. If this frequency is allowed we then set
- * 	this value to the maximum allowed bandwidth.
+ * @desired_bw_khz: the desired max bandwidth you want to use per
+ *	channel. Note that this is still 20 MHz if you want to use HT40
+ *	as HT40 makes use of two channels for its 40 MHz width bandwidth.
+ *	If set to 0 we'll assume you want the standard 20 MHz.
  * @reg_rule: the regulatory rule which we have for this frequency
  *
  * Use this function to get the regulatory rule for a specific frequency on
@@ -1320,7 +1324,9 @@ extern void wiphy_apply_custom_regulatory(
  * freq_in_rule_band() for our current definition of a band -- this is purely
  * subjective and right now its 802.11 specific.
  */
-extern int freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 *bandwidth,
+extern int freq_reg_info(struct wiphy *wiphy,
+			 u32 center_freq,
+			 u32 desired_bw_khz,
 			 const struct ieee80211_reg_rule **reg_rule);
 
 /*
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 537af62ec42..d897528ee7f 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -48,12 +48,6 @@ static struct regulatory_request *last_request;
 /* To trigger userspace events */
 static struct platform_device *reg_pdev;
 
-/* Keep the ordering from large to small */
-static u32 supported_bandwidths[] = {
-	MHZ_TO_KHZ(40),
-	MHZ_TO_KHZ(20),
-};
-
 /*
  * Central wireless core regulatory domains, we only need two,
  * the current one and a world regulatory domain in case we have no
@@ -435,19 +429,20 @@ static bool is_valid_rd(const struct ieee80211_regdomain *rd)
 	return true;
 }
 
-/* Returns value in KHz */
-static u32 freq_max_bandwidth(const struct ieee80211_freq_range *freq_range,
-	u32 freq)
+static bool reg_does_bw_fit(const struct ieee80211_freq_range *freq_range,
+			    u32 center_freq_khz,
+			    u32 bw_khz)
 {
-	unsigned int i;
-	for (i = 0; i < ARRAY_SIZE(supported_bandwidths); i++) {
-		u32 start_freq_khz = freq - supported_bandwidths[i]/2;
-		u32 end_freq_khz = freq + supported_bandwidths[i]/2;
-		if (start_freq_khz >= freq_range->start_freq_khz &&
-			end_freq_khz <= freq_range->end_freq_khz)
-			return supported_bandwidths[i];
-	}
-	return 0;
+	u32 start_freq_khz, end_freq_khz;
+
+	start_freq_khz = center_freq_khz - (bw_khz/2);
+	end_freq_khz = center_freq_khz + (bw_khz/2);
+
+	if (start_freq_khz >= freq_range->start_freq_khz &&
+	    end_freq_khz <= freq_range->end_freq_khz)
+		return true;
+
+	return false;
 }
 
 /**
@@ -847,14 +842,17 @@ static u32 map_regdom_flags(u32 rd_flags)
 
 static int freq_reg_info_regd(struct wiphy *wiphy,
 			      u32 center_freq,
-			      u32 *bandwidth,
+			      u32 desired_bw_khz,
 			      const struct ieee80211_reg_rule **reg_rule,
 			      const struct ieee80211_regdomain *custom_regd)
 {
 	int i;
 	bool band_rule_found = false;
 	const struct ieee80211_regdomain *regd;
-	u32 max_bandwidth = 0;
+	bool bw_fits = false;
+
+	if (!desired_bw_khz)
+		desired_bw_khz = MHZ_TO_KHZ(20);
 
 	regd = custom_regd ? custom_regd : cfg80211_regdomain;
 
@@ -887,38 +885,54 @@ static int freq_reg_info_regd(struct wiphy *wiphy,
 		if (!band_rule_found)
 			band_rule_found = freq_in_rule_band(fr, center_freq);
 
-		max_bandwidth = freq_max_bandwidth(fr, center_freq);
+		bw_fits = reg_does_bw_fit(fr,
+					  center_freq,
+					  desired_bw_khz);
 
-		if (max_bandwidth && *bandwidth <= max_bandwidth) {
+		if (band_rule_found && bw_fits) {
 			*reg_rule = rr;
-			*bandwidth = max_bandwidth;
-			break;
+			return 0;
 		}
 	}
 
 	if (!band_rule_found)
 		return -ERANGE;
 
-	return !max_bandwidth;
+	return -EINVAL;
 }
 EXPORT_SYMBOL(freq_reg_info);
 
-int freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 *bandwidth,
-			 const struct ieee80211_reg_rule **reg_rule)
+int freq_reg_info(struct wiphy *wiphy,
+		  u32 center_freq,
+		  u32 desired_bw_khz,
+		  const struct ieee80211_reg_rule **reg_rule)
 {
 	assert_cfg80211_lock();
-	return freq_reg_info_regd(wiphy, center_freq,
-		bandwidth, reg_rule, NULL);
+	return freq_reg_info_regd(wiphy,
+				  center_freq,
+				  desired_bw_khz,
+				  reg_rule,
+				  NULL);
 }
 
+/*
+ * Note that right now we assume the desired channel bandwidth
+ * is always 20 MHz for each individual channel (HT40 uses 20 MHz
+ * per channel, the primary and the extension channel). To support
+ * smaller custom bandwidths such as 5 MHz or 10 MHz we'll need a
+ * new ieee80211_channel.target_bw and re run the regulatory check
+ * on the wiphy with the target_bw specified. Then we can simply use
+ * that below for the desired_bw_khz below.
+ */
 static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
 			   unsigned int chan_idx)
 {
 	int r;
-	u32 flags;
-	u32 max_bandwidth = 0;
+	u32 flags, bw_flags = 0;
+	u32 desired_bw_khz = MHZ_TO_KHZ(20);
 	const struct ieee80211_reg_rule *reg_rule = NULL;
 	const struct ieee80211_power_rule *power_rule = NULL;
+	const struct ieee80211_freq_range *freq_range = NULL;
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_channel *chan;
 	struct wiphy *request_wiphy = NULL;
@@ -933,8 +947,10 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
 
 	flags = chan->orig_flags;
 
-	r = freq_reg_info(wiphy, MHZ_TO_KHZ(chan->center_freq),
-		&max_bandwidth, &reg_rule);
+	r = freq_reg_info(wiphy,
+			  MHZ_TO_KHZ(chan->center_freq),
+			  desired_bw_khz,
+			  &reg_rule);
 
 	if (r) {
 		/*
@@ -977,6 +993,10 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
 	}
 
 	power_rule = &reg_rule->power_rule;
+	freq_range = &reg_rule->freq_range;
+
+	if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(40))
+		bw_flags = IEEE80211_CHAN_NO_HT40;
 
 	if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER &&
 	    request_wiphy && request_wiphy == wiphy &&
@@ -987,19 +1007,19 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
 		 * settings
 		 */
 		chan->flags = chan->orig_flags =
-			map_regdom_flags(reg_rule->flags);
+			map_regdom_flags(reg_rule->flags) | bw_flags;
 		chan->max_antenna_gain = chan->orig_mag =
 			(int) MBI_TO_DBI(power_rule->max_antenna_gain);
-		chan->max_bandwidth = KHZ_TO_MHZ(max_bandwidth);
+		chan->max_bandwidth = KHZ_TO_MHZ(desired_bw_khz);
 		chan->max_power = chan->orig_mpwr =
 			(int) MBM_TO_DBM(power_rule->max_eirp);
 		return;
 	}
 
-	chan->flags = flags | map_regdom_flags(reg_rule->flags);
+	chan->flags = flags | bw_flags | map_regdom_flags(reg_rule->flags);
 	chan->max_antenna_gain = min(chan->orig_mag,
 		(int) MBI_TO_DBI(power_rule->max_antenna_gain));
-	chan->max_bandwidth = KHZ_TO_MHZ(max_bandwidth);
+	chan->max_bandwidth = KHZ_TO_MHZ(desired_bw_khz);
 	if (chan->orig_mpwr)
 		chan->max_power = min(chan->orig_mpwr,
 			(int) MBM_TO_DBM(power_rule->max_eirp));
@@ -1156,6 +1176,93 @@ static void reg_process_beacons(struct wiphy *wiphy)
 	wiphy_update_beacon_reg(wiphy);
 }
 
+static bool is_ht40_not_allowed(struct ieee80211_channel *chan)
+{
+	if (!chan)
+		return true;
+	if (chan->flags & IEEE80211_CHAN_DISABLED)
+		return true;
+	/* This would happen when regulatory rules disallow HT40 completely */
+	if (IEEE80211_CHAN_NO_HT40 == (chan->flags & (IEEE80211_CHAN_NO_HT40)))
+		return true;
+	return false;
+}
+
+static void reg_process_ht_flags_channel(struct wiphy *wiphy,
+					 enum ieee80211_band band,
+					 unsigned int chan_idx)
+{
+	struct ieee80211_supported_band *sband;
+	struct ieee80211_channel *channel;
+	struct ieee80211_channel *channel_before = NULL, *channel_after = NULL;
+	unsigned int i;
+
+	assert_cfg80211_lock();
+
+	sband = wiphy->bands[band];
+	BUG_ON(chan_idx >= sband->n_channels);
+	channel = &sband->channels[chan_idx];
+
+	if (is_ht40_not_allowed(channel)) {
+		channel->flags |= IEEE80211_CHAN_NO_HT40;
+		return;
+	}
+
+	/*
+	 * We need to ensure the extension channels exist to
+	 * be able to use HT40- or HT40+, this finds them (or not)
+	 */
+	for (i = 0; i < sband->n_channels; i++) {
+		struct ieee80211_channel *c = &sband->channels[i];
+		if (c->center_freq == (channel->center_freq - 20))
+			channel_before = c;
+		if (c->center_freq == (channel->center_freq + 20))
+			channel_after = c;
+	}
+
+	/*
+	 * Please note that this assumes target bandwidth is 20 MHz,
+	 * if that ever changes we also need to change the below logic
+	 * to include that as well.
+	 */
+	if (is_ht40_not_allowed(channel_before))
+		channel->flags |= IEEE80211_CHAN_NO_FAT_BELOW;
+	else
+		channel->flags &= ~IEEE80211_CHAN_NO_FAT_BELOW;
+
+	if (is_ht40_not_allowed(channel_after))
+		channel->flags |= IEEE80211_CHAN_NO_FAT_ABOVE;
+	else
+		channel->flags &= ~IEEE80211_CHAN_NO_FAT_ABOVE;
+}
+
+static void reg_process_ht_flags_band(struct wiphy *wiphy,
+				      enum ieee80211_band band)
+{
+	unsigned int i;
+	struct ieee80211_supported_band *sband;
+
+	BUG_ON(!wiphy->bands[band]);
+	sband = wiphy->bands[band];
+
+	for (i = 0; i < sband->n_channels; i++)
+		reg_process_ht_flags_channel(wiphy, band, i);
+}
+
+static void reg_process_ht_flags(struct wiphy *wiphy)
+{
+	enum ieee80211_band band;
+
+	if (!wiphy)
+		return;
+
+	for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
+		if (wiphy->bands[band])
+			reg_process_ht_flags_band(wiphy, band);
+	}
+
+}
+
 void wiphy_update_regulatory(struct wiphy *wiphy,
 			     enum nl80211_reg_initiator initiator)
 {
@@ -1169,6 +1276,7 @@ void wiphy_update_regulatory(struct wiphy *wiphy,
 	}
 out:
 	reg_process_beacons(wiphy);
+	reg_process_ht_flags(wiphy);
 	if (wiphy->reg_notifier)
 		wiphy->reg_notifier(wiphy, last_request);
 }
@@ -1179,9 +1287,11 @@ static void handle_channel_custom(struct wiphy *wiphy,
 				  const struct ieee80211_regdomain *regd)
 {
 	int r;
-	u32 max_bandwidth = 0;
+	u32 desired_bw_khz = MHZ_TO_KHZ(20);
+	u32 bw_flags = 0;
 	const struct ieee80211_reg_rule *reg_rule = NULL;
 	const struct ieee80211_power_rule *power_rule = NULL;
+	const struct ieee80211_freq_range *freq_range = NULL;
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_channel *chan;
 
@@ -1191,8 +1301,11 @@ static void handle_channel_custom(struct wiphy *wiphy,
 	BUG_ON(chan_idx >= sband->n_channels);
 	chan = &sband->channels[chan_idx];
 
-	r = freq_reg_info_regd(wiphy, MHZ_TO_KHZ(chan->center_freq),
-		&max_bandwidth, &reg_rule, regd);
+	r = freq_reg_info_regd(wiphy,
+			       MHZ_TO_KHZ(chan->center_freq),
+			       desired_bw_khz,
+			       &reg_rule,
+			       regd);
 
 	if (r) {
 		chan->flags = IEEE80211_CHAN_DISABLED;
@@ -1200,10 +1313,14 @@ static void handle_channel_custom(struct wiphy *wiphy,
 	}
 
 	power_rule = &reg_rule->power_rule;
+	freq_range = &reg_rule->freq_range;
+
+	if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(40))
+		bw_flags = IEEE80211_CHAN_NO_HT40;
 
-	chan->flags |= map_regdom_flags(reg_rule->flags);
+	chan->flags |= map_regdom_flags(reg_rule->flags) | bw_flags;
 	chan->max_antenna_gain = (int) MBI_TO_DBI(power_rule->max_antenna_gain);
-	chan->max_bandwidth = KHZ_TO_MHZ(max_bandwidth);
+	chan->max_bandwidth = KHZ_TO_MHZ(desired_bw_khz);
 	chan->max_power = (int) MBM_TO_DBM(power_rule->max_eirp);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 689da1b3b8b37ff41e79f3fb973c06cdfeef12e5 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Sat, 2 May 2009 00:37:18 -0400
Subject: wireless: rename IEEE80211_CHAN_NO_FAT_* to HT40-/+

This is more consistent with our nl80211 naming convention
for HT40-/+.

Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwlwifi/iwl-core.c   |  4 ++--
 drivers/net/wireless/iwlwifi/iwl-eeprom.c |  8 ++++----
 include/net/cfg80211.h                    | 10 +++++-----
 net/mac80211/mlme.c                       |  4 ++--
 net/wireless/reg.c                        |  8 ++++----
 5 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c
index 5393fb3f452..6c1b171d92b 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.c
+++ b/drivers/net/wireless/iwlwifi/iwl-core.c
@@ -603,10 +603,10 @@ static u8 iwl_is_channel_extension(struct iwl_priv *priv,
 
 	if (extension_chan_offset == IEEE80211_HT_PARAM_CHA_SEC_ABOVE)
 		return !(ch_info->fat_extension_channel &
-					IEEE80211_CHAN_NO_FAT_ABOVE);
+					IEEE80211_CHAN_NO_HT40PLUS);
 	else if (extension_chan_offset == IEEE80211_HT_PARAM_CHA_SEC_BELOW)
 		return !(ch_info->fat_extension_channel &
-					IEEE80211_CHAN_NO_FAT_BELOW);
+					IEEE80211_CHAN_NO_HT40MINUS);
 
 	return 0;
 }
diff --git a/drivers/net/wireless/iwlwifi/iwl-eeprom.c b/drivers/net/wireless/iwlwifi/iwl-eeprom.c
index 401438aec19..b400bd510fc 100644
--- a/drivers/net/wireless/iwlwifi/iwl-eeprom.c
+++ b/drivers/net/wireless/iwlwifi/iwl-eeprom.c
@@ -481,8 +481,8 @@ int iwl_init_channel_map(struct iwl_priv *priv)
 			/* First write that fat is not enabled, and then enable
 			 * one by one */
 			ch_info->fat_extension_channel =
-				(IEEE80211_CHAN_NO_FAT_ABOVE |
-				 IEEE80211_CHAN_NO_FAT_BELOW);
+				(IEEE80211_CHAN_NO_HT40PLUS |
+				 IEEE80211_CHAN_NO_HT40MINUS);
 
 			if (!(is_channel_valid(ch_info))) {
 				IWL_DEBUG_INFO(priv, "Ch. %d Flags %x [%sGHz] - "
@@ -561,7 +561,7 @@ int iwl_init_channel_map(struct iwl_priv *priv)
 				fat_extension_chan = 0;
 			else
 				fat_extension_chan =
-					IEEE80211_CHAN_NO_FAT_BELOW;
+					IEEE80211_CHAN_NO_HT40MINUS;
 
 			/* Set up driver's info for lower half */
 			iwl_set_fat_chan_info(priv, ieeeband,
@@ -573,7 +573,7 @@ int iwl_init_channel_map(struct iwl_priv *priv)
 			iwl_set_fat_chan_info(priv, ieeeband,
 						(eeprom_ch_index[ch] + 4),
 						&(eeprom_ch_info[ch]),
-						IEEE80211_CHAN_NO_FAT_ABOVE);
+						IEEE80211_CHAN_NO_HT40PLUS);
 		}
 	}
 
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 282d58d52fc..7391ad10c8c 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -56,9 +56,9 @@ enum ieee80211_band {
  *	on this channel.
  * @IEEE80211_CHAN_NO_IBSS: IBSS is not allowed on this channel.
  * @IEEE80211_CHAN_RADAR: Radar detection is required on this channel.
- * @IEEE80211_CHAN_NO_FAT_ABOVE: extension channel above this channel
+ * @IEEE80211_CHAN_NO_HT40PLUS: extension channel above this channel
  * 	is not permitted.
- * @IEEE80211_CHAN_NO_FAT_BELOW: extension channel below this channel
+ * @IEEE80211_CHAN_NO_HT40MINUS: extension channel below this channel
  * 	is not permitted.
  */
 enum ieee80211_channel_flags {
@@ -66,12 +66,12 @@ enum ieee80211_channel_flags {
 	IEEE80211_CHAN_PASSIVE_SCAN	= 1<<1,
 	IEEE80211_CHAN_NO_IBSS		= 1<<2,
 	IEEE80211_CHAN_RADAR		= 1<<3,
-	IEEE80211_CHAN_NO_FAT_ABOVE	= 1<<4,
-	IEEE80211_CHAN_NO_FAT_BELOW	= 1<<5,
+	IEEE80211_CHAN_NO_HT40PLUS	= 1<<4,
+	IEEE80211_CHAN_NO_HT40MINUS	= 1<<5,
 };
 
 #define IEEE80211_CHAN_NO_HT40 \
-	(IEEE80211_CHAN_NO_FAT_ABOVE | IEEE80211_CHAN_NO_FAT_BELOW)
+	(IEEE80211_CHAN_NO_HT40PLUS | IEEE80211_CHAN_NO_HT40MINUS)
 
 /**
  * struct ieee80211_channel - channel definition
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index ae030688771..da582b643b9 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -349,13 +349,13 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 
 		switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) {
 		case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
-			if (flags & IEEE80211_CHAN_NO_FAT_ABOVE) {
+			if (flags & IEEE80211_CHAN_NO_HT40PLUS) {
 				cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
 				cap &= ~IEEE80211_HT_CAP_SGI_40;
 			}
 			break;
 		case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
-			if (flags & IEEE80211_CHAN_NO_FAT_BELOW) {
+			if (flags & IEEE80211_CHAN_NO_HT40MINUS) {
 				cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
 				cap &= ~IEEE80211_HT_CAP_SGI_40;
 			}
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index d897528ee7f..48db569d4c6 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1226,14 +1226,14 @@ static void reg_process_ht_flags_channel(struct wiphy *wiphy,
 	 * to include that as well.
 	 */
 	if (is_ht40_not_allowed(channel_before))
-		channel->flags |= IEEE80211_CHAN_NO_FAT_BELOW;
+		channel->flags |= IEEE80211_CHAN_NO_HT40MINUS;
 	else
-		channel->flags &= ~IEEE80211_CHAN_NO_FAT_BELOW;
+		channel->flags &= ~IEEE80211_CHAN_NO_HT40MINUS;
 
 	if (is_ht40_not_allowed(channel_after))
-		channel->flags |= IEEE80211_CHAN_NO_FAT_ABOVE;
+		channel->flags |= IEEE80211_CHAN_NO_HT40PLUS;
 	else
-		channel->flags &= ~IEEE80211_CHAN_NO_FAT_ABOVE;
+		channel->flags &= ~IEEE80211_CHAN_NO_HT40PLUS;
 }
 
 static void reg_process_ht_flags_band(struct wiphy *wiphy,
-- 
cgit v1.2.3-70-g09d2


From 3dcf670baf3d3a9bfc752e59d0b1a8d886230750 Mon Sep 17 00:00:00 2001
From: David Kilroy <kilroyd@googlemail.com>
Date: Sat, 16 May 2009 23:13:46 +0100
Subject: cfg80211: mark ops as pointer to const

This allows drivers to mark their cfg80211_ops tables const.

Signed-off-by: David Kilroy <kilroyd@googlemail.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h | 2 +-
 net/wireless/core.c    | 2 +-
 net/wireless/core.h    | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7391ad10c8c..09a823520e5 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1073,7 +1073,7 @@ static inline const char *wiphy_name(struct wiphy *wiphy)
  * The returned pointer must be assigned to each netdev's
  * ieee80211_ptr for proper operation.
  */
-struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv);
+struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv);
 
 /**
  * wiphy_register - register a wiphy with cfg80211
diff --git a/net/wireless/core.c b/net/wireless/core.c
index b6ef5337c85..a5dbea1da47 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -229,7 +229,7 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
 
 /* exported functions */
 
-struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv)
+struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv)
 {
 	static int wiphy_counter;
 
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 88f234c8f6b..9511c2be83d 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -16,7 +16,7 @@
 #include "reg.h"
 
 struct cfg80211_registered_device {
-	struct cfg80211_ops *ops;
+	const struct cfg80211_ops *ops;
 	struct list_head list;
 	/* we hold this mutex during any call so that
 	 * we cannot do multiple calls at once, and also
-- 
cgit v1.2.3-70-g09d2


From cf5aa2f1f39d367d449df4c10de3d8e6f9c4443b Mon Sep 17 00:00:00 2001
From: David Kilroy <kilroyd@googlemail.com>
Date: Sat, 16 May 2009 23:13:47 +0100
Subject: cfg80211: mark wiphy->privid as pointer to const

This allows drivers to use a const pointer as the privid without a cast.

Signed-off-by: David Kilroy <kilroyd@gmail.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 09a823520e5..81a3bfa4547 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -997,7 +997,7 @@ struct wiphy {
 	 * know whether it points to a wiphy your driver has registered
 	 * or not. Assign this to something global to your driver to
 	 * help determine whether you own this wiphy or not. */
-	void *privid;
+	const void *privid;
 
 	struct ieee80211_supported_band *bands[IEEE80211_NUM_BANDS];
 
-- 
cgit v1.2.3-70-g09d2


From e3da574a0ddd3e90a1e2b788b84b94bc17a75172 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 18 May 2009 19:56:36 +0200
Subject: cfg80211: allow wext to remove keys that don't exist

Some applications using wireless extensions expect to be able to
remove a key that doesn't exist. One example is wpa_supplicant
which doesn't actually change behaviour when running into an
error while trying to do that, but it prints an error message
which users interpret as wpa_supplicant having problems.

The safe thing to do is not change the behaviour of wireless
extensions any more, so when the driver reports -ENOENT let
the wext bridge code return success to userspace. To guarantee
this, also document that drivers should return -ENOENT when the
key doesn't exist.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h     | 5 +++--
 net/wireless/wext-compat.c | 7 +++++++
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 81a3bfa4547..389f1d20adf 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -781,10 +781,11 @@ enum wiphy_params_flags {
  * @get_key: get information about the key with the given parameters.
  *	@mac_addr will be %NULL when requesting information for a group
  *	key. All pointers given to the @callback function need not be valid
- *	after it returns.
+ *	after it returns. This function should return an error if it is
+ *	not possible to retrieve the key, -ENOENT if it doesn't exist.
  *
  * @del_key: remove a key given the @mac_addr (%NULL for a group key)
- *	and @key_index
+ *	and @key_index, return -ENOENT if the key doesn't exist.
  *
  * @set_default_key: set the default key on an interface
  *
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index f98090b90fb..711e00a0c9b 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -504,6 +504,13 @@ static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
 			else if (idx == wdev->wext.default_mgmt_key)
 				wdev->wext.default_mgmt_key = -1;
 		}
+		/*
+		 * Applications using wireless extensions expect to be
+		 * able to delete keys that don't exist, so allow that.
+		 */
+		if (err == -ENOENT)
+			return 0;
+
 		return err;
 	} else {
 		if (addr)
-- 
cgit v1.2.3-70-g09d2


From cce4c77b87ce7e71a0f244a3dfb6ac1c3a1bc67e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 19 May 2009 10:39:34 +0200
Subject: mac80211: fix kernel-doc

Moving information from config_interface to bss_info_changed
removed struct ieee80211_if_conf which the documentation still
refers to, additionally there's one kernel-doc description too
much and one other missing, fix all this.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 Documentation/DocBook/mac80211.tmpl | 1 -
 include/net/mac80211.h              | 1 -
 net/mac80211/sta_info.h             | 1 +
 3 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/DocBook/mac80211.tmpl b/Documentation/DocBook/mac80211.tmpl
index fbeaffc1dcc..e3698666357 100644
--- a/Documentation/DocBook/mac80211.tmpl
+++ b/Documentation/DocBook/mac80211.tmpl
@@ -145,7 +145,6 @@ usage should require reading the full document.
         interface in STA mode at first!
       </para>
 !Finclude/net/mac80211.h ieee80211_if_init_conf
-!Finclude/net/mac80211.h ieee80211_if_conf
     </chapter>
 
     <chapter id="rx-tx">
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index d10ed1776fc..2d0610581ef 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -173,7 +173,6 @@ enum ieee80211_bss_change {
  * @timestamp: beacon timestamp
  * @beacon_int: beacon interval
  * @assoc_capability: capabilities taken from assoc resp
- * @ht: BSS's HT configuration
  * @basic_rates: bitmap of basic rates, each bit stands for an
  *	index into the rate table configured by the driver in
  *	the current band.
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index f53aa9dc6e9..49a1a1f7651 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -216,6 +216,7 @@ struct sta_ampdu_mlme {
  * @plink_state: peer link state
  * @plink_timeout: timeout of peer link
  * @plink_timer: peer link watch timer
+ * @plink_timer_was_running: used by suspend/resume to restore timers
  * @debugfs: debug filesystem info
  * @sta: station information we share with the driver
  */
-- 
cgit v1.2.3-70-g09d2


From 04af8cf6f320031090ab6fa4600b912b0c18fb4b Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Wed, 20 May 2009 17:26:23 -0700
Subject: net: Remove unused parameter from fill method in fib_rules_ops.

The netlink message header (struct nlmsghdr) is an unused parameter in
fill method of fib_rules_ops struct.  This patch removes this
parameter from this method and fixes the places where this method is
called.

(include/net/fib_rules.h)

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/fib_rules.h | 1 -
 net/core/fib_rules.c    | 2 +-
 net/decnet/dn_rules.c   | 2 +-
 net/ipv4/fib_rules.c    | 2 +-
 net/ipv6/fib6_rules.c   | 2 +-
 5 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index b9b63395d00..ca4b2e84007 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -54,7 +54,6 @@ struct fib_rules_ops
 					   struct fib_rule_hdr *,
 					   struct nlattr **);
 	int			(*fill)(struct fib_rule *, struct sk_buff *,
-					struct nlmsghdr *,
 					struct fib_rule_hdr *);
 	u32			(*default_pref)(struct fib_rules_ops *ops);
 	size_t			(*nlmsg_payload)(struct fib_rule *);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 17d9f497b79..bd309384f8b 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -500,7 +500,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 	if (rule->target)
 		NLA_PUT_U32(skb, FRA_GOTO, rule->target);
 
-	if (ops->fill(rule, skb, nlh, frh) < 0)
+	if (ops->fill(rule, skb, frh) < 0)
 		goto nla_put_failure;
 
 	return nlmsg_end(skb, nlh);
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index a2690b12e03..72495f25269 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -192,7 +192,7 @@ unsigned dnet_addr_type(__le16 addr)
 }
 
 static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
-			    struct nlmsghdr *nlh, struct fib_rule_hdr *frh)
+			    struct fib_rule_hdr *frh)
 {
 	struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
 
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 38904be4102..92d9d97ec5e 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -209,7 +209,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 }
 
 static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
-			  struct nlmsghdr *nlh, struct fib_rule_hdr *frh)
+			  struct fib_rule_hdr *frh)
 {
 	struct fib4_rule *rule4 = (struct fib4_rule *) rule;
 
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index e1a36dbb5a2..00a7a5e4ac9 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -211,7 +211,7 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 }
 
 static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
-			  struct nlmsghdr *nlh, struct fib_rule_hdr *frh)
+			  struct fib_rule_hdr *frh)
 {
 	struct fib6_rule *rule6 = (struct fib6_rule *) rule;
 
-- 
cgit v1.2.3-70-g09d2


From d95ed9275edcb8995bda31005bb3f55e087626d7 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 19 May 2009 18:27:17 +0000
Subject: af_packet: Teach to listen for multiple unicast addresses.

The the PACKET_ADD_MEMBERSHIP and the PACKET_DROP_MEMBERSHIP setsockopt
calls for af_packet already has all of the infrastructure needed to subscribe
to multiple mac addresses.  All that is missing is a flag to say that
the address we want to listen on is a unicast address.

So introduce PACKET_MR_UNICAST and wire it up to dev_unicast_add and
dev_unicast_delete.

Additionally I noticed that errors from dev_mc_add were not propagated
from packet_dev_mc so fix that.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_packet.h |  1 +
 net/packet/af_packet.c    | 10 ++++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index 5b2badeb949..dea7d6b7cf9 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -145,5 +145,6 @@ struct packet_mreq
 #define PACKET_MR_MULTICAST	0
 #define PACKET_MR_PROMISC	1
 #define PACKET_MR_ALLMULTI	2
+#define PACKET_MR_UNICAST	3
 
 #endif
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 766e6b41f7c..c7c5d524967 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1570,9 +1570,9 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
 	switch (i->type) {
 	case PACKET_MR_MULTICAST:
 		if (what > 0)
-			dev_mc_add(dev, i->addr, i->alen, 0);
+			return dev_mc_add(dev, i->addr, i->alen, 0);
 		else
-			dev_mc_delete(dev, i->addr, i->alen, 0);
+			return dev_mc_delete(dev, i->addr, i->alen, 0);
 		break;
 	case PACKET_MR_PROMISC:
 		return dev_set_promiscuity(dev, what);
@@ -1580,6 +1580,12 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
 	case PACKET_MR_ALLMULTI:
 		return dev_set_allmulti(dev, what);
 		break;
+	case PACKET_MR_UNICAST:
+		if (what > 0)
+			return dev_unicast_add(dev, i->addr, i->alen);
+		else
+			return dev_unicast_delete(dev, i->addr, i->alen);
+		break;
 	default:;
 	}
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From b9fc745db833bbf74b4988493b8cd902a84c9415 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Tue, 19 May 2009 13:25:57 -0400
Subject: integrity: path_check update

- Add support in ima_path_check() for integrity checking without
incrementing the counts. (Required for nfsd.)
- rename and export opencount_get to ima_counts_get
- replace ima_shm_check calls with ima_counts_get
- export ima_path_check

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/exec.c                         |  5 ++--
 fs/namei.c                        |  6 +++--
 include/linux/ima.h               | 11 +++++----
 ipc/shm.c                         |  4 ++--
 mm/shmem.c                        |  2 +-
 security/integrity/ima/ima_main.c | 48 +++++++++++++++++++++++----------------
 6 files changed, 46 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/fs/exec.c b/fs/exec.c
index 998e856c307..618d6d1e2c5 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -130,7 +130,8 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
 				 MAY_READ | MAY_EXEC | MAY_OPEN);
 	if (error)
 		goto exit;
-	error = ima_path_check(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN);
+	error = ima_path_check(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN,
+			       IMA_COUNT_UPDATE);
 	if (error)
 		goto exit;
 
@@ -680,7 +681,7 @@ struct file *open_exec(const char *name)
 	err = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_OPEN);
 	if (err)
 		goto out_path_put;
-	err = ima_path_check(&nd.path, MAY_EXEC | MAY_OPEN);
+	err = ima_path_check(&nd.path, MAY_EXEC | MAY_OPEN, IMA_COUNT_UPDATE);
 	if (err)
 		goto out_path_put;
 
diff --git a/fs/namei.c b/fs/namei.c
index 78f253cd2d4..b05a2b1dea6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -853,7 +853,8 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
 			err = inode_permission(nd->path.dentry->d_inode,
 					       MAY_EXEC);
 		if (!err)
-			err = ima_path_check(&nd->path, MAY_EXEC);
+			err = ima_path_check(&nd->path, MAY_EXEC,
+				             IMA_COUNT_UPDATE);
  		if (err)
 			break;
 
@@ -1515,7 +1516,8 @@ int may_open(struct path *path, int acc_mode, int flag)
 		return error;
 
 	error = ima_path_check(path,
-			       acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));
+			       acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC),
+			       IMA_COUNT_UPDATE);
 	if (error)
 		return error;
 	/*
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 0e2aa45cb0c..b1b827d091a 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -13,14 +13,17 @@
 #include <linux/fs.h>
 struct linux_binprm;
 
+#define IMA_COUNT_UPDATE 1
+#define IMA_COUNT_LEAVE 0
+
 #ifdef CONFIG_IMA
 extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_inode_alloc(struct inode *inode);
 extern void ima_inode_free(struct inode *inode);
-extern int ima_path_check(struct path *path, int mask);
+extern int ima_path_check(struct path *path, int mask, int update_counts);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
-extern void ima_shm_check(struct file *file);
+extern void ima_counts_get(struct file *file);
 
 #else
 static inline int ima_bprm_check(struct linux_binprm *bprm)
@@ -38,7 +41,7 @@ static inline void ima_inode_free(struct inode *inode)
 	return;
 }
 
-static inline int ima_path_check(struct path *path, int mask)
+static inline int ima_path_check(struct path *path, int mask, int update_counts)
 {
 	return 0;
 }
@@ -53,7 +56,7 @@ static inline int ima_file_mmap(struct file *file, unsigned long prot)
 	return 0;
 }
 
-static inline void ima_shm_check(struct file *file)
+static inline void ima_counts_get(struct file *file)
 {
 	return;
 }
diff --git a/ipc/shm.c b/ipc/shm.c
index faa46da99eb..47b464229cd 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -384,7 +384,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	error = PTR_ERR(file);
 	if (IS_ERR(file))
 		goto no_file;
-	ima_shm_check(file);
+	ima_counts_get(file);
 
 	id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
 	if (id < 0) {
@@ -891,7 +891,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
 	file = alloc_file(path.mnt, path.dentry, f_mode, &shm_file_operations);
 	if (!file)
 		goto out_free;
-	ima_shm_check(file);
+	ima_counts_get(file);
 
 	file->private_data = sfd;
 	file->f_mapping = shp->shm_file->f_mapping;
diff --git a/mm/shmem.c b/mm/shmem.c
index b25f95ce3db..a817f75f144 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2684,7 +2684,7 @@ int shmem_zero_setup(struct vm_area_struct *vma)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	ima_shm_check(file);
+	ima_counts_get(file);
 	if (vma->vm_file)
 		fput(vma->vm_file);
 	vma->vm_file = file;
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index c4228c0eb2d..a2eb23310ea 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -125,6 +125,15 @@ static int get_path_measurement(struct ima_iint_cache *iint, struct file *file,
 	return rc;
 }
 
+static void ima_update_counts(struct ima_iint_cache *iint, int mask)
+{
+	iint->opencount++;
+	if ((mask & MAY_WRITE) || (mask == 0))
+		iint->writecount++;
+	else if (mask & (MAY_READ | MAY_EXEC))
+		iint->readcount++;
+}
+
 /**
  * ima_path_check - based on policy, collect/store measurement.
  * @path: contains a pointer to the path to be measured
@@ -143,7 +152,7 @@ static int get_path_measurement(struct ima_iint_cache *iint, struct file *file,
  * Return 0 on success, an error code on failure.
  * (Based on the results of appraise_measurement().)
  */
-int ima_path_check(struct path *path, int mask)
+int ima_path_check(struct path *path, int mask, int update_counts)
 {
 	struct inode *inode = path->dentry->d_inode;
 	struct ima_iint_cache *iint;
@@ -157,11 +166,8 @@ int ima_path_check(struct path *path, int mask)
 		return 0;
 
 	mutex_lock(&iint->mutex);
-	iint->opencount++;
-	if ((mask & MAY_WRITE) || (mask == 0))
-		iint->writecount++;
-	else if (mask & (MAY_READ | MAY_EXEC))
-		iint->readcount++;
+	if (update_counts)
+		ima_update_counts(iint, mask);
 
 	rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK);
 	if (rc < 0)
@@ -197,6 +203,7 @@ out:
 	kref_put(&iint->refcount, iint_free);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ima_path_check);
 
 static int process_measurement(struct file *file, const unsigned char *filename,
 			       int mask, int function)
@@ -225,7 +232,16 @@ out:
 	return rc;
 }
 
-static void opencount_get(struct file *file)
+/*
+ * ima_opens_get - increment file counts
+ *
+ * - for IPC shm and shmat file.
+ * - for nfsd exported files.
+ *
+ * Increment the counts for these files to prevent unnecessary
+ * imbalance messages.
+ */
+void ima_counts_get(struct file *file)
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	struct ima_iint_cache *iint;
@@ -237,8 +253,14 @@ static void opencount_get(struct file *file)
 		return;
 	mutex_lock(&iint->mutex);
 	iint->opencount++;
+	if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
+		iint->readcount++;
+
+	if (file->f_mode & FMODE_WRITE)
+		iint->writecount++;
 	mutex_unlock(&iint->mutex);
 }
+EXPORT_SYMBOL_GPL(ima_counts_get);
 
 /**
  * ima_file_mmap - based on policy, collect/store measurement.
@@ -263,18 +285,6 @@ int ima_file_mmap(struct file *file, unsigned long prot)
 	return 0;
 }
 
-/*
- * ima_shm_check - IPC shm and shmat create/fput a file
- *
- * Maintain the opencount for these files to prevent unnecessary
- * imbalance messages.
- */
-void ima_shm_check(struct file *file)
-{
-	opencount_get(file);
-	return;
-}
-
 /**
  * ima_bprm_check - based on policy, collect/store measurement.
  * @bprm: contains the linux_binprm structure
-- 
cgit v1.2.3-70-g09d2


From 4ea7e38696c7e798c47ebbecadfd392f23f814f9 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Thu, 21 May 2009 07:36:08 +0000
Subject: dropmon: add ability to detect when hardware dropsrxpackets

Patch to add the ability to detect drops in hardware interfaces via dropwatch.
Adds a tracepoint to net_rx_action to signal everytime a napi instance is
polled.  The dropmon code then periodically checks to see if the rx_frames
counter has changed, and if so, adds a drop notification to the netlink
protocol, using the reserved all-0's vector to indicate the drop location was in
hardware, rather than somewhere in the code.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>

 include/linux/net_dropmon.h |    8 ++
 include/trace/napi.h        |   11 +++
 net/core/dev.c              |    5 +
 net/core/drop_monitor.c     |  124 ++++++++++++++++++++++++++++++++++++++++++--
 net/core/net-traces.c       |    4 +
 net/core/netpoll.c          |    2
 6 files changed, 149 insertions(+), 5 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net_dropmon.h |   8 +++
 include/trace/napi.h        |  11 ++++
 net/core/dev.c              |   5 +-
 net/core/drop_monitor.c     | 124 ++++++++++++++++++++++++++++++++++++++++++--
 net/core/net-traces.c       |   4 ++
 net/core/netpoll.c          |   2 +
 6 files changed, 149 insertions(+), 5 deletions(-)
 create mode 100644 include/trace/napi.h

(limited to 'include')

diff --git a/include/linux/net_dropmon.h b/include/linux/net_dropmon.h
index 0217fb81a63..e8a8b5c50ed 100644
--- a/include/linux/net_dropmon.h
+++ b/include/linux/net_dropmon.h
@@ -2,12 +2,20 @@
 #define __NET_DROPMON_H
 
 #include <linux/netlink.h>
+#include <linux/types.h>
 
 struct net_dm_drop_point {
 	__u8 pc[8];
 	__u32 count;
 };
 
+#define is_drop_point_hw(x) do {\
+	int ____i, ____j;\
+	for (____i = 0; ____i < 8; i ____i++)\
+		____j |= x[____i];\
+	____j;\
+} while (0)
+
 #define NET_DM_CFG_VERSION  0
 #define NET_DM_CFG_ALERT_COUNT  1
 #define NET_DM_CFG_ALERT_DELAY 2
diff --git a/include/trace/napi.h b/include/trace/napi.h
new file mode 100644
index 00000000000..a8989c4547e
--- /dev/null
+++ b/include/trace/napi.h
@@ -0,0 +1,11 @@
+#ifndef _TRACE_NAPI_H_
+#define _TRACE_NAPI_H_
+
+#include <linux/netdevice.h>
+#include <linux/tracepoint.h>
+
+DECLARE_TRACE(napi_poll,
+	TP_PROTO(struct napi_struct *napi),
+	TP_ARGS(napi));
+
+#endif
diff --git a/net/core/dev.c b/net/core/dev.c
index 92ebeca2990..3942266d1f6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -126,6 +126,7 @@
 #include <linux/in.h>
 #include <linux/jhash.h>
 #include <linux/random.h>
+#include <trace/napi.h>
 
 #include "net-sysfs.h"
 
@@ -2771,8 +2772,10 @@ static void net_rx_action(struct softirq_action *h)
 		 * accidently calling ->poll() when NAPI is not scheduled.
 		 */
 		work = 0;
-		if (test_bit(NAPI_STATE_SCHED, &n->state))
+		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
 			work = n->poll(n, weight);
+			trace_napi_poll(n);
+		}
 
 		WARN_ON_ONCE(work > weight);
 
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 2797b711a97..a6c2ac2828f 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -22,8 +22,10 @@
 #include <linux/timer.h>
 #include <linux/bitops.h>
 #include <net/genetlink.h>
+#include <net/netevent.h>
 
 #include <trace/skb.h>
+#include <trace/napi.h>
 
 #include <asm/unaligned.h>
 
@@ -38,7 +40,8 @@ static void send_dm_alert(struct work_struct *unused);
  * and the work handle that will send up
  * netlink alerts
  */
-struct sock *dm_sock;
+static int trace_state = TRACE_OFF;
+static spinlock_t trace_state_lock = SPIN_LOCK_UNLOCKED;
 
 struct per_cpu_dm_data {
 	struct work_struct dm_alert_work;
@@ -47,6 +50,13 @@ struct per_cpu_dm_data {
 	struct timer_list send_timer;
 };
 
+struct dm_hw_stat_delta {
+	struct net_device *dev;
+	struct list_head list;
+	struct rcu_head rcu;
+	unsigned long last_drop_val;
+};
+
 static struct genl_family net_drop_monitor_family = {
 	.id             = GENL_ID_GENERATE,
 	.hdrsize        = 0,
@@ -59,7 +69,8 @@ static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
 
 static int dm_hit_limit = 64;
 static int dm_delay = 1;
-
+static unsigned long dm_hw_check_delta = 2*HZ;
+static LIST_HEAD(hw_stats_list);
 
 static void reset_per_cpu_data(struct per_cpu_dm_data *data)
 {
@@ -115,7 +126,7 @@ static void sched_send_work(unsigned long unused)
 	schedule_work(&data->dm_alert_work);
 }
 
-static void trace_kfree_skb_hit(struct sk_buff *skb, void *location)
+static void trace_drop_common(struct sk_buff *skb, void *location)
 {
 	struct net_dm_alert_msg *msg;
 	struct nlmsghdr *nlh;
@@ -159,24 +170,80 @@ out:
 	return;
 }
 
+static void trace_kfree_skb_hit(struct sk_buff *skb, void *location)
+{
+	trace_drop_common(skb, location);
+}
+
+static void trace_napi_poll_hit(struct napi_struct *napi)
+{
+	struct dm_hw_stat_delta *new_stat;
+
+	/*
+	 * Ratelimit our check time to dm_hw_check_delta jiffies
+	 */
+	if (!time_after(jiffies, napi->dev->last_rx + dm_hw_check_delta))
+		return;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
+		if ((new_stat->dev == napi->dev)  &&
+		    (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
+			trace_drop_common(NULL, NULL);
+			new_stat->last_drop_val = napi->dev->stats.rx_dropped;
+			break;
+		}
+	}
+	rcu_read_unlock();
+}
+
+
+static void free_dm_hw_stat(struct rcu_head *head)
+{
+	struct dm_hw_stat_delta *n;
+	n = container_of(head, struct dm_hw_stat_delta, rcu);
+	kfree(n);
+}
+
 static int set_all_monitor_traces(int state)
 {
 	int rc = 0;
+	struct dm_hw_stat_delta *new_stat = NULL;
+	struct dm_hw_stat_delta *temp;
+
+	spin_lock(&trace_state_lock);
 
 	switch (state) {
 	case TRACE_ON:
 		rc |= register_trace_kfree_skb(trace_kfree_skb_hit);
+		rc |= register_trace_napi_poll(trace_napi_poll_hit);
 		break;
 	case TRACE_OFF:
 		rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit);
+		rc |= unregister_trace_napi_poll(trace_napi_poll_hit);
 
 		tracepoint_synchronize_unregister();
+
+		/*
+		 * Clean the device list
+		 */
+		list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
+			if (new_stat->dev == NULL) {
+				list_del_rcu(&new_stat->list);
+				call_rcu(&new_stat->rcu, free_dm_hw_stat);
+			}
+		}
 		break;
 	default:
 		rc = 1;
 		break;
 	}
 
+	if (!rc)
+		trace_state = state;
+
+	spin_unlock(&trace_state_lock);
+
 	if (rc)
 		return -EINPROGRESS;
 	return rc;
@@ -204,6 +271,44 @@ static int net_dm_cmd_trace(struct sk_buff *skb,
 	return -ENOTSUPP;
 }
 
+static int dropmon_net_event(struct notifier_block *ev_block,
+			unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct dm_hw_stat_delta *new_stat = NULL;
+	struct dm_hw_stat_delta *tmp;
+
+	switch (event) {
+	case NETDEV_REGISTER:
+		new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL);
+
+		if (!new_stat)
+			goto out;
+
+		new_stat->dev = dev;
+		INIT_RCU_HEAD(&new_stat->rcu);
+		spin_lock(&trace_state_lock);
+		list_add_rcu(&new_stat->list, &hw_stats_list);
+		spin_unlock(&trace_state_lock);
+		break;
+	case NETDEV_UNREGISTER:
+		spin_lock(&trace_state_lock);
+		list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
+			if (new_stat->dev == dev) {
+				new_stat->dev = NULL;
+				if (trace_state == TRACE_OFF) {
+					list_del_rcu(&new_stat->list);
+					call_rcu(&new_stat->rcu, free_dm_hw_stat);
+					break;
+				}
+			}
+		}
+		spin_unlock(&trace_state_lock);
+		break;
+	}
+out:
+	return NOTIFY_DONE;
+}
 
 static struct genl_ops dropmon_ops[] = {
 	{
@@ -220,6 +325,10 @@ static struct genl_ops dropmon_ops[] = {
 	},
 };
 
+static struct notifier_block dropmon_net_notifier = {
+	.notifier_call = dropmon_net_event
+};
+
 static int __init init_net_drop_monitor(void)
 {
 	int cpu;
@@ -243,12 +352,18 @@ static int __init init_net_drop_monitor(void)
 		ret = genl_register_ops(&net_drop_monitor_family,
 					&dropmon_ops[i]);
 		if (ret) {
-			printk(KERN_CRIT "failed to register operation %d\n",
+			printk(KERN_CRIT "Failed to register operation %d\n",
 				dropmon_ops[i].cmd);
 			goto out_unreg;
 		}
 	}
 
+	rc = register_netdevice_notifier(&dropmon_net_notifier);
+	if (rc < 0) {
+		printk(KERN_CRIT "Failed to register netdevice notifier\n");
+		goto out_unreg;
+	}
+
 	rc = 0;
 
 	for_each_present_cpu(cpu) {
@@ -259,6 +374,7 @@ static int __init init_net_drop_monitor(void)
 		data->send_timer.data = cpu;
 		data->send_timer.function = sched_send_work;
 	}
+
 	goto out;
 
 out_unreg:
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index c8fb45665e4..b07b25bd2cd 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -20,6 +20,7 @@
 #include <linux/netlink.h>
 #include <linux/net_dropmon.h>
 #include <trace/skb.h>
+#include <trace/napi.h>
 
 #include <asm/unaligned.h>
 #include <asm/bitops.h>
@@ -27,3 +28,6 @@
 
 DEFINE_TRACE(kfree_skb);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
+
+DEFINE_TRACE(napi_poll);
+EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 64f51eec657..00b14e2c50e 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -24,6 +24,7 @@
 #include <net/tcp.h>
 #include <net/udp.h>
 #include <asm/unaligned.h>
+#include <trace/napi.h>
 
 /*
  * We maintain a small pool of fully-sized skbs, to make sure the
@@ -137,6 +138,7 @@ static int poll_one_napi(struct netpoll_info *npinfo,
 	set_bit(NAPI_STATE_NPSVC, &napi->state);
 
 	work = napi->poll(napi, budget);
+	trace_napi_poll(napi->dev);
 
 	clear_bit(NAPI_STATE_NPSVC, &napi->state);
 	atomic_dec(&trapped);
-- 
cgit v1.2.3-70-g09d2


From a7b11d738282337488ae158c975d76271ad43a98 Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Thu, 21 May 2009 10:34:04 +0000
Subject: genetlink: Introduce genl_register_family_with_ops()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This introduces genl_register_family_with_ops() that registers a genetlink
family along with operations from a table. This is used to kill copy'n'paste
occurrences in following patches.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/genetlink.h |  2 ++
 net/netlink/genetlink.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

(limited to 'include')

diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 747c255d1df..1b0e3ee4ddd 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -88,6 +88,8 @@ struct genl_ops
 };
 
 extern int genl_register_family(struct genl_family *family);
+extern int genl_register_family_with_ops(struct genl_family *family,
+	struct genl_ops *ops, size_t n_ops);
 extern int genl_unregister_family(struct genl_family *family);
 extern int genl_register_ops(struct genl_family *, struct genl_ops *ops);
 extern int genl_unregister_ops(struct genl_family *, struct genl_ops *ops);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 1d3dd30099d..eed4c6a8afc 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -383,6 +383,52 @@ errout:
 	return err;
 }
 
+/**
+ * genl_register_family_with_ops - register a generic netlink family
+ * @family: generic netlink family
+ * @ops: operations to be registered
+ * @n_ops: number of elements to register
+ *
+ * Registers the specified family and operations from the specified table.
+ * Only one family may be registered with the same family name or identifier.
+ *
+ * The family id may equal GENL_ID_GENERATE causing an unique id to
+ * be automatically generated and assigned.
+ *
+ * Either a doit or dumpit callback must be specified for every registered
+ * operation or the function will fail. Only one operation structure per
+ * command identifier may be registered.
+ *
+ * See include/net/genetlink.h for more documenation on the operations
+ * structure.
+ *
+ * This is equivalent to calling genl_register_family() followed by
+ * genl_register_ops() for every operation entry in the table taking
+ * care to unregister the family on error path.
+ *
+ * Return 0 on success or a negative error code.
+ */
+int genl_register_family_with_ops(struct genl_family *family,
+	struct genl_ops *ops, size_t n_ops)
+{
+	int err, i;
+
+	err = genl_register_family(family);
+	if (err)
+		return err;
+
+	for (i = 0; i < n_ops; ++i, ++ops) {
+		err = genl_register_ops(family, ops);
+		if (err)
+			goto err_out;
+	}
+	return 0;
+err_out:
+	genl_unregister_family(family);
+	return err;
+}
+EXPORT_SYMBOL(genl_register_family_with_ops);
+
 /**
  * genl_unregister_family - unregister generic netlink family
  * @family: generic netlink family
-- 
cgit v1.2.3-70-g09d2


From 5c82f56736e4c3a9eaf53c94366b056c8622d79e Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 22 May 2009 09:41:30 +0100
Subject: AsoC: Make snd_soc_read() and snd_soc_write() functions

Should be no impact on the generated code but it helps the compiler
print clearer messages.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc.h | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 2af3213df90..cf6111d72b1 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -214,10 +214,6 @@ void snd_soc_jack_free_gpios(struct snd_soc_jack *jack, int count,
 			struct snd_soc_jack_gpio *gpios);
 #endif
 
-/* codec IO */
-#define snd_soc_read(codec, reg) codec->read(codec, reg)
-#define snd_soc_write(codec, reg, value) codec->write(codec, reg, value)
-
 /* codec register bit access */
 int snd_soc_update_bits(struct snd_soc_codec *codec, unsigned short reg,
 				unsigned short mask, unsigned short value);
@@ -507,6 +503,19 @@ struct soc_enum {
 	void *dapm;
 };
 
+/* codec IO */
+static inline unsigned int snd_soc_read(struct snd_soc_codec *codec,
+					unsigned int reg)
+{
+	return codec->read(codec, reg);
+}
+
+static inline unsigned int snd_soc_write(struct snd_soc_codec *codec,
+					 unsigned int reg, unsigned int val)
+{
+	return codec->write(codec, reg, val);
+}
+
 #include <sound/soc-dai.h>
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From a63eaf34ae60bdb067a354cc8def2e8f4a01f5f4 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 22 May 2009 14:17:31 +1000
Subject: perf_counter: Dynamically allocate tasks' perf_counter_context struct

This replaces the struct perf_counter_context in the task_struct with
a pointer to a dynamically allocated perf_counter_context struct.  The
main reason for doing is this is to allow us to transfer a
perf_counter_context from one task to another when we do lazy PMU
switching in a later patch.

This has a few side-benefits: the task_struct becomes a little smaller,
we save some memory because only tasks that have perf_counters attached
get a perf_counter_context allocated for them, and we can remove the
inclusion of <linux/perf_counter.h> in sched.h, meaning that we don't
end up recompiling nearly everything whenever perf_counter.h changes.

The perf_counter_context structures are reference-counted and freed
when the last reference is dropped.  A context can have references
from its task and the counters on its task.  Counters can outlive the
task so it is possible that a context will be freed well after its
task has exited.

Contexts are allocated on fork if the parent had a context, or
otherwise the first time that a per-task counter is created on a task.
In the latter case, we set the context pointer in the task struct
locklessly using an atomic compare-and-exchange operation in case we
raced with some other task in creating a context for the subject task.

This also removes the task pointer from the perf_counter struct.  The
task pointer was not used anywhere and would make it harder to move a
context from one task to another.  Anything that needed to know which
task a counter was attached to was already using counter->ctx->task.

The __perf_counter_init_context function moves up in perf_counter.c
so that it can be called from find_get_context, and now initializes
the refcount, but is otherwise unchanged.

We were potentially calling list_del_counter twice: once from
__perf_counter_exit_task when the task exits and once from
__perf_counter_remove_from_context when the counter's fd gets closed.
This adds a check in list_del_counter so it doesn't do anything if
the counter has already been removed from the lists.

Since perf_counter_task_sched_in doesn't do anything if the task doesn't
have a context, and leaves cpuctx->task_ctx = NULL, this adds code to
__perf_install_in_context to set cpuctx->task_ctx if necessary, i.e. in
the case where the current task adds the first counter to itself and
thus creates a context for itself.

This also adds similar code to __perf_counter_enable to handle a
similar situation which can arise when the counters have been disabled
using prctl; that also leaves cpuctx->task_ctx = NULL.

[ Impact: refactor counter context management to prepare for new feature ]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <18966.10075.781053.231153@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic/apic.c  |   1 +
 include/linux/init_task.h    |  13 ---
 include/linux/perf_counter.h |   4 +-
 include/linux/sched.h        |   6 +-
 kernel/exit.c                |   3 +-
 kernel/fork.c                |   1 +
 kernel/perf_counter.c        | 218 +++++++++++++++++++++++++++----------------
 7 files changed, 145 insertions(+), 101 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index e9021a90802..b4f64402a82 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -14,6 +14,7 @@
  *	Mikael Pettersson	:	PM converted to driver model.
  */
 
+#include <linux/perf_counter.h>
 #include <linux/kernel_stat.h>
 #include <linux/mc146818rtc.h>
 #include <linux/acpi_pmtmr.h>
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 503afaa0afa..d87247d2641 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -108,18 +108,6 @@ extern struct group_info init_groups;
 
 extern struct cred init_cred;
 
-#ifdef CONFIG_PERF_COUNTERS
-# define INIT_PERF_COUNTERS(tsk)					\
-	.perf_counter_ctx.counter_list =				\
-		LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list),	\
-	.perf_counter_ctx.event_list =					\
-		LIST_HEAD_INIT(tsk.perf_counter_ctx.event_list),	\
-	.perf_counter_ctx.lock =					\
-		__SPIN_LOCK_UNLOCKED(tsk.perf_counter_ctx.lock),
-#else
-# define INIT_PERF_COUNTERS(tsk)
-#endif
-
 /*
  *  INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -183,7 +171,6 @@ extern struct cred init_cred;
 	},								\
 	.dirties = INIT_PROP_LOCAL_SINGLE(dirties),			\
 	INIT_IDS							\
-	INIT_PERF_COUNTERS(tsk)						\
 	INIT_TRACE_IRQFLAGS						\
 	INIT_LOCKDEP							\
 	INIT_FTRACE_GRAPH						\
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index f612941ef46..07130900546 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -449,7 +449,6 @@ struct perf_counter {
 	struct hw_perf_counter		hw;
 
 	struct perf_counter_context	*ctx;
-	struct task_struct		*task;
 	struct file			*filp;
 
 	struct perf_counter		*parent;
@@ -498,7 +497,6 @@ struct perf_counter {
  * Used as a container for task counters and CPU counters as well:
  */
 struct perf_counter_context {
-#ifdef CONFIG_PERF_COUNTERS
 	/*
 	 * Protect the states of the counters in the list,
 	 * nr_active, and the list:
@@ -516,6 +514,7 @@ struct perf_counter_context {
 	int			nr_counters;
 	int			nr_active;
 	int			is_active;
+	atomic_t		refcount;
 	struct task_struct	*task;
 
 	/*
@@ -523,7 +522,6 @@ struct perf_counter_context {
 	 */
 	u64			time;
 	u64			timestamp;
-#endif
 };
 
 /**
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ff59d123151..9714d450f41 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -71,7 +71,6 @@ struct sched_param {
 #include <linux/path.h>
 #include <linux/compiler.h>
 #include <linux/completion.h>
-#include <linux/perf_counter.h>
 #include <linux/pid.h>
 #include <linux/percpu.h>
 #include <linux/topology.h>
@@ -99,6 +98,7 @@ struct robust_list_head;
 struct bio;
 struct bts_tracer;
 struct fs_struct;
+struct perf_counter_context;
 
 /*
  * List of flags we want to share for kernel threads,
@@ -1387,7 +1387,9 @@ struct task_struct {
 	struct list_head pi_state_list;
 	struct futex_pi_state *pi_state_cache;
 #endif
-	struct perf_counter_context perf_counter_ctx;
+#ifdef CONFIG_PERF_COUNTERS
+	struct perf_counter_context *perf_counter_ctxp;
+#endif
 #ifdef CONFIG_NUMA
 	struct mempolicy *mempolicy;
 	short il_next;
diff --git a/kernel/exit.c b/kernel/exit.c
index f9dfedd94af..99ad4063ee4 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -48,6 +48,7 @@
 #include <linux/tracehook.h>
 #include <linux/fs_struct.h>
 #include <linux/init_task.h>
+#include <linux/perf_counter.h>
 #include <trace/sched.h>
 
 #include <asm/uaccess.h>
@@ -159,7 +160,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
 	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
 
 #ifdef CONFIG_PERF_COUNTERS
-	WARN_ON_ONCE(!list_empty(&tsk->perf_counter_ctx.counter_list));
+	WARN_ON_ONCE(tsk->perf_counter_ctxp);
 #endif
 	trace_sched_process_free(tsk);
 	put_task_struct(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index d32fef4d38e..e72a09f5355 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -63,6 +63,7 @@
 #include <linux/fs_struct.h>
 #include <trace/sched.h>
 #include <linux/magic.h>
+#include <linux/perf_counter.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 08584c16049..06ea3eae886 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -97,6 +97,17 @@ void perf_enable(void)
 		hw_perf_enable();
 }
 
+static void get_ctx(struct perf_counter_context *ctx)
+{
+	atomic_inc(&ctx->refcount);
+}
+
+static void put_ctx(struct perf_counter_context *ctx)
+{
+	if (atomic_dec_and_test(&ctx->refcount))
+		kfree(ctx);
+}
+
 static void
 list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 {
@@ -118,11 +129,17 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	ctx->nr_counters++;
 }
 
+/*
+ * Remove a counter from the lists for its context.
+ * Must be called with counter->mutex and ctx->mutex held.
+ */
 static void
 list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 {
 	struct perf_counter *sibling, *tmp;
 
+	if (list_empty(&counter->list_entry))
+		return;
 	ctx->nr_counters--;
 
 	list_del_init(&counter->list_entry);
@@ -216,8 +233,6 @@ static void __perf_counter_remove_from_context(void *info)
 
 	counter_sched_out(counter, cpuctx, ctx);
 
-	counter->task = NULL;
-
 	list_del_counter(counter, ctx);
 
 	if (!ctx->task) {
@@ -279,7 +294,6 @@ retry:
 	 */
 	if (!list_empty(&counter->list_entry)) {
 		list_del_counter(counter, ctx);
-		counter->task = NULL;
 	}
 	spin_unlock_irq(&ctx->lock);
 }
@@ -568,11 +582,17 @@ static void __perf_install_in_context(void *info)
 	 * If this is a task context, we need to check whether it is
 	 * the current task context of this cpu. If not it has been
 	 * scheduled out before the smp call arrived.
+	 * Or possibly this is the right context but it isn't
+	 * on this cpu because it had no counters.
 	 */
-	if (ctx->task && cpuctx->task_ctx != ctx)
-		return;
+	if (ctx->task && cpuctx->task_ctx != ctx) {
+		if (cpuctx->task_ctx || ctx->task != current)
+			return;
+		cpuctx->task_ctx = ctx;
+	}
 
 	spin_lock_irqsave(&ctx->lock, flags);
+	ctx->is_active = 1;
 	update_context_time(ctx);
 
 	/*
@@ -653,7 +673,6 @@ perf_install_in_context(struct perf_counter_context *ctx,
 		return;
 	}
 
-	counter->task = task;
 retry:
 	task_oncpu_function_call(task, __perf_install_in_context,
 				 counter);
@@ -693,10 +712,14 @@ static void __perf_counter_enable(void *info)
 	 * If this is a per-task counter, need to check whether this
 	 * counter's task is the current task on this cpu.
 	 */
-	if (ctx->task && cpuctx->task_ctx != ctx)
-		return;
+	if (ctx->task && cpuctx->task_ctx != ctx) {
+		if (cpuctx->task_ctx || ctx->task != current)
+			return;
+		cpuctx->task_ctx = ctx;
+	}
 
 	spin_lock_irqsave(&ctx->lock, flags);
+	ctx->is_active = 1;
 	update_context_time(ctx);
 
 	counter->prev_state = counter->state;
@@ -852,10 +875,10 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 void perf_counter_task_sched_out(struct task_struct *task, int cpu)
 {
 	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
-	struct perf_counter_context *ctx = &task->perf_counter_ctx;
+	struct perf_counter_context *ctx = task->perf_counter_ctxp;
 	struct pt_regs *regs;
 
-	if (likely(!cpuctx->task_ctx))
+	if (likely(!ctx || !cpuctx->task_ctx))
 		return;
 
 	update_context_time(ctx);
@@ -871,6 +894,8 @@ static void __perf_counter_task_sched_out(struct perf_counter_context *ctx)
 {
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 
+	if (!cpuctx->task_ctx)
+		return;
 	__perf_counter_sched_out(ctx, cpuctx);
 	cpuctx->task_ctx = NULL;
 }
@@ -969,8 +994,10 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 void perf_counter_task_sched_in(struct task_struct *task, int cpu)
 {
 	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
-	struct perf_counter_context *ctx = &task->perf_counter_ctx;
+	struct perf_counter_context *ctx = task->perf_counter_ctxp;
 
+	if (likely(!ctx))
+		return;
 	__perf_counter_sched_in(ctx, cpuctx, cpu);
 	cpuctx->task_ctx = ctx;
 }
@@ -985,11 +1012,11 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
 int perf_counter_task_disable(void)
 {
 	struct task_struct *curr = current;
-	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
+	struct perf_counter_context *ctx = curr->perf_counter_ctxp;
 	struct perf_counter *counter;
 	unsigned long flags;
 
-	if (likely(!ctx->nr_counters))
+	if (!ctx || !ctx->nr_counters)
 		return 0;
 
 	local_irq_save(flags);
@@ -1020,12 +1047,12 @@ int perf_counter_task_disable(void)
 int perf_counter_task_enable(void)
 {
 	struct task_struct *curr = current;
-	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
+	struct perf_counter_context *ctx = curr->perf_counter_ctxp;
 	struct perf_counter *counter;
 	unsigned long flags;
 	int cpu;
 
-	if (likely(!ctx->nr_counters))
+	if (!ctx || !ctx->nr_counters)
 		return 0;
 
 	local_irq_save(flags);
@@ -1128,19 +1155,23 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 		return;
 
 	cpuctx = &per_cpu(perf_cpu_context, cpu);
-	ctx = &curr->perf_counter_ctx;
+	ctx = curr->perf_counter_ctxp;
 
 	perf_adjust_freq(&cpuctx->ctx);
-	perf_adjust_freq(ctx);
+	if (ctx)
+		perf_adjust_freq(ctx);
 
 	perf_counter_cpu_sched_out(cpuctx);
-	__perf_counter_task_sched_out(ctx);
+	if (ctx)
+		__perf_counter_task_sched_out(ctx);
 
 	rotate_ctx(&cpuctx->ctx);
-	rotate_ctx(ctx);
+	if (ctx)
+		rotate_ctx(ctx);
 
 	perf_counter_cpu_sched_in(cpuctx, cpu);
-	perf_counter_task_sched_in(curr, cpu);
+	if (ctx)
+		perf_counter_task_sched_in(curr, cpu);
 }
 
 /*
@@ -1176,6 +1207,22 @@ static u64 perf_counter_read(struct perf_counter *counter)
 	return atomic64_read(&counter->count);
 }
 
+/*
+ * Initialize the perf_counter context in a task_struct:
+ */
+static void
+__perf_counter_init_context(struct perf_counter_context *ctx,
+			    struct task_struct *task)
+{
+	memset(ctx, 0, sizeof(*ctx));
+	spin_lock_init(&ctx->lock);
+	mutex_init(&ctx->mutex);
+	INIT_LIST_HEAD(&ctx->counter_list);
+	INIT_LIST_HEAD(&ctx->event_list);
+	atomic_set(&ctx->refcount, 1);
+	ctx->task = task;
+}
+
 static void put_context(struct perf_counter_context *ctx)
 {
 	if (ctx->task)
@@ -1186,6 +1233,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 {
 	struct perf_cpu_context *cpuctx;
 	struct perf_counter_context *ctx;
+	struct perf_counter_context *tctx;
 	struct task_struct *task;
 
 	/*
@@ -1225,15 +1273,36 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 	if (!task)
 		return ERR_PTR(-ESRCH);
 
-	ctx = &task->perf_counter_ctx;
-	ctx->task = task;
-
 	/* Reuse ptrace permission checks for now. */
 	if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
-		put_context(ctx);
+		put_task_struct(task);
 		return ERR_PTR(-EACCES);
 	}
 
+	ctx = task->perf_counter_ctxp;
+	if (!ctx) {
+		ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL);
+		if (!ctx) {
+			put_task_struct(task);
+			return ERR_PTR(-ENOMEM);
+		}
+		__perf_counter_init_context(ctx, task);
+		/*
+		 * Make sure other cpus see correct values for *ctx
+		 * once task->perf_counter_ctxp is visible to them.
+		 */
+		smp_wmb();
+		tctx = cmpxchg(&task->perf_counter_ctxp, NULL, ctx);
+		if (tctx) {
+			/*
+			 * We raced with some other task; use
+			 * the context they set.
+			 */
+			kfree(ctx);
+			ctx = tctx;
+		}
+	}
+
 	return ctx;
 }
 
@@ -1242,6 +1311,7 @@ static void free_counter_rcu(struct rcu_head *head)
 	struct perf_counter *counter;
 
 	counter = container_of(head, struct perf_counter, rcu_head);
+	put_ctx(counter->ctx);
 	kfree(counter);
 }
 
@@ -2247,7 +2317,7 @@ static void perf_counter_comm_event(struct perf_comm_event *comm_event)
 	perf_counter_comm_ctx(&cpuctx->ctx, comm_event);
 	put_cpu_var(perf_cpu_context);
 
-	perf_counter_comm_ctx(&current->perf_counter_ctx, comm_event);
+	perf_counter_comm_ctx(current->perf_counter_ctxp, comm_event);
 }
 
 void perf_counter_comm(struct task_struct *task)
@@ -2256,7 +2326,9 @@ void perf_counter_comm(struct task_struct *task)
 
 	if (!atomic_read(&nr_comm_tracking))
 		return;
-       
+	if (!current->perf_counter_ctxp)
+		return;
+
 	comm_event = (struct perf_comm_event){
 		.task	= task,
 		.event  = {
@@ -2372,7 +2444,7 @@ got_name:
 	perf_counter_mmap_ctx(&cpuctx->ctx, mmap_event);
 	put_cpu_var(perf_cpu_context);
 
-	perf_counter_mmap_ctx(&current->perf_counter_ctx, mmap_event);
+	perf_counter_mmap_ctx(current->perf_counter_ctxp, mmap_event);
 
 	kfree(buf);
 }
@@ -2384,6 +2456,8 @@ void perf_counter_mmap(unsigned long addr, unsigned long len,
 
 	if (!atomic_read(&nr_mmap_tracking))
 		return;
+	if (!current->perf_counter_ctxp)
+		return;
 
 	mmap_event = (struct perf_mmap_event){
 		.file   = file,
@@ -2985,6 +3059,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	counter->group_leader		= group_leader;
 	counter->pmu			= NULL;
 	counter->ctx			= ctx;
+	get_ctx(ctx);
 
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	if (hw_event->disabled)
@@ -3149,21 +3224,6 @@ err_put_context:
 	goto out_fput;
 }
 
-/*
- * Initialize the perf_counter context in a task_struct:
- */
-static void
-__perf_counter_init_context(struct perf_counter_context *ctx,
-			    struct task_struct *task)
-{
-	memset(ctx, 0, sizeof(*ctx));
-	spin_lock_init(&ctx->lock);
-	mutex_init(&ctx->mutex);
-	INIT_LIST_HEAD(&ctx->counter_list);
-	INIT_LIST_HEAD(&ctx->event_list);
-	ctx->task = task;
-}
-
 /*
  * inherit a counter from parent task to child task:
  */
@@ -3195,7 +3255,6 @@ inherit_counter(struct perf_counter *parent_counter,
 	/*
 	 * Link it up in the child's context:
 	 */
-	child_counter->task = child;
 	add_counter_to_ctx(child_counter, child_ctx);
 
 	child_counter->parent = parent_counter;
@@ -3294,40 +3353,15 @@ __perf_counter_exit_task(struct task_struct *child,
 	struct perf_counter *parent_counter;
 
 	/*
-	 * If we do not self-reap then we have to wait for the
-	 * child task to unschedule (it will happen for sure),
-	 * so that its counter is at its final count. (This
-	 * condition triggers rarely - child tasks usually get
-	 * off their CPU before the parent has a chance to
-	 * get this far into the reaping action)
+	 * Protect against concurrent operations on child_counter
+	 * due its fd getting closed, etc.
 	 */
-	if (child != current) {
-		wait_task_inactive(child, 0);
-		update_counter_times(child_counter);
-		list_del_counter(child_counter, child_ctx);
-	} else {
-		struct perf_cpu_context *cpuctx;
-		unsigned long flags;
-
-		/*
-		 * Disable and unlink this counter.
-		 *
-		 * Be careful about zapping the list - IRQ/NMI context
-		 * could still be processing it:
-		 */
-		local_irq_save(flags);
-		perf_disable();
-
-		cpuctx = &__get_cpu_var(perf_cpu_context);
+	mutex_lock(&child_counter->mutex);
 
-		group_sched_out(child_counter, cpuctx, child_ctx);
-		update_counter_times(child_counter);
+	update_counter_times(child_counter);
+	list_del_counter(child_counter, child_ctx);
 
-		list_del_counter(child_counter, child_ctx);
-
-		perf_enable();
-		local_irq_restore(flags);
-	}
+	mutex_unlock(&child_counter->mutex);
 
 	parent_counter = child_counter->parent;
 	/*
@@ -3346,19 +3380,29 @@ __perf_counter_exit_task(struct task_struct *child,
  *
  * Note: we may be running in child context, but the PID is not hashed
  * anymore so new counters will not be added.
+ * (XXX not sure that is true when we get called from flush_old_exec.
+ *  -- paulus)
  */
 void perf_counter_exit_task(struct task_struct *child)
 {
 	struct perf_counter *child_counter, *tmp;
 	struct perf_counter_context *child_ctx;
+	unsigned long flags;
 
 	WARN_ON_ONCE(child != current);
 
-	child_ctx = &child->perf_counter_ctx;
+	child_ctx = child->perf_counter_ctxp;
 
-	if (likely(!child_ctx->nr_counters))
+	if (likely(!child_ctx))
 		return;
 
+	local_irq_save(flags);
+	__perf_counter_task_sched_out(child_ctx);
+	child->perf_counter_ctxp = NULL;
+	local_irq_restore(flags);
+
+	mutex_lock(&child_ctx->mutex);
+
 again:
 	list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
 				 list_entry)
@@ -3371,6 +3415,10 @@ again:
 	 */
 	if (!list_empty(&child_ctx->counter_list))
 		goto again;
+
+	mutex_unlock(&child_ctx->mutex);
+
+	put_ctx(child_ctx);
 }
 
 /*
@@ -3382,19 +3430,25 @@ void perf_counter_init_task(struct task_struct *child)
 	struct perf_counter *counter;
 	struct task_struct *parent = current;
 
-	child_ctx  =  &child->perf_counter_ctx;
-	parent_ctx = &parent->perf_counter_ctx;
-
-	__perf_counter_init_context(child_ctx, child);
+	child->perf_counter_ctxp = NULL;
 
 	/*
 	 * This is executed from the parent task context, so inherit
-	 * counters that have been marked for cloning:
+	 * counters that have been marked for cloning.
+	 * First allocate and initialize a context for the child.
 	 */
 
-	if (likely(!parent_ctx->nr_counters))
+	child_ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL);
+	if (!child_ctx)
+		return;
+
+	parent_ctx = parent->perf_counter_ctxp;
+	if (likely(!parent_ctx || !parent_ctx->nr_counters))
 		return;
 
+	__perf_counter_init_context(child_ctx, child);
+	child->perf_counter_ctxp = child_ctx;
+
 	/*
 	 * Lock the parent list. No need to lock the child - not PID
 	 * hashed yet and not running, so nobody can access it.
-- 
cgit v1.2.3-70-g09d2


From 564c2b210add41df9a3a5aaa365c1d97cff6110d Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 22 May 2009 14:27:22 +1000
Subject: perf_counter: Optimize context switch between identical inherited
 contexts

When monitoring a process and its descendants with a set of inherited
counters, we can often get the situation in a context switch where
both the old (outgoing) and new (incoming) process have the same set
of counters, and their values are ultimately going to be added together.
In that situation it doesn't matter which set of counters are used to
count the activity for the new process, so there is really no need to
go through the process of reading the hardware counters and updating
the old task's counters and then setting up the PMU for the new task.

This optimizes the context switch in this situation.  Instead of
scheduling out the perf_counter_context for the old task and
scheduling in the new context, we simply transfer the old context
to the new task and keep using it without interruption.  The new
context gets transferred to the old task.  This means that both
tasks still have a valid perf_counter_context, so no special case
is introduced when the old task gets scheduled in again, either on
this CPU or another CPU.

The equivalence of contexts is detected by keeping a pointer in
each cloned context pointing to the context it was cloned from.
To cope with the situation where a context is changed by adding
or removing counters after it has been cloned, we also keep a
generation number on each context which is incremented every time
a context is changed.  When a context is cloned we take a copy
of the parent's generation number, and two cloned contexts are
equivalent only if they have the same parent and the same
generation number.  In order that the parent context pointer
remains valid (and is not reused), we increment the parent
context's reference count for each context cloned from it.

Since we don't have individual fds for the counters in a cloned
context, the only thing that can make two clones of a given parent
different after they have been cloned is enabling or disabling all
counters with prctl.  To account for this, we keep a count of the
number of enabled counters in each context.  Two contexts must have
the same number of enabled counters to be considered equivalent.

Here are some measurements of the context switch time as measured with
the lat_ctx benchmark from lmbench, comparing the times obtained with
and without this patch series:

		-----Unmodified-----		With this patch series
Counters:	none	2 HW	4H+4S	none	2 HW	4H+4S

2 processes:
Average		3.44	6.45	11.24	3.12	3.39	3.60
St dev		0.04	0.04	0.13	0.05	0.17	0.19

8 processes:
Average		6.45	8.79	14.00	5.57	6.23	7.57
St dev		1.27	1.04	0.88	1.42	1.46	1.42

32 processes:
Average		5.56	8.43	13.78	5.28	5.55	7.15
St dev		0.41	0.47	0.53	0.54	0.57	0.81

The numbers are the mean and standard deviation of 20 runs of
lat_ctx.  The "none" columns are lat_ctx run directly without any
counters.  The "2 HW" columns are with lat_ctx run under perfstat,
counting cycles and instructions.  The "4H+4S" columns are lat_ctx run
under perfstat with 4 hardware counters and 4 software counters
(cycles, instructions, cache references, cache misses, task
clock, context switch, cpu migrations, and page faults).

[ Impact: performance optimization of counter context-switches ]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <18966.10666.517218.332164@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  12 ++++-
 kernel/perf_counter.c        | 109 +++++++++++++++++++++++++++++++++++++------
 kernel/sched.c               |   2 +-
 3 files changed, 107 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 07130900546..4cae01a5045 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -513,6 +513,7 @@ struct perf_counter_context {
 	struct list_head	event_list;
 	int			nr_counters;
 	int			nr_active;
+	int			nr_enabled;
 	int			is_active;
 	atomic_t		refcount;
 	struct task_struct	*task;
@@ -522,6 +523,14 @@ struct perf_counter_context {
 	 */
 	u64			time;
 	u64			timestamp;
+
+	/*
+	 * These fields let us detect when two contexts have both
+	 * been cloned (inherited) from a common ancestor.
+	 */
+	struct perf_counter_context *parent_ctx;
+	u32			parent_gen;
+	u32			generation;
 };
 
 /**
@@ -552,7 +561,8 @@ extern int perf_max_counters;
 extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter);
 
 extern void perf_counter_task_sched_in(struct task_struct *task, int cpu);
-extern void perf_counter_task_sched_out(struct task_struct *task, int cpu);
+extern void perf_counter_task_sched_out(struct task_struct *task,
+					struct task_struct *next, int cpu);
 extern void perf_counter_task_tick(struct task_struct *task, int cpu);
 extern void perf_counter_init_task(struct task_struct *child);
 extern void perf_counter_exit_task(struct task_struct *child);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 06ea3eae886..c10055416de 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -104,8 +104,11 @@ static void get_ctx(struct perf_counter_context *ctx)
 
 static void put_ctx(struct perf_counter_context *ctx)
 {
-	if (atomic_dec_and_test(&ctx->refcount))
+	if (atomic_dec_and_test(&ctx->refcount)) {
+		if (ctx->parent_ctx)
+			put_ctx(ctx->parent_ctx);
 		kfree(ctx);
+	}
 }
 
 static void
@@ -127,6 +130,8 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 
 	list_add_rcu(&counter->event_entry, &ctx->event_list);
 	ctx->nr_counters++;
+	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
+		ctx->nr_enabled++;
 }
 
 /*
@@ -141,6 +146,8 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	if (list_empty(&counter->list_entry))
 		return;
 	ctx->nr_counters--;
+	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
+		ctx->nr_enabled--;
 
 	list_del_init(&counter->list_entry);
 	list_del_rcu(&counter->event_entry);
@@ -203,6 +210,22 @@ group_sched_out(struct perf_counter *group_counter,
 		cpuctx->exclusive = 0;
 }
 
+/*
+ * Mark this context as not being a clone of another.
+ * Called when counters are added to or removed from this context.
+ * We also increment our generation number so that anything that
+ * was cloned from this context before this will not match anything
+ * cloned from this context after this.
+ */
+static void unclone_ctx(struct perf_counter_context *ctx)
+{
+	++ctx->generation;
+	if (!ctx->parent_ctx)
+		return;
+	put_ctx(ctx->parent_ctx);
+	ctx->parent_ctx = NULL;
+}
+
 /*
  * Cross CPU call to remove a performance counter
  *
@@ -263,6 +286,7 @@ static void perf_counter_remove_from_context(struct perf_counter *counter)
 	struct perf_counter_context *ctx = counter->ctx;
 	struct task_struct *task = ctx->task;
 
+	unclone_ctx(ctx);
 	if (!task) {
 		/*
 		 * Per cpu counters are removed via an smp call and
@@ -378,6 +402,7 @@ static void __perf_counter_disable(void *info)
 		else
 			counter_sched_out(counter, cpuctx, ctx);
 		counter->state = PERF_COUNTER_STATE_OFF;
+		ctx->nr_enabled--;
 	}
 
 	spin_unlock_irqrestore(&ctx->lock, flags);
@@ -419,6 +444,7 @@ static void perf_counter_disable(struct perf_counter *counter)
 	if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
 		update_counter_times(counter);
 		counter->state = PERF_COUNTER_STATE_OFF;
+		ctx->nr_enabled--;
 	}
 
 	spin_unlock_irq(&ctx->lock);
@@ -727,6 +753,7 @@ static void __perf_counter_enable(void *info)
 		goto unlock;
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
+	ctx->nr_enabled++;
 
 	/*
 	 * If the counter is in a group and isn't the group leader,
@@ -817,6 +844,7 @@ static void perf_counter_enable(struct perf_counter *counter)
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
 		counter->tstamp_enabled =
 			ctx->time - counter->total_time_enabled;
+		ctx->nr_enabled++;
 	}
  out:
 	spin_unlock_irq(&ctx->lock);
@@ -861,6 +889,25 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 	spin_unlock(&ctx->lock);
 }
 
+/*
+ * Test whether two contexts are equivalent, i.e. whether they
+ * have both been cloned from the same version of the same context
+ * and they both have the same number of enabled counters.
+ * If the number of enabled counters is the same, then the set
+ * of enabled counters should be the same, because these are both
+ * inherited contexts, therefore we can't access individual counters
+ * in them directly with an fd; we can only enable/disable all
+ * counters via prctl, or enable/disable all counters in a family
+ * via ioctl, which will have the same effect on both contexts.
+ */
+static int context_equiv(struct perf_counter_context *ctx1,
+			 struct perf_counter_context *ctx2)
+{
+	return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
+		&& ctx1->parent_gen == ctx2->parent_gen
+		&& ctx1->nr_enabled == ctx2->nr_enabled;
+}
+
 /*
  * Called from scheduler to remove the counters of the current task,
  * with interrupts disabled.
@@ -872,10 +919,12 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
  * accessing the counter control register. If a NMI hits, then it will
  * not restart the counter.
  */
-void perf_counter_task_sched_out(struct task_struct *task, int cpu)
+void perf_counter_task_sched_out(struct task_struct *task,
+				 struct task_struct *next, int cpu)
 {
 	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
 	struct perf_counter_context *ctx = task->perf_counter_ctxp;
+	struct perf_counter_context *next_ctx;
 	struct pt_regs *regs;
 
 	if (likely(!ctx || !cpuctx->task_ctx))
@@ -885,6 +934,16 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
 
 	regs = task_pt_regs(task);
 	perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0);
+
+	next_ctx = next->perf_counter_ctxp;
+	if (next_ctx && context_equiv(ctx, next_ctx)) {
+		task->perf_counter_ctxp = next_ctx;
+		next->perf_counter_ctxp = ctx;
+		ctx->task = next;
+		next_ctx->task = task;
+		return;
+	}
+
 	__perf_counter_sched_out(ctx, cpuctx);
 
 	cpuctx->task_ctx = NULL;
@@ -998,6 +1057,8 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu)
 
 	if (likely(!ctx))
 		return;
+	if (cpuctx->task_ctx == ctx)
+		return;
 	__perf_counter_sched_in(ctx, cpuctx, cpu);
 	cpuctx->task_ctx = ctx;
 }
@@ -3252,6 +3313,16 @@ inherit_counter(struct perf_counter *parent_counter,
 	if (IS_ERR(child_counter))
 		return child_counter;
 
+	/*
+	 * Make the child state follow the state of the parent counter,
+	 * not its hw_event.disabled bit.  We hold the parent's mutex,
+	 * so we won't race with perf_counter_{en,dis}able_family.
+	 */
+	if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE)
+		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
+	else
+		child_counter->state = PERF_COUNTER_STATE_OFF;
+
 	/*
 	 * Link it up in the child's context:
 	 */
@@ -3277,16 +3348,6 @@ inherit_counter(struct perf_counter *parent_counter,
 	mutex_lock(&parent_counter->mutex);
 	list_add_tail(&child_counter->child_list, &parent_counter->child_list);
 
-	/*
-	 * Make the child state follow the state of the parent counter,
-	 * not its hw_event.disabled bit.  We hold the parent's mutex,
-	 * so we won't race with perf_counter_{en,dis}able_family.
-	 */
-	if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE)
-		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
-	else
-		child_counter->state = PERF_COUNTER_STATE_OFF;
-
 	mutex_unlock(&parent_counter->mutex);
 
 	return child_counter;
@@ -3429,6 +3490,7 @@ void perf_counter_init_task(struct task_struct *child)
 	struct perf_counter_context *child_ctx, *parent_ctx;
 	struct perf_counter *counter;
 	struct task_struct *parent = current;
+	int inherited_all = 1;
 
 	child->perf_counter_ctxp = NULL;
 
@@ -3463,12 +3525,31 @@ void perf_counter_init_task(struct task_struct *child)
 		if (counter != counter->group_leader)
 			continue;
 
-		if (!counter->hw_event.inherit)
+		if (!counter->hw_event.inherit) {
+			inherited_all = 0;
 			continue;
+		}
 
 		if (inherit_group(counter, parent,
-				  parent_ctx, child, child_ctx))
+				  parent_ctx, child, child_ctx)) {
+			inherited_all = 0;
 			break;
+		}
+	}
+
+	if (inherited_all) {
+		/*
+		 * Mark the child context as a clone of the parent
+		 * context, or of whatever the parent is a clone of.
+		 */
+		if (parent_ctx->parent_ctx) {
+			child_ctx->parent_ctx = parent_ctx->parent_ctx;
+			child_ctx->parent_gen = parent_ctx->parent_gen;
+		} else {
+			child_ctx->parent_ctx = parent_ctx;
+			child_ctx->parent_gen = parent_ctx->generation;
+		}
+		get_ctx(child_ctx->parent_ctx);
 	}
 
 	mutex_unlock(&parent_ctx->mutex);
diff --git a/kernel/sched.c b/kernel/sched.c
index 419a39d0988..4c0d58bce6b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5091,7 +5091,7 @@ need_resched_nonpreemptible:
 
 	if (likely(prev != next)) {
 		sched_info_switch(prev, next);
-		perf_counter_task_sched_out(prev, cpu);
+		perf_counter_task_sched_out(prev, next, cpu);
 
 		rq->nr_switches++;
 		rq->curr = next;
-- 
cgit v1.2.3-70-g09d2


From 910431c7f2e963017d767b29c80ae706421e569f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 22 May 2009 12:32:15 +0200
Subject: perf_counter: fix !PERF_COUNTERS build failure

Update the !CONFIG_PERF_COUNTERS prototype too, for
perf_counter_task_sched_out().

[ Impact: build fix ]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <18966.10666.517218.332164@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 4cae01a5045..2eedae8498d 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -625,7 +625,8 @@ extern void perf_counter_init(void);
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
 static inline void
-perf_counter_task_sched_out(struct task_struct *task, int cpu)		{ }
+perf_counter_task_sched_out(struct task_struct *task,
+			    struct task_struct *next, int cpu)		{ }
 static inline void
 perf_counter_task_tick(struct task_struct *task, int cpu)		{ }
 static inline void perf_counter_init_task(struct task_struct *child)	{ }
-- 
cgit v1.2.3-70-g09d2


From 86ed3669f068b514ab85ffd548456a342b3fb8d3 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 22 May 2009 15:01:19 +0100
Subject: ASoC: WM9081 mono DAC with integrated 2.6W class AB/D amplifier
 driver

The WM9081 is designed to provide high power output at low distortion
levels in space-constrained portable applications.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/wm9081.h    |   25 +
 sound/soc/codecs/Kconfig  |    4 +
 sound/soc/codecs/Makefile |    2 +
 sound/soc/codecs/wm9081.c | 1532 +++++++++++++++++++++++++++++++++++++++++++++
 sound/soc/codecs/wm9081.h |  787 +++++++++++++++++++++++
 5 files changed, 2350 insertions(+)
 create mode 100644 include/sound/wm9081.h
 create mode 100644 sound/soc/codecs/wm9081.c
 create mode 100644 sound/soc/codecs/wm9081.h

(limited to 'include')

diff --git a/include/sound/wm9081.h b/include/sound/wm9081.h
new file mode 100644
index 00000000000..e173ddbf6bd
--- /dev/null
+++ b/include/sound/wm9081.h
@@ -0,0 +1,25 @@
+/*
+ * linux/sound/wm9081.h -- Platform data for WM9081
+ *
+ * Copyright 2009 Wolfson Microelectronics. PLC.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_SND_WM_9081_H
+#define __LINUX_SND_WM_9081_H
+
+struct wm9081_retune_mobile_setting {
+	const char *name;
+	unsigned int rate;
+	u16 config[20];
+};
+
+struct wm9081_retune_mobile_config {
+	struct wm9081_retune_mobile_setting *configs;
+	int num_configs;
+};
+
+#endif
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index 1c19ad54a9f..7f78b65fc4e 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -40,6 +40,7 @@ config SND_SOC_ALL_CODECS
 	select SND_SOC_WM8971 if I2C
 	select SND_SOC_WM8988 if SND_SOC_I2C_AND_SPI
 	select SND_SOC_WM8990 if I2C
+	select SND_SOC_WM9081 if I2C
 	select SND_SOC_WM9705 if SND_SOC_AC97_BUS
 	select SND_SOC_WM9712 if SND_SOC_AC97_BUS
 	select SND_SOC_WM9713 if SND_SOC_AC97_BUS
@@ -156,6 +157,9 @@ config SND_SOC_WM8988
 config SND_SOC_WM8990
 	tristate
 
+config SND_SOC_WM9081
+	tristate
+
 config SND_SOC_WM9705
 	tristate
 
diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile
index 3d31b6bea83..70c55fa2c43 100644
--- a/sound/soc/codecs/Makefile
+++ b/sound/soc/codecs/Makefile
@@ -28,6 +28,7 @@ snd-soc-wm8960-objs := wm8960.o
 snd-soc-wm8971-objs := wm8971.o
 snd-soc-wm8988-objs := wm8988.o
 snd-soc-wm8990-objs := wm8990.o
+snd-soc-wm9081-objs := wm9081.o
 snd-soc-wm9705-objs := wm9705.o
 snd-soc-wm9712-objs := wm9712.o
 snd-soc-wm9713-objs := wm9713.o
@@ -62,6 +63,7 @@ obj-$(CONFIG_SND_SOC_WM8940)	+= snd-soc-wm8940.o
 obj-$(CONFIG_SND_SOC_WM8960)	+= snd-soc-wm8960.o
 obj-$(CONFIG_SND_SOC_WM8988)	+= snd-soc-wm8988.o
 obj-$(CONFIG_SND_SOC_WM8990)	+= snd-soc-wm8990.o
+obj-$(CONFIG_SND_SOC_WM9081)	+= snd-soc-wm9081.o
 obj-$(CONFIG_SND_SOC_WM9705)	+= snd-soc-wm9705.o
 obj-$(CONFIG_SND_SOC_WM9712)	+= snd-soc-wm9712.o
 obj-$(CONFIG_SND_SOC_WM9713)	+= snd-soc-wm9713.o
diff --git a/sound/soc/codecs/wm9081.c b/sound/soc/codecs/wm9081.c
new file mode 100644
index 00000000000..83e3148c258
--- /dev/null
+++ b/sound/soc/codecs/wm9081.c
@@ -0,0 +1,1532 @@
+/*
+ * wm9081.c  --  WM9081 ALSA SoC Audio driver
+ *
+ * Author: Mark Brown
+ *
+ * Copyright 2009 Wolfson Microelectronics plc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/pm.h>
+#include <linux/i2c.h>
+#include <linux/platform_device.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+#include <sound/initval.h>
+#include <sound/tlv.h>
+
+#include <sound/wm9081.h>
+#include "wm9081.h"
+
+static u16 wm9081_reg_defaults[] = {
+	0x0000,     /* R0  - Software Reset */
+	0x0000,     /* R1 */
+	0x00B9,     /* R2  - Analogue Lineout */
+	0x00B9,     /* R3  - Analogue Speaker PGA */
+	0x0001,     /* R4  - VMID Control */
+	0x0068,     /* R5  - Bias Control 1 */
+	0x0000,     /* R6 */
+	0x0000,     /* R7  - Analogue Mixer */
+	0x0000,     /* R8  - Anti Pop Control */
+	0x01DB,     /* R9  - Analogue Speaker 1 */
+	0x0018,     /* R10 - Analogue Speaker 2 */
+	0x0180,     /* R11 - Power Management */
+	0x0000,     /* R12 - Clock Control 1 */
+	0x0038,     /* R13 - Clock Control 2 */
+	0x4000,     /* R14 - Clock Control 3 */
+	0x0000,     /* R15 */
+	0x0000,     /* R16 - FLL Control 1 */
+	0x0200,     /* R17 - FLL Control 2 */
+	0x0000,     /* R18 - FLL Control 3 */
+	0x0204,     /* R19 - FLL Control 4 */
+	0x0000,     /* R20 - FLL Control 5 */
+	0x0000,     /* R21 */
+	0x0000,     /* R22 - Audio Interface 1 */
+	0x0002,     /* R23 - Audio Interface 2 */
+	0x0008,     /* R24 - Audio Interface 3 */
+	0x0022,     /* R25 - Audio Interface 4 */
+	0x0000,     /* R26 - Interrupt Status */
+	0x0006,     /* R27 - Interrupt Status Mask */
+	0x0000,     /* R28 - Interrupt Polarity */
+	0x0000,     /* R29 - Interrupt Control */
+	0x00C0,     /* R30 - DAC Digital 1 */
+	0x0008,     /* R31 - DAC Digital 2 */
+	0x09AF,     /* R32 - DRC 1 */
+	0x4201,     /* R33 - DRC 2 */
+	0x0000,     /* R34 - DRC 3 */
+	0x0000,     /* R35 - DRC 4 */
+	0x0000,     /* R36 */
+	0x0000,     /* R37 */
+	0x0000,     /* R38 - Write Sequencer 1 */
+	0x0000,     /* R39 - Write Sequencer 2 */
+	0x0002,     /* R40 - MW Slave 1 */
+	0x0000,     /* R41 */
+	0x0000,     /* R42 - EQ 1 */
+	0x0000,     /* R43 - EQ 2 */
+	0x0FCA,     /* R44 - EQ 3 */
+	0x0400,     /* R45 - EQ 4 */
+	0x00B8,     /* R46 - EQ 5 */
+	0x1EB5,     /* R47 - EQ 6 */
+	0xF145,     /* R48 - EQ 7 */
+	0x0B75,     /* R49 - EQ 8 */
+	0x01C5,     /* R50 - EQ 9 */
+	0x169E,     /* R51 - EQ 10 */
+	0xF829,     /* R52 - EQ 11 */
+	0x07AD,     /* R53 - EQ 12 */
+	0x1103,     /* R54 - EQ 13 */
+	0x1C58,     /* R55 - EQ 14 */
+	0xF373,     /* R56 - EQ 15 */
+	0x0A54,     /* R57 - EQ 16 */
+	0x0558,     /* R58 - EQ 17 */
+	0x0564,     /* R59 - EQ 18 */
+	0x0559,     /* R60 - EQ 19 */
+	0x4000,     /* R61 - EQ 20 */
+};
+
+static struct {
+	int ratio;
+	int clk_sys_rate;
+} clk_sys_rates[] = {
+	{ 64,   0 },
+	{ 128,  1 },
+	{ 192,  2 },
+	{ 256,  3 },
+	{ 384,  4 },
+	{ 512,  5 },
+	{ 768,  6 },
+	{ 1024, 7 },
+	{ 1408, 8 },
+	{ 1536, 9 },
+};
+
+static struct {
+	int rate;
+	int sample_rate;
+} sample_rates[] = {
+	{ 8000,  0  },
+	{ 11025, 1  },
+	{ 12000, 2  },
+	{ 16000, 3  },
+	{ 22050, 4  },
+	{ 24000, 5  },
+	{ 32000, 6  },
+	{ 44100, 7  },
+	{ 48000, 8  },
+	{ 88200, 9  },
+	{ 96000, 10 },
+};
+
+static struct {
+	int div; /* *10 due to .5s */
+	int bclk_div;
+} bclk_divs[] = {
+	{ 10,  0  },
+	{ 15,  1  },
+	{ 20,  2  },
+	{ 30,  3  },
+	{ 40,  4  },
+	{ 50,  5  },
+	{ 55,  6  },
+	{ 60,  7  },
+	{ 80,  8  },
+	{ 100, 9  },
+	{ 110, 10 },
+	{ 120, 11 },
+	{ 160, 12 },
+	{ 200, 13 },
+	{ 220, 14 },
+	{ 240, 15 },
+	{ 250, 16 },
+	{ 300, 17 },
+	{ 320, 18 },
+	{ 440, 19 },
+	{ 480, 20 },
+};
+
+struct wm9081_priv {
+	struct snd_soc_codec codec;
+	u16 reg_cache[WM9081_MAX_REGISTER + 1];
+	int sysclk_source;
+	int mclk_rate;
+	int sysclk_rate;
+	int fs;
+	int bclk;
+	int master;
+	int fll_fref;
+	int fll_fout;
+	struct wm9081_retune_mobile_config *retune;
+};
+
+static int wm9081_reg_is_volatile(int reg)
+{
+	switch (reg) {
+	default:
+		return 0;
+	}
+}
+
+static unsigned int wm9081_read_reg_cache(struct snd_soc_codec *codec,
+					  unsigned int reg)
+{
+	u16 *cache = codec->reg_cache;
+	BUG_ON(reg > WM9081_MAX_REGISTER);
+	return cache[reg];
+}
+
+static unsigned int wm9081_read_hw(struct snd_soc_codec *codec, u8 reg)
+{
+	struct i2c_msg xfer[2];
+	u16 data;
+	int ret;
+	struct i2c_client *client = codec->control_data;
+
+	BUG_ON(reg > WM9081_MAX_REGISTER);
+
+	/* Write register */
+	xfer[0].addr = client->addr;
+	xfer[0].flags = 0;
+	xfer[0].len = 1;
+	xfer[0].buf = &reg;
+
+	/* Read data */
+	xfer[1].addr = client->addr;
+	xfer[1].flags = I2C_M_RD;
+	xfer[1].len = 2;
+	xfer[1].buf = (u8 *)&data;
+
+	ret = i2c_transfer(client->adapter, xfer, 2);
+	if (ret != 2) {
+		dev_err(&client->dev, "i2c_transfer() returned %d\n", ret);
+		return 0;
+	}
+
+	return (data >> 8) | ((data & 0xff) << 8);
+}
+
+static unsigned int wm9081_read(struct snd_soc_codec *codec, unsigned int reg)
+{
+	if (wm9081_reg_is_volatile(reg))
+		return wm9081_read_hw(codec, reg);
+	else
+		return wm9081_read_reg_cache(codec, reg);
+}
+
+static int wm9081_write(struct snd_soc_codec *codec, unsigned int reg,
+			unsigned int value)
+{
+	u16 *cache = codec->reg_cache;
+	u8 data[3];
+
+	BUG_ON(reg > WM9081_MAX_REGISTER);
+
+	if (!wm9081_reg_is_volatile(reg))
+		cache[reg] = value;
+
+	data[0] = reg;
+	data[1] = value >> 8;
+	data[2] = value & 0x00ff;
+
+	if (codec->hw_write(codec->control_data, data, 3) == 3)
+		return 0;
+	else
+		return -EIO;
+}
+
+static int wm9081_reset(struct snd_soc_codec *codec)
+{
+	return wm9081_write(codec, WM9081_SOFTWARE_RESET, 0);
+}
+
+static const DECLARE_TLV_DB_SCALE(drc_in_tlv, -4500, 75, 0);
+static const DECLARE_TLV_DB_SCALE(drc_out_tlv, -2250, 75, 0);
+static const DECLARE_TLV_DB_SCALE(drc_min_tlv, -1800, 600, 0);
+static unsigned int drc_max_tlv[] = {
+	TLV_DB_RANGE_HEAD(4),
+	0, 0, TLV_DB_SCALE_ITEM(1200, 0, 0),
+	1, 1, TLV_DB_SCALE_ITEM(1800, 0, 0),
+	2, 2, TLV_DB_SCALE_ITEM(2400, 0, 0),
+	3, 3, TLV_DB_SCALE_ITEM(3600, 0, 0),
+};
+static const DECLARE_TLV_DB_SCALE(drc_qr_tlv, 1200, 600, 0);
+static const DECLARE_TLV_DB_SCALE(drc_startup_tlv, -300, 50, 0);
+
+static const DECLARE_TLV_DB_SCALE(eq_tlv, -1200, 100, 0);
+
+static const DECLARE_TLV_DB_SCALE(in_tlv, -600, 600, 0);
+static const DECLARE_TLV_DB_SCALE(dac_tlv, -7200, 75, 1);
+static const DECLARE_TLV_DB_SCALE(out_tlv, -5700, 100, 0);
+
+static const char *drc_high_text[] = {
+	"1",
+	"1/2",
+	"1/4",
+	"1/8",
+	"1/16",
+	"0",
+};
+
+static const struct soc_enum drc_high =
+	SOC_ENUM_SINGLE(WM9081_DRC_3, 3, 6, drc_high_text);
+
+static const char *drc_low_text[] = {
+	"1",
+	"1/2",
+	"1/4",
+	"1/8",
+	"0",
+};
+
+static const struct soc_enum drc_low =
+	SOC_ENUM_SINGLE(WM9081_DRC_3, 0, 5, drc_low_text);
+
+static const char *drc_atk_text[] = {
+	"181us",
+	"181us",
+	"363us",
+	"726us",
+	"1.45ms",
+	"2.9ms",
+	"5.8ms",
+	"11.6ms",
+	"23.2ms",
+	"46.4ms",
+	"92.8ms",
+	"185.6ms",
+};
+
+static const struct soc_enum drc_atk =
+	SOC_ENUM_SINGLE(WM9081_DRC_2, 12, 12, drc_atk_text);
+
+static const char *drc_dcy_text[] = {
+	"186ms",
+	"372ms",
+	"743ms",
+	"1.49s",
+	"2.97s",
+	"5.94s",
+	"11.89s",
+	"23.78s",
+	"47.56s",
+};
+
+static const struct soc_enum drc_dcy =
+	SOC_ENUM_SINGLE(WM9081_DRC_2, 8, 9, drc_dcy_text);
+
+static const char *drc_qr_dcy_text[] = {
+	"0.725ms",
+	"1.45ms",
+	"5.8ms",
+};
+
+static const struct soc_enum drc_qr_dcy =
+	SOC_ENUM_SINGLE(WM9081_DRC_2, 4, 3, drc_qr_dcy_text);
+
+static const char *dac_deemph_text[] = {
+	"None",
+	"32kHz",
+	"44.1kHz",
+	"48kHz",
+};
+
+static const struct soc_enum dac_deemph =
+	SOC_ENUM_SINGLE(WM9081_DAC_DIGITAL_2, 1, 4, dac_deemph_text);
+
+static const char *speaker_mode_text[] = {
+	"Class D",
+	"Class AB",
+};
+
+static const struct soc_enum speaker_mode =
+	SOC_ENUM_SINGLE(WM9081_ANALOGUE_SPEAKER_2, 6, 2, speaker_mode_text);
+
+static int speaker_mode_get(struct snd_kcontrol *kcontrol,
+			    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
+	unsigned int reg;
+
+	reg = wm9081_read(codec, WM9081_ANALOGUE_SPEAKER_2);
+	if (reg & WM9081_SPK_MODE)
+		ucontrol->value.integer.value[0] = 1;
+	else
+		ucontrol->value.integer.value[0] = 0;
+
+	return 0;
+}
+
+/*
+ * Stop any attempts to change speaker mode while the speaker is enabled.
+ *
+ * We also have some special anti-pop controls dependant on speaker
+ * mode which must be changed along with the mode.
+ */
+static int speaker_mode_put(struct snd_kcontrol *kcontrol,
+			    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
+	unsigned int reg_pwr = wm9081_read(codec, WM9081_POWER_MANAGEMENT);
+	unsigned int reg2 = wm9081_read(codec, WM9081_ANALOGUE_SPEAKER_2);
+
+	/* Are we changing anything? */
+	if (ucontrol->value.integer.value[0] ==
+	    ((reg2 & WM9081_SPK_MODE) != 0))
+		return 0;
+
+	/* Don't try to change modes while enabled */
+	if (reg_pwr & WM9081_SPK_ENA)
+		return -EINVAL;
+
+	if (ucontrol->value.integer.value[0]) {
+		/* Class AB */
+		reg2 &= ~(WM9081_SPK_INV_MUTE | WM9081_OUT_SPK_CTRL);
+		reg2 |= WM9081_SPK_MODE;
+	} else {
+		/* Class D */
+		reg2 |= WM9081_SPK_INV_MUTE | WM9081_OUT_SPK_CTRL;
+		reg2 &= ~WM9081_SPK_MODE;
+	}
+
+	wm9081_write(codec, WM9081_ANALOGUE_SPEAKER_2, reg2);
+
+	return 0;
+}
+
+static const struct snd_kcontrol_new wm9081_snd_controls[] = {
+SOC_SINGLE_TLV("IN1 Volume", WM9081_ANALOGUE_MIXER, 1, 1, 1, in_tlv),
+SOC_SINGLE_TLV("IN2 Volume", WM9081_ANALOGUE_MIXER, 3, 1, 1, in_tlv),
+
+SOC_SINGLE_TLV("Playback Volume", WM9081_DAC_DIGITAL_1, 1, 96, 0, dac_tlv),
+
+SOC_SINGLE("LINEOUT Switch", WM9081_ANALOGUE_LINEOUT, 7, 1, 1),
+SOC_SINGLE("LINEOUT ZC Switch", WM9081_ANALOGUE_LINEOUT, 6, 1, 0),
+SOC_SINGLE_TLV("LINEOUT Volume", WM9081_ANALOGUE_LINEOUT, 0, 63, 0, out_tlv),
+
+SOC_SINGLE("DRC Switch", WM9081_DRC_1, 15, 1, 0),
+SOC_ENUM("DRC High Slope", drc_high),
+SOC_ENUM("DRC Low Slope", drc_low),
+SOC_SINGLE_TLV("DRC Input Volume", WM9081_DRC_4, 5, 60, 1, drc_in_tlv),
+SOC_SINGLE_TLV("DRC Output Volume", WM9081_DRC_4, 0, 30, 1, drc_out_tlv),
+SOC_SINGLE_TLV("DRC Minimum Volume", WM9081_DRC_2, 2, 3, 1, drc_min_tlv),
+SOC_SINGLE_TLV("DRC Maximum Volume", WM9081_DRC_2, 0, 3, 0, drc_max_tlv),
+SOC_ENUM("DRC Attack", drc_atk),
+SOC_ENUM("DRC Decay", drc_dcy),
+SOC_SINGLE("DRC Quick Release Switch", WM9081_DRC_1, 2, 1, 0),
+SOC_SINGLE_TLV("DRC Quick Release Volume", WM9081_DRC_2, 6, 3, 0, drc_qr_tlv),
+SOC_ENUM("DRC Quick Release Decay", drc_qr_dcy),
+SOC_SINGLE_TLV("DRC Startup Volume", WM9081_DRC_1, 6, 18, 0, drc_startup_tlv),
+
+SOC_SINGLE("EQ Switch", WM9081_EQ_1, 0, 1, 0),
+
+SOC_SINGLE("Speaker DC Volume", WM9081_ANALOGUE_SPEAKER_1, 3, 5, 0),
+SOC_SINGLE("Speaker AC Volume", WM9081_ANALOGUE_SPEAKER_1, 0, 5, 0),
+SOC_SINGLE("Speaker Switch", WM9081_ANALOGUE_SPEAKER_PGA, 7, 1, 1),
+SOC_SINGLE("Speaker ZC Switch", WM9081_ANALOGUE_SPEAKER_PGA, 6, 1, 0),
+SOC_SINGLE_TLV("Speaker Volume", WM9081_ANALOGUE_SPEAKER_PGA, 0, 63, 0,
+	       out_tlv),
+SOC_ENUM("DAC Deemphasis", dac_deemph),
+SOC_ENUM_EXT("Speaker Mode", speaker_mode, speaker_mode_get, speaker_mode_put),
+};
+
+static const struct snd_kcontrol_new wm9081_eq_controls[] = {
+SOC_SINGLE_TLV("EQ1 Volume", WM9081_EQ_1, 11, 24, 0, eq_tlv),
+SOC_SINGLE_TLV("EQ2 Volume", WM9081_EQ_1, 6, 24, 0, eq_tlv),
+SOC_SINGLE_TLV("EQ3 Volume", WM9081_EQ_1, 1, 24, 0, eq_tlv),
+SOC_SINGLE_TLV("EQ4 Volume", WM9081_EQ_2, 11, 24, 0, eq_tlv),
+SOC_SINGLE_TLV("EQ5 Volume", WM9081_EQ_2, 6, 24, 0, eq_tlv),
+};
+
+static const struct snd_kcontrol_new mixer[] = {
+SOC_DAPM_SINGLE("IN1 Switch", WM9081_ANALOGUE_MIXER, 0, 1, 0),
+SOC_DAPM_SINGLE("IN2 Switch", WM9081_ANALOGUE_MIXER, 2, 1, 0),
+SOC_DAPM_SINGLE("Playback Switch", WM9081_ANALOGUE_MIXER, 4, 1, 0),
+};
+
+static int speaker_event(struct snd_soc_dapm_widget *w,
+			 struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_codec *codec = w->codec;
+	unsigned int reg = wm9081_read(codec, WM9081_POWER_MANAGEMENT);
+
+	switch (event) {
+	case SND_SOC_DAPM_POST_PMU:
+		reg |= WM9081_SPK_ENA;
+		break;
+
+	case SND_SOC_DAPM_PRE_PMD:
+		reg &= ~WM9081_SPK_ENA;
+		break;
+	}
+
+	wm9081_write(codec, WM9081_POWER_MANAGEMENT, reg);
+
+	return 0;
+}
+
+struct _fll_div {
+	u16 fll_fratio;
+	u16 fll_outdiv;
+	u16 fll_clk_ref_div;
+	u16 n;
+	u16 k;
+};
+
+/* The size in bits of the FLL divide multiplied by 10
+ * to allow rounding later */
+#define FIXED_FLL_SIZE ((1 << 16) * 10)
+
+static struct {
+	unsigned int min;
+	unsigned int max;
+	u16 fll_fratio;
+	int ratio;
+} fll_fratios[] = {
+	{       0,    64000, 4, 16 },
+	{   64000,   128000, 3,  8 },
+	{  128000,   256000, 2,  4 },
+	{  256000,  1000000, 1,  2 },
+	{ 1000000, 13500000, 0,  1 },
+};
+
+static int fll_factors(struct _fll_div *fll_div, unsigned int Fref,
+		       unsigned int Fout)
+{
+	u64 Kpart;
+	unsigned int K, Ndiv, Nmod, target;
+	unsigned int div;
+	int i;
+
+	/* Fref must be <=13.5MHz */
+	div = 1;
+	while ((Fref / div) > 13500000) {
+		div *= 2;
+
+		if (div > 8) {
+			pr_err("Can't scale %dMHz input down to <=13.5MHz\n",
+			       Fref);
+			return -EINVAL;
+		}
+	}
+	fll_div->fll_clk_ref_div = div / 2;
+
+	pr_debug("Fref=%u Fout=%u\n", Fref, Fout);
+
+	/* Apply the division for our remaining calculations */
+	Fref /= div;
+
+	/* Fvco should be 90-100MHz; don't check the upper bound */
+	div = 0;
+	target = Fout * 2;
+	while (target < 90000000) {
+		div++;
+		target *= 2;
+		if (div > 7) {
+			pr_err("Unable to find FLL_OUTDIV for Fout=%uHz\n",
+			       Fout);
+			return -EINVAL;
+		}
+	}
+	fll_div->fll_outdiv = div;
+
+	pr_debug("Fvco=%dHz\n", target);
+
+	/* Find an appropraite FLL_FRATIO and factor it out of the target */
+	for (i = 0; i < ARRAY_SIZE(fll_fratios); i++) {
+		if (fll_fratios[i].min <= Fref && Fref <= fll_fratios[i].max) {
+			fll_div->fll_fratio = fll_fratios[i].fll_fratio;
+			target /= fll_fratios[i].ratio;
+			break;
+		}
+	}
+	if (i == ARRAY_SIZE(fll_fratios)) {
+		pr_err("Unable to find FLL_FRATIO for Fref=%uHz\n", Fref);
+		return -EINVAL;
+	}
+
+	/* Now, calculate N.K */
+	Ndiv = target / Fref;
+
+	fll_div->n = Ndiv;
+	Nmod = target % Fref;
+	pr_debug("Nmod=%d\n", Nmod);
+
+	/* Calculate fractional part - scale up so we can round. */
+	Kpart = FIXED_FLL_SIZE * (long long)Nmod;
+
+	do_div(Kpart, Fref);
+
+	K = Kpart & 0xFFFFFFFF;
+
+	if ((K % 10) >= 5)
+		K += 5;
+
+	/* Move down to proper range now rounding is done */
+	fll_div->k = K / 10;
+
+	pr_debug("N=%x K=%x FLL_FRATIO=%x FLL_OUTDIV=%x FLL_CLK_REF_DIV=%x\n",
+		 fll_div->n, fll_div->k,
+		 fll_div->fll_fratio, fll_div->fll_outdiv,
+		 fll_div->fll_clk_ref_div);
+
+	return 0;
+}
+
+static int wm9081_set_fll(struct snd_soc_codec *codec, int fll_id,
+			  unsigned int Fref, unsigned int Fout)
+{
+	struct wm9081_priv *wm9081 = codec->private_data;
+	u16 reg1, reg4, reg5;
+	struct _fll_div fll_div;
+	int ret;
+	int clk_sys_reg;
+
+	/* Any change? */
+	if (Fref == wm9081->fll_fref && Fout == wm9081->fll_fout)
+		return 0;
+
+	/* Disable the FLL */
+	if (Fout == 0) {
+		dev_dbg(codec->dev, "FLL disabled\n");
+		wm9081->fll_fref = 0;
+		wm9081->fll_fout = 0;
+
+		return 0;
+	}
+
+	ret = fll_factors(&fll_div, Fref, Fout);
+	if (ret != 0)
+		return ret;
+
+	reg5 = wm9081_read(codec, WM9081_FLL_CONTROL_5);
+	reg5 &= ~WM9081_FLL_CLK_SRC_MASK;
+
+	switch (fll_id) {
+	case WM9081_SYSCLK_FLL_MCLK:
+		reg5 |= 0x1;
+		break;
+
+	default:
+		dev_err(codec->dev, "Unknown FLL ID %d\n", fll_id);
+		return -EINVAL;
+	}
+
+	/* Disable CLK_SYS while we reconfigure */
+	clk_sys_reg = wm9081_read(codec, WM9081_CLOCK_CONTROL_3);
+	if (clk_sys_reg & WM9081_CLK_SYS_ENA)
+		wm9081_write(codec, WM9081_CLOCK_CONTROL_3,
+			     clk_sys_reg & ~WM9081_CLK_SYS_ENA);
+
+	/* Any FLL configuration change requires that the FLL be
+	 * disabled first. */
+	reg1 = wm9081_read(codec, WM9081_FLL_CONTROL_1);
+	reg1 &= ~WM9081_FLL_ENA;
+	wm9081_write(codec, WM9081_FLL_CONTROL_1, reg1);
+
+	/* Apply the configuration */
+	if (fll_div.k)
+		reg1 |= WM9081_FLL_FRAC_MASK;
+	else
+		reg1 &= ~WM9081_FLL_FRAC_MASK;
+	wm9081_write(codec, WM9081_FLL_CONTROL_1, reg1);
+
+	wm9081_write(codec, WM9081_FLL_CONTROL_2,
+		     (fll_div.fll_outdiv << WM9081_FLL_OUTDIV_SHIFT) |
+		     (fll_div.fll_fratio << WM9081_FLL_FRATIO_SHIFT));
+	wm9081_write(codec, WM9081_FLL_CONTROL_3, fll_div.k);
+
+	reg4 = wm9081_read(codec, WM9081_FLL_CONTROL_4);
+	reg4 &= ~WM9081_FLL_N_MASK;
+	reg4 |= fll_div.n << WM9081_FLL_N_SHIFT;
+	wm9081_write(codec, WM9081_FLL_CONTROL_4, reg4);
+
+	reg5 &= ~WM9081_FLL_CLK_REF_DIV_MASK;
+	reg5 |= fll_div.fll_clk_ref_div << WM9081_FLL_CLK_REF_DIV_SHIFT;
+	wm9081_write(codec, WM9081_FLL_CONTROL_5, reg5);
+
+	/* Enable the FLL */
+	wm9081_write(codec, WM9081_FLL_CONTROL_1, reg1 | WM9081_FLL_ENA);
+
+	/* Then bring CLK_SYS up again if it was disabled */
+	if (clk_sys_reg & WM9081_CLK_SYS_ENA)
+		wm9081_write(codec, WM9081_CLOCK_CONTROL_3, clk_sys_reg);
+
+	dev_dbg(codec->dev, "FLL enabled at %dHz->%dHz\n", Fref, Fout);
+
+	wm9081->fll_fref = Fref;
+	wm9081->fll_fout = Fout;
+
+	return 0;
+}
+
+static int configure_clock(struct snd_soc_codec *codec)
+{
+	struct wm9081_priv *wm9081 = codec->private_data;
+	int new_sysclk, i, target;
+	unsigned int reg;
+	int ret = 0;
+	int mclkdiv = 0;
+	int fll = 0;
+
+	switch (wm9081->sysclk_source) {
+	case WM9081_SYSCLK_MCLK:
+		if (wm9081->mclk_rate > 12225000) {
+			mclkdiv = 1;
+			wm9081->sysclk_rate = wm9081->mclk_rate / 2;
+		} else {
+			wm9081->sysclk_rate = wm9081->mclk_rate;
+		}
+		wm9081_set_fll(codec, WM9081_SYSCLK_FLL_MCLK, 0, 0);
+		break;
+
+	case WM9081_SYSCLK_FLL_MCLK:
+		/* If we have a sample rate calculate a CLK_SYS that
+		 * gives us a suitable DAC configuration, plus BCLK.
+		 * Ideally we would check to see if we can clock
+		 * directly from MCLK and only use the FLL if this is
+		 * not the case, though care must be taken with free
+		 * running mode.
+		 */
+		if (wm9081->master && wm9081->bclk) {
+			/* Make sure we can generate CLK_SYS and BCLK
+			 * and that we've got 3MHz for optimal
+			 * performance. */
+			for (i = 0; i < ARRAY_SIZE(clk_sys_rates); i++) {
+				target = wm9081->fs * clk_sys_rates[i].ratio;
+				if (target >= wm9081->bclk &&
+				    target > 3000000)
+					new_sysclk = target;
+			}
+		} else if (wm9081->fs) {
+			for (i = 0; i < ARRAY_SIZE(clk_sys_rates); i++) {
+				new_sysclk = clk_sys_rates[i].ratio
+					* wm9081->fs;
+				if (new_sysclk > 3000000)
+					break;
+			}
+		} else {
+			new_sysclk = 12288000;
+		}
+
+		ret = wm9081_set_fll(codec, WM9081_SYSCLK_FLL_MCLK,
+				     wm9081->mclk_rate, new_sysclk);
+		if (ret == 0) {
+			wm9081->sysclk_rate = new_sysclk;
+
+			/* Switch SYSCLK over to FLL */
+			fll = 1;
+		} else {
+			wm9081->sysclk_rate = wm9081->mclk_rate;
+		}
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	reg = wm9081_read(codec, WM9081_CLOCK_CONTROL_1);
+	if (mclkdiv)
+		reg |= WM9081_MCLKDIV2;
+	else
+		reg &= ~WM9081_MCLKDIV2;
+	wm9081_write(codec, WM9081_CLOCK_CONTROL_1, reg);
+
+	reg = wm9081_read(codec, WM9081_CLOCK_CONTROL_3);
+	if (fll)
+		reg |= WM9081_CLK_SRC_SEL;
+	else
+		reg &= ~WM9081_CLK_SRC_SEL;
+	wm9081_write(codec, WM9081_CLOCK_CONTROL_3, reg);
+
+	dev_dbg(codec->dev, "CLK_SYS is %dHz\n", wm9081->sysclk_rate);
+
+	return ret;
+}
+
+static int clk_sys_event(struct snd_soc_dapm_widget *w,
+			 struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_codec *codec = w->codec;
+	struct wm9081_priv *wm9081 = codec->private_data;
+
+	/* This should be done on init() for bypass paths */
+	switch (wm9081->sysclk_source) {
+	case WM9081_SYSCLK_MCLK:
+		dev_dbg(codec->dev, "Using %dHz MCLK\n", wm9081->mclk_rate);
+		break;
+	case WM9081_SYSCLK_FLL_MCLK:
+		dev_dbg(codec->dev, "Using %dHz MCLK with FLL\n",
+			wm9081->mclk_rate);
+		break;
+	default:
+		dev_err(codec->dev, "System clock not configured\n");
+		return -EINVAL;
+	}
+
+	switch (event) {
+	case SND_SOC_DAPM_PRE_PMU:
+		configure_clock(codec);
+		break;
+
+	case SND_SOC_DAPM_POST_PMD:
+		/* Disable the FLL if it's running */
+		wm9081_set_fll(codec, 0, 0, 0);
+		break;
+	}
+
+	return 0;
+}
+
+static const struct snd_soc_dapm_widget wm9081_dapm_widgets[] = {
+SND_SOC_DAPM_INPUT("IN1"),
+SND_SOC_DAPM_INPUT("IN2"),
+
+SND_SOC_DAPM_DAC("DAC", "HiFi Playback", WM9081_POWER_MANAGEMENT, 0, 0),
+
+SND_SOC_DAPM_MIXER_NAMED_CTL("Mixer", SND_SOC_NOPM, 0, 0,
+			     mixer, ARRAY_SIZE(mixer)),
+
+SND_SOC_DAPM_PGA("LINEOUT PGA", WM9081_POWER_MANAGEMENT, 4, 0, NULL, 0),
+
+SND_SOC_DAPM_PGA_E("Speaker PGA", WM9081_POWER_MANAGEMENT, 2, 0, NULL, 0,
+		   speaker_event,
+		   SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD),
+
+SND_SOC_DAPM_OUTPUT("LINEOUT"),
+SND_SOC_DAPM_OUTPUT("SPKN"),
+SND_SOC_DAPM_OUTPUT("SPKP"),
+
+SND_SOC_DAPM_SUPPLY("CLK_SYS", WM9081_CLOCK_CONTROL_3, 0, 0, clk_sys_event,
+		    SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD),
+SND_SOC_DAPM_SUPPLY("CLK_DSP", WM9081_CLOCK_CONTROL_3, 1, 0, NULL, 0),
+SND_SOC_DAPM_SUPPLY("TOCLK", WM9081_CLOCK_CONTROL_3, 2, 0, NULL, 0),
+};
+
+
+static const struct snd_soc_dapm_route audio_paths[] = {
+	{ "DAC", NULL, "CLK_SYS" },
+	{ "DAC", NULL, "CLK_DSP" },
+
+	{ "Mixer", "IN1 Switch", "IN1" },
+	{ "Mixer", "IN2 Switch", "IN2" },
+	{ "Mixer", "Playback Switch", "DAC" },
+
+	{ "LINEOUT PGA", NULL, "Mixer" },
+	{ "LINEOUT PGA", NULL, "TOCLK" },
+	{ "LINEOUT PGA", NULL, "CLK_SYS" },
+
+	{ "LINEOUT", NULL, "LINEOUT PGA" },
+
+	{ "Speaker PGA", NULL, "Mixer" },
+	{ "Speaker PGA", NULL, "TOCLK" },
+	{ "Speaker PGA", NULL, "CLK_SYS" },
+
+	{ "SPKN", NULL, "Speaker PGA" },
+	{ "SPKP", NULL, "Speaker PGA" },
+};
+
+static int wm9081_set_bias_level(struct snd_soc_codec *codec,
+				 enum snd_soc_bias_level level)
+{
+	u16 reg;
+
+	switch (level) {
+	case SND_SOC_BIAS_ON:
+		break;
+
+	case SND_SOC_BIAS_PREPARE:
+		/* VMID=2*40k */
+		reg = wm9081_read(codec, WM9081_VMID_CONTROL);
+		reg &= ~WM9081_VMID_SEL_MASK;
+		reg |= 0x2;
+		wm9081_write(codec, WM9081_VMID_CONTROL, reg);
+
+		/* Normal bias current */
+		reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1);
+		reg &= ~WM9081_STBY_BIAS_ENA;
+		wm9081_write(codec, WM9081_BIAS_CONTROL_1, reg);
+		break;
+
+	case SND_SOC_BIAS_STANDBY:
+		/* Initial cold start */
+		if (codec->bias_level == SND_SOC_BIAS_OFF) {
+			/* Disable LINEOUT discharge */
+			reg = wm9081_read(codec, WM9081_ANTI_POP_CONTROL);
+			reg &= ~WM9081_LINEOUT_DISCH;
+			wm9081_write(codec, WM9081_ANTI_POP_CONTROL, reg);
+
+			/* Select startup bias source */
+			reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1);
+			reg |= WM9081_BIAS_SRC | WM9081_BIAS_ENA;
+			wm9081_write(codec, WM9081_BIAS_CONTROL_1, reg);
+
+			/* VMID 2*4k; Soft VMID ramp enable */
+			reg = wm9081_read(codec, WM9081_VMID_CONTROL);
+			reg |= WM9081_VMID_RAMP | 0x6;
+			wm9081_write(codec, WM9081_VMID_CONTROL, reg);
+
+			mdelay(100);
+
+			/* Normal bias enable & soft start off */
+			reg |= WM9081_BIAS_ENA;
+			reg &= ~WM9081_VMID_RAMP;
+			wm9081_write(codec, WM9081_VMID_CONTROL, reg);
+
+			/* Standard bias source */
+			reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1);
+			reg &= ~WM9081_BIAS_SRC;
+			wm9081_write(codec, WM9081_BIAS_CONTROL_1, reg);
+		}
+
+		/* VMID 2*240k */
+		reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1);
+		reg &= ~WM9081_VMID_SEL_MASK;
+		reg |= 0x40;
+		wm9081_write(codec, WM9081_VMID_CONTROL, reg);
+
+		/* Standby bias current on */
+		reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1);
+		reg |= WM9081_STBY_BIAS_ENA;
+		wm9081_write(codec, WM9081_BIAS_CONTROL_1, reg);
+		break;
+
+	case SND_SOC_BIAS_OFF:
+		/* Startup bias source */
+		reg = wm9081_read(codec, WM9081_BIAS_CONTROL_1);
+		reg |= WM9081_BIAS_SRC;
+		wm9081_write(codec, WM9081_BIAS_CONTROL_1, reg);
+
+		/* Disable VMID and biases with soft ramping */
+		reg = wm9081_read(codec, WM9081_VMID_CONTROL);
+		reg &= ~(WM9081_VMID_SEL_MASK | WM9081_BIAS_ENA);
+		reg |= WM9081_VMID_RAMP;
+		wm9081_write(codec, WM9081_VMID_CONTROL, reg);
+
+		/* Actively discharge LINEOUT */
+		reg = wm9081_read(codec, WM9081_ANTI_POP_CONTROL);
+		reg |= WM9081_LINEOUT_DISCH;
+		wm9081_write(codec, WM9081_ANTI_POP_CONTROL, reg);
+		break;
+	}
+
+	codec->bias_level = level;
+
+	return 0;
+}
+
+static int wm9081_set_dai_fmt(struct snd_soc_dai *dai,
+			      unsigned int fmt)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	struct wm9081_priv *wm9081 = codec->private_data;
+	unsigned int aif2 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_2);
+
+	aif2 &= ~(WM9081_AIF_BCLK_INV | WM9081_AIF_LRCLK_INV |
+		  WM9081_BCLK_DIR | WM9081_LRCLK_DIR | WM9081_AIF_FMT_MASK);
+
+	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBS_CFS:
+		wm9081->master = 0;
+		break;
+	case SND_SOC_DAIFMT_CBS_CFM:
+		aif2 |= WM9081_LRCLK_DIR;
+		wm9081->master = 1;
+		break;
+	case SND_SOC_DAIFMT_CBM_CFS:
+		aif2 |= WM9081_BCLK_DIR;
+		wm9081->master = 1;
+		break;
+	case SND_SOC_DAIFMT_CBM_CFM:
+		aif2 |= WM9081_LRCLK_DIR | WM9081_BCLK_DIR;
+		wm9081->master = 1;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_DSP_B:
+		aif2 |= WM9081_AIF_LRCLK_INV;
+	case SND_SOC_DAIFMT_DSP_A:
+		aif2 |= 0x3;
+		break;
+	case SND_SOC_DAIFMT_I2S:
+		aif2 |= 0x2;
+		break;
+	case SND_SOC_DAIFMT_RIGHT_J:
+		break;
+	case SND_SOC_DAIFMT_LEFT_J:
+		aif2 |= 0x1;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_DSP_A:
+	case SND_SOC_DAIFMT_DSP_B:
+		/* frame inversion not valid for DSP modes */
+		switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+		case SND_SOC_DAIFMT_NB_NF:
+			break;
+		case SND_SOC_DAIFMT_IB_NF:
+			aif2 |= WM9081_AIF_BCLK_INV;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+
+	case SND_SOC_DAIFMT_I2S:
+	case SND_SOC_DAIFMT_RIGHT_J:
+	case SND_SOC_DAIFMT_LEFT_J:
+		switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+		case SND_SOC_DAIFMT_NB_NF:
+			break;
+		case SND_SOC_DAIFMT_IB_IF:
+			aif2 |= WM9081_AIF_BCLK_INV | WM9081_AIF_LRCLK_INV;
+			break;
+		case SND_SOC_DAIFMT_IB_NF:
+			aif2 |= WM9081_AIF_BCLK_INV;
+			break;
+		case SND_SOC_DAIFMT_NB_IF:
+			aif2 |= WM9081_AIF_LRCLK_INV;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	wm9081_write(codec, WM9081_AUDIO_INTERFACE_2, aif2);
+
+	return 0;
+}
+
+static int wm9081_hw_params(struct snd_pcm_substream *substream,
+			    struct snd_pcm_hw_params *params,
+			    struct snd_soc_dai *dai)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	struct wm9081_priv *wm9081 = codec->private_data;
+	int ret, i, best, best_val, cur_val;
+	unsigned int clk_ctrl2, aif1, aif2, aif3, aif4;
+
+	clk_ctrl2 = wm9081_read(codec, WM9081_CLOCK_CONTROL_2);
+	clk_ctrl2 &= ~(WM9081_CLK_SYS_RATE_MASK | WM9081_SAMPLE_RATE_MASK);
+
+	aif1 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_1);
+
+	aif2 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_2);
+	aif2 &= ~WM9081_AIF_WL_MASK;
+
+	aif3 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_3);
+	aif3 &= ~WM9081_BCLK_DIV_MASK;
+
+	aif4 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_4);
+	aif4 &= ~WM9081_LRCLK_RATE_MASK;
+
+	/* What BCLK do we need? */
+	wm9081->fs = params_rate(params);
+	wm9081->bclk = 2 * wm9081->fs;
+	switch (params_format(params)) {
+	case SNDRV_PCM_FORMAT_S16_LE:
+		wm9081->bclk *= 16;
+		break;
+	case SNDRV_PCM_FORMAT_S20_3LE:
+		wm9081->bclk *= 20;
+		aif2 |= 0x4;
+		break;
+	case SNDRV_PCM_FORMAT_S24_LE:
+		wm9081->bclk *= 24;
+		aif2 |= 0x8;
+		break;
+	case SNDRV_PCM_FORMAT_S32_LE:
+		wm9081->bclk *= 32;
+		aif2 |= 0xc;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (aif1 & WM9081_AIFDAC_TDM_MODE_MASK) {
+		int slots = ((aif1 & WM9081_AIFDAC_TDM_MODE_MASK) >>
+			     WM9081_AIFDAC_TDM_MODE_SHIFT) + 1;
+		wm9081->bclk *= slots;
+	}
+
+	dev_dbg(codec->dev, "Target BCLK is %dHz\n", wm9081->bclk);
+
+	ret = configure_clock(codec);
+	if (ret != 0)
+		return ret;
+
+	/* Select nearest CLK_SYS_RATE */
+	best = 0;
+	best_val = abs((wm9081->sysclk_rate / clk_sys_rates[0].ratio)
+		       - wm9081->fs);
+	for (i = 1; i < ARRAY_SIZE(clk_sys_rates); i++) {
+		cur_val = abs((wm9081->sysclk_rate /
+			       clk_sys_rates[i].ratio) - wm9081->fs);;
+		if (cur_val < best_val) {
+			best = i;
+			best_val = cur_val;
+		}
+	}
+	dev_dbg(codec->dev, "Selected CLK_SYS_RATIO of %d\n",
+		clk_sys_rates[best].ratio);
+	clk_ctrl2 |= (clk_sys_rates[best].clk_sys_rate
+		      << WM9081_CLK_SYS_RATE_SHIFT);
+
+	/* SAMPLE_RATE */
+	best = 0;
+	best_val = abs(wm9081->fs - sample_rates[0].rate);
+	for (i = 1; i < ARRAY_SIZE(sample_rates); i++) {
+		/* Closest match */
+		cur_val = abs(wm9081->fs - sample_rates[i].rate);
+		if (cur_val < best_val) {
+			best = i;
+			best_val = cur_val;
+		}
+	}
+	dev_dbg(codec->dev, "Selected SAMPLE_RATE of %dHz\n",
+		sample_rates[best].rate);
+	clk_ctrl2 |= (sample_rates[i].sample_rate << WM9081_SAMPLE_RATE_SHIFT);
+
+	/* BCLK_DIV */
+	best = 0;
+	best_val = INT_MAX;
+	for (i = 0; i < ARRAY_SIZE(bclk_divs); i++) {
+		cur_val = ((wm9081->sysclk_rate * 10) / bclk_divs[i].div)
+			- wm9081->bclk;
+		if (cur_val < 0) /* Table is sorted */
+			break;
+		if (cur_val < best_val) {
+			best = i;
+			best_val = cur_val;
+		}
+	}
+	wm9081->bclk = (wm9081->sysclk_rate * 10) / bclk_divs[best].div;
+	dev_dbg(codec->dev, "Selected BCLK_DIV of %d for %dHz BCLK\n",
+		bclk_divs[best].div, wm9081->bclk);
+	aif3 |= bclk_divs[best].bclk_div;
+
+	/* LRCLK is a simple fraction of BCLK */
+	dev_dbg(codec->dev, "LRCLK_RATE is %d\n", wm9081->bclk / wm9081->fs);
+	aif4 |= wm9081->bclk / wm9081->fs;
+
+	/* Apply a ReTune Mobile configuration if it's in use */
+	if (wm9081->retune) {
+		struct wm9081_retune_mobile_config *retune = wm9081->retune;
+		struct wm9081_retune_mobile_setting *s;
+		int eq1;
+
+		best = 0;
+		best_val = abs(retune->configs[0].rate - wm9081->fs);
+		for (i = 0; i < retune->num_configs; i++) {
+			cur_val = abs(retune->configs[i].rate - wm9081->fs);
+			if (cur_val < best_val) {
+				best_val = cur_val;
+				best = i;
+			}
+		}
+		s = &retune->configs[best];
+
+		dev_dbg(codec->dev, "ReTune Mobile %s tuned for %dHz\n",
+			s->name, s->rate);
+
+		/* If the EQ is enabled then disable it while we write out */
+		eq1 = wm9081_read(codec, WM9081_EQ_1) & WM9081_EQ_ENA;
+		if (eq1 & WM9081_EQ_ENA)
+			wm9081_write(codec, WM9081_EQ_1, 0);
+
+		/* Write out the other values */
+		for (i = 1; i < ARRAY_SIZE(s->config); i++)
+			wm9081_write(codec, WM9081_EQ_1 + i, s->config[i]);
+
+		eq1 |= (s->config[0] & ~WM9081_EQ_ENA);
+		wm9081_write(codec, WM9081_EQ_1, eq1);
+	}
+
+	wm9081_write(codec, WM9081_CLOCK_CONTROL_2, clk_ctrl2);
+	wm9081_write(codec, WM9081_AUDIO_INTERFACE_2, aif2);
+	wm9081_write(codec, WM9081_AUDIO_INTERFACE_3, aif3);
+	wm9081_write(codec, WM9081_AUDIO_INTERFACE_4, aif4);
+
+	return 0;
+}
+
+static int wm9081_digital_mute(struct snd_soc_dai *codec_dai, int mute)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	unsigned int reg;
+
+	reg = wm9081_read(codec, WM9081_DAC_DIGITAL_2);
+
+	if (mute)
+		reg |= WM9081_DAC_MUTE;
+	else
+		reg &= ~WM9081_DAC_MUTE;
+
+	wm9081_write(codec, WM9081_DAC_DIGITAL_2, reg);
+
+	return 0;
+}
+
+static int wm9081_set_sysclk(struct snd_soc_dai *codec_dai,
+			     int clk_id, unsigned int freq, int dir)
+{
+	struct snd_soc_codec *codec = codec_dai->codec;
+	struct wm9081_priv *wm9081 = codec->private_data;
+
+	switch (clk_id) {
+	case WM9081_SYSCLK_MCLK:
+	case WM9081_SYSCLK_FLL_MCLK:
+		wm9081->sysclk_source = clk_id;
+		wm9081->mclk_rate = freq;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int wm9081_set_tdm_slot(struct snd_soc_dai *dai,
+			       unsigned int mask, int slots)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	unsigned int aif1 = wm9081_read(codec, WM9081_AUDIO_INTERFACE_1);
+
+	aif1 &= ~(WM9081_AIFDAC_TDM_SLOT_MASK | WM9081_AIFDAC_TDM_MODE_MASK);
+
+	if (slots < 1 || slots > 4)
+		return -EINVAL;
+
+	aif1 |= (slots - 1) << WM9081_AIFDAC_TDM_MODE_SHIFT;
+
+	switch (mask) {
+	case 1:
+		break;
+	case 2:
+		aif1 |= 0x10;
+		break;
+	case 4:
+		aif1 |= 0x20;
+		break;
+	case 8:
+		aif1 |= 0x30;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	wm9081_write(codec, WM9081_AUDIO_INTERFACE_1, aif1);
+
+	return 0;
+}
+
+#define WM9081_RATES SNDRV_PCM_RATE_8000_96000
+
+#define WM9081_FORMATS \
+	(SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE | \
+	 SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S32_LE)
+
+static struct snd_soc_dai_ops wm9081_dai_ops = {
+	.hw_params = wm9081_hw_params,
+	.set_sysclk = wm9081_set_sysclk,
+	.set_fmt = wm9081_set_dai_fmt,
+	.digital_mute = wm9081_digital_mute,
+	.set_tdm_slot = wm9081_set_tdm_slot,
+};
+
+/* We report two channels because the CODEC processes a stereo signal, even
+ * though it is only capable of handling a mono output.
+ */
+struct snd_soc_dai wm9081_dai = {
+	.name = "WM9081",
+	.playback = {
+		.stream_name = "HiFi Playback",
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = WM9081_RATES,
+		.formats = WM9081_FORMATS,
+	},
+	.ops = &wm9081_dai_ops,
+};
+EXPORT_SYMBOL_GPL(wm9081_dai);
+
+
+static struct snd_soc_codec *wm9081_codec;
+
+static int wm9081_probe(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec;
+	struct wm9081_priv *wm9081;
+	int ret = 0;
+
+	if (wm9081_codec == NULL) {
+		dev_err(&pdev->dev, "Codec device not registered\n");
+		return -ENODEV;
+	}
+
+	socdev->card->codec = wm9081_codec;
+	codec = wm9081_codec;
+	wm9081 = codec->private_data;
+
+	/* register pcms */
+	ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1);
+	if (ret < 0) {
+		dev_err(codec->dev, "failed to create pcms: %d\n", ret);
+		goto pcm_err;
+	}
+
+	snd_soc_add_controls(codec, wm9081_snd_controls,
+			     ARRAY_SIZE(wm9081_snd_controls));
+	if (!wm9081->retune) {
+		dev_dbg(codec->dev,
+			"No ReTune Mobile data, using normal EQ\n");
+		snd_soc_add_controls(codec, wm9081_eq_controls,
+				     ARRAY_SIZE(wm9081_eq_controls));
+	}
+
+	snd_soc_dapm_new_controls(codec, wm9081_dapm_widgets,
+				  ARRAY_SIZE(wm9081_dapm_widgets));
+	snd_soc_dapm_add_routes(codec, audio_paths, ARRAY_SIZE(audio_paths));
+	snd_soc_dapm_new_widgets(codec);
+
+	ret = snd_soc_init_card(socdev);
+	if (ret < 0) {
+		dev_err(codec->dev, "failed to register card: %d\n", ret);
+		goto card_err;
+	}
+
+	return ret;
+
+card_err:
+	snd_soc_free_pcms(socdev);
+	snd_soc_dapm_free(socdev);
+pcm_err:
+	return ret;
+}
+
+static int wm9081_remove(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+
+	snd_soc_free_pcms(socdev);
+	snd_soc_dapm_free(socdev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int wm9081_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+
+	wm9081_set_bias_level(codec, SND_SOC_BIAS_OFF);
+
+	return 0;
+}
+
+static int wm9081_resume(struct platform_device *pdev)
+{
+	struct snd_soc_device *socdev = platform_get_drvdata(pdev);
+	struct snd_soc_codec *codec = socdev->card->codec;
+	u16 *reg_cache = codec->reg_cache;
+	int i;
+
+	for (i = 0; i < codec->reg_cache_size; i++) {
+		if (i == WM9081_SOFTWARE_RESET)
+			continue;
+
+		wm9081_write(codec, i, reg_cache[i]);
+	}
+
+	wm9081_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+
+	return 0;
+}
+#else
+#define wm9081_suspend NULL
+#define wm9081_resume NULL
+#endif
+
+struct snd_soc_codec_device soc_codec_dev_wm9081 = {
+	.probe = 	wm9081_probe,
+	.remove = 	wm9081_remove,
+	.suspend =	wm9081_suspend,
+	.resume =	wm9081_resume,
+};
+EXPORT_SYMBOL_GPL(soc_codec_dev_wm9081);
+
+static int wm9081_register(struct wm9081_priv *wm9081)
+{
+	struct snd_soc_codec *codec = &wm9081->codec;
+	int ret;
+	u16 reg;
+
+	if (wm9081_codec) {
+		dev_err(codec->dev, "Another WM9081 is registered\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	mutex_init(&codec->mutex);
+	INIT_LIST_HEAD(&codec->dapm_widgets);
+	INIT_LIST_HEAD(&codec->dapm_paths);
+
+	codec->private_data = wm9081;
+	codec->name = "WM9081";
+	codec->owner = THIS_MODULE;
+	codec->read = wm9081_read;
+	codec->write = wm9081_write;
+	codec->dai = &wm9081_dai;
+	codec->num_dai = 1;
+	codec->reg_cache_size = ARRAY_SIZE(wm9081->reg_cache);
+	codec->reg_cache = &wm9081->reg_cache;
+	codec->bias_level = SND_SOC_BIAS_OFF;
+	codec->set_bias_level = wm9081_set_bias_level;
+
+	memcpy(codec->reg_cache, wm9081_reg_defaults,
+	       sizeof(wm9081_reg_defaults));
+
+	reg = wm9081_read_hw(codec, WM9081_SOFTWARE_RESET);
+	if (reg != 0x9081) {
+		dev_err(codec->dev, "Device is not a WM9081: ID=0x%x\n", reg);
+		ret = -EINVAL;
+		goto err;
+	}
+
+	ret = wm9081_reset(codec);
+	if (ret < 0) {
+		dev_err(codec->dev, "Failed to issue reset\n");
+		return ret;
+	}
+
+	wm9081_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+
+	/* Enable zero cross by default */
+	reg = wm9081_read(codec, WM9081_ANALOGUE_LINEOUT);
+	wm9081_write(codec, WM9081_ANALOGUE_LINEOUT, reg | WM9081_LINEOUTZC);
+	reg = wm9081_read(codec, WM9081_ANALOGUE_SPEAKER_PGA);
+	wm9081_write(codec, WM9081_ANALOGUE_SPEAKER_PGA,
+		     reg | WM9081_SPKPGAZC);
+
+	wm9081_dai.dev = codec->dev;
+
+	wm9081_codec = codec;
+
+	ret = snd_soc_register_codec(codec);
+	if (ret != 0) {
+		dev_err(codec->dev, "Failed to register codec: %d\n", ret);
+		return ret;
+	}
+
+	ret = snd_soc_register_dai(&wm9081_dai);
+	if (ret != 0) {
+		dev_err(codec->dev, "Failed to register DAI: %d\n", ret);
+		snd_soc_unregister_codec(codec);
+		return ret;
+	}
+
+	return 0;
+
+err:
+	kfree(wm9081);
+	return ret;
+}
+
+static void wm9081_unregister(struct wm9081_priv *wm9081)
+{
+	wm9081_set_bias_level(&wm9081->codec, SND_SOC_BIAS_OFF);
+	snd_soc_unregister_dai(&wm9081_dai);
+	snd_soc_unregister_codec(&wm9081->codec);
+	kfree(wm9081);
+	wm9081_codec = NULL;
+}
+
+static __devinit int wm9081_i2c_probe(struct i2c_client *i2c,
+				      const struct i2c_device_id *id)
+{
+	struct wm9081_priv *wm9081;
+	struct snd_soc_codec *codec;
+
+	wm9081 = kzalloc(sizeof(struct wm9081_priv), GFP_KERNEL);
+	if (wm9081 == NULL)
+		return -ENOMEM;
+
+	codec = &wm9081->codec;
+	codec->hw_write = (hw_write_t)i2c_master_send;
+	wm9081->retune = i2c->dev.platform_data;
+
+	i2c_set_clientdata(i2c, wm9081);
+	codec->control_data = i2c;
+
+	codec->dev = &i2c->dev;
+
+	return wm9081_register(wm9081);
+}
+
+static __devexit int wm9081_i2c_remove(struct i2c_client *client)
+{
+	struct wm9081_priv *wm9081 = i2c_get_clientdata(client);
+	wm9081_unregister(wm9081);
+	return 0;
+}
+
+static const struct i2c_device_id wm9081_i2c_id[] = {
+	{ "wm9081", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, wm9081_i2c_id);
+
+static struct i2c_driver wm9081_i2c_driver = {
+	.driver = {
+		.name = "wm9081",
+		.owner = THIS_MODULE,
+	},
+	.probe =    wm9081_i2c_probe,
+	.remove =   __devexit_p(wm9081_i2c_remove),
+	.id_table = wm9081_i2c_id,
+};
+
+static int __init wm9081_modinit(void)
+{
+	int ret;
+
+	ret = i2c_add_driver(&wm9081_i2c_driver);
+	if (ret != 0) {
+		printk(KERN_ERR "Failed to register WM9081 I2C driver: %d\n",
+		       ret);
+	}
+
+	return ret;
+}
+module_init(wm9081_modinit);
+
+static void __exit wm9081_exit(void)
+{
+	i2c_del_driver(&wm9081_i2c_driver);
+}
+module_exit(wm9081_exit);
+
+
+MODULE_DESCRIPTION("ASoC WM9081 driver");
+MODULE_AUTHOR("Mark Brown <broonie@opensource.wolfsonmicro.com>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/wm9081.h b/sound/soc/codecs/wm9081.h
new file mode 100644
index 00000000000..42d3bc75702
--- /dev/null
+++ b/sound/soc/codecs/wm9081.h
@@ -0,0 +1,787 @@
+#ifndef WM9081_H
+#define WM9081_H
+
+/*
+ * wm9081.c  --  WM9081 ALSA SoC Audio driver
+ *
+ * Author: Mark Brown
+ *
+ * Copyright 2009 Wolfson Microelectronics plc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <sound/soc.h>
+
+extern struct snd_soc_dai wm9081_dai;
+extern struct snd_soc_codec_device soc_codec_dev_wm9081;
+
+/*
+ * SYSCLK sources
+ */
+#define WM9081_SYSCLK_MCLK      1   /* Use MCLK without FLL */
+#define WM9081_SYSCLK_FLL_MCLK  2   /* Use MCLK, enabling FLL if required */
+
+/*
+ * Register values.
+ */
+#define WM9081_SOFTWARE_RESET                   0x00
+#define WM9081_ANALOGUE_LINEOUT                 0x02
+#define WM9081_ANALOGUE_SPEAKER_PGA             0x03
+#define WM9081_VMID_CONTROL                     0x04
+#define WM9081_BIAS_CONTROL_1                   0x05
+#define WM9081_ANALOGUE_MIXER                   0x07
+#define WM9081_ANTI_POP_CONTROL                 0x08
+#define WM9081_ANALOGUE_SPEAKER_1               0x09
+#define WM9081_ANALOGUE_SPEAKER_2               0x0A
+#define WM9081_POWER_MANAGEMENT                 0x0B
+#define WM9081_CLOCK_CONTROL_1                  0x0C
+#define WM9081_CLOCK_CONTROL_2                  0x0D
+#define WM9081_CLOCK_CONTROL_3                  0x0E
+#define WM9081_FLL_CONTROL_1                    0x10
+#define WM9081_FLL_CONTROL_2                    0x11
+#define WM9081_FLL_CONTROL_3                    0x12
+#define WM9081_FLL_CONTROL_4                    0x13
+#define WM9081_FLL_CONTROL_5                    0x14
+#define WM9081_AUDIO_INTERFACE_1                0x16
+#define WM9081_AUDIO_INTERFACE_2                0x17
+#define WM9081_AUDIO_INTERFACE_3                0x18
+#define WM9081_AUDIO_INTERFACE_4                0x19
+#define WM9081_INTERRUPT_STATUS                 0x1A
+#define WM9081_INTERRUPT_STATUS_MASK            0x1B
+#define WM9081_INTERRUPT_POLARITY               0x1C
+#define WM9081_INTERRUPT_CONTROL                0x1D
+#define WM9081_DAC_DIGITAL_1                    0x1E
+#define WM9081_DAC_DIGITAL_2                    0x1F
+#define WM9081_DRC_1                            0x20
+#define WM9081_DRC_2                            0x21
+#define WM9081_DRC_3                            0x22
+#define WM9081_DRC_4                            0x23
+#define WM9081_WRITE_SEQUENCER_1                0x26
+#define WM9081_WRITE_SEQUENCER_2                0x27
+#define WM9081_MW_SLAVE_1                       0x28
+#define WM9081_EQ_1                             0x2A
+#define WM9081_EQ_2                             0x2B
+#define WM9081_EQ_3                             0x2C
+#define WM9081_EQ_4                             0x2D
+#define WM9081_EQ_5                             0x2E
+#define WM9081_EQ_6                             0x2F
+#define WM9081_EQ_7                             0x30
+#define WM9081_EQ_8                             0x31
+#define WM9081_EQ_9                             0x32
+#define WM9081_EQ_10                            0x33
+#define WM9081_EQ_11                            0x34
+#define WM9081_EQ_12                            0x35
+#define WM9081_EQ_13                            0x36
+#define WM9081_EQ_14                            0x37
+#define WM9081_EQ_15                            0x38
+#define WM9081_EQ_16                            0x39
+#define WM9081_EQ_17                            0x3A
+#define WM9081_EQ_18                            0x3B
+#define WM9081_EQ_19                            0x3C
+#define WM9081_EQ_20                            0x3D
+
+#define WM9081_REGISTER_COUNT                   55
+#define WM9081_MAX_REGISTER                     0x3D
+
+/*
+ * Field Definitions.
+ */
+
+/*
+ * R0 (0x00) - Software Reset
+ */
+#define WM9081_SW_RST_DEV_ID1_MASK              0xFFFF  /* SW_RST_DEV_ID1 - [15:0] */
+#define WM9081_SW_RST_DEV_ID1_SHIFT                  0  /* SW_RST_DEV_ID1 - [15:0] */
+#define WM9081_SW_RST_DEV_ID1_WIDTH                 16  /* SW_RST_DEV_ID1 - [15:0] */
+
+/*
+ * R2 (0x02) - Analogue Lineout
+ */
+#define WM9081_LINEOUT_MUTE                     0x0080  /* LINEOUT_MUTE */
+#define WM9081_LINEOUT_MUTE_MASK                0x0080  /* LINEOUT_MUTE */
+#define WM9081_LINEOUT_MUTE_SHIFT                    7  /* LINEOUT_MUTE */
+#define WM9081_LINEOUT_MUTE_WIDTH                    1  /* LINEOUT_MUTE */
+#define WM9081_LINEOUTZC                        0x0040  /* LINEOUTZC */
+#define WM9081_LINEOUTZC_MASK                   0x0040  /* LINEOUTZC */
+#define WM9081_LINEOUTZC_SHIFT                       6  /* LINEOUTZC */
+#define WM9081_LINEOUTZC_WIDTH                       1  /* LINEOUTZC */
+#define WM9081_LINEOUT_VOL_MASK                 0x003F  /* LINEOUT_VOL - [5:0] */
+#define WM9081_LINEOUT_VOL_SHIFT                     0  /* LINEOUT_VOL - [5:0] */
+#define WM9081_LINEOUT_VOL_WIDTH                     6  /* LINEOUT_VOL - [5:0] */
+
+/*
+ * R3 (0x03) - Analogue Speaker PGA
+ */
+#define WM9081_SPKPGA_MUTE                      0x0080  /* SPKPGA_MUTE */
+#define WM9081_SPKPGA_MUTE_MASK                 0x0080  /* SPKPGA_MUTE */
+#define WM9081_SPKPGA_MUTE_SHIFT                     7  /* SPKPGA_MUTE */
+#define WM9081_SPKPGA_MUTE_WIDTH                     1  /* SPKPGA_MUTE */
+#define WM9081_SPKPGAZC                         0x0040  /* SPKPGAZC */
+#define WM9081_SPKPGAZC_MASK                    0x0040  /* SPKPGAZC */
+#define WM9081_SPKPGAZC_SHIFT                        6  /* SPKPGAZC */
+#define WM9081_SPKPGAZC_WIDTH                        1  /* SPKPGAZC */
+#define WM9081_SPKPGA_VOL_MASK                  0x003F  /* SPKPGA_VOL - [5:0] */
+#define WM9081_SPKPGA_VOL_SHIFT                      0  /* SPKPGA_VOL - [5:0] */
+#define WM9081_SPKPGA_VOL_WIDTH                      6  /* SPKPGA_VOL - [5:0] */
+
+/*
+ * R4 (0x04) - VMID Control
+ */
+#define WM9081_VMID_BUF_ENA                     0x0020  /* VMID_BUF_ENA */
+#define WM9081_VMID_BUF_ENA_MASK                0x0020  /* VMID_BUF_ENA */
+#define WM9081_VMID_BUF_ENA_SHIFT                    5  /* VMID_BUF_ENA */
+#define WM9081_VMID_BUF_ENA_WIDTH                    1  /* VMID_BUF_ENA */
+#define WM9081_VMID_RAMP                        0x0008  /* VMID_RAMP */
+#define WM9081_VMID_RAMP_MASK                   0x0008  /* VMID_RAMP */
+#define WM9081_VMID_RAMP_SHIFT                       3  /* VMID_RAMP */
+#define WM9081_VMID_RAMP_WIDTH                       1  /* VMID_RAMP */
+#define WM9081_VMID_SEL_MASK                    0x0006  /* VMID_SEL - [2:1] */
+#define WM9081_VMID_SEL_SHIFT                        1  /* VMID_SEL - [2:1] */
+#define WM9081_VMID_SEL_WIDTH                        2  /* VMID_SEL - [2:1] */
+#define WM9081_VMID_FAST_ST                     0x0001  /* VMID_FAST_ST */
+#define WM9081_VMID_FAST_ST_MASK                0x0001  /* VMID_FAST_ST */
+#define WM9081_VMID_FAST_ST_SHIFT                    0  /* VMID_FAST_ST */
+#define WM9081_VMID_FAST_ST_WIDTH                    1  /* VMID_FAST_ST */
+
+/*
+ * R5 (0x05) - Bias Control 1
+ */
+#define WM9081_BIAS_SRC                         0x0040  /* BIAS_SRC */
+#define WM9081_BIAS_SRC_MASK                    0x0040  /* BIAS_SRC */
+#define WM9081_BIAS_SRC_SHIFT                        6  /* BIAS_SRC */
+#define WM9081_BIAS_SRC_WIDTH                        1  /* BIAS_SRC */
+#define WM9081_STBY_BIAS_LVL                    0x0020  /* STBY_BIAS_LVL */
+#define WM9081_STBY_BIAS_LVL_MASK               0x0020  /* STBY_BIAS_LVL */
+#define WM9081_STBY_BIAS_LVL_SHIFT                   5  /* STBY_BIAS_LVL */
+#define WM9081_STBY_BIAS_LVL_WIDTH                   1  /* STBY_BIAS_LVL */
+#define WM9081_STBY_BIAS_ENA                    0x0010  /* STBY_BIAS_ENA */
+#define WM9081_STBY_BIAS_ENA_MASK               0x0010  /* STBY_BIAS_ENA */
+#define WM9081_STBY_BIAS_ENA_SHIFT                   4  /* STBY_BIAS_ENA */
+#define WM9081_STBY_BIAS_ENA_WIDTH                   1  /* STBY_BIAS_ENA */
+#define WM9081_BIAS_LVL_MASK                    0x000C  /* BIAS_LVL - [3:2] */
+#define WM9081_BIAS_LVL_SHIFT                        2  /* BIAS_LVL - [3:2] */
+#define WM9081_BIAS_LVL_WIDTH                        2  /* BIAS_LVL - [3:2] */
+#define WM9081_BIAS_ENA                         0x0002  /* BIAS_ENA */
+#define WM9081_BIAS_ENA_MASK                    0x0002  /* BIAS_ENA */
+#define WM9081_BIAS_ENA_SHIFT                        1  /* BIAS_ENA */
+#define WM9081_BIAS_ENA_WIDTH                        1  /* BIAS_ENA */
+#define WM9081_STARTUP_BIAS_ENA                 0x0001  /* STARTUP_BIAS_ENA */
+#define WM9081_STARTUP_BIAS_ENA_MASK            0x0001  /* STARTUP_BIAS_ENA */
+#define WM9081_STARTUP_BIAS_ENA_SHIFT                0  /* STARTUP_BIAS_ENA */
+#define WM9081_STARTUP_BIAS_ENA_WIDTH                1  /* STARTUP_BIAS_ENA */
+
+/*
+ * R7 (0x07) - Analogue Mixer
+ */
+#define WM9081_DAC_SEL                          0x0010  /* DAC_SEL */
+#define WM9081_DAC_SEL_MASK                     0x0010  /* DAC_SEL */
+#define WM9081_DAC_SEL_SHIFT                         4  /* DAC_SEL */
+#define WM9081_DAC_SEL_WIDTH                         1  /* DAC_SEL */
+#define WM9081_IN2_VOL                          0x0008  /* IN2_VOL */
+#define WM9081_IN2_VOL_MASK                     0x0008  /* IN2_VOL */
+#define WM9081_IN2_VOL_SHIFT                         3  /* IN2_VOL */
+#define WM9081_IN2_VOL_WIDTH                         1  /* IN2_VOL */
+#define WM9081_IN2_ENA                          0x0004  /* IN2_ENA */
+#define WM9081_IN2_ENA_MASK                     0x0004  /* IN2_ENA */
+#define WM9081_IN2_ENA_SHIFT                         2  /* IN2_ENA */
+#define WM9081_IN2_ENA_WIDTH                         1  /* IN2_ENA */
+#define WM9081_IN1_VOL                          0x0002  /* IN1_VOL */
+#define WM9081_IN1_VOL_MASK                     0x0002  /* IN1_VOL */
+#define WM9081_IN1_VOL_SHIFT                         1  /* IN1_VOL */
+#define WM9081_IN1_VOL_WIDTH                         1  /* IN1_VOL */
+#define WM9081_IN1_ENA                          0x0001  /* IN1_ENA */
+#define WM9081_IN1_ENA_MASK                     0x0001  /* IN1_ENA */
+#define WM9081_IN1_ENA_SHIFT                         0  /* IN1_ENA */
+#define WM9081_IN1_ENA_WIDTH                         1  /* IN1_ENA */
+
+/*
+ * R8 (0x08) - Anti Pop Control
+ */
+#define WM9081_LINEOUT_DISCH                    0x0004  /* LINEOUT_DISCH */
+#define WM9081_LINEOUT_DISCH_MASK               0x0004  /* LINEOUT_DISCH */
+#define WM9081_LINEOUT_DISCH_SHIFT                   2  /* LINEOUT_DISCH */
+#define WM9081_LINEOUT_DISCH_WIDTH                   1  /* LINEOUT_DISCH */
+#define WM9081_LINEOUT_VROI                     0x0002  /* LINEOUT_VROI */
+#define WM9081_LINEOUT_VROI_MASK                0x0002  /* LINEOUT_VROI */
+#define WM9081_LINEOUT_VROI_SHIFT                    1  /* LINEOUT_VROI */
+#define WM9081_LINEOUT_VROI_WIDTH                    1  /* LINEOUT_VROI */
+#define WM9081_LINEOUT_CLAMP                    0x0001  /* LINEOUT_CLAMP */
+#define WM9081_LINEOUT_CLAMP_MASK               0x0001  /* LINEOUT_CLAMP */
+#define WM9081_LINEOUT_CLAMP_SHIFT                   0  /* LINEOUT_CLAMP */
+#define WM9081_LINEOUT_CLAMP_WIDTH                   1  /* LINEOUT_CLAMP */
+
+/*
+ * R9 (0x09) - Analogue Speaker 1
+ */
+#define WM9081_SPK_DCGAIN_MASK                  0x0038  /* SPK_DCGAIN - [5:3] */
+#define WM9081_SPK_DCGAIN_SHIFT                      3  /* SPK_DCGAIN - [5:3] */
+#define WM9081_SPK_DCGAIN_WIDTH                      3  /* SPK_DCGAIN - [5:3] */
+#define WM9081_SPK_ACGAIN_MASK                  0x0007  /* SPK_ACGAIN - [2:0] */
+#define WM9081_SPK_ACGAIN_SHIFT                      0  /* SPK_ACGAIN - [2:0] */
+#define WM9081_SPK_ACGAIN_WIDTH                      3  /* SPK_ACGAIN - [2:0] */
+
+/*
+ * R10 (0x0A) - Analogue Speaker 2
+ */
+#define WM9081_SPK_MODE                         0x0040  /* SPK_MODE */
+#define WM9081_SPK_MODE_MASK                    0x0040  /* SPK_MODE */
+#define WM9081_SPK_MODE_SHIFT                        6  /* SPK_MODE */
+#define WM9081_SPK_MODE_WIDTH                        1  /* SPK_MODE */
+#define WM9081_SPK_INV_MUTE                     0x0010  /* SPK_INV_MUTE */
+#define WM9081_SPK_INV_MUTE_MASK                0x0010  /* SPK_INV_MUTE */
+#define WM9081_SPK_INV_MUTE_SHIFT                    4  /* SPK_INV_MUTE */
+#define WM9081_SPK_INV_MUTE_WIDTH                    1  /* SPK_INV_MUTE */
+#define WM9081_OUT_SPK_CTRL                     0x0008  /* OUT_SPK_CTRL */
+#define WM9081_OUT_SPK_CTRL_MASK                0x0008  /* OUT_SPK_CTRL */
+#define WM9081_OUT_SPK_CTRL_SHIFT                    3  /* OUT_SPK_CTRL */
+#define WM9081_OUT_SPK_CTRL_WIDTH                    1  /* OUT_SPK_CTRL */
+
+/*
+ * R11 (0x0B) - Power Management
+ */
+#define WM9081_TSHUT_ENA                        0x0100  /* TSHUT_ENA */
+#define WM9081_TSHUT_ENA_MASK                   0x0100  /* TSHUT_ENA */
+#define WM9081_TSHUT_ENA_SHIFT                       8  /* TSHUT_ENA */
+#define WM9081_TSHUT_ENA_WIDTH                       1  /* TSHUT_ENA */
+#define WM9081_TSENSE_ENA                       0x0080  /* TSENSE_ENA */
+#define WM9081_TSENSE_ENA_MASK                  0x0080  /* TSENSE_ENA */
+#define WM9081_TSENSE_ENA_SHIFT                      7  /* TSENSE_ENA */
+#define WM9081_TSENSE_ENA_WIDTH                      1  /* TSENSE_ENA */
+#define WM9081_TEMP_SHUT                        0x0040  /* TEMP_SHUT */
+#define WM9081_TEMP_SHUT_MASK                   0x0040  /* TEMP_SHUT */
+#define WM9081_TEMP_SHUT_SHIFT                       6  /* TEMP_SHUT */
+#define WM9081_TEMP_SHUT_WIDTH                       1  /* TEMP_SHUT */
+#define WM9081_LINEOUT_ENA                      0x0010  /* LINEOUT_ENA */
+#define WM9081_LINEOUT_ENA_MASK                 0x0010  /* LINEOUT_ENA */
+#define WM9081_LINEOUT_ENA_SHIFT                     4  /* LINEOUT_ENA */
+#define WM9081_LINEOUT_ENA_WIDTH                     1  /* LINEOUT_ENA */
+#define WM9081_SPKPGA_ENA                       0x0004  /* SPKPGA_ENA */
+#define WM9081_SPKPGA_ENA_MASK                  0x0004  /* SPKPGA_ENA */
+#define WM9081_SPKPGA_ENA_SHIFT                      2  /* SPKPGA_ENA */
+#define WM9081_SPKPGA_ENA_WIDTH                      1  /* SPKPGA_ENA */
+#define WM9081_SPK_ENA                          0x0002  /* SPK_ENA */
+#define WM9081_SPK_ENA_MASK                     0x0002  /* SPK_ENA */
+#define WM9081_SPK_ENA_SHIFT                         1  /* SPK_ENA */
+#define WM9081_SPK_ENA_WIDTH                         1  /* SPK_ENA */
+#define WM9081_DAC_ENA                          0x0001  /* DAC_ENA */
+#define WM9081_DAC_ENA_MASK                     0x0001  /* DAC_ENA */
+#define WM9081_DAC_ENA_SHIFT                         0  /* DAC_ENA */
+#define WM9081_DAC_ENA_WIDTH                         1  /* DAC_ENA */
+
+/*
+ * R12 (0x0C) - Clock Control 1
+ */
+#define WM9081_CLK_OP_DIV_MASK                  0x1C00  /* CLK_OP_DIV - [12:10] */
+#define WM9081_CLK_OP_DIV_SHIFT                     10  /* CLK_OP_DIV - [12:10] */
+#define WM9081_CLK_OP_DIV_WIDTH                      3  /* CLK_OP_DIV - [12:10] */
+#define WM9081_CLK_TO_DIV_MASK                  0x0300  /* CLK_TO_DIV - [9:8] */
+#define WM9081_CLK_TO_DIV_SHIFT                      8  /* CLK_TO_DIV - [9:8] */
+#define WM9081_CLK_TO_DIV_WIDTH                      2  /* CLK_TO_DIV - [9:8] */
+#define WM9081_MCLKDIV2                         0x0080  /* MCLKDIV2 */
+#define WM9081_MCLKDIV2_MASK                    0x0080  /* MCLKDIV2 */
+#define WM9081_MCLKDIV2_SHIFT                        7  /* MCLKDIV2 */
+#define WM9081_MCLKDIV2_WIDTH                        1  /* MCLKDIV2 */
+
+/*
+ * R13 (0x0D) - Clock Control 2
+ */
+#define WM9081_CLK_SYS_RATE_MASK                0x00F0  /* CLK_SYS_RATE - [7:4] */
+#define WM9081_CLK_SYS_RATE_SHIFT                    4  /* CLK_SYS_RATE - [7:4] */
+#define WM9081_CLK_SYS_RATE_WIDTH                    4  /* CLK_SYS_RATE - [7:4] */
+#define WM9081_SAMPLE_RATE_MASK                 0x000F  /* SAMPLE_RATE - [3:0] */
+#define WM9081_SAMPLE_RATE_SHIFT                     0  /* SAMPLE_RATE - [3:0] */
+#define WM9081_SAMPLE_RATE_WIDTH                     4  /* SAMPLE_RATE - [3:0] */
+
+/*
+ * R14 (0x0E) - Clock Control 3
+ */
+#define WM9081_CLK_SRC_SEL                      0x2000  /* CLK_SRC_SEL */
+#define WM9081_CLK_SRC_SEL_MASK                 0x2000  /* CLK_SRC_SEL */
+#define WM9081_CLK_SRC_SEL_SHIFT                    13  /* CLK_SRC_SEL */
+#define WM9081_CLK_SRC_SEL_WIDTH                     1  /* CLK_SRC_SEL */
+#define WM9081_CLK_OP_ENA                       0x0020  /* CLK_OP_ENA */
+#define WM9081_CLK_OP_ENA_MASK                  0x0020  /* CLK_OP_ENA */
+#define WM9081_CLK_OP_ENA_SHIFT                      5  /* CLK_OP_ENA */
+#define WM9081_CLK_OP_ENA_WIDTH                      1  /* CLK_OP_ENA */
+#define WM9081_CLK_TO_ENA                       0x0004  /* CLK_TO_ENA */
+#define WM9081_CLK_TO_ENA_MASK                  0x0004  /* CLK_TO_ENA */
+#define WM9081_CLK_TO_ENA_SHIFT                      2  /* CLK_TO_ENA */
+#define WM9081_CLK_TO_ENA_WIDTH                      1  /* CLK_TO_ENA */
+#define WM9081_CLK_DSP_ENA                      0x0002  /* CLK_DSP_ENA */
+#define WM9081_CLK_DSP_ENA_MASK                 0x0002  /* CLK_DSP_ENA */
+#define WM9081_CLK_DSP_ENA_SHIFT                     1  /* CLK_DSP_ENA */
+#define WM9081_CLK_DSP_ENA_WIDTH                     1  /* CLK_DSP_ENA */
+#define WM9081_CLK_SYS_ENA                      0x0001  /* CLK_SYS_ENA */
+#define WM9081_CLK_SYS_ENA_MASK                 0x0001  /* CLK_SYS_ENA */
+#define WM9081_CLK_SYS_ENA_SHIFT                     0  /* CLK_SYS_ENA */
+#define WM9081_CLK_SYS_ENA_WIDTH                     1  /* CLK_SYS_ENA */
+
+/*
+ * R16 (0x10) - FLL Control 1
+ */
+#define WM9081_FLL_HOLD                         0x0008  /* FLL_HOLD */
+#define WM9081_FLL_HOLD_MASK                    0x0008  /* FLL_HOLD */
+#define WM9081_FLL_HOLD_SHIFT                        3  /* FLL_HOLD */
+#define WM9081_FLL_HOLD_WIDTH                        1  /* FLL_HOLD */
+#define WM9081_FLL_FRAC                         0x0004  /* FLL_FRAC */
+#define WM9081_FLL_FRAC_MASK                    0x0004  /* FLL_FRAC */
+#define WM9081_FLL_FRAC_SHIFT                        2  /* FLL_FRAC */
+#define WM9081_FLL_FRAC_WIDTH                        1  /* FLL_FRAC */
+#define WM9081_FLL_ENA                          0x0001  /* FLL_ENA */
+#define WM9081_FLL_ENA_MASK                     0x0001  /* FLL_ENA */
+#define WM9081_FLL_ENA_SHIFT                         0  /* FLL_ENA */
+#define WM9081_FLL_ENA_WIDTH                         1  /* FLL_ENA */
+
+/*
+ * R17 (0x11) - FLL Control 2
+ */
+#define WM9081_FLL_OUTDIV_MASK                  0x0700  /* FLL_OUTDIV - [10:8] */
+#define WM9081_FLL_OUTDIV_SHIFT                      8  /* FLL_OUTDIV - [10:8] */
+#define WM9081_FLL_OUTDIV_WIDTH                      3  /* FLL_OUTDIV - [10:8] */
+#define WM9081_FLL_CTRL_RATE_MASK               0x0070  /* FLL_CTRL_RATE - [6:4] */
+#define WM9081_FLL_CTRL_RATE_SHIFT                   4  /* FLL_CTRL_RATE - [6:4] */
+#define WM9081_FLL_CTRL_RATE_WIDTH                   3  /* FLL_CTRL_RATE - [6:4] */
+#define WM9081_FLL_FRATIO_MASK                  0x0007  /* FLL_FRATIO - [2:0] */
+#define WM9081_FLL_FRATIO_SHIFT                      0  /* FLL_FRATIO - [2:0] */
+#define WM9081_FLL_FRATIO_WIDTH                      3  /* FLL_FRATIO - [2:0] */
+
+/*
+ * R18 (0x12) - FLL Control 3
+ */
+#define WM9081_FLL_K_MASK                       0xFFFF  /* FLL_K - [15:0] */
+#define WM9081_FLL_K_SHIFT                           0  /* FLL_K - [15:0] */
+#define WM9081_FLL_K_WIDTH                          16  /* FLL_K - [15:0] */
+
+/*
+ * R19 (0x13) - FLL Control 4
+ */
+#define WM9081_FLL_N_MASK                       0x7FE0  /* FLL_N - [14:5] */
+#define WM9081_FLL_N_SHIFT                           5  /* FLL_N - [14:5] */
+#define WM9081_FLL_N_WIDTH                          10  /* FLL_N - [14:5] */
+#define WM9081_FLL_GAIN_MASK                    0x000F  /* FLL_GAIN - [3:0] */
+#define WM9081_FLL_GAIN_SHIFT                        0  /* FLL_GAIN - [3:0] */
+#define WM9081_FLL_GAIN_WIDTH                        4  /* FLL_GAIN - [3:0] */
+
+/*
+ * R20 (0x14) - FLL Control 5
+ */
+#define WM9081_FLL_CLK_REF_DIV_MASK             0x0018  /* FLL_CLK_REF_DIV - [4:3] */
+#define WM9081_FLL_CLK_REF_DIV_SHIFT                 3  /* FLL_CLK_REF_DIV - [4:3] */
+#define WM9081_FLL_CLK_REF_DIV_WIDTH                 2  /* FLL_CLK_REF_DIV - [4:3] */
+#define WM9081_FLL_CLK_SRC_MASK                 0x0003  /* FLL_CLK_SRC - [1:0] */
+#define WM9081_FLL_CLK_SRC_SHIFT                     0  /* FLL_CLK_SRC - [1:0] */
+#define WM9081_FLL_CLK_SRC_WIDTH                     2  /* FLL_CLK_SRC - [1:0] */
+
+/*
+ * R22 (0x16) - Audio Interface 1
+ */
+#define WM9081_AIFDAC_CHAN                      0x0040  /* AIFDAC_CHAN */
+#define WM9081_AIFDAC_CHAN_MASK                 0x0040  /* AIFDAC_CHAN */
+#define WM9081_AIFDAC_CHAN_SHIFT                     6  /* AIFDAC_CHAN */
+#define WM9081_AIFDAC_CHAN_WIDTH                     1  /* AIFDAC_CHAN */
+#define WM9081_AIFDAC_TDM_SLOT_MASK             0x0030  /* AIFDAC_TDM_SLOT - [5:4] */
+#define WM9081_AIFDAC_TDM_SLOT_SHIFT                 4  /* AIFDAC_TDM_SLOT - [5:4] */
+#define WM9081_AIFDAC_TDM_SLOT_WIDTH                 2  /* AIFDAC_TDM_SLOT - [5:4] */
+#define WM9081_AIFDAC_TDM_MODE_MASK             0x000C  /* AIFDAC_TDM_MODE - [3:2] */
+#define WM9081_AIFDAC_TDM_MODE_SHIFT                 2  /* AIFDAC_TDM_MODE - [3:2] */
+#define WM9081_AIFDAC_TDM_MODE_WIDTH                 2  /* AIFDAC_TDM_MODE - [3:2] */
+#define WM9081_DAC_COMP                         0x0002  /* DAC_COMP */
+#define WM9081_DAC_COMP_MASK                    0x0002  /* DAC_COMP */
+#define WM9081_DAC_COMP_SHIFT                        1  /* DAC_COMP */
+#define WM9081_DAC_COMP_WIDTH                        1  /* DAC_COMP */
+#define WM9081_DAC_COMPMODE                     0x0001  /* DAC_COMPMODE */
+#define WM9081_DAC_COMPMODE_MASK                0x0001  /* DAC_COMPMODE */
+#define WM9081_DAC_COMPMODE_SHIFT                    0  /* DAC_COMPMODE */
+#define WM9081_DAC_COMPMODE_WIDTH                    1  /* DAC_COMPMODE */
+
+/*
+ * R23 (0x17) - Audio Interface 2
+ */
+#define WM9081_AIF_TRIS                         0x0200  /* AIF_TRIS */
+#define WM9081_AIF_TRIS_MASK                    0x0200  /* AIF_TRIS */
+#define WM9081_AIF_TRIS_SHIFT                        9  /* AIF_TRIS */
+#define WM9081_AIF_TRIS_WIDTH                        1  /* AIF_TRIS */
+#define WM9081_DAC_DAT_INV                      0x0100  /* DAC_DAT_INV */
+#define WM9081_DAC_DAT_INV_MASK                 0x0100  /* DAC_DAT_INV */
+#define WM9081_DAC_DAT_INV_SHIFT                     8  /* DAC_DAT_INV */
+#define WM9081_DAC_DAT_INV_WIDTH                     1  /* DAC_DAT_INV */
+#define WM9081_AIF_BCLK_INV                     0x0080  /* AIF_BCLK_INV */
+#define WM9081_AIF_BCLK_INV_MASK                0x0080  /* AIF_BCLK_INV */
+#define WM9081_AIF_BCLK_INV_SHIFT                    7  /* AIF_BCLK_INV */
+#define WM9081_AIF_BCLK_INV_WIDTH                    1  /* AIF_BCLK_INV */
+#define WM9081_BCLK_DIR                         0x0040  /* BCLK_DIR */
+#define WM9081_BCLK_DIR_MASK                    0x0040  /* BCLK_DIR */
+#define WM9081_BCLK_DIR_SHIFT                        6  /* BCLK_DIR */
+#define WM9081_BCLK_DIR_WIDTH                        1  /* BCLK_DIR */
+#define WM9081_LRCLK_DIR                        0x0020  /* LRCLK_DIR */
+#define WM9081_LRCLK_DIR_MASK                   0x0020  /* LRCLK_DIR */
+#define WM9081_LRCLK_DIR_SHIFT                       5  /* LRCLK_DIR */
+#define WM9081_LRCLK_DIR_WIDTH                       1  /* LRCLK_DIR */
+#define WM9081_AIF_LRCLK_INV                    0x0010  /* AIF_LRCLK_INV */
+#define WM9081_AIF_LRCLK_INV_MASK               0x0010  /* AIF_LRCLK_INV */
+#define WM9081_AIF_LRCLK_INV_SHIFT                   4  /* AIF_LRCLK_INV */
+#define WM9081_AIF_LRCLK_INV_WIDTH                   1  /* AIF_LRCLK_INV */
+#define WM9081_AIF_WL_MASK                      0x000C  /* AIF_WL - [3:2] */
+#define WM9081_AIF_WL_SHIFT                          2  /* AIF_WL - [3:2] */
+#define WM9081_AIF_WL_WIDTH                          2  /* AIF_WL - [3:2] */
+#define WM9081_AIF_FMT_MASK                     0x0003  /* AIF_FMT - [1:0] */
+#define WM9081_AIF_FMT_SHIFT                         0  /* AIF_FMT - [1:0] */
+#define WM9081_AIF_FMT_WIDTH                         2  /* AIF_FMT - [1:0] */
+
+/*
+ * R24 (0x18) - Audio Interface 3
+ */
+#define WM9081_BCLK_DIV_MASK                    0x001F  /* BCLK_DIV - [4:0] */
+#define WM9081_BCLK_DIV_SHIFT                        0  /* BCLK_DIV - [4:0] */
+#define WM9081_BCLK_DIV_WIDTH                        5  /* BCLK_DIV - [4:0] */
+
+/*
+ * R25 (0x19) - Audio Interface 4
+ */
+#define WM9081_LRCLK_RATE_MASK                  0x07FF  /* LRCLK_RATE - [10:0] */
+#define WM9081_LRCLK_RATE_SHIFT                      0  /* LRCLK_RATE - [10:0] */
+#define WM9081_LRCLK_RATE_WIDTH                     11  /* LRCLK_RATE - [10:0] */
+
+/*
+ * R26 (0x1A) - Interrupt Status
+ */
+#define WM9081_WSEQ_BUSY_EINT                   0x0004  /* WSEQ_BUSY_EINT */
+#define WM9081_WSEQ_BUSY_EINT_MASK              0x0004  /* WSEQ_BUSY_EINT */
+#define WM9081_WSEQ_BUSY_EINT_SHIFT                  2  /* WSEQ_BUSY_EINT */
+#define WM9081_WSEQ_BUSY_EINT_WIDTH                  1  /* WSEQ_BUSY_EINT */
+#define WM9081_TSHUT_EINT                       0x0001  /* TSHUT_EINT */
+#define WM9081_TSHUT_EINT_MASK                  0x0001  /* TSHUT_EINT */
+#define WM9081_TSHUT_EINT_SHIFT                      0  /* TSHUT_EINT */
+#define WM9081_TSHUT_EINT_WIDTH                      1  /* TSHUT_EINT */
+
+/*
+ * R27 (0x1B) - Interrupt Status Mask
+ */
+#define WM9081_IM_WSEQ_BUSY_EINT                0x0004  /* IM_WSEQ_BUSY_EINT */
+#define WM9081_IM_WSEQ_BUSY_EINT_MASK           0x0004  /* IM_WSEQ_BUSY_EINT */
+#define WM9081_IM_WSEQ_BUSY_EINT_SHIFT               2  /* IM_WSEQ_BUSY_EINT */
+#define WM9081_IM_WSEQ_BUSY_EINT_WIDTH               1  /* IM_WSEQ_BUSY_EINT */
+#define WM9081_IM_TSHUT_EINT                    0x0001  /* IM_TSHUT_EINT */
+#define WM9081_IM_TSHUT_EINT_MASK               0x0001  /* IM_TSHUT_EINT */
+#define WM9081_IM_TSHUT_EINT_SHIFT                   0  /* IM_TSHUT_EINT */
+#define WM9081_IM_TSHUT_EINT_WIDTH                   1  /* IM_TSHUT_EINT */
+
+/*
+ * R28 (0x1C) - Interrupt Polarity
+ */
+#define WM9081_TSHUT_INV                        0x0001  /* TSHUT_INV */
+#define WM9081_TSHUT_INV_MASK                   0x0001  /* TSHUT_INV */
+#define WM9081_TSHUT_INV_SHIFT                       0  /* TSHUT_INV */
+#define WM9081_TSHUT_INV_WIDTH                       1  /* TSHUT_INV */
+
+/*
+ * R29 (0x1D) - Interrupt Control
+ */
+#define WM9081_IRQ_POL                          0x8000  /* IRQ_POL */
+#define WM9081_IRQ_POL_MASK                     0x8000  /* IRQ_POL */
+#define WM9081_IRQ_POL_SHIFT                        15  /* IRQ_POL */
+#define WM9081_IRQ_POL_WIDTH                         1  /* IRQ_POL */
+#define WM9081_IRQ_OP_CTRL                      0x0001  /* IRQ_OP_CTRL */
+#define WM9081_IRQ_OP_CTRL_MASK                 0x0001  /* IRQ_OP_CTRL */
+#define WM9081_IRQ_OP_CTRL_SHIFT                     0  /* IRQ_OP_CTRL */
+#define WM9081_IRQ_OP_CTRL_WIDTH                     1  /* IRQ_OP_CTRL */
+
+/*
+ * R30 (0x1E) - DAC Digital 1
+ */
+#define WM9081_DAC_VOL_MASK                     0x00FF  /* DAC_VOL - [7:0] */
+#define WM9081_DAC_VOL_SHIFT                         0  /* DAC_VOL - [7:0] */
+#define WM9081_DAC_VOL_WIDTH                         8  /* DAC_VOL - [7:0] */
+
+/*
+ * R31 (0x1F) - DAC Digital 2
+ */
+#define WM9081_DAC_MUTERATE                     0x0400  /* DAC_MUTERATE */
+#define WM9081_DAC_MUTERATE_MASK                0x0400  /* DAC_MUTERATE */
+#define WM9081_DAC_MUTERATE_SHIFT                   10  /* DAC_MUTERATE */
+#define WM9081_DAC_MUTERATE_WIDTH                    1  /* DAC_MUTERATE */
+#define WM9081_DAC_MUTEMODE                     0x0200  /* DAC_MUTEMODE */
+#define WM9081_DAC_MUTEMODE_MASK                0x0200  /* DAC_MUTEMODE */
+#define WM9081_DAC_MUTEMODE_SHIFT                    9  /* DAC_MUTEMODE */
+#define WM9081_DAC_MUTEMODE_WIDTH                    1  /* DAC_MUTEMODE */
+#define WM9081_DAC_MUTE                         0x0008  /* DAC_MUTE */
+#define WM9081_DAC_MUTE_MASK                    0x0008  /* DAC_MUTE */
+#define WM9081_DAC_MUTE_SHIFT                        3  /* DAC_MUTE */
+#define WM9081_DAC_MUTE_WIDTH                        1  /* DAC_MUTE */
+#define WM9081_DEEMPH_MASK                      0x0006  /* DEEMPH - [2:1] */
+#define WM9081_DEEMPH_SHIFT                          1  /* DEEMPH - [2:1] */
+#define WM9081_DEEMPH_WIDTH                          2  /* DEEMPH - [2:1] */
+
+/*
+ * R32 (0x20) - DRC 1
+ */
+#define WM9081_DRC_ENA                          0x8000  /* DRC_ENA */
+#define WM9081_DRC_ENA_MASK                     0x8000  /* DRC_ENA */
+#define WM9081_DRC_ENA_SHIFT                        15  /* DRC_ENA */
+#define WM9081_DRC_ENA_WIDTH                         1  /* DRC_ENA */
+#define WM9081_DRC_STARTUP_GAIN_MASK            0x07C0  /* DRC_STARTUP_GAIN - [10:6] */
+#define WM9081_DRC_STARTUP_GAIN_SHIFT                6  /* DRC_STARTUP_GAIN - [10:6] */
+#define WM9081_DRC_STARTUP_GAIN_WIDTH                5  /* DRC_STARTUP_GAIN - [10:6] */
+#define WM9081_DRC_FF_DLY                       0x0020  /* DRC_FF_DLY */
+#define WM9081_DRC_FF_DLY_MASK                  0x0020  /* DRC_FF_DLY */
+#define WM9081_DRC_FF_DLY_SHIFT                      5  /* DRC_FF_DLY */
+#define WM9081_DRC_FF_DLY_WIDTH                      1  /* DRC_FF_DLY */
+#define WM9081_DRC_QR                           0x0004  /* DRC_QR */
+#define WM9081_DRC_QR_MASK                      0x0004  /* DRC_QR */
+#define WM9081_DRC_QR_SHIFT                          2  /* DRC_QR */
+#define WM9081_DRC_QR_WIDTH                          1  /* DRC_QR */
+#define WM9081_DRC_ANTICLIP                     0x0002  /* DRC_ANTICLIP */
+#define WM9081_DRC_ANTICLIP_MASK                0x0002  /* DRC_ANTICLIP */
+#define WM9081_DRC_ANTICLIP_SHIFT                    1  /* DRC_ANTICLIP */
+#define WM9081_DRC_ANTICLIP_WIDTH                    1  /* DRC_ANTICLIP */
+
+/*
+ * R33 (0x21) - DRC 2
+ */
+#define WM9081_DRC_ATK_MASK                     0xF000  /* DRC_ATK - [15:12] */
+#define WM9081_DRC_ATK_SHIFT                        12  /* DRC_ATK - [15:12] */
+#define WM9081_DRC_ATK_WIDTH                         4  /* DRC_ATK - [15:12] */
+#define WM9081_DRC_DCY_MASK                     0x0F00  /* DRC_DCY - [11:8] */
+#define WM9081_DRC_DCY_SHIFT                         8  /* DRC_DCY - [11:8] */
+#define WM9081_DRC_DCY_WIDTH                         4  /* DRC_DCY - [11:8] */
+#define WM9081_DRC_QR_THR_MASK                  0x00C0  /* DRC_QR_THR - [7:6] */
+#define WM9081_DRC_QR_THR_SHIFT                      6  /* DRC_QR_THR - [7:6] */
+#define WM9081_DRC_QR_THR_WIDTH                      2  /* DRC_QR_THR - [7:6] */
+#define WM9081_DRC_QR_DCY_MASK                  0x0030  /* DRC_QR_DCY - [5:4] */
+#define WM9081_DRC_QR_DCY_SHIFT                      4  /* DRC_QR_DCY - [5:4] */
+#define WM9081_DRC_QR_DCY_WIDTH                      2  /* DRC_QR_DCY - [5:4] */
+#define WM9081_DRC_MINGAIN_MASK                 0x000C  /* DRC_MINGAIN - [3:2] */
+#define WM9081_DRC_MINGAIN_SHIFT                     2  /* DRC_MINGAIN - [3:2] */
+#define WM9081_DRC_MINGAIN_WIDTH                     2  /* DRC_MINGAIN - [3:2] */
+#define WM9081_DRC_MAXGAIN_MASK                 0x0003  /* DRC_MAXGAIN - [1:0] */
+#define WM9081_DRC_MAXGAIN_SHIFT                     0  /* DRC_MAXGAIN - [1:0] */
+#define WM9081_DRC_MAXGAIN_WIDTH                     2  /* DRC_MAXGAIN - [1:0] */
+
+/*
+ * R34 (0x22) - DRC 3
+ */
+#define WM9081_DRC_HI_COMP_MASK                 0x0038  /* DRC_HI_COMP - [5:3] */
+#define WM9081_DRC_HI_COMP_SHIFT                     3  /* DRC_HI_COMP - [5:3] */
+#define WM9081_DRC_HI_COMP_WIDTH                     3  /* DRC_HI_COMP - [5:3] */
+#define WM9081_DRC_LO_COMP_MASK                 0x0007  /* DRC_LO_COMP - [2:0] */
+#define WM9081_DRC_LO_COMP_SHIFT                     0  /* DRC_LO_COMP - [2:0] */
+#define WM9081_DRC_LO_COMP_WIDTH                     3  /* DRC_LO_COMP - [2:0] */
+
+/*
+ * R35 (0x23) - DRC 4
+ */
+#define WM9081_DRC_KNEE_IP_MASK                 0x07E0  /* DRC_KNEE_IP - [10:5] */
+#define WM9081_DRC_KNEE_IP_SHIFT                     5  /* DRC_KNEE_IP - [10:5] */
+#define WM9081_DRC_KNEE_IP_WIDTH                     6  /* DRC_KNEE_IP - [10:5] */
+#define WM9081_DRC_KNEE_OP_MASK                 0x001F  /* DRC_KNEE_OP - [4:0] */
+#define WM9081_DRC_KNEE_OP_SHIFT                     0  /* DRC_KNEE_OP - [4:0] */
+#define WM9081_DRC_KNEE_OP_WIDTH                     5  /* DRC_KNEE_OP - [4:0] */
+
+/*
+ * R38 (0x26) - Write Sequencer 1
+ */
+#define WM9081_WSEQ_ENA                         0x8000  /* WSEQ_ENA */
+#define WM9081_WSEQ_ENA_MASK                    0x8000  /* WSEQ_ENA */
+#define WM9081_WSEQ_ENA_SHIFT                       15  /* WSEQ_ENA */
+#define WM9081_WSEQ_ENA_WIDTH                        1  /* WSEQ_ENA */
+#define WM9081_WSEQ_ABORT                       0x0200  /* WSEQ_ABORT */
+#define WM9081_WSEQ_ABORT_MASK                  0x0200  /* WSEQ_ABORT */
+#define WM9081_WSEQ_ABORT_SHIFT                      9  /* WSEQ_ABORT */
+#define WM9081_WSEQ_ABORT_WIDTH                      1  /* WSEQ_ABORT */
+#define WM9081_WSEQ_START                       0x0100  /* WSEQ_START */
+#define WM9081_WSEQ_START_MASK                  0x0100  /* WSEQ_START */
+#define WM9081_WSEQ_START_SHIFT                      8  /* WSEQ_START */
+#define WM9081_WSEQ_START_WIDTH                      1  /* WSEQ_START */
+#define WM9081_WSEQ_START_INDEX_MASK            0x007F  /* WSEQ_START_INDEX - [6:0] */
+#define WM9081_WSEQ_START_INDEX_SHIFT                0  /* WSEQ_START_INDEX - [6:0] */
+#define WM9081_WSEQ_START_INDEX_WIDTH                7  /* WSEQ_START_INDEX - [6:0] */
+
+/*
+ * R39 (0x27) - Write Sequencer 2
+ */
+#define WM9081_WSEQ_CURRENT_INDEX_MASK          0x07F0  /* WSEQ_CURRENT_INDEX - [10:4] */
+#define WM9081_WSEQ_CURRENT_INDEX_SHIFT              4  /* WSEQ_CURRENT_INDEX - [10:4] */
+#define WM9081_WSEQ_CURRENT_INDEX_WIDTH              7  /* WSEQ_CURRENT_INDEX - [10:4] */
+#define WM9081_WSEQ_BUSY                        0x0001  /* WSEQ_BUSY */
+#define WM9081_WSEQ_BUSY_MASK                   0x0001  /* WSEQ_BUSY */
+#define WM9081_WSEQ_BUSY_SHIFT                       0  /* WSEQ_BUSY */
+#define WM9081_WSEQ_BUSY_WIDTH                       1  /* WSEQ_BUSY */
+
+/*
+ * R40 (0x28) - MW Slave 1
+ */
+#define WM9081_SPI_CFG                          0x0020  /* SPI_CFG */
+#define WM9081_SPI_CFG_MASK                     0x0020  /* SPI_CFG */
+#define WM9081_SPI_CFG_SHIFT                         5  /* SPI_CFG */
+#define WM9081_SPI_CFG_WIDTH                         1  /* SPI_CFG */
+#define WM9081_SPI_4WIRE                        0x0010  /* SPI_4WIRE */
+#define WM9081_SPI_4WIRE_MASK                   0x0010  /* SPI_4WIRE */
+#define WM9081_SPI_4WIRE_SHIFT                       4  /* SPI_4WIRE */
+#define WM9081_SPI_4WIRE_WIDTH                       1  /* SPI_4WIRE */
+#define WM9081_ARA_ENA                          0x0008  /* ARA_ENA */
+#define WM9081_ARA_ENA_MASK                     0x0008  /* ARA_ENA */
+#define WM9081_ARA_ENA_SHIFT                         3  /* ARA_ENA */
+#define WM9081_ARA_ENA_WIDTH                         1  /* ARA_ENA */
+#define WM9081_AUTO_INC                         0x0002  /* AUTO_INC */
+#define WM9081_AUTO_INC_MASK                    0x0002  /* AUTO_INC */
+#define WM9081_AUTO_INC_SHIFT                        1  /* AUTO_INC */
+#define WM9081_AUTO_INC_WIDTH                        1  /* AUTO_INC */
+
+/*
+ * R42 (0x2A) - EQ 1
+ */
+#define WM9081_EQ_B1_GAIN_MASK                  0xF800  /* EQ_B1_GAIN - [15:11] */
+#define WM9081_EQ_B1_GAIN_SHIFT                     11  /* EQ_B1_GAIN - [15:11] */
+#define WM9081_EQ_B1_GAIN_WIDTH                      5  /* EQ_B1_GAIN - [15:11] */
+#define WM9081_EQ_B2_GAIN_MASK                  0x07C0  /* EQ_B2_GAIN - [10:6] */
+#define WM9081_EQ_B2_GAIN_SHIFT                      6  /* EQ_B2_GAIN - [10:6] */
+#define WM9081_EQ_B2_GAIN_WIDTH                      5  /* EQ_B2_GAIN - [10:6] */
+#define WM9081_EQ_B4_GAIN_MASK                  0x003E  /* EQ_B4_GAIN - [5:1] */
+#define WM9081_EQ_B4_GAIN_SHIFT                      1  /* EQ_B4_GAIN - [5:1] */
+#define WM9081_EQ_B4_GAIN_WIDTH                      5  /* EQ_B4_GAIN - [5:1] */
+#define WM9081_EQ_ENA                           0x0001  /* EQ_ENA */
+#define WM9081_EQ_ENA_MASK                      0x0001  /* EQ_ENA */
+#define WM9081_EQ_ENA_SHIFT                          0  /* EQ_ENA */
+#define WM9081_EQ_ENA_WIDTH                          1  /* EQ_ENA */
+
+/*
+ * R43 (0x2B) - EQ 2
+ */
+#define WM9081_EQ_B3_GAIN_MASK                  0xF800  /* EQ_B3_GAIN - [15:11] */
+#define WM9081_EQ_B3_GAIN_SHIFT                     11  /* EQ_B3_GAIN - [15:11] */
+#define WM9081_EQ_B3_GAIN_WIDTH                      5  /* EQ_B3_GAIN - [15:11] */
+#define WM9081_EQ_B5_GAIN_MASK                  0x07C0  /* EQ_B5_GAIN - [10:6] */
+#define WM9081_EQ_B5_GAIN_SHIFT                      6  /* EQ_B5_GAIN - [10:6] */
+#define WM9081_EQ_B5_GAIN_WIDTH                      5  /* EQ_B5_GAIN - [10:6] */
+
+/*
+ * R44 (0x2C) - EQ 3
+ */
+#define WM9081_EQ_B1_A_MASK                     0xFFFF  /* EQ_B1_A - [15:0] */
+#define WM9081_EQ_B1_A_SHIFT                         0  /* EQ_B1_A - [15:0] */
+#define WM9081_EQ_B1_A_WIDTH                        16  /* EQ_B1_A - [15:0] */
+
+/*
+ * R45 (0x2D) - EQ 4
+ */
+#define WM9081_EQ_B1_B_MASK                     0xFFFF  /* EQ_B1_B - [15:0] */
+#define WM9081_EQ_B1_B_SHIFT                         0  /* EQ_B1_B - [15:0] */
+#define WM9081_EQ_B1_B_WIDTH                        16  /* EQ_B1_B - [15:0] */
+
+/*
+ * R46 (0x2E) - EQ 5
+ */
+#define WM9081_EQ_B1_PG_MASK                    0xFFFF  /* EQ_B1_PG - [15:0] */
+#define WM9081_EQ_B1_PG_SHIFT                        0  /* EQ_B1_PG - [15:0] */
+#define WM9081_EQ_B1_PG_WIDTH                       16  /* EQ_B1_PG - [15:0] */
+
+/*
+ * R47 (0x2F) - EQ 6
+ */
+#define WM9081_EQ_B2_A_MASK                     0xFFFF  /* EQ_B2_A - [15:0] */
+#define WM9081_EQ_B2_A_SHIFT                         0  /* EQ_B2_A - [15:0] */
+#define WM9081_EQ_B2_A_WIDTH                        16  /* EQ_B2_A - [15:0] */
+
+/*
+ * R48 (0x30) - EQ 7
+ */
+#define WM9081_EQ_B2_B_MASK                     0xFFFF  /* EQ_B2_B - [15:0] */
+#define WM9081_EQ_B2_B_SHIFT                         0  /* EQ_B2_B - [15:0] */
+#define WM9081_EQ_B2_B_WIDTH                        16  /* EQ_B2_B - [15:0] */
+
+/*
+ * R49 (0x31) - EQ 8
+ */
+#define WM9081_EQ_B2_C_MASK                     0xFFFF  /* EQ_B2_C - [15:0] */
+#define WM9081_EQ_B2_C_SHIFT                         0  /* EQ_B2_C - [15:0] */
+#define WM9081_EQ_B2_C_WIDTH                        16  /* EQ_B2_C - [15:0] */
+
+/*
+ * R50 (0x32) - EQ 9
+ */
+#define WM9081_EQ_B2_PG_MASK                    0xFFFF  /* EQ_B2_PG - [15:0] */
+#define WM9081_EQ_B2_PG_SHIFT                        0  /* EQ_B2_PG - [15:0] */
+#define WM9081_EQ_B2_PG_WIDTH                       16  /* EQ_B2_PG - [15:0] */
+
+/*
+ * R51 (0x33) - EQ 10
+ */
+#define WM9081_EQ_B4_A_MASK                     0xFFFF  /* EQ_B4_A - [15:0] */
+#define WM9081_EQ_B4_A_SHIFT                         0  /* EQ_B4_A - [15:0] */
+#define WM9081_EQ_B4_A_WIDTH                        16  /* EQ_B4_A - [15:0] */
+
+/*
+ * R52 (0x34) - EQ 11
+ */
+#define WM9081_EQ_B4_B_MASK                     0xFFFF  /* EQ_B4_B - [15:0] */
+#define WM9081_EQ_B4_B_SHIFT                         0  /* EQ_B4_B - [15:0] */
+#define WM9081_EQ_B4_B_WIDTH                        16  /* EQ_B4_B - [15:0] */
+
+/*
+ * R53 (0x35) - EQ 12
+ */
+#define WM9081_EQ_B4_C_MASK                     0xFFFF  /* EQ_B4_C - [15:0] */
+#define WM9081_EQ_B4_C_SHIFT                         0  /* EQ_B4_C - [15:0] */
+#define WM9081_EQ_B4_C_WIDTH                        16  /* EQ_B4_C - [15:0] */
+
+/*
+ * R54 (0x36) - EQ 13
+ */
+#define WM9081_EQ_B4_PG_MASK                    0xFFFF  /* EQ_B4_PG - [15:0] */
+#define WM9081_EQ_B4_PG_SHIFT                        0  /* EQ_B4_PG - [15:0] */
+#define WM9081_EQ_B4_PG_WIDTH                       16  /* EQ_B4_PG - [15:0] */
+
+/*
+ * R55 (0x37) - EQ 14
+ */
+#define WM9081_EQ_B3_A_MASK                     0xFFFF  /* EQ_B3_A - [15:0] */
+#define WM9081_EQ_B3_A_SHIFT                         0  /* EQ_B3_A - [15:0] */
+#define WM9081_EQ_B3_A_WIDTH                        16  /* EQ_B3_A - [15:0] */
+
+/*
+ * R56 (0x38) - EQ 15
+ */
+#define WM9081_EQ_B3_B_MASK                     0xFFFF  /* EQ_B3_B - [15:0] */
+#define WM9081_EQ_B3_B_SHIFT                         0  /* EQ_B3_B - [15:0] */
+#define WM9081_EQ_B3_B_WIDTH                        16  /* EQ_B3_B - [15:0] */
+
+/*
+ * R57 (0x39) - EQ 16
+ */
+#define WM9081_EQ_B3_C_MASK                     0xFFFF  /* EQ_B3_C - [15:0] */
+#define WM9081_EQ_B3_C_SHIFT                         0  /* EQ_B3_C - [15:0] */
+#define WM9081_EQ_B3_C_WIDTH                        16  /* EQ_B3_C - [15:0] */
+
+/*
+ * R58 (0x3A) - EQ 17
+ */
+#define WM9081_EQ_B3_PG_MASK                    0xFFFF  /* EQ_B3_PG - [15:0] */
+#define WM9081_EQ_B3_PG_SHIFT                        0  /* EQ_B3_PG - [15:0] */
+#define WM9081_EQ_B3_PG_WIDTH                       16  /* EQ_B3_PG - [15:0] */
+
+/*
+ * R59 (0x3B) - EQ 18
+ */
+#define WM9081_EQ_B5_A_MASK                     0xFFFF  /* EQ_B5_A - [15:0] */
+#define WM9081_EQ_B5_A_SHIFT                         0  /* EQ_B5_A - [15:0] */
+#define WM9081_EQ_B5_A_WIDTH                        16  /* EQ_B5_A - [15:0] */
+
+/*
+ * R60 (0x3C) - EQ 19
+ */
+#define WM9081_EQ_B5_B_MASK                     0xFFFF  /* EQ_B5_B - [15:0] */
+#define WM9081_EQ_B5_B_SHIFT                         0  /* EQ_B5_B - [15:0] */
+#define WM9081_EQ_B5_B_WIDTH                        16  /* EQ_B5_B - [15:0] */
+
+/*
+ * R61 (0x3D) - EQ 20
+ */
+#define WM9081_EQ_B5_PG_MASK                    0xFFFF  /* EQ_B5_PG - [15:0] */
+#define WM9081_EQ_B5_PG_SHIFT                        0  /* EQ_B5_PG - [15:0] */
+#define WM9081_EQ_B5_PG_WIDTH                       16  /* EQ_B5_PG - [15:0] */
+
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From e31a16d6f64ef0e324c6f54d5112703c3f13a9c4 Mon Sep 17 00:00:00 2001
From: Zhu Yi <yi.zhu@intel.com>
Date: Thu, 21 May 2009 21:47:03 +0800
Subject: wireless: move some utility functions from mac80211 to cfg80211

The patch moves some utility functions from mac80211 to cfg80211.
Because these functions are doing generic 802.11 operations so they
are not mac80211 specific. The moving allows some fullmac drivers
to be also benefit from these utility functions.

Signed-off-by: Zhu Yi <yi.zhu@intel.com>
Signed-off-by: Samuel Ortiz <samuel.ortiz@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath/ar9170/main.c     |   2 +-
 drivers/net/wireless/ath/ath5k/pcu.c       |   4 +-
 drivers/net/wireless/ath/ath9k/hw.c        |   8 +-
 drivers/net/wireless/b43/main.c            |   2 +-
 drivers/net/wireless/rt2x00/rt2x00crypto.c |   2 +-
 include/linux/ieee80211.h                  |   9 +
 include/net/cfg80211.h                     |  47 +++++
 include/net/mac80211.h                     |  28 ---
 net/mac80211/ieee80211_i.h                 |   2 -
 net/mac80211/mesh.h                        |   4 -
 net/mac80211/rx.c                          |  89 +--------
 net/mac80211/util.c                        |  73 -------
 net/mac80211/wme.c                         |  30 +--
 net/wireless/util.c                        | 305 +++++++++++++++++++++++++++++
 14 files changed, 375 insertions(+), 230 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath/ar9170/main.c b/drivers/net/wireless/ath/ar9170/main.c
index 4ef1d2fc859..99df9ddae9c 100644
--- a/drivers/net/wireless/ath/ar9170/main.c
+++ b/drivers/net/wireless/ath/ar9170/main.c
@@ -1555,7 +1555,7 @@ static int ar9170_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 
 	switch (key->alg) {
 	case ALG_WEP:
-		if (key->keylen == LEN_WEP40)
+		if (key->keylen == WLAN_KEY_LEN_WEP40)
 			ktype = AR9170_ENC_ALG_WEP64;
 		else
 			ktype = AR9170_ENC_ALG_WEP128;
diff --git a/drivers/net/wireless/ath/ath5k/pcu.c b/drivers/net/wireless/ath/ath5k/pcu.c
index 579aa0a96ab..ec35503f6a4 100644
--- a/drivers/net/wireless/ath/ath5k/pcu.c
+++ b/drivers/net/wireless/ath/ath5k/pcu.c
@@ -1038,9 +1038,9 @@ int ath5k_keycache_type(const struct ieee80211_key_conf *key)
 	case ALG_CCMP:
 		return AR5K_KEYTABLE_TYPE_CCM;
 	case ALG_WEP:
-		if (key->keylen == LEN_WEP40)
+		if (key->keylen == WLAN_KEY_LEN_WEP40)
 			return AR5K_KEYTABLE_TYPE_40;
-		else if (key->keylen == LEN_WEP104)
+		else if (key->keylen == WLAN_KEY_LEN_WEP104)
 			return AR5K_KEYTABLE_TYPE_104;
 		return -EINVAL;
 	default:
diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c
index 4acfab51491..1579c9407ed 100644
--- a/drivers/net/wireless/ath/ath9k/hw.c
+++ b/drivers/net/wireless/ath/ath9k/hw.c
@@ -2472,14 +2472,14 @@ bool ath9k_hw_set_keycache_entry(struct ath_hw *ah, u16 entry,
 		}
 		break;
 	case ATH9K_CIPHER_WEP:
-		if (k->kv_len < LEN_WEP40) {
+		if (k->kv_len < WLAN_KEY_LEN_WEP40) {
 			DPRINTF(ah->ah_sc, ATH_DBG_ANY,
 				"WEP key length %u too small\n", k->kv_len);
 			return false;
 		}
-		if (k->kv_len <= LEN_WEP40)
+		if (k->kv_len <= WLAN_KEY_LEN_WEP40)
 			keyType = AR_KEYTABLE_TYPE_40;
-		else if (k->kv_len <= LEN_WEP104)
+		else if (k->kv_len <= WLAN_KEY_LEN_WEP104)
 			keyType = AR_KEYTABLE_TYPE_104;
 		else
 			keyType = AR_KEYTABLE_TYPE_128;
@@ -2498,7 +2498,7 @@ bool ath9k_hw_set_keycache_entry(struct ath_hw *ah, u16 entry,
 	key2 = get_unaligned_le32(k->kv_val + 6);
 	key3 = get_unaligned_le16(k->kv_val + 10);
 	key4 = get_unaligned_le32(k->kv_val + 12);
-	if (k->kv_len <= LEN_WEP104)
+	if (k->kv_len <= WLAN_KEY_LEN_WEP104)
 		key4 &= 0xff;
 
 	/*
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index ec8516eadc4..cb4a8712946 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -3637,7 +3637,7 @@ static int b43_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 	err = -EINVAL;
 	switch (key->alg) {
 	case ALG_WEP:
-		if (key->keylen == LEN_WEP40)
+		if (key->keylen == WLAN_KEY_LEN_WEP40)
 			algorithm = B43_SEC_ALGO_WEP40;
 		else
 			algorithm = B43_SEC_ALGO_WEP104;
diff --git a/drivers/net/wireless/rt2x00/rt2x00crypto.c b/drivers/net/wireless/rt2x00/rt2x00crypto.c
index 57ab42cfed3..bc4e81e2184 100644
--- a/drivers/net/wireless/rt2x00/rt2x00crypto.c
+++ b/drivers/net/wireless/rt2x00/rt2x00crypto.c
@@ -33,7 +33,7 @@ enum cipher rt2x00crypto_key_to_cipher(struct ieee80211_key_conf *key)
 {
 	switch (key->alg) {
 	case ALG_WEP:
-		if (key->keylen == LEN_WEP40)
+		if (key->keylen == WLAN_KEY_LEN_WEP40)
 			return CIPHER_WEP64;
 		else
 			return CIPHER_WEP128;
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 05c29c01174..34de8b21f6d 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -493,6 +493,7 @@ struct ieee80211s_hdr {
 /* Mesh flags */
 #define MESH_FLAGS_AE_A4 	0x1
 #define MESH_FLAGS_AE_A5_A6	0x2
+#define MESH_FLAGS_AE		0x3
 #define MESH_FLAGS_PS_DEEP	0x4
 
 /**
@@ -1085,6 +1086,14 @@ enum ieee80211_spectrum_mgmt_actioncode {
 	WLAN_ACTION_SPCT_CHL_SWITCH = 4,
 };
 
+/* Security key length */
+enum ieee80211_key_len {
+	WLAN_KEY_LEN_WEP40 = 5,
+	WLAN_KEY_LEN_WEP104 = 13,
+	WLAN_KEY_LEN_CCMP = 16,
+	WLAN_KEY_LEN_TKIP = 32,
+};
+
 /*
  * IEEE 802.11-2007 7.3.2.9 Country information element
  *
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 389f1d20adf..f20da7d63b1 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1244,6 +1244,53 @@ extern int ieee80211_radiotap_iterator_init(
 extern int ieee80211_radiotap_iterator_next(
    struct ieee80211_radiotap_iterator *iterator);
 
+extern const unsigned char rfc1042_header[6];
+extern const unsigned char bridge_tunnel_header[6];
+
+/**
+ * ieee80211_get_hdrlen_from_skb - get header length from data
+ *
+ * Given an skb with a raw 802.11 header at the data pointer this function
+ * returns the 802.11 header length in bytes (not including encryption
+ * headers). If the data in the sk_buff is too short to contain a valid 802.11
+ * header the function returns 0.
+ *
+ * @skb: the frame
+ */
+unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb);
+
+/**
+ * ieee80211_hdrlen - get header length in bytes from frame control
+ * @fc: frame control field in little-endian format
+ */
+unsigned int ieee80211_hdrlen(__le16 fc);
+
+/**
+ * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3
+ * @skb: the 802.11 data frame
+ * @addr: the device MAC address
+ * @iftype: the virtual interface type
+ */
+int ieee80211_data_to_8023(struct sk_buff *skb, u8 *addr,
+			   enum nl80211_iftype iftype);
+
+/**
+ * ieee80211_data_from_8023 - convert an 802.3 frame to 802.11
+ * @skb: the 802.3 frame
+ * @addr: the device MAC address
+ * @iftype: the virtual interface type
+ * @bssid: the network bssid (used only for iftype STATION and ADHOC)
+ * @qos: build 802.11 QoS data frame
+ */
+int ieee80211_data_from_8023(struct sk_buff *skb, u8 *addr,
+			     enum nl80211_iftype iftype, u8 *bssid, bool qos);
+
+/**
+ * cfg80211_classify8021d - determine the 802.1p/1d tag for a data frame
+ * @skb: the data frame
+ */
+unsigned int cfg80211_classify8021d(struct sk_buff *skb);
+
 /*
  * Regulatory helper functions for wiphys
  */
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 2d0610581ef..d72346ff324 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -671,16 +671,6 @@ enum ieee80211_key_alg {
 	ALG_AES_CMAC,
 };
 
-/**
- * enum ieee80211_key_len - key length
- * @LEN_WEP40: WEP 5-byte long key
- * @LEN_WEP104: WEP 13-byte long key
- */
-enum ieee80211_key_len {
-	LEN_WEP40 = 5,
-	LEN_WEP104 = 13,
-};
-
 /**
  * enum ieee80211_key_flags - key flags
  *
@@ -1811,24 +1801,6 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw,
 struct sk_buff *
 ieee80211_get_buffered_bc(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
 
-/**
- * ieee80211_get_hdrlen_from_skb - get header length from data
- *
- * Given an skb with a raw 802.11 header at the data pointer this function
- * returns the 802.11 header length in bytes (not including encryption
- * headers). If the data in the sk_buff is too short to contain a valid 802.11
- * header the function returns 0.
- *
- * @skb: the frame
- */
-unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb);
-
-/**
- * ieee80211_hdrlen - get header length in bytes from frame control
- * @fc: frame control field in little-endian format
- */
-unsigned int ieee80211_hdrlen(__le16 fc);
-
 /**
  * ieee80211_get_tkip_key - get a TKIP rc4 for skb
  *
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 8db8d16d206..c088c46704a 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1085,8 +1085,6 @@ static inline int __ieee80211_resume(struct ieee80211_hw *hw)
 
 /* utility functions/constants */
 extern void *mac80211_wiphy_privid; /* for wiphy privid */
-extern const unsigned char rfc1042_header[6];
-extern const unsigned char bridge_tunnel_header[6];
 u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
 			enum nl80211_iftype type);
 int ieee80211_frame_duration(struct ieee80211_local *local, size_t len,
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 832bb503ca9..c7d72819cdd 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -191,12 +191,8 @@ struct mesh_rmc {
 #define PLINK_CATEGORY		30
 #define MESH_PATH_SEL_CATEGORY	32
 
-/* Mesh Header Flags */
-#define IEEE80211S_FLAGS_AE	0x3
-
 /* Public interfaces */
 /* Various */
-int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr);
 int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr,
 		struct ieee80211_sub_if_data *sdata);
 int mesh_rmc_check(u8 *addr, struct ieee80211s_hdr *mesh_hdr,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index f3a041cc5dc..6a9b8e63a6b 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1247,93 +1247,12 @@ ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc)
 }
 
 static int
-ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
+__ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
 {
 	struct net_device *dev = rx->dev;
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data;
-	u16 hdrlen, ethertype;
-	u8 *payload;
-	u8 dst[ETH_ALEN];
-	u8 src[ETH_ALEN] __aligned(2);
-	struct sk_buff *skb = rx->skb;
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
-		return -1;
-
-	hdrlen = ieee80211_hdrlen(hdr->frame_control);
-
-	/* convert IEEE 802.11 header + possible LLC headers into Ethernet
-	 * header
-	 * IEEE 802.11 address fields:
-	 * ToDS FromDS Addr1 Addr2 Addr3 Addr4
-	 *   0     0   DA    SA    BSSID n/a
-	 *   0     1   DA    BSSID SA    n/a
-	 *   1     0   BSSID SA    DA    n/a
-	 *   1     1   RA    TA    DA    SA
-	 */
-	memcpy(dst, ieee80211_get_DA(hdr), ETH_ALEN);
-	memcpy(src, ieee80211_get_SA(hdr), ETH_ALEN);
-
-	switch (hdr->frame_control &
-		cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
-	case cpu_to_le16(IEEE80211_FCTL_TODS):
-		if (unlikely(sdata->vif.type != NL80211_IFTYPE_AP &&
-			     sdata->vif.type != NL80211_IFTYPE_AP_VLAN))
-			return -1;
-		break;
-	case cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS):
-		if (unlikely(sdata->vif.type != NL80211_IFTYPE_WDS &&
-			     sdata->vif.type != NL80211_IFTYPE_MESH_POINT))
-			return -1;
-		if (ieee80211_vif_is_mesh(&sdata->vif)) {
-			struct ieee80211s_hdr *meshdr = (struct ieee80211s_hdr *)
-				(skb->data + hdrlen);
-			hdrlen += ieee80211_get_mesh_hdrlen(meshdr);
-			if (meshdr->flags & MESH_FLAGS_AE_A5_A6) {
-				memcpy(dst, meshdr->eaddr1, ETH_ALEN);
-				memcpy(src, meshdr->eaddr2, ETH_ALEN);
-			}
-		}
-		break;
-	case cpu_to_le16(IEEE80211_FCTL_FROMDS):
-		if (sdata->vif.type != NL80211_IFTYPE_STATION ||
-		    (is_multicast_ether_addr(dst) &&
-		     !compare_ether_addr(src, dev->dev_addr)))
-			return -1;
-		break;
-	case cpu_to_le16(0):
-		if (sdata->vif.type != NL80211_IFTYPE_ADHOC)
-			return -1;
-		break;
-	}
-
-	if (unlikely(skb->len - hdrlen < 8))
-		return -1;
-
-	payload = skb->data + hdrlen;
-	ethertype = (payload[6] << 8) | payload[7];
-
-	if (likely((compare_ether_addr(payload, rfc1042_header) == 0 &&
-		    ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) ||
-		   compare_ether_addr(payload, bridge_tunnel_header) == 0)) {
-		/* remove RFC1042 or Bridge-Tunnel encapsulation and
-		 * replace EtherType */
-		skb_pull(skb, hdrlen + 6);
-		memcpy(skb_push(skb, ETH_ALEN), src, ETH_ALEN);
-		memcpy(skb_push(skb, ETH_ALEN), dst, ETH_ALEN);
-	} else {
-		struct ethhdr *ehdr;
-		__be16 len;
-
-		skb_pull(skb, hdrlen);
-		len = htons(skb->len);
-		ehdr = (struct ethhdr *) skb_push(skb, sizeof(struct ethhdr));
-		memcpy(ehdr->h_dest, dst, ETH_ALEN);
-		memcpy(ehdr->h_source, src, ETH_ALEN);
-		ehdr->h_proto = len;
-	}
-	return 0;
+	return ieee80211_data_to_8023(rx->skb, dev->dev_addr, sdata->vif.type);
 }
 
 /*
@@ -1472,7 +1391,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
 	if (!(rx->flags & IEEE80211_RX_AMSDU))
 		return RX_CONTINUE;
 
-	err = ieee80211_data_to_8023(rx);
+	err = __ieee80211_data_to_8023(rx);
 	if (unlikely(err))
 		return RX_DROP_UNUSABLE;
 
@@ -1658,7 +1577,7 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
 	if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
 		return RX_DROP_MONITOR;
 
-	err = ieee80211_data_to_8023(rx);
+	err = __ieee80211_data_to_8023(rx);
 	if (unlikely(err))
 		return RX_DROP_UNUSABLE;
 
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index ffb6e88f2ec..949d857debd 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -35,15 +35,6 @@
 /* privid for wiphys to determine whether they belong to us or not */
 void *mac80211_wiphy_privid = &mac80211_wiphy_privid;
 
-/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */
-/* Ethernet-II snap header (RFC1042 for most EtherTypes) */
-const unsigned char rfc1042_header[] __aligned(2) =
-	{ 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 };
-
-/* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */
-const unsigned char bridge_tunnel_header[] __aligned(2) =
-	{ 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 };
-
 struct ieee80211_hw *wiphy_to_ieee80211_hw(struct wiphy *wiphy)
 {
 	struct ieee80211_local *local;
@@ -103,70 +94,6 @@ u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
 	return NULL;
 }
 
-unsigned int ieee80211_hdrlen(__le16 fc)
-{
-	unsigned int hdrlen = 24;
-
-	if (ieee80211_is_data(fc)) {
-		if (ieee80211_has_a4(fc))
-			hdrlen = 30;
-		if (ieee80211_is_data_qos(fc))
-			hdrlen += IEEE80211_QOS_CTL_LEN;
-		goto out;
-	}
-
-	if (ieee80211_is_ctl(fc)) {
-		/*
-		 * ACK and CTS are 10 bytes, all others 16. To see how
-		 * to get this condition consider
-		 *   subtype mask:   0b0000000011110000 (0x00F0)
-		 *   ACK subtype:    0b0000000011010000 (0x00D0)
-		 *   CTS subtype:    0b0000000011000000 (0x00C0)
-		 *   bits that matter:         ^^^      (0x00E0)
-		 *   value of those: 0b0000000011000000 (0x00C0)
-		 */
-		if ((fc & cpu_to_le16(0x00E0)) == cpu_to_le16(0x00C0))
-			hdrlen = 10;
-		else
-			hdrlen = 16;
-	}
-out:
-	return hdrlen;
-}
-EXPORT_SYMBOL(ieee80211_hdrlen);
-
-unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb)
-{
-	const struct ieee80211_hdr *hdr = (const struct ieee80211_hdr *)skb->data;
-	unsigned int hdrlen;
-
-	if (unlikely(skb->len < 10))
-		return 0;
-	hdrlen = ieee80211_hdrlen(hdr->frame_control);
-	if (unlikely(hdrlen > skb->len))
-		return 0;
-	return hdrlen;
-}
-EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb);
-
-int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr)
-{
-	int ae = meshhdr->flags & IEEE80211S_FLAGS_AE;
-	/* 7.1.3.5a.2 */
-	switch (ae) {
-	case 0:
-		return 6;
-	case 1:
-		return 12;
-	case 2:
-		return 18;
-	case 3:
-		return 24;
-	default:
-		return 6;
-	}
-}
-
 void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx)
 {
 	struct sk_buff *skb = tx->skb;
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 45b74f38b86..694343b9102 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -23,34 +23,6 @@
  */
 const int ieee802_1d_to_ac[8] = { 2, 3, 3, 2, 1, 1, 0, 0 };
 
-static const char llc_ip_hdr[8] = {0xAA, 0xAA, 0x3, 0, 0, 0, 0x08, 0};
-
-/* Given a data frame determine the 802.1p/1d tag to use.  */
-static unsigned int classify_1d(struct sk_buff *skb)
-{
-	unsigned int dscp;
-
-	/* skb->priority values from 256->263 are magic values to
-	 * directly indicate a specific 802.1d priority.  This is used
-	 * to allow 802.1d priority to be passed directly in from VLAN
-	 * tags, etc.
-	 */
-	if (skb->priority >= 256 && skb->priority <= 263)
-		return skb->priority - 256;
-
-	switch (skb->protocol) {
-	case htons(ETH_P_IP):
-		dscp = ip_hdr(skb)->tos & 0xfc;
-		break;
-
-	default:
-		return 0;
-	}
-
-	return dscp >> 5;
-}
-
-
 static int wme_downgrade_ac(struct sk_buff *skb)
 {
 	switch (skb->priority) {
@@ -94,7 +66,7 @@ static u16 classify80211(struct ieee80211_local *local, struct sk_buff *skb)
 
 	/* use the data classifier to determine what 802.1d tag the
 	 * data frame has */
-	skb->priority = classify_1d(skb);
+	skb->priority = cfg80211_classify8021d(skb);
 
 	/* in case we are a client verify acm is not set for this ac */
 	while (unlikely(local->wmm_acm & BIT(skb->priority))) {
diff --git a/net/wireless/util.c b/net/wireless/util.c
index b94c8604ad7..d072bff463a 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -4,7 +4,9 @@
  * Copyright 2007-2009	Johannes Berg <johannes@sipsolutions.net>
  */
 #include <linux/bitops.h>
+#include <linux/etherdevice.h>
 #include <net/cfg80211.h>
+#include <net/ip.h>
 #include "core.h"
 
 struct ieee80211_rate *
@@ -198,3 +200,306 @@ int cfg80211_validate_key_settings(struct key_params *params, int key_idx,
 
 	return 0;
 }
+
+/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */
+/* Ethernet-II snap header (RFC1042 for most EtherTypes) */
+const unsigned char rfc1042_header[] __aligned(2) =
+	{ 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 };
+EXPORT_SYMBOL(rfc1042_header);
+
+/* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */
+const unsigned char bridge_tunnel_header[] __aligned(2) =
+	{ 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 };
+EXPORT_SYMBOL(bridge_tunnel_header);
+
+unsigned int ieee80211_hdrlen(__le16 fc)
+{
+	unsigned int hdrlen = 24;
+
+	if (ieee80211_is_data(fc)) {
+		if (ieee80211_has_a4(fc))
+			hdrlen = 30;
+		if (ieee80211_is_data_qos(fc))
+			hdrlen += IEEE80211_QOS_CTL_LEN;
+		goto out;
+	}
+
+	if (ieee80211_is_ctl(fc)) {
+		/*
+		 * ACK and CTS are 10 bytes, all others 16. To see how
+		 * to get this condition consider
+		 *   subtype mask:   0b0000000011110000 (0x00F0)
+		 *   ACK subtype:    0b0000000011010000 (0x00D0)
+		 *   CTS subtype:    0b0000000011000000 (0x00C0)
+		 *   bits that matter:         ^^^      (0x00E0)
+		 *   value of those: 0b0000000011000000 (0x00C0)
+		 */
+		if ((fc & cpu_to_le16(0x00E0)) == cpu_to_le16(0x00C0))
+			hdrlen = 10;
+		else
+			hdrlen = 16;
+	}
+out:
+	return hdrlen;
+}
+EXPORT_SYMBOL(ieee80211_hdrlen);
+
+unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb)
+{
+	const struct ieee80211_hdr *hdr =
+			(const struct ieee80211_hdr *)skb->data;
+	unsigned int hdrlen;
+
+	if (unlikely(skb->len < 10))
+		return 0;
+	hdrlen = ieee80211_hdrlen(hdr->frame_control);
+	if (unlikely(hdrlen > skb->len))
+		return 0;
+	return hdrlen;
+}
+EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb);
+
+int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr)
+{
+	int ae = meshhdr->flags & MESH_FLAGS_AE;
+	/* 7.1.3.5a.2 */
+	switch (ae) {
+	case 0:
+		return 6;
+	case 1:
+		return 12;
+	case 2:
+		return 18;
+	case 3:
+		return 24;
+	default:
+		return 6;
+	}
+}
+
+int ieee80211_data_to_8023(struct sk_buff *skb, u8 *addr,
+			   enum nl80211_iftype iftype)
+{
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	u16 hdrlen, ethertype;
+	u8 *payload;
+	u8 dst[ETH_ALEN];
+	u8 src[ETH_ALEN] __aligned(2);
+
+	if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
+		return -1;
+
+	hdrlen = ieee80211_hdrlen(hdr->frame_control);
+
+	/* convert IEEE 802.11 header + possible LLC headers into Ethernet
+	 * header
+	 * IEEE 802.11 address fields:
+	 * ToDS FromDS Addr1 Addr2 Addr3 Addr4
+	 *   0     0   DA    SA    BSSID n/a
+	 *   0     1   DA    BSSID SA    n/a
+	 *   1     0   BSSID SA    DA    n/a
+	 *   1     1   RA    TA    DA    SA
+	 */
+	memcpy(dst, ieee80211_get_DA(hdr), ETH_ALEN);
+	memcpy(src, ieee80211_get_SA(hdr), ETH_ALEN);
+
+	switch (hdr->frame_control &
+		cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
+	case cpu_to_le16(IEEE80211_FCTL_TODS):
+		if (unlikely(iftype != NL80211_IFTYPE_AP &&
+			     iftype != NL80211_IFTYPE_AP_VLAN))
+			return -1;
+		break;
+	case cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS):
+		if (unlikely(iftype != NL80211_IFTYPE_WDS &&
+			     iftype != NL80211_IFTYPE_MESH_POINT))
+			return -1;
+		if (iftype == NL80211_IFTYPE_MESH_POINT) {
+			struct ieee80211s_hdr *meshdr =
+				(struct ieee80211s_hdr *) (skb->data + hdrlen);
+			hdrlen += ieee80211_get_mesh_hdrlen(meshdr);
+			if (meshdr->flags & MESH_FLAGS_AE_A5_A6) {
+				memcpy(dst, meshdr->eaddr1, ETH_ALEN);
+				memcpy(src, meshdr->eaddr2, ETH_ALEN);
+			}
+		}
+		break;
+	case cpu_to_le16(IEEE80211_FCTL_FROMDS):
+		if (iftype != NL80211_IFTYPE_STATION ||
+		    (is_multicast_ether_addr(dst) &&
+		     !compare_ether_addr(src, addr)))
+			return -1;
+		break;
+	case cpu_to_le16(0):
+		if (iftype != NL80211_IFTYPE_ADHOC)
+			return -1;
+		break;
+	}
+
+	if (unlikely(skb->len - hdrlen < 8))
+		return -1;
+
+	payload = skb->data + hdrlen;
+	ethertype = (payload[6] << 8) | payload[7];
+
+	if (likely((compare_ether_addr(payload, rfc1042_header) == 0 &&
+		    ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) ||
+		   compare_ether_addr(payload, bridge_tunnel_header) == 0)) {
+		/* remove RFC1042 or Bridge-Tunnel encapsulation and
+		 * replace EtherType */
+		skb_pull(skb, hdrlen + 6);
+		memcpy(skb_push(skb, ETH_ALEN), src, ETH_ALEN);
+		memcpy(skb_push(skb, ETH_ALEN), dst, ETH_ALEN);
+	} else {
+		struct ethhdr *ehdr;
+		__be16 len;
+
+		skb_pull(skb, hdrlen);
+		len = htons(skb->len);
+		ehdr = (struct ethhdr *) skb_push(skb, sizeof(struct ethhdr));
+		memcpy(ehdr->h_dest, dst, ETH_ALEN);
+		memcpy(ehdr->h_source, src, ETH_ALEN);
+		ehdr->h_proto = len;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(ieee80211_data_to_8023);
+
+int ieee80211_data_from_8023(struct sk_buff *skb, u8 *addr,
+			     enum nl80211_iftype iftype, u8 *bssid, bool qos)
+{
+	struct ieee80211_hdr hdr;
+	u16 hdrlen, ethertype;
+	__le16 fc;
+	const u8 *encaps_data;
+	int encaps_len, skip_header_bytes;
+	int nh_pos, h_pos;
+	int head_need;
+
+	if (unlikely(skb->len < ETH_HLEN))
+		return -EINVAL;
+
+	nh_pos = skb_network_header(skb) - skb->data;
+	h_pos = skb_transport_header(skb) - skb->data;
+
+	/* convert Ethernet header to proper 802.11 header (based on
+	 * operation mode) */
+	ethertype = (skb->data[12] << 8) | skb->data[13];
+	fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA);
+
+	switch (iftype) {
+	case NL80211_IFTYPE_AP:
+	case NL80211_IFTYPE_AP_VLAN:
+		fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS);
+		/* DA BSSID SA */
+		memcpy(hdr.addr1, skb->data, ETH_ALEN);
+		memcpy(hdr.addr2, addr, ETH_ALEN);
+		memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN);
+		hdrlen = 24;
+		break;
+	case NL80211_IFTYPE_STATION:
+		fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
+		/* BSSID SA DA */
+		memcpy(hdr.addr1, bssid, ETH_ALEN);
+		memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
+		memcpy(hdr.addr3, skb->data, ETH_ALEN);
+		hdrlen = 24;
+		break;
+	case NL80211_IFTYPE_ADHOC:
+		/* DA SA BSSID */
+		memcpy(hdr.addr1, skb->data, ETH_ALEN);
+		memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
+		memcpy(hdr.addr3, bssid, ETH_ALEN);
+		hdrlen = 24;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	if (qos) {
+		fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA);
+		hdrlen += 2;
+	}
+
+	hdr.frame_control = fc;
+	hdr.duration_id = 0;
+	hdr.seq_ctrl = 0;
+
+	skip_header_bytes = ETH_HLEN;
+	if (ethertype == ETH_P_AARP || ethertype == ETH_P_IPX) {
+		encaps_data = bridge_tunnel_header;
+		encaps_len = sizeof(bridge_tunnel_header);
+		skip_header_bytes -= 2;
+	} else if (ethertype > 0x600) {
+		encaps_data = rfc1042_header;
+		encaps_len = sizeof(rfc1042_header);
+		skip_header_bytes -= 2;
+	} else {
+		encaps_data = NULL;
+		encaps_len = 0;
+	}
+
+	skb_pull(skb, skip_header_bytes);
+	nh_pos -= skip_header_bytes;
+	h_pos -= skip_header_bytes;
+
+	head_need = hdrlen + encaps_len - skb_headroom(skb);
+
+	if (head_need > 0 || skb_cloned(skb)) {
+		head_need = max(head_need, 0);
+		if (head_need)
+			skb_orphan(skb);
+
+		if (pskb_expand_head(skb, head_need, 0, GFP_ATOMIC)) {
+			printk(KERN_ERR "failed to reallocate Tx buffer\n");
+			return -ENOMEM;
+		}
+		skb->truesize += head_need;
+	}
+
+	if (encaps_data) {
+		memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len);
+		nh_pos += encaps_len;
+		h_pos += encaps_len;
+	}
+
+	memcpy(skb_push(skb, hdrlen), &hdr, hdrlen);
+
+	nh_pos += hdrlen;
+	h_pos += hdrlen;
+
+	/* Update skb pointers to various headers since this modified frame
+	 * is going to go through Linux networking code that may potentially
+	 * need things like pointer to IP header. */
+	skb_set_mac_header(skb, 0);
+	skb_set_network_header(skb, nh_pos);
+	skb_set_transport_header(skb, h_pos);
+
+	return 0;
+}
+EXPORT_SYMBOL(ieee80211_data_from_8023);
+
+/* Given a data frame determine the 802.1p/1d tag to use. */
+unsigned int cfg80211_classify8021d(struct sk_buff *skb)
+{
+	unsigned int dscp;
+
+	/* skb->priority values from 256->263 are magic values to
+	 * directly indicate a specific 802.1d priority.  This is used
+	 * to allow 802.1d priority to be passed directly in from VLAN
+	 * tags, etc.
+	 */
+	if (skb->priority >= 256 && skb->priority <= 263)
+		return skb->priority - 256;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		dscp = ip_hdr(skb)->tos & 0xfc;
+		break;
+	default:
+		return 0;
+	}
+
+	return dscp >> 5;
+}
+EXPORT_SYMBOL(cfg80211_classify8021d);
-- 
cgit v1.2.3-70-g09d2


From e1defc4ff0cf57aca6c5e3ff99fa503f5943c1f1 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 22 May 2009 17:17:49 -0400
Subject: block: Do away with the notion of hardsect_size

Until now we have had a 1:1 mapping between storage device physical
block size and the logical block sized used when addressing the device.
With SATA 4KB drives coming out that will no longer be the case.  The
sector size will be 4KB but the logical block size will remain
512-bytes.  Hence we need to distinguish between the physical block size
and the logical ditto.

This patch renames hardsect_size to logical_block_size.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 arch/powerpc/sysdev/axonram.c       |  2 +-
 block/blk-integrity.c               |  2 +-
 block/blk-settings.c                | 21 ++++++++++-----------
 block/blk-sysfs.c                   | 12 +++++++++---
 block/compat_ioctl.c                |  2 +-
 block/ioctl.c                       |  2 +-
 drivers/block/cciss.c               |  6 +++---
 drivers/block/cpqarray.c            |  4 ++--
 drivers/block/hd.c                  |  2 +-
 drivers/block/mg_disk.c             |  2 +-
 drivers/block/pktcdvd.c             |  2 +-
 drivers/block/ps3disk.c             |  2 +-
 drivers/block/ub.c                  |  6 +++---
 drivers/block/virtio_blk.c          |  2 +-
 drivers/block/xen-blkfront.c        |  2 +-
 drivers/block/xsysace.c             |  2 +-
 drivers/cdrom/gdrom.c               |  2 +-
 drivers/cdrom/viocd.c               |  4 ++--
 drivers/char/raw.c                  |  2 +-
 drivers/ide/ide-cd.c                | 12 ++++++------
 drivers/md/bitmap.c                 |  4 ++--
 drivers/md/dm-exception-store.c     |  2 +-
 drivers/md/dm-log.c                 |  3 ++-
 drivers/md/dm-snap-persistent.c     |  2 +-
 drivers/md/dm-table.c               | 12 +++++++-----
 drivers/md/md.c                     |  2 +-
 drivers/memstick/core/mspro_block.c |  2 +-
 drivers/message/i2o/i2o_block.c     |  5 +++--
 drivers/mmc/card/block.c            |  2 +-
 drivers/mtd/mtd_blkdevs.c           |  2 +-
 drivers/s390/block/dasd.c           |  2 +-
 drivers/s390/block/dcssblk.c        |  2 +-
 drivers/s390/block/xpram.c          |  2 +-
 drivers/s390/char/tape_block.c      |  2 +-
 drivers/scsi/sd.c                   |  2 +-
 drivers/scsi/sr.c                   |  2 +-
 fs/bio.c                            |  3 ++-
 fs/block_dev.c                      |  6 +++---
 fs/buffer.c                         |  6 +++---
 fs/direct-io.c                      |  2 +-
 fs/ext3/super.c                     |  4 ++--
 fs/ext4/super.c                     |  2 +-
 fs/gfs2/ops_fstype.c                |  4 ++--
 fs/gfs2/rgrp.c                      |  2 +-
 fs/nilfs2/the_nilfs.c               |  2 +-
 fs/ntfs/super.c                     |  6 +++---
 fs/ocfs2/cluster/heartbeat.c        |  2 +-
 fs/ocfs2/super.c                    |  2 +-
 fs/partitions/ibm.c                 |  2 +-
 fs/partitions/msdos.c               |  4 ++--
 fs/udf/super.c                      |  2 +-
 fs/xfs/linux-2.6/xfs_buf.c          |  2 +-
 include/linux/blkdev.h              | 14 +++++++-------
 include/linux/device-mapper.h       |  2 +-
 54 files changed, 108 insertions(+), 98 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 9e105cbc5e5..a4779912a5c 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -250,7 +250,7 @@ axon_ram_probe(struct of_device *device, const struct of_device_id *device_id)
 
 	set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT);
 	blk_queue_make_request(bank->disk->queue, axon_ram_make_request);
-	blk_queue_hardsect_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE);
+	blk_queue_logical_block_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE);
 	add_disk(bank->disk);
 
 	bank->irq_id = irq_of_parse_and_map(device->node, 0);
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 91fa8e06b6a..73e28d35568 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -340,7 +340,7 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
 		kobject_uevent(&bi->kobj, KOBJ_ADD);
 
 		bi->flags |= INTEGRITY_FLAG_READ | INTEGRITY_FLAG_WRITE;
-		bi->sector_size = disk->queue->hardsect_size;
+		bi->sector_size = queue_logical_block_size(disk->queue);
 		disk->integrity = bi;
 	} else
 		bi = disk->integrity;
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 57af728d94b..15c3164537b 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -134,7 +134,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
 	blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
-	blk_queue_hardsect_size(q, 512);
+	blk_queue_logical_block_size(q, 512);
 	blk_queue_dma_alignment(q, 511);
 	blk_queue_congestion_threshold(q);
 	q->nr_batching = BLK_BATCH_REQ;
@@ -288,21 +288,20 @@ void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size)
 EXPORT_SYMBOL(blk_queue_max_segment_size);
 
 /**
- * blk_queue_hardsect_size - set hardware sector size for the queue
+ * blk_queue_logical_block_size - set logical block size for the queue
  * @q:  the request queue for the device
- * @size:  the hardware sector size, in bytes
+ * @size:  the logical block size, in bytes
  *
  * Description:
- *   This should typically be set to the lowest possible sector size
- *   that the hardware can operate on (possible without reverting to
- *   even internal read-modify-write operations). Usually the default
- *   of 512 covers most hardware.
+ *   This should be set to the lowest possible block size that the
+ *   storage device can address.  The default of 512 covers most
+ *   hardware.
  **/
-void blk_queue_hardsect_size(struct request_queue *q, unsigned short size)
+void blk_queue_logical_block_size(struct request_queue *q, unsigned short size)
 {
-	q->hardsect_size = size;
+	q->logical_block_size = size;
 }
-EXPORT_SYMBOL(blk_queue_hardsect_size);
+EXPORT_SYMBOL(blk_queue_logical_block_size);
 
 /*
  * Returns the minimum that is _not_ zero, unless both are zero.
@@ -324,7 +323,7 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 	t->max_phys_segments = min_not_zero(t->max_phys_segments, b->max_phys_segments);
 	t->max_hw_segments = min_not_zero(t->max_hw_segments, b->max_hw_segments);
 	t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size);
-	t->hardsect_size = max(t->hardsect_size, b->hardsect_size);
+	t->logical_block_size = max(t->logical_block_size, b->logical_block_size);
 	if (!t->queue_lock)
 		WARN_ON_ONCE(1);
 	else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 3ff9bba3379..13d38b7e4d0 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -100,9 +100,9 @@ static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
 	return queue_var_show(max_sectors_kb, (page));
 }
 
-static ssize_t queue_hw_sector_size_show(struct request_queue *q, char *page)
+static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page)
 {
-	return queue_var_show(q->hardsect_size, page);
+	return queue_var_show(queue_logical_block_size(q), page);
 }
 
 static ssize_t
@@ -249,7 +249,12 @@ static struct queue_sysfs_entry queue_iosched_entry = {
 
 static struct queue_sysfs_entry queue_hw_sector_size_entry = {
 	.attr = {.name = "hw_sector_size", .mode = S_IRUGO },
-	.show = queue_hw_sector_size_show,
+	.show = queue_logical_block_size_show,
+};
+
+static struct queue_sysfs_entry queue_logical_block_size_entry = {
+	.attr = {.name = "logical_block_size", .mode = S_IRUGO },
+	.show = queue_logical_block_size_show,
 };
 
 static struct queue_sysfs_entry queue_nonrot_entry = {
@@ -283,6 +288,7 @@ static struct attribute *default_attrs[] = {
 	&queue_max_sectors_entry.attr,
 	&queue_iosched_entry.attr,
 	&queue_hw_sector_size_entry.attr,
+	&queue_logical_block_size_entry.attr,
 	&queue_nonrot_entry.attr,
 	&queue_nomerges_entry.attr,
 	&queue_rq_affinity_entry.attr,
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index f87615dea46..9eaa1940273 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -763,7 +763,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 	case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */
 		return compat_put_int(arg, block_size(bdev));
 	case BLKSSZGET: /* get block device hardware sector size */
-		return compat_put_int(arg, bdev_hardsect_size(bdev));
+		return compat_put_int(arg, bdev_logical_block_size(bdev));
 	case BLKSECTGET:
 		return compat_put_ushort(arg,
 					 bdev_get_queue(bdev)->max_sectors);
diff --git a/block/ioctl.c b/block/ioctl.c
index ad474d4bbcc..7aa97f65da8 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -311,7 +311,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 	case BLKBSZGET: /* get the logical block size (cf. BLKSSZGET) */
 		return put_int(arg, block_size(bdev));
 	case BLKSSZGET: /* get block device hardware sector size */
-		return put_int(arg, bdev_hardsect_size(bdev));
+		return put_int(arg, bdev_logical_block_size(bdev));
 	case BLKSECTGET:
 		return put_ushort(arg, bdev_get_queue(bdev)->max_sectors);
 	case BLKRASET:
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index e714e7cce6f..94474f5f8bc 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1389,8 +1389,8 @@ static void cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
 
 	disk->queue->queuedata = h;
 
-	blk_queue_hardsect_size(disk->queue,
-				h->drv[drv_index].block_size);
+	blk_queue_logical_block_size(disk->queue,
+				     h->drv[drv_index].block_size);
 
 	/* Make sure all queue data is written out before */
 	/* setting h->drv[drv_index].queue, as setting this */
@@ -2298,7 +2298,7 @@ static int cciss_revalidate(struct gendisk *disk)
 	cciss_geometry_inquiry(h->ctlr, logvol, 1, total_size, block_size,
 			       inq_buff, drv);
 
-	blk_queue_hardsect_size(drv->queue, drv->block_size);
+	blk_queue_logical_block_size(drv->queue, drv->block_size);
 	set_capacity(disk, drv->nr_blocks);
 
 	kfree(inq_buff);
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index a02dcfc00f1..44fa2018f6b 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -474,7 +474,7 @@ static int __init cpqarray_register_ctlr( int i, struct pci_dev *pdev)
 		disk->fops = &ida_fops;
 		if (j && !drv->nr_blks)
 			continue;
-		blk_queue_hardsect_size(hba[i]->queue, drv->blk_size);
+		blk_queue_logical_block_size(hba[i]->queue, drv->blk_size);
 		set_capacity(disk, drv->nr_blks);
 		disk->queue = hba[i]->queue;
 		disk->private_data = drv;
@@ -1546,7 +1546,7 @@ static int revalidate_allvol(ctlr_info_t *host)
 		drv_info_t *drv = &host->drv[i];
 		if (i && !drv->nr_blks)
 			continue;
-		blk_queue_hardsect_size(host->queue, drv->blk_size);
+		blk_queue_logical_block_size(host->queue, drv->blk_size);
 		set_capacity(disk, drv->nr_blks);
 		disk->queue = host->queue;
 		disk->private_data = drv;
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index 961de56d00a..f65b3f369eb 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -724,7 +724,7 @@ static int __init hd_init(void)
 	blk_queue_max_sectors(hd_queue, 255);
 	init_timer(&device_timer);
 	device_timer.function = hd_times_out;
-	blk_queue_hardsect_size(hd_queue, 512);
+	blk_queue_logical_block_size(hd_queue, 512);
 
 	if (!NR_HD) {
 		/*
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index c0cd0a03f69..60de5a01e71 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -996,7 +996,7 @@ static int mg_probe(struct platform_device *plat_dev)
 		goto probe_err_6;
 	}
 	blk_queue_max_sectors(host->breq, MG_MAX_SECTS);
-	blk_queue_hardsect_size(host->breq, MG_SECTOR_SIZE);
+	blk_queue_logical_block_size(host->breq, MG_SECTOR_SIZE);
 
 	init_timer(&host->timer);
 	host->timer.function = mg_times_out;
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index dc7a8c352da..293f5858921 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2657,7 +2657,7 @@ static void pkt_init_queue(struct pktcdvd_device *pd)
 	struct request_queue *q = pd->disk->queue;
 
 	blk_queue_make_request(q, pkt_make_request);
-	blk_queue_hardsect_size(q, CD_FRAMESIZE);
+	blk_queue_logical_block_size(q, CD_FRAMESIZE);
 	blk_queue_max_sectors(q, PACKET_MAX_SECTORS);
 	blk_queue_merge_bvec(q, pkt_merge_bvec);
 	q->queuedata = pd;
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index 338cee4cc0b..aaeeb544228 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -477,7 +477,7 @@ static int __devinit ps3disk_probe(struct ps3_system_bus_device *_dev)
 	blk_queue_max_sectors(queue, dev->bounce_size >> 9);
 	blk_queue_segment_boundary(queue, -1UL);
 	blk_queue_dma_alignment(queue, dev->blk_size-1);
-	blk_queue_hardsect_size(queue, dev->blk_size);
+	blk_queue_logical_block_size(queue, dev->blk_size);
 
 	blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN_FLUSH,
 			  ps3disk_prepare_flush);
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index e67bbae9547..cc54473b8e7 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -722,7 +722,7 @@ static void ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun,
 	/*
 	 * build the command
 	 *
-	 * The call to blk_queue_hardsect_size() guarantees that request
+	 * The call to blk_queue_logical_block_size() guarantees that request
 	 * is aligned, but it is given in terms of 512 byte units, always.
 	 */
 	block = blk_rq_pos(rq) >> lun->capacity.bshift;
@@ -1749,7 +1749,7 @@ static int ub_bd_revalidate(struct gendisk *disk)
 	ub_revalidate(lun->udev, lun);
 
 	/* XXX Support sector size switching like in sr.c */
-	blk_queue_hardsect_size(disk->queue, lun->capacity.bsize);
+	blk_queue_logical_block_size(disk->queue, lun->capacity.bsize);
 	set_capacity(disk, lun->capacity.nsec);
 	// set_disk_ro(sdkp->disk, lun->readonly);
 
@@ -2324,7 +2324,7 @@ static int ub_probe_lun(struct ub_dev *sc, int lnum)
 	blk_queue_max_phys_segments(q, UB_MAX_REQ_SG);
 	blk_queue_segment_boundary(q, 0xffffffff);	/* Dubious. */
 	blk_queue_max_sectors(q, UB_MAX_SECTORS);
-	blk_queue_hardsect_size(q, lun->capacity.bsize);
+	blk_queue_logical_block_size(q, lun->capacity.bsize);
 
 	lun->disk = disk;
 	q->queuedata = lun;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 511d4ae2d17..c4845b16946 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -347,7 +347,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 				offsetof(struct virtio_blk_config, blk_size),
 				&blk_size);
 	if (!err)
-		blk_queue_hardsect_size(vblk->disk->queue, blk_size);
+		blk_queue_logical_block_size(vblk->disk->queue, blk_size);
 
 	add_disk(vblk->disk);
 	return 0;
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 132120ae4bd..c1996829d5e 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -344,7 +344,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
 
 	/* Hard sector size and max sectors impersonate the equiv. hardware. */
-	blk_queue_hardsect_size(rq, sector_size);
+	blk_queue_logical_block_size(rq, sector_size);
 	blk_queue_max_sectors(rq, 512);
 
 	/* Each segment in a request is up to an aligned page in size. */
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 3a4397edab7..f08491a3a81 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -984,7 +984,7 @@ static int __devinit ace_setup(struct ace_device *ace)
 	ace->queue = blk_init_queue(ace_request, &ace->lock);
 	if (ace->queue == NULL)
 		goto err_blk_initq;
-	blk_queue_hardsect_size(ace->queue, 512);
+	blk_queue_logical_block_size(ace->queue, 512);
 
 	/*
 	 * Allocate and initialize GD structure
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 1e366ad8f68..b5621f27c4b 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -739,7 +739,7 @@ static void __devinit probe_gdrom_setupdisk(void)
 
 static int __devinit probe_gdrom_setupqueue(void)
 {
-	blk_queue_hardsect_size(gd.gdrom_rq, GDROM_HARD_SECTOR);
+	blk_queue_logical_block_size(gd.gdrom_rq, GDROM_HARD_SECTOR);
 	/* using DMA so memory will need to be contiguous */
 	blk_queue_max_hw_segments(gd.gdrom_rq, 1);
 	/* set a large max size to get most from DMA */
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index f177c2d4017..0fff646cc2f 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -469,8 +469,8 @@ static void vio_handle_cd_event(struct HvLpEvent *event)
 	case viocdopen:
 		if (event->xRc == 0) {
 			di = &viocd_diskinfo[bevent->disk];
-			blk_queue_hardsect_size(di->viocd_disk->queue,
-					bevent->block_size);
+			blk_queue_logical_block_size(di->viocd_disk->queue,
+						     bevent->block_size);
 			set_capacity(di->viocd_disk,
 					bevent->media_size *
 					bevent->block_size / 512);
diff --git a/drivers/char/raw.c b/drivers/char/raw.c
index 20d90e6a6e5..db32f0e4c7d 100644
--- a/drivers/char/raw.c
+++ b/drivers/char/raw.c
@@ -71,7 +71,7 @@ static int raw_open(struct inode *inode, struct file *filp)
 	err = bd_claim(bdev, raw_open);
 	if (err)
 		goto out1;
-	err = set_blocksize(bdev, bdev_hardsect_size(bdev));
+	err = set_blocksize(bdev, bdev_logical_block_size(bdev));
 	if (err)
 		goto out2;
 	filp->f_flags |= O_DIRECT;
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 1799328decf..424140c6c40 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -182,7 +182,7 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive,
 				 (sense->information[2] <<  8) |
 				 (sense->information[3]);
 
-			if (drive->queue->hardsect_size == 2048)
+			if (queue_logical_block_size(drive->queue) == 2048)
 				/* device sector size is 2K */
 				sector <<= 2;
 
@@ -737,7 +737,7 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq)
 	struct request_queue *q = drive->queue;
 	int write = rq_data_dir(rq) == WRITE;
 	unsigned short sectors_per_frame =
-		queue_hardsect_size(q) >> SECTOR_BITS;
+		queue_logical_block_size(q) >> SECTOR_BITS;
 
 	ide_debug_log(IDE_DBG_RQ, "rq->cmd[0]: 0x%x, rq->cmd_flags: 0x%x, "
 				  "secs_per_frame: %u",
@@ -1021,8 +1021,8 @@ int ide_cd_read_toc(ide_drive_t *drive, struct request_sense *sense)
 	/* save a private copy of the TOC capacity for error handling */
 	drive->probed_capacity = toc->capacity * sectors_per_frame;
 
-	blk_queue_hardsect_size(drive->queue,
-				sectors_per_frame << SECTOR_BITS);
+	blk_queue_logical_block_size(drive->queue,
+				     sectors_per_frame << SECTOR_BITS);
 
 	/* first read just the header, so we know how long the TOC is */
 	stat = cdrom_read_tocentry(drive, 0, 1, 0, (char *) &toc->hdr,
@@ -1338,7 +1338,7 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive)
 /* standard prep_rq_fn that builds 10 byte cmds */
 static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq)
 {
-	int hard_sect = queue_hardsect_size(q);
+	int hard_sect = queue_logical_block_size(q);
 	long block = (long)blk_rq_pos(rq) / (hard_sect >> 9);
 	unsigned long blocks = blk_rq_sectors(rq) / (hard_sect >> 9);
 
@@ -1543,7 +1543,7 @@ static int ide_cdrom_setup(ide_drive_t *drive)
 
 	nslots = ide_cdrom_probe_capabilities(drive);
 
-	blk_queue_hardsect_size(q, CD_FRAMESIZE);
+	blk_queue_logical_block_size(q, CD_FRAMESIZE);
 
 	if (ide_cdrom_register(drive, nslots)) {
 		printk(KERN_ERR PFX "%s: %s failed to register device with the"
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 47c68bc75a1..06b0ded1ce2 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -232,7 +232,7 @@ static struct page *read_sb_page(mddev_t *mddev, long offset,
 		target = rdev->sb_start + offset + index * (PAGE_SIZE/512);
 
 		if (sync_page_io(rdev->bdev, target,
-				 roundup(size, bdev_hardsect_size(rdev->bdev)),
+				 roundup(size, bdev_logical_block_size(rdev->bdev)),
 				 page, READ)) {
 			page->index = index;
 			attach_page_buffers(page, NULL); /* so that free_buffer will
@@ -287,7 +287,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
 			int size = PAGE_SIZE;
 			if (page->index == bitmap->file_pages-1)
 				size = roundup(bitmap->last_page_size,
-					       bdev_hardsect_size(rdev->bdev));
+					       bdev_logical_block_size(rdev->bdev));
 			/* Just make sure we aren't corrupting data or
 			 * metadata
 			 */
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index a2e26c24214..75d8081a904 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -178,7 +178,7 @@ static int set_chunk_size(struct dm_exception_store *store,
 	}
 
 	/* Validate the chunk size against the device block size */
-	if (chunk_size_ulong % (bdev_hardsect_size(store->cow->bdev) >> 9)) {
+	if (chunk_size_ulong % (bdev_logical_block_size(store->cow->bdev) >> 9)) {
 		*error = "Chunk size is not a multiple of device blocksize";
 		return -EINVAL;
 	}
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index be233bc4d91..6fa8ccf91c7 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -413,7 +413,8 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		 * Buffer holds both header and bitset.
 		 */
 		buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) +
-				       bitset_size, ti->limits.hardsect_size);
+				       bitset_size,
+				       ti->limits.logical_block_size);
 
 		if (buf_size > dev->bdev->bd_inode->i_size) {
 			DMWARN("log device %s too small: need %llu bytes",
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index e75c6dd76a9..2662a41337e 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -282,7 +282,7 @@ static int read_header(struct pstore *ps, int *new_snapshot)
 	 */
 	if (!ps->store->chunk_size) {
 		ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
-		    bdev_hardsect_size(ps->store->cow->bdev) >> 9);
+		    bdev_logical_block_size(ps->store->cow->bdev) >> 9);
 		ps->store->chunk_mask = ps->store->chunk_size - 1;
 		ps->store->chunk_shift = ffs(ps->store->chunk_size) - 1;
 		chunk_size_supplied = 0;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 429b50b975d..65e2d975985 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -108,7 +108,8 @@ static void combine_restrictions_low(struct io_restrictions *lhs,
 	lhs->max_hw_segments =
 		min_not_zero(lhs->max_hw_segments, rhs->max_hw_segments);
 
-	lhs->hardsect_size = max(lhs->hardsect_size, rhs->hardsect_size);
+	lhs->logical_block_size = max(lhs->logical_block_size,
+				      rhs->logical_block_size);
 
 	lhs->max_segment_size =
 		min_not_zero(lhs->max_segment_size, rhs->max_segment_size);
@@ -529,7 +530,8 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
 	rs->max_hw_segments =
 		min_not_zero(rs->max_hw_segments, q->max_hw_segments);
 
-	rs->hardsect_size = max(rs->hardsect_size, q->hardsect_size);
+	rs->logical_block_size = max(rs->logical_block_size,
+				     queue_logical_block_size(q));
 
 	rs->max_segment_size =
 		min_not_zero(rs->max_segment_size, q->max_segment_size);
@@ -683,8 +685,8 @@ static void check_for_valid_limits(struct io_restrictions *rs)
 		rs->max_phys_segments = MAX_PHYS_SEGMENTS;
 	if (!rs->max_hw_segments)
 		rs->max_hw_segments = MAX_HW_SEGMENTS;
-	if (!rs->hardsect_size)
-		rs->hardsect_size = 1 << SECTOR_SHIFT;
+	if (!rs->logical_block_size)
+		rs->logical_block_size = 1 << SECTOR_SHIFT;
 	if (!rs->max_segment_size)
 		rs->max_segment_size = MAX_SEGMENT_SIZE;
 	if (!rs->seg_boundary_mask)
@@ -914,7 +916,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
 	blk_queue_max_sectors(q, t->limits.max_sectors);
 	q->max_phys_segments = t->limits.max_phys_segments;
 	q->max_hw_segments = t->limits.max_hw_segments;
-	q->hardsect_size = t->limits.hardsect_size;
+	q->logical_block_size = t->limits.logical_block_size;
 	q->max_segment_size = t->limits.max_segment_size;
 	q->max_hw_sectors = t->limits.max_hw_sectors;
 	q->seg_boundary_mask = t->limits.seg_boundary_mask;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index fccc8343a25..4cbc19f5c30 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1202,7 +1202,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
 	atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
 
 	rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
-	bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1;
+	bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
 	if (rdev->sb_size & bmask)
 		rdev->sb_size = (rdev->sb_size | bmask) + 1;
 
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index c0bebc6a2f2..7847bbc1440 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -1242,7 +1242,7 @@ static int mspro_block_init_disk(struct memstick_dev *card)
 
 	sprintf(msb->disk->disk_name, "mspblk%d", disk_id);
 
-	blk_queue_hardsect_size(msb->queue, msb->page_size);
+	blk_queue_logical_block_size(msb->queue, msb->page_size);
 
 	capacity = be16_to_cpu(sys_info->user_block_count);
 	capacity *= be16_to_cpu(sys_info->block_size);
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index 6573ef4408f..335d4c78a77 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -794,8 +794,9 @@ static int i2o_block_transfer(struct request *req)
 	if (c->adaptec) {
 		u8 cmd[10];
 		u32 scsi_flags;
-		u16 hwsec = queue_hardsect_size(req->q) >> KERNEL_SECTOR_SHIFT;
+		u16 hwsec;
 
+		hwsec = queue_logical_block_size(req->q) >> KERNEL_SECTOR_SHIFT;
 		memset(cmd, 0, 10);
 
 		sgl_offset = SGL_OFFSET_12;
@@ -1078,7 +1079,7 @@ static int i2o_block_probe(struct device *dev)
 	 */
 	if (!i2o_parm_field_get(i2o_dev, 0x0004, 1, &blocksize, 4) ||
 	    !i2o_parm_field_get(i2o_dev, 0x0000, 3, &blocksize, 4)) {
-		blk_queue_hardsect_size(queue, le32_to_cpu(blocksize));
+		blk_queue_logical_block_size(queue, le32_to_cpu(blocksize));
 	} else
 		osm_warn("unable to get blocksize of %s\n", gd->disk_name);
 
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index c5df8654645..98ffc41eaf2 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -521,7 +521,7 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card)
 
 	sprintf(md->disk->disk_name, "mmcblk%d", devidx);
 
-	blk_queue_hardsect_size(md->queue.queue, 512);
+	blk_queue_logical_block_size(md->queue.queue, 512);
 
 	if (!mmc_card_sd(card) && mmc_card_blockaddr(card)) {
 		/*
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 502622f628b..aaac3b6800b 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -378,7 +378,7 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr)
 	}
 
 	tr->blkcore_priv->rq->queuedata = tr;
-	blk_queue_hardsect_size(tr->blkcore_priv->rq, tr->blksize);
+	blk_queue_logical_block_size(tr->blkcore_priv->rq, tr->blksize);
 	if (tr->discard)
 		blk_queue_set_discard(tr->blkcore_priv->rq,
 				      blktrans_discard_request);
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index e64f62d5e0f..27a1be0cd4d 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -1990,7 +1990,7 @@ static void dasd_setup_queue(struct dasd_block *block)
 {
 	int max;
 
-	blk_queue_hardsect_size(block->request_queue, block->bp_block);
+	blk_queue_logical_block_size(block->request_queue, block->bp_block);
 	max = block->base->discipline->max_blocks << block->s2b_shift;
 	blk_queue_max_sectors(block->request_queue, max);
 	blk_queue_max_phys_segments(block->request_queue, -1L);
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index cfdcf1aed33..a4c7ffcd998 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -602,7 +602,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
 	dev_info->gd->private_data = dev_info;
 	dev_info->gd->driverfs_dev = &dev_info->dev;
 	blk_queue_make_request(dev_info->dcssblk_queue, dcssblk_make_request);
-	blk_queue_hardsect_size(dev_info->dcssblk_queue, 4096);
+	blk_queue_logical_block_size(dev_info->dcssblk_queue, 4096);
 
 	seg_byte_size = (dev_info->end - dev_info->start + 1);
 	set_capacity(dev_info->gd, seg_byte_size >> 9); // size in sectors
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index 76814f3e898..0ae0c83ef87 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -343,7 +343,7 @@ static int __init xpram_setup_blkdev(void)
 			goto out;
 		}
 		blk_queue_make_request(xpram_queues[i], xpram_make_request);
-		blk_queue_hardsect_size(xpram_queues[i], 4096);
+		blk_queue_logical_block_size(xpram_queues[i], 4096);
 	}
 
 	/*
diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c
index 1e796767598..47ff695255e 100644
--- a/drivers/s390/char/tape_block.c
+++ b/drivers/s390/char/tape_block.c
@@ -222,7 +222,7 @@ tapeblock_setup_device(struct tape_device * device)
 	if (rc)
 		goto cleanup_queue;
 
-	blk_queue_hardsect_size(blkdat->request_queue, TAPEBLOCK_HSEC_SIZE);
+	blk_queue_logical_block_size(blkdat->request_queue, TAPEBLOCK_HSEC_SIZE);
 	blk_queue_max_sectors(blkdat->request_queue, TAPEBLOCK_MAX_SEC);
 	blk_queue_max_phys_segments(blkdat->request_queue, -1L);
 	blk_queue_max_hw_segments(blkdat->request_queue, -1L);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 40d2860f235..bcf3bd40bbd 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1510,7 +1510,7 @@ got_data:
 		 */
 		sector_size = 512;
 	}
-	blk_queue_hardsect_size(sdp->request_queue, sector_size);
+	blk_queue_logical_block_size(sdp->request_queue, sector_size);
 
 	{
 		char cap_str_2[10], cap_str_10[10];
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index fddba53c7fe..cd350dfc121 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -727,7 +727,7 @@ static void get_sectorsize(struct scsi_cd *cd)
 	}
 
 	queue = cd->device->request_queue;
-	blk_queue_hardsect_size(queue, sector_size);
+	blk_queue_logical_block_size(queue, sector_size);
 
 	return;
 }
diff --git a/fs/bio.c b/fs/bio.c
index 81dc93e7253..4445c382173 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1490,11 +1490,12 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
 sector_t bio_sector_offset(struct bio *bio, unsigned short index,
 			   unsigned int offset)
 {
-	unsigned int sector_sz = queue_hardsect_size(bio->bi_bdev->bd_disk->queue);
+	unsigned int sector_sz;
 	struct bio_vec *bv;
 	sector_t sectors;
 	int i;
 
+	sector_sz = queue_logical_block_size(bio->bi_bdev->bd_disk->queue);
 	sectors = 0;
 
 	if (index >= bio->bi_idx)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index a85fe310fc6..a29b4dcc1bc 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -76,7 +76,7 @@ int set_blocksize(struct block_device *bdev, int size)
 		return -EINVAL;
 
 	/* Size cannot be smaller than the size supported by the device */
-	if (size < bdev_hardsect_size(bdev))
+	if (size < bdev_logical_block_size(bdev))
 		return -EINVAL;
 
 	/* Don't change the size if it is same as current */
@@ -106,7 +106,7 @@ EXPORT_SYMBOL(sb_set_blocksize);
 
 int sb_min_blocksize(struct super_block *sb, int size)
 {
-	int minsize = bdev_hardsect_size(sb->s_bdev);
+	int minsize = bdev_logical_block_size(sb->s_bdev);
 	if (size < minsize)
 		size = minsize;
 	return sb_set_blocksize(sb, size);
@@ -1117,7 +1117,7 @@ EXPORT_SYMBOL(check_disk_change);
 
 void bd_set_size(struct block_device *bdev, loff_t size)
 {
-	unsigned bsize = bdev_hardsect_size(bdev);
+	unsigned bsize = bdev_logical_block_size(bdev);
 
 	bdev->bd_inode->i_size = size;
 	while (bsize < PAGE_CACHE_SIZE) {
diff --git a/fs/buffer.c b/fs/buffer.c
index aed297739eb..36e2bbc60ec 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1085,12 +1085,12 @@ static struct buffer_head *
 __getblk_slow(struct block_device *bdev, sector_t block, int size)
 {
 	/* Size must be multiple of hard sectorsize */
-	if (unlikely(size & (bdev_hardsect_size(bdev)-1) ||
+	if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
 			(size < 512 || size > PAGE_SIZE))) {
 		printk(KERN_ERR "getblk(): invalid block size %d requested\n",
 					size);
-		printk(KERN_ERR "hardsect size: %d\n",
-					bdev_hardsect_size(bdev));
+		printk(KERN_ERR "logical block size: %d\n",
+					bdev_logical_block_size(bdev));
 
 		dump_stack();
 		return NULL;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 05763bbc205..8b10b87dc01 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1127,7 +1127,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 		rw = WRITE_ODIRECT;
 
 	if (bdev)
-		bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev));
+		bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev));
 
 	if (offset & blocksize_mask) {
 		if (bdev)
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 599dbfe504c..acbb94fdf90 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1696,7 +1696,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		goto failed_mount;
 	}
 
-	hblock = bdev_hardsect_size(sb->s_bdev);
+	hblock = bdev_logical_block_size(sb->s_bdev);
 	if (sb->s_blocksize != blocksize) {
 		/*
 		 * Make sure the blocksize for the filesystem is larger
@@ -2119,7 +2119,7 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
 	}
 
 	blocksize = sb->s_blocksize;
-	hblock = bdev_hardsect_size(bdev);
+	hblock = bdev_logical_block_size(bdev);
 	if (blocksize < hblock) {
 		printk(KERN_ERR
 			"EXT3-fs: blocksize too small for journal device.\n");
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2958f4e6f22..a30549f7a30 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2962,7 +2962,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
 	}
 
 	blocksize = sb->s_blocksize;
-	hblock = bdev_hardsect_size(bdev);
+	hblock = bdev_logical_block_size(bdev);
 	if (blocksize < hblock) {
 		printk(KERN_ERR
 			"EXT4-fs: blocksize too small for journal device.\n");
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 1ff9473ea75..a3b2ac989fc 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -526,11 +526,11 @@ static int init_sb(struct gfs2_sbd *sdp, int silent)
 	}
 
 	/* Set up the buffer cache and SB for real */
-	if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) {
+	if (sdp->sd_sb.sb_bsize < bdev_logical_block_size(sb->s_bdev)) {
 		ret = -EINVAL;
 		fs_err(sdp, "FS block size (%u) is too small for device "
 		       "block size (%u)\n",
-		       sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev));
+		       sdp->sd_sb.sb_bsize, bdev_logical_block_size(sb->s_bdev));
 		goto out;
 	}
 	if (sdp->sd_sb.sb_bsize > PAGE_SIZE) {
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 565038243fa..a971d24e10c 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -845,7 +845,7 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
 	struct super_block *sb = sdp->sd_vfs;
 	struct block_device *bdev = sb->s_bdev;
 	const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize /
-					   bdev_hardsect_size(sb->s_bdev);
+					   bdev_logical_block_size(sb->s_bdev);
 	u64 blk;
 	sector_t start = 0;
 	sector_t nr_sects = 0;
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 7f65b3be4aa..a91f15b8673 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -515,7 +515,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data)
 
 	blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size);
 	if (sb->s_blocksize != blocksize) {
-		int hw_blocksize = bdev_hardsect_size(sb->s_bdev);
+		int hw_blocksize = bdev_logical_block_size(sb->s_bdev);
 
 		if (blocksize < hw_blocksize) {
 			printk(KERN_ERR
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index f76951dcd4a..6aa7c471353 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -25,7 +25,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/spinlock.h>
-#include <linux/blkdev.h>	/* For bdev_hardsect_size(). */
+#include <linux/blkdev.h>	/* For bdev_logical_block_size(). */
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
 #include <linux/vfs.h>
@@ -2785,13 +2785,13 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 		goto err_out_now;
 
 	/* We support sector sizes up to the PAGE_CACHE_SIZE. */
-	if (bdev_hardsect_size(sb->s_bdev) > PAGE_CACHE_SIZE) {
+	if (bdev_logical_block_size(sb->s_bdev) > PAGE_CACHE_SIZE) {
 		if (!silent)
 			ntfs_error(sb, "Device has unsupported sector size "
 					"(%i).  The maximum supported sector "
 					"size on this architecture is %lu "
 					"bytes.",
-					bdev_hardsect_size(sb->s_bdev),
+					bdev_logical_block_size(sb->s_bdev),
 					PAGE_CACHE_SIZE);
 		goto err_out_now;
 	}
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 4f85eceab37..09cc25d0461 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1371,7 +1371,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
 
 	bdevname(reg->hr_bdev, reg->hr_dev_name);
 
-	sectsize = bdev_hardsect_size(reg->hr_bdev);
+	sectsize = bdev_logical_block_size(reg->hr_bdev);
 	if (sectsize != reg->hr_block_bytes) {
 		mlog(ML_ERROR,
 		     "blocksize %u incorrect for device, expected %d",
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 79ff8d9d37e..5c6163f5503 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -713,7 +713,7 @@ static int ocfs2_sb_probe(struct super_block *sb,
 	*bh = NULL;
 
 	/* may be > 512 */
-	*sector_size = bdev_hardsect_size(sb->s_bdev);
+	*sector_size = bdev_logical_block_size(sb->s_bdev);
 	if (*sector_size > OCFS2_MAX_BLOCKSIZE) {
 		mlog(ML_ERROR, "Hardware sector size too large: %d (max=%d)\n",
 		     *sector_size, OCFS2_MAX_BLOCKSIZE);
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index 46297683cd3..fc71aab0846 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -76,7 +76,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 	Sector sect;
 
 	res = 0;
-	blocksize = bdev_hardsect_size(bdev);
+	blocksize = bdev_logical_block_size(bdev);
 	if (blocksize <= 0)
 		goto out_exit;
 	i_size = i_size_read(bdev->bd_inode);
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
index 796511886f2..0028d2ef066 100644
--- a/fs/partitions/msdos.c
+++ b/fs/partitions/msdos.c
@@ -110,7 +110,7 @@ parse_extended(struct parsed_partitions *state, struct block_device *bdev,
 	Sector sect;
 	unsigned char *data;
 	u32 this_sector, this_size;
-	int sector_size = bdev_hardsect_size(bdev) / 512;
+	int sector_size = bdev_logical_block_size(bdev) / 512;
 	int loopct = 0;		/* number of links followed
 				   without finding a data partition */
 	int i;
@@ -415,7 +415,7 @@ static struct {
  
 int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
 {
-	int sector_size = bdev_hardsect_size(bdev) / 512;
+	int sector_size = bdev_logical_block_size(bdev) / 512;
 	Sector sect;
 	unsigned char *data;
 	struct partition *p;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 72348cc855a..0ba44107d8f 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1915,7 +1915,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 	if (uopt.flags & (1 << UDF_FLAG_BLOCKSIZE_SET)) {
 		ret = udf_load_vrs(sb, &uopt, silent, &fileset);
 	} else {
-		uopt.blocksize = bdev_hardsect_size(sb->s_bdev);
+		uopt.blocksize = bdev_logical_block_size(sb->s_bdev);
 		ret = udf_load_vrs(sb, &uopt, silent, &fileset);
 		if (!ret && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) {
 			if (!silent)
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index e28800a9f2b..1418b916fc2 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1501,7 +1501,7 @@ xfs_setsize_buftarg_early(
 	struct block_device	*bdev)
 {
 	return xfs_setsize_buftarg_flags(btp,
-			PAGE_CACHE_SIZE, bdev_hardsect_size(bdev), 0);
+			PAGE_CACHE_SIZE, bdev_logical_block_size(bdev), 0);
 }
 
 int
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 56ce53fce72..872b78b7a10 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -391,7 +391,7 @@ struct request_queue
 	unsigned int		max_hw_sectors;
 	unsigned short		max_phys_segments;
 	unsigned short		max_hw_segments;
-	unsigned short		hardsect_size;
+	unsigned short		logical_block_size;
 	unsigned int		max_segment_size;
 
 	unsigned long		seg_boundary_mask;
@@ -901,7 +901,7 @@ extern void blk_queue_max_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
-extern void blk_queue_hardsect_size(struct request_queue *, unsigned short);
+extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
 extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
 extern void blk_queue_dma_pad(struct request_queue *, unsigned int);
 extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
@@ -988,19 +988,19 @@ extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter);
 
 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
 
-static inline int queue_hardsect_size(struct request_queue *q)
+static inline unsigned short queue_logical_block_size(struct request_queue *q)
 {
 	int retval = 512;
 
-	if (q && q->hardsect_size)
-		retval = q->hardsect_size;
+	if (q && q->logical_block_size)
+		retval = q->logical_block_size;
 
 	return retval;
 }
 
-static inline int bdev_hardsect_size(struct block_device *bdev)
+static inline unsigned short bdev_logical_block_size(struct block_device *bdev)
 {
-	return queue_hardsect_size(bdev_get_queue(bdev));
+	return queue_logical_block_size(bdev_get_queue(bdev));
 }
 
 static inline int queue_dma_alignment(struct request_queue *q)
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index ded2d7c4266..49c2362977f 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -149,7 +149,7 @@ struct io_restrictions {
 	unsigned max_hw_sectors;
 	unsigned max_sectors;
 	unsigned max_segment_size;
-	unsigned short hardsect_size;
+	unsigned short logical_block_size;
 	unsigned short max_hw_segments;
 	unsigned short max_phys_segments;
 	unsigned char no_cluster; /* inverted so that 0 is default */
-- 
cgit v1.2.3-70-g09d2


From ae03bf639a5027d27270123f5f6e3ee6a412781d Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 22 May 2009 17:17:50 -0400
Subject: block: Use accessor functions for queue limits

Convert all external users of queue limits to using wrapper functions
instead of poking the request queue variables directly.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-barrier.c            |  8 ++++----
 block/blk-core.c               | 16 ++++++++--------
 block/blk-map.c                |  4 ++--
 block/blk-merge.c              | 27 ++++++++++++++-------------
 block/blk-settings.c           | 15 ++++++++++++---
 block/blk-sysfs.c              |  8 ++++----
 block/compat_ioctl.c           |  2 +-
 block/ioctl.c                  | 10 +++++-----
 block/scsi_ioctl.c             |  8 ++++----
 drivers/block/pktcdvd.c        |  6 ++++--
 drivers/cdrom/cdrom.c          |  4 ++--
 drivers/md/dm-table.c          | 28 ++++++++++++++--------------
 drivers/md/linear.c            |  2 +-
 drivers/md/multipath.c         |  4 ++--
 drivers/md/raid0.c             |  2 +-
 drivers/md/raid1.c             |  4 ++--
 drivers/md/raid10.c            |  8 ++++----
 drivers/md/raid5.c             |  4 ++--
 drivers/scsi/sg.c              | 15 ++++++++-------
 drivers/scsi/st.c              |  4 ++--
 drivers/usb/storage/scsiglue.c |  4 ++--
 fs/bio.c                       | 19 ++++++++++---------
 include/linux/bio.h            |  2 +-
 include/linux/blkdev.h         | 36 ++++++++++++++++++++++++++++++++++++
 mm/bounce.c                    |  4 ++--
 25 files changed, 147 insertions(+), 97 deletions(-)

(limited to 'include')

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 0d98054cdbd..30022b4e2f6 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -388,10 +388,10 @@ int blkdev_issue_discard(struct block_device *bdev,
 
 		bio->bi_sector = sector;
 
-		if (nr_sects > q->max_hw_sectors) {
-			bio->bi_size = q->max_hw_sectors << 9;
-			nr_sects -= q->max_hw_sectors;
-			sector += q->max_hw_sectors;
+		if (nr_sects > queue_max_hw_sectors(q)) {
+			bio->bi_size = queue_max_hw_sectors(q) << 9;
+			nr_sects -= queue_max_hw_sectors(q);
+			sector += queue_max_hw_sectors(q);
 		} else {
 			bio->bi_size = nr_sects << 9;
 			nr_sects = 0;
diff --git a/block/blk-core.c b/block/blk-core.c
index 59c4af52311..7a4c40184a6 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1437,11 +1437,11 @@ static inline void __generic_make_request(struct bio *bio)
 			goto end_io;
 		}
 
-		if (unlikely(nr_sectors > q->max_hw_sectors)) {
+		if (unlikely(nr_sectors > queue_max_hw_sectors(q))) {
 			printk(KERN_ERR "bio too big device %s (%u > %u)\n",
-				bdevname(bio->bi_bdev, b),
-				bio_sectors(bio),
-				q->max_hw_sectors);
+			       bdevname(bio->bi_bdev, b),
+			       bio_sectors(bio),
+			       queue_max_hw_sectors(q));
 			goto end_io;
 		}
 
@@ -1608,8 +1608,8 @@ EXPORT_SYMBOL(submit_bio);
  */
 int blk_rq_check_limits(struct request_queue *q, struct request *rq)
 {
-	if (blk_rq_sectors(rq) > q->max_sectors ||
-	    blk_rq_bytes(rq) > q->max_hw_sectors << 9) {
+	if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
+	    blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
 		printk(KERN_ERR "%s: over max size limit.\n", __func__);
 		return -EIO;
 	}
@@ -1621,8 +1621,8 @@ int blk_rq_check_limits(struct request_queue *q, struct request *rq)
 	 * limitation.
 	 */
 	blk_recalc_rq_segments(rq);
-	if (rq->nr_phys_segments > q->max_phys_segments ||
-	    rq->nr_phys_segments > q->max_hw_segments) {
+	if (rq->nr_phys_segments > queue_max_phys_segments(q) ||
+	    rq->nr_phys_segments > queue_max_hw_segments(q)) {
 		printk(KERN_ERR "%s: over max segments limit.\n", __func__);
 		return -EIO;
 	}
diff --git a/block/blk-map.c b/block/blk-map.c
index ef2492adca7..9083cf0180c 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -115,7 +115,7 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
 	struct bio *bio = NULL;
 	int ret;
 
-	if (len > (q->max_hw_sectors << 9))
+	if (len > (queue_max_hw_sectors(q) << 9))
 		return -EINVAL;
 	if (!len)
 		return -EINVAL;
@@ -292,7 +292,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 	struct bio *bio;
 	int ret;
 
-	if (len > (q->max_hw_sectors << 9))
+	if (len > (queue_max_hw_sectors(q) << 9))
 		return -EINVAL;
 	if (!len || !kbuf)
 		return -EINVAL;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 4974dd5767e..39ce64432ba 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -32,11 +32,12 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
 			 * never considered part of another segment, since that
 			 * might change with the bounce page.
 			 */
-			high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
+			high = page_to_pfn(bv->bv_page) > queue_bounce_pfn(q);
 			if (high || highprv)
 				goto new_segment;
 			if (cluster) {
-				if (seg_size + bv->bv_len > q->max_segment_size)
+				if (seg_size + bv->bv_len
+				    > queue_max_segment_size(q))
 					goto new_segment;
 				if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
 					goto new_segment;
@@ -91,7 +92,7 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
 		return 0;
 
 	if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
-	    q->max_segment_size)
+	    queue_max_segment_size(q))
 		return 0;
 
 	if (!bio_has_data(bio))
@@ -134,7 +135,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
 		int nbytes = bvec->bv_len;
 
 		if (bvprv && cluster) {
-			if (sg->length + nbytes > q->max_segment_size)
+			if (sg->length + nbytes > queue_max_segment_size(q))
 				goto new_segment;
 
 			if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
@@ -205,8 +206,8 @@ static inline int ll_new_hw_segment(struct request_queue *q,
 {
 	int nr_phys_segs = bio_phys_segments(q, bio);
 
-	if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments
-	    || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
+	if (req->nr_phys_segments + nr_phys_segs > queue_max_hw_segments(q) ||
+	    req->nr_phys_segments + nr_phys_segs > queue_max_phys_segments(q)) {
 		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
@@ -227,9 +228,9 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
 	unsigned short max_sectors;
 
 	if (unlikely(blk_pc_request(req)))
-		max_sectors = q->max_hw_sectors;
+		max_sectors = queue_max_hw_sectors(q);
 	else
-		max_sectors = q->max_sectors;
+		max_sectors = queue_max_sectors(q);
 
 	if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
 		req->cmd_flags |= REQ_NOMERGE;
@@ -251,9 +252,9 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
 	unsigned short max_sectors;
 
 	if (unlikely(blk_pc_request(req)))
-		max_sectors = q->max_hw_sectors;
+		max_sectors = queue_max_hw_sectors(q);
 	else
-		max_sectors = q->max_sectors;
+		max_sectors = queue_max_sectors(q);
 
 
 	if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
@@ -287,7 +288,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
 	/*
 	 * Will it become too large?
 	 */
-	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > q->max_sectors)
+	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > queue_max_sectors(q))
 		return 0;
 
 	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
@@ -299,10 +300,10 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
 		total_phys_segments--;
 	}
 
-	if (total_phys_segments > q->max_phys_segments)
+	if (total_phys_segments > queue_max_phys_segments(q))
 		return 0;
 
-	if (total_phys_segments > q->max_hw_segments)
+	if (total_phys_segments > queue_max_hw_segments(q))
 		return 0;
 
 	/* Merge is OK... */
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 15c3164537b..0b32f984eed 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -219,6 +219,15 @@ void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors)
 }
 EXPORT_SYMBOL(blk_queue_max_sectors);
 
+void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors)
+{
+	if (BLK_DEF_MAX_SECTORS > max_sectors)
+		q->max_hw_sectors = BLK_DEF_MAX_SECTORS;
+	else
+		q->max_hw_sectors = max_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_hw_sectors);
+
 /**
  * blk_queue_max_phys_segments - set max phys segments for a request for this queue
  * @q:  the request queue for the device
@@ -395,11 +404,11 @@ int blk_queue_dma_drain(struct request_queue *q,
 			       dma_drain_needed_fn *dma_drain_needed,
 			       void *buf, unsigned int size)
 {
-	if (q->max_hw_segments < 2 || q->max_phys_segments < 2)
+	if (queue_max_hw_segments(q) < 2 || queue_max_phys_segments(q) < 2)
 		return -EINVAL;
 	/* make room for appending the drain */
-	--q->max_hw_segments;
-	--q->max_phys_segments;
+	blk_queue_max_hw_segments(q, queue_max_hw_segments(q) - 1);
+	blk_queue_max_phys_segments(q, queue_max_phys_segments(q) - 1);
 	q->dma_drain_needed = dma_drain_needed;
 	q->dma_drain_buffer = buf;
 	q->dma_drain_size = size;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 13d38b7e4d0..142a4acddd4 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -95,7 +95,7 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count)
 
 static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
 {
-	int max_sectors_kb = q->max_sectors >> 1;
+	int max_sectors_kb = queue_max_sectors(q) >> 1;
 
 	return queue_var_show(max_sectors_kb, (page));
 }
@@ -109,7 +109,7 @@ static ssize_t
 queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
 {
 	unsigned long max_sectors_kb,
-			max_hw_sectors_kb = q->max_hw_sectors >> 1,
+		max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1,
 			page_kb = 1 << (PAGE_CACHE_SHIFT - 10);
 	ssize_t ret = queue_var_store(&max_sectors_kb, page, count);
 
@@ -117,7 +117,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
 		return -EINVAL;
 
 	spin_lock_irq(q->queue_lock);
-	q->max_sectors = max_sectors_kb << 1;
+	blk_queue_max_sectors(q, max_sectors_kb << 1);
 	spin_unlock_irq(q->queue_lock);
 
 	return ret;
@@ -125,7 +125,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
 
 static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
 {
-	int max_hw_sectors_kb = q->max_hw_sectors >> 1;
+	int max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1;
 
 	return queue_var_show(max_hw_sectors_kb, (page));
 }
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 9eaa1940273..df18a156d01 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -766,7 +766,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 		return compat_put_int(arg, bdev_logical_block_size(bdev));
 	case BLKSECTGET:
 		return compat_put_ushort(arg,
-					 bdev_get_queue(bdev)->max_sectors);
+					 queue_max_sectors(bdev_get_queue(bdev)));
 	case BLKRASET: /* compatible, but no compat_ptr (!) */
 	case BLKFRASET:
 		if (!capable(CAP_SYS_ADMIN))
diff --git a/block/ioctl.c b/block/ioctl.c
index 7aa97f65da8..500e4c73cc5 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -152,10 +152,10 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
 		bio->bi_private = &wait;
 		bio->bi_sector = start;
 
-		if (len > q->max_hw_sectors) {
-			bio->bi_size = q->max_hw_sectors << 9;
-			len -= q->max_hw_sectors;
-			start += q->max_hw_sectors;
+		if (len > queue_max_hw_sectors(q)) {
+			bio->bi_size = queue_max_hw_sectors(q) << 9;
+			len -= queue_max_hw_sectors(q);
+			start += queue_max_hw_sectors(q);
 		} else {
 			bio->bi_size = len << 9;
 			len = 0;
@@ -313,7 +313,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 	case BLKSSZGET: /* get block device hardware sector size */
 		return put_int(arg, bdev_logical_block_size(bdev));
 	case BLKSECTGET:
-		return put_ushort(arg, bdev_get_queue(bdev)->max_sectors);
+		return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev)));
 	case BLKRASET:
 	case BLKFRASET:
 		if(!capable(CAP_SYS_ADMIN))
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index a9670dd4b5d..5f8e798ede4 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -75,7 +75,7 @@ static int sg_set_timeout(struct request_queue *q, int __user *p)
 
 static int sg_get_reserved_size(struct request_queue *q, int __user *p)
 {
-	unsigned val = min(q->sg_reserved_size, q->max_sectors << 9);
+	unsigned val = min(q->sg_reserved_size, queue_max_sectors(q) << 9);
 
 	return put_user(val, p);
 }
@@ -89,8 +89,8 @@ static int sg_set_reserved_size(struct request_queue *q, int __user *p)
 
 	if (size < 0)
 		return -EINVAL;
-	if (size > (q->max_sectors << 9))
-		size = q->max_sectors << 9;
+	if (size > (queue_max_sectors(q) << 9))
+		size = queue_max_sectors(q) << 9;
 
 	q->sg_reserved_size = size;
 	return 0;
@@ -264,7 +264,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 	if (hdr->cmd_len > BLK_MAX_CDB)
 		return -EINVAL;
 
-	if (hdr->dxfer_len > (q->max_hw_sectors << 9))
+	if (hdr->dxfer_len > (queue_max_hw_sectors(q) << 9))
 		return -EIO;
 
 	if (hdr->dxfer_len)
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 293f5858921..d57f1175948 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -991,13 +991,15 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
  */
 static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_queue *q)
 {
-	if ((pd->settings.size << 9) / CD_FRAMESIZE <= q->max_phys_segments) {
+	if ((pd->settings.size << 9) / CD_FRAMESIZE
+	    <= queue_max_phys_segments(q)) {
 		/*
 		 * The cdrom device can handle one segment/frame
 		 */
 		clear_bit(PACKET_MERGE_SEGS, &pd->flags);
 		return 0;
-	} else if ((pd->settings.size << 9) / PAGE_SIZE <= q->max_phys_segments) {
+	} else if ((pd->settings.size << 9) / PAGE_SIZE
+		   <= queue_max_phys_segments(q)) {
 		/*
 		 * We can handle this case at the expense of some extra memory
 		 * copies during write operations
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index cceace61ef2..71d1b9bab70 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2101,8 +2101,8 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
 		nr = nframes;
 		if (cdi->cdda_method == CDDA_BPC_SINGLE)
 			nr = 1;
-		if (nr * CD_FRAMESIZE_RAW > (q->max_sectors << 9))
-			nr = (q->max_sectors << 9) / CD_FRAMESIZE_RAW;
+		if (nr * CD_FRAMESIZE_RAW > (queue_max_sectors(q) << 9))
+			nr = (queue_max_sectors(q) << 9) / CD_FRAMESIZE_RAW;
 
 		len = nr * CD_FRAMESIZE_RAW;
 
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 65e2d975985..e9a73bb242b 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -510,7 +510,7 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
 	 *        combine_restrictions_low()
 	 */
 	rs->max_sectors =
-		min_not_zero(rs->max_sectors, q->max_sectors);
+		min_not_zero(rs->max_sectors, queue_max_sectors(q));
 
 	/*
 	 * Check if merge fn is supported.
@@ -525,25 +525,25 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
 
 	rs->max_phys_segments =
 		min_not_zero(rs->max_phys_segments,
-			     q->max_phys_segments);
+			     queue_max_phys_segments(q));
 
 	rs->max_hw_segments =
-		min_not_zero(rs->max_hw_segments, q->max_hw_segments);
+		min_not_zero(rs->max_hw_segments, queue_max_hw_segments(q));
 
 	rs->logical_block_size = max(rs->logical_block_size,
 				     queue_logical_block_size(q));
 
 	rs->max_segment_size =
-		min_not_zero(rs->max_segment_size, q->max_segment_size);
+		min_not_zero(rs->max_segment_size, queue_max_segment_size(q));
 
 	rs->max_hw_sectors =
-		min_not_zero(rs->max_hw_sectors, q->max_hw_sectors);
+		min_not_zero(rs->max_hw_sectors, queue_max_hw_sectors(q));
 
 	rs->seg_boundary_mask =
 		min_not_zero(rs->seg_boundary_mask,
-			     q->seg_boundary_mask);
+			     queue_segment_boundary(q));
 
-	rs->bounce_pfn = min_not_zero(rs->bounce_pfn, q->bounce_pfn);
+	rs->bounce_pfn = min_not_zero(rs->bounce_pfn, queue_bounce_pfn(q));
 
 	rs->no_cluster |= !test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
 }
@@ -914,13 +914,13 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
 	 * restrictions.
 	 */
 	blk_queue_max_sectors(q, t->limits.max_sectors);
-	q->max_phys_segments = t->limits.max_phys_segments;
-	q->max_hw_segments = t->limits.max_hw_segments;
-	q->logical_block_size = t->limits.logical_block_size;
-	q->max_segment_size = t->limits.max_segment_size;
-	q->max_hw_sectors = t->limits.max_hw_sectors;
-	q->seg_boundary_mask = t->limits.seg_boundary_mask;
-	q->bounce_pfn = t->limits.bounce_pfn;
+	blk_queue_max_phys_segments(q, t->limits.max_phys_segments);
+	blk_queue_max_hw_segments(q, t->limits.max_hw_segments);
+	blk_queue_logical_block_size(q, t->limits.logical_block_size);
+	blk_queue_max_segment_size(q, t->limits.max_segment_size);
+	blk_queue_max_hw_sectors(q, t->limits.max_hw_sectors);
+	blk_queue_segment_boundary(q, t->limits.seg_boundary_mask);
+	blk_queue_bounce_limit(q, t->limits.bounce_pfn);
 
 	if (t->limits.no_cluster)
 		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 7a36e38393a..64f1f3e046e 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -146,7 +146,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 		 * a one page request is never in violation.
 		 */
 		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
+		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
 			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 		disk->num_sectors = rdev->sectors;
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 41ced0cbe82..4ee31aa13c4 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -303,7 +303,7 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 		 * merge_bvec_fn will be involved in multipath.)
 		 */
 			if (q->merge_bvec_fn &&
-			    mddev->queue->max_sectors > (PAGE_SIZE>>9))
+			    queue_max_sectors(q) > (PAGE_SIZE>>9))
 				blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 			conf->working_disks++;
@@ -467,7 +467,7 @@ static int multipath_run (mddev_t *mddev)
 		 * violating it, not that we ever expect a device with
 		 * a merge_bvec_fn to be involved in multipath */
 		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
+		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
 			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 		if (!test_bit(Faulty, &rdev->flags))
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index c08d7559be5..925507e7d67 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -144,7 +144,7 @@ static int create_strip_zones (mddev_t *mddev)
 		 */
 
 		if (rdev1->bdev->bd_disk->queue->merge_bvec_fn &&
-		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
+		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
 			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 		if (!smallest || (rdev1->sectors < smallest->sectors))
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 36df9109cde..e23758b4a34 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1130,7 +1130,7 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 			 * a one page request is never in violation.
 			 */
 			if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-			    mddev->queue->max_sectors > (PAGE_SIZE>>9))
+			    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
 				blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 			p->head_position = 0;
@@ -1996,7 +1996,7 @@ static int run(mddev_t *mddev)
 		 * a one page request is never in violation.
 		 */
 		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
+		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
 			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 		disk->head_position = 0;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 499620afb44..750550c1166 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1158,8 +1158,8 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 			 * a one page request is never in violation.
 			 */
 			if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-			    mddev->queue->max_sectors > (PAGE_SIZE>>9))
-				mddev->queue->max_sectors = (PAGE_SIZE>>9);
+			    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
+				blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 			p->head_position = 0;
 			rdev->raid_disk = mirror;
@@ -2145,8 +2145,8 @@ static int run(mddev_t *mddev)
 		 * a one page request is never in violation.
 		 */
 		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
-		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
-			mddev->queue->max_sectors = (PAGE_SIZE>>9);
+		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
+			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
 		disk->head_position = 0;
 	}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 4616bc3a6e7..7970dc8c522 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3463,10 +3463,10 @@ static int bio_fits_rdev(struct bio *bi)
 {
 	struct request_queue *q = bdev_get_queue(bi->bi_bdev);
 
-	if ((bi->bi_size>>9) > q->max_sectors)
+	if ((bi->bi_size>>9) > queue_max_sectors(q))
 		return 0;
 	blk_recount_segments(q, bi);
-	if (bi->bi_phys_segments > q->max_phys_segments)
+	if (bi->bi_phys_segments > queue_max_phys_segments(q))
 		return 0;
 
 	if (q->merge_bvec_fn)
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 0fc2c0ae769..9bd407fa98e 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -289,8 +289,8 @@ sg_open(struct inode *inode, struct file *filp)
 	if (list_empty(&sdp->sfds)) {	/* no existing opens on this device */
 		sdp->sgdebug = 0;
 		q = sdp->device->request_queue;
-		sdp->sg_tablesize = min(q->max_hw_segments,
-					q->max_phys_segments);
+		sdp->sg_tablesize = min(queue_max_hw_segments(q),
+					queue_max_phys_segments(q));
 	}
 	if ((sfp = sg_add_sfp(sdp, dev)))
 		filp->private_data = sfp;
@@ -909,7 +909,7 @@ sg_ioctl(struct inode *inode, struct file *filp,
                 if (val < 0)
                         return -EINVAL;
 		val = min_t(int, val,
-				sdp->device->request_queue->max_sectors * 512);
+			    queue_max_sectors(sdp->device->request_queue) * 512);
 		if (val != sfp->reserve.bufflen) {
 			if (sg_res_in_use(sfp) || sfp->mmap_called)
 				return -EBUSY;
@@ -919,7 +919,7 @@ sg_ioctl(struct inode *inode, struct file *filp,
 		return 0;
 	case SG_GET_RESERVED_SIZE:
 		val = min_t(int, sfp->reserve.bufflen,
-				sdp->device->request_queue->max_sectors * 512);
+			    queue_max_sectors(sdp->device->request_queue) * 512);
 		return put_user(val, ip);
 	case SG_SET_COMMAND_Q:
 		result = get_user(val, ip);
@@ -1059,7 +1059,7 @@ sg_ioctl(struct inode *inode, struct file *filp,
 			return -ENODEV;
 		return scsi_ioctl(sdp->device, cmd_in, p);
 	case BLKSECTGET:
-		return put_user(sdp->device->request_queue->max_sectors * 512,
+		return put_user(queue_max_sectors(sdp->device->request_queue) * 512,
 				ip);
 	case BLKTRACESETUP:
 		return blk_trace_setup(sdp->device->request_queue,
@@ -1377,7 +1377,8 @@ static Sg_device *sg_alloc(struct gendisk *disk, struct scsi_device *scsidp)
 	sdp->device = scsidp;
 	INIT_LIST_HEAD(&sdp->sfds);
 	init_waitqueue_head(&sdp->o_excl_wait);
-	sdp->sg_tablesize = min(q->max_hw_segments, q->max_phys_segments);
+	sdp->sg_tablesize = min(queue_max_hw_segments(q),
+				queue_max_phys_segments(q));
 	sdp->index = k;
 	kref_init(&sdp->d_ref);
 
@@ -2055,7 +2056,7 @@ sg_add_sfp(Sg_device * sdp, int dev)
 		sg_big_buff = def_reserved_size;
 
 	bufflen = min_t(int, sg_big_buff,
-			sdp->device->request_queue->max_sectors * 512);
+			queue_max_sectors(sdp->device->request_queue) * 512);
 	sg_build_reserve(sfp, bufflen);
 	SCSI_LOG_TIMEOUT(3, printk("sg_add_sfp:   bufflen=%d, k_use_sg=%d\n",
 			   sfp->reserve.bufflen, sfp->reserve.k_use_sg));
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 8681b708344..89bd438e1fe 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -3983,8 +3983,8 @@ static int st_probe(struct device *dev)
 		return -ENODEV;
 	}
 
-	i = min(SDp->request_queue->max_hw_segments,
-		SDp->request_queue->max_phys_segments);
+	i = min(queue_max_hw_segments(SDp->request_queue),
+		queue_max_phys_segments(SDp->request_queue));
 	if (st_max_sg_segs < i)
 		i = st_max_sg_segs;
 	buffer = new_tape_buffer((SDp->host)->unchecked_isa_dma, i);
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index 4ca3b586064..cfa26d56ce6 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -132,7 +132,7 @@ static int slave_configure(struct scsi_device *sdev)
 
 		if (us->fflags & US_FL_MAX_SECTORS_MIN)
 			max_sectors = PAGE_CACHE_SIZE >> 9;
-		if (sdev->request_queue->max_sectors > max_sectors)
+		if (queue_max_sectors(sdev->request_queue) > max_sectors)
 			blk_queue_max_sectors(sdev->request_queue,
 					      max_sectors);
 	} else if (sdev->type == TYPE_TAPE) {
@@ -483,7 +483,7 @@ static ssize_t show_max_sectors(struct device *dev, struct device_attribute *att
 {
 	struct scsi_device *sdev = to_scsi_device(dev);
 
-	return sprintf(buf, "%u\n", sdev->request_queue->max_sectors);
+	return sprintf(buf, "%u\n", queue_max_sectors(sdev->request_queue));
 }
 
 /* Input routine for the sysfs max_sectors file */
diff --git a/fs/bio.c b/fs/bio.c
index 4445c382173..ab423a1024a 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -499,11 +499,11 @@ int bio_get_nr_vecs(struct block_device *bdev)
 	struct request_queue *q = bdev_get_queue(bdev);
 	int nr_pages;
 
-	nr_pages = ((q->max_sectors << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	if (nr_pages > q->max_phys_segments)
-		nr_pages = q->max_phys_segments;
-	if (nr_pages > q->max_hw_segments)
-		nr_pages = q->max_hw_segments;
+	nr_pages = ((queue_max_sectors(q) << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	if (nr_pages > queue_max_phys_segments(q))
+		nr_pages = queue_max_phys_segments(q);
+	if (nr_pages > queue_max_hw_segments(q))
+		nr_pages = queue_max_hw_segments(q);
 
 	return nr_pages;
 }
@@ -562,8 +562,8 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 	 * make this too complex.
 	 */
 
-	while (bio->bi_phys_segments >= q->max_phys_segments
-	       || bio->bi_phys_segments >= q->max_hw_segments) {
+	while (bio->bi_phys_segments >= queue_max_phys_segments(q)
+	       || bio->bi_phys_segments >= queue_max_hw_segments(q)) {
 
 		if (retried_segments)
 			return 0;
@@ -634,7 +634,8 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page,
 		    unsigned int len, unsigned int offset)
 {
-	return __bio_add_page(q, bio, page, len, offset, q->max_hw_sectors);
+	return __bio_add_page(q, bio, page, len, offset,
+			      queue_max_hw_sectors(q));
 }
 
 /**
@@ -654,7 +655,7 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
 		 unsigned int offset)
 {
 	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-	return __bio_add_page(q, bio, page, len, offset, q->max_sectors);
+	return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q));
 }
 
 struct bio_map_data {
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d30ec6f30dd..12737be5860 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -279,7 +279,7 @@ static inline int bio_has_allocated_vec(struct bio *bio)
 #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \
 	(((addr1) | (mask)) == (((addr2) - 1) | (mask)))
 #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \
-	__BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, (q)->seg_boundary_mask)
+	__BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, queue_segment_boundary((q)))
 #define BIO_SEG_BOUNDARY(q, b1, b2) \
 	BIOVEC_SEG_BOUNDARY((q), __BVEC_END((b1)), __BVEC_START((b2)))
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 872b78b7a10..29b48f7b4ba 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -898,6 +898,7 @@ extern void blk_cleanup_queue(struct request_queue *);
 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
 extern void blk_queue_max_sectors(struct request_queue *, unsigned int);
+extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
@@ -988,6 +989,41 @@ extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter);
 
 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
 
+static inline unsigned long queue_bounce_pfn(struct request_queue *q)
+{
+	return q->bounce_pfn;
+}
+
+static inline unsigned long queue_segment_boundary(struct request_queue *q)
+{
+	return q->seg_boundary_mask;
+}
+
+static inline unsigned int queue_max_sectors(struct request_queue *q)
+{
+	return q->max_sectors;
+}
+
+static inline unsigned int queue_max_hw_sectors(struct request_queue *q)
+{
+	return q->max_hw_sectors;
+}
+
+static inline unsigned short queue_max_hw_segments(struct request_queue *q)
+{
+	return q->max_hw_segments;
+}
+
+static inline unsigned short queue_max_phys_segments(struct request_queue *q)
+{
+	return q->max_phys_segments;
+}
+
+static inline unsigned int queue_max_segment_size(struct request_queue *q)
+{
+	return q->max_segment_size;
+}
+
 static inline unsigned short queue_logical_block_size(struct request_queue *q)
 {
 	int retval = 512;
diff --git a/mm/bounce.c b/mm/bounce.c
index e590272fe7a..8dcd4315e01 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -192,7 +192,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 		/*
 		 * is destination page below bounce pfn?
 		 */
-		if (page_to_pfn(page) <= q->bounce_pfn)
+		if (page_to_pfn(page) <= queue_bounce_pfn(q))
 			continue;
 
 		/*
@@ -284,7 +284,7 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
 	 * don't waste time iterating over bio segments
 	 */
 	if (!(q->bounce_gfp & GFP_DMA)) {
-		if (q->bounce_pfn >= blk_max_pfn)
+		if (queue_bounce_pfn(q) >= blk_max_pfn)
 			return;
 		pool = page_pool;
 	} else {
-- 
cgit v1.2.3-70-g09d2


From 025146e13b63483add912706c101fb0fb6f015cc Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 22 May 2009 17:17:51 -0400
Subject: block: Move queue limits to an embedded struct

To accommodate stacking drivers that do not have an associated request
queue we're moving the limits to a separate, embedded structure.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-settings.c   | 55 +++++++++++++++++++++++++++++++-------------------
 include/linux/blkdev.h | 44 +++++++++++++++++++++++-----------------
 2 files changed, 60 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 0b32f984eed..b0f547cecfb 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -179,16 +179,16 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask)
 	 */
 	if (b_pfn < (min_t(u64, 0xffffffffUL, BLK_BOUNCE_HIGH) >> PAGE_SHIFT))
 		dma = 1;
-	q->bounce_pfn = max_low_pfn;
+	q->limits.bounce_pfn = max_low_pfn;
 #else
 	if (b_pfn < blk_max_low_pfn)
 		dma = 1;
-	q->bounce_pfn = b_pfn;
+	q->limits.bounce_pfn = b_pfn;
 #endif
 	if (dma) {
 		init_emergency_isa_pool();
 		q->bounce_gfp = GFP_NOIO | GFP_DMA;
-		q->bounce_pfn = b_pfn;
+		q->limits.bounce_pfn = b_pfn;
 	}
 }
 EXPORT_SYMBOL(blk_queue_bounce_limit);
@@ -211,10 +211,10 @@ void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors)
 	}
 
 	if (BLK_DEF_MAX_SECTORS > max_sectors)
-		q->max_hw_sectors = q->max_sectors = max_sectors;
+		q->limits.max_hw_sectors = q->limits.max_sectors = max_sectors;
 	else {
-		q->max_sectors = BLK_DEF_MAX_SECTORS;
-		q->max_hw_sectors = max_sectors;
+		q->limits.max_sectors = BLK_DEF_MAX_SECTORS;
+		q->limits.max_hw_sectors = max_sectors;
 	}
 }
 EXPORT_SYMBOL(blk_queue_max_sectors);
@@ -222,9 +222,9 @@ EXPORT_SYMBOL(blk_queue_max_sectors);
 void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors)
 {
 	if (BLK_DEF_MAX_SECTORS > max_sectors)
-		q->max_hw_sectors = BLK_DEF_MAX_SECTORS;
+		q->limits.max_hw_sectors = BLK_DEF_MAX_SECTORS;
 	else
-		q->max_hw_sectors = max_sectors;
+		q->limits.max_hw_sectors = max_sectors;
 }
 EXPORT_SYMBOL(blk_queue_max_hw_sectors);
 
@@ -247,7 +247,7 @@ void blk_queue_max_phys_segments(struct request_queue *q,
 		       __func__, max_segments);
 	}
 
-	q->max_phys_segments = max_segments;
+	q->limits.max_phys_segments = max_segments;
 }
 EXPORT_SYMBOL(blk_queue_max_phys_segments);
 
@@ -271,7 +271,7 @@ void blk_queue_max_hw_segments(struct request_queue *q,
 		       __func__, max_segments);
 	}
 
-	q->max_hw_segments = max_segments;
+	q->limits.max_hw_segments = max_segments;
 }
 EXPORT_SYMBOL(blk_queue_max_hw_segments);
 
@@ -292,7 +292,7 @@ void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size)
 		       __func__, max_size);
 	}
 
-	q->max_segment_size = max_size;
+	q->limits.max_segment_size = max_size;
 }
 EXPORT_SYMBOL(blk_queue_max_segment_size);
 
@@ -308,7 +308,7 @@ EXPORT_SYMBOL(blk_queue_max_segment_size);
  **/
 void blk_queue_logical_block_size(struct request_queue *q, unsigned short size)
 {
-	q->logical_block_size = size;
+	q->limits.logical_block_size = size;
 }
 EXPORT_SYMBOL(blk_queue_logical_block_size);
 
@@ -325,14 +325,27 @@ EXPORT_SYMBOL(blk_queue_logical_block_size);
 void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 {
 	/* zero is "infinity" */
-	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
-	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
-	t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, b->seg_boundary_mask);
-
-	t->max_phys_segments = min_not_zero(t->max_phys_segments, b->max_phys_segments);
-	t->max_hw_segments = min_not_zero(t->max_hw_segments, b->max_hw_segments);
-	t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size);
-	t->logical_block_size = max(t->logical_block_size, b->logical_block_size);
+	t->limits.max_sectors = min_not_zero(queue_max_sectors(t),
+					     queue_max_sectors(b));
+
+	t->limits.max_hw_sectors = min_not_zero(queue_max_hw_sectors(t),
+						queue_max_hw_sectors(b));
+
+	t->limits.seg_boundary_mask = min_not_zero(queue_segment_boundary(t),
+						   queue_segment_boundary(b));
+
+	t->limits.max_phys_segments = min_not_zero(queue_max_phys_segments(t),
+						   queue_max_phys_segments(b));
+
+	t->limits.max_hw_segments = min_not_zero(queue_max_hw_segments(t),
+						 queue_max_hw_segments(b));
+
+	t->limits.max_segment_size = min_not_zero(queue_max_segment_size(t),
+						  queue_max_segment_size(b));
+
+	t->limits.logical_block_size = max(queue_logical_block_size(t),
+					   queue_logical_block_size(b));
+
 	if (!t->queue_lock)
 		WARN_ON_ONCE(1);
 	else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
@@ -430,7 +443,7 @@ void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask)
 		       __func__, mask);
 	}
 
-	q->seg_boundary_mask = mask;
+	q->limits.seg_boundary_mask = mask;
 }
 EXPORT_SYMBOL(blk_queue_segment_boundary);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 29b48f7b4ba..b7bb6fdba12 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -307,6 +307,21 @@ struct blk_cmd_filter {
 	struct kobject kobj;
 };
 
+struct queue_limits {
+	unsigned long		bounce_pfn;
+	unsigned long		seg_boundary_mask;
+
+	unsigned int		max_hw_sectors;
+	unsigned int		max_sectors;
+	unsigned int		max_segment_size;
+
+	unsigned short		logical_block_size;
+	unsigned short		max_hw_segments;
+	unsigned short		max_phys_segments;
+
+	unsigned char		no_cluster;
+};
+
 struct request_queue
 {
 	/*
@@ -358,7 +373,6 @@ struct request_queue
 	/*
 	 * queue needs bounce pages for pages above this limit
 	 */
-	unsigned long		bounce_pfn;
 	gfp_t			bounce_gfp;
 
 	/*
@@ -387,14 +401,6 @@ struct request_queue
 	unsigned int		nr_congestion_off;
 	unsigned int		nr_batching;
 
-	unsigned int		max_sectors;
-	unsigned int		max_hw_sectors;
-	unsigned short		max_phys_segments;
-	unsigned short		max_hw_segments;
-	unsigned short		logical_block_size;
-	unsigned int		max_segment_size;
-
-	unsigned long		seg_boundary_mask;
 	void			*dma_drain_buffer;
 	unsigned int		dma_drain_size;
 	unsigned int		dma_pad_mask;
@@ -410,6 +416,8 @@ struct request_queue
 	struct timer_list	timeout;
 	struct list_head	timeout_list;
 
+	struct queue_limits	limits;
+
 	/*
 	 * sg stuff
 	 */
@@ -991,45 +999,45 @@ extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter);
 
 static inline unsigned long queue_bounce_pfn(struct request_queue *q)
 {
-	return q->bounce_pfn;
+	return q->limits.bounce_pfn;
 }
 
 static inline unsigned long queue_segment_boundary(struct request_queue *q)
 {
-	return q->seg_boundary_mask;
+	return q->limits.seg_boundary_mask;
 }
 
 static inline unsigned int queue_max_sectors(struct request_queue *q)
 {
-	return q->max_sectors;
+	return q->limits.max_sectors;
 }
 
 static inline unsigned int queue_max_hw_sectors(struct request_queue *q)
 {
-	return q->max_hw_sectors;
+	return q->limits.max_hw_sectors;
 }
 
 static inline unsigned short queue_max_hw_segments(struct request_queue *q)
 {
-	return q->max_hw_segments;
+	return q->limits.max_hw_segments;
 }
 
 static inline unsigned short queue_max_phys_segments(struct request_queue *q)
 {
-	return q->max_phys_segments;
+	return q->limits.max_phys_segments;
 }
 
 static inline unsigned int queue_max_segment_size(struct request_queue *q)
 {
-	return q->max_segment_size;
+	return q->limits.max_segment_size;
 }
 
 static inline unsigned short queue_logical_block_size(struct request_queue *q)
 {
 	int retval = 512;
 
-	if (q && q->logical_block_size)
-		retval = q->logical_block_size;
+	if (q && q->limits.logical_block_size)
+		retval = q->limits.logical_block_size;
 
 	return retval;
 }
-- 
cgit v1.2.3-70-g09d2


From c72758f33784e5e2a1a4bb9421ef3e6de8f9fcf3 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 22 May 2009 17:17:53 -0400
Subject: block: Export I/O topology for block devices and partitions

To support devices with physical block sizes bigger than 512 bytes we
need to ensure proper alignment.  This patch adds support for exposing
I/O topology characteristics as devices are stacked.

  logical_block_size is the smallest unit the device can address.

  physical_block_size indicates the smallest I/O the device can write
  without incurring a read-modify-write penalty.

  The io_min parameter is the smallest preferred I/O size reported by
  the device.  In many cases this is the same as the physical block
  size.  However, the io_min parameter can be scaled up when stacking
  (RAID5 chunk size > physical block size).

  The io_opt characteristic indicates the optimal I/O size reported by
  the device.  This is usually the stripe width for arrays.

  The alignment_offset parameter indicates the number of bytes the start
  of the device/partition is offset from the device's natural alignment.
  Partition tools and MD/DM utilities can use this to pad their offsets
  so filesystems start on proper boundaries.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 Documentation/ABI/testing/sysfs-block |  59 +++++++++++
 block/blk-settings.c                  | 186 ++++++++++++++++++++++++++++++++++
 block/blk-sysfs.c                     |  33 ++++++
 block/genhd.c                         |  11 ++
 fs/partitions/check.c                 |  10 ++
 include/linux/blkdev.h                |  47 +++++++++
 include/linux/genhd.h                 |   1 +
 7 files changed, 347 insertions(+)

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index 44f52a4f590..cbbd3e06994 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -60,3 +60,62 @@ Description:
 		Indicates whether the block layer should automatically
 		generate checksums for write requests bound for
 		devices that support receiving integrity metadata.
+
+What:		/sys/block/<disk>/alignment_offset
+Date:		April 2009
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Storage devices may report a physical block size that is
+		bigger than the logical block size (for instance a drive
+		with 4KB physical sectors exposing 512-byte logical
+		blocks to the operating system).  This parameter
+		indicates how many bytes the beginning of the device is
+		offset from the disk's natural alignment.
+
+What:		/sys/block/<disk>/<partition>/alignment_offset
+Date:		April 2009
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Storage devices may report a physical block size that is
+		bigger than the logical block size (for instance a drive
+		with 4KB physical sectors exposing 512-byte logical
+		blocks to the operating system).  This parameter
+		indicates how many bytes the beginning of the partition
+		is offset from the disk's natural alignment.
+
+What:		/sys/block/<disk>/queue/logical_block_size
+Date:		May 2009
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		This is the smallest unit the storage device can
+		address.  It is typically 512 bytes.
+
+What:		/sys/block/<disk>/queue/physical_block_size
+Date:		May 2009
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		This is the smallest unit the storage device can write
+		without resorting to read-modify-write operation.  It is
+		usually the same as the logical block size but may be
+		bigger.  One example is SATA drives with 4KB sectors
+		that expose a 512-byte logical block size to the
+		operating system.
+
+What:		/sys/block/<disk>/queue/minimum_io_size
+Date:		April 2009
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Storage devices may report a preferred minimum I/O size,
+		which is the smallest request the device can perform
+		without incurring a read-modify-write penalty.  For disk
+		drives this is often the physical block size.  For RAID
+		arrays it is often the stripe chunk size.
+
+What:		/sys/block/<disk>/queue/optimal_io_size
+Date:		April 2009
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Storage devices may report an optimal I/O size, which is
+		the device's preferred unit of receiving I/O.  This is
+		rarely reported for disk drives.  For RAID devices it is
+		usually the stripe width or the internal block size.
diff --git a/block/blk-settings.c b/block/blk-settings.c
index b0f547cecfb..5649f34adb4 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -309,9 +309,94 @@ EXPORT_SYMBOL(blk_queue_max_segment_size);
 void blk_queue_logical_block_size(struct request_queue *q, unsigned short size)
 {
 	q->limits.logical_block_size = size;
+
+	if (q->limits.physical_block_size < size)
+		q->limits.physical_block_size = size;
+
+	if (q->limits.io_min < q->limits.physical_block_size)
+		q->limits.io_min = q->limits.physical_block_size;
 }
 EXPORT_SYMBOL(blk_queue_logical_block_size);
 
+/**
+ * blk_queue_physical_block_size - set physical block size for the queue
+ * @q:  the request queue for the device
+ * @size:  the physical block size, in bytes
+ *
+ * Description:
+ *   This should be set to the lowest possible sector size that the
+ *   hardware can operate on without reverting to read-modify-write
+ *   operations.
+ */
+void blk_queue_physical_block_size(struct request_queue *q, unsigned short size)
+{
+	q->limits.physical_block_size = size;
+
+	if (q->limits.physical_block_size < q->limits.logical_block_size)
+		q->limits.physical_block_size = q->limits.logical_block_size;
+
+	if (q->limits.io_min < q->limits.physical_block_size)
+		q->limits.io_min = q->limits.physical_block_size;
+}
+EXPORT_SYMBOL(blk_queue_physical_block_size);
+
+/**
+ * blk_queue_alignment_offset - set physical block alignment offset
+ * @q:	the request queue for the device
+ * @alignment:	alignment offset in bytes
+ *
+ * Description:
+ *   Some devices are naturally misaligned to compensate for things like
+ *   the legacy DOS partition table 63-sector offset.  Low-level drivers
+ *   should call this function for devices whose first sector is not
+ *   naturally aligned.
+ */
+void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset)
+{
+	q->limits.alignment_offset =
+		offset & (q->limits.physical_block_size - 1);
+	q->limits.misaligned = 0;
+}
+EXPORT_SYMBOL(blk_queue_alignment_offset);
+
+/**
+ * blk_queue_io_min - set minimum request size for the queue
+ * @q:	the request queue for the device
+ * @io_min:  smallest I/O size in bytes
+ *
+ * Description:
+ *   Some devices have an internal block size bigger than the reported
+ *   hardware sector size.  This function can be used to signal the
+ *   smallest I/O the device can perform without incurring a performance
+ *   penalty.
+ */
+void blk_queue_io_min(struct request_queue *q, unsigned int min)
+{
+	q->limits.io_min = min;
+
+	if (q->limits.io_min < q->limits.logical_block_size)
+		q->limits.io_min = q->limits.logical_block_size;
+
+	if (q->limits.io_min < q->limits.physical_block_size)
+		q->limits.io_min = q->limits.physical_block_size;
+}
+EXPORT_SYMBOL(blk_queue_io_min);
+
+/**
+ * blk_queue_io_opt - set optimal request size for the queue
+ * @q:	the request queue for the device
+ * @io_opt:  optimal request size in bytes
+ *
+ * Description:
+ *   Drivers can call this function to set the preferred I/O request
+ *   size for devices that report such a value.
+ */
+void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
+{
+	q->limits.io_opt = opt;
+}
+EXPORT_SYMBOL(blk_queue_io_opt);
+
 /*
  * Returns the minimum that is _not_ zero, unless both are zero.
  */
@@ -357,6 +442,107 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 }
 EXPORT_SYMBOL(blk_queue_stack_limits);
 
+/**
+ * blk_stack_limits - adjust queue_limits for stacked devices
+ * @t:	the stacking driver limits (top)
+ * @bdev:  the underlying queue limits (bottom)
+ * @offset:  offset to beginning of data within component device
+ *
+ * Description:
+ *    Merges two queue_limit structs.  Returns 0 if alignment didn't
+ *    change.  Returns -1 if adding the bottom device caused
+ *    misalignment.
+ */
+int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
+		     sector_t offset)
+{
+	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
+	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
+
+	t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
+					    b->seg_boundary_mask);
+
+	t->max_phys_segments = min_not_zero(t->max_phys_segments,
+					    b->max_phys_segments);
+
+	t->max_hw_segments = min_not_zero(t->max_hw_segments,
+					  b->max_hw_segments);
+
+	t->max_segment_size = min_not_zero(t->max_segment_size,
+					   b->max_segment_size);
+
+	t->logical_block_size = max(t->logical_block_size,
+				    b->logical_block_size);
+
+	t->physical_block_size = max(t->physical_block_size,
+				     b->physical_block_size);
+
+	t->io_min = max(t->io_min, b->io_min);
+	t->no_cluster |= b->no_cluster;
+
+	/* Bottom device offset aligned? */
+	if (offset &&
+	    (offset & (b->physical_block_size - 1)) != b->alignment_offset) {
+		t->misaligned = 1;
+		return -1;
+	}
+
+	/* If top has no alignment offset, inherit from bottom */
+	if (!t->alignment_offset)
+		t->alignment_offset =
+			b->alignment_offset & (b->physical_block_size - 1);
+
+	/* Top device aligned on logical block boundary? */
+	if (t->alignment_offset & (t->logical_block_size - 1)) {
+		t->misaligned = 1;
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * disk_stack_limits - adjust queue limits for stacked drivers
+ * @t:	MD/DM gendisk (top)
+ * @bdev:  the underlying block device (bottom)
+ * @offset:  offset to beginning of data within component device
+ *
+ * Description:
+ *    Merges the limits for two queues.  Returns 0 if alignment
+ *    didn't change.  Returns -1 if adding the bottom device caused
+ *    misalignment.
+ */
+void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
+		       sector_t offset)
+{
+	struct request_queue *t = disk->queue;
+	struct request_queue *b = bdev_get_queue(bdev);
+
+	offset += get_start_sect(bdev) << 9;
+
+	if (blk_stack_limits(&t->limits, &b->limits, offset) < 0) {
+		char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE];
+
+		disk_name(disk, 0, top);
+		bdevname(bdev, bottom);
+
+		printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n",
+		       top, bottom);
+	}
+
+	if (!t->queue_lock)
+		WARN_ON_ONCE(1);
+	else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(t->queue_lock, flags);
+		if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
+			queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
+		spin_unlock_irqrestore(t->queue_lock, flags);
+	}
+}
+EXPORT_SYMBOL(disk_stack_limits);
+
 /**
  * blk_queue_dma_pad - set pad mask
  * @q:     the request queue for the device
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 3ccdadb8e20..9337e17f911 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -105,6 +105,21 @@ static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page
 	return queue_var_show(queue_logical_block_size(q), page);
 }
 
+static ssize_t queue_physical_block_size_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(queue_physical_block_size(q), page);
+}
+
+static ssize_t queue_io_min_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(queue_io_min(q), page);
+}
+
+static ssize_t queue_io_opt_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(queue_io_opt(q), page);
+}
+
 static ssize_t
 queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
 {
@@ -257,6 +272,21 @@ static struct queue_sysfs_entry queue_logical_block_size_entry = {
 	.show = queue_logical_block_size_show,
 };
 
+static struct queue_sysfs_entry queue_physical_block_size_entry = {
+	.attr = {.name = "physical_block_size", .mode = S_IRUGO },
+	.show = queue_physical_block_size_show,
+};
+
+static struct queue_sysfs_entry queue_io_min_entry = {
+	.attr = {.name = "minimum_io_size", .mode = S_IRUGO },
+	.show = queue_io_min_show,
+};
+
+static struct queue_sysfs_entry queue_io_opt_entry = {
+	.attr = {.name = "optimal_io_size", .mode = S_IRUGO },
+	.show = queue_io_opt_show,
+};
+
 static struct queue_sysfs_entry queue_nonrot_entry = {
 	.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_nonrot_show,
@@ -289,6 +319,9 @@ static struct attribute *default_attrs[] = {
 	&queue_iosched_entry.attr,
 	&queue_hw_sector_size_entry.attr,
 	&queue_logical_block_size_entry.attr,
+	&queue_physical_block_size_entry.attr,
+	&queue_io_min_entry.attr,
+	&queue_io_opt_entry.attr,
 	&queue_nonrot_entry.attr,
 	&queue_nomerges_entry.attr,
 	&queue_rq_affinity_entry.attr,
diff --git a/block/genhd.c b/block/genhd.c
index 1a4916e0173..fe7ccc0a618 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -852,11 +852,21 @@ static ssize_t disk_capability_show(struct device *dev,
 	return sprintf(buf, "%x\n", disk->flags);
 }
 
+static ssize_t disk_alignment_offset_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+
+	return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
+}
+
 static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
 static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
 static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
 static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
 static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
+static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -875,6 +885,7 @@ static struct attribute *disk_attrs[] = {
 	&dev_attr_removable.attr,
 	&dev_attr_ro.attr,
 	&dev_attr_size.attr,
+	&dev_attr_alignment_offset.attr,
 	&dev_attr_capability.attr,
 	&dev_attr_stat.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 99e33ef40be..0af36085eb2 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -219,6 +219,13 @@ ssize_t part_size_show(struct device *dev,
 	return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
 }
 
+ssize_t part_alignment_offset_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct hd_struct *p = dev_to_part(dev);
+	return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset);
+}
+
 ssize_t part_stat_show(struct device *dev,
 		       struct device_attribute *attr, char *buf)
 {
@@ -272,6 +279,7 @@ ssize_t part_fail_store(struct device *dev,
 static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
 static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
 static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
+static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
@@ -282,6 +290,7 @@ static struct attribute *part_attrs[] = {
 	&dev_attr_partition.attr,
 	&dev_attr_start.attr,
 	&dev_attr_size.attr,
+	&dev_attr_alignment_offset.attr,
 	&dev_attr_stat.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	&dev_attr_fail.attr,
@@ -383,6 +392,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 	pdev = part_to_dev(p);
 
 	p->start_sect = start;
+	p->alignment_offset = queue_sector_alignment_offset(disk->queue, start);
 	p->nr_sects = len;
 	p->partno = partno;
 	p->policy = get_disk_ro(disk);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b7bb6fdba12..5e740a135e7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -314,11 +314,16 @@ struct queue_limits {
 	unsigned int		max_hw_sectors;
 	unsigned int		max_sectors;
 	unsigned int		max_segment_size;
+	unsigned int		physical_block_size;
+	unsigned int		alignment_offset;
+	unsigned int		io_min;
+	unsigned int		io_opt;
 
 	unsigned short		logical_block_size;
 	unsigned short		max_hw_segments;
 	unsigned short		max_phys_segments;
 
+	unsigned char		misaligned;
 	unsigned char		no_cluster;
 };
 
@@ -911,6 +916,15 @@ extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
 extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
+extern void blk_queue_physical_block_size(struct request_queue *, unsigned short);
+extern void blk_queue_alignment_offset(struct request_queue *q,
+				       unsigned int alignment);
+extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
+extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
+extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
+			    sector_t offset);
+extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
+			      sector_t offset);
 extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
 extern void blk_queue_dma_pad(struct request_queue *, unsigned int);
 extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
@@ -1047,6 +1061,39 @@ static inline unsigned short bdev_logical_block_size(struct block_device *bdev)
 	return queue_logical_block_size(bdev_get_queue(bdev));
 }
 
+static inline unsigned int queue_physical_block_size(struct request_queue *q)
+{
+	return q->limits.physical_block_size;
+}
+
+static inline unsigned int queue_io_min(struct request_queue *q)
+{
+	return q->limits.io_min;
+}
+
+static inline unsigned int queue_io_opt(struct request_queue *q)
+{
+	return q->limits.io_opt;
+}
+
+static inline int queue_alignment_offset(struct request_queue *q)
+{
+	if (q && q->limits.misaligned)
+		return -1;
+
+	if (q && q->limits.alignment_offset)
+		return q->limits.alignment_offset;
+
+	return 0;
+}
+
+static inline int queue_sector_alignment_offset(struct request_queue *q,
+						sector_t sector)
+{
+	return ((sector << 9) - q->limits.alignment_offset)
+		& (q->limits.io_min - 1);
+}
+
 static inline int queue_dma_alignment(struct request_queue *q)
 {
 	return q ? q->dma_alignment : 511;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index a1a28caed23..149fda264c8 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -90,6 +90,7 @@ struct disk_stats {
 struct hd_struct {
 	sector_t start_sect;
 	sector_t nr_sects;
+	sector_t alignment_offset;
 	struct device __dev;
 	struct kobject *holder_dir;
 	int policy, partno;
-- 
cgit v1.2.3-70-g09d2


From e220d2dcb944c5c488b6855d15ec66d76900514f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Sat, 23 May 2009 18:28:55 +0200
Subject: perf_counter: Fix dynamic irq_period logging

We call perf_adjust_freq() from perf_counter_task_tick() which
is is called under the rq->lock causing lock recursion.
However, it's no longer required to be called under the
rq->lock, so remove it from under it.

Also, fix up some related comments.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090523163012.476197912@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 1 +
 kernel/perf_counter.c        | 3 ++-
 kernel/sched.c               | 3 ++-
 3 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 2eedae8498d..23ddd29730f 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -260,6 +260,7 @@ enum perf_event_type {
 	/*
 	 * struct {
 	 * 	struct perf_event_header	header;
+	 * 	u64				time;
 	 * 	u64				irq_period;
 	 * };
 	 */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index c10055416de..2f410ea2cb3 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2559,7 +2559,8 @@ void perf_counter_munmap(unsigned long addr, unsigned long len,
 }
 
 /*
- *
+ * Log irq_period changes so that analyzing tools can re-normalize the
+ * event flow.
  */
 
 static void perf_log_period(struct perf_counter *counter, u64 period)
diff --git a/kernel/sched.c b/kernel/sched.c
index 4c0d58bce6b..ad079f07c9c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4875,9 +4875,10 @@ void scheduler_tick(void)
 	update_rq_clock(rq);
 	update_cpu_load(rq);
 	curr->sched_class->task_tick(rq, curr, 0);
-	perf_counter_task_tick(curr, cpu);
 	spin_unlock(&rq->lock);
 
+	perf_counter_task_tick(curr, cpu);
+
 #ifdef CONFIG_SMP
 	rq->idle_at_tick = idle_cpu(cpu);
 	trigger_load_balance(rq, cpu);
-- 
cgit v1.2.3-70-g09d2


From fccc714b3148ab9741fafc1e90c3876d50df6093 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Sat, 23 May 2009 18:28:56 +0200
Subject: perf_counter: Sanitize counter->mutex

s/counter->mutex/counter->child_mutex/ and make sure its only
used to protect child_list.

The usage in __perf_counter_exit_task() doesn't appear to be
problematic since ctx->mutex also covers anything related to fd
tear-down.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090523163012.533186528@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  7 +++----
 kernel/perf_counter.c        | 47 ++++++++++++++++++--------------------------
 2 files changed, 22 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 23ddd29730f..4ab8050eb9e 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -452,9 +452,6 @@ struct perf_counter {
 	struct perf_counter_context	*ctx;
 	struct file			*filp;
 
-	struct perf_counter		*parent;
-	struct list_head		child_list;
-
 	/*
 	 * These accumulate total time (in nanoseconds) that children
 	 * counters have been enabled and running, respectively.
@@ -465,7 +462,9 @@ struct perf_counter {
 	/*
 	 * Protect attach/detach and child_list:
 	 */
-	struct mutex			mutex;
+	struct mutex			child_mutex;
+	struct list_head		child_list;
+	struct perf_counter		*parent;
 
 	int				oncpu;
 	int				cpu;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 2f410ea2cb3..679c3b5bb7d 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -111,6 +111,10 @@ static void put_ctx(struct perf_counter_context *ctx)
 	}
 }
 
+/*
+ * Add a counter from the lists for its context.
+ * Must be called with ctx->mutex and ctx->lock held.
+ */
 static void
 list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 {
@@ -136,7 +140,7 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 
 /*
  * Remove a counter from the lists for its context.
- * Must be called with counter->mutex and ctx->mutex held.
+ * Must be called with ctx->mutex and ctx->lock held.
  */
 static void
 list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
@@ -276,7 +280,7 @@ static void __perf_counter_remove_from_context(void *info)
 /*
  * Remove the counter from a task's (or a CPU's) list of counters.
  *
- * Must be called with counter->mutex and ctx->mutex held.
+ * Must be called with ctx->mutex held.
  *
  * CPU counters are removed with a smp call. For task counters we only
  * call when the task is on a CPU.
@@ -1407,11 +1411,7 @@ static int perf_release(struct inode *inode, struct file *file)
 	file->private_data = NULL;
 
 	mutex_lock(&ctx->mutex);
-	mutex_lock(&counter->mutex);
-
 	perf_counter_remove_from_context(counter);
-
-	mutex_unlock(&counter->mutex);
 	mutex_unlock(&ctx->mutex);
 
 	free_counter(counter);
@@ -1437,7 +1437,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 	if (counter->state == PERF_COUNTER_STATE_ERROR)
 		return 0;
 
-	mutex_lock(&counter->mutex);
+	mutex_lock(&counter->child_mutex);
 	values[0] = perf_counter_read(counter);
 	n = 1;
 	if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
@@ -1446,7 +1446,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 	if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
 		values[n++] = counter->total_time_running +
 			atomic64_read(&counter->child_total_time_running);
-	mutex_unlock(&counter->mutex);
+	mutex_unlock(&counter->child_mutex);
 
 	if (count < n * sizeof(u64))
 		return -EINVAL;
@@ -1510,11 +1510,11 @@ static void perf_counter_for_each_child(struct perf_counter *counter,
 {
 	struct perf_counter *child;
 
-	mutex_lock(&counter->mutex);
+	mutex_lock(&counter->child_mutex);
 	func(counter);
 	list_for_each_entry(child, &counter->child_list, child_list)
 		func(child);
-	mutex_unlock(&counter->mutex);
+	mutex_unlock(&counter->child_mutex);
 }
 
 static void perf_counter_for_each(struct perf_counter *counter,
@@ -1522,11 +1522,11 @@ static void perf_counter_for_each(struct perf_counter *counter,
 {
 	struct perf_counter *child;
 
-	mutex_lock(&counter->mutex);
+	mutex_lock(&counter->child_mutex);
 	perf_counter_for_each_sibling(counter, func);
 	list_for_each_entry(child, &counter->child_list, child_list)
 		perf_counter_for_each_sibling(child, func);
-	mutex_unlock(&counter->mutex);
+	mutex_unlock(&counter->child_mutex);
 }
 
 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -3106,7 +3106,9 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	if (!group_leader)
 		group_leader = counter;
 
-	mutex_init(&counter->mutex);
+	mutex_init(&counter->child_mutex);
+	INIT_LIST_HEAD(&counter->child_list);
+
 	INIT_LIST_HEAD(&counter->list_entry);
 	INIT_LIST_HEAD(&counter->event_entry);
 	INIT_LIST_HEAD(&counter->sibling_list);
@@ -3114,8 +3116,6 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 
 	mutex_init(&counter->mmap_mutex);
 
-	INIT_LIST_HEAD(&counter->child_list);
-
 	counter->cpu			= cpu;
 	counter->hw_event		= *hw_event;
 	counter->group_leader		= group_leader;
@@ -3346,10 +3346,9 @@ inherit_counter(struct perf_counter *parent_counter,
 	/*
 	 * Link this into the parent counter's child list
 	 */
-	mutex_lock(&parent_counter->mutex);
+	mutex_lock(&parent_counter->child_mutex);
 	list_add_tail(&child_counter->child_list, &parent_counter->child_list);
-
-	mutex_unlock(&parent_counter->mutex);
+	mutex_unlock(&parent_counter->child_mutex);
 
 	return child_counter;
 }
@@ -3396,9 +3395,9 @@ static void sync_child_counter(struct perf_counter *child_counter,
 	/*
 	 * Remove this counter from the parent's list
 	 */
-	mutex_lock(&parent_counter->mutex);
+	mutex_lock(&parent_counter->child_mutex);
 	list_del_init(&child_counter->child_list);
-	mutex_unlock(&parent_counter->mutex);
+	mutex_unlock(&parent_counter->child_mutex);
 
 	/*
 	 * Release the parent counter, if this was the last
@@ -3414,17 +3413,9 @@ __perf_counter_exit_task(struct task_struct *child,
 {
 	struct perf_counter *parent_counter;
 
-	/*
-	 * Protect against concurrent operations on child_counter
-	 * due its fd getting closed, etc.
-	 */
-	mutex_lock(&child_counter->mutex);
-
 	update_counter_times(child_counter);
 	list_del_counter(child_counter, child_ctx);
 
-	mutex_unlock(&child_counter->mutex);
-
 	parent_counter = child_counter->parent;
 	/*
 	 * It can happen that parent exits first, and has counters
-- 
cgit v1.2.3-70-g09d2


From 10eb0f013c63c71c82ede77945a5f390c10cfda6 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 13 May 2009 17:57:38 -0500
Subject: [SCSI] iscsi: pass ep connect shost

When we create the tcp/ip connection by calling ep_connect, we currently
just go by the routing table info.

I think there are two problems with this.

1. Some drivers do not have access to a routing table. Some drivers like
qla4xxx do not even know about other ports.

2. If you have two initiator ports on the same subnet, the user may have
set things up so that session1 was supposed to be run through port1. and
session2 was supposed to be run through port2. It looks like we could
end with both sessions going through one of the ports.

Fixes for cxgb3i from Karen Xie.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/infiniband/ulp/iser/iscsi_iser.c |  3 +-
 drivers/scsi/cxgb3i/cxgb3i.h             |  1 -
 drivers/scsi/cxgb3i/cxgb3i_iscsi.c       | 25 +++++++++++++---
 drivers/scsi/cxgb3i/cxgb3i_offload.c     | 23 ++++++++------
 drivers/scsi/cxgb3i/cxgb3i_offload.h     |  3 +-
 drivers/scsi/scsi_transport_iscsi.c      | 51 +++++++++++++++++++++++++-------
 include/scsi/iscsi_if.h                  |  7 ++++-
 include/scsi/scsi_transport_iscsi.h      |  3 +-
 8 files changed, 87 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 75223f50de5..ffbe0c76bc1 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -517,7 +517,8 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s
 }
 
 static struct iscsi_endpoint *
-iscsi_iser_ep_connect(struct sockaddr *dst_addr, int non_blocking)
+iscsi_iser_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
+		      int non_blocking)
 {
 	int err;
 	struct iser_conn *ib_conn;
diff --git a/drivers/scsi/cxgb3i/cxgb3i.h b/drivers/scsi/cxgb3i/cxgb3i.h
index 59b0958d2d1..e3133b58e59 100644
--- a/drivers/scsi/cxgb3i/cxgb3i.h
+++ b/drivers/scsi/cxgb3i/cxgb3i.h
@@ -144,7 +144,6 @@ struct cxgb3i_adapter *cxgb3i_adapter_find_by_tdev(struct t3cdev *);
 void cxgb3i_adapter_open(struct t3cdev *);
 void cxgb3i_adapter_close(struct t3cdev *);
 
-struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *);
 struct cxgb3i_hba *cxgb3i_hba_host_add(struct cxgb3i_adapter *,
 				       struct net_device *);
 void cxgb3i_hba_host_remove(struct cxgb3i_hba *);
diff --git a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c
index 9212400b9b1..04a43744aed 100644
--- a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c
+++ b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c
@@ -178,7 +178,7 @@ void cxgb3i_adapter_close(struct t3cdev *t3dev)
  * cxgb3i_hba_find_by_netdev - find the cxgb3i_hba structure via net_device
  * @t3dev: t3cdev adapter
  */
-struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *ndev)
+static struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *ndev)
 {
 	struct cxgb3i_adapter *snic;
 	int i;
@@ -261,20 +261,27 @@ void cxgb3i_hba_host_remove(struct cxgb3i_hba *hba)
 
 /**
  * cxgb3i_ep_connect - establish TCP connection to target portal
+ * @shost:		scsi host to use
  * @dst_addr:		target IP address
  * @non_blocking:	blocking or non-blocking call
  *
  * Initiates a TCP/IP connection to the dst_addr
  */
-static struct iscsi_endpoint *cxgb3i_ep_connect(struct sockaddr *dst_addr,
+static struct iscsi_endpoint *cxgb3i_ep_connect(struct Scsi_Host *shost,
+						struct sockaddr *dst_addr,
 						int non_blocking)
 {
 	struct iscsi_endpoint *ep;
 	struct cxgb3i_endpoint *cep;
-	struct cxgb3i_hba *hba;
+	struct cxgb3i_hba *hba = NULL;
 	struct s3_conn *c3cn = NULL;
 	int err = 0;
 
+	if (shost)
+		hba = iscsi_host_priv(shost);
+
+	cxgb3i_api_debug("shost 0x%p, hba 0x%p.\n", shost, hba);
+
 	c3cn = cxgb3i_c3cn_create();
 	if (!c3cn) {
 		cxgb3i_log_info("ep connect OOM.\n");
@@ -282,17 +289,27 @@ static struct iscsi_endpoint *cxgb3i_ep_connect(struct sockaddr *dst_addr,
 		goto release_conn;
 	}
 
-	err = cxgb3i_c3cn_connect(c3cn, (struct sockaddr_in *)dst_addr);
+	err = cxgb3i_c3cn_connect(hba ? hba->ndev : NULL, c3cn,
+				 (struct sockaddr_in *)dst_addr);
 	if (err < 0) {
 		cxgb3i_log_info("ep connect failed.\n");
 		goto release_conn;
 	}
+
 	hba = cxgb3i_hba_find_by_netdev(c3cn->dst_cache->dev);
 	if (!hba) {
 		err = -ENOSPC;
 		cxgb3i_log_info("NOT going through cxgbi device.\n");
 		goto release_conn;
 	}
+
+	if (shost && hba != iscsi_host_priv(shost)) {
+		err = -ENOSPC;
+		cxgb3i_log_info("Could not connect through request host%u\n",
+				shost->host_no);
+		goto release_conn;
+	}
+
 	if (c3cn_is_closing(c3cn)) {
 		err = -ENOSPC;
 		cxgb3i_log_info("ep connect unable to connect.\n");
diff --git a/drivers/scsi/cxgb3i/cxgb3i_offload.c b/drivers/scsi/cxgb3i/cxgb3i_offload.c
index e11c9c180f3..c1d5be4adf9 100644
--- a/drivers/scsi/cxgb3i/cxgb3i_offload.c
+++ b/drivers/scsi/cxgb3i/cxgb3i_offload.c
@@ -1479,12 +1479,13 @@ static struct net_device *cxgb3_egress_dev(struct net_device *root_dev,
 	return NULL;
 }
 
-static struct rtable *find_route(__be32 saddr, __be32 daddr,
+static struct rtable *find_route(struct net_device *dev,
+				 __be32 saddr, __be32 daddr,
 				 __be16 sport, __be16 dport)
 {
 	struct rtable *rt;
 	struct flowi fl = {
-		.oif = 0,
+		.oif = dev ? dev->ifindex : 0,
 		.nl_u = {
 			 .ip4_u = {
 				   .daddr = daddr,
@@ -1573,36 +1574,40 @@ out_err:
  *
  * return 0 if active open request is sent, < 0 otherwise.
  */
-int cxgb3i_c3cn_connect(struct s3_conn *c3cn, struct sockaddr_in *usin)
+int cxgb3i_c3cn_connect(struct net_device *dev, struct s3_conn *c3cn,
+			struct sockaddr_in *usin)
 {
 	struct rtable *rt;
-	struct net_device *dev;
 	struct cxgb3i_sdev_data *cdata;
 	struct t3cdev *cdev;
 	__be32 sipv4;
 	int err;
 
+	c3cn_conn_debug("c3cn 0x%p, dev 0x%p.\n", c3cn, dev);
+
 	if (usin->sin_family != AF_INET)
 		return -EAFNOSUPPORT;
 
 	c3cn->daddr.sin_port = usin->sin_port;
 	c3cn->daddr.sin_addr.s_addr = usin->sin_addr.s_addr;
 
-	rt = find_route(c3cn->saddr.sin_addr.s_addr,
+	rt = find_route(dev, c3cn->saddr.sin_addr.s_addr,
 			c3cn->daddr.sin_addr.s_addr,
 			c3cn->saddr.sin_port,
 			c3cn->daddr.sin_port);
 	if (rt == NULL) {
-		c3cn_conn_debug("NO route to 0x%x, port %u.\n",
+		c3cn_conn_debug("NO route to 0x%x, port %u, dev %s.\n",
 				c3cn->daddr.sin_addr.s_addr,
-				ntohs(c3cn->daddr.sin_port));
+				ntohs(c3cn->daddr.sin_port),
+				dev ? dev->name : "any");
 		return -ENETUNREACH;
 	}
 
 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
-		c3cn_conn_debug("multi-cast route to 0x%x, port %u.\n",
+		c3cn_conn_debug("multi-cast route to 0x%x, port %u, dev %s.\n",
 				c3cn->daddr.sin_addr.s_addr,
-				ntohs(c3cn->daddr.sin_port));
+				ntohs(c3cn->daddr.sin_port),
+				dev ? dev->name : "any");
 		ip_rt_put(rt);
 		return -ENETUNREACH;
 	}
diff --git a/drivers/scsi/cxgb3i/cxgb3i_offload.h b/drivers/scsi/cxgb3i/cxgb3i_offload.h
index ebfca960c0a..6a1d86b1faf 100644
--- a/drivers/scsi/cxgb3i/cxgb3i_offload.h
+++ b/drivers/scsi/cxgb3i/cxgb3i_offload.h
@@ -169,7 +169,8 @@ void cxgb3i_sdev_add(struct t3cdev *, struct cxgb3_client *);
 void cxgb3i_sdev_remove(struct t3cdev *);
 
 struct s3_conn *cxgb3i_c3cn_create(void);
-int cxgb3i_c3cn_connect(struct s3_conn *, struct sockaddr_in *);
+int cxgb3i_c3cn_connect(struct net_device *, struct s3_conn *,
+			struct sockaddr_in *);
 void cxgb3i_c3cn_rx_credits(struct s3_conn *, int);
 int cxgb3i_c3cn_send_pdus(struct s3_conn *, struct sk_buff *);
 void cxgb3i_c3cn_release(struct s3_conn *);
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 0a2ce7b6325..d69a53aa406 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -1268,26 +1268,54 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev)
 	return err;
 }
 
+static int iscsi_if_ep_connect(struct iscsi_transport *transport,
+			       struct iscsi_uevent *ev, int msg_type)
+{
+	struct iscsi_endpoint *ep;
+	struct sockaddr *dst_addr;
+	struct Scsi_Host *shost = NULL;
+	int non_blocking, err = 0;
+
+	if (!transport->ep_connect)
+		return -EINVAL;
+
+	if (msg_type == ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST) {
+		shost = scsi_host_lookup(ev->u.ep_connect_through_host.host_no);
+		if (!shost) {
+			printk(KERN_ERR "ep connect failed. Could not find "
+			       "host no %u\n",
+			       ev->u.ep_connect_through_host.host_no);
+			return -ENODEV;
+		}
+		non_blocking = ev->u.ep_connect_through_host.non_blocking;
+	} else
+		non_blocking = ev->u.ep_connect.non_blocking;
+
+	dst_addr = (struct sockaddr *)((char*)ev + sizeof(*ev));
+	ep = transport->ep_connect(shost, dst_addr, non_blocking);
+	if (IS_ERR(ep)) {
+		err = PTR_ERR(ep);
+		goto release_host;
+	}
+
+	ev->r.ep_connect_ret.handle = ep->id;
+release_host:
+	if (shost)
+		scsi_host_put(shost);
+	return err;
+}
+
 static int
 iscsi_if_transport_ep(struct iscsi_transport *transport,
 		      struct iscsi_uevent *ev, int msg_type)
 {
 	struct iscsi_endpoint *ep;
-	struct sockaddr *dst_addr;
 	int rc = 0;
 
 	switch (msg_type) {
+	case ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST:
 	case ISCSI_UEVENT_TRANSPORT_EP_CONNECT:
-		if (!transport->ep_connect)
-			return -EINVAL;
-
-		dst_addr = (struct sockaddr *)((char*)ev + sizeof(*ev));
-		ep = transport->ep_connect(dst_addr,
-					   ev->u.ep_connect.non_blocking);
-		if (IS_ERR(ep))
-			return PTR_ERR(ep);
-
-		ev->r.ep_connect_ret.handle = ep->id;
+		rc = iscsi_if_ep_connect(transport, ev, msg_type);
 		break;
 	case ISCSI_UEVENT_TRANSPORT_EP_POLL:
 		if (!transport->ep_poll)
@@ -1469,6 +1497,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	case ISCSI_UEVENT_TRANSPORT_EP_CONNECT:
 	case ISCSI_UEVENT_TRANSPORT_EP_POLL:
 	case ISCSI_UEVENT_TRANSPORT_EP_DISCONNECT:
+	case ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST:
 		err = iscsi_if_transport_ep(transport, ev, nlh->nlmsg_type);
 		break;
 	case ISCSI_UEVENT_TGT_DSCVR:
diff --git a/include/scsi/iscsi_if.h b/include/scsi/iscsi_if.h
index d0ed5226f8c..2c1a4af9eaf 100644
--- a/include/scsi/iscsi_if.h
+++ b/include/scsi/iscsi_if.h
@@ -50,7 +50,8 @@ enum iscsi_uevent_e {
 	ISCSI_UEVENT_TGT_DSCVR		= UEVENT_BASE + 15,
 	ISCSI_UEVENT_SET_HOST_PARAM	= UEVENT_BASE + 16,
 	ISCSI_UEVENT_UNBIND_SESSION	= UEVENT_BASE + 17,
-	ISCSI_UEVENT_CREATE_BOUND_SESSION	= UEVENT_BASE + 18,
+	ISCSI_UEVENT_CREATE_BOUND_SESSION		= UEVENT_BASE + 18,
+	ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST	= UEVENT_BASE + 19,
 
 	/* up events */
 	ISCSI_KEVENT_RECV_PDU		= KEVENT_BASE + 1,
@@ -131,6 +132,10 @@ struct iscsi_uevent {
 		struct msg_transport_connect {
 			uint32_t	non_blocking;
 		} ep_connect;
+		struct msg_transport_connect_through_host {
+			uint32_t	host_no;
+			uint32_t	non_blocking;
+		} ep_connect_through_host;
 		struct msg_transport_poll {
 			uint64_t	ep_handle;
 			uint32_t	timeout_ms;
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index 457588e1119..8cb7a31d996 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -126,7 +126,8 @@ struct iscsi_transport {
 			       int *index, int *age);
 
 	void (*session_recovery_timedout) (struct iscsi_cls_session *session);
-	struct iscsi_endpoint *(*ep_connect) (struct sockaddr *dst_addr,
+	struct iscsi_endpoint *(*ep_connect) (struct Scsi_Host *shost,
+					      struct sockaddr *dst_addr,
 					      int non_blocking);
 	int (*ep_poll) (struct iscsi_endpoint *ep, int timeout_ms);
 	void (*ep_disconnect) (struct iscsi_endpoint *ep);
-- 
cgit v1.2.3-70-g09d2


From 8f9256cea10ca43ac80f66e176643eb41db34244 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 13 May 2009 17:57:41 -0500
Subject: [SCSI] libiscsi: export iscsi_itt_to_task for bnx2i

bnx2i needs to be able to look up mgmt task like login and nop, because
it does some processing of them on the completion path. This exports
iscsi_itt_to_task so it can look up the task.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/scsi/libiscsi.c | 3 ++-
 include/scsi/libiscsi.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index b4aaf2e5fe7..a6e6eef04fe 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -828,7 +828,7 @@ static int iscsi_handle_reject(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
  *
  * The session lock must be held.
  */
-static struct iscsi_task *iscsi_itt_to_task(struct iscsi_conn *conn, itt_t itt)
+struct iscsi_task *iscsi_itt_to_task(struct iscsi_conn *conn, itt_t itt)
 {
 	struct iscsi_session *session = conn->session;
 	int i;
@@ -845,6 +845,7 @@ static struct iscsi_task *iscsi_itt_to_task(struct iscsi_conn *conn, itt_t itt)
 
 	return session->cmds[i];
 }
+EXPORT_SYMBOL_GPL(iscsi_itt_to_task);
 
 /**
  * __iscsi_complete_pdu - complete pdu
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index 0289f5745fb..88d33a46efa 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -406,6 +406,7 @@ extern int __iscsi_complete_pdu(struct iscsi_conn *, struct iscsi_hdr *,
 				char *, int);
 extern int iscsi_verify_itt(struct iscsi_conn *, itt_t);
 extern struct iscsi_task *iscsi_itt_to_ctask(struct iscsi_conn *, itt_t);
+extern struct iscsi_task *iscsi_itt_to_task(struct iscsi_conn *, itt_t);
 extern void iscsi_requeue_task(struct iscsi_task *task);
 extern void iscsi_put_task(struct iscsi_task *task);
 extern void __iscsi_get_task(struct iscsi_task *task);
-- 
cgit v1.2.3-70-g09d2


From 3bbaaad95fd38dedb7c66a601f14825b4e0c5a59 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 13 May 2009 17:57:46 -0500
Subject: [SCSI] libiscsi: handle cleanup task races

bnx2i needs to send a hardware specific cleanup command if
a command has not completed normally (iscsi/scsi response from
target), and the session is still ok (this is the case when we
send a TMF to stop the command).

At this time it will need to drop the session lock. The problem
with the current code is that fail_all_commands assumes we
will hold the lock the entire time, so it uses list_for_each_entry_safe.
If while bnx2i drops the session lock multiple cmds complete then
list_for_each_entry_safe will not handle this correctly.

This patch removes the running lists and just has us loop over
the cmds array (in later patches we will then replace that
array with a block tag map at the session level). It also fixes
up the completion path so that if the TMF code and the normal recv
path were completing the same command then they both do not try
to do release the refcount taken when the task is queued.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/scsi/libiscsi.c | 225 +++++++++++++++++++++++++-----------------------
 include/scsi/libiscsi.h |   5 +-
 2 files changed, 118 insertions(+), 112 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index c648bd328a2..a9d7e520e55 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -109,7 +109,7 @@ iscsi_update_cmdsn(struct iscsi_session *session, struct iscsi_nopin *hdr)
 		 * if the window closed with IO queued, then kick the
 		 * xmit thread
 		 */
-		if (!list_empty(&session->leadconn->xmitqueue) ||
+		if (!list_empty(&session->leadconn->cmdqueue) ||
 		    !list_empty(&session->leadconn->mgmtqueue)) {
 			if (!(session->tt->caps & CAP_DATA_PATH_OFFLOAD))
 				iscsi_conn_queue_work(session->leadconn);
@@ -366,7 +366,6 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
 		return -EIO;
 
 	task->state = ISCSI_TASK_RUNNING;
-	list_move_tail(&task->running, &conn->run_list);
 
 	conn->scsicmd_pdus_cnt++;
 	ISCSI_DBG_SESSION(session, "iscsi prep [%s cid %d sc %p cdb 0x%x "
@@ -382,26 +381,23 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
 }
 
 /**
- * iscsi_complete_command - finish a task
+ * iscsi_free_task - free a task
  * @task: iscsi cmd task
  *
  * Must be called with session lock.
  * This function returns the scsi command to scsi-ml or cleans
  * up mgmt tasks then returns the task to the pool.
  */
-static void iscsi_complete_command(struct iscsi_task *task)
+static void iscsi_free_task(struct iscsi_task *task)
 {
 	struct iscsi_conn *conn = task->conn;
 	struct iscsi_session *session = conn->session;
 	struct scsi_cmnd *sc = task->sc;
 
 	session->tt->cleanup_task(task);
-	list_del_init(&task->running);
-	task->state = ISCSI_TASK_COMPLETED;
+	task->state = ISCSI_TASK_FREE;
 	task->sc = NULL;
 
-	if (conn->task == task)
-		conn->task = NULL;
 	/*
 	 * login task is preallocated so do not free
 	 */
@@ -410,9 +406,6 @@ static void iscsi_complete_command(struct iscsi_task *task)
 
 	__kfifo_put(session->cmdpool.queue, (void*)&task, sizeof(void*));
 
-	if (conn->ping_task == task)
-		conn->ping_task = NULL;
-
 	if (sc) {
 		task->sc = NULL;
 		/* SCSI eh reuses commands to verify us */
@@ -435,7 +428,7 @@ EXPORT_SYMBOL_GPL(__iscsi_get_task);
 static void __iscsi_put_task(struct iscsi_task *task)
 {
 	if (atomic_dec_and_test(&task->refcount))
-		iscsi_complete_command(task);
+		iscsi_free_task(task);
 }
 
 void iscsi_put_task(struct iscsi_task *task)
@@ -448,14 +441,50 @@ void iscsi_put_task(struct iscsi_task *task)
 }
 EXPORT_SYMBOL_GPL(iscsi_put_task);
 
+/**
+ * iscsi_complete_task - finish a task
+ * @task: iscsi cmd task
+ *
+ * Must be called with session lock.
+ */
+static void iscsi_complete_task(struct iscsi_task *task)
+{
+	struct iscsi_conn *conn = task->conn;
+
+	if (task->state == ISCSI_TASK_COMPLETED)
+		return;
+	WARN_ON_ONCE(task->state == ISCSI_TASK_FREE);
+
+	task->state = ISCSI_TASK_COMPLETED;
+
+	if (!list_empty(&task->running))
+		list_del_init(&task->running);
+
+	if (conn->task == task)
+		conn->task = NULL;
+
+	if (conn->ping_task == task)
+		conn->ping_task = NULL;
+
+	/* release get from queueing */
+	__iscsi_put_task(task);
+}
+
 /*
- * session lock must be held
+ * session lock must be held and if not called for a task that is
+ * still pending or from the xmit thread, then xmit thread must
+ * be suspended.
  */
-static void fail_command(struct iscsi_conn *conn, struct iscsi_task *task,
-			 int err)
+static void fail_scsi_task(struct iscsi_task *task, int err)
 {
+	struct iscsi_conn *conn = task->conn;
 	struct scsi_cmnd *sc;
 
+	/*
+	 * if a command completes and we get a successful tmf response
+	 * we will hit this because the scsi eh abort code does not take
+	 * a ref to the task.
+	 */
 	sc = task->sc;
 	if (!sc)
 		return;
@@ -475,10 +504,7 @@ static void fail_command(struct iscsi_conn *conn, struct iscsi_task *task,
 		scsi_in(sc)->resid = scsi_in(sc)->length;
 	}
 
-	if (conn->task == task)
-		conn->task = NULL;
-	/* release ref from queuecommand */
-	__iscsi_put_task(task);
+	iscsi_complete_task(task);
 }
 
 static int iscsi_prep_mgmt_task(struct iscsi_conn *conn,
@@ -518,7 +544,6 @@ static int iscsi_prep_mgmt_task(struct iscsi_conn *conn,
 		session->state = ISCSI_STATE_LOGGING_OUT;
 
 	task->state = ISCSI_TASK_RUNNING;
-	list_move_tail(&task->running, &conn->mgmt_run_list);
 	ISCSI_DBG_SESSION(session, "mgmtpdu [op 0x%x hdr->itt 0x%x "
 			  "datalen %d]\n", hdr->opcode & ISCSI_OPCODE_MASK,
 			  hdr->itt, task->data_count);
@@ -564,6 +589,8 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 	atomic_set(&task->refcount, 1);
 	task->conn = conn;
 	task->sc = NULL;
+	INIT_LIST_HEAD(&task->running);
+	task->state = ISCSI_TASK_PENDING;
 
 	if (data_size) {
 		memcpy(task->data, data, data_size);
@@ -575,7 +602,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 		if (conn->session->tt->alloc_pdu(task, hdr->opcode)) {
 			iscsi_conn_printk(KERN_ERR, conn, "Could not allocate "
 					 "pdu for mgmt task.\n");
-			goto requeue_task;
+			goto free_task;
 		}
 	}
 
@@ -591,30 +618,22 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 						   task->conn->session->age);
 	}
 
-	INIT_LIST_HEAD(&task->running);
-	list_add_tail(&task->running, &conn->mgmtqueue);
-
 	if (session->tt->caps & CAP_DATA_PATH_OFFLOAD) {
 		if (iscsi_prep_mgmt_task(conn, task))
 			goto free_task;
 
 		if (session->tt->xmit_task(task))
 			goto free_task;
-
-	} else
+	} else {
+		list_add_tail(&task->running, &conn->mgmtqueue);
 		iscsi_conn_queue_work(conn);
+	}
 
 	return task;
 
 free_task:
 	__iscsi_put_task(task);
 	return NULL;
-
-requeue_task:
-	if (task != conn->login_task)
-		__kfifo_put(session->cmdpool.queue, (void*)&task,
-			    sizeof(void*));
-	return NULL;
 }
 
 int iscsi_conn_send_pdu(struct iscsi_cls_conn *cls_conn, struct iscsi_hdr *hdr,
@@ -709,11 +728,10 @@ invalid_datalen:
 			sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status;
 	}
 out:
-	ISCSI_DBG_SESSION(session, "done [sc %p res %d itt 0x%x]\n",
+	ISCSI_DBG_SESSION(session, "cmd rsp done [sc %p res %d itt 0x%x]\n",
 			  sc, sc->result, task->itt);
 	conn->scsirsp_pdus_cnt++;
-
-	__iscsi_put_task(task);
+	iscsi_complete_task(task);
 }
 
 /**
@@ -747,8 +765,11 @@ iscsi_data_in_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 			sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status;
 	}
 
+	ISCSI_DBG_SESSION(conn->session, "data in with status done "
+			  "[sc %p res %d itt 0x%x]\n",
+			  sc, sc->result, task->itt);
 	conn->scsirsp_pdus_cnt++;
-	__iscsi_put_task(task);
+	iscsi_complete_task(task);
 }
 
 static void iscsi_tmf_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
@@ -969,7 +990,7 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 		}
 
 		iscsi_tmf_rsp(conn, hdr);
-		__iscsi_put_task(task);
+		iscsi_complete_task(task);
 		break;
 	case ISCSI_OP_NOOP_IN:
 		iscsi_update_cmdsn(session, (struct iscsi_nopin*)hdr);
@@ -987,7 +1008,7 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 			goto recv_pdu;
 
 		mod_timer(&conn->transport_timer, jiffies + conn->recv_timeout);
-		__iscsi_put_task(task);
+		iscsi_complete_task(task);
 		break;
 	default:
 		rc = ISCSI_ERR_BAD_OPCODE;
@@ -999,7 +1020,7 @@ out:
 recv_pdu:
 	if (iscsi_recv_pdu(conn->cls_conn, hdr, data, datalen))
 		rc = ISCSI_ERR_CONN_FAILED;
-	__iscsi_put_task(task);
+	iscsi_complete_task(task);
 	return rc;
 }
 EXPORT_SYMBOL_GPL(__iscsi_complete_pdu);
@@ -1176,7 +1197,12 @@ void iscsi_requeue_task(struct iscsi_task *task)
 {
 	struct iscsi_conn *conn = task->conn;
 
-	list_move_tail(&task->running, &conn->requeue);
+	/*
+	 * this may be on the requeue list already if the xmit_task callout
+	 * is handling the r2ts while we are adding new ones
+	 */
+	if (list_empty(&task->running))
+		list_add_tail(&task->running, &conn->requeue);
 	iscsi_conn_queue_work(conn);
 }
 EXPORT_SYMBOL_GPL(iscsi_requeue_task);
@@ -1216,6 +1242,7 @@ check_mgmt:
 	while (!list_empty(&conn->mgmtqueue)) {
 		conn->task = list_entry(conn->mgmtqueue.next,
 					 struct iscsi_task, running);
+		list_del_init(&conn->task->running);
 		if (iscsi_prep_mgmt_task(conn, conn->task)) {
 			__iscsi_put_task(conn->task);
 			conn->task = NULL;
@@ -1227,23 +1254,26 @@ check_mgmt:
 	}
 
 	/* process pending command queue */
-	while (!list_empty(&conn->xmitqueue)) {
+	while (!list_empty(&conn->cmdqueue)) {
 		if (conn->tmf_state == TMF_QUEUED)
 			break;
 
-		conn->task = list_entry(conn->xmitqueue.next,
+		conn->task = list_entry(conn->cmdqueue.next,
 					 struct iscsi_task, running);
+		list_del_init(&conn->task->running);
 		if (conn->session->state == ISCSI_STATE_LOGGING_OUT) {
-			fail_command(conn, conn->task, DID_IMM_RETRY << 16);
+			fail_scsi_task(conn->task, DID_IMM_RETRY << 16);
 			continue;
 		}
 		rc = iscsi_prep_scsi_cmd_pdu(conn->task);
 		if (rc) {
 			if (rc == -ENOMEM) {
+				list_add_tail(&conn->task->running,
+					      &conn->cmdqueue);
 				conn->task = NULL;
 				goto again;
 			} else
-				fail_command(conn, conn->task, DID_ABORT << 16);
+				fail_scsi_task(conn->task, DID_ABORT << 16);
 			continue;
 		}
 		rc = iscsi_xmit_task(conn);
@@ -1270,8 +1300,8 @@ check_mgmt:
 
 		conn->task = list_entry(conn->requeue.next,
 					 struct iscsi_task, running);
+		list_del_init(&conn->task->running);
 		conn->task->state = ISCSI_TASK_RUNNING;
-		list_move_tail(conn->requeue.next, &conn->run_list);
 		rc = iscsi_xmit_task(conn);
 		if (rc)
 			goto again;
@@ -1412,7 +1442,6 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 		reason = FAILURE_OOM;
 		goto reject;
 	}
-	list_add_tail(&task->running, &conn->xmitqueue);
 
 	if (session->tt->caps & CAP_DATA_PATH_OFFLOAD) {
 		reason = iscsi_prep_scsi_cmd_pdu(task);
@@ -1429,8 +1458,10 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 			reason = FAILURE_SESSION_NOT_READY;
 			goto prepd_reject;
 		}
-	} else
+	} else {
+		list_add_tail(&task->running, &conn->cmdqueue);
 		iscsi_conn_queue_work(conn);
+	}
 
 	session->queued_cmdsn++;
 	spin_unlock(&session->lock);
@@ -1439,7 +1470,7 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 
 prepd_reject:
 	sc->scsi_done = NULL;
-	iscsi_complete_command(task);
+	iscsi_complete_task(task);
 reject:
 	spin_unlock(&session->lock);
 	ISCSI_DBG_SESSION(session, "cmd 0x%x rejected (%d)\n",
@@ -1449,7 +1480,7 @@ reject:
 
 prepd_fault:
 	sc->scsi_done = NULL;
-	iscsi_complete_command(task);
+	iscsi_complete_task(task);
 fault:
 	spin_unlock(&session->lock);
 	ISCSI_DBG_SESSION(session, "iscsi: cmd 0x%x is not queued (%d)\n",
@@ -1618,44 +1649,24 @@ static int iscsi_exec_task_mgmt_fn(struct iscsi_conn *conn,
  * Fail commands. session lock held and recv side suspended and xmit
  * thread flushed
  */
-static void fail_all_commands(struct iscsi_conn *conn, unsigned lun,
-			      int error)
+static void fail_scsi_tasks(struct iscsi_conn *conn, unsigned lun,
+			    int error)
 {
-	struct iscsi_task *task, *tmp;
-
-	if (conn->task) {
-		if (lun == -1 ||
-		    (conn->task->sc && conn->task->sc->device->lun == lun))
-			conn->task = NULL;
-	}
+	struct iscsi_task *task;
+	int i;
 
-	/* flush pending */
-	list_for_each_entry_safe(task, tmp, &conn->xmitqueue, running) {
-		if (lun == task->sc->device->lun || lun == -1) {
-			ISCSI_DBG_SESSION(conn->session,
-					  "failing pending sc %p itt 0x%x\n",
-					  task->sc, task->itt);
-			fail_command(conn, task, error << 16);
-		}
-	}
+	for (i = 0; i < conn->session->cmds_max; i++) {
+		task = conn->session->cmds[i];
+		if (!task->sc || task->state == ISCSI_TASK_FREE)
+			continue;
 
-	list_for_each_entry_safe(task, tmp, &conn->requeue, running) {
-		if (lun == task->sc->device->lun || lun == -1) {
-			ISCSI_DBG_SESSION(conn->session,
-					  "failing requeued sc %p itt 0x%x\n",
-					  task->sc, task->itt);
-			fail_command(conn, task, error << 16);
-		}
-	}
+		if (lun != -1 && lun != task->sc->device->lun)
+			continue;
 
-	/* fail all other running */
-	list_for_each_entry_safe(task, tmp, &conn->run_list, running) {
-		if (lun == task->sc->device->lun || lun == -1) {
-			ISCSI_DBG_SESSION(conn->session,
-					 "failing in progress sc %p itt 0x%x\n",
-					 task->sc, task->itt);
-			fail_command(conn, task, error << 16);
-		}
+		ISCSI_DBG_SESSION(conn->session,
+				  "failing sc %p itt 0x%x state %d\n",
+				  task->sc, task->itt, task->state);
+		fail_scsi_task(task, error << 16);
 	}
 }
 
@@ -1859,7 +1870,7 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
 	}
 
 	if (task->state == ISCSI_TASK_PENDING) {
-		fail_command(conn, task, DID_ABORT << 16);
+		fail_scsi_task(task, DID_ABORT << 16);
 		goto success;
 	}
 
@@ -1890,7 +1901,7 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
 		 * then sent more data for the cmd.
 		 */
 		spin_lock(&session->lock);
-		fail_command(conn, task, DID_ABORT << 16);
+		fail_scsi_task(task, DID_ABORT << 16);
 		conn->tmf_state = TMF_INITIAL;
 		spin_unlock(&session->lock);
 		iscsi_start_tx(conn);
@@ -1997,7 +2008,7 @@ int iscsi_eh_device_reset(struct scsi_cmnd *sc)
 	iscsi_suspend_tx(conn);
 
 	spin_lock_bh(&session->lock);
-	fail_all_commands(conn, sc->device->lun, DID_ERROR);
+	fail_scsi_tasks(conn, sc->device->lun, DID_ERROR);
 	conn->tmf_state = TMF_INITIAL;
 	spin_unlock_bh(&session->lock);
 
@@ -2304,6 +2315,7 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost,
 		if (cmd_task_size)
 			task->dd_data = &task[1];
 		task->itt = cmd_i;
+		task->state = ISCSI_TASK_FREE;
 		INIT_LIST_HEAD(&task->running);
 	}
 
@@ -2390,10 +2402,8 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
 	conn->transport_timer.data = (unsigned long)conn;
 	conn->transport_timer.function = iscsi_check_transport_timeouts;
 
-	INIT_LIST_HEAD(&conn->run_list);
-	INIT_LIST_HEAD(&conn->mgmt_run_list);
 	INIT_LIST_HEAD(&conn->mgmtqueue);
-	INIT_LIST_HEAD(&conn->xmitqueue);
+	INIT_LIST_HEAD(&conn->cmdqueue);
 	INIT_LIST_HEAD(&conn->requeue);
 	INIT_WORK(&conn->xmitwork, iscsi_xmitworker);
 
@@ -2561,27 +2571,24 @@ int iscsi_conn_start(struct iscsi_cls_conn *cls_conn)
 EXPORT_SYMBOL_GPL(iscsi_conn_start);
 
 static void
-flush_control_queues(struct iscsi_session *session, struct iscsi_conn *conn)
+fail_mgmt_tasks(struct iscsi_session *session, struct iscsi_conn *conn)
 {
-	struct iscsi_task *task, *tmp;
+	struct iscsi_task *task;
+	int i;
 
-	/* handle pending */
-	list_for_each_entry_safe(task, tmp, &conn->mgmtqueue, running) {
-		ISCSI_DBG_SESSION(session, "flushing pending mgmt task "
-				  "itt 0x%x\n", task->itt);
-		/* release ref from prep task */
-		__iscsi_put_task(task);
-	}
+	for (i = 0; i < conn->session->cmds_max; i++) {
+		task = conn->session->cmds[i];
+		if (task->sc)
+			continue;
 
-	/* handle running */
-	list_for_each_entry_safe(task, tmp, &conn->mgmt_run_list, running) {
-		ISCSI_DBG_SESSION(session, "flushing running mgmt task "
-				  "itt 0x%x\n", task->itt);
-		/* release ref from prep task */
-		__iscsi_put_task(task);
-	}
+		if (task->state == ISCSI_TASK_FREE)
+			continue;
 
-	conn->task = NULL;
+		ISCSI_DBG_SESSION(conn->session,
+				  "failing mgmt itt 0x%x state %d\n",
+				  task->itt, task->state);
+		iscsi_complete_task(task);
+	}
 }
 
 static void iscsi_start_session_recovery(struct iscsi_session *session,
@@ -2638,10 +2645,10 @@ static void iscsi_start_session_recovery(struct iscsi_session *session,
 	 */
 	spin_lock_bh(&session->lock);
 	if (flag == STOP_CONN_RECOVER)
-		fail_all_commands(conn, -1, DID_TRANSPORT_DISRUPTED);
+		fail_scsi_tasks(conn, -1, DID_TRANSPORT_DISRUPTED);
 	else
-		fail_all_commands(conn, -1, DID_ERROR);
-	flush_control_queues(session, conn);
+		fail_scsi_tasks(conn, -1, DID_ERROR);
+	fail_mgmt_tasks(session, conn);
 	spin_unlock_bh(&session->lock);
 	mutex_unlock(&session->eh_mutex);
 }
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index 88d33a46efa..facae71183a 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -82,6 +82,7 @@ enum {
 
 
 enum {
+	ISCSI_TASK_FREE,
 	ISCSI_TASK_COMPLETED,
 	ISCSI_TASK_PENDING,
 	ISCSI_TASK_RUNNING,
@@ -181,9 +182,7 @@ struct iscsi_conn {
 
 	/* xmit */
 	struct list_head	mgmtqueue;	/* mgmt (control) xmit queue */
-	struct list_head	mgmt_run_list;	/* list of control tasks */
-	struct list_head	xmitqueue;	/* data-path cmd queue */
-	struct list_head	run_list;	/* list of cmds in progress */
+	struct list_head	cmdqueue;	/* data-path cmd queue */
 	struct list_head	requeue;	/* tasks needing another run */
 	struct work_struct	xmitwork;	/* per-conn. xmit workqueue */
 	unsigned long		suspend_tx;	/* suspend Tx */
-- 
cgit v1.2.3-70-g09d2


From b3cd5050bf8eb32ceecee129cac7c59e6f1668c4 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 13 May 2009 17:57:49 -0500
Subject: [SCSI] libiscsi: add task aborted state

If a task did not complete normally due to a TMF, libiscsi will
now complete the task with the state ISCSI_TASK_ABRT_TMF. Drivers
like bnx2i that need to free resources if a command did not complete normally
can then check the task state. If a driver does not need to send
a special command if we have dropped the session then they can check
for ISCSI_TASK_ABRT_SESS_RECOV.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/infiniband/ulp/iser/iscsi_iser.c |  7 ++--
 drivers/scsi/libiscsi.c                  | 60 +++++++++++++++++++-------------
 drivers/scsi/libiscsi_tcp.c              |  4 +--
 include/scsi/libiscsi.h                  |  2 ++
 4 files changed, 41 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index ffbe0c76bc1..0ba6ec87629 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -257,11 +257,8 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task)
 {
 	struct iscsi_iser_task *iser_task = task->dd_data;
 
-	/*
-	 * mgmt tasks do not need special cleanup and we do not
-	 * allocate anything in the init task callout
-	 */
-	if (!task->sc || task->state == ISCSI_TASK_PENDING)
+	/* mgmt tasks do not need special cleanup */
+	if (!task->sc)
 		return;
 
 	if (iser_task->status == ISER_TASK_STATUS_STARTED) {
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index dafa054537f..b00be6c3efc 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -443,18 +443,20 @@ EXPORT_SYMBOL_GPL(iscsi_put_task);
 /**
  * iscsi_complete_task - finish a task
  * @task: iscsi cmd task
+ * @state: state to complete task with
  *
  * Must be called with session lock.
  */
-static void iscsi_complete_task(struct iscsi_task *task)
+static void iscsi_complete_task(struct iscsi_task *task, int state)
 {
 	struct iscsi_conn *conn = task->conn;
 
-	if (task->state == ISCSI_TASK_COMPLETED)
+	if (task->state == ISCSI_TASK_COMPLETED ||
+	    task->state == ISCSI_TASK_ABRT_TMF ||
+	    task->state == ISCSI_TASK_ABRT_SESS_RECOV)
 		return;
 	WARN_ON_ONCE(task->state == ISCSI_TASK_FREE);
-
-	task->state = ISCSI_TASK_COMPLETED;
+	task->state = state;
 
 	if (!list_empty(&task->running))
 		list_del_init(&task->running);
@@ -478,6 +480,7 @@ static void fail_scsi_task(struct iscsi_task *task, int err)
 {
 	struct iscsi_conn *conn = task->conn;
 	struct scsi_cmnd *sc;
+	int state;
 
 	/*
 	 * if a command completes and we get a successful tmf response
@@ -488,14 +491,20 @@ static void fail_scsi_task(struct iscsi_task *task, int err)
 	if (!sc)
 		return;
 
-	if (task->state == ISCSI_TASK_PENDING)
+	if (task->state == ISCSI_TASK_PENDING) {
 		/*
 		 * cmd never made it to the xmit thread, so we should not count
 		 * the cmd in the sequencing
 		 */
 		conn->session->queued_cmdsn--;
+		/* it was never sent so just complete like normal */
+		state = ISCSI_TASK_COMPLETED;
+	} else if (err == DID_TRANSPORT_DISRUPTED)
+		state = ISCSI_TASK_ABRT_SESS_RECOV;
+	else
+		state = ISCSI_TASK_ABRT_TMF;
 
-	sc->result = err;
+	sc->result = err << 16;
 	if (!scsi_bidi_cmnd(sc))
 		scsi_set_resid(sc, scsi_bufflen(sc));
 	else {
@@ -503,7 +512,7 @@ static void fail_scsi_task(struct iscsi_task *task, int err)
 		scsi_in(sc)->resid = scsi_in(sc)->length;
 	}
 
-	iscsi_complete_task(task);
+	iscsi_complete_task(task, state);
 }
 
 static int iscsi_prep_mgmt_task(struct iscsi_conn *conn,
@@ -731,7 +740,7 @@ out:
 	ISCSI_DBG_SESSION(session, "cmd rsp done [sc %p res %d itt 0x%x]\n",
 			  sc, sc->result, task->itt);
 	conn->scsirsp_pdus_cnt++;
-	iscsi_complete_task(task);
+	iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 }
 
 /**
@@ -769,7 +778,7 @@ iscsi_data_in_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 			  "[sc %p res %d itt 0x%x]\n",
 			  sc, sc->result, task->itt);
 	conn->scsirsp_pdus_cnt++;
-	iscsi_complete_task(task);
+	iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 }
 
 static void iscsi_tmf_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
@@ -990,7 +999,7 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 		}
 
 		iscsi_tmf_rsp(conn, hdr);
-		iscsi_complete_task(task);
+		iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 		break;
 	case ISCSI_OP_NOOP_IN:
 		iscsi_update_cmdsn(session, (struct iscsi_nopin*)hdr);
@@ -1008,7 +1017,7 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 			goto recv_pdu;
 
 		mod_timer(&conn->transport_timer, jiffies + conn->recv_timeout);
-		iscsi_complete_task(task);
+		iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 		break;
 	default:
 		rc = ISCSI_ERR_BAD_OPCODE;
@@ -1020,7 +1029,7 @@ out:
 recv_pdu:
 	if (iscsi_recv_pdu(conn->cls_conn, hdr, data, datalen))
 		rc = ISCSI_ERR_CONN_FAILED;
-	iscsi_complete_task(task);
+	iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 	return rc;
 }
 EXPORT_SYMBOL_GPL(__iscsi_complete_pdu);
@@ -1262,7 +1271,7 @@ check_mgmt:
 					 struct iscsi_task, running);
 		list_del_init(&conn->task->running);
 		if (conn->session->state == ISCSI_STATE_LOGGING_OUT) {
-			fail_scsi_task(conn->task, DID_IMM_RETRY << 16);
+			fail_scsi_task(conn->task, DID_IMM_RETRY);
 			continue;
 		}
 		rc = iscsi_prep_scsi_cmd_pdu(conn->task);
@@ -1273,7 +1282,7 @@ check_mgmt:
 				conn->task = NULL;
 				goto again;
 			} else
-				fail_scsi_task(conn->task, DID_ABORT << 16);
+				fail_scsi_task(conn->task, DID_ABORT);
 			continue;
 		}
 		rc = iscsi_xmit_task(conn);
@@ -1469,7 +1478,7 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 
 prepd_reject:
 	sc->scsi_done = NULL;
-	iscsi_complete_task(task);
+	iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 reject:
 	spin_unlock(&session->lock);
 	ISCSI_DBG_SESSION(session, "cmd 0x%x rejected (%d)\n",
@@ -1479,7 +1488,7 @@ reject:
 
 prepd_fault:
 	sc->scsi_done = NULL;
-	iscsi_complete_task(task);
+	iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 fault:
 	spin_unlock(&session->lock);
 	ISCSI_DBG_SESSION(session, "iscsi: cmd 0x%x is not queued (%d)\n",
@@ -1665,7 +1674,7 @@ static void fail_scsi_tasks(struct iscsi_conn *conn, unsigned lun,
 		ISCSI_DBG_SESSION(conn->session,
 				  "failing sc %p itt 0x%x state %d\n",
 				  task->sc, task->itt, task->state);
-		fail_scsi_task(task, error << 16);
+		fail_scsi_task(task, error);
 	}
 }
 
@@ -1868,7 +1877,7 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
 	}
 
 	if (task->state == ISCSI_TASK_PENDING) {
-		fail_scsi_task(task, DID_ABORT << 16);
+		fail_scsi_task(task, DID_ABORT);
 		goto success;
 	}
 
@@ -1899,7 +1908,7 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
 		 * then sent more data for the cmd.
 		 */
 		spin_lock(&session->lock);
-		fail_scsi_task(task, DID_ABORT << 16);
+		fail_scsi_task(task, DID_ABORT);
 		conn->tmf_state = TMF_INITIAL;
 		spin_unlock(&session->lock);
 		iscsi_start_tx(conn);
@@ -2572,7 +2581,7 @@ static void
 fail_mgmt_tasks(struct iscsi_session *session, struct iscsi_conn *conn)
 {
 	struct iscsi_task *task;
-	int i;
+	int i, state;
 
 	for (i = 0; i < conn->session->cmds_max; i++) {
 		task = conn->session->cmds[i];
@@ -2585,7 +2594,11 @@ fail_mgmt_tasks(struct iscsi_session *session, struct iscsi_conn *conn)
 		ISCSI_DBG_SESSION(conn->session,
 				  "failing mgmt itt 0x%x state %d\n",
 				  task->itt, task->state);
-		iscsi_complete_task(task);
+		state = ISCSI_TASK_ABRT_SESS_RECOV;
+		if (task->state == ISCSI_TASK_PENDING)
+			state = ISCSI_TASK_COMPLETED;
+		iscsi_complete_task(task, state);
+
 	}
 }
 
@@ -2642,10 +2655,7 @@ static void iscsi_start_session_recovery(struct iscsi_session *session,
 	 * flush queues.
 	 */
 	spin_lock_bh(&session->lock);
-	if (flag == STOP_CONN_RECOVER)
-		fail_scsi_tasks(conn, -1, DID_TRANSPORT_DISRUPTED);
-	else
-		fail_scsi_tasks(conn, -1, DID_ERROR);
+	fail_scsi_tasks(conn, -1, DID_TRANSPORT_DISRUPTED);
 	fail_mgmt_tasks(session, conn);
 	spin_unlock_bh(&session->lock);
 	mutex_unlock(&session->eh_mutex);
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index b84a1d853f2..2bc07090321 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -440,8 +440,8 @@ void iscsi_tcp_cleanup_task(struct iscsi_task *task)
 	struct iscsi_tcp_task *tcp_task = task->dd_data;
 	struct iscsi_r2t_info *r2t;
 
-	/* nothing to do for mgmt or pending tasks */
-	if (!task->sc || task->state == ISCSI_TASK_PENDING)
+	/* nothing to do for mgmt */
+	if (!task->sc)
 		return;
 
 	/* flush task's r2t queues */
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index facae71183a..196525cd402 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -86,6 +86,8 @@ enum {
 	ISCSI_TASK_COMPLETED,
 	ISCSI_TASK_PENDING,
 	ISCSI_TASK_RUNNING,
+	ISCSI_TASK_ABRT_TMF,		/* aborted due to TMF */
+	ISCSI_TASK_ABRT_SESS_RECOV,	/* aborted due to session recovery */
 };
 
 struct iscsi_r2t_info {
-- 
cgit v1.2.3-70-g09d2


From a366695592ebc9151dd5a248681270f0925d8324 Mon Sep 17 00:00:00 2001
From: Abhijeet Joglekar <abjoglek@cisco.com>
Date: Fri, 1 May 2009 10:01:26 -0700
Subject: [SCSI] libfc,fcoe,fnic: Separate rport and lport max retry counts

This allows fnic to configure number of retries for lport and rport
separately.

Signed-off-by: Abhijeet Joglekar <abjoglek@cisco.com>
Acked-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/scsi/fcoe/fcoe.c      | 1 +
 drivers/scsi/fnic/fnic_main.c | 1 +
 drivers/scsi/libfc/fc_rport.c | 2 +-
 include/scsi/libfc.h          | 1 +
 4 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index d08121f246c..6acb7778f55 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -146,6 +146,7 @@ static int fcoe_lport_config(struct fc_lport *lp)
 	lp->link_up = 0;
 	lp->qfull = 0;
 	lp->max_retry_count = 3;
+	lp->max_rport_retry_count = 3;
 	lp->e_d_tov = 2 * 1000;	/* FC-FS default */
 	lp->r_a_tov = 2 * 2 * 1000;
 	lp->service_params = (FCP_SPPF_INIT_FCN | FCP_SPPF_RD_XRDY_DIS |
diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c
index 32ef6b87d89..a84072865fc 100644
--- a/drivers/scsi/fnic/fnic_main.c
+++ b/drivers/scsi/fnic/fnic_main.c
@@ -680,6 +680,7 @@ static int __devinit fnic_probe(struct pci_dev *pdev,
 	}
 
 	lp->max_retry_count = fnic->config.flogi_retries;
+	lp->max_rport_retry_count = fnic->config.plogi_retries;
 	lp->service_params = (FCP_SPPF_INIT_FCN | FCP_SPPF_RD_XRDY_DIS |
 			      FCP_SPPF_CONF_COMPL);
 	if (fnic->config.flags & VFCF_FCP_SEQ_LVL_ERR)
diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index 3f5094ebc39..7bfbff7e0ef 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -478,7 +478,7 @@ static void fc_rport_error_retry(struct fc_rport *rport, struct fc_frame *fp)
 	if (PTR_ERR(fp) == -FC_EX_CLOSED)
 		return fc_rport_error(rport, fp);
 
-	if (rdata->retries < rdata->local_port->max_retry_count) {
+	if (rdata->retries < rdata->local_port->max_rport_retry_count) {
 		FC_DEBUG_RPORT("Error %ld in state %s, retrying\n",
 			       PTR_ERR(fp), fc_rport_state(rport));
 		rdata->retries++;
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index 45f9cc642c4..ebdd9f4cf07 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -679,6 +679,7 @@ struct fc_lport {
 	unsigned int		e_d_tov;
 	unsigned int		r_a_tov;
 	u8			max_retry_count;
+	u8			max_rport_retry_count;
 	u16			link_speed;
 	u16			link_supported_speeds;
 	u16			lro_xid;	/* max xid for fcoe lro */
-- 
cgit v1.2.3-70-g09d2


From 082ff5a2767a0679ee543f14883adbafb631ffbe Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Sat, 23 May 2009 18:29:00 +0200
Subject: perf_counter: Change pctrl() behaviour

Instead of en/dis-abling all counters acting on a particular
task, en/dis- able all counters we created.

[ v2: fix crash on first counter enable ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090523163012.916937244@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/init_task.h    | 10 +++++
 include/linux/perf_counter.h |  3 ++
 include/linux/sched.h        |  2 +
 kernel/perf_counter.c        | 87 ++++++++++++--------------------------------
 4 files changed, 39 insertions(+), 63 deletions(-)

(limited to 'include')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index d87247d2641..353c0ac7723 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -108,6 +108,15 @@ extern struct group_info init_groups;
 
 extern struct cred init_cred;
 
+#ifdef CONFIG_PERF_COUNTERS
+# define INIT_PERF_COUNTERS(tsk)					\
+	.perf_counter_mutex = 						\
+		 __MUTEX_INITIALIZER(tsk.perf_counter_mutex),		\
+	.perf_counter_list = LIST_HEAD_INIT(tsk.perf_counter_list),
+#else
+# define INIT_PERF_COUNTERS(tsk)
+#endif
+
 /*
  *  INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -171,6 +180,7 @@ extern struct cred init_cred;
 	},								\
 	.dirties = INIT_PROP_LOCAL_SINGLE(dirties),			\
 	INIT_IDS							\
+	INIT_PERF_COUNTERS(tsk)						\
 	INIT_TRACE_IRQFLAGS						\
 	INIT_LOCKDEP							\
 	INIT_FTRACE_GRAPH						\
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 4ab8050eb9e..4159ee5940f 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -469,6 +469,9 @@ struct perf_counter {
 	int				oncpu;
 	int				cpu;
 
+	struct list_head		owner_entry;
+	struct task_struct		*owner;
+
 	/* mmap bits */
 	struct mutex			mmap_mutex;
 	atomic_t			mmap_count;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9714d450f41..bc9326dcdde 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1389,6 +1389,8 @@ struct task_struct {
 #endif
 #ifdef CONFIG_PERF_COUNTERS
 	struct perf_counter_context *perf_counter_ctxp;
+	struct mutex perf_counter_mutex;
+	struct list_head perf_counter_list;
 #endif
 #ifdef CONFIG_NUMA
 	struct mempolicy *mempolicy;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0e97f896133..4c86a636976 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1076,79 +1076,26 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
 	__perf_counter_sched_in(ctx, cpuctx, cpu);
 }
 
-int perf_counter_task_disable(void)
+int perf_counter_task_enable(void)
 {
-	struct task_struct *curr = current;
-	struct perf_counter_context *ctx = curr->perf_counter_ctxp;
 	struct perf_counter *counter;
-	unsigned long flags;
-
-	if (!ctx || !ctx->nr_counters)
-		return 0;
-
-	local_irq_save(flags);
 
-	__perf_counter_task_sched_out(ctx);
-
-	spin_lock(&ctx->lock);
-
-	/*
-	 * Disable all the counters:
-	 */
-	perf_disable();
-
-	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		if (counter->state != PERF_COUNTER_STATE_ERROR) {
-			update_group_times(counter);
-			counter->state = PERF_COUNTER_STATE_OFF;
-		}
-	}
-
-	perf_enable();
-
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	mutex_lock(&current->perf_counter_mutex);
+	list_for_each_entry(counter, &current->perf_counter_list, owner_entry)
+		perf_counter_enable(counter);
+	mutex_unlock(&current->perf_counter_mutex);
 
 	return 0;
 }
 
-int perf_counter_task_enable(void)
+int perf_counter_task_disable(void)
 {
-	struct task_struct *curr = current;
-	struct perf_counter_context *ctx = curr->perf_counter_ctxp;
 	struct perf_counter *counter;
-	unsigned long flags;
-	int cpu;
-
-	if (!ctx || !ctx->nr_counters)
-		return 0;
-
-	local_irq_save(flags);
-	cpu = smp_processor_id();
-
-	__perf_counter_task_sched_out(ctx);
-
-	spin_lock(&ctx->lock);
 
-	/*
-	 * Disable all the counters:
-	 */
-	perf_disable();
-
-	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		if (counter->state > PERF_COUNTER_STATE_OFF)
-			continue;
-		counter->state = PERF_COUNTER_STATE_INACTIVE;
-		counter->tstamp_enabled =
-			ctx->time - counter->total_time_enabled;
-		counter->hw_event.disabled = 0;
-	}
-	perf_enable();
-
-	spin_unlock(&ctx->lock);
-
-	perf_counter_task_sched_in(curr, cpu);
-
-	local_irq_restore(flags);
+	mutex_lock(&current->perf_counter_mutex);
+	list_for_each_entry(counter, &current->perf_counter_list, owner_entry)
+		perf_counter_disable(counter);
+	mutex_unlock(&current->perf_counter_mutex);
 
 	return 0;
 }
@@ -1416,6 +1363,11 @@ static int perf_release(struct inode *inode, struct file *file)
 	perf_counter_remove_from_context(counter);
 	mutex_unlock(&ctx->mutex);
 
+	mutex_lock(&counter->owner->perf_counter_mutex);
+	list_del_init(&counter->owner_entry);
+	mutex_unlock(&counter->owner->perf_counter_mutex);
+	put_task_struct(counter->owner);
+
 	free_counter(counter);
 	put_context(ctx);
 
@@ -3272,6 +3224,12 @@ SYSCALL_DEFINE5(perf_counter_open,
 	perf_install_in_context(ctx, counter, cpu);
 	mutex_unlock(&ctx->mutex);
 
+	counter->owner = current;
+	get_task_struct(current);
+	mutex_lock(&current->perf_counter_mutex);
+	list_add_tail(&counter->owner_entry, &current->perf_counter_list);
+	mutex_unlock(&current->perf_counter_mutex);
+
 	fput_light(counter_file, fput_needed2);
 
 out_fput:
@@ -3488,6 +3446,9 @@ void perf_counter_init_task(struct task_struct *child)
 
 	child->perf_counter_ctxp = NULL;
 
+	mutex_init(&child->perf_counter_mutex);
+	INIT_LIST_HEAD(&child->perf_counter_list);
+
 	/*
 	 * This is executed from the parent task context, so inherit
 	 * counters that have been marked for cloning.
-- 
cgit v1.2.3-70-g09d2


From 475c55797323b67435083f6e2eb8ee670f6410ec Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Sat, 23 May 2009 18:29:01 +0200
Subject: perf_counter: Remove perf_counter_context::nr_enabled

now that pctrl() no longer disables other people's counters,
remove the PMU cache code that deals with that.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090523163013.032998331@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  1 -
 kernel/perf_counter.c        | 11 +----------
 2 files changed, 1 insertion(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 4159ee5940f..2ddf5e3c551 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -516,7 +516,6 @@ struct perf_counter_context {
 	struct list_head	event_list;
 	int			nr_counters;
 	int			nr_active;
-	int			nr_enabled;
 	int			is_active;
 	atomic_t		refcount;
 	struct task_struct	*task;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 4c86a636976..cb4062559b4 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -134,8 +134,6 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 
 	list_add_rcu(&counter->event_entry, &ctx->event_list);
 	ctx->nr_counters++;
-	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
-		ctx->nr_enabled++;
 }
 
 /*
@@ -150,8 +148,6 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	if (list_empty(&counter->list_entry))
 		return;
 	ctx->nr_counters--;
-	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
-		ctx->nr_enabled--;
 
 	list_del_init(&counter->list_entry);
 	list_del_rcu(&counter->event_entry);
@@ -406,7 +402,6 @@ static void __perf_counter_disable(void *info)
 		else
 			counter_sched_out(counter, cpuctx, ctx);
 		counter->state = PERF_COUNTER_STATE_OFF;
-		ctx->nr_enabled--;
 	}
 
 	spin_unlock_irqrestore(&ctx->lock, flags);
@@ -448,7 +443,6 @@ static void perf_counter_disable(struct perf_counter *counter)
 	if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
 		update_counter_times(counter);
 		counter->state = PERF_COUNTER_STATE_OFF;
-		ctx->nr_enabled--;
 	}
 
 	spin_unlock_irq(&ctx->lock);
@@ -759,7 +753,6 @@ static void __perf_counter_enable(void *info)
 		goto unlock;
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
-	ctx->nr_enabled++;
 
 	/*
 	 * If the counter is in a group and isn't the group leader,
@@ -850,7 +843,6 @@ static void perf_counter_enable(struct perf_counter *counter)
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
 		counter->tstamp_enabled =
 			ctx->time - counter->total_time_enabled;
-		ctx->nr_enabled++;
 	}
  out:
 	spin_unlock_irq(&ctx->lock);
@@ -910,8 +902,7 @@ static int context_equiv(struct perf_counter_context *ctx1,
 			 struct perf_counter_context *ctx2)
 {
 	return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
-		&& ctx1->parent_gen == ctx2->parent_gen
-		&& ctx1->nr_enabled == ctx2->nr_enabled;
+		&& ctx1->parent_gen == ctx2->parent_gen;
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 7df3bb8f59ca8e346bb834006c257cc367c6250a Mon Sep 17 00:00:00 2001
From: Andreas Eversberg <andreas@eversberg.eu>
Date: Fri, 22 May 2009 11:04:44 +0000
Subject: mISDN: Add watchdog functionality to hfcmulti driver

This patch was made by Titus Moldovan and provides IOCTL functions for enabling
and disabling the controller's built in watchdog. The use is optional.

Signed-off-by: Andreas Eversberg <andreas@eversberg.eu>
Signed-off-by: Karsten Keil <keil@b1-systems.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hardware/mISDN/hfc_multi.h |  1 +
 drivers/isdn/hardware/mISDN/hfcmulti.c  | 32 +++++++++++++++++++++++++++++++-
 include/linux/mISDNif.h                 |  3 ++-
 3 files changed, 34 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/isdn/hardware/mISDN/hfc_multi.h b/drivers/isdn/hardware/mISDN/hfc_multi.h
index 663b77f578b..5765e196291 100644
--- a/drivers/isdn/hardware/mISDN/hfc_multi.h
+++ b/drivers/isdn/hardware/mISDN/hfc_multi.h
@@ -62,6 +62,7 @@ struct hfcm_hw {
 	u_char	r_sci_msk;
 	u_char	r_tx0, r_tx1;
 	u_char	a_st_ctrl0[8];
+	u_char	r_bert_wd_md;
 	timer_t	timer;
 };
 
diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c
index 0b28141e43b..ca153de6954 100644
--- a/drivers/isdn/hardware/mISDN/hfcmulti.c
+++ b/drivers/isdn/hardware/mISDN/hfcmulti.c
@@ -4013,11 +4013,41 @@ open_bchannel(struct hfc_multi *hc, struct dchannel *dch,
 static int
 channel_dctrl(struct dchannel *dch, struct mISDN_ctrl_req *cq)
 {
+	struct hfc_multi	*hc = dch->hw;
 	int	ret = 0;
+	int	wd_mode, wd_cnt;
 
 	switch (cq->op) {
 	case MISDN_CTRL_GETOP:
-		cq->op = 0;
+		cq->op = MISDN_CTRL_HFC_OP;
+		break;
+	case MISDN_CTRL_HFC_WD_INIT: /* init the watchdog */
+		wd_cnt = cq->p1 & 0xf;
+		wd_mode = !!(cq->p1 >> 4);
+		if (debug & DEBUG_HFCMULTI_MSG)
+			printk(KERN_DEBUG
+			    "%s: MISDN_CTRL_HFC_WD_INIT mode %s counter 0x%x\n",
+			    __func__, wd_mode ? "AUTO" : "MANUAL", wd_cnt);
+		/* set the watchdog timer */
+		HFC_outb(hc, R_TI_WD, poll_timer | (wd_cnt << 4));
+		hc->hw.r_bert_wd_md = (wd_mode ? V_AUTO_WD_RES : 0);
+		if (hc->ctype == HFC_TYPE_XHFC)
+			hc->hw.r_bert_wd_md |= 0x40 /* V_WD_EN */;
+		/* init the watchdog register and reset the counter */
+		HFC_outb(hc, R_BERT_WD_MD, hc->hw.r_bert_wd_md | V_WD_RES);
+		if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) {
+			/* enable the watchdog output for Speech-Design */
+			HFC_outb(hc, R_GPIO_SEL, V_GPIO_SEL7);
+			HFC_outb(hc, R_GPIO_EN1, V_GPIO_EN15);
+			HFC_outb(hc, R_GPIO_OUT1, 0);
+			HFC_outb(hc, R_GPIO_OUT1, V_GPIO_OUT15);
+		}
+		break;
+	case MISDN_CTRL_HFC_WD_RESET: /* reset the watchdog counter */
+		if (debug & DEBUG_HFCMULTI_MSG)
+			printk(KERN_DEBUG "%s: MISDN_CTRL_HFC_WD_RESET\n",
+			    __func__);
+		HFC_outb(hc, R_BERT_WD_MD, hc->hw.r_bert_wd_md | V_WD_RES);
 		break;
 	default:
 		printk(KERN_WARNING "%s: unknown Op %x\n",
diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h
index 5da3d95b27f..cf974593a99 100644
--- a/include/linux/mISDNif.h
+++ b/include/linux/mISDNif.h
@@ -362,7 +362,8 @@ clear_channelmap(u_int nr, u_char *map)
 #define MISDN_CTRL_HFC_RECEIVE_ON	0x4006
 #define MISDN_CTRL_HFC_ECHOCAN_ON 	0x4007
 #define MISDN_CTRL_HFC_ECHOCAN_OFF 	0x4008
-
+#define MISDN_CTRL_HFC_WD_INIT		0x4009
+#define MISDN_CTRL_HFC_WD_RESET		0x400A
 
 /* socket options */
 #define MISDN_TIME_STAMP		0x0001
-- 
cgit v1.2.3-70-g09d2


From 7cfa153dd709f15188fe84b78ae76387841fe17b Mon Sep 17 00:00:00 2001
From: Andreas Eversberg <andreas@eversberg.eu>
Date: Fri, 22 May 2009 11:04:46 +0000
Subject: mISDN: Echo canceler now gets delay information from hardware

Added tx-fifo information for calculation of current delay to sync tx and rx
streams for echo canceler.

Signed-off-by: Andreas Eversberg <andreas@eversberg.eu>
Signed-off-by: Karsten Keil <keil@b1-systems.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hardware/mISDN/hfc_multi.h |  1 +
 drivers/isdn/hardware/mISDN/hfcmulti.c  |  8 +++-
 drivers/isdn/hardware/mISDN/hfcpci.c    | 70 +++++++++++++++++++--------------
 drivers/isdn/hardware/mISDN/hfcsusb.c   |  4 +-
 drivers/isdn/mISDN/dsp.h                |  2 +-
 drivers/isdn/mISDN/dsp_core.c           |  2 +-
 drivers/isdn/mISDN/dsp_pipeline.c       |  5 ++-
 drivers/isdn/mISDN/hwchannel.c          |  4 +-
 include/linux/mISDNdsp.h                |  3 +-
 include/linux/mISDNhw.h                 |  2 +-
 10 files changed, 60 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/drivers/isdn/hardware/mISDN/hfc_multi.h b/drivers/isdn/hardware/mISDN/hfc_multi.h
index 5765e196291..c4878cc712c 100644
--- a/drivers/isdn/hardware/mISDN/hfc_multi.h
+++ b/drivers/isdn/hardware/mISDN/hfc_multi.h
@@ -44,6 +44,7 @@ struct hfc_chan {
 	int		conf;	/* conference setting of TX slot */
 	int		txpending;	/* if there is currently data in */
 					/* the FIFO 0=no, 1=yes, 2=splloop */
+	int		Zfill;	/* rx-fifo level on last hfcmulti_tx */
 	int		rx_off; /* set to turn fifo receive off */
 	int		coeff_count; /* curren coeff block */
 	s32		*coeff; /* memory pointer to 8 coeff blocks */
diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c
index ca153de6954..3a7c26ce12c 100644
--- a/drivers/isdn/hardware/mISDN/hfcmulti.c
+++ b/drivers/isdn/hardware/mISDN/hfcmulti.c
@@ -1945,6 +1945,9 @@ next_frame:
 				"%d!=%d\n", __func__, hc->id + 1, temp, z2);
 		z2 = temp; /* repeat unti Z2 is equal */
 	}
+	hc->chan[ch].Zfill = z1 - z2;
+	if (hc->chan[ch].Zfill < 0)
+		hc->chan[ch].Zfill += hc->Zlen;
 	Zspace = z2 - z1;
 	if (Zspace <= 0)
 		Zspace += hc->Zlen;
@@ -2031,6 +2034,7 @@ next_frame:
 
 	/* Have to prep the audio data */
 	hc->write_fifo(hc, d, ii - i);
+	hc->chan[ch].Zfill += ii - i;
 	*idxp = ii;
 
 	/* if not all data has been written */
@@ -2226,7 +2230,7 @@ next_frame:
 			if (dch)
 				recv_Dchannel(dch);
 			else
-				recv_Bchannel(bch);
+				recv_Bchannel(bch, MISDN_ID_ANY);
 			*sp = skb;
 			again++;
 			goto next_frame;
@@ -2258,7 +2262,7 @@ next_frame:
 			    "(z1=%04x, z2=%04x) TRANS\n",
 				__func__, hc->id + 1, ch, Zsize, z1, z2);
 		/* only bch is transparent */
-		recv_Bchannel(bch);
+		recv_Bchannel(bch, hc->chan[ch].Zfill);
 		*sp = skb;
 	}
 }
diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c
index 641a9cd1a53..60dc92562c6 100644
--- a/drivers/isdn/hardware/mISDN/hfcpci.c
+++ b/drivers/isdn/hardware/mISDN/hfcpci.c
@@ -452,7 +452,7 @@ hfcpci_empty_bfifo(struct bchannel *bch, struct bzfifo *bz,
 		}
 		bz->za[new_f2].z2 = cpu_to_le16(new_z2);
 		bz->f2 = new_f2;	/* next buffer */
-		recv_Bchannel(bch);
+		recv_Bchannel(bch, MISDN_ID_ANY);
 	}
 }
 
@@ -541,35 +541,45 @@ receive_dmsg(struct hfc_pci *hc)
  * check for transparent receive data and read max one 'poll' size if avail
  */
 static void
-hfcpci_empty_fifo_trans(struct bchannel *bch, struct bzfifo *bz, u_char *bdata)
+hfcpci_empty_fifo_trans(struct bchannel *bch, struct bzfifo *rxbz,
+	struct bzfifo *txbz, u_char *bdata)
 {
-	 __le16 *z1r, *z2r;
-	int		new_z2, fcnt, maxlen;
-	u_char		*ptr, *ptr1;
+	 __le16	*z1r, *z2r, *z1t, *z2t;
+	int	new_z2, fcnt_rx, fcnt_tx, maxlen;
+	u_char	*ptr, *ptr1;
 
-	z1r = &bz->za[MAX_B_FRAMES].z1;		/* pointer to z reg */
+	z1r = &rxbz->za[MAX_B_FRAMES].z1;	/* pointer to z reg */
 	z2r = z1r + 1;
+	z1t = &txbz->za[MAX_B_FRAMES].z1;
+	z2t = z1t + 1;
 
-	fcnt = le16_to_cpu(*z1r) - le16_to_cpu(*z2r);
-	if (!fcnt)
+	fcnt_rx = le16_to_cpu(*z1r) - le16_to_cpu(*z2r);
+	if (!fcnt_rx)
 		return;	/* no data avail */
 
-	if (fcnt <= 0)
-		fcnt += B_FIFO_SIZE;	/* bytes actually buffered */
-	new_z2 = le16_to_cpu(*z2r) + fcnt;	/* new position in fifo */
+	if (fcnt_rx <= 0)
+		fcnt_rx += B_FIFO_SIZE;	/* bytes actually buffered */
+	new_z2 = le16_to_cpu(*z2r) + fcnt_rx;	/* new position in fifo */
 	if (new_z2 >= (B_FIFO_SIZE + B_SUB_VAL))
 		new_z2 -= B_FIFO_SIZE;	/* buffer wrap */
 
-	if (fcnt > MAX_DATA_SIZE) {	/* flush, if oversized */
+	if (fcnt_rx > MAX_DATA_SIZE) {	/* flush, if oversized */
 		*z2r = cpu_to_le16(new_z2);		/* new position */
 		return;
 	}
 
-	bch->rx_skb = mI_alloc_skb(fcnt, GFP_ATOMIC);
+	fcnt_tx = le16_to_cpu(*z2t) - le16_to_cpu(*z1t);
+	if (fcnt_tx <= 0)
+		fcnt_tx += B_FIFO_SIZE;
+		    /* fcnt_tx contains available bytes in tx-fifo */
+	fcnt_tx = B_FIFO_SIZE - fcnt_tx;
+		    /* remaining bytes to send (bytes in tx-fifo) */
+
+	bch->rx_skb = mI_alloc_skb(fcnt_rx, GFP_ATOMIC);
 	if (bch->rx_skb) {
-		ptr = skb_put(bch->rx_skb, fcnt);
-		if (le16_to_cpu(*z2r) + fcnt <= B_FIFO_SIZE + B_SUB_VAL)
-			maxlen = fcnt;	/* complete transfer */
+		ptr = skb_put(bch->rx_skb, fcnt_rx);
+		if (le16_to_cpu(*z2r) + fcnt_rx <= B_FIFO_SIZE + B_SUB_VAL)
+			maxlen = fcnt_rx;	/* complete transfer */
 		else
 			maxlen = B_FIFO_SIZE + B_SUB_VAL - le16_to_cpu(*z2r);
 			    /* maximum */
@@ -577,14 +587,14 @@ hfcpci_empty_fifo_trans(struct bchannel *bch, struct bzfifo *bz, u_char *bdata)
 		ptr1 = bdata + (le16_to_cpu(*z2r) - B_SUB_VAL);
 		    /* start of data */
 		memcpy(ptr, ptr1, maxlen);	/* copy data */
-		fcnt -= maxlen;
+		fcnt_rx -= maxlen;
 
-		if (fcnt) {	/* rest remaining */
+		if (fcnt_rx) {	/* rest remaining */
 			ptr += maxlen;
 			ptr1 = bdata;	/* start of buffer */
-			memcpy(ptr, ptr1, fcnt);	/* rest */
+			memcpy(ptr, ptr1, fcnt_rx);	/* rest */
 		}
-		recv_Bchannel(bch);
+		recv_Bchannel(bch, fcnt_tx); /* bch, id */
 	} else
 		printk(KERN_WARNING "HFCPCI: receive out of memory\n");
 
@@ -600,26 +610,28 @@ main_rec_hfcpci(struct bchannel *bch)
 	struct hfc_pci	*hc = bch->hw;
 	int		rcnt, real_fifo;
 	int		receive = 0, count = 5;
-	struct bzfifo	*bz;
+	struct bzfifo	*txbz, *rxbz;
 	u_char		*bdata;
 	struct zt	*zp;
 
 	if ((bch->nr & 2) && (!hc->hw.bswapped)) {
-		bz = &((union fifo_area *)(hc->hw.fifos))->b_chans.rxbz_b2;
+		rxbz = &((union fifo_area *)(hc->hw.fifos))->b_chans.rxbz_b2;
+		txbz = &((union fifo_area *)(hc->hw.fifos))->b_chans.txbz_b2;
 		bdata = ((union fifo_area *)(hc->hw.fifos))->b_chans.rxdat_b2;
 		real_fifo = 1;
 	} else {
-		bz = &((union fifo_area *)(hc->hw.fifos))->b_chans.rxbz_b1;
+		rxbz = &((union fifo_area *)(hc->hw.fifos))->b_chans.rxbz_b1;
+		txbz = &((union fifo_area *)(hc->hw.fifos))->b_chans.txbz_b1;
 		bdata = ((union fifo_area *)(hc->hw.fifos))->b_chans.rxdat_b1;
 		real_fifo = 0;
 	}
 Begin:
 	count--;
-	if (bz->f1 != bz->f2) {
+	if (rxbz->f1 != rxbz->f2) {
 		if (bch->debug & DEBUG_HW_BCHANNEL)
 			printk(KERN_DEBUG "hfcpci rec ch(%x) f1(%d) f2(%d)\n",
-			    bch->nr, bz->f1, bz->f2);
-		zp = &bz->za[bz->f2];
+			    bch->nr, rxbz->f1, rxbz->f2);
+		zp = &rxbz->za[rxbz->f2];
 
 		rcnt = le16_to_cpu(zp->z1) - le16_to_cpu(zp->z2);
 		if (rcnt < 0)
@@ -630,8 +642,8 @@ Begin:
 			    "hfcpci rec ch(%x) z1(%x) z2(%x) cnt(%d)\n",
 			    bch->nr, le16_to_cpu(zp->z1),
 			    le16_to_cpu(zp->z2), rcnt);
-		hfcpci_empty_bfifo(bch, bz, bdata, rcnt);
-		rcnt = bz->f1 - bz->f2;
+		hfcpci_empty_bfifo(bch, rxbz, bdata, rcnt);
+		rcnt = rxbz->f1 - rxbz->f2;
 		if (rcnt < 0)
 			rcnt += MAX_B_FRAMES + 1;
 		if (hc->hw.last_bfifo_cnt[real_fifo] > rcnt + 1) {
@@ -644,7 +656,7 @@ Begin:
 		else
 			receive = 0;
 	} else if (test_bit(FLG_TRANSPARENT, &bch->Flags)) {
-		hfcpci_empty_fifo_trans(bch, bz, bdata);
+		hfcpci_empty_fifo_trans(bch, rxbz, txbz, bdata);
 		return;
 	} else
 		receive = 0;
diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c
index 9c427fb204e..6b7704c41b9 100644
--- a/drivers/isdn/hardware/mISDN/hfcsusb.c
+++ b/drivers/isdn/hardware/mISDN/hfcsusb.c
@@ -947,7 +947,7 @@ hfcsusb_rx_frame(struct usb_fifo *fifo, __u8 *data, unsigned int len,
 				if (fifo->dch)
 					recv_Dchannel(fifo->dch);
 				if (fifo->bch)
-					recv_Bchannel(fifo->bch);
+					recv_Bchannel(fifo->bch, MISDN_ID_ANY);
 				if (fifo->ech)
 					recv_Echannel(fifo->ech,
 						     &hw->dch);
@@ -969,7 +969,7 @@ hfcsusb_rx_frame(struct usb_fifo *fifo, __u8 *data, unsigned int len,
 	} else {
 		/* deliver transparent data to layer2 */
 		if (rx_skb->len >= poll)
-			recv_Bchannel(fifo->bch);
+			recv_Bchannel(fifo->bch, MISDN_ID_ANY);
 	}
 	spin_unlock(&hw->lock);
 }
diff --git a/drivers/isdn/mISDN/dsp.h b/drivers/isdn/mISDN/dsp.h
index 0a4a362b89c..4a1c444d73a 100644
--- a/drivers/isdn/mISDN/dsp.h
+++ b/drivers/isdn/mISDN/dsp.h
@@ -262,5 +262,5 @@ extern int  dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg);
 extern void dsp_pipeline_process_tx(struct dsp_pipeline *pipeline, u8 *data,
 		int len);
 extern void dsp_pipeline_process_rx(struct dsp_pipeline *pipeline, u8 *data,
-		int len);
+		int len, unsigned int txlen);
 
diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c
index 3083338716b..1c49368e0a9 100644
--- a/drivers/isdn/mISDN/dsp_core.c
+++ b/drivers/isdn/mISDN/dsp_core.c
@@ -710,7 +710,7 @@ dsp_function(struct mISDNchannel *ch,  struct sk_buff *skb)
 		/* pipeline */
 		if (dsp->pipeline.inuse)
 			dsp_pipeline_process_rx(&dsp->pipeline, skb->data,
-				skb->len);
+				skb->len, hh->id);
 		/* change volume if requested */
 		if (dsp->rx_volume)
 			dsp_change_volume(skb, dsp->rx_volume);
diff --git a/drivers/isdn/mISDN/dsp_pipeline.c b/drivers/isdn/mISDN/dsp_pipeline.c
index 18cf87c113e..ac61f198eb3 100644
--- a/drivers/isdn/mISDN/dsp_pipeline.c
+++ b/drivers/isdn/mISDN/dsp_pipeline.c
@@ -347,7 +347,8 @@ void dsp_pipeline_process_tx(struct dsp_pipeline *pipeline, u8 *data, int len)
 			entry->elem->process_tx(entry->p, data, len);
 }
 
-void dsp_pipeline_process_rx(struct dsp_pipeline *pipeline, u8 *data, int len)
+void dsp_pipeline_process_rx(struct dsp_pipeline *pipeline, u8 *data, int len,
+	unsigned int txlen)
 {
 	struct dsp_pipeline_entry *entry;
 
@@ -356,7 +357,7 @@ void dsp_pipeline_process_rx(struct dsp_pipeline *pipeline, u8 *data, int len)
 
 	list_for_each_entry_reverse(entry, &pipeline->list, list)
 		if (entry->elem->process_rx)
-			entry->elem->process_rx(entry->p, data, len);
+			entry->elem->process_rx(entry->p, data, len, txlen);
 }
 
 
diff --git a/drivers/isdn/mISDN/hwchannel.c b/drivers/isdn/mISDN/hwchannel.c
index ab1168a110a..0481a0cdf6d 100644
--- a/drivers/isdn/mISDN/hwchannel.c
+++ b/drivers/isdn/mISDN/hwchannel.c
@@ -185,13 +185,13 @@ recv_Echannel(struct dchannel *ech, struct dchannel *dch)
 EXPORT_SYMBOL(recv_Echannel);
 
 void
-recv_Bchannel(struct bchannel *bch)
+recv_Bchannel(struct bchannel *bch, unsigned int id)
 {
 	struct mISDNhead *hh;
 
 	hh = mISDN_HEAD_P(bch->rx_skb);
 	hh->prim = PH_DATA_IND;
-	hh->id = MISDN_ID_ANY;
+	hh->id = id;
 	if (bch->rcount >= 64) {
 		printk(KERN_WARNING "B-channel %p receive queue overflow, "
 			"fushing!\n", bch);
diff --git a/include/linux/mISDNdsp.h b/include/linux/mISDNdsp.h
index 6b71d2dce50..2c483d45f14 100644
--- a/include/linux/mISDNdsp.h
+++ b/include/linux/mISDNdsp.h
@@ -12,7 +12,8 @@ struct mISDN_dsp_element {
 	void	*(*new)(const char *arg);
 	void	(*free)(void *p);
 	void	(*process_tx)(void *p, unsigned char *data, int len);
-	void	(*process_rx)(void *p, unsigned char *data, int len);
+	void	(*process_rx)(void *p, unsigned char *data, int len,
+			unsigned int txlen);
 	int	num_args;
 	struct mISDN_dsp_element_arg
 		*args;
diff --git a/include/linux/mISDNhw.h b/include/linux/mISDNhw.h
index 97ffdc1d344..ce900f4c245 100644
--- a/include/linux/mISDNhw.h
+++ b/include/linux/mISDNhw.h
@@ -185,7 +185,7 @@ extern int	dchannel_senddata(struct dchannel *, struct sk_buff *);
 extern int	bchannel_senddata(struct bchannel *, struct sk_buff *);
 extern void	recv_Dchannel(struct dchannel *);
 extern void	recv_Echannel(struct dchannel *, struct dchannel *);
-extern void	recv_Bchannel(struct bchannel *);
+extern void	recv_Bchannel(struct bchannel *, unsigned int id);
 extern void	recv_Dchannel_skb(struct dchannel *, struct sk_buff *);
 extern void	recv_Bchannel_skb(struct bchannel *, struct sk_buff *);
 extern void	confirm_Bsend(struct bchannel *bch);
-- 
cgit v1.2.3-70-g09d2


From e73f6b2260daf02793071e5ce06ea87df762920a Mon Sep 17 00:00:00 2001
From: Andreas Eversberg <andreas@eversberg.eu>
Date: Fri, 22 May 2009 11:04:48 +0000
Subject: mISDN: Added layer-1-hold feature

Add IMHOLD_L1 ioctl.
The feature will be disabled on closing.

Signed-off-by: Andreas Eversberg <andreas@eversberg.eu>
Signed-off-by: Karsten Keil <keil@b1-systems.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/mISDN/socket.c | 21 ++++++++++++++++++---
 drivers/isdn/mISDN/tei.c    | 43 +++++++++++++++++++++++++++++++------------
 include/linux/mISDNif.h     |  2 ++
 3 files changed, 51 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index 508945d1b9c..530f6897736 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -292,7 +292,7 @@ static int
 data_sock_ioctl_bound(struct sock *sk, unsigned int cmd, void __user *p)
 {
 	struct mISDN_ctrl_req	cq;
-	int			err = -EINVAL, val;
+	int			err = -EINVAL, val[2];
 	struct mISDNchannel	*bchan, *next;
 
 	lock_sock(sk);
@@ -328,12 +328,27 @@ data_sock_ioctl_bound(struct sock *sk, unsigned int cmd, void __user *p)
 			err = -EINVAL;
 			break;
 		}
-		if (get_user(val, (int __user *)p)) {
+		val[0] = cmd;
+		if (get_user(val[1], (int __user *)p)) {
 			err = -EFAULT;
 			break;
 		}
 		err = _pms(sk)->dev->teimgr->ctrl(_pms(sk)->dev->teimgr,
-		    CONTROL_CHANNEL, &val);
+		    CONTROL_CHANNEL, val);
+		break;
+	case IMHOLD_L1:
+		if (sk->sk_protocol != ISDN_P_LAPD_NT
+		 && sk->sk_protocol != ISDN_P_LAPD_TE) {
+			err = -EINVAL;
+			break;
+		}
+		val[0] = cmd;
+		if (get_user(val[1], (int __user *)p)) {
+			err = -EFAULT;
+			break;
+		}
+		err = _pms(sk)->dev->teimgr->ctrl(_pms(sk)->dev->teimgr,
+		    CONTROL_CHANNEL, val);
 		break;
 	default:
 		err = -EINVAL;
diff --git a/drivers/isdn/mISDN/tei.c b/drivers/isdn/mISDN/tei.c
index b452dead8fd..c75af762a06 100644
--- a/drivers/isdn/mISDN/tei.c
+++ b/drivers/isdn/mISDN/tei.c
@@ -122,8 +122,11 @@ da_deactivate(struct FsmInst *fi, int event, void *arg)
 	}
 	read_unlock_irqrestore(&mgr->lock, flags);
 	/* All TEI are inactiv */
-	mISDN_FsmAddTimer(&mgr->datimer, DATIMER_VAL, EV_DATIMER, NULL, 1);
-	mISDN_FsmChangeState(fi, ST_L1_DEACT_PENDING);
+	if (!test_bit(OPTION_L1_HOLD, &mgr->options)) {
+		mISDN_FsmAddTimer(&mgr->datimer, DATIMER_VAL, EV_DATIMER,
+			NULL, 1);
+		mISDN_FsmChangeState(fi, ST_L1_DEACT_PENDING);
+	}
 }
 
 static void
@@ -132,9 +135,11 @@ da_ui(struct FsmInst *fi, int event, void *arg)
 	struct manager	*mgr = fi->userdata;
 
 	/* restart da timer */
-	mISDN_FsmDelTimer(&mgr->datimer, 2);
-	mISDN_FsmAddTimer(&mgr->datimer, DATIMER_VAL, EV_DATIMER, NULL, 2);
-
+	if (!test_bit(OPTION_L1_HOLD, &mgr->options)) {
+		mISDN_FsmDelTimer(&mgr->datimer, 2);
+		mISDN_FsmAddTimer(&mgr->datimer, DATIMER_VAL, EV_DATIMER,
+			NULL, 2);
+	}
 }
 
 static void
@@ -1103,6 +1108,7 @@ free_teimanager(struct manager *mgr)
 {
 	struct layer2	*l2, *nl2;
 
+	test_and_clear_bit(OPTION_L1_HOLD, &mgr->options);
 	if (test_bit(MGR_OPT_NETWORK, &mgr->options)) {
 		/* not locked lock is taken in release tei */
 		mgr->up = NULL;
@@ -1133,13 +1139,26 @@ static int
 ctrl_teimanager(struct manager *mgr, void *arg)
 {
 	/* currently we only have one option */
-	int	clean = *((int *)arg);
-
-	if (clean)
-		test_and_set_bit(OPTION_L2_CLEANUP, &mgr->options);
-	else
-		test_and_clear_bit(OPTION_L2_CLEANUP, &mgr->options);
-	return 0;
+	int	*val = (int *)arg;
+	int	ret = 0;
+
+	switch (val[0]) {
+	case IMCLEAR_L2:
+		if (val[1])
+			test_and_set_bit(OPTION_L2_CLEANUP, &mgr->options);
+		else
+			test_and_clear_bit(OPTION_L2_CLEANUP, &mgr->options);
+		break;
+	case IMHOLD_L1:
+		if (val[1])
+			test_and_set_bit(OPTION_L1_HOLD, &mgr->options);
+		else
+			test_and_clear_bit(OPTION_L1_HOLD, &mgr->options);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	return ret;
 }
 
 /* This function does create a L2 for fixed TEI in NT Mode */
diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h
index cf974593a99..0b28fd1e393 100644
--- a/include/linux/mISDNif.h
+++ b/include/linux/mISDNif.h
@@ -229,6 +229,7 @@
 #define OPTION_L2_PTP		2
 #define OPTION_L2_FIXEDTEI	3
 #define OPTION_L2_CLEANUP	4
+#define OPTION_L1_HOLD		5
 
 /* should be in sync with linux/kobject.h:KOBJ_NAME_LEN */
 #define MISDN_MAX_IDLEN		20
@@ -317,6 +318,7 @@ struct ph_info {
 #define IMCTRLREQ	_IOR('I', 69, int)
 #define IMCLEAR_L2	_IOR('I', 70, int)
 #define IMSETDEVNAME	_IOR('I', 71, struct mISDN_devrename)
+#define IMHOLD_L1	_IOR('I', 72, int)
 
 static inline int
 test_channelmap(u_int nr, u_char *map)
-- 
cgit v1.2.3-70-g09d2


From db9bb63a1b5b65df41d112a8c21adbbfc6a4ac08 Mon Sep 17 00:00:00 2001
From: Karsten Keil <keil@b1-systems.de>
Date: Fri, 22 May 2009 11:04:53 +0000
Subject: mISDN: Add XHFC support for embedded Speech-Design board to hfcmulti

New version without emulating arch specific stuff for the other
architectures, the special IO and init functions for the 8xx
microcontroller are in a separate include file.

Signed-off-by: Andreas Eversberg <andreas@eversberg.eu>
Signed-off-by: Karsten Keil <keil@b1-systems.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hardware/mISDN/Kconfig         |  11 +-
 drivers/isdn/hardware/mISDN/hfc_multi.h     |  45 ++-
 drivers/isdn/hardware/mISDN/hfc_multi_8xx.h | 167 +++++++++++
 drivers/isdn/hardware/mISDN/hfcmulti.c      | 416 +++++++++++++++++++---------
 drivers/isdn/mISDN/dsp_cmx.c                |   4 +
 include/linux/mISDNdsp.h                    |   1 +
 6 files changed, 500 insertions(+), 144 deletions(-)
 create mode 100644 drivers/isdn/hardware/mISDN/hfc_multi_8xx.h

(limited to 'include')

diff --git a/drivers/isdn/hardware/mISDN/Kconfig b/drivers/isdn/hardware/mISDN/Kconfig
index fd112ae252c..3024566dd09 100644
--- a/drivers/isdn/hardware/mISDN/Kconfig
+++ b/drivers/isdn/hardware/mISDN/Kconfig
@@ -13,7 +13,7 @@ config MISDN_HFCPCI
 
 config MISDN_HFCMULTI
 	tristate "Support for HFC multiport cards (HFC-4S/8S/E1)"
-	depends on PCI
+	depends on PCI || 8xx
 	depends on MISDN
 	help
 	  Enable support for cards with Cologne Chip AG's HFC multiport
@@ -23,6 +23,15 @@ config MISDN_HFCMULTI
 	   * HFC-8S (8 S/T interfaces on one chip)
 	   * HFC-E1 (E1 interface for 2Mbit ISDN)
 
+config MISDN_HFCMULTI_8xx
+	boolean "Support for XHFC embedded board in HFC multiport driver"
+	depends on MISDN
+	depends on MISDN_HFCMULTI
+	depends on 8xx
+	default 8xx
+	help
+	  Enable support for the XHFC embedded solution from Speech Design.
+
 config MISDN_HFCUSB
 	tristate "Support for HFC-S USB based TAs"
 	depends on USB
diff --git a/drivers/isdn/hardware/mISDN/hfc_multi.h b/drivers/isdn/hardware/mISDN/hfc_multi.h
index c4878cc712c..0c773866efc 100644
--- a/drivers/isdn/hardware/mISDN/hfc_multi.h
+++ b/drivers/isdn/hardware/mISDN/hfc_multi.h
@@ -17,6 +17,16 @@
 #define	PCI_ENA_REGIO	0x01
 #define	PCI_ENA_MEMIO	0x02
 
+#define XHFC_IRQ	4		/* SIU_IRQ2 */
+#define XHFC_MEMBASE	0xFE000000
+#define XHFC_MEMSIZE    0x00001000
+#define XHFC_OFFSET	0x00001000
+#define PA_XHFC_A0	0x0020		/* PA10 */
+#define PB_XHFC_IRQ1	0x00000100	/* PB23 */
+#define PB_XHFC_IRQ2	0x00000200	/* PB22 */
+#define PB_XHFC_IRQ3	0x00000400	/* PB21 */
+#define PB_XHFC_IRQ4	0x00000800	/* PB20 */
+
 /*
  * NOTE: some registers are assigned multiple times due to different modes
  *       also registers are assigned differen for HFC-4s/8s and HFC-E1
@@ -81,6 +91,11 @@ struct hfcm_hw {
 #define	HFC_CFG_CRC4		10 /* disable CRC-4 Multiframe mode, */
 					/* use double frame instead. */
 
+#define HFC_TYPE_E1		1 /* controller is HFC-E1 */
+#define HFC_TYPE_4S		4 /* controller is HFC-4S */
+#define HFC_TYPE_8S		8 /* controller is HFC-8S */
+#define HFC_TYPE_XHFC		5 /* controller is XHFC */
+
 #define	HFC_CHIP_EXRAM_128	0 /* external ram 128k */
 #define	HFC_CHIP_EXRAM_512	1 /* external ram 256k */
 #define	HFC_CHIP_REVISION0	2 /* old fifo handling */
@@ -88,19 +103,22 @@ struct hfcm_hw {
 #define	HFC_CHIP_PCM_MASTER	4 /* PCM is master */
 #define	HFC_CHIP_RX_SYNC	5 /* disable pll sync for pcm */
 #define	HFC_CHIP_DTMF		6 /* DTMF decoding is enabled */
-#define	HFC_CHIP_ULAW		7 /* ULAW mode */
-#define	HFC_CHIP_CLOCK2		8 /* double clock mode */
-#define	HFC_CHIP_E1CLOCK_GET	9 /* always get clock from E1 interface */
-#define	HFC_CHIP_E1CLOCK_PUT	10 /* always put clock from E1 interface */
-#define	HFC_CHIP_WATCHDOG	11 /* whether we should send signals */
+#define	HFC_CHIP_CONF		7 /* conference handling is enabled */
+#define	HFC_CHIP_ULAW		8 /* ULAW mode */
+#define	HFC_CHIP_CLOCK2		9 /* double clock mode */
+#define	HFC_CHIP_E1CLOCK_GET	10 /* always get clock from E1 interface */
+#define	HFC_CHIP_E1CLOCK_PUT	11 /* always put clock from E1 interface */
+#define	HFC_CHIP_WATCHDOG	12 /* whether we should send signals */
 					/* to the watchdog */
-#define	HFC_CHIP_B410P		12 /* whether we have a b410p with echocan in */
+#define	HFC_CHIP_B410P		13 /* whether we have a b410p with echocan in */
 					/* hw */
-#define	HFC_CHIP_PLXSD		13 /* whether we have a Speech-Design PLX */
+#define	HFC_CHIP_PLXSD		14 /* whether we have a Speech-Design PLX */
+#define	HFC_CHIP_EMBSD          15 /* whether we have a SD Embedded board */
 
 #define HFC_IO_MODE_PCIMEM	0x00 /* normal memory mapped IO */
 #define HFC_IO_MODE_REGIO	0x01 /* PCI io access */
 #define HFC_IO_MODE_PLXSD	0x02 /* access HFC via PLX9030 */
+#define HFC_IO_MODE_EMBSD	0x03 /* direct access */
 
 /* table entry in the PCI devices list */
 struct hm_map {
@@ -113,6 +131,7 @@ struct hm_map {
 	int opticalsupport;
 	int dip_type;
 	int io_mode;
+	int irq;
 };
 
 struct hfc_multi {
@@ -120,7 +139,7 @@ struct hfc_multi {
 	struct hm_map	*mtyp;
 	int		id;
 	int		pcm;	/* id of pcm bus */
-	int		type;
+	int		ctype;	/* controller type */
 	int		ports;
 
 	u_int		irq;	/* irq used by card */
@@ -160,10 +179,16 @@ struct hfc_multi {
 				int len);
 	void		(*write_fifo)(struct hfc_multi *hc, u_char *data,
 				int len);
-	u_long		pci_origmembase, plx_origmembase, dsp_origmembase;
+	u_long		pci_origmembase, plx_origmembase;
 	void __iomem	*pci_membase; /* PCI memory */
 	void __iomem	*plx_membase; /* PLX memory */
-	u_char		*dsp_membase; /* DSP on PLX */
+	u_long		xhfc_origmembase;
+	u_char		*xhfc_membase;
+	u_long		*xhfc_memaddr, *xhfc_memdata;
+#ifdef CONFIG_MISDN_HFCMULTI_8xx
+	struct immap	*immap;
+#endif
+	u_long		pb_irqmsk;	/* Portbit mask to check the IRQ line */
 	u_long		pci_iobase; /* PCI IO */
 	struct hfcm_hw	hw;	/* remember data of write-only-registers */
 
diff --git a/drivers/isdn/hardware/mISDN/hfc_multi_8xx.h b/drivers/isdn/hardware/mISDN/hfc_multi_8xx.h
new file mode 100644
index 00000000000..45ddced956d
--- /dev/null
+++ b/drivers/isdn/hardware/mISDN/hfc_multi_8xx.h
@@ -0,0 +1,167 @@
+/*
+ * For License see notice in hfc_multi.c
+ *
+ * special IO and init functions for the embedded XHFC board
+ * from Speech Design
+ *
+ */
+
+#include <asm/8xx_immap.h>
+
+/* Change this to the value used by your board */
+#ifndef IMAP_ADDR
+#define IMAP_ADDR	0xFFF00000
+#endif
+
+static void
+#ifdef HFC_REGISTER_DEBUG
+HFC_outb_embsd(struct hfc_multi *hc, u_char reg, u_char val,
+		const char *function, int line)
+#else
+HFC_outb_embsd(struct hfc_multi *hc, u_char reg, u_char val)
+#endif
+{
+	hc->immap->im_ioport.iop_padat |= PA_XHFC_A0;
+	writeb(reg, hc->xhfc_memaddr);
+	hc->immap->im_ioport.iop_padat &= ~(PA_XHFC_A0);
+	writeb(val, hc->xhfc_memdata);
+}
+static u_char
+#ifdef HFC_REGISTER_DEBUG
+HFC_inb_embsd(struct hfc_multi *hc, u_char reg, const char *function, int line)
+#else
+HFC_inb_embsd(struct hfc_multi *hc, u_char reg)
+#endif
+{
+	hc->immap->im_ioport.iop_padat |= PA_XHFC_A0;
+	writeb(reg, hc->xhfc_memaddr);
+	hc->immap->im_ioport.iop_padat &= ~(PA_XHFC_A0);
+	return readb(hc->xhfc_memdata);
+}
+static u_short
+#ifdef HFC_REGISTER_DEBUG
+HFC_inw_embsd(struct hfc_multi *hc, u_char reg, const char *function, int line)
+#else
+HFC_inw_embsd(struct hfc_multi *hc, u_char reg)
+#endif
+{
+	hc->immap->im_ioport.iop_padat |= PA_XHFC_A0;
+	writeb(reg, hc->xhfc_memaddr);
+	hc->immap->im_ioport.iop_padat &= ~(PA_XHFC_A0);
+	return readb(hc->xhfc_memdata);
+}
+static void
+#ifdef HFC_REGISTER_DEBUG
+HFC_wait_embsd(struct hfc_multi *hc, const char *function, int line)
+#else
+HFC_wait_embsd(struct hfc_multi *hc)
+#endif
+{
+	hc->immap->im_ioport.iop_padat |= PA_XHFC_A0;
+	writeb(R_STATUS, hc->xhfc_memaddr);
+	hc->immap->im_ioport.iop_padat &= ~(PA_XHFC_A0);
+	while (readb(hc->xhfc_memdata) & V_BUSY)
+		cpu_relax();
+}
+
+/* write fifo data (EMBSD) */
+void
+write_fifo_embsd(struct hfc_multi *hc, u_char *data, int len)
+{
+	hc->immap->im_ioport.iop_padat |= PA_XHFC_A0;
+	*hc->xhfc_memaddr = A_FIFO_DATA0;
+	hc->immap->im_ioport.iop_padat &= ~(PA_XHFC_A0);
+	while (len) {
+		*hc->xhfc_memdata = *data;
+		data++;
+		len--;
+	}
+}
+
+/* read fifo data (EMBSD) */
+void
+read_fifo_embsd(struct hfc_multi *hc, u_char *data, int len)
+{
+	hc->immap->im_ioport.iop_padat |= PA_XHFC_A0;
+	*hc->xhfc_memaddr = A_FIFO_DATA0;
+	hc->immap->im_ioport.iop_padat &= ~(PA_XHFC_A0);
+	while (len) {
+		*data = (u_char)(*hc->xhfc_memdata);
+		data++;
+		len--;
+	}
+}
+
+static int
+setup_embedded(struct hfc_multi *hc, struct hm_map *m)
+{
+	printk(KERN_INFO
+	    "HFC-multi: card manufacturer: '%s' card name: '%s' clock: %s\n",
+	    m->vendor_name, m->card_name, m->clock2 ? "double" : "normal");
+
+	hc->pci_dev = NULL;
+	if (m->clock2)
+		test_and_set_bit(HFC_CHIP_CLOCK2, &hc->chip);
+
+	hc->leds = m->leds;
+	hc->ledstate = 0xAFFEAFFE;
+	hc->opticalsupport = m->opticalsupport;
+
+	hc->pci_iobase = 0;
+	hc->pci_membase = 0;
+	hc->xhfc_membase = NULL;
+	hc->xhfc_memaddr = NULL;
+	hc->xhfc_memdata = NULL;
+
+	/* set memory access methods */
+	if (m->io_mode) /* use mode from card config */
+		hc->io_mode = m->io_mode;
+	switch (hc->io_mode) {
+	case HFC_IO_MODE_EMBSD:
+		test_and_set_bit(HFC_CHIP_EMBSD, &hc->chip);
+		hc->slots = 128; /* required */
+		/* fall through */
+		hc->HFC_outb = HFC_outb_embsd;
+		hc->HFC_inb = HFC_inb_embsd;
+		hc->HFC_inw = HFC_inw_embsd;
+		hc->HFC_wait = HFC_wait_embsd;
+		hc->read_fifo = read_fifo_embsd;
+		hc->write_fifo = write_fifo_embsd;
+		hc->xhfc_origmembase = XHFC_MEMBASE + XHFC_OFFSET * hc->id;
+		hc->xhfc_membase = (u_char *)ioremap(hc->xhfc_origmembase,
+				XHFC_MEMSIZE);
+		if (!hc->xhfc_membase) {
+			printk(KERN_WARNING
+			    "HFC-multi: failed to remap xhfc address space. "
+			    "(internal error)\n");
+			return -EIO;
+		}
+		hc->xhfc_memaddr = (u_long *)(hc->xhfc_membase + 4);
+		hc->xhfc_memdata = (u_long *)(hc->xhfc_membase);
+		printk(KERN_INFO
+		    "HFC-multi: xhfc_membase:%#lx xhfc_origmembase:%#lx "
+		    "xhfc_memaddr:%#lx xhfc_memdata:%#lx\n",
+		    (u_long)hc->xhfc_membase, hc->xhfc_origmembase,
+		    (u_long)hc->xhfc_memaddr, (u_long)hc->xhfc_memdata);
+		break;
+	default:
+		printk(KERN_WARNING "HFC-multi: Invalid IO mode.\n");
+		return -EIO;
+	}
+
+	/* Prepare the MPC8XX PortA 10 as output (address/data selector) */
+	hc->immap = (struct immap *)(IMAP_ADDR);
+	hc->immap->im_ioport.iop_papar &= ~(PA_XHFC_A0);
+	hc->immap->im_ioport.iop_paodr &= ~(PA_XHFC_A0);
+	hc->immap->im_ioport.iop_padir |=   PA_XHFC_A0;
+
+	/* Prepare the MPC8xx PortB __X__ as input (ISDN__X__IRQ) */
+	hc->pb_irqmsk = (PB_XHFC_IRQ1 << hc->id);
+	hc->immap->im_cpm.cp_pbpar &= ~(hc->pb_irqmsk);
+	hc->immap->im_cpm.cp_pbodr &= ~(hc->pb_irqmsk);
+	hc->immap->im_cpm.cp_pbdir &= ~(hc->pb_irqmsk);
+
+	/* At this point the needed config is done */
+	/* fifos are still not enabled */
+	return 0;
+}
diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c
index bc0d3efeb56..5be4edf631d 100644
--- a/drivers/isdn/hardware/mISDN/hfcmulti.c
+++ b/drivers/isdn/hardware/mISDN/hfcmulti.c
@@ -139,6 +139,10 @@
  *	Selects interface with clock source for mISDN and applications.
  *	Set to card number starting with 1. Set to -1 to disable.
  *	By default, the first card is used as clock source.
+ *
+ * hwid:
+ *	NOTE: only one hwid value must be given once
+ * 	Enable special embedded devices with XHFC controllers.
  */
 
 /*
@@ -206,6 +210,11 @@ static int	clock;
 static uint	timer;
 static uint	clockdelay_te = CLKDEL_TE;
 static uint	clockdelay_nt = CLKDEL_NT;
+#define HWID_NONE	0
+#define HWID_MINIP4	1
+#define HWID_MINIP8	2
+#define HWID_MINIP16	3
+static uint	hwid = HWID_NONE;
 
 static int	HFC_cnt, Port_cnt, PCM_cnt = 99;
 
@@ -223,6 +232,7 @@ module_param_array(pcm, int, NULL, S_IRUGO | S_IWUSR);
 module_param_array(dslot, int, NULL, S_IRUGO | S_IWUSR);
 module_param_array(iomode, uint, NULL, S_IRUGO | S_IWUSR);
 module_param_array(port, uint, NULL, S_IRUGO | S_IWUSR);
+module_param(hwid, uint, S_IRUGO | S_IWUSR); /* The hardware ID */
 
 #ifdef HFC_REGISTER_DEBUG
 #define HFC_outb(hc, reg, val) \
@@ -252,6 +262,10 @@ module_param_array(port, uint, NULL, S_IRUGO | S_IWUSR);
 #define HFC_wait_nodebug(hc)		(hc->HFC_wait_nodebug(hc))
 #endif
 
+#ifdef CONFIG_MISDN_HFCMULTI_8xx
+#include "hfc_multi_8xx.h"
+#endif
+
 /* HFC_IO_MODE_PCIMEM */
 static void
 #ifdef HFC_REGISTER_DEBUG
@@ -928,7 +942,7 @@ hfcmulti_resync(struct hfc_multi *locked, struct hfc_multi *newmaster, int rm)
 			writel(pv, plx_acc_32);
 			if (test_bit(HFC_CHIP_PCM_MASTER, &hc->chip)) {
 				pcmmaster = hc;
-				if (hc->type == 1) {
+				if (hc->ctype == HFC_TYPE_E1) {
 					if (debug & DEBUG_HFCMULTI_PLXSD)
 						printk(KERN_DEBUG
 							"Schedule SYNC_I\n");
@@ -949,7 +963,8 @@ hfcmulti_resync(struct hfc_multi *locked, struct hfc_multi *newmaster, int rm)
 		pv |= PLX_SYNC_O_EN;
 		writel(pv, plx_acc_32);
 		/* switch to jatt PLL, if not disabled by RX_SYNC */
-		if (hc->type == 1 && !test_bit(HFC_CHIP_RX_SYNC, &hc->chip)) {
+		if (hc->ctype == HFC_TYPE_E1
+				&& !test_bit(HFC_CHIP_RX_SYNC, &hc->chip)) {
 			if (debug & DEBUG_HFCMULTI_PLXSD)
 				printk(KERN_DEBUG "Schedule jatt PLL\n");
 			hc->e1_resync |= 2; /* switch to jatt */
@@ -961,7 +976,7 @@ hfcmulti_resync(struct hfc_multi *locked, struct hfc_multi *newmaster, int rm)
 				printk(KERN_DEBUG
 					"id=%d (0x%p) = PCM master syncronized "
 					"with QUARTZ\n", hc->id, hc);
-			if (hc->type == 1) {
+			if (hc->ctype == HFC_TYPE_E1) {
 				/* Use the crystal clock for the PCM
 				   master card */
 				if (debug & DEBUG_HFCMULTI_PLXSD)
@@ -972,7 +987,7 @@ hfcmulti_resync(struct hfc_multi *locked, struct hfc_multi *newmaster, int rm)
 				if (debug & DEBUG_HFCMULTI_PLXSD)
 					printk(KERN_DEBUG
 					    "QUARTZ is automatically "
-					    "enabled by HFC-%dS\n", hc->type);
+					    "enabled by HFC-%dS\n", hc->ctype);
 			}
 			plx_acc_32 = hc->plx_membase + PLX_GPIOC;
 			pv = readl(plx_acc_32);
@@ -1060,13 +1075,16 @@ release_io_hfcmulti(struct hfc_multi *hc)
 
 	/* disable memory mapped ports / io ports */
 	test_and_clear_bit(HFC_CHIP_PLXSD, &hc->chip); /* prevent resync */
-	pci_write_config_word(hc->pci_dev, PCI_COMMAND, 0);
+	if (hc->pci_dev)
+		pci_write_config_word(hc->pci_dev, PCI_COMMAND, 0);
 	if (hc->pci_membase)
 		iounmap(hc->pci_membase);
 	if (hc->plx_membase)
 		iounmap(hc->plx_membase);
 	if (hc->pci_iobase)
 		release_region(hc->pci_iobase, 8);
+	if (hc->xhfc_membase)
+		iounmap((void *)hc->xhfc_membase);
 
 	if (hc->pci_dev) {
 		pci_disable_device(hc->pci_dev);
@@ -1100,8 +1118,9 @@ init_chip(struct hfc_multi *hc)
 	/* revision check */
 	if (debug & DEBUG_HFCMULTI_INIT)
 		printk(KERN_DEBUG "%s: entered\n", __func__);
-	val = HFC_inb(hc, R_CHIP_ID)>>4;
-	if (val != 0x8 && val != 0xc && val != 0xe) {
+	val = HFC_inb(hc, R_CHIP_ID);
+	if ((val>>4) != 0x8 && (val>>4) != 0xc && (val>>4) != 0xe
+			&& (val>>1) != 0x31) {
 		printk(KERN_INFO "HFC_multi: unknown CHIP_ID:%x\n", (u_int)val);
 		err = -EIO;
 		goto out;
@@ -1109,8 +1128,9 @@ init_chip(struct hfc_multi *hc)
 	rev = HFC_inb(hc, R_CHIP_RV);
 	printk(KERN_INFO
 	    "HFC_multi: detected HFC with chip ID=0x%lx revision=%ld%s\n",
-	    val, rev, (rev == 0) ? " (old FIFO handling)" : "");
-	if (rev == 0) {
+	    val, rev, (rev == 0 && (hc->ctype != HFC_TYPE_XHFC)) ?
+		" (old FIFO handling)" : "");
+	if (hc->ctype != HFC_TYPE_XHFC && rev == 0) {
 		test_and_set_bit(HFC_CHIP_REVISION0, &hc->chip);
 		printk(KERN_WARNING
 		    "HFC_multi: NOTE: Your chip is revision 0, "
@@ -1152,6 +1172,12 @@ init_chip(struct hfc_multi *hc)
 		hc->Zlen = 8000;
 		hc->DTMFbase = 0x2000;
 	}
+	if (hc->ctype == HFC_TYPE_XHFC) {
+		hc->Flen = 0x8;
+		hc->Zmin = 0x0;
+		hc->Zlen = 64;
+		hc->DTMFbase = 0x0;
+	}
 	hc->max_trans = poll << 1;
 	if (hc->max_trans > hc->Zlen)
 		hc->max_trans = hc->Zlen;
@@ -1211,6 +1237,9 @@ init_chip(struct hfc_multi *hc)
 		hc->hw.r_pcm_md0 = V_F0_LEN; /* shift clock for DSP */
 	}
 
+	if (test_bit(HFC_CHIP_EMBSD, &hc->chip))
+		hc->hw.r_pcm_md0 = V_F0_LEN; /* shift clock for DSP */
+
 	/* we only want the real Z2 read-pointer for revision > 0 */
 	if (!test_bit(HFC_CHIP_REVISION0, &hc->chip))
 		hc->hw.r_ram_sz |= V_FZ_MD;
@@ -1234,15 +1263,24 @@ init_chip(struct hfc_multi *hc)
 
 	/* soft reset */
 	HFC_outb(hc, R_CTRL, hc->hw.r_ctrl);
-	HFC_outb(hc, R_RAM_SZ, hc->hw.r_ram_sz);
+	if (hc->ctype == HFC_TYPE_XHFC)
+		HFC_outb(hc, 0x0C /* R_FIFO_THRES */,
+				0x11 /* 16 Bytes TX/RX */);
+	else
+		HFC_outb(hc, R_RAM_SZ, hc->hw.r_ram_sz);
 	HFC_outb(hc, R_FIFO_MD, 0);
-	hc->hw.r_cirm = V_SRES | V_HFCRES | V_PCMRES | V_STRES | V_RLD_EPR;
+	if (hc->ctype == HFC_TYPE_XHFC)
+		hc->hw.r_cirm = V_SRES | V_HFCRES | V_PCMRES | V_STRES;
+	else
+		hc->hw.r_cirm = V_SRES | V_HFCRES | V_PCMRES | V_STRES
+			| V_RLD_EPR;
 	HFC_outb(hc, R_CIRM, hc->hw.r_cirm);
 	udelay(100);
 	hc->hw.r_cirm = 0;
 	HFC_outb(hc, R_CIRM, hc->hw.r_cirm);
 	udelay(100);
-	HFC_outb(hc, R_RAM_SZ, hc->hw.r_ram_sz);
+	if (hc->ctype != HFC_TYPE_XHFC)
+		HFC_outb(hc, R_RAM_SZ, hc->hw.r_ram_sz);
 
 	/* Speech Design PLX bridge pcm and sync mode */
 	if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) {
@@ -1278,13 +1316,16 @@ init_chip(struct hfc_multi *hc)
 	HFC_outb(hc, R_PCM_MD0, hc->hw.r_pcm_md0 | 0xa0);
 	if (test_bit(HFC_CHIP_PLXSD, &hc->chip))
 		HFC_outb(hc, R_PCM_MD2, V_SYNC_SRC); /* sync via SYNC_I / O */
+	else if (test_bit(HFC_CHIP_EMBSD, &hc->chip))
+		HFC_outb(hc, R_PCM_MD2, 0x10); /* V_C2O_EN */
 	else
 		HFC_outb(hc, R_PCM_MD2, 0x00); /* sync from interface */
 	HFC_outb(hc, R_PCM_MD0, hc->hw.r_pcm_md0 | 0x00);
 	for (i = 0; i < 256; i++) {
 		HFC_outb_nodebug(hc, R_SLOT, i);
 		HFC_outb_nodebug(hc, A_SL_CFG, 0);
-		HFC_outb_nodebug(hc, A_CONF, 0);
+		if (hc->ctype != HFC_TYPE_XHFC)
+			HFC_outb_nodebug(hc, A_CONF, 0);
 		hc->slot_owner[i] = -1;
 	}
 
@@ -1296,6 +1337,9 @@ init_chip(struct hfc_multi *hc)
 		HFC_outb(hc, R_BRG_PCM_CFG, V_PCM_CLK);
 	}
 
+	if (test_bit(HFC_CHIP_EMBSD, &hc->chip))
+		HFC_outb(hc, 0x02 /* R_CLK_CFG */, 0x40 /* V_CLKO_OFF */);
+
 	/* B410P GPIO */
 	if (test_bit(HFC_CHIP_B410P, &hc->chip)) {
 		printk(KERN_NOTICE "Setting GPIOs\n");
@@ -1424,7 +1468,7 @@ controller_fail:
 	hc->hw.r_irqmsk_misc |= V_TI_IRQMSK;
 
 	/* set E1 state machine IRQ */
-	if (hc->type == 1)
+	if (hc->ctype == HFC_TYPE_E1)
 		hc->hw.r_irqmsk_misc |= V_STA_IRQMSK;
 
 	/* set DTMF detection */
@@ -1444,7 +1488,8 @@ controller_fail:
 		r_conf_en = V_CONF_EN | V_ULAW;
 	else
 		r_conf_en = V_CONF_EN;
-	HFC_outb(hc, R_CONF_EN, r_conf_en);
+	if (hc->ctype != HFC_TYPE_XHFC)
+		HFC_outb(hc, R_CONF_EN, r_conf_en);
 
 	/* setting leds */
 	switch (hc->leds) {
@@ -1468,16 +1513,23 @@ controller_fail:
 		break;
 	}
 
+	if (test_bit(HFC_CHIP_EMBSD, &hc->chip)) {
+		hc->hw.r_st_sync = 0x10; /* V_AUTO_SYNCI */
+		HFC_outb(hc, R_ST_SYNC, hc->hw.r_st_sync);
+	}
+
 	/* set master clock */
 	if (hc->masterclk >= 0) {
 		if (debug & DEBUG_HFCMULTI_INIT)
 			printk(KERN_DEBUG "%s: setting ST master clock "
 			    "to port %d (0..%d)\n",
 			    __func__, hc->masterclk, hc->ports-1);
-		hc->hw.r_st_sync = hc->masterclk | V_AUTO_SYNC;
+		hc->hw.r_st_sync |= (hc->masterclk | V_AUTO_SYNC);
 		HFC_outb(hc, R_ST_SYNC, hc->hw.r_st_sync);
 	}
 
+
+
 	/* setting misc irq */
 	HFC_outb(hc, R_IRQMSK_MISC, hc->hw.r_irqmsk_misc);
 	if (debug & DEBUG_HFCMULTI_INIT)
@@ -1929,7 +1981,7 @@ next_frame:
 				Fspace = 1;
 		}
 		/* one frame only for ST D-channels, to allow resending */
-		if (hc->type != 1 && dch) {
+		if (hc->ctype != HFC_TYPE_E1 && dch) {
 			if (f1 != f2)
 				Fspace = 0;
 		}
@@ -1971,12 +2023,22 @@ next_frame:
 					    "slot_tx %d\n",
 					    __func__, ch, slot_tx);
 				/* connect slot */
-				HFC_outb(hc, A_CON_HDLC, 0xc0 | 0x00 |
-				    V_HDLC_TRP | V_IFF);
+				if (hc->ctype == HFC_TYPE_XHFC)
+					HFC_outb(hc, A_CON_HDLC, 0xc0
+					    | 0x07 << 2 | V_HDLC_TRP | V_IFF);
+						/* Enable FIFO, no interrupt */
+				else
+					HFC_outb(hc, A_CON_HDLC, 0xc0 | 0x00 |
+					    V_HDLC_TRP | V_IFF);
 				HFC_outb_nodebug(hc, R_FIFO, ch<<1 | 1);
 				HFC_wait_nodebug(hc);
-				HFC_outb(hc, A_CON_HDLC, 0xc0 | 0x00 |
-				    V_HDLC_TRP | V_IFF);
+				if (hc->ctype == HFC_TYPE_XHFC)
+					HFC_outb(hc, A_CON_HDLC, 0xc0
+					    | 0x07 << 2 | V_HDLC_TRP | V_IFF);
+						/* Enable FIFO, no interrupt */
+				else
+					HFC_outb(hc, A_CON_HDLC, 0xc0 | 0x00 |
+					    V_HDLC_TRP | V_IFF);
 				HFC_outb_nodebug(hc, R_FIFO, ch<<1);
 				HFC_wait_nodebug(hc);
 			}
@@ -2004,10 +2066,22 @@ next_frame:
 			    "FIFO data: channel %d slot_tx %d\n",
 			    __func__, ch, slot_tx);
 		/* disconnect slot */
-		HFC_outb(hc, A_CON_HDLC, 0x80 | 0x00 | V_HDLC_TRP | V_IFF);
+		if (hc->ctype == HFC_TYPE_XHFC)
+			HFC_outb(hc, A_CON_HDLC, 0x80
+			    | 0x07 << 2 | V_HDLC_TRP | V_IFF);
+				/* Enable FIFO, no interrupt */
+		else
+			HFC_outb(hc, A_CON_HDLC, 0x80 | 0x00 |
+			    V_HDLC_TRP | V_IFF);
 		HFC_outb_nodebug(hc, R_FIFO, ch<<1 | 1);
 		HFC_wait_nodebug(hc);
-		HFC_outb(hc, A_CON_HDLC, 0x80 | 0x00 | V_HDLC_TRP | V_IFF);
+		if (hc->ctype == HFC_TYPE_XHFC)
+			HFC_outb(hc, A_CON_HDLC, 0x80
+			    | 0x07 << 2 | V_HDLC_TRP | V_IFF);
+				/* Enable FIFO, no interrupt */
+		else
+			HFC_outb(hc, A_CON_HDLC, 0x80 | 0x00 |
+			    V_HDLC_TRP | V_IFF);
 		HFC_outb_nodebug(hc, R_FIFO, ch<<1);
 		HFC_wait_nodebug(hc);
 	}
@@ -2327,7 +2401,7 @@ handle_timer_irq(struct hfc_multi *hc)
 		spin_unlock_irqrestore(&HFClock, flags);
 	}
 
-	if (hc->type != 1 || hc->e1_state == 1)
+	if (hc->ctype != HFC_TYPE_E1 || hc->e1_state == 1)
 		for (ch = 0; ch <= 31; ch++) {
 			if (hc->created[hc->chan[ch].port]) {
 				hfcmulti_tx(hc, ch);
@@ -2350,7 +2424,7 @@ handle_timer_irq(struct hfc_multi *hc)
 				}
 			}
 		}
-	if (hc->type == 1 && hc->created[0]) {
+	if (hc->ctype == HFC_TYPE_E1 && hc->created[0]) {
 		dch = hc->chan[hc->dslot].dch;
 		if (test_bit(HFC_CFG_REPORT_LOS, &hc->chan[hc->dslot].cfg)) {
 			/* LOS */
@@ -2610,7 +2684,10 @@ hfcmulti_interrupt(int intno, void *dev_id)
 		"card %d, this is no bug.\n", hc->id + 1, irqsem);
 	irqsem = hc->id + 1;
 #endif
-
+#ifdef CONFIG_MISDN_HFCMULTI_8xx
+	if (hc->immap->im_cpm.cp_pbdat & hc->pb_irqmsk)
+		goto irq_notforus;
+#endif
 	if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) {
 		spin_lock_irqsave(&plx_lock, flags);
 		plx_acc = hc->plx_membase + PLX_INTCSR;
@@ -2650,7 +2727,7 @@ hfcmulti_interrupt(int intno, void *dev_id)
 	}
 	hc->irqcnt++;
 	if (r_irq_statech) {
-		if (hc->type != 1)
+		if (hc->ctype != HFC_TYPE_E1)
 			ph_state_irq(hc, r_irq_statech);
 	}
 	if (status & V_EXT_IRQSTA)
@@ -2664,7 +2741,7 @@ hfcmulti_interrupt(int intno, void *dev_id)
 		r_irq_misc = HFC_inb_nodebug(hc, R_IRQ_MISC);
 		r_irq_misc &= hc->hw.r_irqmsk_misc; /* ignore disabled irqs */
 		if (r_irq_misc & V_STA_IRQ) {
-			if (hc->type == 1) {
+			if (hc->ctype == HFC_TYPE_E1) {
 				/* state machine */
 				dch = hc->chan[hc->dslot].dch;
 				e1_syncsta = HFC_inb_nodebug(hc, R_SYNC_STA);
@@ -2786,7 +2863,8 @@ mode_hfcmulti(struct hfc_multi *hc, int ch, int protocol, int slot_tx,
 		if (hc->slot_owner[oslot_tx<<1] == ch) {
 			HFC_outb(hc, R_SLOT, oslot_tx << 1);
 			HFC_outb(hc, A_SL_CFG, 0);
-			HFC_outb(hc, A_CONF, 0);
+			if (hc->ctype != HFC_TYPE_XHFC)
+				HFC_outb(hc, A_CONF, 0);
 			hc->slot_owner[oslot_tx<<1] = -1;
 		} else {
 			if (debug & DEBUG_HFCMULTI_MODE)
@@ -2839,7 +2917,9 @@ mode_hfcmulti(struct hfc_multi *hc, int ch, int protocol, int slot_tx,
 			    flow_tx, routing, conf);
 		HFC_outb(hc, R_SLOT, slot_tx << 1);
 		HFC_outb(hc, A_SL_CFG, (ch<<1) | routing);
-		HFC_outb(hc, A_CONF, (conf < 0) ? 0 : (conf | V_CONF_SL));
+		if (hc->ctype != HFC_TYPE_XHFC)
+			HFC_outb(hc, A_CONF,
+				(conf < 0) ? 0 : (conf | V_CONF_SL));
 		hc->slot_owner[slot_tx << 1] = ch;
 		hc->chan[ch].slot_tx = slot_tx;
 		hc->chan[ch].bank_tx = bank_tx;
@@ -2889,7 +2969,7 @@ mode_hfcmulti(struct hfc_multi *hc, int ch, int protocol, int slot_tx,
 		HFC_outb(hc, A_IRQ_MSK, 0);
 		HFC_outb(hc, R_INC_RES_FIFO, V_RES_F);
 		HFC_wait(hc);
-		if (hc->chan[ch].bch && hc->type != 1) {
+		if (hc->chan[ch].bch && hc->ctype != HFC_TYPE_E1) {
 			hc->hw.a_st_ctrl0[hc->chan[ch].port] &=
 			    ((ch & 0x3) == 0)? ~V_B1_EN: ~V_B2_EN;
 			HFC_outb(hc, R_ST_SEL, hc->chan[ch].port);
@@ -2965,8 +3045,13 @@ mode_hfcmulti(struct hfc_multi *hc, int ch, int protocol, int slot_tx,
 			/* enable TX fifo */
 			HFC_outb(hc, R_FIFO, ch << 1);
 			HFC_wait(hc);
-			HFC_outb(hc, A_CON_HDLC, flow_tx | 0x00 |
-			    V_HDLC_TRP | V_IFF);
+			if (hc->ctype == HFC_TYPE_XHFC)
+				HFC_outb(hc, A_CON_HDLC, flow_tx | 0x07 << 2 |
+					V_HDLC_TRP | V_IFF);
+					/* Enable FIFO, no interrupt */
+			else
+				HFC_outb(hc, A_CON_HDLC, flow_tx | 0x00 |
+					V_HDLC_TRP | V_IFF);
 			HFC_outb(hc, A_SUBCH_CFG, 0);
 			HFC_outb(hc, A_IRQ_MSK, 0);
 			HFC_outb(hc, R_INC_RES_FIFO, V_RES_F);
@@ -2976,13 +3061,19 @@ mode_hfcmulti(struct hfc_multi *hc, int ch, int protocol, int slot_tx,
 			/* enable RX fifo */
 			HFC_outb(hc, R_FIFO, (ch<<1)|1);
 			HFC_wait(hc);
-			HFC_outb(hc, A_CON_HDLC, flow_rx | 0x00 | V_HDLC_TRP);
+			if (hc->ctype == HFC_TYPE_XHFC)
+				HFC_outb(hc, A_CON_HDLC, flow_rx | 0x07 << 2 |
+					V_HDLC_TRP);
+					/* Enable FIFO, no interrupt*/
+			else
+				HFC_outb(hc, A_CON_HDLC, flow_rx | 0x00 |
+						V_HDLC_TRP);
 			HFC_outb(hc, A_SUBCH_CFG, 0);
 			HFC_outb(hc, A_IRQ_MSK, 0);
 			HFC_outb(hc, R_INC_RES_FIFO, V_RES_F);
 			HFC_wait(hc);
 		}
-		if (hc->type != 1) {
+		if (hc->ctype != HFC_TYPE_E1) {
 			hc->hw.a_st_ctrl0[hc->chan[ch].port] |=
 			    ((ch & 0x3) == 0) ? V_B1_EN : V_B2_EN;
 			HFC_outb(hc, R_ST_SEL, hc->chan[ch].port);
@@ -3003,7 +3094,7 @@ mode_hfcmulti(struct hfc_multi *hc, int ch, int protocol, int slot_tx,
 		/* enable TX fifo */
 		HFC_outb(hc, R_FIFO, ch<<1);
 		HFC_wait(hc);
-		if (hc->type == 1 || hc->chan[ch].bch) {
+		if (hc->ctype == HFC_TYPE_E1 || hc->chan[ch].bch) {
 			/* E1 or B-channel */
 			HFC_outb(hc, A_CON_HDLC, flow_tx | 0x04);
 			HFC_outb(hc, A_SUBCH_CFG, 0);
@@ -3019,7 +3110,7 @@ mode_hfcmulti(struct hfc_multi *hc, int ch, int protocol, int slot_tx,
 		HFC_outb(hc, R_FIFO, (ch<<1)|1);
 		HFC_wait(hc);
 		HFC_outb(hc, A_CON_HDLC, flow_rx | 0x04);
-		if (hc->type == 1 || hc->chan[ch].bch)
+		if (hc->ctype == HFC_TYPE_E1 || hc->chan[ch].bch)
 			HFC_outb(hc, A_SUBCH_CFG, 0); /* full 8 bits */
 		else
 			HFC_outb(hc, A_SUBCH_CFG, 2); /* 2 bits dchannel */
@@ -3028,7 +3119,7 @@ mode_hfcmulti(struct hfc_multi *hc, int ch, int protocol, int slot_tx,
 		HFC_wait(hc);
 		if (hc->chan[ch].bch) {
 			test_and_set_bit(FLG_HDLC, &hc->chan[ch].bch->Flags);
-			if (hc->type != 1) {
+			if (hc->ctype != HFC_TYPE_E1) {
 				hc->hw.a_st_ctrl0[hc->chan[ch].port] |=
 				  ((ch&0x3) == 0) ? V_B1_EN : V_B2_EN;
 				HFC_outb(hc, R_ST_SEL, hc->chan[ch].port);
@@ -3108,7 +3199,7 @@ hfcm_l1callback(struct dchannel *dch, u_int cmd)
 	case HW_RESET_REQ:
 		/* start activation */
 		spin_lock_irqsave(&hc->lock, flags);
-		if (hc->type == 1) {
+		if (hc->ctype == HFC_TYPE_E1) {
 			if (debug & DEBUG_HFCMULTI_MSG)
 				printk(KERN_DEBUG
 				    "%s: HW_RESET_REQ no BRI\n",
@@ -3129,7 +3220,7 @@ hfcm_l1callback(struct dchannel *dch, u_int cmd)
 	case HW_DEACT_REQ:
 		/* start deactivation */
 		spin_lock_irqsave(&hc->lock, flags);
-		if (hc->type == 1) {
+		if (hc->ctype == HFC_TYPE_E1) {
 			if (debug & DEBUG_HFCMULTI_MSG)
 				printk(KERN_DEBUG
 				    "%s: HW_DEACT_REQ no BRI\n",
@@ -3163,7 +3254,7 @@ hfcm_l1callback(struct dchannel *dch, u_int cmd)
 		break;
 	case HW_POWERUP_REQ:
 		spin_lock_irqsave(&hc->lock, flags);
-		if (hc->type == 1) {
+		if (hc->ctype == HFC_TYPE_E1) {
 			if (debug & DEBUG_HFCMULTI_MSG)
 				printk(KERN_DEBUG
 				    "%s: HW_POWERUP_REQ no BRI\n",
@@ -3240,7 +3331,7 @@ handle_dmsg(struct mISDNchannel *ch, struct sk_buff *skb)
 				    __func__, hc->chan[dch->slot].port,
 				    hc->ports-1);
 			/* start activation */
-			if (hc->type == 1) {
+			if (hc->ctype == HFC_TYPE_E1) {
 				ph_state_change(dch);
 				if (debug & DEBUG_HFCMULTI_STATE)
 					printk(KERN_DEBUG
@@ -3273,7 +3364,7 @@ handle_dmsg(struct mISDNchannel *ch, struct sk_buff *skb)
 				    __func__, hc->chan[dch->slot].port,
 				    hc->ports-1);
 			/* start deactivation */
-			if (hc->type == 1) {
+			if (hc->ctype == HFC_TYPE_E1) {
 				if (debug & DEBUG_HFCMULTI_MSG)
 					printk(KERN_DEBUG
 					    "%s: PH_DEACTIVATE no BRI\n",
@@ -3493,6 +3584,8 @@ channel_bctrl(struct bchannel *bch, struct mISDN_ctrl_req *cq)
 		features->hfc_id = hc->id;
 		if (test_bit(HFC_CHIP_DTMF, &hc->chip))
 			features->hfc_dtmf = 1;
+		if (test_bit(HFC_CHIP_CONF, &hc->chip))
+			features->hfc_conf = 1;
 		features->hfc_loops = 0;
 		if (test_bit(HFC_CHIP_B410P, &hc->chip)) {
 			features->hfc_echocanhw = 1;
@@ -3630,7 +3723,7 @@ ph_state_change(struct dchannel *dch)
 	hc = dch->hw;
 	ch = dch->slot;
 
-	if (hc->type == 1) {
+	if (hc->ctype == HFC_TYPE_E1) {
 		if (dch->dev.D.protocol == ISDN_P_TE_E1) {
 			if (debug & DEBUG_HFCMULTI_STATE)
 				printk(KERN_DEBUG
@@ -3755,7 +3848,7 @@ hfcmulti_initmode(struct dchannel *dch)
 	if (debug & DEBUG_HFCMULTI_INIT)
 		printk(KERN_DEBUG "%s: entered\n", __func__);
 
-	if (hc->type == 1) {
+	if (hc->ctype == HFC_TYPE_E1) {
 		hc->chan[hc->dslot].slot_tx = -1;
 		hc->chan[hc->dslot].slot_rx = -1;
 		hc->chan[hc->dslot].conf = -1;
@@ -3904,6 +3997,11 @@ hfcmulti_initmode(struct dchannel *dch)
 		}
 		if (!test_bit(HFC_CFG_NONCAP_TX, &hc->chan[i].cfg))
 			hc->hw.a_st_ctrl0[pt] |= V_TX_LI;
+		if (hc->ctype == HFC_TYPE_XHFC) {
+			hc->hw.a_st_ctrl0[pt] |= 0x40 /* V_ST_PU_CTRL */;
+			HFC_outb(hc, 0x35 /* A_ST_CTRL3 */,
+				0x7c << 1 /* V_ST_PULSE */);
+		}
 		/* line setup */
 		HFC_outb(hc, A_ST_CTRL0,  hc->hw.a_st_ctrl0[pt]);
 		/* disable E-channel */
@@ -3990,7 +4088,7 @@ open_bchannel(struct hfc_multi *hc, struct dchannel *dch,
 		return -EINVAL;
 	if (rq->protocol == ISDN_P_NONE)
 		return -EINVAL;
-	if (hc->type == 1)
+	if (hc->ctype == HFC_TYPE_E1)
 		ch = rq->adr.channel;
 	else
 		ch = (rq->adr.channel - 1) + (dch->slot - 2);
@@ -4081,7 +4179,7 @@ hfcm_dctrl(struct mISDNchannel *ch, u_int cmd, void *arg)
 		switch (rq->protocol) {
 		case ISDN_P_TE_S0:
 		case ISDN_P_NT_S0:
-			if (hc->type == 1) {
+			if (hc->ctype == HFC_TYPE_E1) {
 				err = -EINVAL;
 				break;
 			}
@@ -4089,7 +4187,7 @@ hfcm_dctrl(struct mISDNchannel *ch, u_int cmd, void *arg)
 			break;
 		case ISDN_P_TE_E1:
 		case ISDN_P_NT_E1:
-			if (hc->type != 1) {
+			if (hc->ctype != HFC_TYPE_E1) {
 				err = -EINVAL;
 				break;
 			}
@@ -4156,13 +4254,13 @@ init_card(struct hfc_multi *hc)
 	disable_hwirq(hc);
 	spin_unlock_irqrestore(&hc->lock, flags);
 
-	if (request_irq(hc->pci_dev->irq, hfcmulti_interrupt, IRQF_SHARED,
+	if (request_irq(hc->irq, hfcmulti_interrupt, IRQF_SHARED,
 	    "HFC-multi", hc)) {
 		printk(KERN_WARNING "mISDN: Could not get interrupt %d.\n",
-		    hc->pci_dev->irq);
+		    hc->irq);
+		hc->irq = 0;
 		return -EIO;
 	}
-	hc->irq = hc->pci_dev->irq;
 
 	if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) {
 		spin_lock_irqsave(&plx_lock, plx_flags);
@@ -4269,6 +4367,10 @@ setup_pci(struct hfc_multi *hc, struct pci_dev *pdev,
 	hc->ledstate = 0xAFFEAFFE;
 	hc->opticalsupport = m->opticalsupport;
 
+	hc->pci_iobase = 0;
+	hc->pci_membase = NULL;
+	hc->plx_membase = NULL;
+
 	/* set memory access methods */
 	if (m->io_mode) /* use mode from card config */
 		hc->io_mode = m->io_mode;
@@ -4276,44 +4378,12 @@ setup_pci(struct hfc_multi *hc, struct pci_dev *pdev,
 	case HFC_IO_MODE_PLXSD:
 		test_and_set_bit(HFC_CHIP_PLXSD, &hc->chip);
 		hc->slots = 128; /* required */
-		/* fall through */
-	case HFC_IO_MODE_PCIMEM:
 		hc->HFC_outb = HFC_outb_pcimem;
 		hc->HFC_inb = HFC_inb_pcimem;
 		hc->HFC_inw = HFC_inw_pcimem;
 		hc->HFC_wait = HFC_wait_pcimem;
 		hc->read_fifo = read_fifo_pcimem;
 		hc->write_fifo = write_fifo_pcimem;
-		break;
-	case HFC_IO_MODE_REGIO:
-		hc->HFC_outb = HFC_outb_regio;
-		hc->HFC_inb = HFC_inb_regio;
-		hc->HFC_inw = HFC_inw_regio;
-		hc->HFC_wait = HFC_wait_regio;
-		hc->read_fifo = read_fifo_regio;
-		hc->write_fifo = write_fifo_regio;
-		break;
-	default:
-		printk(KERN_WARNING "HFC-multi: Invalid IO mode.\n");
-		pci_disable_device(hc->pci_dev);
-		return -EIO;
-	}
-	hc->HFC_outb_nodebug = hc->HFC_outb;
-	hc->HFC_inb_nodebug = hc->HFC_inb;
-	hc->HFC_inw_nodebug = hc->HFC_inw;
-	hc->HFC_wait_nodebug = hc->HFC_wait;
-#ifdef HFC_REGISTER_DEBUG
-	hc->HFC_outb = HFC_outb_debug;
-	hc->HFC_inb = HFC_inb_debug;
-	hc->HFC_inw = HFC_inw_debug;
-	hc->HFC_wait = HFC_wait_debug;
-#endif
-	hc->pci_iobase = 0;
-	hc->pci_membase = NULL;
-	hc->plx_membase = NULL;
-
-	switch (hc->io_mode) {
-	case HFC_IO_MODE_PLXSD:
 		hc->plx_origmembase =  hc->pci_dev->resource[0].start;
 		/* MEMBASE 1 is PLX PCI Bridge */
 
@@ -4361,6 +4431,12 @@ setup_pci(struct hfc_multi *hc, struct pci_dev *pdev,
 		pci_write_config_word(hc->pci_dev, PCI_COMMAND, PCI_ENA_MEMIO);
 		break;
 	case HFC_IO_MODE_PCIMEM:
+		hc->HFC_outb = HFC_outb_pcimem;
+		hc->HFC_inb = HFC_inb_pcimem;
+		hc->HFC_inw = HFC_inw_pcimem;
+		hc->HFC_wait = HFC_wait_pcimem;
+		hc->read_fifo = read_fifo_pcimem;
+		hc->write_fifo = write_fifo_pcimem;
 		hc->pci_origmembase = hc->pci_dev->resource[1].start;
 		if (!hc->pci_origmembase) {
 			printk(KERN_WARNING
@@ -4377,12 +4453,18 @@ setup_pci(struct hfc_multi *hc, struct pci_dev *pdev,
 			pci_disable_device(hc->pci_dev);
 			return -EIO;
 		}
-		printk(KERN_INFO "card %d: defined at MEMBASE %#lx (%#lx) IRQ %d "
-		    "HZ %d leds-type %d\n", hc->id, (u_long)hc->pci_membase,
+		printk(KERN_INFO "card %d: defined at MEMBASE %#lx (%#lx) IRQ "
+		    "%d HZ %d leds-type %d\n", hc->id, (u_long)hc->pci_membase,
 		    hc->pci_origmembase, hc->pci_dev->irq, HZ, hc->leds);
 		pci_write_config_word(hc->pci_dev, PCI_COMMAND, PCI_ENA_MEMIO);
 		break;
 	case HFC_IO_MODE_REGIO:
+		hc->HFC_outb = HFC_outb_regio;
+		hc->HFC_inb = HFC_inb_regio;
+		hc->HFC_inw = HFC_inw_regio;
+		hc->HFC_wait = HFC_wait_regio;
+		hc->read_fifo = read_fifo_regio;
+		hc->write_fifo = write_fifo_regio;
 		hc->pci_iobase = (u_int) hc->pci_dev->resource[0].start;
 		if (!hc->pci_iobase) {
 			printk(KERN_WARNING
@@ -4464,7 +4546,7 @@ release_port(struct hfc_multi *hc, struct dchannel *dch)
 		dch->timer.function = NULL;
 	}
 
-	if (hc->type == 1) { /* E1 */
+	if (hc->ctype == HFC_TYPE_E1) { /* E1 */
 		/* remove sync */
 		if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) {
 			hc->syncronized = 0;
@@ -4863,9 +4945,15 @@ init_multi_port(struct hfc_multi *hc, int pt)
 		test_and_set_bit(HFC_CFG_DIS_ECHANNEL,
 		    &hc->chan[i + 2].cfg);
 	}
-	snprintf(name, MISDN_MAX_IDLEN - 1, "hfc-%ds.%d-%d",
-		hc->type, HFC_cnt + 1, pt + 1);
-	ret = mISDN_register_device(&dch->dev, &hc->pci_dev->dev, name);
+	if (hc->ctype == HFC_TYPE_XHFC) {
+		snprintf(name, MISDN_MAX_IDLEN - 1, "xhfc.%d-%d",
+			HFC_cnt + 1, pt + 1);
+		ret = mISDN_register_device(&dch->dev, NULL, name);
+	} else {
+		snprintf(name, MISDN_MAX_IDLEN - 1, "hfc-%ds.%d-%d",
+			hc->ctype, HFC_cnt + 1, pt + 1);
+		ret = mISDN_register_device(&dch->dev, &hc->pci_dev->dev, name);
+	}
 	if (ret)
 		goto free_chan;
 	hc->created[pt] = 1;
@@ -4876,9 +4964,9 @@ free_chan:
 }
 
 static int
-hfcmulti_init(struct pci_dev *pdev, const struct pci_device_id *ent)
+hfcmulti_init(struct hm_map *m, struct pci_dev *pdev,
+    const struct pci_device_id *ent)
 {
-	struct hm_map	*m = (struct hm_map *)ent->driver_data;
 	int		ret_err = 0;
 	int		pt;
 	struct hfc_multi	*hc;
@@ -4913,16 +5001,18 @@ hfcmulti_init(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 	spin_lock_init(&hc->lock);
 	hc->mtyp = m;
-	hc->type =  m->type;
+	hc->ctype =  m->type;
 	hc->ports = m->ports;
 	hc->id = HFC_cnt;
 	hc->pcm = pcm[HFC_cnt];
 	hc->io_mode = iomode[HFC_cnt];
-	if (dslot[HFC_cnt] < 0 && hc->type == 1) {
+	if (dslot[HFC_cnt] < 0 && hc->ctype == HFC_TYPE_E1) {
 		hc->dslot = 0;
 		printk(KERN_INFO "HFC-E1 card has disabled D-channel, but "
 			"31 B-channels\n");
-	} if (dslot[HFC_cnt] > 0 && dslot[HFC_cnt] < 32 && hc->type == 1) {
+	}
+	if (dslot[HFC_cnt] > 0 && dslot[HFC_cnt] < 32
+	    && hc->ctype == HFC_TYPE_E1) {
 		hc->dslot = dslot[HFC_cnt];
 		printk(KERN_INFO "HFC-E1 card has alternating D-channel on "
 			"time slot %d\n", dslot[HFC_cnt]);
@@ -4944,8 +5034,11 @@ hfcmulti_init(struct pci_dev *pdev, const struct pci_device_id *ent)
 	for (i = 0; i < (poll >> 1); i++)
 		hc->silence_data[i] = hc->silence;
 
-	if (!(type[HFC_cnt] & 0x200))
-		test_and_set_bit(HFC_CHIP_DTMF, &hc->chip);
+	if (hc->ctype != HFC_TYPE_XHFC) {
+		if (!(type[HFC_cnt] & 0x200))
+			test_and_set_bit(HFC_CHIP_DTMF, &hc->chip);
+		test_and_set_bit(HFC_CHIP_CONF, &hc->chip);
+	}
 
 	if (type[HFC_cnt] & 0x800)
 		test_and_set_bit(HFC_CHIP_PCM_SLAVE, &hc->chip);
@@ -4969,8 +5062,18 @@ hfcmulti_init(struct pci_dev *pdev, const struct pci_device_id *ent)
 		printk(KERN_NOTICE "Watchdog enabled\n");
 	}
 
-	/* setup pci, hc->slots may change due to PLXSD */
-	ret_err = setup_pci(hc, pdev, ent);
+	if (pdev && ent)
+		/* setup pci, hc->slots may change due to PLXSD */
+		ret_err = setup_pci(hc, pdev, ent);
+	else
+#ifdef CONFIG_MISDN_HFCMULTI_8xx
+		ret_err = setup_embedded(hc, m);
+#else
+	{
+		printk(KERN_WARNING "Embedded IO Mode not selected\n");
+		ret_err = -EIO;
+	}
+#endif
 	if (ret_err) {
 		if (hc == syncmaster)
 			syncmaster = NULL;
@@ -4978,7 +5081,17 @@ hfcmulti_init(struct pci_dev *pdev, const struct pci_device_id *ent)
 		return ret_err;
 	}
 
-	/* crate channels */
+	hc->HFC_outb_nodebug = hc->HFC_outb;
+	hc->HFC_inb_nodebug = hc->HFC_inb;
+	hc->HFC_inw_nodebug = hc->HFC_inw;
+	hc->HFC_wait_nodebug = hc->HFC_wait;
+#ifdef HFC_REGISTER_DEBUG
+	hc->HFC_outb = HFC_outb_debug;
+	hc->HFC_inb = HFC_inb_debug;
+	hc->HFC_inw = HFC_inw_debug;
+	hc->HFC_wait = HFC_wait_debug;
+#endif
+	/* create channels */
 	for (pt = 0; pt < hc->ports; pt++) {
 		if (Port_cnt >= MAX_PORTS) {
 			printk(KERN_ERR "too many ports (max=%d).\n",
@@ -4986,7 +5099,7 @@ hfcmulti_init(struct pci_dev *pdev, const struct pci_device_id *ent)
 			ret_err = -EINVAL;
 			goto free_card;
 		}
-		if (hc->type == 1)
+		if (hc->ctype == HFC_TYPE_E1)
 			ret_err = init_e1_port(hc, m);
 		else
 			ret_err = init_multi_port(hc, pt);
@@ -5070,6 +5183,7 @@ hfcmulti_init(struct pci_dev *pdev, const struct pci_device_id *ent)
 		hc->iclock = mISDN_register_clock("HFCMulti", 0, clockctl, hc);
 
 	/* initialize hardware */
+	hc->irq = (m->irq) ? : hc->pci_dev->irq;
 	ret_err = init_card(hc);
 	if (ret_err) {
 		printk(KERN_ERR "init card returns %d\n", ret_err);
@@ -5120,45 +5234,47 @@ static void __devexit hfc_remove_pci(struct pci_dev *pdev)
 #define VENDOR_PRIM	"PrimuX"
 
 static const struct hm_map hfcm_map[] = {
-/*0*/	{VENDOR_BN, "HFC-1S Card (mini PCI)", 4, 1, 1, 3, 0, DIP_4S, 0},
-/*1*/	{VENDOR_BN, "HFC-2S Card", 4, 2, 1, 3, 0, DIP_4S, 0},
-/*2*/	{VENDOR_BN, "HFC-2S Card (mini PCI)", 4, 2, 1, 3, 0, DIP_4S, 0},
-/*3*/	{VENDOR_BN, "HFC-4S Card", 4, 4, 1, 2, 0, DIP_4S, 0},
-/*4*/	{VENDOR_BN, "HFC-4S Card (mini PCI)", 4, 4, 1, 2, 0, 0, 0},
-/*5*/	{VENDOR_CCD, "HFC-4S Eval (old)", 4, 4, 0, 0, 0, 0, 0},
-/*6*/	{VENDOR_CCD, "HFC-4S IOB4ST", 4, 4, 1, 2, 0, DIP_4S, 0},
-/*7*/	{VENDOR_CCD, "HFC-4S", 4, 4, 1, 2, 0, 0, 0},
-/*8*/	{VENDOR_DIG, "HFC-4S Card", 4, 4, 0, 2, 0, 0, HFC_IO_MODE_REGIO},
-/*9*/	{VENDOR_CCD, "HFC-4S Swyx 4xS0 SX2 QuadBri", 4, 4, 1, 2, 0, 0, 0},
-/*10*/	{VENDOR_JH, "HFC-4S (junghanns 2.0)", 4, 4, 1, 2, 0, 0, 0},
-/*11*/	{VENDOR_PRIM, "HFC-2S Primux Card", 4, 2, 0, 0, 0, 0, 0},
-
-/*12*/	{VENDOR_BN, "HFC-8S Card", 8, 8, 1, 0, 0, 0, 0},
+/*0*/	{VENDOR_BN, "HFC-1S Card (mini PCI)", 4, 1, 1, 3, 0, DIP_4S, 0, 0},
+/*1*/	{VENDOR_BN, "HFC-2S Card", 4, 2, 1, 3, 0, DIP_4S, 0, 0},
+/*2*/	{VENDOR_BN, "HFC-2S Card (mini PCI)", 4, 2, 1, 3, 0, DIP_4S, 0, 0},
+/*3*/	{VENDOR_BN, "HFC-4S Card", 4, 4, 1, 2, 0, DIP_4S, 0, 0},
+/*4*/	{VENDOR_BN, "HFC-4S Card (mini PCI)", 4, 4, 1, 2, 0, 0, 0, 0},
+/*5*/	{VENDOR_CCD, "HFC-4S Eval (old)", 4, 4, 0, 0, 0, 0, 0, 0},
+/*6*/	{VENDOR_CCD, "HFC-4S IOB4ST", 4, 4, 1, 2, 0, DIP_4S, 0, 0},
+/*7*/	{VENDOR_CCD, "HFC-4S", 4, 4, 1, 2, 0, 0, 0, 0},
+/*8*/	{VENDOR_DIG, "HFC-4S Card", 4, 4, 0, 2, 0, 0, HFC_IO_MODE_REGIO, 0},
+/*9*/	{VENDOR_CCD, "HFC-4S Swyx 4xS0 SX2 QuadBri", 4, 4, 1, 2, 0, 0, 0, 0},
+/*10*/	{VENDOR_JH, "HFC-4S (junghanns 2.0)", 4, 4, 1, 2, 0, 0, 0, 0},
+/*11*/	{VENDOR_PRIM, "HFC-2S Primux Card", 4, 2, 0, 0, 0, 0, 0, 0},
+
+/*12*/	{VENDOR_BN, "HFC-8S Card", 8, 8, 1, 0, 0, 0, 0, 0},
 /*13*/	{VENDOR_BN, "HFC-8S Card (+)", 8, 8, 1, 8, 0, DIP_8S,
-		HFC_IO_MODE_REGIO},
-/*14*/	{VENDOR_CCD, "HFC-8S Eval (old)", 8, 8, 0, 0, 0, 0, 0},
-/*15*/	{VENDOR_CCD, "HFC-8S IOB4ST Recording", 8, 8, 1, 0, 0, 0, 0},
+		HFC_IO_MODE_REGIO, 0},
+/*14*/	{VENDOR_CCD, "HFC-8S Eval (old)", 8, 8, 0, 0, 0, 0, 0, 0},
+/*15*/	{VENDOR_CCD, "HFC-8S IOB4ST Recording", 8, 8, 1, 0, 0, 0, 0, 0},
 
-/*16*/	{VENDOR_CCD, "HFC-8S IOB8ST", 8, 8, 1, 0, 0, 0, 0},
-/*17*/	{VENDOR_CCD, "HFC-8S", 8, 8, 1, 0, 0, 0, 0},
-/*18*/	{VENDOR_CCD, "HFC-8S", 8, 8, 1, 0, 0, 0, 0},
+/*16*/	{VENDOR_CCD, "HFC-8S IOB8ST", 8, 8, 1, 0, 0, 0, 0, 0},
+/*17*/	{VENDOR_CCD, "HFC-8S", 8, 8, 1, 0, 0, 0, 0, 0},
+/*18*/	{VENDOR_CCD, "HFC-8S", 8, 8, 1, 0, 0, 0, 0, 0},
 
-/*19*/	{VENDOR_BN, "HFC-E1 Card", 1, 1, 0, 1, 0, DIP_E1, 0},
-/*20*/	{VENDOR_BN, "HFC-E1 Card (mini PCI)", 1, 1, 0, 1, 0, 0, 0},
-/*21*/	{VENDOR_BN, "HFC-E1+ Card (Dual)", 1, 1, 0, 1, 0, DIP_E1, 0},
-/*22*/	{VENDOR_BN, "HFC-E1 Card (Dual)", 1, 1, 0, 1, 0, DIP_E1, 0},
+/*19*/	{VENDOR_BN, "HFC-E1 Card", 1, 1, 0, 1, 0, DIP_E1, 0, 0},
+/*20*/	{VENDOR_BN, "HFC-E1 Card (mini PCI)", 1, 1, 0, 1, 0, 0, 0, 0},
+/*21*/	{VENDOR_BN, "HFC-E1+ Card (Dual)", 1, 1, 0, 1, 0, DIP_E1, 0, 0},
+/*22*/	{VENDOR_BN, "HFC-E1 Card (Dual)", 1, 1, 0, 1, 0, DIP_E1, 0, 0},
 
-/*23*/	{VENDOR_CCD, "HFC-E1 Eval (old)", 1, 1, 0, 0, 0, 0, 0},
-/*24*/	{VENDOR_CCD, "HFC-E1 IOB1E1", 1, 1, 0, 1, 0, 0, 0},
-/*25*/	{VENDOR_CCD, "HFC-E1", 1, 1, 0, 1, 0, 0, 0},
+/*23*/	{VENDOR_CCD, "HFC-E1 Eval (old)", 1, 1, 0, 0, 0, 0, 0, 0},
+/*24*/	{VENDOR_CCD, "HFC-E1 IOB1E1", 1, 1, 0, 1, 0, 0, 0, 0},
+/*25*/	{VENDOR_CCD, "HFC-E1", 1, 1, 0, 1, 0, 0, 0, 0},
 
 /*26*/	{VENDOR_CCD, "HFC-4S Speech Design", 4, 4, 0, 0, 0, 0,
-		HFC_IO_MODE_PLXSD},
+		HFC_IO_MODE_PLXSD, 0},
 /*27*/	{VENDOR_CCD, "HFC-E1 Speech Design", 1, 1, 0, 0, 0, 0,
-		HFC_IO_MODE_PLXSD},
-/*28*/	{VENDOR_CCD, "HFC-4S OpenVox", 4, 4, 1, 0, 0, 0, 0},
-/*29*/	{VENDOR_CCD, "HFC-2S OpenVox", 4, 2, 1, 0, 0, 0, 0},
-/*30*/	{VENDOR_CCD, "HFC-8S OpenVox", 8, 8, 1, 0, 0, 0, 0},
+		HFC_IO_MODE_PLXSD, 0},
+/*28*/	{VENDOR_CCD, "HFC-4S OpenVox", 4, 4, 1, 0, 0, 0, 0, 0},
+/*29*/	{VENDOR_CCD, "HFC-2S OpenVox", 4, 2, 1, 0, 0, 0, 0, 0},
+/*30*/	{VENDOR_CCD, "HFC-8S OpenVox", 8, 8, 1, 0, 0, 0, 0, 0},
+/*31*/	{VENDOR_CCD, "XHFC-4S Speech Design", 5, 4, 0, 0, 0, 0,
+		HFC_IO_MODE_EMBSD, XHFC_IRQ},
 };
 
 #undef H
@@ -5265,7 +5381,7 @@ hfcmulti_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		    "Please contact the driver maintainer for support.\n");
 		return -ENODEV;
 	}
-	ret = hfcmulti_init(pdev, ent);
+	ret = hfcmulti_init(m, pdev, ent);
 	if (ret)
 		return ret;
 	HFC_cnt++;
@@ -5295,6 +5411,8 @@ static int __init
 HFCmulti_init(void)
 {
 	int err;
+	int i, xhfc = 0;
+	struct hm_map m;
 
 	printk(KERN_INFO "mISDN: HFC-multi driver %s\n", HFC_MULTI_VERSION);
 
@@ -5342,11 +5460,43 @@ HFCmulti_init(void)
 	if (!clock)
 		clock = 1;
 
+	/* Register the embedded devices.
+	 * This should be done before the PCI cards registration */
+	switch (hwid) {
+	case HWID_MINIP4:
+		xhfc = 1;
+		m = hfcm_map[31];
+		break;
+	case HWID_MINIP8:
+		xhfc = 2;
+		m = hfcm_map[31];
+		break;
+	case HWID_MINIP16:
+		xhfc = 4;
+		m = hfcm_map[31];
+		break;
+	default:
+		xhfc = 0;
+	}
+
+	for (i = 0; i < xhfc; ++i) {
+		err = hfcmulti_init(&m, NULL, NULL);
+		if (err) {
+			printk(KERN_ERR "error registering embedded driver: "
+				"%x\n", err);
+			return -err;
+		}
+		HFC_cnt++;
+		printk(KERN_INFO "%d devices registered\n", HFC_cnt);
+	}
+
+	/* Register the PCI cards */
 	err = pci_register_driver(&hfcmultipci_driver);
 	if (err < 0) {
 		printk(KERN_ERR "error registering pci driver: %x\n", err);
 		return err;
 	}
+
 	return 0;
 }
 
diff --git a/drivers/isdn/mISDN/dsp_cmx.c b/drivers/isdn/mISDN/dsp_cmx.c
index d19b4f6d7d8..05866184ba2 100644
--- a/drivers/isdn/mISDN/dsp_cmx.c
+++ b/drivers/isdn/mISDN/dsp_cmx.c
@@ -947,6 +947,10 @@ conf_software:
 	if (current_conf >= 0) {
 join_members:
 		list_for_each_entry(member, &conf->mlist, list) {
+			/* if no conference engine on our chip, change to
+			 * software */
+			if (!member->dsp->features.hfc_conf)
+				goto conf_software;
 			/* in case of hdlc, change to software */
 			if (member->dsp->hdlc)
 				goto conf_software;
diff --git a/include/linux/mISDNdsp.h b/include/linux/mISDNdsp.h
index 2c483d45f14..41d1eeb9b3b 100644
--- a/include/linux/mISDNdsp.h
+++ b/include/linux/mISDNdsp.h
@@ -25,6 +25,7 @@ extern void mISDN_dsp_element_unregister(struct mISDN_dsp_element *elem);
 struct dsp_features {
 	int	hfc_id; /* unique id to identify the chip (or -1) */
 	int	hfc_dtmf; /* set if HFCmulti card supports dtmf */
+	int	hfc_conf; /* set if HFCmulti card supports conferences */
 	int	hfc_loops; /* set if card supports tone loops */
 	int	hfc_echocanhw; /* set if card supports echocancelation*/
 	int	pcm_id; /* unique id to identify the pcm bus (or -1) */
-- 
cgit v1.2.3-70-g09d2


From 7245a2fe3c10ed7c2e9b1c8a83af5919c0cc0a89 Mon Sep 17 00:00:00 2001
From: Andreas Eversberg <andreas@eversberg.eu>
Date: Fri, 22 May 2009 11:04:55 +0000
Subject: mISDN: Add PCI ID for Junghanns 8S card

new id for HFC-8S

Signed-off-by: Andreas Eversberg <andreas@eversberg.eu>
Signed-off-by: Karsten Keil <keil@b1-systems.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hardware/mISDN/hfcmulti.c | 3 +++
 include/linux/pci_ids.h                | 1 +
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c
index 5be4edf631d..50e9f4d88f4 100644
--- a/drivers/isdn/hardware/mISDN/hfcmulti.c
+++ b/drivers/isdn/hardware/mISDN/hfcmulti.c
@@ -5275,6 +5275,7 @@ static const struct hm_map hfcm_map[] = {
 /*30*/	{VENDOR_CCD, "HFC-8S OpenVox", 8, 8, 1, 0, 0, 0, 0, 0},
 /*31*/	{VENDOR_CCD, "XHFC-4S Speech Design", 5, 4, 0, 0, 0, 0,
 		HFC_IO_MODE_EMBSD, XHFC_IRQ},
+/*32*/	{VENDOR_JH, "HFC-8S (junghanns)", 8, 8, 1, 0, 0, 0, 0, 0},
 };
 
 #undef H
@@ -5328,6 +5329,8 @@ static struct pci_device_id hfmultipci_ids[] __devinitdata = {
 		PCI_SUBDEVICE_ID_CCD_HFC8S, 0, 0, H(18)}, /* 8S */
 	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_VENDOR_ID_CCD,
 		PCI_SUBDEVICE_ID_CCD_OV8S, 0, 0, H(30)}, /* OpenVox 8 */
+	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_VENDOR_ID_CCD,
+		PCI_SUBDEVICE_ID_CCD_JH8S, 0, 0, H(32)}, /* Junganns 8S  */
 
 
 	/* Cards with HFC-E1 Chip */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 3038ac25491..4bdff984ac9 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1914,6 +1914,7 @@
 #define PCI_SUBDEVICE_ID_CCD_SWYX4S	0xB540
 #define PCI_SUBDEVICE_ID_CCD_JH4S20	0xB550
 #define PCI_SUBDEVICE_ID_CCD_IOB8ST_1	0xB552
+#define PCI_SUBDEVICE_ID_CCD_JH8S	0xB55B
 #define PCI_SUBDEVICE_ID_CCD_BN4S	0xB560
 #define PCI_SUBDEVICE_ID_CCD_BN8S	0xB562
 #define PCI_SUBDEVICE_ID_CCD_BNE1	0xB563
-- 
cgit v1.2.3-70-g09d2


From eac74af9b547e29c9634ed5eff4d514349e73310 Mon Sep 17 00:00:00 2001
From: Karsten Keil <keil@b1-systems.de>
Date: Fri, 22 May 2009 11:04:56 +0000
Subject: mISDN: Cleanup debug messages

This patch make debug printk's KERN_DEBUG and also fix some
codestyle issues.

Signed-off-by: Karsten Keil <keil@b1-systems.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hardware/mISDN/hfcmulti.c | 159 +++++++++++++++++----------------
 drivers/isdn/hardware/mISDN/hfcpci.c   |  28 +++---
 drivers/isdn/mISDN/dsp_audio.c         |   5 +-
 drivers/isdn/mISDN/dsp_cmx.c           |   7 +-
 drivers/isdn/mISDN/dsp_core.c          |  14 +--
 drivers/isdn/mISDN/dsp_ecdis.h         |   2 +-
 drivers/isdn/mISDN/dsp_tones.c         |  23 +++--
 drivers/isdn/mISDN/l1oip_core.c        |  30 +++----
 drivers/isdn/mISDN/socket.c            |   2 +-
 drivers/isdn/mISDN/tei.c               |   1 -
 drivers/isdn/mISDN/timerdev.c          |   2 +-
 include/linux/mISDNif.h                |  14 +--
 12 files changed, 148 insertions(+), 139 deletions(-)

(limited to 'include')

diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c
index 50e9f4d88f4..d60f5b7a41d 100644
--- a/drivers/isdn/hardware/mISDN/hfcmulti.c
+++ b/drivers/isdn/hardware/mISDN/hfcmulti.c
@@ -275,7 +275,7 @@ HFC_outb_pcimem(struct hfc_multi *hc, u_char reg, u_char val,
 HFC_outb_pcimem(struct hfc_multi *hc, u_char reg, u_char val)
 #endif
 {
-	writeb(val, (hc->pci_membase)+reg);
+	writeb(val, hc->pci_membase + reg);
 }
 static u_char
 #ifdef HFC_REGISTER_DEBUG
@@ -284,7 +284,7 @@ HFC_inb_pcimem(struct hfc_multi *hc, u_char reg, const char *function, int line)
 HFC_inb_pcimem(struct hfc_multi *hc, u_char reg)
 #endif
 {
-	return readb((hc->pci_membase)+reg);
+	return readb(hc->pci_membase + reg);
 }
 static u_short
 #ifdef HFC_REGISTER_DEBUG
@@ -293,7 +293,7 @@ HFC_inw_pcimem(struct hfc_multi *hc, u_char reg, const char *function, int line)
 HFC_inw_pcimem(struct hfc_multi *hc, u_char reg)
 #endif
 {
-	return readw((hc->pci_membase)+reg);
+	return readw(hc->pci_membase + reg);
 }
 static void
 #ifdef HFC_REGISTER_DEBUG
@@ -302,7 +302,8 @@ HFC_wait_pcimem(struct hfc_multi *hc, const char *function, int line)
 HFC_wait_pcimem(struct hfc_multi *hc)
 #endif
 {
-	while (readb((hc->pci_membase)+R_STATUS) & V_BUSY);
+	while (readb(hc->pci_membase + R_STATUS) & V_BUSY)
+		cpu_relax();
 }
 
 /* HFC_IO_MODE_REGIO */
@@ -314,7 +315,7 @@ HFC_outb_regio(struct hfc_multi *hc, u_char reg, u_char val,
 HFC_outb_regio(struct hfc_multi *hc, u_char reg, u_char val)
 #endif
 {
-	outb(reg, (hc->pci_iobase)+4);
+	outb(reg, hc->pci_iobase + 4);
 	outb(val, hc->pci_iobase);
 }
 static u_char
@@ -324,7 +325,7 @@ HFC_inb_regio(struct hfc_multi *hc, u_char reg, const char *function, int line)
 HFC_inb_regio(struct hfc_multi *hc, u_char reg)
 #endif
 {
-	outb(reg, (hc->pci_iobase)+4);
+	outb(reg, hc->pci_iobase + 4);
 	return inb(hc->pci_iobase);
 }
 static u_short
@@ -334,7 +335,7 @@ HFC_inw_regio(struct hfc_multi *hc, u_char reg, const char *function, int line)
 HFC_inw_regio(struct hfc_multi *hc, u_char reg)
 #endif
 {
-	outb(reg, (hc->pci_iobase)+4);
+	outb(reg, hc->pci_iobase + 4);
 	return inw(hc->pci_iobase);
 }
 static void
@@ -344,8 +345,9 @@ HFC_wait_regio(struct hfc_multi *hc, const char *function, int line)
 HFC_wait_regio(struct hfc_multi *hc)
 #endif
 {
-	outb(R_STATUS, (hc->pci_iobase)+4);
-	while (inb(hc->pci_iobase) & V_BUSY);
+	outb(R_STATUS, hc->pci_iobase + 4);
+	while (inb(hc->pci_iobase) & V_BUSY)
+		cpu_relax();
 }
 
 #ifdef HFC_REGISTER_DEBUG
@@ -364,14 +366,14 @@ HFC_outb_debug(struct hfc_multi *hc, u_char reg, u_char val,
 	if (regname[0] == '\0')
 		strcpy(regname, "register");
 
-	bits[7] = '0'+(!!(val&1));
-	bits[6] = '0'+(!!(val&2));
-	bits[5] = '0'+(!!(val&4));
-	bits[4] = '0'+(!!(val&8));
-	bits[3] = '0'+(!!(val&16));
-	bits[2] = '0'+(!!(val&32));
-	bits[1] = '0'+(!!(val&64));
-	bits[0] = '0'+(!!(val&128));
+	bits[7] = '0' + (!!(val & 1));
+	bits[6] = '0' + (!!(val & 2));
+	bits[5] = '0' + (!!(val & 4));
+	bits[4] = '0' + (!!(val & 8));
+	bits[3] = '0' + (!!(val & 16));
+	bits[2] = '0' + (!!(val & 32));
+	bits[1] = '0' + (!!(val & 64));
+	bits[0] = '0' + (!!(val & 128));
 	printk(KERN_DEBUG
 	    "HFC_outb(chip %d, %02x=%s, 0x%02x=%s); in %s() line %d\n",
 	    hc->id, reg, regname, val, bits, function, line);
@@ -394,14 +396,14 @@ HFC_inb_debug(struct hfc_multi *hc, u_char reg, const char *function, int line)
 	if (regname[0] == '\0')
 		strcpy(regname, "register");
 
-	bits[7] = '0'+(!!(val&1));
-	bits[6] = '0'+(!!(val&2));
-	bits[5] = '0'+(!!(val&4));
-	bits[4] = '0'+(!!(val&8));
-	bits[3] = '0'+(!!(val&16));
-	bits[2] = '0'+(!!(val&32));
-	bits[1] = '0'+(!!(val&64));
-	bits[0] = '0'+(!!(val&128));
+	bits[7] = '0' + (!!(val & 1));
+	bits[6] = '0' + (!!(val & 2));
+	bits[5] = '0' + (!!(val & 4));
+	bits[4] = '0' + (!!(val & 8));
+	bits[3] = '0' + (!!(val & 16));
+	bits[2] = '0' + (!!(val & 32));
+	bits[1] = '0' + (!!(val & 64));
+	bits[0] = '0' + (!!(val & 128));
 	printk(KERN_DEBUG
 	    "HFC_inb(chip %d, %02x=%s) = 0x%02x=%s; in %s() line %d\n",
 	    hc->id, reg, regname, val, bits, function, line);
@@ -481,6 +483,7 @@ write_fifo_pcimem(struct hfc_multi *hc, u_char *data, int len)
 		len--;
 	}
 }
+
 /* read fifo data (REGIO) */
 static void
 read_fifo_regio(struct hfc_multi *hc, u_char *data, int len)
@@ -526,7 +529,6 @@ read_fifo_pcimem(struct hfc_multi *hc, u_char *data, int len)
 	}
 }
 
-
 static void
 enable_hwirq(struct hfc_multi *hc)
 {
@@ -1011,7 +1013,7 @@ plxsd_checksync(struct hfc_multi *hc, int rm)
 	if (hc->syncronized) {
 		if (syncmaster == NULL) {
 			if (debug & DEBUG_HFCMULTI_PLXSD)
-				printk(KERN_WARNING "%s: GOT sync on card %d"
+				printk(KERN_DEBUG "%s: GOT sync on card %d"
 					" (id=%d)\n", __func__, hc->id + 1,
 					hc->id);
 			hfcmulti_resync(hc, hc, rm);
@@ -1019,7 +1021,7 @@ plxsd_checksync(struct hfc_multi *hc, int rm)
 	} else {
 		if (syncmaster == hc) {
 			if (debug & DEBUG_HFCMULTI_PLXSD)
-				printk(KERN_WARNING "%s: LOST sync on card %d"
+				printk(KERN_DEBUG "%s: LOST sync on card %d"
 					" (id=%d)\n", __func__, hc->id + 1,
 					hc->id);
 			hfcmulti_resync(hc, NULL, rm);
@@ -1068,7 +1070,7 @@ release_io_hfcmulti(struct hfc_multi *hc)
 		pv &= ~PLX_DSP_RES_N;
 		writel(pv, plx_acc_32);
 		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_WARNING "%s: PCM off: PLX_GPIO=%x\n",
+			printk(KERN_DEBUG "%s: PCM off: PLX_GPIO=%x\n",
 				__func__, pv);
 		spin_unlock_irqrestore(&plx_lock, plx_flags);
 	}
@@ -1119,8 +1121,8 @@ init_chip(struct hfc_multi *hc)
 	if (debug & DEBUG_HFCMULTI_INIT)
 		printk(KERN_DEBUG "%s: entered\n", __func__);
 	val = HFC_inb(hc, R_CHIP_ID);
-	if ((val>>4) != 0x8 && (val>>4) != 0xc && (val>>4) != 0xe
-			&& (val>>1) != 0x31) {
+	if ((val >> 4) != 0x8 && (val >> 4) != 0xc && (val >> 4) != 0xe &&
+	    (val >> 1) != 0x31) {
 		printk(KERN_INFO "HFC_multi: unknown CHIP_ID:%x\n", (u_int)val);
 		err = -EIO;
 		goto out;
@@ -1202,7 +1204,7 @@ init_chip(struct hfc_multi *hc)
 		writel(pv, plx_acc_32);
 		spin_unlock_irqrestore(&plx_lock, plx_flags);
 		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_WARNING "%s: slave/term: PLX_GPIO=%x\n",
+			printk(KERN_DEBUG "%s: slave/term: PLX_GPIO=%x\n",
 				__func__, pv);
 		/*
 		 * If we are the 3rd PLXSD card or higher, we must turn
@@ -1230,8 +1232,9 @@ init_chip(struct hfc_multi *hc)
 			writel(pv, plx_acc_32);
 			spin_unlock_irqrestore(&plx_lock, plx_flags);
 			if (debug & DEBUG_HFCMULTI_INIT)
-			    printk(KERN_WARNING "%s: term off: PLX_GPIO=%x\n",
-					__func__, pv);
+				printk(KERN_DEBUG
+				    "%s: term off: PLX_GPIO=%x\n",
+				    __func__, pv);
 		}
 		spin_unlock_irqrestore(&HFClock, hfc_flags);
 		hc->hw.r_pcm_md0 = V_F0_LEN; /* shift clock for DSP */
@@ -1292,13 +1295,13 @@ init_chip(struct hfc_multi *hc)
 			pv |= PLX_MASTER_EN | PLX_SLAVE_EN_N;
 			pv |= PLX_SYNC_O_EN;
 			if (debug & DEBUG_HFCMULTI_INIT)
-				printk(KERN_WARNING "%s: master: PLX_GPIO=%x\n",
+				printk(KERN_DEBUG "%s: master: PLX_GPIO=%x\n",
 					__func__, pv);
 		} else {
 			pv &= ~(PLX_MASTER_EN | PLX_SLAVE_EN_N);
 			pv &= ~PLX_SYNC_O_EN;
 			if (debug & DEBUG_HFCMULTI_INIT)
-				printk(KERN_WARNING "%s: slave: PLX_GPIO=%x\n",
+				printk(KERN_DEBUG "%s: slave: PLX_GPIO=%x\n",
 					__func__, pv);
 		}
 		writel(pv, plx_acc_32);
@@ -1410,8 +1413,8 @@ controller_fail:
 				writel(pv, plx_acc_32);
 				spin_unlock_irqrestore(&plx_lock, plx_flags);
 				if (debug & DEBUG_HFCMULTI_INIT)
-				    printk(KERN_WARNING "%s: master: PLX_GPIO"
-					"=%x\n", __func__, pv);
+					printk(KERN_DEBUG "%s: master: "
+					    "PLX_GPIO=%x\n", __func__, pv);
 			}
 			hc->hw.r_pcm_md0 |= V_PCM_MD;
 			HFC_outb(hc, R_PCM_MD0, hc->hw.r_pcm_md0 | 0x00);
@@ -1445,7 +1448,7 @@ controller_fail:
 		writel(pv, plx_acc_32);
 		spin_unlock_irqrestore(&plx_lock, plx_flags);
 		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_WARNING "%s: reset off: PLX_GPIO=%x\n",
+			printk(KERN_DEBUG "%s: reset off: PLX_GPIO=%x\n",
 				__func__, pv);
 	}
 
@@ -1878,7 +1881,7 @@ hfcmulti_dtmf(struct hfc_multi *hc)
 			hc->chan[ch].coeff_count = 0;
 			skb = mI_alloc_skb(512, GFP_ATOMIC);
 			if (!skb) {
-				printk(KERN_WARNING "%s: No memory for skb\n",
+				printk(KERN_DEBUG "%s: No memory for skb\n",
 				    __func__);
 				continue;
 			}
@@ -2104,7 +2107,7 @@ next_frame:
 		printk(KERN_DEBUG "%s(card %d): fifo(%d) has %d bytes space "
 		    "left (z1=%04x, z2=%04x) sending %d of %d bytes %s\n",
 			__func__, hc->id + 1, ch, Zspace, z1, z2, ii-i, len-i,
-			temp ? "HDLC":"TRANS");
+			temp ? "HDLC" : "TRANS");
 
 	/* Have to prep the audio data */
 	hc->write_fifo(hc, d, ii - i);
@@ -2780,13 +2783,13 @@ hfcmulti_interrupt(int intno, void *dev_id)
 			handle_timer_irq(hc);
 		}
 
-		if (r_irq_misc & V_DTMF_IRQ) {
+		if (r_irq_misc & V_DTMF_IRQ)
 			hfcmulti_dtmf(hc);
-		}
+
 		if (r_irq_misc & V_IRQ_PROC) {
 			static int irq_proc_cnt;
 			if (!irq_proc_cnt++)
-				printk(KERN_WARNING "%s: got V_IRQ_PROC -"
+				printk(KERN_DEBUG "%s: got V_IRQ_PROC -"
 				    " this should not happen\n", __func__);
 		}
 
@@ -2936,7 +2939,7 @@ mode_hfcmulti(struct hfc_multi *hc, int ch, int protocol, int slot_tx,
 		else
 			flow_rx = 0xc0; /* ST->(FIFO,PCM) */
 		/* put on slot */
-		routing = bank_rx?0x80:0xc0; /* reversed */
+		routing = bank_rx ? 0x80 : 0xc0; /* reversed */
 		if (conf >= 0 || bank_rx > 1)
 			routing = 0x40; /* loop */
 		if (debug & DEBUG_HFCMULTI_MODE)
@@ -2971,7 +2974,7 @@ mode_hfcmulti(struct hfc_multi *hc, int ch, int protocol, int slot_tx,
 		HFC_wait(hc);
 		if (hc->chan[ch].bch && hc->ctype != HFC_TYPE_E1) {
 			hc->hw.a_st_ctrl0[hc->chan[ch].port] &=
-			    ((ch & 0x3) == 0)? ~V_B1_EN: ~V_B2_EN;
+			    ((ch & 0x3) == 0) ? ~V_B1_EN : ~V_B2_EN;
 			HFC_outb(hc, R_ST_SEL, hc->chan[ch].port);
 			/* undocumented: delay after R_ST_SEL */
 			udelay(1);
@@ -3505,9 +3508,9 @@ handle_bmsg(struct mISDNchannel *ch, struct sk_buff *skb)
 		switch (hh->id) {
 		case HFC_SPL_LOOP_ON: /* set sample loop */
 			if (debug & DEBUG_HFCMULTI_MSG)
-			printk(KERN_DEBUG
-			    "%s: HFC_SPL_LOOP_ON (len = %d)\n",
-			    __func__, skb->len);
+				printk(KERN_DEBUG
+				    "%s: HFC_SPL_LOOP_ON (len = %d)\n",
+				    __func__, skb->len);
 			ret = 0;
 			break;
 		case HFC_SPL_LOOP_OFF: /* set silence */
@@ -3716,8 +3719,7 @@ ph_state_change(struct dchannel *dch)
 	int ch, i;
 
 	if (!dch) {
-		printk(KERN_WARNING "%s: ERROR given dch is NULL\n",
-		    __func__);
+		printk(KERN_WARNING "%s: ERROR given dch is NULL\n", __func__);
 		return;
 	}
 	hc = dch->hw;
@@ -3738,14 +3740,15 @@ ph_state_change(struct dchannel *dch)
 		switch (dch->state) {
 		case (1):
 			if (hc->e1_state != 1) {
-			    for (i = 1; i <= 31; i++) {
-				/* reset fifos on e1 activation */
-				HFC_outb_nodebug(hc, R_FIFO, (i << 1) | 1);
-				HFC_wait_nodebug(hc);
-				HFC_outb_nodebug(hc,
-					R_INC_RES_FIFO, V_RES_F);
-				HFC_wait_nodebug(hc);
-			    }
+				for (i = 1; i <= 31; i++) {
+					/* reset fifos on e1 activation */
+					HFC_outb_nodebug(hc, R_FIFO,
+						(i << 1) | 1);
+					HFC_wait_nodebug(hc);
+					HFC_outb_nodebug(hc, R_INC_RES_FIFO,
+						V_RES_F);
+					HFC_wait_nodebug(hc);
+				}
 			}
 			test_and_set_bit(FLG_ACTIVE, &dch->Flags);
 			_queue_data(&dch->dev.D, PH_ACTIVATE_IND,
@@ -4045,12 +4048,12 @@ open_dchannel(struct hfc_multi *hc, struct dchannel *dch,
 		return -EINVAL;
 	if ((dch->dev.D.protocol != ISDN_P_NONE) &&
 	    (dch->dev.D.protocol != rq->protocol)) {
-	    if (debug & DEBUG_HFCMULTI_MODE)
-		printk(KERN_WARNING "%s: change protocol %x to %x\n",
-		    __func__, dch->dev.D.protocol, rq->protocol);
+		if (debug & DEBUG_HFCMULTI_MODE)
+			printk(KERN_DEBUG "%s: change protocol %x to %x\n",
+			    __func__, dch->dev.D.protocol, rq->protocol);
 	}
-	if ((dch->dev.D.protocol == ISDN_P_TE_S0)
-	 && (rq->protocol != ISDN_P_TE_S0))
+	if ((dch->dev.D.protocol == ISDN_P_TE_S0) &&
+	    (rq->protocol != ISDN_P_TE_S0))
 		l1_event(dch->l1, CLOSE_CHANNEL);
 	if (dch->dev.D.protocol != rq->protocol) {
 		if (rq->protocol == ISDN_P_TE_S0) {
@@ -4127,9 +4130,9 @@ channel_dctrl(struct dchannel *dch, struct mISDN_ctrl_req *cq)
 		wd_cnt = cq->p1 & 0xf;
 		wd_mode = !!(cq->p1 >> 4);
 		if (debug & DEBUG_HFCMULTI_MSG)
-			printk(KERN_DEBUG
-			    "%s: MISDN_CTRL_HFC_WD_INIT mode %s counter 0x%x\n",
-			    __func__, wd_mode ? "AUTO" : "MANUAL", wd_cnt);
+			printk(KERN_DEBUG "%s: MISDN_CTRL_HFC_WD_INIT mode %s"
+			    ", counter 0x%x\n", __func__,
+			    wd_mode ? "AUTO" : "MANUAL", wd_cnt);
 		/* set the watchdog timer */
 		HFC_outb(hc, R_TI_WD, poll_timer | (wd_cnt << 4));
 		hc->hw.r_bert_wd_md = (wd_mode ? V_AUTO_WD_RES : 0);
@@ -4139,8 +4142,8 @@ channel_dctrl(struct dchannel *dch, struct mISDN_ctrl_req *cq)
 		HFC_outb(hc, R_BERT_WD_MD, hc->hw.r_bert_wd_md | V_WD_RES);
 		if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) {
 			/* enable the watchdog output for Speech-Design */
-			HFC_outb(hc, R_GPIO_SEL, V_GPIO_SEL7);
-			HFC_outb(hc, R_GPIO_EN1, V_GPIO_EN15);
+			HFC_outb(hc, R_GPIO_SEL,  V_GPIO_SEL7);
+			HFC_outb(hc, R_GPIO_EN1,  V_GPIO_EN15);
 			HFC_outb(hc, R_GPIO_OUT1, 0);
 			HFC_outb(hc, R_GPIO_OUT1, V_GPIO_OUT15);
 		}
@@ -4319,7 +4322,7 @@ error:
 	}
 
 	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_WARNING "%s: free irq %d\n", __func__, hc->irq);
+		printk(KERN_DEBUG "%s: free irq %d\n", __func__, hc->irq);
 	if (hc->irq) {
 		free_irq(hc->irq, hc);
 		hc->irq = 0;
@@ -4624,7 +4627,7 @@ release_card(struct hfc_multi *hc)
 	int	ch;
 
 	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_WARNING "%s: release card (%d) entered\n",
+		printk(KERN_DEBUG "%s: release card (%d) entered\n",
 		    __func__, hc->id);
 
 	/* unregister clock source */
@@ -4653,7 +4656,7 @@ release_card(struct hfc_multi *hc)
 	/* release hardware & irq */
 	if (hc->irq) {
 		if (debug & DEBUG_HFCMULTI_INIT)
-			printk(KERN_WARNING "%s: free irq %d\n",
+			printk(KERN_DEBUG "%s: free irq %d\n",
 			    __func__, hc->irq);
 		free_irq(hc->irq, hc);
 		hc->irq = 0;
@@ -4662,17 +4665,17 @@ release_card(struct hfc_multi *hc)
 	release_io_hfcmulti(hc);
 
 	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_WARNING "%s: remove instance from list\n",
+		printk(KERN_DEBUG "%s: remove instance from list\n",
 		     __func__);
 	list_del(&hc->list);
 
 	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_WARNING "%s: delete instance\n", __func__);
+		printk(KERN_DEBUG "%s: delete instance\n", __func__);
 	if (hc == syncmaster)
 		syncmaster = NULL;
 	kfree(hc);
 	if (debug & DEBUG_HFCMULTI_INIT)
-		printk(KERN_WARNING "%s: card successfully removed\n",
+		printk(KERN_DEBUG "%s: card successfully removed\n",
 		    __func__);
 }
 
@@ -4695,7 +4698,7 @@ init_e1_port(struct hfc_multi *hc, struct hm_map *m)
 	    (1 << (ISDN_P_B_HDLC & ISDN_P_B_MASK));
 	dch->dev.D.send = handle_dmsg;
 	dch->dev.D.ctrl = hfcm_dctrl;
-	dch->dev.nrbchan = (hc->dslot)?30:31;
+	dch->dev.nrbchan = (hc->dslot) ? 30 : 31;
 	dch->slot = hc->dslot;
 	hc->chan[hc->dslot].dch = dch;
 	hc->chan[hc->dslot].port = 0;
@@ -4937,7 +4940,7 @@ init_multi_port(struct hfc_multi *hc, int pt)
 	}
 	/* disable E-channel */
 	if (port[Port_cnt] & 0x004) {
-	if (debug & DEBUG_HFCMULTI_INIT)
+		if (debug & DEBUG_HFCMULTI_INIT)
 			printk(KERN_DEBUG
 			    "%s: PROTOCOL disable E-channel: "
 			    "card(%d) port(%d)\n",
@@ -5222,7 +5225,7 @@ static void __devexit hfc_remove_pci(struct pci_dev *pdev)
 		spin_unlock_irqrestore(&HFClock, flags);
 	}  else {
 		if (debug)
-			printk(KERN_WARNING "%s: drvdata allready removed\n",
+			printk(KERN_DEBUG "%s: drvdata allready removed\n",
 			    __func__);
 	}
 }
diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c
index 60dc92562c6..776afc8c927 100644
--- a/drivers/isdn/hardware/mISDN/hfcpci.c
+++ b/drivers/isdn/hardware/mISDN/hfcpci.c
@@ -257,7 +257,7 @@ reset_hfcpci(struct hfc_pci *hc)
 	Write_hfc(hc, HFCPCI_INT_M1, hc->hw.int_m1);
 
 	/* Clear already pending ints */
-	if (Read_hfc(hc, HFCPCI_INT_S1));
+	val = Read_hfc(hc, HFCPCI_INT_S1);
 
 	/* set NT/TE mode */
 	hfcpci_setmode(hc);
@@ -499,7 +499,8 @@ receive_dmsg(struct hfc_pci *hc)
 			df->f2 = ((df->f2 + 1) & MAX_D_FRAMES) |
 			    (MAX_D_FRAMES + 1);	/* next buffer */
 			df->za[df->f2 & D_FREG_MASK].z2 =
-			    cpu_to_le16((le16_to_cpu(zp->z2) + rcnt) & (D_FIFO_SIZE - 1));
+			    cpu_to_le16((le16_to_cpu(zp->z2) + rcnt) &
+			    (D_FIFO_SIZE - 1));
 		} else {
 			dch->rx_skb = mI_alloc_skb(rcnt - 3, GFP_ATOMIC);
 			if (!dch->rx_skb) {
@@ -966,6 +967,7 @@ static void
 ph_state_nt(struct dchannel *dch)
 {
 	struct hfc_pci	*hc = dch->hw;
+	u_char	val;
 
 	if (dch->debug)
 		printk(KERN_DEBUG "%s: NT newstate %x\n",
@@ -979,7 +981,7 @@ ph_state_nt(struct dchannel *dch)
 			hc->hw.int_m1 &= ~HFCPCI_INTS_TIMER;
 			Write_hfc(hc, HFCPCI_INT_M1, hc->hw.int_m1);
 			/* Clear already pending ints */
-			if (Read_hfc(hc, HFCPCI_INT_S1));
+			val = Read_hfc(hc, HFCPCI_INT_S1);
 			Write_hfc(hc, HFCPCI_STATES, 4 | HFCPCI_LOAD_STATE);
 			udelay(10);
 			Write_hfc(hc, HFCPCI_STATES, 4);
@@ -1268,8 +1270,7 @@ mode_hfcpci(struct bchannel *bch, int bc, int protocol)
 		rx_slot = (bc>>8) & 0xff;
 		tx_slot = (bc>>16) & 0xff;
 		bc = bc & 0xff;
-	} else if (test_bit(HFC_CFG_PCM, &hc->cfg) &&
-	    (protocol > ISDN_P_NONE))
+	} else if (test_bit(HFC_CFG_PCM, &hc->cfg) && (protocol > ISDN_P_NONE))
 		printk(KERN_WARNING "%s: no pcm channel id but HFC_CFG_PCM\n",
 		    __func__);
 	if (hc->chanlimit > 1) {
@@ -1327,8 +1328,8 @@ mode_hfcpci(struct bchannel *bch, int bc, int protocol)
 	case (ISDN_P_B_RAW):
 		bch->state = protocol;
 		bch->nr = bc;
-		hfcpci_clear_fifo_rx(hc, (fifo2 & 2)?1:0);
-		hfcpci_clear_fifo_tx(hc, (fifo2 & 2)?1:0);
+		hfcpci_clear_fifo_rx(hc, (fifo2 & 2) ? 1 : 0);
+		hfcpci_clear_fifo_tx(hc, (fifo2 & 2) ? 1 : 0);
 		if (bc & 2) {
 			hc->hw.sctrl |= SCTRL_B2_ENA;
 			hc->hw.sctrl_r |= SCTRL_B2_ENA;
@@ -1362,8 +1363,8 @@ mode_hfcpci(struct bchannel *bch, int bc, int protocol)
 	case (ISDN_P_B_HDLC):
 		bch->state = protocol;
 		bch->nr = bc;
-		hfcpci_clear_fifo_rx(hc, (fifo2 & 2)?1:0);
-		hfcpci_clear_fifo_tx(hc, (fifo2 & 2)?1:0);
+		hfcpci_clear_fifo_rx(hc, (fifo2 & 2) ? 1 : 0);
+		hfcpci_clear_fifo_tx(hc, (fifo2 & 2) ? 1 : 0);
 		if (bc & 2) {
 			hc->hw.sctrl |= SCTRL_B2_ENA;
 			hc->hw.sctrl_r |= SCTRL_B2_ENA;
@@ -1457,7 +1458,7 @@ set_hfcpci_rxtest(struct bchannel *bch, int protocol, int chan)
 	switch (protocol) {
 	case (ISDN_P_B_RAW):
 		bch->state = protocol;
-		hfcpci_clear_fifo_rx(hc, (chan & 2)?1:0);
+		hfcpci_clear_fifo_rx(hc, (chan & 2) ? 1 : 0);
 		if (chan & 2) {
 			hc->hw.sctrl_r |= SCTRL_B2_ENA;
 			hc->hw.fifo_en |= HFCPCI_FIFOEN_B2RX;
@@ -1482,7 +1483,7 @@ set_hfcpci_rxtest(struct bchannel *bch, int protocol, int chan)
 		break;
 	case (ISDN_P_B_HDLC):
 		bch->state = protocol;
-		hfcpci_clear_fifo_rx(hc, (chan & 2)?1:0);
+		hfcpci_clear_fifo_rx(hc, (chan & 2) ? 1 : 0);
 		if (chan & 2) {
 			hc->hw.sctrl_r |= SCTRL_B2_ENA;
 			hc->hw.last_bfifo_cnt[1] = 0;
@@ -2047,7 +2048,8 @@ setup_hw(struct hfc_pci *hc)
 		printk(KERN_WARNING "HFC-PCI: No IRQ for PCI card found\n");
 		return 1;
 	}
-	hc->hw.pci_io = (char __iomem *)(unsigned long)hc->pdev->resource[1].start;
+	hc->hw.pci_io =
+		(char __iomem *)(unsigned long)hc->pdev->resource[1].start;
 
 	if (!hc->hw.pci_io) {
 		printk(KERN_WARNING "HFC-PCI: No IO-Mem for PCI card found\n");
@@ -2289,7 +2291,7 @@ hfc_remove_pci(struct pci_dev *pdev)
 		release_card(card);
 	else
 		if (debug)
-			printk(KERN_WARNING "%s: drvdata already removed\n",
+			printk(KERN_DEBUG "%s: drvdata already removed\n",
 			    __func__);
 }
 
diff --git a/drivers/isdn/mISDN/dsp_audio.c b/drivers/isdn/mISDN/dsp_audio.c
index de3795e3f43..9c7c6451bf3 100644
--- a/drivers/isdn/mISDN/dsp_audio.c
+++ b/drivers/isdn/mISDN/dsp_audio.c
@@ -210,9 +210,8 @@ dsp_audio_generate_seven(void)
 		j = 0;
 		for (k = 0; k < 256; k++) {
 			if (dsp_audio_alaw_to_s32[k]
-				< dsp_audio_alaw_to_s32[i]) {
-			j++;
-			}
+			    < dsp_audio_alaw_to_s32[i])
+				j++;
 		}
 		sorted_alaw[j] = i;
 	}
diff --git a/drivers/isdn/mISDN/dsp_cmx.c b/drivers/isdn/mISDN/dsp_cmx.c
index 05866184ba2..9c7c0d1ba55 100644
--- a/drivers/isdn/mISDN/dsp_cmx.c
+++ b/drivers/isdn/mISDN/dsp_cmx.c
@@ -238,7 +238,7 @@ dsp_cmx_add_conf_member(struct dsp *dsp, struct dsp_conf *conf)
 
 	member = kzalloc(sizeof(struct dsp_conf_member), GFP_ATOMIC);
 	if (!member) {
-		printk(KERN_ERR "kmalloc struct dsp_conf_member failed\n");
+		printk(KERN_ERR "kzalloc struct dsp_conf_member failed\n");
 		return -ENOMEM;
 	}
 	member->dsp = dsp;
@@ -317,7 +317,7 @@ static struct dsp_conf
 
 	conf = kzalloc(sizeof(struct dsp_conf), GFP_ATOMIC);
 	if (!conf) {
-		printk(KERN_ERR "kmalloc struct dsp_conf failed\n");
+		printk(KERN_ERR "kzalloc struct dsp_conf failed\n");
 		return NULL;
 	}
 	INIT_LIST_HEAD(&conf->mlist);
@@ -1389,7 +1389,8 @@ dsp_cmx_send_member(struct dsp *dsp, int len, s32 *c, int members)
 		while (r != rr && t != tt) {
 #ifdef CMX_TX_DEBUG
 			if (strlen(debugbuf) < 48)
-			    sprintf(debugbuf+strlen(debugbuf), " %02x", p[t]);
+				sprintf(debugbuf+strlen(debugbuf), " %02x",
+				    p[t]);
 #endif
 			*d++ = p[t]; /* write tx_buff */
 			t = (t+1) & CMX_BUFF_MASK;
diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c
index a0e0af81eb2..c35750647c6 100644
--- a/drivers/isdn/mISDN/dsp_core.c
+++ b/drivers/isdn/mISDN/dsp_core.c
@@ -280,7 +280,7 @@ dsp_fill_empty(struct dsp *dsp)
 static int
 dsp_control_req(struct dsp *dsp, struct mISDNhead *hh, struct sk_buff *skb)
 {
-	struct		sk_buff *nskb;
+	struct sk_buff	*nskb;
 	int ret = 0;
 	int cont;
 	u8 *data;
@@ -558,7 +558,7 @@ tone_off:
 			dsp->pipeline.inuse = 1;
 			dsp_cmx_hardware(dsp->conf, dsp);
 			ret = dsp_pipeline_build(&dsp->pipeline,
-				len > 0 ? (char *)data : NULL);
+				len > 0 ? data : NULL);
 			dsp_cmx_hardware(dsp->conf, dsp);
 			dsp_rx_off(dsp);
 		}
@@ -720,7 +720,7 @@ dsp_function(struct mISDNchannel *ch,  struct sk_buff *skb)
 		/* check if dtmf soft decoding is turned on */
 		if (dsp->dtmf.software) {
 			digits = dsp_dtmf_goertzel_decode(dsp, skb->data,
-				skb->len, (dsp_options&DSP_OPT_ULAW)?1:0);
+				skb->len, (dsp_options&DSP_OPT_ULAW) ? 1 : 0);
 		}
 		/* we need to process receive data if software */
 		if (dsp->conf && dsp->conf->software) {
@@ -952,7 +952,7 @@ dsp_ctrl(struct mISDNchannel *ch, u_int cmd, void *arg)
 	int		err = 0;
 
 	if (debug & DEBUG_DSP_CTRL)
-	printk(KERN_DEBUG "%s:(%x)\n", __func__, cmd);
+		printk(KERN_DEBUG "%s:(%x)\n", __func__, cmd);
 
 	switch (cmd) {
 	case OPEN_CHANNEL:
@@ -1175,9 +1175,9 @@ static int dsp_init(void)
 
 	/* init conversion tables */
 	dsp_audio_generate_law_tables();
-	dsp_silence = (dsp_options&DSP_OPT_ULAW)?0xff:0x2a;
-	dsp_audio_law_to_s32 = (dsp_options&DSP_OPT_ULAW)?dsp_audio_ulaw_to_s32:
-		dsp_audio_alaw_to_s32;
+	dsp_silence = (dsp_options&DSP_OPT_ULAW) ? 0xff : 0x2a;
+	dsp_audio_law_to_s32 = (dsp_options&DSP_OPT_ULAW) ?
+		dsp_audio_ulaw_to_s32 : dsp_audio_alaw_to_s32;
 	dsp_audio_generate_s2law_table();
 	dsp_audio_generate_seven();
 	dsp_audio_generate_mix_table();
diff --git a/drivers/isdn/mISDN/dsp_ecdis.h b/drivers/isdn/mISDN/dsp_ecdis.h
index 8a20af43308..21dbd153ee2 100644
--- a/drivers/isdn/mISDN/dsp_ecdis.h
+++ b/drivers/isdn/mISDN/dsp_ecdis.h
@@ -91,7 +91,7 @@ int16_t amp)
 					&& det->tone_cycle_duration <= 475*8) {
 					det->good_cycles++;
 					if (det->good_cycles > 2)
-					det->hit = TRUE;
+						det->hit = TRUE;
 				}
 				det->tone_cycle_duration = 0;
 			}
diff --git a/drivers/isdn/mISDN/dsp_tones.c b/drivers/isdn/mISDN/dsp_tones.c
index 7a9af66f4b1..1debf53670d 100644
--- a/drivers/isdn/mISDN/dsp_tones.c
+++ b/drivers/isdn/mISDN/dsp_tones.c
@@ -253,18 +253,24 @@ static struct pattern {
 	{8000, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
 
 	{TONE_GERMAN_DIALPBX,
-	{DATA_GA, DATA_S, DATA_GA, DATA_S, DATA_GA, DATA_S, NULL, NULL, NULL, NULL},
-	{SIZE_GA, SIZE_S, SIZE_GA, SIZE_S, SIZE_GA, SIZE_S, NULL, NULL, NULL, NULL},
+	{DATA_GA, DATA_S, DATA_GA, DATA_S, DATA_GA, DATA_S, NULL, NULL, NULL,
+		NULL},
+	{SIZE_GA, SIZE_S, SIZE_GA, SIZE_S, SIZE_GA, SIZE_S, NULL, NULL, NULL,
+		NULL},
 	{2000, 2000, 2000, 2000, 2000, 12000, 0, 0, 0, 0} },
 
 	{TONE_GERMAN_OLDDIALPBX,
-	{DATA_GO, DATA_S, DATA_GO, DATA_S, DATA_GO, DATA_S, NULL, NULL, NULL, NULL},
-	{SIZE_GO, SIZE_S, SIZE_GO, SIZE_S, SIZE_GO, SIZE_S, NULL, NULL, NULL, NULL},
+	{DATA_GO, DATA_S, DATA_GO, DATA_S, DATA_GO, DATA_S, NULL, NULL, NULL,
+		NULL},
+	{SIZE_GO, SIZE_S, SIZE_GO, SIZE_S, SIZE_GO, SIZE_S, NULL, NULL, NULL,
+		NULL},
 	{2000, 2000, 2000, 2000, 2000, 12000, 0, 0, 0, 0} },
 
 	{TONE_AMERICAN_DIALPBX,
-	{DATA_DT, DATA_S, DATA_DT, DATA_S, DATA_DT, DATA_S, NULL, NULL, NULL, NULL},
-	{SIZE_DT, SIZE_S, SIZE_DT, SIZE_S, SIZE_DT, SIZE_S, NULL, NULL, NULL, NULL},
+	{DATA_DT, DATA_S, DATA_DT, DATA_S, DATA_DT, DATA_S, NULL, NULL, NULL,
+		NULL},
+	{SIZE_DT, SIZE_S, SIZE_DT, SIZE_S, SIZE_DT, SIZE_S, NULL, NULL, NULL,
+		NULL},
 	{2000, 2000, 2000, 2000, 2000, 12000, 0, 0, 0, 0} },
 
 	{TONE_GERMAN_RINGING,
@@ -434,7 +440,7 @@ dsp_tone_hw_message(struct dsp *dsp, u8 *sample, int len)
 
 	/* unlocking is not required, because we don't expect a response */
 	nskb = _alloc_mISDN_skb(PH_CONTROL_REQ,
-		(len)?HFC_SPL_LOOP_ON:HFC_SPL_LOOP_OFF, len, sample,
+		(len) ? HFC_SPL_LOOP_ON : HFC_SPL_LOOP_OFF, len, sample,
 		GFP_ATOMIC);
 	if (nskb) {
 		if (dsp->ch.peer) {
@@ -498,8 +504,7 @@ dsp_tone(struct dsp *dsp, int tone)
 
 	/* we turn off the tone */
 	if (!tone) {
-		if (dsp->features.hfc_loops)
-		if (timer_pending(&tonet->tl))
+		if (dsp->features.hfc_loops && timer_pending(&tonet->tl))
 			del_timer(&tonet->tl);
 		if (dsp->features.hfc_loops)
 			dsp_tone_hw_message(dsp, NULL, 0);
diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c
index ea3c3aa2004..d9cf83b17e3 100644
--- a/drivers/isdn/mISDN/l1oip_core.c
+++ b/drivers/isdn/mISDN/l1oip_core.c
@@ -308,8 +308,8 @@ l1oip_socket_send(struct l1oip *hc, u8 localcodec, u8 channel, u32 chanmask,
 
 	/* assemble frame */
 	*p++ = (L1OIP_VERSION<<6) /* version and coding */
-	     | (hc->pri?0x20:0x00) /* type */
-	     | (hc->id?0x10:0x00) /* id */
+	     | (hc->pri ? 0x20 : 0x00) /* type */
+	     | (hc->id ? 0x10 : 0x00) /* id */
 	     | localcodec;
 	if (hc->id) {
 		*p++ = hc->id>>24; /* id */
@@ -317,7 +317,7 @@ l1oip_socket_send(struct l1oip *hc, u8 localcodec, u8 channel, u32 chanmask,
 		*p++ = hc->id>>8;
 		*p++ = hc->id;
 	}
-	*p++ = (multi == 1)?0x80:0x00 + channel; /* m-flag, channel */
+	*p++ = (multi == 1) ? 0x80 : 0x00 + channel; /* m-flag, channel */
 	if (multi == 1)
 		*p++ = len; /* length */
 	*p++ = timebase>>8; /* time base */
@@ -401,12 +401,12 @@ l1oip_socket_recv(struct l1oip *hc, u8 remotecodec, u8 channel, u16 timebase,
 	}
 
 	/* prepare message */
-	nskb = mI_alloc_skb((remotecodec == 3)?(len<<1):len, GFP_ATOMIC);
+	nskb = mI_alloc_skb((remotecodec == 3) ? (len<<1) : len, GFP_ATOMIC);
 	if (!nskb) {
 		printk(KERN_ERR "%s: No mem for skb.\n", __func__);
 		return;
 	}
-	p = skb_put(nskb, (remotecodec == 3)?(len<<1):len);
+	p = skb_put(nskb, (remotecodec == 3) ? (len<<1) : len);
 
 	if (remotecodec == 1 && ulaw)
 		l1oip_alaw_to_ulaw(buf, len, p);
@@ -458,7 +458,7 @@ l1oip_socket_recv(struct l1oip *hc, u8 remotecodec, u8 channel, u16 timebase,
 		hc->chan[channel].disorder_flag ^= 1;
 		if (nskb)
 #endif
-		queue_ch_frame(&bch->ch, PH_DATA_IND, rx_counter, nskb);
+			queue_ch_frame(&bch->ch, PH_DATA_IND, rx_counter, nskb);
 	}
 }
 
@@ -749,8 +749,8 @@ l1oip_socket_thread(void *data)
 			l1oip_socket_parse(hc, &sin_rx, recvbuf, recvlen);
 		} else {
 			if (debug & DEBUG_L1OIP_SOCKET)
-			    printk(KERN_WARNING "%s: broken pipe on socket\n",
-				__func__);
+				printk(KERN_WARNING
+				    "%s: broken pipe on socket\n", __func__);
 		}
 	}
 
@@ -925,7 +925,7 @@ handle_dmsg(struct mISDNchannel *ch, struct sk_buff *skb)
 		p = skb->data;
 		l = skb->len;
 		while (l) {
-			ll = (l < L1OIP_MAX_PERFRAME)?l:L1OIP_MAX_PERFRAME;
+			ll = (l < L1OIP_MAX_PERFRAME) ? l : L1OIP_MAX_PERFRAME;
 			l1oip_socket_send(hc, 0, dch->slot, 0,
 				hc->chan[dch->slot].tx_counter++, p, ll);
 			p += ll;
@@ -1173,7 +1173,7 @@ handle_bmsg(struct mISDNchannel *ch, struct sk_buff *skb)
 		p = skb->data;
 		l = skb->len;
 		while (l) {
-			ll = (l < L1OIP_MAX_PERFRAME)?l:L1OIP_MAX_PERFRAME;
+			ll = (l < L1OIP_MAX_PERFRAME) ? l : L1OIP_MAX_PERFRAME;
 			l1oip_socket_send(hc, hc->codec, bch->slot, 0,
 				hc->chan[bch->slot].tx_counter, p, ll);
 			hc->chan[bch->slot].tx_counter += ll;
@@ -1331,8 +1331,8 @@ init_card(struct l1oip *hc, int pri, int bundle)
 	spin_lock_init(&hc->socket_lock);
 	hc->idx = l1oip_cnt;
 	hc->pri = pri;
-	hc->d_idx = pri?16:3;
-	hc->b_num = pri?30:2;
+	hc->d_idx = pri ? 16 : 3;
+	hc->b_num = pri ? 30 : 2;
 	hc->bundle = bundle;
 	if (hc->pri)
 		sprintf(hc->name, "l1oip-e1.%d", l1oip_cnt + 1);
@@ -1517,9 +1517,9 @@ l1oip_init(void)
 
 		if (debug & DEBUG_L1OIP_INIT)
 			printk(KERN_DEBUG "%s: interface %d is %s with %s.\n",
-				__func__, l1oip_cnt, pri?"PRI":"BRI",
-				bundle?"bundled IP packet for all B-channels"
-				 :"seperate IP packets for every B-channel");
+			    __func__, l1oip_cnt, pri ? "PRI" : "BRI",
+			    bundle ? "bundled IP packet for all B-channels" :
+			    "seperate IP packets for every B-channel");
 
 		hc = kzalloc(sizeof(struct l1oip), GFP_ATOMIC);
 		if (!hc) {
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index 2a2c30a9438..c36f5213745 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -222,7 +222,7 @@ mISDN_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 	} else { /* use default for L2 messages */
 		if ((sk->sk_protocol == ISDN_P_LAPD_TE) ||
 		    (sk->sk_protocol == ISDN_P_LAPD_NT))
-		    mISDN_HEAD_ID(skb) = _pms(sk)->ch.nr;
+			mISDN_HEAD_ID(skb) = _pms(sk)->ch.nr;
 	}
 
 	if (*debug & DEBUG_SOCKET)
diff --git a/drivers/isdn/mISDN/tei.c b/drivers/isdn/mISDN/tei.c
index 778b660f067..bfcdd97df95 100644
--- a/drivers/isdn/mISDN/tei.c
+++ b/drivers/isdn/mISDN/tei.c
@@ -872,7 +872,6 @@ ph_data_ind(struct manager *mgr, struct sk_buff *skb)
 			    __func__, skb->len);
 		goto done;
 	}
-	if (*debug  & DEBUG_L2_TEI)
 
 	if ((skb->data[0] >> 2) != TEI_SAPI) /* not for us */
 		goto done;
diff --git a/drivers/isdn/mISDN/timerdev.c b/drivers/isdn/mISDN/timerdev.c
index bbd99d3282c..5b7e9bf514f 100644
--- a/drivers/isdn/mISDN/timerdev.c
+++ b/drivers/isdn/mISDN/timerdev.c
@@ -259,7 +259,7 @@ mISDN_ioctl(struct inode *inode, struct file *filep, unsigned int cmd,
 	return ret;
 }
 
-static struct file_operations mISDN_fops = {
+static const struct file_operations mISDN_fops = {
 	.read		= mISDN_read,
 	.poll		= mISDN_poll,
 	.ioctl		= mISDN_ioctl,
diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h
index 0b28fd1e393..45100b39a7c 100644
--- a/include/linux/mISDNif.h
+++ b/include/linux/mISDNif.h
@@ -292,19 +292,19 @@ struct mISDN_devrename {
 
 /* MPH_INFORMATION_REQ payload */
 struct ph_info_ch {
-        __u32 protocol;
-        __u64 Flags;
+	__u32 protocol;
+	__u64 Flags;
 };
 
 struct ph_info_dch {
-        struct ph_info_ch ch;
-        __u16 state;
-        __u16 num_bch;
+	struct ph_info_ch ch;
+	__u16 state;
+	__u16 num_bch;
 };
 
 struct ph_info {
-        struct ph_info_dch dch;
-        struct ph_info_ch  bch[];
+	struct ph_info_dch dch;
+	struct ph_info_ch  bch[];
 };
 
 /* timer device ioctl */
-- 
cgit v1.2.3-70-g09d2


From daebafed7fef54fcc73d2d01431122cfd578d1e0 Mon Sep 17 00:00:00 2001
From: Andreas Eversberg <andreas@eversberg.eu>
Date: Mon, 25 May 2009 00:56:56 -0700
Subject: mISDN: Added PCI ID for new Junghanns.net Single E1 cards.

The new ID is validated by Cologne Chip.
LEDs control is also supported.

Signed-off-by: Andreas Eversberg <andreas@eversberg.eu>
Signed-off-by: Karsten Keil <keil@b1-systems.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hardware/mISDN/hfcmulti.c | 4 ++++
 include/linux/pci_ids.h                | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c
index d60f5b7a41d..e1dab30aed3 100644
--- a/drivers/isdn/hardware/mISDN/hfcmulti.c
+++ b/drivers/isdn/hardware/mISDN/hfcmulti.c
@@ -5357,6 +5357,10 @@ static struct pci_device_id hfmultipci_ids[] __devinitdata = {
 		PCI_SUBDEVICE_ID_CCD_SPD4S, 0, 0, H(26)}, /* PLX PCI Bridge */
 	{ PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9030, PCI_VENDOR_ID_CCD,
 		PCI_SUBDEVICE_ID_CCD_SPDE1, 0, 0, H(27)}, /* PLX PCI Bridge */
+
+	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFCE1, PCI_VENDOR_ID_CCD,
+		PCI_SUBDEVICE_ID_CCD_JHSE1, 0, 0, H(25)}, /* Junghanns E1 */
+
 	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_ANY_ID, PCI_ANY_ID,
 		0, 0, 0},
 	{ PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_ANY_ID, PCI_ANY_ID,
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 4bdff984ac9..12db06cf0e2 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1914,6 +1914,7 @@
 #define PCI_SUBDEVICE_ID_CCD_SWYX4S	0xB540
 #define PCI_SUBDEVICE_ID_CCD_JH4S20	0xB550
 #define PCI_SUBDEVICE_ID_CCD_IOB8ST_1	0xB552
+#define PCI_SUBDEVICE_ID_CCD_JHSE1	0xB553
 #define PCI_SUBDEVICE_ID_CCD_JH8S	0xB55B
 #define PCI_SUBDEVICE_ID_CCD_BN4S	0xB560
 #define PCI_SUBDEVICE_ID_CCD_BN8S	0xB562
-- 
cgit v1.2.3-70-g09d2


From e3804cbebb67887879102925961d41b503f7fbe3 Mon Sep 17 00:00:00 2001
From: Alexander Beregalov <a.beregalov@gmail.com>
Date: Mon, 25 May 2009 01:53:53 -0700
Subject: net: remove COMPAT_NET_DEV_OPS

All drivers are already converted to new net_device_ops API
and nobody uses old API anymore.

Signed-off-by: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Kconfig       |  9 ---------
 include/linux/netdevice.h | 38 -----------------------------------
 net/802/fddi.c            |  4 ----
 net/802/hippi.c           |  5 -----
 net/8021q/vlan_dev.c      |  1 -
 net/appletalk/dev.c       | 11 -----------
 net/core/dev.c            | 50 -----------------------------------------------
 net/ethernet/eth.c        |  5 -----
 8 files changed, 123 deletions(-)

(limited to 'include')

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 0fb446e047e..efa659f0fb5 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -25,15 +25,6 @@ menuconfig NETDEVICES
 # that for each of the symbols.
 if NETDEVICES
 
-config COMPAT_NET_DEV_OPS
-       default y
-       bool "Enable older network device API compatibility"
-       ---help---
-          This option enables kernel compatibility with older network devices
-          that do not use net_device_ops interface.
-
-	  If unsure, say Y.
-
 config IFB
 	tristate "Intermediate Functional Block support"
 	depends on NET_CLS_ACT
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f8574e76b74..ae3c2099a04 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -901,44 +901,6 @@ struct net_device
 	/* max exchange id for FCoE LRO by ddp */
 	unsigned int		fcoe_ddp_xid;
 #endif
-
-#ifdef CONFIG_COMPAT_NET_DEV_OPS
-	struct {
-		int			(*init)(struct net_device *dev);
-		void			(*uninit)(struct net_device *dev);
-		int			(*open)(struct net_device *dev);
-		int			(*stop)(struct net_device *dev);
-		int			(*hard_start_xmit) (struct sk_buff *skb,
-							    struct net_device *dev);
-		u16			(*select_queue)(struct net_device *dev,
-							struct sk_buff *skb);
-		void			(*change_rx_flags)(struct net_device *dev,
-							   int flags);
-		void			(*set_rx_mode)(struct net_device *dev);
-		void			(*set_multicast_list)(struct net_device *dev);
-		int			(*set_mac_address)(struct net_device *dev,
-							   void *addr);
-		int			(*validate_addr)(struct net_device *dev);
-		int			(*do_ioctl)(struct net_device *dev,
-						    struct ifreq *ifr, int cmd);
-		int			(*set_config)(struct net_device *dev,
-						      struct ifmap *map);
-		int			(*change_mtu)(struct net_device *dev, int new_mtu);
-		int			(*neigh_setup)(struct net_device *dev,
-						       struct neigh_parms *);
-		void			(*tx_timeout) (struct net_device *dev);
-		struct net_device_stats* (*get_stats)(struct net_device *dev);
-		void			(*vlan_rx_register)(struct net_device *dev,
-							    struct vlan_group *grp);
-		void			(*vlan_rx_add_vid)(struct net_device *dev,
-							   unsigned short vid);
-		void			(*vlan_rx_kill_vid)(struct net_device *dev,
-							    unsigned short vid);
-#ifdef CONFIG_NET_POLL_CONTROLLER
-		void                    (*poll_controller)(struct net_device *dev);
-#endif
-	};
-#endif
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
diff --git a/net/802/fddi.c b/net/802/fddi.c
index 539e6064e6d..3ef0ab0a543 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -185,10 +185,6 @@ static const struct header_ops fddi_header_ops = {
 static void fddi_setup(struct net_device *dev)
 {
 	dev->header_ops		= &fddi_header_ops;
-#ifdef CONFIG_COMPAT_NET_DEV_OPS
-	dev->change_mtu		= fddi_change_mtu,
-#endif
-
 	dev->type		= ARPHRD_FDDI;
 	dev->hard_header_len	= FDDI_K_SNAP_HLEN+3;	/* Assume 802.2 SNAP hdr len + 3 pad bytes */
 	dev->mtu		= FDDI_K_SNAP_DLEN;	/* Assume max payload of 802.2 SNAP frame */
diff --git a/net/802/hippi.c b/net/802/hippi.c
index 313b9ebf92e..cd3e8e92952 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -193,11 +193,6 @@ static const struct header_ops hippi_header_ops = {
 
 static void hippi_setup(struct net_device *dev)
 {
-#ifdef CONFIG_COMPAT_NET_DEV_OPS
-	dev->change_mtu			= hippi_change_mtu;
-	dev->set_mac_address 		= hippi_mac_addr;
-	dev->neigh_setup 		= hippi_neigh_setup_dev;
-#endif
 	dev->header_ops			= &hippi_header_ops;
 
 	/*
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index ff7572ac548..1e2ad4c7c59 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -644,7 +644,6 @@ static int vlan_dev_init(struct net_device *dev)
 		dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN;
 		dev->netdev_ops         = &vlan_netdev_ops;
 	}
-	netdev_resync_ops(dev);
 
 	if (is_vlan_dev(real_dev))
 		subclass = 1;
diff --git a/net/appletalk/dev.c b/net/appletalk/dev.c
index 72277d70c98..6c8016f6186 100644
--- a/net/appletalk/dev.c
+++ b/net/appletalk/dev.c
@@ -9,21 +9,10 @@
 #include <linux/if_arp.h>
 #include <linux/if_ltalk.h>
 
-#ifdef CONFIG_COMPAT_NET_DEV_OPS
-static int ltalk_change_mtu(struct net_device *dev, int mtu)
-{
-	return -EINVAL;
-}
-#endif
-
 static void ltalk_setup(struct net_device *dev)
 {
 	/* Fill in the fields of the device structure with localtalk-generic values. */
 
-#ifdef CONFIG_COMPAT_NET_DEV_OPS
-	dev->change_mtu		= ltalk_change_mtu;
-#endif
-
 	dev->type		= ARPHRD_LOCALTLK;
 	dev->hard_header_len 	= LTALK_HLEN;
 	dev->mtu		= LTALK_MTU;
diff --git a/net/core/dev.c b/net/core/dev.c
index 3942266d1f6..241613f6dd2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4580,39 +4580,6 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
 }
 EXPORT_SYMBOL(netdev_fix_features);
 
-/* Some devices need to (re-)set their netdev_ops inside
- * ->init() or similar.  If that happens, we have to setup
- * the compat pointers again.
- */
-void netdev_resync_ops(struct net_device *dev)
-{
-#ifdef CONFIG_COMPAT_NET_DEV_OPS
-	const struct net_device_ops *ops = dev->netdev_ops;
-
-	dev->init = ops->ndo_init;
-	dev->uninit = ops->ndo_uninit;
-	dev->open = ops->ndo_open;
-	dev->change_rx_flags = ops->ndo_change_rx_flags;
-	dev->set_rx_mode = ops->ndo_set_rx_mode;
-	dev->set_multicast_list = ops->ndo_set_multicast_list;
-	dev->set_mac_address = ops->ndo_set_mac_address;
-	dev->validate_addr = ops->ndo_validate_addr;
-	dev->do_ioctl = ops->ndo_do_ioctl;
-	dev->set_config = ops->ndo_set_config;
-	dev->change_mtu = ops->ndo_change_mtu;
-	dev->neigh_setup = ops->ndo_neigh_setup;
-	dev->tx_timeout = ops->ndo_tx_timeout;
-	dev->get_stats = ops->ndo_get_stats;
-	dev->vlan_rx_register = ops->ndo_vlan_rx_register;
-	dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
-	dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
-#ifdef CONFIG_NET_POLL_CONTROLLER
-	dev->poll_controller = ops->ndo_poll_controller;
-#endif
-#endif
-}
-EXPORT_SYMBOL(netdev_resync_ops);
-
 /**
  *	register_netdevice	- register a network device
  *	@dev: device to register
@@ -4652,23 +4619,6 @@ int register_netdevice(struct net_device *dev)
 
 	dev->iflink = -1;
 
-#ifdef CONFIG_COMPAT_NET_DEV_OPS
-	/* Netdevice_ops API compatibility support.
-	 * This is temporary until all network devices are converted.
-	 */
-	if (dev->netdev_ops) {
-		netdev_resync_ops(dev);
-	} else {
-		char drivername[64];
-		pr_info("%s (%s): not using net_device_ops yet\n",
-			dev->name, netdev_drivername(dev, drivername, 64));
-
-		/* This works only because net_device_ops and the
-		   compatibility structure are the same. */
-		dev->netdev_ops = (void *) &(dev->init);
-	}
-#endif
-
 	/* Init, if this function is available */
 	if (dev->netdev_ops->ndo_init) {
 		ret = dev->netdev_ops->ndo_init(dev);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 280352aba40..5a883affecd 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -337,11 +337,6 @@ const struct header_ops eth_header_ops ____cacheline_aligned = {
 void ether_setup(struct net_device *dev)
 {
 	dev->header_ops		= &eth_header_ops;
-#ifdef CONFIG_COMPAT_NET_DEV_OPS
-	dev->change_mtu		= eth_change_mtu;
-	dev->set_mac_address 	= eth_mac_addr;
-	dev->validate_addr	= eth_validate_addr;
-#endif
 	dev->type		= ARPHRD_ETHER;
 	dev->hard_header_len 	= ETH_HLEN;
 	dev->mtu		= ETH_DATA_LEN;
-- 
cgit v1.2.3-70-g09d2


From e527ea312f31e88a7fa5472b71db71c565b0d44f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 25 May 2009 14:45:25 +0200
Subject: perf_counter: Remove unused ABI bits

extra_config_len isn't used for anything, remove it.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090525124600.116035832@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 2ddf5e3c551..b1f2bac09f9 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -154,11 +154,11 @@ struct perf_counter_hw_event {
 
 				__reserved_1   : 51;
 
-	__u32			extra_config_len;
 	__u32			wakeup_events;	/* wakeup every n events */
+	__u32			__reserved_2;
 
-	__u64			__reserved_2;
 	__u64			__reserved_3;
+	__u64			__reserved_4;
 };
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 6ab423e0eaca827fbd201ca4ae7d4f8573a366b2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 25 May 2009 14:45:27 +0200
Subject: perf_counter: Propagate inheritance failures down the fork() path

Fail fork() when we fail inheritance for some reason (-ENOMEM most likely).

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090525124600.324656474@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  4 ++--
 kernel/fork.c                |  6 +++++-
 kernel/perf_counter.c        | 20 ++++++++++++--------
 3 files changed, 19 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index b1f2bac09f9..d3e85de9bf1 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -566,7 +566,7 @@ extern void perf_counter_task_sched_in(struct task_struct *task, int cpu);
 extern void perf_counter_task_sched_out(struct task_struct *task,
 					struct task_struct *next, int cpu);
 extern void perf_counter_task_tick(struct task_struct *task, int cpu);
-extern void perf_counter_init_task(struct task_struct *child);
+extern int perf_counter_init_task(struct task_struct *child);
 extern void perf_counter_exit_task(struct task_struct *child);
 extern void perf_counter_do_pending(void);
 extern void perf_counter_print_debug(void);
@@ -631,7 +631,7 @@ perf_counter_task_sched_out(struct task_struct *task,
 			    struct task_struct *next, int cpu)		{ }
 static inline void
 perf_counter_task_tick(struct task_struct *task, int cpu)		{ }
-static inline void perf_counter_init_task(struct task_struct *child)	{ }
+static inline int perf_counter_init_task(struct task_struct *child)	{ }
 static inline void perf_counter_exit_task(struct task_struct *child)	{ }
 static inline void perf_counter_do_pending(void)			{ }
 static inline void perf_counter_print_debug(void)			{ }
diff --git a/kernel/fork.c b/kernel/fork.c
index 675e01e9072..c07c3335cea 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1095,7 +1095,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p, clone_flags);
-	perf_counter_init_task(p);
+
+	retval = perf_counter_init_task(p);
+	if (retval)
+		goto bad_fork_cleanup_policy;
 
 	if ((retval = audit_alloc(p)))
 		goto bad_fork_cleanup_policy;
@@ -1295,6 +1298,7 @@ bad_fork_cleanup_semundo:
 bad_fork_cleanup_audit:
 	audit_free(p);
 bad_fork_cleanup_policy:
+	perf_counter_exit_task(p);
 #ifdef CONFIG_NUMA
 	mpol_put(p->mempolicy);
 bad_fork_cleanup_cgroup:
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 217dbcce2eb..7a7a144870e 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3434,18 +3434,23 @@ again:
 /*
  * Initialize the perf_counter context in task_struct
  */
-void perf_counter_init_task(struct task_struct *child)
+int perf_counter_init_task(struct task_struct *child)
 {
 	struct perf_counter_context *child_ctx, *parent_ctx;
 	struct perf_counter *counter;
 	struct task_struct *parent = current;
 	int inherited_all = 1;
+	int ret = 0;
 
 	child->perf_counter_ctxp = NULL;
 
 	mutex_init(&child->perf_counter_mutex);
 	INIT_LIST_HEAD(&child->perf_counter_list);
 
+	parent_ctx = parent->perf_counter_ctxp;
+	if (likely(!parent_ctx || !parent_ctx->nr_counters))
+		return 0;
+
 	/*
 	 * This is executed from the parent task context, so inherit
 	 * counters that have been marked for cloning.
@@ -3454,11 +3459,7 @@ void perf_counter_init_task(struct task_struct *child)
 
 	child_ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL);
 	if (!child_ctx)
-		return;
-
-	parent_ctx = parent->perf_counter_ctxp;
-	if (likely(!parent_ctx || !parent_ctx->nr_counters))
-		return;
+		return -ENOMEM;
 
 	__perf_counter_init_context(child_ctx, child);
 	child->perf_counter_ctxp = child_ctx;
@@ -3482,8 +3483,9 @@ void perf_counter_init_task(struct task_struct *child)
 			continue;
 		}
 
-		if (inherit_group(counter, parent,
-				  parent_ctx, child, child_ctx)) {
+		ret = inherit_group(counter, parent, parent_ctx,
+					     child, child_ctx);
+		if (ret) {
 			inherited_all = 0;
 			break;
 		}
@@ -3505,6 +3507,8 @@ void perf_counter_init_task(struct task_struct *child)
 	}
 
 	mutex_unlock(&parent_ctx->mutex);
+
+	return ret;
 }
 
 static void __cpuinit perf_counter_init_cpu(int cpu)
-- 
cgit v1.2.3-70-g09d2


From 48e22d56ecdeddd1ffb42a02fccba5c6ef42b133 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 25 May 2009 17:39:04 +0200
Subject: perf_counter: x86: Remove interrupt throttle

remove the x86 specific interrupt throttle

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090525153931.616671838@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic/apic.c        |  2 --
 arch/x86/kernel/cpu/perf_counter.c | 47 ++++----------------------------------
 include/linux/perf_counter.h       |  2 --
 3 files changed, 5 insertions(+), 46 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b4f64402a82..89b63b5fad3 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -763,8 +763,6 @@ static void local_apic_timer_interrupt(void)
 	inc_irq_stat(apic_timer_irqs);
 
 	evt->event_handler(evt);
-
-	perf_counter_unthrottle();
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index c14437faf5d..8c8177f859f 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -718,11 +718,6 @@ static void intel_pmu_save_and_restart(struct perf_counter *counter)
 		intel_pmu_enable_counter(hwc, idx);
 }
 
-/*
- * Maximum interrupt frequency of 100KHz per CPU
- */
-#define PERFMON_MAX_INTERRUPTS (100000/HZ)
-
 /*
  * This handler is triggered by the local APIC, so the APIC IRQ handling
  * rules apply:
@@ -775,15 +770,14 @@ again:
 	if (status)
 		goto again;
 
-	if (++cpuc->interrupts != PERFMON_MAX_INTERRUPTS)
-		perf_enable();
+	perf_enable();
 
 	return 1;
 }
 
 static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
 {
-	int cpu, idx, throttle = 0, handled = 0;
+	int cpu, idx, handled = 0;
 	struct cpu_hw_counters *cpuc;
 	struct perf_counter *counter;
 	struct hw_perf_counter *hwc;
@@ -792,16 +786,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
 	cpu = smp_processor_id();
 	cpuc = &per_cpu(cpu_hw_counters, cpu);
 
-	if (++cpuc->interrupts == PERFMON_MAX_INTERRUPTS) {
-		throttle = 1;
-		__perf_disable();
-		cpuc->enabled = 0;
-		barrier();
-	}
-
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-		int disable = 0;
-
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
 
@@ -809,45 +794,23 @@ static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
 		hwc = &counter->hw;
 
 		if (counter->hw_event.nmi != nmi)
-			goto next;
+			continue;
 
 		val = x86_perf_counter_update(counter, hwc, idx);
 		if (val & (1ULL << (x86_pmu.counter_bits - 1)))
-			goto next;
+			continue;
 
 		/* counter overflow */
 		x86_perf_counter_set_period(counter, hwc, idx);
 		handled = 1;
 		inc_irq_stat(apic_perf_irqs);
-		disable = perf_counter_overflow(counter, nmi, regs, 0);
-
-next:
-		if (disable || throttle)
+		if (perf_counter_overflow(counter, nmi, regs, 0))
 			amd_pmu_disable_counter(hwc, idx);
 	}
 
 	return handled;
 }
 
-void perf_counter_unthrottle(void)
-{
-	struct cpu_hw_counters *cpuc;
-
-	if (!x86_pmu_initialized())
-		return;
-
-	cpuc = &__get_cpu_var(cpu_hw_counters);
-	if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
-		/*
-		 * Clear them before re-enabling irqs/NMIs again:
-		 */
-		cpuc->interrupts = 0;
-		perf_enable();
-	} else {
-		cpuc->interrupts = 0;
-	}
-}
-
 void smp_perf_counter_interrupt(struct pt_regs *regs)
 {
 	irq_enter();
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index d3e85de9bf1..0c160be2078 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -570,7 +570,6 @@ extern int perf_counter_init_task(struct task_struct *child);
 extern void perf_counter_exit_task(struct task_struct *child);
 extern void perf_counter_do_pending(void);
 extern void perf_counter_print_debug(void);
-extern void perf_counter_unthrottle(void);
 extern void __perf_disable(void);
 extern bool __perf_enable(void);
 extern void perf_disable(void);
@@ -635,7 +634,6 @@ static inline int perf_counter_init_task(struct task_struct *child)	{ }
 static inline void perf_counter_exit_task(struct task_struct *child)	{ }
 static inline void perf_counter_do_pending(void)			{ }
 static inline void perf_counter_print_debug(void)			{ }
-static inline void perf_counter_unthrottle(void)			{ }
 static inline void perf_disable(void)					{ }
 static inline void perf_enable(void)					{ }
 static inline int perf_counter_task_disable(void)	{ return -EINVAL; }
-- 
cgit v1.2.3-70-g09d2


From a78ac3258782f3e64cb40beb5990808e1febcc0c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 25 May 2009 17:39:05 +0200
Subject: perf_counter: Generic per counter interrupt throttle

Introduce a generic per counter interrupt throttle.

This uses the perf_counter_overflow() quick disable to throttle a specific
counter when its going too fast when a pmu->unthrottle() method is provided
which can undo the quick disable.

Power needs to implement both the quick disable and the unthrottle method.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090525153931.703093461@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_counter.c | 13 +++++++++
 include/linux/perf_counter.h       | 11 +++++++
 kernel/perf_counter.c              | 59 +++++++++++++++++++++++++++++++++++---
 kernel/sysctl.c                    |  8 ++++++
 4 files changed, 87 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 8c8177f859f..c4b543d1a86 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -623,6 +623,18 @@ try_generic:
 	return 0;
 }
 
+static void x86_pmu_unthrottle(struct perf_counter *counter)
+{
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	struct hw_perf_counter *hwc = &counter->hw;
+
+	if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
+				cpuc->counters[hwc->idx] != counter))
+		return;
+
+	x86_pmu.enable(hwc, hwc->idx);
+}
+
 void perf_counter_print_debug(void)
 {
 	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
@@ -1038,6 +1050,7 @@ static const struct pmu pmu = {
 	.enable		= x86_pmu_enable,
 	.disable	= x86_pmu_disable,
 	.read		= x86_pmu_read,
+	.unthrottle	= x86_pmu_unthrottle,
 };
 
 const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 0c160be2078..e3a7585d3e4 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -266,6 +266,15 @@ enum perf_event_type {
 	 */
 	PERF_EVENT_PERIOD		= 4,
 
+	/*
+	 * struct {
+	 * 	struct perf_event_header	header;
+	 * 	u64				time;
+	 * };
+	 */
+	PERF_EVENT_THROTTLE		= 5,
+	PERF_EVENT_UNTHROTTLE		= 6,
+
 	/*
 	 * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
 	 * will be PERF_RECORD_*
@@ -367,6 +376,7 @@ struct pmu {
 	int (*enable)			(struct perf_counter *counter);
 	void (*disable)			(struct perf_counter *counter);
 	void (*read)			(struct perf_counter *counter);
+	void (*unthrottle)		(struct perf_counter *counter);
 };
 
 /**
@@ -613,6 +623,7 @@ extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
 
 extern int sysctl_perf_counter_priv;
 extern int sysctl_perf_counter_mlock;
+extern int sysctl_perf_counter_limit;
 
 extern void perf_counter_init(void);
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 14b1fe98483..ec9c4007a7f 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -46,6 +46,7 @@ static atomic_t nr_comm_tracking __read_mostly;
 
 int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
 int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */
+int sysctl_perf_counter_limit __read_mostly = 100000; /* max NMIs per second */
 
 /*
  * Lock for (sysadmin-configurable) counter reservations:
@@ -1066,12 +1067,15 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
 	__perf_counter_sched_in(ctx, cpuctx, cpu);
 }
 
+#define MAX_INTERRUPTS (~0ULL)
+
+static void perf_log_throttle(struct perf_counter *counter, int enable);
 static void perf_log_period(struct perf_counter *counter, u64 period);
 
 static void perf_adjust_freq(struct perf_counter_context *ctx)
 {
 	struct perf_counter *counter;
-	u64 irq_period;
+	u64 interrupts, irq_period;
 	u64 events, period;
 	s64 delta;
 
@@ -1080,10 +1084,19 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
 		if (counter->state != PERF_COUNTER_STATE_ACTIVE)
 			continue;
 
+		interrupts = counter->hw.interrupts;
+		counter->hw.interrupts = 0;
+
+		if (interrupts == MAX_INTERRUPTS) {
+			perf_log_throttle(counter, 1);
+			counter->pmu->unthrottle(counter);
+			interrupts = 2*sysctl_perf_counter_limit/HZ;
+		}
+
 		if (!counter->hw_event.freq || !counter->hw_event.irq_freq)
 			continue;
 
-		events = HZ * counter->hw.interrupts * counter->hw.irq_period;
+		events = HZ * interrupts * counter->hw.irq_period;
 		period = div64_u64(events, counter->hw_event.irq_freq);
 
 		delta = (s64)(1 + period - counter->hw.irq_period);
@@ -1097,7 +1110,6 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
 		perf_log_period(counter, irq_period);
 
 		counter->hw.irq_period = irq_period;
-		counter->hw.interrupts = 0;
 	}
 	spin_unlock(&ctx->lock);
 }
@@ -2543,6 +2555,35 @@ static void perf_log_period(struct perf_counter *counter, u64 period)
 	perf_output_end(&handle);
 }
 
+/*
+ * IRQ throttle logging
+ */
+
+static void perf_log_throttle(struct perf_counter *counter, int enable)
+{
+	struct perf_output_handle handle;
+	int ret;
+
+	struct {
+		struct perf_event_header	header;
+		u64				time;
+	} throttle_event = {
+		.header = {
+			.type = PERF_EVENT_THROTTLE + 1,
+			.misc = 0,
+			.size = sizeof(throttle_event),
+		},
+		.time = sched_clock(),
+	};
+
+	ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 0, 0);
+	if (ret)
+		return;
+
+	perf_output_put(&handle, throttle_event);
+	perf_output_end(&handle);
+}
+
 /*
  * Generic counter overflow handling.
  */
@@ -2551,9 +2592,19 @@ int perf_counter_overflow(struct perf_counter *counter,
 			  int nmi, struct pt_regs *regs, u64 addr)
 {
 	int events = atomic_read(&counter->event_limit);
+	int throttle = counter->pmu->unthrottle != NULL;
 	int ret = 0;
 
-	counter->hw.interrupts++;
+	if (!throttle) {
+		counter->hw.interrupts++;
+	} else if (counter->hw.interrupts != MAX_INTERRUPTS) {
+		counter->hw.interrupts++;
+		if (HZ*counter->hw.interrupts > (u64)sysctl_perf_counter_limit) {
+			counter->hw.interrupts = MAX_INTERRUPTS;
+			perf_log_throttle(counter, 0);
+			ret = 1;
+		}
+	}
 
 	/*
 	 * XXX event_limit might not quite work as expected on inherited
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3cb1849f598..0c4bf863afa 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -930,6 +930,14 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "perf_counter_int_limit",
+		.data		= &sysctl_perf_counter_limit,
+		.maxlen		= sizeof(sysctl_perf_counter_limit),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #endif
 /*
  * NOTE: do not add new entries to this table unless you have read
-- 
cgit v1.2.3-70-g09d2


From 0127c3ea082ee9f1034789b978dfc7fd83254617 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 25 May 2009 22:03:26 +0200
Subject: perf_counter: fix warning & lockup

 - remove bogus warning
 - fix wakeup from NMI path lockup
 - also fix up whitespace noise in perf_counter.h

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090525153931.703093461@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 78 ++++++++++++++++++++++----------------------
 kernel/perf_counter.c        |  4 +--
 2 files changed, 40 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index e3a7585d3e4..2b16ed37b74 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -73,7 +73,7 @@ enum sw_event_ids {
 	PERF_SW_EVENTS_MAX		= 7,
 };
 
-#define __PERF_COUNTER_MASK(name) 			\
+#define __PERF_COUNTER_MASK(name)			\
 	(((1ULL << PERF_COUNTER_##name##_BITS) - 1) <<	\
 	 PERF_COUNTER_##name##_SHIFT)
 
@@ -98,14 +98,14 @@ enum sw_event_ids {
  * in the overflow packets.
  */
 enum perf_counter_record_format {
-	PERF_RECORD_IP		= 1U << 0,
-	PERF_RECORD_TID		= 1U << 1,
-	PERF_RECORD_TIME	= 1U << 2,
-	PERF_RECORD_ADDR	= 1U << 3,
-	PERF_RECORD_GROUP	= 1U << 4,
-	PERF_RECORD_CALLCHAIN	= 1U << 5,
-	PERF_RECORD_CONFIG	= 1U << 6,
-	PERF_RECORD_CPU		= 1U << 7,
+	PERF_RECORD_IP			= 1U << 0,
+	PERF_RECORD_TID			= 1U << 1,
+	PERF_RECORD_TIME		= 1U << 2,
+	PERF_RECORD_ADDR		= 1U << 3,
+	PERF_RECORD_GROUP		= 1U << 4,
+	PERF_RECORD_CALLCHAIN		= 1U << 5,
+	PERF_RECORD_CONFIG		= 1U << 6,
+	PERF_RECORD_CPU			= 1U << 7,
 };
 
 /*
@@ -235,13 +235,13 @@ enum perf_event_type {
 	 * correlate userspace IPs to code. They have the following structure:
 	 *
 	 * struct {
-	 * 	struct perf_event_header	header;
+	 *	struct perf_event_header	header;
 	 *
-	 * 	u32				pid, tid;
-	 * 	u64				addr;
-	 * 	u64				len;
-	 * 	u64				pgoff;
-	 * 	char				filename[];
+	 *	u32				pid, tid;
+	 *	u64				addr;
+	 *	u64				len;
+	 *	u64				pgoff;
+	 *	char				filename[];
 	 * };
 	 */
 	PERF_EVENT_MMAP			= 1,
@@ -249,27 +249,27 @@ enum perf_event_type {
 
 	/*
 	 * struct {
-	 * 	struct perf_event_header	header;
+	 *	struct perf_event_header	header;
 	 *
-	 * 	u32				pid, tid;
-	 * 	char				comm[];
+	 *	u32				pid, tid;
+	 *	char				comm[];
 	 * };
 	 */
 	PERF_EVENT_COMM			= 3,
 
 	/*
 	 * struct {
-	 * 	struct perf_event_header	header;
-	 * 	u64				time;
-	 * 	u64				irq_period;
+	 *	struct perf_event_header	header;
+	 *	u64				time;
+	 *	u64				irq_period;
 	 * };
 	 */
 	PERF_EVENT_PERIOD		= 4,
 
 	/*
 	 * struct {
-	 * 	struct perf_event_header	header;
-	 * 	u64				time;
+	 *	struct perf_event_header	header;
+	 *	u64				time;
 	 * };
 	 */
 	PERF_EVENT_THROTTLE		= 5,
@@ -280,23 +280,23 @@ enum perf_event_type {
 	 * will be PERF_RECORD_*
 	 *
 	 * struct {
-	 * 	struct perf_event_header	header;
+	 *	struct perf_event_header	header;
 	 *
-	 * 	{ u64			ip;	  } && PERF_RECORD_IP
-	 * 	{ u32			pid, tid; } && PERF_RECORD_TID
-	 * 	{ u64			time;     } && PERF_RECORD_TIME
-	 * 	{ u64			addr;     } && PERF_RECORD_ADDR
-	 * 	{ u64			config;   } && PERF_RECORD_CONFIG
-	 * 	{ u32			cpu, res; } && PERF_RECORD_CPU
+	 *	{ u64			ip;	  } && PERF_RECORD_IP
+	 *	{ u32			pid, tid; } && PERF_RECORD_TID
+	 *	{ u64			time;     } && PERF_RECORD_TIME
+	 *	{ u64			addr;     } && PERF_RECORD_ADDR
+	 *	{ u64			config;   } && PERF_RECORD_CONFIG
+	 *	{ u32			cpu, res; } && PERF_RECORD_CPU
 	 *
-	 * 	{ u64			nr;
-	 * 	  { u64 event, val; } 	cnt[nr];  } && PERF_RECORD_GROUP
+	 *	{ u64			nr;
+	 *	  { u64 event, val; }	cnt[nr];  } && PERF_RECORD_GROUP
 	 *
-	 * 	{ u16			nr,
-	 * 				hv,
-	 * 				kernel,
-	 * 				user;
-	 * 	  u64			ips[nr];  } && PERF_RECORD_CALLCHAIN
+	 *	{ u16			nr,
+	 *				hv,
+	 *				kernel,
+	 *				user;
+	 *	  u64			ips[nr];  } && PERF_RECORD_CALLCHAIN
 	 * };
 	 */
 };
@@ -406,7 +406,7 @@ struct perf_mmap_data {
 	atomic_t			wakeup;		/* needs a wakeup    */
 
 	struct perf_counter_mmap_page   *user_page;
-	void 				*data_pages[0];
+	void				*data_pages[0];
 };
 
 struct perf_pending_entry {
@@ -422,7 +422,7 @@ struct perf_counter {
 	struct list_head		list_entry;
 	struct list_head		event_entry;
 	struct list_head		sibling_list;
-	int 				nr_siblings;
+	int				nr_siblings;
 	struct perf_counter		*group_leader;
 	const struct pmu		*pmu;
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index ec9c4007a7f..070f92d3232 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2576,7 +2576,7 @@ static void perf_log_throttle(struct perf_counter *counter, int enable)
 		.time = sched_clock(),
 	};
 
-	ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 0, 0);
+	ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 1, 0);
 	if (ret)
 		return;
 
@@ -3449,8 +3449,6 @@ void perf_counter_exit_task(struct task_struct *child)
 	struct perf_counter_context *child_ctx;
 	unsigned long flags;
 
-	WARN_ON_ONCE(child != current);
-
 	child_ctx = child->perf_counter_ctxp;
 
 	if (likely(!child_ctx))
-- 
cgit v1.2.3-70-g09d2


From b0aae68cc5508f3c2fbf728988c954db4c8b8a53 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 21 May 2009 13:59:18 +0800
Subject: tracing/events: change the type of __str_loc_item to unsigned short

When defining a dynamic size string, we add __str_loc_##item to the
trace entry, and it stores the location of the actual string in
entry->_str_data[]

'unsigned short' should be sufficient to store this information, thus
we save 2 bytes per dyn-size string in the ring buffer.

[ Impact: reduce memory occupied by dyn-size strings in ring buffer ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <4A14EDB6.2050507@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/trace/ftrace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index edb02bc9f8f..b5ff2e8229e 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -25,7 +25,7 @@
 #define __field(type, item)		type	item;
 
 #undef __string
-#define __string(item, src)		int	__str_loc_##item;
+#define __string(item, src)		unsigned short	__str_loc_##item;
 
 #undef TP_STRUCT__entry
 #define TP_STRUCT__entry(args...) args
-- 
cgit v1.2.3-70-g09d2


From 08baf561083bc27a953aa087dd8a664bb2b88e8e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 25 May 2009 22:58:01 -0700
Subject: net: txq_trans_update() helper

We would like to get rid of netdev->trans_start = jiffies; that about all net
drivers have to use in their start_xmit() function, and use txq->trans_start
instead.

This can be done generically in core network, as suggested by David.

Some devices, (particularly loopback) dont need trans_start update, because
they dont have transmit watchdog. We could add a new device flag, or rely
on fact that txq->tran_start can be updated is txq->xmit_lock_owner is
different than -1. Use a helper function to hide our choice.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 6 ++++++
 net/core/dev.c            | 3 +++
 net/core/netpoll.c        | 5 ++++-
 net/core/pktgen.c         | 1 +
 net/sched/sch_teql.c      | 1 +
 5 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ae3c2099a04..586b71f0358 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1674,6 +1674,12 @@ static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
 	spin_unlock_bh(&txq->_xmit_lock);
 }
 
+static inline void txq_trans_update(struct netdev_queue *txq)
+{
+	if (txq->xmit_lock_owner != -1)
+		txq->trans_start = jiffies;
+}
+
 /**
  *	netif_tx_lock - grab network device transmit lock
  *	@dev: network device
diff --git a/net/core/dev.c b/net/core/dev.c
index 241613f6dd2..5eb3e48ab31 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1698,6 +1698,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			skb->dst = NULL;
 		}
 		rc = ops->ndo_start_xmit(skb, dev);
+		if (rc == 0)
+			txq_trans_update(txq);
 		/*
 		 * TODO: if skb_orphan() was called by
 		 * dev->hard_start_xmit() (for example, the unmodified
@@ -1727,6 +1729,7 @@ gso:
 			skb->next = nskb;
 			return rc;
 		}
+		txq_trans_update(txq);
 		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
 			return NETDEV_TX_BUSY;
 	} while (skb->next);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 67b4f3e3d4a..7ab31a7576a 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -302,8 +302,11 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
 		     tries > 0; --tries) {
 			if (__netif_tx_trylock(txq)) {
-				if (!netif_tx_queue_stopped(txq))
+				if (!netif_tx_queue_stopped(txq)) {
 					status = ops->ndo_start_xmit(skb, dev);
+					if (status == NETDEV_TX_OK)
+						txq_trans_update(txq);
+				}
 				__netif_tx_unlock(txq);
 
 				if (status == NETDEV_TX_OK)
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 0666a827bc6..b8ccd3c88d6 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3438,6 +3438,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
 	      retry_now:
 		ret = (*xmit)(pkt_dev->skb, odev);
 		if (likely(ret == NETDEV_TX_OK)) {
+			txq_trans_update(txq);
 			pkt_dev->last_ok = 1;
 			pkt_dev->sofar++;
 			pkt_dev->seq_num++;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 428a5ef5b94..a886496bdc3 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -308,6 +308,7 @@ restart:
 				if (!netif_tx_queue_stopped(slave_txq) &&
 				    !netif_tx_queue_frozen(slave_txq) &&
 				    slave_ops->ndo_start_xmit(skb, slave) == 0) {
+					txq_trans_update(slave_txq);
 					__netif_tx_unlock(slave_txq);
 					master->slaves = NEXT_SLAVE(q);
 					netif_wake_queue(dev);
-- 
cgit v1.2.3-70-g09d2


From b90cf6681f4f6263920616e7ca2fd09130e4143a Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sun, 5 Apr 2009 08:23:44 -0700
Subject: [MTD] Remove option for add_mtd_partitions() to not register
 partitions.

This breaks the dilnetpc map driver, but it could be fixed not to use
that option. We want to simplify the partition handling, and this is a
step towards that.

Remove superfluous 'index' field from private struct mtd_part too, while
we're at it.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdpart.c          | 18 ++++--------------
 include/linux/mtd/partitions.h |  1 -
 2 files changed, 4 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 63d1cd2c17b..349fcbe5cc0 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -27,9 +27,7 @@ struct mtd_part {
 	struct mtd_info mtd;
 	struct mtd_info *master;
 	uint64_t offset;
-	int index;
 	struct list_head list;
-	int registered;
 };
 
 /*
@@ -321,8 +319,7 @@ int del_mtd_partitions(struct mtd_info *master)
 	list_for_each_entry_safe(slave, next, &mtd_partitions, list)
 		if (slave->master == master) {
 			list_del(&slave->list);
-			if (slave->registered)
-				del_mtd_device(&slave->mtd);
+			del_mtd_device(&slave->mtd);
 			kfree(slave);
 		}
 
@@ -412,7 +409,6 @@ static struct mtd_part *add_one_partition(struct mtd_info *master,
 	slave->mtd.erase = part_erase;
 	slave->master = master;
 	slave->offset = part->offset;
-	slave->index = partno;
 
 	if (slave->offset == MTDPART_OFS_APPEND)
 		slave->offset = cur_offset;
@@ -500,15 +496,9 @@ static struct mtd_part *add_one_partition(struct mtd_info *master,
 	}
 
 out_register:
-	if (part->mtdp) {
-		/* store the object pointer (caller may or may not register it*/
-		*part->mtdp = &slave->mtd;
-		slave->registered = 0;
-	} else {
-		/* register our partition */
-		add_mtd_device(&slave->mtd);
-		slave->registered = 1;
-	}
+	/* register our partition */
+	add_mtd_device(&slave->mtd);
+
 	return slave;
 }
 
diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h
index 7535a74083b..af6dcb992bc 100644
--- a/include/linux/mtd/partitions.h
+++ b/include/linux/mtd/partitions.h
@@ -40,7 +40,6 @@ struct mtd_partition {
 	uint64_t offset;		/* offset within the master MTD space */
 	uint32_t mask_flags;		/* master MTD flags to mask out for this partition */
 	struct nand_ecclayout *ecclayout;	/* out of band layout for this partition (NAND only)*/
-	struct mtd_info **mtdp;		/* pointer to store the MTD object */
 };
 
 #define MTDPART_OFS_NXTBLK	(-2)
-- 
cgit v1.2.3-70-g09d2


From be74b73a57645cc253d881ab0c1014eb64b9cf22 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 26 May 2009 20:25:22 +0200
Subject: tracing: add __print_flags for events

Developers have been asking for the ability in the ftrace event tracer
to display names of bits in a flags variable.

Instead of printing out c2, it would be easier to read FOO|BAR|GOO,
assuming that FOO is bit 1, BAR is bit 6 and GOO is bit 7.

Some examples where this would be useful are the state flags in a context
switch, kmalloc flags, and even permision flags in accessing files.

[
  v2 changes include:

  Frederic Weisbecker's idea of using a mask instead of bits,
  thus we can output GFP_KERNEL instead of GPF_WAIT|GFP_IO|GFP_FS.

  Li Zefan's idea of allowing the caller of __print_flags to add their
  own delimiter (or no delimiter) where we can get for file permissions
  rwx instead of r|w|x.
]

[
  v3 changes:

   Christoph Hellwig's idea of using an array instead of va_args.
]

[ Impact: better displaying of flags in trace output ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/ftrace_event.h | 13 ++++++++++++-
 include/trace/ftrace.h       | 14 ++++++++++++++
 kernel/trace/trace_output.c  | 39 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 65 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index bae51ddfabd..4b58cf1a11c 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -3,12 +3,23 @@
 
 #include <linux/trace_seq.h>
 #include <linux/ring_buffer.h>
-
+#include <linux/percpu.h>
 
 struct trace_array;
 struct tracer;
 struct dentry;
 
+DECLARE_PER_CPU(struct trace_seq, ftrace_event_seq);
+
+struct trace_print_flags {
+	unsigned long		mask;
+	const char		*name;
+};
+
+const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
+				   unsigned long flags,
+				   const struct trace_print_flags *flag_array);
+
 /*
  * The trace entry - the most basic unit of tracing. This is what
  * is printed in the end as a single line in the trace output, such as:
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index b5ff2e8229e..22c94719c56 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -87,6 +87,7 @@
  *	struct trace_seq *s = &iter->seq;
  *	struct ftrace_raw_<call> *field; <-- defined in stage 1
  *	struct trace_entry *entry;
+ *	struct trace_seq *p;
  *	int ret;
  *
  *	entry = iter->ent;
@@ -98,7 +99,9 @@
  *
  *	field = (typeof(field))entry;
  *
+ *	p = get_cpu_var(ftrace_event_seq);
  *	ret = trace_seq_printf(s, <TP_printk> "\n");
+ *	put_cpu();
  *	if (!ret)
  *		return TRACE_TYPE_PARTIAL_LINE;
  *
@@ -119,6 +122,14 @@
 #undef __get_str
 #define __get_str(field)	((char *)__entry + __entry->__str_loc_##field)
 
+#undef __print_flags
+#define __print_flags(flag, delim, flag_array...)			\
+	({								\
+		static const struct trace_print_flags flags[] =		\
+			{ flag_array, { -1, NULL }};			\
+		ftrace_print_flags_seq(p, delim, flag, flags);		\
+	})
+
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
 enum print_line_t							\
@@ -127,6 +138,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 	struct trace_seq *s = &iter->seq;				\
 	struct ftrace_raw_##call *field;				\
 	struct trace_entry *entry;					\
+	struct trace_seq *p;						\
 	int ret;							\
 									\
 	entry = iter->ent;						\
@@ -138,7 +150,9 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 									\
 	field = (typeof(field))entry;					\
 									\
+	p = &get_cpu_var(ftrace_event_seq);				\
 	ret = trace_seq_printf(s, #call ": " print);			\
+	put_cpu();							\
 	if (!ret)							\
 		return TRACE_TYPE_PARTIAL_LINE;				\
 									\
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 7136420603a..a4840c260c8 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -15,6 +15,9 @@
 #define EVENT_HASHSIZE	128
 
 static DECLARE_RWSEM(trace_event_mutex);
+
+DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq);
+
 static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
 
 static int next_event_type = __TRACE_LAST_TYPE + 1;
@@ -212,6 +215,42 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
 	return 0;
 }
 
+const char *
+ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
+		       unsigned long flags,
+		       const struct trace_print_flags *flag_array)
+{
+	unsigned long mask;
+	const char *str;
+	int i;
+
+	trace_seq_init(p);
+
+	for (i = 0;  flag_array[i].name && flags; i++) {
+
+		mask = flag_array[i].mask;
+		if ((flags & mask) != mask)
+			continue;
+
+		str = flag_array[i].name;
+		flags &= ~mask;
+		if (p->len && delim)
+			trace_seq_puts(p, delim);
+		trace_seq_puts(p, str);
+	}
+
+	/* check for left over flags */
+	if (flags) {
+		if (p->len && delim)
+			trace_seq_puts(p, delim);
+		trace_seq_printf(p, "0x%lx", flags);
+	}
+
+	trace_seq_putc(p, 0);
+
+	return p->buffer;
+}
+
 #ifdef CONFIG_KRETPROBES
 static inline const char *kretprobed(const char *name)
 {
-- 
cgit v1.2.3-70-g09d2


From 937cdb9db7f59278d0cb1582e6e64e3dfd73b4fc Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 15 May 2009 10:51:13 -0400
Subject: tracing: add previous task state info to sched switch event

It is useful to see the state of a task that is being switched out.
This patch adds the output of the state of the previous task in
the context switch event.

[ Impact: see state of switched out task in context switch ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/trace/events/sched.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index dd4033cf5b0..24ab5bcff7b 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -156,6 +156,7 @@ TRACE_EVENT(sched_switch,
 		__array(	char,	prev_comm,	TASK_COMM_LEN	)
 		__field(	pid_t,	prev_pid			)
 		__field(	int,	prev_prio			)
+		__field(	long,	prev_state			)
 		__array(	char,	next_comm,	TASK_COMM_LEN	)
 		__field(	pid_t,	next_pid			)
 		__field(	int,	next_prio			)
@@ -165,13 +166,19 @@ TRACE_EVENT(sched_switch,
 		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
 		__entry->prev_pid	= prev->pid;
 		__entry->prev_prio	= prev->prio;
+		__entry->prev_state	= prev->state;
 		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
 		__entry->next_pid	= next->pid;
 		__entry->next_prio	= next->prio;
 	),
 
-	TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
+	TP_printk("task %s:%d [%d] (%s) ==> %s:%d [%d]",
 		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
+		__entry->prev_state ?
+		  __print_flags(__entry->prev_state, "|",
+				{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
+				{ 16, "Z" }, { 32, "X" }, { 64, "x" },
+				{ 128, "W" }) : "R",
 		__entry->next_comm, __entry->next_pid, __entry->next_prio)
 );
 
-- 
cgit v1.2.3-70-g09d2


From 62ba180e80f4194a498585ac0e4c07daa8ca08d1 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 15 May 2009 16:16:30 -0400
Subject: tracing: add flag output for kmem events

This patch changes the output for gfp_flags from being a simple hex value
to the actual names.

  gfp_flags=GFP_ATOMIC  instead of gfp_flags=00000020

And even

  gfp_flags=GFP_KERNEL instead of gfp_flags=000000d0

(Thanks to Frederic Weisbecker for pointing out that the first version
 had a bad order of GFP masks)

[ Impact: more human readable output from tracer ]

Acked-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/trace/events/kmem.h | 53 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 45 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index c22c42f980b..9baba50d651 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -7,6 +7,43 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kmem
 
+/*
+ * The order of these masks is important. Matching masks will be seen
+ * first and the left over flags will end up showing by themselves.
+ *
+ * For example, if we have GFP_KERNEL before GFP_USER we wil get:
+ *
+ *  GFP_KERNEL|GFP_HARDWALL
+ *
+ * Thus most bits set go first.
+ */
+#define show_gfp_flags(flags)						\
+	(flags) ? __print_flags(flags, "|",				\
+	{(unsigned long)GFP_HIGHUSER_MOVABLE,	"GFP_HIGHUSER_MOVABLE"}, \
+	{(unsigned long)GFP_HIGHUSER,		"GFP_HIGHUSER"},	\
+	{(unsigned long)GFP_USER,		"GFP_USER"},		\
+	{(unsigned long)GFP_TEMPORARY,		"GFP_TEMPORARY"},	\
+	{(unsigned long)GFP_KERNEL,		"GFP_KERNEL"},		\
+	{(unsigned long)GFP_NOFS,		"GFP_NOFS"},		\
+	{(unsigned long)GFP_ATOMIC,		"GFP_ATOMIC"},		\
+	{(unsigned long)GFP_NOIO,		"GFP_NOIO"},		\
+	{(unsigned long)__GFP_HIGH,		"GFP_HIGH"},		\
+	{(unsigned long)__GFP_WAIT,		"GFP_WAIT"},		\
+	{(unsigned long)__GFP_IO,		"GFP_IO"},		\
+	{(unsigned long)__GFP_COLD,		"GFP_COLD"},		\
+	{(unsigned long)__GFP_NOWARN,		"GFP_NOWARN"},		\
+	{(unsigned long)__GFP_REPEAT,		"GFP_REPEAT"},		\
+	{(unsigned long)__GFP_NOFAIL,		"GFP_NOFAIL"},		\
+	{(unsigned long)__GFP_NORETRY,		"GFP_NORETRY"},		\
+	{(unsigned long)__GFP_COMP,		"GFP_COMP"},		\
+	{(unsigned long)__GFP_ZERO,		"GFP_ZERO"},		\
+	{(unsigned long)__GFP_NOMEMALLOC,	"GFP_NOMEMALLOC"},	\
+	{(unsigned long)__GFP_HARDWALL,		"GFP_HARDWALL"},	\
+	{(unsigned long)__GFP_THISNODE,		"GFP_THISNODE"},	\
+	{(unsigned long)__GFP_RECLAIMABLE,	"GFP_RECLAIMABLE"},	\
+	{(unsigned long)__GFP_MOVABLE,		"GFP_MOVABLE"}		\
+	) : "GFP_NOWAIT"
+
 TRACE_EVENT(kmalloc,
 
 	TP_PROTO(unsigned long call_site,
@@ -33,12 +70,12 @@ TRACE_EVENT(kmalloc,
 		__entry->gfp_flags	= gfp_flags;
 	),
 
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
 		__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
-		__entry->gfp_flags)
+		show_gfp_flags(__entry->gfp_flags))
 );
 
 TRACE_EVENT(kmem_cache_alloc,
@@ -67,12 +104,12 @@ TRACE_EVENT(kmem_cache_alloc,
 		__entry->gfp_flags	= gfp_flags;
 	),
 
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
 		__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
-		__entry->gfp_flags)
+		show_gfp_flags(__entry->gfp_flags))
 );
 
 TRACE_EVENT(kmalloc_node,
@@ -104,12 +141,12 @@ TRACE_EVENT(kmalloc_node,
 		__entry->node		= node;
 	),
 
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
 		__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
-		__entry->gfp_flags,
+		show_gfp_flags(__entry->gfp_flags),
 		__entry->node)
 );
 
@@ -142,12 +179,12 @@ TRACE_EVENT(kmem_cache_alloc_node,
 		__entry->node		= node;
 	),
 
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
+	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
 		__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
-		__entry->gfp_flags,
+		show_gfp_flags(__entry->gfp_flags),
 		__entry->node)
 );
 
-- 
cgit v1.2.3-70-g09d2


From 0f4fc29dd68dfab9c6ddd5d087d34a5b6818cb00 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 20 May 2009 19:21:47 -0400
Subject: tracing: add __print_symbolic to trace events

This patch adds __print_symbolic which is similar to __print_flags but
works for an enumeration type instead. That is, there is only a one to one
mapping between the values and the symbols. When a match is made, then
it is printed, otherwise the hex value is outputed.

[ Impact: add interface for showing symbol names in events ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/ftrace_event.h |  3 +++
 include/trace/ftrace.h       |  8 ++++++++
 kernel/trace/trace_output.c  | 25 +++++++++++++++++++++++++
 3 files changed, 36 insertions(+)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 4b58cf1a11c..bbf40f624fc 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -20,6 +20,9 @@ const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
 				   unsigned long flags,
 				   const struct trace_print_flags *flag_array);
 
+const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
+				     const struct trace_print_flags *symbol_array);
+
 /*
  * The trace entry - the most basic unit of tracing. This is what
  * is printed in the end as a single line in the trace output, such as:
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 22c94719c56..87fc227c6fb 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -130,6 +130,14 @@
 		ftrace_print_flags_seq(p, delim, flag, flags);		\
 	})
 
+#undef __print_symbolic
+#define __print_symbolic(value, symbol_array...)			\
+	({								\
+		static const struct trace_print_flags symbols[] =	\
+			{ symbol_array, { -1, NULL }};			\
+		ftrace_print_symbols_seq(p, value, symbols);		\
+	})
+
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
 enum print_line_t							\
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index a4840c260c8..c12d95db2f5 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -251,6 +251,31 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
 	return p->buffer;
 }
 
+const char *
+ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
+			 const struct trace_print_flags *symbol_array)
+{
+	int i;
+
+	trace_seq_init(p);
+
+	for (i = 0;  symbol_array[i].name; i++) {
+
+		if (val != symbol_array[i].mask)
+			continue;
+
+		trace_seq_puts(p, symbol_array[i].name);
+		break;
+	}
+
+	if (!p->len)
+		trace_seq_printf(p, "0x%lx", val);
+		
+	trace_seq_putc(p, 0);
+
+	return p->buffer;
+}
+
 #ifdef CONFIG_KRETPROBES
 static inline const char *kretprobed(const char *name)
 {
-- 
cgit v1.2.3-70-g09d2


From c2adae0970ca1db8adb92fb56ae3bcabd916e8bd Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 20 May 2009 19:56:19 -0400
Subject: tracing: convert irq events to use __print_symbolic

The recording of the names at trace time is inefficient. This patch
implements the softirq event recording to only record the vector
and then use the __print_symbolic interface to print out the names.

[ Impact: faster recording of softirq events ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/trace/events/irq.h | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index 32a9f7ef432..683fb36a994 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -7,6 +7,19 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM irq
 
+#define softirq_name(sirq) { sirq, #sirq }
+#define show_softirq_name(val)						\
+	__print_symbolic(val,						\
+			 softirq_name(HI_SOFTIRQ),			\
+			 softirq_name(TIMER_SOFTIRQ),			\
+			 softirq_name(NET_TX_SOFTIRQ),			\
+			 softirq_name(NET_RX_SOFTIRQ),			\
+			 softirq_name(BLOCK_SOFTIRQ),			\
+			 softirq_name(TASKLET_SOFTIRQ),			\
+			 softirq_name(SCHED_SOFTIRQ),			\
+			 softirq_name(HRTIMER_SOFTIRQ),			\
+			 softirq_name(RCU_SOFTIRQ))
+
 /**
  * irq_handler_entry - called immediately before the irq action handler
  * @irq: irq number
@@ -87,15 +100,14 @@ TRACE_EVENT(softirq_entry,
 
 	TP_STRUCT__entry(
 		__field(	int,	vec			)
-		__string(	name,	softirq_to_name[h-vec]	)
 	),
 
 	TP_fast_assign(
 		__entry->vec = (int)(h - vec);
-		__assign_str(name, softirq_to_name[h-vec]);
 	),
 
-	TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name))
+	TP_printk("softirq=%d action=%s", __entry->vec,
+		  show_softirq_name(__entry->vec))
 );
 
 /**
@@ -117,15 +129,14 @@ TRACE_EVENT(softirq_exit,
 
 	TP_STRUCT__entry(
 		__field(	int,	vec			)
-		__string(	name,	softirq_to_name[h-vec]	)
 	),
 
 	TP_fast_assign(
 		__entry->vec = (int)(h - vec);
-		__assign_str(name, softirq_to_name[h-vec]);
 	),
 
-	TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name))
+	TP_printk("softirq=%d action=%s", __entry->vec,
+		  show_softirq_name(__entry->vec))
 );
 
 #endif /*  _TRACE_IRQ_H */
-- 
cgit v1.2.3-70-g09d2


From 8e401eccd3a62fb57f117bb09b7c1fc70ab19e8c Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 26 May 2009 21:16:25 -0700
Subject: phy: Eliminate references to BUS_ID_SIZE.

Just use the constant 20 to keep things working.

If someone is so motivated, this can be converted over to
dynamic strings.  I tried and it's a lot of work.

But for now this is good enough.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index c216e4e503b..b1368b8f657 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -79,7 +79,7 @@ typedef enum {
  * Need to be a little smaller than phydev->dev.bus_id to leave room
  * for the ":%02x"
  */
-#define MII_BUS_ID_SIZE	(BUS_ID_SIZE - 3)
+#define MII_BUS_ID_SIZE	(20 - 3)
 
 /*
  * The Bus class for PHYs.  Devices which provide access to
@@ -407,7 +407,7 @@ struct phy_driver {
 /* A Structure for boards to register fixups with the PHY Lib */
 struct phy_fixup {
 	struct list_head list;
-	char bus_id[BUS_ID_SIZE];
+	char bus_id[20];
 	u32 phy_uid;
 	u32 phy_uid_mask;
 	int (*run)(struct phy_device *phydev);
-- 
cgit v1.2.3-70-g09d2


From b74be6119e9e38390395f08767b7c84de9023b38 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 22 Apr 2009 10:20:23 +0800
Subject: ACPICA: Update error/warning interfaces

Moved the module name and line number to the end of the message.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/acpica/utmisc.c | 20 +++++++++-----------
 include/acpi/acpixf.h        |  5 ++++-
 2 files changed, 13 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/acpica/utmisc.c b/drivers/acpi/acpica/utmisc.c
index 1c9e250caef..fbe782348b0 100644
--- a/drivers/acpi/acpica/utmisc.c
+++ b/drivers/acpi/acpica/utmisc.c
@@ -1033,11 +1033,12 @@ acpi_error(const char *module_name, u32 line_number, const char *format, ...)
 {
 	va_list args;
 
-	acpi_os_printf("ACPI Error (%s-%04d): ", module_name, line_number);
+	acpi_os_printf("ACPI Error: ");
 
 	va_start(args, format);
 	acpi_os_vprintf(format, args);
-	acpi_os_printf(" [%X]\n", ACPI_CA_VERSION);
+	acpi_os_printf(" %8.8X %s-%u\n", ACPI_CA_VERSION, module_name,
+		       line_number);
 	va_end(args);
 }
 
@@ -1047,12 +1048,12 @@ acpi_exception(const char *module_name,
 {
 	va_list args;
 
-	acpi_os_printf("ACPI Exception (%s-%04d): %s, ", module_name,
-		       line_number, acpi_format_exception(status));
+	acpi_os_printf("ACPI Exception: %s, ", acpi_format_exception(status));
 
 	va_start(args, format);
 	acpi_os_vprintf(format, args);
-	acpi_os_printf(" [%X]\n", ACPI_CA_VERSION);
+	acpi_os_printf(" %8.8X %s-%u\n", ACPI_CA_VERSION, module_name,
+		       line_number);
 	va_end(args);
 }
 
@@ -1061,11 +1062,12 @@ acpi_warning(const char *module_name, u32 line_number, const char *format, ...)
 {
 	va_list args;
 
-	acpi_os_printf("ACPI Warning (%s-%04d): ", module_name, line_number);
+	acpi_os_printf("ACPI Warning: ");
 
 	va_start(args, format);
 	acpi_os_vprintf(format, args);
-	acpi_os_printf(" [%X]\n", ACPI_CA_VERSION);
+	acpi_os_printf(" %8.8X %s-%u\n", ACPI_CA_VERSION, module_name,
+		       line_number);
 	va_end(args);
 }
 
@@ -1074,10 +1076,6 @@ acpi_info(const char *module_name, u32 line_number, const char *format, ...)
 {
 	va_list args;
 
-	/*
-	 * Removed module_name, line_number, and acpica version, not needed
-	 * for info output
-	 */
 	acpi_os_printf("ACPI: ");
 
 	va_start(args, format);
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 4db89e98535..a868df6fcef 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -375,7 +375,7 @@ acpi_status acpi_leave_sleep_state_prep(u8 sleep_state);
 acpi_status acpi_leave_sleep_state(u8 sleep_state);
 
 /*
- * Debug output
+ * Error/Warning output
  */
 void ACPI_INTERNAL_VAR_XFACE
 acpi_error(const char *module_name,
@@ -394,6 +394,9 @@ void ACPI_INTERNAL_VAR_XFACE
 acpi_info(const char *module_name,
 	  u32 line_number, const char *format, ...) ACPI_PRINTF_LIKE(3);
 
+/*
+ * Debug output
+ */
 #ifdef ACPI_DEBUG_OUTPUT
 
 void ACPI_INTERNAL_VAR_XFACE
-- 
cgit v1.2.3-70-g09d2


From 8eb7b2477c4e4fec4788605e4edb5f7acafb59ff Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 22 Apr 2009 10:28:22 +0800
Subject: ACPICA: Fix a few warnings for gcc 3.4.4

Mostly for acpiexec, one in the core subsystem.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/acpica/hwregs.c  | 4 ++--
 include/acpi/platform/acgcc.h | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c
index 7b2fb602b5c..23d5505cb1f 100644
--- a/drivers/acpi/acpica/hwregs.c
+++ b/drivers/acpi/acpica/hwregs.c
@@ -81,9 +81,9 @@ acpi_status acpi_hw_clear_acpi_status(void)
 
 	ACPI_FUNCTION_TRACE(hw_clear_acpi_status);
 
-	ACPI_DEBUG_PRINT((ACPI_DB_IO, "About to write %04X to %0llX\n",
+	ACPI_DEBUG_PRINT((ACPI_DB_IO, "About to write %04X to %8.8X%8.8X\n",
 			  ACPI_BITMASK_ALL_FIXED_STATUS,
-			  acpi_gbl_xpm1a_status.address));
+			  ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address)));
 
 	lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
 
diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h
index 8e2cdc57b19..935c5d7fc86 100644
--- a/include/acpi/platform/acgcc.h
+++ b/include/acpi/platform/acgcc.h
@@ -62,4 +62,8 @@
  */
 #define ACPI_UNUSED_VAR __attribute__ ((unused))
 
+#ifdef _ANSI
+#define inline
+#endif
+
 #endif				/* __ACGCC_H__ */
-- 
cgit v1.2.3-70-g09d2


From 35d7c1cfe5262480d3d8e6ccd7e4caf3a9c8ab39 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 22 Apr 2009 12:41:27 +0800
Subject: ACPICA: Cleanup byte/word/dword extraction macros, fix possible
 warnings

Removed unnecessary masking. For the 64-bit macros, removed
the structure overlay. Fixes aliasing warnings seen with gcc 4+
compilers.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/actypes.h | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index f555d927f7c..37ba576d06e 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -429,20 +429,12 @@ typedef unsigned long long acpi_integer;
 
 /* Data manipulation */
 
-#define ACPI_LOWORD(l)                  ((u16)(u32)(l))
-#define ACPI_HIWORD(l)                  ((u16)((((u32)(l)) >> 16) & 0xFFFF))
-#define ACPI_LOBYTE(l)                  ((u8)(u16)(l))
-#define ACPI_HIBYTE(l)                  ((u8)((((u16)(l)) >> 8) & 0xFF))
-
-/* Full 64-bit integer must be available on both 32-bit and 64-bit platforms */
-
-struct acpi_integer_overlay {
-	u32 lo_dword;
-	u32 hi_dword;
-};
-
-#define ACPI_LODWORD(integer)           (ACPI_CAST_PTR (struct acpi_integer_overlay, &integer)->lo_dword)
-#define ACPI_HIDWORD(integer)           (ACPI_CAST_PTR (struct acpi_integer_overlay, &integer)->hi_dword)
+#define ACPI_LOBYTE(integer)            ((u8)   (u16)(integer))
+#define ACPI_HIBYTE(integer)            ((u8) (((u16)(integer)) >> 8))
+#define ACPI_LOWORD(integer)            ((u16)  (u32)(integer))
+#define ACPI_HIWORD(integer)            ((u16)(((u32)(integer)) >> 16))
+#define ACPI_LODWORD(integer64)         ((u32)  (u64)(integer64))
+#define ACPI_HIDWORD(integer64)         ((u32)(((u64)(integer64)) >> 32))
 
 #define ACPI_SET_BIT(target,bit)        ((target) |= (bit))
 #define ACPI_CLEAR_BIT(target,bit)      ((target) &= ~(bit))
-- 
cgit v1.2.3-70-g09d2


From e0c437bcca6926b541c738b5c64445654750b365 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 22 Apr 2009 13:39:47 +0800
Subject: ACPICA: Linux OSL: cleanup/update/merge

Merge the OSL with the actual file used by Linux, so that the
file does not require patching when integrated with Linux. General
cleanup and some restructuring.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/platform/aclinux.h | 63 +++++++++++++++++++++++++----------------
 1 file changed, 38 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index 6d49b2a498c..fcb8e4b159b 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -1,11 +1,11 @@
 /******************************************************************************
  *
- * Name: aclinux.h - OS specific defines, etc.
+ * Name: aclinux.h - OS specific defines, etc. for Linux
  *
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2008, Intel Corp.
+ * Copyright (C) 2000 - 2009, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -44,10 +44,13 @@
 #ifndef __ACLINUX_H__
 #define __ACLINUX_H__
 
+/* Common (in-kernel/user-space) ACPICA configuration */
+
 #define ACPI_USE_SYSTEM_CLIBRARY
 #define ACPI_USE_DO_WHILE_0
 #define ACPI_MUTEX_TYPE             ACPI_BINARY_SEMAPHORE
 
+
 #ifdef __KERNEL__
 
 #include <linux/string.h>
@@ -63,15 +66,18 @@
 #include <linux/spinlock_types.h>
 #include <asm/current.h>
 
-/* Host-dependent types and defines */
+/* Host-dependent types and defines for in-kernel ACPICA */
 
 #define ACPI_MACHINE_WIDTH          BITS_PER_LONG
-#define acpi_cache_t                        struct kmem_cache
-#define acpi_spinlock                   spinlock_t *
 #define ACPI_EXPORT_SYMBOL(symbol)  EXPORT_SYMBOL(symbol);
 #define strtoul                     simple_strtoul
 
-#else				/* !__KERNEL__ */
+#define acpi_cache_t                        struct kmem_cache
+#define acpi_spinlock                       spinlock_t *
+#define acpi_cpu_flags                      unsigned long
+#define acpi_thread_id                      struct task_struct *
+
+#else /* !__KERNEL__ */
 
 #include <stdarg.h>
 #include <string.h>
@@ -79,6 +85,11 @@
 #include <ctype.h>
 #include <unistd.h>
 
+/* Host-dependent types and defines for user-space ACPICA */
+
+#define ACPI_FLUSH_CPU_CACHE()
+#define acpi_thread_id                      pthread_t
+
 #if defined(__ia64__) || defined(__x86_64__)
 #define ACPI_MACHINE_WIDTH          64
 #define COMPILER_DEPENDENT_INT64    long
@@ -94,17 +105,17 @@
 #define __cdecl
 #endif
 
-#define ACPI_FLUSH_CPU_CACHE()
-#endif				/* __KERNEL__ */
+#endif /* __KERNEL__ */
 
 /* Linux uses GCC */
 
 #include "acgcc.h"
 
-#define acpi_cpu_flags unsigned long
-
-#define acpi_thread_id struct task_struct *
 
+#ifdef __KERNEL__
+/*
+ * Overrides for in-kernel ACPICA
+ */
 static inline acpi_thread_id acpi_os_get_thread_id(void)
 {
 	return current;
@@ -119,30 +130,32 @@ static inline acpi_thread_id acpi_os_get_thread_id(void)
 #include <acpi/actypes.h>
 static inline void *acpi_os_allocate(acpi_size size)
 {
-	return kmalloc(size, irqs_disabled()? GFP_ATOMIC : GFP_KERNEL);
+	return kmalloc(size, irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL);
 }
+
 static inline void *acpi_os_allocate_zeroed(acpi_size size)
 {
-	return kzalloc(size, irqs_disabled()? GFP_ATOMIC : GFP_KERNEL);
+	return kzalloc(size, irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL);
 }
 
 static inline void *acpi_os_acquire_object(acpi_cache_t * cache)
 {
 	return kmem_cache_zalloc(cache,
-				 irqs_disabled()? GFP_ATOMIC : GFP_KERNEL);
+		irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL);
 }
 
-#define ACPI_ALLOCATE(a)	acpi_os_allocate(a)
-#define ACPI_ALLOCATE_ZEROED(a)	acpi_os_allocate_zeroed(a)
-#define ACPI_FREE(a)		kfree(a)
+#define ACPI_ALLOCATE(a)        acpi_os_allocate(a)
+#define ACPI_ALLOCATE_ZEROED(a) acpi_os_allocate_zeroed(a)
+#define ACPI_FREE(a)            kfree(a)
 
-/*
- * We need to show where it is safe to preempt execution of ACPICA
- */
-#define ACPI_PREEMPTION_POINT()		\
-	do {				\
-		if (!irqs_disabled())	\
-			cond_resched();	\
+/* Used within ACPICA to show where it is safe to preempt execution */
+
+#define ACPI_PREEMPTION_POINT() \
+	do { \
+		if (!irqs_disabled()) \
+			cond_resched(); \
 	} while (0)
 
-#endif				/* __ACLINUX_H__ */
+#endif /* __KERNEL__ */
+
+#endif /* __ACLINUX_H__ */
-- 
cgit v1.2.3-70-g09d2


From ba9e2ae443feb7231d9631ea0f62b63e26cfb9b1 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Fri, 24 Apr 2009 10:43:43 +0800
Subject: ACPICA: Update version to 20090422.

Version 20090422.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/acpixf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index a868df6fcef..60b76a3fb3a 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -47,7 +47,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20090320
+#define ACPI_CA_VERSION                 0x20090422
 
 #include "actypes.h"
 #include "actbl.h"
-- 
cgit v1.2.3-70-g09d2


From b2f7ddcfcb9c2436896cb339a7ff70245648f033 Mon Sep 17 00:00:00 2001
From: Lin Ming <ming.m.lin@intel.com>
Date: Thu, 21 May 2009 10:42:09 +0800
Subject: ACPICA: New: AcpiInstallMethod - install a single control method

This interface enables the override or creation of a single
control method. Useful to repair a bug or install a missing method.

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/acpica/aclocal.h  |   1 +
 drivers/acpi/acpica/amlcode.h  |   2 +-
 drivers/acpi/acpica/excreate.c |   2 +-
 drivers/acpi/acpica/exdump.c   |   6 +-
 drivers/acpi/acpica/nsobject.c |   9 +++
 drivers/acpi/acpica/nsxfname.c | 150 +++++++++++++++++++++++++++++++++++++++++
 include/acpi/acpixf.h          |   2 +
 7 files changed, 168 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index 2ec394a328e..882b4b55867 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -205,6 +205,7 @@ struct acpi_namespace_node {
 #define ANOBJ_METHOD_LOCAL              0x08	/* Node is a method local */
 #define ANOBJ_SUBTREE_HAS_INI           0x10	/* Used to optimize device initialization */
 #define ANOBJ_EVALUATED                 0x20	/* Set on first evaluation of node */
+#define ANOBJ_ALLOCATED_BUFFER          0x40	/* Method AML buffer is dynamic (install_method) */
 
 #define ANOBJ_IS_EXTERNAL               0x08	/* i_aSL only: This object created via External() */
 #define ANOBJ_METHOD_NO_RETVAL          0x10	/* i_aSL only: Method has no return value */
diff --git a/drivers/acpi/acpica/amlcode.h b/drivers/acpi/acpica/amlcode.h
index ff851c5df69..067f967eb38 100644
--- a/drivers/acpi/acpica/amlcode.h
+++ b/drivers/acpi/acpica/amlcode.h
@@ -483,7 +483,7 @@ typedef enum {
 
 #define AML_METHOD_ARG_COUNT        0x07
 #define AML_METHOD_SERIALIZED       0x08
-#define AML_METHOD_SYNCH_LEVEL      0xF0
+#define AML_METHOD_SYNC_LEVEL       0xF0
 
 /* METHOD_FLAGS_ARG_COUNT is not used internally, define additional flags */
 
diff --git a/drivers/acpi/acpica/excreate.c b/drivers/acpi/acpica/excreate.c
index a57ad2564ab..02b25d233d9 100644
--- a/drivers/acpi/acpica/excreate.c
+++ b/drivers/acpi/acpica/excreate.c
@@ -502,7 +502,7 @@ acpi_ex_create_method(u8 * aml_start,
 		 * ACPI 2.0: sync_level = sync_level in method declaration
 		 */
 		obj_desc->method.sync_level = (u8)
-		    ((method_flags & AML_METHOD_SYNCH_LEVEL) >> 4);
+		    ((method_flags & AML_METHOD_SYNC_LEVEL) >> 4);
 	}
 
 	/* Attach the new object to the method Node */
diff --git a/drivers/acpi/acpica/exdump.c b/drivers/acpi/acpica/exdump.c
index 89d141fdae0..ec524614e70 100644
--- a/drivers/acpi/acpica/exdump.c
+++ b/drivers/acpi/acpica/exdump.c
@@ -120,9 +120,11 @@ static struct acpi_exdump_info acpi_ex_dump_event[2] = {
 	{ACPI_EXD_POINTER, ACPI_EXD_OFFSET(event.os_semaphore), "OsSemaphore"}
 };
 
-static struct acpi_exdump_info acpi_ex_dump_method[8] = {
+static struct acpi_exdump_info acpi_ex_dump_method[9] = {
 	{ACPI_EXD_INIT, ACPI_EXD_TABLE_SIZE(acpi_ex_dump_method), NULL},
-	{ACPI_EXD_UINT8, ACPI_EXD_OFFSET(method.param_count), "ParamCount"},
+	{ACPI_EXD_UINT8, ACPI_EXD_OFFSET(method.method_flags), "Method Flags"},
+	{ACPI_EXD_UINT8, ACPI_EXD_OFFSET(method.param_count),
+	 "Parameter Count"},
 	{ACPI_EXD_UINT8, ACPI_EXD_OFFSET(method.sync_level), "Sync Level"},
 	{ACPI_EXD_POINTER, ACPI_EXD_OFFSET(method.mutex), "Mutex"},
 	{ACPI_EXD_UINT8, ACPI_EXD_OFFSET(method.owner_id), "Owner Id"},
diff --git a/drivers/acpi/acpica/nsobject.c b/drivers/acpi/acpica/nsobject.c
index 3eb20bfda9d..60f3af08d28 100644
--- a/drivers/acpi/acpica/nsobject.c
+++ b/drivers/acpi/acpica/nsobject.c
@@ -213,6 +213,15 @@ void acpi_ns_detach_object(struct acpi_namespace_node *node)
 		return_VOID;
 	}
 
+	if (node->flags & ANOBJ_ALLOCATED_BUFFER) {
+
+		/* Free the dynamic aml buffer */
+
+		if (obj_desc->common.type == ACPI_TYPE_METHOD) {
+			ACPI_FREE(obj_desc->method.aml_start);
+		}
+	}
+
 	/* Clear the entry in all cases */
 
 	node->object = NULL;
diff --git a/drivers/acpi/acpica/nsxfname.c b/drivers/acpi/acpica/nsxfname.c
index 9589fea2499..f23593d6add 100644
--- a/drivers/acpi/acpica/nsxfname.c
+++ b/drivers/acpi/acpica/nsxfname.c
@@ -45,6 +45,8 @@
 #include <acpi/acpi.h>
 #include "accommon.h"
 #include "acnamesp.h"
+#include "acparser.h"
+#include "amlcode.h"
 
 #define _COMPONENT          ACPI_NAMESPACE
 ACPI_MODULE_NAME("nsxfname")
@@ -358,3 +360,151 @@ acpi_get_object_info(acpi_handle handle, struct acpi_buffer * buffer)
 }
 
 ACPI_EXPORT_SYMBOL(acpi_get_object_info)
+
+/******************************************************************************
+ *
+ * FUNCTION:    acpi_install_method
+ *
+ * PARAMETERS:  Buffer         - An ACPI table containing one control method
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Install a control method into the namespace. If the method
+ *              name already exists in the namespace, it is overwritten. The
+ *              input buffer must contain a valid DSDT or SSDT containing a
+ *              single control method.
+ *
+ ******************************************************************************/
+acpi_status acpi_install_method(u8 *buffer)
+{
+	struct acpi_table_header *table =
+	    ACPI_CAST_PTR(struct acpi_table_header, buffer);
+	u8 *aml_buffer;
+	u8 *aml_start;
+	char *path;
+	struct acpi_namespace_node *node;
+	union acpi_operand_object *method_obj;
+	struct acpi_parse_state parser_state;
+	u32 aml_length;
+	u16 opcode;
+	u8 method_flags;
+	acpi_status status;
+
+	/* Parameter validation */
+
+	if (!buffer) {
+		return AE_BAD_PARAMETER;
+	}
+
+	/* Table must be a DSDT or SSDT */
+
+	if (!ACPI_COMPARE_NAME(table->signature, ACPI_SIG_DSDT) &&
+	    !ACPI_COMPARE_NAME(table->signature, ACPI_SIG_SSDT)) {
+		return AE_BAD_HEADER;
+	}
+
+	/* First AML opcode in the table must be a control method */
+
+	parser_state.aml = buffer + sizeof(struct acpi_table_header);
+	opcode = acpi_ps_peek_opcode(&parser_state);
+	if (opcode != AML_METHOD_OP) {
+		return AE_BAD_PARAMETER;
+	}
+
+	/* Extract method information from the raw AML */
+
+	parser_state.aml += acpi_ps_get_opcode_size(opcode);
+	parser_state.pkg_end = acpi_ps_get_next_package_end(&parser_state);
+	path = acpi_ps_get_next_namestring(&parser_state);
+	method_flags = *parser_state.aml++;
+	aml_start = parser_state.aml;
+	aml_length = ACPI_PTR_DIFF(parser_state.pkg_end, aml_start);
+
+	/*
+	 * Allocate resources up-front. We don't want to have to delete a new
+	 * node from the namespace if we cannot allocate memory.
+	 */
+	aml_buffer = ACPI_ALLOCATE(aml_length);
+	if (!aml_buffer) {
+		return AE_NO_MEMORY;
+	}
+
+	method_obj = acpi_ut_create_internal_object(ACPI_TYPE_METHOD);
+	if (!method_obj) {
+		ACPI_FREE(aml_buffer);
+		return AE_NO_MEMORY;
+	}
+
+	/* Lock namespace for acpi_ns_lookup, we may be creating a new node */
+
+	status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
+	if (ACPI_FAILURE(status)) {
+		goto error_exit;
+	}
+
+	/* The lookup either returns an existing node or creates a new one */
+
+	status =
+	    acpi_ns_lookup(NULL, path, ACPI_TYPE_METHOD, ACPI_IMODE_LOAD_PASS1,
+			   ACPI_NS_DONT_OPEN_SCOPE | ACPI_NS_ERROR_IF_FOUND,
+			   NULL, &node);
+
+	(void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
+
+	if (ACPI_FAILURE(status)) {	/* ns_lookup */
+		if (status != AE_ALREADY_EXISTS) {
+			goto error_exit;
+		}
+
+		/* Node existed previously, make sure it is a method node */
+
+		if (node->type != ACPI_TYPE_METHOD) {
+			status = AE_TYPE;
+			goto error_exit;
+		}
+	}
+
+	/* Copy the method AML to the local buffer */
+
+	ACPI_MEMCPY(aml_buffer, aml_start, aml_length);
+
+	/* Initialize the method object with the new method's information */
+
+	method_obj->method.aml_start = aml_buffer;
+	method_obj->method.aml_length = aml_length;
+
+	method_obj->method.param_count = (u8)
+	    (method_flags & AML_METHOD_ARG_COUNT);
+
+	method_obj->method.method_flags = (u8)
+	    (method_flags & ~AML_METHOD_ARG_COUNT);
+
+	if (method_flags & AML_METHOD_SERIALIZED) {
+		method_obj->method.sync_level = (u8)
+		    ((method_flags & AML_METHOD_SYNC_LEVEL) >> 4);
+	}
+
+	/*
+	 * Now that it is complete, we can attach the new method object to
+	 * the method Node (detaches/deletes any existing object)
+	 */
+	status = acpi_ns_attach_object(node, method_obj, ACPI_TYPE_METHOD);
+
+	/*
+	 * Flag indicates AML buffer is dynamic, must be deleted later.
+	 * Must be set only after attach above.
+	 */
+	node->flags |= ANOBJ_ALLOCATED_BUFFER;
+
+	/* Remove local reference to the method object */
+
+	acpi_ut_remove_reference(method_obj);
+	return status;
+
+error_exit:
+
+	ACPI_FREE(aml_buffer);
+	ACPI_FREE(method_obj);
+	return status;
+}
+ACPI_EXPORT_SYMBOL(acpi_install_method)
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 60b76a3fb3a..c3b08d3330e 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -201,6 +201,8 @@ acpi_evaluate_object_typed(acpi_handle object,
 acpi_status
 acpi_get_object_info(acpi_handle handle, struct acpi_buffer *return_buffer);
 
+acpi_status acpi_install_method(u8 *buffer);
+
 acpi_status
 acpi_get_next_object(acpi_object_type type,
 		     acpi_handle parent,
-- 
cgit v1.2.3-70-g09d2


From d6a1cd4975a5ffaa21a961be04a469519edf50d6 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Thu, 21 May 2009 11:06:53 +0800
Subject: ACPICA: Update version to 20090521.

Update version number.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/acpixf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index c3b08d3330e..82ec6a3c050 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -47,7 +47,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20090422
+#define ACPI_CA_VERSION                 0x20090521
 
 #include "actypes.h"
 #include "actbl.h"
-- 
cgit v1.2.3-70-g09d2


From 78a478d0efd9e86e5345b436e130497b4e5846e8 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 26 May 2009 18:50:21 +0000
Subject: gro: Inline skb_gro_header and cache frag0 virtual address

The function skb_gro_header is called four times per packet which
quickly adds up at 10Gb/s.  This patch inlines it to allow better
optimisations.

Some architectures perform multiplication for page_address, which
is done by each skb_gro_header invocation.  This patch caches that
value in skb->cb to avoid the unnecessary multiplications.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 22 +++++++++++++++-------
 net/core/dev.c            | 27 ++++++++++++---------------
 2 files changed, 27 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 586b71f0358..61890ed0bcf 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1008,6 +1008,9 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 void netif_napi_del(struct napi_struct *napi);
 
 struct napi_gro_cb {
+	/* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */
+	void *frag0;
+
 	/* This indicates where we are processing relative to skb->data. */
 	int data_offset;
 
@@ -1107,9 +1110,9 @@ extern int		dev_restart(struct net_device *dev);
 #ifdef CONFIG_NETPOLL_TRAP
 extern int		netpoll_trap(void);
 #endif
-extern void	      *skb_gro_header(struct sk_buff *skb, unsigned int hlen);
 extern int	       skb_gro_receive(struct sk_buff **head,
 				       struct sk_buff *skb);
+extern void	       skb_gro_reset_offset(struct sk_buff *skb);
 
 static inline unsigned int skb_gro_offset(const struct sk_buff *skb)
 {
@@ -1126,23 +1129,28 @@ static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len)
 	NAPI_GRO_CB(skb)->data_offset += len;
 }
 
-static inline void skb_gro_reset_offset(struct sk_buff *skb)
+static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
 {
-	NAPI_GRO_CB(skb)->data_offset = 0;
+	unsigned int offset = skb_gro_offset(skb);
+
+	hlen += offset;
+	if (!NAPI_GRO_CB(skb)->frag0 ||
+	    unlikely(skb_shinfo(skb)->frags[0].size + skb_headlen(skb) < hlen))
+		return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
+
+	return NAPI_GRO_CB(skb)->frag0 + offset;
 }
 
 static inline void *skb_gro_mac_header(struct sk_buff *skb)
 {
 	return skb_headlen(skb) ? skb_mac_header(skb) :
-	       page_address(skb_shinfo(skb)->frags[0].page) +
-	       skb_shinfo(skb)->frags[0].page_offset;
+	       NAPI_GRO_CB(skb)->frag0;
 }
 
 static inline void *skb_gro_network_header(struct sk_buff *skb)
 {
 	return skb_headlen(skb) ? skb_network_header(skb) :
-	       page_address(skb_shinfo(skb)->frags[0].page) +
-	       skb_shinfo(skb)->frags[0].page_offset + skb_network_offset(skb);
+	       NAPI_GRO_CB(skb)->frag0 + skb_network_offset(skb);
 }
 
 static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
diff --git a/net/core/dev.c b/net/core/dev.c
index 5eb3e48ab31..bdb1a738193 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2390,21 +2390,6 @@ void napi_gro_flush(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
-void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
-{
-	unsigned int offset = skb_gro_offset(skb);
-
-	hlen += offset;
-	if (unlikely(skb_headlen(skb) ||
-		     skb_shinfo(skb)->frags[0].size < hlen ||
-		     PageHighMem(skb_shinfo(skb)->frags[0].page)))
-		return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
-
-	return page_address(skb_shinfo(skb)->frags[0].page) +
-	       skb_shinfo(skb)->frags[0].page_offset + offset;
-}
-EXPORT_SYMBOL(skb_gro_header);
-
 int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff **pp = NULL;
@@ -2520,6 +2505,18 @@ int napi_skb_finish(int ret, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(napi_skb_finish);
 
+void skb_gro_reset_offset(struct sk_buff *skb)
+{
+	NAPI_GRO_CB(skb)->data_offset = 0;
+	NAPI_GRO_CB(skb)->frag0 = NULL;
+
+	if (!skb_headlen(skb) && !PageHighMem(skb_shinfo(skb)->frags[0].page))
+		NAPI_GRO_CB(skb)->frag0 =
+			page_address(skb_shinfo(skb)->frags[0].page) +
+			skb_shinfo(skb)->frags[0].page_offset;
+}
+EXPORT_SYMBOL(skb_gro_reset_offset);
+
 int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	skb_gro_reset_offset(skb);
-- 
cgit v1.2.3-70-g09d2


From 78d3fd0b7de844a6dad56e9620fc9d2271b32ab9 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 26 May 2009 18:50:23 +0000
Subject: gro: Only use skb_gro_header for completely non-linear packets

Currently skb_gro_header is used for packets which put the hardware
header in skb->data with the rest in frags.  Since the drivers that
need this optimisation all provide completely non-linear packets,
we can gain extra optimisations by only performing the frag0
optimisation for completely non-linear packets.

In particular, we can simply test frag0 (instead of skb_headlen)
to see whether the optimisation is in force.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 11 ++++++-----
 net/core/dev.c            |  3 ++-
 2 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 61890ed0bcf..d1afc3b6485 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1135,22 +1135,23 @@ static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
 
 	hlen += offset;
 	if (!NAPI_GRO_CB(skb)->frag0 ||
-	    unlikely(skb_shinfo(skb)->frags[0].size + skb_headlen(skb) < hlen))
+	    unlikely(skb_shinfo(skb)->frags[0].size < hlen)) {
+		NAPI_GRO_CB(skb)->frag0 = NULL;
 		return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
+	}
 
 	return NAPI_GRO_CB(skb)->frag0 + offset;
 }
 
 static inline void *skb_gro_mac_header(struct sk_buff *skb)
 {
-	return skb_headlen(skb) ? skb_mac_header(skb) :
-	       NAPI_GRO_CB(skb)->frag0;
+	return NAPI_GRO_CB(skb)->frag0 ?: skb_mac_header(skb);
 }
 
 static inline void *skb_gro_network_header(struct sk_buff *skb)
 {
-	return skb_headlen(skb) ? skb_network_header(skb) :
-	       NAPI_GRO_CB(skb)->frag0 + skb_network_offset(skb);
+	return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) +
+	       skb_network_offset(skb);
 }
 
 static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
diff --git a/net/core/dev.c b/net/core/dev.c
index bdb1a738193..f9d90c56b6f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2510,7 +2510,8 @@ void skb_gro_reset_offset(struct sk_buff *skb)
 	NAPI_GRO_CB(skb)->data_offset = 0;
 	NAPI_GRO_CB(skb)->frag0 = NULL;
 
-	if (!skb_headlen(skb) && !PageHighMem(skb_shinfo(skb)->frags[0].page))
+	if (skb->mac_header == skb->tail &&
+	    !PageHighMem(skb_shinfo(skb)->frags[0].page))
 		NAPI_GRO_CB(skb)->frag0 =
 			page_address(skb_shinfo(skb)->frags[0].page) +
 			skb_shinfo(skb)->frags[0].page_offset;
-- 
cgit v1.2.3-70-g09d2


From 7489594cb249aeb178287c9a43a9e4f366044259 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 26 May 2009 18:50:27 +0000
Subject: gro: Optimise length comparison in skb_gro_header

By caching frag0_len, we can avoid checking both frag0 and the
length separately in skb_gro_header.  This helps as skb_gro_header
is called four times per packet which amounts to a few million
times at 10Gb/s.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 7 +++++--
 net/core/dev.c            | 5 ++++-
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d1afc3b6485..2e44a049be0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1011,6 +1011,9 @@ struct napi_gro_cb {
 	/* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */
 	void *frag0;
 
+	/* Length of frag0. */
+	unsigned int frag0_len;
+
 	/* This indicates where we are processing relative to skb->data. */
 	int data_offset;
 
@@ -1134,9 +1137,9 @@ static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
 	unsigned int offset = skb_gro_offset(skb);
 
 	hlen += offset;
-	if (!NAPI_GRO_CB(skb)->frag0 ||
-	    unlikely(skb_shinfo(skb)->frags[0].size < hlen)) {
+	if (NAPI_GRO_CB(skb)->frag0_len < hlen) {
 		NAPI_GRO_CB(skb)->frag0 = NULL;
+		NAPI_GRO_CB(skb)->frag0_len = 0;
 		return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
 	}
 
diff --git a/net/core/dev.c b/net/core/dev.c
index f9d90c56b6f..b1722a2d1fb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2509,12 +2509,15 @@ void skb_gro_reset_offset(struct sk_buff *skb)
 {
 	NAPI_GRO_CB(skb)->data_offset = 0;
 	NAPI_GRO_CB(skb)->frag0 = NULL;
+	NAPI_GRO_CB(skb)->frag0_len = 0;
 
 	if (skb->mac_header == skb->tail &&
-	    !PageHighMem(skb_shinfo(skb)->frags[0].page))
+	    !PageHighMem(skb_shinfo(skb)->frags[0].page)) {
 		NAPI_GRO_CB(skb)->frag0 =
 			page_address(skb_shinfo(skb)->frags[0].page) +
 			skb_shinfo(skb)->frags[0].page_offset;
+		NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
+	}
 }
 EXPORT_SYMBOL(skb_gro_reset_offset);
 
-- 
cgit v1.2.3-70-g09d2


From a5b1cf288d4200506ab62fbb86cc81ace948a306 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 26 May 2009 18:50:28 +0000
Subject: gro: Avoid unnecessary comparison after skb_gro_header

For the overwhelming majority of cases, skb_gro_header's return
value cannot be NULL.  Yet we must check it because of its current
form.  This patch splits it up into multiple functions in order
to avoid this.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 23 ++++++++++++++---------
 net/core/dev.c            | 17 ++++++++++++-----
 net/ipv4/af_inet.c        | 13 ++++++++++---
 net/ipv4/tcp.c            | 22 ++++++++++++++++------
 net/ipv6/af_inet6.c       | 13 ++++++++++---
 5 files changed, 62 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2e44a049be0..371ece521e5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1132,18 +1132,23 @@ static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len)
 	NAPI_GRO_CB(skb)->data_offset += len;
 }
 
-static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
+static inline void *skb_gro_header_fast(struct sk_buff *skb,
+					unsigned int offset)
 {
-	unsigned int offset = skb_gro_offset(skb);
+	return NAPI_GRO_CB(skb)->frag0 + offset;
+}
 
-	hlen += offset;
-	if (NAPI_GRO_CB(skb)->frag0_len < hlen) {
-		NAPI_GRO_CB(skb)->frag0 = NULL;
-		NAPI_GRO_CB(skb)->frag0_len = 0;
-		return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
-	}
+static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen)
+{
+	return NAPI_GRO_CB(skb)->frag0_len < hlen;
+}
 
-	return NAPI_GRO_CB(skb)->frag0 + offset;
+static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
+					unsigned int offset)
+{
+	NAPI_GRO_CB(skb)->frag0 = NULL;
+	NAPI_GRO_CB(skb)->frag0_len = 0;
+	return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
 }
 
 static inline void *skb_gro_mac_header(struct sk_buff *skb)
diff --git a/net/core/dev.c b/net/core/dev.c
index b1722a2d1fb..cd29e613bc5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2590,17 +2590,24 @@ struct sk_buff *napi_frags_skb(struct napi_struct *napi)
 {
 	struct sk_buff *skb = napi->skb;
 	struct ethhdr *eth;
+	unsigned int hlen;
+	unsigned int off;
 
 	napi->skb = NULL;
 
 	skb_reset_mac_header(skb);
 	skb_gro_reset_offset(skb);
 
-	eth = skb_gro_header(skb, sizeof(*eth));
-	if (!eth) {
-		napi_reuse_skb(napi, skb);
-		skb = NULL;
-		goto out;
+	off = skb_gro_offset(skb);
+	hlen = off + sizeof(*eth);
+	eth = skb_gro_header_fast(skb, off);
+	if (skb_gro_header_hard(skb, hlen)) {
+		eth = skb_gro_header_slow(skb, hlen, off);
+		if (unlikely(!eth)) {
+			napi_reuse_skb(napi, skb);
+			skb = NULL;
+			goto out;
+		}
 	}
 
 	skb_gro_pull(skb, sizeof(*eth));
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 170689681aa..644cc553531 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1246,13 +1246,20 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 	struct sk_buff **pp = NULL;
 	struct sk_buff *p;
 	struct iphdr *iph;
+	unsigned int hlen;
+	unsigned int off;
 	int flush = 1;
 	int proto;
 	int id;
 
-	iph = skb_gro_header(skb, sizeof(*iph));
-	if (unlikely(!iph))
-		goto out;
+	off = skb_gro_offset(skb);
+	hlen = off + sizeof(*iph);
+	iph = skb_gro_header_fast(skb, off);
+	if (skb_gro_header_hard(skb, hlen)) {
+		iph = skb_gro_header_slow(skb, hlen, off);
+		if (unlikely(!iph))
+			goto out;
+	}
 
 	proto = iph->protocol & (MAX_INET_PROTOS - 1);
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 68342d43189..c3dcec5efea 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2518,20 +2518,30 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	unsigned int thlen;
 	unsigned int flags;
 	unsigned int mss = 1;
+	unsigned int hlen;
+	unsigned int off;
 	int flush = 1;
 	int i;
 
-	th = skb_gro_header(skb, sizeof(*th));
-	if (unlikely(!th))
-		goto out;
+	off = skb_gro_offset(skb);
+	hlen = off + sizeof(*th);
+	th = skb_gro_header_fast(skb, off);
+	if (skb_gro_header_hard(skb, hlen)) {
+		th = skb_gro_header_slow(skb, hlen, off);
+		if (unlikely(!th))
+			goto out;
+	}
 
 	thlen = th->doff * 4;
 	if (thlen < sizeof(*th))
 		goto out;
 
-	th = skb_gro_header(skb, thlen);
-	if (unlikely(!th))
-		goto out;
+	hlen = off + thlen;
+	if (skb_gro_header_hard(skb, hlen)) {
+		th = skb_gro_header_slow(skb, hlen, off);
+		if (unlikely(!th))
+			goto out;
+	}
 
 	skb_gro_pull(skb, thlen);
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 61f55386a23..b6215be0963 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -817,13 +817,20 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
 	struct sk_buff *p;
 	struct ipv6hdr *iph;
 	unsigned int nlen;
+	unsigned int hlen;
+	unsigned int off;
 	int flush = 1;
 	int proto;
 	__wsum csum;
 
-	iph = skb_gro_header(skb, sizeof(*iph));
-	if (unlikely(!iph))
-		goto out;
+	off = skb_gro_offset(skb);
+	hlen = off + sizeof(*iph);
+	iph = skb_gro_header_fast(skb, off);
+	if (skb_gro_header_hard(skb, hlen)) {
+		iph = skb_gro_header_slow(skb, hlen, off);
+		if (unlikely(!iph))
+			goto out;
+	}
 
 	skb_gro_pull(skb, sizeof(*iph));
 	skb_set_transport_header(skb, skb_gro_offset(skb));
-- 
cgit v1.2.3-70-g09d2


From cbf806dd9302f3ff27ba496dae474b9da6b58873 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 27 May 2009 06:22:58 -0700
Subject: Input: ucb1400 - move static function from header into core

it's a little too large for static line.
The ts is currently the only mainline user but Marek Vasut claims that
there is a battery driver in an ARM tree which also needs this function.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/mfd/ucb1400_core.c | 20 ++++++++++++++++++++
 include/linux/ucb1400.h    | 23 ++++-------------------
 2 files changed, 24 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/mfd/ucb1400_core.c b/drivers/mfd/ucb1400_core.c
index 178159e264c..78c2135c5de 100644
--- a/drivers/mfd/ucb1400_core.c
+++ b/drivers/mfd/ucb1400_core.c
@@ -23,6 +23,26 @@
 #include <linux/module.h>
 #include <linux/ucb1400.h>
 
+unsigned int ucb1400_adc_read(struct snd_ac97 *ac97, u16 adc_channel,
+		int adcsync)
+{
+	unsigned int val;
+
+	if (adcsync)
+		adc_channel |= UCB_ADC_SYNC_ENA;
+
+	ucb1400_reg_write(ac97, UCB_ADC_CR, UCB_ADC_ENA | adc_channel);
+	ucb1400_reg_write(ac97, UCB_ADC_CR, UCB_ADC_ENA | adc_channel |
+			UCB_ADC_START);
+
+	while (!((val = ucb1400_reg_read(ac97, UCB_ADC_DATA))
+				& UCB_ADC_DAT_VALID))
+		schedule_timeout_uninterruptible(1);
+
+	return val & UCB_ADC_DAT_MASK;
+}
+EXPORT_SYMBOL_GPL(ucb1400_adc_read);
+
 static int ucb1400_core_probe(struct device *dev)
 {
 	int err;
diff --git a/include/linux/ucb1400.h b/include/linux/ucb1400.h
index 970473bf8d5..ed889f4168f 100644
--- a/include/linux/ucb1400.h
+++ b/include/linux/ucb1400.h
@@ -134,28 +134,13 @@ static inline void ucb1400_adc_enable(struct snd_ac97 *ac97)
 	ucb1400_reg_write(ac97, UCB_ADC_CR, UCB_ADC_ENA);
 }
 
-static unsigned int ucb1400_adc_read(struct snd_ac97 *ac97, u16 adc_channel,
-					int adcsync)
-{
-	unsigned int val;
-
-	if (adcsync)
-		adc_channel |= UCB_ADC_SYNC_ENA;
-
-	ucb1400_reg_write(ac97, UCB_ADC_CR, UCB_ADC_ENA | adc_channel);
-	ucb1400_reg_write(ac97, UCB_ADC_CR, UCB_ADC_ENA | adc_channel |
-				UCB_ADC_START);
-
-	while (!((val = ucb1400_reg_read(ac97, UCB_ADC_DATA))
-			& UCB_ADC_DAT_VALID))
-		schedule_timeout_uninterruptible(1);
-
-	return val & UCB_ADC_DAT_MASK;
-}
-
 static inline void ucb1400_adc_disable(struct snd_ac97 *ac97)
 {
 	ucb1400_reg_write(ac97, UCB_ADC_CR, 0);
 }
 
+
+unsigned int ucb1400_adc_read(struct snd_ac97 *ac97, u16 adc_channel,
+			      int adcsync);
+
 #endif
-- 
cgit v1.2.3-70-g09d2


From a17c859849402315613a0015ac8fbf101acf0cc1 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 27 May 2009 17:50:35 +0200
Subject: netfilter: conntrack: add support for DCCP handshake sequence to
 ctnetlink

This patch adds CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ that exposes
the u64 handshake sequence number to user-space.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nfnetlink_conntrack.h | 1 +
 include/net/netlink.h                         | 9 +++++++++
 net/netfilter/nf_conntrack_proto_dccp.c       | 7 +++++++
 3 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h
index 1a865e48b8e..ed4ef8d0b11 100644
--- a/include/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/linux/netfilter/nfnetlink_conntrack.h
@@ -101,6 +101,7 @@ enum ctattr_protoinfo_dccp {
 	CTA_PROTOINFO_DCCP_UNSPEC,
 	CTA_PROTOINFO_DCCP_STATE,
 	CTA_PROTOINFO_DCCP_ROLE,
+	CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ,
 	__CTA_PROTOINFO_DCCP_MAX,
 };
 #define CTA_PROTOINFO_DCCP_MAX (__CTA_PROTOINFO_DCCP_MAX - 1)
diff --git a/include/net/netlink.h b/include/net/netlink.h
index eddb50289d6..007bdb07dab 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -939,6 +939,15 @@ static inline u64 nla_get_u64(const struct nlattr *nla)
 	return tmp;
 }
 
+/**
+ * nla_get_be64 - return payload of __be64 attribute
+ * @nla: __be64 netlink attribute
+ */
+static inline __be64 nla_get_be64(const struct nlattr *nla)
+{
+	return *(__be64 *) nla_data(nla);
+}
+
 /**
  * nla_get_flag - return payload of flag attribute
  * @nla: flag netlink attribute
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 8e757dd5339..11801c43c8c 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -635,6 +635,8 @@ static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
 	NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state);
 	NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_ROLE,
 		   ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]);
+	NLA_PUT_BE64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ,
+		     cpu_to_be64(ct->proto.dccp.handshake_seq));
 	nla_nest_end(skb, nest_parms);
 	read_unlock_bh(&dccp_lock);
 	return 0;
@@ -647,6 +649,7 @@ nla_put_failure:
 static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = {
 	[CTA_PROTOINFO_DCCP_STATE]	= { .type = NLA_U8 },
 	[CTA_PROTOINFO_DCCP_ROLE]	= { .type = NLA_U8 },
+	[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ] = { .type = NLA_U64 },
 };
 
 static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
@@ -679,6 +682,10 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
 		ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER;
 		ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT;
 	}
+	if (tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]) {
+		ct->proto.dccp.handshake_seq =
+		be64_to_cpu(nla_get_be64(tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]));
+	}
 	write_unlock_bh(&dccp_lock);
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From fca4217c5bab31019b5247e977673c9fcc385f6b Mon Sep 17 00:00:00 2001
From: Greg Banks <gnb@sgi.com>
Date: Wed, 1 Apr 2009 07:28:13 +1100
Subject: knfsd: reply cache cleanups

Make REQHASH() an inline function.  Rename hash_list to cache_hash.
Fix an obsolete comment.

Signed-off-by: Greg Banks <gnb@sgi.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfscache.c         | 29 +++++++++++++++++++----------
 include/linux/nfsd/cache.h |  3 +--
 2 files changed, 20 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 5bfc2ac60d5..6f0aa4989c6 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -29,15 +29,24 @@
  */
 #define CACHESIZE		1024
 #define HASHSIZE		64
-#define REQHASH(xid)		(((((__force __u32)xid) >> 24) ^ ((__force __u32)xid)) & (HASHSIZE-1))
 
-static struct hlist_head *	hash_list;
+static struct hlist_head *	cache_hash;
 static struct list_head 	lru_head;
 static int			cache_disabled = 1;
 
+/*
+ * Calculate the hash index from an XID.
+ */
+static inline u32 request_hash(u32 xid)
+{
+	u32 h = xid;
+	h ^= (xid >> 24);
+	return h & (HASHSIZE-1);
+}
+
 static int	nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
 
-/* 
+/*
  * locking for the reply cache:
  * A cache entry is "single use" if c_state == RC_INPROG
  * Otherwise, it when accessing _prev or _next, the lock must be held.
@@ -62,8 +71,8 @@ int nfsd_reply_cache_init(void)
 		i--;
 	}
 
-	hash_list = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL);
-	if (!hash_list)
+	cache_hash = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL);
+	if (!cache_hash)
 		goto out_nomem;
 
 	cache_disabled = 0;
@@ -88,8 +97,8 @@ void nfsd_reply_cache_shutdown(void)
 
 	cache_disabled = 1;
 
-	kfree (hash_list);
-	hash_list = NULL;
+	kfree (cache_hash);
+	cache_hash = NULL;
 }
 
 /*
@@ -108,7 +117,7 @@ static void
 hash_refile(struct svc_cacherep *rp)
 {
 	hlist_del_init(&rp->c_hash);
-	hlist_add_head(&rp->c_hash, hash_list + REQHASH(rp->c_xid));
+	hlist_add_head(&rp->c_hash, cache_hash + request_hash(rp->c_xid));
 }
 
 /*
@@ -138,7 +147,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type)
 	spin_lock(&cache_lock);
 	rtn = RC_DOIT;
 
-	rh = &hash_list[REQHASH(xid)];
+	rh = &cache_hash[request_hash(xid)];
 	hlist_for_each_entry(rp, hn, rh, c_hash) {
 		if (rp->c_state != RC_UNUSED &&
 		    xid == rp->c_xid && proc == rp->c_proc &&
@@ -264,7 +273,7 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
 
 	len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
 	len >>= 2;
-	
+
 	/* Don't cache excessive amounts of data and XDR failures */
 	if (!statp || len > (256 >> 2)) {
 		rp->c_state = RC_UNUSED;
diff --git a/include/linux/nfsd/cache.h b/include/linux/nfsd/cache.h
index 5bccaab8105..3a3f58934f5 100644
--- a/include/linux/nfsd/cache.h
+++ b/include/linux/nfsd/cache.h
@@ -14,8 +14,7 @@
 #include <linux/uio.h>
 
 /*
- * Representation of a reply cache entry. The first two members *must*
- * be hash_next and hash_prev.
+ * Representation of a reply cache entry.
  */
 struct svc_cacherep {
 	struct hlist_node	c_hash;
-- 
cgit v1.2.3-70-g09d2


From ab6bf42e2339580b5d87746d0ff4da4b1578b03e Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.co.il>
Date: Wed, 27 May 2009 14:38:34 -0700
Subject: mlx4_core: Add module parameter for number of MTTs per segment

The current MTT allocator uses kmalloc() to allocate a buffer for its
buddy allocator, and thus is limited in the amount of MTT segments
that it can control.  As a result, the size of memory that can be
registered is limited too.  This patch uses a module parameter to
control the number of MTT entries that each segment represents,
allowing more memory to be registered with the same number of
segments.

Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/net/mlx4/main.c     | 14 ++++++++++++--
 drivers/net/mlx4/mr.c       |  6 +++---
 drivers/net/mlx4/profile.c  |  2 +-
 include/linux/mlx4/device.h |  1 +
 4 files changed, 17 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index 30bea968969..018348c0119 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -100,6 +100,10 @@ module_param_named(use_prio, use_prio, bool, 0444);
 MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports "
 		  "(0/1, default 0)");
 
+static int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
+module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
+MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-5)");
+
 int mlx4_check_port_params(struct mlx4_dev *dev,
 			   enum mlx4_port_type *port_type)
 {
@@ -203,12 +207,13 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev->caps.max_cqes	     = dev_cap->max_cq_sz - 1;
 	dev->caps.reserved_cqs	     = dev_cap->reserved_cqs;
 	dev->caps.reserved_eqs	     = dev_cap->reserved_eqs;
+	dev->caps.mtts_per_seg	     = 1 << log_mtts_per_seg;
 	dev->caps.reserved_mtts	     = DIV_ROUND_UP(dev_cap->reserved_mtts,
-						    MLX4_MTT_ENTRY_PER_SEG);
+						    dev->caps.mtts_per_seg);
 	dev->caps.reserved_mrws	     = dev_cap->reserved_mrws;
 	dev->caps.reserved_uars	     = dev_cap->reserved_uars;
 	dev->caps.reserved_pds	     = dev_cap->reserved_pds;
-	dev->caps.mtt_entry_sz	     = MLX4_MTT_ENTRY_PER_SEG * dev_cap->mtt_entry_sz;
+	dev->caps.mtt_entry_sz	     = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz;
 	dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
 	dev->caps.page_size_cap	     = ~(u32) (dev_cap->min_page_sz - 1);
 	dev->caps.flags		     = dev_cap->flags;
@@ -1304,6 +1309,11 @@ static int __init mlx4_verify_params(void)
 		return -1;
 	}
 
+	if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 5)) {
+		printk(KERN_WARNING "mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg);
+		return -1;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
index 0caf74cae8b..3b8973d1993 100644
--- a/drivers/net/mlx4/mr.c
+++ b/drivers/net/mlx4/mr.c
@@ -209,7 +209,7 @@ int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
 	} else
 		mtt->page_shift = page_shift;
 
-	for (mtt->order = 0, i = MLX4_MTT_ENTRY_PER_SEG; i < npages; i <<= 1)
+	for (mtt->order = 0, i = dev->caps.mtts_per_seg; i < npages; i <<= 1)
 		++mtt->order;
 
 	mtt->first_seg = mlx4_alloc_mtt_range(dev, mtt->order);
@@ -350,7 +350,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
 		mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG |
 						   MLX4_MPT_PD_FLAG_RAE);
 		mpt_entry->mtt_sz    = cpu_to_be32((1 << mr->mtt.order) *
-						   MLX4_MTT_ENTRY_PER_SEG);
+						   dev->caps.mtts_per_seg);
 	} else {
 		mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS);
 	}
@@ -391,7 +391,7 @@ static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 	    (start_index + npages - 1) / (PAGE_SIZE / sizeof (u64)))
 		return -EINVAL;
 
-	if (start_index & (MLX4_MTT_ENTRY_PER_SEG - 1))
+	if (start_index & (dev->caps.mtts_per_seg - 1))
 		return -EINVAL;
 
 	mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->first_seg +
diff --git a/drivers/net/mlx4/profile.c b/drivers/net/mlx4/profile.c
index cebdf3243ca..bd22df95adf 100644
--- a/drivers/net/mlx4/profile.c
+++ b/drivers/net/mlx4/profile.c
@@ -98,7 +98,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
 	profile[MLX4_RES_EQ].size     = dev_cap->eqc_entry_sz;
 	profile[MLX4_RES_DMPT].size   = dev_cap->dmpt_entry_sz;
 	profile[MLX4_RES_CMPT].size   = dev_cap->cmpt_entry_sz;
-	profile[MLX4_RES_MTT].size    = MLX4_MTT_ENTRY_PER_SEG * dev_cap->mtt_entry_sz;
+	profile[MLX4_RES_MTT].size    = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz;
 	profile[MLX4_RES_MCG].size    = MLX4_MGM_ENTRY_SIZE;
 
 	profile[MLX4_RES_QP].num      = request->num_qp;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 3aff8a6a389..ce7cc6c7bcb 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -210,6 +210,7 @@ struct mlx4_caps {
 	int			num_comp_vectors;
 	int			num_mpts;
 	int			num_mtt_segs;
+	int			mtts_per_seg;
 	int			fmr_reserved_mtts;
 	int			reserved_mtts;
 	int			reserved_mrws;
-- 
cgit v1.2.3-70-g09d2


From 1ce8e7b57b3a4527ef83da1c5c7bd8a6b9d87b56 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 27 May 2009 04:42:37 +0000
Subject: net: ALIGN/PTR_ALIGN cleanup in alloc_netdev_mq()/netdev_priv()

Use ALIGN() and PTR_ALIGN() macros instead of handcoding them.

Get rid of NETDEV_ALIGN_CONST ugly define

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 5 +----
 net/core/dev.c            | 9 ++++-----
 net/mac80211/main.c       | 8 ++------
 3 files changed, 7 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 371ece521e5..14efce33c00 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -905,7 +905,6 @@ struct net_device
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
 #define	NETDEV_ALIGN		32
-#define	NETDEV_ALIGN_CONST	(NETDEV_ALIGN - 1)
 
 static inline
 struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev,
@@ -976,9 +975,7 @@ static inline bool netdev_uses_trailer_tags(struct net_device *dev)
  */
 static inline void *netdev_priv(const struct net_device *dev)
 {
-	return (char *)dev + ((sizeof(struct net_device)
-			       + NETDEV_ALIGN_CONST)
-			      & ~NETDEV_ALIGN_CONST);
+	return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN);
 }
 
 /* Set the sysfs physical device reference for the network logical device
diff --git a/net/core/dev.c b/net/core/dev.c
index ed4550fd9ec..32ceee17896 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4988,18 +4988,18 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	struct netdev_queue *tx;
 	struct net_device *dev;
 	size_t alloc_size;
-	void *p;
+	struct net_device *p;
 
 	BUG_ON(strlen(name) >= sizeof(dev->name));
 
 	alloc_size = sizeof(struct net_device);
 	if (sizeof_priv) {
 		/* ensure 32-byte alignment of private area */
-		alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
+		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
 		alloc_size += sizeof_priv;
 	}
 	/* ensure 32-byte alignment of whole construct */
-	alloc_size += NETDEV_ALIGN_CONST;
+	alloc_size += NETDEV_ALIGN - 1;
 
 	p = kzalloc(alloc_size, GFP_KERNEL);
 	if (!p) {
@@ -5014,8 +5014,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 		goto free_p;
 	}
 
-	dev = (struct net_device *)
-		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
+	dev = PTR_ALIGN(p, NETDEV_ALIGN);
 	dev->padded = (char *)dev - (char *)p;
 
 	if (dev_addr_init(dev))
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 6b7e92eaab4..e37770ced53 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -735,9 +735,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 	 * +-------------------------+
 	 *
 	 */
-	priv_size = ((sizeof(struct ieee80211_local) +
-		      NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST) +
-		    priv_data_len;
+	priv_size = ALIGN(sizeof(*local), NETDEV_ALIGN) + priv_data_len;
 
 	wiphy = wiphy_new(&mac80211_config_ops, priv_size);
 
@@ -754,9 +752,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 
 	local->hw.wiphy = wiphy;
 
-	local->hw.priv = (char *)local +
-			 ((sizeof(struct ieee80211_local) +
-			   NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
+	local->hw.priv = (char *)local + ALIGN(sizeof(*local), NETDEV_ALIGN);
 
 	BUG_ON(!ops->tx);
 	BUG_ON(!ops->start);
-- 
cgit v1.2.3-70-g09d2


From 385a154cac8763284f65cdfa25f6796c9eb1ca21 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Wed, 27 May 2009 15:48:07 -0700
Subject: net: correct a comment for the final #endif

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 14efce33c00..8e03b06e638 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1975,4 +1975,4 @@ static inline u32 dev_ethtool_get_flags(struct net_device *dev)
 }
 #endif /* __KERNEL__ */
 
-#endif	/* _LINUX_DEV_H */
+#endif	/* _LINUX_NETDEVICE_H */
-- 
cgit v1.2.3-70-g09d2


From f2aebaee653a35b01c3665de2cbb1e31456b8ea8 Mon Sep 17 00:00:00 2001
From: Zhaolei <zhaolei@cn.fujitsu.com>
Date: Wed, 27 May 2009 21:36:02 +0800
Subject: ftrace: don't convert function's local variable name in macro

"call" is an argument of macro, but it is also used as a local
variable name of function in macro.
We should keep this local variable name distinct from any
CPP macro parameter name if both are in the same macro scope,
although it hasn't caused any problem yet.

[ Impact: robustify macro ]

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/trace/ftrace.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 87fc227c6fb..b4ec83ae711 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -397,19 +397,19 @@ static void ftrace_profile_##call(proto)				\
 	perf_tpcounter_event(event_##call.id);				\
 }									\
 									\
-static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \
+static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \
 {									\
 	int ret = 0;							\
 									\
-	if (!atomic_inc_return(&call->profile_count))			\
+	if (!atomic_inc_return(&event_call->profile_count))		\
 		ret = register_trace_##call(ftrace_profile_##call);	\
 									\
 	return ret;							\
 }									\
 									\
-static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \
+static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
 {									\
-	if (atomic_add_negative(-1, &call->profile_count))		\
+	if (atomic_add_negative(-1, &event_call->profile_count))	\
 		unregister_trace_##call(ftrace_profile_##call);		\
 }
 
@@ -433,9 +433,9 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \
 #define __array(type, item, len)
 
 #undef __string
-#define __string(item, src)						       \
-	__str_offsets.item = __str_size +				       \
-			     offsetof(typeof(*entry), __str_data);	       \
+#define __string(item, src)						\
+	__str_offsets.item = __str_size +				\
+			     offsetof(typeof(*entry), __str_data);	\
 	__str_size += strlen(src) + 1;
 
 #undef __assign_str
@@ -451,8 +451,8 @@ static struct ftrace_event_call event_##call;				\
 									\
 static void ftrace_raw_event_##call(proto)				\
 {									\
-	struct ftrace_str_offsets_##call __maybe_unused __str_offsets;  \
-	struct ftrace_event_call *call = &event_##call;			\
+	struct ftrace_str_offsets_##call __maybe_unused __str_offsets;	\
+	struct ftrace_event_call *event_call = &event_##call;		\
 	struct ring_buffer_event *event;				\
 	struct ftrace_raw_##call *entry;				\
 	unsigned long irq_flags;					\
@@ -473,7 +473,7 @@ static void ftrace_raw_event_##call(proto)				\
 									\
 	assign;								\
 									\
-	if (!filter_current_check_discard(call, entry, event))		\
+	if (!filter_current_check_discard(event_call, entry, event))	\
 		trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
 }									\
 									\
-- 
cgit v1.2.3-70-g09d2


From b9417f84e17b93a6976a8a88b38bf9567975cb38 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 27 Apr 2009 16:33:36 -0600
Subject: ACPI: use LNXCPU, not ACPI_CPU, for Linux-specific processor _HID

ACPI_PROCESSOR_OBJECT_HID is a synthetic _HID that Linux generates
for "Processor" definitions.  Unlike "Device" definitions, "Processor"
definitions do not have a _HID in the namespace, so we generate a
fake _HID.  By convention, all these fake _HIDs begin with "LNX".

This does change the user-visible _HID for "Processor" objects --
previously, we used "ACPI_CPU" and this changes that to "LNXCPU",
which starts with "LNX" as do all the other made-up _HIDs.  This
change is visible in processor filenames and "hid" files under
/sys/devices/LNXSYSTM:00/.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/acpi_drivers.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h
index 0352c8f0b05..c9922b36200 100644
--- a/include/acpi/acpi_drivers.h
+++ b/include/acpi/acpi_drivers.h
@@ -57,7 +57,7 @@
  */
 
 #define ACPI_POWER_HID			"LNXPOWER"
-#define ACPI_PROCESSOR_OBJECT_HID	"ACPI_CPU"
+#define ACPI_PROCESSOR_OBJECT_HID	"LNXCPU"
 #define ACPI_PROCESSOR_HID		"ACPI0007"
 #define ACPI_SYSTEM_HID			"LNXSYSTM"
 #define ACPI_THERMAL_HID		"LNXTHERM"
-- 
cgit v1.2.3-70-g09d2


From 9eccbc2f67efd0d19c47f40182abf2965c287add Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 27 Apr 2009 16:33:46 -0600
Subject: ACPI: processor: move device _HID into driver

The ACPI0007 _HID used for processor "Device" objects in the namespace
is not needed outside the processor driver, so move it there.  Also, the
#define is only used once, so just remove it and hard-code "ACPI0007".

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/processor_core.c | 2 +-
 include/acpi/acpi_drivers.h   | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index cf627d64cde..cabff4cb21f 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -89,7 +89,7 @@ static int acpi_processor_handle_eject(struct acpi_processor *pr);
 
 static const struct acpi_device_id processor_device_ids[] = {
 	{ACPI_PROCESSOR_OBJECT_HID, 0},
-	{ACPI_PROCESSOR_HID, 0},
+	{"ACPI0007", 0},
 	{"", 0},
 };
 MODULE_DEVICE_TABLE(acpi, processor_device_ids);
diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h
index c9922b36200..5e8ed3a78cd 100644
--- a/include/acpi/acpi_drivers.h
+++ b/include/acpi/acpi_drivers.h
@@ -58,7 +58,6 @@
 
 #define ACPI_POWER_HID			"LNXPOWER"
 #define ACPI_PROCESSOR_OBJECT_HID	"LNXCPU"
-#define ACPI_PROCESSOR_HID		"ACPI0007"
 #define ACPI_SYSTEM_HID			"LNXSYSTM"
 #define ACPI_THERMAL_HID		"LNXTHERM"
 #define ACPI_BUTTON_HID_POWERF		"LNXPWRBN"
-- 
cgit v1.2.3-70-g09d2


From d3e78ee3d015dac1794433abb6403b6fc8e70e10 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 28 May 2009 11:41:50 +0200
Subject: perf_counter: Fix perf_counter_init_task() on !CONFIG_PERF_COUNTERS

Pointed out by compiler warnings:

   tip/include/linux/perf_counter.h:644: warning: no return statement in function returning non-void

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 2b16ed37b74..a65ddc58051 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -641,7 +641,7 @@ perf_counter_task_sched_out(struct task_struct *task,
 			    struct task_struct *next, int cpu)		{ }
 static inline void
 perf_counter_task_tick(struct task_struct *task, int cpu)		{ }
-static inline int perf_counter_init_task(struct task_struct *child)	{ }
+static inline int perf_counter_init_task(struct task_struct *child)	{ return 0; }
 static inline void perf_counter_exit_task(struct task_struct *child)	{ }
 static inline void perf_counter_do_pending(void)			{ }
 static inline void perf_counter_print_debug(void)			{ }
-- 
cgit v1.2.3-70-g09d2


From c93f7669098eb97c5376e5396e3dfb734c17df4f Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Thu, 28 May 2009 22:18:17 +1000
Subject: perf_counter: Fix race in attaching counters to tasks and exiting

Commit 564c2b21 ("perf_counter: Optimize context switch between
identical inherited contexts") introduced a race where it is possible
that a counter being attached to a task could get attached to the
wrong task, if the task is one that has inherited its context from
another task via fork.  This happens because the optimized context
switch could switch the context to another task after find_get_context
has read task->perf_counter_ctxp.  In fact, it's possible that the
context could then get freed, if the other task then exits.

This fixes the problem by protecting both the context switch and the
critical code in find_get_context with spinlocks.  The context switch
locks the cxt->lock of both the outgoing and incoming contexts before
swapping them.  That means that once code such as find_get_context
has obtained the spinlock for the context associated with a task,
the context can't get swapped to another task.  However, the context
may have been swapped in the interval between reading
task->perf_counter_ctxp and getting the lock, so it is necessary to
check and retry.

To make sure that none of the contexts being looked at in
find_get_context can get freed, this changes the context freeing code
to use RCU.  Thus an rcu_read_lock() is sufficient to ensure that no
contexts can get freed.  This part of the patch is lifted from a patch
posted by Peter Zijlstra.

This also adds a check to make sure that we can't add a counter to a
task that is exiting.

There is also a race between perf_counter_exit_task and
find_get_context; this solves the race by moving the get_ctx that
was in perf_counter_alloc into the locked region in find_get_context,
so that once find_get_context has got the context for a task, it
won't get freed even if the task calls perf_counter_exit_task.  It
doesn't matter if new top-level (non-inherited) counters get attached
to the context after perf_counter_exit_task has detached the context
from the task.  They will just stay there and never get scheduled in
until the counters' fds get closed, and then perf_release will remove
them from the context and eventually free the context.

With this, we are now doing the unclone in find_get_context rather
than when a counter was added to or removed from a context (actually,
we were missing the unclone_ctx() call when adding a counter to a
context).  We don't need to unclone when removing a counter from a
context because we have no way to remove a counter from a cloned
context.

This also takes out the smp_wmb() in find_get_context, which Peter
Zijlstra pointed out was unnecessary because the cmpxchg implies a
full barrier anyway.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <18974.33033.667187.273886@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |   5 +-
 kernel/perf_counter.c        | 205 ++++++++++++++++++++++++++++++-------------
 2 files changed, 148 insertions(+), 62 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index a65ddc58051..717bf3b59ba 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -541,8 +541,9 @@ struct perf_counter_context {
 	 * been cloned (inherited) from a common ancestor.
 	 */
 	struct perf_counter_context *parent_ctx;
-	u32			parent_gen;
-	u32			generation;
+	u64			parent_gen;
+	u64			generation;
+	struct rcu_head		rcu_head;
 };
 
 /**
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 367299f91aa..52e5a15321d 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -103,12 +103,22 @@ static void get_ctx(struct perf_counter_context *ctx)
 	atomic_inc(&ctx->refcount);
 }
 
+static void free_ctx(struct rcu_head *head)
+{
+	struct perf_counter_context *ctx;
+
+	ctx = container_of(head, struct perf_counter_context, rcu_head);
+	kfree(ctx);
+}
+
 static void put_ctx(struct perf_counter_context *ctx)
 {
 	if (atomic_dec_and_test(&ctx->refcount)) {
 		if (ctx->parent_ctx)
 			put_ctx(ctx->parent_ctx);
-		kfree(ctx);
+		if (ctx->task)
+			put_task_struct(ctx->task);
+		call_rcu(&ctx->rcu_head, free_ctx);
 	}
 }
 
@@ -211,22 +221,6 @@ group_sched_out(struct perf_counter *group_counter,
 		cpuctx->exclusive = 0;
 }
 
-/*
- * Mark this context as not being a clone of another.
- * Called when counters are added to or removed from this context.
- * We also increment our generation number so that anything that
- * was cloned from this context before this will not match anything
- * cloned from this context after this.
- */
-static void unclone_ctx(struct perf_counter_context *ctx)
-{
-	++ctx->generation;
-	if (!ctx->parent_ctx)
-		return;
-	put_ctx(ctx->parent_ctx);
-	ctx->parent_ctx = NULL;
-}
-
 /*
  * Cross CPU call to remove a performance counter
  *
@@ -281,13 +275,19 @@ static void __perf_counter_remove_from_context(void *info)
  *
  * CPU counters are removed with a smp call. For task counters we only
  * call when the task is on a CPU.
+ *
+ * If counter->ctx is a cloned context, callers must make sure that
+ * every task struct that counter->ctx->task could possibly point to
+ * remains valid.  This is OK when called from perf_release since
+ * that only calls us on the top-level context, which can't be a clone.
+ * When called from perf_counter_exit_task, it's OK because the
+ * context has been detached from its task.
  */
 static void perf_counter_remove_from_context(struct perf_counter *counter)
 {
 	struct perf_counter_context *ctx = counter->ctx;
 	struct task_struct *task = ctx->task;
 
-	unclone_ctx(ctx);
 	if (!task) {
 		/*
 		 * Per cpu counters are removed via an smp call and
@@ -410,6 +410,16 @@ static void __perf_counter_disable(void *info)
 
 /*
  * Disable a counter.
+ *
+ * If counter->ctx is a cloned context, callers must make sure that
+ * every task struct that counter->ctx->task could possibly point to
+ * remains valid.  This condition is satisifed when called through
+ * perf_counter_for_each_child or perf_counter_for_each because they
+ * hold the top-level counter's child_mutex, so any descendant that
+ * goes to exit will block in sync_child_counter.
+ * When called from perf_pending_counter it's OK because counter->ctx
+ * is the current context on this CPU and preemption is disabled,
+ * hence we can't get into perf_counter_task_sched_out for this context.
  */
 static void perf_counter_disable(struct perf_counter *counter)
 {
@@ -794,6 +804,12 @@ static void __perf_counter_enable(void *info)
 
 /*
  * Enable a counter.
+ *
+ * If counter->ctx is a cloned context, callers must make sure that
+ * every task struct that counter->ctx->task could possibly point to
+ * remains valid.  This condition is satisfied when called through
+ * perf_counter_for_each_child or perf_counter_for_each as described
+ * for perf_counter_disable.
  */
 static void perf_counter_enable(struct perf_counter *counter)
 {
@@ -923,7 +939,9 @@ void perf_counter_task_sched_out(struct task_struct *task,
 	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
 	struct perf_counter_context *ctx = task->perf_counter_ctxp;
 	struct perf_counter_context *next_ctx;
+	struct perf_counter_context *parent;
 	struct pt_regs *regs;
+	int do_switch = 1;
 
 	regs = task_pt_regs(task);
 	perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0);
@@ -932,18 +950,39 @@ void perf_counter_task_sched_out(struct task_struct *task,
 		return;
 
 	update_context_time(ctx);
+
+	rcu_read_lock();
+	parent = rcu_dereference(ctx->parent_ctx);
 	next_ctx = next->perf_counter_ctxp;
-	if (next_ctx && context_equiv(ctx, next_ctx)) {
-		task->perf_counter_ctxp = next_ctx;
-		next->perf_counter_ctxp = ctx;
-		ctx->task = next;
-		next_ctx->task = task;
-		return;
+	if (parent && next_ctx &&
+	    rcu_dereference(next_ctx->parent_ctx) == parent) {
+		/*
+		 * Looks like the two contexts are clones, so we might be
+		 * able to optimize the context switch.  We lock both
+		 * contexts and check that they are clones under the
+		 * lock (including re-checking that neither has been
+		 * uncloned in the meantime).  It doesn't matter which
+		 * order we take the locks because no other cpu could
+		 * be trying to lock both of these tasks.
+		 */
+		spin_lock(&ctx->lock);
+		spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
+		if (context_equiv(ctx, next_ctx)) {
+			task->perf_counter_ctxp = next_ctx;
+			next->perf_counter_ctxp = ctx;
+			ctx->task = next;
+			next_ctx->task = task;
+			do_switch = 0;
+		}
+		spin_unlock(&next_ctx->lock);
+		spin_unlock(&ctx->lock);
 	}
+	rcu_read_unlock();
 
-	__perf_counter_sched_out(ctx, cpuctx);
-
-	cpuctx->task_ctx = NULL;
+	if (do_switch) {
+		__perf_counter_sched_out(ctx, cpuctx);
+		cpuctx->task_ctx = NULL;
+	}
 }
 
 static void __perf_counter_task_sched_out(struct perf_counter_context *ctx)
@@ -1215,18 +1254,13 @@ __perf_counter_init_context(struct perf_counter_context *ctx,
 	ctx->task = task;
 }
 
-static void put_context(struct perf_counter_context *ctx)
-{
-	if (ctx->task)
-		put_task_struct(ctx->task);
-}
-
 static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 {
 	struct perf_cpu_context *cpuctx;
 	struct perf_counter_context *ctx;
-	struct perf_counter_context *tctx;
+	struct perf_counter_context *parent_ctx;
 	struct task_struct *task;
+	int err;
 
 	/*
 	 * If cpu is not a wildcard then this is a percpu counter:
@@ -1249,6 +1283,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 
 		cpuctx = &per_cpu(perf_cpu_context, cpu);
 		ctx = &cpuctx->ctx;
+		get_ctx(ctx);
 
 		return ctx;
 	}
@@ -1265,37 +1300,79 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 	if (!task)
 		return ERR_PTR(-ESRCH);
 
+	/*
+	 * Can't attach counters to a dying task.
+	 */
+	err = -ESRCH;
+	if (task->flags & PF_EXITING)
+		goto errout;
+
 	/* Reuse ptrace permission checks for now. */
-	if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
-		put_task_struct(task);
-		return ERR_PTR(-EACCES);
+	err = -EACCES;
+	if (!ptrace_may_access(task, PTRACE_MODE_READ))
+		goto errout;
+
+ retry_lock:
+	rcu_read_lock();
+ retry:
+	ctx = rcu_dereference(task->perf_counter_ctxp);
+	if (ctx) {
+		/*
+		 * If this context is a clone of another, it might
+		 * get swapped for another underneath us by
+		 * perf_counter_task_sched_out, though the
+		 * rcu_read_lock() protects us from any context
+		 * getting freed.  Lock the context and check if it
+		 * got swapped before we could get the lock, and retry
+		 * if so.  If we locked the right context, then it
+		 * can't get swapped on us any more and we can
+		 * unclone it if necessary.
+		 * Once it's not a clone things will be stable.
+		 */
+		spin_lock_irq(&ctx->lock);
+		if (ctx != rcu_dereference(task->perf_counter_ctxp)) {
+			spin_unlock_irq(&ctx->lock);
+			goto retry;
+		}
+		parent_ctx = ctx->parent_ctx;
+		if (parent_ctx) {
+			put_ctx(parent_ctx);
+			ctx->parent_ctx = NULL;		/* no longer a clone */
+		}
+		++ctx->generation;
+		/*
+		 * Get an extra reference before dropping the lock so that
+		 * this context won't get freed if the task exits.
+		 */
+		get_ctx(ctx);
+		spin_unlock_irq(&ctx->lock);
 	}
+	rcu_read_unlock();
 
-	ctx = task->perf_counter_ctxp;
 	if (!ctx) {
 		ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL);
-		if (!ctx) {
-			put_task_struct(task);
-			return ERR_PTR(-ENOMEM);
-		}
+		err = -ENOMEM;
+		if (!ctx)
+			goto errout;
 		__perf_counter_init_context(ctx, task);
-		/*
-		 * Make sure other cpus see correct values for *ctx
-		 * once task->perf_counter_ctxp is visible to them.
-		 */
-		smp_wmb();
-		tctx = cmpxchg(&task->perf_counter_ctxp, NULL, ctx);
-		if (tctx) {
+		get_ctx(ctx);
+		if (cmpxchg(&task->perf_counter_ctxp, NULL, ctx)) {
 			/*
 			 * We raced with some other task; use
 			 * the context they set.
 			 */
 			kfree(ctx);
-			ctx = tctx;
+			goto retry_lock;
 		}
+		get_task_struct(task);
 	}
 
+	put_task_struct(task);
 	return ctx;
+
+ errout:
+	put_task_struct(task);
+	return ERR_PTR(err);
 }
 
 static void free_counter_rcu(struct rcu_head *head)
@@ -1303,7 +1380,6 @@ static void free_counter_rcu(struct rcu_head *head)
 	struct perf_counter *counter;
 
 	counter = container_of(head, struct perf_counter, rcu_head);
-	put_ctx(counter->ctx);
 	kfree(counter);
 }
 
@@ -1324,6 +1400,7 @@ static void free_counter(struct perf_counter *counter)
 	if (counter->destroy)
 		counter->destroy(counter);
 
+	put_ctx(counter->ctx);
 	call_rcu(&counter->rcu_head, free_counter_rcu);
 }
 
@@ -1347,7 +1424,6 @@ static int perf_release(struct inode *inode, struct file *file)
 	put_task_struct(counter->owner);
 
 	free_counter(counter);
-	put_context(ctx);
 
 	return 0;
 }
@@ -1437,6 +1513,12 @@ static void perf_counter_for_each_sibling(struct perf_counter *counter,
 	mutex_unlock(&ctx->mutex);
 }
 
+/*
+ * Holding the top-level counter's child_mutex means that any
+ * descendant process that has inherited this counter will block
+ * in sync_child_counter if it goes to exit, thus satisfying the
+ * task existence requirements of perf_counter_enable/disable.
+ */
 static void perf_counter_for_each_child(struct perf_counter *counter,
 					void (*func)(struct perf_counter *))
 {
@@ -3124,8 +3206,6 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	counter->ctx			= ctx;
 	counter->oncpu			= -1;
 
-	get_ctx(ctx);
-
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	if (hw_event->disabled)
 		counter->state = PERF_COUNTER_STATE_OFF;
@@ -3290,7 +3370,7 @@ err_free_put_context:
 	kfree(counter);
 
 err_put_context:
-	put_context(ctx);
+	put_ctx(ctx);
 
 	goto out_fput;
 }
@@ -3322,6 +3402,7 @@ inherit_counter(struct perf_counter *parent_counter,
 					   group_leader, GFP_KERNEL);
 	if (IS_ERR(child_counter))
 		return child_counter;
+	get_ctx(child_ctx);
 
 	/*
 	 * Make the child state follow the state of the parent counter,
@@ -3439,11 +3520,6 @@ __perf_counter_exit_task(struct task_struct *child,
 
 /*
  * When a child task exits, feed back counter values to parent counters.
- *
- * Note: we may be running in child context, but the PID is not hashed
- * anymore so new counters will not be added.
- * (XXX not sure that is true when we get called from flush_old_exec.
- *  -- paulus)
  */
 void perf_counter_exit_task(struct task_struct *child)
 {
@@ -3458,7 +3534,15 @@ void perf_counter_exit_task(struct task_struct *child)
 
 	local_irq_save(flags);
 	__perf_counter_task_sched_out(child_ctx);
+
+	/*
+	 * Take the context lock here so that if find_get_context is
+	 * reading child->perf_counter_ctxp, we wait until it has
+	 * incremented the context's refcount before we do put_ctx below.
+	 */
+	spin_lock(&child_ctx->lock);
 	child->perf_counter_ctxp = NULL;
+	spin_unlock(&child_ctx->lock);
 	local_irq_restore(flags);
 
 	mutex_lock(&child_ctx->mutex);
@@ -3513,6 +3597,7 @@ int perf_counter_init_task(struct task_struct *child)
 
 	__perf_counter_init_context(child_ctx, child);
 	child->perf_counter_ctxp = child_ctx;
+	get_task_struct(child);
 
 	/*
 	 * Lock the parent list. No need to lock the child - not PID
-- 
cgit v1.2.3-70-g09d2


From 7f0333eb2f98bbfece4fbfe21076d0a3e49f0bb0 Mon Sep 17 00:00:00 2001
From: Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
Date: Wed, 13 May 2009 06:20:29 -0700
Subject: wimax: Add netlink interface to get device state

wimax connection manager / daemon has to know what is current
state of the device. Previously it was only possible to get
notification whet state has changed.

Note:

 By mistake, the new generic netlink's number for
 WIMAX_GNL_OP_STATE_GET was declared inserting into the existing list
 of API calls, not appending; thus, it'd break existing API.

 Fixed by Inaky Perez-Gonzalez <inaky@linux.intel.com> by moving to
 the tail, where we add to the interface, not modify the interface.

 Thanks to Stephen Hemminger <shemminger@vyatta.com> for catching this.

Signed-off-by: Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
---
 include/linux/wimax.h    |  5 +++
 net/wimax/Makefile       |  1 +
 net/wimax/debug-levels.h |  1 +
 net/wimax/debugfs.c      |  1 +
 net/wimax/op-state-get.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/wimax/stack.c        |  5 ++-
 6 files changed, 98 insertions(+), 1 deletion(-)
 create mode 100644 net/wimax/op-state-get.c

(limited to 'include')

diff --git a/include/linux/wimax.h b/include/linux/wimax.h
index c89de7f4e5b..2f7a6b77d6b 100644
--- a/include/linux/wimax.h
+++ b/include/linux/wimax.h
@@ -78,6 +78,7 @@ enum {
 	WIMAX_GNL_OP_RFKILL,	/* Run wimax_rfkill() */
 	WIMAX_GNL_OP_RESET,	/* Run wimax_rfkill() */
 	WIMAX_GNL_RE_STATE_CHANGE,	/* Report: status change */
+	WIMAX_GNL_OP_STATE_GET,		/* Request for current state */
 };
 
 
@@ -113,6 +114,10 @@ enum {
 	WIMAX_GNL_RESET_IFIDX = 1,
 };
 
+/* Atributes for wimax_state_get() */
+enum {
+	WIMAX_GNL_STGET_IFIDX = 1,
+};
 
 /*
  * Attributes for the Report State Change
diff --git a/net/wimax/Makefile b/net/wimax/Makefile
index 5b80b941c2c..8f1510d0cc2 100644
--- a/net/wimax/Makefile
+++ b/net/wimax/Makefile
@@ -6,6 +6,7 @@ wimax-y :=		\
 	op-msg.o	\
 	op-reset.o	\
 	op-rfkill.o	\
+	op-state-get.o	\
 	stack.o
 
 wimax-$(CONFIG_DEBUG_FS) += debugfs.o
diff --git a/net/wimax/debug-levels.h b/net/wimax/debug-levels.h
index 1c29123a3aa..0975adba6b7 100644
--- a/net/wimax/debug-levels.h
+++ b/net/wimax/debug-levels.h
@@ -36,6 +36,7 @@ enum d_module {
 	D_SUBMODULE_DECLARE(op_msg),
 	D_SUBMODULE_DECLARE(op_reset),
 	D_SUBMODULE_DECLARE(op_rfkill),
+	D_SUBMODULE_DECLARE(op_state_get),
 	D_SUBMODULE_DECLARE(stack),
 };
 
diff --git a/net/wimax/debugfs.c b/net/wimax/debugfs.c
index 94d216a4640..6c9bedb7431 100644
--- a/net/wimax/debugfs.c
+++ b/net/wimax/debugfs.c
@@ -61,6 +61,7 @@ int wimax_debugfs_add(struct wimax_dev *wimax_dev)
 	__debugfs_register("wimax_dl_", op_msg, dentry);
 	__debugfs_register("wimax_dl_", op_reset, dentry);
 	__debugfs_register("wimax_dl_", op_rfkill, dentry);
+	__debugfs_register("wimax_dl_", op_state_get, dentry);
 	__debugfs_register("wimax_dl_", stack, dentry);
 	result = 0;
 out:
diff --git a/net/wimax/op-state-get.c b/net/wimax/op-state-get.c
new file mode 100644
index 00000000000..a76b8fcb056
--- /dev/null
+++ b/net/wimax/op-state-get.c
@@ -0,0 +1,86 @@
+/*
+ * Linux WiMAX
+ * Implement and export a method for getting a WiMAX device current state
+ *
+ * Copyright (C) 2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * Based on previous WiMAX core work by:
+ *  Copyright (C) 2008 Intel Corporation <linux-wimax@intel.com>
+ *  Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <net/wimax.h>
+#include <net/genetlink.h>
+#include <linux/wimax.h>
+#include <linux/security.h>
+#include "wimax-internal.h"
+
+#define D_SUBMODULE op_state_get
+#include "debug-levels.h"
+
+
+static const
+struct nla_policy wimax_gnl_state_get_policy[WIMAX_GNL_ATTR_MAX + 1] = {
+	[WIMAX_GNL_STGET_IFIDX] = {
+		.type = NLA_U32,
+	},
+};
+
+
+/*
+ * Exporting to user space over generic netlink
+ *
+ * Parse the state get command from user space, return a combination
+ * value that describe the current state.
+ *
+ * No attributes.
+ */
+static
+int wimax_gnl_doit_state_get(struct sk_buff *skb, struct genl_info *info)
+{
+	int result, ifindex;
+	struct wimax_dev *wimax_dev;
+	struct device *dev;
+
+	d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info);
+	result = -ENODEV;
+	if (info->attrs[WIMAX_GNL_STGET_IFIDX] == NULL) {
+		printk(KERN_ERR "WIMAX_GNL_OP_STATE_GET: can't find IFIDX "
+			"attribute\n");
+		goto error_no_wimax_dev;
+	}
+	ifindex = nla_get_u32(info->attrs[WIMAX_GNL_STGET_IFIDX]);
+	wimax_dev = wimax_dev_get_by_genl_info(info, ifindex);
+	if (wimax_dev == NULL)
+		goto error_no_wimax_dev;
+	dev = wimax_dev_to_dev(wimax_dev);
+	/* Execute the operation and send the result back to user space */
+	result = wimax_state_get(wimax_dev);
+	dev_put(wimax_dev->net_dev);
+error_no_wimax_dev:
+	d_fnend(3, NULL, "(skb %p info %p) = %d\n", skb, info, result);
+	return result;
+}
+
+
+struct genl_ops wimax_gnl_state_get = {
+	.cmd = WIMAX_GNL_OP_STATE_GET,
+	.flags = GENL_ADMIN_PERM,
+	.policy = wimax_gnl_state_get_policy,
+	.doit = wimax_gnl_doit_state_get,
+	.dumpit = NULL,
+};
diff --git a/net/wimax/stack.c b/net/wimax/stack.c
index 933e1422b09..79fb7d7c640 100644
--- a/net/wimax/stack.c
+++ b/net/wimax/stack.c
@@ -402,13 +402,15 @@ EXPORT_SYMBOL_GPL(wimax_dev_init);
 extern struct genl_ops
 	wimax_gnl_msg_from_user,
 	wimax_gnl_reset,
-	wimax_gnl_rfkill;
+	wimax_gnl_rfkill,
+	wimax_gnl_state_get;
 
 static
 struct genl_ops *wimax_gnl_ops[] = {
 	&wimax_gnl_msg_from_user,
 	&wimax_gnl_reset,
 	&wimax_gnl_rfkill,
+	&wimax_gnl_state_get,
 };
 
 
@@ -533,6 +535,7 @@ struct d_level D_LEVEL[] = {
 	D_SUBMODULE_DEFINE(op_msg),
 	D_SUBMODULE_DEFINE(op_reset),
 	D_SUBMODULE_DEFINE(op_rfkill),
+	D_SUBMODULE_DEFINE(op_state_get),
 	D_SUBMODULE_DEFINE(stack),
 };
 size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL);
-- 
cgit v1.2.3-70-g09d2


From 7481806dcfd07e9a636155554f6f4b4fbd976381 Mon Sep 17 00:00:00 2001
From: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Date: Fri, 22 May 2009 00:41:11 -0700
Subject: wimax: a new API call was added, increment minor protocol version
 number

As the 'state_get' API call was added, we need to increase the minor
protocol version number so applications that depend on the can check
it's presence.

Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
---
 include/linux/wimax.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/wimax.h b/include/linux/wimax.h
index 2f7a6b77d6b..4fdcc563551 100644
--- a/include/linux/wimax.h
+++ b/include/linux/wimax.h
@@ -59,7 +59,7 @@ enum {
 	 * M - Major: change if removing or modifying an existing call.
 	 * m - minor: change when adding a new call
 	 */
-	WIMAX_GNL_VERSION = 00,
+	WIMAX_GNL_VERSION = 01,
 	/* Generic NetLink attributes */
 	WIMAX_GNL_ATTR_INVALID = 0x00,
 	WIMAX_GNL_ATTR_MAX = 10,
-- 
cgit v1.2.3-70-g09d2


From 5d4e039b2cb1ca4de9774344ea7b61ad7fa1b0a1 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 28 May 2009 01:05:00 +0000
Subject: bonding: allow bond in mode balance-alb to work properly in bridge
 -try4.3

[PATCH net-next] bonding: allow bond in mode balance-alb to work properly in bridge -try4.3

(updated)
changes v4.2 -> v4.3
- memcpy the address always, not just in case it differs from master->dev_addr
- compare_ether_addr_64bits() is not used so there is no direct need to make new
  header file (I think it would be good to have bond stuff in separate file
  anyway).

changes v4.1 -> v4.2
- use skb->pkt_type == PACKET_HOST compare rather then comparing skb dest addr
  against skb->dev->dev_addr

The problem is described in following bugzilla:
https://bugzilla.redhat.com/show_bug.cgi?id=487763

Basically here's what's going on. In every mode, bonding interface uses the same
mac address for all enslaved devices (except fail_over_mac). Only balance-alb
will simultaneously use multiple MAC addresses across different slaves. When you
put this kind of bond device into a bridge it will only add one of mac adresses
into a hash list of mac addresses, say X. This mac address is marked as local.
But this bonding interface also has mac address Y. Now then packet arrives with
destination address Y, this address is not marked as local and the packed looks
like it needs to be forwarded. This packet is then lost which is wrong.

Notice that interfaces can be added and removed from bond while it is in bridge.

***
When the multiple addresses for bridge port approach failed to solve this issue
due to STP I started to think other way to solve this. I returned to previous
solution but tweaked one.

This patch solves the situation in the bonding without touching bridge code.
For every incoming frame to bonding the destination address is compared to
current address of the slave device from which tha packet came. If these two
match destination address is replaced by mac address of the master. This address
is known by bridge so it is delivered properly. Note that the comparsion is not
made directly, it's used skb->pkt_type == PACKET_HOST instead. This is "set"
previously in eth_type_trans().

I experimentally tried that this works as good as searching through the slave
list (v4 of this patch).

Jirka

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8e03b06e638..1eaf5ae14fe 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1917,6 +1917,16 @@ static inline void netif_set_gso_max_size(struct net_device *dev,
 	dev->gso_max_size = size;
 }
 
+static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
+					      struct net_device *master)
+{
+	if (skb->pkt_type == PACKET_HOST) {
+		u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
+
+		memcpy(dest, master->dev_addr, ETH_ALEN);
+	}
+}
+
 /* On bonding slaves other than the currently active slave, suppress
  * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
  * ARP on active-backup slaves with arp_validate enabled.
@@ -1930,6 +1940,14 @@ static inline int skb_bond_should_drop(struct sk_buff *skb)
 		if (master->priv_flags & IFF_MASTER_ARPMON)
 			dev->last_rx = jiffies;
 
+		if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) {
+			/* Do address unmangle. The local destination address
+			 * will be always the one master has. Provides the right
+			 * functionality in a bridge.
+			 */
+			skb_bond_set_mac_by_master(skb, master);
+		}
+
 		if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
 			if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
 			    skb->protocol == __cpu_to_be16(ETH_P_ARP))
-- 
cgit v1.2.3-70-g09d2


From 8bea869c5e56234990e6bad92a543437115bfc18 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Mon, 27 Apr 2009 09:44:40 +0200
Subject: ALSA: PCM midlevel: improve fifo_size handling

Move the fifo_size assignment to hw->ioctl callback to allow lowlevel
drivers overwrite the default behaviour.

fifo_size is in frames not bytes as specified in asound.h and alsa-lib's
documentation, but most hardware have fixed byte based FIFOs. Introduce
internal SNDRV_PCM_INFO_FIFO_IN_FRAMES.

Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/asound.h  |  1 +
 include/sound/pcm.h     |  1 +
 sound/core/pcm_lib.c    | 19 +++++++++++++++++++
 sound/core/pcm_native.c | 15 ++++++++++++---
 4 files changed, 33 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/sound/asound.h b/include/sound/asound.h
index 6add80fc251..82aed3f4753 100644
--- a/include/sound/asound.h
+++ b/include/sound/asound.h
@@ -255,6 +255,7 @@ typedef int __bitwise snd_pcm_subformat_t;
 #define SNDRV_PCM_INFO_HALF_DUPLEX	0x00100000	/* only half duplex */
 #define SNDRV_PCM_INFO_JOINT_DUPLEX	0x00200000	/* playback and capture stream are somewhat correlated */
 #define SNDRV_PCM_INFO_SYNC_START	0x00400000	/* pcm support some kind of sync go */
+#define SNDRV_PCM_INFO_FIFO_IN_FRAMES	0x80000000	/* internal kernel flag - FIFO size is in frames */
 
 typedef int __bitwise snd_pcm_state_t;
 #define	SNDRV_PCM_STATE_OPEN		((__force snd_pcm_state_t) 0) /* stream is open */
diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index 267effddb07..8a69b5c1e1c 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -98,6 +98,7 @@ struct snd_pcm_ops {
 #define SNDRV_PCM_IOCTL1_INFO		1
 #define SNDRV_PCM_IOCTL1_CHANNEL_INFO	2
 #define SNDRV_PCM_IOCTL1_GSTATE		3
+#define SNDRV_PCM_IOCTL1_FIFO_SIZE	4
 
 #define SNDRV_PCM_TRIGGER_STOP		0
 #define SNDRV_PCM_TRIGGER_START		1
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index d659995ac3a..adc2b0bd113 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -1524,6 +1524,23 @@ static int snd_pcm_lib_ioctl_channel_info(struct snd_pcm_substream *substream,
 	return 0;
 }
 
+static int snd_pcm_lib_ioctl_fifo_size(struct snd_pcm_substream *substream,
+				       void *arg)
+{
+	struct snd_pcm_hw_params *params = arg;
+	snd_pcm_format_t format;
+	int channels, width;
+
+	params->fifo_size = substream->runtime->hw.fifo_size;
+	if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_FIFO_IN_FRAMES)) {
+		format = params_format(params);
+		channels = params_channels(params);
+		width = snd_pcm_format_physical_width(format);
+		params->fifo_size /= width * channels;
+	}
+	return 0;
+}
+
 /**
  * snd_pcm_lib_ioctl - a generic PCM ioctl callback
  * @substream: the pcm substream instance
@@ -1545,6 +1562,8 @@ int snd_pcm_lib_ioctl(struct snd_pcm_substream *substream,
 		return snd_pcm_lib_ioctl_reset(substream, arg);
 	case SNDRV_PCM_IOCTL1_CHANNEL_INFO:
 		return snd_pcm_lib_ioctl_channel_info(substream, arg);
+	case SNDRV_PCM_IOCTL1_FIFO_SIZE:
+		return snd_pcm_lib_ioctl_fifo_size(substream, arg);
 	}
 	return -ENXIO;
 }
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 45dc53fcfa2..84da3ba17c8 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -312,9 +312,18 @@ int snd_pcm_hw_refine(struct snd_pcm_substream *substream,
 
 	hw = &substream->runtime->hw;
 	if (!params->info)
-		params->info = hw->info;
-	if (!params->fifo_size)
-		params->fifo_size = hw->fifo_size;
+		params->info = hw->info & ~SNDRV_PCM_INFO_FIFO_IN_FRAMES;
+	if (!params->fifo_size) {
+		if (snd_mask_min(&params->masks[SNDRV_PCM_HW_PARAM_FORMAT]) ==
+		    snd_mask_max(&params->masks[SNDRV_PCM_HW_PARAM_FORMAT]) &&
+                    snd_mask_min(&params->masks[SNDRV_PCM_HW_PARAM_CHANNELS]) ==
+                    snd_mask_max(&params->masks[SNDRV_PCM_HW_PARAM_CHANNELS])) {
+			changed = substream->ops->ioctl(substream,
+					SNDRV_PCM_IOCTL1_FIFO_SIZE, params);
+			if (params < 0)
+				return changed;
+		}
+	}
 	params->rmask = 0;
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 0dc54e9f33e2fbcea28356bc2c8c931cb307d3b3 Mon Sep 17 00:00:00 2001
From: Kevin Cernekee <kpc.mtd@gmail.com>
Date: Wed, 8 Apr 2009 22:52:28 -0700
Subject: mtd: add MEMERASE64 ioctl for >4GiB devices

New MEMERASE/MEMREADOOB/MEMWRITEOOB ioctls are needed in order to support
64-bit offsets into large NAND flash devices.

Signed-off-by: Kevin Cernekee <kpc.mtd@gmail.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c | 29 +++++++++++++++++++++--------
 fs/compat_ioctl.c     |  1 +
 include/mtd/mtd-abi.h |  6 ++++++
 3 files changed, 28 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 763d3f0a1f4..ad4b8618977 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -417,6 +417,7 @@ static int mtd_ioctl(struct inode *inode, struct file *file,
 		break;
 
 	case MEMERASE:
+	case MEMERASE64:
 	{
 		struct erase_info *erase;
 
@@ -427,20 +428,32 @@ static int mtd_ioctl(struct inode *inode, struct file *file,
 		if (!erase)
 			ret = -ENOMEM;
 		else {
-			struct erase_info_user einfo;
-
 			wait_queue_head_t waitq;
 			DECLARE_WAITQUEUE(wait, current);
 
 			init_waitqueue_head(&waitq);
 
-			if (copy_from_user(&einfo, argp,
-				    sizeof(struct erase_info_user))) {
-				kfree(erase);
-				return -EFAULT;
+			if (cmd == MEMERASE64) {
+				struct erase_info_user64 einfo64;
+
+				if (copy_from_user(&einfo64, argp,
+					    sizeof(struct erase_info_user64))) {
+					kfree(erase);
+					return -EFAULT;
+				}
+				erase->addr = einfo64.start;
+				erase->len = einfo64.length;
+			} else {
+				struct erase_info_user einfo32;
+
+				if (copy_from_user(&einfo32, argp,
+					    sizeof(struct erase_info_user))) {
+					kfree(erase);
+					return -EFAULT;
+				}
+				erase->addr = einfo32.start;
+				erase->len = einfo32.length;
 			}
-			erase->addr = einfo.start;
-			erase->len = einfo.length;
 			erase->mtd = mtd;
 			erase->callback = mtdchar_erase_callback;
 			erase->priv = (unsigned long)&waitq;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index b83f6bcfa51..c603ca2c223 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -2441,6 +2441,7 @@ COMPATIBLE_IOCTL(MEMGETREGIONCOUNT)
 COMPATIBLE_IOCTL(MEMGETREGIONINFO)
 COMPATIBLE_IOCTL(MEMGETBADBLOCK)
 COMPATIBLE_IOCTL(MEMSETBADBLOCK)
+COMPATIBLE_IOCTL(MEMERASE64)
 /* NBD */
 ULONG_IOCTL(NBD_SET_SOCK)
 ULONG_IOCTL(NBD_SET_BLKSIZE)
diff --git a/include/mtd/mtd-abi.h b/include/mtd/mtd-abi.h
index b6595b3c68b..2e32be1e3a1 100644
--- a/include/mtd/mtd-abi.h
+++ b/include/mtd/mtd-abi.h
@@ -12,6 +12,11 @@ struct erase_info_user {
 	__u32 length;
 };
 
+struct erase_info_user64 {
+	__u64 start;
+	__u64 length;
+};
+
 struct mtd_oob_buf {
 	__u32 start;
 	__u32 length;
@@ -95,6 +100,7 @@ struct otp_info {
 #define ECCGETLAYOUT		_IOR('M', 17, struct nand_ecclayout)
 #define ECCGETSTATS		_IOR('M', 18, struct mtd_ecc_stats)
 #define MTDFILEMODE		_IO('M', 19)
+#define MEMERASE64		_IOW('M', 20, struct erase_info_user64)
 
 /*
  * Obsolete legacy interface. Keep it in order not to break userspace
-- 
cgit v1.2.3-70-g09d2


From bbbee90829304d156c12b171c0ac7e6e1aba8b90 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 29 May 2009 14:25:58 +0200
Subject: perf_counter: Ammend cleanup in fork() fail

When fork() fails we cannot use perf_counter_exit_task() since that
assumes to operate on current. Write a new helper that cleans up
unused/clean contexts.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  2 ++
 kernel/fork.c                |  2 +-
 kernel/perf_counter.c        | 43 ++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 43 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 717bf3b59ba..519a41bba24 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -579,6 +579,7 @@ extern void perf_counter_task_sched_out(struct task_struct *task,
 extern void perf_counter_task_tick(struct task_struct *task, int cpu);
 extern int perf_counter_init_task(struct task_struct *child);
 extern void perf_counter_exit_task(struct task_struct *child);
+extern void perf_counter_free_task(struct task_struct *task);
 extern void perf_counter_do_pending(void);
 extern void perf_counter_print_debug(void);
 extern void __perf_disable(void);
@@ -644,6 +645,7 @@ static inline void
 perf_counter_task_tick(struct task_struct *task, int cpu)		{ }
 static inline int perf_counter_init_task(struct task_struct *child)	{ return 0; }
 static inline void perf_counter_exit_task(struct task_struct *child)	{ }
+static inline void perf_counter_free_task(struct task_struct *task)	{ }
 static inline void perf_counter_do_pending(void)			{ }
 static inline void perf_counter_print_debug(void)			{ }
 static inline void perf_disable(void)					{ }
diff --git a/kernel/fork.c b/kernel/fork.c
index c07c3335cea..23bf757ed32 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1298,7 +1298,7 @@ bad_fork_cleanup_semundo:
 bad_fork_cleanup_audit:
 	audit_free(p);
 bad_fork_cleanup_policy:
-	perf_counter_exit_task(p);
+	perf_counter_free_task(p);
 #ifdef CONFIG_NUMA
 	mpol_put(p->mempolicy);
 bad_fork_cleanup_cgroup:
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0c000d305e0..79c3f26541d 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3538,8 +3538,7 @@ static void sync_child_counter(struct perf_counter *child_counter,
 }
 
 static void
-__perf_counter_exit_task(struct task_struct *child,
-			 struct perf_counter *child_counter,
+__perf_counter_exit_task(struct perf_counter *child_counter,
 			 struct perf_counter_context *child_ctx)
 {
 	struct perf_counter *parent_counter;
@@ -3605,7 +3604,7 @@ void perf_counter_exit_task(struct task_struct *child)
 again:
 	list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
 				 list_entry)
-		__perf_counter_exit_task(child, child_counter, child_ctx);
+		__perf_counter_exit_task(child_counter, child_ctx);
 
 	/*
 	 * If the last counter was a group counter, it will have appended all
@@ -3620,6 +3619,44 @@ again:
 	put_ctx(child_ctx);
 }
 
+/*
+ * free an unexposed, unused context as created by inheritance by
+ * init_task below, used by fork() in case of fail.
+ */
+void perf_counter_free_task(struct task_struct *task)
+{
+	struct perf_counter_context *ctx = task->perf_counter_ctxp;
+	struct perf_counter *counter, *tmp;
+
+	if (!ctx)
+		return;
+
+	mutex_lock(&ctx->mutex);
+again:
+	list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry) {
+		struct perf_counter *parent = counter->parent;
+
+		if (WARN_ON_ONCE(!parent))
+			continue;
+
+		mutex_lock(&parent->child_mutex);
+		list_del_init(&counter->child_list);
+		mutex_unlock(&parent->child_mutex);
+
+		fput(parent->filp);
+
+		list_del_counter(counter, ctx);
+		free_counter(counter);
+	}
+
+	if (!list_empty(&ctx->counter_list))
+		goto again;
+
+	mutex_unlock(&ctx->mutex);
+
+	put_ctx(ctx);
+}
+
 /*
  * Initialize the perf_counter context in task_struct
  */
-- 
cgit v1.2.3-70-g09d2


From aea7cea9fa9e39e71f95ad70b3daf98ba9972587 Mon Sep 17 00:00:00 2001
From: Kevin Cernekee <kpc.mtd@gmail.com>
Date: Wed, 8 Apr 2009 22:53:49 -0700
Subject: mtd: add OOB ioctls for >4GiB devices

Signed-off-by: Kevin Cernekee <kpc.mtd@gmail.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c | 28 ++++++++++++++++++++++++++++
 fs/compat_ioctl.c     |  2 ++
 include/mtd/mtd-abi.h |  9 +++++++++
 3 files changed, 39 insertions(+)

(limited to 'include')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 51bb0b09200..99d1fbc9501 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -607,6 +607,34 @@ static int mtd_ioctl(struct inode *inode, struct file *file,
 		break;
 	}
 
+	case MEMWRITEOOB64:
+	{
+		struct mtd_oob_buf64 buf;
+		struct mtd_oob_buf64 __user *buf_user = argp;
+
+		if (copy_from_user(&buf, argp, sizeof(buf)))
+			ret = -EFAULT;
+		else
+			ret = mtd_do_writeoob(file, mtd, buf.start, buf.length,
+				(void __user *)(uintptr_t)buf.usr_ptr,
+				&buf_user->length);
+		break;
+	}
+
+	case MEMREADOOB64:
+	{
+		struct mtd_oob_buf64 buf;
+		struct mtd_oob_buf64 __user *buf_user = argp;
+
+		if (copy_from_user(&buf, argp, sizeof(buf)))
+			ret = -EFAULT;
+		else
+			ret = mtd_do_readoob(mtd, buf.start, buf.length,
+				(void __user *)(uintptr_t)buf.usr_ptr,
+				&buf_user->length);
+		break;
+	}
+
 	case MEMLOCK:
 	{
 		struct erase_info_user einfo;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 196397bff08..8da222eacba 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -2411,6 +2411,8 @@ COMPATIBLE_IOCTL(ECCGETLAYOUT)
 COMPATIBLE_IOCTL(ECCGETSTATS)
 COMPATIBLE_IOCTL(MTDFILEMODE)
 COMPATIBLE_IOCTL(MEMERASE64)
+COMPATIBLE_IOCTL(MEMREADOOB64)
+COMPATIBLE_IOCTL(MEMWRITEOOB64)
 /* NBD */
 ULONG_IOCTL(NBD_SET_SOCK)
 ULONG_IOCTL(NBD_SET_BLKSIZE)
diff --git a/include/mtd/mtd-abi.h b/include/mtd/mtd-abi.h
index 2e32be1e3a1..be51ae2bd0f 100644
--- a/include/mtd/mtd-abi.h
+++ b/include/mtd/mtd-abi.h
@@ -23,6 +23,13 @@ struct mtd_oob_buf {
 	unsigned char __user *ptr;
 };
 
+struct mtd_oob_buf64 {
+	__u64 start;
+	__u32 pad;
+	__u32 length;
+	__u64 usr_ptr;
+};
+
 #define MTD_ABSENT		0
 #define MTD_RAM			1
 #define MTD_ROM			2
@@ -101,6 +108,8 @@ struct otp_info {
 #define ECCGETSTATS		_IOR('M', 18, struct mtd_ecc_stats)
 #define MTDFILEMODE		_IO('M', 19)
 #define MEMERASE64		_IOW('M', 20, struct erase_info_user64)
+#define MEMWRITEOOB64		_IOWR('M', 21, struct mtd_oob_buf64)
+#define MEMREADOOB64		_IOWR('M', 22, struct mtd_oob_buf64)
 
 /*
  * Obsolete legacy interface. Keep it in order not to break userspace
-- 
cgit v1.2.3-70-g09d2


From ccffad25b5136958d4769ed6de5e87992dd9c65c Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Fri, 22 May 2009 23:22:17 +0000
Subject: net: convert unicast addr list

This patch converts unicast address list to standard list_head using
previously introduced struct netdev_hw_addr. It also relaxes the
locking. Original spinlock (still used for multicast addresses) is not
needed and is no longer used for a protection of this list. All
reading and writing takes place under rtnl (with no changes).

I also removed a possibility to specify the length of the address
while adding or deleting unicast address. It's always dev->addr_len.

The convertion touched especially e1000 and ixgbe codes when the
change is not so trivial.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>

 drivers/net/bnx2.c               |   13 +--
 drivers/net/e1000/e1000_main.c   |   24 +++--
 drivers/net/ixgbe/ixgbe_common.c |   14 ++--
 drivers/net/ixgbe/ixgbe_common.h |    4 +-
 drivers/net/ixgbe/ixgbe_main.c   |    6 +-
 drivers/net/ixgbe/ixgbe_type.h   |    4 +-
 drivers/net/macvlan.c            |   11 +-
 drivers/net/mv643xx_eth.c        |   11 +-
 drivers/net/niu.c                |    7 +-
 drivers/net/virtio_net.c         |    7 +-
 drivers/s390/net/qeth_l2_main.c  |    6 +-
 drivers/scsi/fcoe/fcoe.c         |   16 ++--
 include/linux/netdevice.h        |   18 ++--
 net/8021q/vlan.c                 |    4 +-
 net/8021q/vlan_dev.c             |   10 +-
 net/core/dev.c                   |  195 +++++++++++++++++++++++++++-----------
 net/dsa/slave.c                  |   10 +-
 net/packet/af_packet.c           |    4 +-
 18 files changed, 227 insertions(+), 137 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bnx2.c               |  13 ++-
 drivers/net/e1000/e1000_main.c   |  24 +++--
 drivers/net/ixgbe/ixgbe_common.c |  14 +--
 drivers/net/ixgbe/ixgbe_common.h |   4 +-
 drivers/net/ixgbe/ixgbe_main.c   |   6 +-
 drivers/net/ixgbe/ixgbe_type.h   |   4 +-
 drivers/net/macvlan.c            |  11 ++-
 drivers/net/mv643xx_eth.c        |  11 ++-
 drivers/net/niu.c                |   7 +-
 drivers/net/virtio_net.c         |   7 +-
 drivers/s390/net/qeth_l2_main.c  |   6 +-
 drivers/scsi/fcoe/fcoe.c         |  16 ++--
 include/linux/netdevice.h        |  18 ++--
 net/8021q/vlan.c                 |   4 +-
 net/8021q/vlan_dev.c             |  10 +-
 net/core/dev.c                   | 195 +++++++++++++++++++++++++++------------
 net/dsa/slave.c                  |  10 +-
 net/packet/af_packet.c           |   4 +-
 18 files changed, 227 insertions(+), 137 deletions(-)

(limited to 'include')

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 83ee0f53f2d..f53017250e0 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -48,6 +48,7 @@
 #include <linux/cache.h>
 #include <linux/firmware.h>
 #include <linux/log2.h>
+#include <linux/list.h>
 
 #include "bnx2.h"
 #include "bnx2_fw.h"
@@ -3310,7 +3311,7 @@ bnx2_set_rx_mode(struct net_device *dev)
 {
 	struct bnx2 *bp = netdev_priv(dev);
 	u32 rx_mode, sort_mode;
-	struct dev_addr_list *uc_ptr;
+	struct netdev_hw_addr *ha;
 	int i;
 
 	if (!netif_running(dev))
@@ -3369,21 +3370,19 @@ bnx2_set_rx_mode(struct net_device *dev)
 		sort_mode |= BNX2_RPM_SORT_USER0_MC_HSH_EN;
 	}
 
-	uc_ptr = NULL;
 	if (dev->uc_count > BNX2_MAX_UNICAST_ADDRESSES) {
 		rx_mode |= BNX2_EMAC_RX_MODE_PROMISCUOUS;
 		sort_mode |= BNX2_RPM_SORT_USER0_PROM_EN |
 			     BNX2_RPM_SORT_USER0_PROM_VLAN;
 	} else if (!(dev->flags & IFF_PROMISC)) {
-		uc_ptr = dev->uc_list;
-
 		/* Add all entries into to the match filter list */
-		for (i = 0; i < dev->uc_count; i++) {
-			bnx2_set_mac_addr(bp, uc_ptr->da_addr,
+		i = 0;
+		list_for_each_entry(ha, &dev->uc_list, list) {
+			bnx2_set_mac_addr(bp, ha->addr,
 					  i + BNX2_START_UNICAST_ADDRESS_INDEX);
 			sort_mode |= (1 <<
 				      (i + BNX2_START_UNICAST_ADDRESS_INDEX));
-			uc_ptr = uc_ptr->next;
+			i++;
 		}
 
 	}
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 79fe1ee3da5..74667e52143 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2330,7 +2330,8 @@ static void e1000_set_rx_mode(struct net_device *netdev)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
-	struct dev_addr_list *uc_ptr;
+	struct netdev_hw_addr *ha;
+	bool use_uc = false;
 	struct dev_addr_list *mc_ptr;
 	u32 rctl;
 	u32 hash_value;
@@ -2369,12 +2370,11 @@ static void e1000_set_rx_mode(struct net_device *netdev)
 			rctl |= E1000_RCTL_VFE;
 	}
 
-	uc_ptr = NULL;
 	if (netdev->uc_count > rar_entries - 1) {
 		rctl |= E1000_RCTL_UPE;
 	} else if (!(netdev->flags & IFF_PROMISC)) {
 		rctl &= ~E1000_RCTL_UPE;
-		uc_ptr = netdev->uc_list;
+		use_uc = true;
 	}
 
 	ew32(RCTL, rctl);
@@ -2392,13 +2392,20 @@ static void e1000_set_rx_mode(struct net_device *netdev)
 	 * if there are not 14 addresses, go ahead and clear the filters
 	 * -- with 82571 controllers only 0-13 entries are filled here
 	 */
+	i = 1;
+	if (use_uc)
+		list_for_each_entry(ha, &netdev->uc_list, list) {
+			if (i == rar_entries)
+				break;
+			e1000_rar_set(hw, ha->addr, i++);
+		}
+
+	WARN_ON(i == rar_entries);
+
 	mc_ptr = netdev->mc_list;
 
-	for (i = 1; i < rar_entries; i++) {
-		if (uc_ptr) {
-			e1000_rar_set(hw, uc_ptr->da_addr, i);
-			uc_ptr = uc_ptr->next;
-		} else if (mc_ptr) {
+	for (; i < rar_entries; i++) {
+		if (mc_ptr) {
 			e1000_rar_set(hw, mc_ptr->da_addr, i);
 			mc_ptr = mc_ptr->next;
 		} else {
@@ -2408,7 +2415,6 @@ static void e1000_set_rx_mode(struct net_device *netdev)
 			E1000_WRITE_FLUSH();
 		}
 	}
-	WARN_ON(uc_ptr != NULL);
 
 	/* load any remaining addresses into the hash table */
 
diff --git a/drivers/net/ixgbe/ixgbe_common.c b/drivers/net/ixgbe/ixgbe_common.c
index 0cc3c47cb45..6f79409270a 100644
--- a/drivers/net/ixgbe/ixgbe_common.c
+++ b/drivers/net/ixgbe/ixgbe_common.c
@@ -28,6 +28,8 @@
 #include <linux/pci.h>
 #include <linux/delay.h>
 #include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
 
 #include "ixgbe.h"
 #include "ixgbe_common.h"
@@ -1356,15 +1358,14 @@ static void ixgbe_add_uc_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq)
  *  Drivers using secondary unicast addresses must set user_set_promisc when
  *  manually putting the device into promiscuous mode.
  **/
-s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, u8 *addr_list,
-                              u32 addr_count, ixgbe_mc_addr_itr next)
+s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw,
+				      struct list_head *uc_list)
 {
-	u8 *addr;
 	u32 i;
 	u32 old_promisc_setting = hw->addr_ctrl.overflow_promisc;
 	u32 uc_addr_in_use;
 	u32 fctrl;
-	u32 vmdq;
+	struct netdev_hw_addr *ha;
 
 	/*
 	 * Clear accounting of old secondary address list,
@@ -1382,10 +1383,9 @@ s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, u8 *addr_list,
 	}
 
 	/* Add the new addresses */
-	for (i = 0; i < addr_count; i++) {
+	list_for_each_entry(ha, uc_list, list) {
 		hw_dbg(hw, " Adding the secondary addresses:\n");
-		addr = next(hw, &addr_list, &vmdq);
-		ixgbe_add_uc_addr(hw, addr, vmdq);
+		ixgbe_add_uc_addr(hw, ha->addr, 0);
 	}
 
 	if (hw->addr_ctrl.overflow_promisc) {
diff --git a/drivers/net/ixgbe/ixgbe_common.h b/drivers/net/ixgbe/ixgbe_common.h
index dd260890ad0..b2a4b2c99c4 100644
--- a/drivers/net/ixgbe/ixgbe_common.h
+++ b/drivers/net/ixgbe/ixgbe_common.h
@@ -59,8 +59,8 @@ s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw);
 s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw, u8 *mc_addr_list,
                                       u32 mc_addr_count,
                                       ixgbe_mc_addr_itr func);
-s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, u8 *addr_list,
-                                      u32 addr_count, ixgbe_mc_addr_itr func);
+s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw,
+				      struct list_head *uc_list);
 s32 ixgbe_enable_mc_generic(struct ixgbe_hw *hw);
 s32 ixgbe_disable_mc_generic(struct ixgbe_hw *hw);
 s32 ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval);
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 924aa5ed02c..de70a2df9ae 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -2181,11 +2181,7 @@ static void ixgbe_set_rx_mode(struct net_device *netdev)
 	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
 
 	/* reprogram secondary unicast list */
-	addr_count = netdev->uc_count;
-	if (addr_count)
-		addr_list = netdev->uc_list->dmi_addr;
-	hw->mac.ops.update_uc_addr_list(hw, addr_list, addr_count,
-	                                  ixgbe_addr_list_itr);
+	hw->mac.ops.update_uc_addr_list(hw, &netdev->uc_list);
 
 	/* reprogram multicast list */
 	addr_count = netdev->mc_count;
diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h
index df1f7034c28..a8a8243d8fd 100644
--- a/drivers/net/ixgbe/ixgbe_type.h
+++ b/drivers/net/ixgbe/ixgbe_type.h
@@ -30,6 +30,7 @@
 
 #include <linux/types.h>
 #include <linux/mdio.h>
+#include <linux/list.h>
 
 /* Vendor ID */
 #define IXGBE_INTEL_VENDOR_ID   0x8086
@@ -2223,8 +2224,7 @@ struct ixgbe_mac_operations {
 	s32 (*set_vmdq)(struct ixgbe_hw *, u32, u32);
 	s32 (*clear_vmdq)(struct ixgbe_hw *, u32, u32);
 	s32 (*init_rx_addrs)(struct ixgbe_hw *);
-	s32 (*update_uc_addr_list)(struct ixgbe_hw *, u8 *, u32,
-	                           ixgbe_mc_addr_itr);
+	s32 (*update_uc_addr_list)(struct ixgbe_hw *, struct list_head *);
 	s32 (*update_mc_addr_list)(struct ixgbe_hw *, u8 *, u32,
 	                           ixgbe_mc_addr_itr);
 	s32 (*enable_mc)(struct ixgbe_hw *);
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index d5334b41e4b..021d9941c29 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -232,7 +232,7 @@ static int macvlan_open(struct net_device *dev)
 	if (macvlan_addr_busy(vlan->port, dev->dev_addr))
 		goto out;
 
-	err = dev_unicast_add(lowerdev, dev->dev_addr, ETH_ALEN);
+	err = dev_unicast_add(lowerdev, dev->dev_addr);
 	if (err < 0)
 		goto out;
 	if (dev->flags & IFF_ALLMULTI) {
@@ -244,7 +244,7 @@ static int macvlan_open(struct net_device *dev)
 	return 0;
 
 del_unicast:
-	dev_unicast_delete(lowerdev, dev->dev_addr, ETH_ALEN);
+	dev_unicast_delete(lowerdev, dev->dev_addr);
 out:
 	return err;
 }
@@ -258,7 +258,7 @@ static int macvlan_stop(struct net_device *dev)
 	if (dev->flags & IFF_ALLMULTI)
 		dev_set_allmulti(lowerdev, -1);
 
-	dev_unicast_delete(lowerdev, dev->dev_addr, ETH_ALEN);
+	dev_unicast_delete(lowerdev, dev->dev_addr);
 
 	macvlan_hash_del(vlan);
 	return 0;
@@ -282,10 +282,11 @@ static int macvlan_set_mac_address(struct net_device *dev, void *p)
 		if (macvlan_addr_busy(vlan->port, addr->sa_data))
 			return -EBUSY;
 
-		if ((err = dev_unicast_add(lowerdev, addr->sa_data, ETH_ALEN)))
+		err = dev_unicast_add(lowerdev, addr->sa_data);
+		if (err)
 			return err;
 
-		dev_unicast_delete(lowerdev, dev->dev_addr, ETH_ALEN);
+		dev_unicast_delete(lowerdev, dev->dev_addr);
 
 		macvlan_hash_change_addr(vlan, addr->sa_data);
 	}
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index 1361ddc8d31..b4e18a58cb1 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -55,6 +55,7 @@
 #include <linux/types.h>
 #include <linux/inet_lro.h>
 #include <asm/system.h>
+#include <linux/list.h>
 
 static char mv643xx_eth_driver_name[] = "mv643xx_eth";
 static char mv643xx_eth_driver_version[] = "1.4";
@@ -1721,20 +1722,20 @@ static void uc_addr_set(struct mv643xx_eth_private *mp, unsigned char *addr)
 
 static u32 uc_addr_filter_mask(struct net_device *dev)
 {
-	struct dev_addr_list *uc_ptr;
+	struct netdev_hw_addr *ha;
 	u32 nibbles;
 
 	if (dev->flags & IFF_PROMISC)
 		return 0;
 
 	nibbles = 1 << (dev->dev_addr[5] & 0x0f);
-	for (uc_ptr = dev->uc_list; uc_ptr != NULL; uc_ptr = uc_ptr->next) {
-		if (memcmp(dev->dev_addr, uc_ptr->da_addr, 5))
+	list_for_each_entry(ha, &dev->uc_list, list) {
+		if (memcmp(dev->dev_addr, ha->addr, 5))
 			return 0;
-		if ((dev->dev_addr[5] ^ uc_ptr->da_addr[5]) & 0xf0)
+		if ((dev->dev_addr[5] ^ ha->addr[5]) & 0xf0)
 			return 0;
 
-		nibbles |= 1 << (uc_ptr->da_addr[5] & 0x0f);
+		nibbles |= 1 << (ha->addr[5] & 0x0f);
 	}
 
 	return nibbles;
diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index edac3a0b02d..fa61a12c5e1 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -22,6 +22,7 @@
 #include <linux/log2.h>
 #include <linux/jiffies.h>
 #include <linux/crc32.h>
+#include <linux/list.h>
 
 #include <linux/io.h>
 
@@ -6362,6 +6363,7 @@ static void niu_set_rx_mode(struct net_device *dev)
 	struct niu *np = netdev_priv(dev);
 	int i, alt_cnt, err;
 	struct dev_addr_list *addr;
+	struct netdev_hw_addr *ha;
 	unsigned long flags;
 	u16 hash[16] = { 0, };
 
@@ -6383,9 +6385,8 @@ static void niu_set_rx_mode(struct net_device *dev)
 	if (alt_cnt) {
 		int index = 0;
 
-		for (addr = dev->uc_list; addr; addr = addr->next) {
-			err = niu_set_alt_mac(np, index,
-					      addr->da_addr);
+		list_for_each_entry(ha, &dev->uc_list, list) {
+			err = niu_set_alt_mac(np, index, ha->addr);
 			if (err)
 				printk(KERN_WARNING PFX "%s: Error %d "
 				       "adding alt mac %d\n",
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 6cc5bcd34fb..0c9ca67f66e 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -680,6 +680,7 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	u8 promisc, allmulti;
 	struct virtio_net_ctrl_mac *mac_data;
 	struct dev_addr_list *addr;
+	struct netdev_hw_addr *ha;
 	void *buf;
 	int i;
 
@@ -718,9 +719,9 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 
 	/* Store the unicast list and count in the front of the buffer */
 	mac_data->entries = dev->uc_count;
-	addr = dev->uc_list;
-	for (i = 0; i < dev->uc_count; i++, addr = addr->next)
-		memcpy(&mac_data->macs[i][0], addr->da_addr, ETH_ALEN);
+	i = 0;
+	list_for_each_entry(ha, &dev->uc_list, list)
+		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
 
 	sg_set_buf(&sg[0], mac_data,
 		   sizeof(mac_data->entries) + (dev->uc_count * ETH_ALEN));
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 9ca6bab7c9b..ecd3d06c0d5 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -19,6 +19,7 @@
 #include <linux/etherdevice.h>
 #include <linux/mii.h>
 #include <linux/ip.h>
+#include <linux/list.h>
 
 #include "qeth_core.h"
 
@@ -640,6 +641,7 @@ static void qeth_l2_set_multicast_list(struct net_device *dev)
 {
 	struct qeth_card *card = dev->ml_priv;
 	struct dev_addr_list *dm;
+	struct netdev_hw_addr *ha;
 
 	if (card->info.type == QETH_CARD_TYPE_OSN)
 		return ;
@@ -653,8 +655,8 @@ static void qeth_l2_set_multicast_list(struct net_device *dev)
 	for (dm = dev->mc_list; dm; dm = dm->next)
 		qeth_l2_add_mc(card, dm->da_addr, 0);
 
-	for (dm = dev->uc_list; dm; dm = dm->next)
-		qeth_l2_add_mc(card, dm->da_addr, 1);
+	list_for_each_entry(ha, &dev->uc_list, list)
+		qeth_l2_add_mc(card, ha->addr, 1);
 
 	spin_unlock_bh(&card->mclock);
 	if (!qeth_adp_supported(card, IPA_SETADP_SET_PROMISC_MODE))
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index ce33f107b0a..f791348871f 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -182,8 +182,8 @@ static void fcoe_update_src_mac(struct fcoe_ctlr *fip, u8 *old, u8 *new)
 	fc = fcoe_from_ctlr(fip);
 	rtnl_lock();
 	if (!is_zero_ether_addr(old))
-		dev_unicast_delete(fc->real_dev, old, ETH_ALEN);
-	dev_unicast_add(fc->real_dev, new, ETH_ALEN);
+		dev_unicast_delete(fc->real_dev, old);
+	dev_unicast_add(fc->real_dev, new);
 	rtnl_unlock();
 }
 
@@ -233,13 +233,11 @@ void fcoe_netdev_cleanup(struct fcoe_softc *fc)
 	/* Delete secondary MAC addresses */
 	rtnl_lock();
 	memcpy(flogi_maddr, (u8[6]) FC_FCOE_FLOGI_MAC, ETH_ALEN);
-	dev_unicast_delete(fc->real_dev, flogi_maddr, ETH_ALEN);
+	dev_unicast_delete(fc->real_dev, flogi_maddr);
 	if (!is_zero_ether_addr(fc->ctlr.data_src_addr))
-		dev_unicast_delete(fc->real_dev,
-				   fc->ctlr.data_src_addr, ETH_ALEN);
+		dev_unicast_delete(fc->real_dev, fc->ctlr.data_src_addr);
 	if (fc->ctlr.spma)
-		dev_unicast_delete(fc->real_dev,
-				   fc->ctlr.ctl_src_addr, ETH_ALEN);
+		dev_unicast_delete(fc->real_dev, fc->ctlr.ctl_src_addr);
 	dev_mc_delete(fc->real_dev, FIP_ALL_ENODE_MACS, ETH_ALEN, 0);
 	rtnl_unlock();
 }
@@ -347,9 +345,9 @@ static int fcoe_netdev_config(struct fc_lport *lp, struct net_device *netdev)
 	 */
 	rtnl_lock();
 	memcpy(flogi_maddr, (u8[6]) FC_FCOE_FLOGI_MAC, ETH_ALEN);
-	dev_unicast_add(fc->real_dev, flogi_maddr, ETH_ALEN);
+	dev_unicast_add(fc->real_dev, flogi_maddr);
 	if (fc->ctlr.spma)
-		dev_unicast_add(fc->real_dev, fc->ctlr.ctl_src_addr, ETH_ALEN);
+		dev_unicast_add(fc->real_dev, fc->ctlr.ctl_src_addr);
 	dev_mc_add(fc->real_dev, FIP_ALL_ENODE_MACS, ETH_ALEN, 0);
 	rtnl_unlock();
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1eaf5ae14fe..bbfabf3012b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -215,9 +215,12 @@ struct netdev_hw_addr {
 	struct list_head	list;
 	unsigned char		addr[MAX_ADDR_LEN];
 	unsigned char		type;
-#define NETDEV_HW_ADDR_T_LAN	1
-#define NETDEV_HW_ADDR_T_SAN	2
-#define NETDEV_HW_ADDR_T_SLAVE	3
+#define NETDEV_HW_ADDR_T_LAN		1
+#define NETDEV_HW_ADDR_T_SAN		2
+#define NETDEV_HW_ADDR_T_SLAVE		3
+#define NETDEV_HW_ADDR_T_UNICAST	4
+	int			refcount;
+	bool			synced;
 	struct rcu_head		rcu_head;
 };
 
@@ -773,10 +776,11 @@ struct net_device
 	unsigned char		addr_len;	/* hardware address length	*/
 	unsigned short          dev_id;		/* for shared network cards */
 
-	spinlock_t		addr_list_lock;
-	struct dev_addr_list	*uc_list;	/* Secondary unicast mac addresses */
+	struct list_head	uc_list;	/* Secondary unicast mac
+						   addresses */
 	int			uc_count;	/* Number of installed ucasts	*/
 	int			uc_promisc;
+	spinlock_t		addr_list_lock;
 	struct dev_addr_list	*mc_list;	/* Multicast mac addresses	*/
 	int			mc_count;	/* Number of installed mcasts	*/
 	unsigned int		promiscuity;
@@ -1836,8 +1840,8 @@ extern int dev_addr_del_multiple(struct net_device *to_dev,
 /* Functions used for secondary unicast and multicast support */
 extern void		dev_set_rx_mode(struct net_device *dev);
 extern void		__dev_set_rx_mode(struct net_device *dev);
-extern int		dev_unicast_delete(struct net_device *dev, void *addr, int alen);
-extern int		dev_unicast_add(struct net_device *dev, void *addr, int alen);
+extern int		dev_unicast_delete(struct net_device *dev, void *addr);
+extern int		dev_unicast_add(struct net_device *dev, void *addr);
 extern int		dev_unicast_sync(struct net_device *to, struct net_device *from);
 extern void		dev_unicast_unsync(struct net_device *to, struct net_device *from);
 extern int 		dev_mc_delete(struct net_device *dev, void *addr, int alen, int all);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index d1e10546eb8..714e1c3536b 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -378,13 +378,13 @@ static void vlan_sync_address(struct net_device *dev,
 	 * the new address */
 	if (compare_ether_addr(vlandev->dev_addr, vlan->real_dev_addr) &&
 	    !compare_ether_addr(vlandev->dev_addr, dev->dev_addr))
-		dev_unicast_delete(dev, vlandev->dev_addr, ETH_ALEN);
+		dev_unicast_delete(dev, vlandev->dev_addr);
 
 	/* vlan address was equal to the old address and is different from
 	 * the new address */
 	if (!compare_ether_addr(vlandev->dev_addr, vlan->real_dev_addr) &&
 	    compare_ether_addr(vlandev->dev_addr, dev->dev_addr))
-		dev_unicast_add(dev, vlandev->dev_addr, ETH_ALEN);
+		dev_unicast_add(dev, vlandev->dev_addr);
 
 	memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN);
 }
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 1e2ad4c7c59..96bad8f233e 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -441,7 +441,7 @@ static int vlan_dev_open(struct net_device *dev)
 		return -ENETDOWN;
 
 	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) {
-		err = dev_unicast_add(real_dev, dev->dev_addr, ETH_ALEN);
+		err = dev_unicast_add(real_dev, dev->dev_addr);
 		if (err < 0)
 			goto out;
 	}
@@ -470,7 +470,7 @@ clear_allmulti:
 		dev_set_allmulti(real_dev, -1);
 del_unicast:
 	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
-		dev_unicast_delete(real_dev, dev->dev_addr, ETH_ALEN);
+		dev_unicast_delete(real_dev, dev->dev_addr);
 out:
 	netif_carrier_off(dev);
 	return err;
@@ -492,7 +492,7 @@ static int vlan_dev_stop(struct net_device *dev)
 		dev_set_promiscuity(real_dev, -1);
 
 	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
-		dev_unicast_delete(real_dev, dev->dev_addr, dev->addr_len);
+		dev_unicast_delete(real_dev, dev->dev_addr);
 
 	netif_carrier_off(dev);
 	return 0;
@@ -511,13 +511,13 @@ static int vlan_dev_set_mac_address(struct net_device *dev, void *p)
 		goto out;
 
 	if (compare_ether_addr(addr->sa_data, real_dev->dev_addr)) {
-		err = dev_unicast_add(real_dev, addr->sa_data, ETH_ALEN);
+		err = dev_unicast_add(real_dev, addr->sa_data);
 		if (err < 0)
 			return err;
 	}
 
 	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
-		dev_unicast_delete(real_dev, dev->dev_addr, ETH_ALEN);
+		dev_unicast_delete(real_dev, dev->dev_addr);
 
 out:
 	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
diff --git a/net/core/dev.c b/net/core/dev.c
index 32ceee17896..e2fcc5f1017 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3473,8 +3473,9 @@ void dev_set_rx_mode(struct net_device *dev)
 
 /* hw addresses list handling functions */
 
-static int __hw_addr_add(struct list_head *list, unsigned char *addr,
-			 int addr_len, unsigned char addr_type)
+static int __hw_addr_add(struct list_head *list, int *delta,
+			 unsigned char *addr, int addr_len,
+			 unsigned char addr_type)
 {
 	struct netdev_hw_addr *ha;
 	int alloc_size;
@@ -3482,6 +3483,15 @@ static int __hw_addr_add(struct list_head *list, unsigned char *addr,
 	if (addr_len > MAX_ADDR_LEN)
 		return -EINVAL;
 
+	list_for_each_entry(ha, list, list) {
+		if (!memcmp(ha->addr, addr, addr_len) &&
+		    ha->type == addr_type) {
+			ha->refcount++;
+			return 0;
+		}
+	}
+
+
 	alloc_size = sizeof(*ha);
 	if (alloc_size < L1_CACHE_BYTES)
 		alloc_size = L1_CACHE_BYTES;
@@ -3490,7 +3500,11 @@ static int __hw_addr_add(struct list_head *list, unsigned char *addr,
 		return -ENOMEM;
 	memcpy(ha->addr, addr, addr_len);
 	ha->type = addr_type;
+	ha->refcount = 1;
+	ha->synced = false;
 	list_add_tail_rcu(&ha->list, list);
+	if (delta)
+		(*delta)++;
 	return 0;
 }
 
@@ -3502,29 +3516,30 @@ static void ha_rcu_free(struct rcu_head *head)
 	kfree(ha);
 }
 
-static int __hw_addr_del_ii(struct list_head *list, unsigned char *addr,
-			    int addr_len, unsigned char addr_type,
-			    int ignore_index)
+static int __hw_addr_del(struct list_head *list, int *delta,
+			 unsigned char *addr, int addr_len,
+			 unsigned char addr_type)
 {
 	struct netdev_hw_addr *ha;
-	int i = 0;
 
 	list_for_each_entry(ha, list, list) {
-		if (i++ != ignore_index &&
-		    !memcmp(ha->addr, addr, addr_len) &&
+		if (!memcmp(ha->addr, addr, addr_len) &&
 		    (ha->type == addr_type || !addr_type)) {
+			if (--ha->refcount)
+				return 0;
 			list_del_rcu(&ha->list);
 			call_rcu(&ha->rcu_head, ha_rcu_free);
+			if (delta)
+				(*delta)--;
 			return 0;
 		}
 	}
 	return -ENOENT;
 }
 
-static int __hw_addr_add_multiple_ii(struct list_head *to_list,
-				     struct list_head *from_list,
-				     int addr_len, unsigned char addr_type,
-				     int ignore_index)
+static int __hw_addr_add_multiple(struct list_head *to_list, int *to_delta,
+				  struct list_head *from_list, int addr_len,
+				  unsigned char addr_type)
 {
 	int err;
 	struct netdev_hw_addr *ha, *ha2;
@@ -3532,7 +3547,8 @@ static int __hw_addr_add_multiple_ii(struct list_head *to_list,
 
 	list_for_each_entry(ha, from_list, list) {
 		type = addr_type ? addr_type : ha->type;
-		err = __hw_addr_add(to_list, ha->addr, addr_len, type);
+		err = __hw_addr_add(to_list, to_delta, ha->addr,
+				    addr_len, type);
 		if (err)
 			goto unroll;
 	}
@@ -3543,27 +3559,69 @@ unroll:
 		if (ha2 == ha)
 			break;
 		type = addr_type ? addr_type : ha2->type;
-		__hw_addr_del_ii(to_list, ha2->addr, addr_len, type,
-				 ignore_index);
+		__hw_addr_del(to_list, to_delta, ha2->addr,
+			      addr_len, type);
 	}
 	return err;
 }
 
-static void __hw_addr_del_multiple_ii(struct list_head *to_list,
-				      struct list_head *from_list,
-				      int addr_len, unsigned char addr_type,
-				      int ignore_index)
+static void __hw_addr_del_multiple(struct list_head *to_list, int *to_delta,
+				   struct list_head *from_list, int addr_len,
+				   unsigned char addr_type)
 {
 	struct netdev_hw_addr *ha;
 	unsigned char type;
 
 	list_for_each_entry(ha, from_list, list) {
 		type = addr_type ? addr_type : ha->type;
-		__hw_addr_del_ii(to_list, ha->addr, addr_len, addr_type,
-				 ignore_index);
+		__hw_addr_del(to_list, to_delta, ha->addr,
+			      addr_len, addr_type);
+	}
+}
+
+static int __hw_addr_sync(struct list_head *to_list, int *to_delta,
+			  struct list_head *from_list, int *from_delta,
+			  int addr_len)
+{
+	int err = 0;
+	struct netdev_hw_addr *ha, *tmp;
+
+	list_for_each_entry_safe(ha, tmp, from_list, list) {
+		if (!ha->synced) {
+			err = __hw_addr_add(to_list, to_delta, ha->addr,
+					    addr_len, ha->type);
+			if (err)
+				break;
+			ha->synced = true;
+			ha->refcount++;
+		} else if (ha->refcount == 1) {
+			__hw_addr_del(to_list, to_delta, ha->addr,
+				      addr_len, ha->type);
+			__hw_addr_del(from_list, from_delta, ha->addr,
+				      addr_len, ha->type);
+		}
 	}
+	return err;
 }
 
+static void __hw_addr_unsync(struct list_head *to_list, int *to_delta,
+			     struct list_head *from_list, int *from_delta,
+			     int addr_len)
+{
+	struct netdev_hw_addr *ha, *tmp;
+
+	list_for_each_entry_safe(ha, tmp, from_list, list) {
+		if (ha->synced) {
+			__hw_addr_del(to_list, to_delta, ha->addr,
+				      addr_len, ha->type);
+			ha->synced = false;
+			__hw_addr_del(from_list, from_delta, ha->addr,
+				      addr_len, ha->type);
+		}
+	}
+}
+
+
 static void __hw_addr_flush(struct list_head *list)
 {
 	struct netdev_hw_addr *ha, *tmp;
@@ -3594,7 +3652,7 @@ static int dev_addr_init(struct net_device *dev)
 
 	INIT_LIST_HEAD(&dev->dev_addr_list);
 	memset(addr, 0, sizeof(*addr));
-	err = __hw_addr_add(&dev->dev_addr_list, addr, sizeof(*addr),
+	err = __hw_addr_add(&dev->dev_addr_list, NULL, addr, sizeof(*addr),
 			    NETDEV_HW_ADDR_T_LAN);
 	if (!err) {
 		/*
@@ -3626,7 +3684,7 @@ int dev_addr_add(struct net_device *dev, unsigned char *addr,
 
 	ASSERT_RTNL();
 
-	err = __hw_addr_add(&dev->dev_addr_list, addr, dev->addr_len,
+	err = __hw_addr_add(&dev->dev_addr_list, NULL, addr, dev->addr_len,
 			    addr_type);
 	if (!err)
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
@@ -3649,11 +3707,20 @@ int dev_addr_del(struct net_device *dev, unsigned char *addr,
 		 unsigned char addr_type)
 {
 	int err;
+	struct netdev_hw_addr *ha;
 
 	ASSERT_RTNL();
 
-	err = __hw_addr_del_ii(&dev->dev_addr_list, addr, dev->addr_len,
-			       addr_type, 0);
+	/*
+	 * We can not remove the first address from the list because
+	 * dev->dev_addr points to that.
+	 */
+	ha = list_first_entry(&dev->dev_addr_list, struct netdev_hw_addr, list);
+	if (ha->addr == dev->dev_addr && ha->refcount == 1)
+		return -ENOENT;
+
+	err = __hw_addr_del(&dev->dev_addr_list, NULL, addr, dev->addr_len,
+			    addr_type);
 	if (!err)
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
 	return err;
@@ -3680,9 +3747,9 @@ int dev_addr_add_multiple(struct net_device *to_dev,
 
 	if (from_dev->addr_len != to_dev->addr_len)
 		return -EINVAL;
-	err = __hw_addr_add_multiple_ii(&to_dev->dev_addr_list,
-					&from_dev->dev_addr_list,
-					to_dev->addr_len, addr_type, 0);
+	err = __hw_addr_add_multiple(&to_dev->dev_addr_list, NULL,
+				     &from_dev->dev_addr_list,
+				     to_dev->addr_len, addr_type);
 	if (!err)
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
 	return err;
@@ -3707,9 +3774,9 @@ int dev_addr_del_multiple(struct net_device *to_dev,
 
 	if (from_dev->addr_len != to_dev->addr_len)
 		return -EINVAL;
-	__hw_addr_del_multiple_ii(&to_dev->dev_addr_list,
-				  &from_dev->dev_addr_list,
-				  to_dev->addr_len, addr_type, 0);
+	__hw_addr_del_multiple(&to_dev->dev_addr_list, NULL,
+			       &from_dev->dev_addr_list,
+			       to_dev->addr_len, addr_type);
 	call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
 	return 0;
 }
@@ -3779,24 +3846,22 @@ int __dev_addr_add(struct dev_addr_list **list, int *count,
  *	dev_unicast_delete	- Release secondary unicast address.
  *	@dev: device
  *	@addr: address to delete
- *	@alen: length of @addr
  *
  *	Release reference to a secondary unicast address and remove it
  *	from the device if the reference count drops to zero.
  *
  * 	The caller must hold the rtnl_mutex.
  */
-int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
+int dev_unicast_delete(struct net_device *dev, void *addr)
 {
 	int err;
 
 	ASSERT_RTNL();
 
-	netif_addr_lock_bh(dev);
-	err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
+	err = __hw_addr_del(&dev->uc_list, &dev->uc_count, addr,
+			    dev->addr_len, NETDEV_HW_ADDR_T_UNICAST);
 	if (!err)
 		__dev_set_rx_mode(dev);
-	netif_addr_unlock_bh(dev);
 	return err;
 }
 EXPORT_SYMBOL(dev_unicast_delete);
@@ -3805,24 +3870,22 @@ EXPORT_SYMBOL(dev_unicast_delete);
  *	dev_unicast_add		- add a secondary unicast address
  *	@dev: device
  *	@addr: address to add
- *	@alen: length of @addr
  *
  *	Add a secondary unicast address to the device or increase
  *	the reference count if it already exists.
  *
  *	The caller must hold the rtnl_mutex.
  */
-int dev_unicast_add(struct net_device *dev, void *addr, int alen)
+int dev_unicast_add(struct net_device *dev, void *addr)
 {
 	int err;
 
 	ASSERT_RTNL();
 
-	netif_addr_lock_bh(dev);
-	err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
+	err = __hw_addr_add(&dev->uc_list, &dev->uc_count, addr,
+			    dev->addr_len, NETDEV_HW_ADDR_T_UNICAST);
 	if (!err)
 		__dev_set_rx_mode(dev);
-	netif_addr_unlock_bh(dev);
 	return err;
 }
 EXPORT_SYMBOL(dev_unicast_add);
@@ -3879,8 +3942,7 @@ void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
  *	@from: source device
  *
  *	Add newly added addresses to the destination device and release
- *	addresses that have no users left. The source device must be
- *	locked by netif_tx_lock_bh.
+ *	addresses that have no users left.
  *
  *	This function is intended to be called from the dev->set_rx_mode
  *	function of layered software devices.
@@ -3889,12 +3951,15 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from)
 {
 	int err = 0;
 
-	netif_addr_lock_bh(to);
-	err = __dev_addr_sync(&to->uc_list, &to->uc_count,
-			      &from->uc_list, &from->uc_count);
+	ASSERT_RTNL();
+
+	if (to->addr_len != from->addr_len)
+		return -EINVAL;
+
+	err = __hw_addr_sync(&to->uc_list, &to->uc_count,
+			     &from->uc_list, &from->uc_count, to->addr_len);
 	if (!err)
 		__dev_set_rx_mode(to);
-	netif_addr_unlock_bh(to);
 	return err;
 }
 EXPORT_SYMBOL(dev_unicast_sync);
@@ -3910,18 +3975,33 @@ EXPORT_SYMBOL(dev_unicast_sync);
  */
 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
 {
-	netif_addr_lock_bh(from);
-	netif_addr_lock(to);
+	ASSERT_RTNL();
 
-	__dev_addr_unsync(&to->uc_list, &to->uc_count,
-			  &from->uc_list, &from->uc_count);
-	__dev_set_rx_mode(to);
+	if (to->addr_len != from->addr_len)
+		return;
 
-	netif_addr_unlock(to);
-	netif_addr_unlock_bh(from);
+	__hw_addr_unsync(&to->uc_list, &to->uc_count,
+			 &from->uc_list, &from->uc_count, to->addr_len);
+	__dev_set_rx_mode(to);
 }
 EXPORT_SYMBOL(dev_unicast_unsync);
 
+static void dev_unicast_flush(struct net_device *dev)
+{
+	/* rtnl_mutex must be held here */
+
+	__hw_addr_flush(&dev->uc_list);
+	dev->uc_count = 0;
+}
+
+static void dev_unicast_init(struct net_device *dev)
+{
+	/* rtnl_mutex must be held here */
+
+	INIT_LIST_HEAD(&dev->uc_list);
+}
+
+
 static void __dev_addr_discard(struct dev_addr_list **list)
 {
 	struct dev_addr_list *tmp;
@@ -3940,9 +4020,6 @@ static void dev_addr_discard(struct net_device *dev)
 {
 	netif_addr_lock_bh(dev);
 
-	__dev_addr_discard(&dev->uc_list);
-	dev->uc_count = 0;
-
 	__dev_addr_discard(&dev->mc_list);
 	dev->mc_count = 0;
 
@@ -4535,6 +4612,7 @@ static void rollback_registered(struct net_device *dev)
 	/*
 	 *	Flush the unicast and multicast chains
 	 */
+	dev_unicast_flush(dev);
 	dev_addr_discard(dev);
 
 	if (dev->netdev_ops->ndo_uninit)
@@ -5020,6 +5098,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	if (dev_addr_init(dev))
 		goto free_tx;
 
+	dev_unicast_init(dev);
+
 	dev_net_set(dev, &init_net);
 
 	dev->_tx = tx;
@@ -5223,6 +5303,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	/*
 	 *	Flush the unicast and multicast chains
 	 */
+	dev_unicast_flush(dev);
 	dev_addr_discard(dev);
 
 	netdev_unregister_kobject(dev);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index ed131181215..2175e6d5cc8 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -67,7 +67,7 @@ static int dsa_slave_open(struct net_device *dev)
 		return -ENETDOWN;
 
 	if (compare_ether_addr(dev->dev_addr, master->dev_addr)) {
-		err = dev_unicast_add(master, dev->dev_addr, ETH_ALEN);
+		err = dev_unicast_add(master, dev->dev_addr);
 		if (err < 0)
 			goto out;
 	}
@@ -90,7 +90,7 @@ clear_allmulti:
 		dev_set_allmulti(master, -1);
 del_unicast:
 	if (compare_ether_addr(dev->dev_addr, master->dev_addr))
-		dev_unicast_delete(master, dev->dev_addr, ETH_ALEN);
+		dev_unicast_delete(master, dev->dev_addr);
 out:
 	return err;
 }
@@ -108,7 +108,7 @@ static int dsa_slave_close(struct net_device *dev)
 		dev_set_promiscuity(master, -1);
 
 	if (compare_ether_addr(dev->dev_addr, master->dev_addr))
-		dev_unicast_delete(master, dev->dev_addr, ETH_ALEN);
+		dev_unicast_delete(master, dev->dev_addr);
 
 	return 0;
 }
@@ -147,13 +147,13 @@ static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
 		goto out;
 
 	if (compare_ether_addr(addr->sa_data, master->dev_addr)) {
-		err = dev_unicast_add(master, addr->sa_data, ETH_ALEN);
+		err = dev_unicast_add(master, addr->sa_data);
 		if (err < 0)
 			return err;
 	}
 
 	if (compare_ether_addr(dev->dev_addr, master->dev_addr))
-		dev_unicast_delete(master, dev->dev_addr, ETH_ALEN);
+		dev_unicast_delete(master, dev->dev_addr);
 
 out:
 	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c7c5d524967..6da9f38ef5c 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1582,9 +1582,9 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
 		break;
 	case PACKET_MR_UNICAST:
 		if (what > 0)
-			return dev_unicast_add(dev, i->addr, i->alen);
+			return dev_unicast_add(dev, i->addr);
 		else
-			return dev_unicast_delete(dev, i->addr, i->alen);
+			return dev_unicast_delete(dev, i->addr);
 		break;
 	default:;
 	}
-- 
cgit v1.2.3-70-g09d2


From 25346b93ca079080c9cb23331db5c4f6404e8530 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 1 Jun 2009 17:48:12 +1000
Subject: perf_counter: Provide functions for locking and pinning the context
 for a task

This abstracts out the code for locking the context associated
with a task.  Because the context might get transferred from
one task to another concurrently, we have to check after
locking the context that it is still the right context for the
task and retry if not.  This was open-coded in
find_get_context() and perf_counter_init_task().

This adds a further function for pinning the context for a
task, i.e. marking it so it can't be transferred to another
task.  This adds a 'pin_count' field to struct
perf_counter_context to indicate that a context is pinned,
instead of the previous method of setting the parent_gen count
to all 1s.  Pinning the context with a pin_count is easier to
undo and doesn't require saving the parent_gen value.  This
also adds a perf_unpin_context() to undo the effect of
perf_pin_task_context() and changes perf_counter_init_task to
use it.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <18979.34748.755674.596386@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |   1 +
 kernel/perf_counter.c        | 128 +++++++++++++++++++++++++------------------
 2 files changed, 75 insertions(+), 54 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 519a41bba24..81ec79c9f19 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -543,6 +543,7 @@ struct perf_counter_context {
 	struct perf_counter_context *parent_ctx;
 	u64			parent_gen;
 	u64			generation;
+	int			pin_count;
 	struct rcu_head		rcu_head;
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 79c3f26541d..da8dfef4b47 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -122,6 +122,69 @@ static void put_ctx(struct perf_counter_context *ctx)
 	}
 }
 
+/*
+ * Get the perf_counter_context for a task and lock it.
+ * This has to cope with with the fact that until it is locked,
+ * the context could get moved to another task.
+ */
+static struct perf_counter_context *perf_lock_task_context(
+				struct task_struct *task, unsigned long *flags)
+{
+	struct perf_counter_context *ctx;
+
+	rcu_read_lock();
+ retry:
+	ctx = rcu_dereference(task->perf_counter_ctxp);
+	if (ctx) {
+		/*
+		 * If this context is a clone of another, it might
+		 * get swapped for another underneath us by
+		 * perf_counter_task_sched_out, though the
+		 * rcu_read_lock() protects us from any context
+		 * getting freed.  Lock the context and check if it
+		 * got swapped before we could get the lock, and retry
+		 * if so.  If we locked the right context, then it
+		 * can't get swapped on us any more.
+		 */
+		spin_lock_irqsave(&ctx->lock, *flags);
+		if (ctx != rcu_dereference(task->perf_counter_ctxp)) {
+			spin_unlock_irqrestore(&ctx->lock, *flags);
+			goto retry;
+		}
+	}
+	rcu_read_unlock();
+	return ctx;
+}
+
+/*
+ * Get the context for a task and increment its pin_count so it
+ * can't get swapped to another task.  This also increments its
+ * reference count so that the context can't get freed.
+ */
+static struct perf_counter_context *perf_pin_task_context(struct task_struct *task)
+{
+	struct perf_counter_context *ctx;
+	unsigned long flags;
+
+	ctx = perf_lock_task_context(task, &flags);
+	if (ctx) {
+		++ctx->pin_count;
+		get_ctx(ctx);
+		spin_unlock_irqrestore(&ctx->lock, flags);
+	}
+	return ctx;
+}
+
+static void perf_unpin_context(struct perf_counter_context *ctx)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctx->lock, flags);
+	--ctx->pin_count;
+	spin_unlock_irqrestore(&ctx->lock, flags);
+	put_ctx(ctx);
+}
+
 /*
  * Add a counter from the lists for its context.
  * Must be called with ctx->mutex and ctx->lock held.
@@ -916,7 +979,7 @@ static int context_equiv(struct perf_counter_context *ctx1,
 {
 	return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
 		&& ctx1->parent_gen == ctx2->parent_gen
-		&& ctx1->parent_gen != ~0ull;
+		&& !ctx1->pin_count && !ctx2->pin_count;
 }
 
 /*
@@ -1271,6 +1334,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 	struct perf_counter_context *ctx;
 	struct perf_counter_context *parent_ctx;
 	struct task_struct *task;
+	unsigned long flags;
 	int err;
 
 	/*
@@ -1323,28 +1387,9 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 	if (!ptrace_may_access(task, PTRACE_MODE_READ))
 		goto errout;
 
- retry_lock:
-	rcu_read_lock();
  retry:
-	ctx = rcu_dereference(task->perf_counter_ctxp);
+	ctx = perf_lock_task_context(task, &flags);
 	if (ctx) {
-		/*
-		 * If this context is a clone of another, it might
-		 * get swapped for another underneath us by
-		 * perf_counter_task_sched_out, though the
-		 * rcu_read_lock() protects us from any context
-		 * getting freed.  Lock the context and check if it
-		 * got swapped before we could get the lock, and retry
-		 * if so.  If we locked the right context, then it
-		 * can't get swapped on us any more and we can
-		 * unclone it if necessary.
-		 * Once it's not a clone things will be stable.
-		 */
-		spin_lock_irq(&ctx->lock);
-		if (ctx != rcu_dereference(task->perf_counter_ctxp)) {
-			spin_unlock_irq(&ctx->lock);
-			goto retry;
-		}
 		parent_ctx = ctx->parent_ctx;
 		if (parent_ctx) {
 			put_ctx(parent_ctx);
@@ -1355,9 +1400,8 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 		 * this context won't get freed if the task exits.
 		 */
 		get_ctx(ctx);
-		spin_unlock_irq(&ctx->lock);
+		spin_unlock_irqrestore(&ctx->lock, flags);
 	}
-	rcu_read_unlock();
 
 	if (!ctx) {
 		ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL);
@@ -1372,7 +1416,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 			 * the context they set.
 			 */
 			kfree(ctx);
-			goto retry_lock;
+			goto retry;
 		}
 		get_task_struct(task);
 	}
@@ -3667,7 +3711,6 @@ int perf_counter_init_task(struct task_struct *child)
 	struct perf_counter *counter;
 	struct task_struct *parent = current;
 	int inherited_all = 1;
-	u64 cloned_gen;
 	int ret = 0;
 
 	child->perf_counter_ctxp = NULL;
@@ -3693,32 +3736,17 @@ int perf_counter_init_task(struct task_struct *child)
 	get_task_struct(child);
 
 	/*
-	 * If the parent's context is a clone, temporarily set its
-	 * parent_gen to an impossible value (all 1s) so it won't get
-	 * swapped under us.  The rcu_read_lock makes sure that
-	 * parent_ctx continues to exist even if it gets swapped to
-	 * another process and then freed while we are trying to get
-	 * its lock.
+	 * If the parent's context is a clone, pin it so it won't get
+	 * swapped under us.
 	 */
-	rcu_read_lock();
- retry:
-	parent_ctx = rcu_dereference(parent->perf_counter_ctxp);
+	parent_ctx = perf_pin_task_context(parent);
+
 	/*
 	 * No need to check if parent_ctx != NULL here; since we saw
 	 * it non-NULL earlier, the only reason for it to become NULL
 	 * is if we exit, and since we're currently in the middle of
 	 * a fork we can't be exiting at the same time.
 	 */
-	spin_lock_irq(&parent_ctx->lock);
-	if (parent_ctx != rcu_dereference(parent->perf_counter_ctxp)) {
-		spin_unlock_irq(&parent_ctx->lock);
-		goto retry;
-	}
-	cloned_gen = parent_ctx->parent_gen;
-	if (parent_ctx->parent_ctx)
-		parent_ctx->parent_gen = ~0ull;
-	spin_unlock_irq(&parent_ctx->lock);
-	rcu_read_unlock();
 
 	/*
 	 * Lock the parent list. No need to lock the child - not PID
@@ -3759,7 +3787,7 @@ int perf_counter_init_task(struct task_struct *child)
 		cloned_ctx = rcu_dereference(parent_ctx->parent_ctx);
 		if (cloned_ctx) {
 			child_ctx->parent_ctx = cloned_ctx;
-			child_ctx->parent_gen = cloned_gen;
+			child_ctx->parent_gen = parent_ctx->parent_gen;
 		} else {
 			child_ctx->parent_ctx = parent_ctx;
 			child_ctx->parent_gen = parent_ctx->generation;
@@ -3769,15 +3797,7 @@ int perf_counter_init_task(struct task_struct *child)
 
 	mutex_unlock(&parent_ctx->mutex);
 
-	/*
-	 * Restore the clone status of the parent.
-	 */
-	if (parent_ctx->parent_ctx) {
-		spin_lock_irq(&parent_ctx->lock);
-		if (parent_ctx->parent_ctx)
-			parent_ctx->parent_gen = cloned_gen;
-		spin_unlock_irq(&parent_ctx->lock);
-	}
+	perf_unpin_context(parent_ctx);
 
 	return ret;
 }
-- 
cgit v1.2.3-70-g09d2


From d1a277c584d0862dbf51991baea947ea5f2ce6bf Mon Sep 17 00:00:00 2001
From: Wolfgang Grandegger <wg@grandegger.com>
Date: Sat, 30 May 2009 07:55:50 +0000
Subject: can: sja1000: generic OF platform bus driver

This patch adds a generic driver for SJA1000 chips on the OpenFirmware
platform bus found on embedded PowerPC systems. You need a SJA1000 node
definition in your flattened device tree source (DTS) file similar to:

  can@3,100 {
  	compatible = "nxp,sja1000";
  	reg = <3 0x100 0x80>;
  	interrupts = <2 0>;
  	interrupt-parent = <&mpic>;
  	nxp,external-clock-frequency = <16000000>;
  };

See also Documentation/powerpc/dts-bindings/can/sja1000.txt.

CC: devicetree-discuss@ozlabs.org
Signed-off-by: Wolfgang Grandegger <wg@grandegger.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/powerpc/dts-bindings/can/sja1000.txt |  53 +++++
 drivers/net/can/Kconfig                            |   9 +
 drivers/net/can/sja1000/Makefile                   |   1 +
 drivers/net/can/sja1000/sja1000_of_platform.c      | 233 +++++++++++++++++++++
 include/linux/can/platform/sja1000.h               |   3 +
 5 files changed, 299 insertions(+)
 create mode 100644 Documentation/powerpc/dts-bindings/can/sja1000.txt
 create mode 100644 drivers/net/can/sja1000/sja1000_of_platform.c

(limited to 'include')

diff --git a/Documentation/powerpc/dts-bindings/can/sja1000.txt b/Documentation/powerpc/dts-bindings/can/sja1000.txt
new file mode 100644
index 00000000000..d6d209ded93
--- /dev/null
+++ b/Documentation/powerpc/dts-bindings/can/sja1000.txt
@@ -0,0 +1,53 @@
+Memory mapped SJA1000 CAN controller from NXP (formerly Philips)
+
+Required properties:
+
+- compatible : should be "nxp,sja1000".
+
+- reg : should specify the chip select, address offset and size required
+	to map the registers of the SJA1000. The size is usually 0x80.
+
+- interrupts: property with a value describing the interrupt source
+	(number and sensitivity) required for the SJA1000.
+
+Optional properties:
+
+- nxp,external-clock-frequency : Frequency of the external oscillator
+	clock in Hz. Note that the internal clock frequency used by the
+	SJA1000 is half of that value. If not specified, a default value
+	of 16000000 (16 MHz) is used.
+
+- nxp,tx-output-mode : operation mode of the TX output control logic:
+	<0x0> : bi-phase output mode
+	<0x1> : normal output mode (default)
+	<0x2> : test output mode
+	<0x3> : clock output mode
+
+- nxp,tx-output-config : TX output pin configuration:
+	<0x01> : TX0 invert
+	<0x02> : TX0 pull-down (default)
+	<0x04> : TX0 pull-up
+	<0x06> : TX0 push-pull
+	<0x08> : TX1 invert
+	<0x10> : TX1 pull-down
+	<0x20> : TX1 pull-up
+	<0x30> : TX1 push-pull
+
+- nxp,clock-out-frequency : clock frequency in Hz on the CLKOUT pin.
+	If not specified or if the specified value is 0, the CLKOUT pin
+	will be disabled.
+
+- nxp,no-comparator-bypass : Allows to disable the CAN input comperator.
+
+For futher information, please have a look to the SJA1000 data sheet.
+
+Examples:
+
+can@3,100 {
+	compatible = "nxp,sja1000";
+	reg = <3 0x100 0x80>;
+	interrupts = <2 0>;
+	interrupt-parent = <&mpic>;
+	nxp,external-clock-frequency = <16000000>;
+};
+
diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index cfd6c5a285f..d5e18812bf4 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -51,6 +51,15 @@ config CAN_SJA1000_PLATFORM
 	  boards from Phytec (http://www.phytec.de) like the PCM027,
 	  PCM038.
 
+config CAN_SJA1000_OF_PLATFORM
+	depends on CAN_SJA1000 && PPC_OF
+	tristate "Generic OF Platform Bus based SJA1000 driver"
+	---help---
+	  This driver adds support for the SJA1000 chips connected to
+	  the OpenFirmware "platform bus" found on embedded systems with
+	  OpenFirmware bindings, e.g. if you have a PowerPC based system
+	  you may want to enable this option.
+
 config CAN_EMS_PCI
 	tristate "EMS CPC-PCI and CPC-PCIe Card"
 	depends on PCI && CAN_SJA1000
diff --git a/drivers/net/can/sja1000/Makefile b/drivers/net/can/sja1000/Makefile
index d6c631f9e66..9d0c08da273 100644
--- a/drivers/net/can/sja1000/Makefile
+++ b/drivers/net/can/sja1000/Makefile
@@ -4,6 +4,7 @@
 
 obj-$(CONFIG_CAN_SJA1000) += sja1000.o
 obj-$(CONFIG_CAN_SJA1000_PLATFORM) += sja1000_platform.o
+obj-$(CONFIG_CAN_SJA1000_OF_PLATFORM) += sja1000_of_platform.o
 obj-$(CONFIG_CAN_EMS_PCI) += ems_pci.o
 obj-$(CONFIG_CAN_KVASER_PCI) += kvaser_pci.o
 
diff --git a/drivers/net/can/sja1000/sja1000_of_platform.c b/drivers/net/can/sja1000/sja1000_of_platform.c
new file mode 100644
index 00000000000..aa953fb4b8d
--- /dev/null
+++ b/drivers/net/can/sja1000/sja1000_of_platform.c
@@ -0,0 +1,233 @@
+/*
+ * Driver for SJA1000 CAN controllers on the OpenFirmware platform bus
+ *
+ * Copyright (C) 2008-2009 Wolfgang Grandegger <wg@grandegger.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the version 2 of the GNU General Public License
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+/* This is a generic driver for SJA1000 chips on the OpenFirmware platform
+ * bus found on embedded PowerPC systems. You need a SJA1000 CAN node
+ * definition in your flattened device tree source (DTS) file similar to:
+ *
+ *   can@3,100 {
+ *           compatible = "nxp,sja1000";
+ *           reg = <3 0x100 0x80>;
+ *           interrupts = <2 0>;
+ *           interrupt-parent = <&mpic>;
+ *           nxp,external-clock-frequency = <16000000>;
+ *   };
+ *
+ * See "Documentation/powerpc/dts-bindings/can/sja1000.txt" for further
+ * information.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/delay.h>
+#include <linux/can.h>
+#include <linux/can/dev.h>
+
+#include <linux/of_platform.h>
+#include <asm/prom.h>
+
+#include "sja1000.h"
+
+#define DRV_NAME "sja1000_of_platform"
+
+MODULE_AUTHOR("Wolfgang Grandegger <wg@grandegger.com>");
+MODULE_DESCRIPTION("Socket-CAN driver for SJA1000 on the OF platform bus");
+MODULE_LICENSE("GPL v2");
+
+#define SJA1000_OFP_CAN_CLOCK  (16000000 / 2)
+
+#define SJA1000_OFP_OCR        OCR_TX0_PULLDOWN
+#define SJA1000_OFP_CDR        (CDR_CBP | CDR_CLK_OFF)
+
+static u8 sja1000_ofp_read_reg(const struct sja1000_priv *priv, int reg)
+{
+	return in_8(priv->reg_base + reg);
+}
+
+static void sja1000_ofp_write_reg(const struct sja1000_priv *priv,
+				  int reg, u8 val)
+{
+	out_8(priv->reg_base + reg, val);
+}
+
+static int __devexit sja1000_ofp_remove(struct of_device *ofdev)
+{
+	struct net_device *dev = dev_get_drvdata(&ofdev->dev);
+	struct sja1000_priv *priv = netdev_priv(dev);
+	struct device_node *np = ofdev->node;
+	struct resource res;
+
+	dev_set_drvdata(&ofdev->dev, NULL);
+
+	unregister_sja1000dev(dev);
+	free_sja1000dev(dev);
+	iounmap(priv->reg_base);
+	irq_dispose_mapping(dev->irq);
+
+	of_address_to_resource(np, 0, &res);
+	release_mem_region(res.start, resource_size(&res));
+
+	return 0;
+}
+
+static int __devinit sja1000_ofp_probe(struct of_device *ofdev,
+				       const struct of_device_id *id)
+{
+	struct device_node *np = ofdev->node;
+	struct net_device *dev;
+	struct sja1000_priv *priv;
+	struct resource res;
+	const u32 *prop;
+	int err, irq, res_size, prop_size;
+	void __iomem *base;
+
+	err = of_address_to_resource(np, 0, &res);
+	if (err) {
+		dev_err(&ofdev->dev, "invalid address\n");
+		return err;
+	}
+
+	res_size = resource_size(&res);
+
+	if (!request_mem_region(res.start, res_size, DRV_NAME)) {
+		dev_err(&ofdev->dev, "couldn't request %#x..%#x\n",
+			res.start, res.end);
+		return -EBUSY;
+	}
+
+	base = ioremap_nocache(res.start, res_size);
+	if (!base) {
+		dev_err(&ofdev->dev, "couldn't ioremap %#x..%#x\n",
+			res.start, res.end);
+		err = -ENOMEM;
+		goto exit_release_mem;
+	}
+
+	irq = irq_of_parse_and_map(np, 0);
+	if (irq == NO_IRQ) {
+		dev_err(&ofdev->dev, "no irq found\n");
+		err = -ENODEV;
+		goto exit_unmap_mem;
+	}
+
+	dev = alloc_sja1000dev(0);
+	if (!dev) {
+		err = -ENOMEM;
+		goto exit_dispose_irq;
+	}
+
+	priv = netdev_priv(dev);
+
+	priv->read_reg = sja1000_ofp_read_reg;
+	priv->write_reg = sja1000_ofp_write_reg;
+
+	prop = of_get_property(np, "nxp,external-clock-frequency", &prop_size);
+	if (prop && (prop_size ==  sizeof(u32)))
+		priv->can.clock.freq = *prop / 2;
+	else
+		priv->can.clock.freq = SJA1000_OFP_CAN_CLOCK; /* default */
+
+	prop = of_get_property(np, "nxp,tx-output-mode", &prop_size);
+	if (prop && (prop_size == sizeof(u32)))
+		priv->ocr |= *prop & OCR_MODE_MASK;
+	else
+		priv->ocr |= OCR_MODE_NORMAL; /* default */
+
+	prop = of_get_property(np, "nxp,tx-output-config", &prop_size);
+	if (prop && (prop_size == sizeof(u32)))
+		priv->ocr |= (*prop << OCR_TX_SHIFT) & OCR_TX_MASK;
+	else
+		priv->ocr |= OCR_TX0_PULLDOWN; /* default */
+
+	prop = of_get_property(np, "nxp,clock-out-frequency", &prop_size);
+	if (prop && (prop_size == sizeof(u32)) && *prop) {
+		u32 divider = priv->can.clock.freq * 2 / *prop;
+
+		if (divider > 1)
+			priv->cdr |= divider / 2 - 1;
+		else
+			priv->cdr |= CDR_CLKOUT_MASK;
+	} else {
+		priv->cdr |= CDR_CLK_OFF; /* default */
+	}
+
+	prop = of_get_property(np, "nxp,no-comparator-bypass", NULL);
+	if (!prop)
+		priv->cdr |= CDR_CBP; /* default */
+
+	priv->irq_flags = IRQF_SHARED;
+	priv->reg_base = base;
+
+	dev->irq = irq;
+
+	dev_info(&ofdev->dev,
+		 "reg_base=0x%p irq=%d clock=%d ocr=0x%02x cdr=0x%02x\n",
+		 priv->reg_base, dev->irq, priv->can.clock.freq,
+		 priv->ocr, priv->cdr);
+
+	dev_set_drvdata(&ofdev->dev, dev);
+	SET_NETDEV_DEV(dev, &ofdev->dev);
+
+	err = register_sja1000dev(dev);
+	if (err) {
+		dev_err(&ofdev->dev, "registering %s failed (err=%d)\n",
+			DRV_NAME, err);
+		goto exit_free_sja1000;
+	}
+
+	return 0;
+
+exit_free_sja1000:
+	free_sja1000dev(dev);
+exit_dispose_irq:
+	irq_dispose_mapping(irq);
+exit_unmap_mem:
+	iounmap(base);
+exit_release_mem:
+	release_mem_region(res.start, res_size);
+
+	return err;
+}
+
+static struct of_device_id __devinitdata sja1000_ofp_table[] = {
+	{.compatible = "nxp,sja1000"},
+	{},
+};
+
+static struct of_platform_driver sja1000_ofp_driver = {
+	.owner = THIS_MODULE,
+	.name = DRV_NAME,
+	.probe = sja1000_ofp_probe,
+	.remove = __devexit_p(sja1000_ofp_remove),
+	.match_table = sja1000_ofp_table,
+};
+
+static int __init sja1000_ofp_init(void)
+{
+	return of_register_platform_driver(&sja1000_ofp_driver);
+}
+module_init(sja1000_ofp_init);
+
+static void __exit sja1000_ofp_exit(void)
+{
+	return of_unregister_platform_driver(&sja1000_ofp_driver);
+};
+module_exit(sja1000_ofp_exit);
diff --git a/include/linux/can/platform/sja1000.h b/include/linux/can/platform/sja1000.h
index 37966e630ff..01ee2aeb048 100644
--- a/include/linux/can/platform/sja1000.h
+++ b/include/linux/can/platform/sja1000.h
@@ -13,6 +13,7 @@
 #define OCR_MODE_TEST     0x01
 #define OCR_MODE_NORMAL   0x02
 #define OCR_MODE_CLOCK    0x03
+#define OCR_MODE_MASK     0x07
 #define OCR_TX0_INVERT    0x04
 #define OCR_TX0_PULLDOWN  0x08
 #define OCR_TX0_PULLUP    0x10
@@ -21,6 +22,8 @@
 #define OCR_TX1_PULLDOWN  0x40
 #define OCR_TX1_PULLUP    0x80
 #define OCR_TX1_PUSHPULL  0xc0
+#define OCR_TX_MASK       0xfc
+#define OCR_TX_SHIFT      2
 
 struct sja1000_platform_data {
 	u32 clock;	/* CAN bus oscillator frequency in Hz */
-- 
cgit v1.2.3-70-g09d2


From 56d417b12e57dfe11c9b7ba4bea3882c62a55815 Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Mon, 1 Jun 2009 03:07:33 -0700
Subject: IPv6: Add 'autoconf' and 'disable_ipv6' module parameters

Add 'autoconf' and 'disable_ipv6' parameters to the IPv6 module.

The first controls if IPv6 addresses are autoconfigured from
prefixes received in Router Advertisements.  The IPv6 loopback
(::1) and link-local addresses are still configured.

The second controls if IPv6 addresses are desired at all.  No
IPv6 addresses will be added to any interfaces.

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt |  7 +++
 Documentation/networking/ipv6.txt      | 37 +++++++++++++++
 include/linux/ipv6.h                   |  6 +++
 net/ipv6/addrconf.c                    | 83 ++++++++++++++++++++++++++++++++--
 net/ipv6/af_inet6.c                    | 22 +++++++--
 5 files changed, 145 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 3ffd233c369..8be76235fe6 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1057,6 +1057,13 @@ disable_ipv6 - BOOLEAN
 	address.
 	Default: FALSE (enable IPv6 operation)
 
+	When this value is changed from 1 to 0 (IPv6 is being enabled),
+	it will dynamically create a link-local address on the given
+	interface and start Duplicate Address Detection, if necessary.
+
+	When this value is changed from 0 to 1 (IPv6 is being disabled),
+	it will dynamically delete all address on the given interface.
+
 accept_dad - INTEGER
 	Whether to accept DAD (Duplicate Address Detection).
 	0: Disable DAD
diff --git a/Documentation/networking/ipv6.txt b/Documentation/networking/ipv6.txt
index 268e5c103dd..9fd7e21296c 100644
--- a/Documentation/networking/ipv6.txt
+++ b/Documentation/networking/ipv6.txt
@@ -33,3 +33,40 @@ disable
 
 		A reboot is required to enable IPv6.
 
+autoconf
+
+	Specifies whether to enable IPv6 address autoconfiguration
+	on all interfaces.  This might be used when one does not wish
+	for addresses to be automatically generated from prefixes
+	received in Router Advertisements.
+
+	The possible values and their effects are:
+
+	0
+		IPv6 address autoconfiguration is disabled on all interfaces.
+
+		Only the IPv6 loopback address (::1) and link-local addresses
+		will be added to interfaces.
+
+	1
+		IPv6 address autoconfiguration is enabled on all interfaces.
+
+		This is the default value.
+
+disable_ipv6
+
+	Specifies whether to disable IPv6 on all interfaces.
+	This might be used when no IPv6 addresses are desired.
+
+	The possible values and their effects are:
+
+	0
+		IPv6 is enabled on all interfaces.
+
+		This is the default value.
+
+	1
+		IPv6 is disabled on all interfaces.
+
+		No IPv6 addresses will be added to interfaces.
+
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 476d9464ac8..c662efa6828 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -169,6 +169,12 @@ struct ipv6_devconf {
 	__s32		accept_dad;
 	void		*sysctl;
 };
+
+struct ipv6_params {
+	__s32 disable_ipv6;
+	__s32 autoconf;
+};
+extern struct ipv6_params ipv6_defaults;
 #endif
 
 /* index values for the variables in ipv6_devconf */
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 31938e5fb22..c3488372f12 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -591,7 +591,6 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 {
 	struct inet6_ifaddr *ifa = NULL;
 	struct rt6_info *rt;
-	struct net *net = dev_net(idev->dev);
 	int hash;
 	int err = 0;
 	int addr_type = ipv6_addr_type(addr);
@@ -608,7 +607,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 		goto out2;
 	}
 
-	if (idev->cnf.disable_ipv6 || net->ipv6.devconf_all->disable_ipv6) {
+	if (idev->cnf.disable_ipv6) {
 		err = -EACCES;
 		goto out2;
 	}
@@ -1752,6 +1751,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 	__u32 prefered_lft;
 	int addr_type;
 	struct inet6_dev *in6_dev;
+	struct net *net = dev_net(dev);
 
 	pinfo = (struct prefix_info *) opt;
 
@@ -1809,7 +1809,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 		if (addrconf_finite_timeout(rt_expires))
 			rt_expires *= HZ;
 
-		rt = rt6_lookup(dev_net(dev), &pinfo->prefix, NULL,
+		rt = rt6_lookup(net, &pinfo->prefix, NULL,
 				dev->ifindex, 1);
 
 		if (rt && addrconf_is_prefix_route(rt)) {
@@ -1846,7 +1846,6 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 		struct inet6_ifaddr * ifp;
 		struct in6_addr addr;
 		int create = 0, update_lft = 0;
-		struct net *net = dev_net(dev);
 
 		if (pinfo->prefix_len == 64) {
 			memcpy(&addr, &pinfo->prefix, 8);
@@ -3988,6 +3987,75 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table,
 	return addrconf_fixup_forwarding(table, valp, val);
 }
 
+static void dev_disable_change(struct inet6_dev *idev)
+{
+	if (!idev || !idev->dev)
+		return;
+
+	if (idev->cnf.disable_ipv6)
+		addrconf_notify(NULL, NETDEV_DOWN, idev->dev);
+	else
+		addrconf_notify(NULL, NETDEV_UP, idev->dev);
+}
+
+static void addrconf_disable_change(struct net *net, __s32 newf)
+{
+	struct net_device *dev;
+	struct inet6_dev *idev;
+
+	read_lock(&dev_base_lock);
+	for_each_netdev(net, dev) {
+		rcu_read_lock();
+		idev = __in6_dev_get(dev);
+		if (idev) {
+			int changed = (!idev->cnf.disable_ipv6) ^ (!newf);
+			idev->cnf.disable_ipv6 = newf;
+			if (changed)
+				dev_disable_change(idev);
+		}
+		rcu_read_unlock();
+	}
+	read_unlock(&dev_base_lock);
+}
+
+static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old)
+{
+	struct net *net;
+
+	net = (struct net *)table->extra2;
+
+	if (p == &net->ipv6.devconf_dflt->disable_ipv6)
+		return 0;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	if (p == &net->ipv6.devconf_all->disable_ipv6) {
+		__s32 newf = net->ipv6.devconf_all->disable_ipv6;
+		net->ipv6.devconf_dflt->disable_ipv6 = newf;
+		addrconf_disable_change(net, newf);
+	} else if ((!*p) ^ (!old))
+		dev_disable_change((struct inet6_dev *)table->extra1);
+
+	rtnl_unlock();
+	return 0;
+}
+
+static
+int addrconf_sysctl_disable(ctl_table *ctl, int write, struct file * filp,
+			    void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int *valp = ctl->data;
+	int val = *valp;
+	int ret;
+
+	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+
+	if (write)
+		ret = addrconf_disable_ipv6(ctl, valp, val);
+	return ret;
+}
+
 static struct addrconf_sysctl_table
 {
 	struct ctl_table_header *sysctl_header;
@@ -4225,7 +4293,8 @@ static struct addrconf_sysctl_table
 			.data		=	&ipv6_devconf.disable_ipv6,
 			.maxlen		=	sizeof(int),
 			.mode		=	0644,
-			.proc_handler	=	proc_dointvec,
+			.proc_handler	=	addrconf_sysctl_disable,
+			.strategy	=	sysctl_intvec,
 		},
 		{
 			.ctl_name	=	CTL_UNNUMBERED,
@@ -4346,6 +4415,10 @@ static int addrconf_init_net(struct net *net)
 		dflt = kmemdup(dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL);
 		if (dflt == NULL)
 			goto err_alloc_dflt;
+	} else {
+		/* these will be inherited by all namespaces */
+		dflt->autoconf = ipv6_defaults.autoconf;
+		dflt->disable_ipv6 = ipv6_defaults.disable_ipv6;
 	}
 
 	net->ipv6.devconf_all = all;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index b6215be0963..85b3d0036af 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -72,9 +72,21 @@ MODULE_LICENSE("GPL");
 static struct list_head inetsw6[SOCK_MAX];
 static DEFINE_SPINLOCK(inetsw6_lock);
 
-static int disable_ipv6 = 0;
-module_param_named(disable, disable_ipv6, int, 0);
-MODULE_PARM_DESC(disable, "Disable IPv6 such that it is non-functional");
+struct ipv6_params ipv6_defaults = {
+	.disable_ipv6 = 0,
+	.autoconf = 1,
+};
+
+static int disable_ipv6_mod = 0;
+
+module_param_named(disable, disable_ipv6_mod, int, 0444);
+MODULE_PARM_DESC(disable, "Disable IPv6 module such that it is non-functional");
+
+module_param_named(disable_ipv6, ipv6_defaults.disable_ipv6, int, 0444);
+MODULE_PARM_DESC(disable_ipv6, "Disable IPv6 on all interfaces");
+
+module_param_named(autoconf, ipv6_defaults.autoconf, int, 0444);
+MODULE_PARM_DESC(autoconf, "Enable IPv6 address autoconfiguration on all interfaces");
 
 static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
 {
@@ -1038,7 +1050,7 @@ static int __init inet6_init(void)
 	for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
 		INIT_LIST_HEAD(r);
 
-	if (disable_ipv6) {
+	if (disable_ipv6_mod) {
 		printk(KERN_INFO
 		       "IPv6: Loaded, but administratively disabled, "
 		       "reboot required to enable\n");
@@ -1227,7 +1239,7 @@ module_init(inet6_init);
 
 static void __exit inet6_exit(void)
 {
-	if (disable_ipv6)
+	if (disable_ipv6_mod)
 		return;
 
 	/* First of all disallow new sockets creation. */
-- 
cgit v1.2.3-70-g09d2


From 22a4f650d686eeaac3629dae1c4294381485efdf Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 1 Jun 2009 10:13:37 +0200
Subject: perf_counter: Tidy up style details

 - whitespace fixlets
 - make local variable definitions more consistent

[ Impact: cleanup ]

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  2 +-
 kernel/perf_counter.c        | 39 +++++++++++++++++++++------------------
 2 files changed, 22 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 81ec79c9f19..0e57d8cc5a3 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -562,7 +562,7 @@ struct perf_cpu_context {
 	 *
 	 * task, softirq, irq, nmi context
 	 */
-	int			recursion[4];
+	int				recursion[4];
 };
 
 #ifdef CONFIG_PERF_COUNTERS
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index ff8b4636f84..df319c48c52 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -16,8 +16,9 @@
 #include <linux/file.h>
 #include <linux/poll.h>
 #include <linux/sysfs.h>
-#include <linux/ptrace.h>
+#include <linux/dcache.h>
 #include <linux/percpu.h>
+#include <linux/ptrace.h>
 #include <linux/vmstat.h>
 #include <linux/hardirq.h>
 #include <linux/rculist.h>
@@ -26,7 +27,6 @@
 #include <linux/anon_inodes.h>
 #include <linux/kernel_stat.h>
 #include <linux/perf_counter.h>
-#include <linux/dcache.h>
 
 #include <asm/irq_regs.h>
 
@@ -65,7 +65,9 @@ void __weak hw_perf_disable(void)		{ barrier(); }
 void __weak hw_perf_enable(void)		{ barrier(); }
 
 void __weak hw_perf_counter_setup(int cpu)	{ barrier(); }
-int __weak hw_perf_group_sched_in(struct perf_counter *group_leader,
+
+int __weak
+hw_perf_group_sched_in(struct perf_counter *group_leader,
 	       struct perf_cpu_context *cpuctx,
 	       struct perf_counter_context *ctx, int cpu)
 {
@@ -127,8 +129,8 @@ static void put_ctx(struct perf_counter_context *ctx)
  * This has to cope with with the fact that until it is locked,
  * the context could get moved to another task.
  */
-static struct perf_counter_context *perf_lock_task_context(
-				struct task_struct *task, unsigned long *flags)
+static struct perf_counter_context *
+perf_lock_task_context(struct task_struct *task, unsigned long *flags)
 {
 	struct perf_counter_context *ctx;
 
@@ -1330,9 +1332,9 @@ __perf_counter_init_context(struct perf_counter_context *ctx,
 
 static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 {
-	struct perf_cpu_context *cpuctx;
-	struct perf_counter_context *ctx;
 	struct perf_counter_context *parent_ctx;
+	struct perf_counter_context *ctx;
+	struct perf_cpu_context *cpuctx;
 	struct task_struct *task;
 	unsigned long flags;
 	int err;
@@ -1664,8 +1666,8 @@ int perf_counter_task_disable(void)
  */
 void perf_counter_update_userpage(struct perf_counter *counter)
 {
-	struct perf_mmap_data *data;
 	struct perf_counter_mmap_page *userpg;
+	struct perf_mmap_data *data;
 
 	rcu_read_lock();
 	data = rcu_dereference(counter->data);
@@ -1769,10 +1771,11 @@ fail:
 
 static void __perf_mmap_data_free(struct rcu_head *rcu_head)
 {
-	struct perf_mmap_data *data = container_of(rcu_head,
-			struct perf_mmap_data, rcu_head);
+	struct perf_mmap_data *data;
 	int i;
 
+	data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
+
 	free_page((unsigned long)data->user_page);
 	for (i = 0; i < data->nr_pages; i++)
 		free_page((unsigned long)data->data_pages[i]);
@@ -1801,8 +1804,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	struct perf_counter *counter = vma->vm_file->private_data;
 
 	WARN_ON_ONCE(counter->ctx->parent_ctx);
-	if (atomic_dec_and_mutex_lock(&counter->mmap_count,
-				      &counter->mmap_mutex)) {
+	if (atomic_dec_and_mutex_lock(&counter->mmap_count, &counter->mmap_mutex)) {
 		struct user_struct *user = current_user();
 
 		atomic_long_sub(counter->data->nr_pages + 1, &user->locked_vm);
@@ -1821,11 +1823,11 @@ static struct vm_operations_struct perf_mmap_vmops = {
 static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct perf_counter *counter = file->private_data;
+	unsigned long user_locked, user_lock_limit;
 	struct user_struct *user = current_user();
+	unsigned long locked, lock_limit;
 	unsigned long vma_size;
 	unsigned long nr_pages;
-	unsigned long user_locked, user_lock_limit;
-	unsigned long locked, lock_limit;
 	long user_extra, extra;
 	int ret = 0;
 
@@ -1900,8 +1902,8 @@ unlock:
 
 static int perf_fasync(int fd, struct file *filp, int on)
 {
-	struct perf_counter *counter = filp->private_data;
 	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct perf_counter *counter = filp->private_data;
 	int retval;
 
 	mutex_lock(&inode->i_mutex);
@@ -2412,8 +2414,8 @@ static void perf_counter_output(struct perf_counter *counter,
  */
 
 struct perf_comm_event {
-	struct task_struct 	*task;
-	char 			*comm;
+	struct task_struct	*task;
+	char			*comm;
 	int			comm_size;
 
 	struct {
@@ -2932,6 +2934,7 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
 			       int nmi, struct pt_regs *regs, u64 addr)
 {
 	int neg = atomic64_add_negative(nr, &counter->hw.count);
+
 	if (counter->hw.irq_period && !neg)
 		perf_swcounter_overflow(counter, nmi, regs, addr);
 }
@@ -3526,7 +3529,7 @@ inherit_counter(struct perf_counter *parent_counter,
 	/*
 	 * Make the child state follow the state of the parent counter,
 	 * not its hw_event.disabled bit.  We hold the parent's mutex,
-	 * so we won't race with perf_counter_{en,dis}able_family.
+	 * so we won't race with perf_counter_{en, dis}able_family.
 	 */
 	if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE)
 		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
-- 
cgit v1.2.3-70-g09d2


From fb39125fd79a25c5002f3b45cf4c80e3fa6b961b Mon Sep 17 00:00:00 2001
From: Zhaolei <zhaolei@cn.fujitsu.com>
Date: Fri, 17 Apr 2009 15:15:51 +0800
Subject: ftrace, workqueuetrace: make workqueue tracepoints use TRACE_EVENT
 macro

v3: zhaolei@cn.fujitsu.com: Change TRACE_EVENT definition to new format
    introduced by Steven Rostedt: consolidate trace and trace_event headers
v2: kosaki@jp.fujitsu.com: print the function names instead of addr, and zap
    the work addr
v1: zhaolei@cn.fujitsu.com: Make workqueue tracepoints use TRACE_EVENT macro

TRACE_EVENT is a more generic way to define tracepoints.
Doing so adds these new capabilities to the tracepoints:

  - zero-copy and per-cpu splice() tracing
  - binary tracing without printf overhead
  - structured logging records exposed under /debug/tracing/events
  - trace events embedded in function tracer output and other plugins
  - user-defined, per tracepoint filter expressions

Then, this patch converts DEFINE_TRACE to TRACE_EVENT in workqueue related
tracepoints.

[ Impact: expand workqueue tracer to events tracing ]

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/trace/events/workqueue.h | 100 +++++++++++++++++++++++++++++++++++++++
 include/trace/workqueue.h        |  25 ----------
 kernel/trace/trace_workqueue.c   |   2 +-
 kernel/workqueue.c               |  11 +----
 4 files changed, 103 insertions(+), 35 deletions(-)
 create mode 100644 include/trace/events/workqueue.h
 delete mode 100644 include/trace/workqueue.h

(limited to 'include')

diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h
new file mode 100644
index 00000000000..035f1bff288
--- /dev/null
+++ b/include/trace/events/workqueue.h
@@ -0,0 +1,100 @@
+#if !defined(_TRACE_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_WORKQUEUE_H
+
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM workqueue
+
+TRACE_EVENT(workqueue_insertion,
+
+	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
+
+	TP_ARGS(wq_thread, work),
+
+	TP_STRUCT__entry(
+		__array(char,		thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,		thread_pid)
+		__field(work_func_t,	func)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->func		= work->func;
+	),
+
+	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
+		__entry->thread_pid, __entry->func)
+);
+
+TRACE_EVENT(workqueue_execution,
+
+	TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
+
+	TP_ARGS(wq_thread, work),
+
+	TP_STRUCT__entry(
+		__array(char,		thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,		thread_pid)
+		__field(work_func_t,	func)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->func		= work->func;
+	),
+
+	TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
+		__entry->thread_pid, __entry->func)
+);
+
+/* Trace the creation of one workqueue thread on a cpu */
+TRACE_EVENT(workqueue_creation,
+
+	TP_PROTO(struct task_struct *wq_thread, int cpu),
+
+	TP_ARGS(wq_thread, cpu),
+
+	TP_STRUCT__entry(
+		__array(char,	thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,	thread_pid)
+		__field(int,	cpu)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+		__entry->cpu		= cpu;
+	),
+
+	TP_printk("thread=%s:%d cpu=%d", __entry->thread_comm,
+		__entry->thread_pid, __entry->cpu)
+);
+
+TRACE_EVENT(workqueue_destruction,
+
+	TP_PROTO(struct task_struct *wq_thread),
+
+	TP_ARGS(wq_thread),
+
+	TP_STRUCT__entry(
+		__array(char,	thread_comm,	TASK_COMM_LEN)
+		__field(pid_t,	thread_pid)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+		__entry->thread_pid	= wq_thread->pid;
+	),
+
+	TP_printk("thread=%s:%d", __entry->thread_comm, __entry->thread_pid)
+);
+
+#endif /* _TRACE_WORKQUEUE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/workqueue.h b/include/trace/workqueue.h
deleted file mode 100644
index 7626523deeb..00000000000
--- a/include/trace/workqueue.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef __TRACE_WORKQUEUE_H
-#define __TRACE_WORKQUEUE_H
-
-#include <linux/tracepoint.h>
-#include <linux/workqueue.h>
-#include <linux/sched.h>
-
-DECLARE_TRACE(workqueue_insertion,
-	   TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
-	   TP_ARGS(wq_thread, work));
-
-DECLARE_TRACE(workqueue_execution,
-	   TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
-	   TP_ARGS(wq_thread, work));
-
-/* Trace the creation of one workqueue thread on a cpu */
-DECLARE_TRACE(workqueue_creation,
-	   TP_PROTO(struct task_struct *wq_thread, int cpu),
-	   TP_ARGS(wq_thread, cpu));
-
-DECLARE_TRACE(workqueue_destruction,
-	   TP_PROTO(struct task_struct *wq_thread),
-	   TP_ARGS(wq_thread));
-
-#endif /* __TRACE_WORKQUEUE_H */
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 984b9175c13..cfe56d31d85 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -6,7 +6,7 @@
  */
 
 
-#include <trace/workqueue.h>
+#include <trace/events/workqueue.h>
 #include <linux/list.h>
 #include <linux/percpu.h>
 #include "trace_stat.h"
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f71fb2a0895..0668795d881 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -33,7 +33,8 @@
 #include <linux/kallsyms.h>
 #include <linux/debug_locks.h>
 #include <linux/lockdep.h>
-#include <trace/workqueue.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/workqueue.h>
 
 /*
  * The per-CPU workqueue (if single thread, we always use the first
@@ -124,8 +125,6 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
 	return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
 }
 
-DEFINE_TRACE(workqueue_insertion);
-
 static void insert_work(struct cpu_workqueue_struct *cwq,
 			struct work_struct *work, struct list_head *head)
 {
@@ -262,8 +261,6 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 }
 EXPORT_SYMBOL_GPL(queue_delayed_work_on);
 
-DEFINE_TRACE(workqueue_execution);
-
 static void run_workqueue(struct cpu_workqueue_struct *cwq)
 {
 	spin_lock_irq(&cwq->lock);
@@ -753,8 +750,6 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
 	return cwq;
 }
 
-DEFINE_TRACE(workqueue_creation);
-
 static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 {
 	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
@@ -860,8 +855,6 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 }
 EXPORT_SYMBOL_GPL(__create_workqueue_key);
 
-DEFINE_TRACE(workqueue_destruction);
-
 static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
 {
 	/*
-- 
cgit v1.2.3-70-g09d2


From 6e25db44a7ad7eb380f4ec774ec00a8fcddea112 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Fri, 29 May 2009 11:24:59 +0800
Subject: tracing/events: fix a typo in __string() format output

"tsize" should be "\tsize". Also remove the space before "__str_loc".

Before:
 # cat tracing/events/irq/irq_handler_entry/format
        ...
        field:int irq;  offset:12;      size:4;
        field: __str_loc name;  offset:16;tsize:2;
        ...

After:
 # cat tracing/events/irq/irq_handler_entry/format
	...
        field:int irq;  offset:12;      size:4;
        field:__str_loc name;   offset:16;      size:2;
	...

[ Impact: standardize __string field description in events format file ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/ftrace.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index b4ec83ae711..9276ec4f34d 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -209,8 +209,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 
 #undef __string
 #define __string(item, src)						       \
-	ret = trace_seq_printf(s, "\tfield: __str_loc " #item ";\t"	       \
-			       "offset:%u;tsize:%u;\n",			       \
+	ret = trace_seq_printf(s, "\tfield:__str_loc " #item ";\t"	       \
+			       "offset:%u;\tsize:%u;\n",		       \
 			       (unsigned int)offsetof(typeof(field),	       \
 					__str_loc_##item),		       \
 			       (unsigned int)sizeof(field.__str_loc_##item));  \
-- 
cgit v1.2.3-70-g09d2


From a9c1c3abe1160a5632e48c929b02b740556bf423 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Mon, 1 Jun 2009 15:35:13 +0800
Subject: tracing/events: put TP_fast_assign into braces

Currently TP_fast_assign has a limitation that we can't define local
variables in it.

Here's one use case when we introduce __dynamic_array():

TP_fast_assign(
	type *p = __get_dynamic_array(item);

	foo(p);
	bar(p);
),

[ Impact: allow defining local variables in TP_fast_assign ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4A2384B1.90100@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/ftrace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 9276ec4f34d..ee926822244 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -471,7 +471,7 @@ static void ftrace_raw_event_##call(proto)				\
 		return;							\
 	entry	= ring_buffer_event_data(event);			\
 									\
-	assign;								\
+	{ assign; }							\
 									\
 	if (!filter_current_check_discard(event_call, entry, event))	\
 		trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
-- 
cgit v1.2.3-70-g09d2


From 7fcb7c472f455d1711eb5a7633204dba8800a6d6 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Mon, 1 Jun 2009 15:35:46 +0800
Subject: tracing/events: introduce __dynamic_array()

__string() is limited:

  - it's a char array, but we may want to define array with other types
  - a source string should be available, but we may just know the string size

We introduce __dynamic_array() to break those limitations, and __string()
becomes a wrapper of it. As a side effect, now __get_str() can be used
in TP_fast_assign but not only TP_print.

Take XFS for example, we have the string length in the dirent, but the
string itself is not NULL-terminated, so __dynamic_array() can be used:

TRACE_EVENT(xfs_dir2,
	TP_PROTO(struct xfs_da_args *args),
	TP_ARGS(args),

	TP_STRUCT__entry(
		__field(int, namelen)
		__dynamic_array(char, name, args->namelen + 1)
		...
	),

	TP_fast_assign(
		char *name = __get_str(name);

		if (args->namelen)
			memcpy(name, args->name, args->namelen);
		name[args->namelen] = '\0';

		__entry->namelen = args->namelen;
	),

	TP_printk("name %.*s namelen %d",
		  __entry->namelen ? __get_str(name) : NULL
		  __entry->namelen)
);

[ Impact: allow defining dynamic size arrays ]

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4A2384D2.3080403@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/ftrace.h             | 122 ++++++++++++++++++++++++++-----------
 kernel/trace/trace_events_filter.c |   6 +-
 2 files changed, 91 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index ee926822244..b5478dab579 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -18,14 +18,17 @@
 
 #include <linux/ftrace_event.h>
 
+#undef __field
+#define __field(type, item)		type	item;
+
 #undef __array
 #define __array(type, item, len)	type	item[len];
 
-#undef __field
-#define __field(type, item)		type	item;
+#undef __dynamic_array
+#define __dynamic_array(type, item, len) unsigned short __data_loc_##item;
 
 #undef __string
-#define __string(item, src)		unsigned short	__str_loc_##item;
+#define __string(item, src) __dynamic_array(char, item, -1)
 
 #undef TP_STRUCT__entry
 #define TP_STRUCT__entry(args...) args
@@ -35,7 +38,7 @@
 	struct ftrace_raw_##name {				\
 		struct trace_entry	ent;			\
 		tstruct						\
-		char			__str_data[0];		\
+		char			__data[0];		\
 	};							\
 	static struct ftrace_event_call event_##name
 
@@ -47,30 +50,31 @@
  *
  * Include the following:
  *
- * struct ftrace_str_offsets_<call> {
- *	int				<str1>;
- *	int				<str2>;
+ * struct ftrace_data_offsets_<call> {
+ *	int				<item1>;
+ *	int				<item2>;
  *	[...]
  * };
  *
- * The __string() macro will create each int <str>, this is to
- * keep the offset of each string from the beggining of the event
- * once we perform the strlen() of the src strings.
- *
+ * The __dynamic_array() macro will create each int <item>, this is
+ * to keep the offset of each array from the beginning of the event.
  */
 
+#undef __field
+#define __field(type, item);
+
 #undef __array
 #define __array(type, item, len)
 
-#undef __field
-#define __field(type, item);
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)	int item;
 
 #undef __string
-#define __string(item, src)	int item;
+#define __string(item, src) __dynamic_array(char, item, -1)
 
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
-	struct ftrace_str_offsets_##call {				\
+	struct ftrace_data_offsets_##call {				\
 		tstruct;						\
 	};
 
@@ -119,8 +123,12 @@
 #undef TP_printk
 #define TP_printk(fmt, args...) fmt "\n", args
 
+#undef __get_dynamic_array
+#define __get_dynamic_array(field)	\
+		((void *)__entry + __entry->__data_loc_##field)
+
 #undef __get_str
-#define __get_str(field)	((char *)__entry + __entry->__str_loc_##field)
+#define __get_str(field) (char *)__get_dynamic_array(field)
 
 #undef __print_flags
 #define __print_flags(flag, delim, flag_array...)			\
@@ -207,16 +215,19 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 	if (!ret)							\
 		return 0;
 
-#undef __string
-#define __string(item, src)						       \
-	ret = trace_seq_printf(s, "\tfield:__str_loc " #item ";\t"	       \
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)				       \
+	ret = trace_seq_printf(s, "\tfield:__data_loc " #item ";\t"	       \
 			       "offset:%u;\tsize:%u;\n",		       \
 			       (unsigned int)offsetof(typeof(field),	       \
-					__str_loc_##item),		       \
-			       (unsigned int)sizeof(field.__str_loc_##item));  \
+					__data_loc_##item),		       \
+			       (unsigned int)sizeof(field.__data_loc_##item)); \
 	if (!ret)							       \
 		return 0;
 
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, -1)
+
 #undef __entry
 #define __entry REC
 
@@ -260,11 +271,14 @@ ftrace_format_##call(struct trace_seq *s)				\
 	if (ret)							\
 		return ret;
 
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)				       \
+	ret = trace_define_field(event_call, "__data_loc" "[" #type "]", #item,\
+				offsetof(typeof(field), __data_loc_##item),    \
+				 sizeof(field.__data_loc_##item), 0);
+
 #undef __string
-#define __string(item, src)						       \
-	ret = trace_define_field(event_call, "__str_loc", #item,	       \
-				offsetof(typeof(field), __str_loc_##item),     \
-				 sizeof(field.__str_loc_##item), 0);
+#define __string(item, src) __dynamic_array(char, item, -1)
 
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
@@ -288,6 +302,43 @@ ftrace_define_fields_##call(void)					\
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
+/*
+ * remember the offset of each array from the beginning of the event.
+ */
+
+#undef __entry
+#define __entry entry
+
+#undef __field
+#define __field(type, item)
+
+#undef __array
+#define __array(type, item, len)
+
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)				\
+	__data_offsets->item = __data_size +				\
+			       offsetof(typeof(*entry), __data);	\
+	__data_size += (len) * sizeof(type);
+
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1)       \
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print)		\
+static inline int ftrace_get_offsets_##call(				\
+	struct ftrace_data_offsets_##call *__data_offsets, proto)       \
+{									\
+	int __data_size = 0;						\
+	struct ftrace_raw_##call __maybe_unused *entry;			\
+									\
+	tstruct;							\
+									\
+	return __data_size;						\
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
 /*
  * Stage 4 of the trace events.
  *
@@ -432,15 +483,15 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
 #undef __array
 #define __array(type, item, len)
 
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)				\
+	__entry->__data_loc_##item = __data_offsets.item;
+
 #undef __string
-#define __string(item, src)						\
-	__str_offsets.item = __str_size +				\
-			     offsetof(typeof(*entry), __str_data);	\
-	__str_size += strlen(src) + 1;
+#define __string(item, src) __dynamic_array(char, item, -1)       	\
 
 #undef __assign_str
 #define __assign_str(dst, src)						\
-	__entry->__str_loc_##dst = __str_offsets.dst;			\
 	strcpy(__get_str(dst), src);
 
 #undef TRACE_EVENT
@@ -451,26 +502,29 @@ static struct ftrace_event_call event_##call;				\
 									\
 static void ftrace_raw_event_##call(proto)				\
 {									\
-	struct ftrace_str_offsets_##call __maybe_unused __str_offsets;	\
+	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
 	struct ftrace_event_call *event_call = &event_##call;		\
 	struct ring_buffer_event *event;				\
 	struct ftrace_raw_##call *entry;				\
 	unsigned long irq_flags;					\
-	int __str_size = 0;						\
+	int __data_size;						\
 	int pc;								\
 									\
 	local_save_flags(irq_flags);					\
 	pc = preempt_count();						\
 									\
-	tstruct;							\
+	__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
 									\
 	event = trace_current_buffer_lock_reserve(event_##call.id,	\
-				 sizeof(struct ftrace_raw_##call) + __str_size,\
+				 sizeof(*entry) + __data_size,		\
 				 irq_flags, pc);			\
 	if (!event)							\
 		return;							\
 	entry	= ring_buffer_event_data(event);			\
 									\
+									\
+	tstruct								\
+									\
 	{ assign; }							\
 									\
 	if (!filter_current_check_discard(event_call, entry, event))	\
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index a7430b16d24..db6e54bdb59 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -478,12 +478,12 @@ enum {
 
 static int is_string_field(const char *type)
 {
+	if (strstr(type, "__data_loc") && strstr(type, "char"))
+		return FILTER_DYN_STRING;
+
 	if (strchr(type, '[') && strstr(type, "char"))
 		return FILTER_STATIC_STRING;
 
-	if (!strcmp(type, "__str_loc"))
-		return FILTER_DYN_STRING;
-
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 1d080d6c3141623c92caaebe20e847cb99ccbb60 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 1 Jun 2009 12:20:40 -0400
Subject: tracing: remove redundant SOFTIRQ from softirq event traces

After converting the softirq tracer to use te flags options, this
caused a regression with the name. Since the flag was used directly
it was printed out (i.e. HRTIMER_SOFTIRQ).

This patch only shows the softirq name without the SOFTIRQ part.

[ Impact: fix regression of output from softirq events ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/events/irq.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index 683fb36a994..b0c7ede55eb 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -7,18 +7,18 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM irq
 
-#define softirq_name(sirq) { sirq, #sirq }
-#define show_softirq_name(val)						\
-	__print_symbolic(val,						\
-			 softirq_name(HI_SOFTIRQ),			\
-			 softirq_name(TIMER_SOFTIRQ),			\
-			 softirq_name(NET_TX_SOFTIRQ),			\
-			 softirq_name(NET_RX_SOFTIRQ),			\
-			 softirq_name(BLOCK_SOFTIRQ),			\
-			 softirq_name(TASKLET_SOFTIRQ),			\
-			 softirq_name(SCHED_SOFTIRQ),			\
-			 softirq_name(HRTIMER_SOFTIRQ),			\
-			 softirq_name(RCU_SOFTIRQ))
+#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq }
+#define show_softirq_name(val)			\
+	__print_symbolic(val,			\
+			 softirq_name(HI),	\
+			 softirq_name(TIMER),	\
+			 softirq_name(NET_TX),	\
+			 softirq_name(NET_RX),	\
+			 softirq_name(BLOCK),	\
+			 softirq_name(TASKLET),	\
+			 softirq_name(SCHED),	\
+			 softirq_name(HRTIMER),	\
+			 softirq_name(RCU))
 
 /**
  * irq_handler_entry - called immediately before the irq action handler
-- 
cgit v1.2.3-70-g09d2


From 112f38a7e36e9d688b389507136bf3af3e6d159b Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 1 Jun 2009 15:16:05 -0400
Subject: tracing: make trace pipe recognize latency format flag

The trace_pipe did not recognize the latency format flag and would produce
different output than the trace file. The problem was partly due that
the trace flags in the iterator was not set as well as the trace_pipe
zeros out part of the iterator (including the flags) to be able to use
the same routines as the trace file. trace_flags of the iterator should
not cause any problems when not zeroed out by for trace_pipe.

Reported-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h | 2 +-
 kernel/trace/trace.c         | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index bbf40f624fc..5c093ffc655 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -51,6 +51,7 @@ struct trace_iterator {
 	int			cpu_file;
 	struct mutex		mutex;
 	struct ring_buffer_iter	*buffer_iter[NR_CPUS];
+	unsigned long		iter_flags;
 
 	/* The below is zeroed out in pipe_read */
 	struct trace_seq	seq;
@@ -58,7 +59,6 @@ struct trace_iterator {
 	int			cpu;
 	u64			ts;
 
-	unsigned long		iter_flags;
 	loff_t			pos;
 	long			idx;
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a3a8a87d7e9..cae34c69752 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2826,6 +2826,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 	/* trace pipe does not show start of buffer */
 	cpumask_setall(iter->started);
 
+	if (trace_flags & TRACE_ITER_LATENCY_FMT)
+		iter->iter_flags |= TRACE_FILE_LAT_FMT;
+
 	iter->cpu_file = cpu_file;
 	iter->tr = &global_trace;
 	mutex_init(&iter->mutex);
-- 
cgit v1.2.3-70-g09d2


From f771bef98004d9d141b085d987a77d06669d4f4f Mon Sep 17 00:00:00 2001
From: Nivedita Singhvi <niv@us.ibm.com>
Date: Thu, 28 May 2009 07:00:46 +0000
Subject: ipv4: New multicast-all socket option

After some discussion offline with Christoph Lameter and David Stevens
regarding multicast behaviour in Linux, I'm submitting a slightly
modified patch from the one Christoph submitted earlier.

This patch provides a new socket option IP_MULTICAST_ALL.

In this case, default behaviour is _unchanged_ from the current
Linux standard. The socket option is set by default to provide
original behaviour. Sockets wishing to receive data only from
multicast groups they join explicitly will need to clear this
socket option.

Signed-off-by: Nivedita Singhvi <niv@us.ibm.com>
Signed-off-by: Christoph Lameter<cl@linux.com>
Acked-by: David Stevens <dlstevens@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in.h      |  1 +
 include/net/inet_sock.h |  3 ++-
 net/ipv4/af_inet.c      |  1 +
 net/ipv4/igmp.c         |  2 +-
 net/ipv4/ip_sockglue.c  | 11 +++++++++++
 5 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/in.h b/include/linux/in.h
index d60122a3a08..cf196da04ec 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -107,6 +107,7 @@ struct in_addr {
 #define MCAST_JOIN_SOURCE_GROUP		46
 #define MCAST_LEAVE_SOURCE_GROUP	47
 #define MCAST_MSFILTER			48
+#define IP_MULTICAST_ALL		49
 
 #define MCAST_EXCLUDE	0
 #define MCAST_INCLUDE	1
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index de0ecc71cf0..20a6957af87 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -130,7 +130,8 @@ struct inet_sock {
 				freebind:1,
 				hdrincl:1,
 				mc_loop:1,
-				transparent:1;
+				transparent:1,
+				mc_all:1;
 	int			mc_index;
 	__be32			mc_addr;
 	struct ip_mc_socklist	*mc_list;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 5abee4c9744..d8736217858 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -375,6 +375,7 @@ lookup_protocol:
 	inet->uc_ttl	= -1;
 	inet->mc_loop	= 1;
 	inet->mc_ttl	= 1;
+	inet->mc_all	= 1;
 	inet->mc_index	= 0;
 	inet->mc_list	= NULL;
 
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 9eb6219af61..e6058a50379 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2196,7 +2196,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
 			break;
 	}
 	if (!pmc)
-		return 1;
+		return inet->mc_all;
 	psl = pmc->sflist;
 	if (!psl)
 		return pmc->sfmode == MCAST_EXCLUDE;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 21b0187123d..cb49936856e 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -453,6 +453,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			     (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
 			     (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT))) ||
 	    optname == IP_MULTICAST_TTL ||
+	    optname == IP_MULTICAST_ALL ||
 	    optname == IP_MULTICAST_LOOP ||
 	    optname == IP_RECVORIGDSTADDR) {
 		if (optlen >= sizeof(int)) {
@@ -898,6 +899,13 @@ mc_msf_out:
 		kfree(gsf);
 		break;
 	}
+	case IP_MULTICAST_ALL:
+		if (optlen < 1)
+			goto e_inval;
+		if (val != 0 && val != 1)
+			goto e_inval;
+		inet->mc_all = val;
+		break;
 	case IP_ROUTER_ALERT:
 		err = ip_ra_control(sk, val ? 1 : 0, NULL);
 		break;
@@ -1151,6 +1159,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
 		release_sock(sk);
 		return err;
 	}
+	case IP_MULTICAST_ALL:
+		val = inet->mc_all;
+		break;
 	case IP_PKTOPTIONS:
 	{
 		struct msghdr msg;
-- 
cgit v1.2.3-70-g09d2


From 10a8ebbb08c4b08292598947bbe534e04d6ee705 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Tue, 2 Jun 2009 12:02:38 +0200
Subject: ALSA: Core - add snd_card_set_id() function

Introduce snd_card_set_id() function to allow lowlevel drivers to set
default identification name for card slot. The function checks also
for identification name collisions and tries to create unique name.

Also, the snd_card_create() function is simplified, because this new
function is used. As bonus, proper name collision checks are evaluated
at the card create time.

Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/core.h |  1 +
 sound/core/init.c    | 62 +++++++++++++++++++++++++++++++++-------------------
 2 files changed, 40 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/sound/core.h b/include/sound/core.h
index 3dea79829ac..0e8ae10155a 100644
--- a/include/sound/core.h
+++ b/include/sound/core.h
@@ -313,6 +313,7 @@ struct snd_card *snd_card_new(int idx, const char *id,
 int snd_card_disconnect(struct snd_card *card);
 int snd_card_free(struct snd_card *card);
 int snd_card_free_when_closed(struct snd_card *card);
+void snd_card_set_id(struct snd_card *card, const char *id);
 int snd_card_register(struct snd_card *card);
 int snd_card_info_init(void);
 int snd_card_info_done(void);
diff --git a/sound/core/init.c b/sound/core/init.c
index fd56afe846e..6557dd85e19 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -152,15 +152,8 @@ int snd_card_create(int idx, const char *xid,
 	card = kzalloc(sizeof(*card) + extra_size, GFP_KERNEL);
 	if (!card)
 		return -ENOMEM;
-	if (xid) {
-		if (!snd_info_check_reserved_words(xid)) {
-			snd_printk(KERN_ERR
-				   "given id string '%s' is reserved.\n", xid);
-			err = -EBUSY;
-			goto __error;
-		}
-		strlcpy(card->id, xid, sizeof(card->id));
-	}
+	if (xid)
+		snd_card_set_id(card, xid);
 	err = 0;
 	mutex_lock(&snd_card_mutex);
 	if (idx < 0) {
@@ -483,22 +476,39 @@ int snd_card_free(struct snd_card *card)
 
 EXPORT_SYMBOL(snd_card_free);
 
-static void choose_default_id(struct snd_card *card)
+/**
+ *  snd_card_set_id - set card identification name
+ *  @card: soundcard structure
+ *  @nid: new identification string
+ *
+ *  This function sets the card identification and checks for name
+ *  collisions.
+ */
+void snd_card_set_id(struct snd_card *card, const char *nid)
 {
 	int i, len, idx_flag = 0, loops = SNDRV_CARDS;
-	char *id, *spos;
+	const char *spos, *src;
+	char *id;
 	
-	id = spos = card->shortname;	
-	while (*id != '\0') {
-		if (*id == ' ')
-			spos = id + 1;
-		id++;
+	/* check if user specified own card->id */
+	if (card->id[0] != '\0')
+		return;
+	if (nid == NULL) {
+		id = card->shortname;
+		spos = src = id;
+		while (*id != '\0') {
+			if (*id == ' ')
+				spos = id + 1;
+			id++;
+		}
+	} else {
+		spos = src = nid;
 	}
 	id = card->id;
 	while (*spos != '\0' && !isalnum(*spos))
 		spos++;
 	if (isdigit(*spos))
-		*id++ = isalpha(card->shortname[0]) ? card->shortname[0] : 'D';
+		*id++ = isalpha(src[0]) ? src[0] : 'D';
 	while (*spos != '\0' && (size_t)(id - card->id) < sizeof(card->id) - 1) {
 		if (isalnum(*spos))
 			*id++ = *spos;
@@ -513,16 +523,20 @@ static void choose_default_id(struct snd_card *card)
 
 	while (1) {
 	      	if (loops-- == 0) {
-      			snd_printk(KERN_ERR "unable to choose default card id (%s)\n", id);
+			snd_printk(KERN_ERR "unable to set card id (%s)\n", id);
       			strcpy(card->id, card->proc_root->name);
       			return;
       		}
 	      	if (!snd_info_check_reserved_words(id))
       			goto __change;
+		mutex_lock(&snd_card_mutex);
 		for (i = 0; i < snd_ecards_limit; i++) {
-			if (snd_cards[i] && !strcmp(snd_cards[i]->id, id))
+			if (snd_cards[i] && !strcmp(snd_cards[i]->id, id)) {
+				mutex_unlock(&snd_card_mutex);
 				goto __change;
+			}
 		}
+		mutex_unlock(&snd_card_mutex);
 		break;
 
 	      __change:
@@ -539,14 +553,16 @@ static void choose_default_id(struct snd_card *card)
 			spos = id + len - 2;
 			if ((size_t)len <= sizeof(card->id) - 2)
 				spos++;
-			*spos++ = '_';
-			*spos++ = '1';
-			*spos++ = '\0';
+			*(char *)spos++ = '_';
+			*(char *)spos++ = '1';
+			*(char *)spos++ = '\0';
 			idx_flag++;
 		}
 	}
 }
 
+EXPORT_SYMBOL(snd_card_set_id);
+
 #ifndef CONFIG_SYSFS_DEPRECATED
 static ssize_t
 card_id_show_attr(struct device *dev,
@@ -641,7 +657,7 @@ int snd_card_register(struct snd_card *card)
 		return 0;
 	}
 	if (card->id[0] == '\0')
-		choose_default_id(card);
+		snd_card_set_id(card, NULL);
 	snd_cards[card->number] = card;
 	mutex_unlock(&snd_card_mutex);
 	init_info_for_card(card);
-- 
cgit v1.2.3-70-g09d2


From 0e0ee1cc33de8f0cc603269b354085dee340afa0 Mon Sep 17 00:00:00 2001
From: Dmitry Pervushin <dpervushin@embeddedalley.com>
Date: Wed, 29 Apr 2009 19:29:38 +0400
Subject: UBI: add notification API

UBI volume notifications are intended to create the API to get clients
notified about volume creation/deletion, renaming and re-sizing. A
client can subscribe to these notifications using 'ubi_volume_register()'
and cancel the subscription using 'ubi_volume_unregister()'. When UBI
volumes change, a blocking notifier is called. Clients also can request
"added" events on all volumes that existed before client subscribed
to the notifications.

If we use notifications instead of calling functions like 'ubi_gluebi_xxx()',
we can make the MTD emulation layer to be more flexible: build it as a
separate module and load/unload it on demand.

[Artem: many cleanups, rework locking, add "updated" event, provide
 device/volume info in notifiers]

Signed-off-by: Dmitry Pervushin <dpervushin@embeddedalley.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 drivers/mtd/ubi/build.c | 100 +++++++++++++++++++++++++++++++++++++++++---
 drivers/mtd/ubi/cdev.c  |   1 +
 drivers/mtd/ubi/kapi.c  | 108 +++++++++++++++++++++++++++++++++++++++++-------
 drivers/mtd/ubi/ubi.h   |  12 ++++++
 drivers/mtd/ubi/vmt.c   |   4 ++
 include/linux/mtd/ubi.h |  37 +++++++++++++++++
 6 files changed, 241 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index d3da6668266..964a99d48bc 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -121,6 +121,94 @@ static struct device_attribute dev_bgt_enabled =
 static struct device_attribute dev_mtd_num =
 	__ATTR(mtd_num, S_IRUGO, dev_attribute_show, NULL);
 
+/**
+ * ubi_volume_notify - send a volume change notification.
+ * @ubi: UBI device description object
+ * @vol: volume description object of the changed volume
+ * @ntype: notification type to send (%UBI_VOLUME_ADDED, etc)
+ *
+ * This is a helper function which notifies all subscribers about a volume
+ * change event (creation, removal, re-sizing, re-naming, updating). Returns
+ * zero in case of success and a negative error code in case of failure.
+ */
+int ubi_volume_notify(struct ubi_device *ubi, struct ubi_volume *vol, int ntype)
+{
+	struct ubi_notification nt;
+
+	ubi_do_get_device_info(ubi, &nt.di);
+	ubi_do_get_volume_info(ubi, vol, &nt.vi);
+	return blocking_notifier_call_chain(&ubi_notifiers, ntype, &nt);
+}
+
+/**
+ * ubi_notify_all - send a notification to all volumes.
+ * @ubi: UBI device description object
+ * @ntype: notification type to send (%UBI_VOLUME_ADDED, etc)
+ * @nb: the notifier to call
+ *
+ * This function walks all volumes of UBI device @ubi and sends the @ntype
+ * notification for each volume. If @nb is %NULL, then all registered notifiers
+ * are called, otherwise only the @nb notifier is called. Returns the number of
+ * sent notifications.
+ */
+int ubi_notify_all(struct ubi_device *ubi, int ntype, struct notifier_block *nb)
+{
+	struct ubi_notification nt;
+	int i, count = 0;
+
+	ubi_do_get_device_info(ubi, &nt.di);
+
+	mutex_lock(&ubi->device_mutex);
+	for (i = 0; i < ubi->vtbl_slots; i++) {
+		/*
+		 * Since the @ubi->device is locked, and we are not going to
+		 * change @ubi->volumes, we do not have to lock
+		 * @ubi->volumes_lock.
+		 */
+		if (!ubi->volumes[i])
+			continue;
+
+		ubi_do_get_volume_info(ubi, ubi->volumes[i], &nt.vi);
+		if (nb)
+			nb->notifier_call(nb, ntype, &nt);
+		else
+			blocking_notifier_call_chain(&ubi_notifiers, ntype,
+						     &nt);
+		count += 1;
+	}
+	mutex_unlock(&ubi->device_mutex);
+
+	return count;
+}
+
+/**
+ * ubi_enumerate_volumes - send "add" notification for all existing volumes.
+ * @nb: the notifier to call
+ *
+ * This function walks all UBI devices and volumes and sends the
+ * %UBI_VOLUME_ADDED notification for each volume. If @nb is %NULL, then all
+ * registered notifiers are called, otherwise only the @nb notifier is called.
+ * Returns the number of sent notifications.
+ */
+int ubi_enumerate_volumes(struct notifier_block *nb)
+{
+	int i, count = 0;
+
+	/*
+	 * Since the @ubi_devices_mutex is locked, and we are not going to
+	 * change @ubi_devices, we do not have to lock @ubi_devices_lock.
+	 */
+	for (i = 0; i < UBI_MAX_DEVICES; i++) {
+		struct ubi_device *ubi = ubi_devices[i];
+
+		if (!ubi)
+			continue;
+		count += ubi_notify_all(ubi, UBI_VOLUME_ADDED, nb);
+	}
+
+	return count;
+}
+
 /**
  * ubi_get_device - get UBI device.
  * @ubi_num: UBI device number
@@ -891,6 +979,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
 	spin_unlock(&ubi->wl_lock);
 
 	ubi_devices[ubi_num] = ubi;
+	ubi_notify_all(ubi, UBI_VOLUME_ADDED, NULL);
 	return ubi_num;
 
 out_uif:
@@ -933,13 +1022,13 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway)
 	if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES)
 		return -EINVAL;
 
-	spin_lock(&ubi_devices_lock);
-	ubi = ubi_devices[ubi_num];
-	if (!ubi) {
-		spin_unlock(&ubi_devices_lock);
+	ubi = ubi_get_device(ubi_num);
+	if (!ubi)
 		return -EINVAL;
-	}
 
+	spin_lock(&ubi_devices_lock);
+	put_device(&ubi->dev);
+	ubi->ref_count -= 1;
 	if (ubi->ref_count) {
 		if (!anyway) {
 			spin_unlock(&ubi_devices_lock);
@@ -953,6 +1042,7 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway)
 	spin_unlock(&ubi_devices_lock);
 
 	ubi_assert(ubi_num == ubi->ubi_num);
+	ubi_notify_all(ubi, UBI_VOLUME_REMOVED, NULL);
 	dbg_msg("detaching mtd%d from ubi%d", ubi->mtd->index, ubi_num);
 
 	/*
diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index 9a2b217941f..631983615f1 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -396,6 +396,7 @@ static ssize_t vol_cdev_write(struct file *file, const char __user *buf,
 		}
 		vol->checked = 1;
 		ubi_gluebi_updated(vol);
+		ubi_volume_notify(ubi, vol, UBI_VOLUME_UPDATED);
 		revoke_exclusive(desc, UBI_READWRITE);
 	}
 
diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c
index 2675207c5fe..88a72e9c8be 100644
--- a/drivers/mtd/ubi/kapi.c
+++ b/drivers/mtd/ubi/kapi.c
@@ -25,6 +25,24 @@
 #include <asm/div64.h>
 #include "ubi.h"
 
+/**
+ * ubi_do_get_device_info - get information about UBI device.
+ * @ubi: UBI device description object
+ * @di: the information is stored here
+ *
+ * This function is the same as 'ubi_get_device_info()', but it assumes the UBI
+ * device is locked and cannot disappear.
+ */
+void ubi_do_get_device_info(struct ubi_device *ubi, struct ubi_device_info *di)
+{
+	di->ubi_num = ubi->ubi_num;
+	di->leb_size = ubi->leb_size;
+	di->min_io_size = ubi->min_io_size;
+	di->ro_mode = ubi->ro_mode;
+	di->cdev = ubi->cdev.dev;
+}
+EXPORT_SYMBOL_GPL(ubi_do_get_device_info);
+
 /**
  * ubi_get_device_info - get information about UBI device.
  * @ubi_num: UBI device number
@@ -39,33 +57,24 @@ int ubi_get_device_info(int ubi_num, struct ubi_device_info *di)
 
 	if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES)
 		return -EINVAL;
-
 	ubi = ubi_get_device(ubi_num);
 	if (!ubi)
 		return -ENODEV;
-
-	di->ubi_num = ubi->ubi_num;
-	di->leb_size = ubi->leb_size;
-	di->min_io_size = ubi->min_io_size;
-	di->ro_mode = ubi->ro_mode;
-	di->cdev = ubi->cdev.dev;
-
+	ubi_do_get_device_info(ubi, di);
 	ubi_put_device(ubi);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(ubi_get_device_info);
 
 /**
- * ubi_get_volume_info - get information about UBI volume.
- * @desc: volume descriptor
+ * ubi_do_get_volume_info - get information about UBI volume.
+ * @ubi: UBI device description object
+ * @vol: volume description object
  * @vi: the information is stored here
  */
-void ubi_get_volume_info(struct ubi_volume_desc *desc,
-			 struct ubi_volume_info *vi)
+void ubi_do_get_volume_info(struct ubi_device *ubi, struct ubi_volume *vol,
+			    struct ubi_volume_info *vi)
 {
-	const struct ubi_volume *vol = desc->vol;
-	const struct ubi_device *ubi = vol->ubi;
-
 	vi->vol_id = vol->vol_id;
 	vi->ubi_num = ubi->ubi_num;
 	vi->size = vol->reserved_pebs;
@@ -79,6 +88,17 @@ void ubi_get_volume_info(struct ubi_volume_desc *desc,
 	vi->name = vol->name;
 	vi->cdev = vol->cdev.dev;
 }
+
+/**
+ * ubi_get_volume_info - get information about UBI volume.
+ * @desc: volume descriptor
+ * @vi: the information is stored here
+ */
+void ubi_get_volume_info(struct ubi_volume_desc *desc,
+			 struct ubi_volume_info *vi)
+{
+	ubi_do_get_volume_info(desc->vol->ubi, desc->vol, vi);
+}
 EXPORT_SYMBOL_GPL(ubi_get_volume_info);
 
 /**
@@ -561,7 +581,7 @@ int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum)
 EXPORT_SYMBOL_GPL(ubi_leb_unmap);
 
 /**
- * ubi_leb_map - map logical erasblock to a physical eraseblock.
+ * ubi_leb_map - map logical eraseblock to a physical eraseblock.
  * @desc: volume descriptor
  * @lnum: logical eraseblock number
  * @dtype: expected data type
@@ -659,3 +679,59 @@ int ubi_sync(int ubi_num)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(ubi_sync);
+
+BLOCKING_NOTIFIER_HEAD(ubi_notifiers);
+
+/**
+ * ubi_register_volume_notifier - register a volume notifier.
+ * @nb: the notifier description object
+ * @ignore_existing: if non-zero, do not send "added" notification for all
+ *                   already existing volumes
+ *
+ * This function registers a volume notifier, which means that
+ * 'nb->notifier_call()' will be invoked when an UBI  volume is created,
+ * removed, re-sized, re-named, or updated. The first argument of the function
+ * is the notification type. The second argument is pointer to a
+ * &struct ubi_notification object which describes the notification event.
+ * Using UBI API from the volume notifier is prohibited.
+ *
+ * This function returns zero in case of success and a negative error code
+ * in case of failure.
+ */
+int ubi_register_volume_notifier(struct notifier_block *nb,
+				 int ignore_existing)
+{
+	int err;
+
+	err = blocking_notifier_chain_register(&ubi_notifiers, nb);
+	if (err != 0)
+		return err;
+	if (ignore_existing)
+		return 0;
+
+	/*
+	 * We are going to walk all UBI devices and all volumes, and
+	 * notify the user about existing volumes by the %UBI_VOLUME_ADDED
+	 * event. We have to lock the @ubi_devices_mutex to make sure UBI
+	 * devices do not disappear.
+	 */
+	mutex_lock(&ubi_devices_mutex);
+	ubi_enumerate_volumes(nb);
+	mutex_unlock(&ubi_devices_mutex);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(ubi_register_volume_notifier);
+
+/**
+ * ubi_unregister_volume_notifier - unregister the volume notifier.
+ * @nb: the notifier description object
+ *
+ * This function unregisters volume notifier @nm and returns zero in case of
+ * success and a negative error code in case of failure.
+ */
+int ubi_unregister_volume_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&ubi_notifiers, nb);
+}
+EXPORT_SYMBOL_GPL(ubi_unregister_volume_notifier);
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index 6d929329a8d..86e1a4e0ab0 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -38,6 +38,7 @@
 #include <linux/vmalloc.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/ubi.h>
+#include <linux/notifier.h>
 
 #include "ubi-media.h"
 #include "scan.h"
@@ -483,6 +484,7 @@ extern const struct file_operations ubi_cdev_operations;
 extern const struct file_operations ubi_vol_cdev_operations;
 extern struct class *ubi_class;
 extern struct mutex ubi_devices_mutex;
+extern struct blocking_notifier_head ubi_notifiers;
 
 /* vtbl.c */
 int ubi_change_vtbl_record(struct ubi_device *ubi, int idx,
@@ -575,6 +577,16 @@ struct ubi_device *ubi_get_device(int ubi_num);
 void ubi_put_device(struct ubi_device *ubi);
 struct ubi_device *ubi_get_by_major(int major);
 int ubi_major2num(int major);
+int ubi_volume_notify(struct ubi_device *ubi, struct ubi_volume *vol,
+		      int ntype);
+int ubi_notify_all(struct ubi_device *ubi, int ntype,
+		   struct notifier_block *nb);
+int ubi_enumerate_volumes(struct notifier_block *nb);
+
+/* kapi.c */
+void ubi_do_get_device_info(struct ubi_device *ubi, struct ubi_device_info *di);
+void ubi_do_get_volume_info(struct ubi_device *ubi, struct ubi_volume *vol,
+			    struct ubi_volume_info *vi);
 
 /*
  * ubi_rb_for_each_entry - walk an RB-tree.
diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c
index 8e8d6fae7a0..e151862a3a9 100644
--- a/drivers/mtd/ubi/vmt.c
+++ b/drivers/mtd/ubi/vmt.c
@@ -358,6 +358,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
 	ubi->vol_count += 1;
 	spin_unlock(&ubi->volumes_lock);
 
+	ubi_volume_notify(ubi, vol, UBI_VOLUME_ADDED);
 	if (paranoid_check_volumes(ubi))
 		dbg_err("check failed while creating volume %d", vol_id);
 	return err;
@@ -466,6 +467,7 @@ int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl)
 	ubi->vol_count -= 1;
 	spin_unlock(&ubi->volumes_lock);
 
+	ubi_volume_notify(ubi, vol, UBI_VOLUME_REMOVED);
 	if (!no_vtbl && paranoid_check_volumes(ubi))
 		dbg_err("check failed while removing volume %d", vol_id);
 
@@ -589,6 +591,7 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
 			(long long)vol->used_ebs * vol->usable_leb_size;
 	}
 
+	ubi_volume_notify(ubi, vol, UBI_VOLUME_RESIZED);
 	if (paranoid_check_volumes(ubi))
 		dbg_err("check failed while re-sizing volume %d", vol_id);
 	return err;
@@ -635,6 +638,7 @@ int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list)
 			vol->name_len = re->new_name_len;
 			memcpy(vol->name, re->new_name, re->new_name_len + 1);
 			spin_unlock(&ubi->volumes_lock);
+			ubi_volume_notify(ubi, vol, UBI_VOLUME_RENAMED);
 		}
 	}
 
diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h
index 6316fafe5c2..6913b71d9ab 100644
--- a/include/linux/mtd/ubi.h
+++ b/include/linux/mtd/ubi.h
@@ -132,6 +132,39 @@ struct ubi_device_info {
 	dev_t cdev;
 };
 
+/*
+ * enum - volume notification types.
+ * @UBI_VOLUME_ADDED: volume has been added
+ * @UBI_VOLUME_REMOVED: start volume volume
+ * @UBI_VOLUME_RESIZED: volume size has been re-sized
+ * @UBI_VOLUME_RENAMED: volume name has been re-named
+ * @UBI_VOLUME_UPDATED: volume name has been updated
+ *
+ * These constants define which type of event has happened when a volume
+ * notification function is invoked.
+ */
+enum {
+	UBI_VOLUME_ADDED,
+	UBI_VOLUME_REMOVED,
+	UBI_VOLUME_RESIZED,
+	UBI_VOLUME_RENAMED,
+	UBI_VOLUME_UPDATED,
+};
+
+/*
+ * struct ubi_notification - UBI notification description structure.
+ * @di: UBI device description object
+ * @vi: UBI volume description object
+ *
+ * UBI notifiers are called with a pointer to an object of this type. The
+ * object describes the notification. Namely, it provides a description of the
+ * UBI device and UBI volume the notification informs about.
+ */
+struct ubi_notification {
+	struct ubi_device_info di;
+	struct ubi_volume_info vi;
+};
+
 /* UBI descriptor given to users when they open UBI volumes */
 struct ubi_volume_desc;
 
@@ -141,6 +174,10 @@ void ubi_get_volume_info(struct ubi_volume_desc *desc,
 struct ubi_volume_desc *ubi_open_volume(int ubi_num, int vol_id, int mode);
 struct ubi_volume_desc *ubi_open_volume_nm(int ubi_num, const char *name,
 					   int mode);
+int ubi_register_volume_notifier(struct notifier_block *nb,
+				 int ignore_existing);
+int ubi_unregister_volume_notifier(struct notifier_block *nb);
+
 void ubi_close_volume(struct ubi_volume_desc *desc);
 int ubi_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
 		 int len, int check);
-- 
cgit v1.2.3-70-g09d2


From 3f731ca60afc29f5bcdb5fd2a04391466313a9ac Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 1 Jun 2009 17:52:30 +1000
Subject: perf_counter: Fix cpu migration counter

This fixes the cpu migration software counter to count
correctly even when contexts get swapped from one task to
another.  Previously the cpu migration counts reported by perf
stat were bogus, ranging from negative to several thousand for
a single "lat_ctx 2 8 32" run.  With this patch the cpu
migration count reported for "lat_ctx 2 8 32" is almost always
between 35 and 44.

This fixes the problem by adding a call into the perf_counter
code from set_task_cpu when tasks are migrated.  This enables
us to use the generic swcounter code (with some modifications)
for the cpu migration counter.

This modifies the swcounter code to allow a NULL regs pointer
to be passed in to perf_swcounter_ctx_event() etc.  The cpu
migration counter does this because there isn't necessarily a
pt_regs struct for the task available.  In this case, the
counter will not have interrupt capability - but the migration
counter didn't have interrupt capability before, so this is no
loss.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <18979.35006.819769.416327@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  4 +++
 kernel/perf_counter.c        | 74 +++++++++++++-------------------------------
 kernel/sched.c               |  1 +
 3 files changed, 26 insertions(+), 53 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 0e57d8cc5a3..deb9acf9ad2 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -615,6 +615,8 @@ extern void perf_counter_munmap(unsigned long addr, unsigned long len,
 
 extern void perf_counter_comm(struct task_struct *tsk);
 
+extern void perf_counter_task_migration(struct task_struct *task, int cpu);
+
 #define MAX_STACK_DEPTH		255
 
 struct perf_callchain_entry {
@@ -668,6 +670,8 @@ perf_counter_munmap(unsigned long addr, unsigned long len,
 
 static inline void perf_counter_comm(struct task_struct *tsk)		{ }
 static inline void perf_counter_init(void)				{ }
+static inline void perf_counter_task_migration(struct task_struct *task,
+					       int cpu)			{ }
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 8d2653f137e..cd94cf3bf9e 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2921,11 +2921,13 @@ static int perf_swcounter_match(struct perf_counter *counter,
 	if (counter->hw_event.config != event_config)
 		return 0;
 
-	if (counter->hw_event.exclude_user && user_mode(regs))
-		return 0;
+	if (regs) {
+		if (counter->hw_event.exclude_user && user_mode(regs))
+			return 0;
 
-	if (counter->hw_event.exclude_kernel && !user_mode(regs))
-		return 0;
+		if (counter->hw_event.exclude_kernel && !user_mode(regs))
+			return 0;
+	}
 
 	return 1;
 }
@@ -2935,7 +2937,7 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
 {
 	int neg = atomic64_add_negative(nr, &counter->hw.count);
 
-	if (counter->hw.irq_period && !neg)
+	if (counter->hw.irq_period && !neg && regs)
 		perf_swcounter_overflow(counter, nmi, regs, addr);
 }
 
@@ -3151,55 +3153,24 @@ static const struct pmu perf_ops_task_clock = {
 /*
  * Software counter: cpu migrations
  */
-
-static inline u64 get_cpu_migrations(struct perf_counter *counter)
-{
-	struct task_struct *curr = counter->ctx->task;
-
-	if (curr)
-		return curr->se.nr_migrations;
-	return cpu_nr_migrations(smp_processor_id());
-}
-
-static void cpu_migrations_perf_counter_update(struct perf_counter *counter)
-{
-	u64 prev, now;
-	s64 delta;
-
-	prev = atomic64_read(&counter->hw.prev_count);
-	now = get_cpu_migrations(counter);
-
-	atomic64_set(&counter->hw.prev_count, now);
-
-	delta = now - prev;
-
-	atomic64_add(delta, &counter->count);
-}
-
-static void cpu_migrations_perf_counter_read(struct perf_counter *counter)
+void perf_counter_task_migration(struct task_struct *task, int cpu)
 {
-	cpu_migrations_perf_counter_update(counter);
-}
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_counter_context *ctx;
 
-static int cpu_migrations_perf_counter_enable(struct perf_counter *counter)
-{
-	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
-		atomic64_set(&counter->hw.prev_count,
-			     get_cpu_migrations(counter));
-	return 0;
-}
+	perf_swcounter_ctx_event(&cpuctx->ctx, PERF_TYPE_SOFTWARE,
+				 PERF_COUNT_CPU_MIGRATIONS,
+				 1, 1, NULL, 0);
 
-static void cpu_migrations_perf_counter_disable(struct perf_counter *counter)
-{
-	cpu_migrations_perf_counter_update(counter);
+	ctx = perf_pin_task_context(task);
+	if (ctx) {
+		perf_swcounter_ctx_event(ctx, PERF_TYPE_SOFTWARE,
+					 PERF_COUNT_CPU_MIGRATIONS,
+					 1, 1, NULL, 0);
+		perf_unpin_context(ctx);
+	}
 }
 
-static const struct pmu perf_ops_cpu_migrations = {
-	.enable		= cpu_migrations_perf_counter_enable,
-	.disable	= cpu_migrations_perf_counter_disable,
-	.read		= cpu_migrations_perf_counter_read,
-};
-
 #ifdef CONFIG_EVENT_PROFILE
 void perf_tpcounter_event(int event_id)
 {
@@ -3272,11 +3243,8 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 	case PERF_COUNT_PAGE_FAULTS_MIN:
 	case PERF_COUNT_PAGE_FAULTS_MAJ:
 	case PERF_COUNT_CONTEXT_SWITCHES:
-		pmu = &perf_ops_generic;
-		break;
 	case PERF_COUNT_CPU_MIGRATIONS:
-		if (!counter->hw_event.exclude_kernel)
-			pmu = &perf_ops_cpu_migrations;
+		pmu = &perf_ops_generic;
 		break;
 	}
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 3226cc132e9..8d43347a0c0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1977,6 +1977,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 		if (task_hot(p, old_rq->clock, NULL))
 			schedstat_inc(p, se.nr_forced2_migrations);
 #endif
+		perf_counter_task_migration(p, new_cpu);
 	}
 	p->se.vruntime -= old_cfsrq->min_vruntime -
 					 new_cfsrq->min_vruntime;
-- 
cgit v1.2.3-70-g09d2


From bf4e0ed3d027ce581be18496036862131b5f32aa Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 1 Jun 2009 17:53:16 +1000
Subject: perf_counter: Remove unused prev_state field

This removes the prev_state field of struct perf_counter since
it is now unused.  It was only used by the cpu migration
counter, which doesn't use it any more.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <18979.35052.915728.626374@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 1 -
 kernel/perf_counter.c        | 4 ----
 2 files changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index deb9acf9ad2..d970fbc16af 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -427,7 +427,6 @@ struct perf_counter {
 	const struct pmu		*pmu;
 
 	enum perf_counter_active_state	state;
-	enum perf_counter_active_state	prev_state;
 	atomic64_t			count;
 
 	/*
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index cd94cf3bf9e..fbed4d28ad7 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -572,7 +572,6 @@ group_sched_in(struct perf_counter *group_counter,
 	if (ret)
 		return ret < 0 ? ret : 0;
 
-	group_counter->prev_state = group_counter->state;
 	if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
 		return -EAGAIN;
 
@@ -580,7 +579,6 @@ group_sched_in(struct perf_counter *group_counter,
 	 * Schedule in siblings as one group (if any):
 	 */
 	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
-		counter->prev_state = counter->state;
 		if (counter_sched_in(counter, cpuctx, ctx, cpu)) {
 			partial_group = counter;
 			goto group_error;
@@ -657,7 +655,6 @@ static void add_counter_to_ctx(struct perf_counter *counter,
 			       struct perf_counter_context *ctx)
 {
 	list_add_counter(counter, ctx);
-	counter->prev_state = PERF_COUNTER_STATE_OFF;
 	counter->tstamp_enabled = ctx->time;
 	counter->tstamp_running = ctx->time;
 	counter->tstamp_stopped = ctx->time;
@@ -820,7 +817,6 @@ static void __perf_counter_enable(void *info)
 	ctx->is_active = 1;
 	update_context_time(ctx);
 
-	counter->prev_state = counter->state;
 	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
 		goto unlock;
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
-- 
cgit v1.2.3-70-g09d2


From 874ab9233eeddb85fd2dd85131c145bde75da39a Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Tue, 2 Jun 2009 13:58:56 +0200
Subject: netfilter: nf_ct_tcp: TCP simultaneous open support

The patch below adds supporting TCP simultaneous open to conntrack. The
unused LISTEN state is replaced by a new state (SYN_SENT2) denoting the
second SYN sent from the reply direction in the new case. The state table
is updated and the function tcp_in_window is modified to handle
simultaneous open.

The functionality can fairly easily be tested by socat. A sample tcpdump
recording

23:21:34.244733 IP (tos 0x0, ttl 64, id 49224, offset 0, flags [DF], proto TCP (6), length 60) 192.168.0.254.2020 > 192.168.0.1.2020: S, cksum 0xe75f (correct), 3383710133:3383710133(0) win 5840 <mss 1460,sackOK,timestamp 173445629 0,nop,wscale 7>
23:21:34.244783 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 40) 192.168.0.1.2020 > 192.168.0.254.2020: R, cksum 0x0253 (correct), 0:0(0) ack 3383710134 win 0
23:21:36.038680 IP (tos 0x0, ttl 64, id 28092, offset 0, flags [DF], proto TCP (6), length 60) 192.168.0.1.2020 > 192.168.0.254.2020: S, cksum 0x704b (correct), 2634546729:2634546729(0) win 5840 <mss 1460,sackOK,timestamp 824213 0,nop,wscale 1>
23:21:36.038777 IP (tos 0x0, ttl 64, id 49225, offset 0, flags [DF], proto TCP (6), length 60) 192.168.0.254.2020 > 192.168.0.1.2020: S, cksum 0xb179 (correct), 3383710133:3383710133(0) ack 2634546730 win 5840 <mss 1460,sackOK,timestamp 173447423 824213,nop,wscale 7>
23:21:36.038847 IP (tos 0x0, ttl 64, id 28093, offset 0, flags [DF], proto TCP (6), length 52) 192.168.0.1.2020 > 192.168.0.254.2020: ., cksum 0xebad (correct), ack 3383710134 win 2920 <nop,nop,timestamp 824213 173447423>

and the corresponding netlink events:

    [NEW] tcp      6 120 SYN_SENT src=192.168.0.254 dst=192.168.0.1 sport=2020 dport=2020 [UNREPLIED] src=192.168.0.1 dst=192.168.0.254 sport=2020 dport=2020
 [UPDATE] tcp      6 120 LISTEN src=192.168.0.254 dst=192.168.0.1 sport=2020 dport=2020 src=192.168.0.1 dst=192.168.0.254 sport=2020 dport=2020
 [UPDATE] tcp      6 60 SYN_RECV src=192.168.0.254 dst=192.168.0.1 sport=2020 dport=2020 src=192.168.0.1 dst=192.168.0.254 sport=2020 dport=2020
 [UPDATE] tcp      6 432000 ESTABLISHED src=192.168.0.254 dst=192.168.0.1 sport=2020 dport=2020 src=192.168.0.1 dst=192.168.0.254 sport=2020 dport=2020 [ASSURED]

The RST packet was dropped in the raw table, thus it did not reach
conntrack.  nfnetlink_conntrack is unpatched so it shows the new SYN_SENT2
state as the old unused LISTEN.

With TCP simultaneous open support we satisfy REQ-2 in RFC 5382  ;-) .

Additional minor correction in this patch is that in order to catch
uninitialized reply directions, "td_maxwin == 0" is used instead of
"td_end == 0" because the former can't be true except in uninitialized
state while td_end may accidentally be equal to zero in the mid of a
connection.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nf_conntrack_tcp.h |  3 +-
 net/netfilter/nf_conntrack_proto_tcp.c     | 98 +++++++++++++++++++-----------
 2 files changed, 63 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h
index 3066789b972..74c27ca770e 100644
--- a/include/linux/netfilter/nf_conntrack_tcp.h
+++ b/include/linux/netfilter/nf_conntrack_tcp.h
@@ -15,7 +15,8 @@ enum tcp_conntrack {
 	TCP_CONNTRACK_LAST_ACK,
 	TCP_CONNTRACK_TIME_WAIT,
 	TCP_CONNTRACK_CLOSE,
-	TCP_CONNTRACK_LISTEN,
+	TCP_CONNTRACK_LISTEN,	/* obsolete */
+#define TCP_CONNTRACK_SYN_SENT2	TCP_CONNTRACK_LISTEN
 	TCP_CONNTRACK_MAX,
 	TCP_CONNTRACK_IGNORE
 };
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index b5ccf2b4b2e..4c7f6f0dae9 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -59,7 +59,7 @@ static const char *const tcp_conntrack_names[] = {
 	"LAST_ACK",
 	"TIME_WAIT",
 	"CLOSE",
-	"LISTEN"
+	"SYN_SENT2",
 };
 
 #define SECS * HZ
@@ -82,6 +82,7 @@ static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = {
 	[TCP_CONNTRACK_LAST_ACK]	= 30 SECS,
 	[TCP_CONNTRACK_TIME_WAIT]	= 2 MINS,
 	[TCP_CONNTRACK_CLOSE]		= 10 SECS,
+	[TCP_CONNTRACK_SYN_SENT2]	= 2 MINS,
 };
 
 #define sNO TCP_CONNTRACK_NONE
@@ -93,7 +94,7 @@ static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = {
 #define sLA TCP_CONNTRACK_LAST_ACK
 #define sTW TCP_CONNTRACK_TIME_WAIT
 #define sCL TCP_CONNTRACK_CLOSE
-#define sLI TCP_CONNTRACK_LISTEN
+#define sS2 TCP_CONNTRACK_SYN_SENT2
 #define sIV TCP_CONNTRACK_MAX
 #define sIG TCP_CONNTRACK_IGNORE
 
@@ -123,6 +124,7 @@ enum tcp_bit_set {
  *
  * NONE:	initial state
  * SYN_SENT:	SYN-only packet seen
+ * SYN_SENT2:	SYN-only packet seen from reply dir, simultaneous open
  * SYN_RECV:	SYN-ACK packet seen
  * ESTABLISHED:	ACK packet seen
  * FIN_WAIT:	FIN packet seen
@@ -131,26 +133,24 @@ enum tcp_bit_set {
  * TIME_WAIT:	last ACK seen
  * CLOSE:	closed connection (RST)
  *
- * LISTEN state is not used.
- *
  * Packets marked as IGNORED (sIG):
  *	if they may be either invalid or valid
  *	and the receiver may send back a connection
  *	closing RST or a SYN/ACK.
  *
  * Packets marked as INVALID (sIV):
- *	if they are invalid
- *	or we do not support the request (simultaneous open)
+ *	if we regard them as truly invalid packets
  */
 static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
 	{
 /* ORIGINAL */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*syn*/	   { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
+/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
+/*syn*/	   { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
 /*
  *	sNO -> sSS	Initialize a new connection
  *	sSS -> sSS	Retransmitted SYN
- *	sSR -> sIG	Late retransmitted SYN?
+ *	sS2 -> sS2	Late retransmitted SYN
+ *	sSR -> sIG
  *	sES -> sIG	Error: SYNs in window outside the SYN_SENT state
  *			are errors. Receiver will reply with RST
  *			and close the connection.
@@ -161,22 +161,30 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
  *	sTW -> sSS	Reopened connection (RFC 1122).
  *	sCL -> sSS
  */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
+/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
+/*synack*/ { sIV, sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
 /*
- * A SYN/ACK from the client is always invalid:
- *	- either it tries to set up a simultaneous open, which is
- *	  not supported;
- *	- or the firewall has just been inserted between the two hosts
- *	  during the session set-up. The SYN will be retransmitted
- *	  by the true client (or it'll time out).
+ *	sNO -> sIV	Too late and no reason to do anything
+ *	sSS -> sIV	Client can't send SYN and then SYN/ACK
+ *	sS2 -> sSR	SYN/ACK sent to SYN2 in simultaneous open
+ *	sSR -> sIG
+ *	sES -> sIG	Error: SYNs in window outside the SYN_SENT state
+ *			are errors. Receiver will reply with RST
+ *			and close the connection.
+ *			Or we are not in sync and hold a dead connection.
+ *	sFW -> sIG
+ *	sCW -> sIG
+ *	sLA -> sIG
+ *	sTW -> sIG
+ *	sCL -> sIG
  */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
+/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
 /*
  *	sNO -> sIV	Too late and no reason to do anything...
  *	sSS -> sIV	Client migth not send FIN in this state:
  *			we enforce waiting for a SYN/ACK reply first.
+ *	sS2 -> sIV
  *	sSR -> sFW	Close started.
  *	sES -> sFW
  *	sFW -> sLA	FIN seen in both directions, waiting for
@@ -187,11 +195,12 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
  *	sTW -> sTW
  *	sCL -> sCL
  */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
+/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
 /*ack*/	   { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
 /*
  *	sNO -> sES	Assumed.
  *	sSS -> sIV	ACK is invalid: we haven't seen a SYN/ACK yet.
+ *	sS2 -> sIV
  *	sSR -> sES	Established state is reached.
  *	sES -> sES	:-)
  *	sFW -> sCW	Normal close request answered by ACK.
@@ -200,29 +209,31 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
  *	sTW -> sTW	Retransmitted last ACK. Remain in the same state.
  *	sCL -> sCL
  */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
+/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
+/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
 	},
 	{
 /* REPLY */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*syn*/	   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
+/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
+/*syn*/	   { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 },
 /*
  *	sNO -> sIV	Never reached.
- *	sSS -> sIV	Simultaneous open, not supported
- *	sSR -> sIV	Simultaneous open, not supported.
- *	sES -> sIV	Server may not initiate a connection.
+ *	sSS -> sS2	Simultaneous open
+ *	sS2 -> sS2	Retransmitted simultaneous SYN
+ *	sSR -> sIV	Invalid SYN packets sent by the server
+ *	sES -> sIV
  *	sFW -> sIV
  *	sCW -> sIV
  *	sLA -> sIV
  *	sTW -> sIV	Reopened connection, but server may not do it.
  *	sCL -> sIV
  */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
+/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
+/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
 /*
  *	sSS -> sSR	Standard open.
+ *	sS2 -> sSR	Simultaneous open
  *	sSR -> sSR	Retransmitted SYN/ACK.
  *	sES -> sIG	Late retransmitted SYN/ACK?
  *	sFW -> sIG	Might be SYN/ACK answering ignored SYN
@@ -231,10 +242,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
  *	sTW -> sIG
  *	sCL -> sIG
  */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
+/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
 /*
  *	sSS -> sIV	Server might not send FIN in this state.
+ *	sS2 -> sIV
  *	sSR -> sFW	Close started.
  *	sES -> sFW
  *	sFW -> sLA	FIN seen in both directions.
@@ -243,10 +255,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
  *	sTW -> sTW
  *	sCL -> sCL
  */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*ack*/	   { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
+/*ack*/	   { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
 /*
  *	sSS -> sIG	Might be a half-open connection.
+ *	sS2 -> sIG
  *	sSR -> sSR	Might answer late resent SYN.
  *	sES -> sES	:-)
  *	sFW -> sCW	Normal close request answered by ACK.
@@ -255,8 +268,8 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
  *	sTW -> sTW	Retransmitted last ACK.
  *	sCL -> sCL
  */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
+/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
+/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
 	}
 };
@@ -521,13 +534,14 @@ static bool tcp_in_window(const struct nf_conn *ct,
 		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
 		 receiver->td_scale);
 
-	if (sender->td_end == 0) {
+	if (sender->td_maxwin == 0) {
 		/*
 		 * Initialize sender data.
 		 */
-		if (tcph->syn && tcph->ack) {
+		if (tcph->syn) {
 			/*
-			 * Outgoing SYN-ACK in reply to a SYN.
+			 * SYN-ACK in reply to a SYN
+			 * or SYN from reply direction in simultaneous open.
 			 */
 			sender->td_end =
 			sender->td_maxend = end;
@@ -543,6 +557,9 @@ static bool tcp_in_window(const struct nf_conn *ct,
 			      && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
 				sender->td_scale =
 				receiver->td_scale = 0;
+			if (!tcph->ack)
+				/* Simultaneous open */
+				return true;
 		} else {
 			/*
 			 * We are in the middle of a connection,
@@ -1068,7 +1085,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
 
 	ct->proto.tcp.seen[1].td_end = 0;
 	ct->proto.tcp.seen[1].td_maxend = 0;
-	ct->proto.tcp.seen[1].td_maxwin = 1;
+	ct->proto.tcp.seen[1].td_maxwin = 0;
 	ct->proto.tcp.seen[1].td_scale = 0;
 
 	/* tcp_packet will set them */
@@ -1309,6 +1326,13 @@ static struct ctl_table tcp_compat_sysctl_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
 	},
+	{
+		.procname	= "ip_conntrack_tcp_timeout_syn_sent2",
+		.data		= &tcp_timeouts[TCP_CONNTRACK_SYN_SENT2],
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
 	{
 		.procname	= "ip_conntrack_tcp_timeout_syn_recv",
 		.data		= &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
-- 
cgit v1.2.3-70-g09d2


From 709e50cf870e61745b39552044aa6c7c38e4f9e0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Jun 2009 14:13:15 +0200
Subject: perf_counter: Use PID namespaces properly

Stop using task_struct::pid and start using PID namespaces.

PIDs will be reported in the PID namespace of the monitoring
task at the moment of counter creation.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  3 +++
 kernel/perf_counter.c        | 42 ++++++++++++++++++++++++++++++++++--------
 2 files changed, 37 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index d970fbc16af..9ec20fc6bd3 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -317,6 +317,7 @@ enum perf_event_type {
 #include <linux/spinlock.h>
 #include <linux/hrtimer.h>
 #include <linux/fs.h>
+#include <linux/pid_namespace.h>
 #include <asm/atomic.h>
 
 struct task_struct;
@@ -500,6 +501,8 @@ struct perf_counter {
 
 	void (*destroy)(struct perf_counter *);
 	struct rcu_head			rcu_head;
+
+	struct pid_namespace		*ns;
 #endif
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index fbed4d28ad7..caa012cfe49 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1432,6 +1432,8 @@ static void free_counter_rcu(struct rcu_head *head)
 	struct perf_counter *counter;
 
 	counter = container_of(head, struct perf_counter, rcu_head);
+	if (counter->ns)
+		put_pid_ns(counter->ns);
 	kfree(counter);
 }
 
@@ -2267,6 +2269,28 @@ static void perf_output_end(struct perf_output_handle *handle)
 	rcu_read_unlock();
 }
 
+static u32 perf_counter_pid(struct perf_counter *counter, struct task_struct *p)
+{
+	/*
+	 * only top level counters have the pid namespace they were created in
+	 */
+	if (counter->parent)
+		counter = counter->parent;
+
+	return task_tgid_nr_ns(p, counter->ns);
+}
+
+static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p)
+{
+	/*
+	 * only top level counters have the pid namespace they were created in
+	 */
+	if (counter->parent)
+		counter = counter->parent;
+
+	return task_pid_nr_ns(p, counter->ns);
+}
+
 static void perf_counter_output(struct perf_counter *counter,
 				int nmi, struct pt_regs *regs, u64 addr)
 {
@@ -2303,8 +2327,8 @@ static void perf_counter_output(struct perf_counter *counter,
 
 	if (record_type & PERF_RECORD_TID) {
 		/* namespace issues */
-		tid_entry.pid = current->group_leader->pid;
-		tid_entry.tid = current->pid;
+		tid_entry.pid = perf_counter_pid(counter, current);
+		tid_entry.tid = perf_counter_tid(counter, current);
 
 		header.type |= PERF_RECORD_TID;
 		header.size += sizeof(tid_entry);
@@ -2432,6 +2456,9 @@ static void perf_counter_comm_output(struct perf_counter *counter,
 	if (ret)
 		return;
 
+	comm_event->event.pid = perf_counter_pid(counter, comm_event->task);
+	comm_event->event.tid = perf_counter_tid(counter, comm_event->task);
+
 	perf_output_put(&handle, comm_event->event);
 	perf_output_copy(&handle, comm_event->comm,
 				   comm_event->comm_size);
@@ -2504,8 +2531,6 @@ void perf_counter_comm(struct task_struct *task)
 		.task	= task,
 		.event  = {
 			.header = { .type = PERF_EVENT_COMM, },
-			.pid	= task->group_leader->pid,
-			.tid	= task->pid,
 		},
 	};
 
@@ -2542,6 +2567,9 @@ static void perf_counter_mmap_output(struct perf_counter *counter,
 	if (ret)
 		return;
 
+	mmap_event->event.pid = perf_counter_pid(counter, current);
+	mmap_event->event.tid = perf_counter_tid(counter, current);
+
 	perf_output_put(&handle, mmap_event->event);
 	perf_output_copy(&handle, mmap_event->file_name,
 				   mmap_event->file_size);
@@ -2641,8 +2669,6 @@ void perf_counter_mmap(unsigned long addr, unsigned long len,
 		.file   = file,
 		.event  = {
 			.header = { .type = PERF_EVENT_MMAP, },
-			.pid	= current->group_leader->pid,
-			.tid	= current->pid,
 			.start  = addr,
 			.len    = len,
 			.pgoff  = pgoff,
@@ -2664,8 +2690,6 @@ void perf_counter_munmap(unsigned long addr, unsigned long len,
 		.file   = file,
 		.event  = {
 			.header = { .type = PERF_EVENT_MUNMAP, },
-			.pid	= current->group_leader->pid,
-			.tid	= current->pid,
 			.start  = addr,
 			.len    = len,
 			.pgoff  = pgoff,
@@ -3445,6 +3469,8 @@ SYSCALL_DEFINE5(perf_counter_open,
 	list_add_tail(&counter->owner_entry, &current->perf_counter_list);
 	mutex_unlock(&current->perf_counter_mutex);
 
+	counter->ns = get_pid_ns(current->nsproxy->pid_ns);
+
 	fput_light(counter_file, fput_needed2);
 
 out_fput:
-- 
cgit v1.2.3-70-g09d2


From f2f3e38c63c58a3d39bd710039af8bbd15ecaff6 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 2 Jun 2009 20:03:35 +0200
Subject: netfilter: ctnetlink: rename tuple() by nf_ct_tuple() macro
 definition

This patch move the internal tuple() macro definition to the
header file as nf_ct_tuple().

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h |  2 ++
 net/netfilter/nf_conntrack_netlink.c | 13 ++++++-------
 2 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 6c3f964de9e..b909241b668 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -144,6 +144,8 @@ static inline u_int8_t nf_ct_protonum(const struct nf_conn *ct)
 	return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
 }
 
+#define nf_ct_tuple(ct, dir) (&(ct)->tuplehash[dir].tuple)
+
 /* get master conntrack via master expectation */
 #define master_ct(conntr) (conntr->master)
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 2d7d69ff215..3553f394125 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -346,8 +346,6 @@ nla_put_failure:
 	return -1;
 }
 
-#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple)
-
 static int
 ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 		    int event, const struct nf_conn *ct)
@@ -369,14 +367,14 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
 	if (!nest_parms)
 		goto nla_put_failure;
-	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
+	if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
 		goto nla_put_failure;
 	nla_nest_end(skb, nest_parms);
 
 	nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
 	if (!nest_parms)
 		goto nla_put_failure;
-	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
+	if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
 		goto nla_put_failure;
 	nla_nest_end(skb, nest_parms);
 
@@ -509,7 +507,8 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 	if (!item->report && !nfnetlink_has_listeners(group))
 		return NOTIFY_DONE;
 
-	skb = ctnetlink_alloc_skb(tuple(ct, IP_CT_DIR_ORIGINAL), GFP_ATOMIC);
+	skb = ctnetlink_alloc_skb(nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL),
+				  GFP_ATOMIC);
 	if (!skb)
 		goto errout;
 
@@ -528,14 +527,14 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 	nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
 	if (!nest_parms)
 		goto nla_put_failure;
-	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
+	if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
 		goto nla_put_failure;
 	nla_nest_end(skb, nest_parms);
 
 	nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
 	if (!nest_parms)
 		goto nla_put_failure;
-	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
+	if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
 		goto nla_put_failure;
 	nla_nest_end(skb, nest_parms);
 
-- 
cgit v1.2.3-70-g09d2


From 274d383b9c1906847a64bbb267b0183599ce86a0 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 2 Jun 2009 20:08:38 +0200
Subject: netfilter: conntrack: don't report events on module removal

During the module removal there are no possible event listeners
since ctnetlink must be removed before to allow removing
nf_conntrack. This patch removes the event reporting for the
module removal case which is not of any use in the existing code.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h |  2 +-
 net/netfilter/nf_conntrack_core.c    | 15 ++++++++++-----
 net/netfilter/nf_conntrack_netlink.c |  6 +++---
 3 files changed, 14 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index b909241b668..2ba36dd33ae 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -203,7 +203,7 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple);
 
 extern void nf_conntrack_hash_insert(struct nf_conn *ct);
 
-extern void nf_conntrack_flush(struct net *net, u32 pid, int report);
+extern void nf_conntrack_flush_report(struct net *net, u32 pid, int report);
 
 extern bool nf_ct_get_tuplepr(const struct sk_buff *skb,
 			      unsigned int nhoff, u_int16_t l3num,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 8020db6274b..f59c4edf905 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1001,7 +1001,7 @@ struct __nf_ct_flush_report {
 	int report;
 };
 
-static int kill_all(struct nf_conn *i, void *data)
+static int kill_report(struct nf_conn *i, void *data)
 {
 	struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
 
@@ -1013,6 +1013,11 @@ static int kill_all(struct nf_conn *i, void *data)
 	return 1;
 }
 
+static int kill_all(struct nf_conn *i, void *data)
+{
+	return 1;
+}
+
 void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size)
 {
 	if (vmalloced)
@@ -1023,15 +1028,15 @@ void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size)
 }
 EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
 
-void nf_conntrack_flush(struct net *net, u32 pid, int report)
+void nf_conntrack_flush_report(struct net *net, u32 pid, int report)
 {
 	struct __nf_ct_flush_report fr = {
 		.pid 	= pid,
 		.report = report,
 	};
-	nf_ct_iterate_cleanup(net, kill_all, &fr);
+	nf_ct_iterate_cleanup(net, kill_report, &fr);
 }
-EXPORT_SYMBOL_GPL(nf_conntrack_flush);
+EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
 
 static void nf_conntrack_cleanup_init_net(void)
 {
@@ -1045,7 +1050,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
 	nf_ct_event_cache_flush(net);
 	nf_conntrack_ecache_fini(net);
  i_see_dead_people:
-	nf_conntrack_flush(net, 0, 0);
+	nf_ct_iterate_cleanup(net, kill_all, NULL);
 	if (atomic_read(&net->ct.count) != 0) {
 		schedule();
 		goto i_see_dead_people;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 58fde0e47b0..3a20de1be63 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -777,9 +777,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
 	else {
 		/* Flush the whole table */
-		nf_conntrack_flush(&init_net, 
-				   NETLINK_CB(skb).pid, 
-				   nlmsg_report(nlh));
+		nf_conntrack_flush_report(&init_net,
+					 NETLINK_CB(skb).pid,
+					 nlmsg_report(nlh));
 		return 0;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 6bfea1984aea86089907caf8974513c2402a3b3d Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 2 Jun 2009 20:08:44 +0200
Subject: netfilter: conntrack: remove events flags from userspace exposed file

This patch moves the event flags from linux/netfilter/nf_conntrack_common.h
to net/netfilter/nf_conntrack_ecache.h. This flags are not of any use
from userspace.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_common.h | 69 ---------------------------
 include/net/netfilter/nf_conntrack_ecache.h   | 69 +++++++++++++++++++++++++++
 2 files changed, 69 insertions(+), 69 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 885cbe28226..a8248ee422b 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -75,75 +75,6 @@ enum ip_conntrack_status {
 	IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT),
 };
 
-/* Connection tracking event bits */
-enum ip_conntrack_events
-{
-	/* New conntrack */
-	IPCT_NEW_BIT = 0,
-	IPCT_NEW = (1 << IPCT_NEW_BIT),
-
-	/* Expected connection */
-	IPCT_RELATED_BIT = 1,
-	IPCT_RELATED = (1 << IPCT_RELATED_BIT),
-
-	/* Destroyed conntrack */
-	IPCT_DESTROY_BIT = 2,
-	IPCT_DESTROY = (1 << IPCT_DESTROY_BIT),
-
-	/* Timer has been refreshed */
-	IPCT_REFRESH_BIT = 3,
-	IPCT_REFRESH = (1 << IPCT_REFRESH_BIT),
-
-	/* Status has changed */
-	IPCT_STATUS_BIT = 4,
-	IPCT_STATUS = (1 << IPCT_STATUS_BIT),
-
-	/* Update of protocol info */
-	IPCT_PROTOINFO_BIT = 5,
-	IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT),
-
-	/* Volatile protocol info */
-	IPCT_PROTOINFO_VOLATILE_BIT = 6,
-	IPCT_PROTOINFO_VOLATILE = (1 << IPCT_PROTOINFO_VOLATILE_BIT),
-
-	/* New helper for conntrack */
-	IPCT_HELPER_BIT = 7,
-	IPCT_HELPER = (1 << IPCT_HELPER_BIT),
-
-	/* Update of helper info */
-	IPCT_HELPINFO_BIT = 8,
-	IPCT_HELPINFO = (1 << IPCT_HELPINFO_BIT),
-
-	/* Volatile helper info */
-	IPCT_HELPINFO_VOLATILE_BIT = 9,
-	IPCT_HELPINFO_VOLATILE = (1 << IPCT_HELPINFO_VOLATILE_BIT),
-
-	/* NAT info */
-	IPCT_NATINFO_BIT = 10,
-	IPCT_NATINFO = (1 << IPCT_NATINFO_BIT),
-
-	/* Counter highest bit has been set, unused */
-	IPCT_COUNTER_FILLING_BIT = 11,
-	IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT),
-
-	/* Mark is set */
-	IPCT_MARK_BIT = 12,
-	IPCT_MARK = (1 << IPCT_MARK_BIT),
-
-	/* NAT sequence adjustment */
-	IPCT_NATSEQADJ_BIT = 13,
-	IPCT_NATSEQADJ = (1 << IPCT_NATSEQADJ_BIT),
-
-	/* Secmark is set */
-	IPCT_SECMARK_BIT = 14,
-	IPCT_SECMARK = (1 << IPCT_SECMARK_BIT),
-};
-
-enum ip_conntrack_expect_events {
-	IPEXP_NEW_BIT = 0,
-	IPEXP_NEW = (1 << IPEXP_NEW_BIT),
-};
-
 #ifdef __KERNEL__
 struct ip_conntrack_stat
 {
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 0ff0dc69ca4..892b8cdf7f6 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -11,6 +11,75 @@
 #include <net/net_namespace.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 
+/* Connection tracking event bits */
+enum ip_conntrack_events
+{
+	/* New conntrack */
+	IPCT_NEW_BIT = 0,
+	IPCT_NEW = (1 << IPCT_NEW_BIT),
+
+	/* Expected connection */
+	IPCT_RELATED_BIT = 1,
+	IPCT_RELATED = (1 << IPCT_RELATED_BIT),
+
+	/* Destroyed conntrack */
+	IPCT_DESTROY_BIT = 2,
+	IPCT_DESTROY = (1 << IPCT_DESTROY_BIT),
+
+	/* Timer has been refreshed */
+	IPCT_REFRESH_BIT = 3,
+	IPCT_REFRESH = (1 << IPCT_REFRESH_BIT),
+
+	/* Status has changed */
+	IPCT_STATUS_BIT = 4,
+	IPCT_STATUS = (1 << IPCT_STATUS_BIT),
+
+	/* Update of protocol info */
+	IPCT_PROTOINFO_BIT = 5,
+	IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT),
+
+	/* Volatile protocol info */
+	IPCT_PROTOINFO_VOLATILE_BIT = 6,
+	IPCT_PROTOINFO_VOLATILE = (1 << IPCT_PROTOINFO_VOLATILE_BIT),
+
+	/* New helper for conntrack */
+	IPCT_HELPER_BIT = 7,
+	IPCT_HELPER = (1 << IPCT_HELPER_BIT),
+
+	/* Update of helper info */
+	IPCT_HELPINFO_BIT = 8,
+	IPCT_HELPINFO = (1 << IPCT_HELPINFO_BIT),
+
+	/* Volatile helper info */
+	IPCT_HELPINFO_VOLATILE_BIT = 9,
+	IPCT_HELPINFO_VOLATILE = (1 << IPCT_HELPINFO_VOLATILE_BIT),
+
+	/* NAT info */
+	IPCT_NATINFO_BIT = 10,
+	IPCT_NATINFO = (1 << IPCT_NATINFO_BIT),
+
+	/* Counter highest bit has been set, unused */
+	IPCT_COUNTER_FILLING_BIT = 11,
+	IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT),
+
+	/* Mark is set */
+	IPCT_MARK_BIT = 12,
+	IPCT_MARK = (1 << IPCT_MARK_BIT),
+
+	/* NAT sequence adjustment */
+	IPCT_NATSEQADJ_BIT = 13,
+	IPCT_NATSEQADJ = (1 << IPCT_NATSEQADJ_BIT),
+
+	/* Secmark is set */
+	IPCT_SECMARK_BIT = 14,
+	IPCT_SECMARK = (1 << IPCT_SECMARK_BIT),
+};
+
+enum ip_conntrack_expect_events {
+	IPEXP_NEW_BIT = 0,
+	IPEXP_NEW = (1 << IPEXP_NEW_BIT),
+};
+
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
 struct nf_conntrack_ecache {
 	struct nf_conn *ct;
-- 
cgit v1.2.3-70-g09d2


From 17e6e4eac070607a35464ea7e2c5eceac32e5eca Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 2 Jun 2009 20:08:46 +0200
Subject: netfilter: conntrack: simplify event caching system

This patch simplifies the conntrack event caching system by removing
several events:

 * IPCT_[*]_VOLATILE, IPCT_HELPINFO and IPCT_NATINFO has been deleted
   since the have no clients.
 * IPCT_COUNTER_FILLING which is a leftover of the 32-bits counter
   days.
 * IPCT_REFRESH which is not of any use since we always include the
   timeout in the messages.

After this patch, the existing events are:

 * IPCT_NEW, IPCT_RELATED and IPCT_DESTROY, that are used to identify
 addition and deletion of entries.
 * IPCT_STATUS, that notes that the status bits have changes,
 eg. IPS_SEEN_REPLY and IPS_ASSURED.
 * IPCT_PROTOINFO, that reports that internal protocol information has
 changed, eg. the TCP, DCCP and SCTP protocol state.
 * IPCT_HELPER, that a helper has been assigned or unassigned to this
 entry.
 * IPCT_MARK and IPCT_SECMARK, that reports that the mark has changed, this
 covers the case when a mark is set to zero.
 * IPCT_NATSEQADJ, to report that there's updates in the NAT sequence
 adjustment.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_ecache.h    | 36 +++++---------------------
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   |  1 -
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c |  1 -
 net/netfilter/nf_conntrack_core.c              | 14 +---------
 net/netfilter/nf_conntrack_ftp.c               |  2 --
 net/netfilter/nf_conntrack_netlink.c           |  2 +-
 net/netfilter/nf_conntrack_proto_tcp.c         |  1 -
 7 files changed, 8 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 892b8cdf7f6..2e17a2d0eb3 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -26,52 +26,28 @@ enum ip_conntrack_events
 	IPCT_DESTROY_BIT = 2,
 	IPCT_DESTROY = (1 << IPCT_DESTROY_BIT),
 
-	/* Timer has been refreshed */
-	IPCT_REFRESH_BIT = 3,
-	IPCT_REFRESH = (1 << IPCT_REFRESH_BIT),
-
 	/* Status has changed */
-	IPCT_STATUS_BIT = 4,
+	IPCT_STATUS_BIT = 3,
 	IPCT_STATUS = (1 << IPCT_STATUS_BIT),
 
 	/* Update of protocol info */
-	IPCT_PROTOINFO_BIT = 5,
+	IPCT_PROTOINFO_BIT = 4,
 	IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT),
 
-	/* Volatile protocol info */
-	IPCT_PROTOINFO_VOLATILE_BIT = 6,
-	IPCT_PROTOINFO_VOLATILE = (1 << IPCT_PROTOINFO_VOLATILE_BIT),
-
 	/* New helper for conntrack */
-	IPCT_HELPER_BIT = 7,
+	IPCT_HELPER_BIT = 5,
 	IPCT_HELPER = (1 << IPCT_HELPER_BIT),
 
-	/* Update of helper info */
-	IPCT_HELPINFO_BIT = 8,
-	IPCT_HELPINFO = (1 << IPCT_HELPINFO_BIT),
-
-	/* Volatile helper info */
-	IPCT_HELPINFO_VOLATILE_BIT = 9,
-	IPCT_HELPINFO_VOLATILE = (1 << IPCT_HELPINFO_VOLATILE_BIT),
-
-	/* NAT info */
-	IPCT_NATINFO_BIT = 10,
-	IPCT_NATINFO = (1 << IPCT_NATINFO_BIT),
-
-	/* Counter highest bit has been set, unused */
-	IPCT_COUNTER_FILLING_BIT = 11,
-	IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT),
-
 	/* Mark is set */
-	IPCT_MARK_BIT = 12,
+	IPCT_MARK_BIT = 6,
 	IPCT_MARK = (1 << IPCT_MARK_BIT),
 
 	/* NAT sequence adjustment */
-	IPCT_NATSEQADJ_BIT = 13,
+	IPCT_NATSEQADJ_BIT = 7,
 	IPCT_NATSEQADJ = (1 << IPCT_NATSEQADJ_BIT),
 
 	/* Secmark is set */
-	IPCT_SECMARK_BIT = 14,
+	IPCT_SECMARK_BIT = 8,
 	IPCT_SECMARK = (1 << IPCT_SECMARK_BIT),
 };
 
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 23b2c2ee869..c6ab3d99e79 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -91,7 +91,6 @@ static int icmp_packet(struct nf_conn *ct,
 			nf_ct_kill_acct(ct, ctinfo, skb);
 	} else {
 		atomic_inc(&ct->proto.icmp.count);
-		nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
 		nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
 	}
 
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 9903227bf37..a0acd9655fe 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -104,7 +104,6 @@ static int icmpv6_packet(struct nf_conn *ct,
 			nf_ct_kill_acct(ct, ctinfo, skb);
 	} else {
 		atomic_inc(&ct->proto.icmp.count);
-		nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
 		nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout);
 	}
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index f59c4edf905..b54c23475e9 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -398,11 +398,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	help = nfct_help(ct);
 	if (help && help->helper)
 		nf_conntrack_event_cache(IPCT_HELPER, ct);
-#ifdef CONFIG_NF_NAT_NEEDED
-	if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
-	    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
-		nf_conntrack_event_cache(IPCT_NATINFO, ct);
-#endif
+
 	nf_conntrack_event_cache(master_ct(ct) ?
 				 IPCT_RELATED : IPCT_NEW, ct);
 	return NF_ACCEPT;
@@ -807,8 +803,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
 			  unsigned long extra_jiffies,
 			  int do_acct)
 {
-	int event = 0;
-
 	NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
 	NF_CT_ASSERT(skb);
 
@@ -821,7 +815,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
 	/* If not in hash table, timer will not be active yet */
 	if (!nf_ct_is_confirmed(ct)) {
 		ct->timeout.expires = extra_jiffies;
-		event = IPCT_REFRESH;
 	} else {
 		unsigned long newtime = jiffies + extra_jiffies;
 
@@ -832,7 +825,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
 		    && del_timer(&ct->timeout)) {
 			ct->timeout.expires = newtime;
 			add_timer(&ct->timeout);
-			event = IPCT_REFRESH;
 		}
 	}
 
@@ -849,10 +841,6 @@ acct:
 	}
 
 	spin_unlock_bh(&nf_conntrack_lock);
-
-	/* must be unlocked when calling event cache */
-	if (event)
-		nf_conntrack_event_cache(event, ct);
 }
 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
 
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 00fecc385f9..5509dd1f14c 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -338,11 +338,9 @@ static void update_nl_seq(struct nf_conn *ct, u32 nl_seq,
 
 	if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
 		info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
-		nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct);
 	} else if (oldest != NUM_SEQ_TO_REMEMBER &&
 		   after(nl_seq, info->seq_aft_nl[dir][oldest])) {
 		info->seq_aft_nl[dir][oldest] = nl_seq;
-		nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct);
 	}
 }
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 3a20de1be63..b1b9e4fb7de 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -477,7 +477,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 		type = IPCTNL_MSG_CT_NEW;
 		flags = NLM_F_CREATE|NLM_F_EXCL;
 		group = NFNLGRP_CONNTRACK_NEW;
-	} else  if (events & (IPCT_STATUS | IPCT_PROTOINFO)) {
+	} else  if (events) {
 		type = IPCTNL_MSG_CT_NEW;
 		group = NFNLGRP_CONNTRACK_UPDATE;
 	} else
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 4c7f6f0dae9..b7e8a825efe 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -991,7 +991,6 @@ static int tcp_packet(struct nf_conn *ct,
 		timeout = tcp_timeouts[new_state];
 	write_unlock_bh(&tcp_lock);
 
-	nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
 	if (new_state != old_state)
 		nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
 
-- 
cgit v1.2.3-70-g09d2


From 53e111a730ea8b002d57dd226098c12789993329 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Jun 2009 17:01:58 +0200
Subject: x86: Fix atomic_long_xchg() on 64bit

Apparently I'm the first to use it :-)

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-generic/atomic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index 3673a13b670..81d3be459ef 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -134,7 +134,7 @@ static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
 #define atomic_long_cmpxchg(l, old, new) \
 	(atomic64_cmpxchg((atomic64_t *)(l), (old), (new)))
 #define atomic_long_xchg(v, new) \
-	(atomic64_xchg((atomic64_t *)(l), (new)))
+	(atomic64_xchg((atomic64_t *)(v), (new)))
 
 #else  /*  BITS_PER_LONG == 64  */
 
-- 
cgit v1.2.3-70-g09d2


From 8e5799b1ad2a0567fdfaaf0e91b40efee010f2c1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Jun 2009 15:08:15 +0200
Subject: perf_counter: Add unique counter id

Stephan raised the issue that we currently cannot distinguish between
similar counters within a group (PERF_RECORD_GROUP uses the config
value as identifier).

Therefore, generate a new ID for each counter using a global u64
sequence counter.

Reported-by: Stephane Eranian <eranian@googlemail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 8 +++++---
 kernel/perf_counter.c        | 9 +++++++--
 2 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 9ec20fc6bd3..4845a214b9e 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -114,8 +114,9 @@ enum perf_counter_record_format {
  * in increasing order of bit value, after the counter value.
  */
 enum perf_counter_read_format {
-	PERF_FORMAT_TOTAL_TIME_ENABLED	=  1,
-	PERF_FORMAT_TOTAL_TIME_RUNNING	=  2,
+	PERF_FORMAT_TOTAL_TIME_ENABLED	=  1U << 0,
+	PERF_FORMAT_TOTAL_TIME_RUNNING	=  1U << 1,
+	PERF_FORMAT_ID			=  1U << 2,
 };
 
 /*
@@ -290,7 +291,7 @@ enum perf_event_type {
 	 *	{ u32			cpu, res; } && PERF_RECORD_CPU
 	 *
 	 *	{ u64			nr;
-	 *	  { u64 event, val; }	cnt[nr];  } && PERF_RECORD_GROUP
+	 *	  { u64 id, val; }	cnt[nr];  } && PERF_RECORD_GROUP
 	 *
 	 *	{ u16			nr,
 	 *				hv,
@@ -503,6 +504,7 @@ struct perf_counter {
 	struct rcu_head			rcu_head;
 
 	struct pid_namespace		*ns;
+	u64				id;
 #endif
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index caa012cfe49..978ecfcc7aa 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1510,6 +1510,8 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 	if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
 		values[n++] = counter->total_time_running +
 			atomic64_read(&counter->child_total_time_running);
+	if (counter->hw_event.read_format & PERF_FORMAT_ID)
+		values[n++] = counter->id;
 	mutex_unlock(&counter->child_mutex);
 
 	if (count < n * sizeof(u64))
@@ -2303,7 +2305,7 @@ static void perf_counter_output(struct perf_counter *counter,
 		u32 pid, tid;
 	} tid_entry;
 	struct {
-		u64 event;
+		u64 id;
 		u64 counter;
 	} group_entry;
 	struct perf_callchain_entry *callchain = NULL;
@@ -2416,7 +2418,7 @@ static void perf_counter_output(struct perf_counter *counter,
 			if (sub != counter)
 				sub->pmu->read(sub);
 
-			group_entry.event = sub->hw_event.config;
+			group_entry.id = sub->id;
 			group_entry.counter = atomic64_read(&sub->count);
 
 			perf_output_put(&handle, group_entry);
@@ -3375,6 +3377,8 @@ done:
 	return counter;
 }
 
+static atomic64_t perf_counter_id;
+
 /**
  * sys_perf_counter_open - open a performance counter, associate it to a task/cpu
  *
@@ -3470,6 +3474,7 @@ SYSCALL_DEFINE5(perf_counter_open,
 	mutex_unlock(&current->perf_counter_mutex);
 
 	counter->ns = get_pid_ns(current->nsproxy->pid_ns);
+	counter->id = atomic64_inc_return(&perf_counter_id);
 
 	fput_light(counter_file, fput_needed2);
 
-- 
cgit v1.2.3-70-g09d2


From b23f3325ed465f1bd914384884269af0d106778c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Jun 2009 15:13:03 +0200
Subject: perf_counter: Rename various fields

A few renames:

  s/irq_period/sample_period/
  s/irq_freq/sample_freq/
  s/PERF_RECORD_/PERF_SAMPLE_/
  s/record_type/sample_type/

And change both the new sample_type and read_format to u64.

Reported-by: Stephane Eranian <eranian@googlemail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/perf_counter.c |  12 ++---
 arch/x86/kernel/cpu/perf_counter.c |   8 +--
 include/linux/perf_counter.h       |  32 ++++++------
 kernel/perf_counter.c              | 104 ++++++++++++++++++-------------------
 4 files changed, 78 insertions(+), 78 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index f96d55f55bd..c9633321e7a 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -535,7 +535,7 @@ void hw_perf_enable(void)
 			continue;
 		}
 		val = 0;
-		if (counter->hw.irq_period) {
+		if (counter->hw.sample_period) {
 			left = atomic64_read(&counter->hw.period_left);
 			if (left < 0x80000000L)
 				val = 0x80000000L - left;
@@ -749,12 +749,12 @@ static void power_pmu_unthrottle(struct perf_counter *counter)
 	s64 val, left;
 	unsigned long flags;
 
-	if (!counter->hw.idx || !counter->hw.irq_period)
+	if (!counter->hw.idx || !counter->hw.sample_period)
 		return;
 	local_irq_save(flags);
 	perf_disable();
 	power_pmu_read(counter);
-	left = counter->hw.irq_period;
+	left = counter->hw.sample_period;
 	val = 0;
 	if (left < 0x80000000L)
 		val = 0x80000000L - left;
@@ -789,7 +789,7 @@ static int can_go_on_limited_pmc(struct perf_counter *counter, u64 ev,
 	if (counter->hw_event.exclude_user
 	    || counter->hw_event.exclude_kernel
 	    || counter->hw_event.exclude_hv
-	    || counter->hw_event.irq_period)
+	    || counter->hw_event.sample_period)
 		return 0;
 
 	if (ppmu->limited_pmc_event(ev))
@@ -925,7 +925,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 
 	counter->hw.config = events[n];
 	counter->hw.counter_base = cflags[n];
-	atomic64_set(&counter->hw.period_left, counter->hw.irq_period);
+	atomic64_set(&counter->hw.period_left, counter->hw.sample_period);
 
 	/*
 	 * See if we need to reserve the PMU.
@@ -958,7 +958,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 static void record_and_restart(struct perf_counter *counter, long val,
 			       struct pt_regs *regs, int nmi)
 {
-	u64 period = counter->hw.irq_period;
+	u64 period = counter->hw.sample_period;
 	s64 prev, delta, left;
 	int record = 0;
 	u64 addr, mmcra, sdsync;
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 316b0c995f3..ec06aa5e928 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -290,11 +290,11 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	hwc->nmi	= 1;
 	hw_event->nmi	= 1;
 
-	if (!hwc->irq_period)
-		hwc->irq_period = x86_pmu.max_period;
+	if (!hwc->sample_period)
+		hwc->sample_period = x86_pmu.max_period;
 
 	atomic64_set(&hwc->period_left,
-			min(x86_pmu.max_period, hwc->irq_period));
+			min(x86_pmu.max_period, hwc->sample_period));
 
 	/*
 	 * Raw event type provide the config in the event structure
@@ -462,7 +462,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
 			     struct hw_perf_counter *hwc, int idx)
 {
 	s64 left = atomic64_read(&hwc->period_left);
-	s64 period = min(x86_pmu.max_period, hwc->irq_period);
+	s64 period = min(x86_pmu.max_period, hwc->sample_period);
 	int err;
 
 	/*
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 4845a214b9e..1fcd3cc9385 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -94,18 +94,18 @@ enum sw_event_ids {
 #define PERF_COUNTER_EVENT_MASK		__PERF_COUNTER_MASK(EVENT)
 
 /*
- * Bits that can be set in hw_event.record_type to request information
+ * Bits that can be set in hw_event.sample_type to request information
  * in the overflow packets.
  */
-enum perf_counter_record_format {
-	PERF_RECORD_IP			= 1U << 0,
-	PERF_RECORD_TID			= 1U << 1,
-	PERF_RECORD_TIME		= 1U << 2,
-	PERF_RECORD_ADDR		= 1U << 3,
-	PERF_RECORD_GROUP		= 1U << 4,
-	PERF_RECORD_CALLCHAIN		= 1U << 5,
-	PERF_RECORD_CONFIG		= 1U << 6,
-	PERF_RECORD_CPU			= 1U << 7,
+enum perf_counter_sample_format {
+	PERF_SAMPLE_IP			= 1U << 0,
+	PERF_SAMPLE_TID			= 1U << 1,
+	PERF_SAMPLE_TIME		= 1U << 2,
+	PERF_SAMPLE_ADDR		= 1U << 3,
+	PERF_SAMPLE_GROUP		= 1U << 4,
+	PERF_SAMPLE_CALLCHAIN		= 1U << 5,
+	PERF_SAMPLE_CONFIG		= 1U << 6,
+	PERF_SAMPLE_CPU			= 1U << 7,
 };
 
 /*
@@ -132,12 +132,12 @@ struct perf_counter_hw_event {
 	__u64			config;
 
 	union {
-		__u64		irq_period;
-		__u64		irq_freq;
+		__u64		sample_period;
+		__u64		sample_freq;
 	};
 
-	__u32			record_type;
-	__u32			read_format;
+	__u64			sample_type;
+	__u64			read_format;
 
 	__u64			disabled       :  1, /* off by default        */
 				nmi	       :  1, /* NMI sampling          */
@@ -262,7 +262,7 @@ enum perf_event_type {
 	 * struct {
 	 *	struct perf_event_header	header;
 	 *	u64				time;
-	 *	u64				irq_period;
+	 *	u64				sample_period;
 	 * };
 	 */
 	PERF_EVENT_PERIOD		= 4,
@@ -363,7 +363,7 @@ struct hw_perf_counter {
 		};
 	};
 	atomic64_t			prev_count;
-	u64				irq_period;
+	u64				sample_period;
 	atomic64_t			period_left;
 	u64				interrupts;
 #endif
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 978ecfcc7aa..5ecd9981c03 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1186,7 +1186,7 @@ static void perf_log_period(struct perf_counter *counter, u64 period);
 static void perf_adjust_freq(struct perf_counter_context *ctx)
 {
 	struct perf_counter *counter;
-	u64 interrupts, irq_period;
+	u64 interrupts, sample_period;
 	u64 events, period;
 	s64 delta;
 
@@ -1204,23 +1204,23 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
 			interrupts = 2*sysctl_perf_counter_limit/HZ;
 		}
 
-		if (!counter->hw_event.freq || !counter->hw_event.irq_freq)
+		if (!counter->hw_event.freq || !counter->hw_event.sample_freq)
 			continue;
 
-		events = HZ * interrupts * counter->hw.irq_period;
-		period = div64_u64(events, counter->hw_event.irq_freq);
+		events = HZ * interrupts * counter->hw.sample_period;
+		period = div64_u64(events, counter->hw_event.sample_freq);
 
-		delta = (s64)(1 + period - counter->hw.irq_period);
+		delta = (s64)(1 + period - counter->hw.sample_period);
 		delta >>= 1;
 
-		irq_period = counter->hw.irq_period + delta;
+		sample_period = counter->hw.sample_period + delta;
 
-		if (!irq_period)
-			irq_period = 1;
+		if (!sample_period)
+			sample_period = 1;
 
-		perf_log_period(counter, irq_period);
+		perf_log_period(counter, sample_period);
 
-		counter->hw.irq_period = irq_period;
+		counter->hw.sample_period = sample_period;
 	}
 	spin_unlock(&ctx->lock);
 }
@@ -2297,7 +2297,7 @@ static void perf_counter_output(struct perf_counter *counter,
 				int nmi, struct pt_regs *regs, u64 addr)
 {
 	int ret;
-	u64 record_type = counter->hw_event.record_type;
+	u64 sample_type = counter->hw_event.sample_type;
 	struct perf_output_handle handle;
 	struct perf_event_header header;
 	u64 ip;
@@ -2321,61 +2321,61 @@ static void perf_counter_output(struct perf_counter *counter,
 	header.misc = PERF_EVENT_MISC_OVERFLOW;
 	header.misc |= perf_misc_flags(regs);
 
-	if (record_type & PERF_RECORD_IP) {
+	if (sample_type & PERF_SAMPLE_IP) {
 		ip = perf_instruction_pointer(regs);
-		header.type |= PERF_RECORD_IP;
+		header.type |= PERF_SAMPLE_IP;
 		header.size += sizeof(ip);
 	}
 
-	if (record_type & PERF_RECORD_TID) {
+	if (sample_type & PERF_SAMPLE_TID) {
 		/* namespace issues */
 		tid_entry.pid = perf_counter_pid(counter, current);
 		tid_entry.tid = perf_counter_tid(counter, current);
 
-		header.type |= PERF_RECORD_TID;
+		header.type |= PERF_SAMPLE_TID;
 		header.size += sizeof(tid_entry);
 	}
 
-	if (record_type & PERF_RECORD_TIME) {
+	if (sample_type & PERF_SAMPLE_TIME) {
 		/*
 		 * Maybe do better on x86 and provide cpu_clock_nmi()
 		 */
 		time = sched_clock();
 
-		header.type |= PERF_RECORD_TIME;
+		header.type |= PERF_SAMPLE_TIME;
 		header.size += sizeof(u64);
 	}
 
-	if (record_type & PERF_RECORD_ADDR) {
-		header.type |= PERF_RECORD_ADDR;
+	if (sample_type & PERF_SAMPLE_ADDR) {
+		header.type |= PERF_SAMPLE_ADDR;
 		header.size += sizeof(u64);
 	}
 
-	if (record_type & PERF_RECORD_CONFIG) {
-		header.type |= PERF_RECORD_CONFIG;
+	if (sample_type & PERF_SAMPLE_CONFIG) {
+		header.type |= PERF_SAMPLE_CONFIG;
 		header.size += sizeof(u64);
 	}
 
-	if (record_type & PERF_RECORD_CPU) {
-		header.type |= PERF_RECORD_CPU;
+	if (sample_type & PERF_SAMPLE_CPU) {
+		header.type |= PERF_SAMPLE_CPU;
 		header.size += sizeof(cpu_entry);
 
 		cpu_entry.cpu = raw_smp_processor_id();
 	}
 
-	if (record_type & PERF_RECORD_GROUP) {
-		header.type |= PERF_RECORD_GROUP;
+	if (sample_type & PERF_SAMPLE_GROUP) {
+		header.type |= PERF_SAMPLE_GROUP;
 		header.size += sizeof(u64) +
 			counter->nr_siblings * sizeof(group_entry);
 	}
 
-	if (record_type & PERF_RECORD_CALLCHAIN) {
+	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		callchain = perf_callchain(regs);
 
 		if (callchain) {
 			callchain_size = (1 + callchain->nr) * sizeof(u64);
 
-			header.type |= PERF_RECORD_CALLCHAIN;
+			header.type |= PERF_SAMPLE_CALLCHAIN;
 			header.size += callchain_size;
 		}
 	}
@@ -2386,28 +2386,28 @@ static void perf_counter_output(struct perf_counter *counter,
 
 	perf_output_put(&handle, header);
 
-	if (record_type & PERF_RECORD_IP)
+	if (sample_type & PERF_SAMPLE_IP)
 		perf_output_put(&handle, ip);
 
-	if (record_type & PERF_RECORD_TID)
+	if (sample_type & PERF_SAMPLE_TID)
 		perf_output_put(&handle, tid_entry);
 
-	if (record_type & PERF_RECORD_TIME)
+	if (sample_type & PERF_SAMPLE_TIME)
 		perf_output_put(&handle, time);
 
-	if (record_type & PERF_RECORD_ADDR)
+	if (sample_type & PERF_SAMPLE_ADDR)
 		perf_output_put(&handle, addr);
 
-	if (record_type & PERF_RECORD_CONFIG)
+	if (sample_type & PERF_SAMPLE_CONFIG)
 		perf_output_put(&handle, counter->hw_event.config);
 
-	if (record_type & PERF_RECORD_CPU)
+	if (sample_type & PERF_SAMPLE_CPU)
 		perf_output_put(&handle, cpu_entry);
 
 	/*
-	 * XXX PERF_RECORD_GROUP vs inherited counters seems difficult.
+	 * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult.
 	 */
-	if (record_type & PERF_RECORD_GROUP) {
+	if (sample_type & PERF_SAMPLE_GROUP) {
 		struct perf_counter *leader, *sub;
 		u64 nr = counter->nr_siblings;
 
@@ -2702,7 +2702,7 @@ void perf_counter_munmap(unsigned long addr, unsigned long len,
 }
 
 /*
- * Log irq_period changes so that analyzing tools can re-normalize the
+ * Log sample_period changes so that analyzing tools can re-normalize the
  * event flow.
  */
 
@@ -2725,7 +2725,7 @@ static void perf_log_period(struct perf_counter *counter, u64 period)
 		.period = period,
 	};
 
-	if (counter->hw.irq_period == period)
+	if (counter->hw.sample_period == period)
 		return;
 
 	ret = perf_output_begin(&handle, counter, sizeof(freq_event), 0, 0);
@@ -2834,7 +2834,7 @@ static void perf_swcounter_set_period(struct perf_counter *counter)
 {
 	struct hw_perf_counter *hwc = &counter->hw;
 	s64 left = atomic64_read(&hwc->period_left);
-	s64 period = hwc->irq_period;
+	s64 period = hwc->sample_period;
 
 	if (unlikely(left <= -period)) {
 		left = period;
@@ -2874,7 +2874,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 			ret = HRTIMER_NORESTART;
 	}
 
-	period = max_t(u64, 10000, counter->hw.irq_period);
+	period = max_t(u64, 10000, counter->hw.sample_period);
 	hrtimer_forward_now(hrtimer, ns_to_ktime(period));
 
 	return ret;
@@ -2959,7 +2959,7 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
 {
 	int neg = atomic64_add_negative(nr, &counter->hw.count);
 
-	if (counter->hw.irq_period && !neg && regs)
+	if (counter->hw.sample_period && !neg && regs)
 		perf_swcounter_overflow(counter, nmi, regs, addr);
 }
 
@@ -3080,8 +3080,8 @@ static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
 	atomic64_set(&hwc->prev_count, cpu_clock(cpu));
 	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hwc->hrtimer.function = perf_swcounter_hrtimer;
-	if (hwc->irq_period) {
-		u64 period = max_t(u64, 10000, hwc->irq_period);
+	if (hwc->sample_period) {
+		u64 period = max_t(u64, 10000, hwc->sample_period);
 		__hrtimer_start_range_ns(&hwc->hrtimer,
 				ns_to_ktime(period), 0,
 				HRTIMER_MODE_REL, 0);
@@ -3092,7 +3092,7 @@ static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
 
 static void cpu_clock_perf_counter_disable(struct perf_counter *counter)
 {
-	if (counter->hw.irq_period)
+	if (counter->hw.sample_period)
 		hrtimer_cancel(&counter->hw.hrtimer);
 	cpu_clock_perf_counter_update(counter);
 }
@@ -3132,8 +3132,8 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter)
 	atomic64_set(&hwc->prev_count, now);
 	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hwc->hrtimer.function = perf_swcounter_hrtimer;
-	if (hwc->irq_period) {
-		u64 period = max_t(u64, 10000, hwc->irq_period);
+	if (hwc->sample_period) {
+		u64 period = max_t(u64, 10000, hwc->sample_period);
 		__hrtimer_start_range_ns(&hwc->hrtimer,
 				ns_to_ktime(period), 0,
 				HRTIMER_MODE_REL, 0);
@@ -3144,7 +3144,7 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter)
 
 static void task_clock_perf_counter_disable(struct perf_counter *counter)
 {
-	if (counter->hw.irq_period)
+	if (counter->hw.sample_period)
 		hrtimer_cancel(&counter->hw.hrtimer);
 	task_clock_perf_counter_update(counter, counter->ctx->time);
 
@@ -3223,7 +3223,7 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 		return NULL;
 
 	counter->destroy = tp_perf_counter_destroy;
-	counter->hw.irq_period = counter->hw_event.irq_period;
+	counter->hw.sample_period = counter->hw_event.sample_period;
 
 	return &perf_ops_generic;
 }
@@ -3323,15 +3323,15 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	pmu = NULL;
 
 	hwc = &counter->hw;
-	if (hw_event->freq && hw_event->irq_freq)
-		hwc->irq_period = div64_u64(TICK_NSEC, hw_event->irq_freq);
+	if (hw_event->freq && hw_event->sample_freq)
+		hwc->sample_period = div64_u64(TICK_NSEC, hw_event->sample_freq);
 	else
-		hwc->irq_period = hw_event->irq_period;
+		hwc->sample_period = hw_event->sample_period;
 
 	/*
-	 * we currently do not support PERF_RECORD_GROUP on inherited counters
+	 * we currently do not support PERF_SAMPLE_GROUP on inherited counters
 	 */
-	if (hw_event->inherit && (hw_event->record_type & PERF_RECORD_GROUP))
+	if (hw_event->inherit && (hw_event->sample_type & PERF_SAMPLE_GROUP))
 		goto done;
 
 	if (perf_event_raw(hw_event)) {
-- 
cgit v1.2.3-70-g09d2


From 8a016db386195b193e2a8aeddff9fe937dcb7a40 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Jun 2009 15:27:45 +0200
Subject: perf_counter: Remove the last nmi/irq bits

IRQ (non-NMI) sampling is not used anymore - remove the last few bits.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_counter.c | 6 ------
 include/linux/perf_counter.h       | 4 +---
 2 files changed, 1 insertion(+), 9 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index ec06aa5e928..9e144fbebd2 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -284,12 +284,6 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	if (!hw_event->exclude_kernel)
 		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
 
-	/*
-	 * Use NMI events all the time:
-	 */
-	hwc->nmi	= 1;
-	hw_event->nmi	= 1;
-
 	if (!hwc->sample_period)
 		hwc->sample_period = x86_pmu.max_period;
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 1fcd3cc9385..cef9931793f 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -140,7 +140,6 @@ struct perf_counter_hw_event {
 	__u64			read_format;
 
 	__u64			disabled       :  1, /* off by default        */
-				nmi	       :  1, /* NMI sampling          */
 				inherit	       :  1, /* children inherit it   */
 				pinned	       :  1, /* must always be on PMU */
 				exclusive      :  1, /* only group on PMU     */
@@ -153,7 +152,7 @@ struct perf_counter_hw_event {
 				comm	       :  1, /* include comm data     */
 				freq           :  1, /* use freq, not period  */
 
-				__reserved_1   : 51;
+				__reserved_1   : 52;
 
 	__u32			wakeup_events;	/* wakeup every n events */
 	__u32			__reserved_2;
@@ -354,7 +353,6 @@ struct hw_perf_counter {
 			u64				config;
 			unsigned long			config_base;
 			unsigned long			counter_base;
-			int				nmi;
 			int				idx;
 		};
 		union { /* software */
-- 
cgit v1.2.3-70-g09d2


From 8e3747c13c39246c7e46def7cf495d9d21d4c5f9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Jun 2009 16:16:02 +0200
Subject: perf_counter: Change data head from u32 to u64

Since some people worried that 4G might not be a large enough
as an mmap data window, extend it to 64 bit for capable
platforms.

Reported-by: Stephane Eranian <eranian@googlemail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  7 ++++---
 kernel/perf_counter.c        | 15 ++++++++-------
 2 files changed, 12 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index cef9931793f..c046f7d97cf 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -212,7 +212,7 @@ struct perf_counter_mmap_page {
 	 * User-space reading this value should issue an rmb(), on SMP capable
 	 * platforms, after reading this value -- see perf_counter_wakeup().
 	 */
-	__u32   data_head;		/* head in the data section */
+	__u64   data_head;		/* head in the data section */
 };
 
 #define PERF_EVENT_MISC_CPUMODE_MASK	(3 << 0)
@@ -397,10 +397,11 @@ struct perf_mmap_data {
 	int				nr_locked;	/* nr pages mlocked  */
 
 	atomic_t			poll;		/* POLL_ for wakeups */
-	atomic_t			head;		/* write position    */
 	atomic_t			events;		/* event limit       */
 
-	atomic_t			done_head;	/* completed head    */
+	atomic_long_t			head;		/* write position    */
+	atomic_long_t			done_head;	/* completed head    */
+
 	atomic_t			lock;		/* concurrent writes */
 
 	atomic_t			wakeup;		/* needs a wakeup    */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 5ecd9981c03..3f11a2bc6c7 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2067,8 +2067,8 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 struct perf_output_handle {
 	struct perf_counter	*counter;
 	struct perf_mmap_data	*data;
-	unsigned int		offset;
-	unsigned int		head;
+	unsigned long		head;
+	unsigned long		offset;
 	int			nmi;
 	int			overflow;
 	int			locked;
@@ -2122,7 +2122,8 @@ static void perf_output_lock(struct perf_output_handle *handle)
 static void perf_output_unlock(struct perf_output_handle *handle)
 {
 	struct perf_mmap_data *data = handle->data;
-	int head, cpu;
+	unsigned long head;
+	int cpu;
 
 	data->done_head = data->head;
 
@@ -2135,7 +2136,7 @@ again:
 	 * before we publish the new head, matched by a rmb() in userspace when
 	 * reading this position.
 	 */
-	while ((head = atomic_xchg(&data->done_head, 0)))
+	while ((head = atomic_long_xchg(&data->done_head, 0)))
 		data->user_page->data_head = head;
 
 	/*
@@ -2148,7 +2149,7 @@ again:
 	/*
 	 * Therefore we have to validate we did not indeed do so.
 	 */
-	if (unlikely(atomic_read(&data->done_head))) {
+	if (unlikely(atomic_long_read(&data->done_head))) {
 		/*
 		 * Since we had it locked, we can lock it again.
 		 */
@@ -2195,7 +2196,7 @@ static int perf_output_begin(struct perf_output_handle *handle,
 	do {
 		offset = head = atomic_read(&data->head);
 		head += size;
-	} while (atomic_cmpxchg(&data->head, offset, head) != offset);
+	} while (atomic_long_cmpxchg(&data->head, offset, head) != offset);
 
 	handle->offset	= offset;
 	handle->head	= head;
@@ -2246,7 +2247,7 @@ static void perf_output_copy(struct perf_output_handle *handle,
 	 * Check we didn't copy past our reservation window, taking the
 	 * possible unsigned int wrap into account.
 	 */
-	WARN_ON_ONCE(((int)(handle->head - handle->offset)) < 0);
+	WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0);
 }
 
 #define perf_output_put(handle, x) \
-- 
cgit v1.2.3-70-g09d2


From 08247e31ca79b8f02cce47b7e8120797a8726606 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Jun 2009 16:46:57 +0200
Subject: perf_counter: Add ioctl for changing the sample period/frequency

Reported-by: Stephane Eranian <eranian@googlemail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  9 +++++----
 kernel/perf_counter.c        | 41 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index c046f7d97cf..45bdd3b95d3 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -164,10 +164,11 @@ struct perf_counter_hw_event {
 /*
  * Ioctls that can be done on a perf counter fd:
  */
-#define PERF_COUNTER_IOC_ENABLE		_IOW('$', 0, u32)
-#define PERF_COUNTER_IOC_DISABLE	_IOW('$', 1, u32)
-#define PERF_COUNTER_IOC_REFRESH	_IOW('$', 2, u32)
-#define PERF_COUNTER_IOC_RESET		_IOW('$', 3, u32)
+#define PERF_COUNTER_IOC_ENABLE		_IO ('$', 0)
+#define PERF_COUNTER_IOC_DISABLE	_IO ('$', 1)
+#define PERF_COUNTER_IOC_REFRESH	_IO ('$', 2)
+#define PERF_COUNTER_IOC_RESET		_IO ('$', 3)
+#define PERF_COUNTER_IOC_PERIOD		_IOW('$', 4, u64)
 
 enum perf_counter_ioc_flags {
 	PERF_IOC_FLAG_GROUP		= 1U << 0,
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 3f11a2bc6c7..abe2f3b6c42 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1604,6 +1604,43 @@ static void perf_counter_for_each(struct perf_counter *counter,
 	mutex_unlock(&counter->child_mutex);
 }
 
+static int perf_counter_period(struct perf_counter *counter, u64 __user *arg)
+{
+	struct perf_counter_context *ctx = counter->ctx;
+	unsigned long size;
+	int ret = 0;
+	u64 value;
+
+	if (!counter->hw_event.sample_period)
+		return -EINVAL;
+
+	size = copy_from_user(&value, arg, sizeof(value));
+	if (size != sizeof(value))
+		return -EFAULT;
+
+	if (!value)
+		return -EINVAL;
+
+	spin_lock_irq(&ctx->lock);
+	if (counter->hw_event.freq) {
+		if (value > sysctl_perf_counter_limit) {
+			ret = -EINVAL;
+			goto unlock;
+		}
+
+		counter->hw_event.sample_freq = value;
+	} else {
+		counter->hw_event.sample_period = value;
+		counter->hw.sample_period = value;
+
+		perf_log_period(counter, value);
+	}
+unlock:
+	spin_unlock_irq(&ctx->lock);
+
+	return ret;
+}
+
 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct perf_counter *counter = file->private_data;
@@ -1623,6 +1660,10 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 	case PERF_COUNTER_IOC_REFRESH:
 		return perf_counter_refresh(counter, arg);
+
+	case PERF_COUNTER_IOC_PERIOD:
+		return perf_counter_period(counter, (u64 __user *)arg);
+
 	default:
 		return -ENOTTY;
 	}
-- 
cgit v1.2.3-70-g09d2


From 0d48696f87e3618b0d35bd3e4e9d7c188d51e7de Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Jun 2009 19:22:16 +0200
Subject: perf_counter: Rename perf_counter_hw_event => perf_counter_attr

The structure isn't hw only and when I read event, I think about those
things that fall out the other end. Rename the thing.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
Cc: Stephane Eranian <eranian@googlemail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/perf_counter.c |  38 ++++++------
 arch/x86/kernel/cpu/perf_counter.c |  16 ++---
 include/linux/perf_counter.h       |  34 +++++------
 include/linux/syscalls.h           |   4 +-
 kernel/perf_counter.c              | 116 ++++++++++++++++++-------------------
 5 files changed, 104 insertions(+), 104 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index c9633321e7a..ea54686cb78 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -262,13 +262,13 @@ static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[],
 		}
 		counter = ctrs[i];
 		if (first) {
-			eu = counter->hw_event.exclude_user;
-			ek = counter->hw_event.exclude_kernel;
-			eh = counter->hw_event.exclude_hv;
+			eu = counter->attr.exclude_user;
+			ek = counter->attr.exclude_kernel;
+			eh = counter->attr.exclude_hv;
 			first = 0;
-		} else if (counter->hw_event.exclude_user != eu ||
-			   counter->hw_event.exclude_kernel != ek ||
-			   counter->hw_event.exclude_hv != eh) {
+		} else if (counter->attr.exclude_user != eu ||
+			   counter->attr.exclude_kernel != ek ||
+			   counter->attr.exclude_hv != eh) {
 			return -EAGAIN;
 		}
 	}
@@ -483,16 +483,16 @@ void hw_perf_enable(void)
 
 	/*
 	 * Add in MMCR0 freeze bits corresponding to the
-	 * hw_event.exclude_* bits for the first counter.
+	 * attr.exclude_* bits for the first counter.
 	 * We have already checked that all counters have the
 	 * same values for these bits as the first counter.
 	 */
 	counter = cpuhw->counter[0];
-	if (counter->hw_event.exclude_user)
+	if (counter->attr.exclude_user)
 		cpuhw->mmcr[0] |= MMCR0_FCP;
-	if (counter->hw_event.exclude_kernel)
+	if (counter->attr.exclude_kernel)
 		cpuhw->mmcr[0] |= freeze_counters_kernel;
-	if (counter->hw_event.exclude_hv)
+	if (counter->attr.exclude_hv)
 		cpuhw->mmcr[0] |= MMCR0_FCHV;
 
 	/*
@@ -786,10 +786,10 @@ static int can_go_on_limited_pmc(struct perf_counter *counter, u64 ev,
 	int n;
 	u64 alt[MAX_EVENT_ALTERNATIVES];
 
-	if (counter->hw_event.exclude_user
-	    || counter->hw_event.exclude_kernel
-	    || counter->hw_event.exclude_hv
-	    || counter->hw_event.sample_period)
+	if (counter->attr.exclude_user
+	    || counter->attr.exclude_kernel
+	    || counter->attr.exclude_hv
+	    || counter->attr.sample_period)
 		return 0;
 
 	if (ppmu->limited_pmc_event(ev))
@@ -855,13 +855,13 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 
 	if (!ppmu)
 		return ERR_PTR(-ENXIO);
-	if (!perf_event_raw(&counter->hw_event)) {
-		ev = perf_event_id(&counter->hw_event);
+	if (!perf_event_raw(&counter->attr)) {
+		ev = perf_event_id(&counter->attr);
 		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
 			return ERR_PTR(-EOPNOTSUPP);
 		ev = ppmu->generic_events[ev];
 	} else {
-		ev = perf_event_config(&counter->hw_event);
+		ev = perf_event_config(&counter->attr);
 	}
 	counter->hw.config_base = ev;
 	counter->hw.idx = 0;
@@ -872,7 +872,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 	 * the user set it to.
 	 */
 	if (!firmware_has_feature(FW_FEATURE_LPAR))
-		counter->hw_event.exclude_hv = 0;
+		counter->attr.exclude_hv = 0;
 
 	/*
 	 * If this is a per-task counter, then we can use
@@ -990,7 +990,7 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	 */
 	if (record) {
 		addr = 0;
-		if (counter->hw_event.record_type & PERF_RECORD_ADDR) {
+		if (counter->attr.record_type & PERF_RECORD_ADDR) {
 			/*
 			 * The user wants a data address recorded.
 			 * If we're not doing instruction sampling,
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 904571bea71..e16e8c13132 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -247,11 +247,11 @@ static inline int x86_pmu_initialized(void)
 }
 
 /*
- * Setup the hardware configuration for a given hw_event_type
+ * Setup the hardware configuration for a given attr_type
  */
 static int __hw_perf_counter_init(struct perf_counter *counter)
 {
-	struct perf_counter_hw_event *hw_event = &counter->hw_event;
+	struct perf_counter_attr *attr = &counter->attr;
 	struct hw_perf_counter *hwc = &counter->hw;
 	int err;
 
@@ -279,9 +279,9 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	/*
 	 * Count user and OS events unless requested not to.
 	 */
-	if (!hw_event->exclude_user)
+	if (!attr->exclude_user)
 		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
-	if (!hw_event->exclude_kernel)
+	if (!attr->exclude_kernel)
 		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
 
 	if (!hwc->sample_period)
@@ -292,15 +292,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	/*
 	 * Raw event type provide the config in the event structure
 	 */
-	if (perf_event_raw(hw_event)) {
-		hwc->config |= x86_pmu.raw_event(perf_event_config(hw_event));
+	if (perf_event_raw(attr)) {
+		hwc->config |= x86_pmu.raw_event(perf_event_config(attr));
 	} else {
-		if (perf_event_id(hw_event) >= x86_pmu.max_events)
+		if (perf_event_id(attr) >= x86_pmu.max_events)
 			return -EINVAL;
 		/*
 		 * The generic map:
 		 */
-		hwc->config |= x86_pmu.event_map(perf_event_id(hw_event));
+		hwc->config |= x86_pmu.event_map(perf_event_id(attr));
 	}
 
 	counter->destroy = hw_perf_counter_destroy;
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 45bdd3b95d3..37d5541d74c 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -22,7 +22,7 @@
  */
 
 /*
- * hw_event.type
+ * attr.type
  */
 enum perf_event_types {
 	PERF_TYPE_HARDWARE		= 0,
@@ -37,10 +37,10 @@ enum perf_event_types {
 };
 
 /*
- * Generalized performance counter event types, used by the hw_event.event_id
+ * Generalized performance counter event types, used by the attr.event_id
  * parameter of the sys_perf_counter_open() syscall:
  */
-enum hw_event_ids {
+enum attr_ids {
 	/*
 	 * Common hardware events, generalized by the kernel:
 	 */
@@ -94,7 +94,7 @@ enum sw_event_ids {
 #define PERF_COUNTER_EVENT_MASK		__PERF_COUNTER_MASK(EVENT)
 
 /*
- * Bits that can be set in hw_event.sample_type to request information
+ * Bits that can be set in attr.sample_type to request information
  * in the overflow packets.
  */
 enum perf_counter_sample_format {
@@ -109,7 +109,7 @@ enum perf_counter_sample_format {
 };
 
 /*
- * Bits that can be set in hw_event.read_format to request that
+ * Bits that can be set in attr.read_format to request that
  * reads on the counter should return the indicated quantities,
  * in increasing order of bit value, after the counter value.
  */
@@ -122,7 +122,7 @@ enum perf_counter_read_format {
 /*
  * Hardware event to monitor via a performance monitoring counter:
  */
-struct perf_counter_hw_event {
+struct perf_counter_attr {
 	/*
 	 * The MSB of the config word signifies if the rest contains cpu
 	 * specific (raw) counter configuration data, if unset, the next
@@ -323,25 +323,25 @@ enum perf_event_type {
 
 struct task_struct;
 
-static inline u64 perf_event_raw(struct perf_counter_hw_event *hw_event)
+static inline u64 perf_event_raw(struct perf_counter_attr *attr)
 {
-	return hw_event->config & PERF_COUNTER_RAW_MASK;
+	return attr->config & PERF_COUNTER_RAW_MASK;
 }
 
-static inline u64 perf_event_config(struct perf_counter_hw_event *hw_event)
+static inline u64 perf_event_config(struct perf_counter_attr *attr)
 {
-	return hw_event->config & PERF_COUNTER_CONFIG_MASK;
+	return attr->config & PERF_COUNTER_CONFIG_MASK;
 }
 
-static inline u64 perf_event_type(struct perf_counter_hw_event *hw_event)
+static inline u64 perf_event_type(struct perf_counter_attr *attr)
 {
-	return (hw_event->config & PERF_COUNTER_TYPE_MASK) >>
+	return (attr->config & PERF_COUNTER_TYPE_MASK) >>
 		PERF_COUNTER_TYPE_SHIFT;
 }
 
-static inline u64 perf_event_id(struct perf_counter_hw_event *hw_event)
+static inline u64 perf_event_id(struct perf_counter_attr *attr)
 {
-	return hw_event->config & PERF_COUNTER_EVENT_MASK;
+	return attr->config & PERF_COUNTER_EVENT_MASK;
 }
 
 /**
@@ -457,7 +457,7 @@ struct perf_counter {
 	u64				tstamp_running;
 	u64				tstamp_stopped;
 
-	struct perf_counter_hw_event	hw_event;
+	struct perf_counter_attr	attr;
 	struct hw_perf_counter		hw;
 
 	struct perf_counter_context	*ctx;
@@ -605,8 +605,8 @@ extern int perf_counter_overflow(struct perf_counter *counter,
  */
 static inline int is_software_counter(struct perf_counter *counter)
 {
-	return !perf_event_raw(&counter->hw_event) &&
-		perf_event_type(&counter->hw_event) != PERF_TYPE_HARDWARE;
+	return !perf_event_raw(&counter->attr) &&
+		perf_event_type(&counter->attr) != PERF_TYPE_HARDWARE;
 }
 
 extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 79faae950e2..c6c84ad8bd7 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -55,7 +55,7 @@ struct compat_timeval;
 struct robust_list_head;
 struct getcpu_cache;
 struct old_linux_dirent;
-struct perf_counter_hw_event;
+struct perf_counter_attr;
 
 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -758,6 +758,6 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
 
 asmlinkage long sys_perf_counter_open(
-		const struct perf_counter_hw_event __user *hw_event_uptr,
+		const struct perf_counter_attr __user *attr_uptr,
 		pid_t pid, int cpu, int group_fd, unsigned long flags);
 #endif
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index abe2f3b6c42..317cef78a38 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -260,7 +260,7 @@ counter_sched_out(struct perf_counter *counter,
 	if (!is_software_counter(counter))
 		cpuctx->active_oncpu--;
 	ctx->nr_active--;
-	if (counter->hw_event.exclusive || !cpuctx->active_oncpu)
+	if (counter->attr.exclusive || !cpuctx->active_oncpu)
 		cpuctx->exclusive = 0;
 }
 
@@ -282,7 +282,7 @@ group_sched_out(struct perf_counter *group_counter,
 	list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
 		counter_sched_out(counter, cpuctx, ctx);
 
-	if (group_counter->hw_event.exclusive)
+	if (group_counter->attr.exclusive)
 		cpuctx->exclusive = 0;
 }
 
@@ -550,7 +550,7 @@ counter_sched_in(struct perf_counter *counter,
 		cpuctx->active_oncpu++;
 	ctx->nr_active++;
 
-	if (counter->hw_event.exclusive)
+	if (counter->attr.exclusive)
 		cpuctx->exclusive = 1;
 
 	return 0;
@@ -642,7 +642,7 @@ static int group_can_go_on(struct perf_counter *counter,
 	 * If this group is exclusive and there are already
 	 * counters on the CPU, it can't go on.
 	 */
-	if (counter->hw_event.exclusive && cpuctx->active_oncpu)
+	if (counter->attr.exclusive && cpuctx->active_oncpu)
 		return 0;
 	/*
 	 * Otherwise, try to add it if all previous groups were able
@@ -725,7 +725,7 @@ static void __perf_install_in_context(void *info)
 		 */
 		if (leader != counter)
 			group_sched_out(leader, cpuctx, ctx);
-		if (leader->hw_event.pinned) {
+		if (leader->attr.pinned) {
 			update_group_times(leader);
 			leader->state = PERF_COUNTER_STATE_ERROR;
 		}
@@ -849,7 +849,7 @@ static void __perf_counter_enable(void *info)
 		 */
 		if (leader != counter)
 			group_sched_out(leader, cpuctx, ctx);
-		if (leader->hw_event.pinned) {
+		if (leader->attr.pinned) {
 			update_group_times(leader);
 			leader->state = PERF_COUNTER_STATE_ERROR;
 		}
@@ -927,7 +927,7 @@ static int perf_counter_refresh(struct perf_counter *counter, int refresh)
 	/*
 	 * not supported on inherited counters
 	 */
-	if (counter->hw_event.inherit)
+	if (counter->attr.inherit)
 		return -EINVAL;
 
 	atomic_add(refresh, &counter->event_limit);
@@ -1094,7 +1094,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 	 */
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
 		if (counter->state <= PERF_COUNTER_STATE_OFF ||
-		    !counter->hw_event.pinned)
+		    !counter->attr.pinned)
 			continue;
 		if (counter->cpu != -1 && counter->cpu != cpu)
 			continue;
@@ -1122,7 +1122,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 		 * ignore pinned counters since we did them already.
 		 */
 		if (counter->state <= PERF_COUNTER_STATE_OFF ||
-		    counter->hw_event.pinned)
+		    counter->attr.pinned)
 			continue;
 
 		/*
@@ -1204,11 +1204,11 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
 			interrupts = 2*sysctl_perf_counter_limit/HZ;
 		}
 
-		if (!counter->hw_event.freq || !counter->hw_event.sample_freq)
+		if (!counter->attr.freq || !counter->attr.sample_freq)
 			continue;
 
 		events = HZ * interrupts * counter->hw.sample_period;
-		period = div64_u64(events, counter->hw_event.sample_freq);
+		period = div64_u64(events, counter->attr.sample_freq);
 
 		delta = (s64)(1 + period - counter->hw.sample_period);
 		delta >>= 1;
@@ -1444,11 +1444,11 @@ static void free_counter(struct perf_counter *counter)
 	perf_pending_sync(counter);
 
 	atomic_dec(&nr_counters);
-	if (counter->hw_event.mmap)
+	if (counter->attr.mmap)
 		atomic_dec(&nr_mmap_tracking);
-	if (counter->hw_event.munmap)
+	if (counter->attr.munmap)
 		atomic_dec(&nr_munmap_tracking);
-	if (counter->hw_event.comm)
+	if (counter->attr.comm)
 		atomic_dec(&nr_comm_tracking);
 
 	if (counter->destroy)
@@ -1504,13 +1504,13 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 	mutex_lock(&counter->child_mutex);
 	values[0] = perf_counter_read(counter);
 	n = 1;
-	if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
 		values[n++] = counter->total_time_enabled +
 			atomic64_read(&counter->child_total_time_enabled);
-	if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
 		values[n++] = counter->total_time_running +
 			atomic64_read(&counter->child_total_time_running);
-	if (counter->hw_event.read_format & PERF_FORMAT_ID)
+	if (counter->attr.read_format & PERF_FORMAT_ID)
 		values[n++] = counter->id;
 	mutex_unlock(&counter->child_mutex);
 
@@ -1611,7 +1611,7 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg)
 	int ret = 0;
 	u64 value;
 
-	if (!counter->hw_event.sample_period)
+	if (!counter->attr.sample_period)
 		return -EINVAL;
 
 	size = copy_from_user(&value, arg, sizeof(value));
@@ -1622,15 +1622,15 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg)
 		return -EINVAL;
 
 	spin_lock_irq(&ctx->lock);
-	if (counter->hw_event.freq) {
+	if (counter->attr.freq) {
 		if (value > sysctl_perf_counter_limit) {
 			ret = -EINVAL;
 			goto unlock;
 		}
 
-		counter->hw_event.sample_freq = value;
+		counter->attr.sample_freq = value;
 	} else {
-		counter->hw_event.sample_period = value;
+		counter->attr.sample_period = value;
 		counter->hw.sample_period = value;
 
 		perf_log_period(counter, value);
@@ -2299,7 +2299,7 @@ static void perf_output_end(struct perf_output_handle *handle)
 	struct perf_counter *counter = handle->counter;
 	struct perf_mmap_data *data = handle->data;
 
-	int wakeup_events = counter->hw_event.wakeup_events;
+	int wakeup_events = counter->attr.wakeup_events;
 
 	if (handle->overflow && wakeup_events) {
 		int events = atomic_inc_return(&data->events);
@@ -2339,7 +2339,7 @@ static void perf_counter_output(struct perf_counter *counter,
 				int nmi, struct pt_regs *regs, u64 addr)
 {
 	int ret;
-	u64 sample_type = counter->hw_event.sample_type;
+	u64 sample_type = counter->attr.sample_type;
 	struct perf_output_handle handle;
 	struct perf_event_header header;
 	u64 ip;
@@ -2441,7 +2441,7 @@ static void perf_counter_output(struct perf_counter *counter,
 		perf_output_put(&handle, addr);
 
 	if (sample_type & PERF_SAMPLE_CONFIG)
-		perf_output_put(&handle, counter->hw_event.config);
+		perf_output_put(&handle, counter->attr.config);
 
 	if (sample_type & PERF_SAMPLE_CPU)
 		perf_output_put(&handle, cpu_entry);
@@ -2512,7 +2512,7 @@ static void perf_counter_comm_output(struct perf_counter *counter,
 static int perf_counter_comm_match(struct perf_counter *counter,
 				   struct perf_comm_event *comm_event)
 {
-	if (counter->hw_event.comm &&
+	if (counter->attr.comm &&
 	    comm_event->event.header.type == PERF_EVENT_COMM)
 		return 1;
 
@@ -2623,11 +2623,11 @@ static void perf_counter_mmap_output(struct perf_counter *counter,
 static int perf_counter_mmap_match(struct perf_counter *counter,
 				   struct perf_mmap_event *mmap_event)
 {
-	if (counter->hw_event.mmap &&
+	if (counter->attr.mmap &&
 	    mmap_event->event.header.type == PERF_EVENT_MMAP)
 		return 1;
 
-	if (counter->hw_event.munmap &&
+	if (counter->attr.munmap &&
 	    mmap_event->event.header.type == PERF_EVENT_MUNMAP)
 		return 1;
 
@@ -2907,8 +2907,8 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 	 * In case we exclude kernel IPs or are somehow not in interrupt
 	 * context, provide the next best thing, the user IP.
 	 */
-	if ((counter->hw_event.exclude_kernel || !regs) &&
-			!counter->hw_event.exclude_user)
+	if ((counter->attr.exclude_kernel || !regs) &&
+			!counter->attr.exclude_user)
 		regs = task_pt_regs(current);
 
 	if (regs) {
@@ -2982,14 +2982,14 @@ static int perf_swcounter_match(struct perf_counter *counter,
 	if (!perf_swcounter_is_counting(counter))
 		return 0;
 
-	if (counter->hw_event.config != event_config)
+	if (counter->attr.config != event_config)
 		return 0;
 
 	if (regs) {
-		if (counter->hw_event.exclude_user && user_mode(regs))
+		if (counter->attr.exclude_user && user_mode(regs))
 			return 0;
 
-		if (counter->hw_event.exclude_kernel && !user_mode(regs))
+		if (counter->attr.exclude_kernel && !user_mode(regs))
 			return 0;
 	}
 
@@ -3252,12 +3252,12 @@ extern void ftrace_profile_disable(int);
 
 static void tp_perf_counter_destroy(struct perf_counter *counter)
 {
-	ftrace_profile_disable(perf_event_id(&counter->hw_event));
+	ftrace_profile_disable(perf_event_id(&counter->attr));
 }
 
 static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 {
-	int event_id = perf_event_id(&counter->hw_event);
+	int event_id = perf_event_id(&counter->attr);
 	int ret;
 
 	ret = ftrace_profile_enable(event_id);
@@ -3265,7 +3265,7 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 		return NULL;
 
 	counter->destroy = tp_perf_counter_destroy;
-	counter->hw.sample_period = counter->hw_event.sample_period;
+	counter->hw.sample_period = counter->attr.sample_period;
 
 	return &perf_ops_generic;
 }
@@ -3287,7 +3287,7 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 	 * to be kernel events, and page faults are never hypervisor
 	 * events.
 	 */
-	switch (perf_event_id(&counter->hw_event)) {
+	switch (perf_event_id(&counter->attr)) {
 	case PERF_COUNT_CPU_CLOCK:
 		pmu = &perf_ops_cpu_clock;
 
@@ -3319,7 +3319,7 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
  * Allocate and initialize a counter structure
  */
 static struct perf_counter *
-perf_counter_alloc(struct perf_counter_hw_event *hw_event,
+perf_counter_alloc(struct perf_counter_attr *attr,
 		   int cpu,
 		   struct perf_counter_context *ctx,
 		   struct perf_counter *group_leader,
@@ -3352,36 +3352,36 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	mutex_init(&counter->mmap_mutex);
 
 	counter->cpu			= cpu;
-	counter->hw_event		= *hw_event;
+	counter->attr		= *attr;
 	counter->group_leader		= group_leader;
 	counter->pmu			= NULL;
 	counter->ctx			= ctx;
 	counter->oncpu			= -1;
 
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
-	if (hw_event->disabled)
+	if (attr->disabled)
 		counter->state = PERF_COUNTER_STATE_OFF;
 
 	pmu = NULL;
 
 	hwc = &counter->hw;
-	if (hw_event->freq && hw_event->sample_freq)
-		hwc->sample_period = div64_u64(TICK_NSEC, hw_event->sample_freq);
+	if (attr->freq && attr->sample_freq)
+		hwc->sample_period = div64_u64(TICK_NSEC, attr->sample_freq);
 	else
-		hwc->sample_period = hw_event->sample_period;
+		hwc->sample_period = attr->sample_period;
 
 	/*
 	 * we currently do not support PERF_SAMPLE_GROUP on inherited counters
 	 */
-	if (hw_event->inherit && (hw_event->sample_type & PERF_SAMPLE_GROUP))
+	if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP))
 		goto done;
 
-	if (perf_event_raw(hw_event)) {
+	if (perf_event_raw(attr)) {
 		pmu = hw_perf_counter_init(counter);
 		goto done;
 	}
 
-	switch (perf_event_type(hw_event)) {
+	switch (perf_event_type(attr)) {
 	case PERF_TYPE_HARDWARE:
 		pmu = hw_perf_counter_init(counter);
 		break;
@@ -3409,11 +3409,11 @@ done:
 	counter->pmu = pmu;
 
 	atomic_inc(&nr_counters);
-	if (counter->hw_event.mmap)
+	if (counter->attr.mmap)
 		atomic_inc(&nr_mmap_tracking);
-	if (counter->hw_event.munmap)
+	if (counter->attr.munmap)
 		atomic_inc(&nr_munmap_tracking);
-	if (counter->hw_event.comm)
+	if (counter->attr.comm)
 		atomic_inc(&nr_comm_tracking);
 
 	return counter;
@@ -3424,17 +3424,17 @@ static atomic64_t perf_counter_id;
 /**
  * sys_perf_counter_open - open a performance counter, associate it to a task/cpu
  *
- * @hw_event_uptr:	event type attributes for monitoring/sampling
+ * @attr_uptr:	event type attributes for monitoring/sampling
  * @pid:		target pid
  * @cpu:		target cpu
  * @group_fd:		group leader counter fd
  */
 SYSCALL_DEFINE5(perf_counter_open,
-		const struct perf_counter_hw_event __user *, hw_event_uptr,
+		const struct perf_counter_attr __user *, attr_uptr,
 		pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
 {
 	struct perf_counter *counter, *group_leader;
-	struct perf_counter_hw_event hw_event;
+	struct perf_counter_attr attr;
 	struct perf_counter_context *ctx;
 	struct file *counter_file = NULL;
 	struct file *group_file = NULL;
@@ -3446,7 +3446,7 @@ SYSCALL_DEFINE5(perf_counter_open,
 	if (flags)
 		return -EINVAL;
 
-	if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0)
+	if (copy_from_user(&attr, attr_uptr, sizeof(attr)) != 0)
 		return -EFAULT;
 
 	/*
@@ -3484,11 +3484,11 @@ SYSCALL_DEFINE5(perf_counter_open,
 		/*
 		 * Only a group leader can be exclusive or pinned
 		 */
-		if (hw_event.exclusive || hw_event.pinned)
+		if (attr.exclusive || attr.pinned)
 			goto err_put_context;
 	}
 
-	counter = perf_counter_alloc(&hw_event, cpu, ctx, group_leader,
+	counter = perf_counter_alloc(&attr, cpu, ctx, group_leader,
 				     GFP_KERNEL);
 	ret = PTR_ERR(counter);
 	if (IS_ERR(counter))
@@ -3556,7 +3556,7 @@ inherit_counter(struct perf_counter *parent_counter,
 	if (parent_counter->parent)
 		parent_counter = parent_counter->parent;
 
-	child_counter = perf_counter_alloc(&parent_counter->hw_event,
+	child_counter = perf_counter_alloc(&parent_counter->attr,
 					   parent_counter->cpu, child_ctx,
 					   group_leader, GFP_KERNEL);
 	if (IS_ERR(child_counter))
@@ -3565,7 +3565,7 @@ inherit_counter(struct perf_counter *parent_counter,
 
 	/*
 	 * Make the child state follow the state of the parent counter,
-	 * not its hw_event.disabled bit.  We hold the parent's mutex,
+	 * not its attr.disabled bit.  We hold the parent's mutex,
 	 * so we won't race with perf_counter_{en, dis}able_family.
 	 */
 	if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE)
@@ -3582,7 +3582,7 @@ inherit_counter(struct perf_counter *parent_counter,
 	/*
 	 * inherit into child's child as well:
 	 */
-	child_counter->hw_event.inherit = 1;
+	child_counter->attr.inherit = 1;
 
 	/*
 	 * Get a reference to the parent filp - we will fput it
@@ -3838,7 +3838,7 @@ int perf_counter_init_task(struct task_struct *child)
 		if (counter != counter->group_leader)
 			continue;
 
-		if (!counter->hw_event.inherit) {
+		if (!counter->attr.inherit) {
 			inherited_all = 0;
 			continue;
 		}
-- 
cgit v1.2.3-70-g09d2


From a05c0205ba031c01bba33a21bf0a35920eb64833 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Wed, 3 Jun 2009 09:33:18 +0200
Subject: block: Fix bounce limit setting in DM

blk_queue_bounce_limit() is more than a wrapper about the request queue
limits.bounce_pfn variable.  Introduce blk_queue_bounce_pfn() which can
be called by stacking drivers that wish to set the bounce limit
explicitly.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-settings.c   | 17 +++++++++++++++++
 drivers/md/dm-table.c  |  2 +-
 include/linux/blkdev.h |  1 +
 3 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 8d339349289..9acd0b7e802 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -193,6 +193,23 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask)
 }
 EXPORT_SYMBOL(blk_queue_bounce_limit);
 
+/**
+ * blk_queue_bounce_pfn - set the bounce buffer limit for queue
+ * @q: the request queue for the device
+ * @pfn: max address
+ *
+ * Description:
+ *    This function is similar to blk_queue_bounce_limit except it
+ *    neither changes allocation flags, nor does it set up the ISA DMA
+ *    pool. This function should only be used by stacking drivers.
+ *    Hardware drivers should use blk_queue_bounce_limit instead.
+ */
+void blk_queue_bounce_pfn(struct request_queue *q, u64 pfn)
+{
+	q->limits.bounce_pfn = pfn;
+}
+EXPORT_SYMBOL(blk_queue_bounce_pfn);
+
 /**
  * blk_queue_max_sectors - set max sectors for a request for this queue
  * @q:  the request queue for the device
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index e9a73bb242b..3ca1604ddd5 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -920,7 +920,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
 	blk_queue_max_segment_size(q, t->limits.max_segment_size);
 	blk_queue_max_hw_sectors(q, t->limits.max_hw_sectors);
 	blk_queue_segment_boundary(q, t->limits.seg_boundary_mask);
-	blk_queue_bounce_limit(q, t->limits.bounce_pfn);
+	blk_queue_bounce_pfn(q, t->limits.bounce_pfn);
 
 	if (t->limits.no_cluster)
 		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5e740a135e7..989aa1790f4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -910,6 +910,7 @@ extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *);
 extern void blk_cleanup_queue(struct request_queue *);
 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
+extern void blk_queue_bounce_pfn(struct request_queue *, u64);
 extern void blk_queue_max_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short);
-- 
cgit v1.2.3-70-g09d2


From e34d5c1a4f9919a81b4ea4591d7383245f35cb8e Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 3 Jun 2009 10:32:06 +0200
Subject: netfilter: conntrack: replace notify chain by function pointer

This patch removes the notify chain infrastructure and replace it
by a simple function pointer. This issue has been mentioned in the
mailing list several times: the use of the notify chain adds
too much overhead for something that is only used by ctnetlink.

This patch also changes nfnetlink_send(). It seems that gfp_any()
returns GFP_KERNEL for user-context request, like those via
ctnetlink, inside the RCU read-side section which is not valid.
Using GFP_KERNEL is also evil since netlink may schedule(),
this leads to "scheduling while atomic" bug reports.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink.h         |  2 +-
 include/net/netfilter/nf_conntrack_ecache.h | 68 ++++++++++++++++-------
 net/netfilter/nf_conntrack_ecache.c         | 83 +++++++++++++++++++++++------
 net/netfilter/nf_conntrack_netlink.c        | 42 +++++++--------
 net/netfilter/nfnetlink.c                   |  5 +-
 5 files changed, 139 insertions(+), 61 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index c600083cbdf..2214e516146 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -75,7 +75,7 @@ extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n);
 
 extern int nfnetlink_has_listeners(unsigned int group);
 extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, 
-			  int echo);
+			  int echo, gfp_t flags);
 extern void nfnetlink_set_err(u32 pid, u32 group, int error);
 extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags);
 
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 2e17a2d0eb3..1afb907e015 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -6,7 +6,6 @@
 #define _NF_CONNTRACK_ECACHE_H
 #include <net/netfilter/nf_conntrack.h>
 
-#include <linux/notifier.h>
 #include <linux/interrupt.h>
 #include <net/net_namespace.h>
 #include <net/netfilter/nf_conntrack_expect.h>
@@ -69,9 +68,13 @@ struct nf_ct_event {
 	int report;
 };
 
-extern struct atomic_notifier_head nf_conntrack_chain;
-extern int nf_conntrack_register_notifier(struct notifier_block *nb);
-extern int nf_conntrack_unregister_notifier(struct notifier_block *nb);
+struct nf_ct_event_notifier {
+	int (*fcn)(unsigned int events, struct nf_ct_event *item);
+};
+
+extern struct nf_ct_event_notifier *nf_conntrack_event_cb;
+extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb);
+extern void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb);
 
 extern void nf_ct_deliver_cached_events(const struct nf_conn *ct);
 extern void __nf_ct_event_cache_init(struct nf_conn *ct);
@@ -97,13 +100,23 @@ nf_conntrack_event_report(enum ip_conntrack_events event,
 			  u32 pid,
 			  int report)
 {
-	struct nf_ct_event item = {
-		.ct 	= ct,
-		.pid	= pid,
-		.report = report
-	};
-	if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct))
-		atomic_notifier_call_chain(&nf_conntrack_chain, event, &item);
+	struct nf_ct_event_notifier *notify;
+
+	rcu_read_lock();
+	notify = rcu_dereference(nf_conntrack_event_cb);
+	if (notify == NULL)
+		goto out_unlock;
+
+	if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) {
+		struct nf_ct_event item = {
+			.ct 	= ct,
+			.pid	= pid,
+			.report = report
+		};
+		notify->fcn(event, &item);
+	}
+out_unlock:
+	rcu_read_unlock();
 }
 
 static inline void
@@ -118,9 +131,13 @@ struct nf_exp_event {
 	int report;
 };
 
-extern struct atomic_notifier_head nf_ct_expect_chain;
-extern int nf_ct_expect_register_notifier(struct notifier_block *nb);
-extern int nf_ct_expect_unregister_notifier(struct notifier_block *nb);
+struct nf_exp_event_notifier {
+	int (*fcn)(unsigned int events, struct nf_exp_event *item);
+};
+
+extern struct nf_exp_event_notifier *nf_expect_event_cb;
+extern int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *nb);
+extern void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *nb);
 
 static inline void
 nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
@@ -128,12 +145,23 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
 			  u32 pid,
 			  int report)
 {
-	struct nf_exp_event item = {
-		.exp	= exp,
-		.pid	= pid,
-		.report = report
-	};
-	atomic_notifier_call_chain(&nf_ct_expect_chain, event, &item);
+	struct nf_exp_event_notifier *notify;
+
+	rcu_read_lock();
+	notify = rcu_dereference(nf_expect_event_cb);
+	if (notify == NULL)
+		goto out_unlock;
+
+	{
+		struct nf_exp_event item = {
+			.exp	= exp,
+			.pid	= pid,
+			.report = report
+		};
+		notify->fcn(event, &item);
+	}
+out_unlock:
+	rcu_read_unlock();
 }
 
 static inline void
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index dee4190209c..5516b3e64b4 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -16,24 +16,32 @@
 #include <linux/stddef.h>
 #include <linux/err.h>
 #include <linux/percpu.h>
-#include <linux/notifier.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
 
-ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain);
-EXPORT_SYMBOL_GPL(nf_conntrack_chain);
+static DEFINE_MUTEX(nf_ct_ecache_mutex);
 
-ATOMIC_NOTIFIER_HEAD(nf_ct_expect_chain);
-EXPORT_SYMBOL_GPL(nf_ct_expect_chain);
+struct nf_ct_event_notifier *nf_conntrack_event_cb __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_event_cb);
+
+struct nf_exp_event_notifier *nf_expect_event_cb __read_mostly;
+EXPORT_SYMBOL_GPL(nf_expect_event_cb);
 
 /* deliver cached events and clear cache entry - must be called with locally
  * disabled softirqs */
 static inline void
 __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
 {
+	struct nf_ct_event_notifier *notify;
+
+	rcu_read_lock();
+	notify = rcu_dereference(nf_conntrack_event_cb);
+	if (notify == NULL)
+		goto out_unlock;
+
 	if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
 	    && ecache->events) {
 		struct nf_ct_event item = {
@@ -42,14 +50,15 @@ __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
 			.report	= 0
 		};
 
-		atomic_notifier_call_chain(&nf_conntrack_chain,
-					   ecache->events,
-					   &item);
+		notify->fcn(ecache->events, &item);
 	}
 
 	ecache->events = 0;
 	nf_ct_put(ecache->ct);
 	ecache->ct = NULL;
+
+out_unlock:
+	rcu_read_unlock();
 }
 
 /* Deliver all cached events for a particular conntrack. This is called
@@ -111,26 +120,68 @@ void nf_conntrack_ecache_fini(struct net *net)
 	free_percpu(net->ct.ecache);
 }
 
-int nf_conntrack_register_notifier(struct notifier_block *nb)
+int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new)
 {
-	return atomic_notifier_chain_register(&nf_conntrack_chain, nb);
+	int ret = 0;
+	struct nf_ct_event_notifier *notify;
+
+	mutex_lock(&nf_ct_ecache_mutex);
+	notify = rcu_dereference(nf_conntrack_event_cb);
+	if (notify != NULL) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+	rcu_assign_pointer(nf_conntrack_event_cb, new);
+	mutex_unlock(&nf_ct_ecache_mutex);
+	return ret;
+
+out_unlock:
+	mutex_unlock(&nf_ct_ecache_mutex);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
 
-int nf_conntrack_unregister_notifier(struct notifier_block *nb)
+void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new)
 {
-	return atomic_notifier_chain_unregister(&nf_conntrack_chain, nb);
+	struct nf_ct_event_notifier *notify;
+
+	mutex_lock(&nf_ct_ecache_mutex);
+	notify = rcu_dereference(nf_conntrack_event_cb);
+	BUG_ON(notify != new);
+	rcu_assign_pointer(nf_conntrack_event_cb, NULL);
+	mutex_unlock(&nf_ct_ecache_mutex);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
 
-int nf_ct_expect_register_notifier(struct notifier_block *nb)
+int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new)
 {
-	return atomic_notifier_chain_register(&nf_ct_expect_chain, nb);
+	int ret = 0;
+	struct nf_exp_event_notifier *notify;
+
+	mutex_lock(&nf_ct_ecache_mutex);
+	notify = rcu_dereference(nf_expect_event_cb);
+	if (notify != NULL) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+	rcu_assign_pointer(nf_expect_event_cb, new);
+	mutex_unlock(&nf_ct_ecache_mutex);
+	return ret;
+
+out_unlock:
+	mutex_unlock(&nf_ct_ecache_mutex);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);
 
-int nf_ct_expect_unregister_notifier(struct notifier_block *nb)
+void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new)
 {
-	return atomic_notifier_chain_unregister(&nf_ct_expect_chain, nb);
+	struct nf_exp_event_notifier *notify;
+
+	mutex_lock(&nf_ct_ecache_mutex);
+	notify = rcu_dereference(nf_expect_event_cb);
+	BUG_ON(notify != new);
+	rcu_assign_pointer(nf_expect_event_cb, NULL);
+	mutex_unlock(&nf_ct_ecache_mutex);
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index b1b9e4fb7de..4448b062de0 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -27,7 +27,6 @@
 #include <linux/netlink.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
-#include <linux/notifier.h>
 
 #include <linux/netfilter.h>
 #include <net/netlink.h>
@@ -454,13 +453,12 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
 	       ;
 }
 
-static int ctnetlink_conntrack_event(struct notifier_block *this,
-				     unsigned long events, void *ptr)
+static int
+ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 {
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
 	struct nlattr *nest_parms;
-	struct nf_ct_event *item = (struct nf_ct_event *)ptr;
 	struct nf_conn *ct = item->ct;
 	struct sk_buff *skb;
 	unsigned int type;
@@ -468,7 +466,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 
 	/* ignore our fake conntrack entry */
 	if (ct == &nf_conntrack_untracked)
-		return NOTIFY_DONE;
+		return 0;
 
 	if (events & IPCT_DESTROY) {
 		type = IPCTNL_MSG_CT_DELETE;
@@ -481,10 +479,10 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 		type = IPCTNL_MSG_CT_NEW;
 		group = NFNLGRP_CONNTRACK_UPDATE;
 	} else
-		return NOTIFY_DONE;
+		return 0;
 
 	if (!item->report && !nfnetlink_has_listeners(group))
-		return NOTIFY_DONE;
+		return 0;
 
 	skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC);
 	if (skb == NULL)
@@ -560,8 +558,8 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 	rcu_read_unlock();
 
 	nlmsg_end(skb, nlh);
-	nfnetlink_send(skb, item->pid, group, item->report);
-	return NOTIFY_DONE;
+	nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC);
+	return 0;
 
 nla_put_failure:
 	rcu_read_unlock();
@@ -570,7 +568,7 @@ nlmsg_failure:
 	kfree_skb(skb);
 errout:
 	nfnetlink_set_err(0, group, -ENOBUFS);
-	return NOTIFY_DONE;
+	return 0;
 }
 #endif /* CONFIG_NF_CONNTRACK_EVENTS */
 
@@ -1507,12 +1505,11 @@ nla_put_failure:
 }
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
-static int ctnetlink_expect_event(struct notifier_block *this,
-				  unsigned long events, void *ptr)
+static int
+ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
 {
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
-	struct nf_exp_event *item = (struct nf_exp_event *)ptr;
 	struct nf_conntrack_expect *exp = item->exp;
 	struct sk_buff *skb;
 	unsigned int type;
@@ -1522,11 +1519,11 @@ static int ctnetlink_expect_event(struct notifier_block *this,
 		type = IPCTNL_MSG_EXP_NEW;
 		flags = NLM_F_CREATE|NLM_F_EXCL;
 	} else
-		return NOTIFY_DONE;
+		return 0;
 
 	if (!item->report &&
 	    !nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
-		return NOTIFY_DONE;
+		return 0;
 
 	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
 	if (skb == NULL)
@@ -1548,8 +1545,9 @@ static int ctnetlink_expect_event(struct notifier_block *this,
 	rcu_read_unlock();
 
 	nlmsg_end(skb, nlh);
-	nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report);
-	return NOTIFY_DONE;
+	nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW,
+		       item->report, GFP_ATOMIC);
+	return 0;
 
 nla_put_failure:
 	rcu_read_unlock();
@@ -1558,7 +1556,7 @@ nlmsg_failure:
 	kfree_skb(skb);
 errout:
 	nfnetlink_set_err(0, 0, -ENOBUFS);
-	return NOTIFY_DONE;
+	return 0;
 }
 #endif
 static int ctnetlink_exp_done(struct netlink_callback *cb)
@@ -1864,12 +1862,12 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
 }
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
-static struct notifier_block ctnl_notifier = {
-	.notifier_call	= ctnetlink_conntrack_event,
+static struct nf_ct_event_notifier ctnl_notifier = {
+	.fcn = ctnetlink_conntrack_event,
 };
 
-static struct notifier_block ctnl_notifier_exp = {
-	.notifier_call	= ctnetlink_expect_event,
+static struct nf_exp_event_notifier ctnl_notifier_exp = {
+	.fcn = ctnetlink_expect_event,
 };
 #endif
 
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 9dbd5709aad..92761a98837 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -107,9 +107,10 @@ int nfnetlink_has_listeners(unsigned int group)
 }
 EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
 
-int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
+int nfnetlink_send(struct sk_buff *skb, u32 pid,
+		   unsigned group, int echo, gfp_t flags)
 {
-	return nlmsg_notify(nfnl, skb, pid, group, echo, gfp_any());
+	return nlmsg_notify(nfnl, skb, pid, group, echo, flags);
 }
 EXPORT_SYMBOL_GPL(nfnetlink_send);
 
-- 
cgit v1.2.3-70-g09d2


From dfbf97f3ac980b69dfbc41c83a208211a38443e8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 2 Jun 2009 05:13:45 +0000
Subject: net: add _skb_dst opaque field

struct sk_buff uses one union to define dst and rtable fields.

We want to replace direct access to these pointers by accessors.

First patch adds a new "unsigned long _skb_dst;" opaque field
in this union.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index aff494ba6a3..d4d7c666ca6 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -325,6 +325,7 @@ struct sk_buff {
 	union {
 		struct  dst_entry	*dst;
 		struct  rtable		*rtable;
+		unsigned long		_skb_dst;
 	};
 #ifdef CONFIG_XFRM
 	struct	sec_path	*sp;
-- 
cgit v1.2.3-70-g09d2


From 511c3f92ad5b6d9f8f6464be1b4f85f0422be91a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 2 Jun 2009 05:14:27 +0000
Subject: net: skb->rtable accessor

Define skb_rtable(const struct sk_buff *skb) accessor to get rtable from skb

Delete skb->rtable field

Setting rtable is not allowed, just set dst instead as rtable is an alias.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/appletalk/ipddp.c           |  2 +-
 include/linux/skbuff.h                  |  6 +++++-
 include/net/route.h                     |  2 +-
 net/bridge/br_netfilter.c               | 25 +++++++++++++---------
 net/dccp/ipv4.c                         |  6 +++---
 net/ipv4/arp.c                          |  4 ++--
 net/ipv4/icmp.c                         | 10 ++++-----
 net/ipv4/igmp.c                         |  2 +-
 net/ipv4/ip_forward.c                   |  2 +-
 net/ipv4/ip_gre.c                       |  4 ++--
 net/ipv4/ip_input.c                     |  2 +-
 net/ipv4/ip_options.c                   | 16 +++++++-------
 net/ipv4/ip_output.c                    | 10 ++++-----
 net/ipv4/ip_sockglue.c                  |  2 +-
 net/ipv4/ipip.c                         |  2 +-
 net/ipv4/ipmr.c                         |  6 +++---
 net/ipv4/netfilter/ipt_MASQUERADE.c     |  2 +-
 net/ipv4/netfilter/nf_nat_helper.c      |  4 ++--
 net/ipv4/route.c                        | 37 ++++++++++++++++++++-------------
 net/ipv4/tcp_ipv4.c                     |  4 ++--
 net/netfilter/nf_conntrack_netbios_ns.c |  2 +-
 net/sched/em_meta.c                     |  4 ++--
 net/sctp/protocol.c                     |  8 +++----
 23 files changed, 89 insertions(+), 73 deletions(-)

(limited to 'include')

diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c
index 9832b757f10..78cea5e80b1 100644
--- a/drivers/net/appletalk/ipddp.c
+++ b/drivers/net/appletalk/ipddp.c
@@ -115,7 +115,7 @@ static struct net_device * __init ipddp_init(void)
  */
 static int ipddp_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	__be32 paddr = ((struct rtable*)skb->dst)->rt_gateway;
+	__be32 paddr = skb_rtable(skb)->rt_gateway;
         struct ddpehdr *ddp;
         struct ipddp_route *rt;
         struct atalk_addr *our_addr;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d4d7c666ca6..a3ae3c52583 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -324,7 +324,6 @@ struct sk_buff {
 
 	union {
 		struct  dst_entry	*dst;
-		struct  rtable		*rtable;
 		unsigned long		_skb_dst;
 	};
 #ifdef CONFIG_XFRM
@@ -427,6 +426,11 @@ extern void skb_dma_unmap(struct device *dev, struct sk_buff *skb,
 			  enum dma_data_direction dir);
 #endif
 
+static inline struct rtable *skb_rtable(const struct sk_buff *skb)
+{
+	return (struct rtable *)skb->_skb_dst;
+}
+
 extern void kfree_skb(struct sk_buff *skb);
 extern void consume_skb(struct sk_buff *skb);
 extern void	       __kfree_skb(struct sk_buff *skb);
diff --git a/include/net/route.h b/include/net/route.h
index 4e8cae0e584..40f6346ef49 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -210,7 +210,7 @@ static inline struct inet_peer *rt_get_peer(struct rtable *rt)
 
 static inline int inet_iif(const struct sk_buff *skb)
 {
-	return skb->rtable->rt_iif;
+	return skb_rtable(skb)->rt_iif;
 }
 
 #endif	/* _ROUTE_H */
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index e4a418fcb35..e0ceb66a9ec 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -228,6 +228,7 @@ int nf_bridge_copy_header(struct sk_buff *skb)
 static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
 {
 	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+	struct rtable *rt;
 
 	if (nf_bridge->mask & BRNF_PKT_TYPE) {
 		skb->pkt_type = PACKET_OTHERHOST;
@@ -235,12 +236,13 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
 	}
 	nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
 
-	skb->rtable = bridge_parent_rtable(nf_bridge->physindev);
-	if (!skb->rtable) {
+	rt = bridge_parent_rtable(nf_bridge->physindev);
+	if (!rt) {
 		kfree_skb(skb);
 		return 0;
 	}
-	dst_hold(&skb->rtable->u.dst);
+	dst_hold(&rt->u.dst);
+	skb->dst = &rt->u.dst;
 
 	skb->dev = nf_bridge->physindev;
 	nf_bridge_push_encap_header(skb);
@@ -338,6 +340,7 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
 	struct net_device *dev = skb->dev;
 	struct iphdr *iph = ip_hdr(skb);
 	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+	struct rtable *rt;
 	int err;
 
 	if (nf_bridge->mask & BRNF_PKT_TYPE) {
@@ -347,7 +350,6 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
 	nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
 	if (dnat_took_place(skb)) {
 		if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
-			struct rtable *rt;
 			struct flowi fl = {
 				.nl_u = {
 					.ip4_u = {
@@ -404,12 +406,13 @@ bridged_dnat:
 			skb->pkt_type = PACKET_HOST;
 		}
 	} else {
-		skb->rtable = bridge_parent_rtable(nf_bridge->physindev);
-		if (!skb->rtable) {
+		rt = bridge_parent_rtable(nf_bridge->physindev);
+		if (!rt) {
 			kfree_skb(skb);
 			return 0;
 		}
-		dst_hold(&skb->rtable->u.dst);
+		dst_hold(&rt->u.dst);
+		skb->dst = &rt->u.dst;
 	}
 
 	skb->dev = nf_bridge->physindev;
@@ -628,9 +631,11 @@ static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb,
 				   const struct net_device *out,
 				   int (*okfn)(struct sk_buff *))
 {
-	if (skb->rtable && skb->rtable == bridge_parent_rtable(in)) {
-		dst_release(&skb->rtable->u.dst);
-		skb->rtable = NULL;
+	struct rtable *rt = skb_rtable(skb);
+
+	if (rt && rt == bridge_parent_rtable(in)) {
+		dst_release(&rt->u.dst);
+		skb->dst = NULL;
 	}
 
 	return NF_ACCEPT;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index d1dd95289b8..2cf48ba0dbb 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -452,7 +452,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
 					   struct sk_buff *skb)
 {
 	struct rtable *rt;
-	struct flowi fl = { .oif = skb->rtable->rt_iif,
+	struct flowi fl = { .oif = skb_rtable(skb)->rt_iif,
 			    .nl_u = { .ip4_u =
 				      { .daddr = ip_hdr(skb)->saddr,
 					.saddr = ip_hdr(skb)->daddr,
@@ -514,7 +514,7 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 	if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET)
 		return;
 
-	if (rxskb->rtable->rt_type != RTN_LOCAL)
+	if (skb_rtable(rxskb)->rt_type != RTN_LOCAL)
 		return;
 
 	dst = dccp_v4_route_skb(net, ctl_sk, rxskb);
@@ -567,7 +567,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
 
 	/* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */
-	if (skb->rtable->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
+	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
 		return 0;	/* discard, don't send a reset here */
 
 	if (dccp_bad_service_code(sk, service)) {
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index f11931c1838..816494f271a 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -474,7 +474,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
 		return 1;
 	}
 
-	paddr = skb->rtable->rt_gateway;
+	paddr = skb_rtable(skb)->rt_gateway;
 
 	if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr, paddr, dev))
 		return 0;
@@ -817,7 +817,7 @@ static int arp_process(struct sk_buff *skb)
 	if (arp->ar_op == htons(ARPOP_REQUEST) &&
 	    ip_route_input(skb, tip, sip, 0, dev) == 0) {
 
-		rt = skb->rtable;
+		rt = skb_rtable(skb);
 		addr_type = rt->rt_type;
 
 		if (addr_type == RTN_LOCAL) {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 3f50807237e..94f75efae93 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -356,7 +356,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
 static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 {
 	struct ipcm_cookie ipc;
-	struct rtable *rt = skb->rtable;
+	struct rtable *rt = skb_rtable(skb);
 	struct net *net = dev_net(rt->u.dst.dev);
 	struct sock *sk;
 	struct inet_sock *inet;
@@ -416,7 +416,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	struct iphdr *iph;
 	int room;
 	struct icmp_bxm icmp_param;
-	struct rtable *rt = skb_in->rtable;
+	struct rtable *rt = skb_rtable(skb_in);
 	struct ipcm_cookie ipc;
 	__be32 saddr;
 	u8  tos;
@@ -596,7 +596,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 					     RT_TOS(tos), rt2->u.dst.dev);
 
 			dst_release(&rt2->u.dst);
-			rt2 = skb_in->rtable;
+			rt2 = skb_rtable(skb_in);
 			skb_in->dst = odst;
 		}
 
@@ -926,7 +926,7 @@ static void icmp_address(struct sk_buff *skb)
 
 static void icmp_address_reply(struct sk_buff *skb)
 {
-	struct rtable *rt = skb->rtable;
+	struct rtable *rt = skb_rtable(skb);
 	struct net_device *dev = skb->dev;
 	struct in_device *in_dev;
 	struct in_ifaddr *ifa;
@@ -970,7 +970,7 @@ static void icmp_discard(struct sk_buff *skb)
 int icmp_rcv(struct sk_buff *skb)
 {
 	struct icmphdr *icmph;
-	struct rtable *rt = skb->rtable;
+	struct rtable *rt = skb_rtable(skb);
 	struct net *net = dev_net(rt->u.dst.dev);
 
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index e6058a50379..afabd2758b6 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -948,7 +948,7 @@ int igmp_rcv(struct sk_buff *skb)
 	case IGMPV2_HOST_MEMBERSHIP_REPORT:
 	case IGMPV3_HOST_MEMBERSHIP_REPORT:
 		/* Is it our report looped back? */
-		if (skb->rtable->fl.iif == 0)
+		if (skb_rtable(skb)->fl.iif == 0)
 			break;
 		/* don't rely on MC router hearing unicast reports */
 		if (skb->pkt_type == PACKET_MULTICAST ||
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index df3fe50bbf0..0761cd9bbd1 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -81,7 +81,7 @@ int ip_forward(struct sk_buff *skb)
 	if (!xfrm4_route_forward(skb))
 		goto drop;
 
-	rt = skb->rtable;
+	rt = skb_rtable(skb);
 
 	if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
 		goto sr_failed;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 77436e2732e..85ddad45a91 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -602,7 +602,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 #ifdef CONFIG_NET_IPGRE_BROADCAST
 		if (ipv4_is_multicast(iph->daddr)) {
 			/* Looped back packet, drop it! */
-			if (skb->rtable->fl.iif == 0)
+			if (skb_rtable(skb)->fl.iif == 0)
 				goto drop;
 			stats->multicast++;
 			skb->pkt_type = PACKET_BROADCAST;
@@ -704,7 +704,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 
 		if (skb->protocol == htons(ETH_P_IP)) {
-			rt = skb->rtable;
+			rt = skb_rtable(skb);
 			if ((dst = rt->rt_gateway) == 0)
 				goto tx_error_icmp;
 		}
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 40f6206b2aa..cea784b0aa4 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -357,7 +357,7 @@ static int ip_rcv_finish(struct sk_buff *skb)
 	if (iph->ihl > 5 && ip_rcv_options(skb))
 		goto drop;
 
-	rt = skb->rtable;
+	rt = skb_rtable(skb);
 	if (rt->rt_type == RTN_MULTICAST) {
 		IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCAST,
 				skb->len);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 2c88da6e786..7e1074ffdbd 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -102,7 +102,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
 	sptr = skb_network_header(skb);
 	dptr = dopt->__data;
 
-	daddr = skb->rtable->rt_spec_dst;
+	daddr = skb_rtable(skb)->rt_spec_dst;
 
 	if (sopt->rr) {
 		optlen  = sptr[sopt->rr+1];
@@ -257,7 +257,7 @@ int ip_options_compile(struct net *net,
 	struct rtable *rt = NULL;
 
 	if (skb != NULL) {
-		rt = skb->rtable;
+		rt = skb_rtable(skb);
 		optptr = (unsigned char *)&(ip_hdr(skb)[1]);
 	} else
 		optptr = opt->__data;
@@ -550,7 +550,7 @@ void ip_forward_options(struct sk_buff *skb)
 {
 	struct   ip_options * opt	= &(IPCB(skb)->opt);
 	unsigned char * optptr;
-	struct rtable *rt = skb->rtable;
+	struct rtable *rt = skb_rtable(skb);
 	unsigned char *raw = skb_network_header(skb);
 
 	if (opt->rr_needaddr) {
@@ -598,7 +598,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
 	__be32 nexthop;
 	struct iphdr *iph = ip_hdr(skb);
 	unsigned char *optptr = skb_network_header(skb) + opt->srr;
-	struct rtable *rt = skb->rtable;
+	struct rtable *rt = skb_rtable(skb);
 	struct rtable *rt2;
 	int err;
 
@@ -623,13 +623,13 @@ int ip_options_rcv_srr(struct sk_buff *skb)
 		}
 		memcpy(&nexthop, &optptr[srrptr-1], 4);
 
-		rt = skb->rtable;
-		skb->rtable = NULL;
+		rt = skb_rtable(skb);
+		skb->dst = NULL;
 		err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev);
-		rt2 = skb->rtable;
+		rt2 = skb_rtable(skb);
 		if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
 			ip_rt_put(rt2);
-			skb->rtable = rt;
+			skb->dst = &rt->u.dst;
 			return -EINVAL;
 		}
 		ip_rt_put(rt);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index ea19c37ccc0..8d845ebfcca 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -140,7 +140,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
 			  __be32 saddr, __be32 daddr, struct ip_options *opt)
 {
 	struct inet_sock *inet = inet_sk(sk);
-	struct rtable *rt = skb->rtable;
+	struct rtable *rt = skb_rtable(skb);
 	struct iphdr *iph;
 
 	/* Build the IP header. */
@@ -238,7 +238,7 @@ static int ip_finish_output(struct sk_buff *skb)
 int ip_mc_output(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
-	struct rtable *rt = skb->rtable;
+	struct rtable *rt = skb_rtable(skb);
 	struct net_device *dev = rt->u.dst.dev;
 
 	/*
@@ -319,7 +319,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
 	/* Skip all of this if the packet is already routed,
 	 * f.e. by something like SCTP.
 	 */
-	rt = skb->rtable;
+	rt = skb_rtable(skb);
 	if (rt != NULL)
 		goto packet_routed;
 
@@ -440,7 +440,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	unsigned int mtu, hlen, left, len, ll_rs, pad;
 	int offset;
 	__be16 not_last_frag;
-	struct rtable *rt = skb->rtable;
+	struct rtable *rt = skb_rtable(skb);
 	int err = 0;
 
 	dev = rt->u.dst.dev;
@@ -1362,7 +1362,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
 	} replyopts;
 	struct ipcm_cookie ipc;
 	__be32 daddr;
-	struct rtable *rt = skb->rtable;
+	struct rtable *rt = skb_rtable(skb);
 
 	if (ip_options_echo(&replyopts.opt, skb))
 		return;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index cb49936856e..fc7993e9061 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -57,7 +57,7 @@
 static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
 {
 	struct in_pktinfo info;
-	struct rtable *rt = skb->rtable;
+	struct rtable *rt = skb_rtable(skb);
 
 	info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
 	if (rt) {
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index bb2f1b17fbf..0c6e7bf18a4 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -416,7 +416,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	if (!dst) {
 		/* NBMA tunnel */
-		if ((rt = skb->rtable) == NULL) {
+		if ((rt = skb_rtable(skb)) == NULL) {
 			stats->tx_fifo_errors++;
 			goto tx_error;
 		}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 13e9dd3012b..69dd058283e 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1354,7 +1354,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
 	if (net->ipv4.vif_table[vif].dev != skb->dev) {
 		int true_vifi;
 
-		if (skb->rtable->fl.iif == 0) {
+		if (skb_rtable(skb)->fl.iif == 0) {
 			/* It is our own packet, looped back.
 			   Very complicated situation...
 
@@ -1430,7 +1430,7 @@ int ip_mr_input(struct sk_buff *skb)
 {
 	struct mfc_cache *cache;
 	struct net *net = dev_net(skb->dev);
-	int local = skb->rtable->rt_flags&RTCF_LOCAL;
+	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
 
 	/* Packet is looped back after forward, it should not be
 	   forwarded second time, but still can be delivered locally.
@@ -1646,7 +1646,7 @@ int ipmr_get_route(struct net *net,
 {
 	int err;
 	struct mfc_cache *cache;
-	struct rtable *rt = skb->rtable;
+	struct rtable *rt = skb_rtable(skb);
 
 	read_lock(&mrt_lock);
 	cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index f389f60cb10..c0992c75bda 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -72,7 +72,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par)
 		return NF_ACCEPT;
 
 	mr = par->targinfo;
-	rt = skb->rtable;
+	rt = skb_rtable(skb);
 	newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
 	if (!newsrc) {
 		printk("MASQUERADE: %s ate my IP address\n", par->out->name);
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index cf7a42bf982..155c008626c 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -140,7 +140,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 			 const char *rep_buffer,
 			 unsigned int rep_len)
 {
-	struct rtable *rt = skb->rtable;
+	struct rtable *rt = skb_rtable(skb);
 	struct iphdr *iph;
 	struct tcphdr *tcph;
 	int oldlen, datalen;
@@ -218,7 +218,7 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
 			 const char *rep_buffer,
 			 unsigned int rep_len)
 {
-	struct rtable *rt = skb->rtable;
+	struct rtable *rt = skb_rtable(skb);
 	struct iphdr *iph;
 	struct udphdr *udph;
 	int datalen, oldlen;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 28205e5bfa9..f20060ac2f0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1064,7 +1064,8 @@ work_done:
 out:	return 0;
 }
 
-static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
+static int rt_intern_hash(unsigned hash, struct rtable *rt,
+			  struct rtable **rp, struct sk_buff *skb)
 {
 	struct rtable	*rth, **rthp;
 	unsigned long	now;
@@ -1114,7 +1115,10 @@ restart:
 			spin_unlock_bh(rt_hash_lock_addr(hash));
 
 			rt_drop(rt);
-			*rp = rth;
+			if (rp)
+				*rp = rth;
+			else
+				skb->dst = &rth->u.dst;
 			return 0;
 		}
 
@@ -1210,7 +1214,10 @@ restart:
 	rcu_assign_pointer(rt_hash_table[hash].chain, rt);
 
 	spin_unlock_bh(rt_hash_lock_addr(hash));
-	*rp = rt;
+	if (rp)
+		*rp = rt;
+	else
+		skb->dst = &rt->u.dst;
 	return 0;
 }
 
@@ -1407,7 +1414,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 							&netevent);
 
 				rt_del(hash, rth);
-				if (!rt_intern_hash(hash, rt, &rt))
+				if (!rt_intern_hash(hash, rt, &rt, NULL))
 					ip_rt_put(rt);
 				goto do_next;
 			}
@@ -1473,7 +1480,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 
 void ip_rt_send_redirect(struct sk_buff *skb)
 {
-	struct rtable *rt = skb->rtable;
+	struct rtable *rt = skb_rtable(skb);
 	struct in_device *in_dev = in_dev_get(rt->u.dst.dev);
 
 	if (!in_dev)
@@ -1521,7 +1528,7 @@ out:
 
 static int ip_error(struct sk_buff *skb)
 {
-	struct rtable *rt = skb->rtable;
+	struct rtable *rt = skb_rtable(skb);
 	unsigned long now;
 	int code;
 
@@ -1698,7 +1705,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
 
 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
 
-	rt = skb->rtable;
+	rt = skb_rtable(skb);
 	if (rt)
 		dst_set_expires(&rt->u.dst, 0);
 }
@@ -1858,7 +1865,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
 	in_dev_put(in_dev);
 	hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
-	return rt_intern_hash(hash, rth, &skb->rtable);
+	return rt_intern_hash(hash, rth, NULL, skb);
 
 e_nobufs:
 	in_dev_put(in_dev);
@@ -2019,7 +2026,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
 	/* put it into the cache */
 	hash = rt_hash(daddr, saddr, fl->iif,
 		       rt_genid(dev_net(rth->u.dst.dev)));
-	return rt_intern_hash(hash, rth, &skb->rtable);
+	return rt_intern_hash(hash, rth, NULL, skb);
 }
 
 /*
@@ -2175,7 +2182,7 @@ local_input:
 	}
 	rth->rt_type	= res.type;
 	hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
-	err = rt_intern_hash(hash, rth, &skb->rtable);
+	err = rt_intern_hash(hash, rth, NULL, skb);
 	goto done;
 
 no_route:
@@ -2244,7 +2251,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 			dst_use(&rth->u.dst, jiffies);
 			RT_CACHE_STAT_INC(in_hit);
 			rcu_read_unlock();
-			skb->rtable = rth;
+			skb->dst = &rth->u.dst;
 			return 0;
 		}
 		RT_CACHE_STAT_INC(in_hlist_search);
@@ -2420,7 +2427,7 @@ static int ip_mkroute_output(struct rtable **rp,
 	if (err == 0) {
 		hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif,
 			       rt_genid(dev_net(dev_out)));
-		err = rt_intern_hash(hash, rth, rp);
+		err = rt_intern_hash(hash, rth, rp, NULL);
 	}
 
 	return err;
@@ -2763,7 +2770,7 @@ static int rt_fill_info(struct net *net,
 			struct sk_buff *skb, u32 pid, u32 seq, int event,
 			int nowait, unsigned int flags)
 {
-	struct rtable *rt = skb->rtable;
+	struct rtable *rt = skb_rtable(skb);
 	struct rtmsg *r;
 	struct nlmsghdr *nlh;
 	long expires;
@@ -2907,7 +2914,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
 		local_bh_enable();
 
-		rt = skb->rtable;
+		rt = skb_rtable(skb);
 		if (err == 0 && rt->u.dst.error)
 			err = -rt->u.dst.error;
 	} else {
@@ -2927,7 +2934,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	if (err)
 		goto errout_free;
 
-	skb->rtable = rt;
+	skb->dst = &rt->u.dst;
 	if (rtm->rtm_flags & RTM_F_NOTIFY)
 		rt->rt_flags |= RTCF_NOTIFY;
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index fc79e341628..319c8852644 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -546,7 +546,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 	if (th->rst)
 		return;
 
-	if (skb->rtable->rt_type != RTN_LOCAL)
+	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
 		return;
 
 	/* Swap the send and the receive. */
@@ -1185,7 +1185,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 #endif
 
 	/* Never answer to SYNs send to broadcast or multicast */
-	if (skb->rtable->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
+	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
 		goto drop;
 
 	/* TW buckets are converted to open requests without
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 8a3875e36ec..497b2224536 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -48,7 +48,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
 {
 	struct nf_conntrack_expect *exp;
 	struct iphdr *iph = ip_hdr(skb);
-	struct rtable *rt = skb->rtable;
+	struct rtable *rt = skb_rtable(skb);
 	struct in_device *in_dev;
 	__be32 mask = 0;
 
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index fad596bf32d..b6b588bed4e 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -258,10 +258,10 @@ META_COLLECTOR(int_rtclassid)
 
 META_COLLECTOR(int_rtiif)
 {
-	if (unlikely(skb->rtable == NULL))
+	if (unlikely(skb_rtable(skb) == NULL))
 		*err = -1;
 	else
-		dst->value = skb->rtable->fl.iif;
+		dst->value = skb_rtable(skb)->fl.iif;
 }
 
 /**************************************************************************
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 8eb3e61cb70..cb2c50dbd42 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -393,7 +393,7 @@ static int sctp_v4_addr_valid(union sctp_addr *addr,
 		return 0;
 
 	/* Is this a broadcast address? */
-	if (skb && skb->rtable->rt_flags & RTCF_BROADCAST)
+	if (skb && skb_rtable(skb)->rt_flags & RTCF_BROADCAST)
 		return 0;
 
 	return 1;
@@ -572,7 +572,7 @@ static void sctp_v4_get_saddr(struct sctp_sock *sk,
 /* What interface did this skb arrive on? */
 static int sctp_v4_skb_iif(const struct sk_buff *skb)
 {
-	return skb->rtable->rt_iif;
+	return skb_rtable(skb)->rt_iif;
 }
 
 /* Was this packet marked by Explicit Congestion Notification? */
@@ -848,8 +848,8 @@ static inline int sctp_v4_xmit(struct sk_buff *skb,
 
 	SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n",
 			  __func__, skb, skb->len,
-			  &skb->rtable->rt_src,
-			  &skb->rtable->rt_dst);
+			  &skb_rtable(skb)->rt_src,
+			  &skb_rtable(skb)->rt_dst);
 
 	inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ?
 			 IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
-- 
cgit v1.2.3-70-g09d2


From adf30907d63893e4208dfe3f5c88ae12bc2f25d5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 2 Jun 2009 05:19:30 +0000
Subject: net: skb->dst accessors

Define three accessors to get/set dst attached to a skb

struct dst_entry *skb_dst(const struct sk_buff *skb)

void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)

void skb_dst_drop(struct sk_buff *skb)
This one should replace occurrences of :
dst_release(skb->dst)
skb->dst = NULL;

Delete skb->dst field

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/ulp/ipoib/ipoib_cm.c        |  4 +-
 drivers/infiniband/ulp/ipoib/ipoib_main.c      | 30 ++++++-------
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 10 ++---
 drivers/net/pppol2tp.c                         | 11 +++--
 drivers/s390/net/qeth_core_main.c              |  4 +-
 drivers/s390/net/qeth_l3_main.c                |  8 ++--
 include/linux/skbuff.h                         | 13 +++++-
 include/net/dst.h                              | 12 ++++--
 include/net/inet6_hashtables.h                 |  2 +-
 include/net/inet_hashtables.h                  |  2 +-
 include/net/ip6_route.h                        |  2 +-
 include/net/xfrm.h                             |  4 +-
 net/atm/br2684.c                               |  2 +-
 net/atm/clip.c                                 | 14 +++---
 net/bridge/br_netfilter.c                      | 18 ++++----
 net/core/dev.c                                 |  7 ++-
 net/core/neighbour.c                           | 11 +++--
 net/core/skbuff.c                              |  4 +-
 net/dccp/ipv4.c                                |  4 +-
 net/dccp/ipv6.c                                |  8 ++--
 net/dccp/output.c                              |  2 +-
 net/decnet/af_decnet.c                         |  6 ++-
 net/decnet/dn_neigh.c                          |  8 ++--
 net/decnet/dn_nsp_out.c                        |  6 +--
 net/decnet/dn_route.c                          | 25 +++++------
 net/ipv4/arp.c                                 |  2 +-
 net/ipv4/icmp.c                                | 10 ++---
 net/ipv4/igmp.c                                |  4 +-
 net/ipv4/ip_forward.c                          |  4 +-
 net/ipv4/ip_fragment.c                         |  2 +-
 net/ipv4/ip_gre.c                              | 23 +++++-----
 net/ipv4/ip_input.c                            |  6 +--
 net/ipv4/ip_options.c                          |  6 +--
 net/ipv4/ip_output.c                           | 20 ++++-----
 net/ipv4/ipip.c                                | 13 +++---
 net/ipv4/ipmr.c                                | 13 +++---
 net/ipv4/netfilter.c                           | 28 ++++++------
 net/ipv4/netfilter/ipt_REJECT.c                |  7 ++-
 net/ipv4/netfilter/nf_nat_standalone.c         |  7 ++-
 net/ipv4/raw.c                                 |  2 +-
 net/ipv4/route.c                               | 14 +++---
 net/ipv4/tcp_ipv4.c                            |  4 +-
 net/ipv4/tcp_output.c                          |  2 +-
 net/ipv4/udp.c                                 |  4 +-
 net/ipv4/xfrm4_input.c                         |  2 +-
 net/ipv4/xfrm4_mode_tunnel.c                   |  4 +-
 net/ipv4/xfrm4_output.c                        |  6 +--
 net/ipv6/exthdrs.c                             | 40 ++++++++---------
 net/ipv6/inet6_connection_sock.c               |  2 +-
 net/ipv6/ip6_input.c                           | 12 +++---
 net/ipv6/ip6_output.c                          | 60 +++++++++++++-------------
 net/ipv6/ip6_tunnel.c                          | 26 ++++++-----
 net/ipv6/ip6mr.c                               | 13 +++---
 net/ipv6/mcast.c                               | 17 +++++---
 net/ipv6/ndisc.c                               |  4 +-
 net/ipv6/netfilter.c                           | 16 ++++---
 net/ipv6/netfilter/ip6t_REJECT.c               |  2 +-
 net/ipv6/raw.c                                 |  2 +-
 net/ipv6/reassembly.c                          | 22 +++++-----
 net/ipv6/route.c                               | 12 +++---
 net/ipv6/sit.c                                 | 21 +++++----
 net/ipv6/tcp_ipv6.c                            |  8 ++--
 net/ipv6/udp.c                                 |  7 ++-
 net/ipv6/xfrm6_mode_tunnel.c                   |  4 +-
 net/ipv6/xfrm6_output.c                        |  4 +-
 net/netfilter/ipvs/ip_vs_xmit.c                | 48 ++++++++++-----------
 net/netfilter/nf_conntrack_proto_gre.c         |  2 +-
 net/netfilter/xt_TCPMSS.c                      |  6 +--
 net/netfilter/xt_policy.c                      |  2 +-
 net/netfilter/xt_realm.c                       |  2 +-
 net/packet/af_packet.c                         |  6 +--
 net/sched/cls_flow.c                           |  8 ++--
 net/sched/cls_route.c                          |  2 +-
 net/sched/em_meta.c                            |  4 +-
 net/sched/sch_sfq.c                            |  2 +-
 net/sched/sch_teql.c                           |  6 +--
 net/sctp/output.c                              |  6 +--
 net/sunrpc/xprtsock.c                          |  2 +-
 net/xfrm/xfrm_input.c                          |  3 +-
 net/xfrm/xfrm_output.c                         | 21 ++++-----
 net/xfrm/xfrm_policy.c                         |  8 +++-
 security/selinux/hooks.c                       |  2 +-
 security/selinux/xfrm.c                        |  2 +-
 83 files changed, 414 insertions(+), 390 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 4248c313936..181b1f32325 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1394,8 +1394,8 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int e = skb_queue_empty(&priv->cm.skb_queue);
 
-	if (skb->dst)
-		skb->dst->ops->update_pmtu(skb->dst, mtu);
+	if (skb_dst(skb))
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
 
 	skb_queue_tail(&priv->cm.skb_queue, skb);
 	if (e)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 69c6304cc94..e319d91f60a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -561,7 +561,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
 	struct ipoib_neigh *neigh;
 	unsigned long flags;
 
-	neigh = ipoib_neigh_alloc(skb->dst->neighbour, skb->dev);
+	neigh = ipoib_neigh_alloc(skb_dst(skb)->neighbour, skb->dev);
 	if (!neigh) {
 		++dev->stats.tx_dropped;
 		dev_kfree_skb_any(skb);
@@ -570,9 +570,9 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
 
 	spin_lock_irqsave(&priv->lock, flags);
 
-	path = __path_find(dev, skb->dst->neighbour->ha + 4);
+	path = __path_find(dev, skb_dst(skb)->neighbour->ha + 4);
 	if (!path) {
-		path = path_rec_create(dev, skb->dst->neighbour->ha + 4);
+		path = path_rec_create(dev, skb_dst(skb)->neighbour->ha + 4);
 		if (!path)
 			goto err_path;
 
@@ -605,7 +605,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
 				goto err_drop;
 			}
 		} else
-			ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha));
+			ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb_dst(skb)->neighbour->ha));
 	} else {
 		neigh->ah  = NULL;
 
@@ -635,15 +635,15 @@ static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev)
 	struct ipoib_dev_priv *priv = netdev_priv(skb->dev);
 
 	/* Look up path record for unicasts */
-	if (skb->dst->neighbour->ha[4] != 0xff) {
+	if (skb_dst(skb)->neighbour->ha[4] != 0xff) {
 		neigh_add_path(skb, dev);
 		return;
 	}
 
 	/* Add in the P_Key for multicasts */
-	skb->dst->neighbour->ha[8] = (priv->pkey >> 8) & 0xff;
-	skb->dst->neighbour->ha[9] = priv->pkey & 0xff;
-	ipoib_mcast_send(dev, skb->dst->neighbour->ha + 4, skb);
+	skb_dst(skb)->neighbour->ha[8] = (priv->pkey >> 8) & 0xff;
+	skb_dst(skb)->neighbour->ha[9] = priv->pkey & 0xff;
+	ipoib_mcast_send(dev, skb_dst(skb)->neighbour->ha + 4, skb);
 }
 
 static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
@@ -708,16 +708,16 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct ipoib_neigh *neigh;
 	unsigned long flags;
 
-	if (likely(skb->dst && skb->dst->neighbour)) {
-		if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
+	if (likely(skb_dst(skb) && skb_dst(skb)->neighbour)) {
+		if (unlikely(!*to_ipoib_neigh(skb_dst(skb)->neighbour))) {
 			ipoib_path_lookup(skb, dev);
 			return NETDEV_TX_OK;
 		}
 
-		neigh = *to_ipoib_neigh(skb->dst->neighbour);
+		neigh = *to_ipoib_neigh(skb_dst(skb)->neighbour);
 
 		if (unlikely((memcmp(&neigh->dgid.raw,
-				     skb->dst->neighbour->ha + 4,
+				     skb_dst(skb)->neighbour->ha + 4,
 				     sizeof(union ib_gid))) ||
 			     (neigh->dev != dev))) {
 			spin_lock_irqsave(&priv->lock, flags);
@@ -743,7 +743,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 				return NETDEV_TX_OK;
 			}
 		} else if (neigh->ah) {
-			ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha));
+			ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb_dst(skb)->neighbour->ha));
 			return NETDEV_TX_OK;
 		}
 
@@ -772,7 +772,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			if ((be16_to_cpup((__be16 *) skb->data) != ETH_P_ARP) &&
 			    (be16_to_cpup((__be16 *) skb->data) != ETH_P_RARP)) {
 				ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x %pI6\n",
-					   skb->dst ? "neigh" : "dst",
+					   skb_dst(skb) ? "neigh" : "dst",
 					   be16_to_cpup((__be16 *) skb->data),
 					   IPOIB_QPN(phdr->hwaddr),
 					   phdr->hwaddr + 4);
@@ -817,7 +817,7 @@ static int ipoib_hard_header(struct sk_buff *skb,
 	 * destination address onto the front of the skb so we can
 	 * figure out where to send the packet later.
 	 */
-	if ((!skb->dst || !skb->dst->neighbour) && daddr) {
+	if ((!skb_dst(skb) || !skb_dst(skb)->neighbour) && daddr) {
 		struct ipoib_pseudoheader *phdr =
 			(struct ipoib_pseudoheader *) skb_push(skb, sizeof *phdr);
 		memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 425e31112ed..a0e97532e71 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -261,7 +261,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 
 		skb->dev = dev;
 
-		if (!skb->dst || !skb->dst->neighbour) {
+		if (!skb_dst(skb) || !skb_dst(skb)->neighbour) {
 			/* put pseudoheader back on for next time */
 			skb_push(skb, sizeof (struct ipoib_pseudoheader));
 		}
@@ -707,10 +707,10 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
 
 out:
 	if (mcast && mcast->ah) {
-		if (skb->dst		&&
-		    skb->dst->neighbour &&
-		    !*to_ipoib_neigh(skb->dst->neighbour)) {
-			struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour,
+		if (skb_dst(skb)		&&
+		    skb_dst(skb)->neighbour &&
+		    !*to_ipoib_neigh(skb_dst(skb)->neighbour)) {
+			struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb_dst(skb)->neighbour,
 									skb->dev);
 
 			if (neigh) {
diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c
index 5981debcde5..e7935d09c89 100644
--- a/drivers/net/pppol2tp.c
+++ b/drivers/net/pppol2tp.c
@@ -433,8 +433,7 @@ static void pppol2tp_recv_dequeue_skb(struct pppol2tp_session *session, struct s
 		 *   to the inner packet either
 		 */
 		secpath_reset(skb);
-		dst_release(skb->dst);
-		skb->dst = NULL;
+		skb_dst_drop(skb);
 		nf_reset(skb);
 
 		po = pppox_sk(session_sock);
@@ -976,7 +975,7 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh
 	/* Calculate UDP checksum if configured to do so */
 	if (sk_tun->sk_no_check == UDP_CSUM_NOXMIT)
 		skb->ip_summed = CHECKSUM_NONE;
-	else if (!(skb->dst->dev->features & NETIF_F_V4_CSUM)) {
+	else if (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
 		skb->ip_summed = CHECKSUM_COMPLETE;
 		csum = skb_checksum(skb, 0, udp_len, 0);
 		uh->check = csum_tcpudp_magic(inet->saddr, inet->daddr,
@@ -1172,14 +1171,14 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	nf_reset(skb);
 
 	/* Get routing info from the tunnel socket */
-	dst_release(skb->dst);
-	skb->dst = dst_clone(__sk_dst_get(sk_tun));
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst_clone(__sk_dst_get(sk_tun)));
 	pppol2tp_skb_set_owner_w(skb, sk_tun);
 
 	/* Calculate UDP checksum if configured to do so */
 	if (sk_tun->sk_no_check == UDP_CSUM_NOXMIT)
 		skb->ip_summed = CHECKSUM_NONE;
-	else if (!(skb->dst->dev->features & NETIF_F_V4_CSUM)) {
+	else if (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
 		skb->ip_summed = CHECKSUM_COMPLETE;
 		csum = skb_checksum(skb, 0, udp_len, 0);
 		uh->check = csum_tcpudp_magic(inet->saddr, inet->daddr,
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 2994aa1ed46..74c49d9a8db 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -2937,8 +2937,8 @@ int qeth_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
 	if (card->info.type == QETH_CARD_TYPE_OSN)
 		return cast_type;
 
-	if (skb->dst && skb->dst->neighbour) {
-		cast_type = skb->dst->neighbour->type;
+	if (skb_dst(skb) && skb_dst(skb)->neighbour) {
+		cast_type = skb_dst(skb)->neighbour->type;
 		if ((cast_type == RTN_BROADCAST) ||
 		    (cast_type == RTN_MULTICAST) ||
 		    (cast_type == RTN_ANYCAST))
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index cb64b0b534a..6f2386e9d6e 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2549,9 +2549,9 @@ static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
 		/* IPv4 */
 		hdr->hdr.l3.flags = qeth_l3_get_qeth_hdr_flags4(cast_type);
 		memset(hdr->hdr.l3.dest_addr, 0, 12);
-		if ((skb->dst) && (skb->dst->neighbour)) {
+		if ((skb_dst(skb)) && (skb_dst(skb)->neighbour)) {
 			*((u32 *) (&hdr->hdr.l3.dest_addr[12])) =
-			    *((u32 *) skb->dst->neighbour->primary_key);
+			    *((u32 *) skb_dst(skb)->neighbour->primary_key);
 		} else {
 			/* fill in destination address used in ip header */
 			*((u32 *) (&hdr->hdr.l3.dest_addr[12])) =
@@ -2562,9 +2562,9 @@ static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
 		hdr->hdr.l3.flags = qeth_l3_get_qeth_hdr_flags6(cast_type);
 		if (card->info.type == QETH_CARD_TYPE_IQD)
 			hdr->hdr.l3.flags &= ~QETH_HDR_PASSTHRU;
-		if ((skb->dst) && (skb->dst->neighbour)) {
+		if ((skb_dst(skb)) && (skb_dst(skb)->neighbour)) {
 			memcpy(hdr->hdr.l3.dest_addr,
-			       skb->dst->neighbour->primary_key, 16);
+			       skb_dst(skb)->neighbour->primary_key, 16);
 		} else {
 			/* fill in destination address used in ip header */
 			memcpy(hdr->hdr.l3.dest_addr,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a3ae3c52583..9ef6eb20247 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -323,7 +323,6 @@ struct sk_buff {
 	struct net_device	*dev;
 
 	union {
-		struct  dst_entry	*dst;
 		unsigned long		_skb_dst;
 	};
 #ifdef CONFIG_XFRM
@@ -426,9 +425,19 @@ extern void skb_dma_unmap(struct device *dev, struct sk_buff *skb,
 			  enum dma_data_direction dir);
 #endif
 
+static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
+{
+	return (struct dst_entry *)skb->_skb_dst;
+}
+
+static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
+{
+	skb->_skb_dst = (unsigned long)dst;
+}
+
 static inline struct rtable *skb_rtable(const struct sk_buff *skb)
 {
-	return (struct rtable *)skb->_skb_dst;
+	return (struct rtable *)skb_dst(skb);
 }
 
 extern void kfree_skb(struct sk_buff *skb);
diff --git a/include/net/dst.h b/include/net/dst.h
index 6be3b082a07..7fc409c19b3 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -195,6 +195,12 @@ struct dst_entry * dst_clone(struct dst_entry * dst)
 }
 
 extern void dst_release(struct dst_entry *dst);
+static inline void skb_dst_drop(struct sk_buff *skb)
+{
+	if (skb->_skb_dst)
+		dst_release(skb_dst(skb));
+	skb->_skb_dst = 0UL;
+}
 
 /* Children define the path of the packet through the
  * Linux networking.  Thus, destinations are stackable.
@@ -246,7 +252,7 @@ static inline void dst_negative_advice(struct dst_entry **dst_p)
 
 static inline void dst_link_failure(struct sk_buff *skb)
 {
-	struct dst_entry * dst = skb->dst;
+	struct dst_entry *dst = skb_dst(skb);
 	if (dst && dst->ops && dst->ops->link_failure)
 		dst->ops->link_failure(skb);
 }
@@ -265,13 +271,13 @@ static inline void dst_set_expires(struct dst_entry *dst, int timeout)
 /* Output packet to network from transport.  */
 static inline int dst_output(struct sk_buff *skb)
 {
-	return skb->dst->output(skb);
+	return skb_dst(skb)->output(skb);
 }
 
 /* Input packet from network to transport.  */
 static inline int dst_input(struct sk_buff *skb)
 {
-	return skb->dst->input(skb);
+	return skb_dst(skb)->input(skb);
 }
 
 static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie)
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index f74665d7bea..22c73a77cd9 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -100,7 +100,7 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
 
 	if (unlikely(sk = skb_steal_sock(skb)))
 		return sk;
-	else return __inet6_lookup(dev_net(skb->dst->dev), hashinfo,
+	else return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo,
 				   &ipv6_hdr(skb)->saddr, sport,
 				   &ipv6_hdr(skb)->daddr, ntohs(dport),
 				   inet6_iif(skb));
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index a44e2248b2e..d522dcf3031 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -385,7 +385,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
 	if (unlikely(sk = skb_steal_sock(skb)))
 		return sk;
 	else
-		return __inet_lookup(dev_net(skb->dst->dev), hashinfo,
+		return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo,
 				     iph->saddr, sport,
 				     iph->daddr, dport, inet_iif(skb));
 }
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 5f53db7e4e5..0e1b8aebaff 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -142,7 +142,7 @@ static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst,
 
 static inline int ipv6_unicast_destination(struct sk_buff *skb)
 {
-	struct rt6_info *rt = (struct rt6_info *) skb->dst;
+	struct rt6_info *rt = (struct rt6_info *) skb_dst(skb);
 
 	return rt->rt6i_flags & RTF_LOCAL;
 }
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 2e9f5c0018a..736bca45088 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -994,7 +994,7 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir,
 		return __xfrm_policy_check(sk, ndir, skb, family);
 
 	return	(!net->xfrm.policy_count[dir] && !skb->sp) ||
-		(skb->dst->flags & DST_NOPOLICY) ||
+		(skb_dst(skb)->flags & DST_NOPOLICY) ||
 		__xfrm_policy_check(sk, ndir, skb, family);
 }
 
@@ -1048,7 +1048,7 @@ static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 	struct net *net = dev_net(skb->dev);
 
 	return	!net->xfrm.policy_count[XFRM_POLICY_OUT] ||
-		(skb->dst->flags & DST_NOXFRM) ||
+		(skb_dst(skb)->flags & DST_NOXFRM) ||
 		__xfrm_route_forward(skb, family);
 }
 
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index bfa8fa9894f..2912665fc58 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -228,7 +228,7 @@ static int br2684_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct br2684_dev *brdev = BRPRIV(dev);
 	struct br2684_vcc *brvcc;
 
-	pr_debug("br2684_start_xmit, skb->dst=%p\n", skb->dst);
+	pr_debug("br2684_start_xmit, skb_dst(skb)=%p\n", skb_dst(skb));
 	read_lock(&devs_lock);
 	brvcc = pick_outgoing_vcc(skb, brdev);
 	if (brvcc == NULL) {
diff --git a/net/atm/clip.c b/net/atm/clip.c
index fb7623c080f..e65a3b1477f 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -369,16 +369,16 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	unsigned long flags;
 
 	pr_debug("clip_start_xmit (skb %p)\n", skb);
-	if (!skb->dst) {
-		printk(KERN_ERR "clip_start_xmit: skb->dst == NULL\n");
+	if (!skb_dst(skb)) {
+		printk(KERN_ERR "clip_start_xmit: skb_dst(skb) == NULL\n");
 		dev_kfree_skb(skb);
 		dev->stats.tx_dropped++;
 		return 0;
 	}
-	if (!skb->dst->neighbour) {
+	if (!skb_dst(skb)->neighbour) {
 #if 0
-		skb->dst->neighbour = clip_find_neighbour(skb->dst, 1);
-		if (!skb->dst->neighbour) {
+		skb_dst(skb)->neighbour = clip_find_neighbour(skb_dst(skb), 1);
+		if (!skb_dst(skb)->neighbour) {
 			dev_kfree_skb(skb);	/* lost that one */
 			dev->stats.tx_dropped++;
 			return 0;
@@ -389,7 +389,7 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		dev->stats.tx_dropped++;
 		return 0;
 	}
-	entry = NEIGH2ENTRY(skb->dst->neighbour);
+	entry = NEIGH2ENTRY(skb_dst(skb)->neighbour);
 	if (!entry->vccs) {
 		if (time_after(jiffies, entry->expires)) {
 			/* should be resolved */
@@ -406,7 +406,7 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 	pr_debug("neigh %p, vccs %p\n", entry, entry->vccs);
 	ATM_SKB(skb)->vcc = vcc = entry->vccs->vcc;
-	pr_debug("using neighbour %p, vcc %p\n", skb->dst->neighbour, vcc);
+	pr_debug("using neighbour %p, vcc %p\n", skb_dst(skb)->neighbour, vcc);
 	if (entry->vccs->encap) {
 		void *here;
 
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index e0ceb66a9ec..d22f611e400 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -242,7 +242,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
 		return 0;
 	}
 	dst_hold(&rt->u.dst);
-	skb->dst = &rt->u.dst;
+	skb_dst_set(skb, &rt->u.dst);
 
 	skb->dev = nf_bridge->physindev;
 	nf_bridge_push_encap_header(skb);
@@ -322,7 +322,7 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
 
 	skb->dev = bridge_parent(skb->dev);
 	if (skb->dev) {
-		struct dst_entry *dst = skb->dst;
+		struct dst_entry *dst = skb_dst(skb);
 
 		nf_bridge_pull_encap_header(skb);
 
@@ -375,7 +375,7 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
 				/* - Bridged-and-DNAT'ed traffic doesn't
 				 *   require ip_forwarding. */
 				if (((struct dst_entry *)rt)->dev == dev) {
-					skb->dst = (struct dst_entry *)rt;
+					skb_dst_set(skb, (struct dst_entry *)rt);
 					goto bridged_dnat;
 				}
 				/* we are sure that forwarding is disabled, so printing
@@ -389,7 +389,7 @@ free_skb:
 			kfree_skb(skb);
 			return 0;
 		} else {
-			if (skb->dst->dev == dev) {
+			if (skb_dst(skb)->dev == dev) {
 bridged_dnat:
 				/* Tell br_nf_local_out this is a
 				 * bridged frame */
@@ -412,7 +412,7 @@ bridged_dnat:
 			return 0;
 		}
 		dst_hold(&rt->u.dst);
-		skb->dst = &rt->u.dst;
+		skb_dst_set(skb, &rt->u.dst);
 	}
 
 	skb->dev = nf_bridge->physindev;
@@ -633,10 +633,8 @@ static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb,
 {
 	struct rtable *rt = skb_rtable(skb);
 
-	if (rt && rt == bridge_parent_rtable(in)) {
-		dst_release(&rt->u.dst);
-		skb->dst = NULL;
-	}
+	if (rt && rt == bridge_parent_rtable(in))
+		skb_dst_drop(skb);
 
 	return NF_ACCEPT;
 }
@@ -851,7 +849,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
 		return NF_ACCEPT;
 
 #ifdef CONFIG_NETFILTER_DEBUG
-	if (skb->dst == NULL) {
+	if (skb_dst(skb) == NULL) {
 		printk(KERN_INFO "br_netfilter post_routing: skb->dst == NULL\n");
 		goto print_error;
 	}
diff --git a/net/core/dev.c b/net/core/dev.c
index e2fcc5f1017..34b49a6a22f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1693,10 +1693,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		 * If device doesnt need skb->dst, release it right now while
 		 * its hot in this cpu cache
 		 */
-		if ((dev->priv_flags & IFF_XMIT_DST_RELEASE) && skb->dst) {
-			dst_release(skb->dst);
-			skb->dst = NULL;
-		}
+		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
+			skb_dst_drop(skb);
+
 		rc = ops->ndo_start_xmit(skb, dev);
 		if (rc == 0)
 			txq_trans_update(txq);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a1cbce7fdae..c54229befcf 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1088,8 +1088,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
 			struct neighbour *n1 = neigh;
 			write_unlock_bh(&neigh->lock);
 			/* On shaper/eql skb->dst->neighbour != neigh :( */
-			if (skb->dst && skb->dst->neighbour)
-				n1 = skb->dst->neighbour;
+			if (skb_dst(skb) && skb_dst(skb)->neighbour)
+				n1 = skb_dst(skb)->neighbour;
 			n1->output(skb);
 			write_lock_bh(&neigh->lock);
 		}
@@ -1182,7 +1182,7 @@ EXPORT_SYMBOL(neigh_compat_output);
 
 int neigh_resolve_output(struct sk_buff *skb)
 {
-	struct dst_entry *dst = skb->dst;
+	struct dst_entry *dst = skb_dst(skb);
 	struct neighbour *neigh;
 	int rc = 0;
 
@@ -1229,7 +1229,7 @@ EXPORT_SYMBOL(neigh_resolve_output);
 int neigh_connected_output(struct sk_buff *skb)
 {
 	int err;
-	struct dst_entry *dst = skb->dst;
+	struct dst_entry *dst = skb_dst(skb);
 	struct neighbour *neigh = dst->neighbour;
 	struct net_device *dev = neigh->dev;
 
@@ -1298,8 +1298,7 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
 		if (time_before(tbl->proxy_timer.expires, sched_next))
 			sched_next = tbl->proxy_timer.expires;
 	}
-	dst_release(skb->dst);
-	skb->dst = NULL;
+	skb_dst_drop(skb);
 	dev_hold(skb->dev);
 	__skb_queue_tail(&tbl->proxy_queue, skb);
 	mod_timer(&tbl->proxy_timer, sched_next);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8e815e685f2..6adf19ec95c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -381,7 +381,7 @@ static void kfree_skbmem(struct sk_buff *skb)
 
 static void skb_release_head_state(struct sk_buff *skb)
 {
-	dst_release(skb->dst);
+	skb_dst_drop(skb);
 #ifdef CONFIG_XFRM
 	secpath_put(skb->sp);
 #endif
@@ -521,7 +521,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->transport_header	= old->transport_header;
 	new->network_header	= old->network_header;
 	new->mac_header		= old->mac_header;
-	new->dst		= dst_clone(old->dst);
+	skb_dst_set(new, dst_clone(skb_dst(old)));
 #ifdef CONFIG_XFRM
 	new->sp			= secpath_get(old->sp);
 #endif
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 2cf48ba0dbb..a0a36c9e6cc 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -507,7 +507,7 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 	const struct iphdr *rxiph;
 	struct sk_buff *skb;
 	struct dst_entry *dst;
-	struct net *net = dev_net(rxskb->dst->dev);
+	struct net *net = dev_net(skb_dst(rxskb)->dev);
 	struct sock *ctl_sk = net->dccp.v4_ctl_sk;
 
 	/* Never send a reset in response to a reset. */
@@ -528,7 +528,7 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 	rxiph = ip_hdr(rxskb);
 	dccp_hdr(skb)->dccph_checksum = dccp_v4_csum_finish(skb, rxiph->saddr,
 								 rxiph->daddr);
-	skb->dst = dst_clone(dst);
+	skb_dst_set(skb, dst_clone(dst));
 
 	bh_lock_sock(ctl_sk);
 	err = ip_build_and_send_pkt(skb, ctl_sk,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index b963f35c65f..05ea7440d9e 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -314,8 +314,9 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 	struct ipv6hdr *rxip6h;
 	struct sk_buff *skb;
 	struct flowi fl;
-	struct net *net = dev_net(rxskb->dst->dev);
+	struct net *net = dev_net(skb_dst(rxskb)->dev);
 	struct sock *ctl_sk = net->dccp.v6_ctl_sk;
+	struct dst_entry *dst;
 
 	if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET)
 		return;
@@ -342,8 +343,9 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 	security_skb_classify_flow(rxskb, &fl);
 
 	/* sk = NULL, but it is safe for now. RST socket required. */
-	if (!ip6_dst_lookup(ctl_sk, &skb->dst, &fl)) {
-		if (xfrm_lookup(net, &skb->dst, &fl, NULL, 0) >= 0) {
+	if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) {
+		if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) {
+			skb_dst_set(skb, dst);
 			ip6_xmit(ctl_sk, skb, &fl, NULL, 0);
 			DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
 			DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 36bcc00654d..c0e88c16d08 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -350,7 +350,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 	/* Reserve space for headers. */
 	skb_reserve(skb, sk->sk_prot->max_header);
 
-	skb->dst = dst_clone(dst);
+	skb_dst_set(skb, dst_clone(dst));
 
 	dreq = dccp_rsk(req);
 	if (inet_rsk(req)->acked)	/* increase ISS upon retransmission */
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index bccb3887773..a5e3a593e47 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1075,6 +1075,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags)
 	int err = 0;
 	unsigned char type;
 	long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+	struct dst_entry *dst;
 
 	lock_sock(sk);
 
@@ -1102,8 +1103,9 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags)
 	}
 	release_sock(sk);
 
-	dst_release(xchg(&newsk->sk_dst_cache, skb->dst));
-	skb->dst = NULL;
+	dst = skb_dst(skb);
+	dst_release(xchg(&newsk->sk_dst_cache, dst));
+	skb_dst_set(skb, NULL);
 
 	DN_SK(newsk)->state        = DN_CR;
 	DN_SK(newsk)->addrrem      = cb->src_port;
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 05b5aa05e50..923786bd6d0 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -204,7 +204,7 @@ static void dn_short_error_report(struct neighbour *neigh, struct sk_buff *skb)
 
 static int dn_neigh_output_packet(struct sk_buff *skb)
 {
-	struct dst_entry *dst = skb->dst;
+	struct dst_entry *dst = skb_dst(skb);
 	struct dn_route *rt = (struct dn_route *)dst;
 	struct neighbour *neigh = dst->neighbour;
 	struct net_device *dev = neigh->dev;
@@ -224,7 +224,7 @@ static int dn_neigh_output_packet(struct sk_buff *skb)
 
 static int dn_long_output(struct sk_buff *skb)
 {
-	struct dst_entry *dst = skb->dst;
+	struct dst_entry *dst = skb_dst(skb);
 	struct neighbour *neigh = dst->neighbour;
 	struct net_device *dev = neigh->dev;
 	int headroom = dev->hard_header_len + sizeof(struct dn_long_packet) + 3;
@@ -270,7 +270,7 @@ static int dn_long_output(struct sk_buff *skb)
 
 static int dn_short_output(struct sk_buff *skb)
 {
-	struct dst_entry *dst = skb->dst;
+	struct dst_entry *dst = skb_dst(skb);
 	struct neighbour *neigh = dst->neighbour;
 	struct net_device *dev = neigh->dev;
 	int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2;
@@ -313,7 +313,7 @@ static int dn_short_output(struct sk_buff *skb)
  */
 static int dn_phase3_output(struct sk_buff *skb)
 {
-	struct dst_entry *dst = skb->dst;
+	struct dst_entry *dst = skb_dst(skb);
 	struct neighbour *neigh = dst->neighbour;
 	struct net_device *dev = neigh->dev;
 	int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2;
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index da04f459337..a65e929ce76 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -85,7 +85,7 @@ static void dn_nsp_send(struct sk_buff *skb)
 	dst = sk_dst_check(sk, 0);
 	if (dst) {
 try_again:
-		skb->dst = dst;
+		skb_dst_set(skb, dst);
 		dst_output(skb);
 		return;
 	}
@@ -582,7 +582,7 @@ static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg,
 	 * to be able to send disc packets out which have no socket
 	 * associations.
 	 */
-	skb->dst = dst_clone(dst);
+	skb_dst_set(skb, dst_clone(dst));
 	dst_output(skb);
 }
 
@@ -611,7 +611,7 @@ void dn_nsp_return_disc(struct sk_buff *skb, unsigned char msgflg,
 	int ddl = 0;
 	gfp_t gfp = GFP_ATOMIC;
 
-	dn_nsp_do_disc(NULL, msgflg, reason, gfp, skb->dst, ddl,
+	dn_nsp_do_disc(NULL, msgflg, reason, gfp, skb_dst(skb), ddl,
 			NULL, cb->src_port, cb->dst_port);
 }
 
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 0cc4394117d..1d6ca8a98dc 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -678,7 +678,7 @@ out:
 
 static int dn_output(struct sk_buff *skb)
 {
-	struct dst_entry *dst = skb->dst;
+	struct dst_entry *dst = skb_dst(skb);
 	struct dn_route *rt = (struct dn_route *)dst;
 	struct net_device *dev = dst->dev;
 	struct dn_skb_cb *cb = DN_SKB_CB(skb);
@@ -717,7 +717,7 @@ error:
 static int dn_forward(struct sk_buff *skb)
 {
 	struct dn_skb_cb *cb = DN_SKB_CB(skb);
-	struct dst_entry *dst = skb->dst;
+	struct dst_entry *dst = skb_dst(skb);
 	struct dn_dev *dn_db = dst->dev->dn_ptr;
 	struct dn_route *rt;
 	struct neighbour *neigh = dst->neighbour;
@@ -730,7 +730,7 @@ static int dn_forward(struct sk_buff *skb)
 		goto drop;
 
 	/* Ensure that we have enough space for headers */
-	rt = (struct dn_route *)skb->dst;
+	rt = (struct dn_route *)skb_dst(skb);
 	header_len = dn_db->use_long ? 21 : 6;
 	if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+header_len))
 		goto drop;
@@ -1392,7 +1392,8 @@ make_route:
 		goto e_neighbour;
 
 	hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst);
-	dn_insert_route(rt, hash, (struct dn_route **)&skb->dst);
+	dn_insert_route(rt, hash, &rt);
+	skb_dst_set(skb, &rt->u.dst);
 
 done:
 	if (neigh)
@@ -1424,7 +1425,7 @@ static int dn_route_input(struct sk_buff *skb)
 	struct dn_skb_cb *cb = DN_SKB_CB(skb);
 	unsigned hash = dn_hash(cb->src, cb->dst);
 
-	if (skb->dst)
+	if (skb_dst(skb))
 		return 0;
 
 	rcu_read_lock();
@@ -1437,7 +1438,7 @@ static int dn_route_input(struct sk_buff *skb)
 		    (rt->fl.iif == cb->iif)) {
 			dst_use(&rt->u.dst, jiffies);
 			rcu_read_unlock();
-			skb->dst = (struct dst_entry *)rt;
+			skb_dst_set(skb, (struct dst_entry *)rt);
 			return 0;
 		}
 	}
@@ -1449,7 +1450,7 @@ static int dn_route_input(struct sk_buff *skb)
 static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 			   int event, int nowait, unsigned int flags)
 {
-	struct dn_route *rt = (struct dn_route *)skb->dst;
+	struct dn_route *rt = (struct dn_route *)skb_dst(skb);
 	struct rtmsg *r;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb_tail_pointer(skb);
@@ -1554,7 +1555,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
 		err = dn_route_input(skb);
 		local_bh_enable();
 		memset(cb, 0, sizeof(struct dn_skb_cb));
-		rt = (struct dn_route *)skb->dst;
+		rt = (struct dn_route *)skb_dst(skb);
 		if (!err && -rt->u.dst.error)
 			err = rt->u.dst.error;
 	} else {
@@ -1570,7 +1571,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
 	skb->dev = NULL;
 	if (err)
 		goto out_free;
-	skb->dst = &rt->u.dst;
+	skb_dst_set(skb, &rt->u.dst);
 	if (rtm->rtm_flags & RTM_F_NOTIFY)
 		rt->rt_flags |= RTCF_NOTIFY;
 
@@ -1622,15 +1623,15 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
 			rt = rcu_dereference(rt->u.dst.dn_next), idx++) {
 			if (idx < s_idx)
 				continue;
-			skb->dst = dst_clone(&rt->u.dst);
+			skb_dst_set(skb, dst_clone(&rt->u.dst));
 			if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
 					cb->nlh->nlmsg_seq, RTM_NEWROUTE,
 					1, NLM_F_MULTI) <= 0) {
-				dst_release(xchg(&skb->dst, NULL));
+				skb_dst_drop(skb);
 				rcu_read_unlock_bh();
 				goto done;
 			}
-			dst_release(xchg(&skb->dst, NULL));
+			skb_dst_drop(skb);
 		}
 		rcu_read_unlock_bh();
 	}
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 816494f271a..8a3881e28ac 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -468,7 +468,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
 	__be32 paddr;
 	struct neighbour *n;
 
-	if (!skb->dst) {
+	if (!skb_dst(skb)) {
 		printk(KERN_DEBUG "arp_find is called with dst==NULL\n");
 		kfree_skb(skb);
 		return 1;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 94f75efae93..97c410e8438 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -591,13 +591,13 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 				goto relookup_failed;
 
 			/* Ugh! */
-			odst = skb_in->dst;
+			odst = skb_dst(skb_in);
 			err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src,
 					     RT_TOS(tos), rt2->u.dst.dev);
 
 			dst_release(&rt2->u.dst);
 			rt2 = skb_rtable(skb_in);
-			skb_in->dst = odst;
+			skb_dst_set(skb_in, odst);
 		}
 
 		if (err)
@@ -659,7 +659,7 @@ static void icmp_unreach(struct sk_buff *skb)
 	u32 info = 0;
 	struct net *net;
 
-	net = dev_net(skb->dst->dev);
+	net = dev_net(skb_dst(skb)->dev);
 
 	/*
 	 *	Incomplete header ?
@@ -822,7 +822,7 @@ static void icmp_echo(struct sk_buff *skb)
 {
 	struct net *net;
 
-	net = dev_net(skb->dst->dev);
+	net = dev_net(skb_dst(skb)->dev);
 	if (!net->ipv4.sysctl_icmp_echo_ignore_all) {
 		struct icmp_bxm icmp_param;
 
@@ -873,7 +873,7 @@ static void icmp_timestamp(struct sk_buff *skb)
 out:
 	return;
 out_err:
-	ICMP_INC_STATS_BH(dev_net(skb->dst->dev), ICMP_MIB_INERRORS);
+	ICMP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
 	goto out;
 }
 
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index afabd2758b6..01b4284ed69 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -311,7 +311,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 		return NULL;
 	}
 
-	skb->dst = &rt->u.dst;
+	skb_dst_set(skb, &rt->u.dst);
 	skb->dev = dev;
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
@@ -659,7 +659,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 		return -1;
 	}
 
-	skb->dst = &rt->u.dst;
+	skb_dst_set(skb, &rt->u.dst);
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 0761cd9bbd1..a2991bc8e32 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -42,7 +42,7 @@ static int ip_forward_finish(struct sk_buff *skb)
 {
 	struct ip_options * opt	= &(IPCB(skb)->opt);
 
-	IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
+	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
 
 	if (unlikely(opt->optlen))
 		ip_forward_options(skb);
@@ -123,7 +123,7 @@ sr_failed:
 
 too_many_hops:
 	/* Tell the sender its packet died... */
-	IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_INHDRERRORS);
+	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_INHDRERRORS);
 	icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
 drop:
 	kfree_skb(skb);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 7985346653b..1f1b82475ea 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -573,7 +573,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
 	struct ipq *qp;
 	struct net *net;
 
-	net = skb->dev ? dev_net(skb->dev) : dev_net(skb->dst->dev);
+	net = skb->dev ? dev_net(skb->dev) : dev_net(skb_dst(skb)->dev);
 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
 
 	/* Start by cleaning up the memory. */
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 85ddad45a91..44e2a3d2359 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -643,8 +643,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 		stats->rx_packets++;
 		stats->rx_bytes += len;
 		skb->dev = tunnel->dev;
-		dst_release(skb->dst);
-		skb->dst = NULL;
+		skb_dst_drop(skb);
 		nf_reset(skb);
 
 		skb_reset_network_header(skb);
@@ -698,7 +697,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	if ((dst = tiph->daddr) == 0) {
 		/* NBMA tunnel */
 
-		if (skb->dst == NULL) {
+		if (skb_dst(skb) == NULL) {
 			stats->tx_fifo_errors++;
 			goto tx_error;
 		}
@@ -712,7 +711,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 		else if (skb->protocol == htons(ETH_P_IPV6)) {
 			struct in6_addr *addr6;
 			int addr_type;
-			struct neighbour *neigh = skb->dst->neighbour;
+			struct neighbour *neigh = skb_dst(skb)->neighbour;
 
 			if (neigh == NULL)
 				goto tx_error;
@@ -766,10 +765,10 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (df)
 		mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
 	else
-		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
+		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 
-	if (skb->dst)
-		skb->dst->ops->update_pmtu(skb->dst, mtu);
+	if (skb_dst(skb))
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
 
 	if (skb->protocol == htons(ETH_P_IP)) {
 		df |= (old_iph->frag_off&htons(IP_DF));
@@ -783,14 +782,14 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 #ifdef CONFIG_IPV6
 	else if (skb->protocol == htons(ETH_P_IPV6)) {
-		struct rt6_info *rt6 = (struct rt6_info *)skb->dst;
+		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
 
-		if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
+		if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
 			if ((tunnel->parms.iph.daddr &&
 			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
 			    rt6->rt6i_dst.plen == 128) {
 				rt6->rt6i_flags |= RTF_MODIFIED;
-				skb->dst->metrics[RTAX_MTU-1] = mtu;
+				skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
 			}
 		}
 
@@ -837,8 +836,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 			      IPSKB_REROUTED);
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
+	skb_dst_drop(skb);
+	skb_dst_set(skb, &rt->u.dst);
 
 	/*
 	 *	Push down and install the IPIP header.
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index cea784b0aa4..490ce20faf3 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -329,7 +329,7 @@ static int ip_rcv_finish(struct sk_buff *skb)
 	 *	Initialise the virtual path cache for the packet. It describes
 	 *	how the packet travels inside Linux networking.
 	 */
-	if (skb->dst == NULL) {
+	if (skb_dst(skb) == NULL) {
 		int err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
 					 skb->dev);
 		if (unlikely(err)) {
@@ -344,9 +344,9 @@ static int ip_rcv_finish(struct sk_buff *skb)
 	}
 
 #ifdef CONFIG_NET_CLS_ROUTE
-	if (unlikely(skb->dst->tclassid)) {
+	if (unlikely(skb_dst(skb)->tclassid)) {
 		struct ip_rt_acct *st = per_cpu_ptr(ip_rt_acct, smp_processor_id());
-		u32 idx = skb->dst->tclassid;
+		u32 idx = skb_dst(skb)->tclassid;
 		st[idx&0xFF].o_packets++;
 		st[idx&0xFF].o_bytes += skb->len;
 		st[(idx>>16)&0xFF].i_packets++;
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 7e1074ffdbd..94bf105ef3c 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -143,7 +143,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
 						__be32 addr;
 
 						memcpy(&addr, sptr+soffset-1, 4);
-						if (inet_addr_type(dev_net(skb->dst->dev), addr) != RTN_LOCAL) {
+						if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_LOCAL) {
 							dopt->ts_needtime = 1;
 							soffset += 8;
 						}
@@ -624,12 +624,12 @@ int ip_options_rcv_srr(struct sk_buff *skb)
 		memcpy(&nexthop, &optptr[srrptr-1], 4);
 
 		rt = skb_rtable(skb);
-		skb->dst = NULL;
+		skb_dst_set(skb, NULL);
 		err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev);
 		rt2 = skb_rtable(skb);
 		if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
 			ip_rt_put(rt2);
-			skb->dst = &rt->u.dst;
+			skb_dst_set(skb, &rt->u.dst);
 			return -EINVAL;
 		}
 		ip_rt_put(rt);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8d845ebfcca..3d6167fb2d9 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -95,7 +95,7 @@ int __ip_local_out(struct sk_buff *skb)
 
 	iph->tot_len = htons(skb->len);
 	ip_send_check(iph);
-	return nf_hook(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
+	return nf_hook(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
 		       dst_output);
 }
 
@@ -118,7 +118,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb)
 	__skb_pull(newskb, skb_network_offset(newskb));
 	newskb->pkt_type = PACKET_LOOPBACK;
 	newskb->ip_summed = CHECKSUM_UNNECESSARY;
-	WARN_ON(!newskb->dst);
+	WARN_ON(!skb_dst(newskb));
 	netif_rx(newskb);
 	return 0;
 }
@@ -176,7 +176,7 @@ EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
 
 static inline int ip_finish_output2(struct sk_buff *skb)
 {
-	struct dst_entry *dst = skb->dst;
+	struct dst_entry *dst = skb_dst(skb);
 	struct rtable *rt = (struct rtable *)dst;
 	struct net_device *dev = dst->dev;
 	unsigned int hh_len = LL_RESERVED_SPACE(dev);
@@ -217,14 +217,14 @@ static inline int ip_skb_dst_mtu(struct sk_buff *skb)
 	struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL;
 
 	return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ?
-	       skb->dst->dev->mtu : dst_mtu(skb->dst);
+	       skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
 }
 
 static int ip_finish_output(struct sk_buff *skb)
 {
 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
 	/* Policy lookup after SNAT yielded a new policy */
-	if (skb->dst->xfrm != NULL) {
+	if (skb_dst(skb)->xfrm != NULL) {
 		IPCB(skb)->flags |= IPSKB_REROUTED;
 		return dst_output(skb);
 	}
@@ -296,7 +296,7 @@ int ip_mc_output(struct sk_buff *skb)
 
 int ip_output(struct sk_buff *skb)
 {
-	struct net_device *dev = skb->dst->dev;
+	struct net_device *dev = skb_dst(skb)->dev;
 
 	IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);
 
@@ -355,7 +355,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
 		}
 		sk_setup_caps(sk, &rt->u.dst);
 	}
-	skb->dst = dst_clone(&rt->u.dst);
+	skb_dst_set(skb, dst_clone(&rt->u.dst));
 
 packet_routed:
 	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
@@ -401,8 +401,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->pkt_type = from->pkt_type;
 	to->priority = from->priority;
 	to->protocol = from->protocol;
-	dst_release(to->dst);
-	to->dst = dst_clone(from->dst);
+	skb_dst_drop(to);
+	skb_dst_set(to, dst_clone(skb_dst(from)));
 	to->dev = from->dev;
 	to->mark = from->mark;
 
@@ -1294,7 +1294,7 @@ int ip_push_pending_frames(struct sock *sk)
 	 * on dst refcount
 	 */
 	inet->cork.dst = NULL;
-	skb->dst = &rt->u.dst;
+	skb_dst_set(skb, &rt->u.dst);
 
 	if (iph->protocol == IPPROTO_ICMP)
 		icmp_out_count(net, ((struct icmphdr *)
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 0c6e7bf18a4..93e2b787da2 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -370,8 +370,7 @@ static int ipip_rcv(struct sk_buff *skb)
 		tunnel->dev->stats.rx_packets++;
 		tunnel->dev->stats.rx_bytes += skb->len;
 		skb->dev = tunnel->dev;
-		dst_release(skb->dst);
-		skb->dst = NULL;
+		skb_dst_drop(skb);
 		nf_reset(skb);
 		ipip_ecn_decapsulate(iph, skb);
 		netif_rx(skb);
@@ -447,15 +446,15 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (tiph->frag_off)
 		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
 	else
-		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
+		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 
 	if (mtu < 68) {
 		stats->collisions++;
 		ip_rt_put(rt);
 		goto tx_error;
 	}
-	if (skb->dst)
-		skb->dst->ops->update_pmtu(skb->dst, mtu);
+	if (skb_dst(skb))
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
 
 	df |= (old_iph->frag_off&htons(IP_DF));
 
@@ -502,8 +501,8 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 			      IPSKB_REROUTED);
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
+	skb_dst_drop(skb);
+	skb_dst_set(skb, &rt->u.dst);
 
 	/*
 	 *	Push down and install the IPIP header.
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 69dd058283e..ffd98610446 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -651,7 +651,7 @@ static int ipmr_cache_report(struct net *net,
 	ip_hdr(skb)->protocol = 0;			/* Flag to the kernel this is a route add */
 	msg = (struct igmpmsg *)skb_network_header(skb);
 	msg->im_vif = vifi;
-	skb->dst = dst_clone(pkt->dst);
+	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
 
 	/*
 	 *	Add our header
@@ -1201,7 +1201,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 	iph->protocol	=	IPPROTO_IPIP;
 	iph->ihl	=	5;
 	iph->tot_len	=	htons(skb->len);
-	ip_select_ident(iph, skb->dst, NULL);
+	ip_select_ident(iph, skb_dst(skb), NULL);
 	ip_send_check(iph);
 
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -1212,7 +1212,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
 {
 	struct ip_options * opt	= &(IPCB(skb)->opt);
 
-	IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
+	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
 
 	if (unlikely(opt->optlen))
 		ip_forward_options(skb);
@@ -1290,8 +1290,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 	vif->pkt_out++;
 	vif->bytes_out += skb->len;
 
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
+	skb_dst_drop(skb);
+	skb_dst_set(skb, &rt->u.dst);
 	ip_decrease_ttl(ip_hdr(skb));
 
 	/* FIXME: forward and output firewalls used to be called here.
@@ -1543,8 +1543,7 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
 	skb->protocol = htons(ETH_P_IP);
 	skb->ip_summed = 0;
 	skb->pkt_type = PACKET_HOST;
-	dst_release(skb->dst);
-	skb->dst = NULL;
+	skb_dst_drop(skb);
 	reg_dev->stats.rx_bytes += skb->len;
 	reg_dev->stats.rx_packets++;
 	nf_reset(skb);
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index fdf6811c31a..1725dc0ef68 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -12,7 +12,7 @@
 /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
 int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 {
-	struct net *net = dev_net(skb->dst->dev);
+	struct net *net = dev_net(skb_dst(skb)->dev);
 	const struct iphdr *iph = ip_hdr(skb);
 	struct rtable *rt;
 	struct flowi fl = {};
@@ -41,8 +41,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 			return -1;
 
 		/* Drop old route. */
-		dst_release(skb->dst);
-		skb->dst = &rt->u.dst;
+		skb_dst_drop(skb);
+		skb_dst_set(skb, &rt->u.dst);
 	} else {
 		/* non-local src, find valid iif to satisfy
 		 * rp-filter when calling ip_route_input. */
@@ -50,7 +50,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 		if (ip_route_output_key(net, &rt, &fl) != 0)
 			return -1;
 
-		odst = skb->dst;
+		odst = skb_dst(skb);
 		if (ip_route_input(skb, iph->daddr, iph->saddr,
 				   RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
 			dst_release(&rt->u.dst);
@@ -60,18 +60,22 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 		dst_release(odst);
 	}
 
-	if (skb->dst->error)
+	if (skb_dst(skb)->error)
 		return -1;
 
 #ifdef CONFIG_XFRM
 	if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
-	    xfrm_decode_session(skb, &fl, AF_INET) == 0)
-		if (xfrm_lookup(net, &skb->dst, &fl, skb->sk, 0))
+	    xfrm_decode_session(skb, &fl, AF_INET) == 0) {
+		struct dst_entry *dst = skb_dst(skb);
+		skb_dst_set(skb, NULL);
+		if (xfrm_lookup(net, &dst, &fl, skb->sk, 0))
 			return -1;
+		skb_dst_set(skb, dst);
+	}
 #endif
 
 	/* Change in oif may mean change in hh_len. */
-	hh_len = skb->dst->dev->hard_header_len;
+	hh_len = skb_dst(skb)->dev->hard_header_len;
 	if (skb_headroom(skb) < hh_len &&
 	    pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
 		return -1;
@@ -92,7 +96,7 @@ int ip_xfrm_me_harder(struct sk_buff *skb)
 	if (xfrm_decode_session(skb, &fl, AF_INET) < 0)
 		return -1;
 
-	dst = skb->dst;
+	dst = skb_dst(skb);
 	if (dst->xfrm)
 		dst = ((struct xfrm_dst *)dst)->route;
 	dst_hold(dst);
@@ -100,11 +104,11 @@ int ip_xfrm_me_harder(struct sk_buff *skb)
 	if (xfrm_lookup(dev_net(dst->dev), &dst, &fl, skb->sk, 0) < 0)
 		return -1;
 
-	dst_release(skb->dst);
-	skb->dst = dst;
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst);
 
 	/* Change in oif may mean change in hh_len. */
-	hh_len = skb->dst->dev->hard_header_len;
+	hh_len = skb_dst(skb)->dev->hard_header_len;
 	if (skb_headroom(skb) < hh_len &&
 	    pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
 		return -1;
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 0b4b6e0ff2b..c93ae44bff2 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -108,17 +108,16 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 		addr_type = RTN_LOCAL;
 
 	/* ip_route_me_harder expects skb->dst to be set */
-	dst_hold(oldskb->dst);
-	nskb->dst = oldskb->dst;
+	skb_dst_set(nskb, dst_clone(skb_dst(oldskb)));
 
 	if (ip_route_me_harder(nskb, addr_type))
 		goto free_nskb;
 
-	niph->ttl	= dst_metric(nskb->dst, RTAX_HOPLIMIT);
+	niph->ttl	= dst_metric(skb_dst(nskb), RTAX_HOPLIMIT);
 	nskb->ip_summed = CHECKSUM_NONE;
 
 	/* "Never happens" */
-	if (nskb->len > dst_mtu(nskb->dst))
+	if (nskb->len > dst_mtu(skb_dst(nskb)))
 		goto free_nskb;
 
 	nf_ct_attach(nskb, oldskb);
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index b7dd695691a..5567bd0d075 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -167,10 +167,9 @@ nf_nat_in(unsigned int hooknum,
 
 	ret = nf_nat_fn(hooknum, skb, in, out, okfn);
 	if (ret != NF_DROP && ret != NF_STOLEN &&
-	    daddr != ip_hdr(skb)->daddr) {
-		dst_release(skb->dst);
-		skb->dst = NULL;
-	}
+	    daddr != ip_hdr(skb)->daddr)
+		skb_dst_drop(skb);
+
 	return ret;
 }
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index f774651f0a4..3dc9171a272 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -343,7 +343,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
-	skb->dst = dst_clone(&rt->u.dst);
+	skb_dst_set(skb, dst_clone(&rt->u.dst));
 
 	skb_reset_network_header(skb);
 	iph = ip_hdr(skb);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f20060ac2f0..a849bb15d86 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1118,7 +1118,7 @@ restart:
 			if (rp)
 				*rp = rth;
 			else
-				skb->dst = &rth->u.dst;
+				skb_dst_set(skb, &rth->u.dst);
 			return 0;
 		}
 
@@ -1217,7 +1217,7 @@ restart:
 	if (rp)
 		*rp = rt;
 	else
-		skb->dst = &rt->u.dst;
+		skb_dst_set(skb, &rt->u.dst);
 	return 0;
 }
 
@@ -2251,7 +2251,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 			dst_use(&rth->u.dst, jiffies);
 			RT_CACHE_STAT_INC(in_hit);
 			rcu_read_unlock();
-			skb->dst = &rth->u.dst;
+			skb_dst_set(skb, &rth->u.dst);
 			return 0;
 		}
 		RT_CACHE_STAT_INC(in_hlist_search);
@@ -2934,7 +2934,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	if (err)
 		goto errout_free;
 
-	skb->dst = &rt->u.dst;
+	skb_dst_set(skb, &rt->u.dst);
 	if (rtm->rtm_flags & RTM_F_NOTIFY)
 		rt->rt_flags |= RTCF_NOTIFY;
 
@@ -2975,15 +2975,15 @@ int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
 				continue;
 			if (rt_is_expired(rt))
 				continue;
-			skb->dst = dst_clone(&rt->u.dst);
+			skb_dst_set(skb, dst_clone(&rt->u.dst));
 			if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid,
 					 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
 					 1, NLM_F_MULTI) <= 0) {
-				dst_release(xchg(&skb->dst, NULL));
+				skb_dst_drop(skb);
 				rcu_read_unlock_bh();
 				goto done;
 			}
-			dst_release(xchg(&skb->dst, NULL));
+			skb_dst_drop(skb);
 		}
 		rcu_read_unlock_bh();
 	}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 319c8852644..5a1ca2698c8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -590,7 +590,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
 
-	net = dev_net(skb->dst->dev);
+	net = dev_net(skb_dst(skb)->dev);
 	ip_send_reply(net->ipv4.tcp_sock, skb,
 		      &arg, arg.iov[0].iov_len);
 
@@ -617,7 +617,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 			];
 	} rep;
 	struct ip_reply_arg arg;
-	struct net *net = dev_net(skb->dst->dev);
+	struct net *net = dev_net(skb_dst(skb)->dev);
 
 	memset(&rep.th, 0, sizeof(struct tcphdr));
 	memset(&arg, 0, sizeof(arg));
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 79c39dc9b01..416fc4c2e7e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2202,7 +2202,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	/* Reserve space for headers. */
 	skb_reserve(skb, MAX_TCP_HEADER);
 
-	skb->dst = dst_clone(dst);
+	skb_dst_set(skb, dst_clone(dst));
 
 	mss = dst_metric(dst, RTAX_ADVMSS);
 	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7a1d1ce22e6..8f4158d7c9a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -328,7 +328,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
 	if (unlikely(sk = skb_steal_sock(skb)))
 		return sk;
 	else
-		return __udp4_lib_lookup(dev_net(skb->dst->dev), iph->saddr, sport,
+		return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport,
 					 iph->daddr, dport, inet_iif(skb),
 					 udptable);
 }
@@ -1237,7 +1237,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	struct sock *sk;
 	struct udphdr *uh;
 	unsigned short ulen;
-	struct rtable *rt = (struct rtable*)skb->dst;
+	struct rtable *rt = skb_rtable(skb);
 	__be32 saddr, daddr;
 	struct net *net = dev_net(skb->dev);
 
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 4ec2162a437..f9f922a0ba8 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -23,7 +23,7 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb)
 
 static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
 {
-	if (skb->dst == NULL) {
+	if (skb_dst(skb) == NULL) {
 		const struct iphdr *iph = ip_hdr(skb);
 
 		if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 7135279f3f8..3444f3b34ec 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -28,7 +28,7 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
  */
 static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct dst_entry *dst = skb->dst;
+	struct dst_entry *dst = skb_dst(skb);
 	struct iphdr *top_iph;
 	int flags;
 
@@ -41,7 +41,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 	top_iph->ihl = 5;
 	top_iph->version = 4;
 
-	top_iph->protocol = xfrm_af2proto(skb->dst->ops->family);
+	top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);
 
 	/* DS disclosed */
 	top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos,
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 8c3180adddb..c908bd99bcb 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -29,7 +29,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
 	if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df)
 		goto out;
 
-	dst = skb->dst;
+	dst = skb_dst(skb);
 	mtu = dst_mtu(dst);
 	if (skb->len > mtu) {
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
@@ -72,7 +72,7 @@ EXPORT_SYMBOL(xfrm4_prepare_output);
 static int xfrm4_output_finish(struct sk_buff *skb)
 {
 #ifdef CONFIG_NETFILTER
-	if (!skb->dst->xfrm) {
+	if (!skb_dst(skb)->xfrm) {
 		IPCB(skb)->flags |= IPSKB_REROUTED;
 		return dst_output(skb);
 	}
@@ -87,6 +87,6 @@ static int xfrm4_output_finish(struct sk_buff *skb)
 int xfrm4_output(struct sk_buff *skb)
 {
 	return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb,
-			    NULL, skb->dst->dev, xfrm4_output_finish,
+			    NULL, skb_dst(skb)->dev, xfrm4_output_finish,
 			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 1c7f400a3cf..4aae658e550 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -277,7 +277,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
 	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
 				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
-		IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
+		IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
 				 IPSTATS_MIB_INHDRERRORS);
 		kfree_skb(skb);
 		return -1;
@@ -288,7 +288,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
 	dstbuf = opt->dst1;
 #endif
 
-	dst = dst_clone(skb->dst);
+	dst = dst_clone(skb_dst(skb));
 	if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
 		dst_release(dst);
 		skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
@@ -333,7 +333,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
 	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
 				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
-		IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
+		IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
 				 IPSTATS_MIB_INHDRERRORS);
 		kfree_skb(skb);
 		return -1;
@@ -343,7 +343,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
 
 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) ||
 	    skb->pkt_type != PACKET_HOST) {
-		IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
+		IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
 				 IPSTATS_MIB_INADDRERRORS);
 		kfree_skb(skb);
 		return -1;
@@ -358,7 +358,7 @@ looped_back:
 			 * processed by own
 			 */
 			if (!addr) {
-				IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
+				IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
 						 IPSTATS_MIB_INADDRERRORS);
 				kfree_skb(skb);
 				return -1;
@@ -384,7 +384,7 @@ looped_back:
 			goto unknown_rh;
 		/* Silently discard invalid RTH type 2 */
 		if (hdr->hdrlen != 2 || hdr->segments_left != 1) {
-			IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
+			IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
 					 IPSTATS_MIB_INHDRERRORS);
 			kfree_skb(skb);
 			return -1;
@@ -403,7 +403,7 @@ looped_back:
 	n = hdr->hdrlen >> 1;
 
 	if (hdr->segments_left > n) {
-		IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
+		IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
 				 IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
 				  ((&hdr->segments_left) -
@@ -417,7 +417,7 @@ looped_back:
 	if (skb_cloned(skb)) {
 		/* the copy is a forwarded packet */
 		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
-			IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
+			IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
 					 IPSTATS_MIB_OUTDISCARDS);
 			kfree_skb(skb);
 			return -1;
@@ -440,13 +440,13 @@ looped_back:
 		if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
 				     (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
 				     IPPROTO_ROUTING) < 0) {
-			IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
+			IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
 					 IPSTATS_MIB_INADDRERRORS);
 			kfree_skb(skb);
 			return -1;
 		}
-		if (!ipv6_chk_home_addr(dev_net(skb->dst->dev), addr)) {
-			IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
+		if (!ipv6_chk_home_addr(dev_net(skb_dst(skb)->dev), addr)) {
+			IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
 					 IPSTATS_MIB_INADDRERRORS);
 			kfree_skb(skb);
 			return -1;
@@ -458,7 +458,7 @@ looped_back:
 	}
 
 	if (ipv6_addr_is_multicast(addr)) {
-		IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
+		IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
 				 IPSTATS_MIB_INADDRERRORS);
 		kfree_skb(skb);
 		return -1;
@@ -468,17 +468,17 @@ looped_back:
 	ipv6_addr_copy(addr, &ipv6_hdr(skb)->daddr);
 	ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &daddr);
 
-	dst_release(xchg(&skb->dst, NULL));
+	skb_dst_drop(skb);
 	ip6_route_input(skb);
-	if (skb->dst->error) {
+	if (skb_dst(skb)->error) {
 		skb_push(skb, skb->data - skb_network_header(skb));
 		dst_input(skb);
 		return -1;
 	}
 
-	if (skb->dst->dev->flags&IFF_LOOPBACK) {
+	if (skb_dst(skb)->dev->flags&IFF_LOOPBACK) {
 		if (ipv6_hdr(skb)->hop_limit <= 1) {
-			IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
+			IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
 					 IPSTATS_MIB_INHDRERRORS);
 			icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
 				    0, skb->dev);
@@ -494,7 +494,7 @@ looped_back:
 	return -1;
 
 unknown_rh:
-	IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
+	IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS);
 	icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
 			  (&hdr->type) - skb_network_header(skb));
 	return -1;
@@ -552,11 +552,11 @@ void ipv6_exthdrs_exit(void)
  **********************************/
 
 /*
- * Note: we cannot rely on skb->dst before we assign it in ip6_route_input().
+ * Note: we cannot rely on skb_dst(skb) before we assign it in ip6_route_input().
  */
 static inline struct inet6_dev *ipv6_skb_idev(struct sk_buff *skb)
 {
-	return skb->dst ? ip6_dst_idev(skb->dst) : __in6_dev_get(skb->dev);
+	return skb_dst(skb) ? ip6_dst_idev(skb_dst(skb)) : __in6_dev_get(skb->dev);
 }
 
 /* Router Alert as of RFC 2711 */
@@ -581,7 +581,7 @@ static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
 {
 	const unsigned char *nh = skb_network_header(skb);
 	u32 pkt_len;
-	struct net *net = dev_net(skb->dst->dev);
+	struct net *net = dev_net(skb_dst(skb)->dev);
 
 	if (nh[optoff + 1] != 4 || (optoff & 3) != 2) {
 		LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 3c3732d50c1..cc4797dd832 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -228,7 +228,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok)
 		__inet6_csk_dst_store(sk, dst, NULL, NULL);
 	}
 
-	skb->dst = dst_clone(dst);
+	skb_dst_set(skb, dst_clone(dst));
 
 	/* Restore final destination back after routing done */
 	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index bc1a920c34a..c3a07d75b5f 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -48,7 +48,7 @@
 
 inline int ip6_rcv_finish( struct sk_buff *skb)
 {
-	if (skb->dst == NULL)
+	if (skb_dst(skb) == NULL)
 		ip6_route_input(skb);
 
 	return dst_input(skb);
@@ -91,7 +91,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	 * arrived via the sending interface (ethX), because of the
 	 * nature of scoping architecture. --yoshfuji
 	 */
-	IP6CB(skb)->iif = skb->dst ? ip6_dst_idev(skb->dst)->dev->ifindex : dev->ifindex;
+	IP6CB(skb)->iif = skb_dst(skb) ? ip6_dst_idev(skb_dst(skb))->dev->ifindex : dev->ifindex;
 
 	if (unlikely(!pskb_may_pull(skb, sizeof(*hdr))))
 		goto err;
@@ -161,7 +161,7 @@ static int ip6_input_finish(struct sk_buff *skb)
 	int nexthdr, raw;
 	u8 hash;
 	struct inet6_dev *idev;
-	struct net *net = dev_net(skb->dst->dev);
+	struct net *net = dev_net(skb_dst(skb)->dev);
 
 	/*
 	 *	Parse extension headers
@@ -169,7 +169,7 @@ static int ip6_input_finish(struct sk_buff *skb)
 
 	rcu_read_lock();
 resubmit:
-	idev = ip6_dst_idev(skb->dst);
+	idev = ip6_dst_idev(skb_dst(skb));
 	if (!pskb_pull(skb, skb_transport_offset(skb)))
 		goto discard;
 	nhoff = IP6CB(skb)->nhoff;
@@ -242,8 +242,8 @@ int ip6_mc_input(struct sk_buff *skb)
 	struct ipv6hdr *hdr;
 	int deliver;
 
-	IP6_UPD_PO_STATS_BH(dev_net(skb->dst->dev),
-			 ip6_dst_idev(skb->dst), IPSTATS_MIB_INMCAST,
+	IP6_UPD_PO_STATS_BH(dev_net(skb_dst(skb)->dev),
+			 ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INMCAST,
 			 skb->len);
 
 	hdr = ipv6_hdr(skb);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 735a2bf4b5f..c8dc8e5a822 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -78,7 +78,7 @@ int __ip6_local_out(struct sk_buff *skb)
 		len = 0;
 	ipv6_hdr(skb)->payload_len = htons(len);
 
-	return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
+	return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
 		       dst_output);
 }
 
@@ -96,7 +96,7 @@ EXPORT_SYMBOL_GPL(ip6_local_out);
 
 static int ip6_output_finish(struct sk_buff *skb)
 {
-	struct dst_entry *dst = skb->dst;
+	struct dst_entry *dst = skb_dst(skb);
 
 	if (dst->hh)
 		return neigh_hh_output(dst->hh, skb);
@@ -117,7 +117,7 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
 	__skb_pull(newskb, skb_network_offset(newskb));
 	newskb->pkt_type = PACKET_LOOPBACK;
 	newskb->ip_summed = CHECKSUM_UNNECESSARY;
-	WARN_ON(!newskb->dst);
+	WARN_ON(!skb_dst(newskb));
 
 	netif_rx(newskb);
 	return 0;
@@ -126,7 +126,7 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
 
 static int ip6_output2(struct sk_buff *skb)
 {
-	struct dst_entry *dst = skb->dst;
+	struct dst_entry *dst = skb_dst(skb);
 	struct net_device *dev = dst->dev;
 
 	skb->protocol = htons(ETH_P_IPV6);
@@ -134,7 +134,7 @@ static int ip6_output2(struct sk_buff *skb)
 
 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
 		struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
-		struct inet6_dev *idev = ip6_dst_idev(skb->dst);
+		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 
 		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
 		    ((mroute6_socket(dev_net(dev)) &&
@@ -172,21 +172,21 @@ static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
 	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
 
 	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
-	       skb->dst->dev->mtu : dst_mtu(skb->dst);
+	       skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
 }
 
 int ip6_output(struct sk_buff *skb)
 {
-	struct inet6_dev *idev = ip6_dst_idev(skb->dst);
+	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 	if (unlikely(idev->cnf.disable_ipv6)) {
-		IP6_INC_STATS(dev_net(skb->dst->dev), idev,
+		IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev,
 			      IPSTATS_MIB_OUTDISCARDS);
 		kfree_skb(skb);
 		return 0;
 	}
 
 	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
-				dst_allfrag(skb->dst))
+				dst_allfrag(skb_dst(skb)))
 		return ip6_fragment(skb, ip6_output2);
 	else
 		return ip6_output2(skb);
@@ -202,7 +202,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 	struct net *net = sock_net(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct in6_addr *first_hop = &fl->fl6_dst;
-	struct dst_entry *dst = skb->dst;
+	struct dst_entry *dst = skb_dst(skb);
 	struct ipv6hdr *hdr;
 	u8  proto = fl->proto;
 	int seg_len = skb->len;
@@ -222,7 +222,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 		if (skb_headroom(skb) < head_room) {
 			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
 			if (skb2 == NULL) {
-				IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
+				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 					      IPSTATS_MIB_OUTDISCARDS);
 				kfree_skb(skb);
 				return -ENOBUFS;
@@ -276,7 +276,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 
 	mtu = dst_mtu(dst);
 	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
-		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb->dst),
+		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
 			      IPSTATS_MIB_OUT, skb->len);
 		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
 				dst_output);
@@ -286,7 +286,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
 	skb->dev = dst->dev;
 	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
-	IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
+	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
 	kfree_skb(skb);
 	return -EMSGSIZE;
 }
@@ -416,7 +416,7 @@ static inline int ip6_forward_finish(struct sk_buff *skb)
 
 int ip6_forward(struct sk_buff *skb)
 {
-	struct dst_entry *dst = skb->dst;
+	struct dst_entry *dst = skb_dst(skb);
 	struct ipv6hdr *hdr = ipv6_hdr(skb);
 	struct inet6_skb_parm *opt = IP6CB(skb);
 	struct net *net = dev_net(dst->dev);
@@ -485,7 +485,7 @@ int ip6_forward(struct sk_buff *skb)
 		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
 		goto drop;
 	}
-	dst = skb->dst;
+	dst = skb_dst(skb);
 
 	/* IPv6 specs say nothing about it, but it is clear that we cannot
 	   send redirects to source routed frames.
@@ -566,8 +566,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->pkt_type = from->pkt_type;
 	to->priority = from->priority;
 	to->protocol = from->protocol;
-	dst_release(to->dst);
-	to->dst = dst_clone(from->dst);
+	skb_dst_drop(to);
+	skb_dst_set(to, dst_clone(skb_dst(from)));
 	to->dev = from->dev;
 	to->mark = from->mark;
 
@@ -624,7 +624,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 {
 	struct sk_buff *frag;
-	struct rt6_info *rt = (struct rt6_info*)skb->dst;
+	struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
 	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
 	struct ipv6hdr *tmp_hdr;
 	struct frag_hdr *fh;
@@ -632,7 +632,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	__be32 frag_id = 0;
 	int ptr, offset = 0, err=0;
 	u8 *prevhdr, nexthdr = 0;
-	struct net *net = dev_net(skb->dst->dev);
+	struct net *net = dev_net(skb_dst(skb)->dev);
 
 	hlen = ip6_find_1stfragopt(skb, &prevhdr);
 	nexthdr = *prevhdr;
@@ -644,9 +644,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	 * check should be redundant, but it's free.)
 	 */
 	if (!skb->local_df) {
-		skb->dev = skb->dst->dev;
+		skb->dev = skb_dst(skb)->dev;
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
-		IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
+		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 			      IPSTATS_MIB_FRAGFAILS);
 		kfree_skb(skb);
 		return -EMSGSIZE;
@@ -696,7 +696,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 		*prevhdr = NEXTHDR_FRAGMENT;
 		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 		if (!tmp_hdr) {
-			IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
+			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 				      IPSTATS_MIB_FRAGFAILS);
 			return -ENOMEM;
 		}
@@ -809,7 +809,7 @@ slow_path:
 
 		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
 			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
-			IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
+			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 				      IPSTATS_MIB_FRAGFAILS);
 			err = -ENOMEM;
 			goto fail;
@@ -873,16 +873,16 @@ slow_path:
 		if (err)
 			goto fail;
 
-		IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
+		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 			      IPSTATS_MIB_FRAGCREATES);
 	}
-	IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
+	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 		      IPSTATS_MIB_FRAGOKS);
 	kfree_skb(skb);
 	return err;
 
 fail:
-	IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
+	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 		      IPSTATS_MIB_FRAGFAILS);
 	kfree_skb(skb);
 	return err;
@@ -1516,10 +1516,10 @@ int ip6_push_pending_frames(struct sock *sk)
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
 
-	skb->dst = dst_clone(&rt->u.dst);
+	skb_dst_set(skb, dst_clone(&rt->u.dst));
 	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
 	if (proto == IPPROTO_ICMPV6) {
-		struct inet6_dev *idev = ip6_dst_idev(skb->dst);
+		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 
 		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
@@ -1545,8 +1545,8 @@ void ip6_flush_pending_frames(struct sock *sk)
 	struct sk_buff *skb;
 
 	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
-		if (skb->dst)
-			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb->dst),
+		if (skb_dst(skb))
+			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
 				      IPSTATS_MIB_OUTDISCARDS);
 		kfree_skb(skb);
 	}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index af256d47fd3..404d16a97d5 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -532,8 +532,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (!skb2)
 		return 0;
 
-	dst_release(skb2->dst);
-	skb2->dst = NULL;
+	skb_dst_drop(skb2);
+
 	skb_pull(skb2, offset);
 	skb_reset_network_header(skb2);
 	eiph = ip_hdr(skb2);
@@ -560,21 +560,21 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			ip_rt_put(rt);
 			goto out;
 		}
-		skb2->dst = (struct dst_entry *)rt;
+		skb_dst_set(skb2, (struct dst_entry *)rt);
 	} else {
 		ip_rt_put(rt);
 		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
 				   skb2->dev) ||
-		    skb2->dst->dev->type != ARPHRD_TUNNEL)
+		    skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
 			goto out;
 	}
 
 	/* change mtu on this route */
 	if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
-		if (rel_info > dst_mtu(skb2->dst))
+		if (rel_info > dst_mtu(skb_dst(skb2)))
 			goto out;
 
-		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
+		skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info);
 	}
 
 	icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
@@ -606,8 +606,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		if (!skb2)
 			return 0;
 
-		dst_release(skb2->dst);
-		skb2->dst = NULL;
+		skb_dst_drop(skb2);
 		skb_pull(skb2, offset);
 		skb_reset_network_header(skb2);
 
@@ -720,8 +719,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
 		skb->pkt_type = PACKET_HOST;
 		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
 		skb->dev = t->dev;
-		dst_release(skb->dst);
-		skb->dst = NULL;
+		skb_dst_drop(skb);
 		nf_reset(skb);
 
 		dscp_ecn_decapsulate(t, ipv6h, skb);
@@ -885,8 +883,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 	}
 	if (mtu < IPV6_MIN_MTU)
 		mtu = IPV6_MIN_MTU;
-	if (skb->dst)
-		skb->dst->ops->update_pmtu(skb->dst, mtu);
+	if (skb_dst(skb))
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
 	if (skb->len > mtu) {
 		*pmtu = mtu;
 		err = -EMSGSIZE;
@@ -910,8 +908,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 		kfree_skb(skb);
 		skb = new_skb;
 	}
-	dst_release(skb->dst);
-	skb->dst = dst_clone(dst);
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst_clone(dst));
 
 	skb->transport_header = skb->network_header;
 
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 228be551e9c..a35d8fc55b0 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -398,10 +398,9 @@ static int pim6_rcv(struct sk_buff *skb)
 	skb->protocol = htons(ETH_P_IPV6);
 	skb->ip_summed = 0;
 	skb->pkt_type = PACKET_HOST;
-	dst_release(skb->dst);
+	skb_dst_drop(skb);
 	reg_dev->stats.rx_bytes += skb->len;
 	reg_dev->stats.rx_packets++;
-	skb->dst = NULL;
 	nf_reset(skb);
 	netif_rx(skb);
 	dev_put(reg_dev);
@@ -849,7 +848,7 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
 	ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
 	ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
 
-	skb->dst = dst_clone(pkt->dst);
+	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 
@@ -1487,7 +1486,7 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
 
 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
 {
-	IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
+	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
 	return dst_output(skb);
 }
@@ -1532,8 +1531,8 @@ static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
 	if (!dst)
 		goto out_free;
 
-	dst_release(skb->dst);
-	skb->dst = dst;
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst);
 
 	/*
 	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
@@ -1722,7 +1721,7 @@ int ip6mr_get_route(struct net *net,
 {
 	int err;
 	struct mfc6_cache *cache;
-	struct rt6_info *rt = (struct rt6_info *)skb->dst;
+	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
 
 	read_lock(&mrt_lock);
 	cache = ip6mr_cache_find(net, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 4b48819a5b8..4b264ed40a8 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1448,6 +1448,7 @@ static void mld_sendpack(struct sk_buff *skb)
 	struct net *net = dev_net(skb->dev);
 	int err;
 	struct flowi fl;
+	struct dst_entry *dst;
 
 	IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
 
@@ -1459,9 +1460,9 @@ static void mld_sendpack(struct sk_buff *skb)
 		IPPROTO_ICMPV6, csum_partial(skb_transport_header(skb),
 					     mldlen, 0));
 
-	skb->dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr);
+	dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr);
 
-	if (!skb->dst) {
+	if (!dst) {
 		err = -ENOMEM;
 		goto err_out;
 	}
@@ -1470,7 +1471,8 @@ static void mld_sendpack(struct sk_buff *skb)
 			 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 			 skb->dev->ifindex);
 
-	err = xfrm_lookup(net, &skb->dst, &fl, NULL, 0);
+	err = xfrm_lookup(net, &dst, &fl, NULL, 0);
+	skb_dst_set(skb, dst);
 	if (err)
 		goto err_out;
 
@@ -1775,6 +1777,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 		     IPV6_TLV_ROUTERALERT, 2, 0, 0,
 		     IPV6_TLV_PADN, 0 };
 	struct flowi fl;
+	struct dst_entry *dst;
 
 	if (type == ICMPV6_MGM_REDUCTION)
 		snd_addr = &in6addr_linklocal_allrouters;
@@ -1828,8 +1831,8 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 
 	idev = in6_dev_get(skb->dev);
 
-	skb->dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr);
-	if (!skb->dst) {
+	dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr);
+	if (!dst) {
 		err = -ENOMEM;
 		goto err_out;
 	}
@@ -1838,11 +1841,11 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 			 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 			 skb->dev->ifindex);
 
-	err = xfrm_lookup(net, &skb->dst, &fl, NULL, 0);
+	err = xfrm_lookup(net, &dst, &fl, NULL, 0);
 	if (err)
 		goto err_out;
 
-
+	skb_dst_set(skb, dst);
 	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
 		      dst_output);
 out:
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 1d13d996498..9eb68e92cc1 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -530,7 +530,7 @@ void ndisc_send_skb(struct sk_buff *skb,
 		return;
 	}
 
-	skb->dst = dst;
+	skb_dst_set(skb, dst);
 
 	idev = in6_dev_get(dst->dev);
 	IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
@@ -1612,7 +1612,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 					     len, IPPROTO_ICMPV6,
 					     csum_partial(icmph, len, 0));
 
-	buff->dst = dst;
+	skb_dst_set(buff, dst);
 	idev = in6_dev_get(dst->dev);
 	IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
 	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 834cea69fb5..d5ed92b1434 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -12,7 +12,7 @@
 
 int ip6_route_me_harder(struct sk_buff *skb)
 {
-	struct net *net = dev_net(skb->dst->dev);
+	struct net *net = dev_net(skb_dst(skb)->dev);
 	struct ipv6hdr *iph = ipv6_hdr(skb);
 	struct dst_entry *dst;
 	struct flowi fl = {
@@ -28,9 +28,15 @@ int ip6_route_me_harder(struct sk_buff *skb)
 
 #ifdef CONFIG_XFRM
 	if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
-	    xfrm_decode_session(skb, &fl, AF_INET6) == 0)
-		if (xfrm_lookup(net, &skb->dst, &fl, skb->sk, 0))
+	    xfrm_decode_session(skb, &fl, AF_INET6) == 0) {
+		struct dst_entry *dst2 = skb_dst(skb);
+
+		if (xfrm_lookup(net, &dst2, &fl, skb->sk, 0)) {
+			skb_dst_set(skb, NULL);
 			return -1;
+		}
+		skb_dst_set(skb, dst2);
+	}
 #endif
 
 	if (dst->error) {
@@ -41,9 +47,9 @@ int ip6_route_me_harder(struct sk_buff *skb)
 	}
 
 	/* Drop old route. */
-	dst_release(skb->dst);
+	skb_dst_drop(skb);
 
-	skb->dst = dst;
+	skb_dst_set(skb, dst);
 	return 0;
 }
 EXPORT_SYMBOL(ip6_route_me_harder);
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 5a2d0a41694..5a7f00cd15c 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -112,7 +112,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
 		return;
 	}
 
-	nskb->dst = dst;
+	skb_dst_set(nskb, dst);
 
 	skb_reserve(nskb, hh_len + dst->header_len);
 
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index e99307fba0b..36a090d87a3 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -625,7 +625,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
 
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
-	skb->dst = dst_clone(&rt->u.dst);
+	skb_dst_set(skb, dst_clone(&rt->u.dst));
 
 	skb_put(skb, length);
 	skb_reset_network_header(skb);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index e9ac7a12f59..54a387d31e1 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -267,7 +267,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	struct sk_buff *prev, *next;
 	struct net_device *dev;
 	int offset, end;
-	struct net *net = dev_net(skb->dst->dev);
+	struct net *net = dev_net(skb_dst(skb)->dev);
 
 	if (fq->q.last_in & INET_FRAG_COMPLETE)
 		goto err;
@@ -277,7 +277,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 			((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
 
 	if ((unsigned int)end > IPV6_MAXPLEN) {
-		IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
+		IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
 				 IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
 				  ((u8 *)&fhdr->frag_off -
@@ -310,7 +310,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 			/* RFC2460 says always send parameter problem in
 			 * this case. -DaveM
 			 */
-			IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst),
+			IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
 					 IPSTATS_MIB_INHDRERRORS);
 			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
 					  offsetof(struct ipv6hdr, payload_len));
@@ -434,7 +434,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	return -1;
 
 err:
-	IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
+	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 		      IPSTATS_MIB_REASMFAILS);
 	kfree_skb(skb);
 	return -1;
@@ -576,9 +576,9 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	struct frag_hdr *fhdr;
 	struct frag_queue *fq;
 	struct ipv6hdr *hdr = ipv6_hdr(skb);
-	struct net *net = dev_net(skb->dst->dev);
+	struct net *net = dev_net(skb_dst(skb)->dev);
 
-	IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS);
+	IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
 
 	/* Jumbo payload inhibits frag. header */
 	if (hdr->payload_len==0)
@@ -595,17 +595,17 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 		/* It is not a fragmented frame */
 		skb->transport_header += sizeof(struct frag_hdr);
 		IP6_INC_STATS_BH(net,
-				 ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS);
+				 ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);
 
 		IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
 		return 1;
 	}
 
 	if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh)
-		ip6_evictor(net, ip6_dst_idev(skb->dst));
+		ip6_evictor(net, ip6_dst_idev(skb_dst(skb)));
 
 	if ((fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
-			  ip6_dst_idev(skb->dst))) != NULL) {
+			  ip6_dst_idev(skb_dst(skb)))) != NULL) {
 		int ret;
 
 		spin_lock(&fq->q.lock);
@@ -617,12 +617,12 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 		return ret;
 	}
 
-	IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS);
+	IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS);
 	kfree_skb(skb);
 	return -1;
 
 fail_hdr:
-	IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
+	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS);
 	icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
 	return -1;
 }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 032a5ec391c..658293ea05b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -800,7 +800,7 @@ void ip6_route_input(struct sk_buff *skb)
 	if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
 		flags |= RT6_LOOKUP_F_IFACE;
 
-	skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
+	skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
 }
 
 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
@@ -911,7 +911,7 @@ static void ip6_link_failure(struct sk_buff *skb)
 
 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
 
-	rt = (struct rt6_info *) skb->dst;
+	rt = (struct rt6_info *) skb_dst(skb);
 	if (rt) {
 		if (rt->rt6i_flags&RTF_CACHE) {
 			dst_set_expires(&rt->u.dst, 0);
@@ -1868,7 +1868,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
 {
 	int type;
-	struct dst_entry *dst = skb->dst;
+	struct dst_entry *dst = skb_dst(skb);
 	switch (ipstats_mib_noroutes) {
 	case IPSTATS_MIB_INNOROUTES:
 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
@@ -1895,7 +1895,7 @@ static int ip6_pkt_discard(struct sk_buff *skb)
 
 static int ip6_pkt_discard_out(struct sk_buff *skb)
 {
-	skb->dev = skb->dst->dev;
+	skb->dev = skb_dst(skb)->dev;
 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
 }
 
@@ -1908,7 +1908,7 @@ static int ip6_pkt_prohibit(struct sk_buff *skb)
 
 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
 {
-	skb->dev = skb->dst->dev;
+	skb->dev = skb_dst(skb)->dev;
 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
 }
 
@@ -2366,7 +2366,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
 
 	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
-	skb->dst = &rt->u.dst;
+	skb_dst_set(skb, &rt->u.dst);
 
 	err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index b3a59bd40f0..68e52308e55 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -575,8 +575,7 @@ static int ipip6_rcv(struct sk_buff *skb)
 		tunnel->dev->stats.rx_packets++;
 		tunnel->dev->stats.rx_bytes += skb->len;
 		skb->dev = tunnel->dev;
-		dst_release(skb->dst);
-		skb->dst = NULL;
+		skb_dst_drop(skb);
 		nf_reset(skb);
 		ipip6_ecn_decapsulate(iph, skb);
 		netif_rx(skb);
@@ -638,8 +637,8 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (dev->priv_flags & IFF_ISATAP) {
 		struct neighbour *neigh = NULL;
 
-		if (skb->dst)
-			neigh = skb->dst->neighbour;
+		if (skb_dst(skb))
+			neigh = skb_dst(skb)->neighbour;
 
 		if (neigh == NULL) {
 			if (net_ratelimit())
@@ -663,8 +662,8 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (!dst) {
 		struct neighbour *neigh = NULL;
 
-		if (skb->dst)
-			neigh = skb->dst->neighbour;
+		if (skb_dst(skb))
+			neigh = skb_dst(skb)->neighbour;
 
 		if (neigh == NULL) {
 			if (net_ratelimit())
@@ -714,7 +713,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (tiph->frag_off)
 		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
 	else
-		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
+		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 
 	if (mtu < 68) {
 		stats->collisions++;
@@ -723,8 +722,8 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 	if (mtu < IPV6_MIN_MTU)
 		mtu = IPV6_MIN_MTU;
-	if (tunnel->parms.iph.daddr && skb->dst)
-		skb->dst->ops->update_pmtu(skb->dst, mtu);
+	if (tunnel->parms.iph.daddr && skb_dst(skb))
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
 
 	if (skb->len > mtu) {
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
@@ -768,8 +767,8 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb_reset_network_header(skb);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	IPCB(skb)->flags = 0;
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
+	skb_dst_drop(skb);
+	skb_dst_set(skb, &rt->u.dst);
 
 	/*
 	 *	Push down and install the IPIP header.
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index ea37741062a..53b6a4192b1 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -981,9 +981,10 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
 	struct tcphdr *th = tcp_hdr(skb), *t1;
 	struct sk_buff *buff;
 	struct flowi fl;
-	struct net *net = dev_net(skb->dst->dev);
+	struct net *net = dev_net(skb_dst(skb)->dev);
 	struct sock *ctl_sk = net->ipv6.tcp_sk;
 	unsigned int tot_len = sizeof(struct tcphdr);
+	struct dst_entry *dst;
 	__be32 *topt;
 
 	if (ts)
@@ -1052,8 +1053,9 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
 	 * Underlying function will use this to retrieve the network
 	 * namespace
 	 */
-	if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) {
-		if (xfrm_lookup(net, &buff->dst, &fl, NULL, 0) >= 0) {
+	if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) {
+		if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) {
+			skb_dst_set(buff, dst);
 			ip6_xmit(ctl_sk, buff, &fl, NULL, 0);
 			TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 			if (rst)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 8905712cfbb..fc333d85472 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -177,10 +177,9 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
 
 	if (unlikely(sk = skb_steal_sock(skb)))
 		return sk;
-	else
-		return __udp6_lib_lookup(dev_net(skb->dst->dev), &iph->saddr, sport,
-					 &iph->daddr, dport, inet6_iif(skb),
-					 udptable);
+	return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport,
+				 &iph->daddr, dport, inet6_iif(skb),
+				 udptable);
 }
 
 /*
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index e20529b4c82..3927832227b 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -31,7 +31,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
  */
 static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct dst_entry *dst = skb->dst;
+	struct dst_entry *dst = skb_dst(skb);
 	struct ipv6hdr *top_iph;
 	int dsfield;
 
@@ -45,7 +45,7 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
 	       sizeof(top_iph->flow_lbl));
-	top_iph->nexthdr = xfrm_af2proto(skb->dst->ops->family);
+	top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);
 
 	dsfield = XFRM_MODE_SKB_CB(skb)->tos;
 	dsfield = INET_ECN_encapsulate(dsfield, dsfield);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 5ee5a031bc9..c4f4eef032a 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -30,7 +30,7 @@ EXPORT_SYMBOL(xfrm6_find_1stfragopt);
 static int xfrm6_tunnel_check_size(struct sk_buff *skb)
 {
 	int mtu, ret = 0;
-	struct dst_entry *dst = skb->dst;
+	struct dst_entry *dst = skb_dst(skb);
 
 	mtu = dst_mtu(dst);
 	if (mtu < IPV6_MIN_MTU)
@@ -90,6 +90,6 @@ static int xfrm6_output_finish(struct sk_buff *skb)
 
 int xfrm6_output(struct sk_buff *skb)
 {
-	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dst->dev,
+	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb_dst(skb)->dev,
 		       xfrm6_output_finish);
 }
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 425ab144f15..5874657af7f 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -260,8 +260,8 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	ip_send_check(ip_hdr(skb));
 
 	/* drop old route */
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
+	skb_dst_drop(skb);
+	skb_dst_set(skb, &rt->u.dst);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
@@ -324,8 +324,8 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	}
 
 	/* drop old route */
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
+	skb_dst_drop(skb);
+	skb_dst_set(skb, &rt->u.dst);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
@@ -388,8 +388,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_put;
 
 	/* drop old route */
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
+	skb_dst_drop(skb);
+	skb_dst_set(skb, &rt->u.dst);
 
 	/* mangle the packet */
 	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
@@ -465,8 +465,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_put;
 
 	/* drop old route */
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
+	skb_dst_drop(skb);
+	skb_dst_set(skb, &rt->u.dst);
 
 	/* mangle the packet */
 	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
@@ -553,8 +553,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): mtu less than 68\n");
 		goto tx_error;
 	}
-	if (skb->dst)
-		skb->dst->ops->update_pmtu(skb->dst, mtu);
+	if (skb_dst(skb))
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
 
 	df |= (old_iph->frag_off & htons(IP_DF));
 
@@ -596,8 +596,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 
 	/* drop old route */
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
+	skb_dst_drop(skb);
+	skb_dst_set(skb, &rt->u.dst);
 
 	/*
 	 *	Push down and install the IPIP header.
@@ -665,8 +665,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): mtu less than 1280\n");
 		goto tx_error;
 	}
-	if (skb->dst)
-		skb->dst->ops->update_pmtu(skb->dst, mtu);
+	if (skb_dst(skb))
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
 
 	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
@@ -702,8 +702,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 
 	/* drop old route */
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
+	skb_dst_drop(skb);
+	skb_dst_set(skb, &rt->u.dst);
 
 	/*
 	 *	Push down and install the IPIP header.
@@ -775,8 +775,8 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	ip_send_check(ip_hdr(skb));
 
 	/* drop old route */
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
+	skb_dst_drop(skb);
+	skb_dst_set(skb, &rt->u.dst);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
@@ -828,8 +828,8 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	}
 
 	/* drop old route */
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
+	skb_dst_drop(skb);
+	skb_dst_set(skb, &rt->u.dst);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
@@ -900,8 +900,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_put;
 
 	/* drop the old route when skb is not shared */
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
+	skb_dst_drop(skb);
+	skb_dst_set(skb, &rt->u.dst);
 
 	ip_vs_nat_icmp(skb, pp, cp, 0);
 
@@ -975,8 +975,8 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_put;
 
 	/* drop the old route when skb is not shared */
-	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
+	skb_dst_drop(skb);
+	skb_dst_set(skb, &rt->u.dst);
 
 	ip_vs_nat_icmp_v6(skb, pp, cp, 0);
 
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 117b80112fc..a6d6ec320fb 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -176,7 +176,7 @@ static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple,
 static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 			     struct nf_conntrack_tuple *tuple)
 {
-	struct net *net = dev_net(skb->dev ? skb->dev : skb->dst->dev);
+	struct net *net = dev_net(skb->dev ? skb->dev : skb_dst(skb)->dev);
 	const struct gre_hdr_pptp *pgrehdr;
 	struct gre_hdr_pptp _pgrehdr;
 	__be16 srckey;
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 4f3b1f80879..eda64c1cb1e 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -73,11 +73,11 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 	}
 
 	if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
-		if (dst_mtu(skb->dst) <= minlen) {
+		if (dst_mtu(skb_dst(skb)) <= minlen) {
 			if (net_ratelimit())
 				printk(KERN_ERR "xt_TCPMSS: "
 				       "unknown or invalid path-MTU (%u)\n",
-				       dst_mtu(skb->dst));
+				       dst_mtu(skb_dst(skb)));
 			return -1;
 		}
 		if (in_mtu <= minlen) {
@@ -86,7 +86,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 				       "invalid path-MTU (%u)\n", in_mtu);
 			return -1;
 		}
-		newmss = min(dst_mtu(skb->dst), in_mtu) - minlen;
+		newmss = min(dst_mtu(skb_dst(skb)), in_mtu) - minlen;
 	} else
 		newmss = info->mss;
 
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 328bd20ddd2..4cbfebda8fa 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -86,7 +86,7 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info,
 		 unsigned short family)
 {
 	const struct xt_policy_elem *e;
-	const struct dst_entry *dst = skb->dst;
+	const struct dst_entry *dst = skb_dst(skb);
 	int strict = info->flags & XT_POLICY_MATCH_STRICT;
 	int i, pos;
 
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index 67419287bc7..484d1689bfd 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -25,7 +25,7 @@ static bool
 realm_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	const struct xt_realm_info *info = par->matchinfo;
-	const struct dst_entry *dst = skb->dst;
+	const struct dst_entry *dst = skb_dst(skb);
 
 	return (info->id == (dst->tclassid & info->mask)) ^ info->invert;
 }
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 6da9f38ef5c..4f76e5552d8 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -372,8 +372,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct
 		goto oom;
 
 	/* drop any routing info */
-	dst_release(skb->dst);
-	skb->dst = NULL;
+	skb_dst_drop(skb);
 
 	/* drop conntrack reference */
 	nf_reset(skb);
@@ -621,8 +620,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
 
 	skb_set_owner_r(skb, sk);
 	skb->dev = NULL;
-	dst_release(skb->dst);
-	skb->dst = NULL;
+	skb_dst_drop(skb);
 
 	/* drop conntrack reference */
 	nf_reset(skb);
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 0ef4e3065bc..9402a7fd378 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -84,7 +84,7 @@ static u32 flow_get_dst(const struct sk_buff *skb)
 	case htons(ETH_P_IPV6):
 		return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]);
 	default:
-		return addr_fold(skb->dst) ^ (__force u16)skb->protocol;
+		return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
 	}
 }
 
@@ -163,7 +163,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb)
 		break;
 	}
 	default:
-		res = addr_fold(skb->dst) ^ (__force u16)skb->protocol;
+		res = addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
 	}
 
 	return res;
@@ -251,8 +251,8 @@ fallback:
 static u32 flow_get_rtclassid(const struct sk_buff *skb)
 {
 #ifdef CONFIG_NET_CLS_ROUTE
-	if (skb->dst)
-		return skb->dst->tclassid;
+	if (skb_dst(skb))
+		return skb_dst(skb)->tclassid;
 #endif
 	return 0;
 }
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index bdf1f4172ee..dd872d5383e 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -137,7 +137,7 @@ static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
 	u32 id, h;
 	int iif, dont_cache = 0;
 
-	if ((dst = skb->dst) == NULL)
+	if ((dst = skb_dst(skb)) == NULL)
 		goto failure;
 
 	id = dst->tclassid;
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index b6b588bed4e..266151ae85a 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -246,11 +246,11 @@ META_COLLECTOR(int_tcindex)
 
 META_COLLECTOR(int_rtclassid)
 {
-	if (unlikely(skb->dst == NULL))
+	if (unlikely(skb_dst(skb) == NULL))
 		*err = -1;
 	else
 #ifdef CONFIG_NET_CLS_ROUTE
-		dst->value = skb->dst->tclassid;
+		dst->value = skb_dst(skb)->tclassid;
 #else
 		dst->value = 0;
 #endif
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 33133d27b53..8706920a6d4 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -149,7 +149,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 		break;
 	}
 	default:
-		h = (unsigned long)skb->dst ^ skb->protocol;
+		h = (unsigned long)skb_dst(skb) ^ skb->protocol;
 		h2 = (unsigned long)skb->sk;
 	}
 
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index a886496bdc3..cb1cb1e76b9 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -222,7 +222,7 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *
 {
 	struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
 	struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
-	struct neighbour *mn = skb->dst->neighbour;
+	struct neighbour *mn = skb_dst(skb)->neighbour;
 	struct neighbour *n = q->ncache;
 
 	if (mn->tbl == NULL)
@@ -262,8 +262,8 @@ static inline int teql_resolve(struct sk_buff *skb,
 		return -ENODEV;
 
 	if (dev->header_ops == NULL ||
-	    skb->dst == NULL ||
-	    skb->dst->neighbour == NULL)
+	    skb_dst(skb) == NULL ||
+	    skb_dst(skb)->neighbour == NULL)
 		return 0;
 	return __teql_resolve(skb, skb_res, dev);
 }
diff --git a/net/sctp/output.c b/net/sctp/output.c
index f0c91df59d4..b7641144451 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -405,10 +405,10 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 			sctp_assoc_sync_pmtu(asoc);
 		}
 	}
-	nskb->dst = dst_clone(tp->dst);
-	if (!nskb->dst)
+	dst = dst_clone(tp->dst);
+	skb_dst_set(nskb, dst);
+	if (dst)
 		goto no_route;
-	dst = nskb->dst;
 
 	/* Build the SCTP header.  */
 	sh = (struct sctphdr *)skb_push(nskb, sizeof(struct sctphdr));
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index e1859614601..6c2d6158655 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -918,7 +918,7 @@ static void xs_udp_data_ready(struct sock *sk, int len)
 	UDPX_INC_STATS_BH(sk, UDP_MIB_INDATAGRAMS);
 
 	/* Something worked... */
-	dst_confirm(skb->dst);
+	dst_confirm(skb_dst(skb));
 
 	xprt_adjust_cwnd(task, copied);
 	xprt_update_rtt(task);
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index b4a13178fb4..e0009c17d80 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -251,8 +251,7 @@ resume:
 	nf_reset(skb);
 
 	if (decaps) {
-		dst_release(skb->dst);
-		skb->dst = NULL;
+		skb_dst_drop(skb);
 		netif_rx(skb);
 		return 0;
 	} else {
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index c235597ba8d..b9fe13138c0 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -22,7 +22,7 @@ static int xfrm_output2(struct sk_buff *skb);
 
 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct dst_entry *dst = skb->dst;
+	struct dst_entry *dst = skb_dst(skb);
 	int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev)
 		- skb_headroom(skb);
 	int ntail = dst->dev->needed_tailroom - skb_tailroom(skb);
@@ -39,7 +39,7 @@ static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
 
 static int xfrm_output_one(struct sk_buff *skb, int err)
 {
-	struct dst_entry *dst = skb->dst;
+	struct dst_entry *dst = skb_dst(skb);
 	struct xfrm_state *x = dst->xfrm;
 	struct net *net = xs_net(x);
 
@@ -94,12 +94,13 @@ resume:
 			goto error_nolock;
 		}
 
-		if (!(skb->dst = dst_pop(dst))) {
+		dst = dst_pop(dst);
+		if (!dst) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
 			err = -EHOSTUNREACH;
 			goto error_nolock;
 		}
-		dst = skb->dst;
+		skb_dst_set(skb, dst);
 		x = dst->xfrm;
 	} while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL));
 
@@ -119,16 +120,16 @@ int xfrm_output_resume(struct sk_buff *skb, int err)
 	while (likely((err = xfrm_output_one(skb, err)) == 0)) {
 		nf_reset(skb);
 
-		err = skb->dst->ops->local_out(skb);
+		err = skb_dst(skb)->ops->local_out(skb);
 		if (unlikely(err != 1))
 			goto out;
 
-		if (!skb->dst->xfrm)
+		if (!skb_dst(skb)->xfrm)
 			return dst_output(skb);
 
-		err = nf_hook(skb->dst->ops->family,
+		err = nf_hook(skb_dst(skb)->ops->family,
 			      NF_INET_POST_ROUTING, skb,
-			      NULL, skb->dst->dev, xfrm_output2);
+			      NULL, skb_dst(skb)->dev, xfrm_output2);
 		if (unlikely(err != 1))
 			goto out;
 	}
@@ -179,7 +180,7 @@ static int xfrm_output_gso(struct sk_buff *skb)
 
 int xfrm_output(struct sk_buff *skb)
 {
-	struct net *net = dev_net(skb->dst->dev);
+	struct net *net = dev_net(skb_dst(skb)->dev);
 	int err;
 
 	if (skb_is_gso(skb))
@@ -202,7 +203,7 @@ int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
 	struct xfrm_mode *inner_mode;
 	if (x->sel.family == AF_UNSPEC)
 		inner_mode = xfrm_ip2inner_mode(x,
-				xfrm_af2proto(skb->dst->ops->family));
+				xfrm_af2proto(skb_dst(skb)->ops->family));
 	else
 		inner_mode = x->inner_mode;
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 9c068ab3a83..cb81ca35b0d 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2027,6 +2027,8 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 {
 	struct net *net = dev_net(skb->dev);
 	struct flowi fl;
+	struct dst_entry *dst;
+	int res;
 
 	if (xfrm_decode_session(skb, &fl, family) < 0) {
 		/* XXX: we should have something like FWDHDRERROR here. */
@@ -2034,7 +2036,11 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 		return 0;
 	}
 
-	return xfrm_lookup(net, &skb->dst, &fl, NULL, 0) == 0;
+	dst = skb_dst(skb);
+
+	res = xfrm_lookup(net, &dst, &fl, NULL, 0) == 0;
+	skb_dst_set(skb, dst);
+	return res;
 }
 EXPORT_SYMBOL(__xfrm_route_forward);
 
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 2fcad7c33ea..4bfc6153ad4 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4503,7 +4503,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex,
 	 * when the packet is on it's final way out.
 	 * NOTE: there appear to be some IPv6 multicast cases where skb->dst
 	 *       is NULL, in this case go ahead and apply access control. */
-	if (skb->dst != NULL && skb->dst->xfrm != NULL)
+	if (skb_dst(skb) != NULL && skb_dst(skb)->xfrm != NULL)
 		return NF_ACCEPT;
 #endif
 	secmark_active = selinux_secmark_enabled();
diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c
index c0eb72013d6..72b18452e1a 100644
--- a/security/selinux/xfrm.c
+++ b/security/selinux/xfrm.c
@@ -447,7 +447,7 @@ int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
 	struct dst_entry *dst;
 	int rc = 0;
 
-	dst = skb->dst;
+	dst = skb_dst(skb);
 
 	if (dst) {
 		struct dst_entry *dst_test;
-- 
cgit v1.2.3-70-g09d2


From e5b9215ef9a274eb9fb65f6aa4602ad82d10a6cb Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 2 Jun 2009 05:20:21 +0000
Subject: net: skb cleanup

Can remove anonymous union now it has one field.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9ef6eb20247..7305da92be8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -322,9 +322,7 @@ struct sk_buff {
 	ktime_t			tstamp;
 	struct net_device	*dev;
 
-	union {
-		unsigned long		_skb_dst;
-	};
+	unsigned long		_skb_dst;
 #ifdef CONFIG_XFRM
 	struct	sec_path	*sp;
 #endif
-- 
cgit v1.2.3-70-g09d2


From a84db7949eab7a42e715192f62c55c554e195e54 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Tue, 7 Apr 2009 15:41:39 +0800
Subject: sctp: fix error cause codes of ADD-IP extension

RFC5061 had changed the error cause codes for Dynamic Address
Reconfiguration As the following:

       Cause Code
       Value          Cause Code
       ---------      ----------------
       0x00A0          Request to Delete Last Remaining IP Address
       0x00A1          Operation Refused Due to Resource Shortage
       0x00A2          Request to Delete Source IP Address
       0x00A3          Association Aborted Due to Illegal ASCONF-ACK
       0x00A4          Request Refused - No Authorization

This patch fix the error cause codes.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 include/linux/sctp.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index c2731bfe04d..b464b9d3d24 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -487,17 +487,17 @@ typedef enum {
 	 *
 	 * Value          Cause Code
 	 * ---------      ----------------
-	 * 0x0100          Request to Delete Last Remaining IP Address.
-	 * 0x0101          Operation Refused Due to Resource Shortage.
-	 * 0x0102          Request to Delete Source IP Address.
-	 * 0x0103          Association Aborted due to illegal ASCONF-ACK
-	 * 0x0104          Request refused - no authorization.
+	 * 0x00A0          Request to Delete Last Remaining IP Address.
+	 * 0x00A1          Operation Refused Due to Resource Shortage.
+	 * 0x00A2          Request to Delete Source IP Address.
+	 * 0x00A3          Association Aborted due to illegal ASCONF-ACK
+	 * 0x00A4          Request refused - no authorization.
 	 */
-	SCTP_ERROR_DEL_LAST_IP	= cpu_to_be16(0x0100),
-	SCTP_ERROR_RSRC_LOW	= cpu_to_be16(0x0101),
-	SCTP_ERROR_DEL_SRC_IP	= cpu_to_be16(0x0102),
-	SCTP_ERROR_ASCONF_ACK   = cpu_to_be16(0x0103),
-	SCTP_ERROR_REQ_REFUSED	= cpu_to_be16(0x0104),
+	SCTP_ERROR_DEL_LAST_IP	= cpu_to_be16(0x00A0),
+	SCTP_ERROR_RSRC_LOW	= cpu_to_be16(0x00A1),
+	SCTP_ERROR_DEL_SRC_IP	= cpu_to_be16(0x00A2),
+	SCTP_ERROR_ASCONF_ACK   = cpu_to_be16(0x00A3),
+	SCTP_ERROR_REQ_REFUSED	= cpu_to_be16(0x00A4),
 
 	/* AUTH Section 4.  New Error Cause
 	 *
-- 
cgit v1.2.3-70-g09d2


From 9919b455fc00c995ef8141848bdc0709ce50bf36 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Tue, 12 May 2009 21:52:51 +0800
Subject: sctp: fix to choose alternate destination when retransmit ASCONF
 chunk

RFC 5061 Section 5.1 ASCONF Chunk Procedures said:

B4)  Re-transmit the ASCONF Chunk last sent and if possible choose an
     alternate destination address (please refer to [RFC4960],
     Section 6.4.1).  An endpoint MUST NOT add new parameters to this
     chunk; it MUST be the same (including its Sequence Number) as
     the last ASCONF sent.  An endpoint MAY, however, bundle an
     additional ASCONF with new ASCONF parameters with the next
     Sequence Number.  For details, see Section 5.5.

This patch fix to choose an alternate destination address when
re-transmit the ASCONF chunk, with some dup codes cleanup.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 include/net/sctp/structs.h |  6 ++----
 net/sctp/associola.c       | 42 +++++++-----------------------------------
 net/sctp/sm_sideeffect.c   |  8 +++++---
 3 files changed, 14 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 23f08fe1d50..edfcacf3250 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1939,10 +1939,8 @@ void sctp_association_free(struct sctp_association *);
 void sctp_association_put(struct sctp_association *);
 void sctp_association_hold(struct sctp_association *);
 
-struct sctp_transport *sctp_assoc_choose_init_transport(
-	struct sctp_association *);
-struct sctp_transport *sctp_assoc_choose_shutdown_transport(
-	struct sctp_association *);
+struct sctp_transport *sctp_assoc_choose_alter_transport(
+	struct sctp_association *, struct sctp_transport *);
 void sctp_assoc_update_retran_path(struct sctp_association *);
 struct sctp_transport *sctp_assoc_lookup_paddr(const struct sctp_association *,
 					  const union sctp_addr *);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 8d3aef9d061..39f5166ae7a 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1284,49 +1284,21 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
 				 ntohs(t->ipaddr.v4.sin_port));
 }
 
-/* Choose the transport for sending a INIT packet.  */
-struct sctp_transport *sctp_assoc_choose_init_transport(
-	struct sctp_association *asoc)
+/* Choose the transport for sending retransmit packet.  */
+struct sctp_transport *sctp_assoc_choose_alter_transport(
+	struct sctp_association *asoc, struct sctp_transport *last_sent_to)
 {
-	struct sctp_transport *t;
-
-	/* Use the retran path. If the last INIT was sent over the
+	/* If this is the first time packet is sent, use the active path,
+	 * else use the retran path. If the last packet was sent over the
 	 * retran path, update the retran path and use it.
 	 */
-	if (!asoc->init_last_sent_to) {
-		t = asoc->peer.active_path;
-	} else {
-		if (asoc->init_last_sent_to == asoc->peer.retran_path)
-			sctp_assoc_update_retran_path(asoc);
-		t = asoc->peer.retran_path;
-	}
-
-	SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association"
-				 " %p addr: ",
-				 " port: %d\n",
-				 asoc,
-				 (&t->ipaddr),
-				 ntohs(t->ipaddr.v4.sin_port));
-
-	return t;
-}
-
-/* Choose the transport for sending a SHUTDOWN packet.  */
-struct sctp_transport *sctp_assoc_choose_shutdown_transport(
-	struct sctp_association *asoc)
-{
-	/* If this is the first time SHUTDOWN is sent, use the active path,
-	 * else use the retran path. If the last SHUTDOWN was sent over the
-	 * retran path, update the retran path and use it.
-	 */
-	if (!asoc->shutdown_last_sent_to)
+	if (!last_sent_to)
 		return asoc->peer.active_path;
 	else {
-		if (asoc->shutdown_last_sent_to == asoc->peer.retran_path)
+		if (last_sent_to == asoc->peer.retran_path)
 			sctp_assoc_update_retran_path(asoc);
 		return asoc->peer.retran_path;
 	}
-
 }
 
 /* Update the association's pmtu and frag_point by going through all the
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index e2020eb2c8c..86426aac160 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -686,7 +686,8 @@ static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds,
 {
 	struct sctp_transport *t;
 
-	t = sctp_assoc_choose_shutdown_transport(asoc);
+	t = sctp_assoc_choose_alter_transport(asoc,
+					      asoc->shutdown_last_sent_to);
 	asoc->shutdown_last_sent_to = t;
 	asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto;
 	chunk->transport = t;
@@ -777,7 +778,7 @@ static void sctp_cmd_setup_t4(sctp_cmd_seq_t *cmds,
 {
 	struct sctp_transport *t;
 
-	t = asoc->peer.active_path;
+	t = sctp_assoc_choose_alter_transport(asoc, chunk->transport);
 	asoc->timeouts[SCTP_EVENT_TIMEOUT_T4_RTO] = t->rto;
 	chunk->transport = t;
 }
@@ -1379,7 +1380,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 
 		case SCTP_CMD_INIT_CHOOSE_TRANSPORT:
 			chunk = cmd->obj.ptr;
-			t = sctp_assoc_choose_init_transport(asoc);
+			t = sctp_assoc_choose_alter_transport(asoc,
+						asoc->init_last_sent_to);
 			asoc->init_last_sent_to = t;
 			chunk->transport = t;
 			t->init_sent_count++;
-- 
cgit v1.2.3-70-g09d2


From c6ba68a26645dbc5029a9faa5687ebe6fcfc53e4 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Mon, 1 Jun 2009 12:41:15 -0400
Subject: sctp: support non-blocking version of the new sctp_connectx() API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Prior implementation of the new sctp_connectx() call that returns
an association ID did not work correctly on non-blocking socket.
This is because we could not return both a EINPROGRESS error and
an association id.  This is a new implementation that supports this.

Originally from Ivan Skytte Jørgensen <isj-sctp@i1.dk

Signed-off-by: Ivan Skytte Jørgensen <isj-sctp@i1.dk
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 include/net/sctp/user.h |  2 ++
 net/sctp/associola.c    |  4 ++++
 net/sctp/socket.c       | 42 +++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 47 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h
index b259fc5798f..1580c04f68b 100644
--- a/include/net/sctp/user.h
+++ b/include/net/sctp/user.h
@@ -147,6 +147,8 @@ enum sctp_optname {
 #define SCTP_GET_LOCAL_ADDRS	SCTP_GET_LOCAL_ADDRS
 	SCTP_SOCKOPT_CONNECTX, /* CONNECTX requests. */
 #define SCTP_SOCKOPT_CONNECTX	SCTP_SOCKOPT_CONNECTX
+	SCTP_SOCKOPT_CONNECTX3, /* CONNECTX requests. (new implementation) */
+#define SCTP_SOCKOPT_CONNECTX3	SCTP_SOCKOPT_CONNECTX3
 };
 
 /*
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 39f5166ae7a..525864bf4f0 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1470,6 +1470,10 @@ int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp)
 {
 	int assoc_id;
 	int error = 0;
+
+	/* If the id is already assigned, keep it. */
+	if (asoc->assoc_id)
+		return error;
 retry:
 	if (unlikely(!idr_pre_get(&sctp_assocs_id, gfp)))
 		return -ENOMEM;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 5fb3a8c9792..7c3dfd2d948 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1100,6 +1100,15 @@ static int __sctp_connect(struct sock* sk,
 		goto out_free;
 	}
 
+	/* In case the user of sctp_connectx() wants an association
+	 * id back, assign one now.
+	 */
+	if (assoc_id) {
+		err = sctp_assoc_set_id(asoc, GFP_KERNEL);
+		if (err < 0)
+			goto out_free;
+	}
+
 	err = sctp_primitive_ASSOCIATE(asoc, NULL);
 	if (err < 0) {
 		goto out_free;
@@ -1120,7 +1129,7 @@ static int __sctp_connect(struct sock* sk,
 	timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK);
 
 	err = sctp_wait_for_connect(asoc, &timeo);
-	if (!err && assoc_id)
+	if ((err == 0 || err == -EINPROGRESS) && assoc_id)
 		*assoc_id = asoc->assoc_id;
 
 	/* Don't free association on exit. */
@@ -1264,6 +1273,34 @@ SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk,
 		return assoc_id;
 }
 
+/*
+ * New (hopefully final) interface for the API.  The option buffer is used
+ * both for the returned association id and the addresses.
+ */
+SCTP_STATIC int sctp_getsockopt_connectx3(struct sock* sk, int len,
+					char __user *optval,
+					int __user *optlen)
+{
+	sctp_assoc_t assoc_id = 0;
+	int err = 0;
+
+	if (len < sizeof(assoc_id))
+		return -EINVAL;
+
+	err = __sctp_setsockopt_connectx(sk,
+			(struct sockaddr __user *)(optval + sizeof(assoc_id)),
+			len - sizeof(assoc_id), &assoc_id);
+
+	if (err == 0 || err == -EINPROGRESS) {
+		if (copy_to_user(optval, &assoc_id, sizeof(assoc_id)))
+			return -EFAULT;
+		if (put_user(sizeof(assoc_id), optlen))
+			return -EFAULT;
+	}
+
+	return err;
+}
+
 /* API 3.1.4 close() - UDP Style Syntax
  * Applications use close() to perform graceful shutdown (as described in
  * Section 10.1 of [SCTP]) on ALL the associations currently represented
@@ -5578,6 +5615,9 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
 		retval = sctp_getsockopt_local_addrs(sk, len, optval,
 						     optlen);
 		break;
+	case SCTP_SOCKOPT_CONNECTX3:
+		retval = sctp_getsockopt_connectx3(sk, len, optval, optlen);
+		break;
 	case SCTP_DEFAULT_SEND_PARAM:
 		retval = sctp_getsockopt_default_send_param(sk, len,
 							    optval, optlen);
-- 
cgit v1.2.3-70-g09d2


From 56d8bd3f0b98972312cad683947ec90b21011199 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 3 Jun 2009 14:52:03 +0100
Subject: tracing: fix multiple use of __print_flags and __print_symbolic

Here is an updated patch to include the extra call to
trace_seq_init() as requested. This is vs. the latest
-tip tree and fixes the use of multiple __print_flags
and __print_symbolic in a single tracer. Also tested
to ensure its working now:

mount.gfs2-2534  [000]   235.850587: gfs2_glock_queue: 8.7 glock 1:2 dequeue PR
mount.gfs2-2534  [000]   235.850591: gfs2_demote_rq: 8.7 glock 1:0 demote EX to NL flags:DI
mount.gfs2-2534  [000]   235.850591: gfs2_glock_queue: 8.7 glock 1:0 dequeue EX
glock_workqueue-2529  [000]   235.850666: gfs2_glock_state_change: 8.7 glock 1:0 state EX => NL tgt:NL dmt:NL flags:lDpI
glock_workqueue-2529  [000]   235.850672: gfs2_glock_put: 8.7 glock 1:0 state NL => IV flags:I

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
LKML-Reference: <1244037123.29604.603.camel@localhost.localdomain>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/ftrace.h      |  2 ++
 kernel/trace/trace_output.c | 10 ++++------
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index b5478dab579..40ede4db4d8 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -104,6 +104,7 @@
  *	field = (typeof(field))entry;
  *
  *	p = get_cpu_var(ftrace_event_seq);
+ *	trace_seq_init(p);
  *	ret = trace_seq_printf(s, <TP_printk> "\n");
  *	put_cpu();
  *	if (!ret)
@@ -167,6 +168,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 	field = (typeof(field))entry;					\
 									\
 	p = &get_cpu_var(ftrace_event_seq);				\
+	trace_seq_init(p);						\
 	ret = trace_seq_printf(s, #call ": " print);			\
 	put_cpu();							\
 	if (!ret)							\
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 8dadbbbd2d5..8afeea412e7 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -223,10 +223,9 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
 {
 	unsigned long mask;
 	const char *str;
+	const char *ret = p->buffer + p->len;
 	int i;
 
-	trace_seq_init(p);
-
 	for (i = 0;  flag_array[i].name && flags; i++) {
 
 		mask = flag_array[i].mask;
@@ -249,7 +248,7 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
 
 	trace_seq_putc(p, 0);
 
-	return p->buffer;
+	return ret;
 }
 EXPORT_SYMBOL(ftrace_print_flags_seq);
 
@@ -258,8 +257,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
 			 const struct trace_print_flags *symbol_array)
 {
 	int i;
-
-	trace_seq_init(p);
+	const char *ret = p->buffer + p->len;
 
 	for (i = 0;  symbol_array[i].name; i++) {
 
@@ -275,7 +273,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
 		
 	trace_seq_putc(p, 0);
 
-	return p->buffer;
+	return ret;
 }
 EXPORT_SYMBOL(ftrace_print_symbols_seq);
 
-- 
cgit v1.2.3-70-g09d2


From e535c7566e1318ccfa015e297f0309994f7bc078 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sat, 23 May 2009 11:18:45 +0200
Subject: mac80211: deprecate conf.beacon_int properly

Ivo has updated the driver to no longer use the change flag,
so we can remove that, but rt2x00 and ath5k still use the
actual value so let's mark it as deprecated too.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 14 ++------------
 net/mac80211/main.c    | 12 ++----------
 2 files changed, 4 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index d72346ff324..0270aa6e08f 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -527,7 +527,6 @@ enum ieee80211_conf_flags {
  * enum ieee80211_conf_changed - denotes which configuration changed
  *
  * @IEEE80211_CONF_CHANGE_RADIO_ENABLED: the value of radio_enabled changed
- * @_IEEE80211_CONF_CHANGE_BEACON_INTERVAL: DEPRECATED
  * @IEEE80211_CONF_CHANGE_LISTEN_INTERVAL: the listen interval changed
  * @IEEE80211_CONF_CHANGE_RADIOTAP: the radiotap flag changed
  * @IEEE80211_CONF_CHANGE_PS: the PS flag or dynamic PS timeout changed
@@ -538,7 +537,6 @@ enum ieee80211_conf_flags {
  */
 enum ieee80211_conf_changed {
 	IEEE80211_CONF_CHANGE_RADIO_ENABLED	= BIT(0),
-	_IEEE80211_CONF_CHANGE_BEACON_INTERVAL	= BIT(1),
 	IEEE80211_CONF_CHANGE_LISTEN_INTERVAL	= BIT(2),
 	IEEE80211_CONF_CHANGE_RADIOTAP		= BIT(3),
 	IEEE80211_CONF_CHANGE_PS		= BIT(4),
@@ -548,14 +546,6 @@ enum ieee80211_conf_changed {
 	IEEE80211_CONF_CHANGE_IDLE		= BIT(8),
 };
 
-static inline __deprecated enum ieee80211_conf_changed
-__IEEE80211_CONF_CHANGE_BEACON_INTERVAL(void)
-{
-	return _IEEE80211_CONF_CHANGE_BEACON_INTERVAL;
-}
-#define IEEE80211_CONF_CHANGE_BEACON_INTERVAL \
-	__IEEE80211_CONF_CHANGE_BEACON_INTERVAL()
-
 /**
  * struct ieee80211_conf - configuration of the device
  *
@@ -564,7 +554,7 @@ __IEEE80211_CONF_CHANGE_BEACON_INTERVAL(void)
  * @flags: configuration flags defined above
  *
  * @radio_enabled: when zero, driver is required to switch off the radio.
- * @beacon_int: beacon interval (TODO make interface config)
+ * @beacon_int: DEPRECATED, DO NOT USE
  *
  * @listen_interval: listen interval in units of beacon interval
  * @max_sleep_period: the maximum number of beacon intervals to sleep for
@@ -589,7 +579,7 @@ __IEEE80211_CONF_CHANGE_BEACON_INTERVAL(void)
  *    number of transmissions not the number of retries
  */
 struct ieee80211_conf {
-	int beacon_int;
+	int __deprecated beacon_int;
 	u32 flags;
 	int power_level, dynamic_ps_timeout;
 	int max_sleep_period;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index e37770ced53..2683df91807 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -289,16 +289,8 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 	drv_bss_info_changed(local, &sdata->vif,
 			     &sdata->vif.bss_conf, changed);
 
-	/*
-	 * DEPRECATED
-	 *
-	 * ~changed is just there to not do this at resume time
-	 */
-	if (changed & BSS_CHANGED_BEACON_INT && ~changed) {
-		local->hw.conf.beacon_int = sdata->vif.bss_conf.beacon_int;
-		ieee80211_hw_config(local,
-				    _IEEE80211_CONF_CHANGE_BEACON_INTERVAL);
-	}
+	/* DEPRECATED */
+	local->hw.conf.beacon_int = sdata->vif.bss_conf.beacon_int;
 }
 
 u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata)
-- 
cgit v1.2.3-70-g09d2


From 8fc0fee09245fe740d81aded36f4af6db1054015 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sun, 24 May 2009 16:57:19 +0200
Subject: cfg80211: use key size constants

Instead of hardcoding the key length for validation, use the
constants Zhu Yi recently added and add one for AES_CMAC too.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h |  1 +
 net/wireless/util.c       | 11 +++++------
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 34de8b21f6d..a9173d5434d 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1092,6 +1092,7 @@ enum ieee80211_key_len {
 	WLAN_KEY_LEN_WEP104 = 13,
 	WLAN_KEY_LEN_CCMP = 16,
 	WLAN_KEY_LEN_TKIP = 32,
+	WLAN_KEY_LEN_AES_CMAC = 16,
 };
 
 /*
diff --git a/net/wireless/util.c b/net/wireless/util.c
index d072bff463a..5be9ed7ae1e 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -157,26 +157,25 @@ int cfg80211_validate_key_settings(struct key_params *params, int key_idx,
 	    params->cipher != WLAN_CIPHER_SUITE_WEP104)
 		return -EINVAL;
 
-	/* TODO: add definitions for the lengths to linux/ieee80211.h */
 	switch (params->cipher) {
 	case WLAN_CIPHER_SUITE_WEP40:
-		if (params->key_len != 5)
+		if (params->key_len != WLAN_KEY_LEN_WEP40)
 			return -EINVAL;
 		break;
 	case WLAN_CIPHER_SUITE_TKIP:
-		if (params->key_len != 32)
+		if (params->key_len != WLAN_KEY_LEN_TKIP)
 			return -EINVAL;
 		break;
 	case WLAN_CIPHER_SUITE_CCMP:
-		if (params->key_len != 16)
+		if (params->key_len != WLAN_KEY_LEN_CCMP)
 			return -EINVAL;
 		break;
 	case WLAN_CIPHER_SUITE_WEP104:
-		if (params->key_len != 13)
+		if (params->key_len != WLAN_KEY_LEN_WEP104)
 			return -EINVAL;
 		break;
 	case WLAN_CIPHER_SUITE_AES_CMAC:
-		if (params->key_len != 16)
+		if (params->key_len != WLAN_KEY_LEN_AES_CMAC)
 			return -EINVAL;
 		break;
 	default:
-- 
cgit v1.2.3-70-g09d2


From 3b8bcfd5d31ea0fec58681d035544ace707d2536 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sat, 30 May 2009 01:39:53 +0200
Subject: net: introduce pre-up netdev notifier

NETDEV_UP is called after the device is set UP, but sometimes
it is useful to be able to veto the device UP. Introduce a
new NETDEV_PRE_UP notifier that can be used for exactly this.
The first use case will be cfg80211 denying interfaces to be
set UP if the device is known to be rfkill'ed.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/notifier.h | 1 +
 net/core/dev.c           | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index b86fa2ffca0..81bc252dc8a 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -198,6 +198,7 @@ static inline int notifier_to_errno(int ret)
 #define NETDEV_CHANGENAME	0x000A
 #define NETDEV_FEAT_CHANGE	0x000B
 #define NETDEV_BONDING_FAILOVER 0x000C
+#define NETDEV_PRE_UP		0x000D
 
 #define SYS_DOWN	0x0001	/* Notify of system down */
 #define SYS_RESTART	SYS_DOWN
diff --git a/net/core/dev.c b/net/core/dev.c
index 34b49a6a22f..1f38401fc02 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1048,7 +1048,7 @@ void dev_load(struct net *net, const char *name)
 int dev_open(struct net_device *dev)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
-	int ret = 0;
+	int ret;
 
 	ASSERT_RTNL();
 
@@ -1065,6 +1065,11 @@ int dev_open(struct net_device *dev)
 	if (!netif_device_present(dev))
 		return -ENODEV;
 
+	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
+	ret = notifier_to_errno(ret);
+	if (ret)
+		return ret;
+
 	/*
 	 *	Call device private open method
 	 */
-- 
cgit v1.2.3-70-g09d2


From 19d337dff95cbf76edd3ad95c0cee2732c3e1ec5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 2 Jun 2009 13:01:37 +0200
Subject: rfkill: rewrite

This patch completely rewrites the rfkill core to address
the following deficiencies:

 * all rfkill drivers need to implement polling where necessary
   rather than having one central implementation

 * updating the rfkill state cannot be done from arbitrary
   contexts, forcing drivers to use schedule_work and requiring
   lots of code

 * rfkill drivers need to keep track of soft/hard blocked
   internally -- the core should do this

 * the rfkill API has many unexpected quirks, for example being
   asymmetric wrt. alloc/free and register/unregister

 * rfkill can call back into a driver from within a function the
   driver called -- this is prone to deadlocks and generally
   should be avoided

 * rfkill-input pointlessly is a separate module

 * drivers need to #ifdef rfkill functions (unless they want to
   depend on or select RFKILL) -- rfkill should provide inlines
   that do nothing if it isn't compiled in

 * the rfkill structure is not opaque -- drivers need to initialise
   it correctly (lots of sanity checking code required) -- instead
   force drivers to pass the right variables to rfkill_alloc()

 * the documentation is hard to read because it always assumes the
   reader is completely clueless and contains way TOO MANY CAPS

 * the rfkill code needlessly uses a lot of locks and atomic
   operations in locked sections

 * fix LED trigger to actually change the LED when the radio state
   changes -- this wasn't done before

Tested-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br> [thinkpad]
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 Documentation/rfkill.txt                   | 597 +++----------------
 MAINTAINERS                                |   6 +-
 arch/arm/mach-pxa/tosa-bt.c                |  30 +-
 arch/arm/mach-pxa/tosa.c                   |   1 -
 drivers/net/usb/hso.c                      |  42 +-
 drivers/net/wireless/ath/ath9k/ath9k.h     |   7 +-
 drivers/net/wireless/ath/ath9k/main.c      | 115 +---
 drivers/net/wireless/ath/ath9k/pci.c       |  15 -
 drivers/net/wireless/b43/Kconfig           |   2 +-
 drivers/net/wireless/b43/leds.c            |   2 +-
 drivers/net/wireless/b43/main.c            |   4 +-
 drivers/net/wireless/b43/phy_a.c           |   4 +-
 drivers/net/wireless/b43/phy_common.c      |  17 +-
 drivers/net/wireless/b43/phy_common.h      |   4 +-
 drivers/net/wireless/b43/phy_g.c           |   4 +-
 drivers/net/wireless/b43/phy_lp.c          |   2 +-
 drivers/net/wireless/b43/phy_n.c           |   2 +-
 drivers/net/wireless/b43/rfkill.c          | 123 ++--
 drivers/net/wireless/b43/rfkill.h          |   5 +-
 drivers/net/wireless/b43legacy/Kconfig     |   2 +-
 drivers/net/wireless/b43legacy/leds.c      |   3 +-
 drivers/net/wireless/b43legacy/rfkill.c    | 123 ++--
 drivers/net/wireless/b43legacy/rfkill.h    |   6 +-
 drivers/net/wireless/iwlwifi/Kconfig       |   5 +-
 drivers/net/wireless/iwlwifi/iwl-rfkill.c  |  69 +--
 drivers/net/wireless/iwmc3200wifi/rfkill.c |  39 +-
 drivers/platform/x86/Kconfig               |  14 +-
 drivers/platform/x86/acer-wmi.c            |  50 +-
 drivers/platform/x86/dell-laptop.c         | 101 ++--
 drivers/platform/x86/eeepc-laptop.c        |  99 +---
 drivers/platform/x86/hp-wmi.c              | 103 ++--
 drivers/platform/x86/sony-laptop.c         | 191 +++---
 drivers/platform/x86/thinkpad_acpi.c       | 873 ++++++++++++++--------------
 drivers/platform/x86/toshiba_acpi.c        | 159 ++---
 include/linux/Kbuild                       |   1 +
 include/linux/rfkill.h                     | 325 ++++++++---
 include/net/wimax.h                        |   8 +-
 net/rfkill/Kconfig                         |  21 +-
 net/rfkill/Makefile                        |   5 +-
 net/rfkill/core.c                          | 896 +++++++++++++++++++++++++++++
 net/rfkill/input.c                         | 342 +++++++++++
 net/rfkill/rfkill-input.c                  | 390 -------------
 net/rfkill/rfkill-input.h                  |  21 -
 net/rfkill/rfkill.c                        | 855 ---------------------------
 net/rfkill/rfkill.h                        |  27 +
 net/wimax/Kconfig                          |  14 -
 net/wimax/op-rfkill.c                      | 123 +---
 47 files changed, 2555 insertions(+), 3292 deletions(-)
 create mode 100644 net/rfkill/core.c
 create mode 100644 net/rfkill/input.c
 delete mode 100644 net/rfkill/rfkill-input.c
 delete mode 100644 net/rfkill/rfkill-input.h
 delete mode 100644 net/rfkill/rfkill.c
 create mode 100644 net/rfkill/rfkill.h

(limited to 'include')

diff --git a/Documentation/rfkill.txt b/Documentation/rfkill.txt
index 40c3a3f1081..de941e309d4 100644
--- a/Documentation/rfkill.txt
+++ b/Documentation/rfkill.txt
@@ -1,571 +1,130 @@
-rfkill - RF switch subsystem support
-====================================
+rfkill - RF kill switch support
+===============================
 
-1 Introduction
-2 Implementation details
-3 Kernel driver guidelines
-3.1 wireless device drivers
-3.2 platform/switch drivers
-3.3 input device drivers
-4 Kernel API
-5 Userspace support
+1. Introduction
+2. Implementation details
+3. Kernel driver guidelines
+4. Kernel API
+5. Userspace support
 
 
-1. Introduction:
+1. Introduction
 
-The rfkill switch subsystem exists to add a generic interface to circuitry that
-can enable or disable the signal output of a wireless *transmitter* of any
-type.  By far, the most common use is to disable radio-frequency transmitters.
+The rfkill subsystem provides a generic interface to disabling any radio
+transmitter in the system. When a transmitter is blocked, it shall not
+radiate any power.
 
-Note that disabling the signal output means that the the transmitter is to be
-made to not emit any energy when "blocked".  rfkill is not about blocking data
-transmissions, it is about blocking energy emission.
+The subsystem also provides the ability to react on button presses and
+disable all transmitters of a certain type (or all). This is intended for
+situations where transmitters need to be turned off, for example on
+aircraft.
 
-The rfkill subsystem offers support for keys and switches often found on
-laptops to enable wireless devices like WiFi and Bluetooth, so that these keys
-and switches actually perform an action in all wireless devices of a given type
-attached to the system.
 
-The buttons to enable and disable the wireless transmitters are important in
-situations where the user is for example using his laptop on a location where
-radio-frequency transmitters _must_ be disabled (e.g. airplanes).
 
-Because of this requirement, userspace support for the keys should not be made
-mandatory.  Because userspace might want to perform some additional smarter
-tasks when the key is pressed, rfkill provides userspace the possibility to
-take over the task to handle the key events.
-
-===============================================================================
-2: Implementation details
+2. Implementation details
 
 The rfkill subsystem is composed of various components: the rfkill class, the
 rfkill-input module (an input layer handler), and some specific input layer
 events.
 
-The rfkill class provides kernel drivers with an interface that allows them to
-know when they should enable or disable a wireless network device transmitter.
-This is enabled by the CONFIG_RFKILL Kconfig option.
-
-The rfkill class support makes sure userspace will be notified of all state
-changes on rfkill devices through uevents.  It provides a notification chain
-for interested parties in the kernel to also get notified of rfkill state
-changes in other drivers.  It creates several sysfs entries which can be used
-by userspace.  See section "Userspace support".
-
-The rfkill-input module provides the kernel with the ability to implement a
-basic response when the user presses a key or button (or toggles a switch)
-related to rfkill functionality.  It is an in-kernel implementation of default
-policy of reacting to rfkill-related input events and neither mandatory nor
-required for wireless drivers to operate.  It is enabled by the
-CONFIG_RFKILL_INPUT Kconfig option.
-
-rfkill-input is a rfkill-related events input layer handler.  This handler will
-listen to all rfkill key events and will change the rfkill state of the
-wireless devices accordingly.  With this option enabled userspace could either
-do nothing or simply perform monitoring tasks.
-
-The rfkill-input module also provides EPO (emergency power-off) functionality
-for all wireless transmitters.  This function cannot be overridden, and it is
-always active.  rfkill EPO is related to *_RFKILL_ALL input layer events.
-
-
-Important terms for the rfkill subsystem:
-
-In order to avoid confusion, we avoid the term "switch" in rfkill when it is
-referring to an electronic control circuit that enables or disables a
-transmitter.  We reserve it for the physical device a human manipulates
-(which is an input device, by the way):
-
-rfkill switch:
-
-	A physical device a human manipulates.  Its state can be perceived by
-	the kernel either directly (through a GPIO pin, ACPI GPE) or by its
-	effect on a rfkill line of a wireless device.
-
-rfkill controller:
-
-	A hardware circuit that controls the state of a rfkill line, which a
-	kernel driver can interact with *to modify* that state (i.e. it has
-	either write-only or read/write access).
-
-rfkill line:
-
-	An input channel (hardware or software) of a wireless device, which
-	causes a wireless transmitter to stop emitting energy (BLOCK) when it
-	is active.  Point of view is extremely important here: rfkill lines are
-	always seen from the PoV of a wireless device (and its driver).
-
-soft rfkill line/software rfkill line:
-
-	A rfkill line the wireless device driver can directly change the state
-	of.  Related to rfkill_state RFKILL_STATE_SOFT_BLOCKED.
-
-hard rfkill line/hardware rfkill line:
-
-	A rfkill line that works fully in hardware or firmware, and that cannot
-	be overridden by the kernel driver.  The hardware device or the
-	firmware just exports its status to the driver, but it is read-only.
-	Related to rfkill_state RFKILL_STATE_HARD_BLOCKED.
-
-The enum rfkill_state describes the rfkill state of a transmitter:
-
-When a rfkill line or rfkill controller is in the RFKILL_STATE_UNBLOCKED state,
-the wireless transmitter (radio TX circuit for example) is *enabled*.  When the
-it is in the RFKILL_STATE_SOFT_BLOCKED or RFKILL_STATE_HARD_BLOCKED, the
-wireless transmitter is to be *blocked* from operating.
-
-RFKILL_STATE_SOFT_BLOCKED indicates that a call to toggle_radio() can change
-that state.  RFKILL_STATE_HARD_BLOCKED indicates that a call to toggle_radio()
-will not be able to change the state and will return with a suitable error if
-attempts are made to set the state to RFKILL_STATE_UNBLOCKED.
-
-RFKILL_STATE_HARD_BLOCKED is used by drivers to signal that the device is
-locked in the BLOCKED state by a hardwire rfkill line (typically an input pin
-that, when active, forces the transmitter to be disabled) which the driver
-CANNOT override.
-
-Full rfkill functionality requires two different subsystems to cooperate: the
-input layer and the rfkill class.  The input layer issues *commands* to the
-entire system requesting that devices registered to the rfkill class change
-state.  The way this interaction happens is not complex, but it is not obvious
-either:
-
-Kernel Input layer:
-
-	* Generates KEY_WWAN, KEY_WLAN, KEY_BLUETOOTH, SW_RFKILL_ALL, and
-	  other such events when the user presses certain keys, buttons, or
-	  toggles certain physical switches.
-
-	THE INPUT LAYER IS NEVER USED TO PROPAGATE STATUS, NOTIFICATIONS OR THE
-	KIND OF STUFF AN ON-SCREEN-DISPLAY APPLICATION WOULD REPORT.  It is
-	used to issue *commands* for the system to change behaviour, and these
-	commands may or may not be carried out by some kernel driver or
-	userspace application.  It follows that doing user feedback based only
-	on input events is broken, as there is no guarantee that an input event
-	will be acted upon.
-
-	Most wireless communication device drivers implementing rfkill
-	functionality MUST NOT generate these events, and have no reason to
-	register themselves with the input layer.  Doing otherwise is a common
-	misconception.  There is an API to propagate rfkill status change
-	information, and it is NOT the input layer.
-
-rfkill class:
-
-	* Calls a hook in a driver to effectively change the wireless
-	  transmitter state;
-	* Keeps track of the wireless transmitter state (with help from
-	  the driver);
-	* Generates userspace notifications (uevents) and a call to a
-	  notification chain (kernel) when there is a wireless transmitter
-	  state change;
-	* Connects a wireless communications driver with the common rfkill
-	  control system, which, for example, allows actions such as
-	  "switch all bluetooth devices offline" to be carried out by
-	  userspace or by rfkill-input.
-
-	THE RFKILL CLASS NEVER ISSUES INPUT EVENTS.  THE RFKILL CLASS DOES
-	NOT LISTEN TO INPUT EVENTS.  NO DRIVER USING THE RFKILL CLASS SHALL
-	EVER LISTEN TO, OR ACT ON RFKILL INPUT EVENTS.  Doing otherwise is
-	a layering violation.
-
-	Most wireless data communication drivers in the kernel have just to
-	implement the rfkill class API to work properly.  Interfacing to the
-	input layer is not often required (and is very often a *bug*) on
-	wireless drivers.
-
-	Platform drivers often have to attach to the input layer to *issue*
-	(but never to listen to) rfkill events for rfkill switches, and also to
-	the rfkill class to export a control interface for the platform rfkill
-	controllers to the rfkill subsystem.  This does NOT mean the rfkill
-	switch is attached to a rfkill class (doing so is almost always wrong).
-	It just means the same kernel module is the driver for different
-	devices (rfkill switches and rfkill controllers).
-
-
-Userspace input handlers (uevents) or kernel input handlers (rfkill-input):
-
-	* Implements the policy of what should happen when one of the input
-	  layer events related to rfkill operation is received.
-	* Uses the sysfs interface (userspace) or private rfkill API calls
-	  to tell the devices registered with the rfkill class to change
-	  their state (i.e. translates the input layer event into real
-	  action).
-
-	* rfkill-input implements EPO by handling EV_SW SW_RFKILL_ALL 0
-	  (power off all transmitters) in a special way: it ignores any
-	  overrides and local state cache and forces all transmitters to the
-	  RFKILL_STATE_SOFT_BLOCKED state (including those which are already
-	  supposed to be BLOCKED).
-	* rfkill EPO will remain active until rfkill-input receives an
-	  EV_SW SW_RFKILL_ALL 1 event.  While the EPO is active, transmitters
-	  are locked in the blocked state (rfkill will refuse to unblock them).
-	* rfkill-input implements different policies that the user can
-	  select for handling EV_SW SW_RFKILL_ALL 1.  It will unlock rfkill,
-	  and either do nothing (leave transmitters blocked, but now unlocked),
-	  restore the transmitters to their state before the EPO, or unblock
-	  them all.
-
-Userspace uevent handler or kernel platform-specific drivers hooked to the
-rfkill notifier chain:
-
-	* Taps into the rfkill notifier chain or to KOBJ_CHANGE uevents,
-	  in order to know when a device that is registered with the rfkill
-	  class changes state;
-	* Issues feedback notifications to the user;
-	* In the rare platforms where this is required, synthesizes an input
-	  event to command all *OTHER* rfkill devices to also change their
-	  statues when a specific rfkill device changes state.
-
-
-===============================================================================
-3: Kernel driver guidelines
-
-Remember: point-of-view is everything for a driver that connects to the rfkill
-subsystem.  All the details below must be measured/perceived from the point of
-view of the specific driver being modified.
-
-The first thing one needs to know is whether his driver should be talking to
-the rfkill class or to the input layer.  In rare cases (platform drivers), it
-could happen that you need to do both, as platform drivers often handle a
-variety of devices in the same driver.
-
-Do not mistake input devices for rfkill controllers.  The only type of "rfkill
-switch" device that is to be registered with the rfkill class are those
-directly controlling the circuits that cause a wireless transmitter to stop
-working (or the software equivalent of them), i.e. what we call a rfkill
-controller.  Every other kind of "rfkill switch" is just an input device and
-MUST NOT be registered with the rfkill class.
-
-A driver should register a device with the rfkill class when ALL of the
-following conditions are met (they define a rfkill controller):
-
-1. The device is/controls a data communications wireless transmitter;
-
-2. The kernel can interact with the hardware/firmware to CHANGE the wireless
-   transmitter state (block/unblock TX operation);
-
-3. The transmitter can be made to not emit any energy when "blocked":
-   rfkill is not about blocking data transmissions, it is about blocking
-   energy emission;
-
-A driver should register a device with the input subsystem to issue
-rfkill-related events (KEY_WLAN, KEY_BLUETOOTH, KEY_WWAN, KEY_WIMAX,
-SW_RFKILL_ALL, etc) when ALL of the folowing conditions are met:
-
-1. It is directly related to some physical device the user interacts with, to
-   command the O.S./firmware/hardware to enable/disable a data communications
-   wireless transmitter.
-
-   Examples of the physical device are: buttons, keys and switches the user
-   will press/touch/slide/switch to enable or disable the wireless
-   communication device.
-
-2. It is NOT slaved to another device, i.e. there is no other device that
-   issues rfkill-related input events in preference to this one.
-
-   Please refer to the corner cases and examples section for more details.
-
-When in doubt, do not issue input events.  For drivers that should generate
-input events in some platforms, but not in others (e.g. b43), the best solution
-is to NEVER generate input events in the first place.  That work should be
-deferred to a platform-specific kernel module (which will know when to generate
-events through the rfkill notifier chain) or to userspace.  This avoids the
-usual maintenance problems with DMI whitelisting.
-
-
-Corner cases and examples:
-====================================
-
-1. If the device is an input device that, because of hardware or firmware,
-causes wireless transmitters to be blocked regardless of the kernel's will, it
-is still just an input device, and NOT to be registered with the rfkill class.
-
-2. If the wireless transmitter switch control is read-only, it is an input
-device and not to be registered with the rfkill class (and maybe not to be made
-an input layer event source either, see below).
-
-3. If there is some other device driver *closer* to the actual hardware the
-user interacted with (the button/switch/key) to issue an input event, THAT is
-the device driver that should be issuing input events.
-
-E.g:
-  [RFKILL slider switch] -- [GPIO hardware] -- [WLAN card rf-kill input]
-                           (platform driver)    (wireless card driver)
-
-The user is closer to the RFKILL slide switch plaform driver, so the driver
-which must issue input events is the platform driver looking at the GPIO
-hardware, and NEVER the wireless card driver (which is just a slave).  It is
-very likely that there are other leaves than just the WLAN card rf-kill input
-(e.g. a bluetooth card, etc)...
-
-On the other hand, some embedded devices do this:
-
-  [RFKILL slider switch] -- [WLAN card rf-kill input]
-                             (wireless card driver)
-
-In this situation, the wireless card driver *could* register itself as an input
-device and issue rf-kill related input events... but in order to AVOID the need
-for DMI whitelisting, the wireless card driver does NOT do it.  Userspace (HAL)
-or a platform driver (that exists only on these embedded devices) will do the
-dirty job of issuing the input events.
-
-
-COMMON MISTAKES in kernel drivers, related to rfkill:
-====================================
-
-1. NEVER confuse input device keys and buttons with input device switches.
-
-  1a. Switches are always set or reset.  They report the current state
-      (on position or off position).
-
-  1b. Keys and buttons are either in the pressed or not-pressed state, and
-      that's it.  A "button" that latches down when you press it, and
-      unlatches when you press it again is in fact a switch as far as input
-      devices go.
-
-Add the SW_* events you need for switches, do NOT try to emulate a button using
-KEY_* events just because there is no such SW_* event yet.  Do NOT try to use,
-for example, KEY_BLUETOOTH when you should be using SW_BLUETOOTH instead.
-
-2. Input device switches (sources of EV_SW events) DO store their current state
-(so you *must* initialize it by issuing a gratuitous input layer event on
-driver start-up and also when resuming from sleep), and that state CAN be
-queried from userspace through IOCTLs.  There is no sysfs interface for this,
-but that doesn't mean you should break things trying to hook it to the rfkill
-class to get a sysfs interface :-)
-
-3. Do not issue *_RFKILL_ALL events by default, unless you are sure it is the
-correct event for your switch/button.  These events are emergency power-off
-events when they are trying to turn the transmitters off.  An example of an
-input device which SHOULD generate *_RFKILL_ALL events is the wireless-kill
-switch in a laptop which is NOT a hotkey, but a real sliding/rocker switch.
-An example of an input device which SHOULD NOT generate *_RFKILL_ALL events by
-default, is any sort of hot key that is type-specific (e.g. the one for WLAN).
-
-
-3.1 Guidelines for wireless device drivers
-------------------------------------------
-
-(in this text, rfkill->foo means the foo field of struct rfkill).
-
-1. Each independent transmitter in a wireless device (usually there is only one
-transmitter per device) should have a SINGLE rfkill class attached to it.
-
-2. If the device does not have any sort of hardware assistance to allow the
-driver to rfkill the device, the driver should emulate it by taking all actions
-required to silence the transmitter.
-
-3. If it is impossible to silence the transmitter (i.e. it still emits energy,
-even if it is just in brief pulses, when there is no data to transmit and there
-is no hardware support to turn it off) do NOT lie to the users.  Do not attach
-it to a rfkill class.  The rfkill subsystem does not deal with data
-transmission, it deals with energy emission.  If the transmitter is emitting
-energy, it is not blocked in rfkill terms.
-
-4. It doesn't matter if the device has multiple rfkill input lines affecting
-the same transmitter, their combined state is to be exported as a single state
-per transmitter (see rule 1).
-
-This rule exists because users of the rfkill subsystem expect to get (and set,
-when possible) the overall transmitter rfkill state, not of a particular rfkill
-line.
+The rfkill class is provided for kernel drivers to register their radio
+transmitter with the kernel, provide methods for turning it on and off and,
+optionally, letting the system know about hardware-disabled states that may
+be implemented on the device. This code is enabled with the CONFIG_RFKILL
+Kconfig option, which drivers can "select".
 
-5. The wireless device driver MUST NOT leave the transmitter enabled during
-suspend and hibernation unless:
+The rfkill class code also notifies userspace of state changes, this is
+achieved via uevents. It also provides some sysfs files for userspace to
+check the status of radio transmitters. See the "Userspace support" section
+below.
 
-	5.1. The transmitter has to be enabled for some sort of functionality
-	like wake-on-wireless-packet or autonomous packed forwarding in a mesh
-	network, and that functionality is enabled for this suspend/hibernation
-	cycle.
 
-AND
+The rfkill-input code implements a basic response to rfkill buttons -- it
+implements turning on/off all devices of a certain class (or all).
 
-	5.2. The device was not on a user-requested BLOCKED state before
-	the suspend (i.e. the driver must NOT unblock a device, not even
-	to support wake-on-wireless-packet or remain in the mesh).
+When the device is hard-blocked (either by a call to rfkill_set_hw_state()
+or from query_hw_block) set_block() will be invoked but drivers can well
+ignore the method call since they can use the return value of the function
+rfkill_set_hw_state() to sync the software state instead of keeping track
+of calls to set_block().
 
-In other words, there is absolutely no allowed scenario where a driver can
-automatically take action to unblock a rfkill controller (obviously, this deals
-with scenarios where soft-blocking or both soft and hard blocking is happening.
-Scenarios where hardware rfkill lines are the only ones blocking the
-transmitter are outside of this rule, since the wireless device driver does not
-control its input hardware rfkill lines in the first place).
 
-6. During resume, rfkill will try to restore its previous state.
+The entire functionality is spread over more than one subsystem:
 
-7. After a rfkill class is suspended, it will *not* call rfkill->toggle_radio
-until it is resumed.
+ * The kernel input layer generates KEY_WWAN, KEY_WLAN etc. and
+   SW_RFKILL_ALL -- when the user presses a button. Drivers for radio
+   transmitters generally do not register to the input layer, unless the
+   device really provides an input device (i.e. a button that has no
+   effect other than generating a button press event)
 
+ * The rfkill-input code hooks up to these events and switches the soft-block
+   of the various radio transmitters, depending on the button type.
 
-Example of a WLAN wireless driver connected to the rfkill subsystem:
---------------------------------------------------------------------
+ * The rfkill drivers turn off/on their transmitters as requested.
 
-A certain WLAN card has one input pin that causes it to block the transmitter
-and makes the status of that input pin available (only for reading!) to the
-kernel driver.  This is a hard rfkill input line (it cannot be overridden by
-the kernel driver).
+ * The rfkill class will generate userspace notifications (uevents) to tell
+   userspace what the current state is.
 
-The card also has one PCI register that, if manipulated by the driver, causes
-it to block the transmitter.  This is a soft rfkill input line.
 
-It has also a thermal protection circuitry that shuts down its transmitter if
-the card overheats, and makes the status of that protection available (only for
-reading!) to the kernel driver.  This is also a hard rfkill input line.
 
-If either one of these rfkill lines are active, the transmitter is blocked by
-the hardware and forced offline.
+3. Kernel driver guidelines
 
-The driver should allocate and attach to its struct device *ONE* instance of
-the rfkill class (there is only one transmitter).
 
-It can implement the get_state() hook, and return RFKILL_STATE_HARD_BLOCKED if
-either one of its two hard rfkill input lines are active.  If the two hard
-rfkill lines are inactive, it must return RFKILL_STATE_SOFT_BLOCKED if its soft
-rfkill input line is active.  Only if none of the rfkill input lines are
-active, will it return RFKILL_STATE_UNBLOCKED.
+Drivers for radio transmitters normally implement only the rfkill class.
+These drivers may not unblock the transmitter based on own decisions, they
+should act on information provided by the rfkill class only.
 
-Since the device has a hardware rfkill line, it IS subject to state changes
-external to rfkill.  Therefore, the driver must make sure that it calls
-rfkill_force_state() to keep the status always up-to-date, and it must do a
-rfkill_force_state() on resume from sleep.
+Platform drivers might implement input devices if the rfkill button is just
+that, a button. If that button influences the hardware then you need to
+implement an rfkill class instead. This also applies if the platform provides
+a way to turn on/off the transmitter(s).
 
-Every time the driver gets a notification from the card that one of its rfkill
-lines changed state (polling might be needed on badly designed cards that don't
-generate interrupts for such events), it recomputes the rfkill state as per
-above, and calls rfkill_force_state() to update it.
+During suspend/hibernation, transmitters should only be left enabled when
+wake-on wlan or similar functionality requires it and the device wasn't
+blocked before suspend/hibernate. Note that it may be necessary to update
+the rfkill subsystem's idea of what the current state is at resume time if
+the state may have changed over suspend.
 
-The driver should implement the toggle_radio() hook, that:
 
-1. Returns an error if one of the hardware rfkill lines are active, and the
-caller asked for RFKILL_STATE_UNBLOCKED.
 
-2. Activates the soft rfkill line if the caller asked for state
-RFKILL_STATE_SOFT_BLOCKED.  It should do this even if one of the hard rfkill
-lines are active, effectively double-blocking the transmitter.
-
-3. Deactivates the soft rfkill line if none of the hardware rfkill lines are
-active and the caller asked for RFKILL_STATE_UNBLOCKED.
-
-===============================================================================
-4: Kernel API
+4. Kernel API
 
 To build a driver with rfkill subsystem support, the driver should depend on
-(or select) the Kconfig symbol RFKILL; it should _not_ depend on RKFILL_INPUT.
+(or select) the Kconfig symbol RFKILL.
 
 The hardware the driver talks to may be write-only (where the current state
 of the hardware is unknown), or read-write (where the hardware can be queried
 about its current state).
 
-The rfkill class will call the get_state hook of a device every time it needs
-to know the *real* current state of the hardware.  This can happen often, but
-it does not do any polling, so it is not enough on hardware that is subject
-to state changes outside of the rfkill subsystem.
-
-Therefore, calling rfkill_force_state() when a state change happens is
-mandatory when the device has a hardware rfkill line, or when something else
-like the firmware could cause its state to be changed without going through the
-rfkill class.
-
-Some hardware provides events when its status changes.  In these cases, it is
-best for the driver to not provide a get_state hook, and instead register the
-rfkill class *already* with the correct status, and keep it updated using
-rfkill_force_state() when it gets an event from the hardware.
-
-rfkill_force_state() must be used on the device resume handlers to update the
-rfkill status, should there be any chance of the device status changing during
-the sleep.
-
-There is no provision for a statically-allocated rfkill struct.  You must
-use rfkill_allocate() to allocate one.
-
-You should:
-	- rfkill_allocate()
-	- modify rfkill fields (flags, name)
-	- modify state to the current hardware state (THIS IS THE ONLY TIME
-	  YOU CAN ACCESS state DIRECTLY)
-	- rfkill_register()
+Calling rfkill_set_hw_state() when a state change happens is required from
+rfkill drivers that control devices that can be hard-blocked unless they also
+assign the poll_hw_block() callback (then the rfkill core will poll the
+device). Don't do this unless you cannot get the event in any other way.
 
-The only way to set a device to the RFKILL_STATE_HARD_BLOCKED state is through
-a suitable return of get_state() or through rfkill_force_state().
 
-When a device is in the RFKILL_STATE_HARD_BLOCKED state, the only way to switch
-it to a different state is through a suitable return of get_state() or through
-rfkill_force_state().
 
-If toggle_radio() is called to set a device to state RFKILL_STATE_SOFT_BLOCKED
-when that device is already at the RFKILL_STATE_HARD_BLOCKED state, it should
-not return an error.  Instead, it should try to double-block the transmitter,
-so that its state will change from RFKILL_STATE_HARD_BLOCKED to
-RFKILL_STATE_SOFT_BLOCKED should the hardware blocking cease.
-
-Please refer to the source for more documentation.
-
-===============================================================================
-5: Userspace support
-
-rfkill devices issue uevents (with an action of "change"), with the following
-environment variables set:
-
-RFKILL_NAME
-RFKILL_STATE
-RFKILL_TYPE
+5. Userspace support
 
-The ABI for these variables is defined by the sysfs attributes.  It is best
-to take a quick look at the source to make sure of the possible values.
-
-It is expected that HAL will trap those, and bridge them to DBUS, etc.  These
-events CAN and SHOULD be used to give feedback to the user about the rfkill
-status of the system.
-
-Input devices may issue events that are related to rfkill.  These are the
-various KEY_* events and SW_* events supported by rfkill-input.c.
-
-Userspace may not change the state of an rfkill switch in response to an
-input event, it should refrain from changing states entirely.
-
-Userspace cannot assume it is the only source of control for rfkill switches.
-Their state can change due to firmware actions, direct user actions, and the
-rfkill-input EPO override for *_RFKILL_ALL.
-
-When rfkill-input is not active, userspace must initiate a rfkill status
-change by writing to the "state" attribute in order for anything to happen.
-
-Take particular care to implement EV_SW SW_RFKILL_ALL properly.  When that
-switch is set to OFF, *every* rfkill device *MUST* be immediately put into the
-RFKILL_STATE_SOFT_BLOCKED state, no questions asked.
-
-The following sysfs entries will be created:
+The following sysfs entries exist for every rfkill device:
 
 	name: Name assigned by driver to this key (interface or driver name).
 	type: Name of the key type ("wlan", "bluetooth", etc).
 	state: Current state of the transmitter
 		0: RFKILL_STATE_SOFT_BLOCKED
-			transmitter is forced off, but one can override it
-			by a write to the state attribute;
+			transmitter is turned off by software
 		1: RFKILL_STATE_UNBLOCKED
-			transmiter is NOT forced off, and may operate if
-			all other conditions for such operation are met
-			(such as interface is up and configured, etc);
+			transmiter is (potentially) active
 		2: RFKILL_STATE_HARD_BLOCKED
 			transmitter is forced off by something outside of
-			the driver's control.  One cannot set a device to
-			this state through writes to the state attribute;
-	claim: 1: Userspace handles events, 0: Kernel handles events
-
-Both the "state" and "claim" entries are also writable. For the "state" entry
-this means that when 1 or 0 is written, the device rfkill state (if not yet in
-the requested state), will be will be toggled accordingly.
-
-For the "claim" entry writing 1 to it means that the kernel no longer handles
-key events even though RFKILL_INPUT input was enabled. When "claim" has been
-set to 0, userspace should make sure that it listens for the input events or
-check the sysfs "state" entry regularly to correctly perform the required tasks
-when the rkfill key is pressed.
-
-A note about input devices and EV_SW events:
-
-In order to know the current state of an input device switch (like
-SW_RFKILL_ALL), you will need to use an IOCTL.  That information is not
-available through sysfs in a generic way at this time, and it is not available
-through the rfkill class AT ALL.
+			the driver's control.
+	claim: 0: Kernel handles events (currently always reads that value)
+
+rfkill devices also issue uevents (with an action of "change"), with the
+following environment variables set:
+
+RFKILL_NAME
+RFKILL_STATE
+RFKILL_TYPE
+
+The contents of these variables corresponds to the "name", "state" and
+"type" sysfs files explained above.
diff --git a/MAINTAINERS b/MAINTAINERS
index e18baa410b5..2f6a8fcfb1f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4753,9 +4753,9 @@ S:	Supported
 F:	fs/reiserfs/
 
 RFKILL
-P:	Ivo van Doorn
-M:	IvDoorn@gmail.com
-L:	netdev@vger.kernel.org
+P:	Johannes Berg
+M:	johannes@sipsolutions.net
+L:	linux-wireless@vger.kernel.org
 S:	Maintained
 F	Documentation/rfkill.txt
 F:	net/rfkill/
diff --git a/arch/arm/mach-pxa/tosa-bt.c b/arch/arm/mach-pxa/tosa-bt.c
index bde42aa2937..c31e601eb49 100644
--- a/arch/arm/mach-pxa/tosa-bt.c
+++ b/arch/arm/mach-pxa/tosa-bt.c
@@ -35,21 +35,25 @@ static void tosa_bt_off(struct tosa_bt_data *data)
 	gpio_set_value(data->gpio_reset, 0);
 }
 
-static int tosa_bt_toggle_radio(void *data, enum rfkill_state state)
+static int tosa_bt_set_block(void *data, bool blocked)
 {
-	pr_info("BT_RADIO going: %s\n",
-			state == RFKILL_STATE_UNBLOCKED ? "on" : "off");
+	pr_info("BT_RADIO going: %s\n", blocked ? "off" : "on");
 
-	if (state == RFKILL_STATE_UNBLOCKED) {
+	if (!blocked) {
 		pr_info("TOSA_BT: going ON\n");
 		tosa_bt_on(data);
 	} else {
 		pr_info("TOSA_BT: going OFF\n");
 		tosa_bt_off(data);
 	}
+
 	return 0;
 }
 
+static const struct rfkill_ops tosa_bt_rfkill_ops = {
+	.set_block = tosa_bt_set_block,
+};
+
 static int tosa_bt_probe(struct platform_device *dev)
 {
 	int rc;
@@ -70,18 +74,14 @@ static int tosa_bt_probe(struct platform_device *dev)
 	if (rc)
 		goto err_pwr_dir;
 
-	rfk = rfkill_allocate(&dev->dev, RFKILL_TYPE_BLUETOOTH);
+	rfk = rfkill_alloc("tosa-bt", &dev->dev, RFKILL_TYPE_BLUETOOTH,
+			   &tosa_bt_rfkill_ops, data);
 	if (!rfk) {
 		rc = -ENOMEM;
 		goto err_rfk_alloc;
 	}
 
-	rfk->name = "tosa-bt";
-	rfk->toggle_radio = tosa_bt_toggle_radio;
-	rfk->data = data;
-#ifdef CONFIG_RFKILL_LEDS
-	rfk->led_trigger.name = "tosa-bt";
-#endif
+	rfkill_set_led_trigger_name(rfk, "tosa-bt");
 
 	rc = rfkill_register(rfk);
 	if (rc)
@@ -92,9 +92,7 @@ static int tosa_bt_probe(struct platform_device *dev)
 	return 0;
 
 err_rfkill:
-	if (rfk)
-		rfkill_free(rfk);
-	rfk = NULL;
+	rfkill_destroy(rfk);
 err_rfk_alloc:
 	tosa_bt_off(data);
 err_pwr_dir:
@@ -113,8 +111,10 @@ static int __devexit tosa_bt_remove(struct platform_device *dev)
 
 	platform_set_drvdata(dev, NULL);
 
-	if (rfk)
+	if (rfk) {
 		rfkill_unregister(rfk);
+		rfkill_destroy(rfk);
+	}
 	rfk = NULL;
 
 	tosa_bt_off(data);
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index afac5b6d3d7..58ce807fe44 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -31,7 +31,6 @@
 #include <linux/input.h>
 #include <linux/gpio.h>
 #include <linux/pda_power.h>
-#include <linux/rfkill.h>
 #include <linux/spi/spi.h>
 
 #include <asm/setup.h>
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 837135f0390..5ddd8c4f901 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2481,10 +2481,10 @@ static int add_net_device(struct hso_device *hso_dev)
 	return 0;
 }
 
-static int hso_radio_toggle(void *data, enum rfkill_state state)
+static int hso_rfkill_set_block(void *data, bool blocked)
 {
 	struct hso_device *hso_dev = data;
-	int enabled = (state == RFKILL_STATE_UNBLOCKED);
+	int enabled = !blocked;
 	int rv;
 
 	mutex_lock(&hso_dev->mutex);
@@ -2498,6 +2498,10 @@ static int hso_radio_toggle(void *data, enum rfkill_state state)
 	return rv;
 }
 
+static const struct rfkill_ops hso_rfkill_ops = {
+	.set_block = hso_rfkill_set_block,
+};
+
 /* Creates and sets up everything for rfkill */
 static void hso_create_rfkill(struct hso_device *hso_dev,
 			     struct usb_interface *interface)
@@ -2506,29 +2510,25 @@ static void hso_create_rfkill(struct hso_device *hso_dev,
 	struct device *dev = &hso_net->net->dev;
 	char *rfkn;
 
-	hso_net->rfkill = rfkill_allocate(&interface_to_usbdev(interface)->dev,
-				 RFKILL_TYPE_WWAN);
-	if (!hso_net->rfkill) {
-		dev_err(dev, "%s - Out of memory\n", __func__);
-		return;
-	}
 	rfkn = kzalloc(20, GFP_KERNEL);
-	if (!rfkn) {
-		rfkill_free(hso_net->rfkill);
-		hso_net->rfkill = NULL;
+	if (!rfkn)
 		dev_err(dev, "%s - Out of memory\n", __func__);
-		return;
-	}
+
 	snprintf(rfkn, 20, "hso-%d",
 		 interface->altsetting->desc.bInterfaceNumber);
-	hso_net->rfkill->name = rfkn;
-	hso_net->rfkill->state = RFKILL_STATE_UNBLOCKED;
-	hso_net->rfkill->data = hso_dev;
-	hso_net->rfkill->toggle_radio = hso_radio_toggle;
+
+	hso_net->rfkill = rfkill_alloc(rfkn,
+				       &interface_to_usbdev(interface)->dev,
+				       RFKILL_TYPE_WWAN,
+				       &hso_rfkill_ops, hso_dev);
+	if (!hso_net->rfkill) {
+		dev_err(dev, "%s - Out of memory\n", __func__);
+		kfree(rfkn);
+		return;
+	}
 	if (rfkill_register(hso_net->rfkill) < 0) {
+		rfkill_destroy(hso_net->rfkill);
 		kfree(rfkn);
-		hso_net->rfkill->name = NULL;
-		rfkill_free(hso_net->rfkill);
 		hso_net->rfkill = NULL;
 		dev_err(dev, "%s - Failed to register rfkill\n", __func__);
 		return;
@@ -3165,8 +3165,10 @@ static void hso_free_interface(struct usb_interface *interface)
 			hso_stop_net_device(network_table[i]);
 			cancel_work_sync(&network_table[i]->async_put_intf);
 			cancel_work_sync(&network_table[i]->async_get_intf);
-			if (rfk)
+			if (rfk) {
 				rfkill_unregister(rfk);
+				rfkill_destroy(rfk);
+			}
 			hso_free_net_device(network_table[i]);
 		}
 	}
diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h
index 796a3adffea..515880aa211 100644
--- a/drivers/net/wireless/ath/ath9k/ath9k.h
+++ b/drivers/net/wireless/ath/ath9k/ath9k.h
@@ -460,12 +460,9 @@ struct ath_led {
 	bool registered;
 };
 
-/* Rfkill */
-#define ATH_RFKILL_POLL_INTERVAL	2000 /* msecs */
-
 struct ath_rfkill {
 	struct rfkill *rfkill;
-	struct delayed_work rfkill_poll;
+	struct rfkill_ops ops;
 	char rfkill_name[32];
 };
 
@@ -509,8 +506,6 @@ struct ath_rfkill {
 #define SC_OP_RXFLUSH           BIT(7)
 #define SC_OP_LED_ASSOCIATED    BIT(8)
 #define SC_OP_RFKILL_REGISTERED BIT(9)
-#define SC_OP_RFKILL_SW_BLOCKED BIT(10)
-#define SC_OP_RFKILL_HW_BLOCKED BIT(11)
 #define SC_OP_WAIT_FOR_BEACON   BIT(12)
 #define SC_OP_LED_ON            BIT(13)
 #define SC_OP_SCANNING          BIT(14)
diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index 61da08a1648..f7baa406918 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -1192,120 +1192,69 @@ static bool ath_is_rfkill_set(struct ath_softc *sc)
 				  ah->rfkill_polarity;
 }
 
-/* h/w rfkill poll function */
-static void ath_rfkill_poll(struct work_struct *work)
+/* s/w rfkill handlers */
+static int ath_rfkill_set_block(void *data, bool blocked)
 {
-	struct ath_softc *sc = container_of(work, struct ath_softc,
-					    rf_kill.rfkill_poll.work);
-	bool radio_on;
-
-	if (sc->sc_flags & SC_OP_INVALID)
-		return;
-
-	radio_on = !ath_is_rfkill_set(sc);
-
-	/*
-	 * enable/disable radio only when there is a
-	 * state change in RF switch
-	 */
-	if (radio_on == !!(sc->sc_flags & SC_OP_RFKILL_HW_BLOCKED)) {
-		enum rfkill_state state;
-
-		if (sc->sc_flags & SC_OP_RFKILL_SW_BLOCKED) {
-			state = radio_on ? RFKILL_STATE_SOFT_BLOCKED
-				: RFKILL_STATE_HARD_BLOCKED;
-		} else if (radio_on) {
-			ath_radio_enable(sc);
-			state = RFKILL_STATE_UNBLOCKED;
-		} else {
-			ath_radio_disable(sc);
-			state = RFKILL_STATE_HARD_BLOCKED;
-		}
-
-		if (state == RFKILL_STATE_HARD_BLOCKED)
-			sc->sc_flags |= SC_OP_RFKILL_HW_BLOCKED;
-		else
-			sc->sc_flags &= ~SC_OP_RFKILL_HW_BLOCKED;
+	struct ath_softc *sc = data;
 
-		rfkill_force_state(sc->rf_kill.rfkill, state);
-	}
+	if (blocked)
+		ath_radio_disable(sc);
+	else
+		ath_radio_enable(sc);
 
-	queue_delayed_work(sc->hw->workqueue, &sc->rf_kill.rfkill_poll,
-			   msecs_to_jiffies(ATH_RFKILL_POLL_INTERVAL));
+	return 0;
 }
 
-/* s/w rfkill handler */
-static int ath_sw_toggle_radio(void *data, enum rfkill_state state)
+static void ath_rfkill_poll_state(struct rfkill *rfkill, void *data)
 {
 	struct ath_softc *sc = data;
+	bool blocked = !!ath_is_rfkill_set(sc);
 
-	switch (state) {
-	case RFKILL_STATE_SOFT_BLOCKED:
-		if (!(sc->sc_flags & (SC_OP_RFKILL_HW_BLOCKED |
-		    SC_OP_RFKILL_SW_BLOCKED)))
-			ath_radio_disable(sc);
-		sc->sc_flags |= SC_OP_RFKILL_SW_BLOCKED;
-		return 0;
-	case RFKILL_STATE_UNBLOCKED:
-		if ((sc->sc_flags & SC_OP_RFKILL_SW_BLOCKED)) {
-			sc->sc_flags &= ~SC_OP_RFKILL_SW_BLOCKED;
-			if (sc->sc_flags & SC_OP_RFKILL_HW_BLOCKED) {
-				DPRINTF(sc, ATH_DBG_FATAL, "Can't turn on the"
-					"radio as it is disabled by h/w\n");
-				return -EPERM;
-			}
-			ath_radio_enable(sc);
-		}
-		return 0;
-	default:
-		return -EINVAL;
-	}
+	if (rfkill_set_hw_state(rfkill, blocked))
+		ath_radio_disable(sc);
+	else
+		ath_radio_enable(sc);
 }
 
 /* Init s/w rfkill */
 static int ath_init_sw_rfkill(struct ath_softc *sc)
 {
-	sc->rf_kill.rfkill = rfkill_allocate(wiphy_dev(sc->hw->wiphy),
-					     RFKILL_TYPE_WLAN);
+	sc->rf_kill.ops.set_block = ath_rfkill_set_block;
+	if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_RFSILENT)
+		sc->rf_kill.ops.poll = ath_rfkill_poll_state;
+
+	snprintf(sc->rf_kill.rfkill_name, sizeof(sc->rf_kill.rfkill_name),
+		"ath9k-%s::rfkill", wiphy_name(sc->hw->wiphy));
+
+	sc->rf_kill.rfkill = rfkill_alloc(sc->rf_kill.rfkill_name,
+					  wiphy_dev(sc->hw->wiphy),
+					  RFKILL_TYPE_WLAN,
+					  &sc->rf_kill.ops, sc);
 	if (!sc->rf_kill.rfkill) {
 		DPRINTF(sc, ATH_DBG_FATAL, "Failed to allocate rfkill\n");
 		return -ENOMEM;
 	}
 
-	snprintf(sc->rf_kill.rfkill_name, sizeof(sc->rf_kill.rfkill_name),
-		"ath9k-%s::rfkill", wiphy_name(sc->hw->wiphy));
-	sc->rf_kill.rfkill->name = sc->rf_kill.rfkill_name;
-	sc->rf_kill.rfkill->data = sc;
-	sc->rf_kill.rfkill->toggle_radio = ath_sw_toggle_radio;
-	sc->rf_kill.rfkill->state = RFKILL_STATE_UNBLOCKED;
-
 	return 0;
 }
 
 /* Deinitialize rfkill */
 static void ath_deinit_rfkill(struct ath_softc *sc)
 {
-	if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_RFSILENT)
-		cancel_delayed_work_sync(&sc->rf_kill.rfkill_poll);
-
 	if (sc->sc_flags & SC_OP_RFKILL_REGISTERED) {
 		rfkill_unregister(sc->rf_kill.rfkill);
+		rfkill_destroy(sc->rf_kill.rfkill);
 		sc->sc_flags &= ~SC_OP_RFKILL_REGISTERED;
-		sc->rf_kill.rfkill = NULL;
 	}
 }
 
 static int ath_start_rfkill_poll(struct ath_softc *sc)
 {
-	if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_RFSILENT)
-		queue_delayed_work(sc->hw->workqueue,
-				   &sc->rf_kill.rfkill_poll, 0);
-
 	if (!(sc->sc_flags & SC_OP_RFKILL_REGISTERED)) {
 		if (rfkill_register(sc->rf_kill.rfkill)) {
 			DPRINTF(sc, ATH_DBG_FATAL,
 				"Unable to register rfkill\n");
-			rfkill_free(sc->rf_kill.rfkill);
+			rfkill_destroy(sc->rf_kill.rfkill);
 
 			/* Deinitialize the device */
 			ath_cleanup(sc);
@@ -1678,10 +1627,6 @@ int ath_attach(u16 devid, struct ath_softc *sc)
 		goto error_attach;
 
 #if defined(CONFIG_RFKILL) || defined(CONFIG_RFKILL_MODULE)
-	/* Initialze h/w Rfkill */
-	if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_RFSILENT)
-		INIT_DELAYED_WORK(&sc->rf_kill.rfkill_poll, ath_rfkill_poll);
-
 	/* Initialize s/w rfkill */
 	error = ath_init_sw_rfkill(sc);
 	if (error)
@@ -2214,10 +2159,8 @@ static void ath9k_stop(struct ieee80211_hw *hw)
 	} else
 		sc->rx.rxlink = NULL;
 
-#if defined(CONFIG_RFKILL) || defined(CONFIG_RFKILL_MODULE)
-	if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_RFSILENT)
-		cancel_delayed_work_sync(&sc->rf_kill.rfkill_poll);
-#endif
+	rfkill_pause_polling(sc->rf_kill.rfkill);
+
 	/* disable HAL and put h/w to sleep */
 	ath9k_hw_disable(sc->sc_ah);
 	ath9k_hw_configpcipowersave(sc->sc_ah, 1);
diff --git a/drivers/net/wireless/ath/ath9k/pci.c b/drivers/net/wireless/ath/ath9k/pci.c
index 168411d322a..ccdf20a2e9b 100644
--- a/drivers/net/wireless/ath/ath9k/pci.c
+++ b/drivers/net/wireless/ath/ath9k/pci.c
@@ -227,11 +227,6 @@ static int ath_pci_suspend(struct pci_dev *pdev, pm_message_t state)
 
 	ath9k_hw_set_gpio(sc->sc_ah, ATH_LED_PIN, 1);
 
-#if defined(CONFIG_RFKILL) || defined(CONFIG_RFKILL_MODULE)
-	if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_RFSILENT)
-		cancel_delayed_work_sync(&sc->rf_kill.rfkill_poll);
-#endif
-
 	pci_save_state(pdev);
 	pci_disable_device(pdev);
 	pci_set_power_state(pdev, PCI_D3hot);
@@ -256,16 +251,6 @@ static int ath_pci_resume(struct pci_dev *pdev)
 			    AR_GPIO_OUTPUT_MUX_AS_OUTPUT);
 	ath9k_hw_set_gpio(sc->sc_ah, ATH_LED_PIN, 1);
 
-#if defined(CONFIG_RFKILL) || defined(CONFIG_RFKILL_MODULE)
-	/*
-	 * check the h/w rfkill state on resume
-	 * and start the rfkill poll timer
-	 */
-	if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_RFSILENT)
-		queue_delayed_work(sc->hw->workqueue,
-				   &sc->rf_kill.rfkill_poll, 0);
-#endif
-
 	return 0;
 }
 
diff --git a/drivers/net/wireless/b43/Kconfig b/drivers/net/wireless/b43/Kconfig
index 21572e40b79..07a99e3faf9 100644
--- a/drivers/net/wireless/b43/Kconfig
+++ b/drivers/net/wireless/b43/Kconfig
@@ -102,7 +102,7 @@ config B43_LEDS
 # if it's possible.
 config B43_RFKILL
 	bool
-	depends on B43 && (RFKILL = y || RFKILL = B43) && RFKILL_INPUT && (INPUT_POLLDEV = y || INPUT_POLLDEV = B43)
+	depends on B43 && (RFKILL = y || RFKILL = B43)
 	default y
 
 # This config option automatically enables b43 HW-RNG support,
diff --git a/drivers/net/wireless/b43/leds.c b/drivers/net/wireless/b43/leds.c
index 76f4c7bad8b..9a498d3fc65 100644
--- a/drivers/net/wireless/b43/leds.c
+++ b/drivers/net/wireless/b43/leds.c
@@ -87,7 +87,7 @@ static void b43_led_brightness_set(struct led_classdev *led_dev,
 }
 
 static int b43_register_led(struct b43_wldev *dev, struct b43_led *led,
-			    const char *name, char *default_trigger,
+			    const char *name, const char *default_trigger,
 			    u8 led_index, bool activelow)
 {
 	int err;
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index cb4a8712946..1d3e40095ad 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -3470,7 +3470,7 @@ static int b43_op_config(struct ieee80211_hw *hw, u32 changed)
 
 	if (!!conf->radio_enabled != phy->radio_on) {
 		if (conf->radio_enabled) {
-			b43_software_rfkill(dev, RFKILL_STATE_UNBLOCKED);
+			b43_software_rfkill(dev, false);
 			b43info(dev->wl, "Radio turned on by software\n");
 			if (!dev->radio_hw_enable) {
 				b43info(dev->wl, "The hardware RF-kill button "
@@ -3478,7 +3478,7 @@ static int b43_op_config(struct ieee80211_hw *hw, u32 changed)
 					"Press the button to turn it on.\n");
 			}
 		} else {
-			b43_software_rfkill(dev, RFKILL_STATE_SOFT_BLOCKED);
+			b43_software_rfkill(dev, true);
 			b43info(dev->wl, "Radio turned off by software\n");
 		}
 	}
diff --git a/drivers/net/wireless/b43/phy_a.c b/drivers/net/wireless/b43/phy_a.c
index c836c077d51..816e028a262 100644
--- a/drivers/net/wireless/b43/phy_a.c
+++ b/drivers/net/wireless/b43/phy_a.c
@@ -480,11 +480,11 @@ static bool b43_aphy_op_supports_hwpctl(struct b43_wldev *dev)
 }
 
 static void b43_aphy_op_software_rfkill(struct b43_wldev *dev,
-					enum rfkill_state state)
+					bool blocked)
 {
 	struct b43_phy *phy = &dev->phy;
 
-	if (state == RFKILL_STATE_UNBLOCKED) {
+	if (!blocked) {
 		if (phy->radio_on)
 			return;
 		b43_radio_write16(dev, 0x0004, 0x00C0);
diff --git a/drivers/net/wireless/b43/phy_common.c b/drivers/net/wireless/b43/phy_common.c
index e176b6e0d9c..6d241622210 100644
--- a/drivers/net/wireless/b43/phy_common.c
+++ b/drivers/net/wireless/b43/phy_common.c
@@ -84,7 +84,7 @@ int b43_phy_init(struct b43_wldev *dev)
 
 	phy->channel = ops->get_default_chan(dev);
 
-	ops->software_rfkill(dev, RFKILL_STATE_UNBLOCKED);
+	ops->software_rfkill(dev, false);
 	err = ops->init(dev);
 	if (err) {
 		b43err(dev->wl, "PHY init failed\n");
@@ -104,7 +104,7 @@ err_phy_exit:
 	if (ops->exit)
 		ops->exit(dev);
 err_block_rf:
-	ops->software_rfkill(dev, RFKILL_STATE_SOFT_BLOCKED);
+	ops->software_rfkill(dev, true);
 
 	return err;
 }
@@ -113,7 +113,7 @@ void b43_phy_exit(struct b43_wldev *dev)
 {
 	const struct b43_phy_operations *ops = dev->phy.ops;
 
-	ops->software_rfkill(dev, RFKILL_STATE_SOFT_BLOCKED);
+	ops->software_rfkill(dev, true);
 	if (ops->exit)
 		ops->exit(dev);
 }
@@ -295,18 +295,13 @@ err_restore_cookie:
 	return err;
 }
 
-void b43_software_rfkill(struct b43_wldev *dev, enum rfkill_state state)
+void b43_software_rfkill(struct b43_wldev *dev, bool blocked)
 {
 	struct b43_phy *phy = &dev->phy;
 
-	if (state == RFKILL_STATE_HARD_BLOCKED) {
-		/* We cannot hardware-block the device */
-		state = RFKILL_STATE_SOFT_BLOCKED;
-	}
-
 	b43_mac_suspend(dev);
-	phy->ops->software_rfkill(dev, state);
-	phy->radio_on = (state == RFKILL_STATE_UNBLOCKED);
+	phy->ops->software_rfkill(dev, blocked);
+	phy->radio_on = !blocked;
 	b43_mac_enable(dev);
 }
 
diff --git a/drivers/net/wireless/b43/phy_common.h b/drivers/net/wireless/b43/phy_common.h
index b2d99101947..f4c2d79cbc8 100644
--- a/drivers/net/wireless/b43/phy_common.h
+++ b/drivers/net/wireless/b43/phy_common.h
@@ -159,7 +159,7 @@ struct b43_phy_operations {
 
 	/* Radio */
 	bool (*supports_hwpctl)(struct b43_wldev *dev);
-	void (*software_rfkill)(struct b43_wldev *dev, enum rfkill_state state);
+	void (*software_rfkill)(struct b43_wldev *dev, bool blocked);
 	void (*switch_analog)(struct b43_wldev *dev, bool on);
 	int (*switch_channel)(struct b43_wldev *dev, unsigned int new_channel);
 	unsigned int (*get_default_chan)(struct b43_wldev *dev);
@@ -364,7 +364,7 @@ int b43_switch_channel(struct b43_wldev *dev, unsigned int new_channel);
 /**
  * b43_software_rfkill - Turn the radio ON or OFF in software.
  */
-void b43_software_rfkill(struct b43_wldev *dev, enum rfkill_state state);
+void b43_software_rfkill(struct b43_wldev *dev, bool blocked);
 
 /**
  * b43_phy_txpower_check - Check TX power output.
diff --git a/drivers/net/wireless/b43/phy_g.c b/drivers/net/wireless/b43/phy_g.c
index e7b98f013b0..5300232449f 100644
--- a/drivers/net/wireless/b43/phy_g.c
+++ b/drivers/net/wireless/b43/phy_g.c
@@ -2592,7 +2592,7 @@ static bool b43_gphy_op_supports_hwpctl(struct b43_wldev *dev)
 }
 
 static void b43_gphy_op_software_rfkill(struct b43_wldev *dev,
-					enum rfkill_state state)
+					bool blocked)
 {
 	struct b43_phy *phy = &dev->phy;
 	struct b43_phy_g *gphy = phy->g;
@@ -2600,7 +2600,7 @@ static void b43_gphy_op_software_rfkill(struct b43_wldev *dev,
 
 	might_sleep();
 
-	if (state == RFKILL_STATE_UNBLOCKED) {
+	if (!blocked) {
 		/* Turn radio ON */
 		if (phy->radio_on)
 			return;
diff --git a/drivers/net/wireless/b43/phy_lp.c b/drivers/net/wireless/b43/phy_lp.c
index 58e319d6b1e..ea0d3a3a6a6 100644
--- a/drivers/net/wireless/b43/phy_lp.c
+++ b/drivers/net/wireless/b43/phy_lp.c
@@ -488,7 +488,7 @@ static void b43_lpphy_op_radio_write(struct b43_wldev *dev, u16 reg, u16 value)
 }
 
 static void b43_lpphy_op_software_rfkill(struct b43_wldev *dev,
-					 enum rfkill_state state)
+					 bool blocked)
 {
 	//TODO
 }
diff --git a/drivers/net/wireless/b43/phy_n.c b/drivers/net/wireless/b43/phy_n.c
index 8bcfda5f3f0..be7b5604947 100644
--- a/drivers/net/wireless/b43/phy_n.c
+++ b/drivers/net/wireless/b43/phy_n.c
@@ -579,7 +579,7 @@ static void b43_nphy_op_radio_write(struct b43_wldev *dev, u16 reg, u16 value)
 }
 
 static void b43_nphy_op_software_rfkill(struct b43_wldev *dev,
-					enum rfkill_state state)
+					bool blocked)
 {//TODO
 }
 
diff --git a/drivers/net/wireless/b43/rfkill.c b/drivers/net/wireless/b43/rfkill.c
index 9e1d00bc24d..96047843cd5 100644
--- a/drivers/net/wireless/b43/rfkill.c
+++ b/drivers/net/wireless/b43/rfkill.c
@@ -45,12 +45,11 @@ static bool b43_is_hw_radio_enabled(struct b43_wldev *dev)
 }
 
 /* The poll callback for the hardware button. */
-static void b43_rfkill_poll(struct input_polled_dev *poll_dev)
+static void b43_rfkill_poll(struct rfkill *rfkill, void *data)
 {
-	struct b43_wldev *dev = poll_dev->private;
+	struct b43_wldev *dev = data;
 	struct b43_wl *wl = dev->wl;
 	bool enabled;
-	bool report_change = 0;
 
 	mutex_lock(&wl->mutex);
 	if (unlikely(b43_status(dev) < B43_STAT_INITIALIZED)) {
@@ -60,68 +59,55 @@ static void b43_rfkill_poll(struct input_polled_dev *poll_dev)
 	enabled = b43_is_hw_radio_enabled(dev);
 	if (unlikely(enabled != dev->radio_hw_enable)) {
 		dev->radio_hw_enable = enabled;
-		report_change = 1;
 		b43info(wl, "Radio hardware status changed to %s\n",
 			enabled ? "ENABLED" : "DISABLED");
+		enabled = !rfkill_set_hw_state(rfkill, !enabled);
+		if (enabled != dev->phy.radio_on)
+			b43_software_rfkill(dev, !enabled);
 	}
 	mutex_unlock(&wl->mutex);
-
-	/* send the radio switch event to the system - note both a key press
-	 * and a release are required */
-	if (unlikely(report_change)) {
-		input_report_key(poll_dev->input, KEY_WLAN, 1);
-		input_report_key(poll_dev->input, KEY_WLAN, 0);
-	}
 }
 
 /* Called when the RFKILL toggled in software. */
-static int b43_rfkill_soft_toggle(void *data, enum rfkill_state state)
+static int b43_rfkill_soft_set(void *data, bool blocked)
 {
 	struct b43_wldev *dev = data;
 	struct b43_wl *wl = dev->wl;
-	int err = -EBUSY;
+	int err = -EINVAL;
 
-	if (!wl->rfkill.registered)
-		return 0;
+	if (WARN_ON(!wl->rfkill.registered))
+		return -EINVAL;
 
 	mutex_lock(&wl->mutex);
+
 	if (b43_status(dev) < B43_STAT_INITIALIZED)
 		goto out_unlock;
+
+	if (!dev->radio_hw_enable)
+		goto out_unlock;
+
+	if (!blocked != dev->phy.radio_on)
+		b43_software_rfkill(dev, blocked);
 	err = 0;
-	switch (state) {
-	case RFKILL_STATE_UNBLOCKED:
-		if (!dev->radio_hw_enable) {
-			/* No luck. We can't toggle the hardware RF-kill
-			 * button from software. */
-			err = -EBUSY;
-			goto out_unlock;
-		}
-		if (!dev->phy.radio_on)
-			b43_software_rfkill(dev, state);
-		break;
-	case RFKILL_STATE_SOFT_BLOCKED:
-		if (dev->phy.radio_on)
-			b43_software_rfkill(dev, state);
-		break;
-	default:
-		b43warn(wl, "Received unexpected rfkill state %d.\n", state);
-		break;
-	}
 out_unlock:
 	mutex_unlock(&wl->mutex);
-
 	return err;
 }
 
-char *b43_rfkill_led_name(struct b43_wldev *dev)
+const char *b43_rfkill_led_name(struct b43_wldev *dev)
 {
 	struct b43_rfkill *rfk = &(dev->wl->rfkill);
 
 	if (!rfk->registered)
 		return NULL;
-	return rfkill_get_led_name(rfk->rfkill);
+	return rfkill_get_led_trigger_name(rfk->rfkill);
 }
 
+static const struct rfkill_ops b43_rfkill_ops = {
+	.set_block = b43_rfkill_soft_set,
+	.poll = b43_rfkill_poll,
+};
+
 void b43_rfkill_init(struct b43_wldev *dev)
 {
 	struct b43_wl *wl = dev->wl;
@@ -130,65 +116,26 @@ void b43_rfkill_init(struct b43_wldev *dev)
 
 	rfk->registered = 0;
 
-	rfk->rfkill = rfkill_allocate(dev->dev->dev, RFKILL_TYPE_WLAN);
-	if (!rfk->rfkill)
-		goto out_error;
 	snprintf(rfk->name, sizeof(rfk->name),
 		 "b43-%s", wiphy_name(wl->hw->wiphy));
-	rfk->rfkill->name = rfk->name;
-	rfk->rfkill->state = RFKILL_STATE_UNBLOCKED;
-	rfk->rfkill->data = dev;
-	rfk->rfkill->toggle_radio = b43_rfkill_soft_toggle;
-
-	rfk->poll_dev = input_allocate_polled_device();
-	if (!rfk->poll_dev) {
-		rfkill_free(rfk->rfkill);
-		goto err_freed_rfk;
-	}
-
-	rfk->poll_dev->private = dev;
-	rfk->poll_dev->poll = b43_rfkill_poll;
-	rfk->poll_dev->poll_interval = 1000; /* msecs */
 
-	rfk->poll_dev->input->name = rfk->name;
-	rfk->poll_dev->input->id.bustype = BUS_HOST;
-	rfk->poll_dev->input->id.vendor = dev->dev->bus->boardinfo.vendor;
-	rfk->poll_dev->input->evbit[0] = BIT(EV_KEY);
-	set_bit(KEY_WLAN, rfk->poll_dev->input->keybit);
+	rfk->rfkill = rfkill_alloc(rfk->name,
+				   dev->dev->dev,
+				   RFKILL_TYPE_WLAN,
+				   &b43_rfkill_ops, dev);
+	if (!rfk->rfkill)
+		goto out_error;
 
 	err = rfkill_register(rfk->rfkill);
 	if (err)
-		goto err_free_polldev;
-
-#ifdef CONFIG_RFKILL_INPUT_MODULE
-	/* B43 RF-kill isn't useful without the rfkill-input subsystem.
-	 * Try to load the module. */
-	err = request_module("rfkill-input");
-	if (err)
-		b43warn(wl, "Failed to load the rfkill-input module. "
-			"The built-in radio LED will not work.\n");
-#endif /* CONFIG_RFKILL_INPUT */
-
-#if !defined(CONFIG_RFKILL_INPUT) && !defined(CONFIG_RFKILL_INPUT_MODULE)
-	b43warn(wl, "The rfkill-input subsystem is not available. "
-		"The built-in radio LED will not work.\n");
-#endif
-
-	err = input_register_polled_device(rfk->poll_dev);
-	if (err)
-		goto err_unreg_rfk;
+		goto err_free;
 
 	rfk->registered = 1;
 
 	return;
-err_unreg_rfk:
-	rfkill_unregister(rfk->rfkill);
-err_free_polldev:
-	input_free_polled_device(rfk->poll_dev);
-	rfk->poll_dev = NULL;
-err_freed_rfk:
-	rfk->rfkill = NULL;
-out_error:
+ err_free:
+	rfkill_destroy(rfk->rfkill);
+ out_error:
 	rfk->registered = 0;
 	b43warn(wl, "RF-kill button init failed\n");
 }
@@ -201,9 +148,7 @@ void b43_rfkill_exit(struct b43_wldev *dev)
 		return;
 	rfk->registered = 0;
 
-	input_unregister_polled_device(rfk->poll_dev);
 	rfkill_unregister(rfk->rfkill);
-	input_free_polled_device(rfk->poll_dev);
-	rfk->poll_dev = NULL;
+	rfkill_destroy(rfk->rfkill);
 	rfk->rfkill = NULL;
 }
diff --git a/drivers/net/wireless/b43/rfkill.h b/drivers/net/wireless/b43/rfkill.h
index adacf936d81..da497e01bbb 100644
--- a/drivers/net/wireless/b43/rfkill.h
+++ b/drivers/net/wireless/b43/rfkill.h
@@ -7,14 +7,11 @@ struct b43_wldev;
 #ifdef CONFIG_B43_RFKILL
 
 #include <linux/rfkill.h>
-#include <linux/input-polldev.h>
 
 
 struct b43_rfkill {
 	/* The RFKILL subsystem data structure */
 	struct rfkill *rfkill;
-	/* The poll device for the RFKILL input button */
-	struct input_polled_dev *poll_dev;
 	/* Did initialization succeed? Used for freeing. */
 	bool registered;
 	/* The unique name of this rfkill switch */
@@ -26,7 +23,7 @@ struct b43_rfkill {
 void b43_rfkill_init(struct b43_wldev *dev);
 void b43_rfkill_exit(struct b43_wldev *dev);
 
-char * b43_rfkill_led_name(struct b43_wldev *dev);
+const char *b43_rfkill_led_name(struct b43_wldev *dev);
 
 
 #else /* CONFIG_B43_RFKILL */
diff --git a/drivers/net/wireless/b43legacy/Kconfig b/drivers/net/wireless/b43legacy/Kconfig
index d4f628a74bb..6893f439df7 100644
--- a/drivers/net/wireless/b43legacy/Kconfig
+++ b/drivers/net/wireless/b43legacy/Kconfig
@@ -47,7 +47,7 @@ config B43LEGACY_LEDS
 # if it's possible.
 config B43LEGACY_RFKILL
 	bool
-	depends on B43LEGACY && (RFKILL = y || RFKILL = B43LEGACY) && RFKILL_INPUT && (INPUT_POLLDEV = y || INPUT_POLLDEV = B43LEGACY)
+	depends on B43LEGACY && (RFKILL = y || RFKILL = B43LEGACY)
 	default y
 
 # This config option automatically enables b43 HW-RNG support,
diff --git a/drivers/net/wireless/b43legacy/leds.c b/drivers/net/wireless/b43legacy/leds.c
index 3ea55b18c70..538d3117594 100644
--- a/drivers/net/wireless/b43legacy/leds.c
+++ b/drivers/net/wireless/b43legacy/leds.c
@@ -86,7 +86,8 @@ static void b43legacy_led_brightness_set(struct led_classdev *led_dev,
 
 static int b43legacy_register_led(struct b43legacy_wldev *dev,
 				  struct b43legacy_led *led,
-				  const char *name, char *default_trigger,
+				  const char *name,
+				  const char *default_trigger,
 				  u8 led_index, bool activelow)
 {
 	int err;
diff --git a/drivers/net/wireless/b43legacy/rfkill.c b/drivers/net/wireless/b43legacy/rfkill.c
index 4b0c7d27a51..c6230a64505 100644
--- a/drivers/net/wireless/b43legacy/rfkill.c
+++ b/drivers/net/wireless/b43legacy/rfkill.c
@@ -45,12 +45,11 @@ static bool b43legacy_is_hw_radio_enabled(struct b43legacy_wldev *dev)
 }
 
 /* The poll callback for the hardware button. */
-static void b43legacy_rfkill_poll(struct input_polled_dev *poll_dev)
+static void b43legacy_rfkill_poll(struct rfkill *rfkill, void *data)
 {
-	struct b43legacy_wldev *dev = poll_dev->private;
+	struct b43legacy_wldev *dev = data;
 	struct b43legacy_wl *wl = dev->wl;
 	bool enabled;
-	bool report_change = 0;
 
 	mutex_lock(&wl->mutex);
 	if (unlikely(b43legacy_status(dev) < B43legacy_STAT_INITIALIZED)) {
@@ -60,71 +59,64 @@ static void b43legacy_rfkill_poll(struct input_polled_dev *poll_dev)
 	enabled = b43legacy_is_hw_radio_enabled(dev);
 	if (unlikely(enabled != dev->radio_hw_enable)) {
 		dev->radio_hw_enable = enabled;
-		report_change = 1;
 		b43legacyinfo(wl, "Radio hardware status changed to %s\n",
 			enabled ? "ENABLED" : "DISABLED");
+		enabled = !rfkill_set_hw_state(rfkill, !enabled);
+		if (enabled != dev->phy.radio_on) {
+			if (enabled)
+				b43legacy_radio_turn_on(dev);
+			else
+				b43legacy_radio_turn_off(dev, 0);
+		}
 	}
 	mutex_unlock(&wl->mutex);
-
-	/* send the radio switch event to the system - note both a key press
-	 * and a release are required */
-	if (unlikely(report_change)) {
-		input_report_key(poll_dev->input, KEY_WLAN, 1);
-		input_report_key(poll_dev->input, KEY_WLAN, 0);
-	}
 }
 
 /* Called when the RFKILL toggled in software.
  * This is called without locking. */
-static int b43legacy_rfkill_soft_toggle(void *data, enum rfkill_state state)
+static int b43legacy_rfkill_soft_set(void *data, bool blocked)
 {
 	struct b43legacy_wldev *dev = data;
 	struct b43legacy_wl *wl = dev->wl;
-	int err = -EBUSY;
+	int ret = -EINVAL;
 
 	if (!wl->rfkill.registered)
-		return 0;
+		return -EINVAL;
 
 	mutex_lock(&wl->mutex);
 	if (b43legacy_status(dev) < B43legacy_STAT_INITIALIZED)
 		goto out_unlock;
-	err = 0;
-	switch (state) {
-	case RFKILL_STATE_UNBLOCKED:
-		if (!dev->radio_hw_enable) {
-			/* No luck. We can't toggle the hardware RF-kill
-			 * button from software. */
-			err = -EBUSY;
-			goto out_unlock;
-		}
-		if (!dev->phy.radio_on)
+
+	if (!dev->radio_hw_enable)
+		goto out_unlock;
+
+	if (!blocked != dev->phy.radio_on) {
+		if (!blocked)
 			b43legacy_radio_turn_on(dev);
-		break;
-	case RFKILL_STATE_SOFT_BLOCKED:
-		if (dev->phy.radio_on)
+		else
 			b43legacy_radio_turn_off(dev, 0);
-		break;
-	default:
-		b43legacywarn(wl, "Received unexpected rfkill state %d.\n",
-			      state);
-		break;
 	}
+	ret = 0;
 
 out_unlock:
 	mutex_unlock(&wl->mutex);
-
-	return err;
+	return ret;
 }
 
-char *b43legacy_rfkill_led_name(struct b43legacy_wldev *dev)
+const char *b43legacy_rfkill_led_name(struct b43legacy_wldev *dev)
 {
 	struct b43legacy_rfkill *rfk = &(dev->wl->rfkill);
 
 	if (!rfk->registered)
 		return NULL;
-	return rfkill_get_led_name(rfk->rfkill);
+	return rfkill_get_led_trigger_name(rfk->rfkill);
 }
 
+static const struct rfkill_ops b43legacy_rfkill_ops = {
+	.set_block = b43legacy_rfkill_soft_set,
+	.poll = b43legacy_rfkill_poll,
+};
+
 void b43legacy_rfkill_init(struct b43legacy_wldev *dev)
 {
 	struct b43legacy_wl *wl = dev->wl;
@@ -133,60 +125,25 @@ void b43legacy_rfkill_init(struct b43legacy_wldev *dev)
 
 	rfk->registered = 0;
 
-	rfk->rfkill = rfkill_allocate(dev->dev->dev, RFKILL_TYPE_WLAN);
-	if (!rfk->rfkill)
-		goto out_error;
 	snprintf(rfk->name, sizeof(rfk->name),
 		 "b43legacy-%s", wiphy_name(wl->hw->wiphy));
-	rfk->rfkill->name = rfk->name;
-	rfk->rfkill->state = RFKILL_STATE_UNBLOCKED;
-	rfk->rfkill->data = dev;
-	rfk->rfkill->toggle_radio = b43legacy_rfkill_soft_toggle;
-
-	rfk->poll_dev = input_allocate_polled_device();
-	if (!rfk->poll_dev) {
-		rfkill_free(rfk->rfkill);
-		goto err_freed_rfk;
-	}
-
-	rfk->poll_dev->private = dev;
-	rfk->poll_dev->poll = b43legacy_rfkill_poll;
-	rfk->poll_dev->poll_interval = 1000; /* msecs */
-
-	rfk->poll_dev->input->name = rfk->name;
-	rfk->poll_dev->input->id.bustype = BUS_HOST;
-	rfk->poll_dev->input->id.vendor = dev->dev->bus->boardinfo.vendor;
-	rfk->poll_dev->input->evbit[0] = BIT(EV_KEY);
-	set_bit(KEY_WLAN, rfk->poll_dev->input->keybit);
+	rfk->rfkill = rfkill_alloc(rfk->name,
+				   dev->dev->dev,
+				   RFKILL_TYPE_WLAN,
+				   &b43legacy_rfkill_ops, dev);
+	if (!rfk->rfkill)
+		goto out_error;
 
 	err = rfkill_register(rfk->rfkill);
 	if (err)
-		goto err_free_polldev;
-
-#ifdef CONFIG_RFKILL_INPUT_MODULE
-	/* B43legacy RF-kill isn't useful without the rfkill-input subsystem.
-	 * Try to load the module. */
-	err = request_module("rfkill-input");
-	if (err)
-		b43legacywarn(wl, "Failed to load the rfkill-input module."
-			"The built-in radio LED will not work.\n");
-#endif /* CONFIG_RFKILL_INPUT */
-
-	err = input_register_polled_device(rfk->poll_dev);
-	if (err)
-		goto err_unreg_rfk;
+		goto err_free;
 
 	rfk->registered = 1;
 
 	return;
-err_unreg_rfk:
-	rfkill_unregister(rfk->rfkill);
-err_free_polldev:
-	input_free_polled_device(rfk->poll_dev);
-	rfk->poll_dev = NULL;
-err_freed_rfk:
-	rfk->rfkill = NULL;
-out_error:
+ err_free:
+	rfkill_destroy(rfk->rfkill);
+ out_error:
 	rfk->registered = 0;
 	b43legacywarn(wl, "RF-kill button init failed\n");
 }
@@ -199,10 +156,8 @@ void b43legacy_rfkill_exit(struct b43legacy_wldev *dev)
 		return;
 	rfk->registered = 0;
 
-	input_unregister_polled_device(rfk->poll_dev);
 	rfkill_unregister(rfk->rfkill);
-	input_free_polled_device(rfk->poll_dev);
-	rfk->poll_dev = NULL;
+	rfkill_destroy(rfk->rfkill);
 	rfk->rfkill = NULL;
 }
 
diff --git a/drivers/net/wireless/b43legacy/rfkill.h b/drivers/net/wireless/b43legacy/rfkill.h
index 11150a8032f..adffc503a6a 100644
--- a/drivers/net/wireless/b43legacy/rfkill.h
+++ b/drivers/net/wireless/b43legacy/rfkill.h
@@ -6,16 +6,12 @@ struct b43legacy_wldev;
 #ifdef CONFIG_B43LEGACY_RFKILL
 
 #include <linux/rfkill.h>
-#include <linux/workqueue.h>
-#include <linux/input-polldev.h>
 
 
 struct b43legacy_rfkill {
 	/* The RFKILL subsystem data structure */
 	struct rfkill *rfkill;
-	/* The poll device for the RFKILL input button */
-	struct input_polled_dev *poll_dev;
 	/* Did initialization succeed? Used for freeing. */
 	bool registered;
 	/* The unique name of this rfkill switch */
@@ -27,7 +23,7 @@ struct b43legacy_rfkill {
 void b43legacy_rfkill_init(struct b43legacy_wldev *dev);
 void b43legacy_rfkill_exit(struct b43legacy_wldev *dev);
 
-char *b43legacy_rfkill_led_name(struct b43legacy_wldev *dev);
+const char *b43legacy_rfkill_led_name(struct b43legacy_wldev *dev);
 
 
 #else /* CONFIG_B43LEGACY_RFKILL */
diff --git a/drivers/net/wireless/iwlwifi/Kconfig b/drivers/net/wireless/iwlwifi/Kconfig
index 8304f6406a1..6fe259fcfb8 100644
--- a/drivers/net/wireless/iwlwifi/Kconfig
+++ b/drivers/net/wireless/iwlwifi/Kconfig
@@ -5,15 +5,14 @@ config IWLWIFI
 	select FW_LOADER
 	select MAC80211_LEDS if IWLWIFI_LEDS
 	select LEDS_CLASS if IWLWIFI_LEDS
-	select RFKILL if IWLWIFI_RFKILL
 
 config IWLWIFI_LEDS
 	bool "Enable LED support in iwlagn and iwl3945 drivers"
 	depends on IWLWIFI
 
 config IWLWIFI_RFKILL
-	bool "Enable RF kill support in iwlagn and iwl3945 drivers"
-	depends on IWLWIFI
+	def_bool y
+	depends on IWLWIFI && RFKILL
 
 config IWLWIFI_SPECTRUM_MEASUREMENT
 	bool "Enable Spectrum Measurement in iwlagn driver"
diff --git a/drivers/net/wireless/iwlwifi/iwl-rfkill.c b/drivers/net/wireless/iwlwifi/iwl-rfkill.c
index 65605ad44e4..13149936fd2 100644
--- a/drivers/net/wireless/iwlwifi/iwl-rfkill.c
+++ b/drivers/net/wireless/iwlwifi/iwl-rfkill.c
@@ -36,42 +36,37 @@
 #include "iwl-core.h"
 
 /* software rf-kill from user */
-static int iwl_rfkill_soft_rf_kill(void *data, enum rfkill_state state)
+static int iwl_rfkill_soft_rf_kill(void *data, bool blocked)
 {
 	struct iwl_priv *priv = data;
-	int err = 0;
 
 	if (!priv->rfkill)
-		return 0;
+		return -EINVAL;
 
 	if (test_bit(STATUS_EXIT_PENDING, &priv->status))
 		return 0;
 
-	IWL_DEBUG_RF_KILL(priv, "we received soft RFKILL set to state %d\n", state);
+	IWL_DEBUG_RF_KILL(priv, "received soft RFKILL: block=%d\n", blocked);
+
 	mutex_lock(&priv->mutex);
 
-	switch (state) {
-	case RFKILL_STATE_UNBLOCKED:
-		if (iwl_is_rfkill_hw(priv)) {
-			err = -EBUSY;
-			goto out_unlock;
-		}
+	if (iwl_is_rfkill_hw(priv))
+		goto out_unlock;
+
+	if (!blocked)
 		iwl_radio_kill_sw_enable_radio(priv);
-		break;
-	case RFKILL_STATE_SOFT_BLOCKED:
+	else
 		iwl_radio_kill_sw_disable_radio(priv);
-		break;
-	default:
-		IWL_WARN(priv, "we received unexpected RFKILL state %d\n",
-			state);
-		break;
-	}
+
 out_unlock:
 	mutex_unlock(&priv->mutex);
-
-	return err;
+	return 0;
 }
 
+static const struct rfkill_ops iwl_rfkill_ops = {
+	.set_block = iwl_rfkill_soft_rf_kill,
+};
+
 int iwl_rfkill_init(struct iwl_priv *priv)
 {
 	struct device *device = wiphy_dev(priv->hw->wiphy);
@@ -80,21 +75,16 @@ int iwl_rfkill_init(struct iwl_priv *priv)
 	BUG_ON(device == NULL);
 
 	IWL_DEBUG_RF_KILL(priv, "Initializing RFKILL.\n");
-	priv->rfkill = rfkill_allocate(device, RFKILL_TYPE_WLAN);
+	priv->rfkill = rfkill_alloc(priv->cfg->name,
+				    device,
+				    RFKILL_TYPE_WLAN,
+				    &iwl_rfkill_ops, priv);
 	if (!priv->rfkill) {
 		IWL_ERR(priv, "Unable to allocate RFKILL device.\n");
 		ret = -ENOMEM;
 		goto error;
 	}
 
-	priv->rfkill->name = priv->cfg->name;
-	priv->rfkill->data = priv;
-	priv->rfkill->state = RFKILL_STATE_UNBLOCKED;
-	priv->rfkill->toggle_radio = iwl_rfkill_soft_rf_kill;
-
-	priv->rfkill->dev.class->suspend = NULL;
-	priv->rfkill->dev.class->resume = NULL;
-
 	ret = rfkill_register(priv->rfkill);
 	if (ret) {
 		IWL_ERR(priv, "Unable to register RFKILL: %d\n", ret);
@@ -102,11 +92,10 @@ int iwl_rfkill_init(struct iwl_priv *priv)
 	}
 
 	IWL_DEBUG_RF_KILL(priv, "RFKILL initialization complete.\n");
-	return ret;
+	return 0;
 
 free_rfkill:
-	if (priv->rfkill != NULL)
-		rfkill_free(priv->rfkill);
+	rfkill_destroy(priv->rfkill);
 	priv->rfkill = NULL;
 
 error:
@@ -118,8 +107,10 @@ EXPORT_SYMBOL(iwl_rfkill_init);
 void iwl_rfkill_unregister(struct iwl_priv *priv)
 {
 
-	if (priv->rfkill)
+	if (priv->rfkill) {
 		rfkill_unregister(priv->rfkill);
+		rfkill_destroy(priv->rfkill);
+	}
 
 	priv->rfkill = NULL;
 }
@@ -131,14 +122,10 @@ void iwl_rfkill_set_hw_state(struct iwl_priv *priv)
 	if (!priv->rfkill)
 		return;
 
-	if (iwl_is_rfkill_hw(priv)) {
-		rfkill_force_state(priv->rfkill, RFKILL_STATE_HARD_BLOCKED);
-		return;
-	}
-
-	if (!iwl_is_rfkill_sw(priv))
-		rfkill_force_state(priv->rfkill, RFKILL_STATE_UNBLOCKED);
+	if (rfkill_set_hw_state(priv->rfkill,
+				!!iwl_is_rfkill_hw(priv)))
+		iwl_radio_kill_sw_disable_radio(priv);
 	else
-		rfkill_force_state(priv->rfkill, RFKILL_STATE_SOFT_BLOCKED);
+		iwl_radio_kill_sw_enable_radio(priv);
 }
 EXPORT_SYMBOL(iwl_rfkill_set_hw_state);
diff --git a/drivers/net/wireless/iwmc3200wifi/rfkill.c b/drivers/net/wireless/iwmc3200wifi/rfkill.c
index 4ca8b495f82..8ee2c3c09a0 100644
--- a/drivers/net/wireless/iwmc3200wifi/rfkill.c
+++ b/drivers/net/wireless/iwmc3200wifi/rfkill.c
@@ -25,47 +25,42 @@
 
 #include "iwm.h"
 
-static int iwm_rfkill_soft_toggle(void *data, enum rfkill_state state)
+static int iwm_rfkill_set_block(void *data, bool blocked)
 {
 	struct iwm_priv *iwm = data;
 
-	switch (state) {
-	case RFKILL_STATE_UNBLOCKED:
+	if (!blocked) {
 		if (test_bit(IWM_RADIO_RFKILL_HW, &iwm->radio))
 			return -EBUSY;
 
 		if (test_and_clear_bit(IWM_RADIO_RFKILL_SW, &iwm->radio) &&
 		    (iwm_to_ndev(iwm)->flags & IFF_UP))
-			iwm_up(iwm);
-
-		break;
-	case RFKILL_STATE_SOFT_BLOCKED:
+			return iwm_up(iwm);
+	} else {
 		if (!test_and_set_bit(IWM_RADIO_RFKILL_SW, &iwm->radio))
-			iwm_down(iwm);
-
-		break;
-	default:
-		break;
+			return iwm_down(iwm);
 	}
 
 	return 0;
 }
 
+static const struct rfkill_ops iwm_rfkill_ops = {
+	.set_block = iwm_rfkill_set_block,
+};
+
 int iwm_rfkill_init(struct iwm_priv *iwm)
 {
 	int ret;
 
-	iwm->rfkill = rfkill_allocate(iwm_to_dev(iwm), RFKILL_TYPE_WLAN);
+	iwm->rfkill = rfkill_alloc(KBUILD_MODNAME,
+				   iwm_to_dev(iwm),
+				   RFKILL_TYPE_WLAN,
+				   &iwm_rfkill_ops, iwm);
 	if (!iwm->rfkill) {
 		IWM_ERR(iwm, "Unable to allocate rfkill device\n");
 		return -ENOMEM;
 	}
 
-	iwm->rfkill->name = KBUILD_MODNAME;
-	iwm->rfkill->data = iwm;
-	iwm->rfkill->state = RFKILL_STATE_UNBLOCKED;
-	iwm->rfkill->toggle_radio = iwm_rfkill_soft_toggle;
-
 	ret = rfkill_register(iwm->rfkill);
 	if (ret) {
 		IWM_ERR(iwm, "Failed to register rfkill device\n");
@@ -74,15 +69,15 @@ int iwm_rfkill_init(struct iwm_priv *iwm)
 
 	return 0;
  fail:
-	rfkill_free(iwm->rfkill);
+	rfkill_destroy(iwm->rfkill);
 	return ret;
 }
 
 void iwm_rfkill_exit(struct iwm_priv *iwm)
 {
-	if (iwm->rfkill)
+	if (iwm->rfkill) {
 		rfkill_unregister(iwm->rfkill);
-
-	rfkill_free(iwm->rfkill);
+		rfkill_destroy(iwm->rfkill);
+	}
 	iwm->rfkill = NULL;
 }
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 284ebaca6e4..c682ac53641 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -21,7 +21,7 @@ config ACER_WMI
 	depends on NEW_LEDS
 	depends on BACKLIGHT_CLASS_DEVICE
 	depends on SERIO_I8042
-	depends on RFKILL
+	depends on RFKILL || RFKILL = n
 	select ACPI_WMI
 	---help---
 	  This is a driver for newer Acer (and Wistron) laptops. It adds
@@ -60,7 +60,7 @@ config DELL_LAPTOP
 	depends on DCDBAS
 	depends on EXPERIMENTAL
 	depends on BACKLIGHT_CLASS_DEVICE
-	depends on RFKILL
+	depends on RFKILL || RFKILL = n
 	depends on POWER_SUPPLY
 	default n
 	---help---
@@ -117,7 +117,7 @@ config HP_WMI
 	tristate "HP WMI extras"
 	depends on ACPI_WMI
 	depends on INPUT
-	depends on RFKILL
+	depends on RFKILL || RFKILL = n
 	help
 	 Say Y here if you want to support WMI-based hotkeys on HP laptops and
 	 to read data from WMI such as docking or ambient light sensor state.
@@ -196,14 +196,13 @@ config THINKPAD_ACPI
 	tristate "ThinkPad ACPI Laptop Extras"
 	depends on ACPI
 	depends on INPUT
+	depends on RFKILL || RFKILL = n
 	select BACKLIGHT_LCD_SUPPORT
 	select BACKLIGHT_CLASS_DEVICE
 	select HWMON
 	select NVRAM
 	select NEW_LEDS
 	select LEDS_CLASS
-	select NET
-	select RFKILL
 	---help---
 	  This is a driver for the IBM and Lenovo ThinkPad laptops. It adds
 	  support for Fn-Fx key combinations, Bluetooth control, video
@@ -338,9 +337,9 @@ config EEEPC_LAPTOP
 	depends on ACPI
 	depends on INPUT
 	depends on EXPERIMENTAL
+	depends on RFKILL || RFKILL = n
 	select BACKLIGHT_CLASS_DEVICE
 	select HWMON
-	select RFKILL
 	---help---
 	  This driver supports the Fn-Fx keys on Eee PC laptops.
 	  It also adds the ability to switch camera/wlan on/off.
@@ -405,9 +404,8 @@ config ACPI_TOSHIBA
 	tristate "Toshiba Laptop Extras"
 	depends on ACPI
 	depends on INPUT
+	depends on RFKILL || RFKILL = n
 	select INPUT_POLLDEV
-	select NET
-	select RFKILL
 	select BACKLIGHT_CLASS_DEVICE
 	---help---
 	  This driver adds support for access to certain system settings
diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index 62d02b3c998..b618fa51db2 100644
--- a/drivers/platform/x86/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c
@@ -958,58 +958,50 @@ static void acer_rfkill_update(struct work_struct *ignored)
 
 	status = get_u32(&state, ACER_CAP_WIRELESS);
 	if (ACPI_SUCCESS(status))
-		rfkill_force_state(wireless_rfkill, state ?
-			RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED);
+		rfkill_set_sw_state(wireless_rfkill, !!state);
 
 	if (has_cap(ACER_CAP_BLUETOOTH)) {
 		status = get_u32(&state, ACER_CAP_BLUETOOTH);
 		if (ACPI_SUCCESS(status))
-			rfkill_force_state(bluetooth_rfkill, state ?
-				RFKILL_STATE_UNBLOCKED :
-				RFKILL_STATE_SOFT_BLOCKED);
+			rfkill_set_sw_state(bluetooth_rfkill, !!state);
 	}
 
 	schedule_delayed_work(&acer_rfkill_work, round_jiffies_relative(HZ));
 }
 
-static int acer_rfkill_set(void *data, enum rfkill_state state)
+static int acer_rfkill_set(void *data, bool blocked)
 {
 	acpi_status status;
-	u32 *cap = data;
-	status = set_u32((u32) (state == RFKILL_STATE_UNBLOCKED), *cap);
+	u32 cap = (unsigned long)data;
+	status = set_u32(!!blocked, cap);
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
 	return 0;
 }
 
-static struct rfkill * acer_rfkill_register(struct device *dev,
-enum rfkill_type type, char *name, u32 cap)
+static const struct rfkill_ops acer_rfkill_ops = {
+	.set_block = acer_rfkill_set,
+};
+
+static struct rfkill *acer_rfkill_register(struct device *dev,
+					   enum rfkill_type type,
+					   char *name, u32 cap)
 {
 	int err;
 	u32 state;
-	u32 *data;
 	struct rfkill *rfkill_dev;
 
-	rfkill_dev = rfkill_allocate(dev, type);
+	rfkill_dev = rfkill_alloc(name, dev, type,
+				  &acer_rfkill_ops,
+				  (void *)(unsigned long)cap);
 	if (!rfkill_dev)
 		return ERR_PTR(-ENOMEM);
-	rfkill_dev->name = name;
 	get_u32(&state, cap);
-	rfkill_dev->state = state ? RFKILL_STATE_UNBLOCKED :
-		RFKILL_STATE_SOFT_BLOCKED;
-	data = kzalloc(sizeof(u32), GFP_KERNEL);
-	if (!data) {
-		rfkill_free(rfkill_dev);
-		return ERR_PTR(-ENOMEM);
-	}
-	*data = cap;
-	rfkill_dev->data = data;
-	rfkill_dev->toggle_radio = acer_rfkill_set;
+	rfkill_set_sw_state(rfkill_dev, !state);
 
 	err = rfkill_register(rfkill_dev);
 	if (err) {
-		kfree(rfkill_dev->data);
-		rfkill_free(rfkill_dev);
+		rfkill_destroy(rfkill_dev);
 		return ERR_PTR(err);
 	}
 	return rfkill_dev;
@@ -1027,8 +1019,8 @@ static int acer_rfkill_init(struct device *dev)
 			RFKILL_TYPE_BLUETOOTH, "acer-bluetooth",
 			ACER_CAP_BLUETOOTH);
 		if (IS_ERR(bluetooth_rfkill)) {
-			kfree(wireless_rfkill->data);
 			rfkill_unregister(wireless_rfkill);
+			rfkill_destroy(wireless_rfkill);
 			return PTR_ERR(bluetooth_rfkill);
 		}
 	}
@@ -1041,11 +1033,13 @@ static int acer_rfkill_init(struct device *dev)
 static void acer_rfkill_exit(void)
 {
 	cancel_delayed_work_sync(&acer_rfkill_work);
-	kfree(wireless_rfkill->data);
+
 	rfkill_unregister(wireless_rfkill);
+	rfkill_destroy(wireless_rfkill);
+
 	if (has_cap(ACER_CAP_BLUETOOTH)) {
-		kfree(bluetooth_rfkill->data);
 		rfkill_unregister(bluetooth_rfkill);
+		rfkill_destroy(bluetooth_rfkill);
 	}
 	return;
 }
diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c
index af9f4302117..2faf0e14f05 100644
--- a/drivers/platform/x86/dell-laptop.c
+++ b/drivers/platform/x86/dell-laptop.c
@@ -174,10 +174,11 @@ dell_send_request(struct calling_interface_buffer *buffer, int class,
    result[3]: NVRAM format version number
 */
 
-static int dell_rfkill_set(int radio, enum rfkill_state state)
+static int dell_rfkill_set(void *data, bool blocked)
 {
 	struct calling_interface_buffer buffer;
-	int disable = (state == RFKILL_STATE_UNBLOCKED) ? 0 : 1;
+	int disable = blocked ? 0 : 1;
+	unsigned long radio = (unsigned long)data;
 
 	memset(&buffer, 0, sizeof(struct calling_interface_buffer));
 	buffer.input[0] = (1 | (radio<<8) | (disable << 16));
@@ -186,56 +187,24 @@ static int dell_rfkill_set(int radio, enum rfkill_state state)
 	return 0;
 }
 
-static int dell_wifi_set(void *data, enum rfkill_state state)
-{
-	return dell_rfkill_set(1, state);
-}
-
-static int dell_bluetooth_set(void *data, enum rfkill_state state)
-{
-	return dell_rfkill_set(2, state);
-}
-
-static int dell_wwan_set(void *data, enum rfkill_state state)
-{
-	return dell_rfkill_set(3, state);
-}
-
-static int dell_rfkill_get(int bit, enum rfkill_state *state)
+static void dell_rfkill_query(struct rfkill *rfkill, void *data)
 {
 	struct calling_interface_buffer buffer;
 	int status;
-	int new_state = RFKILL_STATE_HARD_BLOCKED;
+	int bit = (unsigned long)data + 16;
 
 	memset(&buffer, 0, sizeof(struct calling_interface_buffer));
 	dell_send_request(&buffer, 17, 11);
 	status = buffer.output[1];
 
-	if (status & (1<<16))
-		new_state = RFKILL_STATE_SOFT_BLOCKED;
-
-	if (status & (1<<bit))
-		*state = new_state;
-	else
-		*state = RFKILL_STATE_UNBLOCKED;
-
-	return 0;
-}
-
-static int dell_wifi_get(void *data, enum rfkill_state *state)
-{
-	return dell_rfkill_get(17, state);
-}
-
-static int dell_bluetooth_get(void *data, enum rfkill_state *state)
-{
-	return dell_rfkill_get(18, state);
+	if (status & BIT(bit))
+		rfkill_set_hw_state(rfkill, !!(status & BIT(16)));
 }
 
-static int dell_wwan_get(void *data, enum rfkill_state *state)
-{
-	return dell_rfkill_get(19, state);
-}
+static const struct rfkill_ops dell_rfkill_ops = {
+	.set_block = dell_rfkill_set,
+	.query = dell_rfkill_query,
+};
 
 static int dell_setup_rfkill(void)
 {
@@ -248,36 +217,37 @@ static int dell_setup_rfkill(void)
 	status = buffer.output[1];
 
 	if ((status & (1<<2|1<<8)) == (1<<2|1<<8)) {
-		wifi_rfkill = rfkill_allocate(NULL, RFKILL_TYPE_WLAN);
-		if (!wifi_rfkill)
+		wifi_rfkill = rfkill_alloc("dell-wifi", NULL, RFKILL_TYPE_WLAN,
+					   &dell_rfkill_ops, (void *) 1);
+		if (!wifi_rfkill) {
+			ret = -ENOMEM;
 			goto err_wifi;
-		wifi_rfkill->name = "dell-wifi";
-		wifi_rfkill->toggle_radio = dell_wifi_set;
-		wifi_rfkill->get_state = dell_wifi_get;
+		}
 		ret = rfkill_register(wifi_rfkill);
 		if (ret)
 			goto err_wifi;
 	}
 
 	if ((status & (1<<3|1<<9)) == (1<<3|1<<9)) {
-		bluetooth_rfkill = rfkill_allocate(NULL, RFKILL_TYPE_BLUETOOTH);
-		if (!bluetooth_rfkill)
+		bluetooth_rfkill = rfkill_alloc("dell-bluetooth", NULL,
+						RFKILL_TYPE_BLUETOOTH,
+						&dell_rfkill_ops, (void *) 2);
+		if (!bluetooth_rfkill) {
+			ret = -ENOMEM;
 			goto err_bluetooth;
-		bluetooth_rfkill->name = "dell-bluetooth";
-		bluetooth_rfkill->toggle_radio = dell_bluetooth_set;
-		bluetooth_rfkill->get_state = dell_bluetooth_get;
+		}
 		ret = rfkill_register(bluetooth_rfkill);
 		if (ret)
 			goto err_bluetooth;
 	}
 
 	if ((status & (1<<4|1<<10)) == (1<<4|1<<10)) {
-		wwan_rfkill = rfkill_allocate(NULL, RFKILL_TYPE_WWAN);
-		if (!wwan_rfkill)
+		wwan_rfkill = rfkill_alloc("dell-wwan", NULL, RFKILL_TYPE_WWAN,
+					   &dell_rfkill_ops, (void *) 3);
+		if (!wwan_rfkill) {
+			ret = -ENOMEM;
 			goto err_wwan;
-		wwan_rfkill->name = "dell-wwan";
-		wwan_rfkill->toggle_radio = dell_wwan_set;
-		wwan_rfkill->get_state = dell_wwan_get;
+		}
 		ret = rfkill_register(wwan_rfkill);
 		if (ret)
 			goto err_wwan;
@@ -285,22 +255,15 @@ static int dell_setup_rfkill(void)
 
 	return 0;
 err_wwan:
-	if (wwan_rfkill)
-		rfkill_free(wwan_rfkill);
-	if (bluetooth_rfkill) {
+	rfkill_destroy(wwan_rfkill);
+	if (bluetooth_rfkill)
 		rfkill_unregister(bluetooth_rfkill);
-		bluetooth_rfkill = NULL;
-	}
 err_bluetooth:
-	if (bluetooth_rfkill)
-		rfkill_free(bluetooth_rfkill);
-	if (wifi_rfkill) {
+	rfkill_destroy(bluetooth_rfkill);
+	if (wifi_rfkill)
 		rfkill_unregister(wifi_rfkill);
-		wifi_rfkill = NULL;
-	}
 err_wifi:
-	if (wifi_rfkill)
-		rfkill_free(wifi_rfkill);
+	rfkill_destroy(wifi_rfkill);
 
 	return ret;
 }
diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 353a898c369..1208d0cedd1 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -299,39 +299,22 @@ static int update_bl_status(struct backlight_device *bd)
  * Rfkill helpers
  */
 
-static int eeepc_wlan_rfkill_set(void *data, enum rfkill_state state)
-{
-	if (state == RFKILL_STATE_SOFT_BLOCKED)
-		return set_acpi(CM_ASL_WLAN, 0);
-	else
-		return set_acpi(CM_ASL_WLAN, 1);
-}
-
-static int eeepc_wlan_rfkill_state(void *data, enum rfkill_state *state)
+static bool eeepc_wlan_rfkill_blocked(void)
 {
 	if (get_acpi(CM_ASL_WLAN) == 1)
-		*state = RFKILL_STATE_UNBLOCKED;
-	else
-		*state = RFKILL_STATE_SOFT_BLOCKED;
-	return 0;
+		return false;
+	return true;
 }
 
-static int eeepc_bluetooth_rfkill_set(void *data, enum rfkill_state state)
+static int eeepc_rfkill_set(void *data, bool blocked)
 {
-	if (state == RFKILL_STATE_SOFT_BLOCKED)
-		return set_acpi(CM_ASL_BLUETOOTH, 0);
-	else
-		return set_acpi(CM_ASL_BLUETOOTH, 1);
+	unsigned long asl = (unsigned long)data;
+	return set_acpi(asl, !blocked);
 }
 
-static int eeepc_bluetooth_rfkill_state(void *data, enum rfkill_state *state)
-{
-	if (get_acpi(CM_ASL_BLUETOOTH) == 1)
-		*state = RFKILL_STATE_UNBLOCKED;
-	else
-		*state = RFKILL_STATE_SOFT_BLOCKED;
-	return 0;
-}
+static const struct rfkill_ops eeepc_rfkill_ops = {
+	.set_block = eeepc_rfkill_set,
+};
 
 /*
  * Sys helpers
@@ -531,9 +514,9 @@ static int notify_brn(void)
 
 static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data)
 {
-	enum rfkill_state state;
 	struct pci_dev *dev;
 	struct pci_bus *bus = pci_find_bus(0, 1);
+	bool blocked;
 
 	if (event != ACPI_NOTIFY_BUS_CHECK)
 		return;
@@ -543,9 +526,8 @@ static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data)
 		return;
 	}
 
-	eeepc_wlan_rfkill_state(ehotk->eeepc_wlan_rfkill, &state);
-
-	if (state == RFKILL_STATE_UNBLOCKED) {
+	blocked = eeepc_wlan_rfkill_blocked();
+	if (!blocked) {
 		dev = pci_get_slot(bus, 0);
 		if (dev) {
 			/* Device already present */
@@ -566,7 +548,7 @@ static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data)
 		}
 	}
 
-	rfkill_force_state(ehotk->eeepc_wlan_rfkill, state);
+	rfkill_set_sw_state(ehotk->eeepc_wlan_rfkill, blocked);
 }
 
 static void eeepc_hotk_notify(acpi_handle handle, u32 event, void *data)
@@ -684,26 +666,17 @@ static int eeepc_hotk_add(struct acpi_device *device)
 	eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P7");
 
 	if (get_acpi(CM_ASL_WLAN) != -1) {
-		ehotk->eeepc_wlan_rfkill = rfkill_allocate(&device->dev,
-							   RFKILL_TYPE_WLAN);
+		ehotk->eeepc_wlan_rfkill = rfkill_alloc("eeepc-wlan",
+							&device->dev,
+							RFKILL_TYPE_WLAN,
+							&eeepc_rfkill_ops,
+							(void *)CM_ASL_WLAN);
 
 		if (!ehotk->eeepc_wlan_rfkill)
 			goto wlan_fail;
 
-		ehotk->eeepc_wlan_rfkill->name = "eeepc-wlan";
-		ehotk->eeepc_wlan_rfkill->toggle_radio = eeepc_wlan_rfkill_set;
-		ehotk->eeepc_wlan_rfkill->get_state = eeepc_wlan_rfkill_state;
-		if (get_acpi(CM_ASL_WLAN) == 1) {
-			ehotk->eeepc_wlan_rfkill->state =
-				RFKILL_STATE_UNBLOCKED;
-			rfkill_set_default(RFKILL_TYPE_WLAN,
-					   RFKILL_STATE_UNBLOCKED);
-		} else {
-			ehotk->eeepc_wlan_rfkill->state =
-				RFKILL_STATE_SOFT_BLOCKED;
-			rfkill_set_default(RFKILL_TYPE_WLAN,
-					   RFKILL_STATE_SOFT_BLOCKED);
-		}
+		rfkill_set_global_sw_state(RFKILL_TYPE_WLAN,
+					   get_acpi(CM_ASL_WLAN) != 1);
 		result = rfkill_register(ehotk->eeepc_wlan_rfkill);
 		if (result)
 			goto wlan_fail;
@@ -711,28 +684,17 @@ static int eeepc_hotk_add(struct acpi_device *device)
 
 	if (get_acpi(CM_ASL_BLUETOOTH) != -1) {
 		ehotk->eeepc_bluetooth_rfkill =
-			rfkill_allocate(&device->dev, RFKILL_TYPE_BLUETOOTH);
+			rfkill_alloc("eeepc-bluetooth",
+				     &device->dev,
+				     RFKILL_TYPE_BLUETOOTH,
+				     &eeepc_rfkill_ops,
+				     (void *)CM_ASL_BLUETOOTH);
 
 		if (!ehotk->eeepc_bluetooth_rfkill)
 			goto bluetooth_fail;
 
-		ehotk->eeepc_bluetooth_rfkill->name = "eeepc-bluetooth";
-		ehotk->eeepc_bluetooth_rfkill->toggle_radio =
-			eeepc_bluetooth_rfkill_set;
-		ehotk->eeepc_bluetooth_rfkill->get_state =
-			eeepc_bluetooth_rfkill_state;
-		if (get_acpi(CM_ASL_BLUETOOTH) == 1) {
-			ehotk->eeepc_bluetooth_rfkill->state =
-				RFKILL_STATE_UNBLOCKED;
-			rfkill_set_default(RFKILL_TYPE_BLUETOOTH,
-					   RFKILL_STATE_UNBLOCKED);
-		} else {
-			ehotk->eeepc_bluetooth_rfkill->state =
-				RFKILL_STATE_SOFT_BLOCKED;
-			rfkill_set_default(RFKILL_TYPE_BLUETOOTH,
-					   RFKILL_STATE_SOFT_BLOCKED);
-		}
-
+		rfkill_set_global_sw_state(RFKILL_TYPE_BLUETOOTH,
+					   get_acpi(CM_ASL_BLUETOOTH) != 1);
 		result = rfkill_register(ehotk->eeepc_bluetooth_rfkill);
 		if (result)
 			goto bluetooth_fail;
@@ -741,13 +703,10 @@ static int eeepc_hotk_add(struct acpi_device *device)
 	return 0;
 
  bluetooth_fail:
-	if (ehotk->eeepc_bluetooth_rfkill)
-		rfkill_free(ehotk->eeepc_bluetooth_rfkill);
+	rfkill_destroy(ehotk->eeepc_bluetooth_rfkill);
 	rfkill_unregister(ehotk->eeepc_wlan_rfkill);
-	ehotk->eeepc_wlan_rfkill = NULL;
  wlan_fail:
-	if (ehotk->eeepc_wlan_rfkill)
-		rfkill_free(ehotk->eeepc_wlan_rfkill);
+	rfkill_destroy(ehotk->eeepc_wlan_rfkill);
 	eeepc_unregister_rfkill_notifier("\\_SB.PCI0.P0P6");
 	eeepc_unregister_rfkill_notifier("\\_SB.PCI0.P0P7");
  ehotk_fail:
diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c
index fe171fad12c..8d931145cbf 100644
--- a/drivers/platform/x86/hp-wmi.c
+++ b/drivers/platform/x86/hp-wmi.c
@@ -154,58 +154,46 @@ static int hp_wmi_dock_state(void)
 	return hp_wmi_perform_query(HPWMI_DOCK_QUERY, 0, 0);
 }
 
-static int hp_wmi_wifi_set(void *data, enum rfkill_state state)
+static int hp_wmi_set_block(void *data, bool blocked)
 {
-	if (state)
-		return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x101);
-	else
-		return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x100);
-}
+	unsigned long b = (unsigned long) data;
+	int query = BIT(b + 8) | ((!!blocked) << b);
 
-static int hp_wmi_bluetooth_set(void *data, enum rfkill_state state)
-{
-	if (state)
-		return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x202);
-	else
-		return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x200);
+	return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, query);
 }
 
-static int hp_wmi_wwan_set(void *data, enum rfkill_state state)
-{
-	if (state)
-		return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x404);
-	else
-		return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x400);
-}
+static const struct rfkill_ops hp_wmi_rfkill_ops = {
+	.set_block = hp_wmi_set_block,
+};
 
-static int hp_wmi_wifi_state(void)
+static bool hp_wmi_wifi_state(void)
 {
 	int wireless = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0);
 
 	if (wireless & 0x100)
-		return RFKILL_STATE_UNBLOCKED;
+		return false;
 	else
-		return RFKILL_STATE_SOFT_BLOCKED;
+		return true;
 }
 
-static int hp_wmi_bluetooth_state(void)
+static bool hp_wmi_bluetooth_state(void)
 {
 	int wireless = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0);
 
 	if (wireless & 0x10000)
-		return RFKILL_STATE_UNBLOCKED;
+		return false;
 	else
-		return RFKILL_STATE_SOFT_BLOCKED;
+		return true;
 }
 
-static int hp_wmi_wwan_state(void)
+static bool hp_wmi_wwan_state(void)
 {
 	int wireless = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0);
 
 	if (wireless & 0x1000000)
-		return RFKILL_STATE_UNBLOCKED;
+		return false;
 	else
-		return RFKILL_STATE_SOFT_BLOCKED;
+		return true;
 }
 
 static ssize_t show_display(struct device *dev, struct device_attribute *attr,
@@ -347,14 +335,14 @@ static void hp_wmi_notify(u32 value, void *context)
 			}
 		} else if (eventcode == 0x5) {
 			if (wifi_rfkill)
-				rfkill_force_state(wifi_rfkill,
-						   hp_wmi_wifi_state());
+				rfkill_set_sw_state(wifi_rfkill,
+						    hp_wmi_wifi_state());
 			if (bluetooth_rfkill)
-				rfkill_force_state(bluetooth_rfkill,
-						   hp_wmi_bluetooth_state());
+				rfkill_set_sw_state(bluetooth_rfkill,
+						    hp_wmi_bluetooth_state());
 			if (wwan_rfkill)
-				rfkill_force_state(wwan_rfkill,
-						   hp_wmi_wwan_state());
+				rfkill_set_sw_state(wwan_rfkill,
+						    hp_wmi_wwan_state());
 		} else
 			printk(KERN_INFO "HP WMI: Unknown key pressed - %x\n",
 			       eventcode);
@@ -430,31 +418,34 @@ static int __init hp_wmi_bios_setup(struct platform_device *device)
 		goto add_sysfs_error;
 
 	if (wireless & 0x1) {
-		wifi_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WLAN);
-		wifi_rfkill->name = "hp-wifi";
-		wifi_rfkill->state = hp_wmi_wifi_state();
-		wifi_rfkill->toggle_radio = hp_wmi_wifi_set;
+		wifi_rfkill = rfkill_alloc("hp-wifi", &device->dev,
+					   RFKILL_TYPE_WLAN,
+					   &hp_wmi_rfkill_ops,
+					   (void *) 0);
+		rfkill_set_sw_state(wifi_rfkill, hp_wmi_wifi_state());
 		err = rfkill_register(wifi_rfkill);
 		if (err)
-			goto add_sysfs_error;
+			goto register_wifi_error;
 	}
 
 	if (wireless & 0x2) {
-		bluetooth_rfkill = rfkill_allocate(&device->dev,
-						   RFKILL_TYPE_BLUETOOTH);
-		bluetooth_rfkill->name = "hp-bluetooth";
-		bluetooth_rfkill->state = hp_wmi_bluetooth_state();
-		bluetooth_rfkill->toggle_radio = hp_wmi_bluetooth_set;
+		bluetooth_rfkill = rfkill_alloc("hp-bluetooth", &device->dev,
+						RFKILL_TYPE_BLUETOOTH,
+						&hp_wmi_rfkill_ops,
+						(void *) 1);
+		rfkill_set_sw_state(bluetooth_rfkill,
+				    hp_wmi_bluetooth_state());
 		err = rfkill_register(bluetooth_rfkill);
 		if (err)
 			goto register_bluetooth_error;
 	}
 
 	if (wireless & 0x4) {
-		wwan_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WWAN);
-		wwan_rfkill->name = "hp-wwan";
-		wwan_rfkill->state = hp_wmi_wwan_state();
-		wwan_rfkill->toggle_radio = hp_wmi_wwan_set;
+		wwan_rfkill = rfkill_alloc("hp-wwan", &device->dev,
+					   RFKILL_TYPE_WWAN,
+					   &hp_wmi_rfkill_ops,
+					   (void *) 2);
+		rfkill_set_sw_state(wwan_rfkill, hp_wmi_wwan_state());
 		err = rfkill_register(wwan_rfkill);
 		if (err)
 			goto register_wwan_err;
@@ -462,11 +453,15 @@ static int __init hp_wmi_bios_setup(struct platform_device *device)
 
 	return 0;
 register_wwan_err:
+	rfkill_destroy(wwan_rfkill);
 	if (bluetooth_rfkill)
 		rfkill_unregister(bluetooth_rfkill);
 register_bluetooth_error:
+	rfkill_destroy(bluetooth_rfkill);
 	if (wifi_rfkill)
 		rfkill_unregister(wifi_rfkill);
+register_wifi_error:
+	rfkill_destroy(wifi_rfkill);
 add_sysfs_error:
 	cleanup_sysfs(device);
 	return err;
@@ -476,12 +471,18 @@ static int __exit hp_wmi_bios_remove(struct platform_device *device)
 {
 	cleanup_sysfs(device);
 
-	if (wifi_rfkill)
+	if (wifi_rfkill) {
 		rfkill_unregister(wifi_rfkill);
-	if (bluetooth_rfkill)
+		rfkill_destroy(wifi_rfkill);
+	}
+	if (bluetooth_rfkill) {
 		rfkill_unregister(bluetooth_rfkill);
-	if (wwan_rfkill)
+		rfkill_destroy(wifi_rfkill);
+	}
+	if (wwan_rfkill) {
 		rfkill_unregister(wwan_rfkill);
+		rfkill_destroy(wwan_rfkill);
+	}
 
 	return 0;
 }
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index f1963b05175..aec0b27fd77 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -128,11 +128,11 @@ enum sony_nc_rfkill {
 	SONY_BLUETOOTH,
 	SONY_WWAN,
 	SONY_WIMAX,
-	SONY_RFKILL_MAX,
+	N_SONY_RFKILL,
 };
 
-static struct rfkill *sony_rfkill_devices[SONY_RFKILL_MAX];
-static int sony_rfkill_address[SONY_RFKILL_MAX] = {0x300, 0x500, 0x700, 0x900};
+static struct rfkill *sony_rfkill_devices[N_SONY_RFKILL];
+static int sony_rfkill_address[N_SONY_RFKILL] = {0x300, 0x500, 0x700, 0x900};
 static void sony_nc_rfkill_update(void);
 
 /*********** Input Devices ***********/
@@ -1051,147 +1051,98 @@ static void sony_nc_rfkill_cleanup(void)
 {
 	int i;
 
-	for (i = 0; i < SONY_RFKILL_MAX; i++) {
-		if (sony_rfkill_devices[i])
+	for (i = 0; i < N_SONY_RFKILL; i++) {
+		if (sony_rfkill_devices[i]) {
 			rfkill_unregister(sony_rfkill_devices[i]);
+			rfkill_destroy(sony_rfkill_devices[i]);
+		}
 	}
 }
 
-static int sony_nc_rfkill_get(void *data, enum rfkill_state *state)
-{
-	int result;
-	int argument = sony_rfkill_address[(long) data];
-
-	sony_call_snc_handle(0x124, 0x200, &result);
-	if (result & 0x1) {
-		sony_call_snc_handle(0x124, argument, &result);
-		if (result & 0xf)
-			*state = RFKILL_STATE_UNBLOCKED;
-		else
-			*state = RFKILL_STATE_SOFT_BLOCKED;
-	} else {
-		*state = RFKILL_STATE_HARD_BLOCKED;
-	}
-
-	return 0;
-}
-
-static int sony_nc_rfkill_set(void *data, enum rfkill_state state)
+static int sony_nc_rfkill_set(void *data, bool blocked)
 {
 	int result;
 	int argument = sony_rfkill_address[(long) data] + 0x100;
 
-	if (state == RFKILL_STATE_UNBLOCKED)
+	if (!blocked)
 		argument |= 0xff0000;
 
 	return sony_call_snc_handle(0x124, argument, &result);
 }
 
-static int sony_nc_setup_wifi_rfkill(struct acpi_device *device)
-{
-	int err = 0;
-	struct rfkill *sony_wifi_rfkill;
-
-	sony_wifi_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WLAN);
-	if (!sony_wifi_rfkill)
-		return -1;
-	sony_wifi_rfkill->name = "sony-wifi";
-	sony_wifi_rfkill->toggle_radio = sony_nc_rfkill_set;
-	sony_wifi_rfkill->get_state = sony_nc_rfkill_get;
-	sony_wifi_rfkill->data = (void *)SONY_WIFI;
-	err = rfkill_register(sony_wifi_rfkill);
-	if (err)
-		rfkill_free(sony_wifi_rfkill);
-	else {
-		sony_rfkill_devices[SONY_WIFI] = sony_wifi_rfkill;
-		sony_nc_rfkill_set(sony_wifi_rfkill->data,
-				RFKILL_STATE_UNBLOCKED);
-	}
-	return err;
-}
+static const struct rfkill_ops sony_rfkill_ops = {
+	.set_block = sony_nc_rfkill_set,
+};
 
-static int sony_nc_setup_bluetooth_rfkill(struct acpi_device *device)
+static int sony_nc_setup_rfkill(struct acpi_device *device,
+				enum sony_nc_rfkill nc_type)
 {
 	int err = 0;
-	struct rfkill *sony_bluetooth_rfkill;
-
-	sony_bluetooth_rfkill = rfkill_allocate(&device->dev,
-						RFKILL_TYPE_BLUETOOTH);
-	if (!sony_bluetooth_rfkill)
-		return -1;
-	sony_bluetooth_rfkill->name = "sony-bluetooth";
-	sony_bluetooth_rfkill->toggle_radio = sony_nc_rfkill_set;
-	sony_bluetooth_rfkill->get_state = sony_nc_rfkill_get;
-	sony_bluetooth_rfkill->data = (void *)SONY_BLUETOOTH;
-	err = rfkill_register(sony_bluetooth_rfkill);
-	if (err)
-		rfkill_free(sony_bluetooth_rfkill);
-	else {
-		sony_rfkill_devices[SONY_BLUETOOTH] = sony_bluetooth_rfkill;
-		sony_nc_rfkill_set(sony_bluetooth_rfkill->data,
-				RFKILL_STATE_UNBLOCKED);
+	struct rfkill *rfk;
+	enum rfkill_type type;
+	const char *name;
+
+	switch (nc_type) {
+	case SONY_WIFI:
+		type = RFKILL_TYPE_WLAN;
+		name = "sony-wifi";
+		break;
+	case SONY_BLUETOOTH:
+		type = RFKILL_TYPE_BLUETOOTH;
+		name = "sony-bluetooth";
+		break;
+	case SONY_WWAN:
+		type = RFKILL_TYPE_WWAN;
+		name = "sony-wwan";
+		break;
+	case SONY_WIMAX:
+		type = RFKILL_TYPE_WIMAX;
+		name = "sony-wimax";
+		break;
+	default:
+		return -EINVAL;
 	}
-	return err;
-}
 
-static int sony_nc_setup_wwan_rfkill(struct acpi_device *device)
-{
-	int err = 0;
-	struct rfkill *sony_wwan_rfkill;
+	rfk = rfkill_alloc(name, &device->dev, type,
+			   &sony_rfkill_ops, (void *)nc_type);
+	if (!rfk)
+		return -ENOMEM;
 
-	sony_wwan_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WWAN);
-	if (!sony_wwan_rfkill)
-		return -1;
-	sony_wwan_rfkill->name = "sony-wwan";
-	sony_wwan_rfkill->toggle_radio = sony_nc_rfkill_set;
-	sony_wwan_rfkill->get_state = sony_nc_rfkill_get;
-	sony_wwan_rfkill->data = (void *)SONY_WWAN;
-	err = rfkill_register(sony_wwan_rfkill);
-	if (err)
-		rfkill_free(sony_wwan_rfkill);
-	else {
-		sony_rfkill_devices[SONY_WWAN] = sony_wwan_rfkill;
-		sony_nc_rfkill_set(sony_wwan_rfkill->data,
-				RFKILL_STATE_UNBLOCKED);
+	err = rfkill_register(rfk);
+	if (err) {
+		rfkill_destroy(rfk);
+		return err;
 	}
+	sony_rfkill_devices[nc_type] = rfk;
+	sony_nc_rfkill_set((void *)nc_type, false);
 	return err;
 }
 
-static int sony_nc_setup_wimax_rfkill(struct acpi_device *device)
+static void sony_nc_rfkill_update()
 {
-	int err = 0;
-	struct rfkill *sony_wimax_rfkill;
+	enum sony_nc_rfkill i;
+	int result;
+	bool hwblock;
 
-	sony_wimax_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WIMAX);
-	if (!sony_wimax_rfkill)
-		return -1;
-	sony_wimax_rfkill->name = "sony-wimax";
-	sony_wimax_rfkill->toggle_radio = sony_nc_rfkill_set;
-	sony_wimax_rfkill->get_state = sony_nc_rfkill_get;
-	sony_wimax_rfkill->data = (void *)SONY_WIMAX;
-	err = rfkill_register(sony_wimax_rfkill);
-	if (err)
-		rfkill_free(sony_wimax_rfkill);
-	else {
-		sony_rfkill_devices[SONY_WIMAX] = sony_wimax_rfkill;
-		sony_nc_rfkill_set(sony_wimax_rfkill->data,
-				RFKILL_STATE_UNBLOCKED);
-	}
-	return err;
-}
+	sony_call_snc_handle(0x124, 0x200, &result);
+	hwblock = !(result & 0x1);
 
-static void sony_nc_rfkill_update()
-{
-	int i;
-	enum rfkill_state state;
+	for (i = 0; i < N_SONY_RFKILL; i++) {
+		int argument = sony_rfkill_address[i];
 
-	for (i = 0; i < SONY_RFKILL_MAX; i++) {
-		if (sony_rfkill_devices[i]) {
-			sony_rfkill_devices[i]->
-				get_state(sony_rfkill_devices[i]->data,
-					  &state);
-			rfkill_force_state(sony_rfkill_devices[i], state);
+		if (!sony_rfkill_devices[i])
+			continue;
+
+		if (hwblock) {
+			if (rfkill_set_hw_state(sony_rfkill_devices[i], true))
+				sony_nc_rfkill_set(sony_rfkill_devices[i],
+						   true);
+			continue;
 		}
+
+		sony_call_snc_handle(0x124, argument, &result);
+		rfkill_set_states(sony_rfkill_devices[i],
+				  !(result & 0xf), false);
 	}
 }
 
@@ -1210,13 +1161,13 @@ static int sony_nc_rfkill_setup(struct acpi_device *device)
 	}
 
 	if (result & 0x1)
-		sony_nc_setup_wifi_rfkill(device);
+		sony_nc_setup_rfkill(device, SONY_WIFI);
 	if (result & 0x2)
-		sony_nc_setup_bluetooth_rfkill(device);
+		sony_nc_setup_rfkill(device, SONY_BLUETOOTH);
 	if (result & 0x1c)
-		sony_nc_setup_wwan_rfkill(device);
+		sony_nc_setup_rfkill(device, SONY_WWAN);
 	if (result & 0x20)
-		sony_nc_setup_wimax_rfkill(device);
+		sony_nc_setup_rfkill(device, SONY_WIMAX);
 
 	return 0;
 }
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 912be65b626..cfcafa4e947 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -166,13 +166,6 @@ enum {
 
 #define TPACPI_MAX_ACPI_ARGS 3
 
-/* rfkill switches */
-enum {
-	TPACPI_RFK_BLUETOOTH_SW_ID = 0,
-	TPACPI_RFK_WWAN_SW_ID,
-	TPACPI_RFK_UWB_SW_ID,
-};
-
 /* printk headers */
 #define TPACPI_LOG TPACPI_FILE ": "
 #define TPACPI_EMERG	KERN_EMERG	TPACPI_LOG
@@ -1005,67 +998,237 @@ static int __init tpacpi_check_std_acpi_brightness_support(void)
 	return 0;
 }
 
-static int __init tpacpi_new_rfkill(const unsigned int id,
-			struct rfkill **rfk,
+static void printk_deprecated_attribute(const char * const what,
+					const char * const details)
+{
+	tpacpi_log_usertask("deprecated sysfs attribute");
+	printk(TPACPI_WARN "WARNING: sysfs attribute %s is deprecated and "
+		"will be removed. %s\n",
+		what, details);
+}
+
+/*************************************************************************
+ * rfkill and radio control support helpers
+ */
+
+/*
+ * ThinkPad-ACPI firmware handling model:
+ *
+ * WLSW (master wireless switch) is event-driven, and is common to all
+ * firmware-controlled radios.  It cannot be controlled, just monitored,
+ * as expected.  It overrides all radio state in firmware
+ *
+ * The kernel, a masked-off hotkey, and WLSW can change the radio state
+ * (TODO: verify how WLSW interacts with the returned radio state).
+ *
+ * The only time there are shadow radio state changes, is when
+ * masked-off hotkeys are used.
+ */
+
+/*
+ * Internal driver API for radio state:
+ *
+ * int: < 0 = error, otherwise enum tpacpi_rfkill_state
+ * bool: true means radio blocked (off)
+ */
+enum tpacpi_rfkill_state {
+	TPACPI_RFK_RADIO_OFF = 0,
+	TPACPI_RFK_RADIO_ON
+};
+
+/* rfkill switches */
+enum tpacpi_rfk_id {
+	TPACPI_RFK_BLUETOOTH_SW_ID = 0,
+	TPACPI_RFK_WWAN_SW_ID,
+	TPACPI_RFK_UWB_SW_ID,
+	TPACPI_RFK_SW_MAX
+};
+
+static const char *tpacpi_rfkill_names[] = {
+	[TPACPI_RFK_BLUETOOTH_SW_ID] = "bluetooth",
+	[TPACPI_RFK_WWAN_SW_ID] = "wwan",
+	[TPACPI_RFK_UWB_SW_ID] = "uwb",
+	[TPACPI_RFK_SW_MAX] = NULL
+};
+
+/* ThinkPad-ACPI rfkill subdriver */
+struct tpacpi_rfk {
+	struct rfkill *rfkill;
+	enum tpacpi_rfk_id id;
+	const struct tpacpi_rfk_ops *ops;
+};
+
+struct tpacpi_rfk_ops {
+	/* firmware interface */
+	int (*get_status)(void);
+	int (*set_status)(const enum tpacpi_rfkill_state);
+};
+
+static struct tpacpi_rfk *tpacpi_rfkill_switches[TPACPI_RFK_SW_MAX];
+
+/* Query FW and update rfkill sw state for a given rfkill switch */
+static int tpacpi_rfk_update_swstate(const struct tpacpi_rfk *tp_rfk)
+{
+	int status;
+
+	if (!tp_rfk)
+		return -ENODEV;
+
+	status = (tp_rfk->ops->get_status)();
+	if (status < 0)
+		return status;
+
+	rfkill_set_sw_state(tp_rfk->rfkill,
+			    (status == TPACPI_RFK_RADIO_OFF));
+
+	return status;
+}
+
+/* Query FW and update rfkill sw state for all rfkill switches */
+static void tpacpi_rfk_update_swstate_all(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < TPACPI_RFK_SW_MAX; i++)
+		tpacpi_rfk_update_swstate(tpacpi_rfkill_switches[i]);
+}
+
+/*
+ * Sync the HW-blocking state of all rfkill switches,
+ * do notice it causes the rfkill core to schedule uevents
+ */
+static void tpacpi_rfk_update_hwblock_state(bool blocked)
+{
+	unsigned int i;
+	struct tpacpi_rfk *tp_rfk;
+
+	for (i = 0; i < TPACPI_RFK_SW_MAX; i++) {
+		tp_rfk = tpacpi_rfkill_switches[i];
+		if (tp_rfk) {
+			if (rfkill_set_hw_state(tp_rfk->rfkill,
+						blocked)) {
+				/* ignore -- we track sw block */
+			}
+		}
+	}
+}
+
+/* Call to get the WLSW state from the firmware */
+static int hotkey_get_wlsw(void);
+
+/* Call to query WLSW state and update all rfkill switches */
+static bool tpacpi_rfk_check_hwblock_state(void)
+{
+	int res = hotkey_get_wlsw();
+	int hw_blocked;
+
+	/* When unknown or unsupported, we have to assume it is unblocked */
+	if (res < 0)
+		return false;
+
+	hw_blocked = (res == TPACPI_RFK_RADIO_OFF);
+	tpacpi_rfk_update_hwblock_state(hw_blocked);
+
+	return hw_blocked;
+}
+
+static int tpacpi_rfk_hook_set_block(void *data, bool blocked)
+{
+	struct tpacpi_rfk *tp_rfk = data;
+	int res;
+
+	dbg_printk(TPACPI_DBG_RFKILL,
+		   "request to change radio state to %s\n",
+		   blocked ? "blocked" : "unblocked");
+
+	/* try to set radio state */
+	res = (tp_rfk->ops->set_status)(blocked ?
+				TPACPI_RFK_RADIO_OFF : TPACPI_RFK_RADIO_ON);
+
+	/* and update the rfkill core with whatever the FW really did */
+	tpacpi_rfk_update_swstate(tp_rfk);
+
+	return (res < 0) ? res : 0;
+}
+
+static const struct rfkill_ops tpacpi_rfk_rfkill_ops = {
+	.set_block = tpacpi_rfk_hook_set_block,
+};
+
+static int __init tpacpi_new_rfkill(const enum tpacpi_rfk_id id,
+			const struct tpacpi_rfk_ops *tp_rfkops,
 			const enum rfkill_type rfktype,
 			const char *name,
-			const bool set_default,
-			int (*toggle_radio)(void *, enum rfkill_state),
-			int (*get_state)(void *, enum rfkill_state *))
+			const bool set_default)
 {
+	struct tpacpi_rfk *atp_rfk;
 	int res;
-	enum rfkill_state initial_state = RFKILL_STATE_SOFT_BLOCKED;
+	bool initial_sw_state = false;
+	int initial_sw_status;
 
-	res = get_state(NULL, &initial_state);
-	if (res < 0) {
+	BUG_ON(id >= TPACPI_RFK_SW_MAX || tpacpi_rfkill_switches[id]);
+
+	initial_sw_status = (tp_rfkops->get_status)();
+	if (initial_sw_status < 0) {
 		printk(TPACPI_ERR
 			"failed to read initial state for %s, error %d; "
-			"will turn radio off\n", name, res);
-	} else if (set_default) {
-		/* try to set the initial state as the default for the rfkill
-		 * type, since we ask the firmware to preserve it across S5 in
-		 * NVRAM */
-		if (rfkill_set_default(rfktype,
-				(initial_state == RFKILL_STATE_UNBLOCKED) ?
-					RFKILL_STATE_UNBLOCKED :
-					RFKILL_STATE_SOFT_BLOCKED) == -EPERM)
-			vdbg_printk(TPACPI_DBG_RFKILL,
-				    "Default state for %s cannot be changed\n",
-				    name);
-	}
-
-	*rfk = rfkill_allocate(&tpacpi_pdev->dev, rfktype);
-	if (!*rfk) {
+			"will turn radio off\n", name, initial_sw_status);
+	} else {
+		initial_sw_state = (initial_sw_status == TPACPI_RFK_RADIO_OFF);
+		if (set_default) {
+			/* try to set the initial state as the default for the
+			 * rfkill type, since we ask the firmware to preserve
+			 * it across S5 in NVRAM */
+			rfkill_set_global_sw_state(rfktype, initial_sw_state);
+		}
+	}
+
+	atp_rfk = kzalloc(sizeof(struct tpacpi_rfk), GFP_KERNEL);
+	if (atp_rfk)
+		atp_rfk->rfkill = rfkill_alloc(name,
+						&tpacpi_pdev->dev,
+						rfktype,
+						&tpacpi_rfk_rfkill_ops,
+						atp_rfk);
+	if (!atp_rfk || !atp_rfk->rfkill) {
 		printk(TPACPI_ERR
 			"failed to allocate memory for rfkill class\n");
+		kfree(atp_rfk);
 		return -ENOMEM;
 	}
 
-	(*rfk)->name = name;
-	(*rfk)->get_state = get_state;
-	(*rfk)->toggle_radio = toggle_radio;
-	(*rfk)->state = initial_state;
+	atp_rfk->id = id;
+	atp_rfk->ops = tp_rfkops;
+
+	rfkill_set_states(atp_rfk->rfkill, initial_sw_state,
+				tpacpi_rfk_check_hwblock_state());
 
-	res = rfkill_register(*rfk);
+	res = rfkill_register(atp_rfk->rfkill);
 	if (res < 0) {
 		printk(TPACPI_ERR
 			"failed to register %s rfkill switch: %d\n",
 			name, res);
-		rfkill_free(*rfk);
-		*rfk = NULL;
+		rfkill_destroy(atp_rfk->rfkill);
+		kfree(atp_rfk);
 		return res;
 	}
 
+	tpacpi_rfkill_switches[id] = atp_rfk;
 	return 0;
 }
 
-static void printk_deprecated_attribute(const char * const what,
-					const char * const details)
+static void tpacpi_destroy_rfkill(const enum tpacpi_rfk_id id)
 {
-	tpacpi_log_usertask("deprecated sysfs attribute");
-	printk(TPACPI_WARN "WARNING: sysfs attribute %s is deprecated and "
-		"will be removed. %s\n",
-		what, details);
+	struct tpacpi_rfk *tp_rfk;
+
+	BUG_ON(id >= TPACPI_RFK_SW_MAX);
+
+	tp_rfk = tpacpi_rfkill_switches[id];
+	if (tp_rfk) {
+		rfkill_unregister(tp_rfk->rfkill);
+		tpacpi_rfkill_switches[id] = NULL;
+		kfree(tp_rfk);
+	}
 }
 
 static void printk_deprecated_rfkill_attribute(const char * const what)
@@ -1074,6 +1237,112 @@ static void printk_deprecated_rfkill_attribute(const char * const what)
 			"Please switch to generic rfkill before year 2010");
 }
 
+/* sysfs <radio> enable ------------------------------------------------ */
+static ssize_t tpacpi_rfk_sysfs_enable_show(const enum tpacpi_rfk_id id,
+					    struct device_attribute *attr,
+					    char *buf)
+{
+	int status;
+
+	printk_deprecated_rfkill_attribute(attr->attr.name);
+
+	/* This is in the ABI... */
+	if (tpacpi_rfk_check_hwblock_state()) {
+		status = TPACPI_RFK_RADIO_OFF;
+	} else {
+		status = tpacpi_rfk_update_swstate(tpacpi_rfkill_switches[id]);
+		if (status < 0)
+			return status;
+	}
+
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			(status == TPACPI_RFK_RADIO_ON) ? 1 : 0);
+}
+
+static ssize_t tpacpi_rfk_sysfs_enable_store(const enum tpacpi_rfk_id id,
+			    struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	unsigned long t;
+	int res;
+
+	printk_deprecated_rfkill_attribute(attr->attr.name);
+
+	if (parse_strtoul(buf, 1, &t))
+		return -EINVAL;
+
+	tpacpi_disclose_usertask(attr->attr.name, "set to %ld\n", t);
+
+	/* This is in the ABI... */
+	if (tpacpi_rfk_check_hwblock_state() && !!t)
+		return -EPERM;
+
+	res = tpacpi_rfkill_switches[id]->ops->set_status((!!t) ?
+				TPACPI_RFK_RADIO_ON : TPACPI_RFK_RADIO_OFF);
+	tpacpi_rfk_update_swstate(tpacpi_rfkill_switches[id]);
+
+	return (res < 0) ? res : count;
+}
+
+/* procfs -------------------------------------------------------------- */
+static int tpacpi_rfk_procfs_read(const enum tpacpi_rfk_id id, char *p)
+{
+	int len = 0;
+
+	if (id >= TPACPI_RFK_SW_MAX)
+		len += sprintf(p + len, "status:\t\tnot supported\n");
+	else {
+		int status;
+
+		/* This is in the ABI... */
+		if (tpacpi_rfk_check_hwblock_state()) {
+			status = TPACPI_RFK_RADIO_OFF;
+		} else {
+			status = tpacpi_rfk_update_swstate(
+						tpacpi_rfkill_switches[id]);
+			if (status < 0)
+				return status;
+		}
+
+		len += sprintf(p + len, "status:\t\t%s\n",
+				(status == TPACPI_RFK_RADIO_ON) ?
+					"enabled" : "disabled");
+		len += sprintf(p + len, "commands:\tenable, disable\n");
+	}
+
+	return len;
+}
+
+static int tpacpi_rfk_procfs_write(const enum tpacpi_rfk_id id, char *buf)
+{
+	char *cmd;
+	int status = -1;
+	int res = 0;
+
+	if (id >= TPACPI_RFK_SW_MAX)
+		return -ENODEV;
+
+	while ((cmd = next_cmd(&buf))) {
+		if (strlencmp(cmd, "enable") == 0)
+			status = TPACPI_RFK_RADIO_ON;
+		else if (strlencmp(cmd, "disable") == 0)
+			status = TPACPI_RFK_RADIO_OFF;
+		else
+			return -EINVAL;
+	}
+
+	if (status != -1) {
+		tpacpi_disclose_usertask("procfs", "attempt to %s %s\n",
+				(status == TPACPI_RFK_RADIO_ON) ?
+						"enable" : "disable",
+				tpacpi_rfkill_names[id]);
+		res = (tpacpi_rfkill_switches[id]->ops->set_status)(status);
+		tpacpi_rfk_update_swstate(tpacpi_rfkill_switches[id]);
+	}
+
+	return res;
+}
+
 /*************************************************************************
  * thinkpad-acpi driver attributes
  */
@@ -1127,8 +1396,6 @@ static DRIVER_ATTR(version, S_IRUGO,
 
 #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
 
-static void tpacpi_send_radiosw_update(void);
-
 /* wlsw_emulstate ------------------------------------------------------ */
 static ssize_t tpacpi_driver_wlsw_emulstate_show(struct device_driver *drv,
 						char *buf)
@@ -1144,11 +1411,10 @@ static ssize_t tpacpi_driver_wlsw_emulstate_store(struct device_driver *drv,
 	if (parse_strtoul(buf, 1, &t))
 		return -EINVAL;
 
-	if (tpacpi_wlsw_emulstate != t) {
-		tpacpi_wlsw_emulstate = !!t;
-		tpacpi_send_radiosw_update();
-	} else
+	if (tpacpi_wlsw_emulstate != !!t) {
 		tpacpi_wlsw_emulstate = !!t;
+		tpacpi_rfk_update_hwblock_state(!t);	/* negative logic */
+	}
 
 	return count;
 }
@@ -1463,17 +1729,23 @@ static struct attribute_set *hotkey_dev_attributes;
 /* HKEY.MHKG() return bits */
 #define TP_HOTKEY_TABLET_MASK (1 << 3)
 
-static int hotkey_get_wlsw(int *status)
+static int hotkey_get_wlsw(void)
 {
+	int status;
+
+	if (!tp_features.hotkey_wlsw)
+		return -ENODEV;
+
 #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
-	if (dbg_wlswemul) {
-		*status = !!tpacpi_wlsw_emulstate;
-		return 0;
-	}
+	if (dbg_wlswemul)
+		return (tpacpi_wlsw_emulstate) ?
+				TPACPI_RFK_RADIO_ON : TPACPI_RFK_RADIO_OFF;
 #endif
-	if (!acpi_evalf(hkey_handle, status, "WLSW", "d"))
+
+	if (!acpi_evalf(hkey_handle, &status, "WLSW", "d"))
 		return -EIO;
-	return 0;
+
+	return (status) ? TPACPI_RFK_RADIO_ON : TPACPI_RFK_RADIO_OFF;
 }
 
 static int hotkey_get_tablet_mode(int *status)
@@ -2107,12 +2379,16 @@ static ssize_t hotkey_radio_sw_show(struct device *dev,
 			   struct device_attribute *attr,
 			   char *buf)
 {
-	int res, s;
-	res = hotkey_get_wlsw(&s);
+	int res;
+	res = hotkey_get_wlsw();
 	if (res < 0)
 		return res;
 
-	return snprintf(buf, PAGE_SIZE, "%d\n", !!s);
+	/* Opportunistic update */
+	tpacpi_rfk_update_hwblock_state((res == TPACPI_RFK_RADIO_OFF));
+
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			(res == TPACPI_RFK_RADIO_OFF) ? 0 : 1);
 }
 
 static struct device_attribute dev_attr_hotkey_radio_sw =
@@ -2223,30 +2499,52 @@ static struct attribute *hotkey_mask_attributes[] __initdata = {
 	&dev_attr_hotkey_wakeup_hotunplug_complete.attr,
 };
 
-static void bluetooth_update_rfk(void);
-static void wan_update_rfk(void);
-static void uwb_update_rfk(void);
+/*
+ * Sync both the hw and sw blocking state of all switches
+ */
 static void tpacpi_send_radiosw_update(void)
 {
 	int wlsw;
 
-	/* Sync these BEFORE sending any rfkill events */
-	if (tp_features.bluetooth)
-		bluetooth_update_rfk();
-	if (tp_features.wan)
-		wan_update_rfk();
-	if (tp_features.uwb)
-		uwb_update_rfk();
+	/*
+	 * We must sync all rfkill controllers *before* issuing any
+	 * rfkill input events, or we will race the rfkill core input
+	 * handler.
+	 *
+	 * tpacpi_inputdev_send_mutex works as a syncronization point
+	 * for the above.
+	 *
+	 * We optimize to avoid numerous calls to hotkey_get_wlsw.
+	 */
 
-	if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&wlsw)) {
+	wlsw = hotkey_get_wlsw();
+
+	/* Sync hw blocking state first if it is hw-blocked */
+	if (wlsw == TPACPI_RFK_RADIO_OFF)
+		tpacpi_rfk_update_hwblock_state(true);
+
+	/* Sync sw blocking state */
+	tpacpi_rfk_update_swstate_all();
+
+	/* Sync hw blocking state last if it is hw-unblocked */
+	if (wlsw == TPACPI_RFK_RADIO_ON)
+		tpacpi_rfk_update_hwblock_state(false);
+
+	/* Issue rfkill input event for WLSW switch */
+	if (!(wlsw < 0)) {
 		mutex_lock(&tpacpi_inputdev_send_mutex);
 
 		input_report_switch(tpacpi_inputdev,
-				    SW_RFKILL_ALL, !!wlsw);
+				    SW_RFKILL_ALL, (wlsw > 0));
 		input_sync(tpacpi_inputdev);
 
 		mutex_unlock(&tpacpi_inputdev_send_mutex);
 	}
+
+	/*
+	 * this can be unconditional, as we will poll state again
+	 * if userspace uses the notify to read data
+	 */
 	hotkey_radio_sw_notify_change();
 }
 
@@ -3056,8 +3354,6 @@ enum {
 
 #define TPACPI_RFK_BLUETOOTH_SW_NAME	"tpacpi_bluetooth_sw"
 
-static struct rfkill *tpacpi_bluetooth_rfkill;
-
 static void bluetooth_suspend(pm_message_t state)
 {
 	/* Try to make sure radio will resume powered off */
@@ -3067,83 +3363,47 @@ static void bluetooth_suspend(pm_message_t state)
 			"bluetooth power down on resume request failed\n");
 }
 
-static int bluetooth_get_radiosw(void)
+static int bluetooth_get_status(void)
 {
 	int status;
 
-	if (!tp_features.bluetooth)
-		return -ENODEV;
-
-	/* WLSW overrides bluetooth in firmware/hardware, reflect that */
-	if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status)
-		return RFKILL_STATE_HARD_BLOCKED;
-
 #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
 	if (dbg_bluetoothemul)
 		return (tpacpi_bluetooth_emulstate) ?
-			RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED;
+		       TPACPI_RFK_RADIO_ON : TPACPI_RFK_RADIO_OFF;
 #endif
 
 	if (!acpi_evalf(hkey_handle, &status, "GBDC", "d"))
 		return -EIO;
 
 	return ((status & TP_ACPI_BLUETOOTH_RADIOSSW) != 0) ?
-		RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED;
+			TPACPI_RFK_RADIO_ON : TPACPI_RFK_RADIO_OFF;
 }
 
-static void bluetooth_update_rfk(void)
+static int bluetooth_set_status(enum tpacpi_rfkill_state state)
 {
 	int status;
 
-	if (!tpacpi_bluetooth_rfkill)
-		return;
-
-	status = bluetooth_get_radiosw();
-	if (status < 0)
-		return;
-	rfkill_force_state(tpacpi_bluetooth_rfkill, status);
-
 	vdbg_printk(TPACPI_DBG_RFKILL,
-		"forced rfkill state to %d\n",
-		status);
-}
-
-static int bluetooth_set_radiosw(int radio_on, int update_rfk)
-{
-	int status;
-
-	if (!tp_features.bluetooth)
-		return -ENODEV;
-
-	/* WLSW overrides bluetooth in firmware/hardware, but there is no
-	 * reason to risk weird behaviour. */
-	if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status
-	    && radio_on)
-		return -EPERM;
-
-	vdbg_printk(TPACPI_DBG_RFKILL,
-		"will %s bluetooth\n", radio_on ? "enable" : "disable");
+		"will attempt to %s bluetooth\n",
+		(state == TPACPI_RFK_RADIO_ON) ? "enable" : "disable");
 
 #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
 	if (dbg_bluetoothemul) {
-		tpacpi_bluetooth_emulstate = !!radio_on;
-		if (update_rfk)
-			bluetooth_update_rfk();
+		tpacpi_bluetooth_emulstate = (state == TPACPI_RFK_RADIO_ON);
 		return 0;
 	}
 #endif
 
 	/* We make sure to keep TP_ACPI_BLUETOOTH_RESUMECTRL off */
-	if (radio_on)
+	if (state == TPACPI_RFK_RADIO_ON)
 		status = TP_ACPI_BLUETOOTH_RADIOSSW;
 	else
 		status = 0;
+
 	if (!acpi_evalf(hkey_handle, NULL, "SBDC", "vd", status))
 		return -EIO;
 
-	if (update_rfk)
-		bluetooth_update_rfk();
-
 	return 0;
 }
 
@@ -3152,35 +3412,16 @@ static ssize_t bluetooth_enable_show(struct device *dev,
 			   struct device_attribute *attr,
 			   char *buf)
 {
-	int status;
-
-	printk_deprecated_rfkill_attribute("bluetooth_enable");
-
-	status = bluetooth_get_radiosw();
-	if (status < 0)
-		return status;
-
-	return snprintf(buf, PAGE_SIZE, "%d\n",
-			(status == RFKILL_STATE_UNBLOCKED) ? 1 : 0);
+	return tpacpi_rfk_sysfs_enable_show(TPACPI_RFK_BLUETOOTH_SW_ID,
+			attr, buf);
 }
 
 static ssize_t bluetooth_enable_store(struct device *dev,
 			    struct device_attribute *attr,
 			    const char *buf, size_t count)
 {
-	unsigned long t;
-	int res;
-
-	printk_deprecated_rfkill_attribute("bluetooth_enable");
-
-	if (parse_strtoul(buf, 1, &t))
-		return -EINVAL;
-
-	tpacpi_disclose_usertask("bluetooth_enable", "set to %ld\n", t);
-
-	res = bluetooth_set_radiosw(t, 1);
-
-	return (res) ? res : count;
+	return tpacpi_rfk_sysfs_enable_store(TPACPI_RFK_BLUETOOTH_SW_ID,
+				attr, buf, count);
 }
 
 static struct device_attribute dev_attr_bluetooth_enable =
@@ -3198,23 +3439,10 @@ static const struct attribute_group bluetooth_attr_group = {
 	.attrs = bluetooth_attributes,
 };
 
-static int tpacpi_bluetooth_rfk_get(void *data, enum rfkill_state *state)
-{
-	int bts = bluetooth_get_radiosw();
-
-	if (bts < 0)
-		return bts;
-
-	*state = bts;
-	return 0;
-}
-
-static int tpacpi_bluetooth_rfk_set(void *data, enum rfkill_state state)
-{
-	dbg_printk(TPACPI_DBG_RFKILL,
-		   "request to change radio state to %d\n", state);
-	return bluetooth_set_radiosw((state == RFKILL_STATE_UNBLOCKED), 0);
-}
+static const struct tpacpi_rfk_ops bluetooth_tprfk_ops = {
+	.get_status = bluetooth_get_status,
+	.set_status = bluetooth_set_status,
+};
 
 static void bluetooth_shutdown(void)
 {
@@ -3230,13 +3458,12 @@ static void bluetooth_shutdown(void)
 
 static void bluetooth_exit(void)
 {
-	bluetooth_shutdown();
-
-	if (tpacpi_bluetooth_rfkill)
-		rfkill_unregister(tpacpi_bluetooth_rfkill);
-
 	sysfs_remove_group(&tpacpi_pdev->dev.kobj,
 			&bluetooth_attr_group);
+
+	tpacpi_destroy_rfkill(TPACPI_RFK_BLUETOOTH_SW_ID);
+
+	bluetooth_shutdown();
 }
 
 static int __init bluetooth_init(struct ibm_init_struct *iibm)
@@ -3277,20 +3504,18 @@ static int __init bluetooth_init(struct ibm_init_struct *iibm)
 	if (!tp_features.bluetooth)
 		return 1;
 
-	res = sysfs_create_group(&tpacpi_pdev->dev.kobj,
-				&bluetooth_attr_group);
-	if (res)
-		return res;
-
 	res = tpacpi_new_rfkill(TPACPI_RFK_BLUETOOTH_SW_ID,
-				&tpacpi_bluetooth_rfkill,
+				&bluetooth_tprfk_ops,
 				RFKILL_TYPE_BLUETOOTH,
 				TPACPI_RFK_BLUETOOTH_SW_NAME,
-				true,
-				tpacpi_bluetooth_rfk_set,
-				tpacpi_bluetooth_rfk_get);
+				true);
+	if (res)
+		return res;
+
+	res = sysfs_create_group(&tpacpi_pdev->dev.kobj,
+				&bluetooth_attr_group);
 	if (res) {
-		bluetooth_exit();
+		tpacpi_destroy_rfkill(TPACPI_RFK_BLUETOOTH_SW_ID);
 		return res;
 	}
 
@@ -3300,46 +3525,12 @@ static int __init bluetooth_init(struct ibm_init_struct *iibm)
 /* procfs -------------------------------------------------------------- */
 static int bluetooth_read(char *p)
 {
-	int len = 0;
-	int status = bluetooth_get_radiosw();
-
-	if (!tp_features.bluetooth)
-		len += sprintf(p + len, "status:\t\tnot supported\n");
-	else {
-		len += sprintf(p + len, "status:\t\t%s\n",
-				(status == RFKILL_STATE_UNBLOCKED) ?
-					"enabled" : "disabled");
-		len += sprintf(p + len, "commands:\tenable, disable\n");
-	}
-
-	return len;
+	return tpacpi_rfk_procfs_read(TPACPI_RFK_BLUETOOTH_SW_ID, p);
 }
 
 static int bluetooth_write(char *buf)
 {
-	char *cmd;
-	int state = -1;
-
-	if (!tp_features.bluetooth)
-		return -ENODEV;
-
-	while ((cmd = next_cmd(&buf))) {
-		if (strlencmp(cmd, "enable") == 0) {
-			state = 1;
-		} else if (strlencmp(cmd, "disable") == 0) {
-			state = 0;
-		} else
-			return -EINVAL;
-	}
-
-	if (state != -1) {
-		tpacpi_disclose_usertask("procfs bluetooth",
-			"attempt to %s\n",
-			state ? "enable" : "disable");
-		bluetooth_set_radiosw(state, 1);
-	}
-
-	return 0;
+	return tpacpi_rfk_procfs_write(TPACPI_RFK_BLUETOOTH_SW_ID, buf);
 }
 
 static struct ibm_struct bluetooth_driver_data = {
@@ -3365,8 +3556,6 @@ enum {
 
 #define TPACPI_RFK_WWAN_SW_NAME		"tpacpi_wwan_sw"
 
-static struct rfkill *tpacpi_wan_rfkill;
-
 static void wan_suspend(pm_message_t state)
 {
 	/* Try to make sure radio will resume powered off */
@@ -3376,83 +3565,47 @@ static void wan_suspend(pm_message_t state)
 			"WWAN power down on resume request failed\n");
 }
 
-static int wan_get_radiosw(void)
+static int wan_get_status(void)
 {
 	int status;
 
-	if (!tp_features.wan)
-		return -ENODEV;
-
-	/* WLSW overrides WWAN in firmware/hardware, reflect that */
-	if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status)
-		return RFKILL_STATE_HARD_BLOCKED;
-
 #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
 	if (dbg_wwanemul)
 		return (tpacpi_wwan_emulstate) ?
-			RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED;
+		       TPACPI_RFK_RADIO_ON : TPACPI_RFK_RADIO_OFF;
 #endif
 
 	if (!acpi_evalf(hkey_handle, &status, "GWAN", "d"))
 		return -EIO;
 
 	return ((status & TP_ACPI_WANCARD_RADIOSSW) != 0) ?
-		RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED;
-}
-
-static void wan_update_rfk(void)
-{
-	int status;
-
-	if (!tpacpi_wan_rfkill)
-		return;
-
-	status = wan_get_radiosw();
-	if (status < 0)
-		return;
-	rfkill_force_state(tpacpi_wan_rfkill, status);
-
-	vdbg_printk(TPACPI_DBG_RFKILL,
-		"forced rfkill state to %d\n",
-		status);
+			TPACPI_RFK_RADIO_ON : TPACPI_RFK_RADIO_OFF;
 }
 
-static int wan_set_radiosw(int radio_on, int update_rfk)
+static int wan_set_status(enum tpacpi_rfkill_state state)
 {
 	int status;
 
-	if (!tp_features.wan)
-		return -ENODEV;
-
-	/* WLSW overrides bluetooth in firmware/hardware, but there is no
-	 * reason to risk weird behaviour. */
-	if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status
-	    && radio_on)
-		return -EPERM;
-
 	vdbg_printk(TPACPI_DBG_RFKILL,
-		"will %s WWAN\n", radio_on ? "enable" : "disable");
+		"will attempt to %s wwan\n",
+		(state == TPACPI_RFK_RADIO_ON) ? "enable" : "disable");
 
 #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
 	if (dbg_wwanemul) {
-		tpacpi_wwan_emulstate = !!radio_on;
-		if (update_rfk)
-			wan_update_rfk();
+		tpacpi_wwan_emulstate = (state == TPACPI_RFK_RADIO_ON);
 		return 0;
 	}
 #endif
 
 	/* We make sure to keep TP_ACPI_WANCARD_RESUMECTRL off */
-	if (radio_on)
+	if (state == TPACPI_RFK_RADIO_ON)
 		status = TP_ACPI_WANCARD_RADIOSSW;
 	else
 		status = 0;
+
 	if (!acpi_evalf(hkey_handle, NULL, "SWAN", "vd", status))
 		return -EIO;
 
-	if (update_rfk)
-		wan_update_rfk();
-
 	return 0;
 }
 
@@ -3461,35 +3614,16 @@ static ssize_t wan_enable_show(struct device *dev,
 			   struct device_attribute *attr,
 			   char *buf)
 {
-	int status;
-
-	printk_deprecated_rfkill_attribute("wwan_enable");
-
-	status = wan_get_radiosw();
-	if (status < 0)
-		return status;
-
-	return snprintf(buf, PAGE_SIZE, "%d\n",
-			(status == RFKILL_STATE_UNBLOCKED) ? 1 : 0);
+	return tpacpi_rfk_sysfs_enable_show(TPACPI_RFK_WWAN_SW_ID,
+			attr, buf);
 }
 
 static ssize_t wan_enable_store(struct device *dev,
 			    struct device_attribute *attr,
 			    const char *buf, size_t count)
 {
-	unsigned long t;
-	int res;
-
-	printk_deprecated_rfkill_attribute("wwan_enable");
-
-	if (parse_strtoul(buf, 1, &t))
-		return -EINVAL;
-
-	tpacpi_disclose_usertask("wwan_enable", "set to %ld\n", t);
-
-	res = wan_set_radiosw(t, 1);
-
-	return (res) ? res : count;
+	return tpacpi_rfk_sysfs_enable_store(TPACPI_RFK_WWAN_SW_ID,
+			attr, buf, count);
 }
 
 static struct device_attribute dev_attr_wan_enable =
@@ -3507,23 +3641,10 @@ static const struct attribute_group wan_attr_group = {
 	.attrs = wan_attributes,
 };
 
-static int tpacpi_wan_rfk_get(void *data, enum rfkill_state *state)
-{
-	int wans = wan_get_radiosw();
-
-	if (wans < 0)
-		return wans;
-
-	*state = wans;
-	return 0;
-}
-
-static int tpacpi_wan_rfk_set(void *data, enum rfkill_state state)
-{
-	dbg_printk(TPACPI_DBG_RFKILL,
-		   "request to change radio state to %d\n", state);
-	return wan_set_radiosw((state == RFKILL_STATE_UNBLOCKED), 0);
-}
+static const struct tpacpi_rfk_ops wan_tprfk_ops = {
+	.get_status = wan_get_status,
+	.set_status = wan_set_status,
+};
 
 static void wan_shutdown(void)
 {
@@ -3539,13 +3660,12 @@ static void wan_shutdown(void)
 
 static void wan_exit(void)
 {
-	wan_shutdown();
-
-	if (tpacpi_wan_rfkill)
-		rfkill_unregister(tpacpi_wan_rfkill);
-
 	sysfs_remove_group(&tpacpi_pdev->dev.kobj,
 		&wan_attr_group);
+
+	tpacpi_destroy_rfkill(TPACPI_RFK_WWAN_SW_ID);
+
+	wan_shutdown();
 }
 
 static int __init wan_init(struct ibm_init_struct *iibm)
@@ -3584,20 +3704,19 @@ static int __init wan_init(struct ibm_init_struct *iibm)
 	if (!tp_features.wan)
 		return 1;
 
-	res = sysfs_create_group(&tpacpi_pdev->dev.kobj,
-				&wan_attr_group);
-	if (res)
-		return res;
-
 	res = tpacpi_new_rfkill(TPACPI_RFK_WWAN_SW_ID,
-				&tpacpi_wan_rfkill,
+				&wan_tprfk_ops,
 				RFKILL_TYPE_WWAN,
 				TPACPI_RFK_WWAN_SW_NAME,
-				true,
-				tpacpi_wan_rfk_set,
-				tpacpi_wan_rfk_get);
+				true);
+	if (res)
+		return res;
+
+	res = sysfs_create_group(&tpacpi_pdev->dev.kobj,
+				&wan_attr_group);
+
 	if (res) {
-		wan_exit();
+		tpacpi_destroy_rfkill(TPACPI_RFK_WWAN_SW_ID);
 		return res;
 	}
 
@@ -3607,48 +3726,12 @@ static int __init wan_init(struct ibm_init_struct *iibm)
 /* procfs -------------------------------------------------------------- */
 static int wan_read(char *p)
 {
-	int len = 0;
-	int status = wan_get_radiosw();
-
-	tpacpi_disclose_usertask("procfs wan", "read");
-
-	if (!tp_features.wan)
-		len += sprintf(p + len, "status:\t\tnot supported\n");
-	else {
-		len += sprintf(p + len, "status:\t\t%s\n",
-				(status == RFKILL_STATE_UNBLOCKED) ?
-					"enabled" : "disabled");
-		len += sprintf(p + len, "commands:\tenable, disable\n");
-	}
-
-	return len;
+	return tpacpi_rfk_procfs_read(TPACPI_RFK_WWAN_SW_ID, p);
 }
 
 static int wan_write(char *buf)
 {
-	char *cmd;
-	int state = -1;
-
-	if (!tp_features.wan)
-		return -ENODEV;
-
-	while ((cmd = next_cmd(&buf))) {
-		if (strlencmp(cmd, "enable") == 0) {
-			state = 1;
-		} else if (strlencmp(cmd, "disable") == 0) {
-			state = 0;
-		} else
-			return -EINVAL;
-	}
-
-	if (state != -1) {
-		tpacpi_disclose_usertask("procfs wan",
-			"attempt to %s\n",
-			state ? "enable" : "disable");
-		wan_set_radiosw(state, 1);
-	}
-
-	return 0;
+	return tpacpi_rfk_procfs_write(TPACPI_RFK_WWAN_SW_ID, buf);
 }
 
 static struct ibm_struct wan_driver_data = {
@@ -3672,108 +3755,59 @@ enum {
 
 #define TPACPI_RFK_UWB_SW_NAME	"tpacpi_uwb_sw"
 
-static struct rfkill *tpacpi_uwb_rfkill;
-
-static int uwb_get_radiosw(void)
+static int uwb_get_status(void)
 {
 	int status;
 
-	if (!tp_features.uwb)
-		return -ENODEV;
-
-	/* WLSW overrides UWB in firmware/hardware, reflect that */
-	if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status)
-		return RFKILL_STATE_HARD_BLOCKED;
-
 #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
 	if (dbg_uwbemul)
 		return (tpacpi_uwb_emulstate) ?
-			RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED;
+		       TPACPI_RFK_RADIO_ON : TPACPI_RFK_RADIO_OFF;
 #endif
 
 	if (!acpi_evalf(hkey_handle, &status, "GUWB", "d"))
 		return -EIO;
 
 	return ((status & TP_ACPI_UWB_RADIOSSW) != 0) ?
-		RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED;
+			TPACPI_RFK_RADIO_ON : TPACPI_RFK_RADIO_OFF;
 }
 
-static void uwb_update_rfk(void)
+static int uwb_set_status(enum tpacpi_rfkill_state state)
 {
 	int status;
 
-	if (!tpacpi_uwb_rfkill)
-		return;
-
-	status = uwb_get_radiosw();
-	if (status < 0)
-		return;
-	rfkill_force_state(tpacpi_uwb_rfkill, status);
-
 	vdbg_printk(TPACPI_DBG_RFKILL,
-		"forced rfkill state to %d\n",
-		status);
-}
-
-static int uwb_set_radiosw(int radio_on, int update_rfk)
-{
-	int status;
-
-	if (!tp_features.uwb)
-		return -ENODEV;
-
-	/* WLSW overrides UWB in firmware/hardware, but there is no
-	 * reason to risk weird behaviour. */
-	if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status
-	    && radio_on)
-		return -EPERM;
-
-	vdbg_printk(TPACPI_DBG_RFKILL,
-			"will %s UWB\n", radio_on ? "enable" : "disable");
+		"will attempt to %s UWB\n",
+		(state == TPACPI_RFK_RADIO_ON) ? "enable" : "disable");
 
 #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
 	if (dbg_uwbemul) {
-		tpacpi_uwb_emulstate = !!radio_on;
-		if (update_rfk)
-			uwb_update_rfk();
+		tpacpi_uwb_emulstate = (state == TPACPI_RFK_RADIO_ON);
 		return 0;
 	}
 #endif
 
-	status = (radio_on) ? TP_ACPI_UWB_RADIOSSW : 0;
+	if (state == TPACPI_RFK_RADIO_ON)
+		status = TP_ACPI_UWB_RADIOSSW;
+	else
+		status = 0;
+
 	if (!acpi_evalf(hkey_handle, NULL, "SUWB", "vd", status))
 		return -EIO;
 
-	if (update_rfk)
-		uwb_update_rfk();
-
 	return 0;
 }
 
 /* --------------------------------------------------------------------- */
 
-static int tpacpi_uwb_rfk_get(void *data, enum rfkill_state *state)
-{
-	int uwbs = uwb_get_radiosw();
-
-	if (uwbs < 0)
-		return uwbs;
-
-	*state = uwbs;
-	return 0;
-}
-
-static int tpacpi_uwb_rfk_set(void *data, enum rfkill_state state)
-{
-	dbg_printk(TPACPI_DBG_RFKILL,
-		   "request to change radio state to %d\n", state);
-	return uwb_set_radiosw((state == RFKILL_STATE_UNBLOCKED), 0);
-}
+static const struct tpacpi_rfk_ops uwb_tprfk_ops = {
+	.get_status = uwb_get_status,
+	.set_status = uwb_set_status,
+};
 
 static void uwb_exit(void)
 {
-	if (tpacpi_uwb_rfkill)
-		rfkill_unregister(tpacpi_uwb_rfkill);
+	tpacpi_destroy_rfkill(TPACPI_RFK_UWB_SW_ID);
 }
 
 static int __init uwb_init(struct ibm_init_struct *iibm)
@@ -3813,13 +3847,10 @@ static int __init uwb_init(struct ibm_init_struct *iibm)
 		return 1;
 
 	res = tpacpi_new_rfkill(TPACPI_RFK_UWB_SW_ID,
-				&tpacpi_uwb_rfkill,
+				&uwb_tprfk_ops,
 				RFKILL_TYPE_UWB,
 				TPACPI_RFK_UWB_SW_NAME,
-				false,
-				tpacpi_uwb_rfk_set,
-				tpacpi_uwb_rfk_get);
-
+				false);
 	return res;
 }
 
diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index 4345089f517..81d31ea507d 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -45,7 +45,6 @@
 #include <linux/backlight.h>
 #include <linux/platform_device.h>
 #include <linux/rfkill.h>
-#include <linux/input-polldev.h>
 
 #include <asm/uaccess.h>
 
@@ -250,21 +249,15 @@ static acpi_status hci_read2(u32 reg, u32 *out1, u32 *out2, u32 *result)
 
 struct toshiba_acpi_dev {
 	struct platform_device *p_dev;
-	struct rfkill *rfk_dev;
-	struct input_polled_dev *poll_dev;
+	struct rfkill *bt_rfk;
 
 	const char *bt_name;
-	const char *rfk_name;
-
-	bool last_rfk_state;
 
 	struct mutex mutex;
 };
 
 static struct toshiba_acpi_dev toshiba_acpi = {
 	.bt_name = "Toshiba Bluetooth",
-	.rfk_name = "Toshiba RFKill Switch",
-	.last_rfk_state = false,
 };
 
 /* Bluetooth rfkill handlers */
@@ -283,21 +276,6 @@ static u32 hci_get_bt_present(bool *present)
 	return hci_result;
 }
 
-static u32 hci_get_bt_on(bool *on)
-{
-	u32 hci_result;
-	u32 value, value2;
-
-	value = 0;
-	value2 = 0x0001;
-	hci_read2(HCI_WIRELESS, &value, &value2, &hci_result);
-	if (hci_result == HCI_SUCCESS)
-		*on = (value & HCI_WIRELESS_BT_POWER) &&
-		      (value & HCI_WIRELESS_BT_ATTACH);
-
-	return hci_result;
-}
-
 static u32 hci_get_radio_state(bool *radio_state)
 {
 	u32 hci_result;
@@ -311,70 +289,67 @@ static u32 hci_get_radio_state(bool *radio_state)
 	return hci_result;
 }
 
-static int bt_rfkill_toggle_radio(void *data, enum rfkill_state state)
+static int bt_rfkill_set_block(void *data, bool blocked)
 {
+	struct toshiba_acpi_dev *dev = data;
 	u32 result1, result2;
 	u32 value;
+	int err;
 	bool radio_state;
-	struct toshiba_acpi_dev *dev = data;
 
-	value = (state == RFKILL_STATE_UNBLOCKED);
+	value = (blocked == false);
 
-	if (hci_get_radio_state(&radio_state) != HCI_SUCCESS)
-		return -EFAULT;
+	mutex_lock(&dev->mutex);
+	if (hci_get_radio_state(&radio_state) != HCI_SUCCESS) {
+		err = -EBUSY;
+		goto out;
+	}
 
-	switch (state) {
-	case RFKILL_STATE_UNBLOCKED:
-		if (!radio_state)
-			return -EPERM;
-		break;
-	case RFKILL_STATE_SOFT_BLOCKED:
-		break;
-	default:
-		return -EINVAL;
+	if (!radio_state) {
+		err = 0;
+		goto out;
 	}
 
-	mutex_lock(&dev->mutex);
 	hci_write2(HCI_WIRELESS, value, HCI_WIRELESS_BT_POWER, &result1);
 	hci_write2(HCI_WIRELESS, value, HCI_WIRELESS_BT_ATTACH, &result2);
-	mutex_unlock(&dev->mutex);
 
 	if (result1 != HCI_SUCCESS || result2 != HCI_SUCCESS)
-		return -EFAULT;
-
-	return 0;
+		err = -EBUSY;
+	else
+		err = 0;
+ out:
+	mutex_unlock(&dev->mutex);
+	return err;
 }
 
-static void bt_poll_rfkill(struct input_polled_dev *poll_dev)
+static void bt_rfkill_poll(struct rfkill *rfkill, void *data)
 {
-	bool state_changed;
 	bool new_rfk_state;
 	bool value;
 	u32 hci_result;
-	struct toshiba_acpi_dev *dev = poll_dev->private;
+	struct toshiba_acpi_dev *dev = data;
+
+	mutex_lock(&dev->mutex);
 
 	hci_result = hci_get_radio_state(&value);
-	if (hci_result != HCI_SUCCESS)
-		return; /* Can't do anything useful */
+	if (hci_result != HCI_SUCCESS) {
+		/* Can't do anything useful */
+		mutex_unlock(&dev->mutex);
+	}
 
 	new_rfk_state = value;
 
-	mutex_lock(&dev->mutex);
-	state_changed = new_rfk_state != dev->last_rfk_state;
-	dev->last_rfk_state = new_rfk_state;
 	mutex_unlock(&dev->mutex);
 
-	if (unlikely(state_changed)) {
-		rfkill_force_state(dev->rfk_dev,
-				   new_rfk_state ?
-				   RFKILL_STATE_SOFT_BLOCKED :
-				   RFKILL_STATE_HARD_BLOCKED);
-		input_report_switch(poll_dev->input, SW_RFKILL_ALL,
-				    new_rfk_state);
-		input_sync(poll_dev->input);
-	}
+	if (rfkill_set_hw_state(rfkill, !new_rfk_state))
+		bt_rfkill_set_block(data, true);
 }
 
+static const struct rfkill_ops toshiba_rfk_ops = {
+	.set_block = bt_rfkill_set_block,
+	.poll = bt_rfkill_poll,
+};
+
 static struct proc_dir_entry *toshiba_proc_dir /*= 0*/ ;
 static struct backlight_device *toshiba_backlight_device;
 static int force_fan;
@@ -702,14 +677,11 @@ static struct backlight_ops toshiba_backlight_data = {
 
 static void toshiba_acpi_exit(void)
 {
-	if (toshiba_acpi.poll_dev) {
-		input_unregister_polled_device(toshiba_acpi.poll_dev);
-		input_free_polled_device(toshiba_acpi.poll_dev);
+	if (toshiba_acpi.bt_rfk) {
+		rfkill_unregister(toshiba_acpi.bt_rfk);
+		rfkill_destroy(toshiba_acpi.bt_rfk);
 	}
 
-	if (toshiba_acpi.rfk_dev)
-		rfkill_unregister(toshiba_acpi.rfk_dev);
-
 	if (toshiba_backlight_device)
 		backlight_device_unregister(toshiba_backlight_device);
 
@@ -728,8 +700,6 @@ static int __init toshiba_acpi_init(void)
 	acpi_status status = AE_OK;
 	u32 hci_result;
 	bool bt_present;
-	bool bt_on;
-	bool radio_on;
 	int ret = 0;
 
 	if (acpi_disabled)
@@ -793,60 +763,21 @@ static int __init toshiba_acpi_init(void)
 
 	/* Register rfkill switch for Bluetooth */
 	if (hci_get_bt_present(&bt_present) == HCI_SUCCESS && bt_present) {
-		toshiba_acpi.rfk_dev = rfkill_allocate(&toshiba_acpi.p_dev->dev,
-							RFKILL_TYPE_BLUETOOTH);
-		if (!toshiba_acpi.rfk_dev) {
+		toshiba_acpi.bt_rfk = rfkill_alloc(toshiba_acpi.bt_name,
+						   &toshiba_acpi.p_dev->dev,
+						   RFKILL_TYPE_BLUETOOTH,
+						   &toshiba_rfk_ops,
+						   &toshiba_acpi);
+		if (!toshiba_acpi.bt_rfk) {
 			printk(MY_ERR "unable to allocate rfkill device\n");
 			toshiba_acpi_exit();
 			return -ENOMEM;
 		}
 
-		toshiba_acpi.rfk_dev->name = toshiba_acpi.bt_name;
-		toshiba_acpi.rfk_dev->toggle_radio = bt_rfkill_toggle_radio;
-		toshiba_acpi.rfk_dev->data = &toshiba_acpi;
-
-		if (hci_get_bt_on(&bt_on) == HCI_SUCCESS && bt_on) {
-			toshiba_acpi.rfk_dev->state = RFKILL_STATE_UNBLOCKED;
-		} else if (hci_get_radio_state(&radio_on) == HCI_SUCCESS &&
-			   radio_on) {
-			toshiba_acpi.rfk_dev->state = RFKILL_STATE_SOFT_BLOCKED;
-		} else {
-			toshiba_acpi.rfk_dev->state = RFKILL_STATE_HARD_BLOCKED;
-		}
-
-		ret = rfkill_register(toshiba_acpi.rfk_dev);
+		ret = rfkill_register(toshiba_acpi.bt_rfk);
 		if (ret) {
 			printk(MY_ERR "unable to register rfkill device\n");
-			toshiba_acpi_exit();
-			return -ENOMEM;
-		}
-
-		/* Register input device for kill switch */
-		toshiba_acpi.poll_dev = input_allocate_polled_device();
-		if (!toshiba_acpi.poll_dev) {
-			printk(MY_ERR
-			       "unable to allocate kill-switch input device\n");
-			toshiba_acpi_exit();
-			return -ENOMEM;
-		}
-		toshiba_acpi.poll_dev->private = &toshiba_acpi;
-		toshiba_acpi.poll_dev->poll = bt_poll_rfkill;
-		toshiba_acpi.poll_dev->poll_interval = 1000; /* msecs */
-
-		toshiba_acpi.poll_dev->input->name = toshiba_acpi.rfk_name;
-		toshiba_acpi.poll_dev->input->id.bustype = BUS_HOST;
-		/* Toshiba USB ID */
-		toshiba_acpi.poll_dev->input->id.vendor = 0x0930;
-		set_bit(EV_SW, toshiba_acpi.poll_dev->input->evbit);
-		set_bit(SW_RFKILL_ALL, toshiba_acpi.poll_dev->input->swbit);
-		input_report_switch(toshiba_acpi.poll_dev->input,
-				    SW_RFKILL_ALL, TRUE);
-		input_sync(toshiba_acpi.poll_dev->input);
-
-		ret = input_register_polled_device(toshiba_acpi.poll_dev);
-		if (ret) {
-			printk(MY_ERR
-			       "unable to register kill-switch input device\n");
+			rfkill_destroy(toshiba_acpi.bt_rfk);
 			toshiba_acpi_exit();
 			return ret;
 		}
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 3f0eaa397ef..7e09c5c1ed0 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -311,6 +311,7 @@ unifdef-y += ptrace.h
 unifdef-y += qnx4_fs.h
 unifdef-y += quota.h
 unifdef-y += random.h
+unifdef-y += rfkill.h
 unifdef-y += irqnr.h
 unifdef-y += reboot.h
 unifdef-y += reiserfs_fs.h
diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index de18ef227e0..090852c8de7 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -4,6 +4,7 @@
 /*
  * Copyright (C) 2006 - 2007 Ivo van Doorn
  * Copyright (C) 2007 Dmitry Torokhov
+ * Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -21,6 +22,24 @@
  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  */
 
+
+/* define userspace visible states */
+#define RFKILL_STATE_SOFT_BLOCKED	0
+#define RFKILL_STATE_UNBLOCKED		1
+#define RFKILL_STATE_HARD_BLOCKED	2
+
+/* and that's all userspace gets */
+#ifdef __KERNEL__
+/* don't allow anyone to use these in the kernel */
+enum rfkill_user_states {
+	RFKILL_USER_STATE_SOFT_BLOCKED	= RFKILL_STATE_SOFT_BLOCKED,
+	RFKILL_USER_STATE_UNBLOCKED	= RFKILL_STATE_UNBLOCKED,
+	RFKILL_USER_STATE_HARD_BLOCKED	= RFKILL_STATE_HARD_BLOCKED,
+};
+#undef RFKILL_STATE_SOFT_BLOCKED
+#undef RFKILL_STATE_UNBLOCKED
+#undef RFKILL_STATE_HARD_BLOCKED
+
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
@@ -30,109 +49,267 @@
 
 /**
  * enum rfkill_type - type of rfkill switch.
- * RFKILL_TYPE_WLAN: switch is on a 802.11 wireless network device.
- * RFKILL_TYPE_BLUETOOTH: switch is on a bluetooth device.
- * RFKILL_TYPE_UWB: switch is on a ultra wideband device.
- * RFKILL_TYPE_WIMAX: switch is on a WiMAX device.
- * RFKILL_TYPE_WWAN: switch is on a wireless WAN device.
+ *
+ * @RFKILL_TYPE_WLAN: switch is on a 802.11 wireless network device.
+ * @RFKILL_TYPE_BLUETOOTH: switch is on a bluetooth device.
+ * @RFKILL_TYPE_UWB: switch is on a ultra wideband device.
+ * @RFKILL_TYPE_WIMAX: switch is on a WiMAX device.
+ * @RFKILL_TYPE_WWAN: switch is on a wireless WAN device.
+ * @NUM_RFKILL_TYPES: number of defined rfkill types
  */
 enum rfkill_type {
-	RFKILL_TYPE_WLAN ,
+	RFKILL_TYPE_WLAN,
 	RFKILL_TYPE_BLUETOOTH,
 	RFKILL_TYPE_UWB,
 	RFKILL_TYPE_WIMAX,
 	RFKILL_TYPE_WWAN,
-	RFKILL_TYPE_MAX,
+	NUM_RFKILL_TYPES,
 };
 
-enum rfkill_state {
-	RFKILL_STATE_SOFT_BLOCKED = 0,	/* Radio output blocked */
-	RFKILL_STATE_UNBLOCKED    = 1,	/* Radio output allowed */
-	RFKILL_STATE_HARD_BLOCKED = 2,	/* Output blocked, non-overrideable */
-	RFKILL_STATE_MAX,		/* marker for last valid state */
+/* this is opaque */
+struct rfkill;
+
+/**
+ * struct rfkill_ops - rfkill driver methods
+ *
+ * @poll: poll the rfkill block state(s) -- only assign this method
+ *	when you need polling. When called, simply call one of the
+ *	rfkill_set{,_hw,_sw}_state family of functions. If the hw
+ *	is getting unblocked you need to take into account the return
+ *	value of those functions to make sure the software block is
+ *	properly used.
+ * @query: query the rfkill block state(s) and call exactly one of the
+ *	rfkill_set{,_hw,_sw}_state family of functions. Assign this
+ *	method if input events can cause hardware state changes to make
+ *	the rfkill core query your driver before setting a requested
+ *	block.
+ * @set_block: turn the transmitter on (blocked == false) or off
+ *	(blocked == true) -- this is called only while the transmitter
+ *	is not hard-blocked, but note that the core's view of whether
+ *	the transmitter is hard-blocked might differ from your driver's
+ *	view due to race conditions, so it is possible that it is still
+ *	called at the same time as you are calling rfkill_set_hw_state().
+ *	This callback must be assigned.
+ */
+struct rfkill_ops {
+	void	(*poll)(struct rfkill *rfkill, void *data);
+	void	(*query)(struct rfkill *rfkill, void *data);
+	int	(*set_block)(void *data, bool blocked);
 };
 
+#if defined(CONFIG_RFKILL) || defined(CONFIG_RFKILL_MODULE)
 /**
- * struct rfkill - rfkill control structure.
- * @name: Name of the switch.
- * @type: Radio type which the button controls, the value stored
- *	here should be a value from enum rfkill_type.
- * @state: State of the switch, "UNBLOCKED" means radio can operate.
- * @mutex: Guards switch state transitions.  It serializes callbacks
- *	and also protects the state.
- * @data: Pointer to the RF button drivers private data which will be
- *	passed along when toggling radio state.
- * @toggle_radio(): Mandatory handler to control state of the radio.
- *	only RFKILL_STATE_SOFT_BLOCKED and RFKILL_STATE_UNBLOCKED are
- *	valid parameters.
- * @get_state(): handler to read current radio state from hardware,
- *      may be called from atomic context, should return 0 on success.
- *      Either this handler OR judicious use of rfkill_force_state() is
- *      MANDATORY for any driver capable of RFKILL_STATE_HARD_BLOCKED.
- * @led_trigger: A LED trigger for this button's LED.
- * @dev: Device structure integrating the switch into device tree.
- * @node: Used to place switch into list of all switches known to the
- *	the system.
- *
- * This structure represents a RF switch located on a network device.
+ * rfkill_alloc - allocate rfkill structure
+ * @name: name of the struct -- the string is not copied internally
+ * @parent: device that has rf switch on it
+ * @type: type of the switch (RFKILL_TYPE_*)
+ * @ops: rfkill methods
+ * @ops_data: data passed to each method
+ *
+ * This function should be called by the transmitter driver to allocate an
+ * rfkill structure. Returns %NULL on failure.
  */
-struct rfkill {
-	const char *name;
-	enum rfkill_type type;
-
-	/* the mutex serializes callbacks and also protects
-	 * the state */
-	struct mutex mutex;
-	enum rfkill_state state;
-	void *data;
-	int (*toggle_radio)(void *data, enum rfkill_state state);
-	int (*get_state)(void *data, enum rfkill_state *state);
+struct rfkill * __must_check rfkill_alloc(const char *name,
+					  struct device *parent,
+					  const enum rfkill_type type,
+					  const struct rfkill_ops *ops,
+					  void *ops_data);
 
-#ifdef CONFIG_RFKILL_LEDS
-	struct led_trigger led_trigger;
-#endif
+/**
+ * rfkill_register - Register a rfkill structure.
+ * @rfkill: rfkill structure to be registered
+ *
+ * This function should be called by the transmitter driver to register
+ * the rfkill structure needs to be registered. Before calling this function
+ * the driver needs to be ready to service method calls from rfkill.
+ */
+int __must_check rfkill_register(struct rfkill *rfkill);
 
-	struct device dev;
-	struct list_head node;
-	enum rfkill_state state_for_resume;
-};
-#define to_rfkill(d)	container_of(d, struct rfkill, dev)
+/**
+ * rfkill_pause_polling(struct rfkill *rfkill)
+ *
+ * Pause polling -- say transmitter is off for other reasons.
+ * NOTE: not necessary for suspend/resume -- in that case the
+ * core stops polling anyway
+ */
+void rfkill_pause_polling(struct rfkill *rfkill);
 
-struct rfkill * __must_check rfkill_allocate(struct device *parent,
-					     enum rfkill_type type);
-void rfkill_free(struct rfkill *rfkill);
-int __must_check rfkill_register(struct rfkill *rfkill);
+/**
+ * rfkill_resume_polling(struct rfkill *rfkill)
+ *
+ * Pause polling -- say transmitter is off for other reasons.
+ * NOTE: not necessary for suspend/resume -- in that case the
+ * core stops polling anyway
+ */
+void rfkill_resume_polling(struct rfkill *rfkill);
+
+
+/**
+ * rfkill_unregister - Unregister a rfkill structure.
+ * @rfkill: rfkill structure to be unregistered
+ *
+ * This function should be called by the network driver during device
+ * teardown to destroy rfkill structure. Until it returns, the driver
+ * needs to be able to service method calls.
+ */
 void rfkill_unregister(struct rfkill *rfkill);
 
-int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state);
-int rfkill_set_default(enum rfkill_type type, enum rfkill_state state);
+/**
+ * rfkill_destroy - free rfkill structure
+ * @rfkill: rfkill structure to be destroyed
+ *
+ * Destroys the rfkill structure.
+ */
+void rfkill_destroy(struct rfkill *rfkill);
+
+/**
+ * rfkill_set_hw_state - Set the internal rfkill hardware block state
+ * @rfkill: pointer to the rfkill class to modify.
+ * @state: the current hardware block state to set
+ *
+ * rfkill drivers that get events when the hard-blocked state changes
+ * use this function to notify the rfkill core (and through that also
+ * userspace) of the current state -- they should also use this after
+ * resume if the state could have changed.
+ *
+ * You need not (but may) call this function if poll_state is assigned.
+ *
+ * This function can be called in any context, even from within rfkill
+ * callbacks.
+ *
+ * The function returns the combined block state (true if transmitter
+ * should be blocked) so that drivers need not keep track of the soft
+ * block state -- which they might not be able to.
+ */
+bool __must_check rfkill_set_hw_state(struct rfkill *rfkill, bool blocked);
+
+/**
+ * rfkill_set_sw_state - Set the internal rfkill software block state
+ * @rfkill: pointer to the rfkill class to modify.
+ * @state: the current software block state to set
+ *
+ * rfkill drivers that get events when the soft-blocked state changes
+ * (yes, some platforms directly act on input but allow changing again)
+ * use this function to notify the rfkill core (and through that also
+ * userspace) of the current state -- they should also use this after
+ * resume if the state could have changed.
+ *
+ * This function can be called in any context, even from within rfkill
+ * callbacks.
+ *
+ * The function returns the combined block state (true if transmitter
+ * should be blocked).
+ */
+bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked);
+
+/**
+ * rfkill_set_states - Set the internal rfkill block states
+ * @rfkill: pointer to the rfkill class to modify.
+ * @sw: the current software block state to set
+ * @hw: the current hardware block state to set
+ *
+ * This function can be called in any context, even from within rfkill
+ * callbacks.
+ */
+void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw);
 
 /**
- * rfkill_state_complement - return complementar state
- * @state: state to return the complement of
+ * rfkill_set_global_sw_state - set global sw block default
+ * @type: rfkill type to set default for
+ * @blocked: default to set
  *
- * Returns RFKILL_STATE_SOFT_BLOCKED if @state is RFKILL_STATE_UNBLOCKED,
- * returns RFKILL_STATE_UNBLOCKED otherwise.
+ * This function sets the global default -- use at boot if your platform has
+ * an rfkill switch. If not early enough this call may be ignored.
+ *
+ * XXX: instead of ignoring -- how about just updating all currently
+ *	registered drivers?
  */
-static inline enum rfkill_state rfkill_state_complement(enum rfkill_state state)
+void rfkill_set_global_sw_state(const enum rfkill_type type, bool blocked);
+#else /* !RFKILL */
+static inline struct rfkill * __must_check
+rfkill_alloc(const char *name,
+	     struct device *parent,
+	     const enum rfkill_type type,
+	     const struct rfkill_ops *ops,
+	     void *ops_data)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline int __must_check rfkill_register(struct rfkill *rfkill)
+{
+	if (rfkill == ERR_PTR(-ENODEV))
+		return 0;
+	return -EINVAL;
+}
+
+static inline void rfkill_pause_polling(struct rfkill *rfkill)
+{
+}
+
+static inline void rfkill_resume_polling(struct rfkill *rfkill)
+{
+}
+
+static inline void rfkill_unregister(struct rfkill *rfkill)
+{
+}
+
+static inline void rfkill_destroy(struct rfkill *rfkill)
+{
+}
+
+static inline bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked)
+{
+	return blocked;
+}
+
+static inline bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked)
+{
+	return blocked;
+}
+
+static inline void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw)
+{
+}
+
+static inline void rfkill_set_global_sw_state(const enum rfkill_type type,
+					      bool blocked)
 {
-	return (state == RFKILL_STATE_UNBLOCKED) ?
-		RFKILL_STATE_SOFT_BLOCKED : RFKILL_STATE_UNBLOCKED;
 }
+#endif /* RFKILL || RFKILL_MODULE */
+
 
+#ifdef CONFIG_RFKILL_LEDS
 /**
- * rfkill_get_led_name - Get the LED trigger name for the button's LED.
+ * rfkill_get_led_trigger_name - Get the LED trigger name for the button's LED.
  * This function might return a NULL pointer if registering of the
- * LED trigger failed.
- * Use this as "default_trigger" for the LED.
+ * LED trigger failed. Use this as "default_trigger" for the LED.
  */
-static inline char *rfkill_get_led_name(struct rfkill *rfkill)
-{
-#ifdef CONFIG_RFKILL_LEDS
-	return (char *)(rfkill->led_trigger.name);
+const char *rfkill_get_led_trigger_name(struct rfkill *rfkill);
+
+/**
+ * rfkill_set_led_trigger_name -- set the LED trigger name
+ * @rfkill: rfkill struct
+ * @name: LED trigger name
+ *
+ * This function sets the LED trigger name of the radio LED
+ * trigger that rfkill creates. It is optional, but if called
+ * must be called before rfkill_register() to be effective.
+ */
+void rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name);
 #else
+static inline const char *rfkill_get_led_trigger_name(struct rfkill *rfkill)
+{
 	return NULL;
-#endif
 }
 
+static inline void
+rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name)
+{
+}
+#endif
+
+#endif /* __KERNEL__ */
+
 #endif /* RFKILL_H */
diff --git a/include/net/wimax.h b/include/net/wimax.h
index 6b3824edb39..2af7bf839f2 100644
--- a/include/net/wimax.h
+++ b/include/net/wimax.h
@@ -253,7 +253,6 @@
 struct net_device;
 struct genl_info;
 struct wimax_dev;
-struct input_dev;
 
 /**
  * struct wimax_dev - Generic WiMAX device
@@ -293,8 +292,8 @@ struct input_dev;
  *     See wimax_reset()'s documentation.
  *
  * @name: [fill] A way to identify this device. We need to register a
- *     name with many subsystems (input for RFKILL, workqueue
- *     creation, etc). We can't use the network device name as that
+ *     name with many subsystems (rfkill, workqueue creation, etc).
+ *     We can't use the network device name as that
  *     might change and in some instances we don't know it yet (until
  *     we don't call register_netdev()). So we generate an unique one
  *     using the driver name and device bus id, place it here and use
@@ -316,9 +315,6 @@ struct input_dev;
  *
  * @rfkill: [private] integration into the RF-Kill infrastructure.
  *
- * @rfkill_input: [private] virtual input device to process the
- *     hardware RF Kill switches.
- *
  * @rf_sw: [private] State of the software radio switch (OFF/ON)
  *
  * @rf_hw: [private] State of the hardware radio switch (OFF/ON)
diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig
index 7f807b30cfb..b47f72fae05 100644
--- a/net/rfkill/Kconfig
+++ b/net/rfkill/Kconfig
@@ -10,22 +10,15 @@ menuconfig RFKILL
 	  To compile this driver as a module, choose M here: the
 	  module will be called rfkill.
 
-config RFKILL_INPUT
-	tristate "Input layer to RF switch connector"
-	depends on RFKILL && INPUT
-	help
-	  Say Y here if you want kernel automatically toggle state
-	  of RF switches on and off when user presses appropriate
-	  button or a key on the keyboard. Without this module you
-	  need a some kind of userspace application to control
-	  state of the switches.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called rfkill-input.
-
 # LED trigger support
 config RFKILL_LEDS
 	bool
-	depends on RFKILL && LEDS_TRIGGERS
+	depends on RFKILL
+	depends on LEDS_TRIGGERS = y || RFKILL = LEDS_TRIGGERS
 	default y
 
+config RFKILL_INPUT
+	bool
+	depends on RFKILL
+	depends on INPUT = y || RFKILL = INPUT
+	default y
diff --git a/net/rfkill/Makefile b/net/rfkill/Makefile
index b38c430be05..66210535269 100644
--- a/net/rfkill/Makefile
+++ b/net/rfkill/Makefile
@@ -2,5 +2,6 @@
 # Makefile for the RF switch subsystem.
 #
 
-obj-$(CONFIG_RFKILL)			+= rfkill.o
-obj-$(CONFIG_RFKILL_INPUT)		+= rfkill-input.o
+rfkill-y			+= core.o
+rfkill-$(CONFIG_RFKILL_INPUT)	+= input.o
+obj-$(CONFIG_RFKILL)		+= rfkill.o
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
new file mode 100644
index 00000000000..30a6f8d819b
--- /dev/null
+++ b/net/rfkill/core.c
@@ -0,0 +1,896 @@
+/*
+ * Copyright (C) 2006 - 2007 Ivo van Doorn
+ * Copyright (C) 2007 Dmitry Torokhov
+ * Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/workqueue.h>
+#include <linux/capability.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/rfkill.h>
+#include <linux/spinlock.h>
+
+#include "rfkill.h"
+
+#define POLL_INTERVAL		(5 * HZ)
+
+#define RFKILL_BLOCK_HW		BIT(0)
+#define RFKILL_BLOCK_SW		BIT(1)
+#define RFKILL_BLOCK_SW_PREV	BIT(2)
+#define RFKILL_BLOCK_ANY	(RFKILL_BLOCK_HW |\
+				 RFKILL_BLOCK_SW |\
+				 RFKILL_BLOCK_SW_PREV)
+#define RFKILL_BLOCK_SW_SETCALL	BIT(31)
+
+struct rfkill {
+	spinlock_t		lock;
+
+	const char		*name;
+	enum rfkill_type	type;
+
+	unsigned long		state;
+
+	bool			registered;
+	bool			suspended;
+
+	const struct rfkill_ops	*ops;
+	void			*data;
+
+#ifdef CONFIG_RFKILL_LEDS
+	struct led_trigger	led_trigger;
+	const char		*ledtrigname;
+#endif
+
+	struct device		dev;
+	struct list_head	node;
+
+	struct delayed_work	poll_work;
+	struct work_struct	uevent_work;
+	struct work_struct	sync_work;
+};
+#define to_rfkill(d)	container_of(d, struct rfkill, dev)
+
+
+
+MODULE_AUTHOR("Ivo van Doorn <IvDoorn@gmail.com>");
+MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>");
+MODULE_DESCRIPTION("RF switch support");
+MODULE_LICENSE("GPL");
+
+
+/*
+ * The locking here should be made much smarter, we currently have
+ * a bit of a stupid situation because drivers might want to register
+ * the rfkill struct under their own lock, and take this lock during
+ * rfkill method calls -- which will cause an AB-BA deadlock situation.
+ *
+ * To fix that, we need to rework this code here to be mostly lock-free
+ * and only use the mutex for list manipulations, not to protect the
+ * various other global variables. Then we can avoid holding the mutex
+ * around driver operations, and all is happy.
+ */
+static LIST_HEAD(rfkill_list);	/* list of registered rf switches */
+static DEFINE_MUTEX(rfkill_global_mutex);
+
+static unsigned int rfkill_default_state = 1;
+module_param_named(default_state, rfkill_default_state, uint, 0444);
+MODULE_PARM_DESC(default_state,
+		 "Default initial state for all radio types, 0 = radio off");
+
+static struct {
+	bool cur, def;
+} rfkill_global_states[NUM_RFKILL_TYPES];
+
+static unsigned long rfkill_states_default_locked;
+
+static bool rfkill_epo_lock_active;
+
+
+#ifdef CONFIG_RFKILL_LEDS
+static void rfkill_led_trigger_event(struct rfkill *rfkill)
+{
+	struct led_trigger *trigger;
+
+	if (!rfkill->registered)
+		return;
+
+	trigger = &rfkill->led_trigger;
+
+	if (rfkill->state & RFKILL_BLOCK_ANY)
+		led_trigger_event(trigger, LED_OFF);
+	else
+		led_trigger_event(trigger, LED_FULL);
+}
+
+static void rfkill_led_trigger_activate(struct led_classdev *led)
+{
+	struct rfkill *rfkill;
+
+	rfkill = container_of(led->trigger, struct rfkill, led_trigger);
+
+	rfkill_led_trigger_event(rfkill);
+}
+
+const char *rfkill_get_led_trigger_name(struct rfkill *rfkill)
+{
+	return rfkill->led_trigger.name;
+}
+EXPORT_SYMBOL(rfkill_get_led_trigger_name);
+
+void rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name)
+{
+	BUG_ON(!rfkill);
+
+	rfkill->ledtrigname = name;
+}
+EXPORT_SYMBOL(rfkill_set_led_trigger_name);
+
+static int rfkill_led_trigger_register(struct rfkill *rfkill)
+{
+	rfkill->led_trigger.name = rfkill->ledtrigname
+					? : dev_name(&rfkill->dev);
+	rfkill->led_trigger.activate = rfkill_led_trigger_activate;
+	return led_trigger_register(&rfkill->led_trigger);
+}
+
+static void rfkill_led_trigger_unregister(struct rfkill *rfkill)
+{
+	led_trigger_unregister(&rfkill->led_trigger);
+}
+#else
+static void rfkill_led_trigger_event(struct rfkill *rfkill)
+{
+}
+
+static inline int rfkill_led_trigger_register(struct rfkill *rfkill)
+{
+	return 0;
+}
+
+static inline void rfkill_led_trigger_unregister(struct rfkill *rfkill)
+{
+}
+#endif /* CONFIG_RFKILL_LEDS */
+
+static void rfkill_uevent(struct rfkill *rfkill)
+{
+	if (!rfkill->registered || rfkill->suspended)
+		return;
+
+	kobject_uevent(&rfkill->dev.kobj, KOBJ_CHANGE);
+}
+
+static bool __rfkill_set_hw_state(struct rfkill *rfkill,
+				  bool blocked, bool *change)
+{
+	unsigned long flags;
+	bool prev, any;
+
+	BUG_ON(!rfkill);
+
+	spin_lock_irqsave(&rfkill->lock, flags);
+	prev = !!(rfkill->state & RFKILL_BLOCK_HW);
+	if (blocked)
+		rfkill->state |= RFKILL_BLOCK_HW;
+	else
+		rfkill->state &= ~RFKILL_BLOCK_HW;
+	*change = prev != blocked;
+	any = rfkill->state & RFKILL_BLOCK_ANY;
+	spin_unlock_irqrestore(&rfkill->lock, flags);
+
+	rfkill_led_trigger_event(rfkill);
+
+	return any;
+}
+
+/**
+ * rfkill_set_block - wrapper for set_block method
+ *
+ * @rfkill: the rfkill struct to use
+ * @blocked: the new software state
+ *
+ * Calls the set_block method (when applicable) and handles notifications
+ * etc. as well.
+ */
+static void rfkill_set_block(struct rfkill *rfkill, bool blocked)
+{
+	unsigned long flags;
+	int err;
+
+	/*
+	 * Some platforms (...!) generate input events which affect the
+	 * _hard_ kill state -- whenever something tries to change the
+	 * current software state query the hardware state too.
+	 */
+	if (rfkill->ops->query)
+		rfkill->ops->query(rfkill, rfkill->data);
+
+	spin_lock_irqsave(&rfkill->lock, flags);
+	if (rfkill->state & RFKILL_BLOCK_SW)
+		rfkill->state |= RFKILL_BLOCK_SW_PREV;
+	else
+		rfkill->state &= ~RFKILL_BLOCK_SW_PREV;
+
+	if (blocked)
+		rfkill->state |= RFKILL_BLOCK_SW;
+	else
+		rfkill->state &= ~RFKILL_BLOCK_SW;
+
+	rfkill->state |= RFKILL_BLOCK_SW_SETCALL;
+	spin_unlock_irqrestore(&rfkill->lock, flags);
+
+	if (unlikely(rfkill->dev.power.power_state.event & PM_EVENT_SLEEP))
+		return;
+
+	err = rfkill->ops->set_block(rfkill->data, blocked);
+
+	spin_lock_irqsave(&rfkill->lock, flags);
+	if (err) {
+		/*
+		 * Failed -- reset status to _prev, this may be different
+		 * from what set set _PREV to earlier in this function
+		 * if rfkill_set_sw_state was invoked.
+		 */
+		if (rfkill->state & RFKILL_BLOCK_SW_PREV)
+			rfkill->state |= RFKILL_BLOCK_SW;
+		else
+			rfkill->state &= ~RFKILL_BLOCK_SW;
+	}
+	rfkill->state &= ~RFKILL_BLOCK_SW_SETCALL;
+	rfkill->state &= ~RFKILL_BLOCK_SW_PREV;
+	spin_unlock_irqrestore(&rfkill->lock, flags);
+
+	rfkill_led_trigger_event(rfkill);
+	rfkill_uevent(rfkill);
+}
+
+/**
+ * __rfkill_switch_all - Toggle state of all switches of given type
+ * @type: type of interfaces to be affected
+ * @state: the new state
+ *
+ * This function sets the state of all switches of given type,
+ * unless a specific switch is claimed by userspace (in which case,
+ * that switch is left alone) or suspended.
+ *
+ * Caller must have acquired rfkill_global_mutex.
+ */
+static void __rfkill_switch_all(const enum rfkill_type type, bool blocked)
+{
+	struct rfkill *rfkill;
+
+	rfkill_global_states[type].cur = blocked;
+	list_for_each_entry(rfkill, &rfkill_list, node) {
+		if (rfkill->type != type)
+			continue;
+
+		rfkill_set_block(rfkill, blocked);
+	}
+}
+
+/**
+ * rfkill_switch_all - Toggle state of all switches of given type
+ * @type: type of interfaces to be affected
+ * @state: the new state
+ *
+ * Acquires rfkill_global_mutex and calls __rfkill_switch_all(@type, @state).
+ * Please refer to __rfkill_switch_all() for details.
+ *
+ * Does nothing if the EPO lock is active.
+ */
+void rfkill_switch_all(enum rfkill_type type, bool blocked)
+{
+	mutex_lock(&rfkill_global_mutex);
+
+	if (!rfkill_epo_lock_active)
+		__rfkill_switch_all(type, blocked);
+
+	mutex_unlock(&rfkill_global_mutex);
+}
+
+/**
+ * rfkill_epo - emergency power off all transmitters
+ *
+ * This kicks all non-suspended rfkill devices to RFKILL_STATE_SOFT_BLOCKED,
+ * ignoring everything in its path but rfkill_global_mutex and rfkill->mutex.
+ *
+ * The global state before the EPO is saved and can be restored later
+ * using rfkill_restore_states().
+ */
+void rfkill_epo(void)
+{
+	struct rfkill *rfkill;
+	int i;
+
+	mutex_lock(&rfkill_global_mutex);
+
+	rfkill_epo_lock_active = true;
+	list_for_each_entry(rfkill, &rfkill_list, node)
+		rfkill_set_block(rfkill, true);
+
+	for (i = 0; i < NUM_RFKILL_TYPES; i++) {
+		rfkill_global_states[i].def = rfkill_global_states[i].cur;
+		rfkill_global_states[i].cur = true;
+	}
+	mutex_unlock(&rfkill_global_mutex);
+}
+
+/**
+ * rfkill_restore_states - restore global states
+ *
+ * Restore (and sync switches to) the global state from the
+ * states in rfkill_default_states.  This can undo the effects of
+ * a call to rfkill_epo().
+ */
+void rfkill_restore_states(void)
+{
+	int i;
+
+	mutex_lock(&rfkill_global_mutex);
+
+	rfkill_epo_lock_active = false;
+	for (i = 0; i < NUM_RFKILL_TYPES; i++)
+		__rfkill_switch_all(i, rfkill_global_states[i].def);
+	mutex_unlock(&rfkill_global_mutex);
+}
+
+/**
+ * rfkill_remove_epo_lock - unlock state changes
+ *
+ * Used by rfkill-input manually unlock state changes, when
+ * the EPO switch is deactivated.
+ */
+void rfkill_remove_epo_lock(void)
+{
+	mutex_lock(&rfkill_global_mutex);
+	rfkill_epo_lock_active = false;
+	mutex_unlock(&rfkill_global_mutex);
+}
+
+/**
+ * rfkill_is_epo_lock_active - returns true EPO is active
+ *
+ * Returns 0 (false) if there is NOT an active EPO contidion,
+ * and 1 (true) if there is an active EPO contition, which
+ * locks all radios in one of the BLOCKED states.
+ *
+ * Can be called in atomic context.
+ */
+bool rfkill_is_epo_lock_active(void)
+{
+	return rfkill_epo_lock_active;
+}
+
+/**
+ * rfkill_get_global_sw_state - returns global state for a type
+ * @type: the type to get the global state of
+ *
+ * Returns the current global state for a given wireless
+ * device type.
+ */
+bool rfkill_get_global_sw_state(const enum rfkill_type type)
+{
+	return rfkill_global_states[type].cur;
+}
+
+void rfkill_set_global_sw_state(const enum rfkill_type type, bool blocked)
+{
+	mutex_lock(&rfkill_global_mutex);
+
+	/* don't allow unblock when epo */
+	if (rfkill_epo_lock_active && !blocked)
+		goto out;
+
+	/* too late */
+	if (rfkill_states_default_locked & BIT(type))
+		goto out;
+
+	rfkill_states_default_locked |= BIT(type);
+
+	rfkill_global_states[type].cur = blocked;
+	rfkill_global_states[type].def = blocked;
+ out:
+	mutex_unlock(&rfkill_global_mutex);
+}
+EXPORT_SYMBOL(rfkill_set_global_sw_state);
+
+
+bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked)
+{
+	bool ret, change;
+
+	ret = __rfkill_set_hw_state(rfkill, blocked, &change);
+
+	if (!rfkill->registered)
+		return ret;
+
+	if (change)
+		schedule_work(&rfkill->uevent_work);
+
+	return ret;
+}
+EXPORT_SYMBOL(rfkill_set_hw_state);
+
+static void __rfkill_set_sw_state(struct rfkill *rfkill, bool blocked)
+{
+	u32 bit = RFKILL_BLOCK_SW;
+
+	/* if in a ops->set_block right now, use other bit */
+	if (rfkill->state & RFKILL_BLOCK_SW_SETCALL)
+		bit = RFKILL_BLOCK_SW_PREV;
+
+	if (blocked)
+		rfkill->state |= bit;
+	else
+		rfkill->state &= ~bit;
+}
+
+bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked)
+{
+	unsigned long flags;
+	bool prev, hwblock;
+
+	BUG_ON(!rfkill);
+
+	spin_lock_irqsave(&rfkill->lock, flags);
+	prev = !!(rfkill->state & RFKILL_BLOCK_SW);
+	__rfkill_set_sw_state(rfkill, blocked);
+	hwblock = !!(rfkill->state & RFKILL_BLOCK_HW);
+	blocked = blocked || hwblock;
+	spin_unlock_irqrestore(&rfkill->lock, flags);
+
+	if (!rfkill->registered)
+		return blocked;
+
+	if (prev != blocked && !hwblock)
+		schedule_work(&rfkill->uevent_work);
+
+	rfkill_led_trigger_event(rfkill);
+
+	return blocked;
+}
+EXPORT_SYMBOL(rfkill_set_sw_state);
+
+void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw)
+{
+	unsigned long flags;
+	bool swprev, hwprev;
+
+	BUG_ON(!rfkill);
+
+	spin_lock_irqsave(&rfkill->lock, flags);
+
+	/*
+	 * No need to care about prev/setblock ... this is for uevent only
+	 * and that will get triggered by rfkill_set_block anyway.
+	 */
+	swprev = !!(rfkill->state & RFKILL_BLOCK_SW);
+	hwprev = !!(rfkill->state & RFKILL_BLOCK_HW);
+	__rfkill_set_sw_state(rfkill, sw);
+
+	spin_unlock_irqrestore(&rfkill->lock, flags);
+
+	if (!rfkill->registered)
+		return;
+
+	if (swprev != sw || hwprev != hw)
+		schedule_work(&rfkill->uevent_work);
+
+	rfkill_led_trigger_event(rfkill);
+}
+EXPORT_SYMBOL(rfkill_set_states);
+
+static ssize_t rfkill_name_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct rfkill *rfkill = to_rfkill(dev);
+
+	return sprintf(buf, "%s\n", rfkill->name);
+}
+
+static const char *rfkill_get_type_str(enum rfkill_type type)
+{
+	switch (type) {
+	case RFKILL_TYPE_WLAN:
+		return "wlan";
+	case RFKILL_TYPE_BLUETOOTH:
+		return "bluetooth";
+	case RFKILL_TYPE_UWB:
+		return "ultrawideband";
+	case RFKILL_TYPE_WIMAX:
+		return "wimax";
+	case RFKILL_TYPE_WWAN:
+		return "wwan";
+	default:
+		BUG();
+	}
+
+	BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_WWAN + 1);
+}
+
+static ssize_t rfkill_type_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct rfkill *rfkill = to_rfkill(dev);
+
+	return sprintf(buf, "%s\n", rfkill_get_type_str(rfkill->type));
+}
+
+static u8 user_state_from_blocked(unsigned long state)
+{
+	if (state & RFKILL_BLOCK_HW)
+		return RFKILL_USER_STATE_HARD_BLOCKED;
+	if (state & RFKILL_BLOCK_SW)
+		return RFKILL_USER_STATE_SOFT_BLOCKED;
+
+	return RFKILL_USER_STATE_UNBLOCKED;
+}
+
+static ssize_t rfkill_state_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	struct rfkill *rfkill = to_rfkill(dev);
+	unsigned long flags;
+	u32 state;
+
+	spin_lock_irqsave(&rfkill->lock, flags);
+	state = rfkill->state;
+	spin_unlock_irqrestore(&rfkill->lock, flags);
+
+	return sprintf(buf, "%d\n", user_state_from_blocked(state));
+}
+
+static ssize_t rfkill_state_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	/*
+	 * The intention was that userspace can only take control over
+	 * a given device when/if rfkill-input doesn't control it due
+	 * to user_claim. Since user_claim is currently unsupported,
+	 * we never support changing the state from userspace -- this
+	 * can be implemented again later.
+	 */
+
+	return -EPERM;
+}
+
+static ssize_t rfkill_claim_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	return sprintf(buf, "%d\n", 0);
+}
+
+static ssize_t rfkill_claim_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	return -EOPNOTSUPP;
+}
+
+static struct device_attribute rfkill_dev_attrs[] = {
+	__ATTR(name, S_IRUGO, rfkill_name_show, NULL),
+	__ATTR(type, S_IRUGO, rfkill_type_show, NULL),
+	__ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store),
+	__ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store),
+	__ATTR_NULL
+};
+
+static void rfkill_release(struct device *dev)
+{
+	struct rfkill *rfkill = to_rfkill(dev);
+
+	kfree(rfkill);
+}
+
+static int rfkill_dev_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct rfkill *rfkill = to_rfkill(dev);
+	unsigned long flags;
+	u32 state;
+	int error;
+
+	error = add_uevent_var(env, "RFKILL_NAME=%s", rfkill->name);
+	if (error)
+		return error;
+	error = add_uevent_var(env, "RFKILL_TYPE=%s",
+			       rfkill_get_type_str(rfkill->type));
+	if (error)
+		return error;
+	spin_lock_irqsave(&rfkill->lock, flags);
+	state = rfkill->state;
+	spin_unlock_irqrestore(&rfkill->lock, flags);
+	error = add_uevent_var(env, "RFKILL_STATE=%d",
+			       user_state_from_blocked(state));
+	return error;
+}
+
+void rfkill_pause_polling(struct rfkill *rfkill)
+{
+	BUG_ON(!rfkill);
+
+	if (!rfkill->ops->poll)
+		return;
+
+	cancel_delayed_work_sync(&rfkill->poll_work);
+}
+EXPORT_SYMBOL(rfkill_pause_polling);
+
+void rfkill_resume_polling(struct rfkill *rfkill)
+{
+	BUG_ON(!rfkill);
+
+	if (!rfkill->ops->poll)
+		return;
+
+	schedule_work(&rfkill->poll_work.work);
+}
+EXPORT_SYMBOL(rfkill_resume_polling);
+
+static int rfkill_suspend(struct device *dev, pm_message_t state)
+{
+	struct rfkill *rfkill = to_rfkill(dev);
+
+	rfkill_pause_polling(rfkill);
+
+	rfkill->suspended = true;
+
+	return 0;
+}
+
+static int rfkill_resume(struct device *dev)
+{
+	struct rfkill *rfkill = to_rfkill(dev);
+	bool cur;
+
+	mutex_lock(&rfkill_global_mutex);
+	cur = rfkill_global_states[rfkill->type].cur;
+	rfkill_set_block(rfkill, cur);
+	mutex_unlock(&rfkill_global_mutex);
+
+	rfkill->suspended = false;
+
+	schedule_work(&rfkill->uevent_work);
+
+	rfkill_resume_polling(rfkill);
+
+	return 0;
+}
+
+static struct class rfkill_class = {
+	.name		= "rfkill",
+	.dev_release	= rfkill_release,
+	.dev_attrs	= rfkill_dev_attrs,
+	.dev_uevent	= rfkill_dev_uevent,
+	.suspend	= rfkill_suspend,
+	.resume		= rfkill_resume,
+};
+
+
+struct rfkill * __must_check rfkill_alloc(const char *name,
+					  struct device *parent,
+					  const enum rfkill_type type,
+					  const struct rfkill_ops *ops,
+					  void *ops_data)
+{
+	struct rfkill *rfkill;
+	struct device *dev;
+
+	if (WARN_ON(!ops))
+		return NULL;
+
+	if (WARN_ON(!ops->set_block))
+		return NULL;
+
+	if (WARN_ON(!name))
+		return NULL;
+
+	if (WARN_ON(type >= NUM_RFKILL_TYPES))
+		return NULL;
+
+	rfkill = kzalloc(sizeof(*rfkill), GFP_KERNEL);
+	if (!rfkill)
+		return NULL;
+
+	spin_lock_init(&rfkill->lock);
+	INIT_LIST_HEAD(&rfkill->node);
+	rfkill->type = type;
+	rfkill->name = name;
+	rfkill->ops = ops;
+	rfkill->data = ops_data;
+
+	dev = &rfkill->dev;
+	dev->class = &rfkill_class;
+	dev->parent = parent;
+	device_initialize(dev);
+
+	return rfkill;
+}
+EXPORT_SYMBOL(rfkill_alloc);
+
+static void rfkill_poll(struct work_struct *work)
+{
+	struct rfkill *rfkill;
+
+	rfkill = container_of(work, struct rfkill, poll_work.work);
+
+	/*
+	 * Poll hardware state -- driver will use one of the
+	 * rfkill_set{,_hw,_sw}_state functions and use its
+	 * return value to update the current status.
+	 */
+	rfkill->ops->poll(rfkill, rfkill->data);
+
+	schedule_delayed_work(&rfkill->poll_work,
+		round_jiffies_relative(POLL_INTERVAL));
+}
+
+static void rfkill_uevent_work(struct work_struct *work)
+{
+	struct rfkill *rfkill;
+
+	rfkill = container_of(work, struct rfkill, uevent_work);
+
+	rfkill_uevent(rfkill);
+}
+
+static void rfkill_sync_work(struct work_struct *work)
+{
+	struct rfkill *rfkill;
+	bool cur;
+
+	rfkill = container_of(work, struct rfkill, sync_work);
+
+	mutex_lock(&rfkill_global_mutex);
+	cur = rfkill_global_states[rfkill->type].cur;
+	rfkill_set_block(rfkill, cur);
+	mutex_unlock(&rfkill_global_mutex);
+}
+
+int __must_check rfkill_register(struct rfkill *rfkill)
+{
+	static unsigned long rfkill_no;
+	struct device *dev = &rfkill->dev;
+	int error;
+
+	BUG_ON(!rfkill);
+
+	mutex_lock(&rfkill_global_mutex);
+
+	if (rfkill->registered) {
+		error = -EALREADY;
+		goto unlock;
+	}
+
+	dev_set_name(dev, "rfkill%lu", rfkill_no);
+	rfkill_no++;
+
+	if (!(rfkill_states_default_locked & BIT(rfkill->type))) {
+		/* first of its kind */
+		BUILD_BUG_ON(NUM_RFKILL_TYPES >
+			sizeof(rfkill_states_default_locked) * 8);
+		rfkill_states_default_locked |= BIT(rfkill->type);
+		rfkill_global_states[rfkill->type].cur =
+			rfkill_global_states[rfkill->type].def;
+	}
+
+	list_add_tail(&rfkill->node, &rfkill_list);
+
+	error = device_add(dev);
+	if (error)
+		goto remove;
+
+	error = rfkill_led_trigger_register(rfkill);
+	if (error)
+		goto devdel;
+
+	rfkill->registered = true;
+
+	if (rfkill->ops->poll) {
+		INIT_DELAYED_WORK(&rfkill->poll_work, rfkill_poll);
+		schedule_delayed_work(&rfkill->poll_work,
+			round_jiffies_relative(POLL_INTERVAL));
+	}
+
+	INIT_WORK(&rfkill->uevent_work, rfkill_uevent_work);
+
+	INIT_WORK(&rfkill->sync_work, rfkill_sync_work);
+	schedule_work(&rfkill->sync_work);
+
+	mutex_unlock(&rfkill_global_mutex);
+	return 0;
+
+ devdel:
+	device_del(&rfkill->dev);
+ remove:
+	list_del_init(&rfkill->node);
+ unlock:
+	mutex_unlock(&rfkill_global_mutex);
+	return error;
+}
+EXPORT_SYMBOL(rfkill_register);
+
+void rfkill_unregister(struct rfkill *rfkill)
+{
+	BUG_ON(!rfkill);
+
+	if (rfkill->ops->poll)
+		cancel_delayed_work_sync(&rfkill->poll_work);
+
+	cancel_work_sync(&rfkill->uevent_work);
+	cancel_work_sync(&rfkill->sync_work);
+
+	rfkill->registered = false;
+
+	device_del(&rfkill->dev);
+
+	mutex_lock(&rfkill_global_mutex);
+	list_del_init(&rfkill->node);
+	mutex_unlock(&rfkill_global_mutex);
+
+	rfkill_led_trigger_unregister(rfkill);
+}
+EXPORT_SYMBOL(rfkill_unregister);
+
+void rfkill_destroy(struct rfkill *rfkill)
+{
+	if (rfkill)
+		put_device(&rfkill->dev);
+}
+EXPORT_SYMBOL(rfkill_destroy);
+
+
+static int __init rfkill_init(void)
+{
+	int error;
+	int i;
+
+	for (i = 0; i < NUM_RFKILL_TYPES; i++)
+		rfkill_global_states[i].def = !rfkill_default_state;
+
+	error = class_register(&rfkill_class);
+	if (error)
+		goto out;
+
+#ifdef CONFIG_RFKILL_INPUT
+	error = rfkill_handler_init();
+	if (error)
+		class_unregister(&rfkill_class);
+#endif
+
+ out:
+	return error;
+}
+subsys_initcall(rfkill_init);
+
+static void __exit rfkill_exit(void)
+{
+#ifdef CONFIG_RFKILL_INPUT
+	rfkill_handler_exit();
+#endif
+	class_unregister(&rfkill_class);
+}
+module_exit(rfkill_exit);
diff --git a/net/rfkill/input.c b/net/rfkill/input.c
new file mode 100644
index 00000000000..a7295ad5f9c
--- /dev/null
+++ b/net/rfkill/input.c
@@ -0,0 +1,342 @@
+/*
+ * Input layer to RF Kill interface connector
+ *
+ * Copyright (c) 2007 Dmitry Torokhov
+ * Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * If you ever run into a situation in which you have a SW_ type rfkill
+ * input device, then you can revive code that was removed in the patch
+ * "rfkill-input: remove unused code".
+ */
+
+#include <linux/input.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/init.h>
+#include <linux/rfkill.h>
+#include <linux/sched.h>
+
+#include "rfkill.h"
+
+enum rfkill_input_master_mode {
+	RFKILL_INPUT_MASTER_UNLOCK = 0,
+	RFKILL_INPUT_MASTER_RESTORE = 1,
+	RFKILL_INPUT_MASTER_UNBLOCKALL = 2,
+	NUM_RFKILL_INPUT_MASTER_MODES
+};
+
+/* Delay (in ms) between consecutive switch ops */
+#define RFKILL_OPS_DELAY 200
+
+static enum rfkill_input_master_mode rfkill_master_switch_mode =
+					RFKILL_INPUT_MASTER_UNBLOCKALL;
+module_param_named(master_switch_mode, rfkill_master_switch_mode, uint, 0);
+MODULE_PARM_DESC(master_switch_mode,
+	"SW_RFKILL_ALL ON should: 0=do nothing (only unlock); 1=restore; 2=unblock all");
+
+static spinlock_t rfkill_op_lock;
+static bool rfkill_op_pending;
+static unsigned long rfkill_sw_pending[BITS_TO_LONGS(NUM_RFKILL_TYPES)];
+static unsigned long rfkill_sw_state[BITS_TO_LONGS(NUM_RFKILL_TYPES)];
+
+enum rfkill_sched_op {
+	RFKILL_GLOBAL_OP_EPO = 0,
+	RFKILL_GLOBAL_OP_RESTORE,
+	RFKILL_GLOBAL_OP_UNLOCK,
+	RFKILL_GLOBAL_OP_UNBLOCK,
+};
+
+static enum rfkill_sched_op rfkill_master_switch_op;
+static enum rfkill_sched_op rfkill_op;
+
+static void __rfkill_handle_global_op(enum rfkill_sched_op op)
+{
+	unsigned int i;
+
+	switch (op) {
+	case RFKILL_GLOBAL_OP_EPO:
+		rfkill_epo();
+		break;
+	case RFKILL_GLOBAL_OP_RESTORE:
+		rfkill_restore_states();
+		break;
+	case RFKILL_GLOBAL_OP_UNLOCK:
+		rfkill_remove_epo_lock();
+		break;
+	case RFKILL_GLOBAL_OP_UNBLOCK:
+		rfkill_remove_epo_lock();
+		for (i = 0; i < NUM_RFKILL_TYPES; i++)
+			rfkill_switch_all(i, false);
+		break;
+	default:
+		/* memory corruption or bug, fail safely */
+		rfkill_epo();
+		WARN(1, "Unknown requested operation %d! "
+			"rfkill Emergency Power Off activated\n",
+			op);
+	}
+}
+
+static void __rfkill_handle_normal_op(const enum rfkill_type type,
+				      const bool complement)
+{
+	bool blocked;
+
+	blocked = rfkill_get_global_sw_state(type);
+	if (complement)
+		blocked = !blocked;
+
+	rfkill_switch_all(type, blocked);
+}
+
+static void rfkill_op_handler(struct work_struct *work)
+{
+	unsigned int i;
+	bool c;
+
+	spin_lock_irq(&rfkill_op_lock);
+	do {
+		if (rfkill_op_pending) {
+			enum rfkill_sched_op op = rfkill_op;
+			rfkill_op_pending = false;
+			memset(rfkill_sw_pending, 0,
+				sizeof(rfkill_sw_pending));
+			spin_unlock_irq(&rfkill_op_lock);
+
+			__rfkill_handle_global_op(op);
+
+			spin_lock_irq(&rfkill_op_lock);
+
+			/*
+			 * handle global ops first -- during unlocked period
+			 * we might have gotten a new global op.
+			 */
+			if (rfkill_op_pending)
+				continue;
+		}
+
+		if (rfkill_is_epo_lock_active())
+			continue;
+
+		for (i = 0; i < NUM_RFKILL_TYPES; i++) {
+			if (__test_and_clear_bit(i, rfkill_sw_pending)) {
+				c = __test_and_clear_bit(i, rfkill_sw_state);
+				spin_unlock_irq(&rfkill_op_lock);
+
+				__rfkill_handle_normal_op(i, c);
+
+				spin_lock_irq(&rfkill_op_lock);
+			}
+		}
+	} while (rfkill_op_pending);
+	spin_unlock_irq(&rfkill_op_lock);
+}
+
+static DECLARE_DELAYED_WORK(rfkill_op_work, rfkill_op_handler);
+static unsigned long rfkill_last_scheduled;
+
+static unsigned long rfkill_ratelimit(const unsigned long last)
+{
+	const unsigned long delay = msecs_to_jiffies(RFKILL_OPS_DELAY);
+	return (time_after(jiffies, last + delay)) ? 0 : delay;
+}
+
+static void rfkill_schedule_ratelimited(void)
+{
+	if (delayed_work_pending(&rfkill_op_work))
+		return;
+	schedule_delayed_work(&rfkill_op_work,
+			      rfkill_ratelimit(rfkill_last_scheduled));
+	rfkill_last_scheduled = jiffies;
+}
+
+static void rfkill_schedule_global_op(enum rfkill_sched_op op)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rfkill_op_lock, flags);
+	rfkill_op = op;
+	rfkill_op_pending = true;
+	if (op == RFKILL_GLOBAL_OP_EPO && !rfkill_is_epo_lock_active()) {
+		/* bypass the limiter for EPO */
+		cancel_delayed_work(&rfkill_op_work);
+		schedule_delayed_work(&rfkill_op_work, 0);
+		rfkill_last_scheduled = jiffies;
+	} else
+		rfkill_schedule_ratelimited();
+	spin_unlock_irqrestore(&rfkill_op_lock, flags);
+}
+
+static void rfkill_schedule_toggle(enum rfkill_type type)
+{
+	unsigned long flags;
+
+	if (rfkill_is_epo_lock_active())
+		return;
+
+	spin_lock_irqsave(&rfkill_op_lock, flags);
+	if (!rfkill_op_pending) {
+		__set_bit(type, rfkill_sw_pending);
+		__change_bit(type, rfkill_sw_state);
+		rfkill_schedule_ratelimited();
+	}
+	spin_unlock_irqrestore(&rfkill_op_lock, flags);
+}
+
+static void rfkill_schedule_evsw_rfkillall(int state)
+{
+	if (state)
+		rfkill_schedule_global_op(rfkill_master_switch_op);
+	else
+		rfkill_schedule_global_op(RFKILL_GLOBAL_OP_EPO);
+}
+
+static void rfkill_event(struct input_handle *handle, unsigned int type,
+			unsigned int code, int data)
+{
+	if (type == EV_KEY && data == 1) {
+		switch (code) {
+		case KEY_WLAN:
+			rfkill_schedule_toggle(RFKILL_TYPE_WLAN);
+			break;
+		case KEY_BLUETOOTH:
+			rfkill_schedule_toggle(RFKILL_TYPE_BLUETOOTH);
+			break;
+		case KEY_UWB:
+			rfkill_schedule_toggle(RFKILL_TYPE_UWB);
+			break;
+		case KEY_WIMAX:
+			rfkill_schedule_toggle(RFKILL_TYPE_WIMAX);
+			break;
+		}
+	} else if (type == EV_SW && code == SW_RFKILL_ALL)
+		rfkill_schedule_evsw_rfkillall(data);
+}
+
+static int rfkill_connect(struct input_handler *handler, struct input_dev *dev,
+			  const struct input_device_id *id)
+{
+	struct input_handle *handle;
+	int error;
+
+	handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL);
+	if (!handle)
+		return -ENOMEM;
+
+	handle->dev = dev;
+	handle->handler = handler;
+	handle->name = "rfkill";
+
+	/* causes rfkill_start() to be called */
+	error = input_register_handle(handle);
+	if (error)
+		goto err_free_handle;
+
+	error = input_open_device(handle);
+	if (error)
+		goto err_unregister_handle;
+
+	return 0;
+
+ err_unregister_handle:
+	input_unregister_handle(handle);
+ err_free_handle:
+	kfree(handle);
+	return error;
+}
+
+static void rfkill_start(struct input_handle *handle)
+{
+	/*
+	 * Take event_lock to guard against configuration changes, we
+	 * should be able to deal with concurrency with rfkill_event()
+	 * just fine (which event_lock will also avoid).
+	 */
+	spin_lock_irq(&handle->dev->event_lock);
+
+	if (test_bit(EV_SW, handle->dev->evbit) &&
+	    test_bit(SW_RFKILL_ALL, handle->dev->swbit))
+		rfkill_schedule_evsw_rfkillall(test_bit(SW_RFKILL_ALL,
+							handle->dev->sw));
+
+	spin_unlock_irq(&handle->dev->event_lock);
+}
+
+static void rfkill_disconnect(struct input_handle *handle)
+{
+	input_close_device(handle);
+	input_unregister_handle(handle);
+	kfree(handle);
+}
+
+static const struct input_device_id rfkill_ids[] = {
+	{
+		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
+		.evbit = { BIT_MASK(EV_KEY) },
+		.keybit = { [BIT_WORD(KEY_WLAN)] = BIT_MASK(KEY_WLAN) },
+	},
+	{
+		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
+		.evbit = { BIT_MASK(EV_KEY) },
+		.keybit = { [BIT_WORD(KEY_BLUETOOTH)] = BIT_MASK(KEY_BLUETOOTH) },
+	},
+	{
+		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
+		.evbit = { BIT_MASK(EV_KEY) },
+		.keybit = { [BIT_WORD(KEY_UWB)] = BIT_MASK(KEY_UWB) },
+	},
+	{
+		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
+		.evbit = { BIT_MASK(EV_KEY) },
+		.keybit = { [BIT_WORD(KEY_WIMAX)] = BIT_MASK(KEY_WIMAX) },
+	},
+	{
+		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_SWBIT,
+		.evbit = { BIT(EV_SW) },
+		.swbit = { [BIT_WORD(SW_RFKILL_ALL)] = BIT_MASK(SW_RFKILL_ALL) },
+	},
+	{ }
+};
+
+static struct input_handler rfkill_handler = {
+	.name =	"rfkill",
+	.event = rfkill_event,
+	.connect = rfkill_connect,
+	.start = rfkill_start,
+	.disconnect = rfkill_disconnect,
+	.id_table = rfkill_ids,
+};
+
+int __init rfkill_handler_init(void)
+{
+	switch (rfkill_master_switch_mode) {
+	case RFKILL_INPUT_MASTER_UNBLOCKALL:
+		rfkill_master_switch_op = RFKILL_GLOBAL_OP_UNBLOCK;
+		break;
+	case RFKILL_INPUT_MASTER_RESTORE:
+		rfkill_master_switch_op = RFKILL_GLOBAL_OP_RESTORE;
+		break;
+	case RFKILL_INPUT_MASTER_UNLOCK:
+		rfkill_master_switch_op = RFKILL_GLOBAL_OP_UNLOCK;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	spin_lock_init(&rfkill_op_lock);
+
+	/* Avoid delay at first schedule */
+	rfkill_last_scheduled =
+			jiffies - msecs_to_jiffies(RFKILL_OPS_DELAY) - 1;
+	return input_register_handler(&rfkill_handler);
+}
+
+void __exit rfkill_handler_exit(void)
+{
+	input_unregister_handler(&rfkill_handler);
+	cancel_delayed_work_sync(&rfkill_op_work);
+}
diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c
deleted file mode 100644
index 60a34f3b5f6..00000000000
--- a/net/rfkill/rfkill-input.c
+++ /dev/null
@@ -1,390 +0,0 @@
-/*
- * Input layer to RF Kill interface connector
- *
- * Copyright (c) 2007 Dmitry Torokhov
- */
-
-/*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/input.h>
-#include <linux/slab.h>
-#include <linux/workqueue.h>
-#include <linux/init.h>
-#include <linux/rfkill.h>
-#include <linux/sched.h>
-
-#include "rfkill-input.h"
-
-MODULE_AUTHOR("Dmitry Torokhov <dtor@mail.ru>");
-MODULE_DESCRIPTION("Input layer to RF switch connector");
-MODULE_LICENSE("GPL");
-
-enum rfkill_input_master_mode {
-	RFKILL_INPUT_MASTER_DONOTHING = 0,
-	RFKILL_INPUT_MASTER_RESTORE = 1,
-	RFKILL_INPUT_MASTER_UNBLOCKALL = 2,
-	RFKILL_INPUT_MASTER_MAX,	/* marker */
-};
-
-/* Delay (in ms) between consecutive switch ops */
-#define RFKILL_OPS_DELAY 200
-
-static enum rfkill_input_master_mode rfkill_master_switch_mode =
-					RFKILL_INPUT_MASTER_UNBLOCKALL;
-module_param_named(master_switch_mode, rfkill_master_switch_mode, uint, 0);
-MODULE_PARM_DESC(master_switch_mode,
-	"SW_RFKILL_ALL ON should: 0=do nothing; 1=restore; 2=unblock all");
-
-enum rfkill_global_sched_op {
-	RFKILL_GLOBAL_OP_EPO = 0,
-	RFKILL_GLOBAL_OP_RESTORE,
-	RFKILL_GLOBAL_OP_UNLOCK,
-	RFKILL_GLOBAL_OP_UNBLOCK,
-};
-
-struct rfkill_task {
-	struct delayed_work dwork;
-
-	/* ensures that task is serialized */
-	struct mutex mutex;
-
-	/* protects everything below */
-	spinlock_t lock;
-
-	/* pending regular switch operations (1=pending) */
-	unsigned long sw_pending[BITS_TO_LONGS(RFKILL_TYPE_MAX)];
-
-	/* should the state be complemented (1=yes) */
-	unsigned long sw_togglestate[BITS_TO_LONGS(RFKILL_TYPE_MAX)];
-
-	bool global_op_pending;
-	enum rfkill_global_sched_op op;
-
-	/* last time it was scheduled */
-	unsigned long last_scheduled;
-};
-
-static void __rfkill_handle_global_op(enum rfkill_global_sched_op op)
-{
-	unsigned int i;
-
-	switch (op) {
-	case RFKILL_GLOBAL_OP_EPO:
-		rfkill_epo();
-		break;
-	case RFKILL_GLOBAL_OP_RESTORE:
-		rfkill_restore_states();
-		break;
-	case RFKILL_GLOBAL_OP_UNLOCK:
-		rfkill_remove_epo_lock();
-		break;
-	case RFKILL_GLOBAL_OP_UNBLOCK:
-		rfkill_remove_epo_lock();
-		for (i = 0; i < RFKILL_TYPE_MAX; i++)
-			rfkill_switch_all(i, RFKILL_STATE_UNBLOCKED);
-		break;
-	default:
-		/* memory corruption or bug, fail safely */
-		rfkill_epo();
-		WARN(1, "Unknown requested operation %d! "
-			"rfkill Emergency Power Off activated\n",
-			op);
-	}
-}
-
-static void __rfkill_handle_normal_op(const enum rfkill_type type,
-			const bool c)
-{
-	enum rfkill_state state;
-
-	state = rfkill_get_global_state(type);
-	if (c)
-		state = rfkill_state_complement(state);
-
-	rfkill_switch_all(type, state);
-}
-
-static void rfkill_task_handler(struct work_struct *work)
-{
-	struct rfkill_task *task = container_of(work,
-					struct rfkill_task, dwork.work);
-	bool doit = true;
-
-	mutex_lock(&task->mutex);
-
-	spin_lock_irq(&task->lock);
-	while (doit) {
-		if (task->global_op_pending) {
-			enum rfkill_global_sched_op op = task->op;
-			task->global_op_pending = false;
-			memset(task->sw_pending, 0, sizeof(task->sw_pending));
-			spin_unlock_irq(&task->lock);
-
-			__rfkill_handle_global_op(op);
-
-			/* make sure we do at least one pass with
-			 * !task->global_op_pending */
-			spin_lock_irq(&task->lock);
-			continue;
-		} else if (!rfkill_is_epo_lock_active()) {
-			unsigned int i = 0;
-
-			while (!task->global_op_pending &&
-						i < RFKILL_TYPE_MAX) {
-				if (test_and_clear_bit(i, task->sw_pending)) {
-					bool c;
-					c = test_and_clear_bit(i,
-							task->sw_togglestate);
-					spin_unlock_irq(&task->lock);
-
-					__rfkill_handle_normal_op(i, c);
-
-					spin_lock_irq(&task->lock);
-				}
-				i++;
-			}
-		}
-		doit = task->global_op_pending;
-	}
-	spin_unlock_irq(&task->lock);
-
-	mutex_unlock(&task->mutex);
-}
-
-static struct rfkill_task rfkill_task = {
-	.dwork = __DELAYED_WORK_INITIALIZER(rfkill_task.dwork,
-				rfkill_task_handler),
-	.mutex = __MUTEX_INITIALIZER(rfkill_task.mutex),
-	.lock = __SPIN_LOCK_UNLOCKED(rfkill_task.lock),
-};
-
-static unsigned long rfkill_ratelimit(const unsigned long last)
-{
-	const unsigned long delay = msecs_to_jiffies(RFKILL_OPS_DELAY);
-	return (time_after(jiffies, last + delay)) ? 0 : delay;
-}
-
-static void rfkill_schedule_ratelimited(void)
-{
-	if (!delayed_work_pending(&rfkill_task.dwork)) {
-		schedule_delayed_work(&rfkill_task.dwork,
-				rfkill_ratelimit(rfkill_task.last_scheduled));
-		rfkill_task.last_scheduled = jiffies;
-	}
-}
-
-static void rfkill_schedule_global_op(enum rfkill_global_sched_op op)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&rfkill_task.lock, flags);
-	rfkill_task.op = op;
-	rfkill_task.global_op_pending = true;
-	if (op == RFKILL_GLOBAL_OP_EPO && !rfkill_is_epo_lock_active()) {
-		/* bypass the limiter for EPO */
-		cancel_delayed_work(&rfkill_task.dwork);
-		schedule_delayed_work(&rfkill_task.dwork, 0);
-		rfkill_task.last_scheduled = jiffies;
-	} else
-		rfkill_schedule_ratelimited();
-	spin_unlock_irqrestore(&rfkill_task.lock, flags);
-}
-
-static void rfkill_schedule_toggle(enum rfkill_type type)
-{
-	unsigned long flags;
-
-	if (rfkill_is_epo_lock_active())
-		return;
-
-	spin_lock_irqsave(&rfkill_task.lock, flags);
-	if (!rfkill_task.global_op_pending) {
-		set_bit(type, rfkill_task.sw_pending);
-		change_bit(type, rfkill_task.sw_togglestate);
-		rfkill_schedule_ratelimited();
-	}
-	spin_unlock_irqrestore(&rfkill_task.lock, flags);
-}
-
-static void rfkill_schedule_evsw_rfkillall(int state)
-{
-	if (state) {
-		switch (rfkill_master_switch_mode) {
-		case RFKILL_INPUT_MASTER_UNBLOCKALL:
-			rfkill_schedule_global_op(RFKILL_GLOBAL_OP_UNBLOCK);
-			break;
-		case RFKILL_INPUT_MASTER_RESTORE:
-			rfkill_schedule_global_op(RFKILL_GLOBAL_OP_RESTORE);
-			break;
-		case RFKILL_INPUT_MASTER_DONOTHING:
-			rfkill_schedule_global_op(RFKILL_GLOBAL_OP_UNLOCK);
-			break;
-		default:
-			/* memory corruption or driver bug! fail safely */
-			rfkill_schedule_global_op(RFKILL_GLOBAL_OP_EPO);
-			WARN(1, "Unknown rfkill_master_switch_mode (%d), "
-				"driver bug or memory corruption detected!\n",
-				rfkill_master_switch_mode);
-			break;
-		}
-	} else
-		rfkill_schedule_global_op(RFKILL_GLOBAL_OP_EPO);
-}
-
-static void rfkill_event(struct input_handle *handle, unsigned int type,
-			unsigned int code, int data)
-{
-	if (type == EV_KEY && data == 1) {
-		enum rfkill_type t;
-
-		switch (code) {
-		case KEY_WLAN:
-			t = RFKILL_TYPE_WLAN;
-			break;
-		case KEY_BLUETOOTH:
-			t = RFKILL_TYPE_BLUETOOTH;
-			break;
-		case KEY_UWB:
-			t = RFKILL_TYPE_UWB;
-			break;
-		case KEY_WIMAX:
-			t = RFKILL_TYPE_WIMAX;
-			break;
-		default:
-			return;
-		}
-		rfkill_schedule_toggle(t);
-		return;
-	} else if (type == EV_SW) {
-		switch (code) {
-		case SW_RFKILL_ALL:
-			rfkill_schedule_evsw_rfkillall(data);
-			return;
-		default:
-			return;
-		}
-	}
-}
-
-static int rfkill_connect(struct input_handler *handler, struct input_dev *dev,
-			  const struct input_device_id *id)
-{
-	struct input_handle *handle;
-	int error;
-
-	handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL);
-	if (!handle)
-		return -ENOMEM;
-
-	handle->dev = dev;
-	handle->handler = handler;
-	handle->name = "rfkill";
-
-	/* causes rfkill_start() to be called */
-	error = input_register_handle(handle);
-	if (error)
-		goto err_free_handle;
-
-	error = input_open_device(handle);
-	if (error)
-		goto err_unregister_handle;
-
-	return 0;
-
- err_unregister_handle:
-	input_unregister_handle(handle);
- err_free_handle:
-	kfree(handle);
-	return error;
-}
-
-static void rfkill_start(struct input_handle *handle)
-{
-	/* Take event_lock to guard against configuration changes, we
-	 * should be able to deal with concurrency with rfkill_event()
-	 * just fine (which event_lock will also avoid). */
-	spin_lock_irq(&handle->dev->event_lock);
-
-	if (test_bit(EV_SW, handle->dev->evbit)) {
-		if (test_bit(SW_RFKILL_ALL, handle->dev->swbit))
-			rfkill_schedule_evsw_rfkillall(test_bit(SW_RFKILL_ALL,
-							handle->dev->sw));
-		/* add resync for further EV_SW events here */
-	}
-
-	spin_unlock_irq(&handle->dev->event_lock);
-}
-
-static void rfkill_disconnect(struct input_handle *handle)
-{
-	input_close_device(handle);
-	input_unregister_handle(handle);
-	kfree(handle);
-}
-
-static const struct input_device_id rfkill_ids[] = {
-	{
-		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
-		.evbit = { BIT_MASK(EV_KEY) },
-		.keybit = { [BIT_WORD(KEY_WLAN)] = BIT_MASK(KEY_WLAN) },
-	},
-	{
-		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
-		.evbit = { BIT_MASK(EV_KEY) },
-		.keybit = { [BIT_WORD(KEY_BLUETOOTH)] = BIT_MASK(KEY_BLUETOOTH) },
-	},
-	{
-		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
-		.evbit = { BIT_MASK(EV_KEY) },
-		.keybit = { [BIT_WORD(KEY_UWB)] = BIT_MASK(KEY_UWB) },
-	},
-	{
-		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
-		.evbit = { BIT_MASK(EV_KEY) },
-		.keybit = { [BIT_WORD(KEY_WIMAX)] = BIT_MASK(KEY_WIMAX) },
-	},
-	{
-		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_SWBIT,
-		.evbit = { BIT(EV_SW) },
-		.swbit = { [BIT_WORD(SW_RFKILL_ALL)] = BIT_MASK(SW_RFKILL_ALL) },
-	},
-	{ }
-};
-
-static struct input_handler rfkill_handler = {
-	.event =	rfkill_event,
-	.connect =	rfkill_connect,
-	.disconnect =	rfkill_disconnect,
-	.start =	rfkill_start,
-	.name =		"rfkill",
-	.id_table =	rfkill_ids,
-};
-
-static int __init rfkill_handler_init(void)
-{
-	if (rfkill_master_switch_mode >= RFKILL_INPUT_MASTER_MAX)
-		return -EINVAL;
-
-	/*
-	 * The penalty to not doing this is a possible RFKILL_OPS_DELAY delay
-	 * at the first use.  Acceptable, but if we can avoid it, why not?
-	 */
-	rfkill_task.last_scheduled =
-			jiffies - msecs_to_jiffies(RFKILL_OPS_DELAY) - 1;
-	return input_register_handler(&rfkill_handler);
-}
-
-static void __exit rfkill_handler_exit(void)
-{
-	input_unregister_handler(&rfkill_handler);
-	cancel_delayed_work_sync(&rfkill_task.dwork);
-	rfkill_remove_epo_lock();
-}
-
-module_init(rfkill_handler_init);
-module_exit(rfkill_handler_exit);
diff --git a/net/rfkill/rfkill-input.h b/net/rfkill/rfkill-input.h
deleted file mode 100644
index fe8df6b5b93..00000000000
--- a/net/rfkill/rfkill-input.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (C) 2007 Ivo van Doorn
- */
-
-/*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- */
-
-#ifndef __RFKILL_INPUT_H
-#define __RFKILL_INPUT_H
-
-void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state);
-void rfkill_epo(void);
-void rfkill_restore_states(void);
-void rfkill_remove_epo_lock(void);
-bool rfkill_is_epo_lock_active(void);
-enum rfkill_state rfkill_get_global_state(const enum rfkill_type type);
-
-#endif /* __RFKILL_INPUT_H */
diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
deleted file mode 100644
index 4f5a83183c9..00000000000
--- a/net/rfkill/rfkill.c
+++ /dev/null
@@ -1,855 +0,0 @@
-/*
- * Copyright (C) 2006 - 2007 Ivo van Doorn
- * Copyright (C) 2007 Dmitry Torokhov
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the
- * Free Software Foundation, Inc.,
- * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/workqueue.h>
-#include <linux/capability.h>
-#include <linux/list.h>
-#include <linux/mutex.h>
-#include <linux/rfkill.h>
-
-/* Get declaration of rfkill_switch_all() to shut up sparse. */
-#include "rfkill-input.h"
-
-
-MODULE_AUTHOR("Ivo van Doorn <IvDoorn@gmail.com>");
-MODULE_VERSION("1.0");
-MODULE_DESCRIPTION("RF switch support");
-MODULE_LICENSE("GPL");
-
-static LIST_HEAD(rfkill_list);	/* list of registered rf switches */
-static DEFINE_MUTEX(rfkill_global_mutex);
-
-static unsigned int rfkill_default_state = RFKILL_STATE_UNBLOCKED;
-module_param_named(default_state, rfkill_default_state, uint, 0444);
-MODULE_PARM_DESC(default_state,
-		 "Default initial state for all radio types, 0 = radio off");
-
-struct rfkill_gsw_state {
-	enum rfkill_state current_state;
-	enum rfkill_state default_state;
-};
-
-static struct rfkill_gsw_state rfkill_global_states[RFKILL_TYPE_MAX];
-static unsigned long rfkill_states_lockdflt[BITS_TO_LONGS(RFKILL_TYPE_MAX)];
-static bool rfkill_epo_lock_active;
-
-
-#ifdef CONFIG_RFKILL_LEDS
-static void rfkill_led_trigger(struct rfkill *rfkill,
-			       enum rfkill_state state)
-{
-	struct led_trigger *led = &rfkill->led_trigger;
-
-	if (!led->name)
-		return;
-	if (state != RFKILL_STATE_UNBLOCKED)
-		led_trigger_event(led, LED_OFF);
-	else
-		led_trigger_event(led, LED_FULL);
-}
-
-static void rfkill_led_trigger_activate(struct led_classdev *led)
-{
-	struct rfkill *rfkill = container_of(led->trigger,
-			struct rfkill, led_trigger);
-
-	rfkill_led_trigger(rfkill, rfkill->state);
-}
-#else
-static inline void rfkill_led_trigger(struct rfkill *rfkill,
-					enum rfkill_state state)
-{
-}
-#endif /* CONFIG_RFKILL_LEDS */
-
-static void rfkill_uevent(struct rfkill *rfkill)
-{
-	kobject_uevent(&rfkill->dev.kobj, KOBJ_CHANGE);
-}
-
-static void update_rfkill_state(struct rfkill *rfkill)
-{
-	enum rfkill_state newstate, oldstate;
-
-	if (rfkill->get_state) {
-		mutex_lock(&rfkill->mutex);
-		if (!rfkill->get_state(rfkill->data, &newstate)) {
-			oldstate = rfkill->state;
-			rfkill->state = newstate;
-			if (oldstate != newstate)
-				rfkill_uevent(rfkill);
-		}
-		mutex_unlock(&rfkill->mutex);
-	}
-	rfkill_led_trigger(rfkill, rfkill->state);
-}
-
-/**
- * rfkill_toggle_radio - wrapper for toggle_radio hook
- * @rfkill: the rfkill struct to use
- * @force: calls toggle_radio even if cache says it is not needed,
- *	and also makes sure notifications of the state will be
- *	sent even if it didn't change
- * @state: the new state to call toggle_radio() with
- *
- * Calls rfkill->toggle_radio, enforcing the API for toggle_radio
- * calls and handling all the red tape such as issuing notifications
- * if the call is successful.
- *
- * Suspended devices are not touched at all, and -EAGAIN is returned.
- *
- * Note that the @force parameter cannot override a (possibly cached)
- * state of RFKILL_STATE_HARD_BLOCKED.  Any device making use of
- * RFKILL_STATE_HARD_BLOCKED implements either get_state() or
- * rfkill_force_state(), so the cache either is bypassed or valid.
- *
- * Note that we do call toggle_radio for RFKILL_STATE_SOFT_BLOCKED
- * even if the radio is in RFKILL_STATE_HARD_BLOCKED state, so as to
- * give the driver a hint that it should double-BLOCK the transmitter.
- *
- * Caller must have acquired rfkill->mutex.
- */
-static int rfkill_toggle_radio(struct rfkill *rfkill,
-				enum rfkill_state state,
-				int force)
-{
-	int retval = 0;
-	enum rfkill_state oldstate, newstate;
-
-	if (unlikely(rfkill->dev.power.power_state.event & PM_EVENT_SLEEP))
-		return -EBUSY;
-
-	oldstate = rfkill->state;
-
-	if (rfkill->get_state && !force &&
-	    !rfkill->get_state(rfkill->data, &newstate)) {
-		rfkill->state = newstate;
-	}
-
-	switch (state) {
-	case RFKILL_STATE_HARD_BLOCKED:
-		/* typically happens when refreshing hardware state,
-		 * such as on resume */
-		state = RFKILL_STATE_SOFT_BLOCKED;
-		break;
-	case RFKILL_STATE_UNBLOCKED:
-		/* force can't override this, only rfkill_force_state() can */
-		if (rfkill->state == RFKILL_STATE_HARD_BLOCKED)
-			return -EPERM;
-		break;
-	case RFKILL_STATE_SOFT_BLOCKED:
-		/* nothing to do, we want to give drivers the hint to double
-		 * BLOCK even a transmitter that is already in state
-		 * RFKILL_STATE_HARD_BLOCKED */
-		break;
-	default:
-		WARN(1, KERN_WARNING
-			"rfkill: illegal state %d passed as parameter "
-			"to rfkill_toggle_radio\n", state);
-		return -EINVAL;
-	}
-
-	if (force || state != rfkill->state) {
-		retval = rfkill->toggle_radio(rfkill->data, state);
-		/* never allow a HARD->SOFT downgrade! */
-		if (!retval && rfkill->state != RFKILL_STATE_HARD_BLOCKED)
-			rfkill->state = state;
-	}
-
-	if (force || rfkill->state != oldstate)
-		rfkill_uevent(rfkill);
-
-	rfkill_led_trigger(rfkill, rfkill->state);
-	return retval;
-}
-
-/**
- * __rfkill_switch_all - Toggle state of all switches of given type
- * @type: type of interfaces to be affected
- * @state: the new state
- *
- * This function toggles the state of all switches of given type,
- * unless a specific switch is claimed by userspace (in which case,
- * that switch is left alone) or suspended.
- *
- * Caller must have acquired rfkill_global_mutex.
- */
-static void __rfkill_switch_all(const enum rfkill_type type,
-				const enum rfkill_state state)
-{
-	struct rfkill *rfkill;
-
-	if (WARN((state >= RFKILL_STATE_MAX || type >= RFKILL_TYPE_MAX),
-			KERN_WARNING
-			"rfkill: illegal state %d or type %d "
-			"passed as parameter to __rfkill_switch_all\n",
-			state, type))
-		return;
-
-	rfkill_global_states[type].current_state = state;
-	list_for_each_entry(rfkill, &rfkill_list, node) {
-		if (rfkill->type == type) {
-			mutex_lock(&rfkill->mutex);
-			rfkill_toggle_radio(rfkill, state, 0);
-			mutex_unlock(&rfkill->mutex);
-			rfkill_led_trigger(rfkill, rfkill->state);
-		}
-	}
-}
-
-/**
- * rfkill_switch_all - Toggle state of all switches of given type
- * @type: type of interfaces to be affected
- * @state: the new state
- *
- * Acquires rfkill_global_mutex and calls __rfkill_switch_all(@type, @state).
- * Please refer to __rfkill_switch_all() for details.
- *
- * Does nothing if the EPO lock is active.
- */
-void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state)
-{
-	mutex_lock(&rfkill_global_mutex);
-	if (!rfkill_epo_lock_active)
-		__rfkill_switch_all(type, state);
-	mutex_unlock(&rfkill_global_mutex);
-}
-EXPORT_SYMBOL(rfkill_switch_all);
-
-/**
- * rfkill_epo - emergency power off all transmitters
- *
- * This kicks all non-suspended rfkill devices to RFKILL_STATE_SOFT_BLOCKED,
- * ignoring everything in its path but rfkill_global_mutex and rfkill->mutex.
- *
- * The global state before the EPO is saved and can be restored later
- * using rfkill_restore_states().
- */
-void rfkill_epo(void)
-{
-	struct rfkill *rfkill;
-	int i;
-
-	mutex_lock(&rfkill_global_mutex);
-
-	rfkill_epo_lock_active = true;
-	list_for_each_entry(rfkill, &rfkill_list, node) {
-		mutex_lock(&rfkill->mutex);
-		rfkill_toggle_radio(rfkill, RFKILL_STATE_SOFT_BLOCKED, 1);
-		mutex_unlock(&rfkill->mutex);
-	}
-	for (i = 0; i < RFKILL_TYPE_MAX; i++) {
-		rfkill_global_states[i].default_state =
-				rfkill_global_states[i].current_state;
-		rfkill_global_states[i].current_state =
-				RFKILL_STATE_SOFT_BLOCKED;
-	}
-	mutex_unlock(&rfkill_global_mutex);
-	rfkill_led_trigger(rfkill, rfkill->state);
-}
-EXPORT_SYMBOL_GPL(rfkill_epo);
-
-/**
- * rfkill_restore_states - restore global states
- *
- * Restore (and sync switches to) the global state from the
- * states in rfkill_default_states.  This can undo the effects of
- * a call to rfkill_epo().
- */
-void rfkill_restore_states(void)
-{
-	int i;
-
-	mutex_lock(&rfkill_global_mutex);
-
-	rfkill_epo_lock_active = false;
-	for (i = 0; i < RFKILL_TYPE_MAX; i++)
-		__rfkill_switch_all(i, rfkill_global_states[i].default_state);
-	mutex_unlock(&rfkill_global_mutex);
-}
-EXPORT_SYMBOL_GPL(rfkill_restore_states);
-
-/**
- * rfkill_remove_epo_lock - unlock state changes
- *
- * Used by rfkill-input manually unlock state changes, when
- * the EPO switch is deactivated.
- */
-void rfkill_remove_epo_lock(void)
-{
-	mutex_lock(&rfkill_global_mutex);
-	rfkill_epo_lock_active = false;
-	mutex_unlock(&rfkill_global_mutex);
-}
-EXPORT_SYMBOL_GPL(rfkill_remove_epo_lock);
-
-/**
- * rfkill_is_epo_lock_active - returns true EPO is active
- *
- * Returns 0 (false) if there is NOT an active EPO contidion,
- * and 1 (true) if there is an active EPO contition, which
- * locks all radios in one of the BLOCKED states.
- *
- * Can be called in atomic context.
- */
-bool rfkill_is_epo_lock_active(void)
-{
-	return rfkill_epo_lock_active;
-}
-EXPORT_SYMBOL_GPL(rfkill_is_epo_lock_active);
-
-/**
- * rfkill_get_global_state - returns global state for a type
- * @type: the type to get the global state of
- *
- * Returns the current global state for a given wireless
- * device type.
- */
-enum rfkill_state rfkill_get_global_state(const enum rfkill_type type)
-{
-	return rfkill_global_states[type].current_state;
-}
-EXPORT_SYMBOL_GPL(rfkill_get_global_state);
-
-/**
- * rfkill_force_state - Force the internal rfkill radio state
- * @rfkill: pointer to the rfkill class to modify.
- * @state: the current radio state the class should be forced to.
- *
- * This function updates the internal state of the radio cached
- * by the rfkill class.  It should be used when the driver gets
- * a notification by the firmware/hardware of the current *real*
- * state of the radio rfkill switch.
- *
- * Devices which are subject to external changes on their rfkill
- * state (such as those caused by a hardware rfkill line) MUST
- * have their driver arrange to call rfkill_force_state() as soon
- * as possible after such a change.
- *
- * This function may not be called from an atomic context.
- */
-int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state)
-{
-	enum rfkill_state oldstate;
-
-	BUG_ON(!rfkill);
-	if (WARN((state >= RFKILL_STATE_MAX),
-			KERN_WARNING
-			"rfkill: illegal state %d passed as parameter "
-			"to rfkill_force_state\n", state))
-		return -EINVAL;
-
-	mutex_lock(&rfkill->mutex);
-
-	oldstate = rfkill->state;
-	rfkill->state = state;
-
-	if (state != oldstate)
-		rfkill_uevent(rfkill);
-
-	mutex_unlock(&rfkill->mutex);
-	rfkill_led_trigger(rfkill, rfkill->state);
-
-	return 0;
-}
-EXPORT_SYMBOL(rfkill_force_state);
-
-static ssize_t rfkill_name_show(struct device *dev,
-				struct device_attribute *attr,
-				char *buf)
-{
-	struct rfkill *rfkill = to_rfkill(dev);
-
-	return sprintf(buf, "%s\n", rfkill->name);
-}
-
-static const char *rfkill_get_type_str(enum rfkill_type type)
-{
-	switch (type) {
-	case RFKILL_TYPE_WLAN:
-		return "wlan";
-	case RFKILL_TYPE_BLUETOOTH:
-		return "bluetooth";
-	case RFKILL_TYPE_UWB:
-		return "ultrawideband";
-	case RFKILL_TYPE_WIMAX:
-		return "wimax";
-	case RFKILL_TYPE_WWAN:
-		return "wwan";
-	default:
-		BUG();
-	}
-}
-
-static ssize_t rfkill_type_show(struct device *dev,
-				struct device_attribute *attr,
-				char *buf)
-{
-	struct rfkill *rfkill = to_rfkill(dev);
-
-	return sprintf(buf, "%s\n", rfkill_get_type_str(rfkill->type));
-}
-
-static ssize_t rfkill_state_show(struct device *dev,
-				 struct device_attribute *attr,
-				 char *buf)
-{
-	struct rfkill *rfkill = to_rfkill(dev);
-
-	update_rfkill_state(rfkill);
-	return sprintf(buf, "%d\n", rfkill->state);
-}
-
-static ssize_t rfkill_state_store(struct device *dev,
-				  struct device_attribute *attr,
-				  const char *buf, size_t count)
-{
-	struct rfkill *rfkill = to_rfkill(dev);
-	unsigned long state;
-	int error;
-
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
-	error = strict_strtoul(buf, 0, &state);
-	if (error)
-		return error;
-
-	/* RFKILL_STATE_HARD_BLOCKED is illegal here... */
-	if (state != RFKILL_STATE_UNBLOCKED &&
-	    state != RFKILL_STATE_SOFT_BLOCKED)
-		return -EINVAL;
-
-	error = mutex_lock_killable(&rfkill->mutex);
-	if (error)
-		return error;
-
-	if (!rfkill_epo_lock_active)
-		error = rfkill_toggle_radio(rfkill, state, 0);
-	else
-		error = -EPERM;
-
-	mutex_unlock(&rfkill->mutex);
-
-	return error ? error : count;
-}
-
-static ssize_t rfkill_claim_show(struct device *dev,
-				 struct device_attribute *attr,
-				 char *buf)
-{
-	return sprintf(buf, "%d\n", 0);
-}
-
-static ssize_t rfkill_claim_store(struct device *dev,
-				  struct device_attribute *attr,
-				  const char *buf, size_t count)
-{
-	return -EOPNOTSUPP;
-}
-
-static struct device_attribute rfkill_dev_attrs[] = {
-	__ATTR(name, S_IRUGO, rfkill_name_show, NULL),
-	__ATTR(type, S_IRUGO, rfkill_type_show, NULL),
-	__ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store),
-	__ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store),
-	__ATTR_NULL
-};
-
-static void rfkill_release(struct device *dev)
-{
-	struct rfkill *rfkill = to_rfkill(dev);
-
-	kfree(rfkill);
-	module_put(THIS_MODULE);
-}
-
-#ifdef CONFIG_PM
-static int rfkill_suspend(struct device *dev, pm_message_t state)
-{
-	struct rfkill *rfkill = to_rfkill(dev);
-
-	/* mark class device as suspended */
-	if (dev->power.power_state.event != state.event)
-		dev->power.power_state = state;
-
-	/* store state for the resume handler */
-	rfkill->state_for_resume = rfkill->state;
-
-	return 0;
-}
-
-static int rfkill_resume(struct device *dev)
-{
-	struct rfkill *rfkill = to_rfkill(dev);
-	enum rfkill_state newstate;
-
-	if (dev->power.power_state.event != PM_EVENT_ON) {
-		mutex_lock(&rfkill->mutex);
-
-		dev->power.power_state.event = PM_EVENT_ON;
-
-		/*
-		 * rfkill->state could have been modified before we got
-		 * called, and won't be updated by rfkill_toggle_radio()
-		 * in force mode.  Sync it FIRST.
-		 */
-		if (rfkill->get_state &&
-		    !rfkill->get_state(rfkill->data, &newstate))
-			rfkill->state = newstate;
-
-		/*
-		 * If we are under EPO, kick transmitter offline,
-		 * otherwise restore to pre-suspend state.
-		 *
-		 * Issue a notification in any case
-		 */
-		rfkill_toggle_radio(rfkill,
-				rfkill_epo_lock_active ?
-					RFKILL_STATE_SOFT_BLOCKED :
-					rfkill->state_for_resume,
-				1);
-
-		mutex_unlock(&rfkill->mutex);
-		rfkill_led_trigger(rfkill, rfkill->state);
-	}
-
-	return 0;
-}
-#else
-#define rfkill_suspend NULL
-#define rfkill_resume NULL
-#endif
-
-static int rfkill_dev_uevent(struct device *dev, struct kobj_uevent_env *env)
-{
-	struct rfkill *rfkill = to_rfkill(dev);
-	int error;
-
-	error = add_uevent_var(env, "RFKILL_NAME=%s", rfkill->name);
-	if (error)
-		return error;
-	error = add_uevent_var(env, "RFKILL_TYPE=%s",
-				rfkill_get_type_str(rfkill->type));
-	if (error)
-		return error;
-	error = add_uevent_var(env, "RFKILL_STATE=%d", rfkill->state);
-	return error;
-}
-
-static struct class rfkill_class = {
-	.name		= "rfkill",
-	.dev_release	= rfkill_release,
-	.dev_attrs	= rfkill_dev_attrs,
-	.suspend	= rfkill_suspend,
-	.resume		= rfkill_resume,
-	.dev_uevent	= rfkill_dev_uevent,
-};
-
-static int rfkill_check_duplicity(const struct rfkill *rfkill)
-{
-	struct rfkill *p;
-	unsigned long seen[BITS_TO_LONGS(RFKILL_TYPE_MAX)];
-
-	memset(seen, 0, sizeof(seen));
-
-	list_for_each_entry(p, &rfkill_list, node) {
-		if (WARN((p == rfkill), KERN_WARNING
-				"rfkill: illegal attempt to register "
-				"an already registered rfkill struct\n"))
-			return -EEXIST;
-		set_bit(p->type, seen);
-	}
-
-	/* 0: first switch of its kind */
-	return (test_bit(rfkill->type, seen)) ? 1 : 0;
-}
-
-static int rfkill_add_switch(struct rfkill *rfkill)
-{
-	int error;
-
-	mutex_lock(&rfkill_global_mutex);
-
-	error = rfkill_check_duplicity(rfkill);
-	if (error < 0)
-		goto unlock_out;
-
-	if (!error) {
-		/* lock default after first use */
-		set_bit(rfkill->type, rfkill_states_lockdflt);
-		rfkill_global_states[rfkill->type].current_state =
-			rfkill_global_states[rfkill->type].default_state;
-	}
-
-	rfkill_toggle_radio(rfkill,
-			    rfkill_global_states[rfkill->type].current_state,
-			    0);
-
-	list_add_tail(&rfkill->node, &rfkill_list);
-
-	error = 0;
-unlock_out:
-	mutex_unlock(&rfkill_global_mutex);
-
-	return error;
-}
-
-static void rfkill_remove_switch(struct rfkill *rfkill)
-{
-	mutex_lock(&rfkill_global_mutex);
-	list_del_init(&rfkill->node);
-	mutex_unlock(&rfkill_global_mutex);
-
-	mutex_lock(&rfkill->mutex);
-	rfkill_toggle_radio(rfkill, RFKILL_STATE_SOFT_BLOCKED, 1);
-	mutex_unlock(&rfkill->mutex);
-}
-
-/**
- * rfkill_allocate - allocate memory for rfkill structure.
- * @parent: device that has rf switch on it
- * @type: type of the switch (RFKILL_TYPE_*)
- *
- * This function should be called by the network driver when it needs
- * rfkill structure.  Once the structure is allocated the driver should
- * finish its initialization by setting the name, private data, enable_radio
- * and disable_radio methods and then register it with rfkill_register().
- *
- * NOTE: If registration fails the structure shoudl be freed by calling
- * rfkill_free() otherwise rfkill_unregister() should be used.
- */
-struct rfkill * __must_check rfkill_allocate(struct device *parent,
-					     enum rfkill_type type)
-{
-	struct rfkill *rfkill;
-	struct device *dev;
-
-	if (WARN((type >= RFKILL_TYPE_MAX),
-			KERN_WARNING
-			"rfkill: illegal type %d passed as parameter "
-			"to rfkill_allocate\n", type))
-		return NULL;
-
-	rfkill = kzalloc(sizeof(struct rfkill), GFP_KERNEL);
-	if (!rfkill)
-		return NULL;
-
-	mutex_init(&rfkill->mutex);
-	INIT_LIST_HEAD(&rfkill->node);
-	rfkill->type = type;
-
-	dev = &rfkill->dev;
-	dev->class = &rfkill_class;
-	dev->parent = parent;
-	device_initialize(dev);
-
-	__module_get(THIS_MODULE);
-
-	return rfkill;
-}
-EXPORT_SYMBOL(rfkill_allocate);
-
-/**
- * rfkill_free - Mark rfkill structure for deletion
- * @rfkill: rfkill structure to be destroyed
- *
- * Decrements reference count of the rfkill structure so it is destroyed.
- * Note that rfkill_free() should _not_ be called after rfkill_unregister().
- */
-void rfkill_free(struct rfkill *rfkill)
-{
-	if (rfkill)
-		put_device(&rfkill->dev);
-}
-EXPORT_SYMBOL(rfkill_free);
-
-static void rfkill_led_trigger_register(struct rfkill *rfkill)
-{
-#ifdef CONFIG_RFKILL_LEDS
-	int error;
-
-	if (!rfkill->led_trigger.name)
-		rfkill->led_trigger.name = dev_name(&rfkill->dev);
-	if (!rfkill->led_trigger.activate)
-		rfkill->led_trigger.activate = rfkill_led_trigger_activate;
-	error = led_trigger_register(&rfkill->led_trigger);
-	if (error)
-		rfkill->led_trigger.name = NULL;
-#endif /* CONFIG_RFKILL_LEDS */
-}
-
-static void rfkill_led_trigger_unregister(struct rfkill *rfkill)
-{
-#ifdef CONFIG_RFKILL_LEDS
-	if (rfkill->led_trigger.name) {
-		led_trigger_unregister(&rfkill->led_trigger);
-		rfkill->led_trigger.name = NULL;
-	}
-#endif
-}
-
-/**
- * rfkill_register - Register a rfkill structure.
- * @rfkill: rfkill structure to be registered
- *
- * This function should be called by the network driver when the rfkill
- * structure needs to be registered. Immediately from registration the
- * switch driver should be able to service calls to toggle_radio.
- */
-int __must_check rfkill_register(struct rfkill *rfkill)
-{
-	static atomic_t rfkill_no = ATOMIC_INIT(0);
-	struct device *dev = &rfkill->dev;
-	int error;
-
-	if (WARN((!rfkill || !rfkill->toggle_radio ||
-			rfkill->type >= RFKILL_TYPE_MAX ||
-			rfkill->state >= RFKILL_STATE_MAX),
-			KERN_WARNING
-			"rfkill: attempt to register a "
-			"badly initialized rfkill struct\n"))
-		return -EINVAL;
-
-	dev_set_name(dev, "rfkill%ld", (long)atomic_inc_return(&rfkill_no) - 1);
-
-	rfkill_led_trigger_register(rfkill);
-
-	error = rfkill_add_switch(rfkill);
-	if (error) {
-		rfkill_led_trigger_unregister(rfkill);
-		return error;
-	}
-
-	error = device_add(dev);
-	if (error) {
-		rfkill_remove_switch(rfkill);
-		rfkill_led_trigger_unregister(rfkill);
-		return error;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(rfkill_register);
-
-/**
- * rfkill_unregister - Unregister a rfkill structure.
- * @rfkill: rfkill structure to be unregistered
- *
- * This function should be called by the network driver during device
- * teardown to destroy rfkill structure. Note that rfkill_free() should
- * _not_ be called after rfkill_unregister().
- */
-void rfkill_unregister(struct rfkill *rfkill)
-{
-	BUG_ON(!rfkill);
-	device_del(&rfkill->dev);
-	rfkill_remove_switch(rfkill);
-	rfkill_led_trigger_unregister(rfkill);
-	put_device(&rfkill->dev);
-}
-EXPORT_SYMBOL(rfkill_unregister);
-
-/**
- * rfkill_set_default - set initial value for a switch type
- * @type - the type of switch to set the default state of
- * @state - the new default state for that group of switches
- *
- * Sets the initial state rfkill should use for a given type.
- * The following initial states are allowed: RFKILL_STATE_SOFT_BLOCKED
- * and RFKILL_STATE_UNBLOCKED.
- *
- * This function is meant to be used by platform drivers for platforms
- * that can save switch state across power down/reboot.
- *
- * The default state for each switch type can be changed exactly once.
- * After a switch of that type is registered, the default state cannot
- * be changed anymore.  This guards against multiple drivers it the
- * same platform trying to set the initial switch default state, which
- * is not allowed.
- *
- * Returns -EPERM if the state has already been set once or is in use,
- * so drivers likely want to either ignore or at most printk(KERN_NOTICE)
- * if this function returns -EPERM.
- *
- * Returns 0 if the new default state was set, or an error if it
- * could not be set.
- */
-int rfkill_set_default(enum rfkill_type type, enum rfkill_state state)
-{
-	int error;
-
-	if (WARN((type >= RFKILL_TYPE_MAX ||
-			(state != RFKILL_STATE_SOFT_BLOCKED &&
-			 state != RFKILL_STATE_UNBLOCKED)),
-			KERN_WARNING
-			"rfkill: illegal state %d or type %d passed as "
-			"parameter to rfkill_set_default\n", state, type))
-		return -EINVAL;
-
-	mutex_lock(&rfkill_global_mutex);
-
-	if (!test_and_set_bit(type, rfkill_states_lockdflt)) {
-		rfkill_global_states[type].default_state = state;
-		rfkill_global_states[type].current_state = state;
-		error = 0;
-	} else
-		error = -EPERM;
-
-	mutex_unlock(&rfkill_global_mutex);
-	return error;
-}
-EXPORT_SYMBOL_GPL(rfkill_set_default);
-
-/*
- * Rfkill module initialization/deinitialization.
- */
-static int __init rfkill_init(void)
-{
-	int error;
-	int i;
-
-	/* RFKILL_STATE_HARD_BLOCKED is illegal here... */
-	if (rfkill_default_state != RFKILL_STATE_SOFT_BLOCKED &&
-	    rfkill_default_state != RFKILL_STATE_UNBLOCKED)
-		return -EINVAL;
-
-	for (i = 0; i < RFKILL_TYPE_MAX; i++)
-		rfkill_global_states[i].default_state = rfkill_default_state;
-
-	error = class_register(&rfkill_class);
-	if (error) {
-		printk(KERN_ERR "rfkill: unable to register rfkill class\n");
-		return error;
-	}
-
-	return 0;
-}
-
-static void __exit rfkill_exit(void)
-{
-	class_unregister(&rfkill_class);
-}
-
-subsys_initcall(rfkill_init);
-module_exit(rfkill_exit);
diff --git a/net/rfkill/rfkill.h b/net/rfkill/rfkill.h
new file mode 100644
index 00000000000..d1117cb6e4d
--- /dev/null
+++ b/net/rfkill/rfkill.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2007 Ivo van Doorn
+ * Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#ifndef __RFKILL_INPUT_H
+#define __RFKILL_INPUT_H
+
+/* core code */
+void rfkill_switch_all(const enum rfkill_type type, bool blocked);
+void rfkill_epo(void);
+void rfkill_restore_states(void);
+void rfkill_remove_epo_lock(void);
+bool rfkill_is_epo_lock_active(void);
+bool rfkill_get_global_sw_state(const enum rfkill_type type);
+
+/* input handler */
+int rfkill_handler_init(void);
+void rfkill_handler_exit(void);
+
+#endif /* __RFKILL_INPUT_H */
diff --git a/net/wimax/Kconfig b/net/wimax/Kconfig
index 1b46747a5f5..0bdbb692820 100644
--- a/net/wimax/Kconfig
+++ b/net/wimax/Kconfig
@@ -1,23 +1,9 @@
 #
 # WiMAX LAN device configuration
 #
-# Note the ugly 'depends on' on WIMAX: that disallows RFKILL to be a
-# module if WIMAX is to be linked in. The WiMAX code is done in such a
-# way that it doesn't require and explicit dependency on RFKILL in
-# case an embedded system wants to rip it out.
-#
-# As well, enablement of the RFKILL code means we need the INPUT layer
-# support to inject events coming from hw rfkill switches. That
-# dependency could be killed if input.h provided appropriate means to
-# work when input is disabled.
-
-comment "WiMAX Wireless Broadband support requires CONFIG_INPUT enabled"
-	depends on INPUT = n && RFKILL != n
 
 menuconfig WIMAX
 	tristate "WiMAX Wireless Broadband support"
-	depends on (y && RFKILL != m) || m
-	depends on (INPUT && RFKILL != n) || RFKILL = n
 	help
 
 	  Select to configure support for devices that provide
diff --git a/net/wimax/op-rfkill.c b/net/wimax/op-rfkill.c
index a3616e2ccb8..bb102e4aa3e 100644
--- a/net/wimax/op-rfkill.c
+++ b/net/wimax/op-rfkill.c
@@ -29,8 +29,8 @@
  * A non-polled generic rfkill device is embedded into the WiMAX
  * subsystem's representation of a device.
  *
- * FIXME: Need polled support? use a timer or add the implementation
- *     to the stack.
+ * FIXME: Need polled support? Let drivers provide a poll routine
+ *	  and hand it to rfkill ops then?
  *
  * All device drivers have to do is after wimax_dev_init(), call
  * wimax_report_rfkill_hw() and wimax_report_rfkill_sw() to update
@@ -43,7 +43,7 @@
  *   wimax_rfkill()             Kernel calling wimax_rfkill()
  *     __wimax_rf_toggle_radio()
  *
- * wimax_rfkill_toggle_radio()  RF-Kill subsytem calling
+ * wimax_rfkill_set_radio_block()  RF-Kill subsytem calling
  *   __wimax_rf_toggle_radio()
  *
  * __wimax_rf_toggle_radio()
@@ -65,15 +65,11 @@
 #include <linux/wimax.h>
 #include <linux/security.h>
 #include <linux/rfkill.h>
-#include <linux/input.h>
 #include "wimax-internal.h"
 
 #define D_SUBMODULE op_rfkill
 #include "debug-levels.h"
 
-#if defined(CONFIG_RFKILL) || defined(CONFIG_RFKILL_MODULE)
-
-
 /**
  * wimax_report_rfkill_hw - Reports changes in the hardware RF switch
  *
@@ -99,7 +95,6 @@ void wimax_report_rfkill_hw(struct wimax_dev *wimax_dev,
 	int result;
 	struct device *dev = wimax_dev_to_dev(wimax_dev);
 	enum wimax_st wimax_state;
-	enum rfkill_state rfkill_state;
 
 	d_fnstart(3, dev, "(wimax_dev %p state %u)\n", wimax_dev, state);
 	BUG_ON(state == WIMAX_RF_QUERY);
@@ -112,16 +107,15 @@ void wimax_report_rfkill_hw(struct wimax_dev *wimax_dev,
 
 	if (state != wimax_dev->rf_hw) {
 		wimax_dev->rf_hw = state;
-		rfkill_state = state == WIMAX_RF_ON ?
-			RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED;
 		if (wimax_dev->rf_hw == WIMAX_RF_ON
 		    && wimax_dev->rf_sw == WIMAX_RF_ON)
 			wimax_state = WIMAX_ST_READY;
 		else
 			wimax_state = WIMAX_ST_RADIO_OFF;
+
+		rfkill_set_hw_state(wimax_dev->rfkill, state == WIMAX_RF_OFF);
+
 		__wimax_state_change(wimax_dev, wimax_state);
-		input_report_key(wimax_dev->rfkill_input, KEY_WIMAX,
-				 rfkill_state);
 	}
 error_not_ready:
 	mutex_unlock(&wimax_dev->mutex);
@@ -174,6 +168,7 @@ void wimax_report_rfkill_sw(struct wimax_dev *wimax_dev,
 		else
 			wimax_state = WIMAX_ST_RADIO_OFF;
 		__wimax_state_change(wimax_dev, wimax_state);
+		rfkill_set_sw_state(wimax_dev->rfkill, state == WIMAX_RF_OFF);
 	}
 error_not_ready:
 	mutex_unlock(&wimax_dev->mutex);
@@ -249,36 +244,31 @@ out_no_change:
  *
  * NOTE: This call will block until the operation is completed.
  */
-static
-int wimax_rfkill_toggle_radio(void *data, enum rfkill_state state)
+static int wimax_rfkill_set_radio_block(void *data, bool blocked)
 {
 	int result;
 	struct wimax_dev *wimax_dev = data;
 	struct device *dev = wimax_dev_to_dev(wimax_dev);
 	enum wimax_rf_state rf_state;
 
-	d_fnstart(3, dev, "(wimax_dev %p state %u)\n", wimax_dev, state);
-	switch (state) {
-	case RFKILL_STATE_SOFT_BLOCKED:
+	d_fnstart(3, dev, "(wimax_dev %p blocked %u)\n", wimax_dev, blocked);
+	rf_state = WIMAX_RF_ON;
+	if (blocked)
 		rf_state = WIMAX_RF_OFF;
-		break;
-	case RFKILL_STATE_UNBLOCKED:
-		rf_state = WIMAX_RF_ON;
-		break;
-	default:
-		BUG();
-	}
 	mutex_lock(&wimax_dev->mutex);
 	if (wimax_dev->state <= __WIMAX_ST_QUIESCING)
-		result = 0;	/* just pretend it didn't happen */
+		result = 0;
 	else
 		result = __wimax_rf_toggle_radio(wimax_dev, rf_state);
 	mutex_unlock(&wimax_dev->mutex);
-	d_fnend(3, dev, "(wimax_dev %p state %u) = %d\n",
-		wimax_dev, state, result);
+	d_fnend(3, dev, "(wimax_dev %p blocked %u) = %d\n",
+		wimax_dev, blocked, result);
 	return result;
 }
 
+static const struct rfkill_ops wimax_rfkill_ops = {
+	.set_block = wimax_rfkill_set_radio_block,
+};
 
 /**
  * wimax_rfkill - Set the software RF switch state for a WiMAX device
@@ -322,6 +312,7 @@ int wimax_rfkill(struct wimax_dev *wimax_dev, enum wimax_rf_state state)
 		result = __wimax_rf_toggle_radio(wimax_dev, state);
 		if (result < 0)
 			goto error;
+		rfkill_set_sw_state(wimax_dev->rfkill, state == WIMAX_RF_OFF);
 		break;
 	case WIMAX_RF_QUERY:
 		break;
@@ -349,40 +340,20 @@ int wimax_rfkill_add(struct wimax_dev *wimax_dev)
 {
 	int result;
 	struct rfkill *rfkill;
-	struct input_dev *input_dev;
 	struct device *dev = wimax_dev_to_dev(wimax_dev);
 
 	d_fnstart(3, dev, "(wimax_dev %p)\n", wimax_dev);
 	/* Initialize RF Kill */
 	result = -ENOMEM;
-	rfkill = rfkill_allocate(dev, RFKILL_TYPE_WIMAX);
+	rfkill = rfkill_alloc(wimax_dev->name, dev, RFKILL_TYPE_WIMAX,
+			      &wimax_rfkill_ops, wimax_dev);
 	if (rfkill == NULL)
 		goto error_rfkill_allocate;
+
+	d_printf(1, dev, "rfkill %p\n", rfkill);
+
 	wimax_dev->rfkill = rfkill;
 
-	rfkill->name = wimax_dev->name;
-	rfkill->state = RFKILL_STATE_UNBLOCKED;
-	rfkill->data = wimax_dev;
-	rfkill->toggle_radio = wimax_rfkill_toggle_radio;
-
-	/* Initialize the input device for the hw key */
-	input_dev = input_allocate_device();
-	if (input_dev == NULL)
-		goto error_input_allocate;
-	wimax_dev->rfkill_input = input_dev;
-	d_printf(1, dev, "rfkill %p input %p\n", rfkill, input_dev);
-
-	input_dev->name = wimax_dev->name;
-	/* FIXME: get a real device bus ID and stuff? do we care? */
-	input_dev->id.bustype = BUS_HOST;
-	input_dev->id.vendor = 0xffff;
-	input_dev->evbit[0] = BIT(EV_KEY);
-	set_bit(KEY_WIMAX, input_dev->keybit);
-
-	/* Register both */
-	result = input_register_device(wimax_dev->rfkill_input);
-	if (result < 0)
-		goto error_input_register;
 	result = rfkill_register(wimax_dev->rfkill);
 	if (result < 0)
 		goto error_rfkill_register;
@@ -394,17 +365,8 @@ int wimax_rfkill_add(struct wimax_dev *wimax_dev)
 	d_fnend(3, dev, "(wimax_dev %p) = 0\n", wimax_dev);
 	return 0;
 
-	/* if rfkill_register() suceeds, can't use rfkill_free() any
-	 * more, only rfkill_unregister() [it owns the refcount]; with
-	 * the input device we have the same issue--hence the if. */
 error_rfkill_register:
-	input_unregister_device(wimax_dev->rfkill_input);
-	wimax_dev->rfkill_input = NULL;
-error_input_register:
-	if (wimax_dev->rfkill_input)
-		input_free_device(wimax_dev->rfkill_input);
-error_input_allocate:
-	rfkill_free(wimax_dev->rfkill);
+	rfkill_destroy(wimax_dev->rfkill);
 error_rfkill_allocate:
 	d_fnend(3, dev, "(wimax_dev %p) = %d\n", wimax_dev, result);
 	return result;
@@ -423,45 +385,12 @@ void wimax_rfkill_rm(struct wimax_dev *wimax_dev)
 {
 	struct device *dev = wimax_dev_to_dev(wimax_dev);
 	d_fnstart(3, dev, "(wimax_dev %p)\n", wimax_dev);
-	rfkill_unregister(wimax_dev->rfkill);	/* frees */
-	input_unregister_device(wimax_dev->rfkill_input);
+	rfkill_unregister(wimax_dev->rfkill);
+	rfkill_destroy(wimax_dev->rfkill);
 	d_fnend(3, dev, "(wimax_dev %p)\n", wimax_dev);
 }
 
 
-#else /* #ifdef CONFIG_RFKILL */
-
-void wimax_report_rfkill_hw(struct wimax_dev *wimax_dev,
-			    enum wimax_rf_state state)
-{
-}
-EXPORT_SYMBOL_GPL(wimax_report_rfkill_hw);
-
-void wimax_report_rfkill_sw(struct wimax_dev *wimax_dev,
-			    enum wimax_rf_state state)
-{
-}
-EXPORT_SYMBOL_GPL(wimax_report_rfkill_sw);
-
-int wimax_rfkill(struct wimax_dev *wimax_dev,
-		 enum wimax_rf_state state)
-{
-	return WIMAX_RF_ON << 1 | WIMAX_RF_ON;
-}
-EXPORT_SYMBOL_GPL(wimax_rfkill);
-
-int wimax_rfkill_add(struct wimax_dev *wimax_dev)
-{
-	return 0;
-}
-
-void wimax_rfkill_rm(struct wimax_dev *wimax_dev)
-{
-}
-
-#endif /* #ifdef CONFIG_RFKILL */
-
-
 /*
  * Exporting to user space over generic netlink
  *
-- 
cgit v1.2.3-70-g09d2


From c64fb01627e24725d1f9d535e4426475a4415753 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 2 Jun 2009 13:01:38 +0200
Subject: rfkill: create useful userspace interface

The new code added by this patch will make rfkill create
a misc character device /dev/rfkill that userspace can use
to control rfkill soft blocks and get status of devices as
well as events when the status changes.

Using it is very simple -- when you open it you can read
a number of times to get the initial state, and every
further read blocks (you can poll) on getting the next
event from the kernel. The same structure you read is
also used when writing to it to change the soft block of
a given device, all devices of a given type, or all
devices.

This also makes CONFIG_RFKILL_INPUT selectable again in
order to be able to test without it present since its
functionality can now be replaced by userspace entirely
and distros and users may not want the input part of
rfkill interfering with their userspace code. We will
also write a userspace daemon to handle all that and
consequently add the input code to the feature removal
schedule.

In order to have rfkilld support both kernels with and
without CONFIG_RFKILL_INPUT (or new kernels after its
eventual removal) we also add an ioctl (that only exists
if rfkill-input is present) to disable rfkill-input.
It is not very efficient, but at least gives the correct
behaviour in all cases.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 Documentation/feature-removal-schedule.txt |   7 +
 include/linux/rfkill.h                     |  84 +++++---
 net/rfkill/Kconfig                         |   4 +-
 net/rfkill/core.c                          | 330 ++++++++++++++++++++++++++++-
 4 files changed, 394 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index de491a3e231..edb2f0b0761 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -437,3 +437,10 @@ Why:	Superseded by tdfxfb. I2C/DDC support used to live in a separate
 	driver but this caused driver conflicts.
 Who:	Jean Delvare <khali@linux-fr.org>
 	Krzysztof Helt <krzysztof.h1@wp.pl>
+
+---------------------------
+
+What:	CONFIG_RFKILL_INPUT
+When:	2.6.33
+Why:	Should be implemented in userspace, policy daemon.
+Who:	Johannes Berg <johannes@sipsolutions.net>
diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index 090852c8de7..7c116f6631b 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -22,34 +22,17 @@
  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  */
 
+#include <linux/types.h>
 
 /* define userspace visible states */
 #define RFKILL_STATE_SOFT_BLOCKED	0
 #define RFKILL_STATE_UNBLOCKED		1
 #define RFKILL_STATE_HARD_BLOCKED	2
 
-/* and that's all userspace gets */
-#ifdef __KERNEL__
-/* don't allow anyone to use these in the kernel */
-enum rfkill_user_states {
-	RFKILL_USER_STATE_SOFT_BLOCKED	= RFKILL_STATE_SOFT_BLOCKED,
-	RFKILL_USER_STATE_UNBLOCKED	= RFKILL_STATE_UNBLOCKED,
-	RFKILL_USER_STATE_HARD_BLOCKED	= RFKILL_STATE_HARD_BLOCKED,
-};
-#undef RFKILL_STATE_SOFT_BLOCKED
-#undef RFKILL_STATE_UNBLOCKED
-#undef RFKILL_STATE_HARD_BLOCKED
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/mutex.h>
-#include <linux/device.h>
-#include <linux/leds.h>
-
 /**
  * enum rfkill_type - type of rfkill switch.
  *
+ * @RFKILL_TYPE_ALL: toggles all switches (userspace only)
  * @RFKILL_TYPE_WLAN: switch is on a 802.11 wireless network device.
  * @RFKILL_TYPE_BLUETOOTH: switch is on a bluetooth device.
  * @RFKILL_TYPE_UWB: switch is on a ultra wideband device.
@@ -58,6 +41,7 @@ enum rfkill_user_states {
  * @NUM_RFKILL_TYPES: number of defined rfkill types
  */
 enum rfkill_type {
+	RFKILL_TYPE_ALL = 0,
 	RFKILL_TYPE_WLAN,
 	RFKILL_TYPE_BLUETOOTH,
 	RFKILL_TYPE_UWB,
@@ -66,6 +50,62 @@ enum rfkill_type {
 	NUM_RFKILL_TYPES,
 };
 
+/**
+ * enum rfkill_operation - operation types
+ * @RFKILL_OP_ADD: a device was added
+ * @RFKILL_OP_DEL: a device was removed
+ * @RFKILL_OP_CHANGE: a device's state changed -- userspace changes one device
+ * @RFKILL_OP_CHANGE_ALL: userspace changes all devices (of a type, or all)
+ */
+enum rfkill_operation {
+	RFKILL_OP_ADD = 0,
+	RFKILL_OP_DEL,
+	RFKILL_OP_CHANGE,
+	RFKILL_OP_CHANGE_ALL,
+};
+
+/**
+ * struct rfkill_event - events for userspace on /dev/rfkill
+ * @idx: index of dev rfkill
+ * @type: type of the rfkill struct
+ * @op: operation code
+ * @hard: hard state (0/1)
+ * @soft: soft state (0/1)
+ *
+ * Structure used for userspace communication on /dev/rfkill,
+ * used for events from the kernel and control to the kernel.
+ */
+struct rfkill_event {
+	__u32 idx;
+	__u8  type;
+	__u8  op;
+	__u8  soft, hard;
+} __packed;
+
+/* ioctl for turning off rfkill-input (if present) */
+#define RFKILL_IOC_MAGIC	'R'
+#define RFKILL_IOC_NOINPUT	1
+#define RFKILL_IOCTL_NOINPUT	_IO(RFKILL_IOC_MAGIC, RFKILL_IOC_NOINPUT)
+
+/* and that's all userspace gets */
+#ifdef __KERNEL__
+/* don't allow anyone to use these in the kernel */
+enum rfkill_user_states {
+	RFKILL_USER_STATE_SOFT_BLOCKED	= RFKILL_STATE_SOFT_BLOCKED,
+	RFKILL_USER_STATE_UNBLOCKED	= RFKILL_STATE_UNBLOCKED,
+	RFKILL_USER_STATE_HARD_BLOCKED	= RFKILL_STATE_HARD_BLOCKED,
+};
+#undef RFKILL_STATE_SOFT_BLOCKED
+#undef RFKILL_STATE_UNBLOCKED
+#undef RFKILL_STATE_HARD_BLOCKED
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+#include <linux/leds.h>
+
 /* this is opaque */
 struct rfkill;
 
@@ -84,11 +124,7 @@ struct rfkill;
  *	the rfkill core query your driver before setting a requested
  *	block.
  * @set_block: turn the transmitter on (blocked == false) or off
- *	(blocked == true) -- this is called only while the transmitter
- *	is not hard-blocked, but note that the core's view of whether
- *	the transmitter is hard-blocked might differ from your driver's
- *	view due to race conditions, so it is possible that it is still
- *	called at the same time as you are calling rfkill_set_hw_state().
+ *	(blocked == true) -- ignore and return 0 when hard blocked.
  *	This callback must be assigned.
  */
 struct rfkill_ops {
diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig
index b47f72fae05..fd7600d8ab1 100644
--- a/net/rfkill/Kconfig
+++ b/net/rfkill/Kconfig
@@ -18,7 +18,7 @@ config RFKILL_LEDS
 	default y
 
 config RFKILL_INPUT
-	bool
+	bool "RF switch input support"
 	depends on RFKILL
 	depends on INPUT = y || RFKILL = INPUT
-	default y
+	default y if !EMBEDDED
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 30a6f8d819b..2230aa6b14f 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -28,6 +28,10 @@
 #include <linux/mutex.h>
 #include <linux/rfkill.h>
 #include <linux/spinlock.h>
+#include <linux/miscdevice.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/fs.h>
 
 #include "rfkill.h"
 
@@ -49,6 +53,8 @@ struct rfkill {
 
 	unsigned long		state;
 
+	u32			idx;
+
 	bool			registered;
 	bool			suspended;
 
@@ -69,6 +75,18 @@ struct rfkill {
 };
 #define to_rfkill(d)	container_of(d, struct rfkill, dev)
 
+struct rfkill_int_event {
+	struct list_head	list;
+	struct rfkill_event	ev;
+};
+
+struct rfkill_data {
+	struct list_head	list;
+	struct list_head	events;
+	struct mutex		mtx;
+	wait_queue_head_t	read_wait;
+	bool			input_handler;
+};
 
 
 MODULE_AUTHOR("Ivo van Doorn <IvDoorn@gmail.com>");
@@ -90,6 +108,7 @@ MODULE_LICENSE("GPL");
  */
 static LIST_HEAD(rfkill_list);	/* list of registered rf switches */
 static DEFINE_MUTEX(rfkill_global_mutex);
+static LIST_HEAD(rfkill_fds);	/* list of open fds of /dev/rfkill */
 
 static unsigned int rfkill_default_state = 1;
 module_param_named(default_state, rfkill_default_state, uint, 0444);
@@ -171,12 +190,48 @@ static inline void rfkill_led_trigger_unregister(struct rfkill *rfkill)
 }
 #endif /* CONFIG_RFKILL_LEDS */
 
-static void rfkill_uevent(struct rfkill *rfkill)
+static void rfkill_fill_event(struct rfkill_event *ev, struct rfkill *rfkill,
+			      enum rfkill_operation op)
+{
+	unsigned long flags;
+
+	ev->idx = rfkill->idx;
+	ev->type = rfkill->type;
+	ev->op = op;
+
+	spin_lock_irqsave(&rfkill->lock, flags);
+	ev->hard = !!(rfkill->state & RFKILL_BLOCK_HW);
+	ev->soft = !!(rfkill->state & (RFKILL_BLOCK_SW |
+					RFKILL_BLOCK_SW_PREV));
+	spin_unlock_irqrestore(&rfkill->lock, flags);
+}
+
+static void rfkill_send_events(struct rfkill *rfkill, enum rfkill_operation op)
+{
+	struct rfkill_data *data;
+	struct rfkill_int_event *ev;
+
+	list_for_each_entry(data, &rfkill_fds, list) {
+		ev = kzalloc(sizeof(*ev), GFP_KERNEL);
+		if (!ev)
+			continue;
+		rfkill_fill_event(&ev->ev, rfkill, op);
+		mutex_lock(&data->mtx);
+		list_add_tail(&ev->list, &data->events);
+		mutex_unlock(&data->mtx);
+		wake_up_interruptible(&data->read_wait);
+	}
+}
+
+static void rfkill_event(struct rfkill *rfkill)
 {
 	if (!rfkill->registered || rfkill->suspended)
 		return;
 
 	kobject_uevent(&rfkill->dev.kobj, KOBJ_CHANGE);
+
+	/* also send event to /dev/rfkill */
+	rfkill_send_events(rfkill, RFKILL_OP_CHANGE);
 }
 
 static bool __rfkill_set_hw_state(struct rfkill *rfkill,
@@ -260,9 +315,12 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked)
 	spin_unlock_irqrestore(&rfkill->lock, flags);
 
 	rfkill_led_trigger_event(rfkill);
-	rfkill_uevent(rfkill);
+	rfkill_event(rfkill);
 }
 
+#ifdef CONFIG_RFKILL_INPUT
+static atomic_t rfkill_input_disabled = ATOMIC_INIT(0);
+
 /**
  * __rfkill_switch_all - Toggle state of all switches of given type
  * @type: type of interfaces to be affected
@@ -299,6 +357,9 @@ static void __rfkill_switch_all(const enum rfkill_type type, bool blocked)
  */
 void rfkill_switch_all(enum rfkill_type type, bool blocked)
 {
+	if (atomic_read(&rfkill_input_disabled))
+		return;
+
 	mutex_lock(&rfkill_global_mutex);
 
 	if (!rfkill_epo_lock_active)
@@ -321,6 +382,9 @@ void rfkill_epo(void)
 	struct rfkill *rfkill;
 	int i;
 
+	if (atomic_read(&rfkill_input_disabled))
+		return;
+
 	mutex_lock(&rfkill_global_mutex);
 
 	rfkill_epo_lock_active = true;
@@ -331,6 +395,7 @@ void rfkill_epo(void)
 		rfkill_global_states[i].def = rfkill_global_states[i].cur;
 		rfkill_global_states[i].cur = true;
 	}
+
 	mutex_unlock(&rfkill_global_mutex);
 }
 
@@ -345,6 +410,9 @@ void rfkill_restore_states(void)
 {
 	int i;
 
+	if (atomic_read(&rfkill_input_disabled))
+		return;
+
 	mutex_lock(&rfkill_global_mutex);
 
 	rfkill_epo_lock_active = false;
@@ -361,6 +429,9 @@ void rfkill_restore_states(void)
  */
 void rfkill_remove_epo_lock(void)
 {
+	if (atomic_read(&rfkill_input_disabled))
+		return;
+
 	mutex_lock(&rfkill_global_mutex);
 	rfkill_epo_lock_active = false;
 	mutex_unlock(&rfkill_global_mutex);
@@ -391,9 +462,12 @@ bool rfkill_get_global_sw_state(const enum rfkill_type type)
 {
 	return rfkill_global_states[type].cur;
 }
+#endif
 
 void rfkill_set_global_sw_state(const enum rfkill_type type, bool blocked)
 {
+	BUG_ON(type == RFKILL_TYPE_ALL);
+
 	mutex_lock(&rfkill_global_mutex);
 
 	/* don't allow unblock when epo */
@@ -537,6 +611,15 @@ static ssize_t rfkill_type_show(struct device *dev,
 	return sprintf(buf, "%s\n", rfkill_get_type_str(rfkill->type));
 }
 
+static ssize_t rfkill_idx_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buf)
+{
+	struct rfkill *rfkill = to_rfkill(dev);
+
+	return sprintf(buf, "%d\n", rfkill->idx);
+}
+
 static u8 user_state_from_blocked(unsigned long state)
 {
 	if (state & RFKILL_BLOCK_HW)
@@ -594,6 +677,7 @@ static ssize_t rfkill_claim_store(struct device *dev,
 static struct device_attribute rfkill_dev_attrs[] = {
 	__ATTR(name, S_IRUGO, rfkill_name_show, NULL),
 	__ATTR(type, S_IRUGO, rfkill_type_show, NULL),
+	__ATTR(index, S_IRUGO, rfkill_idx_show, NULL),
 	__ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store),
 	__ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store),
 	__ATTR_NULL
@@ -708,7 +792,7 @@ struct rfkill * __must_check rfkill_alloc(const char *name,
 	if (WARN_ON(!name))
 		return NULL;
 
-	if (WARN_ON(type >= NUM_RFKILL_TYPES))
+	if (WARN_ON(type == RFKILL_TYPE_ALL || type >= NUM_RFKILL_TYPES))
 		return NULL;
 
 	rfkill = kzalloc(sizeof(*rfkill), GFP_KERNEL);
@@ -754,7 +838,9 @@ static void rfkill_uevent_work(struct work_struct *work)
 
 	rfkill = container_of(work, struct rfkill, uevent_work);
 
-	rfkill_uevent(rfkill);
+	mutex_lock(&rfkill_global_mutex);
+	rfkill_event(rfkill);
+	mutex_unlock(&rfkill_global_mutex);
 }
 
 static void rfkill_sync_work(struct work_struct *work)
@@ -785,6 +871,7 @@ int __must_check rfkill_register(struct rfkill *rfkill)
 		goto unlock;
 	}
 
+	rfkill->idx = rfkill_no;
 	dev_set_name(dev, "rfkill%lu", rfkill_no);
 	rfkill_no++;
 
@@ -819,6 +906,7 @@ int __must_check rfkill_register(struct rfkill *rfkill)
 
 	INIT_WORK(&rfkill->sync_work, rfkill_sync_work);
 	schedule_work(&rfkill->sync_work);
+	rfkill_send_events(rfkill, RFKILL_OP_ADD);
 
 	mutex_unlock(&rfkill_global_mutex);
 	return 0;
@@ -848,6 +936,7 @@ void rfkill_unregister(struct rfkill *rfkill)
 	device_del(&rfkill->dev);
 
 	mutex_lock(&rfkill_global_mutex);
+	rfkill_send_events(rfkill, RFKILL_OP_DEL);
 	list_del_init(&rfkill->node);
 	mutex_unlock(&rfkill_global_mutex);
 
@@ -862,6 +951,227 @@ void rfkill_destroy(struct rfkill *rfkill)
 }
 EXPORT_SYMBOL(rfkill_destroy);
 
+static int rfkill_fop_open(struct inode *inode, struct file *file)
+{
+	struct rfkill_data *data;
+	struct rfkill *rfkill;
+	struct rfkill_int_event *ev, *tmp;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&data->events);
+	mutex_init(&data->mtx);
+	init_waitqueue_head(&data->read_wait);
+
+	mutex_lock(&rfkill_global_mutex);
+	mutex_lock(&data->mtx);
+	/*
+	 * start getting events from elsewhere but hold mtx to get
+	 * startup events added first
+	 */
+	list_add(&data->list, &rfkill_fds);
+
+	list_for_each_entry(rfkill, &rfkill_list, node) {
+		ev = kzalloc(sizeof(*ev), GFP_KERNEL);
+		if (!ev)
+			goto free;
+		rfkill_fill_event(&ev->ev, rfkill, RFKILL_OP_ADD);
+		list_add_tail(&ev->list, &data->events);
+	}
+	mutex_unlock(&data->mtx);
+	mutex_unlock(&rfkill_global_mutex);
+
+	file->private_data = data;
+
+	return nonseekable_open(inode, file);
+
+ free:
+	mutex_unlock(&data->mtx);
+	mutex_unlock(&rfkill_global_mutex);
+	mutex_destroy(&data->mtx);
+	list_for_each_entry_safe(ev, tmp, &data->events, list)
+		kfree(ev);
+	kfree(data);
+	return -ENOMEM;
+}
+
+static unsigned int rfkill_fop_poll(struct file *file, poll_table *wait)
+{
+	struct rfkill_data *data = file->private_data;
+	unsigned int res = POLLOUT | POLLWRNORM;
+
+	poll_wait(file, &data->read_wait, wait);
+
+	mutex_lock(&data->mtx);
+	if (!list_empty(&data->events))
+		res = POLLIN | POLLRDNORM;
+	mutex_unlock(&data->mtx);
+
+	return res;
+}
+
+static bool rfkill_readable(struct rfkill_data *data)
+{
+	bool r;
+
+	mutex_lock(&data->mtx);
+	r = !list_empty(&data->events);
+	mutex_unlock(&data->mtx);
+
+	return r;
+}
+
+static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
+			       size_t count, loff_t *pos)
+{
+	struct rfkill_data *data = file->private_data;
+	struct rfkill_int_event *ev;
+	unsigned long sz;
+	int ret;
+
+	mutex_lock(&data->mtx);
+
+	while (list_empty(&data->events)) {
+		if (file->f_flags & O_NONBLOCK) {
+			ret = -EAGAIN;
+			goto out;
+		}
+		mutex_unlock(&data->mtx);
+		ret = wait_event_interruptible(data->read_wait,
+					       rfkill_readable(data));
+		mutex_lock(&data->mtx);
+
+		if (ret)
+			goto out;
+	}
+
+	ev = list_first_entry(&data->events, struct rfkill_int_event,
+				list);
+
+	sz = min_t(unsigned long, sizeof(ev->ev), count);
+	ret = sz;
+	if (copy_to_user(buf, &ev->ev, sz))
+		ret = -EFAULT;
+
+	list_del(&ev->list);
+	kfree(ev);
+ out:
+	mutex_unlock(&data->mtx);
+	return ret;
+}
+
+static ssize_t rfkill_fop_write(struct file *file, const char __user *buf,
+				size_t count, loff_t *pos)
+{
+	struct rfkill *rfkill;
+	struct rfkill_event ev;
+
+	/* we don't need the 'hard' variable but accept it */
+	if (count < sizeof(ev) - 1)
+		return -EINVAL;
+
+	if (copy_from_user(&ev, buf, sizeof(ev) - 1))
+		return -EFAULT;
+
+	if (ev.op != RFKILL_OP_CHANGE && ev.op != RFKILL_OP_CHANGE_ALL)
+		return -EINVAL;
+
+	if (ev.type >= NUM_RFKILL_TYPES)
+		return -EINVAL;
+
+	mutex_lock(&rfkill_global_mutex);
+
+	if (ev.op == RFKILL_OP_CHANGE_ALL) {
+		if (ev.type == RFKILL_TYPE_ALL) {
+			enum rfkill_type i;
+			for (i = 0; i < NUM_RFKILL_TYPES; i++)
+				rfkill_global_states[i].cur = ev.soft;
+		} else {
+			rfkill_global_states[ev.type].cur = ev.soft;
+		}
+	}
+
+	list_for_each_entry(rfkill, &rfkill_list, node) {
+		if (rfkill->idx != ev.idx && ev.op != RFKILL_OP_CHANGE_ALL)
+			continue;
+
+		if (rfkill->type != ev.type && ev.type != RFKILL_TYPE_ALL)
+			continue;
+
+		rfkill_set_block(rfkill, ev.soft);
+	}
+	mutex_unlock(&rfkill_global_mutex);
+
+	return count;
+}
+
+static int rfkill_fop_release(struct inode *inode, struct file *file)
+{
+	struct rfkill_data *data = file->private_data;
+	struct rfkill_int_event *ev, *tmp;
+
+	mutex_lock(&rfkill_global_mutex);
+	list_del(&data->list);
+	mutex_unlock(&rfkill_global_mutex);
+
+	mutex_destroy(&data->mtx);
+	list_for_each_entry_safe(ev, tmp, &data->events, list)
+		kfree(ev);
+
+#ifdef CONFIG_RFKILL_INPUT
+	if (data->input_handler)
+		atomic_dec(&rfkill_input_disabled);
+#endif
+
+	kfree(data);
+
+	return 0;
+}
+
+#ifdef CONFIG_RFKILL_INPUT
+static long rfkill_fop_ioctl(struct file *file, unsigned int cmd,
+			     unsigned long arg)
+{
+	struct rfkill_data *data = file->private_data;
+
+	if (_IOC_TYPE(cmd) != RFKILL_IOC_MAGIC)
+		return -ENOSYS;
+
+	if (_IOC_NR(cmd) != RFKILL_IOC_NOINPUT)
+		return -ENOSYS;
+
+	mutex_lock(&data->mtx);
+
+	if (!data->input_handler) {
+		atomic_inc(&rfkill_input_disabled);
+		data->input_handler = true;
+	}
+
+	mutex_unlock(&data->mtx);
+
+	return 0;
+}
+#endif
+
+static const struct file_operations rfkill_fops = {
+	.open		= rfkill_fop_open,
+	.read		= rfkill_fop_read,
+	.write		= rfkill_fop_write,
+	.poll		= rfkill_fop_poll,
+	.release	= rfkill_fop_release,
+#ifdef CONFIG_RFKILL_INPUT
+	.unlocked_ioctl	= rfkill_fop_ioctl,
+	.compat_ioctl	= rfkill_fop_ioctl,
+#endif
+};
+
+static struct miscdevice rfkill_miscdev = {
+	.name	= "rfkill",
+	.fops	= &rfkill_fops,
+	.minor	= MISC_DYNAMIC_MINOR,
+};
 
 static int __init rfkill_init(void)
 {
@@ -875,10 +1185,19 @@ static int __init rfkill_init(void)
 	if (error)
 		goto out;
 
+	error = misc_register(&rfkill_miscdev);
+	if (error) {
+		class_unregister(&rfkill_class);
+		goto out;
+	}
+
 #ifdef CONFIG_RFKILL_INPUT
 	error = rfkill_handler_init();
-	if (error)
+	if (error) {
+		misc_deregister(&rfkill_miscdev);
 		class_unregister(&rfkill_class);
+		goto out;
+	}
 #endif
 
  out:
@@ -891,6 +1210,7 @@ static void __exit rfkill_exit(void)
 #ifdef CONFIG_RFKILL_INPUT
 	rfkill_handler_exit();
 #endif
+	misc_deregister(&rfkill_miscdev);
 	class_unregister(&rfkill_class);
 }
 module_exit(rfkill_exit);
-- 
cgit v1.2.3-70-g09d2


From 7643a2c3fcc13cd6fbd731f214463547383418ae Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 2 Jun 2009 13:01:39 +0200
Subject: cfg80211: move txpower wext from mac80211

This patch introduces new cfg80211 API to set the TX power
via cfg80211, puts the wext code into cfg80211 and updates
mac80211 to use all that. The -ENETDOWN bits are a hack but
will go away soon.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h     | 30 +++++++++++++++++
 net/mac80211/cfg.c         | 54 +++++++++++++++++++++++++++++++
 net/mac80211/wext.c        | 80 ++--------------------------------------------
 net/wireless/wext-compat.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 166 insertions(+), 78 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f20da7d63b1..8b8e4b89362 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -751,6 +751,21 @@ enum wiphy_params_flags {
 	WIPHY_PARAM_RTS_THRESHOLD	= 1 << 3,
 };
 
+/**
+ * enum tx_power_setting - TX power adjustment
+ *
+ * @TX_POWER_AUTOMATIC: the dbm parameter is ignored
+ * @TX_POWER_LIMITED: limit TX power by the dbm parameter
+ * @TX_POWER_FIXED: fix TX power to the dbm parameter
+ * @TX_POWER_OFF: turn off completely (will go away)
+ */
+enum tx_power_setting {
+	TX_POWER_AUTOMATIC,
+	TX_POWER_LIMITED,
+	TX_POWER_FIXED,
+	TX_POWER_OFF,
+};
+
 /**
  * struct cfg80211_ops - backend description for wireless configuration
  *
@@ -837,6 +852,11 @@ enum wiphy_params_flags {
  *	@changed bitfield (see &enum wiphy_params_flags) describes which values
  *	have changed. The actual parameter values are available in
  *	struct wiphy. If returning an error, no value should be changed.
+ *
+ * @set_tx_power: set the transmit power according to the parameters
+ * @get_tx_power: store the current TX power into the dbm variable;
+ *	return 0 if successful; or -ENETDOWN if successful but power
+ *	is disabled (this will go away)
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy);
@@ -928,6 +948,10 @@ struct cfg80211_ops {
 	int	(*leave_ibss)(struct wiphy *wiphy, struct net_device *dev);
 
 	int	(*set_wiphy_params)(struct wiphy *wiphy, u32 changed);
+
+	int	(*set_tx_power)(struct wiphy *wiphy,
+				enum tx_power_setting type, int dbm);
+	int	(*get_tx_power)(struct wiphy *wiphy, int *dbm);
 };
 
 /*
@@ -1451,6 +1475,12 @@ int cfg80211_wext_siwencode(struct net_device *dev,
 int cfg80211_wext_giwencode(struct net_device *dev,
 			    struct iw_request_info *info,
 			    struct iw_point *erq, char *keybuf);
+int cfg80211_wext_siwtxpower(struct net_device *dev,
+			     struct iw_request_info *info,
+			     union iwreq_data *data, char *keybuf);
+int cfg80211_wext_giwtxpower(struct net_device *dev,
+			     struct iw_request_info *info,
+			     union iwreq_data *data, char *keybuf);
 
 /*
  * callbacks for asynchronous cfg80211 methods, notification
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index d2fc18c1ae0..81258acf48b 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1334,6 +1334,58 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed)
 	return 0;
 }
 
+static int ieee80211_set_tx_power(struct wiphy *wiphy,
+				  enum tx_power_setting type, int dbm)
+{
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+	struct ieee80211_channel *chan = local->hw.conf.channel;
+	u32 changes = 0;
+	bool radio_enabled = true;
+
+	switch (type) {
+	case TX_POWER_AUTOMATIC:
+		local->user_power_level = -1;
+		break;
+	case TX_POWER_LIMITED:
+		if (dbm < 0)
+			return -EINVAL;
+		local->user_power_level = dbm;
+		break;
+	case TX_POWER_FIXED:
+		if (dbm < 0)
+			return -EINVAL;
+		/* TODO: move to cfg80211 when it knows the channel */
+		if (dbm > chan->max_power)
+			return -EINVAL;
+		local->user_power_level = dbm;
+		break;
+	case TX_POWER_OFF:
+		radio_enabled = false;
+		break;
+	}
+
+	if (radio_enabled != local->hw.conf.radio_enabled) {
+		changes |= IEEE80211_CONF_CHANGE_RADIO_ENABLED;
+		local->hw.conf.radio_enabled = radio_enabled;
+	}
+
+	ieee80211_hw_config(local, changes);
+
+	return 0;
+}
+
+static int ieee80211_get_tx_power(struct wiphy *wiphy, int *dbm)
+{
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+
+	*dbm = local->hw.conf.power_level;
+
+	if (!local->hw.conf.radio_enabled)
+		return -ENETDOWN;
+
+	return 0;
+}
+
 struct cfg80211_ops mac80211_config_ops = {
 	.add_virtual_intf = ieee80211_add_iface,
 	.del_virtual_intf = ieee80211_del_iface,
@@ -1373,4 +1425,6 @@ struct cfg80211_ops mac80211_config_ops = {
 	.join_ibss = ieee80211_join_ibss,
 	.leave_ibss = ieee80211_leave_ibss,
 	.set_wiphy_params = ieee80211_set_wiphy_params,
+	.set_tx_power = ieee80211_set_tx_power,
+	.get_tx_power = ieee80211_get_tx_power,
 };
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index a01154e127f..d2d81b10334 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -306,82 +306,6 @@ static int ieee80211_ioctl_giwrate(struct net_device *dev,
 	return 0;
 }
 
-static int ieee80211_ioctl_siwtxpower(struct net_device *dev,
-				      struct iw_request_info *info,
-				      union iwreq_data *data, char *extra)
-{
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_channel* chan = local->hw.conf.channel;
-	bool reconf = false;
-	u32 reconf_flags = 0;
-	int new_power_level;
-
-	if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM)
-		return -EINVAL;
-	if (data->txpower.flags & IW_TXPOW_RANGE)
-		return -EINVAL;
-	if (!chan)
-		return -EINVAL;
-
-	/* only change when not disabling */
-	if (!data->txpower.disabled) {
-		if (data->txpower.fixed) {
-			if (data->txpower.value < 0)
-				return -EINVAL;
-			new_power_level = data->txpower.value;
-			/*
-			 * Debatable, but we cannot do a fixed power
-			 * level above the regulatory constraint.
-			 * Use "iwconfig wlan0 txpower 15dBm" instead.
-			 */
-			if (new_power_level > chan->max_power)
-				return -EINVAL;
-		} else {
-			/*
-			 * Automatic power level setting, max being the value
-			 * passed in from userland.
-			 */
-			if (data->txpower.value < 0)
-				new_power_level = -1;
-			else
-				new_power_level = data->txpower.value;
-		}
-
-		reconf = true;
-
-		/*
-		 * ieee80211_hw_config() will limit to the channel's
-		 * max power and possibly power constraint from AP.
-		 */
-		local->user_power_level = new_power_level;
-	}
-
-	if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) {
-		local->hw.conf.radio_enabled = !(data->txpower.disabled);
-		reconf_flags |= IEEE80211_CONF_CHANGE_RADIO_ENABLED;
-		ieee80211_led_radio(local, local->hw.conf.radio_enabled);
-	}
-
-	if (reconf || reconf_flags)
-		ieee80211_hw_config(local, reconf_flags);
-
-	return 0;
-}
-
-static int ieee80211_ioctl_giwtxpower(struct net_device *dev,
-				   struct iw_request_info *info,
-				   union iwreq_data *data, char *extra)
-{
-	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-
-	data->txpower.fixed = 1;
-	data->txpower.disabled = !(local->hw.conf.radio_enabled);
-	data->txpower.value = local->hw.conf.power_level;
-	data->txpower.flags = IW_TXPOW_DBM;
-
-	return 0;
-}
-
 static int ieee80211_ioctl_siwpower(struct net_device *dev,
 				    struct iw_request_info *info,
 				    struct iw_param *wrq,
@@ -658,8 +582,8 @@ static const iw_handler ieee80211_handler[] =
 	(iw_handler) cfg80211_wext_giwrts,		/* SIOCGIWRTS */
 	(iw_handler) cfg80211_wext_siwfrag,		/* SIOCSIWFRAG */
 	(iw_handler) cfg80211_wext_giwfrag,		/* SIOCGIWFRAG */
-	(iw_handler) ieee80211_ioctl_siwtxpower,	/* SIOCSIWTXPOW */
-	(iw_handler) ieee80211_ioctl_giwtxpower,	/* SIOCGIWTXPOW */
+	(iw_handler) cfg80211_wext_siwtxpower,		/* SIOCSIWTXPOW */
+	(iw_handler) cfg80211_wext_giwtxpower,		/* SIOCGIWTXPOW */
 	(iw_handler) cfg80211_wext_siwretry,		/* SIOCSIWRETRY */
 	(iw_handler) cfg80211_wext_giwretry,		/* SIOCGIWRETRY */
 	(iw_handler) cfg80211_wext_siwencode,		/* SIOCSIWENCODE */
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 711e00a0c9b..9fbfb8536e7 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -744,3 +744,83 @@ int cfg80211_wext_giwencode(struct net_device *dev,
 	return err;
 }
 EXPORT_SYMBOL_GPL(cfg80211_wext_giwencode);
+
+int cfg80211_wext_siwtxpower(struct net_device *dev,
+			     struct iw_request_info *info,
+			     union iwreq_data *data, char *extra)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	enum tx_power_setting type;
+	int dbm = 0;
+
+	if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM)
+		return -EINVAL;
+	if (data->txpower.flags & IW_TXPOW_RANGE)
+		return -EINVAL;
+
+	if (!rdev->ops->set_tx_power)
+		return -EOPNOTSUPP;
+
+	/* only change when not disabling */
+	if (!data->txpower.disabled) {
+		if (data->txpower.fixed) {
+			/*
+			 * wext doesn't support negative values, see
+			 * below where it's for automatic
+			 */
+			if (data->txpower.value < 0)
+				return -EINVAL;
+			dbm = data->txpower.value;
+			type = TX_POWER_FIXED;
+			/* TODO: do regulatory check! */
+		} else {
+			/*
+			 * Automatic power level setting, max being the value
+			 * passed in from userland.
+			 */
+			if (data->txpower.value < 0) {
+				type = TX_POWER_AUTOMATIC;
+			} else {
+				dbm = data->txpower.value;
+				type = TX_POWER_LIMITED;
+			}
+		}
+	} else {
+		type = TX_POWER_OFF;
+	}
+
+	return rdev->ops->set_tx_power(wdev->wiphy, type, dbm);;
+}
+EXPORT_SYMBOL_GPL(cfg80211_wext_siwtxpower);
+
+int cfg80211_wext_giwtxpower(struct net_device *dev,
+			     struct iw_request_info *info,
+			     union iwreq_data *data, char *extra)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	int err, val;
+
+	if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM)
+		return -EINVAL;
+	if (data->txpower.flags & IW_TXPOW_RANGE)
+		return -EINVAL;
+
+	if (!rdev->ops->get_tx_power)
+		return -EOPNOTSUPP;
+
+	err = rdev->ops->get_tx_power(wdev->wiphy, &val);
+	/* HACK!!! */
+	if (err && err != -ENETDOWN)
+		return err;
+
+	/* well... oh well */
+	data->txpower.fixed = 1;
+	data->txpower.disabled = err == -ENETDOWN;
+	data->txpower.value = val;
+	data->txpower.flags = IW_TXPOW_DBM;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cfg80211_wext_giwtxpower);
-- 
cgit v1.2.3-70-g09d2


From 6081162e2ed78dfcf149b076b047078ab1445cc2 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 2 Jun 2009 13:01:40 +0200
Subject: rfkill: add function to query state

Sometimes it is necessary to know how the state is,
and it is easier to query rfkill than keep track of
it somewhere else, so add a function for that. This
could later be expanded to return hard/soft block,
but so far that isn't necessary.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/rfkill.h | 12 ++++++++++++
 net/rfkill/core.c      | 13 +++++++++++++
 2 files changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index 7c116f6631b..ee3eddea856 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -261,6 +261,13 @@ void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw);
  *	registered drivers?
  */
 void rfkill_set_global_sw_state(const enum rfkill_type type, bool blocked);
+
+/**
+ * rfkill_blocked - query rfkill block
+ *
+ * @rfkill: rfkill struct to query
+ */
+bool rfkill_blocked(struct rfkill *rfkill);
 #else /* !RFKILL */
 static inline struct rfkill * __must_check
 rfkill_alloc(const char *name,
@@ -313,6 +320,11 @@ static inline void rfkill_set_global_sw_state(const enum rfkill_type type,
 					      bool blocked)
 {
 }
+
+static inline bool rfkill_blocked(struct rfkill *rfkill)
+{
+	return false;
+}
 #endif /* RFKILL || RFKILL_MODULE */
 
 
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 2230aa6b14f..91e9168b544 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -773,6 +773,19 @@ static struct class rfkill_class = {
 	.resume		= rfkill_resume,
 };
 
+bool rfkill_blocked(struct rfkill *rfkill)
+{
+	unsigned long flags;
+	u32 state;
+
+	spin_lock_irqsave(&rfkill->lock, flags);
+	state = rfkill->state;
+	spin_unlock_irqrestore(&rfkill->lock, flags);
+
+	return !!(state & RFKILL_BLOCK_ANY);
+}
+EXPORT_SYMBOL(rfkill_blocked);
+
 
 struct rfkill * __must_check rfkill_alloc(const char *name,
 					  struct device *parent,
-- 
cgit v1.2.3-70-g09d2


From 1f87f7d3a3b42b20f34cb03f0fd1a41c3d0e27f3 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 2 Jun 2009 13:01:41 +0200
Subject: cfg80211: add rfkill support

To be easier on drivers and users, have cfg80211 register an
rfkill structure that drivers can access. When soft-killed,
simply take down all interfaces; when hard-killed the driver
needs to notify us and we will take down the interfaces
after the fact. While rfkilled, interfaces cannot be set UP.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/asm-generic/errno.h |  2 +
 include/net/cfg80211.h      | 29 ++++++++++++--
 include/net/mac80211.h      | 20 ++++++++--
 net/mac80211/cfg.c          | 20 ++++------
 net/mac80211/driver-ops.h   |  7 ++++
 net/mac80211/iface.c        |  4 +-
 net/mac80211/util.c         |  2 +-
 net/wireless/Kconfig        |  3 +-
 net/wireless/core.c         | 97 +++++++++++++++++++++++++++++++++++++++++++--
 net/wireless/core.h         |  7 ++++
 net/wireless/wext-compat.c  | 11 +++--
 11 files changed, 172 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/errno.h b/include/asm-generic/errno.h
index e8852c092fe..28cc03bf19e 100644
--- a/include/asm-generic/errno.h
+++ b/include/asm-generic/errno.h
@@ -106,4 +106,6 @@
 #define	EOWNERDEAD	130	/* Owner died */
 #define	ENOTRECOVERABLE	131	/* State not recoverable */
 
+#define ERFKILL		132	/* Operation not possible due to RF-kill */
+
 #endif
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 8b8e4b89362..1a21895b732 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -757,13 +757,11 @@ enum wiphy_params_flags {
  * @TX_POWER_AUTOMATIC: the dbm parameter is ignored
  * @TX_POWER_LIMITED: limit TX power by the dbm parameter
  * @TX_POWER_FIXED: fix TX power to the dbm parameter
- * @TX_POWER_OFF: turn off completely (will go away)
  */
 enum tx_power_setting {
 	TX_POWER_AUTOMATIC,
 	TX_POWER_LIMITED,
 	TX_POWER_FIXED,
-	TX_POWER_OFF,
 };
 
 /**
@@ -855,8 +853,10 @@ enum tx_power_setting {
  *
  * @set_tx_power: set the transmit power according to the parameters
  * @get_tx_power: store the current TX power into the dbm variable;
- *	return 0 if successful; or -ENETDOWN if successful but power
- *	is disabled (this will go away)
+ *	return 0 if successful
+ *
+ * @rfkill_poll: polls the hw rfkill line, use cfg80211 reporting
+ *	functions to adjust rfkill hw state
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy);
@@ -952,6 +952,8 @@ struct cfg80211_ops {
 	int	(*set_tx_power)(struct wiphy *wiphy,
 				enum tx_power_setting type, int dbm);
 	int	(*get_tx_power)(struct wiphy *wiphy, int *dbm);
+
+	void	(*rfkill_poll)(struct wiphy *wiphy);
 };
 
 /*
@@ -1666,4 +1668,23 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr,
  */
 void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp);
 
+/**
+ * wiphy_rfkill_set_hw_state - notify cfg80211 about hw block state
+ * @wiphy: the wiphy
+ * @blocked: block status
+ */
+void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked);
+
+/**
+ * wiphy_rfkill_start_polling - start polling rfkill
+ * @wiphy: the wiphy
+ */
+void wiphy_rfkill_start_polling(struct wiphy *wiphy);
+
+/**
+ * wiphy_rfkill_stop_polling - stop polling rfkill
+ * @wiphy: the wiphy
+ */
+void wiphy_rfkill_stop_polling(struct wiphy *wiphy);
+
 #endif /* __NET_CFG80211_H */
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 0270aa6e08f..17d61d19d91 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -526,7 +526,7 @@ enum ieee80211_conf_flags {
 /**
  * enum ieee80211_conf_changed - denotes which configuration changed
  *
- * @IEEE80211_CONF_CHANGE_RADIO_ENABLED: the value of radio_enabled changed
+ * @_IEEE80211_CONF_CHANGE_RADIO_ENABLED: DEPRECATED
  * @IEEE80211_CONF_CHANGE_LISTEN_INTERVAL: the listen interval changed
  * @IEEE80211_CONF_CHANGE_RADIOTAP: the radiotap flag changed
  * @IEEE80211_CONF_CHANGE_PS: the PS flag or dynamic PS timeout changed
@@ -536,7 +536,7 @@ enum ieee80211_conf_flags {
  * @IEEE80211_CONF_CHANGE_IDLE: Idle flag changed
  */
 enum ieee80211_conf_changed {
-	IEEE80211_CONF_CHANGE_RADIO_ENABLED	= BIT(0),
+	_IEEE80211_CONF_CHANGE_RADIO_ENABLED	= BIT(0),
 	IEEE80211_CONF_CHANGE_LISTEN_INTERVAL	= BIT(2),
 	IEEE80211_CONF_CHANGE_RADIOTAP		= BIT(3),
 	IEEE80211_CONF_CHANGE_PS		= BIT(4),
@@ -546,6 +546,14 @@ enum ieee80211_conf_changed {
 	IEEE80211_CONF_CHANGE_IDLE		= BIT(8),
 };
 
+static inline __deprecated enum ieee80211_conf_changed
+__IEEE80211_CONF_CHANGE_RADIO_ENABLED(void)
+{
+	return _IEEE80211_CONF_CHANGE_RADIO_ENABLED;
+}
+#define IEEE80211_CONF_CHANGE_RADIO_ENABLED \
+	__IEEE80211_CONF_CHANGE_RADIO_ENABLED()
+
 /**
  * struct ieee80211_conf - configuration of the device
  *
@@ -585,7 +593,7 @@ struct ieee80211_conf {
 	int max_sleep_period;
 
 	u16 listen_interval;
-	bool radio_enabled;
+	bool __deprecated radio_enabled;
 
 	u8 long_frame_max_tx_count, short_frame_max_tx_count;
 
@@ -1396,6 +1404,10 @@ enum ieee80211_ampdu_mlme_action {
  * 	is the first frame we expect to perform the action on. Notice
  * 	that TX/RX_STOP can pass NULL for this parameter.
  *	Returns a negative error code on failure.
+ *
+ * @rfkill_poll: Poll rfkill hardware state. If you need this, you also
+ *	need to set wiphy->rfkill_poll to %true before registration,
+ *	and need to call wiphy_rfkill_set_hw_state() in the callback.
  */
 struct ieee80211_ops {
 	int (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb);
@@ -1444,6 +1456,8 @@ struct ieee80211_ops {
 	int (*ampdu_action)(struct ieee80211_hw *hw,
 			    enum ieee80211_ampdu_mlme_action action,
 			    struct ieee80211_sta *sta, u16 tid, u16 *ssn);
+
+	void (*rfkill_poll)(struct ieee80211_hw *hw);
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 81258acf48b..a9211cc183c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1340,7 +1340,6 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
 	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct ieee80211_channel *chan = local->hw.conf.channel;
 	u32 changes = 0;
-	bool radio_enabled = true;
 
 	switch (type) {
 	case TX_POWER_AUTOMATIC:
@@ -1359,14 +1358,6 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
 			return -EINVAL;
 		local->user_power_level = dbm;
 		break;
-	case TX_POWER_OFF:
-		radio_enabled = false;
-		break;
-	}
-
-	if (radio_enabled != local->hw.conf.radio_enabled) {
-		changes |= IEEE80211_CONF_CHANGE_RADIO_ENABLED;
-		local->hw.conf.radio_enabled = radio_enabled;
 	}
 
 	ieee80211_hw_config(local, changes);
@@ -1380,12 +1371,16 @@ static int ieee80211_get_tx_power(struct wiphy *wiphy, int *dbm)
 
 	*dbm = local->hw.conf.power_level;
 
-	if (!local->hw.conf.radio_enabled)
-		return -ENETDOWN;
-
 	return 0;
 }
 
+static void ieee80211_rfkill_poll(struct wiphy *wiphy)
+{
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+
+	drv_rfkill_poll(local);
+}
+
 struct cfg80211_ops mac80211_config_ops = {
 	.add_virtual_intf = ieee80211_add_iface,
 	.del_virtual_intf = ieee80211_del_iface,
@@ -1427,4 +1422,5 @@ struct cfg80211_ops mac80211_config_ops = {
 	.set_wiphy_params = ieee80211_set_wiphy_params,
 	.set_tx_power = ieee80211_set_tx_power,
 	.get_tx_power = ieee80211_get_tx_power,
+	.rfkill_poll = ieee80211_rfkill_poll,
 };
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 3912b5334b9..b13446afd48 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -181,4 +181,11 @@ static inline int drv_ampdu_action(struct ieee80211_local *local,
 						sta, tid, ssn);
 	return -EOPNOTSUPP;
 }
+
+
+static inline void drv_rfkill_poll(struct ieee80211_local *local)
+{
+	if (local->ops->rfkill_poll)
+		local->ops->rfkill_poll(&local->hw);
+}
 #endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 8c9f1c722cd..b7c8a448429 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -170,7 +170,7 @@ static int ieee80211_open(struct net_device *dev)
 			goto err_del_bss;
 		/* we're brought up, everything changes */
 		hw_reconf_flags = ~0;
-		ieee80211_led_radio(local, local->hw.conf.radio_enabled);
+		ieee80211_led_radio(local, true);
 	}
 
 	/*
@@ -560,7 +560,7 @@ static int ieee80211_stop(struct net_device *dev)
 
 		drv_stop(local);
 
-		ieee80211_led_radio(local, 0);
+		ieee80211_led_radio(local, false);
 
 		flush_workqueue(local->hw.workqueue);
 
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 31284c984e3..22f63815fb3 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -973,7 +973,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 	if (local->open_count) {
 		res = drv_start(local);
 
-		ieee80211_led_radio(local, hw->conf.radio_enabled);
+		ieee80211_led_radio(local, true);
 	}
 
 	/* add interfaces */
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 45005497c63..4428dd5e911 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -1,5 +1,6 @@
 config CFG80211
-        tristate "Improved wireless configuration API"
+	tristate "Improved wireless configuration API"
+	depends on RFKILL || !RFKILL
 
 config CFG80211_REG_DEBUG
 	bool "cfg80211 regulatory debugging"
diff --git a/net/wireless/core.c b/net/wireless/core.c
index a5dbea1da47..3b74b88e10a 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -12,6 +12,7 @@
 #include <linux/debugfs.h>
 #include <linux/notifier.h>
 #include <linux/device.h>
+#include <linux/rtnetlink.h>
 #include <net/genetlink.h>
 #include <net/cfg80211.h>
 #include "nl80211.h"
@@ -227,6 +228,41 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
 	return 0;
 }
 
+static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data)
+{
+	struct cfg80211_registered_device *drv = data;
+
+	drv->ops->rfkill_poll(&drv->wiphy);
+}
+
+static int cfg80211_rfkill_set_block(void *data, bool blocked)
+{
+	struct cfg80211_registered_device *drv = data;
+	struct wireless_dev *wdev;
+
+	if (!blocked)
+		return 0;
+
+	rtnl_lock();
+	mutex_lock(&drv->devlist_mtx);
+
+	list_for_each_entry(wdev, &drv->netdev_list, list)
+		dev_close(wdev->netdev);
+
+	mutex_unlock(&drv->devlist_mtx);
+	rtnl_unlock();
+
+	return 0;
+}
+
+static void cfg80211_rfkill_sync_work(struct work_struct *work)
+{
+	struct cfg80211_registered_device *drv;
+
+	drv = container_of(work, struct cfg80211_registered_device, rfkill_sync);
+	cfg80211_rfkill_set_block(drv, rfkill_blocked(drv->rfkill));
+}
+
 /* exported functions */
 
 struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv)
@@ -274,6 +310,18 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv)
 	drv->wiphy.dev.class = &ieee80211_class;
 	drv->wiphy.dev.platform_data = drv;
 
+	drv->rfkill_ops.set_block = cfg80211_rfkill_set_block;
+	drv->rfkill = rfkill_alloc(dev_name(&drv->wiphy.dev),
+				   &drv->wiphy.dev, RFKILL_TYPE_WLAN,
+				   &drv->rfkill_ops, drv);
+
+	if (!drv->rfkill) {
+		kfree(drv);
+		return NULL;
+	}
+
+	INIT_WORK(&drv->rfkill_sync, cfg80211_rfkill_sync_work);
+
 	/*
 	 * Initialize wiphy parameters to IEEE 802.11 MIB default values.
 	 * Fragmentation and RTS threshold are disabled by default with the
@@ -356,6 +404,10 @@ int wiphy_register(struct wiphy *wiphy)
 	if (res)
 		goto out_unlock;
 
+	res = rfkill_register(drv->rfkill);
+	if (res)
+		goto out_rm_dev;
+
 	list_add(&drv->list, &cfg80211_drv_list);
 
 	/* add to debugfs */
@@ -379,16 +431,41 @@ int wiphy_register(struct wiphy *wiphy)
 	cfg80211_debugfs_drv_add(drv);
 
 	res = 0;
-out_unlock:
+	goto out_unlock;
+
+ out_rm_dev:
+	device_del(&drv->wiphy.dev);
+ out_unlock:
 	mutex_unlock(&cfg80211_mutex);
 	return res;
 }
 EXPORT_SYMBOL(wiphy_register);
 
+void wiphy_rfkill_start_polling(struct wiphy *wiphy)
+{
+	struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy);
+
+	if (!drv->ops->rfkill_poll)
+		return;
+	drv->rfkill_ops.poll = cfg80211_rfkill_poll;
+	rfkill_resume_polling(drv->rfkill);
+}
+EXPORT_SYMBOL(wiphy_rfkill_start_polling);
+
+void wiphy_rfkill_stop_polling(struct wiphy *wiphy)
+{
+	struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy);
+
+	rfkill_pause_polling(drv->rfkill);
+}
+EXPORT_SYMBOL(wiphy_rfkill_stop_polling);
+
 void wiphy_unregister(struct wiphy *wiphy)
 {
 	struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy);
 
+	rfkill_unregister(drv->rfkill);
+
 	/* protect the device list */
 	mutex_lock(&cfg80211_mutex);
 
@@ -425,6 +502,7 @@ EXPORT_SYMBOL(wiphy_unregister);
 void cfg80211_dev_free(struct cfg80211_registered_device *drv)
 {
 	struct cfg80211_internal_bss *scan, *tmp;
+	rfkill_destroy(drv->rfkill);
 	mutex_destroy(&drv->mtx);
 	mutex_destroy(&drv->devlist_mtx);
 	list_for_each_entry_safe(scan, tmp, &drv->bss_list, list)
@@ -438,6 +516,15 @@ void wiphy_free(struct wiphy *wiphy)
 }
 EXPORT_SYMBOL(wiphy_free);
 
+void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked)
+{
+	struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy);
+
+	if (rfkill_set_hw_state(drv->rfkill, blocked))
+		schedule_work(&drv->rfkill_sync);
+}
+EXPORT_SYMBOL(wiphy_rfkill_set_hw_state);
+
 static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 					 unsigned long state,
 					 void *ndev)
@@ -446,7 +533,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 	struct cfg80211_registered_device *rdev;
 
 	if (!dev->ieee80211_ptr)
-		return 0;
+		return NOTIFY_DONE;
 
 	rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy);
 
@@ -492,9 +579,13 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 		}
 		mutex_unlock(&rdev->devlist_mtx);
 		break;
+	case NETDEV_PRE_UP:
+		if (rfkill_blocked(rdev->rfkill))
+			return notifier_from_errno(-ERFKILL);
+		break;
 	}
 
-	return 0;
+	return NOTIFY_DONE;
 }
 
 static struct notifier_block cfg80211_netdev_notifier = {
diff --git a/net/wireless/core.h b/net/wireless/core.h
index ab512bcd815..bfa340c7abb 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -11,6 +11,8 @@
 #include <linux/kref.h>
 #include <linux/rbtree.h>
 #include <linux/debugfs.h>
+#include <linux/rfkill.h>
+#include <linux/workqueue.h>
 #include <net/genetlink.h>
 #include <net/cfg80211.h>
 #include "reg.h"
@@ -24,6 +26,11 @@ struct cfg80211_registered_device {
 	 * any call is in progress */
 	struct mutex mtx;
 
+	/* rfkill support */
+	struct rfkill_ops rfkill_ops;
+	struct rfkill *rfkill;
+	struct work_struct rfkill_sync;
+
 	/* ISO / IEC 3166 alpha2 for which this device is receiving
 	 * country IEs on, this can help disregard country IEs from APs
 	 * on the same alpha2 quickly. The alpha2 may differ from
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 9fbfb8536e7..d030c531567 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -764,6 +764,8 @@ int cfg80211_wext_siwtxpower(struct net_device *dev,
 
 	/* only change when not disabling */
 	if (!data->txpower.disabled) {
+		rfkill_set_sw_state(rdev->rfkill, false);
+
 		if (data->txpower.fixed) {
 			/*
 			 * wext doesn't support negative values, see
@@ -787,7 +789,9 @@ int cfg80211_wext_siwtxpower(struct net_device *dev,
 			}
 		}
 	} else {
-		type = TX_POWER_OFF;
+		rfkill_set_sw_state(rdev->rfkill, true);
+		schedule_work(&rdev->rfkill_sync);
+		return 0;
 	}
 
 	return rdev->ops->set_tx_power(wdev->wiphy, type, dbm);;
@@ -811,13 +815,12 @@ int cfg80211_wext_giwtxpower(struct net_device *dev,
 		return -EOPNOTSUPP;
 
 	err = rdev->ops->get_tx_power(wdev->wiphy, &val);
-	/* HACK!!! */
-	if (err && err != -ENETDOWN)
+	if (err)
 		return err;
 
 	/* well... oh well */
 	data->txpower.fixed = 1;
-	data->txpower.disabled = err == -ENETDOWN;
+	data->txpower.disabled = rfkill_blocked(rdev->rfkill);
 	data->txpower.value = val;
 	data->txpower.flags = IW_TXPOW_DBM;
 
-- 
cgit v1.2.3-70-g09d2


From e0a94c2a63f2644826069044649669b5e7ca75d3 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux-foundation.org>
Date: Wed, 3 Jun 2009 16:04:31 -0400
Subject: security: use mmap_min_addr indepedently of security models

This patch removes the dependency of mmap_min_addr on CONFIG_SECURITY.
It also sets a default mmap_min_addr of 4096.

mmapping of addresses below 4096 will only be possible for processes
with CAP_SYS_RAWIO.

Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Acked-by: Eric Paris <eparis@redhat.com>
Looks-ok-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/mm.h       |  2 --
 include/linux/security.h |  2 ++
 kernel/sysctl.c          |  2 --
 mm/Kconfig               | 19 +++++++++++++++++++
 mm/mmap.c                |  3 +++
 security/Kconfig         | 22 +---------------------
 security/security.c      |  3 ---
 7 files changed, 25 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index bff1f0d475c..0c21af6abff 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -580,12 +580,10 @@ static inline void set_page_links(struct page *page, enum zone_type zone,
  */
 static inline unsigned long round_hint_to_min(unsigned long hint)
 {
-#ifdef CONFIG_SECURITY
 	hint &= PAGE_MASK;
 	if (((void *)hint != NULL) &&
 	    (hint < mmap_min_addr))
 		return PAGE_ALIGN(mmap_min_addr);
-#endif
 	return hint;
 }
 
diff --git a/include/linux/security.h b/include/linux/security.h
index d5fd6163606..5eff459b383 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -2197,6 +2197,8 @@ static inline int security_file_mmap(struct file *file, unsigned long reqprot,
 				     unsigned long addr,
 				     unsigned long addr_only)
 {
+	if ((addr < mmap_min_addr) && !capable(CAP_SYS_RAWIO))
+		return -EACCES;
 	return 0;
 }
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 149581fb48a..45bd711a242 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1237,7 +1237,6 @@ static struct ctl_table vm_table[] = {
 		.strategy	= &sysctl_jiffies,
 	},
 #endif
-#ifdef CONFIG_SECURITY
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "mmap_min_addr",
@@ -1246,7 +1245,6 @@ static struct ctl_table vm_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_doulongvec_minmax,
 	},
-#endif
 #ifdef CONFIG_NUMA
 	{
 		.ctl_name	= CTL_UNNUMBERED,
diff --git a/mm/Kconfig b/mm/Kconfig
index c2b57d81e15..71830ba7b98 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -226,6 +226,25 @@ config HAVE_MLOCKED_PAGE_BIT
 config MMU_NOTIFIER
 	bool
 
+config DEFAULT_MMAP_MIN_ADDR
+        int "Low address space to protect from user allocation"
+        default 4096
+        help
+	  This is the portion of low virtual memory which should be protected
+	  from userspace allocation.  Keeping a user from writing to low pages
+	  can help reduce the impact of kernel NULL pointer bugs.
+
+	  For most ia64, ppc64 and x86 users with lots of address space
+	  a value of 65536 is reasonable and should cause no problems.
+	  On arm and other archs it should not be higher than 32768.
+	  Programs which use vm86 functionality would either need additional
+	  permissions from either the LSM or the capabilities module or have
+	  this protection disabled.
+
+	  This value can be changed after boot using the
+	  /proc/sys/vm/mmap_min_addr tunable.
+
+
 config NOMMU_INITIAL_TRIM_EXCESS
 	int "Turn on mmap() excess space trimming before booting"
 	depends on !MMU
diff --git a/mm/mmap.c b/mm/mmap.c
index 6b7b1a95944..2b43fa1aa3c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -87,6 +87,9 @@ int sysctl_overcommit_ratio = 50;	/* default is 50% */
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
 struct percpu_counter vm_committed_as;
 
+/* amount of vm to protect from userspace access */
+unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
+
 /*
  * Check that a process has enough memory to allocate a new virtual
  * mapping. 0 means there is enough memory for the allocation to
diff --git a/security/Kconfig b/security/Kconfig
index bb244774e9d..d23c839038f 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -110,28 +110,8 @@ config SECURITY_ROOTPLUG
 
 	  See <http://www.linuxjournal.com/article.php?sid=6279> for
 	  more information about this module.
-	  
-	  If you are unsure how to answer this question, answer N.
-
-config SECURITY_DEFAULT_MMAP_MIN_ADDR
-        int "Low address space to protect from user allocation"
-        depends on SECURITY
-        default 0
-        help
-	  This is the portion of low virtual memory which should be protected
-	  from userspace allocation.  Keeping a user from writing to low pages
-	  can help reduce the impact of kernel NULL pointer bugs.
-
-	  For most ia64, ppc64 and x86 users with lots of address space
-	  a value of 65536 is reasonable and should cause no problems.
-	  On arm and other archs it should not be higher than 32768.
-	  Programs which use vm86 functionality would either need additional
-	  permissions from either the LSM or the capabilities module or have
-	  this protection disabled.
-
-	  This value can be changed after boot using the
-	  /proc/sys/vm/mmap_min_addr tunable.
 
+	  If you are unsure how to answer this question, answer N.
 
 source security/selinux/Kconfig
 source security/smack/Kconfig
diff --git a/security/security.c b/security/security.c
index 5284255c5cd..dc7674fbfc7 100644
--- a/security/security.c
+++ b/security/security.c
@@ -26,9 +26,6 @@ extern void security_fixup_ops(struct security_operations *ops);
 
 struct security_operations *security_ops;	/* Initialized to NULL */
 
-/* amount of vm to protect from userspace access */
-unsigned long mmap_min_addr = CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR;
-
 static inline int verify(struct security_operations *ops)
 {
 	/* verify the security_operations structure exists */
-- 
cgit v1.2.3-70-g09d2


From 278b2513f76161a9cf1ebddd620dc9d1714fe573 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 3 Jun 2009 21:20:51 -0700
Subject: gso: Stop fraglists from escaping

As it stands skb fraglists can get past the check in dev_queue_xmit
if the skb is marked as GSO.  In particular, if the packet doesn't
have the proper checksums for GSO, but can otherwise be handled by
the underlying device, we will not perform the fraglist check on it
at all.

If the underlying device cannot handle fraglists, then this will
break.

The fix is as simple as moving the fraglist check from the device
check into skb_gso_ok.

This has caused crashes with Xen when used together with GRO which
can generate GSO packets with fraglists.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bbfabf3012b..2a801380b50 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1903,15 +1903,14 @@ static inline int net_gso_ok(int features, int gso_type)
 
 static inline int skb_gso_ok(struct sk_buff *skb, int features)
 {
-	return net_gso_ok(features, skb_shinfo(skb)->gso_type);
+	return net_gso_ok(features, skb_shinfo(skb)->gso_type) &&
+	       (!skb_shinfo(skb)->frag_list || (features & NETIF_F_FRAGLIST));
 }
 
 static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
 {
 	return skb_is_gso(skb) &&
 	       (!skb_gso_ok(skb, dev->features) ||
-	        (skb_shinfo(skb)->frag_list &&
-	         !(dev->features & NETIF_F_FRAGLIST)) ||
 		unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
 }
 
-- 
cgit v1.2.3-70-g09d2


From a5e78820966e17c2316866e00047e4e7e5480f04 Mon Sep 17 00:00:00 2001
From: Evgeniy Polyakov <zbr@ioremap.net>
Date: Thu, 4 Jun 2009 16:54:42 +0200
Subject: netfilter: x_tables: added hook number into match extension parameter
 structure.

Signed-off-by: Evgeniy Polyakov <zbr@ioremap.net>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h | 6 ++++--
 net/bridge/netfilter/ebtables.c    | 2 +-
 net/ipv4/netfilter/ip_tables.c     | 2 +-
 net/ipv6/netfilter/ip6_tables.c    | 2 +-
 4 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index c9efe039dc5..1030b759389 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -184,9 +184,10 @@ struct xt_counters_info
  * @matchinfo:	per-match data
  * @fragoff:	packet is a fragment, this is the data offset
  * @thoff:	position of transport header relative to skb->data
- * @hotdrop:	drop packet if we had inspection problems
+ * @hook:	hook number given packet came from
  * @family:	Actual NFPROTO_* through which the function is invoked
  * 		(helpful when match->family == NFPROTO_UNSPEC)
+ * @hotdrop:	drop packet if we had inspection problems
  */
 struct xt_match_param {
 	const struct net_device *in, *out;
@@ -194,8 +195,9 @@ struct xt_match_param {
 	const void *matchinfo;
 	int fragoff;
 	unsigned int thoff;
-	bool *hotdrop;
+	unsigned int hooknum;
 	u_int8_t family;
+	bool *hotdrop;
 };
 
 /**
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 24555834d43..37928d5f284 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -170,7 +170,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
 	mtpar.in      = tgpar.in  = in;
 	mtpar.out     = tgpar.out = out;
 	mtpar.hotdrop = &hotdrop;
-	tgpar.hooknum = hook;
+	mtpar.hooknum = tgpar.hooknum = hook;
 
 	read_lock_bh(&table->lock);
 	private = table->private;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 7b35c0b3841..5bf7c3f185d 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -343,7 +343,7 @@ ipt_do_table(struct sk_buff *skb,
 	mtpar.in      = tgpar.in  = in;
 	mtpar.out     = tgpar.out = out;
 	mtpar.family  = tgpar.family = NFPROTO_IPV4;
-	tgpar.hooknum = hook;
+	mtpar.hooknum = tgpar.hooknum = hook;
 
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
 	xt_info_rdlock_bh();
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 5164e0bf3bc..ced1f2c0cb6 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -369,7 +369,7 @@ ip6t_do_table(struct sk_buff *skb,
 	mtpar.in      = tgpar.in  = in;
 	mtpar.out     = tgpar.out = out;
 	mtpar.family  = tgpar.family = NFPROTO_IPV6;
-	tgpar.hooknum = hook;
+	mtpar.hooknum = tgpar.hooknum = hook;
 
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
 
-- 
cgit v1.2.3-70-g09d2


From 60313ebed739b331e8e61079da27a11ee3b73a30 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 4 Jun 2009 16:53:44 +0200
Subject: perf_counter: Add fork event

Create a fork event so that we can easily clone the comm and
dso maps without having to generate all those events.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  10 ++++
 kernel/fork.c                |   4 +-
 kernel/perf_counter.c        | 131 +++++++++++++++++++++++++++++++++++++------
 3 files changed, 126 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 37d5541d74c..380247bdb91 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -276,6 +276,14 @@ enum perf_event_type {
 	PERF_EVENT_THROTTLE		= 5,
 	PERF_EVENT_UNTHROTTLE		= 6,
 
+	/*
+	 * struct {
+	 * 	struct perf_event_header	header;
+	 * 	u32				pid, ppid;
+	 * };
+	 */
+	PERF_EVENT_FORK			= 7,
+
 	/*
 	 * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
 	 * will be PERF_RECORD_*
@@ -618,6 +626,7 @@ extern void perf_counter_munmap(unsigned long addr, unsigned long len,
 				unsigned long pgoff, struct file *file);
 
 extern void perf_counter_comm(struct task_struct *tsk);
+extern void perf_counter_fork(struct task_struct *tsk);
 
 extern void perf_counter_task_migration(struct task_struct *task, int cpu);
 
@@ -673,6 +682,7 @@ perf_counter_munmap(unsigned long addr, unsigned long len,
 		    unsigned long pgoff, struct file *file)		{ }
 
 static inline void perf_counter_comm(struct task_struct *tsk)		{ }
+static inline void perf_counter_fork(struct task_struct *tsk)		{ }
 static inline void perf_counter_init(void)				{ }
 static inline void perf_counter_task_migration(struct task_struct *task,
 					       int cpu)			{ }
diff --git a/kernel/fork.c b/kernel/fork.c
index b7d7a9f0bd7..f4466ca37ec 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1412,12 +1412,12 @@ long do_fork(unsigned long clone_flags,
 		if (clone_flags & CLONE_VFORK) {
 			p->vfork_done = &vfork;
 			init_completion(&vfork);
-		} else {
+		} else if (!(clone_flags & CLONE_VM)) {
 			/*
 			 * vfork will do an exec which will call
 			 * set_task_comm()
 			 */
-			perf_counter_comm(p);
+			perf_counter_fork(p);
 		}
 
 		audit_finish_fork(p);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0bb03f15a5b..78c58623a0d 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -40,9 +40,9 @@ static int perf_reserved_percpu __read_mostly;
 static int perf_overcommit __read_mostly = 1;
 
 static atomic_t nr_counters __read_mostly;
-static atomic_t nr_mmap_tracking __read_mostly;
-static atomic_t nr_munmap_tracking __read_mostly;
-static atomic_t nr_comm_tracking __read_mostly;
+static atomic_t nr_mmap_counters __read_mostly;
+static atomic_t nr_munmap_counters __read_mostly;
+static atomic_t nr_comm_counters __read_mostly;
 
 int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
 int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */
@@ -1447,11 +1447,11 @@ static void free_counter(struct perf_counter *counter)
 
 	atomic_dec(&nr_counters);
 	if (counter->attr.mmap)
-		atomic_dec(&nr_mmap_tracking);
+		atomic_dec(&nr_mmap_counters);
 	if (counter->attr.munmap)
-		atomic_dec(&nr_munmap_tracking);
+		atomic_dec(&nr_munmap_counters);
 	if (counter->attr.comm)
-		atomic_dec(&nr_comm_tracking);
+		atomic_dec(&nr_comm_counters);
 
 	if (counter->destroy)
 		counter->destroy(counter);
@@ -2475,6 +2475,105 @@ static void perf_counter_output(struct perf_counter *counter,
 	perf_output_end(&handle);
 }
 
+/*
+ * fork tracking
+ */
+
+struct perf_fork_event {
+	struct task_struct	*task;
+
+	struct {
+		struct perf_event_header	header;
+
+		u32				pid;
+		u32				ppid;
+	} event;
+};
+
+static void perf_counter_fork_output(struct perf_counter *counter,
+				     struct perf_fork_event *fork_event)
+{
+	struct perf_output_handle handle;
+	int size = fork_event->event.header.size;
+	struct task_struct *task = fork_event->task;
+	int ret = perf_output_begin(&handle, counter, size, 0, 0);
+
+	if (ret)
+		return;
+
+	fork_event->event.pid = perf_counter_pid(counter, task);
+	fork_event->event.ppid = perf_counter_pid(counter, task->real_parent);
+
+	perf_output_put(&handle, fork_event->event);
+	perf_output_end(&handle);
+}
+
+static int perf_counter_fork_match(struct perf_counter *counter)
+{
+	if (counter->attr.comm || counter->attr.mmap || counter->attr.munmap)
+		return 1;
+
+	return 0;
+}
+
+static void perf_counter_fork_ctx(struct perf_counter_context *ctx,
+				  struct perf_fork_event *fork_event)
+{
+	struct perf_counter *counter;
+
+	if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
+		return;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
+		if (perf_counter_fork_match(counter))
+			perf_counter_fork_output(counter, fork_event);
+	}
+	rcu_read_unlock();
+}
+
+static void perf_counter_fork_event(struct perf_fork_event *fork_event)
+{
+	struct perf_cpu_context *cpuctx;
+	struct perf_counter_context *ctx;
+
+	cpuctx = &get_cpu_var(perf_cpu_context);
+	perf_counter_fork_ctx(&cpuctx->ctx, fork_event);
+	put_cpu_var(perf_cpu_context);
+
+	rcu_read_lock();
+	/*
+	 * doesn't really matter which of the child contexts the
+	 * events ends up in.
+	 */
+	ctx = rcu_dereference(current->perf_counter_ctxp);
+	if (ctx)
+		perf_counter_fork_ctx(ctx, fork_event);
+	rcu_read_unlock();
+}
+
+void perf_counter_fork(struct task_struct *task)
+{
+	struct perf_fork_event fork_event;
+
+	if (!atomic_read(&nr_comm_counters) &&
+	    !atomic_read(&nr_mmap_counters) &&
+	    !atomic_read(&nr_munmap_counters))
+		return;
+
+	fork_event = (struct perf_fork_event){
+		.task	= task,
+		.event  = {
+			.header = {
+				.type = PERF_EVENT_FORK,
+				.size = sizeof(fork_event.event),
+			},
+		},
+	};
+
+	perf_counter_fork_event(&fork_event);
+}
+
 /*
  * comm tracking
  */
@@ -2511,11 +2610,9 @@ static void perf_counter_comm_output(struct perf_counter *counter,
 	perf_output_end(&handle);
 }
 
-static int perf_counter_comm_match(struct perf_counter *counter,
-				   struct perf_comm_event *comm_event)
+static int perf_counter_comm_match(struct perf_counter *counter)
 {
-	if (counter->attr.comm &&
-	    comm_event->event.header.type == PERF_EVENT_COMM)
+	if (counter->attr.comm)
 		return 1;
 
 	return 0;
@@ -2531,7 +2628,7 @@ static void perf_counter_comm_ctx(struct perf_counter_context *ctx,
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
-		if (perf_counter_comm_match(counter, comm_event))
+		if (perf_counter_comm_match(counter))
 			perf_counter_comm_output(counter, comm_event);
 	}
 	rcu_read_unlock();
@@ -2570,7 +2667,7 @@ void perf_counter_comm(struct task_struct *task)
 {
 	struct perf_comm_event comm_event;
 
-	if (!atomic_read(&nr_comm_tracking))
+	if (!atomic_read(&nr_comm_counters))
 		return;
 
 	comm_event = (struct perf_comm_event){
@@ -2708,7 +2805,7 @@ void perf_counter_mmap(unsigned long addr, unsigned long len,
 {
 	struct perf_mmap_event mmap_event;
 
-	if (!atomic_read(&nr_mmap_tracking))
+	if (!atomic_read(&nr_mmap_counters))
 		return;
 
 	mmap_event = (struct perf_mmap_event){
@@ -2729,7 +2826,7 @@ void perf_counter_munmap(unsigned long addr, unsigned long len,
 {
 	struct perf_mmap_event mmap_event;
 
-	if (!atomic_read(&nr_munmap_tracking))
+	if (!atomic_read(&nr_munmap_counters))
 		return;
 
 	mmap_event = (struct perf_mmap_event){
@@ -3427,11 +3524,11 @@ done:
 
 	atomic_inc(&nr_counters);
 	if (counter->attr.mmap)
-		atomic_inc(&nr_mmap_tracking);
+		atomic_inc(&nr_mmap_counters);
 	if (counter->attr.munmap)
-		atomic_inc(&nr_munmap_tracking);
+		atomic_inc(&nr_munmap_counters);
 	if (counter->attr.comm)
-		atomic_inc(&nr_comm_tracking);
+		atomic_inc(&nr_comm_counters);
 
 	return counter;
 }
-- 
cgit v1.2.3-70-g09d2


From d99e9446200c1ffab28cb0e39b76c34a2bfafd06 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 4 Jun 2009 17:08:58 +0200
Subject: perf_counter: Remove munmap stuff

In name of keeping it simple, only track mmap events. Userspace
will have to remove old overlapping maps when it encounters them.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 11 +----------
 kernel/perf_counter.c        | 38 +++-----------------------------------
 mm/mmap.c                    |  6 ------
 3 files changed, 4 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 380247bdb91..6ca403acd41 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -148,11 +148,10 @@ struct perf_counter_attr {
 				exclude_hv     :  1, /* ditto hypervisor      */
 				exclude_idle   :  1, /* don't count when idle */
 				mmap           :  1, /* include mmap data     */
-				munmap         :  1, /* include munmap data   */
 				comm	       :  1, /* include comm data     */
 				freq           :  1, /* use freq, not period  */
 
-				__reserved_1   : 52;
+				__reserved_1   : 53;
 
 	__u32			wakeup_events;	/* wakeup every n events */
 	__u32			__reserved_2;
@@ -246,7 +245,6 @@ enum perf_event_type {
 	 * };
 	 */
 	PERF_EVENT_MMAP			= 1,
-	PERF_EVENT_MUNMAP		= 2,
 
 	/*
 	 * struct {
@@ -622,9 +620,6 @@ extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
 extern void perf_counter_mmap(unsigned long addr, unsigned long len,
 			      unsigned long pgoff, struct file *file);
 
-extern void perf_counter_munmap(unsigned long addr, unsigned long len,
-				unsigned long pgoff, struct file *file);
-
 extern void perf_counter_comm(struct task_struct *tsk);
 extern void perf_counter_fork(struct task_struct *tsk);
 
@@ -677,10 +672,6 @@ static inline void
 perf_counter_mmap(unsigned long addr, unsigned long len,
 		  unsigned long pgoff, struct file *file)		{ }
 
-static inline void
-perf_counter_munmap(unsigned long addr, unsigned long len,
-		    unsigned long pgoff, struct file *file)		{ }
-
 static inline void perf_counter_comm(struct task_struct *tsk)		{ }
 static inline void perf_counter_fork(struct task_struct *tsk)		{ }
 static inline void perf_counter_init(void)				{ }
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 78c58623a0d..195712e20d0 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -41,7 +41,6 @@ static int perf_overcommit __read_mostly = 1;
 
 static atomic_t nr_counters __read_mostly;
 static atomic_t nr_mmap_counters __read_mostly;
-static atomic_t nr_munmap_counters __read_mostly;
 static atomic_t nr_comm_counters __read_mostly;
 
 int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
@@ -1448,8 +1447,6 @@ static void free_counter(struct perf_counter *counter)
 	atomic_dec(&nr_counters);
 	if (counter->attr.mmap)
 		atomic_dec(&nr_mmap_counters);
-	if (counter->attr.munmap)
-		atomic_dec(&nr_munmap_counters);
 	if (counter->attr.comm)
 		atomic_dec(&nr_comm_counters);
 
@@ -2510,7 +2507,7 @@ static void perf_counter_fork_output(struct perf_counter *counter,
 
 static int perf_counter_fork_match(struct perf_counter *counter)
 {
-	if (counter->attr.comm || counter->attr.mmap || counter->attr.munmap)
+	if (counter->attr.comm || counter->attr.mmap)
 		return 1;
 
 	return 0;
@@ -2557,8 +2554,7 @@ void perf_counter_fork(struct task_struct *task)
 	struct perf_fork_event fork_event;
 
 	if (!atomic_read(&nr_comm_counters) &&
-	    !atomic_read(&nr_mmap_counters) &&
-	    !atomic_read(&nr_munmap_counters))
+	    !atomic_read(&nr_mmap_counters))
 		return;
 
 	fork_event = (struct perf_fork_event){
@@ -2722,12 +2718,7 @@ static void perf_counter_mmap_output(struct perf_counter *counter,
 static int perf_counter_mmap_match(struct perf_counter *counter,
 				   struct perf_mmap_event *mmap_event)
 {
-	if (counter->attr.mmap &&
-	    mmap_event->event.header.type == PERF_EVENT_MMAP)
-		return 1;
-
-	if (counter->attr.munmap &&
-	    mmap_event->event.header.type == PERF_EVENT_MUNMAP)
+	if (counter->attr.mmap)
 		return 1;
 
 	return 0;
@@ -2821,27 +2812,6 @@ void perf_counter_mmap(unsigned long addr, unsigned long len,
 	perf_counter_mmap_event(&mmap_event);
 }
 
-void perf_counter_munmap(unsigned long addr, unsigned long len,
-			 unsigned long pgoff, struct file *file)
-{
-	struct perf_mmap_event mmap_event;
-
-	if (!atomic_read(&nr_munmap_counters))
-		return;
-
-	mmap_event = (struct perf_mmap_event){
-		.file   = file,
-		.event  = {
-			.header = { .type = PERF_EVENT_MUNMAP, },
-			.start  = addr,
-			.len    = len,
-			.pgoff  = pgoff,
-		},
-	};
-
-	perf_counter_mmap_event(&mmap_event);
-}
-
 /*
  * Log sample_period changes so that analyzing tools can re-normalize the
  * event flow.
@@ -3525,8 +3495,6 @@ done:
 	atomic_inc(&nr_counters);
 	if (counter->attr.mmap)
 		atomic_inc(&nr_mmap_counters);
-	if (counter->attr.munmap)
-		atomic_inc(&nr_munmap_counters);
 	if (counter->attr.comm)
 		atomic_inc(&nr_comm_counters);
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 2c1c2cb0e2e..6451ce2854b 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1756,12 +1756,6 @@ static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
 	do {
 		long nrpages = vma_pages(vma);
 
-		if (vma->vm_flags & VM_EXEC) {
-			perf_counter_munmap(vma->vm_start,
-					nrpages << PAGE_SHIFT,
-					vma->vm_pgoff, vma->vm_file);
-		}
-
 		mm->total_vm -= nrpages;
 		vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
 		vma = remove_vma(vma);
-- 
cgit v1.2.3-70-g09d2


From 5926a295bb78272b3f648f62febecd19a1b6a6ca Mon Sep 17 00:00:00 2001
From: Alessandro Rubini <rubini@gnudd.com>
Date: Thu, 4 Jun 2009 17:43:04 +0100
Subject: [ARM] 5541/1: serial/amba-pl011.c: add support for the modified port
 found in Nomadik

The Nomadik 8815 SoC has a slightly modified version of the PL011 block.
The patch uses the different ID value as a key to select a vendor
structure that is used to keep track of the differences, as suggested
by Russell King.

Signed-off-by: Alessandro Rubini <rubini@unipv.it>
Acked-by: Andrea Gallo <andrea.gallo@stericsson.com>
Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/serial/amba-pl011.c | 30 +++++++++++++++++++++++++++---
 include/linux/amba/serial.h |  3 +++
 2 files changed, 30 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c
index 4cfa1eb2689..8c5bda27736 100644
--- a/drivers/serial/amba-pl011.c
+++ b/drivers/serial/amba-pl011.c
@@ -70,6 +70,23 @@ struct uart_amba_port {
 	struct clk		*clk;
 	unsigned int		im;	/* interrupt mask */
 	unsigned int		old_status;
+	unsigned int		ifls;	/* vendor-specific */
+};
+
+/* There is by now at least one vendor with differing details, so handle it */
+struct vendor_data {
+	unsigned int		ifls;
+	unsigned int		fifosize;
+};
+
+static struct vendor_data vendor_arm = {
+	.ifls			= UART011_IFLS_RX4_8|UART011_IFLS_TX4_8,
+	.fifosize		= 16,
+};
+
+static struct vendor_data vendor_st = {
+	.ifls			= UART011_IFLS_RX_HALF|UART011_IFLS_TX_HALF,
+	.fifosize		= 64,
 };
 
 static void pl011_stop_tx(struct uart_port *port)
@@ -360,8 +377,7 @@ static int pl011_startup(struct uart_port *port)
 	if (retval)
 		goto clk_dis;
 
-	writew(UART011_IFLS_RX4_8|UART011_IFLS_TX4_8,
-	       uap->port.membase + UART011_IFLS);
+	writew(uap->ifls, uap->port.membase + UART011_IFLS);
 
 	/*
 	 * Provoke TX FIFO interrupt into asserting.
@@ -732,6 +748,7 @@ static struct uart_driver amba_reg = {
 static int pl011_probe(struct amba_device *dev, struct amba_id *id)
 {
 	struct uart_amba_port *uap;
+	struct vendor_data *vendor = id->data;
 	void __iomem *base;
 	int i, ret;
 
@@ -762,12 +779,13 @@ static int pl011_probe(struct amba_device *dev, struct amba_id *id)
 		goto unmap;
 	}
 
+	uap->ifls = vendor->ifls;
 	uap->port.dev = &dev->dev;
 	uap->port.mapbase = dev->res.start;
 	uap->port.membase = base;
 	uap->port.iotype = UPIO_MEM;
 	uap->port.irq = dev->irq[0];
-	uap->port.fifosize = 16;
+	uap->port.fifosize = vendor->fifosize;
 	uap->port.ops = &amba_pl011_pops;
 	uap->port.flags = UPF_BOOT_AUTOCONF;
 	uap->port.line = i;
@@ -812,6 +830,12 @@ static struct amba_id pl011_ids[] __initdata = {
 	{
 		.id	= 0x00041011,
 		.mask	= 0x000fffff,
+		.data	= &vendor_arm,
+	},
+	{
+		.id	= 0x00380802,
+		.mask	= 0x00ffffff,
+		.data	= &vendor_st,
 	},
 	{ 0, 0 },
 };
diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
index 48ee32a18ac..42949e210cb 100644
--- a/include/linux/amba/serial.h
+++ b/include/linux/amba/serial.h
@@ -114,6 +114,9 @@
 #define UART011_IFLS_TX4_8	(2 << 0)
 #define UART011_IFLS_TX6_8	(3 << 0)
 #define UART011_IFLS_TX7_8	(4 << 0)
+/* special values for ST vendor with deeper fifo */
+#define UART011_IFLS_RX_HALF	(5 << 3)
+#define UART011_IFLS_TX_HALF	(5 << 0)
 
 #define UART011_OEIM		(1 << 10)	/* overrun error interrupt mask */
 #define UART011_BEIM		(1 << 9)	/* break error interrupt mask */
-- 
cgit v1.2.3-70-g09d2


From c0683039207226afcffbe0fbf6a1caaee77a37b0 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Wed, 3 Jun 2009 17:43:14 +0100
Subject: [ARM] 5536/1: Move clk_add_alias() to arch/arm/common/clkdev.c

This can be used for other arm platforms too as discussed
on the linux-arm-kernel list.

Also check the return value with IS_ERR and return PTR_ERR
as suggested by Russell King.

Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/common/clkdev.c  | 18 ++++++++++++++++++
 arch/arm/mach-pxa/clock.c | 17 -----------------
 include/linux/clk.h       | 13 +++++++++++++
 3 files changed, 31 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/arch/arm/common/clkdev.c b/arch/arm/common/clkdev.c
index 5589444ff43..f37afd9422f 100644
--- a/arch/arm/common/clkdev.c
+++ b/arch/arm/common/clkdev.c
@@ -135,6 +135,24 @@ struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id,
 }
 EXPORT_SYMBOL(clkdev_alloc);
 
+int clk_add_alias(const char *alias, const char *alias_dev_name, char *id,
+	struct device *dev)
+{
+	struct clk *r = clk_get(dev, id);
+	struct clk_lookup *l;
+
+	if (IS_ERR(r))
+		return PTR_ERR(r);
+
+	l = clkdev_alloc(r, alias, alias_dev_name);
+	clk_put(r);
+	if (!l)
+		return -ENODEV;
+	clkdev_add(l);
+	return 0;
+}
+EXPORT_SYMBOL(clk_add_alias);
+
 /*
  * clkdev_drop - remove a clock dynamically allocated
  */
diff --git a/arch/arm/mach-pxa/clock.c b/arch/arm/mach-pxa/clock.c
index db52d2c4791..49ae3829231 100644
--- a/arch/arm/mach-pxa/clock.c
+++ b/arch/arm/mach-pxa/clock.c
@@ -86,20 +86,3 @@ void clks_register(struct clk_lookup *clks, size_t num)
 	for (i = 0; i < num; i++)
 		clkdev_add(&clks[i]);
 }
-
-int clk_add_alias(const char *alias, const char *alias_dev_name, char *id,
-	struct device *dev)
-{
-	struct clk *r = clk_get(dev, id);
-	struct clk_lookup *l;
-
-	if (!r)
-		return -ENODEV;
-
-	l = clkdev_alloc(r, alias, alias_dev_name);
-	clk_put(r);
-	if (!l)
-		return -ENODEV;
-	clkdev_add(l);
-	return 0;
-}
diff --git a/include/linux/clk.h b/include/linux/clk.h
index 1db9bbf444a..1d37f42ac29 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -142,4 +142,17 @@ struct clk *clk_get_parent(struct clk *clk);
  */
 struct clk *clk_get_sys(const char *dev_id, const char *con_id);
 
+/**
+ * clk_add_alias - add a new clock alias
+ * @alias: name for clock alias
+ * @alias_dev_name: device name
+ * @id: platform specific clock name
+ * @dev: device
+ *
+ * Allows using generic clock names for drivers by adding a new alias.
+ * Assumes clkdev, see clkdev.h for more info.
+ */
+int clk_add_alias(const char *alias, const char *alias_dev_name, char *id,
+			struct device *dev);
+
 #endif
-- 
cgit v1.2.3-70-g09d2


From 10662aa3083f869c645cc2abf5d66849001e2f5d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 5 Jun 2009 13:24:24 +0200
Subject: netfilter: xt_NFQUEUE: queue balancing support

Adds support for specifying a range of queues instead of a single queue
id. Flows will be distributed across the given range.

This is useful for multicore systems: Instead of having a single
application read packets from a queue, start multiple
instances on queues x, x+1, .. x+n. Each instance can process
flows independently.

Packets for the same connection are put into the same queue.

Signed-off-by: Holger Eitzenberger <heitzenberger@astaro.com>
Signed-off-by: Florian Westphal <fwestphal@astaro.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/xt_NFQUEUE.h |  5 ++
 net/netfilter/xt_NFQUEUE.c           | 93 ++++++++++++++++++++++++++++++++++++
 2 files changed, 98 insertions(+)

(limited to 'include')

diff --git a/include/linux/netfilter/xt_NFQUEUE.h b/include/linux/netfilter/xt_NFQUEUE.h
index 982a89f7827..2584f4a777d 100644
--- a/include/linux/netfilter/xt_NFQUEUE.h
+++ b/include/linux/netfilter/xt_NFQUEUE.h
@@ -15,4 +15,9 @@ struct xt_NFQ_info {
 	__u16 queuenum;
 };
 
+struct xt_NFQ_info_v1 {
+	__u16 queuenum;
+	__u16 queues_total;
+};
+
 #endif /* _XT_NFQ_TARGET_H */
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 6e0f84d4d05..498b45101df 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -11,6 +11,10 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/jhash.h>
+
 #include <linux/netfilter.h>
 #include <linux/netfilter_arp.h>
 #include <linux/netfilter/x_tables.h>
@@ -23,6 +27,8 @@ MODULE_ALIAS("ipt_NFQUEUE");
 MODULE_ALIAS("ip6t_NFQUEUE");
 MODULE_ALIAS("arpt_NFQUEUE");
 
+static u32 jhash_initval __read_mostly;
+
 static unsigned int
 nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
@@ -31,6 +37,72 @@ nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return NF_QUEUE_NR(tinfo->queuenum);
 }
 
+static u32 hash_v4(const struct sk_buff *skb)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	u32 ipaddr;
+
+	/* packets in either direction go into same queue */
+	ipaddr = iph->saddr ^ iph->daddr;
+
+	return jhash_2words(ipaddr, iph->protocol, jhash_initval);
+}
+
+static unsigned int
+nfqueue_tg4_v1(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_NFQ_info_v1 *info = par->targinfo;
+	u32 queue = info->queuenum;
+
+	if (info->queues_total > 1)
+		queue = hash_v4(skb) % info->queues_total + queue;
+	return NF_QUEUE_NR(queue);
+}
+
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+static u32 hash_v6(const struct sk_buff *skb)
+{
+	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	u32 addr[4];
+
+	addr[0] = ip6h->saddr.s6_addr32[0] ^ ip6h->daddr.s6_addr32[0];
+	addr[1] = ip6h->saddr.s6_addr32[1] ^ ip6h->daddr.s6_addr32[1];
+	addr[2] = ip6h->saddr.s6_addr32[2] ^ ip6h->daddr.s6_addr32[2];
+	addr[3] = ip6h->saddr.s6_addr32[3] ^ ip6h->daddr.s6_addr32[3];
+
+	return jhash2(addr, ARRAY_SIZE(addr), jhash_initval);
+}
+
+static unsigned int
+nfqueue_tg6_v1(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_NFQ_info_v1 *info = par->targinfo;
+	u32 queue = info->queuenum;
+
+	if (info->queues_total > 1)
+		queue = hash_v6(skb) % info->queues_total + queue;
+	return NF_QUEUE_NR(queue);
+}
+#endif
+
+static bool nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
+{
+	const struct xt_NFQ_info_v1 *info = par->targinfo;
+	u32 maxid;
+
+	if (info->queues_total == 0) {
+		pr_err("NFQUEUE: number of total queues is 0\n");
+		return false;
+	}
+	maxid = info->queues_total - 1 + info->queuenum;
+	if (maxid > 0xffff) {
+		pr_err("NFQUEUE: number of queues (%u) out of range (got %u)\n",
+		       info->queues_total, maxid);
+		return false;
+	}
+	return true;
+}
+
 static struct xt_target nfqueue_tg_reg[] __read_mostly = {
 	{
 		.name		= "NFQUEUE",
@@ -39,10 +111,31 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
 		.targetsize	= sizeof(struct xt_NFQ_info),
 		.me		= THIS_MODULE,
 	},
+	{
+		.name		= "NFQUEUE",
+		.revision	= 1,
+		.family		= NFPROTO_IPV4,
+		.checkentry	= nfqueue_tg_v1_check,
+		.target		= nfqueue_tg4_v1,
+		.targetsize	= sizeof(struct xt_NFQ_info_v1),
+		.me		= THIS_MODULE,
+	},
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+	{
+		.name		= "NFQUEUE",
+		.revision	= 1,
+		.family		= NFPROTO_IPV6,
+		.checkentry	= nfqueue_tg_v1_check,
+		.target		= nfqueue_tg6_v1,
+		.targetsize	= sizeof(struct xt_NFQ_info_v1),
+		.me		= THIS_MODULE,
+	},
+#endif
 };
 
 static int __init nfqueue_tg_init(void)
 {
+	get_random_bytes(&jhash_initval, sizeof(jhash_initval));
 	return xt_register_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg));
 }
 
-- 
cgit v1.2.3-70-g09d2


From 089dd79db9264dc0da602bad45d42f1b3e7d1e07 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 5 Jun 2009 14:04:55 +0200
Subject: perf_counter: Generate mmap events for install_special_mapping()

In order to track the vdso also generate mmap events for
install_special_mapping().

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 14 ++++++++------
 kernel/perf_counter.c        | 34 ++++++++++++++++++++++------------
 mm/mmap.c                    |  5 +++--
 3 files changed, 33 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 6ca403acd41..40dc0e273d9 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -617,8 +617,13 @@ static inline int is_software_counter(struct perf_counter *counter)
 
 extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
 
-extern void perf_counter_mmap(unsigned long addr, unsigned long len,
-			      unsigned long pgoff, struct file *file);
+extern void __perf_counter_mmap(struct vm_area_struct *vma);
+
+static inline void perf_counter_mmap(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_EXEC)
+		__perf_counter_mmap(vma);
+}
 
 extern void perf_counter_comm(struct task_struct *tsk);
 extern void perf_counter_fork(struct task_struct *tsk);
@@ -668,10 +673,7 @@ static inline void
 perf_swcounter_event(u32 event, u64 nr, int nmi,
 		     struct pt_regs *regs, u64 addr)			{ }
 
-static inline void
-perf_counter_mmap(unsigned long addr, unsigned long len,
-		  unsigned long pgoff, struct file *file)		{ }
-
+static inline void perf_counter_mmap(struct vm_area_struct *vma)	{ }
 static inline void perf_counter_comm(struct task_struct *tsk)		{ }
 static inline void perf_counter_fork(struct task_struct *tsk)		{ }
 static inline void perf_counter_init(void)				{ }
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index a5d3e2aedd2..37a5a241ca7 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2255,7 +2255,7 @@ out:
 }
 
 static void perf_output_copy(struct perf_output_handle *handle,
-			     void *buf, unsigned int len)
+			     const void *buf, unsigned int len)
 {
 	unsigned int pages_mask;
 	unsigned int offset;
@@ -2681,9 +2681,10 @@ void perf_counter_comm(struct task_struct *task)
  */
 
 struct perf_mmap_event {
-	struct file	*file;
-	char		*file_name;
-	int		file_size;
+	struct vm_area_struct	*vma;
+
+	const char		*file_name;
+	int			file_size;
 
 	struct {
 		struct perf_event_header	header;
@@ -2744,11 +2745,12 @@ static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event)
 {
 	struct perf_cpu_context *cpuctx;
 	struct perf_counter_context *ctx;
-	struct file *file = mmap_event->file;
+	struct vm_area_struct *vma = mmap_event->vma;
+	struct file *file = vma->vm_file;
 	unsigned int size;
 	char tmp[16];
 	char *buf = NULL;
-	char *name;
+	const char *name;
 
 	if (file) {
 		buf = kzalloc(PATH_MAX, GFP_KERNEL);
@@ -2762,6 +2764,15 @@ static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event)
 			goto got_name;
 		}
 	} else {
+		name = arch_vma_name(mmap_event->vma);
+		if (name)
+			goto got_name;
+
+		if (!vma->vm_mm) {
+			name = strncpy(tmp, "[vdso]", sizeof(tmp));
+			goto got_name;
+		}
+
 		name = strncpy(tmp, "//anon", sizeof(tmp));
 		goto got_name;
 	}
@@ -2791,8 +2802,7 @@ got_name:
 	kfree(buf);
 }
 
-void perf_counter_mmap(unsigned long addr, unsigned long len,
-		       unsigned long pgoff, struct file *file)
+void __perf_counter_mmap(struct vm_area_struct *vma)
 {
 	struct perf_mmap_event mmap_event;
 
@@ -2800,12 +2810,12 @@ void perf_counter_mmap(unsigned long addr, unsigned long len,
 		return;
 
 	mmap_event = (struct perf_mmap_event){
-		.file   = file,
+		.vma	= vma,
 		.event  = {
 			.header = { .type = PERF_EVENT_MMAP, },
-			.start  = addr,
-			.len    = len,
-			.pgoff  = pgoff,
+			.start  = vma->vm_start,
+			.len    = vma->vm_end - vma->vm_start,
+			.pgoff  = vma->vm_pgoff,
 		},
 	};
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 6451ce2854b..8101de490c7 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1220,8 +1220,7 @@ munmap_back:
 	if (correct_wcount)
 		atomic_inc(&inode->i_writecount);
 out:
-	if (vm_flags & VM_EXEC)
-		perf_counter_mmap(addr, len, pgoff, file);
+	perf_counter_mmap(vma);
 
 	mm->total_vm += len >> PAGE_SHIFT;
 	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
@@ -2309,6 +2308,8 @@ int install_special_mapping(struct mm_struct *mm,
 
 	mm->total_vm += len >> PAGE_SHIFT;
 
+	perf_counter_mmap(vma);
+
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 77c9a5daa9c4d9b37812c9c69c7bcbb3f9399c3c Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Fri, 5 Jun 2009 16:26:18 +0200
Subject: firewire: reorganize header files

The three header files of firewire-core, i.e.
 "drivers/firewire/fw-device.h",
 "drivers/firewire/fw-topology.h",
 "drivers/firewire/fw-transaction.h",
are replaced by
 "drivers/firewire/core.h",
 "include/linux/firewire.h".

The latter includes everything which a firewire high-level driver (like
firewire-sbp2) needs besides linux/firewire-constants.h, while core.h
contains the rest which is needed by firewire-core itself and by low-
level drivers (card drivers) like firewire-ohci.

High-level drivers can now also reside outside of drivers/firewire
without having to add drivers/firewire to the header file search path in
makefiles.  At least the firedtv driver will be such a driver.

I also considered to spread the contents of core.h over several files,
one for each .c file where the respective implementation resides.  But
it turned out that most core .c files will end up including most of the
core .h files.  Also, the combined core.h isn't unreasonably big, and it
will lose more of its contents to linux/firewire.h anyway soon when more
firewire drivers are added.  (IP-over-1394, firedtv, and there are plans
for one or two more.)

Furthermore, fw-ohci.h is renamed to ohci.h.  The name of core.h and
ohci.h is chosen with regard to name changes of the .c files in a
follow-up change.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core.h           | 293 +++++++++++++++++++++++++
 drivers/firewire/fw-card.c        |   6 +-
 drivers/firewire/fw-cdev.c        |   5 +-
 drivers/firewire/fw-device.c      |   9 +-
 drivers/firewire/fw-device.h      | 190 ----------------
 drivers/firewire/fw-iso.c         |   4 +-
 drivers/firewire/fw-ohci.c        |   5 +-
 drivers/firewire/fw-ohci.h        | 157 --------------
 drivers/firewire/fw-sbp2.c        |   4 +-
 drivers/firewire/fw-topology.c    |  10 +-
 drivers/firewire/fw-topology.h    |  77 -------
 drivers/firewire/fw-transaction.c |   8 +-
 drivers/firewire/fw-transaction.h | 446 --------------------------------------
 drivers/firewire/ohci.h           | 157 ++++++++++++++
 include/linux/firewire.h          | 350 ++++++++++++++++++++++++++++++
 15 files changed, 830 insertions(+), 891 deletions(-)
 create mode 100644 drivers/firewire/core.h
 delete mode 100644 drivers/firewire/fw-device.h
 delete mode 100644 drivers/firewire/fw-ohci.h
 delete mode 100644 drivers/firewire/fw-topology.h
 delete mode 100644 drivers/firewire/fw-transaction.h
 create mode 100644 drivers/firewire/ohci.h
 create mode 100644 include/linux/firewire.h

(limited to 'include')

diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h
new file mode 100644
index 00000000000..273f0ab8292
--- /dev/null
+++ b/drivers/firewire/core.h
@@ -0,0 +1,293 @@
+#ifndef _FIREWIRE_CORE_H
+#define _FIREWIRE_CORE_H
+
+#include <linux/dma-mapping.h>
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/idr.h>
+#include <linux/mm_types.h>
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include <asm/atomic.h>
+
+struct device;
+struct fw_card;
+struct fw_device;
+struct fw_iso_buffer;
+struct fw_iso_context;
+struct fw_iso_packet;
+struct fw_node;
+struct fw_packet;
+
+
+/* -card */
+
+/* bitfields within the PHY registers */
+#define PHY_LINK_ACTIVE		0x80
+#define PHY_CONTENDER		0x40
+#define PHY_BUS_RESET		0x40
+#define PHY_BUS_SHORT_RESET	0x40
+
+#define BANDWIDTH_AVAILABLE_INITIAL	4915
+#define BROADCAST_CHANNEL_INITIAL	(1 << 31 | 31)
+#define BROADCAST_CHANNEL_VALID		(1 << 30)
+
+struct fw_card_driver {
+	/*
+	 * Enable the given card with the given initial config rom.
+	 * This function is expected to activate the card, and either
+	 * enable the PHY or set the link_on bit and initiate a bus
+	 * reset.
+	 */
+	int (*enable)(struct fw_card *card, u32 *config_rom, size_t length);
+
+	int (*update_phy_reg)(struct fw_card *card, int address,
+			      int clear_bits, int set_bits);
+
+	/*
+	 * Update the config rom for an enabled card.  This function
+	 * should change the config rom that is presented on the bus
+	 * an initiate a bus reset.
+	 */
+	int (*set_config_rom)(struct fw_card *card,
+			      u32 *config_rom, size_t length);
+
+	void (*send_request)(struct fw_card *card, struct fw_packet *packet);
+	void (*send_response)(struct fw_card *card, struct fw_packet *packet);
+	/* Calling cancel is valid once a packet has been submitted. */
+	int (*cancel_packet)(struct fw_card *card, struct fw_packet *packet);
+
+	/*
+	 * Allow the specified node ID to do direct DMA out and in of
+	 * host memory.  The card will disable this for all node when
+	 * a bus reset happens, so driver need to reenable this after
+	 * bus reset.  Returns 0 on success, -ENODEV if the card
+	 * doesn't support this, -ESTALE if the generation doesn't
+	 * match.
+	 */
+	int (*enable_phys_dma)(struct fw_card *card,
+			       int node_id, int generation);
+
+	u64 (*get_bus_time)(struct fw_card *card);
+
+	struct fw_iso_context *
+	(*allocate_iso_context)(struct fw_card *card,
+				int type, int channel, size_t header_size);
+	void (*free_iso_context)(struct fw_iso_context *ctx);
+
+	int (*start_iso)(struct fw_iso_context *ctx,
+			 s32 cycle, u32 sync, u32 tags);
+
+	int (*queue_iso)(struct fw_iso_context *ctx,
+			 struct fw_iso_packet *packet,
+			 struct fw_iso_buffer *buffer,
+			 unsigned long payload);
+
+	int (*stop_iso)(struct fw_iso_context *ctx);
+};
+
+void fw_card_initialize(struct fw_card *card,
+		const struct fw_card_driver *driver, struct device *device);
+int fw_card_add(struct fw_card *card,
+		u32 max_receive, u32 link_speed, u64 guid);
+void fw_core_remove_card(struct fw_card *card);
+int fw_core_initiate_bus_reset(struct fw_card *card, int short_reset);
+int fw_compute_block_crc(u32 *block);
+void fw_schedule_bm_work(struct fw_card *card, unsigned long delay);
+
+struct fw_descriptor {
+	struct list_head link;
+	size_t length;
+	u32 immediate;
+	u32 key;
+	const u32 *data;
+};
+
+int fw_core_add_descriptor(struct fw_descriptor *desc);
+void fw_core_remove_descriptor(struct fw_descriptor *desc);
+
+
+/* -cdev */
+
+extern const struct file_operations fw_device_ops;
+
+void fw_device_cdev_update(struct fw_device *device);
+void fw_device_cdev_remove(struct fw_device *device);
+
+
+/* -device */
+
+extern struct rw_semaphore fw_device_rwsem;
+extern struct idr fw_device_idr;
+extern int fw_cdev_major;
+
+struct fw_device *fw_device_get_by_devt(dev_t devt);
+void fw_device_set_broadcast_channel(struct fw_device *device, int generation);
+void fw_node_event(struct fw_card *card, struct fw_node *node, int event);
+
+
+/* -iso */
+
+/*
+ * The iso packet format allows for an immediate header/payload part
+ * stored in 'header' immediately after the packet info plus an
+ * indirect payload part that is pointer to by the 'payload' field.
+ * Applications can use one or the other or both to implement simple
+ * low-bandwidth streaming (e.g. audio) or more advanced
+ * scatter-gather streaming (e.g. assembling video frame automatically).
+ */
+struct fw_iso_packet {
+	u16 payload_length;	/* Length of indirect payload. */
+	u32 interrupt:1;	/* Generate interrupt on this packet */
+	u32 skip:1;		/* Set to not send packet at all. */
+	u32 tag:2;
+	u32 sy:4;
+	u32 header_length:8;	/* Length of immediate header. */
+	u32 header[0];
+};
+
+#define FW_ISO_CONTEXT_TRANSMIT	0
+#define FW_ISO_CONTEXT_RECEIVE	1
+
+#define FW_ISO_CONTEXT_MATCH_TAG0	 1
+#define FW_ISO_CONTEXT_MATCH_TAG1	 2
+#define FW_ISO_CONTEXT_MATCH_TAG2	 4
+#define FW_ISO_CONTEXT_MATCH_TAG3	 8
+#define FW_ISO_CONTEXT_MATCH_ALL_TAGS	15
+
+/*
+ * An iso buffer is just a set of pages mapped for DMA in the
+ * specified direction.  Since the pages are to be used for DMA, they
+ * are not mapped into the kernel virtual address space.  We store the
+ * DMA address in the page private. The helper function
+ * fw_iso_buffer_map() will map the pages into a given vma.
+ */
+struct fw_iso_buffer {
+	enum dma_data_direction direction;
+	struct page **pages;
+	int page_count;
+};
+
+typedef void (*fw_iso_callback_t)(struct fw_iso_context *context,
+				  u32 cycle, size_t header_length,
+				  void *header, void *data);
+
+struct fw_iso_context {
+	struct fw_card *card;
+	int type;
+	int channel;
+	int speed;
+	size_t header_size;
+	fw_iso_callback_t callback;
+	void *callback_data;
+};
+
+int fw_iso_buffer_init(struct fw_iso_buffer *buffer, struct fw_card *card,
+		       int page_count, enum dma_data_direction direction);
+int fw_iso_buffer_map(struct fw_iso_buffer *buffer, struct vm_area_struct *vma);
+void fw_iso_buffer_destroy(struct fw_iso_buffer *buffer, struct fw_card *card);
+
+struct fw_iso_context *fw_iso_context_create(struct fw_card *card,
+		int type, int channel, int speed, size_t header_size,
+		fw_iso_callback_t callback, void *callback_data);
+int fw_iso_context_queue(struct fw_iso_context *ctx,
+			 struct fw_iso_packet *packet,
+			 struct fw_iso_buffer *buffer,
+			 unsigned long payload);
+int fw_iso_context_start(struct fw_iso_context *ctx,
+			 int cycle, int sync, int tags);
+int fw_iso_context_stop(struct fw_iso_context *ctx);
+void fw_iso_context_destroy(struct fw_iso_context *ctx);
+
+void fw_iso_resource_manage(struct fw_card *card, int generation,
+		u64 channels_mask, int *channel, int *bandwidth, bool allocate);
+
+
+/* -topology */
+
+enum {
+	FW_NODE_CREATED,
+	FW_NODE_UPDATED,
+	FW_NODE_DESTROYED,
+	FW_NODE_LINK_ON,
+	FW_NODE_LINK_OFF,
+	FW_NODE_INITIATED_RESET,
+};
+
+struct fw_node {
+	u16 node_id;
+	u8 color;
+	u8 port_count;
+	u8 link_on:1;
+	u8 initiated_reset:1;
+	u8 b_path:1;
+	u8 phy_speed:2;	/* As in the self ID packet. */
+	u8 max_speed:2;	/* Minimum of all phy-speeds on the path from the
+			 * local node to this node. */
+	u8 max_depth:4;	/* Maximum depth to any leaf node */
+	u8 max_hops:4;	/* Max hops in this sub tree */
+	atomic_t ref_count;
+
+	/* For serializing node topology into a list. */
+	struct list_head link;
+
+	/* Upper layer specific data. */
+	void *data;
+
+	struct fw_node *ports[0];
+};
+
+static inline struct fw_node *fw_node_get(struct fw_node *node)
+{
+	atomic_inc(&node->ref_count);
+
+	return node;
+}
+
+static inline void fw_node_put(struct fw_node *node)
+{
+	if (atomic_dec_and_test(&node->ref_count))
+		kfree(node);
+}
+
+void fw_core_handle_bus_reset(struct fw_card *card, int node_id,
+			      int generation, int self_id_count, u32 *self_ids);
+void fw_destroy_nodes(struct fw_card *card);
+
+/*
+ * Check whether new_generation is the immediate successor of old_generation.
+ * Take counter roll-over at 255 (as per OHCI) into account.
+ */
+static inline bool is_next_generation(int new_generation, int old_generation)
+{
+	return (new_generation & 0xff) == ((old_generation + 1) & 0xff);
+}
+
+
+/* -transaction */
+
+#define TCODE_IS_READ_REQUEST(tcode)	(((tcode) & ~1) == 4)
+#define TCODE_IS_BLOCK_PACKET(tcode)	(((tcode) &  1) != 0)
+#define TCODE_IS_REQUEST(tcode)		(((tcode) &  2) == 0)
+#define TCODE_IS_RESPONSE(tcode)	(((tcode) &  2) != 0)
+#define TCODE_HAS_REQUEST_DATA(tcode)	(((tcode) & 12) != 4)
+#define TCODE_HAS_RESPONSE_DATA(tcode)	(((tcode) & 12) != 0)
+
+#define LOCAL_BUS 0xffc0
+
+void fw_core_handle_request(struct fw_card *card, struct fw_packet *request);
+void fw_core_handle_response(struct fw_card *card, struct fw_packet *packet);
+void fw_fill_response(struct fw_packet *response, u32 *request_header,
+		      int rcode, void *payload, size_t length);
+void fw_flush_transactions(struct fw_card *card);
+void fw_send_phy_config(struct fw_card *card,
+			int node_id, int generation, int gap_count);
+
+static inline int fw_stream_packet_destination_id(int tag, int channel, int sy)
+{
+	return tag << 14 | channel << 8 | sy;
+}
+
+#endif /* _FIREWIRE_CORE_H */
diff --git a/drivers/firewire/fw-card.c b/drivers/firewire/fw-card.c
index b6f55e262e7..ba6cd70b851 100644
--- a/drivers/firewire/fw-card.c
+++ b/drivers/firewire/fw-card.c
@@ -21,6 +21,8 @@
 #include <linux/crc-itu-t.h>
 #include <linux/device.h>
 #include <linux/errno.h>
+#include <linux/firewire.h>
+#include <linux/firewire-constants.h>
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/kref.h>
@@ -34,9 +36,7 @@
 #include <asm/atomic.h>
 #include <asm/byteorder.h>
 
-#include "fw-device.h"
-#include "fw-topology.h"
-#include "fw-transaction.h"
+#include "core.h"
 
 int fw_compute_block_crc(u32 *block)
 {
diff --git a/drivers/firewire/fw-cdev.c b/drivers/firewire/fw-cdev.c
index 8a5e6ae2552..042c0454047 100644
--- a/drivers/firewire/fw-cdev.c
+++ b/drivers/firewire/fw-cdev.c
@@ -22,6 +22,7 @@
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/errno.h>
+#include <linux/firewire.h>
 #include <linux/firewire-cdev.h>
 #include <linux/idr.h>
 #include <linux/jiffies.h>
@@ -41,9 +42,7 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 
-#include "fw-device.h"
-#include "fw-topology.h"
-#include "fw-transaction.h"
+#include "core.h"
 
 struct client {
 	u32 version;
diff --git a/drivers/firewire/fw-device.c b/drivers/firewire/fw-device.c
index 238acac5bad..65d84dd6c1d 100644
--- a/drivers/firewire/fw-device.c
+++ b/drivers/firewire/fw-device.c
@@ -22,6 +22,8 @@
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/errno.h>
+#include <linux/firewire.h>
+#include <linux/firewire-constants.h>
 #include <linux/idr.h>
 #include <linux/jiffies.h>
 #include <linux/kobject.h>
@@ -39,9 +41,7 @@
 #include <asm/byteorder.h>
 #include <asm/system.h>
 
-#include "fw-device.h"
-#include "fw-topology.h"
-#include "fw-transaction.h"
+#include "core.h"
 
 void fw_csr_iterator_init(struct fw_csr_iterator *ci, u32 * p)
 {
@@ -94,8 +94,9 @@ static int fw_unit_match(struct device *dev, struct device_driver *drv)
 		return 0;
 
 	device = fw_device(unit->device.parent);
+	id = container_of(drv, struct fw_driver, driver)->id_table;
 
-	for (id = fw_driver(drv)->id_table; id->match_flags != 0; id++) {
+	for (; id->match_flags != 0; id++) {
 		if (match_unit_directory(unit->directory, id->match_flags, id))
 			return 1;
 
diff --git a/drivers/firewire/fw-device.h b/drivers/firewire/fw-device.h
deleted file mode 100644
index e973c4361f4..00000000000
--- a/drivers/firewire/fw-device.h
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Copyright (C) 2005-2006  Kristian Hoegsberg <krh@bitplanet.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
-
-#ifndef __fw_device_h
-#define __fw_device_h
-
-#include <linux/device.h>
-#include <linux/fs.h>
-#include <linux/idr.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/mod_devicetable.h>
-#include <linux/mutex.h>
-#include <linux/rwsem.h>
-#include <linux/sysfs.h>
-#include <linux/types.h>
-#include <linux/workqueue.h>
-
-#include <asm/atomic.h>
-
-enum fw_device_state {
-	FW_DEVICE_INITIALIZING,
-	FW_DEVICE_RUNNING,
-	FW_DEVICE_GONE,
-	FW_DEVICE_SHUTDOWN,
-};
-
-struct fw_attribute_group {
-	struct attribute_group *groups[2];
-	struct attribute_group group;
-	struct attribute *attrs[12];
-};
-
-struct fw_node;
-struct fw_card;
-
-/*
- * Note, fw_device.generation always has to be read before fw_device.node_id.
- * Use SMP memory barriers to ensure this.  Otherwise requests will be sent
- * to an outdated node_id if the generation was updated in the meantime due
- * to a bus reset.
- *
- * Likewise, fw-core will take care to update .node_id before .generation so
- * that whenever fw_device.generation is current WRT the actual bus generation,
- * fw_device.node_id is guaranteed to be current too.
- *
- * The same applies to fw_device.card->node_id vs. fw_device.generation.
- *
- * fw_device.config_rom and fw_device.config_rom_length may be accessed during
- * the lifetime of any fw_unit belonging to the fw_device, before device_del()
- * was called on the last fw_unit.  Alternatively, they may be accessed while
- * holding fw_device_rwsem.
- */
-struct fw_device {
-	atomic_t state;
-	struct fw_node *node;
-	int node_id;
-	int generation;
-	unsigned max_speed;
-	struct fw_card *card;
-	struct device device;
-
-	struct mutex client_list_mutex;
-	struct list_head client_list;
-
-	u32 *config_rom;
-	size_t config_rom_length;
-	int config_rom_retries;
-	unsigned is_local:1;
-	unsigned cmc:1;
-	unsigned bc_implemented:2;
-
-	struct delayed_work work;
-	struct fw_attribute_group attribute_group;
-};
-
-static inline struct fw_device *fw_device(struct device *dev)
-{
-	return container_of(dev, struct fw_device, device);
-}
-
-static inline int fw_device_is_shutdown(struct fw_device *device)
-{
-	return atomic_read(&device->state) == FW_DEVICE_SHUTDOWN;
-}
-
-static inline struct fw_device *fw_device_get(struct fw_device *device)
-{
-	get_device(&device->device);
-
-	return device;
-}
-
-static inline void fw_device_put(struct fw_device *device)
-{
-	put_device(&device->device);
-}
-
-struct fw_device *fw_device_get_by_devt(dev_t devt);
-int fw_device_enable_phys_dma(struct fw_device *device);
-void fw_device_set_broadcast_channel(struct fw_device *device, int generation);
-
-void fw_device_cdev_update(struct fw_device *device);
-void fw_device_cdev_remove(struct fw_device *device);
-
-extern struct rw_semaphore fw_device_rwsem;
-extern struct idr fw_device_idr;
-extern int fw_cdev_major;
-
-/*
- * fw_unit.directory must not be accessed after device_del(&fw_unit.device).
- */
-struct fw_unit {
-	struct device device;
-	u32 *directory;
-	struct fw_attribute_group attribute_group;
-};
-
-static inline struct fw_unit *fw_unit(struct device *dev)
-{
-	return container_of(dev, struct fw_unit, device);
-}
-
-static inline struct fw_unit *fw_unit_get(struct fw_unit *unit)
-{
-	get_device(&unit->device);
-
-	return unit;
-}
-
-static inline void fw_unit_put(struct fw_unit *unit)
-{
-	put_device(&unit->device);
-}
-
-#define CSR_OFFSET	0x40
-#define CSR_LEAF	0x80
-#define CSR_DIRECTORY	0xc0
-
-#define CSR_DESCRIPTOR		0x01
-#define CSR_VENDOR		0x03
-#define CSR_HARDWARE_VERSION	0x04
-#define CSR_NODE_CAPABILITIES	0x0c
-#define CSR_UNIT		0x11
-#define CSR_SPECIFIER_ID	0x12
-#define CSR_VERSION		0x13
-#define CSR_DEPENDENT_INFO	0x14
-#define CSR_MODEL		0x17
-#define CSR_INSTANCE		0x18
-#define CSR_DIRECTORY_ID	0x20
-
-struct fw_csr_iterator {
-	u32 *p;
-	u32 *end;
-};
-
-void fw_csr_iterator_init(struct fw_csr_iterator *ci, u32 *p);
-int fw_csr_iterator_next(struct fw_csr_iterator *ci,
-			 int *key, int *value);
-
-struct fw_driver {
-	struct device_driver driver;
-	/* Called when the parent device sits through a bus reset. */
-	void (*update)(struct fw_unit *unit);
-	const struct ieee1394_device_id *id_table;
-};
-
-static inline struct fw_driver *fw_driver(struct device_driver *drv)
-{
-	return container_of(drv, struct fw_driver, driver);
-}
-
-extern const struct file_operations fw_device_ops;
-
-#endif /* __fw_device_h */
diff --git a/drivers/firewire/fw-iso.c b/drivers/firewire/fw-iso.c
index 0ff3e9c42eb..28076c892d7 100644
--- a/drivers/firewire/fw-iso.c
+++ b/drivers/firewire/fw-iso.c
@@ -22,6 +22,7 @@
 
 #include <linux/dma-mapping.h>
 #include <linux/errno.h>
+#include <linux/firewire.h>
 #include <linux/firewire-constants.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
@@ -30,8 +31,7 @@
 
 #include <asm/byteorder.h>
 
-#include "fw-topology.h"
-#include "fw-transaction.h"
+#include "core.h"
 
 /*
  * Isochronous DMA context management
diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c
index d296d12909d..ecddd11b797 100644
--- a/drivers/firewire/fw-ohci.c
+++ b/drivers/firewire/fw-ohci.c
@@ -22,6 +22,7 @@
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
+#include <linux/firewire.h>
 #include <linux/firewire-constants.h>
 #include <linux/gfp.h>
 #include <linux/init.h>
@@ -45,8 +46,8 @@
 #include <asm/pmac_feature.h>
 #endif
 
-#include "fw-ohci.h"
-#include "fw-transaction.h"
+#include "core.h"
+#include "ohci.h"
 
 #define DESCRIPTOR_OUTPUT_MORE		0
 #define DESCRIPTOR_OUTPUT_LAST		(1 << 12)
diff --git a/drivers/firewire/fw-ohci.h b/drivers/firewire/fw-ohci.h
deleted file mode 100644
index a2fbb6240ca..00000000000
--- a/drivers/firewire/fw-ohci.h
+++ /dev/null
@@ -1,157 +0,0 @@
-#ifndef __fw_ohci_h
-#define __fw_ohci_h
-
-/* OHCI register map */
-
-#define OHCI1394_Version                      0x000
-#define OHCI1394_GUID_ROM                     0x004
-#define OHCI1394_ATRetries                    0x008
-#define OHCI1394_CSRData                      0x00C
-#define OHCI1394_CSRCompareData               0x010
-#define OHCI1394_CSRControl                   0x014
-#define OHCI1394_ConfigROMhdr                 0x018
-#define OHCI1394_BusID                        0x01C
-#define OHCI1394_BusOptions                   0x020
-#define OHCI1394_GUIDHi                       0x024
-#define OHCI1394_GUIDLo                       0x028
-#define OHCI1394_ConfigROMmap                 0x034
-#define OHCI1394_PostedWriteAddressLo         0x038
-#define OHCI1394_PostedWriteAddressHi         0x03C
-#define OHCI1394_VendorID                     0x040
-#define OHCI1394_HCControlSet                 0x050
-#define OHCI1394_HCControlClear               0x054
-#define  OHCI1394_HCControl_BIBimageValid	0x80000000
-#define  OHCI1394_HCControl_noByteSwapData	0x40000000
-#define  OHCI1394_HCControl_programPhyEnable	0x00800000
-#define  OHCI1394_HCControl_aPhyEnhanceEnable	0x00400000
-#define  OHCI1394_HCControl_LPS			0x00080000
-#define  OHCI1394_HCControl_postedWriteEnable	0x00040000
-#define  OHCI1394_HCControl_linkEnable		0x00020000
-#define  OHCI1394_HCControl_softReset		0x00010000
-#define OHCI1394_SelfIDBuffer                 0x064
-#define OHCI1394_SelfIDCount                  0x068
-#define  OHCI1394_SelfIDCount_selfIDError	0x80000000
-#define OHCI1394_IRMultiChanMaskHiSet         0x070
-#define OHCI1394_IRMultiChanMaskHiClear       0x074
-#define OHCI1394_IRMultiChanMaskLoSet         0x078
-#define OHCI1394_IRMultiChanMaskLoClear       0x07C
-#define OHCI1394_IntEventSet                  0x080
-#define OHCI1394_IntEventClear                0x084
-#define OHCI1394_IntMaskSet                   0x088
-#define OHCI1394_IntMaskClear                 0x08C
-#define OHCI1394_IsoXmitIntEventSet           0x090
-#define OHCI1394_IsoXmitIntEventClear         0x094
-#define OHCI1394_IsoXmitIntMaskSet            0x098
-#define OHCI1394_IsoXmitIntMaskClear          0x09C
-#define OHCI1394_IsoRecvIntEventSet           0x0A0
-#define OHCI1394_IsoRecvIntEventClear         0x0A4
-#define OHCI1394_IsoRecvIntMaskSet            0x0A8
-#define OHCI1394_IsoRecvIntMaskClear          0x0AC
-#define OHCI1394_InitialBandwidthAvailable    0x0B0
-#define OHCI1394_InitialChannelsAvailableHi   0x0B4
-#define OHCI1394_InitialChannelsAvailableLo   0x0B8
-#define OHCI1394_FairnessControl              0x0DC
-#define OHCI1394_LinkControlSet               0x0E0
-#define OHCI1394_LinkControlClear             0x0E4
-#define   OHCI1394_LinkControl_rcvSelfID	(1 << 9)
-#define   OHCI1394_LinkControl_rcvPhyPkt	(1 << 10)
-#define   OHCI1394_LinkControl_cycleTimerEnable	(1 << 20)
-#define   OHCI1394_LinkControl_cycleMaster	(1 << 21)
-#define   OHCI1394_LinkControl_cycleSource	(1 << 22)
-#define OHCI1394_NodeID                       0x0E8
-#define   OHCI1394_NodeID_idValid             0x80000000
-#define   OHCI1394_NodeID_nodeNumber          0x0000003f
-#define   OHCI1394_NodeID_busNumber           0x0000ffc0
-#define OHCI1394_PhyControl                   0x0EC
-#define   OHCI1394_PhyControl_Read(addr)	(((addr) << 8) | 0x00008000)
-#define   OHCI1394_PhyControl_ReadDone		0x80000000
-#define   OHCI1394_PhyControl_ReadData(r)	(((r) & 0x00ff0000) >> 16)
-#define   OHCI1394_PhyControl_Write(addr, data)	(((addr) << 8) | (data) | 0x00004000)
-#define   OHCI1394_PhyControl_WriteDone		0x00004000
-#define OHCI1394_IsochronousCycleTimer        0x0F0
-#define OHCI1394_AsReqFilterHiSet             0x100
-#define OHCI1394_AsReqFilterHiClear           0x104
-#define OHCI1394_AsReqFilterLoSet             0x108
-#define OHCI1394_AsReqFilterLoClear           0x10C
-#define OHCI1394_PhyReqFilterHiSet            0x110
-#define OHCI1394_PhyReqFilterHiClear          0x114
-#define OHCI1394_PhyReqFilterLoSet            0x118
-#define OHCI1394_PhyReqFilterLoClear          0x11C
-#define OHCI1394_PhyUpperBound                0x120
-
-#define OHCI1394_AsReqTrContextBase           0x180
-#define OHCI1394_AsReqTrContextControlSet     0x180
-#define OHCI1394_AsReqTrContextControlClear   0x184
-#define OHCI1394_AsReqTrCommandPtr            0x18C
-
-#define OHCI1394_AsRspTrContextBase           0x1A0
-#define OHCI1394_AsRspTrContextControlSet     0x1A0
-#define OHCI1394_AsRspTrContextControlClear   0x1A4
-#define OHCI1394_AsRspTrCommandPtr            0x1AC
-
-#define OHCI1394_AsReqRcvContextBase          0x1C0
-#define OHCI1394_AsReqRcvContextControlSet    0x1C0
-#define OHCI1394_AsReqRcvContextControlClear  0x1C4
-#define OHCI1394_AsReqRcvCommandPtr           0x1CC
-
-#define OHCI1394_AsRspRcvContextBase          0x1E0
-#define OHCI1394_AsRspRcvContextControlSet    0x1E0
-#define OHCI1394_AsRspRcvContextControlClear  0x1E4
-#define OHCI1394_AsRspRcvCommandPtr           0x1EC
-
-/* Isochronous transmit registers */
-#define OHCI1394_IsoXmitContextBase(n)           (0x200 + 16 * (n))
-#define OHCI1394_IsoXmitContextControlSet(n)     (0x200 + 16 * (n))
-#define OHCI1394_IsoXmitContextControlClear(n)   (0x204 + 16 * (n))
-#define OHCI1394_IsoXmitCommandPtr(n)            (0x20C + 16 * (n))
-
-/* Isochronous receive registers */
-#define OHCI1394_IsoRcvContextBase(n)         (0x400 + 32 * (n))
-#define OHCI1394_IsoRcvContextControlSet(n)   (0x400 + 32 * (n))
-#define OHCI1394_IsoRcvContextControlClear(n) (0x404 + 32 * (n))
-#define OHCI1394_IsoRcvCommandPtr(n)          (0x40C + 32 * (n))
-#define OHCI1394_IsoRcvContextMatch(n)        (0x410 + 32 * (n))
-
-/* Interrupts Mask/Events */
-#define OHCI1394_reqTxComplete		0x00000001
-#define OHCI1394_respTxComplete		0x00000002
-#define OHCI1394_ARRQ			0x00000004
-#define OHCI1394_ARRS			0x00000008
-#define OHCI1394_RQPkt			0x00000010
-#define OHCI1394_RSPkt			0x00000020
-#define OHCI1394_isochTx		0x00000040
-#define OHCI1394_isochRx		0x00000080
-#define OHCI1394_postedWriteErr		0x00000100
-#define OHCI1394_lockRespErr		0x00000200
-#define OHCI1394_selfIDComplete		0x00010000
-#define OHCI1394_busReset		0x00020000
-#define OHCI1394_regAccessFail		0x00040000
-#define OHCI1394_phy			0x00080000
-#define OHCI1394_cycleSynch		0x00100000
-#define OHCI1394_cycle64Seconds		0x00200000
-#define OHCI1394_cycleLost		0x00400000
-#define OHCI1394_cycleInconsistent	0x00800000
-#define OHCI1394_unrecoverableError	0x01000000
-#define OHCI1394_cycleTooLong		0x02000000
-#define OHCI1394_phyRegRcvd		0x04000000
-#define OHCI1394_masterIntEnable	0x80000000
-
-#define OHCI1394_evt_no_status		0x0
-#define OHCI1394_evt_long_packet	0x2
-#define OHCI1394_evt_missing_ack	0x3
-#define OHCI1394_evt_underrun		0x4
-#define OHCI1394_evt_overrun		0x5
-#define OHCI1394_evt_descriptor_read	0x6
-#define OHCI1394_evt_data_read		0x7
-#define OHCI1394_evt_data_write		0x8
-#define OHCI1394_evt_bus_reset		0x9
-#define OHCI1394_evt_timeout		0xa
-#define OHCI1394_evt_tcode_err		0xb
-#define OHCI1394_evt_reserved_b		0xc
-#define OHCI1394_evt_reserved_c		0xd
-#define OHCI1394_evt_unknown		0xe
-#define OHCI1394_evt_flushed		0xf
-
-#define OHCI1394_phy_tcode		0xe
-
-#endif /* __fw_ohci_h */
diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c
index 027b91f4a6e..d41cb6e455b 100644
--- a/drivers/firewire/fw-sbp2.c
+++ b/drivers/firewire/fw-sbp2.c
@@ -34,6 +34,7 @@
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
+#include <linux/firewire.h>
 #include <linux/firewire-constants.h>
 #include <linux/init.h>
 #include <linux/jiffies.h>
@@ -58,9 +59,6 @@
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
 
-#include "fw-device.h"
-#include "fw-transaction.h"
-
 /*
  * So far only bridges from Oxford Semiconductor are known to support
  * concurrent logins. Depending on firmware, four or two concurrent logins
diff --git a/drivers/firewire/fw-topology.c b/drivers/firewire/fw-topology.c
index 6d0ea1bb7e2..fddf2b35893 100644
--- a/drivers/firewire/fw-topology.c
+++ b/drivers/firewire/fw-topology.c
@@ -20,6 +20,8 @@
 
 #include <linux/bug.h>
 #include <linux/errno.h>
+#include <linux/firewire.h>
+#include <linux/firewire-constants.h>
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
@@ -31,8 +33,7 @@
 #include <asm/atomic.h>
 #include <asm/system.h>
 
-#include "fw-topology.h"
-#include "fw-transaction.h"
+#include "core.h"
 
 #define SELF_ID_PHY_ID(q)		(((q) >> 24) & 0x3f)
 #define SELF_ID_EXTENDED(q)		(((q) >> 23) & 0x01)
@@ -45,6 +46,11 @@
 
 #define SELF_ID_EXT_SEQUENCE(q)		(((q) >> 20) & 0x07)
 
+#define SELFID_PORT_CHILD	0x3
+#define SELFID_PORT_PARENT	0x2
+#define SELFID_PORT_NCONN	0x1
+#define SELFID_PORT_NONE	0x0
+
 static u32 *count_ports(u32 *sid, int *total_port_count, int *child_port_count)
 {
 	u32 q;
diff --git a/drivers/firewire/fw-topology.h b/drivers/firewire/fw-topology.h
deleted file mode 100644
index 3c497bb4fae..00000000000
--- a/drivers/firewire/fw-topology.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (C) 2003-2006 Kristian Hoegsberg <krh@bitplanet.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
-
-#ifndef __fw_topology_h
-#define __fw_topology_h
-
-#include <linux/list.h>
-#include <linux/slab.h>
-
-#include <asm/atomic.h>
-
-enum {
-	FW_NODE_CREATED,
-	FW_NODE_UPDATED,
-	FW_NODE_DESTROYED,
-	FW_NODE_LINK_ON,
-	FW_NODE_LINK_OFF,
-	FW_NODE_INITIATED_RESET,
-};
-
-struct fw_node {
-	u16 node_id;
-	u8 color;
-	u8 port_count;
-	u8 link_on : 1;
-	u8 initiated_reset : 1;
-	u8 b_path : 1;
-	u8 phy_speed : 2; /* As in the self ID packet. */
-	u8 max_speed : 2; /* Minimum of all phy-speeds on the path from the
-			   * local node to this node. */
-	u8 max_depth : 4; /* Maximum depth to any leaf node */
-	u8 max_hops : 4;  /* Max hops in this sub tree */
-	atomic_t ref_count;
-
-	/* For serializing node topology into a list. */
-	struct list_head link;
-
-	/* Upper layer specific data. */
-	void *data;
-
-	struct fw_node *ports[0];
-};
-
-static inline struct fw_node *fw_node_get(struct fw_node *node)
-{
-	atomic_inc(&node->ref_count);
-
-	return node;
-}
-
-static inline void fw_node_put(struct fw_node *node)
-{
-	if (atomic_dec_and_test(&node->ref_count))
-		kfree(node);
-}
-
-struct fw_card;
-void fw_destroy_nodes(struct fw_card *card);
-
-int fw_compute_block_crc(u32 *block);
-
-#endif /* __fw_topology_h */
diff --git a/drivers/firewire/fw-transaction.c b/drivers/firewire/fw-transaction.c
index 7008214e333..9a6ce9ab2a6 100644
--- a/drivers/firewire/fw-transaction.c
+++ b/drivers/firewire/fw-transaction.c
@@ -22,6 +22,7 @@
 #include <linux/completion.h>
 #include <linux/device.h>
 #include <linux/errno.h>
+#include <linux/firewire.h>
 #include <linux/firewire-constants.h>
 #include <linux/fs.h>
 #include <linux/init.h>
@@ -38,8 +39,7 @@
 
 #include <asm/byteorder.h>
 
-#include "fw-device.h"	/* for fw_device_ops */
-#include "fw-transaction.h"
+#include "core.h"
 
 #define HEADER_PRI(pri)			((pri) << 0)
 #define HEADER_TCODE(tcode)		((tcode) << 4)
@@ -64,6 +64,10 @@
 #define HEADER_DESTINATION_IS_BROADCAST(q) \
 	(((q) & HEADER_DESTINATION(0x3f)) == HEADER_DESTINATION(0x3f))
 
+#define PHY_PACKET_CONFIG	0x0
+#define PHY_PACKET_LINK_ON	0x1
+#define PHY_PACKET_SELF_ID	0x2
+
 #define PHY_CONFIG_GAP_COUNT(gap_count)	(((gap_count) << 16) | (1 << 22))
 #define PHY_CONFIG_ROOT_ID(node_id)	((((node_id) & 0x3f) << 24) | (1 << 23))
 #define PHY_IDENTIFIER(id)		((id) << 30)
diff --git a/drivers/firewire/fw-transaction.h b/drivers/firewire/fw-transaction.h
deleted file mode 100644
index dfa799068f8..00000000000
--- a/drivers/firewire/fw-transaction.h
+++ /dev/null
@@ -1,446 +0,0 @@
-/*
- * Copyright (C) 2003-2006 Kristian Hoegsberg <krh@bitplanet.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
-
-#ifndef __fw_transaction_h
-#define __fw_transaction_h
-
-#include <linux/completion.h>
-#include <linux/device.h>
-#include <linux/dma-mapping.h>
-#include <linux/firewire-constants.h>
-#include <linux/kref.h>
-#include <linux/list.h>
-#include <linux/spinlock_types.h>
-#include <linux/timer.h>
-#include <linux/types.h>
-#include <linux/workqueue.h>
-
-#define TCODE_IS_READ_REQUEST(tcode)	(((tcode) & ~1) == 4)
-#define TCODE_IS_BLOCK_PACKET(tcode)	(((tcode) &  1) != 0)
-#define TCODE_IS_REQUEST(tcode)		(((tcode) &  2) == 0)
-#define TCODE_IS_RESPONSE(tcode)	(((tcode) &  2) != 0)
-#define TCODE_HAS_REQUEST_DATA(tcode)	(((tcode) & 12) != 4)
-#define TCODE_HAS_RESPONSE_DATA(tcode)	(((tcode) & 12) != 0)
-
-#define LOCAL_BUS 0xffc0
-
-#define SELFID_PORT_CHILD	0x3
-#define SELFID_PORT_PARENT	0x2
-#define SELFID_PORT_NCONN	0x1
-#define SELFID_PORT_NONE	0x0
-
-#define PHY_PACKET_CONFIG	0x0
-#define PHY_PACKET_LINK_ON	0x1
-#define PHY_PACKET_SELF_ID	0x2
-
-/* Bit fields _within_ the PHY registers. */
-#define PHY_LINK_ACTIVE		0x80
-#define PHY_CONTENDER		0x40
-#define PHY_BUS_RESET		0x40
-#define PHY_BUS_SHORT_RESET	0x40
-
-#define CSR_REGISTER_BASE		0xfffff0000000ULL
-
-/* register offsets relative to CSR_REGISTER_BASE */
-#define CSR_STATE_CLEAR			0x0
-#define CSR_STATE_SET			0x4
-#define CSR_NODE_IDS			0x8
-#define CSR_RESET_START			0xc
-#define CSR_SPLIT_TIMEOUT_HI		0x18
-#define CSR_SPLIT_TIMEOUT_LO		0x1c
-#define CSR_CYCLE_TIME			0x200
-#define CSR_BUS_TIME			0x204
-#define CSR_BUSY_TIMEOUT		0x210
-#define CSR_BUS_MANAGER_ID		0x21c
-#define CSR_BANDWIDTH_AVAILABLE		0x220
-#define CSR_CHANNELS_AVAILABLE		0x224
-#define CSR_CHANNELS_AVAILABLE_HI	0x224
-#define CSR_CHANNELS_AVAILABLE_LO	0x228
-#define CSR_BROADCAST_CHANNEL		0x234
-#define CSR_CONFIG_ROM			0x400
-#define CSR_CONFIG_ROM_END		0x800
-#define CSR_FCP_COMMAND			0xB00
-#define CSR_FCP_RESPONSE		0xD00
-#define CSR_FCP_END			0xF00
-#define CSR_TOPOLOGY_MAP		0x1000
-#define CSR_TOPOLOGY_MAP_END		0x1400
-#define CSR_SPEED_MAP			0x2000
-#define CSR_SPEED_MAP_END		0x3000
-
-#define BANDWIDTH_AVAILABLE_INITIAL	4915
-#define BROADCAST_CHANNEL_INITIAL	(1 << 31 | 31)
-#define BROADCAST_CHANNEL_VALID		(1 << 30)
-
-#define fw_notify(s, args...) printk(KERN_NOTICE KBUILD_MODNAME ": " s, ## args)
-#define fw_error(s, args...) printk(KERN_ERR KBUILD_MODNAME ": " s, ## args)
-
-static inline void fw_memcpy_from_be32(void *_dst, void *_src, size_t size)
-{
-	u32    *dst = _dst;
-	__be32 *src = _src;
-	int i;
-
-	for (i = 0; i < size / 4; i++)
-		dst[i] = be32_to_cpu(src[i]);
-}
-
-static inline void fw_memcpy_to_be32(void *_dst, void *_src, size_t size)
-{
-	fw_memcpy_from_be32(_dst, _src, size);
-}
-
-struct fw_card;
-struct fw_packet;
-struct fw_node;
-struct fw_request;
-
-struct fw_descriptor {
-	struct list_head link;
-	size_t length;
-	u32 immediate;
-	u32 key;
-	const u32 *data;
-};
-
-int fw_core_add_descriptor(struct fw_descriptor *desc);
-void fw_core_remove_descriptor(struct fw_descriptor *desc);
-
-typedef void (*fw_packet_callback_t)(struct fw_packet *packet,
-				     struct fw_card *card, int status);
-
-typedef void (*fw_transaction_callback_t)(struct fw_card *card, int rcode,
-					  void *data, size_t length,
-					  void *callback_data);
-
-/*
- * Important note:  The callback must guarantee that either fw_send_response()
- * or kfree() is called on the @request.
- */
-typedef void (*fw_address_callback_t)(struct fw_card *card,
-				      struct fw_request *request,
-				      int tcode, int destination, int source,
-				      int generation, int speed,
-				      unsigned long long offset,
-				      void *data, size_t length,
-				      void *callback_data);
-
-struct fw_packet {
-	int speed;
-	int generation;
-	u32 header[4];
-	size_t header_length;
-	void *payload;
-	size_t payload_length;
-	dma_addr_t payload_bus;
-	u32 timestamp;
-
-	/*
-	 * This callback is called when the packet transmission has
-	 * completed; for successful transmission, the status code is
-	 * the ack received from the destination, otherwise it's a
-	 * negative errno: ENOMEM, ESTALE, ETIMEDOUT, ENODEV, EIO.
-	 * The callback can be called from tasklet context and thus
-	 * must never block.
-	 */
-	fw_packet_callback_t callback;
-	int ack;
-	struct list_head link;
-	void *driver_data;
-};
-
-struct fw_transaction {
-	int node_id; /* The generation is implied; it is always the current. */
-	int tlabel;
-	int timestamp;
-	struct list_head link;
-
-	struct fw_packet packet;
-
-	/*
-	 * The data passed to the callback is valid only during the
-	 * callback.
-	 */
-	fw_transaction_callback_t callback;
-	void *callback_data;
-};
-
-struct fw_address_handler {
-	u64 offset;
-	size_t length;
-	fw_address_callback_t address_callback;
-	void *callback_data;
-	struct list_head link;
-};
-
-struct fw_address_region {
-	u64 start;
-	u64 end;
-};
-
-extern const struct fw_address_region fw_high_memory_region;
-
-int fw_core_add_address_handler(struct fw_address_handler *handler,
-				const struct fw_address_region *region);
-void fw_core_remove_address_handler(struct fw_address_handler *handler);
-void fw_fill_response(struct fw_packet *response, u32 *request_header,
-		      int rcode, void *payload, size_t length);
-void fw_send_response(struct fw_card *card,
-		      struct fw_request *request, int rcode);
-
-extern struct bus_type fw_bus_type;
-
-struct fw_card {
-	const struct fw_card_driver *driver;
-	struct device *device;
-	struct kref kref;
-	struct completion done;
-
-	int node_id;
-	int generation;
-	int current_tlabel, tlabel_mask;
-	struct list_head transaction_list;
-	struct timer_list flush_timer;
-	unsigned long reset_jiffies;
-
-	unsigned long long guid;
-	unsigned max_receive;
-	int link_speed;
-	int config_rom_generation;
-
-	spinlock_t lock; /* Take this lock when handling the lists in
-			  * this struct. */
-	struct fw_node *local_node;
-	struct fw_node *root_node;
-	struct fw_node *irm_node;
-	u8 color; /* must be u8 to match the definition in struct fw_node */
-	int gap_count;
-	bool beta_repeaters_present;
-
-	int index;
-
-	struct list_head link;
-
-	/* Work struct for BM duties. */
-	struct delayed_work work;
-	int bm_retries;
-	int bm_generation;
-
-	bool broadcast_channel_allocated;
-	u32 broadcast_channel;
-	u32 topology_map[(CSR_TOPOLOGY_MAP_END - CSR_TOPOLOGY_MAP) / 4];
-};
-
-static inline struct fw_card *fw_card_get(struct fw_card *card)
-{
-	kref_get(&card->kref);
-
-	return card;
-}
-
-void fw_card_release(struct kref *kref);
-
-static inline void fw_card_put(struct fw_card *card)
-{
-	kref_put(&card->kref, fw_card_release);
-}
-
-extern void fw_schedule_bm_work(struct fw_card *card, unsigned long delay);
-
-/*
- * Check whether new_generation is the immediate successor of old_generation.
- * Take counter roll-over at 255 (as per to OHCI) into account.
- */
-static inline bool is_next_generation(int new_generation, int old_generation)
-{
-	return (new_generation & 0xff) == ((old_generation + 1) & 0xff);
-}
-
-/*
- * The iso packet format allows for an immediate header/payload part
- * stored in 'header' immediately after the packet info plus an
- * indirect payload part that is pointer to by the 'payload' field.
- * Applications can use one or the other or both to implement simple
- * low-bandwidth streaming (e.g. audio) or more advanced
- * scatter-gather streaming (e.g. assembling video frame automatically).
- */
-
-struct fw_iso_packet {
-	u16 payload_length;	/* Length of indirect payload. */
-	u32 interrupt : 1;	/* Generate interrupt on this packet */
-	u32 skip : 1;		/* Set to not send packet at all. */
-	u32 tag : 2;
-	u32 sy : 4;
-	u32 header_length : 8;	/* Length of immediate header. */
-	u32 header[0];
-};
-
-#define FW_ISO_CONTEXT_TRANSMIT	0
-#define FW_ISO_CONTEXT_RECEIVE	1
-
-#define FW_ISO_CONTEXT_MATCH_TAG0	 1
-#define FW_ISO_CONTEXT_MATCH_TAG1	 2
-#define FW_ISO_CONTEXT_MATCH_TAG2	 4
-#define FW_ISO_CONTEXT_MATCH_TAG3	 8
-#define FW_ISO_CONTEXT_MATCH_ALL_TAGS	15
-
-struct fw_iso_context;
-
-typedef void (*fw_iso_callback_t)(struct fw_iso_context *context,
-				  u32 cycle, size_t header_length,
-				  void *header, void *data);
-
-/*
- * An iso buffer is just a set of pages mapped for DMA in the
- * specified direction.  Since the pages are to be used for DMA, they
- * are not mapped into the kernel virtual address space.  We store the
- * DMA address in the page private. The helper function
- * fw_iso_buffer_map() will map the pages into a given vma.
- */
-
-struct fw_iso_buffer {
-	enum dma_data_direction direction;
-	struct page **pages;
-	int page_count;
-};
-
-struct fw_iso_context {
-	struct fw_card *card;
-	int type;
-	int channel;
-	int speed;
-	size_t header_size;
-	fw_iso_callback_t callback;
-	void *callback_data;
-};
-
-int fw_iso_buffer_init(struct fw_iso_buffer *buffer, struct fw_card *card,
-		       int page_count, enum dma_data_direction direction);
-int fw_iso_buffer_map(struct fw_iso_buffer *buffer, struct vm_area_struct *vma);
-void fw_iso_buffer_destroy(struct fw_iso_buffer *buffer, struct fw_card *card);
-
-struct fw_iso_context *fw_iso_context_create(struct fw_card *card,
-		int type, int channel, int speed, size_t header_size,
-		fw_iso_callback_t callback, void *callback_data);
-int fw_iso_context_queue(struct fw_iso_context *ctx,
-			 struct fw_iso_packet *packet,
-			 struct fw_iso_buffer *buffer,
-			 unsigned long payload);
-int fw_iso_context_start(struct fw_iso_context *ctx,
-			 int cycle, int sync, int tags);
-int fw_iso_context_stop(struct fw_iso_context *ctx);
-void fw_iso_context_destroy(struct fw_iso_context *ctx);
-
-void fw_iso_resource_manage(struct fw_card *card, int generation,
-		u64 channels_mask, int *channel, int *bandwidth, bool allocate);
-
-struct fw_card_driver {
-	/*
-	 * Enable the given card with the given initial config rom.
-	 * This function is expected to activate the card, and either
-	 * enable the PHY or set the link_on bit and initiate a bus
-	 * reset.
-	 */
-	int (*enable)(struct fw_card *card, u32 *config_rom, size_t length);
-
-	int (*update_phy_reg)(struct fw_card *card, int address,
-			      int clear_bits, int set_bits);
-
-	/*
-	 * Update the config rom for an enabled card.  This function
-	 * should change the config rom that is presented on the bus
-	 * an initiate a bus reset.
-	 */
-	int (*set_config_rom)(struct fw_card *card,
-			      u32 *config_rom, size_t length);
-
-	void (*send_request)(struct fw_card *card, struct fw_packet *packet);
-	void (*send_response)(struct fw_card *card, struct fw_packet *packet);
-	/* Calling cancel is valid once a packet has been submitted. */
-	int (*cancel_packet)(struct fw_card *card, struct fw_packet *packet);
-
-	/*
-	 * Allow the specified node ID to do direct DMA out and in of
-	 * host memory.  The card will disable this for all node when
-	 * a bus reset happens, so driver need to reenable this after
-	 * bus reset.  Returns 0 on success, -ENODEV if the card
-	 * doesn't support this, -ESTALE if the generation doesn't
-	 * match.
-	 */
-	int (*enable_phys_dma)(struct fw_card *card,
-			       int node_id, int generation);
-
-	u64 (*get_bus_time)(struct fw_card *card);
-
-	struct fw_iso_context *
-	(*allocate_iso_context)(struct fw_card *card,
-				int type, int channel, size_t header_size);
-	void (*free_iso_context)(struct fw_iso_context *ctx);
-
-	int (*start_iso)(struct fw_iso_context *ctx,
-			 s32 cycle, u32 sync, u32 tags);
-
-	int (*queue_iso)(struct fw_iso_context *ctx,
-			 struct fw_iso_packet *packet,
-			 struct fw_iso_buffer *buffer,
-			 unsigned long payload);
-
-	int (*stop_iso)(struct fw_iso_context *ctx);
-};
-
-int fw_core_initiate_bus_reset(struct fw_card *card, int short_reset);
-
-void fw_send_request(struct fw_card *card, struct fw_transaction *t,
-		int tcode, int destination_id, int generation, int speed,
-		unsigned long long offset, void *payload, size_t length,
-		fw_transaction_callback_t callback, void *callback_data);
-int fw_cancel_transaction(struct fw_card *card,
-			  struct fw_transaction *transaction);
-void fw_flush_transactions(struct fw_card *card);
-int fw_run_transaction(struct fw_card *card, int tcode, int destination_id,
-		       int generation, int speed, unsigned long long offset,
-		       void *payload, size_t length);
-void fw_send_phy_config(struct fw_card *card,
-			int node_id, int generation, int gap_count);
-
-static inline int fw_stream_packet_destination_id(int tag, int channel, int sy)
-{
-	return tag << 14 | channel << 8 | sy;
-}
-
-/*
- * Called by the topology code to inform the device code of node
- * activity; found, lost, or updated nodes.
- */
-void fw_node_event(struct fw_card *card, struct fw_node *node, int event);
-
-/* API used by card level drivers */
-
-void fw_card_initialize(struct fw_card *card,
-		const struct fw_card_driver *driver, struct device *device);
-int fw_card_add(struct fw_card *card,
-		u32 max_receive, u32 link_speed, u64 guid);
-void fw_core_remove_card(struct fw_card *card);
-void fw_core_handle_bus_reset(struct fw_card *card, int node_id,
-		int generation, int self_id_count, u32 *self_ids);
-void fw_core_handle_request(struct fw_card *card, struct fw_packet *request);
-void fw_core_handle_response(struct fw_card *card, struct fw_packet *packet);
-
-extern int fw_irm_set_broadcast_channel_register(struct device *dev,
-						 void *data);
-
-#endif /* __fw_transaction_h */
diff --git a/drivers/firewire/ohci.h b/drivers/firewire/ohci.h
new file mode 100644
index 00000000000..ba492d85c51
--- /dev/null
+++ b/drivers/firewire/ohci.h
@@ -0,0 +1,157 @@
+#ifndef _FIREWIRE_OHCI_H
+#define _FIREWIRE_OHCI_H
+
+/* OHCI register map */
+
+#define OHCI1394_Version                      0x000
+#define OHCI1394_GUID_ROM                     0x004
+#define OHCI1394_ATRetries                    0x008
+#define OHCI1394_CSRData                      0x00C
+#define OHCI1394_CSRCompareData               0x010
+#define OHCI1394_CSRControl                   0x014
+#define OHCI1394_ConfigROMhdr                 0x018
+#define OHCI1394_BusID                        0x01C
+#define OHCI1394_BusOptions                   0x020
+#define OHCI1394_GUIDHi                       0x024
+#define OHCI1394_GUIDLo                       0x028
+#define OHCI1394_ConfigROMmap                 0x034
+#define OHCI1394_PostedWriteAddressLo         0x038
+#define OHCI1394_PostedWriteAddressHi         0x03C
+#define OHCI1394_VendorID                     0x040
+#define OHCI1394_HCControlSet                 0x050
+#define OHCI1394_HCControlClear               0x054
+#define  OHCI1394_HCControl_BIBimageValid	0x80000000
+#define  OHCI1394_HCControl_noByteSwapData	0x40000000
+#define  OHCI1394_HCControl_programPhyEnable	0x00800000
+#define  OHCI1394_HCControl_aPhyEnhanceEnable	0x00400000
+#define  OHCI1394_HCControl_LPS			0x00080000
+#define  OHCI1394_HCControl_postedWriteEnable	0x00040000
+#define  OHCI1394_HCControl_linkEnable		0x00020000
+#define  OHCI1394_HCControl_softReset		0x00010000
+#define OHCI1394_SelfIDBuffer                 0x064
+#define OHCI1394_SelfIDCount                  0x068
+#define  OHCI1394_SelfIDCount_selfIDError	0x80000000
+#define OHCI1394_IRMultiChanMaskHiSet         0x070
+#define OHCI1394_IRMultiChanMaskHiClear       0x074
+#define OHCI1394_IRMultiChanMaskLoSet         0x078
+#define OHCI1394_IRMultiChanMaskLoClear       0x07C
+#define OHCI1394_IntEventSet                  0x080
+#define OHCI1394_IntEventClear                0x084
+#define OHCI1394_IntMaskSet                   0x088
+#define OHCI1394_IntMaskClear                 0x08C
+#define OHCI1394_IsoXmitIntEventSet           0x090
+#define OHCI1394_IsoXmitIntEventClear         0x094
+#define OHCI1394_IsoXmitIntMaskSet            0x098
+#define OHCI1394_IsoXmitIntMaskClear          0x09C
+#define OHCI1394_IsoRecvIntEventSet           0x0A0
+#define OHCI1394_IsoRecvIntEventClear         0x0A4
+#define OHCI1394_IsoRecvIntMaskSet            0x0A8
+#define OHCI1394_IsoRecvIntMaskClear          0x0AC
+#define OHCI1394_InitialBandwidthAvailable    0x0B0
+#define OHCI1394_InitialChannelsAvailableHi   0x0B4
+#define OHCI1394_InitialChannelsAvailableLo   0x0B8
+#define OHCI1394_FairnessControl              0x0DC
+#define OHCI1394_LinkControlSet               0x0E0
+#define OHCI1394_LinkControlClear             0x0E4
+#define   OHCI1394_LinkControl_rcvSelfID	(1 << 9)
+#define   OHCI1394_LinkControl_rcvPhyPkt	(1 << 10)
+#define   OHCI1394_LinkControl_cycleTimerEnable	(1 << 20)
+#define   OHCI1394_LinkControl_cycleMaster	(1 << 21)
+#define   OHCI1394_LinkControl_cycleSource	(1 << 22)
+#define OHCI1394_NodeID                       0x0E8
+#define   OHCI1394_NodeID_idValid             0x80000000
+#define   OHCI1394_NodeID_nodeNumber          0x0000003f
+#define   OHCI1394_NodeID_busNumber           0x0000ffc0
+#define OHCI1394_PhyControl                   0x0EC
+#define   OHCI1394_PhyControl_Read(addr)	(((addr) << 8) | 0x00008000)
+#define   OHCI1394_PhyControl_ReadDone		0x80000000
+#define   OHCI1394_PhyControl_ReadData(r)	(((r) & 0x00ff0000) >> 16)
+#define   OHCI1394_PhyControl_Write(addr, data)	(((addr) << 8) | (data) | 0x00004000)
+#define   OHCI1394_PhyControl_WriteDone		0x00004000
+#define OHCI1394_IsochronousCycleTimer        0x0F0
+#define OHCI1394_AsReqFilterHiSet             0x100
+#define OHCI1394_AsReqFilterHiClear           0x104
+#define OHCI1394_AsReqFilterLoSet             0x108
+#define OHCI1394_AsReqFilterLoClear           0x10C
+#define OHCI1394_PhyReqFilterHiSet            0x110
+#define OHCI1394_PhyReqFilterHiClear          0x114
+#define OHCI1394_PhyReqFilterLoSet            0x118
+#define OHCI1394_PhyReqFilterLoClear          0x11C
+#define OHCI1394_PhyUpperBound                0x120
+
+#define OHCI1394_AsReqTrContextBase           0x180
+#define OHCI1394_AsReqTrContextControlSet     0x180
+#define OHCI1394_AsReqTrContextControlClear   0x184
+#define OHCI1394_AsReqTrCommandPtr            0x18C
+
+#define OHCI1394_AsRspTrContextBase           0x1A0
+#define OHCI1394_AsRspTrContextControlSet     0x1A0
+#define OHCI1394_AsRspTrContextControlClear   0x1A4
+#define OHCI1394_AsRspTrCommandPtr            0x1AC
+
+#define OHCI1394_AsReqRcvContextBase          0x1C0
+#define OHCI1394_AsReqRcvContextControlSet    0x1C0
+#define OHCI1394_AsReqRcvContextControlClear  0x1C4
+#define OHCI1394_AsReqRcvCommandPtr           0x1CC
+
+#define OHCI1394_AsRspRcvContextBase          0x1E0
+#define OHCI1394_AsRspRcvContextControlSet    0x1E0
+#define OHCI1394_AsRspRcvContextControlClear  0x1E4
+#define OHCI1394_AsRspRcvCommandPtr           0x1EC
+
+/* Isochronous transmit registers */
+#define OHCI1394_IsoXmitContextBase(n)           (0x200 + 16 * (n))
+#define OHCI1394_IsoXmitContextControlSet(n)     (0x200 + 16 * (n))
+#define OHCI1394_IsoXmitContextControlClear(n)   (0x204 + 16 * (n))
+#define OHCI1394_IsoXmitCommandPtr(n)            (0x20C + 16 * (n))
+
+/* Isochronous receive registers */
+#define OHCI1394_IsoRcvContextBase(n)         (0x400 + 32 * (n))
+#define OHCI1394_IsoRcvContextControlSet(n)   (0x400 + 32 * (n))
+#define OHCI1394_IsoRcvContextControlClear(n) (0x404 + 32 * (n))
+#define OHCI1394_IsoRcvCommandPtr(n)          (0x40C + 32 * (n))
+#define OHCI1394_IsoRcvContextMatch(n)        (0x410 + 32 * (n))
+
+/* Interrupts Mask/Events */
+#define OHCI1394_reqTxComplete		0x00000001
+#define OHCI1394_respTxComplete		0x00000002
+#define OHCI1394_ARRQ			0x00000004
+#define OHCI1394_ARRS			0x00000008
+#define OHCI1394_RQPkt			0x00000010
+#define OHCI1394_RSPkt			0x00000020
+#define OHCI1394_isochTx		0x00000040
+#define OHCI1394_isochRx		0x00000080
+#define OHCI1394_postedWriteErr		0x00000100
+#define OHCI1394_lockRespErr		0x00000200
+#define OHCI1394_selfIDComplete		0x00010000
+#define OHCI1394_busReset		0x00020000
+#define OHCI1394_regAccessFail		0x00040000
+#define OHCI1394_phy			0x00080000
+#define OHCI1394_cycleSynch		0x00100000
+#define OHCI1394_cycle64Seconds		0x00200000
+#define OHCI1394_cycleLost		0x00400000
+#define OHCI1394_cycleInconsistent	0x00800000
+#define OHCI1394_unrecoverableError	0x01000000
+#define OHCI1394_cycleTooLong		0x02000000
+#define OHCI1394_phyRegRcvd		0x04000000
+#define OHCI1394_masterIntEnable	0x80000000
+
+#define OHCI1394_evt_no_status		0x0
+#define OHCI1394_evt_long_packet	0x2
+#define OHCI1394_evt_missing_ack	0x3
+#define OHCI1394_evt_underrun		0x4
+#define OHCI1394_evt_overrun		0x5
+#define OHCI1394_evt_descriptor_read	0x6
+#define OHCI1394_evt_data_read		0x7
+#define OHCI1394_evt_data_write		0x8
+#define OHCI1394_evt_bus_reset		0x9
+#define OHCI1394_evt_timeout		0xa
+#define OHCI1394_evt_tcode_err		0xb
+#define OHCI1394_evt_reserved_b		0xc
+#define OHCI1394_evt_reserved_c		0xd
+#define OHCI1394_evt_unknown		0xe
+#define OHCI1394_evt_flushed		0xf
+
+#define OHCI1394_phy_tcode		0xe
+
+#endif /* _FIREWIRE_OHCI_H */
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
new file mode 100644
index 00000000000..e979f9b22cb
--- /dev/null
+++ b/include/linux/firewire.h
@@ -0,0 +1,350 @@
+#ifndef _LINUX_FIREWIRE_H
+#define _LINUX_FIREWIRE_H
+
+#include <linux/completion.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/sysfs.h>
+#include <linux/timer.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+#include <asm/atomic.h>
+#include <asm/byteorder.h>
+
+#define fw_notify(s, args...) printk(KERN_NOTICE KBUILD_MODNAME ": " s, ## args)
+#define fw_error(s, args...) printk(KERN_ERR KBUILD_MODNAME ": " s, ## args)
+
+static inline void fw_memcpy_from_be32(void *_dst, void *_src, size_t size)
+{
+	u32    *dst = _dst;
+	__be32 *src = _src;
+	int i;
+
+	for (i = 0; i < size / 4; i++)
+		dst[i] = be32_to_cpu(src[i]);
+}
+
+static inline void fw_memcpy_to_be32(void *_dst, void *_src, size_t size)
+{
+	fw_memcpy_from_be32(_dst, _src, size);
+}
+#define CSR_REGISTER_BASE		0xfffff0000000ULL
+
+/* register offsets are relative to CSR_REGISTER_BASE */
+#define CSR_STATE_CLEAR			0x0
+#define CSR_STATE_SET			0x4
+#define CSR_NODE_IDS			0x8
+#define CSR_RESET_START			0xc
+#define CSR_SPLIT_TIMEOUT_HI		0x18
+#define CSR_SPLIT_TIMEOUT_LO		0x1c
+#define CSR_CYCLE_TIME			0x200
+#define CSR_BUS_TIME			0x204
+#define CSR_BUSY_TIMEOUT		0x210
+#define CSR_BUS_MANAGER_ID		0x21c
+#define CSR_BANDWIDTH_AVAILABLE		0x220
+#define CSR_CHANNELS_AVAILABLE		0x224
+#define CSR_CHANNELS_AVAILABLE_HI	0x224
+#define CSR_CHANNELS_AVAILABLE_LO	0x228
+#define CSR_BROADCAST_CHANNEL		0x234
+#define CSR_CONFIG_ROM			0x400
+#define CSR_CONFIG_ROM_END		0x800
+#define CSR_FCP_COMMAND			0xB00
+#define CSR_FCP_RESPONSE		0xD00
+#define CSR_FCP_END			0xF00
+#define CSR_TOPOLOGY_MAP		0x1000
+#define CSR_TOPOLOGY_MAP_END		0x1400
+#define CSR_SPEED_MAP			0x2000
+#define CSR_SPEED_MAP_END		0x3000
+
+#define CSR_OFFSET		0x40
+#define CSR_LEAF		0x80
+#define CSR_DIRECTORY		0xc0
+
+#define CSR_DESCRIPTOR		0x01
+#define CSR_VENDOR		0x03
+#define CSR_HARDWARE_VERSION	0x04
+#define CSR_NODE_CAPABILITIES	0x0c
+#define CSR_UNIT		0x11
+#define CSR_SPECIFIER_ID	0x12
+#define CSR_VERSION		0x13
+#define CSR_DEPENDENT_INFO	0x14
+#define CSR_MODEL		0x17
+#define CSR_INSTANCE		0x18
+#define CSR_DIRECTORY_ID	0x20
+
+struct fw_csr_iterator {
+	u32 *p;
+	u32 *end;
+};
+
+void fw_csr_iterator_init(struct fw_csr_iterator *ci, u32 *p);
+int fw_csr_iterator_next(struct fw_csr_iterator *ci, int *key, int *value);
+
+extern struct bus_type fw_bus_type;
+
+struct fw_card_driver;
+struct fw_node;
+
+struct fw_card {
+	const struct fw_card_driver *driver;
+	struct device *device;
+	struct kref kref;
+	struct completion done;
+
+	int node_id;
+	int generation;
+	int current_tlabel, tlabel_mask;
+	struct list_head transaction_list;
+	struct timer_list flush_timer;
+	unsigned long reset_jiffies;
+
+	unsigned long long guid;
+	unsigned max_receive;
+	int link_speed;
+	int config_rom_generation;
+
+	spinlock_t lock; /* Take this lock when handling the lists in
+			  * this struct. */
+	struct fw_node *local_node;
+	struct fw_node *root_node;
+	struct fw_node *irm_node;
+	u8 color; /* must be u8 to match the definition in struct fw_node */
+	int gap_count;
+	bool beta_repeaters_present;
+
+	int index;
+
+	struct list_head link;
+
+	/* Work struct for BM duties. */
+	struct delayed_work work;
+	int bm_retries;
+	int bm_generation;
+
+	bool broadcast_channel_allocated;
+	u32 broadcast_channel;
+	u32 topology_map[(CSR_TOPOLOGY_MAP_END - CSR_TOPOLOGY_MAP) / 4];
+};
+
+static inline struct fw_card *fw_card_get(struct fw_card *card)
+{
+	kref_get(&card->kref);
+
+	return card;
+}
+
+void fw_card_release(struct kref *kref);
+
+static inline void fw_card_put(struct fw_card *card)
+{
+	kref_put(&card->kref, fw_card_release);
+}
+
+struct fw_attribute_group {
+	struct attribute_group *groups[2];
+	struct attribute_group group;
+	struct attribute *attrs[12];
+};
+
+enum fw_device_state {
+	FW_DEVICE_INITIALIZING,
+	FW_DEVICE_RUNNING,
+	FW_DEVICE_GONE,
+	FW_DEVICE_SHUTDOWN,
+};
+
+/*
+ * Note, fw_device.generation always has to be read before fw_device.node_id.
+ * Use SMP memory barriers to ensure this.  Otherwise requests will be sent
+ * to an outdated node_id if the generation was updated in the meantime due
+ * to a bus reset.
+ *
+ * Likewise, fw-core will take care to update .node_id before .generation so
+ * that whenever fw_device.generation is current WRT the actual bus generation,
+ * fw_device.node_id is guaranteed to be current too.
+ *
+ * The same applies to fw_device.card->node_id vs. fw_device.generation.
+ *
+ * fw_device.config_rom and fw_device.config_rom_length may be accessed during
+ * the lifetime of any fw_unit belonging to the fw_device, before device_del()
+ * was called on the last fw_unit.  Alternatively, they may be accessed while
+ * holding fw_device_rwsem.
+ */
+struct fw_device {
+	atomic_t state;
+	struct fw_node *node;
+	int node_id;
+	int generation;
+	unsigned max_speed;
+	struct fw_card *card;
+	struct device device;
+
+	struct mutex client_list_mutex;
+	struct list_head client_list;
+
+	u32 *config_rom;
+	size_t config_rom_length;
+	int config_rom_retries;
+	unsigned is_local:1;
+	unsigned cmc:1;
+	unsigned bc_implemented:2;
+
+	struct delayed_work work;
+	struct fw_attribute_group attribute_group;
+};
+
+static inline struct fw_device *fw_device(struct device *dev)
+{
+	return container_of(dev, struct fw_device, device);
+}
+
+static inline int fw_device_is_shutdown(struct fw_device *device)
+{
+	return atomic_read(&device->state) == FW_DEVICE_SHUTDOWN;
+}
+
+static inline struct fw_device *fw_device_get(struct fw_device *device)
+{
+	get_device(&device->device);
+
+	return device;
+}
+
+static inline void fw_device_put(struct fw_device *device)
+{
+	put_device(&device->device);
+}
+
+int fw_device_enable_phys_dma(struct fw_device *device);
+
+/*
+ * fw_unit.directory must not be accessed after device_del(&fw_unit.device).
+ */
+struct fw_unit {
+	struct device device;
+	u32 *directory;
+	struct fw_attribute_group attribute_group;
+};
+
+static inline struct fw_unit *fw_unit(struct device *dev)
+{
+	return container_of(dev, struct fw_unit, device);
+}
+
+static inline struct fw_unit *fw_unit_get(struct fw_unit *unit)
+{
+	get_device(&unit->device);
+
+	return unit;
+}
+
+static inline void fw_unit_put(struct fw_unit *unit)
+{
+	put_device(&unit->device);
+}
+
+struct ieee1394_device_id;
+
+struct fw_driver {
+	struct device_driver driver;
+	/* Called when the parent device sits through a bus reset. */
+	void (*update)(struct fw_unit *unit);
+	const struct ieee1394_device_id *id_table;
+};
+
+struct fw_packet;
+struct fw_request;
+
+typedef void (*fw_packet_callback_t)(struct fw_packet *packet,
+				     struct fw_card *card, int status);
+typedef void (*fw_transaction_callback_t)(struct fw_card *card, int rcode,
+					  void *data, size_t length,
+					  void *callback_data);
+/*
+ * Important note:  The callback must guarantee that either fw_send_response()
+ * or kfree() is called on the @request.
+ */
+typedef void (*fw_address_callback_t)(struct fw_card *card,
+				      struct fw_request *request,
+				      int tcode, int destination, int source,
+				      int generation, int speed,
+				      unsigned long long offset,
+				      void *data, size_t length,
+				      void *callback_data);
+
+struct fw_packet {
+	int speed;
+	int generation;
+	u32 header[4];
+	size_t header_length;
+	void *payload;
+	size_t payload_length;
+	dma_addr_t payload_bus;
+	u32 timestamp;
+
+	/*
+	 * This callback is called when the packet transmission has
+	 * completed; for successful transmission, the status code is
+	 * the ack received from the destination, otherwise it's a
+	 * negative errno: ENOMEM, ESTALE, ETIMEDOUT, ENODEV, EIO.
+	 * The callback can be called from tasklet context and thus
+	 * must never block.
+	 */
+	fw_packet_callback_t callback;
+	int ack;
+	struct list_head link;
+	void *driver_data;
+};
+
+struct fw_transaction {
+	int node_id; /* The generation is implied; it is always the current. */
+	int tlabel;
+	int timestamp;
+	struct list_head link;
+
+	struct fw_packet packet;
+
+	/*
+	 * The data passed to the callback is valid only during the
+	 * callback.
+	 */
+	fw_transaction_callback_t callback;
+	void *callback_data;
+};
+
+struct fw_address_handler {
+	u64 offset;
+	size_t length;
+	fw_address_callback_t address_callback;
+	void *callback_data;
+	struct list_head link;
+};
+
+struct fw_address_region {
+	u64 start;
+	u64 end;
+};
+
+extern const struct fw_address_region fw_high_memory_region;
+
+int fw_core_add_address_handler(struct fw_address_handler *handler,
+				const struct fw_address_region *region);
+void fw_core_remove_address_handler(struct fw_address_handler *handler);
+void fw_send_response(struct fw_card *card,
+		      struct fw_request *request, int rcode);
+void fw_send_request(struct fw_card *card, struct fw_transaction *t,
+		     int tcode, int destination_id, int generation, int speed,
+		     unsigned long long offset, void *payload, size_t length,
+		     fw_transaction_callback_t callback, void *callback_data);
+int fw_cancel_transaction(struct fw_card *card,
+			  struct fw_transaction *transaction);
+int fw_run_transaction(struct fw_card *card, int tcode, int destination_id,
+		       int generation, int speed, unsigned long long offset,
+		       void *payload, size_t length);
+
+#endif /* _LINUX_FIREWIRE_H */
-- 
cgit v1.2.3-70-g09d2


From 1b8e69662e1a086878bf930a6042daf7f8a076cc Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas at hp.com>
Date: Fri, 5 Jun 2009 14:37:23 +0000
Subject: pnp: add PNP resource range checking function

Add a PNP resource range check function, indicating whether a resource
has been assigned to any device.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
[apw@canonical.com: fixed up exports et al]
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/pnp/resource.c | 18 ++++++++++++++++++
 include/linux/pnp.h    |  2 ++
 2 files changed, 20 insertions(+)

(limited to 'include')

diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c
index f604061d2bb..ba976542788 100644
--- a/drivers/pnp/resource.c
+++ b/drivers/pnp/resource.c
@@ -638,6 +638,24 @@ int pnp_possible_config(struct pnp_dev *dev, int type, resource_size_t start,
 }
 EXPORT_SYMBOL(pnp_possible_config);
 
+int pnp_range_reserved(resource_size_t start, resource_size_t end)
+{
+	struct pnp_dev *dev;
+	struct pnp_resource *pnp_res;
+	resource_size_t *dev_start, *dev_end;
+
+	pnp_for_each_dev(dev) {
+		list_for_each_entry(pnp_res, &dev->resources, list) {
+			dev_start = &pnp_res->res.start;
+			dev_end   = &pnp_res->res.end;
+			if (ranged_conflict(&start, &end, dev_start, dev_end))
+				return 1;
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL(pnp_range_reserved);
+
 /* format is: pnp_reserve_irq=irq1[,irq2] .... */
 static int __init pnp_setup_reserve_irq(char *str)
 {
diff --git a/include/linux/pnp.h b/include/linux/pnp.h
index ca3c8877302..b063c7328ba 100644
--- a/include/linux/pnp.h
+++ b/include/linux/pnp.h
@@ -446,6 +446,7 @@ int pnp_start_dev(struct pnp_dev *dev);
 int pnp_stop_dev(struct pnp_dev *dev);
 int pnp_activate_dev(struct pnp_dev *dev);
 int pnp_disable_dev(struct pnp_dev *dev);
+int pnp_range_reserved(resource_size_t start, resource_size_t end);
 
 /* protocol helpers */
 int pnp_is_active(struct pnp_dev *dev);
@@ -476,6 +477,7 @@ static inline int pnp_start_dev(struct pnp_dev *dev) { return -ENODEV; }
 static inline int pnp_stop_dev(struct pnp_dev *dev) { return -ENODEV; }
 static inline int pnp_activate_dev(struct pnp_dev *dev) { return -ENODEV; }
 static inline int pnp_disable_dev(struct pnp_dev *dev) { return -ENODEV; }
+static inline int pnp_range_reserved(resource_size_t start, resource_size_t end) { return 0;}
 
 /* protocol helpers */
 static inline int pnp_is_active(struct pnp_dev *dev) { return 0; }
-- 
cgit v1.2.3-70-g09d2


From 3f7440a6b771169e1f11fa582e53a4259b682809 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 5 Jun 2009 17:40:04 +0200
Subject: ALSA: Clean up 64bit division functions

Replace the house-made div64_32() with the standard div_u64*() functions.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/pcm.h       | 74 -----------------------------------------------
 sound/core/oss/pcm_oss.c  |  5 ++--
 sound/core/pcm_lib.c      |  3 +-
 sound/pci/rme9652/hdsp.c  |  7 ++---
 sound/pci/rme9652/hdspm.c |  4 +--
 5 files changed, 9 insertions(+), 84 deletions(-)

(limited to 'include')

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index c1729689161..0caf71e1694 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -486,80 +486,6 @@ void snd_pcm_detach_substream(struct snd_pcm_substream *substream);
 void snd_pcm_vma_notify_data(void *client, void *data);
 int snd_pcm_mmap_data(struct snd_pcm_substream *substream, struct file *file, struct vm_area_struct *area);
 
-#if BITS_PER_LONG >= 64
-
-static inline void div64_32(u_int64_t *n, u_int32_t div, u_int32_t *rem)
-{
-	*rem = *n % div;
-	*n /= div;
-}
-
-#elif defined(i386)
-
-static inline void div64_32(u_int64_t *n, u_int32_t div, u_int32_t *rem)
-{
-	u_int32_t low, high;
-	low = *n & 0xffffffff;
-	high = *n >> 32;
-	if (high) {
-		u_int32_t high1 = high % div;
-		high /= div;
-		asm("divl %2":"=a" (low), "=d" (*rem):"rm" (div), "a" (low), "d" (high1));
-		*n = (u_int64_t)high << 32 | low;
-	} else {
-		*n = low / div;
-		*rem = low % div;
-	}
-}
-#else
-
-static inline void divl(u_int32_t high, u_int32_t low,
-			u_int32_t div,
-			u_int32_t *q, u_int32_t *r)
-{
-	u_int64_t n = (u_int64_t)high << 32 | low;
-	u_int64_t d = (u_int64_t)div << 31;
-	u_int32_t q1 = 0;
-	int c = 32;
-	while (n > 0xffffffffU) {
-		q1 <<= 1;
-		if (n >= d) {
-			n -= d;
-			q1 |= 1;
-		}
-		d >>= 1;
-		c--;
-	}
-	q1 <<= c;
-	if (n) {
-		low = n;
-		*q = q1 | (low / div);
-		*r = low % div;
-	} else {
-		*r = 0;
-		*q = q1;
-	}
-	return;
-}
-
-static inline void div64_32(u_int64_t *n, u_int32_t div, u_int32_t *rem)
-{
-	u_int32_t low, high;
-	low = *n & 0xffffffff;
-	high = *n >> 32;
-	if (high) {
-		u_int32_t high1 = high % div;
-		u_int32_t low1 = low;
-		high /= div;
-		divl(high1, low1, div, &low, rem);
-		*n = (u_int64_t)high << 32 | low;
-	} else {
-		*n = low / div;
-		*rem = low % div;
-	}
-}
-#endif
-
 /*
  *  PCM library
  */
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index dda000b9684..dbe406b8259 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -31,6 +31,7 @@
 #include <linux/time.h>
 #include <linux/vmalloc.h>
 #include <linux/moduleparam.h>
+#include <linux/math64.h>
 #include <linux/string.h>
 #include <sound/core.h>
 #include <sound/minors.h>
@@ -617,9 +618,7 @@ static long snd_pcm_oss_bytes(struct snd_pcm_substream *substream, long frames)
 #else
 	{
 		u64 bsize = (u64)runtime->oss.buffer_bytes * (u64)bytes;
-		u32 rem;
-		div64_32(&bsize, buffer_size, &rem);
-		return (long)bsize;
+		return div_u64(bsize, buffer_size);
 	}
 #endif
 }
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index d659995ac3a..a7482874c45 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -22,6 +22,7 @@
 
 #include <linux/slab.h>
 #include <linux/time.h>
+#include <linux/math64.h>
 #include <sound/core.h>
 #include <sound/control.h>
 #include <sound/info.h>
@@ -452,7 +453,7 @@ static inline unsigned int muldiv32(unsigned int a, unsigned int b,
 		*r = 0;
 		return UINT_MAX;
 	}
-	div64_32(&n, c, r);
+	n = div_u64_rem(n, c, r);
 	if (n >= UINT_MAX) {
 		*r = 0;
 		return UINT_MAX;
diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c
index 314e73531bd..bcfdbb5ebc4 100644
--- a/sound/pci/rme9652/hdsp.c
+++ b/sound/pci/rme9652/hdsp.c
@@ -28,6 +28,7 @@
 #include <linux/pci.h>
 #include <linux/firmware.h>
 #include <linux/moduleparam.h>
+#include <linux/math64.h>
 
 #include <sound/core.h>
 #include <sound/control.h>
@@ -1047,7 +1048,6 @@ static int hdsp_set_interrupt_interval(struct hdsp *s, unsigned int frames)
 static void hdsp_set_dds_value(struct hdsp *hdsp, int rate)
 {
 	u64 n;
-	u32 r;
 
 	if (rate >= 112000)
 		rate /= 4;
@@ -1055,7 +1055,7 @@ static void hdsp_set_dds_value(struct hdsp *hdsp, int rate)
 		rate /= 2;
 
 	n = DDS_NUMERATOR;
-	div64_32(&n, rate, &r);
+	n = div_u64(n, rate);
 	/* n should be less than 2^32 for being written to FREQ register */
 	snd_BUG_ON(n >> 32);
 	/* HDSP_freqReg and HDSP_resetPointer are the same, so keep the DDS
@@ -3097,7 +3097,6 @@ static int snd_hdsp_get_adat_sync_check(struct snd_kcontrol *kcontrol, struct sn
 static int hdsp_dds_offset(struct hdsp *hdsp)
 {
 	u64 n;
-	u32 r;
 	unsigned int dds_value = hdsp->dds_value;
 	int system_sample_rate = hdsp->system_sample_rate;
 
@@ -3109,7 +3108,7 @@ static int hdsp_dds_offset(struct hdsp *hdsp)
 	 * dds_value = n / rate
 	 * rate = n / dds_value
 	 */
-	div64_32(&n, dds_value, &r);
+	n = div_u64(n, dds_value);
 	if (system_sample_rate >= 112000)
 		n *= 4;
 	else if (system_sample_rate >= 56000)
diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c
index bac2dc0c5d8..0dce331a2a3 100644
--- a/sound/pci/rme9652/hdspm.c
+++ b/sound/pci/rme9652/hdspm.c
@@ -29,6 +29,7 @@
 #include <linux/moduleparam.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
+#include <linux/math64.h>
 #include <asm/io.h>
 
 #include <sound/core.h>
@@ -831,7 +832,6 @@ static int hdspm_set_interrupt_interval(struct hdspm * s, unsigned int frames)
 static void hdspm_set_dds_value(struct hdspm *hdspm, int rate)
 {
 	u64 n;
-	u32 r;
 	
 	if (rate >= 112000)
 		rate /= 4;
@@ -844,7 +844,7 @@ static void hdspm_set_dds_value(struct hdspm *hdspm, int rate)
         */	   
 	/* n = 104857600000000ULL; */ /*  =  2^20 * 10^8 */
 	n = 110100480000000ULL;    /* Value checked for AES32 and MADI */
-	div64_32(&n, rate, &r);
+	n = div_u64(n, rate);
 	/* n should be less than 2^32 for being written to FREQ register */
 	snd_BUG_ON(n >> 32);
 	hdspm_write(hdspm, HDSPM_freqReg, (u32)n);
-- 
cgit v1.2.3-70-g09d2


From ac4bcf889469ffbca88f234d3184452886a47905 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 5 Jun 2009 14:44:52 +0200
Subject: perf_counter: Change PERF_SAMPLE_CONFIG into PERF_SAMPLE_ID

The purpose of PERF_SAMPLE_CONFIG was to identify the counters,
since then we've added counter ids, use those instead.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 2 +-
 kernel/perf_counter.c        | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 40dc0e273d9..9cea32a0655 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -104,7 +104,7 @@ enum perf_counter_sample_format {
 	PERF_SAMPLE_ADDR		= 1U << 3,
 	PERF_SAMPLE_GROUP		= 1U << 4,
 	PERF_SAMPLE_CALLCHAIN		= 1U << 5,
-	PERF_SAMPLE_CONFIG		= 1U << 6,
+	PERF_SAMPLE_ID			= 1U << 6,
 	PERF_SAMPLE_CPU			= 1U << 7,
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 37a5a241ca7..e75b91a76a5 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2392,8 +2392,8 @@ static void perf_counter_output(struct perf_counter *counter,
 		header.size += sizeof(u64);
 	}
 
-	if (sample_type & PERF_SAMPLE_CONFIG) {
-		header.type |= PERF_SAMPLE_CONFIG;
+	if (sample_type & PERF_SAMPLE_ID) {
+		header.type |= PERF_SAMPLE_ID;
 		header.size += sizeof(u64);
 	}
 
@@ -2439,8 +2439,8 @@ static void perf_counter_output(struct perf_counter *counter,
 	if (sample_type & PERF_SAMPLE_ADDR)
 		perf_output_put(&handle, addr);
 
-	if (sample_type & PERF_SAMPLE_CONFIG)
-		perf_output_put(&handle, counter->attr.config);
+	if (sample_type & PERF_SAMPLE_ID)
+		perf_output_put(&handle, counter->id);
 
 	if (sample_type & PERF_SAMPLE_CPU)
 		perf_output_put(&handle, cpu_entry);
-- 
cgit v1.2.3-70-g09d2


From 689802b2d0536e72281dc959ab9cb34fb3c304cf Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 5 Jun 2009 15:05:43 +0200
Subject: perf_counter: Add PERF_SAMPLE_PERIOD

In order to allow easy tracking of the period, also provide means of
adding it to the sample data.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  2 ++
 kernel/perf_counter.c        | 10 ++++++++++
 2 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 9cea32a0655..6bc25003973 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -106,6 +106,7 @@ enum perf_counter_sample_format {
 	PERF_SAMPLE_CALLCHAIN		= 1U << 5,
 	PERF_SAMPLE_ID			= 1U << 6,
 	PERF_SAMPLE_CPU			= 1U << 7,
+	PERF_SAMPLE_PERIOD		= 1U << 8,
 };
 
 /*
@@ -260,6 +261,7 @@ enum perf_event_type {
 	 * struct {
 	 *	struct perf_event_header	header;
 	 *	u64				time;
+	 *	u64				id;
 	 *	u64				sample_period;
 	 * };
 	 */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index e75b91a76a5..f8390668c39 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2404,6 +2404,11 @@ static void perf_counter_output(struct perf_counter *counter,
 		cpu_entry.cpu = raw_smp_processor_id();
 	}
 
+	if (sample_type & PERF_SAMPLE_PERIOD) {
+		header.type |= PERF_SAMPLE_PERIOD;
+		header.size += sizeof(u64);
+	}
+
 	if (sample_type & PERF_SAMPLE_GROUP) {
 		header.type |= PERF_SAMPLE_GROUP;
 		header.size += sizeof(u64) +
@@ -2445,6 +2450,9 @@ static void perf_counter_output(struct perf_counter *counter,
 	if (sample_type & PERF_SAMPLE_CPU)
 		perf_output_put(&handle, cpu_entry);
 
+	if (sample_type & PERF_SAMPLE_PERIOD)
+		perf_output_put(&handle, counter->hw.sample_period);
+
 	/*
 	 * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult.
 	 */
@@ -2835,6 +2843,7 @@ static void perf_log_period(struct perf_counter *counter, u64 period)
 	struct {
 		struct perf_event_header	header;
 		u64				time;
+		u64				id;
 		u64				period;
 	} freq_event = {
 		.header = {
@@ -2843,6 +2852,7 @@ static void perf_log_period(struct perf_counter *counter, u64 period)
 			.size = sizeof(freq_event),
 		},
 		.time = sched_clock(),
+		.id = counter->id,
 		.period = period,
 	};
 
-- 
cgit v1.2.3-70-g09d2


From 6a24ed6c6082ec65d19331a4bfa30c0512a1a822 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 5 Jun 2009 18:01:29 +0200
Subject: perf_counter: Fix frequency adjustment for < HZ

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  3 +++
 kernel/perf_counter.c        | 32 +++++++++++++++++++++++++-------
 2 files changed, 28 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 6bc25003973..4f9d39ecdc0 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -373,6 +373,9 @@ struct hw_perf_counter {
 	u64				sample_period;
 	atomic64_t			period_left;
 	u64				interrupts;
+
+	u64				freq_count;
+	u64				freq_interrupts;
 #endif
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f8390668c39..47c92fb927f 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1187,8 +1187,9 @@ static void perf_log_period(struct perf_counter *counter, u64 period);
 static void perf_adjust_freq(struct perf_counter_context *ctx)
 {
 	struct perf_counter *counter;
+	struct hw_perf_counter *hwc;
 	u64 interrupts, sample_period;
-	u64 events, period;
+	u64 events, period, freq;
 	s64 delta;
 
 	spin_lock(&ctx->lock);
@@ -1196,8 +1197,10 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
 		if (counter->state != PERF_COUNTER_STATE_ACTIVE)
 			continue;
 
-		interrupts = counter->hw.interrupts;
-		counter->hw.interrupts = 0;
+		hwc = &counter->hw;
+
+		interrupts = hwc->interrupts;
+		hwc->interrupts = 0;
 
 		if (interrupts == MAX_INTERRUPTS) {
 			perf_log_throttle(counter, 1);
@@ -1208,20 +1211,35 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
 		if (!counter->attr.freq || !counter->attr.sample_freq)
 			continue;
 
-		events = HZ * interrupts * counter->hw.sample_period;
+		if (counter->attr.sample_freq < HZ) {
+			freq = counter->attr.sample_freq;
+
+			hwc->freq_count += freq;
+			hwc->freq_interrupts += interrupts;
+
+			if (hwc->freq_count < HZ)
+				continue;
+
+			interrupts = hwc->freq_interrupts;
+			hwc->freq_interrupts = 0;
+			hwc->freq_count -= HZ;
+		} else
+			freq = HZ;
+
+		events = freq * interrupts * hwc->sample_period;
 		period = div64_u64(events, counter->attr.sample_freq);
 
-		delta = (s64)(1 + period - counter->hw.sample_period);
+		delta = (s64)(1 + period - hwc->sample_period);
 		delta >>= 1;
 
-		sample_period = counter->hw.sample_period + delta;
+		sample_period = hwc->sample_period + delta;
 
 		if (!sample_period)
 			sample_period = 1;
 
 		perf_log_period(counter, sample_period);
 
-		counter->hw.sample_period = sample_period;
+		hwc->sample_period = sample_period;
 	}
 	spin_unlock(&ctx->lock);
 }
-- 
cgit v1.2.3-70-g09d2


From 24430abc88c67e3df2f06c96a6ccd73bda4c92f0 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Tue, 21 Apr 2009 11:02:01 +0300
Subject: Kill jffs2-user.h

This file does not define any kernel-userspace API, all
it does it defines few helpers for userspace. Instead,
userspace should have a private copy of this file.

The main (if not the only) user is the mtd-utils package, but
it already has a private copy of this file.

This patch also removes references to 'jffs2-user.h' from
'Kbuild' and MAINTAINERS' files.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 MAINTAINERS              |  1 -
 include/mtd/Kbuild       |  1 -
 include/mtd/jffs2-user.h | 34 ----------------------------------
 3 files changed, 36 deletions(-)
 delete mode 100644 include/mtd/jffs2-user.h

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index 2b349ba4add..3276e1c256b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3171,7 +3171,6 @@ W:	http://www.linux-mtd.infradead.org/doc/jffs2.html
 S:	Maintained
 F:	fs/jffs2/
 F:	include/linux/jffs2.h
-F:	include/mtd/jffs2-user.h
 
 JOURNALLING LAYER FOR BLOCK DEVICES (JBD)
 P:	Stephen Tweedie
diff --git a/include/mtd/Kbuild b/include/mtd/Kbuild
index 8eb018f9600..192f8fb7d54 100644
--- a/include/mtd/Kbuild
+++ b/include/mtd/Kbuild
@@ -1,5 +1,4 @@
 header-y += inftl-user.h
-header-y += jffs2-user.h
 header-y += mtd-abi.h
 header-y += mtd-user.h
 header-y += nftl-user.h
diff --git a/include/mtd/jffs2-user.h b/include/mtd/jffs2-user.h
deleted file mode 100644
index fa94b0eb67c..00000000000
--- a/include/mtd/jffs2-user.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * JFFS2 definitions for use in user space only
- */
-
-#ifndef __JFFS2_USER_H__
-#define __JFFS2_USER_H__
-
-/* This file is blessed for inclusion by userspace */
-#include <linux/jffs2.h>
-#include <linux/types.h>
-#include <endian.h>
-#include <byteswap.h>
-
-#undef cpu_to_je16
-#undef cpu_to_je32
-#undef cpu_to_jemode
-#undef je16_to_cpu
-#undef je32_to_cpu
-#undef jemode_to_cpu
-
-extern int target_endian;
-
-#define t16(x) ({ __u16 __b = (x); (target_endian==__BYTE_ORDER)?__b:bswap_16(__b); })
-#define t32(x) ({ __u32 __b = (x); (target_endian==__BYTE_ORDER)?__b:bswap_32(__b); })
-
-#define cpu_to_je16(x) ((jint16_t){t16(x)})
-#define cpu_to_je32(x) ((jint32_t){t32(x)})
-#define cpu_to_jemode(x) ((jmode_t){t32(x)})
-
-#define je16_to_cpu(x) (t16((x).v16))
-#define je32_to_cpu(x) (t32((x).v32))
-#define jemode_to_cpu(x) (t32((x).m))
-
-#endif /* __JFFS2_USER_H__ */
-- 
cgit v1.2.3-70-g09d2


From ee8f37688966ab1438d0cf42e0cb7c6595d9592c Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Tue, 5 May 2009 11:04:19 +0300
Subject: mtd: OneNAND: add support for OneNAND manufactured by Numonyx

In addition to adding the Numonyx manufacturer code, this patch
also ensures 'sync. write' is disabled when reading identification
data - something that the Numonyx chip objects to, but the
Samsung chip seems to ignore.

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/onenand/onenand_base.c | 3 ++-
 include/linux/mtd/onenand.h        | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index 30d6999e5f9..2346857a275 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -2576,6 +2576,7 @@ static void onenand_print_device_info(int device, int version)
 
 static const struct onenand_manufacturers onenand_manuf_ids[] = {
         {ONENAND_MFR_SAMSUNG, "Samsung"},
+	{ONENAND_MFR_NUMONYX, "Numonyx"},
 };
 
 /**
@@ -2621,7 +2622,7 @@ static int onenand_probe(struct mtd_info *mtd)
 	/* Save system configuration 1 */
 	syscfg = this->read_word(this->base + ONENAND_REG_SYS_CFG1);
 	/* Clear Sync. Burst Read mode to read BootRAM */
-	this->write_word((syscfg & ~ONENAND_SYS_CFG1_SYNC_READ), this->base + ONENAND_REG_SYS_CFG1);
+	this->write_word((syscfg & ~ONENAND_SYS_CFG1_SYNC_READ & ~ONENAND_SYS_CFG1_SYNC_WRITE), this->base + ONENAND_REG_SYS_CFG1);
 
 	/* Send the command for reading device ID from BootRAM */
 	this->write_word(ONENAND_CMD_READID, this->base + ONENAND_BOOTRAM);
diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h
index 9aa2a9149b5..0fa3ac4ad57 100644
--- a/include/linux/mtd/onenand.h
+++ b/include/linux/mtd/onenand.h
@@ -176,6 +176,7 @@ struct onenand_chip {
  * OneNAND Flash Manufacturer ID Codes
  */
 #define ONENAND_MFR_SAMSUNG	0xec
+#define ONENAND_MFR_NUMONYX	0x20
 
 /**
  * struct onenand_manufacturers - NAND Flash Manufacturer ID Structure
-- 
cgit v1.2.3-70-g09d2


From d6fed9e9fc5eefae5be0ecf222bac7e7496e8e74 Mon Sep 17 00:00:00 2001
From: Alexander Clouter <alex@digriz.org.uk>
Date: Mon, 11 May 2009 19:28:01 +0100
Subject: mtd: extend plat_nand for (read|write)_buf

This patch adds (write|read)_buf callbacks to plat_nand.

The NAND on the TS-7800 provisioned by the FPGA allows readw() and
readl() to be used which gives a 2.5x speed up.  To be able to use this
from the plat_nand driver a hook for read_buf (and also write_buf whilst
we are in there) need to be made available.  This patch adds the hook.

Signed-off-by: Alexander Clouter <alex@digriz.org.uk>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/plat_nand.c | 2 ++
 include/linux/mtd/nand.h     | 6 ++++++
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/drivers/mtd/nand/plat_nand.c b/drivers/mtd/nand/plat_nand.c
index 28ffd4e8bb2..47a2105b967 100644
--- a/drivers/mtd/nand/plat_nand.c
+++ b/drivers/mtd/nand/plat_nand.c
@@ -61,6 +61,8 @@ static int __devinit plat_nand_probe(struct platform_device *pdev)
 	data->chip.cmd_ctrl = pdata->ctrl.cmd_ctrl;
 	data->chip.dev_ready = pdata->ctrl.dev_ready;
 	data->chip.select_chip = pdata->ctrl.select_chip;
+	data->chip.write_buf = pdata->ctrl.write_buf;
+	data->chip.read_buf = pdata->ctrl.read_buf;
 	data->chip.chip_delay = pdata->chip.chip_delay;
 	data->chip.options |= pdata->chip.options;
 
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 7efb9be3466..0e35375ea79 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -584,6 +584,8 @@ struct platform_nand_chip {
  * @select_chip:	platform specific chip select function
  * @cmd_ctrl:		platform specific function for controlling
  *			ALE/CLE/nCE. Also used to write command and address
+ * @write_buf:		platform specific function for write buffer
+ * @read_buf:		platform specific function for read buffer
  * @priv:		private data to transport driver specific settings
  *
  * All fields are optional and depend on the hardware driver requirements
@@ -594,6 +596,10 @@ struct platform_nand_ctrl {
 	void		(*select_chip)(struct mtd_info *mtd, int chip);
 	void		(*cmd_ctrl)(struct mtd_info *mtd, int dat,
 				    unsigned int ctrl);
+	void		(*write_buf)(struct mtd_info *mtd,
+				    const uint8_t *buf, int len);
+	void		(*read_buf)(struct mtd_info *mtd,
+				    uint8_t *buf, int len);
 	void		*priv;
 };
 
-- 
cgit v1.2.3-70-g09d2


From bf95efd41b1a760128eb25402791b0a4941eb655 Mon Sep 17 00:00:00 2001
From: H Hartley Sweeten <hsweeten@visionengravers.com>
Date: Tue, 12 May 2009 13:46:58 -0700
Subject: mtd: plat_nand: add platform probe/remove callbacks

Add optional probe and remove callbacks to the plat_nand driver.

Some platforms may require additional setup, such as configuring the
memory controller, before the nand device can be accessed.  This patch
provides an optional callback to handle this setup as well as a callback
to teardown the setup.

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Tested-by: Alexander Clouter <alex@digriz.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/plat_nand.c | 13 +++++++++++--
 include/linux/mtd/nand.h     |  7 +++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/nand/plat_nand.c b/drivers/mtd/nand/plat_nand.c
index 47a2105b967..22e0ce78841 100644
--- a/drivers/mtd/nand/plat_nand.c
+++ b/drivers/mtd/nand/plat_nand.c
@@ -72,6 +72,13 @@ static int __devinit plat_nand_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, data);
 
+	/* Handle any platform specific setup */
+	if (pdata->ctrl.probe) {
+		res = pdata->ctrl.probe(pdev);
+		if (res)
+			goto out;
+	}
+
 	/* Scan to find existance of the device */
 	if (nand_scan(&data->mtd, 1)) {
 		res = -ENXIO;
@@ -101,6 +108,8 @@ static int __devinit plat_nand_probe(struct platform_device *pdev)
 
 	nand_release(&data->mtd);
 out:
+	if (pdata->ctrl.remove)
+		pdata->ctrl.remove(pdev);
 	platform_set_drvdata(pdev, NULL);
 	iounmap(data->io_base);
 	kfree(data);
@@ -113,15 +122,15 @@ out:
 static int __devexit plat_nand_remove(struct platform_device *pdev)
 {
 	struct plat_nand_data *data = platform_get_drvdata(pdev);
-#ifdef CONFIG_MTD_PARTITIONS
 	struct platform_nand_data *pdata = pdev->dev.platform_data;
-#endif
 
 	nand_release(&data->mtd);
 #ifdef CONFIG_MTD_PARTITIONS
 	if (data->parts && data->parts != pdata->chip.partitions)
 		kfree(data->parts);
 #endif
+	if (pdata->ctrl.remove)
+		pdata->ctrl.remove(pdev);
 	iounmap(data->io_base);
 	kfree(data);
 
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 0e35375ea79..7f2d6935655 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -577,8 +577,13 @@ struct platform_nand_chip {
 	void			*priv;
 };
 
+/* Keep gcc happy */
+struct platform_device;
+
 /**
  * struct platform_nand_ctrl - controller level device structure
+ * @probe:		platform specific function to probe/setup hardware
+ * @remove:		platform specific function to remove/teardown hardware
  * @hwcontrol:		platform specific hardware control structure
  * @dev_ready:		platform specific function to read ready/busy pin
  * @select_chip:	platform specific chip select function
@@ -591,6 +596,8 @@ struct platform_nand_chip {
  * All fields are optional and depend on the hardware driver requirements
  */
 struct platform_nand_ctrl {
+	int		(*probe)(struct platform_device *pdev);
+	void		(*remove)(struct platform_device *pdev);
 	void		(*hwcontrol)(struct mtd_info *mtd, int cmd);
 	int		(*dev_ready)(struct mtd_info *mtd);
 	void		(*select_chip)(struct mtd_info *mtd, int chip);
-- 
cgit v1.2.3-70-g09d2


From f36e20c01ad0104688f2eaebdf2213e749929c97 Mon Sep 17 00:00:00 2001
From: H Hartley Sweeten <hsweeten@visionengravers.com>
Date: Tue, 12 May 2009 13:46:59 -0700
Subject: mtd: plat_nand: allow platform to set partitions

Add optional callback to allow platform to initialize partitions.

Static partitions on a nand device could vary depending on the size of the
device.  This patch allows an optional platform callback to be used to
setup this partition information at runtime.

Scan order is:
	1) chip.part_probe_types
	2) chip.set_parts
	3) chip.partitions
	4) full mtd device (fallback for no partitions)

Some of the existing nand drivers could possibly be replaced by the
plat_nand driver by using this patch.  These include autcpu12.c and
ts7250.c drivers.

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/plat_nand.c | 2 ++
 include/linux/mtd/nand.h     | 3 +++
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/drivers/mtd/nand/plat_nand.c b/drivers/mtd/nand/plat_nand.c
index 22e0ce78841..4e16c6f5bdd 100644
--- a/drivers/mtd/nand/plat_nand.c
+++ b/drivers/mtd/nand/plat_nand.c
@@ -95,6 +95,8 @@ static int __devinit plat_nand_probe(struct platform_device *pdev)
 			return 0;
 		}
 	}
+	if (pdata->chip.set_parts)
+		pdata->chip.set_parts(data->mtd.size, &pdata->chip);
 	if (pdata->chip.partitions) {
 		data->parts = pdata->chip.partitions;
 		res = add_mtd_partitions(&data->mtd, data->parts,
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 7f2d6935655..4030ebada49 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -563,6 +563,7 @@ extern int nand_do_read(struct mtd_info *mtd, loff_t from, size_t len,
  * @options:		Option flags, e.g. 16bit buswidth
  * @ecclayout:		ecc layout info structure
  * @part_probe_types:	NULL-terminated array of probe types
+ * @set_parts:		platform specific function to set partitions
  * @priv:		hardware controller specific settings
  */
 struct platform_nand_chip {
@@ -574,6 +575,8 @@ struct platform_nand_chip {
 	int			chip_delay;
 	unsigned int		options;
 	const char		**part_probe_types;
+	void			(*set_parts)(uint64_t size,
+					struct platform_nand_chip *chip);
 	void			*priv;
 };
 
-- 
cgit v1.2.3-70-g09d2


From 2ac6bf4ddc87c3b6b609f8fa82f6ebbffeac12f4 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Fri, 5 Jun 2009 10:36:24 -0700
Subject: IB/mlx4: Add strong ordering to local inval and fast reg work
 requests

The ConnectX Programmer's Reference Manual states that the "SO" bit
must be set when posting Fast Register and Local Invalidate send work
requests.  When this bit is set, the work request will be executed
only after all previous work requests on the send queue have been
executed.  (If the bit is not set, Fast Register and Local Invalidate
WQEs may begin execution too early, which violates the defined
semantics for these operations)

This fixes the issue with NFS/RDMA reported in
<http://lists.openfabrics.org/pipermail/general/2009-April/059253.html>

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Cc: <stable@kernel.org>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mlx4/qp.c | 4 ++++
 include/linux/mlx4/qp.h         | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 20724aee76f..c4a02648c8a 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1585,12 +1585,16 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 				break;
 
 			case IB_WR_LOCAL_INV:
+				ctrl->srcrb_flags |=
+					cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
 				set_local_inv_seg(wqe, wr->ex.invalidate_rkey);
 				wqe  += sizeof (struct mlx4_wqe_local_inval_seg);
 				size += sizeof (struct mlx4_wqe_local_inval_seg) / 16;
 				break;
 
 			case IB_WR_FAST_REG_MR:
+				ctrl->srcrb_flags |=
+					cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
 				set_fmr_seg(wqe, wr);
 				wqe  += sizeof (struct mlx4_wqe_fmr_seg);
 				size += sizeof (struct mlx4_wqe_fmr_seg) / 16;
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index bf8f11982da..9f29d86e5dc 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -165,6 +165,7 @@ enum {
 	MLX4_WQE_CTRL_IP_CSUM		= 1 << 4,
 	MLX4_WQE_CTRL_TCP_UDP_CSUM	= 1 << 5,
 	MLX4_WQE_CTRL_INS_VLAN		= 1 << 6,
+	MLX4_WQE_CTRL_STRONG_ORDER	= 1 << 7,
 };
 
 struct mlx4_wqe_ctrl_seg {
-- 
cgit v1.2.3-70-g09d2


From 5988af2319781bc8e0ce418affec4e09cfa77907 Mon Sep 17 00:00:00 2001
From: Rohit Hagargundgi <h.rohit@samsung.com>
Date: Tue, 12 May 2009 13:46:57 -0700
Subject: mtd: Flex-OneNAND support

Add support for Samsung Flex-OneNAND devices.

Flex-OneNAND combines SLC and MLC technologies into a single device.
SLC area provides increased reliability and speed, suitable for storing
code such as bootloader, kernel and root file system.  MLC area
provides high density and is suitable for storing user data.

SLC and MLC regions can be configured through kernel parameter.

[akpm@linux-foundation.org: export flexoand_region and onenand_addr]
Signed-off-by: Rohit Hagargundgi <h.rohit@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Vishak G <vishak.g@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 Documentation/kernel-parameters.txt |  10 +
 drivers/mtd/onenand/onenand_base.c  | 857 ++++++++++++++++++++++++++++++++----
 drivers/mtd/onenand/onenand_bbt.c   |  14 +-
 drivers/mtd/onenand/onenand_sim.c   |  81 +++-
 include/linux/mtd/onenand.h         |  18 +
 include/linux/mtd/onenand_regs.h    |  20 +-
 6 files changed, 913 insertions(+), 87 deletions(-)

(limited to 'include')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e87bdbfbcc7..12df135f8af 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1380,6 +1380,16 @@ and is between 256 and 4096 characters. It is defined in the file
 	mtdparts=	[MTD]
 			See drivers/mtd/cmdlinepart.c.
 
+	onenand.bdry=	[HW,MTD] Flex-OneNAND Boundary Configuration
+
+			Format: [die0_boundary][,die0_lock][,die1_boundary][,die1_lock]
+
+			boundary - index of last SLC block on Flex-OneNAND.
+				   The remaining blocks are configured as MLC blocks.
+			lock	 - Configure if Flex-OneNAND boundary should be locked.
+				   Once locked, the boundary cannot be changed.
+				   1 indicates lock status, 0 indicates unlock status.
+
 	mtdset=		[ARM]
 			ARM/S3C2412 JIVE boot control
 
diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index 2346857a275..8d4c9c25373 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -9,6 +9,10 @@
  *	auto-placement support, read-while load support, various fixes
  *	Copyright (C) Nokia Corporation, 2007
  *
+ *	Vishak G <vishak.g at samsung.com>, Rohit Hagargundgi <h.rohit at samsung.com>
+ *	Flex-OneNAND support
+ *	Copyright (C) Samsung Electronics, 2008
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
@@ -27,6 +31,30 @@
 
 #include <asm/io.h>
 
+/* Default Flex-OneNAND boundary and lock respectively */
+static int flex_bdry[MAX_DIES * 2] = { -1, 0, -1, 0 };
+
+/**
+ *  onenand_oob_128 - oob info for Flex-Onenand with 4KB page
+ *  For now, we expose only 64 out of 80 ecc bytes
+ */
+static struct nand_ecclayout onenand_oob_128 = {
+	.eccbytes	= 64,
+	.eccpos		= {
+		6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+		22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+		38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+		54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+		70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+		86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+		102, 103, 104, 105
+		},
+	.oobfree	= {
+		{2, 4}, {18, 4}, {34, 4}, {50, 4},
+		{66, 4}, {82, 4}, {98, 4}, {114, 4}
+	}
+};
+
 /**
  * onenand_oob_64 - oob info for large (2KB) page
  */
@@ -65,6 +93,14 @@ static const unsigned char ffchars[] = {
 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,	/* 48 */
 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,	/* 64 */
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,	/* 80 */
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,	/* 96 */
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,	/* 112 */
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,	/* 128 */
 };
 
 /**
@@ -170,6 +206,70 @@ static int onenand_buffer_address(int dataram1, int sectors, int count)
 	return ((bsa << ONENAND_BSA_SHIFT) | bsc);
 }
 
+/**
+ * flexonenand_block- For given address return block number
+ * @param this         - OneNAND device structure
+ * @param addr		- Address for which block number is needed
+ */
+static unsigned flexonenand_block(struct onenand_chip *this, loff_t addr)
+{
+	unsigned boundary, blk, die = 0;
+
+	if (ONENAND_IS_DDP(this) && addr >= this->diesize[0]) {
+		die = 1;
+		addr -= this->diesize[0];
+	}
+
+	boundary = this->boundary[die];
+
+	blk = addr >> (this->erase_shift - 1);
+	if (blk > boundary)
+		blk = (blk + boundary + 1) >> 1;
+
+	blk += die ? this->density_mask : 0;
+	return blk;
+}
+
+inline unsigned onenand_block(struct onenand_chip *this, loff_t addr)
+{
+	if (!FLEXONENAND(this))
+		return addr >> this->erase_shift;
+	return flexonenand_block(this, addr);
+}
+
+/**
+ * flexonenand_addr - Return address of the block
+ * @this:		OneNAND device structure
+ * @block:		Block number on Flex-OneNAND
+ *
+ * Return address of the block
+ */
+static loff_t flexonenand_addr(struct onenand_chip *this, int block)
+{
+	loff_t ofs = 0;
+	int die = 0, boundary;
+
+	if (ONENAND_IS_DDP(this) && block >= this->density_mask) {
+		block -= this->density_mask;
+		die = 1;
+		ofs = this->diesize[0];
+	}
+
+	boundary = this->boundary[die];
+	ofs += (loff_t)block << (this->erase_shift - 1);
+	if (block > (boundary + 1))
+		ofs += (loff_t)(block - boundary - 1) << (this->erase_shift - 1);
+	return ofs;
+}
+
+loff_t onenand_addr(struct onenand_chip *this, int block)
+{
+	if (!FLEXONENAND(this))
+		return (loff_t)block << this->erase_shift;
+	return flexonenand_addr(this, block);
+}
+EXPORT_SYMBOL(onenand_addr);
+
 /**
  * onenand_get_density - [DEFAULT] Get OneNAND density
  * @param dev_id	OneNAND device ID
@@ -182,6 +282,22 @@ static inline int onenand_get_density(int dev_id)
 	return (density & ONENAND_DEVICE_DENSITY_MASK);
 }
 
+/**
+ * flexonenand_region - [Flex-OneNAND] Return erase region of addr
+ * @param mtd		MTD device structure
+ * @param addr		address whose erase region needs to be identified
+ */
+int flexonenand_region(struct mtd_info *mtd, loff_t addr)
+{
+	int i;
+
+	for (i = 0; i < mtd->numeraseregions; i++)
+		if (addr < mtd->eraseregions[i].offset)
+			break;
+	return i - 1;
+}
+EXPORT_SYMBOL(flexonenand_region);
+
 /**
  * onenand_command - [DEFAULT] Send command to OneNAND device
  * @param mtd		MTD device structure
@@ -207,16 +323,28 @@ static int onenand_command(struct mtd_info *mtd, int cmd, loff_t addr, size_t le
 		page = -1;
 		break;
 
+	case FLEXONENAND_CMD_PI_ACCESS:
+		/* addr contains die index */
+		block = addr * this->density_mask;
+		page = -1;
+		break;
+
 	case ONENAND_CMD_ERASE:
 	case ONENAND_CMD_BUFFERRAM:
 	case ONENAND_CMD_OTP_ACCESS:
-		block = (int) (addr >> this->erase_shift);
+		block = onenand_block(this, addr);
 		page = -1;
 		break;
 
+	case FLEXONENAND_CMD_READ_PI:
+		cmd = ONENAND_CMD_READ;
+		block = addr * this->density_mask;
+		page = 0;
+		break;
+
 	default:
-		block = (int) (addr >> this->erase_shift);
-		page = (int) (addr >> this->page_shift);
+		block = onenand_block(this, addr);
+		page = (int) (addr - onenand_addr(this, block)) >> this->page_shift;
 
 		if (ONENAND_IS_2PLANE(this)) {
 			/* Make the even block number */
@@ -236,7 +364,7 @@ static int onenand_command(struct mtd_info *mtd, int cmd, loff_t addr, size_t le
 		value = onenand_bufferram_address(this, block);
 		this->write_word(value, this->base + ONENAND_REG_START_ADDRESS2);
 
-		if (ONENAND_IS_2PLANE(this))
+		if (ONENAND_IS_MLC(this) || ONENAND_IS_2PLANE(this))
 			/* It is always BufferRAM0 */
 			ONENAND_SET_BUFFERRAM0(this);
 		else
@@ -258,13 +386,18 @@ static int onenand_command(struct mtd_info *mtd, int cmd, loff_t addr, size_t le
 
 	if (page != -1) {
 		/* Now we use page size operation */
-		int sectors = 4, count = 4;
+		int sectors = 0, count = 0;
 		int dataram;
 
 		switch (cmd) {
+		case FLEXONENAND_CMD_RECOVER_LSB:
 		case ONENAND_CMD_READ:
 		case ONENAND_CMD_READOOB:
-			dataram = ONENAND_SET_NEXT_BUFFERRAM(this);
+			if (ONENAND_IS_MLC(this))
+				/* It is always BufferRAM0 */
+				dataram = ONENAND_SET_BUFFERRAM0(this);
+			else
+				dataram = ONENAND_SET_NEXT_BUFFERRAM(this);
 			break;
 
 		default:
@@ -292,6 +425,30 @@ static int onenand_command(struct mtd_info *mtd, int cmd, loff_t addr, size_t le
 	return 0;
 }
 
+/**
+ * onenand_read_ecc - return ecc status
+ * @param this		onenand chip structure
+ */
+static inline int onenand_read_ecc(struct onenand_chip *this)
+{
+	int ecc, i, result = 0;
+
+	if (!FLEXONENAND(this))
+		return this->read_word(this->base + ONENAND_REG_ECC_STATUS);
+
+	for (i = 0; i < 4; i++) {
+		ecc = this->read_word(this->base + ONENAND_REG_ECC_STATUS + i);
+		if (likely(!ecc))
+			continue;
+		if (ecc & FLEXONENAND_UNCORRECTABLE_ERROR)
+			return ONENAND_ECC_2BIT_ALL;
+		else
+			result = ONENAND_ECC_1BIT_ALL;
+	}
+
+	return result;
+}
+
 /**
  * onenand_wait - [DEFAULT] wait until the command is done
  * @param mtd		MTD device structure
@@ -331,14 +488,14 @@ static int onenand_wait(struct mtd_info *mtd, int state)
 	 * power off recovery (POR) test, it should read ECC status first
 	 */
 	if (interrupt & ONENAND_INT_READ) {
-		int ecc = this->read_word(this->base + ONENAND_REG_ECC_STATUS);
+		int ecc = onenand_read_ecc(this);
 		if (ecc) {
 			if (ecc & ONENAND_ECC_2BIT_ALL) {
 				printk(KERN_ERR "onenand_wait: ECC error = 0x%04x\n", ecc);
 				mtd->ecc_stats.failed++;
 				return -EBADMSG;
 			} else if (ecc & ONENAND_ECC_1BIT_ALL) {
-				printk(KERN_INFO "onenand_wait: correctable ECC error = 0x%04x\n", ecc);
+				printk(KERN_DEBUG "onenand_wait: correctable ECC error = 0x%04x\n", ecc);
 				mtd->ecc_stats.corrected++;
 			}
 		}
@@ -656,7 +813,7 @@ static int onenand_check_bufferram(struct mtd_info *mtd, loff_t addr)
 
 	if (found && ONENAND_IS_DDP(this)) {
 		/* Select DataRAM for DDP */
-		int block = (int) (addr >> this->erase_shift);
+		int block = onenand_block(this, addr);
 		int value = onenand_bufferram_address(this, block);
 		this->write_word(value, this->base + ONENAND_REG_START_ADDRESS2);
 	}
@@ -815,6 +972,149 @@ static int onenand_transfer_auto_oob(struct mtd_info *mtd, uint8_t *buf, int col
 	return 0;
 }
 
+/**
+ * onenand_recover_lsb - [Flex-OneNAND] Recover LSB page data
+ * @param mtd		MTD device structure
+ * @param addr		address to recover
+ * @param status	return value from onenand_wait / onenand_bbt_wait
+ *
+ * MLC NAND Flash cell has paired pages - LSB page and MSB page. LSB page has
+ * lower page address and MSB page has higher page address in paired pages.
+ * If power off occurs during MSB page program, the paired LSB page data can
+ * become corrupt. LSB page recovery read is a way to read LSB page though page
+ * data are corrupted. When uncorrectable error occurs as a result of LSB page
+ * read after power up, issue LSB page recovery read.
+ */
+static int onenand_recover_lsb(struct mtd_info *mtd, loff_t addr, int status)
+{
+	struct onenand_chip *this = mtd->priv;
+	int i;
+
+	/* Recovery is only for Flex-OneNAND */
+	if (!FLEXONENAND(this))
+		return status;
+
+	/* check if we failed due to uncorrectable error */
+	if (status != -EBADMSG && status != ONENAND_BBT_READ_ECC_ERROR)
+		return status;
+
+	/* check if address lies in MLC region */
+	i = flexonenand_region(mtd, addr);
+	if (mtd->eraseregions[i].erasesize < (1 << this->erase_shift))
+		return status;
+
+	/* We are attempting to reread, so decrement stats.failed
+	 * which was incremented by onenand_wait due to read failure
+	 */
+	printk(KERN_INFO "onenand_recover_lsb: Attempting to recover from uncorrectable read\n");
+	mtd->ecc_stats.failed--;
+
+	/* Issue the LSB page recovery command */
+	this->command(mtd, FLEXONENAND_CMD_RECOVER_LSB, addr, this->writesize);
+	return this->wait(mtd, FL_READING);
+}
+
+/**
+ * onenand_mlc_read_ops_nolock - MLC OneNAND read main and/or out-of-band
+ * @param mtd		MTD device structure
+ * @param from		offset to read from
+ * @param ops:		oob operation description structure
+ *
+ * MLC OneNAND / Flex-OneNAND has 4KB page size and 4KB dataram.
+ * So, read-while-load is not present.
+ */
+static int onenand_mlc_read_ops_nolock(struct mtd_info *mtd, loff_t from,
+				struct mtd_oob_ops *ops)
+{
+	struct onenand_chip *this = mtd->priv;
+	struct mtd_ecc_stats stats;
+	size_t len = ops->len;
+	size_t ooblen = ops->ooblen;
+	u_char *buf = ops->datbuf;
+	u_char *oobbuf = ops->oobbuf;
+	int read = 0, column, thislen;
+	int oobread = 0, oobcolumn, thisooblen, oobsize;
+	int ret = 0;
+	int writesize = this->writesize;
+
+	DEBUG(MTD_DEBUG_LEVEL3, "onenand_mlc_read_ops_nolock: from = 0x%08x, len = %i\n", (unsigned int) from, (int) len);
+
+	if (ops->mode == MTD_OOB_AUTO)
+		oobsize = this->ecclayout->oobavail;
+	else
+		oobsize = mtd->oobsize;
+
+	oobcolumn = from & (mtd->oobsize - 1);
+
+	/* Do not allow reads past end of device */
+	if (from + len > mtd->size) {
+		printk(KERN_ERR "onenand_mlc_read_ops_nolock: Attempt read beyond end of device\n");
+		ops->retlen = 0;
+		ops->oobretlen = 0;
+		return -EINVAL;
+	}
+
+	stats = mtd->ecc_stats;
+
+	while (read < len) {
+		cond_resched();
+
+		thislen = min_t(int, writesize, len - read);
+
+		column = from & (writesize - 1);
+		if (column + thislen > writesize)
+			thislen = writesize - column;
+
+		if (!onenand_check_bufferram(mtd, from)) {
+			this->command(mtd, ONENAND_CMD_READ, from, writesize);
+
+			ret = this->wait(mtd, FL_READING);
+			if (unlikely(ret))
+				ret = onenand_recover_lsb(mtd, from, ret);
+			onenand_update_bufferram(mtd, from, !ret);
+			if (ret == -EBADMSG)
+				ret = 0;
+		}
+
+		this->read_bufferram(mtd, ONENAND_DATARAM, buf, column, thislen);
+		if (oobbuf) {
+			thisooblen = oobsize - oobcolumn;
+			thisooblen = min_t(int, thisooblen, ooblen - oobread);
+
+			if (ops->mode == MTD_OOB_AUTO)
+				onenand_transfer_auto_oob(mtd, oobbuf, oobcolumn, thisooblen);
+			else
+				this->read_bufferram(mtd, ONENAND_SPARERAM, oobbuf, oobcolumn, thisooblen);
+			oobread += thisooblen;
+			oobbuf += thisooblen;
+			oobcolumn = 0;
+		}
+
+		read += thislen;
+		if (read == len)
+			break;
+
+		from += thislen;
+		buf += thislen;
+	}
+
+	/*
+	 * Return success, if no ECC failures, else -EBADMSG
+	 * fs driver will take care of that, because
+	 * retlen == desired len and result == -EBADMSG
+	 */
+	ops->retlen = read;
+	ops->oobretlen = oobread;
+
+	if (ret)
+		return ret;
+
+	if (mtd->ecc_stats.failed - stats.failed)
+		return -EBADMSG;
+
+	return mtd->ecc_stats.corrected - stats.corrected ? -EUCLEAN : 0;
+}
+
 /**
  * onenand_read_ops_nolock - [OneNAND Interface] OneNAND read main and/or out-of-band
  * @param mtd		MTD device structure
@@ -962,7 +1262,7 @@ static int onenand_read_oob_nolock(struct mtd_info *mtd, loff_t from,
 	size_t len = ops->ooblen;
 	mtd_oob_mode_t mode = ops->mode;
 	u_char *buf = ops->oobbuf;
-	int ret = 0;
+	int ret = 0, readcmd;
 
 	from += ops->ooboffs;
 
@@ -993,17 +1293,22 @@ static int onenand_read_oob_nolock(struct mtd_info *mtd, loff_t from,
 
 	stats = mtd->ecc_stats;
 
+	readcmd = ONENAND_IS_MLC(this) ? ONENAND_CMD_READ : ONENAND_CMD_READOOB;
+
 	while (read < len) {
 		cond_resched();
 
 		thislen = oobsize - column;
 		thislen = min_t(int, thislen, len);
 
-		this->command(mtd, ONENAND_CMD_READOOB, from, mtd->oobsize);
+		this->command(mtd, readcmd, from, mtd->oobsize);
 
 		onenand_update_bufferram(mtd, from, 0);
 
 		ret = this->wait(mtd, FL_READING);
+		if (unlikely(ret))
+			ret = onenand_recover_lsb(mtd, from, ret);
+
 		if (ret && ret != -EBADMSG) {
 			printk(KERN_ERR "onenand_read_oob_nolock: read failed = 0x%x\n", ret);
 			break;
@@ -1053,6 +1358,7 @@ static int onenand_read_oob_nolock(struct mtd_info *mtd, loff_t from,
 static int onenand_read(struct mtd_info *mtd, loff_t from, size_t len,
 	size_t *retlen, u_char *buf)
 {
+	struct onenand_chip *this = mtd->priv;
 	struct mtd_oob_ops ops = {
 		.len	= len,
 		.ooblen	= 0,
@@ -1062,7 +1368,9 @@ static int onenand_read(struct mtd_info *mtd, loff_t from, size_t len,
 	int ret;
 
 	onenand_get_device(mtd, FL_READING);
-	ret = onenand_read_ops_nolock(mtd, from, &ops);
+	ret = ONENAND_IS_MLC(this) ?
+		onenand_mlc_read_ops_nolock(mtd, from, &ops) :
+		onenand_read_ops_nolock(mtd, from, &ops);
 	onenand_release_device(mtd);
 
 	*retlen = ops.retlen;
@@ -1080,6 +1388,7 @@ static int onenand_read(struct mtd_info *mtd, loff_t from, size_t len,
 static int onenand_read_oob(struct mtd_info *mtd, loff_t from,
 			    struct mtd_oob_ops *ops)
 {
+	struct onenand_chip *this = mtd->priv;
 	int ret;
 
 	switch (ops->mode) {
@@ -1094,7 +1403,9 @@ static int onenand_read_oob(struct mtd_info *mtd, loff_t from,
 
 	onenand_get_device(mtd, FL_READING);
 	if (ops->datbuf)
-		ret = onenand_read_ops_nolock(mtd, from, ops);
+		ret = ONENAND_IS_MLC(this) ?
+			onenand_mlc_read_ops_nolock(mtd, from, ops) :
+			onenand_read_ops_nolock(mtd, from, ops);
 	else
 		ret = onenand_read_oob_nolock(mtd, from, ops);
 	onenand_release_device(mtd);
@@ -1128,11 +1439,11 @@ static int onenand_bbt_wait(struct mtd_info *mtd, int state)
 	ctrl = this->read_word(this->base + ONENAND_REG_CTRL_STATUS);
 
 	if (interrupt & ONENAND_INT_READ) {
-		int ecc = this->read_word(this->base + ONENAND_REG_ECC_STATUS);
+		int ecc = onenand_read_ecc(this);
 		if (ecc & ONENAND_ECC_2BIT_ALL) {
 			printk(KERN_INFO "onenand_bbt_wait: ecc error = 0x%04x"
 				", controller error 0x%04x\n", ecc, ctrl);
-			return ONENAND_BBT_READ_ERROR;
+			return ONENAND_BBT_READ_ECC_ERROR;
 		}
 	} else {
 		printk(KERN_ERR "onenand_bbt_wait: read timeout!"
@@ -1163,7 +1474,7 @@ int onenand_bbt_read_oob(struct mtd_info *mtd, loff_t from,
 {
 	struct onenand_chip *this = mtd->priv;
 	int read = 0, thislen, column;
-	int ret = 0;
+	int ret = 0, readcmd;
 	size_t len = ops->ooblen;
 	u_char *buf = ops->oobbuf;
 
@@ -1183,17 +1494,22 @@ int onenand_bbt_read_oob(struct mtd_info *mtd, loff_t from,
 
 	column = from & (mtd->oobsize - 1);
 
+	readcmd = ONENAND_IS_MLC(this) ? ONENAND_CMD_READ : ONENAND_CMD_READOOB;
+
 	while (read < len) {
 		cond_resched();
 
 		thislen = mtd->oobsize - column;
 		thislen = min_t(int, thislen, len);
 
-		this->command(mtd, ONENAND_CMD_READOOB, from, mtd->oobsize);
+		this->command(mtd, readcmd, from, mtd->oobsize);
 
 		onenand_update_bufferram(mtd, from, 0);
 
 		ret = onenand_bbt_wait(mtd, FL_READING);
+		if (unlikely(ret))
+			ret = onenand_recover_lsb(mtd, from, ret);
+
 		if (ret)
 			break;
 
@@ -1230,9 +1546,11 @@ static int onenand_verify_oob(struct mtd_info *mtd, const u_char *buf, loff_t to
 {
 	struct onenand_chip *this = mtd->priv;
 	u_char *oob_buf = this->oob_buf;
-	int status, i;
+	int status, i, readcmd;
+
+	readcmd = ONENAND_IS_MLC(this) ? ONENAND_CMD_READ : ONENAND_CMD_READOOB;
 
-	this->command(mtd, ONENAND_CMD_READOOB, to, mtd->oobsize);
+	this->command(mtd, readcmd, to, mtd->oobsize);
 	onenand_update_bufferram(mtd, to, 0);
 	status = this->wait(mtd, FL_READING);
 	if (status)
@@ -1633,7 +1951,7 @@ static int onenand_write_oob_nolock(struct mtd_info *mtd, loff_t to,
 {
 	struct onenand_chip *this = mtd->priv;
 	int column, ret = 0, oobsize;
-	int written = 0;
+	int written = 0, oobcmd;
 	u_char *oobbuf;
 	size_t len = ops->ooblen;
 	const u_char *buf = ops->oobbuf;
@@ -1675,6 +1993,8 @@ static int onenand_write_oob_nolock(struct mtd_info *mtd, loff_t to,
 
 	oobbuf = this->oob_buf;
 
+	oobcmd = ONENAND_IS_MLC(this) ? ONENAND_CMD_PROG : ONENAND_CMD_PROGOOB;
+
 	/* Loop until all data write */
 	while (written < len) {
 		int thislen = min_t(int, oobsize, len - written);
@@ -1692,7 +2012,14 @@ static int onenand_write_oob_nolock(struct mtd_info *mtd, loff_t to,
 			memcpy(oobbuf + column, buf, thislen);
 		this->write_bufferram(mtd, ONENAND_SPARERAM, oobbuf, 0, mtd->oobsize);
 
-		this->command(mtd, ONENAND_CMD_PROGOOB, to, mtd->oobsize);
+		if (ONENAND_IS_MLC(this)) {
+			/* Set main area of DataRAM to 0xff*/
+			memset(this->page_buf, 0xff, mtd->writesize);
+			this->write_bufferram(mtd, ONENAND_DATARAM,
+					 this->page_buf, 0, mtd->writesize);
+		}
+
+		this->command(mtd, oobcmd, to, mtd->oobsize);
 
 		onenand_update_bufferram(mtd, to, 0);
 		if (ONENAND_IS_2PLANE(this)) {
@@ -1815,29 +2142,48 @@ static int onenand_erase(struct mtd_info *mtd, struct erase_info *instr)
 {
 	struct onenand_chip *this = mtd->priv;
 	unsigned int block_size;
-	loff_t addr;
-	int len;
-	int ret = 0;
+	loff_t addr = instr->addr;
+	loff_t len = instr->len;
+	int ret = 0, i;
+	struct mtd_erase_region_info *region = NULL;
+	loff_t region_end = 0;
 
 	DEBUG(MTD_DEBUG_LEVEL3, "onenand_erase: start = 0x%012llx, len = %llu\n", (unsigned long long) instr->addr, (unsigned long long) instr->len);
 
-	block_size = (1 << this->erase_shift);
-
-	/* Start address must align on block boundary */
-	if (unlikely(instr->addr & (block_size - 1))) {
-		printk(KERN_ERR "onenand_erase: Unaligned address\n");
+	/* Do not allow erase past end of device */
+	if (unlikely((len + addr) > mtd->size)) {
+		printk(KERN_ERR "onenand_erase: Erase past end of device\n");
 		return -EINVAL;
 	}
 
-	/* Length must align on block boundary */
-	if (unlikely(instr->len & (block_size - 1))) {
-		printk(KERN_ERR "onenand_erase: Length not block aligned\n");
-		return -EINVAL;
+	if (FLEXONENAND(this)) {
+		/* Find the eraseregion of this address */
+		i = flexonenand_region(mtd, addr);
+		region = &mtd->eraseregions[i];
+
+		block_size = region->erasesize;
+		region_end = region->offset + region->erasesize * region->numblocks;
+
+		/* Start address within region must align on block boundary.
+		 * Erase region's start offset is always block start address.
+		 */
+		if (unlikely((addr - region->offset) & (block_size - 1))) {
+			printk(KERN_ERR "onenand_erase: Unaligned address\n");
+			return -EINVAL;
+		}
+	} else {
+		block_size = 1 << this->erase_shift;
+
+		/* Start address must align on block boundary */
+		if (unlikely(addr & (block_size - 1))) {
+			printk(KERN_ERR "onenand_erase: Unaligned address\n");
+			return -EINVAL;
+		}
 	}
 
-	/* Do not allow erase past end of device */
-	if (unlikely((instr->len + instr->addr) > mtd->size)) {
-		printk(KERN_ERR "onenand_erase: Erase past end of device\n");
+	/* Length must align on block boundary */
+	if (unlikely(len & (block_size - 1))) {
+		printk(KERN_ERR "onenand_erase: Length not block aligned\n");
 		return -EINVAL;
 	}
 
@@ -1847,9 +2193,6 @@ static int onenand_erase(struct mtd_info *mtd, struct erase_info *instr)
 	onenand_get_device(mtd, FL_ERASING);
 
 	/* Loop throught the pages */
-	len = instr->len;
-	addr = instr->addr;
-
 	instr->state = MTD_ERASING;
 
 	while (len) {
@@ -1869,7 +2212,8 @@ static int onenand_erase(struct mtd_info *mtd, struct erase_info *instr)
 		ret = this->wait(mtd, FL_ERASING);
 		/* Check, if it is write protected */
 		if (ret) {
-			printk(KERN_ERR "onenand_erase: Failed erase, block %d\n", (unsigned) (addr >> this->erase_shift));
+			printk(KERN_ERR "onenand_erase: Failed erase, block %d\n",
+						 onenand_block(this, addr));
 			instr->state = MTD_ERASE_FAILED;
 			instr->fail_addr = addr;
 			goto erase_exit;
@@ -1877,6 +2221,22 @@ static int onenand_erase(struct mtd_info *mtd, struct erase_info *instr)
 
 		len -= block_size;
 		addr += block_size;
+
+		if (addr == region_end) {
+			if (!len)
+				break;
+			region++;
+
+			block_size = region->erasesize;
+			region_end = region->offset + region->erasesize * region->numblocks;
+
+			if (len & (block_size - 1)) {
+				/* FIXME: This should be handled at MTD partitioning level. */
+				printk(KERN_ERR "onenand_erase: Unaligned address\n");
+				goto erase_exit;
+			}
+		}
+
 	}
 
 	instr->state = MTD_ERASE_DONE;
@@ -1955,13 +2315,17 @@ static int onenand_default_block_markbad(struct mtd_info *mtd, loff_t ofs)
 	int block;
 
 	/* Get block number */
-	block = ((int) ofs) >> bbm->bbt_erase_shift;
+	block = onenand_block(this, ofs);
         if (bbm->bbt)
                 bbm->bbt[block >> 2] |= 0x01 << ((block & 0x03) << 1);
 
         /* We write two bytes, so we dont have to mess with 16 bit access */
         ofs += mtd->oobsize + (bbm->badblockpos & ~0x01);
-        return onenand_write_oob_nolock(mtd, ofs, &ops);
+	/* FIXME : What to do when marking SLC block in partition
+	 * 	   with MLC erasesize? For now, it is not advisable to
+	 *	   create partitions containing both SLC and MLC regions.
+	 */
+	return onenand_write_oob_nolock(mtd, ofs, &ops);
 }
 
 /**
@@ -2005,8 +2369,8 @@ static int onenand_do_lock_cmd(struct mtd_info *mtd, loff_t ofs, size_t len, int
 	int start, end, block, value, status;
 	int wp_status_mask;
 
-	start = ofs >> this->erase_shift;
-	end = len >> this->erase_shift;
+	start = onenand_block(this, ofs);
+	end = onenand_block(this, ofs + len) - 1;
 
 	if (cmd == ONENAND_CMD_LOCK)
 		wp_status_mask = ONENAND_WP_LS;
@@ -2018,7 +2382,7 @@ static int onenand_do_lock_cmd(struct mtd_info *mtd, loff_t ofs, size_t len, int
 		/* Set start block address */
 		this->write_word(start, this->base + ONENAND_REG_START_BLOCK_ADDRESS);
 		/* Set end block address */
-		this->write_word(start + end - 1, this->base + ONENAND_REG_END_BLOCK_ADDRESS);
+		this->write_word(end, this->base +  ONENAND_REG_END_BLOCK_ADDRESS);
 		/* Write lock command */
 		this->command(mtd, cmd, 0, 0);
 
@@ -2039,7 +2403,7 @@ static int onenand_do_lock_cmd(struct mtd_info *mtd, loff_t ofs, size_t len, int
 	}
 
 	/* Block lock scheme */
-	for (block = start; block < start + end; block++) {
+	for (block = start; block < end + 1; block++) {
 		/* Set block address */
 		value = onenand_block_address(this, block);
 		this->write_word(value, this->base + ONENAND_REG_START_ADDRESS1);
@@ -2147,7 +2511,7 @@ static void onenand_unlock_all(struct mtd_info *mtd)
 {
 	struct onenand_chip *this = mtd->priv;
 	loff_t ofs = 0;
-	size_t len = this->chipsize;
+	loff_t len = mtd->size;
 
 	if (this->options & ONENAND_HAS_UNLOCK_ALL) {
 		/* Set start block address */
@@ -2168,7 +2532,7 @@ static void onenand_unlock_all(struct mtd_info *mtd)
 			return;
 
 		/* Workaround for all block unlock in DDP */
-		if (ONENAND_IS_DDP(this)) {
+		if (ONENAND_IS_DDP(this) && !FLEXONENAND(this)) {
 			/* All blocks on another chip */
 			ofs = this->chipsize >> 1;
 			len = this->chipsize >> 1;
@@ -2210,7 +2574,9 @@ static int do_otp_read(struct mtd_info *mtd, loff_t from, size_t len,
 	this->command(mtd, ONENAND_CMD_OTP_ACCESS, 0, 0);
 	this->wait(mtd, FL_OTPING);
 
-	ret = onenand_read_ops_nolock(mtd, from, &ops);
+	ret = ONENAND_IS_MLC(this) ?
+		onenand_mlc_read_ops_nolock(mtd, from, &ops) :
+		onenand_read_ops_nolock(mtd, from, &ops);
 
 	/* Exit OTP access mode */
 	this->command(mtd, ONENAND_CMD_RESET, 0, 0);
@@ -2277,21 +2643,32 @@ static int do_otp_lock(struct mtd_info *mtd, loff_t from, size_t len,
 		size_t *retlen, u_char *buf)
 {
 	struct onenand_chip *this = mtd->priv;
-	struct mtd_oob_ops ops = {
-		.mode = MTD_OOB_PLACE,
-		.ooblen = len,
-		.oobbuf = buf,
-		.ooboffs = 0,
-	};
+	struct mtd_oob_ops ops;
 	int ret;
 
 	/* Enter OTP access mode */
 	this->command(mtd, ONENAND_CMD_OTP_ACCESS, 0, 0);
 	this->wait(mtd, FL_OTPING);
 
-	ret = onenand_write_oob_nolock(mtd, from, &ops);
-
-	*retlen = ops.oobretlen;
+	if (FLEXONENAND(this)) {
+		/*
+		 * For Flex-OneNAND, we write lock mark to 1st word of sector 4 of
+		 * main area of page 49.
+		 */
+		ops.len = mtd->writesize;
+		ops.ooblen = 0;
+		ops.datbuf = buf;
+		ops.oobbuf = NULL;
+		ret = onenand_write_ops_nolock(mtd, mtd->writesize * 49, &ops);
+		*retlen = ops.retlen;
+	} else {
+		ops.mode = MTD_OOB_PLACE;
+		ops.ooblen = len;
+		ops.oobbuf = buf;
+		ops.ooboffs = 0;
+		ret = onenand_write_oob_nolock(mtd, from, &ops);
+		*retlen = ops.oobretlen;
+	}
 
 	/* Exit OTP access mode */
 	this->command(mtd, ONENAND_CMD_RESET, 0, 0);
@@ -2475,27 +2852,34 @@ static int onenand_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
 			size_t len)
 {
 	struct onenand_chip *this = mtd->priv;
-	u_char *oob_buf = this->oob_buf;
+	u_char *buf = FLEXONENAND(this) ? this->page_buf : this->oob_buf;
 	size_t retlen;
 	int ret;
 
-	memset(oob_buf, 0xff, mtd->oobsize);
+	memset(buf, 0xff, FLEXONENAND(this) ? this->writesize
+						 : mtd->oobsize);
 	/*
 	 * Note: OTP lock operation
 	 *       OTP block : 0xXXFC
 	 *       1st block : 0xXXF3 (If chip support)
 	 *       Both      : 0xXXF0 (If chip support)
 	 */
-	oob_buf[ONENAND_OTP_LOCK_OFFSET] = 0xFC;
+	if (FLEXONENAND(this))
+		buf[FLEXONENAND_OTP_LOCK_OFFSET] = 0xFC;
+	else
+		buf[ONENAND_OTP_LOCK_OFFSET] = 0xFC;
 
 	/*
 	 * Write lock mark to 8th word of sector0 of page0 of the spare0.
 	 * We write 16 bytes spare area instead of 2 bytes.
+	 * For Flex-OneNAND, we write lock mark to 1st word of sector 4 of
+	 * main area of page 49.
 	 */
+
 	from = 0;
-	len = 16;
+	len = FLEXONENAND(this) ? mtd->writesize : 16;
 
-	ret = onenand_otp_walk(mtd, from, len, &retlen, oob_buf, do_otp_lock, MTD_OTP_USER);
+	ret = onenand_otp_walk(mtd, from, len, &retlen, buf, do_otp_lock, MTD_OTP_USER);
 
 	return ret ? : retlen;
 }
@@ -2542,6 +2926,14 @@ static void onenand_check_features(struct mtd_info *mtd)
 		break;
 	}
 
+	if (ONENAND_IS_MLC(this))
+		this->options &= ~ONENAND_HAS_2PLANE;
+
+	if (FLEXONENAND(this)) {
+		this->options &= ~ONENAND_HAS_CONT_LOCK;
+		this->options |= ONENAND_HAS_UNLOCK_ALL;
+	}
+
 	if (this->options & ONENAND_HAS_CONT_LOCK)
 		printk(KERN_DEBUG "Lock scheme is Continuous Lock\n");
 	if (this->options & ONENAND_HAS_UNLOCK_ALL)
@@ -2559,14 +2951,16 @@ static void onenand_check_features(struct mtd_info *mtd)
  */
 static void onenand_print_device_info(int device, int version)
 {
-        int vcc, demuxed, ddp, density;
+	int vcc, demuxed, ddp, density, flexonenand;
 
         vcc = device & ONENAND_DEVICE_VCC_MASK;
         demuxed = device & ONENAND_DEVICE_IS_DEMUX;
         ddp = device & ONENAND_DEVICE_IS_DDP;
         density = onenand_get_density(device);
-        printk(KERN_INFO "%sOneNAND%s %dMB %sV 16-bit (0x%02x)\n",
-                demuxed ? "" : "Muxed ",
+	flexonenand = device & DEVICE_IS_FLEXONENAND;
+	printk(KERN_INFO "%s%sOneNAND%s %dMB %sV 16-bit (0x%02x)\n",
+		demuxed ? "" : "Muxed ",
+		flexonenand ? "Flex-" : "",
                 ddp ? "(DDP)" : "",
                 (16 << density),
                 vcc ? "2.65/3.3" : "1.8",
@@ -2605,6 +2999,280 @@ static int onenand_check_maf(int manuf)
 	return (i == size);
 }
 
+/**
+* flexonenand_get_boundary	- Reads the SLC boundary
+* @param onenand_info		- onenand info structure
+**/
+static int flexonenand_get_boundary(struct mtd_info *mtd)
+{
+	struct onenand_chip *this = mtd->priv;
+	unsigned die, bdry;
+	int ret, syscfg, locked;
+
+	/* Disable ECC */
+	syscfg = this->read_word(this->base + ONENAND_REG_SYS_CFG1);
+	this->write_word((syscfg | 0x0100), this->base + ONENAND_REG_SYS_CFG1);
+
+	for (die = 0; die < this->dies; die++) {
+		this->command(mtd, FLEXONENAND_CMD_PI_ACCESS, die, 0);
+		this->wait(mtd, FL_SYNCING);
+
+		this->command(mtd, FLEXONENAND_CMD_READ_PI, die, 0);
+		ret = this->wait(mtd, FL_READING);
+
+		bdry = this->read_word(this->base + ONENAND_DATARAM);
+		if ((bdry >> FLEXONENAND_PI_UNLOCK_SHIFT) == 3)
+			locked = 0;
+		else
+			locked = 1;
+		this->boundary[die] = bdry & FLEXONENAND_PI_MASK;
+
+		this->command(mtd, ONENAND_CMD_RESET, 0, 0);
+		ret = this->wait(mtd, FL_RESETING);
+
+		printk(KERN_INFO "Die %d boundary: %d%s\n", die,
+		       this->boundary[die], locked ? "(Locked)" : "(Unlocked)");
+	}
+
+	/* Enable ECC */
+	this->write_word(syscfg, this->base + ONENAND_REG_SYS_CFG1);
+	return 0;
+}
+
+/**
+ * flexonenand_get_size - Fill up fields in onenand_chip and mtd_info
+ * 			  boundary[], diesize[], mtd->size, mtd->erasesize
+ * @param mtd		- MTD device structure
+ */
+static void flexonenand_get_size(struct mtd_info *mtd)
+{
+	struct onenand_chip *this = mtd->priv;
+	int die, i, eraseshift, density;
+	int blksperdie, maxbdry;
+	loff_t ofs;
+
+	density = onenand_get_density(this->device_id);
+	blksperdie = ((loff_t)(16 << density) << 20) >> (this->erase_shift);
+	blksperdie >>= ONENAND_IS_DDP(this) ? 1 : 0;
+	maxbdry = blksperdie - 1;
+	eraseshift = this->erase_shift - 1;
+
+	mtd->numeraseregions = this->dies << 1;
+
+	/* This fills up the device boundary */
+	flexonenand_get_boundary(mtd);
+	die = ofs = 0;
+	i = -1;
+	for (; die < this->dies; die++) {
+		if (!die || this->boundary[die-1] != maxbdry) {
+			i++;
+			mtd->eraseregions[i].offset = ofs;
+			mtd->eraseregions[i].erasesize = 1 << eraseshift;
+			mtd->eraseregions[i].numblocks =
+							this->boundary[die] + 1;
+			ofs += mtd->eraseregions[i].numblocks << eraseshift;
+			eraseshift++;
+		} else {
+			mtd->numeraseregions -= 1;
+			mtd->eraseregions[i].numblocks +=
+							this->boundary[die] + 1;
+			ofs += (this->boundary[die] + 1) << (eraseshift - 1);
+		}
+		if (this->boundary[die] != maxbdry) {
+			i++;
+			mtd->eraseregions[i].offset = ofs;
+			mtd->eraseregions[i].erasesize = 1 << eraseshift;
+			mtd->eraseregions[i].numblocks = maxbdry ^
+							 this->boundary[die];
+			ofs += mtd->eraseregions[i].numblocks << eraseshift;
+			eraseshift--;
+		} else
+			mtd->numeraseregions -= 1;
+	}
+
+	/* Expose MLC erase size except when all blocks are SLC */
+	mtd->erasesize = 1 << this->erase_shift;
+	if (mtd->numeraseregions == 1)
+		mtd->erasesize >>= 1;
+
+	printk(KERN_INFO "Device has %d eraseregions\n", mtd->numeraseregions);
+	for (i = 0; i < mtd->numeraseregions; i++)
+		printk(KERN_INFO "[offset: 0x%08x, erasesize: 0x%05x,"
+			" numblocks: %04u]\n",
+			(unsigned int) mtd->eraseregions[i].offset,
+			mtd->eraseregions[i].erasesize,
+			mtd->eraseregions[i].numblocks);
+
+	for (die = 0, mtd->size = 0; die < this->dies; die++) {
+		this->diesize[die] = (loff_t)blksperdie << this->erase_shift;
+		this->diesize[die] -= (loff_t)(this->boundary[die] + 1)
+						 << (this->erase_shift - 1);
+		mtd->size += this->diesize[die];
+	}
+}
+
+/**
+ * flexonenand_check_blocks_erased - Check if blocks are erased
+ * @param mtd_info	- mtd info structure
+ * @param start		- first erase block to check
+ * @param end		- last erase block to check
+ *
+ * Converting an unerased block from MLC to SLC
+ * causes byte values to change. Since both data and its ECC
+ * have changed, reads on the block give uncorrectable error.
+ * This might lead to the block being detected as bad.
+ *
+ * Avoid this by ensuring that the block to be converted is
+ * erased.
+ */
+static int flexonenand_check_blocks_erased(struct mtd_info *mtd, int start, int end)
+{
+	struct onenand_chip *this = mtd->priv;
+	int i, ret;
+	int block;
+	struct mtd_oob_ops ops = {
+		.mode = MTD_OOB_PLACE,
+		.ooboffs = 0,
+		.ooblen	= mtd->oobsize,
+		.datbuf	= NULL,
+		.oobbuf	= this->oob_buf,
+	};
+	loff_t addr;
+
+	printk(KERN_DEBUG "Check blocks from %d to %d\n", start, end);
+
+	for (block = start; block <= end; block++) {
+		addr = flexonenand_addr(this, block);
+		if (onenand_block_isbad_nolock(mtd, addr, 0))
+			continue;
+
+		/*
+		 * Since main area write results in ECC write to spare,
+		 * it is sufficient to check only ECC bytes for change.
+		 */
+		ret = onenand_read_oob_nolock(mtd, addr, &ops);
+		if (ret)
+			return ret;
+
+		for (i = 0; i < mtd->oobsize; i++)
+			if (this->oob_buf[i] != 0xff)
+				break;
+
+		if (i != mtd->oobsize) {
+			printk(KERN_WARNING "Block %d not erased.\n", block);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * flexonenand_set_boundary	- Writes the SLC boundary
+ * @param mtd			- mtd info structure
+ */
+int flexonenand_set_boundary(struct mtd_info *mtd, int die,
+				    int boundary, int lock)
+{
+	struct onenand_chip *this = mtd->priv;
+	int ret, density, blksperdie, old, new, thisboundary;
+	loff_t addr;
+
+	/* Change only once for SDP Flex-OneNAND */
+	if (die && (!ONENAND_IS_DDP(this)))
+		return 0;
+
+	/* boundary value of -1 indicates no required change */
+	if (boundary < 0 || boundary == this->boundary[die])
+		return 0;
+
+	density = onenand_get_density(this->device_id);
+	blksperdie = ((16 << density) << 20) >> this->erase_shift;
+	blksperdie >>= ONENAND_IS_DDP(this) ? 1 : 0;
+
+	if (boundary >= blksperdie) {
+		printk(KERN_ERR "flexonenand_set_boundary: Invalid boundary value. "
+				"Boundary not changed.\n");
+		return -EINVAL;
+	}
+
+	/* Check if converting blocks are erased */
+	old = this->boundary[die] + (die * this->density_mask);
+	new = boundary + (die * this->density_mask);
+	ret = flexonenand_check_blocks_erased(mtd, min(old, new) + 1, max(old, new));
+	if (ret) {
+		printk(KERN_ERR "flexonenand_set_boundary: Please erase blocks before boundary change\n");
+		return ret;
+	}
+
+	this->command(mtd, FLEXONENAND_CMD_PI_ACCESS, die, 0);
+	this->wait(mtd, FL_SYNCING);
+
+	/* Check is boundary is locked */
+	this->command(mtd, FLEXONENAND_CMD_READ_PI, die, 0);
+	ret = this->wait(mtd, FL_READING);
+
+	thisboundary = this->read_word(this->base + ONENAND_DATARAM);
+	if ((thisboundary >> FLEXONENAND_PI_UNLOCK_SHIFT) != 3) {
+		printk(KERN_ERR "flexonenand_set_boundary: boundary locked\n");
+		ret = 1;
+		goto out;
+	}
+
+	printk(KERN_INFO "flexonenand_set_boundary: Changing die %d boundary: %d%s\n",
+			die, boundary, lock ? "(Locked)" : "(Unlocked)");
+
+	addr = die ? this->diesize[0] : 0;
+
+	boundary &= FLEXONENAND_PI_MASK;
+	boundary |= lock ? 0 : (3 << FLEXONENAND_PI_UNLOCK_SHIFT);
+
+	this->command(mtd, ONENAND_CMD_ERASE, addr, 0);
+	ret = this->wait(mtd, FL_ERASING);
+	if (ret) {
+		printk(KERN_ERR "flexonenand_set_boundary: Failed PI erase for Die %d\n", die);
+		goto out;
+	}
+
+	this->write_word(boundary, this->base + ONENAND_DATARAM);
+	this->command(mtd, ONENAND_CMD_PROG, addr, 0);
+	ret = this->wait(mtd, FL_WRITING);
+	if (ret) {
+		printk(KERN_ERR "flexonenand_set_boundary: Failed PI write for Die %d\n", die);
+		goto out;
+	}
+
+	this->command(mtd, FLEXONENAND_CMD_PI_UPDATE, die, 0);
+	ret = this->wait(mtd, FL_WRITING);
+out:
+	this->write_word(ONENAND_CMD_RESET, this->base + ONENAND_REG_COMMAND);
+	this->wait(mtd, FL_RESETING);
+	if (!ret)
+		/* Recalculate device size on boundary change*/
+		flexonenand_get_size(mtd);
+
+	return ret;
+}
+
+/**
+ * flexonenand_setup - 	capture Flex-OneNAND boundary and lock
+ * 			values  passed as kernel parameters
+ * @param s	kernel parameter string
+ */
+static int flexonenand_setup(char *s)
+{
+	int ints[5], i;
+
+	s = get_options(s, 5, ints);
+
+	for (i = 0; i < ints[0]; i++)
+		flex_bdry[i] = ints[i + 1];
+
+	return 1;
+}
+
+__setup("onenand.bdry=", flexonenand_setup);
+
 /**
  * onenand_probe - [OneNAND Interface] Probe the OneNAND device
  * @param mtd		MTD device structure
@@ -2647,6 +3315,7 @@ static int onenand_probe(struct mtd_info *mtd)
 	maf_id = this->read_word(this->base + ONENAND_REG_MANUFACTURER_ID);
 	dev_id = this->read_word(this->base + ONENAND_REG_DEVICE_ID);
 	ver_id = this->read_word(this->base + ONENAND_REG_VERSION_ID);
+	this->technology = this->read_word(this->base + ONENAND_REG_TECHNOLOGY);
 
 	/* Check OneNAND device */
 	if (maf_id != bram_maf_id || dev_id != bram_dev_id)
@@ -2658,29 +3327,55 @@ static int onenand_probe(struct mtd_info *mtd)
 	this->version_id = ver_id;
 
 	density = onenand_get_density(dev_id);
+	if (FLEXONENAND(this)) {
+		this->dies = ONENAND_IS_DDP(this) ? 2 : 1;
+		/* Maximum possible erase regions */
+		mtd->numeraseregions = this->dies << 1;
+		mtd->eraseregions = kzalloc(sizeof(struct mtd_erase_region_info)
+					* (this->dies << 1), GFP_KERNEL);
+		if (!mtd->eraseregions)
+			return -ENOMEM;
+	}
+
+	/*
+	 * For Flex-OneNAND, chipsize represents maximum possible device size.
+	 * mtd->size represents the actual device size.
+	 */
 	this->chipsize = (16 << density) << 20;
-	/* Set density mask. it is used for DDP */
-	if (ONENAND_IS_DDP(this))
-		this->density_mask = (1 << (density + 6));
-	else
-		this->density_mask = 0;
 
 	/* OneNAND page size & block size */
 	/* The data buffer size is equal to page size */
 	mtd->writesize = this->read_word(this->base + ONENAND_REG_DATA_BUFFER_SIZE);
+	/* We use the full BufferRAM */
+	if (ONENAND_IS_MLC(this))
+		mtd->writesize <<= 1;
+
 	mtd->oobsize = mtd->writesize >> 5;
 	/* Pages per a block are always 64 in OneNAND */
 	mtd->erasesize = mtd->writesize << 6;
+	/*
+	 * Flex-OneNAND SLC area has 64 pages per block.
+	 * Flex-OneNAND MLC area has 128 pages per block.
+	 * Expose MLC erase size to find erase_shift and page_mask.
+	 */
+	if (FLEXONENAND(this))
+		mtd->erasesize <<= 1;
 
 	this->erase_shift = ffs(mtd->erasesize) - 1;
 	this->page_shift = ffs(mtd->writesize) - 1;
 	this->page_mask = (1 << (this->erase_shift - this->page_shift)) - 1;
+	/* Set density mask. it is used for DDP */
+	if (ONENAND_IS_DDP(this))
+		this->density_mask = this->chipsize >> (this->erase_shift + 1);
 	/* It's real page size */
 	this->writesize = mtd->writesize;
 
 	/* REVIST: Multichip handling */
 
-	mtd->size = this->chipsize;
+	if (FLEXONENAND(this))
+		flexonenand_get_size(mtd);
+	else
+		mtd->size = this->chipsize;
 
 	/* Check OneNAND features */
 	onenand_check_features(mtd);
@@ -2735,7 +3430,7 @@ static void onenand_resume(struct mtd_info *mtd)
  */
 int onenand_scan(struct mtd_info *mtd, int maxchips)
 {
-	int i;
+	int i, ret;
 	struct onenand_chip *this = mtd->priv;
 
 	if (!this->read_word)
@@ -2797,6 +3492,10 @@ int onenand_scan(struct mtd_info *mtd, int maxchips)
 	 * Allow subpage writes up to oobsize.
 	 */
 	switch (mtd->oobsize) {
+	case 128:
+		this->ecclayout = &onenand_oob_128;
+		mtd->subpage_sft = 0;
+		break;
 	case 64:
 		this->ecclayout = &onenand_oob_64;
 		mtd->subpage_sft = 2;
@@ -2862,7 +3561,16 @@ int onenand_scan(struct mtd_info *mtd, int maxchips)
 	/* Unlock whole block */
 	onenand_unlock_all(mtd);
 
-	return this->scan_bbt(mtd);
+	ret = this->scan_bbt(mtd);
+	if ((!FLEXONENAND(this)) || ret)
+		return ret;
+
+	/* Change Flex-OneNAND boundaries if required */
+	for (i = 0; i < MAX_DIES; i++)
+		flexonenand_set_boundary(mtd, i, flex_bdry[2 * i],
+						 flex_bdry[(2 * i) + 1]);
+
+	return 0;
 }
 
 /**
@@ -2891,6 +3599,7 @@ void onenand_release(struct mtd_info *mtd)
 		kfree(this->page_buf);
 	if (this->options & ONENAND_OOBBUF_ALLOC)
 		kfree(this->oob_buf);
+	kfree(mtd->eraseregions);
 }
 
 EXPORT_SYMBOL_GPL(onenand_scan);
diff --git a/drivers/mtd/onenand/onenand_bbt.c b/drivers/mtd/onenand/onenand_bbt.c
index 2f53b51c680..a91fcac1af0 100644
--- a/drivers/mtd/onenand/onenand_bbt.c
+++ b/drivers/mtd/onenand/onenand_bbt.c
@@ -63,6 +63,7 @@ static int create_bbt(struct mtd_info *mtd, uint8_t *buf, struct nand_bbt_descr
 	loff_t from;
 	size_t readlen, ooblen;
 	struct mtd_oob_ops ops;
+	int rgn;
 
 	printk(KERN_INFO "Scanning device for bad blocks\n");
 
@@ -76,7 +77,7 @@ static int create_bbt(struct mtd_info *mtd, uint8_t *buf, struct nand_bbt_descr
 	/* Note that numblocks is 2 * (real numblocks) here;
 	 * see i += 2 below as it makses shifting and masking less painful
 	 */
-	numblocks = mtd->size >> (bbm->bbt_erase_shift - 1);
+	numblocks = this->chipsize >> (bbm->bbt_erase_shift - 1);
 	startblock = 0;
 	from = 0;
 
@@ -106,7 +107,12 @@ static int create_bbt(struct mtd_info *mtd, uint8_t *buf, struct nand_bbt_descr
 			}
 		}
 		i += 2;
-		from += (1 << bbm->bbt_erase_shift);
+
+		if (FLEXONENAND(this)) {
+			rgn = flexonenand_region(mtd, from);
+			from += mtd->eraseregions[rgn].erasesize;
+		} else
+			from += (1 << bbm->bbt_erase_shift);
 	}
 
 	return 0;
@@ -143,7 +149,7 @@ static int onenand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt)
 	uint8_t res;
 
 	/* Get block number * 2 */
-	block = (int) (offs >> (bbm->bbt_erase_shift - 1));
+	block = (int) (onenand_block(this, offs) << 1);
 	res = (bbm->bbt[block >> 3] >> (block & 0x06)) & 0x03;
 
 	DEBUG(MTD_DEBUG_LEVEL2, "onenand_isbad_bbt: bbt info for offs 0x%08x: (block %d) 0x%02x\n",
@@ -178,7 +184,7 @@ int onenand_scan_bbt(struct mtd_info *mtd, struct nand_bbt_descr *bd)
 	struct bbm_info *bbm = this->bbm;
 	int len, ret = 0;
 
-	len = mtd->size >> (this->erase_shift + 2);
+	len = this->chipsize >> (this->erase_shift + 2);
 	/* Allocate memory (2bit per block) and clear the memory bad block table */
 	bbm->bbt = kzalloc(len, GFP_KERNEL);
 	if (!bbm->bbt) {
diff --git a/drivers/mtd/onenand/onenand_sim.c b/drivers/mtd/onenand/onenand_sim.c
index d64200b7c94..f6e3c8aebd3 100644
--- a/drivers/mtd/onenand/onenand_sim.c
+++ b/drivers/mtd/onenand/onenand_sim.c
@@ -6,6 +6,10 @@
  *  Copyright © 2005-2007 Samsung Electronics
  *  Kyungmin Park <kyungmin.park@samsung.com>
  *
+ *  Vishak G <vishak.g at samsung.com>, Rohit Hagargundgi <h.rohit at samsung.com>
+ *  Flex-OneNAND simulator support
+ *  Copyright (C) Samsung Electronics, 2008
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
@@ -24,16 +28,38 @@
 #ifndef CONFIG_ONENAND_SIM_MANUFACTURER
 #define CONFIG_ONENAND_SIM_MANUFACTURER         0xec
 #endif
+
 #ifndef CONFIG_ONENAND_SIM_DEVICE_ID
 #define CONFIG_ONENAND_SIM_DEVICE_ID            0x04
 #endif
+
+#define CONFIG_FLEXONENAND ((CONFIG_ONENAND_SIM_DEVICE_ID >> 9) & 1)
+
 #ifndef CONFIG_ONENAND_SIM_VERSION_ID
 #define CONFIG_ONENAND_SIM_VERSION_ID           0x1e
 #endif
 
+#ifndef CONFIG_ONENAND_SIM_TECHNOLOGY_ID
+#define CONFIG_ONENAND_SIM_TECHNOLOGY_ID CONFIG_FLEXONENAND
+#endif
+
+/* Initial boundary values for Flex-OneNAND Simulator */
+#ifndef CONFIG_FLEXONENAND_SIM_DIE0_BOUNDARY
+#define CONFIG_FLEXONENAND_SIM_DIE0_BOUNDARY	0x01
+#endif
+
+#ifndef CONFIG_FLEXONENAND_SIM_DIE1_BOUNDARY
+#define CONFIG_FLEXONENAND_SIM_DIE1_BOUNDARY	0x01
+#endif
+
 static int manuf_id	= CONFIG_ONENAND_SIM_MANUFACTURER;
 static int device_id	= CONFIG_ONENAND_SIM_DEVICE_ID;
 static int version_id	= CONFIG_ONENAND_SIM_VERSION_ID;
+static int technology_id = CONFIG_ONENAND_SIM_TECHNOLOGY_ID;
+static int boundary[] = {
+	CONFIG_FLEXONENAND_SIM_DIE0_BOUNDARY,
+	CONFIG_FLEXONENAND_SIM_DIE1_BOUNDARY,
+};
 
 struct onenand_flash {
 	void __iomem *base;
@@ -57,12 +83,18 @@ struct onenand_flash {
 	(writew(v, this->base + ONENAND_REG_WP_STATUS))
 
 /* It has all 0xff chars */
-#define MAX_ONENAND_PAGESIZE		(2048 + 64)
+#define MAX_ONENAND_PAGESIZE		(4096 + 128)
 static unsigned char *ffchars;
 
+#if CONFIG_FLEXONENAND
+#define PARTITION_NAME "Flex-OneNAND simulator partition"
+#else
+#define PARTITION_NAME "OneNAND simulator partition"
+#endif
+
 static struct mtd_partition os_partitions[] = {
 	{
-		.name		= "OneNAND simulator partition",
+		.name		= PARTITION_NAME,
 		.offset		= 0,
 		.size		= MTDPART_SIZ_FULL,
 	},
@@ -104,6 +136,7 @@ static void onenand_lock_handle(struct onenand_chip *this, int cmd)
 
 	switch (cmd) {
 	case ONENAND_CMD_UNLOCK:
+	case ONENAND_CMD_UNLOCK_ALL:
 		if (block_lock_scheme)
 			ONENAND_SET_WP_STATUS(ONENAND_WP_US, this);
 		else
@@ -228,10 +261,12 @@ static void onenand_data_handle(struct onenand_chip *this, int cmd,
 {
 	struct mtd_info *mtd = &info->mtd;
 	struct onenand_flash *flash = this->priv;
-	int main_offset, spare_offset;
+	int main_offset, spare_offset, die = 0;
 	void __iomem *src;
 	void __iomem *dest;
 	unsigned int i;
+	static int pi_operation;
+	int erasesize, rgn;
 
 	if (dataram) {
 		main_offset = mtd->writesize;
@@ -241,10 +276,27 @@ static void onenand_data_handle(struct onenand_chip *this, int cmd,
 		spare_offset = 0;
 	}
 
+	if (pi_operation) {
+		die = readw(this->base + ONENAND_REG_START_ADDRESS2);
+		die >>= ONENAND_DDP_SHIFT;
+	}
+
 	switch (cmd) {
+	case FLEXONENAND_CMD_PI_ACCESS:
+		pi_operation = 1;
+		break;
+
+	case ONENAND_CMD_RESET:
+		pi_operation = 0;
+		break;
+
 	case ONENAND_CMD_READ:
 		src = ONENAND_CORE(flash) + offset;
 		dest = ONENAND_MAIN_AREA(this, main_offset);
+		if (pi_operation) {
+			writew(boundary[die], this->base + ONENAND_DATARAM);
+			break;
+		}
 		memcpy(dest, src, mtd->writesize);
 		/* Fall through */
 
@@ -257,6 +309,10 @@ static void onenand_data_handle(struct onenand_chip *this, int cmd,
 	case ONENAND_CMD_PROG:
 		src = ONENAND_MAIN_AREA(this, main_offset);
 		dest = ONENAND_CORE(flash) + offset;
+		if (pi_operation) {
+			boundary[die] = readw(this->base + ONENAND_DATARAM);
+			break;
+		}
 		/* To handle partial write */
 		for (i = 0; i < (1 << mtd->subpage_sft); i++) {
 			int off = i * this->subpagesize;
@@ -284,9 +340,18 @@ static void onenand_data_handle(struct onenand_chip *this, int cmd,
 		break;
 
 	case ONENAND_CMD_ERASE:
-		memset(ONENAND_CORE(flash) + offset, 0xff, mtd->erasesize);
+		if (pi_operation)
+			break;
+
+		if (FLEXONENAND(this)) {
+			rgn = flexonenand_region(mtd, offset);
+			erasesize = mtd->eraseregions[rgn].erasesize;
+		} else
+			erasesize = mtd->erasesize;
+
+		memset(ONENAND_CORE(flash) + offset, 0xff, erasesize);
 		memset(ONENAND_CORE_SPARE(flash, this, offset), 0xff,
-		       (mtd->erasesize >> 5));
+		       (erasesize >> 5));
 		break;
 
 	default:
@@ -339,7 +404,7 @@ static void onenand_command_handle(struct onenand_chip *this, int cmd)
 	}
 
 	if (block != -1)
-		offset += block << this->erase_shift;
+		offset = onenand_addr(this, block);
 
 	if (page != -1)
 		offset += page << this->page_shift;
@@ -390,6 +455,7 @@ static int __init flash_init(struct onenand_flash *flash)
 	}
 
 	density = device_id >> ONENAND_DEVICE_DENSITY_SHIFT;
+	density &= ONENAND_DEVICE_DENSITY_MASK;
 	size = ((16 << 20) << density);
 
 	ONENAND_CORE(flash) = vmalloc(size + (size >> 5));
@@ -405,8 +471,9 @@ static int __init flash_init(struct onenand_flash *flash)
 	writew(manuf_id, flash->base + ONENAND_REG_MANUFACTURER_ID);
 	writew(device_id, flash->base + ONENAND_REG_DEVICE_ID);
 	writew(version_id, flash->base + ONENAND_REG_VERSION_ID);
+	writew(technology_id, flash->base + ONENAND_REG_TECHNOLOGY);
 
-	if (density < 2)
+	if (density < 2 && (!CONFIG_FLEXONENAND))
 		buffer_size = 0x0400;	/* 1KiB page */
 	else
 		buffer_size = 0x0800;	/* 2KiB page */
diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h
index 0fa3ac4ad57..9aab82c1c74 100644
--- a/include/linux/mtd/onenand.h
+++ b/include/linux/mtd/onenand.h
@@ -17,6 +17,7 @@
 #include <linux/mtd/onenand_regs.h>
 #include <linux/mtd/bbm.h>
 
+#define MAX_DIES		2
 #define MAX_BUFFERRAM		2
 
 /* Scan and identify a OneNAND device */
@@ -51,7 +52,12 @@ struct onenand_bufferram {
 /**
  * struct onenand_chip - OneNAND Private Flash Chip Data
  * @base:		[BOARDSPECIFIC] address to access OneNAND
+ * @dies:		[INTERN][FLEX-ONENAND] number of dies on chip
+ * @boundary:		[INTERN][FLEX-ONENAND] Boundary of the dies
+ * @diesize:		[INTERN][FLEX-ONENAND] Size of the dies
  * @chipsize:		[INTERN] the size of one chip for multichip arrays
+ *			FIXME For Flex-OneNAND, chipsize holds maximum possible
+ *			device size ie when all blocks are considered MLC
  * @device_id:		[INTERN] device ID
  * @density_mask:	chip density, used for DDP devices
  * @verstion_id:	[INTERN] version ID
@@ -92,9 +98,13 @@ struct onenand_bufferram {
  */
 struct onenand_chip {
 	void __iomem		*base;
+	unsigned		dies;
+	unsigned		boundary[MAX_DIES];
+	loff_t			diesize[MAX_DIES];
 	unsigned int		chipsize;
 	unsigned int		device_id;
 	unsigned int		version_id;
+	unsigned int		technology;
 	unsigned int		density_mask;
 	unsigned int		options;
 
@@ -145,6 +155,8 @@ struct onenand_chip {
 #define ONENAND_SET_BUFFERRAM0(this)		(this->bufferram_index = 0)
 #define ONENAND_SET_BUFFERRAM1(this)		(this->bufferram_index = 1)
 
+#define FLEXONENAND(this)						\
+	(this->device_id & DEVICE_IS_FLEXONENAND)
 #define ONENAND_GET_SYS_CFG1(this)					\
 	(this->read_word(this->base + ONENAND_REG_SYS_CFG1))
 #define ONENAND_SET_SYS_CFG1(v, this)					\
@@ -153,6 +165,9 @@ struct onenand_chip {
 #define ONENAND_IS_DDP(this)						\
 	(this->device_id & ONENAND_DEVICE_IS_DDP)
 
+#define ONENAND_IS_MLC(this)						\
+	(this->technology & ONENAND_TECHNOLOGY_IS_MLC)
+
 #ifdef CONFIG_MTD_ONENAND_2X_PROGRAM
 #define ONENAND_IS_2PLANE(this)						\
 	(this->options & ONENAND_HAS_2PLANE)
@@ -190,5 +205,8 @@ struct onenand_manufacturers {
 
 int onenand_bbt_read_oob(struct mtd_info *mtd, loff_t from,
 			 struct mtd_oob_ops *ops);
+unsigned onenand_block(struct onenand_chip *this, loff_t addr);
+loff_t onenand_addr(struct onenand_chip *this, int block);
+int flexonenand_region(struct mtd_info *mtd, loff_t addr);
 
 #endif	/* __LINUX_MTD_ONENAND_H */
diff --git a/include/linux/mtd/onenand_regs.h b/include/linux/mtd/onenand_regs.h
index 0c6bbe28f38..86a6bbef646 100644
--- a/include/linux/mtd/onenand_regs.h
+++ b/include/linux/mtd/onenand_regs.h
@@ -67,6 +67,9 @@
 /*
  * Device ID Register F001h (R)
  */
+#define DEVICE_IS_FLEXONENAND		(1 << 9)
+#define FLEXONENAND_PI_MASK		(0x3ff)
+#define FLEXONENAND_PI_UNLOCK_SHIFT	(14)
 #define ONENAND_DEVICE_DENSITY_MASK	(0xf)
 #define ONENAND_DEVICE_DENSITY_SHIFT	(4)
 #define ONENAND_DEVICE_IS_DDP		(1 << 3)
@@ -83,6 +86,11 @@
  */
 #define ONENAND_VERSION_PROCESS_SHIFT	(8)
 
+/*
+ * Technology Register F006h (R)
+ */
+#define ONENAND_TECHNOLOGY_IS_MLC	(1 << 0)
+
 /*
  * Start Address 1 F100h (R/W) & Start Address 2 F101h (R/W)
  */
@@ -93,7 +101,8 @@
 /*
  * Start Address 8 F107h (R/W)
  */
-#define ONENAND_FPA_MASK		(0x3f)
+/* Note: It's actually 0x3f in case of SLC */
+#define ONENAND_FPA_MASK		(0x7f)
 #define ONENAND_FPA_SHIFT		(2)
 #define ONENAND_FSA_MASK		(0x03)
 
@@ -105,7 +114,8 @@
 #define ONENAND_BSA_BOOTRAM		(0 << 2)
 #define ONENAND_BSA_DATARAM0		(2 << 2)
 #define ONENAND_BSA_DATARAM1		(3 << 2)
-#define ONENAND_BSC_MASK		(0x03)
+/* Note: It's actually 0x03 in case of SLC */
+#define ONENAND_BSC_MASK		(0x07)
 
 /*
  * Command Register F220h (R/W)
@@ -124,9 +134,13 @@
 #define ONENAND_CMD_RESET		(0xF0)
 #define ONENAND_CMD_OTP_ACCESS		(0x65)
 #define ONENAND_CMD_READID		(0x90)
+#define FLEXONENAND_CMD_PI_UPDATE	(0x05)
+#define FLEXONENAND_CMD_PI_ACCESS	(0x66)
+#define FLEXONENAND_CMD_RECOVER_LSB	(0x05)
 
 /* NOTE: Those are not *REAL* commands */
 #define ONENAND_CMD_BUFFERRAM		(0x1978)
+#define FLEXONENAND_CMD_READ_PI		(0x1985)
 
 /*
  * System Configuration 1 Register F221h (R, R/W)
@@ -192,10 +206,12 @@
 #define ONENAND_ECC_1BIT_ALL		(0x5555)
 #define ONENAND_ECC_2BIT		(1 << 1)
 #define ONENAND_ECC_2BIT_ALL		(0xAAAA)
+#define FLEXONENAND_UNCORRECTABLE_ERROR	(0x1010)
 
 /*
  * One-Time Programmable (OTP)
  */
+#define FLEXONENAND_OTP_LOCK_OFFSET		(2048)
 #define ONENAND_OTP_LOCK_OFFSET		(14)
 
 #endif	/* __ONENAND_REG_H */
-- 
cgit v1.2.3-70-g09d2


From 31bb999ee73748068ddc271dd99b22dcc418efe3 Mon Sep 17 00:00:00 2001
From: Kyungmin Park <kmpark@infradead.org>
Date: Tue, 12 May 2009 13:46:57 -0700
Subject: mtd: onenand: add bbt_wait & unlock_all as replaceable for some
 platform

Add bbt_wait & unlock_all as replaceable for some platform such as
  s3c64xx s3c64xx has its own OneNAND controller and another interface

Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/onenand/onenand_base.c | 12 ++++++++++--
 include/linux/mtd/onenand.h        |  5 +++++
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index 8d4c9c25373..864327ed7fb 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -1506,7 +1506,7 @@ int onenand_bbt_read_oob(struct mtd_info *mtd, loff_t from,
 
 		onenand_update_bufferram(mtd, from, 0);
 
-		ret = onenand_bbt_wait(mtd, FL_READING);
+		ret = this->bbt_wait(mtd, FL_READING);
 		if (unlikely(ret))
 			ret = onenand_recover_lsb(mtd, from, ret);
 
@@ -2527,6 +2527,10 @@ static void onenand_unlock_all(struct mtd_info *mtd)
 		    & ONENAND_CTRL_ONGO)
 			continue;
 
+		/* Don't check lock status */
+		if (this->options & ONENAND_SKIP_UNLOCK_CHECK)
+			return;
+
 		/* Check lock status */
 		if (onenand_check_lock_status(this))
 			return;
@@ -3442,6 +3446,10 @@ int onenand_scan(struct mtd_info *mtd, int maxchips)
 		this->command = onenand_command;
 	if (!this->wait)
 		onenand_setup_wait(mtd);
+	if (!this->bbt_wait)
+		this->bbt_wait = onenand_bbt_wait;
+	if (!this->unlock_all)
+		this->unlock_all = onenand_unlock_all;
 
 	if (!this->read_bufferram)
 		this->read_bufferram = onenand_read_bufferram;
@@ -3559,7 +3567,7 @@ int onenand_scan(struct mtd_info *mtd, int maxchips)
 	mtd->owner = THIS_MODULE;
 
 	/* Unlock whole block */
-	onenand_unlock_all(mtd);
+	this->unlock_all(mtd);
 
 	ret = this->scan_bbt(mtd);
 	if ((!FLEXONENAND(this)) || ret)
diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h
index 9aab82c1c74..8ed87337438 100644
--- a/include/linux/mtd/onenand.h
+++ b/include/linux/mtd/onenand.h
@@ -74,6 +74,8 @@ struct onenand_bufferram {
  * @command:		[REPLACEABLE] hardware specific function for writing
  *			commands to the chip
  * @wait:		[REPLACEABLE] hardware specific function for wait on ready
+ * @bbt_wait:		[REPLACEABLE] hardware specific function for bbt wait on ready
+ * @unlock_all:		[REPLACEABLE] hardware specific function for unlock all
  * @read_bufferram:	[REPLACEABLE] hardware specific function for BufferRAM Area
  * @write_bufferram:	[REPLACEABLE] hardware specific function for BufferRAM Area
  * @read_word:		[REPLACEABLE] hardware specific function for read
@@ -118,6 +120,8 @@ struct onenand_chip {
 
 	int (*command)(struct mtd_info *mtd, int cmd, loff_t address, size_t len);
 	int (*wait)(struct mtd_info *mtd, int state);
+	int (*bbt_wait)(struct mtd_info *mtd, int state);
+	void (*unlock_all)(struct mtd_info *mtd);
 	int (*read_bufferram)(struct mtd_info *mtd, int area,
 			unsigned char *buffer, int offset, size_t count);
 	int (*write_bufferram)(struct mtd_info *mtd, int area,
@@ -184,6 +188,7 @@ struct onenand_chip {
 #define ONENAND_HAS_CONT_LOCK		(0x0001)
 #define ONENAND_HAS_UNLOCK_ALL		(0x0002)
 #define ONENAND_HAS_2PLANE		(0x0004)
+#define ONENAND_SKIP_UNLOCK_CHECK	(0x0100)
 #define ONENAND_PAGEBUF_ALLOC		(0x1000)
 #define ONENAND_OOBBUF_ALLOC		(0x2000)
 
-- 
cgit v1.2.3-70-g09d2


From a21ca2cac582886a3e95c8bb84ff7c52d4d15e54 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 6 Jun 2009 09:58:57 +0200
Subject: perf_counter: Separate out attr->type from attr->config

Counter type is a frequently used value and we do a lot of
bit juggling by encoding and decoding it from attr->config.

Clean this up by creating a separate attr->type field.

Also clean up the various similarly complex user-space bits
all around counter attribute management.

The net improvement is significant, and it will be easier
to add a new major type (which is what triggered this cleanup).

(This changes the ABI, all tools are adapted.)
(PowerPC build-tested.)

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/perf_counter/builtin-record.c    | 105 ++++++++++------------
 Documentation/perf_counter/builtin-stat.c      |  76 +++++++---------
 Documentation/perf_counter/builtin-top.c       |  67 +++++---------
 Documentation/perf_counter/perf.h              |   2 -
 Documentation/perf_counter/util/parse-events.c | 120 ++++++++++++++-----------
 Documentation/perf_counter/util/parse-events.h |   7 +-
 arch/powerpc/kernel/perf_counter.c             |   6 +-
 arch/x86/kernel/cpu/perf_counter.c             |   8 +-
 include/linux/perf_counter.h                   |  65 +++-----------
 kernel/perf_counter.c                          |  14 ++-
 10 files changed, 196 insertions(+), 274 deletions(-)

(limited to 'include')

diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c
index c22ea0c7472..130fd88266b 100644
--- a/Documentation/perf_counter/builtin-record.c
+++ b/Documentation/perf_counter/builtin-record.c
@@ -20,10 +20,10 @@
 #define ALIGN(x, a)		__ALIGN_MASK(x, (typeof(x))(a)-1)
 #define __ALIGN_MASK(x, mask)	(((x)+(mask))&~(mask))
 
-static long			default_interval = 100000;
-static long			event_count[MAX_COUNTERS];
-
 static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
+
+static long			default_interval		= 100000;
+
 static int			nr_cpus				= 0;
 static unsigned int		page_size;
 static unsigned int		mmap_pages			= 128;
@@ -38,22 +38,44 @@ static int			inherit				= 1;
 static int			force				= 0;
 static int			append_file			= 0;
 
-const unsigned int default_count[] = {
-	1000000,
-	1000000,
-	  10000,
-	  10000,
-	1000000,
-	  10000,
+static long			samples;
+static struct timeval		last_read;
+static struct timeval		this_read;
+
+static __u64			bytes_written;
+
+static struct pollfd		event_array[MAX_NR_CPUS * MAX_COUNTERS];
+
+static int			nr_poll;
+static int			nr_cpu;
+
+struct mmap_event {
+	struct perf_event_header	header;
+	__u32				pid;
+	__u32				tid;
+	__u64				start;
+	__u64				len;
+	__u64				pgoff;
+	char				filename[PATH_MAX];
+};
+
+struct comm_event {
+	struct perf_event_header	header;
+	__u32				pid;
+	__u32				tid;
+	char				comm[16];
 };
 
+
 struct mmap_data {
-	int counter;
-	void *base;
-	unsigned int mask;
-	unsigned int prev;
+	int			counter;
+	void			*base;
+	unsigned int		mask;
+	unsigned int		prev;
 };
 
+static struct mmap_data		mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
+
 static unsigned int mmap_read_head(struct mmap_data *md)
 {
 	struct perf_counter_mmap_page *pc = md->base;
@@ -65,11 +87,6 @@ static unsigned int mmap_read_head(struct mmap_data *md)
 	return head;
 }
 
-static long samples;
-static struct timeval last_read, this_read;
-
-static __u64 bytes_written;
-
 static void mmap_read(struct mmap_data *md)
 {
 	unsigned int head = mmap_read_head(md);
@@ -157,29 +174,6 @@ static void sig_handler(int sig)
 	done = 1;
 }
 
-static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
-static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
-
-static int nr_poll;
-static int nr_cpu;
-
-struct mmap_event {
-	struct perf_event_header	header;
-	__u32				pid;
-	__u32				tid;
-	__u64				start;
-	__u64				len;
-	__u64				pgoff;
-	char				filename[PATH_MAX];
-};
-
-struct comm_event {
-	struct perf_event_header	header;
-	__u32				pid;
-	__u32				tid;
-	char				comm[16];
-};
-
 static void pid_synthesize_comm_event(pid_t pid, int full)
 {
 	struct comm_event comm_ev;
@@ -341,24 +335,21 @@ static int group_fd;
 
 static void create_counter(int counter, int cpu, pid_t pid)
 {
-	struct perf_counter_attr attr;
+	struct perf_counter_attr *attr = attrs + counter;
 	int track = 1;
 
-	memset(&attr, 0, sizeof(attr));
-	attr.config		= event_id[counter];
-	attr.sample_period	= event_count[counter];
-	attr.sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_PERIOD;
+	attr->sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_PERIOD;
 	if (freq) {
-		attr.freq		= 1;
-		attr.sample_freq	= freq;
+		attr->freq		= 1;
+		attr->sample_freq	= freq;
 	}
-	attr.mmap		= track;
-	attr.comm		= track;
-	attr.inherit		= (cpu < 0) && inherit;
+	attr->mmap		= track;
+	attr->comm		= track;
+	attr->inherit		= (cpu < 0) && inherit;
 
 	track = 0; /* only the first counter needs these */
 
-	fd[nr_cpu][counter] = sys_perf_counter_open(&attr, pid, cpu, group_fd, 0);
+	fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0);
 
 	if (fd[nr_cpu][counter] < 0) {
 		int err = errno;
@@ -542,16 +533,14 @@ int cmd_record(int argc, const char **argv, const char *prefix)
 	if (!argc && target_pid == -1 && !system_wide)
 		usage_with_options(record_usage, options);
 
-	if (!nr_counters) {
+	if (!nr_counters)
 		nr_counters = 1;
-		event_id[0] = 0;
-	}
 
 	for (counter = 0; counter < nr_counters; counter++) {
-		if (event_count[counter])
+		if (attrs[counter].sample_period)
 			continue;
 
-		event_count[counter] = default_interval;
+		attrs[counter].sample_period = default_interval;
 	}
 
 	return __cmd_record(argc, argv);
diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c
index 4fc0d80440e..9711e552423 100644
--- a/Documentation/perf_counter/builtin-stat.c
+++ b/Documentation/perf_counter/builtin-stat.c
@@ -44,23 +44,22 @@
 
 #include <sys/prctl.h>
 
-static int			system_wide			=  0;
-static int			inherit				=  1;
+static struct perf_counter_attr default_attrs[MAX_COUNTERS] = {
 
-static __u64			default_event_id[MAX_COUNTERS]	= {
-	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
-	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
-	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
-	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_TASK_CLOCK		},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_CONTEXT_SWITCHES	},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_CPU_MIGRATIONS	},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_PAGE_FAULTS	},
 
-	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
-	EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
-	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
-	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CPU_CYCLES		},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_INSTRUCTIONS	},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CACHE_REFERENCES	},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CACHE_MISSES	},
 };
 
-static int			default_interval = 100000;
-static int			event_count[MAX_COUNTERS];
+static int			system_wide			=  0;
+static int			inherit				=  1;
+
 static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
 
 static int			target_pid			= -1;
@@ -86,22 +85,16 @@ static __u64			walltime_nsecs;
 
 static void create_perfstat_counter(int counter)
 {
-	struct perf_counter_attr attr;
-
-	memset(&attr, 0, sizeof(attr));
-	attr.config		= event_id[counter];
-	attr.sample_type	= 0;
-	attr.exclude_kernel = event_mask[counter] & EVENT_MASK_KERNEL;
-	attr.exclude_user   = event_mask[counter] & EVENT_MASK_USER;
+	struct perf_counter_attr *attr = attrs + counter;
 
 	if (scale)
-		attr.read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
-					  PERF_FORMAT_TOTAL_TIME_RUNNING;
+		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+				    PERF_FORMAT_TOTAL_TIME_RUNNING;
 
 	if (system_wide) {
 		int cpu;
 		for (cpu = 0; cpu < nr_cpus; cpu ++) {
-			fd[cpu][counter] = sys_perf_counter_open(&attr, -1, cpu, -1, 0);
+			fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0);
 			if (fd[cpu][counter] < 0) {
 				printf("perfstat error: syscall returned with %d (%s)\n",
 						fd[cpu][counter], strerror(errno));
@@ -109,10 +102,10 @@ static void create_perfstat_counter(int counter)
 			}
 		}
 	} else {
-		attr.inherit	= inherit;
-		attr.disabled	= 1;
+		attr->inherit	= inherit;
+		attr->disabled	= 1;
 
-		fd[0][counter] = sys_perf_counter_open(&attr, 0, -1, -1, 0);
+		fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0);
 		if (fd[0][counter] < 0) {
 			printf("perfstat error: syscall returned with %d (%s)\n",
 					fd[0][counter], strerror(errno));
@@ -126,9 +119,13 @@ static void create_perfstat_counter(int counter)
  */
 static inline int nsec_counter(int counter)
 {
-	if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK))
+	if (attrs[counter].type != PERF_TYPE_SOFTWARE)
+		return 0;
+
+	if (attrs[counter].config == PERF_COUNT_CPU_CLOCK)
 		return 1;
-	if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK))
+
+	if (attrs[counter].config == PERF_COUNT_TASK_CLOCK)
 		return 1;
 
 	return 0;
@@ -177,7 +174,8 @@ static void read_counter(int counter)
 	/*
 	 * Save the full runtime - to allow normalization during printout:
 	 */
-	if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK))
+	if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
+		attrs[counter].config == PERF_COUNT_TASK_CLOCK)
 		runtime_nsecs = count[0];
 }
 
@@ -203,8 +201,8 @@ static void print_counter(int counter)
 
 		fprintf(stderr, " %14.6f  %-20s",
 			msecs, event_name(counter));
-		if (event_id[counter] ==
-				EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
+		if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
+			attrs[counter].config == PERF_COUNT_TASK_CLOCK) {
 
 			fprintf(stderr, " # %11.3f CPU utilization factor",
 				(double)count[0] / (double)walltime_nsecs);
@@ -300,8 +298,6 @@ static char events_help_msg[EVENTS_HELP_MAX];
 static const struct option options[] = {
 	OPT_CALLBACK('e', "event", NULL, "event",
 		     events_help_msg, parse_events),
-	OPT_INTEGER('c', "count", &default_interval,
-		    "event period to sample"),
 	OPT_BOOLEAN('i', "inherit", &inherit,
 		    "child tasks inherit counters"),
 	OPT_INTEGER('p', "pid", &target_pid,
@@ -315,27 +311,19 @@ static const struct option options[] = {
 
 int cmd_stat(int argc, const char **argv, const char *prefix)
 {
-	int counter;
-
 	page_size = sysconf(_SC_PAGE_SIZE);
 
 	create_events_help(events_help_msg);
-	memcpy(event_id, default_event_id, sizeof(default_event_id));
+
+	memcpy(attrs, default_attrs, sizeof(attrs));
 
 	argc = parse_options(argc, argv, options, stat_usage, 0);
 	if (!argc)
 		usage_with_options(stat_usage, options);
 
-	if (!nr_counters) {
+	if (!nr_counters)
 		nr_counters = 8;
-	}
-
-	for (counter = 0; counter < nr_counters; counter++) {
-		if (event_count[counter])
-			continue;
 
-		event_count[counter] = default_interval;
-	}
 	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
 	assert(nr_cpus <= MAX_NR_CPUS);
 	assert(nr_cpus >= 0);
diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index b2f480b5a13..98a6d53e17b 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -48,22 +48,11 @@
 #include <linux/unistd.h>
 #include <linux/types.h>
 
-static int			system_wide			=  0;
+static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
 
-static __u64			default_event_id[MAX_COUNTERS]		= {
-	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
-	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
-	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
-	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),
+static int			system_wide			=  0;
 
-	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
-	EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
-	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
-	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
-};
-static int			default_interval = 100000;
-static int			event_count[MAX_COUNTERS];
-static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
+static int			default_interval		= 100000;
 
 static __u64			count_filter			=  5;
 static int			print_entries			= 15;
@@ -85,15 +74,6 @@ static int			delay_secs			=  2;
 static int			zero;
 static int			dump_symtab;
 
-static const unsigned int default_count[] = {
-	1000000,
-	1000000,
-	  10000,
-	  10000,
-	1000000,
-	  10000,
-};
-
 /*
  * Symbols
  */
@@ -112,7 +92,7 @@ struct sym_entry {
 
 struct sym_entry		*sym_filter_entry;
 
-struct dso *kernel_dso;
+struct dso			*kernel_dso;
 
 /*
  * Symbols will be added here in record_ip and will get out
@@ -213,7 +193,7 @@ static void print_sym_table(void)
 		100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)));
 
 	if (nr_counters == 1) {
-		printf("%d", event_count[0]);
+		printf("%Ld", attrs[0].sample_period);
 		if (freq)
 			printf("Hz ");
 		else
@@ -421,10 +401,10 @@ static void process_event(uint64_t ip, int counter)
 }
 
 struct mmap_data {
-	int counter;
-	void *base;
-	unsigned int mask;
-	unsigned int prev;
+	int			counter;
+	void			*base;
+	unsigned int		mask;
+	unsigned int		prev;
 };
 
 static unsigned int mmap_read_head(struct mmap_data *md)
@@ -539,7 +519,7 @@ static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
 
 static int __cmd_top(void)
 {
-	struct perf_counter_attr attr;
+	struct perf_counter_attr *attr;
 	pthread_t thread;
 	int i, counter, group_fd, nr_poll = 0;
 	unsigned int cpu;
@@ -553,13 +533,12 @@ static int __cmd_top(void)
 			if (target_pid == -1 && profile_cpu == -1)
 				cpu = i;
 
-			memset(&attr, 0, sizeof(attr));
-			attr.config		= event_id[counter];
-			attr.sample_period	= event_count[counter];
-			attr.sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
-			attr.freq		= freq;
+			attr = attrs + counter;
 
-			fd[i][counter] = sys_perf_counter_open(&attr, target_pid, cpu, group_fd, 0);
+			attr->sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+			attr->freq		= freq;
+
+			fd[i][counter] = sys_perf_counter_open(attr, target_pid, cpu, group_fd, 0);
 			if (fd[i][counter] < 0) {
 				int err = errno;
 
@@ -670,7 +649,6 @@ int cmd_top(int argc, const char **argv, const char *prefix)
 	page_size = sysconf(_SC_PAGE_SIZE);
 
 	create_events_help(events_help_msg);
-	memcpy(event_id, default_event_id, sizeof(default_event_id));
 
 	argc = parse_options(argc, argv, options, top_usage, 0);
 	if (argc)
@@ -688,19 +666,22 @@ int cmd_top(int argc, const char **argv, const char *prefix)
 		profile_cpu = -1;
 	}
 
-	if (!nr_counters) {
+	if (!nr_counters)
 		nr_counters = 1;
-		event_id[0] = 0;
-	}
 
 	if (delay_secs < 1)
 		delay_secs = 1;
 
+	parse_symbols();
+
+	/*
+	 * Fill in the ones not specifically initialized via -c:
+	 */
 	for (counter = 0; counter < nr_counters; counter++) {
-		if (event_count[counter])
+		if (attrs[counter].sample_period)
 			continue;
 
-		event_count[counter] = default_interval;
+		attrs[counter].sample_period = default_interval;
 	}
 
 	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
@@ -710,7 +691,5 @@ int cmd_top(int argc, const char **argv, const char *prefix)
 	if (target_pid != -1 || profile_cpu != -1)
 		nr_cpus = 1;
 
-	parse_symbols();
-
 	return __cmd_top();
 }
diff --git a/Documentation/perf_counter/perf.h b/Documentation/perf_counter/perf.h
index 10622a48b40..af0a5046d74 100644
--- a/Documentation/perf_counter/perf.h
+++ b/Documentation/perf_counter/perf.h
@@ -64,6 +64,4 @@ sys_perf_counter_open(struct perf_counter_attr *attr_uptr,
 #define MAX_COUNTERS			256
 #define MAX_NR_CPUS			256
 
-#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
-
 #endif
diff --git a/Documentation/perf_counter/util/parse-events.c b/Documentation/perf_counter/util/parse-events.c
index 2fdfd1d923f..eb56bd99657 100644
--- a/Documentation/perf_counter/util/parse-events.c
+++ b/Documentation/perf_counter/util/parse-events.c
@@ -6,37 +6,39 @@
 #include "exec_cmd.h"
 #include "string.h"
 
-int nr_counters;
+int					nr_counters;
 
-__u64			event_id[MAX_COUNTERS]		= { };
-int			event_mask[MAX_COUNTERS];
+struct perf_counter_attr		attrs[MAX_COUNTERS];
 
 struct event_symbol {
-	__u64 event;
-	char *symbol;
+	__u8	type;
+	__u64	config;
+	char	*symbol;
 };
 
+#define C(x, y) .type = PERF_TYPE_##x, .config = PERF_COUNT_##y
+
 static struct event_symbol event_symbols[] = {
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cpu-cycles",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),		"cycles",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),		"instructions",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),		"cache-references",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),		"cache-misses",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branch-instructions",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),	"branches",		},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES),		"branch-misses",	},
-	{EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES),		"bus-cycles",		},
-
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK),			"cpu-clock",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),		"task-clock",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"page-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),		"faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN),		"minor-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ),		"major-faults",		},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"context-switches",	},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),		"cs",			},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"cpu-migrations",	},
-	{EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),		"migrations",		},
+  { C(HARDWARE, CPU_CYCLES),		"cpu-cycles",		},
+  { C(HARDWARE, CPU_CYCLES),		"cycles",		},
+  { C(HARDWARE, INSTRUCTIONS),		"instructions",		},
+  { C(HARDWARE, CACHE_REFERENCES),	"cache-references",	},
+  { C(HARDWARE, CACHE_MISSES),		"cache-misses",		},
+  { C(HARDWARE, BRANCH_INSTRUCTIONS),	"branch-instructions",	},
+  { C(HARDWARE, BRANCH_INSTRUCTIONS),	"branches",		},
+  { C(HARDWARE, BRANCH_MISSES),		"branch-misses",	},
+  { C(HARDWARE, BUS_CYCLES),		"bus-cycles",		},
+
+  { C(SOFTWARE, CPU_CLOCK),		"cpu-clock",		},
+  { C(SOFTWARE, TASK_CLOCK),		"task-clock",		},
+  { C(SOFTWARE, PAGE_FAULTS),		"page-faults",		},
+  { C(SOFTWARE, PAGE_FAULTS),		"faults",		},
+  { C(SOFTWARE, PAGE_FAULTS_MIN),	"minor-faults",		},
+  { C(SOFTWARE, PAGE_FAULTS_MAJ),	"major-faults",		},
+  { C(SOFTWARE, CONTEXT_SWITCHES),	"context-switches",	},
+  { C(SOFTWARE, CONTEXT_SWITCHES),	"cs",			},
+  { C(SOFTWARE, CPU_MIGRATIONS),	"cpu-migrations",	},
+  { C(SOFTWARE, CPU_MIGRATIONS),	"migrations",		},
 };
 
 #define __PERF_COUNTER_FIELD(config, name) \
@@ -67,27 +69,26 @@ static char *sw_event_names[] = {
 	"major faults",
 };
 
-char *event_name(int ctr)
+char *event_name(int counter)
 {
-	__u64 config = event_id[ctr];
-	int type = PERF_COUNTER_TYPE(config);
-	int id = PERF_COUNTER_ID(config);
+	__u64 config = attrs[counter].config;
+	int type = attrs[counter].type;
 	static char buf[32];
 
-	if (PERF_COUNTER_RAW(config)) {
-		sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config));
+	if (attrs[counter].type == PERF_TYPE_RAW) {
+		sprintf(buf, "raw 0x%llx", config);
 		return buf;
 	}
 
 	switch (type) {
 	case PERF_TYPE_HARDWARE:
-		if (id < PERF_HW_EVENTS_MAX)
-			return hw_event_names[id];
+		if (config < PERF_HW_EVENTS_MAX)
+			return hw_event_names[config];
 		return "unknown-hardware";
 
 	case PERF_TYPE_SOFTWARE:
-		if (id < PERF_SW_EVENTS_MAX)
-			return sw_event_names[id];
+		if (config < PERF_SW_EVENTS_MAX)
+			return sw_event_names[config];
 		return "unknown-software";
 
 	default:
@@ -101,15 +102,19 @@ char *event_name(int ctr)
  * Each event can have multiple symbolic names.
  * Symbolic names are (almost) exactly matched.
  */
-static __u64 match_event_symbols(const char *str)
+static int match_event_symbols(const char *str, struct perf_counter_attr *attr)
 {
 	__u64 config, id;
 	int type;
 	unsigned int i;
 	const char *sep, *pstr;
 
-	if (str[0] == 'r' && hex2u64(str + 1, &config) > 0)
-		return config | PERF_COUNTER_RAW_MASK;
+	if (str[0] == 'r' && hex2u64(str + 1, &config) > 0) {
+		attr->type = PERF_TYPE_RAW;
+		attr->config = config;
+
+		return 0;
+	}
 
 	pstr = str;
 	sep = strchr(pstr, ':');
@@ -121,35 +126,45 @@ static __u64 match_event_symbols(const char *str)
 		if (sep) {
 			pstr = sep + 1;
 			if (strchr(pstr, 'k'))
-				event_mask[nr_counters] |= EVENT_MASK_USER;
+				attr->exclude_user = 1;
 			if (strchr(pstr, 'u'))
-				event_mask[nr_counters] |= EVENT_MASK_KERNEL;
+				attr->exclude_kernel = 1;
 		}
-		return EID(type, id);
+		attr->type = type;
+		attr->config = id;
+
+		return 0;
 	}
 
 	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
 		if (!strncmp(str, event_symbols[i].symbol,
-			     strlen(event_symbols[i].symbol)))
-			return event_symbols[i].event;
+			     strlen(event_symbols[i].symbol))) {
+
+			attr->type = event_symbols[i].type;
+			attr->config = event_symbols[i].config;
+
+			return 0;
+		}
 	}
 
-	return ~0ULL;
+	return -EINVAL;
 }
 
 int parse_events(const struct option *opt, const char *str, int unset)
 {
-	__u64 config;
+	struct perf_counter_attr attr;
+	int ret;
 
+	memset(&attr, 0, sizeof(attr));
 again:
 	if (nr_counters == MAX_COUNTERS)
 		return -1;
 
-	config = match_event_symbols(str);
-	if (config == ~0ULL)
-		return -1;
+	ret = match_event_symbols(str, &attr);
+	if (ret < 0)
+		return ret;
 
-	event_id[nr_counters] = config;
+	attrs[nr_counters] = attr;
 	nr_counters++;
 
 	str = strstr(str, ",");
@@ -168,7 +183,6 @@ void create_events_help(char *events_help_msg)
 {
 	unsigned int i;
 	char *str;
-	__u64 e;
 
 	str = events_help_msg;
 
@@ -178,9 +192,8 @@ void create_events_help(char *events_help_msg)
 	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
 		int type, id;
 
-		e = event_symbols[i].event;
-		type = PERF_COUNTER_TYPE(e);
-		id = PERF_COUNTER_ID(e);
+		type = event_symbols[i].type;
+		id = event_symbols[i].config;
 
 		if (i)
 			str += sprintf(str, "|");
@@ -191,4 +204,3 @@ void create_events_help(char *events_help_msg)
 
 	str += sprintf(str, "|rNNN]");
 }
-
diff --git a/Documentation/perf_counter/util/parse-events.h b/Documentation/perf_counter/util/parse-events.h
index 0da306bb902..542971c495b 100644
--- a/Documentation/perf_counter/util/parse-events.h
+++ b/Documentation/perf_counter/util/parse-events.h
@@ -3,12 +3,9 @@
  * Parse symbolic events/counts passed in as options:
  */
 
-extern int nr_counters;
-extern __u64			event_id[MAX_COUNTERS];
-extern int			event_mask[MAX_COUNTERS];
+extern int			nr_counters;
 
-#define EVENT_MASK_KERNEL	1
-#define EVENT_MASK_USER		2
+extern struct perf_counter_attr attrs[MAX_COUNTERS];
 
 extern char *event_name(int ctr);
 
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 232b00a36f7..4786ad9a288 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -867,13 +867,13 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 
 	if (!ppmu)
 		return ERR_PTR(-ENXIO);
-	if (!perf_event_raw(&counter->attr)) {
-		ev = perf_event_id(&counter->attr);
+	if (counter->attr.type != PERF_TYPE_RAW) {
+		ev = counter->attr.config;
 		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
 			return ERR_PTR(-EOPNOTSUPP);
 		ev = ppmu->generic_events[ev];
 	} else {
-		ev = perf_event_config(&counter->attr);
+		ev = counter->attr.config;
 	}
 	counter->hw.config_base = ev;
 	counter->hw.idx = 0;
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 8f53f3a7da2..430e048f285 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -292,15 +292,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	/*
 	 * Raw event type provide the config in the event structure
 	 */
-	if (perf_event_raw(attr)) {
-		hwc->config |= x86_pmu.raw_event(perf_event_config(attr));
+	if (attr->type == PERF_TYPE_RAW) {
+		hwc->config |= x86_pmu.raw_event(attr->config);
 	} else {
-		if (perf_event_id(attr) >= x86_pmu.max_events)
+		if (attr->config >= x86_pmu.max_events)
 			return -EINVAL;
 		/*
 		 * The generic map:
 		 */
-		hwc->config |= x86_pmu.event_map(perf_event_id(attr));
+		hwc->config |= x86_pmu.event_map(attr->config);
 	}
 
 	counter->destroy = hw_perf_counter_destroy;
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 4f9d39ecdc0..f794c69b34c 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -73,26 +73,6 @@ enum sw_event_ids {
 	PERF_SW_EVENTS_MAX		= 7,
 };
 
-#define __PERF_COUNTER_MASK(name)			\
-	(((1ULL << PERF_COUNTER_##name##_BITS) - 1) <<	\
-	 PERF_COUNTER_##name##_SHIFT)
-
-#define PERF_COUNTER_RAW_BITS		1
-#define PERF_COUNTER_RAW_SHIFT		63
-#define PERF_COUNTER_RAW_MASK		__PERF_COUNTER_MASK(RAW)
-
-#define PERF_COUNTER_CONFIG_BITS	63
-#define PERF_COUNTER_CONFIG_SHIFT	0
-#define PERF_COUNTER_CONFIG_MASK	__PERF_COUNTER_MASK(CONFIG)
-
-#define PERF_COUNTER_TYPE_BITS		7
-#define PERF_COUNTER_TYPE_SHIFT		56
-#define PERF_COUNTER_TYPE_MASK		__PERF_COUNTER_MASK(TYPE)
-
-#define PERF_COUNTER_EVENT_BITS		56
-#define PERF_COUNTER_EVENT_SHIFT	0
-#define PERF_COUNTER_EVENT_MASK		__PERF_COUNTER_MASK(EVENT)
-
 /*
  * Bits that can be set in attr.sample_type to request information
  * in the overflow packets.
@@ -125,10 +105,13 @@ enum perf_counter_read_format {
  */
 struct perf_counter_attr {
 	/*
-	 * The MSB of the config word signifies if the rest contains cpu
-	 * specific (raw) counter configuration data, if unset, the next
-	 * 7 bits are an event type and the rest of the bits are the event
-	 * identifier.
+	 * Major type: hardware/software/tracepoint/etc.
+	 */
+	__u32			type;
+	__u32			__reserved_1;
+
+	/*
+	 * Type specific configuration information.
 	 */
 	__u64			config;
 
@@ -152,12 +135,11 @@ struct perf_counter_attr {
 				comm	       :  1, /* include comm data     */
 				freq           :  1, /* use freq, not period  */
 
-				__reserved_1   : 53;
+				__reserved_2   : 53;
 
 	__u32			wakeup_events;	/* wakeup every n events */
-	__u32			__reserved_2;
+	__u32			__reserved_3;
 
-	__u64			__reserved_3;
 	__u64			__reserved_4;
 };
 
@@ -278,8 +260,8 @@ enum perf_event_type {
 
 	/*
 	 * struct {
-	 * 	struct perf_event_header	header;
-	 * 	u32				pid, ppid;
+	 *	struct perf_event_header	header;
+	 *	u32				pid, ppid;
 	 * };
 	 */
 	PERF_EVENT_FORK			= 7,
@@ -331,27 +313,6 @@ enum perf_event_type {
 
 struct task_struct;
 
-static inline u64 perf_event_raw(struct perf_counter_attr *attr)
-{
-	return attr->config & PERF_COUNTER_RAW_MASK;
-}
-
-static inline u64 perf_event_config(struct perf_counter_attr *attr)
-{
-	return attr->config & PERF_COUNTER_CONFIG_MASK;
-}
-
-static inline u64 perf_event_type(struct perf_counter_attr *attr)
-{
-	return (attr->config & PERF_COUNTER_TYPE_MASK) >>
-		PERF_COUNTER_TYPE_SHIFT;
-}
-
-static inline u64 perf_event_id(struct perf_counter_attr *attr)
-{
-	return attr->config & PERF_COUNTER_EVENT_MASK;
-}
-
 /**
  * struct hw_perf_counter - performance counter hardware details:
  */
@@ -616,8 +577,8 @@ extern int perf_counter_overflow(struct perf_counter *counter,
  */
 static inline int is_software_counter(struct perf_counter *counter)
 {
-	return !perf_event_raw(&counter->attr) &&
-		perf_event_type(&counter->attr) != PERF_TYPE_HARDWARE;
+	return (counter->attr.type != PERF_TYPE_RAW) &&
+		(counter->attr.type != PERF_TYPE_HARDWARE);
 }
 
 extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 47c92fb927f..75ae76796df 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3091,14 +3091,12 @@ static int perf_swcounter_match(struct perf_counter *counter,
 				enum perf_event_types type,
 				u32 event, struct pt_regs *regs)
 {
-	u64 event_config;
-
-	event_config = ((u64) type << PERF_COUNTER_TYPE_SHIFT) | event;
-
 	if (!perf_swcounter_is_counting(counter))
 		return 0;
 
-	if (counter->attr.config != event_config)
+	if (counter->attr.type != type)
+		return 0;
+	if (counter->attr.config != event)
 		return 0;
 
 	if (regs) {
@@ -3403,7 +3401,7 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 	 * to be kernel events, and page faults are never hypervisor
 	 * events.
 	 */
-	switch (perf_event_id(&counter->attr)) {
+	switch (counter->attr.config) {
 	case PERF_COUNT_CPU_CLOCK:
 		pmu = &perf_ops_cpu_clock;
 
@@ -3496,12 +3494,12 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 	if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP))
 		goto done;
 
-	if (perf_event_raw(attr)) {
+	if (attr->type == PERF_TYPE_RAW) {
 		pmu = hw_perf_counter_init(counter);
 		goto done;
 	}
 
-	switch (perf_event_type(attr)) {
+	switch (attr->type) {
 	case PERF_TYPE_HARDWARE:
 		pmu = hw_perf_counter_init(counter);
 		break;
-- 
cgit v1.2.3-70-g09d2


From 8326f44da090d6d304d29b9fdc7fb3e20889e329 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 5 Jun 2009 20:22:46 +0200
Subject: perf_counter: Implement generalized cache event types

Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.

This is a 3-dimensional space:

       { L1-D, L1-I, L2, ITLB, DTLB, BPU } x
       { load, store, prefetch } x
       { accesses, misses }

User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)

Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.

Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.

( x86 is supported for now, with the Nehalem event table filled in,
  and with Core2 and Atom having placeholder tables. )

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/perf_counter/util/parse-events.c | 104 ++++++++++++-
 arch/x86/kernel/cpu/perf_counter.c             | 201 ++++++++++++++++++++++++-
 include/linux/perf_counter.h                   |  34 +++++
 kernel/perf_counter.c                          |   1 +
 4 files changed, 329 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/Documentation/perf_counter/util/parse-events.c b/Documentation/perf_counter/util/parse-events.c
index eb56bd99657..de9a77c4715 100644
--- a/Documentation/perf_counter/util/parse-events.c
+++ b/Documentation/perf_counter/util/parse-events.c
@@ -6,6 +6,8 @@
 #include "exec_cmd.h"
 #include "string.h"
 
+extern char *strcasestr(const char *haystack, const char *needle);
+
 int					nr_counters;
 
 struct perf_counter_attr		attrs[MAX_COUNTERS];
@@ -17,6 +19,7 @@ struct event_symbol {
 };
 
 #define C(x, y) .type = PERF_TYPE_##x, .config = PERF_COUNT_##y
+#define CR(x, y) .type = PERF_TYPE_##x, .config = y
 
 static struct event_symbol event_symbols[] = {
   { C(HARDWARE, CPU_CYCLES),		"cpu-cycles",		},
@@ -69,6 +72,28 @@ static char *sw_event_names[] = {
 	"major faults",
 };
 
+#define MAX_ALIASES 8
+
+static char *hw_cache [][MAX_ALIASES] = {
+	{ "l1-d" ,	"l1d" ,	"l1", "l1-data-cache"			},
+	{ "l1-i" ,	"l1i" ,	"l1-instruction-cache"		},
+	{ "l2"  , },
+	{ "dtlb", },
+	{ "itlb", },
+	{ "bpu" , "btb", "branch-cache", NULL },
+};
+
+static char *hw_cache_op [][MAX_ALIASES] = {
+	{ "read"	, "load" },
+	{ "write"	, "store" },
+	{ "prefetch"	, "speculative-read", "speculative-load" },
+};
+
+static char *hw_cache_result [][MAX_ALIASES] = {
+	{ "access", "ops" },
+	{ "miss", },
+};
+
 char *event_name(int counter)
 {
 	__u64 config = attrs[counter].config;
@@ -86,6 +111,30 @@ char *event_name(int counter)
 			return hw_event_names[config];
 		return "unknown-hardware";
 
+	case PERF_TYPE_HW_CACHE: {
+		__u8 cache_type, cache_op, cache_result;
+		static char name[100];
+
+		cache_type   = (config >>  0) & 0xff;
+		if (cache_type > PERF_COUNT_HW_CACHE_MAX)
+			return "unknown-ext-hardware-cache-type";
+
+		cache_op     = (config >>  8) & 0xff;
+		if (cache_type > PERF_COUNT_HW_CACHE_OP_MAX)
+			return "unknown-ext-hardware-cache-op-type";
+
+		cache_result = (config >> 16) & 0xff;
+		if (cache_type > PERF_COUNT_HW_CACHE_RESULT_MAX)
+			return "unknown-ext-hardware-cache-result-type";
+
+		sprintf(name, "%s:%s:%s",
+			hw_cache[cache_type][0],
+			hw_cache_op[cache_op][0],
+			hw_cache_result[cache_result][0]);
+
+		return name;
+	}
+
 	case PERF_TYPE_SOFTWARE:
 		if (config < PERF_SW_EVENTS_MAX)
 			return sw_event_names[config];
@@ -98,11 +147,60 @@ char *event_name(int counter)
 	return "unknown";
 }
 
+static int parse_aliases(const char *str, char *names[][MAX_ALIASES], int size)
+{
+	int i, j;
+
+	for (i = 0; i < size; i++) {
+		for (j = 0; j < MAX_ALIASES; j++) {
+			if (!names[i][j])
+				break;
+			if (strcasestr(str, names[i][j]))
+				return i;
+		}
+	}
+
+	return 0;
+}
+
+static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr)
+{
+	__u8 cache_type = -1, cache_op = 0, cache_result = 0;
+
+	cache_type = parse_aliases(str, hw_cache, PERF_COUNT_HW_CACHE_MAX);
+	/*
+	 * No fallback - if we cannot get a clear cache type
+	 * then bail out:
+	 */
+	if (cache_type == -1)
+		return -EINVAL;
+
+	cache_op = parse_aliases(str, hw_cache_op, PERF_COUNT_HW_CACHE_OP_MAX);
+	/*
+	 * Fall back to reads:
+	 */
+	if (cache_type == -1)
+		cache_type = PERF_COUNT_HW_CACHE_OP_READ;
+
+	cache_result = parse_aliases(str, hw_cache_result,
+					PERF_COUNT_HW_CACHE_RESULT_MAX);
+	/*
+	 * Fall back to accesses:
+	 */
+	if (cache_result == -1)
+		cache_result = PERF_COUNT_HW_CACHE_RESULT_ACCESS;
+
+	attr->config = cache_type | (cache_op << 8) | (cache_result << 16);
+	attr->type = PERF_TYPE_HW_CACHE;
+
+	return 0;
+}
+
 /*
  * Each event can have multiple symbolic names.
  * Symbolic names are (almost) exactly matched.
  */
-static int match_event_symbols(const char *str, struct perf_counter_attr *attr)
+static int parse_event_symbols(const char *str, struct perf_counter_attr *attr)
 {
 	__u64 config, id;
 	int type;
@@ -147,7 +245,7 @@ static int match_event_symbols(const char *str, struct perf_counter_attr *attr)
 		}
 	}
 
-	return -EINVAL;
+	return parse_generic_hw_symbols(str, attr);
 }
 
 int parse_events(const struct option *opt, const char *str, int unset)
@@ -160,7 +258,7 @@ again:
 	if (nr_counters == MAX_COUNTERS)
 		return -1;
 
-	ret = match_event_symbols(str, &attr);
+	ret = parse_event_symbols(str, &attr);
 	if (ret < 0)
 		return ret;
 
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 430e048f285..e86679fa521 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -83,6 +83,128 @@ static u64 intel_pmu_event_map(int event)
 	return intel_perfmon_event_map[event];
 }
 
+/*
+ * Generalized hw caching related event table, filled
+ * in on a per model basis. A value of 0 means
+ * 'not supported', -1 means 'event makes no sense on
+ * this CPU', any other value means the raw event
+ * ID.
+ */
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+static u64 __read_mostly hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX];
+
+static const u64 nehalem_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI            */
+		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE         */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI            */
+		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE         */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
+		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0480, /* L1I.READS                    */
+		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(L2  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
+		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
+		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0xc024, /* L2_RQSTS.PREFETCHES          */
+		[ C(RESULT_MISS)   ] = 0x8024, /* L2_RQSTS.PREFETCH_MISS       */
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
+		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
+		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
+		[ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISS_RETIRED            */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+static const u64 core2_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+	/* To be filled in */
+};
+
+static const u64 atom_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+	/* To be filled in */
+};
+
 static u64 intel_pmu_raw_event(u64 event)
 {
 #define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
@@ -246,6 +368,39 @@ static inline int x86_pmu_initialized(void)
 	return x86_pmu.handle_irq != NULL;
 }
 
+static inline int
+set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
+{
+	unsigned int cache_type, cache_op, cache_result;
+	u64 config, val;
+
+	config = attr->config;
+
+	cache_type = (config >>  0) & 0xff;
+	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+		return -EINVAL;
+
+	cache_op = (config >>  8) & 0xff;
+	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+		return -EINVAL;
+
+	cache_result = (config >> 16) & 0xff;
+	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	val = hw_cache_event_ids[cache_type][cache_op][cache_result];
+
+	if (val == 0)
+		return -ENOENT;
+
+	if (val == -1)
+		return -EINVAL;
+
+	hwc->config |= val;
+
+	return 0;
+}
+
 /*
  * Setup the hardware configuration for a given attr_type
  */
@@ -288,22 +443,25 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 		hwc->sample_period = x86_pmu.max_period;
 
 	atomic64_set(&hwc->period_left, hwc->sample_period);
+	counter->destroy = hw_perf_counter_destroy;
 
 	/*
 	 * Raw event type provide the config in the event structure
 	 */
 	if (attr->type == PERF_TYPE_RAW) {
 		hwc->config |= x86_pmu.raw_event(attr->config);
-	} else {
-		if (attr->config >= x86_pmu.max_events)
-			return -EINVAL;
-		/*
-		 * The generic map:
-		 */
-		hwc->config |= x86_pmu.event_map(attr->config);
+		return 0;
 	}
 
-	counter->destroy = hw_perf_counter_destroy;
+	if (attr->type == PERF_TYPE_HW_CACHE)
+		return set_ext_hw_attr(hwc, attr);
+
+	if (attr->config >= x86_pmu.max_events)
+		return -EINVAL;
+	/*
+	 * The generic map:
+	 */
+	hwc->config |= x86_pmu.event_map(attr->config);
 
 	return 0;
 }
@@ -989,6 +1147,33 @@ static int intel_pmu_init(void)
 
 	rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
 
+	/*
+	 * Nehalem:
+	 */
+	switch (boot_cpu_data.x86_model) {
+	case 17:
+		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
+		sizeof(u64)*PERF_COUNT_HW_CACHE_MAX*
+			PERF_COUNT_HW_CACHE_OP_MAX*PERF_COUNT_HW_CACHE_RESULT_MAX);
+
+		pr_info("... installed Core2 event tables\n");
+		break;
+	default:
+	case 26:
+		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
+		sizeof(u64)*PERF_COUNT_HW_CACHE_MAX*
+			PERF_COUNT_HW_CACHE_OP_MAX*PERF_COUNT_HW_CACHE_RESULT_MAX);
+
+		pr_info("... installed Nehalem/Corei7 event tables\n");
+		break;
+	case 28:
+		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
+		sizeof(u64)*PERF_COUNT_HW_CACHE_MAX*
+			PERF_COUNT_HW_CACHE_OP_MAX*PERF_COUNT_HW_CACHE_RESULT_MAX);
+
+		pr_info("... installed Atom event tables\n");
+		break;
+	}
 	return 0;
 }
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index f794c69b34c..3586df840f6 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -28,6 +28,7 @@ enum perf_event_types {
 	PERF_TYPE_HARDWARE		= 0,
 	PERF_TYPE_SOFTWARE		= 1,
 	PERF_TYPE_TRACEPOINT		= 2,
+	PERF_TYPE_HW_CACHE		= 3,
 
 	/*
 	 * available TYPE space, raw is the max value.
@@ -55,6 +56,39 @@ enum attr_ids {
 	PERF_HW_EVENTS_MAX		= 7,
 };
 
+/*
+ * Generalized hardware cache counters:
+ *
+ *       { L1-D, L1-I, L2, LLC, ITLB, DTLB, BPU } x
+ *       { read, write, prefetch } x
+ *       { accesses, misses }
+ */
+enum hw_cache_id {
+	PERF_COUNT_HW_CACHE_L1D,
+	PERF_COUNT_HW_CACHE_L1I,
+	PERF_COUNT_HW_CACHE_L2,
+	PERF_COUNT_HW_CACHE_DTLB,
+	PERF_COUNT_HW_CACHE_ITLB,
+	PERF_COUNT_HW_CACHE_BPU,
+
+	PERF_COUNT_HW_CACHE_MAX,
+};
+
+enum hw_cache_op_id {
+	PERF_COUNT_HW_CACHE_OP_READ,
+	PERF_COUNT_HW_CACHE_OP_WRITE,
+	PERF_COUNT_HW_CACHE_OP_PREFETCH,
+
+	PERF_COUNT_HW_CACHE_OP_MAX,
+};
+
+enum hw_cache_op_result_id {
+	PERF_COUNT_HW_CACHE_RESULT_ACCESS,
+	PERF_COUNT_HW_CACHE_RESULT_MISS,
+
+	PERF_COUNT_HW_CACHE_RESULT_MAX,
+};
+
 /*
  * Special "software" counters provided by the kernel, even if the hardware
  * does not support performance counters. These counters measure various
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 75ae76796df..5eacaaf3f9c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3501,6 +3501,7 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 
 	switch (attr->type) {
 	case PERF_TYPE_HARDWARE:
+	case PERF_TYPE_HW_CACHE:
 		pmu = hw_perf_counter_init(counter);
 		break;
 
-- 
cgit v1.2.3-70-g09d2


From e5110d011e03030926872457f05e49e3d5031737 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sat, 6 Jun 2009 18:35:27 +0200
Subject: firewire: add parent-of-unit accessor

Retrieval of an fw_unit's parent is a common pattern in high-level code.
Wrap it up as device = fw_parent_device(unit).

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-device.c |  4 ++--
 drivers/firewire/sbp2.c        | 37 +++++++++++++++++++++----------------
 include/linux/firewire.h       |  5 +++++
 3 files changed, 28 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c
index 65d84dd6c1d..3f4e646367b 100644
--- a/drivers/firewire/core-device.c
+++ b/drivers/firewire/core-device.c
@@ -93,7 +93,7 @@ static int fw_unit_match(struct device *dev, struct device_driver *drv)
 	if (!is_fw_unit(dev))
 		return 0;
 
-	device = fw_device(unit->device.parent);
+	device = fw_parent_device(unit);
 	id = container_of(drv, struct fw_driver, driver)->id_table;
 
 	for (; id->match_flags != 0; id++) {
@@ -114,7 +114,7 @@ static int fw_unit_match(struct device *dev, struct device_driver *drv)
 
 static int get_modalias(struct fw_unit *unit, char *buffer, size_t buffer_size)
 {
-	struct fw_device *device = fw_device(unit->device.parent);
+	struct fw_device *device = fw_parent_device(unit);
 	struct fw_csr_iterator ci;
 
 	int key, value;
diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c
index d41cb6e455b..2353643721c 100644
--- a/drivers/firewire/sbp2.c
+++ b/drivers/firewire/sbp2.c
@@ -180,6 +180,11 @@ struct sbp2_target {
 	int blocked;	/* ditto */
 };
 
+static struct fw_device *target_device(struct sbp2_target *tgt)
+{
+	return fw_parent_device(tgt->unit);
+}
+
 /* Impossible login_id, to detect logout attempt before successful login */
 #define INVALID_LOGIN_ID 0x10000
 
@@ -488,7 +493,7 @@ static void complete_transaction(struct fw_card *card, int rcode,
 static void sbp2_send_orb(struct sbp2_orb *orb, struct sbp2_logical_unit *lu,
 			  int node_id, int generation, u64 offset)
 {
-	struct fw_device *device = fw_device(lu->tgt->unit->device.parent);
+	struct fw_device *device = target_device(lu->tgt);
 	unsigned long flags;
 
 	orb->pointer.high = 0;
@@ -510,7 +515,7 @@ static void sbp2_send_orb(struct sbp2_orb *orb, struct sbp2_logical_unit *lu,
 
 static int sbp2_cancel_orbs(struct sbp2_logical_unit *lu)
 {
-	struct fw_device *device = fw_device(lu->tgt->unit->device.parent);
+	struct fw_device *device = target_device(lu->tgt);
 	struct sbp2_orb *orb, *next;
 	struct list_head list;
 	unsigned long flags;
@@ -548,7 +553,7 @@ static int sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id,
 				    int generation, int function,
 				    int lun_or_login_id, void *response)
 {
-	struct fw_device *device = fw_device(lu->tgt->unit->device.parent);
+	struct fw_device *device = target_device(lu->tgt);
 	struct sbp2_management_orb *orb;
 	unsigned int timeout;
 	int retval = -ENOMEM;
@@ -644,7 +649,7 @@ static int sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id,
 
 static void sbp2_agent_reset(struct sbp2_logical_unit *lu)
 {
-	struct fw_device *device = fw_device(lu->tgt->unit->device.parent);
+	struct fw_device *device = target_device(lu->tgt);
 	__be32 d = 0;
 
 	fw_run_transaction(device->card, TCODE_WRITE_QUADLET_REQUEST,
@@ -661,7 +666,7 @@ static void complete_agent_reset_write_no_wait(struct fw_card *card,
 
 static void sbp2_agent_reset_no_wait(struct sbp2_logical_unit *lu)
 {
-	struct fw_device *device = fw_device(lu->tgt->unit->device.parent);
+	struct fw_device *device = target_device(lu->tgt);
 	struct fw_transaction *t;
 	static __be32 d;
 
@@ -700,7 +705,7 @@ static inline void sbp2_allow_block(struct sbp2_logical_unit *lu)
 static void sbp2_conditionally_block(struct sbp2_logical_unit *lu)
 {
 	struct sbp2_target *tgt = lu->tgt;
-	struct fw_card *card = fw_device(tgt->unit->device.parent)->card;
+	struct fw_card *card = target_device(tgt)->card;
 	struct Scsi_Host *shost =
 		container_of((void *)tgt, struct Scsi_Host, hostdata[0]);
 	unsigned long flags;
@@ -724,7 +729,7 @@ static void sbp2_conditionally_block(struct sbp2_logical_unit *lu)
 static void sbp2_conditionally_unblock(struct sbp2_logical_unit *lu)
 {
 	struct sbp2_target *tgt = lu->tgt;
-	struct fw_card *card = fw_device(tgt->unit->device.parent)->card;
+	struct fw_card *card = target_device(tgt)->card;
 	struct Scsi_Host *shost =
 		container_of((void *)tgt, struct Scsi_Host, hostdata[0]);
 	unsigned long flags;
@@ -749,7 +754,7 @@ static void sbp2_conditionally_unblock(struct sbp2_logical_unit *lu)
  */
 static void sbp2_unblock(struct sbp2_target *tgt)
 {
-	struct fw_card *card = fw_device(tgt->unit->device.parent)->card;
+	struct fw_card *card = target_device(tgt)->card;
 	struct Scsi_Host *shost =
 		container_of((void *)tgt, struct Scsi_Host, hostdata[0]);
 	unsigned long flags;
@@ -779,7 +784,7 @@ static void sbp2_release_target(struct kref *kref)
 	struct Scsi_Host *shost =
 		container_of((void *)tgt, struct Scsi_Host, hostdata[0]);
 	struct scsi_device *sdev;
-	struct fw_device *device = fw_device(tgt->unit->device.parent);
+	struct fw_device *device = target_device(tgt);
 
 	/* prevent deadlocks */
 	sbp2_unblock(tgt);
@@ -852,7 +857,7 @@ static void sbp2_queue_work(struct sbp2_logical_unit *lu, unsigned long delay)
  */
 static void sbp2_set_busy_timeout(struct sbp2_logical_unit *lu)
 {
-	struct fw_device *device = fw_device(lu->tgt->unit->device.parent);
+	struct fw_device *device = target_device(lu->tgt);
 	__be32 d = cpu_to_be32(SBP2_CYCLE_LIMIT | SBP2_RETRY_LIMIT);
 
 	fw_run_transaction(device->card, TCODE_WRITE_QUADLET_REQUEST,
@@ -868,7 +873,7 @@ static void sbp2_login(struct work_struct *work)
 	struct sbp2_logical_unit *lu =
 		container_of(work, struct sbp2_logical_unit, work.work);
 	struct sbp2_target *tgt = lu->tgt;
-	struct fw_device *device = fw_device(tgt->unit->device.parent);
+	struct fw_device *device = target_device(tgt);
 	struct Scsi_Host *shost;
 	struct scsi_device *sdev;
 	struct sbp2_login_response response;
@@ -1116,7 +1121,7 @@ static struct scsi_host_template scsi_driver_template;
 static int sbp2_probe(struct device *dev)
 {
 	struct fw_unit *unit = fw_unit(dev);
-	struct fw_device *device = fw_device(unit->device.parent);
+	struct fw_device *device = fw_parent_device(unit);
 	struct sbp2_target *tgt;
 	struct sbp2_logical_unit *lu;
 	struct Scsi_Host *shost;
@@ -1197,7 +1202,7 @@ static void sbp2_reconnect(struct work_struct *work)
 	struct sbp2_logical_unit *lu =
 		container_of(work, struct sbp2_logical_unit, work.work);
 	struct sbp2_target *tgt = lu->tgt;
-	struct fw_device *device = fw_device(tgt->unit->device.parent);
+	struct fw_device *device = target_device(tgt);
 	int generation, node_id, local_node_id;
 
 	if (fw_device_is_shutdown(device))
@@ -1249,7 +1254,7 @@ static void sbp2_update(struct fw_unit *unit)
 	struct sbp2_target *tgt = unit->device.driver_data;
 	struct sbp2_logical_unit *lu;
 
-	fw_device_enable_phys_dma(fw_device(unit->device.parent));
+	fw_device_enable_phys_dma(fw_parent_device(unit));
 
 	/*
 	 * Fw-core serializes sbp2_update() against sbp2_remove().
@@ -1342,7 +1347,7 @@ static void complete_command_orb(struct sbp2_orb *base_orb,
 {
 	struct sbp2_command_orb *orb =
 		container_of(base_orb, struct sbp2_command_orb, base);
-	struct fw_device *device = fw_device(orb->lu->tgt->unit->device.parent);
+	struct fw_device *device = target_device(orb->lu->tgt);
 	int result;
 
 	if (status != NULL) {
@@ -1449,7 +1454,7 @@ static int sbp2_map_scatterlist(struct sbp2_command_orb *orb,
 static int sbp2_scsi_queuecommand(struct scsi_cmnd *cmd, scsi_done_fn_t done)
 {
 	struct sbp2_logical_unit *lu = cmd->device->hostdata;
-	struct fw_device *device = fw_device(lu->tgt->unit->device.parent);
+	struct fw_device *device = target_device(lu->tgt);
 	struct sbp2_command_orb *orb;
 	int generation, retval = SCSI_MLQUEUE_HOST_BUSY;
 
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index e979f9b22cb..a69aea0394e 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -248,6 +248,11 @@ static inline void fw_unit_put(struct fw_unit *unit)
 	put_device(&unit->device);
 }
 
+static inline struct fw_device *fw_parent_device(struct fw_unit *unit)
+{
+	return fw_device(unit->device.parent);
+}
+
 struct ieee1394_device_id;
 
 struct fw_driver {
-- 
cgit v1.2.3-70-g09d2


From a93958ac980f0ce594ad90657ecbc595ff157a40 Mon Sep 17 00:00:00 2001
From: Ayaz Abdulla <aabdulla@nvidia.com>
Date: Sun, 7 Jun 2009 03:54:37 -0700
Subject: removal of forcedeth device ids

This patch removes the forcedeth device ids from pci_ids.h

The forcedeth driver uses the device id constants directly in its source
file.

[ Need to keep PCI_DEVICE_ID_NVIDIA_NVENET_15 in order to keep
  drivers/pci/quirks.c building -DaveM ]

Signed-off-by: Ayaz Abdulla <aabdulla@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pci_ids.h | 38 --------------------------------------
 1 file changed, 38 deletions(-)

(limited to 'include')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 12db06cf0e2..b87c51aea14 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1066,8 +1066,6 @@
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SMBUS	0x0034
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_IDE	0x0035
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA	0x0036
-#define PCI_DEVICE_ID_NVIDIA_NVENET_10		0x0037
-#define PCI_DEVICE_ID_NVIDIA_NVENET_11		0x0038
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA2	0x003e
 #define PCI_DEVICE_ID_NVIDIA_GEFORCE_6800_ULTRA 0x0040
 #define PCI_DEVICE_ID_NVIDIA_GEFORCE_6800       0x0041
@@ -1078,21 +1076,16 @@
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_IDE	0x0053
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_SATA	0x0054
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_SATA2	0x0055
-#define PCI_DEVICE_ID_NVIDIA_NVENET_8		0x0056
-#define PCI_DEVICE_ID_NVIDIA_NVENET_9		0x0057
 #define PCI_DEVICE_ID_NVIDIA_CK804_AUDIO	0x0059
 #define PCI_DEVICE_ID_NVIDIA_CK804_PCIE		0x005d
 #define PCI_DEVICE_ID_NVIDIA_NFORCE2_SMBUS	0x0064
 #define PCI_DEVICE_ID_NVIDIA_NFORCE2_IDE	0x0065
-#define PCI_DEVICE_ID_NVIDIA_NVENET_2		0x0066
 #define PCI_DEVICE_ID_NVIDIA_MCP2_MODEM		0x0069
 #define PCI_DEVICE_ID_NVIDIA_MCP2_AUDIO		0x006a
 #define PCI_DEVICE_ID_NVIDIA_NFORCE2S_SMBUS	0x0084
 #define PCI_DEVICE_ID_NVIDIA_NFORCE2S_IDE	0x0085
-#define PCI_DEVICE_ID_NVIDIA_NVENET_4		0x0086
 #define PCI_DEVICE_ID_NVIDIA_MCP2S_MODEM	0x0089
 #define PCI_DEVICE_ID_NVIDIA_CK8_AUDIO		0x008a
-#define PCI_DEVICE_ID_NVIDIA_NVENET_5		0x008c
 #define PCI_DEVICE_ID_NVIDIA_NFORCE2S_SATA	0x008e
 #define PCI_DEVICE_ID_NVIDIA_GEFORCE_7800_GT   0x0090
 #define PCI_DEVICE_ID_NVIDIA_GEFORCE_7800_GTX	0x0091
@@ -1108,15 +1101,12 @@
 #define PCI_DEVICE_ID_NVIDIA_NFORCE3		0x00d1
 #define PCI_DEVICE_ID_NVIDIA_NFORCE3_SMBUS	0x00d4
 #define PCI_DEVICE_ID_NVIDIA_NFORCE3_IDE	0x00d5
-#define PCI_DEVICE_ID_NVIDIA_NVENET_3		0x00d6
 #define PCI_DEVICE_ID_NVIDIA_MCP3_MODEM		0x00d9
 #define PCI_DEVICE_ID_NVIDIA_MCP3_AUDIO		0x00da
-#define PCI_DEVICE_ID_NVIDIA_NVENET_7		0x00df
 #define PCI_DEVICE_ID_NVIDIA_NFORCE3S		0x00e1
 #define PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA	0x00e3
 #define PCI_DEVICE_ID_NVIDIA_NFORCE3S_SMBUS	0x00e4
 #define PCI_DEVICE_ID_NVIDIA_NFORCE3S_IDE	0x00e5
-#define PCI_DEVICE_ID_NVIDIA_NVENET_6		0x00e6
 #define PCI_DEVICE_ID_NVIDIA_CK8S_AUDIO		0x00ea
 #define PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA2	0x00ee
 #define PCIE_DEVICE_ID_NVIDIA_GEFORCE_6800_ALT1 0x00f0
@@ -1176,7 +1166,6 @@
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_SMBUS	0x01b4
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_IDE		0x01bc
 #define PCI_DEVICE_ID_NVIDIA_MCP1_MODEM		0x01c1
-#define PCI_DEVICE_ID_NVIDIA_NVENET_1		0x01c3
 #define PCI_DEVICE_ID_NVIDIA_NFORCE2		0x01e0
 #define PCI_DEVICE_ID_NVIDIA_GEFORCE3		0x0200
 #define PCI_DEVICE_ID_NVIDIA_GEFORCE3_1		0x0201
@@ -1199,8 +1188,6 @@
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE	0x036E
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA	0x037E
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2	0x037F
-#define PCI_DEVICE_ID_NVIDIA_NVENET_12		0x0268
-#define PCI_DEVICE_ID_NVIDIA_NVENET_13		0x0269
 #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4800	0x0280
 #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4800_8X    0x0281
 #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4800SE     0x0282
@@ -1247,46 +1234,21 @@
 #define PCI_DEVICE_ID_NVIDIA_GEFORCE_FX_GO5700_2    0x0348
 #define PCI_DEVICE_ID_NVIDIA_QUADRO_FX_GO1000       0x034C
 #define PCI_DEVICE_ID_NVIDIA_QUADRO_FX_1100         0x034E
-#define PCI_DEVICE_ID_NVIDIA_NVENET_14              0x0372
 #define PCI_DEVICE_ID_NVIDIA_NVENET_15              0x0373
-#define PCI_DEVICE_ID_NVIDIA_NVENET_16              0x03E5
-#define PCI_DEVICE_ID_NVIDIA_NVENET_17              0x03E6
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA      0x03E7
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SMBUS	    0x03EB
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_IDE       0x03EC
-#define PCI_DEVICE_ID_NVIDIA_NVENET_18              0x03EE
-#define PCI_DEVICE_ID_NVIDIA_NVENET_19              0x03EF
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA2     0x03F6
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA3     0x03F7
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP65_SMBUS	    0x0446
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP65_IDE	    0x0448
-#define PCI_DEVICE_ID_NVIDIA_NVENET_20              0x0450
-#define PCI_DEVICE_ID_NVIDIA_NVENET_21              0x0451
-#define PCI_DEVICE_ID_NVIDIA_NVENET_22              0x0452
-#define PCI_DEVICE_ID_NVIDIA_NVENET_23              0x0453
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_SMBUS     0x0542
-#define PCI_DEVICE_ID_NVIDIA_NVENET_24              0x054C
-#define PCI_DEVICE_ID_NVIDIA_NVENET_25              0x054D
-#define PCI_DEVICE_ID_NVIDIA_NVENET_26              0x054E
-#define PCI_DEVICE_ID_NVIDIA_NVENET_27              0x054F
-#define PCI_DEVICE_ID_NVIDIA_NVENET_28              0x07DC
-#define PCI_DEVICE_ID_NVIDIA_NVENET_29              0x07DD
-#define PCI_DEVICE_ID_NVIDIA_NVENET_30              0x07DE
-#define PCI_DEVICE_ID_NVIDIA_NVENET_31              0x07DF
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_IDE       0x0560
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP73_IDE       0x056C
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP78S_SMBUS    0x0752
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP77_IDE       0x0759
-#define PCI_DEVICE_ID_NVIDIA_NVENET_32              0x0760
-#define PCI_DEVICE_ID_NVIDIA_NVENET_33              0x0761
-#define PCI_DEVICE_ID_NVIDIA_NVENET_34              0x0762
-#define PCI_DEVICE_ID_NVIDIA_NVENET_35              0x0763
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP73_SMBUS     0x07D8
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP79_SMBUS     0x0AA2
-#define PCI_DEVICE_ID_NVIDIA_NVENET_36              0x0AB0
-#define PCI_DEVICE_ID_NVIDIA_NVENET_37              0x0AB1
-#define PCI_DEVICE_ID_NVIDIA_NVENET_38              0x0AB2
-#define PCI_DEVICE_ID_NVIDIA_NVENET_39              0x0AB3
 
 #define PCI_VENDOR_ID_IMS		0x10e0
 #define PCI_DEVICE_ID_IMS_TT128		0x9128
-- 
cgit v1.2.3-70-g09d2


From db429e9ec0f9dee2d8e50c154f04f29f880fc9d6 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 7 Jun 2009 13:52:52 +0200
Subject: partitions: add ->set_capacity block device method

* Add ->set_capacity block device method and use it in rescan_partitions()
  to attempt enabling native capacity of the device upon detecting the
  partition which exceeds device capacity.

* Add GENHD_FL_NATIVE_CAPACITY flag to try limit attempts of enabling
  native capacity during partition scan.

Together with the consecutive patch implementing ->set_capacity method in
ide-gd device driver this allows automatic disabling of Host Protected Area
(HPA) if any partitions overlapping HPA are detected.

Cc: Robert Hancock <hancockrwd@gmail.com>
Cc: Frans Pop <elendil@planet.nl>
Cc: "Andries E. Brouwer" <Andries.Brouwer@cwi.nl>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Emphatically-Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 fs/partitions/check.c  | 43 ++++++++++++++++++++++++++++++++-----------
 include/linux/blkdev.h |  2 ++
 include/linux/genhd.h  |  1 +
 3 files changed, 35 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 137a708bb21..4bc2c43fa08 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -546,28 +546,49 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 
 	/* add partitions */
 	for (p = 1; p < state->limit; p++) {
-		sector_t size = state->parts[p].size;
-		sector_t from = state->parts[p].from;
+		sector_t size, from;
+try_scan:
+		size = state->parts[p].size;
 		if (!size)
 			continue;
+
+		from = state->parts[p].from;
 		if (from >= get_capacity(disk)) {
 			printk(KERN_WARNING
 			       "%s: p%d ignored, start %llu is behind the end of the disk\n",
 			       disk->disk_name, p, (unsigned long long) from);
 			continue;
 		}
+
 		if (from + size > get_capacity(disk)) {
-			/*
-			 * we can not ignore partitions of broken tables
-			 * created by for example camera firmware, but we
-			 * limit them to the end of the disk to avoid
-			 * creating invalid block devices
-			 */
+			struct block_device_operations *bdops = disk->fops;
+			unsigned long long capacity;
+
 			printk(KERN_WARNING
-			       "%s: p%d size %llu exceeds device capacity, "
-			       "limited to end of disk\n",
+			       "%s: p%d size %llu exceeds device capacity, ",
 			       disk->disk_name, p, (unsigned long long) size);
-			size = get_capacity(disk) - from;
+
+			if (bdops->set_capacity &&
+			    (disk->flags & GENHD_FL_NATIVE_CAPACITY) == 0) {
+				printk(KERN_CONT "enabling native capacity\n");
+				capacity = bdops->set_capacity(disk, ~0ULL);
+				disk->flags |= GENHD_FL_NATIVE_CAPACITY;
+				if (capacity > get_capacity(disk)) {
+					set_capacity(disk, capacity);
+					check_disk_size_change(disk, bdev);
+					bdev->bd_invalidated = 0;
+				}
+				goto try_scan;
+			} else {
+				/*
+				 * we can not ignore partitions of broken tables
+				 * created by for example camera firmware, but
+				 * we limit them to the end of the disk to avoid
+				 * creating invalid block devices
+				 */
+				printk(KERN_CONT "limited to end of disk\n");
+				size = get_capacity(disk) - from;
+			}
 		}
 		part = add_partition(disk, p, from, size,
 				     state->parts[p].flags);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6f841fb1be3..a2d7298be35 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1106,6 +1106,8 @@ struct block_device_operations {
 	int (*direct_access) (struct block_device *, sector_t,
 						void **, unsigned long *);
 	int (*media_changed) (struct gendisk *);
+	unsigned long long (*set_capacity) (struct gendisk *,
+						unsigned long long);
 	int (*revalidate_disk) (struct gendisk *);
 	int (*getgeo)(struct block_device *, struct hd_geometry *);
 	struct module *owner;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 634c53028fb..239e24b081a 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -113,6 +113,7 @@ struct hd_struct {
 #define GENHD_FL_UP				16
 #define GENHD_FL_SUPPRESS_PARTITION_INFO	32
 #define GENHD_FL_EXT_DEVT			64 /* allow extended devt */
+#define GENHD_FL_NATIVE_CAPACITY		128
 
 #define BLK_SCSI_MAX_CMDS	(256)
 #define BLK_SCSI_CMD_PER_LONG	(BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
-- 
cgit v1.2.3-70-g09d2


From e957b60d1583022a0f7c03267d37fcae2ddb78b1 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 7 Jun 2009 13:52:52 +0200
Subject: ide-gd: implement block device ->set_capacity method (v2)

* Use ->probed_capacity to store native device capacity for ATA disks.

* Add ->set_capacity method to struct ide_disk_ops.

* Implement disk device ->set_capacity method for ATA disks.

* Implement block device ->set_capacity method.

v2:
* Check if LBA and HPA are supported in ide_disk_set_capacity().

* According to the spec the SET MAX ADDRESS command shall be
  immediately preceded by a READ NATIVE MAX ADDRESS command.

* Add ide_disk_hpa_{get_native,set}_capacity() helpers.

Together with the previous patch adding ->set_capacity block device
method this allows automatic disabling of Host Protected Area (HPA)
if any partitions overlapping HPA are detected.

Cc: Robert Hancock <hancockrwd@gmail.com>
Cc: Frans Pop <elendil@planet.nl>
Cc: "Andries E. Brouwer" <Andries.Brouwer@cwi.nl>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Emphatically-Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-disk.c | 67 +++++++++++++++++++++++++++++++++++++++++---------
 drivers/ide/ide-gd.c   | 14 +++++++++++
 include/linux/ide.h    |  4 +--
 3 files changed, 72 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index a9fbe2c3121..61a6d354622 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -302,14 +302,12 @@ static const struct drive_list_entry hpa_list[] = {
 	{ NULL,		NULL }
 };
 
-static void idedisk_check_hpa(ide_drive_t *drive)
+static u64 ide_disk_hpa_get_native_capacity(ide_drive_t *drive, int lba48)
 {
-	unsigned long long capacity, set_max;
-	int lba48 = ata_id_lba48_enabled(drive->id);
+	u64 capacity, set_max;
 
 	capacity = drive->capacity64;
-
-	set_max = idedisk_read_native_max_address(drive, lba48);
+	set_max  = idedisk_read_native_max_address(drive, lba48);
 
 	if (ide_in_drive_list(drive->id, hpa_list)) {
 		/*
@@ -320,9 +318,31 @@ static void idedisk_check_hpa(ide_drive_t *drive)
 			set_max--;
 	}
 
+	return set_max;
+}
+
+static u64 ide_disk_hpa_set_capacity(ide_drive_t *drive, u64 set_max, int lba48)
+{
+	set_max = idedisk_set_max_address(drive, set_max, lba48);
+	if (set_max)
+		drive->capacity64 = set_max;
+
+	return set_max;
+}
+
+static void idedisk_check_hpa(ide_drive_t *drive)
+{
+	u64 capacity, set_max;
+	int lba48 = ata_id_lba48_enabled(drive->id);
+
+	capacity = drive->capacity64;
+	set_max  = ide_disk_hpa_get_native_capacity(drive, lba48);
+
 	if (set_max <= capacity)
 		return;
 
+	drive->probed_capacity = set_max;
+
 	printk(KERN_INFO "%s: Host Protected Area detected.\n"
 			 "\tcurrent capacity is %llu sectors (%llu MB)\n"
 			 "\tnative  capacity is %llu sectors (%llu MB)\n",
@@ -330,13 +350,10 @@ static void idedisk_check_hpa(ide_drive_t *drive)
 			 capacity, sectors_to_MB(capacity),
 			 set_max, sectors_to_MB(set_max));
 
-	set_max = idedisk_set_max_address(drive, set_max, lba48);
-
-	if (set_max) {
-		drive->capacity64 = set_max;
+	set_max = ide_disk_hpa_set_capacity(drive, set_max, lba48);
+	if (set_max)
 		printk(KERN_INFO "%s: Host Protected Area disabled.\n",
 				 drive->name);
-	}
 }
 
 static int ide_disk_get_capacity(ide_drive_t *drive)
@@ -358,6 +375,8 @@ static int ide_disk_get_capacity(ide_drive_t *drive)
 		drive->capacity64 = drive->cyl * drive->head * drive->sect;
 	}
 
+	drive->probed_capacity = drive->capacity64;
+
 	if (lba) {
 		drive->dev_flags |= IDE_DFLAG_LBA;
 
@@ -376,7 +395,7 @@ static int ide_disk_get_capacity(ide_drive_t *drive)
 		       "%llu sectors (%llu MB)\n",
 		       drive->name, (unsigned long long)drive->capacity64,
 		       sectors_to_MB(drive->capacity64));
-		drive->capacity64 = 1ULL << 28;
+		drive->probed_capacity = drive->capacity64 = 1ULL << 28;
 	}
 
 	if ((drive->hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) &&
@@ -392,6 +411,31 @@ static int ide_disk_get_capacity(ide_drive_t *drive)
 	return 0;
 }
 
+static u64 ide_disk_set_capacity(ide_drive_t *drive, u64 capacity)
+{
+	u64 set = min(capacity, drive->probed_capacity);
+	u16 *id = drive->id;
+	int lba48 = ata_id_lba48_enabled(id);
+
+	if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 ||
+	    ata_id_hpa_enabled(id) == 0)
+		goto out;
+
+	/*
+	 * according to the spec the SET MAX ADDRESS command shall be
+	 * immediately preceded by a READ NATIVE MAX ADDRESS command
+	 */
+	capacity = ide_disk_hpa_get_native_capacity(drive, lba48);
+	if (capacity == 0)
+		goto out;
+
+	set = ide_disk_hpa_set_capacity(drive, set, lba48);
+	if (set)
+		return set;
+out:
+	return drive->capacity64;
+}
+
 static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
 {
 	ide_drive_t *drive = q->queuedata;
@@ -741,6 +785,7 @@ static int ide_disk_set_doorlock(ide_drive_t *drive, struct gendisk *disk,
 
 const struct ide_disk_ops ide_ata_disk_ops = {
 	.check		= ide_disk_check,
+	.set_capacity	= ide_disk_set_capacity,
 	.get_capacity	= ide_disk_get_capacity,
 	.setup		= ide_disk_setup,
 	.flush		= ide_disk_flush,
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
index 4b6b71e2cdf..214119026b3 100644
--- a/drivers/ide/ide-gd.c
+++ b/drivers/ide/ide-gd.c
@@ -287,6 +287,19 @@ static int ide_gd_media_changed(struct gendisk *disk)
 	return ret;
 }
 
+static unsigned long long ide_gd_set_capacity(struct gendisk *disk,
+					      unsigned long long capacity)
+{
+	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
+	ide_drive_t *drive = idkp->drive;
+	const struct ide_disk_ops *disk_ops = drive->disk_ops;
+
+	if (disk_ops->set_capacity)
+		return disk_ops->set_capacity(drive, capacity);
+
+	return drive->capacity64;
+}
+
 static int ide_gd_revalidate_disk(struct gendisk *disk)
 {
 	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
@@ -315,6 +328,7 @@ static struct block_device_operations ide_gd_ops = {
 	.locked_ioctl		= ide_gd_ioctl,
 	.getgeo			= ide_gd_getgeo,
 	.media_changed		= ide_gd_media_changed,
+	.set_capacity		= ide_gd_set_capacity,
 	.revalidate_disk	= ide_gd_revalidate_disk
 };
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 9fed365a598..e96ace12872 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -397,6 +397,7 @@ struct ide_drive_s;
 struct ide_disk_ops {
 	int		(*check)(struct ide_drive_s *, const char *);
 	int		(*get_capacity)(struct ide_drive_s *);
+	u64		(*set_capacity)(struct ide_drive_s *, u64);
 	void		(*setup)(struct ide_drive_s *);
 	void		(*flush)(struct ide_drive_s *);
 	int		(*init_media)(struct ide_drive_s *, struct gendisk *);
@@ -568,8 +569,7 @@ struct ide_drive_s {
 	unsigned int	drive_data;	/* used by set_pio_mode/dev_select() */
 	unsigned int	failures;	/* current failure count */
 	unsigned int	max_failures;	/* maximum allowed failure count */
-	u64		probed_capacity;/* initial reported media capacity (ide-cd only currently) */
-
+	u64		probed_capacity;/* initial/native media capacity */
 	u64		capacity64;	/* total number of sectors */
 
 	int		lun;		/* logical unit */
-- 
cgit v1.2.3-70-g09d2


From 075affcbe01d4d7cefcd0e30a98df1253bcf8d92 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 7 Jun 2009 13:52:52 +0200
Subject: ide: preserve Host Protected Area by default (v2)

From the perspective of most users of recent systems, disabling Host
Protected Area (HPA) can break vendor RAID formats, GPT partitions and
risks corrupting firmware or overwriting vendor system recovery tools.

Unfortunately the original (kernels < 2.6.30) behavior (unconditionally
disabling HPA and using full disk capacity) was introduced at the time
when the main use of HPA was to make the drive look small enough for the
BIOS to allow the system to boot with large capacity drives.

Thus to allow the maximum compatibility with the existing setups (using
HPA and partitioned with HPA disabled) we automically disable HPA if
any partitions overlapping HPA are detected.  Additionally HPA can also
be disabled using the "nohpa" module parameter (i.e. "ide_core.nohpa=0.0"
to disable HPA on /dev/hda).

v2:
Fix ->resume HPA support.

While at it:
- remove stale "idebus=" entry from Documentation/kernel-parameters.txt

Cc: Robert Hancock <hancockrwd@gmail.com>
Cc: Frans Pop <elendil@planet.nl>
Cc: "Andries E. Brouwer" <Andries.Brouwer@cwi.nl>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
[patch description was based on input from Alan Cox and Frans Pop]
Emphatically-Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 Documentation/ide/ide.txt           |  2 ++
 Documentation/kernel-parameters.txt |  7 ++-----
 drivers/ide/ide-disk.c              |  8 +++++++-
 drivers/ide/ide.c                   | 10 ++++++++++
 include/linux/ide.h                 |  2 ++
 5 files changed, 23 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/Documentation/ide/ide.txt b/Documentation/ide/ide.txt
index 0c78f4b1d9d..e77bebfa7b0 100644
--- a/Documentation/ide/ide.txt
+++ b/Documentation/ide/ide.txt
@@ -216,6 +216,8 @@ Other kernel parameters for ide_core are:
 
 * "noflush=[interface_number.device_number]" to disable flush requests
 
+* "nohpa=[interface_number.device_number]" to disable Host Protected Area
+
 * "noprobe=[interface_number.device_number]" to skip probing
 
 * "nowerr=[interface_number.device_number]" to ignore the WRERR_STAT bit
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index a19f021f081..e58c91ca802 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -835,11 +835,8 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	ide-core.nodma=	[HW] (E)IDE subsystem
 			Format: =0.0 to prevent dma on hda, =0.1 hdb =1.0 hdc
-			.vlb_clock .pci_clock .noflush .noprobe .nowerr .cdrom
-			.chs .ignore_cable are additional options
-			See Documentation/ide/ide.txt.
-
-	idebus=		[HW] (E)IDE subsystem - VLB/PCI bus speed
+			.vlb_clock .pci_clock .noflush .nohpa .noprobe .nowerr
+			.cdrom .chs .ignore_cable are additional options
 			See Documentation/ide/ide.txt.
 
 	ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 61a6d354622..3d92c9d54d4 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -350,6 +350,9 @@ static void idedisk_check_hpa(ide_drive_t *drive)
 			 capacity, sectors_to_MB(capacity),
 			 set_max, sectors_to_MB(set_max));
 
+	if ((drive->dev_flags & IDE_DFLAG_NOHPA) == 0)
+		return;
+
 	set_max = ide_disk_hpa_set_capacity(drive, set_max, lba48);
 	if (set_max)
 		printk(KERN_INFO "%s: Host Protected Area disabled.\n",
@@ -430,8 +433,11 @@ static u64 ide_disk_set_capacity(ide_drive_t *drive, u64 capacity)
 		goto out;
 
 	set = ide_disk_hpa_set_capacity(drive, set, lba48);
-	if (set)
+	if (set) {
+		/* needed for ->resume to disable HPA */
+		drive->dev_flags |= IDE_DFLAG_NOHPA;
 		return set;
+	}
 out:
 	return drive->capacity64;
 }
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 92c9b90931e..16d056939f9 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -211,6 +211,11 @@ static unsigned int ide_noflush;
 module_param_call(noflush, ide_set_dev_param_mask, NULL, &ide_noflush, 0);
 MODULE_PARM_DESC(noflush, "disable flush requests for a device");
 
+static unsigned int ide_nohpa;
+
+module_param_call(nohpa, ide_set_dev_param_mask, NULL, &ide_nohpa, 0);
+MODULE_PARM_DESC(nohpa, "disable Host Protected Area for a device");
+
 static unsigned int ide_noprobe;
 
 module_param_call(noprobe, ide_set_dev_param_mask, NULL, &ide_noprobe, 0);
@@ -281,6 +286,11 @@ static void ide_dev_apply_params(ide_drive_t *drive, u8 unit)
 				 drive->name);
 		drive->dev_flags |= IDE_DFLAG_NOFLUSH;
 	}
+	if (ide_nohpa & (1 << i)) {
+		printk(KERN_INFO "ide: disabling Host Protected Area for %s\n",
+				 drive->name);
+		drive->dev_flags |= IDE_DFLAG_NOHPA;
+	}
 	if (ide_noprobe & (1 << i)) {
 		printk(KERN_INFO "ide: skipping probe for %s\n", drive->name);
 		drive->dev_flags |= IDE_DFLAG_NOPROBE;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index e96ace12872..45dce3b4c88 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -475,6 +475,8 @@ enum {
 	IDE_DFLAG_NICE1			= (1 << 5),
 	/* device is physically present */
 	IDE_DFLAG_PRESENT		= (1 << 6),
+	/* disable Host Protected Area */
+	IDE_DFLAG_NOHPA			= (1 << 7),
 	/* id read from device (synthetic if not set) */
 	IDE_DFLAG_ID_READ		= (1 << 8),
 	IDE_DFLAG_NOPROBE		= (1 << 9),
-- 
cgit v1.2.3-70-g09d2


From 8bc1e5aa06a2a9a425c4a6795fc564cba1521487 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 7 Jun 2009 15:37:09 +0200
Subject: ide: respect quirk_drives[] list on all controllers

* Add ide_check_nien_quirk_list() helper to the core code
  and then use it in ide_port_tune_devices().

* Remove no longer needed ->quirkproc methods from hpt366.c
  and pdc202xx_{new,old}.c.

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/hpt366.c       | 27 ---------------------------
 drivers/ide/ide-iops.c     | 25 +++++++++++++++++++++++++
 drivers/ide/ide-probe.c    |  2 ++
 drivers/ide/pdc202xx_new.c | 26 --------------------------
 drivers/ide/pdc202xx_old.c | 27 ---------------------------
 include/linux/ide.h        |  1 +
 6 files changed, 28 insertions(+), 80 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
index cb04523e31c..a2e9f6c65a9 100644
--- a/drivers/ide/hpt366.c
+++ b/drivers/ide/hpt366.c
@@ -138,18 +138,6 @@
 #undef	HPT_RESET_STATE_ENGINE
 #undef	HPT_DELAY_INTERRUPT
 
-static const char *quirk_drives[] = {
-	"QUANTUM FIREBALLlct08 08",
-	"QUANTUM FIREBALLP KA6.4",
-	"QUANTUM FIREBALLP KA9.1",
-	"QUANTUM FIREBALLP KX13.6",
-	"QUANTUM FIREBALLP KX20.5",
-	"QUANTUM FIREBALLP KX27.3",
-	"QUANTUM FIREBALLP LM20.4",
-	"QUANTUM FIREBALLP LM20.5",
-	NULL
-};
-
 static const char *bad_ata100_5[] = {
 	"IBM-DTLA-307075",
 	"IBM-DTLA-307060",
@@ -733,20 +721,6 @@ static void hpt3xx_set_pio_mode(ide_drive_t *drive, const u8 pio)
 	hpt3xx_set_mode(drive, XFER_PIO_0 + pio);
 }
 
-static void hpt3xx_quirkproc(ide_drive_t *drive)
-{
-	char *m			= (char *)&drive->id[ATA_ID_PROD];
-	const  char **list	= quirk_drives;
-
-	while (*list)
-		if (strstr(m, *list++)) {
-			drive->quirk_list = 2;
-			return;
-		}
-
-	drive->quirk_list = 0;
-}
-
 static void hpt3xx_maskproc(ide_drive_t *drive, int mask)
 {
 	ide_hwif_t *hwif	= drive->hwif;
@@ -1408,7 +1382,6 @@ static int __devinit hpt36x_init(struct pci_dev *dev, struct pci_dev *dev2)
 static const struct ide_port_ops hpt3xx_port_ops = {
 	.set_pio_mode		= hpt3xx_set_pio_mode,
 	.set_dma_mode		= hpt3xx_set_mode,
-	.quirkproc		= hpt3xx_quirkproc,
 	.maskproc		= hpt3xx_maskproc,
 	.mdma_filter		= hpt3xx_mdma_filter,
 	.udma_filter		= hpt3xx_udma_filter,
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index 8dff623f9da..c55349537c2 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -282,6 +282,31 @@ no_80w:
 	return 0;
 }
 
+static const char *nien_quirk_list[] = {
+	"QUANTUM FIREBALLlct08 08",
+	"QUANTUM FIREBALLP KA6.4",
+	"QUANTUM FIREBALLP KA9.1",
+	"QUANTUM FIREBALLP KX13.6",
+	"QUANTUM FIREBALLP KX20.5",
+	"QUANTUM FIREBALLP KX27.3",
+	"QUANTUM FIREBALLP LM20.4",
+	"QUANTUM FIREBALLP LM20.5",
+	NULL
+};
+
+void ide_check_nien_quirk_list(ide_drive_t *drive)
+{
+	const char **list, *m = (char *)&drive->id[ATA_ID_PROD];
+
+	for (list = nien_quirk_list; *list != NULL; list++)
+		if (strstr(m, *list) != NULL) {
+			drive->quirk_list = 2;
+			return;
+		}
+
+	drive->quirk_list = 0;
+}
+
 int ide_driveid_update(ide_drive_t *drive)
 {
 	u16 *id;
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 89574b0bd56..28f95cb41c2 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -732,6 +732,8 @@ static void ide_port_tune_devices(ide_hwif_t *hwif)
 	int i;
 
 	ide_port_for_each_present_dev(i, drive, hwif) {
+		ide_check_nien_quirk_list(drive);
+
 		if (port_ops && port_ops->quirkproc)
 			port_ops->quirkproc(drive);
 	}
diff --git a/drivers/ide/pdc202xx_new.c b/drivers/ide/pdc202xx_new.c
index b68906c3c17..65ba8239e7b 100644
--- a/drivers/ide/pdc202xx_new.c
+++ b/drivers/ide/pdc202xx_new.c
@@ -40,18 +40,6 @@
 #define DBG(fmt, args...)
 #endif
 
-static const char *pdc_quirk_drives[] = {
-	"QUANTUM FIREBALLlct08 08",
-	"QUANTUM FIREBALLP KA6.4",
-	"QUANTUM FIREBALLP KA9.1",
-	"QUANTUM FIREBALLP LM20.4",
-	"QUANTUM FIREBALLP KX13.6",
-	"QUANTUM FIREBALLP KX20.5",
-	"QUANTUM FIREBALLP KX27.3",
-	"QUANTUM FIREBALLP LM20.5",
-	NULL
-};
-
 static u8 max_dma_rate(struct pci_dev *pdev)
 {
 	u8 mode;
@@ -200,19 +188,6 @@ static u8 pdcnew_cable_detect(ide_hwif_t *hwif)
 		return ATA_CBL_PATA80;
 }
 
-static void pdcnew_quirkproc(ide_drive_t *drive)
-{
-	const char **list, *m = (char *)&drive->id[ATA_ID_PROD];
-
-	for (list = pdc_quirk_drives; *list != NULL; list++)
-		if (strstr(m, *list) != NULL) {
-			drive->quirk_list = 2;
-			return;
-		}
-
-	drive->quirk_list = 0;
-}
-
 static void pdcnew_reset(ide_drive_t *drive)
 {
 	/*
@@ -473,7 +448,6 @@ static struct pci_dev * __devinit pdc20270_get_dev2(struct pci_dev *dev)
 static const struct ide_port_ops pdcnew_port_ops = {
 	.set_pio_mode		= pdcnew_set_pio_mode,
 	.set_dma_mode		= pdcnew_set_dma_mode,
-	.quirkproc		= pdcnew_quirkproc,
 	.resetproc		= pdcnew_reset,
 	.cable_detect		= pdcnew_cable_detect,
 };
diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c
index 4980dd7b2e2..fe01db679a3 100644
--- a/drivers/ide/pdc202xx_old.c
+++ b/drivers/ide/pdc202xx_old.c
@@ -23,18 +23,6 @@
 
 #define PDC202XX_DEBUG_DRIVE_INFO	0
 
-static const char *pdc_quirk_drives[] = {
-	"QUANTUM FIREBALLlct08 08",
-	"QUANTUM FIREBALLP KA6.4",
-	"QUANTUM FIREBALLP KA9.1",
-	"QUANTUM FIREBALLP LM20.4",
-	"QUANTUM FIREBALLP KX13.6",
-	"QUANTUM FIREBALLP KX20.5",
-	"QUANTUM FIREBALLP KX27.3",
-	"QUANTUM FIREBALLP LM20.5",
-	NULL
-};
-
 static void pdc_old_disable_66MHz_clock(ide_hwif_t *);
 
 static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed)
@@ -151,19 +139,6 @@ static void pdc_old_disable_66MHz_clock(ide_hwif_t *hwif)
 	outb(clock & ~(hwif->channel ? 0x08 : 0x02), clock_reg);
 }
 
-static void pdc202xx_quirkproc(ide_drive_t *drive)
-{
-	const char **list, *m = (char *)&drive->id[ATA_ID_PROD];
-
-	for (list = pdc_quirk_drives; *list != NULL; list++)
-		if (strstr(m, *list) != NULL) {
-			drive->quirk_list = 2;
-			return;
-		}
-
-	drive->quirk_list = 0;
-}
-
 static void pdc202xx_dma_start(ide_drive_t *drive)
 {
 	if (drive->current_speed > XFER_UDMA_2)
@@ -256,13 +231,11 @@ static void __devinit pdc202ata4_fixup_irq(struct pci_dev *dev,
 static const struct ide_port_ops pdc20246_port_ops = {
 	.set_pio_mode		= pdc202xx_set_pio_mode,
 	.set_dma_mode		= pdc202xx_set_mode,
-	.quirkproc		= pdc202xx_quirkproc,
 };
 
 static const struct ide_port_ops pdc2026x_port_ops = {
 	.set_pio_mode		= pdc202xx_set_pio_mode,
 	.set_dma_mode		= pdc202xx_set_mode,
-	.quirkproc		= pdc202xx_quirkproc,
 	.cable_detect		= pdc2026x_cable_detect,
 };
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index c8f7b967371..6caaae0c774 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1453,6 +1453,7 @@ static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {}
 void ide_register_region(struct gendisk *);
 void ide_unregister_region(struct gendisk *);
 
+void ide_check_nien_quirk_list(ide_drive_t *);
 void ide_undecoded_slave(ide_drive_t *);
 
 void ide_port_apply_params(ide_hwif_t *);
-- 
cgit v1.2.3-70-g09d2


From 734affdcae20af4fec95e46a64fb29f063a15c19 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Sun, 7 Jun 2009 15:37:10 +0200
Subject: ide: add IDE_DFLAG_NIEN_QUIRK device flag

Add IDE_DFLAG_NIEN_QUIRK device flag and use it instead of
drive->quirk_list.

There should be no functional changes caused by this patch.

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/hpt366.c   | 2 +-
 drivers/ide/ide-eh.c   | 5 +++--
 drivers/ide/ide-io.c   | 8 ++++++--
 drivers/ide/ide-iops.c | 6 ++----
 include/linux/ide.h    | 2 +-
 5 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
index a2e9f6c65a9..7ce68ef6b90 100644
--- a/drivers/ide/hpt366.c
+++ b/drivers/ide/hpt366.c
@@ -727,7 +727,7 @@ static void hpt3xx_maskproc(ide_drive_t *drive, int mask)
 	struct pci_dev	*dev	= to_pci_dev(hwif->dev);
 	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
 
-	if (drive->quirk_list == 0)
+	if ((drive->dev_flags & IDE_DFLAG_NIEN_QUIRK) == 0)
 		return;
 
 	if (info->chip_type >= HPT370) {
diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c
index 39d589254d4..2b914197961 100644
--- a/drivers/ide/ide-eh.c
+++ b/drivers/ide/ide-eh.c
@@ -407,8 +407,9 @@ static ide_startstop_t do_reset1(ide_drive_t *drive, int do_not_try_atapi)
 	/* more than enough time */
 	udelay(10);
 	/* clear SRST, leave nIEN (unless device is on the quirk list) */
-	tp_ops->write_devctl(hwif, (drive->quirk_list == 2 ? 0 : ATA_NIEN) |
-			     ATA_DEVCTL_OBS);
+	tp_ops->write_devctl(hwif,
+		((drive->dev_flags & IDE_DFLAG_NIEN_QUIRK) ? 0 : ATA_NIEN) |
+		 ATA_DEVCTL_OBS);
 	/* more than enough time */
 	udelay(10);
 	hwif->poll_timeout = jiffies + WAIT_WORSTCASE;
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 9654bd34cf5..243cf6561e7 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -488,11 +488,15 @@ repeat:
 
 		if ((hwif->host->host_flags & IDE_HFLAG_SERIALIZE) &&
 		    hwif != prev_port) {
+			ide_drive_t *cur_dev =
+				prev_port ? prev_port->cur_dev : NULL;
+
 			/*
 			 * set nIEN for previous port, drives in the
-			 * quirk_list may not like intr setups/cleanups
+			 * quirk list may not like intr setups/cleanups
 			 */
-			if (prev_port && prev_port->cur_dev->quirk_list == 0)
+			if (cur_dev &&
+			    (cur_dev->dev_flags & IDE_DFLAG_NIEN_QUIRK) == 0)
 				prev_port->tp_ops->write_devctl(prev_port,
 								ATA_NIEN |
 								ATA_DEVCTL_OBS);
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index c55349537c2..fa047150a1c 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -300,11 +300,9 @@ void ide_check_nien_quirk_list(ide_drive_t *drive)
 
 	for (list = nien_quirk_list; *list != NULL; list++)
 		if (strstr(m, *list) != NULL) {
-			drive->quirk_list = 2;
+			drive->dev_flags |= IDE_DFLAG_NIEN_QUIRK;
 			return;
 		}
-
-	drive->quirk_list = 0;
 }
 
 int ide_driveid_update(ide_drive_t *drive)
@@ -389,7 +387,7 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed)
 
 	tp_ops->exec_command(hwif, ATA_CMD_SET_FEATURES);
 
-	if (drive->quirk_list == 2)
+	if (drive->dev_flags & IDE_DFLAG_NIEN_QUIRK)
 		tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS);
 
 	error = __ide_wait_stat(drive, drive->ready_stat,
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 6caaae0c774..a6c6a2fad7c 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -499,6 +499,7 @@ enum {
 	/* write protect */
 	IDE_DFLAG_WP			= (1 << 29),
 	IDE_DFLAG_FORMAT_IN_PROGRESS	= (1 << 30),
+	IDE_DFLAG_NIEN_QUIRK		= (1 << 31),
 };
 
 struct ide_drive_s {
@@ -530,7 +531,6 @@ struct ide_drive_s {
 	u8	waiting_for_dma;	/* dma currently in progress */
 	u8	dma;			/* atapi dma flag */
 
-        u8	quirk_list;	/* considered quirky, set for a specific host */
         u8	init_speed;	/* transfer rate set at boot */
         u8	current_speed;	/* current transfer rate set */
 	u8	desired_speed;	/* desired transfer rate set */
-- 
cgit v1.2.3-70-g09d2


From eae3f29cc73f83cc3f1891d3ad40021b5172c630 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 5 Jun 2009 04:03:35 +0000
Subject: net: num_dma_maps is not used

Get rid of num_dma_maps in struct skb_shared_info, as it seems unused.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 3 ---
 net/core/skb_dma_map.c | 1 -
 2 files changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7305da92be8..7485058125e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -194,9 +194,6 @@ struct skb_shared_info {
 	unsigned short  gso_type;
 	__be32          ip6_frag_id;
 	union skb_shared_tx tx_flags;
-#ifdef CONFIG_HAS_DMA
-	unsigned int	num_dma_maps;
-#endif
 	struct sk_buff	*frag_list;
 	struct skb_shared_hwtstamps hwtstamps;
 	skb_frag_t	frags[MAX_SKB_FRAGS];
diff --git a/net/core/skb_dma_map.c b/net/core/skb_dma_map.c
index 86234923a3b..7adb623ef66 100644
--- a/net/core/skb_dma_map.c
+++ b/net/core/skb_dma_map.c
@@ -30,7 +30,6 @@ int skb_dma_map(struct device *dev, struct sk_buff *skb,
 			goto unwind;
 		sp->dma_maps[i + 1] = map;
 	}
-	sp->num_dma_maps = i + 1;
 
 	return 0;
 
-- 
cgit v1.2.3-70-g09d2


From 042a53a9e437feaf2230dd2cadcecfae9c7bfe05 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 5 Jun 2009 04:04:16 +0000
Subject: net: skb_shared_info optimization

skb_dma_unmap() is quite expensive for small packets,
because we use two different cache lines from skb_shared_info.

One to access nr_frags, one to access dma_maps[0]

Instead of dma_maps being an array of MAX_SKB_FRAGS + 1 elements,
let dma_head alone in a new dma_head field, close to nr_frags,
to reduce cache lines misses.

Tested on my dev machine (bnx2 & tg3 adapters), nice speedup !

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bnx2.c             |  6 +++---
 drivers/net/e1000/e1000_main.c |  4 ++--
 drivers/net/e1000e/netdev.c    |  4 ++--
 drivers/net/igb/igb_main.c     |  5 ++---
 drivers/net/igbvf/netdev.c     |  5 ++---
 drivers/net/ixgb/ixgb_main.c   |  4 ++--
 drivers/net/ixgbe/ixgbe_main.c |  4 ++--
 drivers/net/tg3.c              | 10 +++++-----
 include/linux/skbuff.h         |  5 ++++-
 net/core/skb_dma_map.c         | 12 ++++++------
 10 files changed, 30 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index f53017250e0..391d2d47089 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -5487,7 +5487,7 @@ bnx2_run_loopback(struct bnx2 *bp, int loopback_mode)
 		dev_kfree_skb(skb);
 		return -EIO;
 	}
-	map = skb_shinfo(skb)->dma_maps[0];
+	map = skb_shinfo(skb)->dma_head;
 
 	REG_WR(bp, BNX2_HC_COMMAND,
 	       bp->hc_cmd | BNX2_HC_COMMAND_COAL_NOW_WO_INT);
@@ -6167,7 +6167,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	sp = skb_shinfo(skb);
-	mapping = sp->dma_maps[0];
+	mapping = sp->dma_head;
 
 	tx_buf = &txr->tx_buf_ring[ring_prod];
 	tx_buf->skb = skb;
@@ -6191,7 +6191,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		txbd = &txr->tx_desc_ring[ring_prod];
 
 		len = frag->size;
-		mapping = sp->dma_maps[i + 1];
+		mapping = sp->dma_maps[i];
 
 		txbd->tx_bd_haddr_hi = (u64) mapping >> 32;
 		txbd->tx_bd_haddr_lo = (u64) mapping & 0xffffffff;
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 05e87a59f1c..8d36743c814 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2998,7 +2998,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter,
 			size -= 4;
 
 		buffer_info->length = size;
-		buffer_info->dma = map[0] + offset;
+		buffer_info->dma = skb_shinfo(skb)->dma_head + offset;
 		buffer_info->time_stamp = jiffies;
 		buffer_info->next_to_watch = i;
 
@@ -3039,7 +3039,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter,
 				size -= 4;
 
 			buffer_info->length = size;
-			buffer_info->dma = map[f + 1] + offset;
+			buffer_info->dma = map[f] + offset;
 			buffer_info->time_stamp = jiffies;
 			buffer_info->next_to_watch = i;
 
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 38694c79edc..9043f1b845f 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -3916,7 +3916,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter,
 		buffer_info->length = size;
 		buffer_info->time_stamp = jiffies;
 		buffer_info->next_to_watch = i;
-		buffer_info->dma = map[0] + offset;
+		buffer_info->dma = skb_shinfo(skb)->dma_head + offset;
 		count++;
 
 		len -= size;
@@ -3947,7 +3947,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter,
 			buffer_info->length = size;
 			buffer_info->time_stamp = jiffies;
 			buffer_info->next_to_watch = i;
-			buffer_info->dma = map[f + 1] + offset;
+			buffer_info->dma = map[f] + offset;
 
 			len -= size;
 			offset += size;
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index 958b2879da4..ea17319624a 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -3139,8 +3139,7 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter,
 	/* set time_stamp *before* dma to help avoid a possible race */
 	buffer_info->time_stamp = jiffies;
 	buffer_info->next_to_watch = i;
-	buffer_info->dma = map[count];
-	count++;
+	buffer_info->dma = skb_shinfo(skb)->dma_head;
 
 	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
 		struct skb_frag_struct *frag;
@@ -3164,7 +3163,7 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter,
 	tx_ring->buffer_info[i].skb = skb;
 	tx_ring->buffer_info[first].next_to_watch = i;
 
-	return count;
+	return count + 1;
 }
 
 static inline void igb_tx_queue_adv(struct igb_adapter *adapter,
diff --git a/drivers/net/igbvf/netdev.c b/drivers/net/igbvf/netdev.c
index 5f7ba1a4990..22aadb7884f 100644
--- a/drivers/net/igbvf/netdev.c
+++ b/drivers/net/igbvf/netdev.c
@@ -2119,8 +2119,7 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
 	/* set time_stamp *before* dma to help avoid a possible race */
 	buffer_info->time_stamp = jiffies;
 	buffer_info->next_to_watch = i;
-	buffer_info->dma = map[count];
-	count++;
+	buffer_info->dma = skb_shinfo(skb)->dma_head;
 
 	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
 		struct skb_frag_struct *frag;
@@ -2144,7 +2143,7 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
 	tx_ring->buffer_info[i].skb = skb;
 	tx_ring->buffer_info[first].next_to_watch = i;
 
-	return count;
+	return count + 1;
 }
 
 static inline void igbvf_tx_queue_adv(struct igbvf_adapter *adapter,
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index 6eb7f37a113..9c897cf86b9 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -1300,7 +1300,7 @@ ixgb_tx_map(struct ixgb_adapter *adapter, struct sk_buff *skb,
 		buffer_info->length = size;
 		WARN_ON(buffer_info->dma != 0);
 		buffer_info->time_stamp = jiffies;
-		buffer_info->dma = map[0] + offset;
+		buffer_info->dma = skb_shinfo(skb)->dma_head + offset;
 			pci_map_single(adapter->pdev,
 				skb->data + offset,
 				size,
@@ -1340,7 +1340,7 @@ ixgb_tx_map(struct ixgb_adapter *adapter, struct sk_buff *skb,
 
 			buffer_info->length = size;
 			buffer_info->time_stamp = jiffies;
-			buffer_info->dma = map[f + 1] + offset;
+			buffer_info->dma = map[f] + offset;
 			buffer_info->next_to_watch = 0;
 
 			len -= size;
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index d36003cbb6d..09994e920d5 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -4837,7 +4837,7 @@ static int ixgbe_tx_map(struct ixgbe_adapter *adapter,
 		size = min(len, (uint)IXGBE_MAX_DATA_PER_TXD);
 
 		tx_buffer_info->length = size;
-		tx_buffer_info->dma = map[0] + offset;
+		tx_buffer_info->dma = skb_shinfo(skb)->dma_head + offset;
 		tx_buffer_info->time_stamp = jiffies;
 		tx_buffer_info->next_to_watch = i;
 
@@ -4869,7 +4869,7 @@ static int ixgbe_tx_map(struct ixgbe_adapter *adapter,
 			size = min(len, (uint)IXGBE_MAX_DATA_PER_TXD);
 
 			tx_buffer_info->length = size;
-			tx_buffer_info->dma = map[f + 1] + offset;
+			tx_buffer_info->dma = map[f] + offset;
 			tx_buffer_info->time_stamp = jiffies;
 			tx_buffer_info->next_to_watch = i;
 
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index a39b534fb43..46a3f86125b 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -5021,7 +5021,7 @@ static int tigon3_dma_hwbug_workaround(struct tg3 *tp, struct sk_buff *skb,
 		/* New SKB is guaranteed to be linear. */
 		entry = *start;
 		ret = skb_dma_map(&tp->pdev->dev, new_skb, DMA_TO_DEVICE);
-		new_addr = skb_shinfo(new_skb)->dma_maps[0];
+		new_addr = skb_shinfo(new_skb)->dma_head;
 
 		/* Make sure new skb does not cross any 4G boundaries.
 		 * Drop the packet if it does.
@@ -5155,7 +5155,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	sp = skb_shinfo(skb);
 
-	mapping = sp->dma_maps[0];
+	mapping = sp->dma_head;
 
 	tp->tx_buffers[entry].skb = skb;
 
@@ -5173,7 +5173,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
 			len = frag->size;
-			mapping = sp->dma_maps[i + 1];
+			mapping = sp->dma_maps[i];
 			tp->tx_buffers[entry].skb = NULL;
 
 			tg3_set_txd(tp, entry, mapping, len,
@@ -5331,7 +5331,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
 
 	sp = skb_shinfo(skb);
 
-	mapping = sp->dma_maps[0];
+	mapping = sp->dma_head;
 
 	tp->tx_buffers[entry].skb = skb;
 
@@ -5356,7 +5356,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
 			len = frag->size;
-			mapping = sp->dma_maps[i + 1];
+			mapping = sp->dma_maps[i];
 
 			tp->tx_buffers[entry].skb = NULL;
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7485058125e..aad484cd586 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -189,6 +189,9 @@ struct skb_shared_info {
 	atomic_t	dataref;
 	unsigned short	nr_frags;
 	unsigned short	gso_size;
+#ifdef CONFIG_HAS_DMA
+	dma_addr_t	dma_head;
+#endif
 	/* Warning: this field is not always filled in (UFO)! */
 	unsigned short	gso_segs;
 	unsigned short  gso_type;
@@ -198,7 +201,7 @@ struct skb_shared_info {
 	struct skb_shared_hwtstamps hwtstamps;
 	skb_frag_t	frags[MAX_SKB_FRAGS];
 #ifdef CONFIG_HAS_DMA
-	dma_addr_t	dma_maps[MAX_SKB_FRAGS + 1];
+	dma_addr_t	dma_maps[MAX_SKB_FRAGS];
 #endif
 	/* Intermediate layers must ensure that destructor_arg
 	 * remains valid until skb destructor */
diff --git a/net/core/skb_dma_map.c b/net/core/skb_dma_map.c
index 7adb623ef66..79687dfd695 100644
--- a/net/core/skb_dma_map.c
+++ b/net/core/skb_dma_map.c
@@ -20,7 +20,7 @@ int skb_dma_map(struct device *dev, struct sk_buff *skb,
 	if (dma_mapping_error(dev, map))
 		goto out_err;
 
-	sp->dma_maps[0] = map;
+	sp->dma_head = map;
 	for (i = 0; i < sp->nr_frags; i++) {
 		skb_frag_t *fp = &sp->frags[i];
 
@@ -28,7 +28,7 @@ int skb_dma_map(struct device *dev, struct sk_buff *skb,
 				   fp->size, dir);
 		if (dma_mapping_error(dev, map))
 			goto unwind;
-		sp->dma_maps[i + 1] = map;
+		sp->dma_maps[i] = map;
 	}
 
 	return 0;
@@ -37,10 +37,10 @@ unwind:
 	while (--i >= 0) {
 		skb_frag_t *fp = &sp->frags[i];
 
-		dma_unmap_page(dev, sp->dma_maps[i + 1],
+		dma_unmap_page(dev, sp->dma_maps[i],
 			       fp->size, dir);
 	}
-	dma_unmap_single(dev, sp->dma_maps[0],
+	dma_unmap_single(dev, sp->dma_head,
 			 skb_headlen(skb), dir);
 out_err:
 	return -ENOMEM;
@@ -53,12 +53,12 @@ void skb_dma_unmap(struct device *dev, struct sk_buff *skb,
 	struct skb_shared_info *sp = skb_shinfo(skb);
 	int i;
 
-	dma_unmap_single(dev, sp->dma_maps[0],
+	dma_unmap_single(dev, sp->dma_head,
 			 skb_headlen(skb), dir);
 	for (i = 0; i < sp->nr_frags; i++) {
 		skb_frag_t *fp = &sp->frags[i];
 
-		dma_unmap_page(dev, sp->dma_maps[i + 1],
+		dma_unmap_page(dev, sp->dma_maps[i],
 			       fp->size, dir);
 	}
 }
-- 
cgit v1.2.3-70-g09d2


From 4e329972052c3649367b91de783f6293b8653cb2 Mon Sep 17 00:00:00 2001
From: Tilman Schmidt <tilman@imap.cc>
Date: Sun, 7 Jun 2009 09:09:23 +0000
Subject: isdn: rename capi_ctr_reseted() to capi_ctr_down()

Change the name of the Kernel CAPI exported function capi_ctr_reseted()
to something representing its purpose better.

Impact: renaming, no functional change
Signed-off-by: Tilman Schmidt <tilman@imap.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/isdn/INTERFACE.CAPI | 4 ++--
 drivers/isdn/capi/kcapi.c         | 8 ++++----
 drivers/isdn/hardware/avm/b1.c    | 2 +-
 drivers/isdn/hardware/avm/b1dma.c | 2 +-
 drivers/isdn/hardware/avm/c4.c    | 4 ++--
 drivers/isdn/hardware/avm/t1isa.c | 2 +-
 drivers/isdn/hysdn/hycapi.c       | 4 ++--
 include/linux/isdn/capilli.h      | 2 +-
 net/bluetooth/cmtp/capi.c         | 2 +-
 9 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/Documentation/isdn/INTERFACE.CAPI b/Documentation/isdn/INTERFACE.CAPI
index 786d619b36e..46315381616 100644
--- a/Documentation/isdn/INTERFACE.CAPI
+++ b/Documentation/isdn/INTERFACE.CAPI
@@ -45,7 +45,7 @@ From then on, Kernel CAPI may call the registered callback functions for the
 device.
 
 If the device becomes unusable for any reason (shutdown, disconnect ...), the
-driver has to call capi_ctr_reseted(). This will prevent further calls to the
+driver has to call capi_ctr_down(). This will prevent further calls to the
 callback functions by Kernel CAPI.
 
 
@@ -166,7 +166,7 @@ int detach_capi_ctr(struct capi_ctr *ctrlr)
 	register/unregister a device (controller) with Kernel CAPI
 
 void capi_ctr_ready(struct capi_ctr *ctrlr)
-void capi_ctr_reseted(struct capi_ctr *ctrlr)
+void capi_ctr_down(struct capi_ctr *ctrlr)
 	signal controller ready/not ready
 
 void capi_ctr_suspend_output(struct capi_ctr *ctrlr)
diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c
index f33170368cd..57d26360f64 100644
--- a/drivers/isdn/capi/kcapi.c
+++ b/drivers/isdn/capi/kcapi.c
@@ -377,14 +377,14 @@ void capi_ctr_ready(struct capi_ctr * card)
 EXPORT_SYMBOL(capi_ctr_ready);
 
 /**
- * capi_ctr_reseted() - signal CAPI controller reset
+ * capi_ctr_down() - signal CAPI controller not ready
  * @card:	controller descriptor structure.
  *
  * Called by hardware driver to signal that the controller is down and
  * unavailable for use.
  */
 
-void capi_ctr_reseted(struct capi_ctr * card)
+void capi_ctr_down(struct capi_ctr * card)
 {
 	u16 appl;
 
@@ -413,7 +413,7 @@ void capi_ctr_reseted(struct capi_ctr * card)
 	notify_push(KCI_CONTRDOWN, card->cnr, 0, 0);
 }
 
-EXPORT_SYMBOL(capi_ctr_reseted);
+EXPORT_SYMBOL(capi_ctr_down);
 
 /**
  * capi_ctr_suspend_output() - suspend controller
@@ -517,7 +517,7 @@ EXPORT_SYMBOL(attach_capi_ctr);
 int detach_capi_ctr(struct capi_ctr *card)
 {
         if (card->cardstate != CARD_DETECTED)
-		capi_ctr_reseted(card);
+		capi_ctr_down(card);
 
 	ncards--;
 
diff --git a/drivers/isdn/hardware/avm/b1.c b/drivers/isdn/hardware/avm/b1.c
index abf05ec3176..a7c0083e78a 100644
--- a/drivers/isdn/hardware/avm/b1.c
+++ b/drivers/isdn/hardware/avm/b1.c
@@ -330,7 +330,7 @@ void b1_reset_ctr(struct capi_ctr *ctrl)
 	spin_lock_irqsave(&card->lock, flags);
 	capilib_release(&cinfo->ncci_head);
 	spin_unlock_irqrestore(&card->lock, flags);
-	capi_ctr_reseted(ctrl);
+	capi_ctr_down(ctrl);
 }
 
 void b1_register_appl(struct capi_ctr *ctrl,
diff --git a/drivers/isdn/hardware/avm/b1dma.c b/drivers/isdn/hardware/avm/b1dma.c
index da34b98e3de..0e84aaae43f 100644
--- a/drivers/isdn/hardware/avm/b1dma.c
+++ b/drivers/isdn/hardware/avm/b1dma.c
@@ -759,7 +759,7 @@ void b1dma_reset_ctr(struct capi_ctr *ctrl)
 	memset(cinfo->version, 0, sizeof(cinfo->version));
 	capilib_release(&cinfo->ncci_head);
 	spin_unlock_irqrestore(&card->lock, flags);
-	capi_ctr_reseted(ctrl);
+	capi_ctr_down(ctrl);
 }
 
 /* ------------------------------------------------------------- */
diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c
index 9df1d3f66c8..6833301a45f 100644
--- a/drivers/isdn/hardware/avm/c4.c
+++ b/drivers/isdn/hardware/avm/c4.c
@@ -681,7 +681,7 @@ static irqreturn_t c4_handle_interrupt(avmcard *card)
 			spin_lock_irqsave(&card->lock, flags);
 			capilib_release(&cinfo->ncci_head);
 			spin_unlock_irqrestore(&card->lock, flags);
-			capi_ctr_reseted(&cinfo->capi_ctrl);
+			capi_ctr_down(&cinfo->capi_ctrl);
 		}
 		card->nlogcontr = 0;
 		return IRQ_HANDLED;
@@ -909,7 +909,7 @@ static void c4_reset_ctr(struct capi_ctr *ctrl)
         for (i=0; i < card->nr_controllers; i++) {
 		cinfo = &card->ctrlinfo[i];
 		memset(cinfo->version, 0, sizeof(cinfo->version));
-		capi_ctr_reseted(&cinfo->capi_ctrl);
+		capi_ctr_down(&cinfo->capi_ctrl);
 	}
 	card->nlogcontr = 0;
 }
diff --git a/drivers/isdn/hardware/avm/t1isa.c b/drivers/isdn/hardware/avm/t1isa.c
index e7724493738..1c53fd49adb 100644
--- a/drivers/isdn/hardware/avm/t1isa.c
+++ b/drivers/isdn/hardware/avm/t1isa.c
@@ -339,7 +339,7 @@ static void t1isa_reset_ctr(struct capi_ctr *ctrl)
 	spin_lock_irqsave(&card->lock, flags);
 	capilib_release(&cinfo->ncci_head);
 	spin_unlock_irqrestore(&card->lock, flags);
-	capi_ctr_reseted(ctrl);
+	capi_ctr_down(ctrl);
 }
 
 static void t1isa_remove(struct pci_dev *pdev)
diff --git a/drivers/isdn/hysdn/hycapi.c b/drivers/isdn/hysdn/hycapi.c
index 53f6ad1235d..4ffaa14b9fc 100644
--- a/drivers/isdn/hysdn/hycapi.c
+++ b/drivers/isdn/hysdn/hycapi.c
@@ -67,7 +67,7 @@ hycapi_reset_ctr(struct capi_ctr *ctrl)
 	printk(KERN_NOTICE "HYCAPI hycapi_reset_ctr\n");
 #endif
 	capilib_release(&cinfo->ncci_head);
-	capi_ctr_reseted(ctrl);
+	capi_ctr_down(ctrl);
 }
 
 /******************************
@@ -347,7 +347,7 @@ int hycapi_capi_stop(hysdn_card *card)
 	if(cinfo) {
 		ctrl = &cinfo->capi_ctrl;
 /*		ctrl->suspend_output(ctrl); */
-		capi_ctr_reseted(ctrl);
+		capi_ctr_down(ctrl);
 	}
 	return 0;
 }
diff --git a/include/linux/isdn/capilli.h b/include/linux/isdn/capilli.h
index 35e9b0fd014..7acb87a4487 100644
--- a/include/linux/isdn/capilli.h
+++ b/include/linux/isdn/capilli.h
@@ -79,7 +79,7 @@ int attach_capi_ctr(struct capi_ctr *);
 int detach_capi_ctr(struct capi_ctr *);
 
 void capi_ctr_ready(struct capi_ctr * card);
-void capi_ctr_reseted(struct capi_ctr * card);
+void capi_ctr_down(struct capi_ctr * card);
 void capi_ctr_suspend_output(struct capi_ctr * card);
 void capi_ctr_resume_output(struct capi_ctr * card);
 void capi_ctr_handle_message(struct capi_ctr * card, u16 appl, struct sk_buff *skb);
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index 78958c0f9a4..97f8d68d574 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -382,7 +382,7 @@ static void cmtp_reset_ctr(struct capi_ctr *ctrl)
 
 	BT_DBG("ctrl %p", ctrl);
 
-	capi_ctr_reseted(ctrl);
+	capi_ctr_down(ctrl);
 
 	atomic_inc(&session->terminate);
 	cmtp_schedule(session);
-- 
cgit v1.2.3-70-g09d2


From 8db4dc46dcff7568896aa1eae4bd07620ce3dd93 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <gustavo@las.ic.unicamp.br>
Date: Mon, 20 Apr 2009 01:31:05 -0300
Subject: Bluetooth: Use macros for L2CAP channel identifiers

Use macros instead of hardcoded numbers to make the L2CAP source code
more readable.

Signed-off-by: Gustavo F. Padovan <gustavo@las.ic.unicamp.br>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h |  6 ++++++
 net/bluetooth/l2cap.c         | 18 +++++++++---------
 2 files changed, 15 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index f566aa1f0a4..ed4ba913d94 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -106,6 +106,12 @@ struct l2cap_conn_rsp {
 	__le16     status;
 } __attribute__ ((packed));
 
+/* channel indentifier */
+#define L2CAP_CID_SIGNALING	0x0001
+#define L2CAP_CID_CONN_LESS	0x0002
+#define L2CAP_CID_DYN_START	0x0040
+#define L2CAP_CID_DYN_END	0xffff
+
 /* connect result */
 #define L2CAP_CR_SUCCESS    0x0000
 #define L2CAP_CR_PEND       0x0001
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index ca4d3b40d5c..ff1744e34cd 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -161,9 +161,9 @@ static inline struct sock *l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8
 
 static u16 l2cap_alloc_cid(struct l2cap_chan_list *l)
 {
-	u16 cid = 0x0040;
+	u16 cid = L2CAP_CID_DYN_START;
 
-	for (; cid < 0xffff; cid++) {
+	for (; cid < L2CAP_CID_DYN_END; cid++) {
 		if(!__l2cap_get_chan_by_scid(l, cid))
 			return cid;
 	}
@@ -215,13 +215,13 @@ static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct so
 		l2cap_pi(sk)->scid = l2cap_alloc_cid(l);
 	} else if (sk->sk_type == SOCK_DGRAM) {
 		/* Connectionless socket */
-		l2cap_pi(sk)->scid = 0x0002;
-		l2cap_pi(sk)->dcid = 0x0002;
+		l2cap_pi(sk)->scid = L2CAP_CID_CONN_LESS;
+		l2cap_pi(sk)->dcid = L2CAP_CID_CONN_LESS;
 		l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU;
 	} else {
 		/* Raw socket can send/recv signalling messages only */
-		l2cap_pi(sk)->scid = 0x0001;
-		l2cap_pi(sk)->dcid = 0x0001;
+		l2cap_pi(sk)->scid = L2CAP_CID_SIGNALING;
+		l2cap_pi(sk)->dcid = L2CAP_CID_SIGNALING;
 		l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU;
 	}
 
@@ -1598,7 +1598,7 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
 
 	lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
 	lh->len = cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen);
-	lh->cid = cpu_to_le16(0x0001);
+	lh->cid = cpu_to_le16(L2CAP_CID_SIGNALING);
 
 	cmd = (struct l2cap_cmd_hdr *) skb_put(skb, L2CAP_CMD_HDR_SIZE);
 	cmd->code  = code;
@@ -2420,11 +2420,11 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb)
 	BT_DBG("len %d, cid 0x%4.4x", len, cid);
 
 	switch (cid) {
-	case 0x0001:
+	case L2CAP_CID_SIGNALING:
 		l2cap_sig_channel(conn, skb);
 		break;
 
-	case 0x0002:
+	case L2CAP_CID_CONN_LESS:
 		psm = get_unaligned((__le16 *) skb->data);
 		skb_pull(skb, 2);
 		l2cap_conless_channel(conn, psm, skb);
-- 
cgit v1.2.3-70-g09d2


From 589d27464846c7cb758f93d9ee380c8ff05a161c Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <gustavo@las.ic.unicamp.br>
Date: Mon, 20 Apr 2009 01:31:07 -0300
Subject: Bluetooth: Use macro for L2CAP hint mask on receiving config request

Using the L2CAP_CONF_HINT macro is easier to understand than using a
hardcoded 0x80 value.

Signed-off-by: Gustavo F. Padovan <gustavo@las.ic.unicamp.br>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 2 ++
 net/bluetooth/l2cap.c         | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index ed4ba913d94..300b63f0eec 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -149,6 +149,8 @@ struct l2cap_conf_opt {
 } __attribute__ ((packed));
 #define L2CAP_CONF_OPT_SIZE	2
 
+#define L2CAP_CONF_HINT		0x80
+
 #define L2CAP_CONF_MTU		0x01
 #define L2CAP_CONF_FLUSH_TO	0x02
 #define L2CAP_CONF_QOS		0x03
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index ff1744e34cd..c0c091533bd 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1739,7 +1739,7 @@ static int l2cap_parse_conf_req(struct sock *sk, void *data)
 	while (len >= L2CAP_CONF_OPT_SIZE) {
 		len -= l2cap_get_conf_opt(&req, &type, &olen, &val);
 
-		hint  = type & 0x80;
+		hint  = type & L2CAP_CONF_HINT;
 		type &= 0x7f;
 
 		switch (type) {
-- 
cgit v1.2.3-70-g09d2


From 47ec1dcd696d56a7c396e5838516a566ecd4b03d Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sat, 2 May 2009 18:57:55 -0700
Subject: Bluetooth: Add basic constants for L2CAP ERTM support and use them

This adds the basic constants required to add support for L2CAP Enhanced
Retransmission feature.

Based on a patch from Nathan Holstein <nathan@lampreynetworks.com>

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 63 ++++++++++++++++++++++++++++---------------
 net/bluetooth/l2cap.c         |  6 ++---
 2 files changed, 45 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 300b63f0eec..e919fca1072 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -26,8 +26,13 @@
 #define __L2CAP_H
 
 /* L2CAP defaults */
-#define L2CAP_DEFAULT_MTU	672
-#define L2CAP_DEFAULT_FLUSH_TO	0xFFFF
+#define L2CAP_DEFAULT_MTU		672
+#define L2CAP_DEFAULT_FLUSH_TO		0xffff
+#define L2CAP_DEFAULT_RX_WINDOW		1
+#define L2CAP_DEFAULT_MAX_RECEIVE	1
+#define L2CAP_DEFAULT_RETRANS_TO	300    /* 300 milliseconds */
+#define L2CAP_DEFAULT_MONITOR_TO	1000   /* 1 second */
+#define L2CAP_DEFAULT_MAX_RX_APDU	0xfff7
 
 #define L2CAP_CONN_TIMEOUT	(40000) /* 40 seconds */
 #define L2CAP_INFO_TIMEOUT	(4000)  /*  4 seconds */
@@ -64,17 +69,29 @@ struct l2cap_conninfo {
 #define L2CAP_LM_SECURE		0x0020
 
 /* L2CAP command codes */
-#define L2CAP_COMMAND_REJ 0x01
-#define L2CAP_CONN_REQ    0x02
-#define L2CAP_CONN_RSP    0x03
-#define L2CAP_CONF_REQ    0x04
-#define L2CAP_CONF_RSP    0x05
-#define L2CAP_DISCONN_REQ 0x06
-#define L2CAP_DISCONN_RSP 0x07
-#define L2CAP_ECHO_REQ    0x08
-#define L2CAP_ECHO_RSP    0x09
-#define L2CAP_INFO_REQ    0x0a
-#define L2CAP_INFO_RSP    0x0b
+#define L2CAP_COMMAND_REJ	0x01
+#define L2CAP_CONN_REQ		0x02
+#define L2CAP_CONN_RSP		0x03
+#define L2CAP_CONF_REQ		0x04
+#define L2CAP_CONF_RSP		0x05
+#define L2CAP_DISCONN_REQ	0x06
+#define L2CAP_DISCONN_RSP	0x07
+#define L2CAP_ECHO_REQ		0x08
+#define L2CAP_ECHO_RSP		0x09
+#define L2CAP_INFO_REQ		0x0a
+#define L2CAP_INFO_RSP		0x0b
+
+/* L2CAP feature mask */
+#define L2CAP_FEAT_FLOWCTL	0x00000001
+#define L2CAP_FEAT_RETRANS	0x00000002
+#define L2CAP_FEAT_ERTM		0x00000008
+#define L2CAP_FEAT_STREAMING	0x00000010
+#define L2CAP_FEAT_FCS		0x00000020
+#define L2CAP_FEAT_FIXED_CHAN	0x00000080
+
+/* L2CAP checksum option */
+#define L2CAP_FCS_NONE		0x00
+#define L2CAP_FCS_CRC16		0x01
 
 /* L2CAP structures */
 struct l2cap_hdr {
@@ -113,16 +130,16 @@ struct l2cap_conn_rsp {
 #define L2CAP_CID_DYN_END	0xffff
 
 /* connect result */
-#define L2CAP_CR_SUCCESS    0x0000
-#define L2CAP_CR_PEND       0x0001
-#define L2CAP_CR_BAD_PSM    0x0002
-#define L2CAP_CR_SEC_BLOCK  0x0003
-#define L2CAP_CR_NO_MEM     0x0004
+#define L2CAP_CR_SUCCESS	0x0000
+#define L2CAP_CR_PEND		0x0001
+#define L2CAP_CR_BAD_PSM	0x0002
+#define L2CAP_CR_SEC_BLOCK	0x0003
+#define L2CAP_CR_NO_MEM		0x0004
 
 /* connect status */
-#define L2CAP_CS_NO_INFO      0x0000
-#define L2CAP_CS_AUTHEN_PEND  0x0001
-#define L2CAP_CS_AUTHOR_PEND  0x0002
+#define L2CAP_CS_NO_INFO	0x0000
+#define L2CAP_CS_AUTHEN_PEND	0x0001
+#define L2CAP_CS_AUTHOR_PEND	0x0002
 
 struct l2cap_conf_req {
 	__le16     dcid;
@@ -150,11 +167,13 @@ struct l2cap_conf_opt {
 #define L2CAP_CONF_OPT_SIZE	2
 
 #define L2CAP_CONF_HINT		0x80
+#define L2CAP_CONF_MASK		0x7f
 
 #define L2CAP_CONF_MTU		0x01
 #define L2CAP_CONF_FLUSH_TO	0x02
 #define L2CAP_CONF_QOS		0x03
 #define L2CAP_CONF_RFC		0x04
+#define L2CAP_CONF_FCS		0x05
 
 #define L2CAP_CONF_MAX_SIZE	22
 
@@ -170,6 +189,8 @@ struct l2cap_conf_rfc {
 #define L2CAP_MODE_BASIC	0x00
 #define L2CAP_MODE_RETRANS	0x01
 #define L2CAP_MODE_FLOWCTL	0x02
+#define L2CAP_MODE_ERTM		0x03
+#define L2CAP_MODE_STREAM	0x04
 
 struct l2cap_disconn_req {
 	__le16     dcid;
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index f6a82f203fd..338295007c4 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -52,7 +52,7 @@
 
 #define VERSION "2.13"
 
-static u32 l2cap_feat_mask = 0x0080;
+static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN;
 static u8 l2cap_fixed_chan[8] = { 0x02, };
 
 static const struct proto_ops l2cap_sock_ops;
@@ -1747,7 +1747,7 @@ static int l2cap_parse_conf_req(struct sock *sk, void *data)
 		len -= l2cap_get_conf_opt(&req, &type, &olen, &val);
 
 		hint  = type & L2CAP_CONF_HINT;
-		type &= 0x7f;
+		type &= L2CAP_CONF_MASK;
 
 		switch (type) {
 		case L2CAP_CONF_MTU:
@@ -2244,7 +2244,7 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cm
 	if (type == L2CAP_IT_FEAT_MASK) {
 		conn->feat_mask = get_unaligned_le32(rsp->data);
 
-		if (conn->feat_mask & 0x0080) {
+		if (conn->feat_mask & L2CAP_FEAT_FIXED_CHAN) {
 			struct l2cap_info_req req;
 			req.type = cpu_to_le16(L2CAP_IT_FIXED_CHAN);
 
-- 
cgit v1.2.3-70-g09d2


From b4324b5dc5b62ba76372e1bf8927230cf744df66 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sun, 7 Jun 2009 18:06:51 +0200
Subject: Bluetooth: Remove pointless endian conversion helpers

The Bluetooth source uses some endian conversion helpers, that in the end
translate to kernel standard routines. So remove this obfuscation since it
is fully pointless.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/bluetooth.h |  6 ------
 net/bluetooth/l2cap.c             | 19 ++++++++++---------
 net/bluetooth/rfcomm/core.c       | 12 ++++++------
 3 files changed, 16 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 3ad5390a4dd..cf8bcb25533 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -81,12 +81,6 @@ enum {
 	BT_CLOSED
 };
 
-/* Endianness conversions */
-#define htobs(a)	__cpu_to_le16(a)
-#define htobl(a)	__cpu_to_le32(a)
-#define btohs(a)	__le16_to_cpu(a)
-#define btohl(a)	__le32_to_cpu(a)
-
 /* BD Address */
 typedef struct {
 	__u8 b[6];
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 338295007c4..bd0a4c1bced 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -812,7 +812,7 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
 		goto done;
 	}
 
-	if (la.l2_psm && btohs(la.l2_psm) < 0x1001 &&
+	if (la.l2_psm && __le16_to_cpu(la.l2_psm) < 0x1001 &&
 				!capable(CAP_NET_BIND_SERVICE)) {
 		err = -EACCES;
 		goto done;
@@ -829,7 +829,8 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
 		l2cap_pi(sk)->sport = la.l2_psm;
 		sk->sk_state = BT_BOUND;
 
-		if (btohs(la.l2_psm) == 0x0001 || btohs(la.l2_psm) == 0x0003)
+		if (__le16_to_cpu(la.l2_psm) == 0x0001 ||
+					__le16_to_cpu(la.l2_psm) == 0x0003)
 			l2cap_pi(sk)->sec_level = BT_SECURITY_SDP;
 	}
 
@@ -1015,9 +1016,9 @@ static int l2cap_sock_listen(struct socket *sock, int backlog)
 		write_lock_bh(&l2cap_sk_list.lock);
 
 		for (psm = 0x1001; psm < 0x1100; psm += 2)
-			if (!__l2cap_get_sock_by_addr(htobs(psm), src)) {
-				l2cap_pi(sk)->psm   = htobs(psm);
-				l2cap_pi(sk)->sport = htobs(psm);
+			if (!__l2cap_get_sock_by_addr(cpu_to_le16(psm), src)) {
+				l2cap_pi(sk)->psm   = cpu_to_le16(psm);
+				l2cap_pi(sk)->sport = cpu_to_le16(psm);
 				err = 0;
 				break;
 			}
@@ -1106,11 +1107,11 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l
 	if (peer) {
 		la->l2_psm = l2cap_pi(sk)->psm;
 		bacpy(&la->l2_bdaddr, &bt_sk(sk)->dst);
-		la->l2_cid = htobs(l2cap_pi(sk)->dcid);
+		la->l2_cid = cpu_to_le16(l2cap_pi(sk)->dcid);
 	} else {
 		la->l2_psm = l2cap_pi(sk)->sport;
 		bacpy(&la->l2_bdaddr, &bt_sk(sk)->src);
-		la->l2_cid = htobs(l2cap_pi(sk)->scid);
+		la->l2_cid = cpu_to_le16(l2cap_pi(sk)->scid);
 	}
 
 	return 0;
@@ -2720,8 +2721,8 @@ static ssize_t l2cap_sysfs_show(struct class *dev, char *buf)
 
 		str += sprintf(str, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d\n",
 				batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst),
-				sk->sk_state, btohs(pi->psm), pi->scid, pi->dcid,
-				pi->imtu, pi->omtu, pi->sec_level);
+				sk->sk_state, __le16_to_cpu(pi->psm), pi->scid,
+				pi->dcid, pi->imtu, pi->omtu, pi->sec_level);
 	}
 
 	read_unlock_bh(&l2cap_sk_list.lock);
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 374536e050a..e50566ebf9f 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -679,7 +679,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst
 
 	bacpy(&addr.l2_bdaddr, dst);
 	addr.l2_family = AF_BLUETOOTH;
-	addr.l2_psm    = htobs(RFCOMM_PSM);
+	addr.l2_psm    = cpu_to_le16(RFCOMM_PSM);
 	addr.l2_cid    = 0;
 	*err = kernel_connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK);
 	if (*err == 0 || *err == -EINPROGRESS)
@@ -852,9 +852,9 @@ static int rfcomm_send_pn(struct rfcomm_session *s, int cr, struct rfcomm_dlc *d
 	}
 
 	if (cr && channel_mtu >= 0)
-		pn->mtu = htobs(channel_mtu);
+		pn->mtu = cpu_to_le16(channel_mtu);
 	else
-		pn->mtu = htobs(d->mtu);
+		pn->mtu = cpu_to_le16(d->mtu);
 
 	*ptr = __fcs(buf); ptr++;
 
@@ -1056,7 +1056,7 @@ static void rfcomm_make_uih(struct sk_buff *skb, u8 addr)
 
 	if (len > 127) {
 		hdr = (void *) skb_push(skb, 4);
-		put_unaligned(htobs(__len16(len)), (__le16 *) &hdr->len);
+		put_unaligned(cpu_to_le16(__len16(len)), (__le16 *) &hdr->len);
 	} else {
 		hdr = (void *) skb_push(skb, 3);
 		hdr->len = __len8(len);
@@ -1289,7 +1289,7 @@ static int rfcomm_apply_pn(struct rfcomm_dlc *d, int cr, struct rfcomm_pn *pn)
 
 	d->priority = pn->priority;
 
-	d->mtu = btohs(pn->mtu);
+	d->mtu = __le16_to_cpu(pn->mtu);
 
 	if (cr && d->mtu > s->mtu)
 		d->mtu = s->mtu;
@@ -1922,7 +1922,7 @@ static int rfcomm_add_listener(bdaddr_t *ba)
 	/* Bind socket */
 	bacpy(&addr.l2_bdaddr, ba);
 	addr.l2_family = AF_BLUETOOTH;
-	addr.l2_psm    = htobs(RFCOMM_PSM);
+	addr.l2_psm    = cpu_to_le16(RFCOMM_PSM);
 	addr.l2_cid    = 0;
 	err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr));
 	if (err < 0) {
-- 
cgit v1.2.3-70-g09d2


From 611b30f74b5d8ca036a9923b3bf6e0ee10a21a53 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 8 Jun 2009 14:41:38 +0200
Subject: Bluetooth: Add native RFKILL soft-switch support for all devices

With the re-write of the RFKILL subsystem it is now possible to easily
integrate RFKILL soft-switch support into the Bluetooth subsystem. All
Bluetooth devices will now get automatically RFKILL support.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |  2 ++
 net/bluetooth/hci_core.c         | 41 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 42 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 73aead222b3..c4ca4228b08 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -137,6 +137,8 @@ struct hci_dev {
 	struct device		*parent;
 	struct device		dev;
 
+	struct rfkill		*rfkill;
+
 	struct module 		*owner;
 
 	int (*open)(struct hci_dev *hdev);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index cd061510b6b..406ad07cdea 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -39,6 +39,7 @@
 #include <linux/skbuff.h>
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
+#include <linux/rfkill.h>
 #include <net/sock.h>
 
 #include <asm/system.h>
@@ -476,6 +477,11 @@ int hci_dev_open(__u16 dev)
 
 	hci_req_lock(hdev);
 
+	if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) {
+		ret = -ERFKILL;
+		goto done;
+	}
+
 	if (test_bit(HCI_UP, &hdev->flags)) {
 		ret = -EALREADY;
 		goto done;
@@ -813,6 +819,24 @@ int hci_get_dev_info(void __user *arg)
 
 /* ---- Interface to HCI drivers ---- */
 
+static int hci_rfkill_set_block(void *data, bool blocked)
+{
+	struct hci_dev *hdev = data;
+
+	BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked);
+
+	if (!blocked)
+		return 0;
+
+	hci_dev_do_close(hdev);
+
+	return 0;
+}
+
+static const struct rfkill_ops hci_rfkill_ops = {
+	.set_block = hci_rfkill_set_block,
+};
+
 /* Alloc HCI device */
 struct hci_dev *hci_alloc_dev(void)
 {
@@ -844,7 +868,8 @@ int hci_register_dev(struct hci_dev *hdev)
 	struct list_head *head = &hci_dev_list, *p;
 	int i, id = 0;
 
-	BT_DBG("%p name %s type %d owner %p", hdev, hdev->name, hdev->type, hdev->owner);
+	BT_DBG("%p name %s type %d owner %p", hdev, hdev->name,
+						hdev->type, hdev->owner);
 
 	if (!hdev->open || !hdev->close || !hdev->destruct)
 		return -EINVAL;
@@ -900,6 +925,15 @@ int hci_register_dev(struct hci_dev *hdev)
 
 	hci_register_sysfs(hdev);
 
+	hdev->rfkill = rfkill_alloc(hdev->name, &hdev->dev,
+				RFKILL_TYPE_BLUETOOTH, &hci_rfkill_ops, hdev);
+	if (hdev->rfkill) {
+		if (rfkill_register(hdev->rfkill) < 0) {
+			rfkill_destroy(hdev->rfkill);
+			hdev->rfkill = NULL;
+		}
+	}
+
 	hci_notify(hdev, HCI_DEV_REG);
 
 	return id;
@@ -924,6 +958,11 @@ int hci_unregister_dev(struct hci_dev *hdev)
 
 	hci_notify(hdev, HCI_DEV_UNREG);
 
+	if (hdev->rfkill) {
+		rfkill_unregister(hdev->rfkill);
+		rfkill_destroy(hdev->rfkill);
+	}
+
 	hci_unregister_sysfs(hdev);
 
 	__hci_dev_put(hdev);
-- 
cgit v1.2.3-70-g09d2


From f87fb666bb00a7afcbd7992d236e42ac544996f9 Mon Sep 17 00:00:00 2001
From: Jan Kasprzak <kas@fi.muni.cz>
Date: Mon, 8 Jun 2009 15:53:43 +0200
Subject: netfilter: nf_ct_icmp: keep the ICMP ct entries longer

Current conntrack code kills the ICMP conntrack entry as soon as
the first reply is received. This is incorrect, as we then see only
the first ICMP echo reply out of several possible duplicates as
ESTABLISHED, while the rest will be INVALID. Also this unnecessarily
increases the conntrackd traffic on H-A firewalls.

Make all the ICMP conntrack entries (including the replied ones)
last for the default of nf_conntrack_icmp{,v6}_timeout seconds.

Signed-off-by: Jan "Yenya" Kasprzak <kas@fi.muni.cz>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/ipv4/nf_conntrack_icmp.h   | 11 -----------
 include/net/netfilter/ipv6/nf_conntrack_icmpv6.h |  7 -------
 include/net/netfilter/nf_conntrack.h             |  3 ---
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c     | 16 ++++------------
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c   | 16 ++++------------
 5 files changed, 8 insertions(+), 45 deletions(-)
 delete mode 100644 include/net/netfilter/ipv4/nf_conntrack_icmp.h

(limited to 'include')

diff --git a/include/net/netfilter/ipv4/nf_conntrack_icmp.h b/include/net/netfilter/ipv4/nf_conntrack_icmp.h
deleted file mode 100644
index 3dd22cff23e..00000000000
--- a/include/net/netfilter/ipv4/nf_conntrack_icmp.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef _NF_CONNTRACK_ICMP_H
-#define _NF_CONNTRACK_ICMP_H
-/* ICMP tracking. */
-#include <asm/atomic.h>
-
-struct ip_ct_icmp
-{
-	/* Optimization: when number in == number out, forget immediately. */
-	atomic_t count;
-};
-#endif /* _NF_CONNTRACK_ICMP_H */
diff --git a/include/net/netfilter/ipv6/nf_conntrack_icmpv6.h b/include/net/netfilter/ipv6/nf_conntrack_icmpv6.h
index 86591afda29..67edd50a398 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_icmpv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_icmpv6.h
@@ -9,7 +9,6 @@
 
 #ifndef _NF_CONNTRACK_ICMPV6_H
 #define _NF_CONNTRACK_ICMPV6_H
-#include <asm/atomic.h>
 
 #ifndef ICMPV6_NI_QUERY
 #define ICMPV6_NI_QUERY 139
@@ -18,10 +17,4 @@
 #define ICMPV6_NI_REPLY 140
 #endif
 
-struct nf_ct_icmpv6
-{
-	/* Optimization: when number in == number out, forget immediately. */
-	atomic_t count;
-};
-
 #endif /* _NF_CONNTRACK_ICMPV6_H */
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 2ba36dd33ae..2b877374242 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -23,7 +23,6 @@
 #include <linux/netfilter/nf_conntrack_dccp.h>
 #include <linux/netfilter/nf_conntrack_sctp.h>
 #include <linux/netfilter/nf_conntrack_proto_gre.h>
-#include <net/netfilter/ipv4/nf_conntrack_icmp.h>
 #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
 
 #include <net/netfilter/nf_conntrack_tuple.h>
@@ -34,8 +33,6 @@ union nf_conntrack_proto {
 	struct nf_ct_dccp dccp;
 	struct ip_ct_sctp sctp;
 	struct ip_ct_tcp tcp;
-	struct ip_ct_icmp icmp;
-	struct nf_ct_icmpv6 icmpv6;
 	struct nf_ct_gre gre;
 };
 
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index c6ab3d99e79..d71ba767734 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -82,17 +82,10 @@ static int icmp_packet(struct nf_conn *ct,
 		       u_int8_t pf,
 		       unsigned int hooknum)
 {
-	/* Try to delete connection immediately after all replies:
-	   won't actually vanish as we still have skb, and del_timer
-	   means this will only run once even if count hits zero twice
-	   (theoretically possible with SMP) */
-	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
-		if (atomic_dec_and_test(&ct->proto.icmp.count))
-			nf_ct_kill_acct(ct, ctinfo, skb);
-	} else {
-		atomic_inc(&ct->proto.icmp.count);
-		nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
-	}
+	/* Do not immediately delete the connection after the first
+	   successful reply to avoid excessive conntrackd traffic
+	   and also to handle correctly ICMP echo reply duplicates. */
+	nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
 
 	return NF_ACCEPT;
 }
@@ -116,7 +109,6 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
 		nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple);
 		return false;
 	}
-	atomic_set(&ct->proto.icmp.count, 0);
 	return true;
 }
 
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index a0acd9655fe..642dcb127ba 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -95,17 +95,10 @@ static int icmpv6_packet(struct nf_conn *ct,
 		       u_int8_t pf,
 		       unsigned int hooknum)
 {
-	/* Try to delete connection immediately after all replies:
-	   won't actually vanish as we still have skb, and del_timer
-	   means this will only run once even if count hits zero twice
-	   (theoretically possible with SMP) */
-	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
-		if (atomic_dec_and_test(&ct->proto.icmp.count))
-			nf_ct_kill_acct(ct, ctinfo, skb);
-	} else {
-		atomic_inc(&ct->proto.icmp.count);
-		nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout);
-	}
+	/* Do not immediately delete the connection after the first
+	   successful reply to avoid excessive conntrackd traffic
+	   and also to handle correctly ICMP echo reply duplicates. */
+	nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout);
 
 	return NF_ACCEPT;
 }
@@ -131,7 +124,6 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
 				      type + 128);
 		return false;
 	}
-	atomic_set(&ct->proto.icmp.count, 0);
 	return true;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 11eeef41d5f63c7d2f7fdfcc733eb7fb137cc384 Mon Sep 17 00:00:00 2001
From: Evgeniy Polyakov <zbr@ioremap.net>
Date: Mon, 8 Jun 2009 17:01:51 +0200
Subject: netfilter: passive OS fingerprint xtables match

Passive OS fingerprinting netfilter module allows to passively detect
remote OS and perform various netfilter actions based on that knowledge.
This module compares some data (WS, MSS, options and it's order, ttl, df
and others) from packets with SYN bit set with dynamically loaded OS
fingerprints.

Fingerprint matching rules can be downloaded from OpenBSD source tree
or found in archive and loaded via netfilter netlink subsystem into
the kernel via special util found in archive.

Archive contains library file (also attached), which was shipped
with iptables extensions some time ago (at least when ipt_osf existed
in patch-o-matic).

Following changes were made in this release:
 * added NLM_F_CREATE/NLM_F_EXCL checks
 * dropped _rcu list traversing helpers in the protected add/remove calls
 * dropped unneded structures, debug prints, obscure comment and check

Fingerprints can be downloaded from
http://www.openbsd.org/cgi-bin/cvsweb/src/etc/pf.os
or can be found in archive

Example usage:
-d switch removes fingerprints

Please consider for inclusion.
Thank you.

Passive OS fingerprint homepage (archives, examples):
http://www.ioremap.net/projects/osf

Signed-off-by: Evgeniy Polyakov <zbr@ioremap.net>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/Kbuild      |   1 +
 include/linux/netfilter/nfnetlink.h |   3 +-
 include/linux/netfilter/xt_osf.h    | 133 +++++++++++
 net/netfilter/Kconfig               |  13 ++
 net/netfilter/Makefile              |   1 +
 net/netfilter/xt_osf.c              | 428 ++++++++++++++++++++++++++++++++++++
 6 files changed, 578 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/netfilter/xt_osf.h
 create mode 100644 net/netfilter/xt_osf.c

(limited to 'include')

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index af9d2fb9721..2aea50399c0 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -33,6 +33,7 @@ header-y += xt_limit.h
 header-y += xt_mac.h
 header-y += xt_mark.h
 header-y += xt_multiport.h
+header-y += xt_osf.h
 header-y += xt_owner.h
 header-y += xt_pkttype.h
 header-y += xt_quota.h
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 2214e516146..bff4d5741d9 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -46,7 +46,8 @@ struct nfgenmsg {
 #define NFNL_SUBSYS_CTNETLINK_EXP	2
 #define NFNL_SUBSYS_QUEUE		3
 #define NFNL_SUBSYS_ULOG		4
-#define NFNL_SUBSYS_COUNT		5
+#define NFNL_SUBSYS_OSF			5
+#define NFNL_SUBSYS_COUNT		6
 
 #ifdef __KERNEL__
 
diff --git a/include/linux/netfilter/xt_osf.h b/include/linux/netfilter/xt_osf.h
new file mode 100644
index 00000000000..fd2272e0959
--- /dev/null
+++ b/include/linux/netfilter/xt_osf.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2003+ Evgeniy Polyakov <johnpol@2ka.mxt.ru>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _XT_OSF_H
+#define _XT_OSF_H
+
+#define MAXGENRELEN		32
+
+#define XT_OSF_GENRE		(1<<0)
+#define	XT_OSF_TTL		(1<<1)
+#define XT_OSF_LOG		(1<<2)
+#define XT_OSF_INVERT		(1<<3)
+
+#define XT_OSF_LOGLEVEL_ALL	0	/* log all matched fingerprints */
+#define XT_OSF_LOGLEVEL_FIRST	1	/* log only the first matced fingerprint */
+#define XT_OSF_LOGLEVEL_ALL_KNOWN	2 /* do not log unknown packets */
+
+#define XT_OSF_TTL_TRUE		0	/* True ip and fingerprint TTL comparison */
+#define XT_OSF_TTL_LESS		1	/* Check if ip TTL is less than fingerprint one */
+#define XT_OSF_TTL_NOCHECK	2	/* Do not compare ip and fingerprint TTL at all */
+
+struct xt_osf_info {
+	char			genre[MAXGENRELEN];
+	__u32			len;
+	__u32			flags;
+	__u32			loglevel;
+	__u32			ttl;
+};
+
+/*
+ * Wildcard MSS (kind of).
+ * It is used to implement a state machine for the different wildcard values
+ * of the MSS and window sizes.
+ */
+struct xt_osf_wc {
+	__u32			wc;
+	__u32			val;
+};
+
+/*
+ * This struct represents IANA options
+ * http://www.iana.org/assignments/tcp-parameters
+ */
+struct xt_osf_opt {
+	__u16			kind, length;
+	struct xt_osf_wc	wc;
+};
+
+struct xt_osf_user_finger {
+	struct xt_osf_wc	wss;
+
+	__u8			ttl, df;
+	__u16			ss, mss;
+	__u16			opt_num;
+
+	char			genre[MAXGENRELEN];
+	char			version[MAXGENRELEN];
+	char			subtype[MAXGENRELEN];
+
+	/* MAX_IPOPTLEN is maximum if all options are NOPs or EOLs */
+	struct xt_osf_opt	opt[MAX_IPOPTLEN];
+};
+
+struct xt_osf_nlmsg {
+	struct xt_osf_user_finger	f;
+	struct iphdr		ip;
+	struct tcphdr		tcp;
+};
+
+/* Defines for IANA option kinds */
+
+enum iana_options {
+	OSFOPT_EOL = 0,		/* End of options */
+	OSFOPT_NOP, 		/* NOP */
+	OSFOPT_MSS, 		/* Maximum segment size */
+	OSFOPT_WSO, 		/* Window scale option */
+	OSFOPT_SACKP,		/* SACK permitted */
+	OSFOPT_SACK,		/* SACK */
+	OSFOPT_ECHO,
+	OSFOPT_ECHOREPLY,
+	OSFOPT_TS,		/* Timestamp option */
+	OSFOPT_POCP,		/* Partial Order Connection Permitted */
+	OSFOPT_POSP,		/* Partial Order Service Profile */
+
+	/* Others are not used in the current OSF */
+	OSFOPT_EMPTY = 255,
+};
+
+/*
+ * Initial window size option state machine: multiple of mss, mtu or
+ * plain numeric value. Can also be made as plain numeric value which
+ * is not a multiple of specified value.
+ */
+enum xt_osf_window_size_options {
+	OSF_WSS_PLAIN	= 0,
+	OSF_WSS_MSS,
+	OSF_WSS_MTU,
+	OSF_WSS_MODULO,
+	OSF_WSS_MAX,
+};
+
+/*
+ * Add/remove fingerprint from the kernel.
+ */
+enum xt_osf_msg_types {
+	OSF_MSG_ADD,
+	OSF_MSG_REMOVE,
+	OSF_MSG_MAX,
+};
+
+enum xt_osf_attr_type {
+	OSF_ATTR_UNSPEC,
+	OSF_ATTR_FINGER,
+	OSF_ATTR_MAX,
+};
+
+#endif				/* _XT_OSF_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index cb3ad741ebf..79ba47f042c 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -917,6 +917,19 @@ config NETFILTER_XT_MATCH_U32
 
 	  Details and examples are in the kernel module source.
 
+config NETFILTER_XT_MATCH_OSF
+	tristate '"osf" Passive OS fingerprint match'
+	depends on NETFILTER_ADVANCED && NETFILTER_NETLINK
+	help
+	  This option selects the Passive OS Fingerprinting match module
+	  that allows to passively match the remote operating system by
+	  analyzing incoming TCP SYN packets.
+
+	  Rules and loading software can be downloaded from
+	  http://www.ioremap.net/projects/osf
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 endif # NETFILTER_XTABLES
 
 endmenu
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 6282060fbda..49f62ee4e9f 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -77,6 +77,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
new file mode 100644
index 00000000000..863e40977a4
--- /dev/null
+++ b/net/netfilter/xt_osf.c
@@ -0,0 +1,428 @@
+/*
+ * Copyright (c) 2003+ Evgeniy Polyakov <zbr@ioremap.net>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <linux/if.h>
+#include <linux/inetdevice.h>
+#include <linux/ip.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/tcp.h>
+
+#include <net/ip.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_log.h>
+#include <linux/netfilter/xt_osf.h>
+
+struct xt_osf_finger {
+	struct rcu_head			rcu_head;
+	struct list_head		finger_entry;
+	struct xt_osf_user_finger	finger;
+};
+
+enum osf_fmatch_states {
+	/* Packet does not match the fingerprint */
+	FMATCH_WRONG = 0,
+	/* Packet matches the fingerprint */
+	FMATCH_OK,
+	/* Options do not match the fingerprint, but header does */
+	FMATCH_OPT_WRONG,
+};
+
+/*
+ * Indexed by dont-fragment bit.
+ * It is the only constant value in the fingerprint.
+ */
+static struct list_head xt_osf_fingers[2];
+
+static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = {
+	[OSF_ATTR_FINGER]	= { .len = sizeof(struct xt_osf_user_finger) },
+};
+
+static void xt_osf_finger_free_rcu(struct rcu_head *rcu_head)
+{
+	struct xt_osf_finger *f = container_of(rcu_head, struct xt_osf_finger, rcu_head);
+
+	kfree(f);
+}
+
+static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb,
+			struct nlmsghdr *nlh, struct nlattr *osf_attrs[])
+{
+	struct xt_osf_user_finger *f;
+	struct xt_osf_finger *kf = NULL, *sf;
+	int err = 0;
+
+	if (!osf_attrs[OSF_ATTR_FINGER])
+		return -EINVAL;
+
+	if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+		return -EINVAL;
+
+	f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
+
+	kf = kmalloc(sizeof(struct xt_osf_finger), GFP_KERNEL);
+	if (!kf)
+		return -ENOMEM;
+
+	memcpy(&kf->finger, f, sizeof(struct xt_osf_user_finger));
+
+	list_for_each_entry(sf, &xt_osf_fingers[!!f->df], finger_entry) {
+		if (memcmp(&sf->finger, f, sizeof(struct xt_osf_user_finger)))
+			continue;
+
+		kfree(kf);
+		kf = NULL;
+
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			err = -EEXIST;
+		break;
+	}
+
+	/*
+	 * We are protected by nfnl mutex.
+	 */
+	if (kf)
+		list_add_tail_rcu(&kf->finger_entry, &xt_osf_fingers[!!f->df]);
+
+	return err;
+}
+
+static int xt_osf_remove_callback(struct sock *ctnl, struct sk_buff *skb,
+			struct nlmsghdr *nlh, struct nlattr *osf_attrs[])
+{
+	struct xt_osf_user_finger *f;
+	struct xt_osf_finger *sf;
+	int err = ENOENT;
+
+	if (!osf_attrs[OSF_ATTR_FINGER])
+		return -EINVAL;
+
+	f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
+
+	list_for_each_entry(sf, &xt_osf_fingers[!!f->df], finger_entry) {
+		if (memcmp(&sf->finger, f, sizeof(struct xt_osf_user_finger)))
+			continue;
+
+		/*
+		 * We are protected by nfnl mutex.
+		 */
+		list_del_rcu(&sf->finger_entry);
+		call_rcu(&sf->rcu_head, xt_osf_finger_free_rcu);
+
+		err = 0;
+		break;
+	}
+
+	return err;
+}
+
+static const struct nfnl_callback xt_osf_nfnetlink_callbacks[OSF_MSG_MAX] = {
+	[OSF_MSG_ADD]	= {
+		.call		= xt_osf_add_callback,
+		.attr_count	= OSF_ATTR_MAX,
+		.policy		= xt_osf_policy,
+	},
+	[OSF_MSG_REMOVE]	= {
+		.call		= xt_osf_remove_callback,
+		.attr_count	= OSF_ATTR_MAX,
+		.policy		= xt_osf_policy,
+	},
+};
+
+static const struct nfnetlink_subsystem xt_osf_nfnetlink = {
+	.name			= "osf",
+	.subsys_id		= NFNL_SUBSYS_OSF,
+	.cb_count		= OSF_MSG_MAX,
+	.cb			= xt_osf_nfnetlink_callbacks,
+};
+
+static inline int xt_osf_ttl(const struct sk_buff *skb, const struct xt_osf_info *info,
+			    unsigned char f_ttl)
+{
+	const struct iphdr *ip = ip_hdr(skb);
+
+	if (info->flags & XT_OSF_TTL) {
+		if (info->ttl == XT_OSF_TTL_TRUE)
+			return ip->ttl == f_ttl;
+		if (info->ttl == XT_OSF_TTL_NOCHECK)
+			return 1;
+		else if (ip->ttl <= f_ttl)
+			return 1;
+		else {
+			struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+			int ret = 0;
+
+			for_ifa(in_dev) {
+				if (inet_ifa_match(ip->saddr, ifa)) {
+					ret = (ip->ttl == f_ttl);
+					break;
+				}
+			}
+			endfor_ifa(in_dev);
+
+			return ret;
+		}
+	}
+
+	return ip->ttl == f_ttl;
+}
+
+static bool xt_osf_match_packet(const struct sk_buff *skb,
+		const struct xt_match_param *p)
+{
+	const struct xt_osf_info *info = p->matchinfo;
+	const struct iphdr *ip = ip_hdr(skb);
+	const struct tcphdr *tcp;
+	struct tcphdr _tcph;
+	int fmatch = FMATCH_WRONG, fcount = 0;
+	unsigned int optsize = 0, check_WSS = 0;
+	u16 window, totlen, mss = 0;
+	bool df;
+	const unsigned char *optp = NULL, *_optp = NULL;
+	unsigned char opts[MAX_IPOPTLEN];
+	const struct xt_osf_finger *kf;
+	const struct xt_osf_user_finger *f;
+
+	if (!info)
+		return false;
+
+	tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph);
+	if (!tcp)
+		return false;
+
+	if (!tcp->syn)
+		return false;
+
+	totlen = ntohs(ip->tot_len);
+	df = ntohs(ip->frag_off) & IP_DF;
+	window = ntohs(tcp->window);
+
+	if (tcp->doff * 4 > sizeof(struct tcphdr)) {
+		optsize = tcp->doff * 4 - sizeof(struct tcphdr);
+
+		_optp = optp = skb_header_pointer(skb, ip_hdrlen(skb) +
+				sizeof(struct tcphdr), optsize, opts);
+	}
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) {
+		f = &kf->finger;
+
+		if (!(info->flags & XT_OSF_LOG) && strcmp(info->genre, f->genre))
+			continue;
+
+		optp = _optp;
+		fmatch = FMATCH_WRONG;
+
+		if (totlen == f->ss && xt_osf_ttl(skb, info, f->ttl)) {
+			int foptsize, optnum;
+
+			/*
+			 * Should not happen if userspace parser was written correctly.
+			 */
+			if (f->wss.wc >= OSF_WSS_MAX)
+				continue;
+
+			/* Check options */
+
+			foptsize = 0;
+			for (optnum = 0; optnum < f->opt_num; ++optnum)
+				foptsize += f->opt[optnum].length;
+
+			if (foptsize > MAX_IPOPTLEN ||
+				optsize > MAX_IPOPTLEN ||
+				optsize != foptsize)
+				continue;
+
+			check_WSS = f->wss.wc;
+
+			for (optnum = 0; optnum < f->opt_num; ++optnum) {
+				if (f->opt[optnum].kind == (*optp)) {
+					__u32 len = f->opt[optnum].length;
+					const __u8 *optend = optp + len;
+					int loop_cont = 0;
+
+					fmatch = FMATCH_OK;
+
+					switch (*optp) {
+					case OSFOPT_MSS:
+						mss = optp[3];
+						mss <<= 8;
+						mss |= optp[2];
+
+						mss = ntohs(mss);
+						break;
+					case OSFOPT_TS:
+						loop_cont = 1;
+						break;
+					}
+
+					optp = optend;
+				} else
+					fmatch = FMATCH_OPT_WRONG;
+
+				if (fmatch != FMATCH_OK)
+					break;
+			}
+
+			if (fmatch != FMATCH_OPT_WRONG) {
+				fmatch = FMATCH_WRONG;
+
+				switch (check_WSS) {
+				case OSF_WSS_PLAIN:
+					if (f->wss.val == 0 || window == f->wss.val)
+						fmatch = FMATCH_OK;
+					break;
+				case OSF_WSS_MSS:
+					/*
+					 * Some smart modems decrease mangle MSS to 
+					 * SMART_MSS_2, so we check standard, decreased
+					 * and the one provided in the fingerprint MSS
+					 * values.
+					 */
+#define SMART_MSS_1	1460
+#define SMART_MSS_2	1448
+					if (window == f->wss.val * mss ||
+					    window == f->wss.val * SMART_MSS_1 ||
+					    window == f->wss.val * SMART_MSS_2)
+						fmatch = FMATCH_OK;
+					break;
+				case OSF_WSS_MTU:
+					if (window == f->wss.val * (mss + 40) ||
+					    window == f->wss.val * (SMART_MSS_1 + 40) ||
+					    window == f->wss.val * (SMART_MSS_2 + 40))
+						fmatch = FMATCH_OK;
+					break;
+				case OSF_WSS_MODULO:
+					if ((window % f->wss.val) == 0)
+						fmatch = FMATCH_OK;
+					break;
+				}
+			}
+
+			if (fmatch != FMATCH_OK)
+				continue;
+
+			fcount++;
+
+			if (info->flags & XT_OSF_LOG)
+				nf_log_packet(p->hooknum, 0, skb, p->in, p->out, NULL,
+					"%s [%s:%s] : %pi4:%d -> %pi4:%d hops=%d\n",
+					f->genre, f->version, f->subtype,
+					&ip->saddr, ntohs(tcp->source),
+					&ip->daddr, ntohs(tcp->dest),
+					f->ttl - ip->ttl);
+
+			if ((info->flags & XT_OSF_LOG) &&
+			    info->loglevel == XT_OSF_LOGLEVEL_FIRST)
+				break;
+		}
+	}
+	rcu_read_unlock();
+
+	if (!fcount && (info->flags & XT_OSF_LOG))
+		nf_log_packet(p->hooknum, 0, skb, p->in, p->out, NULL,
+			"Remote OS is not known: %pi4:%u -> %pi4:%u\n",
+				&ip->saddr, ntohs(tcp->source),
+				&ip->daddr, ntohs(tcp->dest));
+
+	if (fcount)
+		fmatch = FMATCH_OK;
+
+	return fmatch == FMATCH_OK;
+}
+
+static struct xt_match xt_osf_match = {
+	.name 		= "osf",
+	.revision	= 0,
+	.family		= NFPROTO_IPV4,
+	.proto		= IPPROTO_TCP,
+	.hooks      	= (1 << NF_INET_LOCAL_IN) |
+				(1 << NF_INET_PRE_ROUTING) |
+				(1 << NF_INET_FORWARD),
+	.match 		= xt_osf_match_packet,
+	.matchsize	= sizeof(struct xt_osf_info),
+	.me		= THIS_MODULE,
+};
+
+static int __init xt_osf_init(void)
+{
+	int err = -EINVAL;
+	int i;
+
+	for (i=0; i<ARRAY_SIZE(xt_osf_fingers); ++i)
+		INIT_LIST_HEAD(&xt_osf_fingers[i]);
+
+	err = nfnetlink_subsys_register(&xt_osf_nfnetlink);
+	if (err < 0) {
+		printk(KERN_ERR "Failed (%d) to register OSF nsfnetlink helper.\n", err);
+		goto err_out_exit;
+	}
+
+	err = xt_register_match(&xt_osf_match);
+	if (err) {
+		printk(KERN_ERR "Failed (%d) to register OS fingerprint "
+				"matching module.\n", err);
+		goto err_out_remove;
+	}
+
+	return 0;
+
+err_out_remove:
+	nfnetlink_subsys_unregister(&xt_osf_nfnetlink);
+err_out_exit:
+	return err;
+}
+
+static void __exit xt_osf_fini(void)
+{
+	struct xt_osf_finger *f;
+	int i;
+
+	nfnetlink_subsys_unregister(&xt_osf_nfnetlink);
+	xt_unregister_match(&xt_osf_match);
+
+	rcu_read_lock();
+	for (i=0; i<ARRAY_SIZE(xt_osf_fingers); ++i) {
+
+		list_for_each_entry_rcu(f, &xt_osf_fingers[i], finger_entry) {
+			list_del_rcu(&f->finger_entry);
+			call_rcu(&f->rcu_head, xt_osf_finger_free_rcu);
+		}
+	}
+	rcu_read_unlock();
+
+	rcu_barrier();
+}
+
+module_init(xt_osf_init);
+module_exit(xt_osf_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
+MODULE_DESCRIPTION("Passive OS fingerprint matching.");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);
-- 
cgit v1.2.3-70-g09d2


From 226c7ffe74474257b4b87bd38ae8ba0030cf65e2 Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jeykholt@cisco.com>
Date: Wed, 6 May 2009 10:52:51 -0700
Subject: [SCSI] net, libfcoe: Add the FCoE Initialization Protocol ethertype

FIP is the FCoE Initialization Protocol and this patch
adds the protocol ethertype to the kernel's list of
ethertypes.

Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 include/linux/if_ether.h | 1 +
 include/scsi/fc/fc_fip.h | 7 -------
 2 files changed, 1 insertion(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index cfe4fe1b713..60e8934d10b 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -79,6 +79,7 @@
 #define ETH_P_AOE	0x88A2		/* ATA over Ethernet		*/
 #define ETH_P_TIPC	0x88CA		/* TIPC 			*/
 #define ETH_P_FCOE	0x8906		/* Fibre Channel over Ethernet  */
+#define ETH_P_FIP	0x8914		/* FCoE Initialization Protocol */
 #define ETH_P_EDSA	0xDADA		/* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
 
 /*
diff --git a/include/scsi/fc/fc_fip.h b/include/scsi/fc/fc_fip.h
index 0627a9ae634..3d138c1fcf8 100644
--- a/include/scsi/fc/fc_fip.h
+++ b/include/scsi/fc/fc_fip.h
@@ -22,13 +22,6 @@
  * http://www.t11.org/ftp/t11/pub/fc/bb-5/08-543v1.pdf
  */
 
-/*
- * The FIP ethertype eventually goes in net/if_ether.h.
- */
-#ifndef ETH_P_FIP
-#define ETH_P_FIP	0x8914	/* FIP Ethertype */
-#endif
-
 #define FIP_DEF_PRI	128	/* default selection priority */
 #define FIP_DEF_FC_MAP	0x0efc00 /* default FCoE MAP (MAC OUI) value */
 #define FIP_DEF_FKA	8000	/* default FCF keep-alive/advert period (mS) */
-- 
cgit v1.2.3-70-g09d2


From c6f4a42de60b981dd210de01cd3e575835e3158e Mon Sep 17 00:00:00 2001
From: Minkyu Kang <mk7.kang@samsung.com>
Date: Fri, 5 Jun 2009 15:33:04 +0900
Subject: Add MAX17040 Fuel Gauge driver

The MAX17040 is a I2C interfaced Fuel Gauge systems for lithium-ion
batteries This patch adds support the MAX17040 Fuel Gauge

Signed-off-by: Minkyu Kang <mk7.kang@samsung.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/Kconfig            |   8 +
 drivers/power/Makefile           |   3 +-
 drivers/power/max17040_battery.c | 309 +++++++++++++++++++++++++++++++++++++++
 include/linux/max17040_battery.h |  19 +++
 4 files changed, 338 insertions(+), 1 deletion(-)
 create mode 100644 drivers/power/max17040_battery.c
 create mode 100644 include/linux/max17040_battery.h

(limited to 'include')

diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index 33da1127992..7eda34838bf 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -82,6 +82,14 @@ config BATTERY_DA9030
 	  Say Y here to enable support for batteries charger integrated into
 	  DA9030 PMIC.
 
+config BATTERY_MAX17040
+	tristate "Maxim MAX17040 Fuel Gauge"
+	depends on I2C
+	help
+	  MAX17040 is fuel-gauge systems for lithium-ion (Li+) batteries
+	  in handheld and portable equipment. The MAX17040 is configured
+	  to operate with a single lithium cell
+
 config CHARGER_PCF50633
 	tristate "NXP PCF50633 MBC"
 	depends on MFD_PCF50633
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index 2fcf41d13e5..daf3179689a 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -25,4 +25,5 @@ obj-$(CONFIG_BATTERY_TOSA)	+= tosa_battery.o
 obj-$(CONFIG_BATTERY_WM97XX)	+= wm97xx_battery.o
 obj-$(CONFIG_BATTERY_BQ27x00)	+= bq27x00_battery.o
 obj-$(CONFIG_BATTERY_DA9030)	+= da9030_battery.o
-obj-$(CONFIG_CHARGER_PCF50633)	+= pcf50633-charger.o
\ No newline at end of file
+obj-$(CONFIG_BATTERY_MAX17040)	+= max17040_battery.o
+obj-$(CONFIG_CHARGER_PCF50633)	+= pcf50633-charger.o
diff --git a/drivers/power/max17040_battery.c b/drivers/power/max17040_battery.c
new file mode 100644
index 00000000000..87b98bf27ae
--- /dev/null
+++ b/drivers/power/max17040_battery.c
@@ -0,0 +1,309 @@
+/*
+ *  max17040_battery.c
+ *  fuel-gauge systems for lithium-ion (Li+) batteries
+ *
+ *  Copyright (C) 2009 Samsung Electronics
+ *  Minkyu Kang <mk7.kang@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/mutex.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/delay.h>
+#include <linux/power_supply.h>
+#include <linux/max17040_battery.h>
+
+#define MAX17040_VCELL_MSB	0x02
+#define MAX17040_VCELL_LSB	0x03
+#define MAX17040_SOC_MSB	0x04
+#define MAX17040_SOC_LSB	0x05
+#define MAX17040_MODE_MSB	0x06
+#define MAX17040_MODE_LSB	0x07
+#define MAX17040_VER_MSB	0x08
+#define MAX17040_VER_LSB	0x09
+#define MAX17040_RCOMP_MSB	0x0C
+#define MAX17040_RCOMP_LSB	0x0D
+#define MAX17040_CMD_MSB	0xFE
+#define MAX17040_CMD_LSB	0xFF
+
+#define MAX17040_DELAY		1000
+#define MAX17040_BATTERY_FULL	95
+
+struct max17040_chip {
+	struct i2c_client		*client;
+	struct delayed_work		work;
+	struct power_supply		battery;
+	struct max17040_platform_data	*pdata;
+
+	/* State Of Connect */
+	int online;
+	/* battery voltage */
+	int vcell;
+	/* battery capacity */
+	int soc;
+	/* State Of Charge */
+	int status;
+};
+
+static int max17040_get_property(struct power_supply *psy,
+			    enum power_supply_property psp,
+			    union power_supply_propval *val)
+{
+	struct max17040_chip *chip = container_of(psy,
+				struct max17040_chip, battery);
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+		val->intval = chip->status;
+		break;
+	case POWER_SUPPLY_PROP_ONLINE:
+		val->intval = chip->online;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		val->intval = chip->vcell;
+		break;
+	case POWER_SUPPLY_PROP_CAPACITY:
+		val->intval = chip->soc;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int max17040_write_reg(struct i2c_client *client, int reg, u8 value)
+{
+	int ret;
+
+	ret = i2c_smbus_write_byte_data(client, reg, value);
+
+	if (ret < 0)
+		dev_err(&client->dev, "%s: err %d\n", __func__, ret);
+
+	return ret;
+}
+
+static int max17040_read_reg(struct i2c_client *client, int reg)
+{
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(client, reg);
+
+	if (ret < 0)
+		dev_err(&client->dev, "%s: err %d\n", __func__, ret);
+
+	return ret;
+}
+
+static void max17040_reset(struct i2c_client *client)
+{
+	max17040_write_reg(client, MAX17040_CMD_MSB, 0x54);
+	max17040_write_reg(client, MAX17040_CMD_LSB, 0x00);
+}
+
+static void max17040_get_vcell(struct i2c_client *client)
+{
+	struct max17040_chip *chip = i2c_get_clientdata(client);
+	u8 msb;
+	u8 lsb;
+
+	msb = max17040_read_reg(client, MAX17040_VCELL_MSB);
+	lsb = max17040_read_reg(client, MAX17040_VCELL_LSB);
+
+	chip->vcell = (msb << 4) + (lsb >> 4);
+}
+
+static void max17040_get_soc(struct i2c_client *client)
+{
+	struct max17040_chip *chip = i2c_get_clientdata(client);
+	u8 msb;
+	u8 lsb;
+
+	msb = max17040_read_reg(client, MAX17040_SOC_MSB);
+	lsb = max17040_read_reg(client, MAX17040_SOC_LSB);
+
+	chip->soc = msb;
+}
+
+static void max17040_get_version(struct i2c_client *client)
+{
+	u8 msb;
+	u8 lsb;
+
+	msb = max17040_read_reg(client, MAX17040_VER_MSB);
+	lsb = max17040_read_reg(client, MAX17040_VER_LSB);
+
+	dev_info(&client->dev, "MAX17040 Fuel-Gauge Ver %d%d\n", msb, lsb);
+}
+
+static void max17040_get_online(struct i2c_client *client)
+{
+	struct max17040_chip *chip = i2c_get_clientdata(client);
+
+	if (chip->pdata->battery_online)
+		chip->online = chip->pdata->battery_online();
+	else
+		chip->online = 1;
+}
+
+static void max17040_get_status(struct i2c_client *client)
+{
+	struct max17040_chip *chip = i2c_get_clientdata(client);
+
+	if (!chip->pdata->charger_online || !chip->pdata->charger_enable) {
+		chip->status = POWER_SUPPLY_STATUS_UNKNOWN;
+		return;
+	}
+
+	if (chip->pdata->charger_online()) {
+		if (chip->pdata->charger_enable())
+			chip->status = POWER_SUPPLY_STATUS_CHARGING;
+		else
+			chip->status = POWER_SUPPLY_STATUS_NOT_CHARGING;
+	} else {
+		chip->status = POWER_SUPPLY_STATUS_DISCHARGING;
+	}
+
+	if (chip->soc > MAX17040_BATTERY_FULL)
+		chip->status = POWER_SUPPLY_STATUS_FULL;
+}
+
+static void max17040_work(struct work_struct *work)
+{
+	struct max17040_chip *chip;
+
+	chip = container_of(work, struct max17040_chip, work.work);
+
+	max17040_get_vcell(chip->client);
+	max17040_get_soc(chip->client);
+	max17040_get_online(chip->client);
+	max17040_get_status(chip->client);
+
+	schedule_delayed_work(&chip->work, MAX17040_DELAY);
+}
+
+static enum power_supply_property max17040_battery_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CAPACITY,
+};
+
+static int __devinit max17040_probe(struct i2c_client *client,
+			const struct i2c_device_id *id)
+{
+	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+	struct max17040_chip *chip;
+	int ret;
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE))
+		return -EIO;
+
+	chip = kzalloc(sizeof(*chip), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	chip->client = client;
+	chip->pdata = client->dev.platform_data;
+
+	i2c_set_clientdata(client, chip);
+
+	chip->battery.name		= "battery";
+	chip->battery.type		= POWER_SUPPLY_TYPE_BATTERY;
+	chip->battery.get_property	= max17040_get_property;
+	chip->battery.properties	= max17040_battery_props;
+	chip->battery.num_properties	= ARRAY_SIZE(max17040_battery_props);
+
+	ret = power_supply_register(&client->dev, &chip->battery);
+	if (ret) {
+		dev_err(&client->dev, "failed: power supply register\n");
+		i2c_set_clientdata(client, NULL);
+		kfree(chip);
+		return ret;
+	}
+
+	max17040_reset(client);
+	max17040_get_version(client);
+
+	INIT_DELAYED_WORK_DEFERRABLE(&chip->work, max17040_work);
+	schedule_delayed_work(&chip->work, MAX17040_DELAY);
+
+	return 0;
+}
+
+static int __devexit max17040_remove(struct i2c_client *client)
+{
+	struct max17040_chip *chip = i2c_get_clientdata(client);
+
+	power_supply_unregister(&chip->battery);
+	cancel_delayed_work(&chip->work);
+	i2c_set_clientdata(client, NULL);
+	kfree(chip);
+	return 0;
+}
+
+#ifdef CONFIG_PM
+
+static int max17040_suspend(struct i2c_client *client,
+		pm_message_t state)
+{
+	struct max17040_chip *chip = i2c_get_clientdata(client);
+
+	cancel_delayed_work(&chip->work);
+	return 0;
+}
+
+static int max17040_resume(struct i2c_client *client)
+{
+	struct max17040_chip *chip = i2c_get_clientdata(client);
+
+	schedule_delayed_work(&chip->work, MAX17040_DELAY);
+	return 0;
+}
+
+#else
+
+#define max17040_suspend NULL
+#define max17040_resume NULL
+
+#endif /* CONFIG_PM */
+
+static const struct i2c_device_id max17040_id[] = {
+	{ "max17040", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, max17040_id);
+
+static struct i2c_driver max17040_i2c_driver = {
+	.driver	= {
+		.name	= "max17040",
+	},
+	.probe		= max17040_probe,
+	.remove		= __devexit_p(max17040_remove),
+	.suspend	= max17040_suspend,
+	.resume		= max17040_resume,
+	.id_table	= max17040_id,
+};
+
+static int __init max17040_init(void)
+{
+	return i2c_add_driver(&max17040_i2c_driver);
+}
+module_init(max17040_init);
+
+static void __exit max17040_exit(void)
+{
+	i2c_del_driver(&max17040_i2c_driver);
+}
+module_exit(max17040_exit);
+
+MODULE_AUTHOR("Minkyu Kang <mk7.kang@samsung.com>");
+MODULE_DESCRIPTION("MAX17040 Fuel Gauge");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/max17040_battery.h b/include/linux/max17040_battery.h
new file mode 100644
index 00000000000..ad97b06cf93
--- /dev/null
+++ b/include/linux/max17040_battery.h
@@ -0,0 +1,19 @@
+/*
+ *  Copyright (C) 2009 Samsung Electronics
+ *  Minkyu Kang <mk7.kang@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __MAX17040_BATTERY_H_
+#define __MAX17040_BATTERY_H_
+
+struct max17040_platform_data {
+	int (*battery_online)(void);
+	int (*charger_online)(void);
+	int (*charger_enable)(void);
+};
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From 1f8a6a10fb9437eac3f516ea4324a19087872f30 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 8 Jun 2009 18:18:39 +0200
Subject: ring-buffer: pass in lockdep class key for reader_lock

On Sun, 7 Jun 2009, Ingo Molnar wrote:
> Testing tracer sched_switch: <6>Starting ring buffer hammer
> PASSED
> Testing tracer sysprof: PASSED
> Testing tracer function: PASSED
> Testing tracer irqsoff:
> =============================================
> PASSED
> Testing tracer preemptoff: PASSED
> Testing tracer preemptirqsoff: [ INFO: possible recursive locking detected ]
> PASSED
> Testing tracer branch: 2.6.30-rc8-tip-01972-ge5b9078-dirty #5760
> ---------------------------------------------
> rb_consumer/431 is trying to acquire lock:
>  (&cpu_buffer->reader_lock){......}, at: [<c109eef7>] ring_buffer_reset_cpu+0x37/0x70
>
> but task is already holding lock:
>  (&cpu_buffer->reader_lock){......}, at: [<c10a019e>] ring_buffer_consume+0x7e/0xc0
>
> other info that might help us debug this:
> 1 lock held by rb_consumer/431:
>  #0:  (&cpu_buffer->reader_lock){......}, at: [<c10a019e>] ring_buffer_consume+0x7e/0xc0

The ring buffer is a generic structure, and can be used outside of
ftrace. If ftrace traces within the use of the ring buffer, it can produce
false positives with lockdep.

This patch passes in a static lock key into the allocation of the ring
buffer, so that different ring buffers will have their own lock class.

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1244477919.13761.9042.camel@twins>

[ store key in ring buffer descriptor ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h | 14 +++++++++++++-
 kernel/trace/ring_buffer.c  |  9 +++++++--
 2 files changed, 20 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index f1345828c7c..8670f1575fe 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -105,7 +105,19 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
  * size is in bytes for each per CPU buffer.
  */
 struct ring_buffer *
-ring_buffer_alloc(unsigned long size, unsigned flags);
+__ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key);
+
+/*
+ * Because the ring buffer is generic, if other users of the ring buffer get
+ * traced by ftrace, it can produce lockdep warnings. We need to keep each
+ * ring buffer's lock class separate.
+ */
+#define ring_buffer_alloc(size, flags)			\
+({							\
+	static struct lock_class_key __key;		\
+	__ring_buffer_alloc((size), (flags), &__key);	\
+})
+
 void ring_buffer_free(struct ring_buffer *buffer);
 
 int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 7102d7a2fad..22878b0d370 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -426,6 +426,8 @@ struct ring_buffer {
 	atomic_t			record_disabled;
 	cpumask_var_t			cpumask;
 
+	struct lock_class_key		*reader_lock_key;
+
 	struct mutex			mutex;
 
 	struct ring_buffer_per_cpu	**buffers;
@@ -565,6 +567,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 	cpu_buffer->cpu = cpu;
 	cpu_buffer->buffer = buffer;
 	spin_lock_init(&cpu_buffer->reader_lock);
+	lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
 	cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 	INIT_LIST_HEAD(&cpu_buffer->pages);
 
@@ -635,7 +638,8 @@ static int rb_cpu_notify(struct notifier_block *self,
  * when the buffer wraps. If this flag is not set, the buffer will
  * drop data when the tail hits the head.
  */
-struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
+struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
+					struct lock_class_key *key)
 {
 	struct ring_buffer *buffer;
 	int bsize;
@@ -658,6 +662,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
 	buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 	buffer->flags = flags;
 	buffer->clock = trace_clock_local;
+	buffer->reader_lock_key = key;
 
 	/* need at least two pages */
 	if (buffer->pages == 1)
@@ -715,7 +720,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
 	kfree(buffer);
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(ring_buffer_alloc);
+EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
 
 /**
  * ring_buffer_free - free a ring buffer.
-- 
cgit v1.2.3-70-g09d2


From 05f77f85f47e30a53f7971b687f3b0250e42f665 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 8 Jun 2009 16:16:56 -0700
Subject: bluetooth: Kill skb_frags_no(), unused.

Furthermore, it twiddles with the details of SKB list handling
directly, which we're trying to eliminate.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/bluetooth/bluetooth.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 3ad5390a4dd..144d1d5dd82 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -171,15 +171,6 @@ static inline struct sk_buff *bt_skb_send_alloc(struct sock *sk, unsigned long l
 	return skb;
 }
 
-static inline int skb_frags_no(struct sk_buff *skb)
-{
-	register struct sk_buff *frag = skb_shinfo(skb)->frag_list;
-	register int n = 1;
-
-	for (; frag; frag=frag->next, n++);
-	return n;
-}
-
 int bt_err(__u16 code);
 
 extern int hci_sock_init(void);
-- 
cgit v1.2.3-70-g09d2


From 9df1bb9b516daeece159ab7fb262d01a0359247c Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Tue, 9 Jun 2009 06:22:57 +0200
Subject: Revert "block: Fix bounce limit setting in DM"

This reverts commit a05c0205ba031c01bba33a21bf0a35920eb64833.

DM doesn't need to access the bounce_pfn directly.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-settings.c   | 17 -----------------
 drivers/md/dm-table.c  |  2 +-
 include/linux/blkdev.h |  1 -
 3 files changed, 1 insertion(+), 19 deletions(-)

(limited to 'include')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 9acd0b7e802..8d339349289 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -193,23 +193,6 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask)
 }
 EXPORT_SYMBOL(blk_queue_bounce_limit);
 
-/**
- * blk_queue_bounce_pfn - set the bounce buffer limit for queue
- * @q: the request queue for the device
- * @pfn: max address
- *
- * Description:
- *    This function is similar to blk_queue_bounce_limit except it
- *    neither changes allocation flags, nor does it set up the ISA DMA
- *    pool. This function should only be used by stacking drivers.
- *    Hardware drivers should use blk_queue_bounce_limit instead.
- */
-void blk_queue_bounce_pfn(struct request_queue *q, u64 pfn)
-{
-	q->limits.bounce_pfn = pfn;
-}
-EXPORT_SYMBOL(blk_queue_bounce_pfn);
-
 /**
  * blk_queue_max_sectors - set max sectors for a request for this queue
  * @q:  the request queue for the device
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 3ca1604ddd5..e9a73bb242b 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -920,7 +920,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
 	blk_queue_max_segment_size(q, t->limits.max_segment_size);
 	blk_queue_max_hw_sectors(q, t->limits.max_hw_sectors);
 	blk_queue_segment_boundary(q, t->limits.seg_boundary_mask);
-	blk_queue_bounce_pfn(q, t->limits.bounce_pfn);
+	blk_queue_bounce_limit(q, t->limits.bounce_pfn);
 
 	if (t->limits.no_cluster)
 		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 989aa1790f4..5e740a135e7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -910,7 +910,6 @@ extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *);
 extern void blk_cleanup_queue(struct request_queue *);
 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
-extern void blk_queue_bounce_pfn(struct request_queue *, u64);
 extern void blk_queue_max_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short);
-- 
cgit v1.2.3-70-g09d2


From ee0398717078260ee4ffa97d407071bc774e2dac Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 9 Jun 2009 00:17:13 -0700
Subject: skbuff: Add frag list abstraction interfaces.

With the hope that these can be used to eliminate direct
references to the frag list implementation.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index aad484cd586..f1c93b878b3 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1077,7 +1077,7 @@ extern void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page,
 			    int off, int size);
 
 #define SKB_PAGE_ASSERT(skb) 	BUG_ON(skb_shinfo(skb)->nr_frags)
-#define SKB_FRAG_ASSERT(skb) 	BUG_ON(skb_shinfo(skb)->frag_list)
+#define SKB_FRAG_ASSERT(skb) 	BUG_ON(skb_has_frags(skb))
 #define SKB_LINEAR_ASSERT(skb)  BUG_ON(skb_is_nonlinear(skb))
 
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
@@ -1716,6 +1716,25 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
 		     skb = skb->prev)
 
 
+static inline bool skb_has_frags(const struct sk_buff *skb)
+{
+	return skb_shinfo(skb)->frag_list != NULL;
+}
+
+static inline void skb_frag_list_init(struct sk_buff *skb)
+{
+	skb_shinfo(skb)->frag_list = NULL;
+}
+
+static inline void skb_frag_add_head(struct sk_buff *skb, struct sk_buff *frag)
+{
+	frag->next = skb_shinfo(skb)->frag_list;
+	skb_shinfo(skb)->frag_list = frag;
+}
+
+#define skb_walk_frags(skb, iter)	\
+	for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next)
+
 extern struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
 					   int *peeked, int *err);
 extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
-- 
cgit v1.2.3-70-g09d2


From a5bd8a13e9e0322c7f76b34790ba34e2e0ce2ac5 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 9 Jun 2009 00:17:27 -0700
Subject: netdevice.h: Use frag list abstraction interfaces.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2a801380b50..9ea8d6dfe54 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1904,7 +1904,7 @@ static inline int net_gso_ok(int features, int gso_type)
 static inline int skb_gso_ok(struct sk_buff *skb, int features)
 {
 	return net_gso_ok(features, skb_shinfo(skb)->gso_type) &&
-	       (!skb_shinfo(skb)->frag_list || (features & NETIF_F_FRAGLIST));
+	       (!skb_has_frags(skb) || (features & NETIF_F_FRAGLIST));
 }
 
 static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
-- 
cgit v1.2.3-70-g09d2


From 151060ac13144208bd7601d17e4c92c59b98072f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 14 Apr 2009 10:54:54 +0900
Subject: CUSE: implement CUSE - Character device in Userspace

CUSE enables implementing character devices in userspace.  With recent
additions of ioctl and poll support, FUSE already has most of what's
necessary to implement character devices.  All CUSE has to do is
bonding all those components - FUSE, chardev and the driver model -
nicely.

When client opens /dev/cuse, kernel starts conversation with
CUSE_INIT.  The client tells CUSE which device it wants to create.  As
the previous patch made fuse_file usable without associated
fuse_inode, CUSE doesn't create super block or inodes.  It attaches
fuse_file to cdev file->private_data during open and set ff->fi to
NULL.  The rest of the operation is almost identical to FUSE direct IO
case.

Each CUSE device has a corresponding directory /sys/class/cuse/DEVNAME
(which is symlink to /sys/devices/virtual/class/DEVNAME if
SYSFS_DEPRECATED is turned off) which hosts "waiting" and "abort"
among other things.  Those two files have the same meaning as the FUSE
control files.

The only notable lacking feature compared to in-kernel implementation
is mmap support.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/Kconfig           |  10 +
 fs/fuse/Makefile     |   1 +
 fs/fuse/cuse.c       | 610 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fuse.h |  31 +++
 4 files changed, 652 insertions(+)
 create mode 100644 fs/fuse/cuse.c

(limited to 'include')

diff --git a/fs/Kconfig b/fs/Kconfig
index 9f7270f36b2..525da2e8f73 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -62,6 +62,16 @@ source "fs/autofs/Kconfig"
 source "fs/autofs4/Kconfig"
 source "fs/fuse/Kconfig"
 
+config CUSE
+	tristate "Character device in Userpace support"
+	depends on FUSE_FS
+	help
+	  This FUSE extension allows character devices to be
+	  implemented in userspace.
+
+	  If you want to develop or use userspace character device
+	  based on CUSE, answer Y or M.
+
 config GENERIC_ACL
 	bool
 	select FS_POSIX_ACL
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 72437065f6a..e95eeb445e5 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -3,5 +3,6 @@
 #
 
 obj-$(CONFIG_FUSE_FS) += fuse.o
+obj-$(CONFIG_CUSE) += cuse.o
 
 fuse-objs := dev.o dir.o file.o inode.o control.o
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
new file mode 100644
index 00000000000..de792dcf327
--- /dev/null
+++ b/fs/fuse/cuse.c
@@ -0,0 +1,610 @@
+/*
+ * CUSE: Character device in Userspace
+ *
+ * Copyright (C) 2008-2009  SUSE Linux Products GmbH
+ * Copyright (C) 2008-2009  Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ *
+ * CUSE enables character devices to be implemented from userland much
+ * like FUSE allows filesystems.  On initialization /dev/cuse is
+ * created.  By opening the file and replying to the CUSE_INIT request
+ * userland CUSE server can create a character device.  After that the
+ * operation is very similar to FUSE.
+ *
+ * A CUSE instance involves the following objects.
+ *
+ * cuse_conn	: contains fuse_conn and serves as bonding structure
+ * channel	: file handle connected to the userland CUSE server
+ * cdev		: the implemented character device
+ * dev		: generic device for cdev
+ *
+ * Note that 'channel' is what 'dev' is in FUSE.  As CUSE deals with
+ * devices, it's called 'channel' to reduce confusion.
+ *
+ * channel determines when the character device dies.  When channel is
+ * closed, everything begins to destruct.  The cuse_conn is taken off
+ * the lookup table preventing further access from cdev, cdev and
+ * generic device are removed and the base reference of cuse_conn is
+ * put.
+ *
+ * On each open, the matching cuse_conn is looked up and if found an
+ * additional reference is taken which is released when the file is
+ * closed.
+ */
+
+#include <linux/fuse.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/kdev_t.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/magic.h>
+#include <linux/miscdevice.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/stat.h>
+
+#include "fuse_i.h"
+
+#define CUSE_CONNTBL_LEN	64
+
+struct cuse_conn {
+	struct list_head	list;	/* linked on cuse_conntbl */
+	struct fuse_conn	fc;	/* fuse connection */
+	struct cdev		*cdev;	/* associated character device */
+	struct device		*dev;	/* device representing @cdev */
+
+	/* init parameters, set once during initialization */
+	bool			unrestricted_ioctl;
+};
+
+static DEFINE_SPINLOCK(cuse_lock);		/* protects cuse_conntbl */
+static struct list_head cuse_conntbl[CUSE_CONNTBL_LEN];
+static struct class *cuse_class;
+
+static struct cuse_conn *fc_to_cc(struct fuse_conn *fc)
+{
+	return container_of(fc, struct cuse_conn, fc);
+}
+
+static struct list_head *cuse_conntbl_head(dev_t devt)
+{
+	return &cuse_conntbl[(MAJOR(devt) + MINOR(devt)) % CUSE_CONNTBL_LEN];
+}
+
+
+/**************************************************************************
+ * CUSE frontend operations
+ *
+ * These are file operations for the character device.
+ *
+ * On open, CUSE opens a file from the FUSE mnt and stores it to
+ * private_data of the open file.  All other ops call FUSE ops on the
+ * FUSE file.
+ */
+
+static ssize_t cuse_read(struct file *file, char __user *buf, size_t count,
+			 loff_t *ppos)
+{
+	loff_t pos = 0;
+
+	return fuse_direct_io(file, buf, count, &pos, 0);
+}
+
+static ssize_t cuse_write(struct file *file, const char __user *buf,
+			  size_t count, loff_t *ppos)
+{
+	loff_t pos = 0;
+	/*
+	 * No locking or generic_write_checks(), the server is
+	 * responsible for locking and sanity checks.
+	 */
+	return fuse_direct_io(file, buf, count, &pos, 1);
+}
+
+static int cuse_open(struct inode *inode, struct file *file)
+{
+	dev_t devt = inode->i_cdev->dev;
+	struct cuse_conn *cc = NULL, *pos;
+	int rc;
+
+	/* look up and get the connection */
+	spin_lock(&cuse_lock);
+	list_for_each_entry(pos, cuse_conntbl_head(devt), list)
+		if (pos->dev->devt == devt) {
+			fuse_conn_get(&pos->fc);
+			cc = pos;
+			break;
+		}
+	spin_unlock(&cuse_lock);
+
+	/* dead? */
+	if (!cc)
+		return -ENODEV;
+
+	/*
+	 * Generic permission check is already done against the chrdev
+	 * file, proceed to open.
+	 */
+	rc = fuse_do_open(&cc->fc, 0, file, 0);
+	if (rc)
+		fuse_conn_put(&cc->fc);
+	return rc;
+}
+
+static int cuse_release(struct inode *inode, struct file *file)
+{
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = ff->fc;
+
+	fuse_sync_release(ff, file->f_flags);
+	fuse_conn_put(fc);
+
+	return 0;
+}
+
+static long cuse_file_ioctl(struct file *file, unsigned int cmd,
+			    unsigned long arg)
+{
+	struct fuse_file *ff = file->private_data;
+	struct cuse_conn *cc = fc_to_cc(ff->fc);
+	unsigned int flags = 0;
+
+	if (cc->unrestricted_ioctl)
+		flags |= FUSE_IOCTL_UNRESTRICTED;
+
+	return fuse_do_ioctl(file, cmd, arg, flags);
+}
+
+static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
+				   unsigned long arg)
+{
+	struct fuse_file *ff = file->private_data;
+	struct cuse_conn *cc = fc_to_cc(ff->fc);
+	unsigned int flags = FUSE_IOCTL_COMPAT;
+
+	if (cc->unrestricted_ioctl)
+		flags |= FUSE_IOCTL_UNRESTRICTED;
+
+	return fuse_do_ioctl(file, cmd, arg, flags);
+}
+
+static const struct file_operations cuse_frontend_fops = {
+	.owner			= THIS_MODULE,
+	.read			= cuse_read,
+	.write			= cuse_write,
+	.open			= cuse_open,
+	.release		= cuse_release,
+	.unlocked_ioctl		= cuse_file_ioctl,
+	.compat_ioctl		= cuse_file_compat_ioctl,
+	.poll			= fuse_file_poll,
+};
+
+
+/**************************************************************************
+ * CUSE channel initialization and destruction
+ */
+
+struct cuse_devinfo {
+	const char		*name;
+};
+
+/**
+ * cuse_parse_one - parse one key=value pair
+ * @pp: i/o parameter for the current position
+ * @end: points to one past the end of the packed string
+ * @keyp: out parameter for key
+ * @valp: out parameter for value
+ *
+ * *@pp points to packed strings - "key0=val0\0key1=val1\0" which ends
+ * at @end - 1.  This function parses one pair and set *@keyp to the
+ * start of the key and *@valp to the start of the value.  Note that
+ * the original string is modified such that the key string is
+ * terminated with '\0'.  *@pp is updated to point to the next string.
+ *
+ * RETURNS:
+ * 1 on successful parse, 0 on EOF, -errno on failure.
+ */
+static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp)
+{
+	char *p = *pp;
+	char *key, *val;
+
+	while (p < end && *p == '\0')
+		p++;
+	if (p == end)
+		return 0;
+
+	if (end[-1] != '\0') {
+		printk(KERN_ERR "CUSE: info not properly terminated\n");
+		return -EINVAL;
+	}
+
+	key = val = p;
+	p += strlen(p);
+
+	if (valp) {
+		strsep(&val, "=");
+		if (!val)
+			val = key + strlen(key);
+		key = strstrip(key);
+		val = strstrip(val);
+	} else
+		key = strstrip(key);
+
+	if (!strlen(key)) {
+		printk(KERN_ERR "CUSE: zero length info key specified\n");
+		return -EINVAL;
+	}
+
+	*pp = p;
+	*keyp = key;
+	if (valp)
+		*valp = val;
+
+	return 1;
+}
+
+/**
+ * cuse_parse_dev_info - parse device info
+ * @p: device info string
+ * @len: length of device info string
+ * @devinfo: out parameter for parsed device info
+ *
+ * Parse @p to extract device info and store it into @devinfo.  String
+ * pointed to by @p is modified by parsing and @devinfo points into
+ * them, so @p shouldn't be freed while @devinfo is in use.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo)
+{
+	char *end = p + len;
+	char *key, *val;
+	int rc;
+
+	while (true) {
+		rc = cuse_parse_one(&p, end, &key, &val);
+		if (rc < 0)
+			return rc;
+		if (!rc)
+			break;
+		if (strcmp(key, "DEVNAME") == 0)
+			devinfo->name = val;
+		else
+			printk(KERN_WARNING "CUSE: unknown device info \"%s\"\n",
+			       key);
+	}
+
+	if (!devinfo->name || !strlen(devinfo->name)) {
+		printk(KERN_ERR "CUSE: DEVNAME unspecified\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void cuse_gendev_release(struct device *dev)
+{
+	kfree(dev);
+}
+
+/**
+ * cuse_process_init_reply - finish initializing CUSE channel
+ *
+ * This function creates the character device and sets up all the
+ * required data structures for it.  Please read the comment at the
+ * top of this file for high level overview.
+ */
+static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
+{
+	struct cuse_conn *cc = fc_to_cc(fc);
+	struct cuse_init_out *arg = &req->misc.cuse_init_out;
+	struct page *page = req->pages[0];
+	struct cuse_devinfo devinfo = { };
+	struct device *dev;
+	struct cdev *cdev;
+	dev_t devt;
+	int rc;
+
+	if (req->out.h.error ||
+	    arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) {
+		goto err;
+	}
+
+	fc->minor = arg->minor;
+	fc->max_read = max_t(unsigned, arg->max_read, 4096);
+	fc->max_write = max_t(unsigned, arg->max_write, 4096);
+
+	/* parse init reply */
+	cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL;
+
+	rc = cuse_parse_devinfo(page_address(page), req->out.args[1].size,
+				&devinfo);
+	if (rc)
+		goto err;
+
+	/* determine and reserve devt */
+	devt = MKDEV(arg->dev_major, arg->dev_minor);
+	if (!MAJOR(devt))
+		rc = alloc_chrdev_region(&devt, MINOR(devt), 1, devinfo.name);
+	else
+		rc = register_chrdev_region(devt, 1, devinfo.name);
+	if (rc) {
+		printk(KERN_ERR "CUSE: failed to register chrdev region\n");
+		goto err;
+	}
+
+	/* devt determined, create device */
+	rc = -ENOMEM;
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		goto err_region;
+
+	device_initialize(dev);
+	dev_set_uevent_suppress(dev, 1);
+	dev->class = cuse_class;
+	dev->devt = devt;
+	dev->release = cuse_gendev_release;
+	dev_set_drvdata(dev, cc);
+	dev_set_name(dev, "%s", devinfo.name);
+
+	rc = device_add(dev);
+	if (rc)
+		goto err_device;
+
+	/* register cdev */
+	rc = -ENOMEM;
+	cdev = cdev_alloc();
+	if (!cdev)
+		goto err_device;
+
+	cdev->owner = THIS_MODULE;
+	cdev->ops = &cuse_frontend_fops;
+
+	rc = cdev_add(cdev, devt, 1);
+	if (rc)
+		goto err_cdev;
+
+	cc->dev = dev;
+	cc->cdev = cdev;
+
+	/* make the device available */
+	spin_lock(&cuse_lock);
+	list_add(&cc->list, cuse_conntbl_head(devt));
+	spin_unlock(&cuse_lock);
+
+	/* announce device availability */
+	dev_set_uevent_suppress(dev, 0);
+	kobject_uevent(&dev->kobj, KOBJ_ADD);
+out:
+	__free_page(page);
+	return;
+
+err_cdev:
+	cdev_del(cdev);
+err_device:
+	put_device(dev);
+err_region:
+	unregister_chrdev_region(devt, 1);
+err:
+	fc->conn_error = 1;
+	goto out;
+}
+
+static int cuse_send_init(struct cuse_conn *cc)
+{
+	int rc;
+	struct fuse_req *req;
+	struct page *page;
+	struct fuse_conn *fc = &cc->fc;
+	struct cuse_init_in *arg;
+
+	BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
+
+	req = fuse_get_req(fc);
+	if (IS_ERR(req)) {
+		rc = PTR_ERR(req);
+		goto err;
+	}
+
+	rc = -ENOMEM;
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page)
+		goto err_put_req;
+
+	arg = &req->misc.cuse_init_in;
+	arg->major = FUSE_KERNEL_VERSION;
+	arg->minor = FUSE_KERNEL_MINOR_VERSION;
+	arg->flags |= CUSE_UNRESTRICTED_IOCTL;
+	req->in.h.opcode = CUSE_INIT;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(struct cuse_init_in);
+	req->in.args[0].value = arg;
+	req->out.numargs = 2;
+	req->out.args[0].size = sizeof(struct cuse_init_out);
+	req->out.args[0].value = &req->misc.cuse_init_out;
+	req->out.args[1].size = CUSE_INIT_INFO_MAX;
+	req->out.argvar = 1;
+	req->out.argpages = 1;
+	req->pages[0] = page;
+	req->num_pages = 1;
+	req->end = cuse_process_init_reply;
+	fuse_request_send_background(fc, req);
+
+	return 0;
+
+err_put_req:
+	fuse_put_request(fc, req);
+err:
+	return rc;
+}
+
+static void cuse_fc_release(struct fuse_conn *fc)
+{
+	struct cuse_conn *cc = fc_to_cc(fc);
+	kfree(cc);
+}
+
+/**
+ * cuse_channel_open - open method for /dev/cuse
+ * @inode: inode for /dev/cuse
+ * @file: file struct being opened
+ *
+ * Userland CUSE server can create a CUSE device by opening /dev/cuse
+ * and replying to the initilaization request kernel sends.  This
+ * function is responsible for handling CUSE device initialization.
+ * Because the fd opened by this function is used during
+ * initialization, this function only creates cuse_conn and sends
+ * init.  The rest is delegated to a kthread.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int cuse_channel_open(struct inode *inode, struct file *file)
+{
+	struct cuse_conn *cc;
+	int rc;
+
+	/* set up cuse_conn */
+	cc = kzalloc(sizeof(*cc), GFP_KERNEL);
+	if (!cc)
+		return -ENOMEM;
+
+	fuse_conn_init(&cc->fc);
+
+	INIT_LIST_HEAD(&cc->list);
+	cc->fc.release = cuse_fc_release;
+
+	cc->fc.connected = 1;
+	cc->fc.blocked = 0;
+	rc = cuse_send_init(cc);
+	if (rc) {
+		fuse_conn_put(&cc->fc);
+		return rc;
+	}
+	file->private_data = &cc->fc;	/* channel owns base reference to cc */
+
+	return 0;
+}
+
+/**
+ * cuse_channel_release - release method for /dev/cuse
+ * @inode: inode for /dev/cuse
+ * @file: file struct being closed
+ *
+ * Disconnect the channel, deregister CUSE device and initiate
+ * destruction by putting the default reference.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int cuse_channel_release(struct inode *inode, struct file *file)
+{
+	struct cuse_conn *cc = fc_to_cc(file->private_data);
+	int rc;
+
+	/* remove from the conntbl, no more access from this point on */
+	spin_lock(&cuse_lock);
+	list_del_init(&cc->list);
+	spin_unlock(&cuse_lock);
+
+	/* remove device */
+	if (cc->dev)
+		device_unregister(cc->dev);
+	if (cc->cdev) {
+		unregister_chrdev_region(cc->cdev->dev, 1);
+		cdev_del(cc->cdev);
+	}
+
+	/* kill connection and shutdown channel */
+	fuse_conn_kill(&cc->fc);
+	rc = fuse_dev_release(inode, file);	/* puts the base reference */
+
+	return rc;
+}
+
+static struct file_operations cuse_channel_fops; /* initialized during init */
+
+
+/**************************************************************************
+ * Misc stuff and module initializatiion
+ *
+ * CUSE exports the same set of attributes to sysfs as fusectl.
+ */
+
+static ssize_t cuse_class_waiting_show(struct device *dev,
+				       struct device_attribute *attr, char *buf)
+{
+	struct cuse_conn *cc = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting));
+}
+
+static ssize_t cuse_class_abort_store(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct cuse_conn *cc = dev_get_drvdata(dev);
+
+	fuse_abort_conn(&cc->fc);
+	return count;
+}
+
+static struct device_attribute cuse_class_dev_attrs[] = {
+	__ATTR(waiting, S_IFREG | 0400, cuse_class_waiting_show, NULL),
+	__ATTR(abort, S_IFREG | 0200, NULL, cuse_class_abort_store),
+	{ }
+};
+
+static struct miscdevice cuse_miscdev = {
+	.minor		= MISC_DYNAMIC_MINOR,
+	.name		= "cuse",
+	.fops		= &cuse_channel_fops,
+};
+
+static int __init cuse_init(void)
+{
+	int i, rc;
+
+	/* init conntbl */
+	for (i = 0; i < CUSE_CONNTBL_LEN; i++)
+		INIT_LIST_HEAD(&cuse_conntbl[i]);
+
+	/* inherit and extend fuse_dev_operations */
+	cuse_channel_fops		= fuse_dev_operations;
+	cuse_channel_fops.owner		= THIS_MODULE;
+	cuse_channel_fops.open		= cuse_channel_open;
+	cuse_channel_fops.release	= cuse_channel_release;
+
+	cuse_class = class_create(THIS_MODULE, "cuse");
+	if (IS_ERR(cuse_class))
+		return PTR_ERR(cuse_class);
+
+	cuse_class->dev_attrs = cuse_class_dev_attrs;
+
+	rc = misc_register(&cuse_miscdev);
+	if (rc) {
+		class_destroy(cuse_class);
+		return rc;
+	}
+
+	return 0;
+}
+
+static void __exit cuse_exit(void)
+{
+	misc_deregister(&cuse_miscdev);
+	class_destroy(cuse_class);
+}
+
+module_init(cuse_init);
+module_exit(cuse_exit);
+
+MODULE_AUTHOR("Tejun Heo <tj@kernel.org>");
+MODULE_DESCRIPTION("Character device in Userspace");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 162e5defe68..d41ed593f79 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -120,6 +120,13 @@ struct fuse_file_lock {
 #define FUSE_EXPORT_SUPPORT	(1 << 4)
 #define FUSE_BIG_WRITES		(1 << 5)
 
+/**
+ * CUSE INIT request/reply flags
+ *
+ * CUSE_UNRESTRICTED_IOCTL:  use unrestricted ioctl
+ */
+#define CUSE_UNRESTRICTED_IOCTL	(1 << 0)
+
 /**
  * Release flags
  */
@@ -210,6 +217,9 @@ enum fuse_opcode {
 	FUSE_DESTROY       = 38,
 	FUSE_IOCTL         = 39,
 	FUSE_POLL          = 40,
+
+	/* CUSE specific operations */
+	CUSE_INIT          = 4096,
 };
 
 enum fuse_notify_code {
@@ -401,6 +411,27 @@ struct fuse_init_out {
 	__u32	max_write;
 };
 
+#define CUSE_INIT_INFO_MAX 4096
+
+struct cuse_init_in {
+	__u32	major;
+	__u32	minor;
+	__u32	unused;
+	__u32	flags;
+};
+
+struct cuse_init_out {
+	__u32	major;
+	__u32	minor;
+	__u32	unused;
+	__u32	flags;
+	__u32	max_read;
+	__u32	max_write;
+	__u32	dev_major;		/* chardev major */
+	__u32	dev_minor;		/* chardev minor */
+	__u32	spare[10];
+};
+
 struct fuse_interrupt_in {
 	__u64	unique;
 };
-- 
cgit v1.2.3-70-g09d2


From 728bf09827d350cdaa1f093170e745e8dac49b7a Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 8 Jun 2009 22:05:00 +0000
Subject: pkt_sched: Use PSCHED_SHIFT in PSCHED time conversion

Use PSCHED_SHIFT constant instead of '10' in PSCHED_US2NS() and
PSCHED_NS2US() macros to enable changing this value later.

Additionally use PSCHED_SHIFT in sch_hfsc SM_SHIFT and ISM_SHIFT
definitions. This part of the patch is based on feedback from
Patrick McHardy <kaber@trash.net>.

Reported-by: Antonio Almeida <vexwek@gmail.com>
Tested-by: Antonio Almeida <vexwek@gmail.com>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h | 5 +++--
 net/sched/sch_hfsc.c    | 8 +++++---
 2 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index e37fe3129c1..cd0e026d4f6 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -42,8 +42,9 @@ typedef u64	psched_time_t;
 typedef long	psched_tdiff_t;
 
 /* Avoid doing 64 bit divide by 1000 */
-#define PSCHED_US2NS(x)			((s64)(x) << 10)
-#define PSCHED_NS2US(x)			((x) >> 10)
+#define PSCHED_SHIFT			10
+#define PSCHED_US2NS(x)			((s64)(x) << PSCHED_SHIFT)
+#define PSCHED_NS2US(x)			((x) >> PSCHED_SHIFT)
 
 #define PSCHED_TICKS_PER_SEC		PSCHED_NS2US(NSEC_PER_SEC)
 #define PSCHED_PASTPERFECT		0
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 5022f9c1f34..362c2811b2d 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -372,7 +372,7 @@ cftree_update(struct hfsc_class *cl)
  *	ism: (psched_us/byte) << ISM_SHIFT
  *	dx: psched_us
  *
- * The clock source resolution with ktime is 1.024us.
+ * The clock source resolution with ktime and PSCHED_SHIFT 10 is 1.024us.
  *
  * sm and ism are scaled in order to keep effective digits.
  * SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective
@@ -383,9 +383,11 @@ cftree_update(struct hfsc_class *cl)
  *  bytes/1.024us 12.8e-3    128e-3     1280e-3    12800e-3   128000e-3
  *
  *  1.024us/byte  78.125     7.8125     0.78125    0.078125   0.0078125
+ *
+ * So, for PSCHED_SHIFT 10 we need: SM_SHIFT 20, ISM_SHIFT 18.
  */
-#define	SM_SHIFT	20
-#define	ISM_SHIFT	18
+#define	SM_SHIFT	(30 - PSCHED_SHIFT)
+#define	ISM_SHIFT	(8 + PSCHED_SHIFT)
 
 #define	SM_MASK		((1ULL << SM_SHIFT) - 1)
 #define	ISM_MASK	((1ULL << ISM_SHIFT) - 1)
-- 
cgit v1.2.3-70-g09d2


From a4a710c4a7490587406462bf1d54504b7783d7d7 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 8 Jun 2009 22:05:13 +0000
Subject: pkt_sched: Change PSCHED_SHIFT from 10 to 6

Change PSCHED_SHIFT from 10 to 6 to increase schedulers time
resolution. This will increase 16x a number of (internal) ticks per
nanosecond, and is needed to improve accuracy of schedulers based on
rate tables, like HTB, TBF or CBQ, with rates above 100Mbit. It is
assumed this change is safe for 32bit accounting of time diffs up
to 2 minutes, which should be enough for common use (extremely low
rate values may overflow, so get inaccurate instead). To make full
use of this change an updated iproute2 will be needed. (But using
older iproute2 should be safe too.)

This change breaks ticks - microseconds similarity, so some minor code
fixes might be needed. It is also planned to change naming adequately
eg. to PSCHED_TICKS2NS() etc. in the near future.

Reported-by: Antonio Almeida <vexwek@gmail.com>
Tested-by: Antonio Almeida <vexwek@gmail.com>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index cd0e026d4f6..120935b2abd 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -41,8 +41,8 @@ static inline void *qdisc_priv(struct Qdisc *q)
 typedef u64	psched_time_t;
 typedef long	psched_tdiff_t;
 
-/* Avoid doing 64 bit divide by 1000 */
-#define PSCHED_SHIFT			10
+/* Avoid doing 64 bit divide */
+#define PSCHED_SHIFT			6
 #define PSCHED_US2NS(x)			((s64)(x) << PSCHED_SHIFT)
 #define PSCHED_NS2US(x)			((x) >> PSCHED_SHIFT)
 
-- 
cgit v1.2.3-70-g09d2


From fcb94e422479da52ed90bab230c59617a0462416 Mon Sep 17 00:00:00 2001
From: Sergey Lapin <slapin@ossfans.org>
Date: Mon, 8 Jun 2009 12:18:47 +0000
Subject: Add constants for the ieee 802.15.4 stack

IEEE 802.15.4 stack requires several constants to be defined/adjusted.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Signed-off-by: Sergey Lapin <slapin@ossfans.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_arp.h   | 2 ++
 include/linux/if_ether.h | 1 +
 include/linux/socket.h   | 4 +++-
 net/core/dev.c           | 6 ++++--
 net/core/sock.c          | 3 +++
 5 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h
index 5ff89809a58..b554300ef8b 100644
--- a/include/linux/if_arp.h
+++ b/include/linux/if_arp.h
@@ -86,6 +86,8 @@
 #define ARPHRD_IEEE80211 801		/* IEEE 802.11			*/
 #define ARPHRD_IEEE80211_PRISM 802	/* IEEE 802.11 + Prism2 header  */
 #define ARPHRD_IEEE80211_RADIOTAP 803	/* IEEE 802.11 + radiotap header */
+#define ARPHRD_IEEE802154	  804
+#define ARPHRD_IEEE802154_PHY	  805
 
 #define ARPHRD_PHONET	820		/* PhoNet media type		*/
 #define ARPHRD_PHONET_PIPE 821		/* PhoNet pipe header		*/
diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index cfe4fe1b713..11a60e4f0a6 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -106,6 +106,7 @@
 #define ETH_P_DSA	0x001B		/* Distributed Switch Arch.	*/
 #define ETH_P_TRAILER	0x001C		/* Trailer switch tagging	*/
 #define ETH_P_PHONET	0x00F5		/* Nokia Phonet frames          */
+#define ETH_P_IEEE802154 0x00F6		/* IEEE802.15.4 frame		*/
 
 /*
  *	This is an Ethernet frame header.
diff --git a/include/linux/socket.h b/include/linux/socket.h
index d2310cb45d2..3b461dffe24 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -194,7 +194,8 @@ struct ucred {
 #define AF_RXRPC	33	/* RxRPC sockets 		*/
 #define AF_ISDN		34	/* mISDN sockets 		*/
 #define AF_PHONET	35	/* Phonet sockets		*/
-#define AF_MAX		36	/* For now.. */
+#define AF_IEEE802154	36	/* IEEE802154 sockets		*/
+#define AF_MAX		37	/* For now.. */
 
 /* Protocol families, same as address families. */
 #define PF_UNSPEC	AF_UNSPEC
@@ -233,6 +234,7 @@ struct ucred {
 #define PF_RXRPC	AF_RXRPC
 #define PF_ISDN		AF_ISDN
 #define PF_PHONET	AF_PHONET
+#define PF_IEEE802154	AF_IEEE802154
 #define PF_MAX		AF_MAX
 
 /* Maximum queue length specifiable by listen.  */
diff --git a/net/core/dev.c b/net/core/dev.c
index 81b392ef511..11560e3258b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -269,7 +269,8 @@ static const unsigned short netdev_lock_type[] =
 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
 	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
 	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
-	 ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE};
+	 ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154, ARPHRD_IEEE802154_PHY,
+	 ARPHRD_VOID, ARPHRD_NONE};
 
 static const char *netdev_lock_name[] =
 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
@@ -286,7 +287,8 @@ static const char *netdev_lock_name[] =
 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
 	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
 	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
-	 "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"};
+	 "_xmit_PHONET_PIPE", "_xmit_IEEE802154", "_xmit_IEEE802154_PHY",
+	 "_xmit_VOID", "_xmit_NONE"};
 
 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
diff --git a/net/core/sock.c b/net/core/sock.c
index 58dec9dff99..04e35eb2e73 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -155,6 +155,7 @@ static const char *af_family_key_strings[AF_MAX+1] = {
   "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
   "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
   "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
+  "sk_lock-AF_IEEE802154",
   "sk_lock-AF_MAX"
 };
 static const char *af_family_slock_key_strings[AF_MAX+1] = {
@@ -170,6 +171,7 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = {
   "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
   "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
   "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
+  "slock-AF_IEEE802154",
   "slock-AF_MAX"
 };
 static const char *af_family_clock_key_strings[AF_MAX+1] = {
@@ -185,6 +187,7 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = {
   "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
   "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
   "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
+  "clock-AF_IEEE802154",
   "clock-AF_MAX"
 };
 
-- 
cgit v1.2.3-70-g09d2


From 9ec7671603573ede31207eb5b0b3e1aa211b2854 Mon Sep 17 00:00:00 2001
From: Sergey Lapin <slapin@ossfans.org>
Date: Mon, 8 Jun 2009 12:18:48 +0000
Subject: net: add IEEE 802.15.4 socket family implementation

Add support for communication over IEEE 802.15.4 networks. This implementation
is neither certified nor complete, but aims to that goal. This commit contains
only the socket interface for communication over IEEE 802.15.4 networks.
One can either send RAW datagrams or use SOCK_DGRAM to encapsulate data
inside normal IEEE 802.15.4 packets.

Configuration interface, drivers and software MAC 802.15.4 implementation will
follow.

Initial implementation was done by Maxim Gorbachyov, Maxim Osipov and Pavel
Smolensky as a research project at Siemens AG. Later the stack was heavily
reworked to better suit the linux networking model, and is now maitained
as an open project partially sponsored by Siemens.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Signed-off-by: Sergey Lapin <slapin@ossfans.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ieee802154/af_ieee802154.h |  60 +++++
 include/net/ieee802154/mac_def.h       | 160 +++++++++++++
 include/net/ieee802154/netdevice.h     | 115 ++++++++++
 net/Kconfig                            |   1 +
 net/Makefile                           |   1 +
 net/ieee802154/Kconfig                 |  12 +
 net/ieee802154/Makefile                |   4 +
 net/ieee802154/af802154.h              |  36 +++
 net/ieee802154/af_ieee802154.c         | 372 +++++++++++++++++++++++++++++++
 net/ieee802154/dgram.c                 | 394 +++++++++++++++++++++++++++++++++
 net/ieee802154/raw.c                   | 254 +++++++++++++++++++++
 11 files changed, 1409 insertions(+)
 create mode 100644 include/net/ieee802154/af_ieee802154.h
 create mode 100644 include/net/ieee802154/mac_def.h
 create mode 100644 include/net/ieee802154/netdevice.h
 create mode 100644 net/ieee802154/Kconfig
 create mode 100644 net/ieee802154/Makefile
 create mode 100644 net/ieee802154/af802154.h
 create mode 100644 net/ieee802154/af_ieee802154.c
 create mode 100644 net/ieee802154/dgram.c
 create mode 100644 net/ieee802154/raw.c

(limited to 'include')

diff --git a/include/net/ieee802154/af_ieee802154.h b/include/net/ieee802154/af_ieee802154.h
new file mode 100644
index 00000000000..0d78605fb1a
--- /dev/null
+++ b/include/net/ieee802154/af_ieee802154.h
@@ -0,0 +1,60 @@
+/*
+ * IEEE 802.15.4 inteface for userspace
+ *
+ * Copyright 2007, 2008 Siemens AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Written by:
+ * Sergey Lapin <slapin@ossfans.org>
+ * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
+ */
+
+#ifndef _AF_IEEE802154_H
+#define _AF_IEEE802154_H
+
+#include <linux/socket.h> /* for sa_family_t */
+
+enum {
+	IEEE802154_ADDR_NONE = 0x0,
+	/* RESERVED = 0x01, */
+	IEEE802154_ADDR_SHORT = 0x2, /* 16-bit address + PANid */
+	IEEE802154_ADDR_LONG = 0x3, /* 64-bit address + PANid */
+};
+
+/* address length, octets */
+#define IEEE802154_ADDR_LEN	8
+
+struct ieee802154_addr {
+	int addr_type;
+	u16 pan_id;
+	union {
+		u8 hwaddr[IEEE802154_ADDR_LEN];
+		u16 short_addr;
+	};
+};
+
+#define IEEE802154_PANID_BROADCAST	0xffff
+#define IEEE802154_ADDR_BROADCAST	0xffff
+#define IEEE802154_ADDR_UNDEF		0xfffe
+
+struct sockaddr_ieee802154 {
+	sa_family_t family; /* AF_IEEE802154 */
+	struct ieee802154_addr addr;
+};
+
+/* master device */
+#define IEEE802154_SIOC_ADD_SLAVE		(SIOCDEVPRIVATE + 0)
+
+#endif
diff --git a/include/net/ieee802154/mac_def.h b/include/net/ieee802154/mac_def.h
new file mode 100644
index 00000000000..8cb68463565
--- /dev/null
+++ b/include/net/ieee802154/mac_def.h
@@ -0,0 +1,160 @@
+/*
+ * IEEE802.15.4-2003 specification
+ *
+ * Copyright (C) 2007, 2008 Siemens AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Written by:
+ * Pavel Smolenskiy <pavel.smolenskiy@gmail.com>
+ * Maxim Gorbachyov <maxim.gorbachev@siemens.com>
+ * Maxim Osipov <maxim.osipov@siemens.com>
+ * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
+ */
+
+#ifndef IEEE802154_MAC_DEF_H
+#define IEEE802154_MAC_DEF_H
+
+#define IEEE802154_FC_TYPE_BEACON	0x0	/* Frame is beacon */
+#define	IEEE802154_FC_TYPE_DATA		0x1	/* Frame is data */
+#define IEEE802154_FC_TYPE_ACK		0x2	/* Frame is acknowledgment */
+#define IEEE802154_FC_TYPE_MAC_CMD	0x3	/* Frame is MAC command */
+
+#define IEEE802154_FC_TYPE_SHIFT		0
+#define IEEE802154_FC_TYPE_MASK		((1 << 3) - 1)
+#define IEEE802154_FC_TYPE(x)		((x & IEEE802154_FC_TYPE_MASK) >> IEEE802154_FC_TYPE_SHIFT)
+#define IEEE802154_FC_SET_TYPE(v, x)	do {	\
+	v = (((v) & ~IEEE802154_FC_TYPE_MASK) | \
+	    (((x) << IEEE802154_FC_TYPE_SHIFT) & IEEE802154_FC_TYPE_MASK)); \
+	} while (0)
+
+#define IEEE802154_FC_SECEN		(1 << 3)
+#define IEEE802154_FC_FRPEND		(1 << 4)
+#define IEEE802154_FC_ACK_REQ		(1 << 5)
+#define IEEE802154_FC_INTRA_PAN		(1 << 6)
+
+#define IEEE802154_FC_SAMODE_SHIFT	14
+#define IEEE802154_FC_SAMODE_MASK	(3 << IEEE802154_FC_SAMODE_SHIFT)
+#define IEEE802154_FC_DAMODE_SHIFT	10
+#define IEEE802154_FC_DAMODE_MASK	(3 << IEEE802154_FC_DAMODE_SHIFT)
+
+#define IEEE802154_FC_SAMODE(x)		\
+	(((x) & IEEE802154_FC_SAMODE_MASK) >> IEEE802154_FC_SAMODE_SHIFT)
+
+#define IEEE802154_FC_DAMODE(x)		\
+	(((x) & IEEE802154_FC_DAMODE_MASK) >> IEEE802154_FC_DAMODE_SHIFT)
+
+
+/* MAC's Command Frames Identifiers */
+#define IEEE802154_CMD_ASSOCIATION_REQ		0x01
+#define IEEE802154_CMD_ASSOCIATION_RESP		0x02
+#define IEEE802154_CMD_DISASSOCIATION_NOTIFY	0x03
+#define IEEE802154_CMD_DATA_REQ			0x04
+#define IEEE802154_CMD_PANID_CONFLICT_NOTIFY	0x05
+#define IEEE802154_CMD_ORPHAN_NOTIFY		0x06
+#define IEEE802154_CMD_BEACON_REQ		0x07
+#define IEEE802154_CMD_COORD_REALIGN_NOTIFY	0x08
+#define IEEE802154_CMD_GTS_REQ			0x09
+
+/*
+ * The return values of MAC operations
+ */
+enum {
+	/*
+	 * The requested operation was completed successfully.
+	 * For a transmission request, this value indicates
+	 * a successful transmission.
+	 */
+	IEEE802154_SUCCESS = 0x0,
+
+	/* The beacon was lost following a synchronization request. */
+	IEEE802154_BEACON_LOSS = 0xe0,
+	/*
+	 * A transmission could not take place due to activity on the
+	 * channel, i.e., the CSMA-CA mechanism has failed.
+	 */
+	IEEE802154_CHNL_ACCESS_FAIL = 0xe1,
+	/* The GTS request has been denied by the PAN coordinator. */
+	IEEE802154_DENINED = 0xe2,
+	/* The attempt to disable the transceiver has failed. */
+	IEEE802154_DISABLE_TRX_FAIL = 0xe3,
+	/*
+	 * The received frame induces a failed security check according to
+	 * the security suite.
+	 */
+	IEEE802154_FAILED_SECURITY_CHECK = 0xe4,
+	/*
+	 * The frame resulting from secure processing has a length that is
+	 * greater than aMACMaxFrameSize.
+	 */
+	IEEE802154_FRAME_TOO_LONG = 0xe5,
+	/*
+	 * The requested GTS transmission failed because the specified GTS
+	 * either did not have a transmit GTS direction or was not defined.
+	 */
+	IEEE802154_INVALID_GTS = 0xe6,
+	/*
+	 * A request to purge an MSDU from the transaction queue was made using
+	 * an MSDU handle that was not found in the transaction table.
+	 */
+	IEEE802154_INVALID_HANDLE = 0xe7,
+	/* A parameter in the primitive is out of the valid range.*/
+	IEEE802154_INVALID_PARAMETER = 0xe8,
+	/* No acknowledgment was received after aMaxFrameRetries. */
+	IEEE802154_NO_ACK = 0xe9,
+	/* A scan operation failed to find any network beacons.*/
+	IEEE802154_NO_BEACON = 0xea,
+	/* No response data were available following a request. */
+	IEEE802154_NO_DATA = 0xeb,
+	/* The operation failed because a short address was not allocated. */
+	IEEE802154_NO_SHORT_ADDRESS = 0xec,
+	/*
+	 * A receiver enable request was unsuccessful because it could not be
+	 * completed within the CAP.
+	 */
+	IEEE802154_OUT_OF_CAP = 0xed,
+	/*
+	 * A PAN identifier conflict has been detected and communicated to the
+	 * PAN coordinator.
+	 */
+	IEEE802154_PANID_CONFLICT = 0xee,
+	/* A coordinator realignment command has been received. */
+	IEEE802154_REALIGMENT = 0xef,
+	/* The transaction has expired and its information discarded. */
+	IEEE802154_TRANSACTION_EXPIRED = 0xf0,
+	/* There is no capacity to store the transaction. */
+	IEEE802154_TRANSACTION_OVERFLOW = 0xf1,
+	/*
+	 * The transceiver was in the transmitter enabled state when the
+	 * receiver was requested to be enabled.
+	 */
+	IEEE802154_TX_ACTIVE = 0xf2,
+	/* The appropriate key is not available in the ACL. */
+	IEEE802154_UNAVAILABLE_KEY = 0xf3,
+	/*
+	 * A SET/GET request was issued with the identifier of a PIB attribute
+	 * that is not supported.
+	 */
+	IEEE802154_UNSUPPORTED_ATTR = 0xf4,
+	/*
+	 * A request to perform a scan operation failed because the MLME was
+	 * in the process of performing a previously initiated scan operation.
+	 */
+	IEEE802154_SCAN_IN_PROGRESS = 0xfc,
+};
+
+
+#endif
+
+
diff --git a/include/net/ieee802154/netdevice.h b/include/net/ieee802154/netdevice.h
new file mode 100644
index 00000000000..e2506af3e7c
--- /dev/null
+++ b/include/net/ieee802154/netdevice.h
@@ -0,0 +1,115 @@
+/*
+ * An interface between IEEE802.15.4 device and rest of the kernel.
+ *
+ * Copyright (C) 2007, 2008, 2009 Siemens AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Written by:
+ * Pavel Smolenskiy <pavel.smolenskiy@gmail.com>
+ * Maxim Gorbachyov <maxim.gorbachev@siemens.com>
+ * Maxim Osipov <maxim.osipov@siemens.com>
+ * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
+ */
+
+#ifndef IEEE802154_NETDEVICE_H
+#define IEEE802154_NETDEVICE_H
+
+/*
+ * A control block of skb passed between the ARPHRD_IEEE802154 device
+ * and other stack parts.
+ */
+struct ieee802154_mac_cb {
+	u8 lqi;
+	struct ieee802154_addr sa;
+	struct ieee802154_addr da;
+	u8 flags;
+	u8 seq;
+};
+
+static inline struct ieee802154_mac_cb *mac_cb(struct sk_buff *skb)
+{
+	return (struct ieee802154_mac_cb *)skb->cb;
+}
+
+#define MAC_CB_FLAG_TYPEMASK		((1 << 3) - 1)
+
+#define MAC_CB_FLAG_ACKREQ		(1 << 3)
+#define MAC_CB_FLAG_SECEN		(1 << 4)
+#define MAC_CB_FLAG_INTRAPAN		(1 << 5)
+
+static inline int mac_cb_is_ackreq(struct sk_buff *skb)
+{
+	return mac_cb(skb)->flags & MAC_CB_FLAG_ACKREQ;
+}
+
+static inline int mac_cb_is_secen(struct sk_buff *skb)
+{
+	return mac_cb(skb)->flags & MAC_CB_FLAG_SECEN;
+}
+
+static inline int mac_cb_is_intrapan(struct sk_buff *skb)
+{
+	return mac_cb(skb)->flags & MAC_CB_FLAG_INTRAPAN;
+}
+
+static inline int mac_cb_type(struct sk_buff *skb)
+{
+	return mac_cb(skb)->flags & MAC_CB_FLAG_TYPEMASK;
+}
+
+#define IEEE802154_MAC_SCAN_ED		0
+#define IEEE802154_MAC_SCAN_ACTIVE	1
+#define IEEE802154_MAC_SCAN_PASSIVE	2
+#define IEEE802154_MAC_SCAN_ORPHAN	3
+
+/*
+ * This should be located at net_device->ml_priv
+ */
+struct ieee802154_mlme_ops {
+	int (*assoc_req)(struct net_device *dev,
+			struct ieee802154_addr *addr,
+			u8 channel, u8 cap);
+	int (*assoc_resp)(struct net_device *dev,
+			struct ieee802154_addr *addr,
+			u16 short_addr, u8 status);
+	int (*disassoc_req)(struct net_device *dev,
+			struct ieee802154_addr *addr,
+			u8 reason);
+	int (*start_req)(struct net_device *dev,
+			struct ieee802154_addr *addr,
+			u8 channel, u8 bcn_ord, u8 sf_ord,
+			u8 pan_coord, u8 blx, u8 coord_realign);
+	int (*scan_req)(struct net_device *dev,
+			u8 type, u32 channels, u8 duration);
+
+	/*
+	 * FIXME: these should become the part of PIB/MIB interface.
+	 * However we still don't have IB interface of any kind
+	 */
+	u16 (*get_pan_id)(struct net_device *dev);
+	u16 (*get_short_addr)(struct net_device *dev);
+	u8 (*get_dsn)(struct net_device *dev);
+	u8 (*get_bsn)(struct net_device *dev);
+};
+
+static inline struct ieee802154_mlme_ops *ieee802154_mlme_ops(
+		struct net_device *dev)
+{
+	return dev->ml_priv;
+}
+
+#endif
+
+
diff --git a/net/Kconfig b/net/Kconfig
index c19f549c8e7..7051b971067 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -179,6 +179,7 @@ source "net/lapb/Kconfig"
 source "net/econet/Kconfig"
 source "net/wanrouter/Kconfig"
 source "net/phonet/Kconfig"
+source "net/ieee802154/Kconfig"
 source "net/sched/Kconfig"
 source "net/dcb/Kconfig"
 
diff --git a/net/Makefile b/net/Makefile
index 9e00a55a901..ba324aefda7 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -60,6 +60,7 @@ obj-$(CONFIG_NET_9P)		+= 9p/
 ifneq ($(CONFIG_DCB),)
 obj-y				+= dcb/
 endif
+obj-y				+= ieee802154/
 
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_SYSCTL)		+= sysctl_net.o
diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig
new file mode 100644
index 00000000000..1c1de97d264
--- /dev/null
+++ b/net/ieee802154/Kconfig
@@ -0,0 +1,12 @@
+config IEEE802154
+	tristate "IEEE Std 802.15.4 Low-Rate Wireless Personal Area Networks support (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	---help---
+	  IEEE Std 802.15.4 defines a low data rate, low power and low
+	  complexity short range wireless personal area networks. It was
+	  designed to organise networks of sensors, switches, etc automation
+	  devices. Maximum allowed data rate is 250 kb/s and typical personal
+	  operating space around 10m.
+
+	  Say Y here to compile LR-WPAN support into the kernel or say M to
+	  compile it as modules.
diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile
new file mode 100644
index 00000000000..7451c7cc8c8
--- /dev/null
+++ b/net/ieee802154/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_IEEE802154) +=	af_802154.o
+af_802154-y		:= af_ieee802154.o raw.o dgram.o
+
+ccflags-y += -Wall -DDEBUG
diff --git a/net/ieee802154/af802154.h b/net/ieee802154/af802154.h
new file mode 100644
index 00000000000..b1ec5253752
--- /dev/null
+++ b/net/ieee802154/af802154.h
@@ -0,0 +1,36 @@
+/*
+ * Internal interfaces for ieee 802.15.4 address family.
+ *
+ * Copyright 2007, 2008, 2009 Siemens AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Written by:
+ * Sergey Lapin <slapin@ossfans.org>
+ * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
+ */
+
+#ifndef AF802154_H
+#define AF802154_H
+
+struct sk_buff;
+struct net_devce;
+extern struct proto ieee802154_raw_prot;
+extern struct proto ieee802154_dgram_prot;
+void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb);
+int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb);
+struct net_device *ieee802154_get_dev(struct net *net,
+		struct ieee802154_addr *addr);
+
+#endif
diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c
new file mode 100644
index 00000000000..882a927cefa
--- /dev/null
+++ b/net/ieee802154/af_ieee802154.c
@@ -0,0 +1,372 @@
+/*
+ * IEEE802154.4 socket interface
+ *
+ * Copyright 2007, 2008 Siemens AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Written by:
+ * Sergey Lapin <slapin@ossfans.org>
+ * Maxim Gorbachyov <maxim.gorbachev@siemens.com>
+ */
+
+#include <linux/net.h>
+#include <linux/capability.h>
+#include <linux/module.h>
+#include <linux/if_arp.h>
+#include <linux/if.h>
+#include <linux/termios.h>	/* For TIOCOUTQ/INQ */
+#include <linux/list.h>
+#include <net/datalink.h>
+#include <net/psnap.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
+#include <net/route.h>
+
+#include <net/ieee802154/af_ieee802154.h>
+#include <net/ieee802154/netdevice.h>
+
+#include "af802154.h"
+
+#define DBG_DUMP(data, len) { \
+	int i; \
+	pr_debug("function: %s: data: len %d:\n", __func__, len); \
+	for (i = 0; i < len; i++) {\
+		pr_debug("%02x: %02x\n", i, (data)[i]); \
+	} \
+}
+
+/*
+ * Utility function for families
+ */
+struct net_device *ieee802154_get_dev(struct net *net,
+		struct ieee802154_addr *addr)
+{
+	struct net_device *dev = NULL;
+	struct net_device *tmp;
+	u16 pan_id, short_addr;
+
+	switch (addr->addr_type) {
+	case IEEE802154_ADDR_LONG:
+		rtnl_lock();
+		dev = dev_getbyhwaddr(net, ARPHRD_IEEE802154, addr->hwaddr);
+		if (dev)
+			dev_hold(dev);
+		rtnl_unlock();
+		break;
+	case IEEE802154_ADDR_SHORT:
+		if (addr->pan_id == 0xffff ||
+		    addr->short_addr == IEEE802154_ADDR_UNDEF ||
+		    addr->short_addr == 0xffff)
+			break;
+
+		rtnl_lock();
+
+		for_each_netdev(net, tmp) {
+			if (tmp->type != ARPHRD_IEEE802154)
+				continue;
+
+			pan_id = ieee802154_mlme_ops(tmp)->get_pan_id(tmp);
+			short_addr =
+				ieee802154_mlme_ops(tmp)->get_short_addr(tmp);
+
+			if (pan_id == addr->pan_id &&
+			    short_addr == addr->short_addr) {
+				dev = tmp;
+				dev_hold(dev);
+				break;
+			}
+		}
+
+		rtnl_unlock();
+		break;
+	default:
+		pr_warning("Unsupported ieee802154 address type: %d\n",
+				addr->addr_type);
+		break;
+	}
+
+	return dev;
+}
+
+static int ieee802154_sock_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	if (sk) {
+		sock->sk = NULL;
+		sk->sk_prot->close(sk, 0);
+	}
+	return 0;
+}
+static int ieee802154_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
+		struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+
+	return sk->sk_prot->sendmsg(iocb, sk, msg, len);
+}
+
+static int ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr,
+		int addr_len)
+{
+	struct sock *sk = sock->sk;
+
+	if (sk->sk_prot->bind)
+		return sk->sk_prot->bind(sk, uaddr, addr_len);
+
+	return sock_no_bind(sock, uaddr, addr_len);
+}
+
+static int ieee802154_sock_connect(struct socket *sock, struct sockaddr *uaddr,
+			int addr_len, int flags)
+{
+	struct sock *sk = sock->sk;
+
+	if (uaddr->sa_family == AF_UNSPEC)
+		return sk->sk_prot->disconnect(sk, flags);
+
+	return sk->sk_prot->connect(sk, uaddr, addr_len);
+}
+
+static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg,
+		unsigned int cmd)
+{
+	struct ifreq ifr;
+	int ret = -EINVAL;
+	struct net_device *dev;
+
+	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+		return -EFAULT;
+
+	ifr.ifr_name[IFNAMSIZ-1] = 0;
+
+	dev_load(sock_net(sk), ifr.ifr_name);
+	dev = dev_get_by_name(sock_net(sk), ifr.ifr_name);
+	if (dev->type == ARPHRD_IEEE802154 ||
+	    dev->type == ARPHRD_IEEE802154_PHY)
+		ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd);
+
+	if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+		ret = -EFAULT;
+	dev_put(dev);
+
+	return ret;
+}
+
+static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd,
+		unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+
+	switch (cmd) {
+	case SIOCGSTAMP:
+		return sock_get_timestamp(sk, (struct timeval __user *)arg);
+	case SIOCGSTAMPNS:
+		return sock_get_timestampns(sk, (struct timespec __user *)arg);
+	case SIOCGIFADDR:
+	case SIOCSIFADDR:
+		return ieee802154_dev_ioctl(sk, (struct ifreq __user *)arg,
+				cmd);
+	default:
+		if (!sk->sk_prot->ioctl)
+			return -ENOIOCTLCMD;
+		return sk->sk_prot->ioctl(sk, cmd, arg);
+	}
+}
+
+static const struct proto_ops ieee802154_raw_ops = {
+	.family		   = PF_IEEE802154,
+	.owner		   = THIS_MODULE,
+	.release	   = ieee802154_sock_release,
+	.bind		   = ieee802154_sock_bind,
+	.connect	   = ieee802154_sock_connect,
+	.socketpair	   = sock_no_socketpair,
+	.accept		   = sock_no_accept,
+	.getname	   = sock_no_getname,
+	.poll		   = datagram_poll,
+	.ioctl		   = ieee802154_sock_ioctl,
+	.listen		   = sock_no_listen,
+	.shutdown	   = sock_no_shutdown,
+	.setsockopt	   = sock_common_setsockopt,
+	.getsockopt	   = sock_common_getsockopt,
+	.sendmsg	   = ieee802154_sock_sendmsg,
+	.recvmsg	   = sock_common_recvmsg,
+	.mmap		   = sock_no_mmap,
+	.sendpage	   = sock_no_sendpage,
+#ifdef CONFIG_COMPAT
+	.compat_setsockopt = compat_sock_common_setsockopt,
+	.compat_getsockopt = compat_sock_common_getsockopt,
+#endif
+};
+
+static const struct proto_ops ieee802154_dgram_ops = {
+	.family		   = PF_IEEE802154,
+	.owner		   = THIS_MODULE,
+	.release	   = ieee802154_sock_release,
+	.bind		   = ieee802154_sock_bind,
+	.connect	   = ieee802154_sock_connect,
+	.socketpair	   = sock_no_socketpair,
+	.accept		   = sock_no_accept,
+	.getname	   = sock_no_getname,
+	.poll		   = datagram_poll,
+	.ioctl		   = ieee802154_sock_ioctl,
+	.listen		   = sock_no_listen,
+	.shutdown	   = sock_no_shutdown,
+	.setsockopt	   = sock_common_setsockopt,
+	.getsockopt	   = sock_common_getsockopt,
+	.sendmsg	   = ieee802154_sock_sendmsg,
+	.recvmsg	   = sock_common_recvmsg,
+	.mmap		   = sock_no_mmap,
+	.sendpage	   = sock_no_sendpage,
+#ifdef CONFIG_COMPAT
+	.compat_setsockopt = compat_sock_common_setsockopt,
+	.compat_getsockopt = compat_sock_common_getsockopt,
+#endif
+};
+
+
+/*
+ * Create a socket. Initialise the socket, blank the addresses
+ * set the state.
+ */
+static int ieee802154_create(struct net *net, struct socket *sock,
+		int protocol)
+{
+	struct sock *sk;
+	int rc;
+	struct proto *proto;
+	const struct proto_ops *ops;
+
+	if (net != &init_net)
+		return -EAFNOSUPPORT;
+
+	switch (sock->type) {
+	case SOCK_RAW:
+		proto = &ieee802154_raw_prot;
+		ops = &ieee802154_raw_ops;
+		break;
+	case SOCK_DGRAM:
+		proto = &ieee802154_dgram_prot;
+		ops = &ieee802154_dgram_ops;
+		break;
+	default:
+		rc = -ESOCKTNOSUPPORT;
+		goto out;
+	}
+
+	rc = -ENOMEM;
+	sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto);
+	if (!sk)
+		goto out;
+	rc = 0;
+
+	sock->ops = ops;
+
+	sock_init_data(sock, sk);
+	/* FIXME: sk->sk_destruct */
+	sk->sk_family = PF_IEEE802154;
+
+	/* Checksums on by default */
+	sock_set_flag(sk, SOCK_ZAPPED);
+
+	if (sk->sk_prot->hash)
+		sk->sk_prot->hash(sk);
+
+	if (sk->sk_prot->init) {
+		rc = sk->sk_prot->init(sk);
+		if (rc)
+			sk_common_release(sk);
+	}
+out:
+	return rc;
+}
+
+static struct net_proto_family ieee802154_family_ops = {
+	.family		= PF_IEEE802154,
+	.create		= ieee802154_create,
+	.owner		= THIS_MODULE,
+};
+
+static int ieee802154_rcv(struct sk_buff *skb, struct net_device *dev,
+	struct packet_type *pt, struct net_device *orig_dev)
+{
+	DBG_DUMP(skb->data, skb->len);
+	if (!netif_running(dev))
+		return -ENODEV;
+	pr_debug("got frame, type %d, dev %p\n", dev->type, dev);
+
+	if (!net_eq(dev_net(dev), &init_net))
+		goto drop;
+
+	ieee802154_raw_deliver(dev, skb);
+
+	if (dev->type != ARPHRD_IEEE802154)
+		goto drop;
+
+	if (skb->pkt_type != PACKET_OTHERHOST)
+		return ieee802154_dgram_deliver(dev, skb);
+
+drop:
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
+
+static struct packet_type ieee802154_packet_type = {
+	.type = __constant_htons(ETH_P_IEEE802154),
+	.func = ieee802154_rcv,
+};
+
+static int __init af_ieee802154_init(void)
+{
+	int rc = -EINVAL;
+
+	rc = proto_register(&ieee802154_raw_prot, 1);
+	if (rc)
+		goto out;
+
+	rc = proto_register(&ieee802154_dgram_prot, 1);
+	if (rc)
+		goto err_dgram;
+
+	/* Tell SOCKET that we are alive */
+	rc = sock_register(&ieee802154_family_ops);
+	if (rc)
+		goto err_sock;
+	dev_add_pack(&ieee802154_packet_type);
+
+	rc = 0;
+	goto out;
+
+err_sock:
+	proto_unregister(&ieee802154_dgram_prot);
+err_dgram:
+	proto_unregister(&ieee802154_raw_prot);
+out:
+	return rc;
+}
+static void __exit af_ieee802154_remove(void)
+{
+	dev_remove_pack(&ieee802154_packet_type);
+	sock_unregister(PF_IEEE802154);
+	proto_unregister(&ieee802154_dgram_prot);
+	proto_unregister(&ieee802154_raw_prot);
+}
+
+module_init(af_ieee802154_init);
+module_exit(af_ieee802154_remove);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_IEEE802154);
diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c
new file mode 100644
index 00000000000..1f5ea11c2fd
--- /dev/null
+++ b/net/ieee802154/dgram.c
@@ -0,0 +1,394 @@
+/*
+ * ZigBee socket interface
+ *
+ * Copyright 2007, 2008 Siemens AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Written by:
+ * Sergey Lapin <slapin@ossfans.org>
+ * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
+ */
+
+#include <linux/net.h>
+#include <linux/module.h>
+#include <linux/if_arp.h>
+#include <linux/list.h>
+#include <net/sock.h>
+#include <net/ieee802154/af_ieee802154.h>
+#include <net/ieee802154/mac_def.h>
+#include <net/ieee802154/netdevice.h>
+
+#include <asm/ioctls.h>
+
+#include "af802154.h"
+
+static HLIST_HEAD(dgram_head);
+static DEFINE_RWLOCK(dgram_lock);
+
+struct dgram_sock {
+	struct sock sk;
+
+	int bound;
+	struct ieee802154_addr src_addr;
+	struct ieee802154_addr dst_addr;
+};
+
+static inline struct dgram_sock *dgram_sk(const struct sock *sk)
+{
+	return container_of(sk, struct dgram_sock, sk);
+}
+
+
+static void dgram_hash(struct sock *sk)
+{
+	write_lock_bh(&dgram_lock);
+	sk_add_node(sk, &dgram_head);
+	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+	write_unlock_bh(&dgram_lock);
+}
+
+static void dgram_unhash(struct sock *sk)
+{
+	write_lock_bh(&dgram_lock);
+	if (sk_del_node_init(sk))
+		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+	write_unlock_bh(&dgram_lock);
+}
+
+static int dgram_init(struct sock *sk)
+{
+	struct dgram_sock *ro = dgram_sk(sk);
+
+	ro->dst_addr.addr_type = IEEE802154_ADDR_LONG;
+	ro->dst_addr.pan_id = 0xffff;
+	memset(&ro->dst_addr.hwaddr, 0xff, sizeof(ro->dst_addr.hwaddr));
+	return 0;
+}
+
+static void dgram_close(struct sock *sk, long timeout)
+{
+	sk_common_release(sk);
+}
+
+static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len)
+{
+	struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr;
+	struct dgram_sock *ro = dgram_sk(sk);
+	int err = 0;
+	struct net_device *dev;
+
+	ro->bound = 0;
+
+	if (len < sizeof(*addr))
+		return -EINVAL;
+
+	if (addr->family != AF_IEEE802154)
+		return -EINVAL;
+
+	lock_sock(sk);
+
+	dev = ieee802154_get_dev(sock_net(sk), &addr->addr);
+	if (!dev) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	if (dev->type != ARPHRD_IEEE802154) {
+		err = -ENODEV;
+		goto out_put;
+	}
+
+	memcpy(&ro->src_addr, &addr->addr, sizeof(struct ieee802154_addr));
+
+	ro->bound = 1;
+out_put:
+	dev_put(dev);
+out:
+	release_sock(sk);
+
+	return err;
+}
+
+static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case SIOCOUTQ:
+	{
+		int amount = atomic_read(&sk->sk_wmem_alloc);
+		return put_user(amount, (int __user *)arg);
+	}
+
+	case SIOCINQ:
+	{
+		struct sk_buff *skb;
+		unsigned long amount;
+
+		amount = 0;
+		spin_lock_bh(&sk->sk_receive_queue.lock);
+		skb = skb_peek(&sk->sk_receive_queue);
+		if (skb != NULL) {
+			/*
+			 * We will only return the amount
+			 * of this packet since that is all
+			 * that will be read.
+			 */
+			/* FIXME: parse the header for more correct value */
+			amount = skb->len - (3+8+8);
+		}
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
+		return put_user(amount, (int __user *)arg);
+	}
+
+	}
+	return -ENOIOCTLCMD;
+}
+
+/* FIXME: autobind */
+static int dgram_connect(struct sock *sk, struct sockaddr *uaddr,
+			int len)
+{
+	struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr;
+	struct dgram_sock *ro = dgram_sk(sk);
+	int err = 0;
+
+	if (len < sizeof(*addr))
+		return -EINVAL;
+
+	if (addr->family != AF_IEEE802154)
+		return -EINVAL;
+
+	lock_sock(sk);
+
+	if (!ro->bound) {
+		err = -ENETUNREACH;
+		goto out;
+	}
+
+	memcpy(&ro->dst_addr, &addr->addr, sizeof(struct ieee802154_addr));
+
+out:
+	release_sock(sk);
+	return err;
+}
+
+static int dgram_disconnect(struct sock *sk, int flags)
+{
+	struct dgram_sock *ro = dgram_sk(sk);
+
+	lock_sock(sk);
+
+	ro->dst_addr.addr_type = IEEE802154_ADDR_LONG;
+	memset(&ro->dst_addr.hwaddr, 0xff, sizeof(ro->dst_addr.hwaddr));
+
+	release_sock(sk);
+
+	return 0;
+}
+
+static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk,
+		struct msghdr *msg, size_t size)
+{
+	struct net_device *dev;
+	unsigned mtu;
+	struct sk_buff *skb;
+	struct dgram_sock *ro = dgram_sk(sk);
+	int err;
+
+	if (msg->msg_flags & MSG_OOB) {
+		pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags);
+		return -EOPNOTSUPP;
+	}
+
+	if (!ro->bound)
+		dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154);
+	else
+		dev = ieee802154_get_dev(sock_net(sk), &ro->src_addr);
+
+	if (!dev) {
+		pr_debug("no dev\n");
+		err = -ENXIO;
+		goto out;
+	}
+	mtu = dev->mtu;
+	pr_debug("name = %s, mtu = %u\n", dev->name, mtu);
+
+	skb = sock_alloc_send_skb(sk, LL_ALLOCATED_SPACE(dev) + size,
+			msg->msg_flags & MSG_DONTWAIT,
+			&err);
+	if (!skb)
+		goto out_dev;
+
+	skb_reserve(skb, LL_RESERVED_SPACE(dev));
+
+	skb_reset_network_header(skb);
+
+	mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA | MAC_CB_FLAG_ACKREQ;
+	mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev);
+	err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &ro->dst_addr,
+			ro->bound ? &ro->src_addr : NULL, size);
+	if (err < 0)
+		goto out_skb;
+
+	skb_reset_mac_header(skb);
+
+	err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
+	if (err < 0)
+		goto out_skb;
+
+	if (size > mtu) {
+		pr_debug("size = %u, mtu = %u\n", size, mtu);
+		err = -EINVAL;
+		goto out_skb;
+	}
+
+	skb->dev = dev;
+	skb->sk  = sk;
+	skb->protocol = htons(ETH_P_IEEE802154);
+
+	dev_put(dev);
+
+	err = dev_queue_xmit(skb);
+	if (err > 0)
+		err = net_xmit_errno(err);
+
+	return err ?: size;
+
+out_skb:
+	kfree_skb(skb);
+out_dev:
+	dev_put(dev);
+out:
+	return err;
+}
+
+static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk,
+		struct msghdr *msg, size_t len, int noblock, int flags,
+		int *addr_len)
+{
+	size_t copied = 0;
+	int err = -EOPNOTSUPP;
+	struct sk_buff *skb;
+
+	skb = skb_recv_datagram(sk, flags, noblock, &err);
+	if (!skb)
+		goto out;
+
+	copied = skb->len;
+	if (len < copied) {
+		msg->msg_flags |= MSG_TRUNC;
+		copied = len;
+	}
+
+	/* FIXME: skip headers if necessary ?! */
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	if (err)
+		goto done;
+
+	sock_recv_timestamp(msg, sk, skb);
+
+	if (flags & MSG_TRUNC)
+		copied = skb->len;
+done:
+	skb_free_datagram(sk, skb);
+out:
+	if (err)
+		return err;
+	return copied;
+}
+
+static int dgram_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	if (sock_queue_rcv_skb(sk, skb) < 0) {
+		atomic_inc(&sk->sk_drops);
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
+
+	return NET_RX_SUCCESS;
+}
+
+static inline int ieee802154_match_sock(u8 *hw_addr, u16 pan_id,
+		u16 short_addr, struct dgram_sock *ro)
+{
+	if (!ro->bound)
+		return 1;
+
+	if (ro->src_addr.addr_type == IEEE802154_ADDR_LONG &&
+	    !memcmp(ro->src_addr.hwaddr, hw_addr, IEEE802154_ADDR_LEN))
+		return 1;
+
+	if (ro->src_addr.addr_type == IEEE802154_ADDR_SHORT &&
+		     pan_id == ro->src_addr.pan_id &&
+		     short_addr == ro->src_addr.short_addr)
+		return 1;
+
+	return 0;
+}
+
+int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb)
+{
+	struct sock *sk, *prev = NULL;
+	struct hlist_node *node;
+	int ret = NET_RX_SUCCESS;
+	u16 pan_id, short_addr;
+
+	/* Data frame processing */
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
+	short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev);
+
+	read_lock(&dgram_lock);
+	sk_for_each(sk, node, &dgram_head) {
+		if (ieee802154_match_sock(dev->dev_addr, pan_id, short_addr,
+					dgram_sk(sk))) {
+			if (prev) {
+				struct sk_buff *clone;
+				clone = skb_clone(skb, GFP_ATOMIC);
+				if (clone)
+					dgram_rcv_skb(prev, clone);
+			}
+
+			prev = sk;
+		}
+	}
+
+	if (prev)
+		dgram_rcv_skb(prev, skb);
+	else {
+		kfree_skb(skb);
+		ret = NET_RX_DROP;
+	}
+	read_unlock(&dgram_lock);
+
+	return ret;
+}
+
+struct proto ieee802154_dgram_prot = {
+	.name		= "IEEE-802.15.4-MAC",
+	.owner		= THIS_MODULE,
+	.obj_size	= sizeof(struct dgram_sock),
+	.init		= dgram_init,
+	.close		= dgram_close,
+	.bind		= dgram_bind,
+	.sendmsg	= dgram_sendmsg,
+	.recvmsg	= dgram_recvmsg,
+	.hash		= dgram_hash,
+	.unhash		= dgram_unhash,
+	.connect	= dgram_connect,
+	.disconnect	= dgram_disconnect,
+	.ioctl		= dgram_ioctl,
+};
+
diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c
new file mode 100644
index 00000000000..8b892e06049
--- /dev/null
+++ b/net/ieee802154/raw.c
@@ -0,0 +1,254 @@
+/*
+ * Raw IEEE 802.15.4 sockets
+ *
+ * Copyright 2007, 2008 Siemens AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Written by:
+ * Sergey Lapin <slapin@ossfans.org>
+ * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
+ */
+
+#include <linux/net.h>
+#include <linux/module.h>
+#include <linux/if_arp.h>
+#include <linux/list.h>
+#include <net/sock.h>
+#include <net/ieee802154/af_ieee802154.h>
+
+#include "af802154.h"
+
+static HLIST_HEAD(raw_head);
+static DEFINE_RWLOCK(raw_lock);
+
+static void raw_hash(struct sock *sk)
+{
+	write_lock_bh(&raw_lock);
+	sk_add_node(sk, &raw_head);
+	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+	write_unlock_bh(&raw_lock);
+}
+
+static void raw_unhash(struct sock *sk)
+{
+	write_lock_bh(&raw_lock);
+	if (sk_del_node_init(sk))
+		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+	write_unlock_bh(&raw_lock);
+}
+
+static void raw_close(struct sock *sk, long timeout)
+{
+	sk_common_release(sk);
+}
+
+static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int len)
+{
+	struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr;
+	int err = 0;
+	struct net_device *dev = NULL;
+
+	if (len < sizeof(*addr))
+		return -EINVAL;
+
+	if (addr->family != AF_IEEE802154)
+		return -EINVAL;
+
+	lock_sock(sk);
+
+	dev = ieee802154_get_dev(sock_net(sk), &addr->addr);
+	if (!dev) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	if (dev->type != ARPHRD_IEEE802154_PHY &&
+	    dev->type != ARPHRD_IEEE802154) {
+		err = -ENODEV;
+		goto out_put;
+	}
+
+	sk->sk_bound_dev_if = dev->ifindex;
+	sk_dst_reset(sk);
+
+out_put:
+	dev_put(dev);
+out:
+	release_sock(sk);
+
+	return err;
+}
+
+static int raw_connect(struct sock *sk, struct sockaddr *uaddr,
+			int addr_len)
+{
+	return -ENOTSUPP;
+}
+
+static int raw_disconnect(struct sock *sk, int flags)
+{
+	return 0;
+}
+
+static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+		       size_t size)
+{
+	struct net_device *dev;
+	unsigned mtu;
+	struct sk_buff *skb;
+	int err;
+
+	if (msg->msg_flags & MSG_OOB) {
+		pr_debug("msg->msg_flags = 0x%x\n", msg->msg_flags);
+		return -EOPNOTSUPP;
+	}
+
+	lock_sock(sk);
+	if (!sk->sk_bound_dev_if)
+		dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154);
+	else
+		dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if);
+	release_sock(sk);
+
+	if (!dev) {
+		pr_debug("no dev\n");
+		err = -ENXIO;
+		goto out;
+	}
+
+	mtu = dev->mtu;
+	pr_debug("name = %s, mtu = %u\n", dev->name, mtu);
+
+	if (size > mtu) {
+		pr_debug("size = %u, mtu = %u\n", size, mtu);
+		err = -EINVAL;
+		goto out_dev;
+	}
+
+	skb = sock_alloc_send_skb(sk, LL_ALLOCATED_SPACE(dev) + size,
+			msg->msg_flags & MSG_DONTWAIT, &err);
+	if (!skb)
+		goto out_dev;
+
+	skb_reserve(skb, LL_RESERVED_SPACE(dev));
+
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
+
+	err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
+	if (err < 0)
+		goto out_skb;
+
+	skb->dev = dev;
+	skb->sk  = sk;
+	skb->protocol = htons(ETH_P_IEEE802154);
+
+	dev_put(dev);
+
+	err = dev_queue_xmit(skb);
+	if (err > 0)
+		err = net_xmit_errno(err);
+
+	return err ?: size;
+
+out_skb:
+	kfree_skb(skb);
+out_dev:
+	dev_put(dev);
+out:
+	return err;
+}
+
+static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+		       size_t len, int noblock, int flags, int *addr_len)
+{
+	size_t copied = 0;
+	int err = -EOPNOTSUPP;
+	struct sk_buff *skb;
+
+	skb = skb_recv_datagram(sk, flags, noblock, &err);
+	if (!skb)
+		goto out;
+
+	copied = skb->len;
+	if (len < copied) {
+		msg->msg_flags |= MSG_TRUNC;
+		copied = len;
+	}
+
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	if (err)
+		goto done;
+
+	sock_recv_timestamp(msg, sk, skb);
+
+	if (flags & MSG_TRUNC)
+		copied = skb->len;
+done:
+	skb_free_datagram(sk, skb);
+out:
+	if (err)
+		return err;
+	return copied;
+}
+
+static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	if (sock_queue_rcv_skb(sk, skb) < 0) {
+		atomic_inc(&sk->sk_drops);
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
+
+	return NET_RX_SUCCESS;
+}
+
+
+void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb)
+{
+	struct sock *sk;
+	struct hlist_node *node;
+
+	read_lock(&raw_lock);
+	sk_for_each(sk, node, &raw_head) {
+		bh_lock_sock(sk);
+		if (!sk->sk_bound_dev_if ||
+		    sk->sk_bound_dev_if == dev->ifindex) {
+
+			struct sk_buff *clone;
+
+			clone = skb_clone(skb, GFP_ATOMIC);
+			if (clone)
+				raw_rcv_skb(sk, clone);
+		}
+		bh_unlock_sock(sk);
+	}
+	read_unlock(&raw_lock);
+}
+
+struct proto ieee802154_raw_prot = {
+	.name		= "IEEE-802.15.4-RAW",
+	.owner		= THIS_MODULE,
+	.obj_size	= sizeof(struct sock),
+	.close		= raw_close,
+	.bind		= raw_bind,
+	.sendmsg	= raw_sendmsg,
+	.recvmsg	= raw_recvmsg,
+	.hash		= raw_hash,
+	.unhash		= raw_unhash,
+	.connect	= raw_connect,
+	.disconnect	= raw_disconnect,
+};
+
-- 
cgit v1.2.3-70-g09d2


From 2c21d11518b688cd4c8e7ddfcd4ba41482ad075b Mon Sep 17 00:00:00 2001
From: Sergey Lapin <slapin@ossfans.org>
Date: Mon, 8 Jun 2009 12:18:49 +0000
Subject: net: add NL802154 interface for configuration of 802.15.4 devices

Add a netlink interface for configuration of IEEE 802.15.4 device. Also this
interface specifies events notification sent by devices towards higher layers.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Signed-off-by: Sergey Lapin <slapin@ossfans.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/nl802154.h          | 119 +++++++++
 include/net/ieee802154/nl802154.h |  41 +++
 net/ieee802154/Makefile           |   3 +-
 net/ieee802154/netlink.c          | 523 ++++++++++++++++++++++++++++++++++++++
 net/ieee802154/nl_policy.c        |  52 ++++
 5 files changed, 737 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/nl802154.h
 create mode 100644 include/net/ieee802154/nl802154.h
 create mode 100644 net/ieee802154/netlink.c
 create mode 100644 net/ieee802154/nl_policy.c

(limited to 'include')

diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h
new file mode 100644
index 00000000000..2cda00ccfcc
--- /dev/null
+++ b/include/linux/nl802154.h
@@ -0,0 +1,119 @@
+/*
+ * nl802154.h
+ *
+ * Copyright (C) 2007, 2008, 2009 Siemens AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#ifndef NL802154_H
+#define NL802154_H
+
+#define IEEE802154_NL_NAME "802.15.4 MAC"
+#define IEEE802154_MCAST_COORD_NAME "coordinator"
+#define IEEE802154_MCAST_BEACON_NAME "beacon"
+
+enum {
+	__IEEE802154_ATTR_INVALID,
+
+	IEEE802154_ATTR_DEV_NAME,
+	IEEE802154_ATTR_DEV_INDEX,
+
+	IEEE802154_ATTR_STATUS,
+
+	IEEE802154_ATTR_SHORT_ADDR,
+	IEEE802154_ATTR_HW_ADDR,
+	IEEE802154_ATTR_PAN_ID,
+
+	IEEE802154_ATTR_CHANNEL,
+
+	IEEE802154_ATTR_COORD_SHORT_ADDR,
+	IEEE802154_ATTR_COORD_HW_ADDR,
+	IEEE802154_ATTR_COORD_PAN_ID,
+
+	IEEE802154_ATTR_SRC_SHORT_ADDR,
+	IEEE802154_ATTR_SRC_HW_ADDR,
+	IEEE802154_ATTR_SRC_PAN_ID,
+
+	IEEE802154_ATTR_DEST_SHORT_ADDR,
+	IEEE802154_ATTR_DEST_HW_ADDR,
+	IEEE802154_ATTR_DEST_PAN_ID,
+
+	IEEE802154_ATTR_CAPABILITY,
+	IEEE802154_ATTR_REASON,
+	IEEE802154_ATTR_SCAN_TYPE,
+	IEEE802154_ATTR_CHANNELS,
+	IEEE802154_ATTR_DURATION,
+	IEEE802154_ATTR_ED_LIST,
+	IEEE802154_ATTR_BCN_ORD,
+	IEEE802154_ATTR_SF_ORD,
+	IEEE802154_ATTR_PAN_COORD,
+	IEEE802154_ATTR_BAT_EXT,
+	IEEE802154_ATTR_COORD_REALIGN,
+	IEEE802154_ATTR_SEC,
+
+	__IEEE802154_ATTR_MAX,
+};
+
+#define IEEE802154_ATTR_MAX (__IEEE802154_ATTR_MAX - 1)
+
+extern struct nla_policy ieee802154_policy[];
+
+/* commands */
+/* REQ should be responded with CONF
+ * and INDIC with RESP
+ */
+enum {
+	__IEEE802154_COMMAND_INVALID,
+
+	IEEE802154_ASSOCIATE_REQ,
+	IEEE802154_ASSOCIATE_CONF,
+	IEEE802154_DISASSOCIATE_REQ,
+	IEEE802154_DISASSOCIATE_CONF,
+	IEEE802154_GET_REQ,
+	IEEE802154_GET_CONF,
+	IEEE802154_RESET_REQ,
+	IEEE802154_RESET_CONF,
+	IEEE802154_SCAN_REQ,
+	IEEE802154_SCAN_CONF,
+	IEEE802154_SET_REQ,
+	IEEE802154_SET_CONF,
+	IEEE802154_START_REQ,
+	IEEE802154_START_CONF,
+	IEEE802154_SYNC_REQ,
+	IEEE802154_POLL_REQ,
+	IEEE802154_POLL_CONF,
+
+	IEEE802154_ASSOCIATE_INDIC,
+	IEEE802154_ASSOCIATE_RESP,
+	IEEE802154_DISASSOCIATE_INDIC,
+	IEEE802154_BEACON_NOTIFY_INDIC,
+	IEEE802154_ORPHAN_INDIC,
+	IEEE802154_ORPHAN_RESP,
+	IEEE802154_COMM_STATUS_INDIC,
+	IEEE802154_SYNC_LOSS_INDIC,
+
+	IEEE802154_GTS_REQ, /* Not supported yet */
+	IEEE802154_GTS_INDIC, /* Not supported yet */
+	IEEE802154_GTS_CONF, /* Not supported yet */
+	IEEE802154_RX_ENABLE_REQ, /* Not supported yet */
+	IEEE802154_RX_ENABLE_CONF, /* Not supported yet */
+
+	__IEEE802154_CMD_MAX,
+};
+
+#define IEEE802154_CMD_MAX (__IEEE802154_CMD_MAX - 1)
+
+#endif
diff --git a/include/net/ieee802154/nl802154.h b/include/net/ieee802154/nl802154.h
new file mode 100644
index 00000000000..78efcdf52b5
--- /dev/null
+++ b/include/net/ieee802154/nl802154.h
@@ -0,0 +1,41 @@
+/*
+ * nl802154.h
+ *
+ * Copyright (C) 2007, 2008, 2009 Siemens AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#ifndef IEEE802154_NL_H
+#define IEEE802154_NL_H
+
+struct net_device;
+struct ieee802154_addr;
+
+int ieee802154_nl_assoc_indic(struct net_device *dev,
+		struct ieee802154_addr *addr, u8 cap);
+int ieee802154_nl_assoc_confirm(struct net_device *dev,
+		u16 short_addr, u8 status);
+int ieee802154_nl_disassoc_indic(struct net_device *dev,
+		struct ieee802154_addr *addr, u8 reason);
+int ieee802154_nl_disassoc_confirm(struct net_device *dev,
+		u8 status);
+int ieee802154_nl_scan_confirm(struct net_device *dev,
+		u8 status, u8 scan_type, u32 unscanned,
+		u8 *edl/*, struct list_head *pan_desc_list */);
+int ieee802154_nl_beacon_indic(struct net_device *dev, u16 panid,
+		u16 coord_addr);
+
+#endif
diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile
index 7451c7cc8c8..f99338a2610 100644
--- a/net/ieee802154/Makefile
+++ b/net/ieee802154/Makefile
@@ -1,4 +1,5 @@
-obj-$(CONFIG_IEEE802154) +=	af_802154.o
+obj-$(CONFIG_IEEE802154) +=	nl802154.o af_802154.o
+nl802154-y		:= netlink.o nl_policy.o
 af_802154-y		:= af_ieee802154.o raw.o dgram.o
 
 ccflags-y += -Wall -DDEBUG
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
new file mode 100644
index 00000000000..105ad10876a
--- /dev/null
+++ b/net/ieee802154/netlink.c
@@ -0,0 +1,523 @@
+/*
+ * Netlink inteface for IEEE 802.15.4 stack
+ *
+ * Copyright 2007, 2008 Siemens AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Written by:
+ * Sergey Lapin <slapin@ossfans.org>
+ * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/if_arp.h>
+#include <linux/netdevice.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+#include <linux/nl802154.h>
+#include <net/ieee802154/af_ieee802154.h>
+#include <net/ieee802154/nl802154.h>
+#include <net/ieee802154/netdevice.h>
+
+static unsigned int ieee802154_seq_num;
+
+static struct genl_family ieee802154_coordinator_family = {
+	.id		= GENL_ID_GENERATE,
+	.hdrsize	= 0,
+	.name		= IEEE802154_NL_NAME,
+	.version	= 1,
+	.maxattr	= IEEE802154_ATTR_MAX,
+};
+
+static struct genl_multicast_group ieee802154_coord_mcgrp = {
+	.name		= IEEE802154_MCAST_COORD_NAME,
+};
+
+static struct genl_multicast_group ieee802154_beacon_mcgrp = {
+	.name		= IEEE802154_MCAST_BEACON_NAME,
+};
+
+/* Requests to userspace */
+static struct sk_buff *ieee802154_nl_create(int flags, u8 req)
+{
+	void *hdr;
+	struct sk_buff *msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+
+	if (!msg)
+		return NULL;
+
+	hdr = genlmsg_put(msg, 0, ieee802154_seq_num++,
+			&ieee802154_coordinator_family, flags, req);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return NULL;
+	}
+
+	return msg;
+}
+
+static int ieee802154_nl_finish(struct sk_buff *msg)
+{
+	/* XXX: nlh is right at the start of msg */
+	void *hdr = genlmsg_data(NLMSG_DATA(msg->data));
+
+	if (!genlmsg_end(msg, hdr))
+		goto out;
+
+	return genlmsg_multicast(msg, 0, ieee802154_coord_mcgrp.id,
+			GFP_ATOMIC);
+out:
+	nlmsg_free(msg);
+	return -ENOBUFS;
+}
+
+int ieee802154_nl_assoc_indic(struct net_device *dev,
+		struct ieee802154_addr *addr, u8 cap)
+{
+	struct sk_buff *msg;
+
+	pr_debug("%s\n", __func__);
+
+	if (addr->addr_type != IEEE802154_ADDR_LONG) {
+		pr_err("%s: received non-long source address!\n", __func__);
+		return -EINVAL;
+	}
+
+	msg = ieee802154_nl_create(0, IEEE802154_ASSOCIATE_INDIC);
+	if (!msg)
+		return -ENOBUFS;
+
+	NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name);
+	NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex);
+	NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN,
+			dev->dev_addr);
+
+	NLA_PUT(msg, IEEE802154_ATTR_SRC_HW_ADDR, IEEE802154_ADDR_LEN,
+			addr->hwaddr);
+
+	NLA_PUT_U8(msg, IEEE802154_ATTR_CAPABILITY, cap);
+
+	return ieee802154_nl_finish(msg);
+
+nla_put_failure:
+	nlmsg_free(msg);
+	return -ENOBUFS;
+}
+EXPORT_SYMBOL(ieee802154_nl_assoc_indic);
+
+int ieee802154_nl_assoc_confirm(struct net_device *dev, u16 short_addr,
+		u8 status)
+{
+	struct sk_buff *msg;
+
+	pr_debug("%s\n", __func__);
+
+	msg = ieee802154_nl_create(0, IEEE802154_ASSOCIATE_CONF);
+	if (!msg)
+		return -ENOBUFS;
+
+	NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name);
+	NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex);
+	NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN,
+			dev->dev_addr);
+
+	NLA_PUT_U16(msg, IEEE802154_ATTR_SHORT_ADDR, short_addr);
+	NLA_PUT_U8(msg, IEEE802154_ATTR_STATUS, status);
+
+	return ieee802154_nl_finish(msg);
+
+nla_put_failure:
+	nlmsg_free(msg);
+	return -ENOBUFS;
+}
+EXPORT_SYMBOL(ieee802154_nl_assoc_confirm);
+
+int ieee802154_nl_disassoc_indic(struct net_device *dev,
+		struct ieee802154_addr *addr, u8 reason)
+{
+	struct sk_buff *msg;
+
+	pr_debug("%s\n", __func__);
+
+	msg = ieee802154_nl_create(0, IEEE802154_DISASSOCIATE_INDIC);
+	if (!msg)
+		return -ENOBUFS;
+
+	NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name);
+	NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex);
+	NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN,
+			dev->dev_addr);
+
+	if (addr->addr_type == IEEE802154_ADDR_LONG)
+		NLA_PUT(msg, IEEE802154_ATTR_SRC_HW_ADDR, IEEE802154_ADDR_LEN,
+				addr->hwaddr);
+	else
+		NLA_PUT_U16(msg, IEEE802154_ATTR_SRC_SHORT_ADDR,
+				addr->short_addr);
+
+	NLA_PUT_U8(msg, IEEE802154_ATTR_REASON, reason);
+
+	return ieee802154_nl_finish(msg);
+
+nla_put_failure:
+	nlmsg_free(msg);
+	return -ENOBUFS;
+}
+EXPORT_SYMBOL(ieee802154_nl_disassoc_indic);
+
+int ieee802154_nl_disassoc_confirm(struct net_device *dev, u8 status)
+{
+	struct sk_buff *msg;
+
+	pr_debug("%s\n", __func__);
+
+	msg = ieee802154_nl_create(0, IEEE802154_DISASSOCIATE_CONF);
+	if (!msg)
+		return -ENOBUFS;
+
+	NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name);
+	NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex);
+	NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN,
+			dev->dev_addr);
+
+	NLA_PUT_U8(msg, IEEE802154_ATTR_STATUS, status);
+
+	return ieee802154_nl_finish(msg);
+
+nla_put_failure:
+	nlmsg_free(msg);
+	return -ENOBUFS;
+}
+EXPORT_SYMBOL(ieee802154_nl_disassoc_confirm);
+
+int ieee802154_nl_beacon_indic(struct net_device *dev,
+		u16 panid, u16 coord_addr)
+{
+	struct sk_buff *msg;
+
+	pr_debug("%s\n", __func__);
+
+	msg = ieee802154_nl_create(0, IEEE802154_BEACON_NOTIFY_INDIC);
+	if (!msg)
+		return -ENOBUFS;
+
+	NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name);
+	NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex);
+	NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN,
+			dev->dev_addr);
+	NLA_PUT_U16(msg, IEEE802154_ATTR_COORD_SHORT_ADDR, coord_addr);
+	NLA_PUT_U16(msg, IEEE802154_ATTR_COORD_PAN_ID, panid);
+
+	return ieee802154_nl_finish(msg);
+
+nla_put_failure:
+	nlmsg_free(msg);
+	return -ENOBUFS;
+}
+EXPORT_SYMBOL(ieee802154_nl_beacon_indic);
+
+int ieee802154_nl_scan_confirm(struct net_device *dev,
+		u8 status, u8 scan_type, u32 unscanned,
+		u8 *edl/* , struct list_head *pan_desc_list */)
+{
+	struct sk_buff *msg;
+
+	pr_debug("%s\n", __func__);
+
+	msg = ieee802154_nl_create(0, IEEE802154_SCAN_CONF);
+	if (!msg)
+		return -ENOBUFS;
+
+	NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name);
+	NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex);
+	NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN,
+			dev->dev_addr);
+
+	NLA_PUT_U8(msg, IEEE802154_ATTR_STATUS, status);
+	NLA_PUT_U8(msg, IEEE802154_ATTR_SCAN_TYPE, scan_type);
+	NLA_PUT_U32(msg, IEEE802154_ATTR_CHANNELS, unscanned);
+
+	if (edl)
+		NLA_PUT(msg, IEEE802154_ATTR_ED_LIST, 27, edl);
+
+	return ieee802154_nl_finish(msg);
+
+nla_put_failure:
+	nlmsg_free(msg);
+	return -ENOBUFS;
+}
+EXPORT_SYMBOL(ieee802154_nl_scan_confirm);
+
+/* Requests from userspace */
+static struct net_device *ieee802154_nl_get_dev(struct genl_info *info)
+{
+	struct net_device *dev;
+
+	if (info->attrs[IEEE802154_ATTR_DEV_NAME]) {
+		char name[IFNAMSIZ + 1];
+		nla_strlcpy(name, info->attrs[IEEE802154_ATTR_DEV_NAME],
+				sizeof(name));
+		dev = dev_get_by_name(&init_net, name);
+	} else if (info->attrs[IEEE802154_ATTR_DEV_INDEX])
+		dev = dev_get_by_index(&init_net,
+				nla_get_u32(info->attrs[IEEE802154_ATTR_DEV_INDEX]));
+	else
+		return NULL;
+
+	if (dev->type != ARPHRD_IEEE802154) {
+		dev_put(dev);
+		return NULL;
+	}
+
+	return dev;
+}
+
+static int ieee802154_associate_req(struct sk_buff *skb,
+		struct genl_info *info)
+{
+	struct net_device *dev;
+	struct ieee802154_addr addr;
+	int ret = -EINVAL;
+
+	if (!info->attrs[IEEE802154_ATTR_CHANNEL] ||
+	    !info->attrs[IEEE802154_ATTR_COORD_PAN_ID] ||
+	    (!info->attrs[IEEE802154_ATTR_COORD_HW_ADDR] &&
+		!info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR]) ||
+	    !info->attrs[IEEE802154_ATTR_CAPABILITY])
+		return -EINVAL;
+
+	dev = ieee802154_nl_get_dev(info);
+	if (!dev)
+		return -ENODEV;
+
+	if (info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]) {
+		addr.addr_type = IEEE802154_ADDR_LONG;
+		nla_memcpy(addr.hwaddr,
+				info->attrs[IEEE802154_ATTR_COORD_HW_ADDR],
+				IEEE802154_ADDR_LEN);
+	} else {
+		addr.addr_type = IEEE802154_ADDR_SHORT;
+		addr.short_addr = nla_get_u16(
+				info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR]);
+	}
+	addr.pan_id = nla_get_u16(info->attrs[IEEE802154_ATTR_COORD_PAN_ID]);
+
+	ret = ieee802154_mlme_ops(dev)->assoc_req(dev, &addr,
+			nla_get_u8(info->attrs[IEEE802154_ATTR_CHANNEL]),
+			nla_get_u8(info->attrs[IEEE802154_ATTR_CAPABILITY]));
+
+	dev_put(dev);
+	return ret;
+}
+
+static int ieee802154_associate_resp(struct sk_buff *skb,
+		struct genl_info *info)
+{
+	struct net_device *dev;
+	struct ieee802154_addr addr;
+	int ret = -EINVAL;
+
+	if (!info->attrs[IEEE802154_ATTR_STATUS] ||
+	    !info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] ||
+	    !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR])
+		return -EINVAL;
+
+	dev = ieee802154_nl_get_dev(info);
+	if (!dev)
+		return -ENODEV;
+
+	addr.addr_type = IEEE802154_ADDR_LONG;
+	nla_memcpy(addr.hwaddr, info->attrs[IEEE802154_ATTR_DEST_HW_ADDR],
+			IEEE802154_ADDR_LEN);
+	addr.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
+
+
+	ret = ieee802154_mlme_ops(dev)->assoc_resp(dev, &addr,
+		nla_get_u16(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]),
+		nla_get_u8(info->attrs[IEEE802154_ATTR_STATUS]));
+
+	dev_put(dev);
+	return ret;
+}
+
+static int ieee802154_disassociate_req(struct sk_buff *skb,
+		struct genl_info *info)
+{
+	struct net_device *dev;
+	struct ieee802154_addr addr;
+	int ret = -EINVAL;
+
+	if ((!info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] &&
+		!info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) ||
+	    !info->attrs[IEEE802154_ATTR_REASON])
+		return -EINVAL;
+
+	dev = ieee802154_nl_get_dev(info);
+	if (!dev)
+		return -ENODEV;
+
+	if (info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]) {
+		addr.addr_type = IEEE802154_ADDR_LONG;
+		nla_memcpy(addr.hwaddr,
+				info->attrs[IEEE802154_ATTR_DEST_HW_ADDR],
+				IEEE802154_ADDR_LEN);
+	} else {
+		addr.addr_type = IEEE802154_ADDR_SHORT;
+		addr.short_addr = nla_get_u16(
+				info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]);
+	}
+	addr.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
+
+	ret = ieee802154_mlme_ops(dev)->disassoc_req(dev, &addr,
+			nla_get_u8(info->attrs[IEEE802154_ATTR_REASON]));
+
+	dev_put(dev);
+	return ret;
+}
+
+/*
+ * PANid, channel, beacon_order = 15, superframe_order = 15,
+ * PAN_coordinator, battery_life_extension = 0,
+ * coord_realignment = 0, security_enable = 0
+*/
+static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net_device *dev;
+	struct ieee802154_addr addr;
+
+	u8 channel, bcn_ord, sf_ord;
+	int pan_coord, blx, coord_realign;
+	int ret;
+
+	if (!info->attrs[IEEE802154_ATTR_COORD_PAN_ID] ||
+	    !info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR] ||
+	    !info->attrs[IEEE802154_ATTR_CHANNEL] ||
+	    !info->attrs[IEEE802154_ATTR_BCN_ORD] ||
+	    !info->attrs[IEEE802154_ATTR_SF_ORD] ||
+	    !info->attrs[IEEE802154_ATTR_PAN_COORD] ||
+	    !info->attrs[IEEE802154_ATTR_BAT_EXT] ||
+	    !info->attrs[IEEE802154_ATTR_COORD_REALIGN]
+	 )
+		return -EINVAL;
+
+	dev = ieee802154_nl_get_dev(info);
+	if (!dev)
+		return -ENODEV;
+
+	addr.addr_type = IEEE802154_ADDR_SHORT;
+	addr.short_addr = nla_get_u16(
+			info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR]);
+	addr.pan_id = nla_get_u16(info->attrs[IEEE802154_ATTR_COORD_PAN_ID]);
+
+	channel = nla_get_u8(info->attrs[IEEE802154_ATTR_CHANNEL]);
+	bcn_ord = nla_get_u8(info->attrs[IEEE802154_ATTR_BCN_ORD]);
+	sf_ord = nla_get_u8(info->attrs[IEEE802154_ATTR_SF_ORD]);
+	pan_coord = nla_get_u8(info->attrs[IEEE802154_ATTR_PAN_COORD]);
+	blx = nla_get_u8(info->attrs[IEEE802154_ATTR_BAT_EXT]);
+	coord_realign = nla_get_u8(info->attrs[IEEE802154_ATTR_COORD_REALIGN]);
+
+	ret = ieee802154_mlme_ops(dev)->start_req(dev, &addr, channel,
+		bcn_ord, sf_ord, pan_coord, blx, coord_realign);
+
+	dev_put(dev);
+	return ret;
+}
+
+static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net_device *dev;
+	int ret;
+	u8 type;
+	u32 channels;
+	u8 duration;
+
+	if (!info->attrs[IEEE802154_ATTR_SCAN_TYPE] ||
+	    !info->attrs[IEEE802154_ATTR_CHANNELS] ||
+	    !info->attrs[IEEE802154_ATTR_DURATION])
+		return -EINVAL;
+
+	dev = ieee802154_nl_get_dev(info);
+	if (!dev)
+		return -ENODEV;
+
+	type = nla_get_u8(info->attrs[IEEE802154_ATTR_SCAN_TYPE]);
+	channels = nla_get_u32(info->attrs[IEEE802154_ATTR_CHANNELS]);
+	duration = nla_get_u8(info->attrs[IEEE802154_ATTR_DURATION]);
+
+	ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels,
+			duration);
+
+	dev_put(dev);
+	return ret;
+}
+
+#define IEEE802154_OP(_cmd, _func)			\
+	{						\
+		.cmd	= _cmd,				\
+		.policy	= ieee802154_policy,		\
+		.doit	= _func,			\
+		.dumpit	= NULL,				\
+		.flags	= GENL_ADMIN_PERM,		\
+	}
+
+static struct genl_ops ieee802154_coordinator_ops[] = {
+	IEEE802154_OP(IEEE802154_ASSOCIATE_REQ, ieee802154_associate_req),
+	IEEE802154_OP(IEEE802154_ASSOCIATE_RESP, ieee802154_associate_resp),
+	IEEE802154_OP(IEEE802154_DISASSOCIATE_REQ, ieee802154_disassociate_req),
+	IEEE802154_OP(IEEE802154_SCAN_REQ, ieee802154_scan_req),
+	IEEE802154_OP(IEEE802154_START_REQ, ieee802154_start_req),
+};
+
+static int __init ieee802154_nl_init(void)
+{
+	int rc;
+	int i;
+
+	rc = genl_register_family(&ieee802154_coordinator_family);
+	if (rc)
+		goto fail;
+
+	rc = genl_register_mc_group(&ieee802154_coordinator_family,
+			&ieee802154_coord_mcgrp);
+	if (rc)
+		goto fail;
+
+	rc = genl_register_mc_group(&ieee802154_coordinator_family,
+			&ieee802154_beacon_mcgrp);
+	if (rc)
+		goto fail;
+
+
+	for (i = 0; i < ARRAY_SIZE(ieee802154_coordinator_ops); i++) {
+		rc = genl_register_ops(&ieee802154_coordinator_family,
+				&ieee802154_coordinator_ops[i]);
+		if (rc)
+			goto fail;
+	}
+
+	return 0;
+
+fail:
+	genl_unregister_family(&ieee802154_coordinator_family);
+	return rc;
+}
+module_init(ieee802154_nl_init);
+
+static void __exit ieee802154_nl_exit(void)
+{
+	genl_unregister_family(&ieee802154_coordinator_family);
+}
+module_exit(ieee802154_nl_exit);
+
diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c
new file mode 100644
index 00000000000..c7d71d1adca
--- /dev/null
+++ b/net/ieee802154/nl_policy.c
@@ -0,0 +1,52 @@
+/*
+ * nl802154.h
+ *
+ * Copyright (C) 2007, 2008 Siemens AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <net/netlink.h>
+#include <linux/nl802154.h>
+
+#define NLA_HW_ADDR NLA_U64
+
+struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = {
+	[IEEE802154_ATTR_DEV_NAME] = { .type = NLA_STRING, },
+	[IEEE802154_ATTR_DEV_INDEX] = { .type = NLA_U32, },
+
+	[IEEE802154_ATTR_STATUS] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_SHORT_ADDR] = { .type = NLA_U16, },
+	[IEEE802154_ATTR_HW_ADDR] = { .type = NLA_HW_ADDR, },
+	[IEEE802154_ATTR_PAN_ID] = { .type = NLA_U16, },
+	[IEEE802154_ATTR_CHANNEL] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_COORD_SHORT_ADDR] = { .type = NLA_U16, },
+	[IEEE802154_ATTR_COORD_HW_ADDR] = { .type = NLA_HW_ADDR, },
+	[IEEE802154_ATTR_COORD_PAN_ID] = { .type = NLA_U16, },
+	[IEEE802154_ATTR_SRC_SHORT_ADDR] = { .type = NLA_U16, },
+	[IEEE802154_ATTR_SRC_HW_ADDR] = { .type = NLA_HW_ADDR, },
+	[IEEE802154_ATTR_SRC_PAN_ID] = { .type = NLA_U16, },
+	[IEEE802154_ATTR_DEST_SHORT_ADDR] = { .type = NLA_U16, },
+	[IEEE802154_ATTR_DEST_HW_ADDR] = { .type = NLA_HW_ADDR, },
+	[IEEE802154_ATTR_DEST_PAN_ID] = { .type = NLA_U16, },
+
+	[IEEE802154_ATTR_CAPABILITY] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_REASON] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_SCAN_TYPE] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_CHANNELS] = { .type = NLA_U32, },
+	[IEEE802154_ATTR_DURATION] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_ED_LIST] = { .len = 27 },
+};
-- 
cgit v1.2.3-70-g09d2


From 18760f1e74e8dfe8f30d4891e66163d1e6feb893 Mon Sep 17 00:00:00 2001
From: Chaitanya Lala <clala@riverbed.com>
Date: Mon, 8 Jun 2009 14:28:54 +0000
Subject: e1000e: Expose MDI-X status via ethtool change

Ethtool is a standard way of getting information about
ethernet interfaces.  We enhance ethtool kernel interface
& e1000e to make the MDI-X status readable via ethtool in
userspace.

Signed-off-by: Chaitanya Lala <clala@riverbed.com>
Signed-off-by: Arthur Jones <ajones@riverbed.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/e1000e/ethtool.c | 9 +++++++++
 include/linux/ethtool.h      | 8 +++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/net/e1000e/ethtool.c b/drivers/net/e1000e/ethtool.c
index 3d643561752..1bf4d2a5d34 100644
--- a/drivers/net/e1000e/ethtool.c
+++ b/drivers/net/e1000e/ethtool.c
@@ -167,6 +167,15 @@ static int e1000_get_settings(struct net_device *netdev,
 
 	ecmd->autoneg = ((hw->phy.media_type == e1000_media_type_fiber) ||
 			 hw->mac.autoneg) ? AUTONEG_ENABLE : AUTONEG_DISABLE;
+
+	/* MDI-X => 2; MDI =>1; Invalid =>0 */
+	if ((hw->phy.media_type == e1000_media_type_copper) &&
+	    !hw->mac.get_link_status)
+		ecmd->eth_tp_mdix = hw->phy.is_mdix ? ETH_TP_MDI_X :
+		                                      ETH_TP_MDI;
+	else
+		ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
+
 	return 0;
 }
 
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 380b04272bf..9b660bd2e2b 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -30,7 +30,8 @@ struct ethtool_cmd {
 	__u32	maxtxpkt;	/* Tx pkts before generating tx int */
 	__u32	maxrxpkt;	/* Rx pkts before generating rx int */
 	__u16	speed_hi;
-	__u16	reserved2;
+	__u8	eth_tp_mdix;
+	__u8	reserved2;
 	__u32	lp_advertising;	/* Features the link partner advertises */
 	__u32	reserved[2];
 };
@@ -632,6 +633,11 @@ struct ethtool_ops {
 #define AUTONEG_DISABLE		0x00
 #define AUTONEG_ENABLE		0x01
 
+/* Mode MDI or MDI-X */
+#define ETH_TP_MDI_INVALID	0x00
+#define ETH_TP_MDI		0x01
+#define ETH_TP_MDI_X		0x02
+
 /* Wake-On-Lan options. */
 #define WAKE_PHY		(1 << 0)
 #define WAKE_UCAST		(1 << 1)
-- 
cgit v1.2.3-70-g09d2


From 1d589bb16b825b3a7b4edd34d997f1f1f953033d Mon Sep 17 00:00:00 2001
From: john cooper <john.cooper@redhat.com>
Date: Tue, 9 Jun 2009 14:41:40 +0200
Subject: Add serial number support for virtio_blk, V4a

This patch extracts the opaque data from pci i/o
region 0 via the added VIRTIO_BLK_F_IDENTIFY
field.  By convention this data takes the form of
that returned by an ATA IDENTIFY DEVICE command,
however the driver (except for structure size)
makes no interpretation of the data.  The structure
data is copied wholesale to userspace via a
HDIO_GET_IDENTITY ioctl command (eg: hdparm -i <dev>).

Signed-off-by: john cooper <john.cooper@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 drivers/block/virtio_blk.c | 37 ++++++++++++++++++++++++++++++++++---
 include/linux/virtio_blk.h |  4 ++++
 2 files changed, 38 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index c4845b16946..c0facaa55cf 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -171,11 +171,43 @@ static void do_virtblk_request(struct request_queue *q)
 		vblk->vq->vq_ops->kick(vblk->vq);
 }
 
+/* return ATA identify data
+ */
+static int virtblk_identify(struct gendisk *disk, void *argp)
+{
+	struct virtio_blk *vblk = disk->private_data;
+	void *opaque;
+	int err = -ENOMEM;
+
+	opaque = kmalloc(VIRTIO_BLK_ID_BYTES, GFP_KERNEL);
+	if (!opaque)
+		goto out;
+
+	err = virtio_config_buf(vblk->vdev, VIRTIO_BLK_F_IDENTIFY,
+		offsetof(struct virtio_blk_config, identify), opaque,
+		VIRTIO_BLK_ID_BYTES);
+
+	if (err)
+		goto out_kfree;
+
+	if (copy_to_user(argp, opaque, VIRTIO_BLK_ID_BYTES))
+		err = -EFAULT;
+
+out_kfree:
+	kfree(opaque);
+out:
+	return err;
+}
+
 static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
 			 unsigned cmd, unsigned long data)
 {
 	struct gendisk *disk = bdev->bd_disk;
 	struct virtio_blk *vblk = disk->private_data;
+	void __user *argp = (void __user *)data;
+
+	if (cmd == HDIO_GET_IDENTITY)
+		return virtblk_identify(disk, argp);
 
 	/*
 	 * Only allow the generic SCSI ioctls if the host can support it.
@@ -183,8 +215,7 @@ static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
 	if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
 		return -ENOIOCTLCMD;
 
-	return scsi_cmd_ioctl(disk->queue, disk, mode, cmd,
-			      (void __user *)data);
+	return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, argp);
 }
 
 /* We provide getgeo only to please some old bootloader/partitioning tools */
@@ -390,7 +421,7 @@ static struct virtio_device_id id_table[] = {
 static unsigned int features[] = {
 	VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
 	VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
-	VIRTIO_BLK_F_SCSI,
+	VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_IDENTIFY
 };
 
 static struct virtio_driver virtio_blk = {
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index 4dbcbc1c348..be7d255fc7c 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -16,6 +16,9 @@
 #define VIRTIO_BLK_F_RO		5	/* Disk is read-only */
 #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
 #define VIRTIO_BLK_F_SCSI	7	/* Supports scsi command passthru */
+#define VIRTIO_BLK_F_IDENTIFY	8	/* ATA IDENTIFY supported */
+
+#define VIRTIO_BLK_ID_BYTES	(sizeof(__u16[256]))	/* IDENTIFY DATA */
 
 struct virtio_blk_config
 {
@@ -33,6 +36,7 @@ struct virtio_blk_config
 	} geometry;
 	/* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */
 	__u32 blk_size;
+	__u8 identify[VIRTIO_BLK_ID_BYTES];
 } __attribute__((packed));
 
 /* These two define direction. */
-- 
cgit v1.2.3-70-g09d2


From a31e1ffd2231b8fdf7eda9ed750a4a0df9bcad4e Mon Sep 17 00:00:00 2001
From: Laszlo Attila Toth <panther@balabit.hu>
Date: Tue, 9 Jun 2009 15:16:34 +0200
Subject: netfilter: xt_socket: added new revision of the 'socket' match
 supporting flags

If the XT_SOCKET_TRANSPARENT flag is set, enabled 'transparent'
socket option is required for the socket to be matched.

Signed-off-by: Laszlo Attila Toth <panther@balabit.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/xt_socket.h | 12 +++++++
 net/netfilter/xt_socket.c           | 63 ++++++++++++++++++++++++++++++-------
 2 files changed, 64 insertions(+), 11 deletions(-)
 create mode 100644 include/linux/netfilter/xt_socket.h

(limited to 'include')

diff --git a/include/linux/netfilter/xt_socket.h b/include/linux/netfilter/xt_socket.h
new file mode 100644
index 00000000000..6f475b8ff34
--- /dev/null
+++ b/include/linux/netfilter/xt_socket.h
@@ -0,0 +1,12 @@
+#ifndef _XT_SOCKET_H
+#define _XT_SOCKET_H
+
+enum {
+	XT_SOCKET_TRANSPARENT = 1 << 0,
+};
+
+struct xt_socket_mtinfo1 {
+	__u8 flags;
+};
+
+#endif /* _XT_SOCKET_H */
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 1acc089be7e..ebf00ad5b19 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -22,6 +22,8 @@
 #include <net/netfilter/nf_tproxy_core.h>
 #include <net/netfilter/ipv4/nf_defrag_ipv4.h>
 
+#include <linux/netfilter/xt_socket.h>
+
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #define XT_SOCKET_HAVE_CONNTRACK 1
 #include <net/netfilter/nf_conntrack.h>
@@ -86,7 +88,8 @@ extract_icmp_fields(const struct sk_buff *skb,
 
 
 static bool
-socket_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+socket_match(const struct sk_buff *skb, const struct xt_match_param *par,
+	     const struct xt_socket_mtinfo1 *info)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	struct udphdr _hdr, *hp = NULL;
@@ -141,10 +144,24 @@ socket_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
 				   saddr, daddr, sport, dport, par->in, false);
 	if (sk != NULL) {
-		bool wildcard = (sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->rcv_saddr == 0);
+		bool wildcard;
+		bool transparent = true;
+
+		/* Ignore sockets listening on INADDR_ANY */
+		wildcard = (sk->sk_state != TCP_TIME_WAIT &&
+			    inet_sk(sk)->rcv_saddr == 0);
+
+		/* Ignore non-transparent sockets,
+		   if XT_SOCKET_TRANSPARENT is used */
+		if (info && info->flags & XT_SOCKET_TRANSPARENT)
+			transparent = ((sk->sk_state != TCP_TIME_WAIT &&
+					inet_sk(sk)->transparent) ||
+				       (sk->sk_state == TCP_TIME_WAIT &&
+					inet_twsk(sk)->tw_transparent));
 
 		nf_tproxy_put_sock(sk);
-		if (wildcard)
+
+		if (wildcard || !transparent)
 			sk = NULL;
 	}
 
@@ -157,23 +174,47 @@ socket_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return (sk != NULL);
 }
 
-static struct xt_match socket_mt_reg __read_mostly = {
-	.name		= "socket",
-	.family		= AF_INET,
-	.match		= socket_mt,
-	.hooks		= 1 << NF_INET_PRE_ROUTING,
-	.me		= THIS_MODULE,
+static bool
+socket_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	return socket_match(skb, par, NULL);
+}
+
+static bool
+socket_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	return socket_match(skb, par, par->matchinfo);
+}
+
+static struct xt_match socket_mt_reg[] __read_mostly = {
+	{
+		.name		= "socket",
+		.revision	= 0,
+		.family		= NFPROTO_IPV4,
+		.match		= socket_mt_v0,
+		.hooks		= 1 << NF_INET_PRE_ROUTING,
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "socket",
+		.revision	= 1,
+		.family		= NFPROTO_IPV4,
+		.match		= socket_mt_v1,
+		.matchsize	= sizeof(struct xt_socket_mtinfo1),
+		.hooks		= 1 << NF_INET_PRE_ROUTING,
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init socket_mt_init(void)
 {
 	nf_defrag_ipv4_enable();
-	return xt_register_match(&socket_mt_reg);
+	return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg));
 }
 
 static void __exit socket_mt_exit(void)
 {
-	xt_unregister_match(&socket_mt_reg);
+	xt_unregister_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg));
 }
 
 module_init(socket_mt_init);
-- 
cgit v1.2.3-70-g09d2


From 43514774ff40c4fbe0cbbd3d8293a359f1a9fe71 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Mon, 8 Jun 2009 18:14:41 -0700
Subject: [SCSI] iscsi class: Add new NETLINK_ISCSI messages for cnic/bnx2i
 driver.

Add ISCSI_NETLINK messages for iSCSI NICs to get information such as
path from userspace.  Original iscsid messages are now always sent as
multicast to group 1.  The new messages are sent to group 2.

The multicast changes were made by Mike Christie.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: Benjamin Li <benli@broadcom.com>
Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/scsi/scsi_transport_iscsi.c | 122 +++++++++++++++++++++++++-----------
 include/scsi/iscsi_if.h             |  42 +++++++++++++
 include/scsi/scsi_transport_iscsi.h |   5 ++
 3 files changed, 133 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index d69a53aa406..f3e664628d7 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -37,7 +37,6 @@
 #define ISCSI_TRANSPORT_VERSION "2.0-870"
 
 struct iscsi_internal {
-	int daemon_pid;
 	struct scsi_transport_template t;
 	struct iscsi_transport *iscsi_transport;
 	struct list_head list;
@@ -938,23 +937,9 @@ iscsi_if_transport_lookup(struct iscsi_transport *tt)
 }
 
 static int
-iscsi_broadcast_skb(struct sk_buff *skb, gfp_t gfp)
+iscsi_multicast_skb(struct sk_buff *skb, uint32_t group, gfp_t gfp)
 {
-	return netlink_broadcast(nls, skb, 0, 1, gfp);
-}
-
-static int
-iscsi_unicast_skb(struct sk_buff *skb, int pid)
-{
-	int rc;
-
-	rc = netlink_unicast(nls, skb, pid, MSG_DONTWAIT);
-	if (rc < 0) {
-		printk(KERN_ERR "iscsi: can not unicast skb (%d)\n", rc);
-		return rc;
-	}
-
-	return 0;
+	return nlmsg_multicast(nls, skb, 0, group, gfp);
 }
 
 int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr,
@@ -980,7 +965,7 @@ int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr,
 		return -ENOMEM;
 	}
 
-	nlh = __nlmsg_put(skb, priv->daemon_pid, 0, 0, (len - sizeof(*nlh)), 0);
+	nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0);
 	ev = NLMSG_DATA(nlh);
 	memset(ev, 0, sizeof(*ev));
 	ev->transport_handle = iscsi_handle(conn->transport);
@@ -991,10 +976,45 @@ int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr,
 	memcpy(pdu, hdr, sizeof(struct iscsi_hdr));
 	memcpy(pdu + sizeof(struct iscsi_hdr), data, data_size);
 
-	return iscsi_unicast_skb(skb, priv->daemon_pid);
+	return iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_ATOMIC);
 }
 EXPORT_SYMBOL_GPL(iscsi_recv_pdu);
 
+int iscsi_offload_mesg(struct Scsi_Host *shost,
+		       struct iscsi_transport *transport, uint32_t type,
+		       char *data, uint16_t data_size)
+{
+	struct nlmsghdr	*nlh;
+	struct sk_buff *skb;
+	struct iscsi_uevent *ev;
+	int len = NLMSG_SPACE(sizeof(*ev) + data_size);
+
+	skb = alloc_skb(len, GFP_NOIO);
+	if (!skb) {
+		printk(KERN_ERR "can not deliver iscsi offload message:OOM\n");
+		return -ENOMEM;
+	}
+
+	nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0);
+	ev = NLMSG_DATA(nlh);
+	memset(ev, 0, sizeof(*ev));
+	ev->type = type;
+	ev->transport_handle = iscsi_handle(transport);
+	switch (type) {
+	case ISCSI_KEVENT_PATH_REQ:
+		ev->r.req_path.host_no = shost->host_no;
+		break;
+	case ISCSI_KEVENT_IF_DOWN:
+		ev->r.notify_if_down.host_no = shost->host_no;
+		break;
+	}
+
+	memcpy((char *)ev + sizeof(*ev), data, data_size);
+
+	return iscsi_multicast_skb(skb, ISCSI_NL_GRP_UIP, GFP_NOIO);
+}
+EXPORT_SYMBOL_GPL(iscsi_offload_mesg);
+
 void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
 {
 	struct nlmsghdr	*nlh;
@@ -1014,7 +1034,7 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
 		return;
 	}
 
-	nlh = __nlmsg_put(skb, priv->daemon_pid, 0, 0, (len - sizeof(*nlh)), 0);
+	nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0);
 	ev = NLMSG_DATA(nlh);
 	ev->transport_handle = iscsi_handle(conn->transport);
 	ev->type = ISCSI_KEVENT_CONN_ERROR;
@@ -1022,7 +1042,7 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
 	ev->r.connerror.cid = conn->cid;
 	ev->r.connerror.sid = iscsi_conn_get_sid(conn);
 
-	iscsi_broadcast_skb(skb, GFP_ATOMIC);
+	iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_ATOMIC);
 
 	iscsi_cls_conn_printk(KERN_INFO, conn, "detected conn error (%d)\n",
 			      error);
@@ -1030,8 +1050,8 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
 EXPORT_SYMBOL_GPL(iscsi_conn_error_event);
 
 static int
-iscsi_if_send_reply(int pid, int seq, int type, int done, int multi,
-		      void *payload, int size)
+iscsi_if_send_reply(uint32_t group, int seq, int type, int done, int multi,
+		    void *payload, int size)
 {
 	struct sk_buff	*skb;
 	struct nlmsghdr	*nlh;
@@ -1045,10 +1065,10 @@ iscsi_if_send_reply(int pid, int seq, int type, int done, int multi,
 		return -ENOMEM;
 	}
 
-	nlh = __nlmsg_put(skb, pid, seq, t, (len - sizeof(*nlh)), 0);
+	nlh = __nlmsg_put(skb, 0, 0, t, (len - sizeof(*nlh)), 0);
 	nlh->nlmsg_flags = flags;
 	memcpy(NLMSG_DATA(nlh), payload, size);
-	return iscsi_unicast_skb(skb, pid);
+	return iscsi_multicast_skb(skb, group, GFP_ATOMIC);
 }
 
 static int
@@ -1085,7 +1105,7 @@ iscsi_if_get_stats(struct iscsi_transport *transport, struct nlmsghdr *nlh)
 			return -ENOMEM;
 		}
 
-		nlhstat = __nlmsg_put(skbstat, priv->daemon_pid, 0, 0,
+		nlhstat = __nlmsg_put(skbstat, 0, 0, 0,
 				      (len - sizeof(*nlhstat)), 0);
 		evstat = NLMSG_DATA(nlhstat);
 		memset(evstat, 0, sizeof(*evstat));
@@ -1109,7 +1129,8 @@ iscsi_if_get_stats(struct iscsi_transport *transport, struct nlmsghdr *nlh)
 		skb_trim(skbstat, NLMSG_ALIGN(actual_size));
 		nlhstat->nlmsg_len = actual_size;
 
-		err = iscsi_unicast_skb(skbstat, priv->daemon_pid);
+		err = iscsi_multicast_skb(skbstat, ISCSI_NL_GRP_ISCSID,
+					  GFP_ATOMIC);
 	} while (err < 0 && err != -ECONNREFUSED);
 
 	return err;
@@ -1143,7 +1164,7 @@ int iscsi_session_event(struct iscsi_cls_session *session,
 		return -ENOMEM;
 	}
 
-	nlh = __nlmsg_put(skb, priv->daemon_pid, 0, 0, (len - sizeof(*nlh)), 0);
+	nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0);
 	ev = NLMSG_DATA(nlh);
 	ev->transport_handle = iscsi_handle(session->transport);
 
@@ -1172,7 +1193,7 @@ int iscsi_session_event(struct iscsi_cls_session *session,
 	 * this will occur if the daemon is not up, so we just warn
 	 * the user and when the daemon is restarted it will handle it
 	 */
-	rc = iscsi_broadcast_skb(skb, GFP_KERNEL);
+	rc = iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_KERNEL);
 	if (rc == -ESRCH)
 		iscsi_cls_session_printk(KERN_ERR, session,
 					 "Cannot notify userspace of session "
@@ -1393,7 +1414,31 @@ iscsi_set_host_param(struct iscsi_transport *transport,
 }
 
 static int
-iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+iscsi_set_path(struct iscsi_transport *transport, struct iscsi_uevent *ev)
+{
+	struct Scsi_Host *shost;
+	struct iscsi_path *params;
+	int err;
+
+	if (!transport->set_path)
+		return -ENOSYS;
+
+	shost = scsi_host_lookup(ev->u.set_path.host_no);
+	if (!shost) {
+		printk(KERN_ERR "set path could not find host no %u\n",
+		       ev->u.set_path.host_no);
+		return -ENODEV;
+	}
+
+	params = (struct iscsi_path *)((char *)ev + sizeof(*ev));
+	err = transport->set_path(shost, params);
+
+	scsi_host_put(shost);
+	return err;
+}
+
+static int
+iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
 {
 	int err = 0;
 	struct iscsi_uevent *ev = NLMSG_DATA(nlh);
@@ -1403,6 +1448,11 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	struct iscsi_cls_conn *conn;
 	struct iscsi_endpoint *ep = NULL;
 
+	if (nlh->nlmsg_type == ISCSI_UEVENT_PATH_UPDATE)
+		*group = ISCSI_NL_GRP_UIP;
+	else
+		*group = ISCSI_NL_GRP_ISCSID;
+
 	priv = iscsi_if_transport_lookup(iscsi_ptr(ev->transport_handle));
 	if (!priv)
 		return -EINVAL;
@@ -1411,8 +1461,6 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	if (!try_module_get(transport->owner))
 		return -EINVAL;
 
-	priv->daemon_pid = NETLINK_CREDS(skb)->pid;
-
 	switch (nlh->nlmsg_type) {
 	case ISCSI_UEVENT_CREATE_SESSION:
 		err = iscsi_if_create_session(priv, ep, ev,
@@ -1506,6 +1554,9 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	case ISCSI_UEVENT_SET_HOST_PARAM:
 		err = iscsi_set_host_param(transport, ev);
 		break;
+	case ISCSI_UEVENT_PATH_UPDATE:
+		err = iscsi_set_path(transport, ev);
+		break;
 	default:
 		err = -ENOSYS;
 		break;
@@ -1528,6 +1579,7 @@ iscsi_if_rx(struct sk_buff *skb)
 		uint32_t rlen;
 		struct nlmsghdr	*nlh;
 		struct iscsi_uevent *ev;
+		uint32_t group;
 
 		nlh = nlmsg_hdr(skb);
 		if (nlh->nlmsg_len < sizeof(*nlh) ||
@@ -1540,7 +1592,7 @@ iscsi_if_rx(struct sk_buff *skb)
 		if (rlen > skb->len)
 			rlen = skb->len;
 
-		err = iscsi_if_recv_msg(skb, nlh);
+		err = iscsi_if_recv_msg(skb, nlh, &group);
 		if (err) {
 			ev->type = ISCSI_KEVENT_IF_ERROR;
 			ev->iferror = err;
@@ -1554,8 +1606,7 @@ iscsi_if_rx(struct sk_buff *skb)
 			 */
 			if (ev->type == ISCSI_UEVENT_GET_STATS && !err)
 				break;
-			err = iscsi_if_send_reply(
-				NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq,
+			err = iscsi_if_send_reply(group, nlh->nlmsg_seq,
 				nlh->nlmsg_type, 0, 0, ev, sizeof(*ev));
 		} while (err < 0 && err != -ECONNREFUSED);
 		skb_pull(skb, rlen);
@@ -1803,7 +1854,6 @@ iscsi_register_transport(struct iscsi_transport *tt)
 	if (!priv)
 		return NULL;
 	INIT_LIST_HEAD(&priv->list);
-	priv->daemon_pid = -1;
 	priv->iscsi_transport = tt;
 	priv->t.user_scan = iscsi_user_scan;
 	priv->t.create_work_queue = 1;
diff --git a/include/scsi/iscsi_if.h b/include/scsi/iscsi_if.h
index 2c1a4af9eaf..4426f00da5f 100644
--- a/include/scsi/iscsi_if.h
+++ b/include/scsi/iscsi_if.h
@@ -22,6 +22,11 @@
 #define ISCSI_IF_H
 
 #include <scsi/iscsi_proto.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+
+#define ISCSI_NL_GRP_ISCSID	1
+#define ISCSI_NL_GRP_UIP	2
 
 #define UEVENT_BASE			10
 #define KEVENT_BASE			100
@@ -53,6 +58,8 @@ enum iscsi_uevent_e {
 	ISCSI_UEVENT_CREATE_BOUND_SESSION		= UEVENT_BASE + 18,
 	ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST	= UEVENT_BASE + 19,
 
+	ISCSI_UEVENT_PATH_UPDATE	= UEVENT_BASE + 20,
+
 	/* up events */
 	ISCSI_KEVENT_RECV_PDU		= KEVENT_BASE + 1,
 	ISCSI_KEVENT_CONN_ERROR		= KEVENT_BASE + 2,
@@ -60,6 +67,9 @@ enum iscsi_uevent_e {
 	ISCSI_KEVENT_DESTROY_SESSION	= KEVENT_BASE + 4,
 	ISCSI_KEVENT_UNBIND_SESSION	= KEVENT_BASE + 5,
 	ISCSI_KEVENT_CREATE_SESSION	= KEVENT_BASE + 6,
+
+	ISCSI_KEVENT_PATH_REQ		= KEVENT_BASE + 7,
+	ISCSI_KEVENT_IF_DOWN		= KEVENT_BASE + 8,
 };
 
 enum iscsi_tgt_dscvr {
@@ -159,6 +169,9 @@ struct iscsi_uevent {
 			uint32_t	param; /* enum iscsi_host_param */
 			uint32_t	len;
 		} set_host_param;
+		struct msg_set_path {
+			uint32_t	host_no;
+		} set_path;
 	} u;
 	union {
 		/* messages k -> u */
@@ -192,9 +205,38 @@ struct iscsi_uevent {
 		struct msg_transport_connect_ret {
 			uint64_t	handle;
 		} ep_connect_ret;
+		struct msg_req_path {
+			uint32_t	host_no;
+		} req_path;
+		struct msg_notify_if_down {
+			uint32_t	host_no;
+		} notify_if_down;
 	} r;
 } __attribute__ ((aligned (sizeof(uint64_t))));
 
+/*
+ * To keep the struct iscsi_uevent size the same for userspace code
+ * compatibility, the main structure for ISCSI_UEVENT_PATH_UPDATE and
+ * ISCSI_KEVENT_PATH_REQ is defined separately and comes after the
+ * struct iscsi_uevent in the NETLINK_ISCSI message.
+ */
+struct iscsi_path {
+	uint64_t	handle;
+	uint8_t		mac_addr[6];
+	uint8_t		mac_addr_old[6];
+	uint32_t	ip_addr_len;	/* 4 or 16 */
+	union {
+		struct in_addr	v4_addr;
+		struct in6_addr	v6_addr;
+	} src;
+	union {
+		struct in_addr	v4_addr;
+		struct in6_addr	v6_addr;
+	} dst;
+	uint16_t	vlan_id;
+	uint16_t	pmtu;
+} __attribute__ ((aligned (sizeof(uint64_t))));
+
 /*
  * Common error codes
  */
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index 8cb7a31d996..349c7f30720 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -133,6 +133,7 @@ struct iscsi_transport {
 	void (*ep_disconnect) (struct iscsi_endpoint *ep);
 	int (*tgt_dscvr) (struct Scsi_Host *shost, enum iscsi_tgt_dscvr type,
 			  uint32_t enable, struct sockaddr *dst_addr);
+	int (*set_path) (struct Scsi_Host *shost, struct iscsi_path *params);
 };
 
 /*
@@ -149,6 +150,10 @@ extern void iscsi_conn_error_event(struct iscsi_cls_conn *conn,
 extern int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr,
 			  char *data, uint32_t data_size);
 
+extern int iscsi_offload_mesg(struct Scsi_Host *shost,
+			      struct iscsi_transport *transport, uint32_t type,
+			      char *data, uint16_t data_size);
+
 struct iscsi_cls_conn {
 	struct list_head conn_list;	/* item in connlist */
 	void *dd_data;			/* LLD private data */
-- 
cgit v1.2.3-70-g09d2


From 55782138e47d9baf2f7d3a7af9e7cf42adf72c56 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Tue, 9 Jun 2009 13:43:05 +0800
Subject: tracing/events: convert block trace points to TRACE_EVENT()

TRACE_EVENT is a more generic way to define tracepoints. Doing so adds
these new capabilities to this tracepoint:

  - zero-copy and per-cpu splice() tracing
  - binary tracing without printf overhead
  - structured logging records exposed under /debug/tracing/events
  - trace events embedded in function tracer output and other plugins
  - user-defined, per tracepoint filter expressions
  ...

Cons:

  - no dev_t info for the output of plug, unplug_timer and unplug_io events.
    no dev_t info for getrq and sleeprq events if bio == NULL.
    no dev_t info for rq_abort,...,rq_requeue events if rq->rq_disk == NULL.

    This is mainly because we can't get the deivce from a request queue.
    But this may change in the future.

  - A packet command is converted to a string in TP_assign, not TP_print.
    While blktrace do the convertion just before output.

    Since pc requests should be rather rare, this is not a big issue.

  - In blktrace, an event can have 2 different print formats, but a TRACE_EVENT
    has a unique format, which means we have some unused data in a trace entry.

    The overhead is minimized by using __dynamic_array() instead of __array().

I've benchmarked the ioctl blktrace vs the splice based TRACE_EVENT tracing:

      dd                   dd + ioctl blktrace       dd + TRACE_EVENT (splice)
1     7.36s, 42.7 MB/s     7.50s, 42.0 MB/s          7.41s, 42.5 MB/s
2     7.43s, 42.3 MB/s     7.48s, 42.1 MB/s          7.43s, 42.4 MB/s
3     7.38s, 42.6 MB/s     7.45s, 42.2 MB/s          7.41s, 42.5 MB/s

So the overhead of tracing is very small, and no regression when using
those trace events vs blktrace.

And the binary output of TRACE_EVENT is much smaller than blktrace:

 # ls -l -h
 -rw-r--r-- 1 root root 8.8M 06-09 13:24 sda.blktrace.0
 -rw-r--r-- 1 root root 195K 06-09 13:24 sda.blktrace.1
 -rw-r--r-- 1 root root 2.7M 06-09 13:25 trace_splice.out

Following are some comparisons between TRACE_EVENT and blktrace:

plug:
  kjournald-480   [000]   303.084981: block_plug: [kjournald]
  kjournald-480   [000]   303.084981:   8,0    P   N [kjournald]

unplug_io:
  kblockd/0-118   [000]   300.052973: block_unplug_io: [kblockd/0] 1
  kblockd/0-118   [000]   300.052974:   8,0    U   N [kblockd/0] 1

remap:
  kjournald-480   [000]   303.085042: block_remap: 8,0 W 102736992 + 8 <- (8,8) 33384
  kjournald-480   [000]   303.085043:   8,0    A   W 102736992 + 8 <- (8,8) 33384

bio_backmerge:
  kjournald-480   [000]   303.085086: block_bio_backmerge: 8,0 W 102737032 + 8 [kjournald]
  kjournald-480   [000]   303.085086:   8,0    M   W 102737032 + 8 [kjournald]

getrq:
  kjournald-480   [000]   303.084974: block_getrq: 8,0 W 102736984 + 8 [kjournald]
  kjournald-480   [000]   303.084975:   8,0    G   W 102736984 + 8 [kjournald]

  bash-2066  [001]  1072.953770:   8,0    G   N [bash]
  bash-2066  [001]  1072.953773: block_getrq: 0,0 N 0 + 0 [bash]

rq_complete:
  konsole-2065  [001]   300.053184: block_rq_complete: 8,0 W () 103669040 + 16 [0]
  konsole-2065  [001]   300.053191:   8,0    C   W 103669040 + 16 [0]

  ksoftirqd/1-7   [001]  1072.953811:   8,0    C   N (5a 00 08 00 00 00 00 00 24 00) [0]
  ksoftirqd/1-7   [001]  1072.953813: block_rq_complete: 0,0 N (5a 00 08 00 00 00 00 00 24 00) 0 + 0 [0]

rq_insert:
  kjournald-480   [000]   303.084985: block_rq_insert: 8,0 W 0 () 102736984 + 8 [kjournald]
  kjournald-480   [000]   303.084986:   8,0    I   W 102736984 + 8 [kjournald]

Changelog from v2 -> v3:

- use the newly introduced __dynamic_array().

Changelog from v1 -> v2:

- use __string() instead of __array() to minimize the memory required
  to store hex dump of rq->cmd().

- support large pc requests.

- add missing blk_fill_rwbs_rq() in block_rq_requeue TRACE_EVENT.

- some cleanups.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4A2DF669.5070905@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 block/blk-core.c             |  16 +-
 block/elevator.c             |   8 +-
 drivers/md/dm.c              |   5 +-
 fs/bio.c                     |   3 +-
 include/linux/blktrace_api.h |  13 ++
 include/trace/block.h        |  76 -------
 include/trace/events/block.h | 483 +++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/Makefile        |   5 +-
 kernel/trace/blktrace.c      |  78 ++++++-
 mm/bounce.c                  |   5 +-
 10 files changed, 588 insertions(+), 104 deletions(-)
 delete mode 100644 include/trace/block.h
 create mode 100644 include/trace/events/block.h

(limited to 'include')

diff --git a/block/blk-core.c b/block/blk-core.c
index 1306de9cce0..9475bf99b89 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -28,22 +28,14 @@
 #include <linux/task_io_accounting_ops.h>
 #include <linux/blktrace_api.h>
 #include <linux/fault-inject.h>
-#include <trace/block.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/block.h>
 
 #include "blk.h"
 
-DEFINE_TRACE(block_plug);
-DEFINE_TRACE(block_unplug_io);
-DEFINE_TRACE(block_unplug_timer);
-DEFINE_TRACE(block_getrq);
-DEFINE_TRACE(block_sleeprq);
-DEFINE_TRACE(block_rq_requeue);
-DEFINE_TRACE(block_bio_backmerge);
-DEFINE_TRACE(block_bio_frontmerge);
-DEFINE_TRACE(block_bio_queue);
-DEFINE_TRACE(block_rq_complete);
-DEFINE_TRACE(block_remap);	/* Also used in drivers/md/dm.c */
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 
 static int __make_request(struct request_queue *q, struct bio *bio);
 
diff --git a/block/elevator.c b/block/elevator.c
index 7073a907257..e220f0c543e 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -33,17 +33,16 @@
 #include <linux/compiler.h>
 #include <linux/delay.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
 #include <linux/hash.h>
 #include <linux/uaccess.h>
 
+#include <trace/events/block.h>
+
 #include "blk.h"
 
 static DEFINE_SPINLOCK(elv_list_lock);
 static LIST_HEAD(elv_list);
 
-DEFINE_TRACE(block_rq_abort);
-
 /*
  * Merge hash stuff.
  */
@@ -55,9 +54,6 @@ static const int elv_hash_shift = 6;
 #define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
 #define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))
 
-DEFINE_TRACE(block_rq_insert);
-DEFINE_TRACE(block_rq_issue);
-
 /*
  * Query io scheduler to see if the current process issuing bio may be
  * merged with rq.
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index e2ee4a79ea2..3fd8b1e6548 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -20,7 +20,8 @@
 #include <linux/idr.h>
 #include <linux/hdreg.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
+
+#include <trace/events/block.h>
 
 #define DM_MSG_PREFIX "core"
 
@@ -53,8 +54,6 @@ struct dm_target_io {
 	union map_info info;
 };
 
-DEFINE_TRACE(block_bio_complete);
-
 /*
  * For request-based dm.
  * One of these is allocated per request.
diff --git a/fs/bio.c b/fs/bio.c
index 98711647ece..740699c4f90 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -26,10 +26,9 @@
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
 #include <scsi/sg.h>		/* for struct sg_iovec */
 
-DEFINE_TRACE(block_split);
+#include <trace/events/block.h>
 
 /*
  * Test patch to inline a certain number of bi_io_vec's inside the bio
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 82b4636030e..c7ec31dd04c 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -218,5 +218,18 @@ static inline int blk_trace_init_sysfs(struct device *dev)
 
 #endif /* CONFIG_BLK_DEV_IO_TRACE */
 
+#ifdef CONFIG_EVENT_TRACING
+
+static inline int blk_cmd_buf_len(struct request *rq)
+{
+	return blk_pc_request(rq) ? rq->cmd_len * 3 : 1;
+}
+
+extern void blk_dump_cmd(char *buf, struct request *rq);
+extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes);
+extern void blk_fill_rwbs_rq(char *rwbs, struct request *rq);
+
+#endif /* CONFIG_EVENT_TRACING */
+
 #endif /* __KERNEL__ */
 #endif
diff --git a/include/trace/block.h b/include/trace/block.h
deleted file mode 100644
index 5b12efa096b..00000000000
--- a/include/trace/block.h
+++ /dev/null
@@ -1,76 +0,0 @@
-#ifndef _TRACE_BLOCK_H
-#define _TRACE_BLOCK_H
-
-#include <linux/blkdev.h>
-#include <linux/tracepoint.h>
-
-DECLARE_TRACE(block_rq_abort,
-	TP_PROTO(struct request_queue *q, struct request *rq),
-	      TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_rq_insert,
-	TP_PROTO(struct request_queue *q, struct request *rq),
-	      TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_rq_issue,
-	TP_PROTO(struct request_queue *q, struct request *rq),
-	      TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_rq_requeue,
-	TP_PROTO(struct request_queue *q, struct request *rq),
-	      TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_rq_complete,
-	TP_PROTO(struct request_queue *q, struct request *rq),
-	      TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_bio_bounce,
-	TP_PROTO(struct request_queue *q, struct bio *bio),
-	      TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_bio_complete,
-	TP_PROTO(struct request_queue *q, struct bio *bio),
-	      TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_bio_backmerge,
-	TP_PROTO(struct request_queue *q, struct bio *bio),
-	      TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_bio_frontmerge,
-	TP_PROTO(struct request_queue *q, struct bio *bio),
-	      TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_bio_queue,
-	TP_PROTO(struct request_queue *q, struct bio *bio),
-	      TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_getrq,
-	TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
-	      TP_ARGS(q, bio, rw));
-
-DECLARE_TRACE(block_sleeprq,
-	TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
-	      TP_ARGS(q, bio, rw));
-
-DECLARE_TRACE(block_plug,
-	TP_PROTO(struct request_queue *q),
-	      TP_ARGS(q));
-
-DECLARE_TRACE(block_unplug_timer,
-	TP_PROTO(struct request_queue *q),
-	      TP_ARGS(q));
-
-DECLARE_TRACE(block_unplug_io,
-	TP_PROTO(struct request_queue *q),
-	      TP_ARGS(q));
-
-DECLARE_TRACE(block_split,
-	TP_PROTO(struct request_queue *q, struct bio *bio, unsigned int pdu),
-	      TP_ARGS(q, bio, pdu));
-
-DECLARE_TRACE(block_remap,
-	TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
-		 sector_t from),
-	      TP_ARGS(q, bio, dev, from));
-
-#endif
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
new file mode 100644
index 00000000000..a99d1e565bb
--- /dev/null
+++ b/include/trace/events/block.h
@@ -0,0 +1,483 @@
+#if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BLOCK_H
+
+#include <linux/blktrace_api.h>
+#include <linux/blkdev.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM block
+
+TRACE_EVENT(block_rq_abort,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq),
+
+	TP_STRUCT__entry(
+		__field(  dev_t,	dev			)
+		__field(  sector_t,	sector			)
+		__field(  unsigned int,	nr_sector		)
+		__field(  int,		errors			)
+		__array(  char,		rwbs,	6		)
+		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->sector    = blk_pc_request(rq) ? 0 : rq->hard_sector;
+		__entry->nr_sector = blk_pc_request(rq) ?
+						0 : rq->hard_nr_sectors;
+		__entry->errors    = rq->errors;
+
+		blk_fill_rwbs_rq(__entry->rwbs, rq);
+		blk_dump_cmd(__get_str(cmd), rq);
+	),
+
+	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, __get_str(cmd),
+		  __entry->sector, __entry->nr_sector, __entry->errors)
+);
+
+TRACE_EVENT(block_rq_insert,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq),
+
+	TP_STRUCT__entry(
+		__field(  dev_t,	dev			)
+		__field(  sector_t,	sector			)
+		__field(  unsigned int,	nr_sector		)
+		__field(  unsigned int,	bytes			)
+		__array(  char,		rwbs,	6		)
+		__array(  char,         comm,   TASK_COMM_LEN   )
+		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->sector    = blk_pc_request(rq) ? 0 : rq->hard_sector;
+		__entry->nr_sector = blk_pc_request(rq) ?
+						0 : rq->hard_nr_sectors;
+		__entry->bytes     = blk_pc_request(rq) ? rq->data_len : 0;
+
+		blk_fill_rwbs_rq(__entry->rwbs, rq);
+		blk_dump_cmd(__get_str(cmd), rq);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, __entry->bytes, __get_str(cmd),
+		  __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_rq_issue,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq),
+
+	TP_STRUCT__entry(
+		__field(  dev_t,	dev			)
+		__field(  sector_t,	sector			)
+		__field(  unsigned int,	nr_sector		)
+		__field(  unsigned int,	bytes			)
+		__array(  char,		rwbs,	6		)
+		__array(  char,		comm,   TASK_COMM_LEN   )
+		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->sector    = blk_pc_request(rq) ? 0 : rq->hard_sector;
+		__entry->nr_sector = blk_pc_request(rq) ?
+						0 : rq->hard_nr_sectors;
+		__entry->bytes     = blk_pc_request(rq) ? rq->data_len : 0;
+
+		blk_fill_rwbs_rq(__entry->rwbs, rq);
+		blk_dump_cmd(__get_str(cmd), rq);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, __entry->bytes, __get_str(cmd),
+		  __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_rq_requeue,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq),
+
+	TP_STRUCT__entry(
+		__field(  dev_t,	dev			)
+		__field(  sector_t,	sector			)
+		__field(  unsigned int,	nr_sector		)
+		__field(  int,		errors			)
+		__array(  char,		rwbs,	6		)
+		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->sector    = blk_pc_request(rq) ? 0 : rq->hard_sector;
+		__entry->nr_sector = blk_pc_request(rq) ?
+						0 : rq->hard_nr_sectors;
+		__entry->errors	   = rq->errors;
+
+		blk_fill_rwbs_rq(__entry->rwbs, rq);
+		blk_dump_cmd(__get_str(cmd), rq);
+	),
+
+	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, __get_str(cmd),
+		  __entry->sector, __entry->nr_sector, __entry->errors)
+);
+
+TRACE_EVENT(block_rq_complete,
+
+	TP_PROTO(struct request_queue *q, struct request *rq),
+
+	TP_ARGS(q, rq),
+
+	TP_STRUCT__entry(
+		__field(  dev_t,	dev			)
+		__field(  sector_t,	sector			)
+		__field(  unsigned int,	nr_sector		)
+		__field(  int,		errors			)
+		__array(  char,		rwbs,	6		)
+		__dynamic_array( char,	cmd,	blk_cmd_buf_len(rq)	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+		__entry->sector    = blk_pc_request(rq) ? 0 : rq->hard_sector;
+		__entry->nr_sector = blk_pc_request(rq) ?
+						0 : rq->hard_nr_sectors;
+		__entry->errors    = rq->errors;
+
+		blk_fill_rwbs_rq(__entry->rwbs, rq);
+		blk_dump_cmd(__get_str(cmd), rq);
+	),
+
+	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, __get_str(cmd),
+		  __entry->sector, __entry->nr_sector, __entry->errors)
+);
+TRACE_EVENT(block_bio_bounce,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio),
+
+	TP_ARGS(q, bio),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned int,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_bio_complete,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio),
+
+	TP_ARGS(q, bio),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev		)
+		__field( sector_t,	sector		)
+		__field( unsigned,	nr_sector	)
+		__field( int,		error		)
+		__array( char,		rwbs,	6	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%d]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  __entry->sector, __entry->nr_sector, __entry->error)
+);
+
+TRACE_EVENT(block_bio_backmerge,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio),
+
+	TP_ARGS(q, bio),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned int,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_bio_frontmerge,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio),
+
+	TP_ARGS(q, bio),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_bio_queue,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio),
+
+	TP_ARGS(q, bio),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned int,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_getrq,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
+
+	TP_ARGS(q, bio, rw),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned int,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+        ),
+
+	TP_fast_assign(
+		__entry->dev		= bio ? bio->bi_bdev->bd_dev : 0;
+		__entry->sector		= bio ? bio->bi_sector : 0;
+		__entry->nr_sector	= bio ? bio->bi_size >> 9 : 0;
+		blk_fill_rwbs(__entry->rwbs,
+			      bio ? bio->bi_rw : 0, __entry->nr_sector);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+        ),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_sleeprq,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
+
+	TP_ARGS(q, bio, rw),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev			)
+		__field( sector_t,	sector			)
+		__field( unsigned int,	nr_sector		)
+		__array( char,		rwbs,	6		)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio ? bio->bi_bdev->bd_dev : 0;
+		__entry->sector		= bio ? bio->bi_sector : 0;
+		__entry->nr_sector	= bio ? bio->bi_size >> 9 : 0;
+		blk_fill_rwbs(__entry->rwbs,
+			    bio ? bio->bi_rw : 0, __entry->nr_sector);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu + %u [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  __entry->sector, __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_plug,
+
+	TP_PROTO(struct request_queue *q),
+
+	TP_ARGS(q),
+
+	TP_STRUCT__entry(
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("[%s]", __entry->comm)
+);
+
+TRACE_EVENT(block_unplug_timer,
+
+	TP_PROTO(struct request_queue *q),
+
+	TP_ARGS(q),
+
+	TP_STRUCT__entry(
+		__field( int,		nr_rq			)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->nr_rq	= q->rq.count[READ] + q->rq.count[WRITE];
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
+);
+
+TRACE_EVENT(block_unplug_io,
+
+	TP_PROTO(struct request_queue *q),
+
+	TP_ARGS(q),
+
+	TP_STRUCT__entry(
+		__field( int,		nr_rq			)
+		__array( char,		comm,	TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->nr_rq	= q->rq.count[READ] + q->rq.count[WRITE];
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
+);
+
+TRACE_EVENT(block_split,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio,
+		 unsigned int new_sector),
+
+	TP_ARGS(q, bio, new_sector),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev				)
+		__field( sector_t,	sector				)
+		__field( sector_t,	new_sector			)
+		__array( char,		rwbs,		6		)
+		__array( char,		comm,		TASK_COMM_LEN	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->new_sector	= new_sector;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+	),
+
+	TP_printk("%d,%d %s %llu / %llu [%s]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  __entry->sector, __entry->new_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_remap,
+
+	TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
+		 sector_t from),
+
+	TP_ARGS(q, bio, dev, from),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev		)
+		__field( sector_t,	sector		)
+		__field( unsigned int,	nr_sector	)
+		__field( dev_t,		old_dev		)
+		__field( sector_t,	old_sector	)
+		__array( char,		rwbs,	6	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		__entry->old_dev	= dev;
+		__entry->old_sector	= from;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+	),
+
+	TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  __entry->sector, __entry->nr_sector,
+		  MAJOR(__entry->old_dev), MINOR(__entry->old_dev),
+		  __entry->old_sector)
+);
+
+#endif /* _TRACE_BLOCK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 06b85850fab..844164dca90 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -45,7 +45,10 @@ obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
 obj-$(CONFIG_POWER_TRACER) += trace_power.o
 obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
 obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
-obj-$(CONFIG_BLK_DEV_IO_TRACE)	+= blktrace.o
+obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
+ifeq ($(CONFIG_BLOCK),y)
+obj-$(CONFIG_EVENT_TRACING) += blktrace.o
+endif
 obj-$(CONFIG_EVENT_TRACING) += trace_events.o
 obj-$(CONFIG_EVENT_TRACING) += trace_export.o
 obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index e3abf55bc8e..7bd6a9893c2 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -23,10 +23,14 @@
 #include <linux/mutex.h>
 #include <linux/debugfs.h>
 #include <linux/time.h>
-#include <trace/block.h>
 #include <linux/uaccess.h>
+
+#include <trace/events/block.h>
+
 #include "trace_output.h"
 
+#ifdef CONFIG_BLK_DEV_IO_TRACE
+
 static unsigned int blktrace_seq __read_mostly = 1;
 
 static struct trace_array *blk_tr;
@@ -1658,3 +1662,75 @@ int blk_trace_init_sysfs(struct device *dev)
 	return sysfs_create_group(&dev->kobj, &blk_trace_attr_group);
 }
 
+#endif /* CONFIG_BLK_DEV_IO_TRACE */
+
+#ifdef CONFIG_EVENT_TRACING
+
+void blk_dump_cmd(char *buf, struct request *rq)
+{
+	int i, end;
+	int len = rq->cmd_len;
+	unsigned char *cmd = rq->cmd;
+
+	if (!blk_pc_request(rq)) {
+		buf[0] = '\0';
+		return;
+	}
+
+	for (end = len - 1; end >= 0; end--)
+		if (cmd[end])
+			break;
+	end++;
+
+	for (i = 0; i < len; i++) {
+		buf += sprintf(buf, "%s%02x", i == 0 ? "" : " ", cmd[i]);
+		if (i == end && end != len - 1) {
+			sprintf(buf, " ..");
+			break;
+		}
+	}
+}
+
+void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
+{
+	int i = 0;
+
+	if (rw & WRITE)
+		rwbs[i++] = 'W';
+	else if (rw & 1 << BIO_RW_DISCARD)
+		rwbs[i++] = 'D';
+	else if (bytes)
+		rwbs[i++] = 'R';
+	else
+		rwbs[i++] = 'N';
+
+	if (rw & 1 << BIO_RW_AHEAD)
+		rwbs[i++] = 'A';
+	if (rw & 1 << BIO_RW_BARRIER)
+		rwbs[i++] = 'B';
+	if (rw & 1 << BIO_RW_SYNCIO)
+		rwbs[i++] = 'S';
+	if (rw & 1 << BIO_RW_META)
+		rwbs[i++] = 'M';
+
+	rwbs[i] = '\0';
+}
+
+void blk_fill_rwbs_rq(char *rwbs, struct request *rq)
+{
+	int rw = rq->cmd_flags & 0x03;
+	int bytes;
+
+	if (blk_discard_rq(rq))
+		rw |= (1 << BIO_RW_DISCARD);
+
+	if (blk_pc_request(rq))
+		bytes = rq->data_len;
+	else
+		bytes = rq->hard_nr_sectors << 9;
+
+	blk_fill_rwbs(rwbs, rw, bytes);
+}
+
+#endif /* CONFIG_EVENT_TRACING */
+
diff --git a/mm/bounce.c b/mm/bounce.c
index e590272fe7a..65f5e17e411 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -14,16 +14,15 @@
 #include <linux/hash.h>
 #include <linux/highmem.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
 #include <asm/tlbflush.h>
 
+#include <trace/events/block.h>
+
 #define POOL_SIZE	64
 #define ISA_POOL_SIZE	16
 
 static mempool_t *page_pool, *isa_page_pool;
 
-DEFINE_TRACE(block_bio_bounce);
-
 #ifdef CONFIG_HIGHMEM
 static __init int init_emergency_pool(void)
 {
-- 
cgit v1.2.3-70-g09d2


From 80a538e49daddbf3bf783f3464e91bd3181957b2 Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Mon, 8 Jun 2009 14:40:20 +0800
Subject: drm/i915: Enable probe on new chipset

Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 include/drm/drm_pciids.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index fc55db78019..8b4c80cdd27 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -536,4 +536,6 @@
 	{0x8086, 0xa001, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
 	{0x8086, 0xa011, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
 	{0x8086, 0x35e8, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
+	{0x8086, 0x0042, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
+	{0x8086, 0x0046, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
 	{0, 0, 0}
-- 
cgit v1.2.3-70-g09d2


From 6556d1df88fe68f9836beeb43342a336691cb67c Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 9 Jun 2009 14:04:26 -0400
Subject: tracing: fix the block trace points print size

The sector field is either u64 or unsigned long depending on
the arch. This patch casts the sector to unsigned long long to
prevent the printf warnings.

[ Impact: remove compile warnings ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/events/block.h | 45 +++++++++++++++++++++++++++++---------------
 1 file changed, 30 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index a99d1e565bb..53effd496a5 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -37,7 +37,8 @@ TRACE_EVENT(block_rq_abort,
 	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->rwbs, __get_str(cmd),
-		  __entry->sector, __entry->nr_sector, __entry->errors)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->errors)
 );
 
 TRACE_EVENT(block_rq_insert,
@@ -71,7 +72,8 @@ TRACE_EVENT(block_rq_insert,
 	TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->rwbs, __entry->bytes, __get_str(cmd),
-		  __entry->sector, __entry->nr_sector, __entry->comm)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
 );
 
 TRACE_EVENT(block_rq_issue,
@@ -105,7 +107,8 @@ TRACE_EVENT(block_rq_issue,
 	TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->rwbs, __entry->bytes, __get_str(cmd),
-		  __entry->sector, __entry->nr_sector, __entry->comm)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
 );
 
 TRACE_EVENT(block_rq_requeue,
@@ -137,7 +140,8 @@ TRACE_EVENT(block_rq_requeue,
 	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->rwbs, __get_str(cmd),
-		  __entry->sector, __entry->nr_sector, __entry->errors)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->errors)
 );
 
 TRACE_EVENT(block_rq_complete,
@@ -169,7 +173,8 @@ TRACE_EVENT(block_rq_complete,
 	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->rwbs, __get_str(cmd),
-		  __entry->sector, __entry->nr_sector, __entry->errors)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->errors)
 );
 TRACE_EVENT(block_bio_bounce,
 
@@ -195,7 +200,8 @@ TRACE_EVENT(block_bio_bounce,
 
 	TP_printk("%d,%d %s %llu + %u [%s]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  __entry->sector, __entry->nr_sector, __entry->comm)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
 );
 
 TRACE_EVENT(block_bio_complete,
@@ -221,7 +227,8 @@ TRACE_EVENT(block_bio_complete,
 
 	TP_printk("%d,%d %s %llu + %u [%d]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  __entry->sector, __entry->nr_sector, __entry->error)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->error)
 );
 
 TRACE_EVENT(block_bio_backmerge,
@@ -248,7 +255,8 @@ TRACE_EVENT(block_bio_backmerge,
 
 	TP_printk("%d,%d %s %llu + %u [%s]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  __entry->sector, __entry->nr_sector, __entry->comm)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
 );
 
 TRACE_EVENT(block_bio_frontmerge,
@@ -275,7 +283,8 @@ TRACE_EVENT(block_bio_frontmerge,
 
 	TP_printk("%d,%d %s %llu + %u [%s]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  __entry->sector, __entry->nr_sector, __entry->comm)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
 );
 
 TRACE_EVENT(block_bio_queue,
@@ -302,7 +311,8 @@ TRACE_EVENT(block_bio_queue,
 
 	TP_printk("%d,%d %s %llu + %u [%s]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  __entry->sector, __entry->nr_sector, __entry->comm)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
 );
 
 TRACE_EVENT(block_getrq,
@@ -330,7 +340,8 @@ TRACE_EVENT(block_getrq,
 
 	TP_printk("%d,%d %s %llu + %u [%s]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  __entry->sector, __entry->nr_sector, __entry->comm)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
 );
 
 TRACE_EVENT(block_sleeprq,
@@ -358,7 +369,8 @@ TRACE_EVENT(block_sleeprq,
 
 	TP_printk("%d,%d %s %llu + %u [%s]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  __entry->sector, __entry->nr_sector, __entry->comm)
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->comm)
 );
 
 TRACE_EVENT(block_plug,
@@ -441,7 +453,9 @@ TRACE_EVENT(block_split,
 
 	TP_printk("%d,%d %s %llu / %llu [%s]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  __entry->sector, __entry->new_sector, __entry->comm)
+		  (unsigned long long)__entry->sector,
+		  (unsigned long long)__entry->new_sector,
+		  __entry->comm)
 );
 
 TRACE_EVENT(block_remap,
@@ -471,9 +485,10 @@ TRACE_EVENT(block_remap,
 
 	TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
-		  __entry->sector, __entry->nr_sector,
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector,
 		  MAJOR(__entry->old_dev), MINOR(__entry->old_dev),
-		  __entry->old_sector)
+		  (unsigned long long)__entry->old_sector)
 );
 
 #endif /* _TRACE_BLOCK_H */
-- 
cgit v1.2.3-70-g09d2


From 725c624a58a10ef90a2ff889e122158fabf36147 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 8 Jun 2009 19:09:45 -0400
Subject: tracing: add trace_seq_vprint interface

The code to update the print formats for events requires a vprintf
format in the trace_seq. This patch adds that interface.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/trace_seq.h   |  2 ++
 kernel/trace/trace_output.c | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+)

(limited to 'include')

diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index ba9627f00d3..c68bccba207 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -27,6 +27,8 @@ trace_seq_init(struct trace_seq *s)
 #ifdef CONFIG_TRACING
 extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
+extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
+	__attribute__ ((format (printf, 2, 0)));
 extern int
 trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
 extern void trace_print_seq(struct seq_file *m, struct trace_seq *s);
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 425725c1622..c05aff465dc 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -100,6 +100,38 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 }
 EXPORT_SYMBOL_GPL(trace_seq_printf);
 
+/**
+ * trace_seq_vprintf - sequence printing of trace information
+ * @s: trace sequence descriptor
+ * @fmt: printf format string
+ *
+ * The tracer may use either sequence operations or its own
+ * copy to user routines. To simplify formating of a trace
+ * trace_seq_printf is used to store strings into a special
+ * buffer (@s). Then the output may be either used by
+ * the sequencer or pulled into another buffer.
+ */
+int
+trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
+{
+	int len = (PAGE_SIZE - 1) - s->len;
+	int ret;
+
+	if (!len)
+		return 0;
+
+	ret = vsnprintf(s->buffer + s->len, len, fmt, args);
+
+	/* If we can't write it all, don't bother writing anything */
+	if (ret >= len)
+		return 0;
+
+	s->len += ret;
+
+	return len;
+}
+EXPORT_SYMBOL_GPL(trace_seq_vprintf);
+
 int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
 {
 	int len = (PAGE_SIZE - 1) - s->len;
-- 
cgit v1.2.3-70-g09d2


From fd6c3a8dc44329d3aff9a578b5120982f63711ee Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Thu, 12 Mar 2009 10:58:33 +0000
Subject: initconst adjustments

- add .init.rodata to INIT_DATA, and group all initconst flavors
  together
- move strings generated from __setup_param() into .init.rodata
- add .*init.rodata to modpost's sets of init sections
- make modpost warn about references between meminit and cpuinit
  as well as memexit and cpuexit sections (as CPU and memory
  hotplug are independently selectable features)

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
---
 include/asm-generic/vmlinux.lds.h |  5 ++--
 include/linux/init.h              |  3 ++-
 scripts/mod/modpost.c             | 48 +++++++++++++++++++++++++++++++--------
 3 files changed, 43 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 89853bcd27a..3edb1149974 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -336,10 +336,11 @@
 #define INIT_DATA							\
 	*(.init.data)							\
 	DEV_DISCARD(init.data)						\
-	DEV_DISCARD(init.rodata)					\
 	CPU_DISCARD(init.data)						\
-	CPU_DISCARD(init.rodata)					\
 	MEM_DISCARD(init.data)						\
+	*(.init.rodata)							\
+	DEV_DISCARD(init.rodata)					\
+	CPU_DISCARD(init.rodata)					\
 	MEM_DISCARD(init.rodata)
 
 #define INIT_TEXT							\
diff --git a/include/linux/init.h b/include/linux/init.h
index 0e06c176f18..9f70c9f25d4 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -225,7 +225,8 @@ struct obs_kernel_param {
  * obs_kernel_param "array" too far apart in .init.setup.
  */
 #define __setup_param(str, unique_id, fn, early)			\
-	static char __setup_str_##unique_id[] __initdata __aligned(1) = str; \
+	static const char __setup_str_##unique_id[] __initconst	\
+		__aligned(1) = str; \
 	static struct obs_kernel_param __setup_##unique_id	\
 		__used __section(.init.setup)			\
 		__attribute__((aligned((sizeof(long)))))	\
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 161b7846733..94e71efb2c1 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -763,6 +763,8 @@ static void check_section(const char *modname, struct elf_info *elf,
 
 
 #define ALL_INIT_DATA_SECTIONS \
+	".init.setup$", ".init.rodata$", \
+	".devinit.rodata$", ".cpuinit.rodata$", ".meminit.rodata$" \
 	".init.data$", ".devinit.data$", ".cpuinit.data$", ".meminit.data$"
 #define ALL_EXIT_DATA_SECTIONS \
 	".exit.data$", ".devexit.data$", ".cpuexit.data$", ".memexit.data$"
@@ -772,21 +774,23 @@ static void check_section(const char *modname, struct elf_info *elf,
 #define ALL_EXIT_TEXT_SECTIONS \
 	".exit.text$", ".devexit.text$", ".cpuexit.text$", ".memexit.text$"
 
-#define ALL_INIT_SECTIONS ALL_INIT_DATA_SECTIONS, ALL_INIT_TEXT_SECTIONS
-#define ALL_EXIT_SECTIONS ALL_EXIT_DATA_SECTIONS, ALL_EXIT_TEXT_SECTIONS
+#define ALL_INIT_SECTIONS INIT_SECTIONS, DEV_INIT_SECTIONS, \
+	CPU_INIT_SECTIONS, MEM_INIT_SECTIONS
+#define ALL_EXIT_SECTIONS EXIT_SECTIONS, DEV_EXIT_SECTIONS, \
+	CPU_EXIT_SECTIONS, MEM_EXIT_SECTIONS
 
 #define DATA_SECTIONS ".data$", ".data.rel$"
 #define TEXT_SECTIONS ".text$"
 
-#define INIT_SECTIONS      ".init.data$", ".init.text$"
-#define DEV_INIT_SECTIONS  ".devinit.data$", ".devinit.text$"
-#define CPU_INIT_SECTIONS  ".cpuinit.data$", ".cpuinit.text$"
-#define MEM_INIT_SECTIONS  ".meminit.data$", ".meminit.text$"
+#define INIT_SECTIONS      ".init.*"
+#define DEV_INIT_SECTIONS  ".devinit.*"
+#define CPU_INIT_SECTIONS  ".cpuinit.*"
+#define MEM_INIT_SECTIONS  ".meminit.*"
 
-#define EXIT_SECTIONS      ".exit.data$", ".exit.text$"
-#define DEV_EXIT_SECTIONS  ".devexit.data$", ".devexit.text$"
-#define CPU_EXIT_SECTIONS  ".cpuexit.data$", ".cpuexit.text$"
-#define MEM_EXIT_SECTIONS  ".memexit.data$", ".memexit.text$"
+#define EXIT_SECTIONS      ".exit.*"
+#define DEV_EXIT_SECTIONS  ".devexit.*"
+#define CPU_EXIT_SECTIONS  ".cpuexit.*"
+#define MEM_EXIT_SECTIONS  ".memexit.*"
 
 /* init data sections */
 static const char *init_data_sections[] = { ALL_INIT_DATA_SECTIONS, NULL };
@@ -869,12 +873,36 @@ const struct sectioncheck sectioncheck[] = {
 	.tosec   = { INIT_SECTIONS, NULL },
 	.mismatch = XXXINIT_TO_INIT,
 },
+/* Do not reference cpuinit code/data from meminit code/data */
+{
+	.fromsec = { MEM_INIT_SECTIONS, NULL },
+	.tosec   = { CPU_INIT_SECTIONS, NULL },
+	.mismatch = XXXINIT_TO_INIT,
+},
+/* Do not reference meminit code/data from cpuinit code/data */
+{
+	.fromsec = { CPU_INIT_SECTIONS, NULL },
+	.tosec   = { MEM_INIT_SECTIONS, NULL },
+	.mismatch = XXXINIT_TO_INIT,
+},
 /* Do not reference exit code/data from devexit/cpuexit/memexit code/data */
 {
 	.fromsec = { DEV_EXIT_SECTIONS, CPU_EXIT_SECTIONS, MEM_EXIT_SECTIONS, NULL },
 	.tosec   = { EXIT_SECTIONS, NULL },
 	.mismatch = XXXEXIT_TO_EXIT,
 },
+/* Do not reference cpuexit code/data from memexit code/data */
+{
+	.fromsec = { MEM_EXIT_SECTIONS, NULL },
+	.tosec   = { CPU_EXIT_SECTIONS, NULL },
+	.mismatch = XXXEXIT_TO_EXIT,
+},
+/* Do not reference memexit code/data from cpuexit code/data */
+{
+	.fromsec = { CPU_EXIT_SECTIONS, NULL },
+	.tosec   = { MEM_EXIT_SECTIONS, NULL },
+	.mismatch = XXXEXIT_TO_EXIT,
+},
 /* Do not use exit code/data from init code */
 {
 	.fromsec = { ALL_INIT_SECTIONS, NULL },
-- 
cgit v1.2.3-70-g09d2


From ef53dae8658cf0e93d380983824a661067948d87 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Sun, 7 Jun 2009 20:46:37 +0200
Subject: Improve vmlinux.lds.h support for arch specific linker scripts

To support alingment of the individual architecture specific linker scripts
provide a set of general definitions in vmlinux.lds.h

With these definitions applied the diverse linekr scripts can be reduced
in line count and their readability are improved - IMO.

A sample linker script is included to give the preferred
order of the sections for the architectures that do not
have any special requirments.

These definitions are also a first step towards eventual
support for -ffunction-sections.
The definitions makes it much easier to do a global
renaming of section names - but the main purpose is
to clean up the linker scripts.

Tim Aboot has provided a lot of inputs to improve
the definitions - all faults are mine.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Tim Abbott <tabbott@mit.edu>
---
 include/asm-generic/vmlinux.lds.h | 231 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 224 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 3edb1149974..fba42236e94 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -1,4 +1,58 @@
-#include <linux/section-names.h>
+/*
+ * Helper macros to support writing architecture specific
+ * linker scripts.
+ *
+ * A minimal linker scripts has following content:
+ * [This is a sample, architectures may have special requiriements]
+ *
+ * OUTPUT_FORMAT(...)
+ * OUTPUT_ARCH(...)
+ * ENTRY(...)
+ * SECTIONS
+ * {
+ *	. = START;
+ *	__init_begin = .;
+ *	HEAD_SECTION
+ *	INIT_TEXT_SECTION(PAGE_SIZE)
+ *	INIT_DATA_SECTION(...)
+ *	PERCPU(PAGE_SIZE)
+ *	__init_end = .;
+ *
+ *	_stext = .;
+ *	TEXT_SECTION = 0
+ *	_etext = .;
+ *
+ *      _sdata = .;
+ *	RO_DATA_SECTION(PAGE_SIZE)
+ *	RW_DATA_SECTION(...)
+ *	_edata = .;
+ *
+ *	EXCEPTION_TABLE(...)
+ *	NOTES
+ *
+ *	__bss_start = .;
+ *	BSS_SECTION(0, 0)
+ *	__bss_stop = .;
+ *	_end = .;
+ *
+ *	/DISCARD/ : {
+ *		EXIT_TEXT
+ *		EXIT_DATA
+ *		*(.exitcall.exit)
+ *	}
+ *	STABS_DEBUG
+ *	DWARF_DEBUG
+ * }
+ *
+ * [__init_begin, __init_end] is the init section that may be freed after init
+ * [_stext, _etext] is the text section
+ * [_sdata, _edata] is the data section
+ *
+ * Some of the included output section have their own set of constants.
+ * Examples are: [__initramfs_start, __initramfs_end] for initramfs and
+ *               [__nosave_begin, __nosave_end] for the nosave data
+ */
+ #include <linux/section-names.h>
 
 #ifndef LOAD_OFFSET
 #define LOAD_OFFSET 0
@@ -116,7 +170,36 @@
 	FTRACE_EVENTS()							\
 	TRACE_SYSCALLS()
 
-#define RO_DATA(align)							\
+/*
+ * Data section helpers
+ */
+#define NOSAVE_DATA							\
+	. = ALIGN(PAGE_SIZE);						\
+	VMLINUX_SYMBOL(__nosave_begin) = .;				\
+	*(.data.nosave)							\
+	. = ALIGN(PAGE_SIZE);						\
+	VMLINUX_SYMBOL(__nosave_end) = .;
+
+#define PAGE_ALIGNED_DATA(page_align)					\
+	. = ALIGN(page_align);						\
+	*(.data.page_aligned)
+
+#define READ_MOSTLY_DATA(align)						\
+	. = ALIGN(align);						\
+	*(.data.read_mostly)
+
+#define CACHELINE_ALIGNED_DATA(align)					\
+	. = ALIGN(align);						\
+	*(.data.cacheline_aligned)
+
+#define INIT_TASK(align)						\
+	. = ALIGN(align);						\
+	*(.data.init_task)
+
+/*
+ * Read only Data
+ */
+#define RO_DATA_SECTION(align)						\
 	. = ALIGN((align));						\
 	.rodata           : AT(ADDR(.rodata) - LOAD_OFFSET) {		\
 		VMLINUX_SYMBOL(__start_rodata) = .;			\
@@ -270,9 +353,10 @@
 	}								\
 	. = ALIGN((align));
 
-/* RODATA provided for backward compatibility.
+/* RODATA & RO_DATA provided for backward compatibility.
  * All archs are supposed to use RO_DATA() */
-#define RODATA RO_DATA(4096)
+#define RODATA          RO_DATA_SECTION(4096)
+#define RO_DATA(align)  RO_DATA_SECTION(align)
 
 #define SECURITY_INIT							\
 	.security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) { \
@@ -332,6 +416,31 @@
 /* Section used for early init (in .S files) */
 #define HEAD_TEXT  *(HEAD_TEXT_SECTION)
 
+#define HEAD_SECTION							\
+	.head.text : AT(ADDR(.head.text) - LOAD_OFFSET) {		\
+		HEAD_TEXT						\
+	}
+
+/*
+ * Exception table
+ */
+#define EXCEPTION_TABLE(align)						\
+	. = ALIGN(align);						\
+	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {		\
+		VMLINUX_SYMBOL(__start___ex_table) = .;			\
+		*(__ex_table)						\
+		VMLINUX_SYMBOL(__stop___ex_table) = .;			\
+	}
+
+/*
+ * Init task
+ */
+#define INIT_TASK_DATA(align)						\
+	. = ALIGN(align);						\
+	.data.init_task : {						\
+		INIT_TASK						\
+	}
+
 /* init and exit section handling */
 #define INIT_DATA							\
 	*(.init.data)							\
@@ -364,9 +473,32 @@
 	CPU_DISCARD(exit.text)						\
 	MEM_DISCARD(exit.text)
 
-		/* DWARF debug sections.
-		Symbols in the DWARF debugging sections are relative to
-		the beginning of the section so we begin them at 0.  */
+/*
+ * bss (Block Started by Symbol) - uninitialized data
+ * zeroed during startup
+ */
+#define SBSS								\
+	.sbss : AT(ADDR(.sbss) - LOAD_OFFSET) {				\
+		*(.sbss)						\
+		*(.scommon)						\
+	}
+
+#define BSS(bss_align)							\
+	. = ALIGN(bss_align);						\
+	.bss : AT(ADDR(.bss) - LOAD_OFFSET) {				\
+		VMLINUX_SYMBOL(__bss_start) = .;			\
+		*(.bss.page_aligned)					\
+		*(.dynbss)						\
+		*(.bss)							\
+		*(COMMON)						\
+		VMLINUX_SYMBOL(__bss_stop) = .;				\
+	}
+
+/*
+ * DWARF debug sections.
+ * Symbols in the DWARF debugging sections are relative to
+ * the beginning of the section so we begin them at 0.
+ */
 #define DWARF_DEBUG							\
 		/* DWARF 1 */						\
 		.debug          0 : { *(.debug) }			\
@@ -433,6 +565,12 @@
 		VMLINUX_SYMBOL(__stop_notes) = .;			\
 	}
 
+#define INIT_SETUP(initsetup_align)					\
+		. = ALIGN(initsetup_align);				\
+		VMLINUX_SYMBOL(__setup_start) = .;			\
+		*(.init.setup)						\
+		VMLINUX_SYMBOL(__setup_end) = .;
+
 #define INITCALLS							\
 	*(.initcallearly.init)						\
 	VMLINUX_SYMBOL(__early_initcall_end) = .;			\
@@ -454,6 +592,31 @@
   	*(.initcall7.init)						\
   	*(.initcall7s.init)
 
+#define INIT_CALLS							\
+		VMLINUX_SYMBOL(__initcall_start) = .;			\
+		INITCALLS						\
+		VMLINUX_SYMBOL(__initcall_end) = .;
+
+#define CON_INITCALL							\
+		VMLINUX_SYMBOL(__con_initcall_start) = .;		\
+		*(.con_initcall.init)					\
+		VMLINUX_SYMBOL(__con_initcall_end) = .;
+
+#define SECURITY_INITCALL						\
+		VMLINUX_SYMBOL(__security_initcall_start) = .;		\
+		*(.security_initcall.init)				\
+		VMLINUX_SYMBOL(__security_initcall_end) = .;
+
+#ifdef CONFIG_BLK_DEV_INITRD
+#define INIT_RAM_FS							\
+	. = ALIGN(PAGE_SIZE);						\
+	VMLINUX_SYMBOL(__initramfs_start) = .;				\
+	*(.init.ramfs)							\
+	VMLINUX_SYMBOL(__initramfs_end) = .;
+#else
+#define INITRAMFS
+#endif
+
 /**
  * PERCPU_VADDR - define output section for percpu area
  * @vaddr: explicit base address (optional)
@@ -510,3 +673,57 @@
 		*(.data.percpu.shared_aligned)				\
 		VMLINUX_SYMBOL(__per_cpu_end) = .;			\
 	}
+
+
+/*
+ * Definition of the high level *_SECTION macros
+ * They will fit only a subset of the architectures
+ */
+
+
+/*
+ * Writeable data.
+ * All sections are combined in a single .data section.
+ * The sections following CONSTRUCTORS are arranged so their
+ * typical alignment matches.
+ * A cacheline is typical/always less than a PAGE_SIZE so
+ * the sections that has this restriction (or similar)
+ * is located before the ones requiring PAGE_SIZE alignment.
+ * NOSAVE_DATA starts and ends with a PAGE_SIZE alignment which
+ * matches the requirment of PAGE_ALIGNED_DATA.
+ *
+/* use 0 as page_align if page_aligned data is not used */
+#define RW_DATA_SECTION(cacheline, nosave, pagealigned, inittask)	\
+	. = ALIGN(PAGE_SIZE);						\
+	.data : AT(ADDR(.data) - LOAD_OFFSET) {				\
+		INIT_TASK(inittask)					\
+		CACHELINE_ALIGNED_DATA(cacheline)			\
+		READ_MOSTLY_DATA(cacheline)				\
+		DATA_DATA						\
+		CONSTRUCTORS						\
+		NOSAVE_DATA(nosave)					\
+		PAGE_ALIGNED_DATA(pagealigned)				\
+	}
+
+#define INIT_TEXT_SECTION(inittext_align)				\
+	. = ALIGN(inittext_align);					\
+	.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {		\
+		VMLINUX_SYMBOL(_sinittext) = .;				\
+		INIT_TEXT						\
+		VMLINUX_SYMBOL(_einittext) = .;				\
+	}
+
+#define INIT_DATA_SECTION(initsetup_align)				\
+	.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {		\
+		INIT_DATA						\
+		INIT_SETUP(initsetup_align)				\
+		INIT_CALLS						\
+		CON_INITCALL						\
+		SECURITY_INITCALL					\
+		INIT_RAM_FS						\
+	}
+
+#define BSS_SECTION(sbss_align, bss_align)				\
+	SBSS								\
+	BSS(bss_align)							\
+	. = ALIGN(4);							\
-- 
cgit v1.2.3-70-g09d2


From 1506e30b5f25f6c3357167a18f0e4ae6f5662a28 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 9 Jun 2009 17:49:06 -0700
Subject: rfkill: include err.h

Since we use ERR_PTR and similar macros, we need to include
linux/err.h.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rfkill.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index ee3eddea856..d7e818ad0bc 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -105,6 +105,7 @@ enum rfkill_user_states {
 #include <linux/mutex.h>
 #include <linux/device.h>
 #include <linux/leds.h>
+#include <linux/err.h>
 
 /* this is opaque */
 struct rfkill;
-- 
cgit v1.2.3-70-g09d2


From cf9e4e15e8f6306b2559979269ead7c02e6b2b95 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Wed, 11 Feb 2009 16:03:36 +0800
Subject: KVM: Split IOAPIC structure

Prepared for reuse ioapic_redir_entry for MSI.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_types.h | 17 +++++++++++++++++
 virt/kvm/ioapic.c         |  6 +++---
 virt/kvm/ioapic.h         | 17 +----------------
 3 files changed, 21 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 2b8318c83e5..b84aca3c4ad 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -40,4 +40,21 @@ typedef unsigned long  hfn_t;
 
 typedef hfn_t pfn_t;
 
+union kvm_ioapic_redirect_entry {
+	u64 bits;
+	struct {
+		u8 vector;
+		u8 delivery_mode:3;
+		u8 dest_mode:1;
+		u8 delivery_status:1;
+		u8 polarity:1;
+		u8 remote_irr:1;
+		u8 trig_mode:1;
+		u8 mask:1;
+		u8 reserve:7;
+		u8 reserved[4];
+		u8 dest_id;
+	} fields;
+};
+
 #endif /* __KVM_TYPES_H__ */
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index c3b99def9cb..812801317e3 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -85,7 +85,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
 
 static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
 {
-	union ioapic_redir_entry *pent;
+	union kvm_ioapic_redirect_entry *pent;
 	int injected = -1;
 
 	pent = &ioapic->redirtbl[idx];
@@ -284,7 +284,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 {
 	u32 old_irr = ioapic->irr;
 	u32 mask = 1 << irq;
-	union ioapic_redir_entry entry;
+	union kvm_ioapic_redirect_entry entry;
 	int ret = 1;
 
 	if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
@@ -305,7 +305,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
 				    int trigger_mode)
 {
-	union ioapic_redir_entry *ent;
+	union kvm_ioapic_redirect_entry *ent;
 
 	ent = &ioapic->redirtbl[pin];
 
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index a34bd5e6436..008ec873d01 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -40,22 +40,7 @@ struct kvm_ioapic {
 	u32 id;
 	u32 irr;
 	u32 pad;
-	union ioapic_redir_entry {
-		u64 bits;
-		struct {
-			u8 vector;
-			u8 delivery_mode:3;
-			u8 dest_mode:1;
-			u8 delivery_status:1;
-			u8 polarity:1;
-			u8 remote_irr:1;
-			u8 trig_mode:1;
-			u8 mask:1;
-			u8 reserve:7;
-			u8 reserved[4];
-			u8 dest_id;
-		} fields;
-	} redirtbl[IOAPIC_NUM_PINS];
+	union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS];
 	struct kvm_io_device dev;
 	struct kvm *kvm;
 	void (*ack_notifier)(void *opaque, int irq);
-- 
cgit v1.2.3-70-g09d2


From 116191b69b608d0f1513e3abe71d6a46800f2bd6 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Wed, 11 Feb 2009 16:03:37 +0800
Subject: KVM: Unify the delivery of IOAPIC and MSI interrupts

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h |  3 ++
 virt/kvm/ioapic.c        | 91 +++++++++++++++++-----------------------------
 virt/kvm/irq_comm.c      | 95 +++++++++++++++++++++++++++++-------------------
 3 files changed, 95 insertions(+), 94 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 894a56e365e..1a2f98fbece 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -352,6 +352,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 				      struct kvm_irq_mask_notifier *kimn);
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
 
+void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
+				   union kvm_ioapic_redirect_entry *entry,
+				   unsigned long *deliver_bitmask);
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 812801317e3..883fd0dc9b7 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -203,79 +203,56 @@ u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
 
 static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 {
-	u8 dest = ioapic->redirtbl[irq].fields.dest_id;
-	u8 dest_mode = ioapic->redirtbl[irq].fields.dest_mode;
-	u8 delivery_mode = ioapic->redirtbl[irq].fields.delivery_mode;
-	u8 vector = ioapic->redirtbl[irq].fields.vector;
-	u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode;
-	u32 deliver_bitmask;
+	union kvm_ioapic_redirect_entry entry = ioapic->redirtbl[irq];
+	unsigned long deliver_bitmask;
 	struct kvm_vcpu *vcpu;
 	int vcpu_id, r = -1;
 
 	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
 		     "vector=%x trig_mode=%x\n",
-		     dest, dest_mode, delivery_mode, vector, trig_mode);
+		     entry.fields.dest, entry.fields.dest_mode,
+		     entry.fields.delivery_mode, entry.fields.vector,
+		     entry.fields.trig_mode);
 
-	deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, dest,
-							  dest_mode);
+	kvm_get_intr_delivery_bitmask(ioapic, &entry, &deliver_bitmask);
 	if (!deliver_bitmask) {
 		ioapic_debug("no target on destination\n");
 		return 0;
 	}
 
-	switch (delivery_mode) {
-	case IOAPIC_LOWEST_PRIORITY:
-		vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
-				deliver_bitmask);
+	/* Always delivery PIT interrupt to vcpu 0 */
 #ifdef CONFIG_X86
-		if (irq == 0)
-			vcpu = ioapic->kvm->vcpus[0];
+	if (irq == 0)
+		deliver_bitmask = 1;
 #endif
-		if (vcpu != NULL)
-			r = ioapic_inj_irq(ioapic, vcpu, vector,
-				       trig_mode, delivery_mode);
-		else
-			ioapic_debug("null lowest prio vcpu: "
-				     "mask=%x vector=%x delivery_mode=%x\n",
-				     deliver_bitmask, vector, IOAPIC_LOWEST_PRIORITY);
-		break;
-	case IOAPIC_FIXED:
-#ifdef CONFIG_X86
-		if (irq == 0)
-			deliver_bitmask = 1;
-#endif
-		for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
-			if (!(deliver_bitmask & (1 << vcpu_id)))
-				continue;
-			deliver_bitmask &= ~(1 << vcpu_id);
-			vcpu = ioapic->kvm->vcpus[vcpu_id];
-			if (vcpu) {
+
+	for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
+		if (!(deliver_bitmask & (1 << vcpu_id)))
+			continue;
+		deliver_bitmask &= ~(1 << vcpu_id);
+		vcpu = ioapic->kvm->vcpus[vcpu_id];
+		if (vcpu) {
+			if (entry.fields.delivery_mode ==
+					IOAPIC_LOWEST_PRIORITY ||
+			    entry.fields.delivery_mode == IOAPIC_FIXED) {
 				if (r < 0)
 					r = 0;
-				r += ioapic_inj_irq(ioapic, vcpu, vector,
-					       trig_mode, delivery_mode);
-			}
-		}
-		break;
-	case IOAPIC_NMI:
-		for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
-			if (!(deliver_bitmask & (1 << vcpu_id)))
-				continue;
-			deliver_bitmask &= ~(1 << vcpu_id);
-			vcpu = ioapic->kvm->vcpus[vcpu_id];
-			if (vcpu) {
-				ioapic_inj_nmi(vcpu);
+				r += ioapic_inj_irq(ioapic, vcpu,
+						    entry.fields.vector,
+						    entry.fields.trig_mode,
+						    entry.fields.delivery_mode);
+			} else if (entry.fields.delivery_mode == IOAPIC_NMI) {
 				r = 1;
-			}
-			else
-				ioapic_debug("NMI to vcpu %d failed\n",
-						vcpu->vcpu_id);
-		}
-		break;
-	default:
-		printk(KERN_WARNING "Unsupported delivery mode %d\n",
-		       delivery_mode);
-		break;
+				ioapic_inj_nmi(vcpu);
+			} else
+				ioapic_debug("unsupported delivery mode %x!\n",
+					     entry.fields.delivery_mode);
+		} else
+			ioapic_debug("null destination vcpu: "
+				     "mask=%x vector=%x delivery_mode=%x\n",
+				     entry.fields.deliver_bitmask,
+				     entry.fields.vector,
+				     entry.fields.delivery_mode);
 	}
 	return r;
 }
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 864ac5483ba..aec7a0d93a3 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -43,53 +43,74 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
 	return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
 }
 
+void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
+				   union kvm_ioapic_redirect_entry *entry,
+				   unsigned long *deliver_bitmask)
+{
+	struct kvm_vcpu *vcpu;
+
+	*deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
+				entry->fields.dest_id, entry->fields.dest_mode);
+	switch (entry->fields.delivery_mode) {
+	case IOAPIC_LOWEST_PRIORITY:
+		vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm,
+				entry->fields.vector, *deliver_bitmask);
+		*deliver_bitmask = 1 << vcpu->vcpu_id;
+		break;
+	case IOAPIC_FIXED:
+	case IOAPIC_NMI:
+		break;
+	default:
+		if (printk_ratelimit())
+			printk(KERN_INFO "kvm: unsupported delivery mode %d\n",
+				entry->fields.delivery_mode);
+		*deliver_bitmask = 0;
+	}
+}
+
 static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 		       struct kvm *kvm, int level)
 {
 	int vcpu_id, r = -1;
 	struct kvm_vcpu *vcpu;
 	struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
-	int dest_id = (e->msi.address_lo & MSI_ADDR_DEST_ID_MASK)
-			>> MSI_ADDR_DEST_ID_SHIFT;
-	int vector = (e->msi.data & MSI_DATA_VECTOR_MASK)
-			>> MSI_DATA_VECTOR_SHIFT;
-	int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
-				(unsigned long *)&e->msi.address_lo);
-	int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
-				(unsigned long *)&e->msi.data);
-	int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT,
-				(unsigned long *)&e->msi.data);
-	u32 deliver_bitmask;
+	union kvm_ioapic_redirect_entry entry;
+	unsigned long deliver_bitmask;
 
 	BUG_ON(!ioapic);
 
-	deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
-				dest_id, dest_mode);
-	/* IOAPIC delivery mode value is the same as MSI here */
-	switch (delivery_mode) {
-	case IOAPIC_LOWEST_PRIORITY:
-		vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
-				deliver_bitmask);
-		if (vcpu != NULL)
-			r = kvm_apic_set_irq(vcpu, vector, trig_mode);
-		else
-			printk(KERN_INFO "kvm: null lowest priority vcpu!\n");
-		break;
-	case IOAPIC_FIXED:
-		for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
-			if (!(deliver_bitmask & (1 << vcpu_id)))
-				continue;
-			deliver_bitmask &= ~(1 << vcpu_id);
-			vcpu = ioapic->kvm->vcpus[vcpu_id];
-			if (vcpu) {
-				if (r < 0)
-					r = 0;
-				r += kvm_apic_set_irq(vcpu, vector, trig_mode);
-			}
+	entry.bits = 0;
+	entry.fields.dest_id = (e->msi.address_lo &
+			MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+	entry.fields.vector = (e->msi.data &
+			MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
+	entry.fields.dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
+			(unsigned long *)&e->msi.address_lo);
+	entry.fields.trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
+			(unsigned long *)&e->msi.data);
+	entry.fields.delivery_mode = test_bit(
+			MSI_DATA_DELIVERY_MODE_SHIFT,
+			(unsigned long *)&e->msi.data);
+
+	/* TODO Deal with RH bit of MSI message address */
+
+	kvm_get_intr_delivery_bitmask(ioapic, &entry, &deliver_bitmask);
+
+	if (!deliver_bitmask) {
+		printk(KERN_WARNING "kvm: no destination for MSI delivery!");
+		return -1;
+	}
+	for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
+		if (!(deliver_bitmask & (1 << vcpu_id)))
+			continue;
+		deliver_bitmask &= ~(1 << vcpu_id);
+		vcpu = ioapic->kvm->vcpus[vcpu_id];
+		if (vcpu) {
+			if (r < 0)
+				r = 0;
+			r += kvm_apic_set_irq(vcpu, entry.fields.vector,
+					      entry.fields.trig_mode);
 		}
-		break;
-	default:
-		break;
 	}
 	return r;
 }
-- 
cgit v1.2.3-70-g09d2


From c1e01514296e8a4a43ff0c88dcff635cb90feb5f Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Wed, 25 Feb 2009 17:22:26 +0800
Subject: KVM: Ioctls for init MSI-X entry

Introduce KVM_SET_MSIX_NR and KVM_SET_MSIX_ENTRY two ioctls.

This two ioctls are used by userspace to specific guest device MSI-X entry
number and correlate MSI-X entry with GSI during the initialization stage.

MSI-X should be well initialzed before enabling.

Don't support change MSI-X entry number for now.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm.h      |  18 ++++++++
 include/linux/kvm_host.h |  10 +++++
 virt/kvm/kvm_main.c      | 104 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 132 insertions(+)

(limited to 'include')

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 8cc137911b3..78cdee8c635 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -487,6 +487,10 @@ struct kvm_irq_routing {
 #define KVM_REINJECT_CONTROL      _IO(KVMIO, 0x71)
 #define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
 				     struct kvm_assigned_pci_dev)
+#define KVM_ASSIGN_SET_MSIX_NR \
+			_IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr)
+#define KVM_ASSIGN_SET_MSIX_ENTRY \
+			_IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
 
 /*
  * ioctls for vcpu fds
@@ -607,4 +611,18 @@ struct kvm_assigned_irq {
 #define KVM_DEV_IRQ_ASSIGN_MSI_ACTION	KVM_DEV_IRQ_ASSIGN_ENABLE_MSI
 #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI	(1 << 0)
 
+struct kvm_assigned_msix_nr {
+	__u32 assigned_dev_id;
+	__u16 entry_nr;
+	__u16 padding;
+};
+
+#define KVM_MAX_MSIX_PER_DEV		512
+struct kvm_assigned_msix_entry {
+	__u32 assigned_dev_id;
+	__u32 gsi;
+	__u16 entry; /* The index of entry in the MSI-X table */
+	__u16 padding[3];
+};
+
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1a2f98fbece..432edc27e82 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -319,6 +319,12 @@ struct kvm_irq_ack_notifier {
 	void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
 };
 
+struct kvm_guest_msix_entry {
+	u32 vector;
+	u16 entry;
+	u16 flags;
+};
+
 struct kvm_assigned_dev_kernel {
 	struct kvm_irq_ack_notifier ack_notifier;
 	struct work_struct interrupt_work;
@@ -326,13 +332,17 @@ struct kvm_assigned_dev_kernel {
 	int assigned_dev_id;
 	int host_busnr;
 	int host_devfn;
+	unsigned int entries_nr;
 	int host_irq;
 	bool host_irq_disabled;
+	struct msix_entry *host_msix_entries;
 	int guest_irq;
+	struct kvm_guest_msix_entry *guest_msix_entries;
 #define KVM_ASSIGNED_DEV_GUEST_INTX	(1 << 0)
 #define KVM_ASSIGNED_DEV_GUEST_MSI	(1 << 1)
 #define KVM_ASSIGNED_DEV_HOST_INTX	(1 << 8)
 #define KVM_ASSIGNED_DEV_HOST_MSI	(1 << 9)
+#define KVM_ASSIGNED_DEV_MSIX		((1 << 2) | (1 << 10))
 	unsigned long irq_requested_type;
 	int irq_source_id;
 	int flags;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4d0dd390aa5..1ceb96901f3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1593,6 +1593,88 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
 	return 0;
 }
 
+#ifdef __KVM_HAVE_MSIX
+static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
+				    struct kvm_assigned_msix_nr *entry_nr)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *adev;
+
+	mutex_lock(&kvm->lock);
+
+	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      entry_nr->assigned_dev_id);
+	if (!adev) {
+		r = -EINVAL;
+		goto msix_nr_out;
+	}
+
+	if (adev->entries_nr == 0) {
+		adev->entries_nr = entry_nr->entry_nr;
+		if (adev->entries_nr == 0 ||
+		    adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) {
+			r = -EINVAL;
+			goto msix_nr_out;
+		}
+
+		adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
+						entry_nr->entry_nr,
+						GFP_KERNEL);
+		if (!adev->host_msix_entries) {
+			r = -ENOMEM;
+			goto msix_nr_out;
+		}
+		adev->guest_msix_entries = kzalloc(
+				sizeof(struct kvm_guest_msix_entry) *
+				entry_nr->entry_nr, GFP_KERNEL);
+		if (!adev->guest_msix_entries) {
+			kfree(adev->host_msix_entries);
+			r = -ENOMEM;
+			goto msix_nr_out;
+		}
+	} else /* Not allowed set MSI-X number twice */
+		r = -EINVAL;
+msix_nr_out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
+				       struct kvm_assigned_msix_entry *entry)
+{
+	int r = 0, i;
+	struct kvm_assigned_dev_kernel *adev;
+
+	mutex_lock(&kvm->lock);
+
+	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      entry->assigned_dev_id);
+
+	if (!adev) {
+		r = -EINVAL;
+		goto msix_entry_out;
+	}
+
+	for (i = 0; i < adev->entries_nr; i++)
+		if (adev->guest_msix_entries[i].vector == 0 ||
+		    adev->guest_msix_entries[i].entry == entry->entry) {
+			adev->guest_msix_entries[i].entry = entry->entry;
+			adev->guest_msix_entries[i].vector = entry->gsi;
+			adev->host_msix_entries[i].entry = entry->entry;
+			break;
+		}
+	if (i == adev->entries_nr) {
+		r = -ENOSPC;
+		goto msix_entry_out;
+	}
+
+msix_entry_out:
+	mutex_unlock(&kvm->lock);
+
+	return r;
+}
+#endif
+
 static long kvm_vcpu_ioctl(struct file *filp,
 			   unsigned int ioctl, unsigned long arg)
 {
@@ -1917,7 +1999,29 @@ static long kvm_vm_ioctl(struct file *filp,
 		vfree(entries);
 		break;
 	}
+#ifdef __KVM_HAVE_MSIX
+	case KVM_ASSIGN_SET_MSIX_NR: {
+		struct kvm_assigned_msix_nr entry_nr;
+		r = -EFAULT;
+		if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
+			goto out;
+		r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
+		if (r)
+			goto out;
+		break;
+	}
+	case KVM_ASSIGN_SET_MSIX_ENTRY: {
+		struct kvm_assigned_msix_entry entry;
+		r = -EFAULT;
+		if (copy_from_user(&entry, argp, sizeof entry))
+			goto out;
+		r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
+		if (r)
+			goto out;
+		break;
+	}
 #endif
+#endif /* KVM_CAP_IRQ_ROUTING */
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}
-- 
cgit v1.2.3-70-g09d2


From 2350bd1f62c8706c22b8e58c3bfff10806c0a31b Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Wed, 25 Feb 2009 17:22:27 +0800
Subject: KVM: Add MSI-X interrupt injection logic

We have to handle more than one interrupt with one handler for MSI-X. Avi
suggested to use a flag to indicate the pending. So here is it.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h |  1 +
 virt/kvm/kvm_main.c      | 66 +++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 60 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 432edc27e82..3832243625d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -319,6 +319,7 @@ struct kvm_irq_ack_notifier {
 	void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
 };
 
+#define KVM_ASSIGNED_MSIX_PENDING		0x1
 struct kvm_guest_msix_entry {
 	u32 vector;
 	u16 entry;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1ceb96901f3..8bd44d6985c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -95,25 +95,69 @@ static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *h
 	return NULL;
 }
 
+static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
+				    *assigned_dev, int irq)
+{
+	int i, index;
+	struct msix_entry *host_msix_entries;
+
+	host_msix_entries = assigned_dev->host_msix_entries;
+
+	index = -1;
+	for (i = 0; i < assigned_dev->entries_nr; i++)
+		if (irq == host_msix_entries[i].vector) {
+			index = i;
+			break;
+		}
+	if (index < 0) {
+		printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
+		return 0;
+	}
+
+	return index;
+}
+
 static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 {
 	struct kvm_assigned_dev_kernel *assigned_dev;
+	struct kvm *kvm;
+	int irq, i;
 
 	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
 				    interrupt_work);
+	kvm = assigned_dev->kvm;
 
 	/* This is taken to safely inject irq inside the guest. When
 	 * the interrupt injection (or the ioapic code) uses a
 	 * finer-grained lock, update this
 	 */
-	mutex_lock(&assigned_dev->kvm->lock);
-	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
-		    assigned_dev->guest_irq, 1);
-
-	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) {
-		enable_irq(assigned_dev->host_irq);
-		assigned_dev->host_irq_disabled = false;
+	mutex_lock(&kvm->lock);
+	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
+		struct kvm_guest_msix_entry *guest_entries =
+			assigned_dev->guest_msix_entries;
+		for (i = 0; i < assigned_dev->entries_nr; i++) {
+			if (!(guest_entries[i].flags &
+					KVM_ASSIGNED_MSIX_PENDING))
+				continue;
+			guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING;
+			kvm_set_irq(assigned_dev->kvm,
+				    assigned_dev->irq_source_id,
+				    guest_entries[i].vector, 1);
+			irq = assigned_dev->host_msix_entries[i].vector;
+			if (irq != 0)
+				enable_irq(irq);
+			assigned_dev->host_irq_disabled = false;
+		}
+	} else {
+		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
+			    assigned_dev->guest_irq, 1);
+		if (assigned_dev->irq_requested_type &
+				KVM_ASSIGNED_DEV_GUEST_MSI) {
+			enable_irq(assigned_dev->host_irq);
+			assigned_dev->host_irq_disabled = false;
+		}
 	}
+
 	mutex_unlock(&assigned_dev->kvm->lock);
 }
 
@@ -122,6 +166,14 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 	struct kvm_assigned_dev_kernel *assigned_dev =
 		(struct kvm_assigned_dev_kernel *) dev_id;
 
+	if (assigned_dev->irq_requested_type == KVM_ASSIGNED_DEV_MSIX) {
+		int index = find_index_from_host_irq(assigned_dev, irq);
+		if (index < 0)
+			return IRQ_HANDLED;
+		assigned_dev->guest_msix_entries[index].flags |=
+			KVM_ASSIGNED_MSIX_PENDING;
+	}
+
 	schedule_work(&assigned_dev->interrupt_work);
 
 	disable_irq_nosync(irq);
-- 
cgit v1.2.3-70-g09d2


From d510d6cc653bc4b3094ea73afe12600d0ab445b3 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Wed, 25 Feb 2009 17:22:28 +0800
Subject: KVM: Enable MSI-X for KVM assigned device

This patch finally enable MSI-X.

What we need for MSI-X:
1. Intercept one page in MMIO region of device. So that we can get guest desired
MSI-X table and set up the real one. Now this have been done by guest, and
transfer to kernel using ioctl KVM_SET_MSIX_NR and KVM_SET_MSIX_ENTRY.

2. Information for incoming interrupt. Now one device can have more than one
interrupt, and they are all handled by one workqueue structure. So we need to
identify them. The previous patch enable gsi_msg_pending_bitmap get this done.

3. Mapping from host IRQ to guest gsi as well as guest gsi to real MSI/MSI-X
message address/data. We used same entry number for the host and guest here, so
that it's easy to find the correlated guest gsi.

What we lack for now:
1. The PCI spec said nothing can existed with MSI-X table in the same page of
MMIO region, except pending bits. The patch ignore pending bits as the first
step (so they are always 0 - no pending).

2. The PCI spec allowed to change MSI-X table dynamically. That means, the OS
can enable MSI-X, then mask one MSI-X entry, modify it, and unmask it. The patch
didn't support this, and Linux also don't work in this way.

3. The patch didn't implement MSI-X mask all and mask single entry. I would
implement the former in driver/pci/msi.c later. And for single entry, userspace
should have reposibility to handle it.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm.h |  1 +
 include/linux/kvm.h        |  8 ++++
 virt/kvm/kvm_main.c        | 98 +++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 101 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index dc3f6cf1170..125be8b1956 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -16,6 +16,7 @@
 #define __KVM_HAVE_MSI
 #define __KVM_HAVE_USER_NMI
 #define __KVM_HAVE_GUEST_DEBUG
+#define __KVM_HAVE_MSIX
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 78cdee8c635..640835ed270 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -409,6 +409,9 @@ struct kvm_trace_rec {
 #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
 #define KVM_CAP_DEVICE_DEASSIGNMENT 27
 #endif
+#ifdef __KVM_HAVE_MSIX
+#define KVM_CAP_DEVICE_MSIX 28
+#endif
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
 
@@ -611,6 +614,11 @@ struct kvm_assigned_irq {
 #define KVM_DEV_IRQ_ASSIGN_MSI_ACTION	KVM_DEV_IRQ_ASSIGN_ENABLE_MSI
 #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI	(1 << 0)
 
+#define KVM_DEV_IRQ_ASSIGN_MSIX_ACTION  (KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX |\
+					KVM_DEV_IRQ_ASSIGN_MASK_MSIX)
+#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX  (1 << 1)
+#define KVM_DEV_IRQ_ASSIGN_MASK_MSIX    (1 << 2)
+
 struct kvm_assigned_msix_nr {
 	__u32 assigned_dev_id;
 	__u16 entry_nr;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8bd44d6985c..3bed82754a5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -236,13 +236,33 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
 	 * now, the kvm state is still legal for probably we also have to wait
 	 * interrupt_work done.
 	 */
-	disable_irq_nosync(assigned_dev->host_irq);
-	cancel_work_sync(&assigned_dev->interrupt_work);
+	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
+		int i;
+		for (i = 0; i < assigned_dev->entries_nr; i++)
+			disable_irq_nosync(assigned_dev->
+					   host_msix_entries[i].vector);
+
+		cancel_work_sync(&assigned_dev->interrupt_work);
 
-	free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+		for (i = 0; i < assigned_dev->entries_nr; i++)
+			free_irq(assigned_dev->host_msix_entries[i].vector,
+				 (void *)assigned_dev);
 
-	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
-		pci_disable_msi(assigned_dev->dev);
+		assigned_dev->entries_nr = 0;
+		kfree(assigned_dev->host_msix_entries);
+		kfree(assigned_dev->guest_msix_entries);
+		pci_disable_msix(assigned_dev->dev);
+	} else {
+		/* Deal with MSI and INTx */
+		disable_irq_nosync(assigned_dev->host_irq);
+		cancel_work_sync(&assigned_dev->interrupt_work);
+
+		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+
+		if (assigned_dev->irq_requested_type &
+				KVM_ASSIGNED_DEV_HOST_MSI)
+			pci_disable_msi(assigned_dev->dev);
+	}
 
 	assigned_dev->irq_requested_type = 0;
 }
@@ -373,6 +393,60 @@ static int assigned_device_update_msi(struct kvm *kvm,
 }
 #endif
 
+#ifdef __KVM_HAVE_MSIX
+static int assigned_device_update_msix(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *adev,
+			struct kvm_assigned_irq *airq)
+{
+	/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
+	int i, r;
+
+	adev->ack_notifier.gsi = -1;
+
+	if (irqchip_in_kernel(kvm)) {
+		if (airq->flags & KVM_DEV_IRQ_ASSIGN_MASK_MSIX)
+			return -ENOTTY;
+
+		if (!(airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX)) {
+			/* Guest disable MSI-X */
+			kvm_free_assigned_irq(kvm, adev);
+			if (msi2intx) {
+				pci_enable_msi(adev->dev);
+				if (adev->dev->msi_enabled)
+					return assigned_device_update_msi(kvm,
+							adev, airq);
+			}
+			return assigned_device_update_intx(kvm, adev, airq);
+		}
+
+		/* host_msix_entries and guest_msix_entries should have been
+		 * initialized */
+		if (adev->entries_nr == 0)
+			return -EINVAL;
+
+		kvm_free_assigned_irq(kvm, adev);
+
+		r = pci_enable_msix(adev->dev, adev->host_msix_entries,
+				    adev->entries_nr);
+		if (r)
+			return r;
+
+		for (i = 0; i < adev->entries_nr; i++) {
+			r = request_irq((adev->host_msix_entries + i)->vector,
+					kvm_assigned_dev_intr, 0,
+					"kvm_assigned_msix_device",
+					(void *)adev);
+			if (r)
+				return r;
+		}
+	}
+
+	adev->irq_requested_type |= KVM_ASSIGNED_DEV_MSIX;
+
+	return 0;
+}
+#endif
+
 static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 				   struct kvm_assigned_irq
 				   *assigned_irq)
@@ -417,12 +491,24 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 		}
 	}
 
-	if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) &&
+	if (match->irq_requested_type & KVM_ASSIGNED_DEV_MSIX)
+		current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX;
+	else if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) &&
 		 (match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI))
 		current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI;
 
 	changed_flags = assigned_irq->flags ^ current_flags;
 
+#ifdef __KVM_HAVE_MSIX
+	if (changed_flags & KVM_DEV_IRQ_ASSIGN_MSIX_ACTION) {
+		r = assigned_device_update_msix(kvm, match, assigned_irq);
+		if (r) {
+			printk(KERN_WARNING "kvm: failed to execute "
+					"MSI-X action!\n");
+			goto out_release;
+		}
+	} else
+#endif
 	if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) ||
 	    (msi2intx && match->dev->msi_enabled)) {
 #ifdef CONFIG_X86
-- 
cgit v1.2.3-70-g09d2


From b95b51d580bff9376850eef29d34c3aa08c26db7 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 26 Feb 2009 13:55:33 +0100
Subject: KVM: declare ioapic functions only on affected hardware

Since "KVM: Unify the delivery of IOAPIC and MSI interrupts"
I get the following warnings:

  CC [M]  arch/s390/kvm/kvm-s390.o
In file included from arch/s390/kvm/kvm-s390.c:22:
include/linux/kvm_host.h:357: warning: 'struct kvm_ioapic' declared inside parameter list
include/linux/kvm_host.h:357: warning: its scope is only this definition or declaration, which is probably not what you want

This patch limits IOAPIC functions for architectures that have one.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3832243625d..3b91ec9982c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -363,9 +363,11 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 				      struct kvm_irq_mask_notifier *kimn);
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
 
+#ifdef __KVM_HAVE_IOAPIC
 void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
 				   union kvm_ioapic_redirect_entry *entry,
 				   unsigned long *deliver_bitmask);
+#endif
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
-- 
cgit v1.2.3-70-g09d2


From a53c17d21c46a752f5ac6695376481bc27865b04 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Thu, 5 Mar 2009 16:34:49 +0200
Subject: KVM: ioapic/msi interrupt delivery consolidation

ioapic_deliver() and kvm_set_msi() have code duplication. Move
the code into ioapic_deliver_entry() function and call it from
both places.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 include/linux/kvm_host.h |  2 +-
 virt/kvm/ioapic.c        | 61 +++++++++++++++++++++++++-----------------------
 virt/kvm/ioapic.h        |  4 ++--
 virt/kvm/irq_comm.c      | 32 +++----------------------
 4 files changed, 38 insertions(+), 61 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3b91ec9982c..ec9d078b1e8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -364,7 +364,7 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
 
 #ifdef __KVM_HAVE_IOAPIC
-void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
+void kvm_get_intr_delivery_bitmask(struct kvm *kvm,
 				   union kvm_ioapic_redirect_entry *entry,
 				   unsigned long *deliver_bitmask);
 #endif
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index d4a7948b010..b71c0442cec 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -142,54 +142,57 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 	}
 }
 
-static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
+int ioapic_deliver_entry(struct kvm *kvm, union kvm_ioapic_redirect_entry *e)
 {
-	union kvm_ioapic_redirect_entry entry = ioapic->redirtbl[irq];
 	DECLARE_BITMAP(deliver_bitmask, KVM_MAX_VCPUS);
-	struct kvm_vcpu *vcpu;
-	int vcpu_id, r = -1;
+	int i, r = -1;
 
-	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
-		     "vector=%x trig_mode=%x\n",
-		     entry.fields.dest, entry.fields.dest_mode,
-		     entry.fields.delivery_mode, entry.fields.vector,
-		     entry.fields.trig_mode);
-
-	/* Always delivery PIT interrupt to vcpu 0 */
-#ifdef CONFIG_X86
-	if (irq == 0) {
-                bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
-		__set_bit(0, deliver_bitmask);
-        } else
-#endif
-		kvm_get_intr_delivery_bitmask(ioapic, &entry, deliver_bitmask);
+	kvm_get_intr_delivery_bitmask(kvm, e, deliver_bitmask);
 
 	if (find_first_bit(deliver_bitmask, KVM_MAX_VCPUS) >= KVM_MAX_VCPUS) {
 		ioapic_debug("no target on destination\n");
-		return 0;
+		return r;
 	}
 
-	while ((vcpu_id = find_first_bit(deliver_bitmask, KVM_MAX_VCPUS))
+	while ((i = find_first_bit(deliver_bitmask, KVM_MAX_VCPUS))
 			< KVM_MAX_VCPUS) {
-		__clear_bit(vcpu_id, deliver_bitmask);
-		vcpu = ioapic->kvm->vcpus[vcpu_id];
+		struct kvm_vcpu *vcpu = kvm->vcpus[i];
+		__clear_bit(i, deliver_bitmask);
 		if (vcpu) {
 			if (r < 0)
 				r = 0;
-			r += kvm_apic_set_irq(vcpu,
-					entry.fields.vector,
-					entry.fields.trig_mode,
-					entry.fields.delivery_mode);
+			r += kvm_apic_set_irq(vcpu, e->fields.vector,
+					e->fields.delivery_mode,
+					e->fields.trig_mode);
 		} else
 			ioapic_debug("null destination vcpu: "
 				     "mask=%x vector=%x delivery_mode=%x\n",
-				     entry.fields.deliver_bitmask,
-				     entry.fields.vector,
-				     entry.fields.delivery_mode);
+				     e->fields.deliver_bitmask,
+				     e->fields.vector, e->fields.delivery_mode);
 	}
 	return r;
 }
 
+static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
+{
+	union kvm_ioapic_redirect_entry entry = ioapic->redirtbl[irq];
+
+	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
+		     "vector=%x trig_mode=%x\n",
+		     entry.fields.dest, entry.fields.dest_mode,
+		     entry.fields.delivery_mode, entry.fields.vector,
+		     entry.fields.trig_mode);
+
+#ifdef CONFIG_X86
+	/* Always delivery PIT interrupt to vcpu 0 */
+	if (irq == 0) {
+		entry.fields.dest_mode = 0; /* Physical mode. */
+		entry.fields.dest_id = ioapic->kvm->vcpus[0]->vcpu_id;
+	}
+#endif
+	return ioapic_deliver_entry(ioapic->kvm, &entry);
+}
+
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 {
 	u32 old_irr = ioapic->irr;
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index c8032ab2a4e..bedeea59cc1 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -70,8 +70,8 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
 int kvm_ioapic_init(struct kvm *kvm);
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
-void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
+void kvm_get_intr_delivery_bitmask(struct kvm *kvm,
 				   union kvm_ioapic_redirect_entry *entry,
 				   unsigned long *deliver_bitmask);
-
+int ioapic_deliver_entry(struct kvm *kvm, union kvm_ioapic_redirect_entry *e);
 #endif
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 325c6685f20..35397a569b2 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -43,12 +43,11 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
 	return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
 }
 
-void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
+void kvm_get_intr_delivery_bitmask(struct kvm *kvm,
 				   union kvm_ioapic_redirect_entry *entry,
 				   unsigned long *deliver_bitmask)
 {
 	int i;
-	struct kvm *kvm = ioapic->kvm;
 	struct kvm_vcpu *vcpu;
 
 	bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
@@ -90,7 +89,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
 	switch (entry->fields.delivery_mode) {
 	case IOAPIC_LOWEST_PRIORITY:
 		/* Select one in deliver_bitmask */
-		vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm,
+		vcpu = kvm_get_lowest_prio_vcpu(kvm,
 				entry->fields.vector, deliver_bitmask);
 		bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
 		if (!vcpu)
@@ -111,13 +110,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
 static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 		       struct kvm *kvm, int level)
 {
-	int vcpu_id, r = -1;
-	struct kvm_vcpu *vcpu;
-	struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
 	union kvm_ioapic_redirect_entry entry;
-	DECLARE_BITMAP(deliver_bitmask, KVM_MAX_VCPUS);
-
-	BUG_ON(!ioapic);
 
 	entry.bits = 0;
 	entry.fields.dest_id = (e->msi.address_lo &
@@ -133,26 +126,7 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 			(unsigned long *)&e->msi.data);
 
 	/* TODO Deal with RH bit of MSI message address */
-
-	kvm_get_intr_delivery_bitmask(ioapic, &entry, deliver_bitmask);
-
-	if (find_first_bit(deliver_bitmask, KVM_MAX_VCPUS) >= KVM_MAX_VCPUS) {
-		printk(KERN_WARNING "kvm: no destination for MSI delivery!");
-		return -1;
-	}
-	while ((vcpu_id = find_first_bit(deliver_bitmask,
-					KVM_MAX_VCPUS)) < KVM_MAX_VCPUS) {
-		__clear_bit(vcpu_id, deliver_bitmask);
-		vcpu = ioapic->kvm->vcpus[vcpu_id];
-		if (vcpu) {
-			if (r < 0)
-				r = 0;
-			r += kvm_apic_set_irq(vcpu, entry.fields.vector,
-					      entry.fields.dest_mode,
-					      entry.fields.trig_mode);
-		}
-	}
-	return r;
+	return ioapic_deliver_entry(kvm, &entry);
 }
 
 /* This should be called with the kvm->lock mutex held
-- 
cgit v1.2.3-70-g09d2


From 343f94fe4d16ec898da77720c03da9e09f8523d2 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Thu, 5 Mar 2009 16:34:54 +0200
Subject: KVM: consolidate ioapic/ipi interrupt delivery logic

Use kvm_apic_match_dest() in kvm_get_intr_delivery_bitmask() instead
of duplicating the same code. Use kvm_get_intr_delivery_bitmask() in
apic_send_ipi() to figure out ipi destination instead of reimplementing
the logic.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c |  8 ++++++
 arch/ia64/kvm/lapic.h    |  3 ++
 arch/x86/kvm/lapic.c     | 69 +++++++++++++++++---------------------------
 arch/x86/kvm/lapic.h     |  2 ++
 include/linux/kvm_host.h |  5 ----
 virt/kvm/ioapic.c        |  5 +++-
 virt/kvm/ioapic.h        | 10 ++++---
 virt/kvm/irq_comm.c      | 74 ++++++++++++++----------------------------------
 8 files changed, 70 insertions(+), 106 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 99d6d174d93..8eea9cba7b7 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1852,6 +1852,14 @@ struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
 	return lvcpu;
 }
 
+int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+		int short_hand, int dest, int dest_mode)
+{
+	return (dest_mode == 0) ?
+		kvm_apic_match_physical_addr(target, dest) :
+		kvm_apic_match_logical_addr(target, dest);
+}
+
 static int find_highest_bits(int *dat)
 {
 	u32  bits, bitnum;
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
index cbcfaa6195c..31602e7338d 100644
--- a/arch/ia64/kvm/lapic.h
+++ b/arch/ia64/kvm/lapic.h
@@ -20,6 +20,9 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu);
 
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
+int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+		int short_hand, int dest, int dest_mode);
+bool kvm_apic_present(struct kvm_vcpu *vcpu);
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 dmode, u8 trig);
 
 #endif
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index a42f968a23e..998862a3c26 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -260,7 +260,7 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
 
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
 {
-	return kvm_apic_id(apic) == dest;
+	return dest == 0xff || kvm_apic_id(apic) == dest;
 }
 
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
@@ -289,37 +289,34 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
 	return result;
 }
 
-static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 			   int short_hand, int dest, int dest_mode)
 {
 	int result = 0;
 	struct kvm_lapic *target = vcpu->arch.apic;
 
 	apic_debug("target %p, source %p, dest 0x%x, "
-		   "dest_mode 0x%x, short_hand 0x%x",
+		   "dest_mode 0x%x, short_hand 0x%x\n",
 		   target, source, dest, dest_mode, short_hand);
 
 	ASSERT(!target);
 	switch (short_hand) {
 	case APIC_DEST_NOSHORT:
-		if (dest_mode == 0) {
+		if (dest_mode == 0)
 			/* Physical mode. */
-			if ((dest == 0xFF) || (dest == kvm_apic_id(target)))
-				result = 1;
-		} else
+			result = kvm_apic_match_physical_addr(target, dest);
+		else
 			/* Logical mode. */
 			result = kvm_apic_match_logical_addr(target, dest);
 		break;
 	case APIC_DEST_SELF:
-		if (target == source)
-			result = 1;
+		result = (target == source);
 		break;
 	case APIC_DEST_ALLINC:
 		result = 1;
 		break;
 	case APIC_DEST_ALLBUT:
-		if (target != source)
-			result = 1;
+		result = (target != source);
 		break;
 	default:
 		printk(KERN_WARNING "Bad dest shorthand value %x\n",
@@ -492,38 +489,26 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 	unsigned int delivery_mode = icr_low & APIC_MODE_MASK;
 	unsigned int vector = icr_low & APIC_VECTOR_MASK;
 
-	struct kvm_vcpu *target;
-	struct kvm_vcpu *vcpu;
-	DECLARE_BITMAP(lpr_map, KVM_MAX_VCPUS);
+	DECLARE_BITMAP(deliver_bitmask, KVM_MAX_VCPUS);
 	int i;
 
-	bitmap_zero(lpr_map, KVM_MAX_VCPUS);
 	apic_debug("icr_high 0x%x, icr_low 0x%x, "
 		   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
 		   "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
 		   icr_high, icr_low, short_hand, dest,
 		   trig_mode, level, dest_mode, delivery_mode, vector);
 
-	for (i = 0; i < KVM_MAX_VCPUS; i++) {
-		vcpu = apic->vcpu->kvm->vcpus[i];
-		if (!vcpu)
-			continue;
-
-		if (vcpu->arch.apic &&
-		    apic_match_dest(vcpu, apic, short_hand, dest, dest_mode)) {
-			if (delivery_mode == APIC_DM_LOWEST)
-				__set_bit(vcpu->vcpu_id, lpr_map);
-			else
-				__apic_accept_irq(vcpu->arch.apic, delivery_mode,
-						  vector, level, trig_mode);
-		}
-	}
-
-	if (delivery_mode == APIC_DM_LOWEST) {
-		target = kvm_get_lowest_prio_vcpu(vcpu->kvm, vector, lpr_map);
-		if (target != NULL)
-			__apic_accept_irq(target->arch.apic, delivery_mode,
-					  vector, level, trig_mode);
+	kvm_get_intr_delivery_bitmask(apic->vcpu->kvm, apic, dest, dest_mode,
+			delivery_mode == APIC_DM_LOWEST, short_hand,
+			deliver_bitmask);
+
+	while ((i = find_first_bit(deliver_bitmask, KVM_MAX_VCPUS))
+			< KVM_MAX_VCPUS) {
+		struct kvm_vcpu *vcpu = apic->vcpu->kvm->vcpus[i];
+		__clear_bit(i, deliver_bitmask);
+		if (vcpu)
+			__apic_accept_irq(vcpu->arch.apic, delivery_mode,
+					vector, level, trig_mode);
 	}
 }
 
@@ -930,16 +915,14 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_reset);
 
-int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
+bool kvm_apic_present(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = vcpu->arch.apic;
-	int ret = 0;
-
-	if (!apic)
-		return 0;
-	ret = apic_enabled(apic);
+	return vcpu->arch.apic && apic_hw_enabled(vcpu->arch.apic);
+}
 
-	return ret;
+int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
+{
+	return kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic);
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_enabled);
 
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 1b0e3c03cb3..b66dc14a969 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -37,6 +37,8 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
 int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
+bool kvm_apic_present(struct kvm_vcpu *vcpu);
+bool kvm_lapic_present(struct kvm_vcpu *vcpu);
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
 
 void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ec9d078b1e8..fb60f31c4fb 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -363,11 +363,6 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 				      struct kvm_irq_mask_notifier *kimn);
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
 
-#ifdef __KVM_HAVE_IOAPIC
-void kvm_get_intr_delivery_bitmask(struct kvm *kvm,
-				   union kvm_ioapic_redirect_entry *entry,
-				   unsigned long *deliver_bitmask);
-#endif
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index b71c0442cec..43969bbf127 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -147,7 +147,10 @@ int ioapic_deliver_entry(struct kvm *kvm, union kvm_ioapic_redirect_entry *e)
 	DECLARE_BITMAP(deliver_bitmask, KVM_MAX_VCPUS);
 	int i, r = -1;
 
-	kvm_get_intr_delivery_bitmask(kvm, e, deliver_bitmask);
+	kvm_get_intr_delivery_bitmask(kvm, NULL, e->fields.dest_id,
+			e->fields.dest_mode,
+			e->fields.delivery_mode == IOAPIC_LOWEST_PRIORITY,
+			0, deliver_bitmask);
 
 	if (find_first_bit(deliver_bitmask, KVM_MAX_VCPUS) >= KVM_MAX_VCPUS) {
 		ioapic_debug("no target on destination\n");
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index bedeea59cc1..d996c7abc46 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -65,13 +65,15 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
 }
 
 struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
-				       unsigned long *bitmap);
+		unsigned long *bitmap);
+int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+		int short_hand, int dest, int dest_mode);
 void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
 int kvm_ioapic_init(struct kvm *kvm);
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
-void kvm_get_intr_delivery_bitmask(struct kvm *kvm,
-				   union kvm_ioapic_redirect_entry *entry,
-				   unsigned long *deliver_bitmask);
+void kvm_get_intr_delivery_bitmask(struct kvm *kvm, struct kvm_lapic *src,
+		int dest_id, int dest_mode, bool low_prio, int short_hand,
+		unsigned long *deliver_bitmask);
 int ioapic_deliver_entry(struct kvm *kvm, union kvm_ioapic_redirect_entry *e);
 #endif
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 35397a569b2..e43701c0a5c 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -43,67 +43,35 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
 	return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
 }
 
-void kvm_get_intr_delivery_bitmask(struct kvm *kvm,
-				   union kvm_ioapic_redirect_entry *entry,
-				   unsigned long *deliver_bitmask)
+void kvm_get_intr_delivery_bitmask(struct kvm *kvm, struct kvm_lapic *src,
+		int dest_id, int dest_mode, bool low_prio, int short_hand,
+		unsigned long *deliver_bitmask)
 {
 	int i;
 	struct kvm_vcpu *vcpu;
 
+	if (dest_mode == 0 && dest_id == 0xff && low_prio)
+		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
+
 	bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
+	for (i = 0; i < KVM_MAX_VCPUS; i++) {
+		vcpu = kvm->vcpus[i];
 
-	if (entry->fields.dest_mode == 0) {	/* Physical mode. */
-		if (entry->fields.dest_id == 0xFF) {	/* Broadcast. */
-			for (i = 0; i < KVM_MAX_VCPUS; ++i)
-				if (kvm->vcpus[i] && kvm->vcpus[i]->arch.apic)
-					__set_bit(i, deliver_bitmask);
-			/* Lowest priority shouldn't combine with broadcast */
-			if (entry->fields.delivery_mode ==
-			    IOAPIC_LOWEST_PRIORITY && printk_ratelimit())
-				printk(KERN_INFO "kvm: apic: phys broadcast "
-						  "and lowest prio\n");
-			return;
-		}
-		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-			vcpu = kvm->vcpus[i];
-			if (!vcpu)
-				continue;
-			if (kvm_apic_match_physical_addr(vcpu->arch.apic,
-					entry->fields.dest_id)) {
-				if (vcpu->arch.apic)
-					__set_bit(i, deliver_bitmask);
-				break;
-			}
-		}
-	} else if (entry->fields.dest_id != 0) /* Logical mode, MDA non-zero. */
-		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-			vcpu = kvm->vcpus[i];
-			if (!vcpu)
-				continue;
-			if (vcpu->arch.apic &&
-			    kvm_apic_match_logical_addr(vcpu->arch.apic,
-					entry->fields.dest_id))
-				__set_bit(i, deliver_bitmask);
-		}
+		if (!vcpu || !kvm_apic_present(vcpu))
+			continue;
 
-	switch (entry->fields.delivery_mode) {
-	case IOAPIC_LOWEST_PRIORITY:
-		/* Select one in deliver_bitmask */
-		vcpu = kvm_get_lowest_prio_vcpu(kvm,
-				entry->fields.vector, deliver_bitmask);
-		bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
-		if (!vcpu)
-			return;
-		__set_bit(vcpu->vcpu_id, deliver_bitmask);
-		break;
-	case IOAPIC_FIXED:
-	case IOAPIC_NMI:
-		break;
-	default:
-		if (printk_ratelimit())
-			printk(KERN_INFO "kvm: unsupported delivery mode %d\n",
-				entry->fields.delivery_mode);
+		if (!kvm_apic_match_dest(vcpu, src, short_hand, dest_id,
+					dest_mode))
+			continue;
+
+		__set_bit(i, deliver_bitmask);
+	}
+
+	if (low_prio) {
+		vcpu = kvm_get_lowest_prio_vcpu(kvm, 0, deliver_bitmask);
 		bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
+		if (vcpu)
+			__set_bit(vcpu->vcpu_id, deliver_bitmask);
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2


From 58c2dde17d6eb6c8c0566e52d184aa16755d890f Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Thu, 5 Mar 2009 16:35:04 +0200
Subject: KVM: APIC: get rid of deliver_bitmask

Deliver interrupt during destination matching loop.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c  | 33 ++++++++++++----------
 arch/ia64/kvm/lapic.h     |  4 +--
 arch/x86/kvm/lapic.c      | 59 +++++++++++----------------------------
 arch/x86/kvm/lapic.h      |  3 +-
 include/linux/kvm_types.h | 10 +++++++
 virt/kvm/ioapic.c         | 57 +++++++++++--------------------------
 virt/kvm/ioapic.h         |  6 ++--
 virt/kvm/irq_comm.c       | 71 ++++++++++++++++++++++++++++-------------------
 8 files changed, 108 insertions(+), 135 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 1887a93a2bd..acf43ec4270 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -283,6 +283,18 @@ static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 }
 
+static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector)
+{
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+	if (!test_and_set_bit(vector, &vpd->irr[0])) {
+		vcpu->arch.irq_new_pending = 1;
+		kvm_vcpu_kick(vcpu);
+		return 1;
+	}
+	return 0;
+}
+
 /*
  *  offset: address offset to IPI space.
  *  value:  deliver value.
@@ -292,20 +304,20 @@ static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm,
 {
 	switch (dm) {
 	case SAPIC_FIXED:
-		kvm_apic_set_irq(vcpu, vector, dm, 0);
 		break;
 	case SAPIC_NMI:
-		kvm_apic_set_irq(vcpu, 2, dm, 0);
+		vector = 2;
 		break;
 	case SAPIC_EXTINT:
-		kvm_apic_set_irq(vcpu, 0, dm, 0);
+		vector = 0;
 		break;
 	case SAPIC_INIT:
 	case SAPIC_PMI:
 	default:
 		printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n");
-		break;
+		return;
 	}
+	__apic_accept_irq(vcpu, vector);
 }
 
 static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
@@ -1813,17 +1825,9 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 	put_cpu();
 }
 
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 dmode, u8 trig)
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
 {
-
-	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-	if (!test_and_set_bit(vec, &vpd->irr[0])) {
-		vcpu->arch.irq_new_pending = 1;
-		kvm_vcpu_kick(vcpu);
-		return 1;
-	}
-	return 0;
+	return __apic_accept_irq(vcpu, irq->vector);
 }
 
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
@@ -1844,6 +1848,7 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 		int short_hand, int dest, int dest_mode)
 {
+	struct kvm_lapic *target = vcpu->arch.apic;
 	return (dest_mode == 0) ?
 		kvm_apic_match_physical_addr(target, dest) :
 		kvm_apic_match_logical_addr(target, dest);
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
index e42109e6ca4..ee541cebcd7 100644
--- a/arch/ia64/kvm/lapic.h
+++ b/arch/ia64/kvm/lapic.h
@@ -23,7 +23,7 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 		int short_hand, int dest, int dest_mode);
 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
-bool kvm_apic_present(struct kvm_vcpu *vcpu);
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 dmode, u8 trig);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
+#define kvm_apic_present(x) (true)
 
 #endif
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 814466f455d..dd934d27040 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -199,27 +199,12 @@ EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 			     int vector, int level, int trig_mode);
 
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 dmode, u8 trig)
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
-	int lapic_dmode;
 
-	switch (dmode) {
-	case IOAPIC_LOWEST_PRIORITY:
-		lapic_dmode = APIC_DM_LOWEST;
-		break;
-	case IOAPIC_FIXED:
-		lapic_dmode = APIC_DM_FIXED;
-		break;
-	case IOAPIC_NMI:
-		lapic_dmode = APIC_DM_NMI;
-		break;
-	default:
-		printk(KERN_DEBUG"Ignoring delivery mode %d\n", dmode);
-		return 0;
-		break;
-	}
-	return __apic_accept_irq(apic, lapic_dmode, vec, 1, trig);
+	return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
+			irq->level, irq->trig_mode);
 }
 
 static inline int apic_find_highest_isr(struct kvm_lapic *apic)
@@ -447,36 +432,24 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 {
 	u32 icr_low = apic_get_reg(apic, APIC_ICR);
 	u32 icr_high = apic_get_reg(apic, APIC_ICR2);
+	struct kvm_lapic_irq irq;
 
-	unsigned int dest = GET_APIC_DEST_FIELD(icr_high);
-	unsigned int short_hand = icr_low & APIC_SHORT_MASK;
-	unsigned int trig_mode = icr_low & APIC_INT_LEVELTRIG;
-	unsigned int level = icr_low & APIC_INT_ASSERT;
-	unsigned int dest_mode = icr_low & APIC_DEST_MASK;
-	unsigned int delivery_mode = icr_low & APIC_MODE_MASK;
-	unsigned int vector = icr_low & APIC_VECTOR_MASK;
-
-	DECLARE_BITMAP(deliver_bitmask, KVM_MAX_VCPUS);
-	int i;
+	irq.vector = icr_low & APIC_VECTOR_MASK;
+	irq.delivery_mode = icr_low & APIC_MODE_MASK;
+	irq.dest_mode = icr_low & APIC_DEST_MASK;
+	irq.level = icr_low & APIC_INT_ASSERT;
+	irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
+	irq.shorthand = icr_low & APIC_SHORT_MASK;
+	irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
 
 	apic_debug("icr_high 0x%x, icr_low 0x%x, "
 		   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
 		   "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
-		   icr_high, icr_low, short_hand, dest,
-		   trig_mode, level, dest_mode, delivery_mode, vector);
-
-	kvm_get_intr_delivery_bitmask(apic->vcpu->kvm, apic, dest, dest_mode,
-			delivery_mode == APIC_DM_LOWEST, short_hand,
-			deliver_bitmask);
-
-	while ((i = find_first_bit(deliver_bitmask, KVM_MAX_VCPUS))
-			< KVM_MAX_VCPUS) {
-		struct kvm_vcpu *vcpu = apic->vcpu->kvm->vcpus[i];
-		__clear_bit(i, deliver_bitmask);
-		if (vcpu)
-			__apic_accept_irq(vcpu->arch.apic, delivery_mode,
-					vector, level, trig_mode);
-	}
+		   icr_high, icr_low, irq.shorthand, irq.dest,
+		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
+		   irq.vector);
+
+	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
 }
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index b66dc14a969..a587f8349c4 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -31,14 +31,13 @@ u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
 
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 dmode, u8 trig);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
 
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
 int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
 bool kvm_apic_present(struct kvm_vcpu *vcpu);
-bool kvm_lapic_present(struct kvm_vcpu *vcpu);
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
 
 void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index b84aca3c4ad..fb46efbeabe 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -57,4 +57,14 @@ union kvm_ioapic_redirect_entry {
 	} fields;
 };
 
+struct kvm_lapic_irq {
+	u32 vector;
+	u32 delivery_mode;
+	u32 dest_mode;
+	u32 level;
+	u32 trig_mode;
+	u32 shorthand;
+	u32 dest_id;
+};
+
 #endif /* __KVM_TYPES_H__ */
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 43969bbf127..1eddae94bab 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -142,58 +142,33 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 	}
 }
 
-int ioapic_deliver_entry(struct kvm *kvm, union kvm_ioapic_redirect_entry *e)
-{
-	DECLARE_BITMAP(deliver_bitmask, KVM_MAX_VCPUS);
-	int i, r = -1;
-
-	kvm_get_intr_delivery_bitmask(kvm, NULL, e->fields.dest_id,
-			e->fields.dest_mode,
-			e->fields.delivery_mode == IOAPIC_LOWEST_PRIORITY,
-			0, deliver_bitmask);
-
-	if (find_first_bit(deliver_bitmask, KVM_MAX_VCPUS) >= KVM_MAX_VCPUS) {
-		ioapic_debug("no target on destination\n");
-		return r;
-	}
-
-	while ((i = find_first_bit(deliver_bitmask, KVM_MAX_VCPUS))
-			< KVM_MAX_VCPUS) {
-		struct kvm_vcpu *vcpu = kvm->vcpus[i];
-		__clear_bit(i, deliver_bitmask);
-		if (vcpu) {
-			if (r < 0)
-				r = 0;
-			r += kvm_apic_set_irq(vcpu, e->fields.vector,
-					e->fields.delivery_mode,
-					e->fields.trig_mode);
-		} else
-			ioapic_debug("null destination vcpu: "
-				     "mask=%x vector=%x delivery_mode=%x\n",
-				     e->fields.deliver_bitmask,
-				     e->fields.vector, e->fields.delivery_mode);
-	}
-	return r;
-}
-
 static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 {
-	union kvm_ioapic_redirect_entry entry = ioapic->redirtbl[irq];
+	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
+	struct kvm_lapic_irq irqe;
 
 	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
 		     "vector=%x trig_mode=%x\n",
-		     entry.fields.dest, entry.fields.dest_mode,
-		     entry.fields.delivery_mode, entry.fields.vector,
-		     entry.fields.trig_mode);
+		     entry->fields.dest, entry->fields.dest_mode,
+		     entry->fields.delivery_mode, entry->fields.vector,
+		     entry->fields.trig_mode);
+
+	irqe.dest_id = entry->fields.dest_id;
+	irqe.vector = entry->fields.vector;
+	irqe.dest_mode = entry->fields.dest_mode;
+	irqe.trig_mode = entry->fields.trig_mode;
+	irqe.delivery_mode = entry->fields.delivery_mode << 8;
+	irqe.level = 1;
+	irqe.shorthand = 0;
 
 #ifdef CONFIG_X86
 	/* Always delivery PIT interrupt to vcpu 0 */
 	if (irq == 0) {
-		entry.fields.dest_mode = 0; /* Physical mode. */
-		entry.fields.dest_id = ioapic->kvm->vcpus[0]->vcpu_id;
+		irqe.dest_mode = 0; /* Physical mode. */
+		irqe.dest_id = ioapic->kvm->vcpus[0]->vcpu_id;
 	}
 #endif
-	return ioapic_deliver_entry(ioapic->kvm, &entry);
+	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
 }
 
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index e7bc92d895f..7080b713c16 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -71,8 +71,6 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
 int kvm_ioapic_init(struct kvm *kvm);
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
-void kvm_get_intr_delivery_bitmask(struct kvm *kvm, struct kvm_lapic *src,
-		int dest_id, int dest_mode, bool low_prio, int short_hand,
-		unsigned long *deliver_bitmask);
-int ioapic_deliver_entry(struct kvm *kvm, union kvm_ioapic_redirect_entry *e);
+int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
+		struct kvm_lapic_irq *irq);
 #endif
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index f5e059b67cd..4fa1f604b42 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -22,6 +22,9 @@
 #include <linux/kvm_host.h>
 
 #include <asm/msidef.h>
+#ifdef CONFIG_IA64
+#include <asm/iosapic.h>
+#endif
 
 #include "irq.h"
 
@@ -43,61 +46,71 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
 	return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
 }
 
-void kvm_get_intr_delivery_bitmask(struct kvm *kvm, struct kvm_lapic *src,
-		int dest_id, int dest_mode, bool low_prio, int short_hand,
-		unsigned long *deliver_bitmask)
+inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
 {
-	int i, lowest = -1;
-	struct kvm_vcpu *vcpu;
+#ifdef CONFIG_IA64
+	return irq->delivery_mode ==
+		(IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
+#else
+	return irq->delivery_mode == APIC_DM_LOWEST;
+#endif
+}
 
-	if (dest_mode == 0 && dest_id == 0xff && low_prio)
+int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
+		struct kvm_lapic_irq *irq)
+{
+	int i, r = -1;
+	struct kvm_vcpu *vcpu, *lowest = NULL;
+
+	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
+			kvm_is_dm_lowest_prio(irq))
 		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
 
-	bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
 	for (i = 0; i < KVM_MAX_VCPUS; i++) {
 		vcpu = kvm->vcpus[i];
 
 		if (!vcpu || !kvm_apic_present(vcpu))
 			continue;
 
-		if (!kvm_apic_match_dest(vcpu, src, short_hand, dest_id,
-					dest_mode))
+		if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
+					irq->dest_id, irq->dest_mode))
 			continue;
 
-		if (!low_prio) {
-			__set_bit(i, deliver_bitmask);
+		if (!kvm_is_dm_lowest_prio(irq)) {
+			if (r < 0)
+				r = 0;
+			r += kvm_apic_set_irq(vcpu, irq);
 		} else {
-			if (lowest < 0)
-				lowest = i;
-			if (kvm_apic_compare_prio(vcpu, kvm->vcpus[lowest]) < 0)
-				lowest = i;
+			if (!lowest)
+				lowest = vcpu;
+			else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
+				lowest = vcpu;
 		}
 	}
 
-	if (lowest != -1)
-		__set_bit(lowest, deliver_bitmask);
+	if (lowest)
+		r = kvm_apic_set_irq(lowest, irq);
+
+	return r;
 }
 
 static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 		       struct kvm *kvm, int level)
 {
-	union kvm_ioapic_redirect_entry entry;
+	struct kvm_lapic_irq irq;
 
-	entry.bits = 0;
-	entry.fields.dest_id = (e->msi.address_lo &
+	irq.dest_id = (e->msi.address_lo &
 			MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
-	entry.fields.vector = (e->msi.data &
+	irq.vector = (e->msi.data &
 			MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
-	entry.fields.dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
-			(unsigned long *)&e->msi.address_lo);
-	entry.fields.trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
-			(unsigned long *)&e->msi.data);
-	entry.fields.delivery_mode = test_bit(
-			MSI_DATA_DELIVERY_MODE_SHIFT,
-			(unsigned long *)&e->msi.data);
+	irq.dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
+	irq.trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
+	irq.delivery_mode = e->msi.data & 0x700;
+	irq.level = 1;
+	irq.shorthand = 0;
 
 	/* TODO Deal with RH bit of MSI message address */
-	return ioapic_deliver_entry(kvm, &entry);
+	return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
 }
 
 /* This should be called with the kvm->lock mutex held
-- 
cgit v1.2.3-70-g09d2


From e56d532f20c890a06bbe7cd479f4201e3a03cd73 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Thu, 12 Mar 2009 21:45:39 +0800
Subject: KVM: Device assignment framework rework

After discussion with Marcelo, we decided to rework device assignment framework
together. The old problems are kernel logic is unnecessary complex. So Marcelo
suggest to split it into a more elegant way:

1. Split host IRQ assign and guest IRQ assign. And userspace determine the
combination. Also discard msi2intx parameter, userspace can specific
KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_INTX in assigned_irq->flags to
enable MSI to INTx convertion.

2. Split assign IRQ and deassign IRQ. Import two new ioctls:
KVM_ASSIGN_DEV_IRQ and KVM_DEASSIGN_DEV_IRQ.

This patch also fixed the reversed _IOR vs _IOW in definition(by deprecated the
old interface).

[avi: replace homemade bitcount() by hweight_long()]

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c       |   1 +
 include/linux/kvm.h      |  26 ++-
 include/linux/kvm_host.h |   5 -
 virt/kvm/kvm_main.c      | 486 +++++++++++++++++++++++++----------------------
 4 files changed, 276 insertions(+), 242 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 43e049a2ccf..41123fc8613 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1026,6 +1026,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_SYNC_MMU:
 	case KVM_CAP_REINJECT_CONTROL:
 	case KVM_CAP_IRQ_INJECT_STATUS:
+	case KVM_CAP_ASSIGN_DEV_IRQ:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 640835ed270..644e3a9f47d 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -412,6 +412,7 @@ struct kvm_trace_rec {
 #ifdef __KVM_HAVE_MSIX
 #define KVM_CAP_DEVICE_MSIX 28
 #endif
+#define KVM_CAP_ASSIGN_DEV_IRQ 29
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
 
@@ -485,8 +486,10 @@ struct kvm_irq_routing {
 #define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
 				   struct kvm_assigned_pci_dev)
 #define KVM_SET_GSI_ROUTING       _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
+/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */
 #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
 			    struct kvm_assigned_irq)
+#define KVM_ASSIGN_DEV_IRQ        _IOW(KVMIO, 0x70, struct kvm_assigned_irq)
 #define KVM_REINJECT_CONTROL      _IO(KVMIO, 0x71)
 #define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
 				     struct kvm_assigned_pci_dev)
@@ -494,6 +497,7 @@ struct kvm_irq_routing {
 			_IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr)
 #define KVM_ASSIGN_SET_MSIX_ENTRY \
 			_IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
+#define KVM_DEASSIGN_DEV_IRQ       _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
 
 /*
  * ioctls for vcpu fds
@@ -584,6 +588,8 @@ struct kvm_debug_guest {
 #define KVM_TRC_STLB_INVAL       (KVM_TRC_HANDLER + 0x18)
 #define KVM_TRC_PPC_INSTR        (KVM_TRC_HANDLER + 0x19)
 
+#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
+
 struct kvm_assigned_pci_dev {
 	__u32 assigned_dev_id;
 	__u32 busnr;
@@ -594,6 +600,17 @@ struct kvm_assigned_pci_dev {
 	};
 };
 
+#define KVM_DEV_IRQ_HOST_INTX    (1 << 0)
+#define KVM_DEV_IRQ_HOST_MSI     (1 << 1)
+#define KVM_DEV_IRQ_HOST_MSIX    (1 << 2)
+
+#define KVM_DEV_IRQ_GUEST_INTX   (1 << 8)
+#define KVM_DEV_IRQ_GUEST_MSI    (1 << 9)
+#define KVM_DEV_IRQ_GUEST_MSIX   (1 << 10)
+
+#define KVM_DEV_IRQ_HOST_MASK	 0x00ff
+#define KVM_DEV_IRQ_GUEST_MASK   0xff00
+
 struct kvm_assigned_irq {
 	__u32 assigned_dev_id;
 	__u32 host_irq;
@@ -609,15 +626,6 @@ struct kvm_assigned_irq {
 	};
 };
 
-#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
-
-#define KVM_DEV_IRQ_ASSIGN_MSI_ACTION	KVM_DEV_IRQ_ASSIGN_ENABLE_MSI
-#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI	(1 << 0)
-
-#define KVM_DEV_IRQ_ASSIGN_MSIX_ACTION  (KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX |\
-					KVM_DEV_IRQ_ASSIGN_MASK_MSIX)
-#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX  (1 << 1)
-#define KVM_DEV_IRQ_ASSIGN_MASK_MSIX    (1 << 2)
 
 struct kvm_assigned_msix_nr {
 	__u32 assigned_dev_id;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index fb60f31c4fb..40e49ede8f9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -339,11 +339,6 @@ struct kvm_assigned_dev_kernel {
 	struct msix_entry *host_msix_entries;
 	int guest_irq;
 	struct kvm_guest_msix_entry *guest_msix_entries;
-#define KVM_ASSIGNED_DEV_GUEST_INTX	(1 << 0)
-#define KVM_ASSIGNED_DEV_GUEST_MSI	(1 << 1)
-#define KVM_ASSIGNED_DEV_HOST_INTX	(1 << 8)
-#define KVM_ASSIGNED_DEV_HOST_MSI	(1 << 9)
-#define KVM_ASSIGNED_DEV_MSIX		((1 << 2) | (1 << 10))
 	unsigned long irq_requested_type;
 	int irq_source_id;
 	int flags;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3bed82754a5..792fb7fae0a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -41,6 +41,7 @@
 #include <linux/pagemap.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
+#include <linux/bitops.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -60,9 +61,6 @@
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
-static int msi2intx = 1;
-module_param(msi2intx, bool, 0);
-
 DEFINE_SPINLOCK(kvm_lock);
 LIST_HEAD(vm_list);
 
@@ -132,7 +130,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 	 * finer-grained lock, update this
 	 */
 	mutex_lock(&kvm->lock);
-	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		struct kvm_guest_msix_entry *guest_entries =
 			assigned_dev->guest_msix_entries;
 		for (i = 0; i < assigned_dev->entries_nr; i++) {
@@ -152,7 +150,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
 			    assigned_dev->guest_irq, 1);
 		if (assigned_dev->irq_requested_type &
-				KVM_ASSIGNED_DEV_GUEST_MSI) {
+				KVM_DEV_IRQ_GUEST_MSI) {
 			enable_irq(assigned_dev->host_irq);
 			assigned_dev->host_irq_disabled = false;
 		}
@@ -166,7 +164,7 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 	struct kvm_assigned_dev_kernel *assigned_dev =
 		(struct kvm_assigned_dev_kernel *) dev_id;
 
-	if (assigned_dev->irq_requested_type == KVM_ASSIGNED_DEV_MSIX) {
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		int index = find_index_from_host_irq(assigned_dev, irq);
 		if (index < 0)
 			return IRQ_HANDLED;
@@ -204,22 +202,22 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 	}
 }
 
-/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
-static void kvm_free_assigned_irq(struct kvm *kvm,
-				  struct kvm_assigned_dev_kernel *assigned_dev)
+static void deassign_guest_irq(struct kvm *kvm,
+			       struct kvm_assigned_dev_kernel *assigned_dev)
 {
-	if (!irqchip_in_kernel(kvm))
-		return;
-
 	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
+	assigned_dev->ack_notifier.gsi = -1;
 
 	if (assigned_dev->irq_source_id != -1)
 		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
 	assigned_dev->irq_source_id = -1;
+	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
+}
 
-	if (!assigned_dev->irq_requested_type)
-		return;
-
+/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
+static void deassign_host_irq(struct kvm *kvm,
+			      struct kvm_assigned_dev_kernel *assigned_dev)
+{
 	/*
 	 * In kvm_free_device_irq, cancel_work_sync return true if:
 	 * 1. work is scheduled, and then cancelled.
@@ -236,7 +234,7 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
 	 * now, the kvm state is still legal for probably we also have to wait
 	 * interrupt_work done.
 	 */
-	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
+	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		int i;
 		for (i = 0; i < assigned_dev->entries_nr; i++)
 			disable_irq_nosync(assigned_dev->
@@ -259,14 +257,41 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
 
 		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
 
-		if (assigned_dev->irq_requested_type &
-				KVM_ASSIGNED_DEV_HOST_MSI)
+		if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
 			pci_disable_msi(assigned_dev->dev);
 	}
 
-	assigned_dev->irq_requested_type = 0;
+	assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
+}
+
+static int kvm_deassign_irq(struct kvm *kvm,
+			    struct kvm_assigned_dev_kernel *assigned_dev,
+			    unsigned long irq_requested_type)
+{
+	unsigned long guest_irq_type, host_irq_type;
+
+	if (!irqchip_in_kernel(kvm))
+		return -EINVAL;
+	/* no irq assignment to deassign */
+	if (!assigned_dev->irq_requested_type)
+		return -ENXIO;
+
+	host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
+	guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
+
+	if (host_irq_type)
+		deassign_host_irq(kvm, assigned_dev);
+	if (guest_irq_type)
+		deassign_guest_irq(kvm, assigned_dev);
+
+	return 0;
 }
 
+static void kvm_free_assigned_irq(struct kvm *kvm,
+				  struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
+}
 
 static void kvm_free_assigned_device(struct kvm *kvm,
 				     struct kvm_assigned_dev_kernel
@@ -298,256 +323,244 @@ void kvm_free_all_assigned_devices(struct kvm *kvm)
 	}
 }
 
-static int assigned_device_update_intx(struct kvm *kvm,
-			struct kvm_assigned_dev_kernel *adev,
-			struct kvm_assigned_irq *airq)
+static int assigned_device_enable_host_intx(struct kvm *kvm,
+					    struct kvm_assigned_dev_kernel *dev)
 {
-	adev->guest_irq = airq->guest_irq;
-	adev->ack_notifier.gsi = airq->guest_irq;
-
-	if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX)
-		return 0;
-
-	if (irqchip_in_kernel(kvm)) {
-		if (!msi2intx &&
-		    (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)) {
-			free_irq(adev->host_irq, (void *)adev);
-			pci_disable_msi(adev->dev);
-		}
+	dev->host_irq = dev->dev->irq;
+	/* Even though this is PCI, we don't want to use shared
+	 * interrupts. Sharing host devices with guest-assigned devices
+	 * on the same interrupt line is not a happy situation: there
+	 * are going to be long delays in accepting, acking, etc.
+	 */
+	if (request_irq(dev->host_irq, kvm_assigned_dev_intr,
+			0, "kvm_assigned_intx_device", (void *)dev))
+		return -EIO;
+	return 0;
+}
 
-		if (!capable(CAP_SYS_RAWIO))
-			return -EPERM;
+#ifdef __KVM_HAVE_MSI
+static int assigned_device_enable_host_msi(struct kvm *kvm,
+					   struct kvm_assigned_dev_kernel *dev)
+{
+	int r;
 
-		if (airq->host_irq)
-			adev->host_irq = airq->host_irq;
-		else
-			adev->host_irq = adev->dev->irq;
+	if (!dev->dev->msi_enabled) {
+		r = pci_enable_msi(dev->dev);
+		if (r)
+			return r;
+	}
 
-		/* Even though this is PCI, we don't want to use shared
-		 * interrupts. Sharing host devices with guest-assigned devices
-		 * on the same interrupt line is not a happy situation: there
-		 * are going to be long delays in accepting, acking, etc.
-		 */
-		if (request_irq(adev->host_irq, kvm_assigned_dev_intr,
-				0, "kvm_assigned_intx_device", (void *)adev))
-			return -EIO;
+	dev->host_irq = dev->dev->irq;
+	if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0,
+			"kvm_assigned_msi_device", (void *)dev)) {
+		pci_disable_msi(dev->dev);
+		return -EIO;
 	}
 
-	adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_INTX |
-				   KVM_ASSIGNED_DEV_HOST_INTX;
 	return 0;
 }
+#endif
 
-#ifdef CONFIG_X86
-static int assigned_device_update_msi(struct kvm *kvm,
-			struct kvm_assigned_dev_kernel *adev,
-			struct kvm_assigned_irq *airq)
+#ifdef __KVM_HAVE_MSIX
+static int assigned_device_enable_host_msix(struct kvm *kvm,
+					    struct kvm_assigned_dev_kernel *dev)
 {
-	int r;
+	int i, r = -EINVAL;
 
-	adev->guest_irq = airq->guest_irq;
-	if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) {
-		/* x86 don't care upper address of guest msi message addr */
-		adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI;
-		adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX;
-		adev->ack_notifier.gsi = -1;
-	} else if (msi2intx) {
-		adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX;
-		adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI;
-		adev->ack_notifier.gsi = airq->guest_irq;
-	} else {
-		/*
-		 * Guest require to disable device MSI, we disable MSI and
-		 * re-enable INTx by default again. Notice it's only for
-		 * non-msi2intx.
-		 */
-		assigned_device_update_intx(kvm, adev, airq);
-		return 0;
-	}
+	/* host_msix_entries and guest_msix_entries should have been
+	 * initialized */
+	if (dev->entries_nr == 0)
+		return r;
 
-	if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
-		return 0;
+	r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr);
+	if (r)
+		return r;
 
-	if (irqchip_in_kernel(kvm)) {
-		if (!msi2intx) {
-			if (adev->irq_requested_type &
-					KVM_ASSIGNED_DEV_HOST_INTX)
-				free_irq(adev->host_irq, (void *)adev);
+	for (i = 0; i < dev->entries_nr; i++) {
+		r = request_irq(dev->host_msix_entries[i].vector,
+				kvm_assigned_dev_intr, 0,
+				"kvm_assigned_msix_device",
+				(void *)dev);
+		/* FIXME: free requested_irq's on failure */
+		if (r)
+			return r;
+	}
 
-			r = pci_enable_msi(adev->dev);
-			if (r)
-				return r;
-		}
+	return 0;
+}
 
-		adev->host_irq = adev->dev->irq;
-		if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0,
-				"kvm_assigned_msi_device", (void *)adev))
-			return -EIO;
-	}
+#endif
 
-	if (!msi2intx)
-		adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI;
+static int assigned_device_enable_guest_intx(struct kvm *kvm,
+				struct kvm_assigned_dev_kernel *dev,
+				struct kvm_assigned_irq *irq)
+{
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = irq->guest_irq;
+	return 0;
+}
 
-	adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI;
+#ifdef __KVM_HAVE_MSI
+static int assigned_device_enable_guest_msi(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *dev,
+			struct kvm_assigned_irq *irq)
+{
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = -1;
 	return 0;
 }
 #endif
+#ifdef __KVM_HAVE_MSIX
+static int assigned_device_enable_guest_msix(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *dev,
+			struct kvm_assigned_irq *irq)
+{
+	dev->guest_irq = irq->guest_irq;
+	dev->ack_notifier.gsi = -1;
+	return 0;
+}
+#endif
+
+static int assign_host_irq(struct kvm *kvm,
+			   struct kvm_assigned_dev_kernel *dev,
+			   __u32 host_irq_type)
+{
+	int r = -EEXIST;
+
+	if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
+		return r;
 
+	switch (host_irq_type) {
+	case KVM_DEV_IRQ_HOST_INTX:
+		r = assigned_device_enable_host_intx(kvm, dev);
+		break;
+#ifdef __KVM_HAVE_MSI
+	case KVM_DEV_IRQ_HOST_MSI:
+		r = assigned_device_enable_host_msi(kvm, dev);
+		break;
+#endif
 #ifdef __KVM_HAVE_MSIX
-static int assigned_device_update_msix(struct kvm *kvm,
-			struct kvm_assigned_dev_kernel *adev,
-			struct kvm_assigned_irq *airq)
-{
-	/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
-	int i, r;
-
-	adev->ack_notifier.gsi = -1;
-
-	if (irqchip_in_kernel(kvm)) {
-		if (airq->flags & KVM_DEV_IRQ_ASSIGN_MASK_MSIX)
-			return -ENOTTY;
-
-		if (!(airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX)) {
-			/* Guest disable MSI-X */
-			kvm_free_assigned_irq(kvm, adev);
-			if (msi2intx) {
-				pci_enable_msi(adev->dev);
-				if (adev->dev->msi_enabled)
-					return assigned_device_update_msi(kvm,
-							adev, airq);
-			}
-			return assigned_device_update_intx(kvm, adev, airq);
-		}
+	case KVM_DEV_IRQ_HOST_MSIX:
+		r = assigned_device_enable_host_msix(kvm, dev);
+		break;
+#endif
+	default:
+		r = -EINVAL;
+	}
 
-		/* host_msix_entries and guest_msix_entries should have been
-		 * initialized */
-		if (adev->entries_nr == 0)
-			return -EINVAL;
+	if (!r)
+		dev->irq_requested_type |= host_irq_type;
 
-		kvm_free_assigned_irq(kvm, adev);
+	return r;
+}
 
-		r = pci_enable_msix(adev->dev, adev->host_msix_entries,
-				    adev->entries_nr);
-		if (r)
-			return r;
+static int assign_guest_irq(struct kvm *kvm,
+			    struct kvm_assigned_dev_kernel *dev,
+			    struct kvm_assigned_irq *irq,
+			    unsigned long guest_irq_type)
+{
+	int id;
+	int r = -EEXIST;
 
-		for (i = 0; i < adev->entries_nr; i++) {
-			r = request_irq((adev->host_msix_entries + i)->vector,
-					kvm_assigned_dev_intr, 0,
-					"kvm_assigned_msix_device",
-					(void *)adev);
-			if (r)
-				return r;
-		}
+	if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
+		return r;
+
+	id = kvm_request_irq_source_id(kvm);
+	if (id < 0)
+		return id;
+
+	dev->irq_source_id = id;
+
+	switch (guest_irq_type) {
+	case KVM_DEV_IRQ_GUEST_INTX:
+		r = assigned_device_enable_guest_intx(kvm, dev, irq);
+		break;
+#ifdef __KVM_HAVE_MSI
+	case KVM_DEV_IRQ_GUEST_MSI:
+		r = assigned_device_enable_guest_msi(kvm, dev, irq);
+		break;
+#endif
+#ifdef __KVM_HAVE_MSIX
+	case KVM_DEV_IRQ_GUEST_MSIX:
+		r = assigned_device_enable_guest_msix(kvm, dev, irq);
+		break;
+#endif
+	default:
+		r = -EINVAL;
 	}
 
-	adev->irq_requested_type |= KVM_ASSIGNED_DEV_MSIX;
+	if (!r) {
+		dev->irq_requested_type |= guest_irq_type;
+		kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
+	} else
+		kvm_free_irq_source_id(kvm, dev->irq_source_id);
 
-	return 0;
+	return r;
 }
-#endif
 
+/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
 static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
-				   struct kvm_assigned_irq
-				   *assigned_irq)
+				   struct kvm_assigned_irq *assigned_irq)
 {
-	int r = 0;
+	int r = -EINVAL;
 	struct kvm_assigned_dev_kernel *match;
-	u32 current_flags = 0, changed_flags;
+	unsigned long host_irq_type, guest_irq_type;
 
-	mutex_lock(&kvm->lock);
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
 
+	if (!irqchip_in_kernel(kvm))
+		return r;
+
+	mutex_lock(&kvm->lock);
+	r = -ENODEV;
 	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
 				      assigned_irq->assigned_dev_id);
-	if (!match) {
-		mutex_unlock(&kvm->lock);
-		return -EINVAL;
-	}
-
-	if (!match->irq_requested_type) {
-		INIT_WORK(&match->interrupt_work,
-				kvm_assigned_dev_interrupt_work_handler);
-		if (irqchip_in_kernel(kvm)) {
-			/* Register ack nofitier */
-			match->ack_notifier.gsi = -1;
-			match->ack_notifier.irq_acked =
-					kvm_assigned_dev_ack_irq;
-			kvm_register_irq_ack_notifier(kvm,
-					&match->ack_notifier);
-
-			/* Request IRQ source ID */
-			r = kvm_request_irq_source_id(kvm);
-			if (r < 0)
-				goto out_release;
-			else
-				match->irq_source_id = r;
-
-#ifdef CONFIG_X86
-			/* Determine host device irq type, we can know the
-			 * result from dev->msi_enabled */
-			if (msi2intx)
-				pci_enable_msi(match->dev);
-#endif
-		}
-	}
+	if (!match)
+		goto out;
 
-	if (match->irq_requested_type & KVM_ASSIGNED_DEV_MSIX)
-		current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX;
-	else if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) &&
-		 (match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI))
-		current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI;
+	host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
+	guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
 
-	changed_flags = assigned_irq->flags ^ current_flags;
+	r = -EINVAL;
+	/* can only assign one type at a time */
+	if (hweight_long(host_irq_type) > 1)
+		goto out;
+	if (hweight_long(guest_irq_type) > 1)
+		goto out;
+	if (host_irq_type == 0 && guest_irq_type == 0)
+		goto out;
 
-#ifdef __KVM_HAVE_MSIX
-	if (changed_flags & KVM_DEV_IRQ_ASSIGN_MSIX_ACTION) {
-		r = assigned_device_update_msix(kvm, match, assigned_irq);
-		if (r) {
-			printk(KERN_WARNING "kvm: failed to execute "
-					"MSI-X action!\n");
-			goto out_release;
-		}
-	} else
-#endif
-	if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) ||
-	    (msi2intx && match->dev->msi_enabled)) {
-#ifdef CONFIG_X86
-		r = assigned_device_update_msi(kvm, match, assigned_irq);
-		if (r) {
-			printk(KERN_WARNING "kvm: failed to enable "
-					"MSI device!\n");
-			goto out_release;
-		}
-#else
-		r = -ENOTTY;
-#endif
-	} else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) {
-		/* Host device IRQ 0 means don't support INTx */
-		if (!msi2intx) {
-			printk(KERN_WARNING
-			       "kvm: wait device to enable MSI!\n");
-			r = 0;
-		} else {
-			printk(KERN_WARNING
-			       "kvm: failed to enable MSI device!\n");
-			r = -ENOTTY;
-			goto out_release;
-		}
-	} else {
-		/* Non-sharing INTx mode */
-		r = assigned_device_update_intx(kvm, match, assigned_irq);
-		if (r) {
-			printk(KERN_WARNING "kvm: failed to enable "
-					"INTx device!\n");
-			goto out_release;
-		}
-	}
+	r = 0;
+	if (host_irq_type)
+		r = assign_host_irq(kvm, match, host_irq_type);
+	if (r)
+		goto out;
 
+	if (guest_irq_type)
+		r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
+out:
 	mutex_unlock(&kvm->lock);
 	return r;
-out_release:
+}
+
+static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
+					 struct kvm_assigned_irq
+					 *assigned_irq)
+{
+	int r = -ENODEV;
+	struct kvm_assigned_dev_kernel *match;
+
+	mutex_lock(&kvm->lock);
+
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_irq->assigned_dev_id);
+	if (!match)
+		goto out;
+
+	r = kvm_deassign_irq(kvm, match, assigned_irq->flags);
+out:
 	mutex_unlock(&kvm->lock);
-	kvm_free_assigned_device(kvm, match);
 	return r;
 }
 
@@ -565,7 +578,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 				      assigned_dev->assigned_dev_id);
 	if (match) {
 		/* device already assigned */
-		r = -EINVAL;
+		r = -EEXIST;
 		goto out;
 	}
 
@@ -604,6 +617,9 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 	match->dev = dev;
 	match->irq_source_id = -1;
 	match->kvm = kvm;
+	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
+	INIT_WORK(&match->interrupt_work,
+		  kvm_assigned_dev_interrupt_work_handler);
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
@@ -2084,6 +2100,11 @@ static long kvm_vm_ioctl(struct file *filp,
 		break;
 	}
 	case KVM_ASSIGN_IRQ: {
+		r = -EOPNOTSUPP;
+		break;
+	}
+#ifdef KVM_CAP_ASSIGN_DEV_IRQ
+	case KVM_ASSIGN_DEV_IRQ: {
 		struct kvm_assigned_irq assigned_irq;
 
 		r = -EFAULT;
@@ -2094,6 +2115,18 @@ static long kvm_vm_ioctl(struct file *filp,
 			goto out;
 		break;
 	}
+	case KVM_DEASSIGN_DEV_IRQ: {
+		struct kvm_assigned_irq assigned_irq;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
+			goto out;
+		r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
+		if (r)
+			goto out;
+		break;
+	}
+#endif
 #endif
 #ifdef KVM_CAP_DEVICE_DEASSIGNMENT
 	case KVM_DEASSIGN_PCI_DEVICE: {
@@ -2596,9 +2629,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 
 	kvm_preempt_ops.sched_in = kvm_sched_in;
 	kvm_preempt_ops.sched_out = kvm_sched_out;
-#ifndef CONFIG_X86
-	msi2intx = 0;
-#endif
 
 	return 0;
 
-- 
cgit v1.2.3-70-g09d2


From 78646121e9a2fcf7977cc15966420e572a450bc3 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 23 Mar 2009 12:12:11 +0200
Subject: KVM: Fix interrupt unhalting a vcpu when it shouldn't

kvm_vcpu_block() unhalts vpu on an interrupt/timer without checking
if interrupt window is actually opened.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c        |  6 ++++++
 arch/powerpc/kvm/powerpc.c      |  6 ++++++
 arch/s390/kvm/interrupt.c       |  6 ++++++
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/svm.c              | 10 ++++++++++
 arch/x86/kvm/vmx.c              |  8 +++++++-
 arch/x86/kvm/x86.c              |  5 +++++
 include/linux/kvm_host.h        |  1 +
 virt/kvm/kvm_main.c             |  3 ++-
 9 files changed, 44 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index d2a90fd505b..3bf0a345224 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1963,6 +1963,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
+{
+	/* do real check here */
+	return 1;
+}
+
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.timer_fired;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 9057335fdc6..2cf915e51e7 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -41,6 +41,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 	return !!(v->arch.pending_exceptions);
 }
 
+int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
+{
+	/* do real check here */
+	return 1;
+}
+
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
 	return !(v->arch.msr & MSR_WE);
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 0189356fe20..4ed4c3a1148 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -318,6 +318,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 	return rc;
 }
 
+int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
+{
+	/* do real check here */
+	return 1;
+}
+
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
 	return 0;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 46276273a1a..8351c4d00ac 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -521,7 +521,7 @@ struct kvm_x86_ops {
 	void (*inject_pending_irq)(struct kvm_vcpu *vcpu);
 	void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
 				       struct kvm_run *run);
-
+	int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
 	int (*get_mt_mask_shift)(void);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index aa528dbad07..de741043c5b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2270,6 +2270,15 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 		vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
 }
 
+static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	struct vmcb *vmcb = svm->vmcb;
+	return (vmcb->save.rflags & X86_EFLAGS_IF) &&
+		!(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
+		(svm->vcpu.arch.hflags & HF_GIF_MASK);
+}
+
 static void svm_intr_assist(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -2649,6 +2658,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.exception_injected = svm_exception_injected,
 	.inject_pending_irq = svm_intr_assist,
 	.inject_pending_vectors = do_interrupt_requests,
+	.interrupt_allowed = svm_interrupt_allowed,
 
 	.set_tss_addr = svm_set_tss_addr,
 	.get_tdp_level = get_npt_level,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index da6461d5dc8..b9e06b07aca 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2490,6 +2490,12 @@ static void vmx_update_window_states(struct kvm_vcpu *vcpu)
 				 GUEST_INTR_STATE_MOV_SS)));
 }
 
+static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
+{
+	vmx_update_window_states(vcpu);
+	return vcpu->arch.interrupt_window_open;
+}
+
 static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 				       struct kvm_run *kvm_run)
 {
@@ -3691,7 +3697,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.exception_injected = vmx_exception_injected,
 	.inject_pending_irq = vmx_intr_assist,
 	.inject_pending_vectors = do_interrupt_requests,
-
+	.interrupt_allowed = vmx_interrupt_allowed,
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
 	.get_mt_mask_shift = vmx_get_mt_mask_shift,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8fca7a4e95a..5bbcad34537 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4475,3 +4475,8 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
 	put_cpu();
 }
+
+int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
+{
+	return kvm_x86_ops->interrupt_allowed(vcpu);
+}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 40e49ede8f9..72d56844f38 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -298,6 +298,7 @@ int kvm_arch_hardware_setup(void);
 void kvm_arch_hardware_unsetup(void);
 void kvm_arch_check_processor_compat(void *rtn);
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
+int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
 
 void kvm_free_physmem(struct kvm *kvm);
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a1a4272fa57..63d5fa2bc84 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1610,7 +1610,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	for (;;) {
 		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
-		if (kvm_cpu_has_interrupt(vcpu) ||
+		if ((kvm_arch_interrupt_allowed(vcpu) &&
+					kvm_cpu_has_interrupt(vcpu)) ||
 				kvm_arch_vcpu_runnable(vcpu)) {
 			set_bit(KVM_REQ_UNHALT, &vcpu->requests);
 			break;
-- 
cgit v1.2.3-70-g09d2


From 2f8b9ee14eb439008e0c5131116ea6baa40dba50 Mon Sep 17 00:00:00 2001
From: nathan binkert <nate@binkert.org>
Date: Fri, 27 Mar 2009 21:53:05 -0700
Subject: KVM: Make kvm header C++ friendly

Two things needed fixing: 1) g++ does not allow a named structure type
within an anonymous union and 2) Avoid name clash between two padding
fields within the same struct by giving them different names as is
done elsewhere in the header.

Signed-off-by: Nathan Binkert <nate@binkert.org>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 644e3a9f47d..3db5d8d3748 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -119,7 +119,7 @@ struct kvm_run {
 			__u32 error_code;
 		} ex;
 		/* KVM_EXIT_IO */
-		struct kvm_io {
+		struct {
 #define KVM_EXIT_IO_IN  0
 #define KVM_EXIT_IO_OUT 1
 			__u8 direction;
@@ -224,10 +224,10 @@ struct kvm_interrupt {
 /* for KVM_GET_DIRTY_LOG */
 struct kvm_dirty_log {
 	__u32 slot;
-	__u32 padding;
+	__u32 padding1;
 	union {
 		void __user *dirty_bitmap; /* one bit per page */
-		__u64 padding;
+		__u64 padding2;
 	};
 };
 
-- 
cgit v1.2.3-70-g09d2


From 522c68c4416de3cd3e11a9ff10d58e776a69ae1e Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Mon, 27 Apr 2009 20:35:43 +0800
Subject: KVM: Enable snooping control for supported hardware

Memory aliases with different memory type is a problem for guest. For the guest
without assigned device, the memory type of guest memory would always been the
same as host(WB); but for the assigned device, some part of memory may be used
as DMA and then set to uncacheable memory type(UC/WC), which would be a conflict of
host memory type then be a potential issue.

Snooping control can guarantee the cache correctness of memory go through the
DMA engine of VT-d.

[avi: fix build on ia64]

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/include/asm/kvm_host.h |  1 +
 arch/x86/include/asm/kvm_host.h  |  1 +
 arch/x86/kvm/vmx.c               | 19 +++++++++++++++++--
 include/linux/kvm_host.h         |  3 +++
 virt/kvm/iommu.c                 | 27 ++++++++++++++++++++++++---
 5 files changed, 46 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 589536fa799..5f43697aed3 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -474,6 +474,7 @@ struct kvm_arch {
 
 	struct list_head assigned_dev_head;
 	struct iommu_domain *iommu_domain;
+	int iommu_flags;
 	struct hlist_head irq_ack_notifier_list;
 
 	unsigned long irq_sources_bitmap;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8a6f6b643df..253d8f669cf 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -393,6 +393,7 @@ struct kvm_arch{
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
 	struct iommu_domain *iommu_domain;
+	int iommu_flags;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 59b080c262e..e8a5649f9c1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3581,11 +3581,26 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 {
 	u64 ret;
 
+	/* For VT-d and EPT combination
+	 * 1. MMIO: always map as UC
+	 * 2. EPT with VT-d:
+	 *   a. VT-d without snooping control feature: can't guarantee the
+	 *	result, try to trust guest.
+	 *   b. VT-d with snooping control feature: snooping control feature of
+	 *	VT-d engine can guarantee the cache correctness. Just set it
+	 *	to WB to keep consistent with host. So the same as item 3.
+	 * 3. EPT without VT-d: always map as WB and set IGMT=1 to keep
+	 *    consistent with host MTRR
+	 */
 	if (is_mmio)
 		ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
+	else if (vcpu->kvm->arch.iommu_domain &&
+		!(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY))
+		ret = kvm_get_guest_memory_type(vcpu, gfn) <<
+		      VMX_EPT_MT_EPTE_SHIFT;
 	else
-		ret = (kvm_get_guest_memory_type(vcpu, gfn) <<
-			VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IGMT_BIT;
+		ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
+			| VMX_EPT_IGMT_BIT;
 
 	return ret;
 }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 72d56844f38..bdce8e1303c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -367,6 +367,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
 int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 
+/* For vcpu->arch.iommu_flags */
+#define KVM_IOMMU_CACHE_COHERENCY	0x1
+
 #ifdef CONFIG_IOMMU_API
 int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
 			unsigned long npages);
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 4c403750360..15147583abd 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -39,11 +39,16 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 	pfn_t pfn;
 	int i, r = 0;
 	struct iommu_domain *domain = kvm->arch.iommu_domain;
+	int flags;
 
 	/* check if iommu exists and in use */
 	if (!domain)
 		return 0;
 
+	flags = IOMMU_READ | IOMMU_WRITE;
+	if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
+		flags |= IOMMU_CACHE;
+
 	for (i = 0; i < npages; i++) {
 		/* check if already mapped */
 		if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
@@ -53,8 +58,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 		r = iommu_map_range(domain,
 				    gfn_to_gpa(gfn),
 				    pfn_to_hpa(pfn),
-				    PAGE_SIZE,
-				    IOMMU_READ | IOMMU_WRITE);
+				    PAGE_SIZE, flags);
 		if (r) {
 			printk(KERN_ERR "kvm_iommu_map_address:"
 			       "iommu failed to map pfn=%lx\n", pfn);
@@ -88,7 +92,7 @@ int kvm_assign_device(struct kvm *kvm,
 {
 	struct pci_dev *pdev = NULL;
 	struct iommu_domain *domain = kvm->arch.iommu_domain;
-	int r;
+	int r, last_flags;
 
 	/* check if iommu exists and in use */
 	if (!domain)
@@ -107,12 +111,29 @@ int kvm_assign_device(struct kvm *kvm,
 		return r;
 	}
 
+	last_flags = kvm->arch.iommu_flags;
+	if (iommu_domain_has_cap(kvm->arch.iommu_domain,
+				 IOMMU_CAP_CACHE_COHERENCY))
+		kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY;
+
+	/* Check if need to update IOMMU page table for guest memory */
+	if ((last_flags ^ kvm->arch.iommu_flags) ==
+			KVM_IOMMU_CACHE_COHERENCY) {
+		kvm_iommu_unmap_memslots(kvm);
+		r = kvm_iommu_map_memslots(kvm);
+		if (r)
+			goto out_unmap;
+	}
+
 	printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
 		assigned_dev->host_busnr,
 		PCI_SLOT(assigned_dev->host_devfn),
 		PCI_FUNC(assigned_dev->host_devfn));
 
 	return 0;
+out_unmap:
+	kvm_iommu_unmap_memslots(kvm);
+	return r;
 }
 
 int kvm_deassign_device(struct kvm *kvm,
-- 
cgit v1.2.3-70-g09d2


From 32f8840064d88cc3f6e85203aec7b6b57bebcb97 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Thu, 7 May 2009 17:55:12 -0300
Subject: KVM: use smp_send_reschedule in kvm_vcpu_kick

KVM uses a function call IPI to cause the exit of a guest running on a
physical cpu. For virtual interrupt notification there is no need to
wait on IPI receival, or to execute any function.

This is exactly what the reschedule IPI does, without the overhead
of function IPI. So use it instead of smp_call_function_single in
kvm_vcpu_kick.

Also change the "guest_mode" variable to a bit in vcpu->requests, and
use that to collapse multiple IPI's that would be issued between the
first one and zeroing of guest mode.

This allows kvm_vcpu_kick to called with interrupts disabled.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/kernel/irq_ia64.c |  3 +++
 arch/ia64/kvm/kvm-ia64.c    | 22 ++++++++--------------
 arch/x86/kernel/smp.c       |  3 +++
 arch/x86/kvm/x86.c          | 36 +++++++++++-------------------------
 include/linux/kvm_host.h    |  2 +-
 5 files changed, 26 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index acc4d19ae62..b448197728b 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -610,6 +610,9 @@ static struct irqaction ipi_irqaction = {
 	.name =		"IPI"
 };
 
+/*
+ * KVM uses this interrupt to force a cpu out of guest mode
+ */
 static struct irqaction resched_irqaction = {
 	.handler =	dummy_handler,
 	.flags =	IRQF_DISABLED,
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index be4413e1f43..80c57b0a21c 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -668,7 +668,7 @@ again:
 	host_ctx = kvm_get_host_context(vcpu);
 	guest_ctx = kvm_get_guest_context(vcpu);
 
-	vcpu->guest_mode = 1;
+	clear_bit(KVM_REQ_KICK, &vcpu->requests);
 
 	r = kvm_vcpu_pre_transition(vcpu);
 	if (r < 0)
@@ -685,7 +685,7 @@ again:
 	kvm_vcpu_post_transition(vcpu);
 
 	vcpu->arch.launched = 1;
-	vcpu->guest_mode = 0;
+	set_bit(KVM_REQ_KICK, &vcpu->requests);
 	local_irq_enable();
 
 	/*
@@ -1879,24 +1879,18 @@ void kvm_arch_hardware_unsetup(void)
 {
 }
 
-static void vcpu_kick_intr(void *info)
-{
-#ifdef DEBUG
-	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
-	printk(KERN_DEBUG"vcpu_kick_intr %p \n", vcpu);
-#endif
-}
-
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 {
-	int ipi_pcpu = vcpu->cpu;
-	int cpu = get_cpu();
+	int me;
+	int cpu = vcpu->cpu;
 
 	if (waitqueue_active(&vcpu->wq))
 		wake_up_interruptible(&vcpu->wq);
 
-	if (vcpu->guest_mode && cpu != ipi_pcpu)
-		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
+	me = get_cpu();
+	if (cpu != me && (unsigned) cpu < nr_cpu_ids && cpu_online(cpu))
+		if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
+			smp_send_reschedule(cpu);
 	put_cpu();
 }
 
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 13f33ea8cca..3b2e55e8ad2 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -172,6 +172,9 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
 {
 	ack_APIC_irq();
 	inc_irq_stat(irq_resched_count);
+	/*
+	 * KVM uses this interrupt to force a cpu out of guest mode
+	 */
 }
 
 void smp_call_function_interrupt(struct pt_regs *regs)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 75927700a26..3c4c327490a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3230,6 +3230,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	local_irq_disable();
 
+	clear_bit(KVM_REQ_KICK, &vcpu->requests);
+	smp_mb__after_clear_bit();
+
 	if (vcpu->requests || need_resched() || signal_pending(current)) {
 		local_irq_enable();
 		preempt_enable();
@@ -3237,13 +3240,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		goto out;
 	}
 
-	vcpu->guest_mode = 1;
-	/*
-	 * Make sure that guest_mode assignment won't happen after
-	 * testing the pending IRQ vector bitmap.
-	 */
-	smp_wmb();
-
 	if (vcpu->arch.exception.pending)
 		__queue_exception(vcpu);
 	else
@@ -3288,7 +3284,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	set_debugreg(vcpu->arch.host_dr6, 6);
 	set_debugreg(vcpu->arch.host_dr7, 7);
 
-	vcpu->guest_mode = 0;
+	set_bit(KVM_REQ_KICK, &vcpu->requests);
 	local_irq_enable();
 
 	++vcpu->stat.exits;
@@ -4571,30 +4567,20 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 	       || vcpu->arch.nmi_pending;
 }
 
-static void vcpu_kick_intr(void *info)
-{
-#ifdef DEBUG
-	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
-	printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
-#endif
-}
-
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 {
-	int ipi_pcpu = vcpu->cpu;
-	int cpu;
+	int me;
+	int cpu = vcpu->cpu;
 
 	if (waitqueue_active(&vcpu->wq)) {
 		wake_up_interruptible(&vcpu->wq);
 		++vcpu->stat.halt_wakeup;
 	}
-	/*
-	 * We may be called synchronously with irqs disabled in guest mode,
-	 * So need not to call smp_call_function_single() in that case.
-	 */
-	cpu = get_cpu();
-	if (vcpu->guest_mode && vcpu->cpu != cpu)
-		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
+
+	me = get_cpu();
+	if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
+		if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
+			smp_send_reschedule(cpu);
 	put_cpu();
 }
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index bdce8e1303c..16181628419 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -38,6 +38,7 @@
 #define KVM_REQ_UNHALT             6
 #define KVM_REQ_MMU_SYNC           7
 #define KVM_REQ_KVMCLOCK_UPDATE    8
+#define KVM_REQ_KICK               9
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID	0
 
@@ -72,7 +73,6 @@ struct kvm_vcpu {
 	struct mutex mutex;
 	int   cpu;
 	struct kvm_run *run;
-	int guest_mode;
 	unsigned long requests;
 	unsigned long guest_debug;
 	int fpu_active;
-- 
cgit v1.2.3-70-g09d2


From 547de29e5b1662deb05b5f90917902dc0e9ac182 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Thu, 7 May 2009 17:55:13 -0300
Subject: KVM: protect assigned dev workqueue, int handler and irq acker

kvm_assigned_dev_ack_irq is vulnerable to a race condition with the
interrupt handler function. It does:

        if (dev->host_irq_disabled) {
                enable_irq(dev->host_irq);
                dev->host_irq_disabled = false;
        }

If an interrupt triggers before the host->dev_irq_disabled assignment,
it will disable the interrupt and set dev->host_irq_disabled to true.

On return to kvm_assigned_dev_ack_irq, dev->host_irq_disabled is set to
false, and the next kvm_assigned_dev_ack_irq call will fail to reenable
it.

Other than that, having the interrupt handler and work handlers run in
parallel sounds like asking for trouble (could not spot any obvious
problem, but better not have to, its fragile).

CC: sheng.yang@intel.com
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h |  1 +
 virt/kvm/kvm_main.c      | 13 ++++++++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 16181628419..aacc5449f58 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -345,6 +345,7 @@ struct kvm_assigned_dev_kernel {
 	int flags;
 	struct pci_dev *dev;
 	struct kvm *kvm;
+	spinlock_t assigned_dev_lock;
 };
 
 struct kvm_irq_mask_notifier {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 29c0afb064d..687d113a3e5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -42,6 +42,7 @@
 #include <linux/mman.h>
 #include <linux/swap.h>
 #include <linux/bitops.h>
+#include <linux/spinlock.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -130,6 +131,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 	 * finer-grained lock, update this
 	 */
 	mutex_lock(&kvm->lock);
+	spin_lock_irq(&assigned_dev->assigned_dev_lock);
 	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		struct kvm_guest_msix_entry *guest_entries =
 			assigned_dev->guest_msix_entries;
@@ -156,18 +158,21 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 		}
 	}
 
+	spin_unlock_irq(&assigned_dev->assigned_dev_lock);
 	mutex_unlock(&assigned_dev->kvm->lock);
 }
 
 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 {
+	unsigned long flags;
 	struct kvm_assigned_dev_kernel *assigned_dev =
 		(struct kvm_assigned_dev_kernel *) dev_id;
 
+	spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags);
 	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		int index = find_index_from_host_irq(assigned_dev, irq);
 		if (index < 0)
-			return IRQ_HANDLED;
+			goto out;
 		assigned_dev->guest_msix_entries[index].flags |=
 			KVM_ASSIGNED_MSIX_PENDING;
 	}
@@ -177,6 +182,8 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 	disable_irq_nosync(irq);
 	assigned_dev->host_irq_disabled = true;
 
+out:
+	spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
 	return IRQ_HANDLED;
 }
 
@@ -184,6 +191,7 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 {
 	struct kvm_assigned_dev_kernel *dev;
+	unsigned long flags;
 
 	if (kian->gsi == -1)
 		return;
@@ -196,10 +204,12 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 	/* The guest irq may be shared so this ack may be
 	 * from another device.
 	 */
+	spin_lock_irqsave(&dev->assigned_dev_lock, flags);
 	if (dev->host_irq_disabled) {
 		enable_irq(dev->host_irq);
 		dev->host_irq_disabled = false;
 	}
+	spin_unlock_irqrestore(&dev->assigned_dev_lock, flags);
 }
 
 static void deassign_guest_irq(struct kvm *kvm,
@@ -615,6 +625,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 	match->host_devfn = assigned_dev->devfn;
 	match->flags = assigned_dev->flags;
 	match->dev = dev;
+	spin_lock_init(&match->assigned_dev_lock);
 	match->irq_source_id = -1;
 	match->kvm = kvm;
 	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
-- 
cgit v1.2.3-70-g09d2


From 04dce7d9d429ea5ea04e9432d1726c930f4d67da Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 10 Jun 2009 16:59:46 +1000
Subject: spinlock: Add missing __raw_spin_lock_flags() stub for UP

This was only defined with CONFIG_DEBUG_SPINLOCK set, but some
obscure arch/powerpc code wants it always.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/spinlock_up.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index 938234c4a99..d4841ed8215 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -60,6 +60,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 #define __raw_spin_is_locked(lock)	((void)(lock), 0)
 /* for sched.c and kernel_lock.c: */
 # define __raw_spin_lock(lock)		do { (void)(lock); } while (0)
+# define __raw_spin_lock_flags(lock, flags)	do { (void)(lock); } while (0)
 # define __raw_spin_unlock(lock)	do { (void)(lock); } while (0)
 # define __raw_spin_trylock(lock)	({ (void)(lock); 1; })
 #endif /* DEBUG_SPINLOCK */
-- 
cgit v1.2.3-70-g09d2


From f1db457ce6e2f63cb01022f58c0c023838958bd1 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 10 Jun 2009 10:06:24 +0800
Subject: tracing/events: convert block trace points to TRACE_EVENT(), fix
 !CONFIG_BLOCK

Fix building failures when CONFIG_BLOCK == n.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4A2F1520.8020003@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/blktrace_api.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index c7ec31dd04c..7e4350ece0f 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -218,7 +218,7 @@ static inline int blk_trace_init_sysfs(struct device *dev)
 
 #endif /* CONFIG_BLK_DEV_IO_TRACE */
 
-#ifdef CONFIG_EVENT_TRACING
+#if defined(CONFIG_EVENT_TRACING) && defined(CONFIG_BLOCK)
 
 static inline int blk_cmd_buf_len(struct request *rq)
 {
@@ -229,7 +229,7 @@ extern void blk_dump_cmd(char *buf, struct request *rq);
 extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes);
 extern void blk_fill_rwbs_rq(char *rwbs, struct request *rq);
 
-#endif /* CONFIG_EVENT_TRACING */
+#endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */
 
 #endif /* __KERNEL__ */
 #endif
-- 
cgit v1.2.3-70-g09d2


From 440f0d588555892601cfe511728a0fc0c8204063 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 10 Jun 2009 14:32:47 +0200
Subject: netfilter: nf_conntrack: use per-conntrack locks for protocol data

Introduce per-conntrack locks and use them instead of the global protocol
locks to avoid contention. Especially tcp_lock shows up very high in
profiles on larger machines.

This will also allow to simplify the upcoming reliable event delivery patches.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack.h         |  2 ++
 include/net/netfilter/nf_conntrack_l4proto.h |  4 +--
 net/netfilter/nf_conntrack_core.c            |  1 +
 net/netfilter/nf_conntrack_netlink.c         |  4 +--
 net/netfilter/nf_conntrack_proto_dccp.c      | 24 ++++++++---------
 net/netfilter/nf_conntrack_proto_gre.c       |  3 +--
 net/netfilter/nf_conntrack_proto_sctp.c      | 27 +++++++++----------
 net/netfilter/nf_conntrack_proto_tcp.c       | 39 +++++++++++++---------------
 8 files changed, 49 insertions(+), 55 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 2b877374242..ecc79f95907 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -93,6 +93,8 @@ struct nf_conn {
            plus 1 for any connection(s) we are `master' for */
 	struct nf_conntrack ct_general;
 
+	spinlock_t lock;
+
 	/* XXX should I move this to the tail ? - Y.K */
 	/* These are my tuples; original and reply */
 	struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX];
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index ba32ed7bdab..3767fb41e54 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -59,11 +59,11 @@ struct nf_conntrack_l4proto
 			   const struct nf_conntrack_tuple *);
 
 	/* Print out the private part of the conntrack. */
-	int (*print_conntrack)(struct seq_file *s, const struct nf_conn *);
+	int (*print_conntrack)(struct seq_file *s, struct nf_conn *);
 
 	/* convert protoinfo to nfnetink attributes */
 	int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla,
-			 const struct nf_conn *ct);
+			 struct nf_conn *ct);
 	/* Calculate protoinfo nlattr size */
 	int (*nlattr_size)(void);
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index b54c23475e9..edf95695e0a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -519,6 +519,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
 		return ERR_PTR(-ENOMEM);
 	}
 
+	spin_lock_init(&ct->lock);
 	atomic_set(&ct->ct_general.use, 1);
 	ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
 	ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 4448b062de0..4e503ada572 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -143,7 +143,7 @@ nla_put_failure:
 }
 
 static inline int
-ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct)
+ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct)
 {
 	struct nf_conntrack_l4proto *l4proto;
 	struct nlattr *nest_proto;
@@ -347,7 +347,7 @@ nla_put_failure:
 
 static int
 ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
-		    int event, const struct nf_conn *ct)
+		    int event, struct nf_conn *ct)
 {
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 11801c43c8c..6b08d327796 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -24,8 +24,6 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_log.h>
 
-static DEFINE_RWLOCK(dccp_lock);
-
 /* Timeouts are based on values from RFC4340:
  *
  * - REQUEST:
@@ -491,7 +489,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
 		return NF_ACCEPT;
 	}
 
-	write_lock_bh(&dccp_lock);
+	spin_lock_bh(&ct->lock);
 
 	role = ct->proto.dccp.role[dir];
 	old_state = ct->proto.dccp.state;
@@ -535,13 +533,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
 		ct->proto.dccp.last_dir = dir;
 		ct->proto.dccp.last_pkt = type;
 
-		write_unlock_bh(&dccp_lock);
+		spin_unlock_bh(&ct->lock);
 		if (LOG_INVALID(net, IPPROTO_DCCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_dccp: invalid packet ignored ");
 		return NF_ACCEPT;
 	case CT_DCCP_INVALID:
-		write_unlock_bh(&dccp_lock);
+		spin_unlock_bh(&ct->lock);
 		if (LOG_INVALID(net, IPPROTO_DCCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				      "nf_ct_dccp: invalid state transition ");
@@ -551,7 +549,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
 	ct->proto.dccp.last_dir = dir;
 	ct->proto.dccp.last_pkt = type;
 	ct->proto.dccp.state = new_state;
-	write_unlock_bh(&dccp_lock);
+	spin_unlock_bh(&ct->lock);
 
 	dn = dccp_pernet(net);
 	nf_ct_refresh_acct(ct, ctinfo, skb, dn->dccp_timeout[new_state]);
@@ -617,18 +615,18 @@ static int dccp_print_tuple(struct seq_file *s,
 			  ntohs(tuple->dst.u.dccp.port));
 }
 
-static int dccp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
+static int dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
 	return seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]);
 }
 
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
 static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
-			  const struct nf_conn *ct)
+			  struct nf_conn *ct)
 {
 	struct nlattr *nest_parms;
 
-	read_lock_bh(&dccp_lock);
+	spin_lock_bh(&ct->lock);
 	nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP | NLA_F_NESTED);
 	if (!nest_parms)
 		goto nla_put_failure;
@@ -638,11 +636,11 @@ static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
 	NLA_PUT_BE64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ,
 		     cpu_to_be64(ct->proto.dccp.handshake_seq));
 	nla_nest_end(skb, nest_parms);
-	read_unlock_bh(&dccp_lock);
+	spin_unlock_bh(&ct->lock);
 	return 0;
 
 nla_put_failure:
-	read_unlock_bh(&dccp_lock);
+	spin_unlock_bh(&ct->lock);
 	return -1;
 }
 
@@ -673,7 +671,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
 		return -EINVAL;
 	}
 
-	write_lock_bh(&dccp_lock);
+	spin_lock_bh(&ct->lock);
 	ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]);
 	if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) {
 		ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
@@ -686,7 +684,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
 		ct->proto.dccp.handshake_seq =
 		be64_to_cpu(nla_get_be64(tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]));
 	}
-	write_unlock_bh(&dccp_lock);
+	spin_unlock_bh(&ct->lock);
 	return 0;
 }
 
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 117b80112fc..175a28c9616 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -219,8 +219,7 @@ static int gre_print_tuple(struct seq_file *s,
 }
 
 /* print private data for conntrack */
-static int gre_print_conntrack(struct seq_file *s,
-			       const struct nf_conn *ct)
+static int gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
 	return seq_printf(s, "timeout=%u, stream_timeout=%u ",
 			  (ct->proto.gre.timeout / HZ),
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 101b4ad9e81..c10e6f36e31 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -25,9 +25,6 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
 
-/* Protects ct->proto.sctp */
-static DEFINE_RWLOCK(sctp_lock);
-
 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
    closely.  They're more complex. --RR
 
@@ -164,13 +161,13 @@ static int sctp_print_tuple(struct seq_file *s,
 }
 
 /* Print out the private part of the conntrack. */
-static int sctp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
+static int sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
 	enum sctp_conntrack state;
 
-	read_lock_bh(&sctp_lock);
+	spin_lock_bh(&ct->lock);
 	state = ct->proto.sctp.state;
-	read_unlock_bh(&sctp_lock);
+	spin_unlock_bh(&ct->lock);
 
 	return seq_printf(s, "%s ", sctp_conntrack_names[state]);
 }
@@ -318,7 +315,7 @@ static int sctp_packet(struct nf_conn *ct,
 	}
 
 	old_state = new_state = SCTP_CONNTRACK_NONE;
-	write_lock_bh(&sctp_lock);
+	spin_lock_bh(&ct->lock);
 	for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
 		/* Special cases of Verification tag check (Sec 8.5.1) */
 		if (sch->type == SCTP_CID_INIT) {
@@ -371,7 +368,7 @@ static int sctp_packet(struct nf_conn *ct,
 		if (old_state != new_state)
 			nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
 	}
-	write_unlock_bh(&sctp_lock);
+	spin_unlock_bh(&ct->lock);
 
 	nf_ct_refresh_acct(ct, ctinfo, skb, sctp_timeouts[new_state]);
 
@@ -386,7 +383,7 @@ static int sctp_packet(struct nf_conn *ct,
 	return NF_ACCEPT;
 
 out_unlock:
-	write_unlock_bh(&sctp_lock);
+	spin_unlock_bh(&ct->lock);
 out:
 	return -NF_ACCEPT;
 }
@@ -469,11 +466,11 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
 #include <linux/netfilter/nfnetlink_conntrack.h>
 
 static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
-			  const struct nf_conn *ct)
+			  struct nf_conn *ct)
 {
 	struct nlattr *nest_parms;
 
-	read_lock_bh(&sctp_lock);
+	spin_lock_bh(&ct->lock);
 	nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP | NLA_F_NESTED);
 	if (!nest_parms)
 		goto nla_put_failure;
@@ -488,14 +485,14 @@ static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
 		     CTA_PROTOINFO_SCTP_VTAG_REPLY,
 		     ct->proto.sctp.vtag[IP_CT_DIR_REPLY]);
 
-	read_unlock_bh(&sctp_lock);
+	spin_unlock_bh(&ct->lock);
 
 	nla_nest_end(skb, nest_parms);
 
 	return 0;
 
 nla_put_failure:
-	read_unlock_bh(&sctp_lock);
+	spin_unlock_bh(&ct->lock);
 	return -1;
 }
 
@@ -527,13 +524,13 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
 	    !tb[CTA_PROTOINFO_SCTP_VTAG_REPLY])
 		return -EINVAL;
 
-	write_lock_bh(&sctp_lock);
+	spin_lock_bh(&ct->lock);
 	ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]);
 	ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] =
 		nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]);
 	ct->proto.sctp.vtag[IP_CT_DIR_REPLY] =
 		nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]);
-	write_unlock_bh(&sctp_lock);
+	spin_unlock_bh(&ct->lock);
 
 	return 0;
 }
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index b7e8a825efe..5c5739c741f 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -29,9 +29,6 @@
 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
 
-/* Protects ct->proto.tcp */
-static DEFINE_RWLOCK(tcp_lock);
-
 /* "Be conservative in what you do,
     be liberal in what you accept from others."
     If it's non-zero, we mark only out of window RST segments as INVALID. */
@@ -309,13 +306,13 @@ static int tcp_print_tuple(struct seq_file *s,
 }
 
 /* Print out the private part of the conntrack. */
-static int tcp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
+static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
 	enum tcp_conntrack state;
 
-	read_lock_bh(&tcp_lock);
+	spin_lock_bh(&ct->lock);
 	state = ct->proto.tcp.state;
-	read_unlock_bh(&tcp_lock);
+	spin_unlock_bh(&ct->lock);
 
 	return seq_printf(s, "%s ", tcp_conntrack_names[state]);
 }
@@ -725,14 +722,14 @@ void nf_conntrack_tcp_update(const struct sk_buff *skb,
 
 	end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph);
 
-	write_lock_bh(&tcp_lock);
+	spin_lock_bh(&ct->lock);
 	/*
 	 * We have to worry for the ack in the reply packet only...
 	 */
 	if (after(end, ct->proto.tcp.seen[dir].td_end))
 		ct->proto.tcp.seen[dir].td_end = end;
 	ct->proto.tcp.last_end = end;
-	write_unlock_bh(&tcp_lock);
+	spin_unlock_bh(&ct->lock);
 	pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
 		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
 		 sender->td_end, sender->td_maxend, sender->td_maxwin,
@@ -841,7 +838,7 @@ static int tcp_packet(struct nf_conn *ct,
 	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
 	BUG_ON(th == NULL);
 
-	write_lock_bh(&tcp_lock);
+	spin_lock_bh(&ct->lock);
 	old_state = ct->proto.tcp.state;
 	dir = CTINFO2DIR(ctinfo);
 	index = get_conntrack_index(th);
@@ -871,7 +868,7 @@ static int tcp_packet(struct nf_conn *ct,
 		        && ct->proto.tcp.last_index == TCP_RST_SET)) {
 			/* Attempt to reopen a closed/aborted connection.
 			 * Delete this connection and look up again. */
-			write_unlock_bh(&tcp_lock);
+			spin_unlock_bh(&ct->lock);
 
 			/* Only repeat if we can actually remove the timer.
 			 * Destruction may already be in progress in process
@@ -907,7 +904,7 @@ static int tcp_packet(struct nf_conn *ct,
 			 * that the client cannot but retransmit its SYN and
 			 * thus initiate a clean new session.
 			 */
-			write_unlock_bh(&tcp_lock);
+			spin_unlock_bh(&ct->lock);
 			if (LOG_INVALID(net, IPPROTO_TCP))
 				nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 					  "nf_ct_tcp: killing out of sync session ");
@@ -920,7 +917,7 @@ static int tcp_packet(struct nf_conn *ct,
 		ct->proto.tcp.last_end =
 		    segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
 
-		write_unlock_bh(&tcp_lock);
+		spin_unlock_bh(&ct->lock);
 		if (LOG_INVALID(net, IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				  "nf_ct_tcp: invalid packet ignored ");
@@ -929,7 +926,7 @@ static int tcp_packet(struct nf_conn *ct,
 		/* Invalid packet */
 		pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
 			 dir, get_conntrack_index(th), old_state);
-		write_unlock_bh(&tcp_lock);
+		spin_unlock_bh(&ct->lock);
 		if (LOG_INVALID(net, IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
 				  "nf_ct_tcp: invalid state ");
@@ -960,7 +957,7 @@ static int tcp_packet(struct nf_conn *ct,
 
 	if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
 			   skb, dataoff, th, pf)) {
-		write_unlock_bh(&tcp_lock);
+		spin_unlock_bh(&ct->lock);
 		return -NF_ACCEPT;
 	}
      in_window:
@@ -989,7 +986,7 @@ static int tcp_packet(struct nf_conn *ct,
 		timeout = nf_ct_tcp_timeout_unacknowledged;
 	else
 		timeout = tcp_timeouts[new_state];
-	write_unlock_bh(&tcp_lock);
+	spin_unlock_bh(&ct->lock);
 
 	if (new_state != old_state)
 		nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
@@ -1106,12 +1103,12 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
 #include <linux/netfilter/nfnetlink_conntrack.h>
 
 static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
-			 const struct nf_conn *ct)
+			 struct nf_conn *ct)
 {
 	struct nlattr *nest_parms;
 	struct nf_ct_tcp_flags tmp = {};
 
-	read_lock_bh(&tcp_lock);
+	spin_lock_bh(&ct->lock);
 	nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
 	if (!nest_parms)
 		goto nla_put_failure;
@@ -1131,14 +1128,14 @@ static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
 	tmp.flags = ct->proto.tcp.seen[1].flags;
 	NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
 		sizeof(struct nf_ct_tcp_flags), &tmp);
-	read_unlock_bh(&tcp_lock);
+	spin_unlock_bh(&ct->lock);
 
 	nla_nest_end(skb, nest_parms);
 
 	return 0;
 
 nla_put_failure:
-	read_unlock_bh(&tcp_lock);
+	spin_unlock_bh(&ct->lock);
 	return -1;
 }
 
@@ -1169,7 +1166,7 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
 	    nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
 		return -EINVAL;
 
-	write_lock_bh(&tcp_lock);
+	spin_lock_bh(&ct->lock);
 	if (tb[CTA_PROTOINFO_TCP_STATE])
 		ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
 
@@ -1196,7 +1193,7 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
 		ct->proto.tcp.seen[1].td_scale =
 			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
 	}
-	write_unlock_bh(&tcp_lock);
+	spin_unlock_bh(&ct->lock);
 
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From feb6118d4e76bc5685909426c2dcc90fdb7ba05b Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 24 May 2009 20:00:39 +0300
Subject: [SCSI] libosd: OSD2r05: Additional command enums

Add all constant definitions of new OSD commands added in
revision 4 & 5. Mainly for creating snapshots and clones.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 include/scsi/osd_protocol.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/scsi/osd_protocol.h b/include/scsi/osd_protocol.h
index 62b2ab8c69d..2cc8e8b1cc1 100644
--- a/include/scsi/osd_protocol.h
+++ b/include/scsi/osd_protocol.h
@@ -303,7 +303,15 @@ enum osd_service_actions {
 	OSD_ACT_V2(REMOVE_MEMBER_OBJECTS,	0x21)
 	OSD_ACT_V2(GET_MEMBER_ATTRIBUTES,	0x22)
 	OSD_ACT_V2(SET_MEMBER_ATTRIBUTES,	0x23)
+
+	OSD_ACT_V2(CREATE_CLONE,		0x28)
+	OSD_ACT_V2(CREATE_SNAPSHOT,		0x29)
+	OSD_ACT_V2(DETACH_CLONE,		0x2A)
+	OSD_ACT_V2(REFRESH_SNAPSHOT_CLONE,	0x2B)
+	OSD_ACT_V2(RESTORE_PARTITION_FROM_SNAPSHOT, 0x2C)
+
 	OSD_ACT_V2(READ_MAP,			0x31)
+	OSD_ACT_V2(READ_MAPS_COMPARE,		0x32)
 
 	OSD_ACT_V1_V2(PERFORM_SCSI_COMMAND,	0x8F7E, 0x8F7C)
 	OSD_ACT_V1_V2(SCSI_TASK_MANAGEMENT,	0x8F7F, 0x8F7D)
-- 
cgit v1.2.3-70-g09d2


From 29191b92030bc97b05b539ce5ae0543c24a6d7ca Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 24 May 2009 20:01:03 +0300
Subject: [SCSI] libosd: OSD2r05: Attribute definitions

Some New revision 5 Attribute definitions.
Some missing definitions of Attributes and pages

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 include/scsi/osd_attributes.h | 74 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 72 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/scsi/osd_attributes.h b/include/scsi/osd_attributes.h
index f888a6fda07..56e920ade32 100644
--- a/include/scsi/osd_attributes.h
+++ b/include/scsi/osd_attributes.h
@@ -29,6 +29,7 @@ enum {
 	OSD_APAGE_PARTITION_INFORMATION = OSD_APAGE_PARTITION_FIRST + 1,
 	OSD_APAGE_PARTITION_QUOTAS	= OSD_APAGE_PARTITION_FIRST + 2,
 	OSD_APAGE_PARTITION_TIMESTAMP	= OSD_APAGE_PARTITION_FIRST + 3,
+	OSD_APAGE_PARTITION_ATTR_ACCESS = OSD_APAGE_PARTITION_FIRST + 4,
 	OSD_APAGE_PARTITION_SECURITY	= OSD_APAGE_PARTITION_FIRST + 5,
 	OSD_APAGE_PARTITION_LAST	= 0x5FFFFFFF,
 
@@ -51,7 +52,9 @@ enum {
 	OSD_APAGE_RESERVED_TYPE_LAST	= 0xEFFFFFFF,
 
 	OSD_APAGE_COMMON_FIRST		= 0xF0000000,
-	OSD_APAGE_COMMON_LAST		= 0xFFFFFFFE,
+	OSD_APAGE_COMMON_LAST		= 0xFFFFFFFD,
+
+	OSD_APAGE_CURRENT_COMMAND	= 0xFFFFFFFE,
 
 	OSD_APAGE_REQUEST_ALL		= 0xFFFFFFFF,
 };
@@ -106,10 +109,30 @@ enum {
 	OSD_ATTR_RI_PRODUCT_REVISION_LEVEL   = 0x7,   /* 4        */
 	OSD_ATTR_RI_PRODUCT_SERIAL_NUMBER    = 0x8,   /* variable */
 	OSD_ATTR_RI_OSD_NAME                 = 0x9,   /* variable */
+	OSD_ATTR_RI_MAX_CDB_CONTINUATION_LEN = 0xA,   /* 4        */
 	OSD_ATTR_RI_TOTAL_CAPACITY           = 0x80,  /* 8        */
 	OSD_ATTR_RI_USED_CAPACITY            = 0x81,  /* 8        */
 	OSD_ATTR_RI_NUMBER_OF_PARTITIONS     = 0xC0,  /* 8        */
 	OSD_ATTR_RI_CLOCK                    = 0x100, /* 6        */
+	OARI_DEFAULT_ISOLATION_METHOD        = 0X110, /* 1        */
+	OARI_SUPPORTED_ISOLATION_METHODS     = 0X111, /* 32       */
+
+	OARI_DATA_ATOMICITY_GUARANTEE                   = 0X120,   /* 8       */
+	OARI_DATA_ATOMICITY_ALIGNMENT                   = 0X121,   /* 8       */
+	OARI_ATTRIBUTES_ATOMICITY_GUARANTEE             = 0X122,   /* 8       */
+	OARI_DATA_ATTRIBUTES_ATOMICITY_MULTIPLIER       = 0X123,   /* 1       */
+
+	OARI_MAXIMUM_SNAPSHOTS_COUNT                    = 0X1C1,    /* 0 or 4 */
+	OARI_MAXIMUM_CLONES_COUNT                       = 0X1C2,    /* 0 or 4 */
+	OARI_MAXIMUM_BRANCH_DEPTH                       = 0X1CC,    /* 0 or 4 */
+	OARI_SUPPORTED_OBJECT_DUPLICATION_METHOD_FIRST  = 0X200,    /* 0 or 4 */
+	OARI_SUPPORTED_OBJECT_DUPLICATION_METHOD_LAST   = 0X2ff,    /* 0 or 4 */
+	OARI_SUPPORTED_TIME_OF_DUPLICATION_METHOD_FIRST = 0X300,    /* 0 or 4 */
+	OARI_SUPPORTED_TIME_OF_DUPLICATION_METHOD_LAST  = 0X30F,    /* 0 or 4 */
+	OARI_SUPPORT_FOR_DUPLICATED_OBJECT_FREEZING     = 0X310,    /* 0 or 4 */
+	OARI_SUPPORT_FOR_SNAPSHOT_REFRESHING            = 0X311,    /* 0 or 1 */
+	OARI_SUPPORTED_CDB_CONTINUATION_DESC_TYPE_FIRST = 0X7000001,/* 0 or 4 */
+	OARI_SUPPORTED_CDB_CONTINUATION_DESC_TYPE_LAST  = 0X700FFFF,/* 0 or 4 */
 };
 /* Root_Information_attributes_page does not have a get_page structure */
 
@@ -120,7 +143,15 @@ enum {
 	OSD_ATTR_PI_PARTITION_ID            = 0x1,     /* 8        */
 	OSD_ATTR_PI_USERNAME                = 0x9,     /* variable */
 	OSD_ATTR_PI_USED_CAPACITY           = 0x81,    /* 8        */
+	OSD_ATTR_PI_USED_CAPACITY_INCREMENT = 0x84,    /* 0 or 8   */
 	OSD_ATTR_PI_NUMBER_OF_OBJECTS       = 0xC1,    /* 8        */
+
+	OSD_ATTR_PI_ACTUAL_DATA_SPACE                      = 0xD1, /* 0 or 8 */
+	OSD_ATTR_PI_RESERVED_DATA_SPACE                    = 0xD2, /* 0 or 8 */
+	OSD_ATTR_PI_DEFAULT_SNAPSHOT_DUPLICATION_METHOD    = 0x200,/* 0 or 4 */
+	OSD_ATTR_PI_DEFAULT_CLONE_DUPLICATION_METHOD       = 0x201,/* 0 or 4 */
+	OSD_ATTR_PI_DEFAULT_SP_TIME_OF_DUPLICATION         = 0x300,/* 0 or 4 */
+	OSD_ATTR_PI_DEFAULT_CLONE_TIME_OF_DUPLICATION      = 0x301,/* 0 or 4 */
 };
 /* Partition Information attributes page does not have a get_page structure */
 
@@ -131,6 +162,7 @@ enum {
 	OSD_ATTR_CI_PARTITION_ID           = 0x1,       /* 8        */
 	OSD_ATTR_CI_COLLECTION_OBJECT_ID   = 0x2,       /* 8        */
 	OSD_ATTR_CI_USERNAME               = 0x9,       /* variable */
+	OSD_ATTR_CI_COLLECTION_TYPE        = 0xA,       /* 1        */
 	OSD_ATTR_CI_USED_CAPACITY          = 0x81,      /* 8        */
 };
 /* Collection Information attributes page does not have a get_page structure */
@@ -144,6 +176,8 @@ enum {
 	OSD_ATTR_OI_USERNAME             = 0x9,       /* variable */
 	OSD_ATTR_OI_USED_CAPACITY        = 0x81,      /* 8        */
 	OSD_ATTR_OI_LOGICAL_LENGTH       = 0x82,      /* 8        */
+	SD_ATTR_OI_ACTUAL_DATA_SPACE     = 0XD1,      /* 0 OR 8   */
+	SD_ATTR_OI_RESERVED_DATA_SPACE   = 0XD2,      /* 0 OR 8   */
 };
 /* Object Information attributes page does not have a get_page structure */
 
@@ -248,7 +282,18 @@ struct object_timestamps_attributes_page {
 	struct osd_timestamp data_modified_time;
 }  __packed;
 
-/* 7.1.2.19 Collections attributes page */
+/* OSD2r05: 7.1.3.19 Attributes Access attributes page
+ * (OSD_APAGE_PARTITION_ATTR_ACCESS)
+ *
+ * each attribute is of the form below. Total array length is deduced
+ * from the attribute's length
+ * (See allowed_attributes_access of the struct osd_cap_object_descriptor)
+ */
+struct attributes_access_attr {
+	struct osd_attributes_list_attrid attr_list[0];
+} __packed;
+
+/* OSD2r05: 7.1.2.21 Collections attributes page */
 /* TBD */
 
 /* 7.1.2.20 Root Policy/Security attributes page (OSD_APAGE_ROOT_SECURITY) */
@@ -324,4 +369,29 @@ struct object_security_attributes_page {
 	__be32 policy_access_tag;
 }  __packed;
 
+/* OSD2r05: 7.1.3.31 Current Command attributes page
+ * (OSD_APAGE_CURRENT_COMMAND)
+ */
+enum {
+	OSD_ATTR_CC_RESPONSE_INTEGRITY_CHECK_VALUE     = 0x1, /* 32  */
+	OSD_ATTR_CC_OBJECT_TYPE                        = 0x2, /* 1   */
+	OSD_ATTR_CC_PARTITION_ID                       = 0x3, /* 8   */
+	OSD_ATTR_CC_OBJECT_ID                          = 0x4, /* 8   */
+	OSD_ATTR_CC_STARTING_BYTE_ADDRESS_OF_APPEND    = 0x5, /* 8   */
+	OSD_ATTR_CC_CHANGE_IN_USED_CAPACITY            = 0x6, /* 8   */
+};
+
+/*TBD: osdv1_current_command_attributes_page */
+
+struct osdv2_current_command_attributes_page {
+	struct osd_attr_page_header hdr;  /* id=0xFFFFFFFE, size=0x44 */
+	u8 response_integrity_check_value[OSD_CRYPTO_KEYID_SIZE];
+	u8 object_type;
+	u8 reserved[3];
+	__be64 partition_id;
+	__be64 object_id;
+	__be64 starting_byte_address_of_append;
+	__be64 change_in_used_capacity;
+};
+
 #endif /*ndef __OSD_ATTRIBUTES_H__*/
-- 
cgit v1.2.3-70-g09d2


From 0e35afbc8b054e04a35faa796c72abb3b82bd33b Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 24 May 2009 20:02:22 +0300
Subject: [SCSI] libosd: osd_req_{read,write}_kern new API

By popular demand, define usefull wrappers for osd_req_read/write
that recieve kernel pointers. All users had their own.

Also remove these from exofs

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/scsi/osd/osd_initiator.c | 28 ++++++++++++++++++++++++++++
 fs/exofs/common.h                |  6 ------
 fs/exofs/osd.c                   | 26 --------------------------
 include/scsi/osd_initiator.h     |  4 ++++
 4 files changed, 32 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index 15f0bbc19c9..c98153bfb36 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -789,6 +789,20 @@ void osd_req_write(struct osd_request *or,
 }
 EXPORT_SYMBOL(osd_req_write);
 
+int osd_req_write_kern(struct osd_request *or,
+	const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
+{
+	struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
+	struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
+
+	if (IS_ERR(bio))
+		return PTR_ERR(bio);
+
+	osd_req_write(or, obj, bio, offset);
+	return 0;
+}
+EXPORT_SYMBOL(osd_req_write_kern);
+
 /*TODO: void osd_req_append(struct osd_request *,
 	const struct osd_obj_id *, struct bio *data_out); */
 /*TODO: void osd_req_create_write(struct osd_request *,
@@ -824,6 +838,20 @@ void osd_req_read(struct osd_request *or,
 }
 EXPORT_SYMBOL(osd_req_read);
 
+int osd_req_read_kern(struct osd_request *or,
+	const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
+{
+	struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
+	struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
+
+	if (IS_ERR(bio))
+		return PTR_ERR(bio);
+
+	osd_req_read(or, obj, bio, offset);
+	return 0;
+}
+EXPORT_SYMBOL(osd_req_read_kern);
+
 void osd_req_get_attributes(struct osd_request *or,
 	const struct osd_obj_id *obj)
 {
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index b1512c4bb8c..24667eedc02 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -175,10 +175,4 @@ int exofs_async_op(struct osd_request *or,
 
 int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr);
 
-int osd_req_read_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
-
-int osd_req_write_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
-
 #endif /*ifndef __EXOFS_COM_H__*/
diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c
index b249ae97fb1..48cc4d11d3f 100644
--- a/fs/exofs/osd.c
+++ b/fs/exofs/osd.c
@@ -125,29 +125,3 @@ int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr)
 
 	return -EIO;
 }
-
-int osd_req_read_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
-{
-	struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
-	struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
-
-	if (!bio)
-		return -ENOMEM;
-
-	osd_req_read(or, obj, bio, offset);
-	return 0;
-}
-
-int osd_req_write_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
-{
-	struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
-	struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
-
-	if (!bio)
-		return -ENOMEM;
-
-	osd_req_write(or, obj, bio, offset);
-	return 0;
-}
diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h
index b24d9616eb4..6132790d678 100644
--- a/include/scsi/osd_initiator.h
+++ b/include/scsi/osd_initiator.h
@@ -364,6 +364,8 @@ void osd_req_remove_object(struct osd_request *or, struct osd_obj_id *);
 
 void osd_req_write(struct osd_request *or,
 	const struct osd_obj_id *, struct bio *data_out, u64 offset);
+int osd_req_write_kern(struct osd_request *or,
+	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
 void osd_req_append(struct osd_request *or,
 	const struct osd_obj_id *, struct bio *data_out);/* NI */
 void osd_req_create_write(struct osd_request *or,
@@ -379,6 +381,8 @@ void osd_req_flush_object(struct osd_request *or,
 
 void osd_req_read(struct osd_request *or,
 	const struct osd_obj_id *, struct bio *data_in, u64 offset);
+int osd_req_read_kern(struct osd_request *or,
+	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
 
 /*
  * Root/Partition/Collection/Object Attributes commands
-- 
cgit v1.2.3-70-g09d2


From 62f469b596dd0aadf046a69027087c18db43734e Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 24 May 2009 20:04:26 +0300
Subject: [SCSI] libosd: osd_req_{read,write} takes a length parameter

For supporting of chained-bios we can not inspect the first
bio only, as before. Caller shall pass the total length of the
request, ie. sum_bytes(bio-chain).

Also since the bio might be a chain we don't set it's direction
on behalf of it's callers. The bio direction should be properly
set prior to this call. So fix a couple of write users that now
need to set the bio direction properly

[In this patch I change both library code and user sites at
 exofs, to make it easy on integration. It should be submitted
 via James's scsi-misc tree.]

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
CC: Jeff Garzik <jeff@garzik.org>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/scsi/osd/osd_initiator.c | 23 +++++++++++++----------
 fs/exofs/inode.c                 |  5 +++--
 include/scsi/osd_initiator.h     |  4 ++--
 3 files changed, 18 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index ba2ebae305c..3f5ec578e6c 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -779,13 +779,14 @@ EXPORT_SYMBOL(osd_req_remove_object);
 */
 
 void osd_req_write(struct osd_request *or,
-	const struct osd_obj_id *obj, struct bio *bio, u64 offset)
+	const struct osd_obj_id *obj, u64 offset,
+	struct bio *bio, u64 len)
 {
-	_osd_req_encode_common(or, OSD_ACT_WRITE, obj, offset, bio->bi_size);
+	_osd_req_encode_common(or, OSD_ACT_WRITE, obj, offset, len);
 	WARN_ON(or->out.bio || or->out.total_bytes);
-	bio->bi_rw |= (1 << BIO_RW);
+	WARN_ON(0 ==  bio_rw_flagged(bio, BIO_RW));
 	or->out.bio = bio;
-	or->out.total_bytes = bio->bi_size;
+	or->out.total_bytes = len;
 }
 EXPORT_SYMBOL(osd_req_write);
 
@@ -798,7 +799,8 @@ int osd_req_write_kern(struct osd_request *or,
 	if (IS_ERR(bio))
 		return PTR_ERR(bio);
 
-	osd_req_write(or, obj, bio, offset);
+	bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */
+	osd_req_write(or, obj, offset, bio, len);
 	return 0;
 }
 EXPORT_SYMBOL(osd_req_write_kern);
@@ -828,13 +830,14 @@ void osd_req_flush_object(struct osd_request *or,
 EXPORT_SYMBOL(osd_req_flush_object);
 
 void osd_req_read(struct osd_request *or,
-	const struct osd_obj_id *obj, struct bio *bio, u64 offset)
+	const struct osd_obj_id *obj, u64 offset,
+	struct bio *bio, u64 len)
 {
-	_osd_req_encode_common(or, OSD_ACT_READ, obj, offset, bio->bi_size);
+	_osd_req_encode_common(or, OSD_ACT_READ, obj, offset, len);
 	WARN_ON(or->in.bio || or->in.total_bytes);
-	bio->bi_rw &= ~(1 << BIO_RW);
+	WARN_ON(1 == bio_rw_flagged(bio, BIO_RW));
 	or->in.bio = bio;
-	or->in.total_bytes = bio->bi_size;
+	or->in.total_bytes = len;
 }
 EXPORT_SYMBOL(osd_req_read);
 
@@ -847,7 +850,7 @@ int osd_req_read_kern(struct osd_request *or,
 	if (IS_ERR(bio))
 		return PTR_ERR(bio);
 
-	osd_req_read(or, obj, bio, offset);
+	osd_req_read(or, obj, offset, bio, len);
 	return 0;
 }
 EXPORT_SYMBOL(osd_req_read_kern);
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index ba8d9fab469..f79e8e58c3a 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -266,7 +266,7 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
 		goto err;
 	}
 
-	osd_req_read(or, &obj, pcol->bio, i_start);
+	osd_req_read(or, &obj, i_start, pcol->bio, pcol->length);
 
 	if (is_sync) {
 		exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred);
@@ -522,7 +522,8 @@ static int write_exec(struct page_collect *pcol)
 
 	*pcol_copy = *pcol;
 
-	osd_req_write(or, &obj, pcol_copy->bio, i_start);
+	pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */
+	osd_req_write(or, &obj, i_start, pcol_copy->bio, pcol_copy->length);
 	ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred);
 	if (unlikely(ret)) {
 		EXOFS_ERR("write_exec: exofs_async_op() Faild\n");
diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h
index 6132790d678..8c1e3b804af 100644
--- a/include/scsi/osd_initiator.h
+++ b/include/scsi/osd_initiator.h
@@ -363,7 +363,7 @@ void osd_req_create_object(struct osd_request *or, struct osd_obj_id *);
 void osd_req_remove_object(struct osd_request *or, struct osd_obj_id *);
 
 void osd_req_write(struct osd_request *or,
-	const struct osd_obj_id *, struct bio *data_out, u64 offset);
+	const struct osd_obj_id *obj, u64 offset, struct bio *bio, u64 len);
 int osd_req_write_kern(struct osd_request *or,
 	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
 void osd_req_append(struct osd_request *or,
@@ -380,7 +380,7 @@ void osd_req_flush_object(struct osd_request *or,
 	/*V2*/ u64 offset, /*V2*/ u64 len);
 
 void osd_req_read(struct osd_request *or,
-	const struct osd_obj_id *, struct bio *data_in, u64 offset);
+	const struct osd_obj_id *obj, u64 offset, struct bio *bio, u64 len);
 int osd_req_read_kern(struct osd_request *or,
 	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
 
-- 
cgit v1.2.3-70-g09d2


From fc2fac5b5f11e2bee3bf37215c8746236f5ea188 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 24 May 2009 20:04:43 +0300
Subject: [SCSI] libosd: Define an osd_dev wrapper to retrieve the
 request_queue

libosd users that need to work with bios, must sometime use
the request_queue associated with the osd_dev. Make a wrapper for
that, and convert all in-tree users.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/scsi/osd/osd_initiator.c | 6 +++---
 fs/exofs/inode.c                 | 3 +--
 include/scsi/osd_initiator.h     | 5 +++++
 3 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index 3f5ec578e6c..3959797149f 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -670,7 +670,7 @@ static int _osd_req_list_objects(struct osd_request *or,
 	__be16 action, const struct osd_obj_id *obj, osd_id initial_id,
 	struct osd_obj_id_list *list, unsigned nelem)
 {
-	struct request_queue *q = or->osd_dev->scsi_device->request_queue;
+	struct request_queue *q = osd_request_queue(or->osd_dev);
 	u64 len = nelem * sizeof(osd_id) + sizeof(*list);
 	struct bio *bio;
 
@@ -793,7 +793,7 @@ EXPORT_SYMBOL(osd_req_write);
 int osd_req_write_kern(struct osd_request *or,
 	const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
 {
-	struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
+	struct request_queue *req_q = osd_request_queue(or->osd_dev);
 	struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
 
 	if (IS_ERR(bio))
@@ -844,7 +844,7 @@ EXPORT_SYMBOL(osd_req_read);
 int osd_req_read_kern(struct osd_request *or,
 	const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
 {
-	struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
+	struct request_queue *req_q = osd_request_queue(or->osd_dev);
 	struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
 
 	if (IS_ERR(bio))
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index f79e8e58c3a..77d0a295eb1 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -59,10 +59,9 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
 		struct inode *inode)
 {
 	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
-	struct request_queue *req_q = sbi->s_dev->scsi_device->request_queue;
 
 	pcol->sbi = sbi;
-	pcol->req_q = req_q;
+	pcol->req_q = osd_request_queue(sbi->s_dev);
 	pcol->inode = inode;
 	pcol->expected_pages = expected_pages;
 
diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h
index 8c1e3b804af..b44dc53bd88 100644
--- a/include/scsi/osd_initiator.h
+++ b/include/scsi/osd_initiator.h
@@ -18,6 +18,7 @@
 #include "osd_types.h"
 
 #include <linux/blkdev.h>
+#include <scsi/scsi_device.h>
 
 /* Note: "NI" in comments below means "Not Implemented yet" */
 
@@ -69,6 +70,10 @@ void osd_dev_fini(struct osd_dev *od);
 
 /* some hi level device operations */
 int osd_auto_detect_ver(struct osd_dev *od, void *caps);    /* GFP_KERNEL */
+static inline struct request_queue *osd_request_queue(struct osd_dev *od)
+{
+	return od->scsi_device->request_queue;
+}
 
 /* we might want to use function vector in the future */
 static inline void osd_dev_set_ver(struct osd_dev *od, enum osd_std_version v)
-- 
cgit v1.2.3-70-g09d2


From 021e2230d6c04d80289fceb2d21c9ce93a615b32 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Sun, 24 May 2009 20:05:05 +0300
Subject: [SCSI] osduld: use filp_open() when looking up an osd-device

This patch was inspired by Al Viro, for simplifying and fixing the
retrieval of osd-devices by in-kernel users, eg: file systems.
In-Kernel users, now, go through the same path user-mode does by
opening a file on the osd char-device and though holding a reference
to both the device and the Module.

A file pointer was added to the osd_dev structure which is now
allocated for each user. The internal osd_dev is no longer exposed
outside of the uld. I wanted to do that for a long time so each
libosd user can have his own defaults on the device.

The API is left the same, so user code need not change.

It is no longer needed to open/close a file handle on the osd
char-device from user-mode, before mounting an exofs on it.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
CC: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/scsi/osd/osd_uld.c   | 66 ++++++++++++++++++++------------------------
 include/scsi/osd_initiator.h |  1 +
 2 files changed, 31 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/osd/osd_uld.c b/drivers/scsi/osd/osd_uld.c
index 22b59e13ba8..0bdef339090 100644
--- a/drivers/scsi/osd/osd_uld.c
+++ b/drivers/scsi/osd/osd_uld.c
@@ -49,6 +49,7 @@
 #include <linux/device.h>
 #include <linux/idr.h>
 #include <linux/major.h>
+#include <linux/file.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_driver.h>
@@ -175,10 +176,9 @@ static const struct file_operations osd_fops = {
 
 struct osd_dev *osduld_path_lookup(const char *name)
 {
-	struct path path;
-	struct inode *inode;
-	struct cdev *cdev;
-	struct osd_uld_device *uninitialized_var(oud);
+	struct osd_uld_device *oud;
+	struct osd_dev *od;
+	struct file *file;
 	int error;
 
 	if (!name || !*name) {
@@ -186,52 +186,46 @@ struct osd_dev *osduld_path_lookup(const char *name)
 		return ERR_PTR(-EINVAL);
 	}
 
-	error = kern_path(name, LOOKUP_FOLLOW, &path);
-	if (error) {
-		OSD_ERR("path_lookup of %s failed=>%d\n", name, error);
-		return ERR_PTR(error);
-	}
+	od = kzalloc(sizeof(*od), GFP_KERNEL);
+	if (!od)
+		return ERR_PTR(-ENOMEM);
 
-	inode = path.dentry->d_inode;
-	error = -EINVAL; /* Not the right device e.g osd_uld_device */
-	if (!S_ISCHR(inode->i_mode)) {
-		OSD_DEBUG("!S_ISCHR()\n");
-		goto out;
+	file = filp_open(name, O_RDWR, 0);
+	if (IS_ERR(file)) {
+		error = PTR_ERR(file);
+		goto free_od;
 	}
 
-	cdev = inode->i_cdev;
-	if (!cdev) {
-		OSD_ERR("Before mounting an OSD Based filesystem\n");
-		OSD_ERR("  user-mode must open+close the %s device\n", name);
-		OSD_ERR("  Example: bash: echo < %s\n", name);
-		goto out;
+	if (file->f_op != &osd_fops){
+		error = -EINVAL;
+		goto close_file;
 	}
 
-	/* The Magic wand. Is it our char-dev */
-	/* TODO: Support sg devices */
-	if (cdev->owner != THIS_MODULE) {
-		OSD_ERR("Error mounting %s - is not an OSD device\n", name);
-		goto out;
-	}
+	oud = file->private_data;
 
-	oud = container_of(cdev, struct osd_uld_device, cdev);
+	*od = oud->od;
+	od->file = file;
 
-	__uld_get(oud);
-	error = 0;
+	return od;
 
-out:
-	path_put(&path);
-	return error ? ERR_PTR(error) : &oud->od;
+close_file:
+	fput(file);
+free_od:
+	kfree(od);
+	return ERR_PTR(error);
 }
 EXPORT_SYMBOL(osduld_path_lookup);
 
 void osduld_put_device(struct osd_dev *od)
 {
-	if (od) {
-		struct osd_uld_device *oud = container_of(od,
-						struct osd_uld_device, od);
 
-		__uld_put(oud);
+	if (od && !IS_ERR(od)) {
+		struct osd_uld_device *oud = od->file->private_data;
+
+		BUG_ON(od->scsi_device != oud->od.scsi_device);
+
+		fput(od->file);
+		kfree(od);
 	}
 }
 EXPORT_SYMBOL(osduld_put_device);
diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h
index b44dc53bd88..02bd9f71635 100644
--- a/include/scsi/osd_initiator.h
+++ b/include/scsi/osd_initiator.h
@@ -48,6 +48,7 @@ enum osd_std_version {
  */
 struct osd_dev {
 	struct scsi_device *scsi_device;
+	struct file *file;
 	unsigned def_timeout;
 
 #ifdef OSD_VER1_SUPPORT
-- 
cgit v1.2.3-70-g09d2


From bd2b5b12849a3446abad0b25e920f86f5480b309 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 10 Jun 2009 13:40:57 +0200
Subject: perf_counter: More aggressive frequency adjustment

Also employ the overflow handler to adjust the frequency, this results
in a stable frequency in about 40~50 samples, instead of that many ticks.

This also means we can start sampling at a sample period of 1 without
running head-first into the throttle.

It relies on sched_clock() to accurately measure the time difference
between the overflow NMIs.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_counter.c |   5 +-
 include/linux/perf_counter.h       |   1 +
 kernel/perf_counter.c              | 130 +++++++++++++++++++++++++------------
 3 files changed, 92 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 49f258537cb..240ca563063 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -696,10 +696,11 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	if (!attr->exclude_kernel)
 		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
 
-	if (!hwc->sample_period)
+	if (!hwc->sample_period) {
 		hwc->sample_period = x86_pmu.max_period;
+		atomic64_set(&hwc->period_left, hwc->sample_period);
+	}
 
-	atomic64_set(&hwc->period_left, hwc->sample_period);
 	counter->destroy = hw_perf_counter_destroy;
 
 	/*
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 3586df840f6..282d8cc4898 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -371,6 +371,7 @@ struct hw_perf_counter {
 
 	u64				freq_count;
 	u64				freq_interrupts;
+	u64				freq_stamp;
 #endif
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 5eacaaf3f9c..51c571ee4d0 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1184,13 +1184,33 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
 static void perf_log_throttle(struct perf_counter *counter, int enable);
 static void perf_log_period(struct perf_counter *counter, u64 period);
 
-static void perf_adjust_freq(struct perf_counter_context *ctx)
+static void perf_adjust_period(struct perf_counter *counter, u64 events)
+{
+	struct hw_perf_counter *hwc = &counter->hw;
+	u64 period, sample_period;
+	s64 delta;
+
+	events *= hwc->sample_period;
+	period = div64_u64(events, counter->attr.sample_freq);
+
+	delta = (s64)(period - hwc->sample_period);
+	delta = (delta + 7) / 8; /* low pass filter */
+
+	sample_period = hwc->sample_period + delta;
+
+	if (!sample_period)
+		sample_period = 1;
+
+	perf_log_period(counter, sample_period);
+
+	hwc->sample_period = sample_period;
+}
+
+static void perf_ctx_adjust_freq(struct perf_counter_context *ctx)
 {
 	struct perf_counter *counter;
 	struct hw_perf_counter *hwc;
-	u64 interrupts, sample_period;
-	u64 events, period, freq;
-	s64 delta;
+	u64 interrupts, freq;
 
 	spin_lock(&ctx->lock);
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
@@ -1202,6 +1222,9 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
 		interrupts = hwc->interrupts;
 		hwc->interrupts = 0;
 
+		/*
+		 * unthrottle counters on the tick
+		 */
 		if (interrupts == MAX_INTERRUPTS) {
 			perf_log_throttle(counter, 1);
 			counter->pmu->unthrottle(counter);
@@ -1211,6 +1234,9 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
 		if (!counter->attr.freq || !counter->attr.sample_freq)
 			continue;
 
+		/*
+		 * if the specified freq < HZ then we need to skip ticks
+		 */
 		if (counter->attr.sample_freq < HZ) {
 			freq = counter->attr.sample_freq;
 
@@ -1226,20 +1252,20 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
 		} else
 			freq = HZ;
 
-		events = freq * interrupts * hwc->sample_period;
-		period = div64_u64(events, counter->attr.sample_freq);
-
-		delta = (s64)(1 + period - hwc->sample_period);
-		delta >>= 1;
-
-		sample_period = hwc->sample_period + delta;
-
-		if (!sample_period)
-			sample_period = 1;
+		perf_adjust_period(counter, freq * interrupts);
 
-		perf_log_period(counter, sample_period);
-
-		hwc->sample_period = sample_period;
+		/*
+		 * In order to avoid being stalled by an (accidental) huge
+		 * sample period, force reset the sample period if we didn't
+		 * get any events in this freq period.
+		 */
+		if (!interrupts) {
+			perf_disable();
+			counter->pmu->disable(counter);
+			atomic_set(&hwc->period_left, 0);
+			counter->pmu->enable(counter);
+			perf_enable();
+		}
 	}
 	spin_unlock(&ctx->lock);
 }
@@ -1279,9 +1305,9 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	cpuctx = &per_cpu(perf_cpu_context, cpu);
 	ctx = curr->perf_counter_ctxp;
 
-	perf_adjust_freq(&cpuctx->ctx);
+	perf_ctx_adjust_freq(&cpuctx->ctx);
 	if (ctx)
-		perf_adjust_freq(ctx);
+		perf_ctx_adjust_freq(ctx);
 
 	perf_counter_cpu_sched_out(cpuctx);
 	if (ctx)
@@ -1647,10 +1673,10 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg)
 
 		counter->attr.sample_freq = value;
 	} else {
+		perf_log_period(counter, value);
+
 		counter->attr.sample_period = value;
 		counter->hw.sample_period = value;
-
-		perf_log_period(counter, value);
 	}
 unlock:
 	spin_unlock_irq(&ctx->lock);
@@ -2853,35 +2879,41 @@ void __perf_counter_mmap(struct vm_area_struct *vma)
  * event flow.
  */
 
+struct freq_event {
+	struct perf_event_header	header;
+	u64				time;
+	u64				id;
+	u64				period;
+};
+
 static void perf_log_period(struct perf_counter *counter, u64 period)
 {
 	struct perf_output_handle handle;
+	struct freq_event event;
 	int ret;
 
-	struct {
-		struct perf_event_header	header;
-		u64				time;
-		u64				id;
-		u64				period;
-	} freq_event = {
+	if (counter->hw.sample_period == period)
+		return;
+
+	if (counter->attr.sample_type & PERF_SAMPLE_PERIOD)
+		return;
+
+	event = (struct freq_event) {
 		.header = {
 			.type = PERF_EVENT_PERIOD,
 			.misc = 0,
-			.size = sizeof(freq_event),
+			.size = sizeof(event),
 		},
 		.time = sched_clock(),
 		.id = counter->id,
 		.period = period,
 	};
 
-	if (counter->hw.sample_period == period)
-		return;
-
-	ret = perf_output_begin(&handle, counter, sizeof(freq_event), 0, 0);
+	ret = perf_output_begin(&handle, counter, sizeof(event), 1, 0);
 	if (ret)
 		return;
 
-	perf_output_put(&handle, freq_event);
+	perf_output_put(&handle, event);
 	perf_output_end(&handle);
 }
 
@@ -2923,15 +2955,16 @@ int perf_counter_overflow(struct perf_counter *counter,
 {
 	int events = atomic_read(&counter->event_limit);
 	int throttle = counter->pmu->unthrottle != NULL;
+	struct hw_perf_counter *hwc = &counter->hw;
 	int ret = 0;
 
 	if (!throttle) {
-		counter->hw.interrupts++;
+		hwc->interrupts++;
 	} else {
-		if (counter->hw.interrupts != MAX_INTERRUPTS) {
-			counter->hw.interrupts++;
-			if (HZ*counter->hw.interrupts > (u64)sysctl_perf_counter_limit) {
-				counter->hw.interrupts = MAX_INTERRUPTS;
+		if (hwc->interrupts != MAX_INTERRUPTS) {
+			hwc->interrupts++;
+			if (HZ * hwc->interrupts > (u64)sysctl_perf_counter_limit) {
+				hwc->interrupts = MAX_INTERRUPTS;
 				perf_log_throttle(counter, 0);
 				ret = 1;
 			}
@@ -2945,6 +2978,16 @@ int perf_counter_overflow(struct perf_counter *counter,
 		}
 	}
 
+	if (counter->attr.freq) {
+		u64 now = sched_clock();
+		s64 delta = now - hwc->freq_stamp;
+
+		hwc->freq_stamp = now;
+
+		if (delta > 0 && delta < TICK_NSEC)
+			perf_adjust_period(counter, NSEC_PER_SEC / (int)delta);
+	}
+
 	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * counters
@@ -3379,7 +3422,6 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 		return NULL;
 
 	counter->destroy = tp_perf_counter_destroy;
-	counter->hw.sample_period = counter->attr.sample_period;
 
 	return &perf_ops_generic;
 }
@@ -3483,10 +3525,11 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 	pmu = NULL;
 
 	hwc = &counter->hw;
+	hwc->sample_period = attr->sample_period;
 	if (attr->freq && attr->sample_freq)
-		hwc->sample_period = div64_u64(TICK_NSEC, attr->sample_freq);
-	else
-		hwc->sample_period = attr->sample_period;
+		hwc->sample_period = 1;
+
+	atomic64_set(&hwc->period_left, hwc->sample_period);
 
 	/*
 	 * we currently do not support PERF_SAMPLE_GROUP on inherited counters
@@ -3687,6 +3730,9 @@ inherit_counter(struct perf_counter *parent_counter,
 	else
 		child_counter->state = PERF_COUNTER_STATE_OFF;
 
+	if (parent_counter->attr.freq)
+		child_counter->hw.sample_period = parent_counter->hw.sample_period;
+
 	/*
 	 * Link it up in the child's context:
 	 */
-- 
cgit v1.2.3-70-g09d2


From 4d1d49858c0a5a4fb1be4bc7972754cd640245ba Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 4 Jun 2009 21:57:03 +0200
Subject: net/libertas: remove GPIO-CS handling in SPI interface code

This removes the dependency on GPIO framework and lets the SPI host
driver handle the chip select. The SPI host driver is required to keep
the CS active for the entire message unless cs_change says otherwise.
This patch collects the two/three single SPI transfers into a message.
Also the delay in read path in case use_dummy_writes are not used is
moved into the SPI host driver.

Tested-by: Mike Rapoport <mike@compulab.co.il>
Tested-by: Andrey Yurovsky <andrey@cozybit.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Acked-by: Dan Williams <dcbw@redhat.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/Kconfig           |  2 +-
 drivers/net/wireless/libertas/if_spi.c | 92 ++++++++++++++++------------------
 include/linux/spi/libertas_spi.h       |  3 --
 3 files changed, 45 insertions(+), 52 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig
index daf4c805be5..fb7541c28e5 100644
--- a/drivers/net/wireless/Kconfig
+++ b/drivers/net/wireless/Kconfig
@@ -153,7 +153,7 @@ config LIBERTAS_SDIO
 
 config LIBERTAS_SPI
 	tristate "Marvell Libertas 8686 SPI 802.11b/g cards"
-	depends on LIBERTAS && SPI && GENERIC_GPIO
+	depends on LIBERTAS && SPI
 	---help---
 	  A driver for Marvell Libertas 8686 SPI devices.
 
diff --git a/drivers/net/wireless/libertas/if_spi.c b/drivers/net/wireless/libertas/if_spi.c
index ea23c5de142..f8c2898d82b 100644
--- a/drivers/net/wireless/libertas/if_spi.c
+++ b/drivers/net/wireless/libertas/if_spi.c
@@ -19,7 +19,6 @@
 
 #include <linux/moduleparam.h>
 #include <linux/firmware.h>
-#include <linux/gpio.h>
 #include <linux/jiffies.h>
 #include <linux/kthread.h>
 #include <linux/list.h>
@@ -51,13 +50,6 @@ struct if_spi_card {
 	u16				card_id;
 	u8				card_rev;
 
-	/* Pin number for our GPIO chip-select. */
-	/* TODO: Once the generic SPI layer has some additional features, we
-	 * should take this out and use the normal chip select here.
-	 * We need support for chip select delays, and not dropping chipselect
-	 * after each word. */
-	int				gpio_cs;
-
 	/* The last time that we initiated an SPU operation */
 	unsigned long			prev_xfer_time;
 
@@ -130,12 +122,10 @@ static void spu_transaction_init(struct if_spi_card *card)
 		 * If not, we have to busy-wait to be on the safe side. */
 		ndelay(400);
 	}
-	gpio_set_value(card->gpio_cs, 0); /* assert CS */
 }
 
 static void spu_transaction_finish(struct if_spi_card *card)
 {
-	gpio_set_value(card->gpio_cs, 1); /* drop CS */
 	card->prev_xfer_time = jiffies;
 }
 
@@ -145,6 +135,13 @@ static int spu_write(struct if_spi_card *card, u16 reg, const u8 *buf, int len)
 {
 	int err = 0;
 	u16 reg_out = cpu_to_le16(reg | IF_SPI_WRITE_OPERATION_MASK);
+	struct spi_message m;
+	struct spi_transfer reg_trans;
+	struct spi_transfer data_trans;
+
+	spi_message_init(&m);
+	memset(&reg_trans, 0, sizeof(reg_trans));
+	memset(&data_trans, 0, sizeof(data_trans));
 
 	/* You must give an even number of bytes to the SPU, even if it
 	 * doesn't care about the last one.  */
@@ -153,13 +150,16 @@ static int spu_write(struct if_spi_card *card, u16 reg, const u8 *buf, int len)
 	spu_transaction_init(card);
 
 	/* write SPU register index */
-	err = spi_write(card->spi, (u8 *)&reg_out, sizeof(u16));
-	if (err)
-		goto out;
+	reg_trans.tx_buf = &reg_out;
+	reg_trans.len = sizeof(reg_out);
 
-	err = spi_write(card->spi, buf, len);
+	data_trans.tx_buf = buf;
+	data_trans.len = len;
 
-out:
+	spi_message_add_tail(&reg_trans, &m);
+	spi_message_add_tail(&data_trans, &m);
+
+	err = spi_sync(card->spi, &m);
 	spu_transaction_finish(card);
 	return err;
 }
@@ -186,10 +186,13 @@ static inline int spu_reg_is_port_reg(u16 reg)
 
 static int spu_read(struct if_spi_card *card, u16 reg, u8 *buf, int len)
 {
-	unsigned int i, delay;
+	unsigned int delay;
 	int err = 0;
-	u16 zero = 0;
 	u16 reg_out = cpu_to_le16(reg | IF_SPI_READ_OPERATION_MASK);
+	struct spi_message m;
+	struct spi_transfer reg_trans;
+	struct spi_transfer dummy_trans;
+	struct spi_transfer data_trans;
 
 	/* You must take an even number of bytes from the SPU, even if you
 	 * don't care about the last one.  */
@@ -197,29 +200,34 @@ static int spu_read(struct if_spi_card *card, u16 reg, u8 *buf, int len)
 
 	spu_transaction_init(card);
 
+	spi_message_init(&m);
+	memset(&reg_trans, 0, sizeof(reg_trans));
+	memset(&dummy_trans, 0, sizeof(dummy_trans));
+	memset(&data_trans, 0, sizeof(data_trans));
+
 	/* write SPU register index */
-	err = spi_write(card->spi, (u8 *)&reg_out, sizeof(u16));
-	if (err)
-		goto out;
+	reg_trans.tx_buf = &reg_out;
+	reg_trans.len = sizeof(reg_out);
+	spi_message_add_tail(&reg_trans, &m);
 
 	delay = spu_reg_is_port_reg(reg) ? card->spu_port_delay :
 						card->spu_reg_delay;
 	if (card->use_dummy_writes) {
 		/* Clock in dummy cycles while the SPU fills the FIFO */
-		for (i = 0; i < delay / 16; ++i) {
-			err = spi_write(card->spi, (u8 *)&zero, sizeof(u16));
-			if (err)
-				return err;
-		}
+		dummy_trans.len = delay / 8;
+		spi_message_add_tail(&dummy_trans, &m);
 	} else {
 		/* Busy-wait while the SPU fills the FIFO */
-		ndelay(100 + (delay * 10));
+		reg_trans.delay_usecs =
+			DIV_ROUND_UP((100 + (delay * 10)), 1000);
 	}
 
 	/* read in data */
-	err = spi_read(card->spi, buf, len);
+	data_trans.rx_buf = buf;
+	data_trans.len = len;
+	spi_message_add_tail(&data_trans, &m);
 
-out:
+	err = spi_sync(card->spi, &m);
 	spu_transaction_finish(card);
 	return err;
 }
@@ -1049,7 +1057,6 @@ static int __devinit if_spi_probe(struct spi_device *spi)
 	spi_set_drvdata(spi, card);
 	card->pdata = pdata;
 	card->spi = spi;
-	card->gpio_cs = pdata->gpio_cs;
 	card->prev_xfer_time = jiffies;
 
 	sema_init(&card->spi_ready, 0);
@@ -1058,26 +1065,18 @@ static int __devinit if_spi_probe(struct spi_device *spi)
 	INIT_LIST_HEAD(&card->data_packet_list);
 	spin_lock_init(&card->buffer_lock);
 
-	/* set up GPIO CS line. TODO: use  regular CS line */
-	err = gpio_request(card->gpio_cs, "if_spi_gpio_chip_select");
-	if (err)
-		goto free_card;
-	err = gpio_direction_output(card->gpio_cs, 1);
-	if (err)
-		goto free_gpio;
-
 	/* Initialize the SPI Interface Unit */
 	err = spu_init(card, pdata->use_dummy_writes);
 	if (err)
-		goto free_gpio;
+		goto free_card;
 	err = spu_get_chip_revision(card, &card->card_id, &card->card_rev);
 	if (err)
-		goto free_gpio;
+		goto free_card;
 
 	/* Firmware load */
 	err = spu_read_u32(card, IF_SPI_SCRATCH_4_REG, &scratch);
 	if (err)
-		goto free_gpio;
+		goto free_card;
 	if (scratch == SUCCESSFUL_FW_DOWNLOAD_MAGIC)
 		lbs_deb_spi("Firmware is already loaded for "
 			    "Marvell WLAN 802.11 adapter\n");
@@ -1085,7 +1084,7 @@ static int __devinit if_spi_probe(struct spi_device *spi)
 		err = if_spi_calculate_fw_names(card->card_id,
 				card->helper_fw_name, card->main_fw_name);
 		if (err)
-			goto free_gpio;
+			goto free_card;
 
 		lbs_deb_spi("Initializing FW for Marvell WLAN 802.11 adapter "
 				"(chip_id = 0x%04x, chip_rev = 0x%02x) "
@@ -1096,23 +1095,23 @@ static int __devinit if_spi_probe(struct spi_device *spi)
 				spi->max_speed_hz);
 		err = if_spi_prog_helper_firmware(card);
 		if (err)
-			goto free_gpio;
+			goto free_card;
 		err = if_spi_prog_main_firmware(card);
 		if (err)
-			goto free_gpio;
+			goto free_card;
 		lbs_deb_spi("loaded FW for Marvell WLAN 802.11 adapter\n");
 	}
 
 	err = spu_set_interrupt_mode(card, 0, 1);
 	if (err)
-		goto free_gpio;
+		goto free_card;
 
 	/* Register our card with libertas.
 	 * This will call alloc_etherdev */
 	priv = lbs_add_card(card, &spi->dev);
 	if (!priv) {
 		err = -ENOMEM;
-		goto free_gpio;
+		goto free_card;
 	}
 	card->priv = priv;
 	priv->card = card;
@@ -1157,8 +1156,6 @@ terminate_thread:
 	if_spi_terminate_spi_thread(card);
 remove_card:
 	lbs_remove_card(priv); /* will call free_netdev */
-free_gpio:
-	gpio_free(card->gpio_cs);
 free_card:
 	free_if_spi_card(card);
 out:
@@ -1179,7 +1176,6 @@ static int __devexit libertas_spi_remove(struct spi_device *spi)
 	free_irq(spi->irq, card);
 	if_spi_terminate_spi_thread(card);
 	lbs_remove_card(priv); /* will call free_netdev */
-	gpio_free(card->gpio_cs);
 	if (card->pdata->teardown)
 		card->pdata->teardown(spi);
 	free_if_spi_card(card);
diff --git a/include/linux/spi/libertas_spi.h b/include/linux/spi/libertas_spi.h
index 79506f5f9e6..1b5d5384fcd 100644
--- a/include/linux/spi/libertas_spi.h
+++ b/include/linux/spi/libertas_spi.h
@@ -22,9 +22,6 @@ struct libertas_spi_platform_data {
 	 * speed, you may want to use 0 here. */
 	u16 use_dummy_writes;
 
-	/* GPIO number to use as chip select */
-	u16 gpio_cs;
-
 	/* Board specific setup/teardown */
 	int (*setup)(struct spi_device *spi);
 	int (*teardown)(struct spi_device *spi);
-- 
cgit v1.2.3-70-g09d2


From 8f77f3849cc3ae2d6df9301785a3d316ea7d7ee1 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sun, 7 Jun 2009 21:58:37 +0200
Subject: mac80211: do not pass PS frames out of mac80211 again

In order to handle powersave frames properly we had needed
to pass these out to the device queues again, and introduce
the skb->requeue bit. This, however, also has unnecessary
overhead by needing to 'clean up' already tried frames, and
this clean-up code is also buggy when software encryption
is used.

Instead of sending the frames via the master netdev queue
again, simply put them into the pending queue. This also
fixes a problem where frames for that particular station
could be reordered when some were still on the software
queues and older ones are re-injected into the software
queue after them.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/skbuff.h     |  4 ---
 include/net/mac80211.h     |  3 +++
 net/core/skbuff.c          |  1 -
 net/mac80211/ieee80211_i.h |  5 ++++
 net/mac80211/main.c        | 61 +++++-----------------------------------------
 net/mac80211/rx.c          | 25 +++++++------------
 net/mac80211/tx.c          |  3 ++-
 net/mac80211/util.c        | 46 ++++++++++++++++++++++++++++++++++
 net/mac80211/wme.c         |  2 +-
 9 files changed, 72 insertions(+), 78 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f1c93b878b3..fa51293f270 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -304,9 +304,6 @@ typedef unsigned char *sk_buff_data_t;
  *	@tc_verd: traffic control verdict
  *	@ndisc_nodetype: router type (from link layer)
  *	@do_not_encrypt: set to prevent encryption of this frame
- *	@requeue: set to indicate that the wireless core should attempt
- *		a software retry on this frame if we failed to
- *		receive an ACK for it
  *	@dma_cookie: a cookie to one of several possible DMA operations
  *		done by skb DMA functions
  *	@secmark: security marking
@@ -380,7 +377,6 @@ struct sk_buff {
 #endif
 #if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE)
 	__u8			do_not_encrypt:1;
-	__u8			requeue:1;
 #endif
 	/* 0/13/14 bit hole */
 
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 17d61d19d91..c0610447697 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -239,6 +239,8 @@ struct ieee80211_bss_conf {
  * @IEEE80211_TX_INTFL_NEED_TXPROCESSING: completely internal to mac80211,
  *	used to indicate that a pending frame requires TX processing before
  *	it can be sent out.
+ * @IEEE80211_TX_INTFL_RETRIED: completely internal to mac80211,
+ *	used to indicate that a frame was already retried due to PS
  */
 enum mac80211_tx_control_flags {
 	IEEE80211_TX_CTL_REQ_TX_STATUS		= BIT(0),
@@ -256,6 +258,7 @@ enum mac80211_tx_control_flags {
 	IEEE80211_TX_CTL_RATE_CTRL_PROBE	= BIT(12),
 	IEEE80211_TX_INTFL_RCALGO		= BIT(13),
 	IEEE80211_TX_INTFL_NEED_TXPROCESSING	= BIT(14),
+	IEEE80211_TX_INTFL_RETRIED		= BIT(15),
 };
 
 /**
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 49961ba3c0f..b94d777e3eb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -552,7 +552,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->vlan_tci		= old->vlan_tci;
 #if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE)
 	new->do_not_encrypt	= old->do_not_encrypt;
-	new->requeue		= old->requeue;
 #endif
 
 	skb_copy_secmark(new, old);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index c088c46704a..4dbc2896419 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -589,6 +589,7 @@ enum queue_stop_reason {
 	IEEE80211_QUEUE_STOP_REASON_AGGREGATION,
 	IEEE80211_QUEUE_STOP_REASON_SUSPEND,
 	IEEE80211_QUEUE_STOP_REASON_PENDING,
+	IEEE80211_QUEUE_STOP_REASON_SKB_ADD,
 };
 
 struct ieee80211_master_priv {
@@ -1121,6 +1122,10 @@ void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue,
 				    enum queue_stop_reason reason);
 void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue,
 				    enum queue_stop_reason reason);
+void ieee80211_add_pending_skb(struct ieee80211_local *local,
+			       struct sk_buff *skb);
+int ieee80211_add_pending_skbs(struct ieee80211_local *local,
+			       struct sk_buff_head *skbs);
 
 void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 			 u16 transaction, u16 auth_alg,
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 2683df91807..092a017b237 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -369,60 +369,12 @@ static void ieee80211_tasklet_handler(unsigned long data)
 	}
 }
 
-/* Remove added headers (e.g., QoS control), encryption header/MIC, etc. to
- * make a prepared TX frame (one that has been given to hw) to look like brand
- * new IEEE 802.11 frame that is ready to go through TX processing again.
- */
-static void ieee80211_remove_tx_extra(struct ieee80211_local *local,
-				      struct ieee80211_key *key,
-				      struct sk_buff *skb)
-{
-	unsigned int hdrlen, iv_len, mic_len;
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
-
-	hdrlen = ieee80211_hdrlen(hdr->frame_control);
-
-	if (!key)
-		goto no_key;
-
-	switch (key->conf.alg) {
-	case ALG_WEP:
-		iv_len = WEP_IV_LEN;
-		mic_len = WEP_ICV_LEN;
-		break;
-	case ALG_TKIP:
-		iv_len = TKIP_IV_LEN;
-		mic_len = TKIP_ICV_LEN;
-		break;
-	case ALG_CCMP:
-		iv_len = CCMP_HDR_LEN;
-		mic_len = CCMP_MIC_LEN;
-		break;
-	default:
-		goto no_key;
-	}
-
-	if (skb->len >= hdrlen + mic_len &&
-	    !(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
-		skb_trim(skb, skb->len - mic_len);
-	if (skb->len >= hdrlen + iv_len) {
-		memmove(skb->data + iv_len, skb->data, hdrlen);
-		hdr = (struct ieee80211_hdr *)skb_pull(skb, iv_len);
-	}
-
-no_key:
-	if (ieee80211_is_data_qos(hdr->frame_control)) {
-		hdr->frame_control &= ~cpu_to_le16(IEEE80211_STYPE_QOS_DATA);
-		memmove(skb->data + IEEE80211_QOS_CTL_LEN, skb->data,
-			hdrlen - IEEE80211_QOS_CTL_LEN);
-		skb_pull(skb, IEEE80211_QOS_CTL_LEN);
-	}
-}
-
 static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
 					    struct sta_info *sta,
 					    struct sk_buff *skb)
 {
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+
 	sta->tx_filtered_count++;
 
 	/*
@@ -464,16 +416,15 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
 	 */
 	if (test_sta_flags(sta, WLAN_STA_PS) &&
 	    skb_queue_len(&sta->tx_filtered) < STA_MAX_TX_BUFFER) {
-		ieee80211_remove_tx_extra(local, sta->key, skb);
 		skb_queue_tail(&sta->tx_filtered, skb);
 		return;
 	}
 
-	if (!test_sta_flags(sta, WLAN_STA_PS) && !skb->requeue) {
+	if (!test_sta_flags(sta, WLAN_STA_PS) &&
+	    !(info->flags & IEEE80211_TX_INTFL_RETRIED)) {
 		/* Software retry the packet once */
-		skb->requeue = 1;
-		ieee80211_remove_tx_extra(local, sta->key, skb);
-		dev_queue_xmit(skb);
+		info->flags |= IEEE80211_TX_INTFL_RETRIED;
+		ieee80211_add_pending_skb(local, skb);
 		return;
 	}
 
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 75412518510..de5bba7f910 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -797,8 +797,7 @@ static int ap_sta_ps_end(struct sta_info *sta)
 {
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
 	struct ieee80211_local *local = sdata->local;
-	struct sk_buff *skb;
-	int sent = 0;
+	int sent, buffered;
 
 	atomic_dec(&sdata->bss->num_sta_ps);
 
@@ -814,22 +813,16 @@ static int ap_sta_ps_end(struct sta_info *sta)
 #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */
 
 	/* Send all buffered frames to the station */
-	while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) {
-		sent++;
-		skb->requeue = 1;
-		dev_queue_xmit(skb);
-	}
-	while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) {
-		local->total_ps_buffered--;
-		sent++;
+	sent = ieee80211_add_pending_skbs(local, &sta->tx_filtered);
+	buffered = ieee80211_add_pending_skbs(local, &sta->ps_tx_buf);
+	sent += buffered;
+	local->total_ps_buffered -= buffered;
+
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
-		printk(KERN_DEBUG "%s: STA %pM aid %d send PS frame "
-		       "since STA not sleeping anymore\n", sdata->dev->name,
-		       sta->sta.addr, sta->sta.aid);
+	printk(KERN_DEBUG "%s: STA %pM aid %d sending %d filtered/%d PS frames "
+	       "since STA not sleeping anymore\n", sdata->dev->name,
+	       sta->sta.addr, sta->sta.aid, sent - buffered, buffered);
 #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */
-		skb->requeue = 1;
-		dev_queue_xmit(skb);
-	}
 
 	return sent;
 }
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1436f747531..bfaa9ce3314 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -400,6 +400,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
 			sta_info_set_tim_bit(sta);
 
 		info->control.jiffies = jiffies;
+		info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
 		skb_queue_tail(&sta->ps_tx_buf, tx->skb);
 		return TX_QUEUED;
 	}
@@ -420,7 +421,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
 		 * frame filtering and keeps a station  blacklist on its own
 		 * (e.g: p54), so that frames can be delivered unimpeded.
 		 *
-		 * Note: It should be save to disable the filter now.
+		 * Note: It should be safe to disable the filter now.
 		 * As, it is really unlikely that we still have any pending
 		 * frame for this station in the hw's buffers/fifos left,
 		 * that is not rejected with a unsuccessful tx_status yet.
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 22f63815fb3..66ce96a69f3 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -341,6 +341,52 @@ void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue)
 }
 EXPORT_SYMBOL(ieee80211_stop_queue);
 
+void ieee80211_add_pending_skb(struct ieee80211_local *local,
+			       struct sk_buff *skb)
+{
+	struct ieee80211_hw *hw = &local->hw;
+	unsigned long flags;
+	int queue = skb_get_queue_mapping(skb);
+
+	spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
+	__ieee80211_stop_queue(hw, queue, IEEE80211_QUEUE_STOP_REASON_SKB_ADD);
+	__ieee80211_stop_queue(hw, queue, IEEE80211_QUEUE_STOP_REASON_PENDING);
+	skb_queue_tail(&local->pending[queue], skb);
+	__ieee80211_wake_queue(hw, queue, IEEE80211_QUEUE_STOP_REASON_SKB_ADD);
+	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
+}
+
+int ieee80211_add_pending_skbs(struct ieee80211_local *local,
+			       struct sk_buff_head *skbs)
+{
+	struct ieee80211_hw *hw = &local->hw;
+	struct sk_buff *skb;
+	unsigned long flags;
+	int queue, ret = 0, i;
+
+	spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
+	for (i = 0; i < hw->queues; i++)
+		__ieee80211_stop_queue(hw, i,
+			IEEE80211_QUEUE_STOP_REASON_SKB_ADD);
+
+	while ((skb = skb_dequeue(skbs))) {
+		ret++;
+		queue = skb_get_queue_mapping(skb);
+		skb_queue_tail(&local->pending[queue], skb);
+	}
+
+	for (i = 0; i < hw->queues; i++) {
+		if (ret)
+			__ieee80211_stop_queue(hw, i,
+				IEEE80211_QUEUE_STOP_REASON_PENDING);
+		__ieee80211_wake_queue(hw, i,
+			IEEE80211_QUEUE_STOP_REASON_SKB_ADD);
+	}
+	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
+
+	return ret;
+}
+
 void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
 				    enum queue_stop_reason reason)
 {
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 694343b9102..116a923b14d 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -101,7 +101,7 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb)
 	 * Now we know the 1d priority, fill in the QoS header if
 	 * there is one (and we haven't done this before).
 	 */
-	if (!skb->requeue && ieee80211_is_data_qos(hdr->frame_control)) {
+	if (ieee80211_is_data_qos(hdr->frame_control)) {
 		u8 *p = ieee80211_get_qos_ctl(hdr);
 		u8 ack_policy = 0;
 		tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
-- 
cgit v1.2.3-70-g09d2


From b3fa1329eaf2a7b97124dacf5b663fd51346ac19 Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Mon, 8 Jun 2009 13:27:27 +0100
Subject: rfkill: remove set_global_sw_state

rfkill_set_global_sw_state() (previously rfkill_set_default()) will no
longer be exported by the rewritten rfkill core.

Instead, platform drivers which can provide persistent soft-rfkill state
across power-down/reboot should indicate their initial state by calling
rfkill_set_sw_state() before registration.  Otherwise, they will be
initialized to a default value during registration by a set_block call.

We remove existing calls to rfkill_set_sw_state() which happen before
registration, since these had no effect in the old model.  If these
drivers do have persistent state, the calls can be put back (subject
to testing :-).  This affects hp-wmi and acer-wmi.

Drivers with persistent state will affect the global state only if
rfkill-input is enabled.  This is required, otherwise booting with
wireless soft-blocked and pressing the wireless-toggle key once would
have no apparent effect.  This special case will be removed in future
along with rfkill-input, in favour of a more flexible userspace daemon
(see Documentation/feature-removal-schedule.txt).

Now rfkill_global_states[n].def is only used to preserve global states
over EPO, it is renamed to ".sav".

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Acked-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/platform/x86/acer-wmi.c      |  3 --
 drivers/platform/x86/eeepc-laptop.c  |  8 ++--
 drivers/platform/x86/hp-wmi.c        |  4 --
 drivers/platform/x86/thinkpad_acpi.c | 31 +++++++-------
 include/linux/rfkill.h               | 28 ++++---------
 net/rfkill/core.c                    | 81 +++++++++++++-----------------------
 6 files changed, 56 insertions(+), 99 deletions(-)

(limited to 'include')

diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index b618fa51db2..09a503e5da6 100644
--- a/drivers/platform/x86/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c
@@ -988,7 +988,6 @@ static struct rfkill *acer_rfkill_register(struct device *dev,
 					   char *name, u32 cap)
 {
 	int err;
-	u32 state;
 	struct rfkill *rfkill_dev;
 
 	rfkill_dev = rfkill_alloc(name, dev, type,
@@ -996,8 +995,6 @@ static struct rfkill *acer_rfkill_register(struct device *dev,
 				  (void *)(unsigned long)cap);
 	if (!rfkill_dev)
 		return ERR_PTR(-ENOMEM);
-	get_u32(&state, cap);
-	rfkill_set_sw_state(rfkill_dev, !state);
 
 	err = rfkill_register(rfkill_dev);
 	if (err) {
diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 1208d0cedd1..03bf522bd7a 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -675,8 +675,8 @@ static int eeepc_hotk_add(struct acpi_device *device)
 		if (!ehotk->eeepc_wlan_rfkill)
 			goto wlan_fail;
 
-		rfkill_set_global_sw_state(RFKILL_TYPE_WLAN,
-					   get_acpi(CM_ASL_WLAN) != 1);
+		rfkill_set_sw_state(ehotk->eeepc_wlan_rfkill,
+				    get_acpi(CM_ASL_WLAN) != 1);
 		result = rfkill_register(ehotk->eeepc_wlan_rfkill);
 		if (result)
 			goto wlan_fail;
@@ -693,8 +693,8 @@ static int eeepc_hotk_add(struct acpi_device *device)
 		if (!ehotk->eeepc_bluetooth_rfkill)
 			goto bluetooth_fail;
 
-		rfkill_set_global_sw_state(RFKILL_TYPE_BLUETOOTH,
-					   get_acpi(CM_ASL_BLUETOOTH) != 1);
+		rfkill_set_sw_state(ehotk->eeepc_bluetooth_rfkill,
+				    get_acpi(CM_ASL_BLUETOOTH) != 1);
 		result = rfkill_register(ehotk->eeepc_bluetooth_rfkill);
 		if (result)
 			goto bluetooth_fail;
diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c
index 8d931145cbf..16fffe44e33 100644
--- a/drivers/platform/x86/hp-wmi.c
+++ b/drivers/platform/x86/hp-wmi.c
@@ -422,7 +422,6 @@ static int __init hp_wmi_bios_setup(struct platform_device *device)
 					   RFKILL_TYPE_WLAN,
 					   &hp_wmi_rfkill_ops,
 					   (void *) 0);
-		rfkill_set_sw_state(wifi_rfkill, hp_wmi_wifi_state());
 		err = rfkill_register(wifi_rfkill);
 		if (err)
 			goto register_wifi_error;
@@ -433,8 +432,6 @@ static int __init hp_wmi_bios_setup(struct platform_device *device)
 						RFKILL_TYPE_BLUETOOTH,
 						&hp_wmi_rfkill_ops,
 						(void *) 1);
-		rfkill_set_sw_state(bluetooth_rfkill,
-				    hp_wmi_bluetooth_state());
 		err = rfkill_register(bluetooth_rfkill);
 		if (err)
 			goto register_bluetooth_error;
@@ -445,7 +442,6 @@ static int __init hp_wmi_bios_setup(struct platform_device *device)
 					   RFKILL_TYPE_WWAN,
 					   &hp_wmi_rfkill_ops,
 					   (void *) 2);
-		rfkill_set_sw_state(wwan_rfkill, hp_wmi_wwan_state());
 		err = rfkill_register(wwan_rfkill);
 		if (err)
 			goto register_wwan_err;
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index cfcafa4e947..86e958539f4 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -1168,21 +1168,6 @@ static int __init tpacpi_new_rfkill(const enum tpacpi_rfk_id id,
 
 	BUG_ON(id >= TPACPI_RFK_SW_MAX || tpacpi_rfkill_switches[id]);
 
-	initial_sw_status = (tp_rfkops->get_status)();
-	if (initial_sw_status < 0) {
-		printk(TPACPI_ERR
-			"failed to read initial state for %s, error %d; "
-			"will turn radio off\n", name, initial_sw_status);
-	} else {
-		initial_sw_state = (initial_sw_status == TPACPI_RFK_RADIO_OFF);
-		if (set_default) {
-			/* try to set the initial state as the default for the
-			 * rfkill type, since we ask the firmware to preserve
-			 * it across S5 in NVRAM */
-			rfkill_set_global_sw_state(rfktype, initial_sw_state);
-		}
-	}
-
 	atp_rfk = kzalloc(sizeof(struct tpacpi_rfk), GFP_KERNEL);
 	if (atp_rfk)
 		atp_rfk->rfkill = rfkill_alloc(name,
@@ -1200,8 +1185,20 @@ static int __init tpacpi_new_rfkill(const enum tpacpi_rfk_id id,
 	atp_rfk->id = id;
 	atp_rfk->ops = tp_rfkops;
 
-	rfkill_set_states(atp_rfk->rfkill, initial_sw_state,
-				tpacpi_rfk_check_hwblock_state());
+	initial_sw_status = (tp_rfkops->get_status)();
+	if (initial_sw_status < 0) {
+		printk(TPACPI_ERR
+			"failed to read initial state for %s, error %d\n",
+			name, initial_sw_status);
+	} else {
+		initial_sw_state = (initial_sw_status == TPACPI_RFK_RADIO_OFF);
+		if (set_default) {
+			/* try to keep the initial state, since we ask the
+			 * firmware to preserve it across S5 in NVRAM */
+			rfkill_set_sw_state(atp_rfk->rfkill, initial_sw_state);
+		}
+	}
+	rfkill_set_hw_state(atp_rfk->rfkill, tpacpi_rfk_check_hwblock_state());
 
 	res = rfkill_register(atp_rfk->rfkill);
 	if (res < 0) {
diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index d7e818ad0bc..c1dca0b8138 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -157,8 +157,14 @@ struct rfkill * __must_check rfkill_alloc(const char *name,
  * @rfkill: rfkill structure to be registered
  *
  * This function should be called by the transmitter driver to register
- * the rfkill structure needs to be registered. Before calling this function
- * the driver needs to be ready to service method calls from rfkill.
+ * the rfkill structure. Before calling this function the driver needs
+ * to be ready to service method calls from rfkill.
+ *
+ * If the software blocked state is not set before registration,
+ * set_block will be called to initialize it to a default value.
+ *
+ * If the hardware blocked state is not set before registration,
+ * it is assumed to be unblocked.
  */
 int __must_check rfkill_register(struct rfkill *rfkill);
 
@@ -250,19 +256,6 @@ bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked);
  */
 void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw);
 
-/**
- * rfkill_set_global_sw_state - set global sw block default
- * @type: rfkill type to set default for
- * @blocked: default to set
- *
- * This function sets the global default -- use at boot if your platform has
- * an rfkill switch. If not early enough this call may be ignored.
- *
- * XXX: instead of ignoring -- how about just updating all currently
- *	registered drivers?
- */
-void rfkill_set_global_sw_state(const enum rfkill_type type, bool blocked);
-
 /**
  * rfkill_blocked - query rfkill block
  *
@@ -317,11 +310,6 @@ static inline void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw)
 {
 }
 
-static inline void rfkill_set_global_sw_state(const enum rfkill_type type,
-					      bool blocked)
-{
-}
-
 static inline bool rfkill_blocked(struct rfkill *rfkill)
 {
 	return false;
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index e161ebc40a3..fa430bd03f1 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -57,6 +57,7 @@ struct rfkill {
 
 	bool			registered;
 	bool			suspended;
+	bool			persistent;
 
 	const struct rfkill_ops	*ops;
 	void			*data;
@@ -116,11 +117,9 @@ MODULE_PARM_DESC(default_state,
 		 "Default initial state for all radio types, 0 = radio off");
 
 static struct {
-	bool cur, def;
+	bool cur, sav;
 } rfkill_global_states[NUM_RFKILL_TYPES];
 
-static unsigned long rfkill_states_default_locked;
-
 static bool rfkill_epo_lock_active;
 
 
@@ -392,7 +391,7 @@ void rfkill_epo(void)
 		rfkill_set_block(rfkill, true);
 
 	for (i = 0; i < NUM_RFKILL_TYPES; i++) {
-		rfkill_global_states[i].def = rfkill_global_states[i].cur;
+		rfkill_global_states[i].sav = rfkill_global_states[i].cur;
 		rfkill_global_states[i].cur = true;
 	}
 
@@ -417,7 +416,7 @@ void rfkill_restore_states(void)
 
 	rfkill_epo_lock_active = false;
 	for (i = 0; i < NUM_RFKILL_TYPES; i++)
-		__rfkill_switch_all(i, rfkill_global_states[i].def);
+		__rfkill_switch_all(i, rfkill_global_states[i].sav);
 	mutex_unlock(&rfkill_global_mutex);
 }
 
@@ -464,29 +463,6 @@ bool rfkill_get_global_sw_state(const enum rfkill_type type)
 }
 #endif
 
-void rfkill_set_global_sw_state(const enum rfkill_type type, bool blocked)
-{
-	BUG_ON(type == RFKILL_TYPE_ALL);
-
-	mutex_lock(&rfkill_global_mutex);
-
-	/* don't allow unblock when epo */
-	if (rfkill_epo_lock_active && !blocked)
-		goto out;
-
-	/* too late */
-	if (rfkill_states_default_locked & BIT(type))
-		goto out;
-
-	rfkill_states_default_locked |= BIT(type);
-
-	rfkill_global_states[type].cur = blocked;
-	rfkill_global_states[type].def = blocked;
- out:
-	mutex_unlock(&rfkill_global_mutex);
-}
-EXPORT_SYMBOL(rfkill_set_global_sw_state);
-
 
 bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked)
 {
@@ -532,13 +508,14 @@ bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked)
 	blocked = blocked || hwblock;
 	spin_unlock_irqrestore(&rfkill->lock, flags);
 
-	if (!rfkill->registered)
-		return blocked;
+	if (!rfkill->registered) {
+		rfkill->persistent = true;
+	} else {
+		if (prev != blocked && !hwblock)
+			schedule_work(&rfkill->uevent_work);
 
-	if (prev != blocked && !hwblock)
-		schedule_work(&rfkill->uevent_work);
-
-	rfkill_led_trigger_event(rfkill);
+		rfkill_led_trigger_event(rfkill);
+	}
 
 	return blocked;
 }
@@ -563,13 +540,14 @@ void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw)
 
 	spin_unlock_irqrestore(&rfkill->lock, flags);
 
-	if (!rfkill->registered)
-		return;
-
-	if (swprev != sw || hwprev != hw)
-		schedule_work(&rfkill->uevent_work);
+	if (!rfkill->registered) {
+		rfkill->persistent = true;
+	} else {
+		if (swprev != sw || hwprev != hw)
+			schedule_work(&rfkill->uevent_work);
 
-	rfkill_led_trigger_event(rfkill);
+		rfkill_led_trigger_event(rfkill);
+	}
 }
 EXPORT_SYMBOL(rfkill_set_states);
 
@@ -888,15 +866,6 @@ int __must_check rfkill_register(struct rfkill *rfkill)
 	dev_set_name(dev, "rfkill%lu", rfkill_no);
 	rfkill_no++;
 
-	if (!(rfkill_states_default_locked & BIT(rfkill->type))) {
-		/* first of its kind */
-		BUILD_BUG_ON(NUM_RFKILL_TYPES >
-			sizeof(rfkill_states_default_locked) * 8);
-		rfkill_states_default_locked |= BIT(rfkill->type);
-		rfkill_global_states[rfkill->type].cur =
-			rfkill_global_states[rfkill->type].def;
-	}
-
 	list_add_tail(&rfkill->node, &rfkill_list);
 
 	error = device_add(dev);
@@ -916,7 +885,17 @@ int __must_check rfkill_register(struct rfkill *rfkill)
 	if (rfkill->ops->poll)
 		schedule_delayed_work(&rfkill->poll_work,
 			round_jiffies_relative(POLL_INTERVAL));
-	schedule_work(&rfkill->sync_work);
+
+	if (!rfkill->persistent || rfkill_epo_lock_active) {
+		schedule_work(&rfkill->sync_work);
+	} else {
+#ifdef CONFIG_RFKILL_INPUT
+		bool soft_blocked = !!(rfkill->state & RFKILL_BLOCK_SW);
+
+		if (!atomic_read(&rfkill_input_disabled))
+			__rfkill_switch_all(rfkill->type, soft_blocked);
+#endif
+	}
 
 	rfkill_send_events(rfkill, RFKILL_OP_ADD);
 
@@ -1193,7 +1172,7 @@ static int __init rfkill_init(void)
 	int i;
 
 	for (i = 0; i < NUM_RFKILL_TYPES; i++)
-		rfkill_global_states[i].def = !rfkill_default_state;
+		rfkill_global_states[i].cur = !rfkill_default_state;
 
 	error = class_register(&rfkill_class);
 	if (error)
-- 
cgit v1.2.3-70-g09d2


From 908209c160da8ecb68052111972b7a21310eac3f Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Mon, 8 Jun 2009 13:12:23 +0100
Subject: rfkill: don't impose global states on resume (just restore the
 previous states)

Once rfkill-input is disabled, the "global" states will only be used as
default initial states.

Since the states will always be the same after resume, we shouldn't
generate events on resume.

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/rfkill.h | 7 ++++---
 net/rfkill/core.c      | 6 +-----
 2 files changed, 5 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index c1dca0b8138..16e39c7a67f 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -212,7 +212,7 @@ void rfkill_destroy(struct rfkill *rfkill);
  *
  * rfkill drivers that get events when the hard-blocked state changes
  * use this function to notify the rfkill core (and through that also
- * userspace) of the current state -- they should also use this after
+ * userspace) of the current state.  They should also use this after
  * resume if the state could have changed.
  *
  * You need not (but may) call this function if poll_state is assigned.
@@ -234,8 +234,9 @@ bool __must_check rfkill_set_hw_state(struct rfkill *rfkill, bool blocked);
  * rfkill drivers that get events when the soft-blocked state changes
  * (yes, some platforms directly act on input but allow changing again)
  * use this function to notify the rfkill core (and through that also
- * userspace) of the current state -- they should also use this after
- * resume if the state could have changed.
+ * userspace) of the current state.  It is not necessary to notify on
+ * resume; since hibernation can always change the soft-blocked state,
+ * the rfkill core will unconditionally restore the previous state.
  *
  * This function can be called in any context, even from within rfkill
  * callbacks.
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index fa430bd03f1..4e68ab439d5 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -728,15 +728,11 @@ static int rfkill_resume(struct device *dev)
 	struct rfkill *rfkill = to_rfkill(dev);
 	bool cur;
 
-	mutex_lock(&rfkill_global_mutex);
-	cur = rfkill_global_states[rfkill->type].cur;
+	cur = !!(rfkill->state & RFKILL_BLOCK_SW);
 	rfkill_set_block(rfkill, cur);
-	mutex_unlock(&rfkill_global_mutex);
 
 	rfkill->suspended = false;
 
-	schedule_work(&rfkill->uevent_work);
-
 	rfkill_resume_polling(rfkill);
 
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 6ff9a64d2aaa6eae396adc95e9c91c0cbfa6dbe4 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 10 Jun 2009 14:28:34 -0400
Subject: tracing: do not translate event helper macros in print format

By moving the macro that creates the print format code above the
defining of the event macro helpers (__get_str, __print_symbolic,
and __get_dynamic_array), we get a little cleaner print format.

Instead of:

  (char *)((void *)REC + REC->__data_loc_name)

we get:

   __get_str(name)

Instead of:

   ({ static const struct trace_print_flags symbols[] = { { HI_SOFTIRQ, "HI" }, {

we get:

   __print_symbolic(REC->vec, { HI_SOFTIRQ, "HI" }, {

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/ftrace.h | 158 +++++++++++++++++++++++++------------------------
 1 file changed, 81 insertions(+), 77 deletions(-)

(limited to 'include')

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 40ede4db4d8..1867553c61e 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -80,6 +80,87 @@
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
+/*
+ * Setup the showing format of trace point.
+ *
+ * int
+ * ftrace_format_##call(struct trace_seq *s)
+ * {
+ *	struct ftrace_raw_##call field;
+ *	int ret;
+ *
+ *	ret = trace_seq_printf(s, #type " " #item ";"
+ *			       " offset:%u; size:%u;\n",
+ *			       offsetof(struct ftrace_raw_##call, item),
+ *			       sizeof(field.type));
+ *
+ * }
+ */
+
+#undef TP_STRUCT__entry
+#define TP_STRUCT__entry(args...) args
+
+#undef __field
+#define __field(type, item)					\
+	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
+			       "offset:%u;\tsize:%u;\n",		\
+			       (unsigned int)offsetof(typeof(field), item), \
+			       (unsigned int)sizeof(field.item));	\
+	if (!ret)							\
+		return 0;
+
+#undef __array
+#define __array(type, item, len)						\
+	ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t"	\
+			       "offset:%u;\tsize:%u;\n",		\
+			       (unsigned int)offsetof(typeof(field), item), \
+			       (unsigned int)sizeof(field.item));	\
+	if (!ret)							\
+		return 0;
+
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)				       \
+	ret = trace_seq_printf(s, "\tfield:__data_loc " #item ";\t"	       \
+			       "offset:%u;\tsize:%u;\n",		       \
+			       (unsigned int)offsetof(typeof(field),	       \
+					__data_loc_##item),		       \
+			       (unsigned int)sizeof(field.__data_loc_##item)); \
+	if (!ret)							       \
+		return 0;
+
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, -1)
+
+#undef __entry
+#define __entry REC
+
+#undef __print_symbolic
+#undef __get_dynamic_array
+#undef __get_str
+
+#undef TP_printk
+#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
+
+#undef TP_fast_assign
+#define TP_fast_assign(args...) args
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
+static int								\
+ftrace_format_##call(struct trace_seq *s)				\
+{									\
+	struct ftrace_raw_##call field __attribute__((unused));		\
+	int ret = 0;							\
+									\
+	tstruct;							\
+									\
+	trace_seq_printf(s, "\nprint fmt: " print);			\
+									\
+	return ret;							\
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
 /*
  * Stage 3 of the trace events.
  *
@@ -179,83 +260,6 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags)	\
 	
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
-/*
- * Setup the showing format of trace point.
- *
- * int
- * ftrace_format_##call(struct trace_seq *s)
- * {
- *	struct ftrace_raw_##call field;
- *	int ret;
- *
- *	ret = trace_seq_printf(s, #type " " #item ";"
- *			       " offset:%u; size:%u;\n",
- *			       offsetof(struct ftrace_raw_##call, item),
- *			       sizeof(field.type));
- *
- * }
- */
-
-#undef TP_STRUCT__entry
-#define TP_STRUCT__entry(args...) args
-
-#undef __field
-#define __field(type, item)					\
-	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
-			       "offset:%u;\tsize:%u;\n",		\
-			       (unsigned int)offsetof(typeof(field), item), \
-			       (unsigned int)sizeof(field.item));	\
-	if (!ret)							\
-		return 0;
-
-#undef __array
-#define __array(type, item, len)						\
-	ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t"	\
-			       "offset:%u;\tsize:%u;\n",		\
-			       (unsigned int)offsetof(typeof(field), item), \
-			       (unsigned int)sizeof(field.item));	\
-	if (!ret)							\
-		return 0;
-
-#undef __dynamic_array
-#define __dynamic_array(type, item, len)				       \
-	ret = trace_seq_printf(s, "\tfield:__data_loc " #item ";\t"	       \
-			       "offset:%u;\tsize:%u;\n",		       \
-			       (unsigned int)offsetof(typeof(field),	       \
-					__data_loc_##item),		       \
-			       (unsigned int)sizeof(field.__data_loc_##item)); \
-	if (!ret)							       \
-		return 0;
-
-#undef __string
-#define __string(item, src) __dynamic_array(char, item, -1)
-
-#undef __entry
-#define __entry REC
-
-#undef TP_printk
-#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
-
-#undef TP_fast_assign
-#define TP_fast_assign(args...) args
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, func, print)		\
-static int								\
-ftrace_format_##call(struct trace_seq *s)				\
-{									\
-	struct ftrace_raw_##call field __attribute__((unused));		\
-	int ret = 0;							\
-									\
-	tstruct;							\
-									\
-	trace_seq_printf(s, "\nprint fmt: " print);			\
-									\
-	return ret;							\
-}
-
-#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
-
 #undef __field
 #define __field(type, item)						\
 	ret = trace_define_field(event_call, #type, #item,		\
-- 
cgit v1.2.3-70-g09d2


From d9c7d394a8ebacb60097b192939ae9f15235225e Mon Sep 17 00:00:00 2001
From: Nikanth Karthikesan <knikanth@novell.com>
Date: Wed, 10 Jun 2009 12:57:06 -0700
Subject: block: prevent possible io_context->refcount overflow

Currently io_context has an atomic_t(32-bit) as refcount.  In the case of
cfq, for each device against whcih a task does I/O, a reference to the
io_context would be taken.  And when there are multiple process sharing
io_contexts(CLONE_IO) would also have a reference to the same io_context.

Theoretically the possible maximum number of processes sharing the same
io_context + the number of disks/cfq_data referring to the same io_context
can overflow the 32-bit counter on a very high-end machine.

Even though it is an improbable case, let us make it atomic_long_t.

Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-ioc.c           | 12 ++++++------
 block/cfq-iosched.c       |  2 +-
 include/linux/iocontext.h |  6 +++---
 3 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 012f065ac8e..d4ed6000147 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -35,9 +35,9 @@ int put_io_context(struct io_context *ioc)
 	if (ioc == NULL)
 		return 1;
 
-	BUG_ON(atomic_read(&ioc->refcount) == 0);
+	BUG_ON(atomic_long_read(&ioc->refcount) == 0);
 
-	if (atomic_dec_and_test(&ioc->refcount)) {
+	if (atomic_long_dec_and_test(&ioc->refcount)) {
 		rcu_read_lock();
 		if (ioc->aic && ioc->aic->dtor)
 			ioc->aic->dtor(ioc->aic);
@@ -90,7 +90,7 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
 
 	ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
 	if (ret) {
-		atomic_set(&ret->refcount, 1);
+		atomic_long_set(&ret->refcount, 1);
 		atomic_set(&ret->nr_tasks, 1);
 		spin_lock_init(&ret->lock);
 		ret->ioprio_changed = 0;
@@ -151,7 +151,7 @@ struct io_context *get_io_context(gfp_t gfp_flags, int node)
 		ret = current_io_context(gfp_flags, node);
 		if (unlikely(!ret))
 			break;
-	} while (!atomic_inc_not_zero(&ret->refcount));
+	} while (!atomic_long_inc_not_zero(&ret->refcount));
 
 	return ret;
 }
@@ -163,8 +163,8 @@ void copy_io_context(struct io_context **pdst, struct io_context **psrc)
 	struct io_context *dst = *pdst;
 
 	if (src) {
-		BUG_ON(atomic_read(&src->refcount) == 0);
-		atomic_inc(&src->refcount);
+		BUG_ON(atomic_long_read(&src->refcount) == 0);
+		atomic_long_inc(&src->refcount);
 		put_io_context(dst);
 		*pdst = src;
 	}
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 99ac4304d71..ef2f72d4243 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1282,7 +1282,7 @@ static void cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	if (!cfqd->active_cic) {
 		struct cfq_io_context *cic = RQ_CIC(rq);
 
-		atomic_inc(&cic->ioc->refcount);
+		atomic_long_inc(&cic->ioc->refcount);
 		cfqd->active_cic = cic;
 	}
 }
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 08b987bccf8..dd05434fa45 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -64,7 +64,7 @@ struct cfq_io_context {
  * and kmalloc'ed. These could be shared between processes.
  */
 struct io_context {
-	atomic_t refcount;
+	atomic_long_t refcount;
 	atomic_t nr_tasks;
 
 	/* all the fields below are protected by this lock */
@@ -91,8 +91,8 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc)
 	 * if ref count is zero, don't allow sharing (ioc is going away, it's
 	 * a race).
 	 */
-	if (ioc && atomic_inc_not_zero(&ioc->refcount)) {
-		atomic_inc(&ioc->nr_tasks);
+	if (ioc && atomic_long_inc_not_zero(&ioc->refcount)) {
+		atomic_long_inc(&ioc->refcount);
 		return ioc;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From b43d65f7e818485664037a46367cfb15af05bd8c Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Tue, 9 Jun 2009 08:11:42 +0100
Subject: [ARM] 5546/1: ARM PL022 SSP/SPI driver v3

This adds a driver for the ARM PL022 PrimeCell SSP/SPI
driver found in the U300 platforms as well as in some
ARM reference hardware, and in a modified version on the
Nomadik board.

Reviewed-by: Alessandro Rubini <rubini-list@gnudd.com>
Reviewed-by: Russell King <linux@arm.linux.org.uk>
Reviewed-by: Baruch Siach <baruch@tkos.co.il>

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/spi/Kconfig        |    9 +
 drivers/spi/Makefile       |    1 +
 drivers/spi/amba-pl022.c   | 1866 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/amba/pl022.h |  264 +++++++
 4 files changed, 2140 insertions(+)
 create mode 100644 drivers/spi/amba-pl022.c
 create mode 100644 include/linux/amba/pl022.h

(limited to 'include')

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 7c61251bea6..8e7c17e4461 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -171,6 +171,15 @@ config SPI_ORION
 	help
 	  This enables using the SPI master controller on the Orion chips.
 
+config SPI_PL022
+	tristate "ARM AMBA PL022 SSP controller (EXPERIMENTAL)"
+	depends on ARM_AMBA && EXPERIMENTAL
+	default y if MACH_U300
+	help
+	  This selects the ARM(R) AMBA(R) PrimeCell PL022 SSP
+	  controller. If you have an embedded system with an AMBA(R)
+	  bus and a PL022 controller, say Y or M here.
+
 config SPI_PXA2XX
 	tristate "PXA2xx SSP SPI master"
 	depends on ARCH_PXA && EXPERIMENTAL
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 5d0451936d8..ecfadb18048 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_SPI_PXA2XX)		+= pxa2xx_spi.o
 obj-$(CONFIG_SPI_OMAP_UWIRE)		+= omap_uwire.o
 obj-$(CONFIG_SPI_OMAP24XX)		+= omap2_mcspi.o
 obj-$(CONFIG_SPI_ORION)			+= orion_spi.o
+obj-$(CONFIG_SPI_PL022)			+= amba-pl022.o
 obj-$(CONFIG_SPI_MPC52xx_PSC)		+= mpc52xx_psc_spi.o
 obj-$(CONFIG_SPI_MPC83xx)		+= spi_mpc83xx.o
 obj-$(CONFIG_SPI_S3C24XX_GPIO)		+= spi_s3c24xx_gpio.o
diff --git a/drivers/spi/amba-pl022.c b/drivers/spi/amba-pl022.c
new file mode 100644
index 00000000000..da76797ce8b
--- /dev/null
+++ b/drivers/spi/amba-pl022.c
@@ -0,0 +1,1866 @@
+/*
+ * drivers/spi/amba-pl022.c
+ *
+ * A driver for the ARM PL022 PrimeCell SSP/SPI bus master.
+ *
+ * Copyright (C) 2008-2009 ST-Ericsson AB
+ * Copyright (C) 2006 STMicroelectronics Pvt. Ltd.
+ *
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ *
+ * Initial version inspired by:
+ *	linux-2.6.17-rc3-mm1/drivers/spi/pxa2xx_spi.c
+ * Initial adoption to PL022 by:
+ *      Sachin Verma <sachin.verma@st.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * TODO:
+ * - add timeout on polled transfers
+ * - add generic DMA framework support
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/ioport.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/spi/spi.h>
+#include <linux/workqueue.h>
+#include <linux/errno.h>
+#include <linux/delay.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/amba/bus.h>
+#include <linux/amba/pl022.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+
+/*
+ * This macro is used to define some register default values.
+ * reg is masked with mask, the OR:ed with an (again masked)
+ * val shifted sb steps to the left.
+ */
+#define SSP_WRITE_BITS(reg, val, mask, sb) \
+ ((reg) = (((reg) & ~(mask)) | (((val)<<(sb)) & (mask))))
+
+/*
+ * This macro is also used to define some default values.
+ * It will just shift val by sb steps to the left and mask
+ * the result with mask.
+ */
+#define GEN_MASK_BITS(val, mask, sb) \
+ (((val)<<(sb)) & (mask))
+
+#define DRIVE_TX		0
+#define DO_NOT_DRIVE_TX		1
+
+#define DO_NOT_QUEUE_DMA	0
+#define QUEUE_DMA		1
+
+#define RX_TRANSFER		1
+#define TX_TRANSFER		2
+
+/*
+ * Macros to access SSP Registers with their offsets
+ */
+#define SSP_CR0(r)	(r + 0x000)
+#define SSP_CR1(r)	(r + 0x004)
+#define SSP_DR(r)	(r + 0x008)
+#define SSP_SR(r)	(r + 0x00C)
+#define SSP_CPSR(r)	(r + 0x010)
+#define SSP_IMSC(r)	(r + 0x014)
+#define SSP_RIS(r)	(r + 0x018)
+#define SSP_MIS(r)	(r + 0x01C)
+#define SSP_ICR(r)	(r + 0x020)
+#define SSP_DMACR(r)	(r + 0x024)
+#define SSP_ITCR(r)	(r + 0x080)
+#define SSP_ITIP(r)	(r + 0x084)
+#define SSP_ITOP(r)	(r + 0x088)
+#define SSP_TDR(r)	(r + 0x08C)
+
+#define SSP_PID0(r)	(r + 0xFE0)
+#define SSP_PID1(r)	(r + 0xFE4)
+#define SSP_PID2(r)	(r + 0xFE8)
+#define SSP_PID3(r)	(r + 0xFEC)
+
+#define SSP_CID0(r)	(r + 0xFF0)
+#define SSP_CID1(r)	(r + 0xFF4)
+#define SSP_CID2(r)	(r + 0xFF8)
+#define SSP_CID3(r)	(r + 0xFFC)
+
+/*
+ * SSP Control Register 0  - SSP_CR0
+ */
+#define SSP_CR0_MASK_DSS	(0x1FUL << 0)
+#define SSP_CR0_MASK_HALFDUP	(0x1UL << 5)
+#define SSP_CR0_MASK_SPO	(0x1UL << 6)
+#define SSP_CR0_MASK_SPH	(0x1UL << 7)
+#define SSP_CR0_MASK_SCR	(0xFFUL << 8)
+#define SSP_CR0_MASK_CSS	(0x1FUL << 16)
+#define SSP_CR0_MASK_FRF	(0x3UL << 21)
+
+/*
+ * SSP Control Register 0  - SSP_CR1
+ */
+#define SSP_CR1_MASK_LBM	(0x1UL << 0)
+#define SSP_CR1_MASK_SSE	(0x1UL << 1)
+#define SSP_CR1_MASK_MS		(0x1UL << 2)
+#define SSP_CR1_MASK_SOD	(0x1UL << 3)
+#define SSP_CR1_MASK_RENDN	(0x1UL << 4)
+#define SSP_CR1_MASK_TENDN	(0x1UL << 5)
+#define SSP_CR1_MASK_MWAIT	(0x1UL << 6)
+#define SSP_CR1_MASK_RXIFLSEL	(0x7UL << 7)
+#define SSP_CR1_MASK_TXIFLSEL	(0x7UL << 10)
+
+/*
+ * SSP Data Register - SSP_DR
+ */
+#define SSP_DR_MASK_DATA	0xFFFFFFFF
+
+/*
+ * SSP Status Register - SSP_SR
+ */
+#define SSP_SR_MASK_TFE		(0x1UL << 0) /* Transmit FIFO empty */
+#define SSP_SR_MASK_TNF		(0x1UL << 1) /* Transmit FIFO not full */
+#define SSP_SR_MASK_RNE		(0x1UL << 2) /* Receive FIFO not empty */
+#define SSP_SR_MASK_RFF 	(0x1UL << 3) /* Receive FIFO full */
+#define SSP_SR_MASK_BSY		(0x1UL << 4) /* Busy Flag */
+
+/*
+ * SSP Clock Prescale Register  - SSP_CPSR
+ */
+#define SSP_CPSR_MASK_CPSDVSR	(0xFFUL << 0)
+
+/*
+ * SSP Interrupt Mask Set/Clear Register - SSP_IMSC
+ */
+#define SSP_IMSC_MASK_RORIM (0x1UL << 0) /* Receive Overrun Interrupt mask */
+#define SSP_IMSC_MASK_RTIM  (0x1UL << 1) /* Receive timeout Interrupt mask */
+#define SSP_IMSC_MASK_RXIM  (0x1UL << 2) /* Receive FIFO Interrupt mask */
+#define SSP_IMSC_MASK_TXIM  (0x1UL << 3) /* Transmit FIFO Interrupt mask */
+
+/*
+ * SSP Raw Interrupt Status Register - SSP_RIS
+ */
+/* Receive Overrun Raw Interrupt status */
+#define SSP_RIS_MASK_RORRIS		(0x1UL << 0)
+/* Receive Timeout Raw Interrupt status */
+#define SSP_RIS_MASK_RTRIS		(0x1UL << 1)
+/* Receive FIFO Raw Interrupt status */
+#define SSP_RIS_MASK_RXRIS		(0x1UL << 2)
+/* Transmit FIFO Raw Interrupt status */
+#define SSP_RIS_MASK_TXRIS		(0x1UL << 3)
+
+/*
+ * SSP Masked Interrupt Status Register - SSP_MIS
+ */
+/* Receive Overrun Masked Interrupt status */
+#define SSP_MIS_MASK_RORMIS		(0x1UL << 0)
+/* Receive Timeout Masked Interrupt status */
+#define SSP_MIS_MASK_RTMIS		(0x1UL << 1)
+/* Receive FIFO Masked Interrupt status */
+#define SSP_MIS_MASK_RXMIS		(0x1UL << 2)
+/* Transmit FIFO Masked Interrupt status */
+#define SSP_MIS_MASK_TXMIS		(0x1UL << 3)
+
+/*
+ * SSP Interrupt Clear Register - SSP_ICR
+ */
+/* Receive Overrun Raw Clear Interrupt bit */
+#define SSP_ICR_MASK_RORIC		(0x1UL << 0)
+/* Receive Timeout Clear Interrupt bit */
+#define SSP_ICR_MASK_RTIC		(0x1UL << 1)
+
+/*
+ * SSP DMA Control Register - SSP_DMACR
+ */
+/* Receive DMA Enable bit */
+#define SSP_DMACR_MASK_RXDMAE		(0x1UL << 0)
+/* Transmit DMA Enable bit */
+#define SSP_DMACR_MASK_TXDMAE		(0x1UL << 1)
+
+/*
+ * SSP Integration Test control Register - SSP_ITCR
+ */
+#define SSP_ITCR_MASK_ITEN		(0x1UL << 0)
+#define SSP_ITCR_MASK_TESTFIFO		(0x1UL << 1)
+
+/*
+ * SSP Integration Test Input Register - SSP_ITIP
+ */
+#define ITIP_MASK_SSPRXD		 (0x1UL << 0)
+#define ITIP_MASK_SSPFSSIN		 (0x1UL << 1)
+#define ITIP_MASK_SSPCLKIN		 (0x1UL << 2)
+#define ITIP_MASK_RXDMAC		 (0x1UL << 3)
+#define ITIP_MASK_TXDMAC		 (0x1UL << 4)
+#define ITIP_MASK_SSPTXDIN		 (0x1UL << 5)
+
+/*
+ * SSP Integration Test output Register - SSP_ITOP
+ */
+#define ITOP_MASK_SSPTXD		 (0x1UL << 0)
+#define ITOP_MASK_SSPFSSOUT		 (0x1UL << 1)
+#define ITOP_MASK_SSPCLKOUT		 (0x1UL << 2)
+#define ITOP_MASK_SSPOEn		 (0x1UL << 3)
+#define ITOP_MASK_SSPCTLOEn		 (0x1UL << 4)
+#define ITOP_MASK_RORINTR		 (0x1UL << 5)
+#define ITOP_MASK_RTINTR		 (0x1UL << 6)
+#define ITOP_MASK_RXINTR		 (0x1UL << 7)
+#define ITOP_MASK_TXINTR		 (0x1UL << 8)
+#define ITOP_MASK_INTR			 (0x1UL << 9)
+#define ITOP_MASK_RXDMABREQ		 (0x1UL << 10)
+#define ITOP_MASK_RXDMASREQ		 (0x1UL << 11)
+#define ITOP_MASK_TXDMABREQ		 (0x1UL << 12)
+#define ITOP_MASK_TXDMASREQ		 (0x1UL << 13)
+
+/*
+ * SSP Test Data Register - SSP_TDR
+ */
+#define TDR_MASK_TESTDATA 		(0xFFFFFFFF)
+
+/*
+ * Message State
+ * we use the spi_message.state (void *) pointer to
+ * hold a single state value, that's why all this
+ * (void *) casting is done here.
+ */
+#define STATE_START                     ((void *) 0)
+#define STATE_RUNNING                   ((void *) 1)
+#define STATE_DONE                      ((void *) 2)
+#define STATE_ERROR                     ((void *) -1)
+
+/*
+ * Queue State
+ */
+#define QUEUE_RUNNING                   (0)
+#define QUEUE_STOPPED                   (1)
+/*
+ * SSP State - Whether Enabled or Disabled
+ */
+#define SSP_DISABLED 			(0)
+#define SSP_ENABLED 			(1)
+
+/*
+ * SSP DMA State - Whether DMA Enabled or Disabled
+ */
+#define SSP_DMA_DISABLED 		(0)
+#define SSP_DMA_ENABLED 		(1)
+
+/*
+ * SSP Clock Defaults
+ */
+#define NMDK_SSP_DEFAULT_CLKRATE 0x2
+#define NMDK_SSP_DEFAULT_PRESCALE 0x40
+
+/*
+ * SSP Clock Parameter ranges
+ */
+#define CPSDVR_MIN 0x02
+#define CPSDVR_MAX 0xFE
+#define SCR_MIN 0x00
+#define SCR_MAX 0xFF
+
+/*
+ * SSP Interrupt related Macros
+ */
+#define DEFAULT_SSP_REG_IMSC  0x0UL
+#define DISABLE_ALL_INTERRUPTS DEFAULT_SSP_REG_IMSC
+#define ENABLE_ALL_INTERRUPTS (~DEFAULT_SSP_REG_IMSC)
+
+#define CLEAR_ALL_INTERRUPTS  0x3
+
+
+/*
+ * The type of reading going on on this chip
+ */
+enum ssp_reading {
+	READING_NULL,
+	READING_U8,
+	READING_U16,
+	READING_U32
+};
+
+/**
+ * The type of writing going on on this chip
+ */
+enum ssp_writing {
+	WRITING_NULL,
+	WRITING_U8,
+	WRITING_U16,
+	WRITING_U32
+};
+
+/**
+ * struct vendor_data - vendor-specific config parameters
+ * for PL022 derivates
+ * @fifodepth: depth of FIFOs (both)
+ * @max_bpw: maximum number of bits per word
+ * @unidir: supports unidirection transfers
+ */
+struct vendor_data {
+	int fifodepth;
+	int max_bpw;
+	bool unidir;
+};
+
+/**
+ * struct pl022 - This is the private SSP driver data structure
+ * @adev: AMBA device model hookup
+ * @phybase: The physical memory where the SSP device resides
+ * @virtbase: The virtual memory where the SSP is mapped
+ * @master: SPI framework hookup
+ * @master_info: controller-specific data from machine setup
+ * @regs: SSP controller register's virtual address
+ * @pump_messages: Work struct for scheduling work to the workqueue
+ * @lock: spinlock to syncronise access to driver data
+ * @workqueue: a workqueue on which any spi_message request is queued
+ * @busy: workqueue is busy
+ * @run: workqueue is running
+ * @pump_transfers: Tasklet used in Interrupt Transfer mode
+ * @cur_msg: Pointer to current spi_message being processed
+ * @cur_transfer: Pointer to current spi_transfer
+ * @cur_chip: pointer to current clients chip(assigned from controller_state)
+ * @tx: current position in TX buffer to be read
+ * @tx_end: end position in TX buffer to be read
+ * @rx: current position in RX buffer to be written
+ * @rx_end: end position in RX buffer to be written
+ * @readingtype: the type of read currently going on
+ * @writingtype: the type or write currently going on
+ */
+struct pl022 {
+	struct amba_device		*adev;
+	struct vendor_data		*vendor;
+	resource_size_t			phybase;
+	void __iomem			*virtbase;
+	struct clk			*clk;
+	struct spi_master		*master;
+	struct pl022_ssp_controller	*master_info;
+	/* Driver message queue */
+	struct workqueue_struct		*workqueue;
+	struct work_struct		pump_messages;
+	spinlock_t			queue_lock;
+	struct list_head		queue;
+	int				busy;
+	int				run;
+	/* Message transfer pump */
+	struct tasklet_struct		pump_transfers;
+	struct spi_message		*cur_msg;
+	struct spi_transfer		*cur_transfer;
+	struct chip_data		*cur_chip;
+	void				*tx;
+	void				*tx_end;
+	void				*rx;
+	void				*rx_end;
+	enum ssp_reading		read;
+	enum ssp_writing		write;
+};
+
+/**
+ * struct chip_data - To maintain runtime state of SSP for each client chip
+ * @cr0: Value of control register CR0 of SSP
+ * @cr1: Value of control register CR1 of SSP
+ * @dmacr: Value of DMA control Register of SSP
+ * @cpsr: Value of Clock prescale register
+ * @n_bytes: how many bytes(power of 2) reqd for a given data width of client
+ * @enable_dma: Whether to enable DMA or not
+ * @write: function ptr to be used to write when doing xfer for this chip
+ * @read: function ptr to be used to read when doing xfer for this chip
+ * @cs_control: chip select callback provided by chip
+ * @xfer_type: polling/interrupt/DMA
+ *
+ * Runtime state of the SSP controller, maintained per chip,
+ * This would be set according to the current message that would be served
+ */
+struct chip_data {
+	u16 cr0;
+	u16 cr1;
+	u16 dmacr;
+	u16 cpsr;
+	u8 n_bytes;
+	u8 enable_dma:1;
+	enum ssp_reading read;
+	enum ssp_writing write;
+	void (*cs_control) (u32 command);
+	int xfer_type;
+};
+
+/**
+ * null_cs_control - Dummy chip select function
+ * @command: select/delect the chip
+ *
+ * If no chip select function is provided by client this is used as dummy
+ * chip select
+ */
+static void null_cs_control(u32 command)
+{
+	pr_debug("pl022: dummy chip select control, CS=0x%x\n", command);
+}
+
+/**
+ * giveback - current spi_message is over, schedule next message and call
+ * callback of this message. Assumes that caller already
+ * set message->status; dma and pio irqs are blocked
+ * @pl022: SSP driver private data structure
+ */
+static void giveback(struct pl022 *pl022)
+{
+	struct spi_transfer *last_transfer;
+	unsigned long flags;
+	struct spi_message *msg;
+	void (*curr_cs_control) (u32 command);
+
+	/*
+	 * This local reference to the chip select function
+	 * is needed because we set curr_chip to NULL
+	 * as a step toward termininating the message.
+	 */
+	curr_cs_control = pl022->cur_chip->cs_control;
+	spin_lock_irqsave(&pl022->queue_lock, flags);
+	msg = pl022->cur_msg;
+	pl022->cur_msg = NULL;
+	pl022->cur_transfer = NULL;
+	pl022->cur_chip = NULL;
+	queue_work(pl022->workqueue, &pl022->pump_messages);
+	spin_unlock_irqrestore(&pl022->queue_lock, flags);
+
+	last_transfer = list_entry(msg->transfers.prev,
+					struct spi_transfer,
+					transfer_list);
+
+	/* Delay if requested before any change in chip select */
+	if (last_transfer->delay_usecs)
+		/*
+		 * FIXME: This runs in interrupt context.
+		 * Is this really smart?
+		 */
+		udelay(last_transfer->delay_usecs);
+
+	/*
+	 * Drop chip select UNLESS cs_change is true or we are returning
+	 * a message with an error, or next message is for another chip
+	 */
+	if (!last_transfer->cs_change)
+		curr_cs_control(SSP_CHIP_DESELECT);
+	else {
+		struct spi_message *next_msg;
+
+		/* Holding of cs was hinted, but we need to make sure
+		 * the next message is for the same chip.  Don't waste
+		 * time with the following tests unless this was hinted.
+		 *
+		 * We cannot postpone this until pump_messages, because
+		 * after calling msg->complete (below) the driver that
+		 * sent the current message could be unloaded, which
+		 * could invalidate the cs_control() callback...
+		 */
+
+		/* get a pointer to the next message, if any */
+		spin_lock_irqsave(&pl022->queue_lock, flags);
+		if (list_empty(&pl022->queue))
+			next_msg = NULL;
+		else
+			next_msg = list_entry(pl022->queue.next,
+					struct spi_message, queue);
+		spin_unlock_irqrestore(&pl022->queue_lock, flags);
+
+		/* see if the next and current messages point
+		 * to the same chip
+		 */
+		if (next_msg && next_msg->spi != msg->spi)
+			next_msg = NULL;
+		if (!next_msg || msg->state == STATE_ERROR)
+			curr_cs_control(SSP_CHIP_DESELECT);
+	}
+	msg->state = NULL;
+	if (msg->complete)
+		msg->complete(msg->context);
+	/* This message is completed, so let's turn off the clock! */
+	clk_disable(pl022->clk);
+}
+
+/**
+ * flush - flush the FIFO to reach a clean state
+ * @pl022: SSP driver private data structure
+ */
+static int flush(struct pl022 *pl022)
+{
+	unsigned long limit = loops_per_jiffy << 1;
+
+	dev_dbg(&pl022->adev->dev, "flush\n");
+	do {
+		while (readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_RNE)
+			readw(SSP_DR(pl022->virtbase));
+	} while ((readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_BSY) && limit--);
+	return limit;
+}
+
+/**
+ * restore_state - Load configuration of current chip
+ * @pl022: SSP driver private data structure
+ */
+static void restore_state(struct pl022 *pl022)
+{
+	struct chip_data *chip = pl022->cur_chip;
+
+	writew(chip->cr0, SSP_CR0(pl022->virtbase));
+	writew(chip->cr1, SSP_CR1(pl022->virtbase));
+	writew(chip->dmacr, SSP_DMACR(pl022->virtbase));
+	writew(chip->cpsr, SSP_CPSR(pl022->virtbase));
+	writew(DISABLE_ALL_INTERRUPTS, SSP_IMSC(pl022->virtbase));
+	writew(CLEAR_ALL_INTERRUPTS, SSP_ICR(pl022->virtbase));
+}
+
+/**
+ * load_ssp_default_config - Load default configuration for SSP
+ * @pl022: SSP driver private data structure
+ */
+
+/*
+ * Default SSP Register Values
+ */
+#define DEFAULT_SSP_REG_CR0 ( \
+	GEN_MASK_BITS(SSP_DATA_BITS_12, SSP_CR0_MASK_DSS, 0)	| \
+	GEN_MASK_BITS(SSP_MICROWIRE_CHANNEL_FULL_DUPLEX, SSP_CR0_MASK_HALFDUP, 5) | \
+	GEN_MASK_BITS(SSP_CLK_POL_IDLE_LOW, SSP_CR0_MASK_SPO, 6) | \
+	GEN_MASK_BITS(SSP_CLK_FALLING_EDGE, SSP_CR0_MASK_SPH, 7) | \
+	GEN_MASK_BITS(NMDK_SSP_DEFAULT_CLKRATE, SSP_CR0_MASK_SCR, 8) | \
+	GEN_MASK_BITS(SSP_BITS_8, SSP_CR0_MASK_CSS, 16)	| \
+	GEN_MASK_BITS(SSP_INTERFACE_MOTOROLA_SPI, SSP_CR0_MASK_FRF, 21) \
+)
+
+#define DEFAULT_SSP_REG_CR1 ( \
+	GEN_MASK_BITS(LOOPBACK_DISABLED, SSP_CR1_MASK_LBM, 0) | \
+	GEN_MASK_BITS(SSP_DISABLED, SSP_CR1_MASK_SSE, 1) | \
+	GEN_MASK_BITS(SSP_MASTER, SSP_CR1_MASK_MS, 2) | \
+	GEN_MASK_BITS(DO_NOT_DRIVE_TX, SSP_CR1_MASK_SOD, 3) | \
+	GEN_MASK_BITS(SSP_RX_MSB, SSP_CR1_MASK_RENDN, 4) | \
+	GEN_MASK_BITS(SSP_TX_MSB, SSP_CR1_MASK_TENDN, 5) | \
+	GEN_MASK_BITS(SSP_MWIRE_WAIT_ZERO, SSP_CR1_MASK_MWAIT, 6) |\
+	GEN_MASK_BITS(SSP_RX_1_OR_MORE_ELEM, SSP_CR1_MASK_RXIFLSEL, 7) | \
+	GEN_MASK_BITS(SSP_TX_1_OR_MORE_EMPTY_LOC, SSP_CR1_MASK_TXIFLSEL, 10) \
+)
+
+#define DEFAULT_SSP_REG_CPSR ( \
+	GEN_MASK_BITS(NMDK_SSP_DEFAULT_PRESCALE, SSP_CPSR_MASK_CPSDVSR, 0) \
+)
+
+#define DEFAULT_SSP_REG_DMACR (\
+	GEN_MASK_BITS(SSP_DMA_DISABLED, SSP_DMACR_MASK_RXDMAE, 0) | \
+	GEN_MASK_BITS(SSP_DMA_DISABLED, SSP_DMACR_MASK_TXDMAE, 1) \
+)
+
+
+static void load_ssp_default_config(struct pl022 *pl022)
+{
+	writew(DEFAULT_SSP_REG_CR0, SSP_CR0(pl022->virtbase));
+	writew(DEFAULT_SSP_REG_CR1, SSP_CR1(pl022->virtbase));
+	writew(DEFAULT_SSP_REG_DMACR, SSP_DMACR(pl022->virtbase));
+	writew(DEFAULT_SSP_REG_CPSR, SSP_CPSR(pl022->virtbase));
+	writew(DISABLE_ALL_INTERRUPTS, SSP_IMSC(pl022->virtbase));
+	writew(CLEAR_ALL_INTERRUPTS, SSP_ICR(pl022->virtbase));
+}
+
+/**
+ * This will write to TX and read from RX according to the parameters
+ * set in pl022.
+ */
+static void readwriter(struct pl022 *pl022)
+{
+
+	/*
+	 * The FIFO depth is different inbetween primecell variants.
+	 * I believe filling in too much in the FIFO might cause
+	 * errons in 8bit wide transfers on ARM variants (just 8 words
+	 * FIFO, means only 8x8 = 64 bits in FIFO) at least.
+	 *
+	 * FIXME: currently we have no logic to account for this.
+	 * perhaps there is even something broken in HW regarding
+	 * 8bit transfers (it doesn't fail on 16bit) so this needs
+	 * more investigation...
+	 */
+	dev_dbg(&pl022->adev->dev,
+		"%s, rx: %p, rxend: %p, tx: %p, txend: %p\n",
+		__func__, pl022->rx, pl022->rx_end, pl022->tx, pl022->tx_end);
+
+	/* Read as much as you can */
+	while ((readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_RNE)
+	       && (pl022->rx < pl022->rx_end)) {
+		switch (pl022->read) {
+		case READING_NULL:
+			readw(SSP_DR(pl022->virtbase));
+			break;
+		case READING_U8:
+			*(u8 *) (pl022->rx) =
+				readw(SSP_DR(pl022->virtbase)) & 0xFFU;
+			break;
+		case READING_U16:
+			*(u16 *) (pl022->rx) =
+				(u16) readw(SSP_DR(pl022->virtbase));
+			break;
+		case READING_U32:
+			*(u32 *) (pl022->rx) =
+				readl(SSP_DR(pl022->virtbase));
+			break;
+		}
+		pl022->rx += (pl022->cur_chip->n_bytes);
+	}
+	/*
+	 * Write as much as you can, while keeping an eye on the RX FIFO!
+	 */
+	while ((readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_TNF)
+	       && (pl022->tx < pl022->tx_end)) {
+		switch (pl022->write) {
+		case WRITING_NULL:
+			writew(0x0, SSP_DR(pl022->virtbase));
+			break;
+		case WRITING_U8:
+			writew(*(u8 *) (pl022->tx), SSP_DR(pl022->virtbase));
+			break;
+		case WRITING_U16:
+			writew((*(u16 *) (pl022->tx)), SSP_DR(pl022->virtbase));
+			break;
+		case WRITING_U32:
+			writel(*(u32 *) (pl022->tx), SSP_DR(pl022->virtbase));
+			break;
+		}
+		pl022->tx += (pl022->cur_chip->n_bytes);
+		/*
+		 * This inner reader takes care of things appearing in the RX
+		 * FIFO as we're transmitting. This will happen a lot since the
+		 * clock starts running when you put things into the TX FIFO,
+		 * and then things are continously clocked into the RX FIFO.
+		 */
+		while ((readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_RNE)
+		       && (pl022->rx < pl022->rx_end)) {
+			switch (pl022->read) {
+			case READING_NULL:
+				readw(SSP_DR(pl022->virtbase));
+				break;
+			case READING_U8:
+				*(u8 *) (pl022->rx) =
+					readw(SSP_DR(pl022->virtbase)) & 0xFFU;
+				break;
+			case READING_U16:
+				*(u16 *) (pl022->rx) =
+					(u16) readw(SSP_DR(pl022->virtbase));
+				break;
+			case READING_U32:
+				*(u32 *) (pl022->rx) =
+					readl(SSP_DR(pl022->virtbase));
+				break;
+			}
+			pl022->rx += (pl022->cur_chip->n_bytes);
+		}
+	}
+	/*
+	 * When we exit here the TX FIFO should be full and the RX FIFO
+	 * should be empty
+	 */
+}
+
+
+/**
+ * next_transfer - Move to the Next transfer in the current spi message
+ * @pl022: SSP driver private data structure
+ *
+ * This function moves though the linked list of spi transfers in the
+ * current spi message and returns with the state of current spi
+ * message i.e whether its last transfer is done(STATE_DONE) or
+ * Next transfer is ready(STATE_RUNNING)
+ */
+static void *next_transfer(struct pl022 *pl022)
+{
+	struct spi_message *msg = pl022->cur_msg;
+	struct spi_transfer *trans = pl022->cur_transfer;
+
+	/* Move to next transfer */
+	if (trans->transfer_list.next != &msg->transfers) {
+		pl022->cur_transfer =
+		    list_entry(trans->transfer_list.next,
+			       struct spi_transfer, transfer_list);
+		return STATE_RUNNING;
+	}
+	return STATE_DONE;
+}
+/**
+ * pl022_interrupt_handler - Interrupt handler for SSP controller
+ *
+ * This function handles interrupts generated for an interrupt based transfer.
+ * If a receive overrun (ROR) interrupt is there then we disable SSP, flag the
+ * current message's state as STATE_ERROR and schedule the tasklet
+ * pump_transfers which will do the postprocessing of the current message by
+ * calling giveback(). Otherwise it reads data from RX FIFO till there is no
+ * more data, and writes data in TX FIFO till it is not full. If we complete
+ * the transfer we move to the next transfer and schedule the tasklet.
+ */
+static irqreturn_t pl022_interrupt_handler(int irq, void *dev_id)
+{
+	struct pl022 *pl022 = dev_id;
+	struct spi_message *msg = pl022->cur_msg;
+	u16 irq_status = 0;
+	u16 flag = 0;
+
+	if (unlikely(!msg)) {
+		dev_err(&pl022->adev->dev,
+			"bad message state in interrupt handler");
+		/* Never fail */
+		return IRQ_HANDLED;
+	}
+
+	/* Read the Interrupt Status Register */
+	irq_status = readw(SSP_MIS(pl022->virtbase));
+
+	if (unlikely(!irq_status))
+		return IRQ_NONE;
+
+	/* This handles the error code interrupts */
+	if (unlikely(irq_status & SSP_MIS_MASK_RORMIS)) {
+		/*
+		 * Overrun interrupt - bail out since our Data has been
+		 * corrupted
+		 */
+		dev_err(&pl022->adev->dev,
+			"FIFO overrun\n");
+		if (readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_RFF)
+			dev_err(&pl022->adev->dev,
+				"RXFIFO is full\n");
+		if (readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_TNF)
+			dev_err(&pl022->adev->dev,
+				"TXFIFO is full\n");
+
+		/*
+		 * Disable and clear interrupts, disable SSP,
+		 * mark message with bad status so it can be
+		 * retried.
+		 */
+		writew(DISABLE_ALL_INTERRUPTS,
+		       SSP_IMSC(pl022->virtbase));
+		writew(CLEAR_ALL_INTERRUPTS, SSP_ICR(pl022->virtbase));
+		writew((readw(SSP_CR1(pl022->virtbase)) &
+			(~SSP_CR1_MASK_SSE)), SSP_CR1(pl022->virtbase));
+		msg->state = STATE_ERROR;
+
+		/* Schedule message queue handler */
+		tasklet_schedule(&pl022->pump_transfers);
+		return IRQ_HANDLED;
+	}
+
+	readwriter(pl022);
+
+	if ((pl022->tx == pl022->tx_end) && (flag == 0)) {
+		flag = 1;
+		/* Disable Transmit interrupt */
+		writew(readw(SSP_IMSC(pl022->virtbase)) &
+		       (~SSP_IMSC_MASK_TXIM),
+		       SSP_IMSC(pl022->virtbase));
+	}
+
+	/*
+	 * Since all transactions must write as much as shall be read,
+	 * we can conclude the entire transaction once RX is complete.
+	 * At this point, all TX will always be finished.
+	 */
+	if (pl022->rx >= pl022->rx_end) {
+		writew(DISABLE_ALL_INTERRUPTS,
+		       SSP_IMSC(pl022->virtbase));
+		writew(CLEAR_ALL_INTERRUPTS, SSP_ICR(pl022->virtbase));
+		if (unlikely(pl022->rx > pl022->rx_end)) {
+			dev_warn(&pl022->adev->dev, "read %u surplus "
+				 "bytes (did you request an odd "
+				 "number of bytes on a 16bit bus?)\n",
+				 (u32) (pl022->rx - pl022->rx_end));
+		}
+		/* Update total bytes transfered */
+		msg->actual_length += pl022->cur_transfer->len;
+		if (pl022->cur_transfer->cs_change)
+			pl022->cur_chip->
+				cs_control(SSP_CHIP_DESELECT);
+		/* Move to next transfer */
+		msg->state = next_transfer(pl022);
+		tasklet_schedule(&pl022->pump_transfers);
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * This sets up the pointers to memory for the next message to
+ * send out on the SPI bus.
+ */
+static int set_up_next_transfer(struct pl022 *pl022,
+				struct spi_transfer *transfer)
+{
+	int residue;
+
+	/* Sanity check the message for this bus width */
+	residue = pl022->cur_transfer->len % pl022->cur_chip->n_bytes;
+	if (unlikely(residue != 0)) {
+		dev_err(&pl022->adev->dev,
+			"message of %u bytes to transmit but the current "
+			"chip bus has a data width of %u bytes!\n",
+			pl022->cur_transfer->len,
+			pl022->cur_chip->n_bytes);
+		dev_err(&pl022->adev->dev, "skipping this message\n");
+		return -EIO;
+	}
+	pl022->tx = (void *)transfer->tx_buf;
+	pl022->tx_end = pl022->tx + pl022->cur_transfer->len;
+	pl022->rx = (void *)transfer->rx_buf;
+	pl022->rx_end = pl022->rx + pl022->cur_transfer->len;
+	pl022->write =
+	    pl022->tx ? pl022->cur_chip->write : WRITING_NULL;
+	pl022->read = pl022->rx ? pl022->cur_chip->read : READING_NULL;
+	return 0;
+}
+
+/**
+ * pump_transfers - Tasklet function which schedules next interrupt transfer
+ * when running in interrupt transfer mode.
+ * @data: SSP driver private data structure
+ *
+ */
+static void pump_transfers(unsigned long data)
+{
+	struct pl022 *pl022 = (struct pl022 *) data;
+	struct spi_message *message = NULL;
+	struct spi_transfer *transfer = NULL;
+	struct spi_transfer *previous = NULL;
+
+	/* Get current state information */
+	message = pl022->cur_msg;
+	transfer = pl022->cur_transfer;
+
+	/* Handle for abort */
+	if (message->state == STATE_ERROR) {
+		message->status = -EIO;
+		giveback(pl022);
+		return;
+	}
+
+	/* Handle end of message */
+	if (message->state == STATE_DONE) {
+		message->status = 0;
+		giveback(pl022);
+		return;
+	}
+
+	/* Delay if requested at end of transfer before CS change */
+	if (message->state == STATE_RUNNING) {
+		previous = list_entry(transfer->transfer_list.prev,
+					struct spi_transfer,
+					transfer_list);
+		if (previous->delay_usecs)
+			/*
+			 * FIXME: This runs in interrupt context.
+			 * Is this really smart?
+			 */
+			udelay(previous->delay_usecs);
+
+		/* Drop chip select only if cs_change is requested */
+		if (previous->cs_change)
+			pl022->cur_chip->cs_control(SSP_CHIP_SELECT);
+	} else {
+		/* STATE_START */
+		message->state = STATE_RUNNING;
+	}
+
+	if (set_up_next_transfer(pl022, transfer)) {
+		message->state = STATE_ERROR;
+		message->status = -EIO;
+		giveback(pl022);
+		return;
+	}
+	/* Flush the FIFOs and let's go! */
+	flush(pl022);
+	writew(ENABLE_ALL_INTERRUPTS, SSP_IMSC(pl022->virtbase));
+}
+
+/**
+ * NOT IMPLEMENTED
+ * configure_dma - It configures the DMA pipes for DMA transfers
+ * @data: SSP driver's private data structure
+ *
+ */
+static int configure_dma(void *data)
+{
+	struct pl022 *pl022 = data;
+	dev_dbg(&pl022->adev->dev, "configure DMA\n");
+	return -ENOTSUPP;
+}
+
+/**
+ * do_dma_transfer - It handles transfers of the current message
+ * if it is DMA xfer.
+ * NOT FULLY IMPLEMENTED
+ * @data: SSP driver's private data structure
+ */
+static void do_dma_transfer(void *data)
+{
+	struct pl022 *pl022 = data;
+
+	if (configure_dma(data)) {
+		dev_dbg(&pl022->adev->dev, "configuration of DMA Failed!\n");
+		goto err_config_dma;
+	}
+
+	/* TODO: Implememt DMA setup of pipes here */
+
+	/* Enable target chip, set up transfer */
+	pl022->cur_chip->cs_control(SSP_CHIP_SELECT);
+	if (set_up_next_transfer(pl022, pl022->cur_transfer)) {
+		/* Error path */
+		pl022->cur_msg->state = STATE_ERROR;
+		pl022->cur_msg->status = -EIO;
+		giveback(pl022);
+		return;
+	}
+	/* Enable SSP */
+	writew((readw(SSP_CR1(pl022->virtbase)) | SSP_CR1_MASK_SSE),
+	       SSP_CR1(pl022->virtbase));
+
+	/* TODO: Enable the DMA transfer here */
+	return;
+
+ err_config_dma:
+	pl022->cur_msg->state = STATE_ERROR;
+	pl022->cur_msg->status = -EIO;
+	giveback(pl022);
+	return;
+}
+
+static void do_interrupt_transfer(void *data)
+{
+	struct pl022 *pl022 = data;
+
+	/* Enable target chip */
+	pl022->cur_chip->cs_control(SSP_CHIP_SELECT);
+	if (set_up_next_transfer(pl022, pl022->cur_transfer)) {
+		/* Error path */
+		pl022->cur_msg->state = STATE_ERROR;
+		pl022->cur_msg->status = -EIO;
+		giveback(pl022);
+		return;
+	}
+	/* Enable SSP, turn on interrupts */
+	writew((readw(SSP_CR1(pl022->virtbase)) | SSP_CR1_MASK_SSE),
+	       SSP_CR1(pl022->virtbase));
+	writew(ENABLE_ALL_INTERRUPTS, SSP_IMSC(pl022->virtbase));
+}
+
+static void do_polling_transfer(void *data)
+{
+	struct pl022 *pl022 = data;
+	struct spi_message *message = NULL;
+	struct spi_transfer *transfer = NULL;
+	struct spi_transfer *previous = NULL;
+	struct chip_data *chip;
+
+	chip = pl022->cur_chip;
+	message = pl022->cur_msg;
+
+	while (message->state != STATE_DONE) {
+		/* Handle for abort */
+		if (message->state == STATE_ERROR)
+			break;
+		transfer = pl022->cur_transfer;
+
+		/* Delay if requested at end of transfer */
+		if (message->state == STATE_RUNNING) {
+			previous =
+			    list_entry(transfer->transfer_list.prev,
+				       struct spi_transfer, transfer_list);
+			if (previous->delay_usecs)
+				udelay(previous->delay_usecs);
+			if (previous->cs_change)
+				pl022->cur_chip->cs_control(SSP_CHIP_SELECT);
+		} else {
+			/* STATE_START */
+			message->state = STATE_RUNNING;
+			pl022->cur_chip->cs_control(SSP_CHIP_SELECT);
+		}
+
+		/* Configuration Changing Per Transfer */
+		if (set_up_next_transfer(pl022, transfer)) {
+			/* Error path */
+			message->state = STATE_ERROR;
+			break;
+		}
+		/* Flush FIFOs and enable SSP */
+		flush(pl022);
+		writew((readw(SSP_CR1(pl022->virtbase)) | SSP_CR1_MASK_SSE),
+		       SSP_CR1(pl022->virtbase));
+
+		dev_dbg(&pl022->adev->dev, "POLLING TRANSFER ONGOING ... \n");
+		/* FIXME: insert a timeout so we don't hang here indefinately */
+		while (pl022->tx < pl022->tx_end || pl022->rx < pl022->rx_end)
+			readwriter(pl022);
+
+		/* Update total byte transfered */
+		message->actual_length += pl022->cur_transfer->len;
+		if (pl022->cur_transfer->cs_change)
+			pl022->cur_chip->cs_control(SSP_CHIP_DESELECT);
+		/* Move to next transfer */
+		message->state = next_transfer(pl022);
+	}
+
+	/* Handle end of message */
+	if (message->state == STATE_DONE)
+		message->status = 0;
+	else
+		message->status = -EIO;
+
+	giveback(pl022);
+	return;
+}
+
+/**
+ * pump_messages - Workqueue function which processes spi message queue
+ * @data: pointer to private data of SSP driver
+ *
+ * This function checks if there is any spi message in the queue that
+ * needs processing and delegate control to appropriate function
+ * do_polling_transfer()/do_interrupt_transfer()/do_dma_transfer()
+ * based on the kind of the transfer
+ *
+ */
+static void pump_messages(struct work_struct *work)
+{
+	struct pl022 *pl022 =
+		container_of(work, struct pl022, pump_messages);
+	unsigned long flags;
+
+	/* Lock queue and check for queue work */
+	spin_lock_irqsave(&pl022->queue_lock, flags);
+	if (list_empty(&pl022->queue) || pl022->run == QUEUE_STOPPED) {
+		pl022->busy = 0;
+		spin_unlock_irqrestore(&pl022->queue_lock, flags);
+		return;
+	}
+	/* Make sure we are not already running a message */
+	if (pl022->cur_msg) {
+		spin_unlock_irqrestore(&pl022->queue_lock, flags);
+		return;
+	}
+	/* Extract head of queue */
+	pl022->cur_msg =
+	    list_entry(pl022->queue.next, struct spi_message, queue);
+
+	list_del_init(&pl022->cur_msg->queue);
+	pl022->busy = 1;
+	spin_unlock_irqrestore(&pl022->queue_lock, flags);
+
+	/* Initial message state */
+	pl022->cur_msg->state = STATE_START;
+	pl022->cur_transfer = list_entry(pl022->cur_msg->transfers.next,
+					    struct spi_transfer,
+					    transfer_list);
+
+	/* Setup the SPI using the per chip configuration */
+	pl022->cur_chip = spi_get_ctldata(pl022->cur_msg->spi);
+	/*
+	 * We enable the clock here, then the clock will be disabled when
+	 * giveback() is called in each method (poll/interrupt/DMA)
+	 */
+	clk_enable(pl022->clk);
+	restore_state(pl022);
+	flush(pl022);
+
+	if (pl022->cur_chip->xfer_type == POLLING_TRANSFER)
+		do_polling_transfer(pl022);
+	else if (pl022->cur_chip->xfer_type == INTERRUPT_TRANSFER)
+		do_interrupt_transfer(pl022);
+	else
+		do_dma_transfer(pl022);
+}
+
+
+static int __init init_queue(struct pl022 *pl022)
+{
+	INIT_LIST_HEAD(&pl022->queue);
+	spin_lock_init(&pl022->queue_lock);
+
+	pl022->run = QUEUE_STOPPED;
+	pl022->busy = 0;
+
+	tasklet_init(&pl022->pump_transfers,
+			pump_transfers,	(unsigned long)pl022);
+
+	INIT_WORK(&pl022->pump_messages, pump_messages);
+	pl022->workqueue = create_singlethread_workqueue(
+					dev_name(pl022->master->dev.parent));
+	if (pl022->workqueue == NULL)
+		return -EBUSY;
+
+	return 0;
+}
+
+
+static int start_queue(struct pl022 *pl022)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pl022->queue_lock, flags);
+
+	if (pl022->run == QUEUE_RUNNING || pl022->busy) {
+		spin_unlock_irqrestore(&pl022->queue_lock, flags);
+		return -EBUSY;
+	}
+
+	pl022->run = QUEUE_RUNNING;
+	pl022->cur_msg = NULL;
+	pl022->cur_transfer = NULL;
+	pl022->cur_chip = NULL;
+	spin_unlock_irqrestore(&pl022->queue_lock, flags);
+
+	queue_work(pl022->workqueue, &pl022->pump_messages);
+
+	return 0;
+}
+
+
+static int stop_queue(struct pl022 *pl022)
+{
+	unsigned long flags;
+	unsigned limit = 500;
+	int status = 0;
+
+	spin_lock_irqsave(&pl022->queue_lock, flags);
+
+	/* This is a bit lame, but is optimized for the common execution path.
+	 * A wait_queue on the pl022->busy could be used, but then the common
+	 * execution path (pump_messages) would be required to call wake_up or
+	 * friends on every SPI message. Do this instead */
+	pl022->run = QUEUE_STOPPED;
+	while (!list_empty(&pl022->queue) && pl022->busy && limit--) {
+		spin_unlock_irqrestore(&pl022->queue_lock, flags);
+		msleep(10);
+		spin_lock_irqsave(&pl022->queue_lock, flags);
+	}
+
+	if (!list_empty(&pl022->queue) || pl022->busy)
+		status = -EBUSY;
+
+	spin_unlock_irqrestore(&pl022->queue_lock, flags);
+
+	return status;
+}
+
+static int destroy_queue(struct pl022 *pl022)
+{
+	int status;
+
+	status = stop_queue(pl022);
+	/* we are unloading the module or failing to load (only two calls
+	 * to this routine), and neither call can handle a return value.
+	 * However, destroy_workqueue calls flush_workqueue, and that will
+	 * block until all work is done.  If the reason that stop_queue
+	 * timed out is that the work will never finish, then it does no
+	 * good to call destroy_workqueue, so return anyway. */
+	if (status != 0)
+		return status;
+
+	destroy_workqueue(pl022->workqueue);
+
+	return 0;
+}
+
+static int verify_controller_parameters(struct pl022 *pl022,
+					struct pl022_config_chip *chip_info)
+{
+	if ((chip_info->lbm != LOOPBACK_ENABLED)
+	    && (chip_info->lbm != LOOPBACK_DISABLED)) {
+		dev_err(chip_info->dev,
+			"loopback Mode is configured incorrectly\n");
+		return -EINVAL;
+	}
+	if ((chip_info->iface < SSP_INTERFACE_MOTOROLA_SPI)
+	    || (chip_info->iface > SSP_INTERFACE_UNIDIRECTIONAL)) {
+		dev_err(chip_info->dev,
+			"interface is configured incorrectly\n");
+		return -EINVAL;
+	}
+	if ((chip_info->iface == SSP_INTERFACE_UNIDIRECTIONAL) &&
+	    (!pl022->vendor->unidir)) {
+		dev_err(chip_info->dev,
+			"unidirectional mode not supported in this "
+			"hardware version\n");
+		return -EINVAL;
+	}
+	if ((chip_info->hierarchy != SSP_MASTER)
+	    && (chip_info->hierarchy != SSP_SLAVE)) {
+		dev_err(chip_info->dev,
+			"hierarchy is configured incorrectly\n");
+		return -EINVAL;
+	}
+	if (((chip_info->clk_freq).cpsdvsr < CPSDVR_MIN)
+	    || ((chip_info->clk_freq).cpsdvsr > CPSDVR_MAX)) {
+		dev_err(chip_info->dev,
+			"cpsdvsr is configured incorrectly\n");
+		return -EINVAL;
+	}
+	if ((chip_info->endian_rx != SSP_RX_MSB)
+	    && (chip_info->endian_rx != SSP_RX_LSB)) {
+		dev_err(chip_info->dev,
+			"RX FIFO endianess is configured incorrectly\n");
+		return -EINVAL;
+	}
+	if ((chip_info->endian_tx != SSP_TX_MSB)
+	    && (chip_info->endian_tx != SSP_TX_LSB)) {
+		dev_err(chip_info->dev,
+			"TX FIFO endianess is configured incorrectly\n");
+		return -EINVAL;
+	}
+	if ((chip_info->data_size < SSP_DATA_BITS_4)
+	    || (chip_info->data_size > SSP_DATA_BITS_32)) {
+		dev_err(chip_info->dev,
+			"DATA Size is configured incorrectly\n");
+		return -EINVAL;
+	}
+	if ((chip_info->com_mode != INTERRUPT_TRANSFER)
+	    && (chip_info->com_mode != DMA_TRANSFER)
+	    && (chip_info->com_mode != POLLING_TRANSFER)) {
+		dev_err(chip_info->dev,
+			"Communication mode is configured incorrectly\n");
+		return -EINVAL;
+	}
+	if ((chip_info->rx_lev_trig < SSP_RX_1_OR_MORE_ELEM)
+	    || (chip_info->rx_lev_trig > SSP_RX_32_OR_MORE_ELEM)) {
+		dev_err(chip_info->dev,
+			"RX FIFO Trigger Level is configured incorrectly\n");
+		return -EINVAL;
+	}
+	if ((chip_info->tx_lev_trig < SSP_TX_1_OR_MORE_EMPTY_LOC)
+	    || (chip_info->tx_lev_trig > SSP_TX_32_OR_MORE_EMPTY_LOC)) {
+		dev_err(chip_info->dev,
+			"TX FIFO Trigger Level is configured incorrectly\n");
+		return -EINVAL;
+	}
+	if (chip_info->iface == SSP_INTERFACE_MOTOROLA_SPI) {
+		if ((chip_info->clk_phase != SSP_CLK_RISING_EDGE)
+		    && (chip_info->clk_phase != SSP_CLK_FALLING_EDGE)) {
+			dev_err(chip_info->dev,
+				"Clock Phase is configured incorrectly\n");
+			return -EINVAL;
+		}
+		if ((chip_info->clk_pol != SSP_CLK_POL_IDLE_LOW)
+		    && (chip_info->clk_pol != SSP_CLK_POL_IDLE_HIGH)) {
+			dev_err(chip_info->dev,
+				"Clock Polarity is configured incorrectly\n");
+			return -EINVAL;
+		}
+	}
+	if (chip_info->iface == SSP_INTERFACE_NATIONAL_MICROWIRE) {
+		if ((chip_info->ctrl_len < SSP_BITS_4)
+		    || (chip_info->ctrl_len > SSP_BITS_32)) {
+			dev_err(chip_info->dev,
+				"CTRL LEN is configured incorrectly\n");
+			return -EINVAL;
+		}
+		if ((chip_info->wait_state != SSP_MWIRE_WAIT_ZERO)
+		    && (chip_info->wait_state != SSP_MWIRE_WAIT_ONE)) {
+			dev_err(chip_info->dev,
+				"Wait State is configured incorrectly\n");
+			return -EINVAL;
+		}
+		if ((chip_info->duplex != SSP_MICROWIRE_CHANNEL_FULL_DUPLEX)
+		    && (chip_info->duplex !=
+			SSP_MICROWIRE_CHANNEL_HALF_DUPLEX)) {
+			dev_err(chip_info->dev,
+				"DUPLEX is configured incorrectly\n");
+			return -EINVAL;
+		}
+	}
+	if (chip_info->cs_control == NULL) {
+		dev_warn(chip_info->dev,
+			"Chip Select Function is NULL for this chip\n");
+		chip_info->cs_control = null_cs_control;
+	}
+	return 0;
+}
+
+/**
+ * pl022_transfer - transfer function registered to SPI master framework
+ * @spi: spi device which is requesting transfer
+ * @msg: spi message which is to handled is queued to driver queue
+ *
+ * This function is registered to the SPI framework for this SPI master
+ * controller. It will queue the spi_message in the queue of driver if
+ * the queue is not stopped and return.
+ */
+static int pl022_transfer(struct spi_device *spi, struct spi_message *msg)
+{
+	struct pl022 *pl022 = spi_master_get_devdata(spi->master);
+	unsigned long flags;
+
+	spin_lock_irqsave(&pl022->queue_lock, flags);
+
+	if (pl022->run == QUEUE_STOPPED) {
+		spin_unlock_irqrestore(&pl022->queue_lock, flags);
+		return -ESHUTDOWN;
+	}
+	msg->actual_length = 0;
+	msg->status = -EINPROGRESS;
+	msg->state = STATE_START;
+
+	list_add_tail(&msg->queue, &pl022->queue);
+	if (pl022->run == QUEUE_RUNNING && !pl022->busy)
+		queue_work(pl022->workqueue, &pl022->pump_messages);
+
+	spin_unlock_irqrestore(&pl022->queue_lock, flags);
+	return 0;
+}
+
+static int calculate_effective_freq(struct pl022 *pl022,
+				    int freq,
+				    struct ssp_clock_params *clk_freq)
+{
+	/* Lets calculate the frequency parameters */
+	u16 cpsdvsr = 2;
+	u16 scr = 0;
+	bool freq_found = false;
+	u32 rate;
+	u32 max_tclk;
+	u32 min_tclk;
+
+	rate = clk_get_rate(pl022->clk);
+	/* cpsdvscr = 2 & scr 0 */
+	max_tclk = (rate / (CPSDVR_MIN * (1 + SCR_MIN)));
+	/* cpsdvsr = 254 & scr = 255 */
+	min_tclk = (rate / (CPSDVR_MAX * (1 + SCR_MAX)));
+
+	if ((freq <= max_tclk) && (freq >= min_tclk)) {
+		while (cpsdvsr <= CPSDVR_MAX && !freq_found) {
+			while (scr <= SCR_MAX && !freq_found) {
+				if ((rate /
+				     (cpsdvsr * (1 + scr))) > freq)
+					scr += 1;
+				else {
+					/*
+					 * This bool is made true when
+					 * effective frequency >=
+					 * target frequency is found
+					 */
+					freq_found = true;
+					if ((rate /
+					     (cpsdvsr * (1 + scr))) != freq) {
+						if (scr == SCR_MIN) {
+							cpsdvsr -= 2;
+							scr = SCR_MAX;
+						} else
+							scr -= 1;
+					}
+				}
+			}
+			if (!freq_found) {
+				cpsdvsr += 2;
+				scr = SCR_MIN;
+			}
+		}
+		if (cpsdvsr != 0) {
+			dev_dbg(&pl022->adev->dev,
+				"SSP Effective Frequency is %u\n",
+				(rate / (cpsdvsr * (1 + scr))));
+			clk_freq->cpsdvsr = (u8) (cpsdvsr & 0xFF);
+			clk_freq->scr = (u8) (scr & 0xFF);
+			dev_dbg(&pl022->adev->dev,
+				"SSP cpsdvsr = %d, scr = %d\n",
+				clk_freq->cpsdvsr, clk_freq->scr);
+		}
+	} else {
+		dev_err(&pl022->adev->dev,
+			"controller data is incorrect: out of range frequency");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * NOT IMPLEMENTED
+ * process_dma_info - Processes the DMA info provided by client drivers
+ * @chip_info: chip info provided by client device
+ * @chip: Runtime state maintained by the SSP controller for each spi device
+ *
+ * This function processes and stores DMA config provided by client driver
+ * into the runtime state maintained by the SSP controller driver
+ */
+static int process_dma_info(struct pl022_config_chip *chip_info,
+			    struct chip_data *chip)
+{
+	dev_err(chip_info->dev,
+		"cannot process DMA info, DMA not implemented!\n");
+	return -ENOTSUPP;
+}
+
+/**
+ * pl022_setup - setup function registered to SPI master framework
+ * @spi: spi device which is requesting setup
+ *
+ * This function is registered to the SPI framework for this SPI master
+ * controller. If it is the first time when setup is called by this device,
+ * this function will initialize the runtime state for this chip and save
+ * the same in the device structure. Else it will update the runtime info
+ * with the updated chip info. Nothing is really being written to the
+ * controller hardware here, that is not done until the actual transfer
+ * commence.
+ */
+
+/* FIXME: JUST GUESSING the spi->mode bits understood by this driver */
+#define MODEBITS	(SPI_CPOL | SPI_CPHA | SPI_CS_HIGH \
+			| SPI_LSB_FIRST | SPI_LOOP)
+
+static int pl022_setup(struct spi_device *spi)
+{
+	struct pl022_config_chip *chip_info;
+	struct chip_data *chip;
+	int status = 0;
+	struct pl022 *pl022 = spi_master_get_devdata(spi->master);
+
+	if (spi->mode & ~MODEBITS) {
+		dev_dbg(&spi->dev, "unsupported mode bits %x\n",
+			spi->mode & ~MODEBITS);
+		return -EINVAL;
+	}
+
+	if (!spi->max_speed_hz)
+		return -EINVAL;
+
+	/* Get controller_state if one is supplied */
+	chip = spi_get_ctldata(spi);
+
+	if (chip == NULL) {
+		chip = kzalloc(sizeof(struct chip_data), GFP_KERNEL);
+		if (!chip) {
+			dev_err(&spi->dev,
+				"cannot allocate controller state\n");
+			return -ENOMEM;
+		}
+		dev_dbg(&spi->dev,
+			"allocated memory for controller's runtime state\n");
+	}
+
+	/* Get controller data if one is supplied */
+	chip_info = spi->controller_data;
+
+	if (chip_info == NULL) {
+		/* spi_board_info.controller_data not is supplied */
+		dev_dbg(&spi->dev,
+			"using default controller_data settings\n");
+
+		chip_info =
+			kzalloc(sizeof(struct pl022_config_chip), GFP_KERNEL);
+
+		if (!chip_info) {
+			dev_err(&spi->dev,
+				"cannot allocate controller data\n");
+			status = -ENOMEM;
+			goto err_first_setup;
+		}
+
+		dev_dbg(&spi->dev, "allocated memory for controller data\n");
+
+		/* Pointer back to the SPI device */
+		chip_info->dev = &spi->dev;
+		/*
+		 * Set controller data default values:
+		 * Polling is supported by default
+		 */
+		chip_info->lbm = LOOPBACK_DISABLED;
+		chip_info->com_mode = POLLING_TRANSFER;
+		chip_info->iface = SSP_INTERFACE_MOTOROLA_SPI;
+		chip_info->hierarchy = SSP_SLAVE;
+		chip_info->slave_tx_disable = DO_NOT_DRIVE_TX;
+		chip_info->endian_tx = SSP_TX_LSB;
+		chip_info->endian_rx = SSP_RX_LSB;
+		chip_info->data_size = SSP_DATA_BITS_12;
+		chip_info->rx_lev_trig = SSP_RX_1_OR_MORE_ELEM;
+		chip_info->tx_lev_trig = SSP_TX_1_OR_MORE_EMPTY_LOC;
+		chip_info->clk_phase = SSP_CLK_FALLING_EDGE;
+		chip_info->clk_pol = SSP_CLK_POL_IDLE_LOW;
+		chip_info->ctrl_len = SSP_BITS_8;
+		chip_info->wait_state = SSP_MWIRE_WAIT_ZERO;
+		chip_info->duplex = SSP_MICROWIRE_CHANNEL_FULL_DUPLEX;
+		chip_info->cs_control = null_cs_control;
+	} else {
+		dev_dbg(&spi->dev,
+			"using user supplied controller_data settings\n");
+	}
+
+	/*
+	 * We can override with custom divisors, else we use the board
+	 * frequency setting
+	 */
+	if ((0 == chip_info->clk_freq.cpsdvsr)
+	    && (0 == chip_info->clk_freq.scr)) {
+		status = calculate_effective_freq(pl022,
+						  spi->max_speed_hz,
+						  &chip_info->clk_freq);
+		if (status < 0)
+			goto err_config_params;
+	} else {
+		if ((chip_info->clk_freq.cpsdvsr % 2) != 0)
+			chip_info->clk_freq.cpsdvsr =
+				chip_info->clk_freq.cpsdvsr - 1;
+	}
+	status = verify_controller_parameters(pl022, chip_info);
+	if (status) {
+		dev_err(&spi->dev, "controller data is incorrect");
+		goto err_config_params;
+	}
+	/* Now set controller state based on controller data */
+	chip->xfer_type = chip_info->com_mode;
+	chip->cs_control = chip_info->cs_control;
+
+	if (chip_info->data_size <= 8) {
+		dev_dbg(&spi->dev, "1 <= n <=8 bits per word\n");
+		chip->n_bytes = 1;
+		chip->read = READING_U8;
+		chip->write = WRITING_U8;
+	} else if (chip_info->data_size <= 16) {
+		dev_dbg(&spi->dev, "9 <= n <= 16 bits per word\n");
+		chip->n_bytes = 2;
+		chip->read = READING_U16;
+		chip->write = WRITING_U16;
+	} else {
+		if (pl022->vendor->max_bpw >= 32) {
+			dev_dbg(&spi->dev, "17 <= n <= 32 bits per word\n");
+			chip->n_bytes = 4;
+			chip->read = READING_U32;
+			chip->write = WRITING_U32;
+		} else {
+			dev_err(&spi->dev,
+				"illegal data size for this controller!\n");
+			dev_err(&spi->dev,
+				"a standard pl022 can only handle "
+				"1 <= n <= 16 bit words\n");
+			goto err_config_params;
+		}
+	}
+
+	/* Now Initialize all register settings required for this chip */
+	chip->cr0 = 0;
+	chip->cr1 = 0;
+	chip->dmacr = 0;
+	chip->cpsr = 0;
+	if ((chip_info->com_mode == DMA_TRANSFER)
+	    && ((pl022->master_info)->enable_dma)) {
+		chip->enable_dma = 1;
+		dev_dbg(&spi->dev, "DMA mode set in controller state\n");
+		status = process_dma_info(chip_info, chip);
+		if (status < 0)
+			goto err_config_params;
+		SSP_WRITE_BITS(chip->dmacr, SSP_DMA_ENABLED,
+			       SSP_DMACR_MASK_RXDMAE, 0);
+		SSP_WRITE_BITS(chip->dmacr, SSP_DMA_ENABLED,
+			       SSP_DMACR_MASK_TXDMAE, 1);
+	} else {
+		chip->enable_dma = 0;
+		dev_dbg(&spi->dev, "DMA mode NOT set in controller state\n");
+		SSP_WRITE_BITS(chip->dmacr, SSP_DMA_DISABLED,
+			       SSP_DMACR_MASK_RXDMAE, 0);
+		SSP_WRITE_BITS(chip->dmacr, SSP_DMA_DISABLED,
+			       SSP_DMACR_MASK_TXDMAE, 1);
+	}
+
+	chip->cpsr = chip_info->clk_freq.cpsdvsr;
+
+	SSP_WRITE_BITS(chip->cr0, chip_info->data_size, SSP_CR0_MASK_DSS, 0);
+	SSP_WRITE_BITS(chip->cr0, chip_info->duplex, SSP_CR0_MASK_HALFDUP, 5);
+	SSP_WRITE_BITS(chip->cr0, chip_info->clk_pol, SSP_CR0_MASK_SPO, 6);
+	SSP_WRITE_BITS(chip->cr0, chip_info->clk_phase, SSP_CR0_MASK_SPH, 7);
+	SSP_WRITE_BITS(chip->cr0, chip_info->clk_freq.scr, SSP_CR0_MASK_SCR, 8);
+	SSP_WRITE_BITS(chip->cr0, chip_info->ctrl_len, SSP_CR0_MASK_CSS, 16);
+	SSP_WRITE_BITS(chip->cr0, chip_info->iface, SSP_CR0_MASK_FRF, 21);
+	SSP_WRITE_BITS(chip->cr1, chip_info->lbm, SSP_CR1_MASK_LBM, 0);
+	SSP_WRITE_BITS(chip->cr1, SSP_DISABLED, SSP_CR1_MASK_SSE, 1);
+	SSP_WRITE_BITS(chip->cr1, chip_info->hierarchy, SSP_CR1_MASK_MS, 2);
+	SSP_WRITE_BITS(chip->cr1, chip_info->slave_tx_disable, SSP_CR1_MASK_SOD, 3);
+	SSP_WRITE_BITS(chip->cr1, chip_info->endian_rx, SSP_CR1_MASK_RENDN, 4);
+	SSP_WRITE_BITS(chip->cr1, chip_info->endian_tx, SSP_CR1_MASK_TENDN, 5);
+	SSP_WRITE_BITS(chip->cr1, chip_info->wait_state, SSP_CR1_MASK_MWAIT, 6);
+	SSP_WRITE_BITS(chip->cr1, chip_info->rx_lev_trig, SSP_CR1_MASK_RXIFLSEL, 7);
+	SSP_WRITE_BITS(chip->cr1, chip_info->tx_lev_trig, SSP_CR1_MASK_TXIFLSEL, 10);
+
+	/* Save controller_state */
+	spi_set_ctldata(spi, chip);
+	return status;
+ err_config_params:
+ err_first_setup:
+	kfree(chip);
+	return status;
+}
+
+/**
+ * pl022_cleanup - cleanup function registered to SPI master framework
+ * @spi: spi device which is requesting cleanup
+ *
+ * This function is registered to the SPI framework for this SPI master
+ * controller. It will free the runtime state of chip.
+ */
+static void pl022_cleanup(struct spi_device *spi)
+{
+	struct chip_data *chip = spi_get_ctldata(spi);
+
+	spi_set_ctldata(spi, NULL);
+	kfree(chip);
+}
+
+
+static int __init
+pl022_probe(struct amba_device *adev, struct amba_id *id)
+{
+	struct device *dev = &adev->dev;
+	struct pl022_ssp_controller *platform_info = adev->dev.platform_data;
+	struct spi_master *master;
+	struct pl022 *pl022 = NULL;	/*Data for this driver */
+	int status = 0;
+
+	dev_info(&adev->dev,
+		 "ARM PL022 driver, device ID: 0x%08x\n", adev->periphid);
+	if (platform_info == NULL) {
+		dev_err(&adev->dev, "probe - no platform data supplied\n");
+		status = -ENODEV;
+		goto err_no_pdata;
+	}
+
+	/* Allocate master with space for data */
+	master = spi_alloc_master(dev, sizeof(struct pl022));
+	if (master == NULL) {
+		dev_err(&adev->dev, "probe - cannot alloc SPI master\n");
+		status = -ENOMEM;
+		goto err_no_master;
+	}
+
+	pl022 = spi_master_get_devdata(master);
+	pl022->master = master;
+	pl022->master_info = platform_info;
+	pl022->adev = adev;
+	pl022->vendor = id->data;
+
+	/*
+	 * Bus Number Which has been Assigned to this SSP controller
+	 * on this board
+	 */
+	master->bus_num = platform_info->bus_id;
+	master->num_chipselect = platform_info->num_chipselect;
+	master->cleanup = pl022_cleanup;
+	master->setup = pl022_setup;
+	master->transfer = pl022_transfer;
+
+	dev_dbg(&adev->dev, "BUSNO: %d\n", master->bus_num);
+
+	status = amba_request_regions(adev, NULL);
+	if (status)
+		goto err_no_ioregion;
+
+	pl022->virtbase = ioremap(adev->res.start, resource_size(&adev->res));
+	if (pl022->virtbase == NULL) {
+		status = -ENOMEM;
+		goto err_no_ioremap;
+	}
+	printk(KERN_INFO "pl022: mapped registers from 0x%08x to %p\n",
+	       adev->res.start, pl022->virtbase);
+
+	pl022->clk = clk_get(&adev->dev, NULL);
+	if (IS_ERR(pl022->clk)) {
+		status = PTR_ERR(pl022->clk);
+		dev_err(&adev->dev, "could not retrieve SSP/SPI bus clock\n");
+		goto err_no_clk;
+	}
+
+	/* Disable SSP */
+	clk_enable(pl022->clk);
+	writew((readw(SSP_CR1(pl022->virtbase)) & (~SSP_CR1_MASK_SSE)),
+	       SSP_CR1(pl022->virtbase));
+	load_ssp_default_config(pl022);
+	clk_disable(pl022->clk);
+
+	status = request_irq(adev->irq[0], pl022_interrupt_handler, 0, "pl022",
+			     pl022);
+	if (status < 0) {
+		dev_err(&adev->dev, "probe - cannot get IRQ (%d)\n", status);
+		goto err_no_irq;
+	}
+	/* Initialize and start queue */
+	status = init_queue(pl022);
+	if (status != 0) {
+		dev_err(&adev->dev, "probe - problem initializing queue\n");
+		goto err_init_queue;
+	}
+	status = start_queue(pl022);
+	if (status != 0) {
+		dev_err(&adev->dev, "probe - problem starting queue\n");
+		goto err_start_queue;
+	}
+	/* Register with the SPI framework */
+	amba_set_drvdata(adev, pl022);
+	status = spi_register_master(master);
+	if (status != 0) {
+		dev_err(&adev->dev,
+			"probe - problem registering spi master\n");
+		goto err_spi_register;
+	}
+	dev_dbg(dev, "probe succeded\n");
+	return 0;
+
+ err_spi_register:
+ err_start_queue:
+ err_init_queue:
+	destroy_queue(pl022);
+	free_irq(adev->irq[0], pl022);
+ err_no_irq:
+	clk_put(pl022->clk);
+ err_no_clk:
+	iounmap(pl022->virtbase);
+ err_no_ioremap:
+	amba_release_regions(adev);
+ err_no_ioregion:
+	spi_master_put(master);
+ err_no_master:
+ err_no_pdata:
+	return status;
+}
+
+static int __exit
+pl022_remove(struct amba_device *adev)
+{
+	struct pl022 *pl022 = amba_get_drvdata(adev);
+	int status = 0;
+	if (!pl022)
+		return 0;
+
+	/* Remove the queue */
+	status = destroy_queue(pl022);
+	if (status != 0) {
+		dev_err(&adev->dev,
+			"queue remove failed (%d)\n", status);
+		return status;
+	}
+	load_ssp_default_config(pl022);
+	free_irq(adev->irq[0], pl022);
+	clk_disable(pl022->clk);
+	clk_put(pl022->clk);
+	iounmap(pl022->virtbase);
+	amba_release_regions(adev);
+	tasklet_disable(&pl022->pump_transfers);
+	spi_unregister_master(pl022->master);
+	spi_master_put(pl022->master);
+	amba_set_drvdata(adev, NULL);
+	dev_dbg(&adev->dev, "remove succeded\n");
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int pl022_suspend(struct amba_device *adev, pm_message_t state)
+{
+	struct pl022 *pl022 = amba_get_drvdata(adev);
+	int status = 0;
+
+	status = stop_queue(pl022);
+	if (status) {
+		dev_warn(&adev->dev, "suspend cannot stop queue\n");
+		return status;
+	}
+
+	clk_enable(pl022->clk);
+	load_ssp_default_config(pl022);
+	clk_disable(pl022->clk);
+	dev_dbg(&adev->dev, "suspended\n");
+	return 0;
+}
+
+static int pl022_resume(struct amba_device *adev)
+{
+	struct pl022 *pl022 = amba_get_drvdata(adev);
+	int status = 0;
+
+	/* Start the queue running */
+	status = start_queue(pl022);
+	if (status)
+		dev_err(&adev->dev, "problem starting queue (%d)\n", status);
+	else
+		dev_dbg(&adev->dev, "resumed\n");
+
+	return status;
+}
+#else
+#define pl022_suspend NULL
+#define pl022_resume NULL
+#endif	/* CONFIG_PM */
+
+static struct vendor_data vendor_arm = {
+	.fifodepth = 8,
+	.max_bpw = 16,
+	.unidir = false,
+};
+
+
+static struct vendor_data vendor_st = {
+	.fifodepth = 32,
+	.max_bpw = 32,
+	.unidir = false,
+};
+
+static struct amba_id pl022_ids[] = {
+	{
+		/*
+		 * ARM PL022 variant, this has a 16bit wide
+		 * and 8 locations deep TX/RX FIFO
+		 */
+		.id	= 0x00041022,
+		.mask	= 0x000fffff,
+		.data	= &vendor_arm,
+	},
+	{
+		/*
+		 * ST Micro derivative, this has 32bit wide
+		 * and 32 locations deep TX/RX FIFO
+		 */
+		.id	= 0x00108022,
+		.mask	= 0xffffffff,
+		.data	= &vendor_st,
+	},
+	{ 0, 0 },
+};
+
+static struct amba_driver pl022_driver = {
+	.drv = {
+		.name	= "ssp-pl022",
+	},
+	.id_table	= pl022_ids,
+	.probe		= pl022_probe,
+	.remove		= __exit_p(pl022_remove),
+	.suspend        = pl022_suspend,
+	.resume         = pl022_resume,
+};
+
+
+static int __init pl022_init(void)
+{
+	return amba_driver_register(&pl022_driver);
+}
+
+module_init(pl022_init);
+
+static void __exit pl022_exit(void)
+{
+	amba_driver_unregister(&pl022_driver);
+}
+
+module_exit(pl022_exit);
+
+MODULE_AUTHOR("Linus Walleij <linus.walleij@stericsson.com>");
+MODULE_DESCRIPTION("PL022 SSP Controller Driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/amba/pl022.h b/include/linux/amba/pl022.h
new file mode 100644
index 00000000000..dcad0ffd175
--- /dev/null
+++ b/include/linux/amba/pl022.h
@@ -0,0 +1,264 @@
+/*
+ * include/linux/amba/pl022.h
+ *
+ * Copyright (C) 2008-2009 ST-Ericsson AB
+ * Copyright (C) 2006 STMicroelectronics Pvt. Ltd.
+ *
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ *
+ * Initial version inspired by:
+ *	linux-2.6.17-rc3-mm1/drivers/spi/pxa2xx_spi.c
+ * Initial adoption to PL022 by:
+ *      Sachin Verma <sachin.verma@st.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _SSP_PL022_H
+#define _SSP_PL022_H
+
+#include <linux/device.h>
+
+/**
+ * whether SSP is in loopback mode or not
+ */
+enum ssp_loopback {
+	LOOPBACK_DISABLED,
+	LOOPBACK_ENABLED
+};
+
+/**
+ * enum ssp_interface - interfaces allowed for this SSP Controller
+ * @SSP_INTERFACE_MOTOROLA_SPI: Motorola Interface
+ * @SSP_INTERFACE_TI_SYNC_SERIAL: Texas Instrument Synchronous Serial
+ * interface
+ * @SSP_INTERFACE_NATIONAL_MICROWIRE: National Semiconductor Microwire
+ * interface
+ * @SSP_INTERFACE_UNIDIRECTIONAL: Unidirectional interface (STn8810
+ * &STn8815 only)
+ */
+enum ssp_interface {
+	SSP_INTERFACE_MOTOROLA_SPI,
+	SSP_INTERFACE_TI_SYNC_SERIAL,
+	SSP_INTERFACE_NATIONAL_MICROWIRE,
+	SSP_INTERFACE_UNIDIRECTIONAL
+};
+
+/**
+ * enum ssp_hierarchy - whether SSP is configured as Master or Slave
+ */
+enum ssp_hierarchy {
+	SSP_MASTER,
+	SSP_SLAVE
+};
+
+/**
+ * enum ssp_clock_params - clock parameters, to set SSP clock at a
+ * desired freq
+ */
+struct ssp_clock_params {
+	u8 cpsdvsr; /* value from 2 to 254 (even only!) */
+	u8 scr;	    /* value from 0 to 255 */
+};
+
+/**
+ * enum ssp_rx_endian - endianess of Rx FIFO Data
+ */
+enum ssp_rx_endian {
+	SSP_RX_MSB,
+	SSP_RX_LSB
+};
+
+/**
+ * enum ssp_tx_endian - endianess of Tx FIFO Data
+ */
+enum ssp_tx_endian {
+	SSP_TX_MSB,
+	SSP_TX_LSB
+};
+
+/**
+ * enum ssp_data_size - number of bits in one data element
+ */
+enum ssp_data_size {
+	SSP_DATA_BITS_4 = 0x03, SSP_DATA_BITS_5, SSP_DATA_BITS_6,
+	SSP_DATA_BITS_7, SSP_DATA_BITS_8, SSP_DATA_BITS_9,
+	SSP_DATA_BITS_10, SSP_DATA_BITS_11, SSP_DATA_BITS_12,
+	SSP_DATA_BITS_13, SSP_DATA_BITS_14, SSP_DATA_BITS_15,
+	SSP_DATA_BITS_16, SSP_DATA_BITS_17, SSP_DATA_BITS_18,
+	SSP_DATA_BITS_19, SSP_DATA_BITS_20, SSP_DATA_BITS_21,
+	SSP_DATA_BITS_22, SSP_DATA_BITS_23, SSP_DATA_BITS_24,
+	SSP_DATA_BITS_25, SSP_DATA_BITS_26, SSP_DATA_BITS_27,
+	SSP_DATA_BITS_28, SSP_DATA_BITS_29, SSP_DATA_BITS_30,
+	SSP_DATA_BITS_31, SSP_DATA_BITS_32
+};
+
+/**
+ * enum ssp_mode - SSP mode of operation (Communication modes)
+ */
+enum ssp_mode {
+	INTERRUPT_TRANSFER,
+	POLLING_TRANSFER,
+	DMA_TRANSFER
+};
+
+/**
+ * enum ssp_rx_level_trig - receive FIFO watermark level which triggers
+ * IT: Interrupt fires when _N_ or more elements in RX FIFO.
+ */
+enum ssp_rx_level_trig {
+	SSP_RX_1_OR_MORE_ELEM,
+	SSP_RX_4_OR_MORE_ELEM,
+	SSP_RX_8_OR_MORE_ELEM,
+	SSP_RX_16_OR_MORE_ELEM,
+	SSP_RX_32_OR_MORE_ELEM
+};
+
+/**
+ * Transmit FIFO watermark level which triggers (IT Interrupt fires
+ * when _N_ or more empty locations in TX FIFO)
+ */
+enum ssp_tx_level_trig {
+	SSP_TX_1_OR_MORE_EMPTY_LOC,
+	SSP_TX_4_OR_MORE_EMPTY_LOC,
+	SSP_TX_8_OR_MORE_EMPTY_LOC,
+	SSP_TX_16_OR_MORE_EMPTY_LOC,
+	SSP_TX_32_OR_MORE_EMPTY_LOC
+};
+
+/**
+ * enum SPI Clock Phase - clock phase (Motorola SPI interface only)
+ * @SSP_CLK_RISING_EDGE: Receive data on rising edge
+ * @SSP_CLK_FALLING_EDGE: Receive data on falling edge
+ */
+enum ssp_spi_clk_phase {
+	SSP_CLK_RISING_EDGE,
+	SSP_CLK_FALLING_EDGE
+};
+
+/**
+ * enum SPI Clock Polarity - clock polarity (Motorola SPI interface only)
+ * @SSP_CLK_POL_IDLE_LOW: Low inactive level
+ * @SSP_CLK_POL_IDLE_HIGH: High inactive level
+ */
+enum ssp_spi_clk_pol {
+	SSP_CLK_POL_IDLE_LOW,
+	SSP_CLK_POL_IDLE_HIGH
+};
+
+/**
+ * Microwire Conrol Lengths Command size in microwire format
+ */
+enum ssp_microwire_ctrl_len {
+	SSP_BITS_4 = 0x03, SSP_BITS_5, SSP_BITS_6,
+	SSP_BITS_7, SSP_BITS_8, SSP_BITS_9,
+	SSP_BITS_10, SSP_BITS_11, SSP_BITS_12,
+	SSP_BITS_13, SSP_BITS_14, SSP_BITS_15,
+	SSP_BITS_16, SSP_BITS_17, SSP_BITS_18,
+	SSP_BITS_19, SSP_BITS_20, SSP_BITS_21,
+	SSP_BITS_22, SSP_BITS_23, SSP_BITS_24,
+	SSP_BITS_25, SSP_BITS_26, SSP_BITS_27,
+	SSP_BITS_28, SSP_BITS_29, SSP_BITS_30,
+	SSP_BITS_31, SSP_BITS_32
+};
+
+/**
+ * enum Microwire Wait State
+ * @SSP_MWIRE_WAIT_ZERO: No wait state inserted after last command bit
+ * @SSP_MWIRE_WAIT_ONE: One wait state inserted after last command bit
+ */
+enum ssp_microwire_wait_state {
+	SSP_MWIRE_WAIT_ZERO,
+	SSP_MWIRE_WAIT_ONE
+};
+
+/**
+ * enum Microwire - whether Full/Half Duplex
+ * @SSP_MICROWIRE_CHANNEL_FULL_DUPLEX: SSPTXD becomes bi-directional,
+ *     SSPRXD not used
+ * @SSP_MICROWIRE_CHANNEL_HALF_DUPLEX: SSPTXD is an output, SSPRXD is
+ *     an input.
+ */
+enum ssp_duplex {
+	SSP_MICROWIRE_CHANNEL_FULL_DUPLEX,
+	SSP_MICROWIRE_CHANNEL_HALF_DUPLEX
+};
+
+/**
+ * CHIP select/deselect commands
+ */
+enum ssp_chip_select {
+	SSP_CHIP_SELECT,
+	SSP_CHIP_DESELECT
+};
+
+
+/**
+ * struct pl022_ssp_master - device.platform_data for SPI controller devices.
+ * @num_chipselect: chipselects are used to distinguish individual
+ *     SPI slaves, and are numbered from zero to num_chipselects - 1.
+ *     each slave has a chipselect signal, but it's common that not
+ *     every chipselect is connected to a slave.
+ * @enable_dma: if true enables DMA driven transfers.
+ */
+struct pl022_ssp_controller {
+	u16 bus_id;
+	u8 num_chipselect;
+	u8 enable_dma:1;
+};
+
+/**
+ * struct ssp_config_chip - spi_board_info.controller_data for SPI
+ * slave devices, copied to spi_device.controller_data.
+ *
+ * @lbm: used for test purpose to internally connect RX and TX
+ * @iface: Interface type(Motorola, TI, Microwire, Universal)
+ * @hierarchy: sets whether interface is master or slave
+ * @slave_tx_disable: SSPTXD is disconnected (in slave mode only)
+ * @clk_freq: Tune freq parameters of SSP(when in master mode)
+ * @endian_rx: Endianess of Data in Rx FIFO
+ * @endian_tx: Endianess of Data in Tx FIFO
+ * @data_size: Width of data element(4 to 32 bits)
+ * @com_mode: communication mode: polling, Interrupt or DMA
+ * @rx_lev_trig: Rx FIFO watermark level (for IT & DMA mode)
+ * @tx_lev_trig: Tx FIFO watermark level (for IT & DMA mode)
+ * @clk_phase: Motorola SPI interface Clock phase
+ * @clk_pol: Motorola SPI interface Clock polarity
+ * @ctrl_len: Microwire interface: Control length
+ * @wait_state: Microwire interface: Wait state
+ * @duplex: Microwire interface: Full/Half duplex
+ * @cs_control: function pointer to board-specific function to
+ * assert/deassert I/O port to control HW generation of devices chip-select.
+ * @dma_xfer_type: Type of DMA xfer (Mem-to-periph or Periph-to-Periph)
+ * @dma_config: DMA configuration for SSP controller and peripheral
+ */
+struct pl022_config_chip {
+	struct device *dev;
+	enum ssp_loopback lbm;
+	enum ssp_interface iface;
+	enum ssp_hierarchy hierarchy;
+	bool slave_tx_disable;
+	struct ssp_clock_params clk_freq;
+	enum ssp_rx_endian endian_rx;
+	enum ssp_tx_endian endian_tx;
+	enum ssp_data_size data_size;
+	enum ssp_mode com_mode;
+	enum ssp_rx_level_trig rx_lev_trig;
+	enum ssp_tx_level_trig tx_lev_trig;
+	enum ssp_spi_clk_phase clk_phase;
+	enum ssp_spi_clk_pol clk_pol;
+	enum ssp_microwire_ctrl_len ctrl_len;
+	enum ssp_microwire_wait_state wait_state;
+	enum ssp_duplex duplex;
+	void (*cs_control) (u32 control);
+};
+
+#endif /* _SSP_PL022_H */
-- 
cgit v1.2.3-70-g09d2


From df1a132bf3d3508f863336c80a27806a2ac947e0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 10 Jun 2009 21:02:22 +0200
Subject: perf_counter: Introduce struct for sample data

For easy extension of the sample data, put it in a structure.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/perf_counter.c | 10 +++++++---
 arch/x86/kernel/cpu/perf_counter.c | 15 +++++++++++----
 include/linux/perf_counter.h       | 10 ++++++++--
 kernel/perf_counter.c              | 38 ++++++++++++++++++++++----------------
 4 files changed, 48 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 4786ad9a288..5e0bf399c43 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -1001,7 +1001,11 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	 * Finally record data if requested.
 	 */
 	if (record) {
-		addr = 0;
+		struct perf_sample_data data = {
+			.regs = regs,
+			.addr = 0,
+		};
+
 		if (counter->attr.sample_type & PERF_SAMPLE_ADDR) {
 			/*
 			 * The user wants a data address recorded.
@@ -1016,9 +1020,9 @@ static void record_and_restart(struct perf_counter *counter, long val,
 			sdsync = (ppmu->flags & PPMU_ALT_SIPR) ?
 				POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC;
 			if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync))
-				addr = mfspr(SPRN_SDAR);
+				data.addr = mfspr(SPRN_SDAR);
 		}
-		if (perf_counter_overflow(counter, nmi, regs, addr)) {
+		if (perf_counter_overflow(counter, nmi, &data)) {
 			/*
 			 * Interrupts are coming too fast - throttle them
 			 * by setting the counter to 0, so it will be
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 240ca563063..82a23d487f9 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -1173,11 +1173,14 @@ static void intel_pmu_reset(void)
  */
 static int intel_pmu_handle_irq(struct pt_regs *regs)
 {
+	struct perf_sample_data data;
 	struct cpu_hw_counters *cpuc;
-	struct cpu_hw_counters;
 	int bit, cpu, loops;
 	u64 ack, status;
 
+	data.regs = regs;
+	data.addr = 0;
+
 	cpu = smp_processor_id();
 	cpuc = &per_cpu(cpu_hw_counters, cpu);
 
@@ -1210,7 +1213,7 @@ again:
 		if (!intel_pmu_save_and_restart(counter))
 			continue;
 
-		if (perf_counter_overflow(counter, 1, regs, 0))
+		if (perf_counter_overflow(counter, 1, &data))
 			intel_pmu_disable_counter(&counter->hw, bit);
 	}
 
@@ -1230,12 +1233,16 @@ again:
 
 static int amd_pmu_handle_irq(struct pt_regs *regs)
 {
-	int cpu, idx, handled = 0;
+	struct perf_sample_data data;
 	struct cpu_hw_counters *cpuc;
 	struct perf_counter *counter;
 	struct hw_perf_counter *hwc;
+	int cpu, idx, handled = 0;
 	u64 val;
 
+	data.regs = regs;
+	data.addr = 0;
+
 	cpu = smp_processor_id();
 	cpuc = &per_cpu(cpu_hw_counters, cpu);
 
@@ -1256,7 +1263,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
 		if (!x86_perf_counter_set_period(counter, hwc, idx))
 			continue;
 
-		if (perf_counter_overflow(counter, 1, regs, 0))
+		if (perf_counter_overflow(counter, 1, &data))
 			amd_pmu_disable_counter(hwc, idx);
 	}
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 282d8cc4898..d8c0eb480f9 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -605,8 +605,14 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
 	       struct perf_counter_context *ctx, int cpu);
 extern void perf_counter_update_userpage(struct perf_counter *counter);
 
-extern int perf_counter_overflow(struct perf_counter *counter,
-				 int nmi, struct pt_regs *regs, u64 addr);
+struct perf_sample_data {
+	struct pt_regs	*regs;
+	u64		addr;
+};
+
+extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
+				 struct perf_sample_data *data);
+
 /*
  * Return 1 for a software counter, 0 for a hardware counter
  */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index ae591a1275a..4fe85e804f4 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2378,8 +2378,8 @@ static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p)
 	return task_pid_nr_ns(p, counter->ns);
 }
 
-static void perf_counter_output(struct perf_counter *counter,
-				int nmi, struct pt_regs *regs, u64 addr)
+static void perf_counter_output(struct perf_counter *counter, int nmi,
+				struct perf_sample_data *data)
 {
 	int ret;
 	u64 sample_type = counter->attr.sample_type;
@@ -2404,10 +2404,10 @@ static void perf_counter_output(struct perf_counter *counter,
 	header.size = sizeof(header);
 
 	header.misc = PERF_EVENT_MISC_OVERFLOW;
-	header.misc |= perf_misc_flags(regs);
+	header.misc |= perf_misc_flags(data->regs);
 
 	if (sample_type & PERF_SAMPLE_IP) {
-		ip = perf_instruction_pointer(regs);
+		ip = perf_instruction_pointer(data->regs);
 		header.type |= PERF_SAMPLE_IP;
 		header.size += sizeof(ip);
 	}
@@ -2460,7 +2460,7 @@ static void perf_counter_output(struct perf_counter *counter,
 	}
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
-		callchain = perf_callchain(regs);
+		callchain = perf_callchain(data->regs);
 
 		if (callchain) {
 			callchain_size = (1 + callchain->nr) * sizeof(u64);
@@ -2486,7 +2486,7 @@ static void perf_counter_output(struct perf_counter *counter,
 		perf_output_put(&handle, time);
 
 	if (sample_type & PERF_SAMPLE_ADDR)
-		perf_output_put(&handle, addr);
+		perf_output_put(&handle, data->addr);
 
 	if (sample_type & PERF_SAMPLE_ID)
 		perf_output_put(&handle, counter->id);
@@ -2950,8 +2950,8 @@ static void perf_log_throttle(struct perf_counter *counter, int enable)
  * Generic counter overflow handling.
  */
 
-int perf_counter_overflow(struct perf_counter *counter,
-			  int nmi, struct pt_regs *regs, u64 addr)
+int perf_counter_overflow(struct perf_counter *counter, int nmi,
+			  struct perf_sample_data *data)
 {
 	int events = atomic_read(&counter->event_limit);
 	int throttle = counter->pmu->unthrottle != NULL;
@@ -3005,7 +3005,7 @@ int perf_counter_overflow(struct perf_counter *counter,
 			perf_counter_disable(counter);
 	}
 
-	perf_counter_output(counter, nmi, regs, addr);
+	perf_counter_output(counter, nmi, data);
 	return ret;
 }
 
@@ -3054,24 +3054,25 @@ static void perf_swcounter_set_period(struct perf_counter *counter)
 static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 {
 	enum hrtimer_restart ret = HRTIMER_RESTART;
+	struct perf_sample_data data;
 	struct perf_counter *counter;
-	struct pt_regs *regs;
 	u64 period;
 
 	counter	= container_of(hrtimer, struct perf_counter, hw.hrtimer);
 	counter->pmu->read(counter);
 
-	regs = get_irq_regs();
+	data.addr = 0;
+	data.regs = get_irq_regs();
 	/*
 	 * In case we exclude kernel IPs or are somehow not in interrupt
 	 * context, provide the next best thing, the user IP.
 	 */
-	if ((counter->attr.exclude_kernel || !regs) &&
+	if ((counter->attr.exclude_kernel || !data.regs) &&
 			!counter->attr.exclude_user)
-		regs = task_pt_regs(current);
+		data.regs = task_pt_regs(current);
 
-	if (regs) {
-		if (perf_counter_overflow(counter, 0, regs, 0))
+	if (data.regs) {
+		if (perf_counter_overflow(counter, 0, &data))
 			ret = HRTIMER_NORESTART;
 	}
 
@@ -3084,9 +3085,14 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 static void perf_swcounter_overflow(struct perf_counter *counter,
 				    int nmi, struct pt_regs *regs, u64 addr)
 {
+	struct perf_sample_data data = {
+		.regs = regs,
+		.addr = addr,
+	};
+
 	perf_swcounter_update(counter);
 	perf_swcounter_set_period(counter);
-	if (perf_counter_overflow(counter, nmi, regs, addr))
+	if (perf_counter_overflow(counter, nmi, &data))
 		/* soft-disable the counter */
 		;
 
-- 
cgit v1.2.3-70-g09d2


From 9e350de37ac9607012fcf9c5314a28fbddf8f43c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 10 Jun 2009 21:34:59 +0200
Subject: perf_counter: Accurate period data

We currently log hw.sample_period for PERF_SAMPLE_PERIOD, however this is
incorrect. When we adjust the period, it will only take effect the next
cycle but report it for the current cycle. So when we adjust the period
for every cycle, we're always wrong.

Solve this by keeping track of the last_period.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/perf_counter.c |  9 ++++++---
 arch/x86/kernel/cpu/perf_counter.c | 15 ++++++++++++---
 include/linux/perf_counter.h       |  6 ++++--
 kernel/perf_counter.c              |  9 ++++++---
 4 files changed, 28 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 5e0bf399c43..4990ce2e5f0 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -767,6 +767,7 @@ static void power_pmu_unthrottle(struct perf_counter *counter)
 	perf_disable();
 	power_pmu_read(counter);
 	left = counter->hw.sample_period;
+	counter->hw.last_period = left;
 	val = 0;
 	if (left < 0x80000000L)
 		val = 0x80000000L - left;
@@ -937,7 +938,8 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 
 	counter->hw.config = events[n];
 	counter->hw.counter_base = cflags[n];
-	atomic64_set(&counter->hw.period_left, counter->hw.sample_period);
+	counter->hw.last_period = counter->hw.sample_period;
+	atomic64_set(&counter->hw.period_left, counter->hw.last_period);
 
 	/*
 	 * See if we need to reserve the PMU.
@@ -1002,8 +1004,9 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	 */
 	if (record) {
 		struct perf_sample_data data = {
-			.regs = regs,
-			.addr = 0,
+			.regs	= regs,
+			.addr	= 0,
+			.period	= counter->hw.last_period,
 		};
 
 		if (counter->attr.sample_type & PERF_SAMPLE_ADDR) {
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 82a23d487f9..57ae1bec81b 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -698,6 +698,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 
 	if (!hwc->sample_period) {
 		hwc->sample_period = x86_pmu.max_period;
+		hwc->last_period = hwc->sample_period;
 		atomic64_set(&hwc->period_left, hwc->sample_period);
 	}
 
@@ -880,12 +881,14 @@ x86_perf_counter_set_period(struct perf_counter *counter,
 	if (unlikely(left <= -period)) {
 		left = period;
 		atomic64_set(&hwc->period_left, left);
+		hwc->last_period = period;
 		ret = 1;
 	}
 
 	if (unlikely(left <= 0)) {
 		left += period;
 		atomic64_set(&hwc->period_left, left);
+		hwc->last_period = period;
 		ret = 1;
 	}
 	/*
@@ -1257,9 +1260,12 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
 		if (val & (1ULL << (x86_pmu.counter_bits - 1)))
 			continue;
 
-		/* counter overflow */
-		handled = 1;
-		inc_irq_stat(apic_perf_irqs);
+		/*
+		 * counter overflow
+		 */
+		handled		= 1;
+		data.period	= counter->hw.last_period;
+
 		if (!x86_perf_counter_set_period(counter, hwc, idx))
 			continue;
 
@@ -1267,6 +1273,9 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
 			amd_pmu_disable_counter(hwc, idx);
 	}
 
+	if (handled)
+		inc_irq_stat(apic_perf_irqs);
+
 	return handled;
 }
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index d8c0eb480f9..5b966472b45 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -366,6 +366,7 @@ struct hw_perf_counter {
 	};
 	atomic64_t			prev_count;
 	u64				sample_period;
+	u64				last_period;
 	atomic64_t			period_left;
 	u64				interrupts;
 
@@ -606,8 +607,9 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
 extern void perf_counter_update_userpage(struct perf_counter *counter);
 
 struct perf_sample_data {
-	struct pt_regs	*regs;
-	u64		addr;
+	struct pt_regs		*regs;
+	u64			addr;
+	u64			period;
 };
 
 extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 4fe85e804f4..8b89b40bd0f 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2495,7 +2495,7 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
 		perf_output_put(&handle, cpu_entry);
 
 	if (sample_type & PERF_SAMPLE_PERIOD)
-		perf_output_put(&handle, counter->hw.sample_period);
+		perf_output_put(&handle, data->period);
 
 	/*
 	 * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult.
@@ -3040,11 +3040,13 @@ static void perf_swcounter_set_period(struct perf_counter *counter)
 	if (unlikely(left <= -period)) {
 		left = period;
 		atomic64_set(&hwc->period_left, left);
+		hwc->last_period = period;
 	}
 
 	if (unlikely(left <= 0)) {
 		left += period;
 		atomic64_add(period, &hwc->period_left);
+		hwc->last_period = period;
 	}
 
 	atomic64_set(&hwc->prev_count, -left);
@@ -3086,8 +3088,9 @@ static void perf_swcounter_overflow(struct perf_counter *counter,
 				    int nmi, struct pt_regs *regs, u64 addr)
 {
 	struct perf_sample_data data = {
-		.regs = regs,
-		.addr = addr,
+		.regs	= regs,
+		.addr	= addr,
+		.period	= counter->hw.last_period,
 	};
 
 	perf_swcounter_update(counter);
-- 
cgit v1.2.3-70-g09d2


From dcae3626d031fe6296b1e96a16f986193a41f840 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Wed, 10 Jun 2009 12:43:48 -0700
Subject: drm: fix LOCK_TEST_WITH_RETURN macro

When this macro isn't called with 'file_priv' this will result in a build
failure.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/drmP.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index b84d8ae35e6..efa5f79a35c 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -237,15 +237,15 @@ struct drm_device;
  * \param dev DRM device.
  * \param filp file pointer of the caller.
  */
-#define LOCK_TEST_WITH_RETURN( dev, file_priv )				\
-do {									\
-	if (!_DRM_LOCK_IS_HELD(file_priv->master->lock.hw_lock->lock) ||		\
-	    file_priv->master->lock.file_priv != file_priv)	{			\
+#define LOCK_TEST_WITH_RETURN( dev, _file_priv )				\
+do {										\
+	if (!_DRM_LOCK_IS_HELD(_file_priv->master->lock.hw_lock->lock) ||	\
+	    _file_priv->master->lock.file_priv != _file_priv)	{		\
 		DRM_ERROR( "%s called without lock held, held  %d owner %p %p\n",\
-			   __func__, _DRM_LOCK_IS_HELD(file_priv->master->lock.hw_lock->lock),\
-			   file_priv->master->lock.file_priv, file_priv);		\
-		return -EINVAL;						\
-	}								\
+			   __func__, _DRM_LOCK_IS_HELD(_file_priv->master->lock.hw_lock->lock),\
+			   _file_priv->master->lock.file_priv, _file_priv);	\
+		return -EINVAL;							\
+	}									\
 } while (0)
 
 /**
-- 
cgit v1.2.3-70-g09d2


From 4fefcb27050b98c97b1c32bc710fc2f874449dee Mon Sep 17 00:00:00 2001
From: yakui_zhao <yakui.zhao@intel.com>
Date: Tue, 2 Jun 2009 14:09:47 +0800
Subject: drm: add separate drm debugging levels

Now all the DRM debug info will be reported if the boot option of
"drm.debug=1" is added. Sometimes it is inconvenient to get the debug
info in KMS mode. We will get too much unrelated info.

This will separate several DRM debug levels and the debug level can be used
to print the different debug info. And the debug level is controlled by the
module parameter of drm.debug

In this patch it is divided into four debug levels;
       	drm_core, drm_driver, drm_kms, drm_mode.

At the same time we can get the different debug info by changing the debug
level. This can be done by adding the module parameter. Of course it can
be changed through the /sys/module/drm/parameters/debug after the system is
booted.

Four debug macro definitions are provided.
	DRM_DEBUG(fmt, args...)
	DRM_DEBUG_DRIVER(prefix, fmt, args...)
	DRM_DEBUG_KMS(prefix, fmt, args...)
	DRM_DEBUG_MODE(prefix, fmt, args...)

When the boot option of "drm.debug=4" is added, it will print the debug info
using DRM_DEBUG_KMS macro definition.
When the boot option of "drm.debug=6" is added, it will print the debug info
using DRM_DEBUG_KMS/DRM_DEBUG_DRIVER.

Sometimes we expect to print the value of an array.
For example: SDVO command,
In such case the following four DRM debug macro definitions are added:
	DRM_LOG(fmt, args...)
	DRM_LOG_DRIVER(fmt, args...)
	DRM_LOG_KMS(fmt, args...)
	DRM_LOG_MODE(fmt, args...)

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_stub.c | 17 ++++++++++++-
 include/drm/drmP.h         | 61 ++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 72 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c
index b9631e3a1ea..89050684fe0 100644
--- a/drivers/gpu/drm/drm_stub.c
+++ b/drivers/gpu/drm/drm_stub.c
@@ -51,7 +51,22 @@ struct idr drm_minors_idr;
 struct class *drm_class;
 struct proc_dir_entry *drm_proc_root;
 struct dentry *drm_debugfs_root;
-
+void drm_ut_debug_printk(unsigned int request_level,
+			 const char *prefix,
+			 const char *function_name,
+			 const char *format, ...)
+{
+	va_list args;
+
+	if (drm_debug & request_level) {
+		if (function_name)
+			printk(KERN_DEBUG "[%s:%s], ", prefix, function_name);
+		va_start(args, format);
+		vprintk(format, args);
+		va_end(args);
+	}
+}
+EXPORT_SYMBOL(drm_ut_debug_printk);
 static int drm_minor_get_id(struct drm_device *dev, int type)
 {
 	int new_id;
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index efa5f79a35c..afc21685230 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -87,6 +87,15 @@ struct drm_device;
 #include "drm_os_linux.h"
 #include "drm_hashtab.h"
 
+#define DRM_UT_CORE 		0x01
+#define DRM_UT_DRIVER		0x02
+#define DRM_UT_KMS		0x04
+#define DRM_UT_MODE		0x08
+
+extern void drm_ut_debug_printk(unsigned int request_level,
+				const char *prefix,
+				const char *function_name,
+				const char *format, ...);
 /***********************************************************************/
 /** \name DRM template customization defaults */
 /*@{*/
@@ -186,15 +195,57 @@ struct drm_device;
  * \param arg arguments
  */
 #if DRM_DEBUG_CODE
-#define DRM_DEBUG(fmt, arg...)						\
+#define DRM_DEBUG(fmt, args...)						\
 	do {								\
-		if ( drm_debug )			\
-			printk(KERN_DEBUG				\
-			       "[" DRM_NAME ":%s] " fmt ,	\
-			       __func__ , ##arg);			\
+		drm_ut_debug_printk(DRM_UT_CORE, DRM_NAME, 		\
+					__func__, fmt, ##args);		\
+	} while (0)
+
+#define DRM_DEBUG_DRIVER(prefix, fmt, args...)				\
+	do {								\
+		drm_ut_debug_printk(DRM_UT_DRIVER, prefix,		\
+					__func__, fmt, ##args);		\
+	} while (0)
+#define DRM_DEBUG_KMS(prefix, fmt, args...)				\
+	do {								\
+		drm_ut_debug_printk(DRM_UT_KMS, prefix, 		\
+					 __func__, fmt, ##args);	\
+	} while (0)
+#define DRM_DEBUG_MODE(prefix, fmt, args...)				\
+	do {								\
+		drm_ut_debug_printk(DRM_UT_MODE, prefix, 		\
+					 __func__, fmt, ##args);	\
+	} while (0)
+#define DRM_LOG(fmt, args...)						\
+	do {								\
+		drm_ut_debug_printk(DRM_UT_CORE, NULL,			\
+					NULL, fmt, ##args);		\
+	} while (0)
+#define DRM_LOG_KMS(fmt, args...)					\
+	do {								\
+		drm_ut_debug_printk(DRM_UT_KMS, NULL,			\
+					NULL, fmt, ##args);		\
+	} while (0)
+#define DRM_LOG_MODE(fmt, args...)					\
+	do {								\
+		drm_ut_debug_printk(DRM_UT_MODE, NULL,			\
+					NULL, fmt, ##args);		\
+	} while (0)
+#define DRM_LOG_DRIVER(fmt, args...)					\
+	do {								\
+		drm_ut_debug_printk(DRM_UT_DRIVER, NULL,		\
+					NULL, fmt, ##args);		\
 	} while (0)
 #else
+#define DRM_DEBUG_DRIVER(prefix, fmt, args...) do { } while (0)
+#define DRM_DEBUG_KMS(prefix, fmt, args...)	do { } while (0)
+#define DRM_DEBUG_MODE(prefix, fmt, args...)	do { } while (0)
 #define DRM_DEBUG(fmt, arg...)		 do { } while (0)
+#define DRM_LOG(fmt, arg...)		do { } while (0)
+#define DRM_LOG_KMS(fmt, args...) do { } while (0)
+#define DRM_LOG_MODE(fmt, arg...) do { } while (0)
+#define DRM_LOG_DRIVER(fmt, arg...) do { } while (0)
+
 #endif
 
 #define DRM_PROC_LIMIT (PAGE_SIZE-80)
-- 
cgit v1.2.3-70-g09d2


From 3f68535adad8dd89499505a65fb25d0e02d118cc Mon Sep 17 00:00:00 2001
From: john stultz <johnstul@us.ibm.com>
Date: Wed, 21 Jan 2009 22:53:22 -0700
Subject: clocksource: sanity check sysfs clocksource changes

Thomas, Andrew and Ingo pointed out that we don't have any safety checks
in the clocksource sysfs entries to make sure sysadmins don't try to
change the clocksource to a non high-res timer capable clocksource (such
as jiffies) when high-res timers (HRT) is enabled.  Doing so will likely
hang a system.

Correct this by filtering non HRT clocksources from available_clocksources
and not accepting non HRT clocksources with HRT enabled.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h   |  2 +-
 kernel/hrtimer.c          |  4 ++--
 kernel/time/clocksource.c | 18 +++++++++++++++++-
 3 files changed, 20 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 0d2f7c8a33d..58021b0c396 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -305,7 +305,7 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer)
 
 extern ktime_t ktime_get(void);
 extern ktime_t ktime_get_real(void);
-
+extern int hrtimer_hres_active(void);
 
 DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
 
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index cb8a15c1958..1a70c18cdff 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -476,7 +476,7 @@ static inline int hrtimer_is_hres_enabled(void)
 /*
  * Is the high resolution mode active ?
  */
-static inline int hrtimer_hres_active(void)
+int hrtimer_hres_active(void)
 {
 	return __get_cpu_var(hrtimer_bases).hres_active;
 }
@@ -704,7 +704,7 @@ static int hrtimer_switch_to_hres(void)
 
 #else
 
-static inline int hrtimer_hres_active(void) { return 0; }
+int hrtimer_hres_active(void) { return 0; }
 static inline int hrtimer_is_hres_enabled(void) { return 0; }
 static inline int hrtimer_switch_to_hres(void) { return 0; }
 static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { }
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 80189f6f1c5..18b9f5da4ee 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -30,6 +30,7 @@
 #include <linux/module.h>
 #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
 #include <linux/tick.h>
+#include <linux/hrtimer.h>
 
 void timecounter_init(struct timecounter *tc,
 		      const struct cyclecounter *cc,
@@ -509,6 +510,18 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
 		}
 	}
 
+	/*
+	 * Check to make sure we don't switch to a non-HRT usable
+	 * clocksource if HRT is enabled and running
+	 */
+	if (hrtimer_hres_active() &&
+	    !(ovr->flags & CLOCK_SOURCE_VALID_FOR_HRES)) {
+		printk(KERN_WARNING "%s clocksource is not HRT compatible. "
+			"Cannot switch while in HRT mode\n", ovr->name);
+		ovr = NULL;
+		override_name[0] = 0;
+	}
+
 	/* Reselect, when the override name has changed */
 	if (ovr != clocksource_override) {
 		clocksource_override = ovr;
@@ -537,7 +550,10 @@ sysfs_show_available_clocksources(struct sys_device *dev,
 
 	spin_lock_irq(&clocksource_lock);
 	list_for_each_entry(src, &clocksource_list, list) {
-		count += snprintf(buf + count,
+		/* Don't show non-HRES clocksource if HRES is enabled */
+		if (!hrtimer_hres_active() ||
+				(src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
+			count += snprintf(buf + count,
 				  max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
 				  "%s ", src->name);
 	}
-- 
cgit v1.2.3-70-g09d2


From d005ba6cc82440d9ebf96f3ec8f79c54578b898f Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 10 Jun 2009 05:28:04 +0000
Subject: mdio: Expose 10GBASE-T MDI-X status via ethtool

This is available in a standard MDIO register in 10GBASE-T PHYs.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio.c   | 17 +++++++++++++++++
 include/linux/mdio.h |  9 +++++++++
 2 files changed, 26 insertions(+)

(limited to 'include')

diff --git a/drivers/net/mdio.c b/drivers/net/mdio.c
index 66483035f68..dc45e9856c3 100644
--- a/drivers/net/mdio.c
+++ b/drivers/net/mdio.c
@@ -296,6 +296,23 @@ void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio,
 		ecmd->duplex = (reg & MDIO_CTRL1_FULLDPLX ||
 				ecmd->speed == SPEED_10000);
 	}
+
+	/* 10GBASE-T MDI/MDI-X */
+	if (ecmd->port == PORT_TP && ecmd->speed == SPEED_10000) {
+		switch (mdio->mdio_read(mdio->dev, mdio->prtad, MDIO_MMD_PMAPMD,
+					MDIO_PMA_10GBT_SWAPPOL)) {
+		case MDIO_PMA_10GBT_SWAPPOL_ABNX | MDIO_PMA_10GBT_SWAPPOL_CDNX:
+			ecmd->eth_tp_mdix = ETH_TP_MDI;
+			break;
+		case 0:
+			ecmd->eth_tp_mdix = ETH_TP_MDI_X;
+			break;
+		default:
+			/* It's complicated... */
+			ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
+			break;
+		}
+	}
 }
 EXPORT_SYMBOL(mdio45_ethtool_gset_npage);
 
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index 56851646529..cfdf1df2875 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -45,6 +45,7 @@
 #define MDIO_PHYXS_LNSTAT	24	/* PHY XGXS lane state */
 
 /* Media-dependent registers. */
+#define MDIO_PMA_10GBT_SWAPPOL	130	/* 10GBASE-T pair swap & polarity */
 #define MDIO_PMA_10GBT_TXPWR	131	/* 10GBASE-T TX power control */
 #define MDIO_PMA_10GBT_SNR	133	/* 10GBASE-T SNR margin, lane A.
 					 * Lanes B-D are numbered 134-136. */
@@ -195,6 +196,14 @@
 #define MDIO_PHYXS_LNSTAT_SYNC3		0x0008
 #define MDIO_PHYXS_LNSTAT_ALIGN		0x1000
 
+/* PMA 10GBASE-T pair swap & polarity */
+#define MDIO_PMA_10GBT_SWAPPOL_ABNX	0x0001	/* Pair A/B uncrossed */
+#define MDIO_PMA_10GBT_SWAPPOL_CDNX	0x0002	/* Pair C/D uncrossed */
+#define MDIO_PMA_10GBT_SWAPPOL_AREV	0x0100	/* Pair A polarity reversed */
+#define MDIO_PMA_10GBT_SWAPPOL_BREV	0x0200	/* Pair B polarity reversed */
+#define MDIO_PMA_10GBT_SWAPPOL_CREV	0x0400	/* Pair C polarity reversed */
+#define MDIO_PMA_10GBT_SWAPPOL_DREV	0x0800	/* Pair D polarity reversed */
+
 /* PMA 10GBASE-T TX power register. */
 #define MDIO_PMA_10GBT_TXPWR_SHORT	0x0001	/* Short-reach mode */
 
-- 
cgit v1.2.3-70-g09d2


From 2b85a34e911bf483c27cfdd124aeb1605145dc80 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 11 Jun 2009 02:55:43 -0700
Subject: net: No more expensive sock_hold()/sock_put() on each tx

One of the problem with sock memory accounting is it uses
a pair of sock_hold()/sock_put() for each transmitted packet.

This slows down bidirectional flows because the receive path
also needs to take a refcount on socket and might use a different
cpu than transmit path or transmit completion path. So these
two atomic operations also trigger cache line bounces.

We can see this in tx or tx/rx workloads (media gateways for example),
where sock_wfree() can be in top five functions in profiles.

We use this sock_hold()/sock_put() so that sock freeing
is delayed until all tx packets are completed.

As we also update sk_wmem_alloc, we could offset sk_wmem_alloc
by one unit at init time, until sk_free() is called.
Once sk_free() is called, we atomic_dec_and_test(sk_wmem_alloc)
to decrement initial offset and atomicaly check if any packets
are in flight.

skb_set_owner_w() doesnt call sock_hold() anymore

sock_wfree() doesnt call sock_put() anymore, but check if sk_wmem_alloc
reached 0 to perform the final freeing.

Drawback is that a skb->truesize error could lead to unfreeable sockets, or
even worse, prematurely calling __sk_free() on a live socket.

Nice speedups on SMP. tbench for example, going from 2691 MB/s to 2711 MB/s
on my 8 cpu dev machine, even if tbench was not really hitting sk_refcnt
contention point. 5 % speedup on a UDP transmit workload (depends
on number of flows), lowering TX completion cpu usage.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h    |  6 +++++-
 net/core/sock.c       | 29 +++++++++++++++++++++++++----
 net/ipv4/ip_output.c  |  1 -
 net/ipv6/ip6_output.c |  1 -
 4 files changed, 30 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 4bb1ff9fd15..010e14a93c9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1217,9 +1217,13 @@ static inline int skb_copy_to_page(struct sock *sk, char __user *from,
 
 static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
 {
-	sock_hold(sk);
 	skb->sk = sk;
 	skb->destructor = sock_wfree;
+	/*
+	 * We used to take a refcount on sk, but following operation
+	 * is enough to guarantee sk_free() wont free this sock until
+	 * all in-flight packets are completed
+	 */
 	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
 }
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 04e35eb2e73..06e26b77ad9 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1008,7 +1008,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 }
 EXPORT_SYMBOL(sk_alloc);
 
-void sk_free(struct sock *sk)
+static void __sk_free(struct sock *sk)
 {
 	struct sk_filter *filter;
 
@@ -1031,6 +1031,17 @@ void sk_free(struct sock *sk)
 	put_net(sock_net(sk));
 	sk_prot_free(sk->sk_prot_creator, sk);
 }
+
+void sk_free(struct sock *sk)
+{
+	/*
+	 * We substract one from sk_wmem_alloc and can know if
+	 * some packets are still in some tx queue.
+	 * If not null, sock_wfree() will call __sk_free(sk) later
+	 */
+	if (atomic_dec_and_test(&sk->sk_wmem_alloc))
+		__sk_free(sk);
+}
 EXPORT_SYMBOL(sk_free);
 
 /*
@@ -1071,7 +1082,10 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
 
 		atomic_set(&newsk->sk_rmem_alloc, 0);
-		atomic_set(&newsk->sk_wmem_alloc, 0);
+		/*
+		 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
+		 */
+		atomic_set(&newsk->sk_wmem_alloc, 1);
 		atomic_set(&newsk->sk_omem_alloc, 0);
 		skb_queue_head_init(&newsk->sk_receive_queue);
 		skb_queue_head_init(&newsk->sk_write_queue);
@@ -1175,12 +1189,18 @@ void __init sk_init(void)
 void sock_wfree(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
+	int res;
 
 	/* In case it might be waiting for more memory. */
-	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
+	res = atomic_sub_return(skb->truesize, &sk->sk_wmem_alloc);
 	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
 		sk->sk_write_space(sk);
-	sock_put(sk);
+	/*
+	 * if sk_wmem_alloc reached 0, we are last user and should
+	 * free this sock, as sk_free() call could not do it.
+	 */
+	if (res == 0)
+		__sk_free(sk);
 }
 EXPORT_SYMBOL(sock_wfree);
 
@@ -1819,6 +1839,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_stamp = ktime_set(-1L, 0);
 
 	atomic_set(&sk->sk_refcnt, 1);
+	atomic_set(&sk->sk_wmem_alloc, 1);
 	atomic_set(&sk->sk_drops, 0);
 }
 EXPORT_SYMBOL(sock_init_data);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 9248d2807ba..24702628266 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -498,7 +498,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 
 			BUG_ON(frag->sk);
 			if (skb->sk) {
-				sock_hold(skb->sk);
 				frag->sk = skb->sk;
 				frag->destructor = sock_wfree;
 				truesizes += frag->truesize;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index db6c7224a86..7c76e3d1821 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -680,7 +680,6 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 
 			BUG_ON(frag->sk);
 			if (skb->sk) {
-				sock_hold(skb->sk);
 				frag->sk = skb->sk;
 				frag->destructor = sock_wfree;
 				truesizes += frag->truesize;
-- 
cgit v1.2.3-70-g09d2


From 8593a1967fb9746d318dde88a0a39a36dbfc3445 Mon Sep 17 00:00:00 2001
From: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Date: Wed, 20 May 2009 16:53:30 -0700
Subject: wimax/i2400m: rename misleading I2400M_PL_PAD to I2400M_PL_ALIGN

The constant is being use as an alignment factor, not as a padding
factor; made reading/reviewing the code quite confusing.

Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
---
 drivers/net/wimax/i2400m/fw.c | 2 +-
 drivers/net/wimax/i2400m/rx.c | 4 ++--
 drivers/net/wimax/i2400m/tx.c | 4 ++--
 include/linux/wimax/i2400m.h  | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wimax/i2400m/fw.c b/drivers/net/wimax/i2400m/fw.c
index 7ee1b99b384..26924f17f19 100644
--- a/drivers/net/wimax/i2400m/fw.c
+++ b/drivers/net/wimax/i2400m/fw.c
@@ -397,7 +397,7 @@ static int i2400m_download_chunk(struct i2400m *i2400m, const void *chunk,
 				 unsigned int direct, unsigned int do_csum)
 {
 	int ret;
-	size_t chunk_len = ALIGN(__chunk_len, I2400M_PL_PAD);
+	size_t chunk_len = ALIGN(__chunk_len, I2400M_PL_ALIGN);
 	struct device *dev = i2400m_dev(i2400m);
 	struct {
 		struct i2400m_bootrom_header cmd;
diff --git a/drivers/net/wimax/i2400m/rx.c b/drivers/net/wimax/i2400m/rx.c
index 7643850a6fb..07c32e68909 100644
--- a/drivers/net/wimax/i2400m/rx.c
+++ b/drivers/net/wimax/i2400m/rx.c
@@ -1148,7 +1148,7 @@ int i2400m_rx(struct i2400m *i2400m, struct sk_buff *skb)
 	num_pls = le16_to_cpu(msg_hdr->num_pls);
 	pl_itr = sizeof(*msg_hdr) +	/* Check payload descriptor(s) */
 		num_pls * sizeof(msg_hdr->pld[0]);
-	pl_itr = ALIGN(pl_itr, I2400M_PL_PAD);
+	pl_itr = ALIGN(pl_itr, I2400M_PL_ALIGN);
 	if (pl_itr > skb->len) {	/* got all the payload descriptors? */
 		dev_err(dev, "RX: HW BUG? message too short (%u bytes) for "
 			"%u payload descriptors (%zu each, total %zu)\n",
@@ -1166,7 +1166,7 @@ int i2400m_rx(struct i2400m *i2400m, struct sk_buff *skb)
 		single_last = num_pls == 1 || i == num_pls - 1;
 		i2400m_rx_payload(i2400m, skb, single_last, &msg_hdr->pld[i],
 				  skb->data + pl_itr);
-		pl_itr += ALIGN(pl_size, I2400M_PL_PAD);
+		pl_itr += ALIGN(pl_size, I2400M_PL_ALIGN);
 		cond_resched();		/* Don't monopolize */
 	}
 	kfree_skb(skb);
diff --git a/drivers/net/wimax/i2400m/tx.c b/drivers/net/wimax/i2400m/tx.c
index 8ef724d31fb..a635fd720f3 100644
--- a/drivers/net/wimax/i2400m/tx.c
+++ b/drivers/net/wimax/i2400m/tx.c
@@ -491,7 +491,7 @@ void i2400m_tx_close(struct i2400m *i2400m)
 	 */
 	hdr_size = sizeof(*tx_msg)
 		+ le16_to_cpu(tx_msg->num_pls) * sizeof(tx_msg->pld[0]);
-	hdr_size = ALIGN(hdr_size, I2400M_PL_PAD);
+	hdr_size = ALIGN(hdr_size, I2400M_PL_ALIGN);
 	tx_msg->offset = I2400M_TX_PLD_SIZE - hdr_size;
 	tx_msg_moved = (void *) tx_msg + tx_msg->offset;
 	memmove(tx_msg_moved, tx_msg, hdr_size);
@@ -574,7 +574,7 @@ int i2400m_tx(struct i2400m *i2400m, const void *buf, size_t buf_len,
 
 	d_fnstart(3, dev, "(i2400m %p skb %p [%zu bytes] pt %u)\n",
 		  i2400m, buf, buf_len, pl_type);
-	padded_len = ALIGN(buf_len, I2400M_PL_PAD);
+	padded_len = ALIGN(buf_len, I2400M_PL_ALIGN);
 	d_printf(5, dev, "padded_len %zd buf_len %zd\n", padded_len, buf_len);
 	/* If there is no current TX message, create one; if the
 	 * current one is out of payload slots or we have a singleton,
diff --git a/include/linux/wimax/i2400m.h b/include/linux/wimax/i2400m.h
index d5148a7889a..433693ef2bb 100644
--- a/include/linux/wimax/i2400m.h
+++ b/include/linux/wimax/i2400m.h
@@ -266,7 +266,7 @@ enum i2400m_ro_type {
 
 /* Misc constants */
 enum {
-	I2400M_PL_PAD = 16,	/* Payload data size alignment */
+	I2400M_PL_ALIGN = 16,	/* Payload data size alignment */
 	I2400M_PL_SIZE_MAX = 0x3EFF,
 	I2400M_MAX_PLS_IN_MSG = 60,
 	/* protocol barkers: sync sequences; for notifications they
-- 
cgit v1.2.3-70-g09d2


From b0fd271d5fba0b2d00888363f3869e3f9b26caa9 Mon Sep 17 00:00:00 2001
From: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Date: Thu, 11 Jun 2009 13:10:16 +0200
Subject: block: add request clone interface (v2)

This patch adds the following 2 interfaces for request-stacking drivers:

  - blk_rq_prep_clone(struct request *clone, struct request *orig,
		      struct bio_set *bs, gfp_t gfp_mask,
		      int (*bio_ctr)(struct bio *, struct bio*, void *),
		      void *data)
      * Clones bios in the original request to the clone request
        (bio_ctr is called for each cloned bios.)
      * Copies attributes of the original request to the clone request.
        The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not
        copied.

  - blk_rq_unprep_clone(struct request *clone)
      * Frees cloned bios from the clone request.

Request stacking drivers (e.g. request-based dm) need to make a clone
request for a submitted request and dispatch it to other devices.

To allocate request for the clone, request stacking drivers may not
be able to use blk_get_request() because the allocation may be done
in an irq-disabled context.
So blk_rq_prep_clone() takes a request allocated by the caller
as an argument.

For each clone bio in the clone request, request stacking drivers
should be able to set up their own completion handler.
So blk_rq_prep_clone() takes a callback function which is called
for each clone bio, and a pointer for private data which is passed
to the callback.

NOTE:
blk_rq_prep_clone() doesn't copy any actual data of the original
request.  Pages are shared between original bios and cloned bios.
So caller must not complete the original request before the clone
request.

Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-core.c       | 100 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/blkdev.h |   5 +++
 2 files changed, 105 insertions(+)

(limited to 'include')

diff --git a/block/blk-core.c b/block/blk-core.c
index 03c5a64b6cc..02a9252107a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2295,6 +2295,106 @@ int blk_lld_busy(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_lld_busy);
 
+/**
+ * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
+ * @rq: the clone request to be cleaned up
+ *
+ * Description:
+ *     Free all bios in @rq for a cloned request.
+ */
+void blk_rq_unprep_clone(struct request *rq)
+{
+	struct bio *bio;
+
+	while ((bio = rq->bio) != NULL) {
+		rq->bio = bio->bi_next;
+
+		bio_put(bio);
+	}
+}
+EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
+
+/*
+ * Copy attributes of the original request to the clone request.
+ * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied.
+ */
+static void __blk_rq_prep_clone(struct request *dst, struct request *src)
+{
+	dst->cpu = src->cpu;
+	dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE);
+	dst->cmd_type = src->cmd_type;
+	dst->__sector = blk_rq_pos(src);
+	dst->__data_len = blk_rq_bytes(src);
+	dst->nr_phys_segments = src->nr_phys_segments;
+	dst->ioprio = src->ioprio;
+	dst->extra_len = src->extra_len;
+}
+
+/**
+ * blk_rq_prep_clone - Helper function to setup clone request
+ * @rq: the request to be setup
+ * @rq_src: original request to be cloned
+ * @bs: bio_set that bios for clone are allocated from
+ * @gfp_mask: memory allocation mask for bio
+ * @bio_ctr: setup function to be called for each clone bio.
+ *           Returns %0 for success, non %0 for failure.
+ * @data: private data to be passed to @bio_ctr
+ *
+ * Description:
+ *     Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
+ *     The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense)
+ *     are not copied, and copying such parts is the caller's responsibility.
+ *     Also, pages which the original bios are pointing to are not copied
+ *     and the cloned bios just point same pages.
+ *     So cloned bios must be completed before original bios, which means
+ *     the caller must complete @rq before @rq_src.
+ */
+int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
+		      struct bio_set *bs, gfp_t gfp_mask,
+		      int (*bio_ctr)(struct bio *, struct bio *, void *),
+		      void *data)
+{
+	struct bio *bio, *bio_src;
+
+	if (!bs)
+		bs = fs_bio_set;
+
+	blk_rq_init(NULL, rq);
+
+	__rq_for_each_bio(bio_src, rq_src) {
+		bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);
+		if (!bio)
+			goto free_and_out;
+
+		__bio_clone(bio, bio_src);
+
+		if (bio_integrity(bio_src) &&
+		    bio_integrity_clone(bio, bio_src, gfp_mask))
+			goto free_and_out;
+
+		if (bio_ctr && bio_ctr(bio, bio_src, data))
+			goto free_and_out;
+
+		if (rq->bio) {
+			rq->biotail->bi_next = bio;
+			rq->biotail = bio;
+		} else
+			rq->bio = rq->biotail = bio;
+	}
+
+	__blk_rq_prep_clone(rq, rq_src);
+
+	return 0;
+
+free_and_out:
+	if (bio)
+		bio_free(bio, bs);
+	blk_rq_unprep_clone(rq);
+
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
+
 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
 {
 	return queue_work(kblockd_workqueue, work);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5e740a135e7..ebdfde8fe55 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -765,6 +765,11 @@ extern void blk_insert_request(struct request_queue *, struct request *, int, vo
 extern void blk_requeue_request(struct request_queue *, struct request *);
 extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
 extern int blk_lld_busy(struct request_queue *q);
+extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
+			     struct bio_set *bs, gfp_t gfp_mask,
+			     int (*bio_ctr)(struct bio *, struct bio *, void *),
+			     void *data);
+extern void blk_rq_unprep_clone(struct request *rq);
 extern int blk_insert_cloned_request(struct request_queue *q,
 				     struct request *rq);
 extern void blk_plug_device(struct request_queue *);
-- 
cgit v1.2.3-70-g09d2


From 0764771dab80d7b84b9a271bee7f1b21a04a3f0c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 11 Jun 2009 11:18:36 +0200
Subject: perf_counter: More paranoia settings

Rename the perf_counter_priv knob to perf_counter_paranoia (because
priv can be read as private, as opposed to privileged) and provide
one more level:

 0 - permissive
 1 - restrict cpu counters to privilidged contexts
 2 - restrict kernel-mode code counting and profiling

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  2 +-
 kernel/perf_counter.c        | 25 +++++++++++++++++++++++--
 kernel/sysctl.c              |  6 +++---
 3 files changed, 27 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 5b966472b45..386be915baa 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -648,7 +648,7 @@ struct perf_callchain_entry {
 
 extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
 
-extern int sysctl_perf_counter_priv;
+extern int sysctl_perf_counter_paranoid;
 extern int sysctl_perf_counter_mlock;
 extern int sysctl_perf_counter_limit;
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 8b89b40bd0f..63f1987c1c1 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -43,7 +43,23 @@ static atomic_t nr_counters __read_mostly;
 static atomic_t nr_mmap_counters __read_mostly;
 static atomic_t nr_comm_counters __read_mostly;
 
-int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
+/*
+ * 0 - not paranoid
+ * 1 - disallow cpu counters to unpriv
+ * 2 - disallow kernel profiling to unpriv
+ */
+int sysctl_perf_counter_paranoid __read_mostly; /* do we need to be privileged */
+
+static inline bool perf_paranoid_cpu(void)
+{
+	return sysctl_perf_counter_paranoid > 0;
+}
+
+static inline bool perf_paranoid_kernel(void)
+{
+	return sysctl_perf_counter_paranoid > 1;
+}
+
 int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */
 int sysctl_perf_counter_limit __read_mostly = 100000; /* max NMIs per second */
 
@@ -1385,7 +1401,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 	 */
 	if (cpu != -1) {
 		/* Must be root to operate on a CPU counter: */
-		if (sysctl_perf_counter_priv && !capable(CAP_SYS_ADMIN))
+		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
 			return ERR_PTR(-EACCES);
 
 		if (cpu < 0 || cpu > num_possible_cpus())
@@ -3618,6 +3634,11 @@ SYSCALL_DEFINE5(perf_counter_open,
 	if (copy_from_user(&attr, attr_uptr, sizeof(attr)) != 0)
 		return -EFAULT;
 
+	if (!attr.exclude_kernel) {
+		if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
+			return -EACCES;
+	}
+
 	/*
 	 * Get the target context (task or percpu):
 	 */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0c4bf863afa..344a65981de 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -916,9 +916,9 @@ static struct ctl_table kern_table[] = {
 #ifdef CONFIG_PERF_COUNTERS
 	{
 		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "perf_counter_privileged",
-		.data		= &sysctl_perf_counter_priv,
-		.maxlen		= sizeof(sysctl_perf_counter_priv),
+		.procname	= "perf_counter_paranoid",
+		.data		= &sysctl_perf_counter_paranoid,
+		.maxlen		= sizeof(sysctl_perf_counter_paranoid),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
-- 
cgit v1.2.3-70-g09d2


From df58ab24bf26b166874bfb18b3b5a2e0a8e63179 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 11 Jun 2009 11:25:05 +0200
Subject: perf_counter: Rename perf_counter_limit sysctl

Rename perf_counter_limit to perf_counter_max_sample_rate and
prohibit creation of counters with a known higher sample
frequency.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  2 +-
 kernel/perf_counter.c        | 27 +++++++++++++++++++--------
 kernel/sysctl.c              |  6 +++---
 3 files changed, 23 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 386be915baa..95c797c480e 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -650,7 +650,7 @@ extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
 
 extern int sysctl_perf_counter_paranoid;
 extern int sysctl_perf_counter_mlock;
-extern int sysctl_perf_counter_limit;
+extern int sysctl_perf_counter_sample_rate;
 
 extern void perf_counter_init(void);
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 63f1987c1c1..3b2829de559 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -44,11 +44,12 @@ static atomic_t nr_mmap_counters __read_mostly;
 static atomic_t nr_comm_counters __read_mostly;
 
 /*
- * 0 - not paranoid
- * 1 - disallow cpu counters to unpriv
- * 2 - disallow kernel profiling to unpriv
+ * perf counter paranoia level:
+ *  0 - not paranoid
+ *  1 - disallow cpu counters to unpriv
+ *  2 - disallow kernel profiling to unpriv
  */
-int sysctl_perf_counter_paranoid __read_mostly; /* do we need to be privileged */
+int sysctl_perf_counter_paranoid __read_mostly;
 
 static inline bool perf_paranoid_cpu(void)
 {
@@ -61,7 +62,11 @@ static inline bool perf_paranoid_kernel(void)
 }
 
 int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */
-int sysctl_perf_counter_limit __read_mostly = 100000; /* max NMIs per second */
+
+/*
+ * max perf counter sample rate
+ */
+int sysctl_perf_counter_sample_rate __read_mostly = 100000;
 
 static atomic64_t perf_counter_id;
 
@@ -1244,7 +1249,7 @@ static void perf_ctx_adjust_freq(struct perf_counter_context *ctx)
 		if (interrupts == MAX_INTERRUPTS) {
 			perf_log_throttle(counter, 1);
 			counter->pmu->unthrottle(counter);
-			interrupts = 2*sysctl_perf_counter_limit/HZ;
+			interrupts = 2*sysctl_perf_counter_sample_rate/HZ;
 		}
 
 		if (!counter->attr.freq || !counter->attr.sample_freq)
@@ -1682,7 +1687,7 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg)
 
 	spin_lock_irq(&ctx->lock);
 	if (counter->attr.freq) {
-		if (value > sysctl_perf_counter_limit) {
+		if (value > sysctl_perf_counter_sample_rate) {
 			ret = -EINVAL;
 			goto unlock;
 		}
@@ -2979,7 +2984,8 @@ int perf_counter_overflow(struct perf_counter *counter, int nmi,
 	} else {
 		if (hwc->interrupts != MAX_INTERRUPTS) {
 			hwc->interrupts++;
-			if (HZ * hwc->interrupts > (u64)sysctl_perf_counter_limit) {
+			if (HZ * hwc->interrupts >
+					(u64)sysctl_perf_counter_sample_rate) {
 				hwc->interrupts = MAX_INTERRUPTS;
 				perf_log_throttle(counter, 0);
 				ret = 1;
@@ -3639,6 +3645,11 @@ SYSCALL_DEFINE5(perf_counter_open,
 			return -EACCES;
 	}
 
+	if (attr.freq) {
+		if (attr.sample_freq > sysctl_perf_counter_sample_rate)
+			return -EINVAL;
+	}
+
 	/*
 	 * Get the target context (task or percpu):
 	 */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 344a65981de..9fd4e436b69 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -932,9 +932,9 @@ static struct ctl_table kern_table[] = {
 	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "perf_counter_int_limit",
-		.data		= &sysctl_perf_counter_limit,
-		.maxlen		= sizeof(sysctl_perf_counter_limit),
+		.procname	= "perf_counter_max_sample_rate",
+		.data		= &sysctl_perf_counter_sample_rate,
+		.maxlen		= sizeof(sysctl_perf_counter_sample_rate),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
-- 
cgit v1.2.3-70-g09d2


From fcc8ac1825d3d0fb81f73bc1a80ebc863168bb56 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Thu, 11 Jun 2009 12:24:17 +0100
Subject: tty: Add carrier processing on close to the tty_port core

Some drivers implement this internally, others miss it out. Push the
behaviour into the core code as that way everyone will do it consistently.

Update the dtr rts method to raise or lower depending upon flags. Having a
single method in this style fits most of the implementations more cleanly than
two funtions.

We need this in place before we tackle the USB side

Signed-off-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/epca.c               |  4 ++--
 drivers/char/isicom.c             | 19 +++++++++++++------
 drivers/char/istallion.c          |  8 ++++----
 drivers/char/mxser.c              | 12 ++++++++----
 drivers/char/pcmcia/synclink_cs.c | 11 +++++++----
 drivers/char/rocket.c             | 13 +++++++++----
 drivers/char/stallion.c           |  6 +++---
 drivers/char/synclink.c           |  9 ++++++---
 drivers/char/synclink_gt.c        |  9 ++++++---
 drivers/char/synclinkmp.c         |  9 ++++++---
 drivers/char/tty_port.c           | 27 +++++++++++++++++++++++----
 include/linux/tty.h               |  3 ++-
 12 files changed, 89 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/drivers/char/epca.c b/drivers/char/epca.c
index af7c13ca949..8797b774235 100644
--- a/drivers/char/epca.c
+++ b/drivers/char/epca.c
@@ -745,7 +745,7 @@ static int epca_carrier_raised(struct tty_port *port)
 	return 0;
 }
 
-static void epca_raise_dtr_rts(struct tty_port *port)
+static void epca_dtr_rts(struct tty_port *port, int onoff)
 {
 }
 
@@ -925,7 +925,7 @@ static const struct tty_operations pc_ops = {
 
 static const struct tty_port_operations epca_port_ops = {
 	.carrier_raised = epca_carrier_raised,
-	.raise_dtr_rts = epca_raise_dtr_rts,
+	.dtr_rts = epca_dtr_rts,
 };
 
 static int info_open(struct tty_struct *tty, struct file *filp)
diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c
index a59eac584d1..4d745a89504 100644
--- a/drivers/char/isicom.c
+++ b/drivers/char/isicom.c
@@ -329,7 +329,7 @@ static inline void drop_rts(struct isi_port *port)
 
 /* card->lock MUST NOT be held */
 
-static void isicom_raise_dtr_rts(struct tty_port *port)
+static void isicom_dtr_rts(struct tty_port *port, int on)
 {
 	struct isi_port *ip = container_of(port, struct isi_port, port);
 	struct isi_board *card = ip->card;
@@ -339,10 +339,17 @@ static void isicom_raise_dtr_rts(struct tty_port *port)
 	if (!lock_card(card))
 		return;
 
-	outw(0x8000 | (channel << card->shift_count) | 0x02, base);
-	outw(0x0f04, base);
-	InterruptTheCard(base);
-	ip->status |= (ISI_DTR | ISI_RTS);
+	if (on) {
+		outw(0x8000 | (channel << card->shift_count) | 0x02, base);
+		outw(0x0f04, base);
+		InterruptTheCard(base);
+		ip->status |= (ISI_DTR | ISI_RTS);
+	} else {
+		outw(0x8000 | (channel << card->shift_count) | 0x02, base);
+		outw(0x0C04, base);
+		InterruptTheCard(base);
+		ip->status &= ~(ISI_DTR | ISI_RTS);
+	}
 	unlock_card(card);
 }
 
@@ -1339,7 +1346,7 @@ static const struct tty_operations isicom_ops = {
 
 static const struct tty_port_operations isicom_port_ops = {
 	.carrier_raised		= isicom_carrier_raised,
-	.raise_dtr_rts		= isicom_raise_dtr_rts,
+	.dtr_rts		= isicom_dtr_rts,
 };
 
 static int __devinit reset_card(struct pci_dev *pdev,
diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c
index fff19f7e29d..e18800c400b 100644
--- a/drivers/char/istallion.c
+++ b/drivers/char/istallion.c
@@ -1140,14 +1140,14 @@ static int stli_carrier_raised(struct tty_port *port)
 	return (portp->sigs & TIOCM_CD) ? 1 : 0;
 }
 
-static void stli_raise_dtr_rts(struct tty_port *port)
+static void stli_dtr_rts(struct tty_port *port, int on)
 {
 	struct stliport *portp = container_of(port, struct stliport, port);
 	struct stlibrd *brdp = stli_brds[portp->brdnr];
-	stli_mkasysigs(&portp->asig, 1, 1);
+	stli_mkasysigs(&portp->asig, on, on);
 	if (stli_cmdwait(brdp, portp, A_SETSIGNALS, &portp->asig,
 		sizeof(asysigs_t), 0) < 0)
-			printk(KERN_WARNING "istallion: dtr raise failed.\n");
+			printk(KERN_WARNING "istallion: dtr set failed.\n");
 }
 
 
@@ -4417,7 +4417,7 @@ static const struct tty_operations stli_ops = {
 
 static const struct tty_port_operations stli_port_ops = {
 	.carrier_raised = stli_carrier_raised,
-	.raise_dtr_rts = stli_raise_dtr_rts,
+	.dtr_rts = stli_dtr_rts,
 };
 
 /*****************************************************************************/
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 13f8871e5b2..9533f43a30b 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -547,14 +547,18 @@ static int mxser_carrier_raised(struct tty_port *port)
 	return (inb(mp->ioaddr + UART_MSR) & UART_MSR_DCD)?1:0;
 }
 
-static void mxser_raise_dtr_rts(struct tty_port *port)
+static void mxser_dtr_rts(struct tty_port *port, int on)
 {
 	struct mxser_port *mp = container_of(port, struct mxser_port, port);
 	unsigned long flags;
 
 	spin_lock_irqsave(&mp->slock, flags);
-	outb(inb(mp->ioaddr + UART_MCR) |
-		UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR);
+	if (on)
+		outb(inb(mp->ioaddr + UART_MCR) |
+			UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR);
+	else
+		outb(inb(mp->ioaddr + UART_MCR)&~(UART_MCR_DTR | UART_MCR_RTS),
+			mp->ioaddr + UART_MCR);
 	spin_unlock_irqrestore(&mp->slock, flags);
 }
 
@@ -2356,7 +2360,7 @@ static const struct tty_operations mxser_ops = {
 
 struct tty_port_operations mxser_port_ops = {
 	.carrier_raised = mxser_carrier_raised,
-	.raise_dtr_rts = mxser_raise_dtr_rts,
+	.dtr_rts = mxser_dtr_rts,
 };
 
 /*
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 19d79fc5446..77b36488922 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -383,7 +383,7 @@ static void async_mode(MGSLPC_INFO *info);
 static void tx_timeout(unsigned long context);
 
 static int carrier_raised(struct tty_port *port);
-static void raise_dtr_rts(struct tty_port *port);
+static void dtr_rts(struct tty_port *port, int onoff);
 
 #if SYNCLINK_GENERIC_HDLC
 #define dev_to_port(D) (dev_to_hdlc(D)->priv)
@@ -513,7 +513,7 @@ static void ldisc_receive_buf(struct tty_struct *tty,
 
 static const struct tty_port_operations mgslpc_port_ops = {
 	.carrier_raised = carrier_raised,
-	.raise_dtr_rts = raise_dtr_rts
+	.dtr_rts = dtr_rts
 };
 
 static int mgslpc_probe(struct pcmcia_device *link)
@@ -2528,13 +2528,16 @@ static int carrier_raised(struct tty_port *port)
 	return 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int onoff)
 {
 	MGSLPC_INFO *info = container_of(port, MGSLPC_INFO, port);
 	unsigned long flags;
 
 	spin_lock_irqsave(&info->lock,flags);
-	info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	if (onoff)
+		info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	else
+		info->serial_signals &= ~SerialSignal_RTS + SerialSignal_DTR;
 	set_signals(info);
 	spin_unlock_irqrestore(&info->lock,flags);
 }
diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c
index f59fc5cea06..7399188049d 100644
--- a/drivers/char/rocket.c
+++ b/drivers/char/rocket.c
@@ -872,11 +872,16 @@ static int carrier_raised(struct tty_port *port)
 	return (sGetChanStatusLo(&info->channel) & CD_ACT) ? 1 : 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int on)
 {
 	struct r_port *info = container_of(port, struct r_port, port);
-	sSetDTR(&info->channel);
-	sSetRTS(&info->channel);
+	if (on) {
+		sSetDTR(&info->channel);
+		sSetRTS(&info->channel);
+	} else {
+		sClrDTR(&info->channel);
+		sClrRTS(&info->channel);
+	}
 }
 
 /*
@@ -2250,7 +2255,7 @@ static const struct tty_operations rocket_ops = {
 
 static const struct tty_port_operations rocket_port_ops = {
 	.carrier_raised = carrier_raised,
-	.raise_dtr_rts = raise_dtr_rts,
+	.dtr_rts = dtr_rts,
 };
 
 /*
diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c
index 2ad813a801d..53e504f41b2 100644
--- a/drivers/char/stallion.c
+++ b/drivers/char/stallion.c
@@ -772,11 +772,11 @@ static int stl_carrier_raised(struct tty_port *port)
 	return (portp->sigs & TIOCM_CD) ? 1 : 0;
 }
 
-static void stl_raise_dtr_rts(struct tty_port *port)
+static void stl_dtr_rts(struct tty_port *port, int on)
 {
 	struct stlport *portp = container_of(port, struct stlport, port);
 	/* Takes brd_lock internally */
-	stl_setsignals(portp, 1, 1);
+	stl_setsignals(portp, on, on);
 }
 
 /*****************************************************************************/
@@ -2547,7 +2547,7 @@ static const struct tty_operations stl_ops = {
 
 static const struct tty_port_operations stl_port_ops = {
 	.carrier_raised = stl_carrier_raised,
-	.raise_dtr_rts = stl_raise_dtr_rts,
+	.dtr_rts = stl_dtr_rts,
 };
 
 /*****************************************************************************/
diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c
index afd0b26ca05..afded3a2379 100644
--- a/drivers/char/synclink.c
+++ b/drivers/char/synclink.c
@@ -3247,13 +3247,16 @@ static int carrier_raised(struct tty_port *port)
 	return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int on)
 {
 	struct mgsl_struct *info = container_of(port, struct mgsl_struct, port);
 	unsigned long flags;
 
 	spin_lock_irqsave(&info->irq_spinlock,flags);
-	info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	if (on)
+		info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	else
+		info->serial_signals &= ~(SerialSignal_RTS + SerialSignal_DTR);
  	usc_set_serial_signals(info);
 	spin_unlock_irqrestore(&info->irq_spinlock,flags);
 }
@@ -4258,7 +4261,7 @@ static void mgsl_add_device( struct mgsl_struct *info )
 
 static const struct tty_port_operations mgsl_port_ops = {
 	.carrier_raised = carrier_raised,
-	.raise_dtr_rts = raise_dtr_rts,
+	.dtr_rts = dtr_rts,
 };
 
 
diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c
index 5e256494686..67986ea0d47 100644
--- a/drivers/char/synclink_gt.c
+++ b/drivers/char/synclink_gt.c
@@ -3099,13 +3099,16 @@ static int carrier_raised(struct tty_port *port)
 	return (info->signals & SerialSignal_DCD) ? 1 : 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int on)
 {
 	unsigned long flags;
 	struct slgt_info *info = container_of(port, struct slgt_info, port);
 
 	spin_lock_irqsave(&info->lock,flags);
-	info->signals |= SerialSignal_RTS + SerialSignal_DTR;
+	if (on)
+		info->signals |= SerialSignal_RTS + SerialSignal_DTR;
+	else
+		info->signals &= ~(SerialSignal_RTS + SerialSignal_DTR);
  	set_signals(info);
 	spin_unlock_irqrestore(&info->lock,flags);
 }
@@ -3419,7 +3422,7 @@ static void add_device(struct slgt_info *info)
 
 static const struct tty_port_operations slgt_port_ops = {
 	.carrier_raised = carrier_raised,
-	.raise_dtr_rts = raise_dtr_rts,
+	.dtr_rts = dtr_rts,
 };
 
 /*
diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c
index 26de60efe4b..6f727e3c53a 100644
--- a/drivers/char/synclinkmp.c
+++ b/drivers/char/synclinkmp.c
@@ -3277,13 +3277,16 @@ static int carrier_raised(struct tty_port *port)
 	return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int on)
 {
 	SLMP_INFO *info = container_of(port, SLMP_INFO, port);
 	unsigned long flags;
 
 	spin_lock_irqsave(&info->lock,flags);
-	info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	if (on)
+		info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+	else
+		info->serial_signals &= ~(SerialSignal_RTS + SerialSignal_DTR);
  	set_signals(info);
 	spin_unlock_irqrestore(&info->lock,flags);
 }
@@ -3746,7 +3749,7 @@ static void add_device(SLMP_INFO *info)
 
 static const struct tty_port_operations port_ops = {
 	.carrier_raised = carrier_raised,
-	.raise_dtr_rts = raise_dtr_rts,
+	.dtr_rts = dtr_rts,
 };
 
 /* Allocate and initialize a device instance structure
diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index 9b8004c7268..926d4a5593f 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -137,7 +137,7 @@ int tty_port_carrier_raised(struct tty_port *port)
 EXPORT_SYMBOL(tty_port_carrier_raised);
 
 /**
- *	tty_port_raise_dtr_rts	-	Riase DTR/RTS
+ *	tty_port_raise_dtr_rts	-	Raise DTR/RTS
  *	@port: tty port
  *
  *	Wrapper for the DTR/RTS raise logic. For the moment this is used
@@ -147,11 +147,27 @@ EXPORT_SYMBOL(tty_port_carrier_raised);
 
 void tty_port_raise_dtr_rts(struct tty_port *port)
 {
-	if (port->ops->raise_dtr_rts)
-		port->ops->raise_dtr_rts(port);
+	if (port->ops->dtr_rts)
+		port->ops->dtr_rts(port, 1);
 }
 EXPORT_SYMBOL(tty_port_raise_dtr_rts);
 
+/**
+ *	tty_port_lower_dtr_rts	-	Lower DTR/RTS
+ *	@port: tty port
+ *
+ *	Wrapper for the DTR/RTS raise logic. For the moment this is used
+ *	to hide some internal details. This will eventually become entirely
+ *	internal to the tty port.
+ */
+
+void tty_port_lower_dtr_rts(struct tty_port *port)
+{
+	if (port->ops->dtr_rts)
+		port->ops->dtr_rts(port, 0);
+}
+EXPORT_SYMBOL(tty_port_lower_dtr_rts);
+
 /**
  *	tty_port_block_til_ready	-	Waiting logic for tty open
  *	@port: the tty port being opened
@@ -167,7 +183,7 @@ EXPORT_SYMBOL(tty_port_raise_dtr_rts);
  *		- port flags and counts
  *
  *	The passed tty_port must implement the carrier_raised method if it can
- *	do carrier detect and the raise_dtr_rts method if it supports software
+ *	do carrier detect and the dtr_rts method if it supports software
  *	management of these lines. Note that the dtr/rts raise is done each
  *	iteration as a hangup may have previously dropped them while we wait.
  */
@@ -302,6 +318,9 @@ void tty_port_close_end(struct tty_port *port, struct tty_struct *tty)
 
 	tty_ldisc_flush(tty);
 
+	if (tty->termios->c_cflag & HUPCL)
+		tty_port_lower_dtr_rts(port);
+
 	spin_lock_irqsave(&port->lock, flags);
 	tty->closing = 0;
 
diff --git a/include/linux/tty.h b/include/linux/tty.h
index fc39db95499..98694364c96 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -185,7 +185,7 @@ struct tty_port;
 struct tty_port_operations {
 	/* Return 1 if the carrier is raised */
 	int (*carrier_raised)(struct tty_port *port);
-	void (*raise_dtr_rts)(struct tty_port *port);
+	void (*dtr_rts)(struct tty_port *port, int raise);
 };
 	
 struct tty_port {
@@ -438,6 +438,7 @@ extern struct tty_struct *tty_port_tty_get(struct tty_port *port);
 extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty);
 extern int tty_port_carrier_raised(struct tty_port *port);
 extern void tty_port_raise_dtr_rts(struct tty_port *port);
+extern void tty_port_lower_dtr_rts(struct tty_port *port);
 extern void tty_port_hangup(struct tty_port *port);
 extern int tty_port_block_til_ready(struct tty_port *port,
 				struct tty_struct *tty, struct file *filp);
-- 
cgit v1.2.3-70-g09d2


From 1ec739be75a6cb961a46ba0b1982d0edb7f27558 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Thu, 11 Jun 2009 12:25:25 +0100
Subject: tty: Implement a drain delay in the tty port

We need this for devices that cannot flush and wait, but which do not order
data and modem events. Without it we will hang up before all the data
clears the hardware. Needed for the USB changes.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/tty_port.c | 11 +++++++++++
 include/linux/tty.h     |  3 +++
 2 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index 926d4a5593f..4d08b6d27c2 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -308,6 +308,17 @@ int tty_port_close_start(struct tty_port *port, struct tty_struct *tty, struct f
 	if (port->flags & ASYNC_INITIALIZED &&
 			port->closing_wait != ASYNC_CLOSING_WAIT_NONE)
 		tty_wait_until_sent(tty, port->closing_wait);
+	if (port->drain_delay) {
+		unsigned int bps = tty_get_baud_rate(tty);
+		long timeout;
+
+		if (bps > 1200)
+			timeout = max_t(long, (HZ * 10 * port->drain_delay) / bps,
+								HZ / 10);
+		else
+			timeout = 2 * HZ;
+		schedule_timeout_interruptible(timeout);
+	}
 	return 1;
 }
 EXPORT_SYMBOL(tty_port_close_start);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 98694364c96..bed5a3d4030 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -201,6 +201,9 @@ struct tty_port {
 	unsigned char		*xmit_buf;	/* Optional buffer */
 	int			close_delay;	/* Close port delay */
 	int			closing_wait;	/* Delay for output */
+	int			drain_delay;	/* Set to zero if no pure time
+						   based drain is needed else
+						   set to size of fifo */
 };
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 335f8514f200e63d689113d29cb7253a5c282967 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Thu, 11 Jun 2009 12:26:29 +0100
Subject: tty: Bring the usb tty port structure into more use

This allows us to clean stuff up, but is probably also going to cause
some app breakage with buggy apps as we now implement proper POSIX behaviour
for USB ports matching all the other ports. This does also mean other apps
that break on USB will now work properly.

Signed-off-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/usb/serial/belkin_sa.c        |   6 +-
 drivers/usb/serial/ch341.c            |  46 ++++++-----
 drivers/usb/serial/console.c          |   6 +-
 drivers/usb/serial/cp210x.c           |   6 +-
 drivers/usb/serial/cyberjack.c        |   6 +-
 drivers/usb/serial/cypress_m8.c       |  81 ++++++-------------
 drivers/usb/serial/digi_acceleport.c  |  75 +++++-------------
 drivers/usb/serial/empeg.c            |   6 +-
 drivers/usb/serial/ftdi_sio.c         |  50 ++++++------
 drivers/usb/serial/garmin_gps.c       |   3 +-
 drivers/usb/serial/generic.c          |   3 +-
 drivers/usb/serial/io_edgeport.c      |  10 +--
 drivers/usb/serial/io_ti.c            |   3 +-
 drivers/usb/serial/ipaq.c             |   6 +-
 drivers/usb/serial/ipw.c              |  18 ++---
 drivers/usb/serial/ir-usb.c           |   6 +-
 drivers/usb/serial/iuu_phoenix.c      |  25 +-----
 drivers/usb/serial/keyspan.c          |  13 ++-
 drivers/usb/serial/keyspan.h          |   8 +-
 drivers/usb/serial/keyspan_pda.c      |  48 ++++++++----
 drivers/usb/serial/kl5kusb105.c       |   6 +-
 drivers/usb/serial/kobil_sct.c        |   9 +--
 drivers/usb/serial/mct_u232.c         |  37 ++++-----
 drivers/usb/serial/mos7720.c          |   3 +-
 drivers/usb/serial/mos7840.c          |  48 +-----------
 drivers/usb/serial/navman.c           |   3 +-
 drivers/usb/serial/omninet.c          |   6 +-
 drivers/usb/serial/opticon.c          |   3 +-
 drivers/usb/serial/option.c           |  68 ++++++++--------
 drivers/usb/serial/oti6858.c          |  57 ++------------
 drivers/usb/serial/pl2303.c           |  79 ++++++++-----------
 drivers/usb/serial/sierra.c           |  97 ++++++++++-------------
 drivers/usb/serial/spcp8x5.c          |  85 ++++++++------------
 drivers/usb/serial/symbolserial.c     |   3 +-
 drivers/usb/serial/ti_usb_3410_5052.c |   6 +-
 drivers/usb/serial/usb-serial.c       | 144 +++++++++++++++++++++++-----------
 drivers/usb/serial/visor.c            |   6 +-
 drivers/usb/serial/whiteheat.c        |  33 +-------
 include/linux/usb/serial.h            |  10 ++-
 39 files changed, 467 insertions(+), 661 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/serial/belkin_sa.c b/drivers/usb/serial/belkin_sa.c
index b7eacad4d48..2bfd6dd85b5 100644
--- a/drivers/usb/serial/belkin_sa.c
+++ b/drivers/usb/serial/belkin_sa.c
@@ -93,8 +93,7 @@ static int  belkin_sa_startup(struct usb_serial *serial);
 static void belkin_sa_shutdown(struct usb_serial *serial);
 static int  belkin_sa_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void belkin_sa_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void belkin_sa_close(struct usb_serial_port *port);
 static void belkin_sa_read_int_callback(struct urb *urb);
 static void belkin_sa_set_termios(struct tty_struct *tty,
 			struct usb_serial_port *port, struct ktermios * old);
@@ -244,8 +243,7 @@ exit:
 } /* belkin_sa_open */
 
 
-static void belkin_sa_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void belkin_sa_close(struct usb_serial_port *port)
 {
 	dbg("%s port %d", __func__, port->number);
 
diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index ab4cc277aa6..2830766f5b3 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -262,13 +262,33 @@ error:	kfree(priv);
 	return r;
 }
 
-static void ch341_close(struct tty_struct *tty, struct usb_serial_port *port,
-				struct file *filp)
+static int ch341_carrier_raised(struct usb_serial_port *port)
+{
+	struct ch341_private *priv = usb_get_serial_port_data(port);
+	if (priv->line_status & CH341_BIT_DCD)
+		return 1;
+	return 0;
+}
+
+static void ch341_dtr_rts(struct usb_serial_port *port, int on)
 {
 	struct ch341_private *priv = usb_get_serial_port_data(port);
 	unsigned long flags;
-	unsigned int c_cflag;
 
+	dbg("%s - port %d", __func__, port->number);
+	/* drop DTR and RTS */
+	spin_lock_irqsave(&priv->lock, flags);
+	if (on)
+		priv->line_control |= CH341_BIT_RTS | CH341_BIT_DTR;
+	else
+		priv->line_control &= ~(CH341_BIT_RTS | CH341_BIT_DTR);
+	spin_unlock_irqrestore(&priv->lock, flags);
+	ch341_set_handshake(port->serial->dev, priv->line_control);
+	wake_up_interruptible(&priv->delta_msr_wait);
+}
+
+static void ch341_close(struct usb_serial_port *port)
+{
 	dbg("%s - port %d", __func__, port->number);
 
 	/* shutdown our urbs */
@@ -276,18 +296,6 @@ static void ch341_close(struct tty_struct *tty, struct usb_serial_port *port,
 	usb_kill_urb(port->write_urb);
 	usb_kill_urb(port->read_urb);
 	usb_kill_urb(port->interrupt_in_urb);
-
-	if (tty) {
-		c_cflag = tty->termios->c_cflag;
-		if (c_cflag & HUPCL) {
-			/* drop DTR and RTS */
-			spin_lock_irqsave(&priv->lock, flags);
-			priv->line_control = 0;
-			spin_unlock_irqrestore(&priv->lock, flags);
-			ch341_set_handshake(port->serial->dev, 0);
-		}
-	}
-	wake_up_interruptible(&priv->delta_msr_wait);
 }
 
 
@@ -302,7 +310,6 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port,
 	dbg("ch341_open()");
 
 	priv->baud_rate = DEFAULT_BAUD_RATE;
-	priv->line_control = CH341_BIT_RTS | CH341_BIT_DTR;
 
 	r = ch341_configure(serial->dev, priv);
 	if (r)
@@ -322,7 +329,7 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port,
 	if (r) {
 		dev_err(&port->dev, "%s - failed submitting interrupt urb,"
 			" error %d\n", __func__, r);
-		ch341_close(tty, port, NULL);
+		ch341_close(port);
 		return -EPROTO;
 	}
 
@@ -343,9 +350,6 @@ static void ch341_set_termios(struct tty_struct *tty,
 
 	dbg("ch341_set_termios()");
 
-	if (!tty || !tty->termios)
-		return;
-
 	baud_rate = tty_get_baud_rate(tty);
 
 	priv->baud_rate = baud_rate;
@@ -568,6 +572,8 @@ static struct usb_serial_driver ch341_device = {
 	.usb_driver        = &ch341_driver,
 	.num_ports         = 1,
 	.open              = ch341_open,
+	.dtr_rts	   = ch341_dtr_rts,
+	.carrier_raised	   = ch341_carrier_raised,
 	.close             = ch341_close,
 	.ioctl             = ch341_ioctl,
 	.set_termios       = ch341_set_termios,
diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c
index 19e24045b13..247b61bfb7f 100644
--- a/drivers/usb/serial/console.c
+++ b/drivers/usb/serial/console.c
@@ -169,7 +169,9 @@ static int usb_console_setup(struct console *co, char *options)
 			kfree(tty);
 		}
 	}
-
+	/* So we know not to kill the hardware on a hangup on this
+	   port. We have also bumped the use count by one so it won't go
+	   idle */
 	port->console = 1;
 	retval = 0;
 
@@ -182,7 +184,7 @@ free_tty:
 	kfree(tty);
 reset_open_count:
 	port->port.count = 0;
-goto out;
+	goto out;
 }
 
 static void usb_console_write(struct console *co,
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index e8d5133ce9c..d9f586dc6ec 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -36,8 +36,7 @@
 static int cp2101_open(struct tty_struct *, struct usb_serial_port *,
 							struct file *);
 static void cp2101_cleanup(struct usb_serial_port *);
-static void cp2101_close(struct tty_struct *, struct usb_serial_port *,
-							struct file*);
+static void cp2101_close(struct usb_serial_port *);
 static void cp2101_get_termios(struct tty_struct *,
 	struct usb_serial_port *port);
 static void cp2101_get_termios_port(struct usb_serial_port *port,
@@ -398,8 +397,7 @@ static void cp2101_cleanup(struct usb_serial_port *port)
 	}
 }
 
-static void cp2101_close(struct tty_struct *tty, struct usb_serial_port *port,
-					struct file *filp)
+static void cp2101_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c
index dd501bb63ed..933ba913e66 100644
--- a/drivers/usb/serial/cyberjack.c
+++ b/drivers/usb/serial/cyberjack.c
@@ -61,8 +61,7 @@ static int cyberjack_startup(struct usb_serial *serial);
 static void cyberjack_shutdown(struct usb_serial *serial);
 static int  cyberjack_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void cyberjack_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void cyberjack_close(struct usb_serial_port *port);
 static int cyberjack_write(struct tty_struct *tty,
 	struct usb_serial_port *port, const unsigned char *buf, int count);
 static int cyberjack_write_room(struct tty_struct *tty);
@@ -185,8 +184,7 @@ static int  cyberjack_open(struct tty_struct *tty,
 	return result;
 }
 
-static void cyberjack_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void cyberjack_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c
index e568710b263..669f9384853 100644
--- a/drivers/usb/serial/cypress_m8.c
+++ b/drivers/usb/serial/cypress_m8.c
@@ -174,8 +174,8 @@ static int  cypress_ca42v2_startup(struct usb_serial *serial);
 static void cypress_shutdown(struct usb_serial *serial);
 static int  cypress_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void cypress_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void cypress_close(struct usb_serial_port *port);
+static void cypress_dtr_rts(struct usb_serial_port *port, int on);
 static int  cypress_write(struct tty_struct *tty, struct usb_serial_port *port,
 			const unsigned char *buf, int count);
 static void cypress_send(struct usb_serial_port *port);
@@ -218,6 +218,7 @@ static struct usb_serial_driver cypress_earthmate_device = {
 	.shutdown =			cypress_shutdown,
 	.open =				cypress_open,
 	.close =			cypress_close,
+	.dtr_rts =			cypress_dtr_rts,
 	.write =			cypress_write,
 	.write_room =			cypress_write_room,
 	.ioctl =			cypress_ioctl,
@@ -244,6 +245,7 @@ static struct usb_serial_driver cypress_hidcom_device = {
 	.shutdown =			cypress_shutdown,
 	.open =				cypress_open,
 	.close =			cypress_close,
+	.dtr_rts =			cypress_dtr_rts,
 	.write =			cypress_write,
 	.write_room =			cypress_write_room,
 	.ioctl =			cypress_ioctl,
@@ -270,6 +272,7 @@ static struct usb_serial_driver cypress_ca42v2_device = {
 	.shutdown =			cypress_shutdown,
 	.open =				cypress_open,
 	.close =			cypress_close,
+	.dtr_rts =			cypress_dtr_rts,
 	.write =			cypress_write,
 	.write_room =			cypress_write_room,
 	.ioctl =			cypress_ioctl,
@@ -656,11 +659,7 @@ static int cypress_open(struct tty_struct *tty,
 	priv->rx_flags = 0;
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	/* raise both lines and set termios */
-	spin_lock_irqsave(&priv->lock, flags);
-	priv->line_control = CONTROL_DTR | CONTROL_RTS;
-	priv->cmd_ctrl = 1;
-	spin_unlock_irqrestore(&priv->lock, flags);
+	/* Set termios */
 	result = cypress_write(tty, port, NULL, 0);
 
 	if (result) {
@@ -694,76 +693,42 @@ static int cypress_open(struct tty_struct *tty,
 							__func__, result);
 		cypress_set_dead(port);
 	}
-
+	port->port.drain_delay = 256;
 	return result;
 } /* cypress_open */
 
+static void cypress_dtr_rts(struct usb_serial_port *port, int on)
+{
+	struct cypress_private *priv = usb_get_serial_port_data(port);
+	/* drop dtr and rts */
+	priv = usb_get_serial_port_data(port);
+	spin_lock_irq(&priv->lock);
+	if (on == 0)
+		priv->line_control = 0;
+	else 
+		priv->line_control = CONTROL_DTR | CONTROL_RTS;
+	priv->cmd_ctrl = 1;
+	spin_unlock_irq(&priv->lock);
+	cypress_write(NULL, port, NULL, 0);
+}
 
-static void cypress_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void cypress_close(struct usb_serial_port *port)
 {
 	struct cypress_private *priv = usb_get_serial_port_data(port);
-	unsigned int c_cflag;
-	int bps;
-	long timeout;
-	wait_queue_t wait;
 
 	dbg("%s - port %d", __func__, port->number);
 
-	/* wait for data to drain from buffer */
-	spin_lock_irq(&priv->lock);
-	timeout = CYPRESS_CLOSING_WAIT;
-	init_waitqueue_entry(&wait, current);
-	add_wait_queue(&tty->write_wait, &wait);
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (cypress_buf_data_avail(priv->buf) == 0
-		|| timeout == 0 || signal_pending(current)
-		/* without mutex, allowed due to harmless failure mode */
-		|| port->serial->disconnected)
-			break;
-		spin_unlock_irq(&priv->lock);
-		timeout = schedule_timeout(timeout);
-		spin_lock_irq(&priv->lock);
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&tty->write_wait, &wait);
-	/* clear out any remaining data in the buffer */
-	cypress_buf_clear(priv->buf);
-	spin_unlock_irq(&priv->lock);
-
 	/* writing is potentially harmful, lock must be taken */
 	mutex_lock(&port->serial->disc_mutex);
 	if (port->serial->disconnected) {
 		mutex_unlock(&port->serial->disc_mutex);
 		return;
 	}
-	/* wait for characters to drain from device */
-	if (tty) {
-		bps = tty_get_baud_rate(tty);
-		if (bps > 1200)
-			timeout = max((HZ * 2560) / bps, HZ / 10);
-		else
-			timeout = 2 * HZ;
-		schedule_timeout_interruptible(timeout);
-	}
-
+	cypress_buf_clear(priv->buf);
 	dbg("%s - stopping urbs", __func__);
 	usb_kill_urb(port->interrupt_in_urb);
 	usb_kill_urb(port->interrupt_out_urb);
 
-	if (tty) {
-		c_cflag = tty->termios->c_cflag;
-		if (c_cflag & HUPCL) {
-			/* drop dtr and rts */
-			priv = usb_get_serial_port_data(port);
-			spin_lock_irq(&priv->lock);
-			priv->line_control = 0;
-			priv->cmd_ctrl = 1;
-			spin_unlock_irq(&priv->lock);
-			cypress_write(tty, port, NULL, 0);
-		}
-	}
 
 	if (stats)
 		dev_info(&port->dev, "Statistics: %d Bytes In | %d Bytes Out | %d Commands Issued\n",
diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c
index 38ba4ea8b6b..30f5140eff0 100644
--- a/drivers/usb/serial/digi_acceleport.c
+++ b/drivers/usb/serial/digi_acceleport.c
@@ -422,7 +422,6 @@ struct digi_port {
 	int dp_throttled;
 	int dp_throttle_restart;
 	wait_queue_head_t dp_flush_wait;
-	int dp_in_close;			/* close in progress */
 	wait_queue_head_t dp_close_wait;	/* wait queue for close */
 	struct work_struct dp_wakeup_work;
 	struct usb_serial_port *dp_port;
@@ -456,8 +455,9 @@ static int digi_write_room(struct tty_struct *tty);
 static int digi_chars_in_buffer(struct tty_struct *tty);
 static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
 	struct file *filp);
-static void digi_close(struct tty_struct *tty, struct usb_serial_port *port,
-	struct file *filp);
+static void digi_close(struct usb_serial_port *port);
+static int digi_carrier_raised(struct usb_serial_port *port);
+static void digi_dtr_rts(struct usb_serial_port *port, int on);
 static int digi_startup_device(struct usb_serial *serial);
 static int digi_startup(struct usb_serial *serial);
 static void digi_shutdown(struct usb_serial *serial);
@@ -510,6 +510,8 @@ static struct usb_serial_driver digi_acceleport_2_device = {
 	.num_ports =			3,
 	.open =				digi_open,
 	.close =			digi_close,
+	.dtr_rts =			digi_dtr_rts,
+	.carrier_raised =		digi_carrier_raised,
 	.write =			digi_write,
 	.write_room =			digi_write_room,
 	.write_bulk_callback = 		digi_write_bulk_callback,
@@ -1328,6 +1330,19 @@ static int digi_chars_in_buffer(struct tty_struct *tty)
 
 }
 
+static void digi_dtr_rts(struct usb_serial_port *port, int on)
+{
+	/* Adjust DTR and RTS */
+	digi_set_modem_signals(port, on * (TIOCM_DTR|TIOCM_RTS), 1);
+}
+
+static int digi_carrier_raised(struct usb_serial_port *port)
+{
+	struct digi_port *priv = usb_get_serial_port_data(port);
+	if (priv->dp_modem_signals & TIOCM_CD)
+		return 1;
+	return 0;
+}
 
 static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
 				struct file *filp)
@@ -1336,7 +1351,6 @@ static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
 	unsigned char buf[32];
 	struct digi_port *priv = usb_get_serial_port_data(port);
 	struct ktermios not_termios;
-	unsigned long flags = 0;
 
 	dbg("digi_open: TOP: port=%d, open_count=%d",
 		priv->dp_port_num, port->port.count);
@@ -1345,26 +1359,6 @@ static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
 	if (digi_startup_device(port->serial) != 0)
 		return -ENXIO;
 
-	spin_lock_irqsave(&priv->dp_port_lock, flags);
-
-	/* don't wait on a close in progress for non-blocking opens */
-	if (priv->dp_in_close && (filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0) {
-		spin_unlock_irqrestore(&priv->dp_port_lock, flags);
-		return -EAGAIN;
-	}
-
-	/* wait for a close in progress to finish */
-	while (priv->dp_in_close) {
-		cond_wait_interruptible_timeout_irqrestore(
-			&priv->dp_close_wait, DIGI_RETRY_TIMEOUT,
-			&priv->dp_port_lock, flags);
-		if (signal_pending(current))
-			return -EINTR;
-		spin_lock_irqsave(&priv->dp_port_lock, flags);
-	}
-
-	spin_unlock_irqrestore(&priv->dp_port_lock, flags);
-
 	/* read modem signals automatically whenever they change */
 	buf[0] = DIGI_CMD_READ_INPUT_SIGNALS;
 	buf[1] = priv->dp_port_num;
@@ -1387,16 +1381,11 @@ static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
 		not_termios.c_iflag = ~tty->termios->c_iflag;
 		digi_set_termios(tty, port, &not_termios);
 	}
-
-	/* set DTR and RTS */
-	digi_set_modem_signals(port, TIOCM_DTR|TIOCM_RTS, 1);
-
 	return 0;
 }
 
 
-static void digi_close(struct tty_struct *tty, struct usb_serial_port *port,
-				struct file *filp)
+static void digi_close(struct usb_serial_port *port)
 {
 	DEFINE_WAIT(wait);
 	int ret;
@@ -1411,28 +1400,9 @@ static void digi_close(struct tty_struct *tty, struct usb_serial_port *port,
 	if (port->serial->disconnected)
 		goto exit;
 
-	/* do cleanup only after final close on this port */
-	spin_lock_irq(&priv->dp_port_lock);
-	priv->dp_in_close = 1;
-	spin_unlock_irq(&priv->dp_port_lock);
-
-	/* tell line discipline to process only XON/XOFF */
-	tty->closing = 1;
-
-	/* wait for output to drain */
-	if ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0)
-		tty_wait_until_sent(tty, DIGI_CLOSE_TIMEOUT);
-
-	/* flush driver and line discipline buffers */
-	tty_driver_flush_buffer(tty);
-	tty_ldisc_flush(tty);
-
 	if (port->serial->dev) {
-		/* wait for transmit idle */
-		if ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0)
-			digi_transmit_idle(port, DIGI_CLOSE_TIMEOUT);
-		/* drop DTR and RTS */
-		digi_set_modem_signals(port, 0, 0);
+		/* FIXME: Transmit idle belongs in the wait_unti_sent path */
+		digi_transmit_idle(port, DIGI_CLOSE_TIMEOUT);
 
 		/* disable input flow control */
 		buf[0] = DIGI_CMD_SET_INPUT_FLOW_CONTROL;
@@ -1477,11 +1447,9 @@ static void digi_close(struct tty_struct *tty, struct usb_serial_port *port,
 		/* shutdown any outstanding bulk writes */
 		usb_kill_urb(port->write_urb);
 	}
-	tty->closing = 0;
 exit:
 	spin_lock_irq(&priv->dp_port_lock);
 	priv->dp_write_urb_in_use = 0;
-	priv->dp_in_close = 0;
 	wake_up_interruptible(&priv->dp_close_wait);
 	spin_unlock_irq(&priv->dp_port_lock);
 	mutex_unlock(&port->serial->disc_mutex);
@@ -1560,7 +1528,6 @@ static int digi_startup(struct usb_serial *serial)
 		priv->dp_throttled = 0;
 		priv->dp_throttle_restart = 0;
 		init_waitqueue_head(&priv->dp_flush_wait);
-		priv->dp_in_close = 0;
 		init_waitqueue_head(&priv->dp_close_wait);
 		INIT_WORK(&priv->dp_wakeup_work, digi_wakeup_write_lock);
 		priv->dp_port = serial->port[i];
diff --git a/drivers/usb/serial/empeg.c b/drivers/usb/serial/empeg.c
index c709ec474a8..2b141ccb0cd 100644
--- a/drivers/usb/serial/empeg.c
+++ b/drivers/usb/serial/empeg.c
@@ -81,8 +81,7 @@ static int debug;
 /* function prototypes for an empeg-car player */
 static int  empeg_open(struct tty_struct *tty, struct usb_serial_port *port,
 						struct file *filp);
-static void empeg_close(struct tty_struct *tty, struct usb_serial_port *port,
-						struct file *filp);
+static void empeg_close(struct usb_serial_port *port);
 static int  empeg_write(struct tty_struct *tty, struct usb_serial_port *port,
 						const unsigned char *buf,
 						int count);
@@ -181,8 +180,7 @@ static int empeg_open(struct tty_struct *tty, struct usb_serial_port *port,
 }
 
 
-static void empeg_close(struct tty_struct *tty, struct usb_serial_port *port,
-				struct file *filp)
+static void empeg_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index d9fcdaedf38..d9d87111f9a 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -719,8 +719,8 @@ static int  ftdi_sio_port_probe(struct usb_serial_port *port);
 static int  ftdi_sio_port_remove(struct usb_serial_port *port);
 static int  ftdi_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void ftdi_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void ftdi_close(struct usb_serial_port *port);
+static void ftdi_dtr_rts(struct usb_serial_port *port, int on);
 static int  ftdi_write(struct tty_struct *tty, struct usb_serial_port *port,
 			const unsigned char *buf, int count);
 static int  ftdi_write_room(struct tty_struct *tty);
@@ -758,6 +758,7 @@ static struct usb_serial_driver ftdi_sio_device = {
 	.port_remove =		ftdi_sio_port_remove,
 	.open =			ftdi_open,
 	.close =		ftdi_close,
+	.dtr_rts =		ftdi_dtr_rts,
 	.throttle =		ftdi_throttle,
 	.unthrottle =		ftdi_unthrottle,
 	.write =		ftdi_write,
@@ -1558,6 +1559,30 @@ static int ftdi_open(struct tty_struct *tty,
 } /* ftdi_open */
 
 
+static void ftdi_dtr_rts(struct usb_serial_port *port, int on)
+{
+	struct ftdi_private *priv = usb_get_serial_port_data(port);
+	char buf[1];
+
+	mutex_lock(&port->serial->disc_mutex);
+	if (!port->serial->disconnected) {
+		/* Disable flow control */
+		if (!on && usb_control_msg(port->serial->dev,
+			    usb_sndctrlpipe(port->serial->dev, 0),
+			    FTDI_SIO_SET_FLOW_CTRL_REQUEST,
+			    FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE,
+			    0, priv->interface, buf, 0,
+			    WDR_TIMEOUT) < 0) {
+			    dev_err(&port->dev, "error from flowcontrol urb\n");
+		}
+		/* drop RTS and DTR */
+		if (on)
+			set_mctrl(port, TIOCM_DTR | TIOCM_RTS);
+		else
+			clear_mctrl(port, TIOCM_DTR | TIOCM_RTS);
+	}
+	mutex_unlock(&port->serial->disc_mutex);
+}
 
 /*
  * usbserial:__serial_close  only calls ftdi_close if the point is open
@@ -1567,31 +1592,12 @@ static int ftdi_open(struct tty_struct *tty,
  *
  */
 
-static void ftdi_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void ftdi_close(struct usb_serial_port *port)
 { /* ftdi_close */
-	unsigned int c_cflag = tty->termios->c_cflag;
 	struct ftdi_private *priv = usb_get_serial_port_data(port);
-	char buf[1];
 
 	dbg("%s", __func__);
 
-	mutex_lock(&port->serial->disc_mutex);
-	if (c_cflag & HUPCL && !port->serial->disconnected) {
-		/* Disable flow control */
-		if (usb_control_msg(port->serial->dev,
-				    usb_sndctrlpipe(port->serial->dev, 0),
-				    FTDI_SIO_SET_FLOW_CTRL_REQUEST,
-				    FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE,
-				    0, priv->interface, buf, 0,
-				    WDR_TIMEOUT) < 0) {
-			dev_err(&port->dev, "error from flowcontrol urb\n");
-		}
-
-		/* drop RTS and DTR */
-		clear_mctrl(port, TIOCM_DTR | TIOCM_RTS);
-	} /* Note change no line if hupcl is off */
-	mutex_unlock(&port->serial->disc_mutex);
 
 	/* cancel any scheduled reading */
 	cancel_delayed_work_sync(&priv->rx_work);
diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c
index 586d30ff450..ee25a3fe3b0 100644
--- a/drivers/usb/serial/garmin_gps.c
+++ b/drivers/usb/serial/garmin_gps.c
@@ -993,8 +993,7 @@ static int garmin_open(struct tty_struct *tty,
 }
 
 
-static void garmin_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void garmin_close(struct usb_serial_port *port)
 {
 	struct usb_serial *serial = port->serial;
 	struct garmin_data *garmin_data_p = usb_get_serial_port_data(port);
diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index 4cec9906ccf..be82ea95672 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -184,8 +184,7 @@ int usb_serial_generic_resume(struct usb_serial *serial)
 }
 EXPORT_SYMBOL_GPL(usb_serial_generic_resume);
 
-void usb_serial_generic_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+void usb_serial_generic_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 	generic_cleanup(port);
diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c
index fb4a73d090f..53ef5996e33 100644
--- a/drivers/usb/serial/io_edgeport.c
+++ b/drivers/usb/serial/io_edgeport.c
@@ -207,8 +207,7 @@ static void edge_bulk_out_cmd_callback(struct urb *urb);
 /* function prototypes for the usbserial callbacks */
 static int edge_open(struct tty_struct *tty, struct usb_serial_port *port,
 					struct file *filp);
-static void edge_close(struct tty_struct *tty, struct usb_serial_port *port,
-					struct file *filp);
+static void edge_close(struct usb_serial_port *port);
 static int edge_write(struct tty_struct *tty, struct usb_serial_port *port,
 					const unsigned char *buf, int count);
 static int edge_write_room(struct tty_struct *tty);
@@ -965,7 +964,7 @@ static int edge_open(struct tty_struct *tty,
 
 	if (!edge_port->txfifo.fifo) {
 		dbg("%s - no memory", __func__);
-		edge_close(tty, port, filp);
+		edge_close(port);
 		return -ENOMEM;
 	}
 
@@ -975,7 +974,7 @@ static int edge_open(struct tty_struct *tty,
 
 	if (!edge_port->write_urb) {
 		dbg("%s - no memory", __func__);
-		edge_close(tty, port, filp);
+		edge_close(port);
 		return -ENOMEM;
 	}
 
@@ -1099,8 +1098,7 @@ static void block_until_tx_empty(struct edgeport_port *edge_port)
  * edge_close
  *	this function is called by the tty driver when a port is closed
  *****************************************************************************/
-static void edge_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void edge_close(struct usb_serial_port *port)
 {
 	struct edgeport_serial *edge_serial;
 	struct edgeport_port *edge_port;
diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c
index 513b25e044c..eabf20eeb37 100644
--- a/drivers/usb/serial/io_ti.c
+++ b/drivers/usb/serial/io_ti.c
@@ -2009,8 +2009,7 @@ release_es_lock:
 	return status;
 }
 
-static void edge_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void edge_close(struct usb_serial_port *port)
 {
 	struct edgeport_serial *edge_serial;
 	struct edgeport_port *edge_port;
diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c
index cd62825a9ac..c610a99fa47 100644
--- a/drivers/usb/serial/ipaq.c
+++ b/drivers/usb/serial/ipaq.c
@@ -76,8 +76,7 @@ static int initial_wait;
 /* Function prototypes for an ipaq */
 static int  ipaq_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void ipaq_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void ipaq_close(struct usb_serial_port *port);
 static int  ipaq_calc_num_ports(struct usb_serial *serial);
 static int  ipaq_startup(struct usb_serial *serial);
 static void ipaq_shutdown(struct usb_serial *serial);
@@ -714,8 +713,7 @@ error:
 }
 
 
-static void ipaq_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void ipaq_close(struct usb_serial_port *port)
 {
 	struct ipaq_private	*priv = usb_get_serial_port_data(port);
 
diff --git a/drivers/usb/serial/ipw.c b/drivers/usb/serial/ipw.c
index da2a2b46644..29ad038b9c8 100644
--- a/drivers/usb/serial/ipw.c
+++ b/drivers/usb/serial/ipw.c
@@ -302,23 +302,17 @@ static int ipw_open(struct tty_struct *tty,
 	return 0;
 }
 
-static void ipw_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void ipw_dtr_rts(struct usb_serial_port *port, int on)
 {
 	struct usb_device *dev = port->serial->dev;
 	int result;
 
-	if (tty_hung_up_p(filp)) {
-		dbg("%s: tty_hung_up_p ...", __func__);
-		return;
-	}
-
 	/*--1: drop the dtr */
 	dbg("%s:dropping dtr", __func__);
 	result = usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
 			 IPW_SIO_SET_PIN,
 			 USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_OUT,
-			 IPW_PIN_CLRDTR,
+			 on ? IPW_PIN_SETDTR : IPW_PIN_CLRDTR,
 			 0,
 			 NULL,
 			 0,
@@ -332,7 +326,7 @@ static void ipw_close(struct tty_struct *tty,
 	result = usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
 			 IPW_SIO_SET_PIN, USB_TYPE_VENDOR |
 			 		USB_RECIP_INTERFACE | USB_DIR_OUT,
-			 IPW_PIN_CLRRTS,
+			 on ? IPW_PIN_SETRTS : IPW_PIN_CLRRTS,
 			 0,
 			 NULL,
 			 0,
@@ -340,7 +334,12 @@ static void ipw_close(struct tty_struct *tty,
 	if (result < 0)
 		dev_err(&port->dev,
 				"dropping rts failed (error = %d)\n", result);
+}
 
+static void ipw_close(struct usb_serial_port *port)
+{
+	struct usb_device *dev = port->serial->dev;
+	int result;
 
 	/*--3: purge */
 	dbg("%s:sending purge", __func__);
@@ -461,6 +460,7 @@ static struct usb_serial_driver ipw_device = {
 	.num_ports =		1,
 	.open =			ipw_open,
 	.close =		ipw_close,
+	.dtr_rts =		ipw_dtr_rts,
 	.port_probe = 		ipw_probe,
 	.port_remove =		ipw_disconnect,
 	.write =		ipw_write,
diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c
index 4e2cda93da5..66009b6b763 100644
--- a/drivers/usb/serial/ir-usb.c
+++ b/drivers/usb/serial/ir-usb.c
@@ -88,8 +88,7 @@ static int xbof = -1;
 static int  ir_startup (struct usb_serial *serial);
 static int  ir_open(struct tty_struct *tty, struct usb_serial_port *port,
 					struct file *filep);
-static void ir_close(struct tty_struct *tty, struct usb_serial_port *port,
-					struct file *filep);
+static void ir_close(struct usb_serial_port *port);
 static int  ir_write(struct tty_struct *tty, struct usb_serial_port *port,
 					const unsigned char *buf, int count);
 static void ir_write_bulk_callback (struct urb *urb);
@@ -346,8 +345,7 @@ static int ir_open(struct tty_struct *tty,
 	return result;
 }
 
-static void ir_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file * filp)
+static void ir_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c
index 4473d442b2a..bb572cee6ec 100644
--- a/drivers/usb/serial/iuu_phoenix.c
+++ b/drivers/usb/serial/iuu_phoenix.c
@@ -70,7 +70,6 @@ static void read_rxcmd_callback(struct urb *urb);
 struct iuu_private {
 	spinlock_t lock;	/* store irq state */
 	wait_queue_head_t delta_msr_wait;
-	u8 line_control;
 	u8 line_status;
 	u8 termios_initialized;
 	int tiostatus;		/* store IUART SIGNAL for tiocmget call */
@@ -946,19 +945,10 @@ static int iuu_uart_baud(struct usb_serial_port *port, u32 baud,
 	return status;
 }
 
-static int set_control_lines(struct usb_device *dev, u8 value)
-{
-	return 0;
-}
-
-static void iuu_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void iuu_close(struct usb_serial_port *port)
 {
 	/* iuu_led (port,255,0,0,0); */
 	struct usb_serial *serial;
-	struct iuu_private *priv = usb_get_serial_port_data(port);
-	unsigned long flags;
-	unsigned int c_cflag;
 
 	serial = port->serial;
 	if (!serial)
@@ -968,17 +958,6 @@ static void iuu_close(struct tty_struct *tty,
 
 	iuu_uart_off(port);
 	if (serial->dev) {
-		if (tty) {
-			c_cflag = tty->termios->c_cflag;
-			if (c_cflag & HUPCL) {
-				/* drop DTR and RTS */
-				priv = usb_get_serial_port_data(port);
-				spin_lock_irqsave(&priv->lock, flags);
-				priv->line_control = 0;
-				spin_unlock_irqrestore(&priv->lock, flags);
-				set_control_lines(port->serial->dev, 0);
-			}
-		}
 		/* free writebuf */
 		/* shutdown our urbs */
 		dbg("%s - shutting down urbs", __func__);
@@ -1154,7 +1133,7 @@ static int iuu_open(struct tty_struct *tty,
 	if (result) {
 		dev_err(&port->dev, "%s - failed submitting read urb,"
 			" error %d\n", __func__, result);
-		iuu_close(tty, port, NULL);
+		iuu_close(port);
 		return -EPROTO;
 	} else {
 		dbg("%s - rxcmd OK", __func__);
diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c
index 00daa8f7759..f1195a98f31 100644
--- a/drivers/usb/serial/keyspan.c
+++ b/drivers/usb/serial/keyspan.c
@@ -1298,8 +1298,16 @@ static inline void stop_urb(struct urb *urb)
 		usb_kill_urb(urb);
 }
 
-static void keyspan_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void keyspan_dtr_rts(struct usb_serial_port *port, int on)
+{
+	struct keyspan_port_private *p_priv = usb_get_serial_port_data(port);
+
+	p_priv->rts_state = on;
+	p_priv->dtr_state = on;
+	keyspan_send_setup(port, 0);
+}
+
+static void keyspan_close(struct usb_serial_port *port)
 {
 	int			i;
 	struct usb_serial	*serial = port->serial;
@@ -1336,7 +1344,6 @@ static void keyspan_close(struct tty_struct *tty,
 			stop_urb(p_priv->out_urbs[i]);
 		}
 	}
-	tty_port_tty_set(&port->port, NULL);
 }
 
 /* download the firmware to a pre-renumeration device */
diff --git a/drivers/usb/serial/keyspan.h b/drivers/usb/serial/keyspan.h
index 38b4582e073..0d4569b6076 100644
--- a/drivers/usb/serial/keyspan.h
+++ b/drivers/usb/serial/keyspan.h
@@ -38,9 +38,8 @@
 static int  keyspan_open		(struct tty_struct *tty,
 					 struct usb_serial_port *port,
 					 struct file *filp);
-static void keyspan_close		(struct tty_struct *tty,
-					 struct usb_serial_port *port,
-					 struct file *filp);
+static void keyspan_close		(struct usb_serial_port *port);
+static void keyspan_dtr_rts		(struct usb_serial_port *port, int on);
 static int  keyspan_startup		(struct usb_serial *serial);
 static void keyspan_shutdown		(struct usb_serial *serial);
 static int  keyspan_write_room		(struct tty_struct *tty);
@@ -562,6 +561,7 @@ static struct usb_serial_driver keyspan_1port_device = {
 	.num_ports		= 1,
 	.open			= keyspan_open,
 	.close			= keyspan_close,
+	.dtr_rts		= keyspan_dtr_rts,
 	.write			= keyspan_write,
 	.write_room		= keyspan_write_room,
 	.set_termios		= keyspan_set_termios,
@@ -582,6 +582,7 @@ static struct usb_serial_driver keyspan_2port_device = {
 	.num_ports		= 2,
 	.open			= keyspan_open,
 	.close			= keyspan_close,
+	.dtr_rts		= keyspan_dtr_rts,
 	.write			= keyspan_write,
 	.write_room		= keyspan_write_room,
 	.set_termios		= keyspan_set_termios,
@@ -602,6 +603,7 @@ static struct usb_serial_driver keyspan_4port_device = {
 	.num_ports		= 4,
 	.open			= keyspan_open,
 	.close			= keyspan_close,
+	.dtr_rts		= keyspan_dtr_rts,
 	.write			= keyspan_write,
 	.write_room		= keyspan_write_room,
 	.set_termios		= keyspan_set_termios,
diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c
index bf1ae247da6..ab769dbea1b 100644
--- a/drivers/usb/serial/keyspan_pda.c
+++ b/drivers/usb/serial/keyspan_pda.c
@@ -651,6 +651,35 @@ static int keyspan_pda_chars_in_buffer(struct tty_struct *tty)
 }
 
 
+static void keyspan_pda_dtr_rts(struct usb_serial_port *port, int on)
+{
+	struct usb_serial *serial = port->serial;
+
+	if (serial->dev) {
+		if (on)
+			keyspan_pda_set_modem_info(serial, (1<<7) | (1<< 2));
+		else
+			keyspan_pda_set_modem_info(serial, 0);
+	}
+}
+
+static int keyspan_pda_carrier_raised(struct usb_serial_port *port)
+{
+	struct usb_serial *serial = port->serial;
+	unsigned char modembits;
+
+	/* If we can read the modem status and the DCD is low then
+	   carrier is not raised yet */
+	if (keyspan_pda_get_modem_info(serial, &modembits) >= 0) {
+		if (!(modembits & (1>>6)))
+			return 0;
+	}
+	/* Carrier raised, or we failed (eg disconnected) so
+	   progress accordingly */
+	return 1;
+}
+
+
 static int keyspan_pda_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp)
 {
@@ -682,13 +711,6 @@ static int keyspan_pda_open(struct tty_struct *tty,
 	priv->tx_room = room;
 	priv->tx_throttled = room ? 0 : 1;
 
-	/* the normal serial device seems to always turn on DTR and RTS here,
-	   so do the same */
-	if (tty && (tty->termios->c_cflag & CBAUD))
-		keyspan_pda_set_modem_info(serial, (1<<7) | (1<<2));
-	else
-		keyspan_pda_set_modem_info(serial, 0);
-
 	/*Start reading from the device*/
 	port->interrupt_in_urb->dev = serial->dev;
 	rc = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL);
@@ -700,19 +722,11 @@ static int keyspan_pda_open(struct tty_struct *tty,
 error:
 	return rc;
 }
-
-
-static void keyspan_pda_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void keyspan_pda_close(struct usb_serial_port *port)
 {
 	struct usb_serial *serial = port->serial;
 
 	if (serial->dev) {
-		/* the normal serial device seems to always shut
-		   off DTR and RTS now */
-		if (tty->termios->c_cflag & HUPCL)
-			keyspan_pda_set_modem_info(serial, 0);
-
 		/* shutdown our bulk reads and writes */
 		usb_kill_urb(port->write_urb);
 		usb_kill_urb(port->interrupt_in_urb);
@@ -839,6 +853,8 @@ static struct usb_serial_driver keyspan_pda_device = {
 	.usb_driver = 		&keyspan_pda_driver,
 	.id_table =		id_table_std,
 	.num_ports =		1,
+	.dtr_rts =		keyspan_pda_dtr_rts,
+	.carrier_raised	=	keyspan_pda_carrier_raised,
 	.open =			keyspan_pda_open,
 	.close =		keyspan_pda_close,
 	.write =		keyspan_pda_write,
diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c
index fcd9082f3e7..fa817c66b3e 100644
--- a/drivers/usb/serial/kl5kusb105.c
+++ b/drivers/usb/serial/kl5kusb105.c
@@ -76,8 +76,7 @@ static int  klsi_105_startup(struct usb_serial *serial);
 static void klsi_105_shutdown(struct usb_serial *serial);
 static int  klsi_105_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void klsi_105_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void klsi_105_close(struct usb_serial_port *port);
 static int  klsi_105_write(struct tty_struct *tty,
 	struct usb_serial_port *port, const unsigned char *buf, int count);
 static void klsi_105_write_bulk_callback(struct urb *urb);
@@ -447,8 +446,7 @@ exit:
 } /* klsi_105_open */
 
 
-static void klsi_105_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void klsi_105_close(struct usb_serial_port *port)
 {
 	struct klsi_105_private *priv = usb_get_serial_port_data(port);
 	int rc;
diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c
index c148544953b..6b570498287 100644
--- a/drivers/usb/serial/kobil_sct.c
+++ b/drivers/usb/serial/kobil_sct.c
@@ -72,8 +72,7 @@ static int  kobil_startup(struct usb_serial *serial);
 static void kobil_shutdown(struct usb_serial *serial);
 static int  kobil_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void kobil_close(struct tty_struct *tty, struct usb_serial_port *port,
-			struct file *filp);
+static void kobil_close(struct usb_serial_port *port);
 static int  kobil_write(struct tty_struct *tty, struct usb_serial_port *port,
 			 const unsigned char *buf, int count);
 static int  kobil_write_room(struct tty_struct *tty);
@@ -209,7 +208,7 @@ static void kobil_shutdown(struct usb_serial *serial)
 
 	for (i = 0; i < serial->num_ports; ++i) {
 		while (serial->port[i]->port.count > 0)
-			kobil_close(NULL, serial->port[i], NULL);
+			kobil_close(serial->port[i]);
 		kfree(usb_get_serial_port_data(serial->port[i]));
 		usb_set_serial_port_data(serial->port[i], NULL);
 	}
@@ -346,11 +345,11 @@ static int kobil_open(struct tty_struct *tty,
 }
 
 
-static void kobil_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void kobil_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
+	/* FIXME: Add rts/dtr methods */
 	if (port->write_urb) {
 		usb_kill_urb(port->write_urb);
 		usb_free_urb(port->write_urb);
diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c
index 82930a7d509..873795548fc 100644
--- a/drivers/usb/serial/mct_u232.c
+++ b/drivers/usb/serial/mct_u232.c
@@ -95,8 +95,8 @@ static int  mct_u232_startup(struct usb_serial *serial);
 static void mct_u232_shutdown(struct usb_serial *serial);
 static int  mct_u232_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void mct_u232_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void mct_u232_close(struct usb_serial_port *port);
+static void mct_u232_dtr_rts(struct usb_serial_port *port, int on);
 static void mct_u232_read_int_callback(struct urb *urb);
 static void mct_u232_set_termios(struct tty_struct *tty,
 			struct usb_serial_port *port, struct ktermios *old);
@@ -140,6 +140,7 @@ static struct usb_serial_driver mct_u232_device = {
 	.num_ports =	     1,
 	.open =		     mct_u232_open,
 	.close =	     mct_u232_close,
+	.dtr_rts =	     mct_u232_dtr_rts,
 	.throttle =	     mct_u232_throttle,
 	.unthrottle =	     mct_u232_unthrottle,
 	.read_int_callback = mct_u232_read_int_callback,
@@ -496,29 +497,29 @@ error:
 	return retval;
 } /* mct_u232_open */
 
-
-static void mct_u232_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void mct_u232_dtr_rts(struct usb_serial_port *port, int on)
 {
-	unsigned int c_cflag;
 	unsigned int control_state;
 	struct mct_u232_private *priv = usb_get_serial_port_data(port);
-	dbg("%s port %d", __func__, port->number);
 
-	if (tty) {
-		c_cflag = tty->termios->c_cflag;
-		mutex_lock(&port->serial->disc_mutex);
-		if (c_cflag & HUPCL && !port->serial->disconnected) {
-			/* drop DTR and RTS */
-			spin_lock_irq(&priv->lock);
+	mutex_lock(&port->serial->disc_mutex);
+	if (!port->serial->disconnected) {
+		/* drop DTR and RTS */
+		spin_lock_irq(&priv->lock);
+		if (on)
+			priv->control_state |= TIOCM_DTR | TIOCM_RTS;
+		else
 			priv->control_state &= ~(TIOCM_DTR | TIOCM_RTS);
-			control_state = priv->control_state;
-			spin_unlock_irq(&priv->lock);
-			mct_u232_set_modem_ctrl(port->serial, control_state);
-		}
-		mutex_unlock(&port->serial->disc_mutex);
+		control_state = priv->control_state;
+		spin_unlock_irq(&priv->lock);
+		mct_u232_set_modem_ctrl(port->serial, control_state);
 	}
+	mutex_unlock(&port->serial->disc_mutex);
+}
 
+static void mct_u232_close(struct usb_serial_port *port)
+{
+	dbg("%s port %d", __func__, port->number);
 
 	if (port->serial->dev) {
 		/* shutdown our urbs */
diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c
index 24e3b5d4b4d..9e1a013ee7f 100644
--- a/drivers/usb/serial/mos7720.c
+++ b/drivers/usb/serial/mos7720.c
@@ -533,8 +533,7 @@ static int mos7720_chars_in_buffer(struct tty_struct *tty)
 	return chars;
 }
 
-static void mos7720_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void mos7720_close(struct usb_serial_port *port)
 {
 	struct usb_serial *serial;
 	struct moschip_port *mos7720_port;
diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c
index 84fb1dcd30d..10b78a37214 100644
--- a/drivers/usb/serial/mos7840.c
+++ b/drivers/usb/serial/mos7840.c
@@ -1135,54 +1135,12 @@ static int mos7840_chars_in_buffer(struct tty_struct *tty)
 
 }
 
-/************************************************************************
- *
- * mos7840_block_until_tx_empty
- *
- *	This function will block the close until one of the following:
- *		1. TX count are 0
- *		2. The mos7840 has stopped
- *		3. A timeout of 3 seconds without activity has expired
- *
- ************************************************************************/
-static void mos7840_block_until_tx_empty(struct tty_struct *tty,
-				struct moschip_port *mos7840_port)
-{
-	int timeout = HZ / 10;
-	int wait = 30;
-	int count;
-
-	while (1) {
-
-		count = mos7840_chars_in_buffer(tty);
-
-		/* Check for Buffer status */
-		if (count <= 0)
-			return;
-
-		/* Block the thread for a while */
-		interruptible_sleep_on_timeout(&mos7840_port->wait_chase,
-					       timeout);
-
-		/* No activity.. count down section */
-		wait--;
-		if (wait == 0) {
-			dbg("%s - TIMEOUT", __func__);
-			return;
-		} else {
-			/* Reset timeout value back to seconds */
-			wait = 30;
-		}
-	}
-}
-
 /*****************************************************************************
  * mos7840_close
  *	this function is called by the tty driver when a port is closed
  *****************************************************************************/
 
-static void mos7840_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void mos7840_close(struct usb_serial_port *port)
 {
 	struct usb_serial *serial;
 	struct moschip_port *mos7840_port;
@@ -1223,10 +1181,6 @@ static void mos7840_close(struct tty_struct *tty,
 		}
 	}
 
-	if (serial->dev)
-		/* flush and block until tx is empty */
-		mos7840_block_until_tx_empty(tty, mos7840_port);
-
 	/* While closing port, shutdown all bulk read, write  *
 	 * and interrupt read if they exists                  */
 	if (serial->dev) {
diff --git a/drivers/usb/serial/navman.c b/drivers/usb/serial/navman.c
index bcdcbb82270..f5f3751a888 100644
--- a/drivers/usb/serial/navman.c
+++ b/drivers/usb/serial/navman.c
@@ -98,8 +98,7 @@ static int navman_open(struct tty_struct *tty,
 	return result;
 }
 
-static void navman_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void navman_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 
diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c
index df653971272..1104617334f 100644
--- a/drivers/usb/serial/omninet.c
+++ b/drivers/usb/serial/omninet.c
@@ -66,8 +66,7 @@ static int debug;
 /* function prototypes */
 static int  omninet_open(struct tty_struct *tty, struct usb_serial_port *port,
 							struct file *filp);
-static void omninet_close(struct tty_struct *tty, struct usb_serial_port *port,
-							struct file *filp);
+static void omninet_close(struct usb_serial_port *port);
 static void omninet_read_bulk_callback(struct urb *urb);
 static void omninet_write_bulk_callback(struct urb *urb);
 static int  omninet_write(struct tty_struct *tty, struct usb_serial_port *port,
@@ -189,8 +188,7 @@ static int omninet_open(struct tty_struct *tty,
 	return result;
 }
 
-static void omninet_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void omninet_close(struct usb_serial_port *port)
 {
 	dbg("%s - port %d", __func__, port->number);
 	usb_kill_urb(port->read_urb);
diff --git a/drivers/usb/serial/opticon.c b/drivers/usb/serial/opticon.c
index b500ad10b75..c20480aa975 100644
--- a/drivers/usb/serial/opticon.c
+++ b/drivers/usb/serial/opticon.c
@@ -173,8 +173,7 @@ static int opticon_open(struct tty_struct *tty, struct usb_serial_port *port,
 	return result;
 }
 
-static void opticon_close(struct tty_struct *tty, struct usb_serial_port *port,
-			  struct file *filp)
+static void opticon_close(struct usb_serial_port *port)
 {
 	struct opticon_private *priv = usb_get_serial_data(port->serial);
 
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 7817b82889c..a16d69fadba 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -45,8 +45,9 @@
 /* Function prototypes */
 static int  option_open(struct tty_struct *tty, struct usb_serial_port *port,
 							struct file *filp);
-static void option_close(struct tty_struct *tty, struct usb_serial_port *port,
-							struct file *filp);
+static void option_close(struct usb_serial_port *port);
+static void option_dtr_rts(struct usb_serial_port *port, int on);
+
 static int  option_startup(struct usb_serial *serial);
 static void option_shutdown(struct usb_serial *serial);
 static int  option_write_room(struct tty_struct *tty);
@@ -61,7 +62,7 @@ static void option_set_termios(struct tty_struct *tty,
 static int  option_tiocmget(struct tty_struct *tty, struct file *file);
 static int  option_tiocmset(struct tty_struct *tty, struct file *file,
 				unsigned int set, unsigned int clear);
-static int  option_send_setup(struct tty_struct *tty, struct usb_serial_port *port);
+static int  option_send_setup(struct usb_serial_port *port);
 static int  option_suspend(struct usb_serial *serial, pm_message_t message);
 static int  option_resume(struct usb_serial *serial);
 
@@ -551,6 +552,7 @@ static struct usb_serial_driver option_1port_device = {
 	.num_ports         = 1,
 	.open              = option_open,
 	.close             = option_close,
+	.dtr_rts	   = option_dtr_rts,
 	.write             = option_write,
 	.write_room        = option_write_room,
 	.chars_in_buffer   = option_chars_in_buffer,
@@ -630,7 +632,7 @@ static void option_set_termios(struct tty_struct *tty,
 	dbg("%s", __func__);
 	/* Doesn't support option setting */
 	tty_termios_copy_hw(tty->termios, old_termios);
-	option_send_setup(tty, port);
+	option_send_setup(port);
 }
 
 static int option_tiocmget(struct tty_struct *tty, struct file *file)
@@ -669,7 +671,7 @@ static int option_tiocmset(struct tty_struct *tty, struct file *file,
 		portdata->rts_state = 0;
 	if (clear & TIOCM_DTR)
 		portdata->dtr_state = 0;
-	return option_send_setup(tty, port);
+	return option_send_setup(port);
 }
 
 /* Write */
@@ -897,10 +899,6 @@ static int option_open(struct tty_struct *tty,
 
 	dbg("%s", __func__);
 
-	/* Set some sane defaults */
-	portdata->rts_state = 1;
-	portdata->dtr_state = 1;
-
 	/* Reset low level data toggle and start reading from endpoints */
 	for (i = 0; i < N_IN_URB; i++) {
 		urb = portdata->in_urbs[i];
@@ -936,37 +934,43 @@ static int option_open(struct tty_struct *tty,
 				usb_pipeout(urb->pipe), 0); */
 	}
 
-	option_send_setup(tty, port);
+	option_send_setup(port);
 
 	return 0;
 }
 
-static void option_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void option_dtr_rts(struct usb_serial_port *port, int on)
 {
-	int i;
 	struct usb_serial *serial = port->serial;
 	struct option_port_private *portdata;
 
 	dbg("%s", __func__);
 	portdata = usb_get_serial_port_data(port);
+	mutex_lock(&serial->disc_mutex);
+	portdata->rts_state = on;
+	portdata->dtr_state = on;
+	if (serial->dev)
+		option_send_setup(port);
+	mutex_unlock(&serial->disc_mutex);
+}
 
-	portdata->rts_state = 0;
-	portdata->dtr_state = 0;
 
-	if (serial->dev) {
-		mutex_lock(&serial->disc_mutex);
-		if (!serial->disconnected)
-			option_send_setup(tty, port);
-		mutex_unlock(&serial->disc_mutex);
+static void option_close(struct usb_serial_port *port)
+{
+	int i;
+	struct usb_serial *serial = port->serial;
+	struct option_port_private *portdata;
+
+	dbg("%s", __func__);
+	portdata = usb_get_serial_port_data(port);
 
+	if (serial->dev) {
 		/* Stop reading/writing urbs */
 		for (i = 0; i < N_IN_URB; i++)
 			usb_kill_urb(portdata->in_urbs[i]);
 		for (i = 0; i < N_OUT_URB; i++)
 			usb_kill_urb(portdata->out_urbs[i]);
 	}
-	tty_port_tty_set(&port->port, NULL);
 }
 
 /* Helper functions used by option_setup_urbs */
@@ -1032,28 +1036,24 @@ static void option_setup_urbs(struct usb_serial *serial)
  * This is exactly the same as SET_CONTROL_LINE_STATE from the PSTN
  * CDC.
 */
-static int option_send_setup(struct tty_struct *tty,
-						struct usb_serial_port *port)
+static int option_send_setup(struct usb_serial_port *port)
 {
 	struct usb_serial *serial = port->serial;
 	struct option_port_private *portdata;
 	int ifNum = serial->interface->cur_altsetting->desc.bInterfaceNumber;
+	int val = 0;
 	dbg("%s", __func__);
 
 	portdata = usb_get_serial_port_data(port);
 
-	if (tty) {
-		int val = 0;
-		if (portdata->dtr_state)
-			val |= 0x01;
-		if (portdata->rts_state)
-			val |= 0x02;
+	if (portdata->dtr_state)
+		val |= 0x01;
+	if (portdata->rts_state)
+		val |= 0x02;
 
-		return usb_control_msg(serial->dev,
-			usb_rcvctrlpipe(serial->dev, 0),
-			0x22, 0x21, val, ifNum, NULL, 0, USB_CTRL_SET_TIMEOUT);
-	}
-	return 0;
+	return usb_control_msg(serial->dev,
+		usb_rcvctrlpipe(serial->dev, 0),
+		0x22, 0x21, val, ifNum, NULL, 0, USB_CTRL_SET_TIMEOUT);
 }
 
 static int option_startup(struct usb_serial *serial)
diff --git a/drivers/usb/serial/oti6858.c b/drivers/usb/serial/oti6858.c
index ba551f00f16..7de54781fe6 100644
--- a/drivers/usb/serial/oti6858.c
+++ b/drivers/usb/serial/oti6858.c
@@ -143,8 +143,7 @@ struct oti6858_control_pkt {
 /* function prototypes */
 static int oti6858_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void oti6858_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void oti6858_close(struct usb_serial_port *port);
 static void oti6858_set_termios(struct tty_struct *tty,
 			struct usb_serial_port *port, struct ktermios *old);
 static int oti6858_ioctl(struct tty_struct *tty, struct file *file,
@@ -622,67 +621,30 @@ static int oti6858_open(struct tty_struct *tty,
 	if (result != 0) {
 		dev_err(&port->dev, "%s(): usb_submit_urb() failed"
 			       " with error %d\n", __func__, result);
-		oti6858_close(tty, port, NULL);
+		oti6858_close(port);
 		return -EPROTO;
 	}
 
 	/* setup termios */
 	if (tty)
 		oti6858_set_termios(tty, port, &tmp_termios);
-
+	port->port.drain_delay = 256;	/* FIXME: check the FIFO length */
 	return 0;
 }
 
-static void oti6858_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void oti6858_close(struct usb_serial_port *port)
 {
 	struct oti6858_private *priv = usb_get_serial_port_data(port);
 	unsigned long flags;
-	long timeout;
-	wait_queue_t wait;
 
 	dbg("%s(port = %d)", __func__, port->number);
 
-	/* wait for data to drain from the buffer */
 	spin_lock_irqsave(&priv->lock, flags);
-	timeout = 30 * HZ;	/* PL2303_CLOSING_WAIT */
-	init_waitqueue_entry(&wait, current);
-	add_wait_queue(&tty->write_wait, &wait);
-	dbg("%s(): entering wait loop", __func__);
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (oti6858_buf_data_avail(priv->buf) == 0
-		|| timeout == 0 || signal_pending(current)
-		|| port->serial->disconnected)
-			break;
-		spin_unlock_irqrestore(&priv->lock, flags);
-		timeout = schedule_timeout(timeout);
-		spin_lock_irqsave(&priv->lock, flags);
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&tty->write_wait, &wait);
-	dbg("%s(): after wait loop", __func__);
-
 	/* clear out any remaining data in the buffer */
 	oti6858_buf_clear(priv->buf);
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	/* wait for characters to drain from the device */
-	/* (this is long enough for the entire 256 byte */
-	/* pl2303 hardware buffer to drain with no flow */
-	/* control for data rates of 1200 bps or more, */
-	/* for lower rates we should really know how much */
-	/* data is in the buffer to compute a delay */
-	/* that is not unnecessarily long) */
-	/* FIXME
-	bps = tty_get_baud_rate(tty);
-	if (bps > 1200)
-		timeout = max((HZ*2560)/bps,HZ/10);
-	else
-	*/
-		timeout = 2*HZ;
-	schedule_timeout_interruptible(timeout);
-	dbg("%s(): after schedule_timeout_interruptible()", __func__);
+	dbg("%s(): after buf_clear()", __func__);
 
 	/* cancel scheduled setup */
 	cancel_delayed_work(&priv->delayed_setup_work);
@@ -694,15 +656,6 @@ static void oti6858_close(struct tty_struct *tty,
 	usb_kill_urb(port->write_urb);
 	usb_kill_urb(port->read_urb);
 	usb_kill_urb(port->interrupt_in_urb);
-
-	/*
-	if (tty && (tty->termios->c_cflag) & HUPCL) {
-		// drop DTR and RTS
-		spin_lock_irqsave(&priv->lock, flags);
-		priv->pending_setup.control &= ~CONTROL_MASK;
-		spin_unlock_irqrestore(&priv->lock, flags);
-	}
-	*/
 }
 
 static int oti6858_tiocmset(struct tty_struct *tty, struct file *file,
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 751a533a434..e02dc3d643c 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -652,69 +652,41 @@ static void pl2303_set_termios(struct tty_struct *tty,
 	kfree(buf);
 }
 
-static void pl2303_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void pl2303_dtr_rts(struct usb_serial_port *port, int on)
+{
+	struct pl2303_private *priv = usb_get_serial_port_data(port);
+	unsigned long flags;
+	u8 control;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	/* Change DTR and RTS */
+	if (on)
+		priv->line_control |= (CONTROL_DTR | CONTROL_RTS);
+	else
+		priv->line_control &= ~(CONTROL_DTR | CONTROL_RTS);
+	control = priv->line_control;
+	spin_unlock_irqrestore(&priv->lock, flags);
+	set_control_lines(port->serial->dev, control);
+}
+
+static void pl2303_close(struct usb_serial_port *port)
 {
 	struct pl2303_private *priv = usb_get_serial_port_data(port);
 	unsigned long flags;
-	unsigned int c_cflag;
-	int bps;
-	long timeout;
-	wait_queue_t wait;
 
 	dbg("%s - port %d", __func__, port->number);
 
-	/* wait for data to drain from the buffer */
 	spin_lock_irqsave(&priv->lock, flags);
-	timeout = PL2303_CLOSING_WAIT;
-	init_waitqueue_entry(&wait, current);
-	add_wait_queue(&tty->write_wait, &wait);
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (pl2303_buf_data_avail(priv->buf) == 0 ||
-		    timeout == 0 || signal_pending(current) ||
-		    port->serial->disconnected)
-			break;
-		spin_unlock_irqrestore(&priv->lock, flags);
-		timeout = schedule_timeout(timeout);
-		spin_lock_irqsave(&priv->lock, flags);
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&tty->write_wait, &wait);
 	/* clear out any remaining data in the buffer */
 	pl2303_buf_clear(priv->buf);
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	/* wait for characters to drain from the device */
-	/* (this is long enough for the entire 256 byte */
-	/* pl2303 hardware buffer to drain with no flow */
-	/* control for data rates of 1200 bps or more, */
-	/* for lower rates we should really know how much */
-	/* data is in the buffer to compute a delay */
-	/* that is not unnecessarily long) */
-	bps = tty_get_baud_rate(tty);
-	if (bps > 1200)
-		timeout = max((HZ*2560)/bps, HZ/10);
-	else
-		timeout = 2*HZ;
-	schedule_timeout_interruptible(timeout);
-
 	/* shutdown our urbs */
 	dbg("%s - shutting down urbs", __func__);
 	usb_kill_urb(port->write_urb);
 	usb_kill_urb(port->read_urb);
 	usb_kill_urb(port->interrupt_in_urb);
 
-	if (tty) {
-		c_cflag = tty->termios->c_cflag;
-		if (c_cflag & HUPCL) {
-			/* drop DTR and RTS */
-			spin_lock_irqsave(&priv->lock, flags);
-			priv->line_control = 0;
-			spin_unlock_irqrestore(&priv->lock, flags);
-			set_control_lines(port->serial->dev, 0);
-		}
-	}
 }
 
 static int pl2303_open(struct tty_struct *tty,
@@ -748,7 +720,7 @@ static int pl2303_open(struct tty_struct *tty,
 	if (result) {
 		dev_err(&port->dev, "%s - failed submitting read urb,"
 			" error %d\n", __func__, result);
-		pl2303_close(tty, port, NULL);
+		pl2303_close(port);
 		return -EPROTO;
 	}
 
@@ -758,9 +730,10 @@ static int pl2303_open(struct tty_struct *tty,
 	if (result) {
 		dev_err(&port->dev, "%s - failed submitting interrupt urb,"
 			" error %d\n", __func__, result);
-		pl2303_close(tty, port, NULL);
+		pl2303_close(port);
 		return -EPROTO;
 	}
+	port->port.drain_delay = 256;
 	return 0;
 }
 
@@ -821,6 +794,14 @@ static int pl2303_tiocmget(struct tty_struct *tty, struct file *file)
 	return result;
 }
 
+static int pl2303_carrier_raised(struct usb_serial_port *port)
+{
+	struct pl2303_private *priv = usb_get_serial_port_data(port);
+	if (priv->line_status & UART_DCD)
+		return 1;
+	return 0;
+}
+
 static int wait_modem_info(struct usb_serial_port *port, unsigned int arg)
 {
 	struct pl2303_private *priv = usb_get_serial_port_data(port);
@@ -1125,6 +1106,8 @@ static struct usb_serial_driver pl2303_device = {
 	.num_ports =		1,
 	.open =			pl2303_open,
 	.close =		pl2303_close,
+	.dtr_rts = 		pl2303_dtr_rts,
+	.carrier_raised =	pl2303_carrier_raised,
 	.write =		pl2303_write,
 	.ioctl =		pl2303_ioctl,
 	.break_ctl =		pl2303_break_ctl,
diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index 913225c6161..1319b8968d8 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -240,57 +240,39 @@ struct sierra_port_private {
 	int ri_state;
 };
 
-static int sierra_send_setup(struct tty_struct *tty,
-						struct usb_serial_port *port)
+static int sierra_send_setup(struct usb_serial_port *port)
 {
 	struct usb_serial *serial = port->serial;
 	struct sierra_port_private *portdata;
 	__u16 interface = 0;
+	int val = 0;
 
 	dev_dbg(&port->dev, "%s", __func__);
 
 	portdata = usb_get_serial_port_data(port);
 
-	if (tty) {
-		int val = 0;
-		if (portdata->dtr_state)
-			val |= 0x01;
-		if (portdata->rts_state)
-			val |= 0x02;
-
-		/* If composite device then properly report interface */
-		if (serial->num_ports == 1) {
-			interface = sierra_calc_interface(serial);
-
-			/* Control message is sent only to interfaces with
-			 * interrupt_in endpoints
-			 */
-			if (port->interrupt_in_urb) {
-				/* send control message */
-				return usb_control_msg(serial->dev,
-					usb_rcvctrlpipe(serial->dev, 0),
-					0x22, 0x21, val, interface,
-					NULL, 0, USB_CTRL_SET_TIMEOUT);
-			}
-		}
-
-		/* Otherwise the need to do non-composite mapping */
-		else {
-			if (port->bulk_out_endpointAddress == 2)
-				interface = 0;
-			else if (port->bulk_out_endpointAddress == 4)
-				interface = 1;
-			else if (port->bulk_out_endpointAddress == 5)
-				interface = 2;
-
-			return usb_control_msg(serial->dev,
-				usb_rcvctrlpipe(serial->dev, 0),
-				0x22, 0x21, val, interface,
-				NULL, 0, USB_CTRL_SET_TIMEOUT);
-
-		}
+	if (portdata->dtr_state)
+		val |= 0x01;
+	if (portdata->rts_state)
+		val |= 0x02;
+
+	/* If composite device then properly report interface */
+	if (serial->num_ports == 1)
+		interface = sierra_calc_interface(serial);
+
+	/* Otherwise the need to do non-composite mapping */
+	else {
+		if (port->bulk_out_endpointAddress == 2)
+			interface = 0;
+		else if (port->bulk_out_endpointAddress == 4)
+			interface = 1;
+		else if (port->bulk_out_endpointAddress == 5)
+			interface = 2;
 	}
-
+	return usb_control_msg(serial->dev,
+			usb_rcvctrlpipe(serial->dev, 0),
+			0x22, 0x21, val, interface,
+			NULL, 0, USB_CTRL_SET_TIMEOUT);
 	return 0;
 }
 
@@ -299,7 +281,7 @@ static void sierra_set_termios(struct tty_struct *tty,
 {
 	dev_dbg(&port->dev, "%s", __func__);
 	tty_termios_copy_hw(tty->termios, old_termios);
-	sierra_send_setup(tty, port);
+	sierra_send_setup(port);
 }
 
 static int sierra_tiocmget(struct tty_struct *tty, struct file *file)
@@ -338,7 +320,7 @@ static int sierra_tiocmset(struct tty_struct *tty, struct file *file,
 		portdata->rts_state = 0;
 	if (clear & TIOCM_DTR)
 		portdata->dtr_state = 0;
-	return sierra_send_setup(tty, port);
+	return sierra_send_setup(port);
 }
 
 static void sierra_outdat_callback(struct urb *urb)
@@ -598,7 +580,7 @@ static int sierra_open(struct tty_struct *tty,
 		}
 	}
 
-	sierra_send_setup(tty, port);
+	sierra_send_setup(port);
 
 	/* start up the interrupt endpoint if we have one */
 	if (port->interrupt_in_urb) {
@@ -610,32 +592,38 @@ static int sierra_open(struct tty_struct *tty,
 	return 0;
 }
 
-static void sierra_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void sierra_dtr_rts(struct usb_serial_port *port, int on)
 {
-	int i;
 	struct usb_serial *serial = port->serial;
 	struct sierra_port_private *portdata;
 
-	dev_dbg(&port->dev, "%s", __func__);
 	portdata = usb_get_serial_port_data(port);
-
-	portdata->rts_state = 0;
-	portdata->dtr_state = 0;
+	portdata->rts_state = on;
+	portdata->dtr_state = on;
 
 	if (serial->dev) {
 		mutex_lock(&serial->disc_mutex);
 		if (!serial->disconnected)
-			sierra_send_setup(tty, port);
+			sierra_send_setup(port);
 		mutex_unlock(&serial->disc_mutex);
+	}
+}
+
+static void sierra_close(struct usb_serial_port *port)
+{
+	int i;
+	struct usb_serial *serial = port->serial;
+	struct sierra_port_private *portdata;
 
+	dev_dbg(&port->dev, "%s", __func__);
+	portdata = usb_get_serial_port_data(port);
+
+	if (serial->dev) {
 		/* Stop reading/writing urbs */
 		for (i = 0; i < N_IN_URB; i++)
 			usb_kill_urb(portdata->in_urbs[i]);
 	}
-
 	usb_kill_urb(port->interrupt_in_urb);
-	tty_port_tty_set(&port->port, NULL);
 }
 
 static int sierra_startup(struct usb_serial *serial)
@@ -737,6 +725,7 @@ static struct usb_serial_driver sierra_device = {
 	.probe		   = sierra_probe,
 	.open              = sierra_open,
 	.close             = sierra_close,
+	.dtr_rts	   = sierra_dtr_rts,
 	.write             = sierra_write,
 	.write_room        = sierra_write_room,
 	.set_termios       = sierra_set_termios,
diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c
index 5e7528cc81a..8f7ed8f1399 100644
--- a/drivers/usb/serial/spcp8x5.c
+++ b/drivers/usb/serial/spcp8x5.c
@@ -446,66 +446,47 @@ static void spcp8x5_set_workMode(struct usb_device *dev, u16 value,
 			"RTSCTS usb_control_msg(enable flowctrl) = %d\n", ret);
 }
 
+static int spcp8x5_carrier_raised(struct usb_serial_port *port)
+{
+	struct spcp8x5_private *priv = usb_get_serial_port_data(port);
+	if (priv->line_status & MSR_STATUS_LINE_DCD)
+		return 1;
+	return 0;
+}
+
+static void spcp8x5_dtr_rts(struct usb_serial_port *port, int on)
+{
+	struct spcp8x5_private *priv = usb_get_serial_port_data(port);
+	unsigned long flags;
+	u8 control;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	if (on)
+		priv->line_control = MCR_CONTROL_LINE_DTR
+						| MCR_CONTROL_LINE_RTS;
+	else
+		priv->line_control &= ~ (MCR_CONTROL_LINE_DTR
+						| MCR_CONTROL_LINE_RTS);
+	control = priv->line_control;
+	spin_unlock_irqrestore(&priv->lock, flags);
+	spcp8x5_set_ctrlLine(port->serial->dev, control , priv->type);
+}
+
 /* close the serial port. We should wait for data sending to device 1st and
  * then kill all urb. */
-static void spcp8x5_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void spcp8x5_close(struct usb_serial_port *port)
 {
 	struct spcp8x5_private *priv = usb_get_serial_port_data(port);
 	unsigned long flags;
-	unsigned int c_cflag;
-	int bps;
-	long timeout;
-	wait_queue_t wait;
 	int result;
 
 	dbg("%s - port %d", __func__, port->number);
 
-	/* wait for data to drain from the buffer */
 	spin_lock_irqsave(&priv->lock, flags);
-	timeout = SPCP8x5_CLOSING_WAIT;
-	init_waitqueue_entry(&wait, current);
-	add_wait_queue(&tty->write_wait, &wait);
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (ringbuf_avail_data(priv->buf) == 0 ||
-		    timeout == 0 || signal_pending(current))
-			break;
-		spin_unlock_irqrestore(&priv->lock, flags);
-		timeout = schedule_timeout(timeout);
-		spin_lock_irqsave(&priv->lock, flags);
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&tty->write_wait, &wait);
-
 	/* clear out any remaining data in the buffer */
 	clear_ringbuf(priv->buf);
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	/* wait for characters to drain from the device (this is long enough
-	 * for the entire all byte spcp8x5 hardware buffer to drain with no
-	 * flow control for data rates of 1200 bps or more, for lower rates we
-	 * should really know how much data is in the buffer to compute a delay
-	 * that is not unnecessarily long) */
-	bps = tty_get_baud_rate(tty);
-	if (bps > 1200)
-		timeout = max((HZ*2560) / bps, HZ/10);
-	else
-		timeout = 2*HZ;
-	set_current_state(TASK_INTERRUPTIBLE);
-	schedule_timeout(timeout);
-
-	/* clear control lines */
-	if (tty) {
-		c_cflag = tty->termios->c_cflag;
-		if (c_cflag & HUPCL) {
-			spin_lock_irqsave(&priv->lock, flags);
-			priv->line_control = 0;
-			spin_unlock_irqrestore(&priv->lock, flags);
-			spcp8x5_set_ctrlLine(port->serial->dev, 0 , priv->type);
-		}
-	}
-
 	/* kill urb */
 	if (port->write_urb != NULL) {
 		result = usb_unlink_urb(port->write_urb);
@@ -665,13 +646,6 @@ static int spcp8x5_open(struct tty_struct *tty,
 	if (ret)
 		return ret;
 
-	spin_lock_irqsave(&priv->lock, flags);
-	if (tty && (tty->termios->c_cflag & CBAUD))
-		priv->line_control = MCR_DTR | MCR_RTS;
-	else
-		priv->line_control = 0;
-	spin_unlock_irqrestore(&priv->lock, flags);
-
 	spcp8x5_set_ctrlLine(serial->dev, priv->line_control , priv->type);
 
 	/* Setup termios */
@@ -691,9 +665,10 @@ static int spcp8x5_open(struct tty_struct *tty,
 	port->read_urb->dev = serial->dev;
 	ret = usb_submit_urb(port->read_urb, GFP_KERNEL);
 	if (ret) {
-		spcp8x5_close(tty, port, NULL);
+		spcp8x5_close(port);
 		return -EPROTO;
 	}
+	port->port.drain_delay = 256;
 	return 0;
 }
 
@@ -1033,6 +1008,8 @@ static struct usb_serial_driver spcp8x5_device = {
 	.num_ports		= 1,
 	.open 			= spcp8x5_open,
 	.close 			= spcp8x5_close,
+	.dtr_rts		= spcp8x5_dtr_rts,
+	.carrier_raised		= spcp8x5_carrier_raised,
 	.write 			= spcp8x5_write,
 	.set_termios 		= spcp8x5_set_termios,
 	.ioctl 			= spcp8x5_ioctl,
diff --git a/drivers/usb/serial/symbolserial.c b/drivers/usb/serial/symbolserial.c
index 69879e43794..8b07ebc6bae 100644
--- a/drivers/usb/serial/symbolserial.c
+++ b/drivers/usb/serial/symbolserial.c
@@ -152,8 +152,7 @@ static int symbol_open(struct tty_struct *tty, struct usb_serial_port *port,
 	return result;
 }
 
-static void symbol_close(struct tty_struct *tty, struct usb_serial_port *port,
-			  struct file *filp)
+static void symbol_close(struct usb_serial_port *port)
 {
 	struct symbol_private *priv = usb_get_serial_data(port->serial);
 
diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c
index 0a64bac306e..42cb04c403b 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.c
+++ b/drivers/usb/serial/ti_usb_3410_5052.c
@@ -100,8 +100,7 @@ static int ti_startup(struct usb_serial *serial);
 static void ti_shutdown(struct usb_serial *serial);
 static int ti_open(struct tty_struct *tty, struct usb_serial_port *port,
 		struct file *file);
-static void ti_close(struct tty_struct *tty, struct usb_serial_port *port,
-		struct file *file);
+static void ti_close(struct usb_serial_port *port);
 static int ti_write(struct tty_struct *tty, struct usb_serial_port *port,
 		const unsigned char *data, int count);
 static int ti_write_room(struct tty_struct *tty);
@@ -647,8 +646,7 @@ release_lock:
 }
 
 
-static void ti_close(struct tty_struct *tty, struct usb_serial_port *port,
-							struct file *file)
+static void ti_close(struct usb_serial_port *port)
 {
 	struct ti_device *tdev;
 	struct ti_port *tport;
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index f331e2bde88..1967a7edc10 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -238,9 +238,11 @@ static int serial_open (struct tty_struct *tty, struct file *filp)
 			goto bailout_interface_put;
 		mutex_unlock(&serial->disc_mutex);
 	}
-
 	mutex_unlock(&port->mutex);
-	return 0;
+	/* Now do the correct tty layer semantics */
+	retval = tty_port_block_til_ready(&port->port, tty, filp);
+	if (retval == 0)
+		return 0;
 
 bailout_interface_put:
 	usb_autopm_put_interface(serial->interface);
@@ -259,64 +261,89 @@ bailout_serial_put:
 	return retval;
 }
 
-static void serial_close(struct tty_struct *tty, struct file *filp)
+/**
+ *	serial_do_down		-	shut down hardware
+ *	@port: port to shut down
+ *
+ *	Shut down a USB port unless it is the console. We never shut down the
+ *	console hardware as it will always be in use.
+ *
+ *	Don't free any resources at this point
+ */
+static void serial_do_down(struct usb_serial_port *port)
 {
-	struct usb_serial_port *port = tty->driver_data;
+	struct usb_serial_driver *drv = port->serial->type;
 	struct usb_serial *serial;
 	struct module *owner;
-	int count;
 
-	if (!port)
+	/* The console is magical, do not hang up the console hardware
+	   or there will be tears */
+	if (port->console)
 		return;
 
-	dbg("%s - port %d", __func__, port->number);
-
 	mutex_lock(&port->mutex);
 	serial = port->serial;
 	owner = serial->type->driver.owner;
 
-	if (port->port.count == 0) {
-		mutex_unlock(&port->mutex);
-		return;
-	}
-
-	if (port->port.count == 1)
-		/* only call the device specific close if this
-		 * port is being closed by the last owner. Ensure we do
-		 * this before we drop the port count. The call is protected
-		 * by the port mutex
-		 */
-		serial->type->close(tty, port, filp);
-
-	if (port->port.count == (port->console ? 2 : 1)) {
-		struct tty_struct *tty = tty_port_tty_get(&port->port);
-		if (tty) {
-			/* We must do this before we drop the port count to
-			   zero. */
-			if (tty->driver_data)
-				tty->driver_data = NULL;
-			tty_port_tty_set(&port->port, NULL);
-			tty_kref_put(tty);
-		}
-	}
+	if (drv->close)
+		drv->close(port);
 
-	--port->port.count;
-	count = port->port.count;
 	mutex_unlock(&port->mutex);
-	put_device(&port->dev);
+}
+
+/**
+ *	serial_do_free		-	free resources post close/hangup
+ *	@port: port to free up
+ *
+ *	Do the resource freeing and refcount dropping for the port. We must
+ *	be careful about ordering and we must avoid freeing up the console.
+ */
 
+static void serial_do_free(struct usb_serial_port *port)
+{
+	struct usb_serial *serial;
+	struct module *owner;
+
+	/* The console is magical, do not hang up the console hardware
+	   or there will be tears */
+	if (port->console)
+		return;
+
+	serial = port->serial;
+	owner = serial->type->driver.owner;
+	put_device(&port->dev);
 	/* Mustn't dereference port any more */
-	if (count == 0) {
-		mutex_lock(&serial->disc_mutex);
-		if (!serial->disconnected)
-			usb_autopm_put_interface(serial->interface);
-		mutex_unlock(&serial->disc_mutex);
-	}
+	mutex_lock(&serial->disc_mutex);
+	if (!serial->disconnected)
+		usb_autopm_put_interface(serial->interface);
+	mutex_unlock(&serial->disc_mutex);
 	usb_serial_put(serial);
-
 	/* Mustn't dereference serial any more */
-	if (count == 0)
-		module_put(owner);
+	module_put(owner);
+}
+
+static void serial_close(struct tty_struct *tty, struct file *filp)
+{
+	struct usb_serial_port *port = tty->driver_data;
+
+	dbg("%s - port %d", __func__, port->number);
+
+
+	if (tty_port_close_start(&port->port, tty, filp) == 0)
+		return;
+
+	serial_do_down(port);		
+	tty_port_close_end(&port->port, tty);
+	tty_port_tty_set(&port->port, NULL);
+	serial_do_free(port);
+}
+
+static void serial_hangup(struct tty_struct *tty)
+{
+	struct usb_serial_port *port = tty->driver_data;
+	serial_do_down(port);
+	tty_port_hangup(&port->port);
+	serial_do_free(port);
 }
 
 static int serial_write(struct tty_struct *tty, const unsigned char *buf,
@@ -648,6 +675,29 @@ static struct usb_serial_driver *search_serial_device(
 	return NULL;
 }
 
+static int serial_carrier_raised(struct tty_port *port)
+{
+	struct usb_serial_port *p = container_of(port, struct usb_serial_port, port);
+	struct usb_serial_driver *drv = p->serial->type;
+	if (drv->carrier_raised)
+		return drv->carrier_raised(p);
+	/* No carrier control - don't block */
+	return 1;	
+}
+
+static void serial_dtr_rts(struct tty_port *port, int on)
+{
+	struct usb_serial_port *p = container_of(port, struct usb_serial_port, port);
+	struct usb_serial_driver *drv = p->serial->type;
+	if (drv->dtr_rts)
+		drv->dtr_rts(p, on);
+}
+
+static const struct tty_port_operations serial_port_ops = {
+	.carrier_raised = serial_carrier_raised,
+	.dtr_rts = serial_dtr_rts,
+};
+
 int usb_serial_probe(struct usb_interface *interface,
 			       const struct usb_device_id *id)
 {
@@ -841,6 +891,7 @@ int usb_serial_probe(struct usb_interface *interface,
 		if (!port)
 			goto probe_error;
 		tty_port_init(&port->port);
+		port->port.ops = &serial_port_ops;
 		port->serial = serial;
 		spin_lock_init(&port->lock);
 		mutex_init(&port->mutex);
@@ -1071,6 +1122,9 @@ void usb_serial_disconnect(struct usb_interface *interface)
 		if (port) {
 			struct tty_struct *tty = tty_port_tty_get(&port->port);
 			if (tty) {
+				/* The hangup will occur asynchronously but
+				   the object refcounts will sort out all the
+				   cleanup */
 				tty_hangup(tty);
 				tty_kref_put(tty);
 			}
@@ -1135,6 +1189,7 @@ static const struct tty_operations serial_ops = {
 	.open =			serial_open,
 	.close =		serial_close,
 	.write =		serial_write,
+	.hangup = 		serial_hangup,
 	.write_room =		serial_write_room,
 	.ioctl =		serial_ioctl,
 	.set_termios =		serial_set_termios,
@@ -1147,6 +1202,7 @@ static const struct tty_operations serial_ops = {
 	.proc_fops =		&serial_proc_fops,
 };
 
+
 struct tty_driver *usb_serial_tty_driver;
 
 static int __init usb_serial_init(void)
diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c
index 5ac414bda71..b15f1c0e1d4 100644
--- a/drivers/usb/serial/visor.c
+++ b/drivers/usb/serial/visor.c
@@ -38,8 +38,7 @@
 /* function prototypes for a handspring visor */
 static int  visor_open(struct tty_struct *tty, struct usb_serial_port *port,
 					struct file *filp);
-static void visor_close(struct tty_struct *tty, struct usb_serial_port *port,
-					struct file *filp);
+static void visor_close(struct usb_serial_port *port);
 static int  visor_write(struct tty_struct *tty, struct usb_serial_port *port,
 					const unsigned char *buf, int count);
 static int  visor_write_room(struct tty_struct *tty);
@@ -324,8 +323,7 @@ exit:
 }
 
 
-static void visor_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void visor_close(struct usb_serial_port *port)
 {
 	struct visor_private *priv = usb_get_serial_port_data(port);
 	unsigned char *transfer_buffer;
diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c
index 5335d3211c0..7c7295d09f3 100644
--- a/drivers/usb/serial/whiteheat.c
+++ b/drivers/usb/serial/whiteheat.c
@@ -147,8 +147,7 @@ static int  whiteheat_attach(struct usb_serial *serial);
 static void whiteheat_shutdown(struct usb_serial *serial);
 static int  whiteheat_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-static void whiteheat_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+static void whiteheat_close(struct usb_serial_port *port);
 static int  whiteheat_write(struct tty_struct *tty,
 			struct usb_serial_port *port,
 			const unsigned char *buf, int count);
@@ -712,8 +711,7 @@ exit:
 }
 
 
-static void whiteheat_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp)
+static void whiteheat_close(struct usb_serial_port *port)
 {
 	struct whiteheat_private *info = usb_get_serial_port_data(port);
 	struct whiteheat_urb_wrap *wrap;
@@ -723,31 +721,7 @@ static void whiteheat_close(struct tty_struct *tty,
 
 	dbg("%s - port %d", __func__, port->number);
 
-	mutex_lock(&port->serial->disc_mutex);
-	/* filp is NULL when called from usb_serial_disconnect */
-	if ((filp && (tty_hung_up_p(filp))) || port->serial->disconnected) {
-		mutex_unlock(&port->serial->disc_mutex);
-		return;
-	}
-	mutex_unlock(&port->serial->disc_mutex);
-
-	tty->closing = 1;
-
-/*
- * Not currently in use; tty_wait_until_sent() calls
- * serial_chars_in_buffer() which deadlocks on the second semaphore
- * acquisition. This should be fixed at some point. Greg's been
- * notified.
-	if ((filp->f_flags & (O_NDELAY | O_NONBLOCK)) == 0) {
-		tty_wait_until_sent(tty, CLOSING_DELAY);
-	}
-*/
-
-	tty_driver_flush_buffer(tty);
-	tty_ldisc_flush(tty);
-
 	firm_report_tx_done(port);
-
 	firm_close(port);
 
 	/* shutdown our bulk reads and writes */
@@ -775,10 +749,7 @@ static void whiteheat_close(struct tty_struct *tty,
 	}
 	spin_unlock_irq(&info->lock);
 	mutex_unlock(&info->deathwarrant);
-
 	stop_command_port(port->serial);
-
-	tty->closing = 0;
 }
 
 
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index 625e9e4639c..8cdfed738fe 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -224,8 +224,7 @@ struct usb_serial_driver {
 	/* Called by console with tty = NULL and by tty */
 	int  (*open)(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
-	void (*close)(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+	void (*close)(struct usb_serial_port *port);
 	int  (*write)(struct tty_struct *tty, struct usb_serial_port *port,
 			const unsigned char *buf, int count);
 	/* Called only by the tty layer */
@@ -241,6 +240,10 @@ struct usb_serial_driver {
 	int  (*tiocmget)(struct tty_struct *tty, struct file *file);
 	int  (*tiocmset)(struct tty_struct *tty, struct file *file,
 			 unsigned int set, unsigned int clear);
+	/* Called by the tty layer for port level work. There may or may not
+	   be an attached tty at this point */
+	void (*dtr_rts)(struct usb_serial_port *port, int on);
+	int  (*carrier_raised)(struct usb_serial_port *port);
 	/* USB events */
 	void (*read_int_callback)(struct urb *urb);
 	void (*write_int_callback)(struct urb *urb);
@@ -283,8 +286,7 @@ extern int usb_serial_generic_open(struct tty_struct *tty,
 		struct usb_serial_port *port, struct file *filp);
 extern int usb_serial_generic_write(struct tty_struct *tty,
 	struct usb_serial_port *port, const unsigned char *buf, int count);
-extern void usb_serial_generic_close(struct tty_struct *tty,
-			struct usb_serial_port *port, struct file *filp);
+extern void usb_serial_generic_close(struct usb_serial_port *port);
 extern int usb_serial_generic_resume(struct usb_serial *serial);
 extern int usb_serial_generic_write_room(struct tty_struct *tty);
 extern int usb_serial_generic_chars_in_buffer(struct tty_struct *tty);
-- 
cgit v1.2.3-70-g09d2


From 97e87f8ebe978e881c7325ba490574bd5500b133 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 11 Jun 2009 12:29:27 +0100
Subject: tty: cyclades, plx9060 casts cleanup

Remove ugly all-over-the-code casts of ctl_addr to 9060 space.
Add an union to the cyclades_card structure, which contains
a pointer to both 9050 and 9060 spaces.

The 9050 space layout is unknown, so let it still as a void
__iomem pointer.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/cyclades.c  | 65 ++++++++++++++++++++++++------------------------
 include/linux/cyclades.h | 23 +++++++++--------
 2 files changed, 46 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index ccf68a9e24b..2cbf74134f1 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -666,12 +666,10 @@ static void cy_send_xchar(struct tty_struct *tty, char ch);
 #define IS_CYC_Z(card) ((card).num_chips == (unsigned int)-1)
 
 #define Z_FPGA_CHECK(card) \
-	((readl(&((struct RUNTIME_9060 __iomem *) \
-		((card).ctl_addr))->init_ctrl) & (1<<17)) != 0)
+	((readl(&(card).ctl_addr.p9060->init_ctrl) & (1<<17)) != 0)
 
-#define ISZLOADED(card)	(((ZO_V1 == readl(&((struct RUNTIME_9060 __iomem *) \
-			((card).ctl_addr))->mail_box_0)) || \
-			Z_FPGA_CHECK(card)) && \
+#define ISZLOADED(card)	(((ZO_V1 == readl(&(card).ctl_addr.p9060->mail_box_0)) \
+			|| Z_FPGA_CHECK(card)) && \
 			(ZFIRM_ID == readl(&((struct FIRM_ID __iomem *) \
 			((card).base_addr+ID_ADDRESS))->signature)))
 
@@ -1400,14 +1398,12 @@ cyz_fetch_msg(struct cyclades_card *cinfo,
 	zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff);
 	board_ctrl = &zfw_ctrl->board_ctrl;
 
-	loc_doorbell = readl(&((struct RUNTIME_9060 __iomem *)
-				  (cinfo->ctl_addr))->loc_doorbell);
+	loc_doorbell = readl(&cinfo->ctl_addr.p9060->loc_doorbell);
 	if (loc_doorbell) {
 		*cmd = (char)(0xff & loc_doorbell);
 		*channel = readl(&board_ctrl->fwcmd_channel);
 		*param = (__u32) readl(&board_ctrl->fwcmd_param);
-		cy_writel(&((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))->
-			  loc_doorbell, 0xffffffff);
+		cy_writel(&cinfo->ctl_addr.p9060->loc_doorbell, 0xffffffff);
 		return 1;
 	}
 	return 0;
@@ -1431,8 +1427,7 @@ cyz_issue_cmd(struct cyclades_card *cinfo,
 	board_ctrl = &zfw_ctrl->board_ctrl;
 
 	index = 0;
-	pci_doorbell =
-	    &((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))->pci_doorbell;
+	pci_doorbell = &cinfo->ctl_addr.p9060->pci_doorbell;
 	while ((readl(pci_doorbell) & 0xff) != 0) {
 		if (index++ == 1000)
 			return (int)(readl(pci_doorbell) & 0xff);
@@ -1635,8 +1630,7 @@ static void cyz_handle_cmd(struct cyclades_card *cinfo)
 	zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff);
 	board_ctrl = &zfw_ctrl->board_ctrl;
 	fw_ver = readl(&board_ctrl->fw_version);
-	hw_ver = readl(&((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))->
-			mail_box_0);
+	hw_ver = readl(&cinfo->ctl_addr.p9060->mail_box_0);
 
 	while (cyz_fetch_msg(cinfo, &channel, &cmd, &param) == 1) {
 		special_count = 0;
@@ -2394,8 +2388,8 @@ static int cy_open(struct tty_struct *tty, struct file *filp)
 		struct FIRM_ID __iomem *firm_id = cinfo->base_addr + ID_ADDRESS;
 
 		if (!ISZLOADED(*cinfo)) {
-			if (((ZE_V1 == readl(&((struct RUNTIME_9060 __iomem *)
-					 (cinfo->ctl_addr))->mail_box_0)) &&
+			if (((ZE_V1 == readl(&cinfo->ctl_addr.p9060->
+							mail_box_0)) &&
 					Z_FPGA_CHECK(*cinfo)) &&
 					(ZFIRM_HLT == readl(
 						&firm_id->signature))) {
@@ -2417,6 +2411,7 @@ static int cy_open(struct tty_struct *tty, struct file *filp)
 			if (!cinfo->intr_enabled) {
 				struct ZFW_CTRL __iomem *zfw_ctrl;
 				struct BOARD_CTRL __iomem *board_ctrl;
+				u16 intr;
 
 				zfw_ctrl = cinfo->base_addr +
 					(readl(&firm_id->zfwctrl_addr) &
@@ -2425,8 +2420,10 @@ static int cy_open(struct tty_struct *tty, struct file *filp)
 				board_ctrl = &zfw_ctrl->board_ctrl;
 
 				/* Enable interrupts on the PLX chip */
-				cy_writew(cinfo->ctl_addr + 0x68,
-					readw(cinfo->ctl_addr + 0x68) | 0x0900);
+				intr = readw(&cinfo->ctl_addr.p9060->
+						intr_ctrl_stat) | 0x0900;
+				cy_writew(&cinfo->ctl_addr.p9060->
+						intr_ctrl_stat, intr);
 				/* Enable interrupts on the FW */
 				retval = cyz_issue_cmd(cinfo, 0,
 						C_CM_IRQ_ENBL, 0L);
@@ -4347,8 +4344,7 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo)
 	spin_lock_init(&cinfo->card_lock);
 
 	if (IS_CYC_Z(*cinfo)) {	/* Cyclades-Z */
-		mailbox = readl(&((struct RUNTIME_9060 __iomem *)
-				     cinfo->ctl_addr)->mail_box_0);
+		mailbox = readl(&cinfo->ctl_addr.p9060->mail_box_0);
 		nports = (mailbox == ZE_V1) ? ZE_V1_NPORTS : 8;
 		cinfo->intr_enabled = 0;
 		cinfo->nports = 0;	/* Will be correctly set later, after
@@ -4613,7 +4609,7 @@ static int __init cy_detect_isa(void)
 
 		/* set cy_card */
 		cy_card[j].base_addr = cy_isa_address;
-		cy_card[j].ctl_addr = NULL;
+		cy_card[j].ctl_addr.p9050 = NULL;
 		cy_card[j].irq = (int)cy_isa_irq;
 		cy_card[j].bus_index = 0;
 		cy_card[j].first_line = cy_next_channel;
@@ -5013,7 +5009,8 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 		}
 
 		/* Disable interrupts on the PLX before resetting it */
-		cy_writew(addr0 + 0x68, readw(addr0 + 0x68) & ~0x0900);
+		cy_writew(&ctl_addr->intr_ctrl_stat,
+				readw(&ctl_addr->intr_ctrl_stat) & ~0x0900);
 
 		plx_init(pdev, irq, addr0);
 
@@ -5109,7 +5106,7 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 
 	/* set cy_card */
 	cy_card[card_no].base_addr = addr2;
-	cy_card[card_no].ctl_addr = addr0;
+	cy_card[card_no].ctl_addr.p9050 = addr0;
 	cy_card[card_no].irq = irq;
 	cy_card[card_no].bus_index = 1;
 	cy_card[card_no].first_line = cy_next_channel;
@@ -5125,17 +5122,20 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 		plx_ver = readb(addr2 + CyPLX_VER) & 0x0f;
 		switch (plx_ver) {
 		case PLX_9050:
-
 			cy_writeb(addr0 + 0x4c, 0x43);
 			break;
 
 		case PLX_9060:
 		case PLX_9080:
 		default:	/* Old boards, use PLX_9060 */
-			plx_init(pdev, irq, addr0);
-			cy_writew(addr0 + 0x68, readw(addr0 + 0x68) | 0x0900);
+		{
+			struct RUNTIME_9060 __iomem *ctl_addr = addr0;
+			plx_init(pdev, irq, ctl_addr);
+			cy_writew(&ctl_addr->intr_ctrl_stat,
+				readw(&ctl_addr->intr_ctrl_stat) | 0x0900);
 			break;
 		}
+		}
 	}
 
 	dev_info(&pdev->dev, "%s/PCI #%d found: %d channels starting from "
@@ -5168,17 +5168,18 @@ static void __devexit cy_pci_remove(struct pci_dev *pdev)
 	/* non-Z with old PLX */
 	if (!IS_CYC_Z(*cinfo) && (readb(cinfo->base_addr + CyPLX_VER) & 0x0f) ==
 			PLX_9050)
-		cy_writeb(cinfo->ctl_addr + 0x4c, 0);
+		cy_writeb(cinfo->ctl_addr.p9050 + 0x4c, 0);
 	else
 #ifndef CONFIG_CYZ_INTR
 		if (!IS_CYC_Z(*cinfo))
 #endif
-		cy_writew(cinfo->ctl_addr + 0x68,
-				readw(cinfo->ctl_addr + 0x68) & ~0x0900);
+		cy_writew(&cinfo->ctl_addr.p9060->intr_ctrl_stat,
+			readw(&cinfo->ctl_addr.p9060->intr_ctrl_stat) &
+			~0x0900);
 
 	iounmap(cinfo->base_addr);
-	if (cinfo->ctl_addr)
-		iounmap(cinfo->ctl_addr);
+	if (cinfo->ctl_addr.p9050)
+		iounmap(cinfo->ctl_addr.p9050);
 	if (cinfo->irq
 #ifndef CONFIG_CYZ_INTR
 		&& !IS_CYC_Z(*cinfo)
@@ -5373,8 +5374,8 @@ static void __exit cy_cleanup_module(void)
 			/* clear interrupt */
 			cy_writeb(card->base_addr + Cy_ClrIntr, 0);
 			iounmap(card->base_addr);
-			if (card->ctl_addr)
-				iounmap(card->ctl_addr);
+			if (card->ctl_addr.p9050)
+				iounmap(card->ctl_addr.p9050);
 			if (card->irq
 #ifndef CONFIG_CYZ_INTR
 				&& !IS_CYC_Z(*card)
diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h
index 788850ba4e7..9ae03d5b359 100644
--- a/include/linux/cyclades.h
+++ b/include/linux/cyclades.h
@@ -507,16 +507,19 @@ struct ZFW_CTRL {
 
 /* Per card data structure */
 struct cyclades_card {
-    void __iomem *base_addr;
-    void __iomem *ctl_addr;
-    int irq;
-    unsigned int num_chips;	/* 0 if card absent, -1 if Z/PCI, else Y */
-    unsigned int first_line;	/* minor number of first channel on card */
-    unsigned int nports;	/* Number of ports in the card */
-    int bus_index;		/* address shift - 0 for ISA, 1 for PCI */
-    int intr_enabled;		/* FW Interrupt flag - 0 disabled, 1 enabled */
-    spinlock_t card_lock;
-    struct cyclades_port *ports;
+	void __iomem *base_addr;
+	union {
+		void __iomem *p9050;
+		struct RUNTIME_9060 __iomem *p9060;
+	} ctl_addr;
+	int irq;
+	unsigned int num_chips;	/* 0 if card absent, -1 if Z/PCI, else Y */
+	unsigned int first_line;	/* minor number of first channel on card */
+	unsigned int nports;	/* Number of ports in the card */
+	int bus_index;		/* address shift - 0 for ISA, 1 for PCI */
+	int intr_enabled;		/* FW Interrupt flag - 0 disabled, 1 enabled */
+	spinlock_t card_lock;
+	struct cyclades_port *ports;
 };
 
 /***************************************
-- 
cgit v1.2.3-70-g09d2


From 101b81590d8df0a74c33cf739886247c0a13f4af Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 11 Jun 2009 12:30:10 +0100
Subject: tty: cyclades, cache HW version

Store HW version locally to not read it all the time in interrupts
and alike.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/cyclades.c  | 32 +++++++++++---------------------
 include/linux/cyclades.h |  1 +
 2 files changed, 12 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index 2cbf74134f1..cf191cc1c92 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -668,8 +668,7 @@ static void cy_send_xchar(struct tty_struct *tty, char ch);
 #define Z_FPGA_CHECK(card) \
 	((readl(&(card).ctl_addr.p9060->init_ctrl) & (1<<17)) != 0)
 
-#define ISZLOADED(card)	(((ZO_V1 == readl(&(card).ctl_addr.p9060->mail_box_0)) \
-			|| Z_FPGA_CHECK(card)) && \
+#define ISZLOADED(card)	((ZO_V1 == (card).hw_ver || Z_FPGA_CHECK(card)) && \
 			(ZFIRM_ID == readl(&((struct FIRM_ID __iomem *) \
 			((card).base_addr+ID_ADDRESS))->signature)))
 
@@ -1393,8 +1392,6 @@ cyz_fetch_msg(struct cyclades_card *cinfo,
 	unsigned long loc_doorbell;
 
 	firm_id = cinfo->base_addr + ID_ADDRESS;
-	if (!ISZLOADED(*cinfo))
-		return -1;
 	zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff);
 	board_ctrl = &zfw_ctrl->board_ctrl;
 
@@ -1619,10 +1616,8 @@ static void cyz_handle_cmd(struct cyclades_card *cinfo)
 	static struct BOARD_CTRL __iomem *board_ctrl;
 	static struct CH_CTRL __iomem *ch_ctrl;
 	static struct BUF_CTRL __iomem *buf_ctrl;
-	__u32 channel;
+	__u32 channel, param, fw_ver;
 	__u8 cmd;
-	__u32 param;
-	__u32 hw_ver, fw_ver;
 	int special_count;
 	int delta_count;
 
@@ -1630,7 +1625,6 @@ static void cyz_handle_cmd(struct cyclades_card *cinfo)
 	zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff);
 	board_ctrl = &zfw_ctrl->board_ctrl;
 	fw_ver = readl(&board_ctrl->fw_version);
-	hw_ver = readl(&cinfo->ctl_addr.p9060->mail_box_0);
 
 	while (cyz_fetch_msg(cinfo, &channel, &cmd, &param) == 1) {
 		special_count = 0;
@@ -2388,11 +2382,9 @@ static int cy_open(struct tty_struct *tty, struct file *filp)
 		struct FIRM_ID __iomem *firm_id = cinfo->base_addr + ID_ADDRESS;
 
 		if (!ISZLOADED(*cinfo)) {
-			if (((ZE_V1 == readl(&cinfo->ctl_addr.p9060->
-							mail_box_0)) &&
-					Z_FPGA_CHECK(*cinfo)) &&
-					(ZFIRM_HLT == readl(
-						&firm_id->signature))) {
+			if (cinfo->hw_ver == ZE_V1 && Z_FPGA_CHECK(*cinfo) &&
+					readl(&firm_id->signature) ==
+					ZFIRM_HLT) {
 				printk(KERN_ERR "cyc:Cyclades-Z Error: you "
 					"need an external power supply for "
 					"this number of ports.\nFirmware "
@@ -4336,7 +4328,6 @@ static void cy_hangup(struct tty_struct *tty)
 static int __devinit cy_init_card(struct cyclades_card *cinfo)
 {
 	struct cyclades_port *info;
-	u32 uninitialized_var(mailbox);
 	unsigned int nports, port;
 	unsigned short chip_number;
 	int uninitialized_var(index);
@@ -4344,8 +4335,7 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo)
 	spin_lock_init(&cinfo->card_lock);
 
 	if (IS_CYC_Z(*cinfo)) {	/* Cyclades-Z */
-		mailbox = readl(&cinfo->ctl_addr.p9060->mail_box_0);
-		nports = (mailbox == ZE_V1) ? ZE_V1_NPORTS : 8;
+		nports = (cinfo->hw_ver == ZE_V1) ? ZE_V1_NPORTS : 8;
 		cinfo->intr_enabled = 0;
 		cinfo->nports = 0;	/* Will be correctly set later, after
 					   Z FW is loaded */
@@ -4377,7 +4367,7 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo)
 
 		if (IS_CYC_Z(*cinfo)) {
 			info->type = PORT_STARTECH;
-			if (mailbox == ZO_V1)
+			if (cinfo->hw_ver == ZO_V1)
 				info->xmit_fifo_size = CYZ_FIFO_SIZE;
 			else
 				info->xmit_fifo_size = 4 * CYZ_FIFO_SIZE;
@@ -4932,7 +4922,7 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 {
 	void __iomem *addr0 = NULL, *addr2 = NULL;
 	char *card_name = NULL;
-	u32 mailbox;
+	u32 uninitialized_var(mailbox);
 	unsigned int device_id, nchan = 0, card_no, i;
 	unsigned char plx_ver;
 	int retval, irq;
@@ -5014,7 +5004,7 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 
 		plx_init(pdev, irq, addr0);
 
-		mailbox = (u32)readl(&ctl_addr->mail_box_0);
+		mailbox = readl(&ctl_addr->mail_box_0);
 
 		addr2 = ioremap_nocache(pci_resource_start(pdev, 2),
 				mailbox == ZE_V1 ? CyPCI_Ze_win : CyPCI_Zwin);
@@ -5026,7 +5016,6 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 		if (mailbox == ZE_V1) {
 			card_name = "Cyclades-Ze";
 
-			readl(&ctl_addr->mail_box_0);
 			nchan = ZE_V1_NPORTS;
 		} else {
 			card_name = "Cyclades-8Zo";
@@ -5089,6 +5078,8 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 		}
 		cy_card[card_no].num_chips = nchan / 4;
 	} else {
+		cy_card[card_no].hw_ver = mailbox;
+		cy_card[card_no].num_chips = (unsigned int)-1;
 #ifdef CONFIG_CYZ_INTR
 		/* allocate IRQ only if board has an IRQ */
 		if (irq != 0 && irq != 255) {
@@ -5101,7 +5092,6 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 			}
 		}
 #endif				/* CONFIG_CYZ_INTR */
-		cy_card[card_no].num_chips = (unsigned int)-1;
 	}
 
 	/* set cy_card */
diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h
index 9ae03d5b359..a14fe307976 100644
--- a/include/linux/cyclades.h
+++ b/include/linux/cyclades.h
@@ -518,6 +518,7 @@ struct cyclades_card {
 	unsigned int nports;	/* Number of ports in the card */
 	int bus_index;		/* address shift - 0 for ISA, 1 for PCI */
 	int intr_enabled;		/* FW Interrupt flag - 0 disabled, 1 enabled */
+	u32 hw_ver;
 	spinlock_t card_lock;
 	struct cyclades_port *ports;
 };
-- 
cgit v1.2.3-70-g09d2


From 08a951e1693e324bd035b194002a82b9057fd9c5 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 11 Jun 2009 12:33:40 +0100
Subject: tty: cyclades, remove typedefs

They are unused anyway.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cyclades.h | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h
index a14fe307976..1fbdea4f08e 100644
--- a/include/linux/cyclades.h
+++ b/include/linux/cyclades.h
@@ -142,19 +142,6 @@ struct CYZ_BOOT_CTRL {
 
 
 #ifndef DP_WINDOW_SIZE
-/* #include "cyclomz.h" */
-/****************** ****************** *******************/
-/*
- *	The data types defined below are used in all ZFIRM interface
- *	data structures. They accomodate differences between HW
- *	architectures and compilers.
- */
-
-typedef __u64  ucdouble;		/* 64 bits, unsigned */
-typedef __u32  uclong;			/* 32 bits, unsigned */
-typedef __u16  ucshort;		/* 16 bits, unsigned */
-typedef __u8   ucchar;			/* 8 bits, unsigned */
-
 /*
  *	Memory Window Sizes
  */
-- 
cgit v1.2.3-70-g09d2


From 70beaed22cbe12979e55d99b370e147e2e168562 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 11 Jun 2009 12:39:12 +0100
Subject: serial: refactor ASYNC_ flags

Define ASYNCB_* flags which are bit numbers of the ASYNC_* flags.
This is useful for {test,set,clear}_bit.

Also convert each ASYNC_% to be (1 << ASYNCB_%) and define masks
with the macros, not constants.

Tested with:
#include "PATH_TO_KERNEL/include/linux/serial.h"
static struct {
        unsigned int new, old;
} as[] = {
        { ASYNC_HUP_NOTIFY, 0x0001 },
        { ASYNC_FOURPORT, 0x0002 },
...
	{ ASYNC_BOOT_ONLYMCA, 0x00400000 },
        { ASYNC_INTERNAL_FLAGS, 0xFFC00000 }
};
...
        for (a = 0; a < ARRAY_SIZE(as); a++)
                if (as[a].old != as[a].new)
                        printf("%.8x != %.8x\n", as[a].old, as[a].new);

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/serial.h | 116 +++++++++++++++++++++++++++++--------------------
 1 file changed, 69 insertions(+), 47 deletions(-)

(limited to 'include')

diff --git a/include/linux/serial.h b/include/linux/serial.h
index 9136cc5608c..e5bb75a6380 100644
--- a/include/linux/serial.h
+++ b/include/linux/serial.h
@@ -96,54 +96,76 @@ struct serial_uart_config {
 
 /*
  * Definitions for async_struct (and serial_struct) flags field
+ *
+ * Define ASYNCB_* for convenient use with {test,set,clear}_bit.
  */
-#define ASYNC_HUP_NOTIFY 0x0001 /* Notify getty on hangups and closes 
-				   on the callout port */
-#define ASYNC_FOURPORT  0x0002	/* Set OU1, OUT2 per AST Fourport settings */
-#define ASYNC_SAK	0x0004	/* Secure Attention Key (Orange book) */
-#define ASYNC_SPLIT_TERMIOS 0x0008 /* Separate termios for dialin/callout */
-
-#define ASYNC_SPD_MASK	0x1030
-#define ASYNC_SPD_HI	0x0010	/* Use 56000 instead of 38400 bps */
-
-#define ASYNC_SPD_VHI	0x0020  /* Use 115200 instead of 38400 bps */
-#define ASYNC_SPD_CUST	0x0030  /* Use user-specified divisor */
-
-#define ASYNC_SKIP_TEST	0x0040 /* Skip UART test during autoconfiguration */
-#define ASYNC_AUTO_IRQ  0x0080 /* Do automatic IRQ during autoconfiguration */
-#define ASYNC_SESSION_LOCKOUT 0x0100 /* Lock out cua opens based on session */
-#define ASYNC_PGRP_LOCKOUT    0x0200 /* Lock out cua opens based on pgrp */
-#define ASYNC_CALLOUT_NOHUP   0x0400 /* Don't do hangups for cua device */
-
-#define ASYNC_HARDPPS_CD	0x0800	/* Call hardpps when CD goes high  */
-
-#define ASYNC_SPD_SHI	0x1000	/* Use 230400 instead of 38400 bps */
-#define ASYNC_SPD_WARP	0x1010	/* Use 460800 instead of 38400 bps */
-
-#define ASYNC_LOW_LATENCY 0x2000 /* Request low latency behaviour */
-
-#define ASYNC_BUGGY_UART  0x4000 /* This is a buggy UART, skip some safety
-				  * checks.  Note: can be dangerous! */
-
-#define ASYNC_AUTOPROBE	 0x8000 /* Port was autoprobed by PCI or PNP code */
-
-#define ASYNC_FLAGS	0x7FFF	/* Possible legal async flags */
-#define ASYNC_USR_MASK	0x3430	/* Legal flags that non-privileged
-				 * users can set or reset */
-
-/* Internal flags used only by kernel/chr_drv/serial.c */
-#define ASYNC_INITIALIZED	0x80000000 /* Serial port was initialized */
-#define ASYNC_NORMAL_ACTIVE	0x20000000 /* Normal device is active */
-#define ASYNC_BOOT_AUTOCONF	0x10000000 /* Autoconfigure port on bootup */
-#define ASYNC_CLOSING		0x08000000 /* Serial port is closing */
-#define ASYNC_CTS_FLOW		0x04000000 /* Do CTS flow control */
-#define ASYNC_CHECK_CD		0x02000000 /* i.e., CLOCAL */
-#define ASYNC_SHARE_IRQ		0x01000000 /* for multifunction cards
-					     --- no longer used */
-#define ASYNC_CONS_FLOW		0x00800000 /* flow control for console  */
-
-#define ASYNC_BOOT_ONLYMCA	0x00400000 /* Probe only if MCA bus */
-#define ASYNC_INTERNAL_FLAGS	0xFFC00000 /* Internal flags */
+#define ASYNCB_HUP_NOTIFY	 0 /* Notify getty on hangups and closes
+				    * on the callout port */
+#define ASYNCB_FOURPORT		 1 /* Set OU1, OUT2 per AST Fourport settings */
+#define ASYNCB_SAK		 2 /* Secure Attention Key (Orange book) */
+#define ASYNCB_SPLIT_TERMIOS	 3 /* Separate termios for dialin/callout */
+#define ASYNCB_SPD_HI		 4 /* Use 56000 instead of 38400 bps */
+#define ASYNCB_SPD_VHI		 5 /* Use 115200 instead of 38400 bps */
+#define ASYNCB_SKIP_TEST	 6 /* Skip UART test during autoconfiguration */
+#define ASYNCB_AUTO_IRQ		 7 /* Do automatic IRQ during
+				    * autoconfiguration */
+#define ASYNCB_SESSION_LOCKOUT	 8 /* Lock out cua opens based on session */
+#define ASYNCB_PGRP_LOCKOUT	 9 /* Lock out cua opens based on pgrp */
+#define ASYNCB_CALLOUT_NOHUP	10 /* Don't do hangups for cua device */
+#define ASYNCB_HARDPPS_CD	11 /* Call hardpps when CD goes high  */
+#define ASYNCB_SPD_SHI		12 /* Use 230400 instead of 38400 bps */
+#define ASYNCB_LOW_LATENCY	13 /* Request low latency behaviour */
+#define ASYNCB_BUGGY_UART	14 /* This is a buggy UART, skip some safety
+				    * checks.  Note: can be dangerous! */
+#define ASYNCB_AUTOPROBE	15 /* Port was autoprobed by PCI or PNP code */
+#define ASYNCB_LAST_USER	15
+
+/* Internal flags used only by kernel */
+#define ASYNCB_INITIALIZED	31 /* Serial port was initialized */
+#define ASYNCB_NORMAL_ACTIVE	29 /* Normal device is active */
+#define ASYNCB_BOOT_AUTOCONF	28 /* Autoconfigure port on bootup */
+#define ASYNCB_CLOSING		27 /* Serial port is closing */
+#define ASYNCB_CTS_FLOW		26 /* Do CTS flow control */
+#define ASYNCB_CHECK_CD		25 /* i.e., CLOCAL */
+#define ASYNCB_SHARE_IRQ	24 /* for multifunction cards, no longer used */
+#define ASYNCB_CONS_FLOW	23 /* flow control for console  */
+#define ASYNCB_BOOT_ONLYMCA	22 /* Probe only if MCA bus */
+#define ASYNCB_FIRST_KERNEL	22
+
+#define ASYNC_HUP_NOTIFY	(1U << ASYNCB_HUP_NOTIFY)
+#define ASYNC_FOURPORT		(1U << ASYNCB_FOURPORT)
+#define ASYNC_SAK		(1U << ASYNCB_SAK)
+#define ASYNC_SPLIT_TERMIOS	(1U << ASYNCB_SPLIT_TERMIOS)
+#define ASYNC_SPD_HI		(1U << ASYNCB_SPD_HI)
+#define ASYNC_SPD_VHI		(1U << ASYNCB_SPD_VHI)
+#define ASYNC_SKIP_TEST		(1U << ASYNCB_SKIP_TEST)
+#define ASYNC_AUTO_IRQ		(1U << ASYNCB_AUTO_IRQ)
+#define ASYNC_SESSION_LOCKOUT	(1U << ASYNCB_SESSION_LOCKOUT)
+#define ASYNC_PGRP_LOCKOUT	(1U << ASYNCB_PGRP_LOCKOUT)
+#define ASYNC_CALLOUT_NOHUP	(1U << ASYNCB_CALLOUT_NOHUP)
+#define ASYNC_HARDPPS_CD	(1U << ASYNCB_HARDPPS_CD)
+#define ASYNC_SPD_SHI		(1U << ASYNCB_SPD_SHI)
+#define ASYNC_LOW_LATENCY	(1U << ASYNCB_LOW_LATENCY)
+#define ASYNC_BUGGY_UART	(1U << ASYNCB_BUGGY_UART)
+#define ASYNC_AUTOPROBE		(1U << ASYNCB_AUTOPROBE)
+
+#define ASYNC_FLAGS		((1U << ASYNCB_LAST_USER) - 1)
+#define ASYNC_USR_MASK		(ASYNC_SPD_HI|ASYNC_SPD_VHI| \
+		ASYNC_CALLOUT_NOHUP|ASYNC_SPD_SHI|ASYNC_LOW_LATENCY)
+#define ASYNC_SPD_CUST		(ASYNC_SPD_HI|ASYNC_SPD_VHI)
+#define ASYNC_SPD_WARP		(ASYNC_SPD_HI|ASYNC_SPD_SHI)
+#define ASYNC_SPD_MASK		(ASYNC_SPD_HI|ASYNC_SPD_VHI|ASYNC_SPD_SHI)
+
+#define ASYNC_INITIALIZED	(1U << ASYNCB_INITIALIZED)
+#define ASYNC_NORMAL_ACTIVE	(1U << ASYNCB_NORMAL_ACTIVE)
+#define ASYNC_BOOT_AUTOCONF	(1U << ASYNCB_BOOT_AUTOCONF)
+#define ASYNC_CLOSING		(1U << ASYNCB_CLOSING)
+#define ASYNC_CTS_FLOW		(1U << ASYNCB_CTS_FLOW)
+#define ASYNC_CHECK_CD		(1U << ASYNCB_CHECK_CD)
+#define ASYNC_SHARE_IRQ		(1U << ASYNCB_SHARE_IRQ)
+#define ASYNC_CONS_FLOW		(1U << ASYNCB_CONS_FLOW)
+#define ASYNC_BOOT_ONLYMCA	(1U << ASYNCB_BOOT_ONLYMCA)
+#define ASYNC_INTERNAL_FLAGS	(~((1U << ASYNCB_FIRST_KERNEL) - 1))
 
 /*
  * Multiport serial configuration structure --- external structure
-- 
cgit v1.2.3-70-g09d2


From 70fd8fdecc4430ffcede7704dd812d4054d1faf9 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@amd.com>
Date: Thu, 11 Jun 2009 12:41:57 +0100
Subject: 8250_pci: add the OXCB950 chip to the 8250 PCI driver.

This adds support for the following serial controller chip:
Oxford Semiconductor OXCB950 for PCI Cardbus interface
http://www.transdimension.com/products/serial/OXCB950.html

on this card:
ExSys EX-1370 1 port high-speed serial card for ExpressCard/34 slot

Signed-off-by: Andre Przywara <andre.przywara@amd.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/8250_pci.c | 3 +++
 include/linux/pci_ids.h   | 2 ++
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index 938bc1b6c3f..e371a9c1534 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -2776,6 +2776,9 @@ static struct pci_device_id serial_pci_tbl[] = {
 	{	PCI_VENDOR_ID_OXSEMI, 0x950a,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b0_2_1130000 },
+	{	PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_C950,
+		PCI_VENDOR_ID_OXSEMI, PCI_SUBDEVICE_ID_OXSEMI_C950, 0, 0,
+		pbn_b0_1_921600 },
 	{	PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI954,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b0_4_115200 },
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 0f71812d67d..d7d1c41a0b1 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1996,10 +1996,12 @@
 #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_U	0xC118
 #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_GU	0xC11C
 #define PCI_DEVICE_ID_OXSEMI_16PCI954	0x9501
+#define PCI_DEVICE_ID_OXSEMI_C950	0x950B
 #define PCI_DEVICE_ID_OXSEMI_16PCI95N	0x9511
 #define PCI_DEVICE_ID_OXSEMI_16PCI954PP	0x9513
 #define PCI_DEVICE_ID_OXSEMI_16PCI952	0x9521
 #define PCI_DEVICE_ID_OXSEMI_16PCI952PP	0x9523
+#define PCI_SUBDEVICE_ID_OXSEMI_C950	0x0001
 
 #define PCI_VENDOR_ID_CHELSIO		0x1425
 
-- 
cgit v1.2.3-70-g09d2


From 38db89799bdf11625a831c5af33938dcb11908b6 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 12:44:17 +0100
Subject: tty: throttling race fix

The tty throttling code can race due to the lock drops. It takes very high
loads but this has been observed and verified by Rob Duncan.

The basic problem is that on an SMP box we can go

	CPU #1				CPU #2
	need to throttle ?
	suppose we should		buffer space cleared
					are we throttled
					yes ? - unthrottle
	call throttle method

This changeet take the termios lock to protect against this. The termios
lock isn't the initial obvious candidate but many implementations of throttle
methods already need to poke around their own termios structures (and nobody
really locks them against a racing change of flow control).

This does mean that anyone who is setting tty->low_latency = 1 and then
calling tty_flip_buffer_push from their unthrottle method is going to end up
collapsing in a pile of locks. However we've removed all the known bogus
users of low_latency = 1 and such use isn't safe anyway for other reasons so
catching it would be an improvement.

Signed-off-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/tty_ioctl.c   | 13 +++++++++++++
 include/linux/tty_driver.h |  6 ++++--
 2 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c
index 6f4c7d0a53b..2401dbcbee9 100644
--- a/drivers/char/tty_ioctl.c
+++ b/drivers/char/tty_ioctl.c
@@ -97,14 +97,19 @@ EXPORT_SYMBOL(tty_driver_flush_buffer);
  *	@tty: terminal
  *
  *	Indicate that a tty should stop transmitting data down the stack.
+ *	Takes the termios mutex to protect against parallel throttle/unthrottle
+ *	and also to ensure the driver can consistently reference its own
+ *	termios data at this point when implementing software flow control.
  */
 
 void tty_throttle(struct tty_struct *tty)
 {
+	mutex_lock(&tty->termios_mutex);
 	/* check TTY_THROTTLED first so it indicates our state */
 	if (!test_and_set_bit(TTY_THROTTLED, &tty->flags) &&
 	    tty->ops->throttle)
 		tty->ops->throttle(tty);
+	mutex_unlock(&tty->termios_mutex);
 }
 EXPORT_SYMBOL(tty_throttle);
 
@@ -113,13 +118,21 @@ EXPORT_SYMBOL(tty_throttle);
  *	@tty: terminal
  *
  *	Indicate that a tty may continue transmitting data down the stack.
+ *	Takes the termios mutex to protect against parallel throttle/unthrottle
+ *	and also to ensure the driver can consistently reference its own
+ *	termios data at this point when implementing software flow control.
+ *
+ *	Drivers should however remember that the stack can issue a throttle,
+ *	then change flow control method, then unthrottle.
  */
 
 void tty_unthrottle(struct tty_struct *tty)
 {
+	mutex_lock(&tty->termios_mutex);
 	if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) &&
 	    tty->ops->unthrottle)
 		tty->ops->unthrottle(tty);
+	mutex_unlock(&tty->termios_mutex);
 }
 EXPORT_SYMBOL(tty_unthrottle);
 
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index bcba84ea2d8..3566129384a 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -127,7 +127,8 @@
  * 	the line discipline are close to full, and it should somehow
  * 	signal that no more characters should be sent to the tty.
  *
- *	Optional: Always invoke via tty_throttle();
+ *	Optional: Always invoke via tty_throttle(), called under the
+ *	termios lock.
  * 
  * void (*unthrottle)(struct tty_struct * tty);
  *
@@ -135,7 +136,8 @@
  * 	that characters can now be sent to the tty without fear of
  * 	overrunning the input buffers of the line disciplines.
  * 
- *	Optional: Always invoke via tty_unthrottle();
+ *	Optional: Always invoke via tty_unthrottle(), called under the
+ *	termios lock.
  *
  * void (*stop)(struct tty_struct *tty);
  *
-- 
cgit v1.2.3-70-g09d2


From e8b70e7d3e86319a8b2aaabde3866833d92cd80f Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 12:48:02 +0100
Subject: tty: Extract various bits of ldisc code

Before trying to tackle the ldisc bugs the code needs to be a good deal
more readable, so do the simple extractions of routines first.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/tty_io.c    | 24 +++++++++---
 drivers/char/tty_ldisc.c | 97 ++++++++++++++++++++++++++++++++----------------
 include/linux/tty.h      |  3 ++
 3 files changed, 88 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 6c817398232..be49d0730bb 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -2481,6 +2481,24 @@ static int tty_tiocmset(struct tty_struct *tty, struct file *file, unsigned int
 	return tty->ops->tiocmset(tty, file, set, clear);
 }
 
+struct tty_struct *tty_pair_get_tty(struct tty_struct *tty)
+{
+	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+	    tty->driver->subtype == PTY_TYPE_MASTER)
+		tty = tty->link;
+	return tty;
+}
+EXPORT_SYMBOL(tty_pair_get_tty);
+
+struct tty_struct *tty_pair_get_pty(struct tty_struct *tty)
+{
+	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+	    tty->driver->subtype == PTY_TYPE_MASTER)
+	    return tty;
+	return tty->link;
+}
+EXPORT_SYMBOL(tty_pair_get_pty);
+
 /*
  * Split this up, as gcc can choke on it otherwise..
  */
@@ -2496,11 +2514,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	if (tty_paranoia_check(tty, inode, "tty_ioctl"))
 		return -EINVAL;
 
-	real_tty = tty;
-	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
-	    tty->driver->subtype == PTY_TYPE_MASTER)
-		real_tty = tty->link;
-
+	real_tty = tty_pair_get_tty(tty);
 
 	/*
 	 * Factor out some common prep work
diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c
index f78f5b0127a..e3c6416aa86 100644
--- a/drivers/char/tty_ldisc.c
+++ b/drivers/char/tty_ldisc.c
@@ -443,6 +443,50 @@ static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old)
 	}
 }
 
+/**
+ *	tty_ldisc_halt		-	shutdown the line discipline
+ *	@tty: tty device
+ *
+ *	Shut down the line discipline and work queue for this tty device.
+ *	The TTY_LDISC flag being cleared ensures no further references can
+ *	be obtained while the delayed work queue halt ensures that no more
+ *	data is fed to the ldisc.
+ *
+ *	In order to wait for any existing references to complete see 
+ *	tty_ldisc_wait_idle.
+ */
+
+static void tty_ldisc_halt(struct tty_struct *tty)
+{
+	clear_bit(TTY_LDISC, &tty->flags);
+	cancel_delayed_work(&tty->buf.work);
+	/*
+	 * Wait for ->hangup_work and ->buf.work handlers to terminate
+	 */
+	flush_scheduled_work();
+}
+
+/**
+ *	tty_ldisc_wait_idle	-	wait for the ldisc to become idle
+ *	@tty: tty to wait for
+ *
+ *	Wait for the line discipline to become idle. The discipline must
+ *	have been halted for this to guarantee it remains idle.
+ *
+ */
+
+static void tty_ldisc_wait_idle(struct tty_struct *tty)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&tty_ldisc_lock, flags);
+	while (tty->ldisc.refcount) {
+		spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+		wait_event(tty_ldisc_wait, tty->ldisc.refcount == 0);
+		spin_lock_irqsave(&tty_ldisc_lock, flags);
+	}
+	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+}
+
 /**
  *	tty_set_ldisc		-	set line discipline
  *	@tty: the terminal to set
@@ -636,6 +680,21 @@ int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty)
 	return 0;
 }
 
+static void tty_ldisc_reinit(struct tty_struct *tty)
+{
+	struct tty_ldisc ld;
+
+	if (tty->ldisc.ops->close)
+		(tty->ldisc.ops->close)(tty);
+	tty_ldisc_put(tty->ldisc.ops);
+	/*
+	 *	Switch the line discipline back
+	 */
+	WARN_ON(tty_ldisc_get(N_TTY, &ld));
+	tty_ldisc_assign(tty, &ld);
+	tty_set_termios_ldisc(tty, N_TTY);
+}
+
 /**
  *	tty_ldisc_release		-	release line discipline
  *	@tty: tty being shut down
@@ -647,58 +706,34 @@ int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty)
 
 void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty)
 {
-	unsigned long flags;
-	struct tty_ldisc ld;
+
 	/*
 	 * Prevent flush_to_ldisc() from rescheduling the work for later.  Then
 	 * kill any delayed work. As this is the final close it does not
 	 * race with the set_ldisc code path.
 	 */
-	clear_bit(TTY_LDISC, &tty->flags);
-	cancel_delayed_work(&tty->buf.work);
 
-	/*
-	 * Wait for ->hangup_work and ->buf.work handlers to terminate
-	 */
-
-	flush_scheduled_work();
+	tty_ldisc_halt(tty);
 
 	/*
 	 * Wait for any short term users (we know they are just driver
 	 * side waiters as the file is closing so user count on the file
 	 * side is zero.
 	 */
-	spin_lock_irqsave(&tty_ldisc_lock, flags);
-	while (tty->ldisc.refcount) {
-		spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-		wait_event(tty_ldisc_wait, tty->ldisc.refcount == 0);
-		spin_lock_irqsave(&tty_ldisc_lock, flags);
-	}
-	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+
+	tty_ldisc_wait_idle(tty);
+
 	/*
 	 * Shutdown the current line discipline, and reset it to N_TTY.
 	 *
 	 * FIXME: this MUST get fixed for the new reflocking
 	 */
-	if (tty->ldisc.ops->close)
-		(tty->ldisc.ops->close)(tty);
-	tty_ldisc_put(tty->ldisc.ops);
 
-	/*
-	 *	Switch the line discipline back
-	 */
-	WARN_ON(tty_ldisc_get(N_TTY, &ld));
-	tty_ldisc_assign(tty, &ld);
-	tty_set_termios_ldisc(tty, N_TTY);
+	tty_ldisc_reinit(tty);
 	if (o_tty) {
 		/* FIXME: could o_tty be in setldisc here ? */
 		clear_bit(TTY_LDISC, &o_tty->flags);
-		if (o_tty->ldisc.ops->close)
-			(o_tty->ldisc.ops->close)(o_tty);
-		tty_ldisc_put(o_tty->ldisc.ops);
-		WARN_ON(tty_ldisc_get(N_TTY, &ld));
-		tty_ldisc_assign(o_tty, &ld);
-		tty_set_termios_ldisc(o_tty, N_TTY);
+		tty_ldisc_reinit(o_tty);
 	}
 }
 
diff --git a/include/linux/tty.h b/include/linux/tty.h
index bed5a3d4030..f9c13c83790 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -428,6 +428,9 @@ extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx,
 extern void tty_release_dev(struct file *filp);
 extern int tty_init_termios(struct tty_struct *tty);
 
+extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty);
+extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty);
+
 extern struct mutex tty_mutex;
 
 extern void tty_write_unlock(struct tty_struct *tty);
-- 
cgit v1.2.3-70-g09d2


From c65c9bc3efa5589f691276bb9db689119a711222 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Jun 2009 12:50:12 +0100
Subject: tty: rewrite the ldisc locking

There are several pretty much unfixable races in the old ldisc code, especially
with respect to pty behaviour and also to hangup. It's easier to rewrite the
code than simply try and patch it up.

This patch
- splits the ldisc from the tty (so we will be able to refcount it more cleanly
  later)
- introduces a mutex lock for ldisc changing on an active device
- fixes the complete mess that hangup caused
- implements hopefully correct setldisc/close/hangup locking

There are still some problems around pty pairs that have always been there but
at least it is now possible to understand the code and fix further problems.

This fixes the following known bugs
- hang up can leak ldisc references
- hang up may not call open/close on ldisc in a matched way
- pty/tty pairs can deadlock during an ldisc change
- reading the ldisc proc files can cause every ldisc to be loaded

and probably a few other of the mysterious ldisc race reports.

I'm sure it also adds the odd new one.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/bluetooth/hci_ldisc.c |   4 +-
 drivers/char/cyclades.c       |   2 +-
 drivers/char/epca.c           |   4 +-
 drivers/char/ip2/i2lib.c      |   4 +-
 drivers/char/ip2/ip2main.c    |   4 +-
 drivers/char/n_hdlc.c         |   4 +-
 drivers/char/pty.c            |  10 +-
 drivers/char/selection.c      |   2 +-
 drivers/char/tty_io.c         |  64 +-----
 drivers/char/tty_ldisc.c      | 477 ++++++++++++++++++++++++++----------------
 include/linux/tty.h           |   9 +-
 11 files changed, 330 insertions(+), 254 deletions(-)

(limited to 'include')

diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index af761dc434f..68801512859 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -277,8 +277,8 @@ static int hci_uart_tty_open(struct tty_struct *tty)
 	/* FIXME: why is this needed. Note don't use ldisc_ref here as the
 	   open path is before the ldisc is referencable */
 
-	if (tty->ldisc.ops->flush_buffer)
-		tty->ldisc.ops->flush_buffer(tty);
+	if (tty->ldisc->ops->flush_buffer)
+		tty->ldisc->ops->flush_buffer(tty);
 	tty_driver_flush_buffer(tty);
 
 	return 0;
diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c
index 45601905174..f3366d3f06c 100644
--- a/drivers/char/cyclades.c
+++ b/drivers/char/cyclades.c
@@ -5200,7 +5200,7 @@ static int cyclades_proc_show(struct seq_file *m, void *v)
 					(cur_jifs - info->idle_stats.recv_idle)/
 					HZ, info->idle_stats.overruns,
 					/* FIXME: double check locking */
-					(long)info->port.tty->ldisc.ops->num);
+					(long)info->port.tty->ldisc->ops->num);
 			else
 				seq_printf(m, "%3d %8lu %10lu %8lu "
 					"%10lu %8lu %9lu %6ld\n",
diff --git a/drivers/char/epca.c b/drivers/char/epca.c
index 710ee933305..abef1f7d84f 100644
--- a/drivers/char/epca.c
+++ b/drivers/char/epca.c
@@ -2114,8 +2114,8 @@ static int pc_ioctl(struct tty_struct *tty, struct file *file,
 			tty_wait_until_sent(tty, 0);
 		} else {
 			/* ldisc lock already held in ioctl */
-			if (tty->ldisc.ops->flush_buffer)
-				tty->ldisc.ops->flush_buffer(tty);
+			if (tty->ldisc->ops->flush_buffer)
+				tty->ldisc->ops->flush_buffer(tty);
 		}
 		unlock_kernel();
 		/* Fall Thru */
diff --git a/drivers/char/ip2/i2lib.c b/drivers/char/ip2/i2lib.c
index 0061e18aff6..0d10b89218e 100644
--- a/drivers/char/ip2/i2lib.c
+++ b/drivers/char/ip2/i2lib.c
@@ -868,11 +868,11 @@ i2Input(i2ChanStrPtr pCh)
 		amountToMove = count;
 	}
 	// Move the first block
-	pCh->pTTY->ldisc.ops->receive_buf( pCh->pTTY,
+	pCh->pTTY->ldisc->ops->receive_buf( pCh->pTTY,
 		 &(pCh->Ibuf[stripIndex]), NULL, amountToMove );
 	// If we needed to wrap, do the second data move
 	if (count > amountToMove) {
-		pCh->pTTY->ldisc.ops->receive_buf( pCh->pTTY,
+		pCh->pTTY->ldisc->ops->receive_buf( pCh->pTTY,
 		 pCh->Ibuf, NULL, count - amountToMove );
 	}
 	// Bump and wrap the stripIndex all at once by the amount of data read. This
diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c
index afd9247cf08..517271c762e 100644
--- a/drivers/char/ip2/ip2main.c
+++ b/drivers/char/ip2/ip2main.c
@@ -1315,8 +1315,8 @@ static inline void  isig(int sig, struct tty_struct *tty, int flush)
 	if (tty->pgrp)
 		kill_pgrp(tty->pgrp, sig, 1);
 	if (flush || !L_NOFLSH(tty)) {
-		if ( tty->ldisc.ops->flush_buffer )  
-			tty->ldisc.ops->flush_buffer(tty);
+		if ( tty->ldisc->ops->flush_buffer )  
+			tty->ldisc->ops->flush_buffer(tty);
 		i2InputFlush( tty->driver_data );
 	}
 }
diff --git a/drivers/char/n_hdlc.c b/drivers/char/n_hdlc.c
index bacb3e2872a..461ece591a5 100644
--- a/drivers/char/n_hdlc.c
+++ b/drivers/char/n_hdlc.c
@@ -342,8 +342,8 @@ static int n_hdlc_tty_open (struct tty_struct *tty)
 #endif
 	
 	/* Flush any pending characters in the driver and discipline. */
-	if (tty->ldisc.ops->flush_buffer)
-		tty->ldisc.ops->flush_buffer(tty);
+	if (tty->ldisc->ops->flush_buffer)
+		tty->ldisc->ops->flush_buffer(tty);
 
 	tty_driver_flush_buffer(tty);
 		
diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index da2cb8c70c1..5acd29e6e04 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -110,7 +110,7 @@ static int pty_write(struct tty_struct *tty, const unsigned char *buf,
 	c = to->receive_room;
 	if (c > count)
 		c = count;
-	to->ldisc.ops->receive_buf(to, buf, NULL, c);
+	to->ldisc->ops->receive_buf(to, buf, NULL, c);
 
 	return c;
 }
@@ -148,11 +148,11 @@ static int pty_chars_in_buffer(struct tty_struct *tty)
 	int count;
 
 	/* We should get the line discipline lock for "tty->link" */
-	if (!to || !to->ldisc.ops->chars_in_buffer)
+	if (!to || !to->ldisc->ops->chars_in_buffer)
 		return 0;
 
 	/* The ldisc must report 0 if no characters available to be read */
-	count = to->ldisc.ops->chars_in_buffer(to);
+	count = to->ldisc->ops->chars_in_buffer(to);
 
 	if (tty->driver->subtype == PTY_TYPE_SLAVE)
 		return count;
@@ -186,8 +186,8 @@ static void pty_flush_buffer(struct tty_struct *tty)
 	if (!to)
 		return;
 
-	if (to->ldisc.ops->flush_buffer)
-		to->ldisc.ops->flush_buffer(to);
+	if (to->ldisc->ops->flush_buffer)
+		to->ldisc->ops->flush_buffer(to);
 
 	if (to->packet) {
 		spin_lock_irqsave(&tty->ctrl_lock, flags);
diff --git a/drivers/char/selection.c b/drivers/char/selection.c
index cb8ca569896..f97b9e84806 100644
--- a/drivers/char/selection.c
+++ b/drivers/char/selection.c
@@ -327,7 +327,7 @@ int paste_selection(struct tty_struct *tty)
 		}
 		count = sel_buffer_lth - pasted;
 		count = min(count, tty->receive_room);
-		tty->ldisc.ops->receive_buf(tty, sel_buffer + pasted,
+		tty->ldisc->ops->receive_buf(tty, sel_buffer + pasted,
 								NULL, count);
 		pasted += count;
 	}
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index be49d0730bb..2f44b0b241c 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -491,22 +491,6 @@ void tty_ldisc_flush(struct tty_struct *tty)
 
 EXPORT_SYMBOL_GPL(tty_ldisc_flush);
 
-/**
- *	tty_reset_termios	-	reset terminal state
- *	@tty: tty to reset
- *
- *	Restore a terminal to the driver default state
- */
-
-static void tty_reset_termios(struct tty_struct *tty)
-{
-	mutex_lock(&tty->termios_mutex);
-	*tty->termios = tty->driver->init_termios;
-	tty->termios->c_ispeed = tty_termios_input_baud_rate(tty->termios);
-	tty->termios->c_ospeed = tty_termios_baud_rate(tty->termios);
-	mutex_unlock(&tty->termios_mutex);
-}
-
 /**
  *	do_tty_hangup		-	actual handler for hangup events
  *	@work: tty device
@@ -536,7 +520,6 @@ static void do_tty_hangup(struct work_struct *work)
 	struct file *cons_filp = NULL;
 	struct file *filp, *f = NULL;
 	struct task_struct *p;
-	struct tty_ldisc *ld;
 	int    closecount = 0, n;
 	unsigned long flags;
 	int refs = 0;
@@ -567,40 +550,8 @@ static void do_tty_hangup(struct work_struct *work)
 		filp->f_op = &hung_up_tty_fops;
 	}
 	file_list_unlock();
-	/*
-	 * FIXME! What are the locking issues here? This may me overdoing
-	 * things... This question is especially important now that we've
-	 * removed the irqlock.
-	 */
-	ld = tty_ldisc_ref(tty);
-	if (ld != NULL) {
-		/* We may have no line discipline at this point */
-		if (ld->ops->flush_buffer)
-			ld->ops->flush_buffer(tty);
-		tty_driver_flush_buffer(tty);
-		if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) &&
-		    ld->ops->write_wakeup)
-			ld->ops->write_wakeup(tty);
-		if (ld->ops->hangup)
-			ld->ops->hangup(tty);
-	}
-	/*
-	 * FIXME: Once we trust the LDISC code better we can wait here for
-	 * ldisc completion and fix the driver call race
-	 */
-	wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
-	wake_up_interruptible_poll(&tty->read_wait, POLLIN);
-	/*
-	 * Shutdown the current line discipline, and reset it to
-	 * N_TTY.
-	 */
-	if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS)
-		tty_reset_termios(tty);
-	/* Defer ldisc switch */
-	/* tty_deferred_ldisc_switch(N_TTY);
 
-	  This should get done automatically when the port closes and
-	  tty_release is called */
+	tty_ldisc_hangup(tty);
 
 	read_lock(&tasklist_lock);
 	if (tty->session) {
@@ -629,12 +580,15 @@ static void do_tty_hangup(struct work_struct *work)
 	read_unlock(&tasklist_lock);
 
 	spin_lock_irqsave(&tty->ctrl_lock, flags);
-	tty->flags = 0;
+	clear_bit(TTY_THROTTLED, &tty->flags);
+	clear_bit(TTY_PUSH, &tty->flags);
+	clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
 	put_pid(tty->session);
 	put_pid(tty->pgrp);
 	tty->session = NULL;
 	tty->pgrp = NULL;
 	tty->ctrl_status = 0;
+	set_bit(TTY_HUPPED, &tty->flags);
 	spin_unlock_irqrestore(&tty->ctrl_lock, flags);
 
 	/* Account for the p->signal references we killed */
@@ -660,10 +614,7 @@ static void do_tty_hangup(struct work_struct *work)
 	 * can't yet guarantee all that.
 	 */
 	set_bit(TTY_HUPPED, &tty->flags);
-	if (ld) {
-		tty_ldisc_enable(tty);
-		tty_ldisc_deref(ld);
-	}
+	tty_ldisc_enable(tty);
 	unlock_kernel();
 	if (f)
 		fput(f);
@@ -2570,7 +2521,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case TIOCGSID:
 		return tiocgsid(tty, real_tty, p);
 	case TIOCGETD:
-		return put_user(tty->ldisc.ops->num, (int __user *)p);
+		return put_user(tty->ldisc->ops->num, (int __user *)p);
 	case TIOCSETD:
 		return tiocsetd(tty, p);
 	/*
@@ -2785,6 +2736,7 @@ void initialize_tty_struct(struct tty_struct *tty,
 	tty->buf.head = tty->buf.tail = NULL;
 	tty_buffer_init(tty);
 	mutex_init(&tty->termios_mutex);
+	mutex_init(&tty->ldisc_mutex);
 	init_waitqueue_head(&tty->write_wait);
 	init_waitqueue_head(&tty->read_wait);
 	INIT_WORK(&tty->hangup_work, do_tty_hangup);
diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c
index e3c6416aa86..a58a19a6a5e 100644
--- a/drivers/char/tty_ldisc.c
+++ b/drivers/char/tty_ldisc.c
@@ -115,19 +115,22 @@ EXPORT_SYMBOL(tty_unregister_ldisc);
 /**
  *	tty_ldisc_try_get	-	try and reference an ldisc
  *	@disc: ldisc number
- *	@ld: tty ldisc structure to complete
  *
  *	Attempt to open and lock a line discipline into place. Return
- *	the line discipline refcounted and assigned in ld. On an error
- *	report the error code back
+ *	the line discipline refcounted or an error.
  */
 
-static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld)
+static struct tty_ldisc *tty_ldisc_try_get(int disc)
 {
 	unsigned long flags;
+	struct tty_ldisc *ld;
 	struct tty_ldisc_ops *ldops;
 	int err = -EINVAL;
 	
+	ld = kmalloc(sizeof(struct tty_ldisc), GFP_KERNEL);
+	if (ld == NULL)
+		return ERR_PTR(-ENOMEM);
+
 	spin_lock_irqsave(&tty_ldisc_lock, flags);
 	ld->ops = NULL;
 	ldops = tty_ldiscs[disc];
@@ -140,17 +143,19 @@ static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld)
 			/* lock it */
 			ldops->refcount++;
 			ld->ops = ldops;
+			ld->refcount = 0;
 			err = 0;
 		}
 	}
 	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-	return err;
+	if (err)
+		return ERR_PTR(err);
+	return ld;
 }
 
 /**
  *	tty_ldisc_get		-	take a reference to an ldisc
  *	@disc: ldisc number
- *	@ld: tty line discipline structure to use
  *
  *	Takes a reference to a line discipline. Deals with refcounts and
  *	module locking counts. Returns NULL if the discipline is not available.
@@ -161,44 +166,46 @@ static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld)
  *		takes tty_ldisc_lock to guard against ldisc races
  */
 
-static int tty_ldisc_get(int disc, struct tty_ldisc *ld)
+static struct tty_ldisc *tty_ldisc_get(int disc)
 {
-	int err;
+	struct tty_ldisc *ld;
 
 	if (disc < N_TTY || disc >= NR_LDISCS)
-		return -EINVAL;
-	err = tty_ldisc_try_get(disc, ld);
-	if (err < 0) {
+		return ERR_PTR(-EINVAL);
+	ld = tty_ldisc_try_get(disc);
+	if (IS_ERR(ld)) {
 		request_module("tty-ldisc-%d", disc);
-		err = tty_ldisc_try_get(disc, ld);
+		ld = tty_ldisc_try_get(disc);
 	}
-	return err;
+	return ld;
 }
 
 /**
  *	tty_ldisc_put		-	drop ldisc reference
- *	@disc: ldisc number
+ *	@ld: ldisc
  *
  *	Drop a reference to a line discipline. Manage refcounts and
- *	module usage counts
+ *	module usage counts. Free the ldisc once the recount hits zero.
  *
  *	Locking:
  *		takes tty_ldisc_lock to guard against ldisc races
  */
 
-static void tty_ldisc_put(struct tty_ldisc_ops *ld)
+static void tty_ldisc_put(struct tty_ldisc *ld)
 {
 	unsigned long flags;
-	int disc = ld->num;
+	int disc = ld->ops->num;
+	struct tty_ldisc_ops *ldo;
 
 	BUG_ON(disc < N_TTY || disc >= NR_LDISCS);
 
 	spin_lock_irqsave(&tty_ldisc_lock, flags);
-	ld = tty_ldiscs[disc];
-	BUG_ON(ld->refcount == 0);
-	ld->refcount--;
-	module_put(ld->owner);
+	ldo = tty_ldiscs[disc];
+	BUG_ON(ldo->refcount == 0);
+	ldo->refcount--;
+	module_put(ldo->owner);
 	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+	kfree(ld);
 }
 
 static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos)
@@ -219,12 +226,13 @@ static void tty_ldiscs_seq_stop(struct seq_file *m, void *v)
 static int tty_ldiscs_seq_show(struct seq_file *m, void *v)
 {
 	int i = *(loff_t *)v;
-	struct tty_ldisc ld;
+	struct tty_ldisc *ld;
 	
-	if (tty_ldisc_get(i, &ld) < 0)
+	ld = tty_ldisc_try_get(i);
+	if (IS_ERR(ld))
 		return 0;
-	seq_printf(m, "%-10s %2d\n", ld.ops->name ? ld.ops->name : "???", i);
-	tty_ldisc_put(ld.ops);
+	seq_printf(m, "%-10s %2d\n", ld->ops->name ? ld->ops->name : "???", i);
+	tty_ldisc_put(ld);
 	return 0;
 }
 
@@ -263,8 +271,7 @@ const struct file_operations tty_ldiscs_proc_fops = {
 
 static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld)
 {
-	ld->refcount = 0;
-	tty->ldisc = *ld;
+	tty->ldisc = ld;
 }
 
 /**
@@ -286,7 +293,7 @@ static int tty_ldisc_try(struct tty_struct *tty)
 	int ret = 0;
 
 	spin_lock_irqsave(&tty_ldisc_lock, flags);
-	ld = &tty->ldisc;
+	ld = tty->ldisc;
 	if (test_bit(TTY_LDISC, &tty->flags)) {
 		ld->refcount++;
 		ret = 1;
@@ -315,8 +322,8 @@ struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty)
 {
 	/* wait_event is a macro */
 	wait_event(tty_ldisc_wait, tty_ldisc_try(tty));
-	WARN_ON(tty->ldisc.refcount == 0);
-	return &tty->ldisc;
+	WARN_ON(tty->ldisc->refcount == 0);
+	return tty->ldisc;
 }
 
 EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait);
@@ -335,7 +342,7 @@ EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait);
 struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty)
 {
 	if (tty_ldisc_try(tty))
-		return &tty->ldisc;
+		return tty->ldisc;
 	return NULL;
 }
 
@@ -407,6 +414,39 @@ static void tty_set_termios_ldisc(struct tty_struct *tty, int num)
 	mutex_unlock(&tty->termios_mutex);
 }
 
+/**
+ *	tty_ldisc_open		-	open a line discipline
+ *	@tty: tty we are opening the ldisc on
+ *	@ld: discipline to open
+ *
+ *	A helper opening method. Also a convenient debugging and check
+ *	point.
+ */
+
+static int tty_ldisc_open(struct tty_struct *tty, struct tty_ldisc *ld)
+{
+	WARN_ON(test_and_set_bit(TTY_LDISC_OPEN, &tty->flags));
+	if (ld->ops->open)
+		return ld->ops->open(tty);
+	return 0;
+}
+
+/**
+ *	tty_ldisc_close		-	close a line discipline
+ *	@tty: tty we are opening the ldisc on
+ *	@ld: discipline to close
+ *
+ *	A helper close method. Also a convenient debugging and check
+ *	point.
+ */
+
+static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld)
+{
+	WARN_ON(!test_bit(TTY_LDISC_OPEN, &tty->flags));
+	clear_bit(TTY_LDISC_OPEN, &tty->flags);
+	if (ld->ops->close)
+		ld->ops->close(tty);
+}
 
 /**
  *	tty_ldisc_restore	-	helper for tty ldisc change
@@ -420,31 +460,32 @@ static void tty_set_termios_ldisc(struct tty_struct *tty, int num)
 static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old)
 {
 	char buf[64];
-	struct tty_ldisc new_ldisc;
+	struct tty_ldisc *new_ldisc;
+	int r;
 
 	/* There is an outstanding reference here so this is safe */
-	tty_ldisc_get(old->ops->num, old);
+	old = tty_ldisc_get(old->ops->num);
+	WARN_ON(IS_ERR(old));
 	tty_ldisc_assign(tty, old);
 	tty_set_termios_ldisc(tty, old->ops->num);
-	if (old->ops->open && (old->ops->open(tty) < 0)) {
-		tty_ldisc_put(old->ops);
+	if (tty_ldisc_open(tty, old) < 0) {
+		tty_ldisc_put(old);
 		/* This driver is always present */
-		if (tty_ldisc_get(N_TTY, &new_ldisc) < 0)
+		new_ldisc =tty_ldisc_get(N_TTY);
+		if (IS_ERR(new_ldisc))
 			panic("n_tty: get");
-		tty_ldisc_assign(tty, &new_ldisc);
+		tty_ldisc_assign(tty, new_ldisc);
 		tty_set_termios_ldisc(tty, N_TTY);
-		if (new_ldisc.ops->open) {
-			int r = new_ldisc.ops->open(tty);
-				if (r < 0)
-				panic("Couldn't open N_TTY ldisc for "
-				      "%s --- error %d.",
-				      tty_name(tty, buf), r);
-		}
+		r = tty_ldisc_open(tty, new_ldisc);
+		if (r < 0)
+			panic("Couldn't open N_TTY ldisc for "
+			      "%s --- error %d.",
+			      tty_name(tty, buf), r);
 	}
 }
 
 /**
- *	tty_ldisc_halt		-	shutdown the line discipline
+ *	tty_ldisc_halt		-	shut down the line discipline
  *	@tty: tty device
  *
  *	Shut down the line discipline and work queue for this tty device.
@@ -456,14 +497,10 @@ static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old)
  *	tty_ldisc_wait_idle.
  */
 
-static void tty_ldisc_halt(struct tty_struct *tty)
+static int tty_ldisc_halt(struct tty_struct *tty)
 {
 	clear_bit(TTY_LDISC, &tty->flags);
-	cancel_delayed_work(&tty->buf.work);
-	/*
-	 * Wait for ->hangup_work and ->buf.work handlers to terminate
-	 */
-	flush_scheduled_work();
+	return cancel_delayed_work(&tty->buf.work);
 }
 
 /**
@@ -473,18 +510,22 @@ static void tty_ldisc_halt(struct tty_struct *tty)
  *	Wait for the line discipline to become idle. The discipline must
  *	have been halted for this to guarantee it remains idle.
  *
+ *	tty_ldisc_lock protects the ref counts currently.
  */
 
-static void tty_ldisc_wait_idle(struct tty_struct *tty)
+static int tty_ldisc_wait_idle(struct tty_struct *tty)
 {
 	unsigned long flags;
 	spin_lock_irqsave(&tty_ldisc_lock, flags);
-	while (tty->ldisc.refcount) {
+	while (tty->ldisc->refcount) {
 		spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-		wait_event(tty_ldisc_wait, tty->ldisc.refcount == 0);
+		if (wait_event_timeout(tty_ldisc_wait,
+				tty->ldisc->refcount == 0, 5 * HZ) == 0)
+			return -EBUSY;
 		spin_lock_irqsave(&tty_ldisc_lock, flags);
 	}
 	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+	return 0;
 }
 
 /**
@@ -493,38 +534,63 @@ static void tty_ldisc_wait_idle(struct tty_struct *tty)
  *	@ldisc: the line discipline
  *
  *	Set the discipline of a tty line. Must be called from a process
- *	context.
+ *	context. The ldisc change logic has to protect itself against any
+ *	overlapping ldisc change (including on the other end of pty pairs),
+ *	the close of one side of a tty/pty pair, and eventually hangup.
  *
- *	Locking: takes tty_ldisc_lock.
- *		 called functions take termios_mutex
+ *	Locking: takes tty_ldisc_lock, termios_mutex
  */
 
 int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 {
 	int retval;
-	struct tty_ldisc o_ldisc, new_ldisc;
-	int work;
-	unsigned long flags;
+	struct tty_ldisc *o_ldisc, *new_ldisc;
+	int work, o_work = 0;
 	struct tty_struct *o_tty;
 
-restart:
-	/* This is a bit ugly for now but means we can break the 'ldisc
-	   is part of the tty struct' assumption later */
-	retval = tty_ldisc_get(ldisc, &new_ldisc);
-	if (retval)
-		return retval;
+	new_ldisc = tty_ldisc_get(ldisc);
+	if (IS_ERR(new_ldisc))
+		return PTR_ERR(new_ldisc);
 
 	/*
-	 *	Problem: What do we do if this blocks ?
+	 *	We need to look at the tty locking here for pty/tty pairs
+	 *	when both sides try to change in parallel.
 	 */
 
-	tty_wait_until_sent(tty, 0);
+	o_tty = tty->link;	/* o_tty is the pty side or NULL */
+
 
-	if (tty->ldisc.ops->num == ldisc) {
-		tty_ldisc_put(new_ldisc.ops);
+	/*
+	 *	Check the no-op case
+	 */
+
+	if (tty->ldisc->ops->num == ldisc) {
+		tty_ldisc_put(new_ldisc);
 		return 0;
 	}
 
+	/*
+	 *	Problem: What do we do if this blocks ?
+	 *	We could deadlock here
+	 */
+
+	tty_wait_until_sent(tty, 0);
+
+	mutex_lock(&tty->ldisc_mutex);
+
+	/*
+	 *	We could be midstream of another ldisc change which has
+	 *	dropped the lock during processing. If so we need to wait.
+	 */
+
+	while (test_bit(TTY_LDISC_CHANGING, &tty->flags)) {
+		mutex_unlock(&tty->ldisc_mutex);
+		wait_event(tty_ldisc_wait,
+			test_bit(TTY_LDISC_CHANGING, &tty->flags) == 0);
+		mutex_lock(&tty->ldisc_mutex);
+	}
+	set_bit(TTY_LDISC_CHANGING, &tty->flags);
+		
 	/*
 	 *	No more input please, we are switching. The new ldisc
 	 *	will update this value in the ldisc open function
@@ -533,8 +599,6 @@ restart:
 	tty->receive_room = 0;
 
 	o_ldisc = tty->ldisc;
-	o_tty = tty->link;
-
 	/*
 	 *	Make sure we don't change while someone holds a
 	 *	reference to the line discipline. The TTY_LDISC bit
@@ -545,108 +609,181 @@ restart:
 	 *	with a userspace app continually trying to use the tty in
 	 *	parallel to the change and re-referencing the tty.
 	 */
-	clear_bit(TTY_LDISC, &tty->flags);
-	if (o_tty)
-		clear_bit(TTY_LDISC, &o_tty->flags);
 
-	spin_lock_irqsave(&tty_ldisc_lock, flags);
-	if (tty->ldisc.refcount || (o_tty && o_tty->ldisc.refcount)) {
-		if (tty->ldisc.refcount) {
-			/* Free the new ldisc we grabbed. Must drop the lock
-			   first. */
-			spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-			tty_ldisc_put(o_ldisc.ops);
-			/*
-			 * There are several reasons we may be busy, including
-			 * random momentary I/O traffic. We must therefore
-			 * retry. We could distinguish between blocking ops
-			 * and retries if we made tty_ldisc_wait() smarter.
-			 * That is up for discussion.
-			 */
-			if (wait_event_interruptible(tty_ldisc_wait, tty->ldisc.refcount == 0) < 0)
-				return -ERESTARTSYS;
-			goto restart;
-		}
-		if (o_tty && o_tty->ldisc.refcount) {
-			spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-			tty_ldisc_put(o_tty->ldisc.ops);
-			if (wait_event_interruptible(tty_ldisc_wait, o_tty->ldisc.refcount == 0) < 0)
-				return -ERESTARTSYS;
-			goto restart;
-		}
-	}
-	/*
-	 *	If the TTY_LDISC bit is set, then we are racing against
-	 *	another ldisc change
-	 */
-	if (test_bit(TTY_LDISC_CHANGING, &tty->flags)) {
-		struct tty_ldisc *ld;
-		spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-		tty_ldisc_put(new_ldisc.ops);
-		ld = tty_ldisc_ref_wait(tty);
-		tty_ldisc_deref(ld);
-		goto restart;
-	}
-	/*
-	 *	This flag is used to avoid two parallel ldisc changes. Once
-	 *	open and close are fine grained locked this may work better
-	 *	as a mutex shared with the open/close/hup paths
-	 */
-	set_bit(TTY_LDISC_CHANGING, &tty->flags);
+	work = tty_ldisc_halt(tty);
 	if (o_tty)
-		set_bit(TTY_LDISC_CHANGING, &o_tty->flags);
-	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-	
-	/*
-	 *	From this point on we know nobody has an ldisc
-	 *	usage reference, nor can they obtain one until
-	 *	we say so later on.
-	 */
+		o_work = tty_ldisc_halt(o_tty);
 
-	work = cancel_delayed_work(&tty->buf.work);
 	/*
-	 * Wait for ->hangup_work and ->buf.work handlers to terminate
-	 * MUST NOT hold locks here.
+	 * Wait for ->hangup_work and ->buf.work handlers to terminate.
+	 * We must drop the mutex here in case a hangup is also in process.
 	 */
+
+	mutex_unlock(&tty->ldisc_mutex);
+
 	flush_scheduled_work();
+
+	/* Let any existing reference holders finish */
+	retval = tty_ldisc_wait_idle(tty);
+	if (retval < 0) {
+		clear_bit(TTY_LDISC_CHANGING, &tty->flags);
+		tty_ldisc_put(new_ldisc);
+		return retval;
+	}
+
+	mutex_lock(&tty->ldisc_mutex);
+	if (test_bit(TTY_HUPPED, &tty->flags)) {
+		/* We were raced by the hangup method. It will have stomped
+		   the ldisc data and closed the ldisc down */
+		clear_bit(TTY_LDISC_CHANGING, &tty->flags);
+		mutex_unlock(&tty->ldisc_mutex);
+		tty_ldisc_put(new_ldisc);
+		return -EIO;
+	}
+
 	/* Shutdown the current discipline. */
-	if (o_ldisc.ops->close)
-		(o_ldisc.ops->close)(tty);
+	tty_ldisc_close(tty, o_ldisc);
 
 	/* Now set up the new line discipline. */
-	tty_ldisc_assign(tty, &new_ldisc);
+	tty_ldisc_assign(tty, new_ldisc);
 	tty_set_termios_ldisc(tty, ldisc);
-	if (new_ldisc.ops->open)
-		retval = (new_ldisc.ops->open)(tty);
+
+	retval = tty_ldisc_open(tty, new_ldisc);
 	if (retval < 0) {
-		tty_ldisc_put(new_ldisc.ops);
-		tty_ldisc_restore(tty, &o_ldisc);
+		/* Back to the old one or N_TTY if we can't */
+		tty_ldisc_put(new_ldisc);
+		tty_ldisc_restore(tty, o_ldisc);
 	}
+
 	/* At this point we hold a reference to the new ldisc and a
 	   a reference to the old ldisc. If we ended up flipping back
 	   to the existing ldisc we have two references to it */
 
-	if (tty->ldisc.ops->num != o_ldisc.ops->num && tty->ops->set_ldisc)
+	if (tty->ldisc->ops->num != o_ldisc->ops->num && tty->ops->set_ldisc)
 		tty->ops->set_ldisc(tty);
 
-	tty_ldisc_put(o_ldisc.ops);
+	tty_ldisc_put(o_ldisc);
 
 	/*
-	 *	Allow ldisc referencing to occur as soon as the driver
-	 *	ldisc callback completes.
+	 *	Allow ldisc referencing to occur again
 	 */
 
 	tty_ldisc_enable(tty);
 	if (o_tty)
 		tty_ldisc_enable(o_tty);
 
-	/* Restart it in case no characters kick it off. Safe if
+	/* Restart the work queue in case no characters kick it off. Safe if
 	   already running */
 	if (work)
 		schedule_delayed_work(&tty->buf.work, 1);
+	if (o_work)
+		schedule_delayed_work(&o_tty->buf.work, 1);
+	mutex_unlock(&tty->ldisc_mutex);
 	return retval;
 }
 
+/**
+ *	tty_reset_termios	-	reset terminal state
+ *	@tty: tty to reset
+ *
+ *	Restore a terminal to the driver default state.
+ */
+
+static void tty_reset_termios(struct tty_struct *tty)
+{
+	mutex_lock(&tty->termios_mutex);
+	*tty->termios = tty->driver->init_termios;
+	tty->termios->c_ispeed = tty_termios_input_baud_rate(tty->termios);
+	tty->termios->c_ospeed = tty_termios_baud_rate(tty->termios);
+	mutex_unlock(&tty->termios_mutex);
+}
+
+
+/**
+ *	tty_ldisc_reinit	-	reinitialise the tty ldisc
+ *	@tty: tty to reinit
+ *
+ *	Switch the tty back to N_TTY line discipline and leave the
+ *	ldisc state closed
+ */
+
+static void tty_ldisc_reinit(struct tty_struct *tty)
+{
+	struct tty_ldisc *ld;
+
+	tty_ldisc_close(tty, tty->ldisc);
+	tty_ldisc_put(tty->ldisc);
+	tty->ldisc = NULL;
+	/*
+	 *	Switch the line discipline back
+	 */
+	ld = tty_ldisc_get(N_TTY);
+	BUG_ON(IS_ERR(ld));
+	tty_ldisc_assign(tty, ld);
+	tty_set_termios_ldisc(tty, N_TTY);
+}
+
+/**
+ *	tty_ldisc_hangup		-	hangup ldisc reset
+ *	@tty: tty being hung up
+ *
+ *	Some tty devices reset their termios when they receive a hangup
+ *	event. In that situation we must also switch back to N_TTY properly
+ *	before we reset the termios data.
+ *
+ *	Locking: We can take the ldisc mutex as the rest of the code is
+ *	careful to allow for this.
+ *
+ *	In the pty pair case this occurs in the close() path of the
+ *	tty itself so we must be careful about locking rules.
+ */
+
+void tty_ldisc_hangup(struct tty_struct *tty)
+{
+	struct tty_ldisc *ld;
+
+	/*
+	 * FIXME! What are the locking issues here? This may me overdoing
+	 * things... This question is especially important now that we've
+	 * removed the irqlock.
+	 */
+	ld = tty_ldisc_ref(tty);
+	if (ld != NULL) {
+		/* We may have no line discipline at this point */
+		if (ld->ops->flush_buffer)
+			ld->ops->flush_buffer(tty);
+		tty_driver_flush_buffer(tty);
+		if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) &&
+		    ld->ops->write_wakeup)
+			ld->ops->write_wakeup(tty);
+		if (ld->ops->hangup)
+			ld->ops->hangup(tty);
+		tty_ldisc_deref(ld);
+	}
+	/*
+	 * FIXME: Once we trust the LDISC code better we can wait here for
+	 * ldisc completion and fix the driver call race
+	 */
+	wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
+	wake_up_interruptible_poll(&tty->read_wait, POLLIN);
+	/*
+	 * Shutdown the current line discipline, and reset it to
+	 * N_TTY.
+	 */
+	if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) {
+		/* Avoid racing set_ldisc */
+		mutex_lock(&tty->ldisc_mutex);
+		/* Switch back to N_TTY */
+		tty_ldisc_reinit(tty);
+		/* At this point we have a closed ldisc and we want to
+		   reopen it. We could defer this to the next open but
+		   it means auditing a lot of other paths so this is a FIXME */
+		WARN_ON(tty_ldisc_open(tty, tty->ldisc));
+		tty_ldisc_enable(tty);
+		mutex_unlock(&tty->ldisc_mutex);
+		tty_reset_termios(tty);
+	}
+}
 
 /**
  *	tty_ldisc_setup			-	open line discipline
@@ -654,24 +791,23 @@ restart:
  *	@o_tty: pair tty for pty/tty pairs
  *
  *	Called during the initial open of a tty/pty pair in order to set up the
- *	line discplines and bind them to the tty.
+ *	line disciplines and bind them to the tty. This has no locking issues
+ *	as the device isn't yet active.
  */
 
 int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty)
 {
-	struct tty_ldisc *ld = &tty->ldisc;
+	struct tty_ldisc *ld = tty->ldisc;
 	int retval;
 
-	if (ld->ops->open) {
-		retval = (ld->ops->open)(tty);
-		if (retval)
-			return retval;
-	}
-	if (o_tty && o_tty->ldisc.ops->open) {
-		retval = (o_tty->ldisc.ops->open)(o_tty);
+	retval = tty_ldisc_open(tty, ld);
+	if (retval)
+		return retval;
+
+	if (o_tty) {
+		retval = tty_ldisc_open(o_tty, o_tty->ldisc);
 		if (retval) {
-			if (ld->ops->close)
-				(ld->ops->close)(tty);
+			tty_ldisc_close(tty, ld);
 			return retval;
 		}
 		tty_ldisc_enable(o_tty);
@@ -679,34 +815,18 @@ int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty)
 	tty_ldisc_enable(tty);
 	return 0;
 }
-
-static void tty_ldisc_reinit(struct tty_struct *tty)
-{
-	struct tty_ldisc ld;
-
-	if (tty->ldisc.ops->close)
-		(tty->ldisc.ops->close)(tty);
-	tty_ldisc_put(tty->ldisc.ops);
-	/*
-	 *	Switch the line discipline back
-	 */
-	WARN_ON(tty_ldisc_get(N_TTY, &ld));
-	tty_ldisc_assign(tty, &ld);
-	tty_set_termios_ldisc(tty, N_TTY);
-}
-
 /**
  *	tty_ldisc_release		-	release line discipline
  *	@tty: tty being shut down
  *	@o_tty: pair tty for pty/tty pairs
  *
  *	Called during the final close of a tty/pty pair in order to shut down the
- *	line discpline layer.
+ *	line discpline layer. On exit the ldisc assigned is N_TTY and the
+ *	ldisc has not been opened.
  */
 
 void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty)
 {
-
 	/*
 	 * Prevent flush_to_ldisc() from rescheduling the work for later.  Then
 	 * kill any delayed work. As this is the final close it does not
@@ -714,6 +834,7 @@ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty)
 	 */
 
 	tty_ldisc_halt(tty);
+	flush_scheduled_work();
 
 	/*
 	 * Wait for any short term users (we know they are just driver
@@ -730,11 +851,9 @@ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty)
 	 */
 
 	tty_ldisc_reinit(tty);
-	if (o_tty) {
-		/* FIXME: could o_tty be in setldisc here ? */
-		clear_bit(TTY_LDISC, &o_tty->flags);
-		tty_ldisc_reinit(o_tty);
-	}
+	/* This will need doing differently if we need to lock */
+	if (o_tty)
+		tty_ldisc_release(o_tty, NULL);
 }
 
 /**
@@ -747,10 +866,10 @@ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty)
 
 void tty_ldisc_init(struct tty_struct *tty)
 {
-	struct tty_ldisc ld;
-	if (tty_ldisc_get(N_TTY, &ld) < 0)
+	struct tty_ldisc *ld = tty_ldisc_get(N_TTY);
+	if (IS_ERR(ld))
 		panic("n_tty: init_tty");
-	tty_ldisc_assign(tty, &ld);
+	tty_ldisc_assign(tty, ld);
 }
 
 void tty_ldisc_begin(void)
diff --git a/include/linux/tty.h b/include/linux/tty.h
index f9c13c83790..1488d8c81aa 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -226,8 +226,11 @@ struct tty_struct {
 	struct tty_driver *driver;
 	const struct tty_operations *ops;
 	int index;
-	/* The ldisc objects are protected by tty_ldisc_lock at the moment */
-	struct tty_ldisc ldisc;
+
+	/* Protects ldisc changes: Lock tty not pty */
+	struct mutex ldisc_mutex;
+	struct tty_ldisc *ldisc;
+
 	struct mutex termios_mutex;
 	spinlock_t ctrl_lock;
 	/* Termios values are protected by the termios mutex */
@@ -314,6 +317,7 @@ struct tty_struct {
 #define TTY_CLOSING 		7	/* ->close() in progress */
 #define TTY_LDISC 		9	/* Line discipline attached */
 #define TTY_LDISC_CHANGING 	10	/* Line discipline changing */
+#define TTY_LDISC_OPEN	 	11	/* Line discipline is open */
 #define TTY_HW_COOK_OUT 	14	/* Hardware can do output cooking */
 #define TTY_HW_COOK_IN 		15	/* Hardware can do input cooking */
 #define TTY_PTY_LOCK 		16	/* pty private */
@@ -406,6 +410,7 @@ extern int tty_termios_hw_change(struct ktermios *a, struct ktermios *b);
 extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *);
 extern void tty_ldisc_deref(struct tty_ldisc *);
 extern struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *);
+extern void tty_ldisc_hangup(struct tty_struct *tty);
 extern const struct file_operations tty_ldiscs_proc_fops;
 
 extern void tty_wakeup(struct tty_struct *tty);
-- 
cgit v1.2.3-70-g09d2


From 08e0992f60ad44025a8a8b8a821838ca4a562686 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <florian@openwrt.org>
Date: Thu, 11 Jun 2009 13:21:24 +0100
Subject: serial: add support for the TI AR7 internal UART

This patch adds support for the TI AR7 internal UART.

Signed-off-by: Florian Fainelli <florian@openwrt.org>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/8250.c       | 7 +++++++
 include/linux/serial_core.h | 3 ++-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index a0127e93ade..fb867a9f55e 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -287,6 +287,13 @@ static const struct serial8250_config uart_config[] = {
 		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
 		.flags		= UART_CAP_FIFO,
 	},
+	[PORT_AR7] = {
+		.name		= "AR7",
+		.fifo_size	= 16,
+		.tx_loadsz	= 16,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00,
+		.flags		= UART_CAP_FIFO | UART_CAP_AFE,
+	},
 };
 
 #if defined (CONFIG_SERIAL_8250_AU1X00)
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 57a97e52e58..48766ea845c 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -41,7 +41,8 @@
 #define PORT_XSCALE	15
 #define PORT_RM9000	16	/* PMC-Sierra RM9xxx internal UART */
 #define PORT_OCTEON	17	/* Cavium OCTEON internal UART */
-#define PORT_MAX_8250	17	/* max port ID */
+#define PORT_AR7	18	/* Texas Instruments AR7 internal UART */
+#define PORT_MAX_8250	18	/* max port ID */
 
 /*
  * ARM specific type numbers.  These are not currently guaranteed
-- 
cgit v1.2.3-70-g09d2


From 34aec591847c696339189b070cce2a11f901cfea Mon Sep 17 00:00:00 2001
From: Richard Röjfors <richard.rojfors.ext@mocean-labs.com>
Date: Thu, 11 Jun 2009 14:05:39 +0100
Subject: serial: Added Timberdale UART driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Driver for the UART found in the Timberdale FPGA

Signed-off-by: Richard Röjfors <richard.rojfors.ext@mocean-labs.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/Kconfig      |   7 +
 drivers/serial/Makefile     |   1 +
 drivers/serial/timbuart.c   | 520 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/serial/timbuart.h   |  58 +++++
 include/linux/serial_core.h |   3 +
 5 files changed, 589 insertions(+)
 create mode 100644 drivers/serial/timbuart.c
 create mode 100644 drivers/serial/timbuart.h

(limited to 'include')

diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index 343e3a35b6a..3391ef0d761 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -1433,4 +1433,11 @@ config SPORT_BAUD_RATE
 	default 19200 if (SERIAL_SPORT_BAUD_RATE_19200)
 	default 9600 if (SERIAL_SPORT_BAUD_RATE_9600)
 
+config SERIAL_TIMBERDALE
+	tristate "Support for timberdale UART"
+	depends on MFD_TIMBERDALE
+	select SERIAL_CORE
+	---help---
+	Add support for UART controller on timberdale.
+
 endmenu
diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile
index d438eb2a73d..45a8658f54d 100644
--- a/drivers/serial/Makefile
+++ b/drivers/serial/Makefile
@@ -77,3 +77,4 @@ obj-$(CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL) += nwpserial.o
 obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o
 obj-$(CONFIG_KGDB_SERIAL_CONSOLE) += kgdboc.o
 obj-$(CONFIG_SERIAL_QE) += ucc_uart.o
+obj-$(CONFIG_SERIAL_TIMBERDALE)	+= timbuart.o
diff --git a/drivers/serial/timbuart.c b/drivers/serial/timbuart.c
new file mode 100644
index 00000000000..30ba3c4cc18
--- /dev/null
+++ b/drivers/serial/timbuart.c
@@ -0,0 +1,520 @@
+/*
+ * timbuart.c timberdale FPGA UART driver
+ * Copyright (c) 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Supports:
+ * Timberdale FPGA UART
+ */
+
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/serial_core.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/ioport.h>
+
+#include "timbuart.h"
+
+struct timbuart_port {
+	struct uart_port	port;
+	struct tasklet_struct	tasklet;
+	int			usedma;
+	u8			last_ier;
+	struct platform_device  *dev;
+};
+
+static int baudrates[] = {9600, 19200, 38400, 57600, 115200, 230400, 460800,
+	921600, 1843200, 3250000};
+
+static void timbuart_mctrl_check(struct uart_port *port, u8 isr, u8 *ier);
+
+static irqreturn_t timbuart_handleinterrupt(int irq, void *devid);
+
+static void timbuart_stop_rx(struct uart_port *port)
+{
+	/* spin lock held by upper layer, disable all RX interrupts */
+	u8 ier = ioread8(port->membase + TIMBUART_IER) & ~RXFLAGS;
+	iowrite8(ier, port->membase + TIMBUART_IER);
+}
+
+static void timbuart_stop_tx(struct uart_port *port)
+{
+	/* spinlock held by upper layer, disable TX interrupt */
+	u8 ier = ioread8(port->membase + TIMBUART_IER) & ~TXBAE;
+	iowrite8(ier, port->membase + TIMBUART_IER);
+}
+
+static void timbuart_start_tx(struct uart_port *port)
+{
+	struct timbuart_port *uart =
+		container_of(port, struct timbuart_port, port);
+
+	/* do not transfer anything here -> fire off the tasklet */
+	tasklet_schedule(&uart->tasklet);
+}
+
+static void timbuart_flush_buffer(struct uart_port *port)
+{
+	u8 ctl = ioread8(port->membase + TIMBUART_CTRL) | TIMBUART_CTRL_FLSHTX;
+
+	iowrite8(ctl, port->membase + TIMBUART_CTRL);
+	iowrite8(TXBF, port->membase + TIMBUART_ISR);
+}
+
+static void timbuart_rx_chars(struct uart_port *port)
+{
+	struct tty_struct *tty = port->info->port.tty;
+
+	while (ioread8(port->membase + TIMBUART_ISR) & RXDP) {
+		u8 ch = ioread8(port->membase + TIMBUART_RXFIFO);
+		port->icount.rx++;
+		tty_insert_flip_char(tty, ch, TTY_NORMAL);
+	}
+
+	spin_unlock(&port->lock);
+	tty_flip_buffer_push(port->info->port.tty);
+	spin_lock(&port->lock);
+
+	dev_dbg(port->dev, "%s - total read %d bytes\n",
+		__func__, port->icount.rx);
+}
+
+static void timbuart_tx_chars(struct uart_port *port)
+{
+	struct circ_buf *xmit = &port->info->xmit;
+
+	while (!(ioread8(port->membase + TIMBUART_ISR) & TXBF) &&
+		!uart_circ_empty(xmit)) {
+		iowrite8(xmit->buf[xmit->tail],
+			port->membase + TIMBUART_TXFIFO);
+		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+		port->icount.tx++;
+	}
+
+	dev_dbg(port->dev,
+		"%s - total written %d bytes, CTL: %x, RTS: %x, baud: %x\n",
+		 __func__,
+		port->icount.tx,
+		ioread8(port->membase + TIMBUART_CTRL),
+		port->mctrl & TIOCM_RTS,
+		ioread8(port->membase + TIMBUART_BAUDRATE));
+}
+
+static void timbuart_handle_tx_port(struct uart_port *port, u8 isr, u8 *ier)
+{
+	struct timbuart_port *uart =
+		container_of(port, struct timbuart_port, port);
+	struct circ_buf *xmit = &port->info->xmit;
+
+	if (uart_circ_empty(xmit) || uart_tx_stopped(port))
+		return;
+
+	if (port->x_char)
+		return;
+
+	if (isr & TXFLAGS) {
+		timbuart_tx_chars(port);
+		/* clear all TX interrupts */
+		iowrite8(TXFLAGS, port->membase + TIMBUART_ISR);
+
+		if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+			uart_write_wakeup(port);
+	} else
+		/* Re-enable any tx interrupt */
+		*ier |= uart->last_ier & TXFLAGS;
+
+	/* enable interrupts if there are chars in the transmit buffer,
+	 * Or if we delivered some bytes and want the almost empty interrupt
+	 * we wake up the upper layer later when we got the interrupt
+	 * to give it some time to go out...
+	 */
+	if (!uart_circ_empty(xmit))
+		*ier |= TXBAE;
+
+	dev_dbg(port->dev, "%s - leaving\n", __func__);
+}
+
+void timbuart_handle_rx_port(struct uart_port *port, u8 isr, u8 *ier)
+{
+	if (isr & RXFLAGS) {
+		/* Some RX status is set */
+		if (isr & RXBF) {
+			u8 ctl = ioread8(port->membase + TIMBUART_CTRL) |
+				TIMBUART_CTRL_FLSHRX;
+			iowrite8(ctl, port->membase + TIMBUART_CTRL);
+			port->icount.overrun++;
+		} else if (isr & (RXDP))
+			timbuart_rx_chars(port);
+
+		/* ack all RX interrupts */
+		iowrite8(RXFLAGS, port->membase + TIMBUART_ISR);
+	}
+
+	/* always have the RX interrupts enabled */
+	*ier |= RXBAF | RXBF | RXTT;
+
+	dev_dbg(port->dev, "%s - leaving\n", __func__);
+}
+
+void timbuart_tasklet(unsigned long arg)
+{
+	struct timbuart_port *uart = (struct timbuart_port *)arg;
+	u8 isr, ier = 0;
+
+	spin_lock(&uart->port.lock);
+
+	isr = ioread8(uart->port.membase + TIMBUART_ISR);
+	dev_dbg(uart->port.dev, "%s ISR: %x\n", __func__, isr);
+
+	if (!uart->usedma)
+		timbuart_handle_tx_port(&uart->port, isr, &ier);
+
+	timbuart_mctrl_check(&uart->port, isr, &ier);
+
+	if (!uart->usedma)
+		timbuart_handle_rx_port(&uart->port, isr, &ier);
+
+	iowrite8(ier, uart->port.membase + TIMBUART_IER);
+
+	spin_unlock(&uart->port.lock);
+	dev_dbg(uart->port.dev, "%s leaving\n", __func__);
+}
+
+static unsigned int timbuart_tx_empty(struct uart_port *port)
+{
+	u8 isr = ioread8(port->membase + TIMBUART_ISR);
+
+	return (isr & TXBAE) ? TIOCSER_TEMT : 0;
+}
+
+static unsigned int timbuart_get_mctrl(struct uart_port *port)
+{
+	u8 cts = ioread8(port->membase + TIMBUART_CTRL);
+	dev_dbg(port->dev, "%s - cts %x\n", __func__, cts);
+
+	if (cts & TIMBUART_CTRL_CTS)
+		return TIOCM_CTS | TIOCM_DSR | TIOCM_CAR;
+	else
+		return TIOCM_DSR | TIOCM_CAR;
+}
+
+static void timbuart_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+	dev_dbg(port->dev, "%s - %x\n", __func__, mctrl);
+
+	if (mctrl & TIOCM_RTS)
+		iowrite8(TIMBUART_CTRL_RTS, port->membase + TIMBUART_CTRL);
+	else
+		iowrite8(TIMBUART_CTRL_RTS, port->membase + TIMBUART_CTRL);
+}
+
+static void timbuart_mctrl_check(struct uart_port *port, u8 isr, u8 *ier)
+{
+	unsigned int cts;
+
+	if (isr & CTS_DELTA) {
+		/* ack */
+		iowrite8(CTS_DELTA, port->membase + TIMBUART_ISR);
+		cts = timbuart_get_mctrl(port);
+		uart_handle_cts_change(port, cts & TIOCM_CTS);
+		wake_up_interruptible(&port->info->delta_msr_wait);
+	}
+
+	*ier |= CTS_DELTA;
+}
+
+static void timbuart_enable_ms(struct uart_port *port)
+{
+	/* N/A */
+}
+
+static void timbuart_break_ctl(struct uart_port *port, int ctl)
+{
+	/* N/A */
+}
+
+static int timbuart_startup(struct uart_port *port)
+{
+	struct timbuart_port *uart =
+		container_of(port, struct timbuart_port, port);
+
+	dev_dbg(port->dev, "%s\n", __func__);
+
+	iowrite8(TIMBUART_CTRL_FLSHRX, port->membase + TIMBUART_CTRL);
+	iowrite8(0xff, port->membase + TIMBUART_ISR);
+	/* Enable all but TX interrupts */
+	iowrite8(RXBAF | RXBF | RXTT | CTS_DELTA,
+		port->membase + TIMBUART_IER);
+
+	return request_irq(port->irq, timbuart_handleinterrupt, IRQF_SHARED,
+		"timb-uart", uart);
+}
+
+static void timbuart_shutdown(struct uart_port *port)
+{
+	struct timbuart_port *uart =
+		container_of(port, struct timbuart_port, port);
+	dev_dbg(port->dev, "%s\n", __func__);
+	free_irq(port->irq, uart);
+	iowrite8(0, port->membase + TIMBUART_IER);
+}
+
+static int get_bindex(int baud)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(baudrates); i++)
+		if (baud == baudrates[i])
+			return i;
+
+	return -1;
+}
+
+static void timbuart_set_termios(struct uart_port *port,
+	struct ktermios *termios,
+	struct ktermios *old)
+{
+	unsigned int baud;
+	short bindex;
+	unsigned long flags;
+
+	baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk / 16);
+	bindex = get_bindex(baud);
+	dev_dbg(port->dev, "%s - bindex %d\n", __func__, bindex);
+
+	if (bindex < 0) {
+		printk(KERN_ALERT "timbuart: Unsupported baud rate\n");
+	} else {
+		spin_lock_irqsave(&port->lock, flags);
+		iowrite8((u8)bindex, port->membase + TIMBUART_BAUDRATE);
+		uart_update_timeout(port, termios->c_cflag, baud);
+		spin_unlock_irqrestore(&port->lock, flags);
+	}
+}
+
+static const char *timbuart_type(struct uart_port *port)
+{
+	return port->type == PORT_UNKNOWN ? "timbuart" : NULL;
+}
+
+/* We do not request/release mappings of the registers here,
+ * currently it's done in the proble function.
+ */
+static void timbuart_release_port(struct uart_port *port)
+{
+	struct platform_device *pdev = to_platform_device(port->dev);
+	int size =
+		resource_size(platform_get_resource(pdev, IORESOURCE_MEM, 0));
+
+	if (port->flags & UPF_IOREMAP) {
+		iounmap(port->membase);
+		port->membase = NULL;
+	}
+
+	release_mem_region(port->mapbase, size);
+}
+
+static int timbuart_request_port(struct uart_port *port)
+{
+	struct platform_device *pdev = to_platform_device(port->dev);
+	int size =
+		resource_size(platform_get_resource(pdev, IORESOURCE_MEM, 0));
+
+	if (!request_mem_region(port->mapbase, size, "timb-uart"))
+		return -EBUSY;
+
+	if (port->flags & UPF_IOREMAP) {
+		port->membase = ioremap(port->mapbase, size);
+		if (port->membase == NULL) {
+			release_mem_region(port->mapbase, size);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+static irqreturn_t timbuart_handleinterrupt(int irq, void *devid)
+{
+	struct timbuart_port *uart = (struct timbuart_port *)devid;
+
+	if (ioread8(uart->port.membase + TIMBUART_IPR)) {
+		uart->last_ier = ioread8(uart->port.membase + TIMBUART_IER);
+
+		/* disable interrupts, the tasklet enables them again */
+		iowrite8(0, uart->port.membase + TIMBUART_IER);
+
+		/* fire off bottom half */
+		tasklet_schedule(&uart->tasklet);
+
+		return IRQ_HANDLED;
+	} else
+		return IRQ_NONE;
+}
+
+/*
+ * Configure/autoconfigure the port.
+ */
+static void timbuart_config_port(struct uart_port *port, int flags)
+{
+	if (flags & UART_CONFIG_TYPE) {
+		port->type = PORT_TIMBUART;
+		timbuart_request_port(port);
+	}
+}
+
+static int timbuart_verify_port(struct uart_port *port,
+	struct serial_struct *ser)
+{
+	/* we don't want the core code to modify any port params */
+	return -EINVAL;
+}
+
+static struct uart_ops timbuart_ops = {
+	.tx_empty = timbuart_tx_empty,
+	.set_mctrl = timbuart_set_mctrl,
+	.get_mctrl = timbuart_get_mctrl,
+	.stop_tx = timbuart_stop_tx,
+	.start_tx = timbuart_start_tx,
+	.flush_buffer = timbuart_flush_buffer,
+	.stop_rx = timbuart_stop_rx,
+	.enable_ms = timbuart_enable_ms,
+	.break_ctl = timbuart_break_ctl,
+	.startup = timbuart_startup,
+	.shutdown = timbuart_shutdown,
+	.set_termios = timbuart_set_termios,
+	.type = timbuart_type,
+	.release_port = timbuart_release_port,
+	.request_port = timbuart_request_port,
+	.config_port = timbuart_config_port,
+	.verify_port = timbuart_verify_port
+};
+
+static struct uart_driver timbuart_driver = {
+	.owner = THIS_MODULE,
+	.driver_name = "timberdale_uart",
+	.dev_name = "ttyTU",
+	.major = TIMBUART_MAJOR,
+	.minor = TIMBUART_MINOR,
+	.nr = 1
+};
+
+static int timbuart_probe(struct platform_device *dev)
+{
+	int err;
+	struct timbuart_port *uart;
+	struct resource *iomem;
+
+	dev_dbg(&dev->dev, "%s\n", __func__);
+
+	uart = kzalloc(sizeof(*uart), GFP_KERNEL);
+	if (!uart) {
+		err = -EINVAL;
+		goto err_mem;
+	}
+
+	uart->usedma = 0;
+
+	uart->port.uartclk = 3250000 * 16;
+	uart->port.fifosize  = TIMBUART_FIFO_SIZE;
+	uart->port.regshift  = 2;
+	uart->port.iotype  = UPIO_MEM;
+	uart->port.ops = &timbuart_ops;
+	uart->port.irq = 0;
+	uart->port.flags = UPF_BOOT_AUTOCONF | UPF_IOREMAP;
+	uart->port.line  = 0;
+	uart->port.dev	= &dev->dev;
+
+	iomem = platform_get_resource(dev, IORESOURCE_MEM, 0);
+	if (!iomem) {
+		err = -ENOMEM;
+		goto err_register;
+	}
+	uart->port.mapbase = iomem->start;
+	uart->port.membase = NULL;
+
+	uart->port.irq = platform_get_irq(dev, 0);
+	if (uart->port.irq < 0) {
+		err = -EINVAL;
+		goto err_register;
+	}
+
+	tasklet_init(&uart->tasklet, timbuart_tasklet, (unsigned long)uart);
+
+	err = uart_register_driver(&timbuart_driver);
+	if (err)
+		goto err_register;
+
+	err = uart_add_one_port(&timbuart_driver, &uart->port);
+	if (err)
+		goto err_add_port;
+
+	platform_set_drvdata(dev, uart);
+
+	return 0;
+
+err_add_port:
+	uart_unregister_driver(&timbuart_driver);
+err_register:
+	kfree(uart);
+err_mem:
+	printk(KERN_ERR "timberdale: Failed to register Timberdale UART: %d\n",
+		err);
+
+	return err;
+}
+
+static int timbuart_remove(struct platform_device *dev)
+{
+	struct timbuart_port *uart = platform_get_drvdata(dev);
+
+	tasklet_kill(&uart->tasklet);
+	uart_remove_one_port(&timbuart_driver, &uart->port);
+	uart_unregister_driver(&timbuart_driver);
+	kfree(uart);
+
+	return 0;
+}
+
+static struct platform_driver timbuart_platform_driver = {
+	.driver = {
+		.name	= "timb-uart",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= timbuart_probe,
+	.remove		= timbuart_remove,
+};
+
+/*--------------------------------------------------------------------------*/
+
+static int __init timbuart_init(void)
+{
+	return platform_driver_register(&timbuart_platform_driver);
+}
+
+static void __exit timbuart_exit(void)
+{
+	platform_driver_unregister(&timbuart_platform_driver);
+}
+
+module_init(timbuart_init);
+module_exit(timbuart_exit);
+
+MODULE_DESCRIPTION("Timberdale UART driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:timb-uart");
+
diff --git a/drivers/serial/timbuart.h b/drivers/serial/timbuart.h
new file mode 100644
index 00000000000..7e566766bc4
--- /dev/null
+++ b/drivers/serial/timbuart.h
@@ -0,0 +1,58 @@
+/*
+ * timbuart.c timberdale FPGA GPIO driver
+ * Copyright (c) 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Supports:
+ * Timberdale FPGA UART
+ */
+
+#ifndef _TIMBUART_H
+#define _TIMBUART_H
+
+#define TIMBUART_FIFO_SIZE	2048
+
+#define TIMBUART_RXFIFO		0x08
+#define TIMBUART_TXFIFO		0x0c
+#define TIMBUART_IER		0x10
+#define TIMBUART_IPR		0x14
+#define TIMBUART_ISR		0x18
+#define TIMBUART_CTRL		0x1c
+#define TIMBUART_BAUDRATE	0x20
+
+#define TIMBUART_CTRL_RTS	0x01
+#define TIMBUART_CTRL_CTS	0x02
+#define TIMBUART_CTRL_FLSHTX	0x40
+#define TIMBUART_CTRL_FLSHRX	0x80
+
+#define TXBF		0x01
+#define TXBAE		0x02
+#define CTS_DELTA	0x04
+#define RXDP		0x08
+#define RXBAF		0x10
+#define RXBF		0x20
+#define RXTT		0x40
+#define RXBNAE		0x80
+#define TXBE		0x100
+
+#define RXFLAGS (RXDP | RXBAF | RXBF | RXTT | RXBNAE)
+#define TXFLAGS (TXBF | TXBAE)
+
+#define TIMBUART_MAJOR 204
+#define TIMBUART_MINOR 192
+
+#endif /* _TIMBUART_H */
+
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 48766ea845c..6fd80c4243f 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -168,6 +168,9 @@
 /* MAX3100 */
 #define PORT_MAX3100    86
 
+/* Timberdale UART */
+#define PORT_TIMBUART	87
+
 #ifdef __KERNEL__
 
 #include <linux/compiler.h>
-- 
cgit v1.2.3-70-g09d2


From 8759ef32d992fc6c0bcbe40fca7aa302190918a5 Mon Sep 17 00:00:00 2001
From: Oskar Schirmer <os@emlix.com>
Date: Thu, 11 Jun 2009 14:51:15 +0100
Subject: lib: isolate rational fractions helper function

Provide a helper function to determine optimum numerator
denominator value pairs taking into account restricted
register size. Useful especially with PLL and other clock
configurations.

Signed-off-by: Oskar Schirmer <os@emlix.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rational.h | 19 +++++++++++++++
 lib/Kconfig              |  3 +++
 lib/Makefile             |  1 +
 lib/rational.c           | 62 ++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 85 insertions(+)
 create mode 100644 include/linux/rational.h
 create mode 100644 lib/rational.c

(limited to 'include')

diff --git a/include/linux/rational.h b/include/linux/rational.h
new file mode 100644
index 00000000000..4f532fcd9ee
--- /dev/null
+++ b/include/linux/rational.h
@@ -0,0 +1,19 @@
+/*
+ * rational fractions
+ *
+ * Copyright (C) 2009 emlix GmbH, Oskar Schirmer <os@emlix.com>
+ *
+ * helper functions when coping with rational numbers,
+ * e.g. when calculating optimum numerator/denominator pairs for
+ * pll configuration taking into account restricted register size
+ */
+
+#ifndef _LINUX_RATIONAL_H
+#define _LINUX_RATIONAL_H
+
+void rational_best_approximation(
+	unsigned long given_numerator, unsigned long given_denominator,
+	unsigned long max_numerator, unsigned long max_denominator,
+	unsigned long *best_numerator, unsigned long *best_denominator);
+
+#endif /* _LINUX_RATIONAL_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index 8ade0a7a91e..9960be04cbb 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -10,6 +10,9 @@ menu "Library routines"
 config BITREVERSE
 	tristate
 
+config RATIONAL
+	boolean
+
 config GENERIC_FIND_FIRST_BIT
 	bool
 
diff --git a/lib/Makefile b/lib/Makefile
index 33a40e40e3e..1f6edefebff 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -50,6 +50,7 @@ ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
 endif
 
 obj-$(CONFIG_BITREVERSE) += bitrev.o
+obj-$(CONFIG_RATIONAL)	+= rational.o
 obj-$(CONFIG_CRC_CCITT)	+= crc-ccitt.o
 obj-$(CONFIG_CRC16)	+= crc16.o
 obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o
diff --git a/lib/rational.c b/lib/rational.c
new file mode 100644
index 00000000000..b3c099b5478
--- /dev/null
+++ b/lib/rational.c
@@ -0,0 +1,62 @@
+/*
+ * rational fractions
+ *
+ * Copyright (C) 2009 emlix GmbH, Oskar Schirmer <os@emlix.com>
+ *
+ * helper functions when coping with rational numbers
+ */
+
+#include <linux/rational.h>
+
+/*
+ * calculate best rational approximation for a given fraction
+ * taking into account restricted register size, e.g. to find
+ * appropriate values for a pll with 5 bit denominator and
+ * 8 bit numerator register fields, trying to set up with a
+ * frequency ratio of 3.1415, one would say:
+ *
+ * rational_best_approximation(31415, 10000,
+ *		(1 << 8) - 1, (1 << 5) - 1, &n, &d);
+ *
+ * you may look at given_numerator as a fixed point number,
+ * with the fractional part size described in given_denominator.
+ *
+ * for theoretical background, see:
+ * http://en.wikipedia.org/wiki/Continued_fraction
+ */
+
+void rational_best_approximation(
+	unsigned long given_numerator, unsigned long given_denominator,
+	unsigned long max_numerator, unsigned long max_denominator,
+	unsigned long *best_numerator, unsigned long *best_denominator)
+{
+	unsigned long n, d, n0, d0, n1, d1;
+	n = given_numerator;
+	d = given_denominator;
+	n0 = d1 = 0;
+	n1 = d0 = 1;
+	for (;;) {
+		unsigned long t, a;
+		if ((n1 > max_numerator) || (d1 > max_denominator)) {
+			n1 = n0;
+			d1 = d0;
+			break;
+		}
+		if (d == 0)
+			break;
+		t = d;
+		a = n / d;
+		d = n % d;
+		n = t;
+		t = n0 + a * n1;
+		n0 = n1;
+		n1 = t;
+		t = d0 + a * d1;
+		d0 = d1;
+		d1 = t;
+	}
+	*best_numerator = n1;
+	*best_denominator = d1;
+}
+
+EXPORT_SYMBOL(rational_best_approximation);
-- 
cgit v1.2.3-70-g09d2


From 1c432d899d32d36371ee4ee310fa3609cf0e5742 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 11 Jun 2009 13:19:29 +0200
Subject: perf_counter: Rename enums

Rename the perf enums to be in the 'perf_' namespace and strictly
enumerate the ABI bits.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 53 +++++++++++++++++++++-----------------------
 kernel/perf_counter.c        |  6 ++---
 2 files changed, 28 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 95c797c480e..d5911b02bc8 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -24,24 +24,21 @@
 /*
  * attr.type
  */
-enum perf_event_types {
+enum perf_type_id {
 	PERF_TYPE_HARDWARE		= 0,
 	PERF_TYPE_SOFTWARE		= 1,
 	PERF_TYPE_TRACEPOINT		= 2,
 	PERF_TYPE_HW_CACHE		= 3,
+	PERF_TYPE_RAW			= 4,
 
-	/*
-	 * available TYPE space, raw is the max value.
-	 */
-
-	PERF_TYPE_RAW			= 128,
+	PERF_TYPE_MAX,			/* non ABI */
 };
 
 /*
  * Generalized performance counter event types, used by the attr.event_id
  * parameter of the sys_perf_counter_open() syscall:
  */
-enum attr_ids {
+enum perf_hw_id {
 	/*
 	 * Common hardware events, generalized by the kernel:
 	 */
@@ -53,7 +50,7 @@ enum attr_ids {
 	PERF_COUNT_BRANCH_MISSES	= 5,
 	PERF_COUNT_BUS_CYCLES		= 6,
 
-	PERF_HW_EVENTS_MAX		= 7,
+	PERF_HW_EVENTS_MAX,		/* non ABI */
 };
 
 /*
@@ -63,30 +60,30 @@ enum attr_ids {
  *       { read, write, prefetch } x
  *       { accesses, misses }
  */
-enum hw_cache_id {
-	PERF_COUNT_HW_CACHE_L1D,
-	PERF_COUNT_HW_CACHE_L1I,
-	PERF_COUNT_HW_CACHE_L2,
-	PERF_COUNT_HW_CACHE_DTLB,
-	PERF_COUNT_HW_CACHE_ITLB,
-	PERF_COUNT_HW_CACHE_BPU,
-
-	PERF_COUNT_HW_CACHE_MAX,
+enum perf_hw_cache_id {
+	PERF_COUNT_HW_CACHE_L1D		= 0,
+	PERF_COUNT_HW_CACHE_L1I		= 1,
+	PERF_COUNT_HW_CACHE_L2		= 2,
+	PERF_COUNT_HW_CACHE_DTLB	= 3,
+	PERF_COUNT_HW_CACHE_ITLB	= 4,
+	PERF_COUNT_HW_CACHE_BPU		= 5,
+
+	PERF_COUNT_HW_CACHE_MAX,	/* non ABI */
 };
 
-enum hw_cache_op_id {
-	PERF_COUNT_HW_CACHE_OP_READ,
-	PERF_COUNT_HW_CACHE_OP_WRITE,
-	PERF_COUNT_HW_CACHE_OP_PREFETCH,
+enum perf_hw_cache_op_id {
+	PERF_COUNT_HW_CACHE_OP_READ	= 0,
+	PERF_COUNT_HW_CACHE_OP_WRITE	= 1,
+	PERF_COUNT_HW_CACHE_OP_PREFETCH	= 2,
 
-	PERF_COUNT_HW_CACHE_OP_MAX,
+	PERF_COUNT_HW_CACHE_OP_MAX,	/* non ABI */
 };
 
-enum hw_cache_op_result_id {
-	PERF_COUNT_HW_CACHE_RESULT_ACCESS,
-	PERF_COUNT_HW_CACHE_RESULT_MISS,
+enum perf_hw_cache_op_result_id {
+	PERF_COUNT_HW_CACHE_RESULT_ACCESS	= 0,
+	PERF_COUNT_HW_CACHE_RESULT_MISS		= 1,
 
-	PERF_COUNT_HW_CACHE_RESULT_MAX,
+	PERF_COUNT_HW_CACHE_RESULT_MAX,		/* non ABI */
 };
 
 /*
@@ -95,7 +92,7 @@ enum hw_cache_op_result_id {
  * physical and sw events of the kernel (and allow the profiling of them as
  * well):
  */
-enum sw_event_ids {
+enum perf_sw_ids {
 	PERF_COUNT_CPU_CLOCK		= 0,
 	PERF_COUNT_TASK_CLOCK		= 1,
 	PERF_COUNT_PAGE_FAULTS		= 2,
@@ -104,7 +101,7 @@ enum sw_event_ids {
 	PERF_COUNT_PAGE_FAULTS_MIN	= 5,
 	PERF_COUNT_PAGE_FAULTS_MAJ	= 6,
 
-	PERF_SW_EVENTS_MAX		= 7,
+	PERF_SW_EVENTS_MAX,		/* non ABI */
 };
 
 /*
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 3b2829de559..c02535bed26 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3162,7 +3162,7 @@ static int perf_swcounter_is_counting(struct perf_counter *counter)
 }
 
 static int perf_swcounter_match(struct perf_counter *counter,
-				enum perf_event_types type,
+				enum perf_type_id type,
 				u32 event, struct pt_regs *regs)
 {
 	if (!perf_swcounter_is_counting(counter))
@@ -3194,7 +3194,7 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
 }
 
 static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
-				     enum perf_event_types type, u32 event,
+				     enum perf_type_id type, u32 event,
 				     u64 nr, int nmi, struct pt_regs *regs,
 				     u64 addr)
 {
@@ -3225,7 +3225,7 @@ static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx)
 	return &cpuctx->recursion[0];
 }
 
-static void __perf_swcounter_event(enum perf_event_types type, u32 event,
+static void __perf_swcounter_event(enum perf_type_id type, u32 event,
 				   u64 nr, int nmi, struct pt_regs *regs,
 				   u64 addr)
 {
-- 
cgit v1.2.3-70-g09d2


From f4dbfa8f3131a84257223393905f7efad0ca5996 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 11 Jun 2009 14:06:28 +0200
Subject: perf_counter: Standardize event names

Pure renames only, to PERF_COUNT_HW_* and PERF_COUNT_SW_*.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/power4-pmu.c   | 12 +++++------
 arch/powerpc/kernel/power5+-pmu.c  | 12 +++++------
 arch/powerpc/kernel/power5-pmu.c   | 12 +++++------
 arch/powerpc/kernel/power6-pmu.c   | 12 +++++------
 arch/powerpc/kernel/ppc970-pmu.c   | 12 +++++------
 arch/powerpc/mm/fault.c            |  6 +++---
 arch/x86/kernel/cpu/perf_counter.c | 32 +++++++++++++--------------
 arch/x86/mm/fault.c                |  6 +++---
 include/linux/perf_counter.h       | 36 +++++++++++++++----------------
 kernel/perf_counter.c              | 20 ++++++++---------
 tools/perf/builtin-record.c        |  4 ++--
 tools/perf/builtin-stat.c          | 31 ++++++++++++++-------------
 tools/perf/builtin-top.c           |  4 ++--
 tools/perf/design.txt              | 28 ++++++++++++------------
 tools/perf/util/parse-events.c     | 44 +++++++++++++++++++-------------------
 15 files changed, 136 insertions(+), 135 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
index 0e94b685722..73956f084b2 100644
--- a/arch/powerpc/kernel/power4-pmu.c
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -535,12 +535,12 @@ static void p4_disable_pmc(unsigned int pmc, u64 mmcr[])
 }
 
 static int p4_generic_events[] = {
-	[PERF_COUNT_CPU_CYCLES] = 7,
-	[PERF_COUNT_INSTRUCTIONS] = 0x1001,
-	[PERF_COUNT_CACHE_REFERENCES] = 0x8c10,		/* PM_LD_REF_L1 */
-	[PERF_COUNT_CACHE_MISSES] = 0x3c10,		/* PM_LD_MISS_L1 */
-	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x330,	/* PM_BR_ISSUED */
-	[PERF_COUNT_BRANCH_MISSES] = 0x331,		/* PM_BR_MPRED_CR */
+	[PERF_COUNT_HW_CPU_CYCLES]		= 7,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x1001,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x8c10, /* PM_LD_REF_L1 */
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x3c10, /* PM_LD_MISS_L1 */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x330,  /* PM_BR_ISSUED */
+	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x331,  /* PM_BR_MPRED_CR */
 };
 
 #define C(x)	PERF_COUNT_HW_CACHE_##x
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index bbf2cbb0738..5f8b7741e97 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -606,12 +606,12 @@ static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[])
 }
 
 static int power5p_generic_events[] = {
-	[PERF_COUNT_CPU_CYCLES] = 0xf,
-	[PERF_COUNT_INSTRUCTIONS] = 0x100009,
-	[PERF_COUNT_CACHE_REFERENCES] = 0x1c10a8,	/* LD_REF_L1 */
-	[PERF_COUNT_CACHE_MISSES] = 0x3c1088,		/* LD_MISS_L1 */
-	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4,	/* BR_ISSUED */ 
-	[PERF_COUNT_BRANCH_MISSES] = 0x230e5,		/* BR_MPRED_CR */
+	[PERF_COUNT_HW_CPU_CYCLES]		= 0xf,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x100009,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x1c10a8, /* LD_REF_L1 */
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x3c1088, /* LD_MISS_L1 */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x230e4,  /* BR_ISSUED */
+	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x230e5,  /* BR_MPRED_CR */
 };
 
 #define C(x)	PERF_COUNT_HW_CACHE_##x
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
index 670cf10b91e..d54723ab627 100644
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -548,12 +548,12 @@ static void power5_disable_pmc(unsigned int pmc, u64 mmcr[])
 }
 
 static int power5_generic_events[] = {
-	[PERF_COUNT_CPU_CYCLES] = 0xf,
-	[PERF_COUNT_INSTRUCTIONS] = 0x100009,
-	[PERF_COUNT_CACHE_REFERENCES] = 0x4c1090,	/* LD_REF_L1 */
-	[PERF_COUNT_CACHE_MISSES] = 0x3c1088,		/* LD_MISS_L1 */
-	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4,	/* BR_ISSUED */ 
-	[PERF_COUNT_BRANCH_MISSES] = 0x230e5,		/* BR_MPRED_CR */
+	[PERF_COUNT_HW_CPU_CYCLES]		= 0xf,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x100009,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4c1090, /* LD_REF_L1 */
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x3c1088, /* LD_MISS_L1 */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x230e4,  /* BR_ISSUED */
+	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x230e5,  /* BR_MPRED_CR */
 };
 
 #define C(x)	PERF_COUNT_HW_CACHE_##x
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index 4da70786609..0cd406ee765 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -466,12 +466,12 @@ static void p6_disable_pmc(unsigned int pmc, u64 mmcr[])
 }
 
 static int power6_generic_events[] = {
-	[PERF_COUNT_CPU_CYCLES] = 0x1e,
-	[PERF_COUNT_INSTRUCTIONS] = 2,
-	[PERF_COUNT_CACHE_REFERENCES] = 0x280030,	/* LD_REF_L1 */
-	[PERF_COUNT_CACHE_MISSES] = 0x30000c,		/* LD_MISS_L1 */
-	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x410a0,	/* BR_PRED */ 
-	[PERF_COUNT_BRANCH_MISSES] = 0x400052,		/* BR_MPRED */
+	[PERF_COUNT_HW_CPU_CYCLES]		= 0x1e,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 2,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x280030, /* LD_REF_L1 */
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x30000c, /* LD_MISS_L1 */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x410a0,  /* BR_PRED */
+	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x400052, /* BR_MPRED */
 };
 
 #define C(x)	PERF_COUNT_HW_CACHE_##x
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
index 336adf1736a..46a20640942 100644
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -419,12 +419,12 @@ static void p970_disable_pmc(unsigned int pmc, u64 mmcr[])
 }
 
 static int ppc970_generic_events[] = {
-	[PERF_COUNT_CPU_CYCLES] = 7,
-	[PERF_COUNT_INSTRUCTIONS] = 1,
-	[PERF_COUNT_CACHE_REFERENCES] = 0x8810,		/* PM_LD_REF_L1 */
-	[PERF_COUNT_CACHE_MISSES] = 0x3810,		/* PM_LD_MISS_L1 */
-	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x431,	/* PM_BR_ISSUED */
-	[PERF_COUNT_BRANCH_MISSES] = 0x327,		/* PM_GRP_BR_MPRED */
+	[PERF_COUNT_HW_CPU_CYCLES]		= 7,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 1,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x8810, /* PM_LD_REF_L1 */
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x3810, /* PM_LD_MISS_L1 */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x431,  /* PM_BR_ISSUED */
+	[PERF_COUNT_HW_BRANCH_MISSES] 		= 0x327,  /* PM_GRP_BR_MPRED */
 };
 
 #define C(x)	PERF_COUNT_HW_CACHE_##x
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index ac0e112031b..5beffc8f481 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -171,7 +171,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 		die("Weird page fault", regs, SIGSEGV);
 	}
 
-	perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address);
+	perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
 
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
@@ -312,7 +312,7 @@ good_area:
 	}
 	if (ret & VM_FAULT_MAJOR) {
 		current->maj_flt++;
-		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0,
+		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
 				     regs, address);
 #ifdef CONFIG_PPC_SMLPAR
 		if (firmware_has_feature(FW_FEATURE_CMO)) {
@@ -323,7 +323,7 @@ good_area:
 #endif
 	} else {
 		current->min_flt++;
-		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0,
+		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
 				     regs, address);
 	}
 	up_read(&mm->mmap_sem);
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 57ae1bec81b..572fb434a66 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -69,13 +69,13 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
  */
 static const u64 intel_perfmon_event_map[] =
 {
-  [PERF_COUNT_CPU_CYCLES]		= 0x003c,
-  [PERF_COUNT_INSTRUCTIONS]		= 0x00c0,
-  [PERF_COUNT_CACHE_REFERENCES]		= 0x4f2e,
-  [PERF_COUNT_CACHE_MISSES]		= 0x412e,
-  [PERF_COUNT_BRANCH_INSTRUCTIONS]	= 0x00c4,
-  [PERF_COUNT_BRANCH_MISSES]		= 0x00c5,
-  [PERF_COUNT_BUS_CYCLES]		= 0x013c,
+  [PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
+  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
+  [PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
+  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
+  [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
 };
 
 static u64 intel_pmu_event_map(int event)
@@ -485,12 +485,12 @@ static const u64 amd_0f_hw_cache_event_ids
  */
 static const u64 amd_perfmon_event_map[] =
 {
-  [PERF_COUNT_CPU_CYCLES]		= 0x0076,
-  [PERF_COUNT_INSTRUCTIONS]		= 0x00c0,
-  [PERF_COUNT_CACHE_REFERENCES]		= 0x0080,
-  [PERF_COUNT_CACHE_MISSES]		= 0x0081,
-  [PERF_COUNT_BRANCH_INSTRUCTIONS]	= 0x00c4,
-  [PERF_COUNT_BRANCH_MISSES]		= 0x00c5,
+  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
+  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0080,
+  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0081,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
+  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
 };
 
 static u64 amd_pmu_event_map(int event)
@@ -970,11 +970,11 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
 
 	event = hwc->config & ARCH_PERFMON_EVENT_MASK;
 
-	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_INSTRUCTIONS)))
+	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
 		return X86_PMC_IDX_FIXED_INSTRUCTIONS;
-	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_CPU_CYCLES)))
+	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
 		return X86_PMC_IDX_FIXED_CPU_CYCLES;
-	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_BUS_CYCLES)))
+	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
 		return X86_PMC_IDX_FIXED_BUS_CYCLES;
 
 	return -1;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 6f9df2babe4..5c6d816f30b 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1045,7 +1045,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(regs, error_code, address);
 
-	perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address);
+	perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
 
 	/*
 	 * If we're in an interrupt, have no user context or are running
@@ -1142,11 +1142,11 @@ good_area:
 
 	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0,
+		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
 				     regs, address);
 	} else {
 		tsk->min_flt++;
-		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0,
+		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
 				     regs, address);
 	}
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index d5911b02bc8..887df88a9c2 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -42,15 +42,15 @@ enum perf_hw_id {
 	/*
 	 * Common hardware events, generalized by the kernel:
 	 */
-	PERF_COUNT_CPU_CYCLES		= 0,
-	PERF_COUNT_INSTRUCTIONS		= 1,
-	PERF_COUNT_CACHE_REFERENCES	= 2,
-	PERF_COUNT_CACHE_MISSES		= 3,
-	PERF_COUNT_BRANCH_INSTRUCTIONS	= 4,
-	PERF_COUNT_BRANCH_MISSES	= 5,
-	PERF_COUNT_BUS_CYCLES		= 6,
-
-	PERF_HW_EVENTS_MAX,		/* non ABI */
+	PERF_COUNT_HW_CPU_CYCLES		= 0,
+	PERF_COUNT_HW_INSTRUCTIONS		= 1,
+	PERF_COUNT_HW_CACHE_REFERENCES		= 2,
+	PERF_COUNT_HW_CACHE_MISSES		= 3,
+	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
+	PERF_COUNT_HW_BRANCH_MISSES		= 5,
+	PERF_COUNT_HW_BUS_CYCLES		= 6,
+
+	PERF_COUNT_HW_MAX,		/* non ABI */
 };
 
 /*
@@ -93,15 +93,15 @@ enum perf_hw_cache_op_result_id {
  * well):
  */
 enum perf_sw_ids {
-	PERF_COUNT_CPU_CLOCK		= 0,
-	PERF_COUNT_TASK_CLOCK		= 1,
-	PERF_COUNT_PAGE_FAULTS		= 2,
-	PERF_COUNT_CONTEXT_SWITCHES	= 3,
-	PERF_COUNT_CPU_MIGRATIONS	= 4,
-	PERF_COUNT_PAGE_FAULTS_MIN	= 5,
-	PERF_COUNT_PAGE_FAULTS_MAJ	= 6,
-
-	PERF_SW_EVENTS_MAX,		/* non ABI */
+	PERF_COUNT_SW_CPU_CLOCK		= 0,
+	PERF_COUNT_SW_TASK_CLOCK	= 1,
+	PERF_COUNT_SW_PAGE_FAULTS	= 2,
+	PERF_COUNT_SW_CONTEXT_SWITCHES	= 3,
+	PERF_COUNT_SW_CPU_MIGRATIONS	= 4,
+	PERF_COUNT_SW_PAGE_FAULTS_MIN	= 5,
+	PERF_COUNT_SW_PAGE_FAULTS_MAJ	= 6,
+
+	PERF_COUNT_SW_MAX,		/* non ABI */
 };
 
 /*
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index c02535bed26..8859b97390e 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1024,7 +1024,7 @@ void perf_counter_task_sched_out(struct task_struct *task,
 	int do_switch = 1;
 
 	regs = task_pt_regs(task);
-	perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0);
+	perf_swcounter_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, regs, 0);
 
 	if (likely(!ctx || !cpuctx->task_ctx))
 		return;
@@ -3411,13 +3411,13 @@ void perf_counter_task_migration(struct task_struct *task, int cpu)
 	struct perf_counter_context *ctx;
 
 	perf_swcounter_ctx_event(&cpuctx->ctx, PERF_TYPE_SOFTWARE,
-				 PERF_COUNT_CPU_MIGRATIONS,
+				 PERF_COUNT_SW_CPU_MIGRATIONS,
 				 1, 1, NULL, 0);
 
 	ctx = perf_pin_task_context(task);
 	if (ctx) {
 		perf_swcounter_ctx_event(ctx, PERF_TYPE_SOFTWARE,
-					 PERF_COUNT_CPU_MIGRATIONS,
+					 PERF_COUNT_SW_CPU_MIGRATIONS,
 					 1, 1, NULL, 0);
 		perf_unpin_context(ctx);
 	}
@@ -3475,11 +3475,11 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 	 * events.
 	 */
 	switch (counter->attr.config) {
-	case PERF_COUNT_CPU_CLOCK:
+	case PERF_COUNT_SW_CPU_CLOCK:
 		pmu = &perf_ops_cpu_clock;
 
 		break;
-	case PERF_COUNT_TASK_CLOCK:
+	case PERF_COUNT_SW_TASK_CLOCK:
 		/*
 		 * If the user instantiates this as a per-cpu counter,
 		 * use the cpu_clock counter instead.
@@ -3490,11 +3490,11 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 			pmu = &perf_ops_cpu_clock;
 
 		break;
-	case PERF_COUNT_PAGE_FAULTS:
-	case PERF_COUNT_PAGE_FAULTS_MIN:
-	case PERF_COUNT_PAGE_FAULTS_MAJ:
-	case PERF_COUNT_CONTEXT_SWITCHES:
-	case PERF_COUNT_CPU_MIGRATIONS:
+	case PERF_COUNT_SW_PAGE_FAULTS:
+	case PERF_COUNT_SW_PAGE_FAULTS_MIN:
+	case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
+	case PERF_COUNT_SW_CONTEXT_SWITCHES:
+	case PERF_COUNT_SW_CPU_MIGRATIONS:
 		pmu = &perf_ops_generic;
 		break;
 	}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 84cd336ae79..29259e74dcf 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -378,12 +378,12 @@ try_again:
 		 * is always available even if no PMU support:
 		 */
 		if (attr->type == PERF_TYPE_HARDWARE
-			&& attr->config == PERF_COUNT_CPU_CYCLES) {
+			&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {
 
 			if (verbose)
 				warning(" ... trying to fall back to cpu-clock-ticks\n");
 			attr->type = PERF_TYPE_SOFTWARE;
-			attr->config = PERF_COUNT_CPU_CLOCK;
+			attr->config = PERF_COUNT_SW_CPU_CLOCK;
 			goto try_again;
 		}
 		printf("\n");
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 6404906924f..c43e4a97dc4 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -46,15 +46,16 @@
 
 static struct perf_counter_attr default_attrs[MAX_COUNTERS] = {
 
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_TASK_CLOCK		},
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_CONTEXT_SWITCHES	},
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_CPU_MIGRATIONS	},
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_PAGE_FAULTS	},
-
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CPU_CYCLES		},
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_INSTRUCTIONS	},
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CACHE_REFERENCES	},
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CACHE_MISSES	},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK	},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS	},
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS	},
+
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES	},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS	},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES	},
+
 };
 
 static int			system_wide			=  0;
@@ -120,10 +121,10 @@ static inline int nsec_counter(int counter)
 	if (attrs[counter].type != PERF_TYPE_SOFTWARE)
 		return 0;
 
-	if (attrs[counter].config == PERF_COUNT_CPU_CLOCK)
+	if (attrs[counter].config == PERF_COUNT_SW_CPU_CLOCK)
 		return 1;
 
-	if (attrs[counter].config == PERF_COUNT_TASK_CLOCK)
+	if (attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK)
 		return 1;
 
 	return 0;
@@ -176,10 +177,10 @@ static void read_counter(int counter)
 	 * Save the full runtime - to allow normalization during printout:
 	 */
 	if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
-		attrs[counter].config == PERF_COUNT_TASK_CLOCK)
+		attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK)
 		runtime_nsecs = count[0];
 	if (attrs[counter].type == PERF_TYPE_HARDWARE &&
-		attrs[counter].config == PERF_COUNT_CPU_CYCLES)
+		attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES)
 		runtime_cycles = count[0];
 }
 
@@ -206,7 +207,7 @@ static void print_counter(int counter)
 		fprintf(stderr, " %14.6f  %-20s",
 			msecs, event_name(counter));
 		if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
-			attrs[counter].config == PERF_COUNT_TASK_CLOCK) {
+			attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) {
 
 			if (walltime_nsecs)
 				fprintf(stderr, " # %11.3f CPU utilization factor",
@@ -220,7 +221,7 @@ static void print_counter(int counter)
 				(double)count[0]/runtime_nsecs*1000.0);
 		if (runtime_cycles &&
 			attrs[counter].type == PERF_TYPE_HARDWARE &&
-				attrs[counter].config == PERF_COUNT_INSTRUCTIONS) {
+				attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) {
 
 			fprintf(stderr, " # %1.3f per cycle",
 				(double)count[0] / (double)runtime_cycles);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 309dbc76ec8..fe338d3c5d7 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -562,13 +562,13 @@ try_again:
 		 * is always available even if no PMU support:
 		 */
 		if (attr->type == PERF_TYPE_HARDWARE
-			&& attr->config == PERF_COUNT_CPU_CYCLES) {
+			&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {
 
 			if (verbose)
 				warning(" ... trying to fall back to cpu-clock-ticks\n");
 
 			attr->type = PERF_TYPE_SOFTWARE;
-			attr->config = PERF_COUNT_CPU_CLOCK;
+			attr->config = PERF_COUNT_SW_CPU_CLOCK;
 			goto try_again;
 		}
 		printf("\n");
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index d3250763dc9..860e116d979 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -99,13 +99,13 @@ enum hw_event_ids {
 	/*
 	 * Common hardware events, generalized by the kernel:
 	 */
-	PERF_COUNT_CPU_CYCLES		= 0,
-	PERF_COUNT_INSTRUCTIONS		= 1,
-	PERF_COUNT_CACHE_REFERENCES	= 2,
-	PERF_COUNT_CACHE_MISSES		= 3,
-	PERF_COUNT_BRANCH_INSTRUCTIONS	= 4,
-	PERF_COUNT_BRANCH_MISSES	= 5,
-	PERF_COUNT_BUS_CYCLES		= 6,
+	PERF_COUNT_HW_CPU_CYCLES		= 0,
+	PERF_COUNT_HW_INSTRUCTIONS		= 1,
+	PERF_COUNT_HW_CACHE_REFERENCES	= 2,
+	PERF_COUNT_HW_CACHE_MISSES		= 3,
+	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
+	PERF_COUNT_HW_BRANCH_MISSES	= 5,
+	PERF_COUNT_HW_BUS_CYCLES		= 6,
 };
 
 These are standardized types of events that work relatively uniformly
@@ -130,13 +130,13 @@ software events, selected by 'event_id':
  * well):
  */
 enum sw_event_ids {
-	PERF_COUNT_CPU_CLOCK		= 0,
-	PERF_COUNT_TASK_CLOCK		= 1,
-	PERF_COUNT_PAGE_FAULTS		= 2,
-	PERF_COUNT_CONTEXT_SWITCHES	= 3,
-	PERF_COUNT_CPU_MIGRATIONS	= 4,
-	PERF_COUNT_PAGE_FAULTS_MIN	= 5,
-	PERF_COUNT_PAGE_FAULTS_MAJ	= 6,
+	PERF_COUNT_SW_CPU_CLOCK		= 0,
+	PERF_COUNT_SW_TASK_CLOCK		= 1,
+	PERF_COUNT_SW_PAGE_FAULTS		= 2,
+	PERF_COUNT_SW_CONTEXT_SWITCHES	= 3,
+	PERF_COUNT_SW_CPU_MIGRATIONS	= 4,
+	PERF_COUNT_SW_PAGE_FAULTS_MIN	= 5,
+	PERF_COUNT_SW_PAGE_FAULTS_MAJ	= 6,
 };
 
 Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index f18a9a006e1..9d5f1ca50e6 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -22,26 +22,26 @@ struct event_symbol {
 #define CR(x, y) .type = PERF_TYPE_##x, .config = y
 
 static struct event_symbol event_symbols[] = {
-  { C(HARDWARE, CPU_CYCLES),		"cpu-cycles",		},
-  { C(HARDWARE, CPU_CYCLES),		"cycles",		},
-  { C(HARDWARE, INSTRUCTIONS),		"instructions",		},
-  { C(HARDWARE, CACHE_REFERENCES),	"cache-references",	},
-  { C(HARDWARE, CACHE_MISSES),		"cache-misses",		},
-  { C(HARDWARE, BRANCH_INSTRUCTIONS),	"branch-instructions",	},
-  { C(HARDWARE, BRANCH_INSTRUCTIONS),	"branches",		},
-  { C(HARDWARE, BRANCH_MISSES),		"branch-misses",	},
-  { C(HARDWARE, BUS_CYCLES),		"bus-cycles",		},
-
-  { C(SOFTWARE, CPU_CLOCK),		"cpu-clock",		},
-  { C(SOFTWARE, TASK_CLOCK),		"task-clock",		},
-  { C(SOFTWARE, PAGE_FAULTS),		"page-faults",		},
-  { C(SOFTWARE, PAGE_FAULTS),		"faults",		},
-  { C(SOFTWARE, PAGE_FAULTS_MIN),	"minor-faults",		},
-  { C(SOFTWARE, PAGE_FAULTS_MAJ),	"major-faults",		},
-  { C(SOFTWARE, CONTEXT_SWITCHES),	"context-switches",	},
-  { C(SOFTWARE, CONTEXT_SWITCHES),	"cs",			},
-  { C(SOFTWARE, CPU_MIGRATIONS),	"cpu-migrations",	},
-  { C(SOFTWARE, CPU_MIGRATIONS),	"migrations",		},
+  { C(HARDWARE, HW_CPU_CYCLES),		"cpu-cycles",		},
+  { C(HARDWARE, HW_CPU_CYCLES),		"cycles",		},
+  { C(HARDWARE, HW_INSTRUCTIONS),	"instructions",		},
+  { C(HARDWARE, HW_CACHE_REFERENCES),	"cache-references",	},
+  { C(HARDWARE, HW_CACHE_MISSES),	"cache-misses",		},
+  { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branch-instructions",	},
+  { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branches",		},
+  { C(HARDWARE, HW_BRANCH_MISSES),	"branch-misses",	},
+  { C(HARDWARE, HW_BUS_CYCLES),		"bus-cycles",		},
+
+  { C(SOFTWARE, SW_CPU_CLOCK),		"cpu-clock",		},
+  { C(SOFTWARE, SW_TASK_CLOCK),		"task-clock",		},
+  { C(SOFTWARE, SW_PAGE_FAULTS),	"page-faults",		},
+  { C(SOFTWARE, SW_PAGE_FAULTS),	"faults",		},
+  { C(SOFTWARE, SW_PAGE_FAULTS_MIN),	"minor-faults",		},
+  { C(SOFTWARE, SW_PAGE_FAULTS_MAJ),	"major-faults",		},
+  { C(SOFTWARE, SW_CONTEXT_SWITCHES),	"context-switches",	},
+  { C(SOFTWARE, SW_CONTEXT_SWITCHES),	"cs",			},
+  { C(SOFTWARE, SW_CPU_MIGRATIONS),	"cpu-migrations",	},
+  { C(SOFTWARE, SW_CPU_MIGRATIONS),	"migrations",		},
 };
 
 #define __PERF_COUNTER_FIELD(config, name) \
@@ -107,7 +107,7 @@ char *event_name(int counter)
 
 	switch (type) {
 	case PERF_TYPE_HARDWARE:
-		if (config < PERF_HW_EVENTS_MAX)
+		if (config < PERF_COUNT_HW_MAX)
 			return hw_event_names[config];
 		return "unknown-hardware";
 
@@ -136,7 +136,7 @@ char *event_name(int counter)
 	}
 
 	case PERF_TYPE_SOFTWARE:
-		if (config < PERF_SW_EVENTS_MAX)
+		if (config < PERF_COUNT_SW_MAX)
 			return sw_event_names[config];
 		return "unknown-software";
 
-- 
cgit v1.2.3-70-g09d2


From 8be6e8f3c3a13900169f1141870562d0c723b010 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 11 Jun 2009 14:19:11 +0200
Subject: perf_counter: Rename L2 to LL cache

The top (fastest) and last level (biggest) caches are the most
interesting ones, performance wise.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
[ Fixed the Nehalem LL table to LLC Reference/Miss events ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/power4-pmu.c   |  2 +-
 arch/powerpc/kernel/power5+-pmu.c  |  2 +-
 arch/powerpc/kernel/power5-pmu.c   |  2 +-
 arch/powerpc/kernel/power6-pmu.c   |  2 +-
 arch/powerpc/kernel/power7-pmu.c   |  2 +-
 arch/powerpc/kernel/ppc970-pmu.c   |  2 +-
 arch/x86/kernel/cpu/perf_counter.c | 12 ++++++------
 include/linux/perf_counter.h       |  4 ++--
 8 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
index 73956f084b2..07bd308a5fa 100644
--- a/arch/powerpc/kernel/power4-pmu.c
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -561,7 +561,7 @@ static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 		[C(OP_WRITE)] = {	-1,		-1	},
 		[C(OP_PREFETCH)] = {	0,		0	},
 	},
-	[C(L2)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
 		[C(OP_READ)] = {	0,		0	},
 		[C(OP_WRITE)] = {	0,		0	},
 		[C(OP_PREFETCH)] = {	0xc34,		0	},
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index 5f8b7741e97..41e5d2d958d 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -632,7 +632,7 @@ static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 		[C(OP_WRITE)] = {	-1,		-1		},
 		[C(OP_PREFETCH)] = {	0,		0		},
 	},
-	[C(L2)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
 		[C(OP_READ)] = {	0,		0		},
 		[C(OP_WRITE)] = {	0,		0		},
 		[C(OP_PREFETCH)] = {	0xc50c3,	0		},
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
index d54723ab627..05600b66221 100644
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -574,7 +574,7 @@ static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 		[C(OP_WRITE)] = {	-1,		-1		},
 		[C(OP_PREFETCH)] = {	0,		0		},
 	},
-	[C(L2)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
 		[C(OP_READ)] = {	0,		0x3c309b	},
 		[C(OP_WRITE)] = {	0,		0		},
 		[C(OP_PREFETCH)] = {	0xc50c3,	0		},
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index 0cd406ee765..46f74bebcfd 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -493,7 +493,7 @@ static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 		[C(OP_WRITE)] = {	-1,		-1		},
 		[C(OP_PREFETCH)] = {	0x4008c,	0		},
 	},
-	[C(L2)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
 		[C(OP_READ)] = {	0x150730,	0x250532	},
 		[C(OP_WRITE)] = {	0x250432,	0x150432	},
 		[C(OP_PREFETCH)] = {	0x810a6,	0		},
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
index 060e0deb399..b3f7d1216ba 100644
--- a/arch/powerpc/kernel/power7-pmu.c
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -320,7 +320,7 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 		[C(OP_WRITE)] = {	-1,		-1	},
 		[C(OP_PREFETCH)] = {	0x408a,		0	},
 	},
-	[C(L2)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
 		[C(OP_READ)] = {	0x6080,		0x6084	},
 		[C(OP_WRITE)] = {	0x6082,		0x6086	},
 		[C(OP_PREFETCH)] = {	0,		0	},
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
index 46a20640942..ba0a357a89f 100644
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -445,7 +445,7 @@ static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 		[C(OP_WRITE)] = {	-1,		-1	},
 		[C(OP_PREFETCH)] = {	0,		0	},
 	},
-	[C(L2)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
+	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
 		[C(OP_READ)] = {	0,		0	},
 		[C(OP_WRITE)] = {	0,		0	},
 		[C(OP_PREFETCH)] = {	0x733,		0	},
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 572fb434a66..895c82e7845 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -131,7 +131,7 @@ static const u64 nehalem_hw_cache_event_ids
 		[ C(RESULT_MISS)   ] = 0x0,
 	},
  },
- [ C(L2  ) ] = {
+ [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
 		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
@@ -141,8 +141,8 @@ static const u64 nehalem_hw_cache_event_ids
 		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0xc024, /* L2_RQSTS.PREFETCHES          */
-		[ C(RESULT_MISS)   ] = 0x8024, /* L2_RQSTS.PREFETCH_MISS       */
+		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
+		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
 	},
  },
  [ C(DTLB) ] = {
@@ -222,7 +222,7 @@ static const u64 core2_hw_cache_event_ids
 		[ C(RESULT_MISS)   ] = 0,
 	},
  },
- [ C(L2  ) ] = {
+ [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
 		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
@@ -313,7 +313,7 @@ static const u64 atom_hw_cache_event_ids
 		[ C(RESULT_MISS)   ] = 0,
 	},
  },
- [ C(L2  ) ] = {
+ [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
 		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
@@ -422,7 +422,7 @@ static const u64 amd_0f_hw_cache_event_ids
 		[ C(RESULT_MISS)   ] = 0,
 	},
  },
- [ C(L2  ) ] = {
+ [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0,
 		[ C(RESULT_MISS)   ] = 0,
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 887df88a9c2..20cf5af27ad 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -56,14 +56,14 @@ enum perf_hw_id {
 /*
  * Generalized hardware cache counters:
  *
- *       { L1-D, L1-I, L2, LLC, ITLB, DTLB, BPU } x
+ *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
  *       { read, write, prefetch } x
  *       { accesses, misses }
  */
 enum perf_hw_cache_id {
 	PERF_COUNT_HW_CACHE_L1D		= 0,
 	PERF_COUNT_HW_CACHE_L1I		= 1,
-	PERF_COUNT_HW_CACHE_L2		= 2,
+	PERF_COUNT_HW_CACHE_LL		= 2,
 	PERF_COUNT_HW_CACHE_DTLB	= 3,
 	PERF_COUNT_HW_CACHE_ITLB	= 4,
 	PERF_COUNT_HW_CACHE_BPU		= 5,
-- 
cgit v1.2.3-70-g09d2


From a308444ceb576d3089f9ca0dfd097eba6f1e623f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Jun 2009 14:44:26 +0200
Subject: perf_counter: Better align code

Whitespace and comment bits. Also update copyrights.

[ Impact: cleanup ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
---
 include/linux/perf_counter.h | 165 ++++++++++++++++++++++---------------------
 1 file changed, 85 insertions(+), 80 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 20cf5af27ad..1fa1a26cb1b 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -1,12 +1,13 @@
 /*
  *  Performance counters:
  *
- *   Copyright(C) 2008, Thomas Gleixner <tglx@linutronix.de>
- *   Copyright(C) 2008, Red Hat, Inc., Ingo Molnar
+ *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
+ *    Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar
+ *    Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra
  *
  *  Data type definitions, declarations, prototypes.
  *
- *  Started by: Thomas Gleixner and Ingo Molnar
+ *    Started by: Thomas Gleixner and Ingo Molnar
  *
  *  For licencing details see kernel-base/COPYING
  */
@@ -25,18 +26,19 @@
  * attr.type
  */
 enum perf_type_id {
-	PERF_TYPE_HARDWARE		= 0,
-	PERF_TYPE_SOFTWARE		= 1,
-	PERF_TYPE_TRACEPOINT		= 2,
-	PERF_TYPE_HW_CACHE		= 3,
-	PERF_TYPE_RAW			= 4,
+	PERF_TYPE_HARDWARE			= 0,
+	PERF_TYPE_SOFTWARE			= 1,
+	PERF_TYPE_TRACEPOINT			= 2,
+	PERF_TYPE_HW_CACHE			= 3,
+	PERF_TYPE_RAW				= 4,
 
-	PERF_TYPE_MAX,			/* non ABI */
+	PERF_TYPE_MAX,				/* non-ABI */
 };
 
 /*
- * Generalized performance counter event types, used by the attr.event_id
- * parameter of the sys_perf_counter_open() syscall:
+ * Generalized performance counter event types, used by the
+ * attr.event_id parameter of the sys_perf_counter_open()
+ * syscall:
  */
 enum perf_hw_id {
 	/*
@@ -50,7 +52,7 @@ enum perf_hw_id {
 	PERF_COUNT_HW_BRANCH_MISSES		= 5,
 	PERF_COUNT_HW_BUS_CYCLES		= 6,
 
-	PERF_COUNT_HW_MAX,		/* non ABI */
+	PERF_COUNT_HW_MAX,			/* non-ABI */
 };
 
 /*
@@ -61,29 +63,29 @@ enum perf_hw_id {
  *       { accesses, misses }
  */
 enum perf_hw_cache_id {
-	PERF_COUNT_HW_CACHE_L1D		= 0,
-	PERF_COUNT_HW_CACHE_L1I		= 1,
-	PERF_COUNT_HW_CACHE_LL		= 2,
-	PERF_COUNT_HW_CACHE_DTLB	= 3,
-	PERF_COUNT_HW_CACHE_ITLB	= 4,
-	PERF_COUNT_HW_CACHE_BPU		= 5,
-
-	PERF_COUNT_HW_CACHE_MAX,	/* non ABI */
+	PERF_COUNT_HW_CACHE_L1D			= 0,
+	PERF_COUNT_HW_CACHE_L1I			= 1,
+	PERF_COUNT_HW_CACHE_LL			= 2,
+	PERF_COUNT_HW_CACHE_DTLB		= 3,
+	PERF_COUNT_HW_CACHE_ITLB		= 4,
+	PERF_COUNT_HW_CACHE_BPU			= 5,
+
+	PERF_COUNT_HW_CACHE_MAX,		/* non-ABI */
 };
 
 enum perf_hw_cache_op_id {
-	PERF_COUNT_HW_CACHE_OP_READ	= 0,
-	PERF_COUNT_HW_CACHE_OP_WRITE	= 1,
-	PERF_COUNT_HW_CACHE_OP_PREFETCH	= 2,
+	PERF_COUNT_HW_CACHE_OP_READ		= 0,
+	PERF_COUNT_HW_CACHE_OP_WRITE		= 1,
+	PERF_COUNT_HW_CACHE_OP_PREFETCH		= 2,
 
-	PERF_COUNT_HW_CACHE_OP_MAX,	/* non ABI */
+	PERF_COUNT_HW_CACHE_OP_MAX,		/* non-ABI */
 };
 
 enum perf_hw_cache_op_result_id {
 	PERF_COUNT_HW_CACHE_RESULT_ACCESS	= 0,
 	PERF_COUNT_HW_CACHE_RESULT_MISS		= 1,
 
-	PERF_COUNT_HW_CACHE_RESULT_MAX,		/* non ABI */
+	PERF_COUNT_HW_CACHE_RESULT_MAX,		/* non-ABI */
 };
 
 /*
@@ -93,15 +95,15 @@ enum perf_hw_cache_op_result_id {
  * well):
  */
 enum perf_sw_ids {
-	PERF_COUNT_SW_CPU_CLOCK		= 0,
-	PERF_COUNT_SW_TASK_CLOCK	= 1,
-	PERF_COUNT_SW_PAGE_FAULTS	= 2,
-	PERF_COUNT_SW_CONTEXT_SWITCHES	= 3,
-	PERF_COUNT_SW_CPU_MIGRATIONS	= 4,
-	PERF_COUNT_SW_PAGE_FAULTS_MIN	= 5,
-	PERF_COUNT_SW_PAGE_FAULTS_MAJ	= 6,
-
-	PERF_COUNT_SW_MAX,		/* non ABI */
+	PERF_COUNT_SW_CPU_CLOCK			= 0,
+	PERF_COUNT_SW_TASK_CLOCK		= 1,
+	PERF_COUNT_SW_PAGE_FAULTS		= 2,
+	PERF_COUNT_SW_CONTEXT_SWITCHES		= 3,
+	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
+	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
+	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
+
+	PERF_COUNT_SW_MAX,			/* non-ABI */
 };
 
 /*
@@ -109,15 +111,15 @@ enum perf_sw_ids {
  * in the overflow packets.
  */
 enum perf_counter_sample_format {
-	PERF_SAMPLE_IP			= 1U << 0,
-	PERF_SAMPLE_TID			= 1U << 1,
-	PERF_SAMPLE_TIME		= 1U << 2,
-	PERF_SAMPLE_ADDR		= 1U << 3,
-	PERF_SAMPLE_GROUP		= 1U << 4,
-	PERF_SAMPLE_CALLCHAIN		= 1U << 5,
-	PERF_SAMPLE_ID			= 1U << 6,
-	PERF_SAMPLE_CPU			= 1U << 7,
-	PERF_SAMPLE_PERIOD		= 1U << 8,
+	PERF_SAMPLE_IP				= 1U << 0,
+	PERF_SAMPLE_TID				= 1U << 1,
+	PERF_SAMPLE_TIME			= 1U << 2,
+	PERF_SAMPLE_ADDR			= 1U << 3,
+	PERF_SAMPLE_GROUP			= 1U << 4,
+	PERF_SAMPLE_CALLCHAIN			= 1U << 5,
+	PERF_SAMPLE_ID				= 1U << 6,
+	PERF_SAMPLE_CPU				= 1U << 7,
+	PERF_SAMPLE_PERIOD			= 1U << 8,
 };
 
 /*
@@ -126,9 +128,9 @@ enum perf_counter_sample_format {
  * in increasing order of bit value, after the counter value.
  */
 enum perf_counter_read_format {
-	PERF_FORMAT_TOTAL_TIME_ENABLED	=  1U << 0,
-	PERF_FORMAT_TOTAL_TIME_RUNNING	=  1U << 1,
-	PERF_FORMAT_ID			=  1U << 2,
+	PERF_FORMAT_TOTAL_TIME_ENABLED		= 1U << 0,
+	PERF_FORMAT_TOTAL_TIME_RUNNING		= 1U << 1,
+	PERF_FORMAT_ID				= 1U << 2,
 };
 
 /*
@@ -229,12 +231,12 @@ struct perf_counter_mmap_page {
 	__u64   data_head;		/* head in the data section */
 };
 
-#define PERF_EVENT_MISC_CPUMODE_MASK	(3 << 0)
-#define PERF_EVENT_MISC_CPUMODE_UNKNOWN	(0 << 0)
-#define PERF_EVENT_MISC_KERNEL		(1 << 0)
-#define PERF_EVENT_MISC_USER		(2 << 0)
-#define PERF_EVENT_MISC_HYPERVISOR	(3 << 0)
-#define PERF_EVENT_MISC_OVERFLOW	(1 << 2)
+#define PERF_EVENT_MISC_CPUMODE_MASK		(3 << 0)
+#define PERF_EVENT_MISC_CPUMODE_UNKNOWN		(0 << 0)
+#define PERF_EVENT_MISC_KERNEL			(1 << 0)
+#define PERF_EVENT_MISC_USER			(2 << 0)
+#define PERF_EVENT_MISC_HYPERVISOR		(3 << 0)
+#define PERF_EVENT_MISC_OVERFLOW		(1 << 2)
 
 struct perf_event_header {
 	__u32	type;
@@ -351,14 +353,14 @@ struct hw_perf_counter {
 #ifdef CONFIG_PERF_COUNTERS
 	union {
 		struct { /* hardware */
-			u64				config;
-			unsigned long			config_base;
-			unsigned long			counter_base;
-			int				idx;
+			u64		config;
+			unsigned long	config_base;
+			unsigned long	counter_base;
+			int		idx;
 		};
 		union { /* software */
-			atomic64_t			count;
-			struct hrtimer			hrtimer;
+			atomic64_t	count;
+			struct hrtimer	hrtimer;
 		};
 	};
 	atomic64_t			prev_count;
@@ -523,37 +525,37 @@ struct perf_counter_context {
 	 * Protect the states of the counters in the list,
 	 * nr_active, and the list:
 	 */
-	spinlock_t		lock;
+	spinlock_t			lock;
 	/*
 	 * Protect the list of counters.  Locking either mutex or lock
 	 * is sufficient to ensure the list doesn't change; to change
 	 * the list you need to lock both the mutex and the spinlock.
 	 */
-	struct mutex		mutex;
+	struct mutex			mutex;
 
-	struct list_head	counter_list;
-	struct list_head	event_list;
-	int			nr_counters;
-	int			nr_active;
-	int			is_active;
-	atomic_t		refcount;
-	struct task_struct	*task;
+	struct list_head		counter_list;
+	struct list_head		event_list;
+	int				nr_counters;
+	int				nr_active;
+	int				is_active;
+	atomic_t			refcount;
+	struct task_struct		*task;
 
 	/*
 	 * Context clock, runs when context enabled.
 	 */
-	u64			time;
-	u64			timestamp;
+	u64				time;
+	u64				timestamp;
 
 	/*
 	 * These fields let us detect when two contexts have both
 	 * been cloned (inherited) from a common ancestor.
 	 */
-	struct perf_counter_context *parent_ctx;
-	u64			parent_gen;
-	u64			generation;
-	int			pin_count;
-	struct rcu_head		rcu_head;
+	struct perf_counter_context	*parent_ctx;
+	u64				parent_gen;
+	u64				generation;
+	int				pin_count;
+	struct rcu_head			rcu_head;
 };
 
 /**
@@ -604,9 +606,9 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
 extern void perf_counter_update_userpage(struct perf_counter *counter);
 
 struct perf_sample_data {
-	struct pt_regs		*regs;
-	u64			addr;
-	u64			period;
+	struct pt_regs			*regs;
+	u64				addr;
+	u64				period;
 };
 
 extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
@@ -636,11 +638,14 @@ extern void perf_counter_fork(struct task_struct *tsk);
 
 extern void perf_counter_task_migration(struct task_struct *task, int cpu);
 
-#define MAX_STACK_DEPTH		255
+#define MAX_STACK_DEPTH			255
 
 struct perf_callchain_entry {
-	u16	nr, hv, kernel, user;
-	u64	ip[MAX_STACK_DEPTH];
+	u16				nr;
+	u16				hv;
+	u16				kernel;
+	u16				user;
+	u64				ip[MAX_STACK_DEPTH];
 };
 
 extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
-- 
cgit v1.2.3-70-g09d2


From cca3f454a85ff42d426401bce7ac804541b2bd03 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 11 Jun 2009 14:57:55 +0200
Subject: perf_counter: Add counter->id to the throttle event

So as to be able to distuinguish between multiple counters.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 1 +
 kernel/perf_counter.c        | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 1fa1a26cb1b..6e133954e2e 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -286,6 +286,7 @@ enum perf_event_type {
 	 * struct {
 	 *	struct perf_event_header	header;
 	 *	u64				time;
+	 *	u64				id;
 	 * };
 	 */
 	PERF_EVENT_THROTTLE		= 5,
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 8859b97390e..ef5d8a5b245 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2950,13 +2950,15 @@ static void perf_log_throttle(struct perf_counter *counter, int enable)
 	struct {
 		struct perf_event_header	header;
 		u64				time;
+		u64				id;
 	} throttle_event = {
 		.header = {
 			.type = PERF_EVENT_THROTTLE + 1,
 			.misc = 0,
 			.size = sizeof(throttle_event),
 		},
-		.time = sched_clock(),
+		.time	= sched_clock(),
+		.id	= counter->id,
 	};
 
 	ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 1, 0);
-- 
cgit v1.2.3-70-g09d2


From 3c7b4e6b8be4c16f1e6e5c558e33b7ff0db2dfaf Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 11 Jun 2009 13:22:39 +0100
Subject: kmemleak: Add the base support

This patch adds the base support for the kernel memory leak
detector. It traces the memory allocation/freeing in a way similar to
the Boehm's conservative garbage collector, the difference being that
the unreferenced objects are not freed but only shown in
/sys/kernel/debug/kmemleak. Enabling this feature introduces an
overhead to memory allocations.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/kmemleak.h |   96 +++
 init/main.c              |    4 +-
 mm/kmemleak.c            | 1498 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 1597 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/kmemleak.h
 create mode 100644 mm/kmemleak.c

(limited to 'include')

diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
new file mode 100644
index 00000000000..7796aed6cdd
--- /dev/null
+++ b/include/linux/kmemleak.h
@@ -0,0 +1,96 @@
+/*
+ * include/linux/kmemleak.h
+ *
+ * Copyright (C) 2008 ARM Limited
+ * Written by Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __KMEMLEAK_H
+#define __KMEMLEAK_H
+
+#ifdef CONFIG_DEBUG_KMEMLEAK
+
+extern void kmemleak_init(void);
+extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
+			   gfp_t gfp);
+extern void kmemleak_free(const void *ptr);
+extern void kmemleak_padding(const void *ptr, unsigned long offset,
+			     size_t size);
+extern void kmemleak_not_leak(const void *ptr);
+extern void kmemleak_ignore(const void *ptr);
+extern void kmemleak_scan_area(const void *ptr, unsigned long offset,
+			       size_t length, gfp_t gfp);
+extern void kmemleak_no_scan(const void *ptr);
+
+static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
+					    int min_count, unsigned long flags,
+					    gfp_t gfp)
+{
+	if (!(flags & SLAB_NOLEAKTRACE))
+		kmemleak_alloc(ptr, size, min_count, gfp);
+}
+
+static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags)
+{
+	if (!(flags & SLAB_NOLEAKTRACE))
+		kmemleak_free(ptr);
+}
+
+static inline void kmemleak_erase(void **ptr)
+{
+	*ptr = NULL;
+}
+
+#else
+
+static inline void kmemleak_init(void)
+{
+}
+static inline void kmemleak_alloc(const void *ptr, size_t size, int min_count,
+				  gfp_t gfp)
+{
+}
+static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
+					    int min_count, unsigned long flags,
+					    gfp_t gfp)
+{
+}
+static inline void kmemleak_free(const void *ptr)
+{
+}
+static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags)
+{
+}
+static inline void kmemleak_not_leak(const void *ptr)
+{
+}
+static inline void kmemleak_ignore(const void *ptr)
+{
+}
+static inline void kmemleak_scan_area(const void *ptr, unsigned long offset,
+				      size_t length, gfp_t gfp)
+{
+}
+static inline void kmemleak_erase(void **ptr)
+{
+}
+static inline void kmemleak_no_scan(const void *ptr)
+{
+}
+
+#endif	/* CONFIG_DEBUG_KMEMLEAK */
+
+#endif	/* __KMEMLEAK_H */
diff --git a/init/main.c b/init/main.c
index bb7dc57eee3..9d759d68d7a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -56,6 +56,7 @@
 #include <linux/debug_locks.h>
 #include <linux/debugobjects.h>
 #include <linux/lockdep.h>
+#include <linux/kmemleak.h>
 #include <linux/pid_namespace.h>
 #include <linux/device.h>
 #include <linux/kthread.h>
@@ -603,6 +604,7 @@ asmlinkage void __init start_kernel(void)
 	/* init some links before init_ISA_irqs() */
 	early_irq_init();
 	init_IRQ();
+	prio_tree_init();
 	pidhash_init();
 	init_timers();
 	hrtimers_init();
@@ -654,6 +656,7 @@ asmlinkage void __init start_kernel(void)
 	cpu_hotplug_init();
 	kmem_cache_init();
 	kmemtrace_init();
+	kmemleak_init();
 	debug_objects_mem_init();
 	idr_init_cache();
 	setup_per_cpu_pageset();
@@ -663,7 +666,6 @@ asmlinkage void __init start_kernel(void)
 	calibrate_delay();
 	pidmap_init();
 	pgtable_cache_init();
-	prio_tree_init();
 	anon_vma_init();
 #ifdef CONFIG_X86
 	if (efi_enabled)
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
new file mode 100644
index 00000000000..58ec86c9e58
--- /dev/null
+++ b/mm/kmemleak.c
@@ -0,0 +1,1498 @@
+/*
+ * mm/kmemleak.c
+ *
+ * Copyright (C) 2008 ARM Limited
+ * Written by Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ *
+ * For more information on the algorithm and kmemleak usage, please see
+ * Documentation/kmemleak.txt.
+ *
+ * Notes on locking
+ * ----------------
+ *
+ * The following locks and mutexes are used by kmemleak:
+ *
+ * - kmemleak_lock (rwlock): protects the object_list modifications and
+ *   accesses to the object_tree_root. The object_list is the main list
+ *   holding the metadata (struct kmemleak_object) for the allocated memory
+ *   blocks. The object_tree_root is a priority search tree used to look-up
+ *   metadata based on a pointer to the corresponding memory block.  The
+ *   kmemleak_object structures are added to the object_list and
+ *   object_tree_root in the create_object() function called from the
+ *   kmemleak_alloc() callback and removed in delete_object() called from the
+ *   kmemleak_free() callback
+ * - kmemleak_object.lock (spinlock): protects a kmemleak_object. Accesses to
+ *   the metadata (e.g. count) are protected by this lock. Note that some
+ *   members of this structure may be protected by other means (atomic or
+ *   kmemleak_lock). This lock is also held when scanning the corresponding
+ *   memory block to avoid the kernel freeing it via the kmemleak_free()
+ *   callback. This is less heavyweight than holding a global lock like
+ *   kmemleak_lock during scanning
+ * - scan_mutex (mutex): ensures that only one thread may scan the memory for
+ *   unreferenced objects at a time. The gray_list contains the objects which
+ *   are already referenced or marked as false positives and need to be
+ *   scanned. This list is only modified during a scanning episode when the
+ *   scan_mutex is held. At the end of a scan, the gray_list is always empty.
+ *   Note that the kmemleak_object.use_count is incremented when an object is
+ *   added to the gray_list and therefore cannot be freed
+ * - kmemleak_mutex (mutex): prevents multiple users of the "kmemleak" debugfs
+ *   file together with modifications to the memory scanning parameters
+ *   including the scan_thread pointer
+ *
+ * The kmemleak_object structures have a use_count incremented or decremented
+ * using the get_object()/put_object() functions. When the use_count becomes
+ * 0, this count can no longer be incremented and put_object() schedules the
+ * kmemleak_object freeing via an RCU callback. All calls to the get_object()
+ * function must be protected by rcu_read_lock() to avoid accessing a freed
+ * structure.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/jiffies.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/prio_tree.h>
+#include <linux/gfp.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/cpumask.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/rcupdate.h>
+#include <linux/stacktrace.h>
+#include <linux/cache.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <linux/mmzone.h>
+#include <linux/slab.h>
+#include <linux/thread_info.h>
+#include <linux/err.h>
+#include <linux/uaccess.h>
+#include <linux/string.h>
+#include <linux/nodemask.h>
+#include <linux/mm.h>
+
+#include <asm/sections.h>
+#include <asm/processor.h>
+#include <asm/atomic.h>
+
+#include <linux/kmemleak.h>
+
+/*
+ * Kmemleak configuration and common defines.
+ */
+#define MAX_TRACE		16	/* stack trace length */
+#define REPORTS_NR		50	/* maximum number of reported leaks */
+#define MSECS_MIN_AGE		5000	/* minimum object age for reporting */
+#define MSECS_SCAN_YIELD	10	/* CPU yielding period */
+#define SECS_FIRST_SCAN		60	/* delay before the first scan */
+#define SECS_SCAN_WAIT		600	/* subsequent auto scanning delay */
+
+#define BYTES_PER_POINTER	sizeof(void *)
+
+/* scanning area inside a memory block */
+struct kmemleak_scan_area {
+	struct hlist_node node;
+	unsigned long offset;
+	size_t length;
+};
+
+/*
+ * Structure holding the metadata for each allocated memory block.
+ * Modifications to such objects should be made while holding the
+ * object->lock. Insertions or deletions from object_list, gray_list or
+ * tree_node are already protected by the corresponding locks or mutex (see
+ * the notes on locking above). These objects are reference-counted
+ * (use_count) and freed using the RCU mechanism.
+ */
+struct kmemleak_object {
+	spinlock_t lock;
+	unsigned long flags;		/* object status flags */
+	struct list_head object_list;
+	struct list_head gray_list;
+	struct prio_tree_node tree_node;
+	struct rcu_head rcu;		/* object_list lockless traversal */
+	/* object usage count; object freed when use_count == 0 */
+	atomic_t use_count;
+	unsigned long pointer;
+	size_t size;
+	/* minimum number of a pointers found before it is considered leak */
+	int min_count;
+	/* the total number of pointers found pointing to this object */
+	int count;
+	/* memory ranges to be scanned inside an object (empty for all) */
+	struct hlist_head area_list;
+	unsigned long trace[MAX_TRACE];
+	unsigned int trace_len;
+	unsigned long jiffies;		/* creation timestamp */
+	pid_t pid;			/* pid of the current task */
+	char comm[TASK_COMM_LEN];	/* executable name */
+};
+
+/* flag representing the memory block allocation status */
+#define OBJECT_ALLOCATED	(1 << 0)
+/* flag set after the first reporting of an unreference object */
+#define OBJECT_REPORTED		(1 << 1)
+/* flag set to not scan the object */
+#define OBJECT_NO_SCAN		(1 << 2)
+
+/* the list of all allocated objects */
+static LIST_HEAD(object_list);
+/* the list of gray-colored objects (see color_gray comment below) */
+static LIST_HEAD(gray_list);
+/* prio search tree for object boundaries */
+static struct prio_tree_root object_tree_root;
+/* rw_lock protecting the access to object_list and prio_tree_root */
+static DEFINE_RWLOCK(kmemleak_lock);
+
+/* allocation caches for kmemleak internal data */
+static struct kmem_cache *object_cache;
+static struct kmem_cache *scan_area_cache;
+
+/* set if tracing memory operations is enabled */
+static atomic_t kmemleak_enabled = ATOMIC_INIT(0);
+/* set in the late_initcall if there were no errors */
+static atomic_t kmemleak_initialized = ATOMIC_INIT(0);
+/* enables or disables early logging of the memory operations */
+static atomic_t kmemleak_early_log = ATOMIC_INIT(1);
+/* set if a fata kmemleak error has occurred */
+static atomic_t kmemleak_error = ATOMIC_INIT(0);
+
+/* minimum and maximum address that may be valid pointers */
+static unsigned long min_addr = ULONG_MAX;
+static unsigned long max_addr;
+
+/* used for yielding the CPU to other tasks during scanning */
+static unsigned long next_scan_yield;
+static struct task_struct *scan_thread;
+static unsigned long jiffies_scan_yield;
+static unsigned long jiffies_min_age;
+/* delay between automatic memory scannings */
+static signed long jiffies_scan_wait;
+/* enables or disables the task stacks scanning */
+static int kmemleak_stack_scan;
+/* mutex protecting the memory scanning */
+static DEFINE_MUTEX(scan_mutex);
+/* mutex protecting the access to the /sys/kernel/debug/kmemleak file */
+static DEFINE_MUTEX(kmemleak_mutex);
+
+/* number of leaks reported (for limitation purposes) */
+static int reported_leaks;
+
+/*
+ * Early object allocation/freeing logging. Kkmemleak is initialized after the
+ * kernel allocator. However, both the kernel allocator and kmemleak may
+ * allocate memory blocks which need to be tracked. Kkmemleak defines an
+ * arbitrary buffer to hold the allocation/freeing information before it is
+ * fully initialized.
+ */
+
+/* kmemleak operation type for early logging */
+enum {
+	KMEMLEAK_ALLOC,
+	KMEMLEAK_FREE,
+	KMEMLEAK_NOT_LEAK,
+	KMEMLEAK_IGNORE,
+	KMEMLEAK_SCAN_AREA,
+	KMEMLEAK_NO_SCAN
+};
+
+/*
+ * Structure holding the information passed to kmemleak callbacks during the
+ * early logging.
+ */
+struct early_log {
+	int op_type;			/* kmemleak operation type */
+	const void *ptr;		/* allocated/freed memory block */
+	size_t size;			/* memory block size */
+	int min_count;			/* minimum reference count */
+	unsigned long offset;		/* scan area offset */
+	size_t length;			/* scan area length */
+};
+
+/* early logging buffer and current position */
+static struct early_log early_log[200];
+static int crt_early_log;
+
+static void kmemleak_disable(void);
+
+/*
+ * Print a warning and dump the stack trace.
+ */
+#define kmemleak_warn(x...)	do {	\
+	pr_warning(x);			\
+	dump_stack();			\
+} while (0)
+
+/*
+ * Macro invoked when a serious kmemleak condition occured and cannot be
+ * recovered from. Kkmemleak will be disabled and further allocation/freeing
+ * tracing no longer available.
+ */
+#define kmemleak_panic(x...)	do {	\
+	kmemleak_warn(x);		\
+	kmemleak_disable();		\
+} while (0)
+
+/*
+ * Object colors, encoded with count and min_count:
+ * - white - orphan object, not enough references to it (count < min_count)
+ * - gray  - not orphan, not marked as false positive (min_count == 0) or
+ *		sufficient references to it (count >= min_count)
+ * - black - ignore, it doesn't contain references (e.g. text section)
+ *		(min_count == -1). No function defined for this color.
+ * Newly created objects don't have any color assigned (object->count == -1)
+ * before the next memory scan when they become white.
+ */
+static int color_white(const struct kmemleak_object *object)
+{
+	return object->count != -1 && object->count < object->min_count;
+}
+
+static int color_gray(const struct kmemleak_object *object)
+{
+	return object->min_count != -1 && object->count >= object->min_count;
+}
+
+/*
+ * Objects are considered referenced if their color is gray and they have not
+ * been deleted.
+ */
+static int referenced_object(struct kmemleak_object *object)
+{
+	return (object->flags & OBJECT_ALLOCATED) && color_gray(object);
+}
+
+/*
+ * Objects are considered unreferenced only if their color is white, they have
+ * not be deleted and have a minimum age to avoid false positives caused by
+ * pointers temporarily stored in CPU registers.
+ */
+static int unreferenced_object(struct kmemleak_object *object)
+{
+	return (object->flags & OBJECT_ALLOCATED) && color_white(object) &&
+		time_is_before_eq_jiffies(object->jiffies + jiffies_min_age);
+}
+
+/*
+ * Printing of the (un)referenced objects information, either to the seq file
+ * or to the kernel log. The print_referenced/print_unreferenced functions
+ * must be called with the object->lock held.
+ */
+#define print_helper(seq, x...)	do {	\
+	struct seq_file *s = (seq);	\
+	if (s)				\
+		seq_printf(s, x);	\
+	else				\
+		pr_info(x);		\
+} while (0)
+
+static void print_referenced(struct kmemleak_object *object)
+{
+	pr_info("kmemleak: referenced object 0x%08lx (size %zu)\n",
+		object->pointer, object->size);
+}
+
+static void print_unreferenced(struct seq_file *seq,
+			       struct kmemleak_object *object)
+{
+	int i;
+
+	print_helper(seq, "kmemleak: unreferenced object 0x%08lx (size %zu):\n",
+		     object->pointer, object->size);
+	print_helper(seq, "  comm \"%s\", pid %d, jiffies %lu\n",
+		     object->comm, object->pid, object->jiffies);
+	print_helper(seq, "  backtrace:\n");
+
+	for (i = 0; i < object->trace_len; i++) {
+		void *ptr = (void *)object->trace[i];
+		print_helper(seq, "    [<%p>] %pS\n", ptr, ptr);
+	}
+}
+
+/*
+ * Print the kmemleak_object information. This function is used mainly for
+ * debugging special cases when kmemleak operations. It must be called with
+ * the object->lock held.
+ */
+static void dump_object_info(struct kmemleak_object *object)
+{
+	struct stack_trace trace;
+
+	trace.nr_entries = object->trace_len;
+	trace.entries = object->trace;
+
+	pr_notice("kmemleak: Object 0x%08lx (size %zu):\n",
+		  object->tree_node.start, object->size);
+	pr_notice("  comm \"%s\", pid %d, jiffies %lu\n",
+		  object->comm, object->pid, object->jiffies);
+	pr_notice("  min_count = %d\n", object->min_count);
+	pr_notice("  count = %d\n", object->count);
+	pr_notice("  backtrace:\n");
+	print_stack_trace(&trace, 4);
+}
+
+/*
+ * Look-up a memory block metadata (kmemleak_object) in the priority search
+ * tree based on a pointer value. If alias is 0, only values pointing to the
+ * beginning of the memory block are allowed. The kmemleak_lock must be held
+ * when calling this function.
+ */
+static struct kmemleak_object *lookup_object(unsigned long ptr, int alias)
+{
+	struct prio_tree_node *node;
+	struct prio_tree_iter iter;
+	struct kmemleak_object *object;
+
+	prio_tree_iter_init(&iter, &object_tree_root, ptr, ptr);
+	node = prio_tree_next(&iter);
+	if (node) {
+		object = prio_tree_entry(node, struct kmemleak_object,
+					 tree_node);
+		if (!alias && object->pointer != ptr) {
+			kmemleak_warn("kmemleak: Found object by alias");
+			object = NULL;
+		}
+	} else
+		object = NULL;
+
+	return object;
+}
+
+/*
+ * Increment the object use_count. Return 1 if successful or 0 otherwise. Note
+ * that once an object's use_count reached 0, the RCU freeing was already
+ * registered and the object should no longer be used. This function must be
+ * called under the protection of rcu_read_lock().
+ */
+static int get_object(struct kmemleak_object *object)
+{
+	return atomic_inc_not_zero(&object->use_count);
+}
+
+/*
+ * RCU callback to free a kmemleak_object.
+ */
+static void free_object_rcu(struct rcu_head *rcu)
+{
+	struct hlist_node *elem, *tmp;
+	struct kmemleak_scan_area *area;
+	struct kmemleak_object *object =
+		container_of(rcu, struct kmemleak_object, rcu);
+
+	/*
+	 * Once use_count is 0 (guaranteed by put_object), there is no other
+	 * code accessing this object, hence no need for locking.
+	 */
+	hlist_for_each_entry_safe(area, elem, tmp, &object->area_list, node) {
+		hlist_del(elem);
+		kmem_cache_free(scan_area_cache, area);
+	}
+	kmem_cache_free(object_cache, object);
+}
+
+/*
+ * Decrement the object use_count. Once the count is 0, free the object using
+ * an RCU callback. Since put_object() may be called via the kmemleak_free() ->
+ * delete_object() path, the delayed RCU freeing ensures that there is no
+ * recursive call to the kernel allocator. Lock-less RCU object_list traversal
+ * is also possible.
+ */
+static void put_object(struct kmemleak_object *object)
+{
+	if (!atomic_dec_and_test(&object->use_count))
+		return;
+
+	/* should only get here after delete_object was called */
+	WARN_ON(object->flags & OBJECT_ALLOCATED);
+
+	call_rcu(&object->rcu, free_object_rcu);
+}
+
+/*
+ * Look up an object in the prio search tree and increase its use_count.
+ */
+static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias)
+{
+	unsigned long flags;
+	struct kmemleak_object *object = NULL;
+
+	rcu_read_lock();
+	read_lock_irqsave(&kmemleak_lock, flags);
+	if (ptr >= min_addr && ptr < max_addr)
+		object = lookup_object(ptr, alias);
+	read_unlock_irqrestore(&kmemleak_lock, flags);
+
+	/* check whether the object is still available */
+	if (object && !get_object(object))
+		object = NULL;
+	rcu_read_unlock();
+
+	return object;
+}
+
+/*
+ * Create the metadata (struct kmemleak_object) corresponding to an allocated
+ * memory block and add it to the object_list and object_tree_root.
+ */
+static void create_object(unsigned long ptr, size_t size, int min_count,
+			  gfp_t gfp)
+{
+	unsigned long flags;
+	struct kmemleak_object *object;
+	struct prio_tree_node *node;
+	struct stack_trace trace;
+
+	object = kmem_cache_alloc(object_cache, gfp & ~GFP_SLAB_BUG_MASK);
+	if (!object) {
+		kmemleak_panic("kmemleak: Cannot allocate a kmemleak_object "
+			       "structure\n");
+		return;
+	}
+
+	INIT_LIST_HEAD(&object->object_list);
+	INIT_LIST_HEAD(&object->gray_list);
+	INIT_HLIST_HEAD(&object->area_list);
+	spin_lock_init(&object->lock);
+	atomic_set(&object->use_count, 1);
+	object->flags = OBJECT_ALLOCATED;
+	object->pointer = ptr;
+	object->size = size;
+	object->min_count = min_count;
+	object->count = -1;			/* no color initially */
+	object->jiffies = jiffies;
+
+	/* task information */
+	if (in_irq()) {
+		object->pid = 0;
+		strncpy(object->comm, "hardirq", sizeof(object->comm));
+	} else if (in_softirq()) {
+		object->pid = 0;
+		strncpy(object->comm, "softirq", sizeof(object->comm));
+	} else {
+		object->pid = current->pid;
+		/*
+		 * There is a small chance of a race with set_task_comm(),
+		 * however using get_task_comm() here may cause locking
+		 * dependency issues with current->alloc_lock. In the worst
+		 * case, the command line is not correct.
+		 */
+		strncpy(object->comm, current->comm, sizeof(object->comm));
+	}
+
+	/* kernel backtrace */
+	trace.max_entries = MAX_TRACE;
+	trace.nr_entries = 0;
+	trace.entries = object->trace;
+	trace.skip = 1;
+	save_stack_trace(&trace);
+	object->trace_len = trace.nr_entries;
+
+	INIT_PRIO_TREE_NODE(&object->tree_node);
+	object->tree_node.start = ptr;
+	object->tree_node.last = ptr + size - 1;
+
+	write_lock_irqsave(&kmemleak_lock, flags);
+	min_addr = min(min_addr, ptr);
+	max_addr = max(max_addr, ptr + size);
+	node = prio_tree_insert(&object_tree_root, &object->tree_node);
+	/*
+	 * The code calling the kernel does not yet have the pointer to the
+	 * memory block to be able to free it.  However, we still hold the
+	 * kmemleak_lock here in case parts of the kernel started freeing
+	 * random memory blocks.
+	 */
+	if (node != &object->tree_node) {
+		unsigned long flags;
+
+		kmemleak_panic("kmemleak: Cannot insert 0x%lx into the object "
+			       "search tree (already existing)\n", ptr);
+		object = lookup_object(ptr, 1);
+		spin_lock_irqsave(&object->lock, flags);
+		dump_object_info(object);
+		spin_unlock_irqrestore(&object->lock, flags);
+
+		goto out;
+	}
+	list_add_tail_rcu(&object->object_list, &object_list);
+out:
+	write_unlock_irqrestore(&kmemleak_lock, flags);
+}
+
+/*
+ * Remove the metadata (struct kmemleak_object) for a memory block from the
+ * object_list and object_tree_root and decrement its use_count.
+ */
+static void delete_object(unsigned long ptr)
+{
+	unsigned long flags;
+	struct kmemleak_object *object;
+
+	write_lock_irqsave(&kmemleak_lock, flags);
+	object = lookup_object(ptr, 0);
+	if (!object) {
+		kmemleak_warn("kmemleak: Freeing unknown object at 0x%08lx\n",
+			      ptr);
+		write_unlock_irqrestore(&kmemleak_lock, flags);
+		return;
+	}
+	prio_tree_remove(&object_tree_root, &object->tree_node);
+	list_del_rcu(&object->object_list);
+	write_unlock_irqrestore(&kmemleak_lock, flags);
+
+	WARN_ON(!(object->flags & OBJECT_ALLOCATED));
+	WARN_ON(atomic_read(&object->use_count) < 1);
+
+	/*
+	 * Locking here also ensures that the corresponding memory block
+	 * cannot be freed when it is being scanned.
+	 */
+	spin_lock_irqsave(&object->lock, flags);
+	if (object->flags & OBJECT_REPORTED)
+		print_referenced(object);
+	object->flags &= ~OBJECT_ALLOCATED;
+	spin_unlock_irqrestore(&object->lock, flags);
+	put_object(object);
+}
+
+/*
+ * Make a object permanently as gray-colored so that it can no longer be
+ * reported as a leak. This is used in general to mark a false positive.
+ */
+static void make_gray_object(unsigned long ptr)
+{
+	unsigned long flags;
+	struct kmemleak_object *object;
+
+	object = find_and_get_object(ptr, 0);
+	if (!object) {
+		kmemleak_warn("kmemleak: Graying unknown object at 0x%08lx\n",
+			      ptr);
+		return;
+	}
+
+	spin_lock_irqsave(&object->lock, flags);
+	object->min_count = 0;
+	spin_unlock_irqrestore(&object->lock, flags);
+	put_object(object);
+}
+
+/*
+ * Mark the object as black-colored so that it is ignored from scans and
+ * reporting.
+ */
+static void make_black_object(unsigned long ptr)
+{
+	unsigned long flags;
+	struct kmemleak_object *object;
+
+	object = find_and_get_object(ptr, 0);
+	if (!object) {
+		kmemleak_warn("kmemleak: Blacking unknown object at 0x%08lx\n",
+			      ptr);
+		return;
+	}
+
+	spin_lock_irqsave(&object->lock, flags);
+	object->min_count = -1;
+	spin_unlock_irqrestore(&object->lock, flags);
+	put_object(object);
+}
+
+/*
+ * Add a scanning area to the object. If at least one such area is added,
+ * kmemleak will only scan these ranges rather than the whole memory block.
+ */
+static void add_scan_area(unsigned long ptr, unsigned long offset,
+			  size_t length, gfp_t gfp)
+{
+	unsigned long flags;
+	struct kmemleak_object *object;
+	struct kmemleak_scan_area *area;
+
+	object = find_and_get_object(ptr, 0);
+	if (!object) {
+		kmemleak_warn("kmemleak: Adding scan area to unknown "
+			      "object at 0x%08lx\n", ptr);
+		return;
+	}
+
+	area = kmem_cache_alloc(scan_area_cache, gfp & ~GFP_SLAB_BUG_MASK);
+	if (!area) {
+		kmemleak_warn("kmemleak: Cannot allocate a scan area\n");
+		goto out;
+	}
+
+	spin_lock_irqsave(&object->lock, flags);
+	if (offset + length > object->size) {
+		kmemleak_warn("kmemleak: Scan area larger than object "
+			      "0x%08lx\n", ptr);
+		dump_object_info(object);
+		kmem_cache_free(scan_area_cache, area);
+		goto out_unlock;
+	}
+
+	INIT_HLIST_NODE(&area->node);
+	area->offset = offset;
+	area->length = length;
+
+	hlist_add_head(&area->node, &object->area_list);
+out_unlock:
+	spin_unlock_irqrestore(&object->lock, flags);
+out:
+	put_object(object);
+}
+
+/*
+ * Set the OBJECT_NO_SCAN flag for the object corresponding to the give
+ * pointer. Such object will not be scanned by kmemleak but references to it
+ * are searched.
+ */
+static void object_no_scan(unsigned long ptr)
+{
+	unsigned long flags;
+	struct kmemleak_object *object;
+
+	object = find_and_get_object(ptr, 0);
+	if (!object) {
+		kmemleak_warn("kmemleak: Not scanning unknown object at "
+			      "0x%08lx\n", ptr);
+		return;
+	}
+
+	spin_lock_irqsave(&object->lock, flags);
+	object->flags |= OBJECT_NO_SCAN;
+	spin_unlock_irqrestore(&object->lock, flags);
+	put_object(object);
+}
+
+/*
+ * Log an early kmemleak_* call to the early_log buffer. These calls will be
+ * processed later once kmemleak is fully initialized.
+ */
+static void log_early(int op_type, const void *ptr, size_t size,
+		      int min_count, unsigned long offset, size_t length)
+{
+	unsigned long flags;
+	struct early_log *log;
+
+	if (crt_early_log >= ARRAY_SIZE(early_log)) {
+		kmemleak_panic("kmemleak: Early log buffer exceeded\n");
+		return;
+	}
+
+	/*
+	 * There is no need for locking since the kernel is still in UP mode
+	 * at this stage. Disabling the IRQs is enough.
+	 */
+	local_irq_save(flags);
+	log = &early_log[crt_early_log];
+	log->op_type = op_type;
+	log->ptr = ptr;
+	log->size = size;
+	log->min_count = min_count;
+	log->offset = offset;
+	log->length = length;
+	crt_early_log++;
+	local_irq_restore(flags);
+}
+
+/*
+ * Memory allocation function callback. This function is called from the
+ * kernel allocators when a new block is allocated (kmem_cache_alloc, kmalloc,
+ * vmalloc etc.).
+ */
+void kmemleak_alloc(const void *ptr, size_t size, int min_count, gfp_t gfp)
+{
+	pr_debug("%s(0x%p, %zu, %d)\n", __func__, ptr, size, min_count);
+
+	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+		create_object((unsigned long)ptr, size, min_count, gfp);
+	else if (atomic_read(&kmemleak_early_log))
+		log_early(KMEMLEAK_ALLOC, ptr, size, min_count, 0, 0);
+}
+EXPORT_SYMBOL_GPL(kmemleak_alloc);
+
+/*
+ * Memory freeing function callback. This function is called from the kernel
+ * allocators when a block is freed (kmem_cache_free, kfree, vfree etc.).
+ */
+void kmemleak_free(const void *ptr)
+{
+	pr_debug("%s(0x%p)\n", __func__, ptr);
+
+	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+		delete_object((unsigned long)ptr);
+	else if (atomic_read(&kmemleak_early_log))
+		log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0);
+}
+EXPORT_SYMBOL_GPL(kmemleak_free);
+
+/*
+ * Mark an already allocated memory block as a false positive. This will cause
+ * the block to no longer be reported as leak and always be scanned.
+ */
+void kmemleak_not_leak(const void *ptr)
+{
+	pr_debug("%s(0x%p)\n", __func__, ptr);
+
+	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+		make_gray_object((unsigned long)ptr);
+	else if (atomic_read(&kmemleak_early_log))
+		log_early(KMEMLEAK_NOT_LEAK, ptr, 0, 0, 0, 0);
+}
+EXPORT_SYMBOL(kmemleak_not_leak);
+
+/*
+ * Ignore a memory block. This is usually done when it is known that the
+ * corresponding block is not a leak and does not contain any references to
+ * other allocated memory blocks.
+ */
+void kmemleak_ignore(const void *ptr)
+{
+	pr_debug("%s(0x%p)\n", __func__, ptr);
+
+	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+		make_black_object((unsigned long)ptr);
+	else if (atomic_read(&kmemleak_early_log))
+		log_early(KMEMLEAK_IGNORE, ptr, 0, 0, 0, 0);
+}
+EXPORT_SYMBOL(kmemleak_ignore);
+
+/*
+ * Limit the range to be scanned in an allocated memory block.
+ */
+void kmemleak_scan_area(const void *ptr, unsigned long offset, size_t length,
+			gfp_t gfp)
+{
+	pr_debug("%s(0x%p)\n", __func__, ptr);
+
+	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+		add_scan_area((unsigned long)ptr, offset, length, gfp);
+	else if (atomic_read(&kmemleak_early_log))
+		log_early(KMEMLEAK_SCAN_AREA, ptr, 0, 0, offset, length);
+}
+EXPORT_SYMBOL(kmemleak_scan_area);
+
+/*
+ * Inform kmemleak not to scan the given memory block.
+ */
+void kmemleak_no_scan(const void *ptr)
+{
+	pr_debug("%s(0x%p)\n", __func__, ptr);
+
+	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+		object_no_scan((unsigned long)ptr);
+	else if (atomic_read(&kmemleak_early_log))
+		log_early(KMEMLEAK_NO_SCAN, ptr, 0, 0, 0, 0);
+}
+EXPORT_SYMBOL(kmemleak_no_scan);
+
+/*
+ * Yield the CPU so that other tasks get a chance to run.  The yielding is
+ * rate-limited to avoid excessive number of calls to the schedule() function
+ * during memory scanning.
+ */
+static void scan_yield(void)
+{
+	might_sleep();
+
+	if (time_is_before_eq_jiffies(next_scan_yield)) {
+		schedule();
+		next_scan_yield = jiffies + jiffies_scan_yield;
+	}
+}
+
+/*
+ * Memory scanning is a long process and it needs to be interruptable. This
+ * function checks whether such interrupt condition occured.
+ */
+static int scan_should_stop(void)
+{
+	if (!atomic_read(&kmemleak_enabled))
+		return 1;
+
+	/*
+	 * This function may be called from either process or kthread context,
+	 * hence the need to check for both stop conditions.
+	 */
+	if (current->mm)
+		return signal_pending(current);
+	else
+		return kthread_should_stop();
+
+	return 0;
+}
+
+/*
+ * Scan a memory block (exclusive range) for valid pointers and add those
+ * found to the gray list.
+ */
+static void scan_block(void *_start, void *_end,
+		       struct kmemleak_object *scanned)
+{
+	unsigned long *ptr;
+	unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER);
+	unsigned long *end = _end - (BYTES_PER_POINTER - 1);
+
+	for (ptr = start; ptr < end; ptr++) {
+		unsigned long flags;
+		unsigned long pointer = *ptr;
+		struct kmemleak_object *object;
+
+		if (scan_should_stop())
+			break;
+
+		/*
+		 * When scanning a memory block with a corresponding
+		 * kmemleak_object, the CPU yielding is handled in the calling
+		 * code since it holds the object->lock to avoid the block
+		 * freeing.
+		 */
+		if (!scanned)
+			scan_yield();
+
+		object = find_and_get_object(pointer, 1);
+		if (!object)
+			continue;
+		if (object == scanned) {
+			/* self referenced, ignore */
+			put_object(object);
+			continue;
+		}
+
+		/*
+		 * Avoid the lockdep recursive warning on object->lock being
+		 * previously acquired in scan_object(). These locks are
+		 * enclosed by scan_mutex.
+		 */
+		spin_lock_irqsave_nested(&object->lock, flags,
+					 SINGLE_DEPTH_NESTING);
+		if (!color_white(object)) {
+			/* non-orphan, ignored or new */
+			spin_unlock_irqrestore(&object->lock, flags);
+			put_object(object);
+			continue;
+		}
+
+		/*
+		 * Increase the object's reference count (number of pointers
+		 * to the memory block). If this count reaches the required
+		 * minimum, the object's color will become gray and it will be
+		 * added to the gray_list.
+		 */
+		object->count++;
+		if (color_gray(object))
+			list_add_tail(&object->gray_list, &gray_list);
+		else
+			put_object(object);
+		spin_unlock_irqrestore(&object->lock, flags);
+	}
+}
+
+/*
+ * Scan a memory block corresponding to a kmemleak_object. A condition is
+ * that object->use_count >= 1.
+ */
+static void scan_object(struct kmemleak_object *object)
+{
+	struct kmemleak_scan_area *area;
+	struct hlist_node *elem;
+	unsigned long flags;
+
+	/*
+	 * Once the object->lock is aquired, the corresponding memory block
+	 * cannot be freed (the same lock is aquired in delete_object).
+	 */
+	spin_lock_irqsave(&object->lock, flags);
+	if (object->flags & OBJECT_NO_SCAN)
+		goto out;
+	if (!(object->flags & OBJECT_ALLOCATED))
+		/* already freed object */
+		goto out;
+	if (hlist_empty(&object->area_list))
+		scan_block((void *)object->pointer,
+			   (void *)(object->pointer + object->size), object);
+	else
+		hlist_for_each_entry(area, elem, &object->area_list, node)
+			scan_block((void *)(object->pointer + area->offset),
+				   (void *)(object->pointer + area->offset
+					    + area->length), object);
+out:
+	spin_unlock_irqrestore(&object->lock, flags);
+}
+
+/*
+ * Scan data sections and all the referenced memory blocks allocated via the
+ * kernel's standard allocators. This function must be called with the
+ * scan_mutex held.
+ */
+static void kmemleak_scan(void)
+{
+	unsigned long flags;
+	struct kmemleak_object *object, *tmp;
+	struct task_struct *task;
+	int i;
+
+	/* prepare the kmemleak_object's */
+	rcu_read_lock();
+	list_for_each_entry_rcu(object, &object_list, object_list) {
+		spin_lock_irqsave(&object->lock, flags);
+#ifdef DEBUG
+		/*
+		 * With a few exceptions there should be a maximum of
+		 * 1 reference to any object at this point.
+		 */
+		if (atomic_read(&object->use_count) > 1) {
+			pr_debug("kmemleak: object->use_count = %d\n",
+				 atomic_read(&object->use_count));
+			dump_object_info(object);
+		}
+#endif
+		/* reset the reference count (whiten the object) */
+		object->count = 0;
+		if (color_gray(object) && get_object(object))
+			list_add_tail(&object->gray_list, &gray_list);
+
+		spin_unlock_irqrestore(&object->lock, flags);
+	}
+	rcu_read_unlock();
+
+	/* data/bss scanning */
+	scan_block(_sdata, _edata, NULL);
+	scan_block(__bss_start, __bss_stop, NULL);
+
+#ifdef CONFIG_SMP
+	/* per-cpu sections scanning */
+	for_each_possible_cpu(i)
+		scan_block(__per_cpu_start + per_cpu_offset(i),
+			   __per_cpu_end + per_cpu_offset(i), NULL);
+#endif
+
+	/*
+	 * Struct page scanning for each node. The code below is not yet safe
+	 * with MEMORY_HOTPLUG.
+	 */
+	for_each_online_node(i) {
+		pg_data_t *pgdat = NODE_DATA(i);
+		unsigned long start_pfn = pgdat->node_start_pfn;
+		unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
+		unsigned long pfn;
+
+		for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+			struct page *page;
+
+			if (!pfn_valid(pfn))
+				continue;
+			page = pfn_to_page(pfn);
+			/* only scan if page is in use */
+			if (page_count(page) == 0)
+				continue;
+			scan_block(page, page + 1, NULL);
+		}
+	}
+
+	/*
+	 * Scanning the task stacks may introduce false negatives and it is
+	 * not enabled by default.
+	 */
+	if (kmemleak_stack_scan) {
+		read_lock(&tasklist_lock);
+		for_each_process(task)
+			scan_block(task_stack_page(task),
+				   task_stack_page(task) + THREAD_SIZE, NULL);
+		read_unlock(&tasklist_lock);
+	}
+
+	/*
+	 * Scan the objects already referenced from the sections scanned
+	 * above. More objects will be referenced and, if there are no memory
+	 * leaks, all the objects will be scanned. The list traversal is safe
+	 * for both tail additions and removals from inside the loop. The
+	 * kmemleak objects cannot be freed from outside the loop because their
+	 * use_count was increased.
+	 */
+	object = list_entry(gray_list.next, typeof(*object), gray_list);
+	while (&object->gray_list != &gray_list) {
+		scan_yield();
+
+		/* may add new objects to the list */
+		if (!scan_should_stop())
+			scan_object(object);
+
+		tmp = list_entry(object->gray_list.next, typeof(*object),
+				 gray_list);
+
+		/* remove the object from the list and release it */
+		list_del(&object->gray_list);
+		put_object(object);
+
+		object = tmp;
+	}
+	WARN_ON(!list_empty(&gray_list));
+}
+
+/*
+ * Thread function performing automatic memory scanning. Unreferenced objects
+ * at the end of a memory scan are reported but only the first time.
+ */
+static int kmemleak_scan_thread(void *arg)
+{
+	static int first_run = 1;
+
+	pr_info("kmemleak: Automatic memory scanning thread started\n");
+
+	/*
+	 * Wait before the first scan to allow the system to fully initialize.
+	 */
+	if (first_run) {
+		first_run = 0;
+		ssleep(SECS_FIRST_SCAN);
+	}
+
+	while (!kthread_should_stop()) {
+		struct kmemleak_object *object;
+		signed long timeout = jiffies_scan_wait;
+
+		mutex_lock(&scan_mutex);
+
+		kmemleak_scan();
+		reported_leaks = 0;
+
+		rcu_read_lock();
+		list_for_each_entry_rcu(object, &object_list, object_list) {
+			unsigned long flags;
+
+			if (reported_leaks >= REPORTS_NR)
+				break;
+			spin_lock_irqsave(&object->lock, flags);
+			if (!(object->flags & OBJECT_REPORTED) &&
+			    unreferenced_object(object)) {
+				print_unreferenced(NULL, object);
+				object->flags |= OBJECT_REPORTED;
+				reported_leaks++;
+			} else if ((object->flags & OBJECT_REPORTED) &&
+				   referenced_object(object)) {
+				print_referenced(object);
+				object->flags &= ~OBJECT_REPORTED;
+			}
+			spin_unlock_irqrestore(&object->lock, flags);
+		}
+		rcu_read_unlock();
+
+		mutex_unlock(&scan_mutex);
+		/* wait before the next scan */
+		while (timeout && !kthread_should_stop())
+			timeout = schedule_timeout_interruptible(timeout);
+	}
+
+	pr_info("kmemleak: Automatic memory scanning thread ended\n");
+
+	return 0;
+}
+
+/*
+ * Start the automatic memory scanning thread. This function must be called
+ * with the kmemleak_mutex held.
+ */
+void start_scan_thread(void)
+{
+	if (scan_thread)
+		return;
+	scan_thread = kthread_run(kmemleak_scan_thread, NULL, "kmemleak");
+	if (IS_ERR(scan_thread)) {
+		pr_warning("kmemleak: Failed to create the scan thread\n");
+		scan_thread = NULL;
+	}
+}
+
+/*
+ * Stop the automatic memory scanning thread. This function must be called
+ * with the kmemleak_mutex held.
+ */
+void stop_scan_thread(void)
+{
+	if (scan_thread) {
+		kthread_stop(scan_thread);
+		scan_thread = NULL;
+	}
+}
+
+/*
+ * Iterate over the object_list and return the first valid object at or after
+ * the required position with its use_count incremented. The function triggers
+ * a memory scanning when the pos argument points to the first position.
+ */
+static void *kmemleak_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct kmemleak_object *object;
+	loff_t n = *pos;
+
+	if (!n) {
+		kmemleak_scan();
+		reported_leaks = 0;
+	}
+	if (reported_leaks >= REPORTS_NR)
+		return NULL;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(object, &object_list, object_list) {
+		if (n-- > 0)
+			continue;
+		if (get_object(object))
+			goto out;
+	}
+	object = NULL;
+out:
+	rcu_read_unlock();
+	return object;
+}
+
+/*
+ * Return the next object in the object_list. The function decrements the
+ * use_count of the previous object and increases that of the next one.
+ */
+static void *kmemleak_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct kmemleak_object *prev_obj = v;
+	struct kmemleak_object *next_obj = NULL;
+	struct list_head *n = &prev_obj->object_list;
+
+	++(*pos);
+	if (reported_leaks >= REPORTS_NR)
+		goto out;
+
+	rcu_read_lock();
+	list_for_each_continue_rcu(n, &object_list) {
+		next_obj = list_entry(n, struct kmemleak_object, object_list);
+		if (get_object(next_obj))
+			break;
+	}
+	rcu_read_unlock();
+out:
+	put_object(prev_obj);
+	return next_obj;
+}
+
+/*
+ * Decrement the use_count of the last object required, if any.
+ */
+static void kmemleak_seq_stop(struct seq_file *seq, void *v)
+{
+	if (v)
+		put_object(v);
+}
+
+/*
+ * Print the information for an unreferenced object to the seq file.
+ */
+static int kmemleak_seq_show(struct seq_file *seq, void *v)
+{
+	struct kmemleak_object *object = v;
+	unsigned long flags;
+
+	spin_lock_irqsave(&object->lock, flags);
+	if (!unreferenced_object(object))
+		goto out;
+	print_unreferenced(seq, object);
+	reported_leaks++;
+out:
+	spin_unlock_irqrestore(&object->lock, flags);
+	return 0;
+}
+
+static const struct seq_operations kmemleak_seq_ops = {
+	.start = kmemleak_seq_start,
+	.next  = kmemleak_seq_next,
+	.stop  = kmemleak_seq_stop,
+	.show  = kmemleak_seq_show,
+};
+
+static int kmemleak_open(struct inode *inode, struct file *file)
+{
+	int ret = 0;
+
+	if (!atomic_read(&kmemleak_enabled))
+		return -EBUSY;
+
+	ret = mutex_lock_interruptible(&kmemleak_mutex);
+	if (ret < 0)
+		goto out;
+	if (file->f_mode & FMODE_READ) {
+		ret = mutex_lock_interruptible(&scan_mutex);
+		if (ret < 0)
+			goto kmemleak_unlock;
+		ret = seq_open(file, &kmemleak_seq_ops);
+		if (ret < 0)
+			goto scan_unlock;
+	}
+	return ret;
+
+scan_unlock:
+	mutex_unlock(&scan_mutex);
+kmemleak_unlock:
+	mutex_unlock(&kmemleak_mutex);
+out:
+	return ret;
+}
+
+static int kmemleak_release(struct inode *inode, struct file *file)
+{
+	int ret = 0;
+
+	if (file->f_mode & FMODE_READ) {
+		seq_release(inode, file);
+		mutex_unlock(&scan_mutex);
+	}
+	mutex_unlock(&kmemleak_mutex);
+
+	return ret;
+}
+
+/*
+ * File write operation to configure kmemleak at run-time. The following
+ * commands can be written to the /sys/kernel/debug/kmemleak file:
+ *   off	- disable kmemleak (irreversible)
+ *   stack=on	- enable the task stacks scanning
+ *   stack=off	- disable the tasks stacks scanning
+ *   scan=on	- start the automatic memory scanning thread
+ *   scan=off	- stop the automatic memory scanning thread
+ *   scan=...	- set the automatic memory scanning period in seconds (0 to
+ *		  disable it)
+ */
+static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
+			      size_t size, loff_t *ppos)
+{
+	char buf[64];
+	int buf_size;
+
+	if (!atomic_read(&kmemleak_enabled))
+		return -EBUSY;
+
+	buf_size = min(size, (sizeof(buf) - 1));
+	if (strncpy_from_user(buf, user_buf, buf_size) < 0)
+		return -EFAULT;
+	buf[buf_size] = 0;
+
+	if (strncmp(buf, "off", 3) == 0)
+		kmemleak_disable();
+	else if (strncmp(buf, "stack=on", 8) == 0)
+		kmemleak_stack_scan = 1;
+	else if (strncmp(buf, "stack=off", 9) == 0)
+		kmemleak_stack_scan = 0;
+	else if (strncmp(buf, "scan=on", 7) == 0)
+		start_scan_thread();
+	else if (strncmp(buf, "scan=off", 8) == 0)
+		stop_scan_thread();
+	else if (strncmp(buf, "scan=", 5) == 0) {
+		unsigned long secs;
+		int err;
+
+		err = strict_strtoul(buf + 5, 0, &secs);
+		if (err < 0)
+			return err;
+		stop_scan_thread();
+		if (secs) {
+			jiffies_scan_wait = msecs_to_jiffies(secs * 1000);
+			start_scan_thread();
+		}
+	} else
+		return -EINVAL;
+
+	/* ignore the rest of the buffer, only one command at a time */
+	*ppos += size;
+	return size;
+}
+
+static const struct file_operations kmemleak_fops = {
+	.owner		= THIS_MODULE,
+	.open		= kmemleak_open,
+	.read		= seq_read,
+	.write		= kmemleak_write,
+	.llseek		= seq_lseek,
+	.release	= kmemleak_release,
+};
+
+/*
+ * Perform the freeing of the kmemleak internal objects after waiting for any
+ * current memory scan to complete.
+ */
+static int kmemleak_cleanup_thread(void *arg)
+{
+	struct kmemleak_object *object;
+
+	mutex_lock(&kmemleak_mutex);
+	stop_scan_thread();
+	mutex_unlock(&kmemleak_mutex);
+
+	mutex_lock(&scan_mutex);
+	rcu_read_lock();
+	list_for_each_entry_rcu(object, &object_list, object_list)
+		delete_object(object->pointer);
+	rcu_read_unlock();
+	mutex_unlock(&scan_mutex);
+
+	return 0;
+}
+
+/*
+ * Start the clean-up thread.
+ */
+static void kmemleak_cleanup(void)
+{
+	struct task_struct *cleanup_thread;
+
+	cleanup_thread = kthread_run(kmemleak_cleanup_thread, NULL,
+				     "kmemleak-clean");
+	if (IS_ERR(cleanup_thread))
+		pr_warning("kmemleak: Failed to create the clean-up thread\n");
+}
+
+/*
+ * Disable kmemleak. No memory allocation/freeing will be traced once this
+ * function is called. Disabling kmemleak is an irreversible operation.
+ */
+static void kmemleak_disable(void)
+{
+	/* atomically check whether it was already invoked */
+	if (atomic_cmpxchg(&kmemleak_error, 0, 1))
+		return;
+
+	/* stop any memory operation tracing */
+	atomic_set(&kmemleak_early_log, 0);
+	atomic_set(&kmemleak_enabled, 0);
+
+	/* check whether it is too early for a kernel thread */
+	if (atomic_read(&kmemleak_initialized))
+		kmemleak_cleanup();
+
+	pr_info("Kernel memory leak detector disabled\n");
+}
+
+/*
+ * Allow boot-time kmemleak disabling (enabled by default).
+ */
+static int kmemleak_boot_config(char *str)
+{
+	if (!str)
+		return -EINVAL;
+	if (strcmp(str, "off") == 0)
+		kmemleak_disable();
+	else if (strcmp(str, "on") != 0)
+		return -EINVAL;
+	return 0;
+}
+early_param("kmemleak", kmemleak_boot_config);
+
+/*
+ * Kkmemleak initialization.
+ */
+void __init kmemleak_init(void)
+{
+	int i;
+	unsigned long flags;
+
+	jiffies_scan_yield = msecs_to_jiffies(MSECS_SCAN_YIELD);
+	jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE);
+	jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000);
+
+	object_cache = KMEM_CACHE(kmemleak_object, SLAB_NOLEAKTRACE);
+	scan_area_cache = KMEM_CACHE(kmemleak_scan_area, SLAB_NOLEAKTRACE);
+	INIT_PRIO_TREE_ROOT(&object_tree_root);
+
+	/* the kernel is still in UP mode, so disabling the IRQs is enough */
+	local_irq_save(flags);
+	if (!atomic_read(&kmemleak_error)) {
+		atomic_set(&kmemleak_enabled, 1);
+		atomic_set(&kmemleak_early_log, 0);
+	}
+	local_irq_restore(flags);
+
+	/*
+	 * This is the point where tracking allocations is safe. Automatic
+	 * scanning is started during the late initcall. Add the early logged
+	 * callbacks to the kmemleak infrastructure.
+	 */
+	for (i = 0; i < crt_early_log; i++) {
+		struct early_log *log = &early_log[i];
+
+		switch (log->op_type) {
+		case KMEMLEAK_ALLOC:
+			kmemleak_alloc(log->ptr, log->size, log->min_count,
+				       GFP_KERNEL);
+			break;
+		case KMEMLEAK_FREE:
+			kmemleak_free(log->ptr);
+			break;
+		case KMEMLEAK_NOT_LEAK:
+			kmemleak_not_leak(log->ptr);
+			break;
+		case KMEMLEAK_IGNORE:
+			kmemleak_ignore(log->ptr);
+			break;
+		case KMEMLEAK_SCAN_AREA:
+			kmemleak_scan_area(log->ptr, log->offset, log->length,
+					   GFP_KERNEL);
+			break;
+		case KMEMLEAK_NO_SCAN:
+			kmemleak_no_scan(log->ptr);
+			break;
+		default:
+			WARN_ON(1);
+		}
+	}
+}
+
+/*
+ * Late initialization function.
+ */
+static int __init kmemleak_late_init(void)
+{
+	struct dentry *dentry;
+
+	atomic_set(&kmemleak_initialized, 1);
+
+	if (atomic_read(&kmemleak_error)) {
+		/*
+		 * Some error occured and kmemleak was disabled. There is a
+		 * small chance that kmemleak_disable() was called immediately
+		 * after setting kmemleak_initialized and we may end up with
+		 * two clean-up threads but serialized by scan_mutex.
+		 */
+		kmemleak_cleanup();
+		return -ENOMEM;
+	}
+
+	dentry = debugfs_create_file("kmemleak", S_IRUGO, NULL, NULL,
+				     &kmemleak_fops);
+	if (!dentry)
+		pr_warning("kmemleak: Failed to create the debugfs kmemleak "
+			   "file\n");
+	mutex_lock(&kmemleak_mutex);
+	start_scan_thread();
+	mutex_unlock(&kmemleak_mutex);
+
+	pr_info("Kernel memory leak detector initialized\n");
+
+	return 0;
+}
+late_initcall(kmemleak_late_init);
-- 
cgit v1.2.3-70-g09d2


From d5cff635290aec9ad7e6ee546aa4fae895361cbb Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 11 Jun 2009 13:22:40 +0100
Subject: kmemleak: Add the slab memory allocation/freeing hooks

This patch adds the callbacks to kmemleak_(alloc|free) functions from
the slab allocator. The patch also adds the SLAB_NOLEAKTRACE flag to
avoid recursive calls to kmemleak when it allocates its own data
structures.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
 include/linux/slab.h |  2 ++
 mm/slab.c            | 32 ++++++++++++++++++++++++++++++--
 2 files changed, 32 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 24c5602bee9..48803064ced 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -62,6 +62,8 @@
 # define SLAB_DEBUG_OBJECTS	0x00000000UL
 #endif
 
+#define SLAB_NOLEAKTRACE	0x00800000UL	/* Avoid kmemleak tracing */
+
 /* The following flags affect the page allocator grouping pages by mobility */
 #define SLAB_RECLAIM_ACCOUNT	0x00020000UL		/* Objects are reclaimable */
 #define SLAB_TEMPORARY		SLAB_RECLAIM_ACCOUNT	/* Objects are short-lived */
diff --git a/mm/slab.c b/mm/slab.c
index f85831da908..859067f8e4f 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -107,6 +107,7 @@
 #include	<linux/string.h>
 #include	<linux/uaccess.h>
 #include	<linux/nodemask.h>
+#include	<linux/kmemleak.h>
 #include	<linux/mempolicy.h>
 #include	<linux/mutex.h>
 #include	<linux/fault-inject.h>
@@ -178,13 +179,13 @@
 			 SLAB_STORE_USER | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
 			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
-			 SLAB_DEBUG_OBJECTS)
+			 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE)
 #else
 # define CREATE_MASK	(SLAB_HWCACHE_ALIGN | \
 			 SLAB_CACHE_DMA | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
 			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
-			 SLAB_DEBUG_OBJECTS)
+			 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE)
 #endif
 
 /*
@@ -964,6 +965,14 @@ static struct array_cache *alloc_arraycache(int node, int entries,
 	struct array_cache *nc = NULL;
 
 	nc = kmalloc_node(memsize, GFP_KERNEL, node);
+	/*
+	 * The array_cache structures contain pointers to free object.
+	 * However, when such objects are allocated or transfered to another
+	 * cache the pointers are not cleared and they could be counted as
+	 * valid references during a kmemleak scan. Therefore, kmemleak must
+	 * not scan such objects.
+	 */
+	kmemleak_no_scan(nc);
 	if (nc) {
 		nc->avail = 0;
 		nc->limit = entries;
@@ -2621,6 +2630,14 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
 		/* Slab management obj is off-slab. */
 		slabp = kmem_cache_alloc_node(cachep->slabp_cache,
 					      local_flags, nodeid);
+		/*
+		 * If the first object in the slab is leaked (it's allocated
+		 * but no one has a reference to it), we want to make sure
+		 * kmemleak does not treat the ->s_mem pointer as a reference
+		 * to the object. Otherwise we will not report the leak.
+		 */
+		kmemleak_scan_area(slabp, offsetof(struct slab, list),
+				   sizeof(struct list_head), local_flags);
 		if (!slabp)
 			return NULL;
 	} else {
@@ -3141,6 +3158,12 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 		STATS_INC_ALLOCMISS(cachep);
 		objp = cache_alloc_refill(cachep, flags);
 	}
+	/*
+	 * To avoid a false negative, if an object that is in one of the
+	 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
+	 * treat the array pointers as a reference to the object.
+	 */
+	kmemleak_erase(&ac->entry[ac->avail]);
 	return objp;
 }
 
@@ -3360,6 +3383,8 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
   out:
 	local_irq_restore(save_flags);
 	ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
+	kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags,
+				 flags);
 
 	if (unlikely((flags & __GFP_ZERO) && ptr))
 		memset(ptr, 0, obj_size(cachep));
@@ -3415,6 +3440,8 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
 	objp = __do_cache_alloc(cachep, flags);
 	local_irq_restore(save_flags);
 	objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
+	kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags,
+				 flags);
 	prefetchw(objp);
 
 	if (unlikely((flags & __GFP_ZERO) && objp))
@@ -3530,6 +3557,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
 	struct array_cache *ac = cpu_cache_get(cachep);
 
 	check_irq_off();
+	kmemleak_free_recursive(objp, cachep->flags);
 	objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
 
 	/*
-- 
cgit v1.2.3-70-g09d2


From 2e1483c995bbd0fa6cbd055ad76088a520799ba4 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 11 Jun 2009 13:24:13 +0100
Subject: kmemleak: Remove some of the kmemleak false positives

There are allocations for which the main pointer cannot be found but
they are not memory leaks. This patch fixes some of them. For more
information on false positives, see Documentation/kmemleak.txt.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 drivers/char/vt.c      | 7 +++++++
 fs/block_dev.c         | 6 ++++++
 include/linux/percpu.h | 5 +++++
 3 files changed, 18 insertions(+)

(limited to 'include')

diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 08151d4de48..961c1a788c6 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -104,6 +104,7 @@
 #include <linux/io.h>
 #include <asm/system.h>
 #include <linux/uaccess.h>
+#include <linux/kmemleak.h>
 
 #define MAX_NR_CON_DRIVER 16
 
@@ -2880,6 +2881,12 @@ static int __init con_init(void)
 	 */
 	for (currcons = 0; currcons < MIN_NR_CONSOLES; currcons++) {
 		vc_cons[currcons].d = vc = alloc_bootmem(sizeof(struct vc_data));
+		/*
+		 * Kmemleak does not track the memory allocated via
+		 * alloc_bootmem() but this block contains pointers to
+		 * other blocks allocated via kmalloc.
+		 */
+		kmemleak_alloc(vc, sizeof(struct vc_data), 1, GFP_ATOMIC);
 		INIT_WORK(&vc_cons[currcons].SAK_work, vc_SAK);
 		visual_init(vc, currcons, 1);
 		vc->vc_screenbuf = (unsigned short *)alloc_bootmem(vc->vc_screenbuf_size);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index f45dbc18dd1..d250f807fd8 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -25,6 +25,7 @@
 #include <linux/uio.h>
 #include <linux/namei.h>
 #include <linux/log2.h>
+#include <linux/kmemleak.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
@@ -492,6 +493,11 @@ void __init bdev_cache_init(void)
 	bd_mnt = kern_mount(&bd_type);
 	if (IS_ERR(bd_mnt))
 		panic("Cannot create bdev pseudo-fs");
+	/*
+	 * This vfsmount structure is only used to obtain the
+	 * blockdev_superblock, so tell kmemleak not to report it.
+	 */
+	kmemleak_not_leak(bd_mnt);
 	blockdev_superblock = bd_mnt->mnt_sb;	/* For writeback */
 }
 
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 1581ff235c7..26fd9d12f05 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -86,7 +86,12 @@ struct percpu_data {
 	void *ptrs[1];
 };
 
+/* pointer disguising messes up the kmemleak objects tracking */
+#ifndef CONFIG_DEBUG_KMEMLEAK
 #define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata)
+#else
+#define __percpu_disguise(pdata) (struct percpu_data *)(pdata)
+#endif
 
 #define per_cpu_ptr(ptr, cpu)						\
 ({									\
-- 
cgit v1.2.3-70-g09d2


From 38c7fed2f5ffee17e1fa3e0f78b0e1bf43d52d13 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 25 May 2009 15:10:58 +0300
Subject: x86: remove some alloc_bootmem_cpumask_var calling

Now that we set up the slab allocator earlier, we can get rid of some
alloc_bootmem_cpumask_var() calls in boot code.

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
 arch/x86/kernel/apic/io_apic.c |  4 ++--
 include/linux/irq.h            | 18 +++++++-----------
 kernel/cpuset.c                |  2 +-
 kernel/profile.c               |  6 ------
 lib/cpumask.c                  | 11 ++---------
 5 files changed, 12 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 1946fac42ab..139201a562b 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -185,8 +185,8 @@ int __init arch_early_irq_init(void)
 	for (i = 0; i < count; i++) {
 		desc = irq_to_desc(i);
 		desc->chip_data = &cfg[i];
-		alloc_bootmem_cpumask_var(&cfg[i].domain);
-		alloc_bootmem_cpumask_var(&cfg[i].old_domain);
+		alloc_cpumask_var(&cfg[i].domain, GFP_NOWAIT);
+		alloc_cpumask_var(&cfg[i].old_domain, GFP_NOWAIT);
 		if (i < NR_IRQS_LEGACY)
 			cpumask_setall(cfg[i].domain);
 	}
diff --git a/include/linux/irq.h b/include/linux/irq.h
index eedbb8e5e0c..1e50c34f006 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -430,23 +430,19 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
  * Returns true if successful (or not required).
  */
 static inline bool alloc_desc_masks(struct irq_desc *desc, int node,
-								bool boot)
+							bool boot)
 {
-#ifdef CONFIG_CPUMASK_OFFSTACK
-	if (boot) {
-		alloc_bootmem_cpumask_var(&desc->affinity);
+	gfp_t gfp = GFP_ATOMIC;
 
-#ifdef CONFIG_GENERIC_PENDING_IRQ
-		alloc_bootmem_cpumask_var(&desc->pending_mask);
-#endif
-		return true;
-	}
+	if (boot)
+		gfp = GFP_NOWAIT;
 
-	if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node))
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	if (!alloc_cpumask_var_node(&desc->affinity, gfp, node))
 		return false;
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
-	if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) {
+	if (!alloc_cpumask_var_node(&desc->pending_mask, gfp, node)) {
 		free_cpumask_var(desc->affinity);
 		return false;
 	}
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 026faccca86..d5a7e17474e 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1857,7 +1857,7 @@ struct cgroup_subsys cpuset_subsys = {
 
 int __init cpuset_init_early(void)
 {
-	alloc_bootmem_cpumask_var(&top_cpuset.cpus_allowed);
+	alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_NOWAIT);
 
 	top_cpuset.mems_generation = cpuset_mems_generation++;
 	return 0;
diff --git a/kernel/profile.c b/kernel/profile.c
index 7724e0409ba..28cf26ad2d2 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -111,12 +111,6 @@ int __ref profile_init(void)
 	/* only text is profiled */
 	prof_len = (_etext - _stext) >> prof_shift;
 	buffer_bytes = prof_len*sizeof(atomic_t);
-	if (!slab_is_available()) {
-		prof_buffer = alloc_bootmem(buffer_bytes);
-		alloc_bootmem_cpumask_var(&prof_cpu_mask);
-		cpumask_copy(prof_cpu_mask, cpu_possible_mask);
-		return 0;
-	}
 
 	if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL))
 		return -ENOMEM;
diff --git a/lib/cpumask.c b/lib/cpumask.c
index eb23aaa0c7b..7bb4142a502 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -92,15 +92,8 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
  */
 bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
 {
-	if (likely(slab_is_available()))
-		*mask = kmalloc_node(cpumask_size(), flags, node);
-	else {
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-		printk(KERN_ERR
-			"=> alloc_cpumask_var: kmalloc not available!\n");
-#endif
-		*mask = NULL;
-	}
+	*mask = kmalloc_node(cpumask_size(), flags, node);
+
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
 	if (!*mask) {
 		printk(KERN_ERR "=> alloc_cpumask_var: failed!\n");
-- 
cgit v1.2.3-70-g09d2


From 670025478c2a687453cd1bac697d7d765843f59d Mon Sep 17 00:00:00 2001
From: Karsten Keil <keil@b1-systems.de>
Date: Thu, 11 Jun 2009 15:52:10 +0200
Subject: mISDN: cleanup mISDNhw.h

Remove unused stuff.

Signed-off-by: Karsten Keil <keil@b1-systems.de>
---
 include/linux/mISDNhw.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/mISDNhw.h b/include/linux/mISDNhw.h
index ce900f4c245..7f9831da847 100644
--- a/include/linux/mISDNhw.h
+++ b/include/linux/mISDNhw.h
@@ -89,11 +89,6 @@ struct dchannel {
 	void			(*phfunc) (struct dchannel *);
 	u_int			state;
 	void			*l1;
-	/* HW access */
-	u_char			(*read_reg) (void *, u_char);
-	void			(*write_reg) (void *, u_char, u_char);
-	void			(*read_fifo) (void *, u_char *, int);
-	void			(*write_fifo) (void *, u_char *, int);
 	void			*hw;
 	int			slot;	/* multiport card channel slot */
 	struct timer_list	timer;
@@ -151,11 +146,6 @@ struct bchannel {
 	u_long			Flags;
 	struct work_struct	workq;
 	u_int			state;
-	/* HW access */
-	u_char			(*read_reg) (void *, u_char);
-	void			(*write_reg) (void *, u_char, u_char);
-	void			(*read_fifo) (void *, u_char *, int);
-	void			(*write_fifo) (void *, u_char *, int);
 	void			*hw;
 	int			slot;	/* multiport card channel slot */
 	struct timer_list	timer;
-- 
cgit v1.2.3-70-g09d2


From 90586523eb4b349806887c62ee70685a49415124 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 21 May 2009 17:01:20 -0400
Subject: fsnotify: unified filesystem notification backend

fsnotify is a backend for filesystem notification.  fsnotify does
not provide any userspace interface but does provide the basis
needed for other notification schemes such as dnotify.  fsnotify
can be extended to be the backend for inotify or the upcoming
fanotify.  fsnotify provides a mechanism for "groups" to register for
some set of filesystem events and to then deliver those events to
those groups for processing.

fsnotify has a number of benefits, the first being actually shrinking the size
of an inode.  Before fsnotify to support both dnotify and inotify an inode had

        unsigned long           i_dnotify_mask; /* Directory notify events */
        struct dnotify_struct   *i_dnotify; /* for directory notifications */
        struct list_head        inotify_watches; /* watches on this inode */
        struct mutex            inotify_mutex;  /* protects the watches list

But with fsnotify this same functionallity (and more) is done with just

        __u32                   i_fsnotify_mask; /* all events for this inode */
        struct hlist_head       i_fsnotify_mark_entries; /* marks on this inode */

That's right, inotify, dnotify, and fanotify all in 64 bits.  We used that
much space just in inotify_watches alone, before this patch set.

fsnotify object lifetime and locking is MUCH better than what we have today.
inotify locking is incredibly complex.  See 8f7b0ba1c8539 as an example of
what's been busted since inception.  inotify needs to know internal semantics
of superblock destruction and unmounting to function.  The inode pinning and
vfs contortions are horrible.

no fsnotify implementers do allocation under locks.  This means things like
f04b30de3 which (due to an overabundance of caution) changes GFP_KERNEL to
GFP_NOFS can be reverted.  There are no longer any allocation rules when using
or implementing your own fsnotify listener.

fsnotify paves the way for fanotify.  In brief fanotify is a notification
mechanism that delivers the lisener both an 'event' and an open file descriptor
to the object in question.  This means that fanotify is pathname agnostic.
Some on lkml may not care for the original companies or users that pushed for
TALPA, but fanotify was designed with flexibility and input for other users in
mind.  The readahead group expressed interest in fanotify as it could be used
to profile disk access on boot without breaking the audit system.  The desktop
search groups have also expressed interest in fanotify as it solves a number
of the race conditions and problems present with managing inotify when more
than a limited number of specific files are of interest.  fanotify can provide
for a userspace access control system which makes it a clean interface for AV
vendors to hook without trying to do binary patching on the syscall table,
LSM, and everywhere else they do their things today.  With this patch series
fanotify can be implemented in less than 1200 lines of easy to review code.
Almost all of which is the socket based user interface.

This patch series builds fsnotify to the point that it can implement
dnotify and inotify_user.  Patches exist and will be sent soon after
acceptance to finish the in kernel inotify conversion (audit) and implement
fanotify.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
---
 fs/notify/Kconfig                |  13 +++
 fs/notify/Makefile               |   2 +
 fs/notify/fsnotify.c             |  79 ++++++++++++++++
 fs/notify/fsnotify.h             |  15 +++
 fs/notify/group.c                | 198 +++++++++++++++++++++++++++++++++++++++
 fs/notify/inotify/inotify.c      |  20 ++++
 fs/notify/notification.c         | 121 ++++++++++++++++++++++++
 include/linux/fsnotify.h         | 115 ++++++++++++++++-------
 include/linux/fsnotify_backend.h | 177 ++++++++++++++++++++++++++++++++++
 9 files changed, 705 insertions(+), 35 deletions(-)
 create mode 100644 fs/notify/fsnotify.c
 create mode 100644 fs/notify/fsnotify.h
 create mode 100644 fs/notify/group.c
 create mode 100644 fs/notify/notification.c
 create mode 100644 include/linux/fsnotify_backend.h

(limited to 'include')

diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
index 50914d7303c..31dac7e3b0f 100644
--- a/fs/notify/Kconfig
+++ b/fs/notify/Kconfig
@@ -1,2 +1,15 @@
+config FSNOTIFY
+	bool "Filesystem notification backend"
+	default y
+	---help---
+	   fsnotify is a backend for filesystem notification.  fsnotify does
+	   not provide any userspace interface but does provide the basis
+	   needed for other notification schemes such as dnotify, inotify,
+	   and fanotify.
+
+	   Say Y here to enable fsnotify suport.
+
+	   If unsure, say Y.
+
 source "fs/notify/dnotify/Kconfig"
 source "fs/notify/inotify/Kconfig"
diff --git a/fs/notify/Makefile b/fs/notify/Makefile
index 5a95b6010ce..db5467b5b58 100644
--- a/fs/notify/Makefile
+++ b/fs/notify/Makefile
@@ -1,2 +1,4 @@
+obj-$(CONFIG_FSNOTIFY)		+= fsnotify.o notification.o group.o
+
 obj-y			+= dnotify/
 obj-y			+= inotify/
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
new file mode 100644
index 00000000000..56bee0f10c3
--- /dev/null
+++ b/fs/notify/fsnotify.c
@@ -0,0 +1,79 @@
+/*
+ *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/dcache.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/srcu.h>
+
+#include <linux/fsnotify_backend.h>
+#include "fsnotify.h"
+
+/*
+ * This is the main call to fsnotify.  The VFS calls into hook specific functions
+ * in linux/fsnotify.h.  Those functions then in turn call here.  Here will call
+ * out to all of the registered fsnotify_group.  Those groups can then use the
+ * notification event in whatever means they feel necessary.
+ */
+void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is)
+{
+	struct fsnotify_group *group;
+	struct fsnotify_event *event = NULL;
+	int idx;
+
+	if (list_empty(&fsnotify_groups))
+		return;
+
+	if (!(mask & fsnotify_mask))
+		return;
+
+	/*
+	 * SRCU!!  the groups list is very very much read only and the path is
+	 * very hot.  The VAST majority of events are not going to need to do
+	 * anything other than walk the list so it's crazy to pre-allocate.
+	 */
+	idx = srcu_read_lock(&fsnotify_grp_srcu);
+	list_for_each_entry_rcu(group, &fsnotify_groups, group_list) {
+		if (mask & group->mask) {
+			if (!event) {
+				event = fsnotify_create_event(to_tell, mask, data, data_is);
+				/* shit, we OOM'd and now we can't tell, maybe
+				 * someday someone else will want to do something
+				 * here */
+				if (!event)
+					break;
+			}
+			group->ops->handle_event(group, event);
+		}
+	}
+	srcu_read_unlock(&fsnotify_grp_srcu, idx);
+	/*
+	 * fsnotify_create_event() took a reference so the event can't be cleaned
+	 * up while we are still trying to add it to lists, drop that one.
+	 */
+	if (event)
+		fsnotify_put_event(event);
+}
+EXPORT_SYMBOL_GPL(fsnotify);
+
+static __init int fsnotify_init(void)
+{
+	return init_srcu_struct(&fsnotify_grp_srcu);
+}
+subsys_initcall(fsnotify_init);
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
new file mode 100644
index 00000000000..c6a8bd47657
--- /dev/null
+++ b/fs/notify/fsnotify.h
@@ -0,0 +1,15 @@
+#ifndef __FS_NOTIFY_FSNOTIFY_H_
+#define __FS_NOTIFY_FSNOTIFY_H_
+
+#include <linux/list.h>
+#include <linux/fsnotify.h>
+#include <linux/srcu.h>
+#include <linux/types.h>
+
+/* protects reads of fsnotify_groups */
+extern struct srcu_struct fsnotify_grp_srcu;
+/* all groups which receive fsnotify events */
+extern struct list_head fsnotify_groups;
+/* all bitwise OR of all event types (FS_*) for all fsnotify_groups */
+extern __u32 fsnotify_mask;
+#endif	/* __FS_NOTIFY_FSNOTIFY_H_ */
diff --git a/fs/notify/group.c b/fs/notify/group.c
new file mode 100644
index 00000000000..c6812953b96
--- /dev/null
+++ b/fs/notify/group.c
@@ -0,0 +1,198 @@
+/*
+ *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/srcu.h>
+#include <linux/rculist.h>
+#include <linux/wait.h>
+
+#include <linux/fsnotify_backend.h>
+#include "fsnotify.h"
+
+#include <asm/atomic.h>
+
+/* protects writes to fsnotify_groups and fsnotify_mask */
+static DEFINE_MUTEX(fsnotify_grp_mutex);
+/* protects reads while running the fsnotify_groups list */
+struct srcu_struct fsnotify_grp_srcu;
+/* all groups registered to receive filesystem notifications */
+LIST_HEAD(fsnotify_groups);
+/* bitwise OR of all events (FS_*) interesting to some group on this system */
+__u32 fsnotify_mask;
+
+/*
+ * When a new group registers or changes it's set of interesting events
+ * this function updates the fsnotify_mask to contain all interesting events
+ */
+void fsnotify_recalc_global_mask(void)
+{
+	struct fsnotify_group *group;
+	__u32 mask = 0;
+	int idx;
+
+	idx = srcu_read_lock(&fsnotify_grp_srcu);
+	list_for_each_entry_rcu(group, &fsnotify_groups, group_list)
+		mask |= group->mask;
+	srcu_read_unlock(&fsnotify_grp_srcu, idx);
+	fsnotify_mask = mask;
+}
+
+/*
+ * Take a reference to a group so things found under the fsnotify_grp_mutex
+ * can't get freed under us
+ */
+static void fsnotify_get_group(struct fsnotify_group *group)
+{
+	atomic_inc(&group->refcnt);
+}
+
+/*
+ * Final freeing of a group
+ */
+static void fsnotify_destroy_group(struct fsnotify_group *group)
+{
+	if (group->ops->free_group_priv)
+		group->ops->free_group_priv(group);
+
+	kfree(group);
+}
+
+/*
+ * Remove this group from the global list of groups that will get events
+ * this can be done even if there are still references and things still using
+ * this group.  This just stops the group from getting new events.
+ */
+static void __fsnotify_evict_group(struct fsnotify_group *group)
+{
+	BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
+
+	if (group->on_group_list)
+		list_del_rcu(&group->group_list);
+	group->on_group_list = 0;
+}
+
+/*
+ * Called when a group is no longer interested in getting events.  This can be
+ * used if a group is misbehaving or if for some reason a group should no longer
+ * get any filesystem events.
+ */
+void fsnotify_evict_group(struct fsnotify_group *group)
+{
+	mutex_lock(&fsnotify_grp_mutex);
+	__fsnotify_evict_group(group);
+	mutex_unlock(&fsnotify_grp_mutex);
+}
+
+/*
+ * Drop a reference to a group.  Free it if it's through.
+ */
+void fsnotify_put_group(struct fsnotify_group *group)
+{
+	if (!atomic_dec_and_mutex_lock(&group->refcnt, &fsnotify_grp_mutex))
+		return;
+
+	/*
+	 * OK, now we know that there's no other users *and* we hold mutex,
+	 * so no new references will appear
+	 */
+	__fsnotify_evict_group(group);
+
+	/*
+	 * now it's off the list, so the only thing we might care about is
+	 * srcu access....
+	 */
+	mutex_unlock(&fsnotify_grp_mutex);
+	synchronize_srcu(&fsnotify_grp_srcu);
+
+	/* and now it is really dead. _Nothing_ could be seeing it */
+	fsnotify_recalc_global_mask();
+	fsnotify_destroy_group(group);
+}
+
+/*
+ * Simply run the fsnotify_groups list and find a group which matches
+ * the given parameters.  If a group is found we take a reference to that
+ * group.
+ */
+static struct fsnotify_group *fsnotify_find_group(unsigned int group_num, __u32 mask,
+						  const struct fsnotify_ops *ops)
+{
+	struct fsnotify_group *group_iter;
+	struct fsnotify_group *group = NULL;
+
+	BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
+
+	list_for_each_entry_rcu(group_iter, &fsnotify_groups, group_list) {
+		if (group_iter->group_num == group_num) {
+			if ((group_iter->mask == mask) &&
+			    (group_iter->ops == ops)) {
+				fsnotify_get_group(group_iter);
+				group = group_iter;
+			} else
+				group = ERR_PTR(-EEXIST);
+		}
+	}
+	return group;
+}
+
+/*
+ * Either finds an existing group which matches the group_num, mask, and ops or
+ * creates a new group and adds it to the global group list.  In either case we
+ * take a reference for the group returned.
+ */
+struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask,
+					     const struct fsnotify_ops *ops)
+{
+	struct fsnotify_group *group, *tgroup;
+
+	/* very low use, simpler locking if we just always alloc */
+	group = kmalloc(sizeof(struct fsnotify_group), GFP_KERNEL);
+	if (!group)
+		return ERR_PTR(-ENOMEM);
+
+	atomic_set(&group->refcnt, 1);
+
+	group->on_group_list = 0;
+	group->group_num = group_num;
+	group->mask = mask;
+
+	group->ops = ops;
+
+	mutex_lock(&fsnotify_grp_mutex);
+	tgroup = fsnotify_find_group(group_num, mask, ops);
+	if (tgroup) {
+		/* group already exists */
+		mutex_unlock(&fsnotify_grp_mutex);
+		/* destroy the new one we made */
+		fsnotify_put_group(group);
+		return tgroup;
+	}
+
+	/* group not found, add a new one */
+	list_add_rcu(&group->group_list, &fsnotify_groups);
+	group->on_group_list = 1;
+
+	mutex_unlock(&fsnotify_grp_mutex);
+
+	if (mask)
+		fsnotify_recalc_global_mask();
+
+	return group;
+}
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c
index 220c13f0d73..40b1cf914cc 100644
--- a/fs/notify/inotify/inotify.c
+++ b/fs/notify/inotify/inotify.c
@@ -32,6 +32,7 @@
 #include <linux/list.h>
 #include <linux/writeback.h>
 #include <linux/inotify.h>
+#include <linux/fsnotify_backend.h>
 
 static atomic_t inotify_cookie;
 
@@ -905,6 +906,25 @@ EXPORT_SYMBOL_GPL(inotify_rm_watch);
  */
 static int __init inotify_setup(void)
 {
+	BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
+	BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
+	BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
+	BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE);
+	BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
+	BUILD_BUG_ON(IN_OPEN != FS_OPEN);
+	BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM);
+	BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO);
+	BUILD_BUG_ON(IN_CREATE != FS_CREATE);
+	BUILD_BUG_ON(IN_DELETE != FS_DELETE);
+	BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF);
+	BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF);
+	BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
+
+	BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
+	BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR);
+	BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
+	BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
+
 	atomic_set(&inotify_cookie, 0);
 
 	return 0;
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
new file mode 100644
index 00000000000..b8e9a87f8f5
--- /dev/null
+++ b/fs/notify/notification.c
@@ -0,0 +1,121 @@
+/*
+ *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mount.h>
+#include <linux/mutex.h>
+#include <linux/namei.h>
+#include <linux/path.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include <asm/atomic.h>
+
+#include <linux/fsnotify_backend.h>
+#include "fsnotify.h"
+
+static struct kmem_cache *fsnotify_event_cachep;
+
+void fsnotify_get_event(struct fsnotify_event *event)
+{
+	atomic_inc(&event->refcnt);
+}
+
+void fsnotify_put_event(struct fsnotify_event *event)
+{
+	if (!event)
+		return;
+
+	if (atomic_dec_and_test(&event->refcnt)) {
+		if (event->data_type == FSNOTIFY_EVENT_PATH)
+			path_put(&event->path);
+
+		kmem_cache_free(fsnotify_event_cachep, event);
+	}
+}
+
+/*
+ * Allocate a new event which will be sent to each group's handle_event function
+ * if the group was interested in this particular event.
+ */
+struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
+					     void *data, int data_type)
+{
+	struct fsnotify_event *event;
+
+	event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL);
+	if (!event)
+		return NULL;
+
+	atomic_set(&event->refcnt, 1);
+
+	spin_lock_init(&event->lock);
+
+	event->path.dentry = NULL;
+	event->path.mnt = NULL;
+	event->inode = NULL;
+
+	event->to_tell = to_tell;
+
+	switch (data_type) {
+	case FSNOTIFY_EVENT_FILE: {
+		struct file *file = data;
+		struct path *path = &file->f_path;
+		event->path.dentry = path->dentry;
+		event->path.mnt = path->mnt;
+		path_get(&event->path);
+		event->data_type = FSNOTIFY_EVENT_PATH;
+		break;
+	}
+	case FSNOTIFY_EVENT_PATH: {
+		struct path *path = data;
+		event->path.dentry = path->dentry;
+		event->path.mnt = path->mnt;
+		path_get(&event->path);
+		event->data_type = FSNOTIFY_EVENT_PATH;
+		break;
+	}
+	case FSNOTIFY_EVENT_INODE:
+		event->inode = data;
+		event->data_type = FSNOTIFY_EVENT_INODE;
+		break;
+	case FSNOTIFY_EVENT_NONE:
+		event->inode = NULL;
+		event->path.dentry = NULL;
+		event->path.mnt = NULL;
+		break;
+	default:
+		BUG();
+	}
+
+	event->mask = mask;
+
+	return event;
+}
+
+__init int fsnotify_notification_init(void)
+{
+	fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC);
+
+	return 0;
+}
+subsys_initcall(fsnotify_notification_init);
+
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 00fbd5b245c..6c9ebefdac8 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -13,6 +13,7 @@
 
 #include <linux/dnotify.h>
 #include <linux/inotify.h>
+#include <linux/fsnotify_backend.h>
 #include <linux/audit.h>
 
 /*
@@ -34,6 +35,16 @@ static inline void fsnotify_d_move(struct dentry *entry)
 	inotify_d_move(entry);
 }
 
+/*
+ * fsnotify_link_count - inode's link count changed
+ */
+static inline void fsnotify_link_count(struct inode *inode)
+{
+	inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL);
+
+	fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE);
+}
+
 /*
  * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir
  */
@@ -43,28 +54,47 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 {
 	struct inode *source = moved->d_inode;
 	u32 cookie = inotify_get_cookie();
+	__u32 old_dir_mask = 0;
+	__u32 new_dir_mask = 0;
 
-	if (old_dir == new_dir)
+	if (old_dir == new_dir) {
 		inode_dir_notify(old_dir, DN_RENAME);
-	else {
+		old_dir_mask = FS_DN_RENAME;
+	} else {
 		inode_dir_notify(old_dir, DN_DELETE);
+		old_dir_mask = FS_DELETE;
 		inode_dir_notify(new_dir, DN_CREATE);
+		new_dir_mask = FS_CREATE;
 	}
 
-	if (isdir)
+	if (isdir) {
 		isdir = IN_ISDIR;
+		old_dir_mask |= FS_IN_ISDIR;
+		new_dir_mask |= FS_IN_ISDIR;
+	}
+
+	old_dir_mask |= FS_MOVED_FROM;
+	new_dir_mask |= FS_MOVED_TO;
+
 	inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir,cookie,old_name,
 				  source);
 	inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, cookie, new_name,
 				  source);
 
+	fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE);
+	fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE);
+
 	if (target) {
 		inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL, NULL);
 		inotify_inode_is_dead(target);
+
+		/* this is really a link_count change not a removal */
+		fsnotify_link_count(target);
 	}
 
 	if (source) {
 		inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL);
+		fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE);
 	}
 	audit_inode_child(new_name, moved, new_dir);
 }
@@ -74,10 +104,12 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
  */
 static inline void fsnotify_nameremove(struct dentry *dentry, int isdir)
 {
+	__u32 mask = FS_DELETE;
+
 	if (isdir)
-		isdir = IN_ISDIR;
+		mask |= FS_IN_ISDIR;
 	dnotify_parent(dentry, DN_DELETE);
-	inotify_dentry_parent_queue_event(dentry, IN_DELETE|isdir, 0, dentry->d_name.name);
+	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
 }
 
 /*
@@ -87,14 +119,8 @@ static inline void fsnotify_inoderemove(struct inode *inode)
 {
 	inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL, NULL);
 	inotify_inode_is_dead(inode);
-}
 
-/*
- * fsnotify_link_count - inode's link count changed
- */
-static inline void fsnotify_link_count(struct inode *inode)
-{
-	inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL);
+	fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE);
 }
 
 /*
@@ -106,6 +132,8 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry)
 	inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name,
 				  dentry->d_inode);
 	audit_inode_child(dentry->d_name.name, dentry, inode);
+
+	fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE);
 }
 
 /*
@@ -120,6 +148,8 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct
 				  inode);
 	fsnotify_link_count(inode);
 	audit_inode_child(new_dentry->d_name.name, new_dentry, dir);
+
+	fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE);
 }
 
 /*
@@ -127,10 +157,14 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct
  */
 static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
 {
+	__u32 mask = (FS_CREATE | FS_IN_ISDIR);
+	struct inode *d_inode = dentry->d_inode;
+
 	inode_dir_notify(inode, DN_CREATE);
-	inotify_inode_queue_event(inode, IN_CREATE | IN_ISDIR, 0, 
-				  dentry->d_name.name, dentry->d_inode);
+	inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode);
 	audit_inode_child(dentry->d_name.name, dentry, inode);
+
+	fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE);
 }
 
 /*
@@ -139,14 +173,16 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
 static inline void fsnotify_access(struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
-	u32 mask = IN_ACCESS;
+	__u32 mask = FS_ACCESS;
 
 	if (S_ISDIR(inode->i_mode))
-		mask |= IN_ISDIR;
+		mask |= FS_IN_ISDIR;
 
 	dnotify_parent(dentry, DN_ACCESS);
 	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
+
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
 }
 
 /*
@@ -155,14 +191,16 @@ static inline void fsnotify_access(struct dentry *dentry)
 static inline void fsnotify_modify(struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
-	u32 mask = IN_MODIFY;
+	__u32 mask = FS_MODIFY;
 
 	if (S_ISDIR(inode->i_mode))
-		mask |= IN_ISDIR;
+		mask |= FS_IN_ISDIR;
 
 	dnotify_parent(dentry, DN_MODIFY);
 	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
+
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
 }
 
 /*
@@ -171,13 +209,15 @@ static inline void fsnotify_modify(struct dentry *dentry)
 static inline void fsnotify_open(struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
-	u32 mask = IN_OPEN;
+	__u32 mask = FS_OPEN;
 
 	if (S_ISDIR(inode->i_mode))
-		mask |= IN_ISDIR;
+		mask |= FS_IN_ISDIR;
 
 	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
+
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
 }
 
 /*
@@ -189,13 +229,15 @@ static inline void fsnotify_close(struct file *file)
 	struct inode *inode = dentry->d_inode;
 	const char *name = dentry->d_name.name;
 	fmode_t mode = file->f_mode;
-	u32 mask = (mode & FMODE_WRITE) ? IN_CLOSE_WRITE : IN_CLOSE_NOWRITE;
+	__u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE;
 
 	if (S_ISDIR(inode->i_mode))
-		mask |= IN_ISDIR;
+		mask |= FS_IN_ISDIR;
 
 	inotify_dentry_parent_queue_event(dentry, mask, 0, name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
+
+	fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE);
 }
 
 /*
@@ -204,13 +246,15 @@ static inline void fsnotify_close(struct file *file)
 static inline void fsnotify_xattr(struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
-	u32 mask = IN_ATTRIB;
+	__u32 mask = FS_ATTRIB;
 
 	if (S_ISDIR(inode->i_mode))
-		mask |= IN_ISDIR;
+		mask |= FS_IN_ISDIR;
 
 	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
+
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
 }
 
 /*
@@ -221,34 +265,34 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid)
 {
 	struct inode *inode = dentry->d_inode;
 	int dn_mask = 0;
-	u32 in_mask = 0;
+	__u32 in_mask = 0;
 
 	if (ia_valid & ATTR_UID) {
-		in_mask |= IN_ATTRIB;
+		in_mask |= FS_ATTRIB;
 		dn_mask |= DN_ATTRIB;
 	}
 	if (ia_valid & ATTR_GID) {
-		in_mask |= IN_ATTRIB;
+		in_mask |= FS_ATTRIB;
 		dn_mask |= DN_ATTRIB;
 	}
 	if (ia_valid & ATTR_SIZE) {
-		in_mask |= IN_MODIFY;
+		in_mask |= FS_MODIFY;
 		dn_mask |= DN_MODIFY;
 	}
 	/* both times implies a utime(s) call */
 	if ((ia_valid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME))
 	{
-		in_mask |= IN_ATTRIB;
+		in_mask |= FS_ATTRIB;
 		dn_mask |= DN_ATTRIB;
 	} else if (ia_valid & ATTR_ATIME) {
-		in_mask |= IN_ACCESS;
+		in_mask |= FS_ACCESS;
 		dn_mask |= DN_ACCESS;
 	} else if (ia_valid & ATTR_MTIME) {
-		in_mask |= IN_MODIFY;
+		in_mask |= FS_MODIFY;
 		dn_mask |= DN_MODIFY;
 	}
 	if (ia_valid & ATTR_MODE) {
-		in_mask |= IN_ATTRIB;
+		in_mask |= FS_ATTRIB;
 		dn_mask |= DN_ATTRIB;
 	}
 
@@ -256,14 +300,15 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid)
 		dnotify_parent(dentry, dn_mask);
 	if (in_mask) {
 		if (S_ISDIR(inode->i_mode))
-			in_mask |= IN_ISDIR;
+			in_mask |= FS_IN_ISDIR;
 		inotify_inode_queue_event(inode, in_mask, 0, NULL, NULL);
 		inotify_dentry_parent_queue_event(dentry, in_mask, 0,
 						  dentry->d_name.name);
+		fsnotify(inode, in_mask, inode, FSNOTIFY_EVENT_INODE);
 	}
 }
 
-#ifdef CONFIG_INOTIFY	/* inotify helpers */
+#if defined(CONFIG_INOTIFY) || defined(CONFIG_FSNOTIFY)	/* notify helpers */
 
 /*
  * fsnotify_oldname_init - save off the old filename before we change it
@@ -281,7 +326,7 @@ static inline void fsnotify_oldname_free(const char *old_name)
 	kfree(old_name);
 }
 
-#else	/* CONFIG_INOTIFY */
+#else	/* CONFIG_INOTIFY || CONFIG_FSNOTIFY */
 
 static inline const char *fsnotify_oldname_init(const char *name)
 {
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
new file mode 100644
index 00000000000..1a55718b38a
--- /dev/null
+++ b/include/linux/fsnotify_backend.h
@@ -0,0 +1,177 @@
+/*
+ * Filesystem access notification for Linux
+ *
+ *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
+ */
+
+#ifndef __LINUX_FSNOTIFY_BACKEND_H
+#define __LINUX_FSNOTIFY_BACKEND_H
+
+#ifdef __KERNEL__
+
+#include <linux/fs.h> /* struct inode */
+#include <linux/list.h>
+#include <linux/path.h> /* struct path */
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <asm/atomic.h>
+
+/*
+ * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily
+ * convert between them.  dnotify only needs conversion at watch creation
+ * so no perf loss there.  fanotify isn't defined yet, so it can use the
+ * wholes if it needs more events.
+ */
+#define FS_ACCESS		0x00000001	/* File was accessed */
+#define FS_MODIFY		0x00000002	/* File was modified */
+#define FS_ATTRIB		0x00000004	/* Metadata changed */
+#define FS_CLOSE_WRITE		0x00000008	/* Writtable file was closed */
+#define FS_CLOSE_NOWRITE	0x00000010	/* Unwrittable file closed */
+#define FS_OPEN			0x00000020	/* File was opened */
+#define FS_MOVED_FROM		0x00000040	/* File was moved from X */
+#define FS_MOVED_TO		0x00000080	/* File was moved to Y */
+#define FS_CREATE		0x00000100	/* Subfile was created */
+#define FS_DELETE		0x00000200	/* Subfile was deleted */
+#define FS_DELETE_SELF		0x00000400	/* Self was deleted */
+#define FS_MOVE_SELF		0x00000800	/* Self was moved */
+
+#define FS_UNMOUNT		0x00002000	/* inode on umount fs */
+#define FS_Q_OVERFLOW		0x00004000	/* Event queued overflowed */
+#define FS_IN_IGNORED		0x00008000	/* last inotify event here */
+
+#define FS_IN_ISDIR		0x40000000	/* event occurred against dir */
+#define FS_IN_ONESHOT		0x80000000	/* only send event once */
+
+#define FS_DN_RENAME		0x10000000	/* file renamed */
+#define FS_DN_MULTISHOT		0x20000000	/* dnotify multishot */
+
+struct fsnotify_group;
+struct fsnotify_event;
+
+/*
+ * Each group much define these ops.  The fsnotify infrastructure will call
+ * these operations for each relevant group.
+ *
+ * handle_event - main call for a group to handle an fs event
+ * free_group_priv - called when a group refcnt hits 0 to clean up the private union
+ */
+struct fsnotify_ops {
+	int (*handle_event)(struct fsnotify_group *group, struct fsnotify_event *event);
+	void (*free_group_priv)(struct fsnotify_group *group);
+};
+
+/*
+ * A group is a "thing" that wants to receive notification about filesystem
+ * events.  The mask holds the subset of event types this group cares about.
+ * refcnt on a group is up to the implementor and at any moment if it goes 0
+ * everything will be cleaned up.
+ */
+struct fsnotify_group {
+	/*
+	 * global list of all groups receiving events from fsnotify.
+	 * anchored by fsnotify_groups and protected by either fsnotify_grp_mutex
+	 * or fsnotify_grp_srcu depending on write vs read.
+	 */
+	struct list_head group_list;
+
+	/*
+	 * Defines all of the event types in which this group is interested.
+	 * This mask is a bitwise OR of the FS_* events from above.  Each time
+	 * this mask changes for a group (if it changes) the correct functions
+	 * must be called to update the global structures which indicate global
+	 * interest in event types.
+	 */
+	__u32 mask;
+
+	/*
+	 * How the refcnt is used is up to each group.  When the refcnt hits 0
+	 * fsnotify will clean up all of the resources associated with this group.
+	 * As an example, the dnotify group will always have a refcnt=1 and that
+	 * will never change.  Inotify, on the other hand, has a group per
+	 * inotify_init() and the refcnt will hit 0 only when that fd has been
+	 * closed.
+	 */
+	atomic_t refcnt;		/* things with interest in this group */
+	unsigned int group_num;		/* simply prevents accidental group collision */
+
+	const struct fsnotify_ops *ops;	/* how this group handles things */
+
+	/* prevents double list_del of group_list.  protected by global fsnotify_gr_mutex */
+	bool on_group_list;
+
+	/* groups can define private fields here or use the void *private */
+	union {
+		void *private;
+	};
+};
+
+/*
+ * all of the information about the original object we want to now send to
+ * a group.  If you want to carry more info from the accessing task to the
+ * listener this structure is where you need to be adding fields.
+ */
+struct fsnotify_event {
+	spinlock_t lock;	/* protection for the associated event_holder and private_list */
+	/* to_tell may ONLY be dereferenced during handle_event(). */
+	struct inode *to_tell;	/* either the inode the event happened to or its parent */
+	/*
+	 * depending on the event type we should have either a path or inode
+	 * We hold a reference on path, but NOT on inode.  Since we have the ref on
+	 * the path, it may be dereferenced at any point during this object's
+	 * lifetime.  That reference is dropped when this object's refcnt hits
+	 * 0.  If this event contains an inode instead of a path, the inode may
+	 * ONLY be used during handle_event().
+	 */
+	union {
+		struct path path;
+		struct inode *inode;
+	};
+/* when calling fsnotify tell it if the data is a path or inode */
+#define FSNOTIFY_EVENT_NONE	0
+#define FSNOTIFY_EVENT_PATH	1
+#define FSNOTIFY_EVENT_INODE	2
+#define FSNOTIFY_EVENT_FILE	3
+	int data_type;		/* which of the above union we have */
+	atomic_t refcnt;	/* how many groups still are using/need to send this event */
+	__u32 mask;		/* the type of access, bitwise OR for FS_* event types */
+};
+
+#ifdef CONFIG_FSNOTIFY
+
+/* called from the vfs helpers */
+
+/* main fsnotify call to send events */
+extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is);
+
+
+/* called from fsnotify listeners, such as fanotify or dnotify */
+
+/* must call when a group changes its ->mask */
+extern void fsnotify_recalc_global_mask(void);
+/* get a reference to an existing or create a new group */
+extern struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num,
+						    __u32 mask,
+						    const struct fsnotify_ops *ops);
+/* drop reference on a group from fsnotify_obtain_group */
+extern void fsnotify_put_group(struct fsnotify_group *group);
+
+/* take a reference to an event */
+extern void fsnotify_get_event(struct fsnotify_event *event);
+extern void fsnotify_put_event(struct fsnotify_event *event);
+/* find private data previously attached to an event */
+extern struct fsnotify_event_private_data *fsnotify_get_priv_from_event(struct fsnotify_group *group,
+									struct fsnotify_event *event);
+
+/* put here because inotify does some weird stuff when destroying watches */
+extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
+						    void *data, int data_is);
+#else
+
+static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is)
+{}
+#endif	/* CONFIG_FSNOTIFY */
+
+#endif	/* __KERNEL __ */
+
+#endif	/* __LINUX_FSNOTIFY_BACKEND_H */
-- 
cgit v1.2.3-70-g09d2


From 3be25f49b9d6a97eae9bcb96d3292072b7658bd8 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 21 May 2009 17:01:26 -0400
Subject: fsnotify: add marks to inodes so groups can interpret how to handle
 those inodes

This patch creates a way for fsnotify groups to attach marks to inodes.
These marks have little meaning to the generic fsnotify infrastructure
and thus their meaning should be interpreted by the group that attached
them to the inode's list.

dnotify and inotify  will make use of these markings to indicate which
inodes are of interest to their respective groups.  But this implementation
has the useful property that in the future other listeners could actually
use the marks for the exact opposite reason, aka to indicate which inodes
it had NO interest in.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
---
 fs/inode.c                       |   9 ++
 fs/notify/Makefile               |   2 +-
 fs/notify/fsnotify.c             |  13 ++
 fs/notify/fsnotify.h             |   5 +
 fs/notify/group.c                |  49 +++++-
 fs/notify/inode_mark.c           | 329 +++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h               |   5 +
 include/linux/fsnotify.h         |   9 ++
 include/linux/fsnotify_backend.h |  65 +++++++-
 9 files changed, 483 insertions(+), 3 deletions(-)
 create mode 100644 fs/notify/inode_mark.c

(limited to 'include')

diff --git a/fs/inode.c b/fs/inode.c
index bca0c618fdb..54c63ce3de2 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -22,6 +22,7 @@
 #include <linux/cdev.h>
 #include <linux/bootmem.h>
 #include <linux/inotify.h>
+#include <linux/fsnotify.h>
 #include <linux/mount.h>
 #include <linux/async.h>
 
@@ -189,6 +190,10 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
 	inode->i_private = NULL;
 	inode->i_mapping = mapping;
 
+#ifdef CONFIG_FSNOTIFY
+	inode->i_fsnotify_mask = 0;
+#endif
+
 	return inode;
 
 out_free_security:
@@ -221,6 +226,7 @@ void destroy_inode(struct inode *inode)
 	BUG_ON(inode_has_buffers(inode));
 	ima_inode_free(inode);
 	security_inode_free(inode);
+	fsnotify_inode_delete(inode);
 	if (inode->i_sb->s_op->destroy_inode)
 		inode->i_sb->s_op->destroy_inode(inode);
 	else
@@ -252,6 +258,9 @@ void inode_init_once(struct inode *inode)
 	INIT_LIST_HEAD(&inode->inotify_watches);
 	mutex_init(&inode->inotify_mutex);
 #endif
+#ifdef CONFIG_FSNOTIFY
+	INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries);
+#endif
 }
 EXPORT_SYMBOL(inode_init_once);
 
diff --git a/fs/notify/Makefile b/fs/notify/Makefile
index db5467b5b58..0922cc826c4 100644
--- a/fs/notify/Makefile
+++ b/fs/notify/Makefile
@@ -1,4 +1,4 @@
-obj-$(CONFIG_FSNOTIFY)		+= fsnotify.o notification.o group.o
+obj-$(CONFIG_FSNOTIFY)		+= fsnotify.o notification.o group.o inode_mark.o
 
 obj-y			+= dnotify/
 obj-y			+= inotify/
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 56bee0f10c3..d5654629c65 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -25,6 +25,15 @@
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
 
+/*
+ * Clear all of the marks on an inode when it is being evicted from core
+ */
+void __fsnotify_inode_delete(struct inode *inode)
+{
+	fsnotify_clear_marks_by_inode(inode);
+}
+EXPORT_SYMBOL_GPL(__fsnotify_inode_delete);
+
 /*
  * This is the main call to fsnotify.  The VFS calls into hook specific functions
  * in linux/fsnotify.h.  Those functions then in turn call here.  Here will call
@@ -43,6 +52,8 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is)
 	if (!(mask & fsnotify_mask))
 		return;
 
+	if (!(mask & to_tell->i_fsnotify_mask))
+		return;
 	/*
 	 * SRCU!!  the groups list is very very much read only and the path is
 	 * very hot.  The VAST majority of events are not going to need to do
@@ -51,6 +62,8 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is)
 	idx = srcu_read_lock(&fsnotify_grp_srcu);
 	list_for_each_entry_rcu(group, &fsnotify_groups, group_list) {
 		if (mask & group->mask) {
+			if (!group->ops->should_send_event(group, to_tell, mask))
+				continue;
 			if (!event) {
 				event = fsnotify_create_event(to_tell, mask, data, data_is);
 				/* shit, we OOM'd and now we can't tell, maybe
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index c6a8bd47657..8ebcbe893c9 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -12,4 +12,9 @@ extern struct srcu_struct fsnotify_grp_srcu;
 extern struct list_head fsnotify_groups;
 /* all bitwise OR of all event types (FS_*) for all fsnotify_groups */
 extern __u32 fsnotify_mask;
+
+/* final kfree of a group */
+extern void fsnotify_final_destroy_group(struct fsnotify_group *group);
+/* run the list of all marks associated with inode and flag them to be freed */
+extern void fsnotify_clear_marks_by_inode(struct inode *inode);
 #endif	/* __FS_NOTIFY_FSNOTIFY_H_ */
diff --git a/fs/notify/group.c b/fs/notify/group.c
index c6812953b96..a29d2fa6792 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -54,6 +54,29 @@ void fsnotify_recalc_global_mask(void)
 	fsnotify_mask = mask;
 }
 
+/*
+ * Update the group->mask by running all of the marks associated with this
+ * group and finding the bitwise | of all of the mark->mask.  If we change
+ * the group->mask we need to update the global mask of events interesting
+ * to the system.
+ */
+void fsnotify_recalc_group_mask(struct fsnotify_group *group)
+{
+	__u32 mask = 0;
+	__u32 old_mask = group->mask;
+	struct fsnotify_mark_entry *entry;
+
+	spin_lock(&group->mark_lock);
+	list_for_each_entry(entry, &group->mark_entries, g_list)
+		mask |= entry->mask;
+	spin_unlock(&group->mark_lock);
+
+	group->mask = mask;
+
+	if (old_mask != mask)
+		fsnotify_recalc_global_mask();
+}
+
 /*
  * Take a reference to a group so things found under the fsnotify_grp_mutex
  * can't get freed under us
@@ -66,7 +89,7 @@ static void fsnotify_get_group(struct fsnotify_group *group)
 /*
  * Final freeing of a group
  */
-static void fsnotify_destroy_group(struct fsnotify_group *group)
+void fsnotify_final_destroy_group(struct fsnotify_group *group)
 {
 	if (group->ops->free_group_priv)
 		group->ops->free_group_priv(group);
@@ -74,6 +97,24 @@ static void fsnotify_destroy_group(struct fsnotify_group *group)
 	kfree(group);
 }
 
+/*
+ * Trying to get rid of a group.  We need to first get rid of any outstanding
+ * allocations and then free the group.  Remember that fsnotify_clear_marks_by_group
+ * could miss marks that are being freed by inode and those marks could still
+ * hold a reference to this group (via group->num_marks)  If we get into that
+ * situtation, the fsnotify_final_destroy_group will get called when that final
+ * mark is freed.
+ */
+static void fsnotify_destroy_group(struct fsnotify_group *group)
+{
+	/* clear all inode mark entries for this group */
+	fsnotify_clear_marks_by_group(group);
+
+	/* past the point of no return, matches the initial value of 1 */
+	if (atomic_dec_and_test(&group->num_marks))
+		fsnotify_final_destroy_group(group);
+}
+
 /*
  * Remove this group from the global list of groups that will get events
  * this can be done even if there are still references and things still using
@@ -173,6 +214,10 @@ struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask,
 	group->group_num = group_num;
 	group->mask = mask;
 
+	spin_lock_init(&group->mark_lock);
+	atomic_set(&group->num_marks, 0);
+	INIT_LIST_HEAD(&group->mark_entries);
+
 	group->ops = ops;
 
 	mutex_lock(&fsnotify_grp_mutex);
@@ -188,6 +233,8 @@ struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask,
 	/* group not found, add a new one */
 	list_add_rcu(&group->group_list, &fsnotify_groups);
 	group->on_group_list = 1;
+	/* being on the fsnotify_groups list holds one num_marks */
+	atomic_inc(&group->num_marks);
 
 	mutex_unlock(&fsnotify_grp_mutex);
 
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
new file mode 100644
index 00000000000..cdc15414697
--- /dev/null
+++ b/fs/notify/inode_mark.c
@@ -0,0 +1,329 @@
+/*
+ *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * fsnotify inode mark locking/lifetime/and refcnting
+ *
+ * REFCNT:
+ * The mark->refcnt tells how many "things" in the kernel currently are
+ * referencing this object.  The object typically will live inside the kernel
+ * with a refcnt of 2, one for each list it is on (i_list, g_list).  Any task
+ * which can find this object holding the appropriete locks, can take a reference
+ * and the object itself is guarenteed to survive until the reference is dropped.
+ *
+ * LOCKING:
+ * There are 3 spinlocks involved with fsnotify inode marks and they MUST
+ * be taken in order as follows:
+ *
+ * entry->lock
+ * group->mark_lock
+ * inode->i_lock
+ *
+ * entry->lock protects 2 things, entry->group and entry->inode.  You must hold
+ * that lock to dereference either of these things (they could be NULL even with
+ * the lock)
+ *
+ * group->mark_lock protects the mark_entries list anchored inside a given group
+ * and each entry is hooked via the g_list.  It also sorta protects the
+ * free_g_list, which when used is anchored by a private list on the stack of the
+ * task which held the group->mark_lock.
+ *
+ * inode->i_lock protects the i_fsnotify_mark_entries list anchored inside a
+ * given inode and each entry is hooked via the i_list. (and sorta the
+ * free_i_list)
+ *
+ *
+ * LIFETIME:
+ * Inode marks survive between when they are added to an inode and when their
+ * refcnt==0.
+ *
+ * The inode mark can be cleared for a number of different reasons including:
+ * - The inode is unlinked for the last time.  (fsnotify_inode_remove)
+ * - The inode is being evicted from cache. (fsnotify_inode_delete)
+ * - The fs the inode is on is unmounted.  (fsnotify_inode_delete/fsnotify_unmount_inodes)
+ * - Something explicitly requests that it be removed.  (fsnotify_destroy_mark_by_entry)
+ * - The fsnotify_group associated with the mark is going away and all such marks
+ *   need to be cleaned up. (fsnotify_clear_marks_by_group)
+ *
+ * Worst case we are given an inode and need to clean up all the marks on that
+ * inode.  We take i_lock and walk the i_fsnotify_mark_entries safely.  For each
+ * mark on the list we take a reference (so the mark can't disappear under us).
+ * We remove that mark form the inode's list of marks and we add this mark to a
+ * private list anchored on the stack using i_free_list;  At this point we no
+ * longer fear anything finding the mark using the inode's list of marks.
+ *
+ * We can safely and locklessly run the private list on the stack of everything
+ * we just unattached from the original inode.  For each mark on the private list
+ * we grab the mark-> and can thus dereference mark->group and mark->inode.  If
+ * we see the group and inode are not NULL we take those locks.  Now holding all
+ * 3 locks we can completely remove the mark from other tasks finding it in the
+ * future.  Remember, 10 things might already be referencing this mark, but they
+ * better be holding a ref.  We drop our reference we took before we unhooked it
+ * from the inode.  When the ref hits 0 we can free the mark.
+ *
+ * Very similarly for freeing by group, except we use free_g_list.
+ *
+ * This has the very interesting property of being able to run concurrently with
+ * any (or all) other directions.
+ */
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include <asm/atomic.h>
+
+#include <linux/fsnotify_backend.h>
+#include "fsnotify.h"
+
+void fsnotify_get_mark(struct fsnotify_mark_entry *entry)
+{
+	atomic_inc(&entry->refcnt);
+}
+
+void fsnotify_put_mark(struct fsnotify_mark_entry *entry)
+{
+	if (atomic_dec_and_test(&entry->refcnt))
+		entry->free_mark(entry);
+}
+
+/*
+ * Recalculate the mask of events relevant to a given inode locked.
+ */
+static void fsnotify_recalc_inode_mask_locked(struct inode *inode)
+{
+	struct fsnotify_mark_entry *entry;
+	struct hlist_node *pos;
+	__u32 new_mask = 0;
+
+	assert_spin_locked(&inode->i_lock);
+
+	hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list)
+		new_mask |= entry->mask;
+	inode->i_fsnotify_mask = new_mask;
+}
+
+/*
+ * Recalculate the inode->i_fsnotify_mask, or the mask of all FS_* event types
+ * any notifier is interested in hearing for this inode.
+ */
+void fsnotify_recalc_inode_mask(struct inode *inode)
+{
+	spin_lock(&inode->i_lock);
+	fsnotify_recalc_inode_mask_locked(inode);
+	spin_unlock(&inode->i_lock);
+}
+
+/*
+ * Any time a mark is getting freed we end up here.
+ * The caller had better be holding a reference to this mark so we don't actually
+ * do the final put under the entry->lock
+ */
+void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry)
+{
+	struct fsnotify_group *group;
+	struct inode *inode;
+
+	spin_lock(&entry->lock);
+
+	group = entry->group;
+	inode = entry->inode;
+
+	BUG_ON(group && !inode);
+	BUG_ON(!group && inode);
+
+	/* if !group something else already marked this to die */
+	if (!group) {
+		spin_unlock(&entry->lock);
+		return;
+	}
+
+	/* 1 from caller and 1 for being on i_list/g_list */
+	BUG_ON(atomic_read(&entry->refcnt) < 2);
+
+	spin_lock(&group->mark_lock);
+	spin_lock(&inode->i_lock);
+
+	hlist_del_init(&entry->i_list);
+	entry->inode = NULL;
+
+	list_del_init(&entry->g_list);
+	entry->group = NULL;
+
+	fsnotify_put_mark(entry); /* for i_list and g_list */
+
+	/*
+	 * this mark is now off the inode->i_fsnotify_mark_entries list and we
+	 * hold the inode->i_lock, so this is the perfect time to update the
+	 * inode->i_fsnotify_mask
+	 */
+	fsnotify_recalc_inode_mask_locked(inode);
+
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&group->mark_lock);
+	spin_unlock(&entry->lock);
+
+	/*
+	 * Some groups like to know that marks are being freed.  This is a
+	 * callback to the group function to let it know that this entry
+	 * is being freed.
+	 */
+	group->ops->freeing_mark(entry, group);
+
+	/*
+	 * it's possible that this group tried to destroy itself, but this
+	 * this mark was simultaneously being freed by inode.  If that's the
+	 * case, we finish freeing the group here.
+	 */
+	if (unlikely(atomic_dec_and_test(&group->num_marks)))
+		fsnotify_final_destroy_group(group);
+}
+
+/*
+ * Given a group, destroy all of the marks associated with that group.
+ */
+void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
+{
+	struct fsnotify_mark_entry *lentry, *entry;
+	LIST_HEAD(free_list);
+
+	spin_lock(&group->mark_lock);
+	list_for_each_entry_safe(entry, lentry, &group->mark_entries, g_list) {
+		list_add(&entry->free_g_list, &free_list);
+		list_del_init(&entry->g_list);
+		fsnotify_get_mark(entry);
+	}
+	spin_unlock(&group->mark_lock);
+
+	list_for_each_entry_safe(entry, lentry, &free_list, free_g_list) {
+		fsnotify_destroy_mark_by_entry(entry);
+		fsnotify_put_mark(entry);
+	}
+}
+
+/*
+ * Given an inode, destroy all of the marks associated with that inode.
+ */
+void fsnotify_clear_marks_by_inode(struct inode *inode)
+{
+	struct fsnotify_mark_entry *entry, *lentry;
+	struct hlist_node *pos, *n;
+	LIST_HEAD(free_list);
+
+	spin_lock(&inode->i_lock);
+	hlist_for_each_entry_safe(entry, pos, n, &inode->i_fsnotify_mark_entries, i_list) {
+		list_add(&entry->free_i_list, &free_list);
+		hlist_del_init(&entry->i_list);
+		fsnotify_get_mark(entry);
+	}
+	spin_unlock(&inode->i_lock);
+
+	list_for_each_entry_safe(entry, lentry, &free_list, free_i_list) {
+		fsnotify_destroy_mark_by_entry(entry);
+		fsnotify_put_mark(entry);
+	}
+}
+
+/*
+ * given a group and inode, find the mark associated with that combination.
+ * if found take a reference to that mark and return it, else return NULL
+ */
+struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *group,
+						     struct inode *inode)
+{
+	struct fsnotify_mark_entry *entry;
+	struct hlist_node *pos;
+
+	assert_spin_locked(&inode->i_lock);
+
+	hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list) {
+		if (entry->group == group) {
+			fsnotify_get_mark(entry);
+			return entry;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Nothing fancy, just initialize lists and locks and counters.
+ */
+void fsnotify_init_mark(struct fsnotify_mark_entry *entry,
+			void (*free_mark)(struct fsnotify_mark_entry *entry))
+
+{
+	spin_lock_init(&entry->lock);
+	atomic_set(&entry->refcnt, 1);
+	INIT_HLIST_NODE(&entry->i_list);
+	entry->group = NULL;
+	entry->mask = 0;
+	entry->inode = NULL;
+	entry->free_mark = free_mark;
+}
+
+/*
+ * Attach an initialized mark entry to a given group and inode.
+ * These marks may be used for the fsnotify backend to determine which
+ * event types should be delivered to which group and for which inodes.
+ */
+int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
+		      struct fsnotify_group *group, struct inode *inode)
+{
+	struct fsnotify_mark_entry *lentry;
+	int ret = 0;
+
+	/*
+	 * LOCKING ORDER!!!!
+	 * entry->lock
+	 * group->mark_lock
+	 * inode->i_lock
+	 */
+	spin_lock(&entry->lock);
+	spin_lock(&group->mark_lock);
+	spin_lock(&inode->i_lock);
+
+	entry->group = group;
+	entry->inode = inode;
+
+	lentry = fsnotify_find_mark_entry(group, inode);
+	if (!lentry) {
+		hlist_add_head(&entry->i_list, &inode->i_fsnotify_mark_entries);
+		list_add(&entry->g_list, &group->mark_entries);
+
+		fsnotify_get_mark(entry); /* for i_list and g_list */
+
+		atomic_inc(&group->num_marks);
+
+		fsnotify_recalc_inode_mask_locked(inode);
+	}
+
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&group->mark_lock);
+	spin_unlock(&entry->lock);
+
+	if (lentry) {
+		ret = -EEXIST;
+		fsnotify_put_mark(lentry);
+	}
+
+	return ret;
+}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 83d6b439724..275b0860044 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -755,6 +755,11 @@ struct inode {
 
 	__u32			i_generation;
 
+#ifdef CONFIG_FSNOTIFY
+	__u32			i_fsnotify_mask; /* all events this inode cares about */
+	struct hlist_head	i_fsnotify_mark_entries; /* fsnotify mark entries */
+#endif
+
 #ifdef CONFIG_DNOTIFY
 	unsigned long		i_dnotify_mask; /* Directory notify events */
 	struct dnotify_struct	*i_dnotify; /* for directory notifications */
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 6c9ebefdac8..3856eb6e597 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -99,6 +99,14 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 	audit_inode_child(new_name, moved, new_dir);
 }
 
+/*
+ * fsnotify_inode_delete - and inode is being evicted from cache, clean up is needed
+ */
+static inline void fsnotify_inode_delete(struct inode *inode)
+{
+	__fsnotify_inode_delete(inode);
+}
+
 /*
  * fsnotify_nameremove - a filename was removed from a directory
  */
@@ -121,6 +129,7 @@ static inline void fsnotify_inoderemove(struct inode *inode)
 	inotify_inode_is_dead(inode);
 
 	fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE);
+	__fsnotify_inode_delete(inode);
 }
 
 /*
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 1a55718b38a..cad5c4d75c1 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -48,17 +48,25 @@
 
 struct fsnotify_group;
 struct fsnotify_event;
+struct fsnotify_mark_entry;
 
 /*
  * Each group much define these ops.  The fsnotify infrastructure will call
  * these operations for each relevant group.
  *
+ * should_send_event - given a group, inode, and mask this function determines
+ *		if the group is interested in this event.
  * handle_event - main call for a group to handle an fs event
  * free_group_priv - called when a group refcnt hits 0 to clean up the private union
+ * freeing-mark - this means that a mark has been flagged to die when everything
+ *		finishes using it.  The function is supplied with what must be a
+ *		valid group and inode to use to clean up.
  */
 struct fsnotify_ops {
+	bool (*should_send_event)(struct fsnotify_group *group, struct inode *inode, __u32 mask);
 	int (*handle_event)(struct fsnotify_group *group, struct fsnotify_event *event);
 	void (*free_group_priv)(struct fsnotify_group *group);
+	void (*freeing_mark)(struct fsnotify_mark_entry *entry, struct fsnotify_group *group);
 };
 
 /*
@@ -97,7 +105,14 @@ struct fsnotify_group {
 
 	const struct fsnotify_ops *ops;	/* how this group handles things */
 
-	/* prevents double list_del of group_list.  protected by global fsnotify_gr_mutex */
+	/* stores all fastapth entries assoc with this group so they can be cleaned on unregister */
+	spinlock_t mark_lock;		/* protect mark_entries list */
+	atomic_t num_marks;		/* 1 for each mark entry and 1 for not being
+					 * past the point of no return when freeing
+					 * a group */
+	struct list_head mark_entries;	/* all inode mark entries for this group */
+
+	/* prevents double list_del of group_list.  protected by global fsnotify_grp_mutex */
 	bool on_group_list;
 
 	/* groups can define private fields here or use the void *private */
@@ -137,12 +152,38 @@ struct fsnotify_event {
 	__u32 mask;		/* the type of access, bitwise OR for FS_* event types */
 };
 
+/*
+ * a mark is simply an entry attached to an in core inode which allows an
+ * fsnotify listener to indicate they are either no longer interested in events
+ * of a type matching mask or only interested in those events.
+ *
+ * these are flushed when an inode is evicted from core and may be flushed
+ * when the inode is modified (as seen by fsnotify_access).  Some fsnotify users
+ * (such as dnotify) will flush these when the open fd is closed and not at
+ * inode eviction or modification.
+ */
+struct fsnotify_mark_entry {
+	__u32 mask;			/* mask this mark entry is for */
+	/* we hold ref for each i_list and g_list.  also one ref for each 'thing'
+	 * in kernel that found and may be using this mark. */
+	atomic_t refcnt;		/* active things looking at this mark */
+	struct inode *inode;		/* inode this entry is associated with */
+	struct fsnotify_group *group;	/* group this mark entry is for */
+	struct hlist_node i_list;	/* list of mark_entries by inode->i_fsnotify_mark_entries */
+	struct list_head g_list;	/* list of mark_entries by group->i_fsnotify_mark_entries */
+	spinlock_t lock;		/* protect group, inode, and killme */
+	struct list_head free_i_list;	/* tmp list used when freeing this mark */
+	struct list_head free_g_list;	/* tmp list used when freeing this mark */
+	void (*free_mark)(struct fsnotify_mark_entry *entry); /* called on final put+free */
+};
+
 #ifdef CONFIG_FSNOTIFY
 
 /* called from the vfs helpers */
 
 /* main fsnotify call to send events */
 extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is);
+extern void __fsnotify_inode_delete(struct inode *inode);
 
 
 /* called from fsnotify listeners, such as fanotify or dnotify */
@@ -153,6 +194,8 @@ extern void fsnotify_recalc_global_mask(void);
 extern struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num,
 						    __u32 mask,
 						    const struct fsnotify_ops *ops);
+/* run all marks associated with this group and update group->mask */
+extern void fsnotify_recalc_group_mask(struct fsnotify_group *group);
 /* drop reference on a group from fsnotify_obtain_group */
 extern void fsnotify_put_group(struct fsnotify_group *group);
 
@@ -163,6 +206,22 @@ extern void fsnotify_put_event(struct fsnotify_event *event);
 extern struct fsnotify_event_private_data *fsnotify_get_priv_from_event(struct fsnotify_group *group,
 									struct fsnotify_event *event);
 
+/* functions used to manipulate the marks attached to inodes */
+
+/* run all marks associated with an inode and update inode->i_fsnotify_mask */
+extern void fsnotify_recalc_inode_mask(struct inode *inode);
+extern void fsnotify_init_mark(struct fsnotify_mark_entry *entry, void (*free_mark)(struct fsnotify_mark_entry *entry));
+/* find (and take a reference) to a mark associated with group and inode */
+extern struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *group, struct inode *inode);
+/* attach the mark to both the group and the inode */
+extern int fsnotify_add_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group, struct inode *inode);
+/* given a mark, flag it to be freed when all references are dropped */
+extern void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry);
+/* run all the marks in a group, and flag them to be freed */
+extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group);
+extern void fsnotify_get_mark(struct fsnotify_mark_entry *entry);
+extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry);
+
 /* put here because inotify does some weird stuff when destroying watches */
 extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
 						    void *data, int data_is);
@@ -170,6 +229,10 @@ extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32
 
 static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is)
 {}
+
+static inline void __fsnotify_inode_delete(struct inode *inode)
+{}
+
 #endif	/* CONFIG_FSNOTIFY */
 
 #endif	/* __KERNEL __ */
-- 
cgit v1.2.3-70-g09d2


From c28f7e56e9d95fb531dc3be8df2e7f52bee76d21 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 21 May 2009 17:01:29 -0400
Subject: fsnotify: parent event notification

inotify and dnotify both use a similar parent notification mechanism.  We
add a generic parent notification mechanism to fsnotify for both of these
to use.  This new machanism also adds the dentry flag optimization which
exists for inotify to dnotify.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
---
 fs/notify/fsnotify.c             | 91 ++++++++++++++++++++++++++++++++++++++++
 fs/notify/fsnotify.h             |  5 +++
 fs/notify/inode_mark.c           | 17 ++++++++
 include/linux/dcache.h           |  4 +-
 include/linux/fsnotify.h         | 34 +++++++++++----
 include/linux/fsnotify_backend.h | 64 ++++++++++++++++++++++++++++
 6 files changed, 205 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index d5654629c65..7fc760067a6 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -34,6 +34,97 @@ void __fsnotify_inode_delete(struct inode *inode)
 }
 EXPORT_SYMBOL_GPL(__fsnotify_inode_delete);
 
+/*
+ * Given an inode, first check if we care what happens to our children.  Inotify
+ * and dnotify both tell their parents about events.  If we care about any event
+ * on a child we run all of our children and set a dentry flag saying that the
+ * parent cares.  Thus when an event happens on a child it can quickly tell if
+ * if there is a need to find a parent and send the event to the parent.
+ */
+void __fsnotify_update_child_dentry_flags(struct inode *inode)
+{
+	struct dentry *alias;
+	int watched;
+
+	if (!S_ISDIR(inode->i_mode))
+		return;
+
+	/* determine if the children should tell inode about their events */
+	watched = fsnotify_inode_watches_children(inode);
+
+	spin_lock(&dcache_lock);
+	/* run all of the dentries associated with this inode.  Since this is a
+	 * directory, there damn well better only be one item on this list */
+	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+		struct dentry *child;
+
+		/* run all of the children of the original inode and fix their
+		 * d_flags to indicate parental interest (their parent is the
+		 * original inode) */
+		list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
+			if (!child->d_inode)
+				continue;
+
+			spin_lock(&child->d_lock);
+			if (watched)
+				child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
+			else
+				child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
+			spin_unlock(&child->d_lock);
+		}
+	}
+	spin_unlock(&dcache_lock);
+}
+
+/* Notify this dentry's parent about a child's events. */
+void __fsnotify_parent(struct dentry *dentry, __u32 mask)
+{
+	struct dentry *parent;
+	struct inode *p_inode;
+	bool send = false;
+	bool should_update_children = false;
+
+	if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
+		return;
+
+	spin_lock(&dentry->d_lock);
+	parent = dentry->d_parent;
+	p_inode = parent->d_inode;
+
+	if (fsnotify_inode_watches_children(p_inode)) {
+		if (p_inode->i_fsnotify_mask & mask) {
+			dget(parent);
+			send = true;
+		}
+	} else {
+		/*
+		 * The parent doesn't care about events on it's children but
+		 * at least one child thought it did.  We need to run all the
+		 * children and update their d_flags to let them know p_inode
+		 * doesn't care about them any more.
+		 */
+		dget(parent);
+		should_update_children = true;
+	}
+
+	spin_unlock(&dentry->d_lock);
+
+	if (send) {
+		/* we are notifying a parent so come up with the new mask which
+		 * specifies these are events which came from a child. */
+		mask |= FS_EVENT_ON_CHILD;
+
+		fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE);
+		dput(parent);
+	}
+
+	if (unlikely(should_update_children)) {
+		__fsnotify_update_child_dentry_flags(p_inode);
+		dput(parent);
+	}
+}
+EXPORT_SYMBOL_GPL(__fsnotify_parent);
+
 /*
  * This is the main call to fsnotify.  The VFS calls into hook specific functions
  * in linux/fsnotify.h.  Those functions then in turn call here.  Here will call
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 8ebcbe893c9..83b8ec0a8ec 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -17,4 +17,9 @@ extern __u32 fsnotify_mask;
 extern void fsnotify_final_destroy_group(struct fsnotify_group *group);
 /* run the list of all marks associated with inode and flag them to be freed */
 extern void fsnotify_clear_marks_by_inode(struct inode *inode);
+/*
+ * update the dentry->d_flags of all of inode's children to indicate if inode cares
+ * about events that happen to its children.
+ */
+extern void __fsnotify_update_child_dentry_flags(struct inode *inode);
 #endif	/* __FS_NOTIFY_FSNOTIFY_H_ */
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index cdc15414697..a39534845b2 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -131,6 +131,8 @@ void fsnotify_recalc_inode_mask(struct inode *inode)
 	spin_lock(&inode->i_lock);
 	fsnotify_recalc_inode_mask_locked(inode);
 	spin_unlock(&inode->i_lock);
+
+	__fsnotify_update_child_dentry_flags(inode);
 }
 
 /*
@@ -189,6 +191,19 @@ void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry)
 	 */
 	group->ops->freeing_mark(entry, group);
 
+	/*
+	 * __fsnotify_update_child_dentry_flags(inode);
+	 *
+	 * I really want to call that, but we can't, we have no idea if the inode
+	 * still exists the second we drop the entry->lock.
+	 *
+	 * The next time an event arrive to this inode from one of it's children
+	 * __fsnotify_parent will see that the inode doesn't care about it's
+	 * children and will update all of these flags then.  So really this
+	 * is just a lazy update (and could be a perf win...)
+	 */
+
+
 	/*
 	 * it's possible that this group tried to destroy itself, but this
 	 * this mark was simultaneously being freed by inode.  If that's the
@@ -323,6 +338,8 @@ int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
 	if (lentry) {
 		ret = -EEXIST;
 		fsnotify_put_mark(lentry);
+	} else {
+		__fsnotify_update_child_dentry_flags(inode);
 	}
 
 	return ret;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 15156364d19..97978004338 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -180,10 +180,12 @@ d_iput:		no		no		no       yes
 #define DCACHE_REFERENCED	0x0008  /* Recently used, don't discard. */
 #define DCACHE_UNHASHED		0x0010	
 
-#define DCACHE_INOTIFY_PARENT_WATCHED	0x0020 /* Parent inode is watched */
+#define DCACHE_INOTIFY_PARENT_WATCHED	0x0020 /* Parent inode is watched by inotify */
 
 #define DCACHE_COOKIE		0x0040	/* For use by dcookie subsystem */
 
+#define DCACHE_FSNOTIFY_PARENT_WATCHED	0x0080 /* Parent inode is watched by some fsnotify listener */
+
 extern spinlock_t dcache_lock;
 extern seqlock_t rename_lock;
 
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 3856eb6e597..6a662ed0bc8 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -23,15 +23,31 @@
 static inline void fsnotify_d_instantiate(struct dentry *entry,
 						struct inode *inode)
 {
+	__fsnotify_d_instantiate(entry, inode);
+
 	inotify_d_instantiate(entry, inode);
 }
 
+/* Notify this dentry's parent about a child's events. */
+static inline void fsnotify_parent(struct dentry *dentry, __u32 mask)
+{
+	__fsnotify_parent(dentry, mask);
+
+	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
+}
+
 /*
  * fsnotify_d_move - entry has been moved
  * Called with dcache_lock and entry->d_lock held.
  */
 static inline void fsnotify_d_move(struct dentry *entry)
 {
+	/*
+	 * On move we need to update entry->d_flags to indicate if the new parent
+	 * cares about events from this entry.
+	 */
+	__fsnotify_update_dcache_flags(entry);
+
 	inotify_d_move(entry);
 }
 
@@ -117,7 +133,8 @@ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir)
 	if (isdir)
 		mask |= FS_IN_ISDIR;
 	dnotify_parent(dentry, DN_DELETE);
-	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
+
+	fsnotify_parent(dentry, mask);
 }
 
 /*
@@ -188,9 +205,9 @@ static inline void fsnotify_access(struct dentry *dentry)
 		mask |= FS_IN_ISDIR;
 
 	dnotify_parent(dentry, DN_ACCESS);
-	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
+	fsnotify_parent(dentry, mask);
 	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
 }
 
@@ -206,9 +223,9 @@ static inline void fsnotify_modify(struct dentry *dentry)
 		mask |= FS_IN_ISDIR;
 
 	dnotify_parent(dentry, DN_MODIFY);
-	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
+	fsnotify_parent(dentry, mask);
 	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
 }
 
@@ -223,9 +240,9 @@ static inline void fsnotify_open(struct dentry *dentry)
 	if (S_ISDIR(inode->i_mode))
 		mask |= FS_IN_ISDIR;
 
-	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
+	fsnotify_parent(dentry, mask);
 	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
 }
 
@@ -236,16 +253,15 @@ static inline void fsnotify_close(struct file *file)
 {
 	struct dentry *dentry = file->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
-	const char *name = dentry->d_name.name;
 	fmode_t mode = file->f_mode;
 	__u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE;
 
 	if (S_ISDIR(inode->i_mode))
 		mask |= FS_IN_ISDIR;
 
-	inotify_dentry_parent_queue_event(dentry, mask, 0, name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
+	fsnotify_parent(dentry, mask);
 	fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE);
 }
 
@@ -260,9 +276,9 @@ static inline void fsnotify_xattr(struct dentry *dentry)
 	if (S_ISDIR(inode->i_mode))
 		mask |= FS_IN_ISDIR;
 
-	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
+	fsnotify_parent(dentry, mask);
 	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
 }
 
@@ -311,8 +327,8 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid)
 		if (S_ISDIR(inode->i_mode))
 			in_mask |= FS_IN_ISDIR;
 		inotify_inode_queue_event(inode, in_mask, 0, NULL, NULL);
-		inotify_dentry_parent_queue_event(dentry, in_mask, 0,
-						  dentry->d_name.name);
+
+		fsnotify_parent(dentry, in_mask);
 		fsnotify(inode, in_mask, inode, FSNOTIFY_EVENT_INODE);
 	}
 }
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index cad5c4d75c1..13d2dd57004 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -46,6 +46,17 @@
 #define FS_DN_RENAME		0x10000000	/* file renamed */
 #define FS_DN_MULTISHOT		0x20000000	/* dnotify multishot */
 
+/* This inode cares about things that happen to its children.  Always set for
+ * dnotify and inotify. */
+#define FS_EVENT_ON_CHILD	0x08000000
+
+/* This is a list of all events that may get sent to a parernt based on fs event
+ * happening to inodes inside that directory */
+#define FS_EVENTS_POSS_ON_CHILD   (FS_ACCESS | FS_MODIFY | FS_ATTRIB |\
+				   FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\
+				   FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\
+				   FS_DELETE)
+
 struct fsnotify_group;
 struct fsnotify_event;
 struct fsnotify_mark_entry;
@@ -183,8 +194,52 @@ struct fsnotify_mark_entry {
 
 /* main fsnotify call to send events */
 extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is);
+extern void __fsnotify_parent(struct dentry *dentry, __u32 mask);
 extern void __fsnotify_inode_delete(struct inode *inode);
 
+static inline int fsnotify_inode_watches_children(struct inode *inode)
+{
+	/* FS_EVENT_ON_CHILD is set if the inode may care */
+	if (!(inode->i_fsnotify_mask & FS_EVENT_ON_CHILD))
+		return 0;
+	/* this inode might care about child events, does it care about the
+	 * specific set of events that can happen on a child? */
+	return inode->i_fsnotify_mask & FS_EVENTS_POSS_ON_CHILD;
+}
+
+/*
+ * Update the dentry with a flag indicating the interest of its parent to receive
+ * filesystem events when those events happens to this dentry->d_inode.
+ */
+static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
+{
+	struct dentry *parent;
+
+	assert_spin_locked(&dcache_lock);
+	assert_spin_locked(&dentry->d_lock);
+
+	parent = dentry->d_parent;
+	if (fsnotify_inode_watches_children(parent->d_inode))
+		dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
+	else
+		dentry->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
+}
+
+/*
+ * fsnotify_d_instantiate - instantiate a dentry for inode
+ * Called with dcache_lock held.
+ */
+static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode)
+{
+	if (!inode)
+		return;
+
+	assert_spin_locked(&dcache_lock);
+
+	spin_lock(&dentry->d_lock);
+	__fsnotify_update_dcache_flags(dentry);
+	spin_unlock(&dentry->d_lock);
+}
 
 /* called from fsnotify listeners, such as fanotify or dnotify */
 
@@ -230,9 +285,18 @@ extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32
 static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is)
 {}
 
+static inline void __fsnotify_parent(struct dentry *dentry, __u32 mask)
+{}
+
 static inline void __fsnotify_inode_delete(struct inode *inode)
 {}
 
+static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
+{}
+
+static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode)
+{}
+
 #endif	/* CONFIG_FSNOTIFY */
 
 #endif	/* __KERNEL __ */
-- 
cgit v1.2.3-70-g09d2


From 3c5119c05d624f95f4967d16b38c9624b816bdb9 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 21 May 2009 17:01:33 -0400
Subject: dnotify: reimplement dnotify using fsnotify

Reimplement dnotify using fsnotify.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
---
 MAINTAINERS                      |   6 +-
 fs/notify/dnotify/Kconfig        |   1 +
 fs/notify/dnotify/dnotify.c      | 469 ++++++++++++++++++++++++++++++---------
 include/linux/dnotify.h          |  29 +--
 include/linux/fs.h               |   5 -
 include/linux/fsnotify.h         |  68 ++----
 include/linux/fsnotify_backend.h |   3 +
 7 files changed, 398 insertions(+), 183 deletions(-)

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index ccdb57524e3..96e0c8c6079 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1802,10 +1802,10 @@ F:	drivers/char/epca*
 F:	drivers/char/digi*
 
 DIRECTORY NOTIFICATION (DNOTIFY)
-P:	Stephen Rothwell
-M:	sfr@canb.auug.org.au
+P:	Eric Paris
+M:	eparis@parisplace.org
 L:	linux-kernel@vger.kernel.org
-S:	Supported
+S:	Maintained
 F:	Documentation/filesystems/dnotify.txt
 F:	fs/notify/dnotify/
 F:	include/linux/dnotify.h
diff --git a/fs/notify/dnotify/Kconfig b/fs/notify/dnotify/Kconfig
index 26adf5dfa64..904ff8d5405 100644
--- a/fs/notify/dnotify/Kconfig
+++ b/fs/notify/dnotify/Kconfig
@@ -1,5 +1,6 @@
 config DNOTIFY
 	bool "Dnotify support"
+	depends on FSNOTIFY
 	default y
 	help
 	  Dnotify is a directory-based per-fd file change notification system
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index b0aa2cde80b..d9d80f502c6 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -3,6 +3,9 @@
  *
  * Copyright (C) 2000,2001,2002 Stephen Rothwell
  *
+ * Copyright (C) 2009 Eric Paris <Red Hat Inc>
+ * dnotify was largly rewritten to use the new fsnotify infrastructure
+ *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
  * Free Software Foundation; either version 2, or (at your option) any
@@ -21,24 +24,178 @@
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/fdtable.h>
+#include <linux/fsnotify_backend.h>
 
 int dir_notify_enable __read_mostly = 1;
 
-static struct kmem_cache *dn_cache __read_mostly;
+static struct kmem_cache *dnotify_struct_cache __read_mostly;
+static struct kmem_cache *dnotify_mark_entry_cache __read_mostly;
+static struct fsnotify_group *dnotify_group __read_mostly;
+static DEFINE_MUTEX(dnotify_mark_mutex);
+
+/*
+ * dnotify will attach one of these to each inode (i_fsnotify_mark_entries) which
+ * is being watched by dnotify.  If multiple userspace applications are watching
+ * the same directory with dnotify their information is chained in dn
+ */
+struct dnotify_mark_entry {
+	struct fsnotify_mark_entry fsn_entry;
+	struct dnotify_struct *dn;
+};
 
-static void redo_inode_mask(struct inode *inode)
+/*
+ * When a process starts or stops watching an inode the set of events which
+ * dnotify cares about for that inode may change.  This function runs the
+ * list of everything receiving dnotify events about this directory and calculates
+ * the set of all those events.  After it updates what dnotify is interested in
+ * it calls the fsnotify function so it can update the set of all events relevant
+ * to this inode.
+ */
+static void dnotify_recalc_inode_mask(struct fsnotify_mark_entry *entry)
 {
-	unsigned long new_mask;
+	__u32 new_mask, old_mask;
 	struct dnotify_struct *dn;
+	struct dnotify_mark_entry *dnentry  = container_of(entry,
+							   struct dnotify_mark_entry,
+							   fsn_entry);
+
+	assert_spin_locked(&entry->lock);
 
+	old_mask = entry->mask;
 	new_mask = 0;
-	for (dn = inode->i_dnotify; dn != NULL; dn = dn->dn_next)
-		new_mask |= dn->dn_mask & ~DN_MULTISHOT;
-	inode->i_dnotify_mask = new_mask;
+	for (dn = dnentry->dn; dn != NULL; dn = dn->dn_next)
+		new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT);
+	entry->mask = new_mask;
+
+	if (old_mask == new_mask)
+		return;
+
+	if (entry->inode)
+		fsnotify_recalc_inode_mask(entry->inode);
 }
 
+/*
+ * Mains fsnotify call where events are delivered to dnotify.
+ * Find the dnotify mark on the relevant inode, run the list of dnotify structs
+ * on that mark and determine which of them has expressed interest in receiving
+ * events of this type.  When found send the correct process and signal and
+ * destroy the dnotify struct if it was not registered to receive multiple
+ * events.
+ */
+static int dnotify_handle_event(struct fsnotify_group *group,
+				struct fsnotify_event *event)
+{
+	struct fsnotify_mark_entry *entry = NULL;
+	struct dnotify_mark_entry *dnentry;
+	struct inode *to_tell;
+	struct dnotify_struct *dn;
+	struct dnotify_struct **prev;
+	struct fown_struct *fown;
+
+	to_tell = event->to_tell;
+
+	spin_lock(&to_tell->i_lock);
+	entry = fsnotify_find_mark_entry(group, to_tell);
+	spin_unlock(&to_tell->i_lock);
+
+	/* unlikely since we alreay passed dnotify_should_send_event() */
+	if (unlikely(!entry))
+		return 0;
+	dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
+
+	spin_lock(&entry->lock);
+	prev = &dnentry->dn;
+	while ((dn = *prev) != NULL) {
+		if ((dn->dn_mask & event->mask) == 0) {
+			prev = &dn->dn_next;
+			continue;
+		}
+		fown = &dn->dn_filp->f_owner;
+		send_sigio(fown, dn->dn_fd, POLL_MSG);
+		if (dn->dn_mask & FS_DN_MULTISHOT)
+			prev = &dn->dn_next;
+		else {
+			*prev = dn->dn_next;
+			kmem_cache_free(dnotify_struct_cache, dn);
+			dnotify_recalc_inode_mask(entry);
+		}
+	}
+
+	spin_unlock(&entry->lock);
+	fsnotify_put_mark(entry);
+
+	return 0;
+}
+
+/*
+ * Given an inode and mask determine if dnotify would be interested in sending
+ * userspace notification for that pair.
+ */
+static bool dnotify_should_send_event(struct fsnotify_group *group,
+				      struct inode *inode, __u32 mask)
+{
+	struct fsnotify_mark_entry *entry;
+	bool send;
+
+	/* !dir_notify_enable should never get here, don't waste time checking
+	if (!dir_notify_enable)
+		return 0; */
+
+	/* not a dir, dnotify doesn't care */
+	if (!S_ISDIR(inode->i_mode))
+		return false;
+
+	spin_lock(&inode->i_lock);
+	entry = fsnotify_find_mark_entry(group, inode);
+	spin_unlock(&inode->i_lock);
+
+	/* no mark means no dnotify watch */
+	if (!entry)
+		return false;
+
+	spin_lock(&entry->lock);
+	send = (mask & entry->mask) ? true : false;
+	spin_unlock(&entry->lock);
+	fsnotify_put_mark(entry); /* matches fsnotify_find_mark_entry */
+
+	return send;
+}
+
+static void dnotify_freeing_mark(struct fsnotify_mark_entry *entry,
+				 struct fsnotify_group *group)
+{
+	/* dnotify doesn't care than an inode is on the way out */
+}
+
+static void dnotify_free_mark(struct fsnotify_mark_entry *entry)
+{
+	struct dnotify_mark_entry *dnentry = container_of(entry,
+							  struct dnotify_mark_entry,
+							  fsn_entry);
+
+	BUG_ON(dnentry->dn);
+
+	kmem_cache_free(dnotify_mark_entry_cache, dnentry);
+}
+
+static struct fsnotify_ops dnotify_fsnotify_ops = {
+	.handle_event = dnotify_handle_event,
+	.should_send_event = dnotify_should_send_event,
+	.free_group_priv = NULL,
+	.freeing_mark = dnotify_freeing_mark,
+};
+
+/*
+ * Called every time a file is closed.  Looks first for a dnotify mark on the
+ * inode.  If one is found run all of the ->dn entries attached to that
+ * mark for one relevant to this process closing the file and remove that
+ * dnotify_struct.  If that was the last dnotify_struct also remove the
+ * fsnotify_mark_entry.
+ */
 void dnotify_flush(struct file *filp, fl_owner_t id)
 {
+	struct fsnotify_mark_entry *entry;
+	struct dnotify_mark_entry *dnentry;
 	struct dnotify_struct *dn;
 	struct dnotify_struct **prev;
 	struct inode *inode;
@@ -46,145 +203,243 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
 	inode = filp->f_path.dentry->d_inode;
 	if (!S_ISDIR(inode->i_mode))
 		return;
+
 	spin_lock(&inode->i_lock);
-	prev = &inode->i_dnotify;
+	entry = fsnotify_find_mark_entry(dnotify_group, inode);
+	spin_unlock(&inode->i_lock);
+	if (!entry)
+		return;
+	dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
+
+	mutex_lock(&dnotify_mark_mutex);
+
+	spin_lock(&entry->lock);
+	prev = &dnentry->dn;
 	while ((dn = *prev) != NULL) {
 		if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {
 			*prev = dn->dn_next;
-			redo_inode_mask(inode);
-			kmem_cache_free(dn_cache, dn);
+			kmem_cache_free(dnotify_struct_cache, dn);
+			dnotify_recalc_inode_mask(entry);
 			break;
 		}
 		prev = &dn->dn_next;
 	}
-	spin_unlock(&inode->i_lock);
+
+	spin_unlock(&entry->lock);
+
+	/* nothing else could have found us thanks to the dnotify_mark_mutex */
+	if (dnentry->dn == NULL)
+		fsnotify_destroy_mark_by_entry(entry);
+
+	fsnotify_recalc_group_mask(dnotify_group);
+
+	mutex_unlock(&dnotify_mark_mutex);
+
+	fsnotify_put_mark(entry);
+}
+
+/* this conversion is done only at watch creation */
+static __u32 convert_arg(unsigned long arg)
+{
+	__u32 new_mask = FS_EVENT_ON_CHILD;
+
+	if (arg & DN_MULTISHOT)
+		new_mask |= FS_DN_MULTISHOT;
+	if (arg & DN_DELETE)
+		new_mask |= (FS_DELETE | FS_MOVED_FROM);
+	if (arg & DN_MODIFY)
+		new_mask |= FS_MODIFY;
+	if (arg & DN_ACCESS)
+		new_mask |= FS_ACCESS;
+	if (arg & DN_ATTRIB)
+		new_mask |= FS_ATTRIB;
+	if (arg & DN_RENAME)
+		new_mask |= FS_DN_RENAME;
+	if (arg & DN_CREATE)
+		new_mask |= (FS_CREATE | FS_MOVED_TO);
+
+	return new_mask;
 }
 
+/*
+ * If multiple processes watch the same inode with dnotify there is only one
+ * dnotify mark in inode->i_fsnotify_mark_entries but we chain a dnotify_struct
+ * onto that mark.  This function either attaches the new dnotify_struct onto
+ * that list, or it |= the mask onto an existing dnofiy_struct.
+ */
+static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnentry,
+		     fl_owner_t id, int fd, struct file *filp, __u32 mask)
+{
+	struct dnotify_struct *odn;
+
+	odn = dnentry->dn;
+	while (odn != NULL) {
+		/* adding more events to existing dnofiy_struct? */
+		if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
+			odn->dn_fd = fd;
+			odn->dn_mask |= mask;
+			return -EEXIST;
+		}
+		odn = odn->dn_next;
+	}
+
+	dn->dn_mask = mask;
+	dn->dn_fd = fd;
+	dn->dn_filp = filp;
+	dn->dn_owner = id;
+	dn->dn_next = dnentry->dn;
+	dnentry->dn = dn;
+
+	return 0;
+}
+
+/*
+ * When a process calls fcntl to attach a dnotify watch to a directory it ends
+ * up here.  Allocate both a mark for fsnotify to add and a dnotify_struct to be
+ * attached to the fsnotify_mark.
+ */
 int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 {
+	struct dnotify_mark_entry *new_dnentry, *dnentry;
+	struct fsnotify_mark_entry *new_entry, *entry;
 	struct dnotify_struct *dn;
-	struct dnotify_struct *odn;
-	struct dnotify_struct **prev;
 	struct inode *inode;
 	fl_owner_t id = current->files;
 	struct file *f;
-	int error = 0;
+	int destroy = 0, error = 0;
+	__u32 mask;
+
+	/* we use these to tell if we need to kfree */
+	new_entry = NULL;
+	dn = NULL;
 
+	if (!dir_notify_enable) {
+		error = -EINVAL;
+		goto out_err;
+	}
+
+	/* a 0 mask means we are explicitly removing the watch */
 	if ((arg & ~DN_MULTISHOT) == 0) {
 		dnotify_flush(filp, id);
-		return 0;
+		error = 0;
+		goto out_err;
 	}
-	if (!dir_notify_enable)
-		return -EINVAL;
+
+	/* dnotify only works on directories */
 	inode = filp->f_path.dentry->d_inode;
-	if (!S_ISDIR(inode->i_mode))
-		return -ENOTDIR;
-	dn = kmem_cache_alloc(dn_cache, GFP_KERNEL);
-	if (dn == NULL)
-		return -ENOMEM;
-	spin_lock(&inode->i_lock);
-	prev = &inode->i_dnotify;
-	while ((odn = *prev) != NULL) {
-		if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
-			odn->dn_fd = fd;
-			odn->dn_mask |= arg;
-			inode->i_dnotify_mask |= arg & ~DN_MULTISHOT;
-			goto out_free;
-		}
-		prev = &odn->dn_next;
+	if (!S_ISDIR(inode->i_mode)) {
+		error = -ENOTDIR;
+		goto out_err;
 	}
 
-	rcu_read_lock();
-	f = fcheck(fd);
-	rcu_read_unlock();
-	/* we'd lost the race with close(), sod off silently */
-	/* note that inode->i_lock prevents reordering problems
-	 * between accesses to descriptor table and ->i_dnotify */
-	if (f != filp)
-		goto out_free;
+	/* expect most fcntl to add new rather than augment old */
+	dn = kmem_cache_alloc(dnotify_struct_cache, GFP_KERNEL);
+	if (!dn) {
+		error = -ENOMEM;
+		goto out_err;
+	}
 
-	error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
-	if (error)
-		goto out_free;
+	/* new fsnotify mark, we expect most fcntl calls to add a new mark */
+	new_dnentry = kmem_cache_alloc(dnotify_mark_entry_cache, GFP_KERNEL);
+	if (!new_dnentry) {
+		error = -ENOMEM;
+		goto out_err;
+	}
 
-	dn->dn_mask = arg;
-	dn->dn_fd = fd;
-	dn->dn_filp = filp;
-	dn->dn_owner = id;
-	inode->i_dnotify_mask |= arg & ~DN_MULTISHOT;
-	dn->dn_next = inode->i_dnotify;
-	inode->i_dnotify = dn;
-	spin_unlock(&inode->i_lock);
-	return 0;
+	/* convert the userspace DN_* "arg" to the internal FS_* defines in fsnotify */
+	mask = convert_arg(arg);
 
-out_free:
-	spin_unlock(&inode->i_lock);
-	kmem_cache_free(dn_cache, dn);
-	return error;
-}
+	/* set up the new_entry and new_dnentry */
+	new_entry = &new_dnentry->fsn_entry;
+	fsnotify_init_mark(new_entry, dnotify_free_mark);
+	new_entry->mask = mask;
+	new_dnentry->dn = NULL;
 
-void __inode_dir_notify(struct inode *inode, unsigned long event)
-{
-	struct dnotify_struct *	dn;
-	struct dnotify_struct **prev;
-	struct fown_struct *	fown;
-	int			changed = 0;
+	/* this is needed to prevent the fcntl/close race described below */
+	mutex_lock(&dnotify_mark_mutex);
 
+	/* add the new_entry or find an old one. */
 	spin_lock(&inode->i_lock);
-	prev = &inode->i_dnotify;
-	while ((dn = *prev) != NULL) {
-		if ((dn->dn_mask & event) == 0) {
-			prev = &dn->dn_next;
-			continue;
-		}
-		fown = &dn->dn_filp->f_owner;
-		send_sigio(fown, dn->dn_fd, POLL_MSG);
-		if (dn->dn_mask & DN_MULTISHOT)
-			prev = &dn->dn_next;
-		else {
-			*prev = dn->dn_next;
-			changed = 1;
-			kmem_cache_free(dn_cache, dn);
-		}
-	}
-	if (changed)
-		redo_inode_mask(inode);
+	entry = fsnotify_find_mark_entry(dnotify_group, inode);
 	spin_unlock(&inode->i_lock);
-}
-
-EXPORT_SYMBOL(__inode_dir_notify);
+	if (entry) {
+		dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
+		spin_lock(&entry->lock);
+	} else {
+		fsnotify_add_mark(new_entry, dnotify_group, inode);
+		spin_lock(&new_entry->lock);
+		entry = new_entry;
+		dnentry = new_dnentry;
+		/* we used new_entry, so don't free it */
+		new_entry = NULL;
+	}
 
-/*
- * This is hopelessly wrong, but unfixable without API changes.  At
- * least it doesn't oops the kernel...
- *
- * To safely access ->d_parent we need to keep d_move away from it.  Use the
- * dentry's d_lock for this.
- */
-void dnotify_parent(struct dentry *dentry, unsigned long event)
-{
-	struct dentry *parent;
+	rcu_read_lock();
+	f = fcheck(fd);
+	rcu_read_unlock();
 
-	if (!dir_notify_enable)
-		return;
+	/* if (f != filp) means that we lost a race and another task/thread
+	 * actually closed the fd we are still playing with before we grabbed
+	 * the dnotify_mark_mutex and entry->lock.  Since closing the fd is the
+	 * only time we clean up the mark entries we need to get our mark off
+	 * the list. */
+	if (f != filp) {
+		/* if we added ourselves, shoot ourselves, it's possible that
+		 * the flush actually did shoot this entry.  That's fine too
+		 * since multiple calls to destroy_mark is perfectly safe, if
+		 * we found a dnentry already attached to the inode, just sod
+		 * off silently as the flush at close time dealt with it.
+		 */
+		if (dnentry == new_dnentry)
+			destroy = 1;
+		goto out;
+	}
 
-	spin_lock(&dentry->d_lock);
-	parent = dentry->d_parent;
-	if (parent->d_inode->i_dnotify_mask & event) {
-		dget(parent);
-		spin_unlock(&dentry->d_lock);
-		__inode_dir_notify(parent->d_inode, event);
-		dput(parent);
-	} else {
-		spin_unlock(&dentry->d_lock);
+	error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
+	if (error) {
+		/* if we added, we must shoot */
+		if (dnentry == new_dnentry)
+			destroy = 1;
+		goto out;
 	}
+
+	error = attach_dn(dn, dnentry, id, fd, filp, mask);
+	/* !error means that we attached the dn to the dnentry, so don't free it */
+	if (!error)
+		dn = NULL;
+	/* -EEXIST means that we didn't add this new dn and used an old one.
+	 * that isn't an error (and the unused dn should be freed) */
+	else if (error == -EEXIST)
+		error = 0;
+
+	dnotify_recalc_inode_mask(entry);
+out:
+	spin_unlock(&entry->lock);
+
+	if (destroy)
+		fsnotify_destroy_mark_by_entry(entry);
+
+	fsnotify_recalc_group_mask(dnotify_group);
+
+	mutex_unlock(&dnotify_mark_mutex);
+	fsnotify_put_mark(entry);
+out_err:
+	if (new_entry)
+		fsnotify_put_mark(new_entry);
+	if (dn)
+		kmem_cache_free(dnotify_struct_cache, dn);
+	return error;
 }
-EXPORT_SYMBOL_GPL(dnotify_parent);
 
 static int __init dnotify_init(void)
 {
-	dn_cache = kmem_cache_create("dnotify_cache",
-		sizeof(struct dnotify_struct), 0, SLAB_PANIC, NULL);
+	dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
+	dnotify_mark_entry_cache = KMEM_CACHE(dnotify_mark_entry, SLAB_PANIC);
+
+	dnotify_group = fsnotify_obtain_group(DNOTIFY_GROUP_NUM,
+					      0, &dnotify_fsnotify_ops);
+	if (IS_ERR(dnotify_group))
+		panic("unable to allocate fsnotify group for dnotify\n");
 	return 0;
 }
 
diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h
index 102a902b439..ecc06286226 100644
--- a/include/linux/dnotify.h
+++ b/include/linux/dnotify.h
@@ -10,7 +10,7 @@
 
 struct dnotify_struct {
 	struct dnotify_struct *	dn_next;
-	unsigned long		dn_mask;
+	__u32			dn_mask;
 	int			dn_fd;
 	struct file *		dn_filp;
 	fl_owner_t		dn_owner;
@@ -21,23 +21,18 @@ struct dnotify_struct {
 
 #ifdef CONFIG_DNOTIFY
 
-extern void __inode_dir_notify(struct inode *, unsigned long);
+#define DNOTIFY_ALL_EVENTS (FS_DELETE | FS_DELETE_CHILD |\
+			    FS_MODIFY | FS_MODIFY_CHILD |\
+			    FS_ACCESS | FS_ACCESS_CHILD |\
+			    FS_ATTRIB | FS_ATTRIB_CHILD |\
+			    FS_CREATE | FS_DN_RENAME |\
+			    FS_MOVED_FROM | FS_MOVED_TO)
+
 extern void dnotify_flush(struct file *, fl_owner_t);
 extern int fcntl_dirnotify(int, struct file *, unsigned long);
-extern void dnotify_parent(struct dentry *, unsigned long);
-
-static inline void inode_dir_notify(struct inode *inode, unsigned long event)
-{
-	if (inode->i_dnotify_mask & (event))
-		__inode_dir_notify(inode, event);
-}
 
 #else
 
-static inline void __inode_dir_notify(struct inode *inode, unsigned long event)
-{
-}
-
 static inline void dnotify_flush(struct file *filp, fl_owner_t id)
 {
 }
@@ -47,14 +42,6 @@ static inline int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	return -EINVAL;
 }
 
-static inline void dnotify_parent(struct dentry *dentry, unsigned long event)
-{
-}
-
-static inline void inode_dir_notify(struct inode *inode, unsigned long event)
-{
-}
-
 #endif /* CONFIG_DNOTIFY */
 
 #endif /* __KERNEL __ */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 275b0860044..323b5ce474c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -760,11 +760,6 @@ struct inode {
 	struct hlist_head	i_fsnotify_mark_entries; /* fsnotify mark entries */
 #endif
 
-#ifdef CONFIG_DNOTIFY
-	unsigned long		i_dnotify_mask; /* Directory notify events */
-	struct dnotify_struct	*i_dnotify; /* for directory notifications */
-#endif
-
 #ifdef CONFIG_INOTIFY
 	struct list_head	inotify_watches; /* watches on this inode */
 	struct mutex		inotify_mutex;	/* protects the watches list */
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 6a662ed0bc8..db12d9de352 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -74,13 +74,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 	__u32 new_dir_mask = 0;
 
 	if (old_dir == new_dir) {
-		inode_dir_notify(old_dir, DN_RENAME);
 		old_dir_mask = FS_DN_RENAME;
-	} else {
-		inode_dir_notify(old_dir, DN_DELETE);
-		old_dir_mask = FS_DELETE;
-		inode_dir_notify(new_dir, DN_CREATE);
-		new_dir_mask = FS_CREATE;
 	}
 
 	if (isdir) {
@@ -132,7 +126,6 @@ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir)
 
 	if (isdir)
 		mask |= FS_IN_ISDIR;
-	dnotify_parent(dentry, DN_DELETE);
 
 	fsnotify_parent(dentry, mask);
 }
@@ -154,7 +147,6 @@ static inline void fsnotify_inoderemove(struct inode *inode)
  */
 static inline void fsnotify_create(struct inode *inode, struct dentry *dentry)
 {
-	inode_dir_notify(inode, DN_CREATE);
 	inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name,
 				  dentry->d_inode);
 	audit_inode_child(dentry->d_name.name, dentry, inode);
@@ -169,7 +161,6 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry)
  */
 static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct dentry *new_dentry)
 {
-	inode_dir_notify(dir, DN_CREATE);
 	inotify_inode_queue_event(dir, IN_CREATE, 0, new_dentry->d_name.name,
 				  inode);
 	fsnotify_link_count(inode);
@@ -186,7 +177,6 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
 	__u32 mask = (FS_CREATE | FS_IN_ISDIR);
 	struct inode *d_inode = dentry->d_inode;
 
-	inode_dir_notify(inode, DN_CREATE);
 	inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode);
 	audit_inode_child(dentry->d_name.name, dentry, inode);
 
@@ -204,7 +194,6 @@ static inline void fsnotify_access(struct dentry *dentry)
 	if (S_ISDIR(inode->i_mode))
 		mask |= FS_IN_ISDIR;
 
-	dnotify_parent(dentry, DN_ACCESS);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
@@ -222,7 +211,6 @@ static inline void fsnotify_modify(struct dentry *dentry)
 	if (S_ISDIR(inode->i_mode))
 		mask |= FS_IN_ISDIR;
 
-	dnotify_parent(dentry, DN_MODIFY);
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
@@ -289,47 +277,33 @@ static inline void fsnotify_xattr(struct dentry *dentry)
 static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid)
 {
 	struct inode *inode = dentry->d_inode;
-	int dn_mask = 0;
-	__u32 in_mask = 0;
+	__u32 mask = 0;
+
+	if (ia_valid & ATTR_UID)
+		mask |= FS_ATTRIB;
+	if (ia_valid & ATTR_GID)
+		mask |= FS_ATTRIB;
+	if (ia_valid & ATTR_SIZE)
+		mask |= FS_MODIFY;
 
-	if (ia_valid & ATTR_UID) {
-		in_mask |= FS_ATTRIB;
-		dn_mask |= DN_ATTRIB;
-	}
-	if (ia_valid & ATTR_GID) {
-		in_mask |= FS_ATTRIB;
-		dn_mask |= DN_ATTRIB;
-	}
-	if (ia_valid & ATTR_SIZE) {
-		in_mask |= FS_MODIFY;
-		dn_mask |= DN_MODIFY;
-	}
 	/* both times implies a utime(s) call */
 	if ((ia_valid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME))
-	{
-		in_mask |= FS_ATTRIB;
-		dn_mask |= DN_ATTRIB;
-	} else if (ia_valid & ATTR_ATIME) {
-		in_mask |= FS_ACCESS;
-		dn_mask |= DN_ACCESS;
-	} else if (ia_valid & ATTR_MTIME) {
-		in_mask |= FS_MODIFY;
-		dn_mask |= DN_MODIFY;
-	}
-	if (ia_valid & ATTR_MODE) {
-		in_mask |= FS_ATTRIB;
-		dn_mask |= DN_ATTRIB;
-	}
+		mask |= FS_ATTRIB;
+	else if (ia_valid & ATTR_ATIME)
+		mask |= FS_ACCESS;
+	else if (ia_valid & ATTR_MTIME)
+		mask |= FS_MODIFY;
+
+	if (ia_valid & ATTR_MODE)
+		mask |= FS_ATTRIB;
 
-	if (dn_mask)
-		dnotify_parent(dentry, dn_mask);
-	if (in_mask) {
+	if (mask) {
 		if (S_ISDIR(inode->i_mode))
-			in_mask |= FS_IN_ISDIR;
-		inotify_inode_queue_event(inode, in_mask, 0, NULL, NULL);
+			mask |= FS_IN_ISDIR;
+		inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
-		fsnotify_parent(dentry, in_mask);
-		fsnotify(inode, in_mask, inode, FSNOTIFY_EVENT_INODE);
+		fsnotify_parent(dentry, mask);
+		fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
 	}
 }
 
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 13d2dd57004..9ea800e840f 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -57,6 +57,9 @@
 				   FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\
 				   FS_DELETE)
 
+/* listeners that hard code group numbers near the top */
+#define DNOTIFY_GROUP_NUM	UINT_MAX
+
 struct fsnotify_group;
 struct fsnotify_event;
 struct fsnotify_mark_entry;
-- 
cgit v1.2.3-70-g09d2


From a2d8bc6cb4a3024661baf877242f123787d0c054 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 21 May 2009 17:01:37 -0400
Subject: fsnotify: generic notification queue and waitq

inotify needs to do asyc notification in which event information is stored
on a queue until the listener is ready to receive it.  This patch
implements a generic notification queue for inotify (and later fanotify) to
store events to be sent at a later time.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
---
 fs/notify/fsnotify.h             |   9 ++
 fs/notify/group.c                |   9 ++
 fs/notify/notification.c         | 230 +++++++++++++++++++++++++++++++++++++--
 include/linux/fsnotify_backend.h |  37 +++++++
 4 files changed, 278 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 83b8ec0a8ec..4dc240824b2 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -13,8 +13,12 @@ extern struct list_head fsnotify_groups;
 /* all bitwise OR of all event types (FS_*) for all fsnotify_groups */
 extern __u32 fsnotify_mask;
 
+/* destroy all events sitting in this groups notification queue */
+extern void fsnotify_flush_notify(struct fsnotify_group *group);
+
 /* final kfree of a group */
 extern void fsnotify_final_destroy_group(struct fsnotify_group *group);
+
 /* run the list of all marks associated with inode and flag them to be freed */
 extern void fsnotify_clear_marks_by_inode(struct inode *inode);
 /*
@@ -22,4 +26,9 @@ extern void fsnotify_clear_marks_by_inode(struct inode *inode);
  * about events that happen to its children.
  */
 extern void __fsnotify_update_child_dentry_flags(struct inode *inode);
+
+/* allocate and destroy and event holder to attach events to notification/access queues */
+extern struct fsnotify_event_holder *fsnotify_alloc_event_holder(void);
+extern void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder);
+
 #endif	/* __FS_NOTIFY_FSNOTIFY_H_ */
diff --git a/fs/notify/group.c b/fs/notify/group.c
index a29d2fa6792..0e1677144bc 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -91,6 +91,9 @@ static void fsnotify_get_group(struct fsnotify_group *group)
  */
 void fsnotify_final_destroy_group(struct fsnotify_group *group)
 {
+	/* clear the notification queue of all events */
+	fsnotify_flush_notify(group);
+
 	if (group->ops->free_group_priv)
 		group->ops->free_group_priv(group);
 
@@ -214,6 +217,12 @@ struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask,
 	group->group_num = group_num;
 	group->mask = mask;
 
+	mutex_init(&group->notification_mutex);
+	INIT_LIST_HEAD(&group->notification_list);
+	init_waitqueue_head(&group->notification_waitq);
+	group->q_len = 0;
+	group->max_events = UINT_MAX;
+
 	spin_lock_init(&group->mark_lock);
 	atomic_set(&group->num_marks, 0);
 	INIT_LIST_HEAD(&group->mark_entries);
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index b8e9a87f8f5..dddecc74e63 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -16,6 +16,21 @@
  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+/*
+ * Basic idea behind the notification queue: An fsnotify group (like inotify)
+ * sends the userspace notification about events asyncronously some time after
+ * the event happened.  When inotify gets an event it will need to add that
+ * event to the group notify queue.  Since a single event might need to be on
+ * multiple group's notification queues we can't add the event directly to each
+ * queue and instead add a small "event_holder" to each queue.  This event_holder
+ * has a pointer back to the original event.  Since the majority of events are
+ * going to end up on one, and only one, notification queue we embed one
+ * event_holder into each event.  This means we have a single allocation instead
+ * of always needing two.  If the embedded event_holder is already in use by
+ * another group a new event_holder (from fsnotify_event_holder_cachep) will be
+ * allocated and used.
+ */
+
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -33,6 +48,21 @@
 #include "fsnotify.h"
 
 static struct kmem_cache *fsnotify_event_cachep;
+static struct kmem_cache *fsnotify_event_holder_cachep;
+/*
+ * This is a magic event we send when the q is too full.  Since it doesn't
+ * hold real event information we just keep one system wide and use it any time
+ * it is needed.  It's refcnt is set 1 at kernel init time and will never
+ * get set to 0 so it will never get 'freed'
+ */
+static struct fsnotify_event q_overflow_event;
+
+/* return true if the notify queue is empty, false otherwise */
+bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)
+{
+	BUG_ON(!mutex_is_locked(&group->notification_mutex));
+	return list_empty(&group->notification_list) ? true : false;
+}
 
 void fsnotify_get_event(struct fsnotify_event *event)
 {
@@ -52,19 +82,176 @@ void fsnotify_put_event(struct fsnotify_event *event)
 	}
 }
 
+struct fsnotify_event_holder *fsnotify_alloc_event_holder(void)
+{
+	return kmem_cache_alloc(fsnotify_event_holder_cachep, GFP_KERNEL);
+}
+
+void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder)
+{
+	kmem_cache_free(fsnotify_event_holder_cachep, holder);
+}
+
+/*
+ * check if 2 events contain the same information.
+ */
+static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new)
+{
+	if ((old->mask == new->mask) &&
+	    (old->to_tell == new->to_tell) &&
+	    (old->data_type == new->data_type)) {
+		switch (old->data_type) {
+		case (FSNOTIFY_EVENT_INODE):
+			if (old->inode == new->inode)
+				return true;
+			break;
+		case (FSNOTIFY_EVENT_PATH):
+			if ((old->path.mnt == new->path.mnt) &&
+			    (old->path.dentry == new->path.dentry))
+				return true;
+		case (FSNOTIFY_EVENT_NONE):
+			return true;
+		};
+	}
+	return false;
+}
+
 /*
- * Allocate a new event which will be sent to each group's handle_event function
- * if the group was interested in this particular event.
+ * Add an event to the group notification queue.  The group can later pull this
+ * event off the queue to deal with.  If the event is successfully added to the
+ * group's notification queue, a reference is taken on event.
  */
-struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
-					     void *data, int data_type)
+int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event)
+{
+	struct fsnotify_event_holder *holder = NULL;
+	struct list_head *list = &group->notification_list;
+	struct fsnotify_event_holder *last_holder;
+	struct fsnotify_event *last_event;
+
+	/*
+	 * There is one fsnotify_event_holder embedded inside each fsnotify_event.
+	 * Check if we expect to be able to use that holder.  If not alloc a new
+	 * holder.
+	 * For the overflow event it's possible that something will use the in
+	 * event holder before we get the lock so we may need to jump back and
+	 * alloc a new holder, this can't happen for most events...
+	 */
+	if (!list_empty(&event->holder.event_list)) {
+alloc_holder:
+		holder = fsnotify_alloc_event_holder();
+		if (!holder)
+			return -ENOMEM;
+	}
+
+	mutex_lock(&group->notification_mutex);
+
+	if (group->q_len >= group->max_events)
+		event = &q_overflow_event;
+
+	spin_lock(&event->lock);
+
+	if (list_empty(&event->holder.event_list)) {
+		if (unlikely(holder))
+			fsnotify_destroy_event_holder(holder);
+		holder = &event->holder;
+	} else if (unlikely(!holder)) {
+		/* between the time we checked above and got the lock the in
+		 * event holder was used, go back and get a new one */
+		spin_unlock(&event->lock);
+		mutex_unlock(&group->notification_mutex);
+		goto alloc_holder;
+	}
+
+	if (!list_empty(list)) {
+		last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list);
+		last_event = last_holder->event;
+		if (event_compare(last_event, event)) {
+			spin_unlock(&event->lock);
+			mutex_unlock(&group->notification_mutex);
+			if (holder != &event->holder)
+				fsnotify_destroy_event_holder(holder);
+			return 0;
+		}
+	}
+
+	group->q_len++;
+	holder->event = event;
+
+	fsnotify_get_event(event);
+	list_add_tail(&holder->event_list, list);
+	spin_unlock(&event->lock);
+	mutex_unlock(&group->notification_mutex);
+
+	wake_up(&group->notification_waitq);
+	return 0;
+}
+
+/*
+ * Remove and return the first event from the notification list.  There is a
+ * reference held on this event since it was on the list.  It is the responsibility
+ * of the caller to drop this reference.
+ */
+struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group)
 {
 	struct fsnotify_event *event;
+	struct fsnotify_event_holder *holder;
 
-	event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL);
-	if (!event)
-		return NULL;
+	BUG_ON(!mutex_is_locked(&group->notification_mutex));
 
+	holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
+
+	event = holder->event;
+
+	spin_lock(&event->lock);
+	holder->event = NULL;
+	list_del_init(&holder->event_list);
+	spin_unlock(&event->lock);
+
+	/* event == holder means we are referenced through the in event holder */
+	if (holder != &event->holder)
+		fsnotify_destroy_event_holder(holder);
+
+	group->q_len--;
+
+	return event;
+}
+
+/*
+ * This will not remove the event, that must be done with fsnotify_remove_notify_event()
+ */
+struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group)
+{
+	struct fsnotify_event *event;
+	struct fsnotify_event_holder *holder;
+
+	BUG_ON(!mutex_is_locked(&group->notification_mutex));
+
+	holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
+	event = holder->event;
+
+	return event;
+}
+
+/*
+ * Called when a group is being torn down to clean up any outstanding
+ * event notifications.
+ */
+void fsnotify_flush_notify(struct fsnotify_group *group)
+{
+	struct fsnotify_event *event;
+
+	mutex_lock(&group->notification_mutex);
+	while (!fsnotify_notify_queue_is_empty(group)) {
+		event = fsnotify_remove_notify_event(group);
+		fsnotify_put_event(event); /* matches fsnotify_add_notify_event */
+	}
+	mutex_unlock(&group->notification_mutex);
+}
+
+static void initialize_event(struct fsnotify_event *event)
+{
+	event->holder.event = NULL;
+	INIT_LIST_HEAD(&event->holder.event_list);
 	atomic_set(&event->refcnt, 1);
 
 	spin_lock_init(&event->lock);
@@ -72,7 +259,32 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
 	event->path.dentry = NULL;
 	event->path.mnt = NULL;
 	event->inode = NULL;
+	event->data_type = FSNOTIFY_EVENT_NONE;
 
+	event->to_tell = NULL;
+}
+
+/*
+ * fsnotify_create_event - Allocate a new event which will be sent to each
+ * group's handle_event function if the group was interested in this
+ * particular event.
+ *
+ * @to_tell the inode which is supposed to receive the event (sometimes a
+ *	parent of the inode to which the event happened.
+ * @mask what actually happened.
+ * @data pointer to the object which was actually affected
+ * @data_type flag indication if the data is a file, path, inode, nothing...
+ */
+struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
+					     void *data, int data_type)
+{
+	struct fsnotify_event *event;
+
+	event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL);
+	if (!event)
+		return NULL;
+
+	initialize_event(event);
 	event->to_tell = to_tell;
 
 	switch (data_type) {
@@ -114,6 +326,10 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
 __init int fsnotify_notification_init(void)
 {
 	fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC);
+	fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC);
+
+	initialize_event(&q_overflow_event);
+	q_overflow_event.mask = FS_Q_OVERFLOW;
 
 	return 0;
 }
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 9ea800e840f..15f8f82a5c5 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -119,6 +119,13 @@ struct fsnotify_group {
 
 	const struct fsnotify_ops *ops;	/* how this group handles things */
 
+	/* needed to send notification to userspace */
+	struct mutex notification_mutex;	/* protect the notification_list */
+	struct list_head notification_list;	/* list of event_holder this group needs to send to userspace */
+	wait_queue_head_t notification_waitq;	/* read() on the notification file blocks on this waitq */
+	unsigned int q_len;			/* events on the queue */
+	unsigned int max_events;		/* maximum events allowed on the list */
+
 	/* stores all fastapth entries assoc with this group so they can be cleaned on unregister */
 	spinlock_t mark_lock;		/* protect mark_entries list */
 	atomic_t num_marks;		/* 1 for each mark entry and 1 for not being
@@ -135,12 +142,33 @@ struct fsnotify_group {
 	};
 };
 
+/*
+ * A single event can be queued in multiple group->notification_lists.
+ *
+ * each group->notification_list will point to an event_holder which in turns points
+ * to the actual event that needs to be sent to userspace.
+ *
+ * Seemed cheaper to create a refcnt'd event and a small holder for every group
+ * than create a different event for every group
+ *
+ */
+struct fsnotify_event_holder {
+	struct fsnotify_event *event;
+	struct list_head event_list;
+};
+
 /*
  * all of the information about the original object we want to now send to
  * a group.  If you want to carry more info from the accessing task to the
  * listener this structure is where you need to be adding fields.
  */
 struct fsnotify_event {
+	/*
+	 * If we create an event we are also likely going to need a holder
+	 * to link to a group.  So embed one holder in the event.  Means only
+	 * one allocation for the common case where we only have one group
+	 */
+	struct fsnotify_event_holder holder;
 	spinlock_t lock;	/* protection for the associated event_holder and private_list */
 	/* to_tell may ONLY be dereferenced during handle_event(). */
 	struct inode *to_tell;	/* either the inode the event happened to or its parent */
@@ -264,6 +292,15 @@ extern void fsnotify_put_event(struct fsnotify_event *event);
 extern struct fsnotify_event_private_data *fsnotify_get_priv_from_event(struct fsnotify_group *group,
 									struct fsnotify_event *event);
 
+/* attach the event to the group notification queue */
+extern int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event);
+/* true if the group notification queue is empty */
+extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
+/* return, but do not dequeue the first event on the notification queue */
+extern struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group);
+/* reutnr AND dequeue the first event on the notification queue */
+extern struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group);
+
 /* functions used to manipulate the marks attached to inodes */
 
 /* run all marks associated with an inode and update inode->i_fsnotify_mask */
-- 
cgit v1.2.3-70-g09d2


From 62ffe5dfba056f7ba81d710fee9f28c58a42fdd6 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 21 May 2009 17:01:43 -0400
Subject: fsnotify: include pathnames with entries when possible

When inotify wants to send events to a directory about a child it includes
the name of the original file.  This patch collects that filename and makes
it available for notification.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
---
 fs/notify/fsnotify.c             |  7 ++++---
 fs/notify/notification.c         | 16 +++++++++++++++-
 include/linux/fsnotify.h         | 28 ++++++++++++++--------------
 include/linux/fsnotify_backend.h | 11 ++++++++---
 4 files changed, 41 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 7fc760067a6..675129fa9fd 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -114,7 +114,8 @@ void __fsnotify_parent(struct dentry *dentry, __u32 mask)
 		 * specifies these are events which came from a child. */
 		mask |= FS_EVENT_ON_CHILD;
 
-		fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE);
+		fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
+			 dentry->d_name.name);
 		dput(parent);
 	}
 
@@ -131,7 +132,7 @@ EXPORT_SYMBOL_GPL(__fsnotify_parent);
  * out to all of the registered fsnotify_group.  Those groups can then use the
  * notification event in whatever means they feel necessary.
  */
-void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is)
+void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *file_name)
 {
 	struct fsnotify_group *group;
 	struct fsnotify_event *event = NULL;
@@ -156,7 +157,7 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is)
 			if (!group->ops->should_send_event(group, to_tell, mask))
 				continue;
 			if (!event) {
-				event = fsnotify_create_event(to_tell, mask, data, data_is);
+				event = fsnotify_create_event(to_tell, mask, data, data_is, file_name);
 				/* shit, we OOM'd and now we can't tell, maybe
 				 * someday someone else will want to do something
 				 * here */
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index dddecc74e63..c69b18b9aba 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -78,6 +78,7 @@ void fsnotify_put_event(struct fsnotify_event *event)
 		if (event->data_type == FSNOTIFY_EVENT_PATH)
 			path_put(&event->path);
 
+		kfree(event->file_name);
 		kmem_cache_free(fsnotify_event_cachep, event);
 	}
 }
@@ -262,6 +263,9 @@ static void initialize_event(struct fsnotify_event *event)
 	event->data_type = FSNOTIFY_EVENT_NONE;
 
 	event->to_tell = NULL;
+
+	event->file_name = NULL;
+	event->name_len = 0;
 }
 
 /*
@@ -274,9 +278,10 @@ static void initialize_event(struct fsnotify_event *event)
  * @mask what actually happened.
  * @data pointer to the object which was actually affected
  * @data_type flag indication if the data is a file, path, inode, nothing...
+ * @name the filename, if available
  */
 struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
-					     void *data, int data_type)
+					     void *data, int data_type, const char *name)
 {
 	struct fsnotify_event *event;
 
@@ -285,6 +290,15 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
 		return NULL;
 
 	initialize_event(event);
+
+	if (name) {
+		event->file_name = kstrdup(name, GFP_KERNEL);
+		if (!event->file_name) {
+			kmem_cache_free(fsnotify_event_cachep, event);
+			return NULL;
+		}
+		event->name_len = strlen(event->file_name);
+	}
 	event->to_tell = to_tell;
 
 	switch (data_type) {
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index db12d9de352..180740e9ec8 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -58,7 +58,7 @@ static inline void fsnotify_link_count(struct inode *inode)
 {
 	inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL);
 
-	fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE);
+	fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE, NULL);
 }
 
 /*
@@ -91,8 +91,8 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 	inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, cookie, new_name,
 				  source);
 
-	fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE);
-	fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE);
+	fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE, old_name);
+	fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE, new_name);
 
 	if (target) {
 		inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL, NULL);
@@ -104,7 +104,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 
 	if (source) {
 		inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL);
-		fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE);
+		fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE, NULL);
 	}
 	audit_inode_child(new_name, moved, new_dir);
 }
@@ -138,7 +138,7 @@ static inline void fsnotify_inoderemove(struct inode *inode)
 	inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL, NULL);
 	inotify_inode_is_dead(inode);
 
-	fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE);
+	fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE, NULL);
 	__fsnotify_inode_delete(inode);
 }
 
@@ -151,7 +151,7 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry)
 				  dentry->d_inode);
 	audit_inode_child(dentry->d_name.name, dentry, inode);
 
-	fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE);
+	fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name);
 }
 
 /*
@@ -166,7 +166,7 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct
 	fsnotify_link_count(inode);
 	audit_inode_child(new_dentry->d_name.name, new_dentry, dir);
 
-	fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE);
+	fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE, new_dentry->d_name.name);
 }
 
 /*
@@ -180,7 +180,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
 	inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode);
 	audit_inode_child(dentry->d_name.name, dentry, inode);
 
-	fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE);
+	fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name);
 }
 
 /*
@@ -197,7 +197,7 @@ static inline void fsnotify_access(struct dentry *dentry)
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
-	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL);
 }
 
 /*
@@ -214,7 +214,7 @@ static inline void fsnotify_modify(struct dentry *dentry)
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
-	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL);
 }
 
 /*
@@ -231,7 +231,7 @@ static inline void fsnotify_open(struct dentry *dentry)
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
-	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL);
 }
 
 /*
@@ -250,7 +250,7 @@ static inline void fsnotify_close(struct file *file)
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
-	fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE);
+	fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE, NULL);
 }
 
 /*
@@ -267,7 +267,7 @@ static inline void fsnotify_xattr(struct dentry *dentry)
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
-	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL);
 }
 
 /*
@@ -303,7 +303,7 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid)
 		inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 		fsnotify_parent(dentry, mask);
-		fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE);
+		fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL);
 	}
 }
 
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 15f8f82a5c5..52692f40589 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -192,6 +192,9 @@ struct fsnotify_event {
 	int data_type;		/* which of the above union we have */
 	atomic_t refcnt;	/* how many groups still are using/need to send this event */
 	__u32 mask;		/* the type of access, bitwise OR for FS_* event types */
+
+	char *file_name;
+	size_t name_len;
 };
 
 /*
@@ -224,7 +227,7 @@ struct fsnotify_mark_entry {
 /* called from the vfs helpers */
 
 /* main fsnotify call to send events */
-extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is);
+extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *name);
 extern void __fsnotify_parent(struct dentry *dentry, __u32 mask);
 extern void __fsnotify_inode_delete(struct inode *inode);
 
@@ -319,10 +322,12 @@ extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry);
 
 /* put here because inotify does some weird stuff when destroying watches */
 extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
-						    void *data, int data_is);
+						    void *data, int data_is, const char *name);
+
 #else
 
-static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is)
+static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
+			    const char *name);
 {}
 
 static inline void __fsnotify_parent(struct dentry *dentry, __u32 mask)
-- 
cgit v1.2.3-70-g09d2


From 47882c6f51e8ef41fbbe2bbb746a1ea3228dd7ca Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 21 May 2009 17:01:47 -0400
Subject: fsnotify: add correlations between events

As part of the standard inotify events it includes a correlation cookie
between two dentry move operations.  This patch includes the same behaviour
in fsnotify events.  It is needed so that inotify userspace can be
implemented on top of fsnotify.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
---
 fs/notify/fsnotify.c             |  6 +++---
 fs/notify/notification.c         | 20 ++++++++++++++++++--
 include/linux/fsnotify.h         | 35 ++++++++++++++++++-----------------
 include/linux/fsnotify_backend.h | 15 ++++++++++++---
 4 files changed, 51 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 675129fa9fd..f11d75f0236 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -115,7 +115,7 @@ void __fsnotify_parent(struct dentry *dentry, __u32 mask)
 		mask |= FS_EVENT_ON_CHILD;
 
 		fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
-			 dentry->d_name.name);
+			 dentry->d_name.name, 0);
 		dput(parent);
 	}
 
@@ -132,7 +132,7 @@ EXPORT_SYMBOL_GPL(__fsnotify_parent);
  * out to all of the registered fsnotify_group.  Those groups can then use the
  * notification event in whatever means they feel necessary.
  */
-void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *file_name)
+void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *file_name, u32 cookie)
 {
 	struct fsnotify_group *group;
 	struct fsnotify_event *event = NULL;
@@ -157,7 +157,7 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const
 			if (!group->ops->should_send_event(group, to_tell, mask))
 				continue;
 			if (!event) {
-				event = fsnotify_create_event(to_tell, mask, data, data_is, file_name);
+				event = fsnotify_create_event(to_tell, mask, data, data_is, file_name, cookie);
 				/* shit, we OOM'd and now we can't tell, maybe
 				 * someday someone else will want to do something
 				 * here */
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index c69b18b9aba..346f6e5c355 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -35,6 +35,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
+#include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/mutex.h>
 #include <linux/namei.h>
@@ -56,6 +57,17 @@ static struct kmem_cache *fsnotify_event_holder_cachep;
  * get set to 0 so it will never get 'freed'
  */
 static struct fsnotify_event q_overflow_event;
+static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0);
+
+/**
+ * fsnotify_get_cookie - return a unique cookie for use in synchronizing events.
+ * Called from fsnotify_move, which is inlined into filesystem modules.
+ */
+u32 fsnotify_get_cookie(void)
+{
+	return atomic_inc_return(&fsnotify_sync_cookie);
+}
+EXPORT_SYMBOL_GPL(fsnotify_get_cookie);
 
 /* return true if the notify queue is empty, false otherwise */
 bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)
@@ -266,6 +278,8 @@ static void initialize_event(struct fsnotify_event *event)
 
 	event->file_name = NULL;
 	event->name_len = 0;
+
+	event->sync_cookie = 0;
 }
 
 /*
@@ -280,8 +294,8 @@ static void initialize_event(struct fsnotify_event *event)
  * @data_type flag indication if the data is a file, path, inode, nothing...
  * @name the filename, if available
  */
-struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
-					     void *data, int data_type, const char *name)
+struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data,
+					     int data_type, const char *name, u32 cookie)
 {
 	struct fsnotify_event *event;
 
@@ -299,6 +313,8 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
 		}
 		event->name_len = strlen(event->file_name);
 	}
+
+	event->sync_cookie = cookie;
 	event->to_tell = to_tell;
 
 	switch (data_type) {
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 180740e9ec8..c25b39ddd62 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -58,7 +58,7 @@ static inline void fsnotify_link_count(struct inode *inode)
 {
 	inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL);
 
-	fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE, NULL);
+	fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 }
 
 /*
@@ -69,7 +69,8 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 				 int isdir, struct inode *target, struct dentry *moved)
 {
 	struct inode *source = moved->d_inode;
-	u32 cookie = inotify_get_cookie();
+	u32 in_cookie = inotify_get_cookie();
+	u32 fs_cookie = fsnotify_get_cookie();
 	__u32 old_dir_mask = 0;
 	__u32 new_dir_mask = 0;
 
@@ -86,13 +87,13 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 	old_dir_mask |= FS_MOVED_FROM;
 	new_dir_mask |= FS_MOVED_TO;
 
-	inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir,cookie,old_name,
+	inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir, in_cookie, old_name,
 				  source);
-	inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, cookie, new_name,
+	inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, in_cookie, new_name,
 				  source);
 
-	fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE, old_name);
-	fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE, new_name);
+	fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE, old_name, fs_cookie);
+	fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE, new_name, fs_cookie);
 
 	if (target) {
 		inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL, NULL);
@@ -104,7 +105,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 
 	if (source) {
 		inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL);
-		fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE, NULL);
+		fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 	}
 	audit_inode_child(new_name, moved, new_dir);
 }
@@ -138,7 +139,7 @@ static inline void fsnotify_inoderemove(struct inode *inode)
 	inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL, NULL);
 	inotify_inode_is_dead(inode);
 
-	fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE, NULL);
+	fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 	__fsnotify_inode_delete(inode);
 }
 
@@ -151,7 +152,7 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry)
 				  dentry->d_inode);
 	audit_inode_child(dentry->d_name.name, dentry, inode);
 
-	fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name);
+	fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0);
 }
 
 /*
@@ -166,7 +167,7 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct
 	fsnotify_link_count(inode);
 	audit_inode_child(new_dentry->d_name.name, new_dentry, dir);
 
-	fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE, new_dentry->d_name.name);
+	fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE, new_dentry->d_name.name, 0);
 }
 
 /*
@@ -180,7 +181,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
 	inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode);
 	audit_inode_child(dentry->d_name.name, dentry, inode);
 
-	fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name);
+	fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0);
 }
 
 /*
@@ -197,7 +198,7 @@ static inline void fsnotify_access(struct dentry *dentry)
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
-	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL);
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 }
 
 /*
@@ -214,7 +215,7 @@ static inline void fsnotify_modify(struct dentry *dentry)
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
-	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL);
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 }
 
 /*
@@ -231,7 +232,7 @@ static inline void fsnotify_open(struct dentry *dentry)
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
-	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL);
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 }
 
 /*
@@ -250,7 +251,7 @@ static inline void fsnotify_close(struct file *file)
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
-	fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE, NULL);
+	fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE, NULL, 0);
 }
 
 /*
@@ -267,7 +268,7 @@ static inline void fsnotify_xattr(struct dentry *dentry)
 	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 	fsnotify_parent(dentry, mask);
-	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL);
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 }
 
 /*
@@ -303,7 +304,7 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid)
 		inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 
 		fsnotify_parent(dentry, mask);
-		fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL);
+		fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 	}
 }
 
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 52692f40589..b78b5573d22 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -193,6 +193,7 @@ struct fsnotify_event {
 	atomic_t refcnt;	/* how many groups still are using/need to send this event */
 	__u32 mask;		/* the type of access, bitwise OR for FS_* event types */
 
+	u32 sync_cookie;	/* used to corrolate events, namely inotify mv events */
 	char *file_name;
 	size_t name_len;
 };
@@ -227,9 +228,11 @@ struct fsnotify_mark_entry {
 /* called from the vfs helpers */
 
 /* main fsnotify call to send events */
-extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *name);
+extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
+		     const char *name, u32 cookie);
 extern void __fsnotify_parent(struct dentry *dentry, __u32 mask);
 extern void __fsnotify_inode_delete(struct inode *inode);
+extern u32 fsnotify_get_cookie(void);
 
 static inline int fsnotify_inode_watches_children(struct inode *inode)
 {
@@ -322,12 +325,13 @@ extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry);
 
 /* put here because inotify does some weird stuff when destroying watches */
 extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
-						    void *data, int data_is, const char *name);
+						    void *data, int data_is, const char *name,
+						    u32 cookie);
 
 #else
 
 static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
-			    const char *name);
+			    const char *name, u32 cookie)
 {}
 
 static inline void __fsnotify_parent(struct dentry *dentry, __u32 mask)
@@ -342,6 +346,11 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
 static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode)
 {}
 
+static inline u32 fsnotify_get_cookie(void)
+{
+	return 0;
+}
+
 #endif	/* CONFIG_FSNOTIFY */
 
 #endif	/* __KERNEL __ */
-- 
cgit v1.2.3-70-g09d2


From e4aff117368cfdd3567ee41844d216d079b55173 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 21 May 2009 17:01:50 -0400
Subject: fsnotify: allow groups to add private data to events

inotify needs per group information attached to events.  This patch allows
groups to attach private information and implements a callback so that
information can be freed when an event is being destroyed.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
---
 fs/notify/dnotify/dnotify.c      |  1 +
 fs/notify/notification.c         | 52 ++++++++++++++++++++++++++++++++++++----
 include/linux/fsnotify_backend.h | 24 +++++++++++++++----
 3 files changed, 68 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index d9d80f502c6..12f9e6b1ffe 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -183,6 +183,7 @@ static struct fsnotify_ops dnotify_fsnotify_ops = {
 	.should_send_event = dnotify_should_send_event,
 	.free_group_priv = NULL,
 	.freeing_mark = dnotify_freeing_mark,
+	.free_event_priv = NULL,
 };
 
 /*
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 346f6e5c355..959b73e756f 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -90,6 +90,8 @@ void fsnotify_put_event(struct fsnotify_event *event)
 		if (event->data_type == FSNOTIFY_EVENT_PATH)
 			path_put(&event->path);
 
+		BUG_ON(!list_empty(&event->private_data_list));
+
 		kfree(event->file_name);
 		kmem_cache_free(fsnotify_event_cachep, event);
 	}
@@ -106,7 +108,29 @@ void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder)
 }
 
 /*
- * check if 2 events contain the same information.
+ * Find the private data that the group previously attached to this event when
+ * the group added the event to the notification queue (fsnotify_add_notify_event)
+ */
+struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group, struct fsnotify_event *event)
+{
+	struct fsnotify_event_private_data *lpriv;
+	struct fsnotify_event_private_data *priv = NULL;
+
+	assert_spin_locked(&event->lock);
+
+	list_for_each_entry(lpriv, &event->private_data_list, event_list) {
+		if (lpriv->group == group) {
+			priv = lpriv;
+			list_del(&priv->event_list);
+			break;
+		}
+	}
+	return priv;
+}
+
+/*
+ * Check if 2 events contain the same information.  We do not compare private data
+ * but at this moment that isn't a problem for any know fsnotify listeners.
  */
 static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new)
 {
@@ -134,13 +158,17 @@ static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new
  * event off the queue to deal with.  If the event is successfully added to the
  * group's notification queue, a reference is taken on event.
  */
-int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event)
+int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
+			      struct fsnotify_event_private_data *priv)
 {
 	struct fsnotify_event_holder *holder = NULL;
 	struct list_head *list = &group->notification_list;
 	struct fsnotify_event_holder *last_holder;
 	struct fsnotify_event *last_event;
 
+	/* easy to tell if priv was attached to the event */
+	INIT_LIST_HEAD(&priv->event_list);
+
 	/*
 	 * There is one fsnotify_event_holder embedded inside each fsnotify_event.
 	 * Check if we expect to be able to use that holder.  If not alloc a new
@@ -158,8 +186,11 @@ alloc_holder:
 
 	mutex_lock(&group->notification_mutex);
 
-	if (group->q_len >= group->max_events)
+	if (group->q_len >= group->max_events) {
 		event = &q_overflow_event;
+		/* sorry, no private data on the overflow event */
+		priv = NULL;
+	}
 
 	spin_lock(&event->lock);
 
@@ -183,7 +214,7 @@ alloc_holder:
 			mutex_unlock(&group->notification_mutex);
 			if (holder != &event->holder)
 				fsnotify_destroy_event_holder(holder);
-			return 0;
+			return -EEXIST;
 		}
 	}
 
@@ -192,6 +223,8 @@ alloc_holder:
 
 	fsnotify_get_event(event);
 	list_add_tail(&holder->event_list, list);
+	if (priv)
+		list_add_tail(&priv->event_list, &event->private_data_list);
 	spin_unlock(&event->lock);
 	mutex_unlock(&group->notification_mutex);
 
@@ -252,10 +285,19 @@ struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group)
 void fsnotify_flush_notify(struct fsnotify_group *group)
 {
 	struct fsnotify_event *event;
+	struct fsnotify_event_private_data *priv;
 
 	mutex_lock(&group->notification_mutex);
 	while (!fsnotify_notify_queue_is_empty(group)) {
 		event = fsnotify_remove_notify_event(group);
+		/* if they don't implement free_event_priv they better not have attached any */
+		if (group->ops->free_event_priv) {
+			spin_lock(&event->lock);
+			priv = fsnotify_remove_priv_from_event(group, event);
+			spin_unlock(&event->lock);
+			if (priv)
+				group->ops->free_event_priv(priv);
+		}
 		fsnotify_put_event(event); /* matches fsnotify_add_notify_event */
 	}
 	mutex_unlock(&group->notification_mutex);
@@ -274,6 +316,8 @@ static void initialize_event(struct fsnotify_event *event)
 	event->inode = NULL;
 	event->data_type = FSNOTIFY_EVENT_NONE;
 
+	INIT_LIST_HEAD(&event->private_data_list);
+
 	event->to_tell = NULL;
 
 	event->file_name = NULL;
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index b78b5573d22..efdf9e442d8 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -63,6 +63,7 @@
 struct fsnotify_group;
 struct fsnotify_event;
 struct fsnotify_mark_entry;
+struct fsnotify_event_private_data;
 
 /*
  * Each group much define these ops.  The fsnotify infrastructure will call
@@ -81,6 +82,7 @@ struct fsnotify_ops {
 	int (*handle_event)(struct fsnotify_group *group, struct fsnotify_event *event);
 	void (*free_group_priv)(struct fsnotify_group *group);
 	void (*freeing_mark)(struct fsnotify_mark_entry *entry, struct fsnotify_group *group);
+	void (*free_event_priv)(struct fsnotify_event_private_data *priv);
 };
 
 /*
@@ -157,6 +159,15 @@ struct fsnotify_event_holder {
 	struct list_head event_list;
 };
 
+/*
+ * Inotify needs to tack data onto an event.  This struct lets us later find the
+ * correct private data of the correct group.
+ */
+struct fsnotify_event_private_data {
+	struct fsnotify_group *group;
+	struct list_head event_list;
+};
+
 /*
  * all of the information about the original object we want to now send to
  * a group.  If you want to carry more info from the accessing task to the
@@ -196,6 +207,8 @@ struct fsnotify_event {
 	u32 sync_cookie;	/* used to corrolate events, namely inotify mv events */
 	char *file_name;
 	size_t name_len;
+
+	struct list_head private_data_list;	/* groups can store private data here */
 };
 
 /*
@@ -294,17 +307,18 @@ extern void fsnotify_put_group(struct fsnotify_group *group);
 /* take a reference to an event */
 extern void fsnotify_get_event(struct fsnotify_event *event);
 extern void fsnotify_put_event(struct fsnotify_event *event);
-/* find private data previously attached to an event */
-extern struct fsnotify_event_private_data *fsnotify_get_priv_from_event(struct fsnotify_group *group,
-									struct fsnotify_event *event);
+/* find private data previously attached to an event and unlink it */
+extern struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group,
+									   struct fsnotify_event *event);
 
 /* attach the event to the group notification queue */
-extern int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event);
+extern int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
+				     struct fsnotify_event_private_data *priv);
 /* true if the group notification queue is empty */
 extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
 /* return, but do not dequeue the first event on the notification queue */
 extern struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group);
-/* reutnr AND dequeue the first event on the notification queue */
+/* return AND dequeue the first event on the notification queue */
 extern struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group);
 
 /* functions used to manipulate the marks attached to inodes */
-- 
cgit v1.2.3-70-g09d2


From 164bc6195139047faaf5ada1278332e99494803b Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 21 May 2009 17:01:58 -0400
Subject: fsnotify: handle filesystem unmounts with fsnotify marks

When an fs is unmounted with an fsnotify mark entry attached to one of its
inodes we need to destroy that mark entry and we also (like inotify) send
an unmount event.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
---
 fs/inode.c                       |  1 +
 fs/notify/inode_mark.c           | 72 ++++++++++++++++++++++++++++++++++++++++
 include/linux/fsnotify_backend.h |  4 +++
 3 files changed, 77 insertions(+)

(limited to 'include')

diff --git a/fs/inode.c b/fs/inode.c
index 54c63ce3de2..ca337014ae2 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -407,6 +407,7 @@ int invalidate_inodes(struct super_block *sb)
 	mutex_lock(&iprune_mutex);
 	spin_lock(&inode_lock);
 	inotify_unmount_inodes(&sb->s_inodes);
+	fsnotify_unmount_inodes(&sb->s_inodes);
 	busy = invalidate_list(&sb->s_inodes, &throw_away);
 	spin_unlock(&inode_lock);
 
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 282150f74cf..0a499d2c619 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -89,6 +89,7 @@
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/writeback.h> /* for inode_lock */
 
 #include <asm/atomic.h>
 
@@ -351,3 +352,74 @@ int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
 
 	return ret;
 }
+
+/**
+ * fsnotify_unmount_inodes - an sb is unmounting.  handle any watched inodes.
+ * @list: list of inodes being unmounted (sb->s_inodes)
+ *
+ * Called with inode_lock held, protecting the unmounting super block's list
+ * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
+ * We temporarily drop inode_lock, however, and CAN block.
+ */
+void fsnotify_unmount_inodes(struct list_head *list)
+{
+	struct inode *inode, *next_i, *need_iput = NULL;
+
+	list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
+		struct inode *need_iput_tmp;
+
+		/*
+		 * We cannot __iget() an inode in state I_CLEAR, I_FREEING,
+		 * I_WILL_FREE, or I_NEW which is fine because by that point
+		 * the inode cannot have any associated watches.
+		 */
+		if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW))
+			continue;
+
+		/*
+		 * If i_count is zero, the inode cannot have any watches and
+		 * doing an __iget/iput with MS_ACTIVE clear would actually
+		 * evict all inodes with zero i_count from icache which is
+		 * unnecessarily violent and may in fact be illegal to do.
+		 */
+		if (!atomic_read(&inode->i_count))
+			continue;
+
+		need_iput_tmp = need_iput;
+		need_iput = NULL;
+
+		/* In case fsnotify_inode_delete() drops a reference. */
+		if (inode != need_iput_tmp)
+			__iget(inode);
+		else
+			need_iput_tmp = NULL;
+
+		/* In case the dropping of a reference would nuke next_i. */
+		if ((&next_i->i_sb_list != list) &&
+		    atomic_read(&next_i->i_count) &&
+		    !(next_i->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))) {
+			__iget(next_i);
+			need_iput = next_i;
+		}
+
+		/*
+		 * We can safely drop inode_lock here because we hold
+		 * references on both inode and next_i.  Also no new inodes
+		 * will be added since the umount has begun.  Finally,
+		 * iprune_mutex keeps shrink_icache_memory() away.
+		 */
+		spin_unlock(&inode_lock);
+
+		if (need_iput_tmp)
+			iput(need_iput_tmp);
+
+		/* for each watch, send FS_UNMOUNT and then remove it */
+		fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
+
+		fsnotify_inode_delete(inode);
+
+		iput(inode);
+
+		spin_lock(&inode_lock);
+	}
+}
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index efdf9e442d8..d2c0ee30e61 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -336,6 +336,7 @@ extern void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry);
 extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group);
 extern void fsnotify_get_mark(struct fsnotify_mark_entry *entry);
 extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry);
+extern void fsnotify_unmount_inodes(struct list_head *list);
 
 /* put here because inotify does some weird stuff when destroying watches */
 extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
@@ -365,6 +366,9 @@ static inline u32 fsnotify_get_cookie(void)
 	return 0;
 }
 
+static inline void fsnotify_unmount_inodes(struct list_head *list)
+{}
+
 #endif	/* CONFIG_FSNOTIFY */
 
 #endif	/* __KERNEL __ */
-- 
cgit v1.2.3-70-g09d2


From 63c882a05416e18de6fb59f7dd6da48f3bbe8273 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 21 May 2009 17:02:01 -0400
Subject: inotify: reimplement inotify using fsnotify

Reimplement inotify_user using fsnotify.  This should be feature for feature
exactly the same as the original inotify_user.  This does not make any changes
to the in kernel inotify feature used by audit.  Those patches (and the eventual
removal of in kernel inotify) will come after the new inotify_user proves to be
working correctly.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
---
 MAINTAINERS                          |   2 +
 fs/notify/inotify/Kconfig            |  20 +-
 fs/notify/inotify/Makefile           |   2 +-
 fs/notify/inotify/inotify.h          |  21 +
 fs/notify/inotify/inotify_fsnotify.c | 137 ++++++
 fs/notify/inotify/inotify_user.c     | 837 +++++++++++++++++------------------
 include/linux/fsnotify_backend.h     |  11 +
 init/Kconfig                         |   3 +-
 8 files changed, 585 insertions(+), 448 deletions(-)
 create mode 100644 fs/notify/inotify/inotify.h
 create mode 100644 fs/notify/inotify/inotify_fsnotify.c

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index 96e0c8c6079..e697b67031a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2858,6 +2858,8 @@ P:	John McCutchan
 M:	john@johnmccutchan.com
 P:	Robert Love
 M:	rlove@rlove.org
+P:	Eric Paris
+M:	eparis@parisplace.org
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 F:	Documentation/filesystems/inotify.txt
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index 44679284102..5356884289a 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -1,26 +1,30 @@
 config INOTIFY
 	bool "Inotify file change notification support"
-	default y
+	default n
 	---help---
-	  Say Y here to enable inotify support.  Inotify is a file change
-	  notification system and a replacement for dnotify.  Inotify fixes
-	  numerous shortcomings in dnotify and introduces several new features
-	  including multiple file events, one-shot support, and unmount
-	  notification.
+	  Say Y here to enable legacy in kernel inotify support.  Inotify is a
+	  file change notification system.  It is a replacement for dnotify.
+	  This option only provides the legacy inotify in kernel API.  There
+	  are no in tree kernel users of this interface since it is deprecated.
+	  You only need this if you are loading an out of tree kernel module
+	  that uses inotify.
 
 	  For more information, see <file:Documentation/filesystems/inotify.txt>
 
-	  If unsure, say Y.
+	  If unsure, say N.
 
 config INOTIFY_USER
 	bool "Inotify support for userspace"
-	depends on INOTIFY
+	depends on FSNOTIFY
 	default y
 	---help---
 	  Say Y here to enable inotify support for userspace, including the
 	  associated system calls.  Inotify allows monitoring of both files and
 	  directories via a single open fd.  Events are read from the file
 	  descriptor, which is also select()- and poll()-able.
+	  Inotify fixes numerous shortcomings in dnotify and introduces several
+	  new features including multiple file events, one-shot support, and
+	  unmount notification.
 
 	  For more information, see <file:Documentation/filesystems/inotify.txt>
 
diff --git a/fs/notify/inotify/Makefile b/fs/notify/inotify/Makefile
index e290f3bb9d8..94382817136 100644
--- a/fs/notify/inotify/Makefile
+++ b/fs/notify/inotify/Makefile
@@ -1,2 +1,2 @@
 obj-$(CONFIG_INOTIFY)		+= inotify.o
-obj-$(CONFIG_INOTIFY_USER)	+= inotify_user.o
+obj-$(CONFIG_INOTIFY_USER)	+= inotify_fsnotify.o inotify_user.o
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
new file mode 100644
index 00000000000..ea2605a58b8
--- /dev/null
+++ b/fs/notify/inotify/inotify.h
@@ -0,0 +1,21 @@
+#include <linux/fsnotify_backend.h>
+#include <linux/inotify.h>
+#include <linux/slab.h> /* struct kmem_cache */
+
+extern struct kmem_cache *event_priv_cachep;
+
+struct inotify_event_private_data {
+	struct fsnotify_event_private_data fsnotify_event_priv_data;
+	int wd;
+};
+
+struct inotify_inode_mark_entry {
+	/* fsnotify_mark_entry MUST be the first thing */
+	struct fsnotify_mark_entry fsn_entry;
+	int wd;
+};
+
+extern void inotify_destroy_mark_entry(struct fsnotify_mark_entry *entry, struct fsnotify_group *group);
+extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv);
+
+extern const struct fsnotify_ops inotify_fsnotify_ops;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
new file mode 100644
index 00000000000..160da548683
--- /dev/null
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -0,0 +1,137 @@
+/*
+ * fs/inotify_user.c - inotify support for userspace
+ *
+ * Authors:
+ *	John McCutchan	<ttb@tentacle.dhs.org>
+ *	Robert Love	<rml@novell.com>
+ *
+ * Copyright (C) 2005 John McCutchan
+ * Copyright 2006 Hewlett-Packard Development Company, L.P.
+ *
+ * Copyright (C) 2009 Eric Paris <Red Hat Inc>
+ * inotify was largely rewriten to make use of the fsnotify infrastructure
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/fs.h> /* struct inode */
+#include <linux/fsnotify_backend.h>
+#include <linux/inotify.h>
+#include <linux/path.h> /* struct path */
+#include <linux/slab.h> /* kmem_* */
+#include <linux/types.h>
+
+#include "inotify.h"
+
+static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event)
+{
+	struct fsnotify_mark_entry *entry;
+	struct inotify_inode_mark_entry *ientry;
+	struct inode *to_tell;
+	struct inotify_event_private_data *event_priv;
+	struct fsnotify_event_private_data *fsn_event_priv;
+	int wd, ret;
+
+	to_tell = event->to_tell;
+
+	spin_lock(&to_tell->i_lock);
+	entry = fsnotify_find_mark_entry(group, to_tell);
+	spin_unlock(&to_tell->i_lock);
+	/* race with watch removal?  We already passes should_send */
+	if (unlikely(!entry))
+		return 0;
+	ientry = container_of(entry, struct inotify_inode_mark_entry,
+			      fsn_entry);
+	wd = ientry->wd;
+
+	event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
+	if (unlikely(!event_priv))
+		return -ENOMEM;
+
+	fsn_event_priv = &event_priv->fsnotify_event_priv_data;
+
+	fsn_event_priv->group = group;
+	event_priv->wd = wd;
+
+	ret = fsnotify_add_notify_event(group, event, fsn_event_priv);
+	/* EEXIST is not an error */
+	if (ret == -EEXIST)
+		ret = 0;
+
+	/* did event_priv get attached? */
+	if (list_empty(&fsn_event_priv->event_list))
+		inotify_free_event_priv(fsn_event_priv);
+
+	/*
+	 * If we hold the entry until after the event is on the queue
+	 * IN_IGNORED won't be able to pass this event in the queue
+	 */
+	fsnotify_put_mark(entry);
+
+	return ret;
+}
+
+static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group)
+{
+	inotify_destroy_mark_entry(entry, group);
+}
+
+static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, __u32 mask)
+{
+	struct fsnotify_mark_entry *entry;
+	bool send;
+
+	spin_lock(&inode->i_lock);
+	entry = fsnotify_find_mark_entry(group, inode);
+	spin_unlock(&inode->i_lock);
+	if (!entry)
+		return false;
+
+	send = (entry->mask & mask);
+
+	/* find took a reference */
+	fsnotify_put_mark(entry);
+
+	return send;
+}
+
+static int idr_callback(int id, void *p, void *data)
+{
+	BUG();
+	return 0;
+}
+
+static void inotify_free_group_priv(struct fsnotify_group *group)
+{
+	/* ideally the idr is empty and we won't hit the BUG in teh callback */
+	idr_for_each(&group->inotify_data.idr, idr_callback, NULL);
+	idr_remove_all(&group->inotify_data.idr);
+	idr_destroy(&group->inotify_data.idr);
+}
+
+void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv)
+{
+	struct inotify_event_private_data *event_priv;
+
+
+	event_priv = container_of(fsn_event_priv, struct inotify_event_private_data,
+				  fsnotify_event_priv_data);
+
+	kmem_cache_free(event_priv_cachep, event_priv);
+}
+
+const struct fsnotify_ops inotify_fsnotify_ops = {
+	.handle_event = inotify_handle_event,
+	.should_send_event = inotify_should_send_event,
+	.free_group_priv = inotify_free_group_priv,
+	.free_event_priv = inotify_free_event_priv,
+	.freeing_mark = inotify_freeing_mark,
+};
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 1634319e240..982a412ac5b 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -8,6 +8,9 @@
  * Copyright (C) 2005 John McCutchan
  * Copyright 2006 Hewlett-Packard Development Company, L.P.
  *
+ * Copyright (C) 2009 Eric Paris <Red Hat Inc>
+ * inotify was largely rewriten to make use of the fsnotify infrastructure
+ *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
  * Free Software Foundation; either version 2, or (at your option) any
@@ -19,94 +22,48 @@
  * General Public License for more details.
  */
 
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/fs.h>
 #include <linux/file.h>
-#include <linux/mount.h>
-#include <linux/namei.h>
-#include <linux/poll.h>
-#include <linux/init.h>
-#include <linux/list.h>
+#include <linux/fs.h> /* struct inode */
+#include <linux/fsnotify_backend.h>
+#include <linux/idr.h>
+#include <linux/init.h> /* module_init */
 #include <linux/inotify.h>
+#include <linux/kernel.h> /* roundup() */
+#include <linux/magic.h> /* superblock magic number */
+#include <linux/mount.h> /* mntget */
+#include <linux/namei.h> /* LOOKUP_FOLLOW */
+#include <linux/path.h> /* struct path */
+#include <linux/sched.h> /* struct user */
+#include <linux/slab.h> /* struct kmem_cache */
 #include <linux/syscalls.h>
-#include <linux/magic.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/poll.h>
+#include <linux/wait.h>
 
-#include <asm/ioctls.h>
+#include "inotify.h"
 
-static struct kmem_cache *watch_cachep __read_mostly;
-static struct kmem_cache *event_cachep __read_mostly;
+#include <asm/ioctls.h>
 
 static struct vfsmount *inotify_mnt __read_mostly;
 
+/* this just sits here and wastes global memory.  used to just pad userspace messages with zeros */
+static struct inotify_event nul_inotify_event;
+
 /* these are configurable via /proc/sys/fs/inotify/ */
 static int inotify_max_user_instances __read_mostly;
-static int inotify_max_user_watches __read_mostly;
 static int inotify_max_queued_events __read_mostly;
+int inotify_max_user_watches __read_mostly;
 
-/*
- * Lock ordering:
- *
- * inotify_dev->up_mutex (ensures we don't re-add the same watch)
- * 	inode->inotify_mutex (protects inode's watch list)
- * 		inotify_handle->mutex (protects inotify_handle's watch list)
- * 			inotify_dev->ev_mutex (protects device's event queue)
- */
+static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
+struct kmem_cache *event_priv_cachep __read_mostly;
+static struct fsnotify_event *inotify_ignored_event;
 
 /*
- * Lifetimes of the main data structures:
- *
- * inotify_device: Lifetime is managed by reference count, from
- * sys_inotify_init() until release.  Additional references can bump the count
- * via get_inotify_dev() and drop the count via put_inotify_dev().
- *
- * inotify_user_watch: Lifetime is from create_watch() to the receipt of an
- * IN_IGNORED event from inotify, or when using IN_ONESHOT, to receipt of the
- * first event, or to inotify_destroy().
+ * When inotify registers a new group it increments this and uses that
+ * value as an offset to set the fsnotify group "name" and priority.
  */
-
-/*
- * struct inotify_device - represents an inotify instance
- *
- * This structure is protected by the mutex 'mutex'.
- */
-struct inotify_device {
-	wait_queue_head_t 	wq;		/* wait queue for i/o */
-	struct mutex		ev_mutex;	/* protects event queue */
-	struct mutex		up_mutex;	/* synchronizes watch updates */
-	struct list_head 	events;		/* list of queued events */
-	struct user_struct	*user;		/* user who opened this dev */
-	struct inotify_handle	*ih;		/* inotify handle */
-	struct fasync_struct    *fa;            /* async notification */
-	atomic_t		count;		/* reference count */
-	unsigned int		queue_size;	/* size of the queue (bytes) */
-	unsigned int		event_count;	/* number of pending events */
-	unsigned int		max_events;	/* maximum number of events */
-};
-
-/*
- * struct inotify_kernel_event - An inotify event, originating from a watch and
- * queued for user-space.  A list of these is attached to each instance of the
- * device.  In read(), this list is walked and all events that can fit in the
- * buffer are returned.
- *
- * Protected by dev->ev_mutex of the device in which we are queued.
- */
-struct inotify_kernel_event {
-	struct inotify_event	event;	/* the user-space event */
-	struct list_head        list;	/* entry in inotify_device's list */
-	char			*name;	/* filename, if any */
-};
-
-/*
- * struct inotify_user_watch - our version of an inotify_watch, we add
- * a reference to the associated inotify_device.
- */
-struct inotify_user_watch {
-	struct inotify_device	*dev;	/* associated device */
-	struct inotify_watch	wdata;	/* inotify watch data */
-};
+static atomic_t inotify_grp_num;
 
 #ifdef CONFIG_SYSCTL
 
@@ -149,280 +106,36 @@ ctl_table inotify_table[] = {
 };
 #endif /* CONFIG_SYSCTL */
 
-static inline void get_inotify_dev(struct inotify_device *dev)
-{
-	atomic_inc(&dev->count);
-}
-
-static inline void put_inotify_dev(struct inotify_device *dev)
-{
-	if (atomic_dec_and_test(&dev->count)) {
-		atomic_dec(&dev->user->inotify_devs);
-		free_uid(dev->user);
-		kfree(dev);
-	}
-}
-
-/*
- * free_inotify_user_watch - cleans up the watch and its references
- */
-static void free_inotify_user_watch(struct inotify_watch *w)
-{
-	struct inotify_user_watch *watch;
-	struct inotify_device *dev;
-
-	watch = container_of(w, struct inotify_user_watch, wdata);
-	dev = watch->dev;
-
-	atomic_dec(&dev->user->inotify_watches);
-	put_inotify_dev(dev);
-	kmem_cache_free(watch_cachep, watch);
-}
-
-/*
- * kernel_event - create a new kernel event with the given parameters
- *
- * This function can sleep.
- */
-static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
-						  const char *name)
-{
-	struct inotify_kernel_event *kevent;
-
-	kevent = kmem_cache_alloc(event_cachep, GFP_NOFS);
-	if (unlikely(!kevent))
-		return NULL;
-
-	/* we hand this out to user-space, so zero it just in case */
-	memset(&kevent->event, 0, sizeof(struct inotify_event));
-
-	kevent->event.wd = wd;
-	kevent->event.mask = mask;
-	kevent->event.cookie = cookie;
-
-	INIT_LIST_HEAD(&kevent->list);
-
-	if (name) {
-		size_t len, rem, event_size = sizeof(struct inotify_event);
-
-		/*
-		 * We need to pad the filename so as to properly align an
-		 * array of inotify_event structures.  Because the structure is
-		 * small and the common case is a small filename, we just round
-		 * up to the next multiple of the structure's sizeof.  This is
-		 * simple and safe for all architectures.
-		 */
-		len = strlen(name) + 1;
-		rem = event_size - len;
-		if (len > event_size) {
-			rem = event_size - (len % event_size);
-			if (len % event_size == 0)
-				rem = 0;
-		}
-
-		kevent->name = kmalloc(len + rem, GFP_NOFS);
-		if (unlikely(!kevent->name)) {
-			kmem_cache_free(event_cachep, kevent);
-			return NULL;
-		}
-		memcpy(kevent->name, name, len);
-		if (rem)
-			memset(kevent->name + len, 0, rem);
-		kevent->event.len = len + rem;
-	} else {
-		kevent->event.len = 0;
-		kevent->name = NULL;
-	}
-
-	return kevent;
-}
-
-/*
- * inotify_dev_get_event - return the next event in the given dev's queue
- *
- * Caller must hold dev->ev_mutex.
- */
-static inline struct inotify_kernel_event *
-inotify_dev_get_event(struct inotify_device *dev)
-{
-	return list_entry(dev->events.next, struct inotify_kernel_event, list);
-}
-
-/*
- * inotify_dev_get_last_event - return the last event in the given dev's queue
- *
- * Caller must hold dev->ev_mutex.
- */
-static inline struct inotify_kernel_event *
-inotify_dev_get_last_event(struct inotify_device *dev)
+static inline __u32 inotify_arg_to_mask(u32 arg)
 {
-	if (list_empty(&dev->events))
-		return NULL;
-	return list_entry(dev->events.prev, struct inotify_kernel_event, list);
-}
+	__u32 mask;
 
-/*
- * inotify_dev_queue_event - event handler registered with core inotify, adds
- * a new event to the given device
- *
- * Can sleep (calls kernel_event()).
- */
-static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask,
-				    u32 cookie, const char *name,
-				    struct inode *ignored)
-{
-	struct inotify_user_watch *watch;
-	struct inotify_device *dev;
-	struct inotify_kernel_event *kevent, *last;
+	/* everything should accept their own ignored and cares about children */
+	mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD);
 
-	watch = container_of(w, struct inotify_user_watch, wdata);
-	dev = watch->dev;
+	/* mask off the flags used to open the fd */
+	mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT));
 
-	mutex_lock(&dev->ev_mutex);
-
-	/* we can safely put the watch as we don't reference it while
-	 * generating the event
-	 */
-	if (mask & IN_IGNORED || w->mask & IN_ONESHOT)
-		put_inotify_watch(w); /* final put */
-
-	/* coalescing: drop this event if it is a dupe of the previous */
-	last = inotify_dev_get_last_event(dev);
-	if (last && last->event.mask == mask && last->event.wd == wd &&
-			last->event.cookie == cookie) {
-		const char *lastname = last->name;
-
-		if (!name && !lastname)
-			goto out;
-		if (name && lastname && !strcmp(lastname, name))
-			goto out;
-	}
-
-	/* the queue overflowed and we already sent the Q_OVERFLOW event */
-	if (unlikely(dev->event_count > dev->max_events))
-		goto out;
-
-	/* if the queue overflows, we need to notify user space */
-	if (unlikely(dev->event_count == dev->max_events))
-		kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL);
-	else
-		kevent = kernel_event(wd, mask, cookie, name);
-
-	if (unlikely(!kevent))
-		goto out;
-
-	/* queue the event and wake up anyone waiting */
-	dev->event_count++;
-	dev->queue_size += sizeof(struct inotify_event) + kevent->event.len;
-	list_add_tail(&kevent->list, &dev->events);
-	wake_up_interruptible(&dev->wq);
-	kill_fasync(&dev->fa, SIGIO, POLL_IN);
-
-out:
-	mutex_unlock(&dev->ev_mutex);
-}
-
-/*
- * remove_kevent - cleans up the given kevent
- *
- * Caller must hold dev->ev_mutex.
- */
-static void remove_kevent(struct inotify_device *dev,
-			  struct inotify_kernel_event *kevent)
-{
-	list_del(&kevent->list);
-
-	dev->event_count--;
-	dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len;
-}
-
-/*
- * free_kevent - frees the given kevent.
- */
-static void free_kevent(struct inotify_kernel_event *kevent)
-{
-	kfree(kevent->name);
-	kmem_cache_free(event_cachep, kevent);
-}
-
-/*
- * inotify_dev_event_dequeue - destroy an event on the given device
- *
- * Caller must hold dev->ev_mutex.
- */
-static void inotify_dev_event_dequeue(struct inotify_device *dev)
-{
-	if (!list_empty(&dev->events)) {
-		struct inotify_kernel_event *kevent;
-		kevent = inotify_dev_get_event(dev);
-		remove_kevent(dev, kevent);
-		free_kevent(kevent);
-	}
-}
-
-/*
- * find_inode - resolve a user-given path to a specific inode
- */
-static int find_inode(const char __user *dirname, struct path *path,
-		      unsigned flags)
-{
-	int error;
-
-	error = user_path_at(AT_FDCWD, dirname, flags, path);
-	if (error)
-		return error;
-	/* you can only watch an inode if you have read permissions on it */
-	error = inode_permission(path->dentry->d_inode, MAY_READ);
-	if (error)
-		path_put(path);
-	return error;
+	return mask;
 }
 
-/*
- * create_watch - creates a watch on the given device.
- *
- * Callers must hold dev->up_mutex.
- */
-static int create_watch(struct inotify_device *dev, struct inode *inode,
-			u32 mask)
+static inline u32 inotify_mask_to_arg(__u32 mask)
 {
-	struct inotify_user_watch *watch;
-	int ret;
-
-	if (atomic_read(&dev->user->inotify_watches) >=
-			inotify_max_user_watches)
-		return -ENOSPC;
-
-	watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL);
-	if (unlikely(!watch))
-		return -ENOMEM;
-
-	/* save a reference to device and bump the count to make it official */
-	get_inotify_dev(dev);
-	watch->dev = dev;
-
-	atomic_inc(&dev->user->inotify_watches);
-
-	inotify_init_watch(&watch->wdata);
-	ret = inotify_add_watch(dev->ih, &watch->wdata, inode, mask);
-	if (ret < 0)
-		free_inotify_user_watch(&watch->wdata);
-
-	return ret;
+	return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED |
+		       IN_Q_OVERFLOW);
 }
 
-/* Device Interface */
-
+/* intofiy userspace file descriptor functions */
 static unsigned int inotify_poll(struct file *file, poll_table *wait)
 {
-	struct inotify_device *dev = file->private_data;
+	struct fsnotify_group *group = file->private_data;
 	int ret = 0;
 
-	poll_wait(file, &dev->wq, wait);
-	mutex_lock(&dev->ev_mutex);
-	if (!list_empty(&dev->events))
+	poll_wait(file, &group->notification_waitq, wait);
+	mutex_lock(&group->notification_mutex);
+	if (!fsnotify_notify_queue_is_empty(group))
 		ret = POLLIN | POLLRDNORM;
-	mutex_unlock(&dev->ev_mutex);
+	mutex_unlock(&group->notification_mutex);
 
 	return ret;
 }
@@ -432,26 +145,29 @@ static unsigned int inotify_poll(struct file *file, poll_table *wait)
  * enough to fit in "count". Return an error pointer if
  * not large enough.
  *
- * Called with the device ev_mutex held.
+ * Called with the group->notification_mutex held.
  */
-static struct inotify_kernel_event *get_one_event(struct inotify_device *dev,
-						  size_t count)
+static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
+					    size_t count)
 {
 	size_t event_size = sizeof(struct inotify_event);
-	struct inotify_kernel_event *kevent;
+	struct fsnotify_event *event;
 
-	if (list_empty(&dev->events))
+	if (fsnotify_notify_queue_is_empty(group))
 		return NULL;
 
-	kevent = inotify_dev_get_event(dev);
-	if (kevent->name)
-		event_size += kevent->event.len;
+	event = fsnotify_peek_notify_event(group);
+
+	event_size += roundup(event->name_len, event_size);
 
 	if (event_size > count)
 		return ERR_PTR(-EINVAL);
 
-	remove_kevent(dev, kevent);
-	return kevent;
+	/* held the notification_mutex the whole time, so this is the
+	 * same event we peeked above */
+	fsnotify_remove_notify_event(group);
+
+	return event;
 }
 
 /*
@@ -460,51 +176,90 @@ static struct inotify_kernel_event *get_one_event(struct inotify_device *dev,
  * We already checked that the event size is smaller than the
  * buffer we had in "get_one_event()" above.
  */
-static ssize_t copy_event_to_user(struct inotify_kernel_event *kevent,
+static ssize_t copy_event_to_user(struct fsnotify_group *group,
+				  struct fsnotify_event *event,
 				  char __user *buf)
 {
+	struct inotify_event inotify_event;
+	struct fsnotify_event_private_data *fsn_priv;
+	struct inotify_event_private_data *priv;
 	size_t event_size = sizeof(struct inotify_event);
+	size_t name_len;
+
+	/* we get the inotify watch descriptor from the event private data */
+	spin_lock(&event->lock);
+	fsn_priv = fsnotify_remove_priv_from_event(group, event);
+	spin_unlock(&event->lock);
+
+	if (!fsn_priv)
+		inotify_event.wd = -1;
+	else {
+		priv = container_of(fsn_priv, struct inotify_event_private_data,
+				    fsnotify_event_priv_data);
+		inotify_event.wd = priv->wd;
+		inotify_free_event_priv(fsn_priv);
+	}
+
+	/* round up event->name_len so it is a multiple of event_size */
+	name_len = roundup(event->name_len, event_size);
+	inotify_event.len = name_len;
+
+	inotify_event.mask = inotify_mask_to_arg(event->mask);
+	inotify_event.cookie = event->sync_cookie;
 
-	if (copy_to_user(buf, &kevent->event, event_size))
+	/* send the main event */
+	if (copy_to_user(buf, &inotify_event, event_size))
 		return -EFAULT;
 
-	if (kevent->name) {
-		buf += event_size;
+	buf += event_size;
 
-		if (copy_to_user(buf, kevent->name, kevent->event.len))
+	/*
+	 * fsnotify only stores the pathname, so here we have to send the pathname
+	 * and then pad that pathname out to a multiple of sizeof(inotify_event)
+	 * with zeros.  I get my zeros from the nul_inotify_event.
+	 */
+	if (name_len) {
+		unsigned int len_to_zero = name_len - event->name_len;
+		/* copy the path name */
+		if (copy_to_user(buf, event->file_name, event->name_len))
 			return -EFAULT;
+		buf += event->name_len;
 
-		event_size += kevent->event.len;
+		/* fill userspace with 0's from nul_inotify_event */
+		if (copy_to_user(buf, &nul_inotify_event, len_to_zero))
+			return -EFAULT;
+		buf += len_to_zero;
+		event_size += name_len;
 	}
+
 	return event_size;
 }
 
 static ssize_t inotify_read(struct file *file, char __user *buf,
 			    size_t count, loff_t *pos)
 {
-	struct inotify_device *dev;
+	struct fsnotify_group *group;
+	struct fsnotify_event *kevent;
 	char __user *start;
 	int ret;
 	DEFINE_WAIT(wait);
 
 	start = buf;
-	dev = file->private_data;
+	group = file->private_data;
 
 	while (1) {
-		struct inotify_kernel_event *kevent;
+		prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE);
 
-		prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
-
-		mutex_lock(&dev->ev_mutex);
-		kevent = get_one_event(dev, count);
-		mutex_unlock(&dev->ev_mutex);
+		mutex_lock(&group->notification_mutex);
+		kevent = get_one_event(group, count);
+		mutex_unlock(&group->notification_mutex);
 
 		if (kevent) {
 			ret = PTR_ERR(kevent);
 			if (IS_ERR(kevent))
 				break;
-			ret = copy_event_to_user(kevent, buf);
-			free_kevent(kevent);
+			ret = copy_event_to_user(group, kevent, buf);
+			fsnotify_put_event(kevent);
 			if (ret < 0)
 				break;
 			buf += ret;
@@ -525,7 +280,7 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
 		schedule();
 	}
 
-	finish_wait(&dev->wq, &wait);
+	finish_wait(&group->notification_waitq, &wait);
 	if (start != buf && ret != -EFAULT)
 		ret = buf - start;
 	return ret;
@@ -533,25 +288,19 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
 
 static int inotify_fasync(int fd, struct file *file, int on)
 {
-	struct inotify_device *dev = file->private_data;
+	struct fsnotify_group *group = file->private_data;
 
-	return fasync_helper(fd, file, on, &dev->fa) >= 0 ? 0 : -EIO;
+	return fasync_helper(fd, file, on, &group->inotify_data.fa) >= 0 ? 0 : -EIO;
 }
 
 static int inotify_release(struct inode *ignored, struct file *file)
 {
-	struct inotify_device *dev = file->private_data;
-
-	inotify_destroy(dev->ih);
+	struct fsnotify_group *group = file->private_data;
 
-	/* destroy all of the events on this device */
-	mutex_lock(&dev->ev_mutex);
-	while (!list_empty(&dev->events))
-		inotify_dev_event_dequeue(dev);
-	mutex_unlock(&dev->ev_mutex);
+	fsnotify_clear_marks_by_group(group);
 
-	/* free this device: the put matching the get in inotify_init() */
-	put_inotify_dev(dev);
+	/* free this group, matching get was inotify_init->fsnotify_obtain_group */
+	fsnotify_put_group(group);
 
 	return 0;
 }
@@ -559,16 +308,27 @@ static int inotify_release(struct inode *ignored, struct file *file)
 static long inotify_ioctl(struct file *file, unsigned int cmd,
 			  unsigned long arg)
 {
-	struct inotify_device *dev;
+	struct fsnotify_group *group;
+	struct fsnotify_event_holder *holder;
+	struct fsnotify_event *event;
 	void __user *p;
 	int ret = -ENOTTY;
+	size_t send_len = 0;
 
-	dev = file->private_data;
+	group = file->private_data;
 	p = (void __user *) arg;
 
 	switch (cmd) {
 	case FIONREAD:
-		ret = put_user(dev->queue_size, (int __user *) p);
+		mutex_lock(&group->notification_mutex);
+		list_for_each_entry(holder, &group->notification_list, event_list) {
+			event = holder->event;
+			send_len += sizeof(struct inotify_event);
+			send_len += roundup(event->name_len,
+					     sizeof(struct inotify_event));
+		}
+		mutex_unlock(&group->notification_mutex);
+		ret = put_user(send_len, (int __user *) p);
 		break;
 	}
 
@@ -576,23 +336,233 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
 }
 
 static const struct file_operations inotify_fops = {
-	.poll           = inotify_poll,
-	.read           = inotify_read,
-	.fasync         = inotify_fasync,
-	.release        = inotify_release,
-	.unlocked_ioctl = inotify_ioctl,
+	.poll		= inotify_poll,
+	.read		= inotify_read,
+	.fasync		= inotify_fasync,
+	.release	= inotify_release,
+	.unlocked_ioctl	= inotify_ioctl,
 	.compat_ioctl	= inotify_ioctl,
 };
 
-static const struct inotify_operations inotify_user_ops = {
-	.handle_event	= inotify_dev_queue_event,
-	.destroy_watch	= free_inotify_user_watch,
-};
 
+/*
+ * find_inode - resolve a user-given path to a specific inode
+ */
+static int inotify_find_inode(const char __user *dirname, struct path *path, unsigned flags)
+{
+	int error;
+
+	error = user_path_at(AT_FDCWD, dirname, flags, path);
+	if (error)
+		return error;
+	/* you can only watch an inode if you have read permissions on it */
+	error = inode_permission(path->dentry->d_inode, MAY_READ);
+	if (error)
+		path_put(path);
+	return error;
+}
+
+/*
+ * When, for whatever reason, inotify is done with a mark (or what used to be a
+ * watch) we need to remove that watch from the idr and we need to send IN_IGNORED
+ * for the given wd.
+ *
+ * There is a bit of recursion here.  The loop looks like:
+ * 	inotify_destroy_mark_entry -> fsnotify_destroy_mark_by_entry ->
+ *	inotify_freeing_mark -> inotify_destory_mark_entry -> restart
+ * But the loop is broken in 2 places.  fsnotify_destroy_mark_by_entry sets
+ * entry->group = NULL before the call to inotify_freeing_mark, so the if (egroup)
+ * test below will not call back to fsnotify again.  But even if that test wasn't
+ * there this would still be safe since fsnotify_destroy_mark_by_entry() is
+ * safe from recursion.
+ */
+void inotify_destroy_mark_entry(struct fsnotify_mark_entry *entry, struct fsnotify_group *group)
+{
+	struct inotify_inode_mark_entry *ientry;
+	struct inotify_event_private_data *event_priv;
+	struct fsnotify_event_private_data *fsn_event_priv;
+	struct fsnotify_group *egroup;
+	struct idr *idr;
+
+	spin_lock(&entry->lock);
+	egroup = entry->group;
+
+	/* if egroup we aren't really done and something might still send events
+	 * for this inode, on the callback we'll send the IN_IGNORED */
+	if (egroup) {
+		spin_unlock(&entry->lock);
+		fsnotify_destroy_mark_by_entry(entry);
+		return;
+	}
+	spin_unlock(&entry->lock);
+
+	ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
+
+	event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
+	if (unlikely(!event_priv))
+		goto skip_send_ignore;
+
+	fsn_event_priv = &event_priv->fsnotify_event_priv_data;
+
+	fsn_event_priv->group = group;
+	event_priv->wd = ientry->wd;
+
+	fsnotify_add_notify_event(group, inotify_ignored_event, fsn_event_priv);
+
+	/* did the private data get added? */
+	if (list_empty(&fsn_event_priv->event_list))
+		inotify_free_event_priv(fsn_event_priv);
+
+skip_send_ignore:
+
+	/* remove this entry from the idr */
+	spin_lock(&group->inotify_data.idr_lock);
+	idr = &group->inotify_data.idr;
+	idr_remove(idr, ientry->wd);
+	spin_unlock(&group->inotify_data.idr_lock);
+
+	/* removed from idr, drop that reference */
+	fsnotify_put_mark(entry);
+}
+
+/* ding dong the mark is dead */
+static void inotify_free_mark(struct fsnotify_mark_entry *entry)
+{
+	struct inotify_inode_mark_entry *ientry = (struct inotify_inode_mark_entry *)entry;
+
+	kmem_cache_free(inotify_inode_mark_cachep, ientry);
+}
+
+static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg)
+{
+	struct fsnotify_mark_entry *entry = NULL;
+	struct inotify_inode_mark_entry *ientry;
+	int ret = 0;
+	int add = (arg & IN_MASK_ADD);
+	__u32 mask;
+	__u32 old_mask, new_mask;
+
+	/* don't allow invalid bits: we don't want flags set */
+	mask = inotify_arg_to_mask(arg);
+	if (unlikely(!mask))
+		return -EINVAL;
+
+	ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
+	if (unlikely(!ientry))
+		return -ENOMEM;
+	/* we set the mask at the end after attaching it */
+	fsnotify_init_mark(&ientry->fsn_entry, inotify_free_mark);
+	ientry->wd = 0;
+
+find_entry:
+	spin_lock(&inode->i_lock);
+	entry = fsnotify_find_mark_entry(group, inode);
+	spin_unlock(&inode->i_lock);
+	if (entry) {
+		kmem_cache_free(inotify_inode_mark_cachep, ientry);
+		ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
+	} else {
+		if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) {
+			ret = -ENOSPC;
+			goto out_err;
+		}
+
+		ret = fsnotify_add_mark(&ientry->fsn_entry, group, inode);
+		if (ret == -EEXIST)
+			goto find_entry;
+		else if (ret)
+			goto out_err;
+
+		entry = &ientry->fsn_entry;
+retry:
+		ret = -ENOMEM;
+		if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
+			goto out_err;
+
+		spin_lock(&group->inotify_data.idr_lock);
+		/* if entry is added to the idr we keep the reference obtained
+		 * through fsnotify_mark_add.  remember to drop this reference
+		 * when entry is removed from idr */
+		ret = idr_get_new_above(&group->inotify_data.idr, entry,
+					++group->inotify_data.last_wd,
+					&ientry->wd);
+		spin_unlock(&group->inotify_data.idr_lock);
+		if (ret) {
+			if (ret == -EAGAIN)
+				goto retry;
+			goto out_err;
+		}
+		atomic_inc(&group->inotify_data.user->inotify_watches);
+	}
+
+	spin_lock(&entry->lock);
+
+	old_mask = entry->mask;
+	if (add) {
+		entry->mask |= mask;
+		new_mask = entry->mask;
+	} else {
+		entry->mask = mask;
+		new_mask = entry->mask;
+	}
+
+	spin_unlock(&entry->lock);
+
+	if (old_mask != new_mask) {
+		/* more bits in old than in new? */
+		int dropped = (old_mask & ~new_mask);
+		/* more bits in this entry than the inode's mask? */
+		int do_inode = (new_mask & ~inode->i_fsnotify_mask);
+		/* more bits in this entry than the group? */
+		int do_group = (new_mask & ~group->mask);
+
+		/* update the inode with this new entry */
+		if (dropped || do_inode)
+			fsnotify_recalc_inode_mask(inode);
+
+		/* update the group mask with the new mask */
+		if (dropped || do_group)
+			fsnotify_recalc_group_mask(group);
+	}
+
+	return ientry->wd;
+
+out_err:
+	/* see this isn't supposed to happen, just kill the watch */
+	if (entry) {
+		fsnotify_destroy_mark_by_entry(entry);
+		fsnotify_put_mark(entry);
+	}
+	return ret;
+}
+
+static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events)
+{
+	struct fsnotify_group *group;
+	unsigned int grp_num;
+
+	/* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */
+	grp_num = (INOTIFY_GROUP_NUM - atomic_inc_return(&inotify_grp_num));
+	group = fsnotify_obtain_group(grp_num, 0, &inotify_fsnotify_ops);
+	if (IS_ERR(group))
+		return group;
+
+	group->max_events = max_events;
+
+	spin_lock_init(&group->inotify_data.idr_lock);
+	idr_init(&group->inotify_data.idr);
+	group->inotify_data.last_wd = 0;
+	group->inotify_data.user = user;
+	group->inotify_data.fa = NULL;
+
+	return group;
+}
+
+
+/* inotify syscalls */
 SYSCALL_DEFINE1(inotify_init1, int, flags)
 {
-	struct inotify_device *dev;
-	struct inotify_handle *ih;
+	struct fsnotify_group *group;
 	struct user_struct *user;
 	struct file *filp;
 	int fd, ret;
@@ -621,45 +591,27 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
 		goto out_free_uid;
 	}
 
-	dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
-	if (unlikely(!dev)) {
-		ret = -ENOMEM;
+	/* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */
+	group = inotify_new_group(user, inotify_max_queued_events);
+	if (IS_ERR(group)) {
+		ret = PTR_ERR(group);
 		goto out_free_uid;
 	}
 
-	ih = inotify_init(&inotify_user_ops);
-	if (IS_ERR(ih)) {
-		ret = PTR_ERR(ih);
-		goto out_free_dev;
-	}
-	dev->ih = ih;
-	dev->fa = NULL;
-
 	filp->f_op = &inotify_fops;
 	filp->f_path.mnt = mntget(inotify_mnt);
 	filp->f_path.dentry = dget(inotify_mnt->mnt_root);
 	filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
 	filp->f_mode = FMODE_READ;
 	filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
-	filp->private_data = dev;
-
-	INIT_LIST_HEAD(&dev->events);
-	init_waitqueue_head(&dev->wq);
-	mutex_init(&dev->ev_mutex);
-	mutex_init(&dev->up_mutex);
-	dev->event_count = 0;
-	dev->queue_size = 0;
-	dev->max_events = inotify_max_queued_events;
-	dev->user = user;
-	atomic_set(&dev->count, 0);
-
-	get_inotify_dev(dev);
+	filp->private_data = group;
+
 	atomic_inc(&user->inotify_devs);
+
 	fd_install(fd, filp);
 
 	return fd;
-out_free_dev:
-	kfree(dev);
+
 out_free_uid:
 	free_uid(user);
 	put_filp(filp);
@@ -676,8 +628,8 @@ SYSCALL_DEFINE0(inotify_init)
 SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
 		u32, mask)
 {
+	struct fsnotify_group *group;
 	struct inode *inode;
-	struct inotify_device *dev;
 	struct path path;
 	struct file *filp;
 	int ret, fput_needed;
@@ -698,20 +650,20 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
 	if (mask & IN_ONLYDIR)
 		flags |= LOOKUP_DIRECTORY;
 
-	ret = find_inode(pathname, &path, flags);
-	if (unlikely(ret))
+	ret = inotify_find_inode(pathname, &path, flags);
+	if (ret)
 		goto fput_and_out;
 
-	/* inode held in place by reference to path; dev by fget on fd */
+	/* inode held in place by reference to path; group by fget on fd */
 	inode = path.dentry->d_inode;
-	dev = filp->private_data;
+	group = filp->private_data;
 
-	mutex_lock(&dev->up_mutex);
-	ret = inotify_find_update_watch(dev->ih, inode, mask);
-	if (ret == -ENOENT)
-		ret = create_watch(dev, inode, mask);
-	mutex_unlock(&dev->up_mutex);
+	/* create/update an inode mark */
+	ret = inotify_update_watch(group, inode, mask);
+	if (unlikely(ret))
+		goto path_put_and_out;
 
+path_put_and_out:
 	path_put(&path);
 fput_and_out:
 	fput_light(filp, fput_needed);
@@ -720,9 +672,10 @@ fput_and_out:
 
 SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
 {
+	struct fsnotify_group *group;
+	struct fsnotify_mark_entry *entry;
 	struct file *filp;
-	struct inotify_device *dev;
-	int ret, fput_needed;
+	int ret = 0, fput_needed;
 
 	filp = fget_light(fd, &fput_needed);
 	if (unlikely(!filp))
@@ -734,10 +687,20 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
 		goto out;
 	}
 
-	dev = filp->private_data;
+	group = filp->private_data;
 
-	/* we free our watch data when we get IN_IGNORED */
-	ret = inotify_rm_wd(dev->ih, wd);
+	spin_lock(&group->inotify_data.idr_lock);
+	entry = idr_find(&group->inotify_data.idr, wd);
+	if (unlikely(!entry)) {
+		spin_unlock(&group->inotify_data.idr_lock);
+		ret = -EINVAL;
+		goto out;
+	}
+	fsnotify_get_mark(entry);
+	spin_unlock(&group->inotify_data.idr_lock);
+
+	inotify_destroy_mark_entry(entry, group);
+	fsnotify_put_mark(entry);
 
 out:
 	fput_light(filp, fput_needed);
@@ -753,9 +716,9 @@ inotify_get_sb(struct file_system_type *fs_type, int flags,
 }
 
 static struct file_system_type inotify_fs_type = {
-    .name           = "inotifyfs",
-    .get_sb         = inotify_get_sb,
-    .kill_sb        = kill_anon_super,
+    .name	= "inotifyfs",
+    .get_sb	= inotify_get_sb,
+    .kill_sb	= kill_anon_super,
 };
 
 /*
@@ -775,18 +738,16 @@ static int __init inotify_user_setup(void)
 	if (IS_ERR(inotify_mnt))
 		panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));
 
+	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC);
+	event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC);
+	inotify_ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, FSNOTIFY_EVENT_NONE, NULL, 0);
+	if (!inotify_ignored_event)
+		panic("unable to allocate the inotify ignored event\n");
+
 	inotify_max_queued_events = 16384;
 	inotify_max_user_instances = 128;
 	inotify_max_user_watches = 8192;
 
-	watch_cachep = kmem_cache_create("inotify_watch_cache",
-					 sizeof(struct inotify_user_watch),
-					 0, SLAB_PANIC, NULL);
-	event_cachep = kmem_cache_create("inotify_event_cache",
-					 sizeof(struct inotify_kernel_event),
-					 0, SLAB_PANIC, NULL);
-
 	return 0;
 }
-
 module_init(inotify_user_setup);
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index d2c0ee30e61..44848aa830d 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -9,6 +9,7 @@
 
 #ifdef __KERNEL__
 
+#include <linux/idr.h> /* inotify uses this */
 #include <linux/fs.h> /* struct inode */
 #include <linux/list.h>
 #include <linux/path.h> /* struct path */
@@ -59,6 +60,7 @@
 
 /* listeners that hard code group numbers near the top */
 #define DNOTIFY_GROUP_NUM	UINT_MAX
+#define INOTIFY_GROUP_NUM	(DNOTIFY_GROUP_NUM-1)
 
 struct fsnotify_group;
 struct fsnotify_event;
@@ -141,6 +143,15 @@ struct fsnotify_group {
 	/* groups can define private fields here or use the void *private */
 	union {
 		void *private;
+#ifdef CONFIG_INOTIFY_USER
+		struct inotify_group_private_data {
+			spinlock_t	idr_lock;
+			struct idr      idr;
+			u32             last_wd;
+			struct fasync_struct    *fa;    /* async notification */
+			struct user_struct      *user;
+		} inotify_data;
+#endif
 	};
 };
 
diff --git a/init/Kconfig b/init/Kconfig
index d4e9671347e..5de1c17c51e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -302,7 +302,8 @@ config AUDITSYSCALL
 
 config AUDIT_TREE
 	def_bool y
-	depends on AUDITSYSCALL && INOTIFY
+	depends on AUDITSYSCALL
+	select INOTIFY
 
 menu "RCU Subsystem"
 
-- 
cgit v1.2.3-70-g09d2


From ff52cc2158b32b3b979ca7802b1fd7c70f36e13c Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 11 Jun 2009 11:09:47 -0400
Subject: fsnotify: move events should indicate the event was on a child

fsnotify tells its listeners explicitly when an event happened on the given
inode verses on the child of the given inode.  (see __fsnotify_parent)
However, the semantics of fsnotify_move() are such that we deliver events
directly to the two parent directories in question (old_dir and new_dir)
directly without using the __fsnotify_parent() call.  fsnotify should be
adding FS_EVENT_ON_CHILD for the notifications to these parents.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/fsnotify.h | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index c25b39ddd62..936f9aa8bb9 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -71,12 +71,11 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 	struct inode *source = moved->d_inode;
 	u32 in_cookie = inotify_get_cookie();
 	u32 fs_cookie = fsnotify_get_cookie();
-	__u32 old_dir_mask = 0;
-	__u32 new_dir_mask = 0;
+	__u32 old_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_FROM);
+	__u32 new_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_TO);
 
-	if (old_dir == new_dir) {
-		old_dir_mask = FS_DN_RENAME;
-	}
+	if (old_dir == new_dir)
+		old_dir_mask |= FS_DN_RENAME;
 
 	if (isdir) {
 		isdir = IN_ISDIR;
@@ -84,9 +83,6 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 		new_dir_mask |= FS_IN_ISDIR;
 	}
 
-	old_dir_mask |= FS_MOVED_FROM;
-	new_dir_mask |= FS_MOVED_TO;
-
 	inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir, in_cookie, old_name,
 				  source);
 	inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, in_cookie, new_name,
-- 
cgit v1.2.3-70-g09d2


From 63b852a6b67d0820d388b0ecd0da83ccb4048b8d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:24 +0000
Subject: asm-generic: rename termios.h, signal.h and mman.h

The existing asm-generic versions are incomplete and included
by some architectures. New architectures should be able
to use a generic version, so rename the existing files and
change all users, which lets us add the new files.

Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/alpha/include/asm/signal.h       |  2 +-
 arch/arm/include/asm/mman.h           |  2 +-
 arch/arm/include/asm/signal.h         |  2 +-
 arch/avr32/include/asm/mman.h         |  2 +-
 arch/avr32/include/asm/signal.h       |  2 +-
 arch/avr32/include/asm/termios.h      |  2 +-
 arch/blackfin/include/asm/signal.h    |  2 +-
 arch/cris/include/asm/mman.h          |  2 +-
 arch/cris/include/asm/signal.h        |  2 +-
 arch/frv/include/asm/mman.h           |  2 +-
 arch/frv/include/asm/termios.h        |  2 +-
 arch/h8300/include/asm/mman.h         |  2 +-
 arch/h8300/include/asm/signal.h       |  2 +-
 arch/ia64/include/asm/mman.h          |  2 +-
 arch/ia64/include/asm/signal.h        |  2 +-
 arch/m32r/include/asm/mman.h          |  2 +-
 arch/m32r/include/asm/signal.h        |  2 +-
 arch/m68k/include/asm/mman.h          |  2 +-
 arch/m68k/include/asm/signal.h        |  2 +-
 arch/microblaze/include/asm/signal.h  |  2 +-
 arch/microblaze/include/asm/termios.h |  2 +-
 arch/mips/include/asm/signal.h        |  2 +-
 arch/mn10300/include/asm/mman.h       |  2 +-
 arch/mn10300/include/asm/signal.h     |  2 +-
 arch/powerpc/include/asm/mman.h       |  2 +-
 arch/powerpc/include/asm/signal.h     |  2 +-
 arch/powerpc/include/asm/termios.h    |  2 +-
 arch/s390/include/asm/mman.h          |  2 +-
 arch/s390/include/asm/signal.h        |  2 +-
 arch/s390/include/asm/termios.h       |  2 +-
 arch/sh/include/asm/mman.h            |  2 +-
 arch/sh/include/asm/signal.h          |  2 +-
 arch/sparc/include/asm/mman.h         |  2 +-
 arch/sparc/include/asm/signal.h       |  2 +-
 arch/x86/include/asm/mman.h           |  2 +-
 arch/x86/include/asm/signal.h         |  2 +-
 include/asm-generic/Kbuild            |  4 +-
 include/asm-generic/mman-common.h     | 41 +++++++++++++++++++
 include/asm-generic/mman.h            | 41 -------------------
 include/asm-generic/signal-defs.h     | 28 +++++++++++++
 include/asm-generic/signal.h          | 28 -------------
 include/asm-generic/termios-base.h    | 77 +++++++++++++++++++++++++++++++++++
 include/asm-generic/termios.h         | 77 -----------------------------------
 43 files changed, 184 insertions(+), 184 deletions(-)
 create mode 100644 include/asm-generic/mman-common.h
 delete mode 100644 include/asm-generic/mman.h
 create mode 100644 include/asm-generic/signal-defs.h
 delete mode 100644 include/asm-generic/signal.h
 create mode 100644 include/asm-generic/termios-base.h
 delete mode 100644 include/asm-generic/termios.h

(limited to 'include')

diff --git a/arch/alpha/include/asm/signal.h b/arch/alpha/include/asm/signal.h
index 13c2305d35e..a9388300abb 100644
--- a/arch/alpha/include/asm/signal.h
+++ b/arch/alpha/include/asm/signal.h
@@ -111,7 +111,7 @@ typedef unsigned long sigset_t;
 #define SIG_UNBLOCK        2	/* for unblocking signals */
 #define SIG_SETMASK        3	/* for setting the signal mask */
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifdef __KERNEL__
 struct osf_sigaction {
diff --git a/arch/arm/include/asm/mman.h b/arch/arm/include/asm/mman.h
index 54570d2e95b..fc26976d8e3 100644
--- a/arch/arm/include/asm/mman.h
+++ b/arch/arm/include/asm/mman.h
@@ -1,7 +1,7 @@
 #ifndef __ARM_MMAN_H__
 #define __ARM_MMAN_H__
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
diff --git a/arch/arm/include/asm/signal.h b/arch/arm/include/asm/signal.h
index d0fb487aba4..43ba0fb1c8a 100644
--- a/arch/arm/include/asm/signal.h
+++ b/arch/arm/include/asm/signal.h
@@ -111,7 +111,7 @@ typedef unsigned long sigset_t;
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifdef __KERNEL__
 struct old_sigaction {
diff --git a/arch/avr32/include/asm/mman.h b/arch/avr32/include/asm/mman.h
index 648f91e7187..9a92b15f6a6 100644
--- a/arch/avr32/include/asm/mman.h
+++ b/arch/avr32/include/asm/mman.h
@@ -1,7 +1,7 @@
 #ifndef __ASM_AVR32_MMAN_H__
 #define __ASM_AVR32_MMAN_H__
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
diff --git a/arch/avr32/include/asm/signal.h b/arch/avr32/include/asm/signal.h
index caffefeeba1..8790dfc10d5 100644
--- a/arch/avr32/include/asm/signal.h
+++ b/arch/avr32/include/asm/signal.h
@@ -112,7 +112,7 @@ typedef unsigned long sigset_t;
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifdef __KERNEL__
 struct old_sigaction {
diff --git a/arch/avr32/include/asm/termios.h b/arch/avr32/include/asm/termios.h
index 0152aba3515..dd7e9da2548 100644
--- a/arch/avr32/include/asm/termios.h
+++ b/arch/avr32/include/asm/termios.h
@@ -55,7 +55,7 @@ struct termio {
 */
 #define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
 
-#include <asm-generic/termios.h>
+#include <asm-generic/termios-base.h>
 
 #endif	/* __KERNEL__ */
 
diff --git a/arch/blackfin/include/asm/signal.h b/arch/blackfin/include/asm/signal.h
index 87951d25145..2eea9079445 100644
--- a/arch/blackfin/include/asm/signal.h
+++ b/arch/blackfin/include/asm/signal.h
@@ -104,7 +104,7 @@ typedef unsigned long sigset_t;
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifdef __KERNEL__
 struct old_sigaction {
diff --git a/arch/cris/include/asm/mman.h b/arch/cris/include/asm/mman.h
index 1c35e1b66b4..b7f0afba3ce 100644
--- a/arch/cris/include/asm/mman.h
+++ b/arch/cris/include/asm/mman.h
@@ -3,7 +3,7 @@
 
 /* verbatim copy of asm-i386/ version */
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
diff --git a/arch/cris/include/asm/signal.h b/arch/cris/include/asm/signal.h
index 349ae682b56..ea6af9aad76 100644
--- a/arch/cris/include/asm/signal.h
+++ b/arch/cris/include/asm/signal.h
@@ -106,7 +106,7 @@ typedef unsigned long sigset_t;
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifdef __KERNEL__
 struct old_sigaction {
diff --git a/arch/frv/include/asm/mman.h b/arch/frv/include/asm/mman.h
index b4371e92868..58c1d11e2ac 100644
--- a/arch/frv/include/asm/mman.h
+++ b/arch/frv/include/asm/mman.h
@@ -1,7 +1,7 @@
 #ifndef __ASM_MMAN_H__
 #define __ASM_MMAN_H__
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
diff --git a/arch/frv/include/asm/termios.h b/arch/frv/include/asm/termios.h
index a62fb587237..b4868aafe79 100644
--- a/arch/frv/include/asm/termios.h
+++ b/arch/frv/include/asm/termios.h
@@ -52,7 +52,7 @@ struct termio {
 /* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
 
 #ifdef __KERNEL__
-#include <asm-generic/termios.h>
+#include <asm-generic/termios-base.h>
 #endif
 
 #endif /* _ASM_TERMIOS_H */
diff --git a/arch/h8300/include/asm/mman.h b/arch/h8300/include/asm/mman.h
index b9f104f22a3..cf35f0a6f12 100644
--- a/arch/h8300/include/asm/mman.h
+++ b/arch/h8300/include/asm/mman.h
@@ -1,7 +1,7 @@
 #ifndef __H8300_MMAN_H__
 #define __H8300_MMAN_H__
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
diff --git a/arch/h8300/include/asm/signal.h b/arch/h8300/include/asm/signal.h
index 7bc15048a64..fd8b66e40dc 100644
--- a/arch/h8300/include/asm/signal.h
+++ b/arch/h8300/include/asm/signal.h
@@ -105,7 +105,7 @@ typedef unsigned long sigset_t;
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifdef __KERNEL__
 struct old_sigaction {
diff --git a/arch/ia64/include/asm/mman.h b/arch/ia64/include/asm/mman.h
index c73b87832a1..48cf8b98a0b 100644
--- a/arch/ia64/include/asm/mman.h
+++ b/arch/ia64/include/asm/mman.h
@@ -8,7 +8,7 @@
  *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
  */
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_GROWSDOWN	0x00100		/* stack-like segment */
 #define MAP_GROWSUP	0x00200		/* register stack-like segment */
diff --git a/arch/ia64/include/asm/signal.h b/arch/ia64/include/asm/signal.h
index 4f5ca5643cb..b166248d49a 100644
--- a/arch/ia64/include/asm/signal.h
+++ b/arch/ia64/include/asm/signal.h
@@ -114,7 +114,7 @@
 
 #endif /* __KERNEL__ */
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 # ifndef __ASSEMBLY__
 
diff --git a/arch/m32r/include/asm/mman.h b/arch/m32r/include/asm/mman.h
index 516a8973b13..04a5f40aa40 100644
--- a/arch/m32r/include/asm/mman.h
+++ b/arch/m32r/include/asm/mman.h
@@ -1,7 +1,7 @@
 #ifndef __M32R_MMAN_H__
 #define __M32R_MMAN_H__
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
diff --git a/arch/m32r/include/asm/signal.h b/arch/m32r/include/asm/signal.h
index 1a607066bc6..9c1acb2b1a9 100644
--- a/arch/m32r/include/asm/signal.h
+++ b/arch/m32r/include/asm/signal.h
@@ -107,7 +107,7 @@ typedef unsigned long sigset_t;
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifdef __KERNEL__
 struct old_sigaction {
diff --git a/arch/m68k/include/asm/mman.h b/arch/m68k/include/asm/mman.h
index 1626d37f489..9f5c4c4b3c7 100644
--- a/arch/m68k/include/asm/mman.h
+++ b/arch/m68k/include/asm/mman.h
@@ -1,7 +1,7 @@
 #ifndef __M68K_MMAN_H__
 #define __M68K_MMAN_H__
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
diff --git a/arch/m68k/include/asm/signal.h b/arch/m68k/include/asm/signal.h
index 08788fdefde..5bc09c787a1 100644
--- a/arch/m68k/include/asm/signal.h
+++ b/arch/m68k/include/asm/signal.h
@@ -103,7 +103,7 @@ typedef unsigned long sigset_t;
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifdef __KERNEL__
 struct old_sigaction {
diff --git a/arch/microblaze/include/asm/signal.h b/arch/microblaze/include/asm/signal.h
index 9676fad3486..46bc2267d94 100644
--- a/arch/microblaze/include/asm/signal.h
+++ b/arch/microblaze/include/asm/signal.h
@@ -90,7 +90,7 @@
 
 # ifndef __ASSEMBLY__
 # include <linux/types.h>
-# include <asm-generic/signal.h>
+# include <asm-generic/signal-defs.h>
 
 /* Avoid too many header ordering problems. */
 struct siginfo;
diff --git a/arch/microblaze/include/asm/termios.h b/arch/microblaze/include/asm/termios.h
index 102d7725866..47a46d1fbe2 100644
--- a/arch/microblaze/include/asm/termios.h
+++ b/arch/microblaze/include/asm/termios.h
@@ -81,7 +81,7 @@ struct termio {
 
 #ifdef __KERNEL__
 
-#include <asm-generic/termios.h>
+#include <asm-generic/termios-base.h>
 
 #endif	/* __KERNEL__ */
 
diff --git a/arch/mips/include/asm/signal.h b/arch/mips/include/asm/signal.h
index bee5153aca4..c783f364938 100644
--- a/arch/mips/include/asm/signal.h
+++ b/arch/mips/include/asm/signal.h
@@ -109,7 +109,7 @@ typedef unsigned long old_sigset_t;		/* at least 32 bits */
 #define SIG_UNBLOCK	2	/* for unblocking signals */
 #define SIG_SETMASK	3	/* for setting the signal mask */
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 struct sigaction {
 	unsigned int	sa_flags;
diff --git a/arch/mn10300/include/asm/mman.h b/arch/mn10300/include/asm/mman.h
index b7986b65add..d04fac1da5a 100644
--- a/arch/mn10300/include/asm/mman.h
+++ b/arch/mn10300/include/asm/mman.h
@@ -12,7 +12,7 @@
 #ifndef _ASM_MMAN_H
 #define _ASM_MMAN_H
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
diff --git a/arch/mn10300/include/asm/signal.h b/arch/mn10300/include/asm/signal.h
index e98817cec5f..7e891fce237 100644
--- a/arch/mn10300/include/asm/signal.h
+++ b/arch/mn10300/include/asm/signal.h
@@ -115,7 +115,7 @@ typedef unsigned long sigset_t;
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifdef __KERNEL__
 struct old_sigaction {
diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
index e7b99bac9f4..7b1c49811a2 100644
--- a/arch/powerpc/include/asm/mman.h
+++ b/arch/powerpc/include/asm/mman.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_POWERPC_MMAN_H
 #define _ASM_POWERPC_MMAN_H
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 /*
  * This program is free software; you can redistribute it and/or
diff --git a/arch/powerpc/include/asm/signal.h b/arch/powerpc/include/asm/signal.h
index 69f709d8e8e..3eb13be11d8 100644
--- a/arch/powerpc/include/asm/signal.h
+++ b/arch/powerpc/include/asm/signal.h
@@ -94,7 +94,7 @@ typedef struct {
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 struct old_sigaction {
 	__sighandler_t sa_handler;
diff --git a/arch/powerpc/include/asm/termios.h b/arch/powerpc/include/asm/termios.h
index 2c14fea07c8..a24f48704a3 100644
--- a/arch/powerpc/include/asm/termios.h
+++ b/arch/powerpc/include/asm/termios.h
@@ -78,7 +78,7 @@ struct termio {
 
 #ifdef __KERNEL__
 
-#include <asm-generic/termios.h>
+#include <asm-generic/termios-base.h>
 
 #endif	/* __KERNEL__ */
 
diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h
index da01432e8f4..f63fe7b431e 100644
--- a/arch/s390/include/asm/mman.h
+++ b/arch/s390/include/asm/mman.h
@@ -9,7 +9,7 @@
 #ifndef __S390_MMAN_H__
 #define __S390_MMAN_H__
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
diff --git a/arch/s390/include/asm/signal.h b/arch/s390/include/asm/signal.h
index f6cfddb278c..cdf5cb2fe03 100644
--- a/arch/s390/include/asm/signal.h
+++ b/arch/s390/include/asm/signal.h
@@ -115,7 +115,7 @@ typedef unsigned long sigset_t;
 #define MINSIGSTKSZ     2048
 #define SIGSTKSZ        8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifdef __KERNEL__
 struct old_sigaction {
diff --git a/arch/s390/include/asm/termios.h b/arch/s390/include/asm/termios.h
index 67f66278f53..bc3a35cefc9 100644
--- a/arch/s390/include/asm/termios.h
+++ b/arch/s390/include/asm/termios.h
@@ -60,7 +60,7 @@ struct termio {
 #define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2))
 #define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2))
 
-#include <asm-generic/termios.h>
+#include <asm-generic/termios-base.h>
 
 #endif	/* __KERNEL__ */
 
diff --git a/arch/sh/include/asm/mman.h b/arch/sh/include/asm/mman.h
index 156eb0225cf..7d8b72c91a5 100644
--- a/arch/sh/include/asm/mman.h
+++ b/arch/sh/include/asm/mman.h
@@ -1,7 +1,7 @@
 #ifndef __ASM_SH_MMAN_H
 #define __ASM_SH_MMAN_H
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
diff --git a/arch/sh/include/asm/signal.h b/arch/sh/include/asm/signal.h
index 5c5c1e85208..9cc5f014468 100644
--- a/arch/sh/include/asm/signal.h
+++ b/arch/sh/include/asm/signal.h
@@ -106,7 +106,7 @@ typedef unsigned long sigset_t;
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifdef __KERNEL__
 struct old_sigaction {
diff --git a/arch/sparc/include/asm/mman.h b/arch/sparc/include/asm/mman.h
index fdfbbf0a473..988192e8e95 100644
--- a/arch/sparc/include/asm/mman.h
+++ b/arch/sparc/include/asm/mman.h
@@ -1,7 +1,7 @@
 #ifndef __SPARC_MMAN_H__
 #define __SPARC_MMAN_H__
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 /* SunOS'ified... */
 
diff --git a/arch/sparc/include/asm/signal.h b/arch/sparc/include/asm/signal.h
index cba45206b7f..e49b828a247 100644
--- a/arch/sparc/include/asm/signal.h
+++ b/arch/sparc/include/asm/signal.h
@@ -176,7 +176,7 @@ struct sigstack {
 #define SA_STATIC_ALLOC         0x8000
 #endif
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 struct __new_sigaction {
 	__sighandler_t		sa_handler;
diff --git a/arch/x86/include/asm/mman.h b/arch/x86/include/asm/mman.h
index 90bc4108a4f..751af2550ed 100644
--- a/arch/x86/include/asm/mman.h
+++ b/arch/x86/include/asm/mman.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_X86_MMAN_H
 #define _ASM_X86_MMAN_H
 
-#include <asm-generic/mman.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_32BIT	0x40		/* only give out 32bit addresses */
 
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 7761a5d554b..598457cbd0f 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -117,7 +117,7 @@ typedef unsigned long sigset_t;
 #define MINSIGSTKSZ	2048
 #define SIGSTKSZ	8192
 
-#include <asm-generic/signal.h>
+#include <asm-generic/signal-defs.h>
 
 #ifndef __ASSEMBLY__
 
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index 4c9932a2503..460b08d51e2 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -2,9 +2,9 @@ header-y += errno-base.h
 header-y += errno.h
 header-y += fcntl.h
 header-y += ioctl.h
-header-y += mman.h
+header-y += mman-common.h
 header-y += poll.h
-header-y += signal.h
+header-y += signal-defs.h
 header-y += statfs.h
 
 unifdef-y += int-l64.h
diff --git a/include/asm-generic/mman-common.h b/include/asm-generic/mman-common.h
new file mode 100644
index 00000000000..3b69ad34189
--- /dev/null
+++ b/include/asm-generic/mman-common.h
@@ -0,0 +1,41 @@
+#ifndef __ASM_GENERIC_MMAN_COMMON_H
+#define __ASM_GENERIC_MMAN_COMMON_H
+
+/*
+ Author: Michael S. Tsirkin <mst@mellanox.co.il>, Mellanox Technologies Ltd.
+ Based on: asm-xxx/mman.h
+*/
+
+#define PROT_READ	0x1		/* page can be read */
+#define PROT_WRITE	0x2		/* page can be written */
+#define PROT_EXEC	0x4		/* page can be executed */
+#define PROT_SEM	0x8		/* page may be used for atomic ops */
+#define PROT_NONE	0x0		/* page can not be accessed */
+#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
+#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
+
+#define MAP_SHARED	0x01		/* Share changes */
+#define MAP_PRIVATE	0x02		/* Changes are private */
+#define MAP_TYPE	0x0f		/* Mask for type of mapping */
+#define MAP_FIXED	0x10		/* Interpret addr exactly */
+#define MAP_ANONYMOUS	0x20		/* don't use a file */
+
+#define MS_ASYNC	1		/* sync memory asynchronously */
+#define MS_INVALIDATE	2		/* invalidate the caches */
+#define MS_SYNC		4		/* synchronous memory sync */
+
+#define MADV_NORMAL	0		/* no further special treatment */
+#define MADV_RANDOM	1		/* expect random page references */
+#define MADV_SEQUENTIAL	2		/* expect sequential page references */
+#define MADV_WILLNEED	3		/* will need these pages */
+#define MADV_DONTNEED	4		/* don't need these pages */
+
+/* common parameters: try to keep these consistent across architectures */
+#define MADV_REMOVE	9		/* remove these pages & resources */
+#define MADV_DONTFORK	10		/* don't inherit across fork */
+#define MADV_DOFORK	11		/* do inherit across fork */
+
+/* compatibility flags */
+#define MAP_FILE	0
+
+#endif /* __ASM_GENERIC_MMAN_COMMON_H */
diff --git a/include/asm-generic/mman.h b/include/asm-generic/mman.h
deleted file mode 100644
index 5e3dde2ee5a..00000000000
--- a/include/asm-generic/mman.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef _ASM_GENERIC_MMAN_H
-#define _ASM_GENERIC_MMAN_H
-
-/*
- Author: Michael S. Tsirkin <mst@mellanox.co.il>, Mellanox Technologies Ltd.
- Based on: asm-xxx/mman.h
-*/
-
-#define PROT_READ	0x1		/* page can be read */
-#define PROT_WRITE	0x2		/* page can be written */
-#define PROT_EXEC	0x4		/* page can be executed */
-#define PROT_SEM	0x8		/* page may be used for atomic ops */
-#define PROT_NONE	0x0		/* page can not be accessed */
-#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
-#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
-
-#define MAP_SHARED	0x01		/* Share changes */
-#define MAP_PRIVATE	0x02		/* Changes are private */
-#define MAP_TYPE	0x0f		/* Mask for type of mapping */
-#define MAP_FIXED	0x10		/* Interpret addr exactly */
-#define MAP_ANONYMOUS	0x20		/* don't use a file */
-
-#define MS_ASYNC	1		/* sync memory asynchronously */
-#define MS_INVALIDATE	2		/* invalidate the caches */
-#define MS_SYNC		4		/* synchronous memory sync */
-
-#define MADV_NORMAL	0		/* no further special treatment */
-#define MADV_RANDOM	1		/* expect random page references */
-#define MADV_SEQUENTIAL	2		/* expect sequential page references */
-#define MADV_WILLNEED	3		/* will need these pages */
-#define MADV_DONTNEED	4		/* don't need these pages */
-
-/* common parameters: try to keep these consistent across architectures */
-#define MADV_REMOVE	9		/* remove these pages & resources */
-#define MADV_DONTFORK	10		/* don't inherit across fork */
-#define MADV_DOFORK	11		/* do inherit across fork */
-
-/* compatibility flags */
-#define MAP_FILE	0
-
-#endif
diff --git a/include/asm-generic/signal-defs.h b/include/asm-generic/signal-defs.h
new file mode 100644
index 00000000000..00f95df5429
--- /dev/null
+++ b/include/asm-generic/signal-defs.h
@@ -0,0 +1,28 @@
+#ifndef __ASM_GENERIC_SIGNAL_DEFS_H
+#define __ASM_GENERIC_SIGNAL_DEFS_H
+
+#include <linux/compiler.h>
+
+#ifndef SIG_BLOCK
+#define SIG_BLOCK          0	/* for blocking signals */
+#endif
+#ifndef SIG_UNBLOCK
+#define SIG_UNBLOCK        1	/* for unblocking signals */
+#endif
+#ifndef SIG_SETMASK
+#define SIG_SETMASK        2	/* for setting the signal mask */
+#endif
+
+#ifndef __ASSEMBLY__
+typedef void __signalfn_t(int);
+typedef __signalfn_t __user *__sighandler_t;
+
+typedef void __restorefn_t(void);
+typedef __restorefn_t __user *__sigrestore_t;
+
+#define SIG_DFL	((__force __sighandler_t)0)	/* default signal handling */
+#define SIG_IGN	((__force __sighandler_t)1)	/* ignore signal */
+#define SIG_ERR	((__force __sighandler_t)-1)	/* error return from signal */
+#endif
+
+#endif /* __ASM_GENERIC_SIGNAL_DEFS_H */
diff --git a/include/asm-generic/signal.h b/include/asm-generic/signal.h
deleted file mode 100644
index dae1d872007..00000000000
--- a/include/asm-generic/signal.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef __ASM_GENERIC_SIGNAL_H
-#define __ASM_GENERIC_SIGNAL_H
-
-#include <linux/compiler.h>
-
-#ifndef SIG_BLOCK
-#define SIG_BLOCK          0	/* for blocking signals */
-#endif
-#ifndef SIG_UNBLOCK
-#define SIG_UNBLOCK        1	/* for unblocking signals */
-#endif
-#ifndef SIG_SETMASK
-#define SIG_SETMASK        2	/* for setting the signal mask */
-#endif
-
-#ifndef __ASSEMBLY__
-typedef void __signalfn_t(int);
-typedef __signalfn_t __user *__sighandler_t;
-
-typedef void __restorefn_t(void);
-typedef __restorefn_t __user *__sigrestore_t;
-
-#define SIG_DFL	((__force __sighandler_t)0)	/* default signal handling */
-#define SIG_IGN	((__force __sighandler_t)1)	/* ignore signal */
-#define SIG_ERR	((__force __sighandler_t)-1)	/* error return from signal */
-#endif
-
-#endif /* __ASM_GENERIC_SIGNAL_H */
diff --git a/include/asm-generic/termios-base.h b/include/asm-generic/termios-base.h
new file mode 100644
index 00000000000..0a769feb22b
--- /dev/null
+++ b/include/asm-generic/termios-base.h
@@ -0,0 +1,77 @@
+/* termios.h: generic termios/termio user copying/translation
+ */
+
+#ifndef _ASM_GENERIC_TERMIOS_BASE_H
+#define _ASM_GENERIC_TERMIOS_BASE_H
+
+#include <asm/uaccess.h>
+
+#ifndef __ARCH_TERMIO_GETPUT
+
+/*
+ * Translate a "termio" structure into a "termios". Ugh.
+ */
+static inline int user_termio_to_kernel_termios(struct ktermios *termios,
+						struct termio __user *termio)
+{
+	unsigned short tmp;
+
+	if (get_user(tmp, &termio->c_iflag) < 0)
+		goto fault;
+	termios->c_iflag = (0xffff0000 & termios->c_iflag) | tmp;
+
+	if (get_user(tmp, &termio->c_oflag) < 0)
+		goto fault;
+	termios->c_oflag = (0xffff0000 & termios->c_oflag) | tmp;
+
+	if (get_user(tmp, &termio->c_cflag) < 0)
+		goto fault;
+	termios->c_cflag = (0xffff0000 & termios->c_cflag) | tmp;
+
+	if (get_user(tmp, &termio->c_lflag) < 0)
+		goto fault;
+	termios->c_lflag = (0xffff0000 & termios->c_lflag) | tmp;
+
+	if (get_user(termios->c_line, &termio->c_line) < 0)
+		goto fault;
+
+	if (copy_from_user(termios->c_cc, termio->c_cc, NCC) != 0)
+		goto fault;
+
+	return 0;
+
+ fault:
+	return -EFAULT;
+}
+
+/*
+ * Translate a "termios" structure into a "termio". Ugh.
+ */
+static inline int kernel_termios_to_user_termio(struct termio __user *termio,
+						struct ktermios *termios)
+{
+	if (put_user(termios->c_iflag, &termio->c_iflag) < 0 ||
+	    put_user(termios->c_oflag, &termio->c_oflag) < 0 ||
+	    put_user(termios->c_cflag, &termio->c_cflag) < 0 ||
+	    put_user(termios->c_lflag, &termio->c_lflag) < 0 ||
+	    put_user(termios->c_line,  &termio->c_line) < 0 ||
+	    copy_to_user(termio->c_cc, termios->c_cc, NCC) != 0)
+		return -EFAULT;
+
+	return 0;
+}
+
+#ifndef user_termios_to_kernel_termios
+#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios))
+#endif
+
+#ifndef kernel_termios_to_user_termios
+#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios))
+#endif
+
+#define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios))
+#define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios))
+
+#endif	/* __ARCH_TERMIO_GETPUT */
+
+#endif /* _ASM_GENERIC_TERMIOS_BASE_H */
diff --git a/include/asm-generic/termios.h b/include/asm-generic/termios.h
deleted file mode 100644
index 7d39ecc92d9..00000000000
--- a/include/asm-generic/termios.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/* termios.h: generic termios/termio user copying/translation
- */
-
-#ifndef _ASM_GENERIC_TERMIOS_H
-#define _ASM_GENERIC_TERMIOS_H
-
-#include <asm/uaccess.h>
-
-#ifndef __ARCH_TERMIO_GETPUT
-
-/*
- * Translate a "termio" structure into a "termios". Ugh.
- */
-static inline int user_termio_to_kernel_termios(struct ktermios *termios,
-						struct termio __user *termio)
-{
-	unsigned short tmp;
-
-	if (get_user(tmp, &termio->c_iflag) < 0)
-		goto fault;
-	termios->c_iflag = (0xffff0000 & termios->c_iflag) | tmp;
-
-	if (get_user(tmp, &termio->c_oflag) < 0)
-		goto fault;
-	termios->c_oflag = (0xffff0000 & termios->c_oflag) | tmp;
-
-	if (get_user(tmp, &termio->c_cflag) < 0)
-		goto fault;
-	termios->c_cflag = (0xffff0000 & termios->c_cflag) | tmp;
-
-	if (get_user(tmp, &termio->c_lflag) < 0)
-		goto fault;
-	termios->c_lflag = (0xffff0000 & termios->c_lflag) | tmp;
-
-	if (get_user(termios->c_line, &termio->c_line) < 0)
-		goto fault;
-
-	if (copy_from_user(termios->c_cc, termio->c_cc, NCC) != 0)
-		goto fault;
-
-	return 0;
-
- fault:
-	return -EFAULT;
-}
-
-/*
- * Translate a "termios" structure into a "termio". Ugh.
- */
-static inline int kernel_termios_to_user_termio(struct termio __user *termio,
-						struct ktermios *termios)
-{
-	if (put_user(termios->c_iflag, &termio->c_iflag) < 0 ||
-	    put_user(termios->c_oflag, &termio->c_oflag) < 0 ||
-	    put_user(termios->c_cflag, &termio->c_cflag) < 0 ||
-	    put_user(termios->c_lflag, &termio->c_lflag) < 0 ||
-	    put_user(termios->c_line,  &termio->c_line) < 0 ||
-	    copy_to_user(termio->c_cc, termios->c_cc, NCC) != 0)
-		return -EFAULT;
-
-	return 0;
-}
-
-#ifndef user_termios_to_kernel_termios
-#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios))
-#endif
-
-#ifndef kernel_termios_to_user_termios
-#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios))
-#endif
-
-#define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios))
-#define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios))
-
-#endif	/* __ARCH_TERMIO_GETPUT */
-
-#endif /* _ASM_GENERIC_TERMIOS_H */
-- 
cgit v1.2.3-70-g09d2


From c31ae4bb4a9fa4606a74c0a4fb61b74f804e861e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:25 +0000
Subject: asm-generic: introduce asm/bitsperlong.h

This provides a reliable way for asm-generic/types.h and other
files to find out if it is running on a 32 or 64 bit platform.

We cannot use CONFIG_64BIT for this in headers that are included
from user space because CONFIG symbols are not available there.
We also cannot do it inside of asm/types.h because some headers
need the word size but cannot include types.h.

The solution is to introduce a new header <asm/bitsperlong.h>
that defines both __BITS_PER_LONG for user space and
BITS_PER_LONG for usage in the kernel. The asm-generic
version falls back to 32 bit unless the architecture overrides
it, which I did for all 64 bit platforms.

Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/alpha/include/asm/bitsperlong.h      |  8 ++++++++
 arch/alpha/include/asm/types.h            |  3 ---
 arch/arm/include/asm/bitsperlong.h        |  1 +
 arch/avr32/include/asm/bitsperlong.h      |  1 +
 arch/blackfin/include/asm/bitsperlong.h   |  1 +
 arch/cris/include/asm/bitsperlong.h       |  1 +
 arch/frv/include/asm/bitsperlong.h        |  1 +
 arch/h8300/include/asm/bitsperlong.h      |  1 +
 arch/ia64/include/asm/bitsperlong.h       |  8 ++++++++
 arch/ia64/include/asm/types.h             |  7 -------
 arch/m32r/include/asm/bitsperlong.h       |  1 +
 arch/m68k/include/asm/bitsperlong.h       |  1 +
 arch/microblaze/include/asm/bitsperlong.h |  1 +
 arch/mips/include/asm/bitsperlong.h       |  8 ++++++++
 arch/mips/include/asm/types.h             |  3 ---
 arch/mn10300/include/asm/bitsperlong.h    |  1 +
 arch/parisc/include/asm/bitsperlong.h     | 20 +++++++++++++++++++
 arch/parisc/include/asm/types.h           |  8 --------
 arch/powerpc/include/asm/bitsperlong.h    | 12 ++++++++++++
 arch/powerpc/include/asm/types.h          |  9 ---------
 arch/s390/include/asm/bitsperlong.h       | 13 +++++++++++++
 arch/s390/include/asm/types.h             |  6 ------
 arch/sh/include/asm/bitsperlong.h         |  1 +
 arch/sparc/include/asm/bitsperlong.h      | 13 +++++++++++++
 arch/sparc/include/asm/types.h            |  4 ----
 arch/x86/include/asm/bitsperlong.h        | 13 +++++++++++++
 arch/x86/include/asm/types.h              |  6 ------
 arch/xtensa/include/asm/bitsperlong.h     |  1 +
 include/asm-generic/Kbuild                |  1 +
 include/asm-generic/Kbuild.asm            |  1 +
 include/asm-generic/bitsperlong.h         | 32 +++++++++++++++++++++++++++++++
 include/asm-generic/int-l64.h             |  2 ++
 include/asm-generic/int-ll64.h            |  2 ++
 33 files changed, 145 insertions(+), 46 deletions(-)
 create mode 100644 arch/alpha/include/asm/bitsperlong.h
 create mode 100644 arch/arm/include/asm/bitsperlong.h
 create mode 100644 arch/avr32/include/asm/bitsperlong.h
 create mode 100644 arch/blackfin/include/asm/bitsperlong.h
 create mode 100644 arch/cris/include/asm/bitsperlong.h
 create mode 100644 arch/frv/include/asm/bitsperlong.h
 create mode 100644 arch/h8300/include/asm/bitsperlong.h
 create mode 100644 arch/ia64/include/asm/bitsperlong.h
 create mode 100644 arch/m32r/include/asm/bitsperlong.h
 create mode 100644 arch/m68k/include/asm/bitsperlong.h
 create mode 100644 arch/microblaze/include/asm/bitsperlong.h
 create mode 100644 arch/mips/include/asm/bitsperlong.h
 create mode 100644 arch/mn10300/include/asm/bitsperlong.h
 create mode 100644 arch/parisc/include/asm/bitsperlong.h
 create mode 100644 arch/powerpc/include/asm/bitsperlong.h
 create mode 100644 arch/s390/include/asm/bitsperlong.h
 create mode 100644 arch/sh/include/asm/bitsperlong.h
 create mode 100644 arch/sparc/include/asm/bitsperlong.h
 create mode 100644 arch/x86/include/asm/bitsperlong.h
 create mode 100644 arch/xtensa/include/asm/bitsperlong.h
 create mode 100644 include/asm-generic/bitsperlong.h

(limited to 'include')

diff --git a/arch/alpha/include/asm/bitsperlong.h b/arch/alpha/include/asm/bitsperlong.h
new file mode 100644
index 00000000000..ad57f786820
--- /dev/null
+++ b/arch/alpha/include/asm/bitsperlong.h
@@ -0,0 +1,8 @@
+#ifndef __ASM_ALPHA_BITSPERLONG_H
+#define __ASM_ALPHA_BITSPERLONG_H
+
+#define __BITS_PER_LONG 64
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_ALPHA_BITSPERLONG_H */
diff --git a/arch/alpha/include/asm/types.h b/arch/alpha/include/asm/types.h
index f072f344497..bd621ecd1eb 100644
--- a/arch/alpha/include/asm/types.h
+++ b/arch/alpha/include/asm/types.h
@@ -25,9 +25,6 @@ typedef unsigned int umode_t;
  * These aren't exported outside the kernel to avoid name space clashes
  */
 #ifdef __KERNEL__
-
-#define BITS_PER_LONG 64
-
 #ifndef __ASSEMBLY__
 
 typedef u64 dma_addr_t;
diff --git a/arch/arm/include/asm/bitsperlong.h b/arch/arm/include/asm/bitsperlong.h
new file mode 100644
index 00000000000..6dc0bb0c13b
--- /dev/null
+++ b/arch/arm/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/arch/avr32/include/asm/bitsperlong.h b/arch/avr32/include/asm/bitsperlong.h
new file mode 100644
index 00000000000..6dc0bb0c13b
--- /dev/null
+++ b/arch/avr32/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/arch/blackfin/include/asm/bitsperlong.h b/arch/blackfin/include/asm/bitsperlong.h
new file mode 100644
index 00000000000..6dc0bb0c13b
--- /dev/null
+++ b/arch/blackfin/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/arch/cris/include/asm/bitsperlong.h b/arch/cris/include/asm/bitsperlong.h
new file mode 100644
index 00000000000..6dc0bb0c13b
--- /dev/null
+++ b/arch/cris/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/arch/frv/include/asm/bitsperlong.h b/arch/frv/include/asm/bitsperlong.h
new file mode 100644
index 00000000000..6dc0bb0c13b
--- /dev/null
+++ b/arch/frv/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/arch/h8300/include/asm/bitsperlong.h b/arch/h8300/include/asm/bitsperlong.h
new file mode 100644
index 00000000000..6dc0bb0c13b
--- /dev/null
+++ b/arch/h8300/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/arch/ia64/include/asm/bitsperlong.h b/arch/ia64/include/asm/bitsperlong.h
new file mode 100644
index 00000000000..ec4db3c970b
--- /dev/null
+++ b/arch/ia64/include/asm/bitsperlong.h
@@ -0,0 +1,8 @@
+#ifndef __ASM_IA64_BITSPERLONG_H
+#define __ASM_IA64_BITSPERLONG_H
+
+#define __BITS_PER_LONG 64
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_IA64_BITSPERLONG_H */
diff --git a/arch/ia64/include/asm/types.h b/arch/ia64/include/asm/types.h
index e36b3716e71..fbf1ed3b44c 100644
--- a/arch/ia64/include/asm/types.h
+++ b/arch/ia64/include/asm/types.h
@@ -19,10 +19,6 @@
 # define __IA64_UL(x)		(x)
 # define __IA64_UL_CONST(x)	x
 
-# ifdef __KERNEL__
-#  define BITS_PER_LONG 64
-# endif
-
 #else
 # define __IA64_UL(x)		((unsigned long)(x))
 # define __IA64_UL_CONST(x)	x##UL
@@ -34,10 +30,7 @@ typedef unsigned int umode_t;
  */
 # ifdef __KERNEL__
 
-#define BITS_PER_LONG 64
-
 /* DMA addresses are 64-bits wide, in general.  */
-
 typedef u64 dma_addr_t;
 
 # endif /* __KERNEL__ */
diff --git a/arch/m32r/include/asm/bitsperlong.h b/arch/m32r/include/asm/bitsperlong.h
new file mode 100644
index 00000000000..6dc0bb0c13b
--- /dev/null
+++ b/arch/m32r/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/arch/m68k/include/asm/bitsperlong.h b/arch/m68k/include/asm/bitsperlong.h
new file mode 100644
index 00000000000..6dc0bb0c13b
--- /dev/null
+++ b/arch/m68k/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/arch/microblaze/include/asm/bitsperlong.h b/arch/microblaze/include/asm/bitsperlong.h
new file mode 100644
index 00000000000..6dc0bb0c13b
--- /dev/null
+++ b/arch/microblaze/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/arch/mips/include/asm/bitsperlong.h b/arch/mips/include/asm/bitsperlong.h
new file mode 100644
index 00000000000..3e4c10a8e78
--- /dev/null
+++ b/arch/mips/include/asm/bitsperlong.h
@@ -0,0 +1,8 @@
+#ifndef __ASM_MIPS_BITSPERLONG_H
+#define __ASM_MIPS_BITSPERLONG_H
+
+#define __BITS_PER_LONG _MIPS_SZLONG
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_MIPS_BITSPERLONG_H */
diff --git a/arch/mips/include/asm/types.h b/arch/mips/include/asm/types.h
index 7956e69a3bd..544a2854598 100644
--- a/arch/mips/include/asm/types.h
+++ b/arch/mips/include/asm/types.h
@@ -31,9 +31,6 @@ typedef unsigned short umode_t;
  * These aren't exported outside the kernel to avoid name space clashes
  */
 #ifdef __KERNEL__
-
-#define BITS_PER_LONG _MIPS_SZLONG
-
 #ifndef __ASSEMBLY__
 
 #if (defined(CONFIG_HIGHMEM) && defined(CONFIG_64BIT_PHYS_ADDR)) \
diff --git a/arch/mn10300/include/asm/bitsperlong.h b/arch/mn10300/include/asm/bitsperlong.h
new file mode 100644
index 00000000000..6dc0bb0c13b
--- /dev/null
+++ b/arch/mn10300/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/arch/parisc/include/asm/bitsperlong.h b/arch/parisc/include/asm/bitsperlong.h
new file mode 100644
index 00000000000..75196b415d3
--- /dev/null
+++ b/arch/parisc/include/asm/bitsperlong.h
@@ -0,0 +1,20 @@
+#ifndef __ASM_PARISC_BITSPERLONG_H
+#define __ASM_PARISC_BITSPERLONG_H
+
+/*
+ * using CONFIG_* outside of __KERNEL__ is wrong,
+ * __LP64__ was also removed from headers, so what
+ * is the right approach on parisc?
+ *	-arnd
+ */
+#if (defined(__KERNEL__) && defined(CONFIG_64BIT)) || defined (__LP64__)
+#define __BITS_PER_LONG 64
+#define SHIFT_PER_LONG 6
+#else
+#define __BITS_PER_LONG 32
+#define SHIFT_PER_LONG 5
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_PARISC_BITSPERLONG_H */
diff --git a/arch/parisc/include/asm/types.h b/arch/parisc/include/asm/types.h
index 7f5a39bfb4c..20135cc8003 100644
--- a/arch/parisc/include/asm/types.h
+++ b/arch/parisc/include/asm/types.h
@@ -14,14 +14,6 @@ typedef unsigned short umode_t;
  */
 #ifdef __KERNEL__
 
-#ifdef CONFIG_64BIT
-#define BITS_PER_LONG 64
-#define SHIFT_PER_LONG 6
-#else
-#define BITS_PER_LONG 32
-#define SHIFT_PER_LONG 5
-#endif
-
 #ifndef __ASSEMBLY__
 
 /* Dma addresses are 32-bits wide.  */
diff --git a/arch/powerpc/include/asm/bitsperlong.h b/arch/powerpc/include/asm/bitsperlong.h
new file mode 100644
index 00000000000..5f1659032c4
--- /dev/null
+++ b/arch/powerpc/include/asm/bitsperlong.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_POWERPC_BITSPERLONG_H
+#define __ASM_POWERPC_BITSPERLONG_H
+
+#if defined(__powerpc64__)
+# define __BITS_PER_LONG 64
+#else
+# define __BITS_PER_LONG 32
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_POWERPC_BITSPERLONG_H */
diff --git a/arch/powerpc/include/asm/types.h b/arch/powerpc/include/asm/types.h
index 7ce27a52bb3..a5aea0ca34e 100644
--- a/arch/powerpc/include/asm/types.h
+++ b/arch/powerpc/include/asm/types.h
@@ -40,15 +40,6 @@ typedef struct {
 #endif /* __ASSEMBLY__ */
 
 #ifdef __KERNEL__
-/*
- * These aren't exported outside the kernel to avoid name space clashes
- */
-#ifdef __powerpc64__
-#define BITS_PER_LONG 64
-#else
-#define BITS_PER_LONG 32
-#endif
-
 #ifndef __ASSEMBLY__
 
 typedef __vector128 vector128;
diff --git a/arch/s390/include/asm/bitsperlong.h b/arch/s390/include/asm/bitsperlong.h
new file mode 100644
index 00000000000..6b235aea9c6
--- /dev/null
+++ b/arch/s390/include/asm/bitsperlong.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_S390_BITSPERLONG_H
+#define __ASM_S390_BITSPERLONG_H
+
+#ifndef __s390x__
+#define __BITS_PER_LONG 32
+#else
+#define __BITS_PER_LONG 64
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_S390_BITSPERLONG_H */
+
diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h
index 3dc3fc22881..04d6b95a89c 100644
--- a/arch/s390/include/asm/types.h
+++ b/arch/s390/include/asm/types.h
@@ -28,12 +28,6 @@ typedef __signed__ long saddr_t;
  */
 #ifdef __KERNEL__
 
-#ifndef __s390x__
-#define BITS_PER_LONG 32
-#else
-#define BITS_PER_LONG 64
-#endif
-
 #ifndef __ASSEMBLY__
 
 typedef u64 dma64_addr_t;
diff --git a/arch/sh/include/asm/bitsperlong.h b/arch/sh/include/asm/bitsperlong.h
new file mode 100644
index 00000000000..6dc0bb0c13b
--- /dev/null
+++ b/arch/sh/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/arch/sparc/include/asm/bitsperlong.h b/arch/sparc/include/asm/bitsperlong.h
new file mode 100644
index 00000000000..40dcaa3aaa5
--- /dev/null
+++ b/arch/sparc/include/asm/bitsperlong.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_ALPHA_BITSPERLONG_H
+#define __ASM_ALPHA_BITSPERLONG_H
+
+#if defined(__sparc__) && defined(__arch64__)
+#define __BITS_PER_LONG 64
+#else
+#define __BITS_PER_LONG 32
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_ALPHA_BITSPERLONG_H */
+
diff --git a/arch/sparc/include/asm/types.h b/arch/sparc/include/asm/types.h
index 2237118825d..de671d73bae 100644
--- a/arch/sparc/include/asm/types.h
+++ b/arch/sparc/include/asm/types.h
@@ -21,8 +21,6 @@ typedef unsigned short umode_t;
 
 #ifdef __KERNEL__
 
-#define BITS_PER_LONG 64
-
 #ifndef __ASSEMBLY__
 
 /* Dma addresses come in generic and 64-bit flavours.  */
@@ -46,8 +44,6 @@ typedef unsigned short umode_t;
 
 #ifdef __KERNEL__
 
-#define BITS_PER_LONG 32
-
 #ifndef __ASSEMBLY__
 
 typedef u32 dma_addr_t;
diff --git a/arch/x86/include/asm/bitsperlong.h b/arch/x86/include/asm/bitsperlong.h
new file mode 100644
index 00000000000..b0ae1c4dc79
--- /dev/null
+++ b/arch/x86/include/asm/bitsperlong.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_X86_BITSPERLONG_H
+#define __ASM_X86_BITSPERLONG_H
+
+#ifdef __x86_64__
+# define __BITS_PER_LONG 64
+#else
+# define __BITS_PER_LONG 32
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_X86_BITSPERLONG_H */
+
diff --git a/arch/x86/include/asm/types.h b/arch/x86/include/asm/types.h
index e6f73632007..09b97745772 100644
--- a/arch/x86/include/asm/types.h
+++ b/arch/x86/include/asm/types.h
@@ -14,12 +14,6 @@ typedef unsigned short umode_t;
  */
 #ifdef __KERNEL__
 
-#ifdef CONFIG_X86_32
-# define BITS_PER_LONG 32
-#else
-# define BITS_PER_LONG 64
-#endif
-
 #ifndef __ASSEMBLY__
 
 typedef u64 dma64_addr_t;
diff --git a/arch/xtensa/include/asm/bitsperlong.h b/arch/xtensa/include/asm/bitsperlong.h
new file mode 100644
index 00000000000..6dc0bb0c13b
--- /dev/null
+++ b/arch/xtensa/include/asm/bitsperlong.h
@@ -0,0 +1 @@
+#include <asm-generic/bitsperlong.h>
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index 460b08d51e2..cbb437875f5 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -1,3 +1,4 @@
+header-y += bitsperlong.h
 header-y += errno-base.h
 header-y += errno.h
 header-y += fcntl.h
diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm
index 70d185534b9..290910e4ede 100644
--- a/include/asm-generic/Kbuild.asm
+++ b/include/asm-generic/Kbuild.asm
@@ -9,6 +9,7 @@ unifdef-y += a.out.h
 endif
 unifdef-y += auxvec.h
 unifdef-y += byteorder.h
+unifdef-y += bitsperlong.h
 unifdef-y += errno.h
 unifdef-y += fcntl.h
 unifdef-y += ioctl.h
diff --git a/include/asm-generic/bitsperlong.h b/include/asm-generic/bitsperlong.h
new file mode 100644
index 00000000000..4ae54e07de8
--- /dev/null
+++ b/include/asm-generic/bitsperlong.h
@@ -0,0 +1,32 @@
+#ifndef __ASM_GENERIC_BITS_PER_LONG
+#define __ASM_GENERIC_BITS_PER_LONG
+
+/*
+ * There seems to be no way of detecting this automatically from user
+ * space, so 64 bit architectures should override this in their
+ * bitsperlong.h. In particular, an architecture that supports
+ * both 32 and 64 bit user space must not rely on CONFIG_64BIT
+ * to decide it, but rather check a compiler provided macro.
+ */
+#ifndef __BITS_PER_LONG
+#define __BITS_PER_LONG 32
+#endif
+
+#ifdef __KERNEL__
+
+#ifdef CONFIG_64BIT
+#define BITS_PER_LONG 64
+#else
+#define BITS_PER_LONG 32
+#endif /* CONFIG_64BIT */
+
+/*
+ * FIXME: The check currently breaks x86-64 build, so it's
+ * temporarily disabled. Please fix x86-64 and reenable
+ */
+#if 0 && BITS_PER_LONG != __BITS_PER_LONG
+#error Inconsistent word size. Check asm/bitsperlong.h
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_GENERIC_BITS_PER_LONG */
diff --git a/include/asm-generic/int-l64.h b/include/asm-generic/int-l64.h
index 2af9b75d77d..1ca3efc976c 100644
--- a/include/asm-generic/int-l64.h
+++ b/include/asm-generic/int-l64.h
@@ -8,6 +8,8 @@
 #ifndef _ASM_GENERIC_INT_L64_H
 #define _ASM_GENERIC_INT_L64_H
 
+#include <asm/bitsperlong.h>
+
 #ifndef __ASSEMBLY__
 /*
  * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
diff --git a/include/asm-generic/int-ll64.h b/include/asm-generic/int-ll64.h
index f9bc9ac29b3..f394147c073 100644
--- a/include/asm-generic/int-ll64.h
+++ b/include/asm-generic/int-ll64.h
@@ -8,6 +8,8 @@
 #ifndef _ASM_GENERIC_INT_LL64_H
 #define _ASM_GENERIC_INT_LL64_H
 
+#include <asm/bitsperlong.h>
+
 #ifndef __ASSEMBLY__
 /*
  * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
-- 
cgit v1.2.3-70-g09d2


From 2864d32be31a20a4617e37a857dd9915a57e2efb Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:26 +0000
Subject: asm-generic: add generic sysv ipc headers

The ipc64 data structures were originally meant to
be architecture specific so that each architecture
could add their own optimizations for padding.

In the end, most of them just copied the x86 version,
and most got that wrong. UClibc expects the x86 anyway,
so we might just declare that the default and get
rid of the extra copies.

Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/Kbuild     |  5 ++++
 include/asm-generic/ipcbuf.h   | 34 ++++++++++++++++++++++++
 include/asm-generic/msgbuf.h   | 47 +++++++++++++++++++++++++++++++++
 include/asm-generic/sembuf.h   | 38 +++++++++++++++++++++++++++
 include/asm-generic/shmbuf.h   | 59 ++++++++++++++++++++++++++++++++++++++++++
 include/asm-generic/shmparam.h |  6 +++++
 6 files changed, 189 insertions(+)
 create mode 100644 include/asm-generic/ipcbuf.h
 create mode 100644 include/asm-generic/msgbuf.h
 create mode 100644 include/asm-generic/sembuf.h
 create mode 100644 include/asm-generic/shmbuf.h
 create mode 100644 include/asm-generic/shmparam.h

(limited to 'include')

diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index cbb437875f5..fd7a9c49df4 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -3,8 +3,13 @@ header-y += errno-base.h
 header-y += errno.h
 header-y += fcntl.h
 header-y += ioctl.h
+header-y += ipcbuf.h
 header-y += mman-common.h
+header-y += msgbuf.h
 header-y += poll.h
+header-y += sembuf.h
+header-y += shmbuf.h
+header-y += shmparam.h
 header-y += signal-defs.h
 header-y += statfs.h
 
diff --git a/include/asm-generic/ipcbuf.h b/include/asm-generic/ipcbuf.h
new file mode 100644
index 00000000000..76982b2a1b5
--- /dev/null
+++ b/include/asm-generic/ipcbuf.h
@@ -0,0 +1,34 @@
+#ifndef __ASM_GENERIC_IPCBUF_H
+#define __ASM_GENERIC_IPCBUF_H
+
+/*
+ * The generic ipc64_perm structure:
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * ipc64_perm was originally meant to be architecture specific, but
+ * everyone just ended up making identical copies without specific
+ * optimizations, so we may just as well all use the same one.
+ *
+ * Pad space is left for:
+ * - 32-bit mode_t on architectures that only had 16 bit
+ * - 32-bit seq
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct ipc64_perm {
+	__kernel_key_t		key;
+	__kernel_uid32_t	uid;
+	__kernel_gid32_t	gid;
+	__kernel_uid32_t	cuid;
+	__kernel_gid32_t	cgid;
+	__kernel_mode_t		mode;
+				/* pad if mode_t is u16: */
+	unsigned char		__pad1[4 - sizeof(__kernel_mode_t)];
+	unsigned short		seq;
+	unsigned short		__pad2;
+	unsigned long		__unused1;
+	unsigned long		__unused2;
+};
+
+#endif /* __ASM_GENERIC_IPCBUF_H */
diff --git a/include/asm-generic/msgbuf.h b/include/asm-generic/msgbuf.h
new file mode 100644
index 00000000000..aec850d9159
--- /dev/null
+++ b/include/asm-generic/msgbuf.h
@@ -0,0 +1,47 @@
+#ifndef __ASM_GENERIC_MSGBUF_H
+#define __ASM_GENERIC_MSGBUF_H
+
+#include <asm/bitsperlong.h>
+/*
+ * generic msqid64_ds structure.
+ *
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * msqid64_ds was originally meant to be architecture specific, but
+ * everyone just ended up making identical copies without specific
+ * optimizations, so we may just as well all use the same one.
+ *
+ * 64 bit architectures typically define a 64 bit __kernel_time_t,
+ * so they do not need the first three padding words.
+ * On big-endian systems, the padding is in the wrong place.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct msqid64_ds {
+	struct ipc64_perm msg_perm;
+	__kernel_time_t msg_stime;	/* last msgsnd time */
+#if __BITS_PER_LONG != 64
+	unsigned long	__unused1;
+#endif
+	__kernel_time_t msg_rtime;	/* last msgrcv time */
+#if __BITS_PER_LONG != 64
+	unsigned long	__unused2;
+#endif
+	__kernel_time_t msg_ctime;	/* last change time */
+#if __BITS_PER_LONG != 64
+	unsigned long	__unused3;
+#endif
+	unsigned long  msg_cbytes;	/* current number of bytes on queue */
+	unsigned long  msg_qnum;	/* number of messages in queue */
+	unsigned long  msg_qbytes;	/* max number of bytes on queue */
+	__kernel_pid_t msg_lspid;	/* pid of last msgsnd */
+	__kernel_pid_t msg_lrpid;	/* last receive pid */
+	unsigned long  __unused4;
+	unsigned long  __unused5;
+};
+
+#endif /* __ASM_GENERIC_MSGBUF_H */
diff --git a/include/asm-generic/sembuf.h b/include/asm-generic/sembuf.h
new file mode 100644
index 00000000000..4cb2c13e509
--- /dev/null
+++ b/include/asm-generic/sembuf.h
@@ -0,0 +1,38 @@
+#ifndef __ASM_GENERIC_SEMBUF_H
+#define __ASM_GENERIC_SEMBUF_H
+
+#include <asm/bitsperlong.h>
+
+/*
+ * The semid64_ds structure for x86 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * semid64_ds was originally meant to be architecture specific, but
+ * everyone just ended up making identical copies without specific
+ * optimizations, so we may just as well all use the same one.
+ *
+ * 64 bit architectures typically define a 64 bit __kernel_time_t,
+ * so they do not need the first two padding words.
+ * On big-endian systems, the padding is in the wrong place.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+struct semid64_ds {
+	struct ipc64_perm sem_perm;	/* permissions .. see ipc.h */
+	__kernel_time_t	sem_otime;	/* last semop time */
+#if __BITS_PER_LONG != 64
+	unsigned long	__unused1;
+#endif
+	__kernel_time_t	sem_ctime;	/* last change time */
+#if __BITS_PER_LONG != 64
+	unsigned long	__unused2;
+#endif
+	unsigned long	sem_nsems;	/* no. of semaphores in array */
+	unsigned long	__unused3;
+	unsigned long	__unused4;
+};
+
+#endif /* __ASM_GENERIC_SEMBUF_H */
diff --git a/include/asm-generic/shmbuf.h b/include/asm-generic/shmbuf.h
new file mode 100644
index 00000000000..5768fa60ac8
--- /dev/null
+++ b/include/asm-generic/shmbuf.h
@@ -0,0 +1,59 @@
+#ifndef __ASM_GENERIC_SHMBUF_H
+#define __ASM_GENERIC_SHMBUF_H
+
+#include <asm/bitsperlong.h>
+
+/*
+ * The shmid64_ds structure for x86 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * shmid64_ds was originally meant to be architecture specific, but
+ * everyone just ended up making identical copies without specific
+ * optimizations, so we may just as well all use the same one.
+ *
+ * 64 bit architectures typically define a 64 bit __kernel_time_t,
+ * so they do not need the first two padding words.
+ * On big-endian systems, the padding is in the wrong place.
+ *
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct shmid64_ds {
+	struct ipc64_perm	shm_perm;	/* operation perms */
+	size_t			shm_segsz;	/* size of segment (bytes) */
+	__kernel_time_t		shm_atime;	/* last attach time */
+#if __BITS_PER_LONG != 64
+	unsigned long		__unused1;
+#endif
+	__kernel_time_t		shm_dtime;	/* last detach time */
+#if __BITS_PER_LONG != 64
+	unsigned long		__unused2;
+#endif
+	__kernel_time_t		shm_ctime;	/* last change time */
+#if __BITS_PER_LONG != 64
+	unsigned long		__unused3;
+#endif
+	__kernel_pid_t		shm_cpid;	/* pid of creator */
+	__kernel_pid_t		shm_lpid;	/* pid of last operator */
+	unsigned long		shm_nattch;	/* no. of current attaches */
+	unsigned long		__unused4;
+	unsigned long		__unused5;
+};
+
+struct shminfo64 {
+	unsigned long	shmmax;
+	unsigned long	shmmin;
+	unsigned long	shmmni;
+	unsigned long	shmseg;
+	unsigned long	shmall;
+	unsigned long	__unused1;
+	unsigned long	__unused2;
+	unsigned long	__unused3;
+	unsigned long	__unused4;
+};
+
+#endif /* __ASM_GENERIC_SHMBUF_H */
diff --git a/include/asm-generic/shmparam.h b/include/asm-generic/shmparam.h
new file mode 100644
index 00000000000..51a3852de73
--- /dev/null
+++ b/include/asm-generic/shmparam.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_GENERIC_SHMPARAM_H
+#define __ASM_GENERIC_SHMPARAM_H
+
+#define SHMLBA PAGE_SIZE	 /* attach addr a multiple of this */
+
+#endif /* _ASM_GENERIC_SHMPARAM_H */
-- 
cgit v1.2.3-70-g09d2


From 6103ec56c65c33774c7c38652c8204120c3c7519 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:27 +0000
Subject: asm-generic: add generic ABI headers

These header files are typically copied from an existing architecture
into any new one, slightly modified and then remain untouched until
the end of time in the name of ABI stability.

To make it easier for future architectures, provide a sane generic
version here. In cases where multiple architectures already use
identical code, I used the most common version. In cases like
stat.h that are more or less broken everywhere, I provide a
version that is meant to be ideal for new architectures.

Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/Kbuild        |  15 +++
 include/asm-generic/auxvec.h      |   8 ++
 include/asm-generic/ioctls.h      | 110 +++++++++++++++++++++
 include/asm-generic/mman.h        |  18 ++++
 include/asm-generic/param.h       |  24 +++++
 include/asm-generic/posix_types.h | 165 +++++++++++++++++++++++++++++++
 include/asm-generic/setup.h       |   6 ++
 include/asm-generic/signal.h      | 131 +++++++++++++++++++++++++
 include/asm-generic/socket.h      |  63 ++++++++++++
 include/asm-generic/sockios.h     |  13 +++
 include/asm-generic/stat.h        |  72 ++++++++++++++
 include/asm-generic/swab.h        |  18 ++++
 include/asm-generic/termbits.h    | 198 ++++++++++++++++++++++++++++++++++++++
 include/asm-generic/termios.h     | 154 +++++++++++++++++++++++++++++
 include/asm-generic/types.h       |  42 ++++++++
 include/asm-generic/ucontext.h    |  12 +++
 16 files changed, 1049 insertions(+)
 create mode 100644 include/asm-generic/auxvec.h
 create mode 100644 include/asm-generic/ioctls.h
 create mode 100644 include/asm-generic/mman.h
 create mode 100644 include/asm-generic/param.h
 create mode 100644 include/asm-generic/posix_types.h
 create mode 100644 include/asm-generic/setup.h
 create mode 100644 include/asm-generic/signal.h
 create mode 100644 include/asm-generic/socket.h
 create mode 100644 include/asm-generic/sockios.h
 create mode 100644 include/asm-generic/stat.h
 create mode 100644 include/asm-generic/swab.h
 create mode 100644 include/asm-generic/termbits.h
 create mode 100644 include/asm-generic/termios.h
 create mode 100644 include/asm-generic/types.h
 create mode 100644 include/asm-generic/ucontext.h

(limited to 'include')

diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index fd7a9c49df4..11a78b8e2fc 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -1,17 +1,32 @@
+header-y += auxvec.h
 header-y += bitsperlong.h
 header-y += errno-base.h
 header-y += errno.h
 header-y += fcntl.h
 header-y += ioctl.h
+header-y += ioctls.h
 header-y += ipcbuf.h
 header-y += mman-common.h
+header-y += mman.h
 header-y += msgbuf.h
+header-y += param.h
 header-y += poll.h
+header-y += posix_types.h
 header-y += sembuf.h
+header-y += setup.h
 header-y += shmbuf.h
 header-y += shmparam.h
 header-y += signal-defs.h
+header-y += signal.h
+header-y += socket.h
+header-y += sockios.h
+header-y += stat.h
 header-y += statfs.h
+header-y += swab.h
+header-y += termbits.h
+header-y += termios.h
+header-y += types.h
+header-y += ucontext.h
 
 unifdef-y += int-l64.h
 unifdef-y += int-ll64.h
diff --git a/include/asm-generic/auxvec.h b/include/asm-generic/auxvec.h
new file mode 100644
index 00000000000..b99573b0ad1
--- /dev/null
+++ b/include/asm-generic/auxvec.h
@@ -0,0 +1,8 @@
+#ifndef __ASM_GENERIC_AUXVEC_H
+#define __ASM_GENERIC_AUXVEC_H
+/*
+ * Not all architectures need their own auxvec.h, the most
+ * common definitions are already in linux/auxvec.h.
+ */
+
+#endif /* __ASM_GENERIC_AUXVEC_H */
diff --git a/include/asm-generic/ioctls.h b/include/asm-generic/ioctls.h
new file mode 100644
index 00000000000..a799e20a769
--- /dev/null
+++ b/include/asm-generic/ioctls.h
@@ -0,0 +1,110 @@
+#ifndef __ASM_GENERIC_IOCTLS_H
+#define __ASM_GENERIC_IOCTLS_H
+
+#include <linux/ioctl.h>
+
+/*
+ * These are the most common definitions for tty ioctl numbers.
+ * Most of them do not use the recommended _IOC(), but there is
+ * probably some source code out there hardcoding the number,
+ * so we might as well use them for all new platforms.
+ *
+ * The architectures that use different values here typically
+ * try to be compatible with some Unix variants for the same
+ * architecture.
+ */
+
+/* 0x54 is just a magic number to make these relatively unique ('T') */
+
+#define TCGETS		0x5401
+#define TCSETS		0x5402
+#define TCSETSW		0x5403
+#define TCSETSF		0x5404
+#define TCGETA		0x5405
+#define TCSETA		0x5406
+#define TCSETAW		0x5407
+#define TCSETAF		0x5408
+#define TCSBRK		0x5409
+#define TCXONC		0x540A
+#define TCFLSH		0x540B
+#define TIOCEXCL	0x540C
+#define TIOCNXCL	0x540D
+#define TIOCSCTTY	0x540E
+#define TIOCGPGRP	0x540F
+#define TIOCSPGRP	0x5410
+#define TIOCOUTQ	0x5411
+#define TIOCSTI		0x5412
+#define TIOCGWINSZ	0x5413
+#define TIOCSWINSZ	0x5414
+#define TIOCMGET	0x5415
+#define TIOCMBIS	0x5416
+#define TIOCMBIC	0x5417
+#define TIOCMSET	0x5418
+#define TIOCGSOFTCAR	0x5419
+#define TIOCSSOFTCAR	0x541A
+#define FIONREAD	0x541B
+#define TIOCINQ		FIONREAD
+#define TIOCLINUX	0x541C
+#define TIOCCONS	0x541D
+#define TIOCGSERIAL	0x541E
+#define TIOCSSERIAL	0x541F
+#define TIOCPKT		0x5420
+#define FIONBIO		0x5421
+#define TIOCNOTTY	0x5422
+#define TIOCSETD	0x5423
+#define TIOCGETD	0x5424
+#define TCSBRKP		0x5425	/* Needed for POSIX tcsendbreak() */
+#define TIOCSBRK	0x5427  /* BSD compatibility */
+#define TIOCCBRK	0x5428  /* BSD compatibility */
+#define TIOCGSID	0x5429  /* Return the session ID of FD */
+#define TCGETS2		_IOR('T', 0x2A, struct termios2)
+#define TCSETS2		_IOW('T', 0x2B, struct termios2)
+#define TCSETSW2	_IOW('T', 0x2C, struct termios2)
+#define TCSETSF2	_IOW('T', 0x2D, struct termios2)
+#define TIOCGRS485	0x542E
+#define TIOCSRS485	0x542F
+#define TIOCGPTN	_IOR('T', 0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
+#define TIOCSPTLCK	_IOW('T', 0x31, int)  /* Lock/unlock Pty */
+#define TCGETX		0x5432 /* SYS5 TCGETX compatibility */
+#define TCSETX		0x5433
+#define TCSETXF		0x5434
+#define TCSETXW		0x5435
+
+#define FIONCLEX	0x5450
+#define FIOCLEX		0x5451
+#define FIOASYNC	0x5452
+#define TIOCSERCONFIG	0x5453
+#define TIOCSERGWILD	0x5454
+#define TIOCSERSWILD	0x5455
+#define TIOCGLCKTRMIOS	0x5456
+#define TIOCSLCKTRMIOS	0x5457
+#define TIOCSERGSTRUCT	0x5458 /* For debugging only */
+#define TIOCSERGETLSR   0x5459 /* Get line status register */
+#define TIOCSERGETMULTI 0x545A /* Get multiport config  */
+#define TIOCSERSETMULTI 0x545B /* Set multiport config */
+
+#define TIOCMIWAIT	0x545C	/* wait for a change on serial input line(s) */
+#define TIOCGICOUNT	0x545D	/* read serial port inline interrupt counts */
+
+/*
+ * some architectures define FIOQSIZE as 0x545E, which is used for
+ * TIOCGHAYESESP on others
+ */
+#ifndef FIOQSIZE
+# define TIOCGHAYESESP	0x545E  /* Get Hayes ESP configuration */
+# define TIOCSHAYESESP	0x545F  /* Set Hayes ESP configuration */
+# define FIOQSIZE	0x5460
+#endif
+
+/* Used for packet mode */
+#define TIOCPKT_DATA		 0
+#define TIOCPKT_FLUSHREAD	 1
+#define TIOCPKT_FLUSHWRITE	 2
+#define TIOCPKT_STOP		 4
+#define TIOCPKT_START		 8
+#define TIOCPKT_NOSTOP		16
+#define TIOCPKT_DOSTOP		32
+
+#define TIOCSER_TEMT	0x01	/* Transmitter physically empty */
+
+#endif /* __ASM_GENERIC_IOCTLS_H */
diff --git a/include/asm-generic/mman.h b/include/asm-generic/mman.h
new file mode 100644
index 00000000000..7cab4de2bca
--- /dev/null
+++ b/include/asm-generic/mman.h
@@ -0,0 +1,18 @@
+#ifndef __ASM_GENERIC_MMAN_H
+#define __ASM_GENERIC_MMAN_H
+
+#include <asm-generic/mman-common.h>
+
+#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
+#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
+#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
+#define MAP_LOCKED	0x2000		/* pages are locked */
+#define MAP_NORESERVE	0x4000		/* don't check for reservations */
+#define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
+#define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
+
+#define MCL_CURRENT	1		/* lock all current mappings */
+#define MCL_FUTURE	2		/* lock all future mappings */
+
+#endif /* __ASM_GENERIC_MMAN_H */
diff --git a/include/asm-generic/param.h b/include/asm-generic/param.h
new file mode 100644
index 00000000000..cdf8251bfb6
--- /dev/null
+++ b/include/asm-generic/param.h
@@ -0,0 +1,24 @@
+#ifndef __ASM_GENERIC_PARAM_H
+#define __ASM_GENERIC_PARAM_H
+
+#ifdef __KERNEL__
+# define HZ		CONFIG_HZ	/* Internal kernel timer frequency */
+# define USER_HZ	100		/* some user interfaces are */
+# define CLOCKS_PER_SEC	(USER_HZ)       /* in "ticks" like times() */
+#endif
+
+#ifndef HZ
+#define HZ 100
+#endif
+
+#ifndef EXEC_PAGESIZE
+#define EXEC_PAGESIZE	4096
+#endif
+
+#ifndef NOGROUP
+#define NOGROUP		(-1)
+#endif
+
+#define MAXHOSTNAMELEN	64	/* max length of hostname */
+
+#endif /* __ASM_GENERIC_PARAM_H */
diff --git a/include/asm-generic/posix_types.h b/include/asm-generic/posix_types.h
new file mode 100644
index 00000000000..3dab00860e7
--- /dev/null
+++ b/include/asm-generic/posix_types.h
@@ -0,0 +1,165 @@
+#ifndef __ASM_GENERIC_POSIX_TYPES_H
+#define __ASM_GENERIC_POSIX_TYPES_H
+
+#include <asm/bitsperlong.h>
+/*
+ * This file is generally used by user-level software, so you need to
+ * be a little careful about namespace pollution etc.
+ *
+ * First the types that are often defined in different ways across
+ * architectures, so that you can override them.
+ */
+
+#ifndef __kernel_ino_t
+typedef unsigned long	__kernel_ino_t;
+#endif
+
+#ifndef __kernel_mode_t
+typedef unsigned int	__kernel_mode_t;
+#endif
+
+#ifndef __kernel_nlink_t
+typedef unsigned long	__kernel_nlink_t;
+#endif
+
+#ifndef __kernel_pid_t
+typedef int		__kernel_pid_t;
+#endif
+
+#ifndef __kernel_ipc_pid_t
+typedef int		__kernel_ipc_pid_t;
+#endif
+
+#ifndef __kernel_uid_t
+typedef unsigned int	__kernel_uid_t;
+typedef unsigned int	__kernel_gid_t;
+#endif
+
+#ifndef __kernel_suseconds_t
+typedef long		__kernel_suseconds_t;
+#endif
+
+#ifndef __kernel_daddr_t
+typedef int		__kernel_daddr_t;
+#endif
+
+#ifndef __kernel_uid32_t
+typedef __kernel_uid_t	__kernel_uid32_t;
+typedef __kernel_gid_t	__kernel_gid32_t;
+#endif
+
+#ifndef __kernel_old_uid_t
+typedef __kernel_uid_t	__kernel_old_uid_t;
+typedef __kernel_gid_t	__kernel_old_gid_t;
+#endif
+
+#ifndef __kernel_old_dev_t
+typedef unsigned int	__kernel_old_dev_t;
+#endif
+
+/*
+ * Most 32 bit architectures use "unsigned int" size_t,
+ * and all 64 bit architectures use "unsigned long" size_t.
+ */
+#ifndef __kernel_size_t
+#if __BITS_PER_LONG != 64
+typedef unsigned int	__kernel_size_t;
+typedef int		__kernel_ssize_t;
+typedef int		__kernel_ptrdiff_t;
+#else
+typedef unsigned long	__kernel_size_t;
+typedef long		__kernel_ssize_t;
+typedef long		__kernel_ptrdiff_t;
+#endif
+#endif
+
+/*
+ * anything below here should be completely generic
+ */
+typedef long		__kernel_off_t;
+typedef long long	__kernel_loff_t;
+typedef long		__kernel_time_t;
+typedef long		__kernel_clock_t;
+typedef int		__kernel_timer_t;
+typedef int		__kernel_clockid_t;
+typedef char *		__kernel_caddr_t;
+typedef unsigned short	__kernel_uid16_t;
+typedef unsigned short	__kernel_gid16_t;
+
+typedef struct {
+	int	val[2];
+} __kernel_fsid_t;
+
+#ifdef __KERNEL__
+
+#undef __FD_SET
+static inline void __FD_SET(unsigned long __fd, __kernel_fd_set *__fdsetp)
+{
+	unsigned long __tmp = __fd / __NFDBITS;
+	unsigned long __rem = __fd % __NFDBITS;
+	__fdsetp->fds_bits[__tmp] |= (1UL<<__rem);
+}
+
+#undef __FD_CLR
+static inline void __FD_CLR(unsigned long __fd, __kernel_fd_set *__fdsetp)
+{
+	unsigned long __tmp = __fd / __NFDBITS;
+	unsigned long __rem = __fd % __NFDBITS;
+	__fdsetp->fds_bits[__tmp] &= ~(1UL<<__rem);
+}
+
+#undef __FD_ISSET
+static inline int __FD_ISSET(unsigned long __fd, const __kernel_fd_set *__p)
+{
+	unsigned long __tmp = __fd / __NFDBITS;
+	unsigned long __rem = __fd % __NFDBITS;
+	return (__p->fds_bits[__tmp] & (1UL<<__rem)) != 0;
+}
+
+/*
+ * This will unroll the loop for the normal constant case (8 ints,
+ * for a 256-bit fd_set)
+ */
+#undef __FD_ZERO
+static inline void __FD_ZERO(__kernel_fd_set *__p)
+{
+	unsigned long *__tmp = __p->fds_bits;
+	int __i;
+
+	if (__builtin_constant_p(__FDSET_LONGS)) {
+		switch (__FDSET_LONGS) {
+		case 16:
+			__tmp[ 0] = 0; __tmp[ 1] = 0;
+			__tmp[ 2] = 0; __tmp[ 3] = 0;
+			__tmp[ 4] = 0; __tmp[ 5] = 0;
+			__tmp[ 6] = 0; __tmp[ 7] = 0;
+			__tmp[ 8] = 0; __tmp[ 9] = 0;
+			__tmp[10] = 0; __tmp[11] = 0;
+			__tmp[12] = 0; __tmp[13] = 0;
+			__tmp[14] = 0; __tmp[15] = 0;
+			return;
+
+		case 8:
+			__tmp[ 0] = 0; __tmp[ 1] = 0;
+			__tmp[ 2] = 0; __tmp[ 3] = 0;
+			__tmp[ 4] = 0; __tmp[ 5] = 0;
+			__tmp[ 6] = 0; __tmp[ 7] = 0;
+			return;
+
+		case 4:
+			__tmp[ 0] = 0; __tmp[ 1] = 0;
+			__tmp[ 2] = 0; __tmp[ 3] = 0;
+			return;
+		}
+	}
+	__i = __FDSET_LONGS;
+	while (__i) {
+		__i--;
+		*__tmp = 0;
+		__tmp++;
+	}
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASM_GENERIC_POSIX_TYPES_H */
diff --git a/include/asm-generic/setup.h b/include/asm-generic/setup.h
new file mode 100644
index 00000000000..6fc26a51003
--- /dev/null
+++ b/include/asm-generic/setup.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_GENERIC_SETUP_H
+#define __ASM_GENERIC_SETUP_H
+
+#define COMMAND_LINE_SIZE	512
+
+#endif	/* __ASM_GENERIC_SETUP_H */
diff --git a/include/asm-generic/signal.h b/include/asm-generic/signal.h
new file mode 100644
index 00000000000..555c0aee8a4
--- /dev/null
+++ b/include/asm-generic/signal.h
@@ -0,0 +1,131 @@
+#ifndef __ASM_GENERIC_SIGNAL_H
+#define __ASM_GENERIC_SIGNAL_H
+
+#include <linux/types.h>
+
+#define _NSIG		64
+#define _NSIG_BPW	__BITS_PER_LONG
+#define _NSIG_WORDS	(_NSIG / _NSIG_BPW)
+
+#define SIGHUP		 1
+#define SIGINT		 2
+#define SIGQUIT		 3
+#define SIGILL		 4
+#define SIGTRAP		 5
+#define SIGABRT		 6
+#define SIGIOT		 6
+#define SIGBUS		 7
+#define SIGFPE		 8
+#define SIGKILL		 9
+#define SIGUSR1		10
+#define SIGSEGV		11
+#define SIGUSR2		12
+#define SIGPIPE		13
+#define SIGALRM		14
+#define SIGTERM		15
+#define SIGSTKFLT	16
+#define SIGCHLD		17
+#define SIGCONT		18
+#define SIGSTOP		19
+#define SIGTSTP		20
+#define SIGTTIN		21
+#define SIGTTOU		22
+#define SIGURG		23
+#define SIGXCPU		24
+#define SIGXFSZ		25
+#define SIGVTALRM	26
+#define SIGPROF		27
+#define SIGWINCH	28
+#define SIGIO		29
+#define SIGPOLL		SIGIO
+/*
+#define SIGLOST		29
+*/
+#define SIGPWR		30
+#define SIGSYS		31
+#define	SIGUNUSED	31
+
+/* These should not be considered constants from userland.  */
+#define SIGRTMIN	32
+#ifndef SIGRTMAX
+#define SIGRTMAX	_NSIG
+#endif
+
+/*
+ * SA_FLAGS values:
+ *
+ * SA_ONSTACK indicates that a registered stack_t will be used.
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
+ * SA_RESETHAND clears the handler when the signal is delivered.
+ * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
+ * SA_NODEFER prevents the current signal from being masked in the handler.
+ *
+ * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
+ * Unix names RESETHAND and NODEFER respectively.
+ */
+#define SA_NOCLDSTOP	0x00000001
+#define SA_NOCLDWAIT	0x00000002
+#define SA_SIGINFO	0x00000004
+#define SA_ONSTACK	0x08000000
+#define SA_RESTART	0x10000000
+#define SA_NODEFER	0x40000000
+#define SA_RESETHAND	0x80000000
+
+#define SA_NOMASK	SA_NODEFER
+#define SA_ONESHOT	SA_RESETHAND
+
+/*
+ * New architectures should not define the obsolete
+ *	SA_RESTORER	0x04000000
+ */
+
+/*
+ * sigaltstack controls
+ */
+#define SS_ONSTACK	1
+#define SS_DISABLE	2
+
+#define MINSIGSTKSZ	2048
+#define SIGSTKSZ	8192
+
+#ifndef __ASSEMBLY__
+typedef struct {
+	unsigned long sig[_NSIG_WORDS];
+} sigset_t;
+
+/* not actually used, but required for linux/syscalls.h */
+typedef unsigned long old_sigset_t;
+
+#include <asm-generic/signal-defs.h>
+
+struct sigaction {
+	__sighandler_t sa_handler;
+	unsigned long sa_flags;
+#ifdef SA_RESTORER
+	__sigrestore_t sa_restorer;
+#endif
+	sigset_t sa_mask;		/* mask last for extensibility */
+};
+
+struct k_sigaction {
+	struct sigaction sa;
+};
+
+typedef struct sigaltstack {
+	void __user *ss_sp;
+	int ss_flags;
+	size_t ss_size;
+} stack_t;
+
+#ifdef __KERNEL__
+
+#include <asm/sigcontext.h>
+#undef __HAVE_ARCH_SIG_BITOPS
+
+#define ptrace_signal_deliver(regs, cookie) do { } while (0)
+
+#endif /* __KERNEL__ */
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_GENERIC_SIGNAL_H */
diff --git a/include/asm-generic/socket.h b/include/asm-generic/socket.h
new file mode 100644
index 00000000000..5d79e409241
--- /dev/null
+++ b/include/asm-generic/socket.h
@@ -0,0 +1,63 @@
+#ifndef __ASM_GENERIC_SOCKET_H
+#define __ASM_GENERIC_SOCKET_H
+
+#include <asm/sockios.h>
+
+/* For setsockopt(2) */
+#define SOL_SOCKET	1
+
+#define SO_DEBUG	1
+#define SO_REUSEADDR	2
+#define SO_TYPE		3
+#define SO_ERROR	4
+#define SO_DONTROUTE	5
+#define SO_BROADCAST	6
+#define SO_SNDBUF	7
+#define SO_RCVBUF	8
+#define SO_SNDBUFFORCE	32
+#define SO_RCVBUFFORCE	33
+#define SO_KEEPALIVE	9
+#define SO_OOBINLINE	10
+#define SO_NO_CHECK	11
+#define SO_PRIORITY	12
+#define SO_LINGER	13
+#define SO_BSDCOMPAT	14
+/* To add :#define SO_REUSEPORT 15 */
+
+#ifndef SO_PASSCRED /* powerpc only differs in these */
+#define SO_PASSCRED	16
+#define SO_PEERCRED	17
+#define SO_RCVLOWAT	18
+#define SO_SNDLOWAT	19
+#define SO_RCVTIMEO	20
+#define SO_SNDTIMEO	21
+#endif
+
+/* Security levels - as per NRL IPv6 - don't actually do anything */
+#define SO_SECURITY_AUTHENTICATION		22
+#define SO_SECURITY_ENCRYPTION_TRANSPORT	23
+#define SO_SECURITY_ENCRYPTION_NETWORK		24
+
+#define SO_BINDTODEVICE	25
+
+/* Socket filtering */
+#define SO_ATTACH_FILTER	26
+#define SO_DETACH_FILTER	27
+
+#define SO_PEERNAME		28
+#define SO_TIMESTAMP		29
+#define SCM_TIMESTAMP		SO_TIMESTAMP
+
+#define SO_ACCEPTCONN		30
+
+#define SO_PEERSEC		31
+#define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
+
+#define SO_MARK			36
+
+#define SO_TIMESTAMPING		37
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
+#endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/asm-generic/sockios.h b/include/asm-generic/sockios.h
new file mode 100644
index 00000000000..9a61a369b90
--- /dev/null
+++ b/include/asm-generic/sockios.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_GENERIC_SOCKIOS_H
+#define __ASM_GENERIC_SOCKIOS_H
+
+/* Socket-level I/O control calls. */
+#define FIOSETOWN	0x8901
+#define SIOCSPGRP	0x8902
+#define FIOGETOWN	0x8903
+#define SIOCGPGRP	0x8904
+#define SIOCATMARK	0x8905
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
+
+#endif /* __ASM_GENERIC_SOCKIOS_H */
diff --git a/include/asm-generic/stat.h b/include/asm-generic/stat.h
new file mode 100644
index 00000000000..47e64170305
--- /dev/null
+++ b/include/asm-generic/stat.h
@@ -0,0 +1,72 @@
+#ifndef __ASM_GENERIC_STAT_H
+#define __ASM_GENERIC_STAT_H
+
+/*
+ * Everybody gets this wrong and has to stick with it for all
+ * eternity. Hopefully, this version gets used by new architectures
+ * so they don't fall into the same traps.
+ *
+ * stat64 is copied from powerpc64, with explicit padding added.
+ * stat is the same structure layout on 64-bit, without the 'long long'
+ * types.
+ *
+ * By convention, 64 bit architectures use the stat interface, while
+ * 32 bit architectures use the stat64 interface. Note that we don't
+ * provide an __old_kernel_stat here, which new architecture should
+ * not have to start with.
+ */
+
+#include <asm/bitsperlong.h>
+
+#define STAT_HAVE_NSEC 1
+
+struct stat {
+	unsigned long	st_dev;		/* Device.  */
+	unsigned long	st_ino;		/* File serial number.  */
+	unsigned int	st_mode;	/* File mode.  */
+	unsigned int	st_nlink;	/* Link count.  */
+	unsigned int	st_uid;		/* User ID of the file's owner.  */
+	unsigned int	st_gid;		/* Group ID of the file's group. */
+	unsigned long	st_rdev;	/* Device number, if device.  */
+	unsigned long	__pad1;
+	long		st_size;	/* Size of file, in bytes.  */
+	int		st_blksize;	/* Optimal block size for I/O.  */
+	int		__pad2;
+	long		st_blocks;	/* Number 512-byte blocks allocated. */
+	int		st_atime;	/* Time of last access.  */
+	unsigned int	st_atime_nsec;
+	int		st_mtime;	/* Time of last modification.  */
+	unsigned int	st_mtime_nsec;
+	int		st_ctime;	/* Time of last status change.  */
+	unsigned int	st_ctime_nsec;
+	unsigned int	__unused4;
+	unsigned int	__unused5;
+};
+
+#if __BITS_PER_LONG != 64
+/* This matches struct stat64 in glibc2.1. Only used for 32 bit. */
+struct stat64 {
+	unsigned long long st_dev;	/* Device.  */
+	unsigned long long st_ino;	/* File serial number.  */
+	unsigned int	st_mode;	/* File mode.  */
+	unsigned int	st_nlink;	/* Link count.  */
+	unsigned int	st_uid;		/* User ID of the file's owner.  */
+	unsigned int	st_gid;		/* Group ID of the file's group. */
+	unsigned long long st_rdev;	/* Device number, if device.  */
+	unsigned long long __pad1;
+	long long	st_size;	/* Size of file, in bytes.  */
+	int		st_blksize;	/* Optimal block size for I/O.  */
+	int		__pad2;
+	long long	st_blocks;	/* Number 512-byte blocks allocated. */
+	int		st_atime;	/* Time of last access.  */
+	unsigned int	st_atime_nsec;
+	int		st_mtime;	/* Time of last modification.  */
+	unsigned int	st_mtime_nsec;
+	int		st_ctime;	/* Time of last status change.  */
+	unsigned int	st_ctime_nsec;
+	unsigned int	__unused4;
+	unsigned int	__unused5;
+};
+#endif
+
+#endif /* __ASM_GENERIC_STAT_H */
diff --git a/include/asm-generic/swab.h b/include/asm-generic/swab.h
new file mode 100644
index 00000000000..a8e9029d9eb
--- /dev/null
+++ b/include/asm-generic/swab.h
@@ -0,0 +1,18 @@
+#ifndef _ASM_GENERIC_SWAB_H
+#define _ASM_GENERIC_SWAB_H
+
+#include <asm/bitsperlong.h>
+
+/*
+ * 32 bit architectures typically (but not always) want to
+ * set __SWAB_64_THRU_32__. In user space, this is only
+ * valid if the compiler supports 64 bit data types.
+ */
+
+#if __BITS_PER_LONG == 32
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__) || defined(__KERNEL__)
+#define __SWAB_64_THRU_32__
+#endif
+#endif
+
+#endif /* _ASM_GENERIC_SWAB_H */
diff --git a/include/asm-generic/termbits.h b/include/asm-generic/termbits.h
new file mode 100644
index 00000000000..1c9773d48cb
--- /dev/null
+++ b/include/asm-generic/termbits.h
@@ -0,0 +1,198 @@
+#ifndef __ASM_GENERIC_TERMBITS_H
+#define __ASM_GENERIC_TERMBITS_H
+
+#include <linux/posix_types.h>
+
+typedef unsigned char	cc_t;
+typedef unsigned int	speed_t;
+typedef unsigned int	tcflag_t;
+
+#define NCCS 19
+struct termios {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_line;			/* line discipline */
+	cc_t c_cc[NCCS];		/* control characters */
+};
+
+struct termios2 {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_line;			/* line discipline */
+	cc_t c_cc[NCCS];		/* control characters */
+	speed_t c_ispeed;		/* input speed */
+	speed_t c_ospeed;		/* output speed */
+};
+
+struct ktermios {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_line;			/* line discipline */
+	cc_t c_cc[NCCS];		/* control characters */
+	speed_t c_ispeed;		/* input speed */
+	speed_t c_ospeed;		/* output speed */
+};
+
+/* c_cc characters */
+#define VINTR 0
+#define VQUIT 1
+#define VERASE 2
+#define VKILL 3
+#define VEOF 4
+#define VTIME 5
+#define VMIN 6
+#define VSWTC 7
+#define VSTART 8
+#define VSTOP 9
+#define VSUSP 10
+#define VEOL 11
+#define VREPRINT 12
+#define VDISCARD 13
+#define VWERASE 14
+#define VLNEXT 15
+#define VEOL2 16
+
+/* c_iflag bits */
+#define IGNBRK	0000001
+#define BRKINT	0000002
+#define IGNPAR	0000004
+#define PARMRK	0000010
+#define INPCK	0000020
+#define ISTRIP	0000040
+#define INLCR	0000100
+#define IGNCR	0000200
+#define ICRNL	0000400
+#define IUCLC	0001000
+#define IXON	0002000
+#define IXANY	0004000
+#define IXOFF	0010000
+#define IMAXBEL	0020000
+#define IUTF8	0040000
+
+/* c_oflag bits */
+#define OPOST	0000001
+#define OLCUC	0000002
+#define ONLCR	0000004
+#define OCRNL	0000010
+#define ONOCR	0000020
+#define ONLRET	0000040
+#define OFILL	0000100
+#define OFDEL	0000200
+#define NLDLY	0000400
+#define   NL0	0000000
+#define   NL1	0000400
+#define CRDLY	0003000
+#define   CR0	0000000
+#define   CR1	0001000
+#define   CR2	0002000
+#define   CR3	0003000
+#define TABDLY	0014000
+#define   TAB0	0000000
+#define   TAB1	0004000
+#define   TAB2	0010000
+#define   TAB3	0014000
+#define   XTABS	0014000
+#define BSDLY	0020000
+#define   BS0	0000000
+#define   BS1	0020000
+#define VTDLY	0040000
+#define   VT0	0000000
+#define   VT1	0040000
+#define FFDLY	0100000
+#define   FF0	0000000
+#define   FF1	0100000
+
+/* c_cflag bit meaning */
+#define CBAUD	0010017
+#define  B0	0000000		/* hang up */
+#define  B50	0000001
+#define  B75	0000002
+#define  B110	0000003
+#define  B134	0000004
+#define  B150	0000005
+#define  B200	0000006
+#define  B300	0000007
+#define  B600	0000010
+#define  B1200	0000011
+#define  B1800	0000012
+#define  B2400	0000013
+#define  B4800	0000014
+#define  B9600	0000015
+#define  B19200	0000016
+#define  B38400	0000017
+#define EXTA B19200
+#define EXTB B38400
+#define CSIZE	0000060
+#define   CS5	0000000
+#define   CS6	0000020
+#define   CS7	0000040
+#define   CS8	0000060
+#define CSTOPB	0000100
+#define CREAD	0000200
+#define PARENB	0000400
+#define PARODD	0001000
+#define HUPCL	0002000
+#define CLOCAL	0004000
+#define CBAUDEX 0010000
+#define    BOTHER 0010000
+#define    B57600 0010001
+#define   B115200 0010002
+#define   B230400 0010003
+#define   B460800 0010004
+#define   B500000 0010005
+#define   B576000 0010006
+#define   B921600 0010007
+#define  B1000000 0010010
+#define  B1152000 0010011
+#define  B1500000 0010012
+#define  B2000000 0010013
+#define  B2500000 0010014
+#define  B3000000 0010015
+#define  B3500000 0010016
+#define  B4000000 0010017
+#define CIBAUD	  002003600000	/* input baud rate */
+#define CMSPAR	  010000000000	/* mark or space (stick) parity */
+#define CRTSCTS	  020000000000	/* flow control */
+
+#define IBSHIFT	  16		/* Shift from CBAUD to CIBAUD */
+
+/* c_lflag bits */
+#define ISIG	0000001
+#define ICANON	0000002
+#define XCASE	0000004
+#define ECHO	0000010
+#define ECHOE	0000020
+#define ECHOK	0000040
+#define ECHONL	0000100
+#define NOFLSH	0000200
+#define TOSTOP	0000400
+#define ECHOCTL	0001000
+#define ECHOPRT	0002000
+#define ECHOKE	0004000
+#define FLUSHO	0010000
+#define PENDIN	0040000
+#define IEXTEN	0100000
+
+/* tcflow() and TCXONC use these */
+#define	TCOOFF		0
+#define	TCOON		1
+#define	TCIOFF		2
+#define	TCION		3
+
+/* tcflush() and TCFLSH use these */
+#define	TCIFLUSH	0
+#define	TCOFLUSH	1
+#define	TCIOFLUSH	2
+
+/* tcsetattr uses these */
+#define	TCSANOW		0
+#define	TCSADRAIN	1
+#define	TCSAFLUSH	2
+
+#endif /* __ASM_GENERIC_TERMBITS_H */
diff --git a/include/asm-generic/termios.h b/include/asm-generic/termios.h
new file mode 100644
index 00000000000..d0922adc56d
--- /dev/null
+++ b/include/asm-generic/termios.h
@@ -0,0 +1,154 @@
+#ifndef _ASM_GENERIC_TERMIOS_H
+#define _ASM_GENERIC_TERMIOS_H
+/*
+ * Most architectures have straight copies of the x86 code, with
+ * varying levels of bug fixes on top. Usually it's a good idea
+ * to use this generic version instead, but be careful to avoid
+ * ABI changes.
+ * New architectures should not provide their own version.
+ */
+
+#include <asm/termbits.h>
+#include <asm/ioctls.h>
+
+struct winsize {
+	unsigned short ws_row;
+	unsigned short ws_col;
+	unsigned short ws_xpixel;
+	unsigned short ws_ypixel;
+};
+
+#define NCC 8
+struct termio {
+	unsigned short c_iflag;		/* input mode flags */
+	unsigned short c_oflag;		/* output mode flags */
+	unsigned short c_cflag;		/* control mode flags */
+	unsigned short c_lflag;		/* local mode flags */
+	unsigned char c_line;		/* line discipline */
+	unsigned char c_cc[NCC];	/* control characters */
+};
+
+/* modem lines */
+#define TIOCM_LE	0x001
+#define TIOCM_DTR	0x002
+#define TIOCM_RTS	0x004
+#define TIOCM_ST	0x008
+#define TIOCM_SR	0x010
+#define TIOCM_CTS	0x020
+#define TIOCM_CAR	0x040
+#define TIOCM_RNG	0x080
+#define TIOCM_DSR	0x100
+#define TIOCM_CD	TIOCM_CAR
+#define TIOCM_RI	TIOCM_RNG
+#define TIOCM_OUT1	0x2000
+#define TIOCM_OUT2	0x4000
+#define TIOCM_LOOP	0x8000
+
+/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
+
+#ifdef __KERNEL__
+
+#include <asm/uaccess.h>
+
+/*	intr=^C		quit=^\		erase=del	kill=^U
+	eof=^D		vtime=\0	vmin=\1		sxtc=\0
+	start=^Q	stop=^S		susp=^Z		eol=\0
+	reprint=^R	discard=^U	werase=^W	lnext=^V
+	eol2=\0
+*/
+#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
+
+/*
+ * Translate a "termio" structure into a "termios". Ugh.
+ */
+static inline int user_termio_to_kernel_termios(struct ktermios *termios,
+						const struct termio __user *termio)
+{
+	unsigned short tmp;
+
+	if (get_user(tmp, &termio->c_iflag) < 0)
+		goto fault;
+	termios->c_iflag = (0xffff0000 & termios->c_iflag) | tmp;
+
+	if (get_user(tmp, &termio->c_oflag) < 0)
+		goto fault;
+	termios->c_oflag = (0xffff0000 & termios->c_oflag) | tmp;
+
+	if (get_user(tmp, &termio->c_cflag) < 0)
+		goto fault;
+	termios->c_cflag = (0xffff0000 & termios->c_cflag) | tmp;
+
+	if (get_user(tmp, &termio->c_lflag) < 0)
+		goto fault;
+	termios->c_lflag = (0xffff0000 & termios->c_lflag) | tmp;
+
+	if (get_user(termios->c_line, &termio->c_line) < 0)
+		goto fault;
+
+	if (copy_from_user(termios->c_cc, termio->c_cc, NCC) != 0)
+		goto fault;
+
+	return 0;
+
+ fault:
+	return -EFAULT;
+}
+
+/*
+ * Translate a "termios" structure into a "termio". Ugh.
+ */
+static inline int kernel_termios_to_user_termio(struct termio __user *termio,
+						struct ktermios *termios)
+{
+	if (put_user(termios->c_iflag, &termio->c_iflag) < 0 ||
+	    put_user(termios->c_oflag, &termio->c_oflag) < 0 ||
+	    put_user(termios->c_cflag, &termio->c_cflag) < 0 ||
+	    put_user(termios->c_lflag, &termio->c_lflag) < 0 ||
+	    put_user(termios->c_line,  &termio->c_line) < 0 ||
+	    copy_to_user(termio->c_cc, termios->c_cc, NCC) != 0)
+		return -EFAULT;
+
+	return 0;
+}
+
+#ifdef TCGETS2
+static inline int user_termios_to_kernel_termios(struct ktermios *k,
+						 struct termios2 __user *u)
+{
+	return copy_from_user(k, u, sizeof(struct termios2));
+}
+
+static inline int kernel_termios_to_user_termios(struct termios2 __user *u,
+						 struct ktermios *k)
+{
+	return copy_to_user(u, k, sizeof(struct termios2));
+}
+
+static inline int user_termios_to_kernel_termios_1(struct ktermios *k,
+						   struct termios __user *u)
+{
+	return copy_from_user(k, u, sizeof(struct termios));
+}
+
+static inline int kernel_termios_to_user_termios_1(struct termios __user *u,
+						   struct ktermios *k)
+{
+	return copy_to_user(u, k, sizeof(struct termios));
+}
+#else /* TCGETS2 */
+static inline int user_termios_to_kernel_termios(struct ktermios *k,
+						 struct termios __user *u)
+{
+	return copy_from_user(k, u, sizeof(struct termios));
+}
+
+static inline int kernel_termios_to_user_termios(struct termios __user *u,
+						 struct ktermios *k)
+{
+	return copy_to_user(u, k, sizeof(struct termios));
+}
+#endif /* TCGETS2 */
+
+#endif	/* __KERNEL__ */
+
+#endif /* _ASM_GENERIC_TERMIOS_H */
diff --git a/include/asm-generic/types.h b/include/asm-generic/types.h
new file mode 100644
index 00000000000..fba7d33ca3f
--- /dev/null
+++ b/include/asm-generic/types.h
@@ -0,0 +1,42 @@
+#ifndef _ASM_GENERIC_TYPES_H
+#define _ASM_GENERIC_TYPES_H
+/*
+ * int-ll64 is used practically everywhere now,
+ * so use it as a reasonable default.
+ */
+#include <asm-generic/int-ll64.h>
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned short umode_t;
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * These aren't exported outside the kernel to avoid name space clashes
+ */
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+/*
+ * DMA addresses may be very different from physical addresses
+ * and pointers. i386 and powerpc may have 64 bit DMA on 32 bit
+ * systems, while sparc64 uses 32 bit DMA addresses for 64 bit
+ * physical addresses.
+ * This default defines dma_addr_t to have the same size as
+ * phys_addr_t, which is the most common way.
+ * Do not define the dma64_addr_t type, which never really
+ * worked.
+ */
+#ifndef dma_addr_t
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+typedef u64 dma_addr_t;
+#else
+typedef u32 dma_addr_t;
+#endif /* CONFIG_PHYS_ADDR_T_64BIT */
+#endif /* dma_addr_t */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_GENERIC_TYPES_H */
diff --git a/include/asm-generic/ucontext.h b/include/asm-generic/ucontext.h
new file mode 100644
index 00000000000..ad77343e8a9
--- /dev/null
+++ b/include/asm-generic/ucontext.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_GENERIC_UCONTEXT_H
+#define __ASM_GENERIC_UCONTEXT_H
+
+struct ucontext {
+	unsigned long	  uc_flags;
+	struct ucontext  *uc_link;
+	stack_t		  uc_stack;
+	struct sigcontext uc_mcontext;
+	sigset_t	  uc_sigmask;	/* mask last for extensibility */
+};
+
+#endif /* __ASM_GENERIC_UCONTEXT_H */
-- 
cgit v1.2.3-70-g09d2


From e64a1617eca39d62b248a11699de9c1195369661 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:28 +0000
Subject: asm-generic: add a generic unistd.h

A new architecture should only define a minimal set of system
calls while still providing the full functionality. This version
of unistd.h has gone through intensive review to make sure that
by default it only enables syscalls that do not already have
a more featureful replacement.

It is modeled after the x86-64 version of unistd.h, which unifies
the syscall number definition and the actual system call table
in a single file, in order to keep them synchronized much more
easily.

This first version still keeps legacy system call definitions
around, guarded by various #ifdefs, and with numbers larger
than 1024. The idea behind this is to make it easier for
new architectures to transition from a full list to the reduced
set. In particular, the new microblaze architecture that should
migrate to using the generic ABI headers can at least use an
existing uClibc source tree without major rewrites during the
conversion.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/Kbuild   |   1 +
 include/asm-generic/unistd.h | 854 +++++++++++++++++++++++++++++++++++++++++++
 scripts/checksyscalls.sh     |  92 ++++-
 3 files changed, 944 insertions(+), 3 deletions(-)
 create mode 100644 include/asm-generic/unistd.h

(limited to 'include')

diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index 11a78b8e2fc..eb62334cda2 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -27,6 +27,7 @@ header-y += termbits.h
 header-y += termios.h
 header-y += types.h
 header-y += ucontext.h
+header-y += unistd.h
 
 unifdef-y += int-l64.h
 unifdef-y += int-ll64.h
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
new file mode 100644
index 00000000000..5b34b6233d6
--- /dev/null
+++ b/include/asm-generic/unistd.h
@@ -0,0 +1,854 @@
+#if !defined(_ASM_GENERIC_UNISTD_H) || defined(__SYSCALL)
+#define _ASM_GENERIC_UNISTD_H
+
+#include <asm/bitsperlong.h>
+
+/*
+ * This file contains the system call numbers, based on the
+ * layout of the x86-64 architecture, which embeds the
+ * pointer to the syscall in the table.
+ *
+ * As a basic principle, no duplication of functionality
+ * should be added, e.g. we don't use lseek when llseek
+ * is present. New architectures should use this file
+ * and implement the less feature-full calls in user space.
+ */
+
+#ifndef __SYSCALL
+#define __SYSCALL(x, y)
+#endif
+
+#if __BITS_PER_LONG == 32
+#define __SC_3264(_nr, _32, _64) __SYSCALL(_nr, _32)
+#else
+#define __SC_3264(_nr, _32, _64) __SYSCALL(_nr, _64)
+#endif
+
+#define __NR_io_setup 0
+__SYSCALL(__NR_io_setup, sys_io_setup)
+#define __NR_io_destroy 1
+__SYSCALL(__NR_io_destroy, sys_io_destroy)
+#define __NR_io_submit 2
+__SYSCALL(__NR_io_submit, sys_io_submit)
+#define __NR_io_cancel 3
+__SYSCALL(__NR_io_cancel, sys_io_cancel)
+#define __NR_io_getevents 4
+__SYSCALL(__NR_io_getevents, sys_io_getevents)
+
+/* fs/xattr.c */
+#define __NR_setxattr 5
+__SYSCALL(__NR_setxattr, sys_setxattr)
+#define __NR_lsetxattr 6
+__SYSCALL(__NR_lsetxattr, sys_lsetxattr)
+#define __NR_fsetxattr 7
+__SYSCALL(__NR_fsetxattr, sys_fsetxattr)
+#define __NR_getxattr 8
+__SYSCALL(__NR_getxattr, sys_getxattr)
+#define __NR_lgetxattr 9
+__SYSCALL(__NR_lgetxattr, sys_lgetxattr)
+#define __NR_fgetxattr 10
+__SYSCALL(__NR_fgetxattr, sys_fgetxattr)
+#define __NR_listxattr 11
+__SYSCALL(__NR_listxattr, sys_listxattr)
+#define __NR_llistxattr 12
+__SYSCALL(__NR_llistxattr, sys_llistxattr)
+#define __NR_flistxattr 13
+__SYSCALL(__NR_flistxattr, sys_flistxattr)
+#define __NR_removexattr 14
+__SYSCALL(__NR_removexattr, sys_removexattr)
+#define __NR_lremovexattr 15
+__SYSCALL(__NR_lremovexattr, sys_lremovexattr)
+#define __NR_fremovexattr 16
+__SYSCALL(__NR_fremovexattr, sys_fremovexattr)
+
+/* fs/dcache.c */
+#define __NR_getcwd 17
+__SYSCALL(__NR_getcwd, sys_getcwd)
+
+/* fs/cookies.c */
+#define __NR_lookup_dcookie 18
+__SYSCALL(__NR_lookup_dcookie, sys_lookup_dcookie)
+
+/* fs/eventfd.c */
+#define __NR_eventfd2 19
+__SYSCALL(__NR_eventfd2, sys_eventfd2)
+
+/* fs/eventpoll.c */
+#define __NR_epoll_create1 20
+__SYSCALL(__NR_epoll_create1, sys_epoll_create1)
+#define __NR_epoll_ctl 21
+__SYSCALL(__NR_epoll_ctl, sys_epoll_ctl)
+#define __NR_epoll_pwait 22
+__SYSCALL(__NR_epoll_pwait, sys_epoll_pwait)
+
+/* fs/fcntl.c */
+#define __NR_dup 23
+__SYSCALL(__NR_dup, sys_dup)
+#define __NR_dup3 24
+__SYSCALL(__NR_dup3, sys_dup3)
+#define __NR3264_fcntl 25
+__SC_3264(__NR3264_fcntl, sys_fcntl64, sys_fcntl)
+
+/* fs/inotify_user.c */
+#define __NR_inotify_init1 26
+__SYSCALL(__NR_inotify_init1, sys_inotify_init1)
+#define __NR_inotify_add_watch 27
+__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch)
+#define __NR_inotify_rm_watch 28
+__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch)
+
+/* fs/ioctl.c */
+#define __NR_ioctl 29
+__SYSCALL(__NR_ioctl, sys_ioctl)
+
+/* fs/ioprio.c */
+#define __NR_ioprio_set 30
+__SYSCALL(__NR_ioprio_set, sys_ioprio_set)
+#define __NR_ioprio_get 31
+__SYSCALL(__NR_ioprio_get, sys_ioprio_get)
+
+/* fs/locks.c */
+#define __NR_flock 32
+__SYSCALL(__NR_flock, sys_flock)
+
+/* fs/namei.c */
+#define __NR_mknodat 33
+__SYSCALL(__NR_mknodat, sys_mknodat)
+#define __NR_mkdirat 34
+__SYSCALL(__NR_mkdirat, sys_mkdirat)
+#define __NR_unlinkat 35
+__SYSCALL(__NR_unlinkat, sys_unlinkat)
+#define __NR_symlinkat 36
+__SYSCALL(__NR_symlinkat, sys_symlinkat)
+#define __NR_linkat 37
+__SYSCALL(__NR_linkat, sys_linkat)
+#define __NR_renameat 38
+__SYSCALL(__NR_renameat, sys_renameat)
+
+/* fs/namespace.c */
+#define __NR_umount2 39
+__SYSCALL(__NR_umount2, sys_umount)
+#define __NR_mount 40
+__SYSCALL(__NR_mount, sys_mount)
+#define __NR_pivot_root 41
+__SYSCALL(__NR_pivot_root, sys_pivot_root)
+
+/* fs/nfsctl.c */
+#define __NR_nfsservctl 42
+__SYSCALL(__NR_nfsservctl, sys_nfsservctl)
+
+/* fs/open.c */
+#define __NR3264_statfs 43
+__SC_3264(__NR3264_statfs, sys_statfs64, sys_statfs)
+#define __NR3264_fstatfs 44
+__SC_3264(__NR3264_fstatfs, sys_fstatfs64, sys_fstatfs)
+#define __NR3264_truncate 45
+__SC_3264(__NR3264_truncate, sys_truncate64, sys_truncate)
+#define __NR3264_ftruncate 46
+__SC_3264(__NR3264_ftruncate, sys_ftruncate64, sys_ftruncate)
+
+#define __NR_fallocate 47
+__SYSCALL(__NR_fallocate, sys_fallocate)
+#define __NR_faccessat 48
+__SYSCALL(__NR_faccessat, sys_faccessat)
+#define __NR_chdir 49
+__SYSCALL(__NR_chdir, sys_chdir)
+#define __NR_fchdir 50
+__SYSCALL(__NR_fchdir, sys_fchdir)
+#define __NR_chroot 51
+__SYSCALL(__NR_chroot, sys_chroot)
+#define __NR_fchmod 52
+__SYSCALL(__NR_fchmod, sys_fchmod)
+#define __NR_fchmodat 53
+__SYSCALL(__NR_fchmodat, sys_fchmodat)
+#define __NR_fchownat 54
+__SYSCALL(__NR_fchownat, sys_fchownat)
+#define __NR_fchown 55
+__SYSCALL(__NR_fchown, sys_fchown)
+#define __NR_openat 56
+__SYSCALL(__NR_openat, sys_openat)
+#define __NR_close 57
+__SYSCALL(__NR_close, sys_close)
+#define __NR_vhangup 58
+__SYSCALL(__NR_vhangup, sys_vhangup)
+
+/* fs/pipe.c */
+#define __NR_pipe2 59
+__SYSCALL(__NR_pipe2, sys_pipe2)
+
+/* fs/quota.c */
+#define __NR_quotactl 60
+__SYSCALL(__NR_quotactl, sys_quotactl)
+
+/* fs/readdir.c */
+#define __NR_getdents64 61
+__SYSCALL(__NR_getdents64, sys_getdents64)
+
+/* fs/read_write.c */
+#define __NR3264_lseek 62
+__SC_3264(__NR3264_lseek, sys_llseek, sys_lseek)
+#define __NR_read 63
+__SYSCALL(__NR_read, sys_read)
+#define __NR_write 64
+__SYSCALL(__NR_write, sys_write)
+#define __NR_readv 65
+__SYSCALL(__NR_readv, sys_readv)
+#define __NR_writev 66
+__SYSCALL(__NR_writev, sys_writev)
+#define __NR_pread64 67
+__SYSCALL(__NR_pread64, sys_pread64)
+#define __NR_pwrite64 68
+__SYSCALL(__NR_pwrite64, sys_pwrite64)
+#define __NR_preadv 69
+__SYSCALL(__NR_preadv, sys_preadv)
+#define __NR_pwritev 70
+__SYSCALL(__NR_pwritev, sys_pwritev)
+
+/* fs/sendfile.c */
+#define __NR3264_sendfile 71
+__SC_3264(__NR3264_sendfile, sys_sendfile64, sys_sendfile)
+
+/* fs/select.c */
+#define __NR_pselect6 72
+__SYSCALL(__NR_pselect6, sys_pselect6)
+#define __NR_ppoll 73
+__SYSCALL(__NR_ppoll, sys_ppoll)
+
+/* fs/signalfd.c */
+#define __NR_signalfd4 74
+__SYSCALL(__NR_signalfd4, sys_signalfd4)
+
+/* fs/splice.c */
+#define __NR_vmsplice 75
+__SYSCALL(__NR_vmsplice, sys_vmsplice)
+#define __NR_splice 76
+__SYSCALL(__NR_splice, sys_splice)
+#define __NR_tee 77
+__SYSCALL(__NR_tee, sys_tee)
+
+/* fs/stat.c */
+#define __NR_readlinkat 78
+__SYSCALL(__NR_readlinkat, sys_readlinkat)
+#define __NR3264_fstatat 79
+__SC_3264(__NR3264_fstatat, sys_fstatat64, sys_newfstatat)
+#define __NR3264_fstat 80
+__SC_3264(__NR3264_fstat, sys_fstat64, sys_newfstat)
+
+/* fs/sync.c */
+#define __NR_sync 81
+__SYSCALL(__NR_sync, sys_sync)
+#define __NR_fsync 82
+__SYSCALL(__NR_fsync, sys_fsync)
+#define __NR_fdatasync 83
+__SYSCALL(__NR_fdatasync, sys_fdatasync)
+#define __NR_sync_file_range 84
+__SYSCALL(__NR_sync_file_range, sys_sync_file_range) /* .long sys_sync_file_range2, */
+
+/* fs/timerfd.c */
+#define __NR_timerfd_create 85
+__SYSCALL(__NR_timerfd_create, sys_timerfd_create)
+#define __NR_timerfd_settime 86
+__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
+#define __NR_timerfd_gettime 87
+__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
+
+/* fs/utimes.c */
+#define __NR_utimensat 88
+__SYSCALL(__NR_utimensat, sys_utimensat)
+
+/* kernel/acct.c */
+#define __NR_acct 89
+__SYSCALL(__NR_acct, sys_acct)
+
+/* kernel/capability.c */
+#define __NR_capget 90
+__SYSCALL(__NR_capget, sys_capget)
+#define __NR_capset 91
+__SYSCALL(__NR_capset, sys_capset)
+
+/* kernel/exec_domain.c */
+#define __NR_personality 92
+__SYSCALL(__NR_personality, sys_personality)
+
+/* kernel/exit.c */
+#define __NR_exit 93
+__SYSCALL(__NR_exit, sys_exit)
+#define __NR_exit_group 94
+__SYSCALL(__NR_exit_group, sys_exit_group)
+#define __NR_waitid 95
+__SYSCALL(__NR_waitid, sys_waitid)
+
+/* kernel/fork.c */
+#define __NR_set_tid_address 96
+__SYSCALL(__NR_set_tid_address, sys_set_tid_address)
+#define __NR_unshare 97
+__SYSCALL(__NR_unshare, sys_unshare)
+
+/* kernel/futex.c */
+#define __NR_futex 98
+__SYSCALL(__NR_futex, sys_futex)
+#define __NR_set_robust_list 99
+__SYSCALL(__NR_set_robust_list, sys_set_robust_list)
+#define __NR_get_robust_list 100
+__SYSCALL(__NR_get_robust_list, sys_get_robust_list)
+
+/* kernel/hrtimer.c */
+#define __NR_nanosleep 101
+__SYSCALL(__NR_nanosleep, sys_nanosleep)
+
+/* kernel/itimer.c */
+#define __NR_getitimer 102
+__SYSCALL(__NR_getitimer, sys_getitimer)
+#define __NR_setitimer 103
+__SYSCALL(__NR_setitimer, sys_setitimer)
+
+/* kernel/kexec.c */
+#define __NR_kexec_load 104
+__SYSCALL(__NR_kexec_load, sys_kexec_load)
+
+/* kernel/module.c */
+#define __NR_init_module 105
+__SYSCALL(__NR_init_module, sys_init_module)
+#define __NR_delete_module 106
+__SYSCALL(__NR_delete_module, sys_delete_module)
+
+/* kernel/posix-timers.c */
+#define __NR_timer_create 107
+__SYSCALL(__NR_timer_create, sys_timer_create)
+#define __NR_timer_gettime 108
+__SYSCALL(__NR_timer_gettime, sys_timer_gettime)
+#define __NR_timer_getoverrun 109
+__SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun)
+#define __NR_timer_settime 110
+__SYSCALL(__NR_timer_settime, sys_timer_settime)
+#define __NR_timer_delete 111
+__SYSCALL(__NR_timer_delete, sys_timer_delete)
+#define __NR_clock_settime 112
+__SYSCALL(__NR_clock_settime, sys_clock_settime)
+#define __NR_clock_gettime 113
+__SYSCALL(__NR_clock_gettime, sys_clock_gettime)
+#define __NR_clock_getres 114
+__SYSCALL(__NR_clock_getres, sys_clock_getres)
+#define __NR_clock_nanosleep 115
+__SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep)
+
+/* kernel/printk.c */
+#define __NR_syslog 116
+__SYSCALL(__NR_syslog, sys_syslog)
+
+/* kernel/ptrace.c */
+#define __NR_ptrace 117
+__SYSCALL(__NR_ptrace, sys_ptrace)
+
+/* kernel/sched.c */
+#define __NR_sched_setparam 118
+__SYSCALL(__NR_sched_setparam, sys_sched_setparam)
+#define __NR_sched_setscheduler 119
+__SYSCALL(__NR_sched_setscheduler, sys_sched_setscheduler)
+#define __NR_sched_getscheduler 120
+__SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler)
+#define __NR_sched_getparam 121
+__SYSCALL(__NR_sched_getparam, sys_sched_getparam)
+#define __NR_sched_setaffinity 122
+__SYSCALL(__NR_sched_setaffinity, sys_sched_setaffinity)
+#define __NR_sched_getaffinity 123
+__SYSCALL(__NR_sched_getaffinity, sys_sched_getaffinity)
+#define __NR_sched_yield 124
+__SYSCALL(__NR_sched_yield, sys_sched_yield)
+#define __NR_sched_get_priority_max 125
+__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max)
+#define __NR_sched_get_priority_min 126
+__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min)
+#define __NR_sched_rr_get_interval 127
+__SYSCALL(__NR_sched_rr_get_interval, sys_sched_rr_get_interval)
+
+/* kernel/signal.c */
+#define __NR_restart_syscall 128
+__SYSCALL(__NR_restart_syscall, sys_restart_syscall)
+#define __NR_kill 129
+__SYSCALL(__NR_kill, sys_kill)
+#define __NR_tkill 130
+__SYSCALL(__NR_tkill, sys_tkill)
+#define __NR_tgkill 131
+__SYSCALL(__NR_tgkill, sys_tgkill)
+#define __NR_sigaltstack 132
+__SYSCALL(__NR_sigaltstack, sys_sigaltstack)
+#define __NR_rt_sigsuspend 133
+__SYSCALL(__NR_rt_sigsuspend, sys_rt_sigsuspend) /* __ARCH_WANT_SYS_RT_SIGSUSPEND */
+#define __NR_rt_sigaction 134
+__SYSCALL(__NR_rt_sigaction, sys_rt_sigaction) /* __ARCH_WANT_SYS_RT_SIGACTION */
+#define __NR_rt_sigprocmask 135
+__SYSCALL(__NR_rt_sigprocmask, sys_rt_sigprocmask)
+#define __NR_rt_sigpending 136
+__SYSCALL(__NR_rt_sigpending, sys_rt_sigpending)
+#define __NR_rt_sigtimedwait 137
+__SYSCALL(__NR_rt_sigtimedwait, sys_rt_sigtimedwait)
+#define __NR_rt_sigqueueinfo 138
+__SYSCALL(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo)
+#define __NR_rt_sigreturn 139
+__SYSCALL(__NR_rt_sigreturn, sys_rt_sigreturn) /* sys_rt_sigreturn_wrapper, */
+
+/* kernel/sys.c */
+#define __NR_setpriority 140
+__SYSCALL(__NR_setpriority, sys_setpriority)
+#define __NR_getpriority 141
+__SYSCALL(__NR_getpriority, sys_getpriority)
+#define __NR_reboot 142
+__SYSCALL(__NR_reboot, sys_reboot)
+#define __NR_setregid 143
+__SYSCALL(__NR_setregid, sys_setregid)
+#define __NR_setgid 144
+__SYSCALL(__NR_setgid, sys_setgid)
+#define __NR_setreuid 145
+__SYSCALL(__NR_setreuid, sys_setreuid)
+#define __NR_setuid 146
+__SYSCALL(__NR_setuid, sys_setuid)
+#define __NR_setresuid 147
+__SYSCALL(__NR_setresuid, sys_setresuid)
+#define __NR_getresuid 148
+__SYSCALL(__NR_getresuid, sys_getresuid)
+#define __NR_setresgid 149
+__SYSCALL(__NR_setresgid, sys_setresgid)
+#define __NR_getresgid 150
+__SYSCALL(__NR_getresgid, sys_getresgid)
+#define __NR_setfsuid 151
+__SYSCALL(__NR_setfsuid, sys_setfsuid)
+#define __NR_setfsgid 152
+__SYSCALL(__NR_setfsgid, sys_setfsgid)
+#define __NR_times 153
+__SYSCALL(__NR_times, sys_times)
+#define __NR_setpgid 154
+__SYSCALL(__NR_setpgid, sys_setpgid)
+#define __NR_getpgid 155
+__SYSCALL(__NR_getpgid, sys_getpgid)
+#define __NR_getsid 156
+__SYSCALL(__NR_getsid, sys_getsid)
+#define __NR_setsid 157
+__SYSCALL(__NR_setsid, sys_setsid)
+#define __NR_getgroups 158
+__SYSCALL(__NR_getgroups, sys_getgroups)
+#define __NR_setgroups 159
+__SYSCALL(__NR_setgroups, sys_setgroups)
+#define __NR_uname 160
+__SYSCALL(__NR_uname, sys_newuname)
+#define __NR_sethostname 161
+__SYSCALL(__NR_sethostname, sys_sethostname)
+#define __NR_setdomainname 162
+__SYSCALL(__NR_setdomainname, sys_setdomainname)
+#define __NR_getrlimit 163
+__SYSCALL(__NR_getrlimit, sys_getrlimit)
+#define __NR_setrlimit 164
+__SYSCALL(__NR_setrlimit, sys_setrlimit)
+#define __NR_getrusage 165
+__SYSCALL(__NR_getrusage, sys_getrusage)
+#define __NR_umask 166
+__SYSCALL(__NR_umask, sys_umask)
+#define __NR_prctl 167
+__SYSCALL(__NR_prctl, sys_prctl)
+#define __NR_getcpu 168
+__SYSCALL(__NR_getcpu, sys_getcpu)
+
+/* kernel/time.c */
+#define __NR_gettimeofday 169
+__SYSCALL(__NR_gettimeofday, sys_gettimeofday)
+#define __NR_settimeofday 170
+__SYSCALL(__NR_settimeofday, sys_settimeofday)
+#define __NR_adjtimex 171
+__SYSCALL(__NR_adjtimex, sys_adjtimex)
+
+/* kernel/timer.c */
+#define __NR_getpid 172
+__SYSCALL(__NR_getpid, sys_getpid)
+#define __NR_getppid 173
+__SYSCALL(__NR_getppid, sys_getppid)
+#define __NR_getuid 174
+__SYSCALL(__NR_getuid, sys_getuid)
+#define __NR_geteuid 175
+__SYSCALL(__NR_geteuid, sys_geteuid)
+#define __NR_getgid 176
+__SYSCALL(__NR_getgid, sys_getgid)
+#define __NR_getegid 177
+__SYSCALL(__NR_getegid, sys_getegid)
+#define __NR_gettid 178
+__SYSCALL(__NR_gettid, sys_gettid)
+#define __NR_sysinfo 179
+__SYSCALL(__NR_sysinfo, sys_sysinfo)
+
+/* ipc/mqueue.c */
+#define __NR_mq_open 180
+__SYSCALL(__NR_mq_open, sys_mq_open)
+#define __NR_mq_unlink 181
+__SYSCALL(__NR_mq_unlink, sys_mq_unlink)
+#define __NR_mq_timedsend 182
+__SYSCALL(__NR_mq_timedsend, sys_mq_timedsend)
+#define __NR_mq_timedreceive 183
+__SYSCALL(__NR_mq_timedreceive, sys_mq_timedreceive)
+#define __NR_mq_notify 184
+__SYSCALL(__NR_mq_notify, sys_mq_notify)
+#define __NR_mq_getsetattr 185
+__SYSCALL(__NR_mq_getsetattr, sys_mq_getsetattr)
+
+/* ipc/msg.c */
+#define __NR_msgget 186
+__SYSCALL(__NR_msgget, sys_msgget)
+#define __NR_msgctl 187
+__SYSCALL(__NR_msgctl, sys_msgctl)
+#define __NR_msgrcv 188
+__SYSCALL(__NR_msgrcv, sys_msgrcv)
+#define __NR_msgsnd 189
+__SYSCALL(__NR_msgsnd, sys_msgsnd)
+
+/* ipc/sem.c */
+#define __NR_semget 190
+__SYSCALL(__NR_semget, sys_semget)
+#define __NR_semctl 191
+__SYSCALL(__NR_semctl, sys_semctl)
+#define __NR_semtimedop 192
+__SYSCALL(__NR_semtimedop, sys_semtimedop)
+#define __NR_semop 193
+__SYSCALL(__NR_semop, sys_semop)
+
+/* ipc/shm.c */
+#define __NR_shmget 194
+__SYSCALL(__NR_shmget, sys_shmget)
+#define __NR_shmctl 195
+__SYSCALL(__NR_shmctl, sys_shmctl)
+#define __NR_shmat 196
+__SYSCALL(__NR_shmat, sys_shmat)
+#define __NR_shmdt 197
+__SYSCALL(__NR_shmdt, sys_shmdt)
+
+/* net/socket.c */
+#define __NR_socket 198
+__SYSCALL(__NR_socket, sys_socket)
+#define __NR_socketpair 199
+__SYSCALL(__NR_socketpair, sys_socketpair)
+#define __NR_bind 200
+__SYSCALL(__NR_bind, sys_bind)
+#define __NR_listen 201
+__SYSCALL(__NR_listen, sys_listen)
+#define __NR_accept 202
+__SYSCALL(__NR_accept, sys_accept)
+#define __NR_connect 203
+__SYSCALL(__NR_connect, sys_connect)
+#define __NR_getsockname 204
+__SYSCALL(__NR_getsockname, sys_getsockname)
+#define __NR_getpeername 205
+__SYSCALL(__NR_getpeername, sys_getpeername)
+#define __NR_sendto 206
+__SYSCALL(__NR_sendto, sys_sendto)
+#define __NR_recvfrom 207
+__SYSCALL(__NR_recvfrom, sys_recvfrom)
+#define __NR_setsockopt 208
+__SYSCALL(__NR_setsockopt, sys_setsockopt)
+#define __NR_getsockopt 209
+__SYSCALL(__NR_getsockopt, sys_getsockopt)
+#define __NR_shutdown 210
+__SYSCALL(__NR_shutdown, sys_shutdown)
+#define __NR_sendmsg 211
+__SYSCALL(__NR_sendmsg, sys_sendmsg)
+#define __NR_recvmsg 212
+__SYSCALL(__NR_recvmsg, sys_recvmsg)
+
+/* mm/filemap.c */
+#define __NR_readahead 213
+__SYSCALL(__NR_readahead, sys_readahead)
+
+/* mm/nommu.c, also with MMU */
+#define __NR_brk 214
+__SYSCALL(__NR_brk, sys_brk)
+#define __NR_munmap 215
+__SYSCALL(__NR_munmap, sys_munmap)
+#define __NR_mremap 216
+__SYSCALL(__NR_mremap, sys_mremap)
+
+/* security/keys/keyctl.c */
+#define __NR_add_key 217
+__SYSCALL(__NR_add_key, sys_add_key)
+#define __NR_request_key 218
+__SYSCALL(__NR_request_key, sys_request_key)
+#define __NR_keyctl 219
+__SYSCALL(__NR_keyctl, sys_keyctl)
+
+/* arch/example/kernel/sys_example.c */
+#define __NR_clone 220
+__SYSCALL(__NR_clone, sys_clone)	/* .long sys_clone_wrapper */
+#define __NR_execve 221
+__SYSCALL(__NR_execve, sys_execve)	/* .long sys_execve_wrapper */
+
+#define __NR3264_mmap 222
+__SC_3264(__NR3264_mmap, sys_mmap2, sys_mmap)
+/* mm/fadvise.c */
+#define __NR3264_fadvise64 223
+__SC_3264(__NR3264_fadvise64, sys_fadvise64_64, sys_fadvise64)
+
+/* mm/, CONFIG_MMU only */
+#ifndef __ARCH_NOMMU
+#define __NR_swapon 224
+__SYSCALL(__NR_swapon, sys_swapon)
+#define __NR_swapoff 225
+__SYSCALL(__NR_swapoff, sys_swapoff)
+#define __NR_mprotect 226
+__SYSCALL(__NR_mprotect, sys_mprotect)
+#define __NR_msync 227
+__SYSCALL(__NR_msync, sys_msync)
+#define __NR_mlock 228
+__SYSCALL(__NR_mlock, sys_mlock)
+#define __NR_munlock 229
+__SYSCALL(__NR_munlock, sys_munlock)
+#define __NR_mlockall 230
+__SYSCALL(__NR_mlockall, sys_mlockall)
+#define __NR_munlockall 231
+__SYSCALL(__NR_munlockall, sys_munlockall)
+#define __NR_mincore 232
+__SYSCALL(__NR_mincore, sys_mincore)
+#define __NR_madvise 233
+__SYSCALL(__NR_madvise, sys_madvise)
+#define __NR_remap_file_pages 234
+__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages)
+#define __NR_mbind 235
+__SYSCALL(__NR_mbind, sys_mbind)
+#define __NR_get_mempolicy 236
+__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy)
+#define __NR_set_mempolicy 237
+__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy)
+#define __NR_migrate_pages 238
+__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
+#define __NR_move_pages 239
+__SYSCALL(__NR_move_pages, sys_move_pages)
+#endif
+
+#undef __NR_syscalls
+#define __NR_syscalls 240
+
+/*
+ * All syscalls below here should go away really,
+ * these are provided for both review and as a porting
+ * help for the C library version.
+*
+ * Last chance: are any of these important enought to
+ * enable by default?
+ */
+#ifdef __ARCH_WANT_SYSCALL_NO_AT
+#define __NR_open 1024
+__SYSCALL(__NR_open, sys_open)
+#define __NR_link 1025
+__SYSCALL(__NR_link, sys_link)
+#define __NR_unlink 1026
+__SYSCALL(__NR_unlink, sys_unlink)
+#define __NR_mknod 1027
+__SYSCALL(__NR_mknod, sys_mknod)
+#define __NR_chmod 1028
+__SYSCALL(__NR_chmod, sys_chmod)
+#define __NR_chown 1029
+__SYSCALL(__NR_chown, sys_chown)
+#define __NR_mkdir 1030
+__SYSCALL(__NR_mkdir, sys_mkdir)
+#define __NR_rmdir 1031
+__SYSCALL(__NR_rmdir, sys_rmdir)
+#define __NR_lchown 1032
+__SYSCALL(__NR_lchown, sys_lchown)
+#define __NR_access 1033
+__SYSCALL(__NR_access, sys_access)
+#define __NR_rename 1034
+__SYSCALL(__NR_rename, sys_rename)
+#define __NR_readlink 1035
+__SYSCALL(__NR_readlink, sys_readlink)
+#define __NR_symlink 1036
+__SYSCALL(__NR_symlink, sys_symlink)
+#define __NR_utimes 1037
+__SYSCALL(__NR_utimes, sys_utimes)
+#define __NR3264_stat 1038
+__SC_3264(__NR3264_stat, sys_stat64, sys_newstat)
+#define __NR3264_lstat 1039
+__SC_3264(__NR3264_lstat, sys_lstat64, sys_newlstat)
+
+#undef __NR_syscalls
+#define __NR_syscalls (__NR3264_lstat+1)
+#endif /* __ARCH_WANT_SYSCALL_NO_AT */
+
+#ifdef __ARCH_WANT_SYSCALL_NO_FLAGS
+#define __NR_pipe 1040
+__SYSCALL(__NR_pipe, sys_pipe)
+#define __NR_dup2 1041
+__SYSCALL(__NR_dup2, sys_dup2)
+#define __NR_epoll_create 1042
+__SYSCALL(__NR_epoll_create, sys_epoll_create)
+#define __NR_inotify_init 1043
+__SYSCALL(__NR_inotify_init, sys_inotify_init)
+#define __NR_eventfd 1044
+__SYSCALL(__NR_eventfd, sys_eventfd)
+#define __NR_signalfd 1045
+__SYSCALL(__NR_signalfd, sys_signalfd)
+
+#undef __NR_syscalls
+#define __NR_syscalls (__NR_signalfd+1)
+#endif /* __ARCH_WANT_SYSCALL_NO_FLAGS */
+
+#if __BITS_PER_LONG == 32 && defined(__ARCH_WANT_SYSCALL_OFF_T)
+#define __NR_sendfile 1046
+__SYSCALL(__NR_sendfile, sys_sendfile)
+#define __NR_ftruncate 1047
+__SYSCALL(__NR_ftruncate, sys_ftruncate)
+#define __NR_truncate 1048
+__SYSCALL(__NR_truncate, sys_truncate)
+#define __NR_stat 1049
+__SYSCALL(__NR_stat, sys_newstat)
+#define __NR_lstat 1050
+__SYSCALL(__NR_lstat, sys_newlstat)
+#define __NR_fstat 1051
+__SYSCALL(__NR_fstat, sys_newfstat)
+#define __NR_fcntl 1052
+__SYSCALL(__NR_fcntl, sys_fcntl)
+#define __NR_fadvise64 1053
+#define __ARCH_WANT_SYS_FADVISE64
+__SYSCALL(__NR_fadvise64, sys_fadvise64)
+#define __NR_newfstatat 1054
+#define __ARCH_WANT_SYS_NEWFSTATAT
+__SYSCALL(__NR_newfstatat, sys_newfstatat)
+#define __NR_fstatfs 1055
+__SYSCALL(__NR_fstatfs, sys_fstatfs)
+#define __NR_statfs 1056
+__SYSCALL(__NR_statfs, sys_statfs)
+#define __NR_lseek 1057
+__SYSCALL(__NR_lseek, sys_lseek)
+#define __NR_mmap 1058
+__SYSCALL(__NR_mmap, sys_mmap)
+
+#undef __NR_syscalls
+#define __NR_syscalls (__NR_mmap+1)
+#endif /* 32 bit off_t syscalls */
+
+#ifdef __ARCH_WANT_SYSCALL_DEPRECATED
+#define __NR_alarm 1059
+#define __ARCH_WANT_SYS_ALARM
+__SYSCALL(__NR_alarm, sys_alarm)
+#define __NR_getpgrp 1060
+#define __ARCH_WANT_SYS_GETPGRP
+__SYSCALL(__NR_getpgrp, sys_getpgrp)
+#define __NR_pause 1061
+#define __ARCH_WANT_SYS_PAUSE
+__SYSCALL(__NR_pause, sys_pause)
+#define __NR_time 1062
+#define __ARCH_WANT_SYS_TIME
+__SYSCALL(__NR_time, sys_time)
+#define __NR_utime 1063
+#define __ARCH_WANT_SYS_UTIME
+__SYSCALL(__NR_utime, sys_utime)
+
+#define __NR_creat 1064
+__SYSCALL(__NR_creat, sys_creat)
+#define __NR_getdents 1065
+#define __ARCH_WANT_SYS_GETDENTS
+__SYSCALL(__NR_getdents, sys_getdents)
+#define __NR_futimesat 1066
+__SYSCALL(__NR_futimesat, sys_futimesat)
+#define __NR_select 1067
+#define __ARCH_WANT_SYS_SELECT
+__SYSCALL(__NR_select, sys_select)
+#define __NR_poll 1068
+__SYSCALL(__NR_poll, sys_poll)
+#define __NR_epoll_wait 1069
+__SYSCALL(__NR_epoll_wait, sys_epoll_wait)
+#define __NR_ustat 1070
+__SYSCALL(__NR_ustat, sys_ustat)
+#define __NR_vfork 1071
+__SYSCALL(__NR_vfork, sys_vfork)
+#define __NR_wait4 1072
+__SYSCALL(__NR_wait4, sys_wait4)
+#define __NR_recv 1073
+__SYSCALL(__NR_recv, sys_recv)
+#define __NR_send 1074
+__SYSCALL(__NR_send, sys_send)
+#define __NR_bdflush 1075
+__SYSCALL(__NR_bdflush, sys_bdflush)
+#define __NR_umount 1076
+__SYSCALL(__NR_umount, sys_oldumount)
+#define __ARCH_WANT_SYS_OLDUMOUNT
+#define __NR_uselib 1077
+__SYSCALL(__NR_uselib, sys_uselib)
+#define __NR__sysctl 1078
+__SYSCALL(__NR__sysctl, sys_sysctl)
+
+#define __NR_fork 1079
+#ifdef CONFIG_MMU
+__SYSCALL(__NR_fork, sys_fork)
+#else
+__SYSCALL(__NR_fork, sys_ni_syscall)
+#endif /* CONFIG_MMU */
+
+#undef __NR_syscalls
+#define __NR_syscalls (__NR_fork+1)
+
+#endif /* __ARCH_WANT_SYSCALL_DEPRECATED */
+
+/*
+ * 32 bit systems traditionally used different
+ * syscalls for off_t and loff_t arguments, while
+ * 64 bit systems only need the off_t version.
+ * For new 32 bit platforms, there is no need to
+ * implement the old 32 bit off_t syscalls, so
+ * they take different names.
+ * Here we map the numbers so that both versions
+ * use the same syscall table layout.
+ */
+#if __BITS_PER_LONG == 64
+#define __NR_fcntl __NR3264_fcntl
+#define __NR_statfs __NR3264_statfs
+#define __NR_fstatfs __NR3264_fstatfs
+#define __NR_truncate __NR3264_truncate
+#define __NR_ftruncate __NR3264_truncate
+#define __NR_lseek __NR3264_lseek
+#define __NR_sendfile __NR3264_sendfile
+#define __NR_newfstatat __NR3264_fstatat
+#define __NR_fstat __NR3264_fstat
+#define __NR_mmap __NR3264_mmap
+#define __NR_fadvise64 __NR3264_fadvise64
+#ifdef __NR3264_stat
+#define __NR_stat __NR3264_stat
+#define __NR_lstat __NR3264_lstat
+#endif
+#else
+#define __NR_fcntl64 __NR3264_fcntl
+#define __NR_statfs64 __NR3264_statfs
+#define __NR_fstatfs64 __NR3264_fstatfs
+#define __NR_truncate64 __NR3264_truncate
+#define __NR_ftruncate64 __NR3264_truncate
+#define __NR_llseek __NR3264_lseek
+#define __NR_sendfile64 __NR3264_sendfile
+#define __NR_fstatat64 __NR3264_fstatat
+#define __NR_fstat64 __NR3264_fstat
+#define __NR_mmap2 __NR3264_mmap
+#define __NR_fadvise64_64 __NR3264_fadvise64
+#ifdef __NR3264_stat
+#define __NR_stat64 __NR3264_stat
+#define __NR_lstat64 __NR3264_lstat
+#endif
+#endif
+
+#ifdef __KERNEL__
+
+/*
+ * These are required system calls, we should
+ * invert the logic eventually and let them
+ * be selected by default.
+ */
+#if __BITS_PER_LONG == 32
+#define __ARCH_WANT_STAT64
+#define __ARCH_WANT_SYS_LLSEEK
+#endif
+#define __ARCH_WANT_SYS_RT_SIGACTION
+#define __ARCH_WANT_SYS_RT_SIGSUSPEND
+
+/*
+ * "Conditional" syscalls
+ *
+ * What we want is __attribute__((weak,alias("sys_ni_syscall"))),
+ * but it doesn't work on all toolchains, so we just do it by hand
+ */
+#ifndef cond_syscall
+#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_GENERIC_UNISTD_H */
diff --git a/scripts/checksyscalls.sh b/scripts/checksyscalls.sh
index 60d00d1c4ee..66ad375612f 100755
--- a/scripts/checksyscalls.sh
+++ b/scripts/checksyscalls.sh
@@ -14,6 +14,57 @@ cat << EOF
 #include <asm/types.h>
 #include <asm/unistd.h>
 
+/* *at */
+#define __IGNORE_open		/* openat */
+#define __IGNORE_link		/* linkat */
+#define __IGNORE_unlink		/* unlinkat */
+#define __IGNORE_mknod		/* mknodat */
+#define __IGNORE_chmod		/* fchmodat */
+#define __IGNORE_chown		/* fchownat */
+#define __IGNORE_mkdir		/* mkdirat */
+#define __IGNORE_rmdir		/* unlinkat */
+#define __IGNORE_lchown		/* fchownat */
+#define __IGNORE_access		/* faccessat */
+#define __IGNORE_rename		/* renameat */
+#define __IGNORE_readlink	/* readlinkat */
+#define __IGNORE_symlink	/* symlinkat */
+#define __IGNORE_utimes		/* futimesat */
+#if BITS_PER_LONG == 64
+#define __IGNORE_stat		/* fstatat */
+#define __IGNORE_lstat		/* fstatat */
+#else
+#define __IGNORE_stat64		/* fstatat64 */
+#define __IGNORE_lstat64	/* fstatat64 */
+#endif
+
+/* CLOEXEC flag */
+#define __IGNORE_pipe		/* pipe2 */
+#define __IGNORE_dup2		/* dup3 */
+#define __IGNORE_epoll_create	/* epoll_create1 */
+#define __IGNORE_inotify_init	/* inotify_init1 */
+#define __IGNORE_eventfd	/* eventfd2 */
+#define __IGNORE_signalfd	/* signalfd4 */
+
+/* MMU */
+#ifndef CONFIG_MMU
+#define __IGNORE_madvise
+#define __IGNORE_mbind
+#define __IGNORE_mincore
+#define __IGNORE_mlock
+#define __IGNORE_mlockall
+#define __IGNORE_munlock
+#define __IGNORE_munlockall
+#define __IGNORE_mprotect
+#define __IGNORE_msync
+#define __IGNORE_migrate_pages
+#define __IGNORE_move_pages
+#define __IGNORE_remap_file_pages
+#define __IGNORE_get_mempolicy
+#define __IGNORE_set_mempolicy
+#define __IGNORE_swapoff
+#define __IGNORE_swapon
+#endif
+
 /* System calls for 32-bit kernels only */
 #if BITS_PER_LONG == 64
 #define __IGNORE_sendfile64
@@ -27,6 +78,22 @@ cat << EOF
 #define __IGNORE_fstatat64
 #define __IGNORE_fstatfs64
 #define __IGNORE_statfs64
+#define __IGNORE_llseek
+#define __IGNORE_mmap2
+#else
+#define __IGNORE_sendfile
+#define __IGNORE_ftruncate
+#define __IGNORE_truncate
+#define __IGNORE_stat
+#define __IGNORE_lstat
+#define __IGNORE_fstat
+#define __IGNORE_fcntl
+#define __IGNORE_fadvise64
+#define __IGNORE_newfstatat
+#define __IGNORE_fstatfs
+#define __IGNORE_statfs
+#define __IGNORE_lseek
+#define __IGNORE_mmap
 #endif
 
 /* i386-specific or historical system calls */
@@ -44,7 +111,6 @@ cat << EOF
 #define __IGNORE_idle
 #define __IGNORE_modify_ldt
 #define __IGNORE_ugetrlimit
-#define __IGNORE_mmap2
 #define __IGNORE_vm86
 #define __IGNORE_vm86old
 #define __IGNORE_set_thread_area
@@ -55,7 +121,6 @@ cat << EOF
 #define __IGNORE_oldlstat
 #define __IGNORE_oldolduname
 #define __IGNORE_olduname
-#define __IGNORE_umount2
 #define __IGNORE_umount
 #define __IGNORE_waitpid
 #define __IGNORE_stime
@@ -75,9 +140,12 @@ cat << EOF
 #define __IGNORE__llseek
 #define __IGNORE__newselect
 #define __IGNORE_create_module
-#define __IGNORE_delete_module
 #define __IGNORE_query_module
 #define __IGNORE_get_kernel_syms
+#define __IGNORE_sysfs
+#define __IGNORE_uselib
+#define __IGNORE__sysctl
+
 /* ... including the "new" 32-bit uid syscalls */
 #define __IGNORE_lchown32
 #define __IGNORE_getuid32
@@ -99,6 +167,24 @@ cat << EOF
 #define __IGNORE_setfsuid32
 #define __IGNORE_setfsgid32
 
+/* these can be expressed using other calls */
+#define __IGNORE_alarm		/* setitimer */
+#define __IGNORE_creat		/* open */
+#define __IGNORE_fork		/* clone */
+#define __IGNORE_futimesat	/* utimensat */
+#define __IGNORE_getpgrp	/* getpgid */
+#define __IGNORE_getdents	/* getdents64 */
+#define __IGNORE_pause		/* sigsuspend */
+#define __IGNORE_poll		/* ppoll */
+#define __IGNORE_select		/* pselect6 */
+#define __IGNORE_epoll_wait	/* epoll_pwait */
+#define __IGNORE_time		/* gettimeofday */
+#define __IGNORE_uname		/* newuname */
+#define __IGNORE_ustat		/* statfs */
+#define __IGNORE_utime		/* utimes */
+#define __IGNORE_vfork		/* clone */
+#define __IGNORE_wait4		/* waitid */
+
 /* sync_file_range had a stupid ABI. Allow sync_file_range2 instead */
 #ifdef __NR_sync_file_range2
 #define __IGNORE_sync_file_range
-- 
cgit v1.2.3-70-g09d2


From 72099ed2719fc5829bd79c6ca9d1783ed026eb37 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:29 +0000
Subject: asm-generic: rename atomic.h to atomic-long.h

The existing asm-generic/atomic.h only defines the
atomic_long type. This renames it to atomic-long.h
so we have a place to add a truly generic atomic.h
that can be used on all non-SMP systems.

Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Ingo Molnar <mingo@elte.hu>
---
 arch/alpha/include/asm/atomic.h      |   2 +-
 arch/arm/include/asm/atomic.h        |   2 +-
 arch/avr32/include/asm/atomic.h      |   2 +-
 arch/blackfin/include/asm/atomic.h   |   2 +-
 arch/cris/include/asm/atomic.h       |   2 +-
 arch/frv/include/asm/atomic.h        |   2 +-
 arch/h8300/include/asm/atomic.h      |   2 +-
 arch/ia64/include/asm/atomic.h       |   2 +-
 arch/m32r/include/asm/atomic.h       |   2 +-
 arch/m68k/include/asm/atomic_mm.h    |   2 +-
 arch/m68k/include/asm/atomic_no.h    |   2 +-
 arch/microblaze/include/asm/atomic.h |   2 +-
 arch/mips/include/asm/atomic.h       |   2 +-
 arch/mn10300/include/asm/atomic.h    |   2 +-
 arch/parisc/include/asm/atomic.h     |   2 +-
 arch/powerpc/include/asm/atomic.h    |   2 +-
 arch/s390/include/asm/atomic.h       |   2 +-
 arch/sh/include/asm/atomic.h         |   2 +-
 arch/sparc/include/asm/atomic_32.h   |   2 +-
 arch/sparc/include/asm/atomic_64.h   |   2 +-
 arch/x86/include/asm/atomic_32.h     |   2 +-
 arch/x86/include/asm/atomic_64.h     |   2 +-
 arch/xtensa/include/asm/atomic.h     |   2 +-
 include/asm-generic/atomic-long.h    | 258 +++++++++++++++++++++++++++++++++++
 include/asm-generic/atomic.h         | 258 -----------------------------------
 include/asm-generic/bitops/atomic.h  |   1 +
 26 files changed, 282 insertions(+), 281 deletions(-)
 create mode 100644 include/asm-generic/atomic-long.h
 delete mode 100644 include/asm-generic/atomic.h

(limited to 'include')

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 62b363584b2..610dff44d94 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -256,5 +256,5 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
 #define smp_mb__before_atomic_inc()	smp_mb()
 #define smp_mb__after_atomic_inc()	smp_mb()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* _ALPHA_ATOMIC_H */
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 16b52f39798..9e07fe50702 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -249,6 +249,6 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
 #define smp_mb__before_atomic_inc()	smp_mb()
 #define smp_mb__after_atomic_inc()	smp_mb()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif
 #endif
diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h
index 31881510774..b131c27ddf5 100644
--- a/arch/avr32/include/asm/atomic.h
+++ b/arch/avr32/include/asm/atomic.h
@@ -196,6 +196,6 @@ static inline int atomic_sub_if_positive(int i, atomic_t *v)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 
 #endif /*  __ASM_AVR32_ATOMIC_H */
diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h
index 94b2a9b1945..7bbf44e4ddf 100644
--- a/arch/blackfin/include/asm/atomic.h
+++ b/arch/blackfin/include/asm/atomic.h
@@ -208,6 +208,6 @@ static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
 #define atomic_sub_and_test(i,v) (atomic_sub_return((i), (v)) == 0)
 #define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0)
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 
 #endif				/* __ARCH_BLACKFIN_ATOMIC __ */
diff --git a/arch/cris/include/asm/atomic.h b/arch/cris/include/asm/atomic.h
index 5718dd8902a..a6aca819e9f 100644
--- a/arch/cris/include/asm/atomic.h
+++ b/arch/cris/include/asm/atomic.h
@@ -158,5 +158,5 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
 #define smp_mb__before_atomic_inc()    barrier()
 #define smp_mb__after_atomic_inc()     barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif
diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h
index 296c35cfb20..0409d981fd3 100644
--- a/arch/frv/include/asm/atomic.h
+++ b/arch/frv/include/asm/atomic.h
@@ -194,5 +194,5 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
 
 #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* _ASM_ATOMIC_H */
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index 833186c8dc3..33c8c0fa958 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -141,5 +141,5 @@ static __inline__ void atomic_set_mask(unsigned long mask, unsigned long *v)
 #define smp_mb__before_atomic_inc()    barrier()
 #define smp_mb__after_atomic_inc() barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* __ARCH_H8300_ATOMIC __ */
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index d37292bd987..88405cb0832 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -216,5 +216,5 @@ atomic64_add_negative (__s64 i, atomic64_t *v)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* _ASM_IA64_ATOMIC_H */
diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h
index 2eed30f8408..63f0cf0f50d 100644
--- a/arch/m32r/include/asm/atomic.h
+++ b/arch/m32r/include/asm/atomic.h
@@ -314,5 +314,5 @@ static __inline__ void atomic_set_mask(unsigned long  mask, atomic_t *addr)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif	/* _ASM_M32R_ATOMIC_H */
diff --git a/arch/m68k/include/asm/atomic_mm.h b/arch/m68k/include/asm/atomic_mm.h
index eb0ab9d4ee7..88b7af20a99 100644
--- a/arch/m68k/include/asm/atomic_mm.h
+++ b/arch/m68k/include/asm/atomic_mm.h
@@ -192,5 +192,5 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* __ARCH_M68K_ATOMIC __ */
diff --git a/arch/m68k/include/asm/atomic_no.h b/arch/m68k/include/asm/atomic_no.h
index 6bb674855a3..5674cb9449b 100644
--- a/arch/m68k/include/asm/atomic_no.h
+++ b/arch/m68k/include/asm/atomic_no.h
@@ -151,5 +151,5 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
 #define atomic_dec_return(v) atomic_sub_return(1,(v))
 #define atomic_inc_return(v) atomic_add_return(1,(v))
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* __ARCH_M68KNOMMU_ATOMIC __ */
diff --git a/arch/microblaze/include/asm/atomic.h b/arch/microblaze/include/asm/atomic.h
index a448d94ab72..0de612ad7cb 100644
--- a/arch/microblaze/include/asm/atomic.h
+++ b/arch/microblaze/include/asm/atomic.h
@@ -118,6 +118,6 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 
 #endif /* _ASM_MICROBLAZE_ATOMIC_H */
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 1b332e15ab5..eb7f01cfd1a 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -793,6 +793,6 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
 #define smp_mb__before_atomic_inc()	smp_llsc_mb()
 #define smp_mb__after_atomic_inc()	smp_llsc_mb()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 
 #endif /* _ASM_ATOMIC_H */
diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h
index bc064825f9b..5bf5be9566d 100644
--- a/arch/mn10300/include/asm/atomic.h
+++ b/arch/mn10300/include/asm/atomic.h
@@ -151,7 +151,7 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_ATOMIC_H */
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index ada3e5364d8..7eeaff94436 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -338,6 +338,6 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
 
 #endif /* CONFIG_64BIT */
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 
 #endif /* _ASM_PARISC_ATOMIC_H_ */
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index b401950f525..b7d2d07b6f9 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -472,6 +472,6 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
 
 #endif /* __powerpc64__ */
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_ATOMIC_H_ */
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index de432f2de2d..fca9dffcc66 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -275,6 +275,6 @@ static __inline__ int atomic64_add_unless(atomic64_t *v,
 #define smp_mb__before_atomic_inc()	smp_mb()
 #define smp_mb__after_atomic_inc()	smp_mb()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* __KERNEL__ */
 #endif /* __ARCH_S390_ATOMIC__  */
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index 6327ffbb199..a5647d0cd17 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -84,5 +84,5 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* __ASM_SH_ATOMIC_H */
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index bb91b1248cd..f0d343c3b95 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -161,5 +161,5 @@ static inline int __atomic24_sub(int i, atomic24_t *v)
 
 #endif /* !(__KERNEL__) */
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* !(__ARCH_SPARC_ATOMIC__) */
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index a0a70649269..f2e48009989 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -114,5 +114,5 @@ static inline int atomic64_add_unless(atomic64_t *v, long a, long u)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* !(__ARCH_SPARC64_ATOMIC__) */
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index 85b46fba422..c83d3148608 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -247,5 +247,5 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* _ASM_X86_ATOMIC_32_H */
diff --git a/arch/x86/include/asm/atomic_64.h b/arch/x86/include/asm/atomic_64.h
index 8c21731984d..0d636022000 100644
--- a/arch/x86/include/asm/atomic_64.h
+++ b/arch/x86/include/asm/atomic_64.h
@@ -455,5 +455,5 @@ static inline void atomic_or_long(unsigned long *v1, unsigned long v2)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* _ASM_X86_ATOMIC_64_H */
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index 67ad67bed8c..22d6dde4261 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -292,7 +292,7 @@ static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-#include <asm-generic/atomic.h>
+#include <asm-generic/atomic-long.h>
 #endif /* __KERNEL__ */
 
 #endif /* _XTENSA_ATOMIC_H */
diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h
new file mode 100644
index 00000000000..76e27d66c05
--- /dev/null
+++ b/include/asm-generic/atomic-long.h
@@ -0,0 +1,258 @@
+#ifndef _ASM_GENERIC_ATOMIC_LONG_H
+#define _ASM_GENERIC_ATOMIC_LONG_H
+/*
+ * Copyright (C) 2005 Silicon Graphics, Inc.
+ *	Christoph Lameter
+ *
+ * Allows to provide arch independent atomic definitions without the need to
+ * edit all arch specific atomic.h files.
+ */
+
+#include <asm/types.h>
+
+/*
+ * Suppport for atomic_long_t
+ *
+ * Casts for parameters are avoided for existing atomic functions in order to
+ * avoid issues with cast-as-lval under gcc 4.x and other limitations that the
+ * macros of a platform may have.
+ */
+
+#if BITS_PER_LONG == 64
+
+typedef atomic64_t atomic_long_t;
+
+#define ATOMIC_LONG_INIT(i)	ATOMIC64_INIT(i)
+
+static inline long atomic_long_read(atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	return (long)atomic64_read(v);
+}
+
+static inline void atomic_long_set(atomic_long_t *l, long i)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	atomic64_set(v, i);
+}
+
+static inline void atomic_long_inc(atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	atomic64_inc(v);
+}
+
+static inline void atomic_long_dec(atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	atomic64_dec(v);
+}
+
+static inline void atomic_long_add(long i, atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	atomic64_add(i, v);
+}
+
+static inline void atomic_long_sub(long i, atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	atomic64_sub(i, v);
+}
+
+static inline int atomic_long_sub_and_test(long i, atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	return atomic64_sub_and_test(i, v);
+}
+
+static inline int atomic_long_dec_and_test(atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	return atomic64_dec_and_test(v);
+}
+
+static inline int atomic_long_inc_and_test(atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	return atomic64_inc_and_test(v);
+}
+
+static inline int atomic_long_add_negative(long i, atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	return atomic64_add_negative(i, v);
+}
+
+static inline long atomic_long_add_return(long i, atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	return (long)atomic64_add_return(i, v);
+}
+
+static inline long atomic_long_sub_return(long i, atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	return (long)atomic64_sub_return(i, v);
+}
+
+static inline long atomic_long_inc_return(atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	return (long)atomic64_inc_return(v);
+}
+
+static inline long atomic_long_dec_return(atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	return (long)atomic64_dec_return(v);
+}
+
+static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	return (long)atomic64_add_unless(v, a, u);
+}
+
+#define atomic_long_inc_not_zero(l) atomic64_inc_not_zero((atomic64_t *)(l))
+
+#define atomic_long_cmpxchg(l, old, new) \
+	(atomic64_cmpxchg((atomic64_t *)(l), (old), (new)))
+#define atomic_long_xchg(v, new) \
+	(atomic64_xchg((atomic64_t *)(l), (new)))
+
+#else  /*  BITS_PER_LONG == 64  */
+
+typedef atomic_t atomic_long_t;
+
+#define ATOMIC_LONG_INIT(i)	ATOMIC_INIT(i)
+static inline long atomic_long_read(atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	return (long)atomic_read(v);
+}
+
+static inline void atomic_long_set(atomic_long_t *l, long i)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	atomic_set(v, i);
+}
+
+static inline void atomic_long_inc(atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	atomic_inc(v);
+}
+
+static inline void atomic_long_dec(atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	atomic_dec(v);
+}
+
+static inline void atomic_long_add(long i, atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	atomic_add(i, v);
+}
+
+static inline void atomic_long_sub(long i, atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	atomic_sub(i, v);
+}
+
+static inline int atomic_long_sub_and_test(long i, atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	return atomic_sub_and_test(i, v);
+}
+
+static inline int atomic_long_dec_and_test(atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	return atomic_dec_and_test(v);
+}
+
+static inline int atomic_long_inc_and_test(atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	return atomic_inc_and_test(v);
+}
+
+static inline int atomic_long_add_negative(long i, atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	return atomic_add_negative(i, v);
+}
+
+static inline long atomic_long_add_return(long i, atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	return (long)atomic_add_return(i, v);
+}
+
+static inline long atomic_long_sub_return(long i, atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	return (long)atomic_sub_return(i, v);
+}
+
+static inline long atomic_long_inc_return(atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	return (long)atomic_inc_return(v);
+}
+
+static inline long atomic_long_dec_return(atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	return (long)atomic_dec_return(v);
+}
+
+static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	return (long)atomic_add_unless(v, a, u);
+}
+
+#define atomic_long_inc_not_zero(l) atomic_inc_not_zero((atomic_t *)(l))
+
+#define atomic_long_cmpxchg(l, old, new) \
+	(atomic_cmpxchg((atomic_t *)(l), (old), (new)))
+#define atomic_long_xchg(v, new) \
+	(atomic_xchg((atomic_t *)(v), (new)))
+
+#endif  /*  BITS_PER_LONG == 64  */
+
+#endif  /*  _ASM_GENERIC_ATOMIC_LONG_H  */
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
deleted file mode 100644
index 3673a13b670..00000000000
--- a/include/asm-generic/atomic.h
+++ /dev/null
@@ -1,258 +0,0 @@
-#ifndef _ASM_GENERIC_ATOMIC_H
-#define _ASM_GENERIC_ATOMIC_H
-/*
- * Copyright (C) 2005 Silicon Graphics, Inc.
- *	Christoph Lameter
- *
- * Allows to provide arch independent atomic definitions without the need to
- * edit all arch specific atomic.h files.
- */
-
-#include <asm/types.h>
-
-/*
- * Suppport for atomic_long_t
- *
- * Casts for parameters are avoided for existing atomic functions in order to
- * avoid issues with cast-as-lval under gcc 4.x and other limitations that the
- * macros of a platform may have.
- */
-
-#if BITS_PER_LONG == 64
-
-typedef atomic64_t atomic_long_t;
-
-#define ATOMIC_LONG_INIT(i)	ATOMIC64_INIT(i)
-
-static inline long atomic_long_read(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_read(v);
-}
-
-static inline void atomic_long_set(atomic_long_t *l, long i)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_set(v, i);
-}
-
-static inline void atomic_long_inc(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_inc(v);
-}
-
-static inline void atomic_long_dec(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_dec(v);
-}
-
-static inline void atomic_long_add(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_add(i, v);
-}
-
-static inline void atomic_long_sub(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_sub(i, v);
-}
-
-static inline int atomic_long_sub_and_test(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return atomic64_sub_and_test(i, v);
-}
-
-static inline int atomic_long_dec_and_test(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return atomic64_dec_and_test(v);
-}
-
-static inline int atomic_long_inc_and_test(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return atomic64_inc_and_test(v);
-}
-
-static inline int atomic_long_add_negative(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return atomic64_add_negative(i, v);
-}
-
-static inline long atomic_long_add_return(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_add_return(i, v);
-}
-
-static inline long atomic_long_sub_return(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_sub_return(i, v);
-}
-
-static inline long atomic_long_inc_return(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_inc_return(v);
-}
-
-static inline long atomic_long_dec_return(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_dec_return(v);
-}
-
-static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_add_unless(v, a, u);
-}
-
-#define atomic_long_inc_not_zero(l) atomic64_inc_not_zero((atomic64_t *)(l))
-
-#define atomic_long_cmpxchg(l, old, new) \
-	(atomic64_cmpxchg((atomic64_t *)(l), (old), (new)))
-#define atomic_long_xchg(v, new) \
-	(atomic64_xchg((atomic64_t *)(l), (new)))
-
-#else  /*  BITS_PER_LONG == 64  */
-
-typedef atomic_t atomic_long_t;
-
-#define ATOMIC_LONG_INIT(i)	ATOMIC_INIT(i)
-static inline long atomic_long_read(atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	return (long)atomic_read(v);
-}
-
-static inline void atomic_long_set(atomic_long_t *l, long i)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	atomic_set(v, i);
-}
-
-static inline void atomic_long_inc(atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	atomic_inc(v);
-}
-
-static inline void atomic_long_dec(atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	atomic_dec(v);
-}
-
-static inline void atomic_long_add(long i, atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	atomic_add(i, v);
-}
-
-static inline void atomic_long_sub(long i, atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	atomic_sub(i, v);
-}
-
-static inline int atomic_long_sub_and_test(long i, atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	return atomic_sub_and_test(i, v);
-}
-
-static inline int atomic_long_dec_and_test(atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	return atomic_dec_and_test(v);
-}
-
-static inline int atomic_long_inc_and_test(atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	return atomic_inc_and_test(v);
-}
-
-static inline int atomic_long_add_negative(long i, atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	return atomic_add_negative(i, v);
-}
-
-static inline long atomic_long_add_return(long i, atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	return (long)atomic_add_return(i, v);
-}
-
-static inline long atomic_long_sub_return(long i, atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	return (long)atomic_sub_return(i, v);
-}
-
-static inline long atomic_long_inc_return(atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	return (long)atomic_inc_return(v);
-}
-
-static inline long atomic_long_dec_return(atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	return (long)atomic_dec_return(v);
-}
-
-static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	return (long)atomic_add_unless(v, a, u);
-}
-
-#define atomic_long_inc_not_zero(l) atomic_inc_not_zero((atomic_t *)(l))
-
-#define atomic_long_cmpxchg(l, old, new) \
-	(atomic_cmpxchg((atomic_t *)(l), (old), (new)))
-#define atomic_long_xchg(v, new) \
-	(atomic_xchg((atomic_t *)(v), (new)))
-
-#endif  /*  BITS_PER_LONG == 64  */
-
-#endif  /*  _ASM_GENERIC_ATOMIC_H  */
diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h
index 4657f3e410f..c8946465e63 100644
--- a/include/asm-generic/bitops/atomic.h
+++ b/include/asm-generic/bitops/atomic.h
@@ -2,6 +2,7 @@
 #define _ASM_GENERIC_BITOPS_ATOMIC_H_
 
 #include <asm/types.h>
+#include <asm/system.h>
 
 #ifdef CONFIG_SMP
 #include <asm/spinlock.h>
-- 
cgit v1.2.3-70-g09d2


From 5b17e1cd8928ae65932758ce6478ac6d3e9a86b2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:30 +0000
Subject: asm-generic: rename page.h and uaccess.h

The current asm-generic/page.h only contains the get_order
function, and asm-generic/uaccess.h only implements
unaligned accesses. This renames the file to getorder.h
and uaccess-unaligned.h to make room for new page.h
and uaccess.h file that will be usable by all simple
(e.g. nommu) architectures.

Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/alpha/include/asm/page.h           |  2 +-
 arch/arm/include/asm/page.h             |  2 +-
 arch/blackfin/include/asm/page.h        |  2 +-
 arch/cris/include/asm/page.h            |  2 +-
 arch/frv/include/asm/page.h             |  2 +-
 arch/h8300/include/asm/page.h           |  2 +-
 arch/m32r/include/asm/page.h            |  2 +-
 arch/m68k/include/asm/page_mm.h         |  2 +-
 arch/m68k/include/asm/page_no.h         |  2 +-
 arch/microblaze/include/asm/page.h      |  2 +-
 arch/mips/include/asm/page.h            |  2 +-
 arch/parisc/include/asm/page.h          |  2 +-
 arch/parisc/include/asm/uaccess.h       |  2 +-
 arch/powerpc/include/asm/page_32.h      |  2 +-
 arch/powerpc/include/asm/page_64.h      |  2 +-
 arch/s390/include/asm/page.h            |  2 +-
 arch/sh/include/asm/page.h              |  2 +-
 arch/sparc/include/asm/page_32.h        |  2 +-
 arch/sparc/include/asm/page_64.h        |  2 +-
 arch/sparc/include/asm/uaccess_64.h     |  2 +-
 arch/um/include/asm/page.h              |  2 +-
 arch/x86/include/asm/page.h             |  2 +-
 arch/xtensa/include/asm/page.h          |  2 +-
 include/asm-generic/getorder.h          | 24 ++++++++++++++++++++++++
 include/asm-generic/page.h              | 24 ------------------------
 include/asm-generic/uaccess-unaligned.h | 26 ++++++++++++++++++++++++++
 include/asm-generic/uaccess.h           | 26 --------------------------
 27 files changed, 73 insertions(+), 73 deletions(-)
 create mode 100644 include/asm-generic/getorder.h
 delete mode 100644 include/asm-generic/page.h
 create mode 100644 include/asm-generic/uaccess-unaligned.h
 delete mode 100644 include/asm-generic/uaccess.h

(limited to 'include')

diff --git a/arch/alpha/include/asm/page.h b/arch/alpha/include/asm/page.h
index 0995f9d1341..07af062544f 100644
--- a/arch/alpha/include/asm/page.h
+++ b/arch/alpha/include/asm/page.h
@@ -93,6 +93,6 @@ typedef struct page *pgtable_t;
 					 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _ALPHA_PAGE_H */
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index 7b522770f29..be962c1349c 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -202,6 +202,6 @@ typedef struct page *pgtable_t;
 	(((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \
 	 VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif
diff --git a/arch/blackfin/include/asm/page.h b/arch/blackfin/include/asm/page.h
index 344f6a8c1f2..3ea2016a1d4 100644
--- a/arch/blackfin/include/asm/page.h
+++ b/arch/blackfin/include/asm/page.h
@@ -81,7 +81,7 @@ extern unsigned long memory_end;
 #define	virt_addr_valid(kaddr)	(((void *)(kaddr) >= (void *)PAGE_OFFSET) && \
 				((void *)(kaddr) < (void *)memory_end))
 
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif				/* __ASSEMBLY__ */
 
diff --git a/arch/cris/include/asm/page.h b/arch/cris/include/asm/page.h
index f3fdbd09c34..be45ee366be 100644
--- a/arch/cris/include/asm/page.h
+++ b/arch/cris/include/asm/page.h
@@ -68,7 +68,7 @@ typedef struct page *pgtable_t;
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _CRIS_PAGE_H */
 
diff --git a/arch/frv/include/asm/page.h b/arch/frv/include/asm/page.h
index bd9c220094c..25c6a500235 100644
--- a/arch/frv/include/asm/page.h
+++ b/arch/frv/include/asm/page.h
@@ -73,6 +73,6 @@ extern unsigned long max_pfn;
 #endif /* __ASSEMBLY__ */
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _ASM_PAGE_H */
diff --git a/arch/h8300/include/asm/page.h b/arch/h8300/include/asm/page.h
index 0b6acf0b03a..837381a2df4 100644
--- a/arch/h8300/include/asm/page.h
+++ b/arch/h8300/include/asm/page.h
@@ -73,6 +73,6 @@ extern unsigned long memory_end;
 #endif /* __ASSEMBLY__ */
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _H8300_PAGE_H */
diff --git a/arch/m32r/include/asm/page.h b/arch/m32r/include/asm/page.h
index c9333089fe1..11777f7a562 100644
--- a/arch/m32r/include/asm/page.h
+++ b/arch/m32r/include/asm/page.h
@@ -82,6 +82,6 @@ typedef struct page *pgtable_t;
 #define devmem_is_allowed(x) 1
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _ASM_M32R_PAGE_H */
diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h
index a34b8bad784..d009f3ea39a 100644
--- a/arch/m68k/include/asm/page_mm.h
+++ b/arch/m68k/include/asm/page_mm.h
@@ -223,6 +223,6 @@ static inline __attribute_const__ int __virt_to_node_shift(void)
 #define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | VM_EXEC | \
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _M68K_PAGE_H */
diff --git a/arch/m68k/include/asm/page_no.h b/arch/m68k/include/asm/page_no.h
index 3a1ede4544c..9aa3f90f485 100644
--- a/arch/m68k/include/asm/page_no.h
+++ b/arch/m68k/include/asm/page_no.h
@@ -72,6 +72,6 @@ extern unsigned long memory_end;
 
 #endif /* __ASSEMBLY__ */
 
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _M68KNOMMU_PAGE_H */
diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h
index 7238dcfcc51..962c210e5b9 100644
--- a/arch/microblaze/include/asm/page.h
+++ b/arch/microblaze/include/asm/page.h
@@ -135,6 +135,6 @@ extern unsigned int memory_size;
 #endif /* __KERNEL__ */
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _ASM_MICROBLAZE_PAGE_H */
diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
index 9f946e4ca05..72c80d2034c 100644
--- a/arch/mips/include/asm/page.h
+++ b/arch/mips/include/asm/page.h
@@ -189,6 +189,6 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 #define CAC_ADDR(addr)		((addr) - UNCAC_BASE + PAGE_OFFSET)
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _ASM_PAGE_H */
diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h
index 7bc5125d7d4..a84cc1f925f 100644
--- a/arch/parisc/include/asm/page.h
+++ b/arch/parisc/include/asm/page.h
@@ -159,6 +159,6 @@ extern int npmem_ranges;
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _PARISC_PAGE_H */
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index cd4c0b2a8e7..7cf799d70b4 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -7,7 +7,7 @@
 #include <asm/page.h>
 #include <asm/system.h>
 #include <asm/cache.h>
-#include <asm-generic/uaccess.h>
+#include <asm-generic/uaccess-unaligned.h>
 
 #define VERIFY_READ 0
 #define VERIFY_WRITE 1
diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
index a0e3f6e6b4e..bd0849dbcaa 100644
--- a/arch/powerpc/include/asm/page_32.h
+++ b/arch/powerpc/include/asm/page_32.h
@@ -41,7 +41,7 @@ extern void clear_pages(void *page, int order);
 static inline void clear_page(void *page) { clear_pages(page, 0); }
 extern void copy_page(void *to, void *from);
 
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #define PGD_T_LOG2	(__builtin_ffs(sizeof(pgd_t)) - 1)
 #define PTE_T_LOG2	(__builtin_ffs(sizeof(pte_t)) - 1)
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index 043bfdfe4f7..5817a3b747e 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -180,6 +180,6 @@ do {						\
 	(test_thread_flag(TIF_32BIT) ? \
 	 VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64)
 
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _ASM_POWERPC_PAGE_64_H */
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 32e8f6aa438..3e3594d01f8 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -150,7 +150,7 @@ void arch_alloc_page(struct page *page, int order);
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #define __HAVE_ARCH_GATE_AREA 1
 
diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h
index 9c6d21ec024..49592c780a6 100644
--- a/arch/sh/include/asm/page.h
+++ b/arch/sh/include/asm/page.h
@@ -163,7 +163,7 @@ typedef struct page *pgtable_t;
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 /* vDSO support */
 #ifdef CONFIG_VSYSCALL
diff --git a/arch/sparc/include/asm/page_32.h b/arch/sparc/include/asm/page_32.h
index d1806edc095..f72080bdda9 100644
--- a/arch/sparc/include/asm/page_32.h
+++ b/arch/sparc/include/asm/page_32.h
@@ -152,6 +152,6 @@ extern unsigned long pfn_base;
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _SPARC_PAGE_H */
diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
index 4274ed13ddb..f0d09b40103 100644
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@@ -132,6 +132,6 @@ typedef struct page *pgtable_t;
 #define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | VM_EXEC | \
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif /* _SPARC64_PAGE_H */
diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
index c64e767a3e4..a38c0323891 100644
--- a/arch/sparc/include/asm/uaccess_64.h
+++ b/arch/sparc/include/asm/uaccess_64.h
@@ -12,7 +12,7 @@
 #include <asm/asi.h>
 #include <asm/system.h>
 #include <asm/spitfire.h>
-#include <asm-generic/uaccess.h>
+#include <asm-generic/uaccess-unaligned.h>
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h
index 55f28a0bae6..4cc9b6cf480 100644
--- a/arch/um/include/asm/page.h
+++ b/arch/um/include/asm/page.h
@@ -116,7 +116,7 @@ extern unsigned long uml_physmem;
 #define virt_addr_valid(v) pfn_valid(phys_to_pfn(__pa(v)))
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #endif	/* __ASSEMBLY__ */
 #endif	/* __UM_PAGE_H */
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 89ed9d70b0a..625c3f0e741 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -56,7 +56,7 @@ extern bool __virt_addr_valid(unsigned long kaddr);
 #endif	/* __ASSEMBLY__ */
 
 #include <asm-generic/memory_model.h>
-#include <asm-generic/page.h>
+#include <asm-generic/getorder.h>
 
 #define __HAVE_ARCH_GATE_AREA 1
 
diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h
index 17e0c5383b1..161bb89e98c 100644
--- a/arch/xtensa/include/asm/page.h
+++ b/arch/xtensa/include/asm/page.h
@@ -129,7 +129,7 @@ static inline __attribute_const__ int get_order(unsigned long size)
 
 #else
 
-# include <asm-generic/page.h>
+# include <asm-generic/getorder.h>
 
 #endif
 
diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
new file mode 100644
index 00000000000..67e7245dc9b
--- /dev/null
+++ b/include/asm-generic/getorder.h
@@ -0,0 +1,24 @@
+#ifndef __ASM_GENERIC_GETORDER_H
+#define __ASM_GENERIC_GETORDER_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/compiler.h>
+
+/* Pure 2^n version of get_order */
+static inline __attribute_const__ int get_order(unsigned long size)
+{
+	int order;
+
+	size = (size - 1) >> (PAGE_SHIFT - 1);
+	order = -1;
+	do {
+		size >>= 1;
+		order++;
+	} while (size);
+	return order;
+}
+
+#endif	/* __ASSEMBLY__ */
+
+#endif	/* __ASM_GENERIC_GETORDER_H */
diff --git a/include/asm-generic/page.h b/include/asm-generic/page.h
deleted file mode 100644
index 14db733b8e6..00000000000
--- a/include/asm-generic/page.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef _ASM_GENERIC_PAGE_H
-#define _ASM_GENERIC_PAGE_H
-
-#ifndef __ASSEMBLY__
-
-#include <linux/compiler.h>
-
-/* Pure 2^n version of get_order */
-static __inline__ __attribute_const__ int get_order(unsigned long size)
-{
-	int order;
-
-	size = (size - 1) >> (PAGE_SHIFT - 1);
-	order = -1;
-	do {
-		size >>= 1;
-		order++;
-	} while (size);
-	return order;
-}
-
-#endif	/* __ASSEMBLY__ */
-
-#endif	/* _ASM_GENERIC_PAGE_H */
diff --git a/include/asm-generic/uaccess-unaligned.h b/include/asm-generic/uaccess-unaligned.h
new file mode 100644
index 00000000000..67deb898f0c
--- /dev/null
+++ b/include/asm-generic/uaccess-unaligned.h
@@ -0,0 +1,26 @@
+#ifndef __ASM_GENERIC_UACCESS_UNALIGNED_H
+#define __ASM_GENERIC_UACCESS_UNALIGNED_H
+
+/*
+ * This macro should be used instead of __get_user() when accessing
+ * values at locations that are not known to be aligned.
+ */
+#define __get_user_unaligned(x, ptr)					\
+({									\
+	__typeof__ (*(ptr)) __x;					\
+	__copy_from_user(&__x, (ptr), sizeof(*(ptr))) ? -EFAULT : 0;	\
+	(x) = __x;							\
+})
+
+
+/*
+ * This macro should be used instead of __put_user() when accessing
+ * values at locations that are not known to be aligned.
+ */
+#define __put_user_unaligned(x, ptr)					\
+({									\
+	__typeof__ (*(ptr)) __x = (x);					\
+	__copy_to_user((ptr), &__x, sizeof(*(ptr))) ? -EFAULT : 0;	\
+})
+
+#endif /* __ASM_GENERIC_UACCESS_UNALIGNED_H */
diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h
deleted file mode 100644
index 549cb3a1640..00000000000
--- a/include/asm-generic/uaccess.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef _ASM_GENERIC_UACCESS_H_
-#define _ASM_GENERIC_UACCESS_H_
-
-/*
- * This macro should be used instead of __get_user() when accessing
- * values at locations that are not known to be aligned.
- */
-#define __get_user_unaligned(x, ptr)					\
-({									\
-	__typeof__ (*(ptr)) __x;					\
-	__copy_from_user(&__x, (ptr), sizeof(*(ptr))) ? -EFAULT : 0;	\
-	(x) = __x;							\
-})
-
-
-/*
- * This macro should be used instead of __put_user() when accessing
- * values at locations that are not known to be aligned.
- */
-#define __put_user_unaligned(x, ptr)					\
-({									\
-	__typeof__ (*(ptr)) __x = (x);					\
-	__copy_to_user((ptr), &__x, sizeof(*(ptr))) ? -EFAULT : 0;	\
-})
-
-#endif /* _ASM_GENERIC_UACCESS_H */
-- 
cgit v1.2.3-70-g09d2


From 3aef392822e1a42e80077a332ea2efdfd8a4248a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 17 May 2009 21:00:05 +0000
Subject: asm-generic: make get_rtc_time overridable

Evidently, set_rtc_time is supposed to be overridable
by architectures that define their own version, but
unfortunately, get_rtc_ss would in that case still
use the generic version.

This makes get_rtc_ss call the real set_rtc_time
to let architectures define their own version.
The change should fix the "Extended RTC operation"
on Alpha, which uses the incorrect get_rtc_ss
call. It also allows PowerPC to use the asm-generic/rtc.h
file in the future.

Cc: Richard Henderson <rth@twiddle.net>
Cc: linux-alpha@vger.kernel.org
Cc: Tom Rini <trini@mvista.com>
Cc: rtc-linux@googlegroups.com
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Paul Gortmaker <p_gortmaker@yahoo.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/rtc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/rtc.h b/include/asm-generic/rtc.h
index 763e3b060f4..fa86f240c87 100644
--- a/include/asm-generic/rtc.h
+++ b/include/asm-generic/rtc.h
@@ -202,7 +202,7 @@ static inline unsigned int get_rtc_ss(void)
 {
 	struct rtc_time h;
 
-	__get_rtc_time(&h);
+	get_rtc_time(&h);
 	return h.tm_sec;
 }
 
-- 
cgit v1.2.3-70-g09d2


From d7c4f1b78afeedfc22b1756fcdc1acbe84284d74 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:31 +0000
Subject: asm-generic: make pci.h usable directly

Some generic code is using the horribly misnamed PCI_DMA_BUS_IS_PHYS
from asm/pci.h. This makes sure that an architecture without PCI
support does not have to define this itself but can rely on the
asm-generic version.

Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/frv/include/asm/pci.h  | 13 +++----------
 arch/m32r/include/asm/pci.h |  2 --
 include/asm-generic/pci.h   |  8 ++++++++
 3 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/arch/frv/include/asm/pci.h b/arch/frv/include/asm/pci.h
index 585d9b49949..3ce227ba774 100644
--- a/arch/frv/include/asm/pci.h
+++ b/arch/frv/include/asm/pci.h
@@ -10,8 +10,8 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#ifndef ASM_PCI_H
-#define	ASM_PCI_H
+#ifndef _ASM_FRV_PCI_H
+#define _ASM_FRV_PCI_H
 
 #include <linux/mm.h>
 #include <asm/scatterlist.h>
@@ -43,12 +43,6 @@ extern void pci_free_consistent(struct pci_dev *hwdev, size_t size,
 /* Return the index of the PCI controller for device PDEV. */
 #define pci_controller_num(PDEV)	(0)
 
-/* The PCI address space does equal the physical memory
- * address space.  The networking and block device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS	(1)
-
 /* pci_unmap_{page,single} is a nop so... */
 #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
 #define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
@@ -114,5 +108,4 @@ static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
 				    sg_dma_address(&sg[i])+sg_dma_len(&sg[i]));
 }
 
-
-#endif
+#endif /* _ASM_FRV_PCI_H */
diff --git a/arch/m32r/include/asm/pci.h b/arch/m32r/include/asm/pci.h
index fe785d167db..07d3834c6de 100644
--- a/arch/m32r/include/asm/pci.h
+++ b/arch/m32r/include/asm/pci.h
@@ -3,6 +3,4 @@
 
 #include <asm-generic/pci.h>
 
-#define PCI_DMA_BUS_IS_PHYS	(1)
-
 #endif /* _ASM_M32R_PCI_H */
diff --git a/include/asm-generic/pci.h b/include/asm-generic/pci.h
index c36a77d3bf4..515c6e2e321 100644
--- a/include/asm-generic/pci.h
+++ b/include/asm-generic/pci.h
@@ -52,4 +52,12 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
 }
 #endif /* HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ */
 
+/*
+ * By default, assume that no iommu is in use and that the PCI
+ * space is mapped to address physical 0.
+ */
+#ifndef PCI_DMA_BUS_IS_PHYS
+#define PCI_DMA_BUS_IS_PHYS	(1)
 #endif
+
+#endif /* _ASM_GENERIC_PCI_H */
-- 
cgit v1.2.3-70-g09d2


From 9858c60cc2d33b18367b2bc6947e3ea23db26ccb Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:32 +0000
Subject: asm-generic: make bitops.h usable

bitops.h apparently suffered from some level of bitrot, it
was missing the smp_mb__{before,after}_clear_bit functions,
and included other headers in an invalid order.

This changes the file so that new architectures can use
it out of the box.

Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/bitops.h | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/bitops.h b/include/asm-generic/bitops.h
index c9f369c4bd7..a54f4421a24 100644
--- a/include/asm-generic/bitops.h
+++ b/include/asm-generic/bitops.h
@@ -1,19 +1,29 @@
-#ifndef _ASM_GENERIC_BITOPS_H_
-#define _ASM_GENERIC_BITOPS_H_
+#ifndef __ASM_GENERIC_BITOPS_H
+#define __ASM_GENERIC_BITOPS_H
 
 /*
  * For the benefit of those who are trying to port Linux to another
  * architecture, here are some C-language equivalents.  You should
  * recode these in the native assembly language, if at all possible.
- * 
+ *
  * C language equivalents written by Theodore Ts'o, 9/26/92
  */
 
-#include <asm-generic/bitops/atomic.h>
-#include <asm-generic/bitops/non-atomic.h>
+#include <linux/irqflags.h>
+#include <linux/compiler.h>
+
+/*
+ * clear_bit may not imply a memory barrier
+ */
+#ifndef smp_mb__before_clear_bit
+#define smp_mb__before_clear_bit()	smp_mb()
+#define smp_mb__after_clear_bit()	smp_mb()
+#endif
+
 #include <asm-generic/bitops/__ffs.h>
 #include <asm-generic/bitops/ffz.h>
 #include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 #include <asm-generic/bitops/find.h>
 
@@ -26,8 +36,10 @@
 #include <asm-generic/bitops/hweight.h>
 #include <asm-generic/bitops/lock.h>
 
+#include <asm-generic/bitops/atomic.h>
+#include <asm-generic/bitops/non-atomic.h>
 #include <asm-generic/bitops/ext2-non-atomic.h>
 #include <asm-generic/bitops/ext2-atomic.h>
 #include <asm-generic/bitops/minix.h>
 
-#endif /* _ASM_GENERIC_BITOPS_H */
+#endif /* __ASM_GENERIC_BITOPS_H */
-- 
cgit v1.2.3-70-g09d2


From aafe4dbed0bf6cbdb2e9f03e1d42f8a540d8541d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:33 +0000
Subject: asm-generic: add generic versions of common headers

These are all kernel internal interfaces that get copied
around a lot. In most cases, architectures can provide
their own optimized versions, but these generic versions
can work as well.

I have tried to use the most common contents of each
header to allow existing architectures to migrate easily.

Thanks to Remis for suggesting a number of cleanups.

Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/bugs.h        |  10 +++
 include/asm-generic/current.h     |   9 +++
 include/asm-generic/delay.h       |   9 +++
 include/asm-generic/fb.h          |  12 +++
 include/asm-generic/hardirq.h     |  34 ++++++++
 include/asm-generic/irq.h         |  18 +++++
 include/asm-generic/irqflags.h    |  72 +++++++++++++++++
 include/asm-generic/kmap_types.h  |  32 ++++++++
 include/asm-generic/linkage.h     |   8 ++
 include/asm-generic/module.h      |  22 ++++++
 include/asm-generic/mutex.h       |   9 +++
 include/asm-generic/scatterlist.h |  43 ++++++++++
 include/asm-generic/spinlock.h    |  11 +++
 include/asm-generic/string.h      |  10 +++
 include/asm-generic/syscalls.h    |  60 ++++++++++++++
 include/asm-generic/system.h      | 161 ++++++++++++++++++++++++++++++++++++++
 include/asm-generic/unaligned.h   |  30 +++++++
 include/asm-generic/user.h        |   8 ++
 18 files changed, 558 insertions(+)
 create mode 100644 include/asm-generic/bugs.h
 create mode 100644 include/asm-generic/current.h
 create mode 100644 include/asm-generic/delay.h
 create mode 100644 include/asm-generic/fb.h
 create mode 100644 include/asm-generic/hardirq.h
 create mode 100644 include/asm-generic/irq.h
 create mode 100644 include/asm-generic/irqflags.h
 create mode 100644 include/asm-generic/kmap_types.h
 create mode 100644 include/asm-generic/linkage.h
 create mode 100644 include/asm-generic/module.h
 create mode 100644 include/asm-generic/mutex.h
 create mode 100644 include/asm-generic/scatterlist.h
 create mode 100644 include/asm-generic/spinlock.h
 create mode 100644 include/asm-generic/string.h
 create mode 100644 include/asm-generic/syscalls.h
 create mode 100644 include/asm-generic/system.h
 create mode 100644 include/asm-generic/unaligned.h
 create mode 100644 include/asm-generic/user.h

(limited to 'include')

diff --git a/include/asm-generic/bugs.h b/include/asm-generic/bugs.h
new file mode 100644
index 00000000000..6c4f62ea714
--- /dev/null
+++ b/include/asm-generic/bugs.h
@@ -0,0 +1,10 @@
+#ifndef __ASM_GENERIC_BUGS_H
+#define __ASM_GENERIC_BUGS_H
+/*
+ * This file is included by 'init/main.c' to check for
+ * architecture-dependent bugs.
+ */
+
+static inline void check_bugs(void) { }
+
+#endif	/* __ASM_GENERIC_BUGS_H */
diff --git a/include/asm-generic/current.h b/include/asm-generic/current.h
new file mode 100644
index 00000000000..5e86f6ae7ca
--- /dev/null
+++ b/include/asm-generic/current.h
@@ -0,0 +1,9 @@
+#ifndef __ASM_GENERIC_CURRENT_H
+#define __ASM_GENERIC_CURRENT_H
+
+#include <linux/thread_info.h>
+
+#define get_current() (current_thread_info()->task)
+#define current get_current()
+
+#endif /* __ASM_GENERIC_CURRENT_H */
diff --git a/include/asm-generic/delay.h b/include/asm-generic/delay.h
new file mode 100644
index 00000000000..4586fec75dd
--- /dev/null
+++ b/include/asm-generic/delay.h
@@ -0,0 +1,9 @@
+#ifndef __ASM_GENERIC_DELAY_H
+#define __ASM_GENERIC_DELAY_H
+
+extern void __udelay(unsigned long usecs);
+extern void __delay(unsigned long loops);
+
+#define udelay(n) __udelay(n)
+
+#endif /* __ASM_GENERIC_DELAY_H */
diff --git a/include/asm-generic/fb.h b/include/asm-generic/fb.h
new file mode 100644
index 00000000000..fe8ca7fcea0
--- /dev/null
+++ b/include/asm-generic/fb.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_GENERIC_FB_H_
+#define __ASM_GENERIC_FB_H_
+#include <linux/fb.h>
+
+#define fb_pgprotect(...) do {} while (0)
+
+static inline int fb_is_primary_device(struct fb_info *info)
+{
+	return 0;
+}
+
+#endif /* __ASM_GENERIC_FB_H_ */
diff --git a/include/asm-generic/hardirq.h b/include/asm-generic/hardirq.h
new file mode 100644
index 00000000000..3d5d2c906ab
--- /dev/null
+++ b/include/asm-generic/hardirq.h
@@ -0,0 +1,34 @@
+#ifndef __ASM_GENERIC_HARDIRQ_H
+#define __ASM_GENERIC_HARDIRQ_H
+
+#include <linux/cache.h>
+#include <linux/threads.h>
+#include <linux/irq.h>
+
+typedef struct {
+	unsigned long __softirq_pending;
+} ____cacheline_aligned irq_cpustat_t;
+
+#include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
+
+#ifndef HARDIRQ_BITS
+#define HARDIRQ_BITS	8
+#endif
+
+/*
+ * The hardirq mask has to be large enough to have
+ * space for potentially all IRQ sources in the system
+ * nesting on a single CPU:
+ */
+#if (1 << HARDIRQ_BITS) < NR_IRQS
+# error HARDIRQ_BITS is too low!
+#endif
+
+#ifndef ack_bad_irq
+static inline void ack_bad_irq(unsigned int irq)
+{
+	printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
+}
+#endif
+
+#endif /* __ASM_GENERIC_HARDIRQ_H */
diff --git a/include/asm-generic/irq.h b/include/asm-generic/irq.h
new file mode 100644
index 00000000000..b90ec0bc485
--- /dev/null
+++ b/include/asm-generic/irq.h
@@ -0,0 +1,18 @@
+#ifndef __ASM_GENERIC_IRQ_H
+#define __ASM_GENERIC_IRQ_H
+
+/*
+ * NR_IRQS is the upper bound of how many interrupts can be handled
+ * in the platform. It is used to size the static irq_map array,
+ * so don't make it too big.
+ */
+#ifndef NR_IRQS
+#define NR_IRQS 64
+#endif
+
+static inline int irq_canonicalize(int irq)
+{
+	return irq;
+}
+
+#endif /* __ASM_GENERIC_IRQ_H */
diff --git a/include/asm-generic/irqflags.h b/include/asm-generic/irqflags.h
new file mode 100644
index 00000000000..9aebf618275
--- /dev/null
+++ b/include/asm-generic/irqflags.h
@@ -0,0 +1,72 @@
+#ifndef __ASM_GENERIC_IRQFLAGS_H
+#define __ASM_GENERIC_IRQFLAGS_H
+
+/*
+ * All architectures should implement at least the first two functions,
+ * usually inline assembly will be the best way.
+ */
+#ifndef RAW_IRQ_DISABLED
+#define RAW_IRQ_DISABLED 0
+#define RAW_IRQ_ENABLED 1
+#endif
+
+/* read interrupt enabled status */
+#ifndef __raw_local_save_flags
+unsigned long __raw_local_save_flags(void);
+#endif
+
+/* set interrupt enabled status */
+#ifndef raw_local_irq_restore
+void raw_local_irq_restore(unsigned long flags);
+#endif
+
+/* get status and disable interrupts */
+#ifndef __raw_local_irq_save
+static inline unsigned long __raw_local_irq_save(void)
+{
+	unsigned long flags;
+	flags = __raw_local_save_flags();
+	raw_local_irq_restore(RAW_IRQ_DISABLED);
+	return flags;
+}
+#endif
+
+/* test flags */
+#ifndef raw_irqs_disabled_flags
+static inline int raw_irqs_disabled_flags(unsigned long flags)
+{
+	return flags == RAW_IRQ_DISABLED;
+}
+#endif
+
+/* unconditionally enable interrupts */
+#ifndef raw_local_irq_enable
+static inline void raw_local_irq_enable(void)
+{
+	raw_local_irq_restore(RAW_IRQ_ENABLED);
+}
+#endif
+
+/* unconditionally disable interrupts */
+#ifndef raw_local_irq_disable
+static inline void raw_local_irq_disable(void)
+{
+	raw_local_irq_restore(RAW_IRQ_DISABLED);
+}
+#endif
+
+/* test hardware interrupt enable bit */
+#ifndef raw_irqs_disabled
+static inline int raw_irqs_disabled(void)
+{
+	return raw_irqs_disabled_flags(__raw_local_save_flags());
+}
+#endif
+
+#define raw_local_save_flags(flags) \
+	do { (flags) = __raw_local_save_flags(); } while (0)
+
+#define raw_local_irq_save(flags) \
+	do { (flags) = __raw_local_irq_save(); } while (0)
+
+#endif /* __ASM_GENERIC_IRQFLAGS_H */
diff --git a/include/asm-generic/kmap_types.h b/include/asm-generic/kmap_types.h
new file mode 100644
index 00000000000..58c33055c30
--- /dev/null
+++ b/include/asm-generic/kmap_types.h
@@ -0,0 +1,32 @@
+#ifndef _ASM_GENERIC_KMAP_TYPES_H
+#define _ASM_GENERIC_KMAP_TYPES_H
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+# define D(n) __KM_FENCE_##n ,
+#else
+# define D(n)
+#endif
+
+enum km_type {
+D(0)	KM_BOUNCE_READ,
+D(1)	KM_SKB_SUNRPC_DATA,
+D(2)	KM_SKB_DATA_SOFTIRQ,
+D(3)	KM_USER0,
+D(4)	KM_USER1,
+D(5)	KM_BIO_SRC_IRQ,
+D(6)	KM_BIO_DST_IRQ,
+D(7)	KM_PTE0,
+D(8)	KM_PTE1,
+D(9)	KM_IRQ0,
+D(10)	KM_IRQ1,
+D(11)	KM_SOFTIRQ0,
+D(12)	KM_SOFTIRQ1,
+D(13)	KM_SYNC_ICACHE,
+D(14)	KM_SYNC_DCACHE,
+D(15)	KM_UML_USERCOPY, /* UML specific, for copy_*_user - used in do_op_one_page */
+D(16)	KM_TYPE_NR
+};
+
+#undef D
+
+#endif
diff --git a/include/asm-generic/linkage.h b/include/asm-generic/linkage.h
new file mode 100644
index 00000000000..fef7a01e541
--- /dev/null
+++ b/include/asm-generic/linkage.h
@@ -0,0 +1,8 @@
+#ifndef __ASM_GENERIC_LINKAGE_H
+#define __ASM_GENERIC_LINKAGE_H
+/*
+ * linux/linkage.h provides reasonable defaults.
+ * an architecture can override them by providing its own version.
+ */
+
+#endif /* __ASM_GENERIC_LINKAGE_H */
diff --git a/include/asm-generic/module.h b/include/asm-generic/module.h
new file mode 100644
index 00000000000..ed5b44de4c9
--- /dev/null
+++ b/include/asm-generic/module.h
@@ -0,0 +1,22 @@
+#ifndef __ASM_GENERIC_MODULE_H
+#define __ASM_GENERIC_MODULE_H
+
+/*
+ * Many architectures just need a simple module
+ * loader without arch specific data.
+ */
+struct mod_arch_specific
+{
+};
+
+#ifdef CONFIG_64BIT
+#define Elf_Shdr Elf64_Shdr
+#define Elf_Sym Elf64_Sym
+#define Elf_Ehdr Elf64_Ehdr
+#else
+#define Elf_Shdr Elf32_Shdr
+#define Elf_Sym Elf32_Sym
+#define Elf_Ehdr Elf32_Ehdr
+#endif
+
+#endif /* __ASM_GENERIC_MODULE_H */
diff --git a/include/asm-generic/mutex.h b/include/asm-generic/mutex.h
new file mode 100644
index 00000000000..fe91ab50279
--- /dev/null
+++ b/include/asm-generic/mutex.h
@@ -0,0 +1,9 @@
+#ifndef __ASM_GENERIC_MUTEX_H
+#define __ASM_GENERIC_MUTEX_H
+/*
+ * Pull in the generic implementation for the mutex fastpath,
+ * which is a reasonable default on many architectures.
+ */
+
+#include <asm-generic/mutex-dec.h>
+#endif /* __ASM_GENERIC_MUTEX_H */
diff --git a/include/asm-generic/scatterlist.h b/include/asm-generic/scatterlist.h
new file mode 100644
index 00000000000..8b9454496a7
--- /dev/null
+++ b/include/asm-generic/scatterlist.h
@@ -0,0 +1,43 @@
+#ifndef __ASM_GENERIC_SCATTERLIST_H
+#define __ASM_GENERIC_SCATTERLIST_H
+
+#include <linux/types.h>
+
+struct scatterlist {
+#ifdef CONFIG_DEBUG_SG
+	unsigned long	sg_magic;
+#endif
+	unsigned long	page_link;
+	unsigned int	offset;
+	unsigned int	length;
+	dma_addr_t	dma_address;
+	unsigned int	dma_length;
+};
+
+/*
+ * These macros should be used after a dma_map_sg call has been done
+ * to get bus addresses of each of the SG entries and their lengths.
+ * You should only work with the number of sg entries pci_map_sg
+ * returns, or alternatively stop on the first sg_dma_len(sg) which
+ * is 0.
+ */
+#define sg_dma_address(sg)	((sg)->dma_address)
+#ifndef sg_dma_len
+/*
+ * Normally, you have an iommu on 64 bit machines, but not on 32 bit
+ * machines. Architectures that are differnt should override this.
+ */
+#if __BITS_PER_LONG == 64
+#define sg_dma_len(sg)		((sg)->dma_length)
+#else
+#define sg_dma_len(sg)		((sg)->length)
+#endif /* 64 bit */
+#endif /* sg_dma_len */
+
+#ifndef ISA_DMA_THRESHOLD
+#define ISA_DMA_THRESHOLD	(~0UL)
+#endif
+
+#define ARCH_HAS_SG_CHAIN
+
+#endif /* __ASM_GENERIC_SCATTERLIST_H */
diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h
new file mode 100644
index 00000000000..1547a03ac50
--- /dev/null
+++ b/include/asm-generic/spinlock.h
@@ -0,0 +1,11 @@
+#ifndef __ASM_GENERIC_SPINLOCK_H
+#define __ASM_GENERIC_SPINLOCK_H
+/*
+ * You need to implement asm/spinlock.h for SMP support. The generic
+ * version does not handle SMP.
+ */
+#ifdef CONFIG_SMP
+#error need an architecture specific asm/spinlock.h
+#endif
+
+#endif /* __ASM_GENERIC_SPINLOCK_H */
diff --git a/include/asm-generic/string.h b/include/asm-generic/string.h
new file mode 100644
index 00000000000..de5e0201459
--- /dev/null
+++ b/include/asm-generic/string.h
@@ -0,0 +1,10 @@
+#ifndef __ASM_GENERIC_STRING_H
+#define __ASM_GENERIC_STRING_H
+/*
+ * The kernel provides all required functions in lib/string.c
+ *
+ * Architectures probably want to provide at least their own optimized
+ * memcpy and memset functions though.
+ */
+
+#endif /* __ASM_GENERIC_STRING_H */
diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h
new file mode 100644
index 00000000000..df84e3b0455
--- /dev/null
+++ b/include/asm-generic/syscalls.h
@@ -0,0 +1,60 @@
+#ifndef __ASM_GENERIC_SYSCALLS_H
+#define __ASM_GENERIC_SYSCALLS_H
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+
+/*
+ * Calling conventions for these system calls can differ, so
+ * it's possible to override them.
+ */
+#ifndef sys_clone
+asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp,
+			void __user *parent_tid, void __user *child_tid,
+			struct pt_regs *regs);
+#endif
+
+#ifndef sys_fork
+asmlinkage long sys_fork(struct pt_regs *regs);
+#endif
+
+#ifndef sys_vfork
+asmlinkage long sys_vfork(struct pt_regs *regs);
+#endif
+
+#ifndef sys_execve
+asmlinkage long sys_execve(char __user *filename, char __user * __user *argv,
+			char __user * __user *envp, struct pt_regs *regs);
+#endif
+
+#ifndef sys_mmap2
+asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+			unsigned long prot, unsigned long flags,
+			unsigned long fd, unsigned long pgoff);
+#endif
+
+#ifndef sys_mmap
+asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
+			unsigned long prot, unsigned long flags,
+			unsigned long fd, off_t pgoff);
+#endif
+
+#ifndef sys_sigaltstack
+asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *,
+			struct pt_regs *);
+#endif
+
+#ifndef sys_rt_sigreturn
+asmlinkage long sys_rt_sigreturn(struct pt_regs *regs);
+#endif
+
+#ifndef sys_rt_sigsuspend
+asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize);
+#endif
+
+#ifndef sys_rt_sigaction
+asmlinkage long sys_rt_sigaction(int sig, const struct sigaction __user *act,
+			 struct sigaction __user *oact, size_t sigsetsize);
+#endif
+
+#endif /* __ASM_GENERIC_SYSCALLS_H */
diff --git a/include/asm-generic/system.h b/include/asm-generic/system.h
new file mode 100644
index 00000000000..efa403b5e12
--- /dev/null
+++ b/include/asm-generic/system.h
@@ -0,0 +1,161 @@
+/* Generic system definitions, based on MN10300 definitions.
+ *
+ * It should be possible to use these on really simple architectures,
+ * but it serves more as a starting point for new ports.
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef __ASM_GENERIC_SYSTEM_H
+#define __ASM_GENERIC_SYSTEM_H
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <linux/irqflags.h>
+
+#include <asm/cmpxchg-local.h>
+
+struct task_struct;
+
+/* context switching is now performed out-of-line in switch_to.S */
+extern struct task_struct *__switch_to(struct task_struct *,
+		struct task_struct *);
+#define switch_to(prev, next, last)					\
+	do {								\
+		((last) = __switch_to((prev), (next)));			\
+	} while (0)
+
+#define arch_align_stack(x) (x)
+
+#define nop() asm volatile ("nop")
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ *
+ * This implementation only contains a compiler barrier.
+ */
+
+#define mb()	asm volatile ("": : :"memory")
+#define rmb()	mb()
+#define wmb()	asm volatile ("": : :"memory")
+
+#ifdef CONFIG_SMP
+#define smp_mb()	mb()
+#define smp_rmb()	rmb()
+#define smp_wmb()	wmb()
+#else
+#define smp_mb()	barrier()
+#define smp_rmb()	barrier()
+#define smp_wmb()	barrier()
+#endif
+
+#define set_mb(var, value)  do { var = value;  mb(); } while (0)
+#define set_wmb(var, value) do { var = value; wmb(); } while (0)
+
+#define read_barrier_depends()		do {} while (0)
+#define smp_read_barrier_depends()	do {} while (0)
+
+/*
+ * we make sure local_irq_enable() doesn't cause priority inversion
+ */
+#ifndef __ASSEMBLY__
+
+/* This function doesn't exist, so you'll get a linker error
+ *    if something tries to do an invalid xchg().  */
+extern void __xchg_called_with_bad_pointer(void);
+
+static inline
+unsigned long __xchg(unsigned long x, volatile void *ptr, int size)
+{
+	unsigned long ret, flags;
+
+	switch (size) {
+	case 1:
+#ifdef __xchg_u8
+		return __xchg_u8(x, ptr);
+#else
+		local_irq_save(flags);
+		ret = *(volatile u8 *)ptr;
+		*(volatile u8 *)ptr = x;
+		local_irq_restore(flags);
+		return ret;
+#endif /* __xchg_u8 */
+
+	case 2:
+#ifdef __xchg_u16
+		return __xchg_u16(x, ptr);
+#else
+		local_irq_save(flags);
+		ret = *(volatile u16 *)ptr;
+		*(volatile u16 *)ptr = x;
+		local_irq_restore(flags);
+		return ret;
+#endif /* __xchg_u16 */
+
+	case 4:
+#ifdef __xchg_u32
+		return __xchg_u32(x, ptr);
+#else
+		local_irq_save(flags);
+		ret = *(volatile u32 *)ptr;
+		*(volatile u32 *)ptr = x;
+		local_irq_restore(flags);
+		return ret;
+#endif /* __xchg_u32 */
+
+#ifdef CONFIG_64BIT
+	case 8:
+#ifdef __xchg_u64
+		return __xchg_u64(x, ptr);
+#else
+		local_irq_save(flags);
+		ret = *(volatile u64 *)ptr;
+		*(volatile u64 *)ptr = x;
+		local_irq_restore(flags);
+		return ret;
+#endif /* __xchg_u64 */
+#endif /* CONFIG_64BIT */
+
+	default:
+		__xchg_called_with_bad_pointer();
+		return x;
+	}
+}
+
+#define xchg(ptr, x) \
+	((__typeof__(*(ptr))) __xchg((unsigned long)(x), (ptr), sizeof(*(ptr))))
+
+static inline unsigned long __cmpxchg(volatile unsigned long *m,
+				      unsigned long old, unsigned long new)
+{
+	unsigned long retval;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	retval = *m;
+	if (retval == old)
+		*m = new;
+	local_irq_restore(flags);
+	return retval;
+}
+
+#define cmpxchg(ptr, o, n)					\
+	((__typeof__(*(ptr))) __cmpxchg((unsigned long *)(ptr), \
+					(unsigned long)(o),	\
+					(unsigned long)(n)))
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_GENERIC_SYSTEM_H */
diff --git a/include/asm-generic/unaligned.h b/include/asm-generic/unaligned.h
new file mode 100644
index 00000000000..03cf5936bad
--- /dev/null
+++ b/include/asm-generic/unaligned.h
@@ -0,0 +1,30 @@
+#ifndef __ASM_GENERIC_UNALIGNED_H
+#define __ASM_GENERIC_UNALIGNED_H
+
+/*
+ * This is the most generic implementation of unaligned accesses
+ * and should work almost anywhere.
+ *
+ * If an architecture can handle unaligned accesses in hardware,
+ * it may want to use the linux/unaligned/access_ok.h implementation
+ * instead.
+ */
+#include <asm/byteorder.h>
+
+#if defined(__LITTLE_ENDIAN)
+# include <linux/unaligned/le_struct.h>
+# include <linux/unaligned/be_byteshift.h>
+# include <linux/unaligned/generic.h>
+# define get_unaligned	__get_unaligned_le
+# define put_unaligned	__put_unaligned_le
+#elif defined(__BIG_ENDIAN)
+# include <linux/unaligned/be_struct.h>
+# include <linux/unaligned/le_byteshift.h>
+# include <linux/unaligned/generic.h>
+# define get_unaligned	__get_unaligned_be
+# define put_unaligned	__put_unaligned_be
+#else
+# error need to define endianess
+#endif
+
+#endif /* __ASM_GENERIC_UNALIGNED_H */
diff --git a/include/asm-generic/user.h b/include/asm-generic/user.h
new file mode 100644
index 00000000000..8b9c3c960ae
--- /dev/null
+++ b/include/asm-generic/user.h
@@ -0,0 +1,8 @@
+#ifndef __ASM_GENERIC_USER_H
+#define __ASM_GENERIC_USER_H
+/*
+ * This file may define a 'struct user' structure. However, it it only
+ * used for a.out file, which are not supported on new architectures.
+ */
+
+#endif	/* __ASM_GENERIC_USER_H */
-- 
cgit v1.2.3-70-g09d2


From ae49e807951d5e26767bc55d1dc29671c596c450 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:34 +0000
Subject: asm-generic: add legacy I/O header files

The dma.h, hw_irq.h, serial.h and timex.h files originally
described PC-style i8237, i8259A, i8250, i8253 and i8255 chips
as well as the VGA style text mode graphics.

Modern architectures live happily without these specific
interfaces, but a few definitions from these headers keep
getting used in common code.

The new generic headers are what most architectures use
anyway nowadays, just implementing the minimal definitions.

Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/dma.h     | 15 +++++++++++++++
 include/asm-generic/hw_irq.h  |  9 +++++++++
 include/asm-generic/parport.h | 23 +++++++++++++++++++++++
 include/asm-generic/serial.h  | 13 +++++++++++++
 include/asm-generic/timex.h   | 22 ++++++++++++++++++++++
 include/asm-generic/vga.h     | 24 ++++++++++++++++++++++++
 6 files changed, 106 insertions(+)
 create mode 100644 include/asm-generic/dma.h
 create mode 100644 include/asm-generic/hw_irq.h
 create mode 100644 include/asm-generic/parport.h
 create mode 100644 include/asm-generic/serial.h
 create mode 100644 include/asm-generic/timex.h
 create mode 100644 include/asm-generic/vga.h

(limited to 'include')

diff --git a/include/asm-generic/dma.h b/include/asm-generic/dma.h
new file mode 100644
index 00000000000..9dfc3a7f36d
--- /dev/null
+++ b/include/asm-generic/dma.h
@@ -0,0 +1,15 @@
+#ifndef __ASM_GENERIC_DMA_H
+#define __ASM_GENERIC_DMA_H
+/*
+ * This file traditionally describes the i8237 PC style DMA controller.
+ * Most architectures don't have these any more and can get the minimal
+ * implementation from kernel/dma.c by not defining MAX_DMA_CHANNELS.
+ *
+ * Some code relies on seeing MAX_DMA_ADDRESS though.
+ */
+#define MAX_DMA_ADDRESS PAGE_OFFSET
+
+extern int request_dma(unsigned int dmanr, const char *device_id);
+extern void free_dma(unsigned int dmanr);
+
+#endif /* __ASM_GENERIC_DMA_H */
diff --git a/include/asm-generic/hw_irq.h b/include/asm-generic/hw_irq.h
new file mode 100644
index 00000000000..89036d7b40e
--- /dev/null
+++ b/include/asm-generic/hw_irq.h
@@ -0,0 +1,9 @@
+#ifndef __ASM_GENERIC_HW_IRQ_H
+#define __ASM_GENERIC_HW_IRQ_H
+/*
+ * hw_irq.h has internal declarations for the low-level interrupt
+ * controller, like the original i8259A.
+ * In general, this is not needed for new architectures.
+ */
+
+#endif /* __ASM_GENERIC_HW_IRQ_H */
diff --git a/include/asm-generic/parport.h b/include/asm-generic/parport.h
new file mode 100644
index 00000000000..40528cb977e
--- /dev/null
+++ b/include/asm-generic/parport.h
@@ -0,0 +1,23 @@
+#ifndef __ASM_GENERIC_PARPORT_H
+#define __ASM_GENERIC_PARPORT_H
+
+/*
+ * An ISA bus may have i8255 parallel ports at well-known
+ * locations in the I/O space, which are scanned by
+ * parport_pc_find_isa_ports.
+ *
+ * Without ISA support, the driver will only attach
+ * to devices on the PCI bus.
+ */
+
+static int __devinit parport_pc_find_isa_ports(int autoirq, int autodma);
+static int __devinit parport_pc_find_nonpci_ports(int autoirq, int autodma)
+{
+#ifdef CONFIG_ISA
+	return parport_pc_find_isa_ports(autoirq, autodma);
+#else
+	return 0;
+#endif
+}
+
+#endif /* __ASM_GENERIC_PARPORT_H */
diff --git a/include/asm-generic/serial.h b/include/asm-generic/serial.h
new file mode 100644
index 00000000000..5e291090fe0
--- /dev/null
+++ b/include/asm-generic/serial.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_GENERIC_SERIAL_H
+#define __ASM_GENERIC_SERIAL_H
+
+/*
+ * This should not be an architecture specific #define, oh well.
+ *
+ * Traditionally, it just describes i8250 and related serial ports
+ * that have this clock rate.
+ */
+
+#define BASE_BAUD (1843200 / 16)
+
+#endif /* __ASM_GENERIC_SERIAL_H */
diff --git a/include/asm-generic/timex.h b/include/asm-generic/timex.h
new file mode 100644
index 00000000000..b2243cb8d6f
--- /dev/null
+++ b/include/asm-generic/timex.h
@@ -0,0 +1,22 @@
+#ifndef __ASM_GENERIC_TIMEX_H
+#define __ASM_GENERIC_TIMEX_H
+
+/*
+ * If you have a cycle counter, return the value here.
+ */
+typedef unsigned long cycles_t;
+#ifndef get_cycles
+static inline cycles_t get_cycles(void)
+{
+	return 0;
+}
+#endif
+
+/*
+ * Architectures are encouraged to implement read_current_timer
+ * and define this in order to avoid the expensive delay loop
+ * calibration during boot.
+ */
+#undef ARCH_HAS_READ_CURRENT_TIMER
+
+#endif /* __ASM_GENERIC_TIMEX_H */
diff --git a/include/asm-generic/vga.h b/include/asm-generic/vga.h
new file mode 100644
index 00000000000..36c8ff52016
--- /dev/null
+++ b/include/asm-generic/vga.h
@@ -0,0 +1,24 @@
+/*
+ *	Access to VGA videoram
+ *
+ *	(c) 1998 Martin Mares <mj@ucw.cz>
+ */
+#ifndef __ASM_GENERIC_VGA_H
+#define __ASM_GENERIC_VGA_H
+
+/*
+ *	On most architectures that support VGA, we can just
+ *	recalculate addresses and then access the videoram
+ *	directly without any black magic.
+ *
+ *	Everyone else needs to ioremap the address and use
+ *	proper I/O accesses.
+ */
+#ifndef VGA_MAP_MEM
+#define VGA_MAP_MEM(x, s) (unsigned long)phys_to_virt(x)
+#endif
+
+#define vga_readb(x) (*(x))
+#define vga_writeb(x, y) (*(y) = (x))
+
+#endif /* _ASM_GENERIC_VGA_H */
-- 
cgit v1.2.3-70-g09d2


From 3f7e212df82ca0459d44c91d9e019efd1b5f936c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:35 +0000
Subject: asm-generic: add generic atomic.h and io.h

atomic.h and io.h are based on the mn10300 architecture,
which is already pretty generic and can be used by
other architectures that do not have hardware support
for atomic operations or out-of-order I/O access.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/atomic.h | 165 ++++++++++++++++++++++++
 include/asm-generic/io.h     | 300 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 465 insertions(+)
 create mode 100644 include/asm-generic/atomic.h
 create mode 100644 include/asm-generic/io.h

(limited to 'include')

diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
new file mode 100644
index 00000000000..c99c64dc5f3
--- /dev/null
+++ b/include/asm-generic/atomic.h
@@ -0,0 +1,165 @@
+/*
+ * Generic C implementation of atomic counter operations
+ * Originally implemented for MN10300.
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef __ASM_GENERIC_ATOMIC_H
+#define __ASM_GENERIC_ATOMIC_H
+
+#ifdef CONFIG_SMP
+#error not SMP safe
+#endif
+
+/*
+ * Atomic operations that C can't guarantee us.  Useful for
+ * resource counting etc..
+ */
+
+#define ATOMIC_INIT(i)	{ (i) }
+
+#ifdef __KERNEL__
+
+/**
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically reads the value of @v.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+#define atomic_read(v)	((v)->counter)
+
+/**
+ * atomic_set - set atomic variable
+ * @v: pointer of type atomic_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+#define atomic_set(v, i) (((v)->counter) = (i))
+
+#include <asm/system.h>
+
+/**
+ * atomic_add_return - add integer to atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v and returns the result
+ * Note that the guaranteed useful range of an atomic_t is only 24 bits.
+ */
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+	unsigned long flags;
+	int temp;
+
+	local_irq_save(flags);
+	temp = v->counter;
+	temp += i;
+	v->counter = temp;
+	local_irq_restore(flags);
+
+	return temp;
+}
+
+/**
+ * atomic_sub_return - subtract integer from atomic variable
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v and returns the result
+ * Note that the guaranteed useful range of an atomic_t is only 24 bits.
+ */
+static inline int atomic_sub_return(int i, atomic_t *v)
+{
+	unsigned long flags;
+	int temp;
+
+	local_irq_save(flags);
+	temp = v->counter;
+	temp -= i;
+	v->counter = temp;
+	local_irq_restore(flags);
+
+	return temp;
+}
+
+static inline int atomic_add_negative(int i, atomic_t *v)
+{
+	return atomic_add_return(i, v) < 0;
+}
+
+static inline void atomic_add(int i, atomic_t *v)
+{
+	atomic_add_return(i, v);
+}
+
+static inline void atomic_sub(int i, atomic_t *v)
+{
+	atomic_sub_return(i, v);
+}
+
+static inline void atomic_inc(atomic_t *v)
+{
+	atomic_add_return(1, v);
+}
+
+static inline void atomic_dec(atomic_t *v)
+{
+	atomic_sub_return(1, v);
+}
+
+#define atomic_dec_return(v)		atomic_sub_return(1, (v))
+#define atomic_inc_return(v)		atomic_add_return(1, (v))
+
+#define atomic_sub_and_test(i, v)	(atomic_sub_return((i), (v)) == 0)
+#define atomic_dec_and_test(v)		(atomic_sub_return(1, (v)) == 0)
+#define atomic_inc_and_test(v)		(atomic_add_return(1, (v)) == 0)
+
+#define atomic_add_unless(v, a, u)				\
+({								\
+	int c, old;						\
+	c = atomic_read(v);					\
+	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
+		c = old;					\
+	c != (u);						\
+})
+
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
+static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
+{
+	unsigned long flags;
+
+	mask = ~mask;
+	local_irq_save(flags);
+	*addr &= mask;
+	local_irq_restore(flags);
+}
+
+#define atomic_xchg(ptr, v)		(xchg(&(ptr)->counter, (v)))
+#define atomic_cmpxchg(v, old, new)	(cmpxchg(&((v)->counter), (old), (new)))
+
+#define cmpxchg_local(ptr, o, n)				  	       \
+	((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\
+			(unsigned long)(n), sizeof(*(ptr))))
+
+#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
+
+/* Assume that atomic operations are already serializing */
+#define smp_mb__before_atomic_dec()	barrier()
+#define smp_mb__after_atomic_dec()	barrier()
+#define smp_mb__before_atomic_inc()	barrier()
+#define smp_mb__after_atomic_inc()	barrier()
+
+#include <asm-generic/atomic-long.h>
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_GENERIC_ATOMIC_H */
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
new file mode 100644
index 00000000000..bcee6365dca
--- /dev/null
+++ b/include/asm-generic/io.h
@@ -0,0 +1,300 @@
+/* Generic I/O port emulation, based on MN10300 code
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef __ASM_GENERIC_IO_H
+#define __ASM_GENERIC_IO_H
+
+#include <asm/page.h> /* I/O is all done through memory accesses */
+#include <asm/cacheflush.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_GENERIC_IOMAP
+#include <asm-generic/iomap.h>
+#endif
+
+#define mmiowb() do {} while (0)
+
+/*****************************************************************************/
+/*
+ * readX/writeX() are used to access memory mapped devices. On some
+ * architectures the memory mapped IO stuff needs to be accessed
+ * differently. On the simple architectures, we just read/write the
+ * memory location directly.
+ */
+static inline u8 __raw_readb(const volatile void __iomem *addr)
+{
+	return *(const volatile u8 __force *) addr;
+}
+
+static inline u16 __raw_readw(const volatile void __iomem *addr)
+{
+	return *(const volatile u16 __force *) addr;
+}
+
+static inline u32 __raw_readl(const volatile void __iomem *addr)
+{
+	return *(const volatile u32 __force *) addr;
+}
+
+#define readb __raw_readb
+#define readw(addr) __le16_to_cpu(__raw_readw(addr))
+#define readl(addr) __le32_to_cpu(__raw_readl(addr))
+
+static inline void __raw_writeb(u8 b, volatile void __iomem *addr)
+{
+	*(volatile u8 __force *) addr = b;
+}
+
+static inline void __raw_writew(u16 b, volatile void __iomem *addr)
+{
+	*(volatile u16 __force *) addr = b;
+}
+
+static inline void __raw_writel(u32 b, volatile void __iomem *addr)
+{
+	*(volatile u32 __force *) addr = b;
+}
+
+#define writeb __raw_writeb
+#define writew(b,addr) __raw_writew(__cpu_to_le16(b),addr)
+#define writel(b,addr) __raw_writel(__cpu_to_le32(b),addr)
+
+#ifdef CONFIG_64BIT
+static inline u64 __raw_readq(const volatile void __iomem *addr)
+{
+	return *(const volatile u64 __force *) addr;
+}
+#define readq(addr) __le64_to_cpu(__raw_readq(addr))
+
+static inline void __raw_writeq(u64 b, volatile void __iomem *addr)
+{
+	*(volatile u64 __force *) addr = b;
+}
+#define writeq(b,addr) __raw_writeq(__cpu_to_le64(b),addr)
+#endif
+
+/*****************************************************************************/
+/*
+ * traditional input/output functions
+ */
+
+static inline u8 inb(unsigned long addr)
+{
+	return readb((volatile void __iomem *) addr);
+}
+
+static inline u16 inw(unsigned long addr)
+{
+	return readw((volatile void __iomem *) addr);
+}
+
+static inline u32 inl(unsigned long addr)
+{
+	return readl((volatile void __iomem *) addr);
+}
+
+static inline void outb(u8 b, unsigned long addr)
+{
+	writeb(b, (volatile void __iomem *) addr);
+}
+
+static inline void outw(u16 b, unsigned long addr)
+{
+	writew(b, (volatile void __iomem *) addr);
+}
+
+static inline void outl(u32 b, unsigned long addr)
+{
+	writel(b, (volatile void __iomem *) addr);
+}
+
+#define inb_p(addr)	inb(addr)
+#define inw_p(addr)	inw(addr)
+#define inl_p(addr)	inl(addr)
+#define outb_p(x, addr)	outb((x), (addr))
+#define outw_p(x, addr)	outw((x), (addr))
+#define outl_p(x, addr)	outl((x), (addr))
+
+static inline void insb(unsigned long addr, void *buffer, int count)
+{
+	if (count) {
+		u8 *buf = buffer;
+		do {
+			u8 x = inb(addr);
+			*buf++ = x;
+		} while (--count);
+	}
+}
+
+static inline void insw(unsigned long addr, void *buffer, int count)
+{
+	if (count) {
+		u16 *buf = buffer;
+		do {
+			u16 x = inw(addr);
+			*buf++ = x;
+		} while (--count);
+	}
+}
+
+static inline void insl(unsigned long addr, void *buffer, int count)
+{
+	if (count) {
+		u32 *buf = buffer;
+		do {
+			u32 x = inl(addr);
+			*buf++ = x;
+		} while (--count);
+	}
+}
+
+static inline void outsb(unsigned long addr, const void *buffer, int count)
+{
+	if (count) {
+		const u8 *buf = buffer;
+		do {
+			outb(*buf++, addr);
+		} while (--count);
+	}
+}
+
+static inline void outsw(unsigned long addr, const void *buffer, int count)
+{
+	if (count) {
+		const u16 *buf = buffer;
+		do {
+			outw(*buf++, addr);
+		} while (--count);
+	}
+}
+
+static inline void outsl(unsigned long addr, const void *buffer, int count)
+{
+	if (count) {
+		const u32 *buf = buffer;
+		do {
+			outl(*buf++, addr);
+		} while (--count);
+	}
+}
+
+#ifndef CONFIG_GENERIC_IOMAP
+#define ioread8(addr)		readb(addr)
+#define ioread16(addr)		readw(addr)
+#define ioread32(addr)		readl(addr)
+
+#define iowrite8(v, addr)	writeb((v), (addr))
+#define iowrite16(v, addr)	writew((v), (addr))
+#define iowrite32(v, addr)	writel((v), (addr))
+
+#define ioread8_rep(p, dst, count) \
+	insb((unsigned long) (p), (dst), (count))
+#define ioread16_rep(p, dst, count) \
+	insw((unsigned long) (p), (dst), (count))
+#define ioread32_rep(p, dst, count) \
+	insl((unsigned long) (p), (dst), (count))
+
+#define iowrite8_rep(p, src, count) \
+	outsb((unsigned long) (p), (src), (count))
+#define iowrite16_rep(p, src, count) \
+	outsw((unsigned long) (p), (src), (count))
+#define iowrite32_rep(p, src, count) \
+	outsl((unsigned long) (p), (src), (count))
+#endif /* CONFIG_GENERIC_IOMAP */
+
+
+#define IO_SPACE_LIMIT 0xffffffff
+
+#ifdef __KERNEL__
+
+#include <linux/vmalloc.h>
+#define __io_virt(x) ((void __force *) (x))
+
+#ifndef CONFIG_GENERIC_IOMAP
+/* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
+struct pci_dev;
+extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
+static inline void pci_iounmap(struct pci_dev *dev, void __iomem *p)
+{
+}
+#endif /* CONFIG_GENERIC_IOMAP */
+
+/*
+ * Change virtual addresses to physical addresses and vv.
+ * These are pretty trivial
+ */
+static inline unsigned long virt_to_phys(volatile void *address)
+{
+	return __pa((unsigned long)address);
+}
+
+static inline void *phys_to_virt(unsigned long address)
+{
+	return __va(address);
+}
+
+/*
+ * Change "struct page" to physical address.
+ */
+static inline void __iomem *ioremap(phys_addr_t offset, unsigned long size)
+{
+	return (void __iomem*) (unsigned long)offset;
+}
+
+#define __ioremap(offset, size, flags)	ioremap(offset, size)
+
+#ifndef ioremap_nocache
+#define ioremap_nocache ioremap
+#endif
+
+#ifndef ioremap_wc
+#define ioremap_wc ioremap_nocache
+#endif
+
+static inline void iounmap(void *addr)
+{
+}
+
+#ifndef CONFIG_GENERIC_IOMAP
+static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
+{
+	return (void __iomem *) port;
+}
+
+static inline void ioport_unmap(void __iomem *p)
+{
+}
+#else /* CONFIG_GENERIC_IOMAP */
+extern void __iomem *ioport_map(unsigned long port, unsigned int nr);
+extern void ioport_unmap(void __iomem *p);
+#endif /* CONFIG_GENERIC_IOMAP */
+
+#define xlate_dev_kmem_ptr(p)	p
+#define xlate_dev_mem_ptr(p)	((void *) (p))
+
+#ifndef virt_to_bus
+static inline unsigned long virt_to_bus(volatile void *address)
+{
+	return ((unsigned long) address);
+}
+
+static inline void *bus_to_virt(unsigned long address)
+{
+	return (void *) address;
+}
+#endif
+
+#define memset_io(a, b, c)	memset(__io_virt(a), (b), (c))
+#define memcpy_fromio(a, b, c)	memcpy((a), __io_virt(b), (c))
+#define memcpy_toio(a, b, c)	memcpy(__io_virt(a), (b), (c))
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASM_GENERIC_IO_H */
-- 
cgit v1.2.3-70-g09d2


From 5c01b46bb6bb8f2662573c05c87b5d68fa25af89 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:36 +0000
Subject: asm-generic: add generic NOMMU versions of some headers

Memory management in generic is highly architecture specific,
but on NOMMU architectures, it is mostly trivial, so just
add a default implementation in asm-generic that applies
to all NOMMU architectures.

The two files cache.h and cacheflush.h can possibly also
be used by architectures that have an MMU but never require
flushing the cache or have cache lines larger than 32 bytes.

Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/cache.h       | 12 +++++
 include/asm-generic/cacheflush.h  | 30 ++++++++++++
 include/asm-generic/mmu.h         | 15 ++++++
 include/asm-generic/mmu_context.h | 45 ++++++++++++++++++
 include/asm-generic/page.h        | 99 +++++++++++++++++++++++++++++++++++++++
 include/asm-generic/pgalloc.h     | 12 +++++
 include/asm-generic/segment.h     |  9 ++++
 include/asm-generic/tlbflush.h    | 18 +++++++
 8 files changed, 240 insertions(+)
 create mode 100644 include/asm-generic/cache.h
 create mode 100644 include/asm-generic/cacheflush.h
 create mode 100644 include/asm-generic/mmu.h
 create mode 100644 include/asm-generic/mmu_context.h
 create mode 100644 include/asm-generic/page.h
 create mode 100644 include/asm-generic/pgalloc.h
 create mode 100644 include/asm-generic/segment.h
 create mode 100644 include/asm-generic/tlbflush.h

(limited to 'include')

diff --git a/include/asm-generic/cache.h b/include/asm-generic/cache.h
new file mode 100644
index 00000000000..1bfcfe5c223
--- /dev/null
+++ b/include/asm-generic/cache.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_GENERIC_CACHE_H
+#define __ASM_GENERIC_CACHE_H
+/*
+ * 32 bytes appears to be the most common cache line size,
+ * so make that the default here. Architectures with larger
+ * cache lines need to provide their own cache.h.
+ */
+
+#define L1_CACHE_SHIFT		5
+#define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
+
+#endif /* __ASM_GENERIC_CACHE_H */
diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h
new file mode 100644
index 00000000000..ba4ec39a113
--- /dev/null
+++ b/include/asm-generic/cacheflush.h
@@ -0,0 +1,30 @@
+#ifndef __ASM_CACHEFLUSH_H
+#define __ASM_CACHEFLUSH_H
+
+/* Keep includes the same across arches.  */
+#include <linux/mm.h>
+
+/*
+ * The cache doesn't need to be flushed when TLB entries change when
+ * the cache is mapped to physical memory, not virtual memory
+ */
+#define flush_cache_all()			do { } while (0)
+#define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
+#define flush_cache_range(vma, start, end)	do { } while (0)
+#define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
+#define flush_dcache_page(page)			do { } while (0)
+#define flush_dcache_mmap_lock(mapping)		do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
+#define flush_icache_range(start, end)		do { } while (0)
+#define flush_icache_page(vma,pg)		do { } while (0)
+#define flush_icache_user_range(vma,pg,adr,len)	do { } while (0)
+#define flush_cache_vmap(start, end)		do { } while (0)
+#define flush_cache_vunmap(start, end)		do { } while (0)
+
+#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
+	memcpy(dst, src, len)
+#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
+	memcpy(dst, src, len)
+
+#endif /* __ASM_CACHEFLUSH_H */
diff --git a/include/asm-generic/mmu.h b/include/asm-generic/mmu.h
new file mode 100644
index 00000000000..4f4aa56d6b5
--- /dev/null
+++ b/include/asm-generic/mmu.h
@@ -0,0 +1,15 @@
+#ifndef __ASM_GENERIC_MMU_H
+#define __ASM_GENERIC_MMU_H
+
+/*
+ * This is the mmu.h header for nommu implementations.
+ * Architectures with an MMU need something more complex.
+ */
+#ifndef __ASSEMBLY__
+typedef struct {
+	struct vm_list_struct	*vmlist;
+	unsigned long		end_brk;
+} mm_context_t;
+#endif
+
+#endif /* __ASM_GENERIC_MMU_H */
diff --git a/include/asm-generic/mmu_context.h b/include/asm-generic/mmu_context.h
new file mode 100644
index 00000000000..a7eec910ba6
--- /dev/null
+++ b/include/asm-generic/mmu_context.h
@@ -0,0 +1,45 @@
+#ifndef __ASM_GENERIC_MMU_CONTEXT_H
+#define __ASM_GENERIC_MMU_CONTEXT_H
+
+/*
+ * Generic hooks for NOMMU architectures, which do not need to do
+ * anything special here.
+ */
+
+#include <asm-generic/mm_hooks.h>
+
+struct task_struct;
+struct mm_struct;
+
+static inline void enter_lazy_tlb(struct mm_struct *mm,
+			struct task_struct *tsk)
+{
+}
+
+static inline int init_new_context(struct task_struct *tsk,
+			struct mm_struct *mm)
+{
+	return 0;
+}
+
+static inline void destroy_context(struct mm_struct *mm)
+{
+}
+
+static inline void deactivate_mm(struct task_struct *task,
+			struct mm_struct *mm)
+{
+}
+
+static inline void switch_mm(struct mm_struct *prev,
+			struct mm_struct *next,
+			struct task_struct *tsk)
+{
+}
+
+static inline void activate_mm(struct mm_struct *prev_mm,
+			       struct mm_struct *next_mm)
+{
+}
+
+#endif /* __ASM_GENERIC_MMU_CONTEXT_H */
diff --git a/include/asm-generic/page.h b/include/asm-generic/page.h
new file mode 100644
index 00000000000..75fec18cdc5
--- /dev/null
+++ b/include/asm-generic/page.h
@@ -0,0 +1,99 @@
+#ifndef __ASM_GENERIC_PAGE_H
+#define __ASM_GENERIC_PAGE_H
+/*
+ * Generic page.h implementation, for NOMMU architectures.
+ * This provides the dummy definitions for the memory management.
+ */
+
+#ifdef CONFIG_MMU
+#error need to prove a real asm/page.h
+#endif
+
+
+/* PAGE_SHIFT determines the page size */
+
+#define PAGE_SHIFT	12
+#ifdef __ASSEMBLY__
+#define PAGE_SIZE	(1 << PAGE_SHIFT)
+#else
+#define PAGE_SIZE	(1UL << PAGE_SHIFT)
+#endif
+#define PAGE_MASK	(~(PAGE_SIZE-1))
+
+#include <asm/setup.h>
+
+#ifndef __ASSEMBLY__
+
+#define get_user_page(vaddr)		__get_free_page(GFP_KERNEL)
+#define free_user_page(page, addr)	free_page(addr)
+
+#define clear_page(page)	memset((page), 0, PAGE_SIZE)
+#define copy_page(to,from)	memcpy((to), (from), PAGE_SIZE)
+
+#define clear_user_page(page, vaddr, pg)	clear_page(page)
+#define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
+
+/*
+ * These are used to make use of C type-checking..
+ */
+typedef struct {
+	unsigned long pte;
+} pte_t;
+typedef struct {
+	unsigned long pmd[16];
+} pmd_t;
+typedef struct {
+	unsigned long pgd;
+} pgd_t;
+typedef struct {
+	unsigned long pgprot;
+} pgprot_t;
+typedef struct page *pgtable_t;
+
+#define pte_val(x)	((x).pte)
+#define pmd_val(x)	((&x)->pmd[0])
+#define pgd_val(x)	((x).pgd)
+#define pgprot_val(x)	((x).pgprot)
+
+#define __pte(x)	((pte_t) { (x) } )
+#define __pmd(x)	((pmd_t) { (x) } )
+#define __pgd(x)	((pgd_t) { (x) } )
+#define __pgprot(x)	((pgprot_t) { (x) } )
+
+extern unsigned long memory_start;
+extern unsigned long memory_end;
+
+#endif /* !__ASSEMBLY__ */
+
+#ifdef CONFIG_KERNEL_RAM_BASE_ADDRESS
+#define PAGE_OFFSET		(CONFIG_KERNEL_RAM_BASE_ADDRESS)
+#else
+#define PAGE_OFFSET		(0)
+#endif
+
+#ifndef __ASSEMBLY__
+
+#define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET))
+#define __pa(x) ((unsigned long) (x) - PAGE_OFFSET)
+
+#define virt_to_pfn(kaddr)	(__pa(kaddr) >> PAGE_SHIFT)
+#define pfn_to_virt(pfn)	__va((pfn) << PAGE_SHIFT)
+
+#define virt_to_page(addr)	(mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT))
+#define page_to_virt(page)	((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)
+
+#ifndef page_to_phys
+#define page_to_phys(page)      ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
+#endif
+
+#define pfn_valid(pfn)		((pfn) < max_mapnr)
+
+#define	virt_addr_valid(kaddr)	(((void *)(kaddr) >= (void *)PAGE_OFFSET) && \
+				((void *)(kaddr) < (void *)memory_end))
+
+#endif /* __ASSEMBLY__ */
+
+#include <asm-generic/memory_model.h>
+#include <asm-generic/getorder.h>
+
+#endif /* __ASM_GENERIC_PAGE_H */
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
new file mode 100644
index 00000000000..9e429d08b1f
--- /dev/null
+++ b/include/asm-generic/pgalloc.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_GENERIC_PGALLOC_H
+#define __ASM_GENERIC_PGALLOC_H
+/*
+ * an empty file is enough for a nommu architecture
+ */
+#ifdef CONFIG_MMU
+#error need to implement an architecture specific asm/pgalloc.h
+#endif
+
+#define check_pgt_cache()          do { } while (0)
+
+#endif /* __ASM_GENERIC_PGALLOC_H */
diff --git a/include/asm-generic/segment.h b/include/asm-generic/segment.h
new file mode 100644
index 00000000000..5580eace622
--- /dev/null
+++ b/include/asm-generic/segment.h
@@ -0,0 +1,9 @@
+#ifndef __ASM_GENERIC_SEGMENT_H
+#define __ASM_GENERIC_SEGMENT_H
+/*
+ * Only here because we have some old header files that expect it...
+ *
+ * New architectures probably don't want to have their own version.
+ */
+
+#endif /* __ASM_GENERIC_SEGMENT_H */
diff --git a/include/asm-generic/tlbflush.h b/include/asm-generic/tlbflush.h
new file mode 100644
index 00000000000..c7af037024c
--- /dev/null
+++ b/include/asm-generic/tlbflush.h
@@ -0,0 +1,18 @@
+#ifndef __ASM_GENERIC_TLBFLUSH_H
+#define __ASM_GENERIC_TLBFLUSH_H
+/*
+ * This is a dummy tlbflush implementation that can be used on all
+ * nommu architectures.
+ * If you have an MMU, you need to write your own functions.
+ */
+#ifdef CONFIG_MMU
+#error need to implement an architecture specific asm/tlbflush.h
+#endif
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+	BUG();
+}
+
+
+#endif /* __ASM_GENERIC_TLBFLUSH_H */
-- 
cgit v1.2.3-70-g09d2


From eed417ddd52146f446595b5a7d8f21b1814b95b7 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:37 +0000
Subject: asm-generic: add a generic uaccess.h

Based on discussions with Michal Simek and code
from m68knommu and h8300, this version of uaccess.h
should be usable by most architectures, by overriding
some parts of it.

Simple NOMMU architectures can use it out of
the box, but a minimal __access_ok() should be
added there as well.

Cc: Michal Simek <monstr@monstr.eu>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/uaccess.h | 325 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 325 insertions(+)
 create mode 100644 include/asm-generic/uaccess.h

(limited to 'include')

diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h
new file mode 100644
index 00000000000..6d8cab22e29
--- /dev/null
+++ b/include/asm-generic/uaccess.h
@@ -0,0 +1,325 @@
+#ifndef __ASM_GENERIC_UACCESS_H
+#define __ASM_GENERIC_UACCESS_H
+
+/*
+ * User space memory access functions, these should work
+ * on a ny machine that has kernel and user data in the same
+ * address space, e.g. all NOMMU machines.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+
+#include <asm/segment.h>
+
+#define MAKE_MM_SEG(s)	((mm_segment_t) { (s) })
+
+#ifndef KERNEL_DS
+#define KERNEL_DS	MAKE_MM_SEG(~0UL)
+#endif
+
+#ifndef USER_DS
+#define USER_DS		MAKE_MM_SEG(TASK_SIZE - 1)
+#endif
+
+#ifndef get_fs
+#define get_ds()	(KERNEL_DS)
+#define get_fs()	(current_thread_info()->addr_limit)
+
+static inline void set_fs(mm_segment_t fs)
+{
+	current_thread_info()->addr_limit = fs;
+}
+#endif
+
+#define segment_eq(a, b) ((a).seg == (b).seg)
+
+#define VERIFY_READ	0
+#define VERIFY_WRITE	1
+
+#define access_ok(type, addr, size) __access_ok((unsigned long)(addr),(size))
+
+/*
+ * The architecture should really override this if possible, at least
+ * doing a check on the get_fs()
+ */
+#ifndef __access_ok
+static inline int __access_ok(unsigned long addr, unsigned long size)
+{
+	return 1;
+}
+#endif
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue.  No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path.  This means when everything is well,
+ * we don't even have to jump over them.  Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry
+{
+	unsigned long insn, fixup;
+};
+
+/* Returns 0 if exception not found and fixup otherwise.  */
+extern unsigned long search_exception_table(unsigned long);
+
+/*
+ * architectures with an MMU should override these two
+ */
+#ifndef __copy_from_user
+static inline __must_check long __copy_from_user(void *to,
+		const void __user * from, unsigned long n)
+{
+	if (__builtin_constant_p(n)) {
+		switch(n) {
+		case 1:
+			*(u8 *)to = *(u8 __force *)from;
+			return 0;
+		case 2:
+			*(u16 *)to = *(u16 __force *)from;
+			return 0;
+		case 4:
+			*(u32 *)to = *(u32 __force *)from;
+			return 0;
+#ifdef CONFIG_64BIT
+		case 8:
+			*(u64 *)to = *(u64 __force *)from;
+			return 0;
+#endif
+		default:
+			break;
+		}
+	}
+
+	memcpy(to, (const void __force *)from, n);
+	return 0;
+}
+#endif
+
+#ifndef __copy_to_user
+static inline __must_check long __copy_to_user(void __user *to,
+		const void *from, unsigned long n)
+{
+	if (__builtin_constant_p(n)) {
+		switch(n) {
+		case 1:
+			*(u8 __force *)to = *(u8 *)from;
+			return 0;
+		case 2:
+			*(u16 __force *)to = *(u16 *)from;
+			return 0;
+		case 4:
+			*(u32 __force *)to = *(u32 *)from;
+			return 0;
+#ifdef CONFIG_64BIT
+		case 8:
+			*(u64 __force *)to = *(u64 *)from;
+			return 0;
+#endif
+		default:
+			break;
+		}
+	}
+
+	memcpy((void __force *)to, from, n);
+	return 0;
+}
+#endif
+
+/*
+ * These are the main single-value transfer routines.  They automatically
+ * use the right size if we just have the right pointer type.
+ * This version just falls back to copy_{from,to}_user, which should
+ * provide a fast-path for small values.
+ */
+#define __put_user(x, ptr) \
+({								\
+	__typeof__(*(ptr)) __x = (x);				\
+	int __pu_err = -EFAULT;					\
+        __chk_user_ptr(ptr);                                    \
+	switch (sizeof (*(ptr))) {				\
+	case 1:							\
+	case 2:							\
+	case 4:							\
+	case 8:							\
+		__pu_err = __put_user_fn(sizeof (*(ptr)),	\
+					 ptr, &__x);		\
+		break;						\
+	default:						\
+		__put_user_bad();				\
+		break;						\
+	 }							\
+	__pu_err;						\
+})
+
+#define put_user(x, ptr)					\
+({								\
+	might_sleep();						\
+	__access_ok(ptr, sizeof (*ptr)) ?			\
+		__put_user(x, ptr) :				\
+		-EFAULT;					\
+})
+
+static inline int __put_user_fn(size_t size, void __user *ptr, void *x)
+{
+	size = __copy_to_user(ptr, x, size);
+	return size ? -EFAULT : size;
+}
+
+extern int __put_user_bad(void) __attribute__((noreturn));
+
+#define __get_user(x, ptr)					\
+({								\
+	int __gu_err = -EFAULT;					\
+	__chk_user_ptr(ptr);					\
+	switch (sizeof(*(ptr))) {				\
+	case 1: {						\
+		unsigned char __x;				\
+		__gu_err = __get_user_fn(sizeof (*(ptr)),	\
+					 ptr, &__x);		\
+		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
+		break;						\
+	};							\
+	case 2: {						\
+		unsigned short __x;				\
+		__gu_err = __get_user_fn(sizeof (*(ptr)),	\
+					 ptr, &__x);		\
+		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
+		break;						\
+	};							\
+	case 4: {						\
+		unsigned int __x;				\
+		__gu_err = __get_user_fn(sizeof (*(ptr)),	\
+					 ptr, &__x);		\
+		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
+		break;						\
+	};							\
+	case 8: {						\
+		unsigned long long __x;				\
+		__gu_err = __get_user_fn(sizeof (*(ptr)),	\
+					 ptr, &__x);		\
+		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
+		break;						\
+	};							\
+	default:						\
+		__get_user_bad();				\
+		break;						\
+	}							\
+	__gu_err;						\
+})
+
+#define get_user(x, ptr)					\
+({								\
+	might_sleep();						\
+	__access_ok(ptr, sizeof (*ptr)) ?			\
+		__get_user(x, ptr) :				\
+		-EFAULT;					\
+})
+
+static inline int __get_user_fn(size_t size, const void __user *ptr, void *x)
+{
+	size = __copy_from_user(x, ptr, size);
+	return size ? -EFAULT : size;
+}
+
+extern int __get_user_bad(void) __attribute__((noreturn));
+
+#ifndef __copy_from_user_inatomic
+#define __copy_from_user_inatomic __copy_from_user
+#endif
+
+#ifndef __copy_to_user_inatomic
+#define __copy_to_user_inatomic __copy_to_user
+#endif
+
+static inline long copy_from_user(void *to,
+		const void __user * from, unsigned long n)
+{
+	might_sleep();
+	if (__access_ok(from, n))
+		return __copy_from_user(to, from, n);
+	else
+		return n;
+}
+
+static inline long copy_to_user(void __user *to,
+		const void *from, unsigned long n)
+{
+	might_sleep();
+	if (__access_ok(to, n))
+		return __copy_to_user(to, from, n);
+	else
+		return n;
+}
+
+/*
+ * Copy a null terminated string from userspace.
+ */
+#ifndef __strncpy_from_user
+static inline long
+__strncpy_from_user(char *dst, const char __user *src, long count)
+{
+	char *tmp;
+	strncpy(dst, (const char __force *)src, count);
+	for (tmp = dst; *tmp && count > 0; tmp++, count--)
+		;
+	return (tmp - dst);
+}
+#endif
+
+static inline long
+strncpy_from_user(char *dst, const char __user *src, long count)
+{
+	if (!__access_ok(src, 1))
+		return -EFAULT;
+	return __strncpy_from_user(dst, src, count);
+}
+
+/*
+ * Return the size of a string (including the ending 0)
+ *
+ * Return 0 on exception, a value greater than N if too long
+ */
+#ifndef strnlen_user
+static inline long strnlen_user(const char __user *src, long n)
+{
+	return strlen((void * __force)src) + 1;
+}
+#endif
+
+static inline long strlen_user(const char __user *src)
+{
+	return strnlen_user(src, 32767);
+}
+
+/*
+ * Zero Userspace
+ */
+#ifndef __clear_user
+static inline __must_check unsigned long
+__clear_user(void __user *to, unsigned long n)
+{
+	memset((void __force *)to, 0, n);
+	return 0;
+}
+#endif
+
+static inline __must_check unsigned long
+clear_user(void __user *to, unsigned long n)
+{
+	might_sleep();
+	if (!__access_ok(to, n))
+		return n;
+
+	return __clear_user(to, n);
+}
+
+#endif /* __ASM_GENERIC_UACCESS_H */
-- 
cgit v1.2.3-70-g09d2


From 26a28fa4fea5b8c65713aa50c124f76a88c7924d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 May 2009 22:56:38 +0000
Subject: add generic lib/checksum.c

Add a generic (unoptimized) implementation of checksum.c in pure C
for use by all architectures that cannot be bother with implementing
their own version.

Based on microblaze code by Michal Simek <monstr@monstr.eu>

Cc: Michal Simek <monstr@monstr.eu>
Signed-off-by: Remis Lima Baima <remis.developer@googlemail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/checksum.h |  79 +++++++++++++++++
 lib/Makefile                   |   2 +
 lib/checksum.c                 | 193 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 274 insertions(+)
 create mode 100644 include/asm-generic/checksum.h
 create mode 100644 lib/checksum.c

(limited to 'include')

diff --git a/include/asm-generic/checksum.h b/include/asm-generic/checksum.h
new file mode 100644
index 00000000000..4647c762d97
--- /dev/null
+++ b/include/asm-generic/checksum.h
@@ -0,0 +1,79 @@
+#ifndef __ASM_GENERIC_CHECKSUM_H
+#define __ASM_GENERIC_CHECKSUM_H
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+extern __wsum csum_partial(const void *buff, int len, __wsum sum);
+
+/*
+ * the same as csum_partial, but copies from src while it
+ * checksums
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+extern __wsum csum_partial_copy(const void *src, void *dst, int len, __wsum sum);
+
+/*
+ * the same as csum_partial_copy, but copies from user space.
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst,
+					int len, __wsum sum, int *csum_err);
+
+#define csum_partial_copy_nocheck(src, dst, len, sum)	\
+	csum_partial_copy((src), (dst), (len), (sum))
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ */
+extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
+
+/*
+ * Fold a partial checksum
+ */
+static inline __sum16 csum_fold(__wsum csum)
+{
+	u32 sum = (__force u32)csum;
+	sum = (sum & 0xffff) + (sum >> 16);
+	sum = (sum & 0xffff) + (sum >> 16);
+	return (__force __sum16)~sum;
+}
+
+#ifndef csum_tcpudp_nofold
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+extern __wsum
+csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len,
+		unsigned short proto, __wsum sum);
+#endif
+
+static inline __sum16
+csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len,
+		  unsigned short proto, __wsum sum)
+{
+	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ */
+extern __sum16 ip_compute_csum(const void *buff, int len);
+
+#endif /* __ASM_GENERIC_CHECKSUM_H */
diff --git a/lib/Makefile b/lib/Makefile
index 33a40e40e3e..b6869e12e74 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -92,6 +92,8 @@ obj-$(CONFIG_NLATTR) += nlattr.o
 
 obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o
 
+obj-$(CONFIG_GENERIC_CSUM) += checksum.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/lib/checksum.c b/lib/checksum.c
new file mode 100644
index 00000000000..12e5a1c91cd
--- /dev/null
+++ b/lib/checksum.c
@@ -0,0 +1,193 @@
+/*
+ *
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		IP/TCP/UDP checksumming routines
+ *
+ * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
+ *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *		Tom May, <ftom@netcom.com>
+ *		Andreas Schwab, <schwab@issan.informatik.uni-dortmund.de>
+ *		Lots of code moved from tcp.c and ip.c; see those files
+ *		for more names.
+ *
+ * 03/02/96	Jes Sorensen, Andreas Schwab, Roman Hodek:
+ *		Fixed some nasty bugs, causing some horrible crashes.
+ *		A: At some points, the sum (%0) was used as
+ *		length-counter instead of the length counter
+ *		(%1). Thanks to Roman Hodek for pointing this out.
+ *		B: GCC seems to mess up if one uses too many
+ *		data-registers to hold input values and one tries to
+ *		specify d0 and d1 as scratch registers. Letting gcc
+ *		choose these registers itself solves the problem.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+/* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access
+ kills, so most of the assembly has to go. */
+
+#include <linux/module.h>
+#include <net/checksum.h>
+
+#include <asm/byteorder.h>
+
+static inline unsigned short from32to16(unsigned long x)
+{
+	/* add up 16-bit and 16-bit for 16+c bit */
+	x = (x & 0xffff) + (x >> 16);
+	/* add up carry.. */
+	x = (x & 0xffff) + (x >> 16);
+	return x;
+}
+
+static unsigned int do_csum(const unsigned char *buff, int len)
+{
+	int odd, count;
+	unsigned long result = 0;
+
+	if (len <= 0)
+		goto out;
+	odd = 1 & (unsigned long) buff;
+	if (odd) {
+		result = *buff;
+		len--;
+		buff++;
+	}
+	count = len >> 1;		/* nr of 16-bit words.. */
+	if (count) {
+		if (2 & (unsigned long) buff) {
+			result += *(unsigned short *) buff;
+			count--;
+			len -= 2;
+			buff += 2;
+		}
+		count >>= 1;		/* nr of 32-bit words.. */
+		if (count) {
+			unsigned long carry = 0;
+			do {
+				unsigned long w = *(unsigned long *) buff;
+				count--;
+				buff += 4;
+				result += carry;
+				result += w;
+				carry = (w > result);
+			} while (count);
+			result += carry;
+			result = (result & 0xffff) + (result >> 16);
+		}
+		if (len & 2) {
+			result += *(unsigned short *) buff;
+			buff += 2;
+		}
+	}
+	if (len & 1)
+		result += (*buff << 8);
+	result = from32to16(result);
+	if (odd)
+		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+out:
+	return result;
+}
+
+/*
+ *	This is a version of ip_compute_csum() optimized for IP headers,
+ *	which always checksum on 4 octet boundaries.
+ */
+__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+	return (__force __sum16)~do_csum(iph, ihl*4);
+}
+EXPORT_SYMBOL(ip_fast_csum);
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+__wsum csum_partial(const void *buff, int len, __wsum wsum)
+{
+	unsigned int sum = (__force unsigned int)wsum;
+	unsigned int result = do_csum(buff, len);
+
+	/* add in old sum, and carry.. */
+	result += sum;
+	if (sum > result)
+		result += 1;
+	return (__force __wsum)result;
+}
+EXPORT_SYMBOL(csum_partial);
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ */
+__sum16 ip_compute_csum(const void *buff, int len)
+{
+	return (__force __sum16)~do_csum(buff, len);
+}
+EXPORT_SYMBOL(ip_compute_csum);
+
+/*
+ * copy from fs while checksumming, otherwise like csum_partial
+ */
+__wsum
+csum_partial_copy_from_user(const void __user *src, void *dst, int len,
+						__wsum sum, int *csum_err)
+{
+	int missing;
+
+	missing = __copy_from_user(dst, src, len);
+	if (missing) {
+		memset(dst + len - missing, 0, missing);
+		*csum_err = -EFAULT;
+	} else
+		*csum_err = 0;
+
+	return csum_partial(dst, len, sum);
+}
+EXPORT_SYMBOL(csum_partial_copy_from_user);
+
+/*
+ * copy from ds while checksumming, otherwise like csum_partial
+ */
+__wsum
+csum_partial_copy(const void *src, void *dst, int len, __wsum sum)
+{
+	memcpy(dst, src, len);
+	return csum_partial(dst, len, sum);
+}
+EXPORT_SYMBOL(csum_partial_copy);
+
+#ifndef csum_tcpudp_nofold
+__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+			unsigned short len,
+			unsigned short proto,
+			__wsum sum)
+{
+	unsigned long long s = (__force u32)sum;
+
+	s += (__force u32)saddr;
+	s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN
+	s += proto + len;
+#else
+	s += (proto + len) << 8;
+#endif
+	s += (s >> 32);
+	return (__force __wsum)s;
+}
+EXPORT_SYMBOL(csum_tcpudp_nofold);
+#endif
-- 
cgit v1.2.3-70-g09d2


From 04846b5b8112e53b588038349b3e92b8485c1807 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Mon, 20 Apr 2009 10:54:52 +0900
Subject: PCI MSI: Remove unused/obsolete macros and definitions

Impact: cleanup, spec compliance

This patch does:

 - Remove unused msi/msix_enable/disable macros.
   User should use msi/msix_set_enable() functions instead.

 - Remove unused msix_mask/unmask/pending macros.
   These macros are useless because they are not based on any of
   the PCI Local Bus Specifications properly.
   It seems that they were written based on a draft of PCI spec,
   and that the draft was the MSI-X ECN that underwent membership
   review in September 2002.
   (* In the draft, the size of a entry in MSI-X table was 64bit,
      containing 32bit message data and DWORD aligned lower address
      plus a pending bit and a mask bit.(30+1+1bit)  The higher
      address was placed in MSI-X capability structure and shared
      by all entries.)

 - Remove PCI_MSIX_FLAGS_BITMASK.
   This definition also come from the draft ECN.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Reviewed-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/msi.h        | 8 +-------
 include/linux/pci_regs.h | 1 -
 2 files changed, 1 insertion(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/msi.h b/drivers/pci/msi.h
index 71f4df2ef65..4fed5926195 100644
--- a/drivers/pci/msi.h
+++ b/drivers/pci/msi.h
@@ -19,18 +19,12 @@
 	( (is64bit == 1) ? base+PCI_MSI_DATA_64 : base+PCI_MSI_DATA_32 )
 #define msi_mask_bits_reg(base, is64bit) \
 	( (is64bit == 1) ? base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4)
-#define msi_disable(control)		control &= ~PCI_MSI_FLAGS_ENABLE
 #define is_64bit_address(control)	(!!(control & PCI_MSI_FLAGS_64BIT))
 #define is_mask_bit_support(control)	(!!(control & PCI_MSI_FLAGS_MASKBIT))
 
 #define msix_table_offset_reg(base)	(base + 0x04)
 #define msix_pba_offset_reg(base)	(base + 0x08)
-#define msix_enable(control)	 	control |= PCI_MSIX_FLAGS_ENABLE
-#define msix_disable(control)	 	control &= ~PCI_MSIX_FLAGS_ENABLE
 #define msix_table_size(control) 	((control & PCI_MSIX_FLAGS_QSIZE)+1)
-#define multi_msix_capable		msix_table_size
-#define msix_unmask(address)	 	(address & ~PCI_MSIX_FLAGS_BITMASK)
-#define msix_mask(address)		(address | PCI_MSIX_FLAGS_BITMASK)
-#define msix_is_pending(address) 	(address & PCI_MSIX_FLAGS_PENDMASK)
+#define multi_msix_capable(control)	msix_table_size((control))
 
 #endif /* MSI_H */
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index 616bf8b3c8b..dcba7668e0c 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -304,7 +304,6 @@
 #define  PCI_MSIX_FLAGS_ENABLE	(1 << 15)
 #define  PCI_MSIX_FLAGS_MASKALL	(1 << 14)
 #define PCI_MSIX_FLAGS_BIRMASK	(7 << 0)
-#define PCI_MSIX_FLAGS_BITMASK	(1 << 0)
 
 /* CompactPCI Hotswap Register */
 
-- 
cgit v1.2.3-70-g09d2


From 67b5db6502ddd27d65dea43bf036abbd82d0dfc9 Mon Sep 17 00:00:00 2001
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Date: Mon, 20 Apr 2009 10:54:59 +0900
Subject: PCI MSI: Define PCI_MSI_MASK_32/64

Impact: cleanup, improve readability

Define PCI_MSI_MASK_32/64 for 32/64bit devices, instead of using
implicit offset (-4), "PCI_MSI_MASK_BIT - 4" and "PCI_MSI_MASK_BIT".

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Reviewed-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/msi.c        | 2 +-
 drivers/pci/msi.h        | 6 +++---
 include/linux/pci_regs.h | 3 ++-
 3 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 362773247fb..7ffac27d5d4 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -381,7 +381,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec)
 	entry->msi_attrib.default_irq = dev->irq;	/* Save IOAPIC IRQ */
 	entry->msi_attrib.pos = pos;
 
-	entry->mask_pos = msi_mask_bits_reg(pos, entry->msi_attrib.is_64);
+	entry->mask_pos = msi_mask_reg(pos, entry->msi_attrib.is_64);
 	/* All MSIs are unmasked by default, Mask them all */
 	if (entry->msi_attrib.maskbit)
 		pci_read_config_dword(dev, entry->mask_pos, &entry->masked);
diff --git a/drivers/pci/msi.h b/drivers/pci/msi.h
index 4fed5926195..a0662842550 100644
--- a/drivers/pci/msi.h
+++ b/drivers/pci/msi.h
@@ -16,9 +16,9 @@
 #define msi_lower_address_reg(base)	(base + PCI_MSI_ADDRESS_LO)
 #define msi_upper_address_reg(base)	(base + PCI_MSI_ADDRESS_HI)
 #define msi_data_reg(base, is64bit)	\
-	( (is64bit == 1) ? base+PCI_MSI_DATA_64 : base+PCI_MSI_DATA_32 )
-#define msi_mask_bits_reg(base, is64bit) \
-	( (is64bit == 1) ? base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4)
+	(base + ((is64bit == 1) ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32))
+#define msi_mask_reg(base, is64bit)	\
+	(base + ((is64bit == 1) ? PCI_MSI_MASK_64 : PCI_MSI_MASK_32))
 #define is_64bit_address(control)	(!!(control & PCI_MSI_FLAGS_64BIT))
 #define is_mask_bit_support(control)	(!!(control & PCI_MSI_FLAGS_MASKBIT))
 
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index dcba7668e0c..83b02f5a25b 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -295,8 +295,9 @@
 #define PCI_MSI_ADDRESS_LO	4	/* Lower 32 bits */
 #define PCI_MSI_ADDRESS_HI	8	/* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */
 #define PCI_MSI_DATA_32		8	/* 16 bits of data for 32-bit devices */
+#define PCI_MSI_MASK_32		12	/* Mask bits register for 32-bit devices */
 #define PCI_MSI_DATA_64		12	/* 16 bits of data for 64-bit devices */
-#define PCI_MSI_MASK_BIT	16	/* Mask bits register */
+#define PCI_MSI_MASK_64		16	/* Mask bits register for 64-bit devices */
 
 /* MSI-X registers (these are at offset PCI_MSIX_FLAGS) */
 #define PCI_MSIX_FLAGS		2
-- 
cgit v1.2.3-70-g09d2


From 1f82de10d6b1d845155363c895c552e61b36b51a Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Thu, 23 Apr 2009 20:48:32 -0700
Subject: PCI/x86: don't assume prefetchable ranges are 64bit

We should not assign 64bit ranges to PCI devices that only take 32bit
prefetchable addresses.

Try to set IORESOURCE_MEM_64 in 64bit resource of pci_device/pci_bridge
and make the bus resource only have that bit set when all devices under
it support 64bit prefetchable memory.  Use that flag to allocate
resources from that range.

Reported-by: Yannick <yannick.roehlly@free.fr>
Reviewed-by: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/include/asm/pci.h |  1 +
 drivers/pci/bus.c          |  7 ++++++-
 drivers/pci/probe.c        |  9 ++++++--
 drivers/pci/setup-bus.c    | 52 +++++++++++++++++++++++++++++++++++++---------
 include/linux/ioport.h     |  2 ++
 include/linux/pci.h        |  4 ++++
 6 files changed, 62 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index b51a1e8b0ba..927958d13c1 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -130,6 +130,7 @@ extern void pci_iommu_alloc(void);
 
 /* generic pci stuff */
 #include <asm-generic/pci.h>
+#define PCIBIOS_MAX_MEM_32 0xffffffff
 
 #ifdef CONFIG_NUMA
 /* Returns the node based on pci bus */
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 97a8194063b..40af27f3104 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -41,9 +41,14 @@ pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res,
 		void *alignf_data)
 {
 	int i, ret = -ENOMEM;
+	resource_size_t max = -1;
 
 	type_mask |= IORESOURCE_IO | IORESOURCE_MEM;
 
+	/* don't allocate too high if the pref mem doesn't support 64bit*/
+	if (!(res->flags & IORESOURCE_MEM_64))
+		max = PCIBIOS_MAX_MEM_32;
+
 	for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
 		struct resource *r = bus->resource[i];
 		if (!r)
@@ -62,7 +67,7 @@ pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res,
 		/* Ok, try it out.. */
 		ret = allocate_resource(r, res, size,
 					r->start ? : min,
-					-1, align,
+					max, align,
 					alignf, alignf_data);
 		if (ret == 0)
 			break;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index f1ae2475fff..b962326e3d9 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -193,7 +193,7 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
 		res->flags |= pci_calc_resource_flags(l) | IORESOURCE_SIZEALIGN;
 		if (type == pci_bar_io) {
 			l &= PCI_BASE_ADDRESS_IO_MASK;
-			mask = PCI_BASE_ADDRESS_IO_MASK & 0xffff;
+			mask = PCI_BASE_ADDRESS_IO_MASK & IO_SPACE_LIMIT;
 		} else {
 			l &= PCI_BASE_ADDRESS_MEM_MASK;
 			mask = (u32)PCI_BASE_ADDRESS_MEM_MASK;
@@ -237,6 +237,8 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
 			dev_printk(KERN_DEBUG, &dev->dev,
 				"reg %x 64bit mmio: %pR\n", pos, res);
 		}
+
+		res->flags |= IORESOURCE_MEM_64;
 	} else {
 		sz = pci_size(l, sz, mask);
 
@@ -362,7 +364,10 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
 		}
 	}
 	if (base <= limit) {
-		res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH;
+		res->flags = (mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) |
+					 IORESOURCE_MEM | IORESOURCE_PREFETCH;
+		if (res->flags & PCI_PREF_RANGE_TYPE_64)
+			res->flags |= IORESOURCE_MEM_64;
 		res->start = base;
 		res->end = limit + 0xfffff;
 		dev_printk(KERN_DEBUG, &dev->dev, "bridge %sbit mmio pref: %pR\n",
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index a00f85471b6..e1c360a5b0d 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -143,6 +143,7 @@ static void pci_setup_bridge(struct pci_bus *bus)
 	struct pci_dev *bridge = bus->self;
 	struct pci_bus_region region;
 	u32 l, bu, lu, io_upper16;
+	int pref_mem64;
 
 	if (pci_is_enabled(bridge))
 		return;
@@ -198,16 +199,22 @@ static void pci_setup_bridge(struct pci_bus *bus)
 	pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, 0);
 
 	/* Set up PREF base/limit. */
+	pref_mem64 = 0;
 	bu = lu = 0;
 	pcibios_resource_to_bus(bridge, &region, bus->resource[2]);
 	if (bus->resource[2]->flags & IORESOURCE_PREFETCH) {
+		int width = 8;
 		l = (region.start >> 16) & 0xfff0;
 		l |= region.end & 0xfff00000;
-		bu = upper_32_bits(region.start);
-		lu = upper_32_bits(region.end);
-		dev_info(&bridge->dev, "  PREFETCH window: %#016llx-%#016llx\n",
-		    (unsigned long long)region.start,
-		    (unsigned long long)region.end);
+		if (bus->resource[2]->flags & IORESOURCE_MEM_64) {
+			pref_mem64 = 1;
+			bu = upper_32_bits(region.start);
+			lu = upper_32_bits(region.end);
+			width = 16;
+		}
+		dev_info(&bridge->dev, "  PREFETCH window: %#0*llx-%#0*llx\n",
+				width, (unsigned long long)region.start,
+				width, (unsigned long long)region.end);
 	}
 	else {
 		l = 0x0000fff0;
@@ -215,9 +222,11 @@ static void pci_setup_bridge(struct pci_bus *bus)
 	}
 	pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, l);
 
-	/* Set the upper 32 bits of PREF base & limit. */
-	pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, bu);
-	pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, lu);
+	if (pref_mem64) {
+		/* Set the upper 32 bits of PREF base & limit. */
+		pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, bu);
+		pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, lu);
+	}
 
 	pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, bus->bridge_ctl);
 }
@@ -255,8 +264,25 @@ static void pci_bridge_check_ranges(struct pci_bus *bus)
 		pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem);
 		pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, 0x0);
 	}
-	if (pmem)
+	if (pmem) {
 		b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH;
+		if ((pmem & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64)
+			b_res[2].flags |= IORESOURCE_MEM_64;
+	}
+
+	/* double check if bridge does support 64 bit pref */
+	if (b_res[2].flags & IORESOURCE_MEM_64) {
+		u32 mem_base_hi, tmp;
+		pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32,
+					 &mem_base_hi);
+		pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32,
+					       0xffffffff);
+		pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, &tmp);
+		if (!tmp)
+			b_res[2].flags &= ~IORESOURCE_MEM_64;
+		pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32,
+				       mem_base_hi);
+	}
 }
 
 /* Helper function for sizing routines: find first available
@@ -336,6 +362,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long
 	resource_size_t aligns[12];	/* Alignments from 1Mb to 2Gb */
 	int order, max_order;
 	struct resource *b_res = find_free_bus_resource(bus, type);
+	unsigned int mem64_mask = 0;
 
 	if (!b_res)
 		return 0;
@@ -344,9 +371,12 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long
 	max_order = 0;
 	size = 0;
 
+	mem64_mask = b_res->flags & IORESOURCE_MEM_64;
+	b_res->flags &= ~IORESOURCE_MEM_64;
+
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		int i;
-		
+
 		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
 			struct resource *r = &dev->resource[i];
 			resource_size_t r_size;
@@ -372,6 +402,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long
 				aligns[order] += align;
 			if (order > max_order)
 				max_order = order;
+			mem64_mask &= r->flags & IORESOURCE_MEM_64;
 		}
 	}
 
@@ -396,6 +427,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long
 	b_res->start = min_align;
 	b_res->end = size + min_align - 1;
 	b_res->flags |= IORESOURCE_STARTALIGN;
+	b_res->flags |= mem64_mask;
 	return 1;
 }
 
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 32e4b2f7229..786e7b8cece 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -49,6 +49,8 @@ struct resource_list {
 #define IORESOURCE_SIZEALIGN	0x00020000	/* size indicates alignment */
 #define IORESOURCE_STARTALIGN	0x00040000	/* start field is alignment */
 
+#define IORESOURCE_MEM_64	0x00100000
+
 #define IORESOURCE_EXCLUSIVE	0x08000000	/* Userland may not map this resource */
 #define IORESOURCE_DISABLED	0x10000000
 #define IORESOURCE_UNSET	0x20000000
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 72698d89e76..6dfa47d25ba 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1097,6 +1097,10 @@ static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus,
 
 #include <asm/pci.h>
 
+#ifndef PCIBIOS_MAX_MEM_32
+#define PCIBIOS_MAX_MEM_32 (-1)
+#endif
+
 /* these helpers provide future and backwards compatibility
  * for accessing popular PCI BAR info */
 #define pci_resource_start(dev, bar)	((dev)->resource[(bar)].start)
-- 
cgit v1.2.3-70-g09d2


From 3b073eda9557975a87a27b08a46a545fe8da66fb Mon Sep 17 00:00:00 2001
From: Alex Chiang <achiang@hp.com>
Date: Tue, 31 Mar 2009 09:24:22 -0600
Subject: PCI: remove deprecated pci_find_slot() interface

The last in-tree caller of pci_find_slot has been converted, so
let's get rid of this deprecated interface.

Signed-off-by: Alex Chiang <achiang@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/search.c | 30 ------------------------------
 include/linux/pci.h  |  8 --------
 2 files changed, 38 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/search.c b/drivers/pci/search.c
index 710d4ea6956..650bc0a538d 100644
--- a/drivers/pci/search.c
+++ b/drivers/pci/search.c
@@ -114,36 +114,6 @@ pci_find_next_bus(const struct pci_bus *from)
 }
 
 #ifdef CONFIG_PCI_LEGACY
-/**
- * pci_find_slot - locate PCI device from a given PCI slot
- * @bus: number of PCI bus on which desired PCI device resides
- * @devfn: encodes number of PCI slot in which the desired PCI
- * device resides and the logical device number within that slot
- * in case of multi-function devices.
- *
- * Given a PCI bus and slot/function number, the desired PCI device
- * is located in system global list of PCI devices.  If the device
- * is found, a pointer to its data structure is returned.  If no
- * device is found, %NULL is returned.
- *
- * NOTE: Do not use this function any more; use pci_get_slot() instead, as
- * the PCI device returned by this function can disappear at any moment in
- * time.
- */
-struct pci_dev *pci_find_slot(unsigned int bus, unsigned int devfn)
-{
-	struct pci_dev *dev = NULL;
-
-	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
-		if (dev->bus->number == bus && dev->devfn == devfn) {
-			pci_dev_put(dev);
-			return dev;
-		}
-	}
-	return NULL;
-}
-EXPORT_SYMBOL(pci_find_slot);
-
 /**
  * pci_find_device - begin or continue searching for a PCI device by vendor/device id
  * @vendor: PCI vendor id to match, or %PCI_ANY_ID to match all vendor ids
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 6dfa47d25ba..19ee92c53ef 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -599,8 +599,6 @@ extern void pci_sort_breadthfirst(void);
 struct pci_dev __deprecated *pci_find_device(unsigned int vendor,
 					     unsigned int device,
 					     struct pci_dev *from);
-struct pci_dev __deprecated *pci_find_slot(unsigned int bus,
-					   unsigned int devfn);
 #endif /* CONFIG_PCI_LEGACY */
 
 enum pci_lost_interrupt_reason {
@@ -936,12 +934,6 @@ static inline struct pci_dev *pci_find_device(unsigned int vendor,
 	return NULL;
 }
 
-static inline struct pci_dev *pci_find_slot(unsigned int bus,
-					    unsigned int devfn)
-{
-	return NULL;
-}
-
 static inline struct pci_dev *pci_get_device(unsigned int vendor,
 					     unsigned int device,
 					     struct pci_dev *from)
-- 
cgit v1.2.3-70-g09d2


From 43c16408842b0eeb367c23a6fa540ce69f99e347 Mon Sep 17 00:00:00 2001
From: Andrew Patterson <andrew.patterson@hp.com>
Date: Wed, 22 Apr 2009 16:52:09 -0600
Subject: PCI: Add support for turning PCIe ECRC on or off

Adds support for PCI Express transaction layer end-to-end CRC checking
(ECRC).  This patch will enable/disable ECRC checking by setting/clearing
the ECRC Check Enable and/or ECRC Generation Enable bits for devices that
support ECRC.

The ECRC setting is controlled by the "pci=ecrc=<policy>" command-line
option. If this option is not set or is set to 'bios", the enable and
generation bits are left in whatever state that firmware/BIOS set them to.
The "off" setting turns them off, and the "on" option turns them on (if the
device supports it).

Turning ECRC on or off can be a data integrity versus performance
tradeoff.  In theory, turning it on will catch more data errors, turning
it off means possibly better performance since CRC does not need to be
calculated by the PCIe hardware and packet sizes are reduced.

Signed-off-by: Andrew Patterson <andrew.patterson@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 Documentation/kernel-parameters.txt |   6 ++
 drivers/pci/pci.c                   |   2 +
 drivers/pci/pcie/aer/Kconfig        |  13 ++++
 drivers/pci/pcie/aer/Makefile       |   2 +
 drivers/pci/pcie/aer/aerdrv_core.c  |  16 +++--
 drivers/pci/pcie/aer/ecrc.c         | 131 ++++++++++++++++++++++++++++++++++++
 include/linux/pci.h                 |  11 +++
 7 files changed, 174 insertions(+), 7 deletions(-)
 create mode 100644 drivers/pci/pcie/aer/ecrc.c

(limited to 'include')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 7bdaf508040..395d1a013eb 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1824,6 +1824,12 @@ and is between 256 and 4096 characters. It is defined in the file
 				PAGE_SIZE is used as alignment.
 				PCI-PCI bridge can be specified, if resource
 				windows need to be expanded.
+		ecrc=		Enable/disable PCIe ECRC (transaction layer
+				end-to-end CRC checking).
+				bios: Use BIOS/firmware settings. This is the
+				the default.
+				off: Turn ECRC off
+				on: Turn ECRC on.
 
 	pcie_aspm=	[PCIE] Forcibly enable or disable PCIe Active State Power
 			Management.
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 761557688b1..56fb18d2cb5 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2588,6 +2588,8 @@ static int __init pci_setup(char *str)
 			} else if (!strncmp(str, "resource_alignment=", 19)) {
 				pci_set_resource_alignment_param(str + 19,
 							strlen(str + 19));
+			} else if (!strncmp(str, "ecrc=", 5)) {
+				pcie_ecrc_get_policy(str + 5);
 			} else {
 				printk(KERN_ERR "PCI: Unknown option `%s'\n",
 						str);
diff --git a/drivers/pci/pcie/aer/Kconfig b/drivers/pci/pcie/aer/Kconfig
index c3bde588aa1..db4cb950933 100644
--- a/drivers/pci/pcie/aer/Kconfig
+++ b/drivers/pci/pcie/aer/Kconfig
@@ -10,3 +10,16 @@ config PCIEAER
 	  This enables PCI Express Root Port Advanced Error Reporting
 	  (AER) driver support. Error reporting messages sent to Root
 	  Port will be handled by PCI Express AER driver.
+
+
+#
+# PCI Express ECRC
+#
+config PCIE_ECRC
+	bool "PCI Express ECRC settings control"
+	depends on PCIEAER
+	help
+	  Used to override firmware/bios settings for PCI Express ECRC
+	  (transaction layer end-to-end CRC checking).
+
+	  When in doubt, say N.
diff --git a/drivers/pci/pcie/aer/Makefile b/drivers/pci/pcie/aer/Makefile
index 8da3bd8455a..7f93411c56e 100644
--- a/drivers/pci/pcie/aer/Makefile
+++ b/drivers/pci/pcie/aer/Makefile
@@ -4,6 +4,8 @@
 
 obj-$(CONFIG_PCIEAER) += aerdriver.o
 
+obj-$(CONFIG_PCIE_ECRC)	+= ecrc.o
+
 aerdriver-objs := aerdrv_errprint.o aerdrv_core.o aerdrv.o
 aerdriver-$(CONFIG_ACPI) += aerdrv_acpi.o
 
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 307452f3003..dd3829e68e3 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -113,15 +113,17 @@ static void set_device_error_reporting(struct pci_dev *dev, void *data)
 {
 	bool enable = *((bool *)data);
 
-	if (dev->pcie_type != PCIE_RC_PORT &&
-	    dev->pcie_type != PCIE_SW_UPSTREAM_PORT &&
-	    dev->pcie_type != PCIE_SW_DOWNSTREAM_PORT)
-		return;
+	if (dev->pcie_type == PCIE_RC_PORT ||
+	    dev->pcie_type == PCIE_SW_UPSTREAM_PORT ||
+	    dev->pcie_type == PCIE_SW_DOWNSTREAM_PORT) {
+		if (enable)
+			pci_enable_pcie_error_reporting(dev);
+		else
+			pci_disable_pcie_error_reporting(dev);
+	}
 
 	if (enable)
-		pci_enable_pcie_error_reporting(dev);
-	else
-		pci_disable_pcie_error_reporting(dev);
+		pcie_set_ecrc_checking(dev);
 }
 
 /**
diff --git a/drivers/pci/pcie/aer/ecrc.c b/drivers/pci/pcie/aer/ecrc.c
new file mode 100644
index 00000000000..ece97df4df6
--- /dev/null
+++ b/drivers/pci/pcie/aer/ecrc.c
@@ -0,0 +1,131 @@
+/*
+ *    Enables/disables PCIe ECRC checking.
+ *
+ *    (C) Copyright 2009 Hewlett-Packard Development Company, L.P.
+ *    Andrew Patterson <andrew.patterson@hp.com>
+ *
+ *    This program is free software; you can redistribute it and/or modify
+ *    it under the terms of the GNU General Public License as published by
+ *    the Free Software Foundation; version 2 of the License.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ *    General Public License for more details.
+ *
+ *    You should have received a copy of the GNU General Public License
+ *    along with this program; if not, write to the Free Software
+ *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ *    02111-1307, USA.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/pci_regs.h>
+#include <linux/errno.h>
+#include "../../pci.h"
+
+#define ECRC_POLICY_DEFAULT 0		/* ECRC set by BIOS */
+#define ECRC_POLICY_OFF     1		/* ECRC off for performance */
+#define ECRC_POLICY_ON      2		/* ECRC on for data integrity */
+
+static int ecrc_policy = ECRC_POLICY_DEFAULT;
+
+static const char *ecrc_policy_str[] = {
+	[ECRC_POLICY_DEFAULT] = "bios",
+	[ECRC_POLICY_OFF] = "off",
+	[ECRC_POLICY_ON] = "on"
+};
+
+/**
+ * enable_ercr_checking - enable PCIe ECRC checking for a device
+ * @dev: the PCI device
+ *
+ * Returns 0 on success, or negative on failure.
+ */
+static int enable_ecrc_checking(struct pci_dev *dev)
+{
+	int pos;
+	u32 reg32;
+
+	if (!dev->is_pcie)
+		return -ENODEV;
+
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+	if (!pos)
+		return -ENODEV;
+
+	pci_read_config_dword(dev, pos + PCI_ERR_CAP, &reg32);
+	if (reg32 & PCI_ERR_CAP_ECRC_GENC)
+		reg32 |= PCI_ERR_CAP_ECRC_GENE;
+	if (reg32 & PCI_ERR_CAP_ECRC_CHKC)
+		reg32 |= PCI_ERR_CAP_ECRC_CHKE;
+	pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32);
+
+	return 0;
+}
+
+/**
+ * disable_ercr_checking - disables PCIe ECRC checking for a device
+ * @dev: the PCI device
+ *
+ * Returns 0 on success, or negative on failure.
+ */
+static int disable_ecrc_checking(struct pci_dev *dev)
+{
+	int pos;
+	u32 reg32;
+
+	if (!dev->is_pcie)
+		return -ENODEV;
+
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+	if (!pos)
+		return -ENODEV;
+
+	pci_read_config_dword(dev, pos + PCI_ERR_CAP, &reg32);
+	reg32 &= ~(PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
+	pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32);
+
+	return 0;
+}
+
+/**
+ * pcie_set_ecrc_checking - set/unset PCIe ECRC checking for a device based on global policy
+ * @dev: the PCI device
+ */
+void pcie_set_ecrc_checking(struct pci_dev *dev)
+{
+	switch (ecrc_policy) {
+	case ECRC_POLICY_DEFAULT:
+		return;
+	case ECRC_POLICY_OFF:
+		disable_ecrc_checking(dev);
+		break;
+	case ECRC_POLICY_ON:
+		enable_ecrc_checking(dev);;
+		break;
+	default:
+		return;
+	}
+}
+
+/**
+ * pcie_ecrc_get_policy - parse kernel command-line ecrc option
+ */
+void pcie_ecrc_get_policy(char *str)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ecrc_policy_str); i++)
+		if (!strncmp(str, ecrc_policy_str[i],
+			     strlen(ecrc_policy_str[i])))
+			break;
+	if (i >= ARRAY_SIZE(ecrc_policy_str))
+		return;
+
+	ecrc_policy = i;
+}
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 19ee92c53ef..ec03b90d351 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -878,6 +878,17 @@ static inline int pcie_aspm_enabled(void)
 extern int pcie_aspm_enabled(void);
 #endif
 
+#ifndef CONFIG_PCIE_ECRC
+static inline void pcie_set_ecrc_checking(struct pci_dev *dev)
+{
+	return;
+}
+static inline void pcie_ecrc_get_policy(char *str) {};
+#else
+extern void pcie_set_ecrc_checking(struct pci_dev *dev);
+extern void pcie_ecrc_get_policy(char *str);
+#endif
+
 #define pci_enable_msi(pdev)	pci_enable_msi_block(pdev, 1)
 
 #ifdef CONFIG_HT_IRQ
-- 
cgit v1.2.3-70-g09d2


From 73422811d290c628b4ddbf6830e5cd6fa42e84f1 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Sun, 10 May 2009 16:05:39 -0400
Subject: reiserfs: allow exposing privroot w/ xattrs enabled

This patch adds an -oexpose_privroot option to allow access to the privroot.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/reiserfs/dir.c              | 10 ++++------
 fs/reiserfs/super.c            |  1 +
 fs/reiserfs/xattr.c            |  3 ++-
 include/linux/reiserfs_fs_sb.h |  2 ++
 4 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 45ee3d357c7..6d2668fdc38 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -44,13 +44,11 @@ static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry,
 static inline bool is_privroot_deh(struct dentry *dir,
 				   struct reiserfs_de_head *deh)
 {
-	int ret = 0;
-#ifdef CONFIG_REISERFS_FS_XATTR
 	struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root;
-	ret = (dir == dir->d_parent && privroot->d_inode &&
-	       deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid);
-#endif
-	return ret;
+	if (reiserfs_expose_privroot(dir->d_sb))
+		return 0;
+	return (dir == dir->d_parent && privroot->d_inode &&
+	        deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid);
 }
 
 int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 3567fb9e3fb..9dbdcfb5d31 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -898,6 +898,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
 		{"conv",.setmask = 1 << REISERFS_CONVERT},
 		{"attrs",.setmask = 1 << REISERFS_ATTRS},
 		{"noattrs",.clrmask = 1 << REISERFS_ATTRS},
+		{"expose_privroot", .setmask = 1 << REISERFS_EXPOSE_PRIVROOT},
 #ifdef CONFIG_REISERFS_FS_XATTR
 		{"user_xattr",.setmask = 1 << REISERFS_XATTRS_USER},
 		{"nouser_xattr",.clrmask = 1 << REISERFS_XATTRS_USER},
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 8e7deb0e696..f3d47d85684 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -981,7 +981,8 @@ int reiserfs_lookup_privroot(struct super_block *s)
 				strlen(PRIVROOT_NAME));
 	if (!IS_ERR(dentry)) {
 		REISERFS_SB(s)->priv_root = dentry;
-		s->s_root->d_op = &xattr_lookup_poison_ops;
+		if (!reiserfs_expose_privroot(s))
+			s->s_root->d_op = &xattr_lookup_poison_ops;
 		if (dentry->d_inode)
 			dentry->d_inode->i_flags |= S_PRIVATE;
 	} else
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index 6473650c28f..dab68bbed67 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -453,6 +453,7 @@ enum reiserfs_mount_options {
 	REISERFS_ATTRS,
 	REISERFS_XATTRS_USER,
 	REISERFS_POSIXACL,
+	REISERFS_EXPOSE_PRIVROOT,
 	REISERFS_BARRIER_NONE,
 	REISERFS_BARRIER_FLUSH,
 
@@ -490,6 +491,7 @@ enum reiserfs_mount_options {
 #define reiserfs_data_writeback(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_WRITEBACK))
 #define reiserfs_xattrs_user(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS_USER))
 #define reiserfs_posixacl(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_POSIXACL))
+#define reiserfs_expose_privroot(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_EXPOSE_PRIVROOT))
 #define reiserfs_xattrs_optional(s) (reiserfs_xattrs_user(s) || reiserfs_posixacl(s))
 #define reiserfs_barrier_none(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_NONE))
 #define reiserfs_barrier_flush(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_FLUSH))
-- 
cgit v1.2.3-70-g09d2


From 2a737871108de9ba8930f7650d549f1383767f8b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 7 Apr 2009 11:49:53 -0400
Subject: Cache root in nameidata

New field: nd->root.  When pathname resolution wants to know the root,
check if nd->root.mnt is non-NULL; use nd->root if it is, otherwise
copy current->fs->root there.  After path_walk() is finished, we check
if we'd got a cached value in nd->root and drop it.  Before calling
path_walk() we should either set nd->root.mnt to NULL *or* copy (and
pin down) some path to nd->root.  In the latter case we won't be
looking at current->fs->root at all.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c            | 53 +++++++++++++++++++++++++++++++++------------------
 include/linux/namei.h |  1 +
 2 files changed, 35 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/fs/namei.c b/fs/namei.c
index 895733efc6b..88baaf2b916 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -552,6 +552,17 @@ static __always_inline int link_path_walk(const char *name, struct nameidata *nd
 	return result;
 }
 
+static __always_inline void set_root(struct nameidata *nd)
+{
+	if (!nd->root.mnt) {
+		struct fs_struct *fs = current->fs;
+		read_lock(&fs->lock);
+		nd->root = fs->root;
+		path_get(&nd->root);
+		read_unlock(&fs->lock);
+	}
+}
+
 static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
 {
 	int res = 0;
@@ -560,14 +571,10 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
 		goto fail;
 
 	if (*link == '/') {
-		struct fs_struct *fs = current->fs;
-
+		set_root(nd);
 		path_put(&nd->path);
-
-		read_lock(&fs->lock);
-		nd->path = fs->root;
-		path_get(&fs->root);
-		read_unlock(&fs->lock);
+		nd->path = nd->root;
+		path_get(&nd->root);
 	}
 
 	res = link_path_walk(link, nd);
@@ -741,19 +748,16 @@ int follow_down(struct vfsmount **mnt, struct dentry **dentry)
 
 static __always_inline void follow_dotdot(struct nameidata *nd)
 {
-	struct fs_struct *fs = current->fs;
+	set_root(nd);
 
 	while(1) {
 		struct vfsmount *parent;
 		struct dentry *old = nd->path.dentry;
 
-                read_lock(&fs->lock);
-		if (nd->path.dentry == fs->root.dentry &&
-		    nd->path.mnt == fs->root.mnt) {
-                        read_unlock(&fs->lock);
+		if (nd->path.dentry == nd->root.dentry &&
+		    nd->path.mnt == nd->root.mnt) {
 			break;
 		}
-                read_unlock(&fs->lock);
 		spin_lock(&dcache_lock);
 		if (nd->path.dentry != nd->path.mnt->mnt_root) {
 			nd->path.dentry = dget(nd->path.dentry->d_parent);
@@ -1022,18 +1026,18 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct namei
 	int retval = 0;
 	int fput_needed;
 	struct file *file;
-	struct fs_struct *fs = current->fs;
 
 	nd->last_type = LAST_ROOT; /* if there are only slashes... */
 	nd->flags = flags;
 	nd->depth = 0;
+	nd->root.mnt = NULL;
 
 	if (*name=='/') {
-		read_lock(&fs->lock);
-		nd->path = fs->root;
-		path_get(&fs->root);
-		read_unlock(&fs->lock);
+		set_root(nd);
+		nd->path = nd->root;
+		path_get(&nd->root);
 	} else if (dfd == AT_FDCWD) {
+		struct fs_struct *fs = current->fs;
 		read_lock(&fs->lock);
 		nd->path = fs->pwd;
 		path_get(&fs->pwd);
@@ -1079,6 +1083,10 @@ static int do_path_lookup(int dfd, const char *name,
 	if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
 				nd->path.dentry->d_inode))
 		audit_inode(name, nd->path.dentry);
+	if (nd->root.mnt) {
+		path_put(&nd->root);
+		nd->root.mnt = NULL;
+	}
 	return retval;
 }
 
@@ -1115,6 +1123,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
 	nd->last_type = LAST_ROOT;
 	nd->flags = flags;
 	nd->depth = 0;
+	nd->root.mnt = NULL;
 
 	nd->path.dentry = dentry;
 	nd->path.mnt = mnt;
@@ -1125,8 +1134,12 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
 				nd->path.dentry->d_inode))
 		audit_inode(name, nd->path.dentry);
 
-	return retval;
+	if (nd->root.mnt) {
+		path_put(&nd->root);
+		nd->root.mnt = NULL;
+	}
 
+	return retval;
 }
 
 /**
@@ -1817,6 +1830,8 @@ exit:
 	if (!IS_ERR(nd.intent.open.file))
 		release_open_intent(&nd);
 exit_parent:
+	if (nd.root.mnt)
+		path_put(&nd.root);
 	path_put(&nd.path);
 	return ERR_PTR(error);
 
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 518098fe63a..325dd3ad39a 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -18,6 +18,7 @@ enum { MAX_NESTED_LINKS = 8 };
 struct nameidata {
 	struct path	path;
 	struct qstr	last;
+	struct path	root;
 	unsigned int	flags;
 	int		last_type;
 	unsigned	depth;
-- 
cgit v1.2.3-70-g09d2


From 91c9fa8f75877c0c1e455c23e8f8206c91c8f77f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 18 Apr 2009 02:42:05 -0400
Subject: switch rqst_exp_get_by_name()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfsd/export.c            | 15 ++++++---------
 fs/nfsd/vfs.c               | 32 ++++++++++++++++----------------
 include/linux/nfsd/export.h |  3 +--
 3 files changed, 23 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 5149dabde55..84f5e5cb086 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1240,18 +1240,15 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
  * use exp_get_by_name() or exp_find().
  */
 struct svc_export *
-rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt,
-		struct dentry *dentry)
+rqst_exp_get_by_name(struct svc_rqst *rqstp, struct path *path)
 {
 	struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT);
-	struct path path = {.mnt = mnt, .dentry = dentry};
 
 	if (rqstp->rq_client == NULL)
 		goto gss;
 
 	/* First try the auth_unix client: */
-	exp = exp_get_by_name(rqstp->rq_client, &path,
-						&rqstp->rq_chandle);
+	exp = exp_get_by_name(rqstp->rq_client, path, &rqstp->rq_chandle);
 	if (PTR_ERR(exp) == -ENOENT)
 		goto gss;
 	if (IS_ERR(exp))
@@ -1263,8 +1260,7 @@ gss:
 	/* Otherwise, try falling back on gss client */
 	if (rqstp->rq_gssclient == NULL)
 		return exp;
-	gssexp = exp_get_by_name(rqstp->rq_gssclient, &path,
-						&rqstp->rq_chandle);
+	gssexp = exp_get_by_name(rqstp->rq_gssclient, path, &rqstp->rq_chandle);
 	if (PTR_ERR(gssexp) == -ENOENT)
 		return exp;
 	if (!IS_ERR(exp))
@@ -1307,9 +1303,10 @@ rqst_exp_parent(struct svc_rqst *rqstp, struct vfsmount *mnt,
 		struct dentry *dentry)
 {
 	struct svc_export *exp;
+	struct path path = {.mnt = mnt, .dentry = dentry};
 
 	dget(dentry);
-	exp = rqst_exp_get_by_name(rqstp, mnt, dentry);
+	exp = rqst_exp_get_by_name(rqstp, &path);
 
 	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) {
 		struct dentry *parent;
@@ -1317,7 +1314,7 @@ rqst_exp_parent(struct svc_rqst *rqstp, struct vfsmount *mnt,
 		parent = dget_parent(dentry);
 		dput(dentry);
 		dentry = parent;
-		exp = rqst_exp_get_by_name(rqstp, mnt, dentry);
+		exp = rqst_exp_get_by_name(rqstp, &path);
 	}
 	dput(dentry);
 	return exp;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index bd584bcf1d9..d84c4eaa526 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -101,36 +101,36 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
 {
 	struct svc_export *exp = *expp, *exp2 = NULL;
 	struct dentry *dentry = *dpp;
-	struct vfsmount *mnt = mntget(exp->ex_path.mnt);
-	struct dentry *mounts = dget(dentry);
+	struct path path = {.mnt = mntget(exp->ex_path.mnt),
+			    .dentry = dget(dentry)};
 	int err = 0;
 
-	while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts));
+	while (follow_down(&path.mnt, &path.dentry) &&
+	       d_mountpoint(path.dentry))
+		;
 
-	exp2 = rqst_exp_get_by_name(rqstp, mnt, mounts);
+	exp2 = rqst_exp_get_by_name(rqstp, &path);
 	if (IS_ERR(exp2)) {
 		if (PTR_ERR(exp2) != -ENOENT)
 			err = PTR_ERR(exp2);
-		dput(mounts);
-		mntput(mnt);
+		path_put(&path);
 		goto out;
 	}
 	if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
 		/* successfully crossed mount point */
 		/*
-		 * This is subtle: dentry is *not* under mnt at this point.
-		 * The only reason we are safe is that original mnt is pinned
-		 * down by exp, so we should dput before putting exp.
+		 * This is subtle: path.dentry is *not* on path.mnt
+		 * at this point.  The only reason we are safe is that
+		 * original mnt is pinned down by exp, so we should
+		 * put path *before* putting exp
 		 */
-		dput(dentry);
-		*dpp = mounts;
-		exp_put(exp);
+		*dpp = path.dentry;
+		path.dentry = dentry;
 		*expp = exp2;
-	} else {
-		exp_put(exp2);
-		dput(mounts);
+		exp2 = exp;
 	}
-	mntput(mnt);
+	path_put(&path);
+	exp_put(exp2);
 out:
 	return err;
 }
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index bcd0201589f..98f6fd584d5 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -125,8 +125,7 @@ void			nfsd_export_flush(void);
 void			exp_readlock(void);
 void			exp_readunlock(void);
 struct svc_export *	rqst_exp_get_by_name(struct svc_rqst *,
-					     struct vfsmount *,
-					     struct dentry *);
+					     struct path *);
 struct svc_export *	rqst_exp_parent(struct svc_rqst *,
 					struct vfsmount *mnt,
 					struct dentry *dentry);
-- 
cgit v1.2.3-70-g09d2


From e64c390ca0b60fd2119331ef1fa888d7ea27e424 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 18 Apr 2009 03:00:46 -0400
Subject: switch rqst_exp_parent()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfsd/export.c            | 25 ++++++++++---------------
 fs/nfsd/vfs.c               | 23 ++++++++++++-----------
 include/linux/nfsd/export.h |  3 +--
 3 files changed, 23 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 84f5e5cb086..8b1f8efb469 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1299,24 +1299,19 @@ gss:
 }
 
 struct svc_export *
-rqst_exp_parent(struct svc_rqst *rqstp, struct vfsmount *mnt,
-		struct dentry *dentry)
+rqst_exp_parent(struct svc_rqst *rqstp, struct path *path)
 {
-	struct svc_export *exp;
-	struct path path = {.mnt = mnt, .dentry = dentry};
-
-	dget(dentry);
-	exp = rqst_exp_get_by_name(rqstp, &path);
-
-	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) {
-		struct dentry *parent;
+	struct dentry *saved = dget(path->dentry);
+	struct svc_export *exp = rqst_exp_get_by_name(rqstp, path);
 
-		parent = dget_parent(dentry);
-		dput(dentry);
-		dentry = parent;
-		exp = rqst_exp_get_by_name(rqstp, &path);
+	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) {
+		struct dentry *parent = dget_parent(path->dentry);
+		dput(path->dentry);
+		path->dentry = parent;
+		exp = rqst_exp_get_by_name(rqstp, path);
 	}
-	dput(dentry);
+	dput(path->dentry);
+	path->dentry = saved;
 	return exp;
 }
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index d84c4eaa526..9f1ea3127f5 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -169,28 +169,29 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
 			/* checking mountpoint crossing is very different when stepping up */
 			struct svc_export *exp2 = NULL;
 			struct dentry *dp;
-			struct vfsmount *mnt = mntget(exp->ex_path.mnt);
-			dentry = dget(dparent);
-			while(dentry == mnt->mnt_root && follow_up(&mnt, &dentry))
+			struct path path = {.mnt = mntget(exp->ex_path.mnt),
+					    .dentry = dget(dparent)};
+
+			while (path.dentry == path.mnt->mnt_root &&
+			       follow_up(&path.mnt, &path.dentry))
 				;
-			dp = dget_parent(dentry);
-			dput(dentry);
-			dentry = dp;
+			dp = dget_parent(path.dentry);
+			dput(path.dentry);
+			path.dentry = dp;
 
-			exp2 = rqst_exp_parent(rqstp, mnt, dentry);
+			exp2 = rqst_exp_parent(rqstp, &path);
 			if (PTR_ERR(exp2) == -ENOENT) {
-				dput(dentry);
 				dentry = dget(dparent);
 			} else if (IS_ERR(exp2)) {
 				host_err = PTR_ERR(exp2);
-				dput(dentry);
-				mntput(mnt);
+				path_put(&path);
 				goto out_nfserr;
 			} else {
+				dentry = dget(path.dentry);
 				exp_put(exp);
 				exp = exp2;
 			}
-			mntput(mnt);
+			path_put(&path);
 		}
 	} else {
 		fh_lock(fhp);
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 98f6fd584d5..a6d9ef2bb34 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -127,8 +127,7 @@ void			exp_readunlock(void);
 struct svc_export *	rqst_exp_get_by_name(struct svc_rqst *,
 					     struct path *);
 struct svc_export *	rqst_exp_parent(struct svc_rqst *,
-					struct vfsmount *mnt,
-					struct dentry *dentry);
+					struct path *);
 int			exp_rootfh(struct auth_domain *, 
 					char *path, struct knfsd_fh *, int maxsize);
 __be32			exp_pseudoroot(struct svc_rqst *, struct svc_fh *);
-- 
cgit v1.2.3-70-g09d2


From bab77ebf51e3902f608ecf08c9d34a0a52ac35a9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 18 Apr 2009 03:26:48 -0400
Subject: switch follow_up() to struct path

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/autofs4/dev-ioctl.c |  2 +-
 fs/namei.c             | 16 ++++++++--------
 fs/nfsd/vfs.c          |  2 +-
 include/linux/namei.h  |  2 +-
 4 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index f71dac9986f..670407576b2 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -212,7 +212,7 @@ static int find_autofs_mount(const char *pathname,
 				err = 0;
 			}
 		}
-		if (!follow_up(&path.mnt, &path.dentry))
+		if (!follow_up(&path))
 			break;
 	}
 	path_put(&path);
diff --git a/fs/namei.c b/fs/namei.c
index 4379ef98970..8c1f48ae68e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -675,23 +675,23 @@ loop:
 	return err;
 }
 
-int follow_up(struct vfsmount **mnt, struct dentry **dentry)
+int follow_up(struct path *path)
 {
 	struct vfsmount *parent;
 	struct dentry *mountpoint;
 	spin_lock(&vfsmount_lock);
-	parent=(*mnt)->mnt_parent;
-	if (parent == *mnt) {
+	parent = path->mnt->mnt_parent;
+	if (parent == path->mnt) {
 		spin_unlock(&vfsmount_lock);
 		return 0;
 	}
 	mntget(parent);
-	mountpoint=dget((*mnt)->mnt_mountpoint);
+	mountpoint = dget(path->mnt->mnt_mountpoint);
 	spin_unlock(&vfsmount_lock);
-	dput(*dentry);
-	*dentry = mountpoint;
-	mntput(*mnt);
-	*mnt = parent;
+	dput(path->dentry);
+	path->dentry = mountpoint;
+	mntput(path->mnt);
+	path->mnt = parent;
 	return 1;
 }
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 9f1ea3127f5..7b2b3f77532 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -173,7 +173,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
 					    .dentry = dget(dparent)};
 
 			while (path.dentry == path.mnt->mnt_root &&
-			       follow_up(&path.mnt, &path.dentry))
+			       follow_up(&path))
 				;
 			dp = dget_parent(path.dentry);
 			dput(path.dentry);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 325dd3ad39a..9cd5a717be3 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -79,7 +79,7 @@ extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
 extern struct dentry *lookup_one_noperm(const char *, struct dentry *);
 
 extern int follow_down(struct vfsmount **, struct dentry **);
-extern int follow_up(struct vfsmount **, struct dentry **);
+extern int follow_up(struct path *);
 
 extern struct dentry *lock_rename(struct dentry *, struct dentry *);
 extern void unlock_rename(struct dentry *, struct dentry *);
-- 
cgit v1.2.3-70-g09d2


From 589ff870ed60a9ebdd5ec99ec3f5afe1282fe151 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 18 Apr 2009 03:28:19 -0400
Subject: Switch collect_mounts() to struct path

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c      | 4 ++--
 include/linux/fs.h  | 2 +-
 kernel/audit_tree.c | 6 +++---
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/fs/namespace.c b/fs/namespace.c
index 88a904d5aa2..c85962206aa 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1253,11 +1253,11 @@ Enomem:
 	return NULL;
 }
 
-struct vfsmount *collect_mounts(struct vfsmount *mnt, struct dentry *dentry)
+struct vfsmount *collect_mounts(struct path *path)
 {
 	struct vfsmount *tree;
 	down_write(&namespace_sem);
-	tree = copy_tree(mnt, dentry, CL_COPY_ALL | CL_PRIVATE);
+	tree = copy_tree(path->mnt, path->dentry, CL_COPY_ALL | CL_PRIVATE);
 	up_write(&namespace_sem);
 	return tree;
 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 323b5ce474c..03fb2102b8f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1800,7 +1800,7 @@ extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data);
 extern int may_umount_tree(struct vfsmount *);
 extern int may_umount(struct vfsmount *);
 extern long do_mount(char *, char *, char *, unsigned long, void *);
-extern struct vfsmount *collect_mounts(struct vfsmount *, struct dentry *);
+extern struct vfsmount *collect_mounts(struct path *);
 extern void drop_collected_mounts(struct vfsmount *);
 
 extern int vfs_statfs(struct dentry *, struct kstatfs *);
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 6e7351739a8..1f6396d7668 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -568,7 +568,7 @@ void audit_trim_trees(void)
 		if (err)
 			goto skip_it;
 
-		root_mnt = collect_mounts(path.mnt, path.dentry);
+		root_mnt = collect_mounts(&path);
 		path_put(&path);
 		if (!root_mnt)
 			goto skip_it;
@@ -660,7 +660,7 @@ int audit_add_tree_rule(struct audit_krule *rule)
 	err = kern_path(tree->pathname, 0, &path);
 	if (err)
 		goto Err;
-	mnt = collect_mounts(path.mnt, path.dentry);
+	mnt = collect_mounts(&path);
 	path_put(&path);
 	if (!mnt) {
 		err = -ENOMEM;
@@ -720,7 +720,7 @@ int audit_tag_tree(char *old, char *new)
 	err = kern_path(new, 0, &path);
 	if (err)
 		return err;
-	tagged = collect_mounts(path.mnt, path.dentry);
+	tagged = collect_mounts(&path);
 	path_put(&path);
 	if (!tagged)
 		return -ENOMEM;
-- 
cgit v1.2.3-70-g09d2


From 9393bd07cf218ca51d0e627653f906a9d76a9131 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 18 Apr 2009 13:58:15 -0400
Subject: switch follow_down()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/afs/mntpt.c         |  2 +-
 fs/autofs/dirhash.c    |  5 ++---
 fs/autofs4/autofs_i.h  |  6 +++---
 fs/autofs4/dev-ioctl.c |  2 +-
 fs/autofs4/expire.c    | 15 +++++++--------
 fs/autofs4/root.c      |  7 +++----
 fs/cifs/cifs_dfs_ref.c |  2 +-
 fs/namei.c             | 12 ++++++------
 fs/namespace.c         |  4 ++--
 fs/nfs/namespace.c     |  2 +-
 fs/nfsd/vfs.c          |  3 +--
 include/linux/namei.h  |  2 +-
 12 files changed, 29 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 2b9e2d03a39..c52be53f694 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -244,7 +244,7 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
 	case -EBUSY:
 		/* someone else made a mount here whilst we were busy */
 		while (d_mountpoint(nd->path.dentry) &&
-		       follow_down(&nd->path.mnt, &nd->path.dentry))
+		       follow_down(&nd->path))
 			;
 		err = 0;
 	default:
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
index 4eb4d8dfb2f..2316e944a10 100644
--- a/fs/autofs/dirhash.c
+++ b/fs/autofs/dirhash.c
@@ -85,13 +85,12 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
 		}
 		path.mnt = mnt;
 		path_get(&path);
-		if (!follow_down(&path.mnt, &path.dentry)) {
+		if (!follow_down(&path)) {
 			path_put(&path);
 			DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
 			continue;
 		}
-		while (d_mountpoint(path.dentry) &&
-		       follow_down(&path.mnt, &path.dentry))
+		while (d_mountpoint(path.dentry) && follow_down(&path));
 			;
 		umount_ok = may_umount(path.mnt);
 		path_put(&path);
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index b7ff33c6310..8f7cdde4173 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -223,12 +223,12 @@ int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify);
 int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int);
 void autofs4_catatonic_mode(struct autofs_sb_info *);
 
-static inline int autofs4_follow_mount(struct vfsmount **mnt, struct dentry **dentry)
+static inline int autofs4_follow_mount(struct path *path)
 {
 	int res = 0;
 
-	while (d_mountpoint(*dentry)) {
-		int followed = follow_down(mnt, dentry);
+	while (d_mountpoint(path->dentry)) {
+		int followed = follow_down(path);
 		if (!followed)
 			break;
 		res = 1;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 670407576b2..f3da2eb51f5 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -562,7 +562,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
 		err = have_submounts(path.dentry);
 
 		if (path.mnt->mnt_mountpoint != path.mnt->mnt_root) {
-			if (follow_down(&path.mnt, &path.dentry))
+			if (follow_down(&path))
 				magic = path.mnt->mnt_sb->s_magic;
 		}
 	}
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 3077d8f1652..aa39ae83f01 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -48,19 +48,19 @@ static inline int autofs4_can_expire(struct dentry *dentry,
 static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 {
 	struct dentry *top = dentry;
+	struct path path = {.mnt = mnt, .dentry = dentry};
 	int status = 1;
 
 	DPRINTK("dentry %p %.*s",
 		dentry, (int)dentry->d_name.len, dentry->d_name.name);
 
-	mntget(mnt);
-	dget(dentry);
+	path_get(&path);
 
-	if (!follow_down(&mnt, &dentry))
+	if (!follow_down(&path))
 		goto done;
 
-	if (is_autofs4_dentry(dentry)) {
-		struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+	if (is_autofs4_dentry(path.dentry)) {
+		struct autofs_sb_info *sbi = autofs4_sbi(path.dentry->d_sb);
 
 		/* This is an autofs submount, we can't expire it */
 		if (autofs_type_indirect(sbi->type))
@@ -70,7 +70,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 		 * Otherwise it's an offset mount and we need to check
 		 * if we can umount its mount, if there is one.
 		 */
-		if (!d_mountpoint(dentry)) {
+		if (!d_mountpoint(path.dentry)) {
 			status = 0;
 			goto done;
 		}
@@ -86,8 +86,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 	status = 0;
 done:
 	DPRINTK("returning = %d", status);
-	dput(dentry);
-	mntput(mnt);
+	path_put(&path);
 	return status;
 }
 
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index e383bf0334f..b96a3c57359 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -181,7 +181,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 		nd->flags);
 	/*
 	 * For an expire of a covered direct or offset mount we need
-	 * to beeak out of follow_down() at the autofs mount trigger
+	 * to break out of follow_down() at the autofs mount trigger
 	 * (d_mounted--), so we can see the expiring flag, and manage
 	 * the blocking and following here until the expire is completed.
 	 */
@@ -190,7 +190,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 		if (ino->flags & AUTOFS_INF_EXPIRING) {
 			spin_unlock(&sbi->fs_lock);
 			/* Follow down to our covering mount. */
-			if (!follow_down(&nd->path.mnt, &nd->path.dentry))
+			if (!follow_down(&nd->path))
 				goto done;
 			goto follow;
 		}
@@ -230,8 +230,7 @@ follow:
 	 * to follow it.
 	 */
 	if (d_mountpoint(dentry)) {
-		if (!autofs4_follow_mount(&nd->path.mnt,
-					  &nd->path.dentry)) {
+		if (!autofs4_follow_mount(&nd->path)) {
 			status = -ENOENT;
 			goto out_error;
 		}
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 83d62759c7c..3bb11be8b6a 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -275,7 +275,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
 	case -EBUSY:
 		/* someone else made a mount here whilst we were busy */
 		while (d_mountpoint(nd->path.dentry) &&
-		       follow_down(&nd->path.mnt, &nd->path.dentry))
+		       follow_down(&nd->path))
 			;
 		err = 0;
 	default:
diff --git a/fs/namei.c b/fs/namei.c
index 8c1f48ae68e..4d49a3eee6d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -731,16 +731,16 @@ static void follow_mount(struct vfsmount **mnt, struct dentry **dentry)
 /* no need for dcache_lock, as serialization is taken care in
  * namespace.c
  */
-int follow_down(struct vfsmount **mnt, struct dentry **dentry)
+int follow_down(struct path *path)
 {
 	struct vfsmount *mounted;
 
-	mounted = lookup_mnt(*mnt, *dentry);
+	mounted = lookup_mnt(path->mnt, path->dentry);
 	if (mounted) {
-		dput(*dentry);
-		mntput(*mnt);
-		*mnt = mounted;
-		*dentry = dget(mounted->mnt_root);
+		dput(path->dentry);
+		mntput(path->mnt);
+		path->mnt = mounted;
+		path->dentry = dget(mounted->mnt_root);
 		return 1;
 	}
 	return 0;
diff --git a/fs/namespace.c b/fs/namespace.c
index c85962206aa..ba5237be1cf 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1601,7 +1601,7 @@ static int do_move_mount(struct path *path, char *old_name)
 
 	down_write(&namespace_sem);
 	while (d_mountpoint(path->dentry) &&
-	       follow_down(&path->mnt, &path->dentry))
+	       follow_down(path))
 		;
 	err = -EINVAL;
 	if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
@@ -1695,7 +1695,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
 	down_write(&namespace_sem);
 	/* Something was mounted here while we slept */
 	while (d_mountpoint(path->dentry) &&
-	       follow_down(&path->mnt, &path->dentry))
+	       follow_down(path))
 		;
 	err = -EINVAL;
 	if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt))
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 64a288ee046..f01caec8446 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -154,7 +154,7 @@ out_err:
 	goto out;
 out_follow:
 	while (d_mountpoint(nd->path.dentry) &&
-	       follow_down(&nd->path.mnt, &nd->path.dentry))
+	       follow_down(&nd->path))
 		;
 	err = 0;
 	goto out;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 7b2b3f77532..99f83575359 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -105,8 +105,7 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
 			    .dentry = dget(dentry)};
 	int err = 0;
 
-	while (follow_down(&path.mnt, &path.dentry) &&
-	       d_mountpoint(path.dentry))
+	while (d_mountpoint(path.dentry) && follow_down(&path))
 		;
 
 	exp2 = rqst_exp_get_by_name(rqstp, &path);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 9cd5a717be3..d870ae2faed 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -78,7 +78,7 @@ extern void release_open_intent(struct nameidata *);
 extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
 extern struct dentry *lookup_one_noperm(const char *, struct dentry *);
 
-extern int follow_down(struct vfsmount **, struct dentry **);
+extern int follow_down(struct path *);
 extern int follow_up(struct path *);
 
 extern struct dentry *lock_rename(struct dentry *, struct dentry *);
-- 
cgit v1.2.3-70-g09d2


From 1c755af4df75996b0dd4b7e6cacaf9d57a6ef2ef Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 18 Apr 2009 14:06:57 -0400
Subject: switch lookup_mnt()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c             | 6 +++---
 fs/namespace.c         | 4 ++--
 include/linux/dcache.h | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/fs/namei.c b/fs/namei.c
index c006bc61d1e..527119afb6a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -702,7 +702,7 @@ static int __follow_mount(struct path *path)
 {
 	int res = 0;
 	while (d_mountpoint(path->dentry)) {
-		struct vfsmount *mounted = lookup_mnt(path->mnt, path->dentry);
+		struct vfsmount *mounted = lookup_mnt(path);
 		if (!mounted)
 			break;
 		dput(path->dentry);
@@ -718,7 +718,7 @@ static int __follow_mount(struct path *path)
 static void follow_mount(struct path *path)
 {
 	while (d_mountpoint(path->dentry)) {
-		struct vfsmount *mounted = lookup_mnt(path->mnt, path->dentry);
+		struct vfsmount *mounted = lookup_mnt(path);
 		if (!mounted)
 			break;
 		dput(path->dentry);
@@ -735,7 +735,7 @@ int follow_down(struct path *path)
 {
 	struct vfsmount *mounted;
 
-	mounted = lookup_mnt(path->mnt, path->dentry);
+	mounted = lookup_mnt(path);
 	if (mounted) {
 		dput(path->dentry);
 		mntput(path->mnt);
diff --git a/fs/namespace.c b/fs/namespace.c
index ba5237be1cf..b94ad3d685f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -442,11 +442,11 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
  * lookup_mnt increments the ref count before returning
  * the vfsmount struct.
  */
-struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
+struct vfsmount *lookup_mnt(struct path *path)
 {
 	struct vfsmount *child_mnt;
 	spin_lock(&vfsmount_lock);
-	if ((child_mnt = __lookup_mnt(mnt, dentry, 1)))
+	if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1)))
 		mntget(child_mnt);
 	spin_unlock(&vfsmount_lock);
 	return child_mnt;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 97978004338..72ce2ae8859 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -370,7 +370,7 @@ static inline int d_mountpoint(struct dentry *dentry)
 	return dentry->d_mounted;
 }
 
-extern struct vfsmount *lookup_mnt(struct vfsmount *, struct dentry *);
+extern struct vfsmount *lookup_mnt(struct path *);
 extern struct dentry *lookup_create(struct nameidata *nd, int is_dir);
 
 extern int sysctl_vfs_cache_pressure;
-- 
cgit v1.2.3-70-g09d2


From 3174c21b74b56c6a53fddd41a30fd6f757a32bd0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 7 Apr 2009 13:19:18 -0400
Subject: Move junk from proc_fs.h to fs/proc/internal.h

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/internal.h      | 25 +++++++++++++++++++++++++
 fs/proc/proc_devtree.c  |  1 +
 include/linux/proc_fs.h | 24 ------------------------
 3 files changed, 26 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index f6db9618a88..753ca37002c 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -92,3 +92,28 @@ struct pde_opener {
 	struct list_head lh;
 };
 void pde_users_dec(struct proc_dir_entry *pde);
+
+extern spinlock_t proc_subdir_lock;
+
+struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
+int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
+unsigned long task_vsize(struct mm_struct *);
+int task_statm(struct mm_struct *, int *, int *, int *, int *);
+void task_mem(struct seq_file *, struct mm_struct *);
+
+struct proc_dir_entry *de_get(struct proc_dir_entry *de);
+void de_put(struct proc_dir_entry *de);
+
+extern struct vfsmount *proc_mnt;
+int proc_fill_super(struct super_block *);
+struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *);
+
+/*
+ * These are generic /proc routines that use the internal
+ * "struct proc_dir_entry" tree to traverse the filesystem.
+ *
+ * The /proc root directory has extended versions to take care
+ * of the /proc/<pid> subdirectories.
+ */
+int proc_readdir(struct file *, void *, filldir_t);
+struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index de2bba5a344..fc6c3025bef 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -11,6 +11,7 @@
 #include <linux/string.h>
 #include <asm/prom.h>
 #include <asm/uaccess.h>
+#include "internal.h"
 
 #ifndef HAVE_ARCH_DEVTREE_FIXUPS
 static inline void set_node_proc_entry(struct device_node *np,
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index fbfa3d44d33..e6e77d31c41 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -93,20 +93,9 @@ struct vmcore {
 
 #ifdef CONFIG_PROC_FS
 
-extern spinlock_t proc_subdir_lock;
-
 extern void proc_root_init(void);
 
 void proc_flush_task(struct task_struct *task);
-struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
-int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
-unsigned long task_vsize(struct mm_struct *);
-int task_statm(struct mm_struct *, int *, int *, int *, int *);
-void task_mem(struct seq_file *, struct mm_struct *);
-void clear_refs_smap(struct mm_struct *mm);
-
-struct proc_dir_entry *de_get(struct proc_dir_entry *de);
-void de_put(struct proc_dir_entry *de);
 
 extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
 						struct proc_dir_entry *parent);
@@ -116,20 +105,7 @@ struct proc_dir_entry *proc_create_data(const char *name, mode_t mode,
 				void *data);
 extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
 
-extern struct vfsmount *proc_mnt;
 struct pid_namespace;
-extern int proc_fill_super(struct super_block *);
-extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *);
-
-/*
- * These are generic /proc routines that use the internal
- * "struct proc_dir_entry" tree to traverse the filesystem.
- *
- * The /proc root directory has extended versions to take care
- * of the /proc/<pid> subdirectories.
- */
-extern int proc_readdir(struct file *, void *, filldir_t);
-extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
 
 extern int pid_ns_prepare_proc(struct pid_namespace *ns);
 extern void pid_ns_release_proc(struct pid_namespace *ns);
-- 
cgit v1.2.3-70-g09d2


From d3ef3d7351ccfbef3e5d926efc5ee332136f40d4 Mon Sep 17 00:00:00 2001
From: "npiggin@suse.de" <npiggin@suse.de>
Date: Sun, 26 Apr 2009 20:25:54 +1000
Subject: fs: mnt_want_write speedup

This patch speeds up lmbench lat_mmap test by about 8%. lat_mmap is set up
basically to mmap a 64MB file on tmpfs, fault in its pages, then unmap it.
A microbenchmark yes, but it exercises some important paths in the mm.

Before:
 avg = 501.9
 std = 14.7773

After:
 avg = 462.286
 std = 5.46106

(50 runs of each, stddev gives a reasonable confidence, but there is quite
a bit of variation there still)

It does this by removing the complex per-cpu locking and counter-cache and
replaces it with a percpu counter in struct vfsmount. This makes the code
much simpler, and avoids spinlocks (although the msync is still pretty
costly, unfortunately). It results in about 900 bytes smaller code too. It
does increase the size of a vfsmount, however.

It should also give a speedup on large systems if CPUs are frequently operating
on different mounts (because the existing scheme has to operate on an atomic in
the struct vfsmount when switching between mounts). But I'm most interested in
the single threaded path performance for the moment.

[AV: minor cleanup]

Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c        | 268 +++++++++++++++++---------------------------------
 include/linux/mount.h |  21 ++--
 2 files changed, 106 insertions(+), 183 deletions(-)

(limited to 'include')

diff --git a/fs/namespace.c b/fs/namespace.c
index b94ad3d685f..22ae06ad751 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -131,10 +131,20 @@ struct vfsmount *alloc_vfsmnt(const char *name)
 		INIT_LIST_HEAD(&mnt->mnt_share);
 		INIT_LIST_HEAD(&mnt->mnt_slave_list);
 		INIT_LIST_HEAD(&mnt->mnt_slave);
-		atomic_set(&mnt->__mnt_writers, 0);
+#ifdef CONFIG_SMP
+		mnt->mnt_writers = alloc_percpu(int);
+		if (!mnt->mnt_writers)
+			goto out_free_devname;
+#else
+		mnt->mnt_writers = 0;
+#endif
 	}
 	return mnt;
 
+#ifdef CONFIG_SMP
+out_free_devname:
+	kfree(mnt->mnt_devname);
+#endif
 out_free_id:
 	mnt_free_id(mnt);
 out_free_cache:
@@ -171,65 +181,38 @@ int __mnt_is_readonly(struct vfsmount *mnt)
 }
 EXPORT_SYMBOL_GPL(__mnt_is_readonly);
 
-struct mnt_writer {
-	/*
-	 * If holding multiple instances of this lock, they
-	 * must be ordered by cpu number.
-	 */
-	spinlock_t lock;
-	struct lock_class_key lock_class; /* compiles out with !lockdep */
-	unsigned long count;
-	struct vfsmount *mnt;
-} ____cacheline_aligned_in_smp;
-static DEFINE_PER_CPU(struct mnt_writer, mnt_writers);
+static inline void inc_mnt_writers(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+	(*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++;
+#else
+	mnt->mnt_writers++;
+#endif
+}
 
-static int __init init_mnt_writers(void)
+static inline void dec_mnt_writers(struct vfsmount *mnt)
 {
-	int cpu;
-	for_each_possible_cpu(cpu) {
-		struct mnt_writer *writer = &per_cpu(mnt_writers, cpu);
-		spin_lock_init(&writer->lock);
-		lockdep_set_class(&writer->lock, &writer->lock_class);
-		writer->count = 0;
-	}
-	return 0;
+#ifdef CONFIG_SMP
+	(*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--;
+#else
+	mnt->mnt_writers--;
+#endif
 }
-fs_initcall(init_mnt_writers);
 
-static void unlock_mnt_writers(void)
+static unsigned int count_mnt_writers(struct vfsmount *mnt)
 {
+#ifdef CONFIG_SMP
+	unsigned int count = 0;
 	int cpu;
-	struct mnt_writer *cpu_writer;
 
 	for_each_possible_cpu(cpu) {
-		cpu_writer = &per_cpu(mnt_writers, cpu);
-		spin_unlock(&cpu_writer->lock);
+		count += *per_cpu_ptr(mnt->mnt_writers, cpu);
 	}
-}
 
-static inline void __clear_mnt_count(struct mnt_writer *cpu_writer)
-{
-	if (!cpu_writer->mnt)
-		return;
-	/*
-	 * This is in case anyone ever leaves an invalid,
-	 * old ->mnt and a count of 0.
-	 */
-	if (!cpu_writer->count)
-		return;
-	atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers);
-	cpu_writer->count = 0;
-}
- /*
- * must hold cpu_writer->lock
- */
-static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer,
-					  struct vfsmount *mnt)
-{
-	if (cpu_writer->mnt == mnt)
-		return;
-	__clear_mnt_count(cpu_writer);
-	cpu_writer->mnt = mnt;
+	return count;
+#else
+	return mnt->mnt_writers;
+#endif
 }
 
 /*
@@ -253,75 +236,34 @@ static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer,
 int mnt_want_write(struct vfsmount *mnt)
 {
 	int ret = 0;
-	struct mnt_writer *cpu_writer;
 
-	cpu_writer = &get_cpu_var(mnt_writers);
-	spin_lock(&cpu_writer->lock);
+	preempt_disable();
+	inc_mnt_writers(mnt);
+	/*
+	 * The store to inc_mnt_writers must be visible before we pass
+	 * MNT_WRITE_HOLD loop below, so that the slowpath can see our
+	 * incremented count after it has set MNT_WRITE_HOLD.
+	 */
+	smp_mb();
+	while (mnt->mnt_flags & MNT_WRITE_HOLD)
+		cpu_relax();
+	/*
+	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
+	 * be set to match its requirements. So we must not load that until
+	 * MNT_WRITE_HOLD is cleared.
+	 */
+	smp_rmb();
 	if (__mnt_is_readonly(mnt)) {
+		dec_mnt_writers(mnt);
 		ret = -EROFS;
 		goto out;
 	}
-	use_cpu_writer_for_mount(cpu_writer, mnt);
-	cpu_writer->count++;
 out:
-	spin_unlock(&cpu_writer->lock);
-	put_cpu_var(mnt_writers);
+	preempt_enable();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(mnt_want_write);
 
-static void lock_mnt_writers(void)
-{
-	int cpu;
-	struct mnt_writer *cpu_writer;
-
-	for_each_possible_cpu(cpu) {
-		cpu_writer = &per_cpu(mnt_writers, cpu);
-		spin_lock(&cpu_writer->lock);
-		__clear_mnt_count(cpu_writer);
-		cpu_writer->mnt = NULL;
-	}
-}
-
-/*
- * These per-cpu write counts are not guaranteed to have
- * matched increments and decrements on any given cpu.
- * A file open()ed for write on one cpu and close()d on
- * another cpu will imbalance this count.  Make sure it
- * does not get too far out of whack.
- */
-static void handle_write_count_underflow(struct vfsmount *mnt)
-{
-	if (atomic_read(&mnt->__mnt_writers) >=
-	    MNT_WRITER_UNDERFLOW_LIMIT)
-		return;
-	/*
-	 * It isn't necessary to hold all of the locks
-	 * at the same time, but doing it this way makes
-	 * us share a lot more code.
-	 */
-	lock_mnt_writers();
-	/*
-	 * vfsmount_lock is for mnt_flags.
-	 */
-	spin_lock(&vfsmount_lock);
-	/*
-	 * If coalescing the per-cpu writer counts did not
-	 * get us back to a positive writer count, we have
-	 * a bug.
-	 */
-	if ((atomic_read(&mnt->__mnt_writers) < 0) &&
-	    !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) {
-		WARN(1, KERN_DEBUG "leak detected on mount(%p) writers "
-				"count: %d\n",
-			mnt, atomic_read(&mnt->__mnt_writers));
-		/* use the flag to keep the dmesg spam down */
-		mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT;
-	}
-	spin_unlock(&vfsmount_lock);
-	unlock_mnt_writers();
-}
-
 /**
  * mnt_drop_write - give up write access to a mount
  * @mnt: the mount on which to give up write access
@@ -332,37 +274,9 @@ static void handle_write_count_underflow(struct vfsmount *mnt)
  */
 void mnt_drop_write(struct vfsmount *mnt)
 {
-	int must_check_underflow = 0;
-	struct mnt_writer *cpu_writer;
-
-	cpu_writer = &get_cpu_var(mnt_writers);
-	spin_lock(&cpu_writer->lock);
-
-	use_cpu_writer_for_mount(cpu_writer, mnt);
-	if (cpu_writer->count > 0) {
-		cpu_writer->count--;
-	} else {
-		must_check_underflow = 1;
-		atomic_dec(&mnt->__mnt_writers);
-	}
-
-	spin_unlock(&cpu_writer->lock);
-	/*
-	 * Logically, we could call this each time,
-	 * but the __mnt_writers cacheline tends to
-	 * be cold, and makes this expensive.
-	 */
-	if (must_check_underflow)
-		handle_write_count_underflow(mnt);
-	/*
-	 * This could be done right after the spinlock
-	 * is taken because the spinlock keeps us on
-	 * the cpu, and disables preemption.  However,
-	 * putting it here bounds the amount that
-	 * __mnt_writers can underflow.  Without it,
-	 * we could theoretically wrap __mnt_writers.
-	 */
-	put_cpu_var(mnt_writers);
+	preempt_disable();
+	dec_mnt_writers(mnt);
+	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(mnt_drop_write);
 
@@ -370,24 +284,41 @@ static int mnt_make_readonly(struct vfsmount *mnt)
 {
 	int ret = 0;
 
-	lock_mnt_writers();
+	spin_lock(&vfsmount_lock);
+	mnt->mnt_flags |= MNT_WRITE_HOLD;
 	/*
-	 * With all the locks held, this value is stable
+	 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
+	 * should be visible before we do.
 	 */
-	if (atomic_read(&mnt->__mnt_writers) > 0) {
-		ret = -EBUSY;
-		goto out;
-	}
+	smp_mb();
+
 	/*
-	 * nobody can do a successful mnt_want_write() with all
-	 * of the counts in MNT_DENIED_WRITE and the locks held.
+	 * With writers on hold, if this value is zero, then there are
+	 * definitely no active writers (although held writers may subsequently
+	 * increment the count, they'll have to wait, and decrement it after
+	 * seeing MNT_READONLY).
+	 *
+	 * It is OK to have counter incremented on one CPU and decremented on
+	 * another: the sum will add up correctly. The danger would be when we
+	 * sum up each counter, if we read a counter before it is incremented,
+	 * but then read another CPU's count which it has been subsequently
+	 * decremented from -- we would see more decrements than we should.
+	 * MNT_WRITE_HOLD protects against this scenario, because
+	 * mnt_want_write first increments count, then smp_mb, then spins on
+	 * MNT_WRITE_HOLD, so it can't be decremented by another CPU while
+	 * we're counting up here.
 	 */
-	spin_lock(&vfsmount_lock);
-	if (!ret)
+	if (count_mnt_writers(mnt) > 0)
+		ret = -EBUSY;
+	else
 		mnt->mnt_flags |= MNT_READONLY;
+	/*
+	 * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
+	 * that become unheld will see MNT_READONLY.
+	 */
+	smp_wmb();
+	mnt->mnt_flags &= ~MNT_WRITE_HOLD;
 	spin_unlock(&vfsmount_lock);
-out:
-	unlock_mnt_writers();
 	return ret;
 }
 
@@ -410,6 +341,9 @@ void free_vfsmnt(struct vfsmount *mnt)
 {
 	kfree(mnt->mnt_devname);
 	mnt_free_id(mnt);
+#ifdef CONFIG_SMP
+	free_percpu(mnt->mnt_writers);
+#endif
 	kmem_cache_free(mnt_cache, mnt);
 }
 
@@ -604,38 +538,18 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
 
 static inline void __mntput(struct vfsmount *mnt)
 {
-	int cpu;
 	struct super_block *sb = mnt->mnt_sb;
-	/*
-	 * We don't have to hold all of the locks at the
-	 * same time here because we know that we're the
-	 * last reference to mnt and that no new writers
-	 * can come in.
-	 */
-	for_each_possible_cpu(cpu) {
-		struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
-		spin_lock(&cpu_writer->lock);
-		if (cpu_writer->mnt != mnt) {
-			spin_unlock(&cpu_writer->lock);
-			continue;
-		}
-		atomic_add(cpu_writer->count, &mnt->__mnt_writers);
-		cpu_writer->count = 0;
-		/*
-		 * Might as well do this so that no one
-		 * ever sees the pointer and expects
-		 * it to be valid.
-		 */
-		cpu_writer->mnt = NULL;
-		spin_unlock(&cpu_writer->lock);
-	}
 	/*
 	 * This probably indicates that somebody messed
 	 * up a mnt_want/drop_write() pair.  If this
 	 * happens, the filesystem was probably unable
 	 * to make r/w->r/o transitions.
 	 */
-	WARN_ON(atomic_read(&mnt->__mnt_writers));
+	/*
+	 * atomic_dec_and_lock() used to deal with ->mnt_count decrements
+	 * provides barriers, so count_mnt_writers() below is safe.  AV
+	 */
+	WARN_ON(count_mnt_writers(mnt));
 	dput(mnt->mnt_root);
 	free_vfsmnt(mnt);
 	deactivate_super(sb);
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 51f55f903af..ac49c1f8e5c 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -30,7 +30,7 @@ struct mnt_namespace;
 #define MNT_STRICTATIME 0x80
 
 #define MNT_SHRINKABLE	0x100
-#define MNT_IMBALANCED_WRITE_COUNT	0x200 /* just for debugging */
+#define MNT_WRITE_HOLD	0x200
 
 #define MNT_SHARED	0x1000	/* if the vfsmount is a shared mount */
 #define MNT_UNBINDABLE	0x2000	/* if the vfsmount is a unbindable mount */
@@ -65,13 +65,22 @@ struct vfsmount {
 	int mnt_expiry_mark;		/* true if marked for expiry */
 	int mnt_pinned;
 	int mnt_ghosts;
-	/*
-	 * This value is not stable unless all of the mnt_writers[] spinlocks
-	 * are held, and all mnt_writer[]s on this mount have 0 as their ->count
-	 */
-	atomic_t __mnt_writers;
+#ifdef CONFIG_SMP
+	int *mnt_writers;
+#else
+	int mnt_writers;
+#endif
 };
 
+static inline int *get_mnt_writers_ptr(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+	return mnt->mnt_writers;
+#else
+	return &mnt->mnt_writers;
+#endif
+}
+
 static inline struct vfsmount *mntget(struct vfsmount *mnt)
 {
 	if (mnt)
-- 
cgit v1.2.3-70-g09d2


From 96029c4e09ccbd73a6d0ed2b29e80bf2586ad7ef Mon Sep 17 00:00:00 2001
From: "npiggin@suse.de" <npiggin@suse.de>
Date: Sun, 26 Apr 2009 20:25:55 +1000
Subject: fs: introduce mnt_clone_write

This patch speeds up lmbench lat_mmap test by about another 2% after the
first patch.

Before:
 avg = 462.286
 std = 5.46106

After:
 avg = 453.12
 std = 9.58257

(50 runs of each, stddev gives a reasonable confidence)

It does this by introducing mnt_clone_write, which avoids some heavyweight
operations of mnt_want_write if called on a vfsmount which we know already
has a write count; and mnt_want_write_file, which can call mnt_clone_write
if the file is open for write.

After these two patches, mnt_want_write and mnt_drop_write go from 7% on
the profile down to 1.3% (including mnt_clone_write).

[AV: mnt_want_write_file() should take file alone and derive mnt from it;
not only all callers have that form, but that's the only mnt about which
we know that it's already held for write if file is opened for write]

Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c       |  2 +-
 fs/inode.c            |  2 +-
 fs/namespace.c        | 40 ++++++++++++++++++++++++++++++++++++++++
 fs/open.c             |  4 ++--
 fs/xattr.c            |  4 ++--
 include/linux/mount.h |  4 ++++
 6 files changed, 50 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/fs/file_table.c b/fs/file_table.c
index 54018fe4884..3d66dbcebef 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -214,7 +214,7 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
 	 */
 	if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) {
 		file_take_write(file);
-		error = mnt_want_write(mnt);
+		error = mnt_clone_write(mnt);
 		WARN_ON(error);
 	}
 	return error;
diff --git a/fs/inode.c b/fs/inode.c
index ca337014ae2..a88baebf77c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1422,7 +1422,7 @@ void file_update_time(struct file *file)
 	if (IS_NOCMTIME(inode))
 		return;
 
-	err = mnt_want_write(file->f_path.mnt);
+	err = mnt_want_write_file(file);
 	if (err)
 		return;
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 22ae06ad751..120b8a6b99e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -264,6 +264,46 @@ out:
 }
 EXPORT_SYMBOL_GPL(mnt_want_write);
 
+/**
+ * mnt_clone_write - get write access to a mount
+ * @mnt: the mount on which to take a write
+ *
+ * This is effectively like mnt_want_write, except
+ * it must only be used to take an extra write reference
+ * on a mountpoint that we already know has a write reference
+ * on it. This allows some optimisation.
+ *
+ * After finished, mnt_drop_write must be called as usual to
+ * drop the reference.
+ */
+int mnt_clone_write(struct vfsmount *mnt)
+{
+	/* superblock may be r/o */
+	if (__mnt_is_readonly(mnt))
+		return -EROFS;
+	preempt_disable();
+	inc_mnt_writers(mnt);
+	preempt_enable();
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mnt_clone_write);
+
+/**
+ * mnt_want_write_file - get write access to a file's mount
+ * @file: the file who's mount on which to take a write
+ *
+ * This is like mnt_want_write, but it takes a file and can
+ * do some optimisations if the file is open for write already
+ */
+int mnt_want_write_file(struct file *file)
+{
+	if (!(file->f_mode & FMODE_WRITE))
+		return mnt_want_write(file->f_path.mnt);
+	else
+		return mnt_clone_write(file->f_path.mnt);
+}
+EXPORT_SYMBOL_GPL(mnt_want_write_file);
+
 /**
  * mnt_drop_write - give up write access to a mount
  * @mnt: the mount on which to give up write access
diff --git a/fs/open.c b/fs/open.c
index bdfbf03615a..7200e23d925 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -612,7 +612,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
 
 	audit_inode(NULL, dentry);
 
-	err = mnt_want_write(file->f_path.mnt);
+	err = mnt_want_write_file(file);
 	if (err)
 		goto out_putf;
 	mutex_lock(&inode->i_mutex);
@@ -761,7 +761,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
 	if (!file)
 		goto out;
 
-	error = mnt_want_write(file->f_path.mnt);
+	error = mnt_want_write_file(file);
 	if (error)
 		goto out_fput;
 	dentry = file->f_path.dentry;
diff --git a/fs/xattr.c b/fs/xattr.c
index d51b8f9db92..1c3d0af59dd 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -297,7 +297,7 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
 		return error;
 	dentry = f->f_path.dentry;
 	audit_inode(NULL, dentry);
-	error = mnt_want_write(f->f_path.mnt);
+	error = mnt_want_write_file(f);
 	if (!error) {
 		error = setxattr(dentry, name, value, size, flags);
 		mnt_drop_write(f->f_path.mnt);
@@ -524,7 +524,7 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
 		return error;
 	dentry = f->f_path.dentry;
 	audit_inode(NULL, dentry);
-	error = mnt_want_write(f->f_path.mnt);
+	error = mnt_want_write_file(f);
 	if (!error) {
 		error = removexattr(dentry, name);
 		mnt_drop_write(f->f_path.mnt);
diff --git a/include/linux/mount.h b/include/linux/mount.h
index ac49c1f8e5c..5d527536486 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -88,7 +88,11 @@ static inline struct vfsmount *mntget(struct vfsmount *mnt)
 	return mnt;
 }
 
+struct file; /* forward dec */
+
 extern int mnt_want_write(struct vfsmount *mnt);
+extern int mnt_want_write_file(struct file *file);
+extern int mnt_clone_write(struct vfsmount *mnt);
 extern void mnt_drop_write(struct vfsmount *mnt);
 extern void mntput_no_expire(struct vfsmount *mnt);
 extern void mnt_pin(struct vfsmount *mnt);
-- 
cgit v1.2.3-70-g09d2


From 876a9f76abbcb775f8d21cbc99fa161f9e5937f1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 28 Apr 2009 18:05:55 +0200
Subject: remove s_async_list

Remove the unused s_async_list in the superblock, a leftover of the
broken async inode deletion code that leaked into mainline.  Having this
in the middle of the sync/unmount path is not helpful for the following
cleanups.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/super.c         | 8 --------
 include/linux/fs.h | 5 -----
 2 files changed, 13 deletions(-)

(limited to 'include')

diff --git a/fs/super.c b/fs/super.c
index c170551c23f..3d9f117dd2a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -38,7 +38,6 @@
 #include <linux/kobject.h>
 #include <linux/mutex.h>
 #include <linux/file.h>
-#include <linux/async.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
@@ -72,7 +71,6 @@ static struct super_block *alloc_super(struct file_system_type *type)
 		INIT_HLIST_HEAD(&s->s_anon);
 		INIT_LIST_HEAD(&s->s_inodes);
 		INIT_LIST_HEAD(&s->s_dentry_lru);
-		INIT_LIST_HEAD(&s->s_async_list);
 		init_rwsem(&s->s_umount);
 		mutex_init(&s->s_lock);
 		lockdep_set_class(&s->s_umount, &type->s_umount_key);
@@ -342,11 +340,6 @@ void generic_shutdown_super(struct super_block *sb)
 		lock_super(sb);
 		sb->s_flags &= ~MS_ACTIVE;
 
-		/*
-		 * wait for asynchronous fs operations to finish before going further
-		 */
-		async_synchronize_full_domain(&sb->s_async_list);
-
 		/* bad name - it should be evict_inodes() */
 		invalidate_inodes(sb);
 		lock_kernel();
@@ -517,7 +510,6 @@ restart:
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 		down_read(&sb->s_umount);
-		async_synchronize_full_domain(&sb->s_async_list);
 		if (sb->s_root && (wait || sb->s_dirt))
 			sb->s_op->sync_fs(sb, wait);
 		up_read(&sb->s_umount);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 03fb2102b8f..36bcff7036e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1372,11 +1372,6 @@ struct super_block {
 	 * generic_show_options()
 	 */
 	char *s_options;
-
-	/*
-	 * storage for asynchronous operations
-	 */
-	struct list_head s_async_list;
 };
 
 extern struct timespec current_fs_time(struct super_block *sb);
-- 
cgit v1.2.3-70-g09d2


From 429479f031322a0cc5c921ffb2321a51718dc875 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 27 Apr 2009 16:43:50 +0200
Subject: vfs: Make __fsync_super() a static function (version 4)

__fsync_super() does the same thing as fsync_super(). So change the only
caller to use fsync_super() and make __fsync_super() static. This removes
unnecessarily duplicated call to sync_blockdev() and prepares ground
for the changes to __fsync_super() in the following patches.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/block_dev.c     | 2 +-
 fs/super.c         | 7 +++----
 include/linux/fs.h | 1 -
 3 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 931f6b8c4b2..fe47f722761 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -241,7 +241,7 @@ struct super_block *freeze_bdev(struct block_device *bdev)
 		sb->s_frozen = SB_FREEZE_WRITE;
 		smp_wmb();
 
-		__fsync_super(sb);
+		fsync_super(sb);
 
 		sb->s_frozen = SB_FREEZE_TRANS;
 		smp_wmb();
diff --git a/fs/super.c b/fs/super.c
index fae91ba38e4..8dbe1ead9dd 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -289,7 +289,7 @@ EXPORT_SYMBOL(unlock_super);
  * device.  Takes the superblock lock.  Requires a second blkdev
  * flush by the caller to complete the operation.
  */
-void __fsync_super(struct super_block *sb)
+static int __fsync_super(struct super_block *sb)
 {
 	sync_inodes_sb(sb, 0);
 	vfs_dq_sync(sb);
@@ -300,7 +300,7 @@ void __fsync_super(struct super_block *sb)
 	unlock_super(sb);
 	if (sb->s_op->sync_fs)
 		sb->s_op->sync_fs(sb, 1);
-	sync_blockdev(sb->s_bdev);
+	return sync_blockdev(sb->s_bdev);
 }
 
 /*
@@ -310,8 +310,7 @@ void __fsync_super(struct super_block *sb)
  */
 int fsync_super(struct super_block *sb)
 {
-	__fsync_super(sb);
-	return sync_blockdev(sb->s_bdev);
+	return __fsync_super(sb);
 }
 EXPORT_SYMBOL_GPL(fsync_super);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 36bcff7036e..41a9907f342 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2078,7 +2078,6 @@ extern int filemap_fdatawrite_range(struct address_space *mapping,
 extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync);
 extern void sync_supers(void);
 extern void sync_filesystems(int wait);
-extern void __fsync_super(struct super_block *sb);
 extern void emergency_sync(void);
 extern void emergency_remount(void);
 extern int do_remount_sb(struct super_block *sb, int flags,
-- 
cgit v1.2.3-70-g09d2


From 5cee5815d1564bbbd505fea86f4550f1efdb5cd0 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 27 Apr 2009 16:43:51 +0200
Subject: vfs: Make sys_sync() use fsync_super() (version 4)

It is unnecessarily fragile to have two places (fsync_super() and do_sync())
doing data integrity sync of the filesystem. Alter __fsync_super() to
accommodate needs of both callers and use it. So after this patch
__fsync_super() is the only place where we gather all the calls needed to
properly send all data on a filesystem to disk.

Nice bonus is that we get a complete livelock avoidance and write_supers()
is now only used for periodic writeback of superblocks.

sync_blockdevs() introduced a couple of patches ago is gone now.

[build fixes folded]

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/block_dev.c            | 15 ++++++----
 fs/fs-writeback.c         | 49 --------------------------------
 fs/internal.h             | 16 +++++------
 fs/super.c                | 72 +++++++++++++++--------------------------------
 fs/sync.c                 | 31 +++++++-------------
 include/linux/fs.h        |  2 +-
 include/linux/writeback.h |  1 -
 7 files changed, 51 insertions(+), 135 deletions(-)

(limited to 'include')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index fe47f722761..4b6a3b9d01e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -176,17 +176,22 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 				iov, offset, nr_segs, blkdev_get_blocks, NULL);
 }
 
+int __sync_blockdev(struct block_device *bdev, int wait)
+{
+	if (!bdev)
+		return 0;
+	if (!wait)
+		return filemap_flush(bdev->bd_inode->i_mapping);
+	return filemap_write_and_wait(bdev->bd_inode->i_mapping);
+}
+
 /*
  * Write out and wait upon all the dirty data associated with a block
  * device via its mapping.  Does not take the superblock lock.
  */
 int sync_blockdev(struct block_device *bdev)
 {
-	int ret = 0;
-
-	if (bdev)
-		ret = filemap_write_and_wait(bdev->bd_inode->i_mapping);
-	return ret;
+	return __sync_blockdev(bdev, 1);
 }
 EXPORT_SYMBOL(sync_blockdev);
 
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 91013ff7dd5..e0fb2e78959 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -678,55 +678,6 @@ void sync_inodes_sb(struct super_block *sb, int wait)
 	sync_sb_inodes(sb, &wbc);
 }
 
-/**
- * sync_inodes - writes all inodes to disk
- * @wait: wait for completion
- *
- * sync_inodes() goes through each super block's dirty inode list, writes the
- * inodes out, waits on the writeout and puts the inodes back on the normal
- * list.
- *
- * This is for sys_sync().  fsync_dev() uses the same algorithm.  The subtle
- * part of the sync functions is that the blockdev "superblock" is processed
- * last.  This is because the write_inode() function of a typical fs will
- * perform no I/O, but will mark buffers in the blockdev mapping as dirty.
- * What we want to do is to perform all that dirtying first, and then write
- * back all those inode blocks via the blockdev mapping in one sweep.  So the
- * additional (somewhat redundant) sync_blockdev() calls here are to make
- * sure that really happens.  Because if we call sync_inodes_sb(wait=1) with
- * outstanding dirty inodes, the writeback goes block-at-a-time within the
- * filesystem's write_inode().  This is extremely slow.
- */
-static void __sync_inodes(int wait)
-{
-	struct super_block *sb;
-
-	spin_lock(&sb_lock);
-restart:
-	list_for_each_entry(sb, &super_blocks, s_list) {
-		sb->s_count++;
-		spin_unlock(&sb_lock);
-		down_read(&sb->s_umount);
-		if (sb->s_root) {
-			sync_inodes_sb(sb, wait);
-			sync_blockdev(sb->s_bdev);
-		}
-		up_read(&sb->s_umount);
-		spin_lock(&sb_lock);
-		if (__put_super_and_need_restart(sb))
-			goto restart;
-	}
-	spin_unlock(&sb_lock);
-}
-
-void sync_inodes(int wait)
-{
-	__sync_inodes(0);
-
-	if (wait)
-		__sync_inodes(1);
-}
-
 /**
  * write_inode_now	-	write an inode to disk
  * @inode: inode to write to disk
diff --git a/fs/internal.h b/fs/internal.h
index 343a537ab80..dbec3cc2833 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -25,6 +25,8 @@ static inline int sb_is_blkdev_sb(struct super_block *sb)
 	return sb == blockdev_superblock;
 }
 
+extern int __sync_blockdev(struct block_device *bdev, int wait);
+
 #else
 static inline void bdev_cache_init(void)
 {
@@ -34,6 +36,11 @@ static inline int sb_is_blkdev_sb(struct super_block *sb)
 {
 	return 0;
 }
+
+static inline int __sync_blockdev(struct block_device *bdev, int wait)
+{
+	return 0;
+}
 #endif
 
 /*
@@ -71,12 +78,3 @@ extern void chroot_fs_refs(struct path *, struct path *);
  * file_table.c
  */
 extern void mark_files_ro(struct super_block *);
-
-/*
- * super.c
- */
-#ifdef CONFIG_BLOCK
-extern void sync_blockdevs(void);
-#else
-static inline void sync_blockdevs(void) { }
-#endif
diff --git a/fs/super.c b/fs/super.c
index 8dbe1ead9dd..c8ce5ed0424 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -284,23 +284,23 @@ EXPORT_SYMBOL(lock_super);
 EXPORT_SYMBOL(unlock_super);
 
 /*
- * Write out and wait upon all dirty data associated with this
- * superblock.  Filesystem data as well as the underlying block
- * device.  Takes the superblock lock.  Requires a second blkdev
- * flush by the caller to complete the operation.
+ * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0)
+ * just dirties buffers with inodes so we have to submit IO for these buffers
+ * via __sync_blockdev(). This also speeds up the wait == 1 case since in that
+ * case write_inode() functions do sync_dirty_buffer() and thus effectively
+ * write one block at a time.
  */
-static int __fsync_super(struct super_block *sb)
+static int __fsync_super(struct super_block *sb, int wait)
 {
-	sync_inodes_sb(sb, 0);
 	vfs_dq_sync(sb);
-	sync_inodes_sb(sb, 1);
+	sync_inodes_sb(sb, wait);
 	lock_super(sb);
 	if (sb->s_dirt && sb->s_op->write_super)
 		sb->s_op->write_super(sb);
 	unlock_super(sb);
 	if (sb->s_op->sync_fs)
-		sb->s_op->sync_fs(sb, 1);
-	return sync_blockdev(sb->s_bdev);
+		sb->s_op->sync_fs(sb, wait);
+	return __sync_blockdev(sb->s_bdev, wait);
 }
 
 /*
@@ -310,7 +310,12 @@ static int __fsync_super(struct super_block *sb)
  */
 int fsync_super(struct super_block *sb)
 {
-	return __fsync_super(sb);
+	int ret;
+
+	ret = __fsync_super(sb, 0);
+	if (ret < 0)
+		return ret;
+	return __fsync_super(sb, 1);
 }
 EXPORT_SYMBOL_GPL(fsync_super);
 
@@ -469,20 +474,18 @@ restart:
 }
 
 /*
- * Call the ->sync_fs super_op against all filesystems which are r/w and
- * which implement it.
+ * Sync all the data for all the filesystems (called by sys_sync() and
+ * emergency sync)
  *
  * This operation is careful to avoid the livelock which could easily happen
- * if two or more filesystems are being continuously dirtied.  s_need_sync_fs
+ * if two or more filesystems are being continuously dirtied.  s_need_sync
  * is used only here.  We set it against all filesystems and then clear it as
  * we sync them.  So redirtied filesystems are skipped.
  *
  * But if process A is currently running sync_filesystems and then process B
- * calls sync_filesystems as well, process B will set all the s_need_sync_fs
+ * calls sync_filesystems as well, process B will set all the s_need_sync
  * flags again, which will cause process A to resync everything.  Fix that with
  * a local mutex.
- *
- * (Fabian) Avoid sync_fs with clean fs & wait mode 0
  */
 void sync_filesystems(int wait)
 {
@@ -492,25 +495,23 @@ void sync_filesystems(int wait)
 	mutex_lock(&mutex);		/* Could be down_interruptible */
 	spin_lock(&sb_lock);
 	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (!sb->s_op->sync_fs)
-			continue;
 		if (sb->s_flags & MS_RDONLY)
 			continue;
-		sb->s_need_sync_fs = 1;
+		sb->s_need_sync = 1;
 	}
 
 restart:
 	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (!sb->s_need_sync_fs)
+		if (!sb->s_need_sync)
 			continue;
-		sb->s_need_sync_fs = 0;
+		sb->s_need_sync = 0;
 		if (sb->s_flags & MS_RDONLY)
 			continue;	/* hm.  Was remounted r/o meanwhile */
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 		down_read(&sb->s_umount);
 		if (sb->s_root)
-			sb->s_op->sync_fs(sb, wait);
+			__fsync_super(sb, wait);
 		up_read(&sb->s_umount);
 		/* restart only when sb is no longer on the list */
 		spin_lock(&sb_lock);
@@ -521,33 +522,6 @@ restart:
 	mutex_unlock(&mutex);
 }
 
-#ifdef CONFIG_BLOCK
-/*
- *  Sync all block devices underlying some superblock
- */
-void sync_blockdevs(void)
-{
-	struct super_block *sb;
-
-	spin_lock(&sb_lock);
-restart:
-	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (!sb->s_bdev)
-			continue;
-		sb->s_count++;
-		spin_unlock(&sb_lock);
-		down_read(&sb->s_umount);
-		if (sb->s_root)
-			sync_blockdev(sb->s_bdev);
-		up_read(&sb->s_umount);
-		spin_lock(&sb_lock);
-		if (__put_super_and_need_restart(sb))
-			goto restart;
-	}
-	spin_unlock(&sb_lock);
-}
-#endif
-
 /**
  *	get_super - get the superblock of a device
  *	@bdev: device to get the superblock for
diff --git a/fs/sync.c b/fs/sync.c
index 631fd5aece7..be0798cc33d 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -18,35 +18,24 @@
 #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
 			SYNC_FILE_RANGE_WAIT_AFTER)
 
-/*
- * sync everything.  Start out by waking pdflush, because that writes back
- * all queues in parallel.
- */
-static void do_sync(unsigned long wait)
+SYSCALL_DEFINE0(sync)
 {
-	wakeup_pdflush(0);
-	sync_inodes(0);		/* All mappings, inodes and their blockdevs */
-	vfs_dq_sync(NULL);
-	sync_inodes(wait);	/* Mappings, inodes and blockdevs, again. */
-	sync_supers();		/* Write the superblocks */
-	sync_filesystems(0);	/* Start syncing the filesystems */
-	sync_filesystems(wait);	/* Waitingly sync the filesystems */
-	sync_blockdevs();
-	if (!wait)
-		printk("Emergency Sync complete\n");
+	sync_filesystems(0);
+	sync_filesystems(1);
 	if (unlikely(laptop_mode))
 		laptop_sync_completion();
-}
-
-SYSCALL_DEFINE0(sync)
-{
-	do_sync(1);
 	return 0;
 }
 
 static void do_sync_work(struct work_struct *work)
 {
-	do_sync(0);
+	/*
+	 * Sync twice to reduce the possibility we skipped some inodes / pages
+	 * because they were temporarily locked
+	 */
+	sync_filesystems(0);
+	sync_filesystems(0);
+	printk("Emergency Sync complete\n");
 	kfree(work);
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 41a9907f342..f00df653cf2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1321,7 +1321,7 @@ struct super_block {
 	struct rw_semaphore	s_umount;
 	struct mutex		s_lock;
 	int			s_count;
-	int			s_need_sync_fs;
+	int			s_need_sync;
 	atomic_t		s_active;
 #ifdef CONFIG_SECURITY
 	void                    *s_security;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 93445477f86..3224820c851 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -79,7 +79,6 @@ struct writeback_control {
 void writeback_inodes(struct writeback_control *wbc);
 int inode_wait(void *);
 void sync_inodes_sb(struct super_block *, int wait);
-void sync_inodes(int wait);
 
 /* writeback.h requires fs.h; it, too, is not included from here. */
 static inline void wait_on_inode(struct inode *inode)
-- 
cgit v1.2.3-70-g09d2


From c15c54f5f056ee4819da9fde59a5f2cd45445f23 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 27 Apr 2009 16:43:52 +0200
Subject: vfs: Move syncing code from super.c to sync.c (version 4)

Move sync_filesystems(), __fsync_super(), fsync_super() from
super.c to sync.c where it fits better.

[build fixes folded]

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/super.c         | 85 ------------------------------------------------------
 fs/sync.c          | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h |  3 +-
 3 files changed, 86 insertions(+), 87 deletions(-)

(limited to 'include')

diff --git a/fs/super.c b/fs/super.c
index c8ce5ed0424..f822c74f25b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -283,42 +283,6 @@ void unlock_super(struct super_block * sb)
 EXPORT_SYMBOL(lock_super);
 EXPORT_SYMBOL(unlock_super);
 
-/*
- * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0)
- * just dirties buffers with inodes so we have to submit IO for these buffers
- * via __sync_blockdev(). This also speeds up the wait == 1 case since in that
- * case write_inode() functions do sync_dirty_buffer() and thus effectively
- * write one block at a time.
- */
-static int __fsync_super(struct super_block *sb, int wait)
-{
-	vfs_dq_sync(sb);
-	sync_inodes_sb(sb, wait);
-	lock_super(sb);
-	if (sb->s_dirt && sb->s_op->write_super)
-		sb->s_op->write_super(sb);
-	unlock_super(sb);
-	if (sb->s_op->sync_fs)
-		sb->s_op->sync_fs(sb, wait);
-	return __sync_blockdev(sb->s_bdev, wait);
-}
-
-/*
- * Write out and wait upon all dirty data associated with this
- * superblock.  Filesystem data as well as the underlying block
- * device.  Takes the superblock lock.
- */
-int fsync_super(struct super_block *sb)
-{
-	int ret;
-
-	ret = __fsync_super(sb, 0);
-	if (ret < 0)
-		return ret;
-	return __fsync_super(sb, 1);
-}
-EXPORT_SYMBOL_GPL(fsync_super);
-
 /**
  *	generic_shutdown_super	-	common helper for ->kill_sb()
  *	@sb: superblock to kill
@@ -473,55 +437,6 @@ restart:
 	spin_unlock(&sb_lock);
 }
 
-/*
- * Sync all the data for all the filesystems (called by sys_sync() and
- * emergency sync)
- *
- * This operation is careful to avoid the livelock which could easily happen
- * if two or more filesystems are being continuously dirtied.  s_need_sync
- * is used only here.  We set it against all filesystems and then clear it as
- * we sync them.  So redirtied filesystems are skipped.
- *
- * But if process A is currently running sync_filesystems and then process B
- * calls sync_filesystems as well, process B will set all the s_need_sync
- * flags again, which will cause process A to resync everything.  Fix that with
- * a local mutex.
- */
-void sync_filesystems(int wait)
-{
-	struct super_block *sb;
-	static DEFINE_MUTEX(mutex);
-
-	mutex_lock(&mutex);		/* Could be down_interruptible */
-	spin_lock(&sb_lock);
-	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (sb->s_flags & MS_RDONLY)
-			continue;
-		sb->s_need_sync = 1;
-	}
-
-restart:
-	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (!sb->s_need_sync)
-			continue;
-		sb->s_need_sync = 0;
-		if (sb->s_flags & MS_RDONLY)
-			continue;	/* hm.  Was remounted r/o meanwhile */
-		sb->s_count++;
-		spin_unlock(&sb_lock);
-		down_read(&sb->s_umount);
-		if (sb->s_root)
-			__fsync_super(sb, wait);
-		up_read(&sb->s_umount);
-		/* restart only when sb is no longer on the list */
-		spin_lock(&sb_lock);
-		if (__put_super_and_need_restart(sb))
-			goto restart;
-	}
-	spin_unlock(&sb_lock);
-	mutex_unlock(&mutex);
-}
-
 /**
  *	get_super - get the superblock of a device
  *	@bdev: device to get the superblock for
diff --git a/fs/sync.c b/fs/sync.c
index be0798cc33d..d5fa7b79982 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -18,6 +18,91 @@
 #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
 			SYNC_FILE_RANGE_WAIT_AFTER)
 
+/*
+ * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0)
+ * just dirties buffers with inodes so we have to submit IO for these buffers
+ * via __sync_blockdev(). This also speeds up the wait == 1 case since in that
+ * case write_inode() functions do sync_dirty_buffer() and thus effectively
+ * write one block at a time.
+ */
+static int __fsync_super(struct super_block *sb, int wait)
+{
+	vfs_dq_sync(sb);
+	sync_inodes_sb(sb, wait);
+	lock_super(sb);
+	if (sb->s_dirt && sb->s_op->write_super)
+		sb->s_op->write_super(sb);
+	unlock_super(sb);
+	if (sb->s_op->sync_fs)
+		sb->s_op->sync_fs(sb, wait);
+	return __sync_blockdev(sb->s_bdev, wait);
+}
+
+/*
+ * Write out and wait upon all dirty data associated with this
+ * superblock.  Filesystem data as well as the underlying block
+ * device.  Takes the superblock lock.
+ */
+int fsync_super(struct super_block *sb)
+{
+	int ret;
+
+	ret = __fsync_super(sb, 0);
+	if (ret < 0)
+		return ret;
+	return __fsync_super(sb, 1);
+}
+EXPORT_SYMBOL_GPL(fsync_super);
+
+/*
+ * Sync all the data for all the filesystems (called by sys_sync() and
+ * emergency sync)
+ *
+ * This operation is careful to avoid the livelock which could easily happen
+ * if two or more filesystems are being continuously dirtied.  s_need_sync
+ * is used only here.  We set it against all filesystems and then clear it as
+ * we sync them.  So redirtied filesystems are skipped.
+ *
+ * But if process A is currently running sync_filesystems and then process B
+ * calls sync_filesystems as well, process B will set all the s_need_sync
+ * flags again, which will cause process A to resync everything.  Fix that with
+ * a local mutex.
+ */
+static void sync_filesystems(int wait)
+{
+	struct super_block *sb;
+	static DEFINE_MUTEX(mutex);
+
+	mutex_lock(&mutex);		/* Could be down_interruptible */
+	spin_lock(&sb_lock);
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (sb->s_flags & MS_RDONLY)
+			continue;
+		sb->s_need_sync = 1;
+	}
+
+restart:
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (!sb->s_need_sync)
+			continue;
+		sb->s_need_sync = 0;
+		if (sb->s_flags & MS_RDONLY)
+			continue;	/* hm.  Was remounted r/o meanwhile */
+		sb->s_count++;
+		spin_unlock(&sb_lock);
+		down_read(&sb->s_umount);
+		if (sb->s_root)
+			__fsync_super(sb, wait);
+		up_read(&sb->s_umount);
+		/* restart only when sb is no longer on the list */
+		spin_lock(&sb_lock);
+		if (__put_super_and_need_restart(sb))
+			goto restart;
+	}
+	spin_unlock(&sb_lock);
+	mutex_unlock(&mutex);
+}
+
 SYSCALL_DEFINE0(sync)
 {
 	sync_filesystems(0);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f00df653cf2..d3f7159993c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1942,7 +1942,6 @@ extern struct super_block *freeze_bdev(struct block_device *);
 extern void emergency_thaw_all(void);
 extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
 extern int fsync_bdev(struct block_device *);
-extern int fsync_super(struct super_block *);
 extern int fsync_no_super(struct block_device *);
 #else
 static inline void bd_forget(struct inode *inode) {}
@@ -1959,6 +1958,7 @@ static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
 	return 0;
 }
 #endif
+extern int fsync_super(struct super_block *);
 extern const struct file_operations def_blk_fops;
 extern const struct file_operations def_chr_fops;
 extern const struct file_operations bad_sock_fops;
@@ -2077,7 +2077,6 @@ extern int filemap_fdatawrite_range(struct address_space *mapping,
 
 extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync);
 extern void sync_supers(void);
-extern void sync_filesystems(int wait);
 extern void emergency_sync(void);
 extern void emergency_remount(void);
 extern int do_remount_sb(struct super_block *sb, int flags,
-- 
cgit v1.2.3-70-g09d2


From 60b0680fa236ac4e17ce31a50048c9d75f9ec831 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 27 Apr 2009 16:43:53 +0200
Subject: vfs: Rename fsync_super() to sync_filesystem() (version 4)

Rename the function so that it better describe what it really does. Also
remove the unnecessary include of buffer_head.h.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/block_dev.c            |  4 ++--
 fs/cachefiles/interface.c |  2 +-
 fs/super.c                |  5 ++---
 fs/sync.c                 | 12 ++++++------
 include/linux/fs.h        |  2 +-
 5 files changed, 12 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4b6a3b9d01e..3a6d4fb2a32 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -204,7 +204,7 @@ int fsync_bdev(struct block_device *bdev)
 {
 	struct super_block *sb = get_super(bdev);
 	if (sb) {
-		int res = fsync_super(sb);
+		int res = sync_filesystem(sb);
 		drop_super(sb);
 		return res;
 	}
@@ -246,7 +246,7 @@ struct super_block *freeze_bdev(struct block_device *bdev)
 		sb->s_frozen = SB_FREEZE_WRITE;
 		smp_wmb();
 
-		fsync_super(sb);
+		sync_filesystem(sb);
 
 		sb->s_frozen = SB_FREEZE_TRANS;
 		smp_wmb();
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 1e962348d11..dafd484d7bd 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -354,7 +354,7 @@ static void cachefiles_sync_cache(struct fscache_cache *_cache)
 	/* make sure all pages pinned by operations on behalf of the netfs are
 	 * written to disc */
 	cachefiles_begin_secure(cache, &saved_cred);
-	ret = fsync_super(cache->mnt->mnt_sb);
+	ret = sync_filesystem(cache->mnt->mnt_sb);
 	cachefiles_end_secure(cache, saved_cred);
 
 	if (ret == -EIO)
diff --git a/fs/super.c b/fs/super.c
index f822c74f25b..721236e6177 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -28,7 +28,6 @@
 #include <linux/blkdev.h>
 #include <linux/quotaops.h>
 #include <linux/namei.h>
-#include <linux/buffer_head.h>		/* for fsync_super() */
 #include <linux/mount.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
@@ -304,7 +303,7 @@ void generic_shutdown_super(struct super_block *sb)
 
 	if (sb->s_root) {
 		shrink_dcache_for_umount(sb);
-		fsync_super(sb);
+		sync_filesystem(sb);
 		lock_super(sb);
 		sb->s_flags &= ~MS_ACTIVE;
 
@@ -543,7 +542,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 	if (flags & MS_RDONLY)
 		acct_auto_close(sb);
 	shrink_dcache_sb(sb);
-	fsync_super(sb);
+	sync_filesystem(sb);
 
 	/* If we are remounting RDONLY and current sb is read/write,
 	   make sure there are no rw files opened */
diff --git a/fs/sync.c b/fs/sync.c
index d5fa7b79982..8aa870a4d40 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -25,7 +25,7 @@
  * case write_inode() functions do sync_dirty_buffer() and thus effectively
  * write one block at a time.
  */
-static int __fsync_super(struct super_block *sb, int wait)
+static int __sync_filesystem(struct super_block *sb, int wait)
 {
 	vfs_dq_sync(sb);
 	sync_inodes_sb(sb, wait);
@@ -43,16 +43,16 @@ static int __fsync_super(struct super_block *sb, int wait)
  * superblock.  Filesystem data as well as the underlying block
  * device.  Takes the superblock lock.
  */
-int fsync_super(struct super_block *sb)
+int sync_filesystem(struct super_block *sb)
 {
 	int ret;
 
-	ret = __fsync_super(sb, 0);
+	ret = __sync_filesystem(sb, 0);
 	if (ret < 0)
 		return ret;
-	return __fsync_super(sb, 1);
+	return __sync_filesystem(sb, 1);
 }
-EXPORT_SYMBOL_GPL(fsync_super);
+EXPORT_SYMBOL_GPL(sync_filesystem);
 
 /*
  * Sync all the data for all the filesystems (called by sys_sync() and
@@ -92,7 +92,7 @@ restart:
 		spin_unlock(&sb_lock);
 		down_read(&sb->s_umount);
 		if (sb->s_root)
-			__fsync_super(sb, wait);
+			__sync_filesystem(sb, wait);
 		up_read(&sb->s_umount);
 		/* restart only when sb is no longer on the list */
 		spin_lock(&sb_lock);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d3f7159993c..fb1822bed7c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1958,7 +1958,7 @@ static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
 	return 0;
 }
 #endif
-extern int fsync_super(struct super_block *);
+extern int sync_filesystem(struct super_block *);
 extern const struct file_operations def_blk_fops;
 extern const struct file_operations def_chr_fops;
 extern const struct file_operations bad_sock_fops;
-- 
cgit v1.2.3-70-g09d2


From 850b201b087f5525a0a7278551c2bcd0423c3b26 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 27 Apr 2009 16:43:54 +0200
Subject: quota: cleanup dquota sync functions (version 4)

Currently the VFS calls vfs_dq_sync to sync out disk quotas for a given
superblock.  This is a small wrapper around sync_dquots which for the
case of a non-NULL superblock is a small wrapper around quota_sync_sb.

Just make quota_sync_sb global (rename it to sync_quota_sb) and call it
directly.  Also call it directly for those cases in quota.c that have a
superblock and leave sync_dquots purely an iterator over sync_quota_sb and
remove it's superblock argument.

To make this nicer move the check for the lack of a quota_sync method
from the callers into sync_quota_sb.

[folded build fix from Alexander Beregalov <a.beregalov@gmail.com>]

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/quota/quota.c         | 25 ++++++++++++++-----------
 fs/sync.c                |  2 +-
 include/linux/quotaops.h | 11 +++--------
 3 files changed, 18 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index b7f5a468f07..95c5b42384b 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -159,10 +159,14 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd,
 	return error;
 }
 
-static void quota_sync_sb(struct super_block *sb, int type)
+#ifdef CONFIG_QUOTA
+void sync_quota_sb(struct super_block *sb, int type)
 {
 	int cnt;
 
+	if (!sb->s_qcop->quota_sync)
+		return;
+
 	sb->s_qcop->quota_sync(sb, type);
 
 	if (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE)
@@ -191,17 +195,13 @@ static void quota_sync_sb(struct super_block *sb, int type)
 	}
 	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 }
+#endif
 
-void sync_dquots(struct super_block *sb, int type)
+static void sync_dquots(int type)
 {
+	struct super_block *sb;
 	int cnt;
 
-	if (sb) {
-		if (sb->s_qcop->quota_sync)
-			quota_sync_sb(sb, type);
-		return;
-	}
-
 	spin_lock(&sb_lock);
 restart:
 	list_for_each_entry(sb, &super_blocks, s_list) {
@@ -222,8 +222,8 @@ restart:
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 		down_read(&sb->s_umount);
-		if (sb->s_root && sb->s_qcop->quota_sync)
-			quota_sync_sb(sb, type);
+		if (sb->s_root)
+			sync_quota_sb(sb, type);
 		up_read(&sb->s_umount);
 		spin_lock(&sb_lock);
 		if (__put_super_and_need_restart(sb))
@@ -301,7 +301,10 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
 			return sb->s_qcop->set_dqblk(sb, type, id, &idq);
 		}
 		case Q_SYNC:
-			sync_dquots(sb, type);
+			if (sb)
+				sync_quota_sb(sb, type);
+			else
+				sync_dquots(type);
 			return 0;
 
 		case Q_XQUOTAON:
diff --git a/fs/sync.c b/fs/sync.c
index 8aa870a4d40..d90ab776455 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -27,7 +27,7 @@
  */
 static int __sync_filesystem(struct super_block *sb, int wait)
 {
-	vfs_dq_sync(sb);
+	sync_quota_sb(sb, -1);
 	sync_inodes_sb(sb, wait);
 	lock_super(sb);
 	if (sb->s_dirt && sb->s_op->write_super)
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 36353d95c8d..047310fa22f 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -20,7 +20,7 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb)
 /*
  * declaration of quota_function calls in kernel.
  */
-void sync_dquots(struct super_block *sb, int type);
+void sync_quota_sb(struct super_block *sb, int type);
 
 int dquot_initialize(struct inode *inode, int type);
 int dquot_drop(struct inode *inode);
@@ -253,12 +253,7 @@ static inline void vfs_dq_free_inode(struct inode *inode)
 		inode->i_sb->dq_op->free_inode(inode, 1);
 }
 
-/* The following two functions cannot be called inside a transaction */
-static inline void vfs_dq_sync(struct super_block *sb)
-{
-	sync_dquots(sb, -1);
-}
-
+/* Cannot be called inside a transaction */
 static inline int vfs_dq_off(struct super_block *sb, int remount)
 {
 	int ret = -ENOSYS;
@@ -334,7 +329,7 @@ static inline void vfs_dq_free_inode(struct inode *inode)
 {
 }
 
-static inline void vfs_dq_sync(struct super_block *sb)
+static inline void sync_quota_sb(struct super_block *sb, int type)
 {
 }
 
-- 
cgit v1.2.3-70-g09d2


From c3f8a40c1cd5591b882497d1d00d43d0e5bb4698 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 27 Apr 2009 16:43:55 +0200
Subject: quota: Introduce writeout_quota_sb() (version 4)

Introduce this function which just writes all the quota structures but
avoids all the syncing and cache pruning work to expose quota structures
to userspace. Use this function from __sync_filesystem when wait == 0.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/sync.c                | 6 +++++-
 include/linux/quotaops.h | 9 +++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/sync.c b/fs/sync.c
index d90ab776455..4487b5560dc 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -27,7 +27,11 @@
  */
 static int __sync_filesystem(struct super_block *sb, int wait)
 {
-	sync_quota_sb(sb, -1);
+	/* Avoid doing twice syncing and cache pruning for quota sync */
+	if (!wait)
+		writeout_quota_sb(sb, -1);
+	else
+		sync_quota_sb(sb, -1);
 	sync_inodes_sb(sb, wait);
 	lock_super(sb);
 	if (sb->s_dirt && sb->s_op->write_super)
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 047310fa22f..7bc45759368 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -21,6 +21,11 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb)
  * declaration of quota_function calls in kernel.
  */
 void sync_quota_sb(struct super_block *sb, int type);
+static inline void writeout_quota_sb(struct super_block *sb, int type)
+{
+	if (sb->s_qcop->quota_sync)
+		sb->s_qcop->quota_sync(sb, type);
+}
 
 int dquot_initialize(struct inode *inode, int type);
 int dquot_drop(struct inode *inode);
@@ -333,6 +338,10 @@ static inline void sync_quota_sb(struct super_block *sb, int type)
 {
 }
 
+static inline void writeout_quota_sb(struct super_block *sb, int type)
+{
+}
+
 static inline int vfs_dq_off(struct super_block *sb, int remount)
 {
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From f3da392e9ff14b9f388e74319e6d195848991c07 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 4 May 2009 03:32:03 +0400
Subject: dcache: extrace and use d_unlinked()

d_unlinked() will be used in middle-term to ban checkpointing when opened
but unlinked file is detected, and in long term, to detect such situation
and special case on it.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            | 7 +++----
 fs/namespace.c         | 8 ++++----
 include/linux/dcache.h | 5 +++++
 3 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/fs/dcache.c b/fs/dcache.c
index 75659a6fd1f..9e5cd3c3a6b 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1910,7 +1910,7 @@ char *__d_path(const struct path *path, struct path *root,
 
 	spin_lock(&vfsmount_lock);
 	prepend(&end, &buflen, "\0", 1);
-	if (!IS_ROOT(dentry) && d_unhashed(dentry) &&
+	if (d_unlinked(dentry) &&
 		(prepend(&end, &buflen, " (deleted)", 10) != 0))
 			goto Elong;
 
@@ -2035,7 +2035,7 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
 
 	spin_lock(&dcache_lock);
 	prepend(&end, &buflen, "\0", 1);
-	if (!IS_ROOT(dentry) && d_unhashed(dentry) &&
+	if (d_unlinked(dentry) &&
 		(prepend(&end, &buflen, "//deleted", 9) != 0))
 			goto Elong;
 	if (buflen < 1)
@@ -2097,9 +2097,8 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
 	read_unlock(&current->fs->lock);
 
 	error = -ENOENT;
-	/* Has the current directory has been unlinked? */
 	spin_lock(&dcache_lock);
-	if (IS_ROOT(pwd.dentry) || !d_unhashed(pwd.dentry)) {
+	if (!d_unlinked(pwd.dentry)) {
 		unsigned long len;
 		struct path tmp = root;
 		char * cwd;
diff --git a/fs/namespace.c b/fs/namespace.c
index 120b8a6b99e..7e537f0393b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1384,7 +1384,7 @@ static int graft_tree(struct vfsmount *mnt, struct path *path)
 		goto out_unlock;
 
 	err = -ENOENT;
-	if (IS_ROOT(path->dentry) || !d_unhashed(path->dentry))
+	if (!d_unlinked(path->dentry))
 		err = attach_recursive_mnt(mnt, path, NULL);
 out_unlock:
 	mutex_unlock(&path->dentry->d_inode->i_mutex);
@@ -1566,7 +1566,7 @@ static int do_move_mount(struct path *path, char *old_name)
 	if (IS_DEADDIR(path->dentry->d_inode))
 		goto out1;
 
-	if (!IS_ROOT(path->dentry) && d_unhashed(path->dentry))
+	if (d_unlinked(path->dentry))
 		goto out1;
 
 	err = -EINVAL;
@@ -2129,9 +2129,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	error = -ENOENT;
 	if (IS_DEADDIR(new.dentry->d_inode))
 		goto out2;
-	if (d_unhashed(new.dentry) && !IS_ROOT(new.dentry))
+	if (d_unlinked(new.dentry))
 		goto out2;
-	if (d_unhashed(old.dentry) && !IS_ROOT(old.dentry))
+	if (d_unlinked(old.dentry))
 		goto out2;
 	error = -EBUSY;
 	if (new.mnt == root.mnt ||
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 72ce2ae8859..30b93b2a01a 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -353,6 +353,11 @@ static inline int d_unhashed(struct dentry *dentry)
 	return (dentry->d_flags & DCACHE_UNHASHED);
 }
 
+static inline int d_unlinked(struct dentry *dentry)
+{
+	return d_unhashed(dentry) && !IS_ROOT(dentry);
+}
+
 static inline struct dentry *dget_parent(struct dentry *dentry)
 {
 	struct dentry *ret;
-- 
cgit v1.2.3-70-g09d2


From 62c6943b4b1e818aea60c11c5a68a50785b83119 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 7 May 2009 03:12:29 -0400
Subject: Trim a bit of crap from fs.h

do_remount_sb() is fs/internal.h fodder, fsync_no_super() is long gone.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/internal.h      | 5 +++++
 include/linux/fs.h | 3 ---
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/internal.h b/fs/internal.h
index dbec3cc2833..d55ef562f0b 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -78,3 +78,8 @@ extern void chroot_fs_refs(struct path *, struct path *);
  * file_table.c
  */
 extern void mark_files_ro(struct super_block *);
+
+/*
+ * super.c
+ */
+extern int do_remount_sb(struct super_block *, int, void *, int);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index fb1822bed7c..e7833ef5d1d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1942,7 +1942,6 @@ extern struct super_block *freeze_bdev(struct block_device *);
 extern void emergency_thaw_all(void);
 extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
 extern int fsync_bdev(struct block_device *);
-extern int fsync_no_super(struct block_device *);
 #else
 static inline void bd_forget(struct inode *inode) {}
 static inline int sync_blockdev(struct block_device *bdev) { return 0; }
@@ -2079,8 +2078,6 @@ extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync);
 extern void sync_supers(void);
 extern void emergency_sync(void);
 extern void emergency_remount(void);
-extern int do_remount_sb(struct super_block *sb, int flags,
-			 void *data, int force);
 #ifdef CONFIG_BLOCK
 extern sector_t bmap(struct inode *, sector_t);
 #endif
-- 
cgit v1.2.3-70-g09d2


From 9fd5746fd3d7838bf6ff991d50f1257057d1156f Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 21 May 2009 16:01:00 -0400
Subject: fs: Remove i_cindex from struct inode

The only user of the i_cindex element in the inode structure is used
is by the firewire drivers.  As part of an attempt to slim down the
inode structure to save memory --- since a typical Linux system will
have hundreds of thousands if not millions of inodes cached, a
reduction in the size inode has high leverage.

The firewire driver does not need i_cindex in any fast path, so it's
simple enough to calculate when it is needed, instead of wasting space
in the inode structure.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: krh@redhat.com
Cc: stefanr@s5r6.in-berlin.de
Cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/ieee1394/dv1394.c        |  5 +++--
 drivers/ieee1394/ieee1394_core.h |  6 +++++-
 fs/char_dev.c                    | 14 +++++++++++++-
 include/linux/cdev.h             |  2 ++
 include/linux/fs.h               |  1 -
 5 files changed, 23 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/ieee1394/dv1394.c b/drivers/ieee1394/dv1394.c
index 823a6297a1a..2cd00b5b45b 100644
--- a/drivers/ieee1394/dv1394.c
+++ b/drivers/ieee1394/dv1394.c
@@ -1789,12 +1789,13 @@ static int dv1394_open(struct inode *inode, struct file *file)
 	} else {
 		/* look up the card by ID */
 		unsigned long flags;
+		int idx = ieee1394_file_to_instance(file);
 
 		spin_lock_irqsave(&dv1394_cards_lock, flags);
 		if (!list_empty(&dv1394_cards)) {
 			struct video_card *p;
 			list_for_each_entry(p, &dv1394_cards, list) {
-				if ((p->id) == ieee1394_file_to_instance(file)) {
+				if ((p->id) == idx) {
 					video = p;
 					break;
 				}
@@ -1803,7 +1804,7 @@ static int dv1394_open(struct inode *inode, struct file *file)
 		spin_unlock_irqrestore(&dv1394_cards_lock, flags);
 
 		if (!video) {
-			debug_printk("dv1394: OHCI card %d not found", ieee1394_file_to_instance(file));
+			debug_printk("dv1394: OHCI card %d not found", idx);
 			return -ENODEV;
 		}
 
diff --git a/drivers/ieee1394/ieee1394_core.h b/drivers/ieee1394/ieee1394_core.h
index 21d50f73a21..28b9f58bafd 100644
--- a/drivers/ieee1394/ieee1394_core.h
+++ b/drivers/ieee1394/ieee1394_core.h
@@ -5,6 +5,7 @@
 #include <linux/fs.h>
 #include <linux/list.h>
 #include <linux/types.h>
+#include <linux/cdev.h>
 #include <asm/atomic.h>
 
 #include "hosts.h"
@@ -155,7 +156,10 @@ void hpsb_packet_received(struct hpsb_host *host, quadlet_t *data, size_t size,
  */
 static inline unsigned char ieee1394_file_to_instance(struct file *file)
 {
-	return file->f_path.dentry->d_inode->i_cindex;
+	int idx = cdev_index(file->f_path.dentry->d_inode);
+	if (idx < 0)
+		idx = 0;
+	return idx;
 }
 
 extern int hpsb_disable_irm;
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 38f71222a55..b7c9d5187a7 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -375,7 +375,6 @@ static int chrdev_open(struct inode *inode, struct file *filp)
 		p = inode->i_cdev;
 		if (!p) {
 			inode->i_cdev = p = new;
-			inode->i_cindex = idx;
 			list_add(&inode->i_devices, &p->list);
 			new = NULL;
 		} else if (!cdev_get(p))
@@ -405,6 +404,18 @@ static int chrdev_open(struct inode *inode, struct file *filp)
 	return ret;
 }
 
+int cdev_index(struct inode *inode)
+{
+	int idx;
+	struct kobject *kobj;
+
+	kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
+	if (!kobj)
+		return -1;
+	kobject_put(kobj);
+	return idx;
+}
+
 void cd_forget(struct inode *inode)
 {
 	spin_lock(&cdev_lock);
@@ -557,6 +568,7 @@ EXPORT_SYMBOL(cdev_init);
 EXPORT_SYMBOL(cdev_alloc);
 EXPORT_SYMBOL(cdev_del);
 EXPORT_SYMBOL(cdev_add);
+EXPORT_SYMBOL(cdev_index);
 EXPORT_SYMBOL(register_chrdev);
 EXPORT_SYMBOL(unregister_chrdev);
 EXPORT_SYMBOL(directly_mappable_cdev_bdi);
diff --git a/include/linux/cdev.h b/include/linux/cdev.h
index fb4591977b0..f389e319a45 100644
--- a/include/linux/cdev.h
+++ b/include/linux/cdev.h
@@ -28,6 +28,8 @@ int cdev_add(struct cdev *, dev_t, unsigned);
 
 void cdev_del(struct cdev *);
 
+int cdev_index(struct inode *inode);
+
 void cd_forget(struct inode *);
 
 extern struct backing_dev_info directly_mappable_cdev_bdi;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e7833ef5d1d..bcd63706db8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -751,7 +751,6 @@ struct inode {
 		struct block_device	*i_bdev;
 		struct cdev		*i_cdev;
 	};
-	int			i_cindex;
 
 	__u32			i_generation;
 
-- 
cgit v1.2.3-70-g09d2


From 28ad0c118b0ed98b042d362acfe0017591921138 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 21 May 2009 16:01:02 -0400
Subject: fs: Rearrange inode structure elements to avoid waste due to padding

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index bcd63706db8..d883aa1fc2e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -729,8 +729,8 @@ struct inode {
 	struct timespec		i_atime;
 	struct timespec		i_mtime;
 	struct timespec		i_ctime;
-	unsigned int		i_blkbits;
 	blkcnt_t		i_blocks;
+	unsigned int		i_blkbits;
 	unsigned short          i_bytes;
 	umode_t			i_mode;
 	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
-- 
cgit v1.2.3-70-g09d2


From 8688b8635266cf98f00c6b0350ea2dbe7c42c321 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 26 May 2009 05:45:04 -0400
Subject: linux/magic.h: move cramfs magic out of cramfs_fs.h

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
CC: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/cramfs_fs.h | 3 +--
 include/linux/magic.h     | 2 ++
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/cramfs_fs.h b/include/linux/cramfs_fs.h
index 3be4e5a27d8..6fc2bed368b 100644
--- a/include/linux/cramfs_fs.h
+++ b/include/linux/cramfs_fs.h
@@ -2,9 +2,8 @@
 #define __CRAMFS_H
 
 #include <linux/types.h>
+#include <linux/magic.h>
 
-#define CRAMFS_MAGIC		0x28cd3d45	/* some random number */
-#define CRAMFS_MAGIC_WEND	0x453dcd28	/* magic number with the wrong endianess */
 #define CRAMFS_SIGNATURE	"Compressed ROMFS"
 
 /*
diff --git a/include/linux/magic.h b/include/linux/magic.h
index 927138cf305..1923327b986 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -6,6 +6,8 @@
 #define AFS_SUPER_MAGIC                0x5346414F
 #define AUTOFS_SUPER_MAGIC	0x0187
 #define CODA_SUPER_MAGIC	0x73757245
+#define CRAMFS_MAGIC		0x28cd3d45	/* some random number */
+#define CRAMFS_MAGIC_WEND	0x453dcd28	/* magic number with the wrong endianess */
 #define DEBUGFS_MAGIC          0x64626720
 #define SYSFS_MAGIC		0x62656572
 #define SECURITYFS_MAGIC	0x73636673
-- 
cgit v1.2.3-70-g09d2


From d5aacad548db1ff547adf35d0a77eb2a8ed4fe14 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 7 Jun 2009 14:56:44 -0400
Subject: New helper - simple_fsync()

writes associated buffers, then does sync_inode() to write
the inode itself (and to make it clean).  Depends on
->write_inode() honouring the second argument.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/libfs.c         | 25 +++++++++++++++++++++++++
 include/linux/fs.h |  2 ++
 2 files changed, 27 insertions(+)

(limited to 'include')

diff --git a/fs/libfs.c b/fs/libfs.c
index 80046ddf506..ddfa89948c3 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -9,6 +9,8 @@
 #include <linux/vfs.h>
 #include <linux/mutex.h>
 #include <linux/exportfs.h>
+#include <linux/writeback.h>
+#include <linux/buffer_head.h>
 
 #include <asm/uaccess.h>
 
@@ -807,6 +809,29 @@ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid,
 }
 EXPORT_SYMBOL_GPL(generic_fh_to_parent);
 
+int simple_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_ALL,
+		.nr_to_write = 0, /* metadata-only; caller takes care of data */
+	};
+	struct inode *inode = dentry->d_inode;
+	int err;
+	int ret;
+
+	ret = sync_mapping_buffers(inode->i_mapping);
+	if (!(inode->i_state & I_DIRTY))
+		return ret;
+	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+		return ret;
+
+	err = sync_inode(inode, &wbc);
+	if (ret == 0)
+		ret = err;
+	return ret;
+}
+EXPORT_SYMBOL(simple_fsync);
+
 EXPORT_SYMBOL(dcache_dir_close);
 EXPORT_SYMBOL(dcache_dir_lseek);
 EXPORT_SYMBOL(dcache_dir_open);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d883aa1fc2e..ede84fa7da5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2345,6 +2345,8 @@ extern void simple_release_fs(struct vfsmount **mount, int *count);
 extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
 			loff_t *ppos, const void *from, size_t available);
 
+extern int simple_fsync(struct file *, struct dentry *, int);
+
 #ifdef CONFIG_MIGRATION
 extern int buffer_migrate_page(struct address_space *,
 				struct page *, struct page *);
-- 
cgit v1.2.3-70-g09d2


From 79d25767583e4e086f8309bfd1f502660a64fe7f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 7 Jun 2009 09:30:08 -0400
Subject: Sanitize qnx4 fsync handling

* have directory operations use mark_buffer_dirty_inode(),
  so that sync_mapping_buffers() would get those.
* make qnx4_write_inode() honour its last argument.
* get rid of insane copies of very ancient "walk the indirect blocks"
  in qnx4/fsync - they never matched the actual fs layout and, fortunately,
  never'd been called.  Again, all this junk is not needed; ->fsync()
  should just do sync_mapping_buffers + sync_inode (and if we implement
  block allocation for qnx4, we'll need to use mark_buffer_dirty_inode()
  for extent blocks)

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/qnx4/Makefile        |   2 +-
 fs/qnx4/dir.c           |   2 +-
 fs/qnx4/file.c          |   2 +-
 fs/qnx4/fsync.c         | 169 ------------------------------------------------
 fs/qnx4/inode.c         |  38 ++++-------
 fs/qnx4/namei.c         |   4 +-
 include/linux/qnx4_fs.h |   2 -
 7 files changed, 17 insertions(+), 202 deletions(-)
 delete mode 100644 fs/qnx4/fsync.c

(limited to 'include')

diff --git a/fs/qnx4/Makefile b/fs/qnx4/Makefile
index 502d7fe98ba..e4d408cc547 100644
--- a/fs/qnx4/Makefile
+++ b/fs/qnx4/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_QNX4FS_FS) += qnx4.o
 
-qnx4-objs := inode.o dir.o namei.o file.o bitmap.o truncate.o fsync.o
+qnx4-objs := inode.o dir.o namei.o file.o bitmap.o truncate.o
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index ea9ffefb48a..ff6c1ba6c4e 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -84,7 +84,7 @@ const struct file_operations qnx4_dir_operations =
 {
 	.read		= generic_read_dir,
 	.readdir	= qnx4_readdir,
-	.fsync		= file_fsync,
+	.fsync		= simple_fsync,
 };
 
 const struct inode_operations qnx4_dir_inode_operations =
diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c
index 867f42b0203..e7033ea10e2 100644
--- a/fs/qnx4/file.c
+++ b/fs/qnx4/file.c
@@ -29,7 +29,7 @@ const struct file_operations qnx4_file_operations =
 #ifdef CONFIG_QNX4FS_RW
 	.write		= do_sync_write,
 	.aio_write	= generic_file_aio_write,
-	.fsync		= qnx4_sync_file,
+	.fsync		= simple_fsync,
 #endif
 };
 
diff --git a/fs/qnx4/fsync.c b/fs/qnx4/fsync.c
deleted file mode 100644
index aa3b19544be..00000000000
--- a/fs/qnx4/fsync.c
+++ /dev/null
@@ -1,169 +0,0 @@
-/* 
- * QNX4 file system, Linux implementation.
- * 
- * Version : 0.1
- * 
- * Using parts of the xiafs filesystem.
- * 
- * History :
- * 
- * 24-03-1998 by Richard Frowijn : first release.
- */
-
-#include <linux/errno.h>
-#include <linux/time.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include <linux/smp_lock.h>
-#include <linux/buffer_head.h>
-
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
-
-#include <asm/system.h>
-
-/*
- * The functions for qnx4 fs file synchronization.
- */
-
-#ifdef CONFIG_QNX4FS_RW
-
-static int sync_block(struct inode *inode, unsigned short *block, int wait)
-{
-	struct buffer_head *bh;
-	unsigned short tmp;
-
-	if (!*block)
-		return 0;
-	tmp = *block;
-	bh = sb_find_get_block(inode->i_sb, *block);
-	if (!bh)
-		return 0;
-	if (*block != tmp) {
-		brelse(bh);
-		return 1;
-	}
-	if (wait && buffer_req(bh) && !buffer_uptodate(bh)) {
-		brelse(bh);
-		return -1;
-	}
-	if (wait || !buffer_uptodate(bh) || !buffer_dirty(bh)) {
-		brelse(bh);
-		return 0;
-	}
-	ll_rw_block(WRITE, 1, &bh);
-	atomic_dec(&bh->b_count);
-	return 0;
-}
-
-#ifdef WTF
-static int sync_iblock(struct inode *inode, unsigned short *iblock,
-		       struct buffer_head **bh, int wait)
-{
-	int rc;
-	unsigned short tmp;
-
-	*bh = NULL;
-	tmp = *iblock;
-	if (!tmp)
-		return 0;
-	rc = sync_block(inode, iblock, wait);
-	if (rc)
-		return rc;
-	*bh = sb_bread(inode->i_sb, tmp);
-	if (tmp != *iblock) {
-		brelse(*bh);
-		*bh = NULL;
-		return 1;
-	}
-	if (!*bh)
-		return -1;
-	return 0;
-}
-#endif
-
-static int sync_direct(struct inode *inode, int wait)
-{
-	int i;
-	int rc, err = 0;
-
-	for (i = 0; i < 7; i++) {
-		rc = sync_block(inode,
-				(unsigned short *) qnx4_raw_inode(inode)->di_first_xtnt.xtnt_blk + i, wait);
-		if (rc > 0)
-			break;
-		if (rc)
-			err = rc;
-	}
-	return err;
-}
-
-#ifdef WTF
-static int sync_indirect(struct inode *inode, unsigned short *iblock, int wait)
-{
-	int i;
-	struct buffer_head *ind_bh;
-	int rc, err = 0;
-
-	rc = sync_iblock(inode, iblock, &ind_bh, wait);
-	if (rc || !ind_bh)
-		return rc;
-
-	for (i = 0; i < 512; i++) {
-		rc = sync_block(inode,
-				((unsigned short *) ind_bh->b_data) + i,
-				wait);
-		if (rc > 0)
-			break;
-		if (rc)
-			err = rc;
-	}
-	brelse(ind_bh);
-	return err;
-}
-
-static int sync_dindirect(struct inode *inode, unsigned short *diblock,
-			  int wait)
-{
-	int i;
-	struct buffer_head *dind_bh;
-	int rc, err = 0;
-
-	rc = sync_iblock(inode, diblock, &dind_bh, wait);
-	if (rc || !dind_bh)
-		return rc;
-
-	for (i = 0; i < 512; i++) {
-		rc = sync_indirect(inode,
-				((unsigned short *) dind_bh->b_data) + i,
-				   wait);
-		if (rc > 0)
-			break;
-		if (rc)
-			err = rc;
-	}
-	brelse(dind_bh);
-	return err;
-}
-#endif
-
-int qnx4_sync_file(struct file *file, struct dentry *dentry, int unused)
-{
-        struct inode *inode = dentry->d_inode;
-	int wait, err = 0;
-        
-        (void) file;
-	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-	      S_ISLNK(inode->i_mode)))
-		return -EINVAL;
-
-	lock_kernel();
-	for (wait = 0; wait <= 1; wait++) {
-		err |= sync_direct(inode, wait);
-	}
-	err |= qnx4_sync_inode(inode);
-	unlock_kernel();
-	return (err < 0) ? -EIO : 0;
-}
-
-#endif
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 95c12fc613f..40712867b8a 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -24,6 +24,7 @@
 #include <linux/smp_lock.h>
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
+#include <linux/writeback.h>
 #include <linux/vfs.h>
 #include <asm/uaccess.h>
 
@@ -34,31 +35,6 @@ static const struct super_operations qnx4_sops;
 
 #ifdef CONFIG_QNX4FS_RW
 
-int qnx4_sync_inode(struct inode *inode)
-{
-	int err = 0;
-# if 0
-	struct buffer_head *bh;
-
-   	bh = qnx4_update_inode(inode);
-	if (bh && buffer_dirty(bh))
-	{
-		sync_dirty_buffer(bh);
-		if (buffer_req(bh) && !buffer_uptodate(bh))
-		{
-			printk ("IO error syncing qnx4 inode [%s:%08lx]\n",
-				inode->i_sb->s_id, inode->i_ino);
-			err = -1;
-		}
-	        brelse (bh);
-	} else if (!bh) {
-		err = -1;
-	}
-# endif
-
-	return err;
-}
-
 static void qnx4_delete_inode(struct inode *inode)
 {
 	QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino));
@@ -70,7 +46,7 @@ static void qnx4_delete_inode(struct inode *inode)
 	unlock_kernel();
 }
 
-static int qnx4_write_inode(struct inode *inode, int unused)
+static int qnx4_write_inode(struct inode *inode, int do_sync)
 {
 	struct qnx4_inode_entry *raw_inode;
 	int block, ino;
@@ -107,6 +83,16 @@ static int qnx4_write_inode(struct inode *inode, int unused)
 	raw_inode->di_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
 	raw_inode->di_first_xtnt.xtnt_size = cpu_to_le32(inode->i_blocks);
 	mark_buffer_dirty(bh);
+	if (do_sync) {
+		sync_dirty_buffer(bh);
+		if (buffer_req(bh) && !buffer_uptodate(bh)) {
+			printk("qnx4: IO error syncing inode [%s:%08x]\n",
+					inode->i_sb->s_id, ino);
+			brelse(bh);
+			unlock_kernel();
+			return -EIO;
+		}
+	}
 	brelse(bh);
 	unlock_kernel();
 	return 0;
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index 775eed3a408..123270c5376 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -187,7 +187,7 @@ int qnx4_rmdir(struct inode *dir, struct dentry *dentry)
 	de->di_status = 0;
 	memset(de->di_fname, 0, sizeof de->di_fname);
 	de->di_mode = 0;
-	mark_buffer_dirty(bh);
+	mark_buffer_dirty_inode(bh, dir);
 	clear_nlink(inode);
 	mark_inode_dirty(inode);
 	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
@@ -232,7 +232,7 @@ int qnx4_unlink(struct inode *dir, struct dentry *dentry)
 	de->di_status = 0;
 	memset(de->di_fname, 0, sizeof de->di_fname);
 	de->di_mode = 0;
-	mark_buffer_dirty(bh);
+	mark_buffer_dirty_inode(bh, dir);
 	dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
 	mark_inode_dirty(dir);
 	inode->i_ctime = dir->i_ctime;
diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h
index 787d19ea9f4..acbaec3524e 100644
--- a/include/linux/qnx4_fs.h
+++ b/include/linux/qnx4_fs.h
@@ -126,8 +126,6 @@ extern void qnx4_truncate(struct inode *inode);
 extern void qnx4_free_inode(struct inode *inode);
 extern int qnx4_unlink(struct inode *dir, struct dentry *dentry);
 extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry);
-extern int qnx4_sync_file(struct file *file, struct dentry *dentry, int);
-extern int qnx4_sync_inode(struct inode *inode);
 
 static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb)
 {
-- 
cgit v1.2.3-70-g09d2


From 964f5369667b342994fe3f384e9ba41d404ee796 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 7 Jun 2009 09:47:13 -0400
Subject: fs/qnx4: sanitize includes

fs-internal parts of qnx4_fs.h taken to fs/qnx4/qnx4.h, includes adjusted,
qnx4_fs.h doesn't need unifdef anymore.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/qnx4/bitmap.c        |  7 +-----
 fs/qnx4/dir.c           |  7 +-----
 fs/qnx4/file.c          |  3 +--
 fs/qnx4/inode.c         | 11 +++------
 fs/qnx4/namei.c         |  9 +-------
 fs/qnx4/qnx4.h          | 57 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/qnx4/truncate.c      |  6 +----
 include/linux/Kbuild    |  2 +-
 include/linux/qnx4_fs.h | 59 -------------------------------------------------
 9 files changed, 66 insertions(+), 95 deletions(-)
 create mode 100644 fs/qnx4/qnx4.h

(limited to 'include')

diff --git a/fs/qnx4/bitmap.c b/fs/qnx4/bitmap.c
index 8425cf6e962..e1cd061a25f 100644
--- a/fs/qnx4/bitmap.c
+++ b/fs/qnx4/bitmap.c
@@ -13,14 +13,9 @@
  * 28-06-1998 by Frank Denis : qnx4_free_inode (to be fixed) .
  */
 
-#include <linux/time.h>
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
-#include <linux/stat.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
 #include <linux/buffer_head.h>
 #include <linux/bitops.h>
+#include "qnx4.h"
 
 #if 0
 int qnx4_new_block(struct super_block *sb)
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index ff6c1ba6c4e..003c68f3238 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -11,14 +11,9 @@
  * 20-06-1998 by Frank Denis : Linux 2.1.99+ & dcache support.
  */
 
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
-#include <linux/stat.h>
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
-
+#include "qnx4.h"
 
 static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c
index e7033ea10e2..09b170ac936 100644
--- a/fs/qnx4/file.c
+++ b/fs/qnx4/file.c
@@ -12,8 +12,7 @@
  * 27-06-1998 by Frank Denis : file overwriting.
  */
 
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
+#include "qnx4.h"
 
 /*
  * We have mostly NULL's here: the current defaults are ok for
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 40712867b8a..681df5fcd16 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -13,20 +13,15 @@
  */
 
 #include <linux/module.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
 #include <linux/init.h>
+#include <linux/slab.h>
 #include <linux/highuid.h>
 #include <linux/smp_lock.h>
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
 #include <linux/writeback.h>
-#include <linux/vfs.h>
-#include <asm/uaccess.h>
+#include <linux/statfs.h>
+#include "qnx4.h"
 
 #define QNX4_VERSION  4
 #define QNX4_BMNAME   ".bitmap"
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index 123270c5376..5972ed21493 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -12,16 +12,9 @@
  * 04-07-1998 by Frank Denis : first step for rmdir/unlink.
  */
 
-#include <linux/time.h>
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include <linux/errno.h>
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
+#include "qnx4.h"
 
 
 /*
diff --git a/fs/qnx4/qnx4.h b/fs/qnx4/qnx4.h
new file mode 100644
index 00000000000..9efc089454f
--- /dev/null
+++ b/fs/qnx4/qnx4.h
@@ -0,0 +1,57 @@
+#include <linux/fs.h>
+#include <linux/qnx4_fs.h>
+
+#define QNX4_DEBUG 0
+
+#if QNX4_DEBUG
+#define QNX4DEBUG(X) printk X
+#else
+#define QNX4DEBUG(X) (void) 0
+#endif
+
+struct qnx4_sb_info {
+	struct buffer_head	*sb_buf;	/* superblock buffer */
+	struct qnx4_super_block	*sb;		/* our superblock */
+	unsigned int		Version;	/* may be useful */
+	struct qnx4_inode_entry	*BitMap;	/* useful */
+};
+
+struct qnx4_inode_info {
+	struct qnx4_inode_entry raw;
+	loff_t mmu_private;
+	struct inode vfs_inode;
+};
+
+extern struct inode *qnx4_iget(struct super_block *, unsigned long);
+extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd);
+extern unsigned long qnx4_count_free_blocks(struct super_block *sb);
+extern unsigned long qnx4_block_map(struct inode *inode, long iblock);
+
+extern struct buffer_head *qnx4_bread(struct inode *, int, int);
+
+extern const struct inode_operations qnx4_file_inode_operations;
+extern const struct inode_operations qnx4_dir_inode_operations;
+extern const struct file_operations qnx4_file_operations;
+extern const struct file_operations qnx4_dir_operations;
+extern int qnx4_is_free(struct super_block *sb, long block);
+extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy);
+extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd);
+extern void qnx4_truncate(struct inode *inode);
+extern void qnx4_free_inode(struct inode *inode);
+extern int qnx4_unlink(struct inode *dir, struct dentry *dentry);
+extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry);
+
+static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb)
+{
+	return sb->s_fs_info;
+}
+
+static inline struct qnx4_inode_info *qnx4_i(struct inode *inode)
+{
+	return container_of(inode, struct qnx4_inode_info, vfs_inode);
+}
+
+static inline struct qnx4_inode_entry *qnx4_raw_inode(struct inode *inode)
+{
+	return &qnx4_i(inode)->raw;
+}
diff --git a/fs/qnx4/truncate.c b/fs/qnx4/truncate.c
index 6437c1c3d1d..d94d9ee241f 100644
--- a/fs/qnx4/truncate.c
+++ b/fs/qnx4/truncate.c
@@ -10,12 +10,8 @@
  * 30-06-1998 by Frank DENIS : ugly filler.
  */
 
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
 #include <linux/smp_lock.h>
-#include <asm/uaccess.h>
+#include "qnx4.h"
 
 #ifdef CONFIG_QNX4FS_RW
 
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 3f0eaa397ef..b3afd2219ad 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -135,6 +135,7 @@ header-y += posix_types.h
 header-y += ppdev.h
 header-y += prctl.h
 header-y += qnxtypes.h
+header-y += qnx4_fs.h
 header-y += radeonfb.h
 header-y += raw.h
 header-y += resource.h
@@ -308,7 +309,6 @@ unifdef-y += poll.h
 unifdef-y += ppp_defs.h
 unifdef-y += ppp-comp.h
 unifdef-y += ptrace.h
-unifdef-y += qnx4_fs.h
 unifdef-y += quota.h
 unifdef-y += random.h
 unifdef-y += irqnr.h
diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h
index acbaec3524e..8b9aee1a9ce 100644
--- a/include/linux/qnx4_fs.h
+++ b/include/linux/qnx4_fs.h
@@ -85,63 +85,4 @@ struct qnx4_super_block {
 	struct qnx4_inode_entry AltBoot;
 };
 
-#ifdef __KERNEL__
-
-#define QNX4_DEBUG 0
-
-#if QNX4_DEBUG
-#define QNX4DEBUG(X) printk X
-#else
-#define QNX4DEBUG(X) (void) 0
-#endif
-
-struct qnx4_sb_info {
-	struct buffer_head	*sb_buf;	/* superblock buffer */
-	struct qnx4_super_block	*sb;		/* our superblock */
-	unsigned int		Version;	/* may be useful */
-	struct qnx4_inode_entry	*BitMap;	/* useful */
-};
-
-struct qnx4_inode_info {
-	struct qnx4_inode_entry raw;
-	loff_t mmu_private;
-	struct inode vfs_inode;
-};
-
-extern struct inode *qnx4_iget(struct super_block *, unsigned long);
-extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd);
-extern unsigned long qnx4_count_free_blocks(struct super_block *sb);
-extern unsigned long qnx4_block_map(struct inode *inode, long iblock);
-
-extern struct buffer_head *qnx4_bread(struct inode *, int, int);
-
-extern const struct inode_operations qnx4_file_inode_operations;
-extern const struct inode_operations qnx4_dir_inode_operations;
-extern const struct file_operations qnx4_file_operations;
-extern const struct file_operations qnx4_dir_operations;
-extern int qnx4_is_free(struct super_block *sb, long block);
-extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy);
-extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd);
-extern void qnx4_truncate(struct inode *inode);
-extern void qnx4_free_inode(struct inode *inode);
-extern int qnx4_unlink(struct inode *dir, struct dentry *dentry);
-extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry);
-
-static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb)
-{
-	return sb->s_fs_info;
-}
-
-static inline struct qnx4_inode_info *qnx4_i(struct inode *inode)
-{
-	return container_of(inode, struct qnx4_inode_info, vfs_inode);
-}
-
-static inline struct qnx4_inode_entry *qnx4_raw_inode(struct inode *inode)
-{
-	return &qnx4_i(inode)->raw;
-}
-
-#endif				/* __KERNEL__ */
-
 #endif
-- 
cgit v1.2.3-70-g09d2


From fbe0efb869efde8d847ede3a925230ef88910086 Mon Sep 17 00:00:00 2001
From: Kristian Høgsberg <krh@redhat.com>
Date: Tue, 9 Jun 2009 01:50:41 +1000
Subject: drm_calloc_large: check right size, check integer overflow, use
 GFP_ZERO
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously we would check size instead of size * nmemb, and so would
never hit the vmalloc path.  Also add integer overflow check as in kcalloc,
and allocate GFP_ZERO pages instead of memset()ing them.

Signed-off-by: Kristian Høgsberg <krh@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/drmP.h | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index afc21685230..1cc51a0812f 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1573,18 +1573,14 @@ static __inline__ void *drm_calloc(size_t nmemb, size_t size, int area)
 
 static __inline__ void *drm_calloc_large(size_t nmemb, size_t size)
 {
-	u8 *addr;
-
-	if (size <= PAGE_SIZE)
+	if (size * nmemb <= PAGE_SIZE)
 	    return kcalloc(nmemb, size, GFP_KERNEL);
 
-	addr = vmalloc(nmemb * size);
-	if (!addr)
+	if (size != 0 && nmemb > ULONG_MAX / size)
 		return NULL;
 
-	memset(addr, 0, nmemb * size);
-
-	return addr;
+	return __vmalloc(size * nmemb,
+			 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
 }
 
 static __inline void drm_free_large(void *ptr)
-- 
cgit v1.2.3-70-g09d2


From 2a71ebcd85bcc4d6607f577f23a491f796c30e82 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexdeucher@gmail.com>
Date: Fri, 12 Jun 2009 15:53:10 +1000
Subject: drm/radeon: add rv740 drm support.

This adds drm support for the RV740 family of chips to the r600 support code.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/r600_cp.c    | 38 ++++++++++++++++++++++++++++++++++---
 drivers/gpu/drm/radeon/radeon_drv.h |  1 +
 include/drm/drm_pciids.h            |  5 +++++
 3 files changed, 41 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c
index bc9d09dfa8e..aa4eee4b7f3 100644
--- a/drivers/gpu/drm/radeon/r600_cp.c
+++ b/drivers/gpu/drm/radeon/r600_cp.c
@@ -489,15 +489,16 @@ static void r700_cp_load_microcode(drm_radeon_private_t *dev_priv)
 			RADEON_WRITE(R600_CP_ME_RAM_DATA, RV770_cp_microcode[i]);
 		RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
 
-	} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV730)) {
+	} else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV730) ||
+		   ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV740)) {
 		RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
-		DRM_INFO("Loading RV730 PFP Microcode\n");
+		DRM_INFO("Loading RV730/RV740 PFP Microcode\n");
 		for (i = 0; i < R700_PFP_UCODE_SIZE; i++)
 			RADEON_WRITE(R600_CP_PFP_UCODE_DATA, RV730_pfp_microcode[i]);
 		RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
 
 		RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
-		DRM_INFO("Loading RV730 CP Microcode\n");
+		DRM_INFO("Loading RV730/RV740 CP Microcode\n");
 		for (i = 0; i < R700_PM4_UCODE_SIZE; i++)
 			RADEON_WRITE(R600_CP_ME_RAM_DATA, RV730_cp_microcode[i]);
 		RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
@@ -1324,6 +1325,10 @@ static void r700_gfx_init(struct drm_device *dev,
 		dev_priv->r700_sc_prim_fifo_size = 0xf9;
 		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
 		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
+		if (dev_priv->r600_sx_max_export_pos_size > 16) {
+			dev_priv->r600_sx_max_export_pos_size -= 16;
+			dev_priv->r600_sx_max_export_smx_size += 16;
+		}
 		break;
 	case CHIP_RV710:
 		dev_priv->r600_max_pipes = 2;
@@ -1345,6 +1350,31 @@ static void r700_gfx_init(struct drm_device *dev,
 		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
 		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
 		break;
+	case CHIP_RV740:
+		dev_priv->r600_max_pipes = 4;
+		dev_priv->r600_max_tile_pipes = 4;
+		dev_priv->r600_max_simds = 8;
+		dev_priv->r600_max_backends = 4;
+		dev_priv->r600_max_gprs = 256;
+		dev_priv->r600_max_threads = 248;
+		dev_priv->r600_max_stack_entries = 512;
+		dev_priv->r600_max_hw_contexts = 8;
+		dev_priv->r600_max_gs_threads = 16 * 2;
+		dev_priv->r600_sx_max_export_size = 256;
+		dev_priv->r600_sx_max_export_pos_size = 32;
+		dev_priv->r600_sx_max_export_smx_size = 224;
+		dev_priv->r600_sq_num_cf_insts = 2;
+
+		dev_priv->r700_sx_num_of_sets = 7;
+		dev_priv->r700_sc_prim_fifo_size = 0x100;
+		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
+		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
+
+		if (dev_priv->r600_sx_max_export_pos_size > 16) {
+			dev_priv->r600_sx_max_export_pos_size -= 16;
+			dev_priv->r600_sx_max_export_smx_size += 16;
+		}
+		break;
 	default:
 		break;
 	}
@@ -1493,6 +1523,7 @@ static void r700_gfx_init(struct drm_device *dev,
 		break;
 	case CHIP_RV730:
 	case CHIP_RV710:
+	case CHIP_RV740:
 	default:
 		sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x4);
 		break;
@@ -1569,6 +1600,7 @@ static void r700_gfx_init(struct drm_device *dev,
 	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
 	case CHIP_RV770:
 	case CHIP_RV730:
+	case CHIP_RV740:
 		gs_prim_buffer_depth = 384;
 		break;
 	case CHIP_RV710:
diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
index 8071d965f14..e266e5f8dc2 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.h
+++ b/drivers/gpu/drm/radeon/radeon_drv.h
@@ -146,6 +146,7 @@ enum radeon_family {
 	CHIP_RV770,
 	CHIP_RV730,
 	CHIP_RV710,
+	CHIP_RV740,
 	CHIP_LAST,
 };
 
diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index 8b4c80cdd27..c7a1a8dc5ea 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -254,6 +254,11 @@
 	{0x1002, 0x940A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R600|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x940B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R600|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x940F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R600|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x94A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x94A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x94B1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x94B3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x94B5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x9440, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x9441, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x9442, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \
-- 
cgit v1.2.3-70-g09d2


From 715cbb05c935e8a4306a730d14a72d5af881523e Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexdeucher@gmail.com>
Date: Fri, 12 Jun 2009 15:55:44 +1000
Subject: drm/radeon: add support for RV790.

This adds the PCI IDs for the rv790 which are equiv to the rv770.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/r600_cp.c | 4 ++--
 include/drm/drm_pciids.h         | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c
index aa4eee4b7f3..146f3570af8 100644
--- a/drivers/gpu/drm/radeon/r600_cp.c
+++ b/drivers/gpu/drm/radeon/r600_cp.c
@@ -478,13 +478,13 @@ static void r700_cp_load_microcode(drm_radeon_private_t *dev_priv)
 
 	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770)) {
 		RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
-		DRM_INFO("Loading RV770 PFP Microcode\n");
+		DRM_INFO("Loading RV770/RV790 PFP Microcode\n");
 		for (i = 0; i < R700_PFP_UCODE_SIZE; i++)
 			RADEON_WRITE(R600_CP_PFP_UCODE_DATA, RV770_pfp_microcode[i]);
 		RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0);
 
 		RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
-		DRM_INFO("Loading RV770 CP Microcode\n");
+		DRM_INFO("Loading RV770/RV790 CP Microcode\n");
 		for (i = 0; i < R700_PM4_UCODE_SIZE; i++)
 			RADEON_WRITE(R600_CP_ME_RAM_DATA, RV770_cp_microcode[i]);
 		RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0);
diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index c7a1a8dc5ea..f8634ab53b8 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -273,6 +273,8 @@
 	{0x1002, 0x9456, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x945A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x945B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x9460, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x9462, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x946A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x946B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x947A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
-- 
cgit v1.2.3-70-g09d2


From 249d6048ca98b5452105b0824abac1275661b8e3 Mon Sep 17 00:00:00 2001
From: Jerome Glisse <glisse@freedesktop.org>
Date: Wed, 8 Apr 2009 17:11:16 +0200
Subject: drm: Split out the mm declarations in a separate header. Add atomic
 operations.

this is a TTM preparation patch, it rearranges the mm and
add operations needed to do mm operations in atomic context.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_mm.c | 165 +++++++++++++++++++++++++++++++++++++++--------
 include/drm/drmP.h       |  37 +----------
 include/drm/drm_mm.h     |  90 ++++++++++++++++++++++++++
 3 files changed, 228 insertions(+), 64 deletions(-)
 create mode 100644 include/drm/drm_mm.h

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 367c590ffbb..7819fd930a5 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -42,8 +42,11 @@
  */
 
 #include "drmP.h"
+#include "drm_mm.h"
 #include <linux/slab.h>
 
+#define MM_UNUSED_TARGET 4
+
 unsigned long drm_mm_tail_space(struct drm_mm *mm)
 {
 	struct list_head *tail_node;
@@ -74,16 +77,62 @@ int drm_mm_remove_space_from_tail(struct drm_mm *mm, unsigned long size)
 	return 0;
 }
 
+static struct drm_mm_node *drm_mm_kmalloc(struct drm_mm *mm, int atomic)
+{
+	struct drm_mm_node *child;
+
+	if (atomic)
+		child = kmalloc(sizeof(*child), GFP_ATOMIC);
+	else
+		child = kmalloc(sizeof(*child), GFP_KERNEL);
+
+	if (unlikely(child == NULL)) {
+		spin_lock(&mm->unused_lock);
+		if (list_empty(&mm->unused_nodes))
+			child = NULL;
+		else {
+			child =
+			    list_entry(mm->unused_nodes.next,
+				       struct drm_mm_node, fl_entry);
+			list_del(&child->fl_entry);
+			--mm->num_unused;
+		}
+		spin_unlock(&mm->unused_lock);
+	}
+	return child;
+}
+
+int drm_mm_pre_get(struct drm_mm *mm)
+{
+	struct drm_mm_node *node;
+
+	spin_lock(&mm->unused_lock);
+	while (mm->num_unused < MM_UNUSED_TARGET) {
+		spin_unlock(&mm->unused_lock);
+		node = kmalloc(sizeof(*node), GFP_KERNEL);
+		spin_lock(&mm->unused_lock);
+
+		if (unlikely(node == NULL)) {
+			int ret = (mm->num_unused < 2) ? -ENOMEM : 0;
+			spin_unlock(&mm->unused_lock);
+			return ret;
+		}
+		++mm->num_unused;
+		list_add_tail(&node->fl_entry, &mm->unused_nodes);
+	}
+	spin_unlock(&mm->unused_lock);
+	return 0;
+}
+EXPORT_SYMBOL(drm_mm_pre_get);
 
 static int drm_mm_create_tail_node(struct drm_mm *mm,
-			    unsigned long start,
-			    unsigned long size)
+				   unsigned long start,
+				   unsigned long size, int atomic)
 {
 	struct drm_mm_node *child;
 
-	child = (struct drm_mm_node *)
-		drm_alloc(sizeof(*child), DRM_MEM_MM);
-	if (!child)
+	child = drm_mm_kmalloc(mm, atomic);
+	if (unlikely(child == NULL))
 		return -ENOMEM;
 
 	child->free = 1;
@@ -97,8 +146,7 @@ static int drm_mm_create_tail_node(struct drm_mm *mm,
 	return 0;
 }
 
-
-int drm_mm_add_space_to_tail(struct drm_mm *mm, unsigned long size)
+int drm_mm_add_space_to_tail(struct drm_mm *mm, unsigned long size, int atomic)
 {
 	struct list_head *tail_node;
 	struct drm_mm_node *entry;
@@ -106,20 +154,21 @@ int drm_mm_add_space_to_tail(struct drm_mm *mm, unsigned long size)
 	tail_node = mm->ml_entry.prev;
 	entry = list_entry(tail_node, struct drm_mm_node, ml_entry);
 	if (!entry->free) {
-		return drm_mm_create_tail_node(mm, entry->start + entry->size, size);
+		return drm_mm_create_tail_node(mm, entry->start + entry->size,
+					       size, atomic);
 	}
 	entry->size += size;
 	return 0;
 }
 
 static struct drm_mm_node *drm_mm_split_at_start(struct drm_mm_node *parent,
-					    unsigned long size)
+						 unsigned long size,
+						 int atomic)
 {
 	struct drm_mm_node *child;
 
-	child = (struct drm_mm_node *)
-		drm_alloc(sizeof(*child), DRM_MEM_MM);
-	if (!child)
+	child = drm_mm_kmalloc(parent->mm, atomic);
+	if (unlikely(child == NULL))
 		return NULL;
 
 	INIT_LIST_HEAD(&child->fl_entry);
@@ -151,8 +200,9 @@ struct drm_mm_node *drm_mm_get_block(struct drm_mm_node * parent,
 		tmp = parent->start % alignment;
 
 	if (tmp) {
-		align_splitoff = drm_mm_split_at_start(parent, alignment - tmp);
-		if (!align_splitoff)
+		align_splitoff =
+		    drm_mm_split_at_start(parent, alignment - tmp, 0);
+		if (unlikely(align_splitoff == NULL))
 			return NULL;
 	}
 
@@ -161,7 +211,7 @@ struct drm_mm_node *drm_mm_get_block(struct drm_mm_node * parent,
 		parent->free = 0;
 		return parent;
 	} else {
-		child = drm_mm_split_at_start(parent, size);
+		child = drm_mm_split_at_start(parent, size, 0);
 	}
 
 	if (align_splitoff)
@@ -169,14 +219,49 @@ struct drm_mm_node *drm_mm_get_block(struct drm_mm_node * parent,
 
 	return child;
 }
+
 EXPORT_SYMBOL(drm_mm_get_block);
 
+struct drm_mm_node *drm_mm_get_block_atomic(struct drm_mm_node *parent,
+					    unsigned long size,
+					    unsigned alignment)
+{
+
+	struct drm_mm_node *align_splitoff = NULL;
+	struct drm_mm_node *child;
+	unsigned tmp = 0;
+
+	if (alignment)
+		tmp = parent->start % alignment;
+
+	if (tmp) {
+		align_splitoff =
+		    drm_mm_split_at_start(parent, alignment - tmp, 1);
+		if (unlikely(align_splitoff == NULL))
+			return NULL;
+	}
+
+	if (parent->size == size) {
+		list_del_init(&parent->fl_entry);
+		parent->free = 0;
+		return parent;
+	} else {
+		child = drm_mm_split_at_start(parent, size, 1);
+	}
+
+	if (align_splitoff)
+		drm_mm_put_block(align_splitoff);
+
+	return child;
+}
+EXPORT_SYMBOL(drm_mm_get_block_atomic);
+
 /*
  * Put a block. Merge with the previous and / or next block if they are free.
  * Otherwise add to the free stack.
  */
 
-void drm_mm_put_block(struct drm_mm_node * cur)
+void drm_mm_put_block(struct drm_mm_node *cur)
 {
 
 	struct drm_mm *mm = cur->mm;
@@ -188,21 +273,27 @@ void drm_mm_put_block(struct drm_mm_node * cur)
 	int merged = 0;
 
 	if (cur_head->prev != root_head) {
-		prev_node = list_entry(cur_head->prev, struct drm_mm_node, ml_entry);
+		prev_node =
+		    list_entry(cur_head->prev, struct drm_mm_node, ml_entry);
 		if (prev_node->free) {
 			prev_node->size += cur->size;
 			merged = 1;
 		}
 	}
 	if (cur_head->next != root_head) {
-		next_node = list_entry(cur_head->next, struct drm_mm_node, ml_entry);
+		next_node =
+		    list_entry(cur_head->next, struct drm_mm_node, ml_entry);
 		if (next_node->free) {
 			if (merged) {
 				prev_node->size += next_node->size;
 				list_del(&next_node->ml_entry);
 				list_del(&next_node->fl_entry);
-				drm_free(next_node, sizeof(*next_node),
-					 DRM_MEM_MM);
+				if (mm->num_unused < MM_UNUSED_TARGET) {
+					list_add(&next_node->fl_entry,
+						 &mm->unused_nodes);
+					++mm->num_unused;
+				} else
+					kfree(next_node);
 			} else {
 				next_node->size += cur->size;
 				next_node->start = cur->start;
@@ -215,14 +306,19 @@ void drm_mm_put_block(struct drm_mm_node * cur)
 		list_add(&cur->fl_entry, &mm->fl_entry);
 	} else {
 		list_del(&cur->ml_entry);
-		drm_free(cur, sizeof(*cur), DRM_MEM_MM);
+		if (mm->num_unused < MM_UNUSED_TARGET) {
+			list_add(&cur->fl_entry, &mm->unused_nodes);
+			++mm->num_unused;
+		} else
+			kfree(cur);
 	}
 }
+
 EXPORT_SYMBOL(drm_mm_put_block);
 
-struct drm_mm_node *drm_mm_search_free(const struct drm_mm * mm,
-				  unsigned long size,
-				  unsigned alignment, int best_match)
+struct drm_mm_node *drm_mm_search_free(const struct drm_mm *mm,
+				       unsigned long size,
+				       unsigned alignment, int best_match)
 {
 	struct list_head *list;
 	const struct list_head *free_stack = &mm->fl_entry;
@@ -247,7 +343,6 @@ struct drm_mm_node *drm_mm_search_free(const struct drm_mm * mm,
 				wasted += alignment - tmp;
 		}
 
-
 		if (entry->size >= size + wasted) {
 			if (!best_match)
 				return entry;
@@ -260,6 +355,7 @@ struct drm_mm_node *drm_mm_search_free(const struct drm_mm * mm,
 
 	return best;
 }
+EXPORT_SYMBOL(drm_mm_search_free);
 
 int drm_mm_clean(struct drm_mm * mm)
 {
@@ -267,14 +363,17 @@ int drm_mm_clean(struct drm_mm * mm)
 
 	return (head->next->next == head);
 }
-EXPORT_SYMBOL(drm_mm_search_free);
+EXPORT_SYMBOL(drm_mm_clean);
 
 int drm_mm_init(struct drm_mm * mm, unsigned long start, unsigned long size)
 {
 	INIT_LIST_HEAD(&mm->ml_entry);
 	INIT_LIST_HEAD(&mm->fl_entry);
+	INIT_LIST_HEAD(&mm->unused_nodes);
+	mm->num_unused = 0;
+	spin_lock_init(&mm->unused_lock);
 
-	return drm_mm_create_tail_node(mm, start, size);
+	return drm_mm_create_tail_node(mm, start, size, 0);
 }
 EXPORT_SYMBOL(drm_mm_init);
 
@@ -282,6 +381,7 @@ void drm_mm_takedown(struct drm_mm * mm)
 {
 	struct list_head *bnode = mm->fl_entry.next;
 	struct drm_mm_node *entry;
+	struct drm_mm_node *next;
 
 	entry = list_entry(bnode, struct drm_mm_node, fl_entry);
 
@@ -293,7 +393,16 @@ void drm_mm_takedown(struct drm_mm * mm)
 
 	list_del(&entry->fl_entry);
 	list_del(&entry->ml_entry);
+	kfree(entry);
+
+	spin_lock(&mm->unused_lock);
+	list_for_each_entry_safe(entry, next, &mm->unused_nodes, fl_entry) {
+		list_del(&entry->fl_entry);
+		kfree(entry);
+		--mm->num_unused;
+	}
+	spin_unlock(&mm->unused_lock);
 
-	drm_free(entry, sizeof(*entry), DRM_MEM_MM);
+	BUG_ON(mm->num_unused != 0);
 }
 EXPORT_SYMBOL(drm_mm_takedown);
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 1cc51a0812f..d4ddc22e46b 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -86,6 +86,7 @@ struct drm_device;
 
 #include "drm_os_linux.h"
 #include "drm_hashtab.h"
+#include "drm_mm.h"
 
 #define DRM_UT_CORE 		0x01
 #define DRM_UT_DRIVER		0x02
@@ -553,26 +554,6 @@ struct drm_sigdata {
 };
 
 
-/*
- * Generic memory manager structs
- */
-
-struct drm_mm_node {
-	struct list_head fl_entry;
-	struct list_head ml_entry;
-	int free;
-	unsigned long start;
-	unsigned long size;
-	struct drm_mm *mm;
-	void *private;
-};
-
-struct drm_mm {
-	struct list_head fl_entry;
-	struct list_head ml_entry;
-};
-
-
 /**
  * Kernel side of a mapping
  */
@@ -1436,22 +1417,6 @@ extern char *drm_get_connector_status_name(enum drm_connector_status status);
 extern int drm_sysfs_connector_add(struct drm_connector *connector);
 extern void drm_sysfs_connector_remove(struct drm_connector *connector);
 
-/*
- * Basic memory manager support (drm_mm.c)
- */
-extern struct drm_mm_node *drm_mm_get_block(struct drm_mm_node * parent,
-				       unsigned long size,
-				       unsigned alignment);
-extern void drm_mm_put_block(struct drm_mm_node * cur);
-extern struct drm_mm_node *drm_mm_search_free(const struct drm_mm *mm, unsigned long size,
-					 unsigned alignment, int best_match);
-extern int drm_mm_init(struct drm_mm *mm, unsigned long start, unsigned long size);
-extern void drm_mm_takedown(struct drm_mm *mm);
-extern int drm_mm_clean(struct drm_mm *mm);
-extern unsigned long drm_mm_tail_space(struct drm_mm *mm);
-extern int drm_mm_remove_space_from_tail(struct drm_mm *mm, unsigned long size);
-extern int drm_mm_add_space_to_tail(struct drm_mm *mm, unsigned long size);
-
 /* Graphics Execution Manager library functions (drm_gem.c) */
 int drm_gem_init(struct drm_device *dev);
 void drm_gem_destroy(struct drm_device *dev);
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
new file mode 100644
index 00000000000..5662f4278ef
--- /dev/null
+++ b/include/drm/drm_mm.h
@@ -0,0 +1,90 @@
+/**************************************************************************
+ *
+ * Copyright 2006-2008 Tungsten Graphics, Inc., Cedar Park, TX. USA.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ **************************************************************************/
+/*
+ * Authors:
+ * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
+ */
+
+#ifndef _DRM_MM_H_
+#define _DRM_MM_H_
+
+/*
+ * Generic range manager structs
+ */
+#include <linux/list.h>
+
+struct drm_mm_node {
+	struct list_head fl_entry;
+	struct list_head ml_entry;
+	int free;
+	unsigned long start;
+	unsigned long size;
+	struct drm_mm *mm;
+	void *private;
+};
+
+struct drm_mm {
+	struct list_head fl_entry;
+	struct list_head ml_entry;
+	struct list_head unused_nodes;
+	int num_unused;
+	spinlock_t unused_lock;
+};
+
+/*
+ * Basic range manager support (drm_mm.c)
+ */
+
+extern struct drm_mm_node *drm_mm_get_block(struct drm_mm_node *parent,
+					    unsigned long size,
+					    unsigned alignment);
+extern struct drm_mm_node *drm_mm_get_block_atomic(struct drm_mm_node *parent,
+						   unsigned long size,
+						   unsigned alignment);
+extern void drm_mm_put_block(struct drm_mm_node *cur);
+extern struct drm_mm_node *drm_mm_search_free(const struct drm_mm *mm,
+					      unsigned long size,
+					      unsigned alignment,
+					      int best_match);
+extern int drm_mm_init(struct drm_mm *mm, unsigned long start,
+		       unsigned long size);
+extern void drm_mm_takedown(struct drm_mm *mm);
+extern int drm_mm_clean(struct drm_mm *mm);
+extern unsigned long drm_mm_tail_space(struct drm_mm *mm);
+extern int drm_mm_remove_space_from_tail(struct drm_mm *mm,
+					 unsigned long size);
+extern int drm_mm_add_space_to_tail(struct drm_mm *mm,
+				    unsigned long size, int atomic);
+extern int drm_mm_pre_get(struct drm_mm *mm);
+
+static inline struct drm_mm *drm_get_mm(struct drm_mm_node *block)
+{
+	return block->mm;
+}
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From 3c24475c1e4e8d10e50df161d8c4f1d382997a7c Mon Sep 17 00:00:00 2001
From: Jerome Glisse <glisse@freedesktop.org>
Date: Wed, 8 Apr 2009 18:34:28 +0200
Subject: drm: include kernel list header file in hashtab header

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/drm_hashtab.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_hashtab.h b/include/drm/drm_hashtab.h
index cd2b189e1be..0af087a4d3b 100644
--- a/include/drm/drm_hashtab.h
+++ b/include/drm/drm_hashtab.h
@@ -35,6 +35,8 @@
 #ifndef DRM_HASHTAB_H
 #define DRM_HASHTAB_H
 
+#include <linux/list.h>
+
 #define drm_hash_entry(_ptr, _type, _member) container_of(_ptr, _type, _member)
 
 struct drm_hash_item {
-- 
cgit v1.2.3-70-g09d2


From ca371c0d7e23d0d0afae65fc83a0e91cf7399573 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Fri, 12 Jun 2009 10:33:53 +0300
Subject: memcg: fix page_cgroup fatal error in FLATMEM

Now, SLAB is configured in very early stage and it can be used in
init routine now.

But replacing alloc_bootmem() in FLAT/DISCONTIGMEM's page_cgroup()
initialization breaks the allocation, now.
(Works well in SPARSEMEM case...it supports MEMORY_HOTPLUG and
 size of page_cgroup is in reasonable size (< 1 << MAX_ORDER.)

This patch revive FLATMEM+memory cgroup by using alloc_bootmem.

In future,
We stop to support FLATMEM (if no users) or rewrite codes for flatmem
completely.But this will adds more messy codes and overheads.

Reported-by: Li Zefan <lizf@cn.fujitsu.com>
Tested-by: Li Zefan <lizf@cn.fujitsu.com>
Tested-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
 include/linux/page_cgroup.h | 18 +++++++++++++++++-
 init/main.c                 |  5 +++++
 mm/page_cgroup.c            | 29 ++++++++++-------------------
 3 files changed, 32 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 7339c7bf733..13f126c89ae 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -18,7 +18,19 @@ struct page_cgroup {
 };
 
 void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
-void __init page_cgroup_init(void);
+
+#ifdef CONFIG_SPARSEMEM
+static inline void __init page_cgroup_init_flatmem(void)
+{
+}
+extern void __init page_cgroup_init(void);
+#else
+void __init page_cgroup_init_flatmem(void);
+static inline void __init page_cgroup_init(void)
+{
+}
+#endif
+
 struct page_cgroup *lookup_page_cgroup(struct page *page);
 
 enum {
@@ -87,6 +99,10 @@ static inline void page_cgroup_init(void)
 {
 }
 
+static inline void __init page_cgroup_init_flatmem(void)
+{
+}
+
 #endif
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
diff --git a/init/main.c b/init/main.c
index 5616661eac0..b3e8f14c568 100644
--- a/init/main.c
+++ b/init/main.c
@@ -539,6 +539,11 @@ void __init __weak thread_info_cache_init(void)
  */
 static void __init mm_init(void)
 {
+	/*
+	 * page_cgroup requires countinous pages as memmap
+	 * and it's bigger than MAX_ORDER unless SPARSEMEM.
+	 */
+	page_cgroup_init_flatmem();
 	mem_init();
 	kmem_cache_init();
 	vmalloc_init();
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 3dd4a909a1d..11a8a10a390 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -47,8 +47,6 @@ static int __init alloc_node_page_cgroup(int nid)
 	struct page_cgroup *base, *pc;
 	unsigned long table_size;
 	unsigned long start_pfn, nr_pages, index;
-	struct page *page;
-	unsigned int order;
 
 	start_pfn = NODE_DATA(nid)->node_start_pfn;
 	nr_pages = NODE_DATA(nid)->node_spanned_pages;
@@ -57,13 +55,11 @@ static int __init alloc_node_page_cgroup(int nid)
 		return 0;
 
 	table_size = sizeof(struct page_cgroup) * nr_pages;
-	order = get_order(table_size);
-	page = alloc_pages_node(nid, GFP_NOWAIT | __GFP_ZERO, order);
-	if (!page)
-		page = alloc_pages_node(-1, GFP_NOWAIT | __GFP_ZERO, order);
-	if (!page)
+
+	base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
+			table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+	if (!base)
 		return -ENOMEM;
-	base = page_address(page);
 	for (index = 0; index < nr_pages; index++) {
 		pc = base + index;
 		__init_page_cgroup(pc, start_pfn + index);
@@ -73,7 +69,7 @@ static int __init alloc_node_page_cgroup(int nid)
 	return 0;
 }
 
-void __init page_cgroup_init(void)
+void __init page_cgroup_init_flatmem(void)
 {
 
 	int nid, fail;
@@ -117,16 +113,11 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
 	if (!section->page_cgroup) {
 		nid = page_to_nid(pfn_to_page(pfn));
 		table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
-		if (slab_is_available()) {
-			base = kmalloc_node(table_size,
-					GFP_KERNEL | __GFP_NOWARN, nid);
-			if (!base)
-				base = vmalloc_node(table_size, nid);
-		} else {
-			base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
-				table_size,
-				PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
-		}
+		VM_BUG_ON(!slab_is_available());
+		base = kmalloc_node(table_size,
+				GFP_KERNEL | __GFP_NOWARN, nid);
+		if (!base)
+			base = vmalloc_node(table_size, nid);
 	} else {
 		/*
  		 * We don't have to allocate page_cgroup again, but
-- 
cgit v1.2.3-70-g09d2


From 9a71af2c3627b379b7c31917a7f6ee0d29bc559b Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 21:46:53 -0600
Subject: module_param: invbool should take a 'bool', not an 'int'

It takes an 'int' for historical reasons, and there are only two
users: simply switch it over to bool.

The other user (uvesafb.c) will get a (harmless-on-x86) warning until
the next patch is applied.

Cc: Brad Douglas <brad@neruo.com>
Cc: Michal Januszewski <spock@gentoo.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/video/aty/aty128fb.c | 2 +-
 include/linux/moduleparam.h  | 2 +-
 kernel/params.c              | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/video/aty/aty128fb.c b/drivers/video/aty/aty128fb.c
index 35e8eb02b9e..e4e4d433b00 100644
--- a/drivers/video/aty/aty128fb.c
+++ b/drivers/video/aty/aty128fb.c
@@ -354,7 +354,7 @@ static int default_crt_on __devinitdata = 0;
 static int default_lcd_on __devinitdata = 1;
 
 #ifdef CONFIG_MTRR
-static int mtrr = 1;
+static bool mtrr = true;
 #endif
 
 #ifdef CONFIG_PMAC_BACKLIGHT
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index a4f0b931846..9bbca8e8c19 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -192,7 +192,7 @@ extern int param_get_bool(char *buffer, struct kernel_param *kp);
 
 extern int param_set_invbool(const char *val, struct kernel_param *kp);
 extern int param_get_invbool(char *buffer, struct kernel_param *kp);
-#define param_check_invbool(name, p) __param_check(name, p, int)
+#define param_check_invbool(name, p) __param_check(name, p, bool)
 
 /* Comma-separated array: *nump is set to number they actually specified. */
 #define module_param_array_named(name, array, type, nump, perm)		\
diff --git a/kernel/params.c b/kernel/params.c
index de273ec85bd..023abbf5f89 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -272,13 +272,13 @@ int param_set_invbool(const char *val, struct kernel_param *kp)
 	dummy.arg = &boolval;
 	ret = param_set_bool(val, &dummy);
 	if (ret == 0)
-		*(int *)kp->arg = !boolval;
+		*(bool *)kp->arg = !boolval;
 	return ret;
 }
 
 int param_get_invbool(char *buffer, struct kernel_param *kp)
 {
-	return sprintf(buffer, "%c", (*(int *)kp->arg) ? 'N' : 'Y');
+	return sprintf(buffer, "%c", (*(bool *)kp->arg) ? 'N' : 'Y');
 }
 
 /* We break the rule and mangle the string. */
-- 
cgit v1.2.3-70-g09d2


From 45fcc70c0b6ee0c508e1fdb5fef735c3546803f4 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 21:46:56 -0600
Subject: module_param: split perm field into flags and perm

Impact: cleanup

Rather than hack KPARAM_KMALLOCED into the perm field, separate it out.
Since the perm field was 32 bits and only needs 16, we don't add bloat.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/moduleparam.h | 8 ++++++--
 kernel/params.c             | 9 +++------
 2 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 9bbca8e8c19..009a5f76876 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -36,9 +36,13 @@ typedef int (*param_set_fn)(const char *val, struct kernel_param *kp);
 /* Returns length written or -errno.  Buffer is 4k (ie. be short!) */
 typedef int (*param_get_fn)(char *buffer, struct kernel_param *kp);
 
+/* Flag bits for kernel_param.flags */
+#define KPARAM_KMALLOCED	1
+
 struct kernel_param {
 	const char *name;
-	unsigned int perm;
+	u16 perm;
+	u16 flags;
 	param_set_fn set;
 	param_get_fn get;
 	union {
@@ -88,7 +92,7 @@ struct kparam_array
 	static struct kernel_param __moduleparam_const __param_##name	\
 	__used								\
     __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \
-	= { __param_str_##name, perm, set, get, { arg } }
+	= { __param_str_##name, perm, 0, set, get, { arg } }
 
 #define module_param_call(name, set, get, arg, perm)			      \
 	__module_param_call(MODULE_PARAM_PREFIX, name, set, get, arg, perm)
diff --git a/kernel/params.c b/kernel/params.c
index 023abbf5f89..b4660dc13db 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -24,9 +24,6 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 
-/* We abuse the high bits of "perm" to record whether we kmalloc'ed. */
-#define KPARAM_KMALLOCED	0x80000000
-
 #if 0
 #define DEBUGP printk
 #else
@@ -220,13 +217,13 @@ int param_set_charp(const char *val, struct kernel_param *kp)
 		return -ENOSPC;
 	}
 
-	if (kp->perm & KPARAM_KMALLOCED)
+	if (kp->flags & KPARAM_KMALLOCED)
 		kfree(*(char **)kp->arg);
 
 	/* This is a hack.  We can't need to strdup in early boot, and we
 	 * don't need to; this mangled commandline is preserved. */
 	if (slab_is_available()) {
-		kp->perm |= KPARAM_KMALLOCED;
+		kp->flags |= KPARAM_KMALLOCED;
 		*(char **)kp->arg = kstrdup(val, GFP_KERNEL);
 		if (!kp->arg)
 			return -ENOMEM;
@@ -591,7 +588,7 @@ void destroy_params(const struct kernel_param *params, unsigned num)
 	unsigned int i;
 
 	for (i = 0; i < num; i++)
-		if (params[i].perm & KPARAM_KMALLOCED)
+		if (params[i].flags & KPARAM_KMALLOCED)
 			kfree(*(char **)params[i].arg);
 }
 
-- 
cgit v1.2.3-70-g09d2


From d2c123c27db841c6c11a63de9c144823d2b1ba76 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 21:46:56 -0600
Subject: module_param: add __same_type convenience wrapper for
 __builtin_types_compatible_p

Impact: new API

__builtin_types_compatible_p() is a little awkward to use: it takes two
types rather than types or variables, and it's just damn long.

(typeof(type) == type, so this works on types as well as vars).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/compiler.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 37bcb50a4d7..04fb5135b4e 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -261,6 +261,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 # define __section(S) __attribute__ ((__section__(#S)))
 #endif
 
+/* Are two types/vars the same type (ignoring qualifiers)? */
+#ifndef __same_type
+# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
+#endif
+
 /*
  * Prevent the compiler from merging or refetching accesses.  The compiler
  * is also forbidden from reordering successive instances of ACCESS_ONCE(),
-- 
cgit v1.2.3-70-g09d2


From fddd520122953550ec2c8b60e7ca0d0f0d115d97 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 21:46:57 -0600
Subject: module_param: allow 'bool' module_params to be bool, not just int.

Impact: API cleanup

For historical reasons, 'bool' parameters must be an int, not a bool.
But there are around 600 users, so a conversion seems like useless churn.

So we use __same_type() to distinguish, and handle both cases.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/moduleparam.h | 32 +++++++++++++++++++++++---------
 kernel/params.c             | 33 ++++++++++++++++++++++++++-------
 2 files changed, 49 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 009a5f76876..6547c3cdbc4 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -38,6 +38,7 @@ typedef int (*param_get_fn)(char *buffer, struct kernel_param *kp);
 
 /* Flag bits for kernel_param.flags */
 #define KPARAM_KMALLOCED	1
+#define KPARAM_ISBOOL		2
 
 struct kernel_param {
 	const char *name;
@@ -83,7 +84,7 @@ struct kparam_array
    parameters.  perm sets the visibility in sysfs: 000 means it's
    not there, read bits mean it's readable, write bits mean it's
    writable. */
-#define __module_param_call(prefix, name, set, get, arg, perm)		\
+#define __module_param_call(prefix, name, set, get, arg, isbool, perm)	\
 	/* Default value instead of permissions? */			\
 	static int __param_perm_check_##name __attribute__((unused)) =	\
 	BUILD_BUG_ON_ZERO((perm) < 0 || (perm) > 0777 || ((perm) & 2))	\
@@ -92,10 +93,13 @@ struct kparam_array
 	static struct kernel_param __moduleparam_const __param_##name	\
 	__used								\
     __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \
-	= { __param_str_##name, perm, 0, set, get, { arg } }
+	= { __param_str_##name, perm, isbool ? KPARAM_ISBOOL : 0,	\
+	    set, get, { arg } }
 
 #define module_param_call(name, set, get, arg, perm)			      \
-	__module_param_call(MODULE_PARAM_PREFIX, name, set, get, arg, perm)
+	__module_param_call(MODULE_PARAM_PREFIX,			      \
+			    name, set, get, arg,			      \
+			    __same_type(*(arg), bool), perm)
 
 /* Helper functions: type is byte, short, ushort, int, uint, long,
    ulong, charp, bool or invbool, or XXX if you define param_get_XXX,
@@ -124,15 +128,16 @@ struct kparam_array
 #define core_param(name, var, type, perm)				\
 	param_check_##type(name, &(var));				\
 	__module_param_call("", name, param_set_##type, param_get_##type, \
-			    &var, perm)
+			    &var, __same_type(var, bool), perm)
 #endif /* !MODULE */
 
 /* Actually copy string: maxlen param is usually sizeof(string). */
 #define module_param_string(name, string, len, perm)			\
 	static const struct kparam_string __param_string_##name		\
 		= { len, string };					\
-	module_param_call(name, param_set_copystring, param_get_string,	\
-			  .str = &__param_string_##name, perm);		\
+	__module_param_call(MODULE_PARAM_PREFIX, name,			\
+			    param_set_copystring, param_get_string,	\
+			    .str = &__param_string_##name, 0, perm);	\
 	__MODULE_PARM_TYPE(name, "string")
 
 /* Called on module insert or kernel boot */
@@ -190,9 +195,16 @@ extern int param_set_charp(const char *val, struct kernel_param *kp);
 extern int param_get_charp(char *buffer, struct kernel_param *kp);
 #define param_check_charp(name, p) __param_check(name, p, char *)
 
+/* For historical reasons "bool" parameters can be (unsigned) "int". */
 extern int param_set_bool(const char *val, struct kernel_param *kp);
 extern int param_get_bool(char *buffer, struct kernel_param *kp);
-#define param_check_bool(name, p) __param_check(name, p, int)
+#define param_check_bool(name, p)					\
+	static inline void __check_##name(void)				\
+	{								\
+		BUILD_BUG_ON(!__same_type(*(p), bool) &&		\
+			     !__same_type(*(p), unsigned int) &&	\
+			     !__same_type(*(p), int));			\
+	}
 
 extern int param_set_invbool(const char *val, struct kernel_param *kp);
 extern int param_get_invbool(char *buffer, struct kernel_param *kp);
@@ -203,8 +215,10 @@ extern int param_get_invbool(char *buffer, struct kernel_param *kp);
 	static const struct kparam_array __param_arr_##name		\
 	= { ARRAY_SIZE(array), nump, param_set_##type, param_get_##type,\
 	    sizeof(array[0]), array };					\
-	module_param_call(name, param_array_set, param_array_get, 	\
-			  .arr = &__param_arr_##name, perm);		\
+	__module_param_call(MODULE_PARAM_PREFIX, name,			\
+			    param_array_set, param_array_get,		\
+			    .arr = &__param_arr_##name,			\
+			    __same_type(array[0], bool), perm);		\
 	__MODULE_PARM_TYPE(name, "array of " #type)
 
 #define module_param_array(name, type, nump, perm)		\
diff --git a/kernel/params.c b/kernel/params.c
index b4660dc13db..7f6912ced2b 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -238,35 +238,54 @@ int param_get_charp(char *buffer, struct kernel_param *kp)
 	return sprintf(buffer, "%s", *((char **)kp->arg));
 }
 
+/* Actually could be a bool or an int, for historical reasons. */
 int param_set_bool(const char *val, struct kernel_param *kp)
 {
+	bool v;
+
 	/* No equals means "set"... */
 	if (!val) val = "1";
 
 	/* One of =[yYnN01] */
 	switch (val[0]) {
 	case 'y': case 'Y': case '1':
-		*(int *)kp->arg = 1;
-		return 0;
+		v = true;
+		break;
 	case 'n': case 'N': case '0':
-		*(int *)kp->arg = 0;
-		return 0;
+		v = false;
+		break;
+	default:
+		return -EINVAL;
 	}
-	return -EINVAL;
+
+	if (kp->flags & KPARAM_ISBOOL)
+		*(bool *)kp->arg = v;
+	else
+		*(int *)kp->arg = v;
+	return 0;
 }
 
 int param_get_bool(char *buffer, struct kernel_param *kp)
 {
+	bool val;
+	if (kp->flags & KPARAM_ISBOOL)
+		val = *(bool *)kp->arg;
+	else
+		val = *(int *)kp->arg;
+
 	/* Y and N chosen as being relatively non-coder friendly */
-	return sprintf(buffer, "%c", (*(int *)kp->arg) ? 'Y' : 'N');
+	return sprintf(buffer, "%c", val ? 'Y' : 'N');
 }
 
+/* This one must be bool. */
 int param_set_invbool(const char *val, struct kernel_param *kp)
 {
-	int boolval, ret;
+	int ret;
+	bool boolval;
 	struct kernel_param dummy;
 
 	dummy.arg = &boolval;
+	dummy.flags = KPARAM_ISBOOL;
 	ret = param_set_bool(val, &dummy);
 	if (ret == 0)
 		*(bool *)kp->arg = !boolval;
-- 
cgit v1.2.3-70-g09d2


From ad6561dffa17f17bb68d7207d422c26c381c4313 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 21:47:03 -0600
Subject: module: trim exception table on init free.

It's theoretically possible that there are exception table entries
which point into the (freed) init text of modules.  These could cause
future problems if other modules get loaded into that memory and cause
an exception as we'd see the wrong fixup.  The only case I know of is
kvm-intel.ko (when CONFIG_CC_OPTIMIZE_FOR_SIZE=n).

Amerigo fixed this long-standing FIXME in the x86 version, but this
patch is more general.

This implements trim_init_extable(); most archs are simple since they
use the standard lib/extable.c sort code.  Alpha and IA64 use relative
addresses in their fixups, so thier trimming is a slight variation.

Sparc32 is unique; it doesn't seem to define ARCH_HAS_SORT_EXTABLE,
yet it defines its own sort_extable() which overrides the one in lib.
It doesn't sort, so we have to mark deleted entries instead of
actually trimming them.

Inspired-by: Amerigo Wang <amwang@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: linux-alpha@vger.kernel.org
Cc: sparclinux@vger.kernel.org
Cc: linux-ia64@vger.kernel.org
---
 arch/alpha/mm/extable.c             | 21 +++++++++++++++++++++
 arch/ia64/mm/extable.c              | 26 ++++++++++++++++++++++++++
 arch/sparc/include/asm/uaccess_32.h |  3 +++
 arch/sparc/mm/extable.c             | 29 +++++++++++++++++++++++++++++
 include/linux/module.h              |  1 +
 kernel/module.c                     |  1 +
 lib/extable.c                       | 21 ++++++++++++++++++++-
 7 files changed, 101 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/arch/alpha/mm/extable.c b/arch/alpha/mm/extable.c
index 62dc379d301..813c9b63c0e 100644
--- a/arch/alpha/mm/extable.c
+++ b/arch/alpha/mm/extable.c
@@ -48,6 +48,27 @@ void sort_extable(struct exception_table_entry *start,
 	     cmp_ex, swap_ex);
 }
 
+#ifdef CONFIG_MODULES
+/*
+ * Any entry referring to the module init will be at the beginning or
+ * the end.
+ */
+void trim_init_extable(struct module *m)
+{
+	/*trim the beginning*/
+	while (m->num_exentries &&
+	       within_module_init(ex_to_addr(&m->extable[0]), m)) {
+		m->extable++;
+		m->num_exentries--;
+	}
+	/*trim the end*/
+	while (m->num_exentries &&
+	       within_module_init(ex_to_addr(&m->extable[m->num_exentries-1]),
+				  m))
+		m->num_exentries--;
+}
+#endif /* CONFIG_MODULES */
+
 const struct exception_table_entry *
 search_extable(const struct exception_table_entry *first,
 	       const struct exception_table_entry *last,
diff --git a/arch/ia64/mm/extable.c b/arch/ia64/mm/extable.c
index 71c50dd8f87..e95d5ad9285 100644
--- a/arch/ia64/mm/extable.c
+++ b/arch/ia64/mm/extable.c
@@ -53,6 +53,32 @@ void sort_extable (struct exception_table_entry *start,
 	     cmp_ex, swap_ex);
 }
 
+static inline unsigned long ex_to_addr(const struct exception_table_entry *x)
+{
+	return (unsigned long)&x->insn + x->insn;
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * Any entry referring to the module init will be at the beginning or
+ * the end.
+ */
+void trim_init_extable(struct module *m)
+{
+	/*trim the beginning*/
+	while (m->num_exentries &&
+	       within_module_init(ex_to_addr(&m->extable[0]), m)) {
+		m->extable++;
+		m->num_exentries--;
+	}
+	/*trim the end*/
+	while (m->num_exentries &&
+	       within_module_init(ex_to_addr(&m->extable[m->num_exentries-1]),
+				  m))
+		m->num_exentries--;
+}
+#endif /* CONFIG_MODULES */
+
 const struct exception_table_entry *
 search_extable (const struct exception_table_entry *first,
 		const struct exception_table_entry *last,
diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h
index 47d5619d43f..8303ac48103 100644
--- a/arch/sparc/include/asm/uaccess_32.h
+++ b/arch/sparc/include/asm/uaccess_32.h
@@ -17,6 +17,9 @@
 
 #ifndef __ASSEMBLY__
 
+#define ARCH_HAS_SORT_EXTABLE
+#define ARCH_HAS_SEARCH_EXTABLE
+
 /* Sparc is not segmented, however we need to be able to fool access_ok()
  * when doing system calls from kernel mode legitimately.
  *
diff --git a/arch/sparc/mm/extable.c b/arch/sparc/mm/extable.c
index 16cc28935e3..a61c349448e 100644
--- a/arch/sparc/mm/extable.c
+++ b/arch/sparc/mm/extable.c
@@ -28,6 +28,10 @@ search_extable(const struct exception_table_entry *start,
 	 *	word 3: last insn address + 4 bytes
 	 *	word 4: fixup code address
 	 *
+	 * Deleted entries are encoded as:
+	 *	word 1: unused
+	 *	word 2: -1
+	 *
 	 * See asm/uaccess.h for more details.
 	 */
 
@@ -39,6 +43,10 @@ search_extable(const struct exception_table_entry *start,
 			continue;
 		}
 
+		/* A deleted entry; see trim_init_extable */
+		if (walk->fixup == -1)
+			continue;
+
 		if (walk->insn == value)
 			return walk;
 	}
@@ -57,6 +65,27 @@ search_extable(const struct exception_table_entry *start,
         return NULL;
 }
 
+#ifdef CONFIG_MODULES
+/* We could memmove them around; easier to mark the trimmed ones. */
+void trim_init_extable(struct module *m)
+{
+	unsigned int i;
+	bool range;
+
+	for (i = 0; i < m->num_exentries; i += range ? 2 : 1) {
+		range = m->extable[i].fixup == 0;
+
+		if (within_module_init(m->extable[i].insn, m)) {
+			m->extable[i].fixup = -1;
+			if (range)
+				m->extable[i+1].fixup = -1;
+		}
+		if (range)
+			i++;
+	}
+}
+#endif /* CONFIG_MODULES */
+
 /* Special extable search, which handles ranges.  Returns fixup */
 unsigned long search_extables_range(unsigned long addr, unsigned long *g2)
 {
diff --git a/include/linux/module.h b/include/linux/module.h
index a8f2c0aa4c3..a7bc6e7b43a 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -77,6 +77,7 @@ search_extable(const struct exception_table_entry *first,
 void sort_extable(struct exception_table_entry *start,
 		  struct exception_table_entry *finish);
 void sort_main_extable(void);
+void trim_init_extable(struct module *m);
 
 #ifdef MODULE
 #define MODULE_GENERIC_TABLE(gtype,name)			\
diff --git a/kernel/module.c b/kernel/module.c
index 35f7de00bf0..e4ab36ce767 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2455,6 +2455,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
 	mutex_lock(&module_mutex);
 	/* Drop initial reference. */
 	module_put(mod);
+	trim_init_extable(mod);
 	module_free(mod, mod->module_init);
 	mod->module_init = NULL;
 	mod->init_size = 0;
diff --git a/lib/extable.c b/lib/extable.c
index 179c0874559..4cac81ec225 100644
--- a/lib/extable.c
+++ b/lib/extable.c
@@ -39,7 +39,26 @@ void sort_extable(struct exception_table_entry *start,
 	sort(start, finish - start, sizeof(struct exception_table_entry),
 	     cmp_ex, NULL);
 }
-#endif
+
+#ifdef CONFIG_MODULES
+/*
+ * If the exception table is sorted, any referring to the module init
+ * will be at the beginning or the end.
+ */
+void trim_init_extable(struct module *m)
+{
+	/*trim the beginning*/
+	while (m->num_exentries && within_module_init(m->extable[0].insn, m)) {
+		m->extable++;
+		m->num_exentries--;
+	}
+	/*trim the end*/
+	while (m->num_exentries &&
+		within_module_init(m->extable[m->num_exentries-1].insn, m))
+		m->num_exentries--;
+}
+#endif /* CONFIG_MODULES */
+#endif /* !ARCH_HAS_SORT_EXTABLE */
 
 #ifndef ARCH_HAS_SEARCH_EXTABLE
 /*
-- 
cgit v1.2.3-70-g09d2


From f1a3c979059b2033d0b1cc4f9ee5c90bf92b5f94 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 11 Jun 2009 17:56:09 +0200
Subject: perf_counter: PERF_TYPE_HW_CACHE is a hardware counter too

is_software_counter() was missing the new HW_CACHE category.

( This could have caused some counter scheduling artifacts
  with mixed sw and hw counters and counter groups. )

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 6e133954e2e..7c4f32f6ae1 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -621,7 +621,8 @@ extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
 static inline int is_software_counter(struct perf_counter *counter)
 {
 	return (counter->attr.type != PERF_TYPE_RAW) &&
-		(counter->attr.type != PERF_TYPE_HARDWARE);
+		(counter->attr.type != PERF_TYPE_HARDWARE) &&
+		(counter->attr.type != PERF_TYPE_HW_CACHE);
 }
 
 extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
-- 
cgit v1.2.3-70-g09d2


From 974802eaa1afdc87e00821df7020a2b3c6fee623 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 12 Jun 2009 12:46:55 +0200
Subject: perf_counter: Add forward/backward attribute ABI compatibility

Provide for means of extending the perf_counter_attr in a 'natural' way.

We allow growing the structure by appending fields at the end by specifying
the full structure size inside it.

When a new kernel sees a smaller (old) structure, it will 0 pad the tail.
When an old kernel sees a larger (new) structure, it will verify the tail
consists of 0s, otherwise fail.

If we fail due to a size-mismatch, we return -E2BIG and write the kernel's
native attribe size back into the provided structure.

Furthermore, add some attribute verification, so that we'll fail counter
creation when unknown bits are present (PERF_SAMPLE, PERF_FORMAT, or in
the __reserved fields).

(This ABI detail is introduced while keeping the existing syscall ABI.)

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 19 ++++++++--
 include/linux/syscalls.h     |  2 +-
 kernel/perf_counter.c        | 89 ++++++++++++++++++++++++++++++++++++++++++--
 tools/perf/perf.h            |  5 ++-
 4 files changed, 105 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 7c4f32f6ae1..1b3118a1023 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -120,6 +120,8 @@ enum perf_counter_sample_format {
 	PERF_SAMPLE_ID				= 1U << 6,
 	PERF_SAMPLE_CPU				= 1U << 7,
 	PERF_SAMPLE_PERIOD			= 1U << 8,
+
+	PERF_SAMPLE_MAX = 1U << 9,		/* non-ABI */
 };
 
 /*
@@ -131,17 +133,26 @@ enum perf_counter_read_format {
 	PERF_FORMAT_TOTAL_TIME_ENABLED		= 1U << 0,
 	PERF_FORMAT_TOTAL_TIME_RUNNING		= 1U << 1,
 	PERF_FORMAT_ID				= 1U << 2,
+
+	PERF_FORMAT_MAX = 1U << 3, 		/* non-ABI */
 };
 
+#define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */
+
 /*
  * Hardware event to monitor via a performance monitoring counter:
  */
 struct perf_counter_attr {
+
 	/*
 	 * Major type: hardware/software/tracepoint/etc.
 	 */
 	__u32			type;
-	__u32			__reserved_1;
+
+	/*
+	 * Size of the attr structure, for fwd/bwd compat.
+	 */
+	__u32			size;
 
 	/*
 	 * Type specific configuration information.
@@ -168,12 +179,12 @@ struct perf_counter_attr {
 				comm	       :  1, /* include comm data     */
 				freq           :  1, /* use freq, not period  */
 
-				__reserved_2   : 53;
+				__reserved_1   : 53;
 
 	__u32			wakeup_events;	/* wakeup every n events */
-	__u32			__reserved_3;
+	__u32			__reserved_2;
 
-	__u64			__reserved_4;
+	__u64			__reserved_3;
 };
 
 /*
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index c6c84ad8bd7..418d90f5eff 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -758,6 +758,6 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
 
 asmlinkage long sys_perf_counter_open(
-		const struct perf_counter_attr __user *attr_uptr,
+		struct perf_counter_attr __user *attr_uptr,
 		pid_t pid, int cpu, int group_fd, unsigned long flags);
 #endif
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 663bbe01505..29b685f551a 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3584,6 +3584,9 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 	case PERF_TYPE_TRACEPOINT:
 		pmu = tp_perf_counter_init(counter);
 		break;
+
+	default:
+		break;
 	}
 done:
 	err = 0;
@@ -3610,6 +3613,85 @@ done:
 	return counter;
 }
 
+static int perf_copy_attr(struct perf_counter_attr __user *uattr,
+			  struct perf_counter_attr *attr)
+{
+	int ret;
+	u32 size;
+
+	if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0))
+		return -EFAULT;
+
+	/*
+	 * zero the full structure, so that a short copy will be nice.
+	 */
+	memset(attr, 0, sizeof(*attr));
+
+	ret = get_user(size, &uattr->size);
+	if (ret)
+		return ret;
+
+	if (size > PAGE_SIZE)	/* silly large */
+		goto err_size;
+
+	if (!size)		/* abi compat */
+		size = PERF_ATTR_SIZE_VER0;
+
+	if (size < PERF_ATTR_SIZE_VER0)
+		goto err_size;
+
+	/*
+	 * If we're handed a bigger struct than we know of,
+	 * ensure all the unknown bits are 0.
+	 */
+	if (size > sizeof(*attr)) {
+		unsigned long val;
+		unsigned long __user *addr;
+		unsigned long __user *end;
+
+		addr = PTR_ALIGN((void __user *)uattr + sizeof(*attr),
+				sizeof(unsigned long));
+		end  = PTR_ALIGN((void __user *)uattr + size,
+				sizeof(unsigned long));
+
+		for (; addr < end; addr += sizeof(unsigned long)) {
+			ret = get_user(val, addr);
+			if (ret)
+				return ret;
+			if (val)
+				goto err_size;
+		}
+	}
+
+	ret = copy_from_user(attr, uattr, size);
+	if (ret)
+		return -EFAULT;
+
+	/*
+	 * If the type exists, the corresponding creation will verify
+	 * the attr->config.
+	 */
+	if (attr->type >= PERF_TYPE_MAX)
+		return -EINVAL;
+
+	if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3)
+		return -EINVAL;
+
+	if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
+		return -EINVAL;
+
+	if (attr->read_format & ~(PERF_FORMAT_MAX-1))
+		return -EINVAL;
+
+out:
+	return ret;
+
+err_size:
+	put_user(sizeof(*attr), &uattr->size);
+	ret = -E2BIG;
+	goto out;
+}
+
 /**
  * sys_perf_counter_open - open a performance counter, associate it to a task/cpu
  *
@@ -3619,7 +3701,7 @@ done:
  * @group_fd:		group leader counter fd
  */
 SYSCALL_DEFINE5(perf_counter_open,
-		const struct perf_counter_attr __user *, attr_uptr,
+		struct perf_counter_attr __user *, attr_uptr,
 		pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
 {
 	struct perf_counter *counter, *group_leader;
@@ -3635,8 +3717,9 @@ SYSCALL_DEFINE5(perf_counter_open,
 	if (flags)
 		return -EINVAL;
 
-	if (copy_from_user(&attr, attr_uptr, sizeof(attr)) != 0)
-		return -EFAULT;
+	ret = perf_copy_attr(attr_uptr, &attr);
+	if (ret)
+		return ret;
 
 	if (!attr.exclude_kernel) {
 		if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index af0a5046d74..87a1aca4a42 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -53,11 +53,12 @@ static inline unsigned long long rdclock(void)
 	_min1 < _min2 ? _min1 : _min2; })
 
 static inline int
-sys_perf_counter_open(struct perf_counter_attr *attr_uptr,
+sys_perf_counter_open(struct perf_counter_attr *attr,
 		      pid_t pid, int cpu, int group_fd,
 		      unsigned long flags)
 {
-	return syscall(__NR_perf_counter_open, attr_uptr, pid, cpu,
+	attr->size = sizeof(*attr);
+	return syscall(__NR_perf_counter_open, attr, pid, cpu,
 		       group_fd, flags);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 20f77f5654042cf484d8964b618faf9d620f639b Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:16:33 -0600
Subject: virtio: fix obsolete documentation on probe function

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/virtio.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 06005fa9e98..9410394bbf9 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -99,8 +99,7 @@ void unregister_virtio_device(struct virtio_device *dev);
  * @id_table: the ids serviced by this driver.
  * @feature_table: an array of feature numbers supported by this device.
  * @feature_table_size: number of entries in the feature table array.
- * @probe: the function to call when a device is found.  Returns a token for
- *    remove, or PTR_ERR().
+ * @probe: the function to call when a device is found.  Returns 0 or -errno.
  * @remove: the function when a device is removed.
  * @config_changed: optional function to call when the device configuration
  *    changes; may be called in interrupt context.
-- 
cgit v1.2.3-70-g09d2


From 9499f5e7ed5224c40706f0cec6542a9916bc7606 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:16:35 -0600
Subject: virtio: add names to virtqueue struct, mapping from devices to
 queues.

Add a linked list of all virtqueues for a virtio device: this helps for
debugging and is also needed for upcoming interface change.

Also, add a "name" field for clearer debug messages.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/block/virtio_blk.c          |  2 +-
 drivers/char/hw_random/virtio-rng.c |  2 +-
 drivers/char/virtio_console.c       |  4 ++--
 drivers/lguest/lguest_device.c      |  5 +++--
 drivers/net/virtio_net.c            |  6 +++---
 drivers/s390/kvm/kvm_virtio.c       |  7 ++++---
 drivers/virtio/virtio.c             |  2 ++
 drivers/virtio/virtio_balloon.c     |  4 ++--
 drivers/virtio/virtio_pci.c         |  5 +++--
 drivers/virtio/virtio_ring.c        | 27 ++++++++++++++++++++-------
 include/linux/virtio.h              | 12 ++++++++----
 include/linux/virtio_config.h       |  6 ++++--
 include/linux/virtio_ring.h         |  3 ++-
 net/9p/trans_virtio.c               |  2 +-
 14 files changed, 56 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index c0facaa55cf..db55a50d9f6 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -288,7 +288,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 	sg_init_table(vblk->sg, vblk->sg_elems);
 
 	/* We expect one virtqueue, for output. */
-	vblk->vq = vdev->config->find_vq(vdev, 0, blk_done);
+	vblk->vq = vdev->config->find_vq(vdev, 0, blk_done, "requests");
 	if (IS_ERR(vblk->vq)) {
 		err = PTR_ERR(vblk->vq);
 		goto out_free_vblk;
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index 86e83f88313..2aeafcea95f 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -94,7 +94,7 @@ static int virtrng_probe(struct virtio_device *vdev)
 	int err;
 
 	/* We expect a single virtqueue. */
-	vq = vdev->config->find_vq(vdev, 0, random_recv_done);
+	vq = vdev->config->find_vq(vdev, 0, random_recv_done, "input");
 	if (IS_ERR(vq))
 		return PTR_ERR(vq);
 
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index ff6f5a4b58f..58684e4a081 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -202,13 +202,13 @@ static int __devinit virtcons_probe(struct virtio_device *dev)
 	/* Find the input queue. */
 	/* FIXME: This is why we want to wean off hvc: we do nothing
 	 * when input comes in. */
-	in_vq = vdev->config->find_vq(vdev, 0, hvc_handle_input);
+	in_vq = vdev->config->find_vq(vdev, 0, hvc_handle_input, "input");
 	if (IS_ERR(in_vq)) {
 		err = PTR_ERR(in_vq);
 		goto free;
 	}
 
-	out_vq = vdev->config->find_vq(vdev, 1, NULL);
+	out_vq = vdev->config->find_vq(vdev, 1, NULL, "output");
 	if (IS_ERR(out_vq)) {
 		err = PTR_ERR(out_vq);
 		goto free_in_vq;
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index df44d962626..4babed899d5 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -228,7 +228,8 @@ extern void lguest_setup_irq(unsigned int irq);
  * function. */
 static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
 				    unsigned index,
-				    void (*callback)(struct virtqueue *vq))
+				    void (*callback)(struct virtqueue *vq),
+				    const char *name)
 {
 	struct lguest_device *ldev = to_lgdev(vdev);
 	struct lguest_vq_info *lvq;
@@ -263,7 +264,7 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
 	/* OK, tell virtio_ring.c to set up a virtqueue now we know its size
 	 * and we've got a pointer to its pages. */
 	vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN,
-				 vdev, lvq->pages, lg_notify, callback);
+				 vdev, lvq->pages, lg_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto unmap;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 4d1d47953fc..be3b734ff5a 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -906,20 +906,20 @@ static int virtnet_probe(struct virtio_device *vdev)
 		vi->mergeable_rx_bufs = true;
 
 	/* We expect two virtqueues, receive then send. */
-	vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done);
+	vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done, "input");
 	if (IS_ERR(vi->rvq)) {
 		err = PTR_ERR(vi->rvq);
 		goto free;
 	}
 
-	vi->svq = vdev->config->find_vq(vdev, 1, skb_xmit_done);
+	vi->svq = vdev->config->find_vq(vdev, 1, skb_xmit_done, "output");
 	if (IS_ERR(vi->svq)) {
 		err = PTR_ERR(vi->svq);
 		goto free_recv;
 	}
 
 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
-		vi->cvq = vdev->config->find_vq(vdev, 2, NULL);
+		vi->cvq = vdev->config->find_vq(vdev, 2, NULL, "control");
 		if (IS_ERR(vi->cvq)) {
 			err = PTR_ERR(vi->svq);
 			goto free_send;
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index cbc8566fab7..ba8995fbf04 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -173,8 +173,9 @@ static void kvm_notify(struct virtqueue *vq)
  * this device and sets it up.
  */
 static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
-				    unsigned index,
-				    void (*callback)(struct virtqueue *vq))
+				     unsigned index,
+				     void (*callback)(struct virtqueue *vq),
+				     const char *name)
 {
 	struct kvm_device *kdev = to_kvmdev(vdev);
 	struct kvm_vqconfig *config;
@@ -194,7 +195,7 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
 
 	vq = vring_new_virtqueue(config->num, KVM_S390_VIRTIO_RING_ALIGN,
 				 vdev, (void *) config->address,
-				 kvm_notify, callback);
+				 kvm_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto unmap;
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 6b681036486..3f52c767dfe 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -186,6 +186,8 @@ int register_virtio_device(struct virtio_device *dev)
 	/* Acknowledge that we've seen the device. */
 	add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
 
+	INIT_LIST_HEAD(&dev->vqs);
+
 	/* device_register() causes the bus infrastructure to look for a
 	 * matching driver. */
 	err = device_register(&dev->dev);
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 9c76a061a04..0fa73b4d18b 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -218,13 +218,13 @@ static int virtballoon_probe(struct virtio_device *vdev)
 	vb->vdev = vdev;
 
 	/* We expect two virtqueues. */
-	vb->inflate_vq = vdev->config->find_vq(vdev, 0, balloon_ack);
+	vb->inflate_vq = vdev->config->find_vq(vdev, 0, balloon_ack, "inflate");
 	if (IS_ERR(vb->inflate_vq)) {
 		err = PTR_ERR(vb->inflate_vq);
 		goto out_free_vb;
 	}
 
-	vb->deflate_vq = vdev->config->find_vq(vdev, 1, balloon_ack);
+	vb->deflate_vq = vdev->config->find_vq(vdev, 1, balloon_ack, "deflate");
 	if (IS_ERR(vb->deflate_vq)) {
 		err = PTR_ERR(vb->deflate_vq);
 		goto out_del_inflate_vq;
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 330aacbdec1..be4047abd5b 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -208,7 +208,8 @@ static irqreturn_t vp_interrupt(int irq, void *opaque)
 
 /* the config->find_vq() implementation */
 static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
-				    void (*callback)(struct virtqueue *vq))
+				    void (*callback)(struct virtqueue *vq),
+				    const char *name)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	struct virtio_pci_vq_info *info;
@@ -247,7 +248,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 
 	/* create the vring */
 	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN,
-				 vdev, info->queue, vp_notify, callback);
+				 vdev, info->queue, vp_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto out_activate_queue;
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 5c52369ab9b..579fa693d5d 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -23,21 +23,30 @@
 
 #ifdef DEBUG
 /* For development, we want to crash whenever the ring is screwed. */
-#define BAD_RING(_vq, fmt...)			\
-	do { dev_err(&(_vq)->vq.vdev->dev, fmt); BUG(); } while(0)
+#define BAD_RING(_vq, fmt, args...)				\
+	do {							\
+		dev_err(&(_vq)->vq.vdev->dev,			\
+			"%s:"fmt, (_vq)->vq.name, ##args);	\
+		BUG();						\
+	} while (0)
 /* Caller is supposed to guarantee no reentry. */
 #define START_USE(_vq)						\
 	do {							\
 		if ((_vq)->in_use)				\
-			panic("in_use = %i\n", (_vq)->in_use);	\
+			panic("%s:in_use = %i\n",		\
+			      (_vq)->vq.name, (_vq)->in_use);	\
 		(_vq)->in_use = __LINE__;			\
 		mb();						\
-	} while(0)
+	} while (0)
 #define END_USE(_vq) \
 	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; mb(); } while(0)
 #else
-#define BAD_RING(_vq, fmt...)			\
-	do { dev_err(&_vq->vq.vdev->dev, fmt); (_vq)->broken = true; } while(0)
+#define BAD_RING(_vq, fmt, args...)				\
+	do {							\
+		dev_err(&_vq->vq.vdev->dev,			\
+			"%s:"fmt, (_vq)->vq.name, ##args);	\
+		(_vq)->broken = true;				\
+	} while (0)
 #define START_USE(vq)
 #define END_USE(vq)
 #endif
@@ -284,7 +293,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      struct virtio_device *vdev,
 				      void *pages,
 				      void (*notify)(struct virtqueue *),
-				      void (*callback)(struct virtqueue *))
+				      void (*callback)(struct virtqueue *),
+				      const char *name)
 {
 	struct vring_virtqueue *vq;
 	unsigned int i;
@@ -303,10 +313,12 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
 	vq->vq.callback = callback;
 	vq->vq.vdev = vdev;
 	vq->vq.vq_ops = &vring_vq_ops;
+	vq->vq.name = name;
 	vq->notify = notify;
 	vq->broken = false;
 	vq->last_used_idx = 0;
 	vq->num_added = 0;
+	list_add_tail(&vq->vq.list, &vdev->vqs);
 #ifdef DEBUG
 	vq->in_use = false;
 #endif
@@ -327,6 +339,7 @@ EXPORT_SYMBOL_GPL(vring_new_virtqueue);
 
 void vring_del_virtqueue(struct virtqueue *vq)
 {
+	list_del(&vq->list);
 	kfree(to_vvq(vq));
 }
 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 9410394bbf9..4fca4f5440b 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -10,14 +10,17 @@
 
 /**
  * virtqueue - a queue to register buffers for sending or receiving.
+ * @list: the chain of virtqueues for this device
  * @callback: the function to call when buffers are consumed (can be NULL).
+ * @name: the name of this virtqueue (mainly for debugging)
  * @vdev: the virtio device this queue was created for.
  * @vq_ops: the operations for this virtqueue (see below).
  * @priv: a pointer for the virtqueue implementation to use.
  */
-struct virtqueue
-{
+struct virtqueue {
+	struct list_head list;
 	void (*callback)(struct virtqueue *vq);
+	const char *name;
 	struct virtio_device *vdev;
 	struct virtqueue_ops *vq_ops;
 	void *priv;
@@ -76,15 +79,16 @@ struct virtqueue_ops {
  * @dev: underlying device.
  * @id: the device type identification (used to match it with a driver).
  * @config: the configuration ops for this device.
+ * @vqs: the list of virtqueues for this device.
  * @features: the features supported by both driver and device.
  * @priv: private pointer for the driver's use.
  */
-struct virtio_device
-{
+struct virtio_device {
 	int index;
 	struct device dev;
 	struct virtio_device_id id;
 	struct virtio_config_ops *config;
+	struct list_head vqs;
 	/* Note that this is a Linux set_bit-style bitmap. */
 	unsigned long features[1];
 	void *priv;
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index bf8ec283b23..9fae274751e 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -55,7 +55,8 @@
  * @find_vq: find a virtqueue and instantiate it.
  *	vdev: the virtio_device
  *	index: the 0-based virtqueue number in case there's more than one.
- *	callback: the virqtueue callback
+ *	callback: the virtqueue callback
+ *	name: the virtqueue name (mainly for debugging)
  *	Returns the new virtqueue or ERR_PTR() (eg. -ENOENT).
  * @del_vq: free a virtqueue found by find_vq().
  * @get_features: get the array of feature bits for this device.
@@ -77,7 +78,8 @@ struct virtio_config_ops
 	void (*reset)(struct virtio_device *vdev);
 	struct virtqueue *(*find_vq)(struct virtio_device *vdev,
 				     unsigned index,
-				     void (*callback)(struct virtqueue *));
+				     void (*callback)(struct virtqueue *),
+				     const char *name);
 	void (*del_vq)(struct virtqueue *vq);
 	u32 (*get_features)(struct virtio_device *vdev);
 	void (*finalize_features)(struct virtio_device *vdev);
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index 71e03722fb5..166c519689d 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -119,7 +119,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      struct virtio_device *vdev,
 				      void *pages,
 				      void (*notify)(struct virtqueue *vq),
-				      void (*callback)(struct virtqueue *vq));
+				      void (*callback)(struct virtqueue *vq),
+				      const char *name);
 void vring_del_virtqueue(struct virtqueue *vq);
 /* Filter out transport-specific feature bits. */
 void vring_transport_features(struct virtio_device *vdev);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index bb8579a141a..ab8791f9aba 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -246,7 +246,7 @@ static int p9_virtio_probe(struct virtio_device *vdev)
 	chan->vdev = vdev;
 
 	/* We expect one virtqueue, for requests. */
-	chan->vq = vdev->config->find_vq(vdev, 0, req_done);
+	chan->vq = vdev->config->find_vq(vdev, 0, req_done, "requests");
 	if (IS_ERR(chan->vq)) {
 		err = PTR_ERR(chan->vq);
 		goto out_free_vq;
-- 
cgit v1.2.3-70-g09d2


From d2a7ddda9ffb1c8961abff6714b0f1eb925c120f Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 12 Jun 2009 22:16:36 -0600
Subject: virtio: find_vqs/del_vqs virtio operations

This replaces find_vq/del_vq with find_vqs/del_vqs virtio operations,
and updates all drivers. This is needed for MSI support, because MSI
needs to know the total number of vectors upfront.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (+ lguest/9p compile fixes)
---
 drivers/block/virtio_blk.c          |  6 ++---
 drivers/char/hw_random/virtio-rng.c |  6 ++---
 drivers/char/virtio_console.c       | 26 +++++++++-----------
 drivers/lguest/lguest_device.c      | 36 ++++++++++++++++++++++++++--
 drivers/net/virtio_net.c            | 45 ++++++++++++++---------------------
 drivers/s390/kvm/kvm_virtio.c       | 36 ++++++++++++++++++++++++++--
 drivers/virtio/virtio_balloon.c     | 27 +++++++++------------
 drivers/virtio/virtio_pci.c         | 37 +++++++++++++++++++++++------
 include/linux/virtio_config.h       | 47 ++++++++++++++++++++++++++++---------
 net/9p/trans_virtio.c               |  6 ++---
 10 files changed, 183 insertions(+), 89 deletions(-)

(limited to 'include')

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index db55a50d9f6..07d8e595e51 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -288,7 +288,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 	sg_init_table(vblk->sg, vblk->sg_elems);
 
 	/* We expect one virtqueue, for output. */
-	vblk->vq = vdev->config->find_vq(vdev, 0, blk_done, "requests");
+	vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests");
 	if (IS_ERR(vblk->vq)) {
 		err = PTR_ERR(vblk->vq);
 		goto out_free_vblk;
@@ -388,7 +388,7 @@ out_put_disk:
 out_mempool:
 	mempool_destroy(vblk->pool);
 out_free_vq:
-	vdev->config->del_vq(vblk->vq);
+	vdev->config->del_vqs(vdev);
 out_free_vblk:
 	kfree(vblk);
 out:
@@ -409,7 +409,7 @@ static void virtblk_remove(struct virtio_device *vdev)
 	blk_cleanup_queue(vblk->disk->queue);
 	put_disk(vblk->disk);
 	mempool_destroy(vblk->pool);
-	vdev->config->del_vq(vblk->vq);
+	vdev->config->del_vqs(vdev);
 	kfree(vblk);
 }
 
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index 2aeafcea95f..f2041fede82 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -94,13 +94,13 @@ static int virtrng_probe(struct virtio_device *vdev)
 	int err;
 
 	/* We expect a single virtqueue. */
-	vq = vdev->config->find_vq(vdev, 0, random_recv_done, "input");
+	vq = virtio_find_single_vq(vdev, random_recv_done, "input");
 	if (IS_ERR(vq))
 		return PTR_ERR(vq);
 
 	err = hwrng_register(&virtio_hwrng);
 	if (err) {
-		vdev->config->del_vq(vq);
+		vdev->config->del_vqs(vdev);
 		return err;
 	}
 
@@ -112,7 +112,7 @@ static void virtrng_remove(struct virtio_device *vdev)
 {
 	vdev->config->reset(vdev);
 	hwrng_unregister(&virtio_hwrng);
-	vdev->config->del_vq(vq);
+	vdev->config->del_vqs(vdev);
 }
 
 static struct virtio_device_id id_table[] = {
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 58684e4a081..c74dacfa679 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -188,6 +188,9 @@ static void hvc_handle_input(struct virtqueue *vq)
  * Finally we put our input buffer in the input queue, ready to receive. */
 static int __devinit virtcons_probe(struct virtio_device *dev)
 {
+	vq_callback_t *callbacks[] = { hvc_handle_input, NULL};
+	const char *names[] = { "input", "output" };
+	struct virtqueue *vqs[2];
 	int err;
 
 	vdev = dev;
@@ -199,20 +202,15 @@ static int __devinit virtcons_probe(struct virtio_device *dev)
 		goto fail;
 	}
 
-	/* Find the input queue. */
+	/* Find the queues. */
 	/* FIXME: This is why we want to wean off hvc: we do nothing
 	 * when input comes in. */
-	in_vq = vdev->config->find_vq(vdev, 0, hvc_handle_input, "input");
-	if (IS_ERR(in_vq)) {
-		err = PTR_ERR(in_vq);
+	err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
+	if (err)
 		goto free;
-	}
 
-	out_vq = vdev->config->find_vq(vdev, 1, NULL, "output");
-	if (IS_ERR(out_vq)) {
-		err = PTR_ERR(out_vq);
-		goto free_in_vq;
-	}
+	in_vq = vqs[0];
+	out_vq = vqs[1];
 
 	/* Start using the new console output. */
 	virtio_cons.get_chars = get_chars;
@@ -233,17 +231,15 @@ static int __devinit virtcons_probe(struct virtio_device *dev)
 	hvc = hvc_alloc(0, 0, &virtio_cons, PAGE_SIZE);
 	if (IS_ERR(hvc)) {
 		err = PTR_ERR(hvc);
-		goto free_out_vq;
+		goto free_vqs;
 	}
 
 	/* Register the input buffer the first time. */
 	add_inbuf();
 	return 0;
 
-free_out_vq:
-	vdev->config->del_vq(out_vq);
-free_in_vq:
-	vdev->config->del_vq(in_vq);
+free_vqs:
+	vdev->config->del_vqs(vdev);
 free:
 	kfree(inbuf);
 fail:
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index 4babed899d5..e082cdac88b 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -313,6 +313,38 @@ static void lg_del_vq(struct virtqueue *vq)
 	kfree(lvq);
 }
 
+static void lg_del_vqs(struct virtio_device *vdev)
+{
+	struct virtqueue *vq, *n;
+
+	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
+		lg_del_vq(vq);
+}
+
+static int lg_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+		       struct virtqueue *vqs[],
+		       vq_callback_t *callbacks[],
+		       const char *names[])
+{
+	struct lguest_device *ldev = to_lgdev(vdev);
+	int i;
+
+	/* We must have this many virtqueues. */
+	if (nvqs > ldev->desc->num_vq)
+		return -ENOENT;
+
+	for (i = 0; i < nvqs; ++i) {
+		vqs[i] = lg_find_vq(vdev, i, callbacks[i], names[i]);
+		if (IS_ERR(vqs[i]))
+			goto error;
+	}
+	return 0;
+
+error:
+	lg_del_vqs(vdev);
+	return PTR_ERR(vqs[i]);
+}
+
 /* The ops structure which hooks everything together. */
 static struct virtio_config_ops lguest_config_ops = {
 	.get_features = lg_get_features,
@@ -322,8 +354,8 @@ static struct virtio_config_ops lguest_config_ops = {
 	.get_status = lg_get_status,
 	.set_status = lg_set_status,
 	.reset = lg_reset,
-	.find_vq = lg_find_vq,
-	.del_vq = lg_del_vq,
+	.find_vqs = lg_find_vqs,
+	.del_vqs = lg_del_vqs,
 };
 
 /* The root device for the lguest virtio devices.  This makes them appear as
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index be3b734ff5a..7fa620ddeb2 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -845,6 +845,10 @@ static int virtnet_probe(struct virtio_device *vdev)
 	int err;
 	struct net_device *dev;
 	struct virtnet_info *vi;
+	struct virtqueue *vqs[3];
+	vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL};
+	const char *names[] = { "input", "output", "control" };
+	int nvqs;
 
 	/* Allocate ourselves a network device with room for our info */
 	dev = alloc_etherdev(sizeof(struct virtnet_info));
@@ -905,25 +909,19 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
 		vi->mergeable_rx_bufs = true;
 
-	/* We expect two virtqueues, receive then send. */
-	vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done, "input");
-	if (IS_ERR(vi->rvq)) {
-		err = PTR_ERR(vi->rvq);
+	/* We expect two virtqueues, receive then send,
+	 * and optionally control. */
+	nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
+
+	err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
+	if (err)
 		goto free;
-	}
 
-	vi->svq = vdev->config->find_vq(vdev, 1, skb_xmit_done, "output");
-	if (IS_ERR(vi->svq)) {
-		err = PTR_ERR(vi->svq);
-		goto free_recv;
-	}
+	vi->rvq = vqs[0];
+	vi->svq = vqs[1];
 
 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
-		vi->cvq = vdev->config->find_vq(vdev, 2, NULL, "control");
-		if (IS_ERR(vi->cvq)) {
-			err = PTR_ERR(vi->svq);
-			goto free_send;
-		}
+		vi->cvq = vqs[2];
 
 		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
 			dev->features |= NETIF_F_HW_VLAN_FILTER;
@@ -941,7 +939,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 	err = register_netdev(dev);
 	if (err) {
 		pr_debug("virtio_net: registering device failed\n");
-		goto free_ctrl;
+		goto free_vqs;
 	}
 
 	/* Last of all, set up some receive buffers. */
@@ -962,13 +960,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 
 unregister:
 	unregister_netdev(dev);
-free_ctrl:
-	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ))
-		vdev->config->del_vq(vi->cvq);
-free_send:
-	vdev->config->del_vq(vi->svq);
-free_recv:
-	vdev->config->del_vq(vi->rvq);
+free_vqs:
+	vdev->config->del_vqs(vdev);
 free:
 	free_netdev(dev);
 	return err;
@@ -994,12 +987,10 @@ static void virtnet_remove(struct virtio_device *vdev)
 
 	BUG_ON(vi->num != 0);
 
-	vdev->config->del_vq(vi->svq);
-	vdev->config->del_vq(vi->rvq);
-	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ))
-		vdev->config->del_vq(vi->cvq);
 	unregister_netdev(vi->dev);
 
+	vdev->config->del_vqs(vi->vdev);
+
 	while (vi->pages)
 		__free_pages(get_a_page(vi, GFP_KERNEL), 0);
 
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index ba8995fbf04..e38e5d306fa 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -227,6 +227,38 @@ static void kvm_del_vq(struct virtqueue *vq)
 				       KVM_S390_VIRTIO_RING_ALIGN));
 }
 
+static void kvm_del_vqs(struct virtio_device *vdev)
+{
+	struct virtqueue *vq, *n;
+
+	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
+		kvm_del_vq(vq);
+}
+
+static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+			struct virtqueue *vqs[],
+			vq_callback_t *callbacks[],
+			const char *names[])
+{
+	struct kvm_device *kdev = to_kvmdev(vdev);
+	int i;
+
+	/* We must have this many virtqueues. */
+	if (nvqs > kdev->desc->num_vq)
+		return -ENOENT;
+
+	for (i = 0; i < nvqs; ++i) {
+		vqs[i] = kvm_find_vq(vdev, i, callbacks[i], names[i]);
+		if (IS_ERR(vqs[i]))
+			goto error;
+	}
+	return 0;
+
+error:
+	kvm_del_vqs(vdev);
+	return PTR_ERR(vqs[i]);
+}
+
 /*
  * The config ops structure as defined by virtio config
  */
@@ -238,8 +270,8 @@ static struct virtio_config_ops kvm_vq_configspace_ops = {
 	.get_status = kvm_get_status,
 	.set_status = kvm_set_status,
 	.reset = kvm_reset,
-	.find_vq = kvm_find_vq,
-	.del_vq = kvm_del_vq,
+	.find_vqs = kvm_find_vqs,
+	.del_vqs = kvm_del_vqs,
 };
 
 /*
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 0fa73b4d18b..26b27826479 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -204,6 +204,9 @@ static int balloon(void *_vballoon)
 static int virtballoon_probe(struct virtio_device *vdev)
 {
 	struct virtio_balloon *vb;
+	struct virtqueue *vqs[2];
+	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack };
+	const char *names[] = { "inflate", "deflate" };
 	int err;
 
 	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
@@ -218,22 +221,17 @@ static int virtballoon_probe(struct virtio_device *vdev)
 	vb->vdev = vdev;
 
 	/* We expect two virtqueues. */
-	vb->inflate_vq = vdev->config->find_vq(vdev, 0, balloon_ack, "inflate");
-	if (IS_ERR(vb->inflate_vq)) {
-		err = PTR_ERR(vb->inflate_vq);
+	err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
+	if (err)
 		goto out_free_vb;
-	}
 
-	vb->deflate_vq = vdev->config->find_vq(vdev, 1, balloon_ack, "deflate");
-	if (IS_ERR(vb->deflate_vq)) {
-		err = PTR_ERR(vb->deflate_vq);
-		goto out_del_inflate_vq;
-	}
+	vb->inflate_vq = vqs[0];
+	vb->deflate_vq = vqs[1];
 
 	vb->thread = kthread_run(balloon, vb, "vballoon");
 	if (IS_ERR(vb->thread)) {
 		err = PTR_ERR(vb->thread);
-		goto out_del_deflate_vq;
+		goto out_del_vqs;
 	}
 
 	vb->tell_host_first
@@ -241,10 +239,8 @@ static int virtballoon_probe(struct virtio_device *vdev)
 
 	return 0;
 
-out_del_deflate_vq:
-	vdev->config->del_vq(vb->deflate_vq);
-out_del_inflate_vq:
-	vdev->config->del_vq(vb->inflate_vq);
+out_del_vqs:
+	vdev->config->del_vqs(vdev);
 out_free_vb:
 	kfree(vb);
 out:
@@ -264,8 +260,7 @@ static void virtballoon_remove(struct virtio_device *vdev)
 	/* Now we reset the device so we can clean up the queues. */
 	vdev->config->reset(vdev);
 
-	vdev->config->del_vq(vb->deflate_vq);
-	vdev->config->del_vq(vb->inflate_vq);
+	vdev->config->del_vqs(vdev);
 	kfree(vb);
 }
 
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index be4047abd5b..027f13fbe49 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -276,11 +276,7 @@ static void vp_del_vq(struct virtqueue *vq)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
 	struct virtio_pci_vq_info *info = vq->priv;
-	unsigned long flags, size;
-
-	spin_lock_irqsave(&vp_dev->lock, flags);
-	list_del(&info->node);
-	spin_unlock_irqrestore(&vp_dev->lock, flags);
+	unsigned long size;
 
 	vring_del_virtqueue(vq);
 
@@ -293,14 +289,41 @@ static void vp_del_vq(struct virtqueue *vq)
 	kfree(info);
 }
 
+static void vp_del_vqs(struct virtio_device *vdev)
+{
+	struct virtqueue *vq, *n;
+
+	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
+		vp_del_vq(vq);
+}
+
+static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+		       struct virtqueue *vqs[],
+		       vq_callback_t *callbacks[],
+		       const char *names[])
+{
+	int i;
+
+	for (i = 0; i < nvqs; ++i) {
+		vqs[i] = vp_find_vq(vdev, i, callbacks[i], names[i]);
+		if (IS_ERR(vqs[i]))
+			goto error;
+	}
+	return 0;
+
+error:
+	vp_del_vqs(vdev);
+	return PTR_ERR(vqs[i]);
+}
+
 static struct virtio_config_ops virtio_pci_config_ops = {
 	.get		= vp_get,
 	.set		= vp_set,
 	.get_status	= vp_get_status,
 	.set_status	= vp_set_status,
 	.reset		= vp_reset,
-	.find_vq	= vp_find_vq,
-	.del_vq		= vp_del_vq,
+	.find_vqs	= vp_find_vqs,
+	.del_vqs	= vp_del_vqs,
 	.get_features	= vp_get_features,
 	.finalize_features = vp_finalize_features,
 };
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 9fae274751e..4cd290c06a8 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -29,6 +29,7 @@
 #define VIRTIO_F_NOTIFY_ON_EMPTY	24
 
 #ifdef __KERNEL__
+#include <linux/err.h>
 #include <linux/virtio.h>
 
 /**
@@ -49,16 +50,26 @@
  * @set_status: write the status byte
  *	vdev: the virtio_device
  *	status: the new status byte
+ * @request_vqs: request the specified number of virtqueues
+ *	vdev: the virtio_device
+ *	max_vqs: the max number of virtqueues we want
+ *      If supplied, must call before any virtqueues are instantiated.
+ *      To modify the max number of virtqueues after request_vqs has been
+ *      called, call free_vqs and then request_vqs with a new value.
+ * @free_vqs: cleanup resources allocated by request_vqs
+ *	vdev: the virtio_device
+ *      If supplied, must call after all virtqueues have been deleted.
  * @reset: reset the device
  *	vdev: the virtio device
  *	After this, status and feature negotiation must be done again
- * @find_vq: find a virtqueue and instantiate it.
+ * @find_vqs: find virtqueues and instantiate them.
  *	vdev: the virtio_device
- *	index: the 0-based virtqueue number in case there's more than one.
- *	callback: the virtqueue callback
- *	name: the virtqueue name (mainly for debugging)
- *	Returns the new virtqueue or ERR_PTR() (eg. -ENOENT).
- * @del_vq: free a virtqueue found by find_vq().
+ *	nvqs: the number of virtqueues to find
+ *	vqs: on success, includes new virtqueues
+ *	callbacks: array of callbacks, for each virtqueue
+ *	names: array of virtqueue names (mainly for debugging)
+ *	Returns 0 on success or error status
+ * @del_vqs: free virtqueues found by find_vqs().
  * @get_features: get the array of feature bits for this device.
  *	vdev: the virtio_device
  *	Returns the first 32 feature bits (all we currently need).
@@ -67,6 +78,7 @@
  *	This gives the final feature bits for the device: it can change
  *	the dev->feature bits if it wants.
  */
+typedef void vq_callback_t(struct virtqueue *);
 struct virtio_config_ops
 {
 	void (*get)(struct virtio_device *vdev, unsigned offset,
@@ -76,11 +88,11 @@ struct virtio_config_ops
 	u8 (*get_status)(struct virtio_device *vdev);
 	void (*set_status)(struct virtio_device *vdev, u8 status);
 	void (*reset)(struct virtio_device *vdev);
-	struct virtqueue *(*find_vq)(struct virtio_device *vdev,
-				     unsigned index,
-				     void (*callback)(struct virtqueue *),
-				     const char *name);
-	void (*del_vq)(struct virtqueue *vq);
+	int (*find_vqs)(struct virtio_device *, unsigned nvqs,
+			struct virtqueue *vqs[],
+			vq_callback_t *callbacks[],
+			const char *names[]);
+	void (*del_vqs)(struct virtio_device *);
 	u32 (*get_features)(struct virtio_device *vdev);
 	void (*finalize_features)(struct virtio_device *vdev);
 };
@@ -128,5 +140,18 @@ static inline int virtio_config_buf(struct virtio_device *vdev,
 	vdev->config->get(vdev, offset, buf, len);
 	return 0;
 }
+
+static inline
+struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
+					vq_callback_t *c, const char *n)
+{
+	vq_callback_t *callbacks[] = { c };
+	const char *names[] = { n };
+	struct virtqueue *vq;
+	int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names);
+	if (err < 0)
+		return ERR_PTR(err);
+	return vq;
+}
 #endif /* __KERNEL__ */
 #endif /* _LINUX_VIRTIO_CONFIG_H */
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index ab8791f9aba..a49484e67e1 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -246,7 +246,7 @@ static int p9_virtio_probe(struct virtio_device *vdev)
 	chan->vdev = vdev;
 
 	/* We expect one virtqueue, for requests. */
-	chan->vq = vdev->config->find_vq(vdev, 0, req_done, "requests");
+	chan->vq = virtio_find_single_vq(vdev, req_done, "requests");
 	if (IS_ERR(chan->vq)) {
 		err = PTR_ERR(chan->vq);
 		goto out_free_vq;
@@ -261,7 +261,7 @@ static int p9_virtio_probe(struct virtio_device *vdev)
 	return 0;
 
 out_free_vq:
-	vdev->config->del_vq(chan->vq);
+	vdev->config->del_vqs(vdev);
 fail:
 	mutex_lock(&virtio_9p_lock);
 	chan_index--;
@@ -332,7 +332,7 @@ static void p9_virtio_remove(struct virtio_device *vdev)
 	BUG_ON(chan->inuse);
 
 	if (chan->initialized) {
-		vdev->config->del_vq(chan->vq);
+		vdev->config->del_vqs(vdev);
 		chan->initialized = false;
 	}
 }
-- 
cgit v1.2.3-70-g09d2


From 82af8ce84ed65d2fb6d8c017d3f2bbbf161061fb Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 14 May 2009 13:55:41 +0300
Subject: virtio_pci: optional MSI-X support

This implements optional MSI-X support in virtio_pci.
MSI-X is used whenever the host supports at least 2 MSI-X
vectors: 1 for configuration changes and 1 for virtqueues.
Per-virtqueue vectors are allocated if enough vectors
available.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (+ whitespace, style)
---
 drivers/virtio/virtio_pci.c | 228 ++++++++++++++++++++++++++++++++++++++++----
 include/linux/virtio_pci.h  |  10 +-
 2 files changed, 218 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 951e673e50a..193c8f0e5cc 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -42,6 +42,26 @@ struct virtio_pci_device
 	/* a list of queues so we can dispatch IRQs */
 	spinlock_t lock;
 	struct list_head virtqueues;
+
+	/* MSI-X support */
+	int msix_enabled;
+	int intx_enabled;
+	struct msix_entry *msix_entries;
+	/* Name strings for interrupts. This size should be enough,
+	 * and I'm too lazy to allocate each name separately. */
+	char (*msix_names)[256];
+	/* Number of available vectors */
+	unsigned msix_vectors;
+	/* Vectors allocated */
+	unsigned msix_used_vectors;
+};
+
+/* Constants for MSI-X */
+/* Use first vector for configuration changes, second and the rest for
+ * virtqueues Thus, we need at least 2 vectors for MSI. */
+enum {
+	VP_MSIX_CONFIG_VECTOR = 0,
+	VP_MSIX_VQ_VECTOR = 1,
 };
 
 struct virtio_pci_vq_info
@@ -60,6 +80,9 @@ struct virtio_pci_vq_info
 
 	/* the list node for the virtqueues list */
 	struct list_head node;
+
+	/* MSI-X vector (or none) */
+	unsigned vector;
 };
 
 /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
@@ -109,7 +132,8 @@ static void vp_get(struct virtio_device *vdev, unsigned offset,
 		   void *buf, unsigned len)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	void __iomem *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset;
+	void __iomem *ioaddr = vp_dev->ioaddr +
+				VIRTIO_PCI_CONFIG(vp_dev) + offset;
 	u8 *ptr = buf;
 	int i;
 
@@ -123,7 +147,8 @@ static void vp_set(struct virtio_device *vdev, unsigned offset,
 		   const void *buf, unsigned len)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	void __iomem *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset;
+	void __iomem *ioaddr = vp_dev->ioaddr +
+				VIRTIO_PCI_CONFIG(vp_dev) + offset;
 	const u8 *ptr = buf;
 	int i;
 
@@ -221,7 +246,122 @@ static irqreturn_t vp_interrupt(int irq, void *opaque)
 	return vp_vring_interrupt(irq, opaque);
 }
 
-/* the config->find_vq() implementation */
+static void vp_free_vectors(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	int i;
+
+	if (vp_dev->intx_enabled) {
+		free_irq(vp_dev->pci_dev->irq, vp_dev);
+		vp_dev->intx_enabled = 0;
+	}
+
+	for (i = 0; i < vp_dev->msix_used_vectors; ++i)
+		free_irq(vp_dev->msix_entries[i].vector, vp_dev);
+	vp_dev->msix_used_vectors = 0;
+
+	if (vp_dev->msix_enabled) {
+		/* Disable the vector used for configuration */
+		iowrite16(VIRTIO_MSI_NO_VECTOR,
+			  vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
+		/* Flush the write out to device */
+		ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
+
+		vp_dev->msix_enabled = 0;
+		pci_disable_msix(vp_dev->pci_dev);
+	}
+}
+
+static int vp_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
+			  int *options, int noptions)
+{
+	int i;
+	for (i = 0; i < noptions; ++i)
+		if (!pci_enable_msix(dev, entries, options[i]))
+			return options[i];
+	return -EBUSY;
+}
+
+static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	const char *name = dev_name(&vp_dev->vdev.dev);
+	unsigned i, v;
+	int err = -ENOMEM;
+	/* We want at most one vector per queue and one for config changes.
+	 * Fallback to separate vectors for config and a shared for queues.
+	 * Finally fall back to regular interrupts. */
+	int options[] = { max_vqs + 1, 2 };
+	int nvectors = max(options[0], options[1]);
+
+	vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries,
+				       GFP_KERNEL);
+	if (!vp_dev->msix_entries)
+		goto error_entries;
+	vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
+				     GFP_KERNEL);
+	if (!vp_dev->msix_names)
+		goto error_names;
+
+	for (i = 0; i < nvectors; ++i)
+		vp_dev->msix_entries[i].entry = i;
+
+	err = vp_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries,
+			     options, ARRAY_SIZE(options));
+	if (err < 0) {
+		/* Can't allocate enough MSI-X vectors, use regular interrupt */
+		vp_dev->msix_vectors = 0;
+		err = request_irq(vp_dev->pci_dev->irq, vp_interrupt,
+				  IRQF_SHARED, name, vp_dev);
+		if (err)
+			goto error_irq;
+		vp_dev->intx_enabled = 1;
+	} else {
+		vp_dev->msix_vectors = err;
+		vp_dev->msix_enabled = 1;
+
+		/* Set the vector used for configuration */
+		v = vp_dev->msix_used_vectors;
+		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
+			 "%s-config", name);
+		err = request_irq(vp_dev->msix_entries[v].vector,
+				  vp_config_changed, 0, vp_dev->msix_names[v],
+				  vp_dev);
+		if (err)
+			goto error_irq;
+		++vp_dev->msix_used_vectors;
+
+		iowrite16(v, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
+		/* Verify we had enough resources to assign the vector */
+		v = ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
+		if (v == VIRTIO_MSI_NO_VECTOR) {
+			err = -EBUSY;
+			goto error_irq;
+		}
+	}
+
+	if (vp_dev->msix_vectors && vp_dev->msix_vectors != max_vqs + 1) {
+		/* Shared vector for all VQs */
+		v = vp_dev->msix_used_vectors;
+		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
+			 "%s-virtqueues", name);
+		err = request_irq(vp_dev->msix_entries[v].vector,
+				  vp_vring_interrupt, 0, vp_dev->msix_names[v],
+				  vp_dev);
+		if (err)
+			goto error_irq;
+		++vp_dev->msix_used_vectors;
+	}
+	return 0;
+error_irq:
+	vp_free_vectors(vdev);
+	kfree(vp_dev->msix_names);
+error_names:
+	kfree(vp_dev->msix_entries);
+error_entries:
+	return err;
+}
+
 static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 				    void (*callback)(struct virtqueue *vq),
 				    const char *name)
@@ -230,7 +370,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 	struct virtio_pci_vq_info *info;
 	struct virtqueue *vq;
 	unsigned long flags, size;
-	u16 num;
+	u16 num, vector;
 	int err;
 
 	/* Select the queue we're interested in */
@@ -249,6 +389,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 
 	info->queue_index = index;
 	info->num = num;
+	info->vector = VIRTIO_MSI_NO_VECTOR;
 
 	size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN));
 	info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO);
@@ -272,12 +413,43 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 	vq->priv = info;
 	info->vq = vq;
 
+	/* allocate per-vq vector if available and necessary */
+	if (callback && vp_dev->msix_used_vectors < vp_dev->msix_vectors) {
+		vector = vp_dev->msix_used_vectors;
+		snprintf(vp_dev->msix_names[vector], sizeof *vp_dev->msix_names,
+			 "%s-%s", dev_name(&vp_dev->vdev.dev), name);
+		err = request_irq(vp_dev->msix_entries[vector].vector,
+				  vring_interrupt, 0,
+				  vp_dev->msix_names[vector], vq);
+		if (err)
+			goto out_request_irq;
+		info->vector = vector;
+		++vp_dev->msix_used_vectors;
+	} else
+		vector = VP_MSIX_VQ_VECTOR;
+
+	 if (callback && vp_dev->msix_enabled) {
+		iowrite16(vector, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
+		vector = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
+		if (vector == VIRTIO_MSI_NO_VECTOR) {
+			err = -EBUSY;
+			goto out_assign;
+		}
+	}
+
 	spin_lock_irqsave(&vp_dev->lock, flags);
 	list_add(&info->node, &vp_dev->virtqueues);
 	spin_unlock_irqrestore(&vp_dev->lock, flags);
 
 	return vq;
 
+out_assign:
+	if (info->vector != VIRTIO_MSI_NO_VECTOR) {
+		free_irq(vp_dev->msix_entries[info->vector].vector, vq);
+		--vp_dev->msix_used_vectors;
+	}
+out_request_irq:
+	vring_del_virtqueue(vq);
 out_activate_queue:
 	iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 	free_pages_exact(info->queue, size);
@@ -286,17 +458,27 @@ out_info:
 	return ERR_PTR(err);
 }
 
-/* the config->del_vq() implementation */
 static void vp_del_vq(struct virtqueue *vq)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
 	struct virtio_pci_vq_info *info = vq->priv;
 	unsigned long size;
 
+	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
+
+	if (info->vector != VIRTIO_MSI_NO_VECTOR)
+		free_irq(vp_dev->msix_entries[info->vector].vector, vq);
+
+	if (vp_dev->msix_enabled) {
+		iowrite16(VIRTIO_MSI_NO_VECTOR,
+			  vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
+		/* Flush the write out to device */
+		ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
+	}
+
 	vring_del_virtqueue(vq);
 
 	/* Select and deactivate the queue */
-	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
 	iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 
 	size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN));
@@ -304,30 +486,46 @@ static void vp_del_vq(struct virtqueue *vq)
 	kfree(info);
 }
 
+/* the config->del_vqs() implementation */
 static void vp_del_vqs(struct virtio_device *vdev)
 {
 	struct virtqueue *vq, *n;
 
 	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
 		vp_del_vq(vq);
+
+	vp_free_vectors(vdev);
 }
 
+/* the config->find_vqs() implementation */
 static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 		       struct virtqueue *vqs[],
 		       vq_callback_t *callbacks[],
 		       const char *names[])
 {
-	int i;
+	int vectors = 0;
+	int i, err;
+
+	/* How many vectors would we like? */
+	for (i = 0; i < nvqs; ++i)
+		if (callbacks[i])
+			++vectors;
+
+	err = vp_request_vectors(vdev, vectors);
+	if (err)
+		goto error_request;
 
 	for (i = 0; i < nvqs; ++i) {
 		vqs[i] = vp_find_vq(vdev, i, callbacks[i], names[i]);
 		if (IS_ERR(vqs[i]))
-			goto error;
+			goto error_find;
 	}
 	return 0;
 
-error:
+error_find:
 	vp_del_vqs(vdev);
+
+error_request:
 	return PTR_ERR(vqs[i]);
 }
 
@@ -349,7 +547,7 @@ static void virtio_pci_release_dev(struct device *_d)
 	struct virtio_pci_device *vp_dev = to_vp_device(dev);
 	struct pci_dev *pci_dev = vp_dev->pci_dev;
 
-	free_irq(pci_dev->irq, vp_dev);
+	vp_del_vqs(dev);
 	pci_set_drvdata(pci_dev, NULL);
 	pci_iounmap(pci_dev, vp_dev->ioaddr);
 	pci_release_regions(pci_dev);
@@ -408,21 +606,13 @@ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev,
 	vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
 	vp_dev->vdev.id.device = pci_dev->subsystem_device;
 
-	/* register a handler for the queue with the PCI device's interrupt */
-	err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED,
-			  dev_name(&vp_dev->vdev.dev), vp_dev);
-	if (err)
-		goto out_set_drvdata;
-
 	/* finally register the virtio device */
 	err = register_virtio_device(&vp_dev->vdev);
 	if (err)
-		goto out_req_irq;
+		goto out_set_drvdata;
 
 	return 0;
 
-out_req_irq:
-	free_irq(pci_dev->irq, vp_dev);
 out_set_drvdata:
 	pci_set_drvdata(pci_dev, NULL);
 	pci_iounmap(pci_dev, vp_dev->ioaddr);
diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h
index cd0fd5d181a..9a3d7c48c62 100644
--- a/include/linux/virtio_pci.h
+++ b/include/linux/virtio_pci.h
@@ -47,9 +47,17 @@
 /* The bit of the ISR which indicates a device configuration change. */
 #define VIRTIO_PCI_ISR_CONFIG		0x2
 
+/* MSI-X registers: only enabled if MSI-X is enabled. */
+/* A 16-bit vector for configuration changes. */
+#define VIRTIO_MSI_CONFIG_VECTOR        20
+/* A 16-bit vector for selected queue notifications. */
+#define VIRTIO_MSI_QUEUE_VECTOR         22
+/* Vector value used to disable MSI for queue */
+#define VIRTIO_MSI_NO_VECTOR            0xffff
+
 /* The remaining space is defined by each driver as the per-driver
  * configuration space */
-#define VIRTIO_PCI_CONFIG		20
+#define VIRTIO_PCI_CONFIG(dev)		((dev)->msix_enabled ? 24 : 20)
 
 /* Virtio ABI version, this must match exactly */
 #define VIRTIO_PCI_ABI_VERSION		0
-- 
cgit v1.2.3-70-g09d2


From ee006b353f1ca8c9a8470b72b462beb011d62e32 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Mon, 11 May 2009 18:11:44 +0100
Subject: virtio: teach virtio_has_feature() about transport features

Drivers don't add transport features to their table, so we
shouldn't check these with virtio_check_driver_offered_feature().

We could perhaps add an ->offered_feature() virtio_config_op,
but that perhaps that would be overkill for a consitency check
like this.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/virtio_config.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 4cd290c06a8..99f514575f6 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -113,7 +113,9 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
 	if (__builtin_constant_p(fbit))
 		BUILD_BUG_ON(fbit >= 32);
 
-	virtio_check_driver_offered_feature(vdev, fbit);
+	if (fbit < VIRTIO_TRANSPORT_F_START)
+		virtio_check_driver_offered_feature(vdev, fbit);
+
 	return test_bit(fbit, vdev->features);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 9fa29b9df32ba4db055f3977933cd0c1b8fe67cd Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Mon, 11 May 2009 18:11:45 +0100
Subject: virtio: indirect ring entries (VIRTIO_RING_F_INDIRECT_DESC)

Add a new feature flag for indirect ring entries. These are ring
entries which point to a table of buffer descriptors.

The idea here is to increase the ring capacity by allowing a larger
effective ring size whereby the ring size dictates the number of
requests that may be outstanding, rather than the size of those
requests.

This should be most effective in the case of block I/O where we can
potentially benefit by concurrently dispatching a large number of
large requests. Even in the simple case of single segment block
requests, this results in a threefold increase in ring capacity.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_ring.c | 75 ++++++++++++++++++++++++++++++++++++++++++--
 include/linux/virtio_ring.h  |  5 +++
 2 files changed, 78 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 579fa693d5d..a882f260651 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -61,6 +61,9 @@ struct vring_virtqueue
 	/* Other side has made a mess, don't try any more. */
 	bool broken;
 
+	/* Host supports indirect buffers */
+	bool indirect;
+
 	/* Number of free buffers */
 	unsigned int num_free;
 	/* Head of free buffer list. */
@@ -85,6 +88,55 @@ struct vring_virtqueue
 
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 
+/* Set up an indirect table of descriptors and add it to the queue. */
+static int vring_add_indirect(struct vring_virtqueue *vq,
+			      struct scatterlist sg[],
+			      unsigned int out,
+			      unsigned int in)
+{
+	struct vring_desc *desc;
+	unsigned head;
+	int i;
+
+	desc = kmalloc((out + in) * sizeof(struct vring_desc), GFP_ATOMIC);
+	if (!desc)
+		return vq->vring.num;
+
+	/* Transfer entries from the sg list into the indirect page */
+	for (i = 0; i < out; i++) {
+		desc[i].flags = VRING_DESC_F_NEXT;
+		desc[i].addr = sg_phys(sg);
+		desc[i].len = sg->length;
+		desc[i].next = i+1;
+		sg++;
+	}
+	for (; i < (out + in); i++) {
+		desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
+		desc[i].addr = sg_phys(sg);
+		desc[i].len = sg->length;
+		desc[i].next = i+1;
+		sg++;
+	}
+
+	/* Last one doesn't continue. */
+	desc[i-1].flags &= ~VRING_DESC_F_NEXT;
+	desc[i-1].next = 0;
+
+	/* We're about to use a buffer */
+	vq->num_free--;
+
+	/* Use a single buffer which doesn't continue */
+	head = vq->free_head;
+	vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
+	vq->vring.desc[head].addr = virt_to_phys(desc);
+	vq->vring.desc[head].len = i * sizeof(struct vring_desc);
+
+	/* Update free pointer */
+	vq->free_head = vq->vring.desc[head].next;
+
+	return head;
+}
+
 static int vring_add_buf(struct virtqueue *_vq,
 			 struct scatterlist sg[],
 			 unsigned int out,
@@ -94,12 +146,21 @@ static int vring_add_buf(struct virtqueue *_vq,
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	unsigned int i, avail, head, uninitialized_var(prev);
 
+	START_USE(vq);
+
 	BUG_ON(data == NULL);
+
+	/* If the host supports indirect descriptor tables, and we have multiple
+	 * buffers, then go indirect. FIXME: tune this threshold */
+	if (vq->indirect && (out + in) > 1 && vq->num_free) {
+		head = vring_add_indirect(vq, sg, out, in);
+		if (head != vq->vring.num)
+			goto add_head;
+	}
+
 	BUG_ON(out + in > vq->vring.num);
 	BUG_ON(out + in == 0);
 
-	START_USE(vq);
-
 	if (vq->num_free < out + in) {
 		pr_debug("Can't add buf len %i - avail = %i\n",
 			 out + in, vq->num_free);
@@ -136,6 +197,7 @@ static int vring_add_buf(struct virtqueue *_vq,
 	/* Update free pointer */
 	vq->free_head = i;
 
+add_head:
 	/* Set token. */
 	vq->data[head] = data;
 
@@ -179,6 +241,11 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
 
 	/* Put back on free list: find end */
 	i = head;
+
+	/* Free the indirect table */
+	if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT)
+		kfree(phys_to_virt(vq->vring.desc[i].addr));
+
 	while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {
 		i = vq->vring.desc[i].next;
 		vq->num_free++;
@@ -323,6 +390,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
 	vq->in_use = false;
 #endif
 
+	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
+
 	/* No callback?  Tell other side not to bother us. */
 	if (!callback)
 		vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
@@ -351,6 +420,8 @@ void vring_transport_features(struct virtio_device *vdev)
 
 	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
 		switch (i) {
+		case VIRTIO_RING_F_INDIRECT_DESC:
+			break;
 		default:
 			/* We don't understand this bit. */
 			clear_bit(i, vdev->features);
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index 166c519689d..693e0ec5afa 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -14,6 +14,8 @@
 #define VRING_DESC_F_NEXT	1
 /* This marks a buffer as write-only (otherwise read-only). */
 #define VRING_DESC_F_WRITE	2
+/* This means the buffer contains a list of buffer descriptors. */
+#define VRING_DESC_F_INDIRECT	4
 
 /* The Host uses this in used->flags to advise the Guest: don't kick me when
  * you add a buffer.  It's unreliable, so it's simply an optimization.  Guest
@@ -24,6 +26,9 @@
  * optimization.  */
 #define VRING_AVAIL_F_NO_INTERRUPT	1
 
+/* We support indirect buffer descriptors */
+#define VIRTIO_RING_F_INDIRECT_DESC	28
+
 /* Virtio ring descriptors: 16 bytes.  These can chain together via "next". */
 struct vring_desc
 {
-- 
cgit v1.2.3-70-g09d2


From a32a8813d0173163ba44d8f9556e0d89fdc4fb46 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:27:02 -0600
Subject: lguest: improve interrupt handling, speed up stream networking

lguest never checked for pending interrupts when enabling interrupts, and
things still worked.  However, it makes a significant difference to TCP
performance, so it's time we fixed it by introducing a pending_irq flag
and checking it on irq_restore and irq_enable.

These two routines are now too big to patch into the 8/10 bytes
patch space, so we drop that code.

Note: The high latency on interrupt delivery had a very curious
effect: once everything else was optimized, networking without GSO was
faster than networking with GSO, since more interrupts were sent and
hence a greater chance of one getting through to the Guest!

Note2: (Almost) Closing the same loophole for iret doesn't have any
measurable effect, so I'm leaving that patch for the moment.

Before:
	1GB tcpblast Guest->Host:		30.7 seconds
	1GB tcpblast Guest->Host (no GSO):	76.0 seconds

After:
	1GB tcpblast Guest->Host:		6.8 seconds
	1GB tcpblast Guest->Host (no GSO):	27.8 seconds

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 arch/x86/include/asm/lguest_hcall.h   |  1 +
 arch/x86/lguest/boot.c                | 21 +++++++++++++++------
 arch/x86/lguest/i386_head.S           |  2 --
 drivers/lguest/core.c                 |  7 ++++---
 drivers/lguest/hypercalls.c           |  4 ++++
 drivers/lguest/interrupts_and_traps.c | 16 +++++++++++++---
 drivers/lguest/lg.h                   |  4 ++--
 include/linux/lguest.h                |  4 ++++
 8 files changed, 43 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
index faae1996487..f9a9f781124 100644
--- a/arch/x86/include/asm/lguest_hcall.h
+++ b/arch/x86/include/asm/lguest_hcall.h
@@ -17,6 +17,7 @@
 #define LHCALL_LOAD_TLS		16
 #define LHCALL_NOTIFY		17
 #define LHCALL_LOAD_GDT_ENTRY	18
+#define LHCALL_SEND_INTERRUPTS	19
 
 #define LGUEST_TRAP_ENTRY 0x1F
 
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 2392a7a171c..37b8c1d3e02 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -205,6 +205,12 @@ PV_CALLEE_SAVE_REGS_THUNK(save_fl);
 static void restore_fl(unsigned long flags)
 {
 	lguest_data.irq_enabled = flags;
+	mb();
+	/* Null hcall forces interrupt delivery now, if irq_pending is
+	 * set to X86_EFLAGS_IF (ie. an interrupt is pending, and flags
+	 * enables interrupts. */
+	if (flags & lguest_data.irq_pending)
+		kvm_hypercall0(LHCALL_SEND_INTERRUPTS);
 }
 PV_CALLEE_SAVE_REGS_THUNK(restore_fl);
 
@@ -219,6 +225,11 @@ PV_CALLEE_SAVE_REGS_THUNK(irq_disable);
 static void irq_enable(void)
 {
 	lguest_data.irq_enabled = X86_EFLAGS_IF;
+	mb();
+	/* Null hcall forces interrupt delivery now. */
+	if (lguest_data.irq_pending)
+		kvm_hypercall0(LHCALL_SEND_INTERRUPTS);
+
 }
 PV_CALLEE_SAVE_REGS_THUNK(irq_enable);
 
@@ -972,10 +983,10 @@ static void lguest_restart(char *reason)
  *
  * Our current solution is to allow the paravirt back end to optionally patch
  * over the indirect calls to replace them with something more efficient.  We
- * patch the four most commonly called functions: disable interrupts, enable
- * interrupts, restore interrupts and save interrupts.  We usually have 6 or 10
- * bytes to patch into: the Guest versions of these operations are small enough
- * that we can fit comfortably.
+ * patch two of the simplest of the most commonly called functions: disable
+ * interrupts and save interrupts.  We usually have 6 or 10 bytes to patch
+ * into: the Guest versions of these operations are small enough that we can
+ * fit comfortably.
  *
  * First we need assembly templates of each of the patchable Guest operations,
  * and these are in i386_head.S. */
@@ -986,8 +997,6 @@ static const struct lguest_insns
 	const char *start, *end;
 } lguest_insns[] = {
 	[PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli },
-	[PARAVIRT_PATCH(pv_irq_ops.irq_enable)] = { lgstart_sti, lgend_sti },
-	[PARAVIRT_PATCH(pv_irq_ops.restore_fl)] = { lgstart_popf, lgend_popf },
 	[PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf },
 };
 
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index f7954198947..3e0c5545d59 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -46,8 +46,6 @@ ENTRY(lguest_entry)
 	.globl lgstart_##name; .globl lgend_##name
 
 LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled)
-LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled)
-LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled)
 LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax)
 /*:*/
 
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 8ca1def5b14..03fbc88c002 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -189,6 +189,7 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 	/* We stop running once the Guest is dead. */
 	while (!cpu->lg->dead) {
 		unsigned int irq;
+		bool more;
 
 		/* First we run any hypercalls the Guest wants done. */
 		if (cpu->hcall)
@@ -213,9 +214,9 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 		/* Check if there are any interrupts which can be delivered now:
 		 * if so, this sets up the hander to be executed when we next
 		 * run the Guest. */
-		irq = interrupt_pending(cpu);
+		irq = interrupt_pending(cpu, &more);
 		if (irq < LGUEST_IRQS)
-			try_deliver_interrupt(cpu, irq);
+			try_deliver_interrupt(cpu, irq, more);
 
 		/* All long-lived kernel loops need to check with this horrible
 		 * thing called the freezer.  If the Host is trying to suspend,
@@ -233,7 +234,7 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 			set_current_state(TASK_INTERRUPTIBLE);
 			/* Just before we sleep, make sure nothing snuck in
 			 * which we should be doing. */
-			if (interrupt_pending(cpu) < LGUEST_IRQS
+			if (interrupt_pending(cpu, &more) < LGUEST_IRQS
 			    || cpu->break_out)
 				set_current_state(TASK_RUNNING);
 			else
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index 54d66f05fef..f252b71ae79 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -37,6 +37,10 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
 		/* This call does nothing, except by breaking out of the Guest
 		 * it makes us process all the asynchronous hypercalls. */
 		break;
+	case LHCALL_SEND_INTERRUPTS:
+		/* This call does nothing too, but by breaking out of the Guest
+		 * it makes us process any pending interrupts. */
+		break;
 	case LHCALL_LGUEST_INIT:
 		/* You can't get here unless you're already initialized.  Don't
 		 * do that. */
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index a8c966fee1e..5a10754b479 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -131,7 +131,7 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
  * interrupt_pending() returns the first pending interrupt which isn't blocked
  * by the Guest.  It is called before every entry to the Guest, and just before
  * we go to sleep when the Guest has halted itself. */
-unsigned int interrupt_pending(struct lg_cpu *cpu)
+unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more)
 {
 	unsigned int irq;
 	DECLARE_BITMAP(blk, LGUEST_IRQS);
@@ -149,13 +149,14 @@ unsigned int interrupt_pending(struct lg_cpu *cpu)
 
 	/* Find the first interrupt. */
 	irq = find_first_bit(blk, LGUEST_IRQS);
+	*more = find_next_bit(blk, LGUEST_IRQS, irq+1);
 
 	return irq;
 }
 
 /* This actually diverts the Guest to running an interrupt handler, once an
  * interrupt has been identified by interrupt_pending(). */
-void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq)
+void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more)
 {
 	struct desc_struct *idt;
 
@@ -178,8 +179,12 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq)
 		u32 irq_enabled;
 		if (get_user(irq_enabled, &cpu->lg->lguest_data->irq_enabled))
 			irq_enabled = 0;
-		if (!irq_enabled)
+		if (!irq_enabled) {
+			/* Make sure they know an IRQ is pending. */
+			put_user(X86_EFLAGS_IF,
+				 &cpu->lg->lguest_data->irq_pending);
 			return;
+		}
 	}
 
 	/* Look at the IDT entry the Guest gave us for this interrupt.  The
@@ -202,6 +207,11 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq)
 	 * here is a compromise which means at least it gets updated every
 	 * timer interrupt. */
 	write_timestamp(cpu);
+
+	/* If there are no other interrupts we want to deliver, clear
+	 * the pending flag. */
+	if (!more)
+		put_user(0, &cpu->lg->lguest_data->irq_pending);
 }
 /*:*/
 
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 6743cf147d9..573896533ac 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -139,8 +139,8 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user);
 #define pgd_pfn(x)	(pgd_val(x) >> PAGE_SHIFT)
 
 /* interrupts_and_traps.c: */
-unsigned int interrupt_pending(struct lg_cpu *cpu);
-void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq);
+unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more);
+void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more);
 bool deliver_trap(struct lg_cpu *cpu, unsigned int num);
 void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int i,
 			  u32 low, u32 hi);
diff --git a/include/linux/lguest.h b/include/linux/lguest.h
index 175e63f4a8c..7bc1440fc47 100644
--- a/include/linux/lguest.h
+++ b/include/linux/lguest.h
@@ -30,6 +30,10 @@ struct lguest_data
 	/* Wallclock time set by the Host. */
 	struct timespec time;
 
+	/* Interrupt pending set by the Host.  The Guest should do a hypercall
+	 * if it re-enables interrupts and sees this set (to X86_EFLAGS_IF). */
+	int irq_pending;
+
 	/* Async hypercall ring.  Instead of directly making hypercalls, we can
 	 * place them in here for processing the next time the Host wants.
 	 * This batching can be quite efficient. */
-- 
cgit v1.2.3-70-g09d2


From df60aeef4f4fe0645d9a195a7689005520422de5 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:27:09 -0600
Subject: lguest: use eventfds for device notification

Currently, when a Guest wants to perform I/O it calls LHCALL_NOTIFY with
an address: the main Launcher process returns with this address, and figures
out what device to run.

A far nicer model is to let processes bind an eventfd to an address: if we
find one, we simply signal the eventfd.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Davide Libenzi <davidel@xmailserver.org>
---
 drivers/lguest/Kconfig          |  2 +-
 drivers/lguest/core.c           |  8 ++--
 drivers/lguest/lg.h             | 13 ++++++
 drivers/lguest/lguest_user.c    | 98 ++++++++++++++++++++++++++++++++++++++++-
 include/linux/lguest_launcher.h |  1 +
 5 files changed, 116 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/lguest/Kconfig b/drivers/lguest/Kconfig
index 8f63845db83..0aaa0597a62 100644
--- a/drivers/lguest/Kconfig
+++ b/drivers/lguest/Kconfig
@@ -1,6 +1,6 @@
 config LGUEST
 	tristate "Linux hypervisor example code"
-	depends on X86_32 && EXPERIMENTAL && FUTEX
+	depends on X86_32 && EXPERIMENTAL && EVENTFD
 	select HVC_DRIVER
 	---help---
 	  This is a very simple module which allows you to run
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index d0298dc45d9..508569c9571 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -198,9 +198,11 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 		/* It's possible the Guest did a NOTIFY hypercall to the
 		 * Launcher, in which case we return from the read() now. */
 		if (cpu->pending_notify) {
-			if (put_user(cpu->pending_notify, user))
-				return -EFAULT;
-			return sizeof(cpu->pending_notify);
+			if (!send_notify_to_eventfd(cpu)) {
+				if (put_user(cpu->pending_notify, user))
+					return -EFAULT;
+				return sizeof(cpu->pending_notify);
+			}
 		}
 
 		/* Check for signals */
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 040cb70780e..32fefdc6ad3 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -82,6 +82,16 @@ struct lg_cpu {
 	struct lg_cpu_arch arch;
 };
 
+struct lg_eventfd {
+	unsigned long addr;
+	struct file *event;
+};
+
+struct lg_eventfd_map {
+	unsigned int num;
+	struct lg_eventfd map[];
+};
+
 /* The private info the thread maintains about the guest. */
 struct lguest
 {
@@ -102,6 +112,8 @@ struct lguest
 	unsigned int stack_pages;
 	u32 tsc_khz;
 
+	struct lg_eventfd_map *eventfds;
+
 	/* Dead? */
 	const char *dead;
 };
@@ -154,6 +166,7 @@ void setup_default_idt_entries(struct lguest_ro_state *state,
 void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt,
 		const unsigned long *def);
 void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta);
+bool send_notify_to_eventfd(struct lg_cpu *cpu);
 void init_clockdev(struct lg_cpu *cpu);
 bool check_syscall_vector(struct lguest *lg);
 int init_interrupts(void);
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index 1982b45bd93..f6bf255f183 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -7,6 +7,8 @@
 #include <linux/miscdevice.h>
 #include <linux/fs.h>
 #include <linux/sched.h>
+#include <linux/eventfd.h>
+#include <linux/file.h>
 #include "lg.h"
 
 /*L:055 When something happens, the Waker process needs a way to stop the
@@ -35,6 +37,81 @@ static int break_guest_out(struct lg_cpu *cpu, const unsigned long __user*input)
 	}
 }
 
+bool send_notify_to_eventfd(struct lg_cpu *cpu)
+{
+	unsigned int i;
+	struct lg_eventfd_map *map;
+
+	/* lg->eventfds is RCU-protected */
+	rcu_read_lock();
+	map = rcu_dereference(cpu->lg->eventfds);
+	for (i = 0; i < map->num; i++) {
+		if (map->map[i].addr == cpu->pending_notify) {
+			eventfd_signal(map->map[i].event, 1);
+			cpu->pending_notify = 0;
+			break;
+		}
+	}
+	rcu_read_unlock();
+	return cpu->pending_notify == 0;
+}
+
+static int add_eventfd(struct lguest *lg, unsigned long addr, int fd)
+{
+	struct lg_eventfd_map *new, *old = lg->eventfds;
+
+	if (!addr)
+		return -EINVAL;
+
+	/* Replace the old array with the new one, carefully: others can
+	 * be accessing it at the same time */
+	new = kmalloc(sizeof(*new) + sizeof(new->map[0]) * (old->num + 1),
+		      GFP_KERNEL);
+	if (!new)
+		return -ENOMEM;
+
+	/* First make identical copy. */
+	memcpy(new->map, old->map, sizeof(old->map[0]) * old->num);
+	new->num = old->num;
+
+	/* Now append new entry. */
+	new->map[new->num].addr = addr;
+	new->map[new->num].event = eventfd_fget(fd);
+	if (IS_ERR(new->map[new->num].event)) {
+		kfree(new);
+		return PTR_ERR(new->map[new->num].event);
+	}
+	new->num++;
+
+	/* Now put new one in place. */
+	rcu_assign_pointer(lg->eventfds, new);
+
+	/* We're not in a big hurry.  Wait until noone's looking at old
+	 * version, then delete it. */
+	synchronize_rcu();
+	kfree(old);
+
+	return 0;
+}
+
+static int attach_eventfd(struct lguest *lg, const unsigned long __user *input)
+{
+	unsigned long addr, fd;
+	int err;
+
+	if (get_user(addr, input) != 0)
+		return -EFAULT;
+	input++;
+	if (get_user(fd, input) != 0)
+		return -EFAULT;
+
+	mutex_lock(&lguest_lock);
+	err = add_eventfd(lg, addr, fd);
+	mutex_unlock(&lguest_lock);
+
+	return 0;
+}
+
 /*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt
  * number to /dev/lguest. */
 static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input)
@@ -184,6 +261,13 @@ static int initialize(struct file *file, const unsigned long __user *input)
 		goto unlock;
 	}
 
+	lg->eventfds = kmalloc(sizeof(*lg->eventfds), GFP_KERNEL);
+	if (!lg->eventfds) {
+		err = -ENOMEM;
+		goto free_lg;
+	}
+	lg->eventfds->num = 0;
+
 	/* Populate the easy fields of our "struct lguest" */
 	lg->mem_base = (void __user *)args[0];
 	lg->pfn_limit = args[1];
@@ -191,7 +275,7 @@ static int initialize(struct file *file, const unsigned long __user *input)
 	/* This is the first cpu (cpu 0) and it will start booting at args[2] */
 	err = lg_cpu_start(&lg->cpus[0], 0, args[2]);
 	if (err)
-		goto release_guest;
+		goto free_eventfds;
 
 	/* Initialize the Guest's shadow page tables, using the toplevel
 	 * address the Launcher gave us.  This allocates memory, so can fail. */
@@ -210,7 +294,9 @@ static int initialize(struct file *file, const unsigned long __user *input)
 free_regs:
 	/* FIXME: This should be in free_vcpu */
 	free_page(lg->cpus[0].regs_page);
-release_guest:
+free_eventfds:
+	kfree(lg->eventfds);
+free_lg:
 	kfree(lg);
 unlock:
 	mutex_unlock(&lguest_lock);
@@ -260,6 +346,8 @@ static ssize_t write(struct file *file, const char __user *in,
 		return user_send_irq(cpu, input);
 	case LHREQ_BREAK:
 		return break_guest_out(cpu, input);
+	case LHREQ_EVENTFD:
+		return attach_eventfd(lg, input);
 	default:
 		return -EINVAL;
 	}
@@ -297,6 +385,12 @@ static int close(struct inode *inode, struct file *file)
 		 * the Launcher's memory management structure. */
 		mmput(lg->cpus[i].mm);
 	}
+
+	/* Release any eventfds they registered. */
+	for (i = 0; i < lg->eventfds->num; i++)
+		fput(lg->eventfds->map[i].event);
+	kfree(lg->eventfds);
+
 	/* If lg->dead doesn't contain an error code it will be NULL or a
 	 * kmalloc()ed string, either of which is ok to hand to kfree(). */
 	if (!IS_ERR(lg->dead))
diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h
index a53407a4165..9de964b9058 100644
--- a/include/linux/lguest_launcher.h
+++ b/include/linux/lguest_launcher.h
@@ -58,6 +58,7 @@ enum lguest_req
 	LHREQ_GETDMA, /* No longer used */
 	LHREQ_IRQ, /* + irq */
 	LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */
+	LHREQ_EVENTFD, /* + address, fd. */
 };
 
 /* The alignment to use between consumer and producer parts of vring.
-- 
cgit v1.2.3-70-g09d2


From 5dac051bc6030963181b69faddd9e0ad04f85fa8 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 12 Jun 2009 22:27:10 -0600
Subject: lguest: remove obsolete LHREQ_BREAK call

We no longer need an efficient mechanism to force the Guest back into
host userspace, as each device is serviced without bothering the main
Guest process (aka. the Launcher).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/lguest/core.c           | 11 +++--------
 drivers/lguest/lg.h             |  4 +---
 drivers/lguest/lguest_user.c    | 31 -------------------------------
 include/linux/lguest_launcher.h |  2 +-
 4 files changed, 5 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 508569c9571..a6974e9b8eb 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -209,10 +209,6 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 		if (signal_pending(current))
 			return -ERESTARTSYS;
 
-		/* If Waker set break_out, return to Launcher. */
-		if (cpu->break_out)
-			return -EAGAIN;
-
 		/* Check if there are any interrupts which can be delivered now:
 		 * if so, this sets up the hander to be executed when we next
 		 * run the Guest. */
@@ -231,13 +227,12 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 			break;
 
 		/* If the Guest asked to be stopped, we sleep.  The Guest's
-		 * clock timer or LHREQ_BREAK from the Waker will wake us. */
+		 * clock timer will wake us. */
 		if (cpu->halted) {
 			set_current_state(TASK_INTERRUPTIBLE);
-			/* Just before we sleep, make sure nothing snuck in
+			/* Just before we sleep, make sure no interrupt snuck in
 			 * which we should be doing. */
-			if (interrupt_pending(cpu, &more) < LGUEST_IRQS
-			    || cpu->break_out)
+			if (interrupt_pending(cpu, &more) < LGUEST_IRQS)
 				set_current_state(TASK_RUNNING);
 			else
 				schedule();
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 32fefdc6ad3..d4e8979735c 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -71,9 +71,7 @@ struct lg_cpu {
 	/* Virtual clock device */
 	struct hrtimer hrt;
 
-	/* Do we need to stop what we're doing and return to userspace? */
-	int break_out;
-	wait_queue_head_t break_wq;
+	/* Did the Guest tell us to halt? */
 	int halted;
 
 	/* Pending virtual interrupts */
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index f6bf255f183..32e29712105 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -11,32 +11,6 @@
 #include <linux/file.h>
 #include "lg.h"
 
-/*L:055 When something happens, the Waker process needs a way to stop the
- * kernel running the Guest and return to the Launcher.  So the Waker writes
- * LHREQ_BREAK and the value "1" to /dev/lguest to do this.  Once the Launcher
- * has done whatever needs attention, it writes LHREQ_BREAK and "0" to release
- * the Waker. */
-static int break_guest_out(struct lg_cpu *cpu, const unsigned long __user*input)
-{
-	unsigned long on;
-
-	/* Fetch whether they're turning break on or off. */
-	if (get_user(on, input) != 0)
-		return -EFAULT;
-
-	if (on) {
-		cpu->break_out = 1;
-		if (!wake_up_process(cpu->tsk))
-			kick_process(cpu->tsk);
-		/* Wait for them to reset it */
-		return wait_event_interruptible(cpu->break_wq, !cpu->break_out);
-	} else {
-		cpu->break_out = 0;
-		wake_up(&cpu->break_wq);
-		return 0;
-	}
-}
-
 bool send_notify_to_eventfd(struct lg_cpu *cpu)
 {
 	unsigned int i;
@@ -202,9 +176,6 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip)
 	 * address. */
 	lguest_arch_setup_regs(cpu, start_ip);
 
-	/* Initialize the queue for the Waker to wait on */
-	init_waitqueue_head(&cpu->break_wq);
-
 	/* We keep a pointer to the Launcher task (ie. current task) for when
 	 * other Guests want to wake this one (eg. console input). */
 	cpu->tsk = current;
@@ -344,8 +315,6 @@ static ssize_t write(struct file *file, const char __user *in,
 		return initialize(file, input);
 	case LHREQ_IRQ:
 		return user_send_irq(cpu, input);
-	case LHREQ_BREAK:
-		return break_guest_out(cpu, input);
 	case LHREQ_EVENTFD:
 		return attach_eventfd(lg, input);
 	default:
diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h
index 9de964b9058..bfefbdf7498 100644
--- a/include/linux/lguest_launcher.h
+++ b/include/linux/lguest_launcher.h
@@ -57,7 +57,7 @@ enum lguest_req
 	LHREQ_INITIALIZE, /* + base, pfnlimit, start */
 	LHREQ_GETDMA, /* No longer used */
 	LHREQ_IRQ, /* + irq */
-	LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */
+	LHREQ_BREAK, /* No longer used */
 	LHREQ_EVENTFD, /* + address, fd. */
 };
 
-- 
cgit v1.2.3-70-g09d2


From 7e85ee0c1d15ca5f8bff0f514f158eba1742dd87 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Fri, 12 Jun 2009 14:03:06 +0300
Subject: slab,slub: don't enable interrupts during early boot

As explained by Benjamin Herrenschmidt:

  Oh and btw, your patch alone doesn't fix powerpc, because it's missing
  a whole bunch of GFP_KERNEL's in the arch code... You would have to
  grep the entire kernel for things that check slab_is_available() and
  even then you'll be missing some.

  For example, slab_is_available() didn't always exist, and so in the
  early days on powerpc, we used a mem_init_done global that is set form
  mem_init() (not perfect but works in practice). And we still have code
  using that to do the test.

Therefore, mask out __GFP_WAIT, __GFP_IO, and __GFP_FS in the slab allocators
in early boot code to avoid enabling interrupts.

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
 include/linux/gfp.h      |  3 +++
 include/linux/slab.h     |  2 ++
 include/linux/slob_def.h |  5 +++++
 include/linux/slub_def.h |  2 ++
 init/main.c              |  1 +
 mm/slab.c                | 18 ++++++++++++++++++
 mm/slub.c                | 16 ++++++++++++++++
 7 files changed, 47 insertions(+)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 0bbc15f5453..3760e7c5de0 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -85,6 +85,9 @@ struct vm_area_struct;
 			__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
 			__GFP_NORETRY|__GFP_NOMEMALLOC)
 
+/* Control slab gfp mask during early boot */
+#define SLAB_GFP_BOOT_MASK __GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS)
+
 /* Control allocation constraints */
 #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
 
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 48803064ced..219b8fb4651 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -319,4 +319,6 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node)
 	return kmalloc_node(size, flags | __GFP_ZERO, node);
 }
 
+void __init kmem_cache_init_late(void);
+
 #endif	/* _LINUX_SLAB_H */
diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h
index 0ec00b39d00..bb5368df4be 100644
--- a/include/linux/slob_def.h
+++ b/include/linux/slob_def.h
@@ -34,4 +34,9 @@ static __always_inline void *__kmalloc(size_t size, gfp_t flags)
 	return kmalloc(size, flags);
 }
 
+static inline void kmem_cache_init_late(void)
+{
+	/* Nothing to do */
+}
+
 #endif /* __LINUX_SLOB_DEF_H */
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index be5d40c43bd..4dcbc2c7149 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -302,4 +302,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 }
 #endif
 
+void __init kmem_cache_init_late(void);
+
 #endif /* _LINUX_SLUB_DEF_H */
diff --git a/init/main.c b/init/main.c
index b3e8f14c568..f6204f712e7 100644
--- a/init/main.c
+++ b/init/main.c
@@ -640,6 +640,7 @@ asmlinkage void __init start_kernel(void)
 				 "enabled early\n");
 	early_boot_irqs_on();
 	local_irq_enable();
+	kmem_cache_init_late();
 
 	/*
 	 * HACK ALERT! This is early. We're enabling the console before
diff --git a/mm/slab.c b/mm/slab.c
index cd76964b53b..453efcb1c98 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -303,6 +303,12 @@ struct kmem_list3 {
 	int free_touched;		/* updated without locking */
 };
 
+/*
+ * The slab allocator is initialized with interrupts disabled. Therefore, make
+ * sure early boot allocations don't accidentally enable interrupts.
+ */
+static gfp_t slab_gfp_mask __read_mostly = SLAB_GFP_BOOT_MASK;
+
 /*
  * Need this for bootstrapping a per node allocator.
  */
@@ -1654,6 +1660,14 @@ void __init kmem_cache_init(void)
 	 */
 }
 
+void __init kmem_cache_init_late(void)
+{
+	/*
+	 * Interrupts are enabled now so all GFP allocations are safe.
+	 */
+	slab_gfp_mask = __GFP_BITS_MASK;
+}
+
 static int __init cpucache_init(void)
 {
 	int cpu;
@@ -3354,6 +3368,8 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
 	unsigned long save_flags;
 	void *ptr;
 
+	flags &= slab_gfp_mask;
+
 	lockdep_trace_alloc(flags);
 
 	if (slab_should_failslab(cachep, flags))
@@ -3434,6 +3450,8 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
 	unsigned long save_flags;
 	void *objp;
 
+	flags &= slab_gfp_mask;
+
 	lockdep_trace_alloc(flags);
 
 	if (slab_should_failslab(cachep, flags))
diff --git a/mm/slub.c b/mm/slub.c
index 3964d3ce4c1..30354bfeb43 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -178,6 +178,12 @@ static enum {
 	SYSFS		/* Sysfs up */
 } slab_state = DOWN;
 
+/*
+ * The slab allocator is initialized with interrupts disabled. Therefore, make
+ * sure early boot allocations don't accidentally enable interrupts.
+ */
+static gfp_t slab_gfp_mask __read_mostly = SLAB_GFP_BOOT_MASK;
+
 /* A list of all slab caches on the system */
 static DECLARE_RWSEM(slub_lock);
 static LIST_HEAD(slab_caches);
@@ -1595,6 +1601,8 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 	unsigned long flags;
 	unsigned int objsize;
 
+	gfpflags &= slab_gfp_mask;
+
 	lockdep_trace_alloc(gfpflags);
 	might_sleep_if(gfpflags & __GFP_WAIT);
 
@@ -3104,6 +3112,14 @@ void __init kmem_cache_init(void)
 		nr_cpu_ids, nr_node_ids);
 }
 
+void __init kmem_cache_init_late(void)
+{
+	/*
+	 * Interrupts are enabled now so all GFP allocations are safe.
+	 */
+	slab_gfp_mask = __GFP_BITS_MASK;
+}
+
 /*
  * Find a mergeable slab cache
  */
-- 
cgit v1.2.3-70-g09d2


From 7ea2ac9b6632038377cb488c7d1cb60b88164d4d Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo@holoscopio.com>
Date: Tue, 14 Apr 2009 23:14:17 -0300
Subject: Trivial: fix typo s/balence/balance/

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@holoscopio.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/ultrasound.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ultrasound.h b/include/linux/ultrasound.h
index 6b7703e75ce..71339dc531c 100644
--- a/include/linux/ultrasound.h
+++ b/include/linux/ultrasound.h
@@ -34,7 +34,7 @@
  *		_GUS_VOICEOFF	- Stops voice (no parameters)
  *		_GUS_VOICEFADE	- Stops the voice smoothly.
  *		_GUS_VOICEMODE	- Alters the voice mode, don't start or stop voice (P1=voice mode)
- *		_GUS_VOICEBALA	- Sets voice balence (P1, 0=left, 7=middle and 15=right, default 7)
+ *		_GUS_VOICEBALA	- Sets voice balance (P1, 0=left, 7=middle and 15=right, default 7)
  *		_GUS_VOICEFREQ	- Sets voice (sample) playback frequency (P1=Hz)
  *		_GUS_VOICEVOL	- Sets voice volume (P1=volume, 0xfff=max, 0xeff=half, 0x000=off)
  *		_GUS_VOICEVOL2	- Sets voice volume (P1=volume, 0xfff=max, 0xeff=half, 0x000=off)
-- 
cgit v1.2.3-70-g09d2


From 638772c7553f6893f7b346bfee4d46851af59afc Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@marvell.com>
Date: Wed, 11 Feb 2009 17:25:24 +0800
Subject: fb: add support of LCD display controller on pxa168/910 (base layer)

This driver is originally written by Lennert, modified by Green to be
feature complete,  and ported by Jun Nie and Kevin Liu for pxa168/910
processors.

The patch adds support for the on-chip LCD display controller, it
currently supports the base (graphics) layer only.

Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Signed-off-by: Green Wan <gwan@marvell.com>
Cc: Peter Liao <pliao@marvell.com>
Signed-off-by: Jun Nie <njun@marvell.com>
Signed-off-by: Kevin Liu <kliu5@marvell.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Eric Miao <eric.y.miao@gmail.com>
---
 drivers/video/Kconfig    |  10 +
 drivers/video/Makefile   |   1 +
 drivers/video/pxa168fb.c | 803 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/video/pxa168fb.h | 558 ++++++++++++++++++++++++++++++++
 include/video/pxa168fb.h | 127 ++++++++
 5 files changed, 1499 insertions(+)
 create mode 100644 drivers/video/pxa168fb.c
 create mode 100644 drivers/video/pxa168fb.h
 create mode 100644 include/video/pxa168fb.h

(limited to 'include')

diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 0048f1185a6..13fd66a1f10 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -1759,6 +1759,16 @@ config FB_68328
 	  Say Y here if you want to support the built-in frame buffer of
 	  the Motorola 68328 CPU family.
 
+config FB_PXA168
+	tristate "PXA168/910 LCD framebuffer support"
+	depends on FB && (CPU_PXA168 || CPU_PXA910)
+	select FB_CFB_FILLRECT
+	select FB_CFB_COPYAREA
+	select FB_CFB_IMAGEBLIT
+	---help---
+	  Frame buffer driver for the built-in LCD controller in the Marvell
+	  MMP processor.
+
 config FB_PXA
 	tristate "PXA LCD framebuffer support"
 	depends on FB && ARCH_PXA
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index d8d0be5151e..01a819f4737 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -97,6 +97,7 @@ obj-$(CONFIG_FB_GBE)              += gbefb.o
 obj-$(CONFIG_FB_CIRRUS)		  += cirrusfb.o
 obj-$(CONFIG_FB_ASILIANT)	  += asiliantfb.o
 obj-$(CONFIG_FB_PXA)		  += pxafb.o
+obj-$(CONFIG_FB_PXA168)		  += pxa168fb.o
 obj-$(CONFIG_FB_W100)		  += w100fb.o
 obj-$(CONFIG_FB_TMIO)		  += tmiofb.o
 obj-$(CONFIG_FB_AU1100)		  += au1100fb.o
diff --git a/drivers/video/pxa168fb.c b/drivers/video/pxa168fb.c
new file mode 100644
index 00000000000..84d8327e47d
--- /dev/null
+++ b/drivers/video/pxa168fb.c
@@ -0,0 +1,803 @@
+/*
+ * linux/drivers/video/pxa168fb.c -- Marvell PXA168 LCD Controller
+ *
+ *  Copyright (C) 2008 Marvell International Ltd.
+ *  All rights reserved.
+ *
+ *  2009-02-16  adapted from original version for PXA168/910
+ *              Jun Nie <njun@marvell.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive for
+ * more details.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/fb.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/uaccess.h>
+#include <video/pxa168fb.h>
+
+#include "pxa168fb.h"
+
+#define DEFAULT_REFRESH		60	/* Hz */
+
+static int determine_best_pix_fmt(struct fb_var_screeninfo *var)
+{
+	/*
+	 * Pseudocolor mode?
+	 */
+	if (var->bits_per_pixel == 8)
+		return PIX_FMT_PSEUDOCOLOR;
+
+	/*
+	 * Check for 565/1555.
+	 */
+	if (var->bits_per_pixel == 16 && var->red.length <= 5 &&
+	    var->green.length <= 6 && var->blue.length <= 5) {
+		if (var->transp.length == 0) {
+			if (var->red.offset >= var->blue.offset)
+				return PIX_FMT_RGB565;
+			else
+				return PIX_FMT_BGR565;
+		}
+
+		if (var->transp.length == 1 && var->green.length <= 5) {
+			if (var->red.offset >= var->blue.offset)
+				return PIX_FMT_RGB1555;
+			else
+				return PIX_FMT_BGR1555;
+		}
+
+		/* fall through */
+	}
+
+	/*
+	 * Check for 888/A888.
+	 */
+	if (var->bits_per_pixel <= 32 && var->red.length <= 8 &&
+	    var->green.length <= 8 && var->blue.length <= 8) {
+		if (var->bits_per_pixel == 24 && var->transp.length == 0) {
+			if (var->red.offset >= var->blue.offset)
+				return PIX_FMT_RGB888PACK;
+			else
+				return PIX_FMT_BGR888PACK;
+		}
+
+		if (var->bits_per_pixel == 32 && var->transp.length == 8) {
+			if (var->red.offset >= var->blue.offset)
+				return PIX_FMT_RGBA888;
+			else
+				return PIX_FMT_BGRA888;
+		} else {
+			if (var->red.offset >= var->blue.offset)
+				return PIX_FMT_RGB888UNPACK;
+			else
+				return PIX_FMT_BGR888UNPACK;
+		}
+
+		/* fall through */
+	}
+
+	return -EINVAL;
+}
+
+static void set_pix_fmt(struct fb_var_screeninfo *var, int pix_fmt)
+{
+	switch (pix_fmt) {
+	case PIX_FMT_RGB565:
+		var->bits_per_pixel = 16;
+		var->red.offset = 11;    var->red.length = 5;
+		var->green.offset = 5;   var->green.length = 6;
+		var->blue.offset = 0;    var->blue.length = 5;
+		var->transp.offset = 0;  var->transp.length = 0;
+		break;
+	case PIX_FMT_BGR565:
+		var->bits_per_pixel = 16;
+		var->red.offset = 0;     var->red.length = 5;
+		var->green.offset = 5;   var->green.length = 6;
+		var->blue.offset = 11;   var->blue.length = 5;
+		var->transp.offset = 0;  var->transp.length = 0;
+		break;
+	case PIX_FMT_RGB1555:
+		var->bits_per_pixel = 16;
+		var->red.offset = 10;    var->red.length = 5;
+		var->green.offset = 5;   var->green.length = 5;
+		var->blue.offset = 0;    var->blue.length = 5;
+		var->transp.offset = 15; var->transp.length = 1;
+		break;
+	case PIX_FMT_BGR1555:
+		var->bits_per_pixel = 16;
+		var->red.offset = 0;     var->red.length = 5;
+		var->green.offset = 5;   var->green.length = 5;
+		var->blue.offset = 10;   var->blue.length = 5;
+		var->transp.offset = 15; var->transp.length = 1;
+		break;
+	case PIX_FMT_RGB888PACK:
+		var->bits_per_pixel = 24;
+		var->red.offset = 16;    var->red.length = 8;
+		var->green.offset = 8;   var->green.length = 8;
+		var->blue.offset = 0;    var->blue.length = 8;
+		var->transp.offset = 0;  var->transp.length = 0;
+		break;
+	case PIX_FMT_BGR888PACK:
+		var->bits_per_pixel = 24;
+		var->red.offset = 0;     var->red.length = 8;
+		var->green.offset = 8;   var->green.length = 8;
+		var->blue.offset = 16;   var->blue.length = 8;
+		var->transp.offset = 0;  var->transp.length = 0;
+		break;
+	case PIX_FMT_RGBA888:
+		var->bits_per_pixel = 32;
+		var->red.offset = 16;    var->red.length = 8;
+		var->green.offset = 8;   var->green.length = 8;
+		var->blue.offset = 0;    var->blue.length = 8;
+		var->transp.offset = 24; var->transp.length = 8;
+		break;
+	case PIX_FMT_BGRA888:
+		var->bits_per_pixel = 32;
+		var->red.offset = 0;     var->red.length = 8;
+		var->green.offset = 8;   var->green.length = 8;
+		var->blue.offset = 16;   var->blue.length = 8;
+		var->transp.offset = 24; var->transp.length = 8;
+		break;
+	case PIX_FMT_PSEUDOCOLOR:
+		var->bits_per_pixel = 8;
+		var->red.offset = 0;     var->red.length = 8;
+		var->green.offset = 0;   var->green.length = 8;
+		var->blue.offset = 0;    var->blue.length = 8;
+		var->transp.offset = 0;  var->transp.length = 0;
+		break;
+	}
+}
+
+static void set_mode(struct pxa168fb_info *fbi, struct fb_var_screeninfo *var,
+		     struct fb_videomode *mode, int pix_fmt, int ystretch)
+{
+	struct fb_info *info = fbi->info;
+
+	set_pix_fmt(var, pix_fmt);
+
+	var->xres = mode->xres;
+	var->yres = mode->yres;
+	var->xres_virtual = max(var->xres, var->xres_virtual);
+	if (ystretch)
+		var->yres_virtual = info->fix.smem_len /
+			(var->xres_virtual * (var->bits_per_pixel >> 3));
+	else
+		var->yres_virtual = max(var->yres, var->yres_virtual);
+	var->grayscale = 0;
+	var->accel_flags = FB_ACCEL_NONE;
+	var->pixclock = mode->pixclock;
+	var->left_margin = mode->left_margin;
+	var->right_margin = mode->right_margin;
+	var->upper_margin = mode->upper_margin;
+	var->lower_margin = mode->lower_margin;
+	var->hsync_len = mode->hsync_len;
+	var->vsync_len = mode->vsync_len;
+	var->sync = mode->sync;
+	var->vmode = FB_VMODE_NONINTERLACED;
+	var->rotate = FB_ROTATE_UR;
+}
+
+static int pxa168fb_check_var(struct fb_var_screeninfo *var,
+			      struct fb_info *info)
+{
+	struct pxa168fb_info *fbi = info->par;
+	int pix_fmt;
+
+	/*
+	 * Determine which pixel format we're going to use.
+	 */
+	pix_fmt = determine_best_pix_fmt(var);
+	if (pix_fmt < 0)
+		return pix_fmt;
+	set_pix_fmt(var, pix_fmt);
+	fbi->pix_fmt = pix_fmt;
+
+	/*
+	 * Basic geometry sanity checks.
+	 */
+	if (var->xoffset + var->xres > var->xres_virtual)
+		return -EINVAL;
+	if (var->yoffset + var->yres > var->yres_virtual)
+		return -EINVAL;
+	if (var->xres + var->right_margin +
+	    var->hsync_len + var->left_margin > 2048)
+		return -EINVAL;
+	if (var->yres + var->lower_margin +
+	    var->vsync_len + var->upper_margin > 2048)
+		return -EINVAL;
+
+	/*
+	 * Check size of framebuffer.
+	 */
+	if (var->xres_virtual * var->yres_virtual *
+	    (var->bits_per_pixel >> 3) > info->fix.smem_len)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * The hardware clock divider has an integer and a fractional
+ * stage:
+ *
+ *	clk2 = clk_in / integer_divider
+ *	clk_out = clk2 * (1 - (fractional_divider >> 12))
+ *
+ * Calculate integer and fractional divider for given clk_in
+ * and clk_out.
+ */
+static void set_clock_divider(struct pxa168fb_info *fbi,
+			      const struct fb_videomode *m)
+{
+	int divider_int;
+	int needed_pixclk;
+	u64 div_result;
+	u32 x = 0;
+
+	/*
+	 * Notice: The field pixclock is used by linux fb
+	 * is in pixel second. E.g. struct fb_videomode &
+	 * struct fb_var_screeninfo
+	 */
+
+	/*
+	 * Check input values.
+	 */
+	if (!m || !m->pixclock || !m->refresh) {
+		dev_err(fbi->dev, "Input refresh or pixclock is wrong.\n");
+		return;
+	}
+
+	/*
+	 * Using PLL/AXI clock.
+	 */
+	x = 0x80000000;
+
+	/*
+	 * Calc divider according to refresh rate.
+	 */
+	div_result = 1000000000000ll;
+	do_div(div_result, m->pixclock);
+	needed_pixclk = (u32)div_result;
+
+	divider_int = clk_get_rate(fbi->clk) / needed_pixclk;
+
+	/* check whether divisor is too small. */
+	if (divider_int < 2) {
+		dev_warn(fbi->dev, "Warning: clock source is too slow."
+				"Try smaller resolution\n");
+		divider_int = 2;
+	}
+
+	/*
+	 * Set setting to reg.
+	 */
+	x |= divider_int;
+	writel(x, fbi->reg_base + LCD_CFG_SCLK_DIV);
+}
+
+static void set_dma_control0(struct pxa168fb_info *fbi)
+{
+	u32 x;
+
+	/*
+	 * Set bit to enable graphics DMA.
+	 */
+	x = readl(fbi->reg_base + LCD_SPU_DMA_CTRL0);
+	x |= fbi->active ? 0x00000100 : 0;
+	fbi->active = 0;
+
+	/*
+	 * If we are in a pseudo-color mode, we need to enable
+	 * palette lookup.
+	 */
+	if (fbi->pix_fmt == PIX_FMT_PSEUDOCOLOR)
+		x |= 0x10000000;
+
+	/*
+	 * Configure hardware pixel format.
+	 */
+	x &= ~(0xF << 16);
+	x |= (fbi->pix_fmt >> 1) << 16;
+
+	/*
+	 * Check red and blue pixel swap.
+	 * 1. source data swap
+	 * 2. panel output data swap
+	 */
+	x &= ~(1 << 12);
+	x |= ((fbi->pix_fmt & 1) ^ (fbi->panel_rbswap)) << 12;
+
+	writel(x, fbi->reg_base + LCD_SPU_DMA_CTRL0);
+}
+
+static void set_dma_control1(struct pxa168fb_info *fbi, int sync)
+{
+	u32 x;
+
+	/*
+	 * Configure default bits: vsync triggers DMA, gated clock
+	 * enable, power save enable, configure alpha registers to
+	 * display 100% graphics, and set pixel command.
+	 */
+	x = readl(fbi->reg_base + LCD_SPU_DMA_CTRL1);
+	x |= 0x2032ff81;
+
+	/*
+	 * We trigger DMA on the falling edge of vsync if vsync is
+	 * active low, or on the rising edge if vsync is active high.
+	 */
+	if (!(sync & FB_SYNC_VERT_HIGH_ACT))
+		x |= 0x08000000;
+
+	writel(x, fbi->reg_base + LCD_SPU_DMA_CTRL1);
+}
+
+static void set_graphics_start(struct fb_info *info, int xoffset, int yoffset)
+{
+	struct pxa168fb_info *fbi = info->par;
+	struct fb_var_screeninfo *var = &info->var;
+	int pixel_offset;
+	unsigned long addr;
+
+	pixel_offset = (yoffset * var->xres_virtual) + xoffset;
+
+	addr = fbi->fb_start_dma + (pixel_offset * (var->bits_per_pixel >> 3));
+	writel(addr, fbi->reg_base + LCD_CFG_GRA_START_ADDR0);
+}
+
+static void set_dumb_panel_control(struct fb_info *info)
+{
+	struct pxa168fb_info *fbi = info->par;
+	struct pxa168fb_mach_info *mi = fbi->dev->platform_data;
+	u32 x;
+
+	/*
+	 * Preserve enable flag.
+	 */
+	x = readl(fbi->reg_base + LCD_SPU_DUMB_CTRL) & 0x00000001;
+
+	x |= (fbi->is_blanked ? 0x7 : mi->dumb_mode) << 28;
+	x |= mi->gpio_output_data << 20;
+	x |= mi->gpio_output_mask << 12;
+	x |= mi->panel_rgb_reverse_lanes ? 0x00000080 : 0;
+	x |= mi->invert_composite_blank ? 0x00000040 : 0;
+	x |= (info->var.sync & FB_SYNC_COMP_HIGH_ACT) ? 0x00000020 : 0;
+	x |= mi->invert_pix_val_ena ? 0x00000010 : 0;
+	x |= (info->var.sync & FB_SYNC_VERT_HIGH_ACT) ? 0 : 0x00000008;
+	x |= (info->var.sync & FB_SYNC_HOR_HIGH_ACT) ? 0 : 0x00000004;
+	x |= mi->invert_pixclock ? 0x00000002 : 0;
+
+	writel(x, fbi->reg_base + LCD_SPU_DUMB_CTRL);
+}
+
+static void set_dumb_screen_dimensions(struct fb_info *info)
+{
+	struct pxa168fb_info *fbi = info->par;
+	struct fb_var_screeninfo *v = &info->var;
+	int x;
+	int y;
+
+	x = v->xres + v->right_margin + v->hsync_len + v->left_margin;
+	y = v->yres + v->lower_margin + v->vsync_len + v->upper_margin;
+
+	writel((y << 16) | x, fbi->reg_base + LCD_SPUT_V_H_TOTAL);
+}
+
+static int pxa168fb_set_par(struct fb_info *info)
+{
+	struct pxa168fb_info *fbi = info->par;
+	struct fb_var_screeninfo *var = &info->var;
+	struct fb_videomode mode;
+	u32 x;
+	struct pxa168fb_mach_info *mi;
+
+	mi = fbi->dev->platform_data;
+
+	/*
+	 * Set additional mode info.
+	 */
+	if (fbi->pix_fmt == PIX_FMT_PSEUDOCOLOR)
+		info->fix.visual = FB_VISUAL_PSEUDOCOLOR;
+	else
+		info->fix.visual = FB_VISUAL_TRUECOLOR;
+	info->fix.line_length = var->xres_virtual * var->bits_per_pixel / 8;
+	info->fix.ypanstep = var->yres;
+
+	/*
+	 * Disable panel output while we setup the display.
+	 */
+	x = readl(fbi->reg_base + LCD_SPU_DUMB_CTRL);
+	writel(x & ~1, fbi->reg_base + LCD_SPU_DUMB_CTRL);
+
+	/*
+	 * Configure global panel parameters.
+	 */
+	writel((var->yres << 16) | var->xres,
+		fbi->reg_base + LCD_SPU_V_H_ACTIVE);
+
+	/*
+	 * convet var to video mode
+	 */
+	fb_var_to_videomode(&mode, &info->var);
+
+	/* Calculate clock divisor. */
+	set_clock_divider(fbi, &mode);
+
+	/* Configure dma ctrl regs. */
+	set_dma_control0(fbi);
+	set_dma_control1(fbi, info->var.sync);
+
+	/*
+	 * Configure graphics DMA parameters.
+	 */
+	x = readl(fbi->reg_base + LCD_CFG_GRA_PITCH);
+	x = (x & ~0xFFFF) | ((var->xres_virtual * var->bits_per_pixel) >> 3);
+	writel(x, fbi->reg_base + LCD_CFG_GRA_PITCH);
+	writel((var->yres << 16) | var->xres,
+		fbi->reg_base + LCD_SPU_GRA_HPXL_VLN);
+	writel((var->yres << 16) | var->xres,
+		fbi->reg_base + LCD_SPU_GZM_HPXL_VLN);
+
+	/*
+	 * Configure dumb panel ctrl regs & timings.
+	 */
+	set_dumb_panel_control(info);
+	set_dumb_screen_dimensions(info);
+
+	writel((var->left_margin << 16) | var->right_margin,
+			fbi->reg_base + LCD_SPU_H_PORCH);
+	writel((var->upper_margin << 16) | var->lower_margin,
+			fbi->reg_base + LCD_SPU_V_PORCH);
+
+	/*
+	 * Re-enable panel output.
+	 */
+	x = readl(fbi->reg_base + LCD_SPU_DUMB_CTRL);
+	writel(x | 1, fbi->reg_base + LCD_SPU_DUMB_CTRL);
+
+	return 0;
+}
+
+static unsigned int chan_to_field(unsigned int chan, struct fb_bitfield *bf)
+{
+	return ((chan & 0xffff) >> (16 - bf->length)) << bf->offset;
+}
+
+static u32 to_rgb(u16 red, u16 green, u16 blue)
+{
+	red >>= 8;
+	green >>= 8;
+	blue >>= 8;
+
+	return (red << 16) | (green << 8) | blue;
+}
+
+static int
+pxa168fb_setcolreg(unsigned int regno, unsigned int red, unsigned int green,
+		 unsigned int blue, unsigned int trans, struct fb_info *info)
+{
+	struct pxa168fb_info *fbi = info->par;
+	u32 val;
+
+	if (info->var.grayscale)
+		red = green = blue = (19595 * red + 38470 * green +
+					7471 * blue) >> 16;
+
+	if (info->fix.visual == FB_VISUAL_TRUECOLOR && regno < 16) {
+		val =  chan_to_field(red,   &info->var.red);
+		val |= chan_to_field(green, &info->var.green);
+		val |= chan_to_field(blue , &info->var.blue);
+		fbi->pseudo_palette[regno] = val;
+	}
+
+	if (info->fix.visual == FB_VISUAL_PSEUDOCOLOR && regno < 256) {
+		val = to_rgb(red, green, blue);
+		writel(val, fbi->reg_base + LCD_SPU_SRAM_WRDAT);
+		writel(0x8300 | regno, fbi->reg_base + LCD_SPU_SRAM_CTRL);
+	}
+
+	return 0;
+}
+
+static int pxa168fb_blank(int blank, struct fb_info *info)
+{
+	struct pxa168fb_info *fbi = info->par;
+
+	fbi->is_blanked = (blank == FB_BLANK_UNBLANK) ? 0 : 1;
+	set_dumb_panel_control(info);
+
+	return 0;
+}
+
+static int pxa168fb_pan_display(struct fb_var_screeninfo *var,
+				struct fb_info *info)
+{
+	set_graphics_start(info, var->xoffset, var->yoffset);
+
+	return 0;
+}
+
+static irqreturn_t pxa168fb_handle_irq(int irq, void *dev_id)
+{
+	struct pxa168fb_info *fbi = dev_id;
+	u32 isr = readl(fbi->reg_base + SPU_IRQ_ISR);
+
+	if ((isr & GRA_FRAME_IRQ0_ENA_MASK)) {
+
+		writel(isr & (~GRA_FRAME_IRQ0_ENA_MASK),
+			fbi->reg_base + SPU_IRQ_ISR);
+
+		return IRQ_HANDLED;
+	}
+	return IRQ_NONE;
+}
+
+static struct fb_ops pxa168fb_ops = {
+	.owner		= THIS_MODULE,
+	.fb_check_var	= pxa168fb_check_var,
+	.fb_set_par	= pxa168fb_set_par,
+	.fb_setcolreg	= pxa168fb_setcolreg,
+	.fb_blank	= pxa168fb_blank,
+	.fb_pan_display	= pxa168fb_pan_display,
+	.fb_fillrect	= cfb_fillrect,
+	.fb_copyarea	= cfb_copyarea,
+	.fb_imageblit	= cfb_imageblit,
+};
+
+static int __init pxa168fb_init_mode(struct fb_info *info,
+			      struct pxa168fb_mach_info *mi)
+{
+	struct pxa168fb_info *fbi = info->par;
+	struct fb_var_screeninfo *var = &info->var;
+	int ret = 0;
+	u32 total_w, total_h, refresh;
+	u64 div_result;
+	const struct fb_videomode *m;
+
+	/*
+	 * Set default value
+	 */
+	refresh = DEFAULT_REFRESH;
+
+	/* try to find best video mode. */
+	m = fb_find_best_mode(&info->var, &info->modelist);
+	if (m)
+		fb_videomode_to_var(&info->var, m);
+
+	/* Init settings. */
+	var->xres_virtual = var->xres;
+	var->yres_virtual = info->fix.smem_len /
+		(var->xres_virtual * (var->bits_per_pixel >> 3));
+	dev_dbg(fbi->dev, "pxa168fb: find best mode: res = %dx%d\n",
+				var->xres, var->yres);
+
+	/* correct pixclock. */
+	total_w = var->xres + var->left_margin + var->right_margin +
+		  var->hsync_len;
+	total_h = var->yres + var->upper_margin + var->lower_margin +
+		  var->vsync_len;
+
+	div_result = 1000000000000ll;
+	do_div(div_result, total_w * total_h * refresh);
+	var->pixclock = (u32)div_result;
+
+	return ret;
+}
+
+static int __init pxa168fb_probe(struct platform_device *pdev)
+{
+	struct pxa168fb_mach_info *mi;
+	struct fb_info *info = 0;
+	struct pxa168fb_info *fbi = 0;
+	struct resource *res;
+	struct clk *clk;
+	int irq, ret;
+
+	mi = pdev->dev.platform_data;
+	if (mi == NULL) {
+		dev_err(&pdev->dev, "no platform data defined\n");
+		return -EINVAL;
+	}
+
+	clk = clk_get(&pdev->dev, "LCDCLK");
+	if (IS_ERR(clk)) {
+		dev_err(&pdev->dev, "unable to get LCDCLK");
+		return PTR_ERR(clk);
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (res == NULL) {
+		dev_err(&pdev->dev, "no IO memory defined\n");
+		return -ENOENT;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "no IRQ defined\n");
+		return -ENOENT;
+	}
+
+	info = framebuffer_alloc(sizeof(struct pxa168fb_info), &pdev->dev);
+	if (info == NULL) {
+		clk_put(clk);
+		return -ENOMEM;
+	}
+
+	/* Initialize private data */
+	fbi = info->par;
+	fbi->info = info;
+	fbi->clk = clk;
+	fbi->dev = info->dev = &pdev->dev;
+	fbi->panel_rbswap = mi->panel_rbswap;
+	fbi->is_blanked = 0;
+	fbi->active = mi->active;
+
+	/*
+	 * Initialise static fb parameters.
+	 */
+	info->flags = FBINFO_DEFAULT | FBINFO_PARTIAL_PAN_OK |
+		      FBINFO_HWACCEL_XPAN | FBINFO_HWACCEL_YPAN;
+	info->node = -1;
+	strlcpy(info->fix.id, mi->id, 16);
+	info->fix.type = FB_TYPE_PACKED_PIXELS;
+	info->fix.type_aux = 0;
+	info->fix.xpanstep = 0;
+	info->fix.ypanstep = 0;
+	info->fix.ywrapstep = 0;
+	info->fix.mmio_start = res->start;
+	info->fix.mmio_len = res->end - res->start + 1;
+	info->fix.accel = FB_ACCEL_NONE;
+	info->fbops = &pxa168fb_ops;
+	info->pseudo_palette = fbi->pseudo_palette;
+
+	/*
+	 * Map LCD controller registers.
+	 */
+	fbi->reg_base = ioremap_nocache(res->start, res->end - res->start);
+	if (fbi->reg_base == NULL) {
+		ret = -ENOMEM;
+		goto failed;
+	}
+
+	/*
+	 * Allocate framebuffer memory.
+	 */
+	info->fix.smem_len = PAGE_ALIGN(DEFAULT_FB_SIZE);
+
+	info->screen_base = dma_alloc_writecombine(fbi->dev, info->fix.smem_len,
+						&fbi->fb_start_dma, GFP_KERNEL);
+	if (info->screen_base == NULL) {
+		ret = -ENOMEM;
+		goto failed;
+	}
+
+	info->fix.smem_start = (unsigned long)fbi->fb_start_dma;
+
+	/*
+	 * Set video mode according to platform data.
+	 */
+	set_mode(fbi, &info->var, mi->modes, mi->pix_fmt, 1);
+
+	fb_videomode_to_modelist(mi->modes, mi->num_modes, &info->modelist);
+
+	/*
+	 * init video mode data.
+	 */
+	pxa168fb_init_mode(info, mi);
+
+	ret = pxa168fb_check_var(&info->var, info);
+	if (ret)
+		goto failed_free_fbmem;
+
+	/*
+	 * Fill in sane defaults.
+	 */
+	ret = pxa168fb_check_var(&info->var, info);
+	if (ret)
+		goto failed;
+
+	/*
+	 * enable controller clock
+	 */
+	clk_enable(fbi->clk);
+
+	pxa168fb_set_par(info);
+
+	/*
+	 * Configure default register values.
+	 */
+	writel(0, fbi->reg_base + LCD_SPU_BLANKCOLOR);
+	writel(mi->io_pin_allocation_mode, fbi->reg_base + SPU_IOPAD_CONTROL);
+	writel(0, fbi->reg_base + LCD_CFG_GRA_START_ADDR1);
+	writel(0, fbi->reg_base + LCD_SPU_GRA_OVSA_HPXL_VLN);
+	writel(0, fbi->reg_base + LCD_SPU_SRAM_PARA0);
+	writel(CFG_CSB_256x32(0x1)|CFG_CSB_256x24(0x1)|CFG_CSB_256x8(0x1),
+		fbi->reg_base + LCD_SPU_SRAM_PARA1);
+
+	/*
+	 * Allocate color map.
+	 */
+	if (fb_alloc_cmap(&info->cmap, 256, 0) < 0) {
+		ret = -ENOMEM;
+		goto failed_free_clk;
+	}
+
+	/*
+	 * Register irq handler.
+	 */
+	ret = request_irq(irq, pxa168fb_handle_irq, IRQF_SHARED,
+					info->fix.id, fbi);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "unable to request IRQ\n");
+		ret = -ENXIO;
+		goto failed_free_cmap;
+	}
+
+	/*
+	 * Enable GFX interrupt
+	 */
+	writel(GRA_FRAME_IRQ0_ENA(0x1), fbi->reg_base + SPU_IRQ_ENA);
+
+	/*
+	 * Register framebuffer.
+	 */
+	ret = register_framebuffer(info);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Failed to register pxa168-fb: %d\n", ret);
+		ret = -ENXIO;
+		goto failed_free_irq;
+	}
+
+	platform_set_drvdata(pdev, fbi);
+	return 0;
+
+failed_free_irq:
+	free_irq(irq, fbi);
+failed_free_cmap:
+	fb_dealloc_cmap(&info->cmap);
+failed_free_clk:
+	clk_disable(fbi->clk);
+failed_free_fbmem:
+	dma_free_coherent(fbi->dev, info->fix.smem_len,
+			info->screen_base, fbi->fb_start_dma);
+failed:
+	kfree(info);
+	clk_put(clk);
+
+	dev_err(&pdev->dev, "frame buffer device init failed with %d\n", ret);
+	return ret;
+}
+
+static struct platform_driver pxa168fb_driver = {
+	.driver		= {
+		.name	= "pxa168-fb",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= pxa168fb_probe,
+};
+
+static int __devinit pxa168fb_init(void)
+{
+	return platform_driver_register(&pxa168fb_driver);
+}
+module_init(pxa168fb_init);
+
+MODULE_AUTHOR("Lennert Buytenhek <buytenh@marvell.com> "
+	      "Green Wan <gwan@marvell.com>");
+MODULE_DESCRIPTION("Framebuffer driver for PXA168/910");
+MODULE_LICENSE("GPL");
diff --git a/drivers/video/pxa168fb.h b/drivers/video/pxa168fb.h
new file mode 100644
index 00000000000..eee09279c52
--- /dev/null
+++ b/drivers/video/pxa168fb.h
@@ -0,0 +1,558 @@
+#ifndef __PXA168FB_H__
+#define __PXA168FB_H__
+
+/* ------------< LCD register >------------ */
+/* Video Frame 0&1 start address registers */
+#define	LCD_SPU_DMA_START_ADDR_Y0		0x00C0
+#define	LCD_SPU_DMA_START_ADDR_U0		0x00C4
+#define	LCD_SPU_DMA_START_ADDR_V0		0x00C8
+#define LCD_CFG_DMA_START_ADDR_0		0x00CC /* Cmd address */
+#define	LCD_SPU_DMA_START_ADDR_Y1		0x00D0
+#define	LCD_SPU_DMA_START_ADDR_U1		0x00D4
+#define	LCD_SPU_DMA_START_ADDR_V1		0x00D8
+#define LCD_CFG_DMA_START_ADDR_1		0x00DC /* Cmd address */
+
+/* YC & UV Pitch */
+#define LCD_SPU_DMA_PITCH_YC			0x00E0
+#define     SPU_DMA_PITCH_C(c)			((c) << 16)
+#define     SPU_DMA_PITCH_Y(y)			(y)
+#define LCD_SPU_DMA_PITCH_UV			0x00E4
+#define     SPU_DMA_PITCH_V(v)			((v) << 16)
+#define     SPU_DMA_PITCH_U(u)			(u)
+
+/* Video Starting Point on Screen Register */
+#define LCD_SPUT_DMA_OVSA_HPXL_VLN		0x00E8
+#define     CFG_DMA_OVSA_VLN(y)			((y) << 16) /* 0~0xfff */
+#define     CFG_DMA_OVSA_HPXL(x)		(x)     /* 0~0xfff */
+
+/* Video Size Register */
+#define LCD_SPU_DMA_HPXL_VLN			0x00EC
+#define     CFG_DMA_VLN(y)			((y) << 16)
+#define     CFG_DMA_HPXL(x)			(x)
+
+/* Video Size After zooming Register */
+#define LCD_SPU_DZM_HPXL_VLN			0x00F0
+#define     CFG_DZM_VLN(y)			((y) << 16)
+#define     CFG_DZM_HPXL(x)			(x)
+
+/* Graphic Frame 0&1 Starting Address Register */
+#define LCD_CFG_GRA_START_ADDR0			0x00F4
+#define LCD_CFG_GRA_START_ADDR1			0x00F8
+
+/* Graphic Frame Pitch */
+#define LCD_CFG_GRA_PITCH			0x00FC
+
+/* Graphic Starting Point on Screen Register */
+#define LCD_SPU_GRA_OVSA_HPXL_VLN		0x0100
+#define     CFG_GRA_OVSA_VLN(y)			((y) << 16)
+#define     CFG_GRA_OVSA_HPXL(x)		(x)
+
+/* Graphic Size Register */
+#define LCD_SPU_GRA_HPXL_VLN			0x0104
+#define     CFG_GRA_VLN(y)			((y) << 16)
+#define     CFG_GRA_HPXL(x)			(x)
+
+/* Graphic Size after Zooming Register */
+#define LCD_SPU_GZM_HPXL_VLN			0x0108
+#define     CFG_GZM_VLN(y)			((y) << 16)
+#define     CFG_GZM_HPXL(x)			(x)
+
+/* HW Cursor Starting Point on Screen Register */
+#define LCD_SPU_HWC_OVSA_HPXL_VLN		0x010C
+#define     CFG_HWC_OVSA_VLN(y)			((y) << 16)
+#define     CFG_HWC_OVSA_HPXL(x)		(x)
+
+/* HW Cursor Size */
+#define LCD_SPU_HWC_HPXL_VLN			0x0110
+#define     CFG_HWC_VLN(y)			((y) << 16)
+#define     CFG_HWC_HPXL(x)			(x)
+
+/* Total Screen Size Register */
+#define LCD_SPUT_V_H_TOTAL			0x0114
+#define     CFG_V_TOTAL(y)			((y) << 16)
+#define     CFG_H_TOTAL(x)			(x)
+
+/* Total Screen Active Size Register */
+#define LCD_SPU_V_H_ACTIVE			0x0118
+#define     CFG_V_ACTIVE(y)			((y) << 16)
+#define     CFG_H_ACTIVE(x)			(x)
+
+/* Screen H&V Porch Register */
+#define LCD_SPU_H_PORCH				0x011C
+#define     CFG_H_BACK_PORCH(b)			((b) << 16)
+#define     CFG_H_FRONT_PORCH(f)		(f)
+#define LCD_SPU_V_PORCH				0x0120
+#define     CFG_V_BACK_PORCH(b)			((b) << 16)
+#define     CFG_V_FRONT_PORCH(f)		(f)
+
+/* Screen Blank Color Register */
+#define LCD_SPU_BLANKCOLOR			0x0124
+#define     CFG_BLANKCOLOR_MASK			0x00FFFFFF
+#define     CFG_BLANKCOLOR_R_MASK		0x000000FF
+#define     CFG_BLANKCOLOR_G_MASK		0x0000FF00
+#define     CFG_BLANKCOLOR_B_MASK		0x00FF0000
+
+/* HW Cursor Color 1&2 Register */
+#define LCD_SPU_ALPHA_COLOR1			0x0128
+#define     CFG_HWC_COLOR1			0x00FFFFFF
+#define     CFG_HWC_COLOR1_R(red)		((red) << 16)
+#define     CFG_HWC_COLOR1_G(green)		((green) << 8)
+#define     CFG_HWC_COLOR1_B(blue)		(blue)
+#define     CFG_HWC_COLOR1_R_MASK		0x000000FF
+#define     CFG_HWC_COLOR1_G_MASK		0x0000FF00
+#define     CFG_HWC_COLOR1_B_MASK		0x00FF0000
+#define LCD_SPU_ALPHA_COLOR2			0x012C
+#define     CFG_HWC_COLOR2			0x00FFFFFF
+#define     CFG_HWC_COLOR2_R_MASK		0x000000FF
+#define     CFG_HWC_COLOR2_G_MASK		0x0000FF00
+#define     CFG_HWC_COLOR2_B_MASK		0x00FF0000
+
+/* Video YUV Color Key Control */
+#define LCD_SPU_COLORKEY_Y			0x0130
+#define     CFG_CKEY_Y2(y2)			((y2) << 24)
+#define     CFG_CKEY_Y2_MASK			0xFF000000
+#define     CFG_CKEY_Y1(y1)			((y1) << 16)
+#define     CFG_CKEY_Y1_MASK			0x00FF0000
+#define     CFG_CKEY_Y(y)			((y) << 8)
+#define     CFG_CKEY_Y_MASK			0x0000FF00
+#define     CFG_ALPHA_Y(y)			(y)
+#define     CFG_ALPHA_Y_MASK			0x000000FF
+#define LCD_SPU_COLORKEY_U			0x0134
+#define     CFG_CKEY_U2(u2)			((u2) << 24)
+#define     CFG_CKEY_U2_MASK			0xFF000000
+#define     CFG_CKEY_U1(u1)			((u1) << 16)
+#define     CFG_CKEY_U1_MASK			0x00FF0000
+#define     CFG_CKEY_U(u)			((u) << 8)
+#define     CFG_CKEY_U_MASK			0x0000FF00
+#define     CFG_ALPHA_U(u)			(u)
+#define     CFG_ALPHA_U_MASK			0x000000FF
+#define LCD_SPU_COLORKEY_V			0x0138
+#define     CFG_CKEY_V2(v2)			((v2) << 24)
+#define     CFG_CKEY_V2_MASK			0xFF000000
+#define     CFG_CKEY_V1(v1)			((v1) << 16)
+#define     CFG_CKEY_V1_MASK			0x00FF0000
+#define     CFG_CKEY_V(v)			((v) << 8)
+#define     CFG_CKEY_V_MASK			0x0000FF00
+#define     CFG_ALPHA_V(v)			(v)
+#define     CFG_ALPHA_V_MASK			0x000000FF
+
+/* SPI Read Data Register */
+#define LCD_SPU_SPI_RXDATA			0x0140
+
+/* Smart Panel Read Data Register */
+#define LCD_SPU_ISA_RSDATA			0x0144
+#define     ISA_RXDATA_16BIT_1_DATA_MASK	0x000000FF
+#define     ISA_RXDATA_16BIT_2_DATA_MASK	0x0000FF00
+#define     ISA_RXDATA_16BIT_3_DATA_MASK	0x00FF0000
+#define     ISA_RXDATA_16BIT_4_DATA_MASK	0xFF000000
+#define     ISA_RXDATA_32BIT_1_DATA_MASK	0x00FFFFFF
+
+/* HWC SRAM Read Data Register */
+#define LCD_SPU_HWC_RDDAT			0x0158
+
+/* Gamma Table SRAM Read Data Register */
+#define LCD_SPU_GAMMA_RDDAT			0x015c
+#define     CFG_GAMMA_RDDAT_MASK		0x000000FF
+
+/* Palette Table SRAM Read Data Register */
+#define LCD_SPU_PALETTE_RDDAT			0x0160
+#define     CFG_PALETTE_RDDAT_MASK		0x00FFFFFF
+
+/* I/O Pads Input Read Only Register */
+#define LCD_SPU_IOPAD_IN			0x0178
+#define     CFG_IOPAD_IN_MASK			0x0FFFFFFF
+
+/* Reserved Read Only Registers */
+#define LCD_CFG_RDREG5F				0x017C
+#define     IRE_FRAME_CNT_MASK			0x000000C0
+#define     IPE_FRAME_CNT_MASK			0x00000030
+#define     GRA_FRAME_CNT_MASK			0x0000000C  /* Graphic */
+#define     DMA_FRAME_CNT_MASK			0x00000003  /* Video */
+
+/* SPI Control Register. */
+#define LCD_SPU_SPI_CTRL			0x0180
+#define     CFG_SCLKCNT(div)			((div) << 24)  /* 0xFF~0x2 */
+#define     CFG_SCLKCNT_MASK			0xFF000000
+#define     CFG_RXBITS(rx)			((rx) << 16)   /* 0x1F~0x1 */
+#define     CFG_RXBITS_MASK			0x00FF0000
+#define     CFG_TXBITS(tx)			((tx) << 8)    /* 0x1F~0x1 */
+#define     CFG_TXBITS_MASK			0x0000FF00
+#define     CFG_CLKINV(clk)			((clk) << 7)
+#define     CFG_CLKINV_MASK			0x00000080
+#define     CFG_KEEPXFER(transfer)		((transfer) << 6)
+#define     CFG_KEEPXFER_MASK			0x00000040
+#define     CFG_RXBITSTO0(rx)			((rx) << 5)
+#define     CFG_RXBITSTO0_MASK			0x00000020
+#define     CFG_TXBITSTO0(tx)			((tx) << 4)
+#define     CFG_TXBITSTO0_MASK			0x00000010
+#define     CFG_SPI_ENA(spi)			((spi) << 3)
+#define     CFG_SPI_ENA_MASK			0x00000008
+#define     CFG_SPI_SEL(spi)			((spi) << 2)
+#define     CFG_SPI_SEL_MASK			0x00000004
+#define     CFG_SPI_3W4WB(wire)			((wire) << 1)
+#define     CFG_SPI_3W4WB_MASK			0x00000002
+#define     CFG_SPI_START(start)		(start)
+#define     CFG_SPI_START_MASK			0x00000001
+
+/* SPI Tx Data Register */
+#define LCD_SPU_SPI_TXDATA			0x0184
+
+/*
+   1. Smart Pannel 8-bit Bus Control Register.
+   2. AHB Slave Path Data Port Register
+*/
+#define LCD_SPU_SMPN_CTRL			0x0188
+
+/* DMA Control 0 Register */
+#define LCD_SPU_DMA_CTRL0			0x0190
+#define     CFG_NOBLENDING(nb)			((nb) << 31)
+#define     CFG_NOBLENDING_MASK			0x80000000
+#define     CFG_GAMMA_ENA(gn)			((gn) << 30)
+#define     CFG_GAMMA_ENA_MASK			0x40000000
+#define     CFG_CBSH_ENA(cn)			((cn) << 29)
+#define     CFG_CBSH_ENA_MASK			0x20000000
+#define     CFG_PALETTE_ENA(pn)			((pn) << 28)
+#define     CFG_PALETTE_ENA_MASK		0x10000000
+#define     CFG_ARBFAST_ENA(an)			((an) << 27)
+#define     CFG_ARBFAST_ENA_MASK		0x08000000
+#define     CFG_HWC_1BITMOD(mode)		((mode) << 26)
+#define     CFG_HWC_1BITMOD_MASK		0x04000000
+#define     CFG_HWC_1BITENA(mn)			((mn) << 25)
+#define     CFG_HWC_1BITENA_MASK		0x02000000
+#define     CFG_HWC_ENA(cn)		        ((cn) << 24)
+#define     CFG_HWC_ENA_MASK			0x01000000
+#define     CFG_DMAFORMAT(dmaformat)		((dmaformat) << 20)
+#define     CFG_DMAFORMAT_MASK			0x00F00000
+#define     CFG_GRAFORMAT(graformat)		((graformat) << 16)
+#define     CFG_GRAFORMAT_MASK			0x000F0000
+/* for graphic part */
+#define     CFG_GRA_FTOGGLE(toggle)		((toggle) << 15)
+#define     CFG_GRA_FTOGGLE_MASK		0x00008000
+#define     CFG_GRA_HSMOOTH(smooth)		((smooth) << 14)
+#define     CFG_GRA_HSMOOTH_MASK		0x00004000
+#define     CFG_GRA_TSTMODE(test)		((test) << 13)
+#define     CFG_GRA_TSTMODE_MASK		0x00002000
+#define     CFG_GRA_SWAPRB(swap)		((swap) << 12)
+#define     CFG_GRA_SWAPRB_MASK			0x00001000
+#define     CFG_GRA_SWAPUV(swap)		((swap) << 11)
+#define     CFG_GRA_SWAPUV_MASK			0x00000800
+#define     CFG_GRA_SWAPYU(swap)		((swap) << 10)
+#define     CFG_GRA_SWAPYU_MASK			0x00000400
+#define     CFG_YUV2RGB_GRA(cvrt)		((cvrt) << 9)
+#define     CFG_YUV2RGB_GRA_MASK		0x00000200
+#define     CFG_GRA_ENA(gra)			((gra) << 8)
+#define     CFG_GRA_ENA_MASK			0x00000100
+/* for video part */
+#define     CFG_DMA_FTOGGLE(toggle)		((toggle) << 7)
+#define     CFG_DMA_FTOGGLE_MASK		0x00000080
+#define     CFG_DMA_HSMOOTH(smooth)		((smooth) << 6)
+#define     CFG_DMA_HSMOOTH_MASK		0x00000040
+#define     CFG_DMA_TSTMODE(test)		((test) << 5)
+#define     CFG_DMA_TSTMODE_MASK		0x00000020
+#define     CFG_DMA_SWAPRB(swap)		((swap) << 4)
+#define     CFG_DMA_SWAPRB_MASK			0x00000010
+#define     CFG_DMA_SWAPUV(swap)		((swap) << 3)
+#define     CFG_DMA_SWAPUV_MASK			0x00000008
+#define     CFG_DMA_SWAPYU(swap)		((swap) << 2)
+#define     CFG_DMA_SWAPYU_MASK			0x00000004
+#define     CFG_DMA_SWAP_MASK			0x0000001C
+#define     CFG_YUV2RGB_DMA(cvrt)		((cvrt) << 1)
+#define     CFG_YUV2RGB_DMA_MASK		0x00000002
+#define     CFG_DMA_ENA(video)			(video)
+#define     CFG_DMA_ENA_MASK			0x00000001
+
+/* DMA Control 1 Register */
+#define LCD_SPU_DMA_CTRL1			0x0194
+#define     CFG_FRAME_TRIG(trig)		((trig) << 31)
+#define     CFG_FRAME_TRIG_MASK			0x80000000
+#define     CFG_VSYNC_TRIG(trig)		((trig) << 28)
+#define     CFG_VSYNC_TRIG_MASK			0x70000000
+#define     CFG_VSYNC_INV(inv)			((inv) << 27)
+#define     CFG_VSYNC_INV_MASK			0x08000000
+#define     CFG_COLOR_KEY_MODE(cmode)		((cmode) << 24)
+#define     CFG_COLOR_KEY_MASK			0x07000000
+#define     CFG_CARRY(carry)			((carry) << 23)
+#define     CFG_CARRY_MASK			0x00800000
+#define     CFG_LNBUF_ENA(lnbuf)		((lnbuf) << 22)
+#define     CFG_LNBUF_ENA_MASK			0x00400000
+#define     CFG_GATED_ENA(gated)		((gated) << 21)
+#define     CFG_GATED_ENA_MASK			0x00200000
+#define     CFG_PWRDN_ENA(power)		((power) << 20)
+#define     CFG_PWRDN_ENA_MASK			0x00100000
+#define     CFG_DSCALE(dscale)			((dscale) << 18)
+#define     CFG_DSCALE_MASK			0x000C0000
+#define     CFG_ALPHA_MODE(amode)		((amode) << 16)
+#define     CFG_ALPHA_MODE_MASK			0x00030000
+#define     CFG_ALPHA(alpha)			((alpha) << 8)
+#define     CFG_ALPHA_MASK			0x0000FF00
+#define     CFG_PXLCMD(pxlcmd)			(pxlcmd)
+#define     CFG_PXLCMD_MASK			0x000000FF
+
+/* SRAM Control Register */
+#define LCD_SPU_SRAM_CTRL			0x0198
+#define     CFG_SRAM_INIT_WR_RD(mode)		((mode) << 14)
+#define     CFG_SRAM_INIT_WR_RD_MASK		0x0000C000
+#define     CFG_SRAM_ADDR_LCDID(id)		((id) << 8)
+#define     CFG_SRAM_ADDR_LCDID_MASK		0x00000F00
+#define     CFG_SRAM_ADDR(addr)			(addr)
+#define     CFG_SRAM_ADDR_MASK			0x000000FF
+
+/* SRAM Write Data Register */
+#define LCD_SPU_SRAM_WRDAT			0x019C
+
+/* SRAM RTC/WTC Control Register */
+#define LCD_SPU_SRAM_PARA0			0x01A0
+
+/* SRAM Power Down Control Register */
+#define LCD_SPU_SRAM_PARA1			0x01A4
+#define     CFG_CSB_256x32(hwc)			((hwc) << 15)	/* HWC */
+#define     CFG_CSB_256x32_MASK			0x00008000
+#define     CFG_CSB_256x24(palette)		((palette) << 14)	/* Palette */
+#define     CFG_CSB_256x24_MASK			0x00004000
+#define     CFG_CSB_256x8(gamma)		((gamma) << 13)	/* Gamma */
+#define     CFG_CSB_256x8_MASK			0x00002000
+#define     CFG_PDWN256x32(pdwn)		((pdwn) << 7)	/* HWC */
+#define     CFG_PDWN256x32_MASK			0x00000080
+#define     CFG_PDWN256x24(pdwn)		((pdwn) << 6)	/* Palette */
+#define     CFG_PDWN256x24_MASK			0x00000040
+#define     CFG_PDWN256x8(pdwn)			((pdwn) << 5)	/* Gamma */
+#define     CFG_PDWN256x8_MASK			0x00000020
+#define     CFG_PDWN32x32(pdwn)			((pdwn) << 3)
+#define     CFG_PDWN32x32_MASK			0x00000008
+#define     CFG_PDWN16x66(pdwn)			((pdwn) << 2)
+#define     CFG_PDWN16x66_MASK			0x00000004
+#define     CFG_PDWN32x66(pdwn)			((pdwn) << 1)
+#define     CFG_PDWN32x66_MASK			0x00000002
+#define     CFG_PDWN64x66(pdwn)			(pdwn)
+#define     CFG_PDWN64x66_MASK			0x00000001
+
+/* Smart or Dumb Panel Clock Divider */
+#define LCD_CFG_SCLK_DIV			0x01A8
+#define     SCLK_SOURCE_SELECT(src)		((src) << 31)
+#define     SCLK_SOURCE_SELECT_MASK		0x80000000
+#define     CLK_FRACDIV(frac)			((frac) << 16)
+#define     CLK_FRACDIV_MASK			0x0FFF0000
+#define     CLK_INT_DIV(div)			(div)
+#define     CLK_INT_DIV_MASK			0x0000FFFF
+
+/* Video Contrast Register */
+#define LCD_SPU_CONTRAST			0x01AC
+#define     CFG_BRIGHTNESS(bright)		((bright) << 16)
+#define     CFG_BRIGHTNESS_MASK			0xFFFF0000
+#define     CFG_CONTRAST(contrast)		(contrast)
+#define     CFG_CONTRAST_MASK			0x0000FFFF
+
+/* Video Saturation Register */
+#define LCD_SPU_SATURATION			0x01B0
+#define     CFG_C_MULTS(mult)			((mult) << 16)
+#define     CFG_C_MULTS_MASK			0xFFFF0000
+#define     CFG_SATURATION(sat)			(sat)
+#define     CFG_SATURATION_MASK			0x0000FFFF
+
+/* Video Hue Adjust Register */
+#define LCD_SPU_CBSH_HUE			0x01B4
+#define     CFG_SIN0(sin0)			((sin0) << 16)
+#define     CFG_SIN0_MASK			0xFFFF0000
+#define     CFG_COS0(con0)			(con0)
+#define     CFG_COS0_MASK			0x0000FFFF
+
+/* Dump LCD Panel Control Register */
+#define LCD_SPU_DUMB_CTRL			0x01B8
+#define     CFG_DUMBMODE(mode)			((mode) << 28)
+#define     CFG_DUMBMODE_MASK			0xF0000000
+#define     CFG_LCDGPIO_O(data)			((data) << 20)
+#define     CFG_LCDGPIO_O_MASK			0x0FF00000
+#define     CFG_LCDGPIO_ENA(gpio)		((gpio) << 12)
+#define     CFG_LCDGPIO_ENA_MASK		0x000FF000
+#define     CFG_BIAS_OUT(bias)			((bias) << 8)
+#define     CFG_BIAS_OUT_MASK			0x00000100
+#define     CFG_REVERSE_RGB(rRGB)		((rRGB) << 7)
+#define     CFG_REVERSE_RGB_MASK		0x00000080
+#define     CFG_INV_COMPBLANK(blank)		((blank) << 6)
+#define     CFG_INV_COMPBLANK_MASK		0x00000040
+#define     CFG_INV_COMPSYNC(sync)		((sync) << 5)
+#define     CFG_INV_COMPSYNC_MASK		0x00000020
+#define     CFG_INV_HENA(hena)			((hena) << 4)
+#define     CFG_INV_HENA_MASK			0x00000010
+#define     CFG_INV_VSYNC(vsync)		((vsync) << 3)
+#define     CFG_INV_VSYNC_MASK			0x00000008
+#define     CFG_INV_HSYNC(hsync)		((hsync) << 2)
+#define     CFG_INV_HSYNC_MASK			0x00000004
+#define     CFG_INV_PCLK(pclk)			((pclk) << 1)
+#define     CFG_INV_PCLK_MASK			0x00000002
+#define     CFG_DUMB_ENA(dumb)			(dumb)
+#define     CFG_DUMB_ENA_MASK			0x00000001
+
+/* LCD I/O Pads Control Register */
+#define SPU_IOPAD_CONTROL			0x01BC
+#define     CFG_GRA_VM_ENA(vm)			((vm) << 15)        /* gfx */
+#define     CFG_GRA_VM_ENA_MASK			0x00008000
+#define     CFG_DMA_VM_ENA(vm)			((vm) << 13)	/* video */
+#define     CFG_DMA_VM_ENA_MASK			0x00002000
+#define     CFG_CMD_VM_ENA(vm)			((vm) << 13)
+#define     CFG_CMD_VM_ENA_MASK			0x00000800
+#define     CFG_CSC(csc)			((csc) << 8)	/* csc */
+#define     CFG_CSC_MASK			0x00000300
+#define     CFG_AXICTRL(axi)			((axi) << 4)
+#define     CFG_AXICTRL_MASK			0x000000F0
+#define     CFG_IOPADMODE(iopad)		(iopad)
+#define     CFG_IOPADMODE_MASK			0x0000000F
+
+/* LCD Interrupt Control Register */
+#define SPU_IRQ_ENA				0x01C0
+#define     DMA_FRAME_IRQ0_ENA(irq)		((irq) << 31)
+#define     DMA_FRAME_IRQ0_ENA_MASK		0x80000000
+#define     DMA_FRAME_IRQ1_ENA(irq)		((irq) << 30)
+#define     DMA_FRAME_IRQ1_ENA_MASK		0x40000000
+#define     DMA_FF_UNDERFLOW_ENA(ff)		((ff) << 29)
+#define     DMA_FF_UNDERFLOW_ENA_MASK		0x20000000
+#define     GRA_FRAME_IRQ0_ENA(irq)		((irq) << 27)
+#define     GRA_FRAME_IRQ0_ENA_MASK		0x08000000
+#define     GRA_FRAME_IRQ1_ENA(irq)		((irq) << 26)
+#define     GRA_FRAME_IRQ1_ENA_MASK		0x04000000
+#define     GRA_FF_UNDERFLOW_ENA(ff)		((ff) << 25)
+#define     GRA_FF_UNDERFLOW_ENA_MASK		0x02000000
+#define     VSYNC_IRQ_ENA(vsync_irq)		((vsync_irq) << 23)
+#define     VSYNC_IRQ_ENA_MASK			0x00800000
+#define     DUMB_FRAMEDONE_ENA(fdone)		((fdone) << 22)
+#define     DUMB_FRAMEDONE_ENA_MASK		0x00400000
+#define     TWC_FRAMEDONE_ENA(fdone)		((fdone) << 21)
+#define     TWC_FRAMEDONE_ENA_MASK		0x00200000
+#define     HWC_FRAMEDONE_ENA(fdone)		((fdone) << 20)
+#define     HWC_FRAMEDONE_ENA_MASK		0x00100000
+#define     SLV_IRQ_ENA(irq)			((irq) << 19)
+#define     SLV_IRQ_ENA_MASK			0x00080000
+#define     SPI_IRQ_ENA(irq)			((irq) << 18)
+#define     SPI_IRQ_ENA_MASK			0x00040000
+#define     PWRDN_IRQ_ENA(irq)			((irq) << 17)
+#define     PWRDN_IRQ_ENA_MASK			0x00020000
+#define     ERR_IRQ_ENA(irq)			((irq) << 16)
+#define     ERR_IRQ_ENA_MASK			0x00010000
+#define     CLEAN_SPU_IRQ_ISR(irq)		(irq)
+#define     CLEAN_SPU_IRQ_ISR_MASK		0x0000FFFF
+
+/* LCD Interrupt Status Register */
+#define SPU_IRQ_ISR				0x01C4
+#define     DMA_FRAME_IRQ0(irq)			((irq) << 31)
+#define     DMA_FRAME_IRQ0_MASK			0x80000000
+#define     DMA_FRAME_IRQ1(irq)			((irq) << 30)
+#define     DMA_FRAME_IRQ1_MASK			0x40000000
+#define     DMA_FF_UNDERFLOW(ff)		((ff) << 29)
+#define     DMA_FF_UNDERFLOW_MASK		0x20000000
+#define     GRA_FRAME_IRQ0(irq)			((irq) << 27)
+#define     GRA_FRAME_IRQ0_MASK			0x08000000
+#define     GRA_FRAME_IRQ1(irq)			((irq) << 26)
+#define     GRA_FRAME_IRQ1_MASK			0x04000000
+#define     GRA_FF_UNDERFLOW(ff)		((ff) << 25)
+#define     GRA_FF_UNDERFLOW_MASK		0x02000000
+#define     VSYNC_IRQ(vsync_irq)		((vsync_irq) << 23)
+#define     VSYNC_IRQ_MASK			0x00800000
+#define     DUMB_FRAMEDONE(fdone)		((fdone) << 22)
+#define     DUMB_FRAMEDONE_MASK			0x00400000
+#define     TWC_FRAMEDONE(fdone)		((fdone) << 21)
+#define     TWC_FRAMEDONE_MASK			0x00200000
+#define     HWC_FRAMEDONE(fdone)		((fdone) << 20)
+#define     HWC_FRAMEDONE_MASK			0x00100000
+#define     SLV_IRQ(irq)			((irq) << 19)
+#define     SLV_IRQ_MASK			0x00080000
+#define     SPI_IRQ(irq)			((irq) << 18)
+#define     SPI_IRQ_MASK			0x00040000
+#define     PWRDN_IRQ(irq)			((irq) << 17)
+#define     PWRDN_IRQ_MASK			0x00020000
+#define     ERR_IRQ(irq)			((irq) << 16)
+#define     ERR_IRQ_MASK			0x00010000
+/* read-only */
+#define     DMA_FRAME_IRQ0_LEVEL_MASK		0x00008000
+#define     DMA_FRAME_IRQ1_LEVEL_MASK		0x00004000
+#define     DMA_FRAME_CNT_ISR_MASK		0x00003000
+#define     GRA_FRAME_IRQ0_LEVEL_MASK		0x00000800
+#define     GRA_FRAME_IRQ1_LEVEL_MASK		0x00000400
+#define     GRA_FRAME_CNT_ISR_MASK		0x00000300
+#define     VSYNC_IRQ_LEVEL_MASK		0x00000080
+#define     DUMB_FRAMEDONE_LEVEL_MASK		0x00000040
+#define     TWC_FRAMEDONE_LEVEL_MASK		0x00000020
+#define     HWC_FRAMEDONE_LEVEL_MASK		0x00000010
+#define     SLV_FF_EMPTY_MASK			0x00000008
+#define     DMA_FF_ALLEMPTY_MASK		0x00000004
+#define     GRA_FF_ALLEMPTY_MASK		0x00000002
+#define     PWRDN_IRQ_LEVEL_MASK		0x00000001
+
+
+/*
+ * defined Video Memory Color format for DMA control 0 register
+ * DMA0 bit[23:20]
+ */
+#define VMODE_RGB565		0x0
+#define VMODE_RGB1555		0x1
+#define VMODE_RGB888PACKED	0x2
+#define VMODE_RGB888UNPACKED	0x3
+#define VMODE_RGBA888		0x4
+#define VMODE_YUV422PACKED	0x5
+#define VMODE_YUV422PLANAR	0x6
+#define VMODE_YUV420PLANAR	0x7
+#define VMODE_SMPNCMD		0x8
+#define VMODE_PALETTE4BIT	0x9
+#define VMODE_PALETTE8BIT	0xa
+#define VMODE_RESERVED		0xb
+
+/*
+ * defined Graphic Memory Color format for DMA control 0 register
+ * DMA0 bit[19:16]
+ */
+#define GMODE_RGB565		0x0
+#define GMODE_RGB1555		0x1
+#define GMODE_RGB888PACKED	0x2
+#define GMODE_RGB888UNPACKED	0x3
+#define GMODE_RGBA888		0x4
+#define GMODE_YUV422PACKED	0x5
+#define GMODE_YUV422PLANAR	0x6
+#define GMODE_YUV420PLANAR	0x7
+#define GMODE_SMPNCMD		0x8
+#define GMODE_PALETTE4BIT	0x9
+#define GMODE_PALETTE8BIT	0xa
+#define GMODE_RESERVED		0xb
+
+/*
+ * define for DMA control 1 register
+ */
+#define DMA1_FRAME_TRIG		31 /* bit location */
+#define DMA1_VSYNC_MODE		28
+#define DMA1_VSYNC_INV		27
+#define DMA1_CKEY		24
+#define DMA1_CARRY		23
+#define DMA1_LNBUF_ENA		22
+#define DMA1_GATED_ENA		21
+#define DMA1_PWRDN_ENA		20
+#define DMA1_DSCALE		18
+#define DMA1_ALPHA_MODE		16
+#define DMA1_ALPHA		08
+#define DMA1_PXLCMD		00
+
+/*
+ * defined for Configure Dumb Mode
+ * DUMB LCD Panel bit[31:28]
+ */
+#define DUMB16_RGB565_0		0x0
+#define DUMB16_RGB565_1		0x1
+#define DUMB18_RGB666_0		0x2
+#define DUMB18_RGB666_1		0x3
+#define DUMB12_RGB444_0		0x4
+#define DUMB12_RGB444_1		0x5
+#define DUMB24_RGB888_0		0x6
+#define DUMB_BLANK		0x7
+
+/*
+ * defined for Configure I/O Pin Allocation Mode
+ * LCD LCD I/O Pads control register bit[3:0]
+ */
+#define IOPAD_DUMB24		0x0
+#define IOPAD_DUMB18SPI		0x1
+#define IOPAD_DUMB18GPIO	0x2
+#define IOPAD_DUMB16SPI		0x3
+#define IOPAD_DUMB16GPIO	0x4
+#define IOPAD_DUMB12		0x5
+#define IOPAD_SMART18SPI	0x6
+#define IOPAD_SMART16SPI	0x7
+#define IOPAD_SMART8BOTH	0x8
+
+#endif /* __PXA168FB_H__ */
diff --git a/include/video/pxa168fb.h b/include/video/pxa168fb.h
new file mode 100644
index 00000000000..b5cc72fe046
--- /dev/null
+++ b/include/video/pxa168fb.h
@@ -0,0 +1,127 @@
+/*
+ * linux/arch/arm/mach-mmp/include/mach/pxa168fb.h
+ *
+ *  Copyright (C) 2009 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_MACH_PXA168FB_H
+#define __ASM_MACH_PXA168FB_H
+
+#include <linux/fb.h>
+#include <linux/interrupt.h>
+
+/* Dumb interface */
+#define PIN_MODE_DUMB_24		0
+#define PIN_MODE_DUMB_18_SPI		1
+#define PIN_MODE_DUMB_18_GPIO		2
+#define PIN_MODE_DUMB_16_SPI		3
+#define PIN_MODE_DUMB_16_GPIO		4
+#define PIN_MODE_DUMB_12_SPI_GPIO	5
+#define PIN_MODE_SMART_18_SPI		6
+#define PIN_MODE_SMART_16_SPI		7
+#define PIN_MODE_SMART_8_SPI_GPIO	8
+
+/* Dumb interface pin allocation */
+#define DUMB_MODE_RGB565		0
+#define DUMB_MODE_RGB565_UPPER		1
+#define DUMB_MODE_RGB666		2
+#define DUMB_MODE_RGB666_UPPER		3
+#define DUMB_MODE_RGB444		4
+#define DUMB_MODE_RGB444_UPPER		5
+#define DUMB_MODE_RGB888		6
+
+/* default fb buffer size WVGA-32bits */
+#define DEFAULT_FB_SIZE	(800 * 480 * 4)
+
+/*
+ * Buffer pixel format
+ * bit0 is for rb swap.
+ * bit12 is for Y UorV swap
+ */
+#define PIX_FMT_RGB565		0
+#define PIX_FMT_BGR565		1
+#define PIX_FMT_RGB1555		2
+#define PIX_FMT_BGR1555		3
+#define PIX_FMT_RGB888PACK	4
+#define PIX_FMT_BGR888PACK	5
+#define PIX_FMT_RGB888UNPACK	6
+#define PIX_FMT_BGR888UNPACK	7
+#define PIX_FMT_RGBA888		8
+#define PIX_FMT_BGRA888		9
+#define PIX_FMT_YUV422PACK	10
+#define PIX_FMT_YVU422PACK	11
+#define PIX_FMT_YUV422PLANAR	12
+#define PIX_FMT_YVU422PLANAR	13
+#define PIX_FMT_YUV420PLANAR	14
+#define PIX_FMT_YVU420PLANAR	15
+#define PIX_FMT_PSEUDOCOLOR	20
+#define PIX_FMT_UYVY422PACK	(0x1000|PIX_FMT_YUV422PACK)
+
+/*
+ * PXA LCD controller private state.
+ */
+struct pxa168fb_info {
+	struct device		*dev;
+	struct clk		*clk;
+	struct fb_info		*info;
+
+	void __iomem		*reg_base;
+	dma_addr_t		fb_start_dma;
+	u32			pseudo_palette[16];
+
+	int			pix_fmt;
+	unsigned		is_blanked:1;
+	unsigned		panel_rbswap:1;
+	unsigned		active:1;
+};
+
+/*
+ * PXA fb machine information
+ */
+struct pxa168fb_mach_info {
+	char	id[16];
+
+	int		num_modes;
+	struct fb_videomode *modes;
+
+	/*
+	 * Pix_fmt
+	 */
+	unsigned	pix_fmt;
+
+	/*
+	 * I/O pin allocation.
+	 */
+	unsigned	io_pin_allocation_mode:4;
+
+	/*
+	 * Dumb panel -- assignment of R/G/B component info to the 24
+	 * available external data lanes.
+	 */
+	unsigned	dumb_mode:4;
+	unsigned	panel_rgb_reverse_lanes:1;
+
+	/*
+	 * Dumb panel -- GPIO output data.
+	 */
+	unsigned	gpio_output_mask:8;
+	unsigned	gpio_output_data:8;
+
+	/*
+	 * Dumb panel -- configurable output signal polarity.
+	 */
+	unsigned	invert_composite_blank:1;
+	unsigned	invert_pix_val_ena:1;
+	unsigned	invert_pixclock:1;
+	unsigned	invert_vsync:1;
+	unsigned	invert_hsync:1;
+	unsigned	panel_rbswap:1;
+	unsigned	active:1;
+	unsigned	enable_lcd:1;
+};
+
+#endif /* __ASM_MACH_PXA168FB_H */
-- 
cgit v1.2.3-70-g09d2


From 9e4f5e29610162fd426366f3b29e3cc6e575b858 Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Thu, 26 Mar 2009 13:33:19 -0400
Subject: [SCSI] FC Pass Thru support

Attached is the ELS/CT pass-thru patch for the FC Transport. The patch
creates a generic framework that lays on top of bsg and the SGIO v4 ioctl
in order to pass transaction requests to LLDD's.

The interface supports the following operations:
  On an fc_host basis:
    Request login to the specified N_Port_ID, creating an fc_rport.
    Request logout of the specified N_Port_ID, deleting an fc_rport
    Send ELS request to specified N_Port_ID w/o requiring a login, and
      wait for ELS response.
    Send CT request to specified N_Port_ID and wait for CT response.
      Login is required, but LLDD is allowed to manage login and decide
      whether it stays in place after the request is satisfied.
    Vendor-Unique request. Allows a LLDD-specific request to be passed
      to the LLDD, and the passing of a response back to the application.
  On an fc_rport basis:
    Send ELS request to nport and wait for ELS response.
    Send CT request to nport and wait for CT response.

The patch also exports several headers from include/scsi such that
they can be available to user-space applications:
  include/scsi/scsi.h
  include/scsi/scsi_netlink.h
  include/scsi/scsi_netlink_fc.h
  include/scsi/scsi_bsg_fc.h

For further information, refer to the last RFC:
http://marc.info/?l=linux-scsi&m=123436574018579&w=2

Note: Documentation is still spotty and will be added later.

[bharrosh@panasas.com: update for new block API]
Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 Documentation/scsi/scsi_fc_transport.txt |  14 +-
 Documentation/scsi/scsi_mid_low_api.txt  |   5 +
 drivers/scsi/scsi_transport_fc.c         | 614 ++++++++++++++++++++++++++++++-
 include/Kbuild                           |   1 +
 include/scsi/Kbuild                      |   4 +
 include/scsi/scsi_bsg_fc.h               | 322 ++++++++++++++++
 include/scsi/scsi_host.h                 |   9 +
 include/scsi/scsi_transport_fc.h         |  52 ++-
 8 files changed, 1016 insertions(+), 5 deletions(-)
 create mode 100644 include/scsi/Kbuild
 create mode 100644 include/scsi/scsi_bsg_fc.h

(limited to 'include')

diff --git a/Documentation/scsi/scsi_fc_transport.txt b/Documentation/scsi/scsi_fc_transport.txt
index e5b071d4661..d7f181701dc 100644
--- a/Documentation/scsi/scsi_fc_transport.txt
+++ b/Documentation/scsi/scsi_fc_transport.txt
@@ -1,10 +1,11 @@
                              SCSI FC Tansport
                  =============================================
 
-Date:  4/12/2007
+Date:  11/18/2008
 Kernel Revisions for features:
   rports : <<TBS>>
-  vports : 2.6.22 (? TBD)
+  vports : 2.6.22
+  bsg support : 2.6.30 (?TBD?)
 
 
 Introduction
@@ -15,6 +16,7 @@ The FC transport can be found at:
   drivers/scsi/scsi_transport_fc.c
   include/scsi/scsi_transport_fc.h
   include/scsi/scsi_netlink_fc.h
+  include/scsi/scsi_bsg_fc.h
 
 This file is found at Documentation/scsi/scsi_fc_transport.txt
 
@@ -472,6 +474,14 @@ int
 fc_vport_terminate(struct fc_vport *vport)
 
 
+FC BSG support (CT & ELS passthru, and more)
+========================================================================
+<< To Be Supplied >>
+
+
+
+
+
 Credits
 =======
 The following people have contributed to this document:
diff --git a/Documentation/scsi/scsi_mid_low_api.txt b/Documentation/scsi/scsi_mid_low_api.txt
index a6d5354639b..de67229251d 100644
--- a/Documentation/scsi/scsi_mid_low_api.txt
+++ b/Documentation/scsi/scsi_mid_low_api.txt
@@ -1271,6 +1271,11 @@ of interest:
     hostdata[0]  - area reserved for LLD at end of struct Scsi_Host. Size
                    is set by the second argument (named 'xtr_bytes') to
                    scsi_host_alloc() or scsi_register().
+    vendor_id    - a unique value that identifies the vendor supplying
+                   the LLD for the Scsi_Host.  Used most often in validating
+                   vendor-specific message requests.  Value consists of an
+                   identifier type and a vendor-specific value.
+                   See scsi_netlink.h for a description of valid formats.
 
 The scsi_host structure is defined in include/scsi/scsi_host.h
 
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index a152f89ae51..3f64d93b6c8 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -35,6 +35,7 @@
 #include <linux/netlink.h>
 #include <net/netlink.h>
 #include <scsi/scsi_netlink_fc.h>
+#include <scsi/scsi_bsg_fc.h>
 #include "scsi_priv.h"
 #include "scsi_transport_fc_internal.h"
 
@@ -43,6 +44,10 @@ static void fc_vport_sched_delete(struct work_struct *work);
 static int fc_vport_setup(struct Scsi_Host *shost, int channel,
 	struct device *pdev, struct fc_vport_identifiers  *ids,
 	struct fc_vport **vport);
+static int fc_bsg_hostadd(struct Scsi_Host *, struct fc_host_attrs *);
+static int fc_bsg_rportadd(struct Scsi_Host *, struct fc_rport *);
+static void fc_bsg_remove(struct request_queue *);
+static void fc_bsg_goose_queue(struct fc_rport *);
 
 /*
  * Redefine so that we can have same named attributes in the
@@ -411,13 +416,26 @@ static int fc_host_setup(struct transport_container *tc, struct device *dev,
 		return -ENOMEM;
 	}
 
+	fc_bsg_hostadd(shost, fc_host);
+	/* ignore any bsg add error - we just can't do sgio */
+
+	return 0;
+}
+
+static int fc_host_remove(struct transport_container *tc, struct device *dev,
+			 struct device *cdev)
+{
+	struct Scsi_Host *shost = dev_to_shost(dev);
+	struct fc_host_attrs *fc_host = shost_to_fc_host(shost);
+
+	fc_bsg_remove(fc_host->rqst_q);
 	return 0;
 }
 
 static DECLARE_TRANSPORT_CLASS(fc_host_class,
 			       "fc_host",
 			       fc_host_setup,
-			       NULL,
+			       fc_host_remove,
 			       NULL);
 
 /*
@@ -2375,6 +2393,7 @@ fc_rport_final_delete(struct work_struct *work)
 		scsi_flush_work(shost);
 
 	fc_terminate_rport_io(rport);
+
 	/*
 	 * Cancel any outstanding timers. These should really exist
 	 * only when rmmod'ing the LLDD and we're asking for
@@ -2407,6 +2426,8 @@ fc_rport_final_delete(struct work_struct *work)
 	    (i->f->dev_loss_tmo_callbk))
 		i->f->dev_loss_tmo_callbk(rport);
 
+	fc_bsg_remove(rport->rqst_q);
+
 	transport_remove_device(dev);
 	device_del(dev);
 	transport_destroy_device(dev);
@@ -2494,6 +2515,9 @@ fc_rport_create(struct Scsi_Host *shost, int channel,
 	transport_add_device(dev);
 	transport_configure_device(dev);
 
+	fc_bsg_rportadd(shost, rport);
+	/* ignore any bsg add error - we just can't do sgio */
+
 	if (rport->roles & FC_PORT_ROLE_FCP_TARGET) {
 		/* initiate a scan of the target */
 		rport->flags |= FC_RPORT_SCAN_PENDING;
@@ -2658,6 +2682,8 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
 					spin_unlock_irqrestore(shost->host_lock,
 							flags);
 
+				fc_bsg_goose_queue(rport);
+
 				return rport;
 			}
 		}
@@ -3343,6 +3369,592 @@ fc_vport_sched_delete(struct work_struct *work)
 }
 
 
+/*
+ * BSG support
+ */
+
+
+/**
+ * fc_destroy_bsgjob - routine to teardown/delete a fc bsg job
+ * @job:	fc_bsg_job that is to be torn down
+ */
+static void
+fc_destroy_bsgjob(struct fc_bsg_job *job)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&job->job_lock, flags);
+	if (job->ref_cnt) {
+		spin_unlock_irqrestore(&job->job_lock, flags);
+		return;
+	}
+	spin_unlock_irqrestore(&job->job_lock, flags);
+
+	put_device(job->dev);	/* release reference for the request */
+
+	kfree(job->request_payload.sg_list);
+	kfree(job->reply_payload.sg_list);
+	kfree(job);
+}
+
+
+/**
+ * fc_bsg_jobdone - completion routine for bsg requests that the LLD has
+ *                  completed
+ * @job:	fc_bsg_job that is complete
+ */
+static void
+fc_bsg_jobdone(struct fc_bsg_job *job)
+{
+	struct request *req = job->req;
+	struct request *rsp = req->next_rq;
+	unsigned long flags;
+	int err;
+
+	spin_lock_irqsave(&job->job_lock, flags);
+	job->state_flags |= FC_RQST_STATE_DONE;
+	job->ref_cnt--;
+	spin_unlock_irqrestore(&job->job_lock, flags);
+
+	err = job->req->errors = job->reply->result;
+	if (err < 0)
+		/* we're only returning the result field in the reply */
+		job->req->sense_len = sizeof(uint32_t);
+	else
+		job->req->sense_len = job->reply_len;
+
+	/* we assume all request payload was transferred, residual == 0 */
+	req->resid_len = 0;
+
+	if (rsp) {
+		WARN_ON(job->reply->reply_payload_rcv_len > rsp->resid_len);
+
+		/* set reply (bidi) residual */
+		rsp->resid_len -= min(job->reply->reply_payload_rcv_len,
+				      rsp->resid_len);
+	}
+
+	blk_end_request_all(req, err);
+
+	fc_destroy_bsgjob(job);
+}
+
+
+/**
+ * fc_bsg_job_timeout - handler for when a bsg request timesout
+ * @req:	request that timed out
+ */
+static enum blk_eh_timer_return
+fc_bsg_job_timeout(struct request *req)
+{
+	struct fc_bsg_job *job = (void *) req->special;
+	struct Scsi_Host *shost = job->shost;
+	struct fc_internal *i = to_fc_internal(shost->transportt);
+	unsigned long flags;
+	int err = 0, done = 0;
+
+	if (job->rport && job->rport->port_state == FC_PORTSTATE_BLOCKED)
+		return BLK_EH_RESET_TIMER;
+
+	spin_lock_irqsave(&job->job_lock, flags);
+	if (job->state_flags & FC_RQST_STATE_DONE)
+		done = 1;
+	else
+		job->ref_cnt++;
+	spin_unlock_irqrestore(&job->job_lock, flags);
+
+	if (!done && i->f->bsg_timeout) {
+		/* call LLDD to abort the i/o as it has timed out */
+		err = i->f->bsg_timeout(job);
+		if (err)
+			printk(KERN_ERR "ERROR: FC BSG request timeout - LLD "
+				"abort failed with status %d\n", err);
+	}
+
+	if (!done) {
+		spin_lock_irqsave(&job->job_lock, flags);
+		job->ref_cnt--;
+		spin_unlock_irqrestore(&job->job_lock, flags);
+		fc_destroy_bsgjob(job);
+	}
+
+	/* the blk_end_sync_io() doesn't check the error */
+	return BLK_EH_HANDLED;
+}
+
+
+
+static int
+fc_bsg_map_buffer(struct fc_bsg_buffer *buf, struct request *req)
+{
+	size_t sz = (sizeof(struct scatterlist) * req->nr_phys_segments);
+
+	BUG_ON(!req->nr_phys_segments);
+
+	buf->sg_list = kzalloc(sz, GFP_KERNEL);
+	if (!buf->sg_list)
+		return -ENOMEM;
+	sg_init_table(buf->sg_list, req->nr_phys_segments);
+	buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list);
+	buf->payload_len = blk_rq_bytes(req);
+	return 0;
+}
+
+
+/**
+ * fc_req_to_bsgjob - Allocate/create the fc_bsg_job structure for the
+ *                   bsg request
+ * @shost:	SCSI Host corresponding to the bsg object
+ * @rport:	(optional) FC Remote Port corresponding to the bsg object
+ * @req:	BSG request that needs a job structure
+ */
+static int
+fc_req_to_bsgjob(struct Scsi_Host *shost, struct fc_rport *rport,
+	struct request *req)
+{
+	struct fc_internal *i = to_fc_internal(shost->transportt);
+	struct request *rsp = req->next_rq;
+	struct fc_bsg_job *job;
+	int ret;
+
+	BUG_ON(req->special);
+
+	job = kzalloc(sizeof(struct fc_bsg_job) + i->f->dd_bsg_size,
+			GFP_KERNEL);
+	if (!job)
+		return -ENOMEM;
+
+	/*
+	 * Note: this is a bit silly.
+	 * The request gets formatted as a SGIO v4 ioctl request, which
+	 * then gets reformatted as a blk request, which then gets
+	 * reformatted as a fc bsg request. And on completion, we have
+	 * to wrap return results such that SGIO v4 thinks it was a scsi
+	 * status.  I hope this was all worth it.
+	 */
+
+	req->special = job;
+	job->shost = shost;
+	job->rport = rport;
+	job->req = req;
+	if (i->f->dd_bsg_size)
+		job->dd_data = (void *)&job[1];
+	spin_lock_init(&job->job_lock);
+	job->request = (struct fc_bsg_request *)req->cmd;
+	job->request_len = req->cmd_len;
+	job->reply = req->sense;
+	job->reply_len = SCSI_SENSE_BUFFERSIZE;	/* Size of sense buffer
+						 * allocated */
+	if (req->bio) {
+		ret = fc_bsg_map_buffer(&job->request_payload, req);
+		if (ret)
+			goto failjob_rls_job;
+	}
+	if (rsp && rsp->bio) {
+		ret = fc_bsg_map_buffer(&job->reply_payload, rsp);
+		if (ret)
+			goto failjob_rls_rqst_payload;
+	}
+	job->job_done = fc_bsg_jobdone;
+	if (rport)
+		job->dev = &rport->dev;
+	else
+		job->dev = &shost->shost_gendev;
+	get_device(job->dev);		/* take a reference for the request */
+
+	job->ref_cnt = 1;
+
+	return 0;
+
+
+failjob_rls_rqst_payload:
+	kfree(job->request_payload.sg_list);
+failjob_rls_job:
+	kfree(job);
+	return -ENOMEM;
+}
+
+
+enum fc_dispatch_result {
+	FC_DISPATCH_BREAK,	/* on return, q is locked, break from q loop */
+	FC_DISPATCH_LOCKED,	/* on return, q is locked, continue on */
+	FC_DISPATCH_UNLOCKED,	/* on return, q is unlocked, continue on */
+};
+
+
+/**
+ * fc_bsg_host_dispatch - process fc host bsg requests and dispatch to LLDD
+ * @shost:	scsi host rport attached to
+ * @job:	bsg job to be processed
+ */
+static enum fc_dispatch_result
+fc_bsg_host_dispatch(struct request_queue *q, struct Scsi_Host *shost,
+			 struct fc_bsg_job *job)
+{
+	struct fc_internal *i = to_fc_internal(shost->transportt);
+	int cmdlen = sizeof(uint32_t);	/* start with length of msgcode */
+	int ret;
+
+	/* Validate the host command */
+	switch (job->request->msgcode) {
+	case FC_BSG_HST_ADD_RPORT:
+		cmdlen += sizeof(struct fc_bsg_host_add_rport);
+		break;
+
+	case FC_BSG_HST_DEL_RPORT:
+		cmdlen += sizeof(struct fc_bsg_host_del_rport);
+		break;
+
+	case FC_BSG_HST_ELS_NOLOGIN:
+		cmdlen += sizeof(struct fc_bsg_host_els);
+		/* there better be a xmt and rcv payloads */
+		if ((!job->request_payload.payload_len) ||
+		    (!job->reply_payload.payload_len)) {
+			ret = -EINVAL;
+			goto fail_host_msg;
+		}
+		break;
+
+	case FC_BSG_HST_CT:
+		cmdlen += sizeof(struct fc_bsg_host_ct);
+		/* there better be xmt and rcv payloads */
+		if ((!job->request_payload.payload_len) ||
+		    (!job->reply_payload.payload_len)) {
+			ret = -EINVAL;
+			goto fail_host_msg;
+		}
+		break;
+
+	case FC_BSG_HST_VENDOR:
+		cmdlen += sizeof(struct fc_bsg_host_vendor);
+		if ((shost->hostt->vendor_id == 0L) ||
+		    (job->request->rqst_data.h_vendor.vendor_id !=
+			shost->hostt->vendor_id)) {
+			ret = -ESRCH;
+			goto fail_host_msg;
+		}
+		break;
+
+	default:
+		ret = -EBADR;
+		goto fail_host_msg;
+	}
+
+	/* check if we really have all the request data needed */
+	if (job->request_len < cmdlen) {
+		ret = -ENOMSG;
+		goto fail_host_msg;
+	}
+
+	ret = i->f->bsg_request(job);
+	if (!ret)
+		return FC_DISPATCH_UNLOCKED;
+
+fail_host_msg:
+	/* return the errno failure code as the only status */
+	BUG_ON(job->reply_len < sizeof(uint32_t));
+	job->reply->result = ret;
+	job->reply_len = sizeof(uint32_t);
+	fc_bsg_jobdone(job);
+	return FC_DISPATCH_UNLOCKED;
+}
+
+
+/*
+ * fc_bsg_goose_queue - restart rport queue in case it was stopped
+ * @rport:	rport to be restarted
+ */
+static void
+fc_bsg_goose_queue(struct fc_rport *rport)
+{
+	int flagset;
+
+	if (!rport->rqst_q)
+		return;
+
+	get_device(&rport->dev);
+
+	spin_lock(rport->rqst_q->queue_lock);
+	flagset = test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags) &&
+		  !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags);
+	if (flagset)
+		queue_flag_set(QUEUE_FLAG_REENTER, rport->rqst_q);
+	__blk_run_queue(rport->rqst_q);
+	if (flagset)
+		queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q);
+	spin_unlock(rport->rqst_q->queue_lock);
+
+	put_device(&rport->dev);
+}
+
+
+/**
+ * fc_bsg_rport_dispatch - process rport bsg requests and dispatch to LLDD
+ * @shost:	scsi host rport attached to
+ * @rport:	rport request destined to
+ * @job:	bsg job to be processed
+ */
+static enum fc_dispatch_result
+fc_bsg_rport_dispatch(struct request_queue *q, struct Scsi_Host *shost,
+			 struct fc_rport *rport, struct fc_bsg_job *job)
+{
+	struct fc_internal *i = to_fc_internal(shost->transportt);
+	int cmdlen = sizeof(uint32_t);	/* start with length of msgcode */
+	int ret;
+
+	/* Validate the rport command */
+	switch (job->request->msgcode) {
+	case FC_BSG_RPT_ELS:
+		cmdlen += sizeof(struct fc_bsg_rport_els);
+		goto check_bidi;
+
+	case FC_BSG_RPT_CT:
+		cmdlen += sizeof(struct fc_bsg_rport_ct);
+check_bidi:
+		/* there better be xmt and rcv payloads */
+		if ((!job->request_payload.payload_len) ||
+		    (!job->reply_payload.payload_len)) {
+			ret = -EINVAL;
+			goto fail_rport_msg;
+		}
+		break;
+	default:
+		ret = -EBADR;
+		goto fail_rport_msg;
+	}
+
+	/* check if we really have all the request data needed */
+	if (job->request_len < cmdlen) {
+		ret = -ENOMSG;
+		goto fail_rport_msg;
+	}
+
+	ret = i->f->bsg_request(job);
+	if (!ret)
+		return FC_DISPATCH_UNLOCKED;
+
+fail_rport_msg:
+	/* return the errno failure code as the only status */
+	BUG_ON(job->reply_len < sizeof(uint32_t));
+	job->reply->result = ret;
+	job->reply_len = sizeof(uint32_t);
+	fc_bsg_jobdone(job);
+	return FC_DISPATCH_UNLOCKED;
+}
+
+
+/**
+ * fc_bsg_request_handler - generic handler for bsg requests
+ * @q:		request queue to manage
+ * @shost:	Scsi_Host related to the bsg object
+ * @rport:	FC remote port related to the bsg object (optional)
+ * @dev:	device structure for bsg object
+ */
+static void
+fc_bsg_request_handler(struct request_queue *q, struct Scsi_Host *shost,
+		       struct fc_rport *rport, struct device *dev)
+{
+	struct request *req;
+	struct fc_bsg_job *job;
+	enum fc_dispatch_result ret;
+
+	if (!get_device(dev))
+		return;
+
+	while (!blk_queue_plugged(q)) {
+		if (rport && (rport->port_state == FC_PORTSTATE_BLOCKED))
+				break;
+
+		req = blk_fetch_request(q);
+		if (!req)
+			break;
+
+		if (rport && (rport->port_state != FC_PORTSTATE_ONLINE)) {
+			req->errors = -ENXIO;
+			spin_unlock_irq(q->queue_lock);
+			blk_end_request(req, -ENXIO, blk_rq_bytes(req));
+			spin_lock_irq(q->queue_lock);
+			continue;
+		}
+
+		spin_unlock_irq(q->queue_lock);
+
+		ret = fc_req_to_bsgjob(shost, rport, req);
+		if (ret) {
+			req->errors = ret;
+			blk_end_request(req, ret, blk_rq_bytes(req));
+			spin_lock_irq(q->queue_lock);
+			continue;
+		}
+
+		job = req->special;
+
+		/* check if we have the msgcode value at least */
+		if (job->request_len < sizeof(uint32_t)) {
+			BUG_ON(job->reply_len < sizeof(uint32_t));
+			job->reply->result = -ENOMSG;
+			job->reply_len = sizeof(uint32_t);
+			fc_bsg_jobdone(job);
+			spin_lock_irq(q->queue_lock);
+			continue;
+		}
+
+		/* the dispatch routines will unlock the queue_lock */
+		if (rport)
+			ret = fc_bsg_rport_dispatch(q, shost, rport, job);
+		else
+			ret = fc_bsg_host_dispatch(q, shost, job);
+
+		/* did dispatcher hit state that can't process any more */
+		if (ret == FC_DISPATCH_BREAK)
+			break;
+
+		/* did dispatcher had released the lock */
+		if (ret == FC_DISPATCH_UNLOCKED)
+			spin_lock_irq(q->queue_lock);
+	}
+
+	spin_unlock_irq(q->queue_lock);
+	put_device(dev);
+	spin_lock_irq(q->queue_lock);
+}
+
+
+/**
+ * fc_bsg_host_handler - handler for bsg requests for a fc host
+ * @q:		fc host request queue
+ */
+static void
+fc_bsg_host_handler(struct request_queue *q)
+{
+	struct Scsi_Host *shost = q->queuedata;
+
+	fc_bsg_request_handler(q, shost, NULL, &shost->shost_gendev);
+}
+
+
+/**
+ * fc_bsg_rport_handler - handler for bsg requests for a fc rport
+ * @q:		rport request queue
+ */
+static void
+fc_bsg_rport_handler(struct request_queue *q)
+{
+	struct fc_rport *rport = q->queuedata;
+	struct Scsi_Host *shost = rport_to_shost(rport);
+
+	fc_bsg_request_handler(q, shost, rport, &rport->dev);
+}
+
+
+/**
+ * fc_bsg_hostadd - Create and add the bsg hooks so we can receive requests
+ * @shost:	shost for fc_host
+ * @fc_host:	fc_host adding the structures to
+ */
+static int
+fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host)
+{
+	struct device *dev = &shost->shost_gendev;
+	struct fc_internal *i = to_fc_internal(shost->transportt);
+	struct request_queue *q;
+	int err;
+	char bsg_name[BUS_ID_SIZE]; /*20*/
+
+	fc_host->rqst_q = NULL;
+
+	if (!i->f->bsg_request)
+		return -ENOTSUPP;
+
+	snprintf(bsg_name, sizeof(bsg_name),
+		 "fc_host%d", shost->host_no);
+
+	q = __scsi_alloc_queue(shost, fc_bsg_host_handler);
+	if (!q) {
+		printk(KERN_ERR "fc_host%d: bsg interface failed to "
+				"initialize - no request queue\n",
+				 shost->host_no);
+		return -ENOMEM;
+	}
+
+	q->queuedata = shost;
+	queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
+	blk_queue_rq_timed_out(q, fc_bsg_job_timeout);
+	blk_queue_rq_timeout(q, FC_DEFAULT_BSG_TIMEOUT);
+
+	err = bsg_register_queue(q, dev, bsg_name, NULL);
+	if (err) {
+		printk(KERN_ERR "fc_host%d: bsg interface failed to "
+				"initialize - register queue\n",
+				shost->host_no);
+		blk_cleanup_queue(q);
+		return err;
+	}
+
+	fc_host->rqst_q = q;
+	return 0;
+}
+
+
+/**
+ * fc_bsg_rportadd - Create and add the bsg hooks so we can receive requests
+ * @shost:	shost that rport is attached to
+ * @rport:	rport that the bsg hooks are being attached to
+ */
+static int
+fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport)
+{
+	struct device *dev = &rport->dev;
+	struct fc_internal *i = to_fc_internal(shost->transportt);
+	struct request_queue *q;
+	int err;
+
+	rport->rqst_q = NULL;
+
+	if (!i->f->bsg_request)
+		return -ENOTSUPP;
+
+	q = __scsi_alloc_queue(shost, fc_bsg_rport_handler);
+	if (!q) {
+		printk(KERN_ERR "%s: bsg interface failed to "
+				"initialize - no request queue\n",
+				 dev->kobj.name);
+		return -ENOMEM;
+	}
+
+	q->queuedata = rport;
+	queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
+	blk_queue_rq_timed_out(q, fc_bsg_job_timeout);
+	blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
+
+	err = bsg_register_queue(q, dev, NULL, NULL);
+	if (err) {
+		printk(KERN_ERR "%s: bsg interface failed to "
+				"initialize - register queue\n",
+				 dev->kobj.name);
+		blk_cleanup_queue(q);
+		return err;
+	}
+
+	rport->rqst_q = q;
+	return 0;
+}
+
+
+/**
+ * fc_bsg_remove - Deletes the bsg hooks on fchosts/rports
+ * @q:	the request_queue that is to be torn down.
+ */
+static void
+fc_bsg_remove(struct request_queue *q)
+{
+	if (q) {
+		bsg_unregister_queue(q);
+		blk_cleanup_queue(q);
+	}
+}
+
+
 /* Original Author:  Martin Hicks */
 MODULE_AUTHOR("James Smart");
 MODULE_DESCRIPTION("FC Transport Attributes");
diff --git a/include/Kbuild b/include/Kbuild
index fe36accd432..8d226bfa269 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -9,3 +9,4 @@ header-y += rdma/
 header-y += video/
 header-y += drm/
 header-y += xen/
+header-y += scsi/
diff --git a/include/scsi/Kbuild b/include/scsi/Kbuild
new file mode 100644
index 00000000000..33b2750e928
--- /dev/null
+++ b/include/scsi/Kbuild
@@ -0,0 +1,4 @@
+header-y += scsi.h
+header-y += scsi_netlink.h
+header-y += scsi_netlink_fc.h
+header-y += scsi_bsg_fc.h
diff --git a/include/scsi/scsi_bsg_fc.h b/include/scsi/scsi_bsg_fc.h
new file mode 100644
index 00000000000..a4b23331817
--- /dev/null
+++ b/include/scsi/scsi_bsg_fc.h
@@ -0,0 +1,322 @@
+/*
+ *  FC Transport BSG Interface
+ *
+ *  Copyright (C) 2008   James Smart, Emulex Corporation
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef SCSI_BSG_FC_H
+#define SCSI_BSG_FC_H
+
+/*
+ * This file intended to be included by both kernel and user space
+ */
+
+#include <scsi/scsi.h>
+
+/*
+ * FC Transport SGIO v4 BSG Message Support
+ */
+
+/* Default BSG request timeout (in seconds) */
+#define FC_DEFAULT_BSG_TIMEOUT		(10 * HZ)
+
+
+/*
+ * Request Message Codes supported by the FC Transport
+ */
+
+/* define the class masks for the message codes */
+#define FC_BSG_CLS_MASK		0xF0000000	/* find object class */
+#define FC_BSG_HST_MASK		0x80000000	/* fc host class */
+#define FC_BSG_RPT_MASK		0x40000000	/* fc rport class */
+
+	/* fc_host Message Codes */
+#define FC_BSG_HST_ADD_RPORT		(FC_BSG_HST_MASK | 0x00000001)
+#define FC_BSG_HST_DEL_RPORT		(FC_BSG_HST_MASK | 0x00000002)
+#define FC_BSG_HST_ELS_NOLOGIN		(FC_BSG_HST_MASK | 0x00000003)
+#define FC_BSG_HST_CT			(FC_BSG_HST_MASK | 0x00000004)
+#define FC_BSG_HST_VENDOR		(FC_BSG_HST_MASK | 0x000000FF)
+
+	/* fc_rport Message Codes */
+#define FC_BSG_RPT_ELS			(FC_BSG_RPT_MASK | 0x00000001)
+#define FC_BSG_RPT_CT			(FC_BSG_RPT_MASK | 0x00000002)
+
+
+
+/*
+ * FC Address Identifiers in Message Structures :
+ *
+ *   Whenever a command payload contains a FC Address Identifier
+ *   (aka port_id), the value is effectively in big-endian
+ *   order, thus the array elements are decoded as follows:
+ *     element [0] is bits 23:16 of the FC Address Identifier
+ *     element [1] is bits 15:8 of the FC Address Identifier
+ *     element [2] is bits 7:0 of the FC Address Identifier
+ */
+
+
+/*
+ * FC Host Messages
+ */
+
+/* FC_BSG_HST_ADDR_PORT : */
+
+/* Request:
+ * This message requests the FC host to login to the remote port
+ * at the specified N_Port_Id.  The remote port is to be enumerated
+ * with the transport upon completion of the login.
+ */
+struct fc_bsg_host_add_rport {
+	uint8_t		reserved;
+
+	/* FC Address Identier of the remote port to login to */
+	uint8_t		port_id[3];
+};
+
+/* Response:
+ * There is no additional response data - fc_bsg_reply->result is sufficient
+ */
+
+
+/* FC_BSG_HST_DEL_RPORT : */
+
+/* Request:
+ * This message requests the FC host to remove an enumerated
+ * remote port and to terminate the login to it.
+ *
+ * Note: The driver is free to reject this request if it desires to
+ * remain logged in with the remote port.
+ */
+struct fc_bsg_host_del_rport {
+	uint8_t		reserved;
+
+	/* FC Address Identier of the remote port to logout of */
+	uint8_t		port_id[3];
+};
+
+/* Response:
+ * There is no additional response data - fc_bsg_reply->result is sufficient
+ */
+
+
+/* FC_BSG_HST_ELS_NOLOGIN : */
+
+/* Request:
+ * This message requests the FC_Host to send an ELS to a specific
+ * N_Port_ID. The host does not need to log into the remote port,
+ * nor does it need to enumerate the rport for further traffic
+ * (although, the FC host is free to do so if it desires).
+ */
+struct fc_bsg_host_els {
+	/*
+	 * ELS Command Code being sent (must be the same as byte 0
+	 * of the payload)
+	 */
+	uint8_t 	command_code;
+
+	/* FC Address Identier of the remote port to send the ELS to */
+	uint8_t		port_id[3];
+};
+
+/* Response:
+ */
+/* fc_bsg_ctels_reply->status values */
+#define FC_CTELS_STATUS_OK	0x00000000
+#define FC_CTELS_STATUS_REJECT	0x00000001
+#define FC_CTELS_STATUS_P_RJT	0x00000002
+#define FC_CTELS_STATUS_F_RJT	0x00000003
+#define FC_CTELS_STATUS_P_BSY	0x00000004
+#define FC_CTELS_STATUS_F_BSY	0x00000006
+struct fc_bsg_ctels_reply {
+	/*
+	 * Note: An ELS LS_RJT may be reported in 2 ways:
+	 *  a) A status of FC_CTELS_STATUS_OK is returned. The caller
+	 *     is to look into the ELS receive payload to determine
+	 *     LS_ACC or LS_RJT (by contents of word 0). The reject
+	 *     data will be in word 1.
+	 *  b) A status of FC_CTELS_STATUS_REJECT is returned, The
+	 *     rjt_data field will contain valid data.
+	 *
+	 * Note: ELS LS_ACC is determined by an FC_CTELS_STATUS_OK, and
+	 *   the receive payload word 0 indicates LS_ACC
+	 *   (e.g. value is 0x02xxxxxx).
+	 *
+	 * Note: Similarly, a CT Reject may be reported in 2 ways:
+	 *  a) A status of FC_CTELS_STATUS_OK is returned. The caller
+	 *     is to look into the CT receive payload to determine
+	 *     Accept or Reject (by contents of word 2). The reject
+	 *     data will be in word 3.
+	 *  b) A status of FC_CTELS_STATUS_REJECT is returned, The
+	 *     rjt_data field will contain valid data.
+	 *
+	 * Note: x_RJT/BSY status will indicae that the rjt_data field
+	 *   is valid and contains the reason/explanation values.
+	 */
+	uint32_t	status;		/* See FC_CTELS_STATUS_xxx */
+
+	/* valid if status is not FC_CTELS_STATUS_OK */
+	struct	{
+		uint8_t	action;		/* fragment_id for CT REJECT */
+		uint8_t	reason_code;
+		uint8_t	reason_explanation;
+		uint8_t	vendor_unique;
+	} rjt_data;
+};
+
+
+/* FC_BSG_HST_CT : */
+
+/* Request:
+ * This message requests that a CT Request be performed with the
+ * indicated N_Port_ID. The driver is responsible for logging in with
+ * the fabric and/or N_Port_ID, etc as per FC rules. This request does
+ * not mandate that the driver must enumerate the destination in the
+ * transport. The driver is allowed to decide whether to enumerate it,
+ * and whether to tear it down after the request.
+ */
+struct fc_bsg_host_ct {
+	uint8_t		reserved;
+
+	/* FC Address Identier of the remote port to send the ELS to */
+	uint8_t		port_id[3];
+
+	/*
+	 * We need words 0-2 of the generic preamble for the LLD's
+	 */
+	uint32_t	preamble_word0;	/* revision & IN_ID */
+	uint32_t	preamble_word1;	/* GS_Type, GS_SubType, Options, Rsvd */
+	uint32_t	preamble_word2;	/* Cmd Code, Max Size */
+
+};
+/* Response:
+ *
+ * The reply structure is an fc_bsg_ctels_reply structure
+ */
+
+
+/* FC_BSG_HST_VENDOR : */
+
+/* Request:
+ * Note: When specifying vendor_id, be sure to read the Vendor Type and ID
+ *   formatting requirements specified in scsi_netlink.h
+ */
+struct fc_bsg_host_vendor {
+	/*
+	 * Identifies the vendor that the message is formatted for. This
+	 * should be the recipient of the message.
+	 */
+	uint64_t vendor_id;
+
+	/* start of vendor command area */
+	uint32_t vendor_cmd[0];
+};
+
+/* Response:
+ */
+struct fc_bsg_host_vendor_reply {
+	/* start of vendor response area */
+	uint32_t vendor_rsp[0];
+};
+
+
+
+/*
+ * FC Remote Port Messages
+ */
+
+/* FC_BSG_RPT_ELS : */
+
+/* Request:
+ * This message requests that an ELS be performed with the rport.
+ */
+struct fc_bsg_rport_els {
+	/*
+	 * ELS Command Code being sent (must be the same as
+	 * byte 0 of the payload)
+	 */
+	uint8_t els_code;
+};
+
+/* Response:
+ *
+ * The reply structure is an fc_bsg_ctels_reply structure
+ */
+
+
+/* FC_BSG_RPT_CT : */
+
+/* Request:
+ * This message requests that a CT Request be performed with the rport.
+ */
+struct fc_bsg_rport_ct {
+	/*
+	 * We need words 0-2 of the generic preamble for the LLD's
+	 */
+	uint32_t	preamble_word0;	/* revision & IN_ID */
+	uint32_t	preamble_word1;	/* GS_Type, GS_SubType, Options, Rsvd */
+	uint32_t	preamble_word2;	/* Cmd Code, Max Size */
+};
+/* Response:
+ *
+ * The reply structure is an fc_bsg_ctels_reply structure
+ */
+
+
+
+
+/* request (CDB) structure of the sg_io_v4 */
+struct fc_bsg_request {
+	uint32_t msgcode;
+	union {
+		struct fc_bsg_host_add_rport	h_addrport;
+		struct fc_bsg_host_del_rport	h_delrport;
+		struct fc_bsg_host_els		h_els;
+		struct fc_bsg_host_ct		h_ct;
+		struct fc_bsg_host_vendor	h_vendor;
+
+		struct fc_bsg_rport_els		r_els;
+		struct fc_bsg_rport_ct		r_ct;
+	} rqst_data;
+};
+
+
+/* response (request sense data) structure of the sg_io_v4 */
+struct fc_bsg_reply {
+	/*
+	 * The completion result. Result exists in two forms:
+	 *  if negative, it is an -Exxx system errno value. There will
+	 *    be no further reply information supplied.
+	 *  else, it's the 4-byte scsi error result, with driver, host,
+	 *    msg and status fields. The per-msgcode reply structure
+	 *    will contain valid data.
+	 */
+	uint32_t result;
+
+	/* If there was reply_payload, how much was recevied ? */
+	uint32_t reply_payload_rcv_len;
+
+	union {
+		struct fc_bsg_host_vendor_reply		vendor_reply;
+
+		struct fc_bsg_ctels_reply		ctels_reply;
+	} reply_data;
+};
+
+
+#endif /* SCSI_BSG_FC_H */
+
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index d123ca84e73..b62a097b3ec 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -478,6 +478,15 @@ struct scsi_host_template {
 	 * module_init/module_exit.
 	 */
 	struct list_head legacy_hosts;
+
+	/*
+	 * Vendor Identifier associated with the host
+	 *
+	 * Note: When specifying vendor_id, be sure to read the
+	 *   Vendor Type and ID formatting requirements specified in
+	 *   scsi_netlink.h
+	 */
+	u64 vendor_id;
 };
 
 /*
diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h
index 68a8d873bbd..fc50bd64aa4 100644
--- a/include/scsi/scsi_transport_fc.h
+++ b/include/scsi/scsi_transport_fc.h
@@ -33,7 +33,6 @@
 
 struct scsi_transport_template;
 
-
 /*
  * FC Port definitions - Following FC HBAAPI guidelines
  *
@@ -352,6 +351,7 @@ struct fc_rport {	/* aka fc_starget_attrs */
  	struct delayed_work fail_io_work;
  	struct work_struct stgt_delete_work;
 	struct work_struct rport_delete_work;
+	struct request_queue *rqst_q;	/* bsg support */
 } __attribute__((aligned(sizeof(unsigned long))));
 
 /* bit field values for struct fc_rport "flags" field: */
@@ -514,6 +514,9 @@ struct fc_host_attrs {
 	struct workqueue_struct *work_q;
 	char devloss_work_q_name[20];
 	struct workqueue_struct *devloss_work_q;
+
+	/* bsg support */
+	struct request_queue *rqst_q;
 };
 
 #define shost_to_fc_host(x) \
@@ -579,6 +582,47 @@ struct fc_host_attrs {
 	(((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q)
 
 
+struct fc_bsg_buffer {
+	unsigned int payload_len;
+	int sg_cnt;
+	struct scatterlist *sg_list;
+};
+
+/* Values for fc_bsg_job->state_flags (bitflags) */
+#define FC_RQST_STATE_INPROGRESS	0
+#define FC_RQST_STATE_DONE		1
+
+struct fc_bsg_job {
+	struct Scsi_Host *shost;
+	struct fc_rport *rport;
+	struct device *dev;
+	struct request *req;
+	spinlock_t job_lock;
+	unsigned int state_flags;
+	unsigned int ref_cnt;
+	void (*job_done)(struct fc_bsg_job *);
+
+	struct fc_bsg_request *request;
+	struct fc_bsg_reply *reply;
+	unsigned int request_len;
+	unsigned int reply_len;
+	/*
+	 * On entry : reply_len indicates the buffer size allocated for
+	 * the reply.
+	 *
+	 * Upon completion : the message handler must set reply_len
+	 *  to indicates the size of the reply to be returned to the
+	 *  caller.
+	 */
+
+	/* DMA payloads for the request/response */
+	struct fc_bsg_buffer request_payload;
+	struct fc_bsg_buffer reply_payload;
+
+	void *dd_data;			/* Used for driver-specific storage */
+};
+
+
 /* The functions by which the transport class and the driver communicate */
 struct fc_function_template {
 	void    (*get_rport_dev_loss_tmo)(struct fc_rport *);
@@ -614,9 +658,14 @@ struct fc_function_template {
 	int     (* tsk_mgmt_response)(struct Scsi_Host *, u64, u64, int);
 	int     (* it_nexus_response)(struct Scsi_Host *, u64, int);
 
+	/* bsg support */
+	int	(*bsg_request)(struct fc_bsg_job *);
+	int	(*bsg_timeout)(struct fc_bsg_job *);
+
 	/* allocation lengths for host-specific data */
 	u32	 			dd_fcrport_size;
 	u32	 			dd_fcvport_size;
+	u32				dd_bsg_size;
 
 	/*
 	 * The driver sets these to tell the transport class it
@@ -737,7 +786,6 @@ fc_vport_set_state(struct fc_vport *vport, enum fc_vport_state new_state)
 	vport->vport_state = new_state;
 }
 
-
 struct scsi_transport_template *fc_attach_transport(
 			struct fc_function_template *);
 void fc_release_transport(struct scsi_transport_template *);
-- 
cgit v1.2.3-70-g09d2


From e39a71ef80877f4e30d808af9acceec80f4d2f7c Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Fri, 15 May 2009 00:53:26 +0200
Subject: PM: Rename device_power_down/up()

Rename the functions performing "_noirq" dev_pm_ops
operations from device_power_down() and device_power_up()
to device_suspend_noirq() and device_resume_noirq().

The new function names are chosen to show that the functions
are responsible for calling the _noirq() versions to finalize
the suspend/resume operation. The current function names do
not perform power down/up anymore so the names may be misleading.

Global function renames:
- device_power_down() -> device_suspend_noirq()
- device_power_up() -> device_resume_noirq()

Static function renames:
- suspend_device_noirq() -> __device_suspend_noirq()
- resume_device_noirq() -> __device_resume_noirq()

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Len Brown <lenb@kernel.org>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 arch/x86/kernel/apm_32.c  |  8 ++++----
 drivers/base/power/main.c | 26 +++++++++++++-------------
 drivers/xen/manage.c      | 10 +++++-----
 include/linux/pm.h        |  4 ++--
 kernel/kexec.c            |  8 ++++----
 kernel/power/disk.c       | 16 ++++++++--------
 kernel/power/main.c       |  4 ++--
 7 files changed, 38 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 49e0939bac4..31ae547da15 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1235,7 +1235,7 @@ static int suspend(int vetoable)
 
 	device_suspend(PMSG_SUSPEND);
 
-	device_power_down(PMSG_SUSPEND);
+	device_suspend_noirq(PMSG_SUSPEND);
 
 	local_irq_disable();
 	sysdev_suspend(PMSG_SUSPEND);
@@ -1259,7 +1259,7 @@ static int suspend(int vetoable)
 	sysdev_resume();
 	local_irq_enable();
 
-	device_power_up(PMSG_RESUME);
+	device_resume_noirq(PMSG_RESUME);
 
 	device_resume(PMSG_RESUME);
 	queue_event(APM_NORMAL_RESUME, NULL);
@@ -1277,7 +1277,7 @@ static void standby(void)
 {
 	int err;
 
-	device_power_down(PMSG_SUSPEND);
+	device_suspend_noirq(PMSG_SUSPEND);
 
 	local_irq_disable();
 	sysdev_suspend(PMSG_SUSPEND);
@@ -1291,7 +1291,7 @@ static void standby(void)
 	sysdev_resume();
 	local_irq_enable();
 
-	device_power_up(PMSG_RESUME);
+	device_resume_noirq(PMSG_RESUME);
 }
 
 static apm_event_t get_event(void)
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 3e4bc699bc0..c5a35bc9d63 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -315,13 +315,13 @@ static void pm_dev_err(struct device *dev, pm_message_t state, char *info,
 /*------------------------- Resume routines -------------------------*/
 
 /**
- *	resume_device_noirq - Power on one device (early resume).
+ *	__device_resume_noirq - Power on one device (early resume).
  *	@dev:	Device.
  *	@state: PM transition of the system being carried out.
  *
  *	Must be called with interrupts disabled.
  */
-static int resume_device_noirq(struct device *dev, pm_message_t state)
+static int __device_resume_noirq(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
@@ -363,7 +363,7 @@ static void dpm_power_up(pm_message_t state)
 			int error;
 
 			dev->power.status = DPM_OFF;
-			error = resume_device_noirq(dev, state);
+			error = __device_resume_noirq(dev, state);
 			if (error)
 				pm_dev_err(dev, state, " early", error);
 		}
@@ -371,18 +371,18 @@ static void dpm_power_up(pm_message_t state)
 }
 
 /**
- *	device_power_up - Turn on all devices that need special attention.
+ *	device_resume_noirq - Turn on all devices that need special attention.
  *	@state: PM transition of the system being carried out.
  *
  *	Call the "early" resume handlers and enable device drivers to receive
  *	interrupts.
  */
-void device_power_up(pm_message_t state)
+void device_resume_noirq(pm_message_t state)
 {
 	dpm_power_up(state);
 	resume_device_irqs();
 }
-EXPORT_SYMBOL_GPL(device_power_up);
+EXPORT_SYMBOL_GPL(device_resume_noirq);
 
 /**
  *	resume_device - Restore state for one device.
@@ -577,13 +577,13 @@ static pm_message_t resume_event(pm_message_t sleep_state)
 }
 
 /**
- *	suspend_device_noirq - Shut down one device (late suspend).
+ *	__device_suspend_noirq - Shut down one device (late suspend).
  *	@dev:	Device.
  *	@state: PM transition of the system being carried out.
  *
  *	This is called with interrupts off and only a single CPU running.
  */
-static int suspend_device_noirq(struct device *dev, pm_message_t state)
+static int __device_suspend_noirq(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
@@ -602,7 +602,7 @@ static int suspend_device_noirq(struct device *dev, pm_message_t state)
 }
 
 /**
- *	device_power_down - Shut down special devices.
+ *	device_suspend_noirq - Shut down special devices.
  *	@state: PM transition of the system being carried out.
  *
  *	Prevent device drivers from receiving interrupts and call the "late"
@@ -610,7 +610,7 @@ static int suspend_device_noirq(struct device *dev, pm_message_t state)
  *
  *	Must be called under dpm_list_mtx.
  */
-int device_power_down(pm_message_t state)
+int device_suspend_noirq(pm_message_t state)
 {
 	struct device *dev;
 	int error = 0;
@@ -618,7 +618,7 @@ int device_power_down(pm_message_t state)
 	suspend_device_irqs();
 	mutex_lock(&dpm_list_mtx);
 	list_for_each_entry_reverse(dev, &dpm_list, power.entry) {
-		error = suspend_device_noirq(dev, state);
+		error = __device_suspend_noirq(dev, state);
 		if (error) {
 			pm_dev_err(dev, state, " late", error);
 			break;
@@ -627,10 +627,10 @@ int device_power_down(pm_message_t state)
 	}
 	mutex_unlock(&dpm_list_mtx);
 	if (error)
-		device_power_up(resume_event(state));
+		device_resume_noirq(resume_event(state));
 	return error;
 }
-EXPORT_SYMBOL_GPL(device_power_down);
+EXPORT_SYMBOL_GPL(device_suspend_noirq);
 
 /**
  *	suspend_device - Save state of one device.
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index fddc2025dec..d5b327ac403 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -43,7 +43,7 @@ static int xen_suspend(void *data)
 	if (err) {
 		printk(KERN_ERR "xen_suspend: sysdev_suspend failed: %d\n",
 			err);
-		device_power_up(PMSG_RESUME);
+		device_resume_noirq(PMSG_RESUME);
 		return err;
 	}
 
@@ -69,7 +69,7 @@ static int xen_suspend(void *data)
 	}
 
 	sysdev_resume();
-	device_power_up(PMSG_RESUME);
+	device_resume_noirq(PMSG_RESUME);
 
 	return 0;
 }
@@ -101,9 +101,9 @@ static void do_suspend(void)
 	printk(KERN_DEBUG "suspending xenstore...\n");
 	xs_suspend();
 
-	err = device_power_down(PMSG_SUSPEND);
+	err = device_suspend_noirq(PMSG_SUSPEND);
 	if (err) {
-		printk(KERN_ERR "device_power_down failed: %d\n", err);
+		printk(KERN_ERR "device_suspend_noirq failed: %d\n", err);
 		goto resume_devices;
 	}
 
@@ -119,7 +119,7 @@ static void do_suspend(void)
 	} else
 		xs_suspend_cancel();
 
-	device_power_up(PMSG_RESUME);
+	device_resume_noirq(PMSG_RESUME);
 
 resume_devices:
 	device_resume(PMSG_RESUME);
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 1d4e2d28982..2170252074f 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -382,12 +382,12 @@ struct dev_pm_info {
 #ifdef CONFIG_PM_SLEEP
 extern void device_pm_lock(void);
 extern int sysdev_resume(void);
-extern void device_power_up(pm_message_t state);
+extern void device_resume_noirq(pm_message_t state);
 extern void device_resume(pm_message_t state);
 
 extern void device_pm_unlock(void);
 extern int sysdev_suspend(pm_message_t state);
-extern int device_power_down(pm_message_t state);
+extern int device_suspend_noirq(pm_message_t state);
 extern int device_suspend(pm_message_t state);
 extern int device_prepare_suspend(pm_message_t state);
 
diff --git a/kernel/kexec.c b/kernel/kexec.c
index e4983770913..5a3da87adae 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1452,13 +1452,13 @@ int kernel_kexec(void)
 		if (error)
 			goto Resume_console;
 		/* At this point, device_suspend() has been called,
-		 * but *not* device_power_down(). We *must*
-		 * device_power_down() now.  Otherwise, drivers for
+		 * but *not* device_suspend_noirq(). We *must* call
+		 * device_suspend_noirq() now.  Otherwise, drivers for
 		 * some devices (e.g. interrupt controllers) become
 		 * desynchronized with the actual state of the
 		 * hardware at resume time, and evil weirdness ensues.
 		 */
-		error = device_power_down(PMSG_FREEZE);
+		error = device_suspend_noirq(PMSG_FREEZE);
 		if (error)
 			goto Resume_devices;
 		error = disable_nonboot_cpus();
@@ -1486,7 +1486,7 @@ int kernel_kexec(void)
 		local_irq_enable();
  Enable_cpus:
 		enable_nonboot_cpus();
-		device_power_up(PMSG_RESTORE);
+		device_resume_noirq(PMSG_RESTORE);
  Resume_devices:
 		device_resume(PMSG_RESTORE);
  Resume_console:
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 5cb080e7eeb..1c18bc894a2 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -216,12 +216,12 @@ static int create_image(int platform_mode)
 		return error;
 
 	/* At this point, device_suspend() has been called, but *not*
-	 * device_power_down(). We *must* call device_power_down() now.
+	 * device_suspend_noirq(). We *must* call device_suspend_noirq() now.
 	 * Otherwise, drivers for some devices (e.g. interrupt controllers)
 	 * become desynchronized with the actual state of the hardware
 	 * at resume time, and evil weirdness ensues.
 	 */
-	error = device_power_down(PMSG_FREEZE);
+	error = device_suspend_noirq(PMSG_FREEZE);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down, "
 			"aborting hibernation\n");
@@ -262,7 +262,7 @@ static int create_image(int platform_mode)
 
  Power_up:
 	sysdev_resume();
-	/* NOTE:  device_power_up() is just a resume() for devices
+	/* NOTE:  device_resume_noirq() is just a resume() for devices
 	 * that suspended with irqs off ... no overall powerup.
 	 */
 
@@ -275,7 +275,7 @@ static int create_image(int platform_mode)
  Platform_finish:
 	platform_finish(platform_mode);
 
-	device_power_up(in_suspend ?
+	device_resume_noirq(in_suspend ?
 		(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
 
 	return error;
@@ -339,7 +339,7 @@ static int resume_target_kernel(bool platform_mode)
 {
 	int error;
 
-	error = device_power_down(PMSG_QUIESCE);
+	error = device_suspend_noirq(PMSG_QUIESCE);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down, "
 			"aborting resume\n");
@@ -394,7 +394,7 @@ static int resume_target_kernel(bool platform_mode)
  Cleanup:
 	platform_restore_cleanup(platform_mode);
 
-	device_power_up(PMSG_RECOVER);
+	device_resume_noirq(PMSG_RECOVER);
 
 	return error;
 }
@@ -454,7 +454,7 @@ int hibernation_platform_enter(void)
 		goto Resume_devices;
 	}
 
-	error = device_power_down(PMSG_HIBERNATE);
+	error = device_suspend_noirq(PMSG_HIBERNATE);
 	if (error)
 		goto Resume_devices;
 
@@ -479,7 +479,7 @@ int hibernation_platform_enter(void)
  Platofrm_finish:
 	hibernation_ops->finish();
 
-	device_power_up(PMSG_RESTORE);
+	device_suspend_noirq(PMSG_RESTORE);
 
  Resume_devices:
 	entering_platform_hibernation = false;
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 868028280d1..2f6638ee03c 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -295,7 +295,7 @@ static int suspend_enter(suspend_state_t state)
 			return error;
 	}
 
-	error = device_power_down(PMSG_SUSPEND);
+	error = device_suspend_noirq(PMSG_SUSPEND);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down\n");
 		goto Platfrom_finish;
@@ -335,7 +335,7 @@ static int suspend_enter(suspend_state_t state)
 		suspend_ops->wake();
 
  Power_up_devices:
-	device_power_up(PMSG_RESUME);
+	device_resume_noirq(PMSG_RESUME);
 
  Platfrom_finish:
 	if (suspend_ops->finish)
-- 
cgit v1.2.3-70-g09d2


From d161630297a20802d01c55847bfcba85d2118a9f Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Sun, 24 May 2009 22:05:42 +0200
Subject: PM core: rename suspend and resume functions

This patch (as1241) renames a bunch of functions in the PM core.
Rather than go through a boring list of name changes, suffice it to
say that in the end we have a bunch of pairs of functions:

	device_resume_noirq	dpm_resume_noirq
	device_resume		dpm_resume
	device_complete		dpm_complete
	device_suspend_noirq	dpm_suspend_noirq
	device_suspend		dpm_suspend
	device_prepare		dpm_prepare

in which device_X does the X operation on a single device and dpm_X
invokes device_X for all devices in the dpm_list.

In addition, the old dpm_power_up and device_resume_noirq have been
combined into a single function (dpm_resume_noirq).

Lastly, dpm_suspend_start and dpm_resume_end are the renamed versions
of the former top-level device_suspend and device_resume routines.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 arch/x86/kernel/apm_32.c  | 14 ++++-----
 drivers/base/power/main.c | 80 ++++++++++++++++++++---------------------------
 drivers/xen/manage.c      | 16 +++++-----
 include/linux/pm.h        | 11 +++----
 kernel/kexec.c            | 14 ++++-----
 kernel/power/disk.c       | 30 +++++++++---------
 kernel/power/main.c       |  8 ++---
 7 files changed, 80 insertions(+), 93 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 31ae547da15..79302e9a33a 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1233,9 +1233,9 @@ static int suspend(int vetoable)
 	int err;
 	struct apm_user	*as;
 
-	device_suspend(PMSG_SUSPEND);
+	dpm_suspend_start(PMSG_SUSPEND);
 
-	device_suspend_noirq(PMSG_SUSPEND);
+	dpm_suspend_noirq(PMSG_SUSPEND);
 
 	local_irq_disable();
 	sysdev_suspend(PMSG_SUSPEND);
@@ -1259,9 +1259,9 @@ static int suspend(int vetoable)
 	sysdev_resume();
 	local_irq_enable();
 
-	device_resume_noirq(PMSG_RESUME);
+	dpm_resume_noirq(PMSG_RESUME);
 
-	device_resume(PMSG_RESUME);
+	dpm_resume_end(PMSG_RESUME);
 	queue_event(APM_NORMAL_RESUME, NULL);
 	spin_lock(&user_list_lock);
 	for (as = user_list; as != NULL; as = as->next) {
@@ -1277,7 +1277,7 @@ static void standby(void)
 {
 	int err;
 
-	device_suspend_noirq(PMSG_SUSPEND);
+	dpm_suspend_noirq(PMSG_SUSPEND);
 
 	local_irq_disable();
 	sysdev_suspend(PMSG_SUSPEND);
@@ -1291,7 +1291,7 @@ static void standby(void)
 	sysdev_resume();
 	local_irq_enable();
 
-	device_resume_noirq(PMSG_RESUME);
+	dpm_resume_noirq(PMSG_RESUME);
 }
 
 static apm_event_t get_event(void)
@@ -1376,7 +1376,7 @@ static void check_events(void)
 			ignore_bounce = 1;
 			if ((event != APM_NORMAL_RESUME)
 			    || (ignore_normal_resume == 0)) {
-				device_resume(PMSG_RESUME);
+				dpm_resume_end(PMSG_RESUME);
 				queue_event(event, NULL);
 			}
 			ignore_normal_resume = 0;
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index c5a35bc9d63..1f3d82260db 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -315,13 +315,13 @@ static void pm_dev_err(struct device *dev, pm_message_t state, char *info,
 /*------------------------- Resume routines -------------------------*/
 
 /**
- *	__device_resume_noirq - Power on one device (early resume).
+ *	device_resume_noirq - Power on one device (early resume).
  *	@dev:	Device.
  *	@state: PM transition of the system being carried out.
  *
  *	Must be called with interrupts disabled.
  */
-static int __device_resume_noirq(struct device *dev, pm_message_t state)
+static int device_resume_noirq(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
@@ -344,16 +344,16 @@ static int __device_resume_noirq(struct device *dev, pm_message_t state)
 }
 
 /**
- *	dpm_power_up - Power on all regular (non-sysdev) devices.
+ *	dpm_resume_noirq - Power on all regular (non-sysdev) devices.
  *	@state: PM transition of the system being carried out.
  *
- *	Execute the appropriate "noirq resume" callback for all devices marked
- *	as DPM_OFF_IRQ.
+ *	Call the "noirq" resume handlers for all devices marked as
+ *	DPM_OFF_IRQ and enable device drivers to receive interrupts.
  *
  *	Must be called under dpm_list_mtx.  Device drivers should not receive
  *	interrupts while it's being executed.
  */
-static void dpm_power_up(pm_message_t state)
+void dpm_resume_noirq(pm_message_t state)
 {
 	struct device *dev;
 
@@ -363,33 +363,21 @@ static void dpm_power_up(pm_message_t state)
 			int error;
 
 			dev->power.status = DPM_OFF;
-			error = __device_resume_noirq(dev, state);
+			error = device_resume_noirq(dev, state);
 			if (error)
 				pm_dev_err(dev, state, " early", error);
 		}
 	mutex_unlock(&dpm_list_mtx);
-}
-
-/**
- *	device_resume_noirq - Turn on all devices that need special attention.
- *	@state: PM transition of the system being carried out.
- *
- *	Call the "early" resume handlers and enable device drivers to receive
- *	interrupts.
- */
-void device_resume_noirq(pm_message_t state)
-{
-	dpm_power_up(state);
 	resume_device_irqs();
 }
-EXPORT_SYMBOL_GPL(device_resume_noirq);
+EXPORT_SYMBOL_GPL(dpm_resume_noirq);
 
 /**
- *	resume_device - Restore state for one device.
+ *	device_resume - Restore state for one device.
  *	@dev:	Device.
  *	@state: PM transition of the system being carried out.
  */
-static int resume_device(struct device *dev, pm_message_t state)
+static int device_resume(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
@@ -462,7 +450,7 @@ static void dpm_resume(pm_message_t state)
 			dev->power.status = DPM_RESUMING;
 			mutex_unlock(&dpm_list_mtx);
 
-			error = resume_device(dev, state);
+			error = device_resume(dev, state);
 
 			mutex_lock(&dpm_list_mtx);
 			if (error)
@@ -480,11 +468,11 @@ static void dpm_resume(pm_message_t state)
 }
 
 /**
- *	complete_device - Complete a PM transition for given device
+ *	device_complete - Complete a PM transition for given device
  *	@dev:	Device.
  *	@state: PM transition of the system being carried out.
  */
-static void complete_device(struct device *dev, pm_message_t state)
+static void device_complete(struct device *dev, pm_message_t state)
 {
 	down(&dev->sem);
 
@@ -527,7 +515,7 @@ static void dpm_complete(pm_message_t state)
 			dev->power.status = DPM_ON;
 			mutex_unlock(&dpm_list_mtx);
 
-			complete_device(dev, state);
+			device_complete(dev, state);
 
 			mutex_lock(&dpm_list_mtx);
 		}
@@ -540,19 +528,19 @@ static void dpm_complete(pm_message_t state)
 }
 
 /**
- *	device_resume - Restore state of each device in system.
+ *	dpm_resume_end - Restore state of each device in system.
  *	@state: PM transition of the system being carried out.
  *
  *	Resume all the devices, unlock them all, and allow new
  *	devices to be registered once again.
  */
-void device_resume(pm_message_t state)
+void dpm_resume_end(pm_message_t state)
 {
 	might_sleep();
 	dpm_resume(state);
 	dpm_complete(state);
 }
-EXPORT_SYMBOL_GPL(device_resume);
+EXPORT_SYMBOL_GPL(dpm_resume_end);
 
 
 /*------------------------- Suspend routines -------------------------*/
@@ -577,13 +565,13 @@ static pm_message_t resume_event(pm_message_t sleep_state)
 }
 
 /**
- *	__device_suspend_noirq - Shut down one device (late suspend).
+ *	device_suspend_noirq - Shut down one device (late suspend).
  *	@dev:	Device.
  *	@state: PM transition of the system being carried out.
  *
  *	This is called with interrupts off and only a single CPU running.
  */
-static int __device_suspend_noirq(struct device *dev, pm_message_t state)
+static int device_suspend_noirq(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
@@ -602,15 +590,15 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state)
 }
 
 /**
- *	device_suspend_noirq - Shut down special devices.
+ *	dpm_suspend_noirq - Power down all regular (non-sysdev) devices.
  *	@state: PM transition of the system being carried out.
  *
- *	Prevent device drivers from receiving interrupts and call the "late"
+ *	Prevent device drivers from receiving interrupts and call the "noirq"
  *	suspend handlers.
  *
  *	Must be called under dpm_list_mtx.
  */
-int device_suspend_noirq(pm_message_t state)
+int dpm_suspend_noirq(pm_message_t state)
 {
 	struct device *dev;
 	int error = 0;
@@ -618,7 +606,7 @@ int device_suspend_noirq(pm_message_t state)
 	suspend_device_irqs();
 	mutex_lock(&dpm_list_mtx);
 	list_for_each_entry_reverse(dev, &dpm_list, power.entry) {
-		error = __device_suspend_noirq(dev, state);
+		error = device_suspend_noirq(dev, state);
 		if (error) {
 			pm_dev_err(dev, state, " late", error);
 			break;
@@ -627,17 +615,17 @@ int device_suspend_noirq(pm_message_t state)
 	}
 	mutex_unlock(&dpm_list_mtx);
 	if (error)
-		device_resume_noirq(resume_event(state));
+		dpm_resume_noirq(resume_event(state));
 	return error;
 }
-EXPORT_SYMBOL_GPL(device_suspend_noirq);
+EXPORT_SYMBOL_GPL(dpm_suspend_noirq);
 
 /**
- *	suspend_device - Save state of one device.
+ *	device_suspend - Save state of one device.
  *	@dev:	Device.
  *	@state: PM transition of the system being carried out.
  */
-static int suspend_device(struct device *dev, pm_message_t state)
+static int device_suspend(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
@@ -704,7 +692,7 @@ static int dpm_suspend(pm_message_t state)
 		get_device(dev);
 		mutex_unlock(&dpm_list_mtx);
 
-		error = suspend_device(dev, state);
+		error = device_suspend(dev, state);
 
 		mutex_lock(&dpm_list_mtx);
 		if (error) {
@@ -723,11 +711,11 @@ static int dpm_suspend(pm_message_t state)
 }
 
 /**
- *	prepare_device - Execute the ->prepare() callback(s) for given device.
+ *	device_prepare - Execute the ->prepare() callback(s) for given device.
  *	@dev:	Device.
  *	@state: PM transition of the system being carried out.
  */
-static int prepare_device(struct device *dev, pm_message_t state)
+static int device_prepare(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
@@ -781,7 +769,7 @@ static int dpm_prepare(pm_message_t state)
 		dev->power.status = DPM_PREPARING;
 		mutex_unlock(&dpm_list_mtx);
 
-		error = prepare_device(dev, state);
+		error = device_prepare(dev, state);
 
 		mutex_lock(&dpm_list_mtx);
 		if (error) {
@@ -807,12 +795,12 @@ static int dpm_prepare(pm_message_t state)
 }
 
 /**
- *	device_suspend - Save state and stop all devices in system.
+ *	dpm_suspend_start - Save state and stop all devices in system.
  *	@state: PM transition of the system being carried out.
  *
  *	Prepare and suspend all devices.
  */
-int device_suspend(pm_message_t state)
+int dpm_suspend_start(pm_message_t state)
 {
 	int error;
 
@@ -822,7 +810,7 @@ int device_suspend(pm_message_t state)
 		error = dpm_suspend(state);
 	return error;
 }
-EXPORT_SYMBOL_GPL(device_suspend);
+EXPORT_SYMBOL_GPL(dpm_suspend_start);
 
 void __suspend_report_result(const char *function, void *fn, int ret)
 {
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index d5b327ac403..10d03d7931c 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -43,7 +43,7 @@ static int xen_suspend(void *data)
 	if (err) {
 		printk(KERN_ERR "xen_suspend: sysdev_suspend failed: %d\n",
 			err);
-		device_resume_noirq(PMSG_RESUME);
+		dpm_resume_noirq(PMSG_RESUME);
 		return err;
 	}
 
@@ -69,7 +69,7 @@ static int xen_suspend(void *data)
 	}
 
 	sysdev_resume();
-	device_resume_noirq(PMSG_RESUME);
+	dpm_resume_noirq(PMSG_RESUME);
 
 	return 0;
 }
@@ -92,18 +92,18 @@ static void do_suspend(void)
 	}
 #endif
 
-	err = device_suspend(PMSG_SUSPEND);
+	err = dpm_suspend_start(PMSG_SUSPEND);
 	if (err) {
-		printk(KERN_ERR "xen suspend: device_suspend %d\n", err);
+		printk(KERN_ERR "xen suspend: dpm_suspend_start %d\n", err);
 		goto out;
 	}
 
 	printk(KERN_DEBUG "suspending xenstore...\n");
 	xs_suspend();
 
-	err = device_suspend_noirq(PMSG_SUSPEND);
+	err = dpm_suspend_noirq(PMSG_SUSPEND);
 	if (err) {
-		printk(KERN_ERR "device_suspend_noirq failed: %d\n", err);
+		printk(KERN_ERR "dpm_suspend_noirq failed: %d\n", err);
 		goto resume_devices;
 	}
 
@@ -119,10 +119,10 @@ static void do_suspend(void)
 	} else
 		xs_suspend_cancel();
 
-	device_resume_noirq(PMSG_RESUME);
+	dpm_resume_noirq(PMSG_RESUME);
 
 resume_devices:
-	device_resume(PMSG_RESUME);
+	dpm_resume_end(PMSG_RESUME);
 
 	/* Make sure timer events get retriggered on all CPUs */
 	clock_was_set();
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 2170252074f..b3f74764a58 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -382,14 +382,13 @@ struct dev_pm_info {
 #ifdef CONFIG_PM_SLEEP
 extern void device_pm_lock(void);
 extern int sysdev_resume(void);
-extern void device_resume_noirq(pm_message_t state);
-extern void device_resume(pm_message_t state);
+extern void dpm_resume_noirq(pm_message_t state);
+extern void dpm_resume_end(pm_message_t state);
 
 extern void device_pm_unlock(void);
 extern int sysdev_suspend(pm_message_t state);
-extern int device_suspend_noirq(pm_message_t state);
-extern int device_suspend(pm_message_t state);
-extern int device_prepare_suspend(pm_message_t state);
+extern int dpm_suspend_noirq(pm_message_t state);
+extern int dpm_suspend_start(pm_message_t state);
 
 extern void __suspend_report_result(const char *function, void *fn, int ret);
 
@@ -403,7 +402,7 @@ extern void __suspend_report_result(const char *function, void *fn, int ret);
 #define device_pm_lock() do {} while (0)
 #define device_pm_unlock() do {} while (0)
 
-static inline int device_suspend(pm_message_t state)
+static inline int dpm_suspend_start(pm_message_t state)
 {
 	return 0;
 }
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 5a3da87adae..ae1c35201cc 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1448,17 +1448,17 @@ int kernel_kexec(void)
 			goto Restore_console;
 		}
 		suspend_console();
-		error = device_suspend(PMSG_FREEZE);
+		error = dpm_suspend_start(PMSG_FREEZE);
 		if (error)
 			goto Resume_console;
-		/* At this point, device_suspend() has been called,
-		 * but *not* device_suspend_noirq(). We *must* call
-		 * device_suspend_noirq() now.  Otherwise, drivers for
+		/* At this point, dpm_suspend_start() has been called,
+		 * but *not* dpm_suspend_noirq(). We *must* call
+		 * dpm_suspend_noirq() now.  Otherwise, drivers for
 		 * some devices (e.g. interrupt controllers) become
 		 * desynchronized with the actual state of the
 		 * hardware at resume time, and evil weirdness ensues.
 		 */
-		error = device_suspend_noirq(PMSG_FREEZE);
+		error = dpm_suspend_noirq(PMSG_FREEZE);
 		if (error)
 			goto Resume_devices;
 		error = disable_nonboot_cpus();
@@ -1486,9 +1486,9 @@ int kernel_kexec(void)
 		local_irq_enable();
  Enable_cpus:
 		enable_nonboot_cpus();
-		device_resume_noirq(PMSG_RESTORE);
+		dpm_resume_noirq(PMSG_RESTORE);
  Resume_devices:
-		device_resume(PMSG_RESTORE);
+		dpm_resume_end(PMSG_RESTORE);
  Resume_console:
 		resume_console();
 		thaw_processes();
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 1c18bc894a2..a9beba68b6c 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -215,13 +215,13 @@ static int create_image(int platform_mode)
 	if (error)
 		return error;
 
-	/* At this point, device_suspend() has been called, but *not*
-	 * device_suspend_noirq(). We *must* call device_suspend_noirq() now.
+	/* At this point, dpm_suspend_start() has been called, but *not*
+	 * dpm_suspend_noirq(). We *must* call dpm_suspend_noirq() now.
 	 * Otherwise, drivers for some devices (e.g. interrupt controllers)
 	 * become desynchronized with the actual state of the hardware
 	 * at resume time, and evil weirdness ensues.
 	 */
-	error = device_suspend_noirq(PMSG_FREEZE);
+	error = dpm_suspend_noirq(PMSG_FREEZE);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down, "
 			"aborting hibernation\n");
@@ -262,7 +262,7 @@ static int create_image(int platform_mode)
 
  Power_up:
 	sysdev_resume();
-	/* NOTE:  device_resume_noirq() is just a resume() for devices
+	/* NOTE:  dpm_resume_noirq() is just a resume() for devices
 	 * that suspended with irqs off ... no overall powerup.
 	 */
 
@@ -275,7 +275,7 @@ static int create_image(int platform_mode)
  Platform_finish:
 	platform_finish(platform_mode);
 
-	device_resume_noirq(in_suspend ?
+	dpm_resume_noirq(in_suspend ?
 		(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
 
 	return error;
@@ -304,7 +304,7 @@ int hibernation_snapshot(int platform_mode)
 		goto Close;
 
 	suspend_console();
-	error = device_suspend(PMSG_FREEZE);
+	error = dpm_suspend_start(PMSG_FREEZE);
 	if (error)
 		goto Recover_platform;
 
@@ -315,7 +315,7 @@ int hibernation_snapshot(int platform_mode)
 	/* Control returns here after successful restore */
 
  Resume_devices:
-	device_resume(in_suspend ?
+	dpm_resume_end(in_suspend ?
 		(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
 	resume_console();
  Close:
@@ -339,7 +339,7 @@ static int resume_target_kernel(bool platform_mode)
 {
 	int error;
 
-	error = device_suspend_noirq(PMSG_QUIESCE);
+	error = dpm_suspend_noirq(PMSG_QUIESCE);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down, "
 			"aborting resume\n");
@@ -394,7 +394,7 @@ static int resume_target_kernel(bool platform_mode)
  Cleanup:
 	platform_restore_cleanup(platform_mode);
 
-	device_resume_noirq(PMSG_RECOVER);
+	dpm_resume_noirq(PMSG_RECOVER);
 
 	return error;
 }
@@ -414,10 +414,10 @@ int hibernation_restore(int platform_mode)
 
 	pm_prepare_console();
 	suspend_console();
-	error = device_suspend(PMSG_QUIESCE);
+	error = dpm_suspend_start(PMSG_QUIESCE);
 	if (!error) {
 		error = resume_target_kernel(platform_mode);
-		device_resume(PMSG_RECOVER);
+		dpm_resume_end(PMSG_RECOVER);
 	}
 	resume_console();
 	pm_restore_console();
@@ -447,14 +447,14 @@ int hibernation_platform_enter(void)
 
 	entering_platform_hibernation = true;
 	suspend_console();
-	error = device_suspend(PMSG_HIBERNATE);
+	error = dpm_suspend_start(PMSG_HIBERNATE);
 	if (error) {
 		if (hibernation_ops->recover)
 			hibernation_ops->recover();
 		goto Resume_devices;
 	}
 
-	error = device_suspend_noirq(PMSG_HIBERNATE);
+	error = dpm_suspend_noirq(PMSG_HIBERNATE);
 	if (error)
 		goto Resume_devices;
 
@@ -479,11 +479,11 @@ int hibernation_platform_enter(void)
  Platofrm_finish:
 	hibernation_ops->finish();
 
-	device_suspend_noirq(PMSG_RESTORE);
+	dpm_suspend_noirq(PMSG_RESTORE);
 
  Resume_devices:
 	entering_platform_hibernation = false;
-	device_resume(PMSG_RESTORE);
+	dpm_resume_end(PMSG_RESTORE);
 	resume_console();
 
  Close:
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 2f6638ee03c..46386b9f8dd 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -295,7 +295,7 @@ static int suspend_enter(suspend_state_t state)
 			return error;
 	}
 
-	error = device_suspend_noirq(PMSG_SUSPEND);
+	error = dpm_suspend_noirq(PMSG_SUSPEND);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down\n");
 		goto Platfrom_finish;
@@ -335,7 +335,7 @@ static int suspend_enter(suspend_state_t state)
 		suspend_ops->wake();
 
  Power_up_devices:
-	device_resume_noirq(PMSG_RESUME);
+	dpm_resume_noirq(PMSG_RESUME);
 
  Platfrom_finish:
 	if (suspend_ops->finish)
@@ -363,7 +363,7 @@ int suspend_devices_and_enter(suspend_state_t state)
 	}
 	suspend_console();
 	suspend_test_start();
-	error = device_suspend(PMSG_SUSPEND);
+	error = dpm_suspend_start(PMSG_SUSPEND);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to suspend\n");
 		goto Recover_platform;
@@ -376,7 +376,7 @@ int suspend_devices_and_enter(suspend_state_t state)
 
  Resume_devices:
 	suspend_test_start();
-	device_resume(PMSG_RESUME);
+	dpm_resume_end(PMSG_RESUME);
 	suspend_test_finish("resume devices");
 	resume_console();
  Close:
-- 
cgit v1.2.3-70-g09d2


From e240b58c79144708530138e05f17c6d0d8d744a8 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Sun, 24 May 2009 22:05:54 +0200
Subject: PM: Remove bus_type suspend_late()/resume_early() V2

Remove the ->suspend_late() and ->resume_early() callbacks
from struct bus_type V2. These callbacks are legacy stuff
at this point and since there seem to be no in-tree users
we may as well remove them. New users should use dev_pm_ops.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Acked-by: Pavel Machek <pavel@ucw.cz>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 Documentation/power/devices.txt | 34 +++++-----------------------------
 drivers/base/power/main.c       |  7 -------
 include/linux/device.h          |  2 --
 3 files changed, 5 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt
index 421e7d00ffd..c9abbd86bc1 100644
--- a/Documentation/power/devices.txt
+++ b/Documentation/power/devices.txt
@@ -75,9 +75,6 @@ may need to apply in domain-specific ways to their devices:
 struct bus_type {
 	...
 	int  (*suspend)(struct device *dev, pm_message_t state);
-	int  (*suspend_late)(struct device *dev, pm_message_t state);
-
-	int  (*resume_early)(struct device *dev);
 	int  (*resume)(struct device *dev);
 };
 
@@ -226,20 +223,7 @@ The phases are seen by driver notifications issued in this order:
 
 	This call should handle parts of device suspend logic that require
 	sleeping.  It probably does work to quiesce the device which hasn't
-	been abstracted into class.suspend() or bus.suspend_late().
-
-   3	bus.suspend_late(dev, message) is called with IRQs disabled, and
-	with only one CPU active.  Until the bus.resume_early() phase
-	completes (see later), IRQs are not enabled again.  This method
-	won't be exposed by all busses; for message based busses like USB,
-	I2C, or SPI, device interactions normally require IRQs.  This bus
-	call may be morphed into a driver call with bus-specific parameters.
-
-	This call might save low level hardware state that might otherwise
-	be lost in the upcoming low power state, and actually put the
-	device into a low power state ... so that in some cases the device
-	may stay partly usable until this late.  This "late" call may also
-	help when coping with hardware that behaves badly.
+	been abstracted into class.suspend().
 
 The pm_message_t parameter is currently used to refine those semantics
 (described later).
@@ -351,19 +335,11 @@ devices processing each phase's calls before the next phase begins.
 
 The phases are seen by driver notifications issued in this order:
 
-   1	bus.resume_early(dev) is called with IRQs disabled, and with
-   	only one CPU active.  As with bus.suspend_late(), this method
-	won't be supported on busses that require IRQs in order to
-	interact with devices.
-
-	This reverses the effects of bus.suspend_late().
-
-   2	bus.resume(dev) is called next.  This may be morphed into a device
-   	driver call with bus-specific parameters; implementations may sleep.
-
-	This reverses the effects of bus.suspend().
+   1	bus.resume(dev) reverses the effects of bus.suspend().  This may
+	be morphed into a device driver call with bus-specific parameters;
+	implementations may sleep.
 
-   3	class.resume(dev) is called for devices associated with a class
+   2	class.resume(dev) is called for devices associated with a class
 	that has such a method.  Implementations may sleep.
 
 	This reverses the effects of class.suspend(), and would usually
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 1f3d82260db..68f9f3cecf7 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -334,9 +334,6 @@ static int device_resume_noirq(struct device *dev, pm_message_t state)
 	if (dev->bus->pm) {
 		pm_dev_dbg(dev, state, "EARLY ");
 		error = pm_noirq_op(dev, dev->bus->pm, state);
-	} else if (dev->bus->resume_early) {
-		pm_dev_dbg(dev, state, "legacy EARLY ");
-		error = dev->bus->resume_early(dev);
 	}
  End:
 	TRACE_RESUME(error);
@@ -581,10 +578,6 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state)
 	if (dev->bus->pm) {
 		pm_dev_dbg(dev, state, "LATE ");
 		error = pm_noirq_op(dev, dev->bus->pm, state);
-	} else if (dev->bus->suspend_late) {
-		pm_dev_dbg(dev, state, "legacy LATE ");
-		error = dev->bus->suspend_late(dev, state);
-		suspend_report_result(dev->bus->suspend_late, error);
 	}
 	return error;
 }
diff --git a/include/linux/device.h b/include/linux/device.h
index 5d5c197bad4..84d79cde9f7 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -62,8 +62,6 @@ struct bus_type {
 	void (*shutdown)(struct device *dev);
 
 	int (*suspend)(struct device *dev, pm_message_t state);
-	int (*suspend_late)(struct device *dev, pm_message_t state);
-	int (*resume_early)(struct device *dev);
 	int (*resume)(struct device *dev);
 
 	struct dev_pm_ops *pm;
-- 
cgit v1.2.3-70-g09d2


From 00725787511e20dbd1fdc1fb233606120ae5c8cf Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 4 Jun 2009 22:13:25 +0200
Subject: PM: Remove device_type suspend()/resume()

This patch removes the legacy callbacks ->suspend() and
->resume() from struct device_type. These callbacks seem
unused, and new code should instead make use of struct
dev_pm_ops.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 drivers/base/power/main.c | 7 -------
 include/linux/device.h    | 3 ---
 2 files changed, 10 deletions(-)

(limited to 'include')

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 68f9f3cecf7..fae72545898 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -399,9 +399,6 @@ static int device_resume(struct device *dev, pm_message_t state)
 		if (dev->type->pm) {
 			pm_dev_dbg(dev, state, "type ");
 			error = pm_op(dev, dev->type->pm, state);
-		} else if (dev->type->resume) {
-			pm_dev_dbg(dev, state, "legacy type ");
-			error = dev->type->resume(dev);
 		}
 		if (error)
 			goto End;
@@ -641,10 +638,6 @@ static int device_suspend(struct device *dev, pm_message_t state)
 		if (dev->type->pm) {
 			pm_dev_dbg(dev, state, "type ");
 			error = pm_op(dev, dev->type->pm, state);
-		} else if (dev->type->suspend) {
-			pm_dev_dbg(dev, state, "legacy type ");
-			error = dev->type->suspend(dev, state);
-			suspend_report_result(dev->type->suspend, error);
 		}
 		if (error)
 			goto End;
diff --git a/include/linux/device.h b/include/linux/device.h
index 84d79cde9f7..a4a7b10aaa4 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -289,9 +289,6 @@ struct device_type {
 	int (*uevent)(struct device *dev, struct kobj_uevent_env *env);
 	void (*release)(struct device *dev);
 
-	int (*suspend)(struct device *dev, pm_message_t state);
-	int (*resume)(struct device *dev);
-
 	struct dev_pm_ops *pm;
 };
 
-- 
cgit v1.2.3-70-g09d2


From fce2b111fae9151a53dabb36513b398d03337a19 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Wed, 10 Jun 2009 01:28:19 +0200
Subject: PM/Hibernate: Move NVS routines into a seperate file (v2).

The *_nvs_* routines in swsusp.c make use of the io*map()
functions, which are only provided for HAS_IOMEM, thus
breaking compilation if HAS_IOMEM is not set. Fix this
by moving the *_nvs_* routines into hibernate_nvs.c, which
is only compiled if HAS_IOMEM is set.

[rjw: Change the name of the new file to hibernate_nvs.c, add the
 license line to the header comment.]

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/suspend.h      |  18 +++---
 kernel/power/Kconfig         |   4 ++
 kernel/power/Makefile        |   1 +
 kernel/power/hibernate_nvs.c | 135 +++++++++++++++++++++++++++++++++++++++++++
 kernel/power/swsusp.c        | 122 --------------------------------------
 5 files changed, 151 insertions(+), 129 deletions(-)
 create mode 100644 kernel/power/hibernate_nvs.c

(limited to 'include')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 795032edfc4..cd15df6c63c 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -245,11 +245,6 @@ extern unsigned long get_safe_page(gfp_t gfp_mask);
 
 extern void hibernation_set_ops(struct platform_hibernation_ops *ops);
 extern int hibernate(void);
-extern int hibernate_nvs_register(unsigned long start, unsigned long size);
-extern int hibernate_nvs_alloc(void);
-extern void hibernate_nvs_free(void);
-extern void hibernate_nvs_save(void);
-extern void hibernate_nvs_restore(void);
 extern bool system_entering_hibernation(void);
 #else /* CONFIG_HIBERNATION */
 static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
@@ -258,6 +253,16 @@ static inline void swsusp_unset_page_free(struct page *p) {}
 
 static inline void hibernation_set_ops(struct platform_hibernation_ops *ops) {}
 static inline int hibernate(void) { return -ENOSYS; }
+static inline bool system_entering_hibernation(void) { return false; }
+#endif /* CONFIG_HIBERNATION */
+
+#ifdef CONFIG_HIBERNATION_NVS
+extern int hibernate_nvs_register(unsigned long start, unsigned long size);
+extern int hibernate_nvs_alloc(void);
+extern void hibernate_nvs_free(void);
+extern void hibernate_nvs_save(void);
+extern void hibernate_nvs_restore(void);
+#else /* CONFIG_HIBERNATION_NVS */
 static inline int hibernate_nvs_register(unsigned long a, unsigned long b)
 {
 	return 0;
@@ -266,8 +271,7 @@ static inline int hibernate_nvs_alloc(void) { return 0; }
 static inline void hibernate_nvs_free(void) {}
 static inline void hibernate_nvs_save(void) {}
 static inline void hibernate_nvs_restore(void) {}
-static inline bool system_entering_hibernation(void) { return false; }
-#endif /* CONFIG_HIBERNATION */
+#endif /* CONFIG_HIBERNATION_NVS */
 
 #ifdef CONFIG_PM_SLEEP
 void save_processor_state(void);
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 23bd4daeb96..72067cbdb37 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -116,9 +116,13 @@ config SUSPEND_FREEZER
 
 	  Turning OFF this setting is NOT recommended! If in doubt, say Y.
 
+config HIBERNATION_NVS
+	bool
+
 config HIBERNATION
 	bool "Hibernation (aka 'suspend to disk')"
 	depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE
+	select HIBERNATION_NVS if HAS_IOMEM
 	---help---
 	  Enable the suspend to disk (STD) functionality, which is usually
 	  called "hibernation" in user interfaces.  STD checkpoints the
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index eadb17fc8f5..c3b81c30e5d 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -9,5 +9,6 @@ obj-$(CONFIG_FREEZER)		+= process.o
 obj-$(CONFIG_SUSPEND)		+= suspend.o
 obj-$(CONFIG_PM_TEST_SUSPEND)	+= suspend_test.o
 obj-$(CONFIG_HIBERNATION)	+= swsusp.o hibernate.o snapshot.o swap.o user.o
+obj-$(CONFIG_HIBERNATION_NVS)	+= hibernate_nvs.o
 
 obj-$(CONFIG_MAGIC_SYSRQ)	+= poweroff.o
diff --git a/kernel/power/hibernate_nvs.c b/kernel/power/hibernate_nvs.c
new file mode 100644
index 00000000000..39ac698ef83
--- /dev/null
+++ b/kernel/power/hibernate_nvs.c
@@ -0,0 +1,135 @@
+/*
+ * linux/kernel/power/hibernate_nvs.c - Routines for handling NVS memory
+ *
+ * Copyright (C) 2008,2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/suspend.h>
+
+/*
+ * Platforms, like ACPI, may want us to save some memory used by them during
+ * hibernation and to restore the contents of this memory during the subsequent
+ * resume.  The code below implements a mechanism allowing us to do that.
+ */
+
+struct nvs_page {
+	unsigned long phys_start;
+	unsigned int size;
+	void *kaddr;
+	void *data;
+	struct list_head node;
+};
+
+static LIST_HEAD(nvs_list);
+
+/**
+ *	hibernate_nvs_register - register platform NVS memory region to save
+ *	@start - physical address of the region
+ *	@size - size of the region
+ *
+ *	The NVS region need not be page-aligned (both ends) and we arrange
+ *	things so that the data from page-aligned addresses in this region will
+ *	be copied into separate RAM pages.
+ */
+int hibernate_nvs_register(unsigned long start, unsigned long size)
+{
+	struct nvs_page *entry, *next;
+
+	while (size > 0) {
+		unsigned int nr_bytes;
+
+		entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL);
+		if (!entry)
+			goto Error;
+
+		list_add_tail(&entry->node, &nvs_list);
+		entry->phys_start = start;
+		nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK);
+		entry->size = (size < nr_bytes) ? size : nr_bytes;
+
+		start += entry->size;
+		size -= entry->size;
+	}
+	return 0;
+
+ Error:
+	list_for_each_entry_safe(entry, next, &nvs_list, node) {
+		list_del(&entry->node);
+		kfree(entry);
+	}
+	return -ENOMEM;
+}
+
+/**
+ *	hibernate_nvs_free - free data pages allocated for saving NVS regions
+ */
+void hibernate_nvs_free(void)
+{
+	struct nvs_page *entry;
+
+	list_for_each_entry(entry, &nvs_list, node)
+		if (entry->data) {
+			free_page((unsigned long)entry->data);
+			entry->data = NULL;
+			if (entry->kaddr) {
+				iounmap(entry->kaddr);
+				entry->kaddr = NULL;
+			}
+		}
+}
+
+/**
+ *	hibernate_nvs_alloc - allocate memory necessary for saving NVS regions
+ */
+int hibernate_nvs_alloc(void)
+{
+	struct nvs_page *entry;
+
+	list_for_each_entry(entry, &nvs_list, node) {
+		entry->data = (void *)__get_free_page(GFP_KERNEL);
+		if (!entry->data) {
+			hibernate_nvs_free();
+			return -ENOMEM;
+		}
+	}
+	return 0;
+}
+
+/**
+ *	hibernate_nvs_save - save NVS memory regions
+ */
+void hibernate_nvs_save(void)
+{
+	struct nvs_page *entry;
+
+	printk(KERN_INFO "PM: Saving platform NVS memory\n");
+
+	list_for_each_entry(entry, &nvs_list, node)
+		if (entry->data) {
+			entry->kaddr = ioremap(entry->phys_start, entry->size);
+			memcpy(entry->data, entry->kaddr, entry->size);
+		}
+}
+
+/**
+ *	hibernate_nvs_restore - restore NVS memory regions
+ *
+ *	This function is going to be called with interrupts disabled, so it
+ *	cannot iounmap the virtual addresses used to access the NVS region.
+ */
+void hibernate_nvs_restore(void)
+{
+	struct nvs_page *entry;
+
+	printk(KERN_INFO "PM: Restoring platform NVS memory\n");
+
+	list_for_each_entry(entry, &nvs_list, node)
+		if (entry->data)
+			memcpy(entry->kaddr, entry->data, entry->size);
+}
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 87b901cb392..6a07f4dbf2f 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -186,125 +186,3 @@ void swsusp_show_speed(struct timeval *start, struct timeval *stop,
 			centisecs / 100, centisecs % 100,
 			kps / 1000, (kps % 1000) / 10);
 }
-
-/*
- * Platforms, like ACPI, may want us to save some memory used by them during
- * hibernation and to restore the contents of this memory during the subsequent
- * resume.  The code below implements a mechanism allowing us to do that.
- */
-
-struct nvs_page {
-	unsigned long phys_start;
-	unsigned int size;
-	void *kaddr;
-	void *data;
-	struct list_head node;
-};
-
-static LIST_HEAD(nvs_list);
-
-/**
- *	hibernate_nvs_register - register platform NVS memory region to save
- *	@start - physical address of the region
- *	@size - size of the region
- *
- *	The NVS region need not be page-aligned (both ends) and we arrange
- *	things so that the data from page-aligned addresses in this region will
- *	be copied into separate RAM pages.
- */
-int hibernate_nvs_register(unsigned long start, unsigned long size)
-{
-	struct nvs_page *entry, *next;
-
-	while (size > 0) {
-		unsigned int nr_bytes;
-
-		entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL);
-		if (!entry)
-			goto Error;
-
-		list_add_tail(&entry->node, &nvs_list);
-		entry->phys_start = start;
-		nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK);
-		entry->size = (size < nr_bytes) ? size : nr_bytes;
-
-		start += entry->size;
-		size -= entry->size;
-	}
-	return 0;
-
- Error:
-	list_for_each_entry_safe(entry, next, &nvs_list, node) {
-		list_del(&entry->node);
-		kfree(entry);
-	}
-	return -ENOMEM;
-}
-
-/**
- *	hibernate_nvs_free - free data pages allocated for saving NVS regions
- */
-void hibernate_nvs_free(void)
-{
-	struct nvs_page *entry;
-
-	list_for_each_entry(entry, &nvs_list, node)
-		if (entry->data) {
-			free_page((unsigned long)entry->data);
-			entry->data = NULL;
-			if (entry->kaddr) {
-				iounmap(entry->kaddr);
-				entry->kaddr = NULL;
-			}
-		}
-}
-
-/**
- *	hibernate_nvs_alloc - allocate memory necessary for saving NVS regions
- */
-int hibernate_nvs_alloc(void)
-{
-	struct nvs_page *entry;
-
-	list_for_each_entry(entry, &nvs_list, node) {
-		entry->data = (void *)__get_free_page(GFP_KERNEL);
-		if (!entry->data) {
-			hibernate_nvs_free();
-			return -ENOMEM;
-		}
-	}
-	return 0;
-}
-
-/**
- *	hibernate_nvs_save - save NVS memory regions
- */
-void hibernate_nvs_save(void)
-{
-	struct nvs_page *entry;
-
-	printk(KERN_INFO "PM: Saving platform NVS memory\n");
-
-	list_for_each_entry(entry, &nvs_list, node)
-		if (entry->data) {
-			entry->kaddr = ioremap(entry->phys_start, entry->size);
-			memcpy(entry->data, entry->kaddr, entry->size);
-		}
-}
-
-/**
- *	hibernate_nvs_restore - restore NVS memory regions
- *
- *	This function is going to be called with interrupts disabled, so it
- *	cannot iounmap the virtual addresses used to access the NVS region.
- */
-void hibernate_nvs_restore(void)
-{
-	struct nvs_page *entry;
-
-	printk(KERN_INFO "PM: Restoring platform NVS memory\n");
-
-	list_for_each_entry(entry, &nvs_list, node)
-		if (entry->data)
-			memcpy(entry->kaddr, entry->data, entry->size);
-}
-- 
cgit v1.2.3-70-g09d2


From 5818a6e2519b34cd6d0220d89f5729ab2725e1bf Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 11 Jun 2009 21:59:21 +0200
Subject: PM: Add empty suspend/resume device irq functions

git commit 0a0c5168 "PM: Introduce functions for suspending and resuming
device interrupts" introduced some helper functions. However these
functions are only available for architectures which support
GENERIC_HARDIRQS.

Other architectures will see this build error:

drivers/built-in.o: In function `sysdev_suspend':
(.text+0x15138): undefined reference to `check_wakeup_irqs'
drivers/built-in.o: In function `device_power_up':
(.text+0x1cb66): undefined reference to `resume_device_irqs'
drivers/built-in.o: In function `device_power_down':
(.text+0x1cb92): undefined reference to `suspend_device_irqs'

To fix this add some empty inline functions for !GENERIC_HARDIRQS.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/interrupt.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index ff374ceface..c41e812e9d5 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -183,6 +183,7 @@ extern void disable_irq(unsigned int irq);
 extern void enable_irq(unsigned int irq);
 
 /* The following three functions are for the core kernel use only. */
+#ifdef CONFIG_GENERIC_HARDIRQS
 extern void suspend_device_irqs(void);
 extern void resume_device_irqs(void);
 #ifdef CONFIG_PM_SLEEP
@@ -190,6 +191,11 @@ extern int check_wakeup_irqs(void);
 #else
 static inline int check_wakeup_irqs(void) { return 0; }
 #endif
+#else
+static inline void suspend_device_irqs(void) { };
+static inline void resume_device_irqs(void) { };
+static inline int check_wakeup_irqs(void) { return 0; }
+#endif
 
 #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
 
-- 
cgit v1.2.3-70-g09d2


From acc6be5405b90c9f0fb0eb8a74ec4d4b7b5bf48f Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Tue, 20 May 2008 11:15:43 +0200
Subject: x86: add save_stack_trace_bp() for tracing from a specific stack
 frame

This will help kmemcheck (and possibly other debugging tools) since we
can now simply pass regs->bp to the stack tracer instead of specifying
the number of stack frames to skip, which is unreliable if gcc decides
to inline functions, etc.

Note that this makes the API incomplete for other architectures, but I
expect that those can be updated lazily, e.g. when they need it.

Cc: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
---
 arch/x86/kernel/stacktrace.c | 7 +++++++
 include/linux/stacktrace.h   | 1 +
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 4aaf7e48394..c3eb207181f 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -77,6 +77,13 @@ void save_stack_trace(struct stack_trace *trace)
 }
 EXPORT_SYMBOL_GPL(save_stack_trace);
 
+void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp)
+{
+	dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace);
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
 	dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace);
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 1a8cecc4f38..551f6c7d504 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -11,6 +11,7 @@ struct stack_trace {
 };
 
 extern void save_stack_trace(struct stack_trace *trace);
+extern void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp);
 extern void save_stack_trace_tsk(struct task_struct *tsk,
 				struct stack_trace *trace);
 
-- 
cgit v1.2.3-70-g09d2


From b618ad31bb2020db6a36929122e5554e33210d47 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Fri, 13 Jun 2008 15:31:11 +0200
Subject: stacktrace: add forward-declaration struct task_struct

This is needed if the header is to be free-standing.

Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
---
 include/linux/stacktrace.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 551f6c7d504..51efbef38fb 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -4,6 +4,8 @@
 struct task_struct;
 
 #ifdef CONFIG_STACKTRACE
+struct task_struct;
+
 struct stack_trace {
 	unsigned int nr_entries, max_entries;
 	unsigned long *entries;
-- 
cgit v1.2.3-70-g09d2


From 24c5c4c2f506bf87ef2343669fb892c944c3fdde Mon Sep 17 00:00:00 2001
From: Zhao Yakui <yakui.zhao@intel.com>
Date: Thu, 21 May 2009 16:25:35 +0800
Subject: ACPI: increase size of acpi_bus_id[]

Previously [5], now [8].

sprintf(acpi_device_bid(device), "CPU%X", cpu_id)
now looks better on systems with more than 0xFF processors.

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/acpi_bus.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index c34b1102290..0be24101a48 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -168,7 +168,7 @@ struct acpi_device_dir {
 
 /* Plug and Play */
 
-typedef char acpi_bus_id[5];
+typedef char acpi_bus_id[8];
 typedef unsigned long acpi_bus_address;
 typedef char acpi_hardware_id[15];
 typedef char acpi_unique_id[9];
-- 
cgit v1.2.3-70-g09d2


From c4bf2f372db09ef8d16a25a60d523bfa1c50f7b5 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Thu, 11 Jun 2009 23:53:55 -0400
Subject: ACPI, PCI, x86: move MCFG parsing routine from ACPI to PCI file

Move
arch/x86/kernel/acpi/boot.c: acpi_parse_mcfg()
to
arch/x86/pci/mmconfig-shared.c: pci_parse_mcfg()
where it is used, and make it static.

Move associated globals and helper routine with it.

No functional change.

This code move is in preparation for SFI support,
which will allow the PCI code to find the MCFG table
on systems which do not support ACPI.

Signed-off-by: Len Brown <len.brown@intel.com>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/include/asm/pci_x86.h |  3 ++
 arch/x86/kernel/acpi/boot.c    | 66 ------------------------------------------
 arch/x86/pci/mmconfig-shared.c | 65 ++++++++++++++++++++++++++++++++++++++++-
 include/linux/acpi.h           |  3 --
 4 files changed, 67 insertions(+), 70 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index e60fd3e14bd..b399988eee3 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -121,6 +121,9 @@ extern int __init pcibios_init(void);
 extern int __init pci_mmcfg_arch_init(void);
 extern void __init pci_mmcfg_arch_free(void);
 
+extern struct acpi_mcfg_allocation *pci_mmcfg_config;
+extern int pci_mmcfg_config_num;
+
 /*
  * AMD Fam10h CPUs are buggy, and cannot access MMIO config space
  * on their northbrige except through the * %eax register. As such, you MUST
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 817d6a5e115..f54e0e557cd 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -117,72 +117,6 @@ void __init __acpi_unmap_table(char *map, unsigned long size)
 	early_iounmap(map, size);
 }
 
-#ifdef CONFIG_PCI_MMCONFIG
-
-static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
-
-/* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
-struct acpi_mcfg_allocation *pci_mmcfg_config;
-int pci_mmcfg_config_num;
-
-static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
-{
-	if (!strcmp(mcfg->header.oem_id, "SGI"))
-		acpi_mcfg_64bit_base_addr = TRUE;
-
-	return 0;
-}
-
-int __init acpi_parse_mcfg(struct acpi_table_header *header)
-{
-	struct acpi_table_mcfg *mcfg;
-	unsigned long i;
-	int config_size;
-
-	if (!header)
-		return -EINVAL;
-
-	mcfg = (struct acpi_table_mcfg *)header;
-
-	/* how many config structures do we have */
-	pci_mmcfg_config_num = 0;
-	i = header->length - sizeof(struct acpi_table_mcfg);
-	while (i >= sizeof(struct acpi_mcfg_allocation)) {
-		++pci_mmcfg_config_num;
-		i -= sizeof(struct acpi_mcfg_allocation);
-	};
-	if (pci_mmcfg_config_num == 0) {
-		printk(KERN_ERR PREFIX "MMCONFIG has no entries\n");
-		return -ENODEV;
-	}
-
-	config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config);
-	pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL);
-	if (!pci_mmcfg_config) {
-		printk(KERN_WARNING PREFIX
-		       "No memory for MCFG config tables\n");
-		return -ENOMEM;
-	}
-
-	memcpy(pci_mmcfg_config, &mcfg[1], config_size);
-
-	acpi_mcfg_oem_check(mcfg);
-
-	for (i = 0; i < pci_mmcfg_config_num; ++i) {
-		if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) &&
-		    !acpi_mcfg_64bit_base_addr) {
-			printk(KERN_ERR PREFIX
-			       "MMCONFIG not in low 4GB of memory\n");
-			kfree(pci_mmcfg_config);
-			pci_mmcfg_config_num = 0;
-			return -ENODEV;
-		}
-	}
-
-	return 0;
-}
-#endif				/* CONFIG_PCI_MMCONFIG */
-
 #ifdef CONFIG_X86_LOCAL_APIC
 static int __init acpi_parse_madt(struct acpi_table_header *table)
 {
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 8766b0e216c..712443ec6d4 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -523,6 +523,69 @@ reject:
 
 static int __initdata known_bridge;
 
+static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
+
+/* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
+struct acpi_mcfg_allocation *pci_mmcfg_config;
+int pci_mmcfg_config_num;
+
+static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
+{
+	if (!strcmp(mcfg->header.oem_id, "SGI"))
+		acpi_mcfg_64bit_base_addr = TRUE;
+
+	return 0;
+}
+
+static int __init pci_parse_mcfg(struct acpi_table_header *header)
+{
+	struct acpi_table_mcfg *mcfg;
+	unsigned long i;
+	int config_size;
+
+	if (!header)
+		return -EINVAL;
+
+	mcfg = (struct acpi_table_mcfg *)header;
+
+	/* how many config structures do we have */
+	pci_mmcfg_config_num = 0;
+	i = header->length - sizeof(struct acpi_table_mcfg);
+	while (i >= sizeof(struct acpi_mcfg_allocation)) {
+		++pci_mmcfg_config_num;
+		i -= sizeof(struct acpi_mcfg_allocation);
+	};
+	if (pci_mmcfg_config_num == 0) {
+		printk(KERN_ERR PREFIX "MMCONFIG has no entries\n");
+		return -ENODEV;
+	}
+
+	config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config);
+	pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL);
+	if (!pci_mmcfg_config) {
+		printk(KERN_WARNING PREFIX
+		       "No memory for MCFG config tables\n");
+		return -ENOMEM;
+	}
+
+	memcpy(pci_mmcfg_config, &mcfg[1], config_size);
+
+	acpi_mcfg_oem_check(mcfg);
+
+	for (i = 0; i < pci_mmcfg_config_num; ++i) {
+		if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) &&
+		    !acpi_mcfg_64bit_base_addr) {
+			printk(KERN_ERR PREFIX
+			       "MMCONFIG not in low 4GB of memory\n");
+			kfree(pci_mmcfg_config);
+			pci_mmcfg_config_num = 0;
+			return -ENODEV;
+		}
+	}
+
+	return 0;
+}
+
 static void __init __pci_mmcfg_init(int early)
 {
 	/* MMCONFIG disabled */
@@ -543,7 +606,7 @@ static void __init __pci_mmcfg_init(int early)
 	}
 
 	if (!known_bridge)
-		acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg);
+		acpi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg);
 
 	pci_mmcfg_reject_broken(early);
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 88be890ee3c..73cb141150d 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -113,9 +113,6 @@ void acpi_irq_stats_init(void);
 extern u32 acpi_irq_handled;
 extern u32 acpi_irq_not_handled;
 
-extern struct acpi_mcfg_allocation *pci_mmcfg_config;
-extern int pci_mmcfg_config_num;
-
 extern int sbf_port;
 extern unsigned long acpi_realmode_flags;
 
-- 
cgit v1.2.3-70-g09d2


From 4a7a16dc061e4c57bf288150f51bd4c2ace33723 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Fri, 12 Jun 2009 20:42:08 -0400
Subject: ACPI: move declaration acpi_early_init() to acpi.h

Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/acpi.h | 3 +++
 init/main.c          | 6 +-----
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 73cb141150d..bf17681cb06 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -290,7 +290,10 @@ void __init acpi_s4_no_nvs(void);
 				OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL)
 
 extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags);
+extern void acpi_early_init(void);
+
 #else	/* CONFIG_ACPI */
+static inline void acpi_early_init(void) { }
 
 static inline int early_acpi_boot_init(void)
 {
diff --git a/init/main.c b/init/main.c
index d721dad05dd..f1b9f0fdb1b 100644
--- a/init/main.c
+++ b/init/main.c
@@ -24,6 +24,7 @@
 #include <linux/smp_lock.h>
 #include <linux/initrd.h>
 #include <linux/bootmem.h>
+#include <linux/acpi.h>
 #include <linux/tty.h>
 #include <linux/gfp.h>
 #include <linux/percpu.h>
@@ -86,11 +87,6 @@ extern void sbus_init(void);
 extern void prio_tree_init(void);
 extern void radix_tree_init(void);
 extern void free_initmem(void);
-#ifdef	CONFIG_ACPI
-extern void acpi_early_init(void);
-#else
-static inline void acpi_early_init(void) { }
-#endif
 #ifndef CONFIG_DEBUG_RODATA
 static inline void mark_rodata_ro(void) { }
 #endif
-- 
cgit v1.2.3-70-g09d2


From 8eae985f08138758e06503588f5f1196269bc415 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Fri, 9 May 2008 20:32:44 +0200
Subject: slab: move struct kmem_cache to headers

Move the SLAB struct kmem_cache definition to <linux/slab_def.h> like
with SLUB so kmemcheck can access ->ctor and ->flags.

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

[rebased for mainline inclusion]
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
---
 include/linux/slab_def.h | 81 ++++++++++++++++++++++++++++++++++++++++++++++++
 mm/slab.c                | 81 ------------------------------------------------
 2 files changed, 81 insertions(+), 81 deletions(-)

(limited to 'include')

diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 713f841ecaa..850d057500d 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -16,6 +16,87 @@
 #include <linux/compiler.h>
 #include <linux/kmemtrace.h>
 
+/*
+ * struct kmem_cache
+ *
+ * manages a cache.
+ */
+
+struct kmem_cache {
+/* 1) per-cpu data, touched during every alloc/free */
+	struct array_cache *array[NR_CPUS];
+/* 2) Cache tunables. Protected by cache_chain_mutex */
+	unsigned int batchcount;
+	unsigned int limit;
+	unsigned int shared;
+
+	unsigned int buffer_size;
+	u32 reciprocal_buffer_size;
+/* 3) touched by every alloc & free from the backend */
+
+	unsigned int flags;		/* constant flags */
+	unsigned int num;		/* # of objs per slab */
+
+/* 4) cache_grow/shrink */
+	/* order of pgs per slab (2^n) */
+	unsigned int gfporder;
+
+	/* force GFP flags, e.g. GFP_DMA */
+	gfp_t gfpflags;
+
+	size_t colour;			/* cache colouring range */
+	unsigned int colour_off;	/* colour offset */
+	struct kmem_cache *slabp_cache;
+	unsigned int slab_size;
+	unsigned int dflags;		/* dynamic flags */
+
+	/* constructor func */
+	void (*ctor)(void *obj);
+
+/* 5) cache creation/removal */
+	const char *name;
+	struct list_head next;
+
+/* 6) statistics */
+#ifdef CONFIG_DEBUG_SLAB
+	unsigned long num_active;
+	unsigned long num_allocations;
+	unsigned long high_mark;
+	unsigned long grown;
+	unsigned long reaped;
+	unsigned long errors;
+	unsigned long max_freeable;
+	unsigned long node_allocs;
+	unsigned long node_frees;
+	unsigned long node_overflow;
+	atomic_t allochit;
+	atomic_t allocmiss;
+	atomic_t freehit;
+	atomic_t freemiss;
+
+	/*
+	 * If debugging is enabled, then the allocator can add additional
+	 * fields and/or padding to every object. buffer_size contains the total
+	 * object size including these internal fields, the following two
+	 * variables contain the offset to the user object and its size.
+	 */
+	int obj_offset;
+	int obj_size;
+#endif /* CONFIG_DEBUG_SLAB */
+
+	/*
+	 * We put nodelists[] at the end of kmem_cache, because we want to size
+	 * this array to nr_node_ids slots instead of MAX_NUMNODES
+	 * (see kmem_cache_init())
+	 * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache
+	 * is statically defined, so we reserve the max number of nodes.
+	 */
+	struct kmem_list3 *nodelists[MAX_NUMNODES];
+	/*
+	 * Do not add fields after nodelists[]
+	 */
+};
+
 /* Size description struct for general caches. */
 struct cache_sizes {
 	size_t		 	cs_size;
diff --git a/mm/slab.c b/mm/slab.c
index f46b65d124e..bf0c3af143f 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -374,87 +374,6 @@ static void kmem_list3_init(struct kmem_list3 *parent)
 	MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid);	\
 	} while (0)
 
-/*
- * struct kmem_cache
- *
- * manages a cache.
- */
-
-struct kmem_cache {
-/* 1) per-cpu data, touched during every alloc/free */
-	struct array_cache *array[NR_CPUS];
-/* 2) Cache tunables. Protected by cache_chain_mutex */
-	unsigned int batchcount;
-	unsigned int limit;
-	unsigned int shared;
-
-	unsigned int buffer_size;
-	u32 reciprocal_buffer_size;
-/* 3) touched by every alloc & free from the backend */
-
-	unsigned int flags;		/* constant flags */
-	unsigned int num;		/* # of objs per slab */
-
-/* 4) cache_grow/shrink */
-	/* order of pgs per slab (2^n) */
-	unsigned int gfporder;
-
-	/* force GFP flags, e.g. GFP_DMA */
-	gfp_t gfpflags;
-
-	size_t colour;			/* cache colouring range */
-	unsigned int colour_off;	/* colour offset */
-	struct kmem_cache *slabp_cache;
-	unsigned int slab_size;
-	unsigned int dflags;		/* dynamic flags */
-
-	/* constructor func */
-	void (*ctor)(void *obj);
-
-/* 5) cache creation/removal */
-	const char *name;
-	struct list_head next;
-
-/* 6) statistics */
-#if STATS
-	unsigned long num_active;
-	unsigned long num_allocations;
-	unsigned long high_mark;
-	unsigned long grown;
-	unsigned long reaped;
-	unsigned long errors;
-	unsigned long max_freeable;
-	unsigned long node_allocs;
-	unsigned long node_frees;
-	unsigned long node_overflow;
-	atomic_t allochit;
-	atomic_t allocmiss;
-	atomic_t freehit;
-	atomic_t freemiss;
-#endif
-#if DEBUG
-	/*
-	 * If debugging is enabled, then the allocator can add additional
-	 * fields and/or padding to every object. buffer_size contains the total
-	 * object size including these internal fields, the following two
-	 * variables contain the offset to the user object and its size.
-	 */
-	int obj_offset;
-	int obj_size;
-#endif
-	/*
-	 * We put nodelists[] at the end of kmem_cache, because we want to size
-	 * this array to nr_node_ids slots instead of MAX_NUMNODES
-	 * (see kmem_cache_init())
-	 * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache
-	 * is statically defined, so we reserve the max number of nodes.
-	 */
-	struct kmem_list3 *nodelists[MAX_NUMNODES];
-	/*
-	 * Do not add fields after nodelists[]
-	 */
-};
-
 #define CFLGS_OFF_SLAB		(0x80000000UL)
 #define	OFF_SLAB(x)	((x)->flags & CFLGS_OFF_SLAB)
 
-- 
cgit v1.2.3-70-g09d2


From 7c692cbade8b8884f1c20500393bcc7cd6d24ef8 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Wed, 21 May 2008 22:53:13 +0200
Subject: tasklets: new tasklet scheduling function

Rationale: kmemcheck needs to be able to schedule a tasklet without
touching any dynamically allocated memory _at_ _all_ (since that would
lead to a recursive page fault). This tasklet is used for writing the
error reports to the kernel log.

The new scheduling function avoids touching any other tasklets by
inserting the new tasklist as the head of the "tasklet_hi" list instead
of on the tail.

Also don't wake up the softirq thread lest the scheduler access some
tracked memory and we go down with a recursive page fault.

In this case, we'd better just wait for the maximum time of 1/HZ for the
message to appear.

Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
---
 include/linux/interrupt.h | 14 ++++++++++++++
 kernel/softirq.c          | 11 +++++++++++
 2 files changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index ff374ceface..dd574d51bca 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -466,6 +466,20 @@ static inline void tasklet_hi_schedule(struct tasklet_struct *t)
 		__tasklet_hi_schedule(t);
 }
 
+extern void __tasklet_hi_schedule_first(struct tasklet_struct *t);
+
+/*
+ * This version avoids touching any other tasklets. Needed for kmemcheck
+ * in order not to take any page faults while enqueueing this tasklet;
+ * consider VERY carefully whether you really need this or
+ * tasklet_hi_schedule()...
+ */
+static inline void tasklet_hi_schedule_first(struct tasklet_struct *t)
+{
+	if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
+		__tasklet_hi_schedule_first(t);
+}
+
 
 static inline void tasklet_disable_nosync(struct tasklet_struct *t)
 {
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 258885a543d..b41fb710e11 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -382,6 +382,17 @@ void __tasklet_hi_schedule(struct tasklet_struct *t)
 
 EXPORT_SYMBOL(__tasklet_hi_schedule);
 
+void __tasklet_hi_schedule_first(struct tasklet_struct *t)
+{
+	BUG_ON(!irqs_disabled());
+
+	t->next = __get_cpu_var(tasklet_hi_vec).head;
+	__get_cpu_var(tasklet_hi_vec).head = t;
+	__raise_softirq_irqoff(HI_SOFTIRQ);
+}
+
+EXPORT_SYMBOL(__tasklet_hi_schedule_first);
+
 static void tasklet_action(struct softirq_action *a)
 {
 	struct tasklet_struct *list;
-- 
cgit v1.2.3-70-g09d2


From dd14be4c274fc484eccace03ae9726e516630331 Mon Sep 17 00:00:00 2001
From: Richard Röjfors <richard.rojfors.ext@mocean-labs.com>
Date: Fri, 5 Jun 2009 15:40:32 +0200
Subject: i2c-ocores: Can add I2C devices to the bus
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is sometimes a need for the ocores driver to add devices to the
bus when installed.

i2c_register_board_info can not always be used, because the I2C devices
 are not known at an early state, they could for instance be connected
 on a I2C bus on a PCI device which has the Open Cores IP.

i2c_new_device can not be used in all cases either since the resulting
bus nummer might be unknown.

The solution is the pass a list of I2C devices in the platform data to
the Open Cores driver. This is useful for MFD drivers.

Signed-off-by: Richard Röjfors <richard.rojfors.ext@mocean-labs.com>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>
---
 Documentation/i2c/busses/i2c-ocores | 17 +++++++++++++++++
 drivers/i2c/busses/i2c-ocores.c     |  5 +++++
 include/linux/i2c-ocores.h          |  2 ++
 3 files changed, 24 insertions(+)

(limited to 'include')

diff --git a/Documentation/i2c/busses/i2c-ocores b/Documentation/i2c/busses/i2c-ocores
index cfcebb10d14..c269aaa2f26 100644
--- a/Documentation/i2c/busses/i2c-ocores
+++ b/Documentation/i2c/busses/i2c-ocores
@@ -20,6 +20,8 @@ platform_device with the base address and interrupt number. The
 dev.platform_data of the device should also point to a struct
 ocores_i2c_platform_data (see linux/i2c-ocores.h) describing the
 distance between registers and the input clock speed.
+There is also a possibility to attach a list of i2c_board_info which
+the i2c-ocores driver will add to the bus upon creation.
 
 E.G. something like:
 
@@ -36,9 +38,24 @@ static struct resource ocores_resources[] = {
 	},
 };
 
+/* optional board info */
+struct i2c_board_info ocores_i2c_board_info[] = {
+	{
+		I2C_BOARD_INFO("tsc2003", 0x48),
+		.platform_data = &tsc2003_platform_data,
+		.irq = TSC_IRQ
+	},
+	{
+		I2C_BOARD_INFO("adv7180", 0x42 >> 1),
+		.irq = ADV_IRQ
+	}
+};
+
 static struct ocores_i2c_platform_data myi2c_data = {
 	.regstep	= 2,		/* two bytes between registers */
 	.clock_khz	= 50000,	/* input clock of 50MHz */
+	.devices	= ocores_i2c_board_info, /* optional table of devices */
+	.num_devices	= ARRAY_SIZE(ocores_i2c_board_info), /* table size */
 };
 
 static struct platform_device myi2c = {
diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index e5193bf7548..3542c6ba98f 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -216,6 +216,7 @@ static int __devinit ocores_i2c_probe(struct platform_device *pdev)
 	struct ocores_i2c_platform_data *pdata;
 	struct resource *res, *res2;
 	int ret;
+	int i;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res)
@@ -271,6 +272,10 @@ static int __devinit ocores_i2c_probe(struct platform_device *pdev)
 		goto add_adapter_failed;
 	}
 
+	/* add in known devices to the bus */
+	for (i = 0; i < pdata->num_devices; i++)
+		i2c_new_device(&i2c->adap, pdata->devices + i);
+
 	return 0;
 
 add_adapter_failed:
diff --git a/include/linux/i2c-ocores.h b/include/linux/i2c-ocores.h
index 8ed591b0887..4d5e57ff661 100644
--- a/include/linux/i2c-ocores.h
+++ b/include/linux/i2c-ocores.h
@@ -14,6 +14,8 @@
 struct ocores_i2c_platform_data {
 	u32 regstep;   /* distance between registers */
 	u32 clock_khz; /* input clock in kHz */
+	u8 num_devices; /* number of devices in the devices list */
+	struct i2c_board_info const *devices; /* devices connected to the bus */
 };
 
 #endif /* _LINUX_I2C_OCORES_H */
-- 
cgit v1.2.3-70-g09d2


From cd6d95d8449b7c9f415f26041e9ae173d387b6bd Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 12 Jun 2009 11:29:27 +0200
Subject: clocksource: prevent selection of low resolution clocksourse also for
 nohz=on

commit 3f68535adad (clocksource: sanity check sysfs clocksource
changes) prevents selection of non high resolution capable
clocksources when high resolution mode is active, but did not take
into account that the same rules apply for highres=off nohz=on.

Check the tick device mode instead of hrtimer_hres_active() to verify
whether the system needs to be protected from a switch to jiffies or
other non highres capable clock sources.

Reported-by: Luming Yu <luming.yu@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h    |  2 +-
 include/linux/tick.h       |  3 +++
 kernel/hrtimer.c           |  4 ++--
 kernel/time/clocksource.c  | 18 ++++++++++--------
 kernel/time/tick-oneshot.c | 17 +++++++++++++++++
 5 files changed, 33 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 58021b0c396..0d2f7c8a33d 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -305,7 +305,7 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer)
 
 extern ktime_t ktime_get(void);
 extern ktime_t ktime_get_real(void);
-extern int hrtimer_hres_active(void);
+
 
 DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
 
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 469b82d88b3..0482229c07d 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -97,10 +97,12 @@ extern void tick_clock_notify(void);
 extern int tick_check_oneshot_change(int allow_nohz);
 extern struct tick_sched *tick_get_tick_sched(int cpu);
 extern void tick_check_idle(int cpu);
+extern int tick_oneshot_mode_active(void);
 # else
 static inline void tick_clock_notify(void) { }
 static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
 static inline void tick_check_idle(int cpu) { }
+static inline int tick_oneshot_mode_active(void) { return 0; }
 # endif
 
 #else /* CONFIG_GENERIC_CLOCKEVENTS */
@@ -109,6 +111,7 @@ static inline void tick_cancel_sched_timer(int cpu) { }
 static inline void tick_clock_notify(void) { }
 static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
 static inline void tick_check_idle(int cpu) { }
+static inline int tick_oneshot_mode_active(void) { return 0; }
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
 # ifdef CONFIG_NO_HZ
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 1a70c18cdff..cb8a15c1958 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -476,7 +476,7 @@ static inline int hrtimer_is_hres_enabled(void)
 /*
  * Is the high resolution mode active ?
  */
-int hrtimer_hres_active(void)
+static inline int hrtimer_hres_active(void)
 {
 	return __get_cpu_var(hrtimer_bases).hres_active;
 }
@@ -704,7 +704,7 @@ static int hrtimer_switch_to_hres(void)
 
 #else
 
-int hrtimer_hres_active(void) { return 0; }
+static inline int hrtimer_hres_active(void) { return 0; }
 static inline int hrtimer_is_hres_enabled(void) { return 0; }
 static inline int hrtimer_switch_to_hres(void) { return 0; }
 static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { }
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 18b9f5da4ee..592bf584d1d 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -30,7 +30,6 @@
 #include <linux/module.h>
 #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
 #include <linux/tick.h>
-#include <linux/hrtimer.h>
 
 void timecounter_init(struct timecounter *tc,
 		      const struct cyclecounter *cc,
@@ -511,13 +510,13 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
 	}
 
 	/*
-	 * Check to make sure we don't switch to a non-HRT usable
-	 * clocksource if HRT is enabled and running
+	 * Check to make sure we don't switch to a non-highres capable
+	 * clocksource if the tick code is in oneshot mode (highres or nohz)
 	 */
-	if (hrtimer_hres_active() &&
+	if (tick_oneshot_mode_active() &&
 	    !(ovr->flags & CLOCK_SOURCE_VALID_FOR_HRES)) {
 		printk(KERN_WARNING "%s clocksource is not HRT compatible. "
-			"Cannot switch while in HRT mode\n", ovr->name);
+			"Cannot switch while in HRT/NOHZ mode\n", ovr->name);
 		ovr = NULL;
 		override_name[0] = 0;
 	}
@@ -550,9 +549,12 @@ sysfs_show_available_clocksources(struct sys_device *dev,
 
 	spin_lock_irq(&clocksource_lock);
 	list_for_each_entry(src, &clocksource_list, list) {
-		/* Don't show non-HRES clocksource if HRES is enabled */
-		if (!hrtimer_hres_active() ||
-				(src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
+		/*
+		 * Don't show non-HRES clocksource if the tick code is
+		 * in one shot mode (highres=on or nohz=on)
+		 */
+		if (!tick_oneshot_mode_active() ||
+		    (src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
 			count += snprintf(buf + count,
 				  max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
 				  "%s ", src->name);
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 2e8de678e76..a96c0e2b89c 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -128,6 +128,23 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *))
 	return 0;
 }
 
+/**
+ * tick_check_oneshot_mode - check whether the system is in oneshot mode
+ *
+ * returns 1 when either nohz or highres are enabled. otherwise 0.
+ */
+int tick_oneshot_mode_active(void)
+{
+	unsigned long flags;
+	int ret;
+
+	local_irq_save(flags);
+	ret = __get_cpu_var(tick_cpu_device).mode == TICKDEV_MODE_ONESHOT;
+	local_irq_restore(flags);
+
+	return ret;
+}
+
 #ifdef CONFIG_HIGH_RES_TIMERS
 /**
  * tick_init_highres - switch to high resolution mode
-- 
cgit v1.2.3-70-g09d2


From a0891aa6a635f658f29bb061a00d6d3486941519 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 13 Jun 2009 12:26:29 +0200
Subject: netfilter: conntrack: move event caching to conntrack extension
 infrastructure

This patch reworks the per-cpu event caching to use the conntrack
extension infrastructure.

The main drawback is that we consume more memory per conntrack
if event delivery is enabled. This patch is required by the
reliable event delivery that follows to this patch.

BTW, this patch allows you to enable/disable event delivery via
/proc/sys/net/netfilter/nf_conntrack_events in runtime, although
you can still disable event caching as compilation option.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack_ecache.h | 130 ++++++++++---------
 include/net/netfilter/nf_conntrack_extend.h |   2 +
 include/net/netns/conntrack.h               |   5 +-
 net/netfilter/nf_conntrack_core.c           |  15 +--
 net/netfilter/nf_conntrack_ecache.c         | 185 ++++++++++++++++++----------
 net/netfilter/nf_conntrack_netlink.c        |  49 ++++----
 6 files changed, 223 insertions(+), 163 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 1afb907e015..e7ae297ba38 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -6,61 +6,52 @@
 #define _NF_CONNTRACK_ECACHE_H
 #include <net/netfilter/nf_conntrack.h>
 
-#include <linux/interrupt.h>
 #include <net/net_namespace.h>
 #include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_common.h>
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
+#include <net/netfilter/nf_conntrack_extend.h>
 
-/* Connection tracking event bits */
+/* Connection tracking event types */
 enum ip_conntrack_events
 {
-	/* New conntrack */
-	IPCT_NEW_BIT = 0,
-	IPCT_NEW = (1 << IPCT_NEW_BIT),
-
-	/* Expected connection */
-	IPCT_RELATED_BIT = 1,
-	IPCT_RELATED = (1 << IPCT_RELATED_BIT),
-
-	/* Destroyed conntrack */
-	IPCT_DESTROY_BIT = 2,
-	IPCT_DESTROY = (1 << IPCT_DESTROY_BIT),
-
-	/* Status has changed */
-	IPCT_STATUS_BIT = 3,
-	IPCT_STATUS = (1 << IPCT_STATUS_BIT),
+	IPCT_NEW		= 0,	/* new conntrack */
+	IPCT_RELATED		= 1,	/* related conntrack */
+	IPCT_DESTROY		= 2,	/* destroyed conntrack */
+	IPCT_STATUS		= 3,	/* status has changed */
+	IPCT_PROTOINFO		= 4,	/* protocol information has changed */
+	IPCT_HELPER		= 5,	/* new helper has been set */
+	IPCT_MARK		= 6,	/* new mark has been set */
+	IPCT_NATSEQADJ		= 7,	/* NAT is doing sequence adjustment */
+	IPCT_SECMARK		= 8,	/* new security mark has been set */
+};
 
-	/* Update of protocol info */
-	IPCT_PROTOINFO_BIT = 4,
-	IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT),
+enum ip_conntrack_expect_events {
+	IPEXP_NEW		= 0,	/* new expectation */
+};
 
-	/* New helper for conntrack */
-	IPCT_HELPER_BIT = 5,
-	IPCT_HELPER = (1 << IPCT_HELPER_BIT),
+struct nf_conntrack_ecache {
+	unsigned long cache;		/* bitops want long */
+};
 
-	/* Mark is set */
-	IPCT_MARK_BIT = 6,
-	IPCT_MARK = (1 << IPCT_MARK_BIT),
+static inline struct nf_conntrack_ecache *
+nf_ct_ecache_find(const struct nf_conn *ct)
+{
+	return nf_ct_ext_find(ct, NF_CT_EXT_ECACHE);
+}
 
-	/* NAT sequence adjustment */
-	IPCT_NATSEQADJ_BIT = 7,
-	IPCT_NATSEQADJ = (1 << IPCT_NATSEQADJ_BIT),
+static inline struct nf_conntrack_ecache *
+nf_ct_ecache_ext_add(struct nf_conn *ct, gfp_t gfp)
+{
+	struct net *net = nf_ct_net(ct);
 
-	/* Secmark is set */
-	IPCT_SECMARK_BIT = 8,
-	IPCT_SECMARK = (1 << IPCT_SECMARK_BIT),
-};
+	if (!net->ct.sysctl_events)
+		return NULL;
 
-enum ip_conntrack_expect_events {
-	IPEXP_NEW_BIT = 0,
-	IPEXP_NEW = (1 << IPEXP_NEW_BIT),
+	return nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp);
 };
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
-struct nf_conntrack_ecache {
-	struct nf_conn *ct;
-	unsigned int events;
-};
-
 /* This structure is passed to event handler */
 struct nf_ct_event {
 	struct nf_conn *ct;
@@ -76,30 +67,30 @@ extern struct nf_ct_event_notifier *nf_conntrack_event_cb;
 extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb);
 extern void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb);
 
-extern void nf_ct_deliver_cached_events(const struct nf_conn *ct);
-extern void __nf_ct_event_cache_init(struct nf_conn *ct);
-extern void nf_ct_event_cache_flush(struct net *net);
+extern void nf_ct_deliver_cached_events(struct nf_conn *ct);
 
 static inline void
 nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
 {
-	struct net *net = nf_ct_net(ct);
-	struct nf_conntrack_ecache *ecache;
-
-	local_bh_disable();
-	ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
-	if (ct != ecache->ct)
-		__nf_ct_event_cache_init(ct);
-	ecache->events |= event;
-	local_bh_enable();
+	struct nf_conntrack_ecache *e;
+
+	if (nf_conntrack_event_cb == NULL)
+		return;
+
+	e = nf_ct_ecache_find(ct);
+	if (e == NULL)
+		return;
+
+	set_bit(event, &e->cache);
 }
 
 static inline void
-nf_conntrack_event_report(enum ip_conntrack_events event,
-			  struct nf_conn *ct,
-			  u32 pid,
-			  int report)
+nf_conntrack_eventmask_report(unsigned int eventmask,
+			      struct nf_conn *ct,
+			      u32 pid,
+			      int report)
 {
+	struct net *net = nf_ct_net(ct);
 	struct nf_ct_event_notifier *notify;
 
 	rcu_read_lock();
@@ -107,22 +98,32 @@ nf_conntrack_event_report(enum ip_conntrack_events event,
 	if (notify == NULL)
 		goto out_unlock;
 
+	if (!net->ct.sysctl_events)
+		goto out_unlock;
+
 	if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) {
 		struct nf_ct_event item = {
 			.ct 	= ct,
 			.pid	= pid,
 			.report = report
 		};
-		notify->fcn(event, &item);
+		notify->fcn(eventmask, &item);
 	}
 out_unlock:
 	rcu_read_unlock();
 }
 
+static inline void
+nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct,
+			  u32 pid, int report)
+{
+	nf_conntrack_eventmask_report(1 << event, ct, pid, report);
+}
+
 static inline void
 nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct)
 {
-	nf_conntrack_event_report(event, ct, 0, 0);
+	nf_conntrack_eventmask_report(1 << event, ct, 0, 0);
 }
 
 struct nf_exp_event {
@@ -145,6 +146,7 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
 			  u32 pid,
 			  int report)
 {
+	struct net *net = nf_ct_exp_net(exp);
 	struct nf_exp_event_notifier *notify;
 
 	rcu_read_lock();
@@ -152,13 +154,16 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
 	if (notify == NULL)
 		goto out_unlock;
 
+	if (!net->ct.sysctl_events)
+		goto out_unlock;
+
 	{
 		struct nf_exp_event item = {
 			.exp	= exp,
 			.pid	= pid,
 			.report = report
 		};
-		notify->fcn(event, &item);
+		notify->fcn(1 << event, &item);
 	}
 out_unlock:
 	rcu_read_unlock();
@@ -178,6 +183,10 @@ extern void nf_conntrack_ecache_fini(struct net *net);
 
 static inline void nf_conntrack_event_cache(enum ip_conntrack_events event,
 					    struct nf_conn *ct) {}
+static inline void nf_conntrack_eventmask_report(unsigned int eventmask,
+						 struct nf_conn *ct,
+						 u32 pid,
+						 int report) {}
 static inline void nf_conntrack_event(enum ip_conntrack_events event,
 				      struct nf_conn *ct) {}
 static inline void nf_conntrack_event_report(enum ip_conntrack_events event,
@@ -191,7 +200,6 @@ static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e,
 					     struct nf_conntrack_expect *exp,
  					     u32 pid,
  					     int report) {}
-static inline void nf_ct_event_cache_flush(struct net *net) {}
 
 static inline int nf_conntrack_ecache_init(struct net *net)
 {
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
index da8ee52613a..7f8fc5d123c 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -8,12 +8,14 @@ enum nf_ct_ext_id
 	NF_CT_EXT_HELPER,
 	NF_CT_EXT_NAT,
 	NF_CT_EXT_ACCT,
+	NF_CT_EXT_ECACHE,
 	NF_CT_EXT_NUM,
 };
 
 #define NF_CT_EXT_HELPER_TYPE struct nf_conn_help
 #define NF_CT_EXT_NAT_TYPE struct nf_conn_nat
 #define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter
+#define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache
 
 /* Extensions: optional stuff which isn't permanently in struct. */
 struct nf_ct_ext {
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 9dc58402bc0..505a51cd8c6 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -15,15 +15,14 @@ struct netns_ct {
 	struct hlist_head	*expect_hash;
 	struct hlist_nulls_head	unconfirmed;
 	struct ip_conntrack_stat *stat;
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
-	struct nf_conntrack_ecache *ecache;
-#endif
+	int			sysctl_events;
 	int			sysctl_acct;
 	int			sysctl_checksum;
 	unsigned int		sysctl_log_invalid; /* Log invalid packets */
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_header	*sysctl_header;
 	struct ctl_table_header	*acct_sysctl_header;
+	struct ctl_table_header	*event_sysctl_header;
 #endif
 	int			hash_vmalloc;
 	int			expect_vmalloc;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index d8dffe7ab50..bcacbb5373c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -39,6 +39,7 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_extend.h>
 #include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_core.h>
 
@@ -577,6 +578,7 @@ init_conntrack(struct net *net,
 	}
 
 	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+	nf_ct_ecache_ext_add(ct, GFP_ATOMIC);
 
 	spin_lock_bh(&nf_conntrack_lock);
 	exp = nf_ct_find_expectation(net, tuple);
@@ -1031,8 +1033,6 @@ static void nf_conntrack_cleanup_init_net(void)
 
 static void nf_conntrack_cleanup_net(struct net *net)
 {
-	nf_ct_event_cache_flush(net);
-	nf_conntrack_ecache_fini(net);
  i_see_dead_people:
 	nf_ct_iterate_cleanup(net, kill_all, NULL);
 	if (atomic_read(&net->ct.count) != 0) {
@@ -1045,6 +1045,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
 
 	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
 			     nf_conntrack_htable_size);
+	nf_conntrack_ecache_fini(net);
 	nf_conntrack_acct_fini(net);
 	nf_conntrack_expect_fini(net);
 	free_percpu(net->ct.stat);
@@ -1220,9 +1221,6 @@ static int nf_conntrack_init_net(struct net *net)
 		ret = -ENOMEM;
 		goto err_stat;
 	}
-	ret = nf_conntrack_ecache_init(net);
-	if (ret < 0)
-		goto err_ecache;
 	net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
 					     &net->ct.hash_vmalloc, 1);
 	if (!net->ct.hash) {
@@ -1236,6 +1234,9 @@ static int nf_conntrack_init_net(struct net *net)
 	ret = nf_conntrack_acct_init(net);
 	if (ret < 0)
 		goto err_acct;
+	ret = nf_conntrack_ecache_init(net);
+	if (ret < 0)
+		goto err_ecache;
 
 	/* Set up fake conntrack:
 	    - to never be deleted, not in any hashes */
@@ -1248,14 +1249,14 @@ static int nf_conntrack_init_net(struct net *net)
 
 	return 0;
 
+err_ecache:
+	nf_conntrack_acct_fini(net);
 err_acct:
 	nf_conntrack_expect_fini(net);
 err_expect:
 	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
 			     nf_conntrack_htable_size);
 err_hash:
-	nf_conntrack_ecache_fini(net);
-err_ecache:
 	free_percpu(net->ct.stat);
 err_stat:
 	return ret;
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 5516b3e64b4..683281b7804 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -21,6 +21,7 @@
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_extend.h>
 
 static DEFINE_MUTEX(nf_ct_ecache_mutex);
 
@@ -32,94 +33,38 @@ EXPORT_SYMBOL_GPL(nf_expect_event_cb);
 
 /* deliver cached events and clear cache entry - must be called with locally
  * disabled softirqs */
-static inline void
-__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
+void nf_ct_deliver_cached_events(struct nf_conn *ct)
 {
+	unsigned long events;
 	struct nf_ct_event_notifier *notify;
+	struct nf_conntrack_ecache *e;
 
 	rcu_read_lock();
 	notify = rcu_dereference(nf_conntrack_event_cb);
 	if (notify == NULL)
 		goto out_unlock;
 
-	if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
-	    && ecache->events) {
+	e = nf_ct_ecache_find(ct);
+	if (e == NULL)
+		goto out_unlock;
+
+	events = xchg(&e->cache, 0);
+
+	if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct) && events) {
 		struct nf_ct_event item = {
-			.ct 	= ecache->ct,
+			.ct	= ct,
 			.pid	= 0,
 			.report	= 0
 		};
 
-		notify->fcn(ecache->events, &item);
+		notify->fcn(events, &item);
 	}
 
-	ecache->events = 0;
-	nf_ct_put(ecache->ct);
-	ecache->ct = NULL;
-
 out_unlock:
 	rcu_read_unlock();
 }
-
-/* Deliver all cached events for a particular conntrack. This is called
- * by code prior to async packet handling for freeing the skb */
-void nf_ct_deliver_cached_events(const struct nf_conn *ct)
-{
-	struct net *net = nf_ct_net(ct);
-	struct nf_conntrack_ecache *ecache;
-
-	local_bh_disable();
-	ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
-	if (ecache->ct == ct)
-		__nf_ct_deliver_cached_events(ecache);
-	local_bh_enable();
-}
 EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
 
-/* Deliver cached events for old pending events, if current conntrack != old */
-void __nf_ct_event_cache_init(struct nf_conn *ct)
-{
-	struct net *net = nf_ct_net(ct);
-	struct nf_conntrack_ecache *ecache;
-
-	/* take care of delivering potentially old events */
-	ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
-	BUG_ON(ecache->ct == ct);
-	if (ecache->ct)
-		__nf_ct_deliver_cached_events(ecache);
-	/* initialize for this conntrack/packet */
-	ecache->ct = ct;
-	nf_conntrack_get(&ct->ct_general);
-}
-EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init);
-
-/* flush the event cache - touches other CPU's data and must not be called
- * while packets are still passing through the code */
-void nf_ct_event_cache_flush(struct net *net)
-{
-	struct nf_conntrack_ecache *ecache;
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		ecache = per_cpu_ptr(net->ct.ecache, cpu);
-		if (ecache->ct)
-			nf_ct_put(ecache->ct);
-	}
-}
-
-int nf_conntrack_ecache_init(struct net *net)
-{
-	net->ct.ecache = alloc_percpu(struct nf_conntrack_ecache);
-	if (!net->ct.ecache)
-		return -ENOMEM;
-	return 0;
-}
-
-void nf_conntrack_ecache_fini(struct net *net)
-{
-	free_percpu(net->ct.ecache);
-}
-
 int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new)
 {
 	int ret = 0;
@@ -185,3 +130,107 @@ void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new)
 	mutex_unlock(&nf_ct_ecache_mutex);
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
+
+#define NF_CT_EVENTS_DEFAULT 1
+static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table event_sysctl_table[] = {
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "nf_conntrack_events",
+		.data		= &init_net.ct.sysctl_events,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{}
+};
+#endif /* CONFIG_SYSCTL */
+
+static struct nf_ct_ext_type event_extend __read_mostly = {
+	.len	= sizeof(struct nf_conntrack_ecache),
+	.align	= __alignof__(struct nf_conntrack_ecache),
+	.id	= NF_CT_EXT_ECACHE,
+};
+
+#ifdef CONFIG_SYSCTL
+static int nf_conntrack_event_init_sysctl(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = kmemdup(event_sysctl_table, sizeof(event_sysctl_table),
+			GFP_KERNEL);
+	if (!table)
+		goto out;
+
+	table[0].data = &net->ct.sysctl_events;
+
+	net->ct.event_sysctl_header =
+		register_net_sysctl_table(net,
+					  nf_net_netfilter_sysctl_path, table);
+	if (!net->ct.event_sysctl_header) {
+		printk(KERN_ERR "nf_ct_event: can't register to sysctl.\n");
+		goto out_register;
+	}
+	return 0;
+
+out_register:
+	kfree(table);
+out:
+	return -ENOMEM;
+}
+
+static void nf_conntrack_event_fini_sysctl(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = net->ct.event_sysctl_header->ctl_table_arg;
+	unregister_net_sysctl_table(net->ct.event_sysctl_header);
+	kfree(table);
+}
+#else
+static int nf_conntrack_event_init_sysctl(struct net *net)
+{
+	return 0;
+}
+
+static void nf_conntrack_event_fini_sysctl(struct net *net)
+{
+}
+#endif /* CONFIG_SYSCTL */
+
+int nf_conntrack_ecache_init(struct net *net)
+{
+	int ret;
+
+	net->ct.sysctl_events = nf_ct_events;
+
+	if (net_eq(net, &init_net)) {
+		ret = nf_ct_extend_register(&event_extend);
+		if (ret < 0) {
+			printk(KERN_ERR "nf_ct_event: Unable to register "
+					"event extension.\n");
+			goto out_extend_register;
+		}
+	}
+
+	ret = nf_conntrack_event_init_sysctl(net);
+	if (ret < 0)
+		goto out_sysctl;
+
+	return 0;
+
+out_sysctl:
+	if (net_eq(net, &init_net))
+		nf_ct_extend_unregister(&event_extend);
+out_extend_register:
+	return ret;
+}
+
+void nf_conntrack_ecache_fini(struct net *net)
+{
+	nf_conntrack_event_fini_sysctl(net);
+	if (net_eq(net, &init_net))
+		nf_ct_extend_unregister(&event_extend);
+}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 4e503ada572..19706eff164 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -468,10 +468,10 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 	if (ct == &nf_conntrack_untracked)
 		return 0;
 
-	if (events & IPCT_DESTROY) {
+	if (events & (1 << IPCT_DESTROY)) {
 		type = IPCTNL_MSG_CT_DELETE;
 		group = NFNLGRP_CONNTRACK_DESTROY;
-	} else  if (events & (IPCT_NEW | IPCT_RELATED)) {
+	} else  if (events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED))) {
 		type = IPCTNL_MSG_CT_NEW;
 		flags = NLM_F_CREATE|NLM_F_EXCL;
 		group = NFNLGRP_CONNTRACK_NEW;
@@ -519,7 +519,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 	if (ctnetlink_dump_status(skb, ct) < 0)
 		goto nla_put_failure;
 
-	if (events & IPCT_DESTROY) {
+	if (events & (1 << IPCT_DESTROY)) {
 		if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
 		    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
 			goto nla_put_failure;
@@ -527,31 +527,31 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 		if (ctnetlink_dump_timeout(skb, ct) < 0)
 			goto nla_put_failure;
 
-		if (events & IPCT_PROTOINFO
+		if (events & (1 << IPCT_PROTOINFO)
 		    && ctnetlink_dump_protoinfo(skb, ct) < 0)
 			goto nla_put_failure;
 
-		if ((events & IPCT_HELPER || nfct_help(ct))
+		if ((events & (1 << IPCT_HELPER) || nfct_help(ct))
 		    && ctnetlink_dump_helpinfo(skb, ct) < 0)
 			goto nla_put_failure;
 
 #ifdef CONFIG_NF_CONNTRACK_SECMARK
-		if ((events & IPCT_SECMARK || ct->secmark)
+		if ((events & (1 << IPCT_SECMARK) || ct->secmark)
 		    && ctnetlink_dump_secmark(skb, ct) < 0)
 			goto nla_put_failure;
 #endif
 
-		if (events & IPCT_RELATED &&
+		if (events & (1 << IPCT_RELATED) &&
 		    ctnetlink_dump_master(skb, ct) < 0)
 			goto nla_put_failure;
 
-		if (events & IPCT_NATSEQADJ &&
+		if (events & (1 << IPCT_NATSEQADJ) &&
 		    ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
 			goto nla_put_failure;
 	}
 
 #ifdef CONFIG_NF_CONNTRACK_MARK
-	if ((events & IPCT_MARK || ct->mark)
+	if ((events & (1 << IPCT_MARK) || ct->mark)
 	    && ctnetlink_dump_mark(skb, ct) < 0)
 		goto nla_put_failure;
 #endif
@@ -1253,6 +1253,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
 	}
 
 	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+	nf_ct_ecache_ext_add(ct, GFP_ATOMIC);
 
 #if defined(CONFIG_NF_CONNTRACK_MARK)
 	if (cda[CTA_MARK])
@@ -1340,13 +1341,13 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 			else
 				events = IPCT_NEW;
 
-			nf_conntrack_event_report(IPCT_STATUS |
-						  IPCT_HELPER |
-						  IPCT_PROTOINFO |
-						  IPCT_NATSEQADJ |
-						  IPCT_MARK | events,
-						  ct, NETLINK_CB(skb).pid,
-						  nlmsg_report(nlh));
+			nf_conntrack_eventmask_report((1 << IPCT_STATUS) |
+						      (1 << IPCT_HELPER) |
+						      (1 << IPCT_PROTOINFO) |
+						      (1 << IPCT_NATSEQADJ) |
+						      (1 << IPCT_MARK) | events,
+						      ct, NETLINK_CB(skb).pid,
+						      nlmsg_report(nlh));
 			nf_ct_put(ct);
 		} else
 			spin_unlock_bh(&nf_conntrack_lock);
@@ -1365,13 +1366,13 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 		if (err == 0) {
 			nf_conntrack_get(&ct->ct_general);
 			spin_unlock_bh(&nf_conntrack_lock);
-			nf_conntrack_event_report(IPCT_STATUS |
-						  IPCT_HELPER |
-						  IPCT_PROTOINFO |
-						  IPCT_NATSEQADJ |
-						  IPCT_MARK,
-						  ct, NETLINK_CB(skb).pid,
-						  nlmsg_report(nlh));
+			nf_conntrack_eventmask_report((1 << IPCT_STATUS) |
+						      (1 << IPCT_HELPER) |
+						      (1 << IPCT_PROTOINFO) |
+						      (1 << IPCT_NATSEQADJ) |
+						      (1 << IPCT_MARK),
+						      ct, NETLINK_CB(skb).pid,
+						      nlmsg_report(nlh));
 			nf_ct_put(ct);
 		} else
 			spin_unlock_bh(&nf_conntrack_lock);
@@ -1515,7 +1516,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
 	unsigned int type;
 	int flags = 0;
 
-	if (events & IPEXP_NEW) {
+	if (events & (1 << IPEXP_NEW)) {
 		type = IPCTNL_MSG_EXP_NEW;
 		flags = NLM_F_CREATE|NLM_F_EXCL;
 	} else
-- 
cgit v1.2.3-70-g09d2


From 9858a3ae1d4b390fbaa9c30b83cb66d861b76294 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 13 Jun 2009 12:28:22 +0200
Subject: netfilter: conntrack: move helper destruction to
 nf_ct_helper_destroy()

This patch moves the helper destruction to a function that lives
in nf_conntrack_helper.c. This new function is used in the patch
to add ctnetlink reliable event delivery.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack_helper.h |  2 ++
 net/netfilter/nf_conntrack_core.c           | 11 +----------
 net/netfilter/nf_conntrack_helper.c         | 14 ++++++++++++++
 3 files changed, 17 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index ee2a4b369a0..1b706800092 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -50,6 +50,8 @@ extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp);
 
 extern int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags);
 
+extern void nf_ct_helper_destroy(struct nf_conn *ct);
+
 static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct)
 {
 	return nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index bcacbb5373c..14235b144cb 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -224,17 +224,8 @@ static void death_by_timeout(unsigned long ul_conntrack)
 {
 	struct nf_conn *ct = (void *)ul_conntrack;
 	struct net *net = nf_ct_net(ct);
-	struct nf_conn_help *help = nfct_help(ct);
-	struct nf_conntrack_helper *helper;
-
-	if (help) {
-		rcu_read_lock();
-		helper = rcu_dereference(help->helper);
-		if (helper && helper->destroy)
-			helper->destroy(ct);
-		rcu_read_unlock();
-	}
 
+	nf_ct_helper_destroy(ct);
 	spin_lock_bh(&nf_conntrack_lock);
 	/* Inside lock so preempt is disabled on module removal path.
 	 * Otherwise we can get spurious warnings. */
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 0fa5a422959..65c2a7bc3af 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -136,6 +136,20 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i,
 	return 0;
 }
 
+void nf_ct_helper_destroy(struct nf_conn *ct)
+{
+	struct nf_conn_help *help = nfct_help(ct);
+	struct nf_conntrack_helper *helper;
+
+	if (help) {
+		rcu_read_lock();
+		helper = rcu_dereference(help->helper);
+		if (helper && helper->destroy)
+			helper->destroy(ct);
+		rcu_read_unlock();
+	}
+}
+
 int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
 {
 	unsigned int h = helper_hash(&me->tuple);
-- 
cgit v1.2.3-70-g09d2


From d219dce76c64f2c883dad0537fa09a56d5ff0a10 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 13 Jun 2009 12:28:57 +0200
Subject: list_nulls: add hlist_nulls_add_head and hlist_nulls_del

This patch adds the hlist_nulls_add_head() function which is
based on hlist_nulls_add_head_rcu() but without the use of
rcu_assign_pointer(). It also adds hlist_nulls_del which is
exactly the same like hlist_nulls_del_rcu().

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/list_nulls.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h
index 93150ecf3ea..5d10ae364b5 100644
--- a/include/linux/list_nulls.h
+++ b/include/linux/list_nulls.h
@@ -56,6 +56,18 @@ static inline int hlist_nulls_empty(const struct hlist_nulls_head *h)
 	return is_a_nulls(h->first);
 }
 
+static inline void hlist_nulls_add_head(struct hlist_nulls_node *n,
+					struct hlist_nulls_head *h)
+{
+	struct hlist_nulls_node *first = h->first;
+
+	n->next = first;
+	n->pprev = &h->first;
+	h->first = n;
+	if (!is_a_nulls(first))
+		first->pprev = &n->next;
+}
+
 static inline void __hlist_nulls_del(struct hlist_nulls_node *n)
 {
 	struct hlist_nulls_node *next = n->next;
@@ -65,6 +77,12 @@ static inline void __hlist_nulls_del(struct hlist_nulls_node *n)
 		next->pprev = pprev;
 }
 
+static inline void hlist_nulls_del(struct hlist_nulls_node *n)
+{
+	__hlist_nulls_del(n);
+	n->pprev = LIST_POISON2;
+}
+
 /**
  * hlist_nulls_for_each_entry	- iterate over list of given type
  * @tpos:	the type * to use as a loop cursor.
-- 
cgit v1.2.3-70-g09d2


From dd7669a92c6066b2b31bae7e04cd787092920883 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 13 Jun 2009 12:30:52 +0200
Subject: netfilter: conntrack: optional reliable conntrack event delivery

This patch improves ctnetlink event reliability if one broadcast
listener has set the NETLINK_BROADCAST_ERROR socket option.

The logic is the following: if an event delivery fails, we keep
the undelivered events in the missed event cache. Once the next
packet arrives, we add the new events (if any) to the missed
events in the cache and we try a new delivery, and so on. Thus,
if ctnetlink fails to deliver an event, we try to deliver them
once we see a new packet. Therefore, we may lose state
transitions but the userspace process gets in sync at some point.

At worst case, if no events were delivered to userspace, we make
sure that destroy events are successfully delivered. Basically,
if ctnetlink fails to deliver the destroy event, we remove the
conntrack entry from the hashes and we insert them in the dying
list, which contains inactive entries. Then, the conntrack timer
is added with an extra grace timeout of random32() % 15 seconds
to trigger the event again (this grace timeout is tunable via
/proc). The use of a limited random timeout value allows
distributing the "destroy" resends, thus, avoiding accumulating
lots "destroy" events at the same time. Event delivery may
re-order but we can identify them by means of the tuple plus
the conntrack ID.

The maximum number of conntrack entries (active or inactive) is
still handled by nf_conntrack_max. Thus, we may start dropping
packets at some point if we accumulate a lot of inactive conntrack
entries that did not successfully report the destroy event to
userspace.

During my stress tests consisting of setting a very small buffer
of 2048 bytes for conntrackd and the NETLINK_BROADCAST_ERROR socket
flag, and generating lots of very small connections, I noticed
very few destroy entries on the fly waiting to be resend.

A simple way to test this patch consist of creating a lot of
entries, set a very small Netlink buffer in conntrackd (+ a patch
which is not in the git tree to set the BROADCAST_ERROR flag)
and invoke `conntrack -F'.

For expectations, no changes are introduced in this patch.
Currently, event delivery is only done for new expectations (no
events from expectation expiration, removal and confirmation).
In that case, they need a per-expectation event cache to implement
the same idea that is exposed in this patch.

This patch can be useful to provide reliable flow-accouting. We
still have to add a new conntrack extension to store the creation
and destroy time.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack.h        |  2 +
 include/net/netfilter/nf_conntrack_ecache.h | 61 ++++++++++++++------
 include/net/netns/conntrack.h               |  2 +
 net/netfilter/nf_conntrack_core.c           | 89 +++++++++++++++++++++++++----
 net/netfilter/nf_conntrack_ecache.c         | 28 ++++++++-
 net/netfilter/nf_conntrack_netlink.c        | 19 ++++--
 6 files changed, 166 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index ecc79f95907..a632689b61b 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -201,6 +201,8 @@ extern struct nf_conntrack_tuple_hash *
 __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple);
 
 extern void nf_conntrack_hash_insert(struct nf_conn *ct);
+extern void nf_ct_delete_from_lists(struct nf_conn *ct);
+extern void nf_ct_insert_dying_list(struct nf_conn *ct);
 
 extern void nf_conntrack_flush_report(struct net *net, u32 pid, int report);
 
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index e7ae297ba38..4f20d58e2ab 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -32,6 +32,8 @@ enum ip_conntrack_expect_events {
 
 struct nf_conntrack_ecache {
 	unsigned long cache;		/* bitops want long */
+	unsigned long missed;		/* missed events */
+	u32 pid;			/* netlink pid of destroyer */
 };
 
 static inline struct nf_conntrack_ecache *
@@ -84,14 +86,16 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
 	set_bit(event, &e->cache);
 }
 
-static inline void
+static inline int
 nf_conntrack_eventmask_report(unsigned int eventmask,
 			      struct nf_conn *ct,
 			      u32 pid,
 			      int report)
 {
+	int ret = 0;
 	struct net *net = nf_ct_net(ct);
 	struct nf_ct_event_notifier *notify;
+	struct nf_conntrack_ecache *e;
 
 	rcu_read_lock();
 	notify = rcu_dereference(nf_conntrack_event_cb);
@@ -101,29 +105,52 @@ nf_conntrack_eventmask_report(unsigned int eventmask,
 	if (!net->ct.sysctl_events)
 		goto out_unlock;
 
+	e = nf_ct_ecache_find(ct);
+	if (e == NULL)
+		goto out_unlock;
+
 	if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) {
 		struct nf_ct_event item = {
 			.ct 	= ct,
-			.pid	= pid,
+			.pid	= e->pid ? e->pid : pid,
 			.report = report
 		};
-		notify->fcn(eventmask, &item);
+		/* This is a resent of a destroy event? If so, skip missed */
+		unsigned long missed = e->pid ? 0 : e->missed;
+
+		ret = notify->fcn(eventmask | missed, &item);
+		if (unlikely(ret < 0 || missed)) {
+			spin_lock_bh(&ct->lock);
+			if (ret < 0) {
+				/* This is a destroy event that has been
+				 * triggered by a process, we store the PID
+				 * to include it in the retransmission. */
+				if (eventmask & (1 << IPCT_DESTROY) &&
+				    e->pid == 0 && pid != 0)
+					e->pid = pid;
+				else
+					e->missed |= eventmask;
+			} else
+				e->missed &= ~missed;
+			spin_unlock_bh(&ct->lock);
+		}
 	}
 out_unlock:
 	rcu_read_unlock();
+	return ret;
 }
 
-static inline void
+static inline int
 nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct,
 			  u32 pid, int report)
 {
-	nf_conntrack_eventmask_report(1 << event, ct, pid, report);
+	return nf_conntrack_eventmask_report(1 << event, ct, pid, report);
 }
 
-static inline void
+static inline int
 nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct)
 {
-	nf_conntrack_eventmask_report(1 << event, ct, 0, 0);
+	return nf_conntrack_eventmask_report(1 << event, ct, 0, 0);
 }
 
 struct nf_exp_event {
@@ -183,16 +210,16 @@ extern void nf_conntrack_ecache_fini(struct net *net);
 
 static inline void nf_conntrack_event_cache(enum ip_conntrack_events event,
 					    struct nf_conn *ct) {}
-static inline void nf_conntrack_eventmask_report(unsigned int eventmask,
-						 struct nf_conn *ct,
-						 u32 pid,
-						 int report) {}
-static inline void nf_conntrack_event(enum ip_conntrack_events event,
-				      struct nf_conn *ct) {}
-static inline void nf_conntrack_event_report(enum ip_conntrack_events event,
-					     struct nf_conn *ct,
-					     u32 pid,
-					     int report) {}
+static inline int nf_conntrack_eventmask_report(unsigned int eventmask,
+						struct nf_conn *ct,
+						u32 pid,
+						int report) { return 0; }
+static inline int nf_conntrack_event(enum ip_conntrack_events event,
+				     struct nf_conn *ct) { return 0; }
+static inline int nf_conntrack_event_report(enum ip_conntrack_events event,
+					    struct nf_conn *ct,
+					    u32 pid,
+					    int report) { return 0; }
 static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {}
 static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event,
 				      struct nf_conntrack_expect *exp) {}
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 505a51cd8c6..ba1ba0c5efd 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -14,8 +14,10 @@ struct netns_ct {
 	struct hlist_nulls_head	*hash;
 	struct hlist_head	*expect_hash;
 	struct hlist_nulls_head	unconfirmed;
+	struct hlist_nulls_head	dying;
 	struct ip_conntrack_stat *stat;
 	int			sysctl_events;
+	unsigned int		sysctl_events_retry_timeout;
 	int			sysctl_acct;
 	int			sysctl_checksum;
 	unsigned int		sysctl_log_invalid; /* Log invalid packets */
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 14235b144cb..5f72b94b491 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -183,10 +183,6 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
 	NF_CT_ASSERT(!timer_pending(&ct->timeout));
 
-	if (!test_bit(IPS_DYING_BIT, &ct->status))
-		nf_conntrack_event(IPCT_DESTROY, ct);
-	set_bit(IPS_DYING_BIT, &ct->status);
-
 	/* To make sure we don't get any weird locking issues here:
 	 * destroy_conntrack() MUST NOT be called with a write lock
 	 * to nf_conntrack_lock!!! -HW */
@@ -220,9 +216,8 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	nf_conntrack_free(ct);
 }
 
-static void death_by_timeout(unsigned long ul_conntrack)
+void nf_ct_delete_from_lists(struct nf_conn *ct)
 {
-	struct nf_conn *ct = (void *)ul_conntrack;
 	struct net *net = nf_ct_net(ct);
 
 	nf_ct_helper_destroy(ct);
@@ -232,6 +227,59 @@ static void death_by_timeout(unsigned long ul_conntrack)
 	NF_CT_STAT_INC(net, delete_list);
 	clean_from_lists(ct);
 	spin_unlock_bh(&nf_conntrack_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists);
+
+static void death_by_event(unsigned long ul_conntrack)
+{
+	struct nf_conn *ct = (void *)ul_conntrack;
+	struct net *net = nf_ct_net(ct);
+
+	if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) {
+		/* bad luck, let's retry again */
+		ct->timeout.expires = jiffies +
+			(random32() % net->ct.sysctl_events_retry_timeout);
+		add_timer(&ct->timeout);
+		return;
+	}
+	/* we've got the event delivered, now it's dying */
+	set_bit(IPS_DYING_BIT, &ct->status);
+	spin_lock(&nf_conntrack_lock);
+	hlist_nulls_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+	spin_unlock(&nf_conntrack_lock);
+	nf_ct_put(ct);
+}
+
+void nf_ct_insert_dying_list(struct nf_conn *ct)
+{
+	struct net *net = nf_ct_net(ct);
+
+	/* add this conntrack to the dying list */
+	spin_lock_bh(&nf_conntrack_lock);
+	hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+			     &net->ct.dying);
+	spin_unlock_bh(&nf_conntrack_lock);
+	/* set a new timer to retry event delivery */
+	setup_timer(&ct->timeout, death_by_event, (unsigned long)ct);
+	ct->timeout.expires = jiffies +
+		(random32() % net->ct.sysctl_events_retry_timeout);
+	add_timer(&ct->timeout);
+}
+EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list);
+
+static void death_by_timeout(unsigned long ul_conntrack)
+{
+	struct nf_conn *ct = (void *)ul_conntrack;
+
+	if (!test_bit(IPS_DYING_BIT, &ct->status) &&
+	    unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
+		/* destroy event was not delivered */
+		nf_ct_delete_from_lists(ct);
+		nf_ct_insert_dying_list(ct);
+		return;
+	}
+	set_bit(IPS_DYING_BIT, &ct->status);
+	nf_ct_delete_from_lists(ct);
 	nf_ct_put(ct);
 }
 
@@ -982,11 +1030,13 @@ static int kill_report(struct nf_conn *i, void *data)
 {
 	struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
 
-	/* get_next_corpse sets the dying bit for us */
-	nf_conntrack_event_report(IPCT_DESTROY,
-				  i,
-				  fr->pid,
-				  fr->report);
+	/* If we fail to deliver the event, death_by_timeout() will retry */
+	if (nf_conntrack_event_report(IPCT_DESTROY, i,
+				      fr->pid, fr->report) < 0)
+		return 1;
+
+	/* Avoid the delivery of the destroy event in death_by_timeout(). */
+	set_bit(IPS_DYING_BIT, &i->status);
 	return 1;
 }
 
@@ -1015,6 +1065,21 @@ void nf_conntrack_flush_report(struct net *net, u32 pid, int report)
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
 
+static void nf_ct_release_dying_list(void)
+{
+	struct nf_conntrack_tuple_hash *h;
+	struct nf_conn *ct;
+	struct hlist_nulls_node *n;
+
+	spin_lock_bh(&nf_conntrack_lock);
+	hlist_nulls_for_each_entry(h, n, &init_net.ct.dying, hnnode) {
+		ct = nf_ct_tuplehash_to_ctrack(h);
+		/* never fails to remove them, no listeners at this point */
+		nf_ct_kill(ct);
+	}
+	spin_unlock_bh(&nf_conntrack_lock);
+}
+
 static void nf_conntrack_cleanup_init_net(void)
 {
 	nf_conntrack_helper_fini();
@@ -1026,6 +1091,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
 {
  i_see_dead_people:
 	nf_ct_iterate_cleanup(net, kill_all, NULL);
+	nf_ct_release_dying_list();
 	if (atomic_read(&net->ct.count) != 0) {
 		schedule();
 		goto i_see_dead_people;
@@ -1207,6 +1273,7 @@ static int nf_conntrack_init_net(struct net *net)
 
 	atomic_set(&net->ct.count, 0);
 	INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, 0);
+	INIT_HLIST_NULLS_HEAD(&net->ct.dying, 0);
 	net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
 	if (!net->ct.stat) {
 		ret = -ENOMEM;
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 683281b7804..aee560b4768 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -56,8 +56,21 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct)
 			.pid	= 0,
 			.report	= 0
 		};
-
-		notify->fcn(events, &item);
+		int ret;
+		/* We make a copy of the missed event cache without taking
+		 * the lock, thus we may send missed events twice. However,
+		 * this does not harm and it happens very rarely. */
+		unsigned long missed = e->missed;
+
+		ret = notify->fcn(events | missed, &item);
+		if (unlikely(ret < 0 || missed)) {
+			spin_lock_bh(&ct->lock);
+			if (ret < 0)
+				e->missed |= events;
+			else
+				e->missed &= ~missed;
+			spin_unlock_bh(&ct->lock);
+		} 
 	}
 
 out_unlock:
@@ -133,6 +146,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
 
 #define NF_CT_EVENTS_DEFAULT 1
 static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;
+static int nf_ct_events_retry_timeout __read_mostly = 15*HZ;
 
 #ifdef CONFIG_SYSCTL
 static struct ctl_table event_sysctl_table[] = {
@@ -144,6 +158,14 @@ static struct ctl_table event_sysctl_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "nf_conntrack_events_retry_timeout",
+		.data		= &init_net.ct.sysctl_events_retry_timeout,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
 	{}
 };
 #endif /* CONFIG_SYSCTL */
@@ -165,6 +187,7 @@ static int nf_conntrack_event_init_sysctl(struct net *net)
 		goto out;
 
 	table[0].data = &net->ct.sysctl_events;
+	table[1].data = &net->ct.sysctl_events_retry_timeout;
 
 	net->ct.event_sysctl_header =
 		register_net_sysctl_table(net,
@@ -205,6 +228,7 @@ int nf_conntrack_ecache_init(struct net *net)
 	int ret;
 
 	net->ct.sysctl_events = nf_ct_events;
+	net->ct.sysctl_events_retry_timeout = nf_ct_events_retry_timeout;
 
 	if (net_eq(net, &init_net)) {
 		ret = nf_ct_extend_register(&event_extend);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 19706eff164..49479d19457 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -463,6 +463,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 	struct sk_buff *skb;
 	unsigned int type;
 	unsigned int flags = 0, group;
+	int err;
 
 	/* ignore our fake conntrack entry */
 	if (ct == &nf_conntrack_untracked)
@@ -558,7 +559,10 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 	rcu_read_unlock();
 
 	nlmsg_end(skb, nlh);
-	nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC);
+	err = nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC);
+	if (err == -ENOBUFS || err == -EAGAIN)
+		return -ENOBUFS;
+
 	return 0;
 
 nla_put_failure:
@@ -798,10 +802,15 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 		}
 	}
 
-	nf_conntrack_event_report(IPCT_DESTROY,
-				  ct,
-				  NETLINK_CB(skb).pid,
-				  nlmsg_report(nlh));
+	if (nf_conntrack_event_report(IPCT_DESTROY, ct,
+				      NETLINK_CB(skb).pid,
+				      nlmsg_report(nlh)) < 0) {
+		nf_ct_delete_from_lists(ct);
+		/* we failed to report the event, try later */
+		nf_ct_insert_dying_list(ct);
+		nf_ct_put(ct);
+		return 0;
+	}
 
 	/* death_by_timeout would report the event again */
 	set_bit(IPS_DYING_BIT, &ct->status);
-- 
cgit v1.2.3-70-g09d2


From dfec072ecd35ba6ecad2d51dde325253ac9a2936 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Fri, 4 Apr 2008 00:51:41 +0200
Subject: kmemcheck: add the kmemcheck core

General description: kmemcheck is a patch to the linux kernel that
detects use of uninitialized memory. It does this by trapping every
read and write to memory that was allocated dynamically (e.g. using
kmalloc()). If a memory address is read that has not previously been
written to, a message is printed to the kernel log.

Thanks to Andi Kleen for the set_memory_4k() solution.

Andrew Morton suggested documenting the shadow member of struct page.

Signed-off-by: Vegard Nossum <vegardno@ifi.uio.no>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

[export kmemcheck_mark_initialized]
[build fix for setup_max_cpus]
Signed-off-by: Ingo Molnar <mingo@elte.hu>

[rebased for mainline inclusion]
Signed-off-by: Vegard Nossum <vegardno@ifi.uio.no>
---
 arch/x86/Makefile                    |   5 +
 arch/x86/include/asm/kmemcheck.h     |  42 +++
 arch/x86/include/asm/pgtable.h       |   9 +
 arch/x86/include/asm/pgtable_types.h |   4 +-
 arch/x86/mm/Makefile                 |   2 +
 arch/x86/mm/kmemcheck/Makefile       |   1 +
 arch/x86/mm/kmemcheck/error.c        | 229 ++++++++++++
 arch/x86/mm/kmemcheck/error.h        |  15 +
 arch/x86/mm/kmemcheck/kmemcheck.c    | 650 +++++++++++++++++++++++++++++++++++
 arch/x86/mm/kmemcheck/opcode.c       | 101 ++++++
 arch/x86/mm/kmemcheck/opcode.h       |   9 +
 arch/x86/mm/kmemcheck/pte.c          |  22 ++
 arch/x86/mm/kmemcheck/pte.h          |  10 +
 arch/x86/mm/kmemcheck/shadow.c       | 153 +++++++++
 arch/x86/mm/kmemcheck/shadow.h       |  16 +
 include/linux/kmemcheck.h            |  17 +
 include/linux/mm_types.h             |   8 +
 init/main.c                          |   1 +
 kernel/sysctl.c                      |  12 +
 19 files changed, 1304 insertions(+), 2 deletions(-)
 create mode 100644 arch/x86/include/asm/kmemcheck.h
 create mode 100644 arch/x86/mm/kmemcheck/Makefile
 create mode 100644 arch/x86/mm/kmemcheck/error.c
 create mode 100644 arch/x86/mm/kmemcheck/error.h
 create mode 100644 arch/x86/mm/kmemcheck/kmemcheck.c
 create mode 100644 arch/x86/mm/kmemcheck/opcode.c
 create mode 100644 arch/x86/mm/kmemcheck/opcode.h
 create mode 100644 arch/x86/mm/kmemcheck/pte.c
 create mode 100644 arch/x86/mm/kmemcheck/pte.h
 create mode 100644 arch/x86/mm/kmemcheck/shadow.c
 create mode 100644 arch/x86/mm/kmemcheck/shadow.h
 create mode 100644 include/linux/kmemcheck.h

(limited to 'include')

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index edbd0ca6206..1b68659c41b 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -81,6 +81,11 @@ ifdef CONFIG_CC_STACKPROTECTOR
         endif
 endif
 
+# Don't unroll struct assignments with kmemcheck enabled
+ifeq ($(CONFIG_KMEMCHECK),y)
+	KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy)
+endif
+
 # Stackpointer is addressed different for 32 bit and 64 bit x86
 sp-$(CONFIG_X86_32) := esp
 sp-$(CONFIG_X86_64) := rsp
diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h
new file mode 100644
index 00000000000..ed01518f297
--- /dev/null
+++ b/arch/x86/include/asm/kmemcheck.h
@@ -0,0 +1,42 @@
+#ifndef ASM_X86_KMEMCHECK_H
+#define ASM_X86_KMEMCHECK_H
+
+#include <linux/types.h>
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_KMEMCHECK
+bool kmemcheck_active(struct pt_regs *regs);
+
+void kmemcheck_show(struct pt_regs *regs);
+void kmemcheck_hide(struct pt_regs *regs);
+
+bool kmemcheck_fault(struct pt_regs *regs,
+	unsigned long address, unsigned long error_code);
+bool kmemcheck_trap(struct pt_regs *regs);
+#else
+static inline bool kmemcheck_active(struct pt_regs *regs)
+{
+	return false;
+}
+
+static inline void kmemcheck_show(struct pt_regs *regs)
+{
+}
+
+static inline void kmemcheck_hide(struct pt_regs *regs)
+{
+}
+
+static inline bool kmemcheck_fault(struct pt_regs *regs,
+	unsigned long address, unsigned long error_code)
+{
+	return false;
+}
+
+static inline bool kmemcheck_trap(struct pt_regs *regs)
+{
+	return false;
+}
+#endif /* CONFIG_KMEMCHECK */
+
+#endif
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 18ef7ebf263..c5a08079ad5 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -317,6 +317,15 @@ static inline int pte_present(pte_t a)
 	return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
 }
 
+static inline int pte_hidden(pte_t x)
+{
+#ifdef CONFIG_KMEMCHECK
+	return pte_flags(x) & _PAGE_HIDDEN;
+#else
+	return 0;
+#endif
+}
+
 static inline int pmd_present(pmd_t pmd)
 {
 	return pmd_flags(pmd) & _PAGE_PRESENT;
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 4d258ad76a0..9b5c92140aa 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -18,7 +18,7 @@
 #define _PAGE_BIT_GLOBAL	8	/* Global TLB entry PPro+ */
 #define _PAGE_BIT_UNUSED1	9	/* available for programmer */
 #define _PAGE_BIT_IOMAP		10	/* flag used to indicate IO mapping */
-#define _PAGE_BIT_UNUSED3	11
+#define _PAGE_BIT_HIDDEN	11	/* hidden by kmemcheck */
 #define _PAGE_BIT_PAT_LARGE	12	/* On 2MB or 1GB pages */
 #define _PAGE_BIT_SPECIAL	_PAGE_BIT_UNUSED1
 #define _PAGE_BIT_CPA_TEST	_PAGE_BIT_UNUSED1
@@ -41,7 +41,7 @@
 #define _PAGE_GLOBAL	(_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
 #define _PAGE_UNUSED1	(_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
 #define _PAGE_IOMAP	(_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
-#define _PAGE_UNUSED3	(_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3)
+#define _PAGE_HIDDEN	(_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN)
 #define _PAGE_PAT	(_AT(pteval_t, 1) << _PAGE_BIT_PAT)
 #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
 #define _PAGE_SPECIAL	(_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index fdd30d08ab5..eefdeee8a87 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -10,6 +10,8 @@ obj-$(CONFIG_X86_PTDUMP)	+= dump_pagetables.o
 
 obj-$(CONFIG_HIGHMEM)		+= highmem_32.o
 
+obj-$(CONFIG_KMEMCHECK)		+= kmemcheck/
+
 obj-$(CONFIG_MMIOTRACE)		+= mmiotrace.o
 mmiotrace-y			:= kmmio.o pf_in.o mmio-mod.o
 obj-$(CONFIG_MMIOTRACE_TEST)	+= testmmiotrace.o
diff --git a/arch/x86/mm/kmemcheck/Makefile b/arch/x86/mm/kmemcheck/Makefile
new file mode 100644
index 00000000000..4666b7a778b
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/Makefile
@@ -0,0 +1 @@
+obj-y := error.o kmemcheck.o opcode.o pte.o shadow.o
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
new file mode 100644
index 00000000000..5ec9f5a93f4
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/error.c
@@ -0,0 +1,229 @@
+#include <linux/interrupt.h>
+#include <linux/kdebug.h>
+#include <linux/kmemcheck.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/stacktrace.h>
+#include <linux/string.h>
+
+#include "error.h"
+#include "shadow.h"
+
+enum kmemcheck_error_type {
+	KMEMCHECK_ERROR_INVALID_ACCESS,
+	KMEMCHECK_ERROR_BUG,
+};
+
+#define SHADOW_COPY_SIZE (1 << CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT)
+
+struct kmemcheck_error {
+	enum kmemcheck_error_type type;
+
+	union {
+		/* KMEMCHECK_ERROR_INVALID_ACCESS */
+		struct {
+			/* Kind of access that caused the error */
+			enum kmemcheck_shadow state;
+			/* Address and size of the erroneous read */
+			unsigned long	address;
+			unsigned int	size;
+		};
+	};
+
+	struct pt_regs		regs;
+	struct stack_trace	trace;
+	unsigned long		trace_entries[32];
+
+	/* We compress it to a char. */
+	unsigned char		shadow_copy[SHADOW_COPY_SIZE];
+	unsigned char		memory_copy[SHADOW_COPY_SIZE];
+};
+
+/*
+ * Create a ring queue of errors to output. We can't call printk() directly
+ * from the kmemcheck traps, since this may call the console drivers and
+ * result in a recursive fault.
+ */
+static struct kmemcheck_error error_fifo[CONFIG_KMEMCHECK_QUEUE_SIZE];
+static unsigned int error_count;
+static unsigned int error_rd;
+static unsigned int error_wr;
+static unsigned int error_missed_count;
+
+static struct kmemcheck_error *error_next_wr(void)
+{
+	struct kmemcheck_error *e;
+
+	if (error_count == ARRAY_SIZE(error_fifo)) {
+		++error_missed_count;
+		return NULL;
+	}
+
+	e = &error_fifo[error_wr];
+	if (++error_wr == ARRAY_SIZE(error_fifo))
+		error_wr = 0;
+	++error_count;
+	return e;
+}
+
+static struct kmemcheck_error *error_next_rd(void)
+{
+	struct kmemcheck_error *e;
+
+	if (error_count == 0)
+		return NULL;
+
+	e = &error_fifo[error_rd];
+	if (++error_rd == ARRAY_SIZE(error_fifo))
+		error_rd = 0;
+	--error_count;
+	return e;
+}
+
+static void do_wakeup(unsigned long);
+static DECLARE_TASKLET(kmemcheck_tasklet, &do_wakeup, 0);
+
+/*
+ * Save the context of an error report.
+ */
+void kmemcheck_error_save(enum kmemcheck_shadow state,
+	unsigned long address, unsigned int size, struct pt_regs *regs)
+{
+	static unsigned long prev_ip;
+
+	struct kmemcheck_error *e;
+	void *shadow_copy;
+	void *memory_copy;
+
+	/* Don't report several adjacent errors from the same EIP. */
+	if (regs->ip == prev_ip)
+		return;
+	prev_ip = regs->ip;
+
+	e = error_next_wr();
+	if (!e)
+		return;
+
+	e->type = KMEMCHECK_ERROR_INVALID_ACCESS;
+
+	e->state = state;
+	e->address = address;
+	e->size = size;
+
+	/* Save regs */
+	memcpy(&e->regs, regs, sizeof(*regs));
+
+	/* Save stack trace */
+	e->trace.nr_entries = 0;
+	e->trace.entries = e->trace_entries;
+	e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
+	e->trace.skip = 0;
+	save_stack_trace_bp(&e->trace, regs->bp);
+
+	/* Round address down to nearest 16 bytes */
+	shadow_copy = kmemcheck_shadow_lookup(address
+		& ~(SHADOW_COPY_SIZE - 1));
+	BUG_ON(!shadow_copy);
+
+	memcpy(e->shadow_copy, shadow_copy, SHADOW_COPY_SIZE);
+
+	kmemcheck_show_addr(address);
+	memory_copy = (void *) (address & ~(SHADOW_COPY_SIZE - 1));
+	memcpy(e->memory_copy, memory_copy, SHADOW_COPY_SIZE);
+	kmemcheck_hide_addr(address);
+
+	tasklet_hi_schedule_first(&kmemcheck_tasklet);
+}
+
+/*
+ * Save the context of a kmemcheck bug.
+ */
+void kmemcheck_error_save_bug(struct pt_regs *regs)
+{
+	struct kmemcheck_error *e;
+
+	e = error_next_wr();
+	if (!e)
+		return;
+
+	e->type = KMEMCHECK_ERROR_BUG;
+
+	memcpy(&e->regs, regs, sizeof(*regs));
+
+	e->trace.nr_entries = 0;
+	e->trace.entries = e->trace_entries;
+	e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
+	e->trace.skip = 1;
+	save_stack_trace(&e->trace);
+
+	tasklet_hi_schedule_first(&kmemcheck_tasklet);
+}
+
+void kmemcheck_error_recall(void)
+{
+	static const char *desc[] = {
+		[KMEMCHECK_SHADOW_UNALLOCATED]		= "unallocated",
+		[KMEMCHECK_SHADOW_UNINITIALIZED]	= "uninitialized",
+		[KMEMCHECK_SHADOW_INITIALIZED]		= "initialized",
+		[KMEMCHECK_SHADOW_FREED]		= "freed",
+	};
+
+	static const char short_desc[] = {
+		[KMEMCHECK_SHADOW_UNALLOCATED]		= 'a',
+		[KMEMCHECK_SHADOW_UNINITIALIZED]	= 'u',
+		[KMEMCHECK_SHADOW_INITIALIZED]		= 'i',
+		[KMEMCHECK_SHADOW_FREED]		= 'f',
+	};
+
+	struct kmemcheck_error *e;
+	unsigned int i;
+
+	e = error_next_rd();
+	if (!e)
+		return;
+
+	switch (e->type) {
+	case KMEMCHECK_ERROR_INVALID_ACCESS:
+		printk(KERN_ERR  "WARNING: kmemcheck: Caught %d-bit read "
+			"from %s memory (%p)\n",
+			8 * e->size, e->state < ARRAY_SIZE(desc) ?
+				desc[e->state] : "(invalid shadow state)",
+			(void *) e->address);
+
+		printk(KERN_INFO);
+		for (i = 0; i < SHADOW_COPY_SIZE; ++i)
+			printk("%02x", e->memory_copy[i]);
+		printk("\n");
+
+		printk(KERN_INFO);
+		for (i = 0; i < SHADOW_COPY_SIZE; ++i) {
+			if (e->shadow_copy[i] < ARRAY_SIZE(short_desc))
+				printk(" %c", short_desc[e->shadow_copy[i]]);
+			else
+				printk(" ?");
+		}
+		printk("\n");
+		printk(KERN_INFO "%*c\n", 2 + 2
+			* (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^');
+		break;
+	case KMEMCHECK_ERROR_BUG:
+		printk(KERN_EMERG "ERROR: kmemcheck: Fatal error\n");
+		break;
+	}
+
+	__show_regs(&e->regs, 1);
+	print_stack_trace(&e->trace, 0);
+}
+
+static void do_wakeup(unsigned long data)
+{
+	while (error_count > 0)
+		kmemcheck_error_recall();
+
+	if (error_missed_count > 0) {
+		printk(KERN_WARNING "kmemcheck: Lost %d error reports because "
+			"the queue was too small\n", error_missed_count);
+		error_missed_count = 0;
+	}
+}
diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h
new file mode 100644
index 00000000000..0efc2e8d0a2
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/error.h
@@ -0,0 +1,15 @@
+#ifndef ARCH__X86__MM__KMEMCHECK__ERROR_H
+#define ARCH__X86__MM__KMEMCHECK__ERROR_H
+
+#include <linux/ptrace.h>
+
+#include "shadow.h"
+
+void kmemcheck_error_save(enum kmemcheck_shadow state,
+	unsigned long address, unsigned int size, struct pt_regs *regs);
+
+void kmemcheck_error_save_bug(struct pt_regs *regs);
+
+void kmemcheck_error_recall(void);
+
+#endif
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
new file mode 100644
index 00000000000..9de7d8f6b6e
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -0,0 +1,650 @@
+/**
+ * kmemcheck - a heavyweight memory checker for the linux kernel
+ * Copyright (C) 2007, 2008  Vegard Nossum <vegardno@ifi.uio.no>
+ * (With a lot of help from Ingo Molnar and Pekka Enberg.)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2) as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kallsyms.h>
+#include <linux/kernel.h>
+#include <linux/kmemcheck.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/page-flags.h>
+#include <linux/percpu.h>
+#include <linux/ptrace.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include <asm/cacheflush.h>
+#include <asm/kmemcheck.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+#include "error.h"
+#include "opcode.h"
+#include "pte.h"
+#include "shadow.h"
+
+#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT
+#  define KMEMCHECK_ENABLED 0
+#endif
+
+#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT
+#  define KMEMCHECK_ENABLED 1
+#endif
+
+#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT
+#  define KMEMCHECK_ENABLED 2
+#endif
+
+int kmemcheck_enabled = KMEMCHECK_ENABLED;
+
+int __init kmemcheck_init(void)
+{
+	printk(KERN_INFO "kmemcheck: \"Bugs, beware!\"\n");
+
+#ifdef CONFIG_SMP
+	/*
+	 * Limit SMP to use a single CPU. We rely on the fact that this code
+	 * runs before SMP is set up.
+	 */
+	if (setup_max_cpus > 1) {
+		printk(KERN_INFO
+			"kmemcheck: Limiting number of CPUs to 1.\n");
+		setup_max_cpus = 1;
+	}
+#endif
+
+	return 0;
+}
+
+early_initcall(kmemcheck_init);
+
+#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT
+int kmemcheck_enabled = 0;
+#endif
+
+#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT
+int kmemcheck_enabled = 1;
+#endif
+
+#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT
+int kmemcheck_enabled = 2;
+#endif
+
+/*
+ * We need to parse the kmemcheck= option before any memory is allocated.
+ */
+static int __init param_kmemcheck(char *str)
+{
+	if (!str)
+		return -EINVAL;
+
+	sscanf(str, "%d", &kmemcheck_enabled);
+	return 0;
+}
+
+early_param("kmemcheck", param_kmemcheck);
+
+int kmemcheck_show_addr(unsigned long address)
+{
+	pte_t *pte;
+
+	pte = kmemcheck_pte_lookup(address);
+	if (!pte)
+		return 0;
+
+	set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
+	__flush_tlb_one(address);
+	return 1;
+}
+
+int kmemcheck_hide_addr(unsigned long address)
+{
+	pte_t *pte;
+
+	pte = kmemcheck_pte_lookup(address);
+	if (!pte)
+		return 0;
+
+	set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
+	__flush_tlb_one(address);
+	return 1;
+}
+
+struct kmemcheck_context {
+	bool busy;
+	int balance;
+
+	/*
+	 * There can be at most two memory operands to an instruction, but
+	 * each address can cross a page boundary -- so we may need up to
+	 * four addresses that must be hidden/revealed for each fault.
+	 */
+	unsigned long addr[4];
+	unsigned long n_addrs;
+	unsigned long flags;
+
+	/* Data size of the instruction that caused a fault. */
+	unsigned int size;
+};
+
+static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context);
+
+bool kmemcheck_active(struct pt_regs *regs)
+{
+	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
+
+	return data->balance > 0;
+}
+
+/* Save an address that needs to be shown/hidden */
+static void kmemcheck_save_addr(unsigned long addr)
+{
+	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
+
+	BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr));
+	data->addr[data->n_addrs++] = addr;
+}
+
+static unsigned int kmemcheck_show_all(void)
+{
+	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
+	unsigned int i;
+	unsigned int n;
+
+	n = 0;
+	for (i = 0; i < data->n_addrs; ++i)
+		n += kmemcheck_show_addr(data->addr[i]);
+
+	return n;
+}
+
+static unsigned int kmemcheck_hide_all(void)
+{
+	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
+	unsigned int i;
+	unsigned int n;
+
+	n = 0;
+	for (i = 0; i < data->n_addrs; ++i)
+		n += kmemcheck_hide_addr(data->addr[i]);
+
+	return n;
+}
+
+/*
+ * Called from the #PF handler.
+ */
+void kmemcheck_show(struct pt_regs *regs)
+{
+	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
+
+	BUG_ON(!irqs_disabled());
+
+	if (unlikely(data->balance != 0)) {
+		kmemcheck_show_all();
+		kmemcheck_error_save_bug(regs);
+		data->balance = 0;
+		return;
+	}
+
+	/*
+	 * None of the addresses actually belonged to kmemcheck. Note that
+	 * this is not an error.
+	 */
+	if (kmemcheck_show_all() == 0)
+		return;
+
+	++data->balance;
+
+	/*
+	 * The IF needs to be cleared as well, so that the faulting
+	 * instruction can run "uninterrupted". Otherwise, we might take
+	 * an interrupt and start executing that before we've had a chance
+	 * to hide the page again.
+	 *
+	 * NOTE: In the rare case of multiple faults, we must not override
+	 * the original flags:
+	 */
+	if (!(regs->flags & X86_EFLAGS_TF))
+		data->flags = regs->flags;
+
+	regs->flags |= X86_EFLAGS_TF;
+	regs->flags &= ~X86_EFLAGS_IF;
+}
+
+/*
+ * Called from the #DB handler.
+ */
+void kmemcheck_hide(struct pt_regs *regs)
+{
+	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
+	int n;
+
+	BUG_ON(!irqs_disabled());
+
+	if (data->balance == 0)
+		return;
+
+	if (unlikely(data->balance != 1)) {
+		kmemcheck_show_all();
+		kmemcheck_error_save_bug(regs);
+		data->n_addrs = 0;
+		data->balance = 0;
+
+		if (!(data->flags & X86_EFLAGS_TF))
+			regs->flags &= ~X86_EFLAGS_TF;
+		if (data->flags & X86_EFLAGS_IF)
+			regs->flags |= X86_EFLAGS_IF;
+		return;
+	}
+
+	if (kmemcheck_enabled)
+		n = kmemcheck_hide_all();
+	else
+		n = kmemcheck_show_all();
+
+	if (n == 0)
+		return;
+
+	--data->balance;
+
+	data->n_addrs = 0;
+
+	if (!(data->flags & X86_EFLAGS_TF))
+		regs->flags &= ~X86_EFLAGS_TF;
+	if (data->flags & X86_EFLAGS_IF)
+		regs->flags |= X86_EFLAGS_IF;
+}
+
+void kmemcheck_show_pages(struct page *p, unsigned int n)
+{
+	unsigned int i;
+
+	for (i = 0; i < n; ++i) {
+		unsigned long address;
+		pte_t *pte;
+		unsigned int level;
+
+		address = (unsigned long) page_address(&p[i]);
+		pte = lookup_address(address, &level);
+		BUG_ON(!pte);
+		BUG_ON(level != PG_LEVEL_4K);
+
+		set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
+		set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN));
+		__flush_tlb_one(address);
+	}
+}
+
+bool kmemcheck_page_is_tracked(struct page *p)
+{
+	/* This will also check the "hidden" flag of the PTE. */
+	return kmemcheck_pte_lookup((unsigned long) page_address(p));
+}
+
+void kmemcheck_hide_pages(struct page *p, unsigned int n)
+{
+	unsigned int i;
+
+	for (i = 0; i < n; ++i) {
+		unsigned long address;
+		pte_t *pte;
+		unsigned int level;
+
+		address = (unsigned long) page_address(&p[i]);
+		pte = lookup_address(address, &level);
+		BUG_ON(!pte);
+		BUG_ON(level != PG_LEVEL_4K);
+
+		set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
+		set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN));
+		__flush_tlb_one(address);
+	}
+}
+
+/* Access may NOT cross page boundary */
+static void kmemcheck_read_strict(struct pt_regs *regs,
+	unsigned long addr, unsigned int size)
+{
+	void *shadow;
+	enum kmemcheck_shadow status;
+
+	shadow = kmemcheck_shadow_lookup(addr);
+	if (!shadow)
+		return;
+
+	kmemcheck_save_addr(addr);
+	status = kmemcheck_shadow_test(shadow, size);
+	if (status == KMEMCHECK_SHADOW_INITIALIZED)
+		return;
+
+	if (kmemcheck_enabled)
+		kmemcheck_error_save(status, addr, size, regs);
+
+	if (kmemcheck_enabled == 2)
+		kmemcheck_enabled = 0;
+
+	/* Don't warn about it again. */
+	kmemcheck_shadow_set(shadow, size);
+}
+
+/* Access may cross page boundary */
+static void kmemcheck_read(struct pt_regs *regs,
+	unsigned long addr, unsigned int size)
+{
+	unsigned long page = addr & PAGE_MASK;
+	unsigned long next_addr = addr + size - 1;
+	unsigned long next_page = next_addr & PAGE_MASK;
+
+	if (likely(page == next_page)) {
+		kmemcheck_read_strict(regs, addr, size);
+		return;
+	}
+
+	/*
+	 * What we do is basically to split the access across the
+	 * two pages and handle each part separately. Yes, this means
+	 * that we may now see reads that are 3 + 5 bytes, for
+	 * example (and if both are uninitialized, there will be two
+	 * reports), but it makes the code a lot simpler.
+	 */
+	kmemcheck_read_strict(regs, addr, next_page - addr);
+	kmemcheck_read_strict(regs, next_page, next_addr - next_page);
+}
+
+static void kmemcheck_write_strict(struct pt_regs *regs,
+	unsigned long addr, unsigned int size)
+{
+	void *shadow;
+
+	shadow = kmemcheck_shadow_lookup(addr);
+	if (!shadow)
+		return;
+
+	kmemcheck_save_addr(addr);
+	kmemcheck_shadow_set(shadow, size);
+}
+
+static void kmemcheck_write(struct pt_regs *regs,
+	unsigned long addr, unsigned int size)
+{
+	unsigned long page = addr & PAGE_MASK;
+	unsigned long next_addr = addr + size - 1;
+	unsigned long next_page = next_addr & PAGE_MASK;
+
+	if (likely(page == next_page)) {
+		kmemcheck_write_strict(regs, addr, size);
+		return;
+	}
+
+	/* See comment in kmemcheck_read(). */
+	kmemcheck_write_strict(regs, addr, next_page - addr);
+	kmemcheck_write_strict(regs, next_page, next_addr - next_page);
+}
+
+/*
+ * Copying is hard. We have two addresses, each of which may be split across
+ * a page (and each page will have different shadow addresses).
+ */
+static void kmemcheck_copy(struct pt_regs *regs,
+	unsigned long src_addr, unsigned long dst_addr, unsigned int size)
+{
+	uint8_t shadow[8];
+	enum kmemcheck_shadow status;
+
+	unsigned long page;
+	unsigned long next_addr;
+	unsigned long next_page;
+
+	uint8_t *x;
+	unsigned int i;
+	unsigned int n;
+
+	BUG_ON(size > sizeof(shadow));
+
+	page = src_addr & PAGE_MASK;
+	next_addr = src_addr + size - 1;
+	next_page = next_addr & PAGE_MASK;
+
+	if (likely(page == next_page)) {
+		/* Same page */
+		x = kmemcheck_shadow_lookup(src_addr);
+		if (x) {
+			kmemcheck_save_addr(src_addr);
+			for (i = 0; i < size; ++i)
+				shadow[i] = x[i];
+		} else {
+			for (i = 0; i < size; ++i)
+				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
+		}
+	} else {
+		n = next_page - src_addr;
+		BUG_ON(n > sizeof(shadow));
+
+		/* First page */
+		x = kmemcheck_shadow_lookup(src_addr);
+		if (x) {
+			kmemcheck_save_addr(src_addr);
+			for (i = 0; i < n; ++i)
+				shadow[i] = x[i];
+		} else {
+			/* Not tracked */
+			for (i = 0; i < n; ++i)
+				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
+		}
+
+		/* Second page */
+		x = kmemcheck_shadow_lookup(next_page);
+		if (x) {
+			kmemcheck_save_addr(next_page);
+			for (i = n; i < size; ++i)
+				shadow[i] = x[i - n];
+		} else {
+			/* Not tracked */
+			for (i = n; i < size; ++i)
+				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
+		}
+	}
+
+	page = dst_addr & PAGE_MASK;
+	next_addr = dst_addr + size - 1;
+	next_page = next_addr & PAGE_MASK;
+
+	if (likely(page == next_page)) {
+		/* Same page */
+		x = kmemcheck_shadow_lookup(dst_addr);
+		if (x) {
+			kmemcheck_save_addr(dst_addr);
+			for (i = 0; i < size; ++i) {
+				x[i] = shadow[i];
+				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
+			}
+		}
+	} else {
+		n = next_page - dst_addr;
+		BUG_ON(n > sizeof(shadow));
+
+		/* First page */
+		x = kmemcheck_shadow_lookup(dst_addr);
+		if (x) {
+			kmemcheck_save_addr(dst_addr);
+			for (i = 0; i < n; ++i) {
+				x[i] = shadow[i];
+				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
+			}
+		}
+
+		/* Second page */
+		x = kmemcheck_shadow_lookup(next_page);
+		if (x) {
+			kmemcheck_save_addr(next_page);
+			for (i = n; i < size; ++i) {
+				x[i - n] = shadow[i];
+				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
+			}
+		}
+	}
+
+	status = kmemcheck_shadow_test(shadow, size);
+	if (status == KMEMCHECK_SHADOW_INITIALIZED)
+		return;
+
+	if (kmemcheck_enabled)
+		kmemcheck_error_save(status, src_addr, size, regs);
+
+	if (kmemcheck_enabled == 2)
+		kmemcheck_enabled = 0;
+}
+
+enum kmemcheck_method {
+	KMEMCHECK_READ,
+	KMEMCHECK_WRITE,
+};
+
+static void kmemcheck_access(struct pt_regs *regs,
+	unsigned long fallback_address, enum kmemcheck_method fallback_method)
+{
+	const uint8_t *insn;
+	const uint8_t *insn_primary;
+	unsigned int size;
+
+	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
+
+	/* Recursive fault -- ouch. */
+	if (data->busy) {
+		kmemcheck_show_addr(fallback_address);
+		kmemcheck_error_save_bug(regs);
+		return;
+	}
+
+	data->busy = true;
+
+	insn = (const uint8_t *) regs->ip;
+	insn_primary = kmemcheck_opcode_get_primary(insn);
+
+	kmemcheck_opcode_decode(insn, &size);
+
+	switch (insn_primary[0]) {
+#ifdef CONFIG_KMEMCHECK_BITOPS_OK
+		/* AND, OR, XOR */
+		/*
+		 * Unfortunately, these instructions have to be excluded from
+		 * our regular checking since they access only some (and not
+		 * all) bits. This clears out "bogus" bitfield-access warnings.
+		 */
+	case 0x80:
+	case 0x81:
+	case 0x82:
+	case 0x83:
+		switch ((insn_primary[1] >> 3) & 7) {
+			/* OR */
+		case 1:
+			/* AND */
+		case 4:
+			/* XOR */
+		case 6:
+			kmemcheck_write(regs, fallback_address, size);
+			goto out;
+
+			/* ADD */
+		case 0:
+			/* ADC */
+		case 2:
+			/* SBB */
+		case 3:
+			/* SUB */
+		case 5:
+			/* CMP */
+		case 7:
+			break;
+		}
+		break;
+#endif
+
+		/* MOVS, MOVSB, MOVSW, MOVSD */
+	case 0xa4:
+	case 0xa5:
+		/*
+		 * These instructions are special because they take two
+		 * addresses, but we only get one page fault.
+		 */
+		kmemcheck_copy(regs, regs->si, regs->di, size);
+		goto out;
+
+		/* CMPS, CMPSB, CMPSW, CMPSD */
+	case 0xa6:
+	case 0xa7:
+		kmemcheck_read(regs, regs->si, size);
+		kmemcheck_read(regs, regs->di, size);
+		goto out;
+	}
+
+	/*
+	 * If the opcode isn't special in any way, we use the data from the
+	 * page fault handler to determine the address and type of memory
+	 * access.
+	 */
+	switch (fallback_method) {
+	case KMEMCHECK_READ:
+		kmemcheck_read(regs, fallback_address, size);
+		goto out;
+	case KMEMCHECK_WRITE:
+		kmemcheck_write(regs, fallback_address, size);
+		goto out;
+	}
+
+out:
+	data->busy = false;
+}
+
+bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
+	unsigned long error_code)
+{
+	pte_t *pte;
+	unsigned int level;
+
+	/*
+	 * XXX: Is it safe to assume that memory accesses from virtual 86
+	 * mode or non-kernel code segments will _never_ access kernel
+	 * memory (e.g. tracked pages)? For now, we need this to avoid
+	 * invoking kmemcheck for PnP BIOS calls.
+	 */
+	if (regs->flags & X86_VM_MASK)
+		return false;
+	if (regs->cs != __KERNEL_CS)
+		return false;
+
+	pte = lookup_address(address, &level);
+	if (!pte)
+		return false;
+	if (level != PG_LEVEL_4K)
+		return false;
+	if (!pte_hidden(*pte))
+		return false;
+
+	if (error_code & 2)
+		kmemcheck_access(regs, address, KMEMCHECK_WRITE);
+	else
+		kmemcheck_access(regs, address, KMEMCHECK_READ);
+
+	kmemcheck_show(regs);
+	return true;
+}
+
+bool kmemcheck_trap(struct pt_regs *regs)
+{
+	if (!kmemcheck_active(regs))
+		return false;
+
+	/* We're done. */
+	kmemcheck_hide(regs);
+	return true;
+}
diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c
new file mode 100644
index 00000000000..a4100b6e783
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/opcode.c
@@ -0,0 +1,101 @@
+#include <linux/types.h>
+
+#include "opcode.h"
+
+static bool opcode_is_prefix(uint8_t b)
+{
+	return
+		/* Group 1 */
+		b == 0xf0 || b == 0xf2 || b == 0xf3
+		/* Group 2 */
+		|| b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26
+		|| b == 0x64 || b == 0x65 || b == 0x2e || b == 0x3e
+		/* Group 3 */
+		|| b == 0x66
+		/* Group 4 */
+		|| b == 0x67;
+}
+
+static bool opcode_is_rex_prefix(uint8_t b)
+{
+	return (b & 0xf0) == 0x40;
+}
+
+#define REX_W (1 << 3)
+
+/*
+ * This is a VERY crude opcode decoder. We only need to find the size of the
+ * load/store that caused our #PF and this should work for all the opcodes
+ * that we care about. Moreover, the ones who invented this instruction set
+ * should be shot.
+ */
+void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size)
+{
+	/* Default operand size */
+	int operand_size_override = 4;
+
+	/* prefixes */
+	for (; opcode_is_prefix(*op); ++op) {
+		if (*op == 0x66)
+			operand_size_override = 2;
+	}
+
+#ifdef CONFIG_X86_64
+	/* REX prefix */
+	if (opcode_is_rex_prefix(*op)) {
+		uint8_t rex = *op;
+
+		++op;
+		if (rex & REX_W) {
+			switch (*op) {
+			case 0x63:
+				*size = 4;
+				return;
+			case 0x0f:
+				++op;
+
+				switch (*op) {
+				case 0xb6:
+				case 0xbe:
+					*size = 1;
+					return;
+				case 0xb7:
+				case 0xbf:
+					*size = 2;
+					return;
+				}
+
+				break;
+			}
+
+			*size = 8;
+			return;
+		}
+	}
+#endif
+
+	/* escape opcode */
+	if (*op == 0x0f) {
+		++op;
+
+		/*
+		 * This is move with zero-extend and sign-extend, respectively;
+		 * we don't have to think about 0xb6/0xbe, because this is
+		 * already handled in the conditional below.
+		 */
+		if (*op == 0xb7 || *op == 0xbf)
+			operand_size_override = 2;
+	}
+
+	*size = (*op & 1) ? operand_size_override : 1;
+}
+
+const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op)
+{
+	/* skip prefixes */
+	while (opcode_is_prefix(*op))
+		++op;
+	if (opcode_is_rex_prefix(*op))
+		++op;
+	return op;
+}
diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h
new file mode 100644
index 00000000000..6956aad66b5
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/opcode.h
@@ -0,0 +1,9 @@
+#ifndef ARCH__X86__MM__KMEMCHECK__OPCODE_H
+#define ARCH__X86__MM__KMEMCHECK__OPCODE_H
+
+#include <linux/types.h>
+
+void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size);
+const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op);
+
+#endif
diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c
new file mode 100644
index 00000000000..4ead26eeaf9
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/pte.c
@@ -0,0 +1,22 @@
+#include <linux/mm.h>
+
+#include <asm/pgtable.h>
+
+#include "pte.h"
+
+pte_t *kmemcheck_pte_lookup(unsigned long address)
+{
+	pte_t *pte;
+	unsigned int level;
+
+	pte = lookup_address(address, &level);
+	if (!pte)
+		return NULL;
+	if (level != PG_LEVEL_4K)
+		return NULL;
+	if (!pte_hidden(*pte))
+		return NULL;
+
+	return pte;
+}
+
diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h
new file mode 100644
index 00000000000..9f596645649
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/pte.h
@@ -0,0 +1,10 @@
+#ifndef ARCH__X86__MM__KMEMCHECK__PTE_H
+#define ARCH__X86__MM__KMEMCHECK__PTE_H
+
+#include <linux/mm.h>
+
+#include <asm/pgtable.h>
+
+pte_t *kmemcheck_pte_lookup(unsigned long address);
+
+#endif
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c
new file mode 100644
index 00000000000..5544d360087
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/shadow.c
@@ -0,0 +1,153 @@
+#include <linux/kmemcheck.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+#include "pte.h"
+#include "shadow.h"
+
+/*
+ * Return the shadow address for the given address. Returns NULL if the
+ * address is not tracked.
+ *
+ * We need to be extremely careful not to follow any invalid pointers,
+ * because this function can be called for *any* possible address.
+ */
+void *kmemcheck_shadow_lookup(unsigned long address)
+{
+	pte_t *pte;
+	struct page *page;
+
+	if (!virt_addr_valid(address))
+		return NULL;
+
+	pte = kmemcheck_pte_lookup(address);
+	if (!pte)
+		return NULL;
+
+	page = virt_to_page(address);
+	if (!page->shadow)
+		return NULL;
+	return page->shadow + (address & (PAGE_SIZE - 1));
+}
+
+static void mark_shadow(void *address, unsigned int n,
+	enum kmemcheck_shadow status)
+{
+	unsigned long addr = (unsigned long) address;
+	unsigned long last_addr = addr + n - 1;
+	unsigned long page = addr & PAGE_MASK;
+	unsigned long last_page = last_addr & PAGE_MASK;
+	unsigned int first_n;
+	void *shadow;
+
+	/* If the memory range crosses a page boundary, stop there. */
+	if (page == last_page)
+		first_n = n;
+	else
+		first_n = page + PAGE_SIZE - addr;
+
+	shadow = kmemcheck_shadow_lookup(addr);
+	if (shadow)
+		memset(shadow, status, first_n);
+
+	addr += first_n;
+	n -= first_n;
+
+	/* Do full-page memset()s. */
+	while (n >= PAGE_SIZE) {
+		shadow = kmemcheck_shadow_lookup(addr);
+		if (shadow)
+			memset(shadow, status, PAGE_SIZE);
+
+		addr += PAGE_SIZE;
+		n -= PAGE_SIZE;
+	}
+
+	/* Do the remaining page, if any. */
+	if (n > 0) {
+		shadow = kmemcheck_shadow_lookup(addr);
+		if (shadow)
+			memset(shadow, status, n);
+	}
+}
+
+void kmemcheck_mark_unallocated(void *address, unsigned int n)
+{
+	mark_shadow(address, n, KMEMCHECK_SHADOW_UNALLOCATED);
+}
+
+void kmemcheck_mark_uninitialized(void *address, unsigned int n)
+{
+	mark_shadow(address, n, KMEMCHECK_SHADOW_UNINITIALIZED);
+}
+
+/*
+ * Fill the shadow memory of the given address such that the memory at that
+ * address is marked as being initialized.
+ */
+void kmemcheck_mark_initialized(void *address, unsigned int n)
+{
+	mark_shadow(address, n, KMEMCHECK_SHADOW_INITIALIZED);
+}
+EXPORT_SYMBOL_GPL(kmemcheck_mark_initialized);
+
+void kmemcheck_mark_freed(void *address, unsigned int n)
+{
+	mark_shadow(address, n, KMEMCHECK_SHADOW_FREED);
+}
+
+void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n)
+{
+	unsigned int i;
+
+	for (i = 0; i < n; ++i)
+		kmemcheck_mark_unallocated(page_address(&p[i]), PAGE_SIZE);
+}
+
+void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n)
+{
+	unsigned int i;
+
+	for (i = 0; i < n; ++i)
+		kmemcheck_mark_uninitialized(page_address(&p[i]), PAGE_SIZE);
+}
+
+enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
+{
+	uint8_t *x;
+	unsigned int i;
+
+	x = shadow;
+
+#ifdef CONFIG_KMEMCHECK_PARTIAL_OK
+	/*
+	 * Make sure _some_ bytes are initialized. Gcc frequently generates
+	 * code to access neighboring bytes.
+	 */
+	for (i = 0; i < size; ++i) {
+		if (x[i] == KMEMCHECK_SHADOW_INITIALIZED)
+			return x[i];
+	}
+#else
+	/* All bytes must be initialized. */
+	for (i = 0; i < size; ++i) {
+		if (x[i] != KMEMCHECK_SHADOW_INITIALIZED)
+			return x[i];
+	}
+#endif
+
+	return x[0];
+}
+
+void kmemcheck_shadow_set(void *shadow, unsigned int size)
+{
+	uint8_t *x;
+	unsigned int i;
+
+	x = shadow;
+	for (i = 0; i < size; ++i)
+		x[i] = KMEMCHECK_SHADOW_INITIALIZED;
+}
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h
new file mode 100644
index 00000000000..af46d9ab9d8
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/shadow.h
@@ -0,0 +1,16 @@
+#ifndef ARCH__X86__MM__KMEMCHECK__SHADOW_H
+#define ARCH__X86__MM__KMEMCHECK__SHADOW_H
+
+enum kmemcheck_shadow {
+	KMEMCHECK_SHADOW_UNALLOCATED,
+	KMEMCHECK_SHADOW_UNINITIALIZED,
+	KMEMCHECK_SHADOW_INITIALIZED,
+	KMEMCHECK_SHADOW_FREED,
+};
+
+void *kmemcheck_shadow_lookup(unsigned long address);
+
+enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size);
+void kmemcheck_shadow_set(void *shadow, unsigned int size);
+
+#endif
diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h
new file mode 100644
index 00000000000..39480c91b2f
--- /dev/null
+++ b/include/linux/kmemcheck.h
@@ -0,0 +1,17 @@
+#ifndef LINUX_KMEMCHECK_H
+#define LINUX_KMEMCHECK_H
+
+#include <linux/mm_types.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_KMEMCHECK
+extern int kmemcheck_enabled;
+
+int kmemcheck_show_addr(unsigned long address);
+int kmemcheck_hide_addr(unsigned long address);
+#else
+#define kmemcheck_enabled 0
+
+#endif /* CONFIG_KMEMCHECK */
+
+#endif /* LINUX_KMEMCHECK_H */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0e80e26ecf2..0042090a4d7 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -98,6 +98,14 @@ struct page {
 #ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS
 	unsigned long debug_flags;	/* Use atomic bitops on this */
 #endif
+
+#ifdef CONFIG_KMEMCHECK
+	/*
+	 * kmemcheck wants to track the status of each byte in a page; this
+	 * is a pointer to such a status block. NULL if not tracked.
+	 */
+	void *shadow;
+#endif
 };
 
 /*
diff --git a/init/main.c b/init/main.c
index 5616661eac0..e3c335e47cd 100644
--- a/init/main.c
+++ b/init/main.c
@@ -65,6 +65,7 @@
 #include <linux/idr.h>
 #include <linux/ftrace.h>
 #include <linux/async.h>
+#include <linux/kmemcheck.h>
 #include <linux/kmemtrace.h>
 #include <trace/boot.h>
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ce664f98e3f..9ef80bba350 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -27,6 +27,7 @@
 #include <linux/security.h>
 #include <linux/ctype.h>
 #include <linux/utsname.h>
+#include <linux/kmemcheck.h>
 #include <linux/smp_lock.h>
 #include <linux/fs.h>
 #include <linux/init.h>
@@ -959,6 +960,17 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 #endif
+#ifdef CONFIG_KMEMCHECK
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "kmemcheck",
+		.data		= &kmemcheck_enabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
+
 /*
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt
-- 
cgit v1.2.3-70-g09d2


From 5f5bac8272be791b67c7b7b411e7c8c5847e598a Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Fri, 22 May 2009 20:33:59 +0200
Subject: mmc: Driver for CB710/720 memory card reader (MMC part)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The code is divided in two parts. There is a virtual 'bus' driver
that handles PCI device and registers three new devices one per card
reader type. The other driver handles SD/MMC part of the reader.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>
---
 MAINTAINERS                  |   9 +
 drivers/misc/Kconfig         |   1 +
 drivers/misc/Makefile        |   1 +
 drivers/misc/cb710/Kconfig   |  21 ++
 drivers/misc/cb710/Makefile  |   4 +
 drivers/misc/cb710/core.c    | 357 +++++++++++++++++++
 drivers/misc/cb710/debug.c   | 119 +++++++
 drivers/misc/cb710/sgbuf2.c  | 150 ++++++++
 drivers/mmc/host/Kconfig     |  13 +
 drivers/mmc/host/Makefile    |   1 +
 drivers/mmc/host/cb710-mmc.c | 804 +++++++++++++++++++++++++++++++++++++++++++
 drivers/mmc/host/cb710-mmc.h | 104 ++++++
 include/linux/cb710.h        | 238 +++++++++++++
 include/linux/pci_ids.h      |   1 +
 14 files changed, 1823 insertions(+)
 create mode 100644 drivers/misc/cb710/Kconfig
 create mode 100644 drivers/misc/cb710/Makefile
 create mode 100644 drivers/misc/cb710/core.c
 create mode 100644 drivers/misc/cb710/debug.c
 create mode 100644 drivers/misc/cb710/sgbuf2.c
 create mode 100644 drivers/mmc/host/cb710-mmc.c
 create mode 100644 drivers/mmc/host/cb710-mmc.h
 create mode 100644 include/linux/cb710.h

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index 90f81283b72..b5cebddd284 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2105,6 +2105,15 @@ W:	http://sourceforge.net/projects/lpfcxxxx
 S:	Supported
 F:	drivers/scsi/lpfc/
 
+ENE CB710 FLASH CARD READER DRIVER
+P:	Michał Mirosław
+M:	mirq-linux@rere.qmqm.pl
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	drivers/misc/cb710/
+F:	drivers/mmc/host/cb710-mmc.*
+F:	include/linux/cb710.h
+
 EPSON 1355 FRAMEBUFFER DRIVER
 P:	Christopher Hoover
 M:	ch@murgatroid.com
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 6d1ac180f6e..68ab39d7cb3 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -235,5 +235,6 @@ config ISL29003
 
 source "drivers/misc/c2port/Kconfig"
 source "drivers/misc/eeprom/Kconfig"
+source "drivers/misc/cb710/Kconfig"
 
 endif # MISC_DEVICES
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 7871f05dcb9..36f733cd60e 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -21,3 +21,4 @@ obj-$(CONFIG_HP_ILO)		+= hpilo.o
 obj-$(CONFIG_ISL29003)		+= isl29003.o
 obj-$(CONFIG_C2PORT)		+= c2port/
 obj-y				+= eeprom/
+obj-y				+= cb710/
diff --git a/drivers/misc/cb710/Kconfig b/drivers/misc/cb710/Kconfig
new file mode 100644
index 00000000000..0b55079774c
--- /dev/null
+++ b/drivers/misc/cb710/Kconfig
@@ -0,0 +1,21 @@
+config CB710_CORE
+	tristate "ENE CB710/720 Flash memory card reader support"
+	depends on PCI
+	help
+	  This option enables support for PCI ENE CB710/720 Flash memory card
+	  reader found in some laptops (ie. some versions of HP Compaq nx9500).
+
+	  You will also have to select some flash card format drivers (MMC/SD,
+	  MemoryStick).
+
+	  This driver can also be built as a module. If so, the module
+	  will be called cb710.
+
+config CB710_DEBUG
+	bool "Enable driver debugging"
+	depends on CB710_CORE != n
+	default n
+	help
+	  This is an option for use by developers; most people should
+	  say N here.  This adds a lot of debugging output to dmesg.
+
diff --git a/drivers/misc/cb710/Makefile b/drivers/misc/cb710/Makefile
new file mode 100644
index 00000000000..62d51acfeca
--- /dev/null
+++ b/drivers/misc/cb710/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_CB710_CORE)	+= cb710.o
+
+cb710-y				:= core.o sgbuf2.o
+cb710-$(CONFIG_CB710_DEBUG)	+= debug.o
diff --git a/drivers/misc/cb710/core.c b/drivers/misc/cb710/core.c
new file mode 100644
index 00000000000..b14eab0f2ba
--- /dev/null
+++ b/drivers/misc/cb710/core.c
@@ -0,0 +1,357 @@
+/*
+ *  cb710/core.c
+ *
+ *  Copyright by Michał Mirosław, 2008-2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+#include <linux/cb710.h>
+
+static DEFINE_IDA(cb710_ida);
+static DEFINE_SPINLOCK(cb710_ida_lock);
+
+void cb710_pci_update_config_reg(struct pci_dev *pdev,
+	int reg, uint32_t mask, uint32_t xor)
+{
+	u32 rval;
+
+	pci_read_config_dword(pdev, reg, &rval);
+	rval = (rval & mask) ^ xor;
+	pci_write_config_dword(pdev, reg, rval);
+}
+EXPORT_SYMBOL_GPL(cb710_pci_update_config_reg);
+
+/* Some magic writes based on Windows driver init code */
+static int __devinit cb710_pci_configure(struct pci_dev *pdev)
+{
+	unsigned int devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0);
+	struct pci_dev *pdev0 = pci_get_slot(pdev->bus, devfn);
+	u32 val;
+
+	cb710_pci_update_config_reg(pdev, 0x48,
+		~0x000000FF, 0x0000003F);
+
+	pci_read_config_dword(pdev, 0x48, &val);
+	if (val & 0x80000000)
+		return 0;
+
+	if (!pdev0)
+		return -ENODEV;
+
+	if (pdev0->vendor == PCI_VENDOR_ID_ENE
+	    && pdev0->device == PCI_DEVICE_ID_ENE_720) {
+		cb710_pci_update_config_reg(pdev0, 0x8C,
+			~0x00F00000, 0x00100000);
+		cb710_pci_update_config_reg(pdev0, 0xB0,
+			~0x08000000, 0x08000000);
+	}
+
+	cb710_pci_update_config_reg(pdev0, 0x8C,
+		~0x00000F00, 0x00000200);
+	cb710_pci_update_config_reg(pdev0, 0x90,
+		~0x00060000, 0x00040000);
+
+	pci_dev_put(pdev0);
+
+	return 0;
+}
+
+static irqreturn_t cb710_irq_handler(int irq, void *data)
+{
+	struct cb710_chip *chip = data;
+	struct cb710_slot *slot = &chip->slot[0];
+	irqreturn_t handled = IRQ_NONE;
+	unsigned nr;
+
+	spin_lock(&chip->irq_lock); /* incl. smp_rmb() */
+
+	for (nr = chip->slots; nr; ++slot, --nr) {
+		cb710_irq_handler_t handler_func = slot->irq_handler;
+		if (handler_func && handler_func(slot))
+			handled = IRQ_HANDLED;
+	}
+
+	spin_unlock(&chip->irq_lock);
+
+	return handled;
+}
+
+static void cb710_release_slot(struct device *dev)
+{
+#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS
+	struct cb710_slot *slot = cb710_pdev_to_slot(to_platform_device(dev));
+	struct cb710_chip *chip = cb710_slot_to_chip(slot);
+
+	/* slot struct can be freed now */
+	atomic_dec(&chip->slot_refs_count);
+#endif
+}
+
+static int __devinit cb710_register_slot(struct cb710_chip *chip,
+	unsigned slot_mask, unsigned io_offset, const char *name)
+{
+	int nr = chip->slots;
+	struct cb710_slot *slot = &chip->slot[nr];
+	int err;
+
+	dev_dbg(cb710_chip_dev(chip),
+		"register: %s.%d; slot %d; mask %d; IO offset: 0x%02X\n",
+		name, chip->platform_id, nr, slot_mask, io_offset);
+
+	/* slot->irq_handler == NULL here; this needs to be
+	 * seen before platform_device_register() */
+	++chip->slots;
+	smp_wmb();
+
+	slot->iobase = chip->iobase + io_offset;
+	slot->pdev.name = name;
+	slot->pdev.id = chip->platform_id;
+	slot->pdev.dev.parent = &chip->pdev->dev;
+	slot->pdev.dev.release = cb710_release_slot;
+
+	err = platform_device_register(&slot->pdev);
+
+#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS
+	atomic_inc(&chip->slot_refs_count);
+#endif
+
+	if (err) {
+		/* device_initialize() called from platform_device_register()
+		 * wants this on error path */
+		platform_device_put(&slot->pdev);
+
+		/* slot->irq_handler == NULL here anyway, so no lock needed */
+		--chip->slots;
+		return err;
+	}
+
+	chip->slot_mask |= slot_mask;
+
+	return 0;
+}
+
+static void cb710_unregister_slot(struct cb710_chip *chip,
+	unsigned slot_mask)
+{
+	int nr = chip->slots - 1;
+
+	if (!(chip->slot_mask & slot_mask))
+		return;
+
+	platform_device_unregister(&chip->slot[nr].pdev);
+
+	/* complementary to spin_unlock() in cb710_set_irq_handler() */
+	smp_rmb();
+	BUG_ON(chip->slot[nr].irq_handler != NULL);
+
+	/* slot->irq_handler == NULL here, so no lock needed */
+	--chip->slots;
+	chip->slot_mask &= ~slot_mask;
+}
+
+void cb710_set_irq_handler(struct cb710_slot *slot,
+	cb710_irq_handler_t handler)
+{
+	struct cb710_chip *chip = cb710_slot_to_chip(slot);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chip->irq_lock, flags);
+	slot->irq_handler = handler;
+	spin_unlock_irqrestore(&chip->irq_lock, flags);
+}
+EXPORT_SYMBOL_GPL(cb710_set_irq_handler);
+
+#ifdef CONFIG_PM
+
+static int cb710_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct cb710_chip *chip = pci_get_drvdata(pdev);
+
+	free_irq(pdev->irq, chip);
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+	if (state.event & PM_EVENT_SLEEP)
+		pci_set_power_state(pdev, PCI_D3cold);
+	return 0;
+}
+
+static int cb710_resume(struct pci_dev *pdev)
+{
+	struct cb710_chip *chip = pci_get_drvdata(pdev);
+	int err;
+
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+	err = pcim_enable_device(pdev);
+	if (err)
+		return err;
+
+	return devm_request_irq(&pdev->dev, pdev->irq,
+		cb710_irq_handler, IRQF_SHARED, KBUILD_MODNAME, chip);
+}
+
+#endif /* CONFIG_PM */
+
+static int __devinit cb710_probe(struct pci_dev *pdev,
+	const struct pci_device_id *ent)
+{
+	struct cb710_chip *chip;
+	unsigned long flags;
+	u32 val;
+	int err;
+	int n = 0;
+
+	err = cb710_pci_configure(pdev);
+	if (err)
+		return err;
+
+	/* this is actually magic... */
+	pci_read_config_dword(pdev, 0x48, &val);
+	if (!(val & 0x80000000)) {
+		pci_write_config_dword(pdev, 0x48, val|0x71000000);
+		pci_read_config_dword(pdev, 0x48, &val);
+	}
+
+	dev_dbg(&pdev->dev, "PCI config[0x48] = 0x%08X\n", val);
+	if (!(val & 0x70000000))
+		return -ENODEV;
+	val = (val >> 28) & 7;
+	if (val & CB710_SLOT_MMC)
+		++n;
+	if (val & CB710_SLOT_MS)
+		++n;
+	if (val & CB710_SLOT_SM)
+		++n;
+
+	chip = devm_kzalloc(&pdev->dev,
+		sizeof(*chip) + n * sizeof(*chip->slot), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	err = pcim_enable_device(pdev);
+	if (err)
+		return err;
+
+	err = pcim_iomap_regions(pdev, 0x0001, KBUILD_MODNAME);
+	if (err)
+		return err;
+
+	chip->pdev = pdev;
+	chip->iobase = pcim_iomap_table(pdev)[0];
+
+	pci_set_drvdata(pdev, chip);
+
+	err = devm_request_irq(&pdev->dev, pdev->irq,
+		cb710_irq_handler, IRQF_SHARED, KBUILD_MODNAME, chip);
+	if (err)
+		return err;
+
+	do {
+		if (!ida_pre_get(&cb710_ida, GFP_KERNEL))
+			return -ENOMEM;
+
+		spin_lock_irqsave(&cb710_ida_lock, flags);
+		err = ida_get_new(&cb710_ida, &chip->platform_id);
+		spin_unlock_irqrestore(&cb710_ida_lock, flags);
+
+		if (err && err != -EAGAIN)
+			return err;
+	} while (err);
+
+
+	dev_info(&pdev->dev, "id %d, IO 0x%p, IRQ %d\n",
+		chip->platform_id, chip->iobase, pdev->irq);
+
+	if (val & CB710_SLOT_MMC) {	/* MMC/SD slot */
+		err = cb710_register_slot(chip,
+			CB710_SLOT_MMC, 0x00, "cb710-mmc");
+		if (err)
+			return err;
+	}
+
+	if (val & CB710_SLOT_MS) {	/* MemoryStick slot */
+		err = cb710_register_slot(chip,
+			CB710_SLOT_MS, 0x40, "cb710-ms");
+		if (err)
+			goto unreg_mmc;
+	}
+
+	if (val & CB710_SLOT_SM) {	/* SmartMedia slot */
+		err = cb710_register_slot(chip,
+			CB710_SLOT_SM, 0x60, "cb710-sm");
+		if (err)
+			goto unreg_ms;
+	}
+
+	return 0;
+unreg_ms:
+	cb710_unregister_slot(chip, CB710_SLOT_MS);
+unreg_mmc:
+	cb710_unregister_slot(chip, CB710_SLOT_MMC);
+
+#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS
+	BUG_ON(atomic_read(&chip->slot_refs_count) != 0);
+#endif
+	return err;
+}
+
+static void __devexit cb710_remove_one(struct pci_dev *pdev)
+{
+	struct cb710_chip *chip = pci_get_drvdata(pdev);
+	unsigned long flags;
+
+	cb710_unregister_slot(chip, CB710_SLOT_SM);
+	cb710_unregister_slot(chip, CB710_SLOT_MS);
+	cb710_unregister_slot(chip, CB710_SLOT_MMC);
+#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS
+	BUG_ON(atomic_read(&chip->slot_refs_count) != 0);
+#endif
+
+	spin_lock_irqsave(&cb710_ida_lock, flags);
+	ida_remove(&cb710_ida, chip->platform_id);
+	spin_unlock_irqrestore(&cb710_ida_lock, flags);
+}
+
+static const struct pci_device_id cb710_pci_tbl[] = {
+	{ PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_CB710_FLASH,
+		PCI_ANY_ID, PCI_ANY_ID, },
+	{ 0, }
+};
+
+static struct pci_driver cb710_driver = {
+	.name = KBUILD_MODNAME,
+	.id_table = cb710_pci_tbl,
+	.probe = cb710_probe,
+	.remove = __devexit_p(cb710_remove_one),
+#ifdef CONFIG_PM
+	.suspend = cb710_suspend,
+	.resume = cb710_resume,
+#endif
+};
+
+static int __init cb710_init_module(void)
+{
+	return pci_register_driver(&cb710_driver);
+}
+
+static void __exit cb710_cleanup_module(void)
+{
+	pci_unregister_driver(&cb710_driver);
+	ida_destroy(&cb710_ida);
+}
+
+module_init(cb710_init_module);
+module_exit(cb710_cleanup_module);
+
+MODULE_AUTHOR("Michał Mirosław <mirq-linux@rere.qmqm.pl>");
+MODULE_DESCRIPTION("ENE CB710 memory card reader driver");
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(pci, cb710_pci_tbl);
diff --git a/drivers/misc/cb710/debug.c b/drivers/misc/cb710/debug.c
new file mode 100644
index 00000000000..9da11bc620a
--- /dev/null
+++ b/drivers/misc/cb710/debug.c
@@ -0,0 +1,119 @@
+/*
+ *  cb710/debug.c
+ *
+ *  Copyright by Michał Mirosław, 2008-2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/cb710.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#define CB710_REG_COUNT		0x80
+
+static const u16 allow[CB710_REG_COUNT/16] = {
+	0xFFF0, 0xFFFF, 0xFFFF, 0xFFFF,
+	0xFFF0, 0xFFFF, 0xFFFF, 0xFFFF,
+};
+static const char *const prefix[ARRAY_SIZE(allow)] = {
+	"MMC", "MMC", "MMC", "MMC",
+	"MS?", "MS?", "SM?", "SM?"
+};
+
+static inline int allow_reg_read(unsigned block, unsigned offset, unsigned bits)
+{
+	unsigned mask = (1 << bits/8) - 1;
+	offset *= bits/8;
+	return ((allow[block] >> offset) & mask) == mask;
+}
+
+#define CB710_READ_REGS_TEMPLATE(t)					\
+static void cb710_read_regs_##t(void __iomem *iobase,			\
+	u##t *reg, unsigned select)					\
+{									\
+	unsigned i, j;							\
+									\
+	for (i = 0; i < ARRAY_SIZE(allow); ++i, reg += 16/(t/8)) {	\
+		if (!select & (1 << i))					\
+			continue;					\
+									\
+		for (j = 0; j < 0x10/(t/8); ++j) {			\
+			if (!allow_reg_read(i, j, t))			\
+				continue;				\
+			reg[j] = ioread##t(iobase			\
+				+ (i << 4) + (j * (t/8)));		\
+		}							\
+	}								\
+}
+
+static const char cb710_regf_8[] = "%02X";
+static const char cb710_regf_16[] = "%04X";
+static const char cb710_regf_32[] = "%08X";
+static const char cb710_xes[] = "xxxxxxxx";
+
+#define CB710_DUMP_REGS_TEMPLATE(t)					\
+static void cb710_dump_regs_##t(struct device *dev,			\
+	const u##t *reg, unsigned select)				\
+{									\
+	const char *const xp = &cb710_xes[8 - t/4];			\
+	const char *const format = cb710_regf_##t;			\
+									\
+	char msg[100], *p;						\
+	unsigned i, j;							\
+									\
+	for (i = 0; i < ARRAY_SIZE(allow); ++i, reg += 16/(t/8)) {	\
+		if (!(select & (1 << i)))				\
+			continue;					\
+		p = msg;						\
+		for (j = 0; j < 0x10/(t/8); ++j) {			\
+			*p++ = ' ';					\
+			if (j == 8/(t/8))				\
+				*p++ = ' ';				\
+			if (allow_reg_read(i, j, t))			\
+				p += sprintf(p, format, reg[j]);	\
+			else						\
+				p += sprintf(p, "%s", xp);		\
+		}							\
+		dev_dbg(dev, "%s 0x%02X %s\n", prefix[i], i << 4, msg);	\
+	}								\
+}
+
+#define CB710_READ_AND_DUMP_REGS_TEMPLATE(t)				\
+static void cb710_read_and_dump_regs_##t(struct cb710_chip *chip,	\
+	unsigned select)						\
+{									\
+	u##t regs[CB710_REG_COUNT/sizeof(u##t)];			\
+									\
+	memset(&regs, 0, sizeof(regs));					\
+	cb710_read_regs_##t(chip->iobase, regs, select);		\
+	cb710_dump_regs_##t(cb710_chip_dev(chip), regs, select);	\
+}
+
+#define CB710_REG_ACCESS_TEMPLATES(t)		\
+  CB710_READ_REGS_TEMPLATE(t)			\
+  CB710_DUMP_REGS_TEMPLATE(t)			\
+  CB710_READ_AND_DUMP_REGS_TEMPLATE(t)
+
+CB710_REG_ACCESS_TEMPLATES(8)
+CB710_REG_ACCESS_TEMPLATES(16)
+CB710_REG_ACCESS_TEMPLATES(32)
+
+void cb710_dump_regs(struct cb710_chip *chip, unsigned select)
+{
+	if (!(select & CB710_DUMP_REGS_MASK))
+		select = CB710_DUMP_REGS_ALL;
+	if (!(select & CB710_DUMP_ACCESS_MASK))
+		select |= CB710_DUMP_ACCESS_8;
+
+	if (select & CB710_DUMP_ACCESS_32)
+		cb710_read_and_dump_regs_32(chip, select);
+	if (select & CB710_DUMP_ACCESS_16)
+		cb710_read_and_dump_regs_16(chip, select);
+	if (select & CB710_DUMP_ACCESS_8)
+		cb710_read_and_dump_regs_8(chip, select);
+}
+EXPORT_SYMBOL_GPL(cb710_dump_regs);
+
diff --git a/drivers/misc/cb710/sgbuf2.c b/drivers/misc/cb710/sgbuf2.c
new file mode 100644
index 00000000000..d38a7acdb6e
--- /dev/null
+++ b/drivers/misc/cb710/sgbuf2.c
@@ -0,0 +1,150 @@
+/*
+ *  cb710/sgbuf2.c
+ *
+ *  Copyright by Michał Mirosław, 2008-2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cb710.h>
+
+static bool sg_dwiter_next(struct sg_mapping_iter *miter)
+{
+	if (sg_miter_next(miter)) {
+		miter->consumed = 0;
+		return true;
+	} else
+		return false;
+}
+
+static bool sg_dwiter_is_at_end(struct sg_mapping_iter *miter)
+{
+	return miter->length == miter->consumed && !sg_dwiter_next(miter);
+}
+
+static uint32_t sg_dwiter_read_buffer(struct sg_mapping_iter *miter)
+{
+	size_t len, left = 4;
+	uint32_t data;
+	void *addr = &data;
+
+	do {
+		len = min(miter->length - miter->consumed, left);
+		memcpy(addr, miter->addr + miter->consumed, len);
+		miter->consumed += len;
+		left -= len;
+		if (!left)
+			return data;
+		addr += len;
+	} while (sg_dwiter_next(miter));
+
+	memset(addr, 0, left);
+	return data;
+}
+
+static inline bool needs_unaligned_copy(const void *ptr)
+{
+#ifdef HAVE_EFFICIENT_UNALIGNED_ACCESS
+	return false;
+#else
+	return ((ptr - NULL) & 3) != 0;
+#endif
+}
+
+static bool sg_dwiter_get_next_block(struct sg_mapping_iter *miter, uint32_t **ptr)
+{
+	size_t len;
+
+	if (sg_dwiter_is_at_end(miter))
+		return true;
+
+	len = miter->length - miter->consumed;
+
+	if (likely(len >= 4 && !needs_unaligned_copy(
+			miter->addr + miter->consumed))) {
+		*ptr = miter->addr + miter->consumed;
+		miter->consumed += 4;
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * cb710_sg_dwiter_read_next_block() - get next 32-bit word from sg buffer
+ * @miter: sg mapping iterator used for reading
+ *
+ * Description:
+ *   Returns 32-bit word starting at byte pointed to by @miter@
+ *   handling any alignment issues.  Bytes past the buffer's end
+ *   are not accessed (read) but are returned as zeroes.  @miter@
+ *   is advanced by 4 bytes or to the end of buffer whichever is
+ *   closer.
+ *
+ * Context:
+ *   Same requirements as in sg_miter_next().
+ *
+ * Returns:
+ *   32-bit word just read.
+ */
+uint32_t cb710_sg_dwiter_read_next_block(struct sg_mapping_iter *miter)
+{
+	uint32_t *ptr = NULL;
+
+	if (likely(sg_dwiter_get_next_block(miter, &ptr)))
+		return ptr ? *ptr : 0;
+
+	return sg_dwiter_read_buffer(miter);
+}
+EXPORT_SYMBOL_GPL(cb710_sg_dwiter_read_next_block);
+
+static void sg_dwiter_write_slow(struct sg_mapping_iter *miter, uint32_t data)
+{
+	size_t len, left = 4;
+	void *addr = &data;
+
+	do {
+		len = min(miter->length - miter->consumed, left);
+		memcpy(miter->addr, addr, len);
+		miter->consumed += len;
+		left -= len;
+		if (!left)
+			return;
+		addr += len;
+		flush_kernel_dcache_page(miter->page);
+	} while (sg_dwiter_next(miter));
+}
+
+/**
+ * cb710_sg_dwiter_write_next_block() - write next 32-bit word to sg buffer
+ * @miter: sg mapping iterator used for writing
+ *
+ * Description:
+ *   Writes 32-bit word starting at byte pointed to by @miter@
+ *   handling any alignment issues.  Bytes which would be written
+ *   past the buffer's end are silently discarded. @miter@ is
+ *   advanced by 4 bytes or to the end of buffer whichever is closer.
+ *
+ * Context:
+ *   Same requirements as in sg_miter_next().
+ */
+void cb710_sg_dwiter_write_next_block(struct sg_mapping_iter *miter, uint32_t data)
+{
+	uint32_t *ptr = NULL;
+
+	if (likely(sg_dwiter_get_next_block(miter, &ptr))) {
+		if (ptr)
+			*ptr = data;
+		else
+			return;
+	} else
+		sg_dwiter_write_slow(miter, data);
+
+	if (miter->length == miter->consumed)
+		flush_kernel_dcache_page(miter->page);
+}
+EXPORT_SYMBOL_GPL(cb710_sg_dwiter_write_next_block);
+
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index b4cf691f3f6..75337e442da 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -241,3 +241,16 @@ config MMC_TMIO
 	help
 	  This provides support for the SD/MMC cell found in TC6393XB,
 	  T7L66XB and also ipaq ASIC3
+
+config MMC_CB710
+	tristate "ENE CB710 MMC/SD Interface support"
+	depends on PCI
+	select CB710_CORE
+	help
+	  This option enables support for MMC/SD part of ENE CB710/720 Flash
+	  memory card reader found in some laptops (ie. some versions of
+	  HP Compaq nx9500).
+
+	  This driver can also be built as a module. If so, the module
+	  will be called cb710-mmc.
+
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 970a997620e..ce9c8b6f0f4 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -29,4 +29,5 @@ endif
 obj-$(CONFIG_MMC_S3C)   	+= s3cmci.o
 obj-$(CONFIG_MMC_SDRICOH_CS)	+= sdricoh_cs.o
 obj-$(CONFIG_MMC_TMIO)		+= tmio_mmc.o
+obj-$(CONFIG_MMC_CB710)	+= cb710-mmc.o
 
diff --git a/drivers/mmc/host/cb710-mmc.c b/drivers/mmc/host/cb710-mmc.c
new file mode 100644
index 00000000000..bc048aae720
--- /dev/null
+++ b/drivers/mmc/host/cb710-mmc.c
@@ -0,0 +1,804 @@
+/*
+ *  cb710/mmc.c
+ *
+ *  Copyright by Michał Mirosław, 2008-2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include "cb710-mmc.h"
+
+static const u8 cb710_clock_divider_log2[8] = {
+/*	1, 2, 4, 8, 16, 32, 128, 512 */
+	0, 1, 2, 3,  4,  5,   7,   9
+};
+#define CB710_MAX_DIVIDER_IDX	\
+	(ARRAY_SIZE(cb710_clock_divider_log2) - 1)
+
+static const u8 cb710_src_freq_mhz[16] = {
+	33, 10, 20, 25, 30, 35, 40, 45,
+	50, 55, 60, 65, 70, 75, 80, 85
+};
+
+static void cb710_mmc_set_clock(struct mmc_host *mmc, int hz)
+{
+	struct cb710_slot *slot = cb710_mmc_to_slot(mmc);
+	struct pci_dev *pdev = cb710_slot_to_chip(slot)->pdev;
+	u32 src_freq_idx;
+	u32 divider_idx;
+	int src_hz;
+
+	/* this is magic, unverifiable for me, unless I get
+	 * MMC card with cables connected to bus signals */
+	pci_read_config_dword(pdev, 0x48, &src_freq_idx);
+	src_freq_idx = (src_freq_idx >> 16) & 0xF;
+	src_hz = cb710_src_freq_mhz[src_freq_idx] * 1000000;
+
+	for (divider_idx = 0; divider_idx < CB710_MAX_DIVIDER_IDX; ++divider_idx) {
+		if (hz >= src_hz >> cb710_clock_divider_log2[divider_idx])
+			break;
+	}
+
+	if (src_freq_idx)
+		divider_idx |= 0x8;
+
+	cb710_pci_update_config_reg(pdev, 0x40, ~0xF0000000, divider_idx << 28);
+
+	dev_dbg(cb710_slot_dev(slot),
+		"clock set to %d Hz, wanted %d Hz; flag = %d\n",
+		src_hz >> cb710_clock_divider_log2[divider_idx & 7],
+		hz, (divider_idx & 8) != 0);
+}
+
+static void __cb710_mmc_enable_irq(struct cb710_slot *slot,
+	unsigned short enable, unsigned short mask)
+{
+	/* clear global IE
+	 * - it gets set later if any interrupt sources are enabled */
+	mask |= CB710_MMC_IE_IRQ_ENABLE;
+
+	/* look like interrupt is fired whenever
+	 * WORD[0x0C] & WORD[0x10] != 0;
+	 * -> bit 15 port 0x0C seems to be global interrupt enable
+	 */
+
+	enable = (cb710_read_port_16(slot, CB710_MMC_IRQ_ENABLE_PORT)
+		& ~mask) | enable;
+
+	if (enable)
+		enable |= CB710_MMC_IE_IRQ_ENABLE;
+
+	cb710_write_port_16(slot, CB710_MMC_IRQ_ENABLE_PORT, enable);
+}
+
+static void cb710_mmc_enable_irq(struct cb710_slot *slot,
+	unsigned short enable, unsigned short mask)
+{
+	struct cb710_mmc_reader *reader = mmc_priv(cb710_slot_to_mmc(slot));
+	unsigned long flags;
+
+	spin_lock_irqsave(&reader->irq_lock, flags);
+	/* this is the only thing irq_lock protects */
+	__cb710_mmc_enable_irq(slot, enable, mask);
+	spin_unlock_irqrestore(&reader->irq_lock, flags);
+}
+
+static void cb710_mmc_reset_events(struct cb710_slot *slot)
+{
+	cb710_write_port_8(slot, CB710_MMC_STATUS0_PORT, 0xFF);
+	cb710_write_port_8(slot, CB710_MMC_STATUS1_PORT, 0xFF);
+	cb710_write_port_8(slot, CB710_MMC_STATUS2_PORT, 0xFF);
+}
+
+static int cb710_mmc_is_card_inserted(struct cb710_slot *slot)
+{
+	return cb710_read_port_8(slot, CB710_MMC_STATUS3_PORT)
+		& CB710_MMC_S3_CARD_DETECTED;
+}
+
+static void cb710_mmc_enable_4bit_data(struct cb710_slot *slot, int enable)
+{
+	dev_dbg(cb710_slot_dev(slot), "configuring %d-data-line%s mode\n",
+		enable ? 4 : 1, enable ? "s" : "");
+	if (enable)
+		cb710_modify_port_8(slot, CB710_MMC_CONFIG1_PORT,
+			CB710_MMC_C1_4BIT_DATA_BUS, 0);
+	else
+		cb710_modify_port_8(slot, CB710_MMC_CONFIG1_PORT,
+			0, CB710_MMC_C1_4BIT_DATA_BUS);
+}
+
+static int cb710_check_event(struct cb710_slot *slot, u8 what)
+{
+	u16 status;
+
+	status = cb710_read_port_16(slot, CB710_MMC_STATUS_PORT);
+
+	if (status & CB710_MMC_S0_FIFO_UNDERFLOW) {
+		/* it is just a guess, so log it */
+		dev_dbg(cb710_slot_dev(slot),
+			"CHECK : ignoring bit 6 in status %04X\n", status);
+		cb710_write_port_8(slot, CB710_MMC_STATUS0_PORT,
+			CB710_MMC_S0_FIFO_UNDERFLOW);
+		status &= ~CB710_MMC_S0_FIFO_UNDERFLOW;
+	}
+
+	if (status & CB710_MMC_STATUS_ERROR_EVENTS) {
+		dev_dbg(cb710_slot_dev(slot),
+			"CHECK : returning EIO on status %04X\n", status);
+		cb710_write_port_8(slot, CB710_MMC_STATUS0_PORT, status & 0xFF);
+		cb710_write_port_8(slot, CB710_MMC_STATUS1_PORT,
+			CB710_MMC_S1_RESET);
+		return -EIO;
+	}
+
+	/* 'what' is a bit in MMC_STATUS1 */
+	if ((status >> 8) & what) {
+		cb710_write_port_8(slot, CB710_MMC_STATUS1_PORT, what);
+		return 1;
+	}
+
+	return 0;
+}
+
+static int cb710_wait_for_event(struct cb710_slot *slot, u8 what)
+{
+	int err = 0;
+	unsigned limit = 2000000;	/* FIXME: real timeout */
+
+#ifdef CONFIG_CB710_DEBUG
+	u32 e, x;
+	e = cb710_read_port_32(slot, CB710_MMC_STATUS_PORT);
+#endif
+
+	while (!(err = cb710_check_event(slot, what))) {
+		if (!--limit) {
+			cb710_dump_regs(cb710_slot_to_chip(slot),
+				CB710_DUMP_REGS_MMC);
+			err = -ETIMEDOUT;
+			break;
+		}
+		udelay(1);
+	}
+
+#ifdef CONFIG_CB710_DEBUG
+	x = cb710_read_port_32(slot, CB710_MMC_STATUS_PORT);
+
+	limit = 2000000 - limit;
+	if (limit > 100)
+		dev_dbg(cb710_slot_dev(slot),
+			"WAIT10: waited %d loops, what %d, entry val %08X, exit val %08X\n",
+			limit, what, e, x);
+#endif
+	return err < 0 ? err : 0;
+}
+
+
+static int cb710_wait_while_busy(struct cb710_slot *slot, uint8_t mask)
+{
+	unsigned limit = 500000;	/* FIXME: real timeout */
+	int err = 0;
+
+#ifdef CONFIG_CB710_DEBUG
+	u32 e, x;
+	e = cb710_read_port_32(slot, CB710_MMC_STATUS_PORT);
+#endif
+
+	while (cb710_read_port_8(slot, CB710_MMC_STATUS2_PORT) & mask) {
+		if (!--limit) {
+			cb710_dump_regs(cb710_slot_to_chip(slot),
+				CB710_DUMP_REGS_MMC);
+			err = -ETIMEDOUT;
+			break;
+		}
+		udelay(1);
+	}
+
+#ifdef CONFIG_CB710_DEBUG
+	x = cb710_read_port_32(slot, CB710_MMC_STATUS_PORT);
+
+	limit = 500000 - limit;
+	if (limit > 100)
+		dev_dbg(cb710_slot_dev(slot),
+			"WAIT12: waited %d loops, mask %02X, entry val %08X, exit val %08X\n",
+			limit, mask, e, x);
+#endif
+	return 0;
+}
+
+static void cb710_mmc_set_transfer_size(struct cb710_slot *slot,
+	size_t count, size_t blocksize)
+{
+	cb710_wait_while_busy(slot, CB710_MMC_S2_BUSY_20);
+	cb710_write_port_32(slot, CB710_MMC_TRANSFER_SIZE_PORT,
+		((count - 1) << 16)|(blocksize - 1));
+
+	dev_vdbg(cb710_slot_dev(slot), "set up for %d block%s of %d bytes\n",
+		count, count == 1 ? "" : "s", blocksize);
+}
+
+static void cb710_mmc_fifo_hack(struct cb710_slot *slot)
+{
+	/* without this, received data is prepended with 8-bytes of zeroes */
+	u32 r1, r2;
+	int ok = 0;
+
+	r1 = cb710_read_port_32(slot, CB710_MMC_DATA_PORT);
+	r2 = cb710_read_port_32(slot, CB710_MMC_DATA_PORT);
+	if (cb710_read_port_8(slot, CB710_MMC_STATUS0_PORT)
+	    & CB710_MMC_S0_FIFO_UNDERFLOW) {
+		cb710_write_port_8(slot, CB710_MMC_STATUS0_PORT,
+			CB710_MMC_S0_FIFO_UNDERFLOW);
+		ok = 1;
+	}
+
+	dev_dbg(cb710_slot_dev(slot),
+		"FIFO-read-hack: expected STATUS0 bit was %s\n",
+		ok ? "set." : "NOT SET!");
+	dev_dbg(cb710_slot_dev(slot),
+		"FIFO-read-hack: dwords ignored: %08X %08X - %s\n",
+		r1, r2, (r1|r2) ? "BAD (NOT ZERO)!" : "ok");
+}
+
+static int cb710_mmc_receive_pio(struct cb710_slot *slot,
+	struct sg_mapping_iter *miter, size_t dw_count)
+{
+	if (!(cb710_read_port_8(slot, CB710_MMC_STATUS2_PORT) & CB710_MMC_S2_FIFO_READY)) {
+		int err = cb710_wait_for_event(slot,
+			CB710_MMC_S1_PIO_TRANSFER_DONE);
+		if (err)
+			return err;
+	}
+
+	cb710_sg_dwiter_write_from_io(miter,
+		slot->iobase + CB710_MMC_DATA_PORT, dw_count);
+
+	return 0;
+}
+
+static bool cb710_is_transfer_size_supported(struct mmc_data *data)
+{
+	return !(data->blksz & 15 && (data->blocks != 1 || data->blksz != 8));
+}
+
+static int cb710_mmc_receive(struct cb710_slot *slot, struct mmc_data *data)
+{
+	struct sg_mapping_iter miter;
+	size_t len, blocks = data->blocks;
+	int err = 0;
+
+	/* TODO: I don't know how/if the hardware handles non-16B-boundary blocks
+	 * except single 8B block */
+	if (unlikely(data->blksz & 15 && (data->blocks != 1 || data->blksz != 8)))
+		return -EINVAL;
+
+	sg_miter_start(&miter, data->sg, data->sg_len, 0);
+
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG2_PORT,
+		15, CB710_MMC_C2_READ_PIO_SIZE_MASK);
+
+	cb710_mmc_fifo_hack(slot);
+
+	while (blocks-- > 0) {
+		len = data->blksz;
+
+		while (len >= 16) {
+			err = cb710_mmc_receive_pio(slot, &miter, 4);
+			if (err)
+				goto out;
+			len -= 16;
+		}
+
+		if (!len)
+			continue;
+
+		cb710_modify_port_8(slot, CB710_MMC_CONFIG2_PORT,
+			len - 1, CB710_MMC_C2_READ_PIO_SIZE_MASK);
+
+		len = (len >= 8) ? 4 : 2;
+		err = cb710_mmc_receive_pio(slot, &miter, len);
+		if (err)
+			goto out;
+	}
+out:
+	cb710_sg_miter_stop_writing(&miter);
+	return err;
+}
+
+static int cb710_mmc_send(struct cb710_slot *slot, struct mmc_data *data)
+{
+	struct sg_mapping_iter miter;
+	size_t len, blocks = data->blocks;
+	int err = 0;
+
+	/* TODO: I don't know how/if the hardware handles multiple
+	 * non-16B-boundary blocks */
+	if (unlikely(data->blocks > 1 && data->blksz & 15))
+		return -EINVAL;
+
+	sg_miter_start(&miter, data->sg, data->sg_len, 0);
+
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG2_PORT,
+		0, CB710_MMC_C2_READ_PIO_SIZE_MASK);
+
+	while (blocks-- > 0) {
+		len = (data->blksz + 15) >> 4;
+		do {
+			if (!(cb710_read_port_8(slot, CB710_MMC_STATUS2_PORT)
+			    & CB710_MMC_S2_FIFO_EMPTY)) {
+				err = cb710_wait_for_event(slot,
+					CB710_MMC_S1_PIO_TRANSFER_DONE);
+				if (err)
+					goto out;
+			}
+			cb710_sg_dwiter_read_to_io(&miter,
+				slot->iobase + CB710_MMC_DATA_PORT, 4);
+		} while (--len);
+	}
+out:
+	sg_miter_stop(&miter);
+	return err;
+}
+
+static u16 cb710_encode_cmd_flags(struct cb710_mmc_reader *reader,
+	struct mmc_command *cmd)
+{
+	unsigned int flags = cmd->flags;
+	u16 cb_flags = 0;
+
+	/* Windows driver returned 0 for commands for which no response
+	 * is expected. It happened that there were only two such commands
+	 * used: MMC_GO_IDLE_STATE and MMC_GO_INACTIVE_STATE so it might
+	 * as well be a bug in that driver.
+	 *
+	 * Original driver set bit 14 for MMC/SD application
+	 * commands. There's no difference 'on the wire' and
+	 * it apparently works without it anyway.
+	 */
+
+	switch (flags & MMC_CMD_MASK) {
+	case MMC_CMD_AC:	cb_flags = CB710_MMC_CMD_AC;	break;
+	case MMC_CMD_ADTC:	cb_flags = CB710_MMC_CMD_ADTC;	break;
+	case MMC_CMD_BC:	cb_flags = CB710_MMC_CMD_BC;	break;
+	case MMC_CMD_BCR:	cb_flags = CB710_MMC_CMD_BCR;	break;
+	}
+
+	if (flags & MMC_RSP_BUSY)
+		cb_flags |= CB710_MMC_RSP_BUSY;
+
+	cb_flags |= cmd->opcode << CB710_MMC_CMD_CODE_SHIFT;
+
+	if (cmd->data && (cmd->data->flags & MMC_DATA_READ))
+		cb_flags |= CB710_MMC_DATA_READ;
+
+	if (flags & MMC_RSP_PRESENT) {
+		/* Windows driver set 01 at bits 4,3 except for
+		 * MMC_SET_BLOCKLEN where it set 10. Maybe the
+		 * hardware can do something special about this
+		 * command? The original driver looks buggy/incomplete
+		 * anyway so we ignore this for now.
+		 *
+		 * I assume that 00 here means no response is expected.
+		 */
+		cb_flags |= CB710_MMC_RSP_PRESENT;
+
+		if (flags & MMC_RSP_136)
+			cb_flags |= CB710_MMC_RSP_136;
+		if (!(flags & MMC_RSP_CRC))
+			cb_flags |= CB710_MMC_RSP_NO_CRC;
+	}
+
+	return cb_flags;
+}
+
+static void cb710_receive_response(struct cb710_slot *slot,
+	struct mmc_command *cmd)
+{
+	unsigned rsp_opcode, wanted_opcode;
+
+	/* Looks like final byte with CRC is always stripped (same as SDHCI) */
+	if (cmd->flags & MMC_RSP_136) {
+		u32 resp[4];
+
+		resp[0] = cb710_read_port_32(slot, CB710_MMC_RESPONSE3_PORT);
+		resp[1] = cb710_read_port_32(slot, CB710_MMC_RESPONSE2_PORT);
+		resp[2] = cb710_read_port_32(slot, CB710_MMC_RESPONSE1_PORT);
+		resp[3] = cb710_read_port_32(slot, CB710_MMC_RESPONSE0_PORT);
+		rsp_opcode = resp[0] >> 24;
+
+		cmd->resp[0] = (resp[0] << 8)|(resp[1] >> 24);
+		cmd->resp[1] = (resp[1] << 8)|(resp[2] >> 24);
+		cmd->resp[2] = (resp[2] << 8)|(resp[3] >> 24);
+		cmd->resp[3] = (resp[3] << 8);
+	} else {
+		rsp_opcode = cb710_read_port_32(slot, CB710_MMC_RESPONSE1_PORT) & 0x3F;
+		cmd->resp[0] = cb710_read_port_32(slot, CB710_MMC_RESPONSE0_PORT);
+	}
+
+	wanted_opcode = (cmd->flags & MMC_RSP_OPCODE) ? cmd->opcode : 0x3F;
+	if (rsp_opcode != wanted_opcode)
+		cmd->error = -EILSEQ;
+}
+
+static int cb710_mmc_transfer_data(struct cb710_slot *slot,
+	struct mmc_data *data)
+{
+	int error, to;
+
+	if (data->flags & MMC_DATA_READ)
+		error = cb710_mmc_receive(slot, data);
+	else
+		error = cb710_mmc_send(slot, data);
+
+	to = cb710_wait_for_event(slot, CB710_MMC_S1_DATA_TRANSFER_DONE);
+	if (!error)
+		error = to;
+
+	if (!error)
+		data->bytes_xfered = data->blksz * data->blocks;
+	return error;
+}
+
+static int cb710_mmc_command(struct mmc_host *mmc, struct mmc_command *cmd)
+{
+	struct cb710_slot *slot = cb710_mmc_to_slot(mmc);
+	struct cb710_mmc_reader *reader = mmc_priv(mmc);
+	struct mmc_data *data = cmd->data;
+
+	u16 cb_cmd = cb710_encode_cmd_flags(reader, cmd);
+	dev_dbg(cb710_slot_dev(slot), "cmd request: 0x%04X\n", cb_cmd);
+
+	if (data) {
+		if (!cb710_is_transfer_size_supported(data)) {
+			data->error = -EINVAL;
+			return -1;
+		}
+		cb710_mmc_set_transfer_size(slot, data->blocks, data->blksz);
+	}
+
+	cb710_wait_while_busy(slot, CB710_MMC_S2_BUSY_20|CB710_MMC_S2_BUSY_10);
+	cb710_write_port_16(slot, CB710_MMC_CMD_TYPE_PORT, cb_cmd);
+	cb710_wait_while_busy(slot, CB710_MMC_S2_BUSY_20);
+	cb710_write_port_32(slot, CB710_MMC_CMD_PARAM_PORT, cmd->arg);
+	cb710_mmc_reset_events(slot);
+	cb710_wait_while_busy(slot, CB710_MMC_S2_BUSY_20);
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG0_PORT, 0x01, 0);
+
+	cmd->error = cb710_wait_for_event(slot, CB710_MMC_S1_COMMAND_SENT);
+	if (cmd->error)
+		return -1;
+
+	if (cmd->flags & MMC_RSP_PRESENT) {
+		cb710_receive_response(slot, cmd);
+		if (cmd->error)
+			return -1;
+	}
+
+	if (data)
+		data->error = cb710_mmc_transfer_data(slot, data);
+	return 0;
+}
+
+static void cb710_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+	struct cb710_slot *slot = cb710_mmc_to_slot(mmc);
+	struct cb710_mmc_reader *reader = mmc_priv(mmc);
+
+	WARN_ON(reader->mrq != NULL);
+
+	reader->mrq = mrq;
+	cb710_mmc_enable_irq(slot, CB710_MMC_IE_TEST_MASK, 0);
+
+	if (cb710_mmc_is_card_inserted(slot)) {
+		if (!cb710_mmc_command(mmc, mrq->cmd) && mrq->stop)
+			cb710_mmc_command(mmc, mrq->stop);
+		mdelay(1);
+	} else {
+		mrq->cmd->error = -ENOMEDIUM;
+	}
+
+	tasklet_schedule(&reader->finish_req_tasklet);
+}
+
+static int cb710_mmc_powerup(struct cb710_slot *slot)
+{
+#ifdef CONFIG_CB710_DEBUG
+	struct cb710_chip *chip = cb710_slot_to_chip(slot);
+#endif
+	int err;
+
+	/* a lot of magic; see comment in cb710_mmc_set_clock() */
+	dev_dbg(cb710_slot_dev(slot), "bus powerup\n");
+	cb710_dump_regs(chip, CB710_DUMP_REGS_MMC);
+	err = cb710_wait_while_busy(slot, CB710_MMC_S2_BUSY_20);
+	if (unlikely(err))
+		return err;
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG1_PORT, 0x80, 0);
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG3_PORT, 0x80, 0);
+	cb710_dump_regs(chip, CB710_DUMP_REGS_MMC);
+	mdelay(1);
+	dev_dbg(cb710_slot_dev(slot), "after delay 1\n");
+	cb710_dump_regs(chip, CB710_DUMP_REGS_MMC);
+	err = cb710_wait_while_busy(slot, CB710_MMC_S2_BUSY_20);
+	if (unlikely(err))
+		return err;
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG1_PORT, 0x09, 0);
+	cb710_dump_regs(chip, CB710_DUMP_REGS_MMC);
+	mdelay(1);
+	dev_dbg(cb710_slot_dev(slot), "after delay 2\n");
+	cb710_dump_regs(chip, CB710_DUMP_REGS_MMC);
+	err = cb710_wait_while_busy(slot, CB710_MMC_S2_BUSY_20);
+	if (unlikely(err))
+		return err;
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG1_PORT, 0, 0x08);
+	cb710_dump_regs(chip, CB710_DUMP_REGS_MMC);
+	mdelay(2);
+	dev_dbg(cb710_slot_dev(slot), "after delay 3\n");
+	cb710_dump_regs(chip, CB710_DUMP_REGS_MMC);
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG0_PORT, 0x06, 0);
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG1_PORT, 0x70, 0);
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG2_PORT, 0x80, 0);
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG3_PORT, 0x03, 0);
+	cb710_dump_regs(chip, CB710_DUMP_REGS_MMC);
+	err = cb710_wait_while_busy(slot, CB710_MMC_S2_BUSY_20);
+	if (unlikely(err))
+		return err;
+	/* This port behaves weird: quick byte reads of 0x08,0x09 return
+	 * 0xFF,0x00 after writing 0xFFFF to 0x08; it works correctly when
+	 * read/written from userspace...  What am I missing here?
+	 * (it doesn't depend on write-to-read delay) */
+	cb710_write_port_16(slot, CB710_MMC_CONFIGB_PORT, 0xFFFF);
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG0_PORT, 0x06, 0);
+	cb710_dump_regs(chip, CB710_DUMP_REGS_MMC);
+	dev_dbg(cb710_slot_dev(slot), "bus powerup finished\n");
+
+	return cb710_check_event(slot, 0);
+}
+
+static void cb710_mmc_powerdown(struct cb710_slot *slot)
+{
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG1_PORT, 0, 0x81);
+	cb710_modify_port_8(slot, CB710_MMC_CONFIG3_PORT, 0, 0x80);
+}
+
+static void cb710_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+	struct cb710_slot *slot = cb710_mmc_to_slot(mmc);
+	struct cb710_mmc_reader *reader = mmc_priv(mmc);
+	int err;
+
+	cb710_mmc_set_clock(mmc, ios->clock);
+
+	if (!cb710_mmc_is_card_inserted(slot)) {
+		dev_dbg(cb710_slot_dev(slot),
+			"no card inserted - ignoring bus powerup request\n");
+		ios->power_mode = MMC_POWER_OFF;
+	}
+
+	if (ios->power_mode != reader->last_power_mode)
+	switch (ios->power_mode) {
+	case MMC_POWER_ON:
+		err = cb710_mmc_powerup(slot);
+		if (err) {
+			dev_warn(cb710_slot_dev(slot),
+				"powerup failed (%d)- retrying\n", err);
+			cb710_mmc_powerdown(slot);
+			udelay(1);
+			err = cb710_mmc_powerup(slot);
+			if (err)
+				dev_warn(cb710_slot_dev(slot),
+					"powerup retry failed (%d) - expect errors\n",
+					err);
+		}
+		reader->last_power_mode = MMC_POWER_ON;
+		break;
+	case MMC_POWER_OFF:
+		cb710_mmc_powerdown(slot);
+		reader->last_power_mode = MMC_POWER_OFF;
+		break;
+	case MMC_POWER_UP:
+	default:
+		/* ignore */;
+	}
+
+	cb710_mmc_enable_4bit_data(slot, ios->bus_width != MMC_BUS_WIDTH_1);
+
+	cb710_mmc_enable_irq(slot, CB710_MMC_IE_TEST_MASK, 0);
+}
+
+static int cb710_mmc_get_ro(struct mmc_host *mmc)
+{
+	struct cb710_slot *slot = cb710_mmc_to_slot(mmc);
+
+	return cb710_read_port_8(slot, CB710_MMC_STATUS3_PORT)
+		& CB710_MMC_S3_WRITE_PROTECTED;
+}
+
+static int cb710_mmc_irq_handler(struct cb710_slot *slot)
+{
+	struct mmc_host *mmc = cb710_slot_to_mmc(slot);
+	struct cb710_mmc_reader *reader = mmc_priv(mmc);
+	u32 status, config1, config2, irqen;
+
+	status = cb710_read_port_32(slot, CB710_MMC_STATUS_PORT);
+	irqen = cb710_read_port_32(slot, CB710_MMC_IRQ_ENABLE_PORT);
+	config2 = cb710_read_port_32(slot, CB710_MMC_CONFIGB_PORT);
+	config1 = cb710_read_port_32(slot, CB710_MMC_CONFIG_PORT);
+
+	dev_dbg(cb710_slot_dev(slot), "interrupt; status: %08X, "
+		"ie: %08X, c2: %08X, c1: %08X\n",
+		status, irqen, config2, config1);
+
+	if (status & (CB710_MMC_S1_CARD_CHANGED << 8)) {
+		/* ack the event */
+		cb710_write_port_8(slot, CB710_MMC_STATUS1_PORT,
+			CB710_MMC_S1_CARD_CHANGED);
+		if ((irqen & CB710_MMC_IE_CISTATUS_MASK)
+		    == CB710_MMC_IE_CISTATUS_MASK)
+			mmc_detect_change(mmc, HZ/5);
+	} else {
+		dev_dbg(cb710_slot_dev(slot), "unknown interrupt (test)\n");
+		spin_lock(&reader->irq_lock);
+		__cb710_mmc_enable_irq(slot, 0, CB710_MMC_IE_TEST_MASK);
+		spin_unlock(&reader->irq_lock);
+	}
+
+	return 1;
+}
+
+static void cb710_mmc_finish_request_tasklet(unsigned long data)
+{
+	struct mmc_host *mmc = (void *)data;
+	struct cb710_mmc_reader *reader = mmc_priv(mmc);
+	struct mmc_request *mrq = reader->mrq;
+
+	reader->mrq = NULL;
+	mmc_request_done(mmc, mrq);
+}
+
+static const struct mmc_host_ops cb710_mmc_host = {
+	.request = cb710_mmc_request,
+	.set_ios = cb710_mmc_set_ios,
+	.get_ro = cb710_mmc_get_ro
+};
+
+#ifdef CONFIG_PM
+
+static int cb710_mmc_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct cb710_slot *slot = cb710_pdev_to_slot(pdev);
+	struct mmc_host *mmc = cb710_slot_to_mmc(slot);
+	int err;
+
+	err = mmc_suspend_host(mmc, state);
+	if (err)
+		return err;
+
+	cb710_mmc_enable_irq(slot, 0, ~0);
+	return 0;
+}
+
+static int cb710_mmc_resume(struct platform_device *pdev)
+{
+	struct cb710_slot *slot = cb710_pdev_to_slot(pdev);
+	struct mmc_host *mmc = cb710_slot_to_mmc(slot);
+
+	cb710_mmc_enable_irq(slot, 0, ~0);
+
+	return mmc_resume_host(mmc);
+}
+
+#endif /* CONFIG_PM */
+
+static int __devinit cb710_mmc_init(struct platform_device *pdev)
+{
+	struct cb710_slot *slot = cb710_pdev_to_slot(pdev);
+	struct cb710_chip *chip = cb710_slot_to_chip(slot);
+	struct mmc_host *mmc;
+	struct cb710_mmc_reader *reader;
+	int err;
+	u32 val;
+
+	mmc = mmc_alloc_host(sizeof(*reader), cb710_slot_dev(slot));
+	if (!mmc)
+		return -ENOMEM;
+
+	dev_set_drvdata(&pdev->dev, mmc);
+
+	/* harmless (maybe) magic */
+	pci_read_config_dword(chip->pdev, 0x48, &val);
+	val = cb710_src_freq_mhz[(val >> 16) & 0xF];
+	dev_dbg(cb710_slot_dev(slot), "source frequency: %dMHz\n", val);
+	val *= 1000000;
+
+	mmc->ops = &cb710_mmc_host;
+	mmc->f_max = val;
+	mmc->f_min = val >> cb710_clock_divider_log2[CB710_MAX_DIVIDER_IDX];
+	mmc->ocr_avail = MMC_VDD_32_33|MMC_VDD_33_34;
+	mmc->caps = MMC_CAP_4_BIT_DATA;
+
+	reader = mmc_priv(mmc);
+
+	tasklet_init(&reader->finish_req_tasklet,
+		cb710_mmc_finish_request_tasklet, (unsigned long)mmc);
+	spin_lock_init(&reader->irq_lock);
+	cb710_dump_regs(chip, CB710_DUMP_REGS_MMC);
+
+	cb710_mmc_enable_irq(slot, 0, ~0);
+	cb710_set_irq_handler(slot, cb710_mmc_irq_handler);
+
+	err = mmc_add_host(mmc);
+	if (unlikely(err))
+		goto err_free_mmc;
+
+	dev_dbg(cb710_slot_dev(slot), "mmc_hostname is %s\n",
+		mmc_hostname(mmc));
+
+	cb710_mmc_enable_irq(slot, CB710_MMC_IE_CARD_INSERTION_STATUS, 0);
+
+	return 0;
+
+err_free_mmc:
+	dev_dbg(cb710_slot_dev(slot), "mmc_add_host() failed: %d\n", err);
+
+	mmc_free_host(mmc);
+	return err;
+}
+
+static int __devexit cb710_mmc_exit(struct platform_device *pdev)
+{
+	struct cb710_slot *slot = cb710_pdev_to_slot(pdev);
+	struct mmc_host *mmc = cb710_slot_to_mmc(slot);
+	struct cb710_mmc_reader *reader = mmc_priv(mmc);
+
+	cb710_mmc_enable_irq(slot, 0, CB710_MMC_IE_CARD_INSERTION_STATUS);
+
+	mmc_remove_host(mmc);
+
+	/* IRQs should be disabled now, but let's stay on the safe side */
+	cb710_mmc_enable_irq(slot, 0, ~0);
+	cb710_set_irq_handler(slot, NULL);
+
+	/* clear config ports - just in case */
+	cb710_write_port_32(slot, CB710_MMC_CONFIG_PORT, 0);
+	cb710_write_port_16(slot, CB710_MMC_CONFIGB_PORT, 0);
+
+	tasklet_kill(&reader->finish_req_tasklet);
+
+	mmc_free_host(mmc);
+	return 0;
+}
+
+static struct platform_driver cb710_mmc_driver = {
+	.driver.name = "cb710-mmc",
+	.probe = cb710_mmc_init,
+	.remove = __devexit_p(cb710_mmc_exit),
+#ifdef CONFIG_PM
+	.suspend = cb710_mmc_suspend,
+	.resume = cb710_mmc_resume,
+#endif
+};
+
+static int __init cb710_mmc_init_module(void)
+{
+	return platform_driver_register(&cb710_mmc_driver);
+}
+
+static void __exit cb710_mmc_cleanup_module(void)
+{
+	platform_driver_unregister(&cb710_mmc_driver);
+}
+
+module_init(cb710_mmc_init_module);
+module_exit(cb710_mmc_cleanup_module);
+
+MODULE_AUTHOR("Michał Mirosław <mirq-linux@rere.qmqm.pl>");
+MODULE_DESCRIPTION("ENE CB710 memory card reader driver - MMC/SD part");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:cb710-mmc");
diff --git a/drivers/mmc/host/cb710-mmc.h b/drivers/mmc/host/cb710-mmc.h
new file mode 100644
index 00000000000..e845c776bdd
--- /dev/null
+++ b/drivers/mmc/host/cb710-mmc.h
@@ -0,0 +1,104 @@
+/*
+ *  cb710/cb710-mmc.h
+ *
+ *  Copyright by Michał Mirosław, 2008-2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef LINUX_CB710_MMC_H
+#define LINUX_CB710_MMC_H
+
+#include <linux/cb710.h>
+
+/* per-MMC-reader structure */
+struct cb710_mmc_reader {
+	struct tasklet_struct finish_req_tasklet;
+	struct mmc_request *mrq;
+	spinlock_t irq_lock;
+	unsigned char last_power_mode;
+};
+
+/* some device struct walking */
+
+static inline struct mmc_host *cb710_slot_to_mmc(struct cb710_slot *slot)
+{
+	return dev_get_drvdata(&slot->pdev.dev);
+}
+
+static inline struct cb710_slot *cb710_mmc_to_slot(struct mmc_host *mmc)
+{
+	struct platform_device *pdev = container_of(mmc_dev(mmc),
+		struct platform_device, dev);
+	return cb710_pdev_to_slot(pdev);
+}
+
+/* registers (this might be all wrong ;) */
+
+#define CB710_MMC_DATA_PORT		0x00
+
+#define CB710_MMC_CONFIG_PORT		0x04
+#define CB710_MMC_CONFIG0_PORT		0x04
+#define CB710_MMC_CONFIG1_PORT		0x05
+#define   CB710_MMC_C1_4BIT_DATA_BUS		0x40
+#define CB710_MMC_CONFIG2_PORT		0x06
+#define   CB710_MMC_C2_READ_PIO_SIZE_MASK	0x0F	/* N-1 */
+#define CB710_MMC_CONFIG3_PORT		0x07
+
+#define CB710_MMC_CONFIGB_PORT		0x08
+
+#define CB710_MMC_IRQ_ENABLE_PORT	0x0C
+#define   CB710_MMC_IE_TEST_MASK		0x00BF
+#define   CB710_MMC_IE_CARD_INSERTION_STATUS	0x1000
+#define   CB710_MMC_IE_IRQ_ENABLE		0x8000
+#define   CB710_MMC_IE_CISTATUS_MASK		\
+		(CB710_MMC_IE_CARD_INSERTION_STATUS|CB710_MMC_IE_IRQ_ENABLE)
+
+#define CB710_MMC_STATUS_PORT		0x10
+#define   CB710_MMC_STATUS_ERROR_EVENTS		0x60FF
+#define CB710_MMC_STATUS0_PORT		0x10
+#define   CB710_MMC_S0_FIFO_UNDERFLOW		0x40
+#define CB710_MMC_STATUS1_PORT		0x11
+#define   CB710_MMC_S1_COMMAND_SENT		0x01
+#define   CB710_MMC_S1_DATA_TRANSFER_DONE	0x02
+#define   CB710_MMC_S1_PIO_TRANSFER_DONE	0x04
+#define   CB710_MMC_S1_CARD_CHANGED		0x10
+#define   CB710_MMC_S1_RESET			0x20
+#define CB710_MMC_STATUS2_PORT		0x12
+#define   CB710_MMC_S2_FIFO_READY		0x01
+#define   CB710_MMC_S2_FIFO_EMPTY		0x02
+#define   CB710_MMC_S2_BUSY_10			0x10
+#define   CB710_MMC_S2_BUSY_20			0x20
+#define CB710_MMC_STATUS3_PORT		0x13
+#define   CB710_MMC_S3_CARD_DETECTED		0x02
+#define   CB710_MMC_S3_WRITE_PROTECTED		0x04
+
+#define CB710_MMC_CMD_TYPE_PORT		0x14
+#define   CB710_MMC_RSP_TYPE_MASK		0x0007
+#define     CB710_MMC_RSP_R1			(0)
+#define     CB710_MMC_RSP_136			(5)
+#define     CB710_MMC_RSP_NO_CRC		(2)
+#define   CB710_MMC_RSP_PRESENT_MASK		0x0018
+#define     CB710_MMC_RSP_NONE			(0 << 3)
+#define     CB710_MMC_RSP_PRESENT		(1 << 3)
+#define     CB710_MMC_RSP_PRESENT_X		(2 << 3)
+#define   CB710_MMC_CMD_TYPE_MASK		0x0060
+#define     CB710_MMC_CMD_BC			(0 << 5)
+#define     CB710_MMC_CMD_BCR			(1 << 5)
+#define     CB710_MMC_CMD_AC			(2 << 5)
+#define     CB710_MMC_CMD_ADTC			(3 << 5)
+#define   CB710_MMC_DATA_READ			0x0080
+#define   CB710_MMC_CMD_CODE_MASK		0x3F00
+#define   CB710_MMC_CMD_CODE_SHIFT		8
+#define   CB710_MMC_IS_APP_CMD			0x4000
+#define   CB710_MMC_RSP_BUSY			0x8000
+
+#define CB710_MMC_CMD_PARAM_PORT	0x18
+#define CB710_MMC_TRANSFER_SIZE_PORT	0x1C
+#define CB710_MMC_RESPONSE0_PORT	0x20
+#define CB710_MMC_RESPONSE1_PORT	0x24
+#define CB710_MMC_RESPONSE2_PORT	0x28
+#define CB710_MMC_RESPONSE3_PORT	0x2C
+
+#endif /* LINUX_CB710_MMC_H */
diff --git a/include/linux/cb710.h b/include/linux/cb710.h
new file mode 100644
index 00000000000..c3819e1ce1c
--- /dev/null
+++ b/include/linux/cb710.h
@@ -0,0 +1,238 @@
+/*
+ *  cb710/cb710.h
+ *
+ *  Copyright by Michał Mirosław, 2008-2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef LINUX_CB710_DRIVER_H
+#define LINUX_CB710_DRIVER_H
+
+#ifdef CONFIG_CB710_DEBUG
+#define DEBUG
+#endif
+
+/* verify assumptions on platform_device framework */
+#define CONFIG_CB710_DEBUG_ASSUMPTIONS
+
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/mmc/host.h>
+
+struct cb710_slot;
+
+typedef int (*cb710_irq_handler_t)(struct cb710_slot *);
+
+/* per-virtual-slot structure */
+struct cb710_slot {
+	struct platform_device	pdev;
+	void __iomem		*iobase;
+	cb710_irq_handler_t	irq_handler;
+};
+
+/* per-device structure */
+struct cb710_chip {
+	struct pci_dev		*pdev;
+	void __iomem		*iobase;
+	unsigned		platform_id;
+#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS
+	atomic_t		slot_refs_count;
+#endif
+	unsigned		slot_mask;
+	unsigned		slots;
+	spinlock_t		irq_lock;
+	struct cb710_slot	slot[0];
+};
+
+/* NOTE: cb710_chip.slots is modified only during device init/exit and
+ * they are all serialized wrt themselves */
+
+/* cb710_chip.slot_mask values */
+#define CB710_SLOT_MMC		1
+#define CB710_SLOT_MS		2
+#define CB710_SLOT_SM		4
+
+/* slot port accessors - so the logic is more clear in the code */
+#define CB710_PORT_ACCESSORS(t) \
+static inline void cb710_write_port_##t(struct cb710_slot *slot,	\
+	unsigned port, u##t value)					\
+{									\
+	iowrite##t(value, slot->iobase + port);				\
+}									\
+									\
+static inline u##t cb710_read_port_##t(struct cb710_slot *slot,		\
+	unsigned port)							\
+{									\
+	return ioread##t(slot->iobase + port);				\
+}									\
+									\
+static inline void cb710_modify_port_##t(struct cb710_slot *slot,	\
+	unsigned port, u##t set, u##t clear)				\
+{									\
+	iowrite##t(							\
+		(ioread##t(slot->iobase + port) & ~clear)|set,		\
+		slot->iobase + port);					\
+}
+
+CB710_PORT_ACCESSORS(8)
+CB710_PORT_ACCESSORS(16)
+CB710_PORT_ACCESSORS(32)
+
+void cb710_pci_update_config_reg(struct pci_dev *pdev,
+	int reg, uint32_t and, uint32_t xor);
+void cb710_set_irq_handler(struct cb710_slot *slot,
+	cb710_irq_handler_t handler);
+
+/* some device struct walking */
+
+static inline struct cb710_slot *cb710_pdev_to_slot(
+	struct platform_device *pdev)
+{
+	return container_of(pdev, struct cb710_slot, pdev);
+}
+
+static inline struct cb710_chip *cb710_slot_to_chip(struct cb710_slot *slot)
+{
+	return dev_get_drvdata(slot->pdev.dev.parent);
+}
+
+static inline struct device *cb710_slot_dev(struct cb710_slot *slot)
+{
+	return &slot->pdev.dev;
+}
+
+static inline struct device *cb710_chip_dev(struct cb710_chip *chip)
+{
+	return &chip->pdev->dev;
+}
+
+/* debugging aids */
+
+#ifdef CONFIG_CB710_DEBUG
+void cb710_dump_regs(struct cb710_chip *chip, unsigned dump);
+#else
+#define cb710_dump_regs(c, d) do {} while (0)
+#endif
+
+#define CB710_DUMP_REGS_MMC	0x0F
+#define CB710_DUMP_REGS_MS	0x30
+#define CB710_DUMP_REGS_SM	0xC0
+#define CB710_DUMP_REGS_ALL	0xFF
+#define CB710_DUMP_REGS_MASK	0xFF
+
+#define CB710_DUMP_ACCESS_8	0x100
+#define CB710_DUMP_ACCESS_16	0x200
+#define CB710_DUMP_ACCESS_32	0x400
+#define CB710_DUMP_ACCESS_ALL	0x700
+#define CB710_DUMP_ACCESS_MASK	0x700
+
+#endif /* LINUX_CB710_DRIVER_H */
+/*
+ *  cb710/sgbuf2.h
+ *
+ *  Copyright by Michał Mirosław, 2008-2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef LINUX_CB710_SG_H
+#define LINUX_CB710_SG_H
+
+#include <linux/highmem.h>
+#include <linux/scatterlist.h>
+
+/**
+ * cb710_sg_miter_stop_writing - stop mapping iteration after writing
+ * @miter: sg mapping iter to be stopped
+ *
+ * Description:
+ *   Stops mapping iterator @miter.  @miter should have been started
+ *   started using sg_miter_start().  A stopped iteration can be
+ *   resumed by calling sg_miter_next() on it.  This is useful when
+ *   resources (kmap) need to be released during iteration.
+ *
+ *   This is a convenience wrapper that will be optimized out for arches
+ *   that don't need flush_kernel_dcache_page().
+ *
+ * Context:
+ *   IRQ disabled if the SG_MITER_ATOMIC is set.  Don't care otherwise.
+ */
+static inline void cb710_sg_miter_stop_writing(struct sg_mapping_iter *miter)
+{
+	if (miter->page)
+		flush_kernel_dcache_page(miter->page);
+	sg_miter_stop(miter);
+}
+
+/*
+ * 32-bit PIO mapping sg iterator
+ *
+ * Hides scatterlist access issues - fragment boundaries, alignment, page
+ * mapping - for drivers using 32-bit-word-at-a-time-PIO (ie. PCI devices
+ * without DMA support).
+ *
+ * Best-case reading (transfer from device):
+ *   sg_miter_start();
+ *   cb710_sg_dwiter_write_from_io();
+ *   cb710_sg_miter_stop_writing();
+ *
+ * Best-case writing (transfer to device):
+ *   sg_miter_start();
+ *   cb710_sg_dwiter_read_to_io();
+ *   sg_miter_stop();
+ */
+
+uint32_t cb710_sg_dwiter_read_next_block(struct sg_mapping_iter *miter);
+void cb710_sg_dwiter_write_next_block(struct sg_mapping_iter *miter, uint32_t data);
+
+/**
+ * cb710_sg_dwiter_write_from_io - transfer data to mapped buffer from 32-bit IO port
+ * @miter: sg mapping iter
+ * @port: PIO port - IO or MMIO address
+ * @count: number of 32-bit words to transfer
+ *
+ * Description:
+ *   Reads @count 32-bit words from register @port and stores it in
+ *   buffer iterated by @miter.  Data that would overflow the buffer
+ *   is silently ignored.  Iterator is advanced by 4*@count bytes
+ *   or to the buffer's end whichever is closer.
+ *
+ * Context:
+ *   IRQ disabled if the SG_MITER_ATOMIC is set.  Don't care otherwise.
+ */
+static inline void cb710_sg_dwiter_write_from_io(struct sg_mapping_iter *miter,
+	void __iomem *port, size_t count)
+{
+	while (count-- > 0)
+		cb710_sg_dwiter_write_next_block(miter, ioread32(port));
+}
+
+/**
+ * cb710_sg_dwiter_read_to_io - transfer data to 32-bit IO port from mapped buffer
+ * @miter: sg mapping iter
+ * @port: PIO port - IO or MMIO address
+ * @count: number of 32-bit words to transfer
+ *
+ * Description:
+ *   Writes @count 32-bit words to register @port from buffer iterated
+ *   through @miter.  If buffer ends before @count words are written
+ *   missing data is replaced by zeroes. @miter is advanced by 4*@count
+ *   bytes or to the buffer's end whichever is closer.
+ *
+ * Context:
+ *   IRQ disabled if the SG_MITER_ATOMIC is set.  Don't care otherwise.
+ */
+static inline void cb710_sg_dwiter_read_to_io(struct sg_mapping_iter *miter,
+	void __iomem *port, size_t count)
+{
+	while (count-- > 0)
+		iowrite32(cb710_sg_dwiter_read_next_block(miter), port);
+}
+
+#endif /* LINUX_CB710_SG_H */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 19f8e6d1a4d..9f36e1cdbf0 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2127,6 +2127,7 @@
 #define PCI_VENDOR_ID_MAINPINE		0x1522
 #define PCI_DEVICE_ID_MAINPINE_PBRIDGE	0x0100
 #define PCI_VENDOR_ID_ENE		0x1524
+#define PCI_DEVICE_ID_ENE_CB710_FLASH	0x0510
 #define PCI_DEVICE_ID_ENE_CB712_SD	0x0550
 #define PCI_DEVICE_ID_ENE_CB712_SD_2	0x0551
 #define PCI_DEVICE_ID_ENE_CB714_SD	0x0750
-- 
cgit v1.2.3-70-g09d2


From 9bf69a26ad9ccdc49469402275204271b3336ab6 Mon Sep 17 00:00:00 2001
From: Pierre Ossman <pierre@ossman.eu>
Date: Thu, 4 Jun 2009 08:00:40 +0200
Subject: cb710: handle DEBUG define in Makefile

Signed-off-by: Pierre Ossman <pierre@ossman.eu>
---
 drivers/misc/cb710/Makefile | 4 ++++
 include/linux/cb710.h       | 4 ----
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/misc/cb710/Makefile b/drivers/misc/cb710/Makefile
index 62d51acfeca..7b80cbf1a60 100644
--- a/drivers/misc/cb710/Makefile
+++ b/drivers/misc/cb710/Makefile
@@ -1,3 +1,7 @@
+ifeq ($(CONFIG_CB710_DEBUG),y)
+	EXTRA_CFLAGS		+= -DDEBUG
+endif
+
 obj-$(CONFIG_CB710_CORE)	+= cb710.o
 
 cb710-y				:= core.o sgbuf2.o
diff --git a/include/linux/cb710.h b/include/linux/cb710.h
index c3819e1ce1c..a09213b5261 100644
--- a/include/linux/cb710.h
+++ b/include/linux/cb710.h
@@ -10,10 +10,6 @@
 #ifndef LINUX_CB710_DRIVER_H
 #define LINUX_CB710_DRIVER_H
 
-#ifdef CONFIG_CB710_DEBUG
-#define DEBUG
-#endif
-
 /* verify assumptions on platform_device framework */
 #define CONFIG_CB710_DEBUG_ASSUMPTIONS
 
-- 
cgit v1.2.3-70-g09d2


From c54f6bc67a4398243682f7438a2129906e127d21 Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Sat, 13 Jun 2009 12:37:59 +0200
Subject: cb710: more cleanup for the DEBUG case.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>
---
 drivers/misc/cb710/Kconfig | 4 ++++
 drivers/mmc/host/Makefile  | 3 +++
 include/linux/cb710.h      | 3 ---
 3 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/misc/cb710/Kconfig b/drivers/misc/cb710/Kconfig
index 0b55079774c..22429b8b106 100644
--- a/drivers/misc/cb710/Kconfig
+++ b/drivers/misc/cb710/Kconfig
@@ -19,3 +19,7 @@ config CB710_DEBUG
 	  This is an option for use by developers; most people should
 	  say N here.  This adds a lot of debugging output to dmesg.
 
+config CB710_DEBUG_ASSUMPTIONS
+	bool
+	depends on CB710_CORE != n
+	default y
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 254e441291d..79da397c5fe 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -32,3 +32,6 @@ obj-$(CONFIG_MMC_SDRICOH_CS)	+= sdricoh_cs.o
 obj-$(CONFIG_MMC_TMIO)		+= tmio_mmc.o
 obj-$(CONFIG_MMC_CB710)	+= cb710-mmc.o
 
+ifeq ($(CONFIG_CB710_DEBUG),y)
+	CFLAGS-cb710-mmc	+= -DDEBUG
+endif
diff --git a/include/linux/cb710.h b/include/linux/cb710.h
index a09213b5261..63bc9a4d292 100644
--- a/include/linux/cb710.h
+++ b/include/linux/cb710.h
@@ -10,9 +10,6 @@
 #ifndef LINUX_CB710_DRIVER_H
 #define LINUX_CB710_DRIVER_H
 
-/* verify assumptions on platform_device framework */
-#define CONFIG_CB710_DEBUG_ASSUMPTIONS
-
 #include <linux/io.h>
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
-- 
cgit v1.2.3-70-g09d2


From f0e46cc4971f6be96010d9248e0fc076b229d989 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Thu, 4 Jun 2009 20:12:31 +0200
Subject: MFD,mmc: tmio_mmc: make HCLK configurable

The Toshiba parts all have a 24 MHz HCLK, but HTC ASIC3 has a 24.576 MHz HCLK
and AMD Imageon w228x's HCLK is 80 MHz. With this patch, the MFD driver
provides the HCLK frequency to tmio_mmc via mfd_cell->driver_data.

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Acked-by: Ian Molton <ian@mnementh.co.uk>
Acked-by: Samuel Ortiz <sameo@openedhand.com>
Signed-off-by: Pierre Ossman <pierre@ossman.eu>
---
 drivers/mfd/t7l66xb.c       |  5 +++++
 drivers/mfd/tc6387xb.c      |  5 +++++
 drivers/mfd/tc6393xb.c      |  5 +++++
 drivers/mmc/host/tmio_mmc.c | 24 +++++++++++-------------
 include/linux/mfd/tmio.h    |  7 +++++++
 5 files changed, 33 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/drivers/mfd/t7l66xb.c b/drivers/mfd/t7l66xb.c
index e9f4323dd2c..875f7a87573 100644
--- a/drivers/mfd/t7l66xb.c
+++ b/drivers/mfd/t7l66xb.c
@@ -108,6 +108,10 @@ static int t7l66xb_mmc_disable(struct platform_device *mmc)
 
 /*--------------------------------------------------------------------------*/
 
+static const struct tmio_mmc_data t7166xb_mmc_data = {
+	.hclk = 24000000,
+};
+
 static const struct resource t7l66xb_mmc_resources[] = {
 	{
 		.start = 0x800,
@@ -149,6 +153,7 @@ static struct mfd_cell t7l66xb_cells[] = {
 		.name = "tmio-mmc",
 		.enable = t7l66xb_mmc_enable,
 		.disable = t7l66xb_mmc_disable,
+		.driver_data = &t7166xb_mmc_data,
 		.num_resources = ARRAY_SIZE(t7l66xb_mmc_resources),
 		.resources = t7l66xb_mmc_resources,
 	},
diff --git a/drivers/mfd/tc6387xb.c b/drivers/mfd/tc6387xb.c
index 43222c12fec..c3993ac2054 100644
--- a/drivers/mfd/tc6387xb.c
+++ b/drivers/mfd/tc6387xb.c
@@ -75,6 +75,10 @@ static int tc6387xb_mmc_disable(struct platform_device *mmc)
 
 /*--------------------------------------------------------------------------*/
 
+const static struct tmio_mmc_data tc6387xb_mmc_data = {
+	.hclk = 24000000,
+};
+
 static struct resource tc6387xb_mmc_resources[] = {
 	{
 		.start = 0x800,
@@ -98,6 +102,7 @@ static struct mfd_cell tc6387xb_cells[] = {
 		.name = "tmio-mmc",
 		.enable = tc6387xb_mmc_enable,
 		.disable = tc6387xb_mmc_disable,
+		.driver_data = &tc6387xb_mmc_data,
 		.num_resources = ARRAY_SIZE(tc6387xb_mmc_resources),
 		.resources = tc6387xb_mmc_resources,
 	},
diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c
index 77a12fc8045..9d2abb5d6e2 100644
--- a/drivers/mfd/tc6393xb.c
+++ b/drivers/mfd/tc6393xb.c
@@ -136,6 +136,10 @@ static int tc6393xb_nand_enable(struct platform_device *nand)
 	return 0;
 }
 
+const static struct tmio_mmc_data tc6393xb_mmc_data = {
+	.hclk = 24000000,
+};
+
 static struct resource __devinitdata tc6393xb_nand_resources[] = {
 	{
 		.start	= 0x1000,
@@ -351,6 +355,7 @@ static struct mfd_cell __devinitdata tc6393xb_cells[] = {
 	},
 	[TC6393XB_CELL_MMC] = {
 		.name = "tmio-mmc",
+		.driver_data = &tc6393xb_mmc_data,
 		.num_resources = ARRAY_SIZE(tc6393xb_mmc_resources),
 		.resources = tc6393xb_mmc_resources,
 	},
diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c
index 63fbd5b7d31..49df71e6be1 100644
--- a/drivers/mmc/host/tmio_mmc.c
+++ b/drivers/mmc/host/tmio_mmc.c
@@ -35,23 +35,14 @@
 
 #include "tmio_mmc.h"
 
-/*
- * Fixme - documentation conflicts on what the clock values are for the
- * various dividers.
- * One document I have says that its a divisor of a 24MHz clock, another 33.
- * This probably depends on HCLK for a given platform, so we may need to
- * require HCLK be passed to us from the MFD core.
- *
- */
-
 static void tmio_mmc_set_clock(struct tmio_mmc_host *host, int new_clock)
 {
 	void __iomem *cnf = host->cnf;
 	void __iomem *ctl = host->ctl;
-	u32 clk = 0, clock;
+	u32 clk = 0, clock, f_min = host->mmc->f_min;
 
 	if (new_clock) {
-		for (clock = 46875, clk = 0x100; new_clock >= (clock<<1); ) {
+		for (clock = f_min, clk = 0x100; new_clock >= (clock<<1); ) {
 			clock <<= 1;
 			clk >>= 1;
 		}
@@ -545,6 +536,7 @@ out:
 static int __devinit tmio_mmc_probe(struct platform_device *dev)
 {
 	struct mfd_cell	*cell = (struct mfd_cell *)dev->dev.platform_data;
+	struct tmio_mmc_data *pdata;
 	struct resource *res_ctl, *res_cnf;
 	struct tmio_mmc_host *host;
 	struct mmc_host *mmc;
@@ -560,6 +552,12 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 		goto out;
 	}
 
+	pdata = cell->driver_data;
+	if (!pdata || !pdata->hclk) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	mmc = mmc_alloc_host(sizeof(struct tmio_mmc_host), &dev->dev);
 	if (!mmc)
 		goto out;
@@ -578,8 +576,8 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 
 	mmc->ops = &tmio_mmc_ops;
 	mmc->caps = MMC_CAP_4_BIT_DATA;
-	mmc->f_min = 46875; /* 24000000 / 512 */
-	mmc->f_max = 24000000;
+	mmc->f_max = pdata->hclk;
+	mmc->f_min = mmc->f_max / 512;
 	mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
 
 	/* Enable the MMC/SD Control registers */
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 516d955ab8a..c377118884e 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -18,6 +18,13 @@
 	writew((val) >> 16, (addr) + 2); \
 	} while (0)
 
+/*
+ * data for the MMC controller
+ */
+struct tmio_mmc_data {
+	unsigned int		hclk;
+};
+
 /*
  * data for the NAND controller
  */
-- 
cgit v1.2.3-70-g09d2


From ab33dcff40d7a9a28587e4425621e4cbc4089e03 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Sat, 13 Jun 2009 20:01:00 -0700
Subject: genirq, irq.h: Fix kernel-doc warnings

Fix kernel-doc warnings in linux/irq.h:

  Warning(include/linux/irq.h:201): No description found for parameter 'node'
  Warning(include/linux/irq.h:201): Excess struct/union/enum/typedef member 'cpu' description in 'irq_desc'
  Warning(include/linux/irq.h:434): No description found for parameter 'node'
  Warning(include/linux/irq.h:434): Excess function parameter 'cpu' description in 'alloc_desc_masks'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
LKML-Reference: <4A3467EC.50006@oracle.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irq.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index b7cbeed972e..0c001c15752 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -157,7 +157,7 @@ struct irq_2_iommu;
  * @irqs_unhandled:	stats field for spurious unhandled interrupts
  * @lock:		locking for SMP
  * @affinity:		IRQ affinity on SMP
- * @cpu:		cpu index useful for balancing
+ * @node:		node index useful for balancing
  * @pending_mask:	pending rebalanced interrupts
  * @threads_active:	number of irqaction threads currently running
  * @wait_for_threads:	wait queue for sync_irq to wait for threaded handlers
@@ -426,7 +426,7 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
 /**
  * init_alloc_desc_masks - allocate cpumasks for irq_desc
  * @desc:	pointer to irq_desc struct
- * @cpu:	cpu which will be handling the cpumasks
+ * @node:	node which will be handling the cpumasks
  * @boot:	true if need bootmem
  *
  * Allocates affinity and pending_mask cpumask if required.
-- 
cgit v1.2.3-70-g09d2


From 837ec787d85fda8d73193a399ebcea0288e4765b Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Tue, 9 Jun 2009 23:56:55 +0200
Subject: firewire: core: don't update Broadcast_Channel if RFC 2734 conditions
 aren't met

This extra check will avoid Broadcast_Channel register related traffic
to many IIDC, SBP-2, and AV/C devices which aren't IRMC or have a
max_rec < 8 (i.e. support < 512 bytes async payload).  This avoids a
little bit of traffic after bus reset and is even more careful with
devices which don't implement this CSR.

The assumption is that no other protocol than IP over 1394 uses the
broadcast channel for streams.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-device.c | 18 +++++++++++++++++-
 include/linux/firewire.h       |  2 ++
 2 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c
index d6e54a5173f..97e656af2d2 100644
--- a/drivers/firewire/core-device.c
+++ b/drivers/firewire/core-device.c
@@ -580,7 +580,9 @@ static int read_bus_info_block(struct fw_device *device, int generation)
 
 	kfree(old_rom);
 	ret = 0;
-	device->cmc = rom[2] >> 30 & 1;
+	device->max_rec	= rom[2] >> 12 & 0xf;
+	device->cmc	= rom[2] >> 30 & 1;
+	device->irmc	= rom[2] >> 31 & 1;
  out:
 	kfree(rom);
 
@@ -841,6 +843,20 @@ static void set_broadcast_channel(struct fw_device *device, int generation)
 	if (!card->broadcast_channel_allocated)
 		return;
 
+	/*
+	 * The Broadcast_Channel Valid bit is required by nodes which want to
+	 * transmit on this channel.  Such transmissions are practically
+	 * exclusive to IP over 1394 (RFC 2734).  IP capable nodes are required
+	 * to be IRM capable and have a max_rec of 8 or more.  We use this fact
+	 * to narrow down to which nodes we send Broadcast_Channel updates.
+	 */
+	if (!device->irmc || device->max_rec < 8)
+		return;
+
+	/*
+	 * Some 1394-1995 nodes crash if this 1394a-2000 register is written.
+	 * Perform a read test first.
+	 */
 	if (device->bc_implemented == BC_UNKNOWN) {
 		rcode = fw_run_transaction(card, TCODE_READ_QUADLET_REQUEST,
 				device->node_id, generation, device->max_speed,
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index a69aea0394e..610eade8abb 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -191,7 +191,9 @@ struct fw_device {
 	size_t config_rom_length;
 	int config_rom_retries;
 	unsigned is_local:1;
+	unsigned max_rec:4;
 	unsigned cmc:1;
+	unsigned irmc:1;
 	unsigned bc_implemented:2;
 
 	struct delayed_work work;
-- 
cgit v1.2.3-70-g09d2


From 1e626fdcef61460dc75fe7377f38bb019722b848 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 14 Jun 2009 13:23:58 +0200
Subject: firewire: core: use more outbound tlabels

Tlabel is a 6 bits wide datum.  Wrap it after 63 rather than 31 for more
safety against transaction label exhaustion and potential responders'
transaction layer bugs.  (As noted by Guus Sliepen, this change requires
an expansion of tlabel_mask to 64 bits.)

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-transaction.c | 8 ++++----
 include/linux/firewire.h            | 3 ++-
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
index 9a6ce9ab2a6..479b22f5a1e 100644
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -82,7 +82,7 @@ static int close_transaction(struct fw_transaction *transaction,
 	list_for_each_entry(t, &card->transaction_list, link) {
 		if (t == transaction) {
 			list_del(&t->link);
-			card->tlabel_mask &= ~(1 << t->tlabel);
+			card->tlabel_mask &= ~(1ULL << t->tlabel);
 			break;
 		}
 	}
@@ -288,14 +288,14 @@ void fw_send_request(struct fw_card *card, struct fw_transaction *t, int tcode,
 	spin_lock_irqsave(&card->lock, flags);
 
 	tlabel = card->current_tlabel;
-	if (card->tlabel_mask & (1 << tlabel)) {
+	if (card->tlabel_mask & (1ULL << tlabel)) {
 		spin_unlock_irqrestore(&card->lock, flags);
 		callback(card, RCODE_SEND_ERROR, NULL, 0, callback_data);
 		return;
 	}
 
-	card->current_tlabel = (card->current_tlabel + 1) & 0x1f;
-	card->tlabel_mask |= (1 << tlabel);
+	card->current_tlabel = (card->current_tlabel + 1) & 0x3f;
+	card->tlabel_mask |= (1ULL << tlabel);
 
 	t->node_id = destination_id;
 	t->tlabel = tlabel;
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index 610eade8abb..e584b7215e8 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -98,7 +98,8 @@ struct fw_card {
 
 	int node_id;
 	int generation;
-	int current_tlabel, tlabel_mask;
+	int current_tlabel;
+	u64 tlabel_mask;
 	struct list_head transaction_list;
 	struct timer_list flush_timer;
 	unsigned long reset_jiffies;
-- 
cgit v1.2.3-70-g09d2


From c76acec6d55107b652a37c90b36c00bc8b04dabb Mon Sep 17 00:00:00 2001
From: Jay Fenlason <fenlason@redhat.com>
Date: Mon, 18 May 2009 13:08:06 -0400
Subject: firewire: add IPv4 support

Implement IPv4 over IEEE 1394 as per RFC 2734 for the newer firewire
stack.  This feature has only been present in the older ieee1394 stack
via the eth1394 driver.

Still to do:
  - fix ipv4_priv and ipv4_node lifetime logic
  - fix determination of speeds and max payloads
  - fix bus reset handling
  - fix unaligned memory accesses
  - fix coding style
  - further testing/ improvement of fragment reassembly
  - perhaps multicast support

Signed-off-by: Jay Fenlason <fenlason@redhat.com>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de> (rebased, copyright note, changelog)
---
 drivers/firewire/Makefile    |    2 +
 drivers/firewire/core-card.c |    4 +
 drivers/firewire/core-iso.c  |    7 +
 drivers/firewire/core.h      |   87 --
 drivers/firewire/fw-ipv4.c   | 1819 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/firewire.h     |   94 +++
 6 files changed, 1926 insertions(+), 87 deletions(-)
 create mode 100644 drivers/firewire/fw-ipv4.c

(limited to 'include')

diff --git a/drivers/firewire/Makefile b/drivers/firewire/Makefile
index bc3b9bf822b..31edf30c558 100644
--- a/drivers/firewire/Makefile
+++ b/drivers/firewire/Makefile
@@ -6,7 +6,9 @@ firewire-core-y += core-card.o core-cdev.o core-device.o \
                    core-iso.o core-topology.o core-transaction.o
 firewire-ohci-y += ohci.o
 firewire-sbp2-y += sbp2.o
+firewire-ipv4-y += fw-ipv4.o
 
 obj-$(CONFIG_FIREWIRE) += firewire-core.o
 obj-$(CONFIG_FIREWIRE_OHCI) += firewire-ohci.o
 obj-$(CONFIG_FIREWIRE_SBP2) += firewire-sbp2.o
+obj-$(CONFIG_FIREWIRE_IPV4) += firewire-ipv4.o
diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c
index 4c1be64fddd..cdab32b2067 100644
--- a/drivers/firewire/core-card.c
+++ b/drivers/firewire/core-card.c
@@ -176,6 +176,7 @@ int fw_core_add_descriptor(struct fw_descriptor *desc)
 
 	return 0;
 }
+EXPORT_SYMBOL(fw_core_add_descriptor);
 
 void fw_core_remove_descriptor(struct fw_descriptor *desc)
 {
@@ -189,6 +190,7 @@ void fw_core_remove_descriptor(struct fw_descriptor *desc)
 
 	mutex_unlock(&card_mutex);
 }
+EXPORT_SYMBOL(fw_core_remove_descriptor);
 
 static void allocate_broadcast_channel(struct fw_card *card, int generation)
 {
@@ -427,6 +429,8 @@ void fw_card_initialize(struct fw_card *card,
 	card->local_node = NULL;
 
 	INIT_DELAYED_WORK(&card->work, fw_card_bm_work);
+	card->netdev = NULL;
+	INIT_LIST_HEAD(&card->ipv4_nodes);
 }
 EXPORT_SYMBOL(fw_card_initialize);
 
diff --git a/drivers/firewire/core-iso.c b/drivers/firewire/core-iso.c
index 28076c892d7..448ddd7d887 100644
--- a/drivers/firewire/core-iso.c
+++ b/drivers/firewire/core-iso.c
@@ -80,6 +80,7 @@ int fw_iso_buffer_init(struct fw_iso_buffer *buffer, struct fw_card *card,
 
 	return -ENOMEM;
 }
+EXPORT_SYMBOL(fw_iso_buffer_init);
 
 int fw_iso_buffer_map(struct fw_iso_buffer *buffer, struct vm_area_struct *vma)
 {
@@ -114,6 +115,7 @@ void fw_iso_buffer_destroy(struct fw_iso_buffer *buffer,
 	kfree(buffer->pages);
 	buffer->pages = NULL;
 }
+EXPORT_SYMBOL(fw_iso_buffer_destroy);
 
 struct fw_iso_context *fw_iso_context_create(struct fw_card *card,
 		int type, int channel, int speed, size_t header_size,
@@ -136,6 +138,7 @@ struct fw_iso_context *fw_iso_context_create(struct fw_card *card,
 
 	return ctx;
 }
+EXPORT_SYMBOL(fw_iso_context_create);
 
 void fw_iso_context_destroy(struct fw_iso_context *ctx)
 {
@@ -143,12 +146,14 @@ void fw_iso_context_destroy(struct fw_iso_context *ctx)
 
 	card->driver->free_iso_context(ctx);
 }
+EXPORT_SYMBOL(fw_iso_context_destroy);
 
 int fw_iso_context_start(struct fw_iso_context *ctx,
 			 int cycle, int sync, int tags)
 {
 	return ctx->card->driver->start_iso(ctx, cycle, sync, tags);
 }
+EXPORT_SYMBOL(fw_iso_context_start);
 
 int fw_iso_context_queue(struct fw_iso_context *ctx,
 			 struct fw_iso_packet *packet,
@@ -159,11 +164,13 @@ int fw_iso_context_queue(struct fw_iso_context *ctx,
 
 	return card->driver->queue_iso(ctx, packet, buffer, payload);
 }
+EXPORT_SYMBOL(fw_iso_context_queue);
 
 int fw_iso_context_stop(struct fw_iso_context *ctx)
 {
 	return ctx->card->driver->stop_iso(ctx);
 }
+EXPORT_SYMBOL(fw_iso_context_stop);
 
 /*
  * Isochronous bus resource management (channels, bandwidth), client side
diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h
index 0a25a7b38a8..c3cfc647e5e 100644
--- a/drivers/firewire/core.h
+++ b/drivers/firewire/core.h
@@ -1,7 +1,6 @@
 #ifndef _FIREWIRE_CORE_H
 #define _FIREWIRE_CORE_H
 
-#include <linux/dma-mapping.h>
 #include <linux/fs.h>
 #include <linux/list.h>
 #include <linux/idr.h>
@@ -97,17 +96,6 @@ int fw_core_initiate_bus_reset(struct fw_card *card, int short_reset);
 int fw_compute_block_crc(u32 *block);
 void fw_schedule_bm_work(struct fw_card *card, unsigned long delay);
 
-struct fw_descriptor {
-	struct list_head link;
-	size_t length;
-	u32 immediate;
-	u32 key;
-	const u32 *data;
-};
-
-int fw_core_add_descriptor(struct fw_descriptor *desc);
-void fw_core_remove_descriptor(struct fw_descriptor *desc);
-
 
 /* -cdev */
 
@@ -130,77 +118,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event);
 
 /* -iso */
 
-/*
- * The iso packet format allows for an immediate header/payload part
- * stored in 'header' immediately after the packet info plus an
- * indirect payload part that is pointer to by the 'payload' field.
- * Applications can use one or the other or both to implement simple
- * low-bandwidth streaming (e.g. audio) or more advanced
- * scatter-gather streaming (e.g. assembling video frame automatically).
- */
-struct fw_iso_packet {
-	u16 payload_length;	/* Length of indirect payload. */
-	u32 interrupt:1;	/* Generate interrupt on this packet */
-	u32 skip:1;		/* Set to not send packet at all. */
-	u32 tag:2;
-	u32 sy:4;
-	u32 header_length:8;	/* Length of immediate header. */
-	u32 header[0];
-};
-
-#define FW_ISO_CONTEXT_TRANSMIT	0
-#define FW_ISO_CONTEXT_RECEIVE	1
-
-#define FW_ISO_CONTEXT_MATCH_TAG0	 1
-#define FW_ISO_CONTEXT_MATCH_TAG1	 2
-#define FW_ISO_CONTEXT_MATCH_TAG2	 4
-#define FW_ISO_CONTEXT_MATCH_TAG3	 8
-#define FW_ISO_CONTEXT_MATCH_ALL_TAGS	15
-
-/*
- * An iso buffer is just a set of pages mapped for DMA in the
- * specified direction.  Since the pages are to be used for DMA, they
- * are not mapped into the kernel virtual address space.  We store the
- * DMA address in the page private. The helper function
- * fw_iso_buffer_map() will map the pages into a given vma.
- */
-struct fw_iso_buffer {
-	enum dma_data_direction direction;
-	struct page **pages;
-	int page_count;
-};
-
-typedef void (*fw_iso_callback_t)(struct fw_iso_context *context,
-				  u32 cycle, size_t header_length,
-				  void *header, void *data);
-
-struct fw_iso_context {
-	struct fw_card *card;
-	int type;
-	int channel;
-	int speed;
-	size_t header_size;
-	fw_iso_callback_t callback;
-	void *callback_data;
-};
-
-int fw_iso_buffer_init(struct fw_iso_buffer *buffer, struct fw_card *card,
-		       int page_count, enum dma_data_direction direction);
 int fw_iso_buffer_map(struct fw_iso_buffer *buffer, struct vm_area_struct *vma);
-void fw_iso_buffer_destroy(struct fw_iso_buffer *buffer, struct fw_card *card);
-
-struct fw_iso_context *fw_iso_context_create(struct fw_card *card,
-		int type, int channel, int speed, size_t header_size,
-		fw_iso_callback_t callback, void *callback_data);
-int fw_iso_context_queue(struct fw_iso_context *ctx,
-			 struct fw_iso_packet *packet,
-			 struct fw_iso_buffer *buffer,
-			 unsigned long payload);
-int fw_iso_context_start(struct fw_iso_context *ctx,
-			 int cycle, int sync, int tags);
-int fw_iso_context_stop(struct fw_iso_context *ctx);
-void fw_iso_context_destroy(struct fw_iso_context *ctx);
-
 void fw_iso_resource_manage(struct fw_card *card, int generation,
 		u64 channels_mask, int *channel, int *bandwidth, bool allocate);
 
@@ -285,9 +203,4 @@ void fw_flush_transactions(struct fw_card *card);
 void fw_send_phy_config(struct fw_card *card,
 			int node_id, int generation, int gap_count);
 
-static inline int fw_stream_packet_destination_id(int tag, int channel, int sy)
-{
-	return tag << 14 | channel << 8 | sy;
-}
-
 #endif /* _FIREWIRE_CORE_H */
diff --git a/drivers/firewire/fw-ipv4.c b/drivers/firewire/fw-ipv4.c
new file mode 100644
index 00000000000..4de6dbb95f0
--- /dev/null
+++ b/drivers/firewire/fw-ipv4.c
@@ -0,0 +1,1819 @@
+/*
+ * IPv4 over IEEE 1394, per RFC 2734
+ *
+ * Copyright (C) 2009 Jay Fenlason <fenlason@redhat.com>
+ *
+ * based on eth1394 by Ben Collins et al
+ */
+
+#include <linux/device.h>
+#include <linux/ethtool.h>
+#include <linux/firewire.h>
+#include <linux/firewire-constants.h>
+#include <linux/highmem.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+
+#include <asm/unaligned.h>
+#include <net/arp.h>
+
+/* Things to potentially make runtime cofigurable */
+/* must be at least as large as our maximum receive size */
+#define FIFO_SIZE 4096
+/* Network timeout in glibbles */
+#define IPV4_TIMEOUT       100000
+
+/* Runitme configurable paramaters */
+static int ipv4_mpd = 25;
+static int ipv4_max_xmt = 0;
+/* 16k for receiving arp and broadcast packets.  Enough? */
+static int ipv4_iso_page_count = 4;
+
+MODULE_AUTHOR("Jay Fenlason (fenlason@redhat.com)");
+MODULE_DESCRIPTION("Firewire IPv4 Driver (IPv4-over-IEEE1394 as per RFC 2734)");
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(ieee1394, ipv4_id_table);
+module_param_named(max_partial_datagrams, ipv4_mpd, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(max_partial_datagrams, "Maximum number of received"
+ " incomplete fragmented datagrams (default = 25).");
+
+/* Max xmt is useful for forcing fragmentation, which makes testing easier. */
+module_param_named(max_transmit, ipv4_max_xmt, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(max_transmit, "Maximum datagram size to transmit"
+ " (larger datagrams will be fragmented) (default = 0 (use hardware defaults).");
+
+/* iso page count controls how many pages will be used for receiving broadcast packets. */
+module_param_named(iso_pages, ipv4_iso_page_count, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(iso_pages, "Number of pages to use for receiving broadcast packets"
+ " (default = 4).");
+
+/* uncomment this line to do debugging */
+#define fw_debug(s, args...) printk(KERN_DEBUG KBUILD_MODNAME ": " s, ## args)
+
+/* comment out these lines to do debugging. */
+/* #undef fw_debug */
+/* #define fw_debug(s...) */
+/* #define print_hex_dump(l...) */
+
+/* Define a fake hardware header format for the networking core.  Note that
+ * header size cannot exceed 16 bytes as that is the size of the header cache.
+ * Also, we do not need the source address in the header so we omit it and
+ * keep the header to under 16 bytes */
+#define IPV4_ALEN (8)
+/* This must equal sizeof(struct ipv4_ether_hdr) */
+#define IPV4_HLEN (10)
+
+/* FIXME: what's a good size for this? */
+#define INVALID_FIFO_ADDR (u64)~0ULL
+
+/* Things specified by standards */
+#define BROADCAST_CHANNEL 31
+
+#define S100_BUFFER_SIZE 512
+#define MAX_BUFFER_SIZE 4096
+
+#define IPV4_GASP_SPECIFIER_ID	0x00005EU
+#define IPV4_GASP_VERSION	0x00000001U
+
+#define IPV4_GASP_OVERHEAD (2 * sizeof(u32)) /* for GASP header */
+
+#define IPV4_UNFRAG_HDR_SIZE	sizeof(u32)
+#define IPV4_FRAG_HDR_SIZE	(2 * sizeof(u32))
+#define IPV4_FRAG_OVERHEAD	sizeof(u32)
+
+#define ALL_NODES (0xffc0 | 0x003f)
+
+#define IPV4_HDR_UNFRAG		0	/* unfragmented		*/
+#define IPV4_HDR_FIRSTFRAG	1	/* first fragment	*/
+#define IPV4_HDR_LASTFRAG	2	/* last fragment	*/
+#define IPV4_HDR_INTFRAG	3	/* interior fragment	*/
+
+/* Our arp packet (ARPHRD_IEEE1394) */
+/* FIXME: note that this is probably bogus on weird-endian machines */
+struct ipv4_arp {
+	u16 hw_type;		/* 0x0018	*/
+	u16 proto_type;		/* 0x0806       */
+	u8 hw_addr_len;		/* 16		*/
+	u8 ip_addr_len;         /* 4		*/
+	u16 opcode;	        /* ARP Opcode	*/
+	/* Above is exactly the same format as struct arphdr */
+
+	u64 s_uniq_id;		/* Sender's 64bit EUI			*/
+	u8 max_rec;             /* Sender's max packet size		*/
+	u8 sspd;		/* Sender's max speed			*/
+	u16 fifo_hi;            /* hi 16bits of sender's FIFO addr	*/
+	u32 fifo_lo;            /* lo 32bits of sender's FIFO addr	*/
+	u32 sip;		/* Sender's IP Address			*/
+	u32 tip;		/* IP Address of requested hw addr	*/
+} __attribute__((packed));
+
+struct ipv4_ether_hdr {
+	unsigned char	h_dest[IPV4_ALEN];	/* destination address */
+	unsigned short  h_proto;                /* packet type ID field */
+}  __attribute__((packed));
+
+static inline struct ipv4_ether_hdr *ipv4_ether_hdr(const struct sk_buff *skb)
+{
+	return (struct ipv4_ether_hdr *)skb_mac_header(skb);
+}
+
+enum ipv4_tx_type {
+	IPV4_UNKNOWN = 0,
+	IPV4_GASP = 1,
+	IPV4_WRREQ = 2,
+};
+
+enum ipv4_broadcast_state {
+	IPV4_BROADCAST_ERROR,
+	IPV4_BROADCAST_RUNNING,
+	IPV4_BROADCAST_STOPPED,
+};
+
+#define ipv4_get_hdr_lf(h)		(((h)->w0&0xC0000000)>>30)
+#define ipv4_get_hdr_ether_type(h)	(((h)->w0&0x0000FFFF)    )
+#define ipv4_get_hdr_dg_size(h)		(((h)->w0&0x0FFF0000)>>16)
+#define ipv4_get_hdr_fg_off(h)		(((h)->w0&0x00000FFF)    )
+#define ipv4_get_hdr_dgl(h)		(((h)->w1&0xFFFF0000)>>16)
+
+#define ipv4_set_hdr_lf(lf)		(( lf)<<30)
+#define ipv4_set_hdr_ether_type(et)	(( et)    )
+#define ipv4_set_hdr_dg_size(dgs)	((dgs)<<16)
+#define ipv4_set_hdr_fg_off(fgo)	((fgo)    )
+
+#define ipv4_set_hdr_dgl(dgl)		((dgl)<<16)
+
+struct ipv4_hdr {
+	u32 w0;
+	u32 w1;
+};
+
+static inline void ipv4_make_uf_hdr( struct ipv4_hdr *hdr, unsigned ether_type) {
+	hdr->w0 = ipv4_set_hdr_lf(IPV4_HDR_UNFRAG)
+		   |ipv4_set_hdr_ether_type(ether_type);
+	fw_debug ( "Setting unfragmented header %p to %x\n", hdr, hdr->w0 );
+}
+
+static inline void ipv4_make_ff_hdr ( struct ipv4_hdr *hdr, unsigned ether_type, unsigned dg_size, unsigned dgl ) {
+	hdr->w0 = ipv4_set_hdr_lf(IPV4_HDR_FIRSTFRAG)
+		   |ipv4_set_hdr_dg_size(dg_size)
+		   |ipv4_set_hdr_ether_type(ether_type);
+	hdr->w1 = ipv4_set_hdr_dgl(dgl);
+	fw_debug ( "Setting fragmented header %p to first_frag %x,%x (et %x, dgs %x, dgl %x)\n", hdr, hdr->w0, hdr->w1,
+ ether_type, dg_size, dgl );
+}
+
+static inline void ipv4_make_sf_hdr ( struct ipv4_hdr *hdr, unsigned lf, unsigned dg_size, unsigned fg_off, unsigned dgl) {
+	hdr->w0 = ipv4_set_hdr_lf(lf)
+		 |ipv4_set_hdr_dg_size(dg_size)
+		 |ipv4_set_hdr_fg_off(fg_off);
+	hdr->w1 = ipv4_set_hdr_dgl(dgl);
+	fw_debug ( "Setting fragmented header %p to %x,%x (lf %x, dgs %x, fo %x dgl %x)\n",
+ hdr, hdr->w0, hdr->w1,
+ lf, dg_size, fg_off, dgl );
+}
+
+/* End of IP1394 headers */
+
+/* Fragment types */
+#define ETH1394_HDR_LF_UF	0	/* unfragmented		*/
+#define ETH1394_HDR_LF_FF	1	/* first fragment	*/
+#define ETH1394_HDR_LF_LF	2	/* last fragment	*/
+#define ETH1394_HDR_LF_IF	3	/* interior fragment	*/
+
+#define IP1394_HW_ADDR_LEN	16	/* As per RFC		*/
+
+/* This list keeps track of what parts of the datagram have been filled in */
+struct ipv4_fragment_info {
+        struct list_head fragment_info;
+	u16 offset;
+	u16 len;
+};
+
+struct ipv4_partial_datagram {
+	struct list_head pdg_list;
+	struct list_head fragment_info;
+	struct sk_buff *skb;
+	/* FIXME Why not use skb->data? */
+	char *pbuf;
+	u16 datagram_label;
+	u16 ether_type;
+	u16 datagram_size;
+};
+
+/*
+ * We keep one of these for each IPv4 capable device attached to a fw_card.
+ * The list of them is stored in the fw_card structure rather than in the
+ * ipv4_priv because the remote IPv4 nodes may be probed before the card is,
+ * so we need a place to store them before the ipv4_priv structure is
+ * allocated.
+ */
+struct ipv4_node {
+	struct list_head ipv4_nodes;
+	/* guid of the remote node */
+	u64 guid;
+	/* FIFO address to transmit datagrams to, or INVALID_FIFO_ADDR */
+	u64 fifo;
+
+	spinlock_t pdg_lock;	/* partial datagram lock		*/
+	/* List of partial datagrams received from this node */
+	struct list_head pdg_list;
+	/* Number of entries in pdg_list at the moment */
+	unsigned pdg_size;
+
+	/* max payload to transmit to this remote node */
+	/* This already includes the IPV4_FRAG_HDR_SIZE overhead */
+	u16 max_payload;
+	/* outgoing datagram label */
+	u16 datagram_label;
+	/* Current node_id of the remote node */
+	u16 nodeid;
+	/* current generation of the remote node */
+	u8 generation;
+	/* max speed that this node can receive at */
+	u8 xmt_speed;
+};
+
+struct ipv4_priv {
+	spinlock_t lock;
+
+	enum ipv4_broadcast_state broadcast_state;
+	struct fw_iso_context *broadcast_rcv_context;
+	struct fw_iso_buffer broadcast_rcv_buffer;
+	void **broadcast_rcv_buffer_ptrs;
+	unsigned broadcast_rcv_next_ptr;
+	unsigned num_broadcast_rcv_ptrs;
+	unsigned rcv_buffer_size;
+	/*
+	 * This value is the maximum unfragmented datagram size that can be
+	 * sent by the hardware.  It already has the GASP overhead and the
+	 * unfragmented datagram header overhead calculated into it.
+	 */
+	unsigned broadcast_xmt_max_payload;
+	u16 broadcast_xmt_datagramlabel;
+
+	/*
+	 * The csr address that remote nodes must send datagrams to for us to
+	 * receive them.
+	 */
+	struct fw_address_handler handler;
+	u64 local_fifo;
+
+	/* Wake up to xmt	 */
+        /* struct work_struct wake;*/
+	/* List of packets to be sent */
+	struct list_head packet_list;
+	/*
+	 * List of packets that were broadcasted.  When we get an ISO interrupt
+	 * one of them has been sent
+	 */
+	struct list_head broadcasted_list;
+	/* List of packets that have been sent but not yet acked */
+	struct list_head sent_list;
+
+	struct fw_card *card;
+};
+
+/* This is our task struct. It's used for the packet complete callback.  */
+struct ipv4_packet_task {
+	/*
+	 * ptask can actually be on priv->packet_list, priv->broadcasted_list,
+	 * or priv->sent_list depending on its current state.
+	 */
+	struct list_head packet_list;
+	struct fw_transaction transaction;
+	struct ipv4_hdr hdr;
+	struct sk_buff *skb;
+	struct ipv4_priv *priv;
+	enum ipv4_tx_type tx_type;
+	int outstanding_pkts;
+	unsigned max_payload;
+	u64 fifo_addr;
+	u16 dest_node;
+	u8 generation;
+	u8 speed;
+};
+
+static struct kmem_cache *ipv4_packet_task_cache;
+
+static const char ipv4_driver_name[] = "firewire-ipv4";
+
+static const struct ieee1394_device_id ipv4_id_table[] = {
+	{
+		.match_flags  = IEEE1394_MATCH_SPECIFIER_ID |
+				IEEE1394_MATCH_VERSION,
+		.specifier_id = IPV4_GASP_SPECIFIER_ID,
+		.version      = IPV4_GASP_VERSION,
+	},
+	{ }
+};
+
+static u32 ipv4_unit_directory_data[] = {
+	0x00040000,					/* unit directory */
+	0x12000000 | IPV4_GASP_SPECIFIER_ID,	/* specifier ID */
+	0x81000003,					/* text descriptor */
+	0x13000000 | IPV4_GASP_VERSION,		/* version */
+	0x81000005,					/* text descriptor */
+
+	0x00030000,					/* Three quadlets */
+	0x00000000,					/* Text */
+	0x00000000,					/* Language 0 */
+	0x49414e41,					/* I A N A */
+	0x00030000,					/* Three quadlets */
+	0x00000000,					/* Text */
+	0x00000000,					/* Language 0 */
+	0x49507634,					/* I P v 4 */
+};
+
+static struct fw_descriptor ipv4_unit_directory = {
+	.length = ARRAY_SIZE(ipv4_unit_directory_data),
+	.key = 0xd1000000,
+	.data = ipv4_unit_directory_data
+};
+
+static int ipv4_send_packet(struct ipv4_packet_task *ptask );
+
+/* ------------------------------------------------------------------ */
+/******************************************
+ * HW Header net device functions
+ ******************************************/
+  /* These functions have been adapted from net/ethernet/eth.c */
+
+/* Create a fake MAC header for an arbitrary protocol layer.
+ * saddr=NULL means use device source address
+ * daddr=NULL means leave destination address (eg unresolved arp). */
+
+static int ipv4_header ( struct sk_buff *skb, struct net_device *dev,
+		       unsigned short type, const void *daddr,
+		       const void *saddr, unsigned len) {
+	struct ipv4_ether_hdr *eth;
+
+	eth = (struct ipv4_ether_hdr *)skb_push(skb, sizeof(*eth));
+	eth->h_proto = htons(type);
+
+	if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) {
+		memset(eth->h_dest, 0, dev->addr_len);
+		return dev->hard_header_len;
+	}
+
+	if (daddr) {
+		memcpy(eth->h_dest, daddr, dev->addr_len);
+		return dev->hard_header_len;
+	}
+
+	return -dev->hard_header_len;
+}
+
+/* Rebuild the faked MAC header. This is called after an ARP
+ * (or in future other address resolution) has completed on this
+ * sk_buff. We now let ARP fill in the other fields.
+ *
+ * This routine CANNOT use cached dst->neigh!
+ * Really, it is used only when dst->neigh is wrong.
+ */
+
+static int ipv4_rebuild_header(struct sk_buff *skb)
+{
+	struct ipv4_ether_hdr *eth;
+
+	eth = (struct ipv4_ether_hdr *)skb->data;
+	if (eth->h_proto == htons(ETH_P_IP))
+		return arp_find((unsigned char *)&eth->h_dest, skb);
+
+	fw_notify ( "%s: unable to resolve type %04x addresses\n",
+		   skb->dev->name,ntohs(eth->h_proto) );
+	return 0;
+}
+
+static int ipv4_header_cache(const struct neighbour *neigh, struct hh_cache *hh) {
+	unsigned short type = hh->hh_type;
+	struct net_device *dev;
+	struct ipv4_ether_hdr *eth;
+
+	if (type == htons(ETH_P_802_3))
+		return -1;
+	dev = neigh->dev;
+	eth = (struct ipv4_ether_hdr *)((u8 *)hh->hh_data + 16 - sizeof(*eth));
+	eth->h_proto = type;
+	memcpy(eth->h_dest, neigh->ha, dev->addr_len);
+
+	hh->hh_len = IPV4_HLEN;
+	return 0;
+}
+
+/* Called by Address Resolution module to notify changes in address. */
+static void ipv4_header_cache_update(struct hh_cache *hh, const struct net_device *dev, const unsigned char * haddr ) {
+	memcpy((u8 *)hh->hh_data + 16 - IPV4_HLEN, haddr, dev->addr_len);
+}
+
+static int ipv4_header_parse(const struct sk_buff *skb, unsigned char *haddr) {
+	memcpy(haddr, skb->dev->dev_addr, IPV4_ALEN);
+	return IPV4_ALEN;
+}
+
+static const struct header_ops ipv4_header_ops = {
+	.create         = ipv4_header,
+	.rebuild        = ipv4_rebuild_header,
+	.cache		= ipv4_header_cache,
+	.cache_update	= ipv4_header_cache_update,
+	.parse          = ipv4_header_parse,
+};
+
+/* ------------------------------------------------------------------ */
+
+/* FIXME: is this correct for all cases? */
+static bool ipv4_frag_overlap(struct ipv4_partial_datagram *pd, unsigned offset, unsigned len)
+{
+        struct ipv4_fragment_info *fi;
+	unsigned end = offset + len;
+
+	list_for_each_entry(fi, &pd->fragment_info, fragment_info) {
+		if (offset < fi->offset + fi->len && end > fi->offset) {
+			fw_debug ( "frag_overlap pd %p fi %p (%x@%x) with %x@%x\n", pd, fi, fi->len, fi->offset, len, offset );
+			return true;
+		}
+	}
+	fw_debug ( "frag_overlap %p does not overlap with %x@%x\n", pd, len, offset );
+	return false;
+}
+
+/* Assumes that new fragment does not overlap any existing fragments */
+static struct ipv4_fragment_info *ipv4_frag_new ( struct ipv4_partial_datagram *pd, unsigned offset, unsigned len ) {
+	struct ipv4_fragment_info *fi, *fi2, *new;
+	struct list_head *list;
+
+	fw_debug ( "frag_new pd %p %x@%x\n", pd, len, offset );
+	list = &pd->fragment_info;
+	list_for_each_entry(fi, &pd->fragment_info, fragment_info) {
+		if (fi->offset + fi->len == offset) {
+			/* The new fragment can be tacked on to the end */
+			/* Did the new fragment plug a hole? */
+			fi2 = list_entry(fi->fragment_info.next, struct ipv4_fragment_info, fragment_info);
+			if (fi->offset + fi->len == fi2->offset) {
+				fw_debug ( "pd %p: hole filling %p (%x@%x) and %p(%x@%x): now %x@%x\n", pd, fi, fi->len, fi->offset,
+				fi2, fi2->len, fi2->offset, fi->len + len + fi2->len, fi->offset );
+				/* glue fragments together */
+				fi->len += len + fi2->len;
+				list_del(&fi2->fragment_info);
+				kfree(fi2);
+			} else {
+				fw_debug ( "pd %p: extending %p from %x@%x to %x@%x\n", pd, fi, fi->len, fi->offset, fi->len+len, fi->offset );
+				fi->len += len;
+			}
+			return fi;
+		}
+		if (offset + len == fi->offset) {
+			/* The new fragment can be tacked on to the beginning */
+			/* Did the new fragment plug a hole? */
+			fi2 = list_entry(fi->fragment_info.prev, struct ipv4_fragment_info, fragment_info);
+			if (fi2->offset + fi2->len == fi->offset) {
+				/* glue fragments together */
+				fw_debug ( "pd %p: extending %p and merging with %p from %x@%x to %x@%x\n",
+ pd, fi2, fi, fi2->len, fi2->offset, fi2->len + fi->len + len, fi2->offset );
+				fi2->len += fi->len + len;
+				list_del(&fi->fragment_info);
+				kfree(fi);
+				return fi2;
+			}
+			fw_debug ( "pd %p: extending %p from %x@%x to %x@%x\n", pd, fi, fi->len, fi->offset, offset, fi->len + len );
+			fi->offset = offset;
+			fi->len += len;
+			return fi;
+		}
+		if (offset > fi->offset + fi->len) {
+			list = &fi->fragment_info;
+			break;
+		}
+		if (offset + len < fi->offset) {
+			list = fi->fragment_info.prev;
+			break;
+		}
+	}
+
+	new = kmalloc(sizeof(*new), GFP_ATOMIC);
+	if (!new) {
+		fw_error ( "out of memory in fragment handling!\n" );
+		return NULL;
+	}
+
+	new->offset = offset;
+	new->len = len;
+	list_add(&new->fragment_info, list);
+	fw_debug ( "pd %p: new frag %p %x@%x\n", pd, new, new->len, new->offset );
+	list_for_each_entry( fi, &pd->fragment_info, fragment_info )
+		fw_debug ( "fi %p %x@%x\n", fi, fi->len, fi->offset );
+	return new;
+}
+
+/* ------------------------------------------------------------------ */
+
+static struct ipv4_partial_datagram *ipv4_pd_new(struct net_device *netdev,
+ struct ipv4_node *node, u16 datagram_label, unsigned dg_size, u32 *frag_buf,
+ unsigned frag_off, unsigned frag_len) {
+	struct ipv4_partial_datagram *new;
+	struct ipv4_fragment_info *fi;
+
+	new = kmalloc(sizeof(*new), GFP_ATOMIC);
+	if (!new)
+		goto fail;
+	INIT_LIST_HEAD(&new->fragment_info);
+	fi = ipv4_frag_new ( new, frag_off, frag_len);
+	if ( fi == NULL )
+		goto fail_w_new;
+	new->datagram_label = datagram_label;
+	new->datagram_size = dg_size;
+	new->skb = dev_alloc_skb(dg_size + netdev->hard_header_len + 15);
+	if ( new->skb == NULL )
+		goto fail_w_fi;
+	skb_reserve(new->skb, (netdev->hard_header_len + 15) & ~15);
+	new->pbuf = skb_put(new->skb, dg_size);
+	memcpy(new->pbuf + frag_off, frag_buf, frag_len);
+	list_add_tail(&new->pdg_list, &node->pdg_list);
+	fw_debug ( "pd_new: new pd %p { dgl %u, dg_size %u, skb %p, pbuf %p } on node %p\n",
+ new, new->datagram_label, new->datagram_size, new->skb, new->pbuf, node );
+	return new;
+
+fail_w_fi:
+	kfree(fi);
+fail_w_new:
+	kfree(new);
+fail:
+	fw_error("ipv4_pd_new: no memory\n");
+	return NULL;
+}
+
+static struct ipv4_partial_datagram *ipv4_pd_find(struct ipv4_node *node, u16 datagram_label) {
+	struct ipv4_partial_datagram *pd;
+
+	list_for_each_entry(pd, &node->pdg_list, pdg_list) {
+	        if ( pd->datagram_label == datagram_label ) {
+			fw_debug ( "pd_find(node %p, label %u): pd %p\n", node, datagram_label, pd );
+			return pd;
+		}
+	}
+	fw_debug ( "pd_find(node %p, label %u) no entry\n", node, datagram_label );
+	return NULL;
+}
+
+
+static void ipv4_pd_delete ( struct ipv4_partial_datagram *old ) {
+	struct ipv4_fragment_info *fi, *n;
+
+	fw_debug ( "pd_delete %p\n", old );
+	list_for_each_entry_safe(fi, n, &old->fragment_info, fragment_info) {
+		fw_debug ( "Freeing fi %p\n", fi );
+		kfree(fi);
+	}
+	list_del(&old->pdg_list);
+	dev_kfree_skb_any(old->skb);
+	kfree(old);
+}
+
+static bool ipv4_pd_update ( struct ipv4_node *node, struct ipv4_partial_datagram *pd,
+ u32 *frag_buf, unsigned frag_off, unsigned frag_len) {
+	fw_debug ( "pd_update node %p, pd %p, frag_buf %p, %x@%x\n", node, pd, frag_buf, frag_len, frag_off );
+	if ( ipv4_frag_new ( pd, frag_off, frag_len ) == NULL)
+		return false;
+	memcpy(pd->pbuf + frag_off, frag_buf, frag_len);
+
+	/*
+	 * Move list entry to beginnig of list so that oldest partial
+	 * datagrams percolate to the end of the list
+	 */
+	list_move_tail(&pd->pdg_list, &node->pdg_list);
+	fw_debug ( "New pd list:\n" );
+	list_for_each_entry ( pd, &node->pdg_list, pdg_list ) {
+		fw_debug ( "pd %p\n", pd );
+	}
+	return true;
+}
+
+static bool ipv4_pd_is_complete ( struct ipv4_partial_datagram *pd ) {
+	struct ipv4_fragment_info *fi;
+	bool ret;
+
+	fi = list_entry(pd->fragment_info.next, struct ipv4_fragment_info, fragment_info);
+
+	ret = (fi->len == pd->datagram_size);
+	fw_debug ( "pd_is_complete (pd %p, dgs %x): fi %p (%x@%x) %s\n", pd, pd->datagram_size, fi, fi->len, fi->offset, ret ? "yes" : "no" );
+	return ret;
+}
+
+/* ------------------------------------------------------------------ */
+
+static int ipv4_node_new ( struct fw_card *card, struct fw_device *device ) {
+	struct ipv4_node *node;
+
+	node = kmalloc ( sizeof(*node), GFP_KERNEL );
+	if ( ! node ) {
+		fw_error ( "allocate new node failed\n" );
+		return -ENOMEM;
+	}
+	node->guid = (u64)device->config_rom[3] << 32 | device->config_rom[4];
+	node->fifo = INVALID_FIFO_ADDR;
+	INIT_LIST_HEAD(&node->pdg_list);
+	spin_lock_init(&node->pdg_lock);
+	node->pdg_size = 0;
+	node->generation = device->generation;
+	rmb();
+	node->nodeid = device->node_id;
+	 /* FIXME what should it really be? */
+	node->max_payload = S100_BUFFER_SIZE - IPV4_UNFRAG_HDR_SIZE;
+	node->datagram_label = 0U;
+	node->xmt_speed = device->max_speed;
+	list_add_tail ( &node->ipv4_nodes, &card->ipv4_nodes );
+	fw_debug ( "node_new: %p { guid %016llx, generation %u, nodeid %x, max_payload %x, xmt_speed %x } added\n",
+ node, (unsigned long long)node->guid, node->generation, node->nodeid, node->max_payload, node->xmt_speed );
+	return 0;
+}
+
+static struct ipv4_node *ipv4_node_find_by_guid(struct ipv4_priv *priv, u64 guid) {
+	struct ipv4_node *node;
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	list_for_each_entry(node, &priv->card->ipv4_nodes, ipv4_nodes)
+		if (node->guid == guid) {
+			/* FIXME: lock the node first? */
+			spin_unlock_irqrestore ( &priv->lock, flags );
+			fw_debug ( "node_find_by_guid (%016llx) found %p\n", (unsigned long long)guid, node );
+			return node;
+		}
+
+	spin_unlock_irqrestore ( &priv->lock, flags );
+	fw_debug ( "node_find_by_guid (%016llx) not found\n", (unsigned long long)guid );
+	return NULL;
+}
+
+static struct ipv4_node *ipv4_node_find_by_nodeid(struct ipv4_priv *priv, u16 nodeid) {
+	struct ipv4_node *node;
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	list_for_each_entry(node, &priv->card->ipv4_nodes, ipv4_nodes)
+		if (node->nodeid == nodeid) {
+			/* FIXME: lock the node first? */
+			spin_unlock_irqrestore ( &priv->lock, flags );
+			fw_debug ( "node_find_by_nodeid (%x) found %p\n", nodeid, node );
+			return node;
+		}
+	fw_debug ( "node_find_by_nodeid (%x) not found\n", nodeid );
+	spin_unlock_irqrestore ( &priv->lock, flags );
+	return NULL;
+}
+
+/* This is only complicated because we can't assume priv exists */
+static void ipv4_node_delete ( struct fw_card *card, struct fw_device *device ) {
+	struct net_device *netdev;
+	struct ipv4_priv *priv;
+	struct ipv4_node *node;
+	u64 guid;
+	unsigned long flags;
+	struct ipv4_partial_datagram *pd, *pd_next;
+
+	guid = (u64)device->config_rom[3] << 32 | device->config_rom[4];
+	netdev = card->netdev;
+	if ( netdev )
+		priv = netdev_priv ( netdev );
+	else
+		priv = NULL;
+	if ( priv )
+		spin_lock_irqsave ( &priv->lock, flags );
+	list_for_each_entry( node, &card->ipv4_nodes, ipv4_nodes ) {
+		if ( node->guid == guid ) {
+			list_del ( &node->ipv4_nodes );
+			list_for_each_entry_safe( pd, pd_next, &node->pdg_list, pdg_list )
+				ipv4_pd_delete ( pd );
+			break;
+		}
+	}
+	if ( priv )
+		spin_unlock_irqrestore ( &priv->lock, flags );
+}
+
+/* ------------------------------------------------------------------ */
+
+
+static int ipv4_finish_incoming_packet ( struct net_device *netdev,
+ struct sk_buff *skb, u16 source_node_id, bool is_broadcast, u16 ether_type ) {
+	struct ipv4_priv *priv;
+	static u64 broadcast_hw = ~0ULL;
+	int status;
+	u64 guid;
+
+	fw_debug ( "ipv4_finish_incoming_packet(%p, %p, %x, %s, %x\n",
+ netdev, skb, source_node_id, is_broadcast ? "true" : "false", ether_type );
+	priv = netdev_priv(netdev);
+	/* Write metadata, and then pass to the receive level */
+	skb->dev = netdev;
+	skb->ip_summed = CHECKSUM_UNNECESSARY;  /* don't check it */
+
+	/*
+	 * Parse the encapsulation header. This actually does the job of
+	 * converting to an ethernet frame header, as well as arp
+	 * conversion if needed. ARP conversion is easier in this
+	 * direction, since we are using ethernet as our backend.
+	 */
+	/*
+	 * If this is an ARP packet, convert it. First, we want to make
+	 * use of some of the fields, since they tell us a little bit
+	 * about the sending machine.
+	 */
+	if (ether_type == ETH_P_ARP) {
+		struct ipv4_arp *arp1394;
+		struct arphdr *arp;
+		unsigned char *arp_ptr;
+		u64 fifo_addr;
+		u8 max_rec;
+		u8 sspd;
+		u16 max_payload;
+		struct ipv4_node *node;
+		static const u16 ipv4_speed_to_max_payload[] = {
+			/* S100, S200, S400, S800, S1600, S3200 */
+			    512, 1024, 2048, 4096,  4096,  4096
+		};
+
+		/* fw_debug ( "ARP packet\n" ); */
+		arp1394 = (struct ipv4_arp *)skb->data;
+		arp = (struct arphdr *)skb->data;
+		arp_ptr = (unsigned char *)(arp + 1);
+		fifo_addr = (u64)ntohs(arp1394->fifo_hi) << 32 |
+ ntohl(arp1394->fifo_lo);
+		max_rec = priv->card->max_receive;
+		if ( arp1394->max_rec < max_rec )
+			max_rec = arp1394->max_rec;
+		sspd = arp1394->sspd;
+		/*
+		 * Sanity check. MacOSX seems to be sending us 131 in this
+		 * field (atleast on my Panther G5). Not sure why.
+		 */
+		if (sspd > 5 ) {
+			fw_notify ( "sspd %x out of range\n", sspd );
+			sspd = 0;
+		}
+
+		max_payload = min(ipv4_speed_to_max_payload[sspd],
+ (u16)(1 << (max_rec + 1))) - IPV4_UNFRAG_HDR_SIZE;
+
+		guid = be64_to_cpu(get_unaligned(&arp1394->s_uniq_id));
+		node = ipv4_node_find_by_guid(priv, guid);
+		if (!node) {
+			fw_notify ( "No node for ARP packet from %llx\n", guid );
+			goto failed_proto;
+		}
+		if ( node->nodeid != source_node_id || node->generation != priv->card->generation ) {
+			fw_notify ( "Internal error: node->nodeid (%x) != soucre_node_id (%x) or node->generation (%x) != priv->card->generation(%x)\n",
+ node->nodeid, source_node_id, node->generation, priv->card->generation );
+			node->nodeid = source_node_id;
+			node->generation = priv->card->generation;
+		}
+
+		/* FIXME: for debugging */
+		if ( sspd > SCODE_400 )
+			sspd = SCODE_400;
+		/* Update our speed/payload/fifo_offset table */
+		/*
+		 * FIXME: this does not handle cases where two high-speed endpoints must use a slower speed because of
+		 * a lower speed hub between them.  We need to look at the actual topology map here.
+		 */
+		fw_debug ( "Setting node %p fifo %llx (was %llx), max_payload %x (was %x), speed %x (was %x)\n",
+ node, fifo_addr, node->fifo, max_payload, node->max_payload, sspd, node->xmt_speed );
+		node->fifo =	fifo_addr;
+		node->max_payload = max_payload;
+		/*
+		 * Only allow speeds to go down from their initial value.
+		 * Otherwise a local node that can only do S400 or slower may
+		 * be told to transmit at S800 to a faster remote node.
+		 */
+		if ( node->xmt_speed > sspd )
+			node->xmt_speed = sspd;
+
+		/*
+		 * Now that we're done with the 1394 specific stuff, we'll
+		 * need to alter some of the data.  Believe it or not, all
+		 * that needs to be done is sender_IP_address needs to be
+		 * moved, the destination hardware address get stuffed
+		 * in and the hardware address length set to 8.
+		 *
+		 * IMPORTANT: The code below overwrites 1394 specific data
+		 * needed above so keep the munging of the data for the
+		 * higher level IP stack last.
+		 */
+
+		arp->ar_hln = 8;
+		arp_ptr += arp->ar_hln;		/* skip over sender unique id */
+		*(u32 *)arp_ptr = arp1394->sip; /* move sender IP addr */
+		arp_ptr += arp->ar_pln;		/* skip over sender IP addr */
+
+		if (arp->ar_op == htons(ARPOP_REQUEST))
+			memset(arp_ptr, 0, sizeof(u64));
+		else
+			memcpy(arp_ptr, netdev->dev_addr, sizeof(u64));
+	}
+
+	/* Now add the ethernet header. */
+	guid = cpu_to_be64(priv->card->guid);
+	if (dev_hard_header(skb, netdev, ether_type, is_broadcast ? &broadcast_hw : &guid, NULL,
+ skb->len) >= 0) {
+		struct ipv4_ether_hdr *eth;
+		u16 *rawp;
+		__be16 protocol;
+
+		skb_reset_mac_header(skb);
+		skb_pull(skb, sizeof(*eth));
+		eth = ipv4_ether_hdr(skb);
+		if (*eth->h_dest & 1) {
+			if (memcmp(eth->h_dest, netdev->broadcast, netdev->addr_len) == 0) {
+				fw_debug ( "Broadcast\n" );
+				skb->pkt_type = PACKET_BROADCAST;
+			}
+#if 0
+			else
+				skb->pkt_type = PACKET_MULTICAST;
+#endif
+		} else {
+			if (memcmp(eth->h_dest, netdev->dev_addr, netdev->addr_len)) {
+				u64 a1, a2;
+
+				memcpy ( &a1, eth->h_dest, sizeof(u64));
+				memcpy ( &a2, netdev->dev_addr, sizeof(u64));
+				fw_debug ( "Otherhost %llx %llx %x\n", a1, a2, netdev->addr_len );
+				skb->pkt_type = PACKET_OTHERHOST;
+			}
+		}
+		if (ntohs(eth->h_proto) >= 1536) {
+			fw_debug ( " proto %x %x\n", eth->h_proto, ntohs(eth->h_proto) );
+			protocol = eth->h_proto;
+		} else {
+			rawp = (u16 *)skb->data;
+			if (*rawp == 0xFFFF) {
+				fw_debug ( "proto 802_3\n" );
+				protocol = htons(ETH_P_802_3);
+			} else {
+				fw_debug ( "proto 802_2\n" );
+				protocol = htons(ETH_P_802_2);
+			}
+		}
+		skb->protocol = protocol;
+	}
+	status = netif_rx(skb);
+	if ( status == NET_RX_DROP) {
+		netdev->stats.rx_errors++;
+		netdev->stats.rx_dropped++;
+	} else {
+		netdev->stats.rx_packets++;
+		netdev->stats.rx_bytes += skb->len;
+	}
+	if (netif_queue_stopped(netdev))
+		netif_wake_queue(netdev);
+	return 0;
+
+ failed_proto:
+	netdev->stats.rx_errors++;
+	netdev->stats.rx_dropped++;
+	dev_kfree_skb_any(skb);
+	if (netif_queue_stopped(netdev))
+		netif_wake_queue(netdev);
+	netdev->last_rx = jiffies;
+	return 0;
+}
+
+/* ------------------------------------------------------------------ */
+
+static int ipv4_incoming_packet ( struct ipv4_priv *priv, u32 *buf, int len, u16 source_node_id, bool is_broadcast ) {
+	struct sk_buff *skb;
+	struct net_device *netdev;
+	struct ipv4_hdr hdr;
+	unsigned lf;
+	unsigned long flags;
+	struct ipv4_node *node;
+	struct ipv4_partial_datagram *pd;
+	int fg_off;
+	int dg_size;
+	u16 datagram_label;
+	int retval;
+	u16 ether_type;
+
+	fw_debug ( "ipv4_incoming_packet(%p, %p, %d, %x, %s)\n", priv, buf, len, source_node_id, is_broadcast ? "true" : "false" );
+	netdev = priv->card->netdev;
+
+	hdr.w0 = ntohl(buf[0]);
+	lf = ipv4_get_hdr_lf(&hdr);
+	if ( lf == IPV4_HDR_UNFRAG ) {
+		/*
+		 * An unfragmented datagram has been received by the ieee1394
+		 * bus. Build an skbuff around it so we can pass it to the
+		 * high level network layer.
+		 */
+		ether_type = ipv4_get_hdr_ether_type(&hdr);
+		fw_debug ( "header w0 = %x, lf = %x, ether_type = %x\n", hdr.w0, lf, ether_type );
+		buf++;
+		len -= IPV4_UNFRAG_HDR_SIZE;
+
+		skb = dev_alloc_skb(len + netdev->hard_header_len + 15);
+		if (unlikely(!skb)) {
+			fw_error ( "Out of memory for incoming packet\n");
+			netdev->stats.rx_dropped++;
+			return -1;
+		}
+		skb_reserve(skb, (netdev->hard_header_len + 15) & ~15);
+		memcpy(skb_put(skb, len), buf, len );
+		return ipv4_finish_incoming_packet(netdev, skb, source_node_id, is_broadcast, ether_type );
+	}
+	/* A datagram fragment has been received, now the fun begins. */
+	hdr.w1 = ntohl(buf[1]);
+	buf +=2;
+	len -= IPV4_FRAG_HDR_SIZE;
+	if ( lf ==IPV4_HDR_FIRSTFRAG ) {
+		ether_type = ipv4_get_hdr_ether_type(&hdr);
+		fg_off = 0;
+	} else {
+		fg_off = ipv4_get_hdr_fg_off(&hdr);
+		ether_type = 0; /* Shut up compiler! */
+	}
+	datagram_label = ipv4_get_hdr_dgl(&hdr);
+	dg_size = ipv4_get_hdr_dg_size(&hdr); /* ??? + 1 */
+	fw_debug ( "fragmented: %x.%x = lf %x, ether_type %x, fg_off %x, dgl %x, dg_size %x\n", hdr.w0, hdr.w1, lf, ether_type, fg_off, datagram_label, dg_size );
+	node = ipv4_node_find_by_nodeid ( priv, source_node_id);
+	spin_lock_irqsave(&node->pdg_lock, flags);
+	pd = ipv4_pd_find( node, datagram_label );
+	if (pd == NULL) {
+		while ( node->pdg_size >= ipv4_mpd ) {
+			/* remove the oldest */
+			ipv4_pd_delete ( list_first_entry(&node->pdg_list, struct ipv4_partial_datagram, pdg_list) );
+			node->pdg_size--;
+		}
+		pd = ipv4_pd_new ( netdev, node, datagram_label, dg_size,
+ buf, fg_off, len);
+		if ( pd == NULL) {
+			retval = -ENOMEM;
+			goto bad_proto;
+		}
+		node->pdg_size++;
+	} else {
+		if (ipv4_frag_overlap(pd, fg_off, len) || pd->datagram_size != dg_size) {
+			/*
+			 * Differing datagram sizes or overlapping fragments,
+			 * Either way the remote machine is playing silly buggers
+			 * with us: obliterate the old datagram and start a new one.
+			 */
+			ipv4_pd_delete ( pd );
+			pd = ipv4_pd_new ( netdev, node, datagram_label,
+ dg_size, buf, fg_off, len);
+			if ( pd == NULL ) {
+				retval = -ENOMEM;
+				node->pdg_size--;
+				goto bad_proto;
+			}
+		} else {
+			bool worked;
+
+			worked = ipv4_pd_update ( node, pd,
+ buf, fg_off, len );
+			if ( ! worked ) {
+				/*
+				 * Couldn't save off fragment anyway
+				 * so might as well obliterate the
+				 * datagram now.
+				 */
+				ipv4_pd_delete ( pd );
+				node->pdg_size--;
+				goto bad_proto;
+			}
+		}
+	} /* new datagram or add to existing one */
+
+	if ( lf == IPV4_HDR_FIRSTFRAG )
+		pd->ether_type = ether_type;
+	if ( ipv4_pd_is_complete ( pd ) ) {
+		ether_type = pd->ether_type;
+		node->pdg_size--;
+		skb = skb_get(pd->skb);
+		ipv4_pd_delete ( pd );
+		spin_unlock_irqrestore(&node->pdg_lock, flags);
+		return ipv4_finish_incoming_packet ( netdev, skb, source_node_id, false, ether_type );
+	}
+	/*
+	 * Datagram is not complete, we're done for the
+	 * moment.
+	 */
+	spin_unlock_irqrestore(&node->pdg_lock, flags);
+	return 0;
+
+ bad_proto:
+	spin_unlock_irqrestore(&node->pdg_lock, flags);
+	if (netif_queue_stopped(netdev))
+		netif_wake_queue(netdev);
+	return 0;
+}
+
+static void ipv4_receive_packet ( struct fw_card *card, struct fw_request *r,
+ int tcode, int destination, int source, int generation, int speed,
+ unsigned long long offset, void *payload, size_t length, void *callback_data ) {
+	struct ipv4_priv *priv;
+	int status;
+
+	fw_debug ( "ipv4_receive_packet(%p,%p,%x,%x,%x,%x,%x,%llx,%p,%lx,%p)\n",
+ card, r, tcode, destination, source, generation, speed, offset, payload,
+ (unsigned long)length, callback_data);
+	print_hex_dump ( KERN_DEBUG, "header: ", DUMP_PREFIX_OFFSET, 32, 1, payload, length, false );
+	priv = callback_data;
+	if (   tcode != TCODE_WRITE_BLOCK_REQUEST
+	    || destination != card->node_id
+	    || generation != card->generation
+	    || offset != priv->handler.offset ) {
+		fw_send_response(card, r, RCODE_CONFLICT_ERROR);
+		fw_debug("Conflict error card node_id=%x, card generation=%x, local offset %llx\n",
+ card->node_id, card->generation, (unsigned long long)priv->handler.offset );
+		return;
+	}
+	status = ipv4_incoming_packet ( priv, payload, length, source, false );
+	if ( status != 0 ) {
+		fw_error ( "Incoming packet failure\n" );
+		fw_send_response ( card, r, RCODE_CONFLICT_ERROR );
+		return;
+	}
+	fw_send_response ( card, r, RCODE_COMPLETE );
+}
+
+static void ipv4_receive_broadcast(struct fw_iso_context *context, u32 cycle,
+ size_t header_length, void *header, void *data) {
+	struct ipv4_priv *priv;
+	struct fw_iso_packet packet;
+	struct fw_card *card;
+	u16 *hdr_ptr;
+	u32 *buf_ptr;
+	int retval;
+	u32 length;
+	u16 source_node_id;
+	u32 specifier_id;
+	u32 ver;
+	unsigned long offset;
+	unsigned long flags;
+
+	fw_debug ( "ipv4_receive_broadcast ( context=%p, cycle=%x, header_length=%lx, header=%p, data=%p )\n", context, cycle, (unsigned long)header_length, header, data );
+	print_hex_dump ( KERN_DEBUG, "header: ", DUMP_PREFIX_OFFSET, 32, 1, header, header_length, false );
+	priv = data;
+	card = priv->card;
+	hdr_ptr = header;
+	length = ntohs(hdr_ptr[0]);
+	spin_lock_irqsave(&priv->lock,flags);
+	offset = priv->rcv_buffer_size * priv->broadcast_rcv_next_ptr;
+	buf_ptr = priv->broadcast_rcv_buffer_ptrs[priv->broadcast_rcv_next_ptr++];
+	if ( priv->broadcast_rcv_next_ptr == priv->num_broadcast_rcv_ptrs )
+		priv->broadcast_rcv_next_ptr = 0;
+	spin_unlock_irqrestore(&priv->lock,flags);
+	fw_debug ( "length %u at %p\n", length, buf_ptr );
+	print_hex_dump ( KERN_DEBUG, "buffer: ", DUMP_PREFIX_OFFSET, 32, 1, buf_ptr, length, false );
+
+	specifier_id =    (be32_to_cpu(buf_ptr[0]) & 0xffff) << 8
+			| (be32_to_cpu(buf_ptr[1]) & 0xff000000) >> 24;
+	ver = be32_to_cpu(buf_ptr[1]) & 0xFFFFFF;
+	source_node_id = be32_to_cpu(buf_ptr[0]) >> 16;
+	/* fw_debug ( "source %x SpecID %x ver %x\n", source_node_id, specifier_id, ver ); */
+	if ( specifier_id == IPV4_GASP_SPECIFIER_ID && ver == IPV4_GASP_VERSION ) {
+		buf_ptr += 2;
+		length -= IPV4_GASP_OVERHEAD;
+		ipv4_incoming_packet(priv, buf_ptr, length, source_node_id, true);
+	} else
+		fw_debug ( "Ignoring packet: not GASP\n" );
+	packet.payload_length = priv->rcv_buffer_size;
+	packet.interrupt = 1;
+	packet.skip = 0;
+	packet.tag = 3;
+	packet.sy = 0;
+	packet.header_length = IPV4_GASP_OVERHEAD;
+	spin_lock_irqsave(&priv->lock,flags);
+	retval = fw_iso_context_queue ( priv->broadcast_rcv_context, &packet,
+ &priv->broadcast_rcv_buffer, offset );
+	spin_unlock_irqrestore(&priv->lock,flags);
+	if ( retval < 0 )
+		fw_error ( "requeue failed\n" );
+}
+
+static void debug_ptask ( struct ipv4_packet_task *ptask ) {
+	static const char *tx_types[] = { "Unknown", "GASP", "Write" };
+
+	fw_debug ( "packet %p { hdr { w0 %x w1 %x }, skb %p, priv %p,"
+ " tx_type %s, outstanding_pkts %d, max_payload %x, fifo %llx,"
+ " speed %x, dest_node %x, generation %x }\n",
+ ptask, ptask->hdr.w0, ptask->hdr.w1, ptask->skb, ptask->priv,
+ ptask->tx_type > IPV4_WRREQ ? "Invalid" : tx_types[ptask->tx_type],
+ ptask->outstanding_pkts,  ptask->max_payload,
+ ptask->fifo_addr, ptask->speed, ptask->dest_node, ptask->generation );
+	print_hex_dump ( KERN_DEBUG, "packet :", DUMP_PREFIX_OFFSET, 32, 1,
+ ptask->skb->data, ptask->skb->len, false );
+}
+
+static void ipv4_transmit_packet_done ( struct ipv4_packet_task *ptask ) {
+	struct ipv4_priv *priv;
+	unsigned long flags;
+
+	priv = ptask->priv;
+	spin_lock_irqsave ( &priv->lock, flags );
+	list_del ( &ptask->packet_list );
+	spin_unlock_irqrestore ( &priv->lock, flags );
+	ptask->outstanding_pkts--;
+	if ( ptask->outstanding_pkts > 0 ) {
+		u16 dg_size;
+		u16 fg_off;
+		u16 datagram_label;
+		u16 lf;
+		struct sk_buff *skb;
+
+		/* Update the ptask to point to the next fragment and send it */
+		lf = ipv4_get_hdr_lf(&ptask->hdr);
+		switch (lf) {
+		case IPV4_HDR_LASTFRAG:
+		case IPV4_HDR_UNFRAG:
+		default:
+			fw_error ( "Outstanding packet %x lf %x, header %x,%x\n", ptask->outstanding_pkts, lf, ptask->hdr.w0, ptask->hdr.w1 );
+			BUG();
+
+		case IPV4_HDR_FIRSTFRAG:
+			/* Set frag type here for future interior fragments */
+			dg_size = ipv4_get_hdr_dg_size(&ptask->hdr);
+			fg_off = ptask->max_payload - IPV4_FRAG_HDR_SIZE;
+			datagram_label = ipv4_get_hdr_dgl(&ptask->hdr);
+			break;
+
+		case IPV4_HDR_INTFRAG:
+			dg_size = ipv4_get_hdr_dg_size(&ptask->hdr);
+			fg_off = ipv4_get_hdr_fg_off(&ptask->hdr) + ptask->max_payload - IPV4_FRAG_HDR_SIZE;
+			datagram_label = ipv4_get_hdr_dgl(&ptask->hdr);
+			break;
+		}
+		skb = ptask->skb;
+		skb_pull ( skb, ptask->max_payload );
+		if ( ptask->outstanding_pkts > 1 ) {
+			ipv4_make_sf_hdr ( &ptask->hdr,
+  IPV4_HDR_INTFRAG, dg_size, fg_off, datagram_label );
+		} else {
+			ipv4_make_sf_hdr ( &ptask->hdr,
+  IPV4_HDR_LASTFRAG, dg_size, fg_off, datagram_label );
+			ptask->max_payload = skb->len + IPV4_FRAG_HDR_SIZE;
+
+		}
+		ipv4_send_packet ( ptask );
+	} else {
+		dev_kfree_skb_any ( ptask->skb );
+		kmem_cache_free( ipv4_packet_task_cache, ptask );
+	}
+}
+
+static void ipv4_write_complete ( struct fw_card *card, int rcode,
+ void *payload, size_t length, void *data ) {
+	struct ipv4_packet_task *ptask;
+
+	ptask = data;
+	fw_debug ( "ipv4_write_complete ( %p, %x, %p, %lx, %p )\n",
+ card, rcode, payload, (unsigned long)length, data );
+	debug_ptask ( ptask );
+
+	if ( rcode == RCODE_COMPLETE ) {
+		ipv4_transmit_packet_done ( ptask );
+	} else {
+		fw_error ( "ipv4_write_complete: failed: %x\n", rcode );
+		/* ??? error recovery */
+	}
+}
+
+static int ipv4_send_packet ( struct ipv4_packet_task *ptask ) {
+	struct ipv4_priv *priv;
+	unsigned tx_len;
+	struct ipv4_hdr *bufhdr;
+	unsigned long flags;
+	struct net_device *netdev;
+#if 0 /* stefanr */
+	int retval;
+#endif
+
+	fw_debug ( "ipv4_send_packet\n" );
+	debug_ptask ( ptask );
+	priv = ptask->priv;
+	tx_len = ptask->max_payload;
+	switch (ipv4_get_hdr_lf(&ptask->hdr)) {
+	case IPV4_HDR_UNFRAG:
+		bufhdr = (struct ipv4_hdr *)skb_push(ptask->skb, IPV4_UNFRAG_HDR_SIZE);
+		bufhdr->w0 = htonl(ptask->hdr.w0);
+		break;
+
+	case IPV4_HDR_FIRSTFRAG:
+	case IPV4_HDR_INTFRAG:
+	case IPV4_HDR_LASTFRAG:
+		bufhdr = (struct ipv4_hdr *)skb_push(ptask->skb, IPV4_FRAG_HDR_SIZE);
+		bufhdr->w0 = htonl(ptask->hdr.w0);
+		bufhdr->w1 = htonl(ptask->hdr.w1);
+		break;
+
+	default:
+		BUG();
+	}
+	if ( ptask->tx_type == IPV4_GASP ) {
+		u32 *packets;
+		int generation;
+		int nodeid;
+
+		/* ptask->generation may not have been set yet */
+		generation = priv->card->generation;
+		smp_rmb();
+		nodeid = priv->card->node_id;
+		packets = (u32 *)skb_push(ptask->skb, sizeof(u32)*2);
+		packets[0] = htonl(nodeid << 16 | (IPV4_GASP_SPECIFIER_ID>>8));
+		packets[1] = htonl((IPV4_GASP_SPECIFIER_ID & 0xFF) << 24 | IPV4_GASP_VERSION);
+		fw_send_request ( priv->card, &ptask->transaction, TCODE_STREAM_DATA,
+ fw_stream_packet_destination_id(3, BROADCAST_CHANNEL, 0),
+ generation, SCODE_100, 0ULL, ptask->skb->data, tx_len + 8, ipv4_write_complete, ptask );
+		spin_lock_irqsave(&priv->lock,flags);
+		list_add_tail ( &ptask->packet_list, &priv->broadcasted_list );
+		spin_unlock_irqrestore(&priv->lock,flags);
+#if 0 /* stefanr */
+		return retval;
+#else
+		return 0;
+#endif
+	}
+	fw_debug("send_request (%p, %p, WRITE_BLOCK, %x, %x, %x, %llx, %p, %d, %p, %p\n",
+ priv->card, &ptask->transaction, ptask->dest_node, ptask->generation,
+ ptask->speed, (unsigned long long)ptask->fifo_addr, ptask->skb->data, tx_len,
+ ipv4_write_complete, ptask );
+	fw_send_request ( priv->card, &ptask->transaction,
+ TCODE_WRITE_BLOCK_REQUEST, ptask->dest_node, ptask->generation, ptask->speed,
+ ptask->fifo_addr, ptask->skb->data, tx_len, ipv4_write_complete, ptask );
+	spin_lock_irqsave(&priv->lock,flags);
+	list_add_tail ( &ptask->packet_list, &priv->sent_list );
+	spin_unlock_irqrestore(&priv->lock,flags);
+	netdev = priv->card->netdev;
+	netdev->trans_start = jiffies;
+	return 0;
+}
+
+static int ipv4_broadcast_start ( struct ipv4_priv *priv ) {
+	struct fw_iso_context *context;
+	int retval;
+	unsigned num_packets;
+	unsigned max_receive;
+	struct fw_iso_packet packet;
+	unsigned long offset;
+	unsigned u;
+	/* unsigned transmit_speed; */
+
+#if 0 /* stefanr */
+	if ( priv->card->broadcast_channel != (BROADCAST_CHANNEL_VALID|BROADCAST_CHANNEL_INITIAL)) {
+		fw_notify ( "Invalid broadcast channel %x\n", priv->card->broadcast_channel );
+		/* FIXME: try again later? */
+		/* return -EINVAL; */
+	}
+#endif
+	if ( priv->local_fifo == INVALID_FIFO_ADDR ) {
+		struct fw_address_region region;
+
+		priv->handler.length = FIFO_SIZE;
+		priv->handler.address_callback = ipv4_receive_packet;
+		priv->handler.callback_data = priv;
+		/* FIXME: this is OHCI, but what about others? */
+		region.start = 0xffff00000000ULL;
+		region.end =   0xfffffffffffcULL;
+
+		retval = fw_core_add_address_handler ( &priv->handler, &region );
+		if ( retval < 0 )
+			goto failed_initial;
+		priv->local_fifo = priv->handler.offset;
+	}
+
+	/*
+	 * FIXME: rawiso limits us to PAGE_SIZE.  This only matters if we ever have
+	 * a machine with PAGE_SIZE < 4096
+	 */
+	max_receive = 1U << (priv->card->max_receive + 1);
+	num_packets = ( ipv4_iso_page_count * PAGE_SIZE ) / max_receive;
+	if ( ! priv->broadcast_rcv_context ) {
+		void **ptrptr;
+
+		context = fw_iso_context_create ( priv->card,
+ FW_ISO_CONTEXT_RECEIVE, BROADCAST_CHANNEL,
+ priv->card->link_speed, 8, ipv4_receive_broadcast, priv );
+		if (IS_ERR(context)) {
+			retval = PTR_ERR(context);
+			goto failed_context_create;
+		}
+		retval = fw_iso_buffer_init ( &priv->broadcast_rcv_buffer,
+ priv->card, ipv4_iso_page_count, DMA_FROM_DEVICE );
+		if ( retval < 0 )
+			goto failed_buffer_init;
+		ptrptr = kmalloc ( sizeof(void*)*num_packets, GFP_KERNEL );
+		if ( ! ptrptr ) {
+			retval = -ENOMEM;
+			goto failed_ptrs_alloc;
+		}
+		priv->broadcast_rcv_buffer_ptrs = ptrptr;
+		for ( u = 0; u < ipv4_iso_page_count; u++ ) {
+			void *ptr;
+			unsigned v;
+
+			ptr = kmap ( priv->broadcast_rcv_buffer.pages[u] );
+			for ( v = 0; v < num_packets / ipv4_iso_page_count; v++ )
+				*ptrptr++ = (void *)((char *)ptr + v * max_receive);
+		}
+		priv->broadcast_rcv_context = context;
+	} else
+		context = priv->broadcast_rcv_context;
+
+	packet.payload_length = max_receive;
+	packet.interrupt = 1;
+	packet.skip = 0;
+	packet.tag = 3;
+	packet.sy = 0;
+	packet.header_length = IPV4_GASP_OVERHEAD;
+	offset = 0;
+	for ( u = 0; u < num_packets; u++ ) {
+		retval = fw_iso_context_queue ( context, &packet,
+ &priv->broadcast_rcv_buffer, offset );
+		if ( retval < 0 )
+			goto failed_rcv_queue;
+		offset += max_receive;
+	}
+	priv->num_broadcast_rcv_ptrs = num_packets;
+	priv->rcv_buffer_size = max_receive;
+	priv->broadcast_rcv_next_ptr = 0U;
+	retval = fw_iso_context_start ( context, -1, 0, FW_ISO_CONTEXT_MATCH_ALL_TAGS ); /* ??? sync */
+	if ( retval < 0 )
+		goto failed_rcv_queue;
+	/* FIXME: adjust this when we know the max receive speeds of all other IP nodes on the bus. */
+	/* since we only xmt at S100 ??? */
+	priv->broadcast_xmt_max_payload = S100_BUFFER_SIZE - IPV4_GASP_OVERHEAD - IPV4_UNFRAG_HDR_SIZE;
+	priv->broadcast_state = IPV4_BROADCAST_RUNNING;
+	return 0;
+
+ failed_rcv_queue:
+	kfree ( priv->broadcast_rcv_buffer_ptrs );
+	priv->broadcast_rcv_buffer_ptrs = NULL;
+ failed_ptrs_alloc:
+	fw_iso_buffer_destroy ( &priv->broadcast_rcv_buffer, priv->card );
+ failed_buffer_init:
+	fw_iso_context_destroy ( context );
+	priv->broadcast_rcv_context = NULL;
+ failed_context_create:
+	fw_core_remove_address_handler ( &priv->handler );
+ failed_initial:
+	priv->local_fifo = INVALID_FIFO_ADDR;
+	return retval;
+}
+
+/* This is called after an "ifup" */
+static int ipv4_open(struct net_device *dev) {
+	struct ipv4_priv *priv;
+	int ret;
+
+	priv = netdev_priv(dev);
+	if (priv->broadcast_state == IPV4_BROADCAST_ERROR) {
+		ret = ipv4_broadcast_start ( priv );
+		if (ret)
+			return ret;
+	}
+	netif_start_queue(dev);
+	return 0;
+}
+
+/* This is called after an "ifdown" */
+static int ipv4_stop(struct net_device *netdev)
+{
+	/* flush priv->wake */
+	/* flush_scheduled_work(); */
+
+	netif_stop_queue(netdev);
+	return 0;
+}
+
+/* Transmit a packet (called by kernel) */
+static int ipv4_tx(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct ipv4_ether_hdr hdr_buf;
+	struct ipv4_priv *priv = netdev_priv(netdev);
+	__be16 proto;
+	u16 dest_node;
+	enum ipv4_tx_type tx_type;
+	unsigned max_payload;
+	u16 dg_size;
+	u16 *datagram_label_ptr;
+	struct ipv4_packet_task *ptask;
+	struct ipv4_node *node = NULL;
+
+	ptask = kmem_cache_alloc(ipv4_packet_task_cache, GFP_ATOMIC);
+	if (ptask == NULL)
+		goto fail;
+
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (!skb)
+		goto fail;
+
+	/*
+	 * Get rid of the fake ipv4 header, but first make a copy.
+	 * We might need to rebuild the header on tx failure.
+	 */
+	memcpy(&hdr_buf, skb->data, sizeof(hdr_buf));
+	skb_pull(skb, sizeof(hdr_buf));
+
+	proto = hdr_buf.h_proto;
+	dg_size = skb->len;
+
+	/*
+	 * Set the transmission type for the packet.  ARP packets and IP
+	 * broadcast packets are sent via GASP.
+	 */
+	if (   memcmp(hdr_buf.h_dest, netdev->broadcast, IPV4_ALEN) == 0
+	    || proto == htons(ETH_P_ARP)
+	    || (   proto == htons(ETH_P_IP)
+		&& IN_MULTICAST(ntohl(ip_hdr(skb)->daddr)) ) ) {
+		/* fw_debug ( "transmitting arp or multicast packet\n" );*/
+		tx_type = IPV4_GASP;
+		dest_node = ALL_NODES;
+		max_payload = priv->broadcast_xmt_max_payload;
+		/* BUG_ON(max_payload < S100_BUFFER_SIZE - IPV4_GASP_OVERHEAD); */
+		datagram_label_ptr = &priv->broadcast_xmt_datagramlabel;
+		ptask->fifo_addr = INVALID_FIFO_ADDR;
+		ptask->generation = 0U;
+		ptask->dest_node = 0U;
+		ptask->speed = 0;
+	} else {
+		__be64 guid = get_unaligned((u64 *)hdr_buf.h_dest);
+		u8 generation;
+
+		node = ipv4_node_find_by_guid(priv, be64_to_cpu(guid));
+		if (!node) {
+			fw_debug ( "Normal packet but no node\n" );
+			goto fail;
+		}
+
+		if (node->fifo == INVALID_FIFO_ADDR) {
+			fw_debug ( "Normal packet but no fifo addr\n" );
+			goto fail;
+		}
+
+		/* fw_debug ( "Transmitting normal packet to %x at %llxx\n", node->nodeid, node->fifo ); */
+		generation = node->generation;
+		dest_node = node->nodeid;
+		max_payload = node->max_payload;
+		/* BUG_ON(max_payload < S100_BUFFER_SIZE - IPV4_FRAG_HDR_SIZE); */
+
+		datagram_label_ptr = &node->datagram_label;
+		tx_type = IPV4_WRREQ;
+		ptask->fifo_addr = node->fifo;
+		ptask->generation = generation;
+		ptask->dest_node = dest_node;
+		ptask->speed = node->xmt_speed;
+	}
+
+	/* If this is an ARP packet, convert it */
+	if (proto == htons(ETH_P_ARP)) {
+		/* Convert a standard ARP packet to 1394 ARP. The first 8 bytes (the entire
+		 * arphdr) is the same format as the ip1394 header, so they overlap.  The rest
+		 * needs to be munged a bit.  The remainder of the arphdr is formatted based
+		 * on hwaddr len and ipaddr len.  We know what they'll be, so it's easy to
+		 * judge.
+		 *
+		 * Now that the EUI is used for the hardware address all we need to do to make
+		 * this work for 1394 is to insert 2 quadlets that contain max_rec size,
+		 * speed, and unicast FIFO address information between the sender_unique_id
+		 * and the IP addresses.
+		 */
+		struct arphdr *arp = (struct arphdr *)skb->data;
+		unsigned char *arp_ptr = (unsigned char *)(arp + 1);
+		struct ipv4_arp *arp1394 = (struct ipv4_arp *)skb->data;
+		u32 ipaddr;
+
+		ipaddr = *(u32*)(arp_ptr + IPV4_ALEN);
+		arp1394->hw_addr_len    = 16;
+		arp1394->max_rec        = priv->card->max_receive;
+		arp1394->sspd		= priv->card->link_speed;
+		arp1394->fifo_hi	= htons(priv->local_fifo >> 32);
+		arp1394->fifo_lo        = htonl(priv->local_fifo & 0xFFFFFFFF);
+		arp1394->sip		= ipaddr;
+	}
+	if ( ipv4_max_xmt && max_payload > ipv4_max_xmt )
+		max_payload = ipv4_max_xmt;
+
+	ptask->hdr.w0 = 0;
+	ptask->hdr.w1 = 0;
+	ptask->skb = skb;
+	ptask->priv = priv;
+        ptask->tx_type = tx_type;
+	/* Does it all fit in one packet? */
+	if ( dg_size <= max_payload ) {
+		ipv4_make_uf_hdr(&ptask->hdr, be16_to_cpu(proto));
+		ptask->outstanding_pkts = 1;
+		max_payload = dg_size + IPV4_UNFRAG_HDR_SIZE;
+	} else {
+		u16 datagram_label;
+
+		max_payload -= IPV4_FRAG_OVERHEAD;
+		datagram_label = (*datagram_label_ptr)++;
+		ipv4_make_ff_hdr(&ptask->hdr, be16_to_cpu(proto), dg_size, datagram_label );
+		ptask->outstanding_pkts = DIV_ROUND_UP(dg_size, max_payload);
+		max_payload += IPV4_FRAG_HDR_SIZE;
+	}
+	ptask->max_payload = max_payload;
+	ipv4_send_packet ( ptask );
+	return NETDEV_TX_OK;
+
+ fail:
+	if (ptask)
+		kmem_cache_free(ipv4_packet_task_cache, ptask);
+
+	if (skb != NULL)
+		dev_kfree_skb(skb);
+
+	netdev->stats.tx_dropped++;
+	netdev->stats.tx_errors++;
+
+	/*
+	 * FIXME: According to a patch from 2003-02-26, "returning non-zero
+	 * causes serious problems" here, allegedly.  Before that patch,
+	 * -ERRNO was returned which is not appropriate under Linux 2.6.
+	 * Perhaps more needs to be done?  Stop the queue in serious
+	 * conditions and restart it elsewhere?
+	 */
+	return NETDEV_TX_OK;
+}
+
+/*
+ * FIXME: What to do if we timeout? I think a host reset is probably in order,
+ * so that's what we do. Should we increment the stat counters too?
+ */
+static void ipv4_tx_timeout(struct net_device *dev) {
+	struct ipv4_priv *priv;
+
+	priv = netdev_priv(dev);
+	fw_error ( "%s: Timeout, resetting host\n", dev->name );
+#if 0 /* stefanr */
+	fw_core_initiate_bus_reset ( priv->card, 1 );
+#endif
+}
+
+static int ipv4_change_mtu ( struct net_device *dev, int new_mtu ) {
+#if 0
+	int max_mtu;
+	struct ipv4_priv *priv;
+#endif
+
+	if (new_mtu < 68)
+		return -EINVAL;
+
+#if 0
+	priv = netdev_priv(dev);
+	/* This is not actually true because we can fragment packets at the firewire layer */
+	max_mtu = (1 << (priv->card->max_receive + 1))
+		                - sizeof(struct ipv4_hdr) - IPV4_GASP_OVERHEAD;
+	if (new_mtu > max_mtu) {
+		fw_notify ( "%s: Local node constrains MTU to %d\n", dev->name, max_mtu);
+		return -ERANGE;
+	}
+#endif
+	dev->mtu = new_mtu;
+	return 0;
+}
+
+static void ipv4_get_drvinfo(struct net_device *dev,
+struct ethtool_drvinfo *info) {
+	strcpy(info->driver, ipv4_driver_name);
+	strcpy(info->bus_info, "ieee1394"); /* FIXME provide more detail? */
+}
+
+static struct ethtool_ops ipv4_ethtool_ops = {
+	.get_drvinfo = ipv4_get_drvinfo,
+};
+
+static const struct net_device_ops ipv4_netdev_ops = {
+	.ndo_open       = ipv4_open,
+	.ndo_stop	= ipv4_stop,
+	.ndo_start_xmit = ipv4_tx,
+	.ndo_tx_timeout = ipv4_tx_timeout,
+	.ndo_change_mtu = ipv4_change_mtu,
+};
+
+static void ipv4_init_dev ( struct net_device *dev ) {
+	dev->header_ops		= &ipv4_header_ops;
+	dev->netdev_ops         = &ipv4_netdev_ops;
+	SET_ETHTOOL_OPS(dev, &ipv4_ethtool_ops);
+
+	dev->watchdog_timeo	= IPV4_TIMEOUT;
+	dev->flags		= IFF_BROADCAST | IFF_MULTICAST;
+	dev->features		= NETIF_F_HIGHDMA;
+	dev->addr_len		= IPV4_ALEN;
+	dev->hard_header_len	= IPV4_HLEN;
+	dev->type		= ARPHRD_IEEE1394;
+
+	/* FIXME: This value was copied from ether_setup(). Is it too much? */
+	dev->tx_queue_len	= 1000;
+}
+
+static int ipv4_probe ( struct device *dev ) {
+	struct fw_unit * unit;
+	struct fw_device *device;
+	struct fw_card *card;
+	struct net_device *netdev;
+	struct ipv4_priv *priv;
+	unsigned max_mtu;
+	__be64 guid;
+
+	fw_debug("ipv4 Probing\n" );
+	unit = fw_unit ( dev );
+	device = fw_device ( unit->device.parent );
+	card = device->card;
+
+	if ( ! device->is_local ) {
+		int added;
+
+		fw_debug ( "Non-local, adding remote node entry\n" );
+		added = ipv4_node_new ( card, device );
+		return added;
+	}
+	fw_debug("ipv4 Local: adding netdev\n" );
+	netdev = alloc_netdev ( sizeof(*priv), "fw-ipv4-%d", ipv4_init_dev );
+	if ( netdev == NULL) {
+		fw_error( "Out of memory\n");
+		goto out;
+	}
+
+	SET_NETDEV_DEV(netdev, card->device);
+	priv = netdev_priv(netdev);
+
+	spin_lock_init(&priv->lock);
+	priv->broadcast_state = IPV4_BROADCAST_ERROR;
+	priv->broadcast_rcv_context = NULL;
+	priv->broadcast_xmt_max_payload = 0;
+	priv->broadcast_xmt_datagramlabel = 0;
+
+	priv->local_fifo = INVALID_FIFO_ADDR;
+
+	/* INIT_WORK(&priv->wake, ipv4_handle_queue);*/
+	INIT_LIST_HEAD(&priv->packet_list);
+	INIT_LIST_HEAD(&priv->broadcasted_list);
+	INIT_LIST_HEAD(&priv->sent_list );
+
+	priv->card = card;
+
+	/*
+	 * Use the RFC 2734 default 1500 octets or the maximum payload
+	 * as initial MTU
+	 */
+	max_mtu = (1 << (card->max_receive + 1))
+		  - sizeof(struct ipv4_hdr) - IPV4_GASP_OVERHEAD;
+	netdev->mtu = min(1500U, max_mtu);
+
+	/* Set our hardware address while we're at it */
+	guid = cpu_to_be64(card->guid);
+	memcpy(netdev->dev_addr, &guid, sizeof(u64));
+	memset(netdev->broadcast, 0xff, sizeof(u64));
+	if ( register_netdev ( netdev ) ) {
+		fw_error ( "Cannot register the driver\n");
+		goto out;
+	}
+
+	fw_notify ( "%s: IPv4 over Firewire on device %016llx\n",
+ netdev->name, card->guid );
+	card->netdev = netdev;
+
+	return 0 /* ipv4_new_node ( ud ) */;
+ out:
+	if ( netdev )
+		free_netdev ( netdev );
+	return -ENOENT;
+}
+
+
+static int ipv4_remove ( struct device *dev ) {
+	struct fw_unit * unit;
+	struct fw_device *device;
+	struct fw_card *card;
+	struct net_device *netdev;
+	struct ipv4_priv *priv;
+	struct ipv4_node *node;
+	struct ipv4_partial_datagram *pd, *pd_next;
+	struct ipv4_packet_task *ptask, *pt_next;
+
+	fw_debug("ipv4 Removing\n" );
+	unit = fw_unit ( dev );
+	device = fw_device ( unit->device.parent );
+	card = device->card;
+
+	if ( ! device->is_local ) {
+		fw_debug ( "Node %x is non-local, removing remote node entry\n", device->node_id );
+		ipv4_node_delete ( card, device );
+		return 0;
+	}
+	netdev = card->netdev;
+	if ( netdev ) {
+		fw_debug ( "Node %x is local: deleting netdev\n", device->node_id );
+		priv = netdev_priv ( netdev );
+		unregister_netdev ( netdev );
+		fw_debug ( "unregistered\n" );
+		if ( priv->local_fifo != INVALID_FIFO_ADDR )
+			fw_core_remove_address_handler ( &priv->handler );
+		fw_debug ( "address handler gone\n" );
+		if ( priv->broadcast_rcv_context ) {
+			fw_iso_context_stop ( priv->broadcast_rcv_context );
+			fw_iso_buffer_destroy ( &priv->broadcast_rcv_buffer, priv->card );
+			fw_iso_context_destroy ( priv->broadcast_rcv_context );
+			fw_debug ( "rcv stopped\n" );
+		}
+		list_for_each_entry_safe( ptask, pt_next, &priv->packet_list, packet_list ) {
+			dev_kfree_skb_any ( ptask->skb );
+			kmem_cache_free( ipv4_packet_task_cache, ptask );
+		}
+		list_for_each_entry_safe( ptask, pt_next, &priv->broadcasted_list, packet_list ) {
+			dev_kfree_skb_any ( ptask->skb );
+			kmem_cache_free( ipv4_packet_task_cache, ptask );
+		}
+		list_for_each_entry_safe( ptask, pt_next, &priv->sent_list, packet_list ) {
+			dev_kfree_skb_any ( ptask->skb );
+			kmem_cache_free( ipv4_packet_task_cache, ptask );
+		}
+		fw_debug ( "lists emptied\n" );
+		list_for_each_entry( node, &card->ipv4_nodes, ipv4_nodes ) {
+			if ( node->pdg_size ) {
+				list_for_each_entry_safe( pd, pd_next, &node->pdg_list, pdg_list )
+					ipv4_pd_delete ( pd );
+				node->pdg_size = 0;
+			}
+			node->fifo = INVALID_FIFO_ADDR;
+		}
+		fw_debug ( "nodes cleaned up\n" );
+		free_netdev ( netdev );
+		card->netdev = NULL;
+		fw_debug ( "done\n" );
+	}
+	return 0;
+}
+
+static void ipv4_update ( struct fw_unit *unit ) {
+	struct fw_device *device;
+	struct fw_card *card;
+
+	fw_debug ( "ipv4_update unit %p\n", unit );
+	device = fw_device ( unit->device.parent );
+	card = device->card;
+	if ( ! device->is_local ) {
+		struct ipv4_node *node;
+		u64 guid;
+		struct net_device *netdev;
+		struct ipv4_priv *priv;
+
+		netdev = card->netdev;
+		if ( netdev ) {
+			priv = netdev_priv ( netdev );
+			guid = (u64)device->config_rom[3] << 32 | device->config_rom[4];
+			node = ipv4_node_find_by_guid ( priv, guid );
+			if ( ! node ) {
+				fw_error ( "ipv4_update: no node for device %llx\n", guid );
+				return;
+			}
+			fw_debug ( "Non-local, updating remote node entry for guid %llx old generation %x, old nodeid %x\n", guid, node->generation, node->nodeid );
+			node->generation = device->generation;
+			rmb();
+			node->nodeid = device->node_id;
+			fw_debug ( "New generation %x, new nodeid %x\n", node->generation, node->nodeid );
+		} else
+			fw_error ( "nonlocal, but no netdev?  How can that be?\n" );
+	} else {
+		/* FIXME: What do we need to do on bus reset? */
+		fw_debug ( "Local, doing nothing\n" );
+	}
+}
+
+static struct fw_driver ipv4_driver = {
+	.driver = {
+		.owner = THIS_MODULE,
+		.name = ipv4_driver_name,
+		.bus = &fw_bus_type,
+		.probe = ipv4_probe,
+		.remove = ipv4_remove,
+	},
+	.update = ipv4_update,
+	.id_table = ipv4_id_table,
+};
+
+static int __init ipv4_init ( void ) {
+	int added;
+
+	added = fw_core_add_descriptor ( &ipv4_unit_directory );
+	if ( added < 0 )
+		fw_error ( "Failed to add descriptor" );
+	ipv4_packet_task_cache = kmem_cache_create("packet_task",
+ sizeof(struct ipv4_packet_task), 0, 0, NULL);
+	fw_debug("Adding ipv4 module\n" );
+	return driver_register ( &ipv4_driver.driver );
+}
+
+static void __exit ipv4_cleanup ( void ) {
+	fw_core_remove_descriptor ( &ipv4_unit_directory );
+	fw_debug("Removing ipv4 module\n" );
+	driver_unregister ( &ipv4_driver.driver );
+}
+
+module_init(ipv4_init);
+module_exit(ipv4_cleanup);
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index e584b7215e8..d44f47d3b2d 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -3,6 +3,7 @@
 
 #include <linux/completion.h>
 #include <linux/device.h>
+#include <linux/dma-mapping.h>
 #include <linux/kernel.h>
 #include <linux/kref.h>
 #include <linux/list.h>
@@ -130,6 +131,13 @@ struct fw_card {
 	bool broadcast_channel_allocated;
 	u32 broadcast_channel;
 	u32 topology_map[(CSR_TOPOLOGY_MAP_END - CSR_TOPOLOGY_MAP) / 4];
+	/* Only non-NULL if firewire-ipv4 is active on this card. */
+	void *netdev;
+	/*
+	 * The nodes get probed before the card, so we need a place to store
+	 * them independent of card->netdev
+	 */
+	struct list_head ipv4_nodes;
 };
 
 static inline struct fw_card *fw_card_get(struct fw_card *card)
@@ -355,4 +363,90 @@ int fw_run_transaction(struct fw_card *card, int tcode, int destination_id,
 		       int generation, int speed, unsigned long long offset,
 		       void *payload, size_t length);
 
+static inline int fw_stream_packet_destination_id(int tag, int channel, int sy)
+{
+	return tag << 14 | channel << 8 | sy;
+}
+
+struct fw_descriptor {
+	struct list_head link;
+	size_t length;
+	u32 immediate;
+	u32 key;
+	const u32 *data;
+};
+
+int fw_core_add_descriptor(struct fw_descriptor *desc);
+void fw_core_remove_descriptor(struct fw_descriptor *desc);
+
+/*
+ * The iso packet format allows for an immediate header/payload part
+ * stored in 'header' immediately after the packet info plus an
+ * indirect payload part that is pointer to by the 'payload' field.
+ * Applications can use one or the other or both to implement simple
+ * low-bandwidth streaming (e.g. audio) or more advanced
+ * scatter-gather streaming (e.g. assembling video frame automatically).
+ */
+struct fw_iso_packet {
+	u16 payload_length;	/* Length of indirect payload. */
+	u32 interrupt:1;	/* Generate interrupt on this packet */
+	u32 skip:1;		/* Set to not send packet at all. */
+	u32 tag:2;
+	u32 sy:4;
+	u32 header_length:8;	/* Length of immediate header. */
+	u32 header[0];
+};
+
+#define FW_ISO_CONTEXT_TRANSMIT	0
+#define FW_ISO_CONTEXT_RECEIVE	1
+
+#define FW_ISO_CONTEXT_MATCH_TAG0	 1
+#define FW_ISO_CONTEXT_MATCH_TAG1	 2
+#define FW_ISO_CONTEXT_MATCH_TAG2	 4
+#define FW_ISO_CONTEXT_MATCH_TAG3	 8
+#define FW_ISO_CONTEXT_MATCH_ALL_TAGS	15
+
+/*
+ * An iso buffer is just a set of pages mapped for DMA in the
+ * specified direction.  Since the pages are to be used for DMA, they
+ * are not mapped into the kernel virtual address space.  We store the
+ * DMA address in the page private. The helper function
+ * fw_iso_buffer_map() will map the pages into a given vma.
+ */
+struct fw_iso_buffer {
+	enum dma_data_direction direction;
+	struct page **pages;
+	int page_count;
+};
+
+int fw_iso_buffer_init(struct fw_iso_buffer *buffer, struct fw_card *card,
+		       int page_count, enum dma_data_direction direction);
+void fw_iso_buffer_destroy(struct fw_iso_buffer *buffer, struct fw_card *card);
+
+struct fw_iso_context;
+typedef void (*fw_iso_callback_t)(struct fw_iso_context *context,
+				  u32 cycle, size_t header_length,
+				  void *header, void *data);
+struct fw_iso_context {
+	struct fw_card *card;
+	int type;
+	int channel;
+	int speed;
+	size_t header_size;
+	fw_iso_callback_t callback;
+	void *callback_data;
+};
+
+struct fw_iso_context *fw_iso_context_create(struct fw_card *card,
+		int type, int channel, int speed, size_t header_size,
+		fw_iso_callback_t callback, void *callback_data);
+int fw_iso_context_queue(struct fw_iso_context *ctx,
+			 struct fw_iso_packet *packet,
+			 struct fw_iso_buffer *buffer,
+			 unsigned long payload);
+int fw_iso_context_start(struct fw_iso_context *ctx,
+			 int cycle, int sync, int tags);
+int fw_iso_context_stop(struct fw_iso_context *ctx);
+void fw_iso_context_destroy(struct fw_iso_context *ctx);
+
 #endif /* _LINUX_FIREWIRE_H */
-- 
cgit v1.2.3-70-g09d2


From f91e3bd842ec6f5cea245993926ee8ff26250467 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 7 Jun 2009 22:57:53 +0200
Subject: firewire: net: style changes

Change names of types, variables, functions.
Omit debug code.
Use get_unaligned*, put_unaligned*.
Annotate big endian data.
Handle errors in __init.
Change whitespace.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-card.c |    2 +-
 drivers/firewire/net.c       | 2041 ++++++++++++++++++++----------------------
 include/linux/firewire.h     |    9 +-
 3 files changed, 969 insertions(+), 1083 deletions(-)

(limited to 'include')

diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c
index cdab32b2067..8c45e43da7c 100644
--- a/drivers/firewire/core-card.c
+++ b/drivers/firewire/core-card.c
@@ -430,7 +430,7 @@ void fw_card_initialize(struct fw_card *card,
 
 	INIT_DELAYED_WORK(&card->work, fw_card_bm_work);
 	card->netdev = NULL;
-	INIT_LIST_HEAD(&card->ipv4_nodes);
+	INIT_LIST_HEAD(&card->peer_list);
 }
 EXPORT_SYMBOL(fw_card_initialize);
 
diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c
index 15353886bd8..ba6f924b1b1 100644
--- a/drivers/firewire/net.c
+++ b/drivers/firewire/net.c
@@ -6,6 +6,7 @@
  * based on eth1394 by Ben Collins et al
  */
 
+#include <linux/bug.h>
 #include <linux/device.h>
 #include <linux/ethtool.h>
 #include <linux/firewire.h>
@@ -13,6 +14,7 @@
 #include <linux/highmem.h>
 #include <linux/in.h>
 #include <linux/ip.h>
+#include <linux/jiffies.h>
 #include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -22,181 +24,109 @@
 #include <asm/unaligned.h>
 #include <net/arp.h>
 
-/* Things to potentially make runtime cofigurable */
-/* must be at least as large as our maximum receive size */
-#define FIFO_SIZE 4096
-/* Network timeout in glibbles */
-#define IPV4_TIMEOUT       100000
+#define FWNET_MAX_FRAGMENTS	25	/* arbitrary limit */
+#define FWNET_ISO_PAGE_COUNT	(PAGE_SIZE < 16 * 1024 ? 4 : 2)
 
-/* Runitme configurable paramaters */
-static int ipv4_mpd = 25;
-static int ipv4_max_xmt = 0;
-/* 16k for receiving arp and broadcast packets.  Enough? */
-static int ipv4_iso_page_count = 4;
+#define IEEE1394_BROADCAST_CHANNEL	31
+#define IEEE1394_ALL_NODES		(0xffc0 | 0x003f)
+#define IEEE1394_MAX_PAYLOAD_S100	512
+#define FWNET_NO_FIFO_ADDR		(~0ULL)
 
-MODULE_AUTHOR("Jay Fenlason (fenlason@redhat.com)");
-MODULE_DESCRIPTION("Firewire IPv4 Driver (IPv4-over-IEEE1394 as per RFC 2734)");
-MODULE_LICENSE("GPL");
-MODULE_DEVICE_TABLE(ieee1394, ipv4_id_table);
-module_param_named(max_partial_datagrams, ipv4_mpd, int, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(max_partial_datagrams, "Maximum number of received"
- " incomplete fragmented datagrams (default = 25).");
-
-/* Max xmt is useful for forcing fragmentation, which makes testing easier. */
-module_param_named(max_transmit, ipv4_max_xmt, int, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(max_transmit, "Maximum datagram size to transmit"
- " (larger datagrams will be fragmented) (default = 0 (use hardware defaults).");
-
-/* iso page count controls how many pages will be used for receiving broadcast packets. */
-module_param_named(iso_pages, ipv4_iso_page_count, int, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(iso_pages, "Number of pages to use for receiving broadcast packets"
- " (default = 4).");
-
-/* uncomment this line to do debugging */
-#define fw_debug(s, args...) printk(KERN_DEBUG KBUILD_MODNAME ": " s, ## args)
-
-/* comment out these lines to do debugging. */
-/* #undef fw_debug */
-/* #define fw_debug(s...) */
-/* #define print_hex_dump(l...) */
-
-/* Define a fake hardware header format for the networking core.  Note that
- * header size cannot exceed 16 bytes as that is the size of the header cache.
- * Also, we do not need the source address in the header so we omit it and
- * keep the header to under 16 bytes */
-#define IPV4_ALEN (8)
-/* This must equal sizeof(struct ipv4_ether_hdr) */
-#define IPV4_HLEN (10)
-
-/* FIXME: what's a good size for this? */
-#define INVALID_FIFO_ADDR (u64)~0ULL
-
-/* Things specified by standards */
-#define BROADCAST_CHANNEL 31
-
-#define S100_BUFFER_SIZE 512
-#define MAX_BUFFER_SIZE 4096
-
-#define IPV4_GASP_SPECIFIER_ID	0x00005EU
-#define IPV4_GASP_VERSION	0x00000001U
-
-#define IPV4_GASP_OVERHEAD (2 * sizeof(u32)) /* for GASP header */
-
-#define IPV4_UNFRAG_HDR_SIZE	sizeof(u32)
-#define IPV4_FRAG_HDR_SIZE	(2 * sizeof(u32))
-#define IPV4_FRAG_OVERHEAD	sizeof(u32)
-
-#define ALL_NODES (0xffc0 | 0x003f)
-
-#define IPV4_HDR_UNFRAG		0	/* unfragmented		*/
-#define IPV4_HDR_FIRSTFRAG	1	/* first fragment	*/
-#define IPV4_HDR_LASTFRAG	2	/* last fragment	*/
-#define IPV4_HDR_INTFRAG	3	/* interior fragment	*/
-
-/* Our arp packet (ARPHRD_IEEE1394) */
-/* FIXME: note that this is probably bogus on weird-endian machines */
-struct ipv4_arp {
-	u16 hw_type;		/* 0x0018	*/
-	u16 proto_type;		/* 0x0806       */
-	u8 hw_addr_len;		/* 16		*/
-	u8 ip_addr_len;         /* 4		*/
-	u16 opcode;	        /* ARP Opcode	*/
-	/* Above is exactly the same format as struct arphdr */
-
-	u64 s_uniq_id;		/* Sender's 64bit EUI			*/
-	u8 max_rec;             /* Sender's max packet size		*/
-	u8 sspd;		/* Sender's max speed			*/
-	u16 fifo_hi;            /* hi 16bits of sender's FIFO addr	*/
-	u32 fifo_lo;            /* lo 32bits of sender's FIFO addr	*/
-	u32 sip;		/* Sender's IP Address			*/
-	u32 tip;		/* IP Address of requested hw addr	*/
-} __attribute__((packed));
+#define IANA_SPECIFIER_ID		0x00005eU
+#define RFC2734_SW_VERSION		0x000001U
 
-struct ipv4_ether_hdr {
-	unsigned char	h_dest[IPV4_ALEN];	/* destination address */
-	unsigned short  h_proto;                /* packet type ID field */
-}  __attribute__((packed));
+#define IEEE1394_GASP_HDR_SIZE	8
 
-static inline struct ipv4_ether_hdr *ipv4_ether_hdr(const struct sk_buff *skb)
-{
-	return (struct ipv4_ether_hdr *)skb_mac_header(skb);
-}
+#define RFC2374_UNFRAG_HDR_SIZE	4
+#define RFC2374_FRAG_HDR_SIZE	8
+#define RFC2374_FRAG_OVERHEAD	4
 
-enum ipv4_tx_type {
-	IPV4_UNKNOWN = 0,
-	IPV4_GASP = 1,
-	IPV4_WRREQ = 2,
-};
+#define RFC2374_HDR_UNFRAG	0	/* unfragmented		*/
+#define RFC2374_HDR_FIRSTFRAG	1	/* first fragment	*/
+#define RFC2374_HDR_LASTFRAG	2	/* last fragment	*/
+#define RFC2374_HDR_INTFRAG	3	/* interior fragment	*/
 
-enum ipv4_broadcast_state {
-	IPV4_BROADCAST_ERROR,
-	IPV4_BROADCAST_RUNNING,
-	IPV4_BROADCAST_STOPPED,
-};
+#define RFC2734_HW_ADDR_LEN	16
 
-#define ipv4_get_hdr_lf(h)		(((h)->w0&0xC0000000)>>30)
-#define ipv4_get_hdr_ether_type(h)	(((h)->w0&0x0000FFFF)    )
-#define ipv4_get_hdr_dg_size(h)		(((h)->w0&0x0FFF0000)>>16)
-#define ipv4_get_hdr_fg_off(h)		(((h)->w0&0x00000FFF)    )
-#define ipv4_get_hdr_dgl(h)		(((h)->w1&0xFFFF0000)>>16)
+struct rfc2734_arp {
+	__be16 hw_type;		/* 0x0018	*/
+	__be16 proto_type;	/* 0x0806       */
+	u8 hw_addr_len;		/* 16		*/
+	u8 ip_addr_len;		/* 4		*/
+	__be16 opcode;		/* ARP Opcode	*/
+	/* Above is exactly the same format as struct arphdr */
 
-#define ipv4_set_hdr_lf(lf)		(( lf)<<30)
-#define ipv4_set_hdr_ether_type(et)	(( et)    )
-#define ipv4_set_hdr_dg_size(dgs)	((dgs)<<16)
-#define ipv4_set_hdr_fg_off(fgo)	((fgo)    )
+	__be64 s_uniq_id;	/* Sender's 64bit EUI			*/
+	u8 max_rec;		/* Sender's max packet size		*/
+	u8 sspd;		/* Sender's max speed			*/
+	__be16 fifo_hi;		/* hi 16bits of sender's FIFO addr	*/
+	__be32 fifo_lo;		/* lo 32bits of sender's FIFO addr	*/
+	__be32 sip;		/* Sender's IP Address			*/
+	__be32 tip;		/* IP Address of requested hw addr	*/
+} __attribute__((packed));
 
-#define ipv4_set_hdr_dgl(dgl)		((dgl)<<16)
+/* This header format is specific to this driver implementation. */
+#define FWNET_ALEN	8
+#define FWNET_HLEN	10
+struct fwnet_header {
+	u8 h_dest[FWNET_ALEN];	/* destination address */
+	__be16 h_proto;		/* packet type ID field */
+} __attribute__((packed));
 
-struct ipv4_hdr {
+/* IPv4 and IPv6 encapsulation header */
+struct rfc2734_header {
 	u32 w0;
 	u32 w1;
 };
 
-static inline void ipv4_make_uf_hdr( struct ipv4_hdr *hdr, unsigned ether_type) {
-	hdr->w0 = ipv4_set_hdr_lf(IPV4_HDR_UNFRAG)
-		   |ipv4_set_hdr_ether_type(ether_type);
-	fw_debug ( "Setting unfragmented header %p to %x\n", hdr, hdr->w0 );
-}
+#define fwnet_get_hdr_lf(h)		(((h)->w0 & 0xc0000000) >> 30)
+#define fwnet_get_hdr_ether_type(h)	(((h)->w0 & 0x0000ffff))
+#define fwnet_get_hdr_dg_size(h)	(((h)->w0 & 0x0fff0000) >> 16)
+#define fwnet_get_hdr_fg_off(h)		(((h)->w0 & 0x00000fff))
+#define fwnet_get_hdr_dgl(h)		(((h)->w1 & 0xffff0000) >> 16)
 
-static inline void ipv4_make_ff_hdr ( struct ipv4_hdr *hdr, unsigned ether_type, unsigned dg_size, unsigned dgl ) {
-	hdr->w0 = ipv4_set_hdr_lf(IPV4_HDR_FIRSTFRAG)
-		   |ipv4_set_hdr_dg_size(dg_size)
-		   |ipv4_set_hdr_ether_type(ether_type);
-	hdr->w1 = ipv4_set_hdr_dgl(dgl);
-	fw_debug ( "Setting fragmented header %p to first_frag %x,%x (et %x, dgs %x, dgl %x)\n", hdr, hdr->w0, hdr->w1,
- ether_type, dg_size, dgl );
-}
+#define fwnet_set_hdr_lf(lf)		((lf)  << 30)
+#define fwnet_set_hdr_ether_type(et)	(et)
+#define fwnet_set_hdr_dg_size(dgs)	((dgs) << 16)
+#define fwnet_set_hdr_fg_off(fgo)	(fgo)
 
-static inline void ipv4_make_sf_hdr ( struct ipv4_hdr *hdr, unsigned lf, unsigned dg_size, unsigned fg_off, unsigned dgl) {
-	hdr->w0 = ipv4_set_hdr_lf(lf)
-		 |ipv4_set_hdr_dg_size(dg_size)
-		 |ipv4_set_hdr_fg_off(fg_off);
-	hdr->w1 = ipv4_set_hdr_dgl(dgl);
-	fw_debug ( "Setting fragmented header %p to %x,%x (lf %x, dgs %x, fo %x dgl %x)\n",
- hdr, hdr->w0, hdr->w1,
- lf, dg_size, fg_off, dgl );
-}
+#define fwnet_set_hdr_dgl(dgl)		((dgl) << 16)
 
-/* End of IP1394 headers */
+static inline void fwnet_make_uf_hdr(struct rfc2734_header *hdr,
+		unsigned ether_type)
+{
+	hdr->w0 = fwnet_set_hdr_lf(RFC2374_HDR_UNFRAG)
+		  | fwnet_set_hdr_ether_type(ether_type);
+}
 
-/* Fragment types */
-#define ETH1394_HDR_LF_UF	0	/* unfragmented		*/
-#define ETH1394_HDR_LF_FF	1	/* first fragment	*/
-#define ETH1394_HDR_LF_LF	2	/* last fragment	*/
-#define ETH1394_HDR_LF_IF	3	/* interior fragment	*/
+static inline void fwnet_make_ff_hdr(struct rfc2734_header *hdr,
+		unsigned ether_type, unsigned dg_size, unsigned dgl)
+{
+	hdr->w0 = fwnet_set_hdr_lf(RFC2374_HDR_FIRSTFRAG)
+		  | fwnet_set_hdr_dg_size(dg_size)
+		  | fwnet_set_hdr_ether_type(ether_type);
+	hdr->w1 = fwnet_set_hdr_dgl(dgl);
+}
 
-#define IP1394_HW_ADDR_LEN	16	/* As per RFC		*/
+static inline void fwnet_make_sf_hdr(struct rfc2734_header *hdr,
+		unsigned lf, unsigned dg_size, unsigned fg_off, unsigned dgl)
+{
+	hdr->w0 = fwnet_set_hdr_lf(lf)
+		  | fwnet_set_hdr_dg_size(dg_size)
+		  | fwnet_set_hdr_fg_off(fg_off);
+	hdr->w1 = fwnet_set_hdr_dgl(dgl);
+}
 
 /* This list keeps track of what parts of the datagram have been filled in */
-struct ipv4_fragment_info {
-        struct list_head fragment_info;
+struct fwnet_fragment_info {
+	struct list_head fi_link;
 	u16 offset;
 	u16 len;
 };
 
-struct ipv4_partial_datagram {
-	struct list_head pdg_list;
-	struct list_head fragment_info;
+struct fwnet_partial_datagram {
+	struct list_head pd_link;
+	struct list_head fi_list;
 	struct sk_buff *skb;
 	/* FIXME Why not use skb->data? */
 	char *pbuf;
@@ -208,40 +138,43 @@ struct ipv4_partial_datagram {
 /*
  * We keep one of these for each IPv4 capable device attached to a fw_card.
  * The list of them is stored in the fw_card structure rather than in the
- * ipv4_priv because the remote IPv4 nodes may be probed before the card is,
- * so we need a place to store them before the ipv4_priv structure is
+ * fwnet_device because the remote IPv4 nodes may be probed before the card is,
+ * so we need a place to store them before the fwnet_device structure is
  * allocated.
  */
-struct ipv4_node {
-	struct list_head ipv4_nodes;
-	/* guid of the remote node */
+struct fwnet_peer {
+	struct list_head peer_link;
+	/* guid of the remote peer */
 	u64 guid;
-	/* FIFO address to transmit datagrams to, or INVALID_FIFO_ADDR */
+	/* FIFO address to transmit datagrams to, or FWNET_NO_FIFO_ADDR */
 	u64 fifo;
 
 	spinlock_t pdg_lock;	/* partial datagram lock		*/
-	/* List of partial datagrams received from this node */
-	struct list_head pdg_list;
-	/* Number of entries in pdg_list at the moment */
+	/* List of partial datagrams received from this peer */
+	struct list_head pd_list;
+	/* Number of entries in pd_list at the moment */
 	unsigned pdg_size;
 
-	/* max payload to transmit to this remote node */
-	/* This already includes the IPV4_FRAG_HDR_SIZE overhead */
+	/* max payload to transmit to this remote peer */
+	/* This already includes the RFC2374_FRAG_HDR_SIZE overhead */
 	u16 max_payload;
 	/* outgoing datagram label */
 	u16 datagram_label;
-	/* Current node_id of the remote node */
-	u16 nodeid;
-	/* current generation of the remote node */
+	/* Current node_id of the remote peer */
+	u16 node_id;
+	/* current generation of the remote peer */
 	u8 generation;
-	/* max speed that this node can receive at */
+	/* max speed that this peer can receive at */
 	u8 xmt_speed;
 };
 
-struct ipv4_priv {
+struct fwnet_device {
 	spinlock_t lock;
-
-	enum ipv4_broadcast_state broadcast_state;
+	enum {
+		FWNET_BROADCAST_ERROR,
+		FWNET_BROADCAST_RUNNING,
+		FWNET_BROADCAST_STOPPED,
+	} broadcast_state;
 	struct fw_iso_context *broadcast_rcv_context;
 	struct fw_iso_buffer broadcast_rcv_buffer;
 	void **broadcast_rcv_buffer_ptrs;
@@ -257,14 +190,12 @@ struct ipv4_priv {
 	u16 broadcast_xmt_datagramlabel;
 
 	/*
-	 * The csr address that remote nodes must send datagrams to for us to
+	 * The CSR address that remote nodes must send datagrams to for us to
 	 * receive them.
 	 */
 	struct fw_address_handler handler;
 	u64 local_fifo;
 
-	/* Wake up to xmt	 */
-        /* struct work_struct wake;*/
 	/* List of packets to be sent */
 	struct list_head packet_list;
 	/*
@@ -279,17 +210,17 @@ struct ipv4_priv {
 };
 
 /* This is our task struct. It's used for the packet complete callback.  */
-struct ipv4_packet_task {
+struct fwnet_packet_task {
 	/*
-	 * ptask can actually be on priv->packet_list, priv->broadcasted_list,
-	 * or priv->sent_list depending on its current state.
+	 * ptask can actually be on dev->packet_list, dev->broadcasted_list,
+	 * or dev->sent_list depending on its current state.
 	 */
-	struct list_head packet_list;
+	struct list_head pt_link;
 	struct fw_transaction transaction;
-	struct ipv4_hdr hdr;
+	struct rfc2734_header hdr;
 	struct sk_buff *skb;
-	struct ipv4_priv *priv;
-	enum ipv4_tx_type tx_type;
+	struct fwnet_device *dev;
+
 	int outstanding_pkts;
 	unsigned max_payload;
 	u64 fifo_addr;
@@ -298,243 +229,192 @@ struct ipv4_packet_task {
 	u8 speed;
 };
 
-static struct kmem_cache *ipv4_packet_task_cache;
-
-static const char ipv4_driver_name[] = "firewire-ipv4";
-
-static const struct ieee1394_device_id ipv4_id_table[] = {
-	{
-		.match_flags  = IEEE1394_MATCH_SPECIFIER_ID |
-				IEEE1394_MATCH_VERSION,
-		.specifier_id = IPV4_GASP_SPECIFIER_ID,
-		.version      = IPV4_GASP_VERSION,
-	},
-	{ }
-};
-
-static u32 ipv4_unit_directory_data[] = {
-	0x00040000,					/* unit directory */
-	0x12000000 | IPV4_GASP_SPECIFIER_ID,	/* specifier ID */
-	0x81000003,					/* text descriptor */
-	0x13000000 | IPV4_GASP_VERSION,		/* version */
-	0x81000005,					/* text descriptor */
-
-	0x00030000,					/* Three quadlets */
-	0x00000000,					/* Text */
-	0x00000000,					/* Language 0 */
-	0x49414e41,					/* I A N A */
-	0x00030000,					/* Three quadlets */
-	0x00000000,					/* Text */
-	0x00000000,					/* Language 0 */
-	0x49507634,					/* I P v 4 */
-};
-
-static struct fw_descriptor ipv4_unit_directory = {
-	.length = ARRAY_SIZE(ipv4_unit_directory_data),
-	.key = 0xd1000000,
-	.data = ipv4_unit_directory_data
-};
-
-static int ipv4_send_packet(struct ipv4_packet_task *ptask );
-
-/* ------------------------------------------------------------------ */
-/******************************************
- * HW Header net device functions
- ******************************************/
-  /* These functions have been adapted from net/ethernet/eth.c */
-
-/* Create a fake MAC header for an arbitrary protocol layer.
- * saddr=NULL means use device source address
- * daddr=NULL means leave destination address (eg unresolved arp). */
+/*
+ * saddr == NULL means use device source address.
+ * daddr == NULL means leave destination address (eg unresolved arp).
+ */
+static int fwnet_header_create(struct sk_buff *skb, struct net_device *net,
+			unsigned short type, const void *daddr,
+			const void *saddr, unsigned len)
+{
+	struct fwnet_header *h;
 
-static int ipv4_header ( struct sk_buff *skb, struct net_device *dev,
-		       unsigned short type, const void *daddr,
-		       const void *saddr, unsigned len) {
-	struct ipv4_ether_hdr *eth;
+	h = (struct fwnet_header *)skb_push(skb, sizeof(*h));
+	put_unaligned_be16(type, &h->h_proto);
 
-	eth = (struct ipv4_ether_hdr *)skb_push(skb, sizeof(*eth));
-	eth->h_proto = htons(type);
+	if (net->flags & (IFF_LOOPBACK | IFF_NOARP)) {
+		memset(h->h_dest, 0, net->addr_len);
 
-	if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) {
-		memset(eth->h_dest, 0, dev->addr_len);
-		return dev->hard_header_len;
+		return net->hard_header_len;
 	}
 
 	if (daddr) {
-		memcpy(eth->h_dest, daddr, dev->addr_len);
-		return dev->hard_header_len;
+		memcpy(h->h_dest, daddr, net->addr_len);
+
+		return net->hard_header_len;
 	}
 
-	return -dev->hard_header_len;
+	return -net->hard_header_len;
 }
 
-/* Rebuild the faked MAC header. This is called after an ARP
- * (or in future other address resolution) has completed on this
- * sk_buff. We now let ARP fill in the other fields.
- *
- * This routine CANNOT use cached dst->neigh!
- * Really, it is used only when dst->neigh is wrong.
- */
-
-static int ipv4_rebuild_header(struct sk_buff *skb)
+static int fwnet_header_rebuild(struct sk_buff *skb)
 {
-	struct ipv4_ether_hdr *eth;
+	struct fwnet_header *h = (struct fwnet_header *)skb->data;
 
-	eth = (struct ipv4_ether_hdr *)skb->data;
-	if (eth->h_proto == htons(ETH_P_IP))
-		return arp_find((unsigned char *)&eth->h_dest, skb);
+	if (get_unaligned_be16(&h->h_proto) == ETH_P_IP)
+		return arp_find((unsigned char *)&h->h_dest, skb);
 
-	fw_notify ( "%s: unable to resolve type %04x addresses\n",
-		   skb->dev->name,ntohs(eth->h_proto) );
+	fw_notify("%s: unable to resolve type %04x addresses\n",
+		  skb->dev->name, be16_to_cpu(h->h_proto));
 	return 0;
 }
 
-static int ipv4_header_cache(const struct neighbour *neigh, struct hh_cache *hh) {
-	unsigned short type = hh->hh_type;
-	struct net_device *dev;
-	struct ipv4_ether_hdr *eth;
+static int fwnet_header_cache(const struct neighbour *neigh,
+			      struct hh_cache *hh)
+{
+	struct net_device *net;
+	struct fwnet_header *h;
 
-	if (type == htons(ETH_P_802_3))
+	if (hh->hh_type == cpu_to_be16(ETH_P_802_3))
 		return -1;
-	dev = neigh->dev;
-	eth = (struct ipv4_ether_hdr *)((u8 *)hh->hh_data + 16 - sizeof(*eth));
-	eth->h_proto = type;
-	memcpy(eth->h_dest, neigh->ha, dev->addr_len);
+	net = neigh->dev;
+	h = (struct fwnet_header *)((u8 *)hh->hh_data + 16 - sizeof(*h));
+	h->h_proto = hh->hh_type;
+	memcpy(h->h_dest, neigh->ha, net->addr_len);
+	hh->hh_len = FWNET_HLEN;
 
-	hh->hh_len = IPV4_HLEN;
 	return 0;
 }
 
 /* Called by Address Resolution module to notify changes in address. */
-static void ipv4_header_cache_update(struct hh_cache *hh, const struct net_device *dev, const unsigned char * haddr ) {
-	memcpy((u8 *)hh->hh_data + 16 - IPV4_HLEN, haddr, dev->addr_len);
+static void fwnet_header_cache_update(struct hh_cache *hh,
+		const struct net_device *net, const unsigned char *haddr)
+{
+	memcpy((u8 *)hh->hh_data + 16 - FWNET_HLEN, haddr, net->addr_len);
 }
 
-static int ipv4_header_parse(const struct sk_buff *skb, unsigned char *haddr) {
-	memcpy(haddr, skb->dev->dev_addr, IPV4_ALEN);
-	return IPV4_ALEN;
+static int fwnet_header_parse(const struct sk_buff *skb, unsigned char *haddr)
+{
+	memcpy(haddr, skb->dev->dev_addr, FWNET_ALEN);
+
+	return FWNET_ALEN;
 }
 
-static const struct header_ops ipv4_header_ops = {
-	.create         = ipv4_header,
-	.rebuild        = ipv4_rebuild_header,
-	.cache		= ipv4_header_cache,
-	.cache_update	= ipv4_header_cache_update,
-	.parse          = ipv4_header_parse,
+static const struct header_ops fwnet_header_ops = {
+	.create         = fwnet_header_create,
+	.rebuild        = fwnet_header_rebuild,
+	.cache		= fwnet_header_cache,
+	.cache_update	= fwnet_header_cache_update,
+	.parse          = fwnet_header_parse,
 };
 
-/* ------------------------------------------------------------------ */
-
 /* FIXME: is this correct for all cases? */
-static bool ipv4_frag_overlap(struct ipv4_partial_datagram *pd, unsigned offset, unsigned len)
+static bool fwnet_frag_overlap(struct fwnet_partial_datagram *pd,
+			       unsigned offset, unsigned len)
 {
-        struct ipv4_fragment_info *fi;
+	struct fwnet_fragment_info *fi;
 	unsigned end = offset + len;
 
-	list_for_each_entry(fi, &pd->fragment_info, fragment_info) {
-		if (offset < fi->offset + fi->len && end > fi->offset) {
-			fw_debug ( "frag_overlap pd %p fi %p (%x@%x) with %x@%x\n", pd, fi, fi->len, fi->offset, len, offset );
+	list_for_each_entry(fi, &pd->fi_list, fi_link)
+		if (offset < fi->offset + fi->len && end > fi->offset)
 			return true;
-		}
-	}
-	fw_debug ( "frag_overlap %p does not overlap with %x@%x\n", pd, len, offset );
+
 	return false;
 }
 
 /* Assumes that new fragment does not overlap any existing fragments */
-static struct ipv4_fragment_info *ipv4_frag_new ( struct ipv4_partial_datagram *pd, unsigned offset, unsigned len ) {
-	struct ipv4_fragment_info *fi, *fi2, *new;
+static struct fwnet_fragment_info *fwnet_frag_new(
+	struct fwnet_partial_datagram *pd, unsigned offset, unsigned len)
+{
+	struct fwnet_fragment_info *fi, *fi2, *new;
 	struct list_head *list;
 
-	fw_debug ( "frag_new pd %p %x@%x\n", pd, len, offset );
-	list = &pd->fragment_info;
-	list_for_each_entry(fi, &pd->fragment_info, fragment_info) {
+	list = &pd->fi_list;
+	list_for_each_entry(fi, &pd->fi_list, fi_link) {
 		if (fi->offset + fi->len == offset) {
 			/* The new fragment can be tacked on to the end */
 			/* Did the new fragment plug a hole? */
-			fi2 = list_entry(fi->fragment_info.next, struct ipv4_fragment_info, fragment_info);
+			fi2 = list_entry(fi->fi_link.next,
+					 struct fwnet_fragment_info, fi_link);
 			if (fi->offset + fi->len == fi2->offset) {
-				fw_debug ( "pd %p: hole filling %p (%x@%x) and %p(%x@%x): now %x@%x\n", pd, fi, fi->len, fi->offset,
-				fi2, fi2->len, fi2->offset, fi->len + len + fi2->len, fi->offset );
 				/* glue fragments together */
 				fi->len += len + fi2->len;
-				list_del(&fi2->fragment_info);
+				list_del(&fi2->fi_link);
 				kfree(fi2);
 			} else {
-				fw_debug ( "pd %p: extending %p from %x@%x to %x@%x\n", pd, fi, fi->len, fi->offset, fi->len+len, fi->offset );
 				fi->len += len;
 			}
+
 			return fi;
 		}
 		if (offset + len == fi->offset) {
 			/* The new fragment can be tacked on to the beginning */
 			/* Did the new fragment plug a hole? */
-			fi2 = list_entry(fi->fragment_info.prev, struct ipv4_fragment_info, fragment_info);
+			fi2 = list_entry(fi->fi_link.prev,
+					 struct fwnet_fragment_info, fi_link);
 			if (fi2->offset + fi2->len == fi->offset) {
 				/* glue fragments together */
-				fw_debug ( "pd %p: extending %p and merging with %p from %x@%x to %x@%x\n",
- pd, fi2, fi, fi2->len, fi2->offset, fi2->len + fi->len + len, fi2->offset );
 				fi2->len += fi->len + len;
-				list_del(&fi->fragment_info);
+				list_del(&fi->fi_link);
 				kfree(fi);
+
 				return fi2;
 			}
-			fw_debug ( "pd %p: extending %p from %x@%x to %x@%x\n", pd, fi, fi->len, fi->offset, offset, fi->len + len );
 			fi->offset = offset;
 			fi->len += len;
+
 			return fi;
 		}
 		if (offset > fi->offset + fi->len) {
-			list = &fi->fragment_info;
+			list = &fi->fi_link;
 			break;
 		}
 		if (offset + len < fi->offset) {
-			list = fi->fragment_info.prev;
+			list = fi->fi_link.prev;
 			break;
 		}
 	}
 
 	new = kmalloc(sizeof(*new), GFP_ATOMIC);
 	if (!new) {
-		fw_error ( "out of memory in fragment handling!\n" );
+		fw_error("out of memory\n");
 		return NULL;
 	}
 
 	new->offset = offset;
 	new->len = len;
-	list_add(&new->fragment_info, list);
-	fw_debug ( "pd %p: new frag %p %x@%x\n", pd, new, new->len, new->offset );
-	list_for_each_entry( fi, &pd->fragment_info, fragment_info )
-		fw_debug ( "fi %p %x@%x\n", fi, fi->len, fi->offset );
+	list_add(&new->fi_link, list);
+
 	return new;
 }
 
-/* ------------------------------------------------------------------ */
-
-static struct ipv4_partial_datagram *ipv4_pd_new(struct net_device *netdev,
- struct ipv4_node *node, u16 datagram_label, unsigned dg_size, u32 *frag_buf,
- unsigned frag_off, unsigned frag_len) {
-	struct ipv4_partial_datagram *new;
-	struct ipv4_fragment_info *fi;
+static struct fwnet_partial_datagram *fwnet_pd_new(struct net_device *net,
+		struct fwnet_peer *peer, u16 datagram_label, unsigned dg_size,
+		void *frag_buf, unsigned frag_off, unsigned frag_len)
+{
+	struct fwnet_partial_datagram *new;
+	struct fwnet_fragment_info *fi;
 
 	new = kmalloc(sizeof(*new), GFP_ATOMIC);
 	if (!new)
 		goto fail;
-	INIT_LIST_HEAD(&new->fragment_info);
-	fi = ipv4_frag_new ( new, frag_off, frag_len);
-	if ( fi == NULL )
+
+	INIT_LIST_HEAD(&new->fi_list);
+	fi = fwnet_frag_new(new, frag_off, frag_len);
+	if (fi == NULL)
 		goto fail_w_new;
+
 	new->datagram_label = datagram_label;
 	new->datagram_size = dg_size;
-	new->skb = dev_alloc_skb(dg_size + netdev->hard_header_len + 15);
-	if ( new->skb == NULL )
+	new->skb = dev_alloc_skb(dg_size + net->hard_header_len + 15);
+	if (new->skb == NULL)
 		goto fail_w_fi;
-	skb_reserve(new->skb, (netdev->hard_header_len + 15) & ~15);
+
+	skb_reserve(new->skb, (net->hard_header_len + 15) & ~15);
 	new->pbuf = skb_put(new->skb, dg_size);
 	memcpy(new->pbuf + frag_off, frag_buf, frag_len);
-	list_add_tail(&new->pdg_list, &node->pdg_list);
-	fw_debug ( "pd_new: new pd %p { dgl %u, dg_size %u, skb %p, pbuf %p } on node %p\n",
- new, new->datagram_label, new->datagram_size, new->skb, new->pbuf, node );
+	list_add_tail(&new->pd_link, &peer->pd_list);
+
 	return new;
 
 fail_w_fi:
@@ -542,174 +422,171 @@ fail_w_fi:
 fail_w_new:
 	kfree(new);
 fail:
-	fw_error("ipv4_pd_new: no memory\n");
+	fw_error("out of memory\n");
+
 	return NULL;
 }
 
-static struct ipv4_partial_datagram *ipv4_pd_find(struct ipv4_node *node, u16 datagram_label) {
-	struct ipv4_partial_datagram *pd;
+static struct fwnet_partial_datagram *fwnet_pd_find(struct fwnet_peer *peer,
+						    u16 datagram_label)
+{
+	struct fwnet_partial_datagram *pd;
 
-	list_for_each_entry(pd, &node->pdg_list, pdg_list) {
-	        if ( pd->datagram_label == datagram_label ) {
-			fw_debug ( "pd_find(node %p, label %u): pd %p\n", node, datagram_label, pd );
+	list_for_each_entry(pd, &peer->pd_list, pd_link)
+		if (pd->datagram_label == datagram_label)
 			return pd;
-		}
-	}
-	fw_debug ( "pd_find(node %p, label %u) no entry\n", node, datagram_label );
+
 	return NULL;
 }
 
 
-static void ipv4_pd_delete ( struct ipv4_partial_datagram *old ) {
-	struct ipv4_fragment_info *fi, *n;
+static void fwnet_pd_delete(struct fwnet_partial_datagram *old)
+{
+	struct fwnet_fragment_info *fi, *n;
 
-	fw_debug ( "pd_delete %p\n", old );
-	list_for_each_entry_safe(fi, n, &old->fragment_info, fragment_info) {
-		fw_debug ( "Freeing fi %p\n", fi );
+	list_for_each_entry_safe(fi, n, &old->fi_list, fi_link)
 		kfree(fi);
-	}
-	list_del(&old->pdg_list);
+
+	list_del(&old->pd_link);
 	dev_kfree_skb_any(old->skb);
 	kfree(old);
 }
 
-static bool ipv4_pd_update ( struct ipv4_node *node, struct ipv4_partial_datagram *pd,
- u32 *frag_buf, unsigned frag_off, unsigned frag_len) {
-	fw_debug ( "pd_update node %p, pd %p, frag_buf %p, %x@%x\n", node, pd, frag_buf, frag_len, frag_off );
-	if ( ipv4_frag_new ( pd, frag_off, frag_len ) == NULL)
+static bool fwnet_pd_update(struct fwnet_peer *peer,
+		struct fwnet_partial_datagram *pd, void *frag_buf,
+		unsigned frag_off, unsigned frag_len)
+{
+	if (fwnet_frag_new(pd, frag_off, frag_len) == NULL)
 		return false;
+
 	memcpy(pd->pbuf + frag_off, frag_buf, frag_len);
 
 	/*
 	 * Move list entry to beginnig of list so that oldest partial
 	 * datagrams percolate to the end of the list
 	 */
-	list_move_tail(&pd->pdg_list, &node->pdg_list);
-	fw_debug ( "New pd list:\n" );
-	list_for_each_entry ( pd, &node->pdg_list, pdg_list ) {
-		fw_debug ( "pd %p\n", pd );
-	}
+	list_move_tail(&pd->pd_link, &peer->pd_list);
+
 	return true;
 }
 
-static bool ipv4_pd_is_complete ( struct ipv4_partial_datagram *pd ) {
-	struct ipv4_fragment_info *fi;
-	bool ret;
+static bool fwnet_pd_is_complete(struct fwnet_partial_datagram *pd)
+{
+	struct fwnet_fragment_info *fi;
 
-	fi = list_entry(pd->fragment_info.next, struct ipv4_fragment_info, fragment_info);
+	fi = list_entry(pd->fi_list.next, struct fwnet_fragment_info, fi_link);
 
-	ret = (fi->len == pd->datagram_size);
-	fw_debug ( "pd_is_complete (pd %p, dgs %x): fi %p (%x@%x) %s\n", pd, pd->datagram_size, fi, fi->len, fi->offset, ret ? "yes" : "no" );
-	return ret;
+	return fi->len == pd->datagram_size;
 }
 
-/* ------------------------------------------------------------------ */
+static int fwnet_peer_new(struct fw_card *card, struct fw_device *device)
+{
+	struct fwnet_peer *peer;
 
-static int ipv4_node_new ( struct fw_card *card, struct fw_device *device ) {
-	struct ipv4_node *node;
+	peer = kmalloc(sizeof(*peer), GFP_KERNEL);
+	if (!peer) {
+		fw_error("out of memory\n");
 
-	node = kmalloc ( sizeof(*node), GFP_KERNEL );
-	if ( ! node ) {
-		fw_error ( "allocate new node failed\n" );
 		return -ENOMEM;
 	}
-	node->guid = (u64)device->config_rom[3] << 32 | device->config_rom[4];
-	node->fifo = INVALID_FIFO_ADDR;
-	INIT_LIST_HEAD(&node->pdg_list);
-	spin_lock_init(&node->pdg_lock);
-	node->pdg_size = 0;
-	node->generation = device->generation;
+	peer->guid = (u64)device->config_rom[3] << 32 | device->config_rom[4];
+	peer->fifo = FWNET_NO_FIFO_ADDR;
+	INIT_LIST_HEAD(&peer->pd_list);
+	spin_lock_init(&peer->pdg_lock);
+	peer->pdg_size = 0;
+	peer->generation = device->generation;
 	rmb();
-	node->nodeid = device->node_id;
+	peer->node_id = device->node_id;
 	 /* FIXME what should it really be? */
-	node->max_payload = S100_BUFFER_SIZE - IPV4_UNFRAG_HDR_SIZE;
-	node->datagram_label = 0U;
-	node->xmt_speed = device->max_speed;
-	list_add_tail ( &node->ipv4_nodes, &card->ipv4_nodes );
-	fw_debug ( "node_new: %p { guid %016llx, generation %u, nodeid %x, max_payload %x, xmt_speed %x } added\n",
- node, (unsigned long long)node->guid, node->generation, node->nodeid, node->max_payload, node->xmt_speed );
+	peer->max_payload = IEEE1394_MAX_PAYLOAD_S100 - RFC2374_UNFRAG_HDR_SIZE;
+	peer->datagram_label = 0U;
+	peer->xmt_speed = device->max_speed;
+	list_add_tail(&peer->peer_link, &card->peer_list);
+
 	return 0;
 }
 
-static struct ipv4_node *ipv4_node_find_by_guid(struct ipv4_priv *priv, u64 guid) {
-	struct ipv4_node *node;
+/* FIXME caller must take the lock, or peer needs to be reference-counted */
+static struct fwnet_peer *fwnet_peer_find_by_guid(struct fwnet_device *dev,
+						  u64 guid)
+{
+	struct fwnet_peer *p, *peer = NULL;
 	unsigned long flags;
 
-	spin_lock_irqsave(&priv->lock, flags);
-	list_for_each_entry(node, &priv->card->ipv4_nodes, ipv4_nodes)
-		if (node->guid == guid) {
-			/* FIXME: lock the node first? */
-			spin_unlock_irqrestore ( &priv->lock, flags );
-			fw_debug ( "node_find_by_guid (%016llx) found %p\n", (unsigned long long)guid, node );
-			return node;
+	spin_lock_irqsave(&dev->lock, flags);
+	list_for_each_entry(p, &dev->card->peer_list, peer_link)
+		if (p->guid == guid) {
+			peer = p;
+			break;
 		}
+	spin_unlock_irqrestore(&dev->lock, flags);
 
-	spin_unlock_irqrestore ( &priv->lock, flags );
-	fw_debug ( "node_find_by_guid (%016llx) not found\n", (unsigned long long)guid );
-	return NULL;
+	return peer;
 }
 
-static struct ipv4_node *ipv4_node_find_by_nodeid(struct ipv4_priv *priv, u16 nodeid) {
-	struct ipv4_node *node;
+/* FIXME caller must take the lock, or peer needs to be reference-counted */
+/* FIXME node_id doesn't mean anything without generation */
+static struct fwnet_peer *fwnet_peer_find_by_node_id(struct fwnet_device *dev,
+						     u16 node_id)
+{
+	struct fwnet_peer *p, *peer = NULL;
 	unsigned long flags;
 
-	spin_lock_irqsave(&priv->lock, flags);
-	list_for_each_entry(node, &priv->card->ipv4_nodes, ipv4_nodes)
-		if (node->nodeid == nodeid) {
-			/* FIXME: lock the node first? */
-			spin_unlock_irqrestore ( &priv->lock, flags );
-			fw_debug ( "node_find_by_nodeid (%x) found %p\n", nodeid, node );
-			return node;
+	spin_lock_irqsave(&dev->lock, flags);
+	list_for_each_entry(p, &dev->card->peer_list, peer_link)
+		if (p->node_id == node_id) {
+			peer = p;
+			break;
 		}
-	fw_debug ( "node_find_by_nodeid (%x) not found\n", nodeid );
-	spin_unlock_irqrestore ( &priv->lock, flags );
-	return NULL;
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	return peer;
 }
 
-/* This is only complicated because we can't assume priv exists */
-static void ipv4_node_delete ( struct fw_card *card, struct fw_device *device ) {
-	struct net_device *netdev;
-	struct ipv4_priv *priv;
-	struct ipv4_node *node;
+/* FIXME */
+static void fwnet_peer_delete(struct fw_card *card, struct fw_device *device)
+{
+	struct net_device *net;
+	struct fwnet_device *dev;
+	struct fwnet_peer *peer;
 	u64 guid;
 	unsigned long flags;
-	struct ipv4_partial_datagram *pd, *pd_next;
+	struct fwnet_partial_datagram *pd, *pd_next;
 
 	guid = (u64)device->config_rom[3] << 32 | device->config_rom[4];
-	netdev = card->netdev;
-	if ( netdev )
-		priv = netdev_priv ( netdev );
+	net = card->netdev;
+	if (net)
+		dev = netdev_priv(net);
 	else
-		priv = NULL;
-	if ( priv )
-		spin_lock_irqsave ( &priv->lock, flags );
-	list_for_each_entry( node, &card->ipv4_nodes, ipv4_nodes ) {
-		if ( node->guid == guid ) {
-			list_del ( &node->ipv4_nodes );
-			list_for_each_entry_safe( pd, pd_next, &node->pdg_list, pdg_list )
-				ipv4_pd_delete ( pd );
+		dev = NULL;
+	if (dev)
+		spin_lock_irqsave(&dev->lock, flags);
+
+	list_for_each_entry(peer, &card->peer_list, peer_link) {
+		if (peer->guid == guid) {
+			list_del(&peer->peer_link);
+			list_for_each_entry_safe(pd, pd_next, &peer->pd_list,
+						 pd_link)
+				fwnet_pd_delete(pd);
 			break;
 		}
 	}
-	if ( priv )
-		spin_unlock_irqrestore ( &priv->lock, flags );
+	if (dev)
+		spin_unlock_irqrestore(&dev->lock, flags);
 }
 
-/* ------------------------------------------------------------------ */
-
-
-static int ipv4_finish_incoming_packet ( struct net_device *netdev,
- struct sk_buff *skb, u16 source_node_id, bool is_broadcast, u16 ether_type ) {
-	struct ipv4_priv *priv;
-	static u64 broadcast_hw = ~0ULL;
+static int fwnet_finish_incoming_packet(struct net_device *net,
+					struct sk_buff *skb, u16 source_node_id,
+					bool is_broadcast, u16 ether_type)
+{
+	struct fwnet_device *dev;
+	static const __be64 broadcast_hw = cpu_to_be64(~0ULL);
 	int status;
-	u64 guid;
+	__be64 guid;
 
-	fw_debug ( "ipv4_finish_incoming_packet(%p, %p, %x, %s, %x\n",
- netdev, skb, source_node_id, is_broadcast ? "true" : "false", ether_type );
-	priv = netdev_priv(netdev);
+	dev = netdev_priv(net);
 	/* Write metadata, and then pass to the receive level */
-	skb->dev = netdev;
+	skb->dev = net;
 	skb->ip_summed = CHECKSUM_UNNECESSARY;  /* don't check it */
 
 	/*
@@ -724,73 +601,75 @@ static int ipv4_finish_incoming_packet ( struct net_device *netdev,
 	 * about the sending machine.
 	 */
 	if (ether_type == ETH_P_ARP) {
-		struct ipv4_arp *arp1394;
+		struct rfc2734_arp *arp1394;
 		struct arphdr *arp;
 		unsigned char *arp_ptr;
 		u64 fifo_addr;
+		u64 peer_guid;
 		u8 max_rec;
 		u8 sspd;
 		u16 max_payload;
-		struct ipv4_node *node;
-		static const u16 ipv4_speed_to_max_payload[] = {
+		struct fwnet_peer *peer;
+		static const u16 fwnet_speed_to_max_payload[] = {
 			/* S100, S200, S400, S800, S1600, S3200 */
 			    512, 1024, 2048, 4096,  4096,  4096
 		};
 
-		/* fw_debug ( "ARP packet\n" ); */
-		arp1394 = (struct ipv4_arp *)skb->data;
+		arp1394 = (struct rfc2734_arp *)skb->data;
 		arp = (struct arphdr *)skb->data;
 		arp_ptr = (unsigned char *)(arp + 1);
-		fifo_addr = (u64)ntohs(arp1394->fifo_hi) << 32 |
- ntohl(arp1394->fifo_lo);
-		max_rec = priv->card->max_receive;
-		if ( arp1394->max_rec < max_rec )
+		fifo_addr = (u64)ntohs(arp1394->fifo_hi) << 32
+				| ntohl(arp1394->fifo_lo);
+		max_rec = dev->card->max_receive;
+		if (arp1394->max_rec < max_rec)
 			max_rec = arp1394->max_rec;
 		sspd = arp1394->sspd;
-		/*
-		 * Sanity check. MacOSX seems to be sending us 131 in this
-		 * field (atleast on my Panther G5). Not sure why.
-		 */
-		if (sspd > 5 ) {
-			fw_notify ( "sspd %x out of range\n", sspd );
+		/* Sanity check.  OS X 10.3 PPC reportedly sends 131. */
+		if (sspd > SCODE_3200) {
+			fw_notify("sspd %x out of range\n", sspd);
 			sspd = 0;
 		}
 
-		max_payload = min(ipv4_speed_to_max_payload[sspd],
- (u16)(1 << (max_rec + 1))) - IPV4_UNFRAG_HDR_SIZE;
+		max_payload = min(fwnet_speed_to_max_payload[sspd],
+			(u16)(1 << (max_rec + 1))) - RFC2374_UNFRAG_HDR_SIZE;
 
-		guid = be64_to_cpu(get_unaligned(&arp1394->s_uniq_id));
-		node = ipv4_node_find_by_guid(priv, guid);
-		if (!node) {
-			fw_notify ( "No node for ARP packet from %llx\n", guid );
+		peer_guid = get_unaligned_be64(&arp1394->s_uniq_id);
+		peer = fwnet_peer_find_by_guid(dev, peer_guid);
+		if (!peer) {
+			fw_notify("No peer for ARP packet from %016llx\n",
+				  (unsigned long long)peer_guid);
 			goto failed_proto;
 		}
-		if ( node->nodeid != source_node_id || node->generation != priv->card->generation ) {
-			fw_notify ( "Internal error: node->nodeid (%x) != soucre_node_id (%x) or node->generation (%x) != priv->card->generation(%x)\n",
- node->nodeid, source_node_id, node->generation, priv->card->generation );
-			node->nodeid = source_node_id;
-			node->generation = priv->card->generation;
+
+		/* FIXME don't use card->generation */
+		if (peer->node_id != source_node_id ||
+		    peer->generation != dev->card->generation) {
+			fw_notify("Internal error: peer->node_id (%x) != "
+				  "source_node_id (%x) or peer->generation (%x)"
+				  " != dev->card->generation(%x)\n",
+				  peer->node_id, source_node_id,
+				  peer->generation, dev->card->generation);
+			peer->node_id = source_node_id;
+			peer->generation = dev->card->generation;
 		}
 
 		/* FIXME: for debugging */
-		if ( sspd > SCODE_400 )
+		if (sspd > SCODE_400)
 			sspd = SCODE_400;
 		/* Update our speed/payload/fifo_offset table */
 		/*
 		 * FIXME: this does not handle cases where two high-speed endpoints must use a slower speed because of
 		 * a lower speed hub between them.  We need to look at the actual topology map here.
 		 */
-		fw_debug ( "Setting node %p fifo %llx (was %llx), max_payload %x (was %x), speed %x (was %x)\n",
- node, fifo_addr, node->fifo, max_payload, node->max_payload, sspd, node->xmt_speed );
-		node->fifo =	fifo_addr;
-		node->max_payload = max_payload;
+		peer->fifo = fifo_addr;
+		peer->max_payload = max_payload;
 		/*
 		 * Only allow speeds to go down from their initial value.
-		 * Otherwise a local node that can only do S400 or slower may
-		 * be told to transmit at S800 to a faster remote node.
+		 * Otherwise a local peer that can only do S400 or slower may
+		 * be told to transmit at S800 to a faster remote peer.
 		 */
-		if ( node->xmt_speed > sspd )
-			node->xmt_speed = sspd;
+		if (peer->xmt_speed > sspd)
+			peer->xmt_speed = sspd;
 
 		/*
 		 * Now that we're done with the 1394 specific stuff, we'll
@@ -805,248 +684,257 @@ static int ipv4_finish_incoming_packet ( struct net_device *netdev,
 		 */
 
 		arp->ar_hln = 8;
-		arp_ptr += arp->ar_hln;		/* skip over sender unique id */
-		*(u32 *)arp_ptr = arp1394->sip; /* move sender IP addr */
-		arp_ptr += arp->ar_pln;		/* skip over sender IP addr */
+		/* skip over sender unique id */
+		arp_ptr += arp->ar_hln;
+		/* move sender IP addr */
+		put_unaligned(arp1394->sip, (u32 *)arp_ptr);
+		/* skip over sender IP addr */
+		arp_ptr += arp->ar_pln;
 
 		if (arp->ar_op == htons(ARPOP_REQUEST))
 			memset(arp_ptr, 0, sizeof(u64));
 		else
-			memcpy(arp_ptr, netdev->dev_addr, sizeof(u64));
+			memcpy(arp_ptr, net->dev_addr, sizeof(u64));
 	}
 
 	/* Now add the ethernet header. */
-	guid = cpu_to_be64(priv->card->guid);
-	if (dev_hard_header(skb, netdev, ether_type, is_broadcast ? &broadcast_hw : &guid, NULL,
- skb->len) >= 0) {
-		struct ipv4_ether_hdr *eth;
+	guid = cpu_to_be64(dev->card->guid);
+	if (dev_hard_header(skb, net, ether_type,
+			   is_broadcast ? &broadcast_hw : &guid,
+			   NULL, skb->len) >= 0) {
+		struct fwnet_header *eth;
 		u16 *rawp;
 		__be16 protocol;
 
 		skb_reset_mac_header(skb);
 		skb_pull(skb, sizeof(*eth));
-		eth = ipv4_ether_hdr(skb);
+		eth = (struct fwnet_header *)skb_mac_header(skb);
 		if (*eth->h_dest & 1) {
-			if (memcmp(eth->h_dest, netdev->broadcast, netdev->addr_len) == 0) {
-				fw_debug ( "Broadcast\n" );
+			if (memcmp(eth->h_dest, net->broadcast,
+				   net->addr_len) == 0)
 				skb->pkt_type = PACKET_BROADCAST;
-			}
 #if 0
 			else
 				skb->pkt_type = PACKET_MULTICAST;
 #endif
 		} else {
-			if (memcmp(eth->h_dest, netdev->dev_addr, netdev->addr_len)) {
+			if (memcmp(eth->h_dest, net->dev_addr, net->addr_len)) {
 				u64 a1, a2;
 
-				memcpy ( &a1, eth->h_dest, sizeof(u64));
-				memcpy ( &a2, netdev->dev_addr, sizeof(u64));
-				fw_debug ( "Otherhost %llx %llx %x\n", a1, a2, netdev->addr_len );
+				memcpy(&a1, eth->h_dest, sizeof(u64));
+				memcpy(&a2, net->dev_addr, sizeof(u64));
 				skb->pkt_type = PACKET_OTHERHOST;
 			}
 		}
 		if (ntohs(eth->h_proto) >= 1536) {
-			fw_debug ( " proto %x %x\n", eth->h_proto, ntohs(eth->h_proto) );
 			protocol = eth->h_proto;
 		} else {
 			rawp = (u16 *)skb->data;
-			if (*rawp == 0xFFFF) {
-				fw_debug ( "proto 802_3\n" );
+			if (*rawp == 0xffff)
 				protocol = htons(ETH_P_802_3);
-			} else {
-				fw_debug ( "proto 802_2\n" );
+			else
 				protocol = htons(ETH_P_802_2);
-			}
 		}
 		skb->protocol = protocol;
 	}
 	status = netif_rx(skb);
-	if ( status == NET_RX_DROP) {
-		netdev->stats.rx_errors++;
-		netdev->stats.rx_dropped++;
+	if (status == NET_RX_DROP) {
+		net->stats.rx_errors++;
+		net->stats.rx_dropped++;
 	} else {
-		netdev->stats.rx_packets++;
-		netdev->stats.rx_bytes += skb->len;
+		net->stats.rx_packets++;
+		net->stats.rx_bytes += skb->len;
 	}
-	if (netif_queue_stopped(netdev))
-		netif_wake_queue(netdev);
+	if (netif_queue_stopped(net))
+		netif_wake_queue(net);
+
 	return 0;
 
  failed_proto:
-	netdev->stats.rx_errors++;
-	netdev->stats.rx_dropped++;
+	net->stats.rx_errors++;
+	net->stats.rx_dropped++;
+
 	dev_kfree_skb_any(skb);
-	if (netif_queue_stopped(netdev))
-		netif_wake_queue(netdev);
-	netdev->last_rx = jiffies;
+	if (netif_queue_stopped(net))
+		netif_wake_queue(net);
+
+	net->last_rx = jiffies;
+
 	return 0;
 }
 
-/* ------------------------------------------------------------------ */
-
-static int ipv4_incoming_packet ( struct ipv4_priv *priv, u32 *buf, int len, u16 source_node_id, bool is_broadcast ) {
+static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len,
+				 u16 source_node_id, bool is_broadcast)
+{
 	struct sk_buff *skb;
-	struct net_device *netdev;
-	struct ipv4_hdr hdr;
+	struct net_device *net;
+	struct rfc2734_header hdr;
 	unsigned lf;
 	unsigned long flags;
-	struct ipv4_node *node;
-	struct ipv4_partial_datagram *pd;
+	struct fwnet_peer *peer;
+	struct fwnet_partial_datagram *pd;
 	int fg_off;
 	int dg_size;
 	u16 datagram_label;
 	int retval;
 	u16 ether_type;
 
-	fw_debug ( "ipv4_incoming_packet(%p, %p, %d, %x, %s)\n", priv, buf, len, source_node_id, is_broadcast ? "true" : "false" );
-	netdev = priv->card->netdev;
+	net = dev->card->netdev;
 
-	hdr.w0 = ntohl(buf[0]);
-	lf = ipv4_get_hdr_lf(&hdr);
-	if ( lf == IPV4_HDR_UNFRAG ) {
+	hdr.w0 = be32_to_cpu(buf[0]);
+	lf = fwnet_get_hdr_lf(&hdr);
+	if (lf == RFC2374_HDR_UNFRAG) {
 		/*
 		 * An unfragmented datagram has been received by the ieee1394
 		 * bus. Build an skbuff around it so we can pass it to the
 		 * high level network layer.
 		 */
-		ether_type = ipv4_get_hdr_ether_type(&hdr);
-		fw_debug ( "header w0 = %x, lf = %x, ether_type = %x\n", hdr.w0, lf, ether_type );
+		ether_type = fwnet_get_hdr_ether_type(&hdr);
 		buf++;
-		len -= IPV4_UNFRAG_HDR_SIZE;
+		len -= RFC2374_UNFRAG_HDR_SIZE;
 
-		skb = dev_alloc_skb(len + netdev->hard_header_len + 15);
+		skb = dev_alloc_skb(len + net->hard_header_len + 15);
 		if (unlikely(!skb)) {
-			fw_error ( "Out of memory for incoming packet\n");
-			netdev->stats.rx_dropped++;
+			fw_error("out of memory\n");
+			net->stats.rx_dropped++;
+
 			return -1;
 		}
-		skb_reserve(skb, (netdev->hard_header_len + 15) & ~15);
-		memcpy(skb_put(skb, len), buf, len );
-		return ipv4_finish_incoming_packet(netdev, skb, source_node_id, is_broadcast, ether_type );
+		skb_reserve(skb, (net->hard_header_len + 15) & ~15);
+		memcpy(skb_put(skb, len), buf, len);
+
+		return fwnet_finish_incoming_packet(net, skb, source_node_id,
+						    is_broadcast, ether_type);
 	}
 	/* A datagram fragment has been received, now the fun begins. */
 	hdr.w1 = ntohl(buf[1]);
-	buf +=2;
-	len -= IPV4_FRAG_HDR_SIZE;
-	if ( lf ==IPV4_HDR_FIRSTFRAG ) {
-		ether_type = ipv4_get_hdr_ether_type(&hdr);
+	buf += 2;
+	len -= RFC2374_FRAG_HDR_SIZE;
+	if (lf == RFC2374_HDR_FIRSTFRAG) {
+		ether_type = fwnet_get_hdr_ether_type(&hdr);
 		fg_off = 0;
 	} else {
-		fg_off = ipv4_get_hdr_fg_off(&hdr);
-		ether_type = 0; /* Shut up compiler! */
+		ether_type = 0;
+		fg_off = fwnet_get_hdr_fg_off(&hdr);
 	}
-	datagram_label = ipv4_get_hdr_dgl(&hdr);
-	dg_size = ipv4_get_hdr_dg_size(&hdr); /* ??? + 1 */
-	fw_debug ( "fragmented: %x.%x = lf %x, ether_type %x, fg_off %x, dgl %x, dg_size %x\n", hdr.w0, hdr.w1, lf, ether_type, fg_off, datagram_label, dg_size );
-	node = ipv4_node_find_by_nodeid ( priv, source_node_id);
-	spin_lock_irqsave(&node->pdg_lock, flags);
-	pd = ipv4_pd_find( node, datagram_label );
+	datagram_label = fwnet_get_hdr_dgl(&hdr);
+	dg_size = fwnet_get_hdr_dg_size(&hdr); /* ??? + 1 */
+	peer = fwnet_peer_find_by_node_id(dev, source_node_id);
+
+	spin_lock_irqsave(&peer->pdg_lock, flags);
+
+	pd = fwnet_pd_find(peer, datagram_label);
 	if (pd == NULL) {
-		while ( node->pdg_size >= ipv4_mpd ) {
+		while (peer->pdg_size >= FWNET_MAX_FRAGMENTS) {
 			/* remove the oldest */
-			ipv4_pd_delete ( list_first_entry(&node->pdg_list, struct ipv4_partial_datagram, pdg_list) );
-			node->pdg_size--;
+			fwnet_pd_delete(list_first_entry(&peer->pd_list,
+				struct fwnet_partial_datagram, pd_link));
+			peer->pdg_size--;
 		}
-		pd = ipv4_pd_new ( netdev, node, datagram_label, dg_size,
- buf, fg_off, len);
-		if ( pd == NULL) {
+		pd = fwnet_pd_new(net, peer, datagram_label,
+				  dg_size, buf, fg_off, len);
+		if (pd == NULL) {
 			retval = -ENOMEM;
 			goto bad_proto;
 		}
-		node->pdg_size++;
+		peer->pdg_size++;
 	} else {
-		if (ipv4_frag_overlap(pd, fg_off, len) || pd->datagram_size != dg_size) {
+		if (fwnet_frag_overlap(pd, fg_off, len) ||
+		    pd->datagram_size != dg_size) {
 			/*
 			 * Differing datagram sizes or overlapping fragments,
-			 * Either way the remote machine is playing silly buggers
-			 * with us: obliterate the old datagram and start a new one.
+			 * discard old datagram and start a new one.
 			 */
-			ipv4_pd_delete ( pd );
-			pd = ipv4_pd_new ( netdev, node, datagram_label,
- dg_size, buf, fg_off, len);
-			if ( pd == NULL ) {
+			fwnet_pd_delete(pd);
+			pd = fwnet_pd_new(net, peer, datagram_label,
+					  dg_size, buf, fg_off, len);
+			if (pd == NULL) {
 				retval = -ENOMEM;
-				node->pdg_size--;
+				peer->pdg_size--;
 				goto bad_proto;
 			}
 		} else {
-			bool worked;
-
-			worked = ipv4_pd_update ( node, pd,
- buf, fg_off, len );
-			if ( ! worked ) {
+			if (!fwnet_pd_update(peer, pd, buf, fg_off, len)) {
 				/*
 				 * Couldn't save off fragment anyway
 				 * so might as well obliterate the
 				 * datagram now.
 				 */
-				ipv4_pd_delete ( pd );
-				node->pdg_size--;
+				fwnet_pd_delete(pd);
+				peer->pdg_size--;
 				goto bad_proto;
 			}
 		}
 	} /* new datagram or add to existing one */
 
-	if ( lf == IPV4_HDR_FIRSTFRAG )
+	if (lf == RFC2374_HDR_FIRSTFRAG)
 		pd->ether_type = ether_type;
-	if ( ipv4_pd_is_complete ( pd ) ) {
+
+	if (fwnet_pd_is_complete(pd)) {
 		ether_type = pd->ether_type;
-		node->pdg_size--;
+		peer->pdg_size--;
 		skb = skb_get(pd->skb);
-		ipv4_pd_delete ( pd );
-		spin_unlock_irqrestore(&node->pdg_lock, flags);
-		return ipv4_finish_incoming_packet ( netdev, skb, source_node_id, false, ether_type );
+		fwnet_pd_delete(pd);
+
+		spin_unlock_irqrestore(&peer->pdg_lock, flags);
+
+		return fwnet_finish_incoming_packet(net, skb, source_node_id,
+						    false, ether_type);
 	}
 	/*
 	 * Datagram is not complete, we're done for the
 	 * moment.
 	 */
-	spin_unlock_irqrestore(&node->pdg_lock, flags);
+	spin_unlock_irqrestore(&peer->pdg_lock, flags);
+
 	return 0;
 
  bad_proto:
-	spin_unlock_irqrestore(&node->pdg_lock, flags);
-	if (netif_queue_stopped(netdev))
-		netif_wake_queue(netdev);
+	spin_unlock_irqrestore(&peer->pdg_lock, flags);
+
+	if (netif_queue_stopped(net))
+		netif_wake_queue(net);
+
 	return 0;
 }
 
-static void ipv4_receive_packet ( struct fw_card *card, struct fw_request *r,
- int tcode, int destination, int source, int generation, int speed,
- unsigned long long offset, void *payload, size_t length, void *callback_data ) {
-	struct ipv4_priv *priv;
+static void fwnet_receive_packet(struct fw_card *card, struct fw_request *r,
+		int tcode, int destination, int source, int generation,
+		int speed, unsigned long long offset, void *payload,
+		size_t length, void *callback_data)
+{
+	struct fwnet_device *dev;
 	int status;
 
-	fw_debug ( "ipv4_receive_packet(%p,%p,%x,%x,%x,%x,%x,%llx,%p,%lx,%p)\n",
- card, r, tcode, destination, source, generation, speed, offset, payload,
- (unsigned long)length, callback_data);
-	print_hex_dump ( KERN_DEBUG, "header: ", DUMP_PREFIX_OFFSET, 32, 1, payload, length, false );
-	priv = callback_data;
-	if (   tcode != TCODE_WRITE_BLOCK_REQUEST
-	    || destination != card->node_id
-	    || generation != card->generation
-	    || offset != priv->handler.offset ) {
+	dev = callback_data;
+	if (tcode != TCODE_WRITE_BLOCK_REQUEST
+	    || destination != card->node_id	/* <- FIXME */
+	    || generation != card->generation	/* <- FIXME */
+	    || offset != dev->handler.offset) {
 		fw_send_response(card, r, RCODE_CONFLICT_ERROR);
-		fw_debug("Conflict error card node_id=%x, card generation=%x, local offset %llx\n",
- card->node_id, card->generation, (unsigned long long)priv->handler.offset );
+
 		return;
 	}
-	status = ipv4_incoming_packet ( priv, payload, length, source, false );
-	if ( status != 0 ) {
-		fw_error ( "Incoming packet failure\n" );
-		fw_send_response ( card, r, RCODE_CONFLICT_ERROR );
+
+	status = fwnet_incoming_packet(dev, payload, length, source, false);
+	if (status != 0) {
+		fw_error("Incoming packet failure\n");
+		fw_send_response(card, r, RCODE_CONFLICT_ERROR);
+
 		return;
 	}
-	fw_send_response ( card, r, RCODE_COMPLETE );
+
+	fw_send_response(card, r, RCODE_COMPLETE);
 }
 
-static void ipv4_receive_broadcast(struct fw_iso_context *context, u32 cycle,
- size_t header_length, void *header, void *data) {
-	struct ipv4_priv *priv;
+static void fwnet_receive_broadcast(struct fw_iso_context *context,
+		u32 cycle, size_t header_length, void *header, void *data)
+{
+	struct fwnet_device *dev;
 	struct fw_iso_packet packet;
 	struct fw_card *card;
-	u16 *hdr_ptr;
-	u32 *buf_ptr;
+	__be16 *hdr_ptr;
+	__be32 *buf_ptr;
 	int retval;
 	u32 length;
 	u16 source_node_id;
@@ -1055,70 +943,68 @@ static void ipv4_receive_broadcast(struct fw_iso_context *context, u32 cycle,
 	unsigned long offset;
 	unsigned long flags;
 
-	fw_debug ( "ipv4_receive_broadcast ( context=%p, cycle=%x, header_length=%lx, header=%p, data=%p )\n", context, cycle, (unsigned long)header_length, header, data );
-	print_hex_dump ( KERN_DEBUG, "header: ", DUMP_PREFIX_OFFSET, 32, 1, header, header_length, false );
-	priv = data;
-	card = priv->card;
+	dev = data;
+	card = dev->card;
 	hdr_ptr = header;
-	length = ntohs(hdr_ptr[0]);
-	spin_lock_irqsave(&priv->lock,flags);
-	offset = priv->rcv_buffer_size * priv->broadcast_rcv_next_ptr;
-	buf_ptr = priv->broadcast_rcv_buffer_ptrs[priv->broadcast_rcv_next_ptr++];
-	if ( priv->broadcast_rcv_next_ptr == priv->num_broadcast_rcv_ptrs )
-		priv->broadcast_rcv_next_ptr = 0;
-	spin_unlock_irqrestore(&priv->lock,flags);
-	fw_debug ( "length %u at %p\n", length, buf_ptr );
-	print_hex_dump ( KERN_DEBUG, "buffer: ", DUMP_PREFIX_OFFSET, 32, 1, buf_ptr, length, false );
+	length = be16_to_cpup(hdr_ptr);
+
+	spin_lock_irqsave(&dev->lock, flags);
+
+	offset = dev->rcv_buffer_size * dev->broadcast_rcv_next_ptr;
+	buf_ptr = dev->broadcast_rcv_buffer_ptrs[dev->broadcast_rcv_next_ptr++];
+	if (dev->broadcast_rcv_next_ptr == dev->num_broadcast_rcv_ptrs)
+		dev->broadcast_rcv_next_ptr = 0;
+
+	spin_unlock_irqrestore(&dev->lock, flags);
 
 	specifier_id =    (be32_to_cpu(buf_ptr[0]) & 0xffff) << 8
 			| (be32_to_cpu(buf_ptr[1]) & 0xff000000) >> 24;
-	ver = be32_to_cpu(buf_ptr[1]) & 0xFFFFFF;
+	ver = be32_to_cpu(buf_ptr[1]) & 0xffffff;
 	source_node_id = be32_to_cpu(buf_ptr[0]) >> 16;
-	/* fw_debug ( "source %x SpecID %x ver %x\n", source_node_id, specifier_id, ver ); */
-	if ( specifier_id == IPV4_GASP_SPECIFIER_ID && ver == IPV4_GASP_VERSION ) {
+
+	if (specifier_id == IANA_SPECIFIER_ID && ver == RFC2734_SW_VERSION) {
 		buf_ptr += 2;
-		length -= IPV4_GASP_OVERHEAD;
-		ipv4_incoming_packet(priv, buf_ptr, length, source_node_id, true);
-	} else
-		fw_debug ( "Ignoring packet: not GASP\n" );
-	packet.payload_length = priv->rcv_buffer_size;
+		length -= IEEE1394_GASP_HDR_SIZE;
+		fwnet_incoming_packet(dev, buf_ptr, length,
+				      source_node_id, true);
+	}
+
+	packet.payload_length = dev->rcv_buffer_size;
 	packet.interrupt = 1;
 	packet.skip = 0;
 	packet.tag = 3;
 	packet.sy = 0;
-	packet.header_length = IPV4_GASP_OVERHEAD;
-	spin_lock_irqsave(&priv->lock,flags);
-	retval = fw_iso_context_queue ( priv->broadcast_rcv_context, &packet,
- &priv->broadcast_rcv_buffer, offset );
-	spin_unlock_irqrestore(&priv->lock,flags);
-	if ( retval < 0 )
-		fw_error ( "requeue failed\n" );
-}
+	packet.header_length = IEEE1394_GASP_HDR_SIZE;
+
+	spin_lock_irqsave(&dev->lock, flags);
 
-static void debug_ptask ( struct ipv4_packet_task *ptask ) {
-	static const char *tx_types[] = { "Unknown", "GASP", "Write" };
-
-	fw_debug ( "packet %p { hdr { w0 %x w1 %x }, skb %p, priv %p,"
- " tx_type %s, outstanding_pkts %d, max_payload %x, fifo %llx,"
- " speed %x, dest_node %x, generation %x }\n",
- ptask, ptask->hdr.w0, ptask->hdr.w1, ptask->skb, ptask->priv,
- ptask->tx_type > IPV4_WRREQ ? "Invalid" : tx_types[ptask->tx_type],
- ptask->outstanding_pkts,  ptask->max_payload,
- ptask->fifo_addr, ptask->speed, ptask->dest_node, ptask->generation );
-	print_hex_dump ( KERN_DEBUG, "packet :", DUMP_PREFIX_OFFSET, 32, 1,
- ptask->skb->data, ptask->skb->len, false );
+	retval = fw_iso_context_queue(dev->broadcast_rcv_context, &packet,
+				      &dev->broadcast_rcv_buffer, offset);
+
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	if (retval < 0)
+		fw_error("requeue failed\n");
 }
 
-static void ipv4_transmit_packet_done ( struct ipv4_packet_task *ptask ) {
-	struct ipv4_priv *priv;
+static struct kmem_cache *fwnet_packet_task_cache;
+
+static int fwnet_send_packet(struct fwnet_packet_task *ptask);
+
+static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask)
+{
+	struct fwnet_device *dev;
 	unsigned long flags;
 
-	priv = ptask->priv;
-	spin_lock_irqsave ( &priv->lock, flags );
-	list_del ( &ptask->packet_list );
-	spin_unlock_irqrestore ( &priv->lock, flags );
-	ptask->outstanding_pkts--;
-	if ( ptask->outstanding_pkts > 0 ) {
+	dev = ptask->dev;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	list_del(&ptask->pt_link);
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	ptask->outstanding_pkts--; /* FIXME access inside lock */
+
+	if (ptask->outstanding_pkts > 0) {
 		u16 dg_size;
 		u16 fg_off;
 		u16 datagram_label;
@@ -1126,133 +1012,139 @@ static void ipv4_transmit_packet_done ( struct ipv4_packet_task *ptask ) {
 		struct sk_buff *skb;
 
 		/* Update the ptask to point to the next fragment and send it */
-		lf = ipv4_get_hdr_lf(&ptask->hdr);
+		lf = fwnet_get_hdr_lf(&ptask->hdr);
 		switch (lf) {
-		case IPV4_HDR_LASTFRAG:
-		case IPV4_HDR_UNFRAG:
+		case RFC2374_HDR_LASTFRAG:
+		case RFC2374_HDR_UNFRAG:
 		default:
-			fw_error ( "Outstanding packet %x lf %x, header %x,%x\n", ptask->outstanding_pkts, lf, ptask->hdr.w0, ptask->hdr.w1 );
+			fw_error("Outstanding packet %x lf %x, header %x,%x\n",
+				 ptask->outstanding_pkts, lf, ptask->hdr.w0,
+				 ptask->hdr.w1);
 			BUG();
 
-		case IPV4_HDR_FIRSTFRAG:
+		case RFC2374_HDR_FIRSTFRAG:
 			/* Set frag type here for future interior fragments */
-			dg_size = ipv4_get_hdr_dg_size(&ptask->hdr);
-			fg_off = ptask->max_payload - IPV4_FRAG_HDR_SIZE;
-			datagram_label = ipv4_get_hdr_dgl(&ptask->hdr);
+			dg_size = fwnet_get_hdr_dg_size(&ptask->hdr);
+			fg_off = ptask->max_payload - RFC2374_FRAG_HDR_SIZE;
+			datagram_label = fwnet_get_hdr_dgl(&ptask->hdr);
 			break;
 
-		case IPV4_HDR_INTFRAG:
-			dg_size = ipv4_get_hdr_dg_size(&ptask->hdr);
-			fg_off = ipv4_get_hdr_fg_off(&ptask->hdr) + ptask->max_payload - IPV4_FRAG_HDR_SIZE;
-			datagram_label = ipv4_get_hdr_dgl(&ptask->hdr);
+		case RFC2374_HDR_INTFRAG:
+			dg_size = fwnet_get_hdr_dg_size(&ptask->hdr);
+			fg_off = fwnet_get_hdr_fg_off(&ptask->hdr)
+				  + ptask->max_payload - RFC2374_FRAG_HDR_SIZE;
+			datagram_label = fwnet_get_hdr_dgl(&ptask->hdr);
 			break;
 		}
 		skb = ptask->skb;
-		skb_pull ( skb, ptask->max_payload );
-		if ( ptask->outstanding_pkts > 1 ) {
-			ipv4_make_sf_hdr ( &ptask->hdr,
-  IPV4_HDR_INTFRAG, dg_size, fg_off, datagram_label );
+		skb_pull(skb, ptask->max_payload);
+		if (ptask->outstanding_pkts > 1) {
+			fwnet_make_sf_hdr(&ptask->hdr, RFC2374_HDR_INTFRAG,
+					  dg_size, fg_off, datagram_label);
 		} else {
-			ipv4_make_sf_hdr ( &ptask->hdr,
-  IPV4_HDR_LASTFRAG, dg_size, fg_off, datagram_label );
-			ptask->max_payload = skb->len + IPV4_FRAG_HDR_SIZE;
-
+			fwnet_make_sf_hdr(&ptask->hdr, RFC2374_HDR_LASTFRAG,
+					  dg_size, fg_off, datagram_label);
+			ptask->max_payload = skb->len + RFC2374_FRAG_HDR_SIZE;
 		}
-		ipv4_send_packet ( ptask );
+		fwnet_send_packet(ptask);
 	} else {
-		dev_kfree_skb_any ( ptask->skb );
-		kmem_cache_free( ipv4_packet_task_cache, ptask );
+		dev_kfree_skb_any(ptask->skb);
+		kmem_cache_free(fwnet_packet_task_cache, ptask);
 	}
 }
 
-static void ipv4_write_complete ( struct fw_card *card, int rcode,
- void *payload, size_t length, void *data ) {
-	struct ipv4_packet_task *ptask;
+static void fwnet_write_complete(struct fw_card *card, int rcode,
+				 void *payload, size_t length, void *data)
+{
+	struct fwnet_packet_task *ptask;
 
 	ptask = data;
-	fw_debug ( "ipv4_write_complete ( %p, %x, %p, %lx, %p )\n",
- card, rcode, payload, (unsigned long)length, data );
-	debug_ptask ( ptask );
 
-	if ( rcode == RCODE_COMPLETE ) {
-		ipv4_transmit_packet_done ( ptask );
-	} else {
-		fw_error ( "ipv4_write_complete: failed: %x\n", rcode );
+	if (rcode == RCODE_COMPLETE)
+		fwnet_transmit_packet_done(ptask);
+	else
+		fw_error("fwnet_write_complete: failed: %x\n", rcode);
 		/* ??? error recovery */
-	}
 }
 
-static int ipv4_send_packet ( struct ipv4_packet_task *ptask ) {
-	struct ipv4_priv *priv;
+static int fwnet_send_packet(struct fwnet_packet_task *ptask)
+{
+	struct fwnet_device *dev;
 	unsigned tx_len;
-	struct ipv4_hdr *bufhdr;
+	struct rfc2734_header *bufhdr;
 	unsigned long flags;
-	struct net_device *netdev;
-#if 0 /* stefanr */
-	int retval;
-#endif
+	struct net_device *net;
 
-	fw_debug ( "ipv4_send_packet\n" );
-	debug_ptask ( ptask );
-	priv = ptask->priv;
+	dev = ptask->dev;
 	tx_len = ptask->max_payload;
-	switch (ipv4_get_hdr_lf(&ptask->hdr)) {
-	case IPV4_HDR_UNFRAG:
-		bufhdr = (struct ipv4_hdr *)skb_push(ptask->skb, IPV4_UNFRAG_HDR_SIZE);
-		bufhdr->w0 = htonl(ptask->hdr.w0);
+	switch (fwnet_get_hdr_lf(&ptask->hdr)) {
+	case RFC2374_HDR_UNFRAG:
+		bufhdr = (struct rfc2734_header *)
+				skb_push(ptask->skb, RFC2374_UNFRAG_HDR_SIZE);
+		put_unaligned_be32(ptask->hdr.w0, &bufhdr->w0);
 		break;
 
-	case IPV4_HDR_FIRSTFRAG:
-	case IPV4_HDR_INTFRAG:
-	case IPV4_HDR_LASTFRAG:
-		bufhdr = (struct ipv4_hdr *)skb_push(ptask->skb, IPV4_FRAG_HDR_SIZE);
-		bufhdr->w0 = htonl(ptask->hdr.w0);
-		bufhdr->w1 = htonl(ptask->hdr.w1);
+	case RFC2374_HDR_FIRSTFRAG:
+	case RFC2374_HDR_INTFRAG:
+	case RFC2374_HDR_LASTFRAG:
+		bufhdr = (struct rfc2734_header *)
+				skb_push(ptask->skb, RFC2374_FRAG_HDR_SIZE);
+		put_unaligned_be32(ptask->hdr.w0, &bufhdr->w0);
+		put_unaligned_be32(ptask->hdr.w1, &bufhdr->w1);
 		break;
 
 	default:
 		BUG();
 	}
-	if ( ptask->tx_type == IPV4_GASP ) {
-		u32 *packets;
+	if (ptask->dest_node == IEEE1394_ALL_NODES) {
+		u8 *p;
 		int generation;
-		int nodeid;
+		int node_id;
 
 		/* ptask->generation may not have been set yet */
-		generation = priv->card->generation;
+		generation = dev->card->generation;
 		smp_rmb();
-		nodeid = priv->card->node_id;
-		packets = (u32 *)skb_push(ptask->skb, sizeof(u32)*2);
-		packets[0] = htonl(nodeid << 16 | (IPV4_GASP_SPECIFIER_ID>>8));
-		packets[1] = htonl((IPV4_GASP_SPECIFIER_ID & 0xFF) << 24 | IPV4_GASP_VERSION);
-		fw_send_request ( priv->card, &ptask->transaction, TCODE_STREAM_DATA,
- fw_stream_packet_destination_id(3, BROADCAST_CHANNEL, 0),
- generation, SCODE_100, 0ULL, ptask->skb->data, tx_len + 8, ipv4_write_complete, ptask );
-		spin_lock_irqsave(&priv->lock,flags);
-		list_add_tail ( &ptask->packet_list, &priv->broadcasted_list );
-		spin_unlock_irqrestore(&priv->lock,flags);
-#if 0 /* stefanr */
-		return retval;
-#else
+		node_id = dev->card->node_id;
+
+		p = skb_push(ptask->skb, 8);
+		put_unaligned_be32(node_id << 16 | IANA_SPECIFIER_ID >> 8, p);
+		put_unaligned_be32((IANA_SPECIFIER_ID & 0xff) << 24
+						| RFC2734_SW_VERSION, &p[4]);
+
+		/* We should not transmit if broadcast_channel.valid == 0. */
+		fw_send_request(dev->card, &ptask->transaction,
+				TCODE_STREAM_DATA,
+				fw_stream_packet_destination_id(3,
+						IEEE1394_BROADCAST_CHANNEL, 0),
+				generation, SCODE_100, 0ULL, ptask->skb->data,
+				tx_len + 8, fwnet_write_complete, ptask);
+
+		/* FIXME race? */
+		spin_lock_irqsave(&dev->lock, flags);
+		list_add_tail(&ptask->pt_link, &dev->broadcasted_list);
+		spin_unlock_irqrestore(&dev->lock, flags);
+
 		return 0;
-#endif
 	}
-	fw_debug("send_request (%p, %p, WRITE_BLOCK, %x, %x, %x, %llx, %p, %d, %p, %p\n",
- priv->card, &ptask->transaction, ptask->dest_node, ptask->generation,
- ptask->speed, (unsigned long long)ptask->fifo_addr, ptask->skb->data, tx_len,
- ipv4_write_complete, ptask );
-	fw_send_request ( priv->card, &ptask->transaction,
- TCODE_WRITE_BLOCK_REQUEST, ptask->dest_node, ptask->generation, ptask->speed,
- ptask->fifo_addr, ptask->skb->data, tx_len, ipv4_write_complete, ptask );
-	spin_lock_irqsave(&priv->lock,flags);
-	list_add_tail ( &ptask->packet_list, &priv->sent_list );
-	spin_unlock_irqrestore(&priv->lock,flags);
-	netdev = priv->card->netdev;
-	netdev->trans_start = jiffies;
+
+	fw_send_request(dev->card, &ptask->transaction,
+			TCODE_WRITE_BLOCK_REQUEST, ptask->dest_node,
+			ptask->generation, ptask->speed, ptask->fifo_addr,
+			ptask->skb->data, tx_len, fwnet_write_complete, ptask);
+
+	/* FIXME race? */
+	spin_lock_irqsave(&dev->lock, flags);
+	list_add_tail(&ptask->pt_link, &dev->sent_list);
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	net = dev->card->netdev;
+	net->trans_start = jiffies;
+
 	return 0;
 }
 
-static int ipv4_broadcast_start ( struct ipv4_priv *priv ) {
+static int fwnet_broadcast_start(struct fwnet_device *dev)
+{
 	struct fw_iso_context *context;
 	int retval;
 	unsigned num_packets;
@@ -1260,150 +1152,151 @@ static int ipv4_broadcast_start ( struct ipv4_priv *priv ) {
 	struct fw_iso_packet packet;
 	unsigned long offset;
 	unsigned u;
-	/* unsigned transmit_speed; */
 
-#if 0 /* stefanr */
-	if ( priv->card->broadcast_channel != (BROADCAST_CHANNEL_VALID|BROADCAST_CHANNEL_INITIAL)) {
-		fw_notify ( "Invalid broadcast channel %x\n", priv->card->broadcast_channel );
-		/* FIXME: try again later? */
-		/* return -EINVAL; */
-	}
-#endif
-	if ( priv->local_fifo == INVALID_FIFO_ADDR ) {
-		struct fw_address_region region;
-
-		priv->handler.length = FIFO_SIZE;
-		priv->handler.address_callback = ipv4_receive_packet;
-		priv->handler.callback_data = priv;
-		/* FIXME: this is OHCI, but what about others? */
-		region.start = 0xffff00000000ULL;
-		region.end =   0xfffffffffffcULL;
-
-		retval = fw_core_add_address_handler ( &priv->handler, &region );
-		if ( retval < 0 )
+	if (dev->local_fifo == FWNET_NO_FIFO_ADDR) {
+		/* outside OHCI posted write area? */
+		static const struct fw_address_region region = {
+			.start = 0xffff00000000ULL,
+			.end   = CSR_REGISTER_BASE,
+		};
+
+		dev->handler.length = 4096;
+		dev->handler.address_callback = fwnet_receive_packet;
+		dev->handler.callback_data = dev;
+
+		retval = fw_core_add_address_handler(&dev->handler, &region);
+		if (retval < 0)
 			goto failed_initial;
-		priv->local_fifo = priv->handler.offset;
+
+		dev->local_fifo = dev->handler.offset;
 	}
 
-	/*
-	 * FIXME: rawiso limits us to PAGE_SIZE.  This only matters if we ever have
-	 * a machine with PAGE_SIZE < 4096
-	 */
-	max_receive = 1U << (priv->card->max_receive + 1);
-	num_packets = ( ipv4_iso_page_count * PAGE_SIZE ) / max_receive;
-	if ( ! priv->broadcast_rcv_context ) {
+	max_receive = 1U << (dev->card->max_receive + 1);
+	num_packets = (FWNET_ISO_PAGE_COUNT * PAGE_SIZE) / max_receive;
+
+	if (!dev->broadcast_rcv_context) {
 		void **ptrptr;
 
-		context = fw_iso_context_create ( priv->card,
- FW_ISO_CONTEXT_RECEIVE, BROADCAST_CHANNEL,
- priv->card->link_speed, 8, ipv4_receive_broadcast, priv );
+		context = fw_iso_context_create(dev->card,
+		    FW_ISO_CONTEXT_RECEIVE, IEEE1394_BROADCAST_CHANNEL,
+		    dev->card->link_speed, 8, fwnet_receive_broadcast, dev);
 		if (IS_ERR(context)) {
 			retval = PTR_ERR(context);
 			goto failed_context_create;
 		}
-		retval = fw_iso_buffer_init ( &priv->broadcast_rcv_buffer,
- priv->card, ipv4_iso_page_count, DMA_FROM_DEVICE );
-		if ( retval < 0 )
+
+		retval = fw_iso_buffer_init(&dev->broadcast_rcv_buffer,
+		    dev->card, FWNET_ISO_PAGE_COUNT, DMA_FROM_DEVICE);
+		if (retval < 0)
 			goto failed_buffer_init;
-		ptrptr = kmalloc ( sizeof(void*)*num_packets, GFP_KERNEL );
-		if ( ! ptrptr ) {
+
+		ptrptr = kmalloc(sizeof(void *) * num_packets, GFP_KERNEL);
+		if (!ptrptr) {
 			retval = -ENOMEM;
 			goto failed_ptrs_alloc;
 		}
-		priv->broadcast_rcv_buffer_ptrs = ptrptr;
-		for ( u = 0; u < ipv4_iso_page_count; u++ ) {
+
+		dev->broadcast_rcv_buffer_ptrs = ptrptr;
+		for (u = 0; u < FWNET_ISO_PAGE_COUNT; u++) {
 			void *ptr;
 			unsigned v;
 
-			ptr = kmap ( priv->broadcast_rcv_buffer.pages[u] );
-			for ( v = 0; v < num_packets / ipv4_iso_page_count; v++ )
-				*ptrptr++ = (void *)((char *)ptr + v * max_receive);
+			ptr = kmap(dev->broadcast_rcv_buffer.pages[u]);
+			for (v = 0; v < num_packets / FWNET_ISO_PAGE_COUNT; v++)
+				*ptrptr++ = (void *)
+						((char *)ptr + v * max_receive);
 		}
-		priv->broadcast_rcv_context = context;
-	} else
-		context = priv->broadcast_rcv_context;
+		dev->broadcast_rcv_context = context;
+	} else {
+		context = dev->broadcast_rcv_context;
+	}
 
 	packet.payload_length = max_receive;
 	packet.interrupt = 1;
 	packet.skip = 0;
 	packet.tag = 3;
 	packet.sy = 0;
-	packet.header_length = IPV4_GASP_OVERHEAD;
+	packet.header_length = IEEE1394_GASP_HDR_SIZE;
 	offset = 0;
-	for ( u = 0; u < num_packets; u++ ) {
-		retval = fw_iso_context_queue ( context, &packet,
- &priv->broadcast_rcv_buffer, offset );
-		if ( retval < 0 )
+
+	for (u = 0; u < num_packets; u++) {
+		retval = fw_iso_context_queue(context, &packet,
+				&dev->broadcast_rcv_buffer, offset);
+		if (retval < 0)
 			goto failed_rcv_queue;
+
 		offset += max_receive;
 	}
-	priv->num_broadcast_rcv_ptrs = num_packets;
-	priv->rcv_buffer_size = max_receive;
-	priv->broadcast_rcv_next_ptr = 0U;
-	retval = fw_iso_context_start ( context, -1, 0, FW_ISO_CONTEXT_MATCH_ALL_TAGS ); /* ??? sync */
-	if ( retval < 0 )
+	dev->num_broadcast_rcv_ptrs = num_packets;
+	dev->rcv_buffer_size = max_receive;
+	dev->broadcast_rcv_next_ptr = 0U;
+	retval = fw_iso_context_start(context, -1, 0,
+			FW_ISO_CONTEXT_MATCH_ALL_TAGS); /* ??? sync */
+	if (retval < 0)
 		goto failed_rcv_queue;
-	/* FIXME: adjust this when we know the max receive speeds of all other IP nodes on the bus. */
-	/* since we only xmt at S100 ??? */
-	priv->broadcast_xmt_max_payload = S100_BUFFER_SIZE - IPV4_GASP_OVERHEAD - IPV4_UNFRAG_HDR_SIZE;
-	priv->broadcast_state = IPV4_BROADCAST_RUNNING;
+
+	/* FIXME: adjust it according to the min. speed of all known peers? */
+	dev->broadcast_xmt_max_payload = IEEE1394_MAX_PAYLOAD_S100
+			- IEEE1394_GASP_HDR_SIZE - RFC2374_UNFRAG_HDR_SIZE;
+	dev->broadcast_state = FWNET_BROADCAST_RUNNING;
+
 	return 0;
 
  failed_rcv_queue:
-	kfree ( priv->broadcast_rcv_buffer_ptrs );
-	priv->broadcast_rcv_buffer_ptrs = NULL;
+	kfree(dev->broadcast_rcv_buffer_ptrs);
+	dev->broadcast_rcv_buffer_ptrs = NULL;
  failed_ptrs_alloc:
-	fw_iso_buffer_destroy ( &priv->broadcast_rcv_buffer, priv->card );
+	fw_iso_buffer_destroy(&dev->broadcast_rcv_buffer, dev->card);
  failed_buffer_init:
-	fw_iso_context_destroy ( context );
-	priv->broadcast_rcv_context = NULL;
+	fw_iso_context_destroy(context);
+	dev->broadcast_rcv_context = NULL;
  failed_context_create:
-	fw_core_remove_address_handler ( &priv->handler );
+	fw_core_remove_address_handler(&dev->handler);
  failed_initial:
-	priv->local_fifo = INVALID_FIFO_ADDR;
+	dev->local_fifo = FWNET_NO_FIFO_ADDR;
+
 	return retval;
 }
 
-/* This is called after an "ifup" */
-static int ipv4_open(struct net_device *dev) {
-	struct ipv4_priv *priv;
+/* ifup */
+static int fwnet_open(struct net_device *net)
+{
+	struct fwnet_device *dev = netdev_priv(net);
 	int ret;
 
-	priv = netdev_priv(dev);
-	if (priv->broadcast_state == IPV4_BROADCAST_ERROR) {
-		ret = ipv4_broadcast_start ( priv );
+	if (dev->broadcast_state == FWNET_BROADCAST_ERROR) {
+		ret = fwnet_broadcast_start(dev);
 		if (ret)
 			return ret;
 	}
-	netif_start_queue(dev);
+	netif_start_queue(net);
+
 	return 0;
 }
 
-/* This is called after an "ifdown" */
-static int ipv4_stop(struct net_device *netdev)
+/* ifdown */
+static int fwnet_stop(struct net_device *net)
 {
-	/* flush priv->wake */
-	/* flush_scheduled_work(); */
+	netif_stop_queue(net);
+
+	/* Deallocate iso context for use by other applications? */
 
-	netif_stop_queue(netdev);
 	return 0;
 }
 
-/* Transmit a packet (called by kernel) */
-static int ipv4_tx(struct sk_buff *skb, struct net_device *netdev)
+static int fwnet_tx(struct sk_buff *skb, struct net_device *net)
 {
-	struct ipv4_ether_hdr hdr_buf;
-	struct ipv4_priv *priv = netdev_priv(netdev);
+	struct fwnet_header hdr_buf;
+	struct fwnet_device *dev = netdev_priv(net);
 	__be16 proto;
 	u16 dest_node;
-	enum ipv4_tx_type tx_type;
 	unsigned max_payload;
 	u16 dg_size;
 	u16 *datagram_label_ptr;
-	struct ipv4_packet_task *ptask;
-	struct ipv4_node *node = NULL;
+	struct fwnet_packet_task *ptask;
+	struct fwnet_peer *peer = NULL;
 
-	ptask = kmem_cache_alloc(ipv4_packet_task_cache, GFP_ATOMIC);
+	ptask = kmem_cache_alloc(fwnet_packet_task_cache, GFP_ATOMIC);
 	if (ptask == NULL)
 		goto fail;
 
@@ -1412,7 +1305,7 @@ static int ipv4_tx(struct sk_buff *skb, struct net_device *netdev)
 		goto fail;
 
 	/*
-	 * Get rid of the fake ipv4 header, but first make a copy.
+	 * Make a copy of the driver-specific header.
 	 * We might need to rebuild the header on tx failure.
 	 */
 	memcpy(&hdr_buf, skb->data, sizeof(hdr_buf));
@@ -1425,110 +1318,95 @@ static int ipv4_tx(struct sk_buff *skb, struct net_device *netdev)
 	 * Set the transmission type for the packet.  ARP packets and IP
 	 * broadcast packets are sent via GASP.
 	 */
-	if (   memcmp(hdr_buf.h_dest, netdev->broadcast, IPV4_ALEN) == 0
+	if (memcmp(hdr_buf.h_dest, net->broadcast, FWNET_ALEN) == 0
 	    || proto == htons(ETH_P_ARP)
-	    || (   proto == htons(ETH_P_IP)
-		&& IN_MULTICAST(ntohl(ip_hdr(skb)->daddr)) ) ) {
-		/* fw_debug ( "transmitting arp or multicast packet\n" );*/
-		tx_type = IPV4_GASP;
-		dest_node = ALL_NODES;
-		max_payload = priv->broadcast_xmt_max_payload;
-		/* BUG_ON(max_payload < S100_BUFFER_SIZE - IPV4_GASP_OVERHEAD); */
-		datagram_label_ptr = &priv->broadcast_xmt_datagramlabel;
-		ptask->fifo_addr = INVALID_FIFO_ADDR;
-		ptask->generation = 0U;
-		ptask->dest_node = 0U;
-		ptask->speed = 0;
+	    || (proto == htons(ETH_P_IP)
+		&& IN_MULTICAST(ntohl(ip_hdr(skb)->daddr)))) {
+		max_payload = dev->broadcast_xmt_max_payload;
+		datagram_label_ptr = &dev->broadcast_xmt_datagramlabel;
+
+		ptask->fifo_addr = FWNET_NO_FIFO_ADDR;
+		ptask->generation = 0;
+		ptask->dest_node = IEEE1394_ALL_NODES;
+		ptask->speed = SCODE_100;
 	} else {
-		__be64 guid = get_unaligned((u64 *)hdr_buf.h_dest);
+		__be64 guid = get_unaligned((__be64 *)hdr_buf.h_dest);
 		u8 generation;
 
-		node = ipv4_node_find_by_guid(priv, be64_to_cpu(guid));
-		if (!node) {
-			fw_debug ( "Normal packet but no node\n" );
+		peer = fwnet_peer_find_by_guid(dev, be64_to_cpu(guid));
+		if (!peer)
 			goto fail;
-		}
 
-		if (node->fifo == INVALID_FIFO_ADDR) {
-			fw_debug ( "Normal packet but no fifo addr\n" );
+		if (peer->fifo == FWNET_NO_FIFO_ADDR)
 			goto fail;
-		}
 
-		/* fw_debug ( "Transmitting normal packet to %x at %llxx\n", node->nodeid, node->fifo ); */
-		generation = node->generation;
-		dest_node = node->nodeid;
-		max_payload = node->max_payload;
-		/* BUG_ON(max_payload < S100_BUFFER_SIZE - IPV4_FRAG_HDR_SIZE); */
+		generation = peer->generation;
+		smp_rmb();
+		dest_node = peer->node_id;
+
+		max_payload = peer->max_payload;
+		datagram_label_ptr = &peer->datagram_label;
 
-		datagram_label_ptr = &node->datagram_label;
-		tx_type = IPV4_WRREQ;
-		ptask->fifo_addr = node->fifo;
+		ptask->fifo_addr = peer->fifo;
 		ptask->generation = generation;
 		ptask->dest_node = dest_node;
-		ptask->speed = node->xmt_speed;
+		ptask->speed = peer->xmt_speed;
 	}
 
 	/* If this is an ARP packet, convert it */
 	if (proto == htons(ETH_P_ARP)) {
-		/* Convert a standard ARP packet to 1394 ARP. The first 8 bytes (the entire
-		 * arphdr) is the same format as the ip1394 header, so they overlap.  The rest
-		 * needs to be munged a bit.  The remainder of the arphdr is formatted based
-		 * on hwaddr len and ipaddr len.  We know what they'll be, so it's easy to
-		 * judge.
-		 *
-		 * Now that the EUI is used for the hardware address all we need to do to make
-		 * this work for 1394 is to insert 2 quadlets that contain max_rec size,
-		 * speed, and unicast FIFO address information between the sender_unique_id
-		 * and the IP addresses.
-		 */
 		struct arphdr *arp = (struct arphdr *)skb->data;
 		unsigned char *arp_ptr = (unsigned char *)(arp + 1);
-		struct ipv4_arp *arp1394 = (struct ipv4_arp *)skb->data;
-		u32 ipaddr;
-
-		ipaddr = *(u32*)(arp_ptr + IPV4_ALEN);
-		arp1394->hw_addr_len    = 16;
-		arp1394->max_rec        = priv->card->max_receive;
-		arp1394->sspd		= priv->card->link_speed;
-		arp1394->fifo_hi	= htons(priv->local_fifo >> 32);
-		arp1394->fifo_lo        = htonl(priv->local_fifo & 0xFFFFFFFF);
-		arp1394->sip		= ipaddr;
+		struct rfc2734_arp *arp1394 = (struct rfc2734_arp *)skb->data;
+		__be32 ipaddr;
+
+		ipaddr = get_unaligned((__be32 *)(arp_ptr + FWNET_ALEN));
+
+		arp1394->hw_addr_len    = RFC2734_HW_ADDR_LEN;
+		arp1394->max_rec        = dev->card->max_receive;
+		arp1394->sspd		= dev->card->link_speed;
+
+		put_unaligned_be16(dev->local_fifo >> 32,
+				   &arp1394->fifo_hi);
+		put_unaligned_be32(dev->local_fifo & 0xffffffff,
+				   &arp1394->fifo_lo);
+		put_unaligned(ipaddr, &arp1394->sip);
 	}
-	if ( ipv4_max_xmt && max_payload > ipv4_max_xmt )
-		max_payload = ipv4_max_xmt;
 
 	ptask->hdr.w0 = 0;
 	ptask->hdr.w1 = 0;
 	ptask->skb = skb;
-	ptask->priv = priv;
-        ptask->tx_type = tx_type;
+	ptask->dev = dev;
+
 	/* Does it all fit in one packet? */
-	if ( dg_size <= max_payload ) {
-		ipv4_make_uf_hdr(&ptask->hdr, be16_to_cpu(proto));
+	if (dg_size <= max_payload) {
+		fwnet_make_uf_hdr(&ptask->hdr, ntohs(proto));
 		ptask->outstanding_pkts = 1;
-		max_payload = dg_size + IPV4_UNFRAG_HDR_SIZE;
+		max_payload = dg_size + RFC2374_UNFRAG_HDR_SIZE;
 	} else {
 		u16 datagram_label;
 
-		max_payload -= IPV4_FRAG_OVERHEAD;
+		max_payload -= RFC2374_FRAG_OVERHEAD;
 		datagram_label = (*datagram_label_ptr)++;
-		ipv4_make_ff_hdr(&ptask->hdr, be16_to_cpu(proto), dg_size, datagram_label );
+		fwnet_make_ff_hdr(&ptask->hdr, ntohs(proto), dg_size,
+				  datagram_label);
 		ptask->outstanding_pkts = DIV_ROUND_UP(dg_size, max_payload);
-		max_payload += IPV4_FRAG_HDR_SIZE;
+		max_payload += RFC2374_FRAG_HDR_SIZE;
 	}
 	ptask->max_payload = max_payload;
-	ipv4_send_packet ( ptask );
+	fwnet_send_packet(ptask);
+
 	return NETDEV_TX_OK;
 
  fail:
 	if (ptask)
-		kmem_cache_free(ipv4_packet_task_cache, ptask);
+		kmem_cache_free(fwnet_packet_task_cache, ptask);
 
 	if (skb != NULL)
 		dev_kfree_skb(skb);
 
-	netdev->stats.tx_dropped++;
-	netdev->stats.tx_errors++;
+	net->stats.tx_dropped++;
+	net->stats.tx_errors++;
 
 	/*
 	 * FIXME: According to a patch from 2003-02-26, "returning non-zero
@@ -1540,280 +1418,291 @@ static int ipv4_tx(struct sk_buff *skb, struct net_device *netdev)
 	return NETDEV_TX_OK;
 }
 
-/*
- * FIXME: What to do if we timeout? I think a host reset is probably in order,
- * so that's what we do. Should we increment the stat counters too?
- */
-static void ipv4_tx_timeout(struct net_device *dev) {
-	struct ipv4_priv *priv;
+static void fwnet_tx_timeout(struct net_device *net)
+{
+	fw_error("%s: timeout\n", net->name);
 
-	priv = netdev_priv(dev);
-	fw_error ( "%s: Timeout, resetting host\n", dev->name );
-#if 0 /* stefanr */
-	fw_core_initiate_bus_reset ( priv->card, 1 );
-#endif
+	/* FIXME: What to do if we timeout? */
 }
 
-static int ipv4_change_mtu ( struct net_device *dev, int new_mtu ) {
-#if 0
-	int max_mtu;
-	struct ipv4_priv *priv;
-#endif
-
+static int fwnet_change_mtu(struct net_device *net, int new_mtu)
+{
 	if (new_mtu < 68)
 		return -EINVAL;
 
-#if 0
-	priv = netdev_priv(dev);
-	/* This is not actually true because we can fragment packets at the firewire layer */
-	max_mtu = (1 << (priv->card->max_receive + 1))
-		                - sizeof(struct ipv4_hdr) - IPV4_GASP_OVERHEAD;
-	if (new_mtu > max_mtu) {
-		fw_notify ( "%s: Local node constrains MTU to %d\n", dev->name, max_mtu);
-		return -ERANGE;
-	}
-#endif
-	dev->mtu = new_mtu;
+	net->mtu = new_mtu;
 	return 0;
 }
 
-static void ipv4_get_drvinfo(struct net_device *dev,
-struct ethtool_drvinfo *info) {
-	strcpy(info->driver, ipv4_driver_name);
-	strcpy(info->bus_info, "ieee1394"); /* FIXME provide more detail? */
+static void fwnet_get_drvinfo(struct net_device *net,
+			      struct ethtool_drvinfo *info)
+{
+	strcpy(info->driver, KBUILD_MODNAME);
+	strcpy(info->bus_info, "ieee1394");
 }
 
-static struct ethtool_ops ipv4_ethtool_ops = {
-	.get_drvinfo = ipv4_get_drvinfo,
+static struct ethtool_ops fwnet_ethtool_ops = {
+	.get_drvinfo = fwnet_get_drvinfo,
 };
 
-static const struct net_device_ops ipv4_netdev_ops = {
-	.ndo_open       = ipv4_open,
-	.ndo_stop	= ipv4_stop,
-	.ndo_start_xmit = ipv4_tx,
-	.ndo_tx_timeout = ipv4_tx_timeout,
-	.ndo_change_mtu = ipv4_change_mtu,
+static const struct net_device_ops fwnet_netdev_ops = {
+	.ndo_open       = fwnet_open,
+	.ndo_stop	= fwnet_stop,
+	.ndo_start_xmit = fwnet_tx,
+	.ndo_tx_timeout = fwnet_tx_timeout,
+	.ndo_change_mtu = fwnet_change_mtu,
 };
 
-static void ipv4_init_dev ( struct net_device *dev ) {
-	dev->header_ops		= &ipv4_header_ops;
-	dev->netdev_ops         = &ipv4_netdev_ops;
-	SET_ETHTOOL_OPS(dev, &ipv4_ethtool_ops);
-
-	dev->watchdog_timeo	= IPV4_TIMEOUT;
-	dev->flags		= IFF_BROADCAST | IFF_MULTICAST;
-	dev->features		= NETIF_F_HIGHDMA;
-	dev->addr_len		= IPV4_ALEN;
-	dev->hard_header_len	= IPV4_HLEN;
-	dev->type		= ARPHRD_IEEE1394;
-
-	/* FIXME: This value was copied from ether_setup(). Is it too much? */
-	dev->tx_queue_len	= 1000;
+static void fwnet_init_dev(struct net_device *net)
+{
+	net->header_ops		= &fwnet_header_ops;
+	net->netdev_ops		= &fwnet_netdev_ops;
+	net->watchdog_timeo	= 100000; /* ? FIXME */
+	net->flags		= IFF_BROADCAST | IFF_MULTICAST;
+	net->features		= NETIF_F_HIGHDMA;
+	net->addr_len		= FWNET_ALEN;
+	net->hard_header_len	= FWNET_HLEN;
+	net->type		= ARPHRD_IEEE1394;
+	net->tx_queue_len	= 1000; /* ? FIXME */
+	SET_ETHTOOL_OPS(net, &fwnet_ethtool_ops);
 }
 
-static int ipv4_probe ( struct device *dev ) {
-	struct fw_unit * unit;
-	struct fw_device *device;
-	struct fw_card *card;
-	struct net_device *netdev;
-	struct ipv4_priv *priv;
+/* FIXME create netdev upon first fw_unit of a card, not upon local fw_unit */
+static int fwnet_probe(struct device *_dev)
+{
+	struct fw_unit *unit = fw_unit(_dev);
+	struct fw_device *device = fw_parent_device(unit);
+	struct fw_card *card = device->card;
+	struct net_device *net;
+	struct fwnet_device *dev;
 	unsigned max_mtu;
-	__be64 guid;
-
-	fw_debug("ipv4 Probing\n" );
-	unit = fw_unit ( dev );
-	device = fw_device ( unit->device.parent );
-	card = device->card;
 
-	if ( ! device->is_local ) {
+	if (!device->is_local) {
 		int added;
 
-		fw_debug ( "Non-local, adding remote node entry\n" );
-		added = ipv4_node_new ( card, device );
+		added = fwnet_peer_new(card, device);
 		return added;
 	}
-	fw_debug("ipv4 Local: adding netdev\n" );
-	netdev = alloc_netdev ( sizeof(*priv), "firewire%d", ipv4_init_dev );
-	if ( netdev == NULL) {
-		fw_error( "Out of memory\n");
+	net = alloc_netdev(sizeof(*dev), "firewire%d", fwnet_init_dev);
+	if (net == NULL) {
+		fw_error("out of memory\n");
 		goto out;
 	}
 
-	SET_NETDEV_DEV(netdev, card->device);
-	priv = netdev_priv(netdev);
+	SET_NETDEV_DEV(net, card->device);
+	dev = netdev_priv(net);
 
-	spin_lock_init(&priv->lock);
-	priv->broadcast_state = IPV4_BROADCAST_ERROR;
-	priv->broadcast_rcv_context = NULL;
-	priv->broadcast_xmt_max_payload = 0;
-	priv->broadcast_xmt_datagramlabel = 0;
+	spin_lock_init(&dev->lock);
+	dev->broadcast_state = FWNET_BROADCAST_ERROR;
+	dev->broadcast_rcv_context = NULL;
+	dev->broadcast_xmt_max_payload = 0;
+	dev->broadcast_xmt_datagramlabel = 0;
 
-	priv->local_fifo = INVALID_FIFO_ADDR;
+	dev->local_fifo = FWNET_NO_FIFO_ADDR;
 
-	/* INIT_WORK(&priv->wake, ipv4_handle_queue);*/
-	INIT_LIST_HEAD(&priv->packet_list);
-	INIT_LIST_HEAD(&priv->broadcasted_list);
-	INIT_LIST_HEAD(&priv->sent_list );
+	/* INIT_WORK(&dev->wake, fwnet_handle_queue);*/
+	INIT_LIST_HEAD(&dev->packet_list);
+	INIT_LIST_HEAD(&dev->broadcasted_list);
+	INIT_LIST_HEAD(&dev->sent_list);
 
-	priv->card = card;
+	dev->card = card;
 
 	/*
 	 * Use the RFC 2734 default 1500 octets or the maximum payload
 	 * as initial MTU
 	 */
 	max_mtu = (1 << (card->max_receive + 1))
-		  - sizeof(struct ipv4_hdr) - IPV4_GASP_OVERHEAD;
-	netdev->mtu = min(1500U, max_mtu);
+		  - sizeof(struct rfc2734_header) - IEEE1394_GASP_HDR_SIZE;
+	net->mtu = min(1500U, max_mtu);
 
 	/* Set our hardware address while we're at it */
-	guid = cpu_to_be64(card->guid);
-	memcpy(netdev->dev_addr, &guid, sizeof(u64));
-	memset(netdev->broadcast, 0xff, sizeof(u64));
-	if ( register_netdev ( netdev ) ) {
-		fw_error ( "Cannot register the driver\n");
+	put_unaligned_be64(card->guid, net->dev_addr);
+	put_unaligned_be64(~0ULL, net->broadcast);
+	if (register_netdev(net)) {
+		fw_error("Cannot register the driver\n");
 		goto out;
 	}
 
-	fw_notify ( "%s: IPv4 over Firewire on device %016llx\n",
- netdev->name, card->guid );
-	card->netdev = netdev;
+	fw_notify("%s: IPv4 over FireWire on device %016llx\n",
+		  net->name, (unsigned long long)card->guid);
+	card->netdev = net;
 
-	return 0 /* ipv4_new_node ( ud ) */;
+	return 0;
  out:
-	if ( netdev )
-		free_netdev ( netdev );
+	if (net)
+		free_netdev(net);
+
 	return -ENOENT;
 }
 
+static int fwnet_remove(struct device *_dev)
+{
+	struct fw_unit *unit = fw_unit(_dev);
+	struct fw_device *device = fw_parent_device(unit);
+	struct fw_card *card = device->card;
+	struct net_device *net;
+	struct fwnet_device *dev;
+	struct fwnet_peer *peer;
+	struct fwnet_partial_datagram *pd, *pd_next;
+	struct fwnet_packet_task *ptask, *pt_next;
+
+	if (!device->is_local) {
+		fwnet_peer_delete(card, device);
 
-static int ipv4_remove ( struct device *dev ) {
-	struct fw_unit * unit;
-	struct fw_device *device;
-	struct fw_card *card;
-	struct net_device *netdev;
-	struct ipv4_priv *priv;
-	struct ipv4_node *node;
-	struct ipv4_partial_datagram *pd, *pd_next;
-	struct ipv4_packet_task *ptask, *pt_next;
-
-	fw_debug("ipv4 Removing\n" );
-	unit = fw_unit ( dev );
-	device = fw_device ( unit->device.parent );
-	card = device->card;
-
-	if ( ! device->is_local ) {
-		fw_debug ( "Node %x is non-local, removing remote node entry\n", device->node_id );
-		ipv4_node_delete ( card, device );
 		return 0;
 	}
-	netdev = card->netdev;
-	if ( netdev ) {
-		fw_debug ( "Node %x is local: deleting netdev\n", device->node_id );
-		priv = netdev_priv ( netdev );
-		unregister_netdev ( netdev );
-		fw_debug ( "unregistered\n" );
-		if ( priv->local_fifo != INVALID_FIFO_ADDR )
-			fw_core_remove_address_handler ( &priv->handler );
-		fw_debug ( "address handler gone\n" );
-		if ( priv->broadcast_rcv_context ) {
-			fw_iso_context_stop ( priv->broadcast_rcv_context );
-			fw_iso_buffer_destroy ( &priv->broadcast_rcv_buffer, priv->card );
-			fw_iso_context_destroy ( priv->broadcast_rcv_context );
-			fw_debug ( "rcv stopped\n" );
+
+	net = card->netdev;
+	if (net) {
+		dev = netdev_priv(net);
+		unregister_netdev(net);
+
+		if (dev->local_fifo != FWNET_NO_FIFO_ADDR)
+			fw_core_remove_address_handler(&dev->handler);
+		if (dev->broadcast_rcv_context) {
+			fw_iso_context_stop(dev->broadcast_rcv_context);
+			fw_iso_buffer_destroy(&dev->broadcast_rcv_buffer,
+					      dev->card);
+			fw_iso_context_destroy(dev->broadcast_rcv_context);
 		}
-		list_for_each_entry_safe( ptask, pt_next, &priv->packet_list, packet_list ) {
-			dev_kfree_skb_any ( ptask->skb );
-			kmem_cache_free( ipv4_packet_task_cache, ptask );
+		list_for_each_entry_safe(ptask, pt_next,
+					 &dev->packet_list, pt_link) {
+			dev_kfree_skb_any(ptask->skb);
+			kmem_cache_free(fwnet_packet_task_cache, ptask);
 		}
-		list_for_each_entry_safe( ptask, pt_next, &priv->broadcasted_list, packet_list ) {
-			dev_kfree_skb_any ( ptask->skb );
-			kmem_cache_free( ipv4_packet_task_cache, ptask );
+		list_for_each_entry_safe(ptask, pt_next,
+					 &dev->broadcasted_list, pt_link) {
+			dev_kfree_skb_any(ptask->skb);
+			kmem_cache_free(fwnet_packet_task_cache, ptask);
 		}
-		list_for_each_entry_safe( ptask, pt_next, &priv->sent_list, packet_list ) {
-			dev_kfree_skb_any ( ptask->skb );
-			kmem_cache_free( ipv4_packet_task_cache, ptask );
+		list_for_each_entry_safe(ptask, pt_next,
+					 &dev->sent_list, pt_link) {
+			dev_kfree_skb_any(ptask->skb);
+			kmem_cache_free(fwnet_packet_task_cache, ptask);
 		}
-		fw_debug ( "lists emptied\n" );
-		list_for_each_entry( node, &card->ipv4_nodes, ipv4_nodes ) {
-			if ( node->pdg_size ) {
-				list_for_each_entry_safe( pd, pd_next, &node->pdg_list, pdg_list )
-					ipv4_pd_delete ( pd );
-				node->pdg_size = 0;
+		list_for_each_entry(peer, &card->peer_list, peer_link) {
+			if (peer->pdg_size) {
+				list_for_each_entry_safe(pd, pd_next,
+						&peer->pd_list, pd_link)
+					fwnet_pd_delete(pd);
+				peer->pdg_size = 0;
 			}
-			node->fifo = INVALID_FIFO_ADDR;
+			peer->fifo = FWNET_NO_FIFO_ADDR;
 		}
-		fw_debug ( "nodes cleaned up\n" );
-		free_netdev ( netdev );
+		free_netdev(net);
 		card->netdev = NULL;
-		fw_debug ( "done\n" );
 	}
+
 	return 0;
 }
 
-static void ipv4_update ( struct fw_unit *unit ) {
-	struct fw_device *device;
-	struct fw_card *card;
+/*
+ * FIXME abort partially sent fragmented datagrams,
+ * discard partially received fragmented datagrams
+ */
+static void fwnet_update(struct fw_unit *unit)
+{
+	struct fw_device *device = fw_parent_device(unit);
+	struct net_device *net = device->card->netdev;
+	struct fwnet_device *dev;
+	struct fwnet_peer *peer;
+	u64 guid;
 
-	fw_debug ( "ipv4_update unit %p\n", unit );
-	device = fw_device ( unit->device.parent );
-	card = device->card;
-	if ( ! device->is_local ) {
-		struct ipv4_node *node;
-		u64 guid;
-		struct net_device *netdev;
-		struct ipv4_priv *priv;
-
-		netdev = card->netdev;
-		if ( netdev ) {
-			priv = netdev_priv ( netdev );
-			guid = (u64)device->config_rom[3] << 32 | device->config_rom[4];
-			node = ipv4_node_find_by_guid ( priv, guid );
-			if ( ! node ) {
-				fw_error ( "ipv4_update: no node for device %llx\n", guid );
-				return;
-			}
-			fw_debug ( "Non-local, updating remote node entry for guid %llx old generation %x, old nodeid %x\n", guid, node->generation, node->nodeid );
-			node->generation = device->generation;
-			rmb();
-			node->nodeid = device->node_id;
-			fw_debug ( "New generation %x, new nodeid %x\n", node->generation, node->nodeid );
-		} else
-			fw_error ( "nonlocal, but no netdev?  How can that be?\n" );
-	} else {
-		/* FIXME: What do we need to do on bus reset? */
-		fw_debug ( "Local, doing nothing\n" );
+	if (net && !device->is_local) {
+		dev = netdev_priv(net);
+		guid = (u64)device->config_rom[3] << 32 | device->config_rom[4];
+		peer = fwnet_peer_find_by_guid(dev, guid);
+		if (!peer) {
+			fw_error("fwnet_update: no peer for device %016llx\n",
+				 (unsigned long long)guid);
+			return;
+		}
+		peer->generation = device->generation;
+		rmb();
+		peer->node_id = device->node_id;
 	}
 }
 
-static struct fw_driver ipv4_driver = {
+static const struct ieee1394_device_id fwnet_id_table[] = {
+	{
+		.match_flags  = IEEE1394_MATCH_SPECIFIER_ID |
+				IEEE1394_MATCH_VERSION,
+		.specifier_id = IANA_SPECIFIER_ID,
+		.version      = RFC2734_SW_VERSION,
+	},
+	{ }
+};
+
+static struct fw_driver fwnet_driver = {
 	.driver = {
-		.owner = THIS_MODULE,
-		.name = ipv4_driver_name,
-		.bus = &fw_bus_type,
-		.probe = ipv4_probe,
-		.remove = ipv4_remove,
+		.owner  = THIS_MODULE,
+		.name   = "net",
+		.bus    = &fw_bus_type,
+		.probe  = fwnet_probe,
+		.remove = fwnet_remove,
 	},
-	.update = ipv4_update,
-	.id_table = ipv4_id_table,
+	.update   = fwnet_update,
+	.id_table = fwnet_id_table,
+};
+
+static const u32 rfc2374_unit_directory_data[] = {
+	0x00040000,	/* directory_length		*/
+	0x1200005e,	/* unit_specifier_id: IANA	*/
+	0x81000003,	/* textual descriptor offset	*/
+	0x13000001,	/* unit_sw_version: RFC 2734	*/
+	0x81000005,	/* textual descriptor offset	*/
+	0x00030000,	/* descriptor_length		*/
+	0x00000000,	/* text				*/
+	0x00000000,	/* minimal ASCII, en		*/
+	0x49414e41,	/* I A N A			*/
+	0x00030000,	/* descriptor_length		*/
+	0x00000000,	/* text				*/
+	0x00000000,	/* minimal ASCII, en		*/
+	0x49507634,	/* I P v 4			*/
+};
+
+static struct fw_descriptor rfc2374_unit_directory = {
+	.length = ARRAY_SIZE(rfc2374_unit_directory_data),
+	.key    = (CSR_DIRECTORY | CSR_UNIT) << 24,
+	.data   = rfc2374_unit_directory_data
 };
 
-static int __init ipv4_init ( void ) {
-	int added;
+static int __init fwnet_init(void)
+{
+	int err;
+
+	err = fw_core_add_descriptor(&rfc2374_unit_directory);
+	if (err)
+		return err;
 
-	added = fw_core_add_descriptor ( &ipv4_unit_directory );
-	if ( added < 0 )
-		fw_error ( "Failed to add descriptor" );
-	ipv4_packet_task_cache = kmem_cache_create("packet_task",
- sizeof(struct ipv4_packet_task), 0, 0, NULL);
-	fw_debug("Adding ipv4 module\n" );
-	return driver_register ( &ipv4_driver.driver );
+	fwnet_packet_task_cache = kmem_cache_create("packet_task",
+			sizeof(struct fwnet_packet_task), 0, 0, NULL);
+	if (!fwnet_packet_task_cache) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	err = driver_register(&fwnet_driver.driver);
+	if (!err)
+		return 0;
+
+	kmem_cache_destroy(fwnet_packet_task_cache);
+out:
+	fw_core_remove_descriptor(&rfc2374_unit_directory);
+
+	return err;
 }
+module_init(fwnet_init);
 
-static void __exit ipv4_cleanup ( void ) {
-	fw_core_remove_descriptor ( &ipv4_unit_directory );
-	fw_debug("Removing ipv4 module\n" );
-	driver_unregister ( &ipv4_driver.driver );
+static void __exit fwnet_cleanup(void)
+{
+	driver_unregister(&fwnet_driver.driver);
+	kmem_cache_destroy(fwnet_packet_task_cache);
+	fw_core_remove_descriptor(&rfc2374_unit_directory);
 }
+module_exit(fwnet_cleanup);
 
-module_init(ipv4_init);
-module_exit(ipv4_cleanup);
+MODULE_AUTHOR("Jay Fenlason <fenlason@redhat.com>");
+MODULE_DESCRIPTION("IPv4 over IEEE1394 as per RFC 2734");
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(ieee1394, fwnet_id_table);
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index d44f47d3b2d..5cb0c1549ff 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -131,13 +131,10 @@ struct fw_card {
 	bool broadcast_channel_allocated;
 	u32 broadcast_channel;
 	u32 topology_map[(CSR_TOPOLOGY_MAP_END - CSR_TOPOLOGY_MAP) / 4];
-	/* Only non-NULL if firewire-ipv4 is active on this card. */
+
+	/* firewire-net driver data */
 	void *netdev;
-	/*
-	 * The nodes get probed before the card, so we need a place to store
-	 * them independent of card->netdev
-	 */
-	struct list_head ipv4_nodes;
+	struct list_head peer_list;
 };
 
 static inline struct fw_card *fw_card_get(struct fw_card *card)
-- 
cgit v1.2.3-70-g09d2


From 5a124d382ea5c97be43c779e4f481455e0287654 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 14 Jun 2009 11:45:27 +0200
Subject: firewire: net: allow for unordered unit discovery

Decouple the creation and destruction of the net_device from the order
of discovery and removal of nodes with RFC 2734 unit directories since
there is no reliable order.  The net_device is now created when the
first RFC 2734 unit on a card is discovered, and destroyed when the last
RFC 2734 unit on a card went away.  This includes all remote units as
well as the local unit, which is therefore tracked as a peer now too.

Also, locking around the list of peers is slightly extended to guard
against peer removal.  As a side effect, fwnet_peer.pdg_lock has become
superfluous and is deleted.

Peer data (max_rec, speed, node ID, generation) are updated more
carefully.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-card.c |   2 -
 drivers/firewire/net.c       | 454 ++++++++++++++++++++-----------------------
 include/linux/firewire.h     |   4 -
 3 files changed, 207 insertions(+), 253 deletions(-)

(limited to 'include')

diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c
index 8c45e43da7c..667603ac14b 100644
--- a/drivers/firewire/core-card.c
+++ b/drivers/firewire/core-card.c
@@ -429,8 +429,6 @@ void fw_card_initialize(struct fw_card *card,
 	card->local_node = NULL;
 
 	INIT_DELAYED_WORK(&card->work, fw_card_bm_work);
-	card->netdev = NULL;
-	INIT_LIST_HEAD(&card->peer_list);
 }
 EXPORT_SYMBOL(fw_card_initialize);
 
diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c
index ba6f924b1b1..d83c54587a6 100644
--- a/drivers/firewire/net.c
+++ b/drivers/firewire/net.c
@@ -18,8 +18,10 @@
 #include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/mutex.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
+#include <linux/spinlock.h>
 
 #include <asm/unaligned.h>
 #include <net/arp.h>
@@ -135,40 +137,11 @@ struct fwnet_partial_datagram {
 	u16 datagram_size;
 };
 
-/*
- * We keep one of these for each IPv4 capable device attached to a fw_card.
- * The list of them is stored in the fw_card structure rather than in the
- * fwnet_device because the remote IPv4 nodes may be probed before the card is,
- * so we need a place to store them before the fwnet_device structure is
- * allocated.
- */
-struct fwnet_peer {
-	struct list_head peer_link;
-	/* guid of the remote peer */
-	u64 guid;
-	/* FIFO address to transmit datagrams to, or FWNET_NO_FIFO_ADDR */
-	u64 fifo;
-
-	spinlock_t pdg_lock;	/* partial datagram lock		*/
-	/* List of partial datagrams received from this peer */
-	struct list_head pd_list;
-	/* Number of entries in pd_list at the moment */
-	unsigned pdg_size;
-
-	/* max payload to transmit to this remote peer */
-	/* This already includes the RFC2374_FRAG_HDR_SIZE overhead */
-	u16 max_payload;
-	/* outgoing datagram label */
-	u16 datagram_label;
-	/* Current node_id of the remote peer */
-	u16 node_id;
-	/* current generation of the remote peer */
-	u8 generation;
-	/* max speed that this peer can receive at */
-	u8 xmt_speed;
-};
+static DEFINE_MUTEX(fwnet_device_mutex);
+static LIST_HEAD(fwnet_device_list);
 
 struct fwnet_device {
+	struct list_head dev_link;
 	spinlock_t lock;
 	enum {
 		FWNET_BROADCAST_ERROR,
@@ -206,7 +179,26 @@ struct fwnet_device {
 	/* List of packets that have been sent but not yet acked */
 	struct list_head sent_list;
 
+	struct list_head peer_list;
 	struct fw_card *card;
+	struct net_device *netdev;
+};
+
+struct fwnet_peer {
+	struct list_head peer_link;
+	struct fwnet_device *dev;
+	u64 guid;
+	u64 fifo;
+
+	/* guarded by dev->lock */
+	struct list_head pd_list; /* received partial datagrams */
+	unsigned pdg_size;        /* pd_list size */
+
+	u16 datagram_label;       /* outgoing datagram label */
+	unsigned max_payload;     /* includes RFC2374_FRAG_HDR_SIZE overhead */
+	int node_id;
+	int generation;
+	unsigned speed;
 };
 
 /* This is our task struct. It's used for the packet complete callback.  */
@@ -479,102 +471,47 @@ static bool fwnet_pd_is_complete(struct fwnet_partial_datagram *pd)
 	return fi->len == pd->datagram_size;
 }
 
-static int fwnet_peer_new(struct fw_card *card, struct fw_device *device)
-{
-	struct fwnet_peer *peer;
-
-	peer = kmalloc(sizeof(*peer), GFP_KERNEL);
-	if (!peer) {
-		fw_error("out of memory\n");
-
-		return -ENOMEM;
-	}
-	peer->guid = (u64)device->config_rom[3] << 32 | device->config_rom[4];
-	peer->fifo = FWNET_NO_FIFO_ADDR;
-	INIT_LIST_HEAD(&peer->pd_list);
-	spin_lock_init(&peer->pdg_lock);
-	peer->pdg_size = 0;
-	peer->generation = device->generation;
-	rmb();
-	peer->node_id = device->node_id;
-	 /* FIXME what should it really be? */
-	peer->max_payload = IEEE1394_MAX_PAYLOAD_S100 - RFC2374_UNFRAG_HDR_SIZE;
-	peer->datagram_label = 0U;
-	peer->xmt_speed = device->max_speed;
-	list_add_tail(&peer->peer_link, &card->peer_list);
-
-	return 0;
-}
-
-/* FIXME caller must take the lock, or peer needs to be reference-counted */
+/* caller must hold dev->lock */
 static struct fwnet_peer *fwnet_peer_find_by_guid(struct fwnet_device *dev,
 						  u64 guid)
 {
-	struct fwnet_peer *p, *peer = NULL;
-	unsigned long flags;
+	struct fwnet_peer *peer;
 
-	spin_lock_irqsave(&dev->lock, flags);
-	list_for_each_entry(p, &dev->card->peer_list, peer_link)
-		if (p->guid == guid) {
-			peer = p;
-			break;
-		}
-	spin_unlock_irqrestore(&dev->lock, flags);
+	list_for_each_entry(peer, &dev->peer_list, peer_link)
+		if (peer->guid == guid)
+			return peer;
 
-	return peer;
+	return NULL;
 }
 
-/* FIXME caller must take the lock, or peer needs to be reference-counted */
-/* FIXME node_id doesn't mean anything without generation */
+/* caller must hold dev->lock */
 static struct fwnet_peer *fwnet_peer_find_by_node_id(struct fwnet_device *dev,
-						     u16 node_id)
+						int node_id, int generation)
 {
-	struct fwnet_peer *p, *peer = NULL;
-	unsigned long flags;
+	struct fwnet_peer *peer;
 
-	spin_lock_irqsave(&dev->lock, flags);
-	list_for_each_entry(p, &dev->card->peer_list, peer_link)
-		if (p->node_id == node_id) {
-			peer = p;
-			break;
-		}
-	spin_unlock_irqrestore(&dev->lock, flags);
+	list_for_each_entry(peer, &dev->peer_list, peer_link)
+		if (peer->node_id    == node_id &&
+		    peer->generation == generation)
+			return peer;
 
-	return peer;
+	return NULL;
 }
 
-/* FIXME */
-static void fwnet_peer_delete(struct fw_card *card, struct fw_device *device)
+/* See IEEE 1394-2008 table 6-4, table 8-8, table 16-18. */
+static unsigned fwnet_max_payload(unsigned max_rec, unsigned speed)
 {
-	struct net_device *net;
-	struct fwnet_device *dev;
-	struct fwnet_peer *peer;
-	u64 guid;
-	unsigned long flags;
-	struct fwnet_partial_datagram *pd, *pd_next;
-
-	guid = (u64)device->config_rom[3] << 32 | device->config_rom[4];
-	net = card->netdev;
-	if (net)
-		dev = netdev_priv(net);
-	else
-		dev = NULL;
-	if (dev)
-		spin_lock_irqsave(&dev->lock, flags);
-
-	list_for_each_entry(peer, &card->peer_list, peer_link) {
-		if (peer->guid == guid) {
-			list_del(&peer->peer_link);
-			list_for_each_entry_safe(pd, pd_next, &peer->pd_list,
-						 pd_link)
-				fwnet_pd_delete(pd);
-			break;
-		}
+	max_rec = min(max_rec, speed + 8);
+	max_rec = min(max_rec, 0xbU); /* <= 4096 */
+	if (max_rec < 8) {
+		fw_notify("max_rec %x out of range\n", max_rec);
+		max_rec = 8;
 	}
-	if (dev)
-		spin_unlock_irqrestore(&dev->lock, flags);
+
+	return (1 << (max_rec + 1)) - RFC2374_FRAG_HDR_SIZE;
 }
 
+
 static int fwnet_finish_incoming_packet(struct net_device *net,
 					struct sk_buff *skb, u16 source_node_id,
 					bool is_broadcast, u16 ether_type)
@@ -606,71 +543,44 @@ static int fwnet_finish_incoming_packet(struct net_device *net,
 		unsigned char *arp_ptr;
 		u64 fifo_addr;
 		u64 peer_guid;
-		u8 max_rec;
-		u8 sspd;
+		unsigned sspd;
 		u16 max_payload;
 		struct fwnet_peer *peer;
-		static const u16 fwnet_speed_to_max_payload[] = {
-			/* S100, S200, S400, S800, S1600, S3200 */
-			    512, 1024, 2048, 4096,  4096,  4096
-		};
+		unsigned long flags;
+
+		arp1394   = (struct rfc2734_arp *)skb->data;
+		arp       = (struct arphdr *)skb->data;
+		arp_ptr   = (unsigned char *)(arp + 1);
+		peer_guid = get_unaligned_be64(&arp1394->s_uniq_id);
+		fifo_addr = (u64)get_unaligned_be16(&arp1394->fifo_hi) << 32
+				| get_unaligned_be32(&arp1394->fifo_lo);
 
-		arp1394 = (struct rfc2734_arp *)skb->data;
-		arp = (struct arphdr *)skb->data;
-		arp_ptr = (unsigned char *)(arp + 1);
-		fifo_addr = (u64)ntohs(arp1394->fifo_hi) << 32
-				| ntohl(arp1394->fifo_lo);
-		max_rec = dev->card->max_receive;
-		if (arp1394->max_rec < max_rec)
-			max_rec = arp1394->max_rec;
 		sspd = arp1394->sspd;
 		/* Sanity check.  OS X 10.3 PPC reportedly sends 131. */
 		if (sspd > SCODE_3200) {
 			fw_notify("sspd %x out of range\n", sspd);
-			sspd = 0;
+			sspd = SCODE_3200;
 		}
+		max_payload = fwnet_max_payload(arp1394->max_rec, sspd);
 
-		max_payload = min(fwnet_speed_to_max_payload[sspd],
-			(u16)(1 << (max_rec + 1))) - RFC2374_UNFRAG_HDR_SIZE;
-
-		peer_guid = get_unaligned_be64(&arp1394->s_uniq_id);
+		spin_lock_irqsave(&dev->lock, flags);
 		peer = fwnet_peer_find_by_guid(dev, peer_guid);
+		if (peer) {
+			peer->fifo = fifo_addr;
+
+			if (peer->speed > sspd)
+				peer->speed = sspd;
+			if (peer->max_payload > max_payload)
+				peer->max_payload = max_payload;
+		}
+		spin_unlock_irqrestore(&dev->lock, flags);
+
 		if (!peer) {
 			fw_notify("No peer for ARP packet from %016llx\n",
 				  (unsigned long long)peer_guid);
 			goto failed_proto;
 		}
 
-		/* FIXME don't use card->generation */
-		if (peer->node_id != source_node_id ||
-		    peer->generation != dev->card->generation) {
-			fw_notify("Internal error: peer->node_id (%x) != "
-				  "source_node_id (%x) or peer->generation (%x)"
-				  " != dev->card->generation(%x)\n",
-				  peer->node_id, source_node_id,
-				  peer->generation, dev->card->generation);
-			peer->node_id = source_node_id;
-			peer->generation = dev->card->generation;
-		}
-
-		/* FIXME: for debugging */
-		if (sspd > SCODE_400)
-			sspd = SCODE_400;
-		/* Update our speed/payload/fifo_offset table */
-		/*
-		 * FIXME: this does not handle cases where two high-speed endpoints must use a slower speed because of
-		 * a lower speed hub between them.  We need to look at the actual topology map here.
-		 */
-		peer->fifo = fifo_addr;
-		peer->max_payload = max_payload;
-		/*
-		 * Only allow speeds to go down from their initial value.
-		 * Otherwise a local peer that can only do S400 or slower may
-		 * be told to transmit at S800 to a faster remote peer.
-		 */
-		if (peer->xmt_speed > sspd)
-			peer->xmt_speed = sspd;
-
 		/*
 		 * Now that we're done with the 1394 specific stuff, we'll
 		 * need to alter some of the data.  Believe it or not, all
@@ -764,10 +674,11 @@ static int fwnet_finish_incoming_packet(struct net_device *net,
 }
 
 static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len,
-				 u16 source_node_id, bool is_broadcast)
+				 int source_node_id, int generation,
+				 bool is_broadcast)
 {
 	struct sk_buff *skb;
-	struct net_device *net;
+	struct net_device *net = dev->netdev;
 	struct rfc2734_header hdr;
 	unsigned lf;
 	unsigned long flags;
@@ -779,8 +690,6 @@ static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len,
 	int retval;
 	u16 ether_type;
 
-	net = dev->card->netdev;
-
 	hdr.w0 = be32_to_cpu(buf[0]);
 	lf = fwnet_get_hdr_lf(&hdr);
 	if (lf == RFC2374_HDR_UNFRAG) {
@@ -819,9 +728,12 @@ static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len,
 	}
 	datagram_label = fwnet_get_hdr_dgl(&hdr);
 	dg_size = fwnet_get_hdr_dg_size(&hdr); /* ??? + 1 */
-	peer = fwnet_peer_find_by_node_id(dev, source_node_id);
 
-	spin_lock_irqsave(&peer->pdg_lock, flags);
+	spin_lock_irqsave(&dev->lock, flags);
+
+	peer = fwnet_peer_find_by_node_id(dev, source_node_id, generation);
+	if (!peer)
+		goto bad_proto;
 
 	pd = fwnet_pd_find(peer, datagram_label);
 	if (pd == NULL) {
@@ -876,7 +788,7 @@ static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len,
 		skb = skb_get(pd->skb);
 		fwnet_pd_delete(pd);
 
-		spin_unlock_irqrestore(&peer->pdg_lock, flags);
+		spin_unlock_irqrestore(&dev->lock, flags);
 
 		return fwnet_finish_incoming_packet(net, skb, source_node_id,
 						    false, ether_type);
@@ -885,12 +797,12 @@ static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len,
 	 * Datagram is not complete, we're done for the
 	 * moment.
 	 */
-	spin_unlock_irqrestore(&peer->pdg_lock, flags);
+	spin_unlock_irqrestore(&dev->lock, flags);
 
 	return 0;
 
  bad_proto:
-	spin_unlock_irqrestore(&peer->pdg_lock, flags);
+	spin_unlock_irqrestore(&dev->lock, flags);
 
 	if (netif_queue_stopped(net))
 		netif_wake_queue(net);
@@ -916,7 +828,8 @@ static void fwnet_receive_packet(struct fw_card *card, struct fw_request *r,
 		return;
 	}
 
-	status = fwnet_incoming_packet(dev, payload, length, source, false);
+	status = fwnet_incoming_packet(dev, payload, length,
+				       source, generation, false);
 	if (status != 0) {
 		fw_error("Incoming packet failure\n");
 		fw_send_response(card, r, RCODE_CONFLICT_ERROR);
@@ -966,7 +879,7 @@ static void fwnet_receive_broadcast(struct fw_iso_context *context,
 		buf_ptr += 2;
 		length -= IEEE1394_GASP_HDR_SIZE;
 		fwnet_incoming_packet(dev, buf_ptr, length,
-				      source_node_id, true);
+				      source_node_id, -1, true);
 	}
 
 	packet.payload_length = dev->rcv_buffer_size;
@@ -1073,7 +986,6 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask)
 	unsigned tx_len;
 	struct rfc2734_header *bufhdr;
 	unsigned long flags;
-	struct net_device *net;
 
 	dev = ptask->dev;
 	tx_len = ptask->max_payload;
@@ -1137,8 +1049,7 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask)
 	list_add_tail(&ptask->pt_link, &dev->sent_list);
 	spin_unlock_irqrestore(&dev->lock, flags);
 
-	net = dev->card->netdev;
-	net->trans_start = jiffies;
+	dev->netdev->trans_start = jiffies;
 
 	return 0;
 }
@@ -1294,7 +1205,8 @@ static int fwnet_tx(struct sk_buff *skb, struct net_device *net)
 	u16 dg_size;
 	u16 *datagram_label_ptr;
 	struct fwnet_packet_task *ptask;
-	struct fwnet_peer *peer = NULL;
+	struct fwnet_peer *peer;
+	unsigned long flags;
 
 	ptask = kmem_cache_alloc(fwnet_packet_task_cache, GFP_ATOMIC);
 	if (ptask == NULL)
@@ -1314,6 +1226,9 @@ static int fwnet_tx(struct sk_buff *skb, struct net_device *net)
 	proto = hdr_buf.h_proto;
 	dg_size = skb->len;
 
+	/* serialize access to peer, including peer->datagram_label */
+	spin_lock_irqsave(&dev->lock, flags);
+
 	/*
 	 * Set the transmission type for the packet.  ARP packets and IP
 	 * broadcast packets are sent via GASP.
@@ -1322,35 +1237,30 @@ static int fwnet_tx(struct sk_buff *skb, struct net_device *net)
 	    || proto == htons(ETH_P_ARP)
 	    || (proto == htons(ETH_P_IP)
 		&& IN_MULTICAST(ntohl(ip_hdr(skb)->daddr)))) {
-		max_payload = dev->broadcast_xmt_max_payload;
+		max_payload        = dev->broadcast_xmt_max_payload;
 		datagram_label_ptr = &dev->broadcast_xmt_datagramlabel;
 
-		ptask->fifo_addr = FWNET_NO_FIFO_ADDR;
-		ptask->generation = 0;
-		ptask->dest_node = IEEE1394_ALL_NODES;
-		ptask->speed = SCODE_100;
+		ptask->fifo_addr   = FWNET_NO_FIFO_ADDR;
+		ptask->generation  = 0;
+		ptask->dest_node   = IEEE1394_ALL_NODES;
+		ptask->speed       = SCODE_100;
 	} else {
 		__be64 guid = get_unaligned((__be64 *)hdr_buf.h_dest);
 		u8 generation;
 
 		peer = fwnet_peer_find_by_guid(dev, be64_to_cpu(guid));
-		if (!peer)
-			goto fail;
-
-		if (peer->fifo == FWNET_NO_FIFO_ADDR)
-			goto fail;
+		if (!peer || peer->fifo == FWNET_NO_FIFO_ADDR)
+			goto fail_unlock;
 
-		generation = peer->generation;
-		smp_rmb();
-		dest_node = peer->node_id;
-
-		max_payload = peer->max_payload;
+		generation         = peer->generation;
+		dest_node          = peer->node_id;
+		max_payload        = peer->max_payload;
 		datagram_label_ptr = &peer->datagram_label;
 
-		ptask->fifo_addr = peer->fifo;
-		ptask->generation = generation;
-		ptask->dest_node = dest_node;
-		ptask->speed = peer->xmt_speed;
+		ptask->fifo_addr   = peer->fifo;
+		ptask->generation  = generation;
+		ptask->dest_node   = dest_node;
+		ptask->speed       = peer->speed;
 	}
 
 	/* If this is an ARP packet, convert it */
@@ -1393,11 +1303,16 @@ static int fwnet_tx(struct sk_buff *skb, struct net_device *net)
 		ptask->outstanding_pkts = DIV_ROUND_UP(dg_size, max_payload);
 		max_payload += RFC2374_FRAG_HDR_SIZE;
 	}
+
+	spin_unlock_irqrestore(&dev->lock, flags);
+
 	ptask->max_payload = max_payload;
 	fwnet_send_packet(ptask);
 
 	return NETDEV_TX_OK;
 
+ fail_unlock:
+	spin_unlock_irqrestore(&dev->lock, flags);
  fail:
 	if (ptask)
 		kmem_cache_free(fwnet_packet_task_cache, ptask);
@@ -1467,7 +1382,48 @@ static void fwnet_init_dev(struct net_device *net)
 	SET_ETHTOOL_OPS(net, &fwnet_ethtool_ops);
 }
 
-/* FIXME create netdev upon first fw_unit of a card, not upon local fw_unit */
+/* caller must hold fwnet_device_mutex */
+static struct fwnet_device *fwnet_dev_find(struct fw_card *card)
+{
+	struct fwnet_device *dev;
+
+	list_for_each_entry(dev, &fwnet_device_list, dev_link)
+		if (dev->card == card)
+			return dev;
+
+	return NULL;
+}
+
+static int fwnet_add_peer(struct fwnet_device *dev,
+			  struct fw_unit *unit, struct fw_device *device)
+{
+	struct fwnet_peer *peer;
+
+	peer = kmalloc(sizeof(*peer), GFP_KERNEL);
+	if (!peer)
+		return -ENOMEM;
+
+	unit->device.driver_data = peer;
+	peer->dev = dev;
+	peer->guid = (u64)device->config_rom[3] << 32 | device->config_rom[4];
+	peer->fifo = FWNET_NO_FIFO_ADDR;
+	INIT_LIST_HEAD(&peer->pd_list);
+	peer->pdg_size = 0;
+	peer->datagram_label = 0;
+	peer->speed = device->max_speed;
+	peer->max_payload = fwnet_max_payload(device->max_rec, peer->speed);
+
+	peer->generation = device->generation;
+	smp_rmb();
+	peer->node_id = device->node_id;
+
+	spin_lock_irq(&dev->lock);
+	list_add_tail(&peer->peer_link, &dev->peer_list);
+	spin_unlock_irq(&dev->lock);
+
+	return 0;
+}
+
 static int fwnet_probe(struct device *_dev)
 {
 	struct fw_unit *unit = fw_unit(_dev);
@@ -1476,16 +1432,22 @@ static int fwnet_probe(struct device *_dev)
 	struct net_device *net;
 	struct fwnet_device *dev;
 	unsigned max_mtu;
+	bool new_netdev;
+	int ret;
 
-	if (!device->is_local) {
-		int added;
+	mutex_lock(&fwnet_device_mutex);
 
-		added = fwnet_peer_new(card, device);
-		return added;
+	dev = fwnet_dev_find(card);
+	if (dev) {
+		new_netdev = false;
+		net = dev->netdev;
+		goto have_dev;
 	}
+
+	new_netdev = true;
 	net = alloc_netdev(sizeof(*dev), "firewire%d", fwnet_init_dev);
 	if (net == NULL) {
-		fw_error("out of memory\n");
+		ret = -ENOMEM;
 		goto out;
 	}
 
@@ -1500,12 +1462,13 @@ static int fwnet_probe(struct device *_dev)
 
 	dev->local_fifo = FWNET_NO_FIFO_ADDR;
 
-	/* INIT_WORK(&dev->wake, fwnet_handle_queue);*/
 	INIT_LIST_HEAD(&dev->packet_list);
 	INIT_LIST_HEAD(&dev->broadcasted_list);
 	INIT_LIST_HEAD(&dev->sent_list);
+	INIT_LIST_HEAD(&dev->peer_list);
 
 	dev->card = card;
+	dev->netdev = net;
 
 	/*
 	 * Use the RFC 2734 default 1500 octets or the maximum payload
@@ -1518,43 +1481,57 @@ static int fwnet_probe(struct device *_dev)
 	/* Set our hardware address while we're at it */
 	put_unaligned_be64(card->guid, net->dev_addr);
 	put_unaligned_be64(~0ULL, net->broadcast);
-	if (register_netdev(net)) {
+	ret = register_netdev(net);
+	if (ret) {
 		fw_error("Cannot register the driver\n");
 		goto out;
 	}
 
+	list_add_tail(&dev->dev_link, &fwnet_device_list);
 	fw_notify("%s: IPv4 over FireWire on device %016llx\n",
 		  net->name, (unsigned long long)card->guid);
-	card->netdev = net;
-
-	return 0;
+ have_dev:
+	ret = fwnet_add_peer(dev, unit, device);
+	if (ret && new_netdev) {
+		unregister_netdev(net);
+		list_del(&dev->dev_link);
+	}
  out:
-	if (net)
+	if (ret && new_netdev)
 		free_netdev(net);
 
-	return -ENOENT;
+	mutex_unlock(&fwnet_device_mutex);
+
+	return ret;
+}
+
+static void fwnet_remove_peer(struct fwnet_peer *peer)
+{
+	struct fwnet_partial_datagram *pd, *pd_next;
+
+	spin_lock_irq(&peer->dev->lock);
+	list_del(&peer->peer_link);
+	spin_unlock_irq(&peer->dev->lock);
+
+	list_for_each_entry_safe(pd, pd_next, &peer->pd_list, pd_link)
+		fwnet_pd_delete(pd);
+
+	kfree(peer);
 }
 
 static int fwnet_remove(struct device *_dev)
 {
-	struct fw_unit *unit = fw_unit(_dev);
-	struct fw_device *device = fw_parent_device(unit);
-	struct fw_card *card = device->card;
+	struct fwnet_peer *peer = _dev->driver_data;
+	struct fwnet_device *dev = peer->dev;
 	struct net_device *net;
-	struct fwnet_device *dev;
-	struct fwnet_peer *peer;
-	struct fwnet_partial_datagram *pd, *pd_next;
 	struct fwnet_packet_task *ptask, *pt_next;
 
-	if (!device->is_local) {
-		fwnet_peer_delete(card, device);
+	mutex_lock(&fwnet_device_mutex);
 
-		return 0;
-	}
+	fwnet_remove_peer(peer);
 
-	net = card->netdev;
-	if (net) {
-		dev = netdev_priv(net);
+	if (list_empty(&dev->peer_list)) {
+		net = dev->netdev;
 		unregister_netdev(net);
 
 		if (dev->local_fifo != FWNET_NO_FIFO_ADDR)
@@ -1580,19 +1557,11 @@ static int fwnet_remove(struct device *_dev)
 			dev_kfree_skb_any(ptask->skb);
 			kmem_cache_free(fwnet_packet_task_cache, ptask);
 		}
-		list_for_each_entry(peer, &card->peer_list, peer_link) {
-			if (peer->pdg_size) {
-				list_for_each_entry_safe(pd, pd_next,
-						&peer->pd_list, pd_link)
-					fwnet_pd_delete(pd);
-				peer->pdg_size = 0;
-			}
-			peer->fifo = FWNET_NO_FIFO_ADDR;
-		}
 		free_netdev(net);
-		card->netdev = NULL;
 	}
 
+	mutex_unlock(&fwnet_device_mutex);
+
 	return 0;
 }
 
@@ -1603,24 +1572,15 @@ static int fwnet_remove(struct device *_dev)
 static void fwnet_update(struct fw_unit *unit)
 {
 	struct fw_device *device = fw_parent_device(unit);
-	struct net_device *net = device->card->netdev;
-	struct fwnet_device *dev;
-	struct fwnet_peer *peer;
-	u64 guid;
+	struct fwnet_peer *peer = unit->device.driver_data;
+	int generation;
 
-	if (net && !device->is_local) {
-		dev = netdev_priv(net);
-		guid = (u64)device->config_rom[3] << 32 | device->config_rom[4];
-		peer = fwnet_peer_find_by_guid(dev, guid);
-		if (!peer) {
-			fw_error("fwnet_update: no peer for device %016llx\n",
-				 (unsigned long long)guid);
-			return;
-		}
-		peer->generation = device->generation;
-		rmb();
-		peer->node_id = device->node_id;
-	}
+	generation = device->generation;
+
+	spin_lock_irq(&peer->dev->lock);
+	peer->node_id    = device->node_id;
+	peer->generation = generation;
+	spin_unlock_irq(&peer->dev->lock);
 }
 
 static const struct ieee1394_device_id fwnet_id_table[] = {
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index 5cb0c1549ff..9823946adbc 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -131,10 +131,6 @@ struct fw_card {
 	bool broadcast_channel_allocated;
 	u32 broadcast_channel;
 	u32 topology_map[(CSR_TOPOLOGY_MAP_END - CSR_TOPOLOGY_MAP) / 4];
-
-	/* firewire-net driver data */
-	void *netdev;
-	struct list_head peer_list;
 };
 
 static inline struct fw_card *fw_card_get(struct fw_card *card)
-- 
cgit v1.2.3-70-g09d2


From 7923f90fffa8746f6457d4eea2109fd3d6414189 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Sun, 14 Jun 2009 22:10:41 +0200
Subject: vmlinux.lds.h update

Updated after review by Tim Abbott.
- Use HEAD_TEXT_SECTION
- Drop use of section-names.h and delete file
- Introduce EXIT_CALL

Deleting section-names.h required a few simple
updates of init.h

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Tim Abbott <tabbott@ksplice.com>
---
 include/asm-generic/vmlinux.lds.h | 17 ++++++++++-------
 include/linux/init.h              |  4 +---
 include/linux/section-names.h     |  6 ------
 3 files changed, 11 insertions(+), 16 deletions(-)
 delete mode 100644 include/linux/section-names.h

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index fba42236e94..7381f701f3f 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -12,7 +12,7 @@
  * {
  *	. = START;
  *	__init_begin = .;
- *	HEAD_SECTION
+ *	HEAD_TEXT_SECTION
  *	INIT_TEXT_SECTION(PAGE_SIZE)
  *	INIT_DATA_SECTION(...)
  *	PERCPU(PAGE_SIZE)
@@ -38,7 +38,7 @@
  *	/DISCARD/ : {
  *		EXIT_TEXT
  *		EXIT_DATA
- *		*(.exitcall.exit)
+ *		EXIT_CALL
  *	}
  *	STABS_DEBUG
  *	DWARF_DEBUG
@@ -52,7 +52,6 @@
  * Examples are: [__initramfs_start, __initramfs_end] for initramfs and
  *               [__nosave_begin, __nosave_end] for the nosave data
  */
- #include <linux/section-names.h>
 
 #ifndef LOAD_OFFSET
 #define LOAD_OFFSET 0
@@ -414,9 +413,9 @@
 #endif
 
 /* Section used for early init (in .S files) */
-#define HEAD_TEXT  *(HEAD_TEXT_SECTION)
+#define HEAD_TEXT  *(.head.text)
 
-#define HEAD_SECTION							\
+#define HEAD_TEXT_SECTION							\
 	.head.text : AT(ADDR(.head.text) - LOAD_OFFSET) {		\
 		HEAD_TEXT						\
 	}
@@ -473,6 +472,9 @@
 	CPU_DISCARD(exit.text)						\
 	MEM_DISCARD(exit.text)
 
+#define EXIT_CALL							\
+	*(.exitcall.exit)
+
 /*
  * bss (Block Started by Symbol) - uninitialized data
  * zeroed during startup
@@ -692,7 +694,7 @@
  * NOSAVE_DATA starts and ends with a PAGE_SIZE alignment which
  * matches the requirment of PAGE_ALIGNED_DATA.
  *
-/* use 0 as page_align if page_aligned data is not used */
+ * use 0 as page_align if page_aligned data is not used */
 #define RW_DATA_SECTION(cacheline, nosave, pagealigned, inittask)	\
 	. = ALIGN(PAGE_SIZE);						\
 	.data : AT(ADDR(.data) - LOAD_OFFSET) {				\
@@ -726,4 +728,5 @@
 #define BSS_SECTION(sbss_align, bss_align)				\
 	SBSS								\
 	BSS(bss_align)							\
-	. = ALIGN(4);							\
+	. = ALIGN(4);
+
diff --git a/include/linux/init.h b/include/linux/init.h
index 9f70c9f25d4..b2189803f19 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -2,8 +2,6 @@
 #define _LINUX_INIT_H
 
 #include <linux/compiler.h>
-#include <linux/section-names.h>
-#include <linux/stringify.h>
 
 /* These macros are used to mark some functions or 
  * initialized data (doesn't apply to uninitialized data)
@@ -101,7 +99,7 @@
 #define __memexitconst   __section(.memexit.rodata)
 
 /* For assembly routines */
-#define __HEAD		.section	__stringify(HEAD_TEXT_SECTION),"ax"
+#define __HEAD		.section	".head.text","ax"
 #define __INIT		.section	".init.text","ax"
 #define __FINIT		.previous
 
diff --git a/include/linux/section-names.h b/include/linux/section-names.h
deleted file mode 100644
index c956f4eb2ad..00000000000
--- a/include/linux/section-names.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_SECTION_NAMES_H
-#define __LINUX_SECTION_NAMES_H
-
-#define HEAD_TEXT_SECTION .head.text
-
-#endif /* !__LINUX_SECTION_NAMES_H */
-- 
cgit v1.2.3-70-g09d2


From 5a7e3d1281bbc4404b250b4a18d3ecb07c77640c Mon Sep 17 00:00:00 2001
From: Samuel Thibault <samuel.thibault@ens-lyon.org>
Date: Sat, 13 Jun 2009 14:52:33 +0200
Subject: keyboard: advertise KT_DEAD2 extended diacriticals

In addition to KT_DEAD which has limited support for diacriticals,
there is KT_DEAD2 that can support 256 criticals, so let's advertise
it in <linux/keyboard.h>.

This lets userland know abut the drivers/char/keyboard.c function
k_dead2, which supports more than the few trivial ones that k_dead
supports.

Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/keyboard.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/keyboard.h b/include/linux/keyboard.h
index a3c984d780f..33a63f62d57 100644
--- a/include/linux/keyboard.h
+++ b/include/linux/keyboard.h
@@ -56,6 +56,7 @@ extern int unregister_keyboard_notifier(struct notifier_block *nb);
 #define KT_ASCII	9
 #define KT_LOCK		10
 #define KT_SLOCK	12
+#define KT_DEAD2	13
 #define KT_BRL		14
 
 #define K(t,v)		(((t)<<8)|(v))
-- 
cgit v1.2.3-70-g09d2


From 602c11a8ee62d49cddbc5972e5edb876dd415113 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Mon, 15 Jun 2009 09:24:41 +1000
Subject: drm/radeon: fix mobility flags on new PCI IDs.

These aren't used that much on r600, but may be needed in the future,
so get them correct now.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/drm_pciids.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index f8634ab53b8..45c18672b09 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -254,8 +254,8 @@
 	{0x1002, 0x940A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R600|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x940B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R600|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x940F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R600|RADEON_NEW_MEMMAP}, \
-	{0x1002, 0x94A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \
-	{0x1002, 0x94A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x94A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x94A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x94B1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x94B3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x94B5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \
@@ -273,8 +273,8 @@
 	{0x1002, 0x9456, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x945A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x945B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
-	{0x1002, 0x9460, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
-	{0x1002, 0x9462, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x9460, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x9462, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x946A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x946B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x947A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
-- 
cgit v1.2.3-70-g09d2


From ba4e7d973dd09b66912ac4c0856add8b0703a997 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Wed, 10 Jun 2009 15:20:19 +0200
Subject: drm: Add the TTM GPU memory manager subsystem.

TTM is a GPU memory manager subsystem designed for use with GPU
devices with various memory types (On-card VRAM, AGP,
PCI apertures etc.). It's essentially a helper library that assists
the DRM driver in creating and managing persistent buffer objects.

TTM manages placement of data and CPU map setup and teardown on
data movement. It can also optionally manage synchronization of
data on a per-buffer-object level.

TTM takes care to provide an always valid virtual user-space address
to a buffer object which makes user-space sub-allocation of
big buffer objects feasible.

TTM uses a fine-grained per buffer-object locking scheme, taking
care to release all relevant locks when waiting for the GPU.
Although this implies some locking overhead, it's probably a big
win for devices with multiple command submission mechanisms, since
the lock contention will be minimal.

TTM can be used with whatever user-space interface the driver
chooses, including GEM. It's used by the upcoming Radeon KMS DRM driver
and is also the GPU memory management core of various new experimental
DRM drivers.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/Kconfig               |    8 +
 drivers/gpu/drm/Makefile              |    2 +-
 drivers/gpu/drm/ttm/Makefile          |    8 +
 drivers/gpu/drm/ttm/ttm_agp_backend.c |  150 +++
 drivers/gpu/drm/ttm/ttm_bo.c          | 1698 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/ttm/ttm_bo_util.c     |  561 +++++++++++
 drivers/gpu/drm/ttm/ttm_bo_vm.c       |  454 +++++++++
 drivers/gpu/drm/ttm/ttm_global.c      |  114 +++
 drivers/gpu/drm/ttm/ttm_memory.c      |  234 +++++
 drivers/gpu/drm/ttm/ttm_module.c      |   50 +
 drivers/gpu/drm/ttm/ttm_tt.c          |  635 ++++++++++++
 include/drm/ttm/ttm_bo_api.h          |  618 ++++++++++++
 include/drm/ttm/ttm_bo_driver.h       |  867 +++++++++++++++++
 include/drm/ttm/ttm_memory.h          |  153 +++
 include/drm/ttm/ttm_module.h          |   58 ++
 include/drm/ttm/ttm_placement.h       |   92 ++
 16 files changed, 5701 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/ttm/Makefile
 create mode 100644 drivers/gpu/drm/ttm/ttm_agp_backend.c
 create mode 100644 drivers/gpu/drm/ttm/ttm_bo.c
 create mode 100644 drivers/gpu/drm/ttm/ttm_bo_util.c
 create mode 100644 drivers/gpu/drm/ttm/ttm_bo_vm.c
 create mode 100644 drivers/gpu/drm/ttm/ttm_global.c
 create mode 100644 drivers/gpu/drm/ttm/ttm_memory.c
 create mode 100644 drivers/gpu/drm/ttm/ttm_module.c
 create mode 100644 drivers/gpu/drm/ttm/ttm_tt.c
 create mode 100644 include/drm/ttm/ttm_bo_api.h
 create mode 100644 include/drm/ttm/ttm_bo_driver.h
 create mode 100644 include/drm/ttm/ttm_memory.h
 create mode 100644 include/drm/ttm/ttm_module.h
 create mode 100644 include/drm/ttm/ttm_placement.h

(limited to 'include')

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index f5d46e7199d..64a57adc0a2 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -18,6 +18,14 @@ menuconfig DRM
 	  details.  You should also select and configure AGP
 	  (/dev/agpgart) support.
 
+config DRM_TTM
+	tristate "TTM memory manager"
+	depends on DRM
+	help
+	  GPU memory management subsystem for devices with multiple
+	  GPU memory types. Will be enabled automatically if a device driver
+	  uses it.
+
 config DRM_TDFX
 	tristate "3dfx Banshee/Voodoo3+"
 	depends on DRM && PCI
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 4ec5061fa58..4e89ab08b7b 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -26,4 +26,4 @@ obj-$(CONFIG_DRM_I915)  += i915/
 obj-$(CONFIG_DRM_SIS)   += sis/
 obj-$(CONFIG_DRM_SAVAGE)+= savage/
 obj-$(CONFIG_DRM_VIA)	+=via/
-
+obj-$(CONFIG_DRM_TTM)	+= ttm/
diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
new file mode 100644
index 00000000000..b0a9de7a57c
--- /dev/null
+++ b/drivers/gpu/drm/ttm/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the drm device driver.  This driver provides support for the
+
+ccflags-y := -Iinclude/drm
+ttm-y := ttm_agp_backend.o ttm_memory.o ttm_tt.o ttm_bo.o \
+	ttm_bo_util.o ttm_bo_vm.o ttm_module.o ttm_global.o
+
+obj-$(CONFIG_DRM_TTM) += ttm.o
diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c b/drivers/gpu/drm/ttm/ttm_agp_backend.c
new file mode 100644
index 00000000000..e8f6d2229d8
--- /dev/null
+++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c
@@ -0,0 +1,150 @@
+/**************************************************************************
+ *
+ * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+/*
+ * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com>
+ *          Keith Packard.
+ */
+
+#include "ttm/ttm_module.h"
+#include "ttm/ttm_bo_driver.h"
+#ifdef TTM_HAS_AGP
+#include "ttm/ttm_placement.h"
+#include <linux/agp_backend.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <asm/agp.h>
+
+struct ttm_agp_backend {
+	struct ttm_backend backend;
+	struct agp_memory *mem;
+	struct agp_bridge_data *bridge;
+};
+
+static int ttm_agp_populate(struct ttm_backend *backend,
+			    unsigned long num_pages, struct page **pages,
+			    struct page *dummy_read_page)
+{
+	struct ttm_agp_backend *agp_be =
+	    container_of(backend, struct ttm_agp_backend, backend);
+	struct page **cur_page, **last_page = pages + num_pages;
+	struct agp_memory *mem;
+
+	mem = agp_allocate_memory(agp_be->bridge, num_pages, AGP_USER_MEMORY);
+	if (unlikely(mem == NULL))
+		return -ENOMEM;
+
+	mem->page_count = 0;
+	for (cur_page = pages; cur_page < last_page; ++cur_page) {
+		struct page *page = *cur_page;
+		if (!page)
+			page = dummy_read_page;
+
+		mem->memory[mem->page_count++] =
+		    phys_to_gart(page_to_phys(page));
+	}
+	agp_be->mem = mem;
+	return 0;
+}
+
+static int ttm_agp_bind(struct ttm_backend *backend, struct ttm_mem_reg *bo_mem)
+{
+	struct ttm_agp_backend *agp_be =
+	    container_of(backend, struct ttm_agp_backend, backend);
+	struct agp_memory *mem = agp_be->mem;
+	int cached = (bo_mem->placement & TTM_PL_FLAG_CACHED);
+	int ret;
+
+	mem->is_flushed = 1;
+	mem->type = (cached) ? AGP_USER_CACHED_MEMORY : AGP_USER_MEMORY;
+
+	ret = agp_bind_memory(mem, bo_mem->mm_node->start);
+	if (ret)
+		printk(KERN_ERR TTM_PFX "AGP Bind memory failed.\n");
+
+	return ret;
+}
+
+static int ttm_agp_unbind(struct ttm_backend *backend)
+{
+	struct ttm_agp_backend *agp_be =
+	    container_of(backend, struct ttm_agp_backend, backend);
+
+	if (agp_be->mem->is_bound)
+		return agp_unbind_memory(agp_be->mem);
+	else
+		return 0;
+}
+
+static void ttm_agp_clear(struct ttm_backend *backend)
+{
+	struct ttm_agp_backend *agp_be =
+	    container_of(backend, struct ttm_agp_backend, backend);
+	struct agp_memory *mem = agp_be->mem;
+
+	if (mem) {
+		ttm_agp_unbind(backend);
+		agp_free_memory(mem);
+	}
+	agp_be->mem = NULL;
+}
+
+static void ttm_agp_destroy(struct ttm_backend *backend)
+{
+	struct ttm_agp_backend *agp_be =
+	    container_of(backend, struct ttm_agp_backend, backend);
+
+	if (agp_be->mem)
+		ttm_agp_clear(backend);
+	kfree(agp_be);
+}
+
+static struct ttm_backend_func ttm_agp_func = {
+	.populate = ttm_agp_populate,
+	.clear = ttm_agp_clear,
+	.bind = ttm_agp_bind,
+	.unbind = ttm_agp_unbind,
+	.destroy = ttm_agp_destroy,
+};
+
+struct ttm_backend *ttm_agp_backend_init(struct ttm_bo_device *bdev,
+					 struct agp_bridge_data *bridge)
+{
+	struct ttm_agp_backend *agp_be;
+
+	agp_be = kmalloc(sizeof(*agp_be), GFP_KERNEL);
+	if (!agp_be)
+		return NULL;
+
+	agp_be->mem = NULL;
+	agp_be->bridge = bridge;
+	agp_be->backend.func = &ttm_agp_func;
+	agp_be->backend.bdev = bdev;
+	return &agp_be->backend;
+}
+EXPORT_SYMBOL(ttm_agp_backend_init);
+
+#endif
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
new file mode 100644
index 00000000000..1587aeca7be
--- /dev/null
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -0,0 +1,1698 @@
+/**************************************************************************
+ *
+ * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+/*
+ * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com>
+ */
+
+#include "ttm/ttm_module.h"
+#include "ttm/ttm_bo_driver.h"
+#include "ttm/ttm_placement.h"
+#include <linux/jiffies.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/file.h>
+#include <linux/module.h>
+
+#define TTM_ASSERT_LOCKED(param)
+#define TTM_DEBUG(fmt, arg...)
+#define TTM_BO_HASH_ORDER 13
+
+static int ttm_bo_setup_vm(struct ttm_buffer_object *bo);
+static void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo);
+static int ttm_bo_swapout(struct ttm_mem_shrink *shrink);
+
+static inline uint32_t ttm_bo_type_flags(unsigned type)
+{
+	return 1 << (type);
+}
+
+static void ttm_bo_release_list(struct kref *list_kref)
+{
+	struct ttm_buffer_object *bo =
+	    container_of(list_kref, struct ttm_buffer_object, list_kref);
+	struct ttm_bo_device *bdev = bo->bdev;
+
+	BUG_ON(atomic_read(&bo->list_kref.refcount));
+	BUG_ON(atomic_read(&bo->kref.refcount));
+	BUG_ON(atomic_read(&bo->cpu_writers));
+	BUG_ON(bo->sync_obj != NULL);
+	BUG_ON(bo->mem.mm_node != NULL);
+	BUG_ON(!list_empty(&bo->lru));
+	BUG_ON(!list_empty(&bo->ddestroy));
+
+	if (bo->ttm)
+		ttm_tt_destroy(bo->ttm);
+	if (bo->destroy)
+		bo->destroy(bo);
+	else {
+		ttm_mem_global_free(bdev->mem_glob, bo->acc_size, false);
+		kfree(bo);
+	}
+}
+
+int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo, bool interruptible)
+{
+
+	if (interruptible) {
+		int ret = 0;
+
+		ret = wait_event_interruptible(bo->event_queue,
+					       atomic_read(&bo->reserved) == 0);
+		if (unlikely(ret != 0))
+			return -ERESTART;
+	} else {
+		wait_event(bo->event_queue, atomic_read(&bo->reserved) == 0);
+	}
+	return 0;
+}
+
+static void ttm_bo_add_to_lru(struct ttm_buffer_object *bo)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	struct ttm_mem_type_manager *man;
+
+	BUG_ON(!atomic_read(&bo->reserved));
+
+	if (!(bo->mem.placement & TTM_PL_FLAG_NO_EVICT)) {
+
+		BUG_ON(!list_empty(&bo->lru));
+
+		man = &bdev->man[bo->mem.mem_type];
+		list_add_tail(&bo->lru, &man->lru);
+		kref_get(&bo->list_kref);
+
+		if (bo->ttm != NULL) {
+			list_add_tail(&bo->swap, &bdev->swap_lru);
+			kref_get(&bo->list_kref);
+		}
+	}
+}
+
+/**
+ * Call with the lru_lock held.
+ */
+
+static int ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
+{
+	int put_count = 0;
+
+	if (!list_empty(&bo->swap)) {
+		list_del_init(&bo->swap);
+		++put_count;
+	}
+	if (!list_empty(&bo->lru)) {
+		list_del_init(&bo->lru);
+		++put_count;
+	}
+
+	/*
+	 * TODO: Add a driver hook to delete from
+	 * driver-specific LRU's here.
+	 */
+
+	return put_count;
+}
+
+int ttm_bo_reserve_locked(struct ttm_buffer_object *bo,
+			  bool interruptible,
+			  bool no_wait, bool use_sequence, uint32_t sequence)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	int ret;
+
+	while (unlikely(atomic_cmpxchg(&bo->reserved, 0, 1) != 0)) {
+		if (use_sequence && bo->seq_valid &&
+			(sequence - bo->val_seq < (1 << 31))) {
+			return -EAGAIN;
+		}
+
+		if (no_wait)
+			return -EBUSY;
+
+		spin_unlock(&bdev->lru_lock);
+		ret = ttm_bo_wait_unreserved(bo, interruptible);
+		spin_lock(&bdev->lru_lock);
+
+		if (unlikely(ret))
+			return ret;
+	}
+
+	if (use_sequence) {
+		bo->val_seq = sequence;
+		bo->seq_valid = true;
+	} else {
+		bo->seq_valid = false;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(ttm_bo_reserve);
+
+static void ttm_bo_ref_bug(struct kref *list_kref)
+{
+	BUG();
+}
+
+int ttm_bo_reserve(struct ttm_buffer_object *bo,
+		   bool interruptible,
+		   bool no_wait, bool use_sequence, uint32_t sequence)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	int put_count = 0;
+	int ret;
+
+	spin_lock(&bdev->lru_lock);
+	ret = ttm_bo_reserve_locked(bo, interruptible, no_wait, use_sequence,
+				    sequence);
+	if (likely(ret == 0))
+		put_count = ttm_bo_del_from_lru(bo);
+	spin_unlock(&bdev->lru_lock);
+
+	while (put_count--)
+		kref_put(&bo->list_kref, ttm_bo_ref_bug);
+
+	return ret;
+}
+
+void ttm_bo_unreserve(struct ttm_buffer_object *bo)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+
+	spin_lock(&bdev->lru_lock);
+	ttm_bo_add_to_lru(bo);
+	atomic_set(&bo->reserved, 0);
+	wake_up_all(&bo->event_queue);
+	spin_unlock(&bdev->lru_lock);
+}
+EXPORT_SYMBOL(ttm_bo_unreserve);
+
+/*
+ * Call bo->mutex locked.
+ */
+
+static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	int ret = 0;
+	uint32_t page_flags = 0;
+
+	TTM_ASSERT_LOCKED(&bo->mutex);
+	bo->ttm = NULL;
+
+	switch (bo->type) {
+	case ttm_bo_type_device:
+		if (zero_alloc)
+			page_flags |= TTM_PAGE_FLAG_ZERO_ALLOC;
+	case ttm_bo_type_kernel:
+		bo->ttm = ttm_tt_create(bdev, bo->num_pages << PAGE_SHIFT,
+					page_flags, bdev->dummy_read_page);
+		if (unlikely(bo->ttm == NULL))
+			ret = -ENOMEM;
+		break;
+	case ttm_bo_type_user:
+		bo->ttm = ttm_tt_create(bdev, bo->num_pages << PAGE_SHIFT,
+					page_flags | TTM_PAGE_FLAG_USER,
+					bdev->dummy_read_page);
+		if (unlikely(bo->ttm == NULL))
+			ret = -ENOMEM;
+		break;
+
+		ret = ttm_tt_set_user(bo->ttm, current,
+				      bo->buffer_start, bo->num_pages);
+		if (unlikely(ret != 0))
+			ttm_tt_destroy(bo->ttm);
+		break;
+	default:
+		printk(KERN_ERR TTM_PFX "Illegal buffer object type\n");
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
+				  struct ttm_mem_reg *mem,
+				  bool evict, bool interruptible, bool no_wait)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	bool old_is_pci = ttm_mem_reg_is_pci(bdev, &bo->mem);
+	bool new_is_pci = ttm_mem_reg_is_pci(bdev, mem);
+	struct ttm_mem_type_manager *old_man = &bdev->man[bo->mem.mem_type];
+	struct ttm_mem_type_manager *new_man = &bdev->man[mem->mem_type];
+	int ret = 0;
+
+	if (old_is_pci || new_is_pci ||
+	    ((mem->placement & bo->mem.placement & TTM_PL_MASK_CACHING) == 0))
+		ttm_bo_unmap_virtual(bo);
+
+	/*
+	 * Create and bind a ttm if required.
+	 */
+
+	if (!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED) && (bo->ttm == NULL)) {
+		ret = ttm_bo_add_ttm(bo, false);
+		if (ret)
+			goto out_err;
+
+		ret = ttm_tt_set_placement_caching(bo->ttm, mem->placement);
+		if (ret)
+			return ret;
+
+		if (mem->mem_type != TTM_PL_SYSTEM) {
+			ret = ttm_tt_bind(bo->ttm, mem);
+			if (ret)
+				goto out_err;
+		}
+
+		if (bo->mem.mem_type == TTM_PL_SYSTEM) {
+
+			struct ttm_mem_reg *old_mem = &bo->mem;
+			uint32_t save_flags = old_mem->placement;
+
+			*old_mem = *mem;
+			mem->mm_node = NULL;
+			ttm_flag_masked(&save_flags, mem->placement,
+					TTM_PL_MASK_MEMTYPE);
+			goto moved;
+		}
+
+	}
+
+	if (!(old_man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
+	    !(new_man->flags & TTM_MEMTYPE_FLAG_FIXED))
+		ret = ttm_bo_move_ttm(bo, evict, no_wait, mem);
+	else if (bdev->driver->move)
+		ret = bdev->driver->move(bo, evict, interruptible,
+					 no_wait, mem);
+	else
+		ret = ttm_bo_move_memcpy(bo, evict, no_wait, mem);
+
+	if (ret)
+		goto out_err;
+
+moved:
+	if (bo->evicted) {
+		ret = bdev->driver->invalidate_caches(bdev, bo->mem.placement);
+		if (ret)
+			printk(KERN_ERR TTM_PFX "Can not flush read caches\n");
+		bo->evicted = false;
+	}
+
+	if (bo->mem.mm_node) {
+		spin_lock(&bo->lock);
+		bo->offset = (bo->mem.mm_node->start << PAGE_SHIFT) +
+		    bdev->man[bo->mem.mem_type].gpu_offset;
+		bo->cur_placement = bo->mem.placement;
+		spin_unlock(&bo->lock);
+	}
+
+	return 0;
+
+out_err:
+	new_man = &bdev->man[bo->mem.mem_type];
+	if ((new_man->flags & TTM_MEMTYPE_FLAG_FIXED) && bo->ttm) {
+		ttm_tt_unbind(bo->ttm);
+		ttm_tt_destroy(bo->ttm);
+		bo->ttm = NULL;
+	}
+
+	return ret;
+}
+
+/**
+ * If bo idle, remove from delayed- and lru lists, and unref.
+ * If not idle, and already on delayed list, do nothing.
+ * If not idle, and not on delayed list, put on delayed list,
+ *   up the list_kref and schedule a delayed list check.
+ */
+
+static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	struct ttm_bo_driver *driver = bdev->driver;
+	int ret;
+
+	spin_lock(&bo->lock);
+	(void) ttm_bo_wait(bo, false, false, !remove_all);
+
+	if (!bo->sync_obj) {
+		int put_count;
+
+		spin_unlock(&bo->lock);
+
+		spin_lock(&bdev->lru_lock);
+		ret = ttm_bo_reserve_locked(bo, false, false, false, 0);
+		BUG_ON(ret);
+		if (bo->ttm)
+			ttm_tt_unbind(bo->ttm);
+
+		if (!list_empty(&bo->ddestroy)) {
+			list_del_init(&bo->ddestroy);
+			kref_put(&bo->list_kref, ttm_bo_ref_bug);
+		}
+		if (bo->mem.mm_node) {
+			drm_mm_put_block(bo->mem.mm_node);
+			bo->mem.mm_node = NULL;
+		}
+		put_count = ttm_bo_del_from_lru(bo);
+		spin_unlock(&bdev->lru_lock);
+
+		atomic_set(&bo->reserved, 0);
+
+		while (put_count--)
+			kref_put(&bo->list_kref, ttm_bo_release_list);
+
+		return 0;
+	}
+
+	spin_lock(&bdev->lru_lock);
+	if (list_empty(&bo->ddestroy)) {
+		void *sync_obj = bo->sync_obj;
+		void *sync_obj_arg = bo->sync_obj_arg;
+
+		kref_get(&bo->list_kref);
+		list_add_tail(&bo->ddestroy, &bdev->ddestroy);
+		spin_unlock(&bdev->lru_lock);
+		spin_unlock(&bo->lock);
+
+		if (sync_obj)
+			driver->sync_obj_flush(sync_obj, sync_obj_arg);
+		schedule_delayed_work(&bdev->wq,
+				      ((HZ / 100) < 1) ? 1 : HZ / 100);
+		ret = 0;
+
+	} else {
+		spin_unlock(&bdev->lru_lock);
+		spin_unlock(&bo->lock);
+		ret = -EBUSY;
+	}
+
+	return ret;
+}
+
+/**
+ * Traverse the delayed list, and call ttm_bo_cleanup_refs on all
+ * encountered buffers.
+ */
+
+static int ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all)
+{
+	struct ttm_buffer_object *entry, *nentry;
+	struct list_head *list, *next;
+	int ret;
+
+	spin_lock(&bdev->lru_lock);
+	list_for_each_safe(list, next, &bdev->ddestroy) {
+		entry = list_entry(list, struct ttm_buffer_object, ddestroy);
+		nentry = NULL;
+
+		/*
+		 * Protect the next list entry from destruction while we
+		 * unlock the lru_lock.
+		 */
+
+		if (next != &bdev->ddestroy) {
+			nentry = list_entry(next, struct ttm_buffer_object,
+					    ddestroy);
+			kref_get(&nentry->list_kref);
+		}
+		kref_get(&entry->list_kref);
+
+		spin_unlock(&bdev->lru_lock);
+		ret = ttm_bo_cleanup_refs(entry, remove_all);
+		kref_put(&entry->list_kref, ttm_bo_release_list);
+
+		spin_lock(&bdev->lru_lock);
+		if (nentry) {
+			bool next_onlist = !list_empty(next);
+			spin_unlock(&bdev->lru_lock);
+			kref_put(&nentry->list_kref, ttm_bo_release_list);
+			spin_lock(&bdev->lru_lock);
+			/*
+			 * Someone might have raced us and removed the
+			 * next entry from the list. We don't bother restarting
+			 * list traversal.
+			 */
+
+			if (!next_onlist)
+				break;
+		}
+		if (ret)
+			break;
+	}
+	ret = !list_empty(&bdev->ddestroy);
+	spin_unlock(&bdev->lru_lock);
+
+	return ret;
+}
+
+static void ttm_bo_delayed_workqueue(struct work_struct *work)
+{
+	struct ttm_bo_device *bdev =
+	    container_of(work, struct ttm_bo_device, wq.work);
+
+	if (ttm_bo_delayed_delete(bdev, false)) {
+		schedule_delayed_work(&bdev->wq,
+				      ((HZ / 100) < 1) ? 1 : HZ / 100);
+	}
+}
+
+static void ttm_bo_release(struct kref *kref)
+{
+	struct ttm_buffer_object *bo =
+	    container_of(kref, struct ttm_buffer_object, kref);
+	struct ttm_bo_device *bdev = bo->bdev;
+
+	if (likely(bo->vm_node != NULL)) {
+		rb_erase(&bo->vm_rb, &bdev->addr_space_rb);
+		drm_mm_put_block(bo->vm_node);
+		bo->vm_node = NULL;
+	}
+	write_unlock(&bdev->vm_lock);
+	ttm_bo_cleanup_refs(bo, false);
+	kref_put(&bo->list_kref, ttm_bo_release_list);
+	write_lock(&bdev->vm_lock);
+}
+
+void ttm_bo_unref(struct ttm_buffer_object **p_bo)
+{
+	struct ttm_buffer_object *bo = *p_bo;
+	struct ttm_bo_device *bdev = bo->bdev;
+
+	*p_bo = NULL;
+	write_lock(&bdev->vm_lock);
+	kref_put(&bo->kref, ttm_bo_release);
+	write_unlock(&bdev->vm_lock);
+}
+EXPORT_SYMBOL(ttm_bo_unref);
+
+static int ttm_bo_evict(struct ttm_buffer_object *bo, unsigned mem_type,
+			bool interruptible, bool no_wait)
+{
+	int ret = 0;
+	struct ttm_bo_device *bdev = bo->bdev;
+	struct ttm_mem_reg evict_mem;
+	uint32_t proposed_placement;
+
+	if (bo->mem.mem_type != mem_type)
+		goto out;
+
+	spin_lock(&bo->lock);
+	ret = ttm_bo_wait(bo, false, interruptible, no_wait);
+	spin_unlock(&bo->lock);
+
+	if (ret && ret != -ERESTART) {
+		printk(KERN_ERR TTM_PFX "Failed to expire sync object before "
+		       "buffer eviction.\n");
+		goto out;
+	}
+
+	BUG_ON(!atomic_read(&bo->reserved));
+
+	evict_mem = bo->mem;
+	evict_mem.mm_node = NULL;
+
+	proposed_placement = bdev->driver->evict_flags(bo);
+
+	ret = ttm_bo_mem_space(bo, proposed_placement,
+			       &evict_mem, interruptible, no_wait);
+	if (unlikely(ret != 0 && ret != -ERESTART))
+		ret = ttm_bo_mem_space(bo, TTM_PL_FLAG_SYSTEM,
+				       &evict_mem, interruptible, no_wait);
+
+	if (ret) {
+		if (ret != -ERESTART)
+			printk(KERN_ERR TTM_PFX
+			       "Failed to find memory space for "
+			       "buffer 0x%p eviction.\n", bo);
+		goto out;
+	}
+
+	ret = ttm_bo_handle_move_mem(bo, &evict_mem, true, interruptible,
+				     no_wait);
+	if (ret) {
+		if (ret != -ERESTART)
+			printk(KERN_ERR TTM_PFX "Buffer eviction failed\n");
+		goto out;
+	}
+
+	spin_lock(&bdev->lru_lock);
+	if (evict_mem.mm_node) {
+		drm_mm_put_block(evict_mem.mm_node);
+		evict_mem.mm_node = NULL;
+	}
+	spin_unlock(&bdev->lru_lock);
+	bo->evicted = true;
+out:
+	return ret;
+}
+
+/**
+ * Repeatedly evict memory from the LRU for @mem_type until we create enough
+ * space, or we've evicted everything and there isn't enough space.
+ */
+static int ttm_bo_mem_force_space(struct ttm_bo_device *bdev,
+				  struct ttm_mem_reg *mem,
+				  uint32_t mem_type,
+				  bool interruptible, bool no_wait)
+{
+	struct drm_mm_node *node;
+	struct ttm_buffer_object *entry;
+	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
+	struct list_head *lru;
+	unsigned long num_pages = mem->num_pages;
+	int put_count = 0;
+	int ret;
+
+retry_pre_get:
+	ret = drm_mm_pre_get(&man->manager);
+	if (unlikely(ret != 0))
+		return ret;
+
+	spin_lock(&bdev->lru_lock);
+	do {
+		node = drm_mm_search_free(&man->manager, num_pages,
+					  mem->page_alignment, 1);
+		if (node)
+			break;
+
+		lru = &man->lru;
+		if (list_empty(lru))
+			break;
+
+		entry = list_first_entry(lru, struct ttm_buffer_object, lru);
+		kref_get(&entry->list_kref);
+
+		ret =
+		    ttm_bo_reserve_locked(entry, interruptible, no_wait,
+					  false, 0);
+
+		if (likely(ret == 0))
+			put_count = ttm_bo_del_from_lru(entry);
+
+		spin_unlock(&bdev->lru_lock);
+
+		if (unlikely(ret != 0))
+			return ret;
+
+		while (put_count--)
+			kref_put(&entry->list_kref, ttm_bo_ref_bug);
+
+		ret = ttm_bo_evict(entry, mem_type, interruptible, no_wait);
+
+		ttm_bo_unreserve(entry);
+
+		kref_put(&entry->list_kref, ttm_bo_release_list);
+		if (ret)
+			return ret;
+
+		spin_lock(&bdev->lru_lock);
+	} while (1);
+
+	if (!node) {
+		spin_unlock(&bdev->lru_lock);
+		return -ENOMEM;
+	}
+
+	node = drm_mm_get_block_atomic(node, num_pages, mem->page_alignment);
+	if (unlikely(!node)) {
+		spin_unlock(&bdev->lru_lock);
+		goto retry_pre_get;
+	}
+
+	spin_unlock(&bdev->lru_lock);
+	mem->mm_node = node;
+	mem->mem_type = mem_type;
+	return 0;
+}
+
+static bool ttm_bo_mt_compatible(struct ttm_mem_type_manager *man,
+				 bool disallow_fixed,
+				 uint32_t mem_type,
+				 uint32_t mask, uint32_t *res_mask)
+{
+	uint32_t cur_flags = ttm_bo_type_flags(mem_type);
+
+	if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) && disallow_fixed)
+		return false;
+
+	if ((cur_flags & mask & TTM_PL_MASK_MEM) == 0)
+		return false;
+
+	if ((mask & man->available_caching) == 0)
+		return false;
+	if (mask & man->default_caching)
+		cur_flags |= man->default_caching;
+	else if (mask & TTM_PL_FLAG_CACHED)
+		cur_flags |= TTM_PL_FLAG_CACHED;
+	else if (mask & TTM_PL_FLAG_WC)
+		cur_flags |= TTM_PL_FLAG_WC;
+	else
+		cur_flags |= TTM_PL_FLAG_UNCACHED;
+
+	*res_mask = cur_flags;
+	return true;
+}
+
+/**
+ * Creates space for memory region @mem according to its type.
+ *
+ * This function first searches for free space in compatible memory types in
+ * the priority order defined by the driver.  If free space isn't found, then
+ * ttm_bo_mem_force_space is attempted in priority order to evict and find
+ * space.
+ */
+int ttm_bo_mem_space(struct ttm_buffer_object *bo,
+		     uint32_t proposed_placement,
+		     struct ttm_mem_reg *mem,
+		     bool interruptible, bool no_wait)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	struct ttm_mem_type_manager *man;
+
+	uint32_t num_prios = bdev->driver->num_mem_type_prio;
+	const uint32_t *prios = bdev->driver->mem_type_prio;
+	uint32_t i;
+	uint32_t mem_type = TTM_PL_SYSTEM;
+	uint32_t cur_flags = 0;
+	bool type_found = false;
+	bool type_ok = false;
+	bool has_eagain = false;
+	struct drm_mm_node *node = NULL;
+	int ret;
+
+	mem->mm_node = NULL;
+	for (i = 0; i < num_prios; ++i) {
+		mem_type = prios[i];
+		man = &bdev->man[mem_type];
+
+		type_ok = ttm_bo_mt_compatible(man,
+					       bo->type == ttm_bo_type_user,
+					       mem_type, proposed_placement,
+					       &cur_flags);
+
+		if (!type_ok)
+			continue;
+
+		if (mem_type == TTM_PL_SYSTEM)
+			break;
+
+		if (man->has_type && man->use_type) {
+			type_found = true;
+			do {
+				ret = drm_mm_pre_get(&man->manager);
+				if (unlikely(ret))
+					return ret;
+
+				spin_lock(&bdev->lru_lock);
+				node = drm_mm_search_free(&man->manager,
+							  mem->num_pages,
+							  mem->page_alignment,
+							  1);
+				if (unlikely(!node)) {
+					spin_unlock(&bdev->lru_lock);
+					break;
+				}
+				node = drm_mm_get_block_atomic(node,
+							       mem->num_pages,
+							       mem->
+							       page_alignment);
+				spin_unlock(&bdev->lru_lock);
+			} while (!node);
+		}
+		if (node)
+			break;
+	}
+
+	if ((type_ok && (mem_type == TTM_PL_SYSTEM)) || node) {
+		mem->mm_node = node;
+		mem->mem_type = mem_type;
+		mem->placement = cur_flags;
+		return 0;
+	}
+
+	if (!type_found)
+		return -EINVAL;
+
+	num_prios = bdev->driver->num_mem_busy_prio;
+	prios = bdev->driver->mem_busy_prio;
+
+	for (i = 0; i < num_prios; ++i) {
+		mem_type = prios[i];
+		man = &bdev->man[mem_type];
+
+		if (!man->has_type)
+			continue;
+
+		if (!ttm_bo_mt_compatible(man,
+					  bo->type == ttm_bo_type_user,
+					  mem_type,
+					  proposed_placement, &cur_flags))
+			continue;
+
+		ret = ttm_bo_mem_force_space(bdev, mem, mem_type,
+					     interruptible, no_wait);
+
+		if (ret == 0 && mem->mm_node) {
+			mem->placement = cur_flags;
+			return 0;
+		}
+
+		if (ret == -ERESTART)
+			has_eagain = true;
+	}
+
+	ret = (has_eagain) ? -ERESTART : -ENOMEM;
+	return ret;
+}
+EXPORT_SYMBOL(ttm_bo_mem_space);
+
+int ttm_bo_wait_cpu(struct ttm_buffer_object *bo, bool no_wait)
+{
+	int ret = 0;
+
+	if ((atomic_read(&bo->cpu_writers) > 0) && no_wait)
+		return -EBUSY;
+
+	ret = wait_event_interruptible(bo->event_queue,
+				       atomic_read(&bo->cpu_writers) == 0);
+
+	if (ret == -ERESTARTSYS)
+		ret = -ERESTART;
+
+	return ret;
+}
+
+int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
+		       uint32_t proposed_placement,
+		       bool interruptible, bool no_wait)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	int ret = 0;
+	struct ttm_mem_reg mem;
+
+	BUG_ON(!atomic_read(&bo->reserved));
+
+	/*
+	 * FIXME: It's possible to pipeline buffer moves.
+	 * Have the driver move function wait for idle when necessary,
+	 * instead of doing it here.
+	 */
+
+	spin_lock(&bo->lock);
+	ret = ttm_bo_wait(bo, false, interruptible, no_wait);
+	spin_unlock(&bo->lock);
+
+	if (ret)
+		return ret;
+
+	mem.num_pages = bo->num_pages;
+	mem.size = mem.num_pages << PAGE_SHIFT;
+	mem.page_alignment = bo->mem.page_alignment;
+
+	/*
+	 * Determine where to move the buffer.
+	 */
+
+	ret = ttm_bo_mem_space(bo, proposed_placement, &mem,
+			       interruptible, no_wait);
+	if (ret)
+		goto out_unlock;
+
+	ret = ttm_bo_handle_move_mem(bo, &mem, false, interruptible, no_wait);
+
+out_unlock:
+	if (ret && mem.mm_node) {
+		spin_lock(&bdev->lru_lock);
+		drm_mm_put_block(mem.mm_node);
+		spin_unlock(&bdev->lru_lock);
+	}
+	return ret;
+}
+
+static int ttm_bo_mem_compat(uint32_t proposed_placement,
+			     struct ttm_mem_reg *mem)
+{
+	if ((proposed_placement & mem->placement & TTM_PL_MASK_MEM) == 0)
+		return 0;
+	if ((proposed_placement & mem->placement & TTM_PL_MASK_CACHING) == 0)
+		return 0;
+
+	return 1;
+}
+
+int ttm_buffer_object_validate(struct ttm_buffer_object *bo,
+			       uint32_t proposed_placement,
+			       bool interruptible, bool no_wait)
+{
+	int ret;
+
+	BUG_ON(!atomic_read(&bo->reserved));
+	bo->proposed_placement = proposed_placement;
+
+	TTM_DEBUG("Proposed placement 0x%08lx, Old flags 0x%08lx\n",
+		  (unsigned long)proposed_placement,
+		  (unsigned long)bo->mem.placement);
+
+	/*
+	 * Check whether we need to move buffer.
+	 */
+
+	if (!ttm_bo_mem_compat(bo->proposed_placement, &bo->mem)) {
+		ret = ttm_bo_move_buffer(bo, bo->proposed_placement,
+					 interruptible, no_wait);
+		if (ret) {
+			if (ret != -ERESTART)
+				printk(KERN_ERR TTM_PFX
+				       "Failed moving buffer. "
+				       "Proposed placement 0x%08x\n",
+				       bo->proposed_placement);
+			if (ret == -ENOMEM)
+				printk(KERN_ERR TTM_PFX
+				       "Out of aperture space or "
+				       "DRM memory quota.\n");
+			return ret;
+		}
+	}
+
+	/*
+	 * We might need to add a TTM.
+	 */
+
+	if (bo->mem.mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
+		ret = ttm_bo_add_ttm(bo, true);
+		if (ret)
+			return ret;
+	}
+	/*
+	 * Validation has succeeded, move the access and other
+	 * non-mapping-related flag bits from the proposed flags to
+	 * the active flags
+	 */
+
+	ttm_flag_masked(&bo->mem.placement, bo->proposed_placement,
+			~TTM_PL_MASK_MEMTYPE);
+
+	return 0;
+}
+EXPORT_SYMBOL(ttm_buffer_object_validate);
+
+int
+ttm_bo_check_placement(struct ttm_buffer_object *bo,
+		       uint32_t set_flags, uint32_t clr_flags)
+{
+	uint32_t new_mask = set_flags | clr_flags;
+
+	if ((bo->type == ttm_bo_type_user) &&
+	    (clr_flags & TTM_PL_FLAG_CACHED)) {
+		printk(KERN_ERR TTM_PFX
+		       "User buffers require cache-coherent memory.\n");
+		return -EINVAL;
+	}
+
+	if (!capable(CAP_SYS_ADMIN)) {
+		if (new_mask & TTM_PL_FLAG_NO_EVICT) {
+			printk(KERN_ERR TTM_PFX "Need to be root to modify"
+			       " NO_EVICT status.\n");
+			return -EINVAL;
+		}
+
+		if ((clr_flags & bo->mem.placement & TTM_PL_MASK_MEMTYPE) &&
+		    (bo->mem.placement & TTM_PL_FLAG_NO_EVICT)) {
+			printk(KERN_ERR TTM_PFX
+			       "Incompatible memory specification"
+			       " for NO_EVICT buffer.\n");
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+int ttm_buffer_object_init(struct ttm_bo_device *bdev,
+			   struct ttm_buffer_object *bo,
+			   unsigned long size,
+			   enum ttm_bo_type type,
+			   uint32_t flags,
+			   uint32_t page_alignment,
+			   unsigned long buffer_start,
+			   bool interruptible,
+			   struct file *persistant_swap_storage,
+			   size_t acc_size,
+			   void (*destroy) (struct ttm_buffer_object *))
+{
+	int ret = 0;
+	unsigned long num_pages;
+
+	size += buffer_start & ~PAGE_MASK;
+	num_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	if (num_pages == 0) {
+		printk(KERN_ERR TTM_PFX "Illegal buffer object size.\n");
+		return -EINVAL;
+	}
+	bo->destroy = destroy;
+
+	spin_lock_init(&bo->lock);
+	kref_init(&bo->kref);
+	kref_init(&bo->list_kref);
+	atomic_set(&bo->cpu_writers, 0);
+	atomic_set(&bo->reserved, 1);
+	init_waitqueue_head(&bo->event_queue);
+	INIT_LIST_HEAD(&bo->lru);
+	INIT_LIST_HEAD(&bo->ddestroy);
+	INIT_LIST_HEAD(&bo->swap);
+	bo->bdev = bdev;
+	bo->type = type;
+	bo->num_pages = num_pages;
+	bo->mem.mem_type = TTM_PL_SYSTEM;
+	bo->mem.num_pages = bo->num_pages;
+	bo->mem.mm_node = NULL;
+	bo->mem.page_alignment = page_alignment;
+	bo->buffer_start = buffer_start & PAGE_MASK;
+	bo->priv_flags = 0;
+	bo->mem.placement = (TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED);
+	bo->seq_valid = false;
+	bo->persistant_swap_storage = persistant_swap_storage;
+	bo->acc_size = acc_size;
+
+	ret = ttm_bo_check_placement(bo, flags, 0ULL);
+	if (unlikely(ret != 0))
+		goto out_err;
+
+	/*
+	 * If no caching attributes are set, accept any form of caching.
+	 */
+
+	if ((flags & TTM_PL_MASK_CACHING) == 0)
+		flags |= TTM_PL_MASK_CACHING;
+
+	/*
+	 * For ttm_bo_type_device buffers, allocate
+	 * address space from the device.
+	 */
+
+	if (bo->type == ttm_bo_type_device) {
+		ret = ttm_bo_setup_vm(bo);
+		if (ret)
+			goto out_err;
+	}
+
+	ret = ttm_buffer_object_validate(bo, flags, interruptible, false);
+	if (ret)
+		goto out_err;
+
+	ttm_bo_unreserve(bo);
+	return 0;
+
+out_err:
+	ttm_bo_unreserve(bo);
+	ttm_bo_unref(&bo);
+
+	return ret;
+}
+EXPORT_SYMBOL(ttm_buffer_object_init);
+
+static inline size_t ttm_bo_size(struct ttm_bo_device *bdev,
+				 unsigned long num_pages)
+{
+	size_t page_array_size = (num_pages * sizeof(void *) + PAGE_SIZE - 1) &
+	    PAGE_MASK;
+
+	return bdev->ttm_bo_size + 2 * page_array_size;
+}
+
+int ttm_buffer_object_create(struct ttm_bo_device *bdev,
+			     unsigned long size,
+			     enum ttm_bo_type type,
+			     uint32_t flags,
+			     uint32_t page_alignment,
+			     unsigned long buffer_start,
+			     bool interruptible,
+			     struct file *persistant_swap_storage,
+			     struct ttm_buffer_object **p_bo)
+{
+	struct ttm_buffer_object *bo;
+	int ret;
+	struct ttm_mem_global *mem_glob = bdev->mem_glob;
+
+	size_t acc_size =
+	    ttm_bo_size(bdev, (size + PAGE_SIZE - 1) >> PAGE_SHIFT);
+	ret = ttm_mem_global_alloc(mem_glob, acc_size, false, false, false);
+	if (unlikely(ret != 0))
+		return ret;
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+
+	if (unlikely(bo == NULL)) {
+		ttm_mem_global_free(mem_glob, acc_size, false);
+		return -ENOMEM;
+	}
+
+	ret = ttm_buffer_object_init(bdev, bo, size, type, flags,
+				     page_alignment, buffer_start,
+				     interruptible,
+				     persistant_swap_storage, acc_size, NULL);
+	if (likely(ret == 0))
+		*p_bo = bo;
+
+	return ret;
+}
+
+static int ttm_bo_leave_list(struct ttm_buffer_object *bo,
+			     uint32_t mem_type, bool allow_errors)
+{
+	int ret;
+
+	spin_lock(&bo->lock);
+	ret = ttm_bo_wait(bo, false, false, false);
+	spin_unlock(&bo->lock);
+
+	if (ret && allow_errors)
+		goto out;
+
+	if (bo->mem.mem_type == mem_type)
+		ret = ttm_bo_evict(bo, mem_type, false, false);
+
+	if (ret) {
+		if (allow_errors) {
+			goto out;
+		} else {
+			ret = 0;
+			printk(KERN_ERR TTM_PFX "Cleanup eviction failed\n");
+		}
+	}
+
+out:
+	return ret;
+}
+
+static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
+				   struct list_head *head,
+				   unsigned mem_type, bool allow_errors)
+{
+	struct ttm_buffer_object *entry;
+	int ret;
+	int put_count;
+
+	/*
+	 * Can't use standard list traversal since we're unlocking.
+	 */
+
+	spin_lock(&bdev->lru_lock);
+
+	while (!list_empty(head)) {
+		entry = list_first_entry(head, struct ttm_buffer_object, lru);
+		kref_get(&entry->list_kref);
+		ret = ttm_bo_reserve_locked(entry, false, false, false, 0);
+		put_count = ttm_bo_del_from_lru(entry);
+		spin_unlock(&bdev->lru_lock);
+		while (put_count--)
+			kref_put(&entry->list_kref, ttm_bo_ref_bug);
+		BUG_ON(ret);
+		ret = ttm_bo_leave_list(entry, mem_type, allow_errors);
+		ttm_bo_unreserve(entry);
+		kref_put(&entry->list_kref, ttm_bo_release_list);
+		spin_lock(&bdev->lru_lock);
+	}
+
+	spin_unlock(&bdev->lru_lock);
+
+	return 0;
+}
+
+int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type)
+{
+	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
+	int ret = -EINVAL;
+
+	if (mem_type >= TTM_NUM_MEM_TYPES) {
+		printk(KERN_ERR TTM_PFX "Illegal memory type %d\n", mem_type);
+		return ret;
+	}
+
+	if (!man->has_type) {
+		printk(KERN_ERR TTM_PFX "Trying to take down uninitialized "
+		       "memory manager type %u\n", mem_type);
+		return ret;
+	}
+
+	man->use_type = false;
+	man->has_type = false;
+
+	ret = 0;
+	if (mem_type > 0) {
+		ttm_bo_force_list_clean(bdev, &man->lru, mem_type, false);
+
+		spin_lock(&bdev->lru_lock);
+		if (drm_mm_clean(&man->manager))
+			drm_mm_takedown(&man->manager);
+		else
+			ret = -EBUSY;
+
+		spin_unlock(&bdev->lru_lock);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(ttm_bo_clean_mm);
+
+int ttm_bo_evict_mm(struct ttm_bo_device *bdev, unsigned mem_type)
+{
+	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
+
+	if (mem_type == 0 || mem_type >= TTM_NUM_MEM_TYPES) {
+		printk(KERN_ERR TTM_PFX
+		       "Illegal memory manager memory type %u.\n",
+		       mem_type);
+		return -EINVAL;
+	}
+
+	if (!man->has_type) {
+		printk(KERN_ERR TTM_PFX
+		       "Memory type %u has not been initialized.\n",
+		       mem_type);
+		return 0;
+	}
+
+	return ttm_bo_force_list_clean(bdev, &man->lru, mem_type, true);
+}
+EXPORT_SYMBOL(ttm_bo_evict_mm);
+
+int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
+		   unsigned long p_offset, unsigned long p_size)
+{
+	int ret = -EINVAL;
+	struct ttm_mem_type_manager *man;
+
+	if (type >= TTM_NUM_MEM_TYPES) {
+		printk(KERN_ERR TTM_PFX "Illegal memory type %d\n", type);
+		return ret;
+	}
+
+	man = &bdev->man[type];
+	if (man->has_type) {
+		printk(KERN_ERR TTM_PFX
+		       "Memory manager already initialized for type %d\n",
+		       type);
+		return ret;
+	}
+
+	ret = bdev->driver->init_mem_type(bdev, type, man);
+	if (ret)
+		return ret;
+
+	ret = 0;
+	if (type != TTM_PL_SYSTEM) {
+		if (!p_size) {
+			printk(KERN_ERR TTM_PFX
+			       "Zero size memory manager type %d\n",
+			       type);
+			return ret;
+		}
+		ret = drm_mm_init(&man->manager, p_offset, p_size);
+		if (ret)
+			return ret;
+	}
+	man->has_type = true;
+	man->use_type = true;
+	man->size = p_size;
+
+	INIT_LIST_HEAD(&man->lru);
+
+	return 0;
+}
+EXPORT_SYMBOL(ttm_bo_init_mm);
+
+int ttm_bo_device_release(struct ttm_bo_device *bdev)
+{
+	int ret = 0;
+	unsigned i = TTM_NUM_MEM_TYPES;
+	struct ttm_mem_type_manager *man;
+
+	while (i--) {
+		man = &bdev->man[i];
+		if (man->has_type) {
+			man->use_type = false;
+			if ((i != TTM_PL_SYSTEM) && ttm_bo_clean_mm(bdev, i)) {
+				ret = -EBUSY;
+				printk(KERN_ERR TTM_PFX
+				       "DRM memory manager type %d "
+				       "is not clean.\n", i);
+			}
+			man->has_type = false;
+		}
+	}
+
+	if (!cancel_delayed_work(&bdev->wq))
+		flush_scheduled_work();
+
+	while (ttm_bo_delayed_delete(bdev, true))
+		;
+
+	spin_lock(&bdev->lru_lock);
+	if (list_empty(&bdev->ddestroy))
+		TTM_DEBUG("Delayed destroy list was clean\n");
+
+	if (list_empty(&bdev->man[0].lru))
+		TTM_DEBUG("Swap list was clean\n");
+	spin_unlock(&bdev->lru_lock);
+
+	ttm_mem_unregister_shrink(bdev->mem_glob, &bdev->shrink);
+	BUG_ON(!drm_mm_clean(&bdev->addr_space_mm));
+	write_lock(&bdev->vm_lock);
+	drm_mm_takedown(&bdev->addr_space_mm);
+	write_unlock(&bdev->vm_lock);
+
+	__free_page(bdev->dummy_read_page);
+	return ret;
+}
+EXPORT_SYMBOL(ttm_bo_device_release);
+
+/*
+ * This function is intended to be called on drm driver load.
+ * If you decide to call it from firstopen, you must protect the call
+ * from a potentially racing ttm_bo_driver_finish in lastclose.
+ * (This may happen on X server restart).
+ */
+
+int ttm_bo_device_init(struct ttm_bo_device *bdev,
+		       struct ttm_mem_global *mem_glob,
+		       struct ttm_bo_driver *driver, uint64_t file_page_offset)
+{
+	int ret = -EINVAL;
+
+	bdev->dummy_read_page = NULL;
+	rwlock_init(&bdev->vm_lock);
+	spin_lock_init(&bdev->lru_lock);
+
+	bdev->driver = driver;
+	bdev->mem_glob = mem_glob;
+
+	memset(bdev->man, 0, sizeof(bdev->man));
+
+	bdev->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
+	if (unlikely(bdev->dummy_read_page == NULL)) {
+		ret = -ENOMEM;
+		goto out_err0;
+	}
+
+	/*
+	 * Initialize the system memory buffer type.
+	 * Other types need to be driver / IOCTL initialized.
+	 */
+	ret = ttm_bo_init_mm(bdev, TTM_PL_SYSTEM, 0, 0);
+	if (unlikely(ret != 0))
+		goto out_err1;
+
+	bdev->addr_space_rb = RB_ROOT;
+	ret = drm_mm_init(&bdev->addr_space_mm, file_page_offset, 0x10000000);
+	if (unlikely(ret != 0))
+		goto out_err2;
+
+	INIT_DELAYED_WORK(&bdev->wq, ttm_bo_delayed_workqueue);
+	bdev->nice_mode = true;
+	INIT_LIST_HEAD(&bdev->ddestroy);
+	INIT_LIST_HEAD(&bdev->swap_lru);
+	bdev->dev_mapping = NULL;
+	ttm_mem_init_shrink(&bdev->shrink, ttm_bo_swapout);
+	ret = ttm_mem_register_shrink(mem_glob, &bdev->shrink);
+	if (unlikely(ret != 0)) {
+		printk(KERN_ERR TTM_PFX
+		       "Could not register buffer object swapout.\n");
+		goto out_err2;
+	}
+
+	bdev->ttm_bo_extra_size =
+		ttm_round_pot(sizeof(struct ttm_tt)) +
+		ttm_round_pot(sizeof(struct ttm_backend));
+
+	bdev->ttm_bo_size = bdev->ttm_bo_extra_size +
+		ttm_round_pot(sizeof(struct ttm_buffer_object));
+
+	return 0;
+out_err2:
+	ttm_bo_clean_mm(bdev, 0);
+out_err1:
+	__free_page(bdev->dummy_read_page);
+out_err0:
+	return ret;
+}
+EXPORT_SYMBOL(ttm_bo_device_init);
+
+/*
+ * buffer object vm functions.
+ */
+
+bool ttm_mem_reg_is_pci(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+{
+	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
+
+	if (!(man->flags & TTM_MEMTYPE_FLAG_FIXED)) {
+		if (mem->mem_type == TTM_PL_SYSTEM)
+			return false;
+
+		if (man->flags & TTM_MEMTYPE_FLAG_CMA)
+			return false;
+
+		if (mem->placement & TTM_PL_FLAG_CACHED)
+			return false;
+	}
+	return true;
+}
+
+int ttm_bo_pci_offset(struct ttm_bo_device *bdev,
+		      struct ttm_mem_reg *mem,
+		      unsigned long *bus_base,
+		      unsigned long *bus_offset, unsigned long *bus_size)
+{
+	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
+
+	*bus_size = 0;
+	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
+		return -EINVAL;
+
+	if (ttm_mem_reg_is_pci(bdev, mem)) {
+		*bus_offset = mem->mm_node->start << PAGE_SHIFT;
+		*bus_size = mem->num_pages << PAGE_SHIFT;
+		*bus_base = man->io_offset;
+	}
+
+	return 0;
+}
+
+void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	loff_t offset = (loff_t) bo->addr_space_offset;
+	loff_t holelen = ((loff_t) bo->mem.num_pages) << PAGE_SHIFT;
+
+	if (!bdev->dev_mapping)
+		return;
+
+	unmap_mapping_range(bdev->dev_mapping, offset, holelen, 1);
+}
+
+static void ttm_bo_vm_insert_rb(struct ttm_buffer_object *bo)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	struct rb_node **cur = &bdev->addr_space_rb.rb_node;
+	struct rb_node *parent = NULL;
+	struct ttm_buffer_object *cur_bo;
+	unsigned long offset = bo->vm_node->start;
+	unsigned long cur_offset;
+
+	while (*cur) {
+		parent = *cur;
+		cur_bo = rb_entry(parent, struct ttm_buffer_object, vm_rb);
+		cur_offset = cur_bo->vm_node->start;
+		if (offset < cur_offset)
+			cur = &parent->rb_left;
+		else if (offset > cur_offset)
+			cur = &parent->rb_right;
+		else
+			BUG();
+	}
+
+	rb_link_node(&bo->vm_rb, parent, cur);
+	rb_insert_color(&bo->vm_rb, &bdev->addr_space_rb);
+}
+
+/**
+ * ttm_bo_setup_vm:
+ *
+ * @bo: the buffer to allocate address space for
+ *
+ * Allocate address space in the drm device so that applications
+ * can mmap the buffer and access the contents. This only
+ * applies to ttm_bo_type_device objects as others are not
+ * placed in the drm device address space.
+ */
+
+static int ttm_bo_setup_vm(struct ttm_buffer_object *bo)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	int ret;
+
+retry_pre_get:
+	ret = drm_mm_pre_get(&bdev->addr_space_mm);
+	if (unlikely(ret != 0))
+		return ret;
+
+	write_lock(&bdev->vm_lock);
+	bo->vm_node = drm_mm_search_free(&bdev->addr_space_mm,
+					 bo->mem.num_pages, 0, 0);
+
+	if (unlikely(bo->vm_node == NULL)) {
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
+
+	bo->vm_node = drm_mm_get_block_atomic(bo->vm_node,
+					      bo->mem.num_pages, 0);
+
+	if (unlikely(bo->vm_node == NULL)) {
+		write_unlock(&bdev->vm_lock);
+		goto retry_pre_get;
+	}
+
+	ttm_bo_vm_insert_rb(bo);
+	write_unlock(&bdev->vm_lock);
+	bo->addr_space_offset = ((uint64_t) bo->vm_node->start) << PAGE_SHIFT;
+
+	return 0;
+out_unlock:
+	write_unlock(&bdev->vm_lock);
+	return ret;
+}
+
+int ttm_bo_wait(struct ttm_buffer_object *bo,
+		bool lazy, bool interruptible, bool no_wait)
+{
+	struct ttm_bo_driver *driver = bo->bdev->driver;
+	void *sync_obj;
+	void *sync_obj_arg;
+	int ret = 0;
+
+	if (likely(bo->sync_obj == NULL))
+		return 0;
+
+	while (bo->sync_obj) {
+
+		if (driver->sync_obj_signaled(bo->sync_obj, bo->sync_obj_arg)) {
+			void *tmp_obj = bo->sync_obj;
+			bo->sync_obj = NULL;
+			clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
+			spin_unlock(&bo->lock);
+			driver->sync_obj_unref(&tmp_obj);
+			spin_lock(&bo->lock);
+			continue;
+		}
+
+		if (no_wait)
+			return -EBUSY;
+
+		sync_obj = driver->sync_obj_ref(bo->sync_obj);
+		sync_obj_arg = bo->sync_obj_arg;
+		spin_unlock(&bo->lock);
+		ret = driver->sync_obj_wait(sync_obj, sync_obj_arg,
+					    lazy, interruptible);
+		if (unlikely(ret != 0)) {
+			driver->sync_obj_unref(&sync_obj);
+			spin_lock(&bo->lock);
+			return ret;
+		}
+		spin_lock(&bo->lock);
+		if (likely(bo->sync_obj == sync_obj &&
+			   bo->sync_obj_arg == sync_obj_arg)) {
+			void *tmp_obj = bo->sync_obj;
+			bo->sync_obj = NULL;
+			clear_bit(TTM_BO_PRIV_FLAG_MOVING,
+				  &bo->priv_flags);
+			spin_unlock(&bo->lock);
+			driver->sync_obj_unref(&sync_obj);
+			driver->sync_obj_unref(&tmp_obj);
+			spin_lock(&bo->lock);
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL(ttm_bo_wait);
+
+void ttm_bo_unblock_reservation(struct ttm_buffer_object *bo)
+{
+	atomic_set(&bo->reserved, 0);
+	wake_up_all(&bo->event_queue);
+}
+
+int ttm_bo_block_reservation(struct ttm_buffer_object *bo, bool interruptible,
+			     bool no_wait)
+{
+	int ret;
+
+	while (unlikely(atomic_cmpxchg(&bo->reserved, 0, 1) != 0)) {
+		if (no_wait)
+			return -EBUSY;
+		else if (interruptible) {
+			ret = wait_event_interruptible
+			    (bo->event_queue, atomic_read(&bo->reserved) == 0);
+			if (unlikely(ret != 0))
+				return -ERESTART;
+		} else {
+			wait_event(bo->event_queue,
+				   atomic_read(&bo->reserved) == 0);
+		}
+	}
+	return 0;
+}
+
+int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait)
+{
+	int ret = 0;
+
+	/*
+	 * Using ttm_bo_reserve instead of ttm_bo_block_reservation
+	 * makes sure the lru lists are updated.
+	 */
+
+	ret = ttm_bo_reserve(bo, true, no_wait, false, 0);
+	if (unlikely(ret != 0))
+		return ret;
+	spin_lock(&bo->lock);
+	ret = ttm_bo_wait(bo, false, true, no_wait);
+	spin_unlock(&bo->lock);
+	if (likely(ret == 0))
+		atomic_inc(&bo->cpu_writers);
+	ttm_bo_unreserve(bo);
+	return ret;
+}
+
+void ttm_bo_synccpu_write_release(struct ttm_buffer_object *bo)
+{
+	if (atomic_dec_and_test(&bo->cpu_writers))
+		wake_up_all(&bo->event_queue);
+}
+
+/**
+ * A buffer object shrink method that tries to swap out the first
+ * buffer object on the bo_global::swap_lru list.
+ */
+
+static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
+{
+	struct ttm_bo_device *bdev =
+	    container_of(shrink, struct ttm_bo_device, shrink);
+	struct ttm_buffer_object *bo;
+	int ret = -EBUSY;
+	int put_count;
+	uint32_t swap_placement = (TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM);
+
+	spin_lock(&bdev->lru_lock);
+	while (ret == -EBUSY) {
+		if (unlikely(list_empty(&bdev->swap_lru))) {
+			spin_unlock(&bdev->lru_lock);
+			return -EBUSY;
+		}
+
+		bo = list_first_entry(&bdev->swap_lru,
+				      struct ttm_buffer_object, swap);
+		kref_get(&bo->list_kref);
+
+		/**
+		 * Reserve buffer. Since we unlock while sleeping, we need
+		 * to re-check that nobody removed us from the swap-list while
+		 * we slept.
+		 */
+
+		ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
+		if (unlikely(ret == -EBUSY)) {
+			spin_unlock(&bdev->lru_lock);
+			ttm_bo_wait_unreserved(bo, false);
+			kref_put(&bo->list_kref, ttm_bo_release_list);
+			spin_lock(&bdev->lru_lock);
+		}
+	}
+
+	BUG_ON(ret != 0);
+	put_count = ttm_bo_del_from_lru(bo);
+	spin_unlock(&bdev->lru_lock);
+
+	while (put_count--)
+		kref_put(&bo->list_kref, ttm_bo_ref_bug);
+
+	/**
+	 * Wait for GPU, then move to system cached.
+	 */
+
+	spin_lock(&bo->lock);
+	ret = ttm_bo_wait(bo, false, false, false);
+	spin_unlock(&bo->lock);
+
+	if (unlikely(ret != 0))
+		goto out;
+
+	if ((bo->mem.placement & swap_placement) != swap_placement) {
+		struct ttm_mem_reg evict_mem;
+
+		evict_mem = bo->mem;
+		evict_mem.mm_node = NULL;
+		evict_mem.placement = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED;
+		evict_mem.mem_type = TTM_PL_SYSTEM;
+
+		ret = ttm_bo_handle_move_mem(bo, &evict_mem, true,
+					     false, false);
+		if (unlikely(ret != 0))
+			goto out;
+	}
+
+	ttm_bo_unmap_virtual(bo);
+
+	/**
+	 * Swap out. Buffer will be swapped in again as soon as
+	 * anyone tries to access a ttm page.
+	 */
+
+	ret = ttm_tt_swapout(bo->ttm, bo->persistant_swap_storage);
+out:
+
+	/**
+	 *
+	 * Unreserve without putting on LRU to avoid swapping out an
+	 * already swapped buffer.
+	 */
+
+	atomic_set(&bo->reserved, 0);
+	wake_up_all(&bo->event_queue);
+	kref_put(&bo->list_kref, ttm_bo_release_list);
+	return ret;
+}
+
+void ttm_bo_swapout_all(struct ttm_bo_device *bdev)
+{
+	while (ttm_bo_swapout(&bdev->shrink) == 0)
+		;
+}
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
new file mode 100644
index 00000000000..517c8455963
--- /dev/null
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -0,0 +1,561 @@
+/**************************************************************************
+ *
+ * Copyright (c) 2007-2009 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+/*
+ * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com>
+ */
+
+#include "ttm/ttm_bo_driver.h"
+#include "ttm/ttm_placement.h"
+#include <linux/io.h>
+#include <linux/highmem.h>
+#include <linux/wait.h>
+#include <linux/vmalloc.h>
+#include <linux/version.h>
+#include <linux/module.h>
+
+void ttm_bo_free_old_node(struct ttm_buffer_object *bo)
+{
+	struct ttm_mem_reg *old_mem = &bo->mem;
+
+	if (old_mem->mm_node) {
+		spin_lock(&bo->bdev->lru_lock);
+		drm_mm_put_block(old_mem->mm_node);
+		spin_unlock(&bo->bdev->lru_lock);
+	}
+	old_mem->mm_node = NULL;
+}
+
+int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
+		    bool evict, bool no_wait, struct ttm_mem_reg *new_mem)
+{
+	struct ttm_tt *ttm = bo->ttm;
+	struct ttm_mem_reg *old_mem = &bo->mem;
+	uint32_t save_flags = old_mem->placement;
+	int ret;
+
+	if (old_mem->mem_type != TTM_PL_SYSTEM) {
+		ttm_tt_unbind(ttm);
+		ttm_bo_free_old_node(bo);
+		ttm_flag_masked(&old_mem->placement, TTM_PL_FLAG_SYSTEM,
+				TTM_PL_MASK_MEM);
+		old_mem->mem_type = TTM_PL_SYSTEM;
+		save_flags = old_mem->placement;
+	}
+
+	ret = ttm_tt_set_placement_caching(ttm, new_mem->placement);
+	if (unlikely(ret != 0))
+		return ret;
+
+	if (new_mem->mem_type != TTM_PL_SYSTEM) {
+		ret = ttm_tt_bind(ttm, new_mem);
+		if (unlikely(ret != 0))
+			return ret;
+	}
+
+	*old_mem = *new_mem;
+	new_mem->mm_node = NULL;
+	ttm_flag_masked(&save_flags, new_mem->placement, TTM_PL_MASK_MEMTYPE);
+	return 0;
+}
+EXPORT_SYMBOL(ttm_bo_move_ttm);
+
+int ttm_mem_reg_ioremap(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem,
+			void **virtual)
+{
+	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
+	unsigned long bus_offset;
+	unsigned long bus_size;
+	unsigned long bus_base;
+	int ret;
+	void *addr;
+
+	*virtual = NULL;
+	ret = ttm_bo_pci_offset(bdev, mem, &bus_base, &bus_offset, &bus_size);
+	if (ret || bus_size == 0)
+		return ret;
+
+	if (!(man->flags & TTM_MEMTYPE_FLAG_NEEDS_IOREMAP))
+		addr = (void *)(((u8 *) man->io_addr) + bus_offset);
+	else {
+		if (mem->placement & TTM_PL_FLAG_WC)
+			addr = ioremap_wc(bus_base + bus_offset, bus_size);
+		else
+			addr = ioremap_nocache(bus_base + bus_offset, bus_size);
+		if (!addr)
+			return -ENOMEM;
+	}
+	*virtual = addr;
+	return 0;
+}
+
+void ttm_mem_reg_iounmap(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem,
+			 void *virtual)
+{
+	struct ttm_mem_type_manager *man;
+
+	man = &bdev->man[mem->mem_type];
+
+	if (virtual && (man->flags & TTM_MEMTYPE_FLAG_NEEDS_IOREMAP))
+		iounmap(virtual);
+}
+
+static int ttm_copy_io_page(void *dst, void *src, unsigned long page)
+{
+	uint32_t *dstP =
+	    (uint32_t *) ((unsigned long)dst + (page << PAGE_SHIFT));
+	uint32_t *srcP =
+	    (uint32_t *) ((unsigned long)src + (page << PAGE_SHIFT));
+
+	int i;
+	for (i = 0; i < PAGE_SIZE / sizeof(uint32_t); ++i)
+		iowrite32(ioread32(srcP++), dstP++);
+	return 0;
+}
+
+static int ttm_copy_io_ttm_page(struct ttm_tt *ttm, void *src,
+				unsigned long page)
+{
+	struct page *d = ttm_tt_get_page(ttm, page);
+	void *dst;
+
+	if (!d)
+		return -ENOMEM;
+
+	src = (void *)((unsigned long)src + (page << PAGE_SHIFT));
+	dst = kmap(d);
+	if (!dst)
+		return -ENOMEM;
+
+	memcpy_fromio(dst, src, PAGE_SIZE);
+	kunmap(d);
+	return 0;
+}
+
+static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst,
+				unsigned long page)
+{
+	struct page *s = ttm_tt_get_page(ttm, page);
+	void *src;
+
+	if (!s)
+		return -ENOMEM;
+
+	dst = (void *)((unsigned long)dst + (page << PAGE_SHIFT));
+	src = kmap(s);
+	if (!src)
+		return -ENOMEM;
+
+	memcpy_toio(dst, src, PAGE_SIZE);
+	kunmap(s);
+	return 0;
+}
+
+int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
+		       bool evict, bool no_wait, struct ttm_mem_reg *new_mem)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	struct ttm_mem_type_manager *man = &bdev->man[new_mem->mem_type];
+	struct ttm_tt *ttm = bo->ttm;
+	struct ttm_mem_reg *old_mem = &bo->mem;
+	struct ttm_mem_reg old_copy = *old_mem;
+	void *old_iomap;
+	void *new_iomap;
+	int ret;
+	uint32_t save_flags = old_mem->placement;
+	unsigned long i;
+	unsigned long page;
+	unsigned long add = 0;
+	int dir;
+
+	ret = ttm_mem_reg_ioremap(bdev, old_mem, &old_iomap);
+	if (ret)
+		return ret;
+	ret = ttm_mem_reg_ioremap(bdev, new_mem, &new_iomap);
+	if (ret)
+		goto out;
+
+	if (old_iomap == NULL && new_iomap == NULL)
+		goto out2;
+	if (old_iomap == NULL && ttm == NULL)
+		goto out2;
+
+	add = 0;
+	dir = 1;
+
+	if ((old_mem->mem_type == new_mem->mem_type) &&
+	    (new_mem->mm_node->start <
+	     old_mem->mm_node->start + old_mem->mm_node->size)) {
+		dir = -1;
+		add = new_mem->num_pages - 1;
+	}
+
+	for (i = 0; i < new_mem->num_pages; ++i) {
+		page = i * dir + add;
+		if (old_iomap == NULL)
+			ret = ttm_copy_ttm_io_page(ttm, new_iomap, page);
+		else if (new_iomap == NULL)
+			ret = ttm_copy_io_ttm_page(ttm, old_iomap, page);
+		else
+			ret = ttm_copy_io_page(new_iomap, old_iomap, page);
+		if (ret)
+			goto out1;
+	}
+	mb();
+out2:
+	ttm_bo_free_old_node(bo);
+
+	*old_mem = *new_mem;
+	new_mem->mm_node = NULL;
+	ttm_flag_masked(&save_flags, new_mem->placement, TTM_PL_MASK_MEMTYPE);
+
+	if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) && (ttm != NULL)) {
+		ttm_tt_unbind(ttm);
+		ttm_tt_destroy(ttm);
+		bo->ttm = NULL;
+	}
+
+out1:
+	ttm_mem_reg_iounmap(bdev, new_mem, new_iomap);
+out:
+	ttm_mem_reg_iounmap(bdev, &old_copy, old_iomap);
+	return ret;
+}
+EXPORT_SYMBOL(ttm_bo_move_memcpy);
+
+static void ttm_transfered_destroy(struct ttm_buffer_object *bo)
+{
+	kfree(bo);
+}
+
+/**
+ * ttm_buffer_object_transfer
+ *
+ * @bo: A pointer to a struct ttm_buffer_object.
+ * @new_obj: A pointer to a pointer to a newly created ttm_buffer_object,
+ * holding the data of @bo with the old placement.
+ *
+ * This is a utility function that may be called after an accelerated move
+ * has been scheduled. A new buffer object is created as a placeholder for
+ * the old data while it's being copied. When that buffer object is idle,
+ * it can be destroyed, releasing the space of the old placement.
+ * Returns:
+ * !0: Failure.
+ */
+
+static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
+				      struct ttm_buffer_object **new_obj)
+{
+	struct ttm_buffer_object *fbo;
+	struct ttm_bo_device *bdev = bo->bdev;
+	struct ttm_bo_driver *driver = bdev->driver;
+
+	fbo = kzalloc(sizeof(*fbo), GFP_KERNEL);
+	if (!fbo)
+		return -ENOMEM;
+
+	*fbo = *bo;
+
+	/**
+	 * Fix up members that we shouldn't copy directly:
+	 * TODO: Explicit member copy would probably be better here.
+	 */
+
+	spin_lock_init(&fbo->lock);
+	init_waitqueue_head(&fbo->event_queue);
+	INIT_LIST_HEAD(&fbo->ddestroy);
+	INIT_LIST_HEAD(&fbo->lru);
+	INIT_LIST_HEAD(&fbo->swap);
+	fbo->vm_node = NULL;
+
+	fbo->sync_obj = driver->sync_obj_ref(bo->sync_obj);
+	if (fbo->mem.mm_node)
+		fbo->mem.mm_node->private = (void *)fbo;
+	kref_init(&fbo->list_kref);
+	kref_init(&fbo->kref);
+	fbo->destroy = &ttm_transfered_destroy;
+
+	*new_obj = fbo;
+	return 0;
+}
+
+pgprot_t ttm_io_prot(uint32_t caching_flags, pgprot_t tmp)
+{
+#if defined(__i386__) || defined(__x86_64__)
+	if (caching_flags & TTM_PL_FLAG_WC)
+		tmp = pgprot_writecombine(tmp);
+	else if (boot_cpu_data.x86 > 3)
+		tmp = pgprot_noncached(tmp);
+
+#elif defined(__powerpc__)
+	if (!(caching_flags & TTM_PL_FLAG_CACHED)) {
+		pgprot_val(tmp) |= _PAGE_NO_CACHE;
+		if (caching_flags & TTM_PL_FLAG_UNCACHED)
+			pgprot_val(tmp) |= _PAGE_GUARDED;
+	}
+#endif
+#if defined(__ia64__)
+	if (caching_flags & TTM_PL_FLAG_WC)
+		tmp = pgprot_writecombine(tmp);
+	else
+		tmp = pgprot_noncached(tmp);
+#endif
+#if defined(__sparc__)
+	if (!(caching_flags & TTM_PL_FLAG_CACHED))
+		tmp = pgprot_noncached(tmp);
+#endif
+	return tmp;
+}
+
+static int ttm_bo_ioremap(struct ttm_buffer_object *bo,
+			  unsigned long bus_base,
+			  unsigned long bus_offset,
+			  unsigned long bus_size,
+			  struct ttm_bo_kmap_obj *map)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	struct ttm_mem_reg *mem = &bo->mem;
+	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
+
+	if (!(man->flags & TTM_MEMTYPE_FLAG_NEEDS_IOREMAP)) {
+		map->bo_kmap_type = ttm_bo_map_premapped;
+		map->virtual = (void *)(((u8 *) man->io_addr) + bus_offset);
+	} else {
+		map->bo_kmap_type = ttm_bo_map_iomap;
+		if (mem->placement & TTM_PL_FLAG_WC)
+			map->virtual = ioremap_wc(bus_base + bus_offset,
+						  bus_size);
+		else
+			map->virtual = ioremap_nocache(bus_base + bus_offset,
+						       bus_size);
+	}
+	return (!map->virtual) ? -ENOMEM : 0;
+}
+
+static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo,
+			   unsigned long start_page,
+			   unsigned long num_pages,
+			   struct ttm_bo_kmap_obj *map)
+{
+	struct ttm_mem_reg *mem = &bo->mem; pgprot_t prot;
+	struct ttm_tt *ttm = bo->ttm;
+	struct page *d;
+	int i;
+
+	BUG_ON(!ttm);
+	if (num_pages == 1 && (mem->placement & TTM_PL_FLAG_CACHED)) {
+		/*
+		 * We're mapping a single page, and the desired
+		 * page protection is consistent with the bo.
+		 */
+
+		map->bo_kmap_type = ttm_bo_map_kmap;
+		map->page = ttm_tt_get_page(ttm, start_page);
+		map->virtual = kmap(map->page);
+	} else {
+	    /*
+	     * Populate the part we're mapping;
+	     */
+		for (i = start_page; i < start_page + num_pages; ++i) {
+			d = ttm_tt_get_page(ttm, i);
+			if (!d)
+				return -ENOMEM;
+		}
+
+		/*
+		 * We need to use vmap to get the desired page protection
+		 * or to make the buffer object look contigous.
+		 */
+		prot = (mem->placement & TTM_PL_FLAG_CACHED) ?
+			PAGE_KERNEL :
+			ttm_io_prot(mem->placement, PAGE_KERNEL);
+		map->bo_kmap_type = ttm_bo_map_vmap;
+		map->virtual = vmap(ttm->pages + start_page, num_pages,
+				    0, prot);
+	}
+	return (!map->virtual) ? -ENOMEM : 0;
+}
+
+int ttm_bo_kmap(struct ttm_buffer_object *bo,
+		unsigned long start_page, unsigned long num_pages,
+		struct ttm_bo_kmap_obj *map)
+{
+	int ret;
+	unsigned long bus_base;
+	unsigned long bus_offset;
+	unsigned long bus_size;
+
+	BUG_ON(!list_empty(&bo->swap));
+	map->virtual = NULL;
+	if (num_pages > bo->num_pages)
+		return -EINVAL;
+	if (start_page > bo->num_pages)
+		return -EINVAL;
+#if 0
+	if (num_pages > 1 && !DRM_SUSER(DRM_CURPROC))
+		return -EPERM;
+#endif
+	ret = ttm_bo_pci_offset(bo->bdev, &bo->mem, &bus_base,
+				&bus_offset, &bus_size);
+	if (ret)
+		return ret;
+	if (bus_size == 0) {
+		return ttm_bo_kmap_ttm(bo, start_page, num_pages, map);
+	} else {
+		bus_offset += start_page << PAGE_SHIFT;
+		bus_size = num_pages << PAGE_SHIFT;
+		return ttm_bo_ioremap(bo, bus_base, bus_offset, bus_size, map);
+	}
+}
+EXPORT_SYMBOL(ttm_bo_kmap);
+
+void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map)
+{
+	if (!map->virtual)
+		return;
+	switch (map->bo_kmap_type) {
+	case ttm_bo_map_iomap:
+		iounmap(map->virtual);
+		break;
+	case ttm_bo_map_vmap:
+		vunmap(map->virtual);
+		break;
+	case ttm_bo_map_kmap:
+		kunmap(map->page);
+		break;
+	case ttm_bo_map_premapped:
+		break;
+	default:
+		BUG();
+	}
+	map->virtual = NULL;
+	map->page = NULL;
+}
+EXPORT_SYMBOL(ttm_bo_kunmap);
+
+int ttm_bo_pfn_prot(struct ttm_buffer_object *bo,
+		    unsigned long dst_offset,
+		    unsigned long *pfn, pgprot_t *prot)
+{
+	struct ttm_mem_reg *mem = &bo->mem;
+	struct ttm_bo_device *bdev = bo->bdev;
+	unsigned long bus_offset;
+	unsigned long bus_size;
+	unsigned long bus_base;
+	int ret;
+	ret = ttm_bo_pci_offset(bdev, mem, &bus_base, &bus_offset,
+			&bus_size);
+	if (ret)
+		return -EINVAL;
+	if (bus_size != 0)
+		*pfn = (bus_base + bus_offset + dst_offset) >> PAGE_SHIFT;
+	else
+		if (!bo->ttm)
+			return -EINVAL;
+		else
+			*pfn = page_to_pfn(ttm_tt_get_page(bo->ttm,
+							   dst_offset >>
+							   PAGE_SHIFT));
+	*prot = (mem->placement & TTM_PL_FLAG_CACHED) ?
+		PAGE_KERNEL : ttm_io_prot(mem->placement, PAGE_KERNEL);
+
+	return 0;
+}
+
+int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
+			      void *sync_obj,
+			      void *sync_obj_arg,
+			      bool evict, bool no_wait,
+			      struct ttm_mem_reg *new_mem)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	struct ttm_bo_driver *driver = bdev->driver;
+	struct ttm_mem_type_manager *man = &bdev->man[new_mem->mem_type];
+	struct ttm_mem_reg *old_mem = &bo->mem;
+	int ret;
+	uint32_t save_flags = old_mem->placement;
+	struct ttm_buffer_object *ghost_obj;
+	void *tmp_obj = NULL;
+
+	spin_lock(&bo->lock);
+	if (bo->sync_obj) {
+		tmp_obj = bo->sync_obj;
+		bo->sync_obj = NULL;
+	}
+	bo->sync_obj = driver->sync_obj_ref(sync_obj);
+	bo->sync_obj_arg = sync_obj_arg;
+	if (evict) {
+		ret = ttm_bo_wait(bo, false, false, false);
+		spin_unlock(&bo->lock);
+		driver->sync_obj_unref(&bo->sync_obj);
+
+		if (ret)
+			return ret;
+
+		ttm_bo_free_old_node(bo);
+		if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
+		    (bo->ttm != NULL)) {
+			ttm_tt_unbind(bo->ttm);
+			ttm_tt_destroy(bo->ttm);
+			bo->ttm = NULL;
+		}
+	} else {
+		/**
+		 * This should help pipeline ordinary buffer moves.
+		 *
+		 * Hang old buffer memory on a new buffer object,
+		 * and leave it to be released when the GPU
+		 * operation has completed.
+		 */
+
+		set_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
+		spin_unlock(&bo->lock);
+
+		ret = ttm_buffer_object_transfer(bo, &ghost_obj);
+		if (ret)
+			return ret;
+
+		/**
+		 * If we're not moving to fixed memory, the TTM object
+		 * needs to stay alive. Otherwhise hang it on the ghost
+		 * bo to be unbound and destroyed.
+		 */
+
+		if (!(man->flags & TTM_MEMTYPE_FLAG_FIXED))
+			ghost_obj->ttm = NULL;
+		else
+			bo->ttm = NULL;
+
+		ttm_bo_unreserve(ghost_obj);
+		ttm_bo_unref(&ghost_obj);
+	}
+
+	*old_mem = *new_mem;
+	new_mem->mm_node = NULL;
+	ttm_flag_masked(&save_flags, new_mem->placement, TTM_PL_MASK_MEMTYPE);
+	return 0;
+}
+EXPORT_SYMBOL(ttm_bo_move_accel_cleanup);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
new file mode 100644
index 00000000000..27b146c54fb
--- /dev/null
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -0,0 +1,454 @@
+/**************************************************************************
+ *
+ * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+/*
+ * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com>
+ */
+
+#include <ttm/ttm_module.h>
+#include <ttm/ttm_bo_driver.h>
+#include <ttm/ttm_placement.h>
+#include <linux/mm.h>
+#include <linux/version.h>
+#include <linux/rbtree.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+
+#define TTM_BO_VM_NUM_PREFAULT 16
+
+static struct ttm_buffer_object *ttm_bo_vm_lookup_rb(struct ttm_bo_device *bdev,
+						     unsigned long page_start,
+						     unsigned long num_pages)
+{
+	struct rb_node *cur = bdev->addr_space_rb.rb_node;
+	unsigned long cur_offset;
+	struct ttm_buffer_object *bo;
+	struct ttm_buffer_object *best_bo = NULL;
+
+	while (likely(cur != NULL)) {
+		bo = rb_entry(cur, struct ttm_buffer_object, vm_rb);
+		cur_offset = bo->vm_node->start;
+		if (page_start >= cur_offset) {
+			cur = cur->rb_right;
+			best_bo = bo;
+			if (page_start == cur_offset)
+				break;
+		} else
+			cur = cur->rb_left;
+	}
+
+	if (unlikely(best_bo == NULL))
+		return NULL;
+
+	if (unlikely((best_bo->vm_node->start + best_bo->num_pages) <
+		     (page_start + num_pages)))
+		return NULL;
+
+	return best_bo;
+}
+
+static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
+	    vma->vm_private_data;
+	struct ttm_bo_device *bdev = bo->bdev;
+	unsigned long bus_base;
+	unsigned long bus_offset;
+	unsigned long bus_size;
+	unsigned long page_offset;
+	unsigned long page_last;
+	unsigned long pfn;
+	struct ttm_tt *ttm = NULL;
+	struct page *page;
+	int ret;
+	int i;
+	bool is_iomem;
+	unsigned long address = (unsigned long)vmf->virtual_address;
+	int retval = VM_FAULT_NOPAGE;
+
+	/*
+	 * Work around locking order reversal in fault / nopfn
+	 * between mmap_sem and bo_reserve: Perform a trylock operation
+	 * for reserve, and if it fails, retry the fault after scheduling.
+	 */
+
+	ret = ttm_bo_reserve(bo, true, true, false, 0);
+	if (unlikely(ret != 0)) {
+		if (ret == -EBUSY)
+			set_need_resched();
+		return VM_FAULT_NOPAGE;
+	}
+
+	/*
+	 * Wait for buffer data in transit, due to a pipelined
+	 * move.
+	 */
+
+	spin_lock(&bo->lock);
+	if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) {
+		ret = ttm_bo_wait(bo, false, true, false);
+		spin_unlock(&bo->lock);
+		if (unlikely(ret != 0)) {
+			retval = (ret != -ERESTART) ?
+			    VM_FAULT_SIGBUS : VM_FAULT_NOPAGE;
+			goto out_unlock;
+		}
+	} else
+		spin_unlock(&bo->lock);
+
+
+	ret = ttm_bo_pci_offset(bdev, &bo->mem, &bus_base, &bus_offset,
+				&bus_size);
+	if (unlikely(ret != 0)) {
+		retval = VM_FAULT_SIGBUS;
+		goto out_unlock;
+	}
+
+	is_iomem = (bus_size != 0);
+
+	page_offset = ((address - vma->vm_start) >> PAGE_SHIFT) +
+	    bo->vm_node->start - vma->vm_pgoff;
+	page_last = ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) +
+	    bo->vm_node->start - vma->vm_pgoff;
+
+	if (unlikely(page_offset >= bo->num_pages)) {
+		retval = VM_FAULT_SIGBUS;
+		goto out_unlock;
+	}
+
+	/*
+	 * Strictly, we're not allowed to modify vma->vm_page_prot here,
+	 * since the mmap_sem is only held in read mode. However, we
+	 * modify only the caching bits of vma->vm_page_prot and
+	 * consider those bits protected by
+	 * the bo->mutex, as we should be the only writers.
+	 * There shouldn't really be any readers of these bits except
+	 * within vm_insert_mixed()? fork?
+	 *
+	 * TODO: Add a list of vmas to the bo, and change the
+	 * vma->vm_page_prot when the object changes caching policy, with
+	 * the correct locks held.
+	 */
+
+	if (is_iomem) {
+		vma->vm_page_prot = ttm_io_prot(bo->mem.placement,
+						vma->vm_page_prot);
+	} else {
+		ttm = bo->ttm;
+		vma->vm_page_prot = (bo->mem.placement & TTM_PL_FLAG_CACHED) ?
+		    vm_get_page_prot(vma->vm_flags) :
+		    ttm_io_prot(bo->mem.placement, vma->vm_page_prot);
+	}
+
+	/*
+	 * Speculatively prefault a number of pages. Only error on
+	 * first page.
+	 */
+
+	for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) {
+
+		if (is_iomem)
+			pfn = ((bus_base + bus_offset) >> PAGE_SHIFT) +
+			    page_offset;
+		else {
+			page = ttm_tt_get_page(ttm, page_offset);
+			if (unlikely(!page && i == 0)) {
+				retval = VM_FAULT_OOM;
+				goto out_unlock;
+			} else if (unlikely(!page)) {
+				break;
+			}
+			pfn = page_to_pfn(page);
+		}
+
+		ret = vm_insert_mixed(vma, address, pfn);
+		/*
+		 * Somebody beat us to this PTE or prefaulting to
+		 * an already populated PTE, or prefaulting error.
+		 */
+
+		if (unlikely((ret == -EBUSY) || (ret != 0 && i > 0)))
+			break;
+		else if (unlikely(ret != 0)) {
+			retval =
+			    (ret == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS;
+			goto out_unlock;
+
+		}
+
+		address += PAGE_SIZE;
+		if (unlikely(++page_offset >= page_last))
+			break;
+	}
+
+out_unlock:
+	ttm_bo_unreserve(bo);
+	return retval;
+}
+
+static void ttm_bo_vm_open(struct vm_area_struct *vma)
+{
+	struct ttm_buffer_object *bo =
+	    (struct ttm_buffer_object *)vma->vm_private_data;
+
+	(void)ttm_bo_reference(bo);
+}
+
+static void ttm_bo_vm_close(struct vm_area_struct *vma)
+{
+	struct ttm_buffer_object *bo =
+	    (struct ttm_buffer_object *)vma->vm_private_data;
+
+	ttm_bo_unref(&bo);
+	vma->vm_private_data = NULL;
+}
+
+static struct vm_operations_struct ttm_bo_vm_ops = {
+	.fault = ttm_bo_vm_fault,
+	.open = ttm_bo_vm_open,
+	.close = ttm_bo_vm_close
+};
+
+int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
+		struct ttm_bo_device *bdev)
+{
+	struct ttm_bo_driver *driver;
+	struct ttm_buffer_object *bo;
+	int ret;
+
+	read_lock(&bdev->vm_lock);
+	bo = ttm_bo_vm_lookup_rb(bdev, vma->vm_pgoff,
+				 (vma->vm_end - vma->vm_start) >> PAGE_SHIFT);
+	if (likely(bo != NULL))
+		ttm_bo_reference(bo);
+	read_unlock(&bdev->vm_lock);
+
+	if (unlikely(bo == NULL)) {
+		printk(KERN_ERR TTM_PFX
+		       "Could not find buffer object to map.\n");
+		return -EINVAL;
+	}
+
+	driver = bo->bdev->driver;
+	if (unlikely(!driver->verify_access)) {
+		ret = -EPERM;
+		goto out_unref;
+	}
+	ret = driver->verify_access(bo, filp);
+	if (unlikely(ret != 0))
+		goto out_unref;
+
+	vma->vm_ops = &ttm_bo_vm_ops;
+
+	/*
+	 * Note: We're transferring the bo reference to
+	 * vma->vm_private_data here.
+	 */
+
+	vma->vm_private_data = bo;
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND;
+	return 0;
+out_unref:
+	ttm_bo_unref(&bo);
+	return ret;
+}
+EXPORT_SYMBOL(ttm_bo_mmap);
+
+int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo)
+{
+	if (vma->vm_pgoff != 0)
+		return -EACCES;
+
+	vma->vm_ops = &ttm_bo_vm_ops;
+	vma->vm_private_data = ttm_bo_reference(bo);
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND;
+	return 0;
+}
+EXPORT_SYMBOL(ttm_fbdev_mmap);
+
+
+ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp,
+		  const char __user *wbuf, char __user *rbuf, size_t count,
+		  loff_t *f_pos, bool write)
+{
+	struct ttm_buffer_object *bo;
+	struct ttm_bo_driver *driver;
+	struct ttm_bo_kmap_obj map;
+	unsigned long dev_offset = (*f_pos >> PAGE_SHIFT);
+	unsigned long kmap_offset;
+	unsigned long kmap_end;
+	unsigned long kmap_num;
+	size_t io_size;
+	unsigned int page_offset;
+	char *virtual;
+	int ret;
+	bool no_wait = false;
+	bool dummy;
+
+	read_lock(&bdev->vm_lock);
+	bo = ttm_bo_vm_lookup_rb(bdev, dev_offset, 1);
+	if (likely(bo != NULL))
+		ttm_bo_reference(bo);
+	read_unlock(&bdev->vm_lock);
+
+	if (unlikely(bo == NULL))
+		return -EFAULT;
+
+	driver = bo->bdev->driver;
+	if (unlikely(driver->verify_access)) {
+		ret = -EPERM;
+		goto out_unref;
+	}
+
+	ret = driver->verify_access(bo, filp);
+	if (unlikely(ret != 0))
+		goto out_unref;
+
+	kmap_offset = dev_offset - bo->vm_node->start;
+	if (unlikely(kmap_offset) >= bo->num_pages) {
+		ret = -EFBIG;
+		goto out_unref;
+	}
+
+	page_offset = *f_pos & ~PAGE_MASK;
+	io_size = bo->num_pages - kmap_offset;
+	io_size = (io_size << PAGE_SHIFT) - page_offset;
+	if (count < io_size)
+		io_size = count;
+
+	kmap_end = (*f_pos + count - 1) >> PAGE_SHIFT;
+	kmap_num = kmap_end - kmap_offset + 1;
+
+	ret = ttm_bo_reserve(bo, true, no_wait, false, 0);
+
+	switch (ret) {
+	case 0:
+		break;
+	case -ERESTART:
+		ret = -EINTR;
+		goto out_unref;
+	case -EBUSY:
+		ret = -EAGAIN;
+		goto out_unref;
+	default:
+		goto out_unref;
+	}
+
+	ret = ttm_bo_kmap(bo, kmap_offset, kmap_num, &map);
+	if (unlikely(ret != 0)) {
+		ttm_bo_unreserve(bo);
+		goto out_unref;
+	}
+
+	virtual = ttm_kmap_obj_virtual(&map, &dummy);
+	virtual += page_offset;
+
+	if (write)
+		ret = copy_from_user(virtual, wbuf, io_size);
+	else
+		ret = copy_to_user(rbuf, virtual, io_size);
+
+	ttm_bo_kunmap(&map);
+	ttm_bo_unreserve(bo);
+	ttm_bo_unref(&bo);
+
+	if (unlikely(ret != 0))
+		return -EFBIG;
+
+	*f_pos += io_size;
+
+	return io_size;
+out_unref:
+	ttm_bo_unref(&bo);
+	return ret;
+}
+
+ssize_t ttm_bo_fbdev_io(struct ttm_buffer_object *bo, const char __user *wbuf,
+			char __user *rbuf, size_t count, loff_t *f_pos,
+			bool write)
+{
+	struct ttm_bo_kmap_obj map;
+	unsigned long kmap_offset;
+	unsigned long kmap_end;
+	unsigned long kmap_num;
+	size_t io_size;
+	unsigned int page_offset;
+	char *virtual;
+	int ret;
+	bool no_wait = false;
+	bool dummy;
+
+	kmap_offset = (*f_pos >> PAGE_SHIFT);
+	if (unlikely(kmap_offset) >= bo->num_pages)
+		return -EFBIG;
+
+	page_offset = *f_pos & ~PAGE_MASK;
+	io_size = bo->num_pages - kmap_offset;
+	io_size = (io_size << PAGE_SHIFT) - page_offset;
+	if (count < io_size)
+		io_size = count;
+
+	kmap_end = (*f_pos + count - 1) >> PAGE_SHIFT;
+	kmap_num = kmap_end - kmap_offset + 1;
+
+	ret = ttm_bo_reserve(bo, true, no_wait, false, 0);
+
+	switch (ret) {
+	case 0:
+		break;
+	case -ERESTART:
+		return -EINTR;
+	case -EBUSY:
+		return -EAGAIN;
+	default:
+		return ret;
+	}
+
+	ret = ttm_bo_kmap(bo, kmap_offset, kmap_num, &map);
+	if (unlikely(ret != 0)) {
+		ttm_bo_unreserve(bo);
+		return ret;
+	}
+
+	virtual = ttm_kmap_obj_virtual(&map, &dummy);
+	virtual += page_offset;
+
+	if (write)
+		ret = copy_from_user(virtual, wbuf, io_size);
+	else
+		ret = copy_to_user(rbuf, virtual, io_size);
+
+	ttm_bo_kunmap(&map);
+	ttm_bo_unreserve(bo);
+	ttm_bo_unref(&bo);
+
+	if (unlikely(ret != 0))
+		return ret;
+
+	*f_pos += io_size;
+
+	return io_size;
+}
diff --git a/drivers/gpu/drm/ttm/ttm_global.c b/drivers/gpu/drm/ttm/ttm_global.c
new file mode 100644
index 00000000000..0b14eb1972b
--- /dev/null
+++ b/drivers/gpu/drm/ttm/ttm_global.c
@@ -0,0 +1,114 @@
+/**************************************************************************
+ *
+ * Copyright 2008-2009 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+/*
+ * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com>
+ */
+
+#include "ttm/ttm_module.h"
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+struct ttm_global_item {
+	struct mutex mutex;
+	void *object;
+	int refcount;
+};
+
+static struct ttm_global_item glob[TTM_GLOBAL_NUM];
+
+void ttm_global_init(void)
+{
+	int i;
+
+	for (i = 0; i < TTM_GLOBAL_NUM; ++i) {
+		struct ttm_global_item *item = &glob[i];
+		mutex_init(&item->mutex);
+		item->object = NULL;
+		item->refcount = 0;
+	}
+}
+
+void ttm_global_release(void)
+{
+	int i;
+	for (i = 0; i < TTM_GLOBAL_NUM; ++i) {
+		struct ttm_global_item *item = &glob[i];
+		BUG_ON(item->object != NULL);
+		BUG_ON(item->refcount != 0);
+	}
+}
+
+int ttm_global_item_ref(struct ttm_global_reference *ref)
+{
+	int ret;
+	struct ttm_global_item *item = &glob[ref->global_type];
+	void *object;
+
+	mutex_lock(&item->mutex);
+	if (item->refcount == 0) {
+		item->object = kmalloc(ref->size, GFP_KERNEL);
+		if (unlikely(item->object == NULL)) {
+			ret = -ENOMEM;
+			goto out_err;
+		}
+
+		ref->object = item->object;
+		ret = ref->init(ref);
+		if (unlikely(ret != 0))
+			goto out_err;
+
+		++item->refcount;
+	}
+	ref->object = item->object;
+	object = item->object;
+	mutex_unlock(&item->mutex);
+	return 0;
+out_err:
+	kfree(item->object);
+	mutex_unlock(&item->mutex);
+	item->object = NULL;
+	return ret;
+}
+EXPORT_SYMBOL(ttm_global_item_ref);
+
+void ttm_global_item_unref(struct ttm_global_reference *ref)
+{
+	struct ttm_global_item *item = &glob[ref->global_type];
+
+	mutex_lock(&item->mutex);
+	BUG_ON(item->refcount == 0);
+	BUG_ON(ref->object != item->object);
+	if (--item->refcount == 0) {
+		ref->release(ref);
+		kfree(item->object);
+		item->object = NULL;
+	}
+	mutex_unlock(&item->mutex);
+}
+EXPORT_SYMBOL(ttm_global_item_unref);
+
diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c
new file mode 100644
index 00000000000..87323d4ff68
--- /dev/null
+++ b/drivers/gpu/drm/ttm/ttm_memory.c
@@ -0,0 +1,234 @@
+/**************************************************************************
+ *
+ * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "ttm/ttm_memory.h"
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+
+#define TTM_PFX "[TTM] "
+#define TTM_MEMORY_ALLOC_RETRIES 4
+
+/**
+ * At this point we only support a single shrink callback.
+ * Extend this if needed, perhaps using a linked list of callbacks.
+ * Note that this function is reentrant:
+ * many threads may try to swap out at any given time.
+ */
+
+static void ttm_shrink(struct ttm_mem_global *glob, bool from_workqueue,
+		       uint64_t extra)
+{
+	int ret;
+	struct ttm_mem_shrink *shrink;
+	uint64_t target;
+	uint64_t total_target;
+
+	spin_lock(&glob->lock);
+	if (glob->shrink == NULL)
+		goto out;
+
+	if (from_workqueue) {
+		target = glob->swap_limit;
+		total_target = glob->total_memory_swap_limit;
+	} else if (capable(CAP_SYS_ADMIN)) {
+		total_target = glob->emer_total_memory;
+		target = glob->emer_memory;
+	} else {
+		total_target = glob->max_total_memory;
+		target = glob->max_memory;
+	}
+
+	total_target = (extra >= total_target) ? 0 : total_target - extra;
+	target = (extra >= target) ? 0 : target - extra;
+
+	while (glob->used_memory > target ||
+	       glob->used_total_memory > total_target) {
+		shrink = glob->shrink;
+		spin_unlock(&glob->lock);
+		ret = shrink->do_shrink(shrink);
+		spin_lock(&glob->lock);
+		if (unlikely(ret != 0))
+			goto out;
+	}
+out:
+	spin_unlock(&glob->lock);
+}
+
+static void ttm_shrink_work(struct work_struct *work)
+{
+	struct ttm_mem_global *glob =
+	    container_of(work, struct ttm_mem_global, work);
+
+	ttm_shrink(glob, true, 0ULL);
+}
+
+int ttm_mem_global_init(struct ttm_mem_global *glob)
+{
+	struct sysinfo si;
+	uint64_t mem;
+
+	spin_lock_init(&glob->lock);
+	glob->swap_queue = create_singlethread_workqueue("ttm_swap");
+	INIT_WORK(&glob->work, ttm_shrink_work);
+	init_waitqueue_head(&glob->queue);
+
+	si_meminfo(&si);
+
+	mem = si.totalram - si.totalhigh;
+	mem *= si.mem_unit;
+
+	glob->max_memory = mem >> 1;
+	glob->emer_memory = (mem >> 1) + (mem >> 2);
+	glob->swap_limit = glob->max_memory - (mem >> 3);
+	glob->used_memory = 0;
+	glob->used_total_memory = 0;
+	glob->shrink = NULL;
+
+	mem = si.totalram;
+	mem *= si.mem_unit;
+
+	glob->max_total_memory = mem >> 1;
+	glob->emer_total_memory = (mem >> 1) + (mem >> 2);
+
+	glob->total_memory_swap_limit = glob->max_total_memory - (mem >> 3);
+
+	printk(KERN_INFO TTM_PFX "TTM available graphics memory: %llu MiB\n",
+	       glob->max_total_memory >> 20);
+	printk(KERN_INFO TTM_PFX "TTM available object memory: %llu MiB\n",
+	       glob->max_memory >> 20);
+
+	return 0;
+}
+EXPORT_SYMBOL(ttm_mem_global_init);
+
+void ttm_mem_global_release(struct ttm_mem_global *glob)
+{
+	printk(KERN_INFO TTM_PFX "Used total memory is %llu bytes.\n",
+	       (unsigned long long)glob->used_total_memory);
+	flush_workqueue(glob->swap_queue);
+	destroy_workqueue(glob->swap_queue);
+	glob->swap_queue = NULL;
+}
+EXPORT_SYMBOL(ttm_mem_global_release);
+
+static inline void ttm_check_swapping(struct ttm_mem_global *glob)
+{
+	bool needs_swapping;
+
+	spin_lock(&glob->lock);
+	needs_swapping = (glob->used_memory > glob->swap_limit ||
+			  glob->used_total_memory >
+			  glob->total_memory_swap_limit);
+	spin_unlock(&glob->lock);
+
+	if (unlikely(needs_swapping))
+		(void)queue_work(glob->swap_queue, &glob->work);
+
+}
+
+void ttm_mem_global_free(struct ttm_mem_global *glob,
+			 uint64_t amount, bool himem)
+{
+	spin_lock(&glob->lock);
+	glob->used_total_memory -= amount;
+	if (!himem)
+		glob->used_memory -= amount;
+	wake_up_all(&glob->queue);
+	spin_unlock(&glob->lock);
+}
+
+static int ttm_mem_global_reserve(struct ttm_mem_global *glob,
+				  uint64_t amount, bool himem, bool reserve)
+{
+	uint64_t limit;
+	uint64_t lomem_limit;
+	int ret = -ENOMEM;
+
+	spin_lock(&glob->lock);
+
+	if (capable(CAP_SYS_ADMIN)) {
+		limit = glob->emer_total_memory;
+		lomem_limit = glob->emer_memory;
+	} else {
+		limit = glob->max_total_memory;
+		lomem_limit = glob->max_memory;
+	}
+
+	if (unlikely(glob->used_total_memory + amount > limit))
+		goto out_unlock;
+	if (unlikely(!himem && glob->used_memory + amount > lomem_limit))
+		goto out_unlock;
+
+	if (reserve) {
+		glob->used_total_memory += amount;
+		if (!himem)
+			glob->used_memory += amount;
+	}
+	ret = 0;
+out_unlock:
+	spin_unlock(&glob->lock);
+	ttm_check_swapping(glob);
+
+	return ret;
+}
+
+int ttm_mem_global_alloc(struct ttm_mem_global *glob, uint64_t memory,
+			 bool no_wait, bool interruptible, bool himem)
+{
+	int count = TTM_MEMORY_ALLOC_RETRIES;
+
+	while (unlikely(ttm_mem_global_reserve(glob, memory, himem, true)
+			!= 0)) {
+		if (no_wait)
+			return -ENOMEM;
+		if (unlikely(count-- == 0))
+			return -ENOMEM;
+		ttm_shrink(glob, false, memory + (memory >> 2) + 16);
+	}
+
+	return 0;
+}
+
+size_t ttm_round_pot(size_t size)
+{
+	if ((size & (size - 1)) == 0)
+		return size;
+	else if (size > PAGE_SIZE)
+		return PAGE_ALIGN(size);
+	else {
+		size_t tmp_size = 4;
+
+		while (tmp_size < size)
+			tmp_size <<= 1;
+
+		return tmp_size;
+	}
+	return 0;
+}
diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c
new file mode 100644
index 00000000000..59ce8191d58
--- /dev/null
+++ b/drivers/gpu/drm/ttm/ttm_module.c
@@ -0,0 +1,50 @@
+/**************************************************************************
+ *
+ * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+/*
+ * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com>
+ * 	    Jerome Glisse
+ */
+#include <linux/module.h>
+#include <ttm/ttm_module.h>
+
+static int __init ttm_init(void)
+{
+	ttm_global_init();
+	return 0;
+}
+
+static void __exit ttm_exit(void)
+{
+	ttm_global_release();
+}
+
+module_init(ttm_init);
+module_exit(ttm_exit);
+
+MODULE_AUTHOR("Thomas Hellstrom, Jerome Glisse");
+MODULE_DESCRIPTION("TTM memory manager subsystem (for DRM device)");
+MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
new file mode 100644
index 00000000000..c27ab3a877a
--- /dev/null
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -0,0 +1,635 @@
+/**************************************************************************
+ *
+ * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+/*
+ * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com>
+ */
+
+#include <linux/version.h>
+#include <linux/vmalloc.h>
+#include <linux/sched.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/file.h>
+#include <linux/swap.h>
+#include "ttm/ttm_module.h"
+#include "ttm/ttm_bo_driver.h"
+#include "ttm/ttm_placement.h"
+
+static int ttm_tt_swapin(struct ttm_tt *ttm);
+
+#if defined(CONFIG_X86)
+static void ttm_tt_clflush_page(struct page *page)
+{
+	uint8_t *page_virtual;
+	unsigned int i;
+
+	if (unlikely(page == NULL))
+		return;
+
+	page_virtual = kmap_atomic(page, KM_USER0);
+
+	for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
+		clflush(page_virtual + i);
+
+	kunmap_atomic(page_virtual, KM_USER0);
+}
+
+static void ttm_tt_cache_flush_clflush(struct page *pages[],
+				       unsigned long num_pages)
+{
+	unsigned long i;
+
+	mb();
+	for (i = 0; i < num_pages; ++i)
+		ttm_tt_clflush_page(*pages++);
+	mb();
+}
+#else
+static void ttm_tt_ipi_handler(void *null)
+{
+	;
+}
+#endif
+
+void ttm_tt_cache_flush(struct page *pages[], unsigned long num_pages)
+{
+
+#if defined(CONFIG_X86)
+	if (cpu_has_clflush) {
+		ttm_tt_cache_flush_clflush(pages, num_pages);
+		return;
+	}
+#else
+	if (on_each_cpu(ttm_tt_ipi_handler, NULL, 1) != 0)
+		printk(KERN_ERR TTM_PFX
+		       "Timed out waiting for drm cache flush.\n");
+#endif
+}
+
+/**
+ * Allocates storage for pointers to the pages that back the ttm.
+ *
+ * Uses kmalloc if possible. Otherwise falls back to vmalloc.
+ */
+static void ttm_tt_alloc_page_directory(struct ttm_tt *ttm)
+{
+	unsigned long size = ttm->num_pages * sizeof(*ttm->pages);
+	ttm->pages = NULL;
+
+	if (size <= PAGE_SIZE)
+		ttm->pages = kzalloc(size, GFP_KERNEL);
+
+	if (!ttm->pages) {
+		ttm->pages = vmalloc_user(size);
+		if (ttm->pages)
+			ttm->page_flags |= TTM_PAGE_FLAG_VMALLOC;
+	}
+}
+
+static void ttm_tt_free_page_directory(struct ttm_tt *ttm)
+{
+	if (ttm->page_flags & TTM_PAGE_FLAG_VMALLOC) {
+		vfree(ttm->pages);
+		ttm->page_flags &= ~TTM_PAGE_FLAG_VMALLOC;
+	} else {
+		kfree(ttm->pages);
+	}
+	ttm->pages = NULL;
+}
+
+static struct page *ttm_tt_alloc_page(unsigned page_flags)
+{
+	if (page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
+		return alloc_page(GFP_HIGHUSER | __GFP_ZERO);
+
+	return alloc_page(GFP_HIGHUSER);
+}
+
+static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
+{
+	int write;
+	int dirty;
+	struct page *page;
+	int i;
+	struct ttm_backend *be = ttm->be;
+
+	BUG_ON(!(ttm->page_flags & TTM_PAGE_FLAG_USER));
+	write = ((ttm->page_flags & TTM_PAGE_FLAG_WRITE) != 0);
+	dirty = ((ttm->page_flags & TTM_PAGE_FLAG_USER_DIRTY) != 0);
+
+	if (be)
+		be->func->clear(be);
+
+	for (i = 0; i < ttm->num_pages; ++i) {
+		page = ttm->pages[i];
+		if (page == NULL)
+			continue;
+
+		if (page == ttm->dummy_read_page) {
+			BUG_ON(write);
+			continue;
+		}
+
+		if (write && dirty && !PageReserved(page))
+			set_page_dirty_lock(page);
+
+		ttm->pages[i] = NULL;
+		ttm_mem_global_free(ttm->bdev->mem_glob, PAGE_SIZE, false);
+		put_page(page);
+	}
+	ttm->state = tt_unpopulated;
+	ttm->first_himem_page = ttm->num_pages;
+	ttm->last_lomem_page = -1;
+}
+
+static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index)
+{
+	struct page *p;
+	struct ttm_bo_device *bdev = ttm->bdev;
+	struct ttm_mem_global *mem_glob = bdev->mem_glob;
+	int ret;
+
+	while (NULL == (p = ttm->pages[index])) {
+		p = ttm_tt_alloc_page(ttm->page_flags);
+
+		if (!p)
+			return NULL;
+
+		if (PageHighMem(p)) {
+			ret =
+			    ttm_mem_global_alloc(mem_glob, PAGE_SIZE,
+						 false, false, true);
+			if (unlikely(ret != 0))
+				goto out_err;
+			ttm->pages[--ttm->first_himem_page] = p;
+		} else {
+			ret =
+			    ttm_mem_global_alloc(mem_glob, PAGE_SIZE,
+						 false, false, false);
+			if (unlikely(ret != 0))
+				goto out_err;
+			ttm->pages[++ttm->last_lomem_page] = p;
+		}
+	}
+	return p;
+out_err:
+	put_page(p);
+	return NULL;
+}
+
+struct page *ttm_tt_get_page(struct ttm_tt *ttm, int index)
+{
+	int ret;
+
+	if (unlikely(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) {
+		ret = ttm_tt_swapin(ttm);
+		if (unlikely(ret != 0))
+			return NULL;
+	}
+	return __ttm_tt_get_page(ttm, index);
+}
+
+int ttm_tt_populate(struct ttm_tt *ttm)
+{
+	struct page *page;
+	unsigned long i;
+	struct ttm_backend *be;
+	int ret;
+
+	if (ttm->state != tt_unpopulated)
+		return 0;
+
+	if (unlikely(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) {
+		ret = ttm_tt_swapin(ttm);
+		if (unlikely(ret != 0))
+			return ret;
+	}
+
+	be = ttm->be;
+
+	for (i = 0; i < ttm->num_pages; ++i) {
+		page = __ttm_tt_get_page(ttm, i);
+		if (!page)
+			return -ENOMEM;
+	}
+
+	be->func->populate(be, ttm->num_pages, ttm->pages,
+			   ttm->dummy_read_page);
+	ttm->state = tt_unbound;
+	return 0;
+}
+
+#ifdef CONFIG_X86
+static inline int ttm_tt_set_page_caching(struct page *p,
+					  enum ttm_caching_state c_state)
+{
+	if (PageHighMem(p))
+		return 0;
+
+	switch (c_state) {
+	case tt_cached:
+		return set_pages_wb(p, 1);
+	case tt_wc:
+	    return set_memory_wc((unsigned long) page_address(p), 1);
+	default:
+		return set_pages_uc(p, 1);
+	}
+}
+#else /* CONFIG_X86 */
+static inline int ttm_tt_set_page_caching(struct page *p,
+					  enum ttm_caching_state c_state)
+{
+	return 0;
+}
+#endif /* CONFIG_X86 */
+
+/*
+ * Change caching policy for the linear kernel map
+ * for range of pages in a ttm.
+ */
+
+static int ttm_tt_set_caching(struct ttm_tt *ttm,
+			      enum ttm_caching_state c_state)
+{
+	int i, j;
+	struct page *cur_page;
+	int ret;
+
+	if (ttm->caching_state == c_state)
+		return 0;
+
+	if (c_state != tt_cached) {
+		ret = ttm_tt_populate(ttm);
+		if (unlikely(ret != 0))
+			return ret;
+	}
+
+	if (ttm->caching_state == tt_cached)
+		ttm_tt_cache_flush(ttm->pages, ttm->num_pages);
+
+	for (i = 0; i < ttm->num_pages; ++i) {
+		cur_page = ttm->pages[i];
+		if (likely(cur_page != NULL)) {
+			ret = ttm_tt_set_page_caching(cur_page, c_state);
+			if (unlikely(ret != 0))
+				goto out_err;
+		}
+	}
+
+	ttm->caching_state = c_state;
+
+	return 0;
+
+out_err:
+	for (j = 0; j < i; ++j) {
+		cur_page = ttm->pages[j];
+		if (likely(cur_page != NULL)) {
+			(void)ttm_tt_set_page_caching(cur_page,
+						      ttm->caching_state);
+		}
+	}
+
+	return ret;
+}
+
+int ttm_tt_set_placement_caching(struct ttm_tt *ttm, uint32_t placement)
+{
+	enum ttm_caching_state state;
+
+	if (placement & TTM_PL_FLAG_WC)
+		state = tt_wc;
+	else if (placement & TTM_PL_FLAG_UNCACHED)
+		state = tt_uncached;
+	else
+		state = tt_cached;
+
+	return ttm_tt_set_caching(ttm, state);
+}
+
+static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
+{
+	int i;
+	struct page *cur_page;
+	struct ttm_backend *be = ttm->be;
+
+	if (be)
+		be->func->clear(be);
+	(void)ttm_tt_set_caching(ttm, tt_cached);
+	for (i = 0; i < ttm->num_pages; ++i) {
+		cur_page = ttm->pages[i];
+		ttm->pages[i] = NULL;
+		if (cur_page) {
+			if (page_count(cur_page) != 1)
+				printk(KERN_ERR TTM_PFX
+				       "Erroneous page count. "
+				       "Leaking pages.\n");
+			ttm_mem_global_free(ttm->bdev->mem_glob, PAGE_SIZE,
+					    PageHighMem(cur_page));
+			__free_page(cur_page);
+		}
+	}
+	ttm->state = tt_unpopulated;
+	ttm->first_himem_page = ttm->num_pages;
+	ttm->last_lomem_page = -1;
+}
+
+void ttm_tt_destroy(struct ttm_tt *ttm)
+{
+	struct ttm_backend *be;
+
+	if (unlikely(ttm == NULL))
+		return;
+
+	be = ttm->be;
+	if (likely(be != NULL)) {
+		be->func->destroy(be);
+		ttm->be = NULL;
+	}
+
+	if (likely(ttm->pages != NULL)) {
+		if (ttm->page_flags & TTM_PAGE_FLAG_USER)
+			ttm_tt_free_user_pages(ttm);
+		else
+			ttm_tt_free_alloced_pages(ttm);
+
+		ttm_tt_free_page_directory(ttm);
+	}
+
+	if (!(ttm->page_flags & TTM_PAGE_FLAG_PERSISTANT_SWAP) &&
+	    ttm->swap_storage)
+		fput(ttm->swap_storage);
+
+	kfree(ttm);
+}
+
+int ttm_tt_set_user(struct ttm_tt *ttm,
+		    struct task_struct *tsk,
+		    unsigned long start, unsigned long num_pages)
+{
+	struct mm_struct *mm = tsk->mm;
+	int ret;
+	int write = (ttm->page_flags & TTM_PAGE_FLAG_WRITE) != 0;
+	struct ttm_mem_global *mem_glob = ttm->bdev->mem_glob;
+
+	BUG_ON(num_pages != ttm->num_pages);
+	BUG_ON((ttm->page_flags & TTM_PAGE_FLAG_USER) == 0);
+
+	/**
+	 * Account user pages as lowmem pages for now.
+	 */
+
+	ret = ttm_mem_global_alloc(mem_glob, num_pages * PAGE_SIZE,
+				   false, false, false);
+	if (unlikely(ret != 0))
+		return ret;
+
+	down_read(&mm->mmap_sem);
+	ret = get_user_pages(tsk, mm, start, num_pages,
+			     write, 0, ttm->pages, NULL);
+	up_read(&mm->mmap_sem);
+
+	if (ret != num_pages && write) {
+		ttm_tt_free_user_pages(ttm);
+		ttm_mem_global_free(mem_glob, num_pages * PAGE_SIZE, false);
+		return -ENOMEM;
+	}
+
+	ttm->tsk = tsk;
+	ttm->start = start;
+	ttm->state = tt_unbound;
+
+	return 0;
+}
+
+struct ttm_tt *ttm_tt_create(struct ttm_bo_device *bdev, unsigned long size,
+			     uint32_t page_flags, struct page *dummy_read_page)
+{
+	struct ttm_bo_driver *bo_driver = bdev->driver;
+	struct ttm_tt *ttm;
+
+	if (!bo_driver)
+		return NULL;
+
+	ttm = kzalloc(sizeof(*ttm), GFP_KERNEL);
+	if (!ttm)
+		return NULL;
+
+	ttm->bdev = bdev;
+
+	ttm->num_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	ttm->first_himem_page = ttm->num_pages;
+	ttm->last_lomem_page = -1;
+	ttm->caching_state = tt_cached;
+	ttm->page_flags = page_flags;
+
+	ttm->dummy_read_page = dummy_read_page;
+
+	ttm_tt_alloc_page_directory(ttm);
+	if (!ttm->pages) {
+		ttm_tt_destroy(ttm);
+		printk(KERN_ERR TTM_PFX "Failed allocating page table\n");
+		return NULL;
+	}
+	ttm->be = bo_driver->create_ttm_backend_entry(bdev);
+	if (!ttm->be) {
+		ttm_tt_destroy(ttm);
+		printk(KERN_ERR TTM_PFX "Failed creating ttm backend entry\n");
+		return NULL;
+	}
+	ttm->state = tt_unpopulated;
+	return ttm;
+}
+
+void ttm_tt_unbind(struct ttm_tt *ttm)
+{
+	int ret;
+	struct ttm_backend *be = ttm->be;
+
+	if (ttm->state == tt_bound) {
+		ret = be->func->unbind(be);
+		BUG_ON(ret);
+		ttm->state = tt_unbound;
+	}
+}
+
+int ttm_tt_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
+{
+	int ret = 0;
+	struct ttm_backend *be;
+
+	if (!ttm)
+		return -EINVAL;
+
+	if (ttm->state == tt_bound)
+		return 0;
+
+	be = ttm->be;
+
+	ret = ttm_tt_populate(ttm);
+	if (ret)
+		return ret;
+
+	ret = be->func->bind(be, bo_mem);
+	if (ret) {
+		printk(KERN_ERR TTM_PFX "Couldn't bind backend.\n");
+		return ret;
+	}
+
+	ttm->state = tt_bound;
+
+	if (ttm->page_flags & TTM_PAGE_FLAG_USER)
+		ttm->page_flags |= TTM_PAGE_FLAG_USER_DIRTY;
+	return 0;
+}
+EXPORT_SYMBOL(ttm_tt_bind);
+
+static int ttm_tt_swapin(struct ttm_tt *ttm)
+{
+	struct address_space *swap_space;
+	struct file *swap_storage;
+	struct page *from_page;
+	struct page *to_page;
+	void *from_virtual;
+	void *to_virtual;
+	int i;
+	int ret;
+
+	if (ttm->page_flags & TTM_PAGE_FLAG_USER) {
+		ret = ttm_tt_set_user(ttm, ttm->tsk, ttm->start,
+				      ttm->num_pages);
+		if (unlikely(ret != 0))
+			return ret;
+
+		ttm->page_flags &= ~TTM_PAGE_FLAG_SWAPPED;
+		return 0;
+	}
+
+	swap_storage = ttm->swap_storage;
+	BUG_ON(swap_storage == NULL);
+
+	swap_space = swap_storage->f_path.dentry->d_inode->i_mapping;
+
+	for (i = 0; i < ttm->num_pages; ++i) {
+		from_page = read_mapping_page(swap_space, i, NULL);
+		if (IS_ERR(from_page))
+			goto out_err;
+		to_page = __ttm_tt_get_page(ttm, i);
+		if (unlikely(to_page == NULL))
+			goto out_err;
+
+		preempt_disable();
+		from_virtual = kmap_atomic(from_page, KM_USER0);
+		to_virtual = kmap_atomic(to_page, KM_USER1);
+		memcpy(to_virtual, from_virtual, PAGE_SIZE);
+		kunmap_atomic(to_virtual, KM_USER1);
+		kunmap_atomic(from_virtual, KM_USER0);
+		preempt_enable();
+		page_cache_release(from_page);
+	}
+
+	if (!(ttm->page_flags & TTM_PAGE_FLAG_PERSISTANT_SWAP))
+		fput(swap_storage);
+	ttm->swap_storage = NULL;
+	ttm->page_flags &= ~TTM_PAGE_FLAG_SWAPPED;
+
+	return 0;
+out_err:
+	ttm_tt_free_alloced_pages(ttm);
+	return -ENOMEM;
+}
+
+int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistant_swap_storage)
+{
+	struct address_space *swap_space;
+	struct file *swap_storage;
+	struct page *from_page;
+	struct page *to_page;
+	void *from_virtual;
+	void *to_virtual;
+	int i;
+
+	BUG_ON(ttm->state != tt_unbound && ttm->state != tt_unpopulated);
+	BUG_ON(ttm->caching_state != tt_cached);
+
+	/*
+	 * For user buffers, just unpin the pages, as there should be
+	 * vma references.
+	 */
+
+	if (ttm->page_flags & TTM_PAGE_FLAG_USER) {
+		ttm_tt_free_user_pages(ttm);
+		ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED;
+		ttm->swap_storage = NULL;
+		return 0;
+	}
+
+	if (!persistant_swap_storage) {
+		swap_storage = shmem_file_setup("ttm swap",
+						ttm->num_pages << PAGE_SHIFT,
+						0);
+		if (unlikely(IS_ERR(swap_storage))) {
+			printk(KERN_ERR "Failed allocating swap storage.\n");
+			return -ENOMEM;
+		}
+	} else
+		swap_storage = persistant_swap_storage;
+
+	swap_space = swap_storage->f_path.dentry->d_inode->i_mapping;
+
+	for (i = 0; i < ttm->num_pages; ++i) {
+		from_page = ttm->pages[i];
+		if (unlikely(from_page == NULL))
+			continue;
+		to_page = read_mapping_page(swap_space, i, NULL);
+		if (unlikely(to_page == NULL))
+			goto out_err;
+
+		preempt_disable();
+		from_virtual = kmap_atomic(from_page, KM_USER0);
+		to_virtual = kmap_atomic(to_page, KM_USER1);
+		memcpy(to_virtual, from_virtual, PAGE_SIZE);
+		kunmap_atomic(to_virtual, KM_USER1);
+		kunmap_atomic(from_virtual, KM_USER0);
+		preempt_enable();
+		set_page_dirty(to_page);
+		mark_page_accessed(to_page);
+		page_cache_release(to_page);
+	}
+
+	ttm_tt_free_alloced_pages(ttm);
+	ttm->swap_storage = swap_storage;
+	ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED;
+	if (persistant_swap_storage)
+		ttm->page_flags |= TTM_PAGE_FLAG_PERSISTANT_SWAP;
+
+	return 0;
+out_err:
+	if (!persistant_swap_storage)
+		fput(swap_storage);
+
+	return -ENOMEM;
+}
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
new file mode 100644
index 00000000000..cd22ab4b495
--- /dev/null
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -0,0 +1,618 @@
+/**************************************************************************
+ *
+ * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+/*
+ * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com>
+ */
+
+#ifndef _TTM_BO_API_H_
+#define _TTM_BO_API_H_
+
+#include "drm_hashtab.h"
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/wait.h>
+#include <linux/mutex.h>
+#include <linux/mm.h>
+#include <linux/rbtree.h>
+#include <linux/bitmap.h>
+
+struct ttm_bo_device;
+
+struct drm_mm_node;
+
+/**
+ * struct ttm_mem_reg
+ *
+ * @mm_node: Memory manager node.
+ * @size: Requested size of memory region.
+ * @num_pages: Actual size of memory region in pages.
+ * @page_alignment: Page alignment.
+ * @placement: Placement flags.
+ *
+ * Structure indicating the placement and space resources used by a
+ * buffer object.
+ */
+
+struct ttm_mem_reg {
+	struct drm_mm_node *mm_node;
+	unsigned long size;
+	unsigned long num_pages;
+	uint32_t page_alignment;
+	uint32_t mem_type;
+	uint32_t placement;
+};
+
+/**
+ * enum ttm_bo_type
+ *
+ * @ttm_bo_type_device:	These are 'normal' buffers that can
+ * be mmapped by user space. Each of these bos occupy a slot in the
+ * device address space, that can be used for normal vm operations.
+ *
+ * @ttm_bo_type_user: These are user-space memory areas that are made
+ * available to the GPU by mapping the buffer pages into the GPU aperture
+ * space. These buffers cannot be mmaped from the device address space.
+ *
+ * @ttm_bo_type_kernel: These buffers are like ttm_bo_type_device buffers,
+ * but they cannot be accessed from user-space. For kernel-only use.
+ */
+
+enum ttm_bo_type {
+	ttm_bo_type_device,
+	ttm_bo_type_user,
+	ttm_bo_type_kernel
+};
+
+struct ttm_tt;
+
+/**
+ * struct ttm_buffer_object
+ *
+ * @bdev: Pointer to the buffer object device structure.
+ * @buffer_start: The virtual user-space start address of ttm_bo_type_user
+ * buffers.
+ * @type: The bo type.
+ * @destroy: Destruction function. If NULL, kfree is used.
+ * @num_pages: Actual number of pages.
+ * @addr_space_offset: Address space offset.
+ * @acc_size: Accounted size for this object.
+ * @kref: Reference count of this buffer object. When this refcount reaches
+ * zero, the object is put on the delayed delete list.
+ * @list_kref: List reference count of this buffer object. This member is
+ * used to avoid destruction while the buffer object is still on a list.
+ * Lru lists may keep one refcount, the delayed delete list, and kref != 0
+ * keeps one refcount. When this refcount reaches zero,
+ * the object is destroyed.
+ * @event_queue: Queue for processes waiting on buffer object status change.
+ * @lock: spinlock protecting mostly synchronization members.
+ * @proposed_placement: Proposed placement for the buffer. Changed only by the
+ * creator prior to validation as opposed to bo->mem.proposed_flags which is
+ * changed by the implementation prior to a buffer move if it wants to outsmart
+ * the buffer creator / user. This latter happens, for example, at eviction.
+ * @mem: structure describing current placement.
+ * @persistant_swap_storage: Usually the swap storage is deleted for buffers
+ * pinned in physical memory. If this behaviour is not desired, this member
+ * holds a pointer to a persistant shmem object.
+ * @ttm: TTM structure holding system pages.
+ * @evicted: Whether the object was evicted without user-space knowing.
+ * @cpu_writes: For synchronization. Number of cpu writers.
+ * @lru: List head for the lru list.
+ * @ddestroy: List head for the delayed destroy list.
+ * @swap: List head for swap LRU list.
+ * @val_seq: Sequence of the validation holding the @reserved lock.
+ * Used to avoid starvation when many processes compete to validate the
+ * buffer. This member is protected by the bo_device::lru_lock.
+ * @seq_valid: The value of @val_seq is valid. This value is protected by
+ * the bo_device::lru_lock.
+ * @reserved: Deadlock-free lock used for synchronization state transitions.
+ * @sync_obj_arg: Opaque argument to synchronization object function.
+ * @sync_obj: Pointer to a synchronization object.
+ * @priv_flags: Flags describing buffer object internal state.
+ * @vm_rb: Rb node for the vm rb tree.
+ * @vm_node: Address space manager node.
+ * @offset: The current GPU offset, which can have different meanings
+ * depending on the memory type. For SYSTEM type memory, it should be 0.
+ * @cur_placement: Hint of current placement.
+ *
+ * Base class for TTM buffer object, that deals with data placement and CPU
+ * mappings. GPU mappings are really up to the driver, but for simpler GPUs
+ * the driver can usually use the placement offset @offset directly as the
+ * GPU virtual address. For drivers implementing multiple
+ * GPU memory manager contexts, the driver should manage the address space
+ * in these contexts separately and use these objects to get the correct
+ * placement and caching for these GPU maps. This makes it possible to use
+ * these objects for even quite elaborate memory management schemes.
+ * The destroy member, the API visibility of this object makes it possible
+ * to derive driver specific types.
+ */
+
+struct ttm_buffer_object {
+	/**
+	 * Members constant at init.
+	 */
+
+	struct ttm_bo_device *bdev;
+	unsigned long buffer_start;
+	enum ttm_bo_type type;
+	void (*destroy) (struct ttm_buffer_object *);
+	unsigned long num_pages;
+	uint64_t addr_space_offset;
+	size_t acc_size;
+
+	/**
+	* Members not needing protection.
+	*/
+
+	struct kref kref;
+	struct kref list_kref;
+	wait_queue_head_t event_queue;
+	spinlock_t lock;
+
+	/**
+	 * Members protected by the bo::reserved lock.
+	 */
+
+	uint32_t proposed_placement;
+	struct ttm_mem_reg mem;
+	struct file *persistant_swap_storage;
+	struct ttm_tt *ttm;
+	bool evicted;
+
+	/**
+	 * Members protected by the bo::reserved lock only when written to.
+	 */
+
+	atomic_t cpu_writers;
+
+	/**
+	 * Members protected by the bdev::lru_lock.
+	 */
+
+	struct list_head lru;
+	struct list_head ddestroy;
+	struct list_head swap;
+	uint32_t val_seq;
+	bool seq_valid;
+
+	/**
+	 * Members protected by the bdev::lru_lock
+	 * only when written to.
+	 */
+
+	atomic_t reserved;
+
+
+	/**
+	 * Members protected by the bo::lock
+	 */
+
+	void *sync_obj_arg;
+	void *sync_obj;
+	unsigned long priv_flags;
+
+	/**
+	 * Members protected by the bdev::vm_lock
+	 */
+
+	struct rb_node vm_rb;
+	struct drm_mm_node *vm_node;
+
+
+	/**
+	 * Special members that are protected by the reserve lock
+	 * and the bo::lock when written to. Can be read with
+	 * either of these locks held.
+	 */
+
+	unsigned long offset;
+	uint32_t cur_placement;
+};
+
+/**
+ * struct ttm_bo_kmap_obj
+ *
+ * @virtual: The current kernel virtual address.
+ * @page: The page when kmap'ing a single page.
+ * @bo_kmap_type: Type of bo_kmap.
+ *
+ * Object describing a kernel mapping. Since a TTM bo may be located
+ * in various memory types with various caching policies, the
+ * mapping can either be an ioremap, a vmap, a kmap or part of a
+ * premapped region.
+ */
+
+struct ttm_bo_kmap_obj {
+	void *virtual;
+	struct page *page;
+	enum {
+		ttm_bo_map_iomap,
+		ttm_bo_map_vmap,
+		ttm_bo_map_kmap,
+		ttm_bo_map_premapped,
+	} bo_kmap_type;
+};
+
+/**
+ * ttm_bo_reference - reference a struct ttm_buffer_object
+ *
+ * @bo: The buffer object.
+ *
+ * Returns a refcounted pointer to a buffer object.
+ */
+
+static inline struct ttm_buffer_object *
+ttm_bo_reference(struct ttm_buffer_object *bo)
+{
+	kref_get(&bo->kref);
+	return bo;
+}
+
+/**
+ * ttm_bo_wait - wait for buffer idle.
+ *
+ * @bo:  The buffer object.
+ * @interruptible:  Use interruptible wait.
+ * @no_wait:  Return immediately if buffer is busy.
+ *
+ * This function must be called with the bo::mutex held, and makes
+ * sure any previous rendering to the buffer is completed.
+ * Note: It might be necessary to block validations before the
+ * wait by reserving the buffer.
+ * Returns -EBUSY if no_wait is true and the buffer is busy.
+ * Returns -ERESTART if interrupted by a signal.
+ */
+extern int ttm_bo_wait(struct ttm_buffer_object *bo, bool lazy,
+		       bool interruptible, bool no_wait);
+/**
+ * ttm_buffer_object_validate
+ *
+ * @bo: The buffer object.
+ * @proposed_placement: Proposed_placement for the buffer object.
+ * @interruptible: Sleep interruptible if sleeping.
+ * @no_wait: Return immediately if the buffer is busy.
+ *
+ * Changes placement and caching policy of the buffer object
+ * according to bo::proposed_flags.
+ * Returns
+ * -EINVAL on invalid proposed_flags.
+ * -ENOMEM on out-of-memory condition.
+ * -EBUSY if no_wait is true and buffer busy.
+ * -ERESTART if interrupted by a signal.
+ */
+extern int ttm_buffer_object_validate(struct ttm_buffer_object *bo,
+				      uint32_t proposed_placement,
+				      bool interruptible, bool no_wait);
+/**
+ * ttm_bo_unref
+ *
+ * @bo: The buffer object.
+ *
+ * Unreference and clear a pointer to a buffer object.
+ */
+extern void ttm_bo_unref(struct ttm_buffer_object **bo);
+
+/**
+ * ttm_bo_synccpu_write_grab
+ *
+ * @bo: The buffer object:
+ * @no_wait: Return immediately if buffer is busy.
+ *
+ * Synchronizes a buffer object for CPU RW access. This means
+ * blocking command submission that affects the buffer and
+ * waiting for buffer idle. This lock is recursive.
+ * Returns
+ * -EBUSY if the buffer is busy and no_wait is true.
+ * -ERESTART if interrupted by a signal.
+ */
+
+extern int
+ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait);
+/**
+ * ttm_bo_synccpu_write_release:
+ *
+ * @bo : The buffer object.
+ *
+ * Releases a synccpu lock.
+ */
+extern void ttm_bo_synccpu_write_release(struct ttm_buffer_object *bo);
+
+/**
+ * ttm_buffer_object_init
+ *
+ * @bdev: Pointer to a ttm_bo_device struct.
+ * @bo: Pointer to a ttm_buffer_object to be initialized.
+ * @size: Requested size of buffer object.
+ * @type: Requested type of buffer object.
+ * @flags: Initial placement flags.
+ * @page_alignment: Data alignment in pages.
+ * @buffer_start: Virtual address of user space data backing a
+ * user buffer object.
+ * @interruptible: If needing to sleep to wait for GPU resources,
+ * sleep interruptible.
+ * @persistant_swap_storage: Usually the swap storage is deleted for buffers
+ * pinned in physical memory. If this behaviour is not desired, this member
+ * holds a pointer to a persistant shmem object. Typically, this would
+ * point to the shmem object backing a GEM object if TTM is used to back a
+ * GEM user interface.
+ * @acc_size: Accounted size for this object.
+ * @destroy: Destroy function. Use NULL for kfree().
+ *
+ * This function initializes a pre-allocated struct ttm_buffer_object.
+ * As this object may be part of a larger structure, this function,
+ * together with the @destroy function,
+ * enables driver-specific objects derived from a ttm_buffer_object.
+ * On successful return, the object kref and list_kref are set to 1.
+ * Returns
+ * -ENOMEM: Out of memory.
+ * -EINVAL: Invalid placement flags.
+ * -ERESTART: Interrupted by signal while sleeping waiting for resources.
+ */
+
+extern int ttm_buffer_object_init(struct ttm_bo_device *bdev,
+				  struct ttm_buffer_object *bo,
+				  unsigned long size,
+				  enum ttm_bo_type type,
+				  uint32_t flags,
+				  uint32_t page_alignment,
+				  unsigned long buffer_start,
+				  bool interrubtible,
+				  struct file *persistant_swap_storage,
+				  size_t acc_size,
+				  void (*destroy) (struct ttm_buffer_object *));
+/**
+ * ttm_bo_synccpu_object_init
+ *
+ * @bdev: Pointer to a ttm_bo_device struct.
+ * @bo: Pointer to a ttm_buffer_object to be initialized.
+ * @size: Requested size of buffer object.
+ * @type: Requested type of buffer object.
+ * @flags: Initial placement flags.
+ * @page_alignment: Data alignment in pages.
+ * @buffer_start: Virtual address of user space data backing a
+ * user buffer object.
+ * @interruptible: If needing to sleep while waiting for GPU resources,
+ * sleep interruptible.
+ * @persistant_swap_storage: Usually the swap storage is deleted for buffers
+ * pinned in physical memory. If this behaviour is not desired, this member
+ * holds a pointer to a persistant shmem object. Typically, this would
+ * point to the shmem object backing a GEM object if TTM is used to back a
+ * GEM user interface.
+ * @p_bo: On successful completion *p_bo points to the created object.
+ *
+ * This function allocates a ttm_buffer_object, and then calls
+ * ttm_buffer_object_init on that object.
+ * The destroy function is set to kfree().
+ * Returns
+ * -ENOMEM: Out of memory.
+ * -EINVAL: Invalid placement flags.
+ * -ERESTART: Interrupted by signal while waiting for resources.
+ */
+
+extern int ttm_buffer_object_create(struct ttm_bo_device *bdev,
+				    unsigned long size,
+				    enum ttm_bo_type type,
+				    uint32_t flags,
+				    uint32_t page_alignment,
+				    unsigned long buffer_start,
+				    bool interruptible,
+				    struct file *persistant_swap_storage,
+				    struct ttm_buffer_object **p_bo);
+
+/**
+ * ttm_bo_check_placement
+ *
+ * @bo: the buffer object.
+ * @set_flags: placement flags to set.
+ * @clr_flags: placement flags to clear.
+ *
+ * Performs minimal validity checking on an intended change of
+ * placement flags.
+ * Returns
+ * -EINVAL: Intended change is invalid or not allowed.
+ */
+
+extern int ttm_bo_check_placement(struct ttm_buffer_object *bo,
+				  uint32_t set_flags, uint32_t clr_flags);
+
+/**
+ * ttm_bo_init_mm
+ *
+ * @bdev: Pointer to a ttm_bo_device struct.
+ * @mem_type: The memory type.
+ * @p_offset: offset for managed area in pages.
+ * @p_size: size managed area in pages.
+ *
+ * Initialize a manager for a given memory type.
+ * Note: if part of driver firstopen, it must be protected from a
+ * potentially racing lastclose.
+ * Returns:
+ * -EINVAL: invalid size or memory type.
+ * -ENOMEM: Not enough memory.
+ * May also return driver-specified errors.
+ */
+
+extern int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
+			  unsigned long p_offset, unsigned long p_size);
+/**
+ * ttm_bo_clean_mm
+ *
+ * @bdev: Pointer to a ttm_bo_device struct.
+ * @mem_type: The memory type.
+ *
+ * Take down a manager for a given memory type after first walking
+ * the LRU list to evict any buffers left alive.
+ *
+ * Normally, this function is part of lastclose() or unload(), and at that
+ * point there shouldn't be any buffers left created by user-space, since
+ * there should've been removed by the file descriptor release() method.
+ * However, before this function is run, make sure to signal all sync objects,
+ * and verify that the delayed delete queue is empty. The driver must also
+ * make sure that there are no NO_EVICT buffers present in this memory type
+ * when the call is made.
+ *
+ * If this function is part of a VT switch, the caller must make sure that
+ * there are no appications currently validating buffers before this
+ * function is called. The caller can do that by first taking the
+ * struct ttm_bo_device::ttm_lock in write mode.
+ *
+ * Returns:
+ * -EINVAL: invalid or uninitialized memory type.
+ * -EBUSY: There are still buffers left in this memory type.
+ */
+
+extern int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type);
+
+/**
+ * ttm_bo_evict_mm
+ *
+ * @bdev: Pointer to a ttm_bo_device struct.
+ * @mem_type: The memory type.
+ *
+ * Evicts all buffers on the lru list of the memory type.
+ * This is normally part of a VT switch or an
+ * out-of-memory-space-due-to-fragmentation handler.
+ * The caller must make sure that there are no other processes
+ * currently validating buffers, and can do that by taking the
+ * struct ttm_bo_device::ttm_lock in write mode.
+ *
+ * Returns:
+ * -EINVAL: Invalid or uninitialized memory type.
+ * -ERESTART: The call was interrupted by a signal while waiting to
+ * evict a buffer.
+ */
+
+extern int ttm_bo_evict_mm(struct ttm_bo_device *bdev, unsigned mem_type);
+
+/**
+ * ttm_kmap_obj_virtual
+ *
+ * @map: A struct ttm_bo_kmap_obj returned from ttm_bo_kmap.
+ * @is_iomem: Pointer to an integer that on return indicates 1 if the
+ * virtual map is io memory, 0 if normal memory.
+ *
+ * Returns the virtual address of a buffer object area mapped by ttm_bo_kmap.
+ * If *is_iomem is 1 on return, the virtual address points to an io memory area,
+ * that should strictly be accessed by the iowriteXX() and similar functions.
+ */
+
+static inline void *ttm_kmap_obj_virtual(struct ttm_bo_kmap_obj *map,
+					 bool *is_iomem)
+{
+	*is_iomem = (map->bo_kmap_type == ttm_bo_map_iomap ||
+		     map->bo_kmap_type == ttm_bo_map_premapped);
+	return map->virtual;
+}
+
+/**
+ * ttm_bo_kmap
+ *
+ * @bo: The buffer object.
+ * @start_page: The first page to map.
+ * @num_pages: Number of pages to map.
+ * @map: pointer to a struct ttm_bo_kmap_obj representing the map.
+ *
+ * Sets up a kernel virtual mapping, using ioremap, vmap or kmap to the
+ * data in the buffer object. The ttm_kmap_obj_virtual function can then be
+ * used to obtain a virtual address to the data.
+ *
+ * Returns
+ * -ENOMEM: Out of memory.
+ * -EINVAL: Invalid range.
+ */
+
+extern int ttm_bo_kmap(struct ttm_buffer_object *bo, unsigned long start_page,
+		       unsigned long num_pages, struct ttm_bo_kmap_obj *map);
+
+/**
+ * ttm_bo_kunmap
+ *
+ * @map: Object describing the map to unmap.
+ *
+ * Unmaps a kernel map set up by ttm_bo_kmap.
+ */
+
+extern void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map);
+
+#if 0
+#endif
+
+/**
+ * ttm_fbdev_mmap - mmap fbdev memory backed by a ttm buffer object.
+ *
+ * @vma:       vma as input from the fbdev mmap method.
+ * @bo:        The bo backing the address space. The address space will
+ * have the same size as the bo, and start at offset 0.
+ *
+ * This function is intended to be called by the fbdev mmap method
+ * if the fbdev address space is to be backed by a bo.
+ */
+
+extern int ttm_fbdev_mmap(struct vm_area_struct *vma,
+			  struct ttm_buffer_object *bo);
+
+/**
+ * ttm_bo_mmap - mmap out of the ttm device address space.
+ *
+ * @filp:      filp as input from the mmap method.
+ * @vma:       vma as input from the mmap method.
+ * @bdev:      Pointer to the ttm_bo_device with the address space manager.
+ *
+ * This function is intended to be called by the device mmap method.
+ * if the device address space is to be backed by the bo manager.
+ */
+
+extern int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
+		       struct ttm_bo_device *bdev);
+
+/**
+ * ttm_bo_io
+ *
+ * @bdev:      Pointer to the struct ttm_bo_device.
+ * @filp:      Pointer to the struct file attempting to read / write.
+ * @wbuf:      User-space pointer to address of buffer to write. NULL on read.
+ * @rbuf:      User-space pointer to address of buffer to read into.
+ * Null on write.
+ * @count:     Number of bytes to read / write.
+ * @f_pos:     Pointer to current file position.
+ * @write:     1 for read, 0 for write.
+ *
+ * This function implements read / write into ttm buffer objects, and is
+ * intended to
+ * be called from the fops::read and fops::write method.
+ * Returns:
+ * See man (2) write, man(2) read. In particular,
+ * the function may return -EINTR if
+ * interrupted by a signal.
+ */
+
+extern ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp,
+			 const char __user *wbuf, char __user *rbuf,
+			 size_t count, loff_t *f_pos, bool write);
+
+extern void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
+
+#endif
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
new file mode 100644
index 00000000000..62ed733c52a
--- /dev/null
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -0,0 +1,867 @@
+/**************************************************************************
+ *
+ * Copyright (c) 2006-2009 Vmware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+/*
+ * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com>
+ */
+#ifndef _TTM_BO_DRIVER_H_
+#define _TTM_BO_DRIVER_H_
+
+#include "ttm/ttm_bo_api.h"
+#include "ttm/ttm_memory.h"
+#include "drm_mm.h"
+#include "linux/workqueue.h"
+#include "linux/fs.h"
+#include "linux/spinlock.h"
+
+struct ttm_backend;
+
+struct ttm_backend_func {
+	/**
+	 * struct ttm_backend_func member populate
+	 *
+	 * @backend: Pointer to a struct ttm_backend.
+	 * @num_pages: Number of pages to populate.
+	 * @pages: Array of pointers to ttm pages.
+	 * @dummy_read_page: Page to be used instead of NULL pages in the
+	 * array @pages.
+	 *
+	 * Populate the backend with ttm pages. Depending on the backend,
+	 * it may or may not copy the @pages array.
+	 */
+	int (*populate) (struct ttm_backend *backend,
+			 unsigned long num_pages, struct page **pages,
+			 struct page *dummy_read_page);
+	/**
+	 * struct ttm_backend_func member clear
+	 *
+	 * @backend: Pointer to a struct ttm_backend.
+	 *
+	 * This is an "unpopulate" function. Release all resources
+	 * allocated with populate.
+	 */
+	void (*clear) (struct ttm_backend *backend);
+
+	/**
+	 * struct ttm_backend_func member bind
+	 *
+	 * @backend: Pointer to a struct ttm_backend.
+	 * @bo_mem: Pointer to a struct ttm_mem_reg describing the
+	 * memory type and location for binding.
+	 *
+	 * Bind the backend pages into the aperture in the location
+	 * indicated by @bo_mem. This function should be able to handle
+	 * differences between aperture- and system page sizes.
+	 */
+	int (*bind) (struct ttm_backend *backend, struct ttm_mem_reg *bo_mem);
+
+	/**
+	 * struct ttm_backend_func member unbind
+	 *
+	 * @backend: Pointer to a struct ttm_backend.
+	 *
+	 * Unbind previously bound backend pages. This function should be
+	 * able to handle differences between aperture- and system page sizes.
+	 */
+	int (*unbind) (struct ttm_backend *backend);
+
+	/**
+	 * struct ttm_backend_func member destroy
+	 *
+	 * @backend: Pointer to a struct ttm_backend.
+	 *
+	 * Destroy the backend.
+	 */
+	void (*destroy) (struct ttm_backend *backend);
+};
+
+/**
+ * struct ttm_backend
+ *
+ * @bdev: Pointer to a struct ttm_bo_device.
+ * @flags: For driver use.
+ * @func: Pointer to a struct ttm_backend_func that describes
+ * the backend methods.
+ *
+ */
+
+struct ttm_backend {
+	struct ttm_bo_device *bdev;
+	uint32_t flags;
+	struct ttm_backend_func *func;
+};
+
+#define TTM_PAGE_FLAG_VMALLOC         (1 << 0)
+#define TTM_PAGE_FLAG_USER            (1 << 1)
+#define TTM_PAGE_FLAG_USER_DIRTY      (1 << 2)
+#define TTM_PAGE_FLAG_WRITE           (1 << 3)
+#define TTM_PAGE_FLAG_SWAPPED         (1 << 4)
+#define TTM_PAGE_FLAG_PERSISTANT_SWAP (1 << 5)
+#define TTM_PAGE_FLAG_ZERO_ALLOC      (1 << 6)
+
+enum ttm_caching_state {
+	tt_uncached,
+	tt_wc,
+	tt_cached
+};
+
+/**
+ * struct ttm_tt
+ *
+ * @dummy_read_page: Page to map where the ttm_tt page array contains a NULL
+ * pointer.
+ * @pages: Array of pages backing the data.
+ * @first_himem_page: Himem pages are put last in the page array, which
+ * enables us to run caching attribute changes on only the first part
+ * of the page array containing lomem pages. This is the index of the
+ * first himem page.
+ * @last_lomem_page: Index of the last lomem page in the page array.
+ * @num_pages: Number of pages in the page array.
+ * @bdev: Pointer to the current struct ttm_bo_device.
+ * @be: Pointer to the ttm backend.
+ * @tsk: The task for user ttm.
+ * @start: virtual address for user ttm.
+ * @swap_storage: Pointer to shmem struct file for swap storage.
+ * @caching_state: The current caching state of the pages.
+ * @state: The current binding state of the pages.
+ *
+ * This is a structure holding the pages, caching- and aperture binding
+ * status for a buffer object that isn't backed by fixed (VRAM / AGP)
+ * memory.
+ */
+
+struct ttm_tt {
+	struct page *dummy_read_page;
+	struct page **pages;
+	long first_himem_page;
+	long last_lomem_page;
+	uint32_t page_flags;
+	unsigned long num_pages;
+	struct ttm_bo_device *bdev;
+	struct ttm_backend *be;
+	struct task_struct *tsk;
+	unsigned long start;
+	struct file *swap_storage;
+	enum ttm_caching_state caching_state;
+	enum {
+		tt_bound,
+		tt_unbound,
+		tt_unpopulated,
+	} state;
+};
+
+#define TTM_MEMTYPE_FLAG_FIXED         (1 << 0)	/* Fixed (on-card) PCI memory */
+#define TTM_MEMTYPE_FLAG_MAPPABLE      (1 << 1)	/* Memory mappable */
+#define TTM_MEMTYPE_FLAG_NEEDS_IOREMAP (1 << 2)	/* Fixed memory needs ioremap
+						   before kernel access. */
+#define TTM_MEMTYPE_FLAG_CMA           (1 << 3)	/* Can't map aperture */
+
+/**
+ * struct ttm_mem_type_manager
+ *
+ * @has_type: The memory type has been initialized.
+ * @use_type: The memory type is enabled.
+ * @flags: TTM_MEMTYPE_XX flags identifying the traits of the memory
+ * managed by this memory type.
+ * @gpu_offset: If used, the GPU offset of the first managed page of
+ * fixed memory or the first managed location in an aperture.
+ * @io_offset: The io_offset of the first managed page of IO memory or
+ * the first managed location in an aperture. For TTM_MEMTYPE_FLAG_CMA
+ * memory, this should be set to NULL.
+ * @io_size: The size of a managed IO region (fixed memory or aperture).
+ * @io_addr: Virtual kernel address if the io region is pre-mapped. For
+ * TTM_MEMTYPE_FLAG_NEEDS_IOREMAP there is no pre-mapped io map and
+ * @io_addr should be set to NULL.
+ * @size: Size of the managed region.
+ * @available_caching: A mask of available caching types, TTM_PL_FLAG_XX,
+ * as defined in ttm_placement_common.h
+ * @default_caching: The default caching policy used for a buffer object
+ * placed in this memory type if the user doesn't provide one.
+ * @manager: The range manager used for this memory type. FIXME: If the aperture
+ * has a page size different from the underlying system, the granularity
+ * of this manager should take care of this. But the range allocating code
+ * in ttm_bo.c needs to be modified for this.
+ * @lru: The lru list for this memory type.
+ *
+ * This structure is used to identify and manage memory types for a device.
+ * It's set up by the ttm_bo_driver::init_mem_type method.
+ */
+
+struct ttm_mem_type_manager {
+
+	/*
+	 * No protection. Constant from start.
+	 */
+
+	bool has_type;
+	bool use_type;
+	uint32_t flags;
+	unsigned long gpu_offset;
+	unsigned long io_offset;
+	unsigned long io_size;
+	void *io_addr;
+	uint64_t size;
+	uint32_t available_caching;
+	uint32_t default_caching;
+
+	/*
+	 * Protected by the bdev->lru_lock.
+	 * TODO: Consider one lru_lock per ttm_mem_type_manager.
+	 * Plays ill with list removal, though.
+	 */
+
+	struct drm_mm manager;
+	struct list_head lru;
+};
+
+/**
+ * struct ttm_bo_driver
+ *
+ * @mem_type_prio: Priority array of memory types to place a buffer object in
+ * if it fits without evicting buffers from any of these memory types.
+ * @mem_busy_prio: Priority array of memory types to place a buffer object in
+ * if it needs to evict buffers to make room.
+ * @num_mem_type_prio: Number of elements in the @mem_type_prio array.
+ * @num_mem_busy_prio: Number of elements in the @num_mem_busy_prio array.
+ * @create_ttm_backend_entry: Callback to create a struct ttm_backend.
+ * @invalidate_caches: Callback to invalidate read caches when a buffer object
+ * has been evicted.
+ * @init_mem_type: Callback to initialize a struct ttm_mem_type_manager
+ * structure.
+ * @evict_flags: Callback to obtain placement flags when a buffer is evicted.
+ * @move: Callback for a driver to hook in accelerated functions to
+ * move a buffer.
+ * If set to NULL, a potentially slow memcpy() move is used.
+ * @sync_obj_signaled: See ttm_fence_api.h
+ * @sync_obj_wait: See ttm_fence_api.h
+ * @sync_obj_flush: See ttm_fence_api.h
+ * @sync_obj_unref: See ttm_fence_api.h
+ * @sync_obj_ref: See ttm_fence_api.h
+ */
+
+struct ttm_bo_driver {
+	const uint32_t *mem_type_prio;
+	const uint32_t *mem_busy_prio;
+	uint32_t num_mem_type_prio;
+	uint32_t num_mem_busy_prio;
+
+	/**
+	 * struct ttm_bo_driver member create_ttm_backend_entry
+	 *
+	 * @bdev: The buffer object device.
+	 *
+	 * Create a driver specific struct ttm_backend.
+	 */
+
+	struct ttm_backend *(*create_ttm_backend_entry)
+	 (struct ttm_bo_device *bdev);
+
+	/**
+	 * struct ttm_bo_driver member invalidate_caches
+	 *
+	 * @bdev: the buffer object device.
+	 * @flags: new placement of the rebound buffer object.
+	 *
+	 * A previosly evicted buffer has been rebound in a
+	 * potentially new location. Tell the driver that it might
+	 * consider invalidating read (texture) caches on the next command
+	 * submission as a consequence.
+	 */
+
+	int (*invalidate_caches) (struct ttm_bo_device *bdev, uint32_t flags);
+	int (*init_mem_type) (struct ttm_bo_device *bdev, uint32_t type,
+			      struct ttm_mem_type_manager *man);
+	/**
+	 * struct ttm_bo_driver member evict_flags:
+	 *
+	 * @bo: the buffer object to be evicted
+	 *
+	 * Return the bo flags for a buffer which is not mapped to the hardware.
+	 * These will be placed in proposed_flags so that when the move is
+	 * finished, they'll end up in bo->mem.flags
+	 */
+
+	 uint32_t(*evict_flags) (struct ttm_buffer_object *bo);
+	/**
+	 * struct ttm_bo_driver member move:
+	 *
+	 * @bo: the buffer to move
+	 * @evict: whether this motion is evicting the buffer from
+	 * the graphics address space
+	 * @interruptible: Use interruptible sleeps if possible when sleeping.
+	 * @no_wait: whether this should give up and return -EBUSY
+	 * if this move would require sleeping
+	 * @new_mem: the new memory region receiving the buffer
+	 *
+	 * Move a buffer between two memory regions.
+	 */
+	int (*move) (struct ttm_buffer_object *bo,
+		     bool evict, bool interruptible,
+		     bool no_wait, struct ttm_mem_reg *new_mem);
+
+	/**
+	 * struct ttm_bo_driver_member verify_access
+	 *
+	 * @bo: Pointer to a buffer object.
+	 * @filp: Pointer to a struct file trying to access the object.
+	 *
+	 * Called from the map / write / read methods to verify that the
+	 * caller is permitted to access the buffer object.
+	 * This member may be set to NULL, which will refuse this kind of
+	 * access for all buffer objects.
+	 * This function should return 0 if access is granted, -EPERM otherwise.
+	 */
+	int (*verify_access) (struct ttm_buffer_object *bo,
+			      struct file *filp);
+
+	/**
+	 * In case a driver writer dislikes the TTM fence objects,
+	 * the driver writer can replace those with sync objects of
+	 * his / her own. If it turns out that no driver writer is
+	 * using these. I suggest we remove these hooks and plug in
+	 * fences directly. The bo driver needs the following functionality:
+	 * See the corresponding functions in the fence object API
+	 * documentation.
+	 */
+
+	bool (*sync_obj_signaled) (void *sync_obj, void *sync_arg);
+	int (*sync_obj_wait) (void *sync_obj, void *sync_arg,
+			      bool lazy, bool interruptible);
+	int (*sync_obj_flush) (void *sync_obj, void *sync_arg);
+	void (*sync_obj_unref) (void **sync_obj);
+	void *(*sync_obj_ref) (void *sync_obj);
+};
+
+#define TTM_NUM_MEM_TYPES 8
+
+#define TTM_BO_PRIV_FLAG_MOVING  0	/* Buffer object is moving and needs
+					   idling before CPU mapping */
+#define TTM_BO_PRIV_FLAG_MAX 1
+/**
+ * struct ttm_bo_device - Buffer object driver device-specific data.
+ *
+ * @mem_glob: Pointer to a struct ttm_mem_global object for accounting.
+ * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver.
+ * @count: Current number of buffer object.
+ * @pages: Current number of pinned pages.
+ * @dummy_read_page: Pointer to a dummy page used for mapping requests
+ * of unpopulated pages.
+ * @shrink: A shrink callback object used for buffre object swap.
+ * @ttm_bo_extra_size: Extra size (sizeof(struct ttm_buffer_object) excluded)
+ * used by a buffer object. This is excluding page arrays and backing pages.
+ * @ttm_bo_size: This is @ttm_bo_extra_size + sizeof(struct ttm_buffer_object).
+ * @man: An array of mem_type_managers.
+ * @addr_space_mm: Range manager for the device address space.
+ * lru_lock: Spinlock that protects the buffer+device lru lists and
+ * ddestroy lists.
+ * @nice_mode: Try nicely to wait for buffer idle when cleaning a manager.
+ * If a GPU lockup has been detected, this is forced to 0.
+ * @dev_mapping: A pointer to the struct address_space representing the
+ * device address space.
+ * @wq: Work queue structure for the delayed delete workqueue.
+ *
+ */
+
+struct ttm_bo_device {
+
+	/*
+	 * Constant after bo device init / atomic.
+	 */
+
+	struct ttm_mem_global *mem_glob;
+	struct ttm_bo_driver *driver;
+	struct page *dummy_read_page;
+	struct ttm_mem_shrink shrink;
+
+	size_t ttm_bo_extra_size;
+	size_t ttm_bo_size;
+
+	rwlock_t vm_lock;
+	/*
+	 * Protected by the vm lock.
+	 */
+	struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES];
+	struct rb_root addr_space_rb;
+	struct drm_mm addr_space_mm;
+
+	/*
+	 * Might want to change this to one lock per manager.
+	 */
+	spinlock_t lru_lock;
+	/*
+	 * Protected by the lru lock.
+	 */
+	struct list_head ddestroy;
+	struct list_head swap_lru;
+
+	/*
+	 * Protected by load / firstopen / lastclose /unload sync.
+	 */
+
+	bool nice_mode;
+	struct address_space *dev_mapping;
+
+	/*
+	 * Internal protection.
+	 */
+
+	struct delayed_work wq;
+};
+
+/**
+ * ttm_flag_masked
+ *
+ * @old: Pointer to the result and original value.
+ * @new: New value of bits.
+ * @mask: Mask of bits to change.
+ *
+ * Convenience function to change a number of bits identified by a mask.
+ */
+
+static inline uint32_t
+ttm_flag_masked(uint32_t *old, uint32_t new, uint32_t mask)
+{
+	*old ^= (*old ^ new) & mask;
+	return *old;
+}
+
+/**
+ * ttm_tt_create
+ *
+ * @bdev: pointer to a struct ttm_bo_device:
+ * @size: Size of the data needed backing.
+ * @page_flags: Page flags as identified by TTM_PAGE_FLAG_XX flags.
+ * @dummy_read_page: See struct ttm_bo_device.
+ *
+ * Create a struct ttm_tt to back data with system memory pages.
+ * No pages are actually allocated.
+ * Returns:
+ * NULL: Out of memory.
+ */
+extern struct ttm_tt *ttm_tt_create(struct ttm_bo_device *bdev,
+				    unsigned long size,
+				    uint32_t page_flags,
+				    struct page *dummy_read_page);
+
+/**
+ * ttm_tt_set_user:
+ *
+ * @ttm: The struct ttm_tt to populate.
+ * @tsk: A struct task_struct for which @start is a valid user-space address.
+ * @start: A valid user-space address.
+ * @num_pages: Size in pages of the user memory area.
+ *
+ * Populate a struct ttm_tt with a user-space memory area after first pinning
+ * the pages backing it.
+ * Returns:
+ * !0: Error.
+ */
+
+extern int ttm_tt_set_user(struct ttm_tt *ttm,
+			   struct task_struct *tsk,
+			   unsigned long start, unsigned long num_pages);
+
+/**
+ * ttm_ttm_bind:
+ *
+ * @ttm: The struct ttm_tt containing backing pages.
+ * @bo_mem: The struct ttm_mem_reg identifying the binding location.
+ *
+ * Bind the pages of @ttm to an aperture location identified by @bo_mem
+ */
+extern int ttm_tt_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
+
+/**
+ * ttm_ttm_destroy:
+ *
+ * @ttm: The struct ttm_tt.
+ *
+ * Unbind, unpopulate and destroy a struct ttm_tt.
+ */
+extern void ttm_tt_destroy(struct ttm_tt *ttm);
+
+/**
+ * ttm_ttm_unbind:
+ *
+ * @ttm: The struct ttm_tt.
+ *
+ * Unbind a struct ttm_tt.
+ */
+extern void ttm_tt_unbind(struct ttm_tt *ttm);
+
+/**
+ * ttm_ttm_destroy:
+ *
+ * @ttm: The struct ttm_tt.
+ * @index: Index of the desired page.
+ *
+ * Return a pointer to the struct page backing @ttm at page
+ * index @index. If the page is unpopulated, one will be allocated to
+ * populate that index.
+ *
+ * Returns:
+ * NULL on OOM.
+ */
+extern struct page *ttm_tt_get_page(struct ttm_tt *ttm, int index);
+
+/**
+ * ttm_tt_cache_flush:
+ *
+ * @pages: An array of pointers to struct page:s to flush.
+ * @num_pages: Number of pages to flush.
+ *
+ * Flush the data of the indicated pages from the cpu caches.
+ * This is used when changing caching attributes of the pages from
+ * cache-coherent.
+ */
+extern void ttm_tt_cache_flush(struct page *pages[], unsigned long num_pages);
+
+/**
+ * ttm_tt_set_placement_caching:
+ *
+ * @ttm A struct ttm_tt the backing pages of which will change caching policy.
+ * @placement: Flag indicating the desired caching policy.
+ *
+ * This function will change caching policy of any default kernel mappings of
+ * the pages backing @ttm. If changing from cached to uncached or
+ * write-combined,
+ * all CPU caches will first be flushed to make sure the data of the pages
+ * hit RAM. This function may be very costly as it involves global TLB
+ * and cache flushes and potential page splitting / combining.
+ */
+extern int ttm_tt_set_placement_caching(struct ttm_tt *ttm, uint32_t placement);
+extern int ttm_tt_swapout(struct ttm_tt *ttm,
+			  struct file *persistant_swap_storage);
+
+/*
+ * ttm_bo.c
+ */
+
+/**
+ * ttm_mem_reg_is_pci
+ *
+ * @bdev: Pointer to a struct ttm_bo_device.
+ * @mem: A valid struct ttm_mem_reg.
+ *
+ * Returns true if the memory described by @mem is PCI memory,
+ * false otherwise.
+ */
+extern bool ttm_mem_reg_is_pci(struct ttm_bo_device *bdev,
+				   struct ttm_mem_reg *mem);
+
+/**
+ * ttm_bo_mem_space
+ *
+ * @bo: Pointer to a struct ttm_buffer_object. the data of which
+ * we want to allocate space for.
+ * @proposed_placement: Proposed new placement for the buffer object.
+ * @mem: A struct ttm_mem_reg.
+ * @interruptible: Sleep interruptible when sliping.
+ * @no_wait: Don't sleep waiting for space to become available.
+ *
+ * Allocate memory space for the buffer object pointed to by @bo, using
+ * the placement flags in @mem, potentially evicting other idle buffer objects.
+ * This function may sleep while waiting for space to become available.
+ * Returns:
+ * -EBUSY: No space available (only if no_wait == 1).
+ * -ENOMEM: Could not allocate memory for the buffer object, either due to
+ * fragmentation or concurrent allocators.
+ * -ERESTART: An interruptible sleep was interrupted by a signal.
+ */
+extern int ttm_bo_mem_space(struct ttm_buffer_object *bo,
+			    uint32_t proposed_placement,
+			    struct ttm_mem_reg *mem,
+			    bool interruptible, bool no_wait);
+/**
+ * ttm_bo_wait_for_cpu
+ *
+ * @bo: Pointer to a struct ttm_buffer_object.
+ * @no_wait: Don't sleep while waiting.
+ *
+ * Wait until a buffer object is no longer sync'ed for CPU access.
+ * Returns:
+ * -EBUSY: Buffer object was sync'ed for CPU access. (only if no_wait == 1).
+ * -ERESTART: An interruptible sleep was interrupted by a signal.
+ */
+
+extern int ttm_bo_wait_cpu(struct ttm_buffer_object *bo, bool no_wait);
+
+/**
+ * ttm_bo_pci_offset - Get the PCI offset for the buffer object memory.
+ *
+ * @bo Pointer to a struct ttm_buffer_object.
+ * @bus_base On return the base of the PCI region
+ * @bus_offset On return the byte offset into the PCI region
+ * @bus_size On return the byte size of the buffer object or zero if
+ * the buffer object memory is not accessible through a PCI region.
+ *
+ * Returns:
+ * -EINVAL if the buffer object is currently not mappable.
+ * 0 otherwise.
+ */
+
+extern int ttm_bo_pci_offset(struct ttm_bo_device *bdev,
+			     struct ttm_mem_reg *mem,
+			     unsigned long *bus_base,
+			     unsigned long *bus_offset,
+			     unsigned long *bus_size);
+
+extern int ttm_bo_device_release(struct ttm_bo_device *bdev);
+
+/**
+ * ttm_bo_device_init
+ *
+ * @bdev: A pointer to a struct ttm_bo_device to initialize.
+ * @mem_global: A pointer to an initialized struct ttm_mem_global.
+ * @driver: A pointer to a struct ttm_bo_driver set up by the caller.
+ * @file_page_offset: Offset into the device address space that is available
+ * for buffer data. This ensures compatibility with other users of the
+ * address space.
+ *
+ * Initializes a struct ttm_bo_device:
+ * Returns:
+ * !0: Failure.
+ */
+extern int ttm_bo_device_init(struct ttm_bo_device *bdev,
+			      struct ttm_mem_global *mem_glob,
+			      struct ttm_bo_driver *driver,
+			      uint64_t file_page_offset);
+
+/**
+ * ttm_bo_reserve:
+ *
+ * @bo: A pointer to a struct ttm_buffer_object.
+ * @interruptible: Sleep interruptible if waiting.
+ * @no_wait: Don't sleep while trying to reserve, rather return -EBUSY.
+ * @use_sequence: If @bo is already reserved, Only sleep waiting for
+ * it to become unreserved if @sequence < (@bo)->sequence.
+ *
+ * Locks a buffer object for validation. (Or prevents other processes from
+ * locking it for validation) and removes it from lru lists, while taking
+ * a number of measures to prevent deadlocks.
+ *
+ * Deadlocks may occur when two processes try to reserve multiple buffers in
+ * different order, either by will or as a result of a buffer being evicted
+ * to make room for a buffer already reserved. (Buffers are reserved before
+ * they are evicted). The following algorithm prevents such deadlocks from
+ * occuring:
+ * 1) Buffers are reserved with the lru spinlock held. Upon successful
+ * reservation they are removed from the lru list. This stops a reserved buffer
+ * from being evicted. However the lru spinlock is released between the time
+ * a buffer is selected for eviction and the time it is reserved.
+ * Therefore a check is made when a buffer is reserved for eviction, that it
+ * is still the first buffer in the lru list, before it is removed from the
+ * list. @check_lru == 1 forces this check. If it fails, the function returns
+ * -EINVAL, and the caller should then choose a new buffer to evict and repeat
+ * the procedure.
+ * 2) Processes attempting to reserve multiple buffers other than for eviction,
+ * (typically execbuf), should first obtain a unique 32-bit
+ * validation sequence number,
+ * and call this function with @use_sequence == 1 and @sequence == the unique
+ * sequence number. If upon call of this function, the buffer object is already
+ * reserved, the validation sequence is checked against the validation
+ * sequence of the process currently reserving the buffer,
+ * and if the current validation sequence is greater than that of the process
+ * holding the reservation, the function returns -EAGAIN. Otherwise it sleeps
+ * waiting for the buffer to become unreserved, after which it retries
+ * reserving.
+ * The caller should, when receiving an -EAGAIN error
+ * release all its buffer reservations, wait for @bo to become unreserved, and
+ * then rerun the validation with the same validation sequence. This procedure
+ * will always guarantee that the process with the lowest validation sequence
+ * will eventually succeed, preventing both deadlocks and starvation.
+ *
+ * Returns:
+ * -EAGAIN: The reservation may cause a deadlock.
+ * Release all buffer reservations, wait for @bo to become unreserved and
+ * try again. (only if use_sequence == 1).
+ * -ERESTART: A wait for the buffer to become unreserved was interrupted by
+ * a signal. Release all buffer reservations and return to user-space.
+ */
+extern int ttm_bo_reserve(struct ttm_buffer_object *bo,
+			  bool interruptible,
+			  bool no_wait, bool use_sequence, uint32_t sequence);
+
+/**
+ * ttm_bo_unreserve
+ *
+ * @bo: A pointer to a struct ttm_buffer_object.
+ *
+ * Unreserve a previous reservation of @bo.
+ */
+extern void ttm_bo_unreserve(struct ttm_buffer_object *bo);
+
+/**
+ * ttm_bo_wait_unreserved
+ *
+ * @bo: A pointer to a struct ttm_buffer_object.
+ *
+ * Wait for a struct ttm_buffer_object to become unreserved.
+ * This is typically used in the execbuf code to relax cpu-usage when
+ * a potential deadlock condition backoff.
+ */
+extern int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo,
+				  bool interruptible);
+
+/**
+ * ttm_bo_block_reservation
+ *
+ * @bo: A pointer to a struct ttm_buffer_object.
+ * @interruptible: Use interruptible sleep when waiting.
+ * @no_wait: Don't sleep, but rather return -EBUSY.
+ *
+ * Block reservation for validation by simply reserving the buffer.
+ * This is intended for single buffer use only without eviction,
+ * and thus needs no deadlock protection.
+ *
+ * Returns:
+ * -EBUSY: If no_wait == 1 and the buffer is already reserved.
+ * -ERESTART: If interruptible == 1 and the process received a signal
+ * while sleeping.
+ */
+extern int ttm_bo_block_reservation(struct ttm_buffer_object *bo,
+				    bool interruptible, bool no_wait);
+
+/**
+ * ttm_bo_unblock_reservation
+ *
+ * @bo: A pointer to a struct ttm_buffer_object.
+ *
+ * Unblocks reservation leaving lru lists untouched.
+ */
+extern void ttm_bo_unblock_reservation(struct ttm_buffer_object *bo);
+
+/*
+ * ttm_bo_util.c
+ */
+
+/**
+ * ttm_bo_move_ttm
+ *
+ * @bo: A pointer to a struct ttm_buffer_object.
+ * @evict: 1: This is an eviction. Don't try to pipeline.
+ * @no_wait: Never sleep, but rather return with -EBUSY.
+ * @new_mem: struct ttm_mem_reg indicating where to move.
+ *
+ * Optimized move function for a buffer object with both old and
+ * new placement backed by a TTM. The function will, if successful,
+ * free any old aperture space, and set (@new_mem)->mm_node to NULL,
+ * and update the (@bo)->mem placement flags. If unsuccessful, the old
+ * data remains untouched, and it's up to the caller to free the
+ * memory space indicated by @new_mem.
+ * Returns:
+ * !0: Failure.
+ */
+
+extern int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
+			   bool evict, bool no_wait,
+			   struct ttm_mem_reg *new_mem);
+
+/**
+ * ttm_bo_move_memcpy
+ *
+ * @bo: A pointer to a struct ttm_buffer_object.
+ * @evict: 1: This is an eviction. Don't try to pipeline.
+ * @no_wait: Never sleep, but rather return with -EBUSY.
+ * @new_mem: struct ttm_mem_reg indicating where to move.
+ *
+ * Fallback move function for a mappable buffer object in mappable memory.
+ * The function will, if successful,
+ * free any old aperture space, and set (@new_mem)->mm_node to NULL,
+ * and update the (@bo)->mem placement flags. If unsuccessful, the old
+ * data remains untouched, and it's up to the caller to free the
+ * memory space indicated by @new_mem.
+ * Returns:
+ * !0: Failure.
+ */
+
+extern int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
+			      bool evict,
+			      bool no_wait, struct ttm_mem_reg *new_mem);
+
+/**
+ * ttm_bo_free_old_node
+ *
+ * @bo: A pointer to a struct ttm_buffer_object.
+ *
+ * Utility function to free an old placement after a successful move.
+ */
+extern void ttm_bo_free_old_node(struct ttm_buffer_object *bo);
+
+/**
+ * ttm_bo_move_accel_cleanup.
+ *
+ * @bo: A pointer to a struct ttm_buffer_object.
+ * @sync_obj: A sync object that signals when moving is complete.
+ * @sync_obj_arg: An argument to pass to the sync object idle / wait
+ * functions.
+ * @evict: This is an evict move. Don't return until the buffer is idle.
+ * @no_wait: Never sleep, but rather return with -EBUSY.
+ * @new_mem: struct ttm_mem_reg indicating where to move.
+ *
+ * Accelerated move function to be called when an accelerated move
+ * has been scheduled. The function will create a new temporary buffer object
+ * representing the old placement, and put the sync object on both buffer
+ * objects. After that the newly created buffer object is unref'd to be
+ * destroyed when the move is complete. This will help pipeline
+ * buffer moves.
+ */
+
+extern int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
+				     void *sync_obj,
+				     void *sync_obj_arg,
+				     bool evict, bool no_wait,
+				     struct ttm_mem_reg *new_mem);
+/**
+ * ttm_io_prot
+ *
+ * @c_state: Caching state.
+ * @tmp: Page protection flag for a normal, cached mapping.
+ *
+ * Utility function that returns the pgprot_t that should be used for
+ * setting up a PTE with the caching model indicated by @c_state.
+ */
+extern pgprot_t ttm_io_prot(enum ttm_caching_state c_state, pgprot_t tmp);
+
+#if (defined(CONFIG_AGP) || (defined(CONFIG_AGP_MODULE) && defined(MODULE)))
+#define TTM_HAS_AGP
+#include <linux/agp_backend.h>
+
+/**
+ * ttm_agp_backend_init
+ *
+ * @bdev: Pointer to a struct ttm_bo_device.
+ * @bridge: The agp bridge this device is sitting on.
+ *
+ * Create a TTM backend that uses the indicated AGP bridge as an aperture
+ * for TT memory. This function uses the linux agpgart interface to
+ * bind and unbind memory backing a ttm_tt.
+ */
+extern struct ttm_backend *ttm_agp_backend_init(struct ttm_bo_device *bdev,
+						struct agp_bridge_data *bridge);
+#endif
+
+#endif
diff --git a/include/drm/ttm/ttm_memory.h b/include/drm/ttm/ttm_memory.h
new file mode 100644
index 00000000000..d8b8f042c4f
--- /dev/null
+++ b/include/drm/ttm/ttm_memory.h
@@ -0,0 +1,153 @@
+/**************************************************************************
+ *
+ * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef TTM_MEMORY_H
+#define TTM_MEMORY_H
+
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/errno.h>
+
+/**
+ * struct ttm_mem_shrink - callback to shrink TTM memory usage.
+ *
+ * @do_shrink: The callback function.
+ *
+ * Arguments to the do_shrink functions are intended to be passed using
+ * inheritance. That is, the argument class derives from struct ttm_mem_srink,
+ * and can be accessed using container_of().
+ */
+
+struct ttm_mem_shrink {
+	int (*do_shrink) (struct ttm_mem_shrink *);
+};
+
+/**
+ * struct ttm_mem_global - Global memory accounting structure.
+ *
+ * @shrink: A single callback to shrink TTM memory usage. Extend this
+ * to a linked list to be able to handle multiple callbacks when needed.
+ * @swap_queue: A workqueue to handle shrinking in low memory situations. We
+ * need a separate workqueue since it will spend a lot of time waiting
+ * for the GPU, and this will otherwise block other workqueue tasks(?)
+ * At this point we use only a single-threaded workqueue.
+ * @work: The workqueue callback for the shrink queue.
+ * @queue: Wait queue for processes suspended waiting for memory.
+ * @lock: Lock to protect the @shrink - and the memory accounting members,
+ * that is, essentially the whole structure with some exceptions.
+ * @emer_memory: Lowmem memory limit available for root.
+ * @max_memory: Lowmem memory limit available for non-root.
+ * @swap_limit: Lowmem memory limit where the shrink workqueue kicks in.
+ * @used_memory: Currently used lowmem memory.
+ * @used_total_memory: Currently used total (lowmem + highmem) memory.
+ * @total_memory_swap_limit: Total memory limit where the shrink workqueue
+ * kicks in.
+ * @max_total_memory: Total memory available to non-root processes.
+ * @emer_total_memory: Total memory available to root processes.
+ *
+ * Note that this structure is not per device. It should be global for all
+ * graphics devices.
+ */
+
+struct ttm_mem_global {
+	struct ttm_mem_shrink *shrink;
+	struct workqueue_struct *swap_queue;
+	struct work_struct work;
+	wait_queue_head_t queue;
+	spinlock_t lock;
+	uint64_t emer_memory;
+	uint64_t max_memory;
+	uint64_t swap_limit;
+	uint64_t used_memory;
+	uint64_t used_total_memory;
+	uint64_t total_memory_swap_limit;
+	uint64_t max_total_memory;
+	uint64_t emer_total_memory;
+};
+
+/**
+ * ttm_mem_init_shrink - initialize a struct ttm_mem_shrink object
+ *
+ * @shrink: The object to initialize.
+ * @func: The callback function.
+ */
+
+static inline void ttm_mem_init_shrink(struct ttm_mem_shrink *shrink,
+				       int (*func) (struct ttm_mem_shrink *))
+{
+	shrink->do_shrink = func;
+}
+
+/**
+ * ttm_mem_register_shrink - register a struct ttm_mem_shrink object.
+ *
+ * @glob: The struct ttm_mem_global object to register with.
+ * @shrink: An initialized struct ttm_mem_shrink object to register.
+ *
+ * Returns:
+ * -EBUSY: There's already a callback registered. (May change).
+ */
+
+static inline int ttm_mem_register_shrink(struct ttm_mem_global *glob,
+					  struct ttm_mem_shrink *shrink)
+{
+	spin_lock(&glob->lock);
+	if (glob->shrink != NULL) {
+		spin_unlock(&glob->lock);
+		return -EBUSY;
+	}
+	glob->shrink = shrink;
+	spin_unlock(&glob->lock);
+	return 0;
+}
+
+/**
+ * ttm_mem_unregister_shrink - unregister a struct ttm_mem_shrink object.
+ *
+ * @glob: The struct ttm_mem_global object to unregister from.
+ * @shrink: A previously registert struct ttm_mem_shrink object.
+ *
+ */
+
+static inline void ttm_mem_unregister_shrink(struct ttm_mem_global *glob,
+					     struct ttm_mem_shrink *shrink)
+{
+	spin_lock(&glob->lock);
+	BUG_ON(glob->shrink != shrink);
+	glob->shrink = NULL;
+	spin_unlock(&glob->lock);
+}
+
+extern int ttm_mem_global_init(struct ttm_mem_global *glob);
+extern void ttm_mem_global_release(struct ttm_mem_global *glob);
+extern int ttm_mem_global_alloc(struct ttm_mem_global *glob, uint64_t memory,
+				bool no_wait, bool interruptible, bool himem);
+extern void ttm_mem_global_free(struct ttm_mem_global *glob,
+				uint64_t amount, bool himem);
+extern size_t ttm_round_pot(size_t size);
+#endif
diff --git a/include/drm/ttm/ttm_module.h b/include/drm/ttm/ttm_module.h
new file mode 100644
index 00000000000..889a4c7958a
--- /dev/null
+++ b/include/drm/ttm/ttm_module.h
@@ -0,0 +1,58 @@
+/**************************************************************************
+ *
+ * Copyright 2008-2009 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+/*
+ * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com>
+ */
+
+#ifndef _TTM_MODULE_H_
+#define _TTM_MODULE_H_
+
+#include <linux/kernel.h>
+
+#define TTM_PFX "[TTM]"
+
+enum ttm_global_types {
+	TTM_GLOBAL_TTM_MEM = 0,
+	TTM_GLOBAL_TTM_BO,
+	TTM_GLOBAL_TTM_OBJECT,
+	TTM_GLOBAL_NUM
+};
+
+struct ttm_global_reference {
+	enum ttm_global_types global_type;
+	size_t size;
+	void *object;
+	int (*init) (struct ttm_global_reference *);
+	void (*release) (struct ttm_global_reference *);
+};
+
+extern void ttm_global_init(void);
+extern void ttm_global_release(void);
+extern int ttm_global_item_ref(struct ttm_global_reference *ref);
+extern void ttm_global_item_unref(struct ttm_global_reference *ref);
+
+#endif /* _TTM_MODULE_H_ */
diff --git a/include/drm/ttm/ttm_placement.h b/include/drm/ttm/ttm_placement.h
new file mode 100644
index 00000000000..c84ff153a56
--- /dev/null
+++ b/include/drm/ttm/ttm_placement.h
@@ -0,0 +1,92 @@
+/**************************************************************************
+ *
+ * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+/*
+ * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com>
+ */
+
+#ifndef _TTM_PLACEMENT_H_
+#define _TTM_PLACEMENT_H_
+/*
+ * Memory regions for data placement.
+ */
+
+#define TTM_PL_SYSTEM           0
+#define TTM_PL_TT               1
+#define TTM_PL_VRAM             2
+#define TTM_PL_PRIV0            3
+#define TTM_PL_PRIV1            4
+#define TTM_PL_PRIV2            5
+#define TTM_PL_PRIV3            6
+#define TTM_PL_PRIV4            7
+#define TTM_PL_PRIV5            8
+#define TTM_PL_SWAPPED          15
+
+#define TTM_PL_FLAG_SYSTEM      (1 << TTM_PL_SYSTEM)
+#define TTM_PL_FLAG_TT          (1 << TTM_PL_TT)
+#define TTM_PL_FLAG_VRAM        (1 << TTM_PL_VRAM)
+#define TTM_PL_FLAG_PRIV0       (1 << TTM_PL_PRIV0)
+#define TTM_PL_FLAG_PRIV1       (1 << TTM_PL_PRIV1)
+#define TTM_PL_FLAG_PRIV2       (1 << TTM_PL_PRIV2)
+#define TTM_PL_FLAG_PRIV3       (1 << TTM_PL_PRIV3)
+#define TTM_PL_FLAG_PRIV4       (1 << TTM_PL_PRIV4)
+#define TTM_PL_FLAG_PRIV5       (1 << TTM_PL_PRIV5)
+#define TTM_PL_FLAG_SWAPPED     (1 << TTM_PL_SWAPPED)
+#define TTM_PL_MASK_MEM         0x0000FFFF
+
+/*
+ * Other flags that affects data placement.
+ * TTM_PL_FLAG_CACHED indicates cache-coherent mappings
+ * if available.
+ * TTM_PL_FLAG_SHARED means that another application may
+ * reference the buffer.
+ * TTM_PL_FLAG_NO_EVICT means that the buffer may never
+ * be evicted to make room for other buffers.
+ */
+
+#define TTM_PL_FLAG_CACHED      (1 << 16)
+#define TTM_PL_FLAG_UNCACHED    (1 << 17)
+#define TTM_PL_FLAG_WC          (1 << 18)
+#define TTM_PL_FLAG_SHARED      (1 << 20)
+#define TTM_PL_FLAG_NO_EVICT    (1 << 21)
+
+#define TTM_PL_MASK_CACHING     (TTM_PL_FLAG_CACHED | \
+				 TTM_PL_FLAG_UNCACHED | \
+				 TTM_PL_FLAG_WC)
+
+#define TTM_PL_MASK_MEMTYPE     (TTM_PL_MASK_MEM | TTM_PL_MASK_CACHING)
+
+/*
+ * Access flags to be used for CPU- and GPU- mappings.
+ * The idea is that the TTM synchronization mechanism will
+ * allow concurrent READ access and exclusive write access.
+ * Currently GPU- and CPU accesses are exclusive.
+ */
+
+#define TTM_ACCESS_READ         (1 << 0)
+#define TTM_ACCESS_WRITE        (1 << 1)
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From 771fe6b912fca54f03e8a72eb63058b582775362 Mon Sep 17 00:00:00 2001
From: Jerome Glisse <jglisse@redhat.com>
Date: Fri, 5 Jun 2009 14:42:42 +0200
Subject: drm/radeon: introduce kernel modesetting for radeon hardware

Add kernel modesetting support to radeon driver, use the ttm memory
manager to manage memory and DRM/GEM to provide userspace API.
In order to avoid backward compatibility issue and to allow clean
design and code the radeon kernel modesetting use different code path
than old radeon/drm driver.

When kernel modesetting is enabled the IOCTL of radeon/drm
driver are considered as invalid and an error message is printed
in the log and they return failure.

KMS enabled userspace will use new API to talk with the radeon/drm
driver. The new API provide functions to create/destroy/share/mmap
buffer object which are then managed by the kernel memory manager
(here TTM). In order to submit command to the GPU the userspace
provide a buffer holding the command stream, along this buffer
userspace have to provide a list of buffer object used by the
command stream. The kernel radeon driver will then place buffer
in GPU accessible memory and will update command stream to reflect
the position of the different buffers.

The kernel will also perform security check on command stream
provided by the user, we want to catch and forbid any illegal use
of the GPU such as DMA into random system memory or into memory
not owned by the process supplying the command stream. This part
of the code is still incomplete and this why we propose that patch
as a staging driver addition, future security might forbid current
experimental userspace to run.

This code support the following hardware : R1XX,R2XX,R3XX,R4XX,R5XX
(radeon up to X1950). Works is underway to provide support for R6XX,
R7XX and newer hardware (radeon from HD2XXX to HD4XXX).

Authors:
    Jerome Glisse <jglisse@redhat.com>
    Dave Airlie <airlied@redhat.com>
    Alex Deucher <alexdeucher@gmail.com>

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/Kconfig                         |    7 +-
 drivers/gpu/drm/radeon/Kconfig                  |   34 +
 drivers/gpu/drm/radeon/Makefile                 |   12 +-
 drivers/gpu/drm/radeon/ObjectID.h               |  578 +++
 drivers/gpu/drm/radeon/atom-bits.h              |   48 +
 drivers/gpu/drm/radeon/atom-names.h             |  100 +
 drivers/gpu/drm/radeon/atom-types.h             |   42 +
 drivers/gpu/drm/radeon/atom.c                   | 1215 ++++++
 drivers/gpu/drm/radeon/atom.h                   |  149 +
 drivers/gpu/drm/radeon/atombios.h               | 4785 +++++++++++++++++++++++
 drivers/gpu/drm/radeon/atombios_crtc.c          |  695 ++++
 drivers/gpu/drm/radeon/r100.c                   | 1524 ++++++++
 drivers/gpu/drm/radeon/r300.c                   | 1116 ++++++
 drivers/gpu/drm/radeon/r300_reg.h               |   52 +-
 drivers/gpu/drm/radeon/r420.c                   |  223 ++
 drivers/gpu/drm/radeon/r500_reg.h               |  749 ++++
 drivers/gpu/drm/radeon/r520.c                   |  234 ++
 drivers/gpu/drm/radeon/r600.c                   |  169 +
 drivers/gpu/drm/radeon/r600_reg.h               |  114 +
 drivers/gpu/drm/radeon/radeon.h                 |  793 ++++
 drivers/gpu/drm/radeon/radeon_agp.c             |  249 ++
 drivers/gpu/drm/radeon/radeon_asic.h            |  405 ++
 drivers/gpu/drm/radeon/radeon_atombios.c        | 1226 ++++++
 drivers/gpu/drm/radeon/radeon_benchmark.c       |  133 +
 drivers/gpu/drm/radeon/radeon_bios.c            |  390 ++
 drivers/gpu/drm/radeon/radeon_clocks.c          |  833 ++++
 drivers/gpu/drm/radeon/radeon_combios.c         | 2476 ++++++++++++
 drivers/gpu/drm/radeon/radeon_connectors.c      |  603 +++
 drivers/gpu/drm/radeon/radeon_cs.c              |  249 ++
 drivers/gpu/drm/radeon/radeon_cursor.c          |  252 ++
 drivers/gpu/drm/radeon/radeon_device.c          |  813 ++++
 drivers/gpu/drm/radeon/radeon_display.c         |  692 ++++
 drivers/gpu/drm/radeon/radeon_drv.c             |  215 +-
 drivers/gpu/drm/radeon/radeon_encoders.c        | 1708 ++++++++
 drivers/gpu/drm/radeon/radeon_fb.c              |  825 ++++
 drivers/gpu/drm/radeon/radeon_fence.c           |  387 ++
 drivers/gpu/drm/radeon/radeon_fixed.h           |   50 +
 drivers/gpu/drm/radeon/radeon_gart.c            |  233 ++
 drivers/gpu/drm/radeon/radeon_gem.c             |  287 ++
 drivers/gpu/drm/radeon/radeon_i2c.c             |  209 +
 drivers/gpu/drm/radeon/radeon_irq_kms.c         |  158 +
 drivers/gpu/drm/radeon/radeon_kms.c             |  295 ++
 drivers/gpu/drm/radeon/radeon_legacy_crtc.c     | 1276 ++++++
 drivers/gpu/drm/radeon/radeon_legacy_encoders.c | 1284 ++++++
 drivers/gpu/drm/radeon/radeon_mode.h            |  394 ++
 drivers/gpu/drm/radeon/radeon_object.c          |  511 +++
 drivers/gpu/drm/radeon/radeon_object.h          |   45 +
 drivers/gpu/drm/radeon/radeon_reg.h             | 3570 +++++++++++++++++
 drivers/gpu/drm/radeon/radeon_ring.c            |  485 +++
 drivers/gpu/drm/radeon/radeon_ttm.c             |  653 ++++
 drivers/gpu/drm/radeon/rs400.c                  |  411 ++
 drivers/gpu/drm/radeon/rs600.c                  |  324 ++
 drivers/gpu/drm/radeon/rs690.c                  |  181 +
 drivers/gpu/drm/radeon/rs780.c                  |  102 +
 drivers/gpu/drm/radeon/rv515.c                  |  504 +++
 drivers/gpu/drm/radeon/rv770.c                  |  124 +
 drivers/staging/Kconfig                         |    2 +
 include/drm/radeon_drm.h                        |  130 +
 58 files changed, 35293 insertions(+), 30 deletions(-)
 create mode 100644 drivers/gpu/drm/radeon/Kconfig
 create mode 100644 drivers/gpu/drm/radeon/ObjectID.h
 create mode 100644 drivers/gpu/drm/radeon/atom-bits.h
 create mode 100644 drivers/gpu/drm/radeon/atom-names.h
 create mode 100644 drivers/gpu/drm/radeon/atom-types.h
 create mode 100644 drivers/gpu/drm/radeon/atom.c
 create mode 100644 drivers/gpu/drm/radeon/atom.h
 create mode 100644 drivers/gpu/drm/radeon/atombios.h
 create mode 100644 drivers/gpu/drm/radeon/atombios_crtc.c
 create mode 100644 drivers/gpu/drm/radeon/r100.c
 create mode 100644 drivers/gpu/drm/radeon/r300.c
 create mode 100644 drivers/gpu/drm/radeon/r420.c
 create mode 100644 drivers/gpu/drm/radeon/r500_reg.h
 create mode 100644 drivers/gpu/drm/radeon/r520.c
 create mode 100644 drivers/gpu/drm/radeon/r600.c
 create mode 100644 drivers/gpu/drm/radeon/r600_reg.h
 create mode 100644 drivers/gpu/drm/radeon/radeon.h
 create mode 100644 drivers/gpu/drm/radeon/radeon_agp.c
 create mode 100644 drivers/gpu/drm/radeon/radeon_asic.h
 create mode 100644 drivers/gpu/drm/radeon/radeon_atombios.c
 create mode 100644 drivers/gpu/drm/radeon/radeon_benchmark.c
 create mode 100644 drivers/gpu/drm/radeon/radeon_bios.c
 create mode 100644 drivers/gpu/drm/radeon/radeon_clocks.c
 create mode 100644 drivers/gpu/drm/radeon/radeon_combios.c
 create mode 100644 drivers/gpu/drm/radeon/radeon_connectors.c
 create mode 100644 drivers/gpu/drm/radeon/radeon_cs.c
 create mode 100644 drivers/gpu/drm/radeon/radeon_cursor.c
 create mode 100644 drivers/gpu/drm/radeon/radeon_device.c
 create mode 100644 drivers/gpu/drm/radeon/radeon_display.c
 create mode 100644 drivers/gpu/drm/radeon/radeon_encoders.c
 create mode 100644 drivers/gpu/drm/radeon/radeon_fb.c
 create mode 100644 drivers/gpu/drm/radeon/radeon_fence.c
 create mode 100644 drivers/gpu/drm/radeon/radeon_fixed.h
 create mode 100644 drivers/gpu/drm/radeon/radeon_gart.c
 create mode 100644 drivers/gpu/drm/radeon/radeon_gem.c
 create mode 100644 drivers/gpu/drm/radeon/radeon_i2c.c
 create mode 100644 drivers/gpu/drm/radeon/radeon_irq_kms.c
 create mode 100644 drivers/gpu/drm/radeon/radeon_kms.c
 create mode 100644 drivers/gpu/drm/radeon/radeon_legacy_crtc.c
 create mode 100644 drivers/gpu/drm/radeon/radeon_legacy_encoders.c
 create mode 100644 drivers/gpu/drm/radeon/radeon_mode.h
 create mode 100644 drivers/gpu/drm/radeon/radeon_object.c
 create mode 100644 drivers/gpu/drm/radeon/radeon_object.h
 create mode 100644 drivers/gpu/drm/radeon/radeon_reg.h
 create mode 100644 drivers/gpu/drm/radeon/radeon_ring.c
 create mode 100644 drivers/gpu/drm/radeon/radeon_ttm.c
 create mode 100644 drivers/gpu/drm/radeon/rs400.c
 create mode 100644 drivers/gpu/drm/radeon/rs600.c
 create mode 100644 drivers/gpu/drm/radeon/rs690.c
 create mode 100644 drivers/gpu/drm/radeon/rs780.c
 create mode 100644 drivers/gpu/drm/radeon/rv515.c
 create mode 100644 drivers/gpu/drm/radeon/rv770.c

(limited to 'include')

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 64a57adc0a2..c961fe415ae 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -19,7 +19,7 @@ menuconfig DRM
 	  (/dev/agpgart) support.
 
 config DRM_TTM
-	tristate "TTM memory manager"
+	tristate
 	depends on DRM
 	help
 	  GPU memory management subsystem for devices with multiple
@@ -44,6 +44,11 @@ config DRM_R128
 config DRM_RADEON
 	tristate "ATI Radeon"
 	depends on DRM && PCI
+	select FB_CFB_FILLRECT
+	select FB_CFB_COPYAREA
+	select FB_CFB_IMAGEBLIT
+	select FB
+	select FRAMEBUFFER_CONSOLE if !EMBEDDED
 	help
 	  Choose this option if you have an ATI Radeon graphics card.  There
 	  are both PCI and AGP versions.  You don't need to choose this to
diff --git a/drivers/gpu/drm/radeon/Kconfig b/drivers/gpu/drm/radeon/Kconfig
new file mode 100644
index 00000000000..2168d67f09a
--- /dev/null
+++ b/drivers/gpu/drm/radeon/Kconfig
@@ -0,0 +1,34 @@
+config DRM_RADEON_KMS
+	bool "Enable modesetting on radeon by default"
+	depends on DRM_RADEON
+	select DRM_TTM
+	help
+	  Choose this option if you want kernel modesetting enabled by default,
+	  and you have a new enough userspace to support this. Running old
+	  userspaces with this enabled will cause pain.
+
+	  When kernel modesetting is enabled the IOCTL of radeon/drm
+	  driver are considered as invalid and an error message is printed
+	  in the log and they return failure.
+
+	  KMS enabled userspace will use new API to talk with the radeon/drm
+	  driver. The new API provide functions to create/destroy/share/mmap
+	  buffer object which are then managed by the kernel memory manager
+	  (here TTM). In order to submit command to the GPU the userspace
+	  provide a buffer holding the command stream, along this buffer
+	  userspace have to provide a list of buffer object used by the
+	  command stream. The kernel radeon driver will then place buffer
+	  in GPU accessible memory and will update command stream to reflect
+	  the position of the different buffers.
+
+	  The kernel will also perform security check on command stream
+	  provided by the user, we want to catch and forbid any illegal use
+	  of the GPU such as DMA into random system memory or into memory
+	  not owned by the process supplying the command stream. This part
+	  of the code is still incomplete and this why we propose that patch
+	  as a staging driver addition, future security might forbid current
+	  experimental userspace to run.
+
+	  This code support the following hardware : R1XX,R2XX,R3XX,R4XX,R5XX
+	  (radeon up to X1950). Works is underway to provide support for R6XX,
+	  R7XX and newer hardware (radeon from HD2XXX to HD4XXX).
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index 52ce439a0f2..5fae1e074b4 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -3,7 +3,17 @@
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
 
 ccflags-y := -Iinclude/drm
-radeon-y := radeon_drv.o radeon_cp.o radeon_state.o radeon_mem.o radeon_irq.o r300_cmdbuf.o r600_cp.o
+radeon-y := radeon_drv.o radeon_cp.o radeon_state.o radeon_mem.o \
+	radeon_irq.o r300_cmdbuf.o r600_cp.o
+
+radeon-$(CONFIG_DRM_RADEON_KMS) += radeon_device.o radeon_kms.o \
+	radeon_atombios.o radeon_agp.o atombios_crtc.o radeon_combios.o \
+	atom.o radeon_fence.o radeon_ttm.o radeon_object.o radeon_gart.o \
+	radeon_legacy_crtc.o radeon_legacy_encoders.o radeon_connectors.o \
+	radeon_encoders.o radeon_display.o radeon_cursor.o radeon_i2c.o \
+	radeon_clocks.o radeon_fb.o radeon_gem.o radeon_ring.o radeon_irq_kms.o \
+	radeon_cs.o radeon_bios.o radeon_benchmark.o r100.o r300.o r420.o \
+	rs400.o rs600.o rs690.o rv515.o r520.o r600.o rs780.o rv770.o
 
 radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
 
diff --git a/drivers/gpu/drm/radeon/ObjectID.h b/drivers/gpu/drm/radeon/ObjectID.h
new file mode 100644
index 00000000000..6d0183c61d3
--- /dev/null
+++ b/drivers/gpu/drm/radeon/ObjectID.h
@@ -0,0 +1,578 @@
+/*
+* Copyright 2006-2007 Advanced Micro Devices, Inc.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+* OTHER DEALINGS IN THE SOFTWARE.
+*/
+/* based on stg/asic_reg/drivers/inc/asic_reg/ObjectID.h ver 23 */
+
+#ifndef _OBJECTID_H
+#define _OBJECTID_H
+
+#if defined(_X86_)
+#pragma pack(1)
+#endif
+
+/****************************************************/
+/* Graphics Object Type Definition                  */
+/****************************************************/
+#define GRAPH_OBJECT_TYPE_NONE                    0x0
+#define GRAPH_OBJECT_TYPE_GPU                     0x1
+#define GRAPH_OBJECT_TYPE_ENCODER                 0x2
+#define GRAPH_OBJECT_TYPE_CONNECTOR               0x3
+#define GRAPH_OBJECT_TYPE_ROUTER                  0x4
+/* deleted */
+
+/****************************************************/
+/* Encoder Object ID Definition                     */
+/****************************************************/
+#define ENCODER_OBJECT_ID_NONE                    0x00
+
+/* Radeon Class Display Hardware */
+#define ENCODER_OBJECT_ID_INTERNAL_LVDS           0x01
+#define ENCODER_OBJECT_ID_INTERNAL_TMDS1          0x02
+#define ENCODER_OBJECT_ID_INTERNAL_TMDS2          0x03
+#define ENCODER_OBJECT_ID_INTERNAL_DAC1           0x04
+#define ENCODER_OBJECT_ID_INTERNAL_DAC2           0x05	/* TV/CV DAC */
+#define ENCODER_OBJECT_ID_INTERNAL_SDVOA          0x06
+#define ENCODER_OBJECT_ID_INTERNAL_SDVOB          0x07
+
+/* External Third Party Encoders */
+#define ENCODER_OBJECT_ID_SI170B                  0x08
+#define ENCODER_OBJECT_ID_CH7303                  0x09
+#define ENCODER_OBJECT_ID_CH7301                  0x0A
+#define ENCODER_OBJECT_ID_INTERNAL_DVO1           0x0B	/* This belongs to Radeon Class Display Hardware */
+#define ENCODER_OBJECT_ID_EXTERNAL_SDVOA          0x0C
+#define ENCODER_OBJECT_ID_EXTERNAL_SDVOB          0x0D
+#define ENCODER_OBJECT_ID_TITFP513                0x0E
+#define ENCODER_OBJECT_ID_INTERNAL_LVTM1          0x0F	/* not used for Radeon */
+#define ENCODER_OBJECT_ID_VT1623                  0x10
+#define ENCODER_OBJECT_ID_HDMI_SI1930             0x11
+#define ENCODER_OBJECT_ID_HDMI_INTERNAL           0x12
+/* Kaleidoscope (KLDSCP) Class Display Hardware (internal) */
+#define ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1   0x13
+#define ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1    0x14
+#define ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1    0x15
+#define ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2    0x16	/* Shared with CV/TV and CRT */
+#define ENCODER_OBJECT_ID_SI178                   0X17	/* External TMDS (dual link, no HDCP.) */
+#define ENCODER_OBJECT_ID_MVPU_FPGA               0x18	/* MVPU FPGA chip */
+#define ENCODER_OBJECT_ID_INTERNAL_DDI            0x19
+#define ENCODER_OBJECT_ID_VT1625                  0x1A
+#define ENCODER_OBJECT_ID_HDMI_SI1932             0x1B
+#define ENCODER_OBJECT_ID_DP_AN9801               0x1C
+#define ENCODER_OBJECT_ID_DP_DP501                0x1D
+#define ENCODER_OBJECT_ID_INTERNAL_UNIPHY         0x1E
+#define ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA   0x1F
+#define ENCODER_OBJECT_ID_INTERNAL_UNIPHY1        0x20
+#define ENCODER_OBJECT_ID_INTERNAL_UNIPHY2        0x21
+
+#define ENCODER_OBJECT_ID_GENERAL_EXTERNAL_DVO    0xFF
+
+/****************************************************/
+/* Connector Object ID Definition                   */
+/****************************************************/
+#define CONNECTOR_OBJECT_ID_NONE                  0x00
+#define CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_I     0x01
+#define CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_I       0x02
+#define CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D     0x03
+#define CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D       0x04
+#define CONNECTOR_OBJECT_ID_VGA                   0x05
+#define CONNECTOR_OBJECT_ID_COMPOSITE             0x06
+#define CONNECTOR_OBJECT_ID_SVIDEO                0x07
+#define CONNECTOR_OBJECT_ID_YPbPr                 0x08
+#define CONNECTOR_OBJECT_ID_D_CONNECTOR           0x09
+#define CONNECTOR_OBJECT_ID_9PIN_DIN              0x0A	/* Supports both CV & TV */
+#define CONNECTOR_OBJECT_ID_SCART                 0x0B
+#define CONNECTOR_OBJECT_ID_HDMI_TYPE_A           0x0C
+#define CONNECTOR_OBJECT_ID_HDMI_TYPE_B           0x0D
+#define CONNECTOR_OBJECT_ID_LVDS                  0x0E
+#define CONNECTOR_OBJECT_ID_7PIN_DIN              0x0F
+#define CONNECTOR_OBJECT_ID_PCIE_CONNECTOR        0x10
+#define CONNECTOR_OBJECT_ID_CROSSFIRE             0x11
+#define CONNECTOR_OBJECT_ID_HARDCODE_DVI          0x12
+#define CONNECTOR_OBJECT_ID_DISPLAYPORT           0x13
+
+/* deleted */
+
+/****************************************************/
+/* Router Object ID Definition                      */
+/****************************************************/
+#define ROUTER_OBJECT_ID_NONE											0x00
+#define ROUTER_OBJECT_ID_I2C_EXTENDER_CNTL				0x01
+
+/****************************************************/
+/* Graphics Object ENUM ID Definition               */
+/****************************************************/
+#define GRAPH_OBJECT_ENUM_ID1                     0x01
+#define GRAPH_OBJECT_ENUM_ID2                     0x02
+#define GRAPH_OBJECT_ENUM_ID3                     0x03
+#define GRAPH_OBJECT_ENUM_ID4                     0x04
+#define GRAPH_OBJECT_ENUM_ID5                     0x05
+#define GRAPH_OBJECT_ENUM_ID6                     0x06
+
+/****************************************************/
+/* Graphics Object ID Bit definition                */
+/****************************************************/
+#define OBJECT_ID_MASK                            0x00FF
+#define ENUM_ID_MASK                              0x0700
+#define RESERVED1_ID_MASK                         0x0800
+#define OBJECT_TYPE_MASK                          0x7000
+#define RESERVED2_ID_MASK                         0x8000
+
+#define OBJECT_ID_SHIFT                           0x00
+#define ENUM_ID_SHIFT                             0x08
+#define OBJECT_TYPE_SHIFT                         0x0C
+
+/****************************************************/
+/* Graphics Object family definition                */
+/****************************************************/
+#define CONSTRUCTOBJECTFAMILYID(GRAPHICS_OBJECT_TYPE, GRAPHICS_OBJECT_ID) \
+	(GRAPHICS_OBJECT_TYPE << OBJECT_TYPE_SHIFT | \
+	 GRAPHICS_OBJECT_ID   << OBJECT_ID_SHIFT)
+/****************************************************/
+/* GPU Object ID definition - Shared with BIOS      */
+/****************************************************/
+#define GPU_ENUM_ID1	(GRAPH_OBJECT_TYPE_GPU << OBJECT_TYPE_SHIFT |\
+			 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT)
+
+/****************************************************/
+/* Encoder Object ID definition - Shared with BIOS  */
+/****************************************************/
+/*
+#define ENCODER_INTERNAL_LVDS_ENUM_ID1        0x2101
+#define ENCODER_INTERNAL_TMDS1_ENUM_ID1       0x2102
+#define ENCODER_INTERNAL_TMDS2_ENUM_ID1       0x2103
+#define ENCODER_INTERNAL_DAC1_ENUM_ID1        0x2104
+#define ENCODER_INTERNAL_DAC2_ENUM_ID1        0x2105
+#define ENCODER_INTERNAL_SDVOA_ENUM_ID1       0x2106
+#define ENCODER_INTERNAL_SDVOB_ENUM_ID1       0x2107
+#define ENCODER_SIL170B_ENUM_ID1              0x2108
+#define ENCODER_CH7303_ENUM_ID1               0x2109
+#define ENCODER_CH7301_ENUM_ID1               0x210A
+#define ENCODER_INTERNAL_DVO1_ENUM_ID1        0x210B
+#define ENCODER_EXTERNAL_SDVOA_ENUM_ID1       0x210C
+#define ENCODER_EXTERNAL_SDVOB_ENUM_ID1       0x210D
+#define ENCODER_TITFP513_ENUM_ID1             0x210E
+#define ENCODER_INTERNAL_LVTM1_ENUM_ID1       0x210F
+#define ENCODER_VT1623_ENUM_ID1               0x2110
+#define ENCODER_HDMI_SI1930_ENUM_ID1          0x2111
+#define ENCODER_HDMI_INTERNAL_ENUM_ID1        0x2112
+#define ENCODER_INTERNAL_KLDSCP_TMDS1_ENUM_ID1   0x2113
+#define ENCODER_INTERNAL_KLDSCP_DVO1_ENUM_ID1    0x2114
+#define ENCODER_INTERNAL_KLDSCP_DAC1_ENUM_ID1    0x2115
+#define ENCODER_INTERNAL_KLDSCP_DAC2_ENUM_ID1    0x2116
+#define ENCODER_SI178_ENUM_ID1                   0x2117
+#define ENCODER_MVPU_FPGA_ENUM_ID1               0x2118
+#define ENCODER_INTERNAL_DDI_ENUM_ID1            0x2119
+#define ENCODER_VT1625_ENUM_ID1                  0x211A
+#define ENCODER_HDMI_SI1932_ENUM_ID1             0x211B
+#define ENCODER_ENCODER_DP_AN9801_ENUM_ID1       0x211C
+#define ENCODER_DP_DP501_ENUM_ID1                0x211D
+#define ENCODER_INTERNAL_UNIPHY_ENUM_ID1         0x211E
+*/
+#define ENCODER_INTERNAL_LVDS_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_INTERNAL_LVDS << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_TMDS1_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_INTERNAL_TMDS1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_TMDS2_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_INTERNAL_TMDS2 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_DAC1_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_INTERNAL_DAC1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_DAC2_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_INTERNAL_DAC2 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_SDVOA_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_INTERNAL_SDVOA << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_SDVOA_ENUM_ID2 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_INTERNAL_SDVOA << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_SDVOB_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_INTERNAL_SDVOB << OBJECT_ID_SHIFT)
+
+#define ENCODER_SIL170B_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_SI170B << OBJECT_ID_SHIFT)
+
+#define ENCODER_CH7303_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_CH7303 << OBJECT_ID_SHIFT)
+
+#define ENCODER_CH7301_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_CH7301 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_DVO1_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_INTERNAL_DVO1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_EXTERNAL_SDVOA_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_EXTERNAL_SDVOA << OBJECT_ID_SHIFT)
+
+#define ENCODER_EXTERNAL_SDVOA_ENUM_ID2 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_EXTERNAL_SDVOA << OBJECT_ID_SHIFT)
+
+#define ENCODER_EXTERNAL_SDVOB_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_EXTERNAL_SDVOB << OBJECT_ID_SHIFT)
+
+#define ENCODER_TITFP513_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_TITFP513 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_LVTM1_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_INTERNAL_LVTM1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_VT1623_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_VT1623 << OBJECT_ID_SHIFT)
+
+#define ENCODER_HDMI_SI1930_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_HDMI_SI1930 << OBJECT_ID_SHIFT)
+
+#define ENCODER_HDMI_INTERNAL_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_HDMI_INTERNAL << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_KLDSCP_TMDS1_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_KLDSCP_TMDS1_ENUM_ID2 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_KLDSCP_DVO1_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_KLDSCP_DAC1_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_KLDSCP_DAC2_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2 << OBJECT_ID_SHIFT) /* Shared with CV/TV and CRT */
+
+#define ENCODER_SI178_ENUM_ID1  \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_SI178 << OBJECT_ID_SHIFT)
+
+#define ENCODER_MVPU_FPGA_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_MVPU_FPGA << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_DDI_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_INTERNAL_DDI << OBJECT_ID_SHIFT)
+
+#define ENCODER_VT1625_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_VT1625 << OBJECT_ID_SHIFT)
+
+#define ENCODER_HDMI_SI1932_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_HDMI_SI1932 << OBJECT_ID_SHIFT)
+
+#define ENCODER_DP_DP501_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_DP_DP501 << OBJECT_ID_SHIFT)
+
+#define ENCODER_DP_AN9801_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_DP_AN9801 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_UNIPHY_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_INTERNAL_UNIPHY << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_UNIPHY_ENUM_ID2 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_INTERNAL_UNIPHY << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_KLDSCP_LVTMA_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_UNIPHY1_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_INTERNAL_UNIPHY1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_UNIPHY1_ENUM_ID2 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_INTERNAL_UNIPHY1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_UNIPHY2_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_INTERNAL_UNIPHY2 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_UNIPHY2_ENUM_ID2 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_INTERNAL_UNIPHY2 << OBJECT_ID_SHIFT)
+
+#define ENCODER_GENERAL_EXTERNAL_DVO_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ENCODER_OBJECT_ID_GENERAL_EXTERNAL_DVO << OBJECT_ID_SHIFT)
+
+/****************************************************/
+/* Connector Object ID definition - Shared with BIOS */
+/****************************************************/
+/*
+#define CONNECTOR_SINGLE_LINK_DVI_I_ENUM_ID1        0x3101
+#define CONNECTOR_DUAL_LINK_DVI_I_ENUM_ID1          0x3102
+#define CONNECTOR_SINGLE_LINK_DVI_D_ENUM_ID1        0x3103
+#define CONNECTOR_DUAL_LINK_DVI_D_ENUM_ID1          0x3104
+#define CONNECTOR_VGA_ENUM_ID1                      0x3105
+#define CONNECTOR_COMPOSITE_ENUM_ID1                0x3106
+#define CONNECTOR_SVIDEO_ENUM_ID1                   0x3107
+#define CONNECTOR_YPbPr_ENUM_ID1                    0x3108
+#define CONNECTOR_D_CONNECTORE_ENUM_ID1             0x3109
+#define CONNECTOR_9PIN_DIN_ENUM_ID1                 0x310A
+#define CONNECTOR_SCART_ENUM_ID1                    0x310B
+#define CONNECTOR_HDMI_TYPE_A_ENUM_ID1              0x310C
+#define CONNECTOR_HDMI_TYPE_B_ENUM_ID1              0x310D
+#define CONNECTOR_LVDS_ENUM_ID1                     0x310E
+#define CONNECTOR_7PIN_DIN_ENUM_ID1                 0x310F
+#define CONNECTOR_PCIE_CONNECTOR_ENUM_ID1           0x3110
+*/
+#define CONNECTOR_LVDS_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_LVDS << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SINGLE_LINK_DVI_I_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_I << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SINGLE_LINK_DVI_I_ENUM_ID2 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_I << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DUAL_LINK_DVI_I_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_I << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DUAL_LINK_DVI_I_ENUM_ID2 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_I << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SINGLE_LINK_DVI_D_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SINGLE_LINK_DVI_D_ENUM_ID2 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DUAL_LINK_DVI_D_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_VGA_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_VGA << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_VGA_ENUM_ID2 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_VGA << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_COMPOSITE_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_COMPOSITE << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SVIDEO_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_SVIDEO << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_YPbPr_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_YPbPr << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_D_CONNECTOR_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_D_CONNECTOR << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_9PIN_DIN_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_9PIN_DIN << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SCART_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_SCART << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_HDMI_TYPE_A_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_HDMI_TYPE_A << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_HDMI_TYPE_B_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_HDMI_TYPE_B << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_7PIN_DIN_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_7PIN_DIN << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_PCIE_CONNECTOR_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_PCIE_CONNECTOR << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_PCIE_CONNECTOR_ENUM_ID2 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_PCIE_CONNECTOR << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_CROSSFIRE_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_CROSSFIRE << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_CROSSFIRE_ENUM_ID2 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_CROSSFIRE << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_HARDCODE_DVI_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_HARDCODE_DVI << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_HARDCODE_DVI_ENUM_ID2 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_HARDCODE_DVI << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DISPLAYPORT_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_DISPLAYPORT << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DISPLAYPORT_ENUM_ID2 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_DISPLAYPORT << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DISPLAYPORT_ENUM_ID3 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID3 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_DISPLAYPORT << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DISPLAYPORT_ENUM_ID4 \
+	(GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID4 << ENUM_ID_SHIFT |\
+	 CONNECTOR_OBJECT_ID_DISPLAYPORT << OBJECT_ID_SHIFT)
+
+/****************************************************/
+/* Router Object ID definition - Shared with BIOS   */
+/****************************************************/
+#define ROUTER_I2C_EXTENDER_CNTL_ENUM_ID1 \
+	(GRAPH_OBJECT_TYPE_ROUTER << OBJECT_TYPE_SHIFT |\
+	 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+	 ROUTER_OBJECT_ID_I2C_EXTENDER_CNTL << OBJECT_ID_SHIFT)
+
+/* deleted */
+
+/****************************************************/
+/* Object Cap definition - Shared with BIOS         */
+/****************************************************/
+#define GRAPHICS_OBJECT_CAP_I2C                 0x00000001L
+#define GRAPHICS_OBJECT_CAP_TABLE_ID            0x00000002L
+
+#define GRAPHICS_OBJECT_I2CCOMMAND_TABLE_ID                   0x01
+#define GRAPHICS_OBJECT_HOTPLUGDETECTIONINTERUPT_TABLE_ID     0x02
+#define GRAPHICS_OBJECT_ENCODER_OUTPUT_PROTECTION_TABLE_ID    0x03
+
+#if defined(_X86_)
+#pragma pack()
+#endif
+
+#endif /*GRAPHICTYPE */
diff --git a/drivers/gpu/drm/radeon/atom-bits.h b/drivers/gpu/drm/radeon/atom-bits.h
new file mode 100644
index 00000000000..e8fae5c7751
--- /dev/null
+++ b/drivers/gpu/drm/radeon/atom-bits.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Stanislaw Skowronek
+ */
+
+#ifndef ATOM_BITS_H
+#define ATOM_BITS_H
+
+static inline uint8_t get_u8(void *bios, int ptr)
+{
+    return ((unsigned char *)bios)[ptr];
+}
+#define U8(ptr) get_u8(ctx->ctx->bios, (ptr))
+#define CU8(ptr) get_u8(ctx->bios, (ptr))
+static inline uint16_t get_u16(void *bios, int ptr)
+{
+    return get_u8(bios ,ptr)|(((uint16_t)get_u8(bios, ptr+1))<<8);
+}
+#define U16(ptr) get_u16(ctx->ctx->bios, (ptr))
+#define CU16(ptr) get_u16(ctx->bios, (ptr))
+static inline uint32_t get_u32(void *bios, int ptr)
+{
+    return get_u16(bios, ptr)|(((uint32_t)get_u16(bios, ptr+2))<<16);
+}
+#define U32(ptr) get_u32(ctx->ctx->bios, (ptr))
+#define CU32(ptr) get_u32(ctx->bios, (ptr))
+#define CSTR(ptr) (((char *)(ctx->bios))+(ptr))
+
+#endif
diff --git a/drivers/gpu/drm/radeon/atom-names.h b/drivers/gpu/drm/radeon/atom-names.h
new file mode 100644
index 00000000000..6f907a5ffa5
--- /dev/null
+++ b/drivers/gpu/drm/radeon/atom-names.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Stanislaw Skowronek
+ */
+
+#ifndef ATOM_NAMES_H
+#define ATOM_NAMES_H
+
+#include "atom.h"
+
+#ifdef ATOM_DEBUG
+
+#define ATOM_OP_NAMES_CNT 123
+static char *atom_op_names[ATOM_OP_NAMES_CNT] = {
+"RESERVED", "MOVE_REG", "MOVE_PS", "MOVE_WS", "MOVE_FB", "MOVE_PLL",
+"MOVE_MC", "AND_REG", "AND_PS", "AND_WS", "AND_FB", "AND_PLL", "AND_MC",
+"OR_REG", "OR_PS", "OR_WS", "OR_FB", "OR_PLL", "OR_MC", "SHIFT_LEFT_REG",
+"SHIFT_LEFT_PS", "SHIFT_LEFT_WS", "SHIFT_LEFT_FB", "SHIFT_LEFT_PLL",
+"SHIFT_LEFT_MC", "SHIFT_RIGHT_REG", "SHIFT_RIGHT_PS", "SHIFT_RIGHT_WS",
+"SHIFT_RIGHT_FB", "SHIFT_RIGHT_PLL", "SHIFT_RIGHT_MC", "MUL_REG",
+"MUL_PS", "MUL_WS", "MUL_FB", "MUL_PLL", "MUL_MC", "DIV_REG", "DIV_PS",
+"DIV_WS", "DIV_FB", "DIV_PLL", "DIV_MC", "ADD_REG", "ADD_PS", "ADD_WS",
+"ADD_FB", "ADD_PLL", "ADD_MC", "SUB_REG", "SUB_PS", "SUB_WS", "SUB_FB",
+"SUB_PLL", "SUB_MC", "SET_ATI_PORT", "SET_PCI_PORT", "SET_SYS_IO_PORT",
+"SET_REG_BLOCK", "SET_FB_BASE", "COMPARE_REG", "COMPARE_PS",
+"COMPARE_WS", "COMPARE_FB", "COMPARE_PLL", "COMPARE_MC", "SWITCH",
+"JUMP", "JUMP_EQUAL", "JUMP_BELOW", "JUMP_ABOVE", "JUMP_BELOW_OR_EQUAL",
+"JUMP_ABOVE_OR_EQUAL", "JUMP_NOT_EQUAL", "TEST_REG", "TEST_PS", "TEST_WS",
+"TEST_FB", "TEST_PLL", "TEST_MC", "DELAY_MILLISEC", "DELAY_MICROSEC",
+"CALL_TABLE", "REPEAT", "CLEAR_REG", "CLEAR_PS", "CLEAR_WS", "CLEAR_FB",
+"CLEAR_PLL", "CLEAR_MC", "NOP", "EOT", "MASK_REG", "MASK_PS", "MASK_WS",
+"MASK_FB", "MASK_PLL", "MASK_MC", "POST_CARD", "BEEP", "SAVE_REG",
+"RESTORE_REG", "SET_DATA_BLOCK", "XOR_REG", "XOR_PS", "XOR_WS", "XOR_FB",
+"XOR_PLL", "XOR_MC", "SHL_REG", "SHL_PS", "SHL_WS", "SHL_FB", "SHL_PLL",
+"SHL_MC", "SHR_REG", "SHR_PS", "SHR_WS", "SHR_FB", "SHR_PLL", "SHR_MC",
+"DEBUG", "CTB_DS",
+};
+
+#define ATOM_TABLE_NAMES_CNT 74
+static char *atom_table_names[ATOM_TABLE_NAMES_CNT] = {
+"ASIC_Init", "GetDisplaySurfaceSize", "ASIC_RegistersInit",
+"VRAM_BlockVenderDetection", "SetClocksRatio", "MemoryControllerInit",
+"GPIO_PinInit", "MemoryParamAdjust", "DVOEncoderControl",
+"GPIOPinControl", "SetEngineClock", "SetMemoryClock", "SetPixelClock",
+"DynamicClockGating", "ResetMemoryDLL", "ResetMemoryDevice",
+"MemoryPLLInit", "EnableMemorySelfRefresh", "AdjustMemoryController",
+"EnableASIC_StaticPwrMgt", "ASIC_StaticPwrMgtStatusChange",
+"DAC_LoadDetection", "TMDS2EncoderControl", "LCD1OutputControl",
+"DAC1EncoderControl", "DAC2EncoderControl", "DVOOutputControl",
+"CV1OutputControl", "SetCRTC_DPM_State", "TVEncoderControl",
+"TMDS1EncoderControl", "LVDSEncoderControl", "TV1OutputControl",
+"EnableScaler", "BlankCRTC", "EnableCRTC", "GetPixelClock",
+"EnableVGA_Render", "EnableVGA_Access", "SetCRTC_Timing",
+"SetCRTC_OverScan", "SetCRTC_Replication", "SelectCRTC_Source",
+"EnableGraphSurfaces", "UpdateCRTC_DoubleBufferRegisters",
+"LUT_AutoFill", "EnableHW_IconCursor", "GetMemoryClock",
+"GetEngineClock", "SetCRTC_UsingDTDTiming", "TVBootUpStdPinDetection",
+"DFP2OutputControl", "VRAM_BlockDetectionByStrap", "MemoryCleanUp",
+"ReadEDIDFromHWAssistedI2C", "WriteOneByteToHWAssistedI2C",
+"ReadHWAssistedI2CStatus", "SpeedFanControl", "PowerConnectorDetection",
+"MC_Synchronization", "ComputeMemoryEnginePLL", "MemoryRefreshConversion",
+"VRAM_GetCurrentInfoBlock", "DynamicMemorySettings", "MemoryTraining",
+"EnableLVDS_SS", "DFP1OutputControl", "SetVoltage", "CRT1OutputControl",
+"CRT2OutputControl", "SetupHWAssistedI2CStatus", "ClockSource",
+"MemoryDeviceInit", "EnableYUV",
+};
+
+#define ATOM_IO_NAMES_CNT 5
+static char *atom_io_names[ATOM_IO_NAMES_CNT] = {
+"MM", "PLL", "MC", "PCIE", "PCIE PORT",
+};
+
+#else
+
+#define ATOM_OP_NAMES_CNT 0
+#define ATOM_TABLE_NAMES_CNT 0
+#define ATOM_IO_NAMES_CNT 0
+
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/radeon/atom-types.h b/drivers/gpu/drm/radeon/atom-types.h
new file mode 100644
index 00000000000..1125b866cdb
--- /dev/null
+++ b/drivers/gpu/drm/radeon/atom-types.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Dave Airlie
+ */
+
+#ifndef ATOM_TYPES_H
+#define ATOM_TYPES_H
+
+/* sync atom types to kernel types */
+
+typedef uint16_t USHORT;
+typedef uint32_t ULONG;
+typedef uint8_t UCHAR;
+
+
+#ifndef ATOM_BIG_ENDIAN
+#if defined(__BIG_ENDIAN)
+#define ATOM_BIG_ENDIAN 1
+#else
+#define ATOM_BIG_ENDIAN 0
+#endif
+#endif
+#endif
diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c
new file mode 100644
index 00000000000..901befe03da
--- /dev/null
+++ b/drivers/gpu/drm/radeon/atom.c
@@ -0,0 +1,1215 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Stanislaw Skowronek
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+
+#define ATOM_DEBUG
+
+#include "atom.h"
+#include "atom-names.h"
+#include "atom-bits.h"
+
+#define ATOM_COND_ABOVE		0
+#define ATOM_COND_ABOVEOREQUAL	1
+#define ATOM_COND_ALWAYS	2
+#define ATOM_COND_BELOW		3
+#define ATOM_COND_BELOWOREQUAL	4
+#define ATOM_COND_EQUAL		5
+#define ATOM_COND_NOTEQUAL	6
+
+#define ATOM_PORT_ATI	0
+#define ATOM_PORT_PCI	1
+#define ATOM_PORT_SYSIO	2
+
+#define ATOM_UNIT_MICROSEC	0
+#define ATOM_UNIT_MILLISEC	1
+
+#define PLL_INDEX	2
+#define PLL_DATA	3
+
+typedef struct {
+	struct atom_context *ctx;
+
+	uint32_t *ps, *ws;
+	int ps_shift;
+	uint16_t start;
+} atom_exec_context;
+
+int atom_debug = 0;
+void atom_execute_table(struct atom_context *ctx, int index, uint32_t * params);
+
+static uint32_t atom_arg_mask[8] =
+    { 0xFFFFFFFF, 0xFFFF, 0xFFFF00, 0xFFFF0000, 0xFF, 0xFF00, 0xFF0000,
+0xFF000000 };
+static int atom_arg_shift[8] = { 0, 0, 8, 16, 0, 8, 16, 24 };
+
+static int atom_dst_to_src[8][4] = {
+	/* translate destination alignment field to the source alignment encoding */
+	{0, 0, 0, 0},
+	{1, 2, 3, 0},
+	{1, 2, 3, 0},
+	{1, 2, 3, 0},
+	{4, 5, 6, 7},
+	{4, 5, 6, 7},
+	{4, 5, 6, 7},
+	{4, 5, 6, 7},
+};
+static int atom_def_dst[8] = { 0, 0, 1, 2, 0, 1, 2, 3 };
+
+static int debug_depth = 0;
+#ifdef ATOM_DEBUG
+static void debug_print_spaces(int n)
+{
+	while (n--)
+		printk("   ");
+}
+
+#define DEBUG(...) do if (atom_debug) { printk(KERN_DEBUG __VA_ARGS__); } while (0)
+#define SDEBUG(...) do if (atom_debug) { printk(KERN_DEBUG); debug_print_spaces(debug_depth); printk(__VA_ARGS__); } while (0)
+#else
+#define DEBUG(...) do { } while (0)
+#define SDEBUG(...) do { } while (0)
+#endif
+
+static uint32_t atom_iio_execute(struct atom_context *ctx, int base,
+				 uint32_t index, uint32_t data)
+{
+	uint32_t temp = 0xCDCDCDCD;
+	while (1)
+		switch (CU8(base)) {
+		case ATOM_IIO_NOP:
+			base++;
+			break;
+		case ATOM_IIO_READ:
+			temp = ctx->card->reg_read(ctx->card, CU16(base + 1));
+			base += 3;
+			break;
+		case ATOM_IIO_WRITE:
+			ctx->card->reg_write(ctx->card, CU16(base + 1), temp);
+			base += 3;
+			break;
+		case ATOM_IIO_CLEAR:
+			temp &=
+			    ~((0xFFFFFFFF >> (32 - CU8(base + 1))) <<
+			      CU8(base + 2));
+			base += 3;
+			break;
+		case ATOM_IIO_SET:
+			temp |=
+			    (0xFFFFFFFF >> (32 - CU8(base + 1))) << CU8(base +
+									2);
+			base += 3;
+			break;
+		case ATOM_IIO_MOVE_INDEX:
+			temp &=
+			    ~((0xFFFFFFFF >> (32 - CU8(base + 1))) <<
+			      CU8(base + 2));
+			temp |=
+			    ((index >> CU8(base + 2)) &
+			     (0xFFFFFFFF >> (32 - CU8(base + 1)))) << CU8(base +
+									  3);
+			base += 4;
+			break;
+		case ATOM_IIO_MOVE_DATA:
+			temp &=
+			    ~((0xFFFFFFFF >> (32 - CU8(base + 1))) <<
+			      CU8(base + 2));
+			temp |=
+			    ((data >> CU8(base + 2)) &
+			     (0xFFFFFFFF >> (32 - CU8(base + 1)))) << CU8(base +
+									  3);
+			base += 4;
+			break;
+		case ATOM_IIO_MOVE_ATTR:
+			temp &=
+			    ~((0xFFFFFFFF >> (32 - CU8(base + 1))) <<
+			      CU8(base + 2));
+			temp |=
+			    ((ctx->
+			      io_attr >> CU8(base + 2)) & (0xFFFFFFFF >> (32 -
+									  CU8
+									  (base
+									   +
+									   1))))
+			    << CU8(base + 3);
+			base += 4;
+			break;
+		case ATOM_IIO_END:
+			return temp;
+		default:
+			printk(KERN_INFO "Unknown IIO opcode.\n");
+			return 0;
+		}
+}
+
+static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr,
+				 int *ptr, uint32_t *saved, int print)
+{
+	uint32_t idx, val = 0xCDCDCDCD, align, arg;
+	struct atom_context *gctx = ctx->ctx;
+	arg = attr & 7;
+	align = (attr >> 3) & 7;
+	switch (arg) {
+	case ATOM_ARG_REG:
+		idx = U16(*ptr);
+		(*ptr) += 2;
+		if (print)
+			DEBUG("REG[0x%04X]", idx);
+		idx += gctx->reg_block;
+		switch (gctx->io_mode) {
+		case ATOM_IO_MM:
+			val = gctx->card->reg_read(gctx->card, idx);
+			break;
+		case ATOM_IO_PCI:
+			printk(KERN_INFO
+			       "PCI registers are not implemented.\n");
+			return 0;
+		case ATOM_IO_SYSIO:
+			printk(KERN_INFO
+			       "SYSIO registers are not implemented.\n");
+			return 0;
+		default:
+			if (!(gctx->io_mode & 0x80)) {
+				printk(KERN_INFO "Bad IO mode.\n");
+				return 0;
+			}
+			if (!gctx->iio[gctx->io_mode & 0x7F]) {
+				printk(KERN_INFO
+				       "Undefined indirect IO read method %d.\n",
+				       gctx->io_mode & 0x7F);
+				return 0;
+			}
+			val =
+			    atom_iio_execute(gctx,
+					     gctx->iio[gctx->io_mode & 0x7F],
+					     idx, 0);
+		}
+		break;
+	case ATOM_ARG_PS:
+		idx = U8(*ptr);
+		(*ptr)++;
+		val = le32_to_cpu(ctx->ps[idx]);
+		if (print)
+			DEBUG("PS[0x%02X,0x%04X]", idx, val);
+		break;
+	case ATOM_ARG_WS:
+		idx = U8(*ptr);
+		(*ptr)++;
+		if (print)
+			DEBUG("WS[0x%02X]", idx);
+		switch (idx) {
+		case ATOM_WS_QUOTIENT:
+			val = gctx->divmul[0];
+			break;
+		case ATOM_WS_REMAINDER:
+			val = gctx->divmul[1];
+			break;
+		case ATOM_WS_DATAPTR:
+			val = gctx->data_block;
+			break;
+		case ATOM_WS_SHIFT:
+			val = gctx->shift;
+			break;
+		case ATOM_WS_OR_MASK:
+			val = 1 << gctx->shift;
+			break;
+		case ATOM_WS_AND_MASK:
+			val = ~(1 << gctx->shift);
+			break;
+		case ATOM_WS_FB_WINDOW:
+			val = gctx->fb_base;
+			break;
+		case ATOM_WS_ATTRIBUTES:
+			val = gctx->io_attr;
+			break;
+		default:
+			val = ctx->ws[idx];
+		}
+		break;
+	case ATOM_ARG_ID:
+		idx = U16(*ptr);
+		(*ptr) += 2;
+		if (print) {
+			if (gctx->data_block)
+				DEBUG("ID[0x%04X+%04X]", idx, gctx->data_block);
+			else
+				DEBUG("ID[0x%04X]", idx);
+		}
+		val = U32(idx + gctx->data_block);
+		break;
+	case ATOM_ARG_FB:
+		idx = U8(*ptr);
+		(*ptr)++;
+		if (print)
+			DEBUG("FB[0x%02X]", idx);
+		printk(KERN_INFO "FB access is not implemented.\n");
+		return 0;
+	case ATOM_ARG_IMM:
+		switch (align) {
+		case ATOM_SRC_DWORD:
+			val = U32(*ptr);
+			(*ptr) += 4;
+			if (print)
+				DEBUG("IMM 0x%08X\n", val);
+			return val;
+		case ATOM_SRC_WORD0:
+		case ATOM_SRC_WORD8:
+		case ATOM_SRC_WORD16:
+			val = U16(*ptr);
+			(*ptr) += 2;
+			if (print)
+				DEBUG("IMM 0x%04X\n", val);
+			return val;
+		case ATOM_SRC_BYTE0:
+		case ATOM_SRC_BYTE8:
+		case ATOM_SRC_BYTE16:
+		case ATOM_SRC_BYTE24:
+			val = U8(*ptr);
+			(*ptr)++;
+			if (print)
+				DEBUG("IMM 0x%02X\n", val);
+			return val;
+		}
+		return 0;
+	case ATOM_ARG_PLL:
+		idx = U8(*ptr);
+		(*ptr)++;
+		if (print)
+			DEBUG("PLL[0x%02X]", idx);
+		val = gctx->card->pll_read(gctx->card, idx);
+		break;
+	case ATOM_ARG_MC:
+		idx = U8(*ptr);
+		(*ptr)++;
+		if (print)
+			DEBUG("MC[0x%02X]", idx);
+		val = gctx->card->mc_read(gctx->card, idx);
+		break;
+	}
+	if (saved)
+		*saved = val;
+	val &= atom_arg_mask[align];
+	val >>= atom_arg_shift[align];
+	if (print)
+		switch (align) {
+		case ATOM_SRC_DWORD:
+			DEBUG(".[31:0] -> 0x%08X\n", val);
+			break;
+		case ATOM_SRC_WORD0:
+			DEBUG(".[15:0] -> 0x%04X\n", val);
+			break;
+		case ATOM_SRC_WORD8:
+			DEBUG(".[23:8] -> 0x%04X\n", val);
+			break;
+		case ATOM_SRC_WORD16:
+			DEBUG(".[31:16] -> 0x%04X\n", val);
+			break;
+		case ATOM_SRC_BYTE0:
+			DEBUG(".[7:0] -> 0x%02X\n", val);
+			break;
+		case ATOM_SRC_BYTE8:
+			DEBUG(".[15:8] -> 0x%02X\n", val);
+			break;
+		case ATOM_SRC_BYTE16:
+			DEBUG(".[23:16] -> 0x%02X\n", val);
+			break;
+		case ATOM_SRC_BYTE24:
+			DEBUG(".[31:24] -> 0x%02X\n", val);
+			break;
+		}
+	return val;
+}
+
+static void atom_skip_src_int(atom_exec_context *ctx, uint8_t attr, int *ptr)
+{
+	uint32_t align = (attr >> 3) & 7, arg = attr & 7;
+	switch (arg) {
+	case ATOM_ARG_REG:
+	case ATOM_ARG_ID:
+		(*ptr) += 2;
+		break;
+	case ATOM_ARG_PLL:
+	case ATOM_ARG_MC:
+	case ATOM_ARG_PS:
+	case ATOM_ARG_WS:
+	case ATOM_ARG_FB:
+		(*ptr)++;
+		break;
+	case ATOM_ARG_IMM:
+		switch (align) {
+		case ATOM_SRC_DWORD:
+			(*ptr) += 4;
+			return;
+		case ATOM_SRC_WORD0:
+		case ATOM_SRC_WORD8:
+		case ATOM_SRC_WORD16:
+			(*ptr) += 2;
+			return;
+		case ATOM_SRC_BYTE0:
+		case ATOM_SRC_BYTE8:
+		case ATOM_SRC_BYTE16:
+		case ATOM_SRC_BYTE24:
+			(*ptr)++;
+			return;
+		}
+		return;
+	}
+}
+
+static uint32_t atom_get_src(atom_exec_context *ctx, uint8_t attr, int *ptr)
+{
+	return atom_get_src_int(ctx, attr, ptr, NULL, 1);
+}
+
+static uint32_t atom_get_dst(atom_exec_context *ctx, int arg, uint8_t attr,
+			     int *ptr, uint32_t *saved, int print)
+{
+	return atom_get_src_int(ctx,
+				arg | atom_dst_to_src[(attr >> 3) &
+						      7][(attr >> 6) & 3] << 3,
+				ptr, saved, print);
+}
+
+static void atom_skip_dst(atom_exec_context *ctx, int arg, uint8_t attr, int *ptr)
+{
+	atom_skip_src_int(ctx,
+			  arg | atom_dst_to_src[(attr >> 3) & 7][(attr >> 6) &
+								 3] << 3, ptr);
+}
+
+static void atom_put_dst(atom_exec_context *ctx, int arg, uint8_t attr,
+			 int *ptr, uint32_t val, uint32_t saved)
+{
+	uint32_t align =
+	    atom_dst_to_src[(attr >> 3) & 7][(attr >> 6) & 3], old_val =
+	    val, idx;
+	struct atom_context *gctx = ctx->ctx;
+	old_val &= atom_arg_mask[align] >> atom_arg_shift[align];
+	val <<= atom_arg_shift[align];
+	val &= atom_arg_mask[align];
+	saved &= ~atom_arg_mask[align];
+	val |= saved;
+	switch (arg) {
+	case ATOM_ARG_REG:
+		idx = U16(*ptr);
+		(*ptr) += 2;
+		DEBUG("REG[0x%04X]", idx);
+		idx += gctx->reg_block;
+		switch (gctx->io_mode) {
+		case ATOM_IO_MM:
+			if (idx == 0)
+				gctx->card->reg_write(gctx->card, idx,
+						      val << 2);
+			else
+				gctx->card->reg_write(gctx->card, idx, val);
+			break;
+		case ATOM_IO_PCI:
+			printk(KERN_INFO
+			       "PCI registers are not implemented.\n");
+			return;
+		case ATOM_IO_SYSIO:
+			printk(KERN_INFO
+			       "SYSIO registers are not implemented.\n");
+			return;
+		default:
+			if (!(gctx->io_mode & 0x80)) {
+				printk(KERN_INFO "Bad IO mode.\n");
+				return;
+			}
+			if (!gctx->iio[gctx->io_mode & 0xFF]) {
+				printk(KERN_INFO
+				       "Undefined indirect IO write method %d.\n",
+				       gctx->io_mode & 0x7F);
+				return;
+			}
+			atom_iio_execute(gctx, gctx->iio[gctx->io_mode & 0xFF],
+					 idx, val);
+		}
+		break;
+	case ATOM_ARG_PS:
+		idx = U8(*ptr);
+		(*ptr)++;
+		DEBUG("PS[0x%02X]", idx);
+		ctx->ps[idx] = cpu_to_le32(val);
+		break;
+	case ATOM_ARG_WS:
+		idx = U8(*ptr);
+		(*ptr)++;
+		DEBUG("WS[0x%02X]", idx);
+		switch (idx) {
+		case ATOM_WS_QUOTIENT:
+			gctx->divmul[0] = val;
+			break;
+		case ATOM_WS_REMAINDER:
+			gctx->divmul[1] = val;
+			break;
+		case ATOM_WS_DATAPTR:
+			gctx->data_block = val;
+			break;
+		case ATOM_WS_SHIFT:
+			gctx->shift = val;
+			break;
+		case ATOM_WS_OR_MASK:
+		case ATOM_WS_AND_MASK:
+			break;
+		case ATOM_WS_FB_WINDOW:
+			gctx->fb_base = val;
+			break;
+		case ATOM_WS_ATTRIBUTES:
+			gctx->io_attr = val;
+			break;
+		default:
+			ctx->ws[idx] = val;
+		}
+		break;
+	case ATOM_ARG_FB:
+		idx = U8(*ptr);
+		(*ptr)++;
+		DEBUG("FB[0x%02X]", idx);
+		printk(KERN_INFO "FB access is not implemented.\n");
+		return;
+	case ATOM_ARG_PLL:
+		idx = U8(*ptr);
+		(*ptr)++;
+		DEBUG("PLL[0x%02X]", idx);
+		gctx->card->pll_write(gctx->card, idx, val);
+		break;
+	case ATOM_ARG_MC:
+		idx = U8(*ptr);
+		(*ptr)++;
+		DEBUG("MC[0x%02X]", idx);
+		gctx->card->mc_write(gctx->card, idx, val);
+		return;
+	}
+	switch (align) {
+	case ATOM_SRC_DWORD:
+		DEBUG(".[31:0] <- 0x%08X\n", old_val);
+		break;
+	case ATOM_SRC_WORD0:
+		DEBUG(".[15:0] <- 0x%04X\n", old_val);
+		break;
+	case ATOM_SRC_WORD8:
+		DEBUG(".[23:8] <- 0x%04X\n", old_val);
+		break;
+	case ATOM_SRC_WORD16:
+		DEBUG(".[31:16] <- 0x%04X\n", old_val);
+		break;
+	case ATOM_SRC_BYTE0:
+		DEBUG(".[7:0] <- 0x%02X\n", old_val);
+		break;
+	case ATOM_SRC_BYTE8:
+		DEBUG(".[15:8] <- 0x%02X\n", old_val);
+		break;
+	case ATOM_SRC_BYTE16:
+		DEBUG(".[23:16] <- 0x%02X\n", old_val);
+		break;
+	case ATOM_SRC_BYTE24:
+		DEBUG(".[31:24] <- 0x%02X\n", old_val);
+		break;
+	}
+}
+
+static void atom_op_add(atom_exec_context *ctx, int *ptr, int arg)
+{
+	uint8_t attr = U8((*ptr)++);
+	uint32_t dst, src, saved;
+	int dptr = *ptr;
+	SDEBUG("   dst: ");
+	dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+	SDEBUG("   src: ");
+	src = atom_get_src(ctx, attr, ptr);
+	dst += src;
+	SDEBUG("   dst: ");
+	atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_and(atom_exec_context *ctx, int *ptr, int arg)
+{
+	uint8_t attr = U8((*ptr)++);
+	uint32_t dst, src, saved;
+	int dptr = *ptr;
+	SDEBUG("   dst: ");
+	dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+	SDEBUG("   src: ");
+	src = atom_get_src(ctx, attr, ptr);
+	dst &= src;
+	SDEBUG("   dst: ");
+	atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_beep(atom_exec_context *ctx, int *ptr, int arg)
+{
+	printk("ATOM BIOS beeped!\n");
+}
+
+static void atom_op_calltable(atom_exec_context *ctx, int *ptr, int arg)
+{
+	int idx = U8((*ptr)++);
+	if (idx < ATOM_TABLE_NAMES_CNT)
+		SDEBUG("   table: %d (%s)\n", idx, atom_table_names[idx]);
+	else
+		SDEBUG("   table: %d\n", idx);
+	if (U16(ctx->ctx->cmd_table + 4 + 2 * idx))
+		atom_execute_table(ctx->ctx, idx, ctx->ps + ctx->ps_shift);
+}
+
+static void atom_op_clear(atom_exec_context *ctx, int *ptr, int arg)
+{
+	uint8_t attr = U8((*ptr)++);
+	uint32_t saved;
+	int dptr = *ptr;
+	attr &= 0x38;
+	attr |= atom_def_dst[attr >> 3] << 6;
+	atom_get_dst(ctx, arg, attr, ptr, &saved, 0);
+	SDEBUG("   dst: ");
+	atom_put_dst(ctx, arg, attr, &dptr, 0, saved);
+}
+
+static void atom_op_compare(atom_exec_context *ctx, int *ptr, int arg)
+{
+	uint8_t attr = U8((*ptr)++);
+	uint32_t dst, src;
+	SDEBUG("   src1: ");
+	dst = atom_get_dst(ctx, arg, attr, ptr, NULL, 1);
+	SDEBUG("   src2: ");
+	src = atom_get_src(ctx, attr, ptr);
+	ctx->ctx->cs_equal = (dst == src);
+	ctx->ctx->cs_above = (dst > src);
+	SDEBUG("   result: %s %s\n", ctx->ctx->cs_equal ? "EQ" : "NE",
+	       ctx->ctx->cs_above ? "GT" : "LE");
+}
+
+static void atom_op_delay(atom_exec_context *ctx, int *ptr, int arg)
+{
+	uint8_t count = U8((*ptr)++);
+	SDEBUG("   count: %d\n", count);
+	if (arg == ATOM_UNIT_MICROSEC)
+		schedule_timeout_uninterruptible(usecs_to_jiffies(count));
+	else
+		schedule_timeout_uninterruptible(msecs_to_jiffies(count));
+}
+
+static void atom_op_div(atom_exec_context *ctx, int *ptr, int arg)
+{
+	uint8_t attr = U8((*ptr)++);
+	uint32_t dst, src;
+	SDEBUG("   src1: ");
+	dst = atom_get_dst(ctx, arg, attr, ptr, NULL, 1);
+	SDEBUG("   src2: ");
+	src = atom_get_src(ctx, attr, ptr);
+	if (src != 0) {
+		ctx->ctx->divmul[0] = dst / src;
+		ctx->ctx->divmul[1] = dst % src;
+	} else {
+		ctx->ctx->divmul[0] = 0;
+		ctx->ctx->divmul[1] = 0;
+	}
+}
+
+static void atom_op_eot(atom_exec_context *ctx, int *ptr, int arg)
+{
+	/* functionally, a nop */
+}
+
+static void atom_op_jump(atom_exec_context *ctx, int *ptr, int arg)
+{
+	int execute = 0, target = U16(*ptr);
+	(*ptr) += 2;
+	switch (arg) {
+	case ATOM_COND_ABOVE:
+		execute = ctx->ctx->cs_above;
+		break;
+	case ATOM_COND_ABOVEOREQUAL:
+		execute = ctx->ctx->cs_above || ctx->ctx->cs_equal;
+		break;
+	case ATOM_COND_ALWAYS:
+		execute = 1;
+		break;
+	case ATOM_COND_BELOW:
+		execute = !(ctx->ctx->cs_above || ctx->ctx->cs_equal);
+		break;
+	case ATOM_COND_BELOWOREQUAL:
+		execute = !ctx->ctx->cs_above;
+		break;
+	case ATOM_COND_EQUAL:
+		execute = ctx->ctx->cs_equal;
+		break;
+	case ATOM_COND_NOTEQUAL:
+		execute = !ctx->ctx->cs_equal;
+		break;
+	}
+	if (arg != ATOM_COND_ALWAYS)
+		SDEBUG("   taken: %s\n", execute ? "yes" : "no");
+	SDEBUG("   target: 0x%04X\n", target);
+	if (execute)
+		*ptr = ctx->start + target;
+}
+
+static void atom_op_mask(atom_exec_context *ctx, int *ptr, int arg)
+{
+	uint8_t attr = U8((*ptr)++);
+	uint32_t dst, src1, src2, saved;
+	int dptr = *ptr;
+	SDEBUG("   dst: ");
+	dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+	SDEBUG("   src1: ");
+	src1 = atom_get_src(ctx, attr, ptr);
+	SDEBUG("   src2: ");
+	src2 = atom_get_src(ctx, attr, ptr);
+	dst &= src1;
+	dst |= src2;
+	SDEBUG("   dst: ");
+	atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_move(atom_exec_context *ctx, int *ptr, int arg)
+{
+	uint8_t attr = U8((*ptr)++);
+	uint32_t src, saved;
+	int dptr = *ptr;
+	if (((attr >> 3) & 7) != ATOM_SRC_DWORD)
+		atom_get_dst(ctx, arg, attr, ptr, &saved, 0);
+	else {
+		atom_skip_dst(ctx, arg, attr, ptr);
+		saved = 0xCDCDCDCD;
+	}
+	SDEBUG("   src: ");
+	src = atom_get_src(ctx, attr, ptr);
+	SDEBUG("   dst: ");
+	atom_put_dst(ctx, arg, attr, &dptr, src, saved);
+}
+
+static void atom_op_mul(atom_exec_context *ctx, int *ptr, int arg)
+{
+	uint8_t attr = U8((*ptr)++);
+	uint32_t dst, src;
+	SDEBUG("   src1: ");
+	dst = atom_get_dst(ctx, arg, attr, ptr, NULL, 1);
+	SDEBUG("   src2: ");
+	src = atom_get_src(ctx, attr, ptr);
+	ctx->ctx->divmul[0] = dst * src;
+}
+
+static void atom_op_nop(atom_exec_context *ctx, int *ptr, int arg)
+{
+	/* nothing */
+}
+
+static void atom_op_or(atom_exec_context *ctx, int *ptr, int arg)
+{
+	uint8_t attr = U8((*ptr)++);
+	uint32_t dst, src, saved;
+	int dptr = *ptr;
+	SDEBUG("   dst: ");
+	dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+	SDEBUG("   src: ");
+	src = atom_get_src(ctx, attr, ptr);
+	dst |= src;
+	SDEBUG("   dst: ");
+	atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_postcard(atom_exec_context *ctx, int *ptr, int arg)
+{
+	uint8_t val = U8((*ptr)++);
+	SDEBUG("POST card output: 0x%02X\n", val);
+}
+
+static void atom_op_repeat(atom_exec_context *ctx, int *ptr, int arg)
+{
+	printk(KERN_INFO "unimplemented!\n");
+}
+
+static void atom_op_restorereg(atom_exec_context *ctx, int *ptr, int arg)
+{
+	printk(KERN_INFO "unimplemented!\n");
+}
+
+static void atom_op_savereg(atom_exec_context *ctx, int *ptr, int arg)
+{
+	printk(KERN_INFO "unimplemented!\n");
+}
+
+static void atom_op_setdatablock(atom_exec_context *ctx, int *ptr, int arg)
+{
+	int idx = U8(*ptr);
+	(*ptr)++;
+	SDEBUG("   block: %d\n", idx);
+	if (!idx)
+		ctx->ctx->data_block = 0;
+	else if (idx == 255)
+		ctx->ctx->data_block = ctx->start;
+	else
+		ctx->ctx->data_block = U16(ctx->ctx->data_table + 4 + 2 * idx);
+	SDEBUG("   base: 0x%04X\n", ctx->ctx->data_block);
+}
+
+static void atom_op_setfbbase(atom_exec_context *ctx, int *ptr, int arg)
+{
+	uint8_t attr = U8((*ptr)++);
+	SDEBUG("   fb_base: ");
+	ctx->ctx->fb_base = atom_get_src(ctx, attr, ptr);
+}
+
+static void atom_op_setport(atom_exec_context *ctx, int *ptr, int arg)
+{
+	int port;
+	switch (arg) {
+	case ATOM_PORT_ATI:
+		port = U16(*ptr);
+		if (port < ATOM_IO_NAMES_CNT)
+			SDEBUG("   port: %d (%s)\n", port, atom_io_names[port]);
+		else
+			SDEBUG("   port: %d\n", port);
+		if (!port)
+			ctx->ctx->io_mode = ATOM_IO_MM;
+		else
+			ctx->ctx->io_mode = ATOM_IO_IIO | port;
+		(*ptr) += 2;
+		break;
+	case ATOM_PORT_PCI:
+		ctx->ctx->io_mode = ATOM_IO_PCI;
+		(*ptr)++;
+		break;
+	case ATOM_PORT_SYSIO:
+		ctx->ctx->io_mode = ATOM_IO_SYSIO;
+		(*ptr)++;
+		break;
+	}
+}
+
+static void atom_op_setregblock(atom_exec_context *ctx, int *ptr, int arg)
+{
+	ctx->ctx->reg_block = U16(*ptr);
+	(*ptr) += 2;
+	SDEBUG("   base: 0x%04X\n", ctx->ctx->reg_block);
+}
+
+static void atom_op_shl(atom_exec_context *ctx, int *ptr, int arg)
+{
+	uint8_t attr = U8((*ptr)++), shift;
+	uint32_t saved, dst;
+	int dptr = *ptr;
+	attr &= 0x38;
+	attr |= atom_def_dst[attr >> 3] << 6;
+	SDEBUG("   dst: ");
+	dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+	shift = U8((*ptr)++);
+	SDEBUG("   shift: %d\n", shift);
+	dst <<= shift;
+	SDEBUG("   dst: ");
+	atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_shr(atom_exec_context *ctx, int *ptr, int arg)
+{
+	uint8_t attr = U8((*ptr)++), shift;
+	uint32_t saved, dst;
+	int dptr = *ptr;
+	attr &= 0x38;
+	attr |= atom_def_dst[attr >> 3] << 6;
+	SDEBUG("   dst: ");
+	dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+	shift = U8((*ptr)++);
+	SDEBUG("   shift: %d\n", shift);
+	dst >>= shift;
+	SDEBUG("   dst: ");
+	atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_sub(atom_exec_context *ctx, int *ptr, int arg)
+{
+	uint8_t attr = U8((*ptr)++);
+	uint32_t dst, src, saved;
+	int dptr = *ptr;
+	SDEBUG("   dst: ");
+	dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+	SDEBUG("   src: ");
+	src = atom_get_src(ctx, attr, ptr);
+	dst -= src;
+	SDEBUG("   dst: ");
+	atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_switch(atom_exec_context *ctx, int *ptr, int arg)
+{
+	uint8_t attr = U8((*ptr)++);
+	uint32_t src, val, target;
+	SDEBUG("   switch: ");
+	src = atom_get_src(ctx, attr, ptr);
+	while (U16(*ptr) != ATOM_CASE_END)
+		if (U8(*ptr) == ATOM_CASE_MAGIC) {
+			(*ptr)++;
+			SDEBUG("   case: ");
+			val =
+			    atom_get_src(ctx, (attr & 0x38) | ATOM_ARG_IMM,
+					 ptr);
+			target = U16(*ptr);
+			if (val == src) {
+				SDEBUG("   target: %04X\n", target);
+				*ptr = ctx->start + target;
+				return;
+			}
+			(*ptr) += 2;
+		} else {
+			printk(KERN_INFO "Bad case.\n");
+			return;
+		}
+	(*ptr) += 2;
+}
+
+static void atom_op_test(atom_exec_context *ctx, int *ptr, int arg)
+{
+	uint8_t attr = U8((*ptr)++);
+	uint32_t dst, src;
+	SDEBUG("   src1: ");
+	dst = atom_get_dst(ctx, arg, attr, ptr, NULL, 1);
+	SDEBUG("   src2: ");
+	src = atom_get_src(ctx, attr, ptr);
+	ctx->ctx->cs_equal = ((dst & src) == 0);
+	SDEBUG("   result: %s\n", ctx->ctx->cs_equal ? "EQ" : "NE");
+}
+
+static void atom_op_xor(atom_exec_context *ctx, int *ptr, int arg)
+{
+	uint8_t attr = U8((*ptr)++);
+	uint32_t dst, src, saved;
+	int dptr = *ptr;
+	SDEBUG("   dst: ");
+	dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+	SDEBUG("   src: ");
+	src = atom_get_src(ctx, attr, ptr);
+	dst ^= src;
+	SDEBUG("   dst: ");
+	atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_debug(atom_exec_context *ctx, int *ptr, int arg)
+{
+	printk(KERN_INFO "unimplemented!\n");
+}
+
+static struct {
+	void (*func) (atom_exec_context *, int *, int);
+	int arg;
+} opcode_table[ATOM_OP_CNT] = {
+	{
+	NULL, 0}, {
+	atom_op_move, ATOM_ARG_REG}, {
+	atom_op_move, ATOM_ARG_PS}, {
+	atom_op_move, ATOM_ARG_WS}, {
+	atom_op_move, ATOM_ARG_FB}, {
+	atom_op_move, ATOM_ARG_PLL}, {
+	atom_op_move, ATOM_ARG_MC}, {
+	atom_op_and, ATOM_ARG_REG}, {
+	atom_op_and, ATOM_ARG_PS}, {
+	atom_op_and, ATOM_ARG_WS}, {
+	atom_op_and, ATOM_ARG_FB}, {
+	atom_op_and, ATOM_ARG_PLL}, {
+	atom_op_and, ATOM_ARG_MC}, {
+	atom_op_or, ATOM_ARG_REG}, {
+	atom_op_or, ATOM_ARG_PS}, {
+	atom_op_or, ATOM_ARG_WS}, {
+	atom_op_or, ATOM_ARG_FB}, {
+	atom_op_or, ATOM_ARG_PLL}, {
+	atom_op_or, ATOM_ARG_MC}, {
+	atom_op_shl, ATOM_ARG_REG}, {
+	atom_op_shl, ATOM_ARG_PS}, {
+	atom_op_shl, ATOM_ARG_WS}, {
+	atom_op_shl, ATOM_ARG_FB}, {
+	atom_op_shl, ATOM_ARG_PLL}, {
+	atom_op_shl, ATOM_ARG_MC}, {
+	atom_op_shr, ATOM_ARG_REG}, {
+	atom_op_shr, ATOM_ARG_PS}, {
+	atom_op_shr, ATOM_ARG_WS}, {
+	atom_op_shr, ATOM_ARG_FB}, {
+	atom_op_shr, ATOM_ARG_PLL}, {
+	atom_op_shr, ATOM_ARG_MC}, {
+	atom_op_mul, ATOM_ARG_REG}, {
+	atom_op_mul, ATOM_ARG_PS}, {
+	atom_op_mul, ATOM_ARG_WS}, {
+	atom_op_mul, ATOM_ARG_FB}, {
+	atom_op_mul, ATOM_ARG_PLL}, {
+	atom_op_mul, ATOM_ARG_MC}, {
+	atom_op_div, ATOM_ARG_REG}, {
+	atom_op_div, ATOM_ARG_PS}, {
+	atom_op_div, ATOM_ARG_WS}, {
+	atom_op_div, ATOM_ARG_FB}, {
+	atom_op_div, ATOM_ARG_PLL}, {
+	atom_op_div, ATOM_ARG_MC}, {
+	atom_op_add, ATOM_ARG_REG}, {
+	atom_op_add, ATOM_ARG_PS}, {
+	atom_op_add, ATOM_ARG_WS}, {
+	atom_op_add, ATOM_ARG_FB}, {
+	atom_op_add, ATOM_ARG_PLL}, {
+	atom_op_add, ATOM_ARG_MC}, {
+	atom_op_sub, ATOM_ARG_REG}, {
+	atom_op_sub, ATOM_ARG_PS}, {
+	atom_op_sub, ATOM_ARG_WS}, {
+	atom_op_sub, ATOM_ARG_FB}, {
+	atom_op_sub, ATOM_ARG_PLL}, {
+	atom_op_sub, ATOM_ARG_MC}, {
+	atom_op_setport, ATOM_PORT_ATI}, {
+	atom_op_setport, ATOM_PORT_PCI}, {
+	atom_op_setport, ATOM_PORT_SYSIO}, {
+	atom_op_setregblock, 0}, {
+	atom_op_setfbbase, 0}, {
+	atom_op_compare, ATOM_ARG_REG}, {
+	atom_op_compare, ATOM_ARG_PS}, {
+	atom_op_compare, ATOM_ARG_WS}, {
+	atom_op_compare, ATOM_ARG_FB}, {
+	atom_op_compare, ATOM_ARG_PLL}, {
+	atom_op_compare, ATOM_ARG_MC}, {
+	atom_op_switch, 0}, {
+	atom_op_jump, ATOM_COND_ALWAYS}, {
+	atom_op_jump, ATOM_COND_EQUAL}, {
+	atom_op_jump, ATOM_COND_BELOW}, {
+	atom_op_jump, ATOM_COND_ABOVE}, {
+	atom_op_jump, ATOM_COND_BELOWOREQUAL}, {
+	atom_op_jump, ATOM_COND_ABOVEOREQUAL}, {
+	atom_op_jump, ATOM_COND_NOTEQUAL}, {
+	atom_op_test, ATOM_ARG_REG}, {
+	atom_op_test, ATOM_ARG_PS}, {
+	atom_op_test, ATOM_ARG_WS}, {
+	atom_op_test, ATOM_ARG_FB}, {
+	atom_op_test, ATOM_ARG_PLL}, {
+	atom_op_test, ATOM_ARG_MC}, {
+	atom_op_delay, ATOM_UNIT_MILLISEC}, {
+	atom_op_delay, ATOM_UNIT_MICROSEC}, {
+	atom_op_calltable, 0}, {
+	atom_op_repeat, 0}, {
+	atom_op_clear, ATOM_ARG_REG}, {
+	atom_op_clear, ATOM_ARG_PS}, {
+	atom_op_clear, ATOM_ARG_WS}, {
+	atom_op_clear, ATOM_ARG_FB}, {
+	atom_op_clear, ATOM_ARG_PLL}, {
+	atom_op_clear, ATOM_ARG_MC}, {
+	atom_op_nop, 0}, {
+	atom_op_eot, 0}, {
+	atom_op_mask, ATOM_ARG_REG}, {
+	atom_op_mask, ATOM_ARG_PS}, {
+	atom_op_mask, ATOM_ARG_WS}, {
+	atom_op_mask, ATOM_ARG_FB}, {
+	atom_op_mask, ATOM_ARG_PLL}, {
+	atom_op_mask, ATOM_ARG_MC}, {
+	atom_op_postcard, 0}, {
+	atom_op_beep, 0}, {
+	atom_op_savereg, 0}, {
+	atom_op_restorereg, 0}, {
+	atom_op_setdatablock, 0}, {
+	atom_op_xor, ATOM_ARG_REG}, {
+	atom_op_xor, ATOM_ARG_PS}, {
+	atom_op_xor, ATOM_ARG_WS}, {
+	atom_op_xor, ATOM_ARG_FB}, {
+	atom_op_xor, ATOM_ARG_PLL}, {
+	atom_op_xor, ATOM_ARG_MC}, {
+	atom_op_shl, ATOM_ARG_REG}, {
+	atom_op_shl, ATOM_ARG_PS}, {
+	atom_op_shl, ATOM_ARG_WS}, {
+	atom_op_shl, ATOM_ARG_FB}, {
+	atom_op_shl, ATOM_ARG_PLL}, {
+	atom_op_shl, ATOM_ARG_MC}, {
+	atom_op_shr, ATOM_ARG_REG}, {
+	atom_op_shr, ATOM_ARG_PS}, {
+	atom_op_shr, ATOM_ARG_WS}, {
+	atom_op_shr, ATOM_ARG_FB}, {
+	atom_op_shr, ATOM_ARG_PLL}, {
+	atom_op_shr, ATOM_ARG_MC}, {
+atom_op_debug, 0},};
+
+void atom_execute_table(struct atom_context *ctx, int index, uint32_t * params)
+{
+	int base = CU16(ctx->cmd_table + 4 + 2 * index);
+	int len, ws, ps, ptr;
+	unsigned char op;
+	atom_exec_context ectx;
+
+	if (!base)
+		return;
+
+	len = CU16(base + ATOM_CT_SIZE_PTR);
+	ws = CU8(base + ATOM_CT_WS_PTR);
+	ps = CU8(base + ATOM_CT_PS_PTR) & ATOM_CT_PS_MASK;
+	ptr = base + ATOM_CT_CODE_PTR;
+
+	SDEBUG(">> execute %04X (len %d, WS %d, PS %d)\n", base, len, ws, ps);
+
+	/* reset reg block */
+	ctx->reg_block = 0;
+	ectx.ctx = ctx;
+	ectx.ps_shift = ps / 4;
+	ectx.start = base;
+	ectx.ps = params;
+	if (ws)
+		ectx.ws = kzalloc(4 * ws, GFP_KERNEL);
+	else
+		ectx.ws = NULL;
+
+	debug_depth++;
+	while (1) {
+		op = CU8(ptr++);
+		if (op < ATOM_OP_NAMES_CNT)
+			SDEBUG("%s @ 0x%04X\n", atom_op_names[op], ptr - 1);
+		else
+			SDEBUG("[%d] @ 0x%04X\n", op, ptr - 1);
+
+		if (op < ATOM_OP_CNT && op > 0)
+			opcode_table[op].func(&ectx, &ptr,
+					      opcode_table[op].arg);
+		else
+			break;
+
+		if (op == ATOM_OP_EOT)
+			break;
+	}
+	debug_depth--;
+	SDEBUG("<<\n");
+
+	if (ws)
+		kfree(ectx.ws);
+}
+
+static int atom_iio_len[] = { 1, 2, 3, 3, 3, 3, 4, 4, 4, 3 };
+
+static void atom_index_iio(struct atom_context *ctx, int base)
+{
+	ctx->iio = kzalloc(2 * 256, GFP_KERNEL);
+	while (CU8(base) == ATOM_IIO_START) {
+		ctx->iio[CU8(base + 1)] = base + 2;
+		base += 2;
+		while (CU8(base) != ATOM_IIO_END)
+			base += atom_iio_len[CU8(base)];
+		base += 3;
+	}
+}
+
+struct atom_context *atom_parse(struct card_info *card, void *bios)
+{
+	int base;
+	struct atom_context *ctx =
+	    kzalloc(sizeof(struct atom_context), GFP_KERNEL);
+	char *str;
+	char name[512];
+	int i;
+
+	ctx->card = card;
+	ctx->bios = bios;
+
+	if (CU16(0) != ATOM_BIOS_MAGIC) {
+		printk(KERN_INFO "Invalid BIOS magic.\n");
+		kfree(ctx);
+		return NULL;
+	}
+	if (strncmp
+	    (CSTR(ATOM_ATI_MAGIC_PTR), ATOM_ATI_MAGIC,
+	     strlen(ATOM_ATI_MAGIC))) {
+		printk(KERN_INFO "Invalid ATI magic.\n");
+		kfree(ctx);
+		return NULL;
+	}
+
+	base = CU16(ATOM_ROM_TABLE_PTR);
+	if (strncmp
+	    (CSTR(base + ATOM_ROM_MAGIC_PTR), ATOM_ROM_MAGIC,
+	     strlen(ATOM_ROM_MAGIC))) {
+		printk(KERN_INFO "Invalid ATOM magic.\n");
+		kfree(ctx);
+		return NULL;
+	}
+
+	ctx->cmd_table = CU16(base + ATOM_ROM_CMD_PTR);
+	ctx->data_table = CU16(base + ATOM_ROM_DATA_PTR);
+	atom_index_iio(ctx, CU16(ctx->data_table + ATOM_DATA_IIO_PTR) + 4);
+
+	str = CSTR(CU16(base + ATOM_ROM_MSG_PTR));
+	while (*str && ((*str == '\n') || (*str == '\r')))
+		str++;
+	/* name string isn't always 0 terminated */
+	for (i = 0; i < 511; i++) {
+		name[i] = str[i];
+		if (name[i] < '.' || name[i] > 'z') {
+			name[i] = 0;
+			break;
+		}
+	}
+	printk(KERN_INFO "ATOM BIOS: %s\n", name);
+
+	return ctx;
+}
+
+int atom_asic_init(struct atom_context *ctx)
+{
+	int hwi = CU16(ctx->data_table + ATOM_DATA_FWI_PTR);
+	uint32_t ps[16];
+	memset(ps, 0, 64);
+
+	ps[0] = cpu_to_le32(CU32(hwi + ATOM_FWI_DEFSCLK_PTR));
+	ps[1] = cpu_to_le32(CU32(hwi + ATOM_FWI_DEFMCLK_PTR));
+	if (!ps[0] || !ps[1])
+		return 1;
+
+	if (!CU16(ctx->cmd_table + 4 + 2 * ATOM_CMD_INIT))
+		return 1;
+	atom_execute_table(ctx, ATOM_CMD_INIT, ps);
+
+	return 0;
+}
+
+void atom_destroy(struct atom_context *ctx)
+{
+	if (ctx->iio)
+		kfree(ctx->iio);
+	kfree(ctx);
+}
+
+void atom_parse_data_header(struct atom_context *ctx, int index,
+			    uint16_t * size, uint8_t * frev, uint8_t * crev,
+			    uint16_t * data_start)
+{
+	int offset = index * 2 + 4;
+	int idx = CU16(ctx->data_table + offset);
+
+	if (size)
+		*size = CU16(idx);
+	if (frev)
+		*frev = CU8(idx + 2);
+	if (crev)
+		*crev = CU8(idx + 3);
+	*data_start = idx;
+	return;
+}
+
+void atom_parse_cmd_header(struct atom_context *ctx, int index, uint8_t * frev,
+			   uint8_t * crev)
+{
+	int offset = index * 2 + 4;
+	int idx = CU16(ctx->cmd_table + offset);
+
+	if (frev)
+		*frev = CU8(idx + 2);
+	if (crev)
+		*crev = CU8(idx + 3);
+	return;
+}
diff --git a/drivers/gpu/drm/radeon/atom.h b/drivers/gpu/drm/radeon/atom.h
new file mode 100644
index 00000000000..e6eb38f2bca
--- /dev/null
+++ b/drivers/gpu/drm/radeon/atom.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Stanislaw Skowronek
+ */
+
+#ifndef ATOM_H
+#define ATOM_H
+
+#include <linux/types.h>
+#include "drmP.h"
+
+#define ATOM_BIOS_MAGIC		0xAA55
+#define ATOM_ATI_MAGIC_PTR	0x30
+#define ATOM_ATI_MAGIC		" 761295520"
+#define ATOM_ROM_TABLE_PTR	0x48
+
+#define ATOM_ROM_MAGIC		"ATOM"
+#define ATOM_ROM_MAGIC_PTR	4
+
+#define ATOM_ROM_MSG_PTR	0x10
+#define ATOM_ROM_CMD_PTR	0x1E
+#define ATOM_ROM_DATA_PTR	0x20
+
+#define ATOM_CMD_INIT		0
+#define ATOM_CMD_SETSCLK	0x0A
+#define ATOM_CMD_SETMCLK	0x0B
+#define ATOM_CMD_SETPCLK	0x0C
+
+#define ATOM_DATA_FWI_PTR	0xC
+#define ATOM_DATA_IIO_PTR	0x32
+
+#define ATOM_FWI_DEFSCLK_PTR	8
+#define ATOM_FWI_DEFMCLK_PTR	0xC
+#define ATOM_FWI_MAXSCLK_PTR	0x24
+#define ATOM_FWI_MAXMCLK_PTR	0x28
+
+#define ATOM_CT_SIZE_PTR	0
+#define ATOM_CT_WS_PTR		4
+#define ATOM_CT_PS_PTR		5
+#define ATOM_CT_PS_MASK		0x7F
+#define ATOM_CT_CODE_PTR	6
+
+#define ATOM_OP_CNT		123
+#define ATOM_OP_EOT		91
+
+#define ATOM_CASE_MAGIC		0x63
+#define ATOM_CASE_END		0x5A5A
+
+#define ATOM_ARG_REG		0
+#define ATOM_ARG_PS		1
+#define ATOM_ARG_WS		2
+#define ATOM_ARG_FB		3
+#define ATOM_ARG_ID		4
+#define ATOM_ARG_IMM		5
+#define ATOM_ARG_PLL		6
+#define ATOM_ARG_MC		7
+
+#define ATOM_SRC_DWORD		0
+#define ATOM_SRC_WORD0		1
+#define ATOM_SRC_WORD8		2
+#define ATOM_SRC_WORD16		3
+#define ATOM_SRC_BYTE0		4
+#define ATOM_SRC_BYTE8		5
+#define ATOM_SRC_BYTE16		6
+#define ATOM_SRC_BYTE24		7
+
+#define ATOM_WS_QUOTIENT	0x40
+#define ATOM_WS_REMAINDER	0x41
+#define ATOM_WS_DATAPTR		0x42
+#define ATOM_WS_SHIFT		0x43
+#define ATOM_WS_OR_MASK		0x44
+#define ATOM_WS_AND_MASK	0x45
+#define ATOM_WS_FB_WINDOW	0x46
+#define ATOM_WS_ATTRIBUTES	0x47
+
+#define ATOM_IIO_NOP		0
+#define ATOM_IIO_START		1
+#define ATOM_IIO_READ		2
+#define ATOM_IIO_WRITE		3
+#define ATOM_IIO_CLEAR		4
+#define ATOM_IIO_SET		5
+#define ATOM_IIO_MOVE_INDEX	6
+#define ATOM_IIO_MOVE_ATTR	7
+#define ATOM_IIO_MOVE_DATA	8
+#define ATOM_IIO_END		9
+
+#define ATOM_IO_MM		0
+#define ATOM_IO_PCI		1
+#define ATOM_IO_SYSIO		2
+#define ATOM_IO_IIO		0x80
+
+struct card_info {
+	struct drm_device *dev;
+	void (* reg_write)(struct card_info *, uint32_t, uint32_t);   /*  filled by driver */
+        uint32_t (* reg_read)(struct card_info *, uint32_t);          /*  filled by driver */
+	void (* mc_write)(struct card_info *, uint32_t, uint32_t);   /*  filled by driver */
+        uint32_t (* mc_read)(struct card_info *, uint32_t);          /*  filled by driver */
+	void (* pll_write)(struct card_info *, uint32_t, uint32_t);   /*  filled by driver */
+        uint32_t (* pll_read)(struct card_info *, uint32_t);          /*  filled by driver */
+};
+
+struct atom_context {
+	struct card_info *card;
+	void *bios;
+	uint32_t cmd_table, data_table;
+	uint16_t *iio;
+
+	uint16_t data_block;
+	uint32_t fb_base;
+	uint32_t divmul[2];
+	uint16_t io_attr;
+	uint16_t reg_block;
+	uint8_t shift;
+	int cs_equal, cs_above;
+	int io_mode;
+};
+
+extern int atom_debug;
+
+struct atom_context *atom_parse(struct card_info *, void *);
+void atom_execute_table(struct atom_context *, int, uint32_t *);
+int atom_asic_init(struct atom_context *);
+void atom_destroy(struct atom_context *);
+void atom_parse_data_header(struct atom_context *ctx, int index, uint16_t *size, uint8_t *frev, uint8_t *crev, uint16_t *data_start);
+void atom_parse_cmd_header(struct atom_context *ctx, int index, uint8_t *frev, uint8_t *crev);
+#include "atom-types.h"
+#include "atombios.h"
+#include "ObjectID.h"
+
+#endif
diff --git a/drivers/gpu/drm/radeon/atombios.h b/drivers/gpu/drm/radeon/atombios.h
new file mode 100644
index 00000000000..cf67928abbc
--- /dev/null
+++ b/drivers/gpu/drm/radeon/atombios.h
@@ -0,0 +1,4785 @@
+/*
+ * Copyright 2006-2007 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/****************************************************************************/
+/*Portion I: Definitions  shared between VBIOS and Driver                   */
+/****************************************************************************/
+
+#ifndef _ATOMBIOS_H
+#define _ATOMBIOS_H
+
+#define ATOM_VERSION_MAJOR                   0x00020000
+#define ATOM_VERSION_MINOR                   0x00000002
+
+#define ATOM_HEADER_VERSION (ATOM_VERSION_MAJOR | ATOM_VERSION_MINOR)
+
+/* Endianness should be specified before inclusion,
+ * default to little endian
+ */
+#ifndef ATOM_BIG_ENDIAN
+#error Endian not specified
+#endif
+
+#ifdef _H2INC
+#ifndef ULONG
+typedef unsigned long ULONG;
+#endif
+
+#ifndef UCHAR
+typedef unsigned char UCHAR;
+#endif
+
+#ifndef USHORT
+typedef unsigned short USHORT;
+#endif
+#endif
+
+#define ATOM_DAC_A            0
+#define ATOM_DAC_B            1
+#define ATOM_EXT_DAC          2
+
+#define ATOM_CRTC1            0
+#define ATOM_CRTC2            1
+
+#define ATOM_DIGA             0
+#define ATOM_DIGB             1
+
+#define ATOM_PPLL1            0
+#define ATOM_PPLL2            1
+
+#define ATOM_SCALER1          0
+#define ATOM_SCALER2          1
+
+#define ATOM_SCALER_DISABLE   0
+#define ATOM_SCALER_CENTER    1
+#define ATOM_SCALER_EXPANSION 2
+#define ATOM_SCALER_MULTI_EX  3
+
+#define ATOM_DISABLE          0
+#define ATOM_ENABLE           1
+#define ATOM_LCD_BLOFF                          (ATOM_DISABLE+2)
+#define ATOM_LCD_BLON                           (ATOM_ENABLE+2)
+#define ATOM_LCD_BL_BRIGHTNESS_CONTROL          (ATOM_ENABLE+3)
+#define ATOM_LCD_SELFTEST_START									(ATOM_DISABLE+5)
+#define ATOM_LCD_SELFTEST_STOP									(ATOM_ENABLE+5)
+#define ATOM_ENCODER_INIT			                  (ATOM_DISABLE+7)
+
+#define ATOM_BLANKING         1
+#define ATOM_BLANKING_OFF     0
+
+#define ATOM_CURSOR1          0
+#define ATOM_CURSOR2          1
+
+#define ATOM_ICON1            0
+#define ATOM_ICON2            1
+
+#define ATOM_CRT1             0
+#define ATOM_CRT2             1
+
+#define ATOM_TV_NTSC          1
+#define ATOM_TV_NTSCJ         2
+#define ATOM_TV_PAL           3
+#define ATOM_TV_PALM          4
+#define ATOM_TV_PALCN         5
+#define ATOM_TV_PALN          6
+#define ATOM_TV_PAL60         7
+#define ATOM_TV_SECAM         8
+#define ATOM_TV_CV            16
+
+#define ATOM_DAC1_PS2         1
+#define ATOM_DAC1_CV          2
+#define ATOM_DAC1_NTSC        3
+#define ATOM_DAC1_PAL         4
+
+#define ATOM_DAC2_PS2         ATOM_DAC1_PS2
+#define ATOM_DAC2_CV          ATOM_DAC1_CV
+#define ATOM_DAC2_NTSC        ATOM_DAC1_NTSC
+#define ATOM_DAC2_PAL         ATOM_DAC1_PAL
+
+#define ATOM_PM_ON            0
+#define ATOM_PM_STANDBY       1
+#define ATOM_PM_SUSPEND       2
+#define ATOM_PM_OFF           3
+
+/* Bit0:{=0:single, =1:dual},
+   Bit1 {=0:666RGB, =1:888RGB},
+   Bit2:3:{Grey level}
+   Bit4:{=0:LDI format for RGB888, =1 FPDI format for RGB888}*/
+
+#define ATOM_PANEL_MISC_DUAL               0x00000001
+#define ATOM_PANEL_MISC_888RGB             0x00000002
+#define ATOM_PANEL_MISC_GREY_LEVEL         0x0000000C
+#define ATOM_PANEL_MISC_FPDI               0x00000010
+#define ATOM_PANEL_MISC_GREY_LEVEL_SHIFT   2
+#define ATOM_PANEL_MISC_SPATIAL            0x00000020
+#define ATOM_PANEL_MISC_TEMPORAL           0x00000040
+#define ATOM_PANEL_MISC_API_ENABLED        0x00000080
+
+#define MEMTYPE_DDR1              "DDR1"
+#define MEMTYPE_DDR2              "DDR2"
+#define MEMTYPE_DDR3              "DDR3"
+#define MEMTYPE_DDR4              "DDR4"
+
+#define ASIC_BUS_TYPE_PCI         "PCI"
+#define ASIC_BUS_TYPE_AGP         "AGP"
+#define ASIC_BUS_TYPE_PCIE        "PCI_EXPRESS"
+
+/* Maximum size of that FireGL flag string */
+
+#define ATOM_FIREGL_FLAG_STRING     "FGL"	/* Flag used to enable FireGL Support */
+#define ATOM_MAX_SIZE_OF_FIREGL_FLAG_STRING  3	/* sizeof( ATOM_FIREGL_FLAG_STRING ) */
+
+#define ATOM_FAKE_DESKTOP_STRING    "DSK"	/* Flag used to enable mobile ASIC on Desktop */
+#define ATOM_MAX_SIZE_OF_FAKE_DESKTOP_STRING  ATOM_MAX_SIZE_OF_FIREGL_FLAG_STRING
+
+#define ATOM_M54T_FLAG_STRING       "M54T"	/* Flag used to enable M54T Support */
+#define ATOM_MAX_SIZE_OF_M54T_FLAG_STRING    4	/* sizeof( ATOM_M54T_FLAG_STRING ) */
+
+#define HW_ASSISTED_I2C_STATUS_FAILURE          2
+#define HW_ASSISTED_I2C_STATUS_SUCCESS          1
+
+#pragma pack(1)			/* BIOS data must use byte aligment */
+
+/*  Define offset to location of ROM header. */
+
+#define OFFSET_TO_POINTER_TO_ATOM_ROM_HEADER		0x00000048L
+#define OFFSET_TO_ATOM_ROM_IMAGE_SIZE				    0x00000002L
+
+#define OFFSET_TO_ATOMBIOS_ASIC_BUS_MEM_TYPE    0x94
+#define MAXSIZE_OF_ATOMBIOS_ASIC_BUS_MEM_TYPE   20	/* including the terminator 0x0! */
+#define	OFFSET_TO_GET_ATOMBIOS_STRINGS_NUMBER		0x002f
+#define	OFFSET_TO_GET_ATOMBIOS_STRINGS_START		0x006e
+
+/* Common header for all ROM Data tables.
+  Every table pointed  _ATOM_MASTER_DATA_TABLE has this common header.
+  And the pointer actually points to this header. */
+
+typedef struct _ATOM_COMMON_TABLE_HEADER {
+	USHORT usStructureSize;
+	UCHAR ucTableFormatRevision;	/*Change it when the Parser is not backward compatible */
+	UCHAR ucTableContentRevision;	/*Change it only when the table needs to change but the firmware */
+	/*Image can't be updated, while Driver needs to carry the new table! */
+} ATOM_COMMON_TABLE_HEADER;
+
+typedef struct _ATOM_ROM_HEADER {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	UCHAR uaFirmWareSignature[4];	/*Signature to distinguish between Atombios and non-atombios,
+					   atombios should init it as "ATOM", don't change the position */
+	USHORT usBiosRuntimeSegmentAddress;
+	USHORT usProtectedModeInfoOffset;
+	USHORT usConfigFilenameOffset;
+	USHORT usCRC_BlockOffset;
+	USHORT usBIOS_BootupMessageOffset;
+	USHORT usInt10Offset;
+	USHORT usPciBusDevInitCode;
+	USHORT usIoBaseAddress;
+	USHORT usSubsystemVendorID;
+	USHORT usSubsystemID;
+	USHORT usPCI_InfoOffset;
+	USHORT usMasterCommandTableOffset;	/*Offset for SW to get all command table offsets, Don't change the position */
+	USHORT usMasterDataTableOffset;	/*Offset for SW to get all data table offsets, Don't change the position */
+	UCHAR ucExtendedFunctionCode;
+	UCHAR ucReserved;
+} ATOM_ROM_HEADER;
+
+/*==============================Command Table Portion==================================== */
+
+#ifdef	UEFI_BUILD
+#define	UTEMP	USHORT
+#define	USHORT	void*
+#endif
+
+typedef struct _ATOM_MASTER_LIST_OF_COMMAND_TABLES {
+	USHORT ASIC_Init;	/* Function Table, used by various SW components,latest version 1.1 */
+	USHORT GetDisplaySurfaceSize;	/* Atomic Table,  Used by Bios when enabling HW ICON */
+	USHORT ASIC_RegistersInit;	/* Atomic Table,  indirectly used by various SW components,called from ASIC_Init */
+	USHORT VRAM_BlockVenderDetection;	/* Atomic Table,  used only by Bios */
+	USHORT DIGxEncoderControl;	/* Only used by Bios */
+	USHORT MemoryControllerInit;	/* Atomic Table,  indirectly used by various SW components,called from ASIC_Init */
+	USHORT EnableCRTCMemReq;	/* Function Table,directly used by various SW components,latest version 2.1 */
+	USHORT MemoryParamAdjust;	/* Atomic Table,  indirectly used by various SW components,called from SetMemoryClock if needed */
+	USHORT DVOEncoderControl;	/* Function Table,directly used by various SW components,latest version 1.2 */
+	USHORT GPIOPinControl;	/* Atomic Table,  only used by Bios */
+	USHORT SetEngineClock;	/*Function Table,directly used by various SW components,latest version 1.1 */
+	USHORT SetMemoryClock;	/* Function Table,directly used by various SW components,latest version 1.1 */
+	USHORT SetPixelClock;	/*Function Table,directly used by various SW components,latest version 1.2 */
+	USHORT DynamicClockGating;	/* Atomic Table,  indirectly used by various SW components,called from ASIC_Init */
+	USHORT ResetMemoryDLL;	/* Atomic Table,  indirectly used by various SW components,called from SetMemoryClock */
+	USHORT ResetMemoryDevice;	/* Atomic Table,  indirectly used by various SW components,called from SetMemoryClock */
+	USHORT MemoryPLLInit;
+	USHORT AdjustDisplayPll;	/* only used by Bios */
+	USHORT AdjustMemoryController;	/* Atomic Table,  indirectly used by various SW components,called from SetMemoryClock */
+	USHORT EnableASIC_StaticPwrMgt;	/* Atomic Table,  only used by Bios */
+	USHORT ASIC_StaticPwrMgtStatusChange;	/* Obsolete, only used by Bios */
+	USHORT DAC_LoadDetection;	/* Atomic Table,  directly used by various SW components,latest version 1.2 */
+	USHORT LVTMAEncoderControl;	/* Atomic Table,directly used by various SW components,latest version 1.3 */
+	USHORT LCD1OutputControl;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
+	USHORT DAC1EncoderControl;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
+	USHORT DAC2EncoderControl;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
+	USHORT DVOOutputControl;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
+	USHORT CV1OutputControl;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
+	USHORT GetConditionalGoldenSetting;	/* only used by Bios */
+	USHORT TVEncoderControl;	/* Function Table,directly used by various SW components,latest version 1.1 */
+	USHORT TMDSAEncoderControl;	/* Atomic Table,  directly used by various SW components,latest version 1.3 */
+	USHORT LVDSEncoderControl;	/* Atomic Table,  directly used by various SW components,latest version 1.3 */
+	USHORT TV1OutputControl;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
+	USHORT EnableScaler;	/* Atomic Table,  used only by Bios */
+	USHORT BlankCRTC;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
+	USHORT EnableCRTC;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
+	USHORT GetPixelClock;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
+	USHORT EnableVGA_Render;	/* Function Table,directly used by various SW components,latest version 1.1 */
+	USHORT EnableVGA_Access;	/* Obsolete ,     only used by Bios */
+	USHORT SetCRTC_Timing;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
+	USHORT SetCRTC_OverScan;	/* Atomic Table,  used by various SW components,latest version 1.1 */
+	USHORT SetCRTC_Replication;	/* Atomic Table,  used only by Bios */
+	USHORT SelectCRTC_Source;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
+	USHORT EnableGraphSurfaces;	/* Atomic Table,  used only by Bios */
+	USHORT UpdateCRTC_DoubleBufferRegisters;
+	USHORT LUT_AutoFill;	/* Atomic Table,  only used by Bios */
+	USHORT EnableHW_IconCursor;	/* Atomic Table,  only used by Bios */
+	USHORT GetMemoryClock;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
+	USHORT GetEngineClock;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
+	USHORT SetCRTC_UsingDTDTiming;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
+	USHORT ExternalEncoderControl;	/* Atomic Table,  directly used by various SW components,latest version 2.1 */
+	USHORT LVTMAOutputControl;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
+	USHORT VRAM_BlockDetectionByStrap;	/* Atomic Table,  used only by Bios */
+	USHORT MemoryCleanUp;	/* Atomic Table,  only used by Bios */
+	USHORT ProcessI2cChannelTransaction;	/* Function Table,only used by Bios */
+	USHORT WriteOneByteToHWAssistedI2C;	/* Function Table,indirectly used by various SW components */
+	USHORT ReadHWAssistedI2CStatus;	/* Atomic Table,  indirectly used by various SW components */
+	USHORT SpeedFanControl;	/* Function Table,indirectly used by various SW components,called from ASIC_Init */
+	USHORT PowerConnectorDetection;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
+	USHORT MC_Synchronization;	/* Atomic Table,  indirectly used by various SW components,called from SetMemoryClock */
+	USHORT ComputeMemoryEnginePLL;	/* Atomic Table,  indirectly used by various SW components,called from SetMemory/EngineClock */
+	USHORT MemoryRefreshConversion;	/* Atomic Table,  indirectly used by various SW components,called from SetMemory or SetEngineClock */
+	USHORT VRAM_GetCurrentInfoBlock;	/* Atomic Table,  used only by Bios */
+	USHORT DynamicMemorySettings;	/* Atomic Table,  indirectly used by various SW components,called from SetMemoryClock */
+	USHORT MemoryTraining;	/* Atomic Table,  used only by Bios */
+	USHORT EnableSpreadSpectrumOnPPLL;	/* Atomic Table,  directly used by various SW components,latest version 1.2 */
+	USHORT TMDSAOutputControl;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
+	USHORT SetVoltage;	/* Function Table,directly and/or indirectly used by various SW components,latest version 1.1 */
+	USHORT DAC1OutputControl;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
+	USHORT DAC2OutputControl;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
+	USHORT SetupHWAssistedI2CStatus;	/* Function Table,only used by Bios, obsolete soon.Switch to use "ReadEDIDFromHWAssistedI2C" */
+	USHORT ClockSource;	/* Atomic Table,  indirectly used by various SW components,called from ASIC_Init */
+	USHORT MemoryDeviceInit;	/* Atomic Table,  indirectly used by various SW components,called from SetMemoryClock */
+	USHORT EnableYUV;	/* Atomic Table,  indirectly used by various SW components,called from EnableVGARender */
+	USHORT DIG1EncoderControl;	/* Atomic Table,directly used by various SW components,latest version 1.1 */
+	USHORT DIG2EncoderControl;	/* Atomic Table,directly used by various SW components,latest version 1.1 */
+	USHORT DIG1TransmitterControl;	/* Atomic Table,directly used by various SW components,latest version 1.1 */
+	USHORT DIG2TransmitterControl;	/* Atomic Table,directly used by various SW components,latest version 1.1 */
+	USHORT ProcessAuxChannelTransaction;	/* Function Table,only used by Bios */
+	USHORT DPEncoderService;	/* Function Table,only used by Bios */
+} ATOM_MASTER_LIST_OF_COMMAND_TABLES;
+
+/*  For backward compatible */
+#define ReadEDIDFromHWAssistedI2C                ProcessI2cChannelTransaction
+#define UNIPHYTransmitterControl						     DIG1TransmitterControl
+#define LVTMATransmitterControl							     DIG2TransmitterControl
+#define SetCRTC_DPM_State                        GetConditionalGoldenSetting
+#define SetUniphyInstance                        ASIC_StaticPwrMgtStatusChange
+
+typedef struct _ATOM_MASTER_COMMAND_TABLE {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	ATOM_MASTER_LIST_OF_COMMAND_TABLES ListOfCommandTables;
+} ATOM_MASTER_COMMAND_TABLE;
+
+/****************************************************************************/
+/*  Structures used in every command table */
+/****************************************************************************/
+typedef struct _ATOM_TABLE_ATTRIBUTE {
+#if ATOM_BIG_ENDIAN
+	USHORT UpdatedByUtility:1;	/* [15]=Table updated by utility flag */
+	USHORT PS_SizeInBytes:7;	/* [14:8]=Size of parameter space in Bytes (multiple of a dword), */
+	USHORT WS_SizeInBytes:8;	/* [7:0]=Size of workspace in Bytes (in multiple of a dword), */
+#else
+	USHORT WS_SizeInBytes:8;	/* [7:0]=Size of workspace in Bytes (in multiple of a dword), */
+	USHORT PS_SizeInBytes:7;	/* [14:8]=Size of parameter space in Bytes (multiple of a dword), */
+	USHORT UpdatedByUtility:1;	/* [15]=Table updated by utility flag */
+#endif
+} ATOM_TABLE_ATTRIBUTE;
+
+typedef union _ATOM_TABLE_ATTRIBUTE_ACCESS {
+	ATOM_TABLE_ATTRIBUTE sbfAccess;
+	USHORT susAccess;
+} ATOM_TABLE_ATTRIBUTE_ACCESS;
+
+/****************************************************************************/
+/*  Common header for all command tables. */
+/*  Every table pointed by _ATOM_MASTER_COMMAND_TABLE has this common header. */
+/*  And the pointer actually points to this header. */
+/****************************************************************************/
+typedef struct _ATOM_COMMON_ROM_COMMAND_TABLE_HEADER {
+	ATOM_COMMON_TABLE_HEADER CommonHeader;
+	ATOM_TABLE_ATTRIBUTE TableAttribute;
+} ATOM_COMMON_ROM_COMMAND_TABLE_HEADER;
+
+/****************************************************************************/
+/*  Structures used by ComputeMemoryEnginePLLTable */
+/****************************************************************************/
+#define COMPUTE_MEMORY_PLL_PARAM        1
+#define COMPUTE_ENGINE_PLL_PARAM        2
+
+typedef struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS {
+	ULONG ulClock;		/* When returen, it's the re-calculated clock based on given Fb_div Post_Div and ref_div */
+	UCHAR ucAction;		/* 0:reserved //1:Memory //2:Engine */
+	UCHAR ucReserved;	/* may expand to return larger Fbdiv later */
+	UCHAR ucFbDiv;		/* return value */
+	UCHAR ucPostDiv;	/* return value */
+} COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS;
+
+typedef struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V2 {
+	ULONG ulClock;		/* When return, [23:0] return real clock */
+	UCHAR ucAction;		/* 0:reserved;COMPUTE_MEMORY_PLL_PARAM:Memory;COMPUTE_ENGINE_PLL_PARAM:Engine. it return ref_div to be written to register */
+	USHORT usFbDiv;		/* return Feedback value to be written to register */
+	UCHAR ucPostDiv;	/* return post div to be written to register */
+} COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V2;
+#define COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_PS_ALLOCATION   COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS
+
+#define SET_CLOCK_FREQ_MASK                     0x00FFFFFF	/* Clock change tables only take bit [23:0] as the requested clock value */
+#define USE_NON_BUS_CLOCK_MASK                  0x01000000	/* Applicable to both memory and engine clock change, when set, it uses another clock as the temporary clock (engine uses memory and vice versa) */
+#define USE_MEMORY_SELF_REFRESH_MASK            0x02000000	/* Only applicable to memory clock change, when set, using memory self refresh during clock transition */
+#define SKIP_INTERNAL_MEMORY_PARAMETER_CHANGE   0x04000000	/* Only applicable to memory clock change, when set, the table will skip predefined internal memory parameter change */
+#define FIRST_TIME_CHANGE_CLOCK									0x08000000	/* Applicable to both memory and engine clock change,when set, it means this is 1st time to change clock after ASIC bootup */
+#define SKIP_SW_PROGRAM_PLL											0x10000000	/* Applicable to both memory and engine clock change, when set, it means the table will not program SPLL/MPLL */
+#define USE_SS_ENABLED_PIXEL_CLOCK  USE_NON_BUS_CLOCK_MASK
+
+#define b3USE_NON_BUS_CLOCK_MASK                  0x01	/* Applicable to both memory and engine clock change, when set, it uses another clock as the temporary clock (engine uses memory and vice versa) */
+#define b3USE_MEMORY_SELF_REFRESH                 0x02	/* Only applicable to memory clock change, when set, using memory self refresh during clock transition */
+#define b3SKIP_INTERNAL_MEMORY_PARAMETER_CHANGE   0x04	/* Only applicable to memory clock change, when set, the table will skip predefined internal memory parameter change */
+#define b3FIRST_TIME_CHANGE_CLOCK									0x08	/* Applicable to both memory and engine clock change,when set, it means this is 1st time to change clock after ASIC bootup */
+#define b3SKIP_SW_PROGRAM_PLL											0x10	/* Applicable to both memory and engine clock change, when set, it means the table will not program SPLL/MPLL */
+
+typedef struct _ATOM_COMPUTE_CLOCK_FREQ {
+#if ATOM_BIG_ENDIAN
+	ULONG ulComputeClockFlag:8;	/*  =1: COMPUTE_MEMORY_PLL_PARAM, =2: COMPUTE_ENGINE_PLL_PARAM */
+	ULONG ulClockFreq:24;	/*  in unit of 10kHz */
+#else
+	ULONG ulClockFreq:24;	/*  in unit of 10kHz */
+	ULONG ulComputeClockFlag:8;	/*  =1: COMPUTE_MEMORY_PLL_PARAM, =2: COMPUTE_ENGINE_PLL_PARAM */
+#endif
+} ATOM_COMPUTE_CLOCK_FREQ;
+
+typedef struct _ATOM_S_MPLL_FB_DIVIDER {
+	USHORT usFbDivFrac;
+	USHORT usFbDiv;
+} ATOM_S_MPLL_FB_DIVIDER;
+
+typedef struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V3 {
+	union {
+		ATOM_COMPUTE_CLOCK_FREQ ulClock;	/* Input Parameter */
+		ATOM_S_MPLL_FB_DIVIDER ulFbDiv;	/* Output Parameter */
+	};
+	UCHAR ucRefDiv;		/* Output Parameter */
+	UCHAR ucPostDiv;	/* Output Parameter */
+	UCHAR ucCntlFlag;	/* Output Parameter */
+	UCHAR ucReserved;
+} COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V3;
+
+/*  ucCntlFlag */
+#define ATOM_PLL_CNTL_FLAG_PLL_POST_DIV_EN          1
+#define ATOM_PLL_CNTL_FLAG_MPLL_VCO_MODE            2
+#define ATOM_PLL_CNTL_FLAG_FRACTION_DISABLE         4
+
+typedef struct _DYNAMICE_MEMORY_SETTINGS_PARAMETER {
+	ATOM_COMPUTE_CLOCK_FREQ ulClock;
+	ULONG ulReserved[2];
+} DYNAMICE_MEMORY_SETTINGS_PARAMETER;
+
+typedef struct _DYNAMICE_ENGINE_SETTINGS_PARAMETER {
+	ATOM_COMPUTE_CLOCK_FREQ ulClock;
+	ULONG ulMemoryClock;
+	ULONG ulReserved;
+} DYNAMICE_ENGINE_SETTINGS_PARAMETER;
+
+/****************************************************************************/
+/*  Structures used by SetEngineClockTable */
+/****************************************************************************/
+typedef struct _SET_ENGINE_CLOCK_PARAMETERS {
+	ULONG ulTargetEngineClock;	/* In 10Khz unit */
+} SET_ENGINE_CLOCK_PARAMETERS;
+
+typedef struct _SET_ENGINE_CLOCK_PS_ALLOCATION {
+	ULONG ulTargetEngineClock;	/* In 10Khz unit */
+	COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_PS_ALLOCATION sReserved;
+} SET_ENGINE_CLOCK_PS_ALLOCATION;
+
+/****************************************************************************/
+/*  Structures used by SetMemoryClockTable */
+/****************************************************************************/
+typedef struct _SET_MEMORY_CLOCK_PARAMETERS {
+	ULONG ulTargetMemoryClock;	/* In 10Khz unit */
+} SET_MEMORY_CLOCK_PARAMETERS;
+
+typedef struct _SET_MEMORY_CLOCK_PS_ALLOCATION {
+	ULONG ulTargetMemoryClock;	/* In 10Khz unit */
+	COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_PS_ALLOCATION sReserved;
+} SET_MEMORY_CLOCK_PS_ALLOCATION;
+
+/****************************************************************************/
+/*  Structures used by ASIC_Init.ctb */
+/****************************************************************************/
+typedef struct _ASIC_INIT_PARAMETERS {
+	ULONG ulDefaultEngineClock;	/* In 10Khz unit */
+	ULONG ulDefaultMemoryClock;	/* In 10Khz unit */
+} ASIC_INIT_PARAMETERS;
+
+typedef struct _ASIC_INIT_PS_ALLOCATION {
+	ASIC_INIT_PARAMETERS sASICInitClocks;
+	SET_ENGINE_CLOCK_PS_ALLOCATION sReserved;	/* Caller doesn't need to init this structure */
+} ASIC_INIT_PS_ALLOCATION;
+
+/****************************************************************************/
+/*  Structure used by DynamicClockGatingTable.ctb */
+/****************************************************************************/
+typedef struct _DYNAMIC_CLOCK_GATING_PARAMETERS {
+	UCHAR ucEnable;		/*  ATOM_ENABLE or ATOM_DISABLE */
+	UCHAR ucPadding[3];
+} DYNAMIC_CLOCK_GATING_PARAMETERS;
+#define  DYNAMIC_CLOCK_GATING_PS_ALLOCATION  DYNAMIC_CLOCK_GATING_PARAMETERS
+
+/****************************************************************************/
+/*  Structure used by EnableASIC_StaticPwrMgtTable.ctb */
+/****************************************************************************/
+typedef struct _ENABLE_ASIC_STATIC_PWR_MGT_PARAMETERS {
+	UCHAR ucEnable;		/*  ATOM_ENABLE or ATOM_DISABLE */
+	UCHAR ucPadding[3];
+} ENABLE_ASIC_STATIC_PWR_MGT_PARAMETERS;
+#define ENABLE_ASIC_STATIC_PWR_MGT_PS_ALLOCATION  ENABLE_ASIC_STATIC_PWR_MGT_PARAMETERS
+
+/****************************************************************************/
+/*  Structures used by DAC_LoadDetectionTable.ctb */
+/****************************************************************************/
+typedef struct _DAC_LOAD_DETECTION_PARAMETERS {
+	USHORT usDeviceID;	/* {ATOM_DEVICE_CRTx_SUPPORT,ATOM_DEVICE_TVx_SUPPORT,ATOM_DEVICE_CVx_SUPPORT} */
+	UCHAR ucDacType;	/* {ATOM_DAC_A,ATOM_DAC_B, ATOM_EXT_DAC} */
+	UCHAR ucMisc;		/* Valid only when table revision =1.3 and above */
+} DAC_LOAD_DETECTION_PARAMETERS;
+
+/*  DAC_LOAD_DETECTION_PARAMETERS.ucMisc */
+#define DAC_LOAD_MISC_YPrPb						0x01
+
+typedef struct _DAC_LOAD_DETECTION_PS_ALLOCATION {
+	DAC_LOAD_DETECTION_PARAMETERS sDacload;
+	ULONG Reserved[2];	/*  Don't set this one, allocation for EXT DAC */
+} DAC_LOAD_DETECTION_PS_ALLOCATION;
+
+/****************************************************************************/
+/*  Structures used by DAC1EncoderControlTable.ctb and DAC2EncoderControlTable.ctb */
+/****************************************************************************/
+typedef struct _DAC_ENCODER_CONTROL_PARAMETERS {
+	USHORT usPixelClock;	/*  in 10KHz; for bios convenient */
+	UCHAR ucDacStandard;	/*  See definition of ATOM_DACx_xxx, For DEC3.0, bit 7 used as internal flag to indicate DAC2 (==1) or DAC1 (==0) */
+	UCHAR ucAction;		/*  0: turn off encoder */
+	/*  1: setup and turn on encoder */
+	/*  7: ATOM_ENCODER_INIT Initialize DAC */
+} DAC_ENCODER_CONTROL_PARAMETERS;
+
+#define DAC_ENCODER_CONTROL_PS_ALLOCATION  DAC_ENCODER_CONTROL_PARAMETERS
+
+/****************************************************************************/
+/*  Structures used by DIG1EncoderControlTable */
+/*                     DIG2EncoderControlTable */
+/*                     ExternalEncoderControlTable */
+/****************************************************************************/
+typedef struct _DIG_ENCODER_CONTROL_PARAMETERS {
+	USHORT usPixelClock;	/*  in 10KHz; for bios convenient */
+	UCHAR ucConfig;
+	/*  [2] Link Select: */
+	/*  =0: PHY linkA if bfLane<3 */
+	/*  =1: PHY linkB if bfLanes<3 */
+	/*  =0: PHY linkA+B if bfLanes=3 */
+	/*  [3] Transmitter Sel */
+	/*  =0: UNIPHY or PCIEPHY */
+	/*  =1: LVTMA */
+	UCHAR ucAction;		/*  =0: turn off encoder */
+	/*  =1: turn on encoder */
+	UCHAR ucEncoderMode;
+	/*  =0: DP   encoder */
+	/*  =1: LVDS encoder */
+	/*  =2: DVI  encoder */
+	/*  =3: HDMI encoder */
+	/*  =4: SDVO encoder */
+	UCHAR ucLaneNum;	/*  how many lanes to enable */
+	UCHAR ucReserved[2];
+} DIG_ENCODER_CONTROL_PARAMETERS;
+#define DIG_ENCODER_CONTROL_PS_ALLOCATION			  DIG_ENCODER_CONTROL_PARAMETERS
+#define EXTERNAL_ENCODER_CONTROL_PARAMETER			DIG_ENCODER_CONTROL_PARAMETERS
+
+/* ucConfig */
+#define ATOM_ENCODER_CONFIG_DPLINKRATE_MASK				0x01
+#define ATOM_ENCODER_CONFIG_DPLINKRATE_1_62GHZ		0x00
+#define ATOM_ENCODER_CONFIG_DPLINKRATE_2_70GHZ		0x01
+#define ATOM_ENCODER_CONFIG_LINK_SEL_MASK				  0x04
+#define ATOM_ENCODER_CONFIG_LINKA								  0x00
+#define ATOM_ENCODER_CONFIG_LINKB								  0x04
+#define ATOM_ENCODER_CONFIG_LINKA_B							  ATOM_TRANSMITTER_CONFIG_LINKA
+#define ATOM_ENCODER_CONFIG_LINKB_A							  ATOM_ENCODER_CONFIG_LINKB
+#define ATOM_ENCODER_CONFIG_TRANSMITTER_SEL_MASK	0x08
+#define ATOM_ENCODER_CONFIG_UNIPHY							  0x00
+#define ATOM_ENCODER_CONFIG_LVTMA								  0x08
+#define ATOM_ENCODER_CONFIG_TRANSMITTER1				  0x00
+#define ATOM_ENCODER_CONFIG_TRANSMITTER2				  0x08
+#define ATOM_ENCODER_CONFIG_DIGB								  0x80	/*  VBIOS Internal use, outside SW should set this bit=0 */
+/*  ucAction */
+/*  ATOM_ENABLE:  Enable Encoder */
+/*  ATOM_DISABLE: Disable Encoder */
+
+/* ucEncoderMode */
+#define ATOM_ENCODER_MODE_DP											0
+#define ATOM_ENCODER_MODE_LVDS										1
+#define ATOM_ENCODER_MODE_DVI											2
+#define ATOM_ENCODER_MODE_HDMI										3
+#define ATOM_ENCODER_MODE_SDVO										4
+#define ATOM_ENCODER_MODE_TV											13
+#define ATOM_ENCODER_MODE_CV											14
+#define ATOM_ENCODER_MODE_CRT											15
+
+typedef struct _ATOM_DIG_ENCODER_CONFIG_V2 {
+#if ATOM_BIG_ENDIAN
+	UCHAR ucReserved1:2;
+	UCHAR ucTransmitterSel:2;	/*  =0: UniphyAB, =1: UniphyCD  =2: UniphyEF */
+	UCHAR ucLinkSel:1;	/*  =0: linkA/C/E =1: linkB/D/F */
+	UCHAR ucReserved:1;
+	UCHAR ucDPLinkRate:1;	/*  =0: 1.62Ghz, =1: 2.7Ghz */
+#else
+	UCHAR ucDPLinkRate:1;	/*  =0: 1.62Ghz, =1: 2.7Ghz */
+	UCHAR ucReserved:1;
+	UCHAR ucLinkSel:1;	/*  =0: linkA/C/E =1: linkB/D/F */
+	UCHAR ucTransmitterSel:2;	/*  =0: UniphyAB, =1: UniphyCD  =2: UniphyEF */
+	UCHAR ucReserved1:2;
+#endif
+} ATOM_DIG_ENCODER_CONFIG_V2;
+
+typedef struct _DIG_ENCODER_CONTROL_PARAMETERS_V2 {
+	USHORT usPixelClock;	/*  in 10KHz; for bios convenient */
+	ATOM_DIG_ENCODER_CONFIG_V2 acConfig;
+	UCHAR ucAction;
+	UCHAR ucEncoderMode;
+	/*  =0: DP   encoder */
+	/*  =1: LVDS encoder */
+	/*  =2: DVI  encoder */
+	/*  =3: HDMI encoder */
+	/*  =4: SDVO encoder */
+	UCHAR ucLaneNum;	/*  how many lanes to enable */
+	UCHAR ucReserved[2];
+} DIG_ENCODER_CONTROL_PARAMETERS_V2;
+
+/* ucConfig */
+#define ATOM_ENCODER_CONFIG_V2_DPLINKRATE_MASK				0x01
+#define ATOM_ENCODER_CONFIG_V2_DPLINKRATE_1_62GHZ		  0x00
+#define ATOM_ENCODER_CONFIG_V2_DPLINKRATE_2_70GHZ		  0x01
+#define ATOM_ENCODER_CONFIG_V2_LINK_SEL_MASK				  0x04
+#define ATOM_ENCODER_CONFIG_V2_LINKA								  0x00
+#define ATOM_ENCODER_CONFIG_V2_LINKB								  0x04
+#define ATOM_ENCODER_CONFIG_V2_TRANSMITTER_SEL_MASK	  0x18
+#define ATOM_ENCODER_CONFIG_V2_TRANSMITTER1				    0x00
+#define ATOM_ENCODER_CONFIG_V2_TRANSMITTER2				    0x08
+#define ATOM_ENCODER_CONFIG_V2_TRANSMITTER3				    0x10
+
+/****************************************************************************/
+/*  Structures used by UNIPHYTransmitterControlTable */
+/*                     LVTMATransmitterControlTable */
+/*                     DVOOutputControlTable */
+/****************************************************************************/
+typedef struct _ATOM_DP_VS_MODE {
+	UCHAR ucLaneSel;
+	UCHAR ucLaneSet;
+} ATOM_DP_VS_MODE;
+
+typedef struct _DIG_TRANSMITTER_CONTROL_PARAMETERS {
+	union {
+		USHORT usPixelClock;	/*  in 10KHz; for bios convenient */
+		USHORT usInitInfo;	/*  when init uniphy,lower 8bit is used for connector type defined in objectid.h */
+		ATOM_DP_VS_MODE asMode;	/*  DP Voltage swing mode */
+	};
+	UCHAR ucConfig;
+	/*  [0]=0: 4 lane Link, */
+	/*     =1: 8 lane Link ( Dual Links TMDS ) */
+	/*  [1]=0: InCoherent mode */
+	/*     =1: Coherent Mode */
+	/*  [2] Link Select: */
+	/*  =0: PHY linkA   if bfLane<3 */
+	/*  =1: PHY linkB   if bfLanes<3 */
+	/*  =0: PHY linkA+B if bfLanes=3 */
+	/*  [5:4]PCIE lane Sel */
+	/*  =0: lane 0~3 or 0~7 */
+	/*  =1: lane 4~7 */
+	/*  =2: lane 8~11 or 8~15 */
+	/*  =3: lane 12~15 */
+	UCHAR ucAction;		/*  =0: turn off encoder */
+	/*  =1: turn on encoder */
+	UCHAR ucReserved[4];
+} DIG_TRANSMITTER_CONTROL_PARAMETERS;
+
+#define DIG_TRANSMITTER_CONTROL_PS_ALLOCATION		DIG_TRANSMITTER_CONTROL_PARAMETERS
+
+/* ucInitInfo */
+#define ATOM_TRAMITTER_INITINFO_CONNECTOR_MASK	0x00ff
+
+/* ucConfig */
+#define ATOM_TRANSMITTER_CONFIG_8LANE_LINK			0x01
+#define ATOM_TRANSMITTER_CONFIG_COHERENT				0x02
+#define ATOM_TRANSMITTER_CONFIG_LINK_SEL_MASK		0x04
+#define ATOM_TRANSMITTER_CONFIG_LINKA						0x00
+#define ATOM_TRANSMITTER_CONFIG_LINKB						0x04
+#define ATOM_TRANSMITTER_CONFIG_LINKA_B					0x00
+#define ATOM_TRANSMITTER_CONFIG_LINKB_A					0x04
+
+#define ATOM_TRANSMITTER_CONFIG_ENCODER_SEL_MASK	0x08	/*  only used when ATOM_TRANSMITTER_ACTION_ENABLE */
+#define ATOM_TRANSMITTER_CONFIG_DIG1_ENCODER		0x00	/*  only used when ATOM_TRANSMITTER_ACTION_ENABLE */
+#define ATOM_TRANSMITTER_CONFIG_DIG2_ENCODER		0x08	/*  only used when ATOM_TRANSMITTER_ACTION_ENABLE */
+
+#define ATOM_TRANSMITTER_CONFIG_CLKSRC_MASK			0x30
+#define ATOM_TRANSMITTER_CONFIG_CLKSRC_PPLL			0x00
+#define ATOM_TRANSMITTER_CONFIG_CLKSRC_PCIE			0x20
+#define ATOM_TRANSMITTER_CONFIG_CLKSRC_XTALIN		0x30
+#define ATOM_TRANSMITTER_CONFIG_LANE_SEL_MASK		0xc0
+#define ATOM_TRANSMITTER_CONFIG_LANE_0_3				0x00
+#define ATOM_TRANSMITTER_CONFIG_LANE_0_7				0x00
+#define ATOM_TRANSMITTER_CONFIG_LANE_4_7				0x40
+#define ATOM_TRANSMITTER_CONFIG_LANE_8_11				0x80
+#define ATOM_TRANSMITTER_CONFIG_LANE_8_15				0x80
+#define ATOM_TRANSMITTER_CONFIG_LANE_12_15			0xc0
+
+/* ucAction */
+#define ATOM_TRANSMITTER_ACTION_DISABLE					       0
+#define ATOM_TRANSMITTER_ACTION_ENABLE					       1
+#define ATOM_TRANSMITTER_ACTION_LCD_BLOFF				       2
+#define ATOM_TRANSMITTER_ACTION_LCD_BLON				       3
+#define ATOM_TRANSMITTER_ACTION_BL_BRIGHTNESS_CONTROL  4
+#define ATOM_TRANSMITTER_ACTION_LCD_SELFTEST_START		 5
+#define ATOM_TRANSMITTER_ACTION_LCD_SELFTEST_STOP			 6
+#define ATOM_TRANSMITTER_ACTION_INIT						       7
+#define ATOM_TRANSMITTER_ACTION_DISABLE_OUTPUT	       8
+#define ATOM_TRANSMITTER_ACTION_ENABLE_OUTPUT		       9
+#define ATOM_TRANSMITTER_ACTION_SETUP						       10
+#define ATOM_TRANSMITTER_ACTION_SETUP_VSEMPH           11
+
+/*  Following are used for DigTransmitterControlTable ver1.2 */
+typedef struct _ATOM_DIG_TRANSMITTER_CONFIG_V2 {
+#if ATOM_BIG_ENDIAN
+	UCHAR ucTransmitterSel:2;	/* bit7:6: =0 Dig Transmitter 1 ( Uniphy AB ) */
+	/*         =1 Dig Transmitter 2 ( Uniphy CD ) */
+	/*         =2 Dig Transmitter 3 ( Uniphy EF ) */
+	UCHAR ucReserved:1;
+	UCHAR fDPConnector:1;	/* bit4=0: DP connector  =1: None DP connector */
+	UCHAR ucEncoderSel:1;	/* bit3=0: Data/Clk path source from DIGA( DIG inst0 ). =1: Data/clk path source from DIGB ( DIG inst1 ) */
+	UCHAR ucLinkSel:1;	/* bit2=0: Uniphy LINKA or C or E when fDualLinkConnector=0. when fDualLinkConnector=1, it means master link of dual link is A or C or E */
+	/*     =1: Uniphy LINKB or D or F when fDualLinkConnector=0. when fDualLinkConnector=1, it means master link of dual link is B or D or F */
+
+	UCHAR fCoherentMode:1;	/* bit1=1: Coherent Mode ( for DVI/HDMI mode ) */
+	UCHAR fDualLinkConnector:1;	/* bit0=1: Dual Link DVI connector */
+#else
+	UCHAR fDualLinkConnector:1;	/* bit0=1: Dual Link DVI connector */
+	UCHAR fCoherentMode:1;	/* bit1=1: Coherent Mode ( for DVI/HDMI mode ) */
+	UCHAR ucLinkSel:1;	/* bit2=0: Uniphy LINKA or C or E when fDualLinkConnector=0. when fDualLinkConnector=1, it means master link of dual link is A or C or E */
+	/*     =1: Uniphy LINKB or D or F when fDualLinkConnector=0. when fDualLinkConnector=1, it means master link of dual link is B or D or F */
+	UCHAR ucEncoderSel:1;	/* bit3=0: Data/Clk path source from DIGA( DIG inst0 ). =1: Data/clk path source from DIGB ( DIG inst1 ) */
+	UCHAR fDPConnector:1;	/* bit4=0: DP connector  =1: None DP connector */
+	UCHAR ucReserved:1;
+	UCHAR ucTransmitterSel:2;	/* bit7:6: =0 Dig Transmitter 1 ( Uniphy AB ) */
+	/*         =1 Dig Transmitter 2 ( Uniphy CD ) */
+	/*         =2 Dig Transmitter 3 ( Uniphy EF ) */
+#endif
+} ATOM_DIG_TRANSMITTER_CONFIG_V2;
+
+/* ucConfig */
+/* Bit0 */
+#define ATOM_TRANSMITTER_CONFIG_V2_DUAL_LINK_CONNECTOR			0x01
+
+/* Bit1 */
+#define ATOM_TRANSMITTER_CONFIG_V2_COHERENT				          0x02
+
+/* Bit2 */
+#define ATOM_TRANSMITTER_CONFIG_V2_LINK_SEL_MASK		        0x04
+#define ATOM_TRANSMITTER_CONFIG_V2_LINKA			            0x00
+#define ATOM_TRANSMITTER_CONFIG_V2_LINKB				            0x04
+
+/*  Bit3 */
+#define ATOM_TRANSMITTER_CONFIG_V2_ENCODER_SEL_MASK	        0x08
+#define ATOM_TRANSMITTER_CONFIG_V2_DIG1_ENCODER		          0x00	/*  only used when ucAction == ATOM_TRANSMITTER_ACTION_ENABLE or ATOM_TRANSMITTER_ACTION_SETUP */
+#define ATOM_TRANSMITTER_CONFIG_V2_DIG2_ENCODER		          0x08	/*  only used when ucAction == ATOM_TRANSMITTER_ACTION_ENABLE or ATOM_TRANSMITTER_ACTION_SETUP */
+
+/*  Bit4 */
+#define ATOM_TRASMITTER_CONFIG_V2_DP_CONNECTOR			        0x10
+
+/*  Bit7:6 */
+#define ATOM_TRANSMITTER_CONFIG_V2_TRANSMITTER_SEL_MASK     0xC0
+#define ATOM_TRANSMITTER_CONFIG_V2_TRANSMITTER1			0x00	/* AB */
+#define ATOM_TRANSMITTER_CONFIG_V2_TRANSMITTER2			0x40	/* CD */
+#define ATOM_TRANSMITTER_CONFIG_V2_TRANSMITTER3			0x80	/* EF */
+
+typedef struct _DIG_TRANSMITTER_CONTROL_PARAMETERS_V2 {
+	union {
+		USHORT usPixelClock;	/*  in 10KHz; for bios convenient */
+		USHORT usInitInfo;	/*  when init uniphy,lower 8bit is used for connector type defined in objectid.h */
+		ATOM_DP_VS_MODE asMode;	/*  DP Voltage swing mode */
+	};
+	ATOM_DIG_TRANSMITTER_CONFIG_V2 acConfig;
+	UCHAR ucAction;		/*  define as ATOM_TRANSMITER_ACTION_XXX */
+	UCHAR ucReserved[4];
+} DIG_TRANSMITTER_CONTROL_PARAMETERS_V2;
+
+/****************************************************************************/
+/*  Structures used by DAC1OuputControlTable */
+/*                     DAC2OuputControlTable */
+/*                     LVTMAOutputControlTable  (Before DEC30) */
+/*                     TMDSAOutputControlTable  (Before DEC30) */
+/****************************************************************************/
+typedef struct _DISPLAY_DEVICE_OUTPUT_CONTROL_PARAMETERS {
+	UCHAR ucAction;		/*  Possible input:ATOM_ENABLE||ATOMDISABLE */
+	/*  When the display is LCD, in addition to above: */
+	/*  ATOM_LCD_BLOFF|| ATOM_LCD_BLON ||ATOM_LCD_BL_BRIGHTNESS_CONTROL||ATOM_LCD_SELFTEST_START|| */
+	/*  ATOM_LCD_SELFTEST_STOP */
+
+	UCHAR aucPadding[3];	/*  padding to DWORD aligned */
+} DISPLAY_DEVICE_OUTPUT_CONTROL_PARAMETERS;
+
+#define DISPLAY_DEVICE_OUTPUT_CONTROL_PS_ALLOCATION DISPLAY_DEVICE_OUTPUT_CONTROL_PARAMETERS
+
+#define CRT1_OUTPUT_CONTROL_PARAMETERS     DISPLAY_DEVICE_OUTPUT_CONTROL_PARAMETERS
+#define CRT1_OUTPUT_CONTROL_PS_ALLOCATION  DISPLAY_DEVICE_OUTPUT_CONTROL_PS_ALLOCATION
+
+#define CRT2_OUTPUT_CONTROL_PARAMETERS     DISPLAY_DEVICE_OUTPUT_CONTROL_PARAMETERS
+#define CRT2_OUTPUT_CONTROL_PS_ALLOCATION  DISPLAY_DEVICE_OUTPUT_CONTROL_PS_ALLOCATION
+
+#define CV1_OUTPUT_CONTROL_PARAMETERS      DISPLAY_DEVICE_OUTPUT_CONTROL_PARAMETERS
+#define CV1_OUTPUT_CONTROL_PS_ALLOCATION   DISPLAY_DEVICE_OUTPUT_CONTROL_PS_ALLOCATION
+
+#define TV1_OUTPUT_CONTROL_PARAMETERS      DISPLAY_DEVICE_OUTPUT_CONTROL_PARAMETERS
+#define TV1_OUTPUT_CONTROL_PS_ALLOCATION   DISPLAY_DEVICE_OUTPUT_CONTROL_PS_ALLOCATION
+
+#define DFP1_OUTPUT_CONTROL_PARAMETERS     DISPLAY_DEVICE_OUTPUT_CONTROL_PARAMETERS
+#define DFP1_OUTPUT_CONTROL_PS_ALLOCATION  DISPLAY_DEVICE_OUTPUT_CONTROL_PS_ALLOCATION
+
+#define DFP2_OUTPUT_CONTROL_PARAMETERS     DISPLAY_DEVICE_OUTPUT_CONTROL_PARAMETERS
+#define DFP2_OUTPUT_CONTROL_PS_ALLOCATION  DISPLAY_DEVICE_OUTPUT_CONTROL_PS_ALLOCATION
+
+#define LCD1_OUTPUT_CONTROL_PARAMETERS     DISPLAY_DEVICE_OUTPUT_CONTROL_PARAMETERS
+#define LCD1_OUTPUT_CONTROL_PS_ALLOCATION  DISPLAY_DEVICE_OUTPUT_CONTROL_PS_ALLOCATION
+
+#define DVO_OUTPUT_CONTROL_PARAMETERS      DISPLAY_DEVICE_OUTPUT_CONTROL_PARAMETERS
+#define DVO_OUTPUT_CONTROL_PS_ALLOCATION   DIG_TRANSMITTER_CONTROL_PS_ALLOCATION
+#define DVO_OUTPUT_CONTROL_PARAMETERS_V3	 DIG_TRANSMITTER_CONTROL_PARAMETERS
+
+/****************************************************************************/
+/*  Structures used by BlankCRTCTable */
+/****************************************************************************/
+typedef struct _BLANK_CRTC_PARAMETERS {
+	UCHAR ucCRTC;		/*  ATOM_CRTC1 or ATOM_CRTC2 */
+	UCHAR ucBlanking;	/*  ATOM_BLANKING or ATOM_BLANKINGOFF */
+	USHORT usBlackColorRCr;
+	USHORT usBlackColorGY;
+	USHORT usBlackColorBCb;
+} BLANK_CRTC_PARAMETERS;
+#define BLANK_CRTC_PS_ALLOCATION    BLANK_CRTC_PARAMETERS
+
+/****************************************************************************/
+/*  Structures used by EnableCRTCTable */
+/*                     EnableCRTCMemReqTable */
+/*                     UpdateCRTC_DoubleBufferRegistersTable */
+/****************************************************************************/
+typedef struct _ENABLE_CRTC_PARAMETERS {
+	UCHAR ucCRTC;		/*  ATOM_CRTC1 or ATOM_CRTC2 */
+	UCHAR ucEnable;		/*  ATOM_ENABLE or ATOM_DISABLE */
+	UCHAR ucPadding[2];
+} ENABLE_CRTC_PARAMETERS;
+#define ENABLE_CRTC_PS_ALLOCATION   ENABLE_CRTC_PARAMETERS
+
+/****************************************************************************/
+/*  Structures used by SetCRTC_OverScanTable */
+/****************************************************************************/
+typedef struct _SET_CRTC_OVERSCAN_PARAMETERS {
+	USHORT usOverscanRight;	/*  right */
+	USHORT usOverscanLeft;	/*  left */
+	USHORT usOverscanBottom;	/*  bottom */
+	USHORT usOverscanTop;	/*  top */
+	UCHAR ucCRTC;		/*  ATOM_CRTC1 or ATOM_CRTC2 */
+	UCHAR ucPadding[3];
+} SET_CRTC_OVERSCAN_PARAMETERS;
+#define SET_CRTC_OVERSCAN_PS_ALLOCATION  SET_CRTC_OVERSCAN_PARAMETERS
+
+/****************************************************************************/
+/*  Structures used by SetCRTC_ReplicationTable */
+/****************************************************************************/
+typedef struct _SET_CRTC_REPLICATION_PARAMETERS {
+	UCHAR ucH_Replication;	/*  horizontal replication */
+	UCHAR ucV_Replication;	/*  vertical replication */
+	UCHAR usCRTC;		/*  ATOM_CRTC1 or ATOM_CRTC2 */
+	UCHAR ucPadding;
+} SET_CRTC_REPLICATION_PARAMETERS;
+#define SET_CRTC_REPLICATION_PS_ALLOCATION  SET_CRTC_REPLICATION_PARAMETERS
+
+/****************************************************************************/
+/*  Structures used by SelectCRTC_SourceTable */
+/****************************************************************************/
+typedef struct _SELECT_CRTC_SOURCE_PARAMETERS {
+	UCHAR ucCRTC;		/*  ATOM_CRTC1 or ATOM_CRTC2 */
+	UCHAR ucDevice;		/*  ATOM_DEVICE_CRT1|ATOM_DEVICE_CRT2|.... */
+	UCHAR ucPadding[2];
+} SELECT_CRTC_SOURCE_PARAMETERS;
+#define SELECT_CRTC_SOURCE_PS_ALLOCATION  SELECT_CRTC_SOURCE_PARAMETERS
+
+typedef struct _SELECT_CRTC_SOURCE_PARAMETERS_V2 {
+	UCHAR ucCRTC;		/*  ATOM_CRTC1 or ATOM_CRTC2 */
+	UCHAR ucEncoderID;	/*  DAC1/DAC2/TVOUT/DIG1/DIG2/DVO */
+	UCHAR ucEncodeMode;	/*  Encoding mode, only valid when using DIG1/DIG2/DVO */
+	UCHAR ucPadding;
+} SELECT_CRTC_SOURCE_PARAMETERS_V2;
+
+/* ucEncoderID */
+/* #define ASIC_INT_DAC1_ENCODER_ID                                              0x00 */
+/* #define ASIC_INT_TV_ENCODER_ID                                                                        0x02 */
+/* #define ASIC_INT_DIG1_ENCODER_ID                                                              0x03 */
+/* #define ASIC_INT_DAC2_ENCODER_ID                                                              0x04 */
+/* #define ASIC_EXT_TV_ENCODER_ID                                                                        0x06 */
+/* #define ASIC_INT_DVO_ENCODER_ID                                                                       0x07 */
+/* #define ASIC_INT_DIG2_ENCODER_ID                                                              0x09 */
+/* #define ASIC_EXT_DIG_ENCODER_ID                                                                       0x05 */
+
+/* ucEncodeMode */
+/* #define ATOM_ENCODER_MODE_DP                                                                          0 */
+/* #define ATOM_ENCODER_MODE_LVDS                                                                        1 */
+/* #define ATOM_ENCODER_MODE_DVI                                                                         2 */
+/* #define ATOM_ENCODER_MODE_HDMI                                                                        3 */
+/* #define ATOM_ENCODER_MODE_SDVO                                                                        4 */
+/* #define ATOM_ENCODER_MODE_TV                                                                          13 */
+/* #define ATOM_ENCODER_MODE_CV                                                                          14 */
+/* #define ATOM_ENCODER_MODE_CRT                                                                         15 */
+
+/****************************************************************************/
+/*  Structures used by SetPixelClockTable */
+/*                     GetPixelClockTable */
+/****************************************************************************/
+/* Major revision=1., Minor revision=1 */
+typedef struct _PIXEL_CLOCK_PARAMETERS {
+	USHORT usPixelClock;	/*  in 10kHz unit; for bios convenient = (RefClk*FB_Div)/(Ref_Div*Post_Div) */
+	/*  0 means disable PPLL */
+	USHORT usRefDiv;	/*  Reference divider */
+	USHORT usFbDiv;		/*  feedback divider */
+	UCHAR ucPostDiv;	/*  post divider */
+	UCHAR ucFracFbDiv;	/*  fractional feedback divider */
+	UCHAR ucPpll;		/*  ATOM_PPLL1 or ATOM_PPL2 */
+	UCHAR ucRefDivSrc;	/*  ATOM_PJITTER or ATO_NONPJITTER */
+	UCHAR ucCRTC;		/*  Which CRTC uses this Ppll */
+	UCHAR ucPadding;
+} PIXEL_CLOCK_PARAMETERS;
+
+/* Major revision=1., Minor revision=2, add ucMiscIfno */
+/* ucMiscInfo: */
+#define MISC_FORCE_REPROG_PIXEL_CLOCK 0x1
+#define MISC_DEVICE_INDEX_MASK        0xF0
+#define MISC_DEVICE_INDEX_SHIFT       4
+
+typedef struct _PIXEL_CLOCK_PARAMETERS_V2 {
+	USHORT usPixelClock;	/*  in 10kHz unit; for bios convenient = (RefClk*FB_Div)/(Ref_Div*Post_Div) */
+	/*  0 means disable PPLL */
+	USHORT usRefDiv;	/*  Reference divider */
+	USHORT usFbDiv;		/*  feedback divider */
+	UCHAR ucPostDiv;	/*  post divider */
+	UCHAR ucFracFbDiv;	/*  fractional feedback divider */
+	UCHAR ucPpll;		/*  ATOM_PPLL1 or ATOM_PPL2 */
+	UCHAR ucRefDivSrc;	/*  ATOM_PJITTER or ATO_NONPJITTER */
+	UCHAR ucCRTC;		/*  Which CRTC uses this Ppll */
+	UCHAR ucMiscInfo;	/*  Different bits for different purpose, bit [7:4] as device index, bit[0]=Force prog */
+} PIXEL_CLOCK_PARAMETERS_V2;
+
+/* Major revision=1., Minor revision=3, structure/definition change */
+/* ucEncoderMode: */
+/* ATOM_ENCODER_MODE_DP */
+/* ATOM_ENOCDER_MODE_LVDS */
+/* ATOM_ENOCDER_MODE_DVI */
+/* ATOM_ENOCDER_MODE_HDMI */
+/* ATOM_ENOCDER_MODE_SDVO */
+/* ATOM_ENCODER_MODE_TV                                                                          13 */
+/* ATOM_ENCODER_MODE_CV                                                                          14 */
+/* ATOM_ENCODER_MODE_CRT                                                                         15 */
+
+/* ucDVOConfig */
+/* #define DVO_ENCODER_CONFIG_RATE_SEL                                                   0x01 */
+/* #define DVO_ENCODER_CONFIG_DDR_SPEED                                          0x00 */
+/* #define DVO_ENCODER_CONFIG_SDR_SPEED                                          0x01 */
+/* #define DVO_ENCODER_CONFIG_OUTPUT_SEL                                         0x0c */
+/* #define DVO_ENCODER_CONFIG_LOW12BIT                                                   0x00 */
+/* #define DVO_ENCODER_CONFIG_UPPER12BIT                                         0x04 */
+/* #define DVO_ENCODER_CONFIG_24BIT                                                              0x08 */
+
+/* ucMiscInfo: also changed, see below */
+#define PIXEL_CLOCK_MISC_FORCE_PROG_PPLL						0x01
+#define PIXEL_CLOCK_MISC_VGA_MODE										0x02
+#define PIXEL_CLOCK_MISC_CRTC_SEL_MASK							0x04
+#define PIXEL_CLOCK_MISC_CRTC_SEL_CRTC1							0x00
+#define PIXEL_CLOCK_MISC_CRTC_SEL_CRTC2							0x04
+#define PIXEL_CLOCK_MISC_USE_ENGINE_FOR_DISPCLK			0x08
+
+typedef struct _PIXEL_CLOCK_PARAMETERS_V3 {
+	USHORT usPixelClock;	/*  in 10kHz unit; for bios convenient = (RefClk*FB_Div)/(Ref_Div*Post_Div) */
+	/*  0 means disable PPLL. For VGA PPLL,make sure this value is not 0. */
+	USHORT usRefDiv;	/*  Reference divider */
+	USHORT usFbDiv;		/*  feedback divider */
+	UCHAR ucPostDiv;	/*  post divider */
+	UCHAR ucFracFbDiv;	/*  fractional feedback divider */
+	UCHAR ucPpll;		/*  ATOM_PPLL1 or ATOM_PPL2 */
+	UCHAR ucTransmitterId;	/*  graphic encoder id defined in objectId.h */
+	union {
+		UCHAR ucEncoderMode;	/*  encoder type defined as ATOM_ENCODER_MODE_DP/DVI/HDMI/ */
+		UCHAR ucDVOConfig;	/*  when use DVO, need to know SDR/DDR, 12bit or 24bit */
+	};
+	UCHAR ucMiscInfo;	/*  bit[0]=Force program, bit[1]= set pclk for VGA, b[2]= CRTC sel */
+	/*  bit[3]=0:use PPLL for dispclk source, =1: use engine clock for dispclock source */
+} PIXEL_CLOCK_PARAMETERS_V3;
+
+#define PIXEL_CLOCK_PARAMETERS_LAST			PIXEL_CLOCK_PARAMETERS_V2
+#define GET_PIXEL_CLOCK_PS_ALLOCATION		PIXEL_CLOCK_PARAMETERS_LAST
+
+/****************************************************************************/
+/*  Structures used by AdjustDisplayPllTable */
+/****************************************************************************/
+typedef struct _ADJUST_DISPLAY_PLL_PARAMETERS {
+	USHORT usPixelClock;
+	UCHAR ucTransmitterID;
+	UCHAR ucEncodeMode;
+	union {
+		UCHAR ucDVOConfig;	/* if DVO, need passing link rate and output 12bitlow or 24bit */
+		UCHAR ucConfig;	/* if none DVO, not defined yet */
+	};
+	UCHAR ucReserved[3];
+} ADJUST_DISPLAY_PLL_PARAMETERS;
+
+#define ADJUST_DISPLAY_CONFIG_SS_ENABLE       0x10
+
+#define ADJUST_DISPLAY_PLL_PS_ALLOCATION			ADJUST_DISPLAY_PLL_PARAMETERS
+
+/****************************************************************************/
+/*  Structures used by EnableYUVTable */
+/****************************************************************************/
+typedef struct _ENABLE_YUV_PARAMETERS {
+	UCHAR ucEnable;		/*  ATOM_ENABLE:Enable YUV or ATOM_DISABLE:Disable YUV (RGB) */
+	UCHAR ucCRTC;		/*  Which CRTC needs this YUV or RGB format */
+	UCHAR ucPadding[2];
+} ENABLE_YUV_PARAMETERS;
+#define ENABLE_YUV_PS_ALLOCATION ENABLE_YUV_PARAMETERS
+
+/****************************************************************************/
+/*  Structures used by GetMemoryClockTable */
+/****************************************************************************/
+typedef struct _GET_MEMORY_CLOCK_PARAMETERS {
+	ULONG ulReturnMemoryClock;	/*  current memory speed in 10KHz unit */
+} GET_MEMORY_CLOCK_PARAMETERS;
+#define GET_MEMORY_CLOCK_PS_ALLOCATION  GET_MEMORY_CLOCK_PARAMETERS
+
+/****************************************************************************/
+/*  Structures used by GetEngineClockTable */
+/****************************************************************************/
+typedef struct _GET_ENGINE_CLOCK_PARAMETERS {
+	ULONG ulReturnEngineClock;	/*  current engine speed in 10KHz unit */
+} GET_ENGINE_CLOCK_PARAMETERS;
+#define GET_ENGINE_CLOCK_PS_ALLOCATION  GET_ENGINE_CLOCK_PARAMETERS
+
+/****************************************************************************/
+/*  Following Structures and constant may be obsolete */
+/****************************************************************************/
+/* Maxium 8 bytes,the data read in will be placed in the parameter space. */
+/* Read operaion successeful when the paramter space is non-zero, otherwise read operation failed */
+typedef struct _READ_EDID_FROM_HW_I2C_DATA_PARAMETERS {
+	USHORT usPrescale;	/* Ratio between Engine clock and I2C clock */
+	USHORT usVRAMAddress;	/* Adress in Frame Buffer where to pace raw EDID */
+	USHORT usStatus;	/* When use output: lower byte EDID checksum, high byte hardware status */
+	/* WHen use input:  lower byte as 'byte to read':currently limited to 128byte or 1byte */
+	UCHAR ucSlaveAddr;	/* Read from which slave */
+	UCHAR ucLineNumber;	/* Read from which HW assisted line */
+} READ_EDID_FROM_HW_I2C_DATA_PARAMETERS;
+#define READ_EDID_FROM_HW_I2C_DATA_PS_ALLOCATION  READ_EDID_FROM_HW_I2C_DATA_PARAMETERS
+
+#define  ATOM_WRITE_I2C_FORMAT_PSOFFSET_PSDATABYTE                  0
+#define  ATOM_WRITE_I2C_FORMAT_PSOFFSET_PSTWODATABYTES              1
+#define  ATOM_WRITE_I2C_FORMAT_PSCOUNTER_PSOFFSET_IDDATABLOCK       2
+#define  ATOM_WRITE_I2C_FORMAT_PSCOUNTER_IDOFFSET_PLUS_IDDATABLOCK  3
+#define  ATOM_WRITE_I2C_FORMAT_IDCOUNTER_IDOFFSET_IDDATABLOCK       4
+
+typedef struct _WRITE_ONE_BYTE_HW_I2C_DATA_PARAMETERS {
+	USHORT usPrescale;	/* Ratio between Engine clock and I2C clock */
+	USHORT usByteOffset;	/* Write to which byte */
+	/* Upper portion of usByteOffset is Format of data */
+	/* 1bytePS+offsetPS */
+	/* 2bytesPS+offsetPS */
+	/* blockID+offsetPS */
+	/* blockID+offsetID */
+	/* blockID+counterID+offsetID */
+	UCHAR ucData;		/* PS data1 */
+	UCHAR ucStatus;		/* Status byte 1=success, 2=failure, Also is used as PS data2 */
+	UCHAR ucSlaveAddr;	/* Write to which slave */
+	UCHAR ucLineNumber;	/* Write from which HW assisted line */
+} WRITE_ONE_BYTE_HW_I2C_DATA_PARAMETERS;
+
+#define WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION  WRITE_ONE_BYTE_HW_I2C_DATA_PARAMETERS
+
+typedef struct _SET_UP_HW_I2C_DATA_PARAMETERS {
+	USHORT usPrescale;	/* Ratio between Engine clock and I2C clock */
+	UCHAR ucSlaveAddr;	/* Write to which slave */
+	UCHAR ucLineNumber;	/* Write from which HW assisted line */
+} SET_UP_HW_I2C_DATA_PARAMETERS;
+
+/**************************************************************************/
+#define SPEED_FAN_CONTROL_PS_ALLOCATION   WRITE_ONE_BYTE_HW_I2C_DATA_PARAMETERS
+
+/****************************************************************************/
+/*  Structures used by PowerConnectorDetectionTable */
+/****************************************************************************/
+typedef struct _POWER_CONNECTOR_DETECTION_PARAMETERS {
+	UCHAR ucPowerConnectorStatus;	/* Used for return value 0: detected, 1:not detected */
+	UCHAR ucPwrBehaviorId;
+	USHORT usPwrBudget;	/* how much power currently boot to in unit of watt */
+} POWER_CONNECTOR_DETECTION_PARAMETERS;
+
+typedef struct POWER_CONNECTOR_DETECTION_PS_ALLOCATION {
+	UCHAR ucPowerConnectorStatus;	/* Used for return value 0: detected, 1:not detected */
+	UCHAR ucReserved;
+	USHORT usPwrBudget;	/* how much power currently boot to in unit of watt */
+	WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION sReserved;
+} POWER_CONNECTOR_DETECTION_PS_ALLOCATION;
+
+/****************************LVDS SS Command Table Definitions**********************/
+
+/****************************************************************************/
+/*  Structures used by EnableSpreadSpectrumOnPPLLTable */
+/****************************************************************************/
+typedef struct _ENABLE_LVDS_SS_PARAMETERS {
+	USHORT usSpreadSpectrumPercentage;
+	UCHAR ucSpreadSpectrumType;	/* Bit1=0 Down Spread,=1 Center Spread. Bit1=1 Ext. =0 Int. Others:TBD */
+	UCHAR ucSpreadSpectrumStepSize_Delay;	/* bits3:2 SS_STEP_SIZE; bit 6:4 SS_DELAY */
+	UCHAR ucEnable;		/* ATOM_ENABLE or ATOM_DISABLE */
+	UCHAR ucPadding[3];
+} ENABLE_LVDS_SS_PARAMETERS;
+
+/* ucTableFormatRevision=1,ucTableContentRevision=2 */
+typedef struct _ENABLE_LVDS_SS_PARAMETERS_V2 {
+	USHORT usSpreadSpectrumPercentage;
+	UCHAR ucSpreadSpectrumType;	/* Bit1=0 Down Spread,=1 Center Spread. Bit1=1 Ext. =0 Int. Others:TBD */
+	UCHAR ucSpreadSpectrumStep;	/*  */
+	UCHAR ucEnable;		/* ATOM_ENABLE or ATOM_DISABLE */
+	UCHAR ucSpreadSpectrumDelay;
+	UCHAR ucSpreadSpectrumRange;
+	UCHAR ucPadding;
+} ENABLE_LVDS_SS_PARAMETERS_V2;
+
+/* This new structure is based on ENABLE_LVDS_SS_PARAMETERS but expands to SS on PPLL, so other devices can use SS. */
+typedef struct _ENABLE_SPREAD_SPECTRUM_ON_PPLL {
+	USHORT usSpreadSpectrumPercentage;
+	UCHAR ucSpreadSpectrumType;	/*  Bit1=0 Down Spread,=1 Center Spread. Bit1=1 Ext. =0 Int. Others:TBD */
+	UCHAR ucSpreadSpectrumStep;	/*  */
+	UCHAR ucEnable;		/*  ATOM_ENABLE or ATOM_DISABLE */
+	UCHAR ucSpreadSpectrumDelay;
+	UCHAR ucSpreadSpectrumRange;
+	UCHAR ucPpll;		/*  ATOM_PPLL1/ATOM_PPLL2 */
+} ENABLE_SPREAD_SPECTRUM_ON_PPLL;
+
+#define ENABLE_SPREAD_SPECTRUM_ON_PPLL_PS_ALLOCATION  ENABLE_SPREAD_SPECTRUM_ON_PPLL
+
+/**************************************************************************/
+
+typedef struct _SET_PIXEL_CLOCK_PS_ALLOCATION {
+	PIXEL_CLOCK_PARAMETERS sPCLKInput;
+	ENABLE_SPREAD_SPECTRUM_ON_PPLL sReserved;	/* Caller doesn't need to init this portion */
+} SET_PIXEL_CLOCK_PS_ALLOCATION;
+
+#define ENABLE_VGA_RENDER_PS_ALLOCATION   SET_PIXEL_CLOCK_PS_ALLOCATION
+
+/****************************************************************************/
+/*  Structures used by ### */
+/****************************************************************************/
+typedef struct _MEMORY_TRAINING_PARAMETERS {
+	ULONG ulTargetMemoryClock;	/* In 10Khz unit */
+} MEMORY_TRAINING_PARAMETERS;
+#define MEMORY_TRAINING_PS_ALLOCATION MEMORY_TRAINING_PARAMETERS
+
+/****************************LVDS and other encoder command table definitions **********************/
+
+/****************************************************************************/
+/*  Structures used by LVDSEncoderControlTable   (Before DCE30) */
+/*                     LVTMAEncoderControlTable  (Before DCE30) */
+/*                     TMDSAEncoderControlTable  (Before DCE30) */
+/****************************************************************************/
+typedef struct _LVDS_ENCODER_CONTROL_PARAMETERS {
+	USHORT usPixelClock;	/*  in 10KHz; for bios convenient */
+	UCHAR ucMisc;		/*  bit0=0: Enable single link */
+	/*      =1: Enable dual link */
+	/*  Bit1=0: 666RGB */
+	/*      =1: 888RGB */
+	UCHAR ucAction;		/*  0: turn off encoder */
+	/*  1: setup and turn on encoder */
+} LVDS_ENCODER_CONTROL_PARAMETERS;
+
+#define LVDS_ENCODER_CONTROL_PS_ALLOCATION  LVDS_ENCODER_CONTROL_PARAMETERS
+
+#define TMDS1_ENCODER_CONTROL_PARAMETERS    LVDS_ENCODER_CONTROL_PARAMETERS
+#define TMDS1_ENCODER_CONTROL_PS_ALLOCATION TMDS1_ENCODER_CONTROL_PARAMETERS
+
+#define TMDS2_ENCODER_CONTROL_PARAMETERS    TMDS1_ENCODER_CONTROL_PARAMETERS
+#define TMDS2_ENCODER_CONTROL_PS_ALLOCATION TMDS2_ENCODER_CONTROL_PARAMETERS
+
+/* ucTableFormatRevision=1,ucTableContentRevision=2 */
+typedef struct _LVDS_ENCODER_CONTROL_PARAMETERS_V2 {
+	USHORT usPixelClock;	/*  in 10KHz; for bios convenient */
+	UCHAR ucMisc;		/*  see PANEL_ENCODER_MISC_xx defintions below */
+	UCHAR ucAction;		/*  0: turn off encoder */
+	/*  1: setup and turn on encoder */
+	UCHAR ucTruncate;	/*  bit0=0: Disable truncate */
+	/*      =1: Enable truncate */
+	/*  bit4=0: 666RGB */
+	/*      =1: 888RGB */
+	UCHAR ucSpatial;	/*  bit0=0: Disable spatial dithering */
+	/*      =1: Enable spatial dithering */
+	/*  bit4=0: 666RGB */
+	/*      =1: 888RGB */
+	UCHAR ucTemporal;	/*  bit0=0: Disable temporal dithering */
+	/*      =1: Enable temporal dithering */
+	/*  bit4=0: 666RGB */
+	/*      =1: 888RGB */
+	/*  bit5=0: Gray level 2 */
+	/*      =1: Gray level 4 */
+	UCHAR ucFRC;		/*  bit4=0: 25FRC_SEL pattern E */
+	/*      =1: 25FRC_SEL pattern F */
+	/*  bit6:5=0: 50FRC_SEL pattern A */
+	/*        =1: 50FRC_SEL pattern B */
+	/*        =2: 50FRC_SEL pattern C */
+	/*        =3: 50FRC_SEL pattern D */
+	/*  bit7=0: 75FRC_SEL pattern E */
+	/*      =1: 75FRC_SEL pattern F */
+} LVDS_ENCODER_CONTROL_PARAMETERS_V2;
+
+#define LVDS_ENCODER_CONTROL_PS_ALLOCATION_V2  LVDS_ENCODER_CONTROL_PARAMETERS_V2
+
+#define TMDS1_ENCODER_CONTROL_PARAMETERS_V2    LVDS_ENCODER_CONTROL_PARAMETERS_V2
+#define TMDS1_ENCODER_CONTROL_PS_ALLOCATION_V2 TMDS1_ENCODER_CONTROL_PARAMETERS_V2
+
+#define TMDS2_ENCODER_CONTROL_PARAMETERS_V2    TMDS1_ENCODER_CONTROL_PARAMETERS_V2
+#define TMDS2_ENCODER_CONTROL_PS_ALLOCATION_V2 TMDS2_ENCODER_CONTROL_PARAMETERS_V2
+
+#define LVDS_ENCODER_CONTROL_PARAMETERS_V3     LVDS_ENCODER_CONTROL_PARAMETERS_V2
+#define LVDS_ENCODER_CONTROL_PS_ALLOCATION_V3  LVDS_ENCODER_CONTROL_PARAMETERS_V3
+
+#define TMDS1_ENCODER_CONTROL_PARAMETERS_V3    LVDS_ENCODER_CONTROL_PARAMETERS_V3
+#define TMDS1_ENCODER_CONTROL_PS_ALLOCATION_V3 TMDS1_ENCODER_CONTROL_PARAMETERS_V3
+
+#define TMDS2_ENCODER_CONTROL_PARAMETERS_V3    LVDS_ENCODER_CONTROL_PARAMETERS_V3
+#define TMDS2_ENCODER_CONTROL_PS_ALLOCATION_V3 TMDS2_ENCODER_CONTROL_PARAMETERS_V3
+
+/****************************************************************************/
+/*  Structures used by ### */
+/****************************************************************************/
+typedef struct _ENABLE_EXTERNAL_TMDS_ENCODER_PARAMETERS {
+	UCHAR ucEnable;		/*  Enable or Disable External TMDS encoder */
+	UCHAR ucMisc;		/*  Bit0=0:Enable Single link;=1:Enable Dual link;Bit1 {=0:666RGB, =1:888RGB} */
+	UCHAR ucPadding[2];
+} ENABLE_EXTERNAL_TMDS_ENCODER_PARAMETERS;
+
+typedef struct _ENABLE_EXTERNAL_TMDS_ENCODER_PS_ALLOCATION {
+	ENABLE_EXTERNAL_TMDS_ENCODER_PARAMETERS sXTmdsEncoder;
+	WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION sReserved;	/* Caller doesn't need to init this portion */
+} ENABLE_EXTERNAL_TMDS_ENCODER_PS_ALLOCATION;
+
+#define ENABLE_EXTERNAL_TMDS_ENCODER_PARAMETERS_V2  LVDS_ENCODER_CONTROL_PARAMETERS_V2
+
+typedef struct _ENABLE_EXTERNAL_TMDS_ENCODER_PS_ALLOCATION_V2 {
+	ENABLE_EXTERNAL_TMDS_ENCODER_PARAMETERS_V2 sXTmdsEncoder;
+	WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION sReserved;	/* Caller doesn't need to init this portion */
+} ENABLE_EXTERNAL_TMDS_ENCODER_PS_ALLOCATION_V2;
+
+typedef struct _EXTERNAL_ENCODER_CONTROL_PS_ALLOCATION {
+	DIG_ENCODER_CONTROL_PARAMETERS sDigEncoder;
+	WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION sReserved;
+} EXTERNAL_ENCODER_CONTROL_PS_ALLOCATION;
+
+/****************************************************************************/
+/*  Structures used by DVOEncoderControlTable */
+/****************************************************************************/
+/* ucTableFormatRevision=1,ucTableContentRevision=3 */
+
+/* ucDVOConfig: */
+#define DVO_ENCODER_CONFIG_RATE_SEL							0x01
+#define DVO_ENCODER_CONFIG_DDR_SPEED						0x00
+#define DVO_ENCODER_CONFIG_SDR_SPEED						0x01
+#define DVO_ENCODER_CONFIG_OUTPUT_SEL						0x0c
+#define DVO_ENCODER_CONFIG_LOW12BIT							0x00
+#define DVO_ENCODER_CONFIG_UPPER12BIT						0x04
+#define DVO_ENCODER_CONFIG_24BIT								0x08
+
+typedef struct _DVO_ENCODER_CONTROL_PARAMETERS_V3 {
+	USHORT usPixelClock;
+	UCHAR ucDVOConfig;
+	UCHAR ucAction;		/* ATOM_ENABLE/ATOM_DISABLE/ATOM_HPD_INIT */
+	UCHAR ucReseved[4];
+} DVO_ENCODER_CONTROL_PARAMETERS_V3;
+#define DVO_ENCODER_CONTROL_PS_ALLOCATION_V3	DVO_ENCODER_CONTROL_PARAMETERS_V3
+
+/* ucTableFormatRevision=1 */
+/* ucTableContentRevision=3 structure is not changed but usMisc add bit 1 as another input for */
+/*  bit1=0: non-coherent mode */
+/*      =1: coherent mode */
+
+/* ========================================================================================== */
+/* Only change is here next time when changing encoder parameter definitions again! */
+#define LVDS_ENCODER_CONTROL_PARAMETERS_LAST     LVDS_ENCODER_CONTROL_PARAMETERS_V3
+#define LVDS_ENCODER_CONTROL_PS_ALLOCATION_LAST  LVDS_ENCODER_CONTROL_PARAMETERS_LAST
+
+#define TMDS1_ENCODER_CONTROL_PARAMETERS_LAST    LVDS_ENCODER_CONTROL_PARAMETERS_V3
+#define TMDS1_ENCODER_CONTROL_PS_ALLOCATION_LAST TMDS1_ENCODER_CONTROL_PARAMETERS_LAST
+
+#define TMDS2_ENCODER_CONTROL_PARAMETERS_LAST    LVDS_ENCODER_CONTROL_PARAMETERS_V3
+#define TMDS2_ENCODER_CONTROL_PS_ALLOCATION_LAST TMDS2_ENCODER_CONTROL_PARAMETERS_LAST
+
+#define DVO_ENCODER_CONTROL_PARAMETERS_LAST      DVO_ENCODER_CONTROL_PARAMETERS
+#define DVO_ENCODER_CONTROL_PS_ALLOCATION_LAST   DVO_ENCODER_CONTROL_PS_ALLOCATION
+
+/* ========================================================================================== */
+#define PANEL_ENCODER_MISC_DUAL                0x01
+#define PANEL_ENCODER_MISC_COHERENT            0x02
+#define	PANEL_ENCODER_MISC_TMDS_LINKB					 0x04
+#define	PANEL_ENCODER_MISC_HDMI_TYPE					 0x08
+
+#define PANEL_ENCODER_ACTION_DISABLE           ATOM_DISABLE
+#define PANEL_ENCODER_ACTION_ENABLE            ATOM_ENABLE
+#define PANEL_ENCODER_ACTION_COHERENTSEQ       (ATOM_ENABLE+1)
+
+#define PANEL_ENCODER_TRUNCATE_EN              0x01
+#define PANEL_ENCODER_TRUNCATE_DEPTH           0x10
+#define PANEL_ENCODER_SPATIAL_DITHER_EN        0x01
+#define PANEL_ENCODER_SPATIAL_DITHER_DEPTH     0x10
+#define PANEL_ENCODER_TEMPORAL_DITHER_EN       0x01
+#define PANEL_ENCODER_TEMPORAL_DITHER_DEPTH    0x10
+#define PANEL_ENCODER_TEMPORAL_LEVEL_4         0x20
+#define PANEL_ENCODER_25FRC_MASK               0x10
+#define PANEL_ENCODER_25FRC_E                  0x00
+#define PANEL_ENCODER_25FRC_F                  0x10
+#define PANEL_ENCODER_50FRC_MASK               0x60
+#define PANEL_ENCODER_50FRC_A                  0x00
+#define PANEL_ENCODER_50FRC_B                  0x20
+#define PANEL_ENCODER_50FRC_C                  0x40
+#define PANEL_ENCODER_50FRC_D                  0x60
+#define PANEL_ENCODER_75FRC_MASK               0x80
+#define PANEL_ENCODER_75FRC_E                  0x00
+#define PANEL_ENCODER_75FRC_F                  0x80
+
+/****************************************************************************/
+/*  Structures used by SetVoltageTable */
+/****************************************************************************/
+#define SET_VOLTAGE_TYPE_ASIC_VDDC             1
+#define SET_VOLTAGE_TYPE_ASIC_MVDDC            2
+#define SET_VOLTAGE_TYPE_ASIC_MVDDQ            3
+#define SET_VOLTAGE_TYPE_ASIC_VDDCI            4
+#define SET_VOLTAGE_INIT_MODE                  5
+#define SET_VOLTAGE_GET_MAX_VOLTAGE            6	/* Gets the Max. voltage for the soldered Asic */
+
+#define SET_ASIC_VOLTAGE_MODE_ALL_SOURCE       0x1
+#define SET_ASIC_VOLTAGE_MODE_SOURCE_A         0x2
+#define SET_ASIC_VOLTAGE_MODE_SOURCE_B         0x4
+
+#define	SET_ASIC_VOLTAGE_MODE_SET_VOLTAGE      0x0
+#define	SET_ASIC_VOLTAGE_MODE_GET_GPIOVAL      0x1
+#define	SET_ASIC_VOLTAGE_MODE_GET_GPIOMASK     0x2
+
+typedef struct _SET_VOLTAGE_PARAMETERS {
+	UCHAR ucVoltageType;	/*  To tell which voltage to set up, VDDC/MVDDC/MVDDQ */
+	UCHAR ucVoltageMode;	/*  To set all, to set source A or source B or ... */
+	UCHAR ucVoltageIndex;	/*  An index to tell which voltage level */
+	UCHAR ucReserved;
+} SET_VOLTAGE_PARAMETERS;
+
+typedef struct _SET_VOLTAGE_PARAMETERS_V2 {
+	UCHAR ucVoltageType;	/*  To tell which voltage to set up, VDDC/MVDDC/MVDDQ */
+	UCHAR ucVoltageMode;	/*  Not used, maybe use for state machine for differen power mode */
+	USHORT usVoltageLevel;	/*  real voltage level */
+} SET_VOLTAGE_PARAMETERS_V2;
+
+typedef struct _SET_VOLTAGE_PS_ALLOCATION {
+	SET_VOLTAGE_PARAMETERS sASICSetVoltage;
+	WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION sReserved;
+} SET_VOLTAGE_PS_ALLOCATION;
+
+/****************************************************************************/
+/*  Structures used by TVEncoderControlTable */
+/****************************************************************************/
+typedef struct _TV_ENCODER_CONTROL_PARAMETERS {
+	USHORT usPixelClock;	/*  in 10KHz; for bios convenient */
+	UCHAR ucTvStandard;	/*  See definition "ATOM_TV_NTSC ..." */
+	UCHAR ucAction;		/*  0: turn off encoder */
+	/*  1: setup and turn on encoder */
+} TV_ENCODER_CONTROL_PARAMETERS;
+
+typedef struct _TV_ENCODER_CONTROL_PS_ALLOCATION {
+	TV_ENCODER_CONTROL_PARAMETERS sTVEncoder;
+	WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION sReserved;	/*  Don't set this one */
+} TV_ENCODER_CONTROL_PS_ALLOCATION;
+
+/* ==============================Data Table Portion==================================== */
+
+#ifdef	UEFI_BUILD
+#define	UTEMP	USHORT
+#define	USHORT	void*
+#endif
+
+/****************************************************************************/
+/*  Structure used in Data.mtb */
+/****************************************************************************/
+typedef struct _ATOM_MASTER_LIST_OF_DATA_TABLES {
+	USHORT UtilityPipeLine;	/*  Offest for the utility to get parser info,Don't change this position! */
+	USHORT MultimediaCapabilityInfo;	/*  Only used by MM Lib,latest version 1.1, not configuable from Bios, need to include the table to build Bios */
+	USHORT MultimediaConfigInfo;	/*  Only used by MM Lib,latest version 2.1, not configuable from Bios, need to include the table to build Bios */
+	USHORT StandardVESA_Timing;	/*  Only used by Bios */
+	USHORT FirmwareInfo;	/*  Shared by various SW components,latest version 1.4 */
+	USHORT DAC_Info;	/*  Will be obsolete from R600 */
+	USHORT LVDS_Info;	/*  Shared by various SW components,latest version 1.1 */
+	USHORT TMDS_Info;	/*  Will be obsolete from R600 */
+	USHORT AnalogTV_Info;	/*  Shared by various SW components,latest version 1.1 */
+	USHORT SupportedDevicesInfo;	/*  Will be obsolete from R600 */
+	USHORT GPIO_I2C_Info;	/*  Shared by various SW components,latest version 1.2 will be used from R600 */
+	USHORT VRAM_UsageByFirmware;	/*  Shared by various SW components,latest version 1.3 will be used from R600 */
+	USHORT GPIO_Pin_LUT;	/*  Shared by various SW components,latest version 1.1 */
+	USHORT VESA_ToInternalModeLUT;	/*  Only used by Bios */
+	USHORT ComponentVideoInfo;	/*  Shared by various SW components,latest version 2.1 will be used from R600 */
+	USHORT PowerPlayInfo;	/*  Shared by various SW components,latest version 2.1,new design from R600 */
+	USHORT CompassionateData;	/*  Will be obsolete from R600 */
+	USHORT SaveRestoreInfo;	/*  Only used by Bios */
+	USHORT PPLL_SS_Info;	/*  Shared by various SW components,latest version 1.2, used to call SS_Info, change to new name because of int ASIC SS info */
+	USHORT OemInfo;		/*  Defined and used by external SW, should be obsolete soon */
+	USHORT XTMDS_Info;	/*  Will be obsolete from R600 */
+	USHORT MclkSS_Info;	/*  Shared by various SW components,latest version 1.1, only enabled when ext SS chip is used */
+	USHORT Object_Header;	/*  Shared by various SW components,latest version 1.1 */
+	USHORT IndirectIOAccess;	/*  Only used by Bios,this table position can't change at all!! */
+	USHORT MC_InitParameter;	/*  Only used by command table */
+	USHORT ASIC_VDDC_Info;	/*  Will be obsolete from R600 */
+	USHORT ASIC_InternalSS_Info;	/*  New tabel name from R600, used to be called "ASIC_MVDDC_Info" */
+	USHORT TV_VideoMode;	/*  Only used by command table */
+	USHORT VRAM_Info;	/*  Only used by command table, latest version 1.3 */
+	USHORT MemoryTrainingInfo;	/*  Used for VBIOS and Diag utility for memory training purpose since R600. the new table rev start from 2.1 */
+	USHORT IntegratedSystemInfo;	/*  Shared by various SW components */
+	USHORT ASIC_ProfilingInfo;	/*  New table name from R600, used to be called "ASIC_VDDCI_Info" for pre-R600 */
+	USHORT VoltageObjectInfo;	/*  Shared by various SW components, latest version 1.1 */
+	USHORT PowerSourceInfo;	/*  Shared by various SW components, latest versoin 1.1 */
+} ATOM_MASTER_LIST_OF_DATA_TABLES;
+
+#ifdef	UEFI_BUILD
+#define	USHORT	UTEMP
+#endif
+
+typedef struct _ATOM_MASTER_DATA_TABLE {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	ATOM_MASTER_LIST_OF_DATA_TABLES ListOfDataTables;
+} ATOM_MASTER_DATA_TABLE;
+
+/****************************************************************************/
+/*  Structure used in MultimediaCapabilityInfoTable */
+/****************************************************************************/
+typedef struct _ATOM_MULTIMEDIA_CAPABILITY_INFO {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	ULONG ulSignature;	/*  HW info table signature string "$ATI" */
+	UCHAR ucI2C_Type;	/*  I2C type (normal GP_IO, ImpactTV GP_IO, Dedicated I2C pin, etc) */
+	UCHAR ucTV_OutInfo;	/*  Type of TV out supported (3:0) and video out crystal frequency (6:4) and TV data port (7) */
+	UCHAR ucVideoPortInfo;	/*  Provides the video port capabilities */
+	UCHAR ucHostPortInfo;	/*  Provides host port configuration information */
+} ATOM_MULTIMEDIA_CAPABILITY_INFO;
+
+/****************************************************************************/
+/*  Structure used in MultimediaConfigInfoTable */
+/****************************************************************************/
+typedef struct _ATOM_MULTIMEDIA_CONFIG_INFO {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	ULONG ulSignature;	/*  MM info table signature sting "$MMT" */
+	UCHAR ucTunerInfo;	/*  Type of tuner installed on the adapter (4:0) and video input for tuner (7:5) */
+	UCHAR ucAudioChipInfo;	/*  List the audio chip type (3:0) product type (4) and OEM revision (7:5) */
+	UCHAR ucProductID;	/*  Defines as OEM ID or ATI board ID dependent on product type setting */
+	UCHAR ucMiscInfo1;	/*  Tuner voltage (1:0) HW teletext support (3:2) FM audio decoder (5:4) reserved (6) audio scrambling (7) */
+	UCHAR ucMiscInfo2;	/*  I2S input config (0) I2S output config (1) I2S Audio Chip (4:2) SPDIF Output Config (5) reserved (7:6) */
+	UCHAR ucMiscInfo3;	/*  Video Decoder Type (3:0) Video In Standard/Crystal (7:4) */
+	UCHAR ucMiscInfo4;	/*  Video Decoder Host Config (2:0) reserved (7:3) */
+	UCHAR ucVideoInput0Info;	/*  Video Input 0 Type (1:0) F/B setting (2) physical connector ID (5:3) reserved (7:6) */
+	UCHAR ucVideoInput1Info;	/*  Video Input 1 Type (1:0) F/B setting (2) physical connector ID (5:3) reserved (7:6) */
+	UCHAR ucVideoInput2Info;	/*  Video Input 2 Type (1:0) F/B setting (2) physical connector ID (5:3) reserved (7:6) */
+	UCHAR ucVideoInput3Info;	/*  Video Input 3 Type (1:0) F/B setting (2) physical connector ID (5:3) reserved (7:6) */
+	UCHAR ucVideoInput4Info;	/*  Video Input 4 Type (1:0) F/B setting (2) physical connector ID (5:3) reserved (7:6) */
+} ATOM_MULTIMEDIA_CONFIG_INFO;
+
+/****************************************************************************/
+/*  Structures used in FirmwareInfoTable */
+/****************************************************************************/
+
+/*  usBIOSCapability Defintion: */
+/*  Bit 0 = 0: Bios image is not Posted, =1:Bios image is Posted; */
+/*  Bit 1 = 0: Dual CRTC is not supported, =1: Dual CRTC is supported; */
+/*  Bit 2 = 0: Extended Desktop is not supported, =1: Extended Desktop is supported; */
+/*  Others: Reserved */
+#define ATOM_BIOS_INFO_ATOM_FIRMWARE_POSTED         0x0001
+#define ATOM_BIOS_INFO_DUAL_CRTC_SUPPORT            0x0002
+#define ATOM_BIOS_INFO_EXTENDED_DESKTOP_SUPPORT     0x0004
+#define ATOM_BIOS_INFO_MEMORY_CLOCK_SS_SUPPORT      0x0008
+#define ATOM_BIOS_INFO_ENGINE_CLOCK_SS_SUPPORT      0x0010
+#define ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU         0x0020
+#define ATOM_BIOS_INFO_WMI_SUPPORT                  0x0040
+#define ATOM_BIOS_INFO_PPMODE_ASSIGNGED_BY_SYSTEM   0x0080
+#define ATOM_BIOS_INFO_HYPERMEMORY_SUPPORT          0x0100
+#define ATOM_BIOS_INFO_HYPERMEMORY_SIZE_MASK        0x1E00
+#define ATOM_BIOS_INFO_VPOST_WITHOUT_FIRST_MODE_SET 0x2000
+#define ATOM_BIOS_INFO_BIOS_SCRATCH6_SCL2_REDEFINE  0x4000
+
+#ifndef _H2INC
+
+/* Please don't add or expand this bitfield structure below, this one will retire soon.! */
+typedef struct _ATOM_FIRMWARE_CAPABILITY {
+#if ATOM_BIG_ENDIAN
+	USHORT Reserved:3;
+	USHORT HyperMemory_Size:4;
+	USHORT HyperMemory_Support:1;
+	USHORT PPMode_Assigned:1;
+	USHORT WMI_SUPPORT:1;
+	USHORT GPUControlsBL:1;
+	USHORT EngineClockSS_Support:1;
+	USHORT MemoryClockSS_Support:1;
+	USHORT ExtendedDesktopSupport:1;
+	USHORT DualCRTC_Support:1;
+	USHORT FirmwarePosted:1;
+#else
+	USHORT FirmwarePosted:1;
+	USHORT DualCRTC_Support:1;
+	USHORT ExtendedDesktopSupport:1;
+	USHORT MemoryClockSS_Support:1;
+	USHORT EngineClockSS_Support:1;
+	USHORT GPUControlsBL:1;
+	USHORT WMI_SUPPORT:1;
+	USHORT PPMode_Assigned:1;
+	USHORT HyperMemory_Support:1;
+	USHORT HyperMemory_Size:4;
+	USHORT Reserved:3;
+#endif
+} ATOM_FIRMWARE_CAPABILITY;
+
+typedef union _ATOM_FIRMWARE_CAPABILITY_ACCESS {
+	ATOM_FIRMWARE_CAPABILITY sbfAccess;
+	USHORT susAccess;
+} ATOM_FIRMWARE_CAPABILITY_ACCESS;
+
+#else
+
+typedef union _ATOM_FIRMWARE_CAPABILITY_ACCESS {
+	USHORT susAccess;
+} ATOM_FIRMWARE_CAPABILITY_ACCESS;
+
+#endif
+
+typedef struct _ATOM_FIRMWARE_INFO {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	ULONG ulFirmwareRevision;
+	ULONG ulDefaultEngineClock;	/* In 10Khz unit */
+	ULONG ulDefaultMemoryClock;	/* In 10Khz unit */
+	ULONG ulDriverTargetEngineClock;	/* In 10Khz unit */
+	ULONG ulDriverTargetMemoryClock;	/* In 10Khz unit */
+	ULONG ulMaxEngineClockPLL_Output;	/* In 10Khz unit */
+	ULONG ulMaxMemoryClockPLL_Output;	/* In 10Khz unit */
+	ULONG ulMaxPixelClockPLL_Output;	/* In 10Khz unit */
+	ULONG ulASICMaxEngineClock;	/* In 10Khz unit */
+	ULONG ulASICMaxMemoryClock;	/* In 10Khz unit */
+	UCHAR ucASICMaxTemperature;
+	UCHAR ucPadding[3];	/* Don't use them */
+	ULONG aulReservedForBIOS[3];	/* Don't use them */
+	USHORT usMinEngineClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMaxEngineClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMinEngineClockPLL_Output;	/* In 10Khz unit */
+	USHORT usMinMemoryClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMaxMemoryClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMinMemoryClockPLL_Output;	/* In 10Khz unit */
+	USHORT usMaxPixelClock;	/* In 10Khz unit, Max.  Pclk */
+	USHORT usMinPixelClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMaxPixelClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMinPixelClockPLL_Output;	/* In 10Khz unit, the definitions above can't change!!! */
+	ATOM_FIRMWARE_CAPABILITY_ACCESS usFirmwareCapability;
+	USHORT usReferenceClock;	/* In 10Khz unit */
+	USHORT usPM_RTS_Location;	/* RTS PM4 starting location in ROM in 1Kb unit */
+	UCHAR ucPM_RTS_StreamSize;	/* RTS PM4 packets in Kb unit */
+	UCHAR ucDesign_ID;	/* Indicate what is the board design */
+	UCHAR ucMemoryModule_ID;	/* Indicate what is the board design */
+} ATOM_FIRMWARE_INFO;
+
+typedef struct _ATOM_FIRMWARE_INFO_V1_2 {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	ULONG ulFirmwareRevision;
+	ULONG ulDefaultEngineClock;	/* In 10Khz unit */
+	ULONG ulDefaultMemoryClock;	/* In 10Khz unit */
+	ULONG ulDriverTargetEngineClock;	/* In 10Khz unit */
+	ULONG ulDriverTargetMemoryClock;	/* In 10Khz unit */
+	ULONG ulMaxEngineClockPLL_Output;	/* In 10Khz unit */
+	ULONG ulMaxMemoryClockPLL_Output;	/* In 10Khz unit */
+	ULONG ulMaxPixelClockPLL_Output;	/* In 10Khz unit */
+	ULONG ulASICMaxEngineClock;	/* In 10Khz unit */
+	ULONG ulASICMaxMemoryClock;	/* In 10Khz unit */
+	UCHAR ucASICMaxTemperature;
+	UCHAR ucMinAllowedBL_Level;
+	UCHAR ucPadding[2];	/* Don't use them */
+	ULONG aulReservedForBIOS[2];	/* Don't use them */
+	ULONG ulMinPixelClockPLL_Output;	/* In 10Khz unit */
+	USHORT usMinEngineClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMaxEngineClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMinEngineClockPLL_Output;	/* In 10Khz unit */
+	USHORT usMinMemoryClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMaxMemoryClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMinMemoryClockPLL_Output;	/* In 10Khz unit */
+	USHORT usMaxPixelClock;	/* In 10Khz unit, Max.  Pclk */
+	USHORT usMinPixelClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMaxPixelClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMinPixelClockPLL_Output;	/* In 10Khz unit - lower 16bit of ulMinPixelClockPLL_Output */
+	ATOM_FIRMWARE_CAPABILITY_ACCESS usFirmwareCapability;
+	USHORT usReferenceClock;	/* In 10Khz unit */
+	USHORT usPM_RTS_Location;	/* RTS PM4 starting location in ROM in 1Kb unit */
+	UCHAR ucPM_RTS_StreamSize;	/* RTS PM4 packets in Kb unit */
+	UCHAR ucDesign_ID;	/* Indicate what is the board design */
+	UCHAR ucMemoryModule_ID;	/* Indicate what is the board design */
+} ATOM_FIRMWARE_INFO_V1_2;
+
+typedef struct _ATOM_FIRMWARE_INFO_V1_3 {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	ULONG ulFirmwareRevision;
+	ULONG ulDefaultEngineClock;	/* In 10Khz unit */
+	ULONG ulDefaultMemoryClock;	/* In 10Khz unit */
+	ULONG ulDriverTargetEngineClock;	/* In 10Khz unit */
+	ULONG ulDriverTargetMemoryClock;	/* In 10Khz unit */
+	ULONG ulMaxEngineClockPLL_Output;	/* In 10Khz unit */
+	ULONG ulMaxMemoryClockPLL_Output;	/* In 10Khz unit */
+	ULONG ulMaxPixelClockPLL_Output;	/* In 10Khz unit */
+	ULONG ulASICMaxEngineClock;	/* In 10Khz unit */
+	ULONG ulASICMaxMemoryClock;	/* In 10Khz unit */
+	UCHAR ucASICMaxTemperature;
+	UCHAR ucMinAllowedBL_Level;
+	UCHAR ucPadding[2];	/* Don't use them */
+	ULONG aulReservedForBIOS;	/* Don't use them */
+	ULONG ul3DAccelerationEngineClock;	/* In 10Khz unit */
+	ULONG ulMinPixelClockPLL_Output;	/* In 10Khz unit */
+	USHORT usMinEngineClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMaxEngineClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMinEngineClockPLL_Output;	/* In 10Khz unit */
+	USHORT usMinMemoryClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMaxMemoryClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMinMemoryClockPLL_Output;	/* In 10Khz unit */
+	USHORT usMaxPixelClock;	/* In 10Khz unit, Max.  Pclk */
+	USHORT usMinPixelClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMaxPixelClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMinPixelClockPLL_Output;	/* In 10Khz unit - lower 16bit of ulMinPixelClockPLL_Output */
+	ATOM_FIRMWARE_CAPABILITY_ACCESS usFirmwareCapability;
+	USHORT usReferenceClock;	/* In 10Khz unit */
+	USHORT usPM_RTS_Location;	/* RTS PM4 starting location in ROM in 1Kb unit */
+	UCHAR ucPM_RTS_StreamSize;	/* RTS PM4 packets in Kb unit */
+	UCHAR ucDesign_ID;	/* Indicate what is the board design */
+	UCHAR ucMemoryModule_ID;	/* Indicate what is the board design */
+} ATOM_FIRMWARE_INFO_V1_3;
+
+typedef struct _ATOM_FIRMWARE_INFO_V1_4 {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	ULONG ulFirmwareRevision;
+	ULONG ulDefaultEngineClock;	/* In 10Khz unit */
+	ULONG ulDefaultMemoryClock;	/* In 10Khz unit */
+	ULONG ulDriverTargetEngineClock;	/* In 10Khz unit */
+	ULONG ulDriverTargetMemoryClock;	/* In 10Khz unit */
+	ULONG ulMaxEngineClockPLL_Output;	/* In 10Khz unit */
+	ULONG ulMaxMemoryClockPLL_Output;	/* In 10Khz unit */
+	ULONG ulMaxPixelClockPLL_Output;	/* In 10Khz unit */
+	ULONG ulASICMaxEngineClock;	/* In 10Khz unit */
+	ULONG ulASICMaxMemoryClock;	/* In 10Khz unit */
+	UCHAR ucASICMaxTemperature;
+	UCHAR ucMinAllowedBL_Level;
+	USHORT usBootUpVDDCVoltage;	/* In MV unit */
+	USHORT usLcdMinPixelClockPLL_Output;	/*  In MHz unit */
+	USHORT usLcdMaxPixelClockPLL_Output;	/*  In MHz unit */
+	ULONG ul3DAccelerationEngineClock;	/* In 10Khz unit */
+	ULONG ulMinPixelClockPLL_Output;	/* In 10Khz unit */
+	USHORT usMinEngineClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMaxEngineClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMinEngineClockPLL_Output;	/* In 10Khz unit */
+	USHORT usMinMemoryClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMaxMemoryClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMinMemoryClockPLL_Output;	/* In 10Khz unit */
+	USHORT usMaxPixelClock;	/* In 10Khz unit, Max.  Pclk */
+	USHORT usMinPixelClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMaxPixelClockPLL_Input;	/* In 10Khz unit */
+	USHORT usMinPixelClockPLL_Output;	/* In 10Khz unit - lower 16bit of ulMinPixelClockPLL_Output */
+	ATOM_FIRMWARE_CAPABILITY_ACCESS usFirmwareCapability;
+	USHORT usReferenceClock;	/* In 10Khz unit */
+	USHORT usPM_RTS_Location;	/* RTS PM4 starting location in ROM in 1Kb unit */
+	UCHAR ucPM_RTS_StreamSize;	/* RTS PM4 packets in Kb unit */
+	UCHAR ucDesign_ID;	/* Indicate what is the board design */
+	UCHAR ucMemoryModule_ID;	/* Indicate what is the board design */
+} ATOM_FIRMWARE_INFO_V1_4;
+
+#define ATOM_FIRMWARE_INFO_LAST  ATOM_FIRMWARE_INFO_V1_4
+
+/****************************************************************************/
+/*  Structures used in IntegratedSystemInfoTable */
+/****************************************************************************/
+#define IGP_CAP_FLAG_DYNAMIC_CLOCK_EN      0x2
+#define IGP_CAP_FLAG_AC_CARD               0x4
+#define IGP_CAP_FLAG_SDVO_CARD             0x8
+#define IGP_CAP_FLAG_POSTDIV_BY_2_MODE     0x10
+
+typedef struct _ATOM_INTEGRATED_SYSTEM_INFO {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	ULONG ulBootUpEngineClock;	/* in 10kHz unit */
+	ULONG ulBootUpMemoryClock;	/* in 10kHz unit */
+	ULONG ulMaxSystemMemoryClock;	/* in 10kHz unit */
+	ULONG ulMinSystemMemoryClock;	/* in 10kHz unit */
+	UCHAR ucNumberOfCyclesInPeriodHi;
+	UCHAR ucLCDTimingSel;	/* =0:not valid.!=0 sel this timing descriptor from LCD EDID. */
+	USHORT usReserved1;
+	USHORT usInterNBVoltageLow;	/* An intermidiate PMW value to set the voltage */
+	USHORT usInterNBVoltageHigh;	/* Another intermidiate PMW value to set the voltage */
+	ULONG ulReserved[2];
+
+	USHORT usFSBClock;	/* In MHz unit */
+	USHORT usCapabilityFlag;	/* Bit0=1 indicates the fake HDMI support,Bit1=0/1 for Dynamic clocking dis/enable */
+	/* Bit[3:2]== 0:No PCIE card, 1:AC card, 2:SDVO card */
+	/* Bit[4]==1: P/2 mode, ==0: P/1 mode */
+	USHORT usPCIENBCfgReg7;	/* bit[7:0]=MUX_Sel, bit[9:8]=MUX_SEL_LEVEL2, bit[10]=Lane_Reversal */
+	USHORT usK8MemoryClock;	/* in MHz unit */
+	USHORT usK8SyncStartDelay;	/* in 0.01 us unit */
+	USHORT usK8DataReturnTime;	/* in 0.01 us unit */
+	UCHAR ucMaxNBVoltage;
+	UCHAR ucMinNBVoltage;
+	UCHAR ucMemoryType;	/* [7:4]=1:DDR1;=2:DDR2;=3:DDR3.[3:0] is reserved */
+	UCHAR ucNumberOfCyclesInPeriod;	/* CG.FVTHROT_PWM_CTRL_REG0.NumberOfCyclesInPeriod */
+	UCHAR ucStartingPWM_HighTime;	/* CG.FVTHROT_PWM_CTRL_REG0.StartingPWM_HighTime */
+	UCHAR ucHTLinkWidth;	/* 16 bit vs. 8 bit */
+	UCHAR ucMaxNBVoltageHigh;
+	UCHAR ucMinNBVoltageHigh;
+} ATOM_INTEGRATED_SYSTEM_INFO;
+
+/* Explanation on entries in ATOM_INTEGRATED_SYSTEM_INFO
+ulBootUpMemoryClock:    For Intel IGP,it's the UMA system memory clock
+                        For AMD IGP,it's 0 if no SidePort memory installed or it's the boot-up SidePort memory clock
+ulMaxSystemMemoryClock: For Intel IGP,it's the Max freq from memory SPD if memory runs in ASYNC mode or otherwise (SYNC mode) it's 0
+                        For AMD IGP,for now this can be 0
+ulMinSystemMemoryClock: For Intel IGP,it's 133MHz if memory runs in ASYNC mode or otherwise (SYNC mode) it's 0
+                        For AMD IGP,for now this can be 0
+
+usFSBClock:             For Intel IGP,it's FSB Freq
+                        For AMD IGP,it's HT Link Speed
+
+usK8MemoryClock:        For AMD IGP only. For RevF CPU, set it to 200
+usK8SyncStartDelay:     For AMD IGP only. Memory access latency in K8, required for watermark calculation
+usK8DataReturnTime:     For AMD IGP only. Memory access latency in K8, required for watermark calculation
+
+VC:Voltage Control
+ucMaxNBVoltage:         Voltage regulator dependent PWM value. Low 8 bits of the value for the max voltage.Set this one to 0xFF if VC without PWM. Set this to 0x0 if no VC at all.
+ucMinNBVoltage:         Voltage regulator dependent PWM value. Low 8 bits of the value for the min voltage.Set this one to 0x00 if VC without PWM or no VC at all.
+
+ucNumberOfCyclesInPeriod:   Indicate how many cycles when PWM duty is 100%. low 8 bits of the value.
+ucNumberOfCyclesInPeriodHi: Indicate how many cycles when PWM duty is 100%. high 8 bits of the value.If the PWM has an inverter,set bit [7]==1,otherwise set it 0
+
+ucMaxNBVoltageHigh:     Voltage regulator dependent PWM value. High 8 bits of  the value for the max voltage.Set this one to 0xFF if VC without PWM. Set this to 0x0 if no VC at all.
+ucMinNBVoltageHigh:     Voltage regulator dependent PWM value. High 8 bits of the value for the min voltage.Set this one to 0x00 if VC without PWM or no VC at all.
+
+usInterNBVoltageLow:    Voltage regulator dependent PWM value. The value makes the the voltage >=Min NB voltage but <=InterNBVoltageHigh. Set this to 0x0000 if VC without PWM or no VC at all.
+usInterNBVoltageHigh:   Voltage regulator dependent PWM value. The value makes the the voltage >=InterNBVoltageLow but <=Max NB voltage.Set this to 0x0000 if VC without PWM or no VC at all.
+*/
+
+/*
+The following IGP table is introduced from RS780, which is supposed to be put by SBIOS in FB before IGP VBIOS starts VPOST;
+Then VBIOS will copy the whole structure to its image so all GPU SW components can access this data structure to get whatever they need.
+The enough reservation should allow us to never change table revisions. Whenever needed, a GPU SW component can use reserved portion for new data entries.
+
+SW components can access the IGP system infor structure in the same way as before
+*/
+
+typedef struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	ULONG ulBootUpEngineClock;	/* in 10kHz unit */
+	ULONG ulReserved1[2];	/* must be 0x0 for the reserved */
+	ULONG ulBootUpUMAClock;	/* in 10kHz unit */
+	ULONG ulBootUpSidePortClock;	/* in 10kHz unit */
+	ULONG ulMinSidePortClock;	/* in 10kHz unit */
+	ULONG ulReserved2[6];	/* must be 0x0 for the reserved */
+	ULONG ulSystemConfig;	/* see explanation below */
+	ULONG ulBootUpReqDisplayVector;
+	ULONG ulOtherDisplayMisc;
+	ULONG ulDDISlot1Config;
+	ULONG ulDDISlot2Config;
+	UCHAR ucMemoryType;	/* [3:0]=1:DDR1;=2:DDR2;=3:DDR3.[7:4] is reserved */
+	UCHAR ucUMAChannelNumber;
+	UCHAR ucDockingPinBit;
+	UCHAR ucDockingPinPolarity;
+	ULONG ulDockingPinCFGInfo;
+	ULONG ulCPUCapInfo;
+	USHORT usNumberOfCyclesInPeriod;
+	USHORT usMaxNBVoltage;
+	USHORT usMinNBVoltage;
+	USHORT usBootUpNBVoltage;
+	ULONG ulHTLinkFreq;	/* in 10Khz */
+	USHORT usMinHTLinkWidth;
+	USHORT usMaxHTLinkWidth;
+	USHORT usUMASyncStartDelay;
+	USHORT usUMADataReturnTime;
+	USHORT usLinkStatusZeroTime;
+	USHORT usReserved;
+	ULONG ulHighVoltageHTLinkFreq;	/*  in 10Khz */
+	ULONG ulLowVoltageHTLinkFreq;	/*  in 10Khz */
+	USHORT usMaxUpStreamHTLinkWidth;
+	USHORT usMaxDownStreamHTLinkWidth;
+	USHORT usMinUpStreamHTLinkWidth;
+	USHORT usMinDownStreamHTLinkWidth;
+	ULONG ulReserved3[97];	/* must be 0x0 */
+} ATOM_INTEGRATED_SYSTEM_INFO_V2;
+
+/*
+ulBootUpEngineClock:   Boot-up Engine Clock in 10Khz;
+ulBootUpUMAClock:      Boot-up UMA Clock in 10Khz; it must be 0x0 when UMA is not present
+ulBootUpSidePortClock: Boot-up SidePort Clock in 10Khz; it must be 0x0 when SidePort Memory is not present,this could be equal to or less than maximum supported Sideport memory clock
+
+ulSystemConfig:
+Bit[0]=1: PowerExpress mode =0 Non-PowerExpress mode;
+Bit[1]=1: system boots up at AMD overdrived state or user customized  mode. In this case, driver will just stick to this boot-up mode. No other PowerPlay state
+      =0: system boots up at driver control state. Power state depends on PowerPlay table.
+Bit[2]=1: PWM method is used on NB voltage control. =0: GPIO method is used.
+Bit[3]=1: Only one power state(Performance) will be supported.
+      =0: Multiple power states supported from PowerPlay table.
+Bit[4]=1: CLMC is supported and enabled on current system.
+      =0: CLMC is not supported or enabled on current system. SBIOS need to support HT link/freq change through ATIF interface.
+Bit[5]=1: Enable CDLW for all driver control power states. Max HT width is from SBIOS, while Min HT width is determined by display requirement.
+      =0: CDLW is disabled. If CLMC is enabled case, Min HT width will be set equal to Max HT width. If CLMC disabled case, Max HT width will be applied.
+Bit[6]=1: High Voltage requested for all power states. In this case, voltage will be forced at 1.1v and powerplay table voltage drop/throttling request will be ignored.
+      =0: Voltage settings is determined by powerplay table.
+Bit[7]=1: Enable CLMC as hybrid Mode. CDLD and CILR will be disabled in this case and we're using legacy C1E. This is workaround for CPU(Griffin) performance issue.
+      =0: Enable CLMC as regular mode, CDLD and CILR will be enabled.
+
+ulBootUpReqDisplayVector: This dword is a bit vector indicates what display devices are requested during boot-up. Refer to ATOM_DEVICE_xxx_SUPPORT for the bit vector definitions.
+
+ulOtherDisplayMisc: [15:8]- Bootup LCD Expansion selection; 0-center, 1-full panel size expansion;
+			              [7:0] - BootupTV standard selection; This is a bit vector to indicate what TV standards are supported by the system. Refer to ucTVSuppportedStd definition;
+
+ulDDISlot1Config: Describes the PCIE lane configuration on this DDI PCIE slot (ADD2 card) or connector (Mobile design).
+      [3:0]  - Bit vector to indicate PCIE lane config of the DDI slot/connector on chassis (bit 0=1 lane 3:0; bit 1=1 lane 7:4; bit 2=1 lane 11:8; bit 3=1 lane 15:12)
+			[7:4]  - Bit vector to indicate PCIE lane config of the same DDI slot/connector on docking station (bit 0=1 lane 3:0; bit 1=1 lane 7:4; bit 2=1 lane 11:8; bit 3=1 lane 15:12)
+			[15:8] - Lane configuration attribute;
+      [23:16]- Connector type, possible value:
+               CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D
+               CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D
+               CONNECTOR_OBJECT_ID_HDMI_TYPE_A
+               CONNECTOR_OBJECT_ID_DISPLAYPORT
+			[31:24]- Reserved
+
+ulDDISlot2Config: Same as Slot1.
+ucMemoryType: SidePort memory type, set it to 0x0 when Sideport memory is not installed. Driver needs this info to change sideport memory clock. Not for display in CCC.
+For IGP, Hypermemory is the only memory type showed in CCC.
+
+ucUMAChannelNumber:  how many channels for the UMA;
+
+ulDockingPinCFGInfo: [15:0]-Bus/Device/Function # to CFG to read this Docking Pin; [31:16]-reg offset in CFG to read this pin
+ucDockingPinBit:     which bit in this register to read the pin status;
+ucDockingPinPolarity:Polarity of the pin when docked;
+
+ulCPUCapInfo:        [7:0]=1:Griffin;[7:0]=2:Greyhound;[7:0]=3:K8, other bits reserved for now and must be 0x0
+
+usNumberOfCyclesInPeriod:Indicate how many cycles when PWM duty is 100%.
+usMaxNBVoltage:Max. voltage control value in either PWM or GPIO mode.
+usMinNBVoltage:Min. voltage control value in either PWM or GPIO mode.
+                    GPIO mode: both usMaxNBVoltage & usMinNBVoltage have a valid value ulSystemConfig.SYSTEM_CONFIG_USE_PWM_ON_VOLTAGE=0
+                    PWM mode: both usMaxNBVoltage & usMinNBVoltage have a valid value ulSystemConfig.SYSTEM_CONFIG_USE_PWM_ON_VOLTAGE=1
+                    GPU SW don't control mode: usMaxNBVoltage & usMinNBVoltage=0 and no care about ulSystemConfig.SYSTEM_CONFIG_USE_PWM_ON_VOLTAGE
+usBootUpNBVoltage:Boot-up voltage regulator dependent PWM value.
+
+ulHTLinkFreq:       Bootup HT link Frequency in 10Khz.
+usMinHTLinkWidth:   Bootup minimum HT link width. If CDLW disabled, this is equal to usMaxHTLinkWidth.
+                    If CDLW enabled, both upstream and downstream width should be the same during bootup.
+usMaxHTLinkWidth:   Bootup maximum HT link width. If CDLW disabled, this is equal to usMinHTLinkWidth.
+                    If CDLW enabled, both upstream and downstream width should be the same during bootup.
+
+usUMASyncStartDelay: Memory access latency, required for watermark calculation
+usUMADataReturnTime: Memory access latency, required for watermark calculation
+usLinkStatusZeroTime:Memory access latency required for watermark calculation, set this to 0x0 for K8 CPU, set a proper value in 0.01 the unit of us
+for Griffin or Greyhound. SBIOS needs to convert to actual time by:
+                     if T0Ttime [5:4]=00b, then usLinkStatusZeroTime=T0Ttime [3:0]*0.1us (0.0 to 1.5us)
+                     if T0Ttime [5:4]=01b, then usLinkStatusZeroTime=T0Ttime [3:0]*0.5us (0.0 to 7.5us)
+                     if T0Ttime [5:4]=10b, then usLinkStatusZeroTime=T0Ttime [3:0]*2.0us (0.0 to 30us)
+                     if T0Ttime [5:4]=11b, and T0Ttime [3:0]=0x0 to 0xa, then usLinkStatusZeroTime=T0Ttime [3:0]*20us (0.0 to 200us)
+
+ulHighVoltageHTLinkFreq:     HT link frequency for power state with low voltage. If boot up runs in HT1, this must be 0.
+                             This must be less than or equal to ulHTLinkFreq(bootup frequency).
+ulLowVoltageHTLinkFreq:      HT link frequency for power state with low voltage or voltage scaling 1.0v~1.1v. If boot up runs in HT1, this must be 0.
+                             This must be less than or equal to ulHighVoltageHTLinkFreq.
+
+usMaxUpStreamHTLinkWidth:    Asymmetric link width support in the future, to replace usMaxHTLinkWidth. Not used for now.
+usMaxDownStreamHTLinkWidth:  same as above.
+usMinUpStreamHTLinkWidth:    Asymmetric link width support in the future, to replace usMinHTLinkWidth. Not used for now.
+usMinDownStreamHTLinkWidth:  same as above.
+*/
+
+#define SYSTEM_CONFIG_POWEREXPRESS_ENABLE                 0x00000001
+#define SYSTEM_CONFIG_RUN_AT_OVERDRIVE_ENGINE             0x00000002
+#define SYSTEM_CONFIG_USE_PWM_ON_VOLTAGE                  0x00000004
+#define SYSTEM_CONFIG_PERFORMANCE_POWERSTATE_ONLY         0x00000008
+#define SYSTEM_CONFIG_CLMC_ENABLED                        0x00000010
+#define SYSTEM_CONFIG_CDLW_ENABLED                        0x00000020
+#define SYSTEM_CONFIG_HIGH_VOLTAGE_REQUESTED              0x00000040
+#define SYSTEM_CONFIG_CLMC_HYBRID_MODE_ENABLED            0x00000080
+
+#define IGP_DDI_SLOT_LANE_CONFIG_MASK                     0x000000FF
+
+#define b0IGP_DDI_SLOT_LANE_MAP_MASK                      0x0F
+#define b0IGP_DDI_SLOT_DOCKING_LANE_MAP_MASK              0xF0
+#define b0IGP_DDI_SLOT_CONFIG_LANE_0_3                    0x01
+#define b0IGP_DDI_SLOT_CONFIG_LANE_4_7                    0x02
+#define b0IGP_DDI_SLOT_CONFIG_LANE_8_11                   0x04
+#define b0IGP_DDI_SLOT_CONFIG_LANE_12_15                  0x08
+
+#define IGP_DDI_SLOT_ATTRIBUTE_MASK                       0x0000FF00
+#define IGP_DDI_SLOT_CONFIG_REVERSED                      0x00000100
+#define b1IGP_DDI_SLOT_CONFIG_REVERSED                    0x01
+
+#define IGP_DDI_SLOT_CONNECTOR_TYPE_MASK                  0x00FF0000
+
+#define ATOM_CRT_INT_ENCODER1_INDEX                       0x00000000
+#define ATOM_LCD_INT_ENCODER1_INDEX                       0x00000001
+#define ATOM_TV_INT_ENCODER1_INDEX                        0x00000002
+#define ATOM_DFP_INT_ENCODER1_INDEX                       0x00000003
+#define ATOM_CRT_INT_ENCODER2_INDEX                       0x00000004
+#define ATOM_LCD_EXT_ENCODER1_INDEX                       0x00000005
+#define ATOM_TV_EXT_ENCODER1_INDEX                        0x00000006
+#define ATOM_DFP_EXT_ENCODER1_INDEX                       0x00000007
+#define ATOM_CV_INT_ENCODER1_INDEX                        0x00000008
+#define ATOM_DFP_INT_ENCODER2_INDEX                       0x00000009
+#define ATOM_CRT_EXT_ENCODER1_INDEX                       0x0000000A
+#define ATOM_CV_EXT_ENCODER1_INDEX                        0x0000000B
+#define ATOM_DFP_INT_ENCODER3_INDEX                       0x0000000C
+#define ATOM_DFP_INT_ENCODER4_INDEX                       0x0000000D
+
+/*  define ASIC internal encoder id ( bit vector ) */
+#define ASIC_INT_DAC1_ENCODER_ID											0x00
+#define ASIC_INT_TV_ENCODER_ID														0x02
+#define ASIC_INT_DIG1_ENCODER_ID													0x03
+#define ASIC_INT_DAC2_ENCODER_ID													0x04
+#define ASIC_EXT_TV_ENCODER_ID														0x06
+#define ASIC_INT_DVO_ENCODER_ID														0x07
+#define ASIC_INT_DIG2_ENCODER_ID													0x09
+#define ASIC_EXT_DIG_ENCODER_ID														0x05
+
+/* define Encoder attribute */
+#define ATOM_ANALOG_ENCODER																0
+#define ATOM_DIGITAL_ENCODER															1
+
+#define ATOM_DEVICE_CRT1_INDEX                            0x00000000
+#define ATOM_DEVICE_LCD1_INDEX                            0x00000001
+#define ATOM_DEVICE_TV1_INDEX                             0x00000002
+#define ATOM_DEVICE_DFP1_INDEX                            0x00000003
+#define ATOM_DEVICE_CRT2_INDEX                            0x00000004
+#define ATOM_DEVICE_LCD2_INDEX                            0x00000005
+#define ATOM_DEVICE_TV2_INDEX                             0x00000006
+#define ATOM_DEVICE_DFP2_INDEX                            0x00000007
+#define ATOM_DEVICE_CV_INDEX                              0x00000008
+#define ATOM_DEVICE_DFP3_INDEX														0x00000009
+#define ATOM_DEVICE_DFP4_INDEX														0x0000000A
+#define ATOM_DEVICE_DFP5_INDEX														0x0000000B
+#define ATOM_DEVICE_RESERVEDC_INDEX                       0x0000000C
+#define ATOM_DEVICE_RESERVEDD_INDEX                       0x0000000D
+#define ATOM_DEVICE_RESERVEDE_INDEX                       0x0000000E
+#define ATOM_DEVICE_RESERVEDF_INDEX                       0x0000000F
+#define ATOM_MAX_SUPPORTED_DEVICE_INFO                    (ATOM_DEVICE_DFP3_INDEX+1)
+#define ATOM_MAX_SUPPORTED_DEVICE_INFO_2                  ATOM_MAX_SUPPORTED_DEVICE_INFO
+#define ATOM_MAX_SUPPORTED_DEVICE_INFO_3                  (ATOM_DEVICE_DFP5_INDEX + 1)
+
+#define ATOM_MAX_SUPPORTED_DEVICE                         (ATOM_DEVICE_RESERVEDF_INDEX+1)
+
+#define ATOM_DEVICE_CRT1_SUPPORT                          (0x1L << ATOM_DEVICE_CRT1_INDEX)
+#define ATOM_DEVICE_LCD1_SUPPORT                          (0x1L << ATOM_DEVICE_LCD1_INDEX)
+#define ATOM_DEVICE_TV1_SUPPORT                           (0x1L << ATOM_DEVICE_TV1_INDEX)
+#define ATOM_DEVICE_DFP1_SUPPORT                          (0x1L << ATOM_DEVICE_DFP1_INDEX)
+#define ATOM_DEVICE_CRT2_SUPPORT                          (0x1L << ATOM_DEVICE_CRT2_INDEX)
+#define ATOM_DEVICE_LCD2_SUPPORT                          (0x1L << ATOM_DEVICE_LCD2_INDEX)
+#define ATOM_DEVICE_TV2_SUPPORT                           (0x1L << ATOM_DEVICE_TV2_INDEX)
+#define ATOM_DEVICE_DFP2_SUPPORT                          (0x1L << ATOM_DEVICE_DFP2_INDEX)
+#define ATOM_DEVICE_CV_SUPPORT                            (0x1L << ATOM_DEVICE_CV_INDEX)
+#define ATOM_DEVICE_DFP3_SUPPORT													(0x1L << ATOM_DEVICE_DFP3_INDEX)
+#define ATOM_DEVICE_DFP4_SUPPORT													(0x1L << ATOM_DEVICE_DFP4_INDEX )
+#define ATOM_DEVICE_DFP5_SUPPORT													(0x1L << ATOM_DEVICE_DFP5_INDEX)
+
+#define ATOM_DEVICE_CRT_SUPPORT \
+	(ATOM_DEVICE_CRT1_SUPPORT | ATOM_DEVICE_CRT2_SUPPORT)
+#define ATOM_DEVICE_DFP_SUPPORT \
+	(ATOM_DEVICE_DFP1_SUPPORT | ATOM_DEVICE_DFP2_SUPPORT | \
+	 ATOM_DEVICE_DFP3_SUPPORT | ATOM_DEVICE_DFP4_SUPPORT | \
+	 ATOM_DEVICE_DFP5_SUPPORT)
+#define ATOM_DEVICE_TV_SUPPORT \
+	(ATOM_DEVICE_TV1_SUPPORT  | ATOM_DEVICE_TV2_SUPPORT)
+#define ATOM_DEVICE_LCD_SUPPORT \
+	(ATOM_DEVICE_LCD1_SUPPORT | ATOM_DEVICE_LCD2_SUPPORT)
+
+#define ATOM_DEVICE_CONNECTOR_TYPE_MASK                   0x000000F0
+#define ATOM_DEVICE_CONNECTOR_TYPE_SHIFT                  0x00000004
+#define ATOM_DEVICE_CONNECTOR_VGA                         0x00000001
+#define ATOM_DEVICE_CONNECTOR_DVI_I                       0x00000002
+#define ATOM_DEVICE_CONNECTOR_DVI_D                       0x00000003
+#define ATOM_DEVICE_CONNECTOR_DVI_A                       0x00000004
+#define ATOM_DEVICE_CONNECTOR_SVIDEO                      0x00000005
+#define ATOM_DEVICE_CONNECTOR_COMPOSITE                   0x00000006
+#define ATOM_DEVICE_CONNECTOR_LVDS                        0x00000007
+#define ATOM_DEVICE_CONNECTOR_DIGI_LINK                   0x00000008
+#define ATOM_DEVICE_CONNECTOR_SCART                       0x00000009
+#define ATOM_DEVICE_CONNECTOR_HDMI_TYPE_A                 0x0000000A
+#define ATOM_DEVICE_CONNECTOR_HDMI_TYPE_B                 0x0000000B
+#define ATOM_DEVICE_CONNECTOR_CASE_1                      0x0000000E
+#define ATOM_DEVICE_CONNECTOR_DISPLAYPORT                 0x0000000F
+
+#define ATOM_DEVICE_DAC_INFO_MASK                         0x0000000F
+#define ATOM_DEVICE_DAC_INFO_SHIFT                        0x00000000
+#define ATOM_DEVICE_DAC_INFO_NODAC                        0x00000000
+#define ATOM_DEVICE_DAC_INFO_DACA                         0x00000001
+#define ATOM_DEVICE_DAC_INFO_DACB                         0x00000002
+#define ATOM_DEVICE_DAC_INFO_EXDAC                        0x00000003
+
+#define ATOM_DEVICE_I2C_ID_NOI2C                          0x00000000
+
+#define ATOM_DEVICE_I2C_LINEMUX_MASK                      0x0000000F
+#define ATOM_DEVICE_I2C_LINEMUX_SHIFT                     0x00000000
+
+#define ATOM_DEVICE_I2C_ID_MASK                           0x00000070
+#define ATOM_DEVICE_I2C_ID_SHIFT                          0x00000004
+#define ATOM_DEVICE_I2C_ID_IS_FOR_NON_MM_USE              0x00000001
+#define ATOM_DEVICE_I2C_ID_IS_FOR_MM_USE                  0x00000002
+#define ATOM_DEVICE_I2C_ID_IS_FOR_SDVO_USE                0x00000003	/* For IGP RS600 */
+#define ATOM_DEVICE_I2C_ID_IS_FOR_DAC_SCL                 0x00000004	/* For IGP RS690 */
+
+#define ATOM_DEVICE_I2C_HARDWARE_CAP_MASK                 0x00000080
+#define ATOM_DEVICE_I2C_HARDWARE_CAP_SHIFT                0x00000007
+#define	ATOM_DEVICE_USES_SOFTWARE_ASSISTED_I2C            0x00000000
+#define	ATOM_DEVICE_USES_HARDWARE_ASSISTED_I2C            0x00000001
+
+/*   usDeviceSupport: */
+/*   Bits0       = 0 - no CRT1 support= 1- CRT1 is supported */
+/*   Bit 1       = 0 - no LCD1 support= 1- LCD1 is supported */
+/*   Bit 2       = 0 - no TV1  support= 1- TV1  is supported */
+/*   Bit 3       = 0 - no DFP1 support= 1- DFP1 is supported */
+/*   Bit 4       = 0 - no CRT2 support= 1- CRT2 is supported */
+/*   Bit 5       = 0 - no LCD2 support= 1- LCD2 is supported */
+/*   Bit 6       = 0 - no TV2  support= 1- TV2  is supported */
+/*   Bit 7       = 0 - no DFP2 support= 1- DFP2 is supported */
+/*   Bit 8       = 0 - no CV   support= 1- CV   is supported */
+/*   Bit 9       = 0 - no DFP3 support= 1- DFP3 is supported */
+/*   Byte1 (Supported Device Info) */
+/*   Bit 0       = = 0 - no CV support= 1- CV is supported */
+/*  */
+/*  */
+
+/*               ucI2C_ConfigID */
+/*     [7:0] - I2C LINE Associate ID */
+/*           = 0   - no I2C */
+/*     [7]               -       HW_Cap        = 1,  [6:0]=HW assisted I2C ID(HW line selection) */
+/*                           =   0,  [6:0]=SW assisted I2C ID */
+/*     [6-4]     - HW_ENGINE_ID  =       1,  HW engine for NON multimedia use */
+/*                           =   2,      HW engine for Multimedia use */
+/*                           =   3-7     Reserved for future I2C engines */
+/*               [3-0] - I2C_LINE_MUX  = A Mux number when it's HW assisted I2C or GPIO ID when it's SW I2C */
+
+typedef struct _ATOM_I2C_ID_CONFIG {
+#if ATOM_BIG_ENDIAN
+	UCHAR bfHW_Capable:1;
+	UCHAR bfHW_EngineID:3;
+	UCHAR bfI2C_LineMux:4;
+#else
+	UCHAR bfI2C_LineMux:4;
+	UCHAR bfHW_EngineID:3;
+	UCHAR bfHW_Capable:1;
+#endif
+} ATOM_I2C_ID_CONFIG;
+
+typedef union _ATOM_I2C_ID_CONFIG_ACCESS {
+	ATOM_I2C_ID_CONFIG sbfAccess;
+	UCHAR ucAccess;
+} ATOM_I2C_ID_CONFIG_ACCESS;
+
+/****************************************************************************/
+/*  Structure used in GPIO_I2C_InfoTable */
+/****************************************************************************/
+typedef struct _ATOM_GPIO_I2C_ASSIGMENT {
+	USHORT usClkMaskRegisterIndex;
+	USHORT usClkEnRegisterIndex;
+	USHORT usClkY_RegisterIndex;
+	USHORT usClkA_RegisterIndex;
+	USHORT usDataMaskRegisterIndex;
+	USHORT usDataEnRegisterIndex;
+	USHORT usDataY_RegisterIndex;
+	USHORT usDataA_RegisterIndex;
+	ATOM_I2C_ID_CONFIG_ACCESS sucI2cId;
+	UCHAR ucClkMaskShift;
+	UCHAR ucClkEnShift;
+	UCHAR ucClkY_Shift;
+	UCHAR ucClkA_Shift;
+	UCHAR ucDataMaskShift;
+	UCHAR ucDataEnShift;
+	UCHAR ucDataY_Shift;
+	UCHAR ucDataA_Shift;
+	UCHAR ucReserved1;
+	UCHAR ucReserved2;
+} ATOM_GPIO_I2C_ASSIGMENT;
+
+typedef struct _ATOM_GPIO_I2C_INFO {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	ATOM_GPIO_I2C_ASSIGMENT asGPIO_Info[ATOM_MAX_SUPPORTED_DEVICE];
+} ATOM_GPIO_I2C_INFO;
+
+/****************************************************************************/
+/*  Common Structure used in other structures */
+/****************************************************************************/
+
+#ifndef _H2INC
+
+/* Please don't add or expand this bitfield structure below, this one will retire soon.! */
+typedef struct _ATOM_MODE_MISC_INFO {
+#if ATOM_BIG_ENDIAN
+	USHORT Reserved:6;
+	USHORT RGB888:1;
+	USHORT DoubleClock:1;
+	USHORT Interlace:1;
+	USHORT CompositeSync:1;
+	USHORT V_ReplicationBy2:1;
+	USHORT H_ReplicationBy2:1;
+	USHORT VerticalCutOff:1;
+	USHORT VSyncPolarity:1;	/* 0=Active High, 1=Active Low */
+	USHORT HSyncPolarity:1;	/* 0=Active High, 1=Active Low */
+	USHORT HorizontalCutOff:1;
+#else
+	USHORT HorizontalCutOff:1;
+	USHORT HSyncPolarity:1;	/* 0=Active High, 1=Active Low */
+	USHORT VSyncPolarity:1;	/* 0=Active High, 1=Active Low */
+	USHORT VerticalCutOff:1;
+	USHORT H_ReplicationBy2:1;
+	USHORT V_ReplicationBy2:1;
+	USHORT CompositeSync:1;
+	USHORT Interlace:1;
+	USHORT DoubleClock:1;
+	USHORT RGB888:1;
+	USHORT Reserved:6;
+#endif
+} ATOM_MODE_MISC_INFO;
+
+typedef union _ATOM_MODE_MISC_INFO_ACCESS {
+	ATOM_MODE_MISC_INFO sbfAccess;
+	USHORT usAccess;
+} ATOM_MODE_MISC_INFO_ACCESS;
+
+#else
+
+typedef union _ATOM_MODE_MISC_INFO_ACCESS {
+	USHORT usAccess;
+} ATOM_MODE_MISC_INFO_ACCESS;
+
+#endif
+
+/*  usModeMiscInfo- */
+#define ATOM_H_CUTOFF           0x01
+#define ATOM_HSYNC_POLARITY     0x02	/* 0=Active High, 1=Active Low */
+#define ATOM_VSYNC_POLARITY     0x04	/* 0=Active High, 1=Active Low */
+#define ATOM_V_CUTOFF           0x08
+#define ATOM_H_REPLICATIONBY2   0x10
+#define ATOM_V_REPLICATIONBY2   0x20
+#define ATOM_COMPOSITESYNC      0x40
+#define ATOM_INTERLACE          0x80
+#define ATOM_DOUBLE_CLOCK_MODE  0x100
+#define ATOM_RGB888_MODE        0x200
+
+/* usRefreshRate- */
+#define ATOM_REFRESH_43         43
+#define ATOM_REFRESH_47         47
+#define ATOM_REFRESH_56         56
+#define ATOM_REFRESH_60         60
+#define ATOM_REFRESH_65         65
+#define ATOM_REFRESH_70         70
+#define ATOM_REFRESH_72         72
+#define ATOM_REFRESH_75         75
+#define ATOM_REFRESH_85         85
+
+/*  ATOM_MODE_TIMING data are exactly the same as VESA timing data. */
+/*  Translation from EDID to ATOM_MODE_TIMING, use the following formula. */
+/*  */
+/*       VESA_HTOTAL                     =       VESA_ACTIVE + 2* VESA_BORDER + VESA_BLANK */
+/*                                               =       EDID_HA + EDID_HBL */
+/*       VESA_HDISP                      =       VESA_ACTIVE     =       EDID_HA */
+/*       VESA_HSYNC_START        =       VESA_ACTIVE + VESA_BORDER + VESA_FRONT_PORCH */
+/*                                               =       EDID_HA + EDID_HSO */
+/*       VESA_HSYNC_WIDTH        =       VESA_HSYNC_TIME =       EDID_HSPW */
+/*       VESA_BORDER                     =       EDID_BORDER */
+
+/****************************************************************************/
+/*  Structure used in SetCRTC_UsingDTDTimingTable */
+/****************************************************************************/
+typedef struct _SET_CRTC_USING_DTD_TIMING_PARAMETERS {
+	USHORT usH_Size;
+	USHORT usH_Blanking_Time;
+	USHORT usV_Size;
+	USHORT usV_Blanking_Time;
+	USHORT usH_SyncOffset;
+	USHORT usH_SyncWidth;
+	USHORT usV_SyncOffset;
+	USHORT usV_SyncWidth;
+	ATOM_MODE_MISC_INFO_ACCESS susModeMiscInfo;
+	UCHAR ucH_Border;	/*  From DFP EDID */
+	UCHAR ucV_Border;
+	UCHAR ucCRTC;		/*  ATOM_CRTC1 or ATOM_CRTC2 */
+	UCHAR ucPadding[3];
+} SET_CRTC_USING_DTD_TIMING_PARAMETERS;
+
+/****************************************************************************/
+/*  Structure used in SetCRTC_TimingTable */
+/****************************************************************************/
+typedef struct _SET_CRTC_TIMING_PARAMETERS {
+	USHORT usH_Total;	/*  horizontal total */
+	USHORT usH_Disp;	/*  horizontal display */
+	USHORT usH_SyncStart;	/*  horozontal Sync start */
+	USHORT usH_SyncWidth;	/*  horizontal Sync width */
+	USHORT usV_Total;	/*  vertical total */
+	USHORT usV_Disp;	/*  vertical display */
+	USHORT usV_SyncStart;	/*  vertical Sync start */
+	USHORT usV_SyncWidth;	/*  vertical Sync width */
+	ATOM_MODE_MISC_INFO_ACCESS susModeMiscInfo;
+	UCHAR ucCRTC;		/*  ATOM_CRTC1 or ATOM_CRTC2 */
+	UCHAR ucOverscanRight;	/*  right */
+	UCHAR ucOverscanLeft;	/*  left */
+	UCHAR ucOverscanBottom;	/*  bottom */
+	UCHAR ucOverscanTop;	/*  top */
+	UCHAR ucReserved;
+} SET_CRTC_TIMING_PARAMETERS;
+#define SET_CRTC_TIMING_PARAMETERS_PS_ALLOCATION SET_CRTC_TIMING_PARAMETERS
+
+/****************************************************************************/
+/*  Structure used in StandardVESA_TimingTable */
+/*                    AnalogTV_InfoTable */
+/*                    ComponentVideoInfoTable */
+/****************************************************************************/
+typedef struct _ATOM_MODE_TIMING {
+	USHORT usCRTC_H_Total;
+	USHORT usCRTC_H_Disp;
+	USHORT usCRTC_H_SyncStart;
+	USHORT usCRTC_H_SyncWidth;
+	USHORT usCRTC_V_Total;
+	USHORT usCRTC_V_Disp;
+	USHORT usCRTC_V_SyncStart;
+	USHORT usCRTC_V_SyncWidth;
+	USHORT usPixelClock;	/* in 10Khz unit */
+	ATOM_MODE_MISC_INFO_ACCESS susModeMiscInfo;
+	USHORT usCRTC_OverscanRight;
+	USHORT usCRTC_OverscanLeft;
+	USHORT usCRTC_OverscanBottom;
+	USHORT usCRTC_OverscanTop;
+	USHORT usReserve;
+	UCHAR ucInternalModeNumber;
+	UCHAR ucRefreshRate;
+} ATOM_MODE_TIMING;
+
+typedef struct _ATOM_DTD_FORMAT {
+	USHORT usPixClk;
+	USHORT usHActive;
+	USHORT usHBlanking_Time;
+	USHORT usVActive;
+	USHORT usVBlanking_Time;
+	USHORT usHSyncOffset;
+	USHORT usHSyncWidth;
+	USHORT usVSyncOffset;
+	USHORT usVSyncWidth;
+	USHORT usImageHSize;
+	USHORT usImageVSize;
+	UCHAR ucHBorder;
+	UCHAR ucVBorder;
+	ATOM_MODE_MISC_INFO_ACCESS susModeMiscInfo;
+	UCHAR ucInternalModeNumber;
+	UCHAR ucRefreshRate;
+} ATOM_DTD_FORMAT;
+
+/****************************************************************************/
+/*  Structure used in LVDS_InfoTable */
+/*   * Need a document to describe this table */
+/****************************************************************************/
+#define SUPPORTED_LCD_REFRESHRATE_30Hz          0x0004
+#define SUPPORTED_LCD_REFRESHRATE_40Hz          0x0008
+#define SUPPORTED_LCD_REFRESHRATE_50Hz          0x0010
+#define SUPPORTED_LCD_REFRESHRATE_60Hz          0x0020
+
+/* Once DAL sees this CAP is set, it will read EDID from LCD on its own instead of using sLCDTiming in ATOM_LVDS_INFO_V12. */
+/* Other entries in ATOM_LVDS_INFO_V12 are still valid/useful to DAL */
+#define	LCDPANEL_CAP_READ_EDID									0x1
+
+/* ucTableFormatRevision=1 */
+/* ucTableContentRevision=1 */
+typedef struct _ATOM_LVDS_INFO {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	ATOM_DTD_FORMAT sLCDTiming;
+	USHORT usModePatchTableOffset;
+	USHORT usSupportedRefreshRate;	/* Refer to panel info table in ATOMBIOS extension Spec. */
+	USHORT usOffDelayInMs;
+	UCHAR ucPowerSequenceDigOntoDEin10Ms;
+	UCHAR ucPowerSequenceDEtoBLOnin10Ms;
+	UCHAR ucLVDS_Misc;	/*  Bit0:{=0:single, =1:dual},Bit1 {=0:666RGB, =1:888RGB},Bit2:3:{Grey level} */
+	/*  Bit4:{=0:LDI format for RGB888, =1 FPDI format for RGB888} */
+	/*  Bit5:{=0:Spatial Dithering disabled;1 Spatial Dithering enabled} */
+	/*  Bit6:{=0:Temporal Dithering disabled;1 Temporal Dithering enabled} */
+	UCHAR ucPanelDefaultRefreshRate;
+	UCHAR ucPanelIdentification;
+	UCHAR ucSS_Id;
+} ATOM_LVDS_INFO;
+
+/* ucTableFormatRevision=1 */
+/* ucTableContentRevision=2 */
+typedef struct _ATOM_LVDS_INFO_V12 {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	ATOM_DTD_FORMAT sLCDTiming;
+	USHORT usExtInfoTableOffset;
+	USHORT usSupportedRefreshRate;	/* Refer to panel info table in ATOMBIOS extension Spec. */
+	USHORT usOffDelayInMs;
+	UCHAR ucPowerSequenceDigOntoDEin10Ms;
+	UCHAR ucPowerSequenceDEtoBLOnin10Ms;
+	UCHAR ucLVDS_Misc;	/*  Bit0:{=0:single, =1:dual},Bit1 {=0:666RGB, =1:888RGB},Bit2:3:{Grey level} */
+	/*  Bit4:{=0:LDI format for RGB888, =1 FPDI format for RGB888} */
+	/*  Bit5:{=0:Spatial Dithering disabled;1 Spatial Dithering enabled} */
+	/*  Bit6:{=0:Temporal Dithering disabled;1 Temporal Dithering enabled} */
+	UCHAR ucPanelDefaultRefreshRate;
+	UCHAR ucPanelIdentification;
+	UCHAR ucSS_Id;
+	USHORT usLCDVenderID;
+	USHORT usLCDProductID;
+	UCHAR ucLCDPanel_SpecialHandlingCap;
+	UCHAR ucPanelInfoSize;	/*   start from ATOM_DTD_FORMAT to end of panel info, include ExtInfoTable */
+	UCHAR ucReserved[2];
+} ATOM_LVDS_INFO_V12;
+
+#define ATOM_LVDS_INFO_LAST  ATOM_LVDS_INFO_V12
+
+typedef struct _ATOM_PATCH_RECORD_MODE {
+	UCHAR ucRecordType;
+	USHORT usHDisp;
+	USHORT usVDisp;
+} ATOM_PATCH_RECORD_MODE;
+
+typedef struct _ATOM_LCD_RTS_RECORD {
+	UCHAR ucRecordType;
+	UCHAR ucRTSValue;
+} ATOM_LCD_RTS_RECORD;
+
+/* !! If the record below exits, it shoud always be the first record for easy use in command table!!! */
+typedef struct _ATOM_LCD_MODE_CONTROL_CAP {
+	UCHAR ucRecordType;
+	USHORT usLCDCap;
+} ATOM_LCD_MODE_CONTROL_CAP;
+
+#define LCD_MODE_CAP_BL_OFF                   1
+#define LCD_MODE_CAP_CRTC_OFF                 2
+#define LCD_MODE_CAP_PANEL_OFF                4
+
+typedef struct _ATOM_FAKE_EDID_PATCH_RECORD {
+	UCHAR ucRecordType;
+	UCHAR ucFakeEDIDLength;
+	UCHAR ucFakeEDIDString[1];	/*  This actually has ucFakeEdidLength elements. */
+} ATOM_FAKE_EDID_PATCH_RECORD;
+
+typedef struct _ATOM_PANEL_RESOLUTION_PATCH_RECORD {
+	UCHAR ucRecordType;
+	USHORT usHSize;
+	USHORT usVSize;
+} ATOM_PANEL_RESOLUTION_PATCH_RECORD;
+
+#define LCD_MODE_PATCH_RECORD_MODE_TYPE       1
+#define LCD_RTS_RECORD_TYPE                   2
+#define LCD_CAP_RECORD_TYPE                   3
+#define LCD_FAKE_EDID_PATCH_RECORD_TYPE       4
+#define LCD_PANEL_RESOLUTION_RECORD_TYPE      5
+#define ATOM_RECORD_END_TYPE                  0xFF
+
+/****************************Spread Spectrum Info Table Definitions **********************/
+
+/* ucTableFormatRevision=1 */
+/* ucTableContentRevision=2 */
+typedef struct _ATOM_SPREAD_SPECTRUM_ASSIGNMENT {
+	USHORT usSpreadSpectrumPercentage;
+	UCHAR ucSpreadSpectrumType;	/* Bit1=0 Down Spread,=1 Center Spread. Bit1=1 Ext. =0 Int. Others:TBD */
+	UCHAR ucSS_Step;
+	UCHAR ucSS_Delay;
+	UCHAR ucSS_Id;
+	UCHAR ucRecommandedRef_Div;
+	UCHAR ucSS_Range;	/* it was reserved for V11 */
+} ATOM_SPREAD_SPECTRUM_ASSIGNMENT;
+
+#define ATOM_MAX_SS_ENTRY                      16
+#define ATOM_DP_SS_ID1												 0x0f1	/*  SS modulation freq=30k */
+#define ATOM_DP_SS_ID2												 0x0f2	/*  SS modulation freq=33k */
+
+#define ATOM_SS_DOWN_SPREAD_MODE_MASK          0x00000000
+#define ATOM_SS_DOWN_SPREAD_MODE               0x00000000
+#define ATOM_SS_CENTRE_SPREAD_MODE_MASK        0x00000001
+#define ATOM_SS_CENTRE_SPREAD_MODE             0x00000001
+#define ATOM_INTERNAL_SS_MASK                  0x00000000
+#define ATOM_EXTERNAL_SS_MASK                  0x00000002
+#define EXEC_SS_STEP_SIZE_SHIFT                2
+#define EXEC_SS_DELAY_SHIFT                    4
+#define ACTIVEDATA_TO_BLON_DELAY_SHIFT         4
+
+typedef struct _ATOM_SPREAD_SPECTRUM_INFO {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	ATOM_SPREAD_SPECTRUM_ASSIGNMENT asSS_Info[ATOM_MAX_SS_ENTRY];
+} ATOM_SPREAD_SPECTRUM_INFO;
+
+/****************************************************************************/
+/*  Structure used in AnalogTV_InfoTable (Top level) */
+/****************************************************************************/
+/* ucTVBootUpDefaultStd definiton: */
+
+/* ATOM_TV_NTSC                1 */
+/* ATOM_TV_NTSCJ               2 */
+/* ATOM_TV_PAL                 3 */
+/* ATOM_TV_PALM                4 */
+/* ATOM_TV_PALCN               5 */
+/* ATOM_TV_PALN                6 */
+/* ATOM_TV_PAL60               7 */
+/* ATOM_TV_SECAM               8 */
+
+/* ucTVSuppportedStd definition: */
+#define NTSC_SUPPORT          0x1
+#define NTSCJ_SUPPORT         0x2
+
+#define PAL_SUPPORT           0x4
+#define PALM_SUPPORT          0x8
+#define PALCN_SUPPORT         0x10
+#define PALN_SUPPORT          0x20
+#define PAL60_SUPPORT         0x40
+#define SECAM_SUPPORT         0x80
+
+#define MAX_SUPPORTED_TV_TIMING    2
+
+typedef struct _ATOM_ANALOG_TV_INFO {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	UCHAR ucTV_SupportedStandard;
+	UCHAR ucTV_BootUpDefaultStandard;
+	UCHAR ucExt_TV_ASIC_ID;
+	UCHAR ucExt_TV_ASIC_SlaveAddr;
+	/*ATOM_DTD_FORMAT          aModeTimings[MAX_SUPPORTED_TV_TIMING]; */
+	ATOM_MODE_TIMING aModeTimings[MAX_SUPPORTED_TV_TIMING];
+} ATOM_ANALOG_TV_INFO;
+
+/**************************************************************************/
+/*  VRAM usage and their defintions */
+
+/*  One chunk of VRAM used by Bios are for HWICON surfaces,EDID data. */
+/*  Current Mode timing and Dail Timing and/or STD timing data EACH device. They can be broken down as below. */
+/*  All the addresses below are the offsets from the frame buffer start.They all MUST be Dword aligned! */
+/*  To driver: The physical address of this memory portion=mmFB_START(4K aligned)+ATOMBIOS_VRAM_USAGE_START_ADDR+ATOM_x_ADDR */
+/*  To Bios:  ATOMBIOS_VRAM_USAGE_START_ADDR+ATOM_x_ADDR->MM_INDEX */
+
+#ifndef VESA_MEMORY_IN_64K_BLOCK
+#define VESA_MEMORY_IN_64K_BLOCK        0x100	/* 256*64K=16Mb (Max. VESA memory is 16Mb!) */
+#endif
+
+#define ATOM_EDID_RAW_DATASIZE          256	/* In Bytes */
+#define ATOM_HWICON_SURFACE_SIZE        4096	/* In Bytes */
+#define ATOM_HWICON_INFOTABLE_SIZE      32
+#define MAX_DTD_MODE_IN_VRAM            6
+#define ATOM_DTD_MODE_SUPPORT_TBL_SIZE  (MAX_DTD_MODE_IN_VRAM*28)	/* 28= (SIZEOF ATOM_DTD_FORMAT) */
+#define ATOM_STD_MODE_SUPPORT_TBL_SIZE  (32*8)	/* 32 is a predefined number,8= (SIZEOF ATOM_STD_FORMAT) */
+#define DFP_ENCODER_TYPE_OFFSET					0x80
+#define DP_ENCODER_LANE_NUM_OFFSET			0x84
+#define DP_ENCODER_LINK_RATE_OFFSET			0x88
+
+#define ATOM_HWICON1_SURFACE_ADDR       0
+#define ATOM_HWICON2_SURFACE_ADDR       (ATOM_HWICON1_SURFACE_ADDR + ATOM_HWICON_SURFACE_SIZE)
+#define ATOM_HWICON_INFOTABLE_ADDR      (ATOM_HWICON2_SURFACE_ADDR + ATOM_HWICON_SURFACE_SIZE)
+#define ATOM_CRT1_EDID_ADDR             (ATOM_HWICON_INFOTABLE_ADDR + ATOM_HWICON_INFOTABLE_SIZE)
+#define ATOM_CRT1_DTD_MODE_TBL_ADDR     (ATOM_CRT1_EDID_ADDR + ATOM_EDID_RAW_DATASIZE)
+#define ATOM_CRT1_STD_MODE_TBL_ADDR	    (ATOM_CRT1_DTD_MODE_TBL_ADDR + ATOM_DTD_MODE_SUPPORT_TBL_SIZE)
+
+#define ATOM_LCD1_EDID_ADDR             (ATOM_CRT1_STD_MODE_TBL_ADDR + ATOM_STD_MODE_SUPPORT_TBL_SIZE)
+#define ATOM_LCD1_DTD_MODE_TBL_ADDR     (ATOM_LCD1_EDID_ADDR + ATOM_EDID_RAW_DATASIZE)
+#define ATOM_LCD1_STD_MODE_TBL_ADDR	(ATOM_LCD1_DTD_MODE_TBL_ADDR + ATOM_DTD_MODE_SUPPORT_TBL_SIZE)
+
+#define ATOM_TV1_DTD_MODE_TBL_ADDR      (ATOM_LCD1_STD_MODE_TBL_ADDR + ATOM_STD_MODE_SUPPORT_TBL_SIZE)
+
+#define ATOM_DFP1_EDID_ADDR             (ATOM_TV1_DTD_MODE_TBL_ADDR + ATOM_DTD_MODE_SUPPORT_TBL_SIZE)
+#define ATOM_DFP1_DTD_MODE_TBL_ADDR     (ATOM_DFP1_EDID_ADDR + ATOM_EDID_RAW_DATASIZE)
+#define ATOM_DFP1_STD_MODE_TBL_ADDR	    (ATOM_DFP1_DTD_MODE_TBL_ADDR + ATOM_DTD_MODE_SUPPORT_TBL_SIZE)
+
+#define ATOM_CRT2_EDID_ADDR             (ATOM_DFP1_STD_MODE_TBL_ADDR + ATOM_STD_MODE_SUPPORT_TBL_SIZE)
+#define ATOM_CRT2_DTD_MODE_TBL_ADDR     (ATOM_CRT2_EDID_ADDR + ATOM_EDID_RAW_DATASIZE)
+#define ATOM_CRT2_STD_MODE_TBL_ADDR	    (ATOM_CRT2_DTD_MODE_TBL_ADDR + ATOM_DTD_MODE_SUPPORT_TBL_SIZE)
+
+#define ATOM_LCD2_EDID_ADDR             (ATOM_CRT2_STD_MODE_TBL_ADDR + ATOM_STD_MODE_SUPPORT_TBL_SIZE)
+#define ATOM_LCD2_DTD_MODE_TBL_ADDR     (ATOM_LCD2_EDID_ADDR + ATOM_EDID_RAW_DATASIZE)
+#define ATOM_LCD2_STD_MODE_TBL_ADDR	(ATOM_LCD2_DTD_MODE_TBL_ADDR + ATOM_DTD_MODE_SUPPORT_TBL_SIZE)
+
+#define ATOM_TV2_EDID_ADDR              (ATOM_LCD2_STD_MODE_TBL_ADDR + ATOM_STD_MODE_SUPPORT_TBL_SIZE)
+#define ATOM_TV2_DTD_MODE_TBL_ADDR      (ATOM_TV2_EDID_ADDR + ATOM_EDID_RAW_DATASIZE)
+#define ATOM_TV2_STD_MODE_TBL_ADDR	  (ATOM_TV2_DTD_MODE_TBL_ADDR + ATOM_DTD_MODE_SUPPORT_TBL_SIZE)
+
+#define ATOM_DFP2_EDID_ADDR             (ATOM_TV2_STD_MODE_TBL_ADDR + ATOM_STD_MODE_SUPPORT_TBL_SIZE)
+#define ATOM_DFP2_DTD_MODE_TBL_ADDR     (ATOM_DFP2_EDID_ADDR + ATOM_EDID_RAW_DATASIZE)
+#define ATOM_DFP2_STD_MODE_TBL_ADDR     (ATOM_DFP2_DTD_MODE_TBL_ADDR + ATOM_DTD_MODE_SUPPORT_TBL_SIZE)
+
+#define ATOM_CV_EDID_ADDR               (ATOM_DFP2_STD_MODE_TBL_ADDR + ATOM_STD_MODE_SUPPORT_TBL_SIZE)
+#define ATOM_CV_DTD_MODE_TBL_ADDR       (ATOM_CV_EDID_ADDR + ATOM_EDID_RAW_DATASIZE)
+#define ATOM_CV_STD_MODE_TBL_ADDR       (ATOM_CV_DTD_MODE_TBL_ADDR + ATOM_DTD_MODE_SUPPORT_TBL_SIZE)
+
+#define ATOM_DFP3_EDID_ADDR             (ATOM_CV_STD_MODE_TBL_ADDR + ATOM_STD_MODE_SUPPORT_TBL_SIZE)
+#define ATOM_DFP3_DTD_MODE_TBL_ADDR     (ATOM_DFP3_EDID_ADDR + ATOM_EDID_RAW_DATASIZE)
+#define ATOM_DFP3_STD_MODE_TBL_ADDR     (ATOM_DFP3_DTD_MODE_TBL_ADDR + ATOM_DTD_MODE_SUPPORT_TBL_SIZE)
+
+#define ATOM_DFP4_EDID_ADDR             (ATOM_DFP3_STD_MODE_TBL_ADDR + ATOM_STD_MODE_SUPPORT_TBL_SIZE)
+#define ATOM_DFP4_DTD_MODE_TBL_ADDR     (ATOM_DFP4_EDID_ADDR + ATOM_EDID_RAW_DATASIZE)
+#define ATOM_DFP4_STD_MODE_TBL_ADDR     (ATOM_DFP4_DTD_MODE_TBL_ADDR + ATOM_DTD_MODE_SUPPORT_TBL_SIZE)
+
+#define ATOM_DFP5_EDID_ADDR             (ATOM_DFP4_STD_MODE_TBL_ADDR + ATOM_STD_MODE_SUPPORT_TBL_SIZE)
+#define ATOM_DFP5_DTD_MODE_TBL_ADDR     (ATOM_DFP5_EDID_ADDR + ATOM_EDID_RAW_DATASIZE)
+#define ATOM_DFP5_STD_MODE_TBL_ADDR     (ATOM_DFP5_DTD_MODE_TBL_ADDR + ATOM_DTD_MODE_SUPPORT_TBL_SIZE)
+
+#define ATOM_DP_TRAINING_TBL_ADDR	(ATOM_DFP5_STD_MODE_TBL_ADDR+ATOM_STD_MODE_SUPPORT_TBL_SIZE)
+
+#define ATOM_STACK_STORAGE_START        (ATOM_DP_TRAINING_TBL_ADDR + 256)
+#define ATOM_STACK_STORAGE_END          (ATOM_STACK_STORAGE_START + 512)
+
+/* The size below is in Kb! */
+#define ATOM_VRAM_RESERVE_SIZE         ((((ATOM_STACK_STORAGE_END - ATOM_HWICON1_SURFACE_ADDR)>>10)+4)&0xFFFC)
+
+#define	ATOM_VRAM_OPERATION_FLAGS_MASK         0xC0000000L
+#define ATOM_VRAM_OPERATION_FLAGS_SHIFT        30
+#define	ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION   0x1
+#define	ATOM_VRAM_BLOCK_NEEDS_RESERVATION      0x0
+
+/***********************************************************************************/
+/*  Structure used in VRAM_UsageByFirmwareTable */
+/*  Note1: This table is filled by SetBiosReservationStartInFB in CoreCommSubs.asm */
+/*         at running time. */
+/*  note2: From RV770, the memory is more than 32bit addressable, so we will change */
+/*         ucTableFormatRevision=1,ucTableContentRevision=4, the strcuture remains */
+/*         exactly same as 1.1 and 1.2 (1.3 is never in use), but ulStartAddrUsedByFirmware */
+/*         (in offset to start of memory address) is KB aligned instead of byte aligend. */
+/***********************************************************************************/
+#define ATOM_MAX_FIRMWARE_VRAM_USAGE_INFO			1
+
+typedef struct _ATOM_FIRMWARE_VRAM_RESERVE_INFO {
+	ULONG ulStartAddrUsedByFirmware;
+	USHORT usFirmwareUseInKb;
+	USHORT usReserved;
+} ATOM_FIRMWARE_VRAM_RESERVE_INFO;
+
+typedef struct _ATOM_VRAM_USAGE_BY_FIRMWARE {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	ATOM_FIRMWARE_VRAM_RESERVE_INFO
+	    asFirmwareVramReserveInfo[ATOM_MAX_FIRMWARE_VRAM_USAGE_INFO];
+} ATOM_VRAM_USAGE_BY_FIRMWARE;
+
+/****************************************************************************/
+/*  Structure used in GPIO_Pin_LUTTable */
+/****************************************************************************/
+typedef struct _ATOM_GPIO_PIN_ASSIGNMENT {
+	USHORT usGpioPin_AIndex;
+	UCHAR ucGpioPinBitShift;
+	UCHAR ucGPIO_ID;
+} ATOM_GPIO_PIN_ASSIGNMENT;
+
+typedef struct _ATOM_GPIO_PIN_LUT {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	ATOM_GPIO_PIN_ASSIGNMENT asGPIO_Pin[1];
+} ATOM_GPIO_PIN_LUT;
+
+/****************************************************************************/
+/*  Structure used in ComponentVideoInfoTable */
+/****************************************************************************/
+#define GPIO_PIN_ACTIVE_HIGH          0x1
+
+#define MAX_SUPPORTED_CV_STANDARDS    5
+
+/*  definitions for ATOM_D_INFO.ucSettings */
+#define ATOM_GPIO_SETTINGS_BITSHIFT_MASK  0x1F	/*  [4:0] */
+#define ATOM_GPIO_SETTINGS_RESERVED_MASK  0x60	/*  [6:5] = must be zeroed out */
+#define ATOM_GPIO_SETTINGS_ACTIVE_MASK    0x80	/*  [7] */
+
+typedef struct _ATOM_GPIO_INFO {
+	USHORT usAOffset;
+	UCHAR ucSettings;
+	UCHAR ucReserved;
+} ATOM_GPIO_INFO;
+
+/*  definitions for ATOM_COMPONENT_VIDEO_INFO.ucMiscInfo (bit vector) */
+#define ATOM_CV_RESTRICT_FORMAT_SELECTION           0x2
+
+/*  definitions for ATOM_COMPONENT_VIDEO_INFO.uc480i/uc480p/uc720p/uc1080i */
+#define ATOM_GPIO_DEFAULT_MODE_EN                   0x80	/* [7]; */
+#define ATOM_GPIO_SETTING_PERMODE_MASK              0x7F	/* [6:0] */
+
+/*  definitions for ATOM_COMPONENT_VIDEO_INFO.ucLetterBoxMode */
+/* Line 3 out put 5V. */
+#define ATOM_CV_LINE3_ASPECTRATIO_16_9_GPIO_A       0x01	/* represent gpio 3 state for 16:9 */
+#define ATOM_CV_LINE3_ASPECTRATIO_16_9_GPIO_B       0x02	/* represent gpio 4 state for 16:9 */
+#define ATOM_CV_LINE3_ASPECTRATIO_16_9_GPIO_SHIFT   0x0
+
+/* Line 3 out put 2.2V */
+#define ATOM_CV_LINE3_ASPECTRATIO_4_3_LETBOX_GPIO_A 0x04	/* represent gpio 3 state for 4:3 Letter box */
+#define ATOM_CV_LINE3_ASPECTRATIO_4_3_LETBOX_GPIO_B 0x08	/* represent gpio 4 state for 4:3 Letter box */
+#define ATOM_CV_LINE3_ASPECTRATIO_4_3_LETBOX_GPIO_SHIFT 0x2
+
+/* Line 3 out put 0V */
+#define ATOM_CV_LINE3_ASPECTRATIO_4_3_GPIO_A        0x10	/* represent gpio 3 state for 4:3 */
+#define ATOM_CV_LINE3_ASPECTRATIO_4_3_GPIO_B        0x20	/* represent gpio 4 state for 4:3 */
+#define ATOM_CV_LINE3_ASPECTRATIO_4_3_GPIO_SHIFT    0x4
+
+#define ATOM_CV_LINE3_ASPECTRATIO_MASK              0x3F	/*  bit [5:0] */
+
+#define ATOM_CV_LINE3_ASPECTRATIO_EXIST             0x80	/* bit 7 */
+
+/* GPIO bit index in gpio setting per mode value, also represend the block no. in gpio blocks. */
+#define ATOM_GPIO_INDEX_LINE3_ASPECRATIO_GPIO_A   3	/* bit 3 in uc480i/uc480p/uc720p/uc1080i, which represend the default gpio bit setting for the mode. */
+#define ATOM_GPIO_INDEX_LINE3_ASPECRATIO_GPIO_B   4	/* bit 4 in uc480i/uc480p/uc720p/uc1080i, which represend the default gpio bit setting for the mode. */
+
+typedef struct _ATOM_COMPONENT_VIDEO_INFO {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	USHORT usMask_PinRegisterIndex;
+	USHORT usEN_PinRegisterIndex;
+	USHORT usY_PinRegisterIndex;
+	USHORT usA_PinRegisterIndex;
+	UCHAR ucBitShift;
+	UCHAR ucPinActiveState;	/* ucPinActiveState: Bit0=1 active high, =0 active low */
+	ATOM_DTD_FORMAT sReserved;	/*  must be zeroed out */
+	UCHAR ucMiscInfo;
+	UCHAR uc480i;
+	UCHAR uc480p;
+	UCHAR uc720p;
+	UCHAR uc1080i;
+	UCHAR ucLetterBoxMode;
+	UCHAR ucReserved[3];
+	UCHAR ucNumOfWbGpioBlocks;	/* For Component video D-Connector support. If zere, NTSC type connector */
+	ATOM_GPIO_INFO aWbGpioStateBlock[MAX_SUPPORTED_CV_STANDARDS];
+	ATOM_DTD_FORMAT aModeTimings[MAX_SUPPORTED_CV_STANDARDS];
+} ATOM_COMPONENT_VIDEO_INFO;
+
+/* ucTableFormatRevision=2 */
+/* ucTableContentRevision=1 */
+typedef struct _ATOM_COMPONENT_VIDEO_INFO_V21 {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	UCHAR ucMiscInfo;
+	UCHAR uc480i;
+	UCHAR uc480p;
+	UCHAR uc720p;
+	UCHAR uc1080i;
+	UCHAR ucReserved;
+	UCHAR ucLetterBoxMode;
+	UCHAR ucNumOfWbGpioBlocks;	/* For Component video D-Connector support. If zere, NTSC type connector */
+	ATOM_GPIO_INFO aWbGpioStateBlock[MAX_SUPPORTED_CV_STANDARDS];
+	ATOM_DTD_FORMAT aModeTimings[MAX_SUPPORTED_CV_STANDARDS];
+} ATOM_COMPONENT_VIDEO_INFO_V21;
+
+#define ATOM_COMPONENT_VIDEO_INFO_LAST  ATOM_COMPONENT_VIDEO_INFO_V21
+
+/****************************************************************************/
+/*  Structure used in object_InfoTable */
+/****************************************************************************/
+typedef struct _ATOM_OBJECT_HEADER {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	USHORT usDeviceSupport;
+	USHORT usConnectorObjectTableOffset;
+	USHORT usRouterObjectTableOffset;
+	USHORT usEncoderObjectTableOffset;
+	USHORT usProtectionObjectTableOffset;	/* only available when Protection block is independent. */
+	USHORT usDisplayPathTableOffset;
+} ATOM_OBJECT_HEADER;
+
+typedef struct _ATOM_DISPLAY_OBJECT_PATH {
+	USHORT usDeviceTag;	/* supported device */
+	USHORT usSize;		/* the size of ATOM_DISPLAY_OBJECT_PATH */
+	USHORT usConnObjectId;	/* Connector Object ID */
+	USHORT usGPUObjectId;	/* GPU ID */
+	USHORT usGraphicObjIds[1];	/* 1st Encoder Obj source from GPU to last Graphic Obj destinate to connector. */
+} ATOM_DISPLAY_OBJECT_PATH;
+
+typedef struct _ATOM_DISPLAY_OBJECT_PATH_TABLE {
+	UCHAR ucNumOfDispPath;
+	UCHAR ucVersion;
+	UCHAR ucPadding[2];
+	ATOM_DISPLAY_OBJECT_PATH asDispPath[1];
+} ATOM_DISPLAY_OBJECT_PATH_TABLE;
+
+typedef struct _ATOM_OBJECT	/* each object has this structure */
+{
+	USHORT usObjectID;
+	USHORT usSrcDstTableOffset;
+	USHORT usRecordOffset;	/* this pointing to a bunch of records defined below */
+	USHORT usReserved;
+} ATOM_OBJECT;
+
+typedef struct _ATOM_OBJECT_TABLE	/* Above 4 object table offset pointing to a bunch of objects all have this structure */
+{
+	UCHAR ucNumberOfObjects;
+	UCHAR ucPadding[3];
+	ATOM_OBJECT asObjects[1];
+} ATOM_OBJECT_TABLE;
+
+typedef struct _ATOM_SRC_DST_TABLE_FOR_ONE_OBJECT	/* usSrcDstTableOffset pointing to this structure */
+{
+	UCHAR ucNumberOfSrc;
+	USHORT usSrcObjectID[1];
+	UCHAR ucNumberOfDst;
+	USHORT usDstObjectID[1];
+} ATOM_SRC_DST_TABLE_FOR_ONE_OBJECT;
+
+/* Related definitions, all records are differnt but they have a commond header */
+typedef struct _ATOM_COMMON_RECORD_HEADER {
+	UCHAR ucRecordType;	/* An emun to indicate the record type */
+	UCHAR ucRecordSize;	/* The size of the whole record in byte */
+} ATOM_COMMON_RECORD_HEADER;
+
+#define ATOM_I2C_RECORD_TYPE                           1
+#define ATOM_HPD_INT_RECORD_TYPE                       2
+#define ATOM_OUTPUT_PROTECTION_RECORD_TYPE             3
+#define ATOM_CONNECTOR_DEVICE_TAG_RECORD_TYPE          4
+#define	ATOM_CONNECTOR_DVI_EXT_INPUT_RECORD_TYPE	     5	/* Obsolete, switch to use GPIO_CNTL_RECORD_TYPE */
+#define ATOM_ENCODER_FPGA_CONTROL_RECORD_TYPE          6	/* Obsolete, switch to use GPIO_CNTL_RECORD_TYPE */
+#define ATOM_CONNECTOR_CVTV_SHARE_DIN_RECORD_TYPE      7
+#define ATOM_JTAG_RECORD_TYPE                          8	/* Obsolete, switch to use GPIO_CNTL_RECORD_TYPE */
+#define ATOM_OBJECT_GPIO_CNTL_RECORD_TYPE              9
+#define ATOM_ENCODER_DVO_CF_RECORD_TYPE               10
+#define ATOM_CONNECTOR_CF_RECORD_TYPE                 11
+#define	ATOM_CONNECTOR_HARDCODE_DTD_RECORD_TYPE	      12
+#define ATOM_CONNECTOR_PCIE_SUBCONNECTOR_RECORD_TYPE  13
+#define ATOM_ROUTER_DDC_PATH_SELECT_RECORD_TYPE				14
+#define ATOM_ROUTER_DATA_CLOCK_PATH_SELECT_RECORD_TYPE					15
+
+/* Must be updated when new record type is added,equal to that record definition! */
+#define ATOM_MAX_OBJECT_RECORD_NUMBER             ATOM_CONNECTOR_CF_RECORD_TYPE
+
+typedef struct _ATOM_I2C_RECORD {
+	ATOM_COMMON_RECORD_HEADER sheader;
+	ATOM_I2C_ID_CONFIG sucI2cId;
+	UCHAR ucI2CAddr;	/* The slave address, it's 0 when the record is attached to connector for DDC */
+} ATOM_I2C_RECORD;
+
+typedef struct _ATOM_HPD_INT_RECORD {
+	ATOM_COMMON_RECORD_HEADER sheader;
+	UCHAR ucHPDIntGPIOID;	/* Corresponding block in GPIO_PIN_INFO table gives the pin info */
+	UCHAR ucPluggged_PinState;
+} ATOM_HPD_INT_RECORD;
+
+typedef struct _ATOM_OUTPUT_PROTECTION_RECORD {
+	ATOM_COMMON_RECORD_HEADER sheader;
+	UCHAR ucProtectionFlag;
+	UCHAR ucReserved;
+} ATOM_OUTPUT_PROTECTION_RECORD;
+
+typedef struct _ATOM_CONNECTOR_DEVICE_TAG {
+	ULONG ulACPIDeviceEnum;	/* Reserved for now */
+	USHORT usDeviceID;	/* This Id is same as "ATOM_DEVICE_XXX_SUPPORT" */
+	USHORT usPadding;
+} ATOM_CONNECTOR_DEVICE_TAG;
+
+typedef struct _ATOM_CONNECTOR_DEVICE_TAG_RECORD {
+	ATOM_COMMON_RECORD_HEADER sheader;
+	UCHAR ucNumberOfDevice;
+	UCHAR ucReserved;
+	ATOM_CONNECTOR_DEVICE_TAG asDeviceTag[1];	/* This Id is same as "ATOM_DEVICE_XXX_SUPPORT", 1 is only for allocation */
+} ATOM_CONNECTOR_DEVICE_TAG_RECORD;
+
+typedef struct _ATOM_CONNECTOR_DVI_EXT_INPUT_RECORD {
+	ATOM_COMMON_RECORD_HEADER sheader;
+	UCHAR ucConfigGPIOID;
+	UCHAR ucConfigGPIOState;	/* Set to 1 when it's active high to enable external flow in */
+	UCHAR ucFlowinGPIPID;
+	UCHAR ucExtInGPIPID;
+} ATOM_CONNECTOR_DVI_EXT_INPUT_RECORD;
+
+typedef struct _ATOM_ENCODER_FPGA_CONTROL_RECORD {
+	ATOM_COMMON_RECORD_HEADER sheader;
+	UCHAR ucCTL1GPIO_ID;
+	UCHAR ucCTL1GPIOState;	/* Set to 1 when it's active high */
+	UCHAR ucCTL2GPIO_ID;
+	UCHAR ucCTL2GPIOState;	/* Set to 1 when it's active high */
+	UCHAR ucCTL3GPIO_ID;
+	UCHAR ucCTL3GPIOState;	/* Set to 1 when it's active high */
+	UCHAR ucCTLFPGA_IN_ID;
+	UCHAR ucPadding[3];
+} ATOM_ENCODER_FPGA_CONTROL_RECORD;
+
+typedef struct _ATOM_CONNECTOR_CVTV_SHARE_DIN_RECORD {
+	ATOM_COMMON_RECORD_HEADER sheader;
+	UCHAR ucGPIOID;		/* Corresponding block in GPIO_PIN_INFO table gives the pin info */
+	UCHAR ucTVActiveState;	/* Indicating when the pin==0 or 1 when TV is connected */
+} ATOM_CONNECTOR_CVTV_SHARE_DIN_RECORD;
+
+typedef struct _ATOM_JTAG_RECORD {
+	ATOM_COMMON_RECORD_HEADER sheader;
+	UCHAR ucTMSGPIO_ID;
+	UCHAR ucTMSGPIOState;	/* Set to 1 when it's active high */
+	UCHAR ucTCKGPIO_ID;
+	UCHAR ucTCKGPIOState;	/* Set to 1 when it's active high */
+	UCHAR ucTDOGPIO_ID;
+	UCHAR ucTDOGPIOState;	/* Set to 1 when it's active high */
+	UCHAR ucTDIGPIO_ID;
+	UCHAR ucTDIGPIOState;	/* Set to 1 when it's active high */
+	UCHAR ucPadding[2];
+} ATOM_JTAG_RECORD;
+
+/* The following generic object gpio pin control record type will replace JTAG_RECORD/FPGA_CONTROL_RECORD/DVI_EXT_INPUT_RECORD above gradually */
+typedef struct _ATOM_GPIO_PIN_CONTROL_PAIR {
+	UCHAR ucGPIOID;		/*  GPIO_ID, find the corresponding ID in GPIO_LUT table */
+	UCHAR ucGPIO_PinState;	/*  Pin state showing how to set-up the pin */
+} ATOM_GPIO_PIN_CONTROL_PAIR;
+
+typedef struct _ATOM_OBJECT_GPIO_CNTL_RECORD {
+	ATOM_COMMON_RECORD_HEADER sheader;
+	UCHAR ucFlags;		/*  Future expnadibility */
+	UCHAR ucNumberOfPins;	/*  Number of GPIO pins used to control the object */
+	ATOM_GPIO_PIN_CONTROL_PAIR asGpio[1];	/*  the real gpio pin pair determined by number of pins ucNumberOfPins */
+} ATOM_OBJECT_GPIO_CNTL_RECORD;
+
+/* Definitions for GPIO pin state */
+#define GPIO_PIN_TYPE_INPUT             0x00
+#define GPIO_PIN_TYPE_OUTPUT            0x10
+#define GPIO_PIN_TYPE_HW_CONTROL        0x20
+
+/* For GPIO_PIN_TYPE_OUTPUT the following is defined */
+#define GPIO_PIN_OUTPUT_STATE_MASK      0x01
+#define GPIO_PIN_OUTPUT_STATE_SHIFT     0
+#define GPIO_PIN_STATE_ACTIVE_LOW       0x0
+#define GPIO_PIN_STATE_ACTIVE_HIGH      0x1
+
+typedef struct _ATOM_ENCODER_DVO_CF_RECORD {
+	ATOM_COMMON_RECORD_HEADER sheader;
+	ULONG ulStrengthControl;	/*  DVOA strength control for CF */
+	UCHAR ucPadding[2];
+} ATOM_ENCODER_DVO_CF_RECORD;
+
+/*  value for ATOM_CONNECTOR_CF_RECORD.ucConnectedDvoBundle */
+#define ATOM_CONNECTOR_CF_RECORD_CONNECTED_UPPER12BITBUNDLEA   1
+#define ATOM_CONNECTOR_CF_RECORD_CONNECTED_LOWER12BITBUNDLEB   2
+
+typedef struct _ATOM_CONNECTOR_CF_RECORD {
+	ATOM_COMMON_RECORD_HEADER sheader;
+	USHORT usMaxPixClk;
+	UCHAR ucFlowCntlGpioId;
+	UCHAR ucSwapCntlGpioId;
+	UCHAR ucConnectedDvoBundle;
+	UCHAR ucPadding;
+} ATOM_CONNECTOR_CF_RECORD;
+
+typedef struct _ATOM_CONNECTOR_HARDCODE_DTD_RECORD {
+	ATOM_COMMON_RECORD_HEADER sheader;
+	ATOM_DTD_FORMAT asTiming;
+} ATOM_CONNECTOR_HARDCODE_DTD_RECORD;
+
+typedef struct _ATOM_CONNECTOR_PCIE_SUBCONNECTOR_RECORD {
+	ATOM_COMMON_RECORD_HEADER sheader;	/* ATOM_CONNECTOR_PCIE_SUBCONNECTOR_RECORD_TYPE */
+	UCHAR ucSubConnectorType;	/* CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D|X_ID_DUAL_LINK_DVI_D|HDMI_TYPE_A */
+	UCHAR ucReserved;
+} ATOM_CONNECTOR_PCIE_SUBCONNECTOR_RECORD;
+
+typedef struct _ATOM_ROUTER_DDC_PATH_SELECT_RECORD {
+	ATOM_COMMON_RECORD_HEADER sheader;
+	UCHAR ucMuxType;	/* decide the number of ucMuxState, =0, no pin state, =1: single state with complement, >1: multiple state */
+	UCHAR ucMuxControlPin;
+	UCHAR ucMuxState[2];	/* for alligment purpose */
+} ATOM_ROUTER_DDC_PATH_SELECT_RECORD;
+
+typedef struct _ATOM_ROUTER_DATA_CLOCK_PATH_SELECT_RECORD {
+	ATOM_COMMON_RECORD_HEADER sheader;
+	UCHAR ucMuxType;
+	UCHAR ucMuxControlPin;
+	UCHAR ucMuxState[2];	/* for alligment purpose */
+} ATOM_ROUTER_DATA_CLOCK_PATH_SELECT_RECORD;
+
+/*  define ucMuxType */
+#define ATOM_ROUTER_MUX_PIN_STATE_MASK								0x0f
+#define ATOM_ROUTER_MUX_PIN_SINGLE_STATE_COMPLEMENT		0x01
+
+/****************************************************************************/
+/*  ASIC voltage data table */
+/****************************************************************************/
+typedef struct _ATOM_VOLTAGE_INFO_HEADER {
+	USHORT usVDDCBaseLevel;	/* In number of 50mv unit */
+	USHORT usReserved;	/* For possible extension table offset */
+	UCHAR ucNumOfVoltageEntries;
+	UCHAR ucBytesPerVoltageEntry;
+	UCHAR ucVoltageStep;	/* Indicating in how many mv increament is one step, 0.5mv unit */
+	UCHAR ucDefaultVoltageEntry;
+	UCHAR ucVoltageControlI2cLine;
+	UCHAR ucVoltageControlAddress;
+	UCHAR ucVoltageControlOffset;
+} ATOM_VOLTAGE_INFO_HEADER;
+
+typedef struct _ATOM_VOLTAGE_INFO {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	ATOM_VOLTAGE_INFO_HEADER viHeader;
+	UCHAR ucVoltageEntries[64];	/* 64 is for allocation, the actual number of entry is present at ucNumOfVoltageEntries*ucBytesPerVoltageEntry */
+} ATOM_VOLTAGE_INFO;
+
+typedef struct _ATOM_VOLTAGE_FORMULA {
+	USHORT usVoltageBaseLevel;	/*  In number of 1mv unit */
+	USHORT usVoltageStep;	/*  Indicating in how many mv increament is one step, 1mv unit */
+	UCHAR ucNumOfVoltageEntries;	/*  Number of Voltage Entry, which indicate max Voltage */
+	UCHAR ucFlag;		/*  bit0=0 :step is 1mv =1 0.5mv */
+	UCHAR ucBaseVID;	/*  if there is no lookup table, VID= BaseVID + ( Vol - BaseLevle ) /VoltageStep */
+	UCHAR ucReserved;
+	UCHAR ucVIDAdjustEntries[32];	/*  32 is for allocation, the actual number of entry is present at ucNumOfVoltageEntries */
+} ATOM_VOLTAGE_FORMULA;
+
+typedef struct _ATOM_VOLTAGE_CONTROL {
+	UCHAR ucVoltageControlId;	/* Indicate it is controlled by I2C or GPIO or HW state machine */
+	UCHAR ucVoltageControlI2cLine;
+	UCHAR ucVoltageControlAddress;
+	UCHAR ucVoltageControlOffset;
+	USHORT usGpioPin_AIndex;	/* GPIO_PAD register index */
+	UCHAR ucGpioPinBitShift[9];	/* at most 8 pin support 255 VIDs, termintate with 0xff */
+	UCHAR ucReserved;
+} ATOM_VOLTAGE_CONTROL;
+
+/*  Define ucVoltageControlId */
+#define	VOLTAGE_CONTROLLED_BY_HW							0x00
+#define	VOLTAGE_CONTROLLED_BY_I2C_MASK				0x7F
+#define	VOLTAGE_CONTROLLED_BY_GPIO						0x80
+#define	VOLTAGE_CONTROL_ID_LM64								0x01	/* I2C control, used for R5xx Core Voltage */
+#define	VOLTAGE_CONTROL_ID_DAC								0x02	/* I2C control, used for R5xx/R6xx MVDDC,MVDDQ or VDDCI */
+#define	VOLTAGE_CONTROL_ID_VT116xM						0x03	/* I2C control, used for R6xx Core Voltage */
+#define VOLTAGE_CONTROL_ID_DS4402							0x04
+
+typedef struct _ATOM_VOLTAGE_OBJECT {
+	UCHAR ucVoltageType;	/* Indicate Voltage Source: VDDC, MVDDC, MVDDQ or MVDDCI */
+	UCHAR ucSize;		/* Size of Object */
+	ATOM_VOLTAGE_CONTROL asControl;	/* describ how to control */
+	ATOM_VOLTAGE_FORMULA asFormula;	/* Indicate How to convert real Voltage to VID */
+} ATOM_VOLTAGE_OBJECT;
+
+typedef struct _ATOM_VOLTAGE_OBJECT_INFO {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	ATOM_VOLTAGE_OBJECT asVoltageObj[3];	/* Info for Voltage control */
+} ATOM_VOLTAGE_OBJECT_INFO;
+
+typedef struct _ATOM_LEAKID_VOLTAGE {
+	UCHAR ucLeakageId;
+	UCHAR ucReserved;
+	USHORT usVoltage;
+} ATOM_LEAKID_VOLTAGE;
+
+typedef struct _ATOM_ASIC_PROFILE_VOLTAGE {
+	UCHAR ucProfileId;
+	UCHAR ucReserved;
+	USHORT usSize;
+	USHORT usEfuseSpareStartAddr;
+	USHORT usFuseIndex[8];	/* from LSB to MSB, Max 8bit,end of 0xffff if less than 8 efuse id, */
+	ATOM_LEAKID_VOLTAGE asLeakVol[2];	/* Leakid and relatd voltage */
+} ATOM_ASIC_PROFILE_VOLTAGE;
+
+/* ucProfileId */
+#define	ATOM_ASIC_PROFILE_ID_EFUSE_VOLTAGE			1
+#define	ATOM_ASIC_PROFILE_ID_EFUSE_PERFORMANCE_VOLTAGE			1
+#define	ATOM_ASIC_PROFILE_ID_EFUSE_THERMAL_VOLTAGE					2
+
+typedef struct _ATOM_ASIC_PROFILING_INFO {
+	ATOM_COMMON_TABLE_HEADER asHeader;
+	ATOM_ASIC_PROFILE_VOLTAGE asVoltage;
+} ATOM_ASIC_PROFILING_INFO;
+
+typedef struct _ATOM_POWER_SOURCE_OBJECT {
+	UCHAR ucPwrSrcId;	/*  Power source */
+	UCHAR ucPwrSensorType;	/*  GPIO, I2C or none */
+	UCHAR ucPwrSensId;	/*  if GPIO detect, it is GPIO id,  if I2C detect, it is I2C id */
+	UCHAR ucPwrSensSlaveAddr;	/*  Slave address if I2C detect */
+	UCHAR ucPwrSensRegIndex;	/*  I2C register Index if I2C detect */
+	UCHAR ucPwrSensRegBitMask;	/*  detect which bit is used if I2C detect */
+	UCHAR ucPwrSensActiveState;	/*  high active or low active */
+	UCHAR ucReserve[3];	/*  reserve */
+	USHORT usSensPwr;	/*  in unit of watt */
+} ATOM_POWER_SOURCE_OBJECT;
+
+typedef struct _ATOM_POWER_SOURCE_INFO {
+	ATOM_COMMON_TABLE_HEADER asHeader;
+	UCHAR asPwrbehave[16];
+	ATOM_POWER_SOURCE_OBJECT asPwrObj[1];
+} ATOM_POWER_SOURCE_INFO;
+
+/* Define ucPwrSrcId */
+#define POWERSOURCE_PCIE_ID1						0x00
+#define POWERSOURCE_6PIN_CONNECTOR_ID1	0x01
+#define POWERSOURCE_8PIN_CONNECTOR_ID1	0x02
+#define POWERSOURCE_6PIN_CONNECTOR_ID2	0x04
+#define POWERSOURCE_8PIN_CONNECTOR_ID2	0x08
+
+/* define ucPwrSensorId */
+#define POWER_SENSOR_ALWAYS							0x00
+#define POWER_SENSOR_GPIO								0x01
+#define POWER_SENSOR_I2C								0x02
+
+/**************************************************************************/
+/*  This portion is only used when ext thermal chip or engine/memory clock SS chip is populated on a design */
+/* Memory SS Info Table */
+/* Define Memory Clock SS chip ID */
+#define ICS91719  1
+#define ICS91720  2
+
+/* Define one structure to inform SW a "block of data" writing to external SS chip via I2C protocol */
+typedef struct _ATOM_I2C_DATA_RECORD {
+	UCHAR ucNunberOfBytes;	/* Indicates how many bytes SW needs to write to the external ASIC for one block, besides to "Start" and "Stop" */
+	UCHAR ucI2CData[1];	/* I2C data in bytes, should be less than 16 bytes usually */
+} ATOM_I2C_DATA_RECORD;
+
+/* Define one structure to inform SW how many blocks of data writing to external SS chip via I2C protocol, in addition to other information */
+typedef struct _ATOM_I2C_DEVICE_SETUP_INFO {
+	ATOM_I2C_ID_CONFIG_ACCESS sucI2cId;	/* I2C line and HW/SW assisted cap. */
+	UCHAR ucSSChipID;	/* SS chip being used */
+	UCHAR ucSSChipSlaveAddr;	/* Slave Address to set up this SS chip */
+	UCHAR ucNumOfI2CDataRecords;	/* number of data block */
+	ATOM_I2C_DATA_RECORD asI2CData[1];
+} ATOM_I2C_DEVICE_SETUP_INFO;
+
+/* ========================================================================================== */
+typedef struct _ATOM_ASIC_MVDD_INFO {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	ATOM_I2C_DEVICE_SETUP_INFO asI2CSetup[1];
+} ATOM_ASIC_MVDD_INFO;
+
+/* ========================================================================================== */
+#define ATOM_MCLK_SS_INFO         ATOM_ASIC_MVDD_INFO
+
+/* ========================================================================================== */
+/**************************************************************************/
+
+typedef struct _ATOM_ASIC_SS_ASSIGNMENT {
+	ULONG ulTargetClockRange;	/* Clock Out frequence (VCO ), in unit of 10Khz */
+	USHORT usSpreadSpectrumPercentage;	/* in unit of 0.01% */
+	USHORT usSpreadRateInKhz;	/* in unit of kHz, modulation freq */
+	UCHAR ucClockIndication;	/* Indicate which clock source needs SS */
+	UCHAR ucSpreadSpectrumMode;	/* Bit1=0 Down Spread,=1 Center Spread. */
+	UCHAR ucReserved[2];
+} ATOM_ASIC_SS_ASSIGNMENT;
+
+/* Define ucSpreadSpectrumType */
+#define ASIC_INTERNAL_MEMORY_SS			1
+#define ASIC_INTERNAL_ENGINE_SS			2
+#define ASIC_INTERNAL_UVD_SS				3
+
+typedef struct _ATOM_ASIC_INTERNAL_SS_INFO {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	ATOM_ASIC_SS_ASSIGNMENT asSpreadSpectrum[4];
+} ATOM_ASIC_INTERNAL_SS_INFO;
+
+/* ==============================Scratch Pad Definition Portion=============================== */
+#define ATOM_DEVICE_CONNECT_INFO_DEF  0
+#define ATOM_ROM_LOCATION_DEF         1
+#define ATOM_TV_STANDARD_DEF          2
+#define ATOM_ACTIVE_INFO_DEF          3
+#define ATOM_LCD_INFO_DEF             4
+#define ATOM_DOS_REQ_INFO_DEF         5
+#define ATOM_ACC_CHANGE_INFO_DEF      6
+#define ATOM_DOS_MODE_INFO_DEF        7
+#define ATOM_I2C_CHANNEL_STATUS_DEF   8
+#define ATOM_I2C_CHANNEL_STATUS1_DEF  9
+
+/*  BIOS_0_SCRATCH Definition */
+#define ATOM_S0_CRT1_MONO               0x00000001L
+#define ATOM_S0_CRT1_COLOR              0x00000002L
+#define ATOM_S0_CRT1_MASK               (ATOM_S0_CRT1_MONO+ATOM_S0_CRT1_COLOR)
+
+#define ATOM_S0_TV1_COMPOSITE_A         0x00000004L
+#define ATOM_S0_TV1_SVIDEO_A            0x00000008L
+#define ATOM_S0_TV1_MASK_A              (ATOM_S0_TV1_COMPOSITE_A+ATOM_S0_TV1_SVIDEO_A)
+
+#define ATOM_S0_CV_A                    0x00000010L
+#define ATOM_S0_CV_DIN_A                0x00000020L
+#define ATOM_S0_CV_MASK_A               (ATOM_S0_CV_A+ATOM_S0_CV_DIN_A)
+
+#define ATOM_S0_CRT2_MONO               0x00000100L
+#define ATOM_S0_CRT2_COLOR              0x00000200L
+#define ATOM_S0_CRT2_MASK               (ATOM_S0_CRT2_MONO+ATOM_S0_CRT2_COLOR)
+
+#define ATOM_S0_TV1_COMPOSITE           0x00000400L
+#define ATOM_S0_TV1_SVIDEO              0x00000800L
+#define ATOM_S0_TV1_SCART               0x00004000L
+#define ATOM_S0_TV1_MASK                (ATOM_S0_TV1_COMPOSITE+ATOM_S0_TV1_SVIDEO+ATOM_S0_TV1_SCART)
+
+#define ATOM_S0_CV                      0x00001000L
+#define ATOM_S0_CV_DIN                  0x00002000L
+#define ATOM_S0_CV_MASK                 (ATOM_S0_CV+ATOM_S0_CV_DIN)
+
+#define ATOM_S0_DFP1                    0x00010000L
+#define ATOM_S0_DFP2                    0x00020000L
+#define ATOM_S0_LCD1                    0x00040000L
+#define ATOM_S0_LCD2                    0x00080000L
+#define ATOM_S0_TV2                     0x00100000L
+#define ATOM_S0_DFP3			0x00200000L
+#define ATOM_S0_DFP4			0x00400000L
+#define ATOM_S0_DFP5			0x00800000L
+
+#define ATOM_S0_DFP_MASK \
+	(ATOM_S0_DFP1 | ATOM_S0_DFP2 | ATOM_S0_DFP3 | ATOM_S0_DFP4 | ATOM_S0_DFP5)
+
+#define ATOM_S0_FAD_REGISTER_BUG        0x02000000L	/*  If set, indicates we are running a PCIE asic with */
+						    /*  the FAD/HDP reg access bug.  Bit is read by DAL */
+
+#define ATOM_S0_THERMAL_STATE_MASK      0x1C000000L
+#define ATOM_S0_THERMAL_STATE_SHIFT     26
+
+#define ATOM_S0_SYSTEM_POWER_STATE_MASK 0xE0000000L
+#define ATOM_S0_SYSTEM_POWER_STATE_SHIFT 29
+
+#define ATOM_S0_SYSTEM_POWER_STATE_VALUE_AC     1
+#define ATOM_S0_SYSTEM_POWER_STATE_VALUE_DC     2
+#define ATOM_S0_SYSTEM_POWER_STATE_VALUE_LITEAC 3
+
+/* Byte aligned defintion for BIOS usage */
+#define ATOM_S0_CRT1_MONOb0             0x01
+#define ATOM_S0_CRT1_COLORb0            0x02
+#define ATOM_S0_CRT1_MASKb0             (ATOM_S0_CRT1_MONOb0+ATOM_S0_CRT1_COLORb0)
+
+#define ATOM_S0_TV1_COMPOSITEb0         0x04
+#define ATOM_S0_TV1_SVIDEOb0            0x08
+#define ATOM_S0_TV1_MASKb0              (ATOM_S0_TV1_COMPOSITEb0+ATOM_S0_TV1_SVIDEOb0)
+
+#define ATOM_S0_CVb0                    0x10
+#define ATOM_S0_CV_DINb0                0x20
+#define ATOM_S0_CV_MASKb0               (ATOM_S0_CVb0+ATOM_S0_CV_DINb0)
+
+#define ATOM_S0_CRT2_MONOb1             0x01
+#define ATOM_S0_CRT2_COLORb1            0x02
+#define ATOM_S0_CRT2_MASKb1             (ATOM_S0_CRT2_MONOb1+ATOM_S0_CRT2_COLORb1)
+
+#define ATOM_S0_TV1_COMPOSITEb1         0x04
+#define ATOM_S0_TV1_SVIDEOb1            0x08
+#define ATOM_S0_TV1_SCARTb1             0x40
+#define ATOM_S0_TV1_MASKb1              (ATOM_S0_TV1_COMPOSITEb1+ATOM_S0_TV1_SVIDEOb1+ATOM_S0_TV1_SCARTb1)
+
+#define ATOM_S0_CVb1                    0x10
+#define ATOM_S0_CV_DINb1                0x20
+#define ATOM_S0_CV_MASKb1               (ATOM_S0_CVb1+ATOM_S0_CV_DINb1)
+
+#define ATOM_S0_DFP1b2                  0x01
+#define ATOM_S0_DFP2b2                  0x02
+#define ATOM_S0_LCD1b2                  0x04
+#define ATOM_S0_LCD2b2                  0x08
+#define ATOM_S0_TV2b2                   0x10
+#define ATOM_S0_DFP3b2									0x20
+
+#define ATOM_S0_THERMAL_STATE_MASKb3    0x1C
+#define ATOM_S0_THERMAL_STATE_SHIFTb3   2
+
+#define ATOM_S0_SYSTEM_POWER_STATE_MASKb3 0xE0
+#define ATOM_S0_LCD1_SHIFT              18
+
+/*  BIOS_1_SCRATCH Definition */
+#define ATOM_S1_ROM_LOCATION_MASK       0x0000FFFFL
+#define ATOM_S1_PCI_BUS_DEV_MASK        0xFFFF0000L
+
+/*       BIOS_2_SCRATCH Definition */
+#define ATOM_S2_TV1_STANDARD_MASK       0x0000000FL
+#define ATOM_S2_CURRENT_BL_LEVEL_MASK   0x0000FF00L
+#define ATOM_S2_CURRENT_BL_LEVEL_SHIFT  8
+
+#define ATOM_S2_CRT1_DPMS_STATE         0x00010000L
+#define ATOM_S2_LCD1_DPMS_STATE	        0x00020000L
+#define ATOM_S2_TV1_DPMS_STATE          0x00040000L
+#define ATOM_S2_DFP1_DPMS_STATE         0x00080000L
+#define ATOM_S2_CRT2_DPMS_STATE         0x00100000L
+#define ATOM_S2_LCD2_DPMS_STATE         0x00200000L
+#define ATOM_S2_TV2_DPMS_STATE          0x00400000L
+#define ATOM_S2_DFP2_DPMS_STATE         0x00800000L
+#define ATOM_S2_CV_DPMS_STATE           0x01000000L
+#define ATOM_S2_DFP3_DPMS_STATE					0x02000000L
+#define ATOM_S2_DFP4_DPMS_STATE					0x04000000L
+#define ATOM_S2_DFP5_DPMS_STATE					0x08000000L
+
+#define ATOM_S2_DFP_DPM_STATE \
+	(ATOM_S2_DFP1_DPMS_STATE | ATOM_S2_DFP2_DPMS_STATE | \
+	 ATOM_S2_DFP3_DPMS_STATE | ATOM_S2_DFP4_DPMS_STATE | \
+	 ATOM_S2_DFP5_DPMS_STATE)
+
+#define ATOM_S2_DEVICE_DPMS_STATE \
+	(ATOM_S2_CRT1_DPMS_STATE + ATOM_S2_LCD1_DPMS_STATE + \
+	 ATOM_S2_TV1_DPMS_STATE + ATOM_S2_DFP_DPMS_STATE + \
+	 ATOM_S2_CRT2_DPMS_STATE + ATOM_S2_LCD2_DPMS_STATE + \
+	 ATOM_S2_TV2_DPMS_STATE + ATOM_S2_CV_DPMS_STATE)
+
+#define ATOM_S2_FORCEDLOWPWRMODE_STATE_MASK       0x0C000000L
+#define ATOM_S2_FORCEDLOWPWRMODE_STATE_MASK_SHIFT 26
+#define ATOM_S2_FORCEDLOWPWRMODE_STATE_CHANGE     0x10000000L
+
+#define ATOM_S2_VRI_BRIGHT_ENABLE       0x20000000L
+
+#define ATOM_S2_DISPLAY_ROTATION_0_DEGREE     0x0
+#define ATOM_S2_DISPLAY_ROTATION_90_DEGREE    0x1
+#define ATOM_S2_DISPLAY_ROTATION_180_DEGREE   0x2
+#define ATOM_S2_DISPLAY_ROTATION_270_DEGREE   0x3
+#define ATOM_S2_DISPLAY_ROTATION_DEGREE_SHIFT 30
+#define ATOM_S2_DISPLAY_ROTATION_ANGLE_MASK   0xC0000000L
+
+/* Byte aligned defintion for BIOS usage */
+#define ATOM_S2_TV1_STANDARD_MASKb0     0x0F
+#define ATOM_S2_CURRENT_BL_LEVEL_MASKb1 0xFF
+#define ATOM_S2_CRT1_DPMS_STATEb2       0x01
+#define ATOM_S2_LCD1_DPMS_STATEb2       0x02
+#define ATOM_S2_TV1_DPMS_STATEb2        0x04
+#define ATOM_S2_DFP1_DPMS_STATEb2       0x08
+#define ATOM_S2_CRT2_DPMS_STATEb2       0x10
+#define ATOM_S2_LCD2_DPMS_STATEb2       0x20
+#define ATOM_S2_TV2_DPMS_STATEb2        0x40
+#define ATOM_S2_DFP2_DPMS_STATEb2       0x80
+#define ATOM_S2_CV_DPMS_STATEb3         0x01
+#define ATOM_S2_DFP3_DPMS_STATEb3				0x02
+#define ATOM_S2_DFP4_DPMS_STATEb3				0x04
+#define ATOM_S2_DFP5_DPMS_STATEb3				0x08
+
+#define ATOM_S2_DEVICE_DPMS_MASKw1      0x3FF
+#define ATOM_S2_FORCEDLOWPWRMODE_STATE_MASKb3     0x0C
+#define ATOM_S2_FORCEDLOWPWRMODE_STATE_CHANGEb3   0x10
+#define ATOM_S2_VRI_BRIGHT_ENABLEb3     0x20
+#define ATOM_S2_ROTATION_STATE_MASKb3   0xC0
+
+/*  BIOS_3_SCRATCH Definition */
+#define ATOM_S3_CRT1_ACTIVE             0x00000001L
+#define ATOM_S3_LCD1_ACTIVE             0x00000002L
+#define ATOM_S3_TV1_ACTIVE              0x00000004L
+#define ATOM_S3_DFP1_ACTIVE             0x00000008L
+#define ATOM_S3_CRT2_ACTIVE             0x00000010L
+#define ATOM_S3_LCD2_ACTIVE             0x00000020L
+#define ATOM_S3_TV2_ACTIVE              0x00000040L
+#define ATOM_S3_DFP2_ACTIVE             0x00000080L
+#define ATOM_S3_CV_ACTIVE               0x00000100L
+#define ATOM_S3_DFP3_ACTIVE							0x00000200L
+#define ATOM_S3_DFP4_ACTIVE							0x00000400L
+#define ATOM_S3_DFP5_ACTIVE							0x00000800L
+
+#define ATOM_S3_DEVICE_ACTIVE_MASK      0x000003FFL
+
+#define ATOM_S3_LCD_FULLEXPANSION_ACTIVE         0x00001000L
+#define ATOM_S3_LCD_EXPANSION_ASPEC_RATIO_ACTIVE 0x00002000L
+
+#define ATOM_S3_CRT1_CRTC_ACTIVE        0x00010000L
+#define ATOM_S3_LCD1_CRTC_ACTIVE        0x00020000L
+#define ATOM_S3_TV1_CRTC_ACTIVE         0x00040000L
+#define ATOM_S3_DFP1_CRTC_ACTIVE        0x00080000L
+#define ATOM_S3_CRT2_CRTC_ACTIVE        0x00100000L
+#define ATOM_S3_LCD2_CRTC_ACTIVE        0x00200000L
+#define ATOM_S3_TV2_CRTC_ACTIVE         0x00400000L
+#define ATOM_S3_DFP2_CRTC_ACTIVE        0x00800000L
+#define ATOM_S3_CV_CRTC_ACTIVE          0x01000000L
+#define ATOM_S3_DFP3_CRTC_ACTIVE				0x02000000L
+#define ATOM_S3_DFP4_CRTC_ACTIVE				0x04000000L
+#define ATOM_S3_DFP5_CRTC_ACTIVE				0x08000000L
+
+#define ATOM_S3_DEVICE_CRTC_ACTIVE_MASK 0x0FFF0000L
+#define ATOM_S3_ASIC_GUI_ENGINE_HUNG    0x20000000L
+#define ATOM_S3_ALLOW_FAST_PWR_SWITCH   0x40000000L
+#define ATOM_S3_RQST_GPU_USE_MIN_PWR    0x80000000L
+
+/* Byte aligned defintion for BIOS usage */
+#define ATOM_S3_CRT1_ACTIVEb0           0x01
+#define ATOM_S3_LCD1_ACTIVEb0           0x02
+#define ATOM_S3_TV1_ACTIVEb0            0x04
+#define ATOM_S3_DFP1_ACTIVEb0           0x08
+#define ATOM_S3_CRT2_ACTIVEb0           0x10
+#define ATOM_S3_LCD2_ACTIVEb0           0x20
+#define ATOM_S3_TV2_ACTIVEb0            0x40
+#define ATOM_S3_DFP2_ACTIVEb0           0x80
+#define ATOM_S3_CV_ACTIVEb1             0x01
+#define ATOM_S3_DFP3_ACTIVEb1						0x02
+#define ATOM_S3_DFP4_ACTIVEb1						0x04
+#define ATOM_S3_DFP5_ACTIVEb1						0x08
+
+#define ATOM_S3_ACTIVE_CRTC1w0          0xFFF
+
+#define ATOM_S3_CRT1_CRTC_ACTIVEb2      0x01
+#define ATOM_S3_LCD1_CRTC_ACTIVEb2      0x02
+#define ATOM_S3_TV1_CRTC_ACTIVEb2       0x04
+#define ATOM_S3_DFP1_CRTC_ACTIVEb2      0x08
+#define ATOM_S3_CRT2_CRTC_ACTIVEb2      0x10
+#define ATOM_S3_LCD2_CRTC_ACTIVEb2      0x20
+#define ATOM_S3_TV2_CRTC_ACTIVEb2       0x40
+#define ATOM_S3_DFP2_CRTC_ACTIVEb2      0x80
+#define ATOM_S3_CV_CRTC_ACTIVEb3        0x01
+#define ATOM_S3_DFP3_CRTC_ACTIVEb3			0x02
+#define ATOM_S3_DFP4_CRTC_ACTIVEb3			0x04
+#define ATOM_S3_DFP5_CRTC_ACTIVEb3			0x08
+
+#define ATOM_S3_ACTIVE_CRTC2w1          0xFFF
+
+#define ATOM_S3_ASIC_GUI_ENGINE_HUNGb3	0x20
+#define ATOM_S3_ALLOW_FAST_PWR_SWITCHb3 0x40
+#define ATOM_S3_RQST_GPU_USE_MIN_PWRb3  0x80
+
+/*  BIOS_4_SCRATCH Definition */
+#define ATOM_S4_LCD1_PANEL_ID_MASK      0x000000FFL
+#define ATOM_S4_LCD1_REFRESH_MASK       0x0000FF00L
+#define ATOM_S4_LCD1_REFRESH_SHIFT      8
+
+/* Byte aligned defintion for BIOS usage */
+#define ATOM_S4_LCD1_PANEL_ID_MASKb0	  0x0FF
+#define ATOM_S4_LCD1_REFRESH_MASKb1		  ATOM_S4_LCD1_PANEL_ID_MASKb0
+#define ATOM_S4_VRAM_INFO_MASKb2        ATOM_S4_LCD1_PANEL_ID_MASKb0
+
+/*  BIOS_5_SCRATCH Definition, BIOS_5_SCRATCH is used by Firmware only !!!! */
+#define ATOM_S5_DOS_REQ_CRT1b0          0x01
+#define ATOM_S5_DOS_REQ_LCD1b0          0x02
+#define ATOM_S5_DOS_REQ_TV1b0           0x04
+#define ATOM_S5_DOS_REQ_DFP1b0          0x08
+#define ATOM_S5_DOS_REQ_CRT2b0          0x10
+#define ATOM_S5_DOS_REQ_LCD2b0          0x20
+#define ATOM_S5_DOS_REQ_TV2b0           0x40
+#define ATOM_S5_DOS_REQ_DFP2b0          0x80
+#define ATOM_S5_DOS_REQ_CVb1            0x01
+#define ATOM_S5_DOS_REQ_DFP3b1					0x02
+#define ATOM_S5_DOS_REQ_DFP4b1					0x04
+#define ATOM_S5_DOS_REQ_DFP5b1					0x08
+
+#define ATOM_S5_DOS_REQ_DEVICEw0        0x03FF
+
+#define ATOM_S5_DOS_REQ_CRT1            0x0001
+#define ATOM_S5_DOS_REQ_LCD1            0x0002
+#define ATOM_S5_DOS_REQ_TV1             0x0004
+#define ATOM_S5_DOS_REQ_DFP1            0x0008
+#define ATOM_S5_DOS_REQ_CRT2            0x0010
+#define ATOM_S5_DOS_REQ_LCD2            0x0020
+#define ATOM_S5_DOS_REQ_TV2             0x0040
+#define ATOM_S5_DOS_REQ_DFP2            0x0080
+#define ATOM_S5_DOS_REQ_CV              0x0100
+#define ATOM_S5_DOS_REQ_DFP3						0x0200
+#define ATOM_S5_DOS_REQ_DFP4						0x0400
+#define ATOM_S5_DOS_REQ_DFP5						0x0800
+
+#define ATOM_S5_DOS_FORCE_CRT1b2        ATOM_S5_DOS_REQ_CRT1b0
+#define ATOM_S5_DOS_FORCE_TV1b2         ATOM_S5_DOS_REQ_TV1b0
+#define ATOM_S5_DOS_FORCE_CRT2b2        ATOM_S5_DOS_REQ_CRT2b0
+#define ATOM_S5_DOS_FORCE_CVb3          ATOM_S5_DOS_REQ_CVb1
+#define ATOM_S5_DOS_FORCE_DEVICEw1 \
+	(ATOM_S5_DOS_FORCE_CRT1b2 + ATOM_S5_DOS_FORCE_TV1b2 + \
+	 ATOM_S5_DOS_FORCE_CRT2b2 + (ATOM_S5_DOS_FORCE_CVb3 << 8))
+
+/*  BIOS_6_SCRATCH Definition */
+#define ATOM_S6_DEVICE_CHANGE           0x00000001L
+#define ATOM_S6_SCALER_CHANGE           0x00000002L
+#define ATOM_S6_LID_CHANGE              0x00000004L
+#define ATOM_S6_DOCKING_CHANGE          0x00000008L
+#define ATOM_S6_ACC_MODE                0x00000010L
+#define ATOM_S6_EXT_DESKTOP_MODE        0x00000020L
+#define ATOM_S6_LID_STATE               0x00000040L
+#define ATOM_S6_DOCK_STATE              0x00000080L
+#define ATOM_S6_CRITICAL_STATE          0x00000100L
+#define ATOM_S6_HW_I2C_BUSY_STATE       0x00000200L
+#define ATOM_S6_THERMAL_STATE_CHANGE    0x00000400L
+#define ATOM_S6_INTERRUPT_SET_BY_BIOS   0x00000800L
+#define ATOM_S6_REQ_LCD_EXPANSION_FULL         0x00001000L	/* Normal expansion Request bit for LCD */
+#define ATOM_S6_REQ_LCD_EXPANSION_ASPEC_RATIO  0x00002000L	/* Aspect ratio expansion Request bit for LCD */
+
+#define ATOM_S6_DISPLAY_STATE_CHANGE    0x00004000L	/* This bit is recycled when ATOM_BIOS_INFO_BIOS_SCRATCH6_SCL2_REDEFINE is set,previously it's SCL2_H_expansion */
+#define ATOM_S6_I2C_STATE_CHANGE        0x00008000L	/* This bit is recycled,when ATOM_BIOS_INFO_BIOS_SCRATCH6_SCL2_REDEFINE is set,previously it's SCL2_V_expansion */
+
+#define ATOM_S6_ACC_REQ_CRT1            0x00010000L
+#define ATOM_S6_ACC_REQ_LCD1            0x00020000L
+#define ATOM_S6_ACC_REQ_TV1             0x00040000L
+#define ATOM_S6_ACC_REQ_DFP1            0x00080000L
+#define ATOM_S6_ACC_REQ_CRT2            0x00100000L
+#define ATOM_S6_ACC_REQ_LCD2            0x00200000L
+#define ATOM_S6_ACC_REQ_TV2             0x00400000L
+#define ATOM_S6_ACC_REQ_DFP2            0x00800000L
+#define ATOM_S6_ACC_REQ_CV              0x01000000L
+#define ATOM_S6_ACC_REQ_DFP3						0x02000000L
+#define ATOM_S6_ACC_REQ_DFP4						0x04000000L
+#define ATOM_S6_ACC_REQ_DFP5						0x08000000L
+
+#define ATOM_S6_ACC_REQ_MASK                0x0FFF0000L
+#define ATOM_S6_SYSTEM_POWER_MODE_CHANGE    0x10000000L
+#define ATOM_S6_ACC_BLOCK_DISPLAY_SWITCH    0x20000000L
+#define ATOM_S6_VRI_BRIGHTNESS_CHANGE       0x40000000L
+#define ATOM_S6_CONFIG_DISPLAY_CHANGE_MASK  0x80000000L
+
+/* Byte aligned defintion for BIOS usage */
+#define ATOM_S6_DEVICE_CHANGEb0         0x01
+#define ATOM_S6_SCALER_CHANGEb0         0x02
+#define ATOM_S6_LID_CHANGEb0            0x04
+#define ATOM_S6_DOCKING_CHANGEb0        0x08
+#define ATOM_S6_ACC_MODEb0              0x10
+#define ATOM_S6_EXT_DESKTOP_MODEb0      0x20
+#define ATOM_S6_LID_STATEb0             0x40
+#define ATOM_S6_DOCK_STATEb0            0x80
+#define ATOM_S6_CRITICAL_STATEb1        0x01
+#define ATOM_S6_HW_I2C_BUSY_STATEb1     0x02
+#define ATOM_S6_THERMAL_STATE_CHANGEb1  0x04
+#define ATOM_S6_INTERRUPT_SET_BY_BIOSb1 0x08
+#define ATOM_S6_REQ_LCD_EXPANSION_FULLb1        0x10
+#define ATOM_S6_REQ_LCD_EXPANSION_ASPEC_RATIOb1 0x20
+
+#define ATOM_S6_ACC_REQ_CRT1b2          0x01
+#define ATOM_S6_ACC_REQ_LCD1b2          0x02
+#define ATOM_S6_ACC_REQ_TV1b2           0x04
+#define ATOM_S6_ACC_REQ_DFP1b2          0x08
+#define ATOM_S6_ACC_REQ_CRT2b2          0x10
+#define ATOM_S6_ACC_REQ_LCD2b2          0x20
+#define ATOM_S6_ACC_REQ_TV2b2           0x40
+#define ATOM_S6_ACC_REQ_DFP2b2          0x80
+#define ATOM_S6_ACC_REQ_CVb3            0x01
+#define ATOM_S6_ACC_REQ_DFP3b3					0x02
+#define ATOM_S6_ACC_REQ_DFP4b3					0x04
+#define ATOM_S6_ACC_REQ_DFP5b3					0x08
+
+#define ATOM_S6_ACC_REQ_DEVICEw1        ATOM_S5_DOS_REQ_DEVICEw0
+#define ATOM_S6_SYSTEM_POWER_MODE_CHANGEb3 0x10
+#define ATOM_S6_ACC_BLOCK_DISPLAY_SWITCHb3 0x20
+#define ATOM_S6_VRI_BRIGHTNESS_CHANGEb3    0x40
+#define ATOM_S6_CONFIG_DISPLAY_CHANGEb3    0x80
+
+#define ATOM_S6_DEVICE_CHANGE_SHIFT             0
+#define ATOM_S6_SCALER_CHANGE_SHIFT             1
+#define ATOM_S6_LID_CHANGE_SHIFT                2
+#define ATOM_S6_DOCKING_CHANGE_SHIFT            3
+#define ATOM_S6_ACC_MODE_SHIFT                  4
+#define ATOM_S6_EXT_DESKTOP_MODE_SHIFT          5
+#define ATOM_S6_LID_STATE_SHIFT                 6
+#define ATOM_S6_DOCK_STATE_SHIFT                7
+#define ATOM_S6_CRITICAL_STATE_SHIFT            8
+#define ATOM_S6_HW_I2C_BUSY_STATE_SHIFT         9
+#define ATOM_S6_THERMAL_STATE_CHANGE_SHIFT      10
+#define ATOM_S6_INTERRUPT_SET_BY_BIOS_SHIFT     11
+#define ATOM_S6_REQ_SCALER_SHIFT                12
+#define ATOM_S6_REQ_SCALER_ARATIO_SHIFT         13
+#define ATOM_S6_DISPLAY_STATE_CHANGE_SHIFT      14
+#define ATOM_S6_I2C_STATE_CHANGE_SHIFT          15
+#define ATOM_S6_SYSTEM_POWER_MODE_CHANGE_SHIFT  28
+#define ATOM_S6_ACC_BLOCK_DISPLAY_SWITCH_SHIFT  29
+#define ATOM_S6_VRI_BRIGHTNESS_CHANGE_SHIFT     30
+#define ATOM_S6_CONFIG_DISPLAY_CHANGE_SHIFT     31
+
+/*  BIOS_7_SCRATCH Definition, BIOS_7_SCRATCH is used by Firmware only !!!! */
+#define ATOM_S7_DOS_MODE_TYPEb0             0x03
+#define ATOM_S7_DOS_MODE_VGAb0              0x00
+#define ATOM_S7_DOS_MODE_VESAb0             0x01
+#define ATOM_S7_DOS_MODE_EXTb0              0x02
+#define ATOM_S7_DOS_MODE_PIXEL_DEPTHb0      0x0C
+#define ATOM_S7_DOS_MODE_PIXEL_FORMATb0     0xF0
+#define ATOM_S7_DOS_8BIT_DAC_ENb1           0x01
+#define ATOM_S7_DOS_MODE_NUMBERw1           0x0FFFF
+
+#define ATOM_S7_DOS_8BIT_DAC_EN_SHIFT       8
+
+/*  BIOS_8_SCRATCH Definition */
+#define ATOM_S8_I2C_CHANNEL_BUSY_MASK       0x00000FFFF
+#define ATOM_S8_I2C_HW_ENGINE_BUSY_MASK     0x0FFFF0000
+
+#define ATOM_S8_I2C_CHANNEL_BUSY_SHIFT      0
+#define ATOM_S8_I2C_ENGINE_BUSY_SHIFT       16
+
+/*  BIOS_9_SCRATCH Definition */
+#ifndef ATOM_S9_I2C_CHANNEL_COMPLETED_MASK
+#define ATOM_S9_I2C_CHANNEL_COMPLETED_MASK  0x0000FFFF
+#endif
+#ifndef ATOM_S9_I2C_CHANNEL_ABORTED_MASK
+#define ATOM_S9_I2C_CHANNEL_ABORTED_MASK    0xFFFF0000
+#endif
+#ifndef ATOM_S9_I2C_CHANNEL_COMPLETED_SHIFT
+#define ATOM_S9_I2C_CHANNEL_COMPLETED_SHIFT 0
+#endif
+#ifndef ATOM_S9_I2C_CHANNEL_ABORTED_SHIFT
+#define ATOM_S9_I2C_CHANNEL_ABORTED_SHIFT   16
+#endif
+
+#define ATOM_FLAG_SET                         0x20
+#define ATOM_FLAG_CLEAR                       0
+#define CLEAR_ATOM_S6_ACC_MODE \
+	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
+	 ATOM_S6_ACC_MODE_SHIFT | ATOM_FLAG_CLEAR)
+#define SET_ATOM_S6_DEVICE_CHANGE \
+	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
+	 ATOM_S6_DEVICE_CHANGE_SHIFT | ATOM_FLAG_SET)
+#define SET_ATOM_S6_VRI_BRIGHTNESS_CHANGE \
+	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
+	 ATOM_S6_VRI_BRIGHTNESS_CHANGE_SHIFT | ATOM_FLAG_SET)
+#define SET_ATOM_S6_SCALER_CHANGE \
+	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
+	 ATOM_S6_SCALER_CHANGE_SHIFT | ATOM_FLAG_SET)
+#define SET_ATOM_S6_LID_CHANGE \
+	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
+	 ATOM_S6_LID_CHANGE_SHIFT | ATOM_FLAG_SET)
+
+#define SET_ATOM_S6_LID_STATE \
+	((ATOM_ACC_CHANGE_INFO_DEF << 8) |\
+	 ATOM_S6_LID_STATE_SHIFT | ATOM_FLAG_SET)
+#define CLEAR_ATOM_S6_LID_STATE \
+	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
+	 ATOM_S6_LID_STATE_SHIFT | ATOM_FLAG_CLEAR)
+
+#define SET_ATOM_S6_DOCK_CHANGE \
+	((ATOM_ACC_CHANGE_INFO_DEF << 8)| \
+	 ATOM_S6_DOCKING_CHANGE_SHIFT | ATOM_FLAG_SET)
+#define SET_ATOM_S6_DOCK_STATE \
+	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
+	 ATOM_S6_DOCK_STATE_SHIFT | ATOM_FLAG_SET)
+#define CLEAR_ATOM_S6_DOCK_STATE \
+	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
+	 ATOM_S6_DOCK_STATE_SHIFT | ATOM_FLAG_CLEAR)
+
+#define SET_ATOM_S6_THERMAL_STATE_CHANGE \
+	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
+	 ATOM_S6_THERMAL_STATE_CHANGE_SHIFT | ATOM_FLAG_SET)
+#define SET_ATOM_S6_SYSTEM_POWER_MODE_CHANGE \
+	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
+	 ATOM_S6_SYSTEM_POWER_MODE_CHANGE_SHIFT | ATOM_FLAG_SET)
+#define SET_ATOM_S6_INTERRUPT_SET_BY_BIOS \
+	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
+	 ATOM_S6_INTERRUPT_SET_BY_BIOS_SHIFT | ATOM_FLAG_SET)
+
+#define SET_ATOM_S6_CRITICAL_STATE \
+	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
+	 ATOM_S6_CRITICAL_STATE_SHIFT | ATOM_FLAG_SET)
+#define CLEAR_ATOM_S6_CRITICAL_STATE \
+	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
+	 ATOM_S6_CRITICAL_STATE_SHIFT | ATOM_FLAG_CLEAR)
+
+#define SET_ATOM_S6_REQ_SCALER \
+	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
+	 ATOM_S6_REQ_SCALER_SHIFT | ATOM_FLAG_SET)
+#define CLEAR_ATOM_S6_REQ_SCALER \
+	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
+	 ATOM_S6_REQ_SCALER_SHIFT | ATOM_FLAG_CLEAR )
+
+#define SET_ATOM_S6_REQ_SCALER_ARATIO \
+	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
+	 ATOM_S6_REQ_SCALER_ARATIO_SHIFT | ATOM_FLAG_SET )
+#define CLEAR_ATOM_S6_REQ_SCALER_ARATIO \
+	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
+	 ATOM_S6_REQ_SCALER_ARATIO_SHIFT | ATOM_FLAG_CLEAR )
+
+#define SET_ATOM_S6_I2C_STATE_CHANGE \
+	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
+	 ATOM_S6_I2C_STATE_CHANGE_SHIFT | ATOM_FLAG_SET )
+
+#define SET_ATOM_S6_DISPLAY_STATE_CHANGE \
+	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
+	 ATOM_S6_DISPLAY_STATE_CHANGE_SHIFT | ATOM_FLAG_SET )
+
+#define SET_ATOM_S6_DEVICE_RECONFIG \
+	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
+	 ATOM_S6_CONFIG_DISPLAY_CHANGE_SHIFT | ATOM_FLAG_SET)
+#define CLEAR_ATOM_S0_LCD1 \
+	((ATOM_DEVICE_CONNECT_INFO_DEF << 8 ) | \
+	 ATOM_S0_LCD1_SHIFT | ATOM_FLAG_CLEAR )
+#define SET_ATOM_S7_DOS_8BIT_DAC_EN \
+	((ATOM_DOS_MODE_INFO_DEF << 8) | \
+	 ATOM_S7_DOS_8BIT_DAC_EN_SHIFT | ATOM_FLAG_SET )
+#define CLEAR_ATOM_S7_DOS_8BIT_DAC_EN \
+	((ATOM_DOS_MODE_INFO_DEF << 8) | \
+	 ATOM_S7_DOS_8BIT_DAC_EN_SHIFT | ATOM_FLAG_CLEAR )
+
+/****************************************************************************/
+/* Portion II: Definitinos only used in Driver */
+/****************************************************************************/
+
+/*  Macros used by driver */
+
+#define	GetIndexIntoMasterTable(MasterOrData, FieldName) (((char *)(&((ATOM_MASTER_LIST_OF_##MasterOrData##_TABLES *)0)->FieldName)-(char *)0)/sizeof(USHORT))
+
+#define GET_COMMAND_TABLE_COMMANDSET_REVISION(TABLE_HEADER_OFFSET) ((((ATOM_COMMON_TABLE_HEADER*)TABLE_HEADER_OFFSET)->ucTableFormatRevision)&0x3F)
+#define GET_COMMAND_TABLE_PARAMETER_REVISION(TABLE_HEADER_OFFSET)  ((((ATOM_COMMON_TABLE_HEADER*)TABLE_HEADER_OFFSET)->ucTableContentRevision)&0x3F)
+
+#define GET_DATA_TABLE_MAJOR_REVISION GET_COMMAND_TABLE_COMMANDSET_REVISION
+#define GET_DATA_TABLE_MINOR_REVISION GET_COMMAND_TABLE_PARAMETER_REVISION
+
+/****************************************************************************/
+/* Portion III: Definitinos only used in VBIOS */
+/****************************************************************************/
+#define ATOM_DAC_SRC					0x80
+#define ATOM_SRC_DAC1					0
+#define ATOM_SRC_DAC2					0x80
+
+#ifdef	UEFI_BUILD
+#define	USHORT	UTEMP
+#endif
+
+typedef struct _MEMORY_PLLINIT_PARAMETERS {
+	ULONG ulTargetMemoryClock;	/* In 10Khz unit */
+	UCHAR ucAction;		/* not define yet */
+	UCHAR ucFbDiv_Hi;	/* Fbdiv Hi byte */
+	UCHAR ucFbDiv;		/* FB value */
+	UCHAR ucPostDiv;	/* Post div */
+} MEMORY_PLLINIT_PARAMETERS;
+
+#define MEMORY_PLLINIT_PS_ALLOCATION  MEMORY_PLLINIT_PARAMETERS
+
+#define	GPIO_PIN_WRITE													0x01
+#define	GPIO_PIN_READ														0x00
+
+typedef struct _GPIO_PIN_CONTROL_PARAMETERS {
+	UCHAR ucGPIO_ID;	/* return value, read from GPIO pins */
+	UCHAR ucGPIOBitShift;	/* define which bit in uGPIOBitVal need to be update */
+	UCHAR ucGPIOBitVal;	/* Set/Reset corresponding bit defined in ucGPIOBitMask */
+	UCHAR ucAction;		/* =GPIO_PIN_WRITE: Read; =GPIO_PIN_READ: Write */
+} GPIO_PIN_CONTROL_PARAMETERS;
+
+typedef struct _ENABLE_SCALER_PARAMETERS {
+	UCHAR ucScaler;		/*  ATOM_SCALER1, ATOM_SCALER2 */
+	UCHAR ucEnable;		/*  ATOM_SCALER_DISABLE or ATOM_SCALER_CENTER or ATOM_SCALER_EXPANSION */
+	UCHAR ucTVStandard;	/*  */
+	UCHAR ucPadding[1];
+} ENABLE_SCALER_PARAMETERS;
+#define ENABLE_SCALER_PS_ALLOCATION ENABLE_SCALER_PARAMETERS
+
+/* ucEnable: */
+#define SCALER_BYPASS_AUTO_CENTER_NO_REPLICATION    0
+#define SCALER_BYPASS_AUTO_CENTER_AUTO_REPLICATION  1
+#define SCALER_ENABLE_2TAP_ALPHA_MODE               2
+#define SCALER_ENABLE_MULTITAP_MODE                 3
+
+typedef struct _ENABLE_HARDWARE_ICON_CURSOR_PARAMETERS {
+	ULONG usHWIconHorzVertPosn;	/*  Hardware Icon Vertical position */
+	UCHAR ucHWIconVertOffset;	/*  Hardware Icon Vertical offset */
+	UCHAR ucHWIconHorzOffset;	/*  Hardware Icon Horizontal offset */
+	UCHAR ucSelection;	/*  ATOM_CURSOR1 or ATOM_ICON1 or ATOM_CURSOR2 or ATOM_ICON2 */
+	UCHAR ucEnable;		/*  ATOM_ENABLE or ATOM_DISABLE */
+} ENABLE_HARDWARE_ICON_CURSOR_PARAMETERS;
+
+typedef struct _ENABLE_HARDWARE_ICON_CURSOR_PS_ALLOCATION {
+	ENABLE_HARDWARE_ICON_CURSOR_PARAMETERS sEnableIcon;
+	ENABLE_CRTC_PARAMETERS sReserved;
+} ENABLE_HARDWARE_ICON_CURSOR_PS_ALLOCATION;
+
+typedef struct _ENABLE_GRAPH_SURFACE_PARAMETERS {
+	USHORT usHight;		/*  Image Hight */
+	USHORT usWidth;		/*  Image Width */
+	UCHAR ucSurface;	/*  Surface 1 or 2 */
+	UCHAR ucPadding[3];
+} ENABLE_GRAPH_SURFACE_PARAMETERS;
+
+typedef struct _ENABLE_GRAPH_SURFACE_PARAMETERS_V1_2 {
+	USHORT usHight;		/*  Image Hight */
+	USHORT usWidth;		/*  Image Width */
+	UCHAR ucSurface;	/*  Surface 1 or 2 */
+	UCHAR ucEnable;		/*  ATOM_ENABLE or ATOM_DISABLE */
+	UCHAR ucPadding[2];
+} ENABLE_GRAPH_SURFACE_PARAMETERS_V1_2;
+
+typedef struct _ENABLE_GRAPH_SURFACE_PS_ALLOCATION {
+	ENABLE_GRAPH_SURFACE_PARAMETERS sSetSurface;
+	ENABLE_YUV_PS_ALLOCATION sReserved;	/*  Don't set this one */
+} ENABLE_GRAPH_SURFACE_PS_ALLOCATION;
+
+typedef struct _MEMORY_CLEAN_UP_PARAMETERS {
+	USHORT usMemoryStart;	/* in 8Kb boundry, offset from memory base address */
+	USHORT usMemorySize;	/* 8Kb blocks aligned */
+} MEMORY_CLEAN_UP_PARAMETERS;
+#define MEMORY_CLEAN_UP_PS_ALLOCATION MEMORY_CLEAN_UP_PARAMETERS
+
+typedef struct _GET_DISPLAY_SURFACE_SIZE_PARAMETERS {
+	USHORT usX_Size;	/* When use as input parameter, usX_Size indicates which CRTC */
+	USHORT usY_Size;
+} GET_DISPLAY_SURFACE_SIZE_PARAMETERS;
+
+typedef struct _INDIRECT_IO_ACCESS {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	UCHAR IOAccessSequence[256];
+} INDIRECT_IO_ACCESS;
+
+#define INDIRECT_READ              0x00
+#define INDIRECT_WRITE             0x80
+
+#define INDIRECT_IO_MM             0
+#define INDIRECT_IO_PLL            1
+#define INDIRECT_IO_MC             2
+#define INDIRECT_IO_PCIE           3
+#define INDIRECT_IO_PCIEP          4
+#define INDIRECT_IO_NBMISC         5
+
+#define INDIRECT_IO_PLL_READ       INDIRECT_IO_PLL   | INDIRECT_READ
+#define INDIRECT_IO_PLL_WRITE      INDIRECT_IO_PLL   | INDIRECT_WRITE
+#define INDIRECT_IO_MC_READ        INDIRECT_IO_MC    | INDIRECT_READ
+#define INDIRECT_IO_MC_WRITE       INDIRECT_IO_MC    | INDIRECT_WRITE
+#define INDIRECT_IO_PCIE_READ      INDIRECT_IO_PCIE  | INDIRECT_READ
+#define INDIRECT_IO_PCIE_WRITE     INDIRECT_IO_PCIE  | INDIRECT_WRITE
+#define INDIRECT_IO_PCIEP_READ     INDIRECT_IO_PCIEP | INDIRECT_READ
+#define INDIRECT_IO_PCIEP_WRITE    INDIRECT_IO_PCIEP | INDIRECT_WRITE
+#define INDIRECT_IO_NBMISC_READ    INDIRECT_IO_NBMISC | INDIRECT_READ
+#define INDIRECT_IO_NBMISC_WRITE   INDIRECT_IO_NBMISC | INDIRECT_WRITE
+
+typedef struct _ATOM_OEM_INFO {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	ATOM_I2C_ID_CONFIG_ACCESS sucI2cId;
+} ATOM_OEM_INFO;
+
+typedef struct _ATOM_TV_MODE {
+	UCHAR ucVMode_Num;	/* Video mode number */
+	UCHAR ucTV_Mode_Num;	/* Internal TV mode number */
+} ATOM_TV_MODE;
+
+typedef struct _ATOM_BIOS_INT_TVSTD_MODE {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	USHORT usTV_Mode_LUT_Offset;	/*  Pointer to standard to internal number conversion table */
+	USHORT usTV_FIFO_Offset;	/*  Pointer to FIFO entry table */
+	USHORT usNTSC_Tbl_Offset;	/*  Pointer to SDTV_Mode_NTSC table */
+	USHORT usPAL_Tbl_Offset;	/*  Pointer to SDTV_Mode_PAL table */
+	USHORT usCV_Tbl_Offset;	/*  Pointer to SDTV_Mode_PAL table */
+} ATOM_BIOS_INT_TVSTD_MODE;
+
+typedef struct _ATOM_TV_MODE_SCALER_PTR {
+	USHORT ucFilter0_Offset;	/* Pointer to filter format 0 coefficients */
+	USHORT usFilter1_Offset;	/* Pointer to filter format 0 coefficients */
+	UCHAR ucTV_Mode_Num;
+} ATOM_TV_MODE_SCALER_PTR;
+
+typedef struct _ATOM_STANDARD_VESA_TIMING {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	ATOM_DTD_FORMAT aModeTimings[16];	/*  16 is not the real array number, just for initial allocation */
+} ATOM_STANDARD_VESA_TIMING;
+
+typedef struct _ATOM_STD_FORMAT {
+	USHORT usSTD_HDisp;
+	USHORT usSTD_VDisp;
+	USHORT usSTD_RefreshRate;
+	USHORT usReserved;
+} ATOM_STD_FORMAT;
+
+typedef struct _ATOM_VESA_TO_EXTENDED_MODE {
+	USHORT usVESA_ModeNumber;
+	USHORT usExtendedModeNumber;
+} ATOM_VESA_TO_EXTENDED_MODE;
+
+typedef struct _ATOM_VESA_TO_INTENAL_MODE_LUT {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	ATOM_VESA_TO_EXTENDED_MODE asVESA_ToExtendedModeInfo[76];
+} ATOM_VESA_TO_INTENAL_MODE_LUT;
+
+/*************** ATOM Memory Related Data Structure ***********************/
+typedef struct _ATOM_MEMORY_VENDOR_BLOCK {
+	UCHAR ucMemoryType;
+	UCHAR ucMemoryVendor;
+	UCHAR ucAdjMCId;
+	UCHAR ucDynClkId;
+	ULONG ulDllResetClkRange;
+} ATOM_MEMORY_VENDOR_BLOCK;
+
+typedef struct _ATOM_MEMORY_SETTING_ID_CONFIG {
+#if ATOM_BIG_ENDIAN
+	ULONG ucMemBlkId:8;
+	ULONG ulMemClockRange:24;
+#else
+	ULONG ulMemClockRange:24;
+	ULONG ucMemBlkId:8;
+#endif
+} ATOM_MEMORY_SETTING_ID_CONFIG;
+
+typedef union _ATOM_MEMORY_SETTING_ID_CONFIG_ACCESS {
+	ATOM_MEMORY_SETTING_ID_CONFIG slAccess;
+	ULONG ulAccess;
+} ATOM_MEMORY_SETTING_ID_CONFIG_ACCESS;
+
+typedef struct _ATOM_MEMORY_SETTING_DATA_BLOCK {
+	ATOM_MEMORY_SETTING_ID_CONFIG_ACCESS ulMemoryID;
+	ULONG aulMemData[1];
+} ATOM_MEMORY_SETTING_DATA_BLOCK;
+
+typedef struct _ATOM_INIT_REG_INDEX_FORMAT {
+	USHORT usRegIndex;	/*  MC register index */
+	UCHAR ucPreRegDataLength;	/*  offset in ATOM_INIT_REG_DATA_BLOCK.saRegDataBuf */
+} ATOM_INIT_REG_INDEX_FORMAT;
+
+typedef struct _ATOM_INIT_REG_BLOCK {
+	USHORT usRegIndexTblSize;	/* size of asRegIndexBuf */
+	USHORT usRegDataBlkSize;	/* size of ATOM_MEMORY_SETTING_DATA_BLOCK */
+	ATOM_INIT_REG_INDEX_FORMAT asRegIndexBuf[1];
+	ATOM_MEMORY_SETTING_DATA_BLOCK asRegDataBuf[1];
+} ATOM_INIT_REG_BLOCK;
+
+#define END_OF_REG_INDEX_BLOCK  0x0ffff
+#define END_OF_REG_DATA_BLOCK   0x00000000
+#define ATOM_INIT_REG_MASK_FLAG 0x80
+#define	CLOCK_RANGE_HIGHEST			0x00ffffff
+
+#define VALUE_DWORD             SIZEOF ULONG
+#define VALUE_SAME_AS_ABOVE     0
+#define VALUE_MASK_DWORD        0x84
+
+#define INDEX_ACCESS_RANGE_BEGIN	    (VALUE_DWORD + 1)
+#define INDEX_ACCESS_RANGE_END		    (INDEX_ACCESS_RANGE_BEGIN + 1)
+#define VALUE_INDEX_ACCESS_SINGLE	    (INDEX_ACCESS_RANGE_END + 1)
+
+typedef struct _ATOM_MC_INIT_PARAM_TABLE {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	USHORT usAdjustARB_SEQDataOffset;
+	USHORT usMCInitMemTypeTblOffset;
+	USHORT usMCInitCommonTblOffset;
+	USHORT usMCInitPowerDownTblOffset;
+	ULONG ulARB_SEQDataBuf[32];
+	ATOM_INIT_REG_BLOCK asMCInitMemType;
+	ATOM_INIT_REG_BLOCK asMCInitCommon;
+} ATOM_MC_INIT_PARAM_TABLE;
+
+#define _4Mx16              0x2
+#define _4Mx32              0x3
+#define _8Mx16              0x12
+#define _8Mx32              0x13
+#define _16Mx16             0x22
+#define _16Mx32             0x23
+#define _32Mx16             0x32
+#define _32Mx32             0x33
+#define _64Mx8              0x41
+#define _64Mx16             0x42
+
+#define SAMSUNG             0x1
+#define INFINEON            0x2
+#define ELPIDA              0x3
+#define ETRON               0x4
+#define NANYA               0x5
+#define HYNIX               0x6
+#define MOSEL               0x7
+#define WINBOND             0x8
+#define ESMT                0x9
+#define MICRON              0xF
+
+#define QIMONDA             INFINEON
+#define PROMOS              MOSEL
+
+/* ///////////Support for GDDR5 MC uCode to reside in upper 64K of ROM///////////// */
+
+#define UCODE_ROM_START_ADDRESS		0x1c000
+#define	UCODE_SIGNATURE			0x4375434d	/*  'MCuC' - MC uCode */
+
+/* uCode block header for reference */
+
+typedef struct _MCuCodeHeader {
+	ULONG ulSignature;
+	UCHAR ucRevision;
+	UCHAR ucChecksum;
+	UCHAR ucReserved1;
+	UCHAR ucReserved2;
+	USHORT usParametersLength;
+	USHORT usUCodeLength;
+	USHORT usReserved1;
+	USHORT usReserved2;
+} MCuCodeHeader;
+
+/* //////////////////////////////////////////////////////////////////////////////// */
+
+#define ATOM_MAX_NUMBER_OF_VRAM_MODULE	16
+
+#define ATOM_VRAM_MODULE_MEMORY_VENDOR_ID_MASK	0xF
+typedef struct _ATOM_VRAM_MODULE_V1 {
+	ULONG ulReserved;
+	USHORT usEMRSValue;
+	USHORT usMRSValue;
+	USHORT usReserved;
+	UCHAR ucExtMemoryID;	/*  An external indicator (by hardcode, callback or pin) to tell what is the current memory module */
+	UCHAR ucMemoryType;	/*  [7:4]=0x1:DDR1;=0x2:DDR2;=0x3:DDR3;=0x4:DDR4;[3:0] reserved; */
+	UCHAR ucMemoryVenderID;	/*  Predefined,never change across designs or memory type/vender */
+	UCHAR ucMemoryDeviceCfg;	/*  [7:4]=0x0:4M;=0x1:8M;=0x2:16M;0x3:32M....[3:0]=0x0:x4;=0x1:x8;=0x2:x16;=0x3:x32... */
+	UCHAR ucRow;		/*  Number of Row,in power of 2; */
+	UCHAR ucColumn;		/*  Number of Column,in power of 2; */
+	UCHAR ucBank;		/*  Nunber of Bank; */
+	UCHAR ucRank;		/*  Number of Rank, in power of 2 */
+	UCHAR ucChannelNum;	/*  Number of channel; */
+	UCHAR ucChannelConfig;	/*  [3:0]=Indication of what channel combination;[4:7]=Channel bit width, in number of 2 */
+	UCHAR ucDefaultMVDDQ_ID;	/*  Default MVDDQ setting for this memory block, ID linking to MVDDQ info table to find real set-up data; */
+	UCHAR ucDefaultMVDDC_ID;	/*  Default MVDDC setting for this memory block, ID linking to MVDDC info table to find real set-up data; */
+	UCHAR ucReserved[2];
+} ATOM_VRAM_MODULE_V1;
+
+typedef struct _ATOM_VRAM_MODULE_V2 {
+	ULONG ulReserved;
+	ULONG ulFlags;		/*  To enable/disable functionalities based on memory type */
+	ULONG ulEngineClock;	/*  Override of default engine clock for particular memory type */
+	ULONG ulMemoryClock;	/*  Override of default memory clock for particular memory type */
+	USHORT usEMRS2Value;	/*  EMRS2 Value is used for GDDR2 and GDDR4 memory type */
+	USHORT usEMRS3Value;	/*  EMRS3 Value is used for GDDR2 and GDDR4 memory type */
+	USHORT usEMRSValue;
+	USHORT usMRSValue;
+	USHORT usReserved;
+	UCHAR ucExtMemoryID;	/*  An external indicator (by hardcode, callback or pin) to tell what is the current memory module */
+	UCHAR ucMemoryType;	/*  [7:4]=0x1:DDR1;=0x2:DDR2;=0x3:DDR3;=0x4:DDR4;[3:0] - must not be used for now; */
+	UCHAR ucMemoryVenderID;	/*  Predefined,never change across designs or memory type/vender. If not predefined, vendor detection table gets executed */
+	UCHAR ucMemoryDeviceCfg;	/*  [7:4]=0x0:4M;=0x1:8M;=0x2:16M;0x3:32M....[3:0]=0x0:x4;=0x1:x8;=0x2:x16;=0x3:x32... */
+	UCHAR ucRow;		/*  Number of Row,in power of 2; */
+	UCHAR ucColumn;		/*  Number of Column,in power of 2; */
+	UCHAR ucBank;		/*  Nunber of Bank; */
+	UCHAR ucRank;		/*  Number of Rank, in power of 2 */
+	UCHAR ucChannelNum;	/*  Number of channel; */
+	UCHAR ucChannelConfig;	/*  [3:0]=Indication of what channel combination;[4:7]=Channel bit width, in number of 2 */
+	UCHAR ucDefaultMVDDQ_ID;	/*  Default MVDDQ setting for this memory block, ID linking to MVDDQ info table to find real set-up data; */
+	UCHAR ucDefaultMVDDC_ID;	/*  Default MVDDC setting for this memory block, ID linking to MVDDC info table to find real set-up data; */
+	UCHAR ucRefreshRateFactor;
+	UCHAR ucReserved[3];
+} ATOM_VRAM_MODULE_V2;
+
+typedef struct _ATOM_MEMORY_TIMING_FORMAT {
+	ULONG ulClkRange;	/*  memory clock in 10kHz unit, when target memory clock is below this clock, use this memory timing */
+	union {
+		USHORT usMRS;	/*  mode register */
+		USHORT usDDR3_MR0;
+	};
+	union {
+		USHORT usEMRS;	/*  extended mode register */
+		USHORT usDDR3_MR1;
+	};
+	UCHAR ucCL;		/*  CAS latency */
+	UCHAR ucWL;		/*  WRITE Latency */
+	UCHAR uctRAS;		/*  tRAS */
+	UCHAR uctRC;		/*  tRC */
+	UCHAR uctRFC;		/*  tRFC */
+	UCHAR uctRCDR;		/*  tRCDR */
+	UCHAR uctRCDW;		/*  tRCDW */
+	UCHAR uctRP;		/*  tRP */
+	UCHAR uctRRD;		/*  tRRD */
+	UCHAR uctWR;		/*  tWR */
+	UCHAR uctWTR;		/*  tWTR */
+	UCHAR uctPDIX;		/*  tPDIX */
+	UCHAR uctFAW;		/*  tFAW */
+	UCHAR uctAOND;		/*  tAOND */
+	union {
+		struct {
+			UCHAR ucflag;	/*  flag to control memory timing calculation. bit0= control EMRS2 Infineon */
+			UCHAR ucReserved;
+		};
+		USHORT usDDR3_MR2;
+	};
+} ATOM_MEMORY_TIMING_FORMAT;
+
+typedef struct _ATOM_MEMORY_TIMING_FORMAT_V1 {
+	ULONG ulClkRange;	/*  memory clock in 10kHz unit, when target memory clock is below this clock, use this memory timing */
+	USHORT usMRS;		/*  mode register */
+	USHORT usEMRS;		/*  extended mode register */
+	UCHAR ucCL;		/*  CAS latency */
+	UCHAR ucWL;		/*  WRITE Latency */
+	UCHAR uctRAS;		/*  tRAS */
+	UCHAR uctRC;		/*  tRC */
+	UCHAR uctRFC;		/*  tRFC */
+	UCHAR uctRCDR;		/*  tRCDR */
+	UCHAR uctRCDW;		/*  tRCDW */
+	UCHAR uctRP;		/*  tRP */
+	UCHAR uctRRD;		/*  tRRD */
+	UCHAR uctWR;		/*  tWR */
+	UCHAR uctWTR;		/*  tWTR */
+	UCHAR uctPDIX;		/*  tPDIX */
+	UCHAR uctFAW;		/*  tFAW */
+	UCHAR uctAOND;		/*  tAOND */
+	UCHAR ucflag;		/*  flag to control memory timing calculation. bit0= control EMRS2 Infineon */
+/* ///////////////////////GDDR parameters/////////////////////////////////// */
+	UCHAR uctCCDL;		/*  */
+	UCHAR uctCRCRL;		/*  */
+	UCHAR uctCRCWL;		/*  */
+	UCHAR uctCKE;		/*  */
+	UCHAR uctCKRSE;		/*  */
+	UCHAR uctCKRSX;		/*  */
+	UCHAR uctFAW32;		/*  */
+	UCHAR ucReserved1;	/*  */
+	UCHAR ucReserved2;	/*  */
+	UCHAR ucTerminator;
+} ATOM_MEMORY_TIMING_FORMAT_V1;
+
+typedef struct _ATOM_MEMORY_FORMAT {
+	ULONG ulDllDisClock;	/*  memory DLL will be disable when target memory clock is below this clock */
+	union {
+		USHORT usEMRS2Value;	/*  EMRS2 Value is used for GDDR2 and GDDR4 memory type */
+		USHORT usDDR3_Reserved;	/*  Not used for DDR3 memory */
+	};
+	union {
+		USHORT usEMRS3Value;	/*  EMRS3 Value is used for GDDR2 and GDDR4 memory type */
+		USHORT usDDR3_MR3;	/*  Used for DDR3 memory */
+	};
+	UCHAR ucMemoryType;	/*  [7:4]=0x1:DDR1;=0x2:DDR2;=0x3:DDR3;=0x4:DDR4;[3:0] - must not be used for now; */
+	UCHAR ucMemoryVenderID;	/*  Predefined,never change across designs or memory type/vender. If not predefined, vendor detection table gets executed */
+	UCHAR ucRow;		/*  Number of Row,in power of 2; */
+	UCHAR ucColumn;		/*  Number of Column,in power of 2; */
+	UCHAR ucBank;		/*  Nunber of Bank; */
+	UCHAR ucRank;		/*  Number of Rank, in power of 2 */
+	UCHAR ucBurstSize;	/*  burst size, 0= burst size=4  1= burst size=8 */
+	UCHAR ucDllDisBit;	/*  position of DLL Enable/Disable bit in EMRS ( Extended Mode Register ) */
+	UCHAR ucRefreshRateFactor;	/*  memory refresh rate in unit of ms */
+	UCHAR ucDensity;	/*  _8Mx32, _16Mx32, _16Mx16, _32Mx16 */
+	UCHAR ucPreamble;	/* [7:4] Write Preamble, [3:0] Read Preamble */
+	UCHAR ucMemAttrib;	/*  Memory Device Addribute, like RDBI/WDBI etc */
+	ATOM_MEMORY_TIMING_FORMAT asMemTiming[5];	/* Memory Timing block sort from lower clock to higher clock */
+} ATOM_MEMORY_FORMAT;
+
+typedef struct _ATOM_VRAM_MODULE_V3 {
+	ULONG ulChannelMapCfg;	/*  board dependent paramenter:Channel combination */
+	USHORT usSize;		/*  size of ATOM_VRAM_MODULE_V3 */
+	USHORT usDefaultMVDDQ;	/*  board dependent parameter:Default Memory Core Voltage */
+	USHORT usDefaultMVDDC;	/*  board dependent parameter:Default Memory IO Voltage */
+	UCHAR ucExtMemoryID;	/*  An external indicator (by hardcode, callback or pin) to tell what is the current memory module */
+	UCHAR ucChannelNum;	/*  board dependent parameter:Number of channel; */
+	UCHAR ucChannelSize;	/*  board dependent parameter:32bit or 64bit */
+	UCHAR ucVREFI;		/*  board dependnt parameter: EXT or INT +160mv to -140mv */
+	UCHAR ucNPL_RT;		/*  board dependent parameter:NPL round trip delay, used for calculate memory timing parameters */
+	UCHAR ucFlag;		/*  To enable/disable functionalities based on memory type */
+	ATOM_MEMORY_FORMAT asMemory;	/*  describ all of video memory parameters from memory spec */
+} ATOM_VRAM_MODULE_V3;
+
+/* ATOM_VRAM_MODULE_V3.ucNPL_RT */
+#define NPL_RT_MASK															0x0f
+#define BATTERY_ODT_MASK												0xc0
+
+#define ATOM_VRAM_MODULE		 ATOM_VRAM_MODULE_V3
+
+typedef struct _ATOM_VRAM_MODULE_V4 {
+	ULONG ulChannelMapCfg;	/*  board dependent parameter: Channel combination */
+	USHORT usModuleSize;	/*  size of ATOM_VRAM_MODULE_V4, make it easy for VBIOS to look for next entry of VRAM_MODULE */
+	USHORT usPrivateReserved;	/*  BIOS internal reserved space to optimize code size, updated by the compiler, shouldn't be modified manually!! */
+	/*  MC_ARB_RAMCFG (includes NOOFBANK,NOOFRANKS,NOOFROWS,NOOFCOLS) */
+	USHORT usReserved;
+	UCHAR ucExtMemoryID;	/*  An external indicator (by hardcode, callback or pin) to tell what is the current memory module */
+	UCHAR ucMemoryType;	/*  [7:4]=0x1:DDR1;=0x2:DDR2;=0x3:DDR3;=0x4:DDR4; 0x5:DDR5 [3:0] - Must be 0x0 for now; */
+	UCHAR ucChannelNum;	/*  Number of channels present in this module config */
+	UCHAR ucChannelWidth;	/*  0 - 32 bits; 1 - 64 bits */
+	UCHAR ucDensity;	/*  _8Mx32, _16Mx32, _16Mx16, _32Mx16 */
+	UCHAR ucFlag;		/*  To enable/disable functionalities based on memory type */
+	UCHAR ucMisc;		/*  bit0: 0 - single rank; 1 - dual rank;   bit2: 0 - burstlength 4, 1 - burstlength 8 */
+	UCHAR ucVREFI;		/*  board dependent parameter */
+	UCHAR ucNPL_RT;		/*  board dependent parameter:NPL round trip delay, used for calculate memory timing parameters */
+	UCHAR ucPreamble;	/*  [7:4] Write Preamble, [3:0] Read Preamble */
+	UCHAR ucMemorySize;	/*  BIOS internal reserved space to optimize code size, updated by the compiler, shouldn't be modified manually!! */
+	/*  Total memory size in unit of 16MB for CONFIG_MEMSIZE - bit[23:0] zeros */
+	UCHAR ucReserved[3];
+
+/* compare with V3, we flat the struct by merging ATOM_MEMORY_FORMAT (as is) into V4 as the same level */
+	union {
+		USHORT usEMRS2Value;	/*  EMRS2 Value is used for GDDR2 and GDDR4 memory type */
+		USHORT usDDR3_Reserved;
+	};
+	union {
+		USHORT usEMRS3Value;	/*  EMRS3 Value is used for GDDR2 and GDDR4 memory type */
+		USHORT usDDR3_MR3;	/*  Used for DDR3 memory */
+	};
+	UCHAR ucMemoryVenderID;	/*  Predefined, If not predefined, vendor detection table gets executed */
+	UCHAR ucRefreshRateFactor;	/*  [1:0]=RefreshFactor (00=8ms, 01=16ms, 10=32ms,11=64ms) */
+	UCHAR ucReserved2[2];
+	ATOM_MEMORY_TIMING_FORMAT asMemTiming[5];	/* Memory Timing block sort from lower clock to higher clock */
+} ATOM_VRAM_MODULE_V4;
+
+#define VRAM_MODULE_V4_MISC_RANK_MASK       0x3
+#define VRAM_MODULE_V4_MISC_DUAL_RANK       0x1
+#define VRAM_MODULE_V4_MISC_BL_MASK         0x4
+#define VRAM_MODULE_V4_MISC_BL8             0x4
+#define VRAM_MODULE_V4_MISC_DUAL_CS         0x10
+
+typedef struct _ATOM_VRAM_MODULE_V5 {
+	ULONG ulChannelMapCfg;	/*  board dependent parameter: Channel combination */
+	USHORT usModuleSize;	/*  size of ATOM_VRAM_MODULE_V4, make it easy for VBIOS to look for next entry of VRAM_MODULE */
+	USHORT usPrivateReserved;	/*  BIOS internal reserved space to optimize code size, updated by the compiler, shouldn't be modified manually!! */
+	/*  MC_ARB_RAMCFG (includes NOOFBANK,NOOFRANKS,NOOFROWS,NOOFCOLS) */
+	USHORT usReserved;
+	UCHAR ucExtMemoryID;	/*  An external indicator (by hardcode, callback or pin) to tell what is the current memory module */
+	UCHAR ucMemoryType;	/*  [7:4]=0x1:DDR1;=0x2:DDR2;=0x3:DDR3;=0x4:DDR4; 0x5:DDR5 [3:0] - Must be 0x0 for now; */
+	UCHAR ucChannelNum;	/*  Number of channels present in this module config */
+	UCHAR ucChannelWidth;	/*  0 - 32 bits; 1 - 64 bits */
+	UCHAR ucDensity;	/*  _8Mx32, _16Mx32, _16Mx16, _32Mx16 */
+	UCHAR ucFlag;		/*  To enable/disable functionalities based on memory type */
+	UCHAR ucMisc;		/*  bit0: 0 - single rank; 1 - dual rank;   bit2: 0 - burstlength 4, 1 - burstlength 8 */
+	UCHAR ucVREFI;		/*  board dependent parameter */
+	UCHAR ucNPL_RT;		/*  board dependent parameter:NPL round trip delay, used for calculate memory timing parameters */
+	UCHAR ucPreamble;	/*  [7:4] Write Preamble, [3:0] Read Preamble */
+	UCHAR ucMemorySize;	/*  BIOS internal reserved space to optimize code size, updated by the compiler, shouldn't be modified manually!! */
+	/*  Total memory size in unit of 16MB for CONFIG_MEMSIZE - bit[23:0] zeros */
+	UCHAR ucReserved[3];
+
+/* compare with V3, we flat the struct by merging ATOM_MEMORY_FORMAT (as is) into V4 as the same level */
+	USHORT usEMRS2Value;	/*  EMRS2 Value is used for GDDR2 and GDDR4 memory type */
+	USHORT usEMRS3Value;	/*  EMRS3 Value is used for GDDR2 and GDDR4 memory type */
+	UCHAR ucMemoryVenderID;	/*  Predefined, If not predefined, vendor detection table gets executed */
+	UCHAR ucRefreshRateFactor;	/*  [1:0]=RefreshFactor (00=8ms, 01=16ms, 10=32ms,11=64ms) */
+	UCHAR ucFIFODepth;	/*  FIFO depth supposes to be detected during vendor detection, but if we dont do vendor detection we have to hardcode FIFO Depth */
+	UCHAR ucCDR_Bandwidth;	/*  [0:3]=Read CDR bandwidth, [4:7] - Write CDR Bandwidth */
+	ATOM_MEMORY_TIMING_FORMAT_V1 asMemTiming[5];	/* Memory Timing block sort from lower clock to higher clock */
+} ATOM_VRAM_MODULE_V5;
+
+typedef struct _ATOM_VRAM_INFO_V2 {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	UCHAR ucNumOfVRAMModule;
+	ATOM_VRAM_MODULE aVramInfo[ATOM_MAX_NUMBER_OF_VRAM_MODULE];	/*  just for allocation, real number of blocks is in ucNumOfVRAMModule; */
+} ATOM_VRAM_INFO_V2;
+
+typedef struct _ATOM_VRAM_INFO_V3 {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	USHORT usMemAdjustTblOffset;	/*  offset of ATOM_INIT_REG_BLOCK structure for memory vendor specific MC adjust setting */
+	USHORT usMemClkPatchTblOffset;	/*      offset of ATOM_INIT_REG_BLOCK structure for memory clock specific MC setting */
+	USHORT usRerseved;
+	UCHAR aVID_PinsShift[9];	/*  8 bit strap maximum+terminator */
+	UCHAR ucNumOfVRAMModule;
+	ATOM_VRAM_MODULE aVramInfo[ATOM_MAX_NUMBER_OF_VRAM_MODULE];	/*  just for allocation, real number of blocks is in ucNumOfVRAMModule; */
+	ATOM_INIT_REG_BLOCK asMemPatch;	/*  for allocation */
+	/*      ATOM_INIT_REG_BLOCK                              aMemAdjust; */
+} ATOM_VRAM_INFO_V3;
+
+#define	ATOM_VRAM_INFO_LAST	     ATOM_VRAM_INFO_V3
+
+typedef struct _ATOM_VRAM_INFO_V4 {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	USHORT usMemAdjustTblOffset;	/*  offset of ATOM_INIT_REG_BLOCK structure for memory vendor specific MC adjust setting */
+	USHORT usMemClkPatchTblOffset;	/*      offset of ATOM_INIT_REG_BLOCK structure for memory clock specific MC setting */
+	USHORT usRerseved;
+	UCHAR ucMemDQ7_0ByteRemap;	/*  DQ line byte remap, =0: Memory Data line BYTE0, =1: BYTE1, =2: BYTE2, =3: BYTE3 */
+	ULONG ulMemDQ7_0BitRemap;	/*  each DQ line ( 7~0) use 3bits, like: DQ0=Bit[2:0], DQ1:[5:3], ... DQ7:[23:21] */
+	UCHAR ucReservde[4];
+	UCHAR ucNumOfVRAMModule;
+	ATOM_VRAM_MODULE_V4 aVramInfo[ATOM_MAX_NUMBER_OF_VRAM_MODULE];	/*  just for allocation, real number of blocks is in ucNumOfVRAMModule; */
+	ATOM_INIT_REG_BLOCK asMemPatch;	/*  for allocation */
+	/*      ATOM_INIT_REG_BLOCK                              aMemAdjust; */
+} ATOM_VRAM_INFO_V4;
+
+typedef struct _ATOM_VRAM_GPIO_DETECTION_INFO {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	UCHAR aVID_PinsShift[9];	/* 8 bit strap maximum+terminator */
+} ATOM_VRAM_GPIO_DETECTION_INFO;
+
+typedef struct _ATOM_MEMORY_TRAINING_INFO {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	UCHAR ucTrainingLoop;
+	UCHAR ucReserved[3];
+	ATOM_INIT_REG_BLOCK asMemTrainingSetting;
+} ATOM_MEMORY_TRAINING_INFO;
+
+typedef struct SW_I2C_CNTL_DATA_PARAMETERS {
+	UCHAR ucControl;
+	UCHAR ucData;
+	UCHAR ucSatus;
+	UCHAR ucTemp;
+} SW_I2C_CNTL_DATA_PARAMETERS;
+
+#define SW_I2C_CNTL_DATA_PS_ALLOCATION  SW_I2C_CNTL_DATA_PARAMETERS
+
+typedef struct _SW_I2C_IO_DATA_PARAMETERS {
+	USHORT GPIO_Info;
+	UCHAR ucAct;
+	UCHAR ucData;
+} SW_I2C_IO_DATA_PARAMETERS;
+
+#define SW_I2C_IO_DATA_PS_ALLOCATION  SW_I2C_IO_DATA_PARAMETERS
+
+/****************************SW I2C CNTL DEFINITIONS**********************/
+#define SW_I2C_IO_RESET       0
+#define SW_I2C_IO_GET         1
+#define SW_I2C_IO_DRIVE       2
+#define SW_I2C_IO_SET         3
+#define SW_I2C_IO_START       4
+
+#define SW_I2C_IO_CLOCK       0
+#define SW_I2C_IO_DATA        0x80
+
+#define SW_I2C_IO_ZERO        0
+#define SW_I2C_IO_ONE         0x100
+
+#define SW_I2C_CNTL_READ      0
+#define SW_I2C_CNTL_WRITE     1
+#define SW_I2C_CNTL_START     2
+#define SW_I2C_CNTL_STOP      3
+#define SW_I2C_CNTL_OPEN      4
+#define SW_I2C_CNTL_CLOSE     5
+#define SW_I2C_CNTL_WRITE1BIT 6
+
+/* ==============================VESA definition Portion=============================== */
+#define VESA_OEM_PRODUCT_REV			            '01.00'
+#define VESA_MODE_ATTRIBUTE_MODE_SUPPORT	     0xBB	/* refer to VBE spec p.32, no TTY support */
+#define VESA_MODE_WIN_ATTRIBUTE						     7
+#define VESA_WIN_SIZE											     64
+
+typedef struct _PTR_32_BIT_STRUCTURE {
+	USHORT Offset16;
+	USHORT Segment16;
+} PTR_32_BIT_STRUCTURE;
+
+typedef union _PTR_32_BIT_UNION {
+	PTR_32_BIT_STRUCTURE SegmentOffset;
+	ULONG Ptr32_Bit;
+} PTR_32_BIT_UNION;
+
+typedef struct _VBE_1_2_INFO_BLOCK_UPDATABLE {
+	UCHAR VbeSignature[4];
+	USHORT VbeVersion;
+	PTR_32_BIT_UNION OemStringPtr;
+	UCHAR Capabilities[4];
+	PTR_32_BIT_UNION VideoModePtr;
+	USHORT TotalMemory;
+} VBE_1_2_INFO_BLOCK_UPDATABLE;
+
+typedef struct _VBE_2_0_INFO_BLOCK_UPDATABLE {
+	VBE_1_2_INFO_BLOCK_UPDATABLE CommonBlock;
+	USHORT OemSoftRev;
+	PTR_32_BIT_UNION OemVendorNamePtr;
+	PTR_32_BIT_UNION OemProductNamePtr;
+	PTR_32_BIT_UNION OemProductRevPtr;
+} VBE_2_0_INFO_BLOCK_UPDATABLE;
+
+typedef union _VBE_VERSION_UNION {
+	VBE_2_0_INFO_BLOCK_UPDATABLE VBE_2_0_InfoBlock;
+	VBE_1_2_INFO_BLOCK_UPDATABLE VBE_1_2_InfoBlock;
+} VBE_VERSION_UNION;
+
+typedef struct _VBE_INFO_BLOCK {
+	VBE_VERSION_UNION UpdatableVBE_Info;
+	UCHAR Reserved[222];
+	UCHAR OemData[256];
+} VBE_INFO_BLOCK;
+
+typedef struct _VBE_FP_INFO {
+	USHORT HSize;
+	USHORT VSize;
+	USHORT FPType;
+	UCHAR RedBPP;
+	UCHAR GreenBPP;
+	UCHAR BlueBPP;
+	UCHAR ReservedBPP;
+	ULONG RsvdOffScrnMemSize;
+	ULONG RsvdOffScrnMEmPtr;
+	UCHAR Reserved[14];
+} VBE_FP_INFO;
+
+typedef struct _VESA_MODE_INFO_BLOCK {
+/*  Mandatory information for all VBE revisions */
+	USHORT ModeAttributes;	/*                  dw      ?       ; mode attributes */
+	UCHAR WinAAttributes;	/*                    db      ?       ; window A attributes */
+	UCHAR WinBAttributes;	/*                    db      ?       ; window B attributes */
+	USHORT WinGranularity;	/*                    dw      ?       ; window granularity */
+	USHORT WinSize;		/*                    dw      ?       ; window size */
+	USHORT WinASegment;	/*                    dw      ?       ; window A start segment */
+	USHORT WinBSegment;	/*                    dw      ?       ; window B start segment */
+	ULONG WinFuncPtr;	/*                    dd      ?       ; real mode pointer to window function */
+	USHORT BytesPerScanLine;	/*                    dw      ?       ; bytes per scan line */
+
+/* ; Mandatory information for VBE 1.2 and above */
+	USHORT XResolution;	/*                         dw      ?       ; horizontal resolution in pixels or characters */
+	USHORT YResolution;	/*                   dw      ?       ; vertical resolution in pixels or characters */
+	UCHAR XCharSize;	/*                   db      ?       ; character cell width in pixels */
+	UCHAR YCharSize;	/*                   db      ?       ; character cell height in pixels */
+	UCHAR NumberOfPlanes;	/*                   db      ?       ; number of memory planes */
+	UCHAR BitsPerPixel;	/*                   db      ?       ; bits per pixel */
+	UCHAR NumberOfBanks;	/*                   db      ?       ; number of banks */
+	UCHAR MemoryModel;	/*                   db      ?       ; memory model type */
+	UCHAR BankSize;		/*                   db      ?       ; bank size in KB */
+	UCHAR NumberOfImagePages;	/*            db    ?       ; number of images */
+	UCHAR ReservedForPageFunction;	/* db  1       ; reserved for page function */
+
+/* ; Direct Color fields(required for direct/6 and YUV/7 memory models) */
+	UCHAR RedMaskSize;	/*           db      ?       ; size of direct color red mask in bits */
+	UCHAR RedFieldPosition;	/*           db      ?       ; bit position of lsb of red mask */
+	UCHAR GreenMaskSize;	/*           db      ?       ; size of direct color green mask in bits */
+	UCHAR GreenFieldPosition;	/*           db      ?       ; bit position of lsb of green mask */
+	UCHAR BlueMaskSize;	/*           db      ?       ; size of direct color blue mask in bits */
+	UCHAR BlueFieldPosition;	/*           db      ?       ; bit position of lsb of blue mask */
+	UCHAR RsvdMaskSize;	/*           db      ?       ; size of direct color reserved mask in bits */
+	UCHAR RsvdFieldPosition;	/*           db      ?       ; bit position of lsb of reserved mask */
+	UCHAR DirectColorModeInfo;	/*           db      ?       ; direct color mode attributes */
+
+/* ; Mandatory information for VBE 2.0 and above */
+	ULONG PhysBasePtr;	/*           dd      ?       ; physical address for flat memory frame buffer */
+	ULONG Reserved_1;	/*           dd      0       ; reserved - always set to 0 */
+	USHORT Reserved_2;	/*     dw    0       ; reserved - always set to 0 */
+
+/* ; Mandatory information for VBE 3.0 and above */
+	USHORT LinBytesPerScanLine;	/*         dw      ?       ; bytes per scan line for linear modes */
+	UCHAR BnkNumberOfImagePages;	/*         db      ?       ; number of images for banked modes */
+	UCHAR LinNumberOfImagPages;	/*         db      ?       ; number of images for linear modes */
+	UCHAR LinRedMaskSize;	/*         db      ?       ; size of direct color red mask(linear modes) */
+	UCHAR LinRedFieldPosition;	/*         db      ?       ; bit position of lsb of red mask(linear modes) */
+	UCHAR LinGreenMaskSize;	/*         db      ?       ; size of direct color green mask(linear modes) */
+	UCHAR LinGreenFieldPosition;	/*         db      ?       ; bit position of lsb of green mask(linear modes) */
+	UCHAR LinBlueMaskSize;	/*         db      ?       ; size of direct color blue mask(linear modes) */
+	UCHAR LinBlueFieldPosition;	/*         db      ?       ; bit position of lsb of blue mask(linear modes) */
+	UCHAR LinRsvdMaskSize;	/*         db      ?       ; size of direct color reserved mask(linear modes) */
+	UCHAR LinRsvdFieldPosition;	/*         db      ?       ; bit position of lsb of reserved mask(linear modes) */
+	ULONG MaxPixelClock;	/*         dd      ?       ; maximum pixel clock(in Hz) for graphics mode */
+	UCHAR Reserved;		/*         db      190 dup (0) */
+} VESA_MODE_INFO_BLOCK;
+
+/*  BIOS function CALLS */
+#define ATOM_BIOS_EXTENDED_FUNCTION_CODE        0xA0	/*  ATI Extended Function code */
+#define ATOM_BIOS_FUNCTION_COP_MODE             0x00
+#define ATOM_BIOS_FUNCTION_SHORT_QUERY1         0x04
+#define ATOM_BIOS_FUNCTION_SHORT_QUERY2         0x05
+#define ATOM_BIOS_FUNCTION_SHORT_QUERY3         0x06
+#define ATOM_BIOS_FUNCTION_GET_DDC              0x0B
+#define ATOM_BIOS_FUNCTION_ASIC_DSTATE          0x0E
+#define ATOM_BIOS_FUNCTION_DEBUG_PLAY           0x0F
+#define ATOM_BIOS_FUNCTION_STV_STD              0x16
+#define ATOM_BIOS_FUNCTION_DEVICE_DET           0x17
+#define ATOM_BIOS_FUNCTION_DEVICE_SWITCH        0x18
+
+#define ATOM_BIOS_FUNCTION_PANEL_CONTROL        0x82
+#define ATOM_BIOS_FUNCTION_OLD_DEVICE_DET       0x83
+#define ATOM_BIOS_FUNCTION_OLD_DEVICE_SWITCH    0x84
+#define ATOM_BIOS_FUNCTION_HW_ICON              0x8A
+#define ATOM_BIOS_FUNCTION_SET_CMOS             0x8B
+#define SUB_FUNCTION_UPDATE_DISPLAY_INFO        0x8000	/*  Sub function 80 */
+#define SUB_FUNCTION_UPDATE_EXPANSION_INFO      0x8100	/*  Sub function 80 */
+
+#define ATOM_BIOS_FUNCTION_DISPLAY_INFO         0x8D
+#define ATOM_BIOS_FUNCTION_DEVICE_ON_OFF        0x8E
+#define ATOM_BIOS_FUNCTION_VIDEO_STATE          0x8F
+#define ATOM_SUB_FUNCTION_GET_CRITICAL_STATE    0x0300	/*  Sub function 03 */
+#define ATOM_SUB_FUNCTION_GET_LIDSTATE          0x0700	/*  Sub function 7 */
+#define ATOM_SUB_FUNCTION_THERMAL_STATE_NOTICE  0x1400	/*  Notify caller the current thermal state */
+#define ATOM_SUB_FUNCTION_CRITICAL_STATE_NOTICE 0x8300	/*  Notify caller the current critical state */
+#define ATOM_SUB_FUNCTION_SET_LIDSTATE          0x8500	/*  Sub function 85 */
+#define ATOM_SUB_FUNCTION_GET_REQ_DISPLAY_FROM_SBIOS_MODE 0x8900	/*  Sub function 89 */
+#define ATOM_SUB_FUNCTION_INFORM_ADC_SUPPORT    0x9400	/*  Notify caller that ADC is supported */
+
+#define ATOM_BIOS_FUNCTION_VESA_DPMS            0x4F10	/*  Set DPMS */
+#define ATOM_SUB_FUNCTION_SET_DPMS              0x0001	/*  BL: Sub function 01 */
+#define ATOM_SUB_FUNCTION_GET_DPMS              0x0002	/*  BL: Sub function 02 */
+#define ATOM_PARAMETER_VESA_DPMS_ON             0x0000	/*  BH Parameter for DPMS ON. */
+#define ATOM_PARAMETER_VESA_DPMS_STANDBY        0x0100	/*  BH Parameter for DPMS STANDBY */
+#define ATOM_PARAMETER_VESA_DPMS_SUSPEND        0x0200	/*  BH Parameter for DPMS SUSPEND */
+#define ATOM_PARAMETER_VESA_DPMS_OFF            0x0400	/*  BH Parameter for DPMS OFF */
+#define ATOM_PARAMETER_VESA_DPMS_REDUCE_ON      0x0800	/*  BH Parameter for DPMS REDUCE ON (NOT SUPPORTED) */
+
+#define ATOM_BIOS_RETURN_CODE_MASK              0x0000FF00L
+#define ATOM_BIOS_REG_HIGH_MASK                 0x0000FF00L
+#define ATOM_BIOS_REG_LOW_MASK                  0x000000FFL
+
+/*  structure used for VBIOS only */
+
+/* DispOutInfoTable */
+typedef struct _ASIC_TRANSMITTER_INFO {
+	USHORT usTransmitterObjId;
+	USHORT usSupportDevice;
+	UCHAR ucTransmitterCmdTblId;
+	UCHAR ucConfig;
+	UCHAR ucEncoderID;	/* available 1st encoder ( default ) */
+	UCHAR ucOptionEncoderID;	/* available 2nd encoder ( optional ) */
+	UCHAR uc2ndEncoderID;
+	UCHAR ucReserved;
+} ASIC_TRANSMITTER_INFO;
+
+typedef struct _ASIC_ENCODER_INFO {
+	UCHAR ucEncoderID;
+	UCHAR ucEncoderConfig;
+	USHORT usEncoderCmdTblId;
+} ASIC_ENCODER_INFO;
+
+typedef struct _ATOM_DISP_OUT_INFO {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	USHORT ptrTransmitterInfo;
+	USHORT ptrEncoderInfo;
+	ASIC_TRANSMITTER_INFO asTransmitterInfo[1];
+	ASIC_ENCODER_INFO asEncoderInfo[1];
+} ATOM_DISP_OUT_INFO;
+
+/*  DispDevicePriorityInfo */
+typedef struct _ATOM_DISPLAY_DEVICE_PRIORITY_INFO {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	USHORT asDevicePriority[16];
+} ATOM_DISPLAY_DEVICE_PRIORITY_INFO;
+
+/* ProcessAuxChannelTransactionTable */
+typedef struct _PROCESS_AUX_CHANNEL_TRANSACTION_PARAMETERS {
+	USHORT lpAuxRequest;
+	USHORT lpDataOut;
+	UCHAR ucChannelID;
+	union {
+		UCHAR ucReplyStatus;
+		UCHAR ucDelay;
+	};
+	UCHAR ucDataOutLen;
+	UCHAR ucReserved;
+} PROCESS_AUX_CHANNEL_TRANSACTION_PARAMETERS;
+
+#define PROCESS_AUX_CHANNEL_TRANSACTION_PS_ALLOCATION			PROCESS_AUX_CHANNEL_TRANSACTION_PARAMETERS
+
+/* GetSinkType */
+
+typedef struct _DP_ENCODER_SERVICE_PARAMETERS {
+	USHORT ucLinkClock;
+	union {
+		UCHAR ucConfig;	/*  for DP training command */
+		UCHAR ucI2cId;	/*  use for GET_SINK_TYPE command */
+	};
+	UCHAR ucAction;
+	UCHAR ucStatus;
+	UCHAR ucLaneNum;
+	UCHAR ucReserved[2];
+} DP_ENCODER_SERVICE_PARAMETERS;
+
+/*  ucAction */
+#define ATOM_DP_ACTION_GET_SINK_TYPE							0x01
+#define ATOM_DP_ACTION_TRAINING_START							0x02
+#define ATOM_DP_ACTION_TRAINING_COMPLETE					0x03
+#define ATOM_DP_ACTION_TRAINING_PATTERN_SEL				0x04
+#define ATOM_DP_ACTION_SET_VSWING_PREEMP					0x05
+#define ATOM_DP_ACTION_GET_VSWING_PREEMP					0x06
+#define ATOM_DP_ACTION_BLANKING                   0x07
+
+/*  ucConfig */
+#define ATOM_DP_CONFIG_ENCODER_SEL_MASK						0x03
+#define ATOM_DP_CONFIG_DIG1_ENCODER								0x00
+#define ATOM_DP_CONFIG_DIG2_ENCODER								0x01
+#define ATOM_DP_CONFIG_EXTERNAL_ENCODER						0x02
+#define ATOM_DP_CONFIG_LINK_SEL_MASK							0x04
+#define ATOM_DP_CONFIG_LINK_A											0x00
+#define ATOM_DP_CONFIG_LINK_B											0x04
+
+#define DP_ENCODER_SERVICE_PS_ALLOCATION				WRITE_ONE_BYTE_HW_I2C_DATA_PARAMETERS
+
+/*  DP_TRAINING_TABLE */
+#define DPCD_SET_LINKRATE_LANENUM_PATTERN1_TBL_ADDR				ATOM_DP_TRAINING_TBL_ADDR
+#define DPCD_SET_SS_CNTL_TBL_ADDR													(ATOM_DP_TRAINING_TBL_ADDR + 8 )
+#define DPCD_SET_LANE_VSWING_PREEMP_TBL_ADDR							(ATOM_DP_TRAINING_TBL_ADDR + 16)
+#define DPCD_SET_TRAINING_PATTERN0_TBL_ADDR								(ATOM_DP_TRAINING_TBL_ADDR + 24)
+#define DPCD_SET_TRAINING_PATTERN2_TBL_ADDR								(ATOM_DP_TRAINING_TBL_ADDR + 32)
+#define DPCD_GET_LINKRATE_LANENUM_SS_TBL_ADDR							(ATOM_DP_TRAINING_TBL_ADDR + 40)
+#define	DPCD_GET_LANE_STATUS_ADJUST_TBL_ADDR							(ATOM_DP_TRAINING_TBL_ADDR + 48)
+#define DP_I2C_AUX_DDC_WRITE_START_TBL_ADDR								(ATOM_DP_TRAINING_TBL_ADDR + 60)
+#define DP_I2C_AUX_DDC_WRITE_TBL_ADDR											(ATOM_DP_TRAINING_TBL_ADDR + 64)
+#define DP_I2C_AUX_DDC_READ_START_TBL_ADDR								(ATOM_DP_TRAINING_TBL_ADDR + 72)
+#define DP_I2C_AUX_DDC_READ_TBL_ADDR											(ATOM_DP_TRAINING_TBL_ADDR + 76)
+#define DP_I2C_AUX_DDC_READ_END_TBL_ADDR									(ATOM_DP_TRAINING_TBL_ADDR + 80)
+
+typedef struct _PROCESS_I2C_CHANNEL_TRANSACTION_PARAMETERS {
+	UCHAR ucI2CSpeed;
+	union {
+		UCHAR ucRegIndex;
+		UCHAR ucStatus;
+	};
+	USHORT lpI2CDataOut;
+	UCHAR ucFlag;
+	UCHAR ucTransBytes;
+	UCHAR ucSlaveAddr;
+	UCHAR ucLineNumber;
+} PROCESS_I2C_CHANNEL_TRANSACTION_PARAMETERS;
+
+#define PROCESS_I2C_CHANNEL_TRANSACTION_PS_ALLOCATION       PROCESS_I2C_CHANNEL_TRANSACTION_PARAMETERS
+
+/* ucFlag */
+#define HW_I2C_WRITE        1
+#define HW_I2C_READ         0
+
+/****************************************************************************/
+/* Portion VI: Definitinos being oboselete */
+/****************************************************************************/
+
+/* ========================================================================================== */
+/* Remove the definitions below when driver is ready! */
+typedef struct _ATOM_DAC_INFO {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	USHORT usMaxFrequency;	/*  in 10kHz unit */
+	USHORT usReserved;
+} ATOM_DAC_INFO;
+
+typedef struct _COMPASSIONATE_DATA {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+
+	/* ==============================  DAC1 portion */
+	UCHAR ucDAC1_BG_Adjustment;
+	UCHAR ucDAC1_DAC_Adjustment;
+	USHORT usDAC1_FORCE_Data;
+	/* ==============================  DAC2 portion */
+	UCHAR ucDAC2_CRT2_BG_Adjustment;
+	UCHAR ucDAC2_CRT2_DAC_Adjustment;
+	USHORT usDAC2_CRT2_FORCE_Data;
+	USHORT usDAC2_CRT2_MUX_RegisterIndex;
+	UCHAR ucDAC2_CRT2_MUX_RegisterInfo;	/* Bit[4:0]=Bit position,Bit[7]=1:Active High;=0 Active Low */
+	UCHAR ucDAC2_NTSC_BG_Adjustment;
+	UCHAR ucDAC2_NTSC_DAC_Adjustment;
+	USHORT usDAC2_TV1_FORCE_Data;
+	USHORT usDAC2_TV1_MUX_RegisterIndex;
+	UCHAR ucDAC2_TV1_MUX_RegisterInfo;	/* Bit[4:0]=Bit position,Bit[7]=1:Active High;=0 Active Low */
+	UCHAR ucDAC2_CV_BG_Adjustment;
+	UCHAR ucDAC2_CV_DAC_Adjustment;
+	USHORT usDAC2_CV_FORCE_Data;
+	USHORT usDAC2_CV_MUX_RegisterIndex;
+	UCHAR ucDAC2_CV_MUX_RegisterInfo;	/* Bit[4:0]=Bit position,Bit[7]=1:Active High;=0 Active Low */
+	UCHAR ucDAC2_PAL_BG_Adjustment;
+	UCHAR ucDAC2_PAL_DAC_Adjustment;
+	USHORT usDAC2_TV2_FORCE_Data;
+} COMPASSIONATE_DATA;
+
+/****************************Supported Device Info Table Definitions**********************/
+/*   ucConnectInfo: */
+/*     [7:4] - connector type */
+/*       = 1   - VGA connector */
+/*       = 2   - DVI-I */
+/*       = 3   - DVI-D */
+/*       = 4   - DVI-A */
+/*       = 5   - SVIDEO */
+/*       = 6   - COMPOSITE */
+/*       = 7   - LVDS */
+/*       = 8   - DIGITAL LINK */
+/*       = 9   - SCART */
+/*       = 0xA - HDMI_type A */
+/*       = 0xB - HDMI_type B */
+/*       = 0xE - Special case1 (DVI+DIN) */
+/*       Others=TBD */
+/*     [3:0] - DAC Associated */
+/*       = 0   - no DAC */
+/*       = 1   - DACA */
+/*       = 2   - DACB */
+/*       = 3   - External DAC */
+/*       Others=TBD */
+/*  */
+
+typedef struct _ATOM_CONNECTOR_INFO {
+#if ATOM_BIG_ENDIAN
+	UCHAR bfConnectorType:4;
+	UCHAR bfAssociatedDAC:4;
+#else
+	UCHAR bfAssociatedDAC:4;
+	UCHAR bfConnectorType:4;
+#endif
+} ATOM_CONNECTOR_INFO;
+
+typedef union _ATOM_CONNECTOR_INFO_ACCESS {
+	ATOM_CONNECTOR_INFO sbfAccess;
+	UCHAR ucAccess;
+} ATOM_CONNECTOR_INFO_ACCESS;
+
+typedef struct _ATOM_CONNECTOR_INFO_I2C {
+	ATOM_CONNECTOR_INFO_ACCESS sucConnectorInfo;
+	ATOM_I2C_ID_CONFIG_ACCESS sucI2cId;
+} ATOM_CONNECTOR_INFO_I2C;
+
+typedef struct _ATOM_SUPPORTED_DEVICES_INFO {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	USHORT usDeviceSupport;
+	ATOM_CONNECTOR_INFO_I2C asConnInfo[ATOM_MAX_SUPPORTED_DEVICE_INFO];
+} ATOM_SUPPORTED_DEVICES_INFO;
+
+#define NO_INT_SRC_MAPPED       0xFF
+
+typedef struct _ATOM_CONNECTOR_INC_SRC_BITMAP {
+	UCHAR ucIntSrcBitmap;
+} ATOM_CONNECTOR_INC_SRC_BITMAP;
+
+typedef struct _ATOM_SUPPORTED_DEVICES_INFO_2 {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	USHORT usDeviceSupport;
+	ATOM_CONNECTOR_INFO_I2C asConnInfo[ATOM_MAX_SUPPORTED_DEVICE_INFO_2];
+	ATOM_CONNECTOR_INC_SRC_BITMAP
+	    asIntSrcInfo[ATOM_MAX_SUPPORTED_DEVICE_INFO_2];
+} ATOM_SUPPORTED_DEVICES_INFO_2;
+
+typedef struct _ATOM_SUPPORTED_DEVICES_INFO_2d1 {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	USHORT usDeviceSupport;
+	ATOM_CONNECTOR_INFO_I2C asConnInfo[ATOM_MAX_SUPPORTED_DEVICE];
+	ATOM_CONNECTOR_INC_SRC_BITMAP asIntSrcInfo[ATOM_MAX_SUPPORTED_DEVICE];
+} ATOM_SUPPORTED_DEVICES_INFO_2d1;
+
+#define ATOM_SUPPORTED_DEVICES_INFO_LAST ATOM_SUPPORTED_DEVICES_INFO_2d1
+
+typedef struct _ATOM_MISC_CONTROL_INFO {
+	USHORT usFrequency;
+	UCHAR ucPLL_ChargePump;	/*  PLL charge-pump gain control */
+	UCHAR ucPLL_DutyCycle;	/*  PLL duty cycle control */
+	UCHAR ucPLL_VCO_Gain;	/*  PLL VCO gain control */
+	UCHAR ucPLL_VoltageSwing;	/*  PLL driver voltage swing control */
+} ATOM_MISC_CONTROL_INFO;
+
+#define ATOM_MAX_MISC_INFO       4
+
+typedef struct _ATOM_TMDS_INFO {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	USHORT usMaxFrequency;	/*  in 10Khz */
+	ATOM_MISC_CONTROL_INFO asMiscInfo[ATOM_MAX_MISC_INFO];
+} ATOM_TMDS_INFO;
+
+typedef struct _ATOM_ENCODER_ANALOG_ATTRIBUTE {
+	UCHAR ucTVStandard;	/* Same as TV standards defined above, */
+	UCHAR ucPadding[1];
+} ATOM_ENCODER_ANALOG_ATTRIBUTE;
+
+typedef struct _ATOM_ENCODER_DIGITAL_ATTRIBUTE {
+	UCHAR ucAttribute;	/* Same as other digital encoder attributes defined above */
+	UCHAR ucPadding[1];
+} ATOM_ENCODER_DIGITAL_ATTRIBUTE;
+
+typedef union _ATOM_ENCODER_ATTRIBUTE {
+	ATOM_ENCODER_ANALOG_ATTRIBUTE sAlgAttrib;
+	ATOM_ENCODER_DIGITAL_ATTRIBUTE sDigAttrib;
+} ATOM_ENCODER_ATTRIBUTE;
+
+typedef struct _DVO_ENCODER_CONTROL_PARAMETERS {
+	USHORT usPixelClock;
+	USHORT usEncoderID;
+	UCHAR ucDeviceType;	/* Use ATOM_DEVICE_xxx1_Index to indicate device type only. */
+	UCHAR ucAction;		/* ATOM_ENABLE/ATOM_DISABLE/ATOM_HPD_INIT */
+	ATOM_ENCODER_ATTRIBUTE usDevAttr;
+} DVO_ENCODER_CONTROL_PARAMETERS;
+
+typedef struct _DVO_ENCODER_CONTROL_PS_ALLOCATION {
+	DVO_ENCODER_CONTROL_PARAMETERS sDVOEncoder;
+	WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION sReserved;	/* Caller doesn't need to init this portion */
+} DVO_ENCODER_CONTROL_PS_ALLOCATION;
+
+#define ATOM_XTMDS_ASIC_SI164_ID        1
+#define ATOM_XTMDS_ASIC_SI178_ID        2
+#define ATOM_XTMDS_ASIC_TFP513_ID       3
+#define ATOM_XTMDS_SUPPORTED_SINGLELINK 0x00000001
+#define ATOM_XTMDS_SUPPORTED_DUALLINK   0x00000002
+#define ATOM_XTMDS_MVPU_FPGA            0x00000004
+
+typedef struct _ATOM_XTMDS_INFO {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	USHORT usSingleLinkMaxFrequency;
+	ATOM_I2C_ID_CONFIG_ACCESS sucI2cId;	/* Point the ID on which I2C is used to control external chip */
+	UCHAR ucXtransimitterID;
+	UCHAR ucSupportedLink;	/*  Bit field, bit0=1, single link supported;bit1=1,dual link supported */
+	UCHAR ucSequnceAlterID;	/*  Even with the same external TMDS asic, it's possible that the program seqence alters */
+	/*  due to design. This ID is used to alert driver that the sequence is not "standard"! */
+	UCHAR ucMasterAddress;	/*  Address to control Master xTMDS Chip */
+	UCHAR ucSlaveAddress;	/*  Address to control Slave xTMDS Chip */
+} ATOM_XTMDS_INFO;
+
+typedef struct _DFP_DPMS_STATUS_CHANGE_PARAMETERS {
+	UCHAR ucEnable;		/*  ATOM_ENABLE=On or ATOM_DISABLE=Off */
+	UCHAR ucDevice;		/*  ATOM_DEVICE_DFP1_INDEX.... */
+	UCHAR ucPadding[2];
+} DFP_DPMS_STATUS_CHANGE_PARAMETERS;
+
+/****************************Legacy Power Play Table Definitions **********************/
+
+/* Definitions for ulPowerPlayMiscInfo */
+#define ATOM_PM_MISCINFO_SPLIT_CLOCK                     0x00000000L
+#define ATOM_PM_MISCINFO_USING_MCLK_SRC                  0x00000001L
+#define ATOM_PM_MISCINFO_USING_SCLK_SRC                  0x00000002L
+
+#define ATOM_PM_MISCINFO_VOLTAGE_DROP_SUPPORT            0x00000004L
+#define ATOM_PM_MISCINFO_VOLTAGE_DROP_ACTIVE_HIGH        0x00000008L
+
+#define ATOM_PM_MISCINFO_LOAD_PERFORMANCE_EN             0x00000010L
+
+#define ATOM_PM_MISCINFO_ENGINE_CLOCK_CONTRL_EN          0x00000020L
+#define ATOM_PM_MISCINFO_MEMORY_CLOCK_CONTRL_EN          0x00000040L
+#define ATOM_PM_MISCINFO_PROGRAM_VOLTAGE                 0x00000080L	/* When this bit set, ucVoltageDropIndex is not an index for GPIO pin, but a voltage ID that SW needs program */
+
+#define ATOM_PM_MISCINFO_ASIC_REDUCED_SPEED_SCLK_EN      0x00000100L
+#define ATOM_PM_MISCINFO_ASIC_DYNAMIC_VOLTAGE_EN         0x00000200L
+#define ATOM_PM_MISCINFO_ASIC_SLEEP_MODE_EN              0x00000400L
+#define ATOM_PM_MISCINFO_LOAD_BALANCE_EN                 0x00000800L
+#define ATOM_PM_MISCINFO_DEFAULT_DC_STATE_ENTRY_TRUE     0x00001000L
+#define ATOM_PM_MISCINFO_DEFAULT_LOW_DC_STATE_ENTRY_TRUE 0x00002000L
+#define ATOM_PM_MISCINFO_LOW_LCD_REFRESH_RATE            0x00004000L
+
+#define ATOM_PM_MISCINFO_DRIVER_DEFAULT_MODE             0x00008000L
+#define ATOM_PM_MISCINFO_OVER_CLOCK_MODE                 0x00010000L
+#define ATOM_PM_MISCINFO_OVER_DRIVE_MODE                 0x00020000L
+#define ATOM_PM_MISCINFO_POWER_SAVING_MODE               0x00040000L
+#define ATOM_PM_MISCINFO_THERMAL_DIODE_MODE              0x00080000L
+
+#define ATOM_PM_MISCINFO_FRAME_MODULATION_MASK           0x00300000L	/* 0-FM Disable, 1-2 level FM, 2-4 level FM, 3-Reserved */
+#define ATOM_PM_MISCINFO_FRAME_MODULATION_SHIFT          20
+
+#define ATOM_PM_MISCINFO_DYN_CLK_3D_IDLE                 0x00400000L
+#define ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_2      0x00800000L
+#define ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_4      0x01000000L
+#define ATOM_PM_MISCINFO_DYNAMIC_HDP_BLOCK_EN            0x02000000L	/* When set, Dynamic */
+#define ATOM_PM_MISCINFO_DYNAMIC_MC_HOST_BLOCK_EN        0x04000000L	/* When set, Dynamic */
+#define ATOM_PM_MISCINFO_3D_ACCELERATION_EN              0x08000000L	/* When set, This mode is for acceleated 3D mode */
+
+#define ATOM_PM_MISCINFO_POWERPLAY_SETTINGS_GROUP_MASK   0x70000000L	/* 1-Optimal Battery Life Group, 2-High Battery, 3-Balanced, 4-High Performance, 5- Optimal Performance (Default state with Default clocks) */
+#define ATOM_PM_MISCINFO_POWERPLAY_SETTINGS_GROUP_SHIFT  28
+#define ATOM_PM_MISCINFO_ENABLE_BACK_BIAS                0x80000000L
+
+#define ATOM_PM_MISCINFO2_SYSTEM_AC_LITE_MODE            0x00000001L
+#define ATOM_PM_MISCINFO2_MULTI_DISPLAY_SUPPORT          0x00000002L
+#define ATOM_PM_MISCINFO2_DYNAMIC_BACK_BIAS_EN           0x00000004L
+#define ATOM_PM_MISCINFO2_FS3D_OVERDRIVE_INFO            0x00000008L
+#define ATOM_PM_MISCINFO2_FORCEDLOWPWR_MODE              0x00000010L
+#define ATOM_PM_MISCINFO2_VDDCI_DYNAMIC_VOLTAGE_EN       0x00000020L
+#define ATOM_PM_MISCINFO2_VIDEO_PLAYBACK_CAPABLE         0x00000040L	/* If this bit is set in multi-pp mode, then driver will pack up one with the minior power consumption. */
+								      /* If it's not set in any pp mode, driver will use its default logic to pick a pp mode in video playback */
+#define ATOM_PM_MISCINFO2_NOT_VALID_ON_DC                0x00000080L
+#define ATOM_PM_MISCINFO2_STUTTER_MODE_EN                0x00000100L
+#define ATOM_PM_MISCINFO2_UVD_SUPPORT_MODE               0x00000200L
+
+/* ucTableFormatRevision=1 */
+/* ucTableContentRevision=1 */
+typedef struct _ATOM_POWERMODE_INFO {
+	ULONG ulMiscInfo;	/* The power level should be arranged in ascending order */
+	ULONG ulReserved1;	/*  must set to 0 */
+	ULONG ulReserved2;	/*  must set to 0 */
+	USHORT usEngineClock;
+	USHORT usMemoryClock;
+	UCHAR ucVoltageDropIndex;	/*  index to GPIO table */
+	UCHAR ucSelectedPanel_RefreshRate;	/*  panel refresh rate */
+	UCHAR ucMinTemperature;
+	UCHAR ucMaxTemperature;
+	UCHAR ucNumPciELanes;	/*  number of PCIE lanes */
+} ATOM_POWERMODE_INFO;
+
+/* ucTableFormatRevision=2 */
+/* ucTableContentRevision=1 */
+typedef struct _ATOM_POWERMODE_INFO_V2 {
+	ULONG ulMiscInfo;	/* The power level should be arranged in ascending order */
+	ULONG ulMiscInfo2;
+	ULONG ulEngineClock;
+	ULONG ulMemoryClock;
+	UCHAR ucVoltageDropIndex;	/*  index to GPIO table */
+	UCHAR ucSelectedPanel_RefreshRate;	/*  panel refresh rate */
+	UCHAR ucMinTemperature;
+	UCHAR ucMaxTemperature;
+	UCHAR ucNumPciELanes;	/*  number of PCIE lanes */
+} ATOM_POWERMODE_INFO_V2;
+
+/* ucTableFormatRevision=2 */
+/* ucTableContentRevision=2 */
+typedef struct _ATOM_POWERMODE_INFO_V3 {
+	ULONG ulMiscInfo;	/* The power level should be arranged in ascending order */
+	ULONG ulMiscInfo2;
+	ULONG ulEngineClock;
+	ULONG ulMemoryClock;
+	UCHAR ucVoltageDropIndex;	/*  index to Core (VDDC) votage table */
+	UCHAR ucSelectedPanel_RefreshRate;	/*  panel refresh rate */
+	UCHAR ucMinTemperature;
+	UCHAR ucMaxTemperature;
+	UCHAR ucNumPciELanes;	/*  number of PCIE lanes */
+	UCHAR ucVDDCI_VoltageDropIndex;	/*  index to VDDCI votage table */
+} ATOM_POWERMODE_INFO_V3;
+
+#define ATOM_MAX_NUMBEROF_POWER_BLOCK  8
+
+#define ATOM_PP_OVERDRIVE_INTBITMAP_AUXWIN            0x01
+#define ATOM_PP_OVERDRIVE_INTBITMAP_OVERDRIVE         0x02
+
+#define ATOM_PP_OVERDRIVE_THERMALCONTROLLER_LM63      0x01
+#define ATOM_PP_OVERDRIVE_THERMALCONTROLLER_ADM1032   0x02
+#define ATOM_PP_OVERDRIVE_THERMALCONTROLLER_ADM1030   0x03
+#define ATOM_PP_OVERDRIVE_THERMALCONTROLLER_MUA6649   0x04
+#define ATOM_PP_OVERDRIVE_THERMALCONTROLLER_LM64      0x05
+#define ATOM_PP_OVERDRIVE_THERMALCONTROLLER_F75375    0x06
+#define ATOM_PP_OVERDRIVE_THERMALCONTROLLER_ASC7512   0x07	/*  Andigilog */
+
+typedef struct _ATOM_POWERPLAY_INFO {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	UCHAR ucOverdriveThermalController;
+	UCHAR ucOverdriveI2cLine;
+	UCHAR ucOverdriveIntBitmap;
+	UCHAR ucOverdriveControllerAddress;
+	UCHAR ucSizeOfPowerModeEntry;
+	UCHAR ucNumOfPowerModeEntries;
+	ATOM_POWERMODE_INFO asPowerPlayInfo[ATOM_MAX_NUMBEROF_POWER_BLOCK];
+} ATOM_POWERPLAY_INFO;
+
+typedef struct _ATOM_POWERPLAY_INFO_V2 {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	UCHAR ucOverdriveThermalController;
+	UCHAR ucOverdriveI2cLine;
+	UCHAR ucOverdriveIntBitmap;
+	UCHAR ucOverdriveControllerAddress;
+	UCHAR ucSizeOfPowerModeEntry;
+	UCHAR ucNumOfPowerModeEntries;
+	ATOM_POWERMODE_INFO_V2 asPowerPlayInfo[ATOM_MAX_NUMBEROF_POWER_BLOCK];
+} ATOM_POWERPLAY_INFO_V2;
+
+typedef struct _ATOM_POWERPLAY_INFO_V3 {
+	ATOM_COMMON_TABLE_HEADER sHeader;
+	UCHAR ucOverdriveThermalController;
+	UCHAR ucOverdriveI2cLine;
+	UCHAR ucOverdriveIntBitmap;
+	UCHAR ucOverdriveControllerAddress;
+	UCHAR ucSizeOfPowerModeEntry;
+	UCHAR ucNumOfPowerModeEntries;
+	ATOM_POWERMODE_INFO_V3 asPowerPlayInfo[ATOM_MAX_NUMBEROF_POWER_BLOCK];
+} ATOM_POWERPLAY_INFO_V3;
+
+/**************************************************************************/
+
+/*  Following definitions are for compatiblity issue in different SW components. */
+#define ATOM_MASTER_DATA_TABLE_REVISION   0x01
+#define Object_Info												Object_Header
+#define	AdjustARB_SEQ											MC_InitParameter
+#define	VRAM_GPIO_DetectionInfo						VoltageObjectInfo
+#define	ASIC_VDDCI_Info                   ASIC_ProfilingInfo
+#define ASIC_MVDDQ_Info										MemoryTrainingInfo
+#define SS_Info                           PPLL_SS_Info
+#define ASIC_MVDDC_Info                   ASIC_InternalSS_Info
+#define DispDevicePriorityInfo						SaveRestoreInfo
+#define DispOutInfo												TV_VideoMode
+
+#define ATOM_ENCODER_OBJECT_TABLE         ATOM_OBJECT_TABLE
+#define ATOM_CONNECTOR_OBJECT_TABLE       ATOM_OBJECT_TABLE
+
+/* New device naming, remove them when both DAL/VBIOS is ready */
+#define DFP2I_OUTPUT_CONTROL_PARAMETERS    CRT1_OUTPUT_CONTROL_PARAMETERS
+#define DFP2I_OUTPUT_CONTROL_PS_ALLOCATION DFP2I_OUTPUT_CONTROL_PARAMETERS
+
+#define DFP1X_OUTPUT_CONTROL_PARAMETERS    CRT1_OUTPUT_CONTROL_PARAMETERS
+#define DFP1X_OUTPUT_CONTROL_PS_ALLOCATION DFP1X_OUTPUT_CONTROL_PARAMETERS
+
+#define DFP1I_OUTPUT_CONTROL_PARAMETERS    DFP1_OUTPUT_CONTROL_PARAMETERS
+#define DFP1I_OUTPUT_CONTROL_PS_ALLOCATION DFP1_OUTPUT_CONTROL_PS_ALLOCATION
+
+#define ATOM_DEVICE_DFP1I_SUPPORT          ATOM_DEVICE_DFP1_SUPPORT
+#define ATOM_DEVICE_DFP1X_SUPPORT          ATOM_DEVICE_DFP2_SUPPORT
+
+#define ATOM_DEVICE_DFP1I_INDEX            ATOM_DEVICE_DFP1_INDEX
+#define ATOM_DEVICE_DFP1X_INDEX            ATOM_DEVICE_DFP2_INDEX
+
+#define ATOM_DEVICE_DFP2I_INDEX            0x00000009
+#define ATOM_DEVICE_DFP2I_SUPPORT          (0x1L << ATOM_DEVICE_DFP2I_INDEX)
+
+#define ATOM_S0_DFP1I                      ATOM_S0_DFP1
+#define ATOM_S0_DFP1X                      ATOM_S0_DFP2
+
+#define ATOM_S0_DFP2I                      0x00200000L
+#define ATOM_S0_DFP2Ib2                    0x20
+
+#define ATOM_S2_DFP1I_DPMS_STATE           ATOM_S2_DFP1_DPMS_STATE
+#define ATOM_S2_DFP1X_DPMS_STATE           ATOM_S2_DFP2_DPMS_STATE
+
+#define ATOM_S2_DFP2I_DPMS_STATE           0x02000000L
+#define ATOM_S2_DFP2I_DPMS_STATEb3         0x02
+
+#define ATOM_S3_DFP2I_ACTIVEb1             0x02
+
+#define ATOM_S3_DFP1I_ACTIVE               ATOM_S3_DFP1_ACTIVE
+#define ATOM_S3_DFP1X_ACTIVE               ATOM_S3_DFP2_ACTIVE
+
+#define ATOM_S3_DFP2I_ACTIVE               0x00000200L
+
+#define ATOM_S3_DFP1I_CRTC_ACTIVE          ATOM_S3_DFP1_CRTC_ACTIVE
+#define ATOM_S3_DFP1X_CRTC_ACTIVE          ATOM_S3_DFP2_CRTC_ACTIVE
+#define ATOM_S3_DFP2I_CRTC_ACTIVE          0x02000000L
+
+#define ATOM_S3_DFP2I_CRTC_ACTIVEb3        0x02
+#define ATOM_S5_DOS_REQ_DFP2Ib1            0x02
+
+#define ATOM_S5_DOS_REQ_DFP2I              0x0200
+#define ATOM_S6_ACC_REQ_DFP1I              ATOM_S6_ACC_REQ_DFP1
+#define ATOM_S6_ACC_REQ_DFP1X              ATOM_S6_ACC_REQ_DFP2
+
+#define ATOM_S6_ACC_REQ_DFP2Ib3            0x02
+#define ATOM_S6_ACC_REQ_DFP2I              0x02000000L
+
+#define TMDS1XEncoderControl               DVOEncoderControl
+#define DFP1XOutputControl                 DVOOutputControl
+
+#define ExternalDFPOutputControl           DFP1XOutputControl
+#define EnableExternalTMDS_Encoder         TMDS1XEncoderControl
+
+#define DFP1IOutputControl                 TMDSAOutputControl
+#define DFP2IOutputControl                 LVTMAOutputControl
+
+#define DAC1_ENCODER_CONTROL_PARAMETERS    DAC_ENCODER_CONTROL_PARAMETERS
+#define DAC1_ENCODER_CONTROL_PS_ALLOCATION DAC_ENCODER_CONTROL_PS_ALLOCATION
+
+#define DAC2_ENCODER_CONTROL_PARAMETERS    DAC_ENCODER_CONTROL_PARAMETERS
+#define DAC2_ENCODER_CONTROL_PS_ALLOCATION DAC_ENCODER_CONTROL_PS_ALLOCATION
+
+#define ucDac1Standard  ucDacStandard
+#define ucDac2Standard  ucDacStandard
+
+#define TMDS1EncoderControl TMDSAEncoderControl
+#define TMDS2EncoderControl LVTMAEncoderControl
+
+#define DFP1OutputControl   TMDSAOutputControl
+#define DFP2OutputControl   LVTMAOutputControl
+#define CRT1OutputControl   DAC1OutputControl
+#define CRT2OutputControl   DAC2OutputControl
+
+/* These two lines will be removed for sure in a few days, will follow up with Michael V. */
+#define EnableLVDS_SS   EnableSpreadSpectrumOnPPLL
+#define ENABLE_LVDS_SS_PARAMETERS_V3  ENABLE_SPREAD_SPECTRUM_ON_PPLL
+
+/*********************************************************************************/
+
+#pragma pack()			/*  BIOS data must use byte aligment */
+
+#endif /* _ATOMBIOS_H */
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
new file mode 100644
index 00000000000..c0080cc9bf8
--- /dev/null
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -0,0 +1,695 @@
+/*
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ */
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/radeon_drm.h>
+#include "radeon_fixed.h"
+#include "radeon.h"
+#include "atom.h"
+#include "atom-bits.h"
+
+static void atombios_lock_crtc(struct drm_crtc *crtc, int lock)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	int index =
+	    GetIndexIntoMasterTable(COMMAND, UpdateCRTC_DoubleBufferRegisters);
+	ENABLE_CRTC_PS_ALLOCATION args;
+
+	memset(&args, 0, sizeof(args));
+
+	args.ucCRTC = radeon_crtc->crtc_id;
+	args.ucEnable = lock;
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+static void atombios_enable_crtc(struct drm_crtc *crtc, int state)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	int index = GetIndexIntoMasterTable(COMMAND, EnableCRTC);
+	ENABLE_CRTC_PS_ALLOCATION args;
+
+	memset(&args, 0, sizeof(args));
+
+	args.ucCRTC = radeon_crtc->crtc_id;
+	args.ucEnable = state;
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+static void atombios_enable_crtc_memreq(struct drm_crtc *crtc, int state)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	int index = GetIndexIntoMasterTable(COMMAND, EnableCRTCMemReq);
+	ENABLE_CRTC_PS_ALLOCATION args;
+
+	memset(&args, 0, sizeof(args));
+
+	args.ucCRTC = radeon_crtc->crtc_id;
+	args.ucEnable = state;
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+static void atombios_blank_crtc(struct drm_crtc *crtc, int state)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	int index = GetIndexIntoMasterTable(COMMAND, BlankCRTC);
+	BLANK_CRTC_PS_ALLOCATION args;
+
+	memset(&args, 0, sizeof(args));
+
+	args.ucCRTC = radeon_crtc->crtc_id;
+	args.ucBlanking = state;
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+void atombios_crtc_dpms(struct drm_crtc *crtc, int mode)
+{
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+
+	switch (mode) {
+	case DRM_MODE_DPMS_ON:
+		if (ASIC_IS_DCE3(rdev))
+			atombios_enable_crtc_memreq(crtc, 1);
+		atombios_enable_crtc(crtc, 1);
+		atombios_blank_crtc(crtc, 0);
+		break;
+	case DRM_MODE_DPMS_STANDBY:
+	case DRM_MODE_DPMS_SUSPEND:
+	case DRM_MODE_DPMS_OFF:
+		atombios_blank_crtc(crtc, 1);
+		atombios_enable_crtc(crtc, 0);
+		if (ASIC_IS_DCE3(rdev))
+			atombios_enable_crtc_memreq(crtc, 0);
+		break;
+	}
+
+	if (mode != DRM_MODE_DPMS_OFF) {
+		radeon_crtc_load_lut(crtc);
+	}
+}
+
+static void
+atombios_set_crtc_dtd_timing(struct drm_crtc *crtc,
+			     SET_CRTC_USING_DTD_TIMING_PARAMETERS * crtc_param)
+{
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	SET_CRTC_USING_DTD_TIMING_PARAMETERS conv_param;
+	int index = GetIndexIntoMasterTable(COMMAND, SetCRTC_UsingDTDTiming);
+
+	conv_param.usH_Size = cpu_to_le16(crtc_param->usH_Size);
+	conv_param.usH_Blanking_Time =
+	    cpu_to_le16(crtc_param->usH_Blanking_Time);
+	conv_param.usV_Size = cpu_to_le16(crtc_param->usV_Size);
+	conv_param.usV_Blanking_Time =
+	    cpu_to_le16(crtc_param->usV_Blanking_Time);
+	conv_param.usH_SyncOffset = cpu_to_le16(crtc_param->usH_SyncOffset);
+	conv_param.usH_SyncWidth = cpu_to_le16(crtc_param->usH_SyncWidth);
+	conv_param.usV_SyncOffset = cpu_to_le16(crtc_param->usV_SyncOffset);
+	conv_param.usV_SyncWidth = cpu_to_le16(crtc_param->usV_SyncWidth);
+	conv_param.susModeMiscInfo.usAccess =
+	    cpu_to_le16(crtc_param->susModeMiscInfo.usAccess);
+	conv_param.ucCRTC = crtc_param->ucCRTC;
+
+	printk("executing set crtc dtd timing\n");
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&conv_param);
+}
+
+void atombios_crtc_set_timing(struct drm_crtc *crtc,
+			      SET_CRTC_TIMING_PARAMETERS_PS_ALLOCATION *
+			      crtc_param)
+{
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	SET_CRTC_TIMING_PARAMETERS_PS_ALLOCATION conv_param;
+	int index = GetIndexIntoMasterTable(COMMAND, SetCRTC_Timing);
+
+	conv_param.usH_Total = cpu_to_le16(crtc_param->usH_Total);
+	conv_param.usH_Disp = cpu_to_le16(crtc_param->usH_Disp);
+	conv_param.usH_SyncStart = cpu_to_le16(crtc_param->usH_SyncStart);
+	conv_param.usH_SyncWidth = cpu_to_le16(crtc_param->usH_SyncWidth);
+	conv_param.usV_Total = cpu_to_le16(crtc_param->usV_Total);
+	conv_param.usV_Disp = cpu_to_le16(crtc_param->usV_Disp);
+	conv_param.usV_SyncStart = cpu_to_le16(crtc_param->usV_SyncStart);
+	conv_param.usV_SyncWidth = cpu_to_le16(crtc_param->usV_SyncWidth);
+	conv_param.susModeMiscInfo.usAccess =
+	    cpu_to_le16(crtc_param->susModeMiscInfo.usAccess);
+	conv_param.ucCRTC = crtc_param->ucCRTC;
+	conv_param.ucOverscanRight = crtc_param->ucOverscanRight;
+	conv_param.ucOverscanLeft = crtc_param->ucOverscanLeft;
+	conv_param.ucOverscanBottom = crtc_param->ucOverscanBottom;
+	conv_param.ucOverscanTop = crtc_param->ucOverscanTop;
+	conv_param.ucReserved = crtc_param->ucReserved;
+
+	printk("executing set crtc timing\n");
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&conv_param);
+}
+
+void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_encoder *encoder = NULL;
+	struct radeon_encoder *radeon_encoder = NULL;
+	uint8_t frev, crev;
+	int index = GetIndexIntoMasterTable(COMMAND, SetPixelClock);
+	SET_PIXEL_CLOCK_PS_ALLOCATION args;
+	PIXEL_CLOCK_PARAMETERS *spc1_ptr;
+	PIXEL_CLOCK_PARAMETERS_V2 *spc2_ptr;
+	PIXEL_CLOCK_PARAMETERS_V3 *spc3_ptr;
+	uint32_t sclock = mode->clock;
+	uint32_t ref_div = 0, fb_div = 0, frac_fb_div = 0, post_div = 0;
+	struct radeon_pll *pll;
+	int pll_flags = 0;
+
+	memset(&args, 0, sizeof(args));
+
+	if (ASIC_IS_AVIVO(rdev)) {
+		uint32_t ss_cntl;
+
+		if (ASIC_IS_DCE32(rdev) && mode->clock > 200000)	/* range limits??? */
+			pll_flags |= RADEON_PLL_PREFER_HIGH_FB_DIV;
+		else
+			pll_flags |= RADEON_PLL_PREFER_LOW_REF_DIV;
+
+		/* disable spread spectrum clocking for now -- thanks Hedy Lamarr */
+		if (radeon_crtc->crtc_id == 0) {
+			ss_cntl = RREG32(AVIVO_P1PLL_INT_SS_CNTL);
+			WREG32(AVIVO_P1PLL_INT_SS_CNTL, ss_cntl & ~1);
+		} else {
+			ss_cntl = RREG32(AVIVO_P2PLL_INT_SS_CNTL);
+			WREG32(AVIVO_P2PLL_INT_SS_CNTL, ss_cntl & ~1);
+		}
+	} else {
+		pll_flags |= RADEON_PLL_LEGACY;
+
+		if (mode->clock > 200000)	/* range limits??? */
+			pll_flags |= RADEON_PLL_PREFER_HIGH_FB_DIV;
+		else
+			pll_flags |= RADEON_PLL_PREFER_LOW_REF_DIV;
+
+	}
+
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+		if (encoder->crtc == crtc) {
+			if (!ASIC_IS_AVIVO(rdev)) {
+				if (encoder->encoder_type !=
+				    DRM_MODE_ENCODER_DAC)
+					pll_flags |= RADEON_PLL_NO_ODD_POST_DIV;
+				if (!ASIC_IS_AVIVO(rdev)
+				    && (encoder->encoder_type ==
+					DRM_MODE_ENCODER_LVDS))
+					pll_flags |= RADEON_PLL_USE_REF_DIV;
+			}
+			radeon_encoder = to_radeon_encoder(encoder);
+		}
+	}
+
+	if (radeon_crtc->crtc_id == 0)
+		pll = &rdev->clock.p1pll;
+	else
+		pll = &rdev->clock.p2pll;
+
+	radeon_compute_pll(pll, mode->clock, &sclock, &fb_div, &frac_fb_div,
+			   &ref_div, &post_div, pll_flags);
+
+	atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev,
+			      &crev);
+
+	switch (frev) {
+	case 1:
+		switch (crev) {
+		case 1:
+			spc1_ptr = (PIXEL_CLOCK_PARAMETERS *) & args.sPCLKInput;
+			spc1_ptr->usPixelClock = cpu_to_le16(sclock);
+			spc1_ptr->usRefDiv = cpu_to_le16(ref_div);
+			spc1_ptr->usFbDiv = cpu_to_le16(fb_div);
+			spc1_ptr->ucFracFbDiv = frac_fb_div;
+			spc1_ptr->ucPostDiv = post_div;
+			spc1_ptr->ucPpll =
+			    radeon_crtc->crtc_id ? ATOM_PPLL2 : ATOM_PPLL1;
+			spc1_ptr->ucCRTC = radeon_crtc->crtc_id;
+			spc1_ptr->ucRefDivSrc = 1;
+			break;
+		case 2:
+			spc2_ptr =
+			    (PIXEL_CLOCK_PARAMETERS_V2 *) & args.sPCLKInput;
+			spc2_ptr->usPixelClock = cpu_to_le16(sclock);
+			spc2_ptr->usRefDiv = cpu_to_le16(ref_div);
+			spc2_ptr->usFbDiv = cpu_to_le16(fb_div);
+			spc2_ptr->ucFracFbDiv = frac_fb_div;
+			spc2_ptr->ucPostDiv = post_div;
+			spc2_ptr->ucPpll =
+			    radeon_crtc->crtc_id ? ATOM_PPLL2 : ATOM_PPLL1;
+			spc2_ptr->ucCRTC = radeon_crtc->crtc_id;
+			spc2_ptr->ucRefDivSrc = 1;
+			break;
+		case 3:
+			if (!encoder)
+				return;
+			spc3_ptr =
+			    (PIXEL_CLOCK_PARAMETERS_V3 *) & args.sPCLKInput;
+			spc3_ptr->usPixelClock = cpu_to_le16(sclock);
+			spc3_ptr->usRefDiv = cpu_to_le16(ref_div);
+			spc3_ptr->usFbDiv = cpu_to_le16(fb_div);
+			spc3_ptr->ucFracFbDiv = frac_fb_div;
+			spc3_ptr->ucPostDiv = post_div;
+			spc3_ptr->ucPpll =
+			    radeon_crtc->crtc_id ? ATOM_PPLL2 : ATOM_PPLL1;
+			spc3_ptr->ucMiscInfo = (radeon_crtc->crtc_id << 2);
+			spc3_ptr->ucTransmitterId = radeon_encoder->encoder_id;
+			spc3_ptr->ucEncoderMode =
+			    atombios_get_encoder_mode(encoder);
+			break;
+		default:
+			DRM_ERROR("Unknown table version %d %d\n", frev, crev);
+			return;
+		}
+		break;
+	default:
+		DRM_ERROR("Unknown table version %d %d\n", frev, crev);
+		return;
+	}
+
+	printk("executing set pll\n");
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+int atombios_crtc_set_base(struct drm_crtc *crtc, int x, int y,
+			   struct drm_framebuffer *old_fb)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_framebuffer *radeon_fb;
+	struct drm_gem_object *obj;
+	struct drm_radeon_gem_object *obj_priv;
+	uint64_t fb_location;
+	uint32_t fb_format, fb_pitch_pixels;
+
+	if (!crtc->fb)
+		return -EINVAL;
+
+	radeon_fb = to_radeon_framebuffer(crtc->fb);
+
+	obj = radeon_fb->obj;
+	obj_priv = obj->driver_private;
+
+	if (radeon_gem_object_pin(obj, RADEON_GEM_DOMAIN_VRAM, &fb_location)) {
+		return -EINVAL;
+	}
+
+	switch (crtc->fb->bits_per_pixel) {
+	case 15:
+		fb_format =
+		    AVIVO_D1GRPH_CONTROL_DEPTH_16BPP |
+		    AVIVO_D1GRPH_CONTROL_16BPP_ARGB1555;
+		break;
+	case 16:
+		fb_format =
+		    AVIVO_D1GRPH_CONTROL_DEPTH_16BPP |
+		    AVIVO_D1GRPH_CONTROL_16BPP_RGB565;
+		break;
+	case 24:
+	case 32:
+		fb_format =
+		    AVIVO_D1GRPH_CONTROL_DEPTH_32BPP |
+		    AVIVO_D1GRPH_CONTROL_32BPP_ARGB8888;
+		break;
+	default:
+		DRM_ERROR("Unsupported screen depth %d\n",
+			  crtc->fb->bits_per_pixel);
+		return -EINVAL;
+	}
+
+	/* TODO tiling */
+	if (radeon_crtc->crtc_id == 0)
+		WREG32(AVIVO_D1VGA_CONTROL, 0);
+	else
+		WREG32(AVIVO_D2VGA_CONTROL, 0);
+	WREG32(AVIVO_D1GRPH_PRIMARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
+	       (u32) fb_location);
+	WREG32(AVIVO_D1GRPH_SECONDARY_SURFACE_ADDRESS +
+	       radeon_crtc->crtc_offset, (u32) fb_location);
+	WREG32(AVIVO_D1GRPH_CONTROL + radeon_crtc->crtc_offset, fb_format);
+
+	WREG32(AVIVO_D1GRPH_SURFACE_OFFSET_X + radeon_crtc->crtc_offset, 0);
+	WREG32(AVIVO_D1GRPH_SURFACE_OFFSET_Y + radeon_crtc->crtc_offset, 0);
+	WREG32(AVIVO_D1GRPH_X_START + radeon_crtc->crtc_offset, 0);
+	WREG32(AVIVO_D1GRPH_Y_START + radeon_crtc->crtc_offset, 0);
+	WREG32(AVIVO_D1GRPH_X_END + radeon_crtc->crtc_offset, crtc->fb->width);
+	WREG32(AVIVO_D1GRPH_Y_END + radeon_crtc->crtc_offset, crtc->fb->height);
+
+	fb_pitch_pixels = crtc->fb->pitch / (crtc->fb->bits_per_pixel / 8);
+	WREG32(AVIVO_D1GRPH_PITCH + radeon_crtc->crtc_offset, fb_pitch_pixels);
+	WREG32(AVIVO_D1GRPH_ENABLE + radeon_crtc->crtc_offset, 1);
+
+	WREG32(AVIVO_D1MODE_DESKTOP_HEIGHT + radeon_crtc->crtc_offset,
+	       crtc->mode.vdisplay);
+	x &= ~3;
+	y &= ~1;
+	WREG32(AVIVO_D1MODE_VIEWPORT_START + radeon_crtc->crtc_offset,
+	       (x << 16) | y);
+	WREG32(AVIVO_D1MODE_VIEWPORT_SIZE + radeon_crtc->crtc_offset,
+	       (crtc->mode.hdisplay << 16) | crtc->mode.vdisplay);
+
+	if (crtc->mode.flags & DRM_MODE_FLAG_INTERLACE)
+		WREG32(AVIVO_D1MODE_DATA_FORMAT + radeon_crtc->crtc_offset,
+		       AVIVO_D1MODE_INTERLEAVE_EN);
+	else
+		WREG32(AVIVO_D1MODE_DATA_FORMAT + radeon_crtc->crtc_offset, 0);
+
+	if (old_fb && old_fb != crtc->fb) {
+		radeon_fb = to_radeon_framebuffer(old_fb);
+		radeon_gem_object_unpin(radeon_fb->obj);
+	}
+	return 0;
+}
+
+int atombios_crtc_mode_set(struct drm_crtc *crtc,
+			   struct drm_display_mode *mode,
+			   struct drm_display_mode *adjusted_mode,
+			   int x, int y, struct drm_framebuffer *old_fb)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_encoder *encoder;
+	SET_CRTC_TIMING_PARAMETERS_PS_ALLOCATION crtc_timing;
+
+	/* TODO color tiling */
+	memset(&crtc_timing, 0, sizeof(crtc_timing));
+
+	/* TODO tv */
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+
+	}
+
+	crtc_timing.ucCRTC = radeon_crtc->crtc_id;
+	crtc_timing.usH_Total = adjusted_mode->crtc_htotal;
+	crtc_timing.usH_Disp = adjusted_mode->crtc_hdisplay;
+	crtc_timing.usH_SyncStart = adjusted_mode->crtc_hsync_start;
+	crtc_timing.usH_SyncWidth =
+	    adjusted_mode->crtc_hsync_end - adjusted_mode->crtc_hsync_start;
+
+	crtc_timing.usV_Total = adjusted_mode->crtc_vtotal;
+	crtc_timing.usV_Disp = adjusted_mode->crtc_vdisplay;
+	crtc_timing.usV_SyncStart = adjusted_mode->crtc_vsync_start;
+	crtc_timing.usV_SyncWidth =
+	    adjusted_mode->crtc_vsync_end - adjusted_mode->crtc_vsync_start;
+
+	if (adjusted_mode->flags & DRM_MODE_FLAG_NVSYNC)
+		crtc_timing.susModeMiscInfo.usAccess |= ATOM_VSYNC_POLARITY;
+
+	if (adjusted_mode->flags & DRM_MODE_FLAG_NHSYNC)
+		crtc_timing.susModeMiscInfo.usAccess |= ATOM_HSYNC_POLARITY;
+
+	if (adjusted_mode->flags & DRM_MODE_FLAG_CSYNC)
+		crtc_timing.susModeMiscInfo.usAccess |= ATOM_COMPOSITESYNC;
+
+	if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE)
+		crtc_timing.susModeMiscInfo.usAccess |= ATOM_INTERLACE;
+
+	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		crtc_timing.susModeMiscInfo.usAccess |= ATOM_DOUBLE_CLOCK_MODE;
+
+	atombios_crtc_set_pll(crtc, adjusted_mode);
+	atombios_crtc_set_timing(crtc, &crtc_timing);
+
+	if (ASIC_IS_AVIVO(rdev))
+		atombios_crtc_set_base(crtc, x, y, old_fb);
+	else {
+		if (radeon_crtc->crtc_id == 0) {
+			SET_CRTC_USING_DTD_TIMING_PARAMETERS crtc_dtd_timing;
+			memset(&crtc_dtd_timing, 0, sizeof(crtc_dtd_timing));
+
+			/* setup FP shadow regs on R4xx */
+			crtc_dtd_timing.ucCRTC = radeon_crtc->crtc_id;
+			crtc_dtd_timing.usH_Size = adjusted_mode->crtc_hdisplay;
+			crtc_dtd_timing.usV_Size = adjusted_mode->crtc_vdisplay;
+			crtc_dtd_timing.usH_Blanking_Time =
+			    adjusted_mode->crtc_hblank_end -
+			    adjusted_mode->crtc_hdisplay;
+			crtc_dtd_timing.usV_Blanking_Time =
+			    adjusted_mode->crtc_vblank_end -
+			    adjusted_mode->crtc_vdisplay;
+			crtc_dtd_timing.usH_SyncOffset =
+			    adjusted_mode->crtc_hsync_start -
+			    adjusted_mode->crtc_hdisplay;
+			crtc_dtd_timing.usV_SyncOffset =
+			    adjusted_mode->crtc_vsync_start -
+			    adjusted_mode->crtc_vdisplay;
+			crtc_dtd_timing.usH_SyncWidth =
+			    adjusted_mode->crtc_hsync_end -
+			    adjusted_mode->crtc_hsync_start;
+			crtc_dtd_timing.usV_SyncWidth =
+			    adjusted_mode->crtc_vsync_end -
+			    adjusted_mode->crtc_vsync_start;
+			/* crtc_dtd_timing.ucH_Border = adjusted_mode->crtc_hborder; */
+			/* crtc_dtd_timing.ucV_Border = adjusted_mode->crtc_vborder; */
+
+			if (adjusted_mode->flags & DRM_MODE_FLAG_NVSYNC)
+				crtc_dtd_timing.susModeMiscInfo.usAccess |=
+				    ATOM_VSYNC_POLARITY;
+
+			if (adjusted_mode->flags & DRM_MODE_FLAG_NHSYNC)
+				crtc_dtd_timing.susModeMiscInfo.usAccess |=
+				    ATOM_HSYNC_POLARITY;
+
+			if (adjusted_mode->flags & DRM_MODE_FLAG_CSYNC)
+				crtc_dtd_timing.susModeMiscInfo.usAccess |=
+				    ATOM_COMPOSITESYNC;
+
+			if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE)
+				crtc_dtd_timing.susModeMiscInfo.usAccess |=
+				    ATOM_INTERLACE;
+
+			if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+				crtc_dtd_timing.susModeMiscInfo.usAccess |=
+				    ATOM_DOUBLE_CLOCK_MODE;
+
+			atombios_set_crtc_dtd_timing(crtc, &crtc_dtd_timing);
+		}
+		radeon_crtc_set_base(crtc, x, y, old_fb);
+		radeon_legacy_atom_set_surface(crtc);
+	}
+	return 0;
+}
+
+static bool atombios_crtc_mode_fixup(struct drm_crtc *crtc,
+				     struct drm_display_mode *mode,
+				     struct drm_display_mode *adjusted_mode)
+{
+	return true;
+}
+
+static void atombios_crtc_prepare(struct drm_crtc *crtc)
+{
+	atombios_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
+	atombios_lock_crtc(crtc, 1);
+}
+
+static void atombios_crtc_commit(struct drm_crtc *crtc)
+{
+	atombios_crtc_dpms(crtc, DRM_MODE_DPMS_ON);
+	atombios_lock_crtc(crtc, 0);
+}
+
+static const struct drm_crtc_helper_funcs atombios_helper_funcs = {
+	.dpms = atombios_crtc_dpms,
+	.mode_fixup = atombios_crtc_mode_fixup,
+	.mode_set = atombios_crtc_mode_set,
+	.mode_set_base = atombios_crtc_set_base,
+	.prepare = atombios_crtc_prepare,
+	.commit = atombios_crtc_commit,
+};
+
+void radeon_atombios_init_crtc(struct drm_device *dev,
+			       struct radeon_crtc *radeon_crtc)
+{
+	if (radeon_crtc->crtc_id == 1)
+		radeon_crtc->crtc_offset =
+		    AVIVO_D2CRTC_H_TOTAL - AVIVO_D1CRTC_H_TOTAL;
+	drm_crtc_helper_add(&radeon_crtc->base, &atombios_helper_funcs);
+}
+
+void radeon_init_disp_bw_avivo(struct drm_device *dev,
+			       struct drm_display_mode *mode1,
+			       uint32_t pixel_bytes1,
+			       struct drm_display_mode *mode2,
+			       uint32_t pixel_bytes2)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	fixed20_12 min_mem_eff;
+	fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff;
+	fixed20_12 sclk_ff, mclk_ff;
+	uint32_t dc_lb_memory_split, temp;
+
+	min_mem_eff.full = rfixed_const_8(0);
+	if (rdev->disp_priority == 2) {
+		uint32_t mc_init_misc_lat_timer = 0;
+		if (rdev->family == CHIP_RV515)
+			mc_init_misc_lat_timer =
+			    RREG32_MC(RV515_MC_INIT_MISC_LAT_TIMER);
+		else if (rdev->family == CHIP_RS690)
+			mc_init_misc_lat_timer =
+			    RREG32_MC(RS690_MC_INIT_MISC_LAT_TIMER);
+
+		mc_init_misc_lat_timer &=
+		    ~(R300_MC_DISP1R_INIT_LAT_MASK <<
+		      R300_MC_DISP1R_INIT_LAT_SHIFT);
+		mc_init_misc_lat_timer &=
+		    ~(R300_MC_DISP0R_INIT_LAT_MASK <<
+		      R300_MC_DISP0R_INIT_LAT_SHIFT);
+
+		if (mode2)
+			mc_init_misc_lat_timer |=
+			    (1 << R300_MC_DISP1R_INIT_LAT_SHIFT);
+		if (mode1)
+			mc_init_misc_lat_timer |=
+			    (1 << R300_MC_DISP0R_INIT_LAT_SHIFT);
+
+		if (rdev->family == CHIP_RV515)
+			WREG32_MC(RV515_MC_INIT_MISC_LAT_TIMER,
+				  mc_init_misc_lat_timer);
+		else if (rdev->family == CHIP_RS690)
+			WREG32_MC(RS690_MC_INIT_MISC_LAT_TIMER,
+				  mc_init_misc_lat_timer);
+	}
+
+	/*
+	 * determine is there is enough bw for current mode
+	 */
+	temp_ff.full = rfixed_const(100);
+	mclk_ff.full = rfixed_const(rdev->clock.default_mclk);
+	mclk_ff.full = rfixed_div(mclk_ff, temp_ff);
+	sclk_ff.full = rfixed_const(rdev->clock.default_sclk);
+	sclk_ff.full = rfixed_div(sclk_ff, temp_ff);
+
+	temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1);
+	temp_ff.full = rfixed_const(temp);
+	mem_bw.full = rfixed_mul(mclk_ff, temp_ff);
+	mem_bw.full = rfixed_mul(mem_bw, min_mem_eff);
+
+	pix_clk.full = 0;
+	pix_clk2.full = 0;
+	peak_disp_bw.full = 0;
+	if (mode1) {
+		temp_ff.full = rfixed_const(1000);
+		pix_clk.full = rfixed_const(mode1->clock);	/* convert to fixed point */
+		pix_clk.full = rfixed_div(pix_clk, temp_ff);
+		temp_ff.full = rfixed_const(pixel_bytes1);
+		peak_disp_bw.full += rfixed_mul(pix_clk, temp_ff);
+	}
+	if (mode2) {
+		temp_ff.full = rfixed_const(1000);
+		pix_clk2.full = rfixed_const(mode2->clock);	/* convert to fixed point */
+		pix_clk2.full = rfixed_div(pix_clk2, temp_ff);
+		temp_ff.full = rfixed_const(pixel_bytes2);
+		peak_disp_bw.full += rfixed_mul(pix_clk2, temp_ff);
+	}
+
+	if (peak_disp_bw.full >= mem_bw.full) {
+		DRM_ERROR
+		    ("You may not have enough display bandwidth for current mode\n"
+		     "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n");
+		printk("peak disp bw %d, mem_bw %d\n",
+		       rfixed_trunc(peak_disp_bw), rfixed_trunc(mem_bw));
+	}
+
+	/*
+	 * Line Buffer Setup
+	 * There is a single line buffer shared by both display controllers.
+	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between the display
+	 * controllers.  The paritioning can either be done manually or via one of four
+	 * preset allocations specified in bits 1:0:
+	 * 0 - line buffer is divided in half and shared between each display controller
+	 * 1 - D1 gets 3/4 of the line buffer, D2 gets 1/4
+	 * 2 - D1 gets the whole buffer
+	 * 3 - D1 gets 1/4 of the line buffer, D2 gets 3/4
+	 * Setting bit 2 of DC_LB_MEMORY_SPLIT controls switches to manual allocation mode.
+	 * In manual allocation mode, D1 always starts at 0, D1 end/2 is specified in bits
+	 * 14:4; D2 allocation follows D1.
+	 */
+
+	/* is auto or manual better ? */
+	dc_lb_memory_split =
+	    RREG32(AVIVO_DC_LB_MEMORY_SPLIT) & ~AVIVO_DC_LB_MEMORY_SPLIT_MASK;
+	dc_lb_memory_split &= ~AVIVO_DC_LB_MEMORY_SPLIT_SHIFT_MODE;
+#if 1
+	/* auto */
+	if (mode1 && mode2) {
+		if (mode1->hdisplay > mode2->hdisplay) {
+			if (mode1->hdisplay > 2560)
+				dc_lb_memory_split |=
+				    AVIVO_DC_LB_MEMORY_SPLIT_D1_3Q_D2_1Q;
+			else
+				dc_lb_memory_split |=
+				    AVIVO_DC_LB_MEMORY_SPLIT_D1HALF_D2HALF;
+		} else if (mode2->hdisplay > mode1->hdisplay) {
+			if (mode2->hdisplay > 2560)
+				dc_lb_memory_split |=
+				    AVIVO_DC_LB_MEMORY_SPLIT_D1_1Q_D2_3Q;
+			else
+				dc_lb_memory_split |=
+				    AVIVO_DC_LB_MEMORY_SPLIT_D1HALF_D2HALF;
+		} else
+			dc_lb_memory_split |=
+			    AVIVO_DC_LB_MEMORY_SPLIT_D1HALF_D2HALF;
+	} else if (mode1) {
+		dc_lb_memory_split |= AVIVO_DC_LB_MEMORY_SPLIT_D1_ONLY;
+	} else if (mode2) {
+		dc_lb_memory_split |= AVIVO_DC_LB_MEMORY_SPLIT_D1_1Q_D2_3Q;
+	}
+#else
+	/* manual */
+	dc_lb_memory_split |= AVIVO_DC_LB_MEMORY_SPLIT_SHIFT_MODE;
+	dc_lb_memory_split &=
+	    ~(AVIVO_DC_LB_DISP1_END_ADR_MASK <<
+	      AVIVO_DC_LB_DISP1_END_ADR_SHIFT);
+	if (mode1) {
+		dc_lb_memory_split |=
+		    ((((mode1->hdisplay / 2) + 64) & AVIVO_DC_LB_DISP1_END_ADR_MASK)
+		     << AVIVO_DC_LB_DISP1_END_ADR_SHIFT);
+	} else if (mode2) {
+		dc_lb_memory_split |= (0 << AVIVO_DC_LB_DISP1_END_ADR_SHIFT);
+	}
+#endif
+	WREG32(AVIVO_DC_LB_MEMORY_SPLIT, dc_lb_memory_split);
+}
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
new file mode 100644
index 00000000000..5225f5be7ea
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -0,0 +1,1524 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#include <linux/seq_file.h>
+#include "drmP.h"
+#include "drm.h"
+#include "radeon_drm.h"
+#include "radeon_microcode.h"
+#include "radeon_reg.h"
+#include "radeon.h"
+
+/* This files gather functions specifics to:
+ * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
+ *
+ * Some of these functions might be used by newer ASICs.
+ */
+void r100_hdp_reset(struct radeon_device *rdev);
+void r100_gpu_init(struct radeon_device *rdev);
+int r100_gui_wait_for_idle(struct radeon_device *rdev);
+int r100_mc_wait_for_idle(struct radeon_device *rdev);
+void r100_gpu_wait_for_vsync(struct radeon_device *rdev);
+void r100_gpu_wait_for_vsync2(struct radeon_device *rdev);
+int r100_debugfs_mc_info_init(struct radeon_device *rdev);
+
+
+/*
+ * PCI GART
+ */
+void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
+{
+	/* TODO: can we do somethings here ? */
+	/* It seems hw only cache one entry so we should discard this
+	 * entry otherwise if first GPU GART read hit this entry it
+	 * could end up in wrong address. */
+}
+
+int r100_pci_gart_enable(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+	int r;
+
+	/* Initialize common gart structure */
+	r = radeon_gart_init(rdev);
+	if (r) {
+		return r;
+	}
+	if (rdev->gart.table.ram.ptr == NULL) {
+		rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
+		r = radeon_gart_table_ram_alloc(rdev);
+		if (r) {
+			return r;
+		}
+	}
+	/* discard memory request outside of configured range */
+	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
+	WREG32(RADEON_AIC_CNTL, tmp);
+	/* set address range for PCI address translate */
+	WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_location);
+	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
+	WREG32(RADEON_AIC_HI_ADDR, tmp);
+	/* Enable bus mastering */
+	tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
+	WREG32(RADEON_BUS_CNTL, tmp);
+	/* set PCI GART page-table base address */
+	WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
+	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
+	WREG32(RADEON_AIC_CNTL, tmp);
+	r100_pci_gart_tlb_flush(rdev);
+	rdev->gart.ready = true;
+	return 0;
+}
+
+void r100_pci_gart_disable(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+
+	/* discard memory request outside of configured range */
+	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
+	WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN);
+	WREG32(RADEON_AIC_LO_ADDR, 0);
+	WREG32(RADEON_AIC_HI_ADDR, 0);
+}
+
+int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
+{
+	if (i < 0 || i > rdev->gart.num_gpu_pages) {
+		return -EINVAL;
+	}
+	rdev->gart.table.ram.ptr[i] = cpu_to_le32((uint32_t)addr);
+	return 0;
+}
+
+int r100_gart_enable(struct radeon_device *rdev)
+{
+	if (rdev->flags & RADEON_IS_AGP) {
+		r100_pci_gart_disable(rdev);
+		return 0;
+	}
+	return r100_pci_gart_enable(rdev);
+}
+
+
+/*
+ * MC
+ */
+void r100_mc_disable_clients(struct radeon_device *rdev)
+{
+	uint32_t ov0_scale_cntl, crtc_ext_cntl, crtc_gen_cntl, crtc2_gen_cntl;
+
+	/* FIXME: is this function correct for rs100,rs200,rs300 ? */
+	if (r100_gui_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait GUI idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+
+	/* stop display and memory access */
+	ov0_scale_cntl = RREG32(RADEON_OV0_SCALE_CNTL);
+	WREG32(RADEON_OV0_SCALE_CNTL, ov0_scale_cntl & ~RADEON_SCALER_ENABLE);
+	crtc_ext_cntl = RREG32(RADEON_CRTC_EXT_CNTL);
+	WREG32(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl | RADEON_CRTC_DISPLAY_DIS);
+	crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL);
+
+	r100_gpu_wait_for_vsync(rdev);
+
+	WREG32(RADEON_CRTC_GEN_CNTL,
+	       (crtc_gen_cntl & ~(RADEON_CRTC_CUR_EN | RADEON_CRTC_ICON_EN)) |
+	       RADEON_CRTC_DISP_REQ_EN_B | RADEON_CRTC_EXT_DISP_EN);
+
+	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
+		crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
+
+		r100_gpu_wait_for_vsync2(rdev);
+		WREG32(RADEON_CRTC2_GEN_CNTL,
+		       (crtc2_gen_cntl &
+		        ~(RADEON_CRTC2_CUR_EN | RADEON_CRTC2_ICON_EN)) |
+		       RADEON_CRTC2_DISP_REQ_EN_B);
+	}
+
+	udelay(500);
+}
+
+void r100_mc_setup(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+	int r;
+
+	r = r100_debugfs_mc_info_init(rdev);
+	if (r) {
+		DRM_ERROR("Failed to register debugfs file for R100 MC !\n");
+	}
+	/* Write VRAM size in case we are limiting it */
+	WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
+	tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
+	tmp = REG_SET(RADEON_MC_FB_TOP, tmp >> 16);
+	tmp |= REG_SET(RADEON_MC_FB_START, rdev->mc.vram_location >> 16);
+	WREG32(RADEON_MC_FB_LOCATION, tmp);
+
+	/* Enable bus mastering */
+	tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
+	WREG32(RADEON_BUS_CNTL, tmp);
+
+	if (rdev->flags & RADEON_IS_AGP) {
+		tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
+		tmp = REG_SET(RADEON_MC_AGP_TOP, tmp >> 16);
+		tmp |= REG_SET(RADEON_MC_AGP_START, rdev->mc.gtt_location >> 16);
+		WREG32(RADEON_MC_AGP_LOCATION, tmp);
+		WREG32(RADEON_AGP_BASE, rdev->mc.agp_base);
+	} else {
+		WREG32(RADEON_MC_AGP_LOCATION, 0x0FFFFFFF);
+		WREG32(RADEON_AGP_BASE, 0);
+	}
+
+	tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL;
+	tmp |= (7 << 28);
+	WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE);
+	(void)RREG32(RADEON_HOST_PATH_CNTL);
+	WREG32(RADEON_HOST_PATH_CNTL, tmp);
+	(void)RREG32(RADEON_HOST_PATH_CNTL);
+}
+
+int r100_mc_init(struct radeon_device *rdev)
+{
+	int r;
+
+	if (r100_debugfs_rbbm_init(rdev)) {
+		DRM_ERROR("Failed to register debugfs file for RBBM !\n");
+	}
+
+	r100_gpu_init(rdev);
+	/* Disable gart which also disable out of gart access */
+	r100_pci_gart_disable(rdev);
+
+	/* Setup GPU memory space */
+	rdev->mc.vram_location = 0xFFFFFFFFUL;
+	rdev->mc.gtt_location = 0xFFFFFFFFUL;
+	if (rdev->flags & RADEON_IS_AGP) {
+		r = radeon_agp_init(rdev);
+		if (r) {
+			printk(KERN_WARNING "[drm] Disabling AGP\n");
+			rdev->flags &= ~RADEON_IS_AGP;
+			rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
+		} else {
+			rdev->mc.gtt_location = rdev->mc.agp_base;
+		}
+	}
+	r = radeon_mc_setup(rdev);
+	if (r) {
+		return r;
+	}
+
+	r100_mc_disable_clients(rdev);
+	if (r100_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait MC idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+
+	r100_mc_setup(rdev);
+	return 0;
+}
+
+void r100_mc_fini(struct radeon_device *rdev)
+{
+	r100_pci_gart_disable(rdev);
+	radeon_gart_table_ram_free(rdev);
+	radeon_gart_fini(rdev);
+}
+
+
+/*
+ * Fence emission
+ */
+void r100_fence_ring_emit(struct radeon_device *rdev,
+			  struct radeon_fence *fence)
+{
+	/* Who ever call radeon_fence_emit should call ring_lock and ask
+	 * for enough space (today caller are ib schedule and buffer move) */
+	/* Wait until IDLE & CLEAN */
+	radeon_ring_write(rdev, PACKET0(0x1720, 0));
+	radeon_ring_write(rdev, (1 << 16) | (1 << 17));
+	/* Emit fence sequence & fire IRQ */
+	radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
+	radeon_ring_write(rdev, fence->seq);
+	radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
+	radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
+}
+
+
+/*
+ * Writeback
+ */
+int r100_wb_init(struct radeon_device *rdev)
+{
+	int r;
+
+	if (rdev->wb.wb_obj == NULL) {
+		r = radeon_object_create(rdev, NULL, 4096,
+					 true,
+					 RADEON_GEM_DOMAIN_GTT,
+					 false, &rdev->wb.wb_obj);
+		if (r) {
+			DRM_ERROR("radeon: failed to create WB buffer (%d).\n", r);
+			return r;
+		}
+		r = radeon_object_pin(rdev->wb.wb_obj,
+				      RADEON_GEM_DOMAIN_GTT,
+				      &rdev->wb.gpu_addr);
+		if (r) {
+			DRM_ERROR("radeon: failed to pin WB buffer (%d).\n", r);
+			return r;
+		}
+		r = radeon_object_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb);
+		if (r) {
+			DRM_ERROR("radeon: failed to map WB buffer (%d).\n", r);
+			return r;
+		}
+	}
+	WREG32(0x774, rdev->wb.gpu_addr);
+	WREG32(0x70C, rdev->wb.gpu_addr + 1024);
+	WREG32(0x770, 0xff);
+	return 0;
+}
+
+void r100_wb_fini(struct radeon_device *rdev)
+{
+	if (rdev->wb.wb_obj) {
+		radeon_object_kunmap(rdev->wb.wb_obj);
+		radeon_object_unpin(rdev->wb.wb_obj);
+		radeon_object_unref(&rdev->wb.wb_obj);
+		rdev->wb.wb = NULL;
+		rdev->wb.wb_obj = NULL;
+	}
+}
+
+int r100_copy_blit(struct radeon_device *rdev,
+		   uint64_t src_offset,
+		   uint64_t dst_offset,
+		   unsigned num_pages,
+		   struct radeon_fence *fence)
+{
+	uint32_t cur_pages;
+	uint32_t stride_bytes = PAGE_SIZE;
+	uint32_t pitch;
+	uint32_t stride_pixels;
+	unsigned ndw;
+	int num_loops;
+	int r = 0;
+
+	/* radeon limited to 16k stride */
+	stride_bytes &= 0x3fff;
+	/* radeon pitch is /64 */
+	pitch = stride_bytes / 64;
+	stride_pixels = stride_bytes / 4;
+	num_loops = DIV_ROUND_UP(num_pages, 8191);
+
+	/* Ask for enough room for blit + flush + fence */
+	ndw = 64 + (10 * num_loops);
+	r = radeon_ring_lock(rdev, ndw);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
+		return -EINVAL;
+	}
+	while (num_pages > 0) {
+		cur_pages = num_pages;
+		if (cur_pages > 8191) {
+			cur_pages = 8191;
+		}
+		num_pages -= cur_pages;
+
+		/* pages are in Y direction - height
+		   page width in X direction - width */
+		radeon_ring_write(rdev, PACKET3(PACKET3_BITBLT_MULTI, 8));
+		radeon_ring_write(rdev,
+				  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
+				  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+				  RADEON_GMC_SRC_CLIPPING |
+				  RADEON_GMC_DST_CLIPPING |
+				  RADEON_GMC_BRUSH_NONE |
+				  (RADEON_COLOR_FORMAT_ARGB8888 << 8) |
+				  RADEON_GMC_SRC_DATATYPE_COLOR |
+				  RADEON_ROP3_S |
+				  RADEON_DP_SRC_SOURCE_MEMORY |
+				  RADEON_GMC_CLR_CMP_CNTL_DIS |
+				  RADEON_GMC_WR_MSK_DIS);
+		radeon_ring_write(rdev, (pitch << 22) | (src_offset >> 10));
+		radeon_ring_write(rdev, (pitch << 22) | (dst_offset >> 10));
+		radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
+		radeon_ring_write(rdev, 0);
+		radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
+		radeon_ring_write(rdev, num_pages);
+		radeon_ring_write(rdev, num_pages);
+		radeon_ring_write(rdev, cur_pages | (stride_pixels << 16));
+	}
+	radeon_ring_write(rdev, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
+	radeon_ring_write(rdev, RADEON_RB2D_DC_FLUSH_ALL);
+	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
+	radeon_ring_write(rdev,
+			  RADEON_WAIT_2D_IDLECLEAN |
+			  RADEON_WAIT_HOST_IDLECLEAN |
+			  RADEON_WAIT_DMA_GUI_IDLE);
+	if (fence) {
+		r = radeon_fence_emit(rdev, fence);
+	}
+	radeon_ring_unlock_commit(rdev);
+	return r;
+}
+
+
+/*
+ * CP
+ */
+void r100_ring_start(struct radeon_device *rdev)
+{
+	int r;
+
+	r = radeon_ring_lock(rdev, 2);
+	if (r) {
+		return;
+	}
+	radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
+	radeon_ring_write(rdev,
+			  RADEON_ISYNC_ANY2D_IDLE3D |
+			  RADEON_ISYNC_ANY3D_IDLE2D |
+			  RADEON_ISYNC_WAIT_IDLEGUI |
+			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
+	radeon_ring_unlock_commit(rdev);
+}
+
+static void r100_cp_load_microcode(struct radeon_device *rdev)
+{
+	int i;
+
+	if (r100_gui_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait GUI idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+
+	WREG32(RADEON_CP_ME_RAM_ADDR, 0);
+	if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) ||
+	    (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) ||
+	    (rdev->family == CHIP_RS200)) {
+		DRM_INFO("Loading R100 Microcode\n");
+		for (i = 0; i < 256; i++) {
+			WREG32(RADEON_CP_ME_RAM_DATAH, R100_cp_microcode[i][1]);
+			WREG32(RADEON_CP_ME_RAM_DATAL, R100_cp_microcode[i][0]);
+		}
+	} else if ((rdev->family == CHIP_R200) ||
+		   (rdev->family == CHIP_RV250) ||
+		   (rdev->family == CHIP_RV280) ||
+		   (rdev->family == CHIP_RS300)) {
+		DRM_INFO("Loading R200 Microcode\n");
+		for (i = 0; i < 256; i++) {
+			WREG32(RADEON_CP_ME_RAM_DATAH, R200_cp_microcode[i][1]);
+			WREG32(RADEON_CP_ME_RAM_DATAL, R200_cp_microcode[i][0]);
+		}
+	} else if ((rdev->family == CHIP_R300) ||
+		   (rdev->family == CHIP_R350) ||
+		   (rdev->family == CHIP_RV350) ||
+		   (rdev->family == CHIP_RV380) ||
+		   (rdev->family == CHIP_RS400) ||
+		   (rdev->family == CHIP_RS480)) {
+		DRM_INFO("Loading R300 Microcode\n");
+		for (i = 0; i < 256; i++) {
+			WREG32(RADEON_CP_ME_RAM_DATAH, R300_cp_microcode[i][1]);
+			WREG32(RADEON_CP_ME_RAM_DATAL, R300_cp_microcode[i][0]);
+		}
+	} else if ((rdev->family == CHIP_R420) ||
+		   (rdev->family == CHIP_R423) ||
+		   (rdev->family == CHIP_RV410)) {
+		DRM_INFO("Loading R400 Microcode\n");
+		for (i = 0; i < 256; i++) {
+			WREG32(RADEON_CP_ME_RAM_DATAH, R420_cp_microcode[i][1]);
+			WREG32(RADEON_CP_ME_RAM_DATAL, R420_cp_microcode[i][0]);
+		}
+	} else if ((rdev->family == CHIP_RS690) ||
+		   (rdev->family == CHIP_RS740)) {
+		DRM_INFO("Loading RS690/RS740 Microcode\n");
+		for (i = 0; i < 256; i++) {
+			WREG32(RADEON_CP_ME_RAM_DATAH, RS690_cp_microcode[i][1]);
+			WREG32(RADEON_CP_ME_RAM_DATAL, RS690_cp_microcode[i][0]);
+		}
+	} else if (rdev->family == CHIP_RS600) {
+		DRM_INFO("Loading RS600 Microcode\n");
+		for (i = 0; i < 256; i++) {
+			WREG32(RADEON_CP_ME_RAM_DATAH, RS600_cp_microcode[i][1]);
+			WREG32(RADEON_CP_ME_RAM_DATAL, RS600_cp_microcode[i][0]);
+		}
+	} else if ((rdev->family == CHIP_RV515) ||
+		   (rdev->family == CHIP_R520) ||
+		   (rdev->family == CHIP_RV530) ||
+		   (rdev->family == CHIP_R580) ||
+		   (rdev->family == CHIP_RV560) ||
+		   (rdev->family == CHIP_RV570)) {
+		DRM_INFO("Loading R500 Microcode\n");
+		for (i = 0; i < 256; i++) {
+			WREG32(RADEON_CP_ME_RAM_DATAH, R520_cp_microcode[i][1]);
+			WREG32(RADEON_CP_ME_RAM_DATAL, R520_cp_microcode[i][0]);
+		}
+	}
+}
+
+int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
+{
+	unsigned rb_bufsz;
+	unsigned rb_blksz;
+	unsigned max_fetch;
+	unsigned pre_write_timer;
+	unsigned pre_write_limit;
+	unsigned indirect2_start;
+	unsigned indirect1_start;
+	uint32_t tmp;
+	int r;
+
+	if (r100_debugfs_cp_init(rdev)) {
+		DRM_ERROR("Failed to register debugfs file for CP !\n");
+	}
+	/* Reset CP */
+	tmp = RREG32(RADEON_CP_CSQ_STAT);
+	if ((tmp & (1 << 31))) {
+		DRM_INFO("radeon: cp busy (0x%08X) resetting\n", tmp);
+		WREG32(RADEON_CP_CSQ_MODE, 0);
+		WREG32(RADEON_CP_CSQ_CNTL, 0);
+		WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
+		tmp = RREG32(RADEON_RBBM_SOFT_RESET);
+		mdelay(2);
+		WREG32(RADEON_RBBM_SOFT_RESET, 0);
+		tmp = RREG32(RADEON_RBBM_SOFT_RESET);
+		mdelay(2);
+		tmp = RREG32(RADEON_CP_CSQ_STAT);
+		if ((tmp & (1 << 31))) {
+			DRM_INFO("radeon: cp reset failed (0x%08X)\n", tmp);
+		}
+	} else {
+		DRM_INFO("radeon: cp idle (0x%08X)\n", tmp);
+	}
+	/* Align ring size */
+	rb_bufsz = drm_order(ring_size / 8);
+	ring_size = (1 << (rb_bufsz + 1)) * 4;
+	r100_cp_load_microcode(rdev);
+	r = radeon_ring_init(rdev, ring_size);
+	if (r) {
+		return r;
+	}
+	/* Each time the cp read 1024 bytes (16 dword/quadword) update
+	 * the rptr copy in system ram */
+	rb_blksz = 9;
+	/* cp will read 128bytes at a time (4 dwords) */
+	max_fetch = 1;
+	rdev->cp.align_mask = 16 - 1;
+	/* Write to CP_RB_WPTR will be delayed for pre_write_timer clocks */
+	pre_write_timer = 64;
+	/* Force CP_RB_WPTR write if written more than one time before the
+	 * delay expire
+	 */
+	pre_write_limit = 0;
+	/* Setup the cp cache like this (cache size is 96 dwords) :
+	 *	RING		0  to 15
+	 *	INDIRECT1	16 to 79
+	 *	INDIRECT2	80 to 95
+	 * So ring cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
+	 *    indirect1 cache size is 64dwords (> (2 * max_fetch = 2 * 4dwords))
+	 *    indirect2 cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
+	 * Idea being that most of the gpu cmd will be through indirect1 buffer
+	 * so it gets the bigger cache.
+	 */
+	indirect2_start = 80;
+	indirect1_start = 16;
+	/* cp setup */
+	WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
+	WREG32(RADEON_CP_RB_CNTL,
+	       REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
+	       REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
+	       REG_SET(RADEON_MAX_FETCH, max_fetch) |
+	       RADEON_RB_NO_UPDATE);
+	/* Set ring address */
+	DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)rdev->cp.gpu_addr);
+	WREG32(RADEON_CP_RB_BASE, rdev->cp.gpu_addr);
+	/* Force read & write ptr to 0 */
+	tmp = RREG32(RADEON_CP_RB_CNTL);
+	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
+	WREG32(RADEON_CP_RB_RPTR_WR, 0);
+	WREG32(RADEON_CP_RB_WPTR, 0);
+	WREG32(RADEON_CP_RB_CNTL, tmp);
+	udelay(10);
+	rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
+	rdev->cp.wptr = RREG32(RADEON_CP_RB_WPTR);
+	/* Set cp mode to bus mastering & enable cp*/
+	WREG32(RADEON_CP_CSQ_MODE,
+	       REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
+	       REG_SET(RADEON_INDIRECT1_START, indirect1_start));
+	WREG32(0x718, 0);
+	WREG32(0x744, 0x00004D4D);
+	WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);
+	radeon_ring_start(rdev);
+	r = radeon_ring_test(rdev);
+	if (r) {
+		DRM_ERROR("radeon: cp isn't working (%d).\n", r);
+		return r;
+	}
+	rdev->cp.ready = true;
+	return 0;
+}
+
+void r100_cp_fini(struct radeon_device *rdev)
+{
+	/* Disable ring */
+	rdev->cp.ready = false;
+	WREG32(RADEON_CP_CSQ_CNTL, 0);
+	radeon_ring_fini(rdev);
+	DRM_INFO("radeon: cp finalized\n");
+}
+
+void r100_cp_disable(struct radeon_device *rdev)
+{
+	/* Disable ring */
+	rdev->cp.ready = false;
+	WREG32(RADEON_CP_CSQ_MODE, 0);
+	WREG32(RADEON_CP_CSQ_CNTL, 0);
+	if (r100_gui_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait GUI idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+}
+
+int r100_cp_reset(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+	bool reinit_cp;
+	int i;
+
+	reinit_cp = rdev->cp.ready;
+	rdev->cp.ready = false;
+	WREG32(RADEON_CP_CSQ_MODE, 0);
+	WREG32(RADEON_CP_CSQ_CNTL, 0);
+	WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
+	(void)RREG32(RADEON_RBBM_SOFT_RESET);
+	udelay(200);
+	WREG32(RADEON_RBBM_SOFT_RESET, 0);
+	/* Wait to prevent race in RBBM_STATUS */
+	mdelay(1);
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(RADEON_RBBM_STATUS);
+		if (!(tmp & (1 << 16))) {
+			DRM_INFO("CP reset succeed (RBBM_STATUS=0x%08X)\n",
+				 tmp);
+			if (reinit_cp) {
+				return r100_cp_init(rdev, rdev->cp.ring_size);
+			}
+			return 0;
+		}
+		DRM_UDELAY(1);
+	}
+	tmp = RREG32(RADEON_RBBM_STATUS);
+	DRM_ERROR("Failed to reset CP (RBBM_STATUS=0x%08X)!\n", tmp);
+	return -1;
+}
+
+
+/*
+ * CS functions
+ */
+int r100_cs_parse_packet0(struct radeon_cs_parser *p,
+			  struct radeon_cs_packet *pkt,
+			  unsigned *auth, unsigned n,
+			  radeon_packet0_check_t check)
+{
+	unsigned reg;
+	unsigned i, j, m;
+	unsigned idx;
+	int r;
+
+	idx = pkt->idx + 1;
+	reg = pkt->reg;
+	if (pkt->one_reg_wr) {
+		if ((reg >> 7) > n) {
+			return -EINVAL;
+		}
+	} else {
+		if (((reg + (pkt->count << 2)) >> 7) > n) {
+			return -EINVAL;
+		}
+	}
+	for (i = 0; i <= pkt->count; i++, idx++) {
+		j = (reg >> 7);
+		m = 1 << ((reg >> 2) & 31);
+		if (auth[j] & m) {
+			r = check(p, pkt, idx, reg);
+			if (r) {
+				return r;
+			}
+		}
+		if (pkt->one_reg_wr) {
+			if (!(auth[j] & m)) {
+				break;
+			}
+		} else {
+			reg += 4;
+		}
+	}
+	return 0;
+}
+
+int r100_cs_parse_packet3(struct radeon_cs_parser *p,
+			  struct radeon_cs_packet *pkt,
+			  unsigned *auth, unsigned n,
+			  radeon_packet3_check_t check)
+{
+	unsigned i, m;
+
+	if ((pkt->opcode >> 5) > n) {
+		return -EINVAL;
+	}
+	i = pkt->opcode >> 5;
+	m = 1 << (pkt->opcode & 31);
+	if (auth[i] & m) {
+		return check(p, pkt);
+	}
+	return 0;
+}
+
+void r100_cs_dump_packet(struct radeon_cs_parser *p,
+			 struct radeon_cs_packet *pkt)
+{
+	struct radeon_cs_chunk *ib_chunk;
+	volatile uint32_t *ib;
+	unsigned i;
+	unsigned idx;
+
+	ib = p->ib->ptr;
+	ib_chunk = &p->chunks[p->chunk_ib_idx];
+	idx = pkt->idx;
+	for (i = 0; i <= (pkt->count + 1); i++, idx++) {
+		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
+	}
+}
+
+/**
+ * r100_cs_packet_parse() - parse cp packet and point ib index to next packet
+ * @parser:	parser structure holding parsing context.
+ * @pkt:	where to store packet informations
+ *
+ * Assume that chunk_ib_index is properly set. Will return -EINVAL
+ * if packet is bigger than remaining ib size. or if packets is unknown.
+ **/
+int r100_cs_packet_parse(struct radeon_cs_parser *p,
+			 struct radeon_cs_packet *pkt,
+			 unsigned idx)
+{
+	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
+	uint32_t header = ib_chunk->kdata[idx];
+
+	if (idx >= ib_chunk->length_dw) {
+		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
+			  idx, ib_chunk->length_dw);
+		return -EINVAL;
+	}
+	pkt->idx = idx;
+	pkt->type = CP_PACKET_GET_TYPE(header);
+	pkt->count = CP_PACKET_GET_COUNT(header);
+	switch (pkt->type) {
+	case PACKET_TYPE0:
+		pkt->reg = CP_PACKET0_GET_REG(header);
+		pkt->one_reg_wr = CP_PACKET0_GET_ONE_REG_WR(header);
+		break;
+	case PACKET_TYPE3:
+		pkt->opcode = CP_PACKET3_GET_OPCODE(header);
+		break;
+	case PACKET_TYPE2:
+		pkt->count = -1;
+		break;
+	default:
+		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
+		return -EINVAL;
+	}
+	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
+		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
+			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * r100_cs_packet_next_reloc() - parse next packet which should be reloc packet3
+ * @parser:		parser structure holding parsing context.
+ * @data:		pointer to relocation data
+ * @offset_start:	starting offset
+ * @offset_mask:	offset mask (to align start offset on)
+ * @reloc:		reloc informations
+ *
+ * Check next packet is relocation packet3, do bo validation and compute
+ * GPU offset using the provided start.
+ **/
+int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
+			      struct radeon_cs_reloc **cs_reloc)
+{
+	struct radeon_cs_chunk *ib_chunk;
+	struct radeon_cs_chunk *relocs_chunk;
+	struct radeon_cs_packet p3reloc;
+	unsigned idx;
+	int r;
+
+	if (p->chunk_relocs_idx == -1) {
+		DRM_ERROR("No relocation chunk !\n");
+		return -EINVAL;
+	}
+	*cs_reloc = NULL;
+	ib_chunk = &p->chunks[p->chunk_ib_idx];
+	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+	r = r100_cs_packet_parse(p, &p3reloc, p->idx);
+	if (r) {
+		return r;
+	}
+	p->idx += p3reloc.count + 2;
+	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
+		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
+			  p3reloc.idx);
+		r100_cs_dump_packet(p, &p3reloc);
+		return -EINVAL;
+	}
+	idx = ib_chunk->kdata[p3reloc.idx + 1];
+	if (idx >= relocs_chunk->length_dw) {
+		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
+			  idx, relocs_chunk->length_dw);
+		r100_cs_dump_packet(p, &p3reloc);
+		return -EINVAL;
+	}
+	/* FIXME: we assume reloc size is 4 dwords */
+	*cs_reloc = p->relocs_ptr[(idx / 4)];
+	return 0;
+}
+
+static int r100_packet0_check(struct radeon_cs_parser *p,
+			      struct radeon_cs_packet *pkt)
+{
+	struct radeon_cs_chunk *ib_chunk;
+	struct radeon_cs_reloc *reloc;
+	volatile uint32_t *ib;
+	uint32_t tmp;
+	unsigned reg;
+	unsigned i;
+	unsigned idx;
+	bool onereg;
+	int r;
+
+	ib = p->ib->ptr;
+	ib_chunk = &p->chunks[p->chunk_ib_idx];
+	idx = pkt->idx + 1;
+	reg = pkt->reg;
+	onereg = false;
+	if (CP_PACKET0_GET_ONE_REG_WR(ib_chunk->kdata[pkt->idx])) {
+		onereg = true;
+	}
+	for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
+		switch (reg) {
+		/* FIXME: only allow PACKET3 blit? easier to check for out of
+		 * range access */
+		case RADEON_DST_PITCH_OFFSET:
+		case RADEON_SRC_PITCH_OFFSET:
+			r = r100_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
+					  idx, reg);
+				r100_cs_dump_packet(p, pkt);
+				return r;
+			}
+			tmp = ib_chunk->kdata[idx] & 0x003fffff;
+			tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
+			ib[idx] = (ib_chunk->kdata[idx] & 0xffc00000) | tmp;
+			break;
+		case RADEON_RB3D_DEPTHOFFSET:
+		case RADEON_RB3D_COLOROFFSET:
+		case R300_RB3D_COLOROFFSET0:
+		case R300_ZB_DEPTHOFFSET:
+		case R200_PP_TXOFFSET_0:
+		case R200_PP_TXOFFSET_1:
+		case R200_PP_TXOFFSET_2:
+		case R200_PP_TXOFFSET_3:
+		case R200_PP_TXOFFSET_4:
+		case R200_PP_TXOFFSET_5:
+		case RADEON_PP_TXOFFSET_0:
+		case RADEON_PP_TXOFFSET_1:
+		case RADEON_PP_TXOFFSET_2:
+		case R300_TX_OFFSET_0:
+		case R300_TX_OFFSET_0+4:
+		case R300_TX_OFFSET_0+8:
+		case R300_TX_OFFSET_0+12:
+		case R300_TX_OFFSET_0+16:
+		case R300_TX_OFFSET_0+20:
+		case R300_TX_OFFSET_0+24:
+		case R300_TX_OFFSET_0+28:
+		case R300_TX_OFFSET_0+32:
+		case R300_TX_OFFSET_0+36:
+		case R300_TX_OFFSET_0+40:
+		case R300_TX_OFFSET_0+44:
+		case R300_TX_OFFSET_0+48:
+		case R300_TX_OFFSET_0+52:
+		case R300_TX_OFFSET_0+56:
+		case R300_TX_OFFSET_0+60:
+			r = r100_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
+					  idx, reg);
+				r100_cs_dump_packet(p, pkt);
+				return r;
+			}
+			ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
+			break;
+		default:
+			/* FIXME: we don't want to allow anyothers packet */
+			break;
+		}
+		if (onereg) {
+			/* FIXME: forbid onereg write to register on relocate */
+			break;
+		}
+	}
+	return 0;
+}
+
+static int r100_packet3_check(struct radeon_cs_parser *p,
+			      struct radeon_cs_packet *pkt)
+{
+	struct radeon_cs_chunk *ib_chunk;
+	struct radeon_cs_reloc *reloc;
+	unsigned idx;
+	unsigned i, c;
+	volatile uint32_t *ib;
+	int r;
+
+	ib = p->ib->ptr;
+	ib_chunk = &p->chunks[p->chunk_ib_idx];
+	idx = pkt->idx + 1;
+	switch (pkt->opcode) {
+	case PACKET3_3D_LOAD_VBPNTR:
+		c = ib_chunk->kdata[idx++];
+		for (i = 0; i < (c - 1); i += 2, idx += 3) {
+			r = r100_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				DRM_ERROR("No reloc for packet3 %d\n",
+					  pkt->opcode);
+				r100_cs_dump_packet(p, pkt);
+				return r;
+			}
+			ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
+			r = r100_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				DRM_ERROR("No reloc for packet3 %d\n",
+					  pkt->opcode);
+				r100_cs_dump_packet(p, pkt);
+				return r;
+			}
+			ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset);
+		}
+		if (c & 1) {
+			r = r100_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				DRM_ERROR("No reloc for packet3 %d\n",
+					  pkt->opcode);
+				r100_cs_dump_packet(p, pkt);
+				return r;
+			}
+			ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
+		}
+		break;
+	case PACKET3_INDX_BUFFER:
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+		ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
+		break;
+	case 0x23:
+		/* FIXME: cleanup */
+		/* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+		ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
+		break;
+	case PACKET3_3D_DRAW_IMMD:
+		/* triggers drawing using in-packet vertex data */
+	case PACKET3_3D_DRAW_IMMD_2:
+		/* triggers drawing using in-packet vertex data */
+	case PACKET3_3D_DRAW_VBUF_2:
+		/* triggers drawing of vertex buffers setup elsewhere */
+	case PACKET3_3D_DRAW_INDX_2:
+		/* triggers drawing using indices to vertex buffer */
+	case PACKET3_3D_DRAW_VBUF:
+		/* triggers drawing of vertex buffers setup elsewhere */
+	case PACKET3_3D_DRAW_INDX:
+		/* triggers drawing using indices to vertex buffer */
+	case PACKET3_NOP:
+		break;
+	default:
+		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int r100_cs_parse(struct radeon_cs_parser *p)
+{
+	struct radeon_cs_packet pkt;
+	int r;
+
+	do {
+		r = r100_cs_packet_parse(p, &pkt, p->idx);
+		if (r) {
+			return r;
+		}
+		p->idx += pkt.count + 2;
+		switch (pkt.type) {
+		case PACKET_TYPE0:
+			r = r100_packet0_check(p, &pkt);
+			break;
+		case PACKET_TYPE2:
+			break;
+		case PACKET_TYPE3:
+			r = r100_packet3_check(p, &pkt);
+			break;
+		default:
+			DRM_ERROR("Unknown packet type %d !\n",
+					pkt.type);
+			return -EINVAL;
+		}
+		if (r) {
+			return r;
+		}
+	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+	return 0;
+}
+
+
+/*
+ * Global GPU functions
+ */
+void r100_errata(struct radeon_device *rdev)
+{
+	rdev->pll_errata = 0;
+
+	if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) {
+		rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS;
+	}
+
+	if (rdev->family == CHIP_RV100 ||
+	    rdev->family == CHIP_RS100 ||
+	    rdev->family == CHIP_RS200) {
+		rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY;
+	}
+}
+
+/* Wait for vertical sync on primary CRTC */
+void r100_gpu_wait_for_vsync(struct radeon_device *rdev)
+{
+	uint32_t crtc_gen_cntl, tmp;
+	int i;
+
+	crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL);
+	if ((crtc_gen_cntl & RADEON_CRTC_DISP_REQ_EN_B) ||
+	    !(crtc_gen_cntl & RADEON_CRTC_EN)) {
+		return;
+	}
+	/* Clear the CRTC_VBLANK_SAVE bit */
+	WREG32(RADEON_CRTC_STATUS, RADEON_CRTC_VBLANK_SAVE_CLEAR);
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(RADEON_CRTC_STATUS);
+		if (tmp & RADEON_CRTC_VBLANK_SAVE) {
+			return;
+		}
+		DRM_UDELAY(1);
+	}
+}
+
+/* Wait for vertical sync on secondary CRTC */
+void r100_gpu_wait_for_vsync2(struct radeon_device *rdev)
+{
+	uint32_t crtc2_gen_cntl, tmp;
+	int i;
+
+	crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
+	if ((crtc2_gen_cntl & RADEON_CRTC2_DISP_REQ_EN_B) ||
+	    !(crtc2_gen_cntl & RADEON_CRTC2_EN))
+		return;
+
+	/* Clear the CRTC_VBLANK_SAVE bit */
+	WREG32(RADEON_CRTC2_STATUS, RADEON_CRTC2_VBLANK_SAVE_CLEAR);
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(RADEON_CRTC2_STATUS);
+		if (tmp & RADEON_CRTC2_VBLANK_SAVE) {
+			return;
+		}
+		DRM_UDELAY(1);
+	}
+}
+
+int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
+{
+	unsigned i;
+	uint32_t tmp;
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
+		if (tmp >= n) {
+			return 0;
+		}
+		DRM_UDELAY(1);
+	}
+	return -1;
+}
+
+int r100_gui_wait_for_idle(struct radeon_device *rdev)
+{
+	unsigned i;
+	uint32_t tmp;
+
+	if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) {
+		printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed !"
+		       " Bad things might happen.\n");
+	}
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(RADEON_RBBM_STATUS);
+		if (!(tmp & (1 << 31))) {
+			return 0;
+		}
+		DRM_UDELAY(1);
+	}
+	return -1;
+}
+
+int r100_mc_wait_for_idle(struct radeon_device *rdev)
+{
+	unsigned i;
+	uint32_t tmp;
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		/* read MC_STATUS */
+		tmp = RREG32(0x0150);
+		if (tmp & (1 << 2)) {
+			return 0;
+		}
+		DRM_UDELAY(1);
+	}
+	return -1;
+}
+
+void r100_gpu_init(struct radeon_device *rdev)
+{
+	/* TODO: anythings to do here ? pipes ? */
+	r100_hdp_reset(rdev);
+}
+
+void r100_hdp_reset(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+
+	tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL;
+	tmp |= (7 << 28);
+	WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE);
+	(void)RREG32(RADEON_HOST_PATH_CNTL);
+	udelay(200);
+	WREG32(RADEON_RBBM_SOFT_RESET, 0);
+	WREG32(RADEON_HOST_PATH_CNTL, tmp);
+	(void)RREG32(RADEON_HOST_PATH_CNTL);
+}
+
+int r100_rb2d_reset(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+	int i;
+
+	WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_E2);
+	(void)RREG32(RADEON_RBBM_SOFT_RESET);
+	udelay(200);
+	WREG32(RADEON_RBBM_SOFT_RESET, 0);
+	/* Wait to prevent race in RBBM_STATUS */
+	mdelay(1);
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(RADEON_RBBM_STATUS);
+		if (!(tmp & (1 << 26))) {
+			DRM_INFO("RB2D reset succeed (RBBM_STATUS=0x%08X)\n",
+				 tmp);
+			return 0;
+		}
+		DRM_UDELAY(1);
+	}
+	tmp = RREG32(RADEON_RBBM_STATUS);
+	DRM_ERROR("Failed to reset RB2D (RBBM_STATUS=0x%08X)!\n", tmp);
+	return -1;
+}
+
+int r100_gpu_reset(struct radeon_device *rdev)
+{
+	uint32_t status;
+
+	/* reset order likely matter */
+	status = RREG32(RADEON_RBBM_STATUS);
+	/* reset HDP */
+	r100_hdp_reset(rdev);
+	/* reset rb2d */
+	if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
+		r100_rb2d_reset(rdev);
+	}
+	/* TODO: reset 3D engine */
+	/* reset CP */
+	status = RREG32(RADEON_RBBM_STATUS);
+	if (status & (1 << 16)) {
+		r100_cp_reset(rdev);
+	}
+	/* Check if GPU is idle */
+	status = RREG32(RADEON_RBBM_STATUS);
+	if (status & (1 << 31)) {
+		DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
+		return -1;
+	}
+	DRM_INFO("GPU reset succeed (RBBM_STATUS=0x%08X)\n", status);
+	return 0;
+}
+
+
+/*
+ * VRAM info
+ */
+static void r100_vram_get_type(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+
+	rdev->mc.vram_is_ddr = false;
+	if (rdev->flags & RADEON_IS_IGP)
+		rdev->mc.vram_is_ddr = true;
+	else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR)
+		rdev->mc.vram_is_ddr = true;
+	if ((rdev->family == CHIP_RV100) ||
+	    (rdev->family == CHIP_RS100) ||
+	    (rdev->family == CHIP_RS200)) {
+		tmp = RREG32(RADEON_MEM_CNTL);
+		if (tmp & RV100_HALF_MODE) {
+			rdev->mc.vram_width = 32;
+		} else {
+			rdev->mc.vram_width = 64;
+		}
+		if (rdev->flags & RADEON_SINGLE_CRTC) {
+			rdev->mc.vram_width /= 4;
+			rdev->mc.vram_is_ddr = true;
+		}
+	} else if (rdev->family <= CHIP_RV280) {
+		tmp = RREG32(RADEON_MEM_CNTL);
+		if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) {
+			rdev->mc.vram_width = 128;
+		} else {
+			rdev->mc.vram_width = 64;
+		}
+	} else {
+		/* newer IGPs */
+		rdev->mc.vram_width = 128;
+	}
+}
+
+void r100_vram_info(struct radeon_device *rdev)
+{
+	r100_vram_get_type(rdev);
+
+	if (rdev->flags & RADEON_IS_IGP) {
+		uint32_t tom;
+		/* read NB_TOM to get the amount of ram stolen for the GPU */
+		tom = RREG32(RADEON_NB_TOM);
+		rdev->mc.vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
+		WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
+	} else {
+		rdev->mc.vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
+		/* Some production boards of m6 will report 0
+		 * if it's 8 MB
+		 */
+		if (rdev->mc.vram_size == 0) {
+			rdev->mc.vram_size = 8192 * 1024;
+			WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
+		}
+	}
+
+	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
+	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+	if (rdev->mc.aper_size > rdev->mc.vram_size) {
+		/* Why does some hw doesn't have CONFIG_MEMSIZE properly
+		 * setup ? */
+		rdev->mc.vram_size = rdev->mc.aper_size;
+		WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
+	}
+}
+
+
+/*
+ * Indirect registers accessor
+ */
+void r100_pll_errata_after_index(struct radeon_device *rdev)
+{
+	if (!(rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS)) {
+		return;
+	}
+	(void)RREG32(RADEON_CLOCK_CNTL_DATA);
+	(void)RREG32(RADEON_CRTC_GEN_CNTL);
+}
+
+static void r100_pll_errata_after_data(struct radeon_device *rdev)
+{
+	/* This workarounds is necessary on RV100, RS100 and RS200 chips
+	 * or the chip could hang on a subsequent access
+	 */
+	if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
+		udelay(5000);
+	}
+
+	/* This function is required to workaround a hardware bug in some (all?)
+	 * revisions of the R300.  This workaround should be called after every
+	 * CLOCK_CNTL_INDEX register access.  If not, register reads afterward
+	 * may not be correct.
+	 */
+	if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
+		uint32_t save, tmp;
+
+		save = RREG32(RADEON_CLOCK_CNTL_INDEX);
+		tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
+		WREG32(RADEON_CLOCK_CNTL_INDEX, tmp);
+		tmp = RREG32(RADEON_CLOCK_CNTL_DATA);
+		WREG32(RADEON_CLOCK_CNTL_INDEX, save);
+	}
+}
+
+uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg)
+{
+	uint32_t data;
+
+	WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f);
+	r100_pll_errata_after_index(rdev);
+	data = RREG32(RADEON_CLOCK_CNTL_DATA);
+	r100_pll_errata_after_data(rdev);
+	return data;
+}
+
+void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+	WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN));
+	r100_pll_errata_after_index(rdev);
+	WREG32(RADEON_CLOCK_CNTL_DATA, v);
+	r100_pll_errata_after_data(rdev);
+}
+
+uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
+{
+	if (reg < 0x10000)
+		return readl(((void __iomem *)rdev->rmmio) + reg);
+	else {
+		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
+		return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+	}
+}
+
+void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+	if (reg < 0x10000)
+		writel(v, ((void __iomem *)rdev->rmmio) + reg);
+	else {
+		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
+		writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+	}
+}
+
+/*
+ * Debugfs info
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int r100_debugfs_rbbm_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t reg, value;
+	unsigned i;
+
+	seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
+	seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
+	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
+	for (i = 0; i < 64; i++) {
+		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
+		reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
+		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i);
+		value = RREG32(RADEON_RBBM_CMDFIFO_DATA);
+		seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
+	}
+	return 0;
+}
+
+static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t rdp, wdp;
+	unsigned count, i, j;
+
+	radeon_ring_free_size(rdev);
+	rdp = RREG32(RADEON_CP_RB_RPTR);
+	wdp = RREG32(RADEON_CP_RB_WPTR);
+	count = (rdp + rdev->cp.ring_size - wdp) & rdev->cp.ptr_mask;
+	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
+	seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
+	seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
+	seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw);
+	seq_printf(m, "%u dwords in ring\n", count);
+	for (j = 0; j <= count; j++) {
+		i = (rdp + j) & rdev->cp.ptr_mask;
+		seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]);
+	}
+	return 0;
+}
+
+
+static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t csq_stat, csq2_stat, tmp;
+	unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr;
+	unsigned i;
+
+	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
+	seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE));
+	csq_stat = RREG32(RADEON_CP_CSQ_STAT);
+	csq2_stat = RREG32(RADEON_CP_CSQ2_STAT);
+	r_rptr = (csq_stat >> 0) & 0x3ff;
+	r_wptr = (csq_stat >> 10) & 0x3ff;
+	ib1_rptr = (csq_stat >> 20) & 0x3ff;
+	ib1_wptr = (csq2_stat >> 0) & 0x3ff;
+	ib2_rptr = (csq2_stat >> 10) & 0x3ff;
+	ib2_wptr = (csq2_stat >> 20) & 0x3ff;
+	seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat);
+	seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat);
+	seq_printf(m, "Ring rptr %u\n", r_rptr);
+	seq_printf(m, "Ring wptr %u\n", r_wptr);
+	seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr);
+	seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr);
+	seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr);
+	seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr);
+	/* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms
+	 * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */
+	seq_printf(m, "Ring fifo:\n");
+	for (i = 0; i < 256; i++) {
+		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
+		tmp = RREG32(RADEON_CP_CSQ_DATA);
+		seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp);
+	}
+	seq_printf(m, "Indirect1 fifo:\n");
+	for (i = 256; i <= 512; i++) {
+		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
+		tmp = RREG32(RADEON_CP_CSQ_DATA);
+		seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp);
+	}
+	seq_printf(m, "Indirect2 fifo:\n");
+	for (i = 640; i < ib1_wptr; i++) {
+		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
+		tmp = RREG32(RADEON_CP_CSQ_DATA);
+		seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp);
+	}
+	return 0;
+}
+
+static int r100_debugfs_mc_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t tmp;
+
+	tmp = RREG32(RADEON_CONFIG_MEMSIZE);
+	seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp);
+	tmp = RREG32(RADEON_MC_FB_LOCATION);
+	seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp);
+	tmp = RREG32(RADEON_BUS_CNTL);
+	seq_printf(m, "BUS_CNTL 0x%08x\n", tmp);
+	tmp = RREG32(RADEON_MC_AGP_LOCATION);
+	seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp);
+	tmp = RREG32(RADEON_AGP_BASE);
+	seq_printf(m, "AGP_BASE 0x%08x\n", tmp);
+	tmp = RREG32(RADEON_HOST_PATH_CNTL);
+	seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp);
+	tmp = RREG32(0x01D0);
+	seq_printf(m, "AIC_CTRL 0x%08x\n", tmp);
+	tmp = RREG32(RADEON_AIC_LO_ADDR);
+	seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp);
+	tmp = RREG32(RADEON_AIC_HI_ADDR);
+	seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp);
+	tmp = RREG32(0x01E4);
+	seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp);
+	return 0;
+}
+
+static struct drm_info_list r100_debugfs_rbbm_list[] = {
+	{"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL},
+};
+
+static struct drm_info_list r100_debugfs_cp_list[] = {
+	{"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL},
+	{"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL},
+};
+
+static struct drm_info_list r100_debugfs_mc_info_list[] = {
+	{"r100_mc_info", r100_debugfs_mc_info, 0, NULL},
+};
+#endif
+
+int r100_debugfs_rbbm_init(struct radeon_device *rdev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list, 1);
+#else
+	return 0;
+#endif
+}
+
+int r100_debugfs_cp_init(struct radeon_device *rdev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list, 2);
+#else
+	return 0;
+#endif
+}
+
+int r100_debugfs_mc_info_init(struct radeon_device *rdev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list, 1);
+#else
+	return 0;
+#endif
+}
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
new file mode 100644
index 00000000000..f5870a099d4
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -0,0 +1,1116 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#include <linux/seq_file.h>
+#include "drmP.h"
+#include "drm.h"
+#include "radeon_reg.h"
+#include "radeon.h"
+
+/* r300,r350,rv350,rv370,rv380 depends on : */
+void r100_hdp_reset(struct radeon_device *rdev);
+int r100_cp_reset(struct radeon_device *rdev);
+int r100_rb2d_reset(struct radeon_device *rdev);
+int r100_cp_init(struct radeon_device *rdev, unsigned ring_size);
+int r100_pci_gart_enable(struct radeon_device *rdev);
+void r100_pci_gart_disable(struct radeon_device *rdev);
+void r100_mc_setup(struct radeon_device *rdev);
+void r100_mc_disable_clients(struct radeon_device *rdev);
+int r100_gui_wait_for_idle(struct radeon_device *rdev);
+int r100_cs_packet_parse(struct radeon_cs_parser *p,
+			 struct radeon_cs_packet *pkt,
+			 unsigned idx);
+int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
+			      struct radeon_cs_reloc **cs_reloc);
+int r100_cs_parse_packet0(struct radeon_cs_parser *p,
+			  struct radeon_cs_packet *pkt,
+			  unsigned *auth, unsigned n,
+			  radeon_packet0_check_t check);
+int r100_cs_parse_packet3(struct radeon_cs_parser *p,
+			  struct radeon_cs_packet *pkt,
+			  unsigned *auth, unsigned n,
+			  radeon_packet3_check_t check);
+void r100_cs_dump_packet(struct radeon_cs_parser *p,
+			 struct radeon_cs_packet *pkt);
+
+/* This files gather functions specifics to:
+ * r300,r350,rv350,rv370,rv380
+ *
+ * Some of these functions might be used by newer ASICs.
+ */
+void r300_gpu_init(struct radeon_device *rdev);
+int r300_mc_wait_for_idle(struct radeon_device *rdev);
+int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev);
+
+
+/*
+ * rv370,rv380 PCIE GART
+ */
+void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+	int i;
+
+	/* Workaround HW bug do flush 2 times */
+	for (i = 0; i < 2; i++) {
+		tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
+		WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp | RADEON_PCIE_TX_GART_INVALIDATE_TLB);
+		(void)RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
+		WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
+		mb();
+	}
+}
+
+int rv370_pcie_gart_enable(struct radeon_device *rdev)
+{
+	uint32_t table_addr;
+	uint32_t tmp;
+	int r;
+
+	/* Initialize common gart structure */
+	r = radeon_gart_init(rdev);
+	if (r) {
+		return r;
+	}
+	r = rv370_debugfs_pcie_gart_info_init(rdev);
+	if (r) {
+		DRM_ERROR("Failed to register debugfs file for PCIE gart !\n");
+	}
+	rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
+	r = radeon_gart_table_vram_alloc(rdev);
+	if (r) {
+		return r;
+	}
+	/* discard memory request outside of configured range */
+	tmp = RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
+	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
+	WREG32_PCIE(RADEON_PCIE_TX_GART_START_LO, rdev->mc.gtt_location);
+	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 4096;
+	WREG32_PCIE(RADEON_PCIE_TX_GART_END_LO, tmp);
+	WREG32_PCIE(RADEON_PCIE_TX_GART_START_HI, 0);
+	WREG32_PCIE(RADEON_PCIE_TX_GART_END_HI, 0);
+	table_addr = rdev->gart.table_addr;
+	WREG32_PCIE(RADEON_PCIE_TX_GART_BASE, table_addr);
+	/* FIXME: setup default page */
+	WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_LO, rdev->mc.vram_location);
+	WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_HI, 0);
+	/* Clear error */
+	WREG32_PCIE(0x18, 0);
+	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
+	tmp |= RADEON_PCIE_TX_GART_EN;
+	tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
+	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
+	rv370_pcie_gart_tlb_flush(rdev);
+	DRM_INFO("PCIE GART of %uM enabled (table at 0x%08X).\n",
+		 rdev->mc.gtt_size >> 20, table_addr);
+	rdev->gart.ready = true;
+	return 0;
+}
+
+void rv370_pcie_gart_disable(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+
+	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
+	tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
+	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp & ~RADEON_PCIE_TX_GART_EN);
+	if (rdev->gart.table.vram.robj) {
+		radeon_object_kunmap(rdev->gart.table.vram.robj);
+		radeon_object_unpin(rdev->gart.table.vram.robj);
+	}
+}
+
+int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
+{
+	void __iomem *ptr = (void *)rdev->gart.table.vram.ptr;
+
+	if (i < 0 || i > rdev->gart.num_gpu_pages) {
+		return -EINVAL;
+	}
+	addr = (((u32)addr) >> 8) | ((upper_32_bits(addr) & 0xff) << 4) | 0xC;
+	writel(cpu_to_le32(addr), ((void __iomem *)ptr) + (i * 4));
+	return 0;
+}
+
+int r300_gart_enable(struct radeon_device *rdev)
+{
+#if __OS_HAS_AGP
+	if (rdev->flags & RADEON_IS_AGP) {
+		if (rdev->family > CHIP_RV350) {
+			rv370_pcie_gart_disable(rdev);
+		} else {
+			r100_pci_gart_disable(rdev);
+		}
+		return 0;
+	}
+#endif
+	if (rdev->flags & RADEON_IS_PCIE) {
+		rdev->asic->gart_disable = &rv370_pcie_gart_disable;
+		rdev->asic->gart_tlb_flush = &rv370_pcie_gart_tlb_flush;
+		rdev->asic->gart_set_page = &rv370_pcie_gart_set_page;
+		return rv370_pcie_gart_enable(rdev);
+	}
+	return r100_pci_gart_enable(rdev);
+}
+
+
+/*
+ * MC
+ */
+int r300_mc_init(struct radeon_device *rdev)
+{
+	int r;
+
+	if (r100_debugfs_rbbm_init(rdev)) {
+		DRM_ERROR("Failed to register debugfs file for RBBM !\n");
+	}
+
+	r300_gpu_init(rdev);
+	r100_pci_gart_disable(rdev);
+	if (rdev->flags & RADEON_IS_PCIE) {
+		rv370_pcie_gart_disable(rdev);
+	}
+
+	/* Setup GPU memory space */
+	rdev->mc.vram_location = 0xFFFFFFFFUL;
+	rdev->mc.gtt_location = 0xFFFFFFFFUL;
+	if (rdev->flags & RADEON_IS_AGP) {
+		r = radeon_agp_init(rdev);
+		if (r) {
+			printk(KERN_WARNING "[drm] Disabling AGP\n");
+			rdev->flags &= ~RADEON_IS_AGP;
+			rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
+		} else {
+			rdev->mc.gtt_location = rdev->mc.agp_base;
+		}
+	}
+	r = radeon_mc_setup(rdev);
+	if (r) {
+		return r;
+	}
+
+	/* Program GPU memory space */
+	r100_mc_disable_clients(rdev);
+	if (r300_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait MC idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+	r100_mc_setup(rdev);
+	return 0;
+}
+
+void r300_mc_fini(struct radeon_device *rdev)
+{
+	if (rdev->flags & RADEON_IS_PCIE) {
+		rv370_pcie_gart_disable(rdev);
+		radeon_gart_table_vram_free(rdev);
+	} else {
+		r100_pci_gart_disable(rdev);
+		radeon_gart_table_ram_free(rdev);
+	}
+	radeon_gart_fini(rdev);
+}
+
+
+/*
+ * Fence emission
+ */
+void r300_fence_ring_emit(struct radeon_device *rdev,
+			  struct radeon_fence *fence)
+{
+	/* Who ever call radeon_fence_emit should call ring_lock and ask
+	 * for enough space (today caller are ib schedule and buffer move) */
+	/* Write SC register so SC & US assert idle */
+	radeon_ring_write(rdev, PACKET0(0x43E0, 0));
+	radeon_ring_write(rdev, 0);
+	radeon_ring_write(rdev, PACKET0(0x43E4, 0));
+	radeon_ring_write(rdev, 0);
+	/* Flush 3D cache */
+	radeon_ring_write(rdev, PACKET0(0x4E4C, 0));
+	radeon_ring_write(rdev, (2 << 0));
+	radeon_ring_write(rdev, PACKET0(0x4F18, 0));
+	radeon_ring_write(rdev, (1 << 0));
+	/* Wait until IDLE & CLEAN */
+	radeon_ring_write(rdev, PACKET0(0x1720, 0));
+	radeon_ring_write(rdev, (1 << 17) | (1 << 16)  | (1 << 9));
+	/* Emit fence sequence & fire IRQ */
+	radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
+	radeon_ring_write(rdev, fence->seq);
+	radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
+	radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
+}
+
+
+/*
+ * Global GPU functions
+ */
+int r300_copy_dma(struct radeon_device *rdev,
+		  uint64_t src_offset,
+		  uint64_t dst_offset,
+		  unsigned num_pages,
+		  struct radeon_fence *fence)
+{
+	uint32_t size;
+	uint32_t cur_size;
+	int i, num_loops;
+	int r = 0;
+
+	/* radeon pitch is /64 */
+	size = num_pages << PAGE_SHIFT;
+	num_loops = DIV_ROUND_UP(size, 0x1FFFFF);
+	r = radeon_ring_lock(rdev, num_loops * 4 + 64);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		return r;
+	}
+	/* Must wait for 2D idle & clean before DMA or hangs might happen */
+	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
+	radeon_ring_write(rdev, (1 << 16));
+	for (i = 0; i < num_loops; i++) {
+		cur_size = size;
+		if (cur_size > 0x1FFFFF) {
+			cur_size = 0x1FFFFF;
+		}
+		size -= cur_size;
+		radeon_ring_write(rdev, PACKET0(0x720, 2));
+		radeon_ring_write(rdev, src_offset);
+		radeon_ring_write(rdev, dst_offset);
+		radeon_ring_write(rdev, cur_size | (1 << 31) | (1 << 30));
+		src_offset += cur_size;
+		dst_offset += cur_size;
+	}
+	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
+	radeon_ring_write(rdev, RADEON_WAIT_DMA_GUI_IDLE);
+	if (fence) {
+		r = radeon_fence_emit(rdev, fence);
+	}
+	radeon_ring_unlock_commit(rdev);
+	return r;
+}
+
+void r300_ring_start(struct radeon_device *rdev)
+{
+	unsigned gb_tile_config;
+	int r;
+
+	/* Sub pixel 1/12 so we can have 4K rendering according to doc */
+	gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
+	switch (rdev->num_gb_pipes) {
+	case 2:
+		gb_tile_config |= R300_PIPE_COUNT_R300;
+		break;
+	case 3:
+		gb_tile_config |= R300_PIPE_COUNT_R420_3P;
+		break;
+	case 4:
+		gb_tile_config |= R300_PIPE_COUNT_R420;
+		break;
+	case 1:
+	default:
+		gb_tile_config |= R300_PIPE_COUNT_RV350;
+		break;
+	}
+
+	r = radeon_ring_lock(rdev, 64);
+	if (r) {
+		return;
+	}
+	radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
+	radeon_ring_write(rdev,
+			  RADEON_ISYNC_ANY2D_IDLE3D |
+			  RADEON_ISYNC_ANY3D_IDLE2D |
+			  RADEON_ISYNC_WAIT_IDLEGUI |
+			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
+	radeon_ring_write(rdev, PACKET0(R300_GB_TILE_CONFIG, 0));
+	radeon_ring_write(rdev, gb_tile_config);
+	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
+	radeon_ring_write(rdev,
+			  RADEON_WAIT_2D_IDLECLEAN |
+			  RADEON_WAIT_3D_IDLECLEAN);
+	radeon_ring_write(rdev, PACKET0(0x170C, 0));
+	radeon_ring_write(rdev, 1 << 31);
+	radeon_ring_write(rdev, PACKET0(R300_GB_SELECT, 0));
+	radeon_ring_write(rdev, 0);
+	radeon_ring_write(rdev, PACKET0(R300_GB_ENABLE, 0));
+	radeon_ring_write(rdev, 0);
+	radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+	radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
+	radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
+	radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
+	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
+	radeon_ring_write(rdev,
+			  RADEON_WAIT_2D_IDLECLEAN |
+			  RADEON_WAIT_3D_IDLECLEAN);
+	radeon_ring_write(rdev, PACKET0(R300_GB_AA_CONFIG, 0));
+	radeon_ring_write(rdev, 0);
+	radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+	radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
+	radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
+	radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
+	radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS0, 0));
+	radeon_ring_write(rdev,
+			  ((6 << R300_MS_X0_SHIFT) |
+			   (6 << R300_MS_Y0_SHIFT) |
+			   (6 << R300_MS_X1_SHIFT) |
+			   (6 << R300_MS_Y1_SHIFT) |
+			   (6 << R300_MS_X2_SHIFT) |
+			   (6 << R300_MS_Y2_SHIFT) |
+			   (6 << R300_MSBD0_Y_SHIFT) |
+			   (6 << R300_MSBD0_X_SHIFT)));
+	radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS1, 0));
+	radeon_ring_write(rdev,
+			  ((6 << R300_MS_X3_SHIFT) |
+			   (6 << R300_MS_Y3_SHIFT) |
+			   (6 << R300_MS_X4_SHIFT) |
+			   (6 << R300_MS_Y4_SHIFT) |
+			   (6 << R300_MS_X5_SHIFT) |
+			   (6 << R300_MS_Y5_SHIFT) |
+			   (6 << R300_MSBD1_SHIFT)));
+	radeon_ring_write(rdev, PACKET0(R300_GA_ENHANCE, 0));
+	radeon_ring_write(rdev, R300_GA_DEADLOCK_CNTL | R300_GA_FASTSYNC_CNTL);
+	radeon_ring_write(rdev, PACKET0(R300_GA_POLY_MODE, 0));
+	radeon_ring_write(rdev,
+			  R300_FRONT_PTYPE_TRIANGE | R300_BACK_PTYPE_TRIANGE);
+	radeon_ring_write(rdev, PACKET0(R300_GA_ROUND_MODE, 0));
+	radeon_ring_write(rdev,
+			  R300_GEOMETRY_ROUND_NEAREST |
+			  R300_COLOR_ROUND_NEAREST);
+	radeon_ring_unlock_commit(rdev);
+}
+
+void r300_errata(struct radeon_device *rdev)
+{
+	rdev->pll_errata = 0;
+
+	if (rdev->family == CHIP_R300 &&
+	    (RREG32(RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) == RADEON_CFG_ATI_REV_A11) {
+		rdev->pll_errata |= CHIP_ERRATA_R300_CG;
+	}
+}
+
+int r300_mc_wait_for_idle(struct radeon_device *rdev)
+{
+	unsigned i;
+	uint32_t tmp;
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		/* read MC_STATUS */
+		tmp = RREG32(0x0150);
+		if (tmp & (1 << 4)) {
+			return 0;
+		}
+		DRM_UDELAY(1);
+	}
+	return -1;
+}
+
+void r300_gpu_init(struct radeon_device *rdev)
+{
+	uint32_t gb_tile_config, tmp;
+
+	r100_hdp_reset(rdev);
+	/* FIXME: rv380 one pipes ? */
+	if ((rdev->family == CHIP_R300) || (rdev->family == CHIP_R350)) {
+		/* r300,r350 */
+		rdev->num_gb_pipes = 2;
+	} else {
+		/* rv350,rv370,rv380 */
+		rdev->num_gb_pipes = 1;
+	}
+	gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
+	switch (rdev->num_gb_pipes) {
+	case 2:
+		gb_tile_config |= R300_PIPE_COUNT_R300;
+		break;
+	case 3:
+		gb_tile_config |= R300_PIPE_COUNT_R420_3P;
+		break;
+	case 4:
+		gb_tile_config |= R300_PIPE_COUNT_R420;
+		break;
+	case 1:
+	default:
+		gb_tile_config |= R300_PIPE_COUNT_RV350;
+		break;
+	}
+	WREG32(R300_GB_TILE_CONFIG, gb_tile_config);
+
+	if (r100_gui_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait GUI idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+
+	tmp = RREG32(0x170C);
+	WREG32(0x170C, tmp | (1 << 31));
+
+	WREG32(R300_RB2D_DSTCACHE_MODE,
+	       R300_DC_AUTOFLUSH_ENABLE |
+	       R300_DC_DC_DISABLE_IGNORE_PE);
+
+	if (r100_gui_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait GUI idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+	if (r300_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait MC idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+	DRM_INFO("radeon: %d pipes initialized.\n", rdev->num_gb_pipes);
+}
+
+int r300_ga_reset(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+	bool reinit_cp;
+	int i;
+
+	reinit_cp = rdev->cp.ready;
+	rdev->cp.ready = false;
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		WREG32(RADEON_CP_CSQ_MODE, 0);
+		WREG32(RADEON_CP_CSQ_CNTL, 0);
+		WREG32(RADEON_RBBM_SOFT_RESET, 0x32005);
+		(void)RREG32(RADEON_RBBM_SOFT_RESET);
+		udelay(200);
+		WREG32(RADEON_RBBM_SOFT_RESET, 0);
+		/* Wait to prevent race in RBBM_STATUS */
+		mdelay(1);
+		tmp = RREG32(RADEON_RBBM_STATUS);
+		if (tmp & ((1 << 20) | (1 << 26))) {
+			DRM_ERROR("VAP & CP still busy (RBBM_STATUS=0x%08X)", tmp);
+			/* GA still busy soft reset it */
+			WREG32(0x429C, 0x200);
+			WREG32(R300_VAP_PVS_STATE_FLUSH_REG, 0);
+			WREG32(0x43E0, 0);
+			WREG32(0x43E4, 0);
+			WREG32(0x24AC, 0);
+		}
+		/* Wait to prevent race in RBBM_STATUS */
+		mdelay(1);
+		tmp = RREG32(RADEON_RBBM_STATUS);
+		if (!(tmp & ((1 << 20) | (1 << 26)))) {
+			break;
+		}
+	}
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(RADEON_RBBM_STATUS);
+		if (!(tmp & ((1 << 20) | (1 << 26)))) {
+			DRM_INFO("GA reset succeed (RBBM_STATUS=0x%08X)\n",
+				 tmp);
+			if (reinit_cp) {
+				return r100_cp_init(rdev, rdev->cp.ring_size);
+			}
+			return 0;
+		}
+		DRM_UDELAY(1);
+	}
+	tmp = RREG32(RADEON_RBBM_STATUS);
+	DRM_ERROR("Failed to reset GA ! (RBBM_STATUS=0x%08X)\n", tmp);
+	return -1;
+}
+
+int r300_gpu_reset(struct radeon_device *rdev)
+{
+	uint32_t status;
+
+	/* reset order likely matter */
+	status = RREG32(RADEON_RBBM_STATUS);
+	/* reset HDP */
+	r100_hdp_reset(rdev);
+	/* reset rb2d */
+	if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
+		r100_rb2d_reset(rdev);
+	}
+	/* reset GA */
+	if (status & ((1 << 20) | (1 << 26))) {
+		r300_ga_reset(rdev);
+	}
+	/* reset CP */
+	status = RREG32(RADEON_RBBM_STATUS);
+	if (status & (1 << 16)) {
+		r100_cp_reset(rdev);
+	}
+	/* Check if GPU is idle */
+	status = RREG32(RADEON_RBBM_STATUS);
+	if (status & (1 << 31)) {
+		DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
+		return -1;
+	}
+	DRM_INFO("GPU reset succeed (RBBM_STATUS=0x%08X)\n", status);
+	return 0;
+}
+
+
+/*
+ * r300,r350,rv350,rv380 VRAM info
+ */
+void r300_vram_info(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+
+	/* DDR for all card after R300 & IGP */
+	rdev->mc.vram_is_ddr = true;
+	tmp = RREG32(RADEON_MEM_CNTL);
+	if (tmp & R300_MEM_NUM_CHANNELS_MASK) {
+		rdev->mc.vram_width = 128;
+	} else {
+		rdev->mc.vram_width = 64;
+	}
+	rdev->mc.vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
+
+	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
+	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+}
+
+
+/*
+ * Indirect registers accessor
+ */
+uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg)
+{
+	uint32_t r;
+
+	WREG8(RADEON_PCIE_INDEX, ((reg) & 0xff));
+	(void)RREG32(RADEON_PCIE_INDEX);
+	r = RREG32(RADEON_PCIE_DATA);
+	return r;
+}
+
+void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+	WREG8(RADEON_PCIE_INDEX, ((reg) & 0xff));
+	(void)RREG32(RADEON_PCIE_INDEX);
+	WREG32(RADEON_PCIE_DATA, (v));
+	(void)RREG32(RADEON_PCIE_DATA);
+}
+
+/*
+ * PCIE Lanes
+ */
+
+void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes)
+{
+	uint32_t link_width_cntl, mask;
+
+	if (rdev->flags & RADEON_IS_IGP)
+		return;
+
+	if (!(rdev->flags & RADEON_IS_PCIE))
+		return;
+
+	/* FIXME wait for idle */
+
+	switch (lanes) {
+	case 0:
+		mask = RADEON_PCIE_LC_LINK_WIDTH_X0;
+		break;
+	case 1:
+		mask = RADEON_PCIE_LC_LINK_WIDTH_X1;
+		break;
+	case 2:
+		mask = RADEON_PCIE_LC_LINK_WIDTH_X2;
+		break;
+	case 4:
+		mask = RADEON_PCIE_LC_LINK_WIDTH_X4;
+		break;
+	case 8:
+		mask = RADEON_PCIE_LC_LINK_WIDTH_X8;
+		break;
+	case 12:
+		mask = RADEON_PCIE_LC_LINK_WIDTH_X12;
+		break;
+	case 16:
+	default:
+		mask = RADEON_PCIE_LC_LINK_WIDTH_X16;
+		break;
+	}
+
+	link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
+
+	if ((link_width_cntl & RADEON_PCIE_LC_LINK_WIDTH_RD_MASK) ==
+	    (mask << RADEON_PCIE_LC_LINK_WIDTH_RD_SHIFT))
+		return;
+
+	link_width_cntl &= ~(RADEON_PCIE_LC_LINK_WIDTH_MASK |
+			     RADEON_PCIE_LC_RECONFIG_NOW |
+			     RADEON_PCIE_LC_RECONFIG_LATER |
+			     RADEON_PCIE_LC_SHORT_RECONFIG_EN);
+	link_width_cntl |= mask;
+	WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
+	WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, (link_width_cntl |
+						     RADEON_PCIE_LC_RECONFIG_NOW));
+
+	/* wait for lane set to complete */
+	link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
+	while (link_width_cntl == 0xffffffff)
+		link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
+
+}
+
+
+/*
+ * Debugfs info
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int rv370_debugfs_pcie_gart_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t tmp;
+
+	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
+	seq_printf(m, "PCIE_TX_GART_CNTL 0x%08x\n", tmp);
+	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_BASE);
+	seq_printf(m, "PCIE_TX_GART_BASE 0x%08x\n", tmp);
+	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_START_LO);
+	seq_printf(m, "PCIE_TX_GART_START_LO 0x%08x\n", tmp);
+	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_START_HI);
+	seq_printf(m, "PCIE_TX_GART_START_HI 0x%08x\n", tmp);
+	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_END_LO);
+	seq_printf(m, "PCIE_TX_GART_END_LO 0x%08x\n", tmp);
+	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_END_HI);
+	seq_printf(m, "PCIE_TX_GART_END_HI 0x%08x\n", tmp);
+	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_ERROR);
+	seq_printf(m, "PCIE_TX_GART_ERROR 0x%08x\n", tmp);
+	return 0;
+}
+
+static struct drm_info_list rv370_pcie_gart_info_list[] = {
+	{"rv370_pcie_gart_info", rv370_debugfs_pcie_gart_info, 0, NULL},
+};
+#endif
+
+int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	return radeon_debugfs_add_files(rdev, rv370_pcie_gart_info_list, 1);
+#else
+	return 0;
+#endif
+}
+
+
+/*
+ * CS functions
+ */
+struct r300_cs_track_cb {
+	struct radeon_object	*robj;
+	unsigned		pitch;
+	unsigned		cpp;
+	unsigned		offset;
+};
+
+struct r300_cs_track {
+	unsigned		num_cb;
+	unsigned		maxy;
+	struct r300_cs_track_cb cb[4];
+	struct r300_cs_track_cb zb;
+	bool			z_enabled;
+};
+
+int r300_cs_track_check(struct radeon_device *rdev, struct r300_cs_track *track)
+{
+	unsigned i;
+	unsigned long size;
+
+	for (i = 0; i < track->num_cb; i++) {
+		if (track->cb[i].robj == NULL) {
+			DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
+			return -EINVAL;
+		}
+		size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
+		size += track->cb[i].offset;
+		if (size > radeon_object_size(track->cb[i].robj)) {
+			DRM_ERROR("[drm] Buffer too small for color buffer %d "
+				  "(need %lu have %lu) !\n", i, size,
+				  radeon_object_size(track->cb[i].robj));
+			DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
+				  i, track->cb[i].pitch, track->cb[i].cpp,
+				  track->cb[i].offset, track->maxy);
+			return -EINVAL;
+		}
+	}
+	if (track->z_enabled) {
+		if (track->zb.robj == NULL) {
+			DRM_ERROR("[drm] No buffer for z buffer !\n");
+			return -EINVAL;
+		}
+		size = track->zb.pitch * track->zb.cpp * track->maxy;
+		size += track->zb.offset;
+		if (size > radeon_object_size(track->zb.robj)) {
+			DRM_ERROR("[drm] Buffer too small for z buffer "
+				  "(need %lu have %lu) !\n", size,
+				  radeon_object_size(track->zb.robj));
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+static inline void r300_cs_track_clear(struct r300_cs_track *track)
+{
+	unsigned i;
+
+	track->num_cb = 4;
+	track->maxy = 4096;
+	for (i = 0; i < track->num_cb; i++) {
+		track->cb[i].robj = NULL;
+		track->cb[i].pitch = 8192;
+		track->cb[i].cpp = 16;
+		track->cb[i].offset = 0;
+	}
+	track->z_enabled = true;
+	track->zb.robj = NULL;
+	track->zb.pitch = 8192;
+	track->zb.cpp = 4;
+	track->zb.offset = 0;
+}
+
+static unsigned r300_auth_reg[] = {
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFBF, 0xFFFFFFFF, 0xFFFFFFBF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0x17FF1FFF, 0xFFFFFFFC, 0xFFFFFFFF, 0xFF30FFBF,
+	0xFFFFFFF8, 0xC3E6FFFF, 0xFFFFF6DF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF03F,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFCFCC, 0xF00E9FFF, 0x007C0000,
+	0xF0000078, 0xFF000009, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFF7FF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+	0xFFFFFC78, 0xFFFFFFFF, 0xFFFFFFFC, 0xFFFFFFFF,
+	0x38FF8F50, 0xFFF88082, 0xF000000C, 0xFAE009FF,
+	0x00000000, 0x00000000, 0xFFFF0000, 0x00000000,
+	0x00000000, 0x0000C100, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0xFFFF0000, 0xFFFFFFFF, 0xFF80FFFF,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x0003FC01, 0xFFFFFFF8, 0xFE800B19,
+};
+
+static int r300_packet0_check(struct radeon_cs_parser *p,
+		struct radeon_cs_packet *pkt,
+		unsigned idx, unsigned reg)
+{
+	struct radeon_cs_chunk *ib_chunk;
+	struct radeon_cs_reloc *reloc;
+	struct r300_cs_track *track;
+	volatile uint32_t *ib;
+	uint32_t tmp;
+	unsigned i;
+	int r;
+
+	ib = p->ib->ptr;
+	ib_chunk = &p->chunks[p->chunk_ib_idx];
+	track = (struct r300_cs_track *)p->track;
+	switch (reg) {
+	case RADEON_DST_PITCH_OFFSET:
+	case RADEON_SRC_PITCH_OFFSET:
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
+					idx, reg);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+		tmp = ib_chunk->kdata[idx] & 0x003fffff;
+		tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
+		ib[idx] = (ib_chunk->kdata[idx] & 0xffc00000) | tmp;
+		break;
+	case R300_RB3D_COLOROFFSET0:
+	case R300_RB3D_COLOROFFSET1:
+	case R300_RB3D_COLOROFFSET2:
+	case R300_RB3D_COLOROFFSET3:
+		i = (reg - R300_RB3D_COLOROFFSET0) >> 2;
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
+					idx, reg);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+		track->cb[i].robj = reloc->robj;
+		track->cb[i].offset = ib_chunk->kdata[idx];
+		ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
+		break;
+	case R300_ZB_DEPTHOFFSET:
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
+					idx, reg);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+		track->zb.robj = reloc->robj;
+		track->zb.offset = ib_chunk->kdata[idx];
+		ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
+		break;
+	case R300_TX_OFFSET_0:
+	case R300_TX_OFFSET_0+4:
+	case R300_TX_OFFSET_0+8:
+	case R300_TX_OFFSET_0+12:
+	case R300_TX_OFFSET_0+16:
+	case R300_TX_OFFSET_0+20:
+	case R300_TX_OFFSET_0+24:
+	case R300_TX_OFFSET_0+28:
+	case R300_TX_OFFSET_0+32:
+	case R300_TX_OFFSET_0+36:
+	case R300_TX_OFFSET_0+40:
+	case R300_TX_OFFSET_0+44:
+	case R300_TX_OFFSET_0+48:
+	case R300_TX_OFFSET_0+52:
+	case R300_TX_OFFSET_0+56:
+	case R300_TX_OFFSET_0+60:
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
+					idx, reg);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+		ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
+		break;
+	/* Tracked registers */
+	case 0x43E4:
+		/* SC_SCISSOR1 */
+
+		track->maxy = ((ib_chunk->kdata[idx] >> 13) & 0x1FFF) + 1;
+		if (p->rdev->family < CHIP_RV515) {
+			track->maxy -= 1440;
+		}
+		break;
+	case 0x4E00:
+		/* RB3D_CCTL */
+		track->num_cb = ((ib_chunk->kdata[idx] >> 5) & 0x3) + 1;
+		break;
+	case 0x4E38:
+	case 0x4E3C:
+	case 0x4E40:
+	case 0x4E44:
+		/* RB3D_COLORPITCH0 */
+		/* RB3D_COLORPITCH1 */
+		/* RB3D_COLORPITCH2 */
+		/* RB3D_COLORPITCH3 */
+		i = (reg - 0x4E38) >> 2;
+		track->cb[i].pitch = ib_chunk->kdata[idx] & 0x3FFE;
+		switch (((ib_chunk->kdata[idx] >> 21) & 0xF)) {
+		case 9:
+		case 11:
+		case 12:
+			track->cb[i].cpp = 1;
+			break;
+		case 3:
+		case 4:
+		case 13:
+		case 15:
+			track->cb[i].cpp = 2;
+			break;
+		case 6:
+			track->cb[i].cpp = 4;
+			break;
+		case 10:
+			track->cb[i].cpp = 8;
+			break;
+		case 7:
+			track->cb[i].cpp = 16;
+			break;
+		default:
+			DRM_ERROR("Invalid color buffer format (%d) !\n",
+				  ((ib_chunk->kdata[idx] >> 21) & 0xF));
+			return -EINVAL;
+		}
+		break;
+	case 0x4F00:
+		/* ZB_CNTL */
+		if (ib_chunk->kdata[idx] & 2) {
+			track->z_enabled = true;
+		} else {
+			track->z_enabled = false;
+		}
+		break;
+	case 0x4F10:
+		/* ZB_FORMAT */
+		switch ((ib_chunk->kdata[idx] & 0xF)) {
+		case 0:
+		case 1:
+			track->zb.cpp = 2;
+			break;
+		case 2:
+			track->zb.cpp = 4;
+			break;
+		default:
+			DRM_ERROR("Invalid z buffer format (%d) !\n",
+				  (ib_chunk->kdata[idx] & 0xF));
+			return -EINVAL;
+		}
+		break;
+	case 0x4F24:
+		/* ZB_DEPTHPITCH */
+		track->zb.pitch = ib_chunk->kdata[idx] & 0x3FFC;
+		break;
+	default:
+		printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n", reg, idx);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int r300_packet3_check(struct radeon_cs_parser *p,
+			      struct radeon_cs_packet *pkt)
+{
+	struct radeon_cs_chunk *ib_chunk;
+	struct radeon_cs_reloc *reloc;
+	struct r300_cs_track *track;
+	volatile uint32_t *ib;
+	unsigned idx;
+	unsigned i, c;
+	int r;
+
+	ib = p->ib->ptr;
+	ib_chunk = &p->chunks[p->chunk_ib_idx];
+	idx = pkt->idx + 1;
+	track = (struct r300_cs_track *)p->track;
+	switch (pkt->opcode) {
+	case PACKET3_3D_LOAD_VBPNTR:
+		c = ib_chunk->kdata[idx++];
+		for (i = 0; i < (c - 1); i += 2, idx += 3) {
+			r = r100_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				DRM_ERROR("No reloc for packet3 %d\n",
+					  pkt->opcode);
+				r100_cs_dump_packet(p, pkt);
+				return r;
+			}
+			ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
+			r = r100_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				DRM_ERROR("No reloc for packet3 %d\n",
+					  pkt->opcode);
+				r100_cs_dump_packet(p, pkt);
+				return r;
+			}
+			ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset);
+		}
+		if (c & 1) {
+			r = r100_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				DRM_ERROR("No reloc for packet3 %d\n",
+					  pkt->opcode);
+				r100_cs_dump_packet(p, pkt);
+				return r;
+			}
+			ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
+		}
+		break;
+	case PACKET3_INDX_BUFFER:
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+		ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
+		break;
+	/* Draw packet */
+	case PACKET3_3D_DRAW_VBUF:
+	case PACKET3_3D_DRAW_IMMD:
+	case PACKET3_3D_DRAW_INDX:
+	case PACKET3_3D_DRAW_VBUF_2:
+	case PACKET3_3D_DRAW_IMMD_2:
+	case PACKET3_3D_DRAW_INDX_2:
+		r = r300_cs_track_check(p->rdev, track);
+		if (r) {
+			return r;
+		}
+		break;
+	case PACKET3_NOP:
+		break;
+	default:
+		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int r300_cs_parse(struct radeon_cs_parser *p)
+{
+	struct radeon_cs_packet pkt;
+	struct r300_cs_track track;
+	int r;
+
+	r300_cs_track_clear(&track);
+	p->track = &track;
+	do {
+		r = r100_cs_packet_parse(p, &pkt, p->idx);
+		if (r) {
+			return r;
+		}
+		p->idx += pkt.count + 2;
+		switch (pkt.type) {
+		case PACKET_TYPE0:
+			r = r100_cs_parse_packet0(p, &pkt,
+						  r300_auth_reg,
+						  ARRAY_SIZE(r300_auth_reg),
+						  &r300_packet0_check);
+			break;
+		case PACKET_TYPE2:
+			break;
+		case PACKET_TYPE3:
+			r = r300_packet3_check(p, &pkt);
+			break;
+		default:
+			DRM_ERROR("Unknown packet type %d !\n", pkt.type);
+			return -EINVAL;
+		}
+		if (r) {
+			return r;
+		}
+	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+	return 0;
+}
diff --git a/drivers/gpu/drm/radeon/r300_reg.h b/drivers/gpu/drm/radeon/r300_reg.h
index bdbc95fa672..70f48609515 100644
--- a/drivers/gpu/drm/radeon/r300_reg.h
+++ b/drivers/gpu/drm/radeon/r300_reg.h
@@ -1,30 +1,34 @@
-/**************************************************************************
-
-Copyright (C) 2004-2005 Nicolai Haehnle et al.
-
-Permission is hereby granted, free of charge, to any person obtaining a
-copy of this software and associated documentation files (the "Software"),
-to deal in the Software without restriction, including without limitation
-on the rights to use, copy, modify, merge, publish, distribute, sub
-license, and/or sell copies of the Software, and to permit persons to whom
-the Software is furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice (including the next
-paragraph) shall be included in all copies or substantial portions of the
-Software.
+/*
+ * Copyright 2005 Nicolai Haehnle et al.
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Nicolai Haehnle
+ *          Jerome Glisse
+ */
+#ifndef _R300_REG_H_
+#define _R300_REG_H_
 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
-THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
-DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
-OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
-USE OR OTHER DEALINGS IN THE SOFTWARE.
 
-**************************************************************************/
 
-#ifndef _R300_REG_H
-#define _R300_REG_H
 
 #define R300_MC_INIT_MISC_LAT_TIMER	0x180
 #	define R300_MC_MISC__MC_CPR_INIT_LAT_SHIFT	0
diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c
new file mode 100644
index 00000000000..dea497a979f
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r420.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#include <linux/seq_file.h>
+#include "drmP.h"
+#include "radeon_reg.h"
+#include "radeon.h"
+
+/* r420,r423,rv410 depends on : */
+void r100_pci_gart_disable(struct radeon_device *rdev);
+void r100_hdp_reset(struct radeon_device *rdev);
+void r100_mc_setup(struct radeon_device *rdev);
+int r100_gui_wait_for_idle(struct radeon_device *rdev);
+void r100_mc_disable_clients(struct radeon_device *rdev);
+void r300_vram_info(struct radeon_device *rdev);
+int r300_mc_wait_for_idle(struct radeon_device *rdev);
+int rv370_pcie_gart_enable(struct radeon_device *rdev);
+void rv370_pcie_gart_disable(struct radeon_device *rdev);
+
+/* This files gather functions specifics to :
+ * r420,r423,rv410
+ *
+ * Some of these functions might be used by newer ASICs.
+ */
+void r420_gpu_init(struct radeon_device *rdev);
+int r420_debugfs_pipes_info_init(struct radeon_device *rdev);
+
+
+/*
+ * MC
+ */
+int r420_mc_init(struct radeon_device *rdev)
+{
+	int r;
+
+	if (r100_debugfs_rbbm_init(rdev)) {
+		DRM_ERROR("Failed to register debugfs file for RBBM !\n");
+	}
+	if (r420_debugfs_pipes_info_init(rdev)) {
+		DRM_ERROR("Failed to register debugfs file for pipes !\n");
+	}
+
+	r420_gpu_init(rdev);
+	r100_pci_gart_disable(rdev);
+	if (rdev->flags & RADEON_IS_PCIE) {
+		rv370_pcie_gart_disable(rdev);
+	}
+
+	/* Setup GPU memory space */
+	rdev->mc.vram_location = 0xFFFFFFFFUL;
+	rdev->mc.gtt_location = 0xFFFFFFFFUL;
+	if (rdev->flags & RADEON_IS_AGP) {
+		r = radeon_agp_init(rdev);
+		if (r) {
+			printk(KERN_WARNING "[drm] Disabling AGP\n");
+			rdev->flags &= ~RADEON_IS_AGP;
+			rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
+		} else {
+			rdev->mc.gtt_location = rdev->mc.agp_base;
+		}
+	}
+	r = radeon_mc_setup(rdev);
+	if (r) {
+		return r;
+	}
+
+	/* Program GPU memory space */
+	r100_mc_disable_clients(rdev);
+	if (r300_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait MC idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+	r100_mc_setup(rdev);
+	return 0;
+}
+
+void r420_mc_fini(struct radeon_device *rdev)
+{
+	rv370_pcie_gart_disable(rdev);
+	radeon_gart_table_vram_free(rdev);
+	radeon_gart_fini(rdev);
+}
+
+
+/*
+ * Global GPU functions
+ */
+void r420_errata(struct radeon_device *rdev)
+{
+	rdev->pll_errata = 0;
+}
+
+void r420_pipes_init(struct radeon_device *rdev)
+{
+	unsigned tmp;
+	unsigned gb_pipe_select;
+	unsigned num_pipes;
+
+	/* GA_ENHANCE workaround TCL deadlock issue */
+	WREG32(0x4274, (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3));
+	/* get max number of pipes */
+	gb_pipe_select = RREG32(0x402C);
+	num_pipes = ((gb_pipe_select >> 12) & 3) + 1;
+	rdev->num_gb_pipes = num_pipes;
+	tmp = 0;
+	switch (num_pipes) {
+	default:
+		/* force to 1 pipe */
+		num_pipes = 1;
+	case 1:
+		tmp = (0 << 1);
+		break;
+	case 2:
+		tmp = (3 << 1);
+		break;
+	case 3:
+		tmp = (6 << 1);
+		break;
+	case 4:
+		tmp = (7 << 1);
+		break;
+	}
+	WREG32(0x42C8, (1 << num_pipes) - 1);
+	/* Sub pixel 1/12 so we can have 4K rendering according to doc */
+	tmp |= (1 << 4) | (1 << 0);
+	WREG32(0x4018, tmp);
+	if (r100_gui_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait GUI idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+
+	tmp = RREG32(0x170C);
+	WREG32(0x170C, tmp | (1 << 31));
+
+	WREG32(R300_RB2D_DSTCACHE_MODE,
+	       RREG32(R300_RB2D_DSTCACHE_MODE) |
+	       R300_DC_AUTOFLUSH_ENABLE |
+	       R300_DC_DC_DISABLE_IGNORE_PE);
+
+	if (r100_gui_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait GUI idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+	DRM_INFO("radeon: %d pipes initialized.\n", rdev->num_gb_pipes);
+}
+
+void r420_gpu_init(struct radeon_device *rdev)
+{
+	r100_hdp_reset(rdev);
+	r420_pipes_init(rdev);
+	if (r300_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait MC idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+}
+
+
+/*
+ * r420,r423,rv410 VRAM info
+ */
+void r420_vram_info(struct radeon_device *rdev)
+{
+	r300_vram_info(rdev);
+}
+
+
+/*
+ * Debugfs info
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int r420_debugfs_pipes_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t tmp;
+
+	tmp = RREG32(R400_GB_PIPE_SELECT);
+	seq_printf(m, "GB_PIPE_SELECT 0x%08x\n", tmp);
+	tmp = RREG32(R300_GB_TILE_CONFIG);
+	seq_printf(m, "GB_TILE_CONFIG 0x%08x\n", tmp);
+	tmp = RREG32(R300_DST_PIPE_CONFIG);
+	seq_printf(m, "DST_PIPE_CONFIG 0x%08x\n", tmp);
+	return 0;
+}
+
+static struct drm_info_list r420_pipes_info_list[] = {
+	{"r420_pipes_info", r420_debugfs_pipes_info, 0, NULL},
+};
+#endif
+
+int r420_debugfs_pipes_info_init(struct radeon_device *rdev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	return radeon_debugfs_add_files(rdev, r420_pipes_info_list, 1);
+#else
+	return 0;
+#endif
+}
diff --git a/drivers/gpu/drm/radeon/r500_reg.h b/drivers/gpu/drm/radeon/r500_reg.h
new file mode 100644
index 00000000000..9070a1c2ce2
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r500_reg.h
@@ -0,0 +1,749 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#ifndef __R500_REG_H__
+#define __R500_REG_H__
+
+/* pipe config regs */
+#define R300_GA_POLY_MODE				0x4288
+#       define R300_FRONT_PTYPE_POINT                   (0 << 4)
+#       define R300_FRONT_PTYPE_LINE                    (1 << 4)
+#       define R300_FRONT_PTYPE_TRIANGE                 (2 << 4)
+#       define R300_BACK_PTYPE_POINT                    (0 << 7)
+#       define R300_BACK_PTYPE_LINE                     (1 << 7)
+#       define R300_BACK_PTYPE_TRIANGE                  (2 << 7)
+#define R300_GA_ROUND_MODE				0x428c
+#       define R300_GEOMETRY_ROUND_TRUNC                (0 << 0)
+#       define R300_GEOMETRY_ROUND_NEAREST              (1 << 0)
+#       define R300_COLOR_ROUND_TRUNC                   (0 << 2)
+#       define R300_COLOR_ROUND_NEAREST                 (1 << 2)
+#define R300_GB_MSPOS0				        0x4010
+#       define R300_MS_X0_SHIFT                         0
+#       define R300_MS_Y0_SHIFT                         4
+#       define R300_MS_X1_SHIFT                         8
+#       define R300_MS_Y1_SHIFT                         12
+#       define R300_MS_X2_SHIFT                         16
+#       define R300_MS_Y2_SHIFT                         20
+#       define R300_MSBD0_Y_SHIFT                       24
+#       define R300_MSBD0_X_SHIFT                       28
+#define R300_GB_MSPOS1				        0x4014
+#       define R300_MS_X3_SHIFT                         0
+#       define R300_MS_Y3_SHIFT                         4
+#       define R300_MS_X4_SHIFT                         8
+#       define R300_MS_Y4_SHIFT                         12
+#       define R300_MS_X5_SHIFT                         16
+#       define R300_MS_Y5_SHIFT                         20
+#       define R300_MSBD1_SHIFT                         24
+
+#define R300_GA_ENHANCE				        0x4274
+#       define R300_GA_DEADLOCK_CNTL                    (1 << 0)
+#       define R300_GA_FASTSYNC_CNTL                    (1 << 1)
+#define R300_RB3D_DSTCACHE_CTLSTAT              0x4e4c
+#	define R300_RB3D_DC_FLUSH		(2 << 0)
+#	define R300_RB3D_DC_FREE		(2 << 2)
+#	define R300_RB3D_DC_FINISH		(1 << 4)
+#define R300_RB3D_ZCACHE_CTLSTAT			0x4f18
+#       define R300_ZC_FLUSH                            (1 << 0)
+#       define R300_ZC_FREE                             (1 << 1)
+#       define R300_ZC_FLUSH_ALL                        0x3
+#define R400_GB_PIPE_SELECT             0x402c
+#define R500_DYN_SCLK_PWMEM_PIPE        0x000d /* PLL */
+#define R500_SU_REG_DEST                0x42c8
+#define R300_GB_TILE_CONFIG             0x4018
+#       define R300_ENABLE_TILING       (1 << 0)
+#       define R300_PIPE_COUNT_RV350    (0 << 1)
+#       define R300_PIPE_COUNT_R300     (3 << 1)
+#       define R300_PIPE_COUNT_R420_3P  (6 << 1)
+#       define R300_PIPE_COUNT_R420     (7 << 1)
+#       define R300_TILE_SIZE_8         (0 << 4)
+#       define R300_TILE_SIZE_16        (1 << 4)
+#       define R300_TILE_SIZE_32        (2 << 4)
+#       define R300_SUBPIXEL_1_12       (0 << 16)
+#       define R300_SUBPIXEL_1_16       (1 << 16)
+#define R300_DST_PIPE_CONFIG            0x170c
+#       define R300_PIPE_AUTO_CONFIG    (1 << 31)
+#define R300_RB2D_DSTCACHE_MODE         0x3428
+#       define R300_DC_AUTOFLUSH_ENABLE (1 << 8)
+#       define R300_DC_DC_DISABLE_IGNORE_PE (1 << 17)
+
+#define RADEON_CP_STAT		0x7C0
+#define RADEON_RBBM_CMDFIFO_ADDR	0xE70
+#define RADEON_RBBM_CMDFIFO_DATA	0xE74
+#define RADEON_ISYNC_CNTL		0x1724
+#	define RADEON_ISYNC_ANY2D_IDLE3D	(1 << 0)
+#	define RADEON_ISYNC_ANY3D_IDLE2D	(1 << 1)
+#	define RADEON_ISYNC_TRIG2D_IDLE3D	(1 << 2)
+#	define RADEON_ISYNC_TRIG3D_IDLE2D	(1 << 3)
+#	define RADEON_ISYNC_WAIT_IDLEGUI	(1 << 4)
+#	define RADEON_ISYNC_CPSCRATCH_IDLEGUI	(1 << 5)
+
+#define RS480_NB_MC_INDEX               0x168
+#	define RS480_NB_MC_IND_WR_EN	(1 << 8)
+#define RS480_NB_MC_DATA                0x16c
+
+/*
+ * RS690
+ */
+#define RS690_MCCFG_FB_LOCATION		0x100
+#define		RS690_MC_FB_START_MASK		0x0000FFFF
+#define		RS690_MC_FB_START_SHIFT		0
+#define		RS690_MC_FB_TOP_MASK		0xFFFF0000
+#define		RS690_MC_FB_TOP_SHIFT		16
+#define RS690_MCCFG_AGP_LOCATION	0x101
+#define		RS690_MC_AGP_START_MASK		0x0000FFFF
+#define		RS690_MC_AGP_START_SHIFT	0
+#define		RS690_MC_AGP_TOP_MASK		0xFFFF0000
+#define		RS690_MC_AGP_TOP_SHIFT		16
+#define RS690_MCCFG_AGP_BASE		0x102
+#define RS690_MCCFG_AGP_BASE_2		0x103
+#define RS690_MC_INIT_MISC_LAT_TIMER            0x104
+#define RS690_HDP_FB_LOCATION		0x0134
+#define RS690_MC_INDEX				0x78
+#	define RS690_MC_INDEX_MASK		0x1ff
+#	define RS690_MC_INDEX_WR_EN		(1 << 9)
+#	define RS690_MC_INDEX_WR_ACK		0x7f
+#define RS690_MC_DATA				0x7c
+#define RS690_MC_STATUS                         0x90
+#define RS690_MC_STATUS_IDLE                    (1 << 0)
+#define RS480_AGP_BASE_2		0x0164
+#define RS480_MC_MISC_CNTL              0x18
+#	define RS480_DISABLE_GTW	(1 << 1)
+#	define RS480_GART_INDEX_REG_EN	(1 << 12)
+#	define RS690_BLOCK_GFX_D3_EN	(1 << 14)
+#define RS480_GART_FEATURE_ID           0x2b
+#	define RS480_HANG_EN	        (1 << 11)
+#	define RS480_TLB_ENABLE	        (1 << 18)
+#	define RS480_P2P_ENABLE	        (1 << 19)
+#	define RS480_GTW_LAC_EN	        (1 << 25)
+#	define RS480_2LEVEL_GART	(0 << 30)
+#	define RS480_1LEVEL_GART	(1 << 30)
+#	define RS480_PDC_EN	        (1 << 31)
+#define RS480_GART_BASE                 0x2c
+#define RS480_GART_CACHE_CNTRL          0x2e
+#	define RS480_GART_CACHE_INVALIDATE (1 << 0) /* wait for it to clear */
+#define RS480_AGP_ADDRESS_SPACE_SIZE    0x38
+#	define RS480_GART_EN	        (1 << 0)
+#	define RS480_VA_SIZE_32MB	(0 << 1)
+#	define RS480_VA_SIZE_64MB	(1 << 1)
+#	define RS480_VA_SIZE_128MB	(2 << 1)
+#	define RS480_VA_SIZE_256MB	(3 << 1)
+#	define RS480_VA_SIZE_512MB	(4 << 1)
+#	define RS480_VA_SIZE_1GB	(5 << 1)
+#	define RS480_VA_SIZE_2GB	(6 << 1)
+#define RS480_AGP_MODE_CNTL             0x39
+#	define RS480_POST_GART_Q_SIZE	(1 << 18)
+#	define RS480_NONGART_SNOOP	(1 << 19)
+#	define RS480_AGP_RD_BUF_SIZE	(1 << 20)
+#	define RS480_REQ_TYPE_SNOOP_SHIFT 22
+#	define RS480_REQ_TYPE_SNOOP_MASK  0x3
+#	define RS480_REQ_TYPE_SNOOP_DIS	(1 << 24)
+
+#define RS690_AIC_CTRL_SCRATCH		0x3A
+#	define RS690_DIS_OUT_OF_PCI_GART_ACCESS	(1 << 1)
+
+/*
+ * RS600
+ */
+#define RS600_MC_STATUS                         0x0
+#define RS600_MC_STATUS_IDLE                    (1 << 0)
+#define RS600_MC_INDEX                          0x70
+#       define RS600_MC_ADDR_MASK               0xffff
+#       define RS600_MC_IND_SEQ_RBS_0           (1 << 16)
+#       define RS600_MC_IND_SEQ_RBS_1           (1 << 17)
+#       define RS600_MC_IND_SEQ_RBS_2           (1 << 18)
+#       define RS600_MC_IND_SEQ_RBS_3           (1 << 19)
+#       define RS600_MC_IND_AIC_RBS             (1 << 20)
+#       define RS600_MC_IND_CITF_ARB0           (1 << 21)
+#       define RS600_MC_IND_CITF_ARB1           (1 << 22)
+#       define RS600_MC_IND_WR_EN               (1 << 23)
+#define RS600_MC_DATA                           0x74
+#define RS600_MC_STATUS                         0x0
+#       define RS600_MC_IDLE                    (1 << 1)
+#define RS600_MC_FB_LOCATION                    0x4
+#define		RS600_MC_FB_START_MASK		0x0000FFFF
+#define		RS600_MC_FB_START_SHIFT		0
+#define		RS600_MC_FB_TOP_MASK		0xFFFF0000
+#define		RS600_MC_FB_TOP_SHIFT		16
+#define RS600_MC_AGP_LOCATION                   0x5
+#define		RS600_MC_AGP_START_MASK		0x0000FFFF
+#define		RS600_MC_AGP_START_SHIFT	0
+#define		RS600_MC_AGP_TOP_MASK		0xFFFF0000
+#define		RS600_MC_AGP_TOP_SHIFT		16
+#define RS600_MC_AGP_BASE                          0x6
+#define RS600_MC_AGP_BASE_2                        0x7
+#define RS600_MC_CNTL1                          0x9
+#       define RS600_ENABLE_PAGE_TABLES         (1 << 26)
+#define RS600_MC_PT0_CNTL                       0x100
+#       define RS600_ENABLE_PT                  (1 << 0)
+#       define RS600_EFFECTIVE_L2_CACHE_SIZE(x) ((x) << 15)
+#       define RS600_EFFECTIVE_L2_QUEUE_SIZE(x) ((x) << 21)
+#       define RS600_INVALIDATE_ALL_L1_TLBS     (1 << 28)
+#       define RS600_INVALIDATE_L2_CACHE        (1 << 29)
+#define RS600_MC_PT0_CONTEXT0_CNTL              0x102
+#       define RS600_ENABLE_PAGE_TABLE          (1 << 0)
+#       define RS600_PAGE_TABLE_TYPE_FLAT       (0 << 1)
+#define RS600_MC_PT0_SYSTEM_APERTURE_LOW_ADDR   0x112
+#define RS600_MC_PT0_SYSTEM_APERTURE_HIGH_ADDR  0x114
+#define RS600_MC_PT0_CONTEXT0_DEFAULT_READ_ADDR 0x11c
+#define RS600_MC_PT0_CONTEXT0_FLAT_BASE_ADDR    0x12c
+#define RS600_MC_PT0_CONTEXT0_FLAT_START_ADDR   0x13c
+#define RS600_MC_PT0_CONTEXT0_FLAT_END_ADDR     0x14c
+#define RS600_MC_PT0_CLIENT0_CNTL               0x16c
+#       define RS600_ENABLE_TRANSLATION_MODE_OVERRIDE       (1 << 0)
+#       define RS600_TRANSLATION_MODE_OVERRIDE              (1 << 1)
+#       define RS600_SYSTEM_ACCESS_MODE_MASK                (3 << 8)
+#       define RS600_SYSTEM_ACCESS_MODE_PA_ONLY             (0 << 8)
+#       define RS600_SYSTEM_ACCESS_MODE_USE_SYS_MAP         (1 << 8)
+#       define RS600_SYSTEM_ACCESS_MODE_IN_SYS              (2 << 8)
+#       define RS600_SYSTEM_ACCESS_MODE_NOT_IN_SYS          (3 << 8)
+#       define RS600_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASSTHROUGH        (0 << 10)
+#       define RS600_SYSTEM_APERTURE_UNMAPPED_ACCESS_DEFAULT_PAGE       (1 << 10)
+#       define RS600_EFFECTIVE_L1_CACHE_SIZE(x) ((x) << 11)
+#       define RS600_ENABLE_FRAGMENT_PROCESSING (1 << 14)
+#       define RS600_EFFECTIVE_L1_QUEUE_SIZE(x) ((x) << 15)
+#       define RS600_INVALIDATE_L1_TLB          (1 << 20)
+/* rs600/rs690/rs740 */
+#	define RS600_BUS_MASTER_DIS		(1 << 14)
+#	define RS600_MSI_REARM		        (1 << 20)
+/* see RS400_MSI_REARM in AIC_CNTL for rs480 */
+
+
+
+#define RV515_MC_FB_LOCATION		0x01
+#define		RV515_MC_FB_START_MASK		0x0000FFFF
+#define		RV515_MC_FB_START_SHIFT		0
+#define		RV515_MC_FB_TOP_MASK		0xFFFF0000
+#define		RV515_MC_FB_TOP_SHIFT		16
+#define RV515_MC_AGP_LOCATION		0x02
+#define		RV515_MC_AGP_START_MASK		0x0000FFFF
+#define		RV515_MC_AGP_START_SHIFT	0
+#define		RV515_MC_AGP_TOP_MASK		0xFFFF0000
+#define		RV515_MC_AGP_TOP_SHIFT		16
+#define RV515_MC_AGP_BASE		0x03
+#define RV515_MC_AGP_BASE_2		0x04
+
+#define R520_MC_FB_LOCATION		0x04
+#define		R520_MC_FB_START_MASK		0x0000FFFF
+#define		R520_MC_FB_START_SHIFT		0
+#define		R520_MC_FB_TOP_MASK		0xFFFF0000
+#define		R520_MC_FB_TOP_SHIFT		16
+#define R520_MC_AGP_LOCATION		0x05
+#define		R520_MC_AGP_START_MASK		0x0000FFFF
+#define		R520_MC_AGP_START_SHIFT		0
+#define		R520_MC_AGP_TOP_MASK		0xFFFF0000
+#define		R520_MC_AGP_TOP_SHIFT		16
+#define R520_MC_AGP_BASE		0x06
+#define R520_MC_AGP_BASE_2		0x07
+
+
+#define AVIVO_MC_INDEX						0x0070
+#define R520_MC_STATUS 0x00
+#define R520_MC_STATUS_IDLE (1<<1)
+#define RV515_MC_STATUS 0x08
+#define RV515_MC_STATUS_IDLE (1<<4)
+#define RV515_MC_INIT_MISC_LAT_TIMER            0x09
+#define AVIVO_MC_DATA						0x0074
+
+#define R520_MC_IND_INDEX 0x70
+#define R520_MC_IND_WR_EN (1 << 24)
+#define R520_MC_IND_DATA  0x74
+
+#define RV515_MC_CNTL          0x5
+#	define RV515_MEM_NUM_CHANNELS_MASK  0x3
+#define R520_MC_CNTL0          0x8
+#	define R520_MEM_NUM_CHANNELS_MASK  (0x3 << 24)
+#	define R520_MEM_NUM_CHANNELS_SHIFT  24
+#	define R520_MC_CHANNEL_SIZE  (1 << 23)
+
+#define AVIVO_CP_DYN_CNTL                              0x000f /* PLL */
+#       define AVIVO_CP_FORCEON                        (1 << 0)
+#define AVIVO_E2_DYN_CNTL                              0x0011 /* PLL */
+#       define AVIVO_E2_FORCEON                        (1 << 0)
+#define AVIVO_IDCT_DYN_CNTL                            0x0013 /* PLL */
+#       define AVIVO_IDCT_FORCEON                      (1 << 0)
+
+#define AVIVO_HDP_FB_LOCATION 0x134
+
+#define AVIVO_VGA_RENDER_CONTROL				0x0300
+#       define AVIVO_VGA_VSTATUS_CNTL_MASK                      (3 << 16)
+#define AVIVO_D1VGA_CONTROL					0x0330
+#       define AVIVO_DVGA_CONTROL_MODE_ENABLE (1<<0)
+#       define AVIVO_DVGA_CONTROL_TIMING_SELECT (1<<8)
+#       define AVIVO_DVGA_CONTROL_SYNC_POLARITY_SELECT (1<<9)
+#       define AVIVO_DVGA_CONTROL_OVERSCAN_TIMING_SELECT (1<<10)
+#       define AVIVO_DVGA_CONTROL_OVERSCAN_COLOR_EN (1<<16)
+#       define AVIVO_DVGA_CONTROL_ROTATE (1<<24)
+#define AVIVO_D2VGA_CONTROL					0x0338
+
+#define AVIVO_EXT1_PPLL_REF_DIV_SRC                             0x400
+#define AVIVO_EXT1_PPLL_REF_DIV                                 0x404
+#define AVIVO_EXT1_PPLL_UPDATE_LOCK                             0x408
+#define AVIVO_EXT1_PPLL_UPDATE_CNTL                             0x40c
+
+#define AVIVO_EXT2_PPLL_REF_DIV_SRC                             0x410
+#define AVIVO_EXT2_PPLL_REF_DIV                                 0x414
+#define AVIVO_EXT2_PPLL_UPDATE_LOCK                             0x418
+#define AVIVO_EXT2_PPLL_UPDATE_CNTL                             0x41c
+
+#define AVIVO_EXT1_PPLL_FB_DIV                                   0x430
+#define AVIVO_EXT2_PPLL_FB_DIV                                   0x434
+
+#define AVIVO_EXT1_PPLL_POST_DIV_SRC                                 0x438
+#define AVIVO_EXT1_PPLL_POST_DIV                                     0x43c
+
+#define AVIVO_EXT2_PPLL_POST_DIV_SRC                                 0x440
+#define AVIVO_EXT2_PPLL_POST_DIV                                     0x444
+
+#define AVIVO_EXT1_PPLL_CNTL                                    0x448
+#define AVIVO_EXT2_PPLL_CNTL                                    0x44c
+
+#define AVIVO_P1PLL_CNTL                                        0x450
+#define AVIVO_P2PLL_CNTL                                        0x454
+#define AVIVO_P1PLL_INT_SS_CNTL                                 0x458
+#define AVIVO_P2PLL_INT_SS_CNTL                                 0x45c
+#define AVIVO_P1PLL_TMDSA_CNTL                                  0x460
+#define AVIVO_P2PLL_LVTMA_CNTL                                  0x464
+
+#define AVIVO_PCLK_CRTC1_CNTL                                   0x480
+#define AVIVO_PCLK_CRTC2_CNTL                                   0x484
+
+#define AVIVO_D1CRTC_H_TOTAL					0x6000
+#define AVIVO_D1CRTC_H_BLANK_START_END                          0x6004
+#define AVIVO_D1CRTC_H_SYNC_A                                   0x6008
+#define AVIVO_D1CRTC_H_SYNC_A_CNTL                              0x600c
+#define AVIVO_D1CRTC_H_SYNC_B                                   0x6010
+#define AVIVO_D1CRTC_H_SYNC_B_CNTL                              0x6014
+
+#define AVIVO_D1CRTC_V_TOTAL					0x6020
+#define AVIVO_D1CRTC_V_BLANK_START_END                          0x6024
+#define AVIVO_D1CRTC_V_SYNC_A                                   0x6028
+#define AVIVO_D1CRTC_V_SYNC_A_CNTL                              0x602c
+#define AVIVO_D1CRTC_V_SYNC_B                                   0x6030
+#define AVIVO_D1CRTC_V_SYNC_B_CNTL                              0x6034
+
+#define AVIVO_D1CRTC_CONTROL                                    0x6080
+#       define AVIVO_CRTC_EN                                    (1 << 0)
+#define AVIVO_D1CRTC_BLANK_CONTROL                              0x6084
+#define AVIVO_D1CRTC_INTERLACE_CONTROL                          0x6088
+#define AVIVO_D1CRTC_INTERLACE_STATUS                           0x608c
+#define AVIVO_D1CRTC_STEREO_CONTROL                             0x60c4
+
+/* master controls */
+#define AVIVO_DC_CRTC_MASTER_EN                                 0x60f8
+#define AVIVO_DC_CRTC_TV_CONTROL                                0x60fc
+
+#define AVIVO_D1GRPH_ENABLE                                     0x6100
+#define AVIVO_D1GRPH_CONTROL                                    0x6104
+#       define AVIVO_D1GRPH_CONTROL_DEPTH_8BPP                  (0 << 0)
+#       define AVIVO_D1GRPH_CONTROL_DEPTH_16BPP                 (1 << 0)
+#       define AVIVO_D1GRPH_CONTROL_DEPTH_32BPP                 (2 << 0)
+#       define AVIVO_D1GRPH_CONTROL_DEPTH_64BPP                 (3 << 0)
+
+#       define AVIVO_D1GRPH_CONTROL_8BPP_INDEXED                (0 << 8)
+
+#       define AVIVO_D1GRPH_CONTROL_16BPP_ARGB1555              (0 << 8)
+#       define AVIVO_D1GRPH_CONTROL_16BPP_RGB565                (1 << 8)
+#       define AVIVO_D1GRPH_CONTROL_16BPP_ARGB4444              (2 << 8)
+#       define AVIVO_D1GRPH_CONTROL_16BPP_AI88                  (3 << 8)
+#       define AVIVO_D1GRPH_CONTROL_16BPP_MONO16                (4 << 8)
+
+#       define AVIVO_D1GRPH_CONTROL_32BPP_ARGB8888              (0 << 8)
+#       define AVIVO_D1GRPH_CONTROL_32BPP_ARGB2101010           (1 << 8)
+#       define AVIVO_D1GRPH_CONTROL_32BPP_DIGITAL               (2 << 8)
+#       define AVIVO_D1GRPH_CONTROL_32BPP_8B_ARGB2101010        (3 << 8)
+
+
+#       define AVIVO_D1GRPH_CONTROL_64BPP_ARGB16161616          (0 << 8)
+
+#       define AVIVO_D1GRPH_SWAP_RB                             (1 << 16)
+#       define AVIVO_D1GRPH_TILED                               (1 << 20)
+#       define AVIVO_D1GRPH_MACRO_ADDRESS_MODE                  (1 << 21)
+
+#define AVIVO_D1GRPH_LUT_SEL                                    0x6108
+#define AVIVO_D1GRPH_PRIMARY_SURFACE_ADDRESS                    0x6110
+#define AVIVO_D1GRPH_SECONDARY_SURFACE_ADDRESS                  0x6118
+#define AVIVO_D1GRPH_PITCH                                      0x6120
+#define AVIVO_D1GRPH_SURFACE_OFFSET_X                           0x6124
+#define AVIVO_D1GRPH_SURFACE_OFFSET_Y                           0x6128
+#define AVIVO_D1GRPH_X_START                                    0x612c
+#define AVIVO_D1GRPH_Y_START                                    0x6130
+#define AVIVO_D1GRPH_X_END                                      0x6134
+#define AVIVO_D1GRPH_Y_END                                      0x6138
+#define AVIVO_D1GRPH_UPDATE                                     0x6144
+#       define AVIVO_D1GRPH_UPDATE_LOCK                         (1 << 16)
+#define AVIVO_D1GRPH_FLIP_CONTROL                               0x6148
+
+#define AVIVO_D1CUR_CONTROL                     0x6400
+#       define AVIVO_D1CURSOR_EN                (1 << 0)
+#       define AVIVO_D1CURSOR_MODE_SHIFT        8
+#       define AVIVO_D1CURSOR_MODE_MASK         (3 << 8)
+#       define AVIVO_D1CURSOR_MODE_24BPP        2
+#define AVIVO_D1CUR_SURFACE_ADDRESS             0x6408
+#define AVIVO_D1CUR_SIZE                        0x6410
+#define AVIVO_D1CUR_POSITION                    0x6414
+#define AVIVO_D1CUR_HOT_SPOT                    0x6418
+#define AVIVO_D1CUR_UPDATE                      0x6424
+#       define AVIVO_D1CURSOR_UPDATE_LOCK       (1 << 16)
+
+#define AVIVO_DC_LUT_RW_SELECT                  0x6480
+#define AVIVO_DC_LUT_RW_MODE                    0x6484
+#define AVIVO_DC_LUT_RW_INDEX                   0x6488
+#define AVIVO_DC_LUT_SEQ_COLOR                  0x648c
+#define AVIVO_DC_LUT_PWL_DATA                   0x6490
+#define AVIVO_DC_LUT_30_COLOR                   0x6494
+#define AVIVO_DC_LUT_READ_PIPE_SELECT           0x6498
+#define AVIVO_DC_LUT_WRITE_EN_MASK              0x649c
+#define AVIVO_DC_LUT_AUTOFILL                   0x64a0
+
+#define AVIVO_DC_LUTA_CONTROL                   0x64c0
+#define AVIVO_DC_LUTA_BLACK_OFFSET_BLUE         0x64c4
+#define AVIVO_DC_LUTA_BLACK_OFFSET_GREEN        0x64c8
+#define AVIVO_DC_LUTA_BLACK_OFFSET_RED          0x64cc
+#define AVIVO_DC_LUTA_WHITE_OFFSET_BLUE         0x64d0
+#define AVIVO_DC_LUTA_WHITE_OFFSET_GREEN        0x64d4
+#define AVIVO_DC_LUTA_WHITE_OFFSET_RED          0x64d8
+
+#define AVIVO_DC_LB_MEMORY_SPLIT                0x6520
+#       define AVIVO_DC_LB_MEMORY_SPLIT_MASK    0x3
+#       define AVIVO_DC_LB_MEMORY_SPLIT_SHIFT   0
+#       define AVIVO_DC_LB_MEMORY_SPLIT_D1HALF_D2HALF  0
+#       define AVIVO_DC_LB_MEMORY_SPLIT_D1_3Q_D2_1Q    1
+#       define AVIVO_DC_LB_MEMORY_SPLIT_D1_ONLY        2
+#       define AVIVO_DC_LB_MEMORY_SPLIT_D1_1Q_D2_3Q    3
+#       define AVIVO_DC_LB_MEMORY_SPLIT_SHIFT_MODE (1 << 2)
+#       define AVIVO_DC_LB_DISP1_END_ADR_SHIFT  4
+#       define AVIVO_DC_LB_DISP1_END_ADR_MASK   0x7ff
+
+#define R500_DxMODE_INT_MASK 0x6540
+#define R500_D1MODE_INT_MASK (1<<0)
+#define R500_D2MODE_INT_MASK (1<<8)
+
+#define AVIVO_D1MODE_DATA_FORMAT                0x6528
+#       define AVIVO_D1MODE_INTERLEAVE_EN       (1 << 0)
+#define AVIVO_D1MODE_DESKTOP_HEIGHT             0x652C
+#define AVIVO_D1MODE_VIEWPORT_START             0x6580
+#define AVIVO_D1MODE_VIEWPORT_SIZE              0x6584
+#define AVIVO_D1MODE_EXT_OVERSCAN_LEFT_RIGHT    0x6588
+#define AVIVO_D1MODE_EXT_OVERSCAN_TOP_BOTTOM    0x658c
+
+#define AVIVO_D1SCL_SCALER_ENABLE               0x6590
+#define AVIVO_D1SCL_SCALER_TAP_CONTROL		0x6594
+#define AVIVO_D1SCL_UPDATE                      0x65cc
+#       define AVIVO_D1SCL_UPDATE_LOCK          (1 << 16)
+
+/* second crtc */
+#define AVIVO_D2CRTC_H_TOTAL					0x6800
+#define AVIVO_D2CRTC_H_BLANK_START_END                          0x6804
+#define AVIVO_D2CRTC_H_SYNC_A                                   0x6808
+#define AVIVO_D2CRTC_H_SYNC_A_CNTL                              0x680c
+#define AVIVO_D2CRTC_H_SYNC_B                                   0x6810
+#define AVIVO_D2CRTC_H_SYNC_B_CNTL                              0x6814
+
+#define AVIVO_D2CRTC_V_TOTAL					0x6820
+#define AVIVO_D2CRTC_V_BLANK_START_END                          0x6824
+#define AVIVO_D2CRTC_V_SYNC_A                                   0x6828
+#define AVIVO_D2CRTC_V_SYNC_A_CNTL                              0x682c
+#define AVIVO_D2CRTC_V_SYNC_B                                   0x6830
+#define AVIVO_D2CRTC_V_SYNC_B_CNTL                              0x6834
+
+#define AVIVO_D2CRTC_CONTROL                                    0x6880
+#define AVIVO_D2CRTC_BLANK_CONTROL                              0x6884
+#define AVIVO_D2CRTC_INTERLACE_CONTROL                          0x6888
+#define AVIVO_D2CRTC_INTERLACE_STATUS                           0x688c
+#define AVIVO_D2CRTC_STEREO_CONTROL                             0x68c4
+
+#define AVIVO_D2GRPH_ENABLE                                     0x6900
+#define AVIVO_D2GRPH_CONTROL                                    0x6904
+#define AVIVO_D2GRPH_LUT_SEL                                    0x6908
+#define AVIVO_D2GRPH_PRIMARY_SURFACE_ADDRESS                    0x6910
+#define AVIVO_D2GRPH_SECONDARY_SURFACE_ADDRESS                  0x6918
+#define AVIVO_D2GRPH_PITCH                                      0x6920
+#define AVIVO_D2GRPH_SURFACE_OFFSET_X                           0x6924
+#define AVIVO_D2GRPH_SURFACE_OFFSET_Y                           0x6928
+#define AVIVO_D2GRPH_X_START                                    0x692c
+#define AVIVO_D2GRPH_Y_START                                    0x6930
+#define AVIVO_D2GRPH_X_END                                      0x6934
+#define AVIVO_D2GRPH_Y_END                                      0x6938
+#define AVIVO_D2GRPH_UPDATE                                     0x6944
+#define AVIVO_D2GRPH_FLIP_CONTROL                               0x6948
+
+#define AVIVO_D2CUR_CONTROL                     0x6c00
+#define AVIVO_D2CUR_SURFACE_ADDRESS             0x6c08
+#define AVIVO_D2CUR_SIZE                        0x6c10
+#define AVIVO_D2CUR_POSITION                    0x6c14
+
+#define AVIVO_D2MODE_VIEWPORT_START             0x6d80
+#define AVIVO_D2MODE_VIEWPORT_SIZE              0x6d84
+#define AVIVO_D2MODE_EXT_OVERSCAN_LEFT_RIGHT    0x6d88
+#define AVIVO_D2MODE_EXT_OVERSCAN_TOP_BOTTOM    0x6d8c
+
+#define AVIVO_D2SCL_SCALER_ENABLE               0x6d90
+#define AVIVO_D2SCL_SCALER_TAP_CONTROL		0x6d94
+
+#define AVIVO_DDIA_BIT_DEPTH_CONTROL				0x7214
+
+#define AVIVO_DACA_ENABLE					0x7800
+#	define AVIVO_DAC_ENABLE				(1 << 0)
+#define AVIVO_DACA_SOURCE_SELECT				0x7804
+#       define AVIVO_DAC_SOURCE_CRTC1                   (0 << 0)
+#       define AVIVO_DAC_SOURCE_CRTC2                   (1 << 0)
+#       define AVIVO_DAC_SOURCE_TV                      (2 << 0)
+
+#define AVIVO_DACA_FORCE_OUTPUT_CNTL				0x783c
+# define AVIVO_DACA_FORCE_OUTPUT_CNTL_FORCE_DATA_EN             (1 << 0)
+# define AVIVO_DACA_FORCE_OUTPUT_CNTL_DATA_SEL_SHIFT            (8)
+# define AVIVO_DACA_FORCE_OUTPUT_CNTL_DATA_SEL_BLUE             (1 << 0)
+# define AVIVO_DACA_FORCE_OUTPUT_CNTL_DATA_SEL_GREEN            (1 << 1)
+# define AVIVO_DACA_FORCE_OUTPUT_CNTL_DATA_SEL_RED              (1 << 2)
+# define AVIVO_DACA_FORCE_OUTPUT_CNTL_DATA_ON_BLANKB_ONLY       (1 << 24)
+#define AVIVO_DACA_POWERDOWN					0x7850
+# define AVIVO_DACA_POWERDOWN_POWERDOWN                         (1 << 0)
+# define AVIVO_DACA_POWERDOWN_BLUE                              (1 << 8)
+# define AVIVO_DACA_POWERDOWN_GREEN                             (1 << 16)
+# define AVIVO_DACA_POWERDOWN_RED                               (1 << 24)
+
+#define AVIVO_DACB_ENABLE					0x7a00
+#define AVIVO_DACB_SOURCE_SELECT				0x7a04
+#define AVIVO_DACB_FORCE_OUTPUT_CNTL				0x7a3c
+# define AVIVO_DACB_FORCE_OUTPUT_CNTL_FORCE_DATA_EN             (1 << 0)
+# define AVIVO_DACB_FORCE_OUTPUT_CNTL_DATA_SEL_SHIFT            (8)
+# define AVIVO_DACB_FORCE_OUTPUT_CNTL_DATA_SEL_BLUE             (1 << 0)
+# define AVIVO_DACB_FORCE_OUTPUT_CNTL_DATA_SEL_GREEN            (1 << 1)
+# define AVIVO_DACB_FORCE_OUTPUT_CNTL_DATA_SEL_RED              (1 << 2)
+# define AVIVO_DACB_FORCE_OUTPUT_CNTL_DATA_ON_BLANKB_ONLY       (1 << 24)
+#define AVIVO_DACB_POWERDOWN					0x7a50
+# define AVIVO_DACB_POWERDOWN_POWERDOWN                         (1 << 0)
+# define AVIVO_DACB_POWERDOWN_BLUE                              (1 << 8)
+# define AVIVO_DACB_POWERDOWN_GREEN                             (1 << 16)
+# define AVIVO_DACB_POWERDOWN_RED
+
+#define AVIVO_TMDSA_CNTL                    0x7880
+#   define AVIVO_TMDSA_CNTL_ENABLE               (1 << 0)
+#   define AVIVO_TMDSA_CNTL_HPD_MASK             (1 << 4)
+#   define AVIVO_TMDSA_CNTL_HPD_SELECT           (1 << 8)
+#   define AVIVO_TMDSA_CNTL_SYNC_PHASE           (1 << 12)
+#   define AVIVO_TMDSA_CNTL_PIXEL_ENCODING       (1 << 16)
+#   define AVIVO_TMDSA_CNTL_DUAL_LINK_ENABLE     (1 << 24)
+#   define AVIVO_TMDSA_CNTL_SWAP                 (1 << 28)
+#define AVIVO_TMDSA_SOURCE_SELECT				0x7884
+/* 78a8 appears to be some kind of (reasonably tolerant) clock?
+ * 78d0 definitely hits the transmitter, definitely clock. */
+/* MYSTERY1 This appears to control dithering? */
+#define AVIVO_TMDSA_BIT_DEPTH_CONTROL		0x7894
+#   define AVIVO_TMDS_BIT_DEPTH_CONTROL_TRUNCATE_EN           (1 << 0)
+#   define AVIVO_TMDS_BIT_DEPTH_CONTROL_TRUNCATE_DEPTH        (1 << 4)
+#   define AVIVO_TMDS_BIT_DEPTH_CONTROL_SPATIAL_DITHER_EN     (1 << 8)
+#   define AVIVO_TMDS_BIT_DEPTH_CONTROL_SPATIAL_DITHER_DEPTH  (1 << 12)
+#   define AVIVO_TMDS_BIT_DEPTH_CONTROL_TEMPORAL_DITHER_EN    (1 << 16)
+#   define AVIVO_TMDS_BIT_DEPTH_CONTROL_TEMPORAL_DITHER_DEPTH (1 << 20)
+#   define AVIVO_TMDS_BIT_DEPTH_CONTROL_TEMPORAL_LEVEL        (1 << 24)
+#   define AVIVO_TMDS_BIT_DEPTH_CONTROL_TEMPORAL_DITHER_RESET (1 << 26)
+#define AVIVO_TMDSA_DCBALANCER_CONTROL                  0x78d0
+#   define AVIVO_TMDSA_DCBALANCER_CONTROL_EN                  (1 << 0)
+#   define AVIVO_TMDSA_DCBALANCER_CONTROL_TEST_EN             (1 << 8)
+#   define AVIVO_TMDSA_DCBALANCER_CONTROL_TEST_IN_SHIFT       (16)
+#   define AVIVO_TMDSA_DCBALANCER_CONTROL_FORCE               (1 << 24)
+#define AVIVO_TMDSA_DATA_SYNCHRONIZATION                0x78d8
+#   define AVIVO_TMDSA_DATA_SYNCHRONIZATION_DSYNSEL           (1 << 0)
+#   define AVIVO_TMDSA_DATA_SYNCHRONIZATION_PFREQCHG          (1 << 8)
+#define AVIVO_TMDSA_CLOCK_ENABLE            0x7900
+#define AVIVO_TMDSA_TRANSMITTER_ENABLE              0x7904
+#   define AVIVO_TMDSA_TRANSMITTER_ENABLE_TX0_ENABLE          (1 << 0)
+#   define AVIVO_TMDSA_TRANSMITTER_ENABLE_LNKC0EN             (1 << 1)
+#   define AVIVO_TMDSA_TRANSMITTER_ENABLE_LNKD00EN            (1 << 2)
+#   define AVIVO_TMDSA_TRANSMITTER_ENABLE_LNKD01EN            (1 << 3)
+#   define AVIVO_TMDSA_TRANSMITTER_ENABLE_LNKD02EN            (1 << 4)
+#   define AVIVO_TMDSA_TRANSMITTER_ENABLE_TX1_ENABLE          (1 << 8)
+#   define AVIVO_TMDSA_TRANSMITTER_ENABLE_LNKD10EN            (1 << 10)
+#   define AVIVO_TMDSA_TRANSMITTER_ENABLE_LNKD11EN            (1 << 11)
+#   define AVIVO_TMDSA_TRANSMITTER_ENABLE_LNKD12EN            (1 << 12)
+#   define AVIVO_TMDSA_TRANSMITTER_ENABLE_TX_ENABLE_HPD_MASK  (1 << 16)
+#   define AVIVO_TMDSA_TRANSMITTER_ENABLE_LNKCEN_HPD_MASK     (1 << 17)
+#   define AVIVO_TMDSA_TRANSMITTER_ENABLE_LNKDEN_HPD_MASK     (1 << 18)
+
+#define AVIVO_TMDSA_TRANSMITTER_CONTROL				0x7910
+#	define AVIVO_TMDSA_TRANSMITTER_CONTROL_PLL_ENABLE	(1 << 0)
+#	define AVIVO_TMDSA_TRANSMITTER_CONTROL_PLL_RESET	(1 << 1)
+#	define AVIVO_TMDSA_TRANSMITTER_CONTROL_PLL_HPD_MASK_SHIFT	(2)
+#	define AVIVO_TMDSA_TRANSMITTER_CONTROL_IDSCKSEL	        (1 << 4)
+#       define AVIVO_TMDSA_TRANSMITTER_CONTROL_BGSLEEP          (1 << 5)
+#	define AVIVO_TMDSA_TRANSMITTER_CONTROL_PLL_PWRUP_SEQ_EN	(1 << 6)
+#	define AVIVO_TMDSA_TRANSMITTER_CONTROL_TMCLK	        (1 << 8)
+#	define AVIVO_TMDSA_TRANSMITTER_CONTROL_TMCLK_FROM_PADS	(1 << 13)
+#	define AVIVO_TMDSA_TRANSMITTER_CONTROL_TDCLK	        (1 << 14)
+#	define AVIVO_TMDSA_TRANSMITTER_CONTROL_TDCLK_FROM_PADS	(1 << 15)
+#       define AVIVO_TMDSA_TRANSMITTER_CONTROL_CLK_PATTERN_SHIFT (16)
+#	define AVIVO_TMDSA_TRANSMITTER_CONTROL_BYPASS_PLL	(1 << 28)
+#       define AVIVO_TMDSA_TRANSMITTER_CONTROL_USE_CLK_DATA     (1 << 29)
+#	define AVIVO_TMDSA_TRANSMITTER_CONTROL_INPUT_TEST_CLK_SEL	(1 << 31)
+
+#define AVIVO_LVTMA_CNTL					0x7a80
+#   define AVIVO_LVTMA_CNTL_ENABLE               (1 << 0)
+#   define AVIVO_LVTMA_CNTL_HPD_MASK             (1 << 4)
+#   define AVIVO_LVTMA_CNTL_HPD_SELECT           (1 << 8)
+#   define AVIVO_LVTMA_CNTL_SYNC_PHASE           (1 << 12)
+#   define AVIVO_LVTMA_CNTL_PIXEL_ENCODING       (1 << 16)
+#   define AVIVO_LVTMA_CNTL_DUAL_LINK_ENABLE     (1 << 24)
+#   define AVIVO_LVTMA_CNTL_SWAP                 (1 << 28)
+#define AVIVO_LVTMA_SOURCE_SELECT                               0x7a84
+#define AVIVO_LVTMA_COLOR_FORMAT                                0x7a88
+#define AVIVO_LVTMA_BIT_DEPTH_CONTROL                           0x7a94
+#   define AVIVO_LVTMA_BIT_DEPTH_CONTROL_TRUNCATE_EN           (1 << 0)
+#   define AVIVO_LVTMA_BIT_DEPTH_CONTROL_TRUNCATE_DEPTH        (1 << 4)
+#   define AVIVO_LVTMA_BIT_DEPTH_CONTROL_SPATIAL_DITHER_EN     (1 << 8)
+#   define AVIVO_LVTMA_BIT_DEPTH_CONTROL_SPATIAL_DITHER_DEPTH  (1 << 12)
+#   define AVIVO_LVTMA_BIT_DEPTH_CONTROL_TEMPORAL_DITHER_EN    (1 << 16)
+#   define AVIVO_LVTMA_BIT_DEPTH_CONTROL_TEMPORAL_DITHER_DEPTH (1 << 20)
+#   define AVIVO_LVTMA_BIT_DEPTH_CONTROL_TEMPORAL_LEVEL        (1 << 24)
+#   define AVIVO_LVTMA_BIT_DEPTH_CONTROL_TEMPORAL_DITHER_RESET (1 << 26)
+
+
+
+#define AVIVO_LVTMA_DCBALANCER_CONTROL                  0x7ad0
+#   define AVIVO_LVTMA_DCBALANCER_CONTROL_EN                  (1 << 0)
+#   define AVIVO_LVTMA_DCBALANCER_CONTROL_TEST_EN             (1 << 8)
+#   define AVIVO_LVTMA_DCBALANCER_CONTROL_TEST_IN_SHIFT       (16)
+#   define AVIVO_LVTMA_DCBALANCER_CONTROL_FORCE               (1 << 24)
+
+#define AVIVO_LVTMA_DATA_SYNCHRONIZATION                0x78d8
+#   define AVIVO_LVTMA_DATA_SYNCHRONIZATION_DSYNSEL           (1 << 0)
+#   define AVIVO_LVTMA_DATA_SYNCHRONIZATION_PFREQCHG          (1 << 8)
+#define R500_LVTMA_CLOCK_ENABLE			0x7b00
+#define R600_LVTMA_CLOCK_ENABLE			0x7b04
+
+#define R500_LVTMA_TRANSMITTER_ENABLE              0x7b04
+#define R600_LVTMA_TRANSMITTER_ENABLE              0x7b08
+#   define AVIVO_LVTMA_TRANSMITTER_ENABLE_LNKC0EN             (1 << 1)
+#   define AVIVO_LVTMA_TRANSMITTER_ENABLE_LNKD00EN            (1 << 2)
+#   define AVIVO_LVTMA_TRANSMITTER_ENABLE_LNKD01EN            (1 << 3)
+#   define AVIVO_LVTMA_TRANSMITTER_ENABLE_LNKD02EN            (1 << 4)
+#   define AVIVO_LVTMA_TRANSMITTER_ENABLE_LNKD03EN            (1 << 5)
+#   define AVIVO_LVTMA_TRANSMITTER_ENABLE_LNKC1EN             (1 << 9)
+#   define AVIVO_LVTMA_TRANSMITTER_ENABLE_LNKD10EN            (1 << 10)
+#   define AVIVO_LVTMA_TRANSMITTER_ENABLE_LNKD11EN            (1 << 11)
+#   define AVIVO_LVTMA_TRANSMITTER_ENABLE_LNKD12EN            (1 << 12)
+#   define AVIVO_LVTMA_TRANSMITTER_ENABLE_LNKCEN_HPD_MASK     (1 << 17)
+#   define AVIVO_LVTMA_TRANSMITTER_ENABLE_LNKDEN_HPD_MASK     (1 << 18)
+
+#define R500_LVTMA_TRANSMITTER_CONTROL			        0x7b10
+#define R600_LVTMA_TRANSMITTER_CONTROL			        0x7b14
+#	define AVIVO_LVTMA_TRANSMITTER_CONTROL_PLL_ENABLE	  (1 << 0)
+#	define AVIVO_LVTMA_TRANSMITTER_CONTROL_PLL_RESET	  (1 << 1)
+#	define AVIVO_LVTMA_TRANSMITTER_CONTROL_PLL_HPD_MASK_SHIFT (2)
+#	define AVIVO_LVTMA_TRANSMITTER_CONTROL_IDSCKSEL	          (1 << 4)
+#       define AVIVO_LVTMA_TRANSMITTER_CONTROL_BGSLEEP            (1 << 5)
+#	define AVIVO_LVTMA_TRANSMITTER_CONTROL_PLL_PWRUP_SEQ_EN	  (1 << 6)
+#	define AVIVO_LVTMA_TRANSMITTER_CONTROL_TMCLK	          (1 << 8)
+#	define AVIVO_LVTMA_TRANSMITTER_CONTROL_TMCLK_FROM_PADS	  (1 << 13)
+#	define AVIVO_LVTMA_TRANSMITTER_CONTROL_TDCLK	          (1 << 14)
+#	define AVIVO_LVTMA_TRANSMITTER_CONTROL_TDCLK_FROM_PADS	  (1 << 15)
+#       define AVIVO_LVTMA_TRANSMITTER_CONTROL_CLK_PATTERN_SHIFT  (16)
+#	define AVIVO_LVTMA_TRANSMITTER_CONTROL_BYPASS_PLL	  (1 << 28)
+#       define AVIVO_LVTMA_TRANSMITTER_CONTROL_USE_CLK_DATA       (1 << 29)
+#	define AVIVO_LVTMA_TRANSMITTER_CONTROL_INPUT_TEST_CLK_SEL (1 << 31)
+
+#define R500_LVTMA_PWRSEQ_CNTL						0x7af0
+#define R600_LVTMA_PWRSEQ_CNTL						0x7af4
+#	define AVIVO_LVTMA_PWRSEQ_EN					    (1 << 0)
+#	define AVIVO_LVTMA_PWRSEQ_PLL_ENABLE_MASK			    (1 << 2)
+#	define AVIVO_LVTMA_PWRSEQ_PLL_RESET_MASK			    (1 << 3)
+#	define AVIVO_LVTMA_PWRSEQ_TARGET_STATE				    (1 << 4)
+#	define AVIVO_LVTMA_SYNCEN					    (1 << 8)
+#	define AVIVO_LVTMA_SYNCEN_OVRD					    (1 << 9)
+#	define AVIVO_LVTMA_SYNCEN_POL					    (1 << 10)
+#	define AVIVO_LVTMA_DIGON					    (1 << 16)
+#	define AVIVO_LVTMA_DIGON_OVRD					    (1 << 17)
+#	define AVIVO_LVTMA_DIGON_POL					    (1 << 18)
+#	define AVIVO_LVTMA_BLON						    (1 << 24)
+#	define AVIVO_LVTMA_BLON_OVRD					    (1 << 25)
+#	define AVIVO_LVTMA_BLON_POL					    (1 << 26)
+
+#define R500_LVTMA_PWRSEQ_STATE                        0x7af4
+#define R600_LVTMA_PWRSEQ_STATE                        0x7af8
+#       define AVIVO_LVTMA_PWRSEQ_STATE_TARGET_STATE_R          (1 << 0)
+#       define AVIVO_LVTMA_PWRSEQ_STATE_DIGON                   (1 << 1)
+#       define AVIVO_LVTMA_PWRSEQ_STATE_SYNCEN                  (1 << 2)
+#       define AVIVO_LVTMA_PWRSEQ_STATE_BLON                    (1 << 3)
+#       define AVIVO_LVTMA_PWRSEQ_STATE_DONE                    (1 << 4)
+#       define AVIVO_LVTMA_PWRSEQ_STATE_STATUS_SHIFT            (8)
+
+#define AVIVO_LVDS_BACKLIGHT_CNTL			0x7af8
+#	define AVIVO_LVDS_BACKLIGHT_CNTL_EN			(1 << 0)
+#	define AVIVO_LVDS_BACKLIGHT_LEVEL_MASK		0x0000ff00
+#	define AVIVO_LVDS_BACKLIGHT_LEVEL_SHIFT		8
+
+#define AVIVO_DVOA_BIT_DEPTH_CONTROL			0x7988
+
+#define AVIVO_GPIO_0                        0x7e30
+#define AVIVO_GPIO_1                        0x7e40
+#define AVIVO_GPIO_2                        0x7e50
+#define AVIVO_GPIO_3                        0x7e60
+
+#define AVIVO_DC_GPIO_HPD_Y                 0x7e9c
+
+#define AVIVO_I2C_STATUS					0x7d30
+#	define AVIVO_I2C_STATUS_DONE				(1 << 0)
+#	define AVIVO_I2C_STATUS_NACK				(1 << 1)
+#	define AVIVO_I2C_STATUS_HALT				(1 << 2)
+#	define AVIVO_I2C_STATUS_GO				(1 << 3)
+#	define AVIVO_I2C_STATUS_MASK				0x7
+/* If radeon_mm_i2c is to be believed, this is HALT, NACK, and maybe
+ * DONE? */
+#	define AVIVO_I2C_STATUS_CMD_RESET			0x7
+#	define AVIVO_I2C_STATUS_CMD_WAIT			(1 << 3)
+#define AVIVO_I2C_STOP						0x7d34
+#define AVIVO_I2C_START_CNTL				0x7d38
+#	define AVIVO_I2C_START						(1 << 8)
+#	define AVIVO_I2C_CONNECTOR0					(0 << 16)
+#	define AVIVO_I2C_CONNECTOR1					(1 << 16)
+#define R520_I2C_START (1<<0)
+#define R520_I2C_STOP (1<<1)
+#define R520_I2C_RX (1<<2)
+#define R520_I2C_EN (1<<8)
+#define R520_I2C_DDC1 (0<<16)
+#define R520_I2C_DDC2 (1<<16)
+#define R520_I2C_DDC3 (2<<16)
+#define R520_I2C_DDC_MASK (3<<16)
+#define AVIVO_I2C_CONTROL2					0x7d3c
+#	define AVIVO_I2C_7D3C_SIZE_SHIFT			8
+#	define AVIVO_I2C_7D3C_SIZE_MASK				(0xf << 8)
+#define AVIVO_I2C_CONTROL3						0x7d40
+/* Reading is done 4 bytes at a time: read the bottom 8 bits from
+ * 7d44, four times in a row.
+ * Writing is a little more complex.  First write DATA with
+ * 0xnnnnnnzz, then 0xnnnnnnyy, where nnnnnn is some non-deterministic
+ * magic number, zz is, I think, the slave address, and yy is the byte
+ * you want to write. */
+#define AVIVO_I2C_DATA						0x7d44
+#define R520_I2C_ADDR_COUNT_MASK (0x7)
+#define R520_I2C_DATA_COUNT_SHIFT (8)
+#define R520_I2C_DATA_COUNT_MASK (0xF00)
+#define AVIVO_I2C_CNTL						0x7d50
+#	define AVIVO_I2C_EN							(1 << 0)
+#	define AVIVO_I2C_RESET						(1 << 8)
+
+#endif
diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c
new file mode 100644
index 00000000000..570a244bd88
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r520.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#include "drmP.h"
+#include "radeon_reg.h"
+#include "radeon.h"
+
+/* r520,rv530,rv560,rv570,r580 depends on : */
+void r100_hdp_reset(struct radeon_device *rdev);
+int rv370_pcie_gart_enable(struct radeon_device *rdev);
+void rv370_pcie_gart_disable(struct radeon_device *rdev);
+void r420_pipes_init(struct radeon_device *rdev);
+void rs600_mc_disable_clients(struct radeon_device *rdev);
+void rs600_disable_vga(struct radeon_device *rdev);
+int rv515_debugfs_pipes_info_init(struct radeon_device *rdev);
+int rv515_debugfs_ga_info_init(struct radeon_device *rdev);
+
+/* This files gather functions specifics to:
+ * r520,rv530,rv560,rv570,r580
+ *
+ * Some of these functions might be used by newer ASICs.
+ */
+void r520_gpu_init(struct radeon_device *rdev);
+int r520_mc_wait_for_idle(struct radeon_device *rdev);
+
+
+/*
+ * MC
+ */
+int r520_mc_init(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+	int r;
+
+	if (r100_debugfs_rbbm_init(rdev)) {
+		DRM_ERROR("Failed to register debugfs file for RBBM !\n");
+	}
+	if (rv515_debugfs_pipes_info_init(rdev)) {
+		DRM_ERROR("Failed to register debugfs file for pipes !\n");
+	}
+	if (rv515_debugfs_ga_info_init(rdev)) {
+		DRM_ERROR("Failed to register debugfs file for pipes !\n");
+	}
+
+	r520_gpu_init(rdev);
+	rv370_pcie_gart_disable(rdev);
+
+	/* Setup GPU memory space */
+	rdev->mc.vram_location = 0xFFFFFFFFUL;
+	rdev->mc.gtt_location = 0xFFFFFFFFUL;
+	if (rdev->flags & RADEON_IS_AGP) {
+		r = radeon_agp_init(rdev);
+		if (r) {
+			printk(KERN_WARNING "[drm] Disabling AGP\n");
+			rdev->flags &= ~RADEON_IS_AGP;
+			rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
+		} else {
+			rdev->mc.gtt_location = rdev->mc.agp_base;
+		}
+	}
+	r = radeon_mc_setup(rdev);
+	if (r) {
+		return r;
+	}
+
+	/* Program GPU memory space */
+	rs600_mc_disable_clients(rdev);
+	if (r520_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait MC idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+	/* Write VRAM size in case we are limiting it */
+	WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
+	tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
+	tmp = REG_SET(R520_MC_FB_TOP, tmp >> 16);
+	tmp |= REG_SET(R520_MC_FB_START, rdev->mc.vram_location >> 16);
+	WREG32_MC(R520_MC_FB_LOCATION, tmp);
+	WREG32(RS690_HDP_FB_LOCATION, rdev->mc.vram_location >> 16);
+	WREG32(0x310, rdev->mc.vram_location);
+	if (rdev->flags & RADEON_IS_AGP) {
+		tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
+		tmp = REG_SET(R520_MC_AGP_TOP, tmp >> 16);
+		tmp |= REG_SET(R520_MC_AGP_START, rdev->mc.gtt_location >> 16);
+		WREG32_MC(R520_MC_AGP_LOCATION, tmp);
+		WREG32_MC(R520_MC_AGP_BASE, rdev->mc.agp_base);
+		WREG32_MC(R520_MC_AGP_BASE_2, 0);
+	} else {
+		WREG32_MC(R520_MC_AGP_LOCATION, 0x0FFFFFFF);
+		WREG32_MC(R520_MC_AGP_BASE, 0);
+		WREG32_MC(R520_MC_AGP_BASE_2, 0);
+	}
+	return 0;
+}
+
+void r520_mc_fini(struct radeon_device *rdev)
+{
+	rv370_pcie_gart_disable(rdev);
+	radeon_gart_table_vram_free(rdev);
+	radeon_gart_fini(rdev);
+}
+
+
+/*
+ * Global GPU functions
+ */
+void r520_errata(struct radeon_device *rdev)
+{
+	rdev->pll_errata = 0;
+}
+
+int r520_mc_wait_for_idle(struct radeon_device *rdev)
+{
+	unsigned i;
+	uint32_t tmp;
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		/* read MC_STATUS */
+		tmp = RREG32_MC(R520_MC_STATUS);
+		if (tmp & R520_MC_STATUS_IDLE) {
+			return 0;
+		}
+		DRM_UDELAY(1);
+	}
+	return -1;
+}
+
+void r520_gpu_init(struct radeon_device *rdev)
+{
+	unsigned pipe_select_current, gb_pipe_select, tmp;
+
+	r100_hdp_reset(rdev);
+	rs600_disable_vga(rdev);
+	/*
+	 * DST_PIPE_CONFIG		0x170C
+	 * GB_TILE_CONFIG		0x4018
+	 * GB_FIFO_SIZE			0x4024
+	 * GB_PIPE_SELECT		0x402C
+	 * GB_PIPE_SELECT2              0x4124
+	 *	Z_PIPE_SHIFT			0
+	 *	Z_PIPE_MASK			0x000000003
+	 * GB_FIFO_SIZE2                0x4128
+	 *	SC_SFIFO_SIZE_SHIFT		0
+	 *	SC_SFIFO_SIZE_MASK		0x000000003
+	 *	SC_MFIFO_SIZE_SHIFT		2
+	 *	SC_MFIFO_SIZE_MASK		0x00000000C
+	 *	FG_SFIFO_SIZE_SHIFT		4
+	 *	FG_SFIFO_SIZE_MASK		0x000000030
+	 *	ZB_MFIFO_SIZE_SHIFT		6
+	 *	ZB_MFIFO_SIZE_MASK		0x0000000C0
+	 * GA_ENHANCE			0x4274
+	 * SU_REG_DEST			0x42C8
+	 */
+	/* workaround for RV530 */
+	if (rdev->family == CHIP_RV530) {
+		WREG32(0x4124, 1);
+		WREG32(0x4128, 0xFF);
+	}
+	r420_pipes_init(rdev);
+	gb_pipe_select = RREG32(0x402C);
+	tmp = RREG32(0x170C);
+	pipe_select_current = (tmp >> 2) & 3;
+	tmp = (1 << pipe_select_current) |
+	      (((gb_pipe_select >> 8) & 0xF) << 4);
+	WREG32_PLL(0x000D, tmp);
+	if (r520_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait MC idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+}
+
+
+/*
+ * VRAM info
+ */
+static void r520_vram_get_type(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+
+	rdev->mc.vram_width = 128;
+	rdev->mc.vram_is_ddr = true;
+	tmp = RREG32_MC(R520_MC_CNTL0);
+	switch ((tmp & R520_MEM_NUM_CHANNELS_MASK) >> R520_MEM_NUM_CHANNELS_SHIFT) {
+	case 0:
+		rdev->mc.vram_width = 32;
+		break;
+	case 1:
+		rdev->mc.vram_width = 64;
+		break;
+	case 2:
+		rdev->mc.vram_width = 128;
+		break;
+	case 3:
+		rdev->mc.vram_width = 256;
+		break;
+	default:
+		rdev->mc.vram_width = 128;
+		break;
+	}
+	if (tmp & R520_MC_CHANNEL_SIZE)
+		rdev->mc.vram_width *= 2;
+}
+
+void r520_vram_info(struct radeon_device *rdev)
+{
+	r520_vram_get_type(rdev);
+	rdev->mc.vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
+
+	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
+	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+}
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
new file mode 100644
index 00000000000..c45559fc97f
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#include "drmP.h"
+#include "radeon_reg.h"
+#include "radeon.h"
+
+/* r600,rv610,rv630,rv620,rv635,rv670 depends on : */
+void rs600_mc_disable_clients(struct radeon_device *rdev);
+
+/* This files gather functions specifics to:
+ * r600,rv610,rv630,rv620,rv635,rv670
+ *
+ * Some of these functions might be used by newer ASICs.
+ */
+int r600_mc_wait_for_idle(struct radeon_device *rdev);
+void r600_gpu_init(struct radeon_device *rdev);
+
+
+/*
+ * MC
+ */
+int r600_mc_init(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+
+	r600_gpu_init(rdev);
+
+	/* setup the gart before changing location so we can ask to
+	 * discard unmapped mc request
+	 */
+	/* FIXME: disable out of gart access */
+	tmp = rdev->mc.gtt_location / 4096;
+	tmp = REG_SET(R600_LOGICAL_PAGE_NUMBER, tmp);
+	WREG32(R600_MC_VM_SYSTEM_APERTURE_LOW_ADDR, tmp);
+	tmp = (rdev->mc.gtt_location + rdev->mc.gtt_size) / 4096;
+	tmp = REG_SET(R600_LOGICAL_PAGE_NUMBER, tmp);
+	WREG32(R600_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, tmp);
+
+	rs600_mc_disable_clients(rdev);
+	if (r600_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait MC idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+
+	tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
+	tmp = REG_SET(R600_MC_FB_TOP, tmp >> 24);
+	tmp |= REG_SET(R600_MC_FB_BASE, rdev->mc.vram_location >> 24);
+	WREG32(R600_MC_VM_FB_LOCATION, tmp);
+	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
+	tmp = REG_SET(R600_MC_AGP_TOP, tmp >> 22);
+	WREG32(R600_MC_VM_AGP_TOP, tmp);
+	tmp = REG_SET(R600_MC_AGP_BOT, rdev->mc.gtt_location >> 22);
+	WREG32(R600_MC_VM_AGP_BOT, tmp);
+	return 0;
+}
+
+void r600_mc_fini(struct radeon_device *rdev)
+{
+	/* FIXME: implement */
+}
+
+
+/*
+ * Global GPU functions
+ */
+void r600_errata(struct radeon_device *rdev)
+{
+	rdev->pll_errata = 0;
+}
+
+int r600_mc_wait_for_idle(struct radeon_device *rdev)
+{
+	/* FIXME: implement */
+	return 0;
+}
+
+void r600_gpu_init(struct radeon_device *rdev)
+{
+	/* FIXME: implement */
+}
+
+
+/*
+ * VRAM info
+ */
+void r600_vram_get_type(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+	int chansize;
+
+	rdev->mc.vram_width = 128;
+	rdev->mc.vram_is_ddr = true;
+
+	tmp = RREG32(R600_RAMCFG);
+	if (tmp & R600_CHANSIZE_OVERRIDE) {
+		chansize = 16;
+	} else if (tmp & R600_CHANSIZE) {
+		chansize = 64;
+	} else {
+		chansize = 32;
+	}
+	if (rdev->family == CHIP_R600) {
+		rdev->mc.vram_width = 8 * chansize;
+	} else if (rdev->family == CHIP_RV670) {
+		rdev->mc.vram_width = 4 * chansize;
+	} else if ((rdev->family == CHIP_RV610) ||
+			(rdev->family == CHIP_RV620)) {
+		rdev->mc.vram_width = chansize;
+	} else if ((rdev->family == CHIP_RV630) ||
+			(rdev->family == CHIP_RV635)) {
+		rdev->mc.vram_width = 2 * chansize;
+	}
+}
+
+void r600_vram_info(struct radeon_device *rdev)
+{
+	r600_vram_get_type(rdev);
+	rdev->mc.vram_size = RREG32(R600_CONFIG_MEMSIZE);
+
+	/* Could aper size report 0 ? */
+	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
+	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+}
+
+/*
+ * Indirect registers accessor
+ */
+uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg)
+{
+	uint32_t r;
+
+	WREG32(R600_PCIE_PORT_INDEX, ((reg) & 0xff));
+	(void)RREG32(R600_PCIE_PORT_INDEX);
+	r = RREG32(R600_PCIE_PORT_DATA);
+	return r;
+}
+
+void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+	WREG32(R600_PCIE_PORT_INDEX, ((reg) & 0xff));
+	(void)RREG32(R600_PCIE_PORT_INDEX);
+	WREG32(R600_PCIE_PORT_DATA, (v));
+	(void)RREG32(R600_PCIE_PORT_DATA);
+}
diff --git a/drivers/gpu/drm/radeon/r600_reg.h b/drivers/gpu/drm/radeon/r600_reg.h
new file mode 100644
index 00000000000..e2d1f5f33f7
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r600_reg.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#ifndef __R600_REG_H__
+#define __R600_REG_H__
+
+#define R600_PCIE_PORT_INDEX                0x0038
+#define R600_PCIE_PORT_DATA                 0x003c
+
+#define R600_MC_VM_FB_LOCATION			0x2180
+#define		R600_MC_FB_BASE_MASK			0x0000FFFF
+#define		R600_MC_FB_BASE_SHIFT			0
+#define		R600_MC_FB_TOP_MASK			0xFFFF0000
+#define		R600_MC_FB_TOP_SHIFT			16
+#define R600_MC_VM_AGP_TOP			0x2184
+#define		R600_MC_AGP_TOP_MASK			0x0003FFFF
+#define		R600_MC_AGP_TOP_SHIFT			0
+#define R600_MC_VM_AGP_BOT			0x2188
+#define		R600_MC_AGP_BOT_MASK			0x0003FFFF
+#define		R600_MC_AGP_BOT_SHIFT			0
+#define R600_MC_VM_AGP_BASE			0x218c
+#define R600_MC_VM_SYSTEM_APERTURE_LOW_ADDR	0x2190
+#define		R600_LOGICAL_PAGE_NUMBER_MASK		0x000FFFFF
+#define		R600_LOGICAL_PAGE_NUMBER_SHIFT		0
+#define R600_MC_VM_SYSTEM_APERTURE_HIGH_ADDR	0x2194
+#define R600_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR	0x2198
+
+#define R700_MC_VM_FB_LOCATION			0x2024
+#define		R700_MC_FB_BASE_MASK			0x0000FFFF
+#define		R700_MC_FB_BASE_SHIFT			0
+#define		R700_MC_FB_TOP_MASK			0xFFFF0000
+#define		R700_MC_FB_TOP_SHIFT			16
+#define R700_MC_VM_AGP_TOP			0x2028
+#define		R700_MC_AGP_TOP_MASK			0x0003FFFF
+#define		R700_MC_AGP_TOP_SHIFT			0
+#define R700_MC_VM_AGP_BOT			0x202c
+#define		R700_MC_AGP_BOT_MASK			0x0003FFFF
+#define		R700_MC_AGP_BOT_SHIFT			0
+#define R700_MC_VM_AGP_BASE			0x2030
+#define R700_MC_VM_SYSTEM_APERTURE_LOW_ADDR	0x2034
+#define		R700_LOGICAL_PAGE_NUMBER_MASK		0x000FFFFF
+#define		R700_LOGICAL_PAGE_NUMBER_SHIFT		0
+#define R700_MC_VM_SYSTEM_APERTURE_HIGH_ADDR	0x2038
+#define R700_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR	0x203c
+
+#define R600_RAMCFG				       0x2408
+#       define R600_CHANSIZE                           (1 << 7)
+#       define R600_CHANSIZE_OVERRIDE                  (1 << 10)
+
+
+#define R600_GENERAL_PWRMGT                                        0x618
+#	define R600_OPEN_DRAIN_PADS				   (1 << 11)
+
+#define R600_LOWER_GPIO_ENABLE                                     0x710
+#define R600_CTXSW_VID_LOWER_GPIO_CNTL                             0x718
+#define R600_HIGH_VID_LOWER_GPIO_CNTL                              0x71c
+#define R600_MEDIUM_VID_LOWER_GPIO_CNTL                            0x720
+#define R600_LOW_VID_LOWER_GPIO_CNTL                               0x724
+
+
+
+#define R600_HDP_NONSURFACE_BASE                                0x2c04
+
+#define R600_BUS_CNTL                                           0x5420
+#define R600_CONFIG_CNTL                                        0x5424
+#define R600_CONFIG_MEMSIZE                                     0x5428
+#define R600_CONFIG_F0_BASE                                     0x542C
+#define R600_CONFIG_APER_SIZE                                   0x5430
+
+#define R600_ROM_CNTL                              0x1600
+#       define R600_SCK_OVERWRITE                  (1 << 1)
+#       define R600_SCK_PRESCALE_CRYSTAL_CLK_SHIFT 28
+#       define R600_SCK_PRESCALE_CRYSTAL_CLK_MASK  (0xf << 28)
+
+#define R600_CG_SPLL_FUNC_CNTL                     0x600
+#       define R600_SPLL_BYPASS_EN                 (1 << 3)
+#define R600_CG_SPLL_STATUS                        0x60c
+#       define R600_SPLL_CHG_STATUS                (1 << 1)
+
+#define R600_BIOS_0_SCRATCH               0x1724
+#define R600_BIOS_1_SCRATCH               0x1728
+#define R600_BIOS_2_SCRATCH               0x172c
+#define R600_BIOS_3_SCRATCH               0x1730
+#define R600_BIOS_4_SCRATCH               0x1734
+#define R600_BIOS_5_SCRATCH               0x1738
+#define R600_BIOS_6_SCRATCH               0x173c
+#define R600_BIOS_7_SCRATCH               0x1740
+
+
+#endif
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
new file mode 100644
index 00000000000..c3f24cc5600
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -0,0 +1,793 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#ifndef __RADEON_H__
+#define __RADEON_H__
+
+#include "radeon_object.h"
+
+/* TODO: Here are things that needs to be done :
+ *	- surface allocator & initializer : (bit like scratch reg) should
+ *	  initialize HDP_ stuff on RS600, R600, R700 hw, well anythings
+ *	  related to surface
+ *	- WB : write back stuff (do it bit like scratch reg things)
+ *	- Vblank : look at Jesse's rework and what we should do
+ *	- r600/r700: gart & cp
+ *	- cs : clean cs ioctl use bitmap & things like that.
+ *	- power management stuff
+ *	- Barrier in gart code
+ *	- Unmappabled vram ?
+ *	- TESTING, TESTING, TESTING
+ */
+
+#include <asm/atomic.h>
+#include <linux/wait.h>
+#include <linux/list.h>
+#include <linux/kref.h>
+
+#include "radeon_mode.h"
+#include "radeon_reg.h"
+
+
+/*
+ * Modules parameters.
+ */
+extern int radeon_no_wb;
+extern int radeon_modeset;
+extern int radeon_dynclks;
+extern int radeon_r4xx_atom;
+extern int radeon_agpmode;
+extern int radeon_vram_limit;
+extern int radeon_gart_size;
+extern int radeon_benchmarking;
+extern int radeon_connector_table;
+
+/*
+ * Copy from radeon_drv.h so we don't have to include both and have conflicting
+ * symbol;
+ */
+#define RADEON_MAX_USEC_TIMEOUT		100000	/* 100 ms */
+#define RADEON_IB_POOL_SIZE		16
+#define RADEON_DEBUGFS_MAX_NUM_FILES	32
+#define RADEONFB_CONN_LIMIT		4
+
+enum radeon_family {
+	CHIP_R100,
+	CHIP_RV100,
+	CHIP_RS100,
+	CHIP_RV200,
+	CHIP_RS200,
+	CHIP_R200,
+	CHIP_RV250,
+	CHIP_RS300,
+	CHIP_RV280,
+	CHIP_R300,
+	CHIP_R350,
+	CHIP_RV350,
+	CHIP_RV380,
+	CHIP_R420,
+	CHIP_R423,
+	CHIP_RV410,
+	CHIP_RS400,
+	CHIP_RS480,
+	CHIP_RS600,
+	CHIP_RS690,
+	CHIP_RS740,
+	CHIP_RV515,
+	CHIP_R520,
+	CHIP_RV530,
+	CHIP_RV560,
+	CHIP_RV570,
+	CHIP_R580,
+	CHIP_R600,
+	CHIP_RV610,
+	CHIP_RV630,
+	CHIP_RV620,
+	CHIP_RV635,
+	CHIP_RV670,
+	CHIP_RS780,
+	CHIP_RV770,
+	CHIP_RV730,
+	CHIP_RV710,
+	CHIP_LAST,
+};
+
+enum radeon_chip_flags {
+	RADEON_FAMILY_MASK = 0x0000ffffUL,
+	RADEON_FLAGS_MASK = 0xffff0000UL,
+	RADEON_IS_MOBILITY = 0x00010000UL,
+	RADEON_IS_IGP = 0x00020000UL,
+	RADEON_SINGLE_CRTC = 0x00040000UL,
+	RADEON_IS_AGP = 0x00080000UL,
+	RADEON_HAS_HIERZ = 0x00100000UL,
+	RADEON_IS_PCIE = 0x00200000UL,
+	RADEON_NEW_MEMMAP = 0x00400000UL,
+	RADEON_IS_PCI = 0x00800000UL,
+	RADEON_IS_IGPGART = 0x01000000UL,
+};
+
+
+/*
+ * Errata workarounds.
+ */
+enum radeon_pll_errata {
+	CHIP_ERRATA_R300_CG             = 0x00000001,
+	CHIP_ERRATA_PLL_DUMMYREADS      = 0x00000002,
+	CHIP_ERRATA_PLL_DELAY           = 0x00000004
+};
+
+
+struct radeon_device;
+
+
+/*
+ * BIOS.
+ */
+bool radeon_get_bios(struct radeon_device *rdev);
+
+/*
+ * Clocks
+ */
+
+struct radeon_clock {
+	struct radeon_pll p1pll;
+	struct radeon_pll p2pll;
+	struct radeon_pll spll;
+	struct radeon_pll mpll;
+	/* 10 Khz units */
+	uint32_t default_mclk;
+	uint32_t default_sclk;
+};
+
+/*
+ * Fences.
+ */
+struct radeon_fence_driver {
+	uint32_t			scratch_reg;
+	atomic_t			seq;
+	uint32_t			last_seq;
+	unsigned long			count_timeout;
+	wait_queue_head_t		queue;
+	rwlock_t			lock;
+	struct list_head		created;
+	struct list_head		emited;
+	struct list_head		signaled;
+};
+
+struct radeon_fence {
+	struct radeon_device		*rdev;
+	struct kref			kref;
+	struct list_head		list;
+	/* protected by radeon_fence.lock */
+	uint32_t			seq;
+	unsigned long			timeout;
+	bool				emited;
+	bool				signaled;
+};
+
+int radeon_fence_driver_init(struct radeon_device *rdev);
+void radeon_fence_driver_fini(struct radeon_device *rdev);
+int radeon_fence_create(struct radeon_device *rdev, struct radeon_fence **fence);
+int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence);
+void radeon_fence_process(struct radeon_device *rdev);
+bool radeon_fence_signaled(struct radeon_fence *fence);
+int radeon_fence_wait(struct radeon_fence *fence, bool interruptible);
+int radeon_fence_wait_next(struct radeon_device *rdev);
+int radeon_fence_wait_last(struct radeon_device *rdev);
+struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence);
+void radeon_fence_unref(struct radeon_fence **fence);
+
+
+/*
+ * Radeon buffer.
+ */
+struct radeon_object;
+
+struct radeon_object_list {
+	struct list_head	list;
+	struct radeon_object	*robj;
+	uint64_t		gpu_offset;
+	unsigned		rdomain;
+	unsigned		wdomain;
+};
+
+int radeon_object_init(struct radeon_device *rdev);
+void radeon_object_fini(struct radeon_device *rdev);
+int radeon_object_create(struct radeon_device *rdev,
+			 struct drm_gem_object *gobj,
+			 unsigned long size,
+			 bool kernel,
+			 uint32_t domain,
+			 bool interruptible,
+			 struct radeon_object **robj_ptr);
+int radeon_object_kmap(struct radeon_object *robj, void **ptr);
+void radeon_object_kunmap(struct radeon_object *robj);
+void radeon_object_unref(struct radeon_object **robj);
+int radeon_object_pin(struct radeon_object *robj, uint32_t domain,
+		      uint64_t *gpu_addr);
+void radeon_object_unpin(struct radeon_object *robj);
+int radeon_object_wait(struct radeon_object *robj);
+int radeon_object_evict_vram(struct radeon_device *rdev);
+int radeon_object_mmap(struct radeon_object *robj, uint64_t *offset);
+void radeon_object_force_delete(struct radeon_device *rdev);
+void radeon_object_list_add_object(struct radeon_object_list *lobj,
+				   struct list_head *head);
+int radeon_object_list_validate(struct list_head *head, void *fence);
+void radeon_object_list_unvalidate(struct list_head *head);
+void radeon_object_list_clean(struct list_head *head);
+int radeon_object_fbdev_mmap(struct radeon_object *robj,
+			     struct vm_area_struct *vma);
+unsigned long radeon_object_size(struct radeon_object *robj);
+
+
+/*
+ * GEM objects.
+ */
+struct radeon_gem {
+	struct list_head	objects;
+};
+
+int radeon_gem_init(struct radeon_device *rdev);
+void radeon_gem_fini(struct radeon_device *rdev);
+int radeon_gem_object_create(struct radeon_device *rdev, int size,
+			     int alignment, int initial_domain,
+			     bool discardable, bool kernel,
+			     bool interruptible,
+			     struct drm_gem_object **obj);
+int radeon_gem_object_pin(struct drm_gem_object *obj, uint32_t pin_domain,
+			  uint64_t *gpu_addr);
+void radeon_gem_object_unpin(struct drm_gem_object *obj);
+
+
+/*
+ * GART structures, functions & helpers
+ */
+struct radeon_mc;
+
+struct radeon_gart_table_ram {
+	volatile uint32_t		*ptr;
+};
+
+struct radeon_gart_table_vram {
+	struct radeon_object		*robj;
+	volatile uint32_t		*ptr;
+};
+
+union radeon_gart_table {
+	struct radeon_gart_table_ram	ram;
+	struct radeon_gart_table_vram	vram;
+};
+
+struct radeon_gart {
+	dma_addr_t			table_addr;
+	unsigned			num_gpu_pages;
+	unsigned			num_cpu_pages;
+	unsigned			table_size;
+	union radeon_gart_table		table;
+	struct page			**pages;
+	dma_addr_t			*pages_addr;
+	bool				ready;
+};
+
+int radeon_gart_table_ram_alloc(struct radeon_device *rdev);
+void radeon_gart_table_ram_free(struct radeon_device *rdev);
+int radeon_gart_table_vram_alloc(struct radeon_device *rdev);
+void radeon_gart_table_vram_free(struct radeon_device *rdev);
+int radeon_gart_init(struct radeon_device *rdev);
+void radeon_gart_fini(struct radeon_device *rdev);
+void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
+			int pages);
+int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
+		     int pages, struct page **pagelist);
+
+
+/*
+ * GPU MC structures, functions & helpers
+ */
+struct radeon_mc {
+	resource_size_t		aper_size;
+	resource_size_t		aper_base;
+	resource_size_t		agp_base;
+	unsigned		gtt_location;
+	unsigned		gtt_size;
+	unsigned		vram_location;
+	unsigned		vram_size;
+	unsigned		vram_width;
+	int			vram_mtrr;
+	bool			vram_is_ddr;
+};
+
+int radeon_mc_setup(struct radeon_device *rdev);
+
+
+/*
+ * GPU scratch registers structures, functions & helpers
+ */
+struct radeon_scratch {
+	unsigned		num_reg;
+	bool			free[32];
+	uint32_t		reg[32];
+};
+
+int radeon_scratch_get(struct radeon_device *rdev, uint32_t *reg);
+void radeon_scratch_free(struct radeon_device *rdev, uint32_t reg);
+
+
+/*
+ * IRQS.
+ */
+struct radeon_irq {
+	bool		installed;
+	bool		sw_int;
+	/* FIXME: use a define max crtc rather than hardcode it */
+	bool		crtc_vblank_int[2];
+};
+
+int radeon_irq_kms_init(struct radeon_device *rdev);
+void radeon_irq_kms_fini(struct radeon_device *rdev);
+
+
+/*
+ * CP & ring.
+ */
+struct radeon_ib {
+	struct list_head	list;
+	unsigned long		idx;
+	uint64_t		gpu_addr;
+	struct radeon_fence	*fence;
+	volatile uint32_t	*ptr;
+	uint32_t		length_dw;
+};
+
+struct radeon_ib_pool {
+	struct mutex		mutex;
+	struct radeon_object	*robj;
+	struct list_head	scheduled_ibs;
+	struct radeon_ib	ibs[RADEON_IB_POOL_SIZE];
+	bool			ready;
+	DECLARE_BITMAP(alloc_bm, RADEON_IB_POOL_SIZE);
+};
+
+struct radeon_cp {
+	struct radeon_object	*ring_obj;
+	volatile uint32_t	*ring;
+	unsigned		rptr;
+	unsigned		wptr;
+	unsigned		wptr_old;
+	unsigned		ring_size;
+	unsigned		ring_free_dw;
+	int			count_dw;
+	uint64_t		gpu_addr;
+	uint32_t		align_mask;
+	uint32_t		ptr_mask;
+	struct mutex		mutex;
+	bool			ready;
+};
+
+int radeon_ib_get(struct radeon_device *rdev, struct radeon_ib **ib);
+void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib);
+int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib);
+int radeon_ib_pool_init(struct radeon_device *rdev);
+void radeon_ib_pool_fini(struct radeon_device *rdev);
+int radeon_ib_test(struct radeon_device *rdev);
+/* Ring access between begin & end cannot sleep */
+void radeon_ring_free_size(struct radeon_device *rdev);
+int radeon_ring_lock(struct radeon_device *rdev, unsigned ndw);
+void radeon_ring_unlock_commit(struct radeon_device *rdev);
+void radeon_ring_unlock_undo(struct radeon_device *rdev);
+int radeon_ring_test(struct radeon_device *rdev);
+int radeon_ring_init(struct radeon_device *rdev, unsigned ring_size);
+void radeon_ring_fini(struct radeon_device *rdev);
+
+
+/*
+ * CS.
+ */
+struct radeon_cs_reloc {
+	struct drm_gem_object		*gobj;
+	struct radeon_object		*robj;
+	struct radeon_object_list	lobj;
+	uint32_t			handle;
+	uint32_t			flags;
+};
+
+struct radeon_cs_chunk {
+	uint32_t		chunk_id;
+	uint32_t		length_dw;
+	uint32_t		*kdata;
+};
+
+struct radeon_cs_parser {
+	struct radeon_device	*rdev;
+	struct drm_file		*filp;
+	/* chunks */
+	unsigned		nchunks;
+	struct radeon_cs_chunk	*chunks;
+	uint64_t		*chunks_array;
+	/* IB */
+	unsigned		idx;
+	/* relocations */
+	unsigned		nrelocs;
+	struct radeon_cs_reloc	*relocs;
+	struct radeon_cs_reloc	**relocs_ptr;
+	struct list_head	validated;
+	/* indices of various chunks */
+	int			chunk_ib_idx;
+	int			chunk_relocs_idx;
+	struct radeon_ib	*ib;
+	void			*track;
+};
+
+struct radeon_cs_packet {
+	unsigned	idx;
+	unsigned	type;
+	unsigned	reg;
+	unsigned	opcode;
+	int		count;
+	unsigned	one_reg_wr;
+};
+
+typedef int (*radeon_packet0_check_t)(struct radeon_cs_parser *p,
+				      struct radeon_cs_packet *pkt,
+				      unsigned idx, unsigned reg);
+typedef int (*radeon_packet3_check_t)(struct radeon_cs_parser *p,
+				      struct radeon_cs_packet *pkt);
+
+
+/*
+ * AGP
+ */
+int radeon_agp_init(struct radeon_device *rdev);
+void radeon_agp_fini(struct radeon_device *rdev);
+
+
+/*
+ * Writeback
+ */
+struct radeon_wb {
+	struct radeon_object	*wb_obj;
+	volatile uint32_t	*wb;
+	uint64_t		gpu_addr;
+};
+
+
+/*
+ * Benchmarking
+ */
+void radeon_benchmark(struct radeon_device *rdev);
+
+
+/*
+ * Debugfs
+ */
+int radeon_debugfs_add_files(struct radeon_device *rdev,
+			     struct drm_info_list *files,
+			     unsigned nfiles);
+int radeon_debugfs_fence_init(struct radeon_device *rdev);
+int r100_debugfs_rbbm_init(struct radeon_device *rdev);
+int r100_debugfs_cp_init(struct radeon_device *rdev);
+
+
+/*
+ * ASIC specific functions.
+ */
+struct radeon_asic {
+	void (*errata)(struct radeon_device *rdev);
+	void (*vram_info)(struct radeon_device *rdev);
+	int (*gpu_reset)(struct radeon_device *rdev);
+	int (*mc_init)(struct radeon_device *rdev);
+	void (*mc_fini)(struct radeon_device *rdev);
+	int (*wb_init)(struct radeon_device *rdev);
+	void (*wb_fini)(struct radeon_device *rdev);
+	int (*gart_enable)(struct radeon_device *rdev);
+	void (*gart_disable)(struct radeon_device *rdev);
+	void (*gart_tlb_flush)(struct radeon_device *rdev);
+	int (*gart_set_page)(struct radeon_device *rdev, int i, uint64_t addr);
+	int (*cp_init)(struct radeon_device *rdev, unsigned ring_size);
+	void (*cp_fini)(struct radeon_device *rdev);
+	void (*cp_disable)(struct radeon_device *rdev);
+	void (*ring_start)(struct radeon_device *rdev);
+	int (*irq_set)(struct radeon_device *rdev);
+	int (*irq_process)(struct radeon_device *rdev);
+	void (*fence_ring_emit)(struct radeon_device *rdev, struct radeon_fence *fence);
+	int (*cs_parse)(struct radeon_cs_parser *p);
+	int (*copy_blit)(struct radeon_device *rdev,
+			 uint64_t src_offset,
+			 uint64_t dst_offset,
+			 unsigned num_pages,
+			 struct radeon_fence *fence);
+	int (*copy_dma)(struct radeon_device *rdev,
+			uint64_t src_offset,
+			uint64_t dst_offset,
+			unsigned num_pages,
+			struct radeon_fence *fence);
+	int (*copy)(struct radeon_device *rdev,
+		    uint64_t src_offset,
+		    uint64_t dst_offset,
+		    unsigned num_pages,
+		    struct radeon_fence *fence);
+	void (*set_engine_clock)(struct radeon_device *rdev, uint32_t eng_clock);
+	void (*set_memory_clock)(struct radeon_device *rdev, uint32_t mem_clock);
+	void (*set_pcie_lanes)(struct radeon_device *rdev, int lanes);
+	void (*set_clock_gating)(struct radeon_device *rdev, int enable);
+};
+
+
+/*
+ * IOCTL.
+ */
+int radeon_gem_info_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *filp);
+int radeon_gem_create_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *filp);
+int radeon_gem_pin_ioctl(struct drm_device *dev, void *data,
+			 struct drm_file *file_priv);
+int radeon_gem_unpin_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file_priv);
+int radeon_gem_pwrite_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file_priv);
+int radeon_gem_pread_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file_priv);
+int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *filp);
+int radeon_gem_mmap_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *filp);
+int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *filp);
+int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *filp);
+int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+
+
+/*
+ * Core structure, functions and helpers.
+ */
+typedef uint32_t (*radeon_rreg_t)(struct radeon_device*, uint32_t);
+typedef void (*radeon_wreg_t)(struct radeon_device*, uint32_t, uint32_t);
+
+struct radeon_device {
+	struct drm_device		*ddev;
+	struct pci_dev			*pdev;
+	/* ASIC */
+	enum radeon_family		family;
+	unsigned long			flags;
+	int				usec_timeout;
+	enum radeon_pll_errata		pll_errata;
+	int				num_gb_pipes;
+	int				disp_priority;
+	/* BIOS */
+	uint8_t				*bios;
+	bool				is_atom_bios;
+	uint16_t			bios_header_start;
+	struct radeon_object		*stollen_vga_memory;
+	struct fb_info			*fbdev_info;
+	struct radeon_object		*fbdev_robj;
+	struct radeon_framebuffer	*fbdev_rfb;
+	/* Register mmio */
+	unsigned long			rmmio_base;
+	unsigned long			rmmio_size;
+	void				*rmmio;
+	radeon_rreg_t			mm_rreg;
+	radeon_wreg_t			mm_wreg;
+	radeon_rreg_t			mc_rreg;
+	radeon_wreg_t			mc_wreg;
+	radeon_rreg_t			pll_rreg;
+	radeon_wreg_t			pll_wreg;
+	radeon_rreg_t			pcie_rreg;
+	radeon_wreg_t			pcie_wreg;
+	radeon_rreg_t			pciep_rreg;
+	radeon_wreg_t			pciep_wreg;
+	struct radeon_clock             clock;
+	struct radeon_mc		mc;
+	struct radeon_gart		gart;
+	struct radeon_mode_info		mode_info;
+	struct radeon_scratch		scratch;
+	struct radeon_mman		mman;
+	struct radeon_fence_driver	fence_drv;
+	struct radeon_cp		cp;
+	struct radeon_ib_pool		ib_pool;
+	struct radeon_irq		irq;
+	struct radeon_asic		*asic;
+	struct radeon_gem		gem;
+	struct mutex			cs_mutex;
+	struct radeon_wb		wb;
+	bool				gpu_lockup;
+	bool				shutdown;
+	bool				suspend;
+};
+
+int radeon_device_init(struct radeon_device *rdev,
+		       struct drm_device *ddev,
+		       struct pci_dev *pdev,
+		       uint32_t flags);
+void radeon_device_fini(struct radeon_device *rdev);
+int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
+
+
+/*
+ * Registers read & write functions.
+ */
+#define RREG8(reg) readb(((void __iomem *)rdev->rmmio) + (reg))
+#define WREG8(reg, v) writeb(v, ((void __iomem *)rdev->rmmio) + (reg))
+#define RREG32(reg) rdev->mm_rreg(rdev, (reg))
+#define WREG32(reg, v) rdev->mm_wreg(rdev, (reg), (v))
+#define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
+#define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
+#define RREG32_PLL(reg) rdev->pll_rreg(rdev, (reg))
+#define WREG32_PLL(reg, v) rdev->pll_wreg(rdev, (reg), (v))
+#define RREG32_MC(reg) rdev->mc_rreg(rdev, (reg))
+#define WREG32_MC(reg, v) rdev->mc_wreg(rdev, (reg), (v))
+#define RREG32_PCIE(reg) rdev->pcie_rreg(rdev, (reg))
+#define WREG32_PCIE(reg, v) rdev->pcie_wreg(rdev, (reg), (v))
+#define WREG32_P(reg, val, mask)				\
+	do {							\
+		uint32_t tmp_ = RREG32(reg);			\
+		tmp_ &= (mask);					\
+		tmp_ |= ((val) & ~(mask));			\
+		WREG32(reg, tmp_);				\
+	} while (0)
+#define WREG32_PLL_P(reg, val, mask)				\
+	do {							\
+		uint32_t tmp_ = RREG32_PLL(reg);		\
+		tmp_ &= (mask);					\
+		tmp_ |= ((val) & ~(mask));			\
+		WREG32_PLL(reg, tmp_);				\
+	} while (0)
+
+void r100_pll_errata_after_index(struct radeon_device *rdev);
+
+
+/*
+ * ASICs helpers.
+ */
+#define ASIC_IS_RV100(rdev) ((rdev->family == CHIP_RV100) || \
+		(rdev->family == CHIP_RV200) || \
+		(rdev->family == CHIP_RS100) || \
+		(rdev->family == CHIP_RS200) || \
+		(rdev->family == CHIP_RV250) || \
+		(rdev->family == CHIP_RV280) || \
+		(rdev->family == CHIP_RS300))
+#define ASIC_IS_R300(rdev) ((rdev->family == CHIP_R300)  ||	\
+		(rdev->family == CHIP_RV350) ||			\
+		(rdev->family == CHIP_R350)  ||			\
+		(rdev->family == CHIP_RV380) ||			\
+		(rdev->family == CHIP_R420)  ||			\
+		(rdev->family == CHIP_R423)  ||			\
+		(rdev->family == CHIP_RV410) ||			\
+		(rdev->family == CHIP_RS400) ||			\
+		(rdev->family == CHIP_RS480))
+#define ASIC_IS_AVIVO(rdev) ((rdev->family >= CHIP_RS600))
+#define ASIC_IS_DCE3(rdev) ((rdev->family >= CHIP_RV620))
+#define ASIC_IS_DCE32(rdev) ((rdev->family >= CHIP_RV730))
+
+
+/*
+ * BIOS helpers.
+ */
+#define RBIOS8(i) (rdev->bios[i])
+#define RBIOS16(i) (RBIOS8(i) | (RBIOS8((i)+1) << 8))
+#define RBIOS32(i) ((RBIOS16(i)) | (RBIOS16((i)+2) << 16))
+
+int radeon_combios_init(struct radeon_device *rdev);
+void radeon_combios_fini(struct radeon_device *rdev);
+int radeon_atombios_init(struct radeon_device *rdev);
+void radeon_atombios_fini(struct radeon_device *rdev);
+
+
+/*
+ * RING helpers.
+ */
+#define CP_PACKET0			0x00000000
+#define		PACKET0_BASE_INDEX_SHIFT	0
+#define		PACKET0_BASE_INDEX_MASK		(0x1ffff << 0)
+#define		PACKET0_COUNT_SHIFT		16
+#define		PACKET0_COUNT_MASK		(0x3fff << 16)
+#define CP_PACKET1			0x40000000
+#define CP_PACKET2			0x80000000
+#define		PACKET2_PAD_SHIFT		0
+#define		PACKET2_PAD_MASK		(0x3fffffff << 0)
+#define CP_PACKET3			0xC0000000
+#define		PACKET3_IT_OPCODE_SHIFT		8
+#define		PACKET3_IT_OPCODE_MASK		(0xff << 8)
+#define		PACKET3_COUNT_SHIFT		16
+#define		PACKET3_COUNT_MASK		(0x3fff << 16)
+/* PACKET3 op code */
+#define		PACKET3_NOP			0x10
+#define		PACKET3_3D_DRAW_VBUF		0x28
+#define		PACKET3_3D_DRAW_IMMD		0x29
+#define		PACKET3_3D_DRAW_INDX		0x2A
+#define		PACKET3_3D_LOAD_VBPNTR		0x2F
+#define		PACKET3_INDX_BUFFER		0x33
+#define		PACKET3_3D_DRAW_VBUF_2		0x34
+#define		PACKET3_3D_DRAW_IMMD_2		0x35
+#define		PACKET3_3D_DRAW_INDX_2		0x36
+#define		PACKET3_BITBLT_MULTI		0x9B
+
+#define PACKET0(reg, n)	(CP_PACKET0 |					\
+			 REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) |	\
+			 REG_SET(PACKET0_COUNT, (n)))
+#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+#define PACKET3(op, n)	(CP_PACKET3 |					\
+			 REG_SET(PACKET3_IT_OPCODE, (op)) |		\
+			 REG_SET(PACKET3_COUNT, (n)))
+
+#define	PACKET_TYPE0	0
+#define	PACKET_TYPE1	1
+#define	PACKET_TYPE2	2
+#define	PACKET_TYPE3	3
+
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2)
+#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+
+static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
+{
+#if DRM_DEBUG_CODE
+	if (rdev->cp.count_dw <= 0) {
+		DRM_ERROR("radeon: writting more dword to ring than expected !\n");
+	}
+#endif
+	rdev->cp.ring[rdev->cp.wptr++] = v;
+	rdev->cp.wptr &= rdev->cp.ptr_mask;
+	rdev->cp.count_dw--;
+	rdev->cp.ring_free_dw--;
+}
+
+
+/*
+ * ASICs macro.
+ */
+#define radeon_cs_parse(p) rdev->asic->cs_parse((p))
+#define radeon_errata(rdev) (rdev)->asic->errata((rdev))
+#define radeon_vram_info(rdev) (rdev)->asic->vram_info((rdev))
+#define radeon_gpu_reset(rdev) (rdev)->asic->gpu_reset((rdev))
+#define radeon_mc_init(rdev) (rdev)->asic->mc_init((rdev))
+#define radeon_mc_fini(rdev) (rdev)->asic->mc_fini((rdev))
+#define radeon_wb_init(rdev) (rdev)->asic->wb_init((rdev))
+#define radeon_wb_fini(rdev) (rdev)->asic->wb_fini((rdev))
+#define radeon_gart_enable(rdev) (rdev)->asic->gart_enable((rdev))
+#define radeon_gart_disable(rdev) (rdev)->asic->gart_disable((rdev))
+#define radeon_gart_tlb_flush(rdev) (rdev)->asic->gart_tlb_flush((rdev))
+#define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart_set_page((rdev), (i), (p))
+#define radeon_cp_init(rdev,rsize) (rdev)->asic->cp_init((rdev), (rsize))
+#define radeon_cp_fini(rdev) (rdev)->asic->cp_fini((rdev))
+#define radeon_cp_disable(rdev) (rdev)->asic->cp_disable((rdev))
+#define radeon_ring_start(rdev) (rdev)->asic->ring_start((rdev))
+#define radeon_irq_set(rdev) (rdev)->asic->irq_set((rdev))
+#define radeon_irq_process(rdev) (rdev)->asic->irq_process((rdev))
+#define radeon_fence_ring_emit(rdev, fence) (rdev)->asic->fence_ring_emit((rdev), (fence))
+#define radeon_copy_blit(rdev, s, d, np, f) (rdev)->asic->copy_blit((rdev), (s), (d), (np), (f))
+#define radeon_copy_dma(rdev, s, d, np, f) (rdev)->asic->copy_dma((rdev), (s), (d), (np), (f))
+#define radeon_copy(rdev, s, d, np, f) (rdev)->asic->copy((rdev), (s), (d), (np), (f))
+#define radeon_set_engine_clock(rdev, e) (rdev)->asic->set_engine_clock((rdev), (e))
+#define radeon_set_memory_clock(rdev, e) (rdev)->asic->set_engine_clock((rdev), (e))
+#define radeon_set_pcie_lanes(rdev, l) (rdev)->asic->set_pcie_lanes((rdev), (l))
+#define radeon_set_clock_gating(rdev, e) (rdev)->asic->set_clock_gating((rdev), (e))
+
+#endif
diff --git a/drivers/gpu/drm/radeon/radeon_agp.c b/drivers/gpu/drm/radeon/radeon_agp.c
new file mode 100644
index 00000000000..23ea9955ac5
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_agp.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Dave Airlie
+ *    Jerome Glisse <glisse@freedesktop.org>
+ */
+#include "drmP.h"
+#include "drm.h"
+#include "radeon.h"
+#include "radeon_drm.h"
+
+#if __OS_HAS_AGP
+
+struct radeon_agpmode_quirk {
+	u32 hostbridge_vendor;
+	u32 hostbridge_device;
+	u32 chip_vendor;
+	u32 chip_device;
+	u32 subsys_vendor;
+	u32 subsys_device;
+	u32 default_mode;
+};
+
+static struct radeon_agpmode_quirk radeon_agpmode_quirk_list[] = {
+	/* Intel E7505 Memory Controller Hub / RV350 AR [Radeon 9600XT] Needs AGPMode 4 (deb #515326) */
+	{ PCI_VENDOR_ID_INTEL, 0x2550, PCI_VENDOR_ID_ATI, 0x4152, 0x1458, 0x4038, 4},
+	/* Intel 82865G/PE/P DRAM Controller/Host-Hub / Mobility 9800 Needs AGPMode 4 (deb #462590) */
+	{ PCI_VENDOR_ID_INTEL, 0x2570, PCI_VENDOR_ID_ATI, 0x4a4e, PCI_VENDOR_ID_DELL, 0x5106, 4},
+	/* Intel 82865G/PE/P DRAM Controller/Host-Hub / RV280 [Radeon 9200 SE] Needs AGPMode 4 (lp #300304) */
+	{ PCI_VENDOR_ID_INTEL, 0x2570, PCI_VENDOR_ID_ATI, 0x5964,
+		0x148c, 0x2073, 4},
+	/* Intel 82855PM Processor to I/O Controller / Mobility M6 LY Needs AGPMode 1 (deb #467235) */
+	{ PCI_VENDOR_ID_INTEL, 0x3340, PCI_VENDOR_ID_ATI, 0x4c59,
+		PCI_VENDOR_ID_IBM, 0x052f, 1},
+	/* Intel 82855PM host bridge / Mobility 9600 M10 RV350 Needs AGPMode 1 (lp #195051) */
+	{ PCI_VENDOR_ID_INTEL, 0x3340, PCI_VENDOR_ID_ATI, 0x4e50,
+		PCI_VENDOR_ID_IBM, 0x0550, 1},
+	/* Intel 82855PM host bridge / Mobility M7 needs AGPMode 1 */
+	{ PCI_VENDOR_ID_INTEL, 0x3340, PCI_VENDOR_ID_ATI, 0x4c57,
+		PCI_VENDOR_ID_IBM, 0x0530, 1},
+	/* Intel 82855PM host bridge / FireGL Mobility T2 RV350 Needs AGPMode 2 (fdo #20647) */
+	{ PCI_VENDOR_ID_INTEL, 0x3340, PCI_VENDOR_ID_ATI, 0x4e54,
+		PCI_VENDOR_ID_IBM, 0x054f, 2},
+	/* Intel 82855PM host bridge / Mobility M9+ / VaioPCG-V505DX Needs AGPMode 2 (fdo #17928) */
+	{ PCI_VENDOR_ID_INTEL, 0x3340, PCI_VENDOR_ID_ATI, 0x5c61,
+		PCI_VENDOR_ID_SONY, 0x816b, 2},
+	/* Intel 82855PM Processor to I/O Controller / Mobility M9+ Needs AGPMode 8 (phoronix forum) */
+	{ PCI_VENDOR_ID_INTEL, 0x3340, PCI_VENDOR_ID_ATI, 0x5c61,
+		PCI_VENDOR_ID_SONY, 0x8195, 8},
+	/* Intel 82830 830 Chipset Host Bridge / Mobility M6 LY Needs AGPMode 2 (fdo #17360)*/
+	{ PCI_VENDOR_ID_INTEL, 0x3575, PCI_VENDOR_ID_ATI, 0x4c59,
+		PCI_VENDOR_ID_DELL, 0x00e3, 2},
+	/* Intel 82852/82855 host bridge / Mobility FireGL 9000 R250 Needs AGPMode 1 (lp #296617) */
+	{ PCI_VENDOR_ID_INTEL, 0x3580, PCI_VENDOR_ID_ATI, 0x4c66,
+		PCI_VENDOR_ID_DELL, 0x0149, 1},
+	/* Intel 82852/82855 host bridge / Mobility 9600 M10 RV350 Needs AGPMode 1 (deb #467460) */
+	{ PCI_VENDOR_ID_INTEL, 0x3580, PCI_VENDOR_ID_ATI, 0x4e50,
+		0x1025, 0x0061, 1},
+	/* Intel 82852/82855 host bridge / Mobility 9600 M10 RV350 Needs AGPMode 1 (lp #203007) */
+	{ PCI_VENDOR_ID_INTEL, 0x3580, PCI_VENDOR_ID_ATI, 0x4e50,
+		0x1025, 0x0064, 1},
+	/* Intel 82852/82855 host bridge / Mobility 9600 M10 RV350 Needs AGPMode 1 (lp #141551) */
+	{ PCI_VENDOR_ID_INTEL, 0x3580, PCI_VENDOR_ID_ATI, 0x4e50,
+		PCI_VENDOR_ID_ASUSTEK, 0x1942, 1},
+	/* Intel 82852/82855 host bridge / Mobility 9600/9700 Needs AGPMode 1 (deb #510208) */
+	{ PCI_VENDOR_ID_INTEL, 0x3580, PCI_VENDOR_ID_ATI, 0x4e50,
+		0x10cf, 0x127f, 1},
+	/* ASRock K7VT4A+ AGP 8x / ATI Radeon 9250 AGP Needs AGPMode 4 (lp #133192) */
+	{ 0x1849, 0x3189, PCI_VENDOR_ID_ATI, 0x5960,
+		0x1787, 0x5960, 4},
+	/* VIA K8M800 Host Bridge / RV280 [Radeon 9200 PRO] Needs AGPMode 4 (fdo #12544) */
+	{ PCI_VENDOR_ID_VIA, 0x0204, PCI_VENDOR_ID_ATI, 0x5960,
+		0x17af, 0x2020, 4},
+	/* VIA KT880 Host Bridge / RV350 [Radeon 9550] Needs AGPMode 4 (fdo #19981) */
+	{ PCI_VENDOR_ID_VIA, 0x0269, PCI_VENDOR_ID_ATI, 0x4153,
+		PCI_VENDOR_ID_ASUSTEK, 0x003c, 4},
+	/* VIA VT8363 Host Bridge / R200 QL [Radeon 8500] Needs AGPMode 2 (lp #141551) */
+	{ PCI_VENDOR_ID_VIA, 0x0305, PCI_VENDOR_ID_ATI, 0x514c,
+		PCI_VENDOR_ID_ATI, 0x013a, 2},
+	/* VIA VT82C693A Host Bridge / RV280 [Radeon 9200 PRO] Needs AGPMode 2 (deb #515512) */
+	{ PCI_VENDOR_ID_VIA, 0x0691, PCI_VENDOR_ID_ATI, 0x5960,
+		PCI_VENDOR_ID_ASUSTEK, 0x004c, 2},
+	/* VIA VT82C693A Host Bridge / RV280 [Radeon 9200 PRO] Needs AGPMode 2 */
+	{ PCI_VENDOR_ID_VIA, 0x0691, PCI_VENDOR_ID_ATI, 0x5960,
+		PCI_VENDOR_ID_ASUSTEK, 0x0054, 2},
+	/* VIA VT8377 Host Bridge / R200 QM [Radeon 9100] Needs AGPMode 4 (deb #461144) */
+	{ PCI_VENDOR_ID_VIA, 0x3189, PCI_VENDOR_ID_ATI, 0x514d,
+		0x174b, 0x7149, 4},
+	/* VIA VT8377 Host Bridge / RV280 [Radeon 9200 PRO] Needs AGPMode 4 (lp #312693) */
+	{ PCI_VENDOR_ID_VIA, 0x3189, PCI_VENDOR_ID_ATI, 0x5960,
+		0x1462, 0x0380, 4},
+	/* VIA VT8377 Host Bridge / RV280 Needs AGPMode 4 (ati ML) */
+	{ PCI_VENDOR_ID_VIA, 0x3189, PCI_VENDOR_ID_ATI, 0x5964,
+		0x148c, 0x2073, 4},
+	/* ATI Host Bridge / RV280 [M9+] Needs AGPMode 1 (phoronix forum) */
+	{ PCI_VENDOR_ID_ATI, 0xcbb2, PCI_VENDOR_ID_ATI, 0x5c61,
+		PCI_VENDOR_ID_SONY, 0x8175, 1},
+	/* HP Host Bridge / R300 [FireGL X1] Needs AGPMode 2 (fdo #7770) */
+	{ PCI_VENDOR_ID_HP, 0x122e, PCI_VENDOR_ID_ATI, 0x4e47,
+		PCI_VENDOR_ID_ATI, 0x0152, 2},
+	{ 0, 0, 0, 0, 0, 0, 0 },
+};
+#endif
+
+int radeon_agp_init(struct radeon_device *rdev)
+{
+#if __OS_HAS_AGP
+	struct radeon_agpmode_quirk *p = radeon_agpmode_quirk_list;
+	struct drm_agp_mode mode;
+	struct drm_agp_info info;
+	uint32_t agp_status;
+	int default_mode;
+	bool is_v3;
+	int ret;
+
+	/* Acquire AGP. */
+	if (!rdev->ddev->agp->acquired) {
+		ret = drm_agp_acquire(rdev->ddev);
+		if (ret) {
+			DRM_ERROR("Unable to acquire AGP: %d\n", ret);
+			return ret;
+		}
+	}
+
+	ret = drm_agp_info(rdev->ddev, &info);
+	if (ret) {
+		DRM_ERROR("Unable to get AGP info: %d\n", ret);
+		return ret;
+	}
+	mode.mode = info.mode;
+	agp_status = (RREG32(RADEON_AGP_STATUS) | RADEON_AGPv3_MODE) & mode.mode;
+	is_v3 = !!(agp_status & RADEON_AGPv3_MODE);
+
+	if (is_v3) {
+		default_mode = (agp_status & RADEON_AGPv3_8X_MODE) ? 8 : 4;
+	} else {
+		if (agp_status & RADEON_AGP_4X_MODE) {
+			default_mode = 4;
+		} else if (agp_status & RADEON_AGP_2X_MODE) {
+			default_mode = 2;
+		} else {
+			default_mode = 1;
+		}
+	}
+
+	/* Apply AGPMode Quirks */
+	while (p && p->chip_device != 0) {
+		if (info.id_vendor == p->hostbridge_vendor &&
+		    info.id_device == p->hostbridge_device &&
+		    rdev->pdev->vendor == p->chip_vendor &&
+		    rdev->pdev->device == p->chip_device &&
+		    rdev->pdev->subsystem_vendor == p->subsys_vendor &&
+		    rdev->pdev->subsystem_device == p->subsys_device) {
+			default_mode = p->default_mode;
+		}
+		++p;
+	}
+
+	if (radeon_agpmode > 0) {
+		if ((radeon_agpmode < (is_v3 ? 4 : 1)) ||
+		    (radeon_agpmode > (is_v3 ? 8 : 4)) ||
+		    (radeon_agpmode & (radeon_agpmode - 1))) {
+			DRM_ERROR("Illegal AGP Mode: %d (valid %s), leaving at %d\n",
+				  radeon_agpmode, is_v3 ? "4, 8" : "1, 2, 4",
+				  default_mode);
+			radeon_agpmode = default_mode;
+		} else {
+			DRM_INFO("AGP mode requested: %d\n", radeon_agpmode);
+		}
+	} else {
+		radeon_agpmode = default_mode;
+	}
+
+	mode.mode &= ~RADEON_AGP_MODE_MASK;
+	if (is_v3) {
+		switch (radeon_agpmode) {
+		case 8:
+			mode.mode |= RADEON_AGPv3_8X_MODE;
+			break;
+		case 4:
+		default:
+			mode.mode |= RADEON_AGPv3_4X_MODE;
+			break;
+		}
+	} else {
+		switch (radeon_agpmode) {
+		case 4:
+			mode.mode |= RADEON_AGP_4X_MODE;
+			break;
+		case 2:
+			mode.mode |= RADEON_AGP_2X_MODE;
+			break;
+		case 1:
+		default:
+			mode.mode |= RADEON_AGP_1X_MODE;
+			break;
+		}
+	}
+
+	mode.mode &= ~RADEON_AGP_FW_MODE; /* disable fw */
+	ret = drm_agp_enable(rdev->ddev, mode);
+	if (ret) {
+		DRM_ERROR("Unable to enable AGP (mode = 0x%lx)\n", mode.mode);
+		return ret;
+	}
+
+	rdev->mc.agp_base = rdev->ddev->agp->agp_info.aper_base;
+	rdev->mc.gtt_size = rdev->ddev->agp->agp_info.aper_size << 20;
+
+	/* workaround some hw issues */
+	if (rdev->family < CHIP_R200) {
+		WREG32(RADEON_AGP_CNTL, RREG32(RADEON_AGP_CNTL) | 0x000e0000);
+	}
+	return 0;
+#else
+	return 0;
+#endif
+}
+
+void radeon_agp_fini(struct radeon_device *rdev)
+{
+#if __OS_HAS_AGP
+	if (rdev->flags & RADEON_IS_AGP) {
+		if (rdev->ddev->agp && rdev->ddev->agp->acquired) {
+			drm_agp_release(rdev->ddev);
+		}
+	}
+#endif
+}
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
new file mode 100644
index 00000000000..e57d8a784e9
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -0,0 +1,405 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#ifndef __RADEON_ASIC_H__
+#define __RADEON_ASIC_H__
+
+/*
+ * common functions
+ */
+void radeon_legacy_set_engine_clock(struct radeon_device *rdev, uint32_t eng_clock);
+void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enable);
+
+void radeon_atom_set_engine_clock(struct radeon_device *rdev, uint32_t eng_clock);
+void radeon_atom_set_memory_clock(struct radeon_device *rdev, uint32_t mem_clock);
+void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable);
+
+/*
+ * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
+ */
+uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg);
+void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+void r100_errata(struct radeon_device *rdev);
+void r100_vram_info(struct radeon_device *rdev);
+int r100_gpu_reset(struct radeon_device *rdev);
+int r100_mc_init(struct radeon_device *rdev);
+void r100_mc_fini(struct radeon_device *rdev);
+int r100_wb_init(struct radeon_device *rdev);
+void r100_wb_fini(struct radeon_device *rdev);
+int r100_gart_enable(struct radeon_device *rdev);
+void r100_pci_gart_disable(struct radeon_device *rdev);
+void r100_pci_gart_tlb_flush(struct radeon_device *rdev);
+int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr);
+int r100_cp_init(struct radeon_device *rdev, unsigned ring_size);
+void r100_cp_fini(struct radeon_device *rdev);
+void r100_cp_disable(struct radeon_device *rdev);
+void r100_ring_start(struct radeon_device *rdev);
+int r100_irq_set(struct radeon_device *rdev);
+int r100_irq_process(struct radeon_device *rdev);
+void r100_fence_ring_emit(struct radeon_device *rdev,
+			  struct radeon_fence *fence);
+int r100_cs_parse(struct radeon_cs_parser *p);
+void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg);
+int r100_copy_blit(struct radeon_device *rdev,
+		   uint64_t src_offset,
+		   uint64_t dst_offset,
+		   unsigned num_pages,
+		   struct radeon_fence *fence);
+
+static struct radeon_asic r100_asic = {
+	.errata = &r100_errata,
+	.vram_info = &r100_vram_info,
+	.gpu_reset = &r100_gpu_reset,
+	.mc_init = &r100_mc_init,
+	.mc_fini = &r100_mc_fini,
+	.wb_init = &r100_wb_init,
+	.wb_fini = &r100_wb_fini,
+	.gart_enable = &r100_gart_enable,
+	.gart_disable = &r100_pci_gart_disable,
+	.gart_tlb_flush = &r100_pci_gart_tlb_flush,
+	.gart_set_page = &r100_pci_gart_set_page,
+	.cp_init = &r100_cp_init,
+	.cp_fini = &r100_cp_fini,
+	.cp_disable = &r100_cp_disable,
+	.ring_start = &r100_ring_start,
+	.irq_set = &r100_irq_set,
+	.irq_process = &r100_irq_process,
+	.fence_ring_emit = &r100_fence_ring_emit,
+	.cs_parse = &r100_cs_parse,
+	.copy_blit = &r100_copy_blit,
+	.copy_dma = NULL,
+	.copy = &r100_copy_blit,
+	.set_engine_clock = &radeon_legacy_set_engine_clock,
+	.set_memory_clock = NULL,
+	.set_pcie_lanes = NULL,
+	.set_clock_gating = &radeon_legacy_set_clock_gating,
+};
+
+
+/*
+ * r300,r350,rv350,rv380
+ */
+void r300_errata(struct radeon_device *rdev);
+void r300_vram_info(struct radeon_device *rdev);
+int r300_gpu_reset(struct radeon_device *rdev);
+int r300_mc_init(struct radeon_device *rdev);
+void r300_mc_fini(struct radeon_device *rdev);
+void r300_ring_start(struct radeon_device *rdev);
+void r300_fence_ring_emit(struct radeon_device *rdev,
+			  struct radeon_fence *fence);
+int r300_cs_parse(struct radeon_cs_parser *p);
+int r300_gart_enable(struct radeon_device *rdev);
+void rv370_pcie_gart_disable(struct radeon_device *rdev);
+void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev);
+int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr);
+uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg);
+void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes);
+int r300_copy_dma(struct radeon_device *rdev,
+		  uint64_t src_offset,
+		  uint64_t dst_offset,
+		  unsigned num_pages,
+		  struct radeon_fence *fence);
+static struct radeon_asic r300_asic = {
+	.errata = &r300_errata,
+	.vram_info = &r300_vram_info,
+	.gpu_reset = &r300_gpu_reset,
+	.mc_init = &r300_mc_init,
+	.mc_fini = &r300_mc_fini,
+	.wb_init = &r100_wb_init,
+	.wb_fini = &r100_wb_fini,
+	.gart_enable = &r300_gart_enable,
+	.gart_disable = &r100_pci_gart_disable,
+	.gart_tlb_flush = &r100_pci_gart_tlb_flush,
+	.gart_set_page = &r100_pci_gart_set_page,
+	.cp_init = &r100_cp_init,
+	.cp_fini = &r100_cp_fini,
+	.cp_disable = &r100_cp_disable,
+	.ring_start = &r300_ring_start,
+	.irq_set = &r100_irq_set,
+	.irq_process = &r100_irq_process,
+	.fence_ring_emit = &r300_fence_ring_emit,
+	.cs_parse = &r300_cs_parse,
+	.copy_blit = &r100_copy_blit,
+	.copy_dma = &r300_copy_dma,
+	.copy = &r100_copy_blit,
+	.set_engine_clock = &radeon_legacy_set_engine_clock,
+	.set_memory_clock = NULL,
+	.set_pcie_lanes = &rv370_set_pcie_lanes,
+	.set_clock_gating = &radeon_legacy_set_clock_gating,
+};
+
+/*
+ * r420,r423,rv410
+ */
+void r420_errata(struct radeon_device *rdev);
+void r420_vram_info(struct radeon_device *rdev);
+int r420_mc_init(struct radeon_device *rdev);
+void r420_mc_fini(struct radeon_device *rdev);
+static struct radeon_asic r420_asic = {
+	.errata = &r420_errata,
+	.vram_info = &r420_vram_info,
+	.gpu_reset = &r300_gpu_reset,
+	.mc_init = &r420_mc_init,
+	.mc_fini = &r420_mc_fini,
+	.wb_init = &r100_wb_init,
+	.wb_fini = &r100_wb_fini,
+	.gart_enable = &r300_gart_enable,
+	.gart_disable = &rv370_pcie_gart_disable,
+	.gart_tlb_flush = &rv370_pcie_gart_tlb_flush,
+	.gart_set_page = &rv370_pcie_gart_set_page,
+	.cp_init = &r100_cp_init,
+	.cp_fini = &r100_cp_fini,
+	.cp_disable = &r100_cp_disable,
+	.ring_start = &r300_ring_start,
+	.irq_set = &r100_irq_set,
+	.irq_process = &r100_irq_process,
+	.fence_ring_emit = &r300_fence_ring_emit,
+	.cs_parse = &r300_cs_parse,
+	.copy_blit = &r100_copy_blit,
+	.copy_dma = &r300_copy_dma,
+	.copy = &r100_copy_blit,
+	.set_engine_clock = &radeon_atom_set_engine_clock,
+	.set_memory_clock = &radeon_atom_set_memory_clock,
+	.set_pcie_lanes = &rv370_set_pcie_lanes,
+	.set_clock_gating = &radeon_atom_set_clock_gating,
+};
+
+
+/*
+ * rs400,rs480
+ */
+void rs400_errata(struct radeon_device *rdev);
+void rs400_vram_info(struct radeon_device *rdev);
+int rs400_mc_init(struct radeon_device *rdev);
+void rs400_mc_fini(struct radeon_device *rdev);
+int rs400_gart_enable(struct radeon_device *rdev);
+void rs400_gart_disable(struct radeon_device *rdev);
+void rs400_gart_tlb_flush(struct radeon_device *rdev);
+int rs400_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr);
+uint32_t rs400_mc_rreg(struct radeon_device *rdev, uint32_t reg);
+void rs400_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+static struct radeon_asic rs400_asic = {
+	.errata = &rs400_errata,
+	.vram_info = &rs400_vram_info,
+	.gpu_reset = &r300_gpu_reset,
+	.mc_init = &rs400_mc_init,
+	.mc_fini = &rs400_mc_fini,
+	.wb_init = &r100_wb_init,
+	.wb_fini = &r100_wb_fini,
+	.gart_enable = &rs400_gart_enable,
+	.gart_disable = &rs400_gart_disable,
+	.gart_tlb_flush = &rs400_gart_tlb_flush,
+	.gart_set_page = &rs400_gart_set_page,
+	.cp_init = &r100_cp_init,
+	.cp_fini = &r100_cp_fini,
+	.cp_disable = &r100_cp_disable,
+	.ring_start = &r300_ring_start,
+	.irq_set = &r100_irq_set,
+	.irq_process = &r100_irq_process,
+	.fence_ring_emit = &r300_fence_ring_emit,
+	.cs_parse = &r300_cs_parse,
+	.copy_blit = &r100_copy_blit,
+	.copy_dma = &r300_copy_dma,
+	.copy = &r100_copy_blit,
+	.set_engine_clock = &radeon_legacy_set_engine_clock,
+	.set_memory_clock = NULL,
+	.set_pcie_lanes = NULL,
+	.set_clock_gating = &radeon_legacy_set_clock_gating,
+};
+
+
+/*
+ * rs600.
+ */
+void rs600_errata(struct radeon_device *rdev);
+void rs600_vram_info(struct radeon_device *rdev);
+int rs600_mc_init(struct radeon_device *rdev);
+void rs600_mc_fini(struct radeon_device *rdev);
+int rs600_irq_set(struct radeon_device *rdev);
+int rs600_gart_enable(struct radeon_device *rdev);
+void rs600_gart_disable(struct radeon_device *rdev);
+void rs600_gart_tlb_flush(struct radeon_device *rdev);
+int rs600_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr);
+uint32_t rs600_mc_rreg(struct radeon_device *rdev, uint32_t reg);
+void rs600_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+static struct radeon_asic rs600_asic = {
+	.errata = &rs600_errata,
+	.vram_info = &rs600_vram_info,
+	.gpu_reset = &r300_gpu_reset,
+	.mc_init = &rs600_mc_init,
+	.mc_fini = &rs600_mc_fini,
+	.wb_init = &r100_wb_init,
+	.wb_fini = &r100_wb_fini,
+	.gart_enable = &rs600_gart_enable,
+	.gart_disable = &rs600_gart_disable,
+	.gart_tlb_flush = &rs600_gart_tlb_flush,
+	.gart_set_page = &rs600_gart_set_page,
+	.cp_init = &r100_cp_init,
+	.cp_fini = &r100_cp_fini,
+	.cp_disable = &r100_cp_disable,
+	.ring_start = &r300_ring_start,
+	.irq_set = &rs600_irq_set,
+	.irq_process = &r100_irq_process,
+	.fence_ring_emit = &r300_fence_ring_emit,
+	.cs_parse = &r300_cs_parse,
+	.copy_blit = &r100_copy_blit,
+	.copy_dma = &r300_copy_dma,
+	.copy = &r100_copy_blit,
+	.set_engine_clock = &radeon_atom_set_engine_clock,
+	.set_memory_clock = &radeon_atom_set_memory_clock,
+	.set_pcie_lanes = NULL,
+	.set_clock_gating = &radeon_atom_set_clock_gating,
+};
+
+
+/*
+ * rs690,rs740
+ */
+void rs690_errata(struct radeon_device *rdev);
+void rs690_vram_info(struct radeon_device *rdev);
+int rs690_mc_init(struct radeon_device *rdev);
+void rs690_mc_fini(struct radeon_device *rdev);
+uint32_t rs690_mc_rreg(struct radeon_device *rdev, uint32_t reg);
+void rs690_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+static struct radeon_asic rs690_asic = {
+	.errata = &rs690_errata,
+	.vram_info = &rs690_vram_info,
+	.gpu_reset = &r300_gpu_reset,
+	.mc_init = &rs690_mc_init,
+	.mc_fini = &rs690_mc_fini,
+	.wb_init = &r100_wb_init,
+	.wb_fini = &r100_wb_fini,
+	.gart_enable = &rs400_gart_enable,
+	.gart_disable = &rs400_gart_disable,
+	.gart_tlb_flush = &rs400_gart_tlb_flush,
+	.gart_set_page = &rs400_gart_set_page,
+	.cp_init = &r100_cp_init,
+	.cp_fini = &r100_cp_fini,
+	.cp_disable = &r100_cp_disable,
+	.ring_start = &r300_ring_start,
+	.irq_set = &rs600_irq_set,
+	.irq_process = &r100_irq_process,
+	.fence_ring_emit = &r300_fence_ring_emit,
+	.cs_parse = &r300_cs_parse,
+	.copy_blit = &r100_copy_blit,
+	.copy_dma = &r300_copy_dma,
+	.copy = &r300_copy_dma,
+	.set_engine_clock = &radeon_atom_set_engine_clock,
+	.set_memory_clock = &radeon_atom_set_memory_clock,
+	.set_pcie_lanes = NULL,
+	.set_clock_gating = &radeon_atom_set_clock_gating,
+};
+
+
+/*
+ * rv515
+ */
+void rv515_errata(struct radeon_device *rdev);
+void rv515_vram_info(struct radeon_device *rdev);
+int rv515_gpu_reset(struct radeon_device *rdev);
+int rv515_mc_init(struct radeon_device *rdev);
+void rv515_mc_fini(struct radeon_device *rdev);
+uint32_t rv515_mc_rreg(struct radeon_device *rdev, uint32_t reg);
+void rv515_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+void rv515_ring_start(struct radeon_device *rdev);
+uint32_t rv515_pcie_rreg(struct radeon_device *rdev, uint32_t reg);
+void rv515_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+static struct radeon_asic rv515_asic = {
+	.errata = &rv515_errata,
+	.vram_info = &rv515_vram_info,
+	.gpu_reset = &rv515_gpu_reset,
+	.mc_init = &rv515_mc_init,
+	.mc_fini = &rv515_mc_fini,
+	.wb_init = &r100_wb_init,
+	.wb_fini = &r100_wb_fini,
+	.gart_enable = &r300_gart_enable,
+	.gart_disable = &rv370_pcie_gart_disable,
+	.gart_tlb_flush = &rv370_pcie_gart_tlb_flush,
+	.gart_set_page = &rv370_pcie_gart_set_page,
+	.cp_init = &r100_cp_init,
+	.cp_fini = &r100_cp_fini,
+	.cp_disable = &r100_cp_disable,
+	.ring_start = &rv515_ring_start,
+	.irq_set = &r100_irq_set,
+	.irq_process = &r100_irq_process,
+	.fence_ring_emit = &r300_fence_ring_emit,
+	.cs_parse = &r100_cs_parse,
+	.copy_blit = &r100_copy_blit,
+	.copy_dma = &r300_copy_dma,
+	.copy = &r100_copy_blit,
+	.set_engine_clock = &radeon_atom_set_engine_clock,
+	.set_memory_clock = &radeon_atom_set_memory_clock,
+	.set_pcie_lanes = &rv370_set_pcie_lanes,
+	.set_clock_gating = &radeon_atom_set_clock_gating,
+};
+
+
+/*
+ * r520,rv530,rv560,rv570,r580
+ */
+void r520_errata(struct radeon_device *rdev);
+void r520_vram_info(struct radeon_device *rdev);
+int r520_mc_init(struct radeon_device *rdev);
+void r520_mc_fini(struct radeon_device *rdev);
+static struct radeon_asic r520_asic = {
+	.errata = &r520_errata,
+	.vram_info = &r520_vram_info,
+	.gpu_reset = &rv515_gpu_reset,
+	.mc_init = &r520_mc_init,
+	.mc_fini = &r520_mc_fini,
+	.wb_init = &r100_wb_init,
+	.wb_fini = &r100_wb_fini,
+	.gart_enable = &r300_gart_enable,
+	.gart_disable = &rv370_pcie_gart_disable,
+	.gart_tlb_flush = &rv370_pcie_gart_tlb_flush,
+	.gart_set_page = &rv370_pcie_gart_set_page,
+	.cp_init = &r100_cp_init,
+	.cp_fini = &r100_cp_fini,
+	.cp_disable = &r100_cp_disable,
+	.ring_start = &rv515_ring_start,
+	.irq_set = &r100_irq_set,
+	.irq_process = &r100_irq_process,
+	.fence_ring_emit = &r300_fence_ring_emit,
+	.cs_parse = &r100_cs_parse,
+	.copy_blit = &r100_copy_blit,
+	.copy_dma = &r300_copy_dma,
+	.copy = &r100_copy_blit,
+	.set_engine_clock = &radeon_atom_set_engine_clock,
+	.set_memory_clock = &radeon_atom_set_memory_clock,
+	.set_pcie_lanes = &rv370_set_pcie_lanes,
+	.set_clock_gating = &radeon_atom_set_clock_gating,
+};
+
+/*
+ * r600,rv610,rv630,rv620,rv635,rv670,rs780,rv770,rv730,rv710
+ */
+uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg);
+void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+
+#endif
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
new file mode 100644
index 00000000000..98904de6387
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -0,0 +1,1226 @@
+/*
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ */
+#include "drmP.h"
+#include "radeon_drm.h"
+#include "radeon.h"
+
+#include "atom.h"
+#include "atom-bits.h"
+
+/* from radeon_encoder.c */
+extern uint32_t
+radeon_get_encoder_id(struct drm_device *dev, uint32_t supported_device,
+		      uint8_t dac);
+extern void radeon_link_encoder_connector(struct drm_device *dev);
+extern void
+radeon_add_atom_encoder(struct drm_device *dev, uint32_t encoder_id,
+			uint32_t supported_device);
+
+/* from radeon_connector.c */
+extern void
+radeon_add_atom_connector(struct drm_device *dev,
+			  uint32_t connector_id,
+			  uint32_t supported_device,
+			  int connector_type,
+			  struct radeon_i2c_bus_rec *i2c_bus,
+			  bool linkb, uint32_t igp_lane_info);
+
+/* from radeon_legacy_encoder.c */
+extern void
+radeon_add_legacy_encoder(struct drm_device *dev, uint32_t encoder_id,
+			  uint32_t supported_device);
+
+union atom_supported_devices {
+	struct _ATOM_SUPPORTED_DEVICES_INFO info;
+	struct _ATOM_SUPPORTED_DEVICES_INFO_2 info_2;
+	struct _ATOM_SUPPORTED_DEVICES_INFO_2d1 info_2d1;
+};
+
+static inline struct radeon_i2c_bus_rec radeon_lookup_gpio(struct drm_device
+							   *dev, uint8_t id)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	struct atom_context *ctx = rdev->mode_info.atom_context;
+	ATOM_GPIO_I2C_ASSIGMENT gpio;
+	struct radeon_i2c_bus_rec i2c;
+	int index = GetIndexIntoMasterTable(DATA, GPIO_I2C_Info);
+	struct _ATOM_GPIO_I2C_INFO *i2c_info;
+	uint16_t data_offset;
+
+	memset(&i2c, 0, sizeof(struct radeon_i2c_bus_rec));
+	i2c.valid = false;
+
+	atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset);
+
+	i2c_info = (struct _ATOM_GPIO_I2C_INFO *)(ctx->bios + data_offset);
+
+	gpio = i2c_info->asGPIO_Info[id];
+
+	i2c.mask_clk_reg = le16_to_cpu(gpio.usClkMaskRegisterIndex) * 4;
+	i2c.mask_data_reg = le16_to_cpu(gpio.usDataMaskRegisterIndex) * 4;
+	i2c.put_clk_reg = le16_to_cpu(gpio.usClkEnRegisterIndex) * 4;
+	i2c.put_data_reg = le16_to_cpu(gpio.usDataEnRegisterIndex) * 4;
+	i2c.get_clk_reg = le16_to_cpu(gpio.usClkY_RegisterIndex) * 4;
+	i2c.get_data_reg = le16_to_cpu(gpio.usDataY_RegisterIndex) * 4;
+	i2c.a_clk_reg = le16_to_cpu(gpio.usClkA_RegisterIndex) * 4;
+	i2c.a_data_reg = le16_to_cpu(gpio.usDataA_RegisterIndex) * 4;
+	i2c.mask_clk_mask = (1 << gpio.ucClkMaskShift);
+	i2c.mask_data_mask = (1 << gpio.ucDataMaskShift);
+	i2c.put_clk_mask = (1 << gpio.ucClkEnShift);
+	i2c.put_data_mask = (1 << gpio.ucDataEnShift);
+	i2c.get_clk_mask = (1 << gpio.ucClkY_Shift);
+	i2c.get_data_mask = (1 << gpio.ucDataY_Shift);
+	i2c.a_clk_mask = (1 << gpio.ucClkA_Shift);
+	i2c.a_data_mask = (1 << gpio.ucDataA_Shift);
+	i2c.valid = true;
+
+	return i2c;
+}
+
+static bool radeon_atom_apply_quirks(struct drm_device *dev,
+				     uint32_t supported_device,
+				     int *connector_type,
+				     struct radeon_i2c_bus_rec *i2c_bus)
+{
+
+	/* Asus M2A-VM HDMI board lists the DVI port as HDMI */
+	if ((dev->pdev->device == 0x791e) &&
+	    (dev->pdev->subsystem_vendor == 0x1043) &&
+	    (dev->pdev->subsystem_device == 0x826d)) {
+		if ((*connector_type == DRM_MODE_CONNECTOR_HDMIA) &&
+		    (supported_device == ATOM_DEVICE_DFP3_SUPPORT))
+			*connector_type = DRM_MODE_CONNECTOR_DVID;
+	}
+
+	/* a-bit f-i90hd - ciaranm on #radeonhd - this board has no DVI */
+	if ((dev->pdev->device == 0x7941) &&
+	    (dev->pdev->subsystem_vendor == 0x147b) &&
+	    (dev->pdev->subsystem_device == 0x2412)) {
+		if (*connector_type == DRM_MODE_CONNECTOR_DVII)
+			return false;
+	}
+
+	/* Falcon NW laptop lists vga ddc line for LVDS */
+	if ((dev->pdev->device == 0x5653) &&
+	    (dev->pdev->subsystem_vendor == 0x1462) &&
+	    (dev->pdev->subsystem_device == 0x0291)) {
+		if (*connector_type == DRM_MODE_CONNECTOR_LVDS)
+			i2c_bus->valid = false;
+	}
+
+	/* Funky macbooks */
+	if ((dev->pdev->device == 0x71C5) &&
+	    (dev->pdev->subsystem_vendor == 0x106b) &&
+	    (dev->pdev->subsystem_device == 0x0080)) {
+		if ((supported_device == ATOM_DEVICE_CRT1_SUPPORT) ||
+		    (supported_device == ATOM_DEVICE_DFP2_SUPPORT))
+			return false;
+	}
+
+	/* some BIOSes seem to report DAC on HDMI - they hurt me with their lies */
+	if ((*connector_type == DRM_MODE_CONNECTOR_HDMIA) ||
+	    (*connector_type == DRM_MODE_CONNECTOR_HDMIB)) {
+		if (supported_device & (ATOM_DEVICE_CRT_SUPPORT)) {
+			return false;
+		}
+	}
+
+	/* ASUS HD 3600 XT board lists the DVI port as HDMI */
+	if ((dev->pdev->device == 0x9598) &&
+	    (dev->pdev->subsystem_vendor == 0x1043) &&
+	    (dev->pdev->subsystem_device == 0x01da)) {
+		if (*connector_type == DRM_MODE_CONNECTOR_HDMIB) {
+			*connector_type = DRM_MODE_CONNECTOR_DVID;
+		}
+	}
+
+	return true;
+}
+
+const int supported_devices_connector_convert[] = {
+	DRM_MODE_CONNECTOR_Unknown,
+	DRM_MODE_CONNECTOR_VGA,
+	DRM_MODE_CONNECTOR_DVII,
+	DRM_MODE_CONNECTOR_DVID,
+	DRM_MODE_CONNECTOR_DVIA,
+	DRM_MODE_CONNECTOR_SVIDEO,
+	DRM_MODE_CONNECTOR_Composite,
+	DRM_MODE_CONNECTOR_LVDS,
+	DRM_MODE_CONNECTOR_Unknown,
+	DRM_MODE_CONNECTOR_Unknown,
+	DRM_MODE_CONNECTOR_HDMIA,
+	DRM_MODE_CONNECTOR_HDMIB,
+	DRM_MODE_CONNECTOR_Unknown,
+	DRM_MODE_CONNECTOR_Unknown,
+	DRM_MODE_CONNECTOR_9PinDIN,
+	DRM_MODE_CONNECTOR_DisplayPort
+};
+
+const int object_connector_convert[] = {
+	DRM_MODE_CONNECTOR_Unknown,
+	DRM_MODE_CONNECTOR_DVII,
+	DRM_MODE_CONNECTOR_DVII,
+	DRM_MODE_CONNECTOR_DVID,
+	DRM_MODE_CONNECTOR_DVID,
+	DRM_MODE_CONNECTOR_VGA,
+	DRM_MODE_CONNECTOR_Composite,
+	DRM_MODE_CONNECTOR_SVIDEO,
+	DRM_MODE_CONNECTOR_Unknown,
+	DRM_MODE_CONNECTOR_9PinDIN,
+	DRM_MODE_CONNECTOR_Unknown,
+	DRM_MODE_CONNECTOR_HDMIA,
+	DRM_MODE_CONNECTOR_HDMIB,
+	DRM_MODE_CONNECTOR_HDMIB,
+	DRM_MODE_CONNECTOR_LVDS,
+	DRM_MODE_CONNECTOR_9PinDIN,
+	DRM_MODE_CONNECTOR_Unknown,
+	DRM_MODE_CONNECTOR_Unknown,
+	DRM_MODE_CONNECTOR_Unknown,
+	DRM_MODE_CONNECTOR_DisplayPort
+};
+
+bool radeon_get_atom_connector_info_from_object_table(struct drm_device *dev)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_mode_info *mode_info = &rdev->mode_info;
+	struct atom_context *ctx = mode_info->atom_context;
+	int index = GetIndexIntoMasterTable(DATA, Object_Header);
+	uint16_t size, data_offset;
+	uint8_t frev, crev, line_mux = 0;
+	ATOM_CONNECTOR_OBJECT_TABLE *con_obj;
+	ATOM_DISPLAY_OBJECT_PATH_TABLE *path_obj;
+	ATOM_OBJECT_HEADER *obj_header;
+	int i, j, path_size, device_support;
+	int connector_type;
+	uint16_t igp_lane_info;
+	bool linkb;
+	struct radeon_i2c_bus_rec ddc_bus;
+
+	atom_parse_data_header(ctx, index, &size, &frev, &crev, &data_offset);
+
+	if (data_offset == 0)
+		return false;
+
+	if (crev < 2)
+		return false;
+
+	obj_header = (ATOM_OBJECT_HEADER *) (ctx->bios + data_offset);
+	path_obj = (ATOM_DISPLAY_OBJECT_PATH_TABLE *)
+	    (ctx->bios + data_offset +
+	     le16_to_cpu(obj_header->usDisplayPathTableOffset));
+	con_obj = (ATOM_CONNECTOR_OBJECT_TABLE *)
+	    (ctx->bios + data_offset +
+	     le16_to_cpu(obj_header->usConnectorObjectTableOffset));
+	device_support = le16_to_cpu(obj_header->usDeviceSupport);
+
+	path_size = 0;
+	for (i = 0; i < path_obj->ucNumOfDispPath; i++) {
+		uint8_t *addr = (uint8_t *) path_obj->asDispPath;
+		ATOM_DISPLAY_OBJECT_PATH *path;
+		addr += path_size;
+		path = (ATOM_DISPLAY_OBJECT_PATH *) addr;
+		path_size += le16_to_cpu(path->usSize);
+		linkb = false;
+
+		if (device_support & le16_to_cpu(path->usDeviceTag)) {
+			uint8_t con_obj_id, con_obj_num, con_obj_type;
+
+			con_obj_id =
+			    (le16_to_cpu(path->usConnObjectId) & OBJECT_ID_MASK)
+			    >> OBJECT_ID_SHIFT;
+			con_obj_num =
+			    (le16_to_cpu(path->usConnObjectId) & ENUM_ID_MASK)
+			    >> ENUM_ID_SHIFT;
+			con_obj_type =
+			    (le16_to_cpu(path->usConnObjectId) &
+			     OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
+
+			if ((le16_to_cpu(path->usDeviceTag) ==
+			     ATOM_DEVICE_TV1_SUPPORT)
+			    || (le16_to_cpu(path->usDeviceTag) ==
+				ATOM_DEVICE_TV2_SUPPORT)
+			    || (le16_to_cpu(path->usDeviceTag) ==
+				ATOM_DEVICE_CV_SUPPORT))
+				continue;
+
+			if ((rdev->family == CHIP_RS780) &&
+			    (con_obj_id ==
+			     CONNECTOR_OBJECT_ID_PCIE_CONNECTOR)) {
+				uint16_t igp_offset = 0;
+				ATOM_INTEGRATED_SYSTEM_INFO_V2 *igp_obj;
+
+				index =
+				    GetIndexIntoMasterTable(DATA,
+							    IntegratedSystemInfo);
+
+				atom_parse_data_header(ctx, index, &size, &frev,
+						       &crev, &igp_offset);
+
+				if (crev >= 2) {
+					igp_obj =
+					    (ATOM_INTEGRATED_SYSTEM_INFO_V2
+					     *) (ctx->bios + igp_offset);
+
+					if (igp_obj) {
+						uint32_t slot_config, ct;
+
+						if (con_obj_num == 1)
+							slot_config =
+							    igp_obj->
+							    ulDDISlot1Config;
+						else
+							slot_config =
+							    igp_obj->
+							    ulDDISlot2Config;
+
+						ct = (slot_config >> 16) & 0xff;
+						connector_type =
+						    object_connector_convert
+						    [ct];
+						igp_lane_info =
+						    slot_config & 0xffff;
+					} else
+						continue;
+				} else
+					continue;
+			} else {
+				igp_lane_info = 0;
+				connector_type =
+				    object_connector_convert[con_obj_id];
+			}
+
+			if (connector_type == DRM_MODE_CONNECTOR_Unknown)
+				continue;
+
+			for (j = 0; j < ((le16_to_cpu(path->usSize) - 8) / 2);
+			     j++) {
+				uint8_t enc_obj_id, enc_obj_num, enc_obj_type;
+
+				enc_obj_id =
+				    (le16_to_cpu(path->usGraphicObjIds[j]) &
+				     OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
+				enc_obj_num =
+				    (le16_to_cpu(path->usGraphicObjIds[j]) &
+				     ENUM_ID_MASK) >> ENUM_ID_SHIFT;
+				enc_obj_type =
+				    (le16_to_cpu(path->usGraphicObjIds[j]) &
+				     OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
+
+				/* FIXME: add support for router objects */
+				if (enc_obj_type == GRAPH_OBJECT_TYPE_ENCODER) {
+					if (enc_obj_num == 2)
+						linkb = true;
+					else
+						linkb = false;
+
+					radeon_add_atom_encoder(dev,
+								enc_obj_id,
+								le16_to_cpu
+								(path->
+								 usDeviceTag));
+
+				}
+			}
+
+			/* look up gpio for ddc */
+			if ((le16_to_cpu(path->usDeviceTag) &
+			     (ATOM_DEVICE_TV_SUPPORT | ATOM_DEVICE_CV_SUPPORT))
+			    == 0) {
+				for (j = 0; j < con_obj->ucNumberOfObjects; j++) {
+					if (le16_to_cpu(path->usConnObjectId) ==
+					    le16_to_cpu(con_obj->asObjects[j].
+							usObjectID)) {
+						ATOM_COMMON_RECORD_HEADER
+						    *record =
+						    (ATOM_COMMON_RECORD_HEADER
+						     *)
+						    (ctx->bios + data_offset +
+						     le16_to_cpu(con_obj->
+								 asObjects[j].
+								 usRecordOffset));
+						ATOM_I2C_RECORD *i2c_record;
+
+						while (record->ucRecordType > 0
+						       && record->
+						       ucRecordType <=
+						       ATOM_MAX_OBJECT_RECORD_NUMBER) {
+							DRM_ERROR
+							    ("record type %d\n",
+							     record->
+							     ucRecordType);
+							switch (record->
+								ucRecordType) {
+							case ATOM_I2C_RECORD_TYPE:
+								i2c_record =
+								    (ATOM_I2C_RECORD
+								     *) record;
+								line_mux =
+								    i2c_record->
+								    sucI2cId.
+								    bfI2C_LineMux;
+								break;
+							}
+							record =
+							    (ATOM_COMMON_RECORD_HEADER
+							     *) ((char *)record
+								 +
+								 record->
+								 ucRecordSize);
+						}
+						break;
+					}
+				}
+			} else
+				line_mux = 0;
+
+			if ((le16_to_cpu(path->usDeviceTag) ==
+			     ATOM_DEVICE_TV1_SUPPORT)
+			    || (le16_to_cpu(path->usDeviceTag) ==
+				ATOM_DEVICE_TV2_SUPPORT)
+			    || (le16_to_cpu(path->usDeviceTag) ==
+				ATOM_DEVICE_CV_SUPPORT))
+				ddc_bus.valid = false;
+			else
+				ddc_bus = radeon_lookup_gpio(dev, line_mux);
+
+			radeon_add_atom_connector(dev,
+						  le16_to_cpu(path->
+							      usConnObjectId),
+						  le16_to_cpu(path->
+							      usDeviceTag),
+						  connector_type, &ddc_bus,
+						  linkb, igp_lane_info);
+
+		}
+	}
+
+	radeon_link_encoder_connector(dev);
+
+	return true;
+}
+
+struct bios_connector {
+	bool valid;
+	uint8_t line_mux;
+	uint16_t devices;
+	int connector_type;
+	struct radeon_i2c_bus_rec ddc_bus;
+};
+
+bool radeon_get_atom_connector_info_from_supported_devices_table(struct
+								 drm_device
+								 *dev)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_mode_info *mode_info = &rdev->mode_info;
+	struct atom_context *ctx = mode_info->atom_context;
+	int index = GetIndexIntoMasterTable(DATA, SupportedDevicesInfo);
+	uint16_t size, data_offset;
+	uint8_t frev, crev;
+	uint16_t device_support;
+	uint8_t dac;
+	union atom_supported_devices *supported_devices;
+	int i, j;
+	struct bios_connector bios_connectors[ATOM_MAX_SUPPORTED_DEVICE];
+
+	atom_parse_data_header(ctx, index, &size, &frev, &crev, &data_offset);
+
+	supported_devices =
+	    (union atom_supported_devices *)(ctx->bios + data_offset);
+
+	device_support = le16_to_cpu(supported_devices->info.usDeviceSupport);
+
+	for (i = 0; i < ATOM_MAX_SUPPORTED_DEVICE; i++) {
+		ATOM_CONNECTOR_INFO_I2C ci =
+		    supported_devices->info.asConnInfo[i];
+
+		bios_connectors[i].valid = false;
+
+		if (!(device_support & (1 << i))) {
+			continue;
+		}
+
+		if (i == ATOM_DEVICE_CV_INDEX) {
+			DRM_DEBUG("Skipping Component Video\n");
+			continue;
+		}
+
+		if (i == ATOM_DEVICE_TV1_INDEX) {
+			DRM_DEBUG("Skipping TV Out\n");
+			continue;
+		}
+
+		bios_connectors[i].connector_type =
+		    supported_devices_connector_convert[ci.sucConnectorInfo.
+							sbfAccess.
+							bfConnectorType];
+
+		if (bios_connectors[i].connector_type ==
+		    DRM_MODE_CONNECTOR_Unknown)
+			continue;
+
+		dac = ci.sucConnectorInfo.sbfAccess.bfAssociatedDAC;
+
+		if ((rdev->family == CHIP_RS690) ||
+		    (rdev->family == CHIP_RS740)) {
+			if ((i == ATOM_DEVICE_DFP2_INDEX)
+			    && (ci.sucI2cId.sbfAccess.bfI2C_LineMux == 2))
+				bios_connectors[i].line_mux =
+				    ci.sucI2cId.sbfAccess.bfI2C_LineMux + 1;
+			else if ((i == ATOM_DEVICE_DFP3_INDEX)
+				 && (ci.sucI2cId.sbfAccess.bfI2C_LineMux == 1))
+				bios_connectors[i].line_mux =
+				    ci.sucI2cId.sbfAccess.bfI2C_LineMux + 1;
+			else
+				bios_connectors[i].line_mux =
+				    ci.sucI2cId.sbfAccess.bfI2C_LineMux;
+		} else
+			bios_connectors[i].line_mux =
+			    ci.sucI2cId.sbfAccess.bfI2C_LineMux;
+
+		/* give tv unique connector ids */
+		if (i == ATOM_DEVICE_TV1_INDEX) {
+			bios_connectors[i].ddc_bus.valid = false;
+			bios_connectors[i].line_mux = 50;
+		} else if (i == ATOM_DEVICE_TV2_INDEX) {
+			bios_connectors[i].ddc_bus.valid = false;
+			bios_connectors[i].line_mux = 51;
+		} else if (i == ATOM_DEVICE_CV_INDEX) {
+			bios_connectors[i].ddc_bus.valid = false;
+			bios_connectors[i].line_mux = 52;
+		} else
+			bios_connectors[i].ddc_bus =
+			    radeon_lookup_gpio(dev,
+					       bios_connectors[i].line_mux);
+
+		/* Always set the connector type to VGA for CRT1/CRT2. if they are
+		 * shared with a DVI port, we'll pick up the DVI connector when we
+		 * merge the outputs.  Some bioses incorrectly list VGA ports as DVI.
+		 */
+		if (i == ATOM_DEVICE_CRT1_INDEX || i == ATOM_DEVICE_CRT2_INDEX)
+			bios_connectors[i].connector_type =
+			    DRM_MODE_CONNECTOR_VGA;
+
+		if (!radeon_atom_apply_quirks
+		    (dev, (1 << i), &bios_connectors[i].connector_type,
+		     &bios_connectors[i].ddc_bus))
+			continue;
+
+		bios_connectors[i].valid = true;
+		bios_connectors[i].devices = (1 << i);
+
+		if (ASIC_IS_AVIVO(rdev) || radeon_r4xx_atom)
+			radeon_add_atom_encoder(dev,
+						radeon_get_encoder_id(dev,
+								      (1 << i),
+								      dac),
+						(1 << i));
+		else
+			radeon_add_legacy_encoder(dev,
+						  radeon_get_encoder_id(dev,
+									(1 <<
+									 i),
+									dac),
+						  (1 << i));
+	}
+
+	/* combine shared connectors */
+	for (i = 0; i < ATOM_MAX_SUPPORTED_DEVICE; i++) {
+		if (bios_connectors[i].valid) {
+			for (j = 0; j < ATOM_MAX_SUPPORTED_DEVICE; j++) {
+				if (bios_connectors[j].valid && (i != j)) {
+					if (bios_connectors[i].line_mux ==
+					    bios_connectors[j].line_mux) {
+						if (((bios_connectors[i].
+						      devices &
+						      (ATOM_DEVICE_DFP_SUPPORT))
+						     && (bios_connectors[j].
+							 devices &
+							 (ATOM_DEVICE_CRT_SUPPORT)))
+						    ||
+						    ((bios_connectors[j].
+						      devices &
+						      (ATOM_DEVICE_DFP_SUPPORT))
+						     && (bios_connectors[i].
+							 devices &
+							 (ATOM_DEVICE_CRT_SUPPORT)))) {
+							bios_connectors[i].
+							    devices |=
+							    bios_connectors[j].
+							    devices;
+							bios_connectors[i].
+							    connector_type =
+							    DRM_MODE_CONNECTOR_DVII;
+							bios_connectors[j].
+							    valid = false;
+						}
+					}
+				}
+			}
+		}
+	}
+
+	/* add the connectors */
+	for (i = 0; i < ATOM_MAX_SUPPORTED_DEVICE; i++) {
+		if (bios_connectors[i].valid)
+			radeon_add_atom_connector(dev,
+						  bios_connectors[i].line_mux,
+						  bios_connectors[i].devices,
+						  bios_connectors[i].
+						  connector_type,
+						  &bios_connectors[i].ddc_bus,
+						  false, 0);
+	}
+
+	radeon_link_encoder_connector(dev);
+
+	return true;
+}
+
+union firmware_info {
+	ATOM_FIRMWARE_INFO info;
+	ATOM_FIRMWARE_INFO_V1_2 info_12;
+	ATOM_FIRMWARE_INFO_V1_3 info_13;
+	ATOM_FIRMWARE_INFO_V1_4 info_14;
+};
+
+bool radeon_atom_get_clock_info(struct drm_device *dev)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_mode_info *mode_info = &rdev->mode_info;
+	int index = GetIndexIntoMasterTable(DATA, FirmwareInfo);
+	union firmware_info *firmware_info;
+	uint8_t frev, crev;
+	struct radeon_pll *p1pll = &rdev->clock.p1pll;
+	struct radeon_pll *p2pll = &rdev->clock.p2pll;
+	struct radeon_pll *spll = &rdev->clock.spll;
+	struct radeon_pll *mpll = &rdev->clock.mpll;
+	uint16_t data_offset;
+
+	atom_parse_data_header(mode_info->atom_context, index, NULL, &frev,
+			       &crev, &data_offset);
+
+	firmware_info =
+	    (union firmware_info *)(mode_info->atom_context->bios +
+				    data_offset);
+
+	if (firmware_info) {
+		/* pixel clocks */
+		p1pll->reference_freq =
+		    le16_to_cpu(firmware_info->info.usReferenceClock);
+		p1pll->reference_div = 0;
+
+		p1pll->pll_out_min =
+		    le16_to_cpu(firmware_info->info.usMinPixelClockPLL_Output);
+		p1pll->pll_out_max =
+		    le32_to_cpu(firmware_info->info.ulMaxPixelClockPLL_Output);
+
+		if (p1pll->pll_out_min == 0) {
+			if (ASIC_IS_AVIVO(rdev))
+				p1pll->pll_out_min = 64800;
+			else
+				p1pll->pll_out_min = 20000;
+		}
+
+		p1pll->pll_in_min =
+		    le16_to_cpu(firmware_info->info.usMinPixelClockPLL_Input);
+		p1pll->pll_in_max =
+		    le16_to_cpu(firmware_info->info.usMaxPixelClockPLL_Input);
+
+		*p2pll = *p1pll;
+
+		/* system clock */
+		spll->reference_freq =
+		    le16_to_cpu(firmware_info->info.usReferenceClock);
+		spll->reference_div = 0;
+
+		spll->pll_out_min =
+		    le16_to_cpu(firmware_info->info.usMinEngineClockPLL_Output);
+		spll->pll_out_max =
+		    le32_to_cpu(firmware_info->info.ulMaxEngineClockPLL_Output);
+
+		/* ??? */
+		if (spll->pll_out_min == 0) {
+			if (ASIC_IS_AVIVO(rdev))
+				spll->pll_out_min = 64800;
+			else
+				spll->pll_out_min = 20000;
+		}
+
+		spll->pll_in_min =
+		    le16_to_cpu(firmware_info->info.usMinEngineClockPLL_Input);
+		spll->pll_in_max =
+		    le16_to_cpu(firmware_info->info.usMaxEngineClockPLL_Input);
+
+		/* memory clock */
+		mpll->reference_freq =
+		    le16_to_cpu(firmware_info->info.usReferenceClock);
+		mpll->reference_div = 0;
+
+		mpll->pll_out_min =
+		    le16_to_cpu(firmware_info->info.usMinMemoryClockPLL_Output);
+		mpll->pll_out_max =
+		    le32_to_cpu(firmware_info->info.ulMaxMemoryClockPLL_Output);
+
+		/* ??? */
+		if (mpll->pll_out_min == 0) {
+			if (ASIC_IS_AVIVO(rdev))
+				mpll->pll_out_min = 64800;
+			else
+				mpll->pll_out_min = 20000;
+		}
+
+		mpll->pll_in_min =
+		    le16_to_cpu(firmware_info->info.usMinMemoryClockPLL_Input);
+		mpll->pll_in_max =
+		    le16_to_cpu(firmware_info->info.usMaxMemoryClockPLL_Input);
+
+		rdev->clock.default_sclk =
+		    le32_to_cpu(firmware_info->info.ulDefaultEngineClock);
+		rdev->clock.default_mclk =
+		    le32_to_cpu(firmware_info->info.ulDefaultMemoryClock);
+
+		return true;
+	}
+	return false;
+}
+
+struct radeon_encoder_int_tmds *radeon_atombios_get_tmds_info(struct
+							      radeon_encoder
+							      *encoder)
+{
+	struct drm_device *dev = encoder->base.dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_mode_info *mode_info = &rdev->mode_info;
+	int index = GetIndexIntoMasterTable(DATA, TMDS_Info);
+	uint16_t data_offset;
+	struct _ATOM_TMDS_INFO *tmds_info;
+	uint8_t frev, crev;
+	uint16_t maxfreq;
+	int i;
+	struct radeon_encoder_int_tmds *tmds = NULL;
+
+	atom_parse_data_header(mode_info->atom_context, index, NULL, &frev,
+			       &crev, &data_offset);
+
+	tmds_info =
+	    (struct _ATOM_TMDS_INFO *)(mode_info->atom_context->bios +
+				       data_offset);
+
+	if (tmds_info) {
+		tmds =
+		    kzalloc(sizeof(struct radeon_encoder_int_tmds), GFP_KERNEL);
+
+		if (!tmds)
+			return NULL;
+
+		maxfreq = le16_to_cpu(tmds_info->usMaxFrequency);
+		for (i = 0; i < 4; i++) {
+			tmds->tmds_pll[i].freq =
+			    le16_to_cpu(tmds_info->asMiscInfo[i].usFrequency);
+			tmds->tmds_pll[i].value =
+			    tmds_info->asMiscInfo[i].ucPLL_ChargePump & 0x3f;
+			tmds->tmds_pll[i].value |=
+			    (tmds_info->asMiscInfo[i].
+			     ucPLL_VCO_Gain & 0x3f) << 6;
+			tmds->tmds_pll[i].value |=
+			    (tmds_info->asMiscInfo[i].
+			     ucPLL_DutyCycle & 0xf) << 12;
+			tmds->tmds_pll[i].value |=
+			    (tmds_info->asMiscInfo[i].
+			     ucPLL_VoltageSwing & 0xf) << 16;
+
+			DRM_DEBUG("TMDS PLL From ATOMBIOS %u %x\n",
+				  tmds->tmds_pll[i].freq,
+				  tmds->tmds_pll[i].value);
+
+			if (maxfreq == tmds->tmds_pll[i].freq) {
+				tmds->tmds_pll[i].freq = 0xffffffff;
+				break;
+			}
+		}
+	}
+	return tmds;
+}
+
+union lvds_info {
+	struct _ATOM_LVDS_INFO info;
+	struct _ATOM_LVDS_INFO_V12 info_12;
+};
+
+struct radeon_encoder_atom_dig *radeon_atombios_get_lvds_info(struct
+							      radeon_encoder
+							      *encoder)
+{
+	struct drm_device *dev = encoder->base.dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_mode_info *mode_info = &rdev->mode_info;
+	int index = GetIndexIntoMasterTable(DATA, LVDS_Info);
+	uint16_t data_offset;
+	union lvds_info *lvds_info;
+	uint8_t frev, crev;
+	struct radeon_encoder_atom_dig *lvds = NULL;
+
+	atom_parse_data_header(mode_info->atom_context, index, NULL, &frev,
+			       &crev, &data_offset);
+
+	lvds_info =
+	    (union lvds_info *)(mode_info->atom_context->bios + data_offset);
+
+	if (lvds_info) {
+		lvds =
+		    kzalloc(sizeof(struct radeon_encoder_atom_dig), GFP_KERNEL);
+
+		if (!lvds)
+			return NULL;
+
+		lvds->native_mode.dotclock =
+		    le16_to_cpu(lvds_info->info.sLCDTiming.usPixClk) * 10;
+		lvds->native_mode.panel_xres =
+		    le16_to_cpu(lvds_info->info.sLCDTiming.usHActive);
+		lvds->native_mode.panel_yres =
+		    le16_to_cpu(lvds_info->info.sLCDTiming.usVActive);
+		lvds->native_mode.hblank =
+		    le16_to_cpu(lvds_info->info.sLCDTiming.usHBlanking_Time);
+		lvds->native_mode.hoverplus =
+		    le16_to_cpu(lvds_info->info.sLCDTiming.usHSyncOffset);
+		lvds->native_mode.hsync_width =
+		    le16_to_cpu(lvds_info->info.sLCDTiming.usHSyncWidth);
+		lvds->native_mode.vblank =
+		    le16_to_cpu(lvds_info->info.sLCDTiming.usVBlanking_Time);
+		lvds->native_mode.voverplus =
+		    le16_to_cpu(lvds_info->info.sLCDTiming.usVSyncOffset);
+		lvds->native_mode.vsync_width =
+		    le16_to_cpu(lvds_info->info.sLCDTiming.usVSyncWidth);
+		lvds->panel_pwr_delay =
+		    le16_to_cpu(lvds_info->info.usOffDelayInMs);
+		lvds->lvds_misc = lvds_info->info.ucLVDS_Misc;
+
+		encoder->native_mode = lvds->native_mode;
+	}
+	return lvds;
+}
+
+void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable)
+{
+	DYNAMIC_CLOCK_GATING_PS_ALLOCATION args;
+	int index = GetIndexIntoMasterTable(COMMAND, DynamicClockGating);
+
+	args.ucEnable = enable;
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+void radeon_atom_static_pwrmgt_setup(struct radeon_device *rdev, int enable)
+{
+	ENABLE_ASIC_STATIC_PWR_MGT_PS_ALLOCATION args;
+	int index = GetIndexIntoMasterTable(COMMAND, EnableASIC_StaticPwrMgt);
+
+	args.ucEnable = enable;
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+void radeon_atom_set_engine_clock(struct radeon_device *rdev,
+				  uint32_t eng_clock)
+{
+	SET_ENGINE_CLOCK_PS_ALLOCATION args;
+	int index = GetIndexIntoMasterTable(COMMAND, SetEngineClock);
+
+	args.ulTargetEngineClock = eng_clock;	/* 10 khz */
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+void radeon_atom_set_memory_clock(struct radeon_device *rdev,
+				  uint32_t mem_clock)
+{
+	SET_MEMORY_CLOCK_PS_ALLOCATION args;
+	int index = GetIndexIntoMasterTable(COMMAND, SetMemoryClock);
+
+	if (rdev->flags & RADEON_IS_IGP)
+		return;
+
+	args.ulTargetMemoryClock = mem_clock;	/* 10 khz */
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+void radeon_atom_initialize_bios_scratch_regs(struct drm_device *dev)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t bios_2_scratch, bios_6_scratch;
+
+	if (rdev->family >= CHIP_R600) {
+		bios_2_scratch = RREG32(R600_BIOS_0_SCRATCH);
+		bios_6_scratch = RREG32(R600_BIOS_6_SCRATCH);
+	} else {
+		bios_2_scratch = RREG32(RADEON_BIOS_0_SCRATCH);
+		bios_6_scratch = RREG32(RADEON_BIOS_6_SCRATCH);
+	}
+
+	/* let the bios control the backlight */
+	bios_2_scratch &= ~ATOM_S2_VRI_BRIGHT_ENABLE;
+
+	/* tell the bios not to handle mode switching */
+	bios_6_scratch |= (ATOM_S6_ACC_BLOCK_DISPLAY_SWITCH | ATOM_S6_ACC_MODE);
+
+	if (rdev->family >= CHIP_R600) {
+		WREG32(R600_BIOS_2_SCRATCH, bios_2_scratch);
+		WREG32(R600_BIOS_6_SCRATCH, bios_6_scratch);
+	} else {
+		WREG32(RADEON_BIOS_2_SCRATCH, bios_2_scratch);
+		WREG32(RADEON_BIOS_6_SCRATCH, bios_6_scratch);
+	}
+
+}
+
+void radeon_atom_output_lock(struct drm_encoder *encoder, bool lock)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t bios_6_scratch;
+
+	if (rdev->family >= CHIP_R600)
+		bios_6_scratch = RREG32(R600_BIOS_6_SCRATCH);
+	else
+		bios_6_scratch = RREG32(RADEON_BIOS_6_SCRATCH);
+
+	if (lock)
+		bios_6_scratch |= ATOM_S6_CRITICAL_STATE;
+	else
+		bios_6_scratch &= ~ATOM_S6_CRITICAL_STATE;
+
+	if (rdev->family >= CHIP_R600)
+		WREG32(R600_BIOS_6_SCRATCH, bios_6_scratch);
+	else
+		WREG32(RADEON_BIOS_6_SCRATCH, bios_6_scratch);
+}
+
+/* at some point we may want to break this out into individual functions */
+void
+radeon_atombios_connected_scratch_regs(struct drm_connector *connector,
+				       struct drm_encoder *encoder,
+				       bool connected)
+{
+	struct drm_device *dev = connector->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_connector *radeon_connector =
+	    to_radeon_connector(connector);
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	uint32_t bios_0_scratch, bios_3_scratch, bios_6_scratch;
+
+	if (rdev->family >= CHIP_R600) {
+		bios_0_scratch = RREG32(R600_BIOS_0_SCRATCH);
+		bios_3_scratch = RREG32(R600_BIOS_3_SCRATCH);
+		bios_6_scratch = RREG32(R600_BIOS_6_SCRATCH);
+	} else {
+		bios_0_scratch = RREG32(RADEON_BIOS_0_SCRATCH);
+		bios_3_scratch = RREG32(RADEON_BIOS_3_SCRATCH);
+		bios_6_scratch = RREG32(RADEON_BIOS_6_SCRATCH);
+	}
+
+	if ((radeon_encoder->devices & ATOM_DEVICE_TV1_SUPPORT) &&
+	    (radeon_connector->devices & ATOM_DEVICE_TV1_SUPPORT)) {
+		if (connected) {
+			DRM_DEBUG("TV1 connected\n");
+			bios_3_scratch |= ATOM_S3_TV1_ACTIVE;
+			bios_6_scratch |= ATOM_S6_ACC_REQ_TV1;
+		} else {
+			DRM_DEBUG("TV1 disconnected\n");
+			bios_0_scratch &= ~ATOM_S0_TV1_MASK;
+			bios_3_scratch &= ~ATOM_S3_TV1_ACTIVE;
+			bios_6_scratch &= ~ATOM_S6_ACC_REQ_TV1;
+		}
+	}
+	if ((radeon_encoder->devices & ATOM_DEVICE_CV_SUPPORT) &&
+	    (radeon_connector->devices & ATOM_DEVICE_CV_SUPPORT)) {
+		if (connected) {
+			DRM_DEBUG("CV connected\n");
+			bios_3_scratch |= ATOM_S3_CV_ACTIVE;
+			bios_6_scratch |= ATOM_S6_ACC_REQ_CV;
+		} else {
+			DRM_DEBUG("CV disconnected\n");
+			bios_0_scratch &= ~ATOM_S0_CV_MASK;
+			bios_3_scratch &= ~ATOM_S3_CV_ACTIVE;
+			bios_6_scratch &= ~ATOM_S6_ACC_REQ_CV;
+		}
+	}
+	if ((radeon_encoder->devices & ATOM_DEVICE_LCD1_SUPPORT) &&
+	    (radeon_connector->devices & ATOM_DEVICE_LCD1_SUPPORT)) {
+		if (connected) {
+			DRM_DEBUG("LCD1 connected\n");
+			bios_0_scratch |= ATOM_S0_LCD1;
+			bios_3_scratch |= ATOM_S3_LCD1_ACTIVE;
+			bios_6_scratch |= ATOM_S6_ACC_REQ_LCD1;
+		} else {
+			DRM_DEBUG("LCD1 disconnected\n");
+			bios_0_scratch &= ~ATOM_S0_LCD1;
+			bios_3_scratch &= ~ATOM_S3_LCD1_ACTIVE;
+			bios_6_scratch &= ~ATOM_S6_ACC_REQ_LCD1;
+		}
+	}
+	if ((radeon_encoder->devices & ATOM_DEVICE_CRT1_SUPPORT) &&
+	    (radeon_connector->devices & ATOM_DEVICE_CRT1_SUPPORT)) {
+		if (connected) {
+			DRM_DEBUG("CRT1 connected\n");
+			bios_0_scratch |= ATOM_S0_CRT1_COLOR;
+			bios_3_scratch |= ATOM_S3_CRT1_ACTIVE;
+			bios_6_scratch |= ATOM_S6_ACC_REQ_CRT1;
+		} else {
+			DRM_DEBUG("CRT1 disconnected\n");
+			bios_0_scratch &= ~ATOM_S0_CRT1_MASK;
+			bios_3_scratch &= ~ATOM_S3_CRT1_ACTIVE;
+			bios_6_scratch &= ~ATOM_S6_ACC_REQ_CRT1;
+		}
+	}
+	if ((radeon_encoder->devices & ATOM_DEVICE_CRT2_SUPPORT) &&
+	    (radeon_connector->devices & ATOM_DEVICE_CRT2_SUPPORT)) {
+		if (connected) {
+			DRM_DEBUG("CRT2 connected\n");
+			bios_0_scratch |= ATOM_S0_CRT2_COLOR;
+			bios_3_scratch |= ATOM_S3_CRT2_ACTIVE;
+			bios_6_scratch |= ATOM_S6_ACC_REQ_CRT2;
+		} else {
+			DRM_DEBUG("CRT2 disconnected\n");
+			bios_0_scratch &= ~ATOM_S0_CRT2_MASK;
+			bios_3_scratch &= ~ATOM_S3_CRT2_ACTIVE;
+			bios_6_scratch &= ~ATOM_S6_ACC_REQ_CRT2;
+		}
+	}
+	if ((radeon_encoder->devices & ATOM_DEVICE_DFP1_SUPPORT) &&
+	    (radeon_connector->devices & ATOM_DEVICE_DFP1_SUPPORT)) {
+		if (connected) {
+			DRM_DEBUG("DFP1 connected\n");
+			bios_0_scratch |= ATOM_S0_DFP1;
+			bios_3_scratch |= ATOM_S3_DFP1_ACTIVE;
+			bios_6_scratch |= ATOM_S6_ACC_REQ_DFP1;
+		} else {
+			DRM_DEBUG("DFP1 disconnected\n");
+			bios_0_scratch &= ~ATOM_S0_DFP1;
+			bios_3_scratch &= ~ATOM_S3_DFP1_ACTIVE;
+			bios_6_scratch &= ~ATOM_S6_ACC_REQ_DFP1;
+		}
+	}
+	if ((radeon_encoder->devices & ATOM_DEVICE_DFP2_SUPPORT) &&
+	    (radeon_connector->devices & ATOM_DEVICE_DFP2_SUPPORT)) {
+		if (connected) {
+			DRM_DEBUG("DFP2 connected\n");
+			bios_0_scratch |= ATOM_S0_DFP2;
+			bios_3_scratch |= ATOM_S3_DFP2_ACTIVE;
+			bios_6_scratch |= ATOM_S6_ACC_REQ_DFP2;
+		} else {
+			DRM_DEBUG("DFP2 disconnected\n");
+			bios_0_scratch &= ~ATOM_S0_DFP2;
+			bios_3_scratch &= ~ATOM_S3_DFP2_ACTIVE;
+			bios_6_scratch &= ~ATOM_S6_ACC_REQ_DFP2;
+		}
+	}
+	if ((radeon_encoder->devices & ATOM_DEVICE_DFP3_SUPPORT) &&
+	    (radeon_connector->devices & ATOM_DEVICE_DFP3_SUPPORT)) {
+		if (connected) {
+			DRM_DEBUG("DFP3 connected\n");
+			bios_0_scratch |= ATOM_S0_DFP3;
+			bios_3_scratch |= ATOM_S3_DFP3_ACTIVE;
+			bios_6_scratch |= ATOM_S6_ACC_REQ_DFP3;
+		} else {
+			DRM_DEBUG("DFP3 disconnected\n");
+			bios_0_scratch &= ~ATOM_S0_DFP3;
+			bios_3_scratch &= ~ATOM_S3_DFP3_ACTIVE;
+			bios_6_scratch &= ~ATOM_S6_ACC_REQ_DFP3;
+		}
+	}
+	if ((radeon_encoder->devices & ATOM_DEVICE_DFP4_SUPPORT) &&
+	    (radeon_connector->devices & ATOM_DEVICE_DFP4_SUPPORT)) {
+		if (connected) {
+			DRM_DEBUG("DFP4 connected\n");
+			bios_0_scratch |= ATOM_S0_DFP4;
+			bios_3_scratch |= ATOM_S3_DFP4_ACTIVE;
+			bios_6_scratch |= ATOM_S6_ACC_REQ_DFP4;
+		} else {
+			DRM_DEBUG("DFP4 disconnected\n");
+			bios_0_scratch &= ~ATOM_S0_DFP4;
+			bios_3_scratch &= ~ATOM_S3_DFP4_ACTIVE;
+			bios_6_scratch &= ~ATOM_S6_ACC_REQ_DFP4;
+		}
+	}
+	if ((radeon_encoder->devices & ATOM_DEVICE_DFP5_SUPPORT) &&
+	    (radeon_connector->devices & ATOM_DEVICE_DFP5_SUPPORT)) {
+		if (connected) {
+			DRM_DEBUG("DFP5 connected\n");
+			bios_0_scratch |= ATOM_S0_DFP5;
+			bios_3_scratch |= ATOM_S3_DFP5_ACTIVE;
+			bios_6_scratch |= ATOM_S6_ACC_REQ_DFP5;
+		} else {
+			DRM_DEBUG("DFP5 disconnected\n");
+			bios_0_scratch &= ~ATOM_S0_DFP5;
+			bios_3_scratch &= ~ATOM_S3_DFP5_ACTIVE;
+			bios_6_scratch &= ~ATOM_S6_ACC_REQ_DFP5;
+		}
+	}
+
+	if (rdev->family >= CHIP_R600) {
+		WREG32(R600_BIOS_0_SCRATCH, bios_0_scratch);
+		WREG32(R600_BIOS_3_SCRATCH, bios_3_scratch);
+		WREG32(R600_BIOS_6_SCRATCH, bios_6_scratch);
+	} else {
+		WREG32(RADEON_BIOS_0_SCRATCH, bios_0_scratch);
+		WREG32(RADEON_BIOS_3_SCRATCH, bios_3_scratch);
+		WREG32(RADEON_BIOS_6_SCRATCH, bios_6_scratch);
+	}
+}
+
+void
+radeon_atombios_encoder_crtc_scratch_regs(struct drm_encoder *encoder, int crtc)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	uint32_t bios_3_scratch;
+
+	if (rdev->family >= CHIP_R600)
+		bios_3_scratch = RREG32(R600_BIOS_3_SCRATCH);
+	else
+		bios_3_scratch = RREG32(RADEON_BIOS_3_SCRATCH);
+
+	if (radeon_encoder->devices & ATOM_DEVICE_TV1_SUPPORT) {
+		bios_3_scratch &= ~ATOM_S3_TV1_CRTC_ACTIVE;
+		bios_3_scratch |= (crtc << 18);
+	}
+	if (radeon_encoder->devices & ATOM_DEVICE_CV_SUPPORT) {
+		bios_3_scratch &= ~ATOM_S3_CV_CRTC_ACTIVE;
+		bios_3_scratch |= (crtc << 24);
+	}
+	if (radeon_encoder->devices & ATOM_DEVICE_CRT1_SUPPORT) {
+		bios_3_scratch &= ~ATOM_S3_CRT1_CRTC_ACTIVE;
+		bios_3_scratch |= (crtc << 16);
+	}
+	if (radeon_encoder->devices & ATOM_DEVICE_CRT2_SUPPORT) {
+		bios_3_scratch &= ~ATOM_S3_CRT2_CRTC_ACTIVE;
+		bios_3_scratch |= (crtc << 20);
+	}
+	if (radeon_encoder->devices & ATOM_DEVICE_LCD1_SUPPORT) {
+		bios_3_scratch &= ~ATOM_S3_LCD1_CRTC_ACTIVE;
+		bios_3_scratch |= (crtc << 17);
+	}
+	if (radeon_encoder->devices & ATOM_DEVICE_DFP1_SUPPORT) {
+		bios_3_scratch &= ~ATOM_S3_DFP1_CRTC_ACTIVE;
+		bios_3_scratch |= (crtc << 19);
+	}
+	if (radeon_encoder->devices & ATOM_DEVICE_DFP2_SUPPORT) {
+		bios_3_scratch &= ~ATOM_S3_DFP2_CRTC_ACTIVE;
+		bios_3_scratch |= (crtc << 23);
+	}
+	if (radeon_encoder->devices & ATOM_DEVICE_DFP3_SUPPORT) {
+		bios_3_scratch &= ~ATOM_S3_DFP3_CRTC_ACTIVE;
+		bios_3_scratch |= (crtc << 25);
+	}
+
+	if (rdev->family >= CHIP_R600)
+		WREG32(R600_BIOS_3_SCRATCH, bios_3_scratch);
+	else
+		WREG32(RADEON_BIOS_3_SCRATCH, bios_3_scratch);
+}
+
+void
+radeon_atombios_encoder_dpms_scratch_regs(struct drm_encoder *encoder, bool on)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	uint32_t bios_2_scratch;
+
+	if (rdev->family >= CHIP_R600)
+		bios_2_scratch = RREG32(R600_BIOS_2_SCRATCH);
+	else
+		bios_2_scratch = RREG32(RADEON_BIOS_2_SCRATCH);
+
+	if (radeon_encoder->devices & ATOM_DEVICE_TV1_SUPPORT) {
+		if (on)
+			bios_2_scratch &= ~ATOM_S2_TV1_DPMS_STATE;
+		else
+			bios_2_scratch |= ATOM_S2_TV1_DPMS_STATE;
+	}
+	if (radeon_encoder->devices & ATOM_DEVICE_CV_SUPPORT) {
+		if (on)
+			bios_2_scratch &= ~ATOM_S2_CV_DPMS_STATE;
+		else
+			bios_2_scratch |= ATOM_S2_CV_DPMS_STATE;
+	}
+	if (radeon_encoder->devices & ATOM_DEVICE_CRT1_SUPPORT) {
+		if (on)
+			bios_2_scratch &= ~ATOM_S2_CRT1_DPMS_STATE;
+		else
+			bios_2_scratch |= ATOM_S2_CRT1_DPMS_STATE;
+	}
+	if (radeon_encoder->devices & ATOM_DEVICE_CRT2_SUPPORT) {
+		if (on)
+			bios_2_scratch &= ~ATOM_S2_CRT2_DPMS_STATE;
+		else
+			bios_2_scratch |= ATOM_S2_CRT2_DPMS_STATE;
+	}
+	if (radeon_encoder->devices & ATOM_DEVICE_LCD1_SUPPORT) {
+		if (on)
+			bios_2_scratch &= ~ATOM_S2_LCD1_DPMS_STATE;
+		else
+			bios_2_scratch |= ATOM_S2_LCD1_DPMS_STATE;
+	}
+	if (radeon_encoder->devices & ATOM_DEVICE_DFP1_SUPPORT) {
+		if (on)
+			bios_2_scratch &= ~ATOM_S2_DFP1_DPMS_STATE;
+		else
+			bios_2_scratch |= ATOM_S2_DFP1_DPMS_STATE;
+	}
+	if (radeon_encoder->devices & ATOM_DEVICE_DFP2_SUPPORT) {
+		if (on)
+			bios_2_scratch &= ~ATOM_S2_DFP2_DPMS_STATE;
+		else
+			bios_2_scratch |= ATOM_S2_DFP2_DPMS_STATE;
+	}
+	if (radeon_encoder->devices & ATOM_DEVICE_DFP3_SUPPORT) {
+		if (on)
+			bios_2_scratch &= ~ATOM_S2_DFP3_DPMS_STATE;
+		else
+			bios_2_scratch |= ATOM_S2_DFP3_DPMS_STATE;
+	}
+	if (radeon_encoder->devices & ATOM_DEVICE_DFP4_SUPPORT) {
+		if (on)
+			bios_2_scratch &= ~ATOM_S2_DFP4_DPMS_STATE;
+		else
+			bios_2_scratch |= ATOM_S2_DFP4_DPMS_STATE;
+	}
+	if (radeon_encoder->devices & ATOM_DEVICE_DFP5_SUPPORT) {
+		if (on)
+			bios_2_scratch &= ~ATOM_S2_DFP5_DPMS_STATE;
+		else
+			bios_2_scratch |= ATOM_S2_DFP5_DPMS_STATE;
+	}
+
+	if (rdev->family >= CHIP_R600)
+		WREG32(R600_BIOS_2_SCRATCH, bios_2_scratch);
+	else
+		WREG32(RADEON_BIOS_2_SCRATCH, bios_2_scratch);
+}
diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c
new file mode 100644
index 00000000000..c44403a2ca7
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_benchmark.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Jerome Glisse
+ */
+#include <drm/drmP.h>
+#include <drm/radeon_drm.h>
+#include "radeon_reg.h"
+#include "radeon.h"
+
+void radeon_benchmark_move(struct radeon_device *rdev, unsigned bsize,
+			   unsigned sdomain, unsigned ddomain)
+{
+	struct radeon_object *dobj = NULL;
+	struct radeon_object *sobj = NULL;
+	struct radeon_fence *fence = NULL;
+	uint64_t saddr, daddr;
+	unsigned long start_jiffies;
+	unsigned long end_jiffies;
+	unsigned long time;
+	unsigned i, n, size;
+	int r;
+
+	size = bsize;
+	n = 1024;
+	r = radeon_object_create(rdev, NULL, size, true, sdomain, false, &sobj);
+	if (r) {
+		goto out_cleanup;
+	}
+	r = radeon_object_pin(sobj, sdomain, &saddr);
+	if (r) {
+		goto out_cleanup;
+	}
+	r = radeon_object_create(rdev, NULL, size, true, ddomain, false, &dobj);
+	if (r) {
+		goto out_cleanup;
+	}
+	r = radeon_object_pin(dobj, ddomain, &daddr);
+	if (r) {
+		goto out_cleanup;
+	}
+	start_jiffies = jiffies;
+	for (i = 0; i < n; i++) {
+		r = radeon_fence_create(rdev, &fence);
+		if (r) {
+			goto out_cleanup;
+		}
+		r = radeon_copy_dma(rdev, saddr, daddr, size >> 14, fence);
+		if (r) {
+			goto out_cleanup;
+		}
+		r = radeon_fence_wait(fence, false);
+		if (r) {
+			goto out_cleanup;
+		}
+		radeon_fence_unref(&fence);
+	}
+	end_jiffies = jiffies;
+	time = end_jiffies - start_jiffies;
+	time = jiffies_to_msecs(time);
+	if (time > 0) {
+		i = ((n * size) >> 10) / time;
+		printk(KERN_INFO "radeon: dma %u bo moves of %ukb from %d to %d"
+		       " in %lums (%ukb/ms %ukb/s %uM/s)\n", n, size >> 10,
+		       sdomain, ddomain, time, i, i * 1000, (i * 1000) / 1024);
+	}
+	start_jiffies = jiffies;
+	for (i = 0; i < n; i++) {
+		r = radeon_fence_create(rdev, &fence);
+		if (r) {
+			goto out_cleanup;
+		}
+		r = radeon_copy_blit(rdev, saddr, daddr, size >> 14, fence);
+		if (r) {
+			goto out_cleanup;
+		}
+		r = radeon_fence_wait(fence, false);
+		if (r) {
+			goto out_cleanup;
+		}
+		radeon_fence_unref(&fence);
+	}
+	end_jiffies = jiffies;
+	time = end_jiffies - start_jiffies;
+	time = jiffies_to_msecs(time);
+	if (time > 0) {
+		i = ((n * size) >> 10) / time;
+		printk(KERN_INFO "radeon: blit %u bo moves of %ukb from %d to %d"
+		       " in %lums (%ukb/ms %ukb/s %uM/s)\n", n, size >> 10,
+		       sdomain, ddomain, time, i, i * 1000, (i * 1000) / 1024);
+	}
+out_cleanup:
+	if (sobj) {
+		radeon_object_unpin(sobj);
+		radeon_object_unref(&sobj);
+	}
+	if (dobj) {
+		radeon_object_unpin(dobj);
+		radeon_object_unref(&dobj);
+	}
+	if (fence) {
+		radeon_fence_unref(&fence);
+	}
+	if (r) {
+		printk(KERN_WARNING "Error while benchmarking BO move.\n");
+	}
+}
+
+void radeon_benchmark(struct radeon_device *rdev)
+{
+	radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_GTT,
+			      RADEON_GEM_DOMAIN_VRAM);
+	radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM,
+			      RADEON_GEM_DOMAIN_GTT);
+}
diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c
new file mode 100644
index 00000000000..96e37a6e7ce
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_bios.c
@@ -0,0 +1,390 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#include "drmP.h"
+#include "radeon_reg.h"
+#include "radeon.h"
+#include "atom.h"
+
+/*
+ * BIOS.
+ */
+static bool radeon_read_bios(struct radeon_device *rdev)
+{
+	uint8_t __iomem *bios;
+	size_t size;
+
+	rdev->bios = NULL;
+	bios = pci_map_rom(rdev->pdev, &size);
+	if (!bios) {
+		return false;
+	}
+
+	if (size == 0 || bios[0] != 0x55 || bios[1] != 0xaa) {
+		pci_unmap_rom(rdev->pdev, bios);
+		return false;
+	}
+	rdev->bios = kmalloc(size, GFP_KERNEL);
+	if (rdev->bios == NULL) {
+		pci_unmap_rom(rdev->pdev, bios);
+		return false;
+	}
+	memcpy(rdev->bios, bios, size);
+	pci_unmap_rom(rdev->pdev, bios);
+	return true;
+}
+
+static bool r700_read_disabled_bios(struct radeon_device *rdev)
+{
+	uint32_t viph_control;
+	uint32_t bus_cntl;
+	uint32_t d1vga_control;
+	uint32_t d2vga_control;
+	uint32_t vga_render_control;
+	uint32_t rom_cntl;
+	uint32_t cg_spll_func_cntl = 0;
+	uint32_t cg_spll_status;
+	bool r;
+
+	viph_control = RREG32(RADEON_VIPH_CONTROL);
+	bus_cntl = RREG32(RADEON_BUS_CNTL);
+	d1vga_control = RREG32(AVIVO_D1VGA_CONTROL);
+	d2vga_control = RREG32(AVIVO_D2VGA_CONTROL);
+	vga_render_control = RREG32(AVIVO_VGA_RENDER_CONTROL);
+	rom_cntl = RREG32(R600_ROM_CNTL);
+
+	/* disable VIP */
+	WREG32(RADEON_VIPH_CONTROL, (viph_control & ~RADEON_VIPH_EN));
+	/* enable the rom */
+	WREG32(RADEON_BUS_CNTL, (bus_cntl & ~RADEON_BUS_BIOS_DIS_ROM));
+	/* Disable VGA mode */
+	WREG32(AVIVO_D1VGA_CONTROL,
+	       (d1vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE |
+		AVIVO_DVGA_CONTROL_TIMING_SELECT)));
+	WREG32(AVIVO_D2VGA_CONTROL,
+	       (d2vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE |
+		AVIVO_DVGA_CONTROL_TIMING_SELECT)));
+	WREG32(AVIVO_VGA_RENDER_CONTROL,
+	       (vga_render_control & ~AVIVO_VGA_VSTATUS_CNTL_MASK));
+
+	if (rdev->family == CHIP_RV730) {
+		cg_spll_func_cntl = RREG32(R600_CG_SPLL_FUNC_CNTL);
+
+		/* enable bypass mode */
+		WREG32(R600_CG_SPLL_FUNC_CNTL, (cg_spll_func_cntl |
+						R600_SPLL_BYPASS_EN));
+
+		/* wait for SPLL_CHG_STATUS to change to 1 */
+		cg_spll_status = 0;
+		while (!(cg_spll_status & R600_SPLL_CHG_STATUS))
+			cg_spll_status = RREG32(R600_CG_SPLL_STATUS);
+
+		WREG32(R600_ROM_CNTL, (rom_cntl & ~R600_SCK_OVERWRITE));
+	} else
+		WREG32(R600_ROM_CNTL, (rom_cntl | R600_SCK_OVERWRITE));
+
+	r = radeon_read_bios(rdev);
+
+	/* restore regs */
+	if (rdev->family == CHIP_RV730) {
+		WREG32(R600_CG_SPLL_FUNC_CNTL, cg_spll_func_cntl);
+
+		/* wait for SPLL_CHG_STATUS to change to 1 */
+		cg_spll_status = 0;
+		while (!(cg_spll_status & R600_SPLL_CHG_STATUS))
+			cg_spll_status = RREG32(R600_CG_SPLL_STATUS);
+	}
+	WREG32(RADEON_VIPH_CONTROL, viph_control);
+	WREG32(RADEON_BUS_CNTL, bus_cntl);
+	WREG32(AVIVO_D1VGA_CONTROL, d1vga_control);
+	WREG32(AVIVO_D2VGA_CONTROL, d2vga_control);
+	WREG32(AVIVO_VGA_RENDER_CONTROL, vga_render_control);
+	WREG32(R600_ROM_CNTL, rom_cntl);
+	return r;
+}
+
+static bool r600_read_disabled_bios(struct radeon_device *rdev)
+{
+	uint32_t viph_control;
+	uint32_t bus_cntl;
+	uint32_t d1vga_control;
+	uint32_t d2vga_control;
+	uint32_t vga_render_control;
+	uint32_t rom_cntl;
+	uint32_t general_pwrmgt;
+	uint32_t low_vid_lower_gpio_cntl;
+	uint32_t medium_vid_lower_gpio_cntl;
+	uint32_t high_vid_lower_gpio_cntl;
+	uint32_t ctxsw_vid_lower_gpio_cntl;
+	uint32_t lower_gpio_enable;
+	bool r;
+
+	viph_control = RREG32(RADEON_VIPH_CONTROL);
+	bus_cntl = RREG32(RADEON_BUS_CNTL);
+	d1vga_control = RREG32(AVIVO_D1VGA_CONTROL);
+	d2vga_control = RREG32(AVIVO_D2VGA_CONTROL);
+	vga_render_control = RREG32(AVIVO_VGA_RENDER_CONTROL);
+	rom_cntl = RREG32(R600_ROM_CNTL);
+	general_pwrmgt = RREG32(R600_GENERAL_PWRMGT);
+	low_vid_lower_gpio_cntl = RREG32(R600_LOW_VID_LOWER_GPIO_CNTL);
+	medium_vid_lower_gpio_cntl = RREG32(R600_MEDIUM_VID_LOWER_GPIO_CNTL);
+	high_vid_lower_gpio_cntl = RREG32(R600_HIGH_VID_LOWER_GPIO_CNTL);
+	ctxsw_vid_lower_gpio_cntl = RREG32(R600_CTXSW_VID_LOWER_GPIO_CNTL);
+	lower_gpio_enable = RREG32(R600_LOWER_GPIO_ENABLE);
+
+	/* disable VIP */
+	WREG32(RADEON_VIPH_CONTROL, (viph_control & ~RADEON_VIPH_EN));
+	/* enable the rom */
+	WREG32(RADEON_BUS_CNTL, (bus_cntl & ~RADEON_BUS_BIOS_DIS_ROM));
+	/* Disable VGA mode */
+	WREG32(AVIVO_D1VGA_CONTROL,
+	       (d1vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE |
+		AVIVO_DVGA_CONTROL_TIMING_SELECT)));
+	WREG32(AVIVO_D2VGA_CONTROL,
+	       (d2vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE |
+		AVIVO_DVGA_CONTROL_TIMING_SELECT)));
+	WREG32(AVIVO_VGA_RENDER_CONTROL,
+	       (vga_render_control & ~AVIVO_VGA_VSTATUS_CNTL_MASK));
+
+	WREG32(R600_ROM_CNTL,
+	       ((rom_cntl & ~R600_SCK_PRESCALE_CRYSTAL_CLK_MASK) |
+		(1 << R600_SCK_PRESCALE_CRYSTAL_CLK_SHIFT) |
+		R600_SCK_OVERWRITE));
+
+	WREG32(R600_GENERAL_PWRMGT, (general_pwrmgt & ~R600_OPEN_DRAIN_PADS));
+	WREG32(R600_LOW_VID_LOWER_GPIO_CNTL,
+	       (low_vid_lower_gpio_cntl & ~0x400));
+	WREG32(R600_MEDIUM_VID_LOWER_GPIO_CNTL,
+	       (medium_vid_lower_gpio_cntl & ~0x400));
+	WREG32(R600_HIGH_VID_LOWER_GPIO_CNTL,
+	       (high_vid_lower_gpio_cntl & ~0x400));
+	WREG32(R600_CTXSW_VID_LOWER_GPIO_CNTL,
+	       (ctxsw_vid_lower_gpio_cntl & ~0x400));
+	WREG32(R600_LOWER_GPIO_ENABLE, (lower_gpio_enable | 0x400));
+
+	r = radeon_read_bios(rdev);
+
+	/* restore regs */
+	WREG32(RADEON_VIPH_CONTROL, viph_control);
+	WREG32(RADEON_BUS_CNTL, bus_cntl);
+	WREG32(AVIVO_D1VGA_CONTROL, d1vga_control);
+	WREG32(AVIVO_D2VGA_CONTROL, d2vga_control);
+	WREG32(AVIVO_VGA_RENDER_CONTROL, vga_render_control);
+	WREG32(R600_ROM_CNTL, rom_cntl);
+	WREG32(R600_GENERAL_PWRMGT, general_pwrmgt);
+	WREG32(R600_LOW_VID_LOWER_GPIO_CNTL, low_vid_lower_gpio_cntl);
+	WREG32(R600_MEDIUM_VID_LOWER_GPIO_CNTL, medium_vid_lower_gpio_cntl);
+	WREG32(R600_HIGH_VID_LOWER_GPIO_CNTL, high_vid_lower_gpio_cntl);
+	WREG32(R600_CTXSW_VID_LOWER_GPIO_CNTL, ctxsw_vid_lower_gpio_cntl);
+	WREG32(R600_LOWER_GPIO_ENABLE, lower_gpio_enable);
+	return r;
+}
+
+static bool avivo_read_disabled_bios(struct radeon_device *rdev)
+{
+	uint32_t seprom_cntl1;
+	uint32_t viph_control;
+	uint32_t bus_cntl;
+	uint32_t d1vga_control;
+	uint32_t d2vga_control;
+	uint32_t vga_render_control;
+	uint32_t gpiopad_a;
+	uint32_t gpiopad_en;
+	uint32_t gpiopad_mask;
+	bool r;
+
+	seprom_cntl1 = RREG32(RADEON_SEPROM_CNTL1);
+	viph_control = RREG32(RADEON_VIPH_CONTROL);
+	bus_cntl = RREG32(RADEON_BUS_CNTL);
+	d1vga_control = RREG32(AVIVO_D1VGA_CONTROL);
+	d2vga_control = RREG32(AVIVO_D2VGA_CONTROL);
+	vga_render_control = RREG32(AVIVO_VGA_RENDER_CONTROL);
+	gpiopad_a = RREG32(RADEON_GPIOPAD_A);
+	gpiopad_en = RREG32(RADEON_GPIOPAD_EN);
+	gpiopad_mask = RREG32(RADEON_GPIOPAD_MASK);
+
+	WREG32(RADEON_SEPROM_CNTL1,
+	       ((seprom_cntl1 & ~RADEON_SCK_PRESCALE_MASK) |
+		(0xc << RADEON_SCK_PRESCALE_SHIFT)));
+	WREG32(RADEON_GPIOPAD_A, 0);
+	WREG32(RADEON_GPIOPAD_EN, 0);
+	WREG32(RADEON_GPIOPAD_MASK, 0);
+
+	/* disable VIP */
+	WREG32(RADEON_VIPH_CONTROL, (viph_control & ~RADEON_VIPH_EN));
+
+	/* enable the rom */
+	WREG32(RADEON_BUS_CNTL, (bus_cntl & ~RADEON_BUS_BIOS_DIS_ROM));
+
+	/* Disable VGA mode */
+	WREG32(AVIVO_D1VGA_CONTROL,
+	       (d1vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE |
+		AVIVO_DVGA_CONTROL_TIMING_SELECT)));
+	WREG32(AVIVO_D2VGA_CONTROL,
+	       (d2vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE |
+		AVIVO_DVGA_CONTROL_TIMING_SELECT)));
+	WREG32(AVIVO_VGA_RENDER_CONTROL,
+	       (vga_render_control & ~AVIVO_VGA_VSTATUS_CNTL_MASK));
+
+	r = radeon_read_bios(rdev);
+
+	/* restore regs */
+	WREG32(RADEON_SEPROM_CNTL1, seprom_cntl1);
+	WREG32(RADEON_VIPH_CONTROL, viph_control);
+	WREG32(RADEON_BUS_CNTL, bus_cntl);
+	WREG32(AVIVO_D1VGA_CONTROL, d1vga_control);
+	WREG32(AVIVO_D2VGA_CONTROL, d2vga_control);
+	WREG32(AVIVO_VGA_RENDER_CONTROL, vga_render_control);
+	WREG32(RADEON_GPIOPAD_A, gpiopad_a);
+	WREG32(RADEON_GPIOPAD_EN, gpiopad_en);
+	WREG32(RADEON_GPIOPAD_MASK, gpiopad_mask);
+	return r;
+}
+
+static bool legacy_read_disabled_bios(struct radeon_device *rdev)
+{
+	uint32_t seprom_cntl1;
+	uint32_t viph_control;
+	uint32_t bus_cntl;
+	uint32_t crtc_gen_cntl;
+	uint32_t crtc2_gen_cntl;
+	uint32_t crtc_ext_cntl;
+	uint32_t fp2_gen_cntl;
+	bool r;
+
+	seprom_cntl1 = RREG32(RADEON_SEPROM_CNTL1);
+	viph_control = RREG32(RADEON_VIPH_CONTROL);
+	bus_cntl = RREG32(RADEON_BUS_CNTL);
+	crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL);
+	crtc2_gen_cntl = 0;
+	crtc_ext_cntl = RREG32(RADEON_CRTC_EXT_CNTL);
+	fp2_gen_cntl = 0;
+
+	if (rdev->ddev->pci_device == PCI_DEVICE_ID_ATI_RADEON_QY) {
+		fp2_gen_cntl = RREG32(RADEON_FP2_GEN_CNTL);
+	}
+
+	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
+		crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
+	}
+
+	WREG32(RADEON_SEPROM_CNTL1,
+	       ((seprom_cntl1 & ~RADEON_SCK_PRESCALE_MASK) |
+		(0xc << RADEON_SCK_PRESCALE_SHIFT)));
+
+	/* disable VIP */
+	WREG32(RADEON_VIPH_CONTROL, (viph_control & ~RADEON_VIPH_EN));
+
+	/* enable the rom */
+	WREG32(RADEON_BUS_CNTL, (bus_cntl & ~RADEON_BUS_BIOS_DIS_ROM));
+
+	/* Turn off mem requests and CRTC for both controllers */
+	WREG32(RADEON_CRTC_GEN_CNTL,
+	       ((crtc_gen_cntl & ~RADEON_CRTC_EN) |
+		(RADEON_CRTC_DISP_REQ_EN_B |
+		 RADEON_CRTC_EXT_DISP_EN)));
+	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
+		WREG32(RADEON_CRTC2_GEN_CNTL,
+		       ((crtc2_gen_cntl & ~RADEON_CRTC2_EN) |
+			RADEON_CRTC2_DISP_REQ_EN_B));
+	}
+	/* Turn off CRTC */
+	WREG32(RADEON_CRTC_EXT_CNTL,
+	       ((crtc_ext_cntl & ~RADEON_CRTC_CRT_ON) |
+		(RADEON_CRTC_SYNC_TRISTAT |
+		 RADEON_CRTC_DISPLAY_DIS)));
+
+	if (rdev->ddev->pci_device == PCI_DEVICE_ID_ATI_RADEON_QY) {
+		WREG32(RADEON_FP2_GEN_CNTL, (fp2_gen_cntl & ~RADEON_FP2_ON));
+	}
+
+	r = radeon_read_bios(rdev);
+
+	/* restore regs */
+	WREG32(RADEON_SEPROM_CNTL1, seprom_cntl1);
+	WREG32(RADEON_VIPH_CONTROL, viph_control);
+	WREG32(RADEON_BUS_CNTL, bus_cntl);
+	WREG32(RADEON_CRTC_GEN_CNTL, crtc_gen_cntl);
+	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
+		WREG32(RADEON_CRTC2_GEN_CNTL, crtc2_gen_cntl);
+	}
+	WREG32(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl);
+	if (rdev->ddev->pci_device == PCI_DEVICE_ID_ATI_RADEON_QY) {
+		WREG32(RADEON_FP2_GEN_CNTL, fp2_gen_cntl);
+	}
+	return r;
+}
+
+static bool radeon_read_disabled_bios(struct radeon_device *rdev)
+{
+	if (rdev->family >= CHIP_RV770)
+		return r700_read_disabled_bios(rdev);
+	else if (rdev->family >= CHIP_R600)
+		return r600_read_disabled_bios(rdev);
+	else if (rdev->family >= CHIP_RS600)
+		return avivo_read_disabled_bios(rdev);
+	else
+		return legacy_read_disabled_bios(rdev);
+}
+
+bool radeon_get_bios(struct radeon_device *rdev)
+{
+	bool r;
+	uint16_t tmp;
+
+	r = radeon_read_bios(rdev);
+	if (r == false) {
+		r = radeon_read_disabled_bios(rdev);
+	}
+	if (r == false || rdev->bios == NULL) {
+		DRM_ERROR("Unable to locate a BIOS ROM\n");
+		rdev->bios = NULL;
+		return false;
+	}
+	if (rdev->bios[0] != 0x55 || rdev->bios[1] != 0xaa) {
+		goto free_bios;
+	}
+
+	rdev->bios_header_start = RBIOS16(0x48);
+	if (!rdev->bios_header_start) {
+		goto free_bios;
+	}
+	tmp = rdev->bios_header_start + 4;
+	if (!memcmp(rdev->bios + tmp, "ATOM", 4) ||
+	    !memcmp(rdev->bios + tmp, "MOTA", 4)) {
+		rdev->is_atom_bios = true;
+	} else {
+		rdev->is_atom_bios = false;
+	}
+
+	DRM_DEBUG("%sBIOS detected\n", rdev->is_atom_bios ? "ATOM" : "COM");
+	return true;
+free_bios:
+	kfree(rdev->bios);
+	rdev->bios = NULL;
+	return false;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_clocks.c b/drivers/gpu/drm/radeon/radeon_clocks.c
new file mode 100644
index 00000000000..a37cbce5318
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_clocks.c
@@ -0,0 +1,833 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#include "drmP.h"
+#include "radeon_drm.h"
+#include "radeon_reg.h"
+#include "radeon.h"
+#include "atom.h"
+
+/* 10 khz */
+static uint32_t radeon_legacy_get_engine_clock(struct radeon_device *rdev)
+{
+	struct radeon_pll *spll = &rdev->clock.spll;
+	uint32_t fb_div, ref_div, post_div, sclk;
+
+	fb_div = RREG32_PLL(RADEON_M_SPLL_REF_FB_DIV);
+	fb_div = (fb_div >> RADEON_SPLL_FB_DIV_SHIFT) & RADEON_SPLL_FB_DIV_MASK;
+	fb_div <<= 1;
+	fb_div *= spll->reference_freq;
+
+	ref_div =
+	    RREG32_PLL(RADEON_M_SPLL_REF_FB_DIV) & RADEON_M_SPLL_REF_DIV_MASK;
+	sclk = fb_div / ref_div;
+
+	post_div = RREG32_PLL(RADEON_SCLK_CNTL) & RADEON_SCLK_SRC_SEL_MASK;
+	if (post_div == 2)
+		sclk >>= 1;
+	else if (post_div == 3)
+		sclk >>= 2;
+	else if (post_div == 4)
+		sclk >>= 4;
+
+	return sclk;
+}
+
+/* 10 khz */
+static uint32_t radeon_legacy_get_memory_clock(struct radeon_device *rdev)
+{
+	struct radeon_pll *mpll = &rdev->clock.mpll;
+	uint32_t fb_div, ref_div, post_div, mclk;
+
+	fb_div = RREG32_PLL(RADEON_M_SPLL_REF_FB_DIV);
+	fb_div = (fb_div >> RADEON_MPLL_FB_DIV_SHIFT) & RADEON_MPLL_FB_DIV_MASK;
+	fb_div <<= 1;
+	fb_div *= mpll->reference_freq;
+
+	ref_div =
+	    RREG32_PLL(RADEON_M_SPLL_REF_FB_DIV) & RADEON_M_SPLL_REF_DIV_MASK;
+	mclk = fb_div / ref_div;
+
+	post_div = RREG32_PLL(RADEON_MCLK_CNTL) & 0x7;
+	if (post_div == 2)
+		mclk >>= 1;
+	else if (post_div == 3)
+		mclk >>= 2;
+	else if (post_div == 4)
+		mclk >>= 4;
+
+	return mclk;
+}
+
+void radeon_get_clock_info(struct drm_device *dev)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_pll *p1pll = &rdev->clock.p1pll;
+	struct radeon_pll *p2pll = &rdev->clock.p2pll;
+	struct radeon_pll *spll = &rdev->clock.spll;
+	struct radeon_pll *mpll = &rdev->clock.mpll;
+	int ret;
+
+	if (rdev->is_atom_bios)
+		ret = radeon_atom_get_clock_info(dev);
+	else
+		ret = radeon_combios_get_clock_info(dev);
+
+	if (ret) {
+		if (p1pll->reference_div < 2)
+			p1pll->reference_div = 12;
+		if (p2pll->reference_div < 2)
+			p2pll->reference_div = 12;
+		if (spll->reference_div < 2)
+			spll->reference_div =
+			    RREG32_PLL(RADEON_M_SPLL_REF_FB_DIV) &
+			    RADEON_M_SPLL_REF_DIV_MASK;
+		if (mpll->reference_div < 2)
+			mpll->reference_div = spll->reference_div;
+	} else {
+		if (ASIC_IS_AVIVO(rdev)) {
+			/* TODO FALLBACK */
+		} else {
+			DRM_INFO("Using generic clock info\n");
+
+			if (rdev->flags & RADEON_IS_IGP) {
+				p1pll->reference_freq = 1432;
+				p2pll->reference_freq = 1432;
+				spll->reference_freq = 1432;
+				mpll->reference_freq = 1432;
+			} else {
+				p1pll->reference_freq = 2700;
+				p2pll->reference_freq = 2700;
+				spll->reference_freq = 2700;
+				mpll->reference_freq = 2700;
+			}
+			p1pll->reference_div =
+			    RREG32_PLL(RADEON_PPLL_REF_DIV) & 0x3ff;
+			if (p1pll->reference_div < 2)
+				p1pll->reference_div = 12;
+			p2pll->reference_div = p1pll->reference_div;
+
+			if (rdev->family >= CHIP_R420) {
+				p1pll->pll_in_min = 100;
+				p1pll->pll_in_max = 1350;
+				p1pll->pll_out_min = 20000;
+				p1pll->pll_out_max = 50000;
+				p2pll->pll_in_min = 100;
+				p2pll->pll_in_max = 1350;
+				p2pll->pll_out_min = 20000;
+				p2pll->pll_out_max = 50000;
+			} else {
+				p1pll->pll_in_min = 40;
+				p1pll->pll_in_max = 500;
+				p1pll->pll_out_min = 12500;
+				p1pll->pll_out_max = 35000;
+				p2pll->pll_in_min = 40;
+				p2pll->pll_in_max = 500;
+				p2pll->pll_out_min = 12500;
+				p2pll->pll_out_max = 35000;
+			}
+
+			spll->reference_div =
+			    RREG32_PLL(RADEON_M_SPLL_REF_FB_DIV) &
+			    RADEON_M_SPLL_REF_DIV_MASK;
+			mpll->reference_div = spll->reference_div;
+			rdev->clock.default_sclk =
+			    radeon_legacy_get_engine_clock(rdev);
+			rdev->clock.default_mclk =
+			    radeon_legacy_get_memory_clock(rdev);
+		}
+	}
+
+	/* pixel clocks */
+	if (ASIC_IS_AVIVO(rdev)) {
+		p1pll->min_post_div = 2;
+		p1pll->max_post_div = 0x7f;
+		p1pll->min_frac_feedback_div = 0;
+		p1pll->max_frac_feedback_div = 9;
+		p2pll->min_post_div = 2;
+		p2pll->max_post_div = 0x7f;
+		p2pll->min_frac_feedback_div = 0;
+		p2pll->max_frac_feedback_div = 9;
+	} else {
+		p1pll->min_post_div = 1;
+		p1pll->max_post_div = 16;
+		p1pll->min_frac_feedback_div = 0;
+		p1pll->max_frac_feedback_div = 0;
+		p2pll->min_post_div = 1;
+		p2pll->max_post_div = 12;
+		p2pll->min_frac_feedback_div = 0;
+		p2pll->max_frac_feedback_div = 0;
+	}
+
+	p1pll->min_ref_div = 2;
+	p1pll->max_ref_div = 0x3ff;
+	p1pll->min_feedback_div = 4;
+	p1pll->max_feedback_div = 0x7ff;
+	p1pll->best_vco = 0;
+
+	p2pll->min_ref_div = 2;
+	p2pll->max_ref_div = 0x3ff;
+	p2pll->min_feedback_div = 4;
+	p2pll->max_feedback_div = 0x7ff;
+	p2pll->best_vco = 0;
+
+	/* system clock */
+	spll->min_post_div = 1;
+	spll->max_post_div = 1;
+	spll->min_ref_div = 2;
+	spll->max_ref_div = 0xff;
+	spll->min_feedback_div = 4;
+	spll->max_feedback_div = 0xff;
+	spll->best_vco = 0;
+
+	/* memory clock */
+	mpll->min_post_div = 1;
+	mpll->max_post_div = 1;
+	mpll->min_ref_div = 2;
+	mpll->max_ref_div = 0xff;
+	mpll->min_feedback_div = 4;
+	mpll->max_feedback_div = 0xff;
+	mpll->best_vco = 0;
+
+}
+
+/* 10 khz */
+static uint32_t calc_eng_mem_clock(struct radeon_device *rdev,
+				   uint32_t req_clock,
+				   int *fb_div, int *post_div)
+{
+	struct radeon_pll *spll = &rdev->clock.spll;
+	int ref_div = spll->reference_div;
+
+	if (!ref_div)
+		ref_div =
+		    RREG32_PLL(RADEON_M_SPLL_REF_FB_DIV) &
+		    RADEON_M_SPLL_REF_DIV_MASK;
+
+	if (req_clock < 15000) {
+		*post_div = 8;
+		req_clock *= 8;
+	} else if (req_clock < 30000) {
+		*post_div = 4;
+		req_clock *= 4;
+	} else if (req_clock < 60000) {
+		*post_div = 2;
+		req_clock *= 2;
+	} else
+		*post_div = 1;
+
+	req_clock *= ref_div;
+	req_clock += spll->reference_freq;
+	req_clock /= (2 * spll->reference_freq);
+
+	*fb_div = req_clock & 0xff;
+
+	req_clock = (req_clock & 0xffff) << 1;
+	req_clock *= spll->reference_freq;
+	req_clock /= ref_div;
+	req_clock /= *post_div;
+
+	return req_clock;
+}
+
+/* 10 khz */
+void radeon_legacy_set_engine_clock(struct radeon_device *rdev,
+				    uint32_t eng_clock)
+{
+	uint32_t tmp;
+	int fb_div, post_div;
+
+	/* XXX: wait for idle */
+
+	eng_clock = calc_eng_mem_clock(rdev, eng_clock, &fb_div, &post_div);
+
+	tmp = RREG32_PLL(RADEON_CLK_PIN_CNTL);
+	tmp &= ~RADEON_DONT_USE_XTALIN;
+	WREG32_PLL(RADEON_CLK_PIN_CNTL, tmp);
+
+	tmp = RREG32_PLL(RADEON_SCLK_CNTL);
+	tmp &= ~RADEON_SCLK_SRC_SEL_MASK;
+	WREG32_PLL(RADEON_SCLK_CNTL, tmp);
+
+	udelay(10);
+
+	tmp = RREG32_PLL(RADEON_SPLL_CNTL);
+	tmp |= RADEON_SPLL_SLEEP;
+	WREG32_PLL(RADEON_SPLL_CNTL, tmp);
+
+	udelay(2);
+
+	tmp = RREG32_PLL(RADEON_SPLL_CNTL);
+	tmp |= RADEON_SPLL_RESET;
+	WREG32_PLL(RADEON_SPLL_CNTL, tmp);
+
+	udelay(200);
+
+	tmp = RREG32_PLL(RADEON_M_SPLL_REF_FB_DIV);
+	tmp &= ~(RADEON_SPLL_FB_DIV_MASK << RADEON_SPLL_FB_DIV_SHIFT);
+	tmp |= (fb_div & RADEON_SPLL_FB_DIV_MASK) << RADEON_SPLL_FB_DIV_SHIFT;
+	WREG32_PLL(RADEON_M_SPLL_REF_FB_DIV, tmp);
+
+	/* XXX: verify on different asics */
+	tmp = RREG32_PLL(RADEON_SPLL_CNTL);
+	tmp &= ~RADEON_SPLL_PVG_MASK;
+	if ((eng_clock * post_div) >= 90000)
+		tmp |= (0x7 << RADEON_SPLL_PVG_SHIFT);
+	else
+		tmp |= (0x4 << RADEON_SPLL_PVG_SHIFT);
+	WREG32_PLL(RADEON_SPLL_CNTL, tmp);
+
+	tmp = RREG32_PLL(RADEON_SPLL_CNTL);
+	tmp &= ~RADEON_SPLL_SLEEP;
+	WREG32_PLL(RADEON_SPLL_CNTL, tmp);
+
+	udelay(2);
+
+	tmp = RREG32_PLL(RADEON_SPLL_CNTL);
+	tmp &= ~RADEON_SPLL_RESET;
+	WREG32_PLL(RADEON_SPLL_CNTL, tmp);
+
+	udelay(200);
+
+	tmp = RREG32_PLL(RADEON_SCLK_CNTL);
+	tmp &= ~RADEON_SCLK_SRC_SEL_MASK;
+	switch (post_div) {
+	case 1:
+	default:
+		tmp |= 1;
+		break;
+	case 2:
+		tmp |= 2;
+		break;
+	case 4:
+		tmp |= 3;
+		break;
+	case 8:
+		tmp |= 4;
+		break;
+	}
+	WREG32_PLL(RADEON_SCLK_CNTL, tmp);
+
+	udelay(20);
+
+	tmp = RREG32_PLL(RADEON_CLK_PIN_CNTL);
+	tmp |= RADEON_DONT_USE_XTALIN;
+	WREG32_PLL(RADEON_CLK_PIN_CNTL, tmp);
+
+	udelay(10);
+}
+
+void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enable)
+{
+	uint32_t tmp;
+
+	if (enable) {
+		if (rdev->flags & RADEON_SINGLE_CRTC) {
+			tmp = RREG32_PLL(RADEON_SCLK_CNTL);
+			if ((RREG32(RADEON_CONFIG_CNTL) &
+			     RADEON_CFG_ATI_REV_ID_MASK) >
+			    RADEON_CFG_ATI_REV_A13) {
+				tmp &=
+				    ~(RADEON_SCLK_FORCE_CP |
+				      RADEON_SCLK_FORCE_RB);
+			}
+			tmp &=
+			    ~(RADEON_SCLK_FORCE_HDP | RADEON_SCLK_FORCE_DISP1 |
+			      RADEON_SCLK_FORCE_TOP | RADEON_SCLK_FORCE_SE |
+			      RADEON_SCLK_FORCE_IDCT | RADEON_SCLK_FORCE_RE |
+			      RADEON_SCLK_FORCE_PB | RADEON_SCLK_FORCE_TAM |
+			      RADEON_SCLK_FORCE_TDM);
+			WREG32_PLL(RADEON_SCLK_CNTL, tmp);
+		} else if (ASIC_IS_R300(rdev)) {
+			if ((rdev->family == CHIP_RS400) ||
+			    (rdev->family == CHIP_RS480)) {
+				tmp = RREG32_PLL(RADEON_SCLK_CNTL);
+				tmp &=
+				    ~(RADEON_SCLK_FORCE_DISP2 |
+				      RADEON_SCLK_FORCE_CP |
+				      RADEON_SCLK_FORCE_HDP |
+				      RADEON_SCLK_FORCE_DISP1 |
+				      RADEON_SCLK_FORCE_TOP |
+				      RADEON_SCLK_FORCE_E2 | R300_SCLK_FORCE_VAP
+				      | RADEON_SCLK_FORCE_IDCT |
+				      RADEON_SCLK_FORCE_VIP | R300_SCLK_FORCE_SR
+				      | R300_SCLK_FORCE_PX | R300_SCLK_FORCE_TX
+				      | R300_SCLK_FORCE_US |
+				      RADEON_SCLK_FORCE_TV_SCLK |
+				      R300_SCLK_FORCE_SU |
+				      RADEON_SCLK_FORCE_OV0);
+				tmp |= RADEON_DYN_STOP_LAT_MASK;
+				tmp |=
+				    RADEON_SCLK_FORCE_TOP |
+				    RADEON_SCLK_FORCE_VIP;
+				WREG32_PLL(RADEON_SCLK_CNTL, tmp);
+
+				tmp = RREG32_PLL(RADEON_SCLK_MORE_CNTL);
+				tmp &= ~RADEON_SCLK_MORE_FORCEON;
+				tmp |= RADEON_SCLK_MORE_MAX_DYN_STOP_LAT;
+				WREG32_PLL(RADEON_SCLK_MORE_CNTL, tmp);
+
+				tmp = RREG32_PLL(RADEON_VCLK_ECP_CNTL);
+				tmp |= (RADEON_PIXCLK_ALWAYS_ONb |
+					RADEON_PIXCLK_DAC_ALWAYS_ONb);
+				WREG32_PLL(RADEON_VCLK_ECP_CNTL, tmp);
+
+				tmp = RREG32_PLL(RADEON_PIXCLKS_CNTL);
+				tmp |= (RADEON_PIX2CLK_ALWAYS_ONb |
+					RADEON_PIX2CLK_DAC_ALWAYS_ONb |
+					RADEON_DISP_TVOUT_PIXCLK_TV_ALWAYS_ONb |
+					R300_DVOCLK_ALWAYS_ONb |
+					RADEON_PIXCLK_BLEND_ALWAYS_ONb |
+					RADEON_PIXCLK_GV_ALWAYS_ONb |
+					R300_PIXCLK_DVO_ALWAYS_ONb |
+					RADEON_PIXCLK_LVDS_ALWAYS_ONb |
+					RADEON_PIXCLK_TMDS_ALWAYS_ONb |
+					R300_PIXCLK_TRANS_ALWAYS_ONb |
+					R300_PIXCLK_TVO_ALWAYS_ONb |
+					R300_P2G2CLK_ALWAYS_ONb |
+					R300_P2G2CLK_ALWAYS_ONb);
+				WREG32_PLL(RADEON_PIXCLKS_CNTL, tmp);
+			} else if (rdev->family >= CHIP_RV350) {
+				tmp = RREG32_PLL(R300_SCLK_CNTL2);
+				tmp &= ~(R300_SCLK_FORCE_TCL |
+					 R300_SCLK_FORCE_GA |
+					 R300_SCLK_FORCE_CBA);
+				tmp |= (R300_SCLK_TCL_MAX_DYN_STOP_LAT |
+					R300_SCLK_GA_MAX_DYN_STOP_LAT |
+					R300_SCLK_CBA_MAX_DYN_STOP_LAT);
+				WREG32_PLL(R300_SCLK_CNTL2, tmp);
+
+				tmp = RREG32_PLL(RADEON_SCLK_CNTL);
+				tmp &=
+				    ~(RADEON_SCLK_FORCE_DISP2 |
+				      RADEON_SCLK_FORCE_CP |
+				      RADEON_SCLK_FORCE_HDP |
+				      RADEON_SCLK_FORCE_DISP1 |
+				      RADEON_SCLK_FORCE_TOP |
+				      RADEON_SCLK_FORCE_E2 | R300_SCLK_FORCE_VAP
+				      | RADEON_SCLK_FORCE_IDCT |
+				      RADEON_SCLK_FORCE_VIP | R300_SCLK_FORCE_SR
+				      | R300_SCLK_FORCE_PX | R300_SCLK_FORCE_TX
+				      | R300_SCLK_FORCE_US |
+				      RADEON_SCLK_FORCE_TV_SCLK |
+				      R300_SCLK_FORCE_SU |
+				      RADEON_SCLK_FORCE_OV0);
+				tmp |= RADEON_DYN_STOP_LAT_MASK;
+				WREG32_PLL(RADEON_SCLK_CNTL, tmp);
+
+				tmp = RREG32_PLL(RADEON_SCLK_MORE_CNTL);
+				tmp &= ~RADEON_SCLK_MORE_FORCEON;
+				tmp |= RADEON_SCLK_MORE_MAX_DYN_STOP_LAT;
+				WREG32_PLL(RADEON_SCLK_MORE_CNTL, tmp);
+
+				tmp = RREG32_PLL(RADEON_VCLK_ECP_CNTL);
+				tmp |= (RADEON_PIXCLK_ALWAYS_ONb |
+					RADEON_PIXCLK_DAC_ALWAYS_ONb);
+				WREG32_PLL(RADEON_VCLK_ECP_CNTL, tmp);
+
+				tmp = RREG32_PLL(RADEON_PIXCLKS_CNTL);
+				tmp |= (RADEON_PIX2CLK_ALWAYS_ONb |
+					RADEON_PIX2CLK_DAC_ALWAYS_ONb |
+					RADEON_DISP_TVOUT_PIXCLK_TV_ALWAYS_ONb |
+					R300_DVOCLK_ALWAYS_ONb |
+					RADEON_PIXCLK_BLEND_ALWAYS_ONb |
+					RADEON_PIXCLK_GV_ALWAYS_ONb |
+					R300_PIXCLK_DVO_ALWAYS_ONb |
+					RADEON_PIXCLK_LVDS_ALWAYS_ONb |
+					RADEON_PIXCLK_TMDS_ALWAYS_ONb |
+					R300_PIXCLK_TRANS_ALWAYS_ONb |
+					R300_PIXCLK_TVO_ALWAYS_ONb |
+					R300_P2G2CLK_ALWAYS_ONb |
+					R300_P2G2CLK_ALWAYS_ONb);
+				WREG32_PLL(RADEON_PIXCLKS_CNTL, tmp);
+
+				tmp = RREG32_PLL(RADEON_MCLK_MISC);
+				tmp |= (RADEON_MC_MCLK_DYN_ENABLE |
+					RADEON_IO_MCLK_DYN_ENABLE);
+				WREG32_PLL(RADEON_MCLK_MISC, tmp);
+
+				tmp = RREG32_PLL(RADEON_MCLK_CNTL);
+				tmp |= (RADEON_FORCEON_MCLKA |
+					RADEON_FORCEON_MCLKB);
+
+				tmp &= ~(RADEON_FORCEON_YCLKA |
+					 RADEON_FORCEON_YCLKB |
+					 RADEON_FORCEON_MC);
+
+				/* Some releases of vbios have set DISABLE_MC_MCLKA
+				   and DISABLE_MC_MCLKB bits in the vbios table.  Setting these
+				   bits will cause H/W hang when reading video memory with dynamic clocking
+				   enabled. */
+				if ((tmp & R300_DISABLE_MC_MCLKA) &&
+				    (tmp & R300_DISABLE_MC_MCLKB)) {
+					/* If both bits are set, then check the active channels */
+					tmp = RREG32_PLL(RADEON_MCLK_CNTL);
+					if (rdev->mc.vram_width == 64) {
+						if (RREG32(RADEON_MEM_CNTL) &
+						    R300_MEM_USE_CD_CH_ONLY)
+							tmp &=
+							    ~R300_DISABLE_MC_MCLKB;
+						else
+							tmp &=
+							    ~R300_DISABLE_MC_MCLKA;
+					} else {
+						tmp &= ~(R300_DISABLE_MC_MCLKA |
+							 R300_DISABLE_MC_MCLKB);
+					}
+				}
+
+				WREG32_PLL(RADEON_MCLK_CNTL, tmp);
+			} else {
+				tmp = RREG32_PLL(RADEON_SCLK_CNTL);
+				tmp &= ~(R300_SCLK_FORCE_VAP);
+				tmp |= RADEON_SCLK_FORCE_CP;
+				WREG32_PLL(RADEON_SCLK_CNTL, tmp);
+				udelay(15000);
+
+				tmp = RREG32_PLL(R300_SCLK_CNTL2);
+				tmp &= ~(R300_SCLK_FORCE_TCL |
+					 R300_SCLK_FORCE_GA |
+					 R300_SCLK_FORCE_CBA);
+				WREG32_PLL(R300_SCLK_CNTL2, tmp);
+			}
+		} else {
+			tmp = RREG32_PLL(RADEON_CLK_PWRMGT_CNTL);
+
+			tmp &= ~(RADEON_ACTIVE_HILO_LAT_MASK |
+				 RADEON_DISP_DYN_STOP_LAT_MASK |
+				 RADEON_DYN_STOP_MODE_MASK);
+
+			tmp |= (RADEON_ENGIN_DYNCLK_MODE |
+				(0x01 << RADEON_ACTIVE_HILO_LAT_SHIFT));
+			WREG32_PLL(RADEON_CLK_PWRMGT_CNTL, tmp);
+			udelay(15000);
+
+			tmp = RREG32_PLL(RADEON_CLK_PIN_CNTL);
+			tmp |= RADEON_SCLK_DYN_START_CNTL;
+			WREG32_PLL(RADEON_CLK_PIN_CNTL, tmp);
+			udelay(15000);
+
+			/* When DRI is enabled, setting DYN_STOP_LAT to zero can cause some R200
+			   to lockup randomly, leave them as set by BIOS.
+			 */
+			tmp = RREG32_PLL(RADEON_SCLK_CNTL);
+			/*tmp &= RADEON_SCLK_SRC_SEL_MASK; */
+			tmp &= ~RADEON_SCLK_FORCEON_MASK;
+
+			/*RAGE_6::A11 A12 A12N1 A13, RV250::A11 A12, R300 */
+			if (((rdev->family == CHIP_RV250) &&
+			     ((RREG32(RADEON_CONFIG_CNTL) &
+			       RADEON_CFG_ATI_REV_ID_MASK) <
+			      RADEON_CFG_ATI_REV_A13))
+			    || ((rdev->family == CHIP_RV100)
+				&&
+				((RREG32(RADEON_CONFIG_CNTL) &
+				  RADEON_CFG_ATI_REV_ID_MASK) <=
+				 RADEON_CFG_ATI_REV_A13))) {
+				tmp |= RADEON_SCLK_FORCE_CP;
+				tmp |= RADEON_SCLK_FORCE_VIP;
+			}
+
+			WREG32_PLL(RADEON_SCLK_CNTL, tmp);
+
+			if ((rdev->family == CHIP_RV200) ||
+			    (rdev->family == CHIP_RV250) ||
+			    (rdev->family == CHIP_RV280)) {
+				tmp = RREG32_PLL(RADEON_SCLK_MORE_CNTL);
+				tmp &= ~RADEON_SCLK_MORE_FORCEON;
+
+				/* RV200::A11 A12 RV250::A11 A12 */
+				if (((rdev->family == CHIP_RV200) ||
+				     (rdev->family == CHIP_RV250)) &&
+				    ((RREG32(RADEON_CONFIG_CNTL) &
+				      RADEON_CFG_ATI_REV_ID_MASK) <
+				     RADEON_CFG_ATI_REV_A13)) {
+					tmp |= RADEON_SCLK_MORE_FORCEON;
+				}
+				WREG32_PLL(RADEON_SCLK_MORE_CNTL, tmp);
+				udelay(15000);
+			}
+
+			/* RV200::A11 A12, RV250::A11 A12 */
+			if (((rdev->family == CHIP_RV200) ||
+			     (rdev->family == CHIP_RV250)) &&
+			    ((RREG32(RADEON_CONFIG_CNTL) &
+			      RADEON_CFG_ATI_REV_ID_MASK) <
+			     RADEON_CFG_ATI_REV_A13)) {
+				tmp = RREG32_PLL(RADEON_PLL_PWRMGT_CNTL);
+				tmp |= RADEON_TCL_BYPASS_DISABLE;
+				WREG32_PLL(RADEON_PLL_PWRMGT_CNTL, tmp);
+			}
+			udelay(15000);
+
+			/*enable dynamic mode for display clocks (PIXCLK and PIX2CLK) */
+			tmp = RREG32_PLL(RADEON_PIXCLKS_CNTL);
+			tmp |= (RADEON_PIX2CLK_ALWAYS_ONb |
+				RADEON_PIX2CLK_DAC_ALWAYS_ONb |
+				RADEON_PIXCLK_BLEND_ALWAYS_ONb |
+				RADEON_PIXCLK_GV_ALWAYS_ONb |
+				RADEON_PIXCLK_DIG_TMDS_ALWAYS_ONb |
+				RADEON_PIXCLK_LVDS_ALWAYS_ONb |
+				RADEON_PIXCLK_TMDS_ALWAYS_ONb);
+
+			WREG32_PLL(RADEON_PIXCLKS_CNTL, tmp);
+			udelay(15000);
+
+			tmp = RREG32_PLL(RADEON_VCLK_ECP_CNTL);
+			tmp |= (RADEON_PIXCLK_ALWAYS_ONb |
+				RADEON_PIXCLK_DAC_ALWAYS_ONb);
+
+			WREG32_PLL(RADEON_VCLK_ECP_CNTL, tmp);
+			udelay(15000);
+		}
+	} else {
+		/* Turn everything OFF (ForceON to everything) */
+		if (rdev->flags & RADEON_SINGLE_CRTC) {
+			tmp = RREG32_PLL(RADEON_SCLK_CNTL);
+			tmp |= (RADEON_SCLK_FORCE_CP | RADEON_SCLK_FORCE_HDP |
+				RADEON_SCLK_FORCE_DISP1 | RADEON_SCLK_FORCE_TOP
+				| RADEON_SCLK_FORCE_E2 | RADEON_SCLK_FORCE_SE |
+				RADEON_SCLK_FORCE_IDCT | RADEON_SCLK_FORCE_VIP |
+				RADEON_SCLK_FORCE_RE | RADEON_SCLK_FORCE_PB |
+				RADEON_SCLK_FORCE_TAM | RADEON_SCLK_FORCE_TDM |
+				RADEON_SCLK_FORCE_RB);
+			WREG32_PLL(RADEON_SCLK_CNTL, tmp);
+		} else if ((rdev->family == CHIP_RS400) ||
+			   (rdev->family == CHIP_RS480)) {
+			tmp = RREG32_PLL(RADEON_SCLK_CNTL);
+			tmp |= (RADEON_SCLK_FORCE_DISP2 | RADEON_SCLK_FORCE_CP |
+				RADEON_SCLK_FORCE_HDP | RADEON_SCLK_FORCE_DISP1
+				| RADEON_SCLK_FORCE_TOP | RADEON_SCLK_FORCE_E2 |
+				R300_SCLK_FORCE_VAP | RADEON_SCLK_FORCE_IDCT |
+				RADEON_SCLK_FORCE_VIP | R300_SCLK_FORCE_SR |
+				R300_SCLK_FORCE_PX | R300_SCLK_FORCE_TX |
+				R300_SCLK_FORCE_US | RADEON_SCLK_FORCE_TV_SCLK |
+				R300_SCLK_FORCE_SU | RADEON_SCLK_FORCE_OV0);
+			WREG32_PLL(RADEON_SCLK_CNTL, tmp);
+
+			tmp = RREG32_PLL(RADEON_SCLK_MORE_CNTL);
+			tmp |= RADEON_SCLK_MORE_FORCEON;
+			WREG32_PLL(RADEON_SCLK_MORE_CNTL, tmp);
+
+			tmp = RREG32_PLL(RADEON_VCLK_ECP_CNTL);
+			tmp &= ~(RADEON_PIXCLK_ALWAYS_ONb |
+				 RADEON_PIXCLK_DAC_ALWAYS_ONb |
+				 R300_DISP_DAC_PIXCLK_DAC_BLANK_OFF);
+			WREG32_PLL(RADEON_VCLK_ECP_CNTL, tmp);
+
+			tmp = RREG32_PLL(RADEON_PIXCLKS_CNTL);
+			tmp &= ~(RADEON_PIX2CLK_ALWAYS_ONb |
+				 RADEON_PIX2CLK_DAC_ALWAYS_ONb |
+				 RADEON_DISP_TVOUT_PIXCLK_TV_ALWAYS_ONb |
+				 R300_DVOCLK_ALWAYS_ONb |
+				 RADEON_PIXCLK_BLEND_ALWAYS_ONb |
+				 RADEON_PIXCLK_GV_ALWAYS_ONb |
+				 R300_PIXCLK_DVO_ALWAYS_ONb |
+				 RADEON_PIXCLK_LVDS_ALWAYS_ONb |
+				 RADEON_PIXCLK_TMDS_ALWAYS_ONb |
+				 R300_PIXCLK_TRANS_ALWAYS_ONb |
+				 R300_PIXCLK_TVO_ALWAYS_ONb |
+				 R300_P2G2CLK_ALWAYS_ONb |
+				 R300_P2G2CLK_ALWAYS_ONb |
+				 R300_DISP_DAC_PIXCLK_DAC2_BLANK_OFF);
+			WREG32_PLL(RADEON_PIXCLKS_CNTL, tmp);
+		} else if (rdev->family >= CHIP_RV350) {
+			/* for RV350/M10, no delays are required. */
+			tmp = RREG32_PLL(R300_SCLK_CNTL2);
+			tmp |= (R300_SCLK_FORCE_TCL |
+				R300_SCLK_FORCE_GA | R300_SCLK_FORCE_CBA);
+			WREG32_PLL(R300_SCLK_CNTL2, tmp);
+
+			tmp = RREG32_PLL(RADEON_SCLK_CNTL);
+			tmp |= (RADEON_SCLK_FORCE_DISP2 | RADEON_SCLK_FORCE_CP |
+				RADEON_SCLK_FORCE_HDP | RADEON_SCLK_FORCE_DISP1
+				| RADEON_SCLK_FORCE_TOP | RADEON_SCLK_FORCE_E2 |
+				R300_SCLK_FORCE_VAP | RADEON_SCLK_FORCE_IDCT |
+				RADEON_SCLK_FORCE_VIP | R300_SCLK_FORCE_SR |
+				R300_SCLK_FORCE_PX | R300_SCLK_FORCE_TX |
+				R300_SCLK_FORCE_US | RADEON_SCLK_FORCE_TV_SCLK |
+				R300_SCLK_FORCE_SU | RADEON_SCLK_FORCE_OV0);
+			WREG32_PLL(RADEON_SCLK_CNTL, tmp);
+
+			tmp = RREG32_PLL(RADEON_SCLK_MORE_CNTL);
+			tmp |= RADEON_SCLK_MORE_FORCEON;
+			WREG32_PLL(RADEON_SCLK_MORE_CNTL, tmp);
+
+			tmp = RREG32_PLL(RADEON_MCLK_CNTL);
+			tmp |= (RADEON_FORCEON_MCLKA |
+				RADEON_FORCEON_MCLKB |
+				RADEON_FORCEON_YCLKA |
+				RADEON_FORCEON_YCLKB | RADEON_FORCEON_MC);
+			WREG32_PLL(RADEON_MCLK_CNTL, tmp);
+
+			tmp = RREG32_PLL(RADEON_VCLK_ECP_CNTL);
+			tmp &= ~(RADEON_PIXCLK_ALWAYS_ONb |
+				 RADEON_PIXCLK_DAC_ALWAYS_ONb |
+				 R300_DISP_DAC_PIXCLK_DAC_BLANK_OFF);
+			WREG32_PLL(RADEON_VCLK_ECP_CNTL, tmp);
+
+			tmp = RREG32_PLL(RADEON_PIXCLKS_CNTL);
+			tmp &= ~(RADEON_PIX2CLK_ALWAYS_ONb |
+				 RADEON_PIX2CLK_DAC_ALWAYS_ONb |
+				 RADEON_DISP_TVOUT_PIXCLK_TV_ALWAYS_ONb |
+				 R300_DVOCLK_ALWAYS_ONb |
+				 RADEON_PIXCLK_BLEND_ALWAYS_ONb |
+				 RADEON_PIXCLK_GV_ALWAYS_ONb |
+				 R300_PIXCLK_DVO_ALWAYS_ONb |
+				 RADEON_PIXCLK_LVDS_ALWAYS_ONb |
+				 RADEON_PIXCLK_TMDS_ALWAYS_ONb |
+				 R300_PIXCLK_TRANS_ALWAYS_ONb |
+				 R300_PIXCLK_TVO_ALWAYS_ONb |
+				 R300_P2G2CLK_ALWAYS_ONb |
+				 R300_P2G2CLK_ALWAYS_ONb |
+				 R300_DISP_DAC_PIXCLK_DAC2_BLANK_OFF);
+			WREG32_PLL(RADEON_PIXCLKS_CNTL, tmp);
+		} else {
+			tmp = RREG32_PLL(RADEON_SCLK_CNTL);
+			tmp |= (RADEON_SCLK_FORCE_CP | RADEON_SCLK_FORCE_E2);
+			tmp |= RADEON_SCLK_FORCE_SE;
+
+			if (rdev->flags & RADEON_SINGLE_CRTC) {
+				tmp |= (RADEON_SCLK_FORCE_RB |
+					RADEON_SCLK_FORCE_TDM |
+					RADEON_SCLK_FORCE_TAM |
+					RADEON_SCLK_FORCE_PB |
+					RADEON_SCLK_FORCE_RE |
+					RADEON_SCLK_FORCE_VIP |
+					RADEON_SCLK_FORCE_IDCT |
+					RADEON_SCLK_FORCE_TOP |
+					RADEON_SCLK_FORCE_DISP1 |
+					RADEON_SCLK_FORCE_DISP2 |
+					RADEON_SCLK_FORCE_HDP);
+			} else if ((rdev->family == CHIP_R300) ||
+				   (rdev->family == CHIP_R350)) {
+				tmp |= (RADEON_SCLK_FORCE_HDP |
+					RADEON_SCLK_FORCE_DISP1 |
+					RADEON_SCLK_FORCE_DISP2 |
+					RADEON_SCLK_FORCE_TOP |
+					RADEON_SCLK_FORCE_IDCT |
+					RADEON_SCLK_FORCE_VIP);
+			}
+			WREG32_PLL(RADEON_SCLK_CNTL, tmp);
+
+			udelay(16000);
+
+			if ((rdev->family == CHIP_R300) ||
+			    (rdev->family == CHIP_R350)) {
+				tmp = RREG32_PLL(R300_SCLK_CNTL2);
+				tmp |= (R300_SCLK_FORCE_TCL |
+					R300_SCLK_FORCE_GA |
+					R300_SCLK_FORCE_CBA);
+				WREG32_PLL(R300_SCLK_CNTL2, tmp);
+				udelay(16000);
+			}
+
+			if (rdev->flags & RADEON_IS_IGP) {
+				tmp = RREG32_PLL(RADEON_MCLK_CNTL);
+				tmp &= ~(RADEON_FORCEON_MCLKA |
+					 RADEON_FORCEON_YCLKA);
+				WREG32_PLL(RADEON_MCLK_CNTL, tmp);
+				udelay(16000);
+			}
+
+			if ((rdev->family == CHIP_RV200) ||
+			    (rdev->family == CHIP_RV250) ||
+			    (rdev->family == CHIP_RV280)) {
+				tmp = RREG32_PLL(RADEON_SCLK_MORE_CNTL);
+				tmp |= RADEON_SCLK_MORE_FORCEON;
+				WREG32_PLL(RADEON_SCLK_MORE_CNTL, tmp);
+				udelay(16000);
+			}
+
+			tmp = RREG32_PLL(RADEON_PIXCLKS_CNTL);
+			tmp &= ~(RADEON_PIX2CLK_ALWAYS_ONb |
+				 RADEON_PIX2CLK_DAC_ALWAYS_ONb |
+				 RADEON_PIXCLK_BLEND_ALWAYS_ONb |
+				 RADEON_PIXCLK_GV_ALWAYS_ONb |
+				 RADEON_PIXCLK_DIG_TMDS_ALWAYS_ONb |
+				 RADEON_PIXCLK_LVDS_ALWAYS_ONb |
+				 RADEON_PIXCLK_TMDS_ALWAYS_ONb);
+
+			WREG32_PLL(RADEON_PIXCLKS_CNTL, tmp);
+			udelay(16000);
+
+			tmp = RREG32_PLL(RADEON_VCLK_ECP_CNTL);
+			tmp &= ~(RADEON_PIXCLK_ALWAYS_ONb |
+				 RADEON_PIXCLK_DAC_ALWAYS_ONb);
+			WREG32_PLL(RADEON_VCLK_ECP_CNTL, tmp);
+		}
+	}
+}
+
+static void radeon_apply_clock_quirks(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+
+	/* XXX make sure engine is idle */
+
+	if (rdev->family < CHIP_RS600) {
+		tmp = RREG32_PLL(RADEON_SCLK_CNTL);
+		if (ASIC_IS_R300(rdev) || ASIC_IS_RV100(rdev))
+			tmp |= RADEON_SCLK_FORCE_CP | RADEON_SCLK_FORCE_VIP;
+		if ((rdev->family == CHIP_RV250)
+		    || (rdev->family == CHIP_RV280))
+			tmp |=
+			    RADEON_SCLK_FORCE_DISP1 | RADEON_SCLK_FORCE_DISP2;
+		if ((rdev->family == CHIP_RV350)
+		    || (rdev->family == CHIP_RV380))
+			tmp |= R300_SCLK_FORCE_VAP;
+		if (rdev->family == CHIP_R420)
+			tmp |= R300_SCLK_FORCE_PX | R300_SCLK_FORCE_TX;
+		WREG32_PLL(RADEON_SCLK_CNTL, tmp);
+	} else if (rdev->family < CHIP_R600) {
+		tmp = RREG32_PLL(AVIVO_CP_DYN_CNTL);
+		tmp |= AVIVO_CP_FORCEON;
+		WREG32_PLL(AVIVO_CP_DYN_CNTL, tmp);
+
+		tmp = RREG32_PLL(AVIVO_E2_DYN_CNTL);
+		tmp |= AVIVO_E2_FORCEON;
+		WREG32_PLL(AVIVO_E2_DYN_CNTL, tmp);
+
+		tmp = RREG32_PLL(AVIVO_IDCT_DYN_CNTL);
+		tmp |= AVIVO_IDCT_FORCEON;
+		WREG32_PLL(AVIVO_IDCT_DYN_CNTL, tmp);
+	}
+}
+
+int radeon_static_clocks_init(struct drm_device *dev)
+{
+	struct radeon_device *rdev = dev->dev_private;
+
+	/* XXX make sure engine is idle */
+
+	if (radeon_dynclks != -1) {
+		if (radeon_dynclks)
+			radeon_set_clock_gating(rdev, 1);
+	}
+	radeon_apply_clock_quirks(rdev);
+	return 0;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
new file mode 100644
index 00000000000..be52198fa56
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -0,0 +1,2476 @@
+/*
+ * Copyright 2004 ATI Technologies Inc., Markham, Ontario
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ */
+#include "drmP.h"
+#include "radeon_drm.h"
+#include "radeon.h"
+#include "atom.h"
+
+#ifdef CONFIG_PPC_PMAC
+/* not sure which of these are needed */
+#include <asm/machdep.h>
+#include <asm/pmac_feature.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#endif /* CONFIG_PPC_PMAC */
+
+/* from radeon_encoder.c */
+extern uint32_t
+radeon_get_encoder_id(struct drm_device *dev, uint32_t supported_device,
+		      uint8_t dac);
+extern void radeon_link_encoder_connector(struct drm_device *dev);
+
+/* from radeon_connector.c */
+extern void
+radeon_add_legacy_connector(struct drm_device *dev,
+			    uint32_t connector_id,
+			    uint32_t supported_device,
+			    int connector_type,
+			    struct radeon_i2c_bus_rec *i2c_bus);
+
+/* from radeon_legacy_encoder.c */
+extern void
+radeon_add_legacy_encoder(struct drm_device *dev, uint32_t encoder_id,
+			  uint32_t supported_device);
+
+/* old legacy ATI BIOS routines */
+
+/* COMBIOS table offsets */
+enum radeon_combios_table_offset {
+	/* absolute offset tables */
+	COMBIOS_ASIC_INIT_1_TABLE,
+	COMBIOS_BIOS_SUPPORT_TABLE,
+	COMBIOS_DAC_PROGRAMMING_TABLE,
+	COMBIOS_MAX_COLOR_DEPTH_TABLE,
+	COMBIOS_CRTC_INFO_TABLE,
+	COMBIOS_PLL_INFO_TABLE,
+	COMBIOS_TV_INFO_TABLE,
+	COMBIOS_DFP_INFO_TABLE,
+	COMBIOS_HW_CONFIG_INFO_TABLE,
+	COMBIOS_MULTIMEDIA_INFO_TABLE,
+	COMBIOS_TV_STD_PATCH_TABLE,
+	COMBIOS_LCD_INFO_TABLE,
+	COMBIOS_MOBILE_INFO_TABLE,
+	COMBIOS_PLL_INIT_TABLE,
+	COMBIOS_MEM_CONFIG_TABLE,
+	COMBIOS_SAVE_MASK_TABLE,
+	COMBIOS_HARDCODED_EDID_TABLE,
+	COMBIOS_ASIC_INIT_2_TABLE,
+	COMBIOS_CONNECTOR_INFO_TABLE,
+	COMBIOS_DYN_CLK_1_TABLE,
+	COMBIOS_RESERVED_MEM_TABLE,
+	COMBIOS_EXT_TMDS_INFO_TABLE,
+	COMBIOS_MEM_CLK_INFO_TABLE,
+	COMBIOS_EXT_DAC_INFO_TABLE,
+	COMBIOS_MISC_INFO_TABLE,
+	COMBIOS_CRT_INFO_TABLE,
+	COMBIOS_INTEGRATED_SYSTEM_INFO_TABLE,
+	COMBIOS_COMPONENT_VIDEO_INFO_TABLE,
+	COMBIOS_FAN_SPEED_INFO_TABLE,
+	COMBIOS_OVERDRIVE_INFO_TABLE,
+	COMBIOS_OEM_INFO_TABLE,
+	COMBIOS_DYN_CLK_2_TABLE,
+	COMBIOS_POWER_CONNECTOR_INFO_TABLE,
+	COMBIOS_I2C_INFO_TABLE,
+	/* relative offset tables */
+	COMBIOS_ASIC_INIT_3_TABLE,	/* offset from misc info */
+	COMBIOS_ASIC_INIT_4_TABLE,	/* offset from misc info */
+	COMBIOS_DETECTED_MEM_TABLE,	/* offset from misc info */
+	COMBIOS_ASIC_INIT_5_TABLE,	/* offset from misc info */
+	COMBIOS_RAM_RESET_TABLE,	/* offset from mem config */
+	COMBIOS_POWERPLAY_INFO_TABLE,	/* offset from mobile info */
+	COMBIOS_GPIO_INFO_TABLE,	/* offset from mobile info */
+	COMBIOS_LCD_DDC_INFO_TABLE,	/* offset from mobile info */
+	COMBIOS_TMDS_POWER_TABLE,	/* offset from mobile info */
+	COMBIOS_TMDS_POWER_ON_TABLE,	/* offset from tmds power */
+	COMBIOS_TMDS_POWER_OFF_TABLE,	/* offset from tmds power */
+};
+
+enum radeon_combios_ddc {
+	DDC_NONE_DETECTED,
+	DDC_MONID,
+	DDC_DVI,
+	DDC_VGA,
+	DDC_CRT2,
+	DDC_LCD,
+	DDC_GPIO,
+};
+
+enum radeon_combios_connector {
+	CONNECTOR_NONE_LEGACY,
+	CONNECTOR_PROPRIETARY_LEGACY,
+	CONNECTOR_CRT_LEGACY,
+	CONNECTOR_DVI_I_LEGACY,
+	CONNECTOR_DVI_D_LEGACY,
+	CONNECTOR_CTV_LEGACY,
+	CONNECTOR_STV_LEGACY,
+	CONNECTOR_UNSUPPORTED_LEGACY
+};
+
+const int legacy_connector_convert[] = {
+	DRM_MODE_CONNECTOR_Unknown,
+	DRM_MODE_CONNECTOR_DVID,
+	DRM_MODE_CONNECTOR_VGA,
+	DRM_MODE_CONNECTOR_DVII,
+	DRM_MODE_CONNECTOR_DVID,
+	DRM_MODE_CONNECTOR_Composite,
+	DRM_MODE_CONNECTOR_SVIDEO,
+	DRM_MODE_CONNECTOR_Unknown,
+};
+
+static uint16_t combios_get_table_offset(struct drm_device *dev,
+					 enum radeon_combios_table_offset table)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	int rev;
+	uint16_t offset = 0, check_offset;
+
+	switch (table) {
+		/* absolute offset tables */
+	case COMBIOS_ASIC_INIT_1_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0xc);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_BIOS_SUPPORT_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x14);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_DAC_PROGRAMMING_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x2a);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_MAX_COLOR_DEPTH_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x2c);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_CRTC_INFO_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x2e);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_PLL_INFO_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x30);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_TV_INFO_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x32);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_DFP_INFO_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x34);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_HW_CONFIG_INFO_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x36);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_MULTIMEDIA_INFO_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x38);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_TV_STD_PATCH_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x3e);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_LCD_INFO_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x40);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_MOBILE_INFO_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x42);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_PLL_INIT_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x46);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_MEM_CONFIG_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x48);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_SAVE_MASK_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x4a);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_HARDCODED_EDID_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x4c);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_ASIC_INIT_2_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x4e);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_CONNECTOR_INFO_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x50);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_DYN_CLK_1_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x52);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_RESERVED_MEM_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x54);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_EXT_TMDS_INFO_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x58);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_MEM_CLK_INFO_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x5a);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_EXT_DAC_INFO_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x5c);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_MISC_INFO_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x5e);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_CRT_INFO_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x60);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_INTEGRATED_SYSTEM_INFO_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x62);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_COMPONENT_VIDEO_INFO_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x64);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_FAN_SPEED_INFO_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x66);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_OVERDRIVE_INFO_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x68);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_OEM_INFO_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x6a);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_DYN_CLK_2_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x6c);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_POWER_CONNECTOR_INFO_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x6e);
+		if (check_offset)
+			offset = check_offset;
+		break;
+	case COMBIOS_I2C_INFO_TABLE:
+		check_offset = RBIOS16(rdev->bios_header_start + 0x70);
+		if (check_offset)
+			offset = check_offset;
+		break;
+		/* relative offset tables */
+	case COMBIOS_ASIC_INIT_3_TABLE:	/* offset from misc info */
+		check_offset =
+		    combios_get_table_offset(dev, COMBIOS_MISC_INFO_TABLE);
+		if (check_offset) {
+			rev = RBIOS8(check_offset);
+			if (rev > 0) {
+				check_offset = RBIOS16(check_offset + 0x3);
+				if (check_offset)
+					offset = check_offset;
+			}
+		}
+		break;
+	case COMBIOS_ASIC_INIT_4_TABLE:	/* offset from misc info */
+		check_offset =
+		    combios_get_table_offset(dev, COMBIOS_MISC_INFO_TABLE);
+		if (check_offset) {
+			rev = RBIOS8(check_offset);
+			if (rev > 0) {
+				check_offset = RBIOS16(check_offset + 0x5);
+				if (check_offset)
+					offset = check_offset;
+			}
+		}
+		break;
+	case COMBIOS_DETECTED_MEM_TABLE:	/* offset from misc info */
+		check_offset =
+		    combios_get_table_offset(dev, COMBIOS_MISC_INFO_TABLE);
+		if (check_offset) {
+			rev = RBIOS8(check_offset);
+			if (rev > 0) {
+				check_offset = RBIOS16(check_offset + 0x7);
+				if (check_offset)
+					offset = check_offset;
+			}
+		}
+		break;
+	case COMBIOS_ASIC_INIT_5_TABLE:	/* offset from misc info */
+		check_offset =
+		    combios_get_table_offset(dev, COMBIOS_MISC_INFO_TABLE);
+		if (check_offset) {
+			rev = RBIOS8(check_offset);
+			if (rev == 2) {
+				check_offset = RBIOS16(check_offset + 0x9);
+				if (check_offset)
+					offset = check_offset;
+			}
+		}
+		break;
+	case COMBIOS_RAM_RESET_TABLE:	/* offset from mem config */
+		check_offset =
+		    combios_get_table_offset(dev, COMBIOS_MEM_CONFIG_TABLE);
+		if (check_offset) {
+			while (RBIOS8(check_offset++));
+			check_offset += 2;
+			if (check_offset)
+				offset = check_offset;
+		}
+		break;
+	case COMBIOS_POWERPLAY_INFO_TABLE:	/* offset from mobile info */
+		check_offset =
+		    combios_get_table_offset(dev, COMBIOS_MOBILE_INFO_TABLE);
+		if (check_offset) {
+			check_offset = RBIOS16(check_offset + 0x11);
+			if (check_offset)
+				offset = check_offset;
+		}
+		break;
+	case COMBIOS_GPIO_INFO_TABLE:	/* offset from mobile info */
+		check_offset =
+		    combios_get_table_offset(dev, COMBIOS_MOBILE_INFO_TABLE);
+		if (check_offset) {
+			check_offset = RBIOS16(check_offset + 0x13);
+			if (check_offset)
+				offset = check_offset;
+		}
+		break;
+	case COMBIOS_LCD_DDC_INFO_TABLE:	/* offset from mobile info */
+		check_offset =
+		    combios_get_table_offset(dev, COMBIOS_MOBILE_INFO_TABLE);
+		if (check_offset) {
+			check_offset = RBIOS16(check_offset + 0x15);
+			if (check_offset)
+				offset = check_offset;
+		}
+		break;
+	case COMBIOS_TMDS_POWER_TABLE:	/* offset from mobile info */
+		check_offset =
+		    combios_get_table_offset(dev, COMBIOS_MOBILE_INFO_TABLE);
+		if (check_offset) {
+			check_offset = RBIOS16(check_offset + 0x17);
+			if (check_offset)
+				offset = check_offset;
+		}
+		break;
+	case COMBIOS_TMDS_POWER_ON_TABLE:	/* offset from tmds power */
+		check_offset =
+		    combios_get_table_offset(dev, COMBIOS_TMDS_POWER_TABLE);
+		if (check_offset) {
+			check_offset = RBIOS16(check_offset + 0x2);
+			if (check_offset)
+				offset = check_offset;
+		}
+		break;
+	case COMBIOS_TMDS_POWER_OFF_TABLE:	/* offset from tmds power */
+		check_offset =
+		    combios_get_table_offset(dev, COMBIOS_TMDS_POWER_TABLE);
+		if (check_offset) {
+			check_offset = RBIOS16(check_offset + 0x4);
+			if (check_offset)
+				offset = check_offset;
+		}
+		break;
+	default:
+		break;
+	}
+
+	return offset;
+
+}
+
+struct radeon_i2c_bus_rec combios_setup_i2c_bus(int ddc_line)
+{
+	struct radeon_i2c_bus_rec i2c;
+
+	i2c.mask_clk_mask = RADEON_GPIO_EN_1;
+	i2c.mask_data_mask = RADEON_GPIO_EN_0;
+	i2c.a_clk_mask = RADEON_GPIO_A_1;
+	i2c.a_data_mask = RADEON_GPIO_A_0;
+	i2c.put_clk_mask = RADEON_GPIO_EN_1;
+	i2c.put_data_mask = RADEON_GPIO_EN_0;
+	i2c.get_clk_mask = RADEON_GPIO_Y_1;
+	i2c.get_data_mask = RADEON_GPIO_Y_0;
+	if ((ddc_line == RADEON_LCD_GPIO_MASK) ||
+	    (ddc_line == RADEON_MDGPIO_EN_REG)) {
+		i2c.mask_clk_reg = ddc_line;
+		i2c.mask_data_reg = ddc_line;
+		i2c.a_clk_reg = ddc_line;
+		i2c.a_data_reg = ddc_line;
+		i2c.put_clk_reg = ddc_line;
+		i2c.put_data_reg = ddc_line;
+		i2c.get_clk_reg = ddc_line + 4;
+		i2c.get_data_reg = ddc_line + 4;
+	} else {
+		i2c.mask_clk_reg = ddc_line;
+		i2c.mask_data_reg = ddc_line;
+		i2c.a_clk_reg = ddc_line;
+		i2c.a_data_reg = ddc_line;
+		i2c.put_clk_reg = ddc_line;
+		i2c.put_data_reg = ddc_line;
+		i2c.get_clk_reg = ddc_line;
+		i2c.get_data_reg = ddc_line;
+	}
+
+	if (ddc_line)
+		i2c.valid = true;
+	else
+		i2c.valid = false;
+
+	return i2c;
+}
+
+bool radeon_combios_get_clock_info(struct drm_device *dev)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	uint16_t pll_info;
+	struct radeon_pll *p1pll = &rdev->clock.p1pll;
+	struct radeon_pll *p2pll = &rdev->clock.p2pll;
+	struct radeon_pll *spll = &rdev->clock.spll;
+	struct radeon_pll *mpll = &rdev->clock.mpll;
+	int8_t rev;
+	uint16_t sclk, mclk;
+
+	if (rdev->bios == NULL)
+		return NULL;
+
+	pll_info = combios_get_table_offset(dev, COMBIOS_PLL_INFO_TABLE);
+	if (pll_info) {
+		rev = RBIOS8(pll_info);
+
+		/* pixel clocks */
+		p1pll->reference_freq = RBIOS16(pll_info + 0xe);
+		p1pll->reference_div = RBIOS16(pll_info + 0x10);
+		p1pll->pll_out_min = RBIOS32(pll_info + 0x12);
+		p1pll->pll_out_max = RBIOS32(pll_info + 0x16);
+
+		if (rev > 9) {
+			p1pll->pll_in_min = RBIOS32(pll_info + 0x36);
+			p1pll->pll_in_max = RBIOS32(pll_info + 0x3a);
+		} else {
+			p1pll->pll_in_min = 40;
+			p1pll->pll_in_max = 500;
+		}
+		*p2pll = *p1pll;
+
+		/* system clock */
+		spll->reference_freq = RBIOS16(pll_info + 0x1a);
+		spll->reference_div = RBIOS16(pll_info + 0x1c);
+		spll->pll_out_min = RBIOS32(pll_info + 0x1e);
+		spll->pll_out_max = RBIOS32(pll_info + 0x22);
+
+		if (rev > 10) {
+			spll->pll_in_min = RBIOS32(pll_info + 0x48);
+			spll->pll_in_max = RBIOS32(pll_info + 0x4c);
+		} else {
+			/* ??? */
+			spll->pll_in_min = 40;
+			spll->pll_in_max = 500;
+		}
+
+		/* memory clock */
+		mpll->reference_freq = RBIOS16(pll_info + 0x26);
+		mpll->reference_div = RBIOS16(pll_info + 0x28);
+		mpll->pll_out_min = RBIOS32(pll_info + 0x2a);
+		mpll->pll_out_max = RBIOS32(pll_info + 0x2e);
+
+		if (rev > 10) {
+			mpll->pll_in_min = RBIOS32(pll_info + 0x5a);
+			mpll->pll_in_max = RBIOS32(pll_info + 0x5e);
+		} else {
+			/* ??? */
+			mpll->pll_in_min = 40;
+			mpll->pll_in_max = 500;
+		}
+
+		/* default sclk/mclk */
+		sclk = RBIOS16(pll_info + 0xa);
+		mclk = RBIOS16(pll_info + 0x8);
+		if (sclk == 0)
+			sclk = 200 * 100;
+		if (mclk == 0)
+			mclk = 200 * 100;
+
+		rdev->clock.default_sclk = sclk;
+		rdev->clock.default_mclk = mclk;
+
+		return true;
+	}
+	return false;
+}
+
+struct radeon_encoder_primary_dac *radeon_combios_get_primary_dac_info(struct
+								       radeon_encoder
+								       *encoder)
+{
+	struct drm_device *dev = encoder->base.dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint16_t dac_info;
+	uint8_t rev, bg, dac;
+	struct radeon_encoder_primary_dac *p_dac = NULL;
+
+	if (rdev->bios == NULL)
+		return NULL;
+
+	/* check CRT table */
+	dac_info = combios_get_table_offset(dev, COMBIOS_CRT_INFO_TABLE);
+	if (dac_info) {
+		p_dac =
+		    kzalloc(sizeof(struct radeon_encoder_primary_dac),
+			    GFP_KERNEL);
+
+		if (!p_dac)
+			return NULL;
+
+		rev = RBIOS8(dac_info) & 0x3;
+		if (rev < 2) {
+			bg = RBIOS8(dac_info + 0x2) & 0xf;
+			dac = (RBIOS8(dac_info + 0x2) >> 4) & 0xf;
+			p_dac->ps2_pdac_adj = (bg << 8) | (dac);
+		} else {
+			bg = RBIOS8(dac_info + 0x2) & 0xf;
+			dac = RBIOS8(dac_info + 0x3) & 0xf;
+			p_dac->ps2_pdac_adj = (bg << 8) | (dac);
+		}
+
+	}
+
+	return p_dac;
+}
+
+static enum radeon_tv_std
+radeon_combios_get_tv_info(struct radeon_encoder *encoder)
+{
+	struct drm_device *dev = encoder->base.dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint16_t tv_info;
+	enum radeon_tv_std tv_std = TV_STD_NTSC;
+
+	tv_info = combios_get_table_offset(dev, COMBIOS_TV_INFO_TABLE);
+	if (tv_info) {
+		if (RBIOS8(tv_info + 6) == 'T') {
+			switch (RBIOS8(tv_info + 7) & 0xf) {
+			case 1:
+				tv_std = TV_STD_NTSC;
+				DRM_INFO("Default TV standard: NTSC\n");
+				break;
+			case 2:
+				tv_std = TV_STD_PAL;
+				DRM_INFO("Default TV standard: PAL\n");
+				break;
+			case 3:
+				tv_std = TV_STD_PAL_M;
+				DRM_INFO("Default TV standard: PAL-M\n");
+				break;
+			case 4:
+				tv_std = TV_STD_PAL_60;
+				DRM_INFO("Default TV standard: PAL-60\n");
+				break;
+			case 5:
+				tv_std = TV_STD_NTSC_J;
+				DRM_INFO("Default TV standard: NTSC-J\n");
+				break;
+			case 6:
+				tv_std = TV_STD_SCART_PAL;
+				DRM_INFO("Default TV standard: SCART-PAL\n");
+				break;
+			default:
+				tv_std = TV_STD_NTSC;
+				DRM_INFO
+				    ("Unknown TV standard; defaulting to NTSC\n");
+				break;
+			}
+
+			switch ((RBIOS8(tv_info + 9) >> 2) & 0x3) {
+			case 0:
+				DRM_INFO("29.498928713 MHz TV ref clk\n");
+				break;
+			case 1:
+				DRM_INFO("28.636360000 MHz TV ref clk\n");
+				break;
+			case 2:
+				DRM_INFO("14.318180000 MHz TV ref clk\n");
+				break;
+			case 3:
+				DRM_INFO("27.000000000 MHz TV ref clk\n");
+				break;
+			default:
+				break;
+			}
+		}
+	}
+	return tv_std;
+}
+
+static const uint32_t default_tvdac_adj[CHIP_LAST] = {
+	0x00000000,		/* r100  */
+	0x00280000,		/* rv100 */
+	0x00000000,		/* rs100 */
+	0x00880000,		/* rv200 */
+	0x00000000,		/* rs200 */
+	0x00000000,		/* r200  */
+	0x00770000,		/* rv250 */
+	0x00290000,		/* rs300 */
+	0x00560000,		/* rv280 */
+	0x00780000,		/* r300  */
+	0x00770000,		/* r350  */
+	0x00780000,		/* rv350 */
+	0x00780000,		/* rv380 */
+	0x01080000,		/* r420  */
+	0x01080000,		/* r423  */
+	0x01080000,		/* rv410 */
+	0x00780000,		/* rs400 */
+	0x00780000,		/* rs480 */
+};
+
+static struct radeon_encoder_tv_dac
+    *radeon_legacy_get_tv_dac_info_from_table(struct radeon_device *rdev)
+{
+	struct radeon_encoder_tv_dac *tv_dac = NULL;
+
+	tv_dac = kzalloc(sizeof(struct radeon_encoder_tv_dac), GFP_KERNEL);
+
+	if (!tv_dac)
+		return NULL;
+
+	tv_dac->ps2_tvdac_adj = default_tvdac_adj[rdev->family];
+	if ((rdev->flags & RADEON_IS_MOBILITY) && (rdev->family == CHIP_RV250))
+		tv_dac->ps2_tvdac_adj = 0x00880000;
+	tv_dac->pal_tvdac_adj = tv_dac->ps2_tvdac_adj;
+	tv_dac->ntsc_tvdac_adj = tv_dac->ps2_tvdac_adj;
+
+	return tv_dac;
+}
+
+struct radeon_encoder_tv_dac *radeon_combios_get_tv_dac_info(struct
+							     radeon_encoder
+							     *encoder)
+{
+	struct drm_device *dev = encoder->base.dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint16_t dac_info;
+	uint8_t rev, bg, dac;
+	struct radeon_encoder_tv_dac *tv_dac = NULL;
+
+	if (rdev->bios == NULL)
+		return radeon_legacy_get_tv_dac_info_from_table(rdev);
+
+	/* first check TV table */
+	dac_info = combios_get_table_offset(dev, COMBIOS_TV_INFO_TABLE);
+	if (dac_info) {
+		tv_dac =
+		    kzalloc(sizeof(struct radeon_encoder_tv_dac), GFP_KERNEL);
+
+		if (!tv_dac)
+			return NULL;
+
+		rev = RBIOS8(dac_info + 0x3);
+		if (rev > 4) {
+			bg = RBIOS8(dac_info + 0xc) & 0xf;
+			dac = RBIOS8(dac_info + 0xd) & 0xf;
+			tv_dac->ps2_tvdac_adj = (bg << 16) | (dac << 20);
+
+			bg = RBIOS8(dac_info + 0xe) & 0xf;
+			dac = RBIOS8(dac_info + 0xf) & 0xf;
+			tv_dac->pal_tvdac_adj = (bg << 16) | (dac << 20);
+
+			bg = RBIOS8(dac_info + 0x10) & 0xf;
+			dac = RBIOS8(dac_info + 0x11) & 0xf;
+			tv_dac->ntsc_tvdac_adj = (bg << 16) | (dac << 20);
+		} else if (rev > 1) {
+			bg = RBIOS8(dac_info + 0xc) & 0xf;
+			dac = (RBIOS8(dac_info + 0xc) >> 4) & 0xf;
+			tv_dac->ps2_tvdac_adj = (bg << 16) | (dac << 20);
+
+			bg = RBIOS8(dac_info + 0xd) & 0xf;
+			dac = (RBIOS8(dac_info + 0xd) >> 4) & 0xf;
+			tv_dac->pal_tvdac_adj = (bg << 16) | (dac << 20);
+
+			bg = RBIOS8(dac_info + 0xe) & 0xf;
+			dac = (RBIOS8(dac_info + 0xe) >> 4) & 0xf;
+			tv_dac->ntsc_tvdac_adj = (bg << 16) | (dac << 20);
+		}
+
+		tv_dac->tv_std = radeon_combios_get_tv_info(encoder);
+
+	} else {
+		/* then check CRT table */
+		dac_info =
+		    combios_get_table_offset(dev, COMBIOS_CRT_INFO_TABLE);
+		if (dac_info) {
+			tv_dac =
+			    kzalloc(sizeof(struct radeon_encoder_tv_dac),
+				    GFP_KERNEL);
+
+			if (!tv_dac)
+				return NULL;
+
+			rev = RBIOS8(dac_info) & 0x3;
+			if (rev < 2) {
+				bg = RBIOS8(dac_info + 0x3) & 0xf;
+				dac = (RBIOS8(dac_info + 0x3) >> 4) & 0xf;
+				tv_dac->ps2_tvdac_adj =
+				    (bg << 16) | (dac << 20);
+				tv_dac->pal_tvdac_adj = tv_dac->ps2_tvdac_adj;
+				tv_dac->ntsc_tvdac_adj = tv_dac->ps2_tvdac_adj;
+			} else {
+				bg = RBIOS8(dac_info + 0x4) & 0xf;
+				dac = RBIOS8(dac_info + 0x5) & 0xf;
+				tv_dac->ps2_tvdac_adj =
+				    (bg << 16) | (dac << 20);
+				tv_dac->pal_tvdac_adj = tv_dac->ps2_tvdac_adj;
+				tv_dac->ntsc_tvdac_adj = tv_dac->ps2_tvdac_adj;
+			}
+		}
+	}
+
+	return tv_dac;
+}
+
+static struct radeon_encoder_lvds *radeon_legacy_get_lvds_info_from_regs(struct
+									 radeon_device
+									 *rdev)
+{
+	struct radeon_encoder_lvds *lvds = NULL;
+	uint32_t fp_vert_stretch, fp_horz_stretch;
+	uint32_t ppll_div_sel, ppll_val;
+
+	lvds = kzalloc(sizeof(struct radeon_encoder_lvds), GFP_KERNEL);
+
+	if (!lvds)
+		return NULL;
+
+	fp_vert_stretch = RREG32(RADEON_FP_VERT_STRETCH);
+	fp_horz_stretch = RREG32(RADEON_FP_HORZ_STRETCH);
+
+	if (fp_vert_stretch & RADEON_VERT_STRETCH_ENABLE)
+		lvds->native_mode.panel_yres =
+		    ((fp_vert_stretch & RADEON_VERT_PANEL_SIZE) >>
+		     RADEON_VERT_PANEL_SHIFT) + 1;
+	else
+		lvds->native_mode.panel_yres =
+		    (RREG32(RADEON_CRTC_V_TOTAL_DISP) >> 16) + 1;
+
+	if (fp_horz_stretch & RADEON_HORZ_STRETCH_ENABLE)
+		lvds->native_mode.panel_xres =
+		    (((fp_horz_stretch & RADEON_HORZ_PANEL_SIZE) >>
+		      RADEON_HORZ_PANEL_SHIFT) + 1) * 8;
+	else
+		lvds->native_mode.panel_xres =
+		    ((RREG32(RADEON_CRTC_H_TOTAL_DISP) >> 16) + 1) * 8;
+
+	if ((lvds->native_mode.panel_xres < 640) ||
+	    (lvds->native_mode.panel_yres < 480)) {
+		lvds->native_mode.panel_xres = 640;
+		lvds->native_mode.panel_yres = 480;
+	}
+
+	ppll_div_sel = RREG8(RADEON_CLOCK_CNTL_INDEX + 1) & 0x3;
+	ppll_val = RREG32_PLL(RADEON_PPLL_DIV_0 + ppll_div_sel);
+	if ((ppll_val & 0x000707ff) == 0x1bb)
+		lvds->use_bios_dividers = false;
+	else {
+		lvds->panel_ref_divider =
+		    RREG32_PLL(RADEON_PPLL_REF_DIV) & 0x3ff;
+		lvds->panel_post_divider = (ppll_val >> 16) & 0x7;
+		lvds->panel_fb_divider = ppll_val & 0x7ff;
+
+		if ((lvds->panel_ref_divider != 0) &&
+		    (lvds->panel_fb_divider > 3))
+			lvds->use_bios_dividers = true;
+	}
+	lvds->panel_vcc_delay = 200;
+
+	DRM_INFO("Panel info derived from registers\n");
+	DRM_INFO("Panel Size %dx%d\n", lvds->native_mode.panel_xres,
+		 lvds->native_mode.panel_yres);
+
+	return lvds;
+}
+
+struct radeon_encoder_lvds *radeon_combios_get_lvds_info(struct radeon_encoder
+							 *encoder)
+{
+	struct drm_device *dev = encoder->base.dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint16_t lcd_info;
+	uint32_t panel_setup;
+	char stmp[30];
+	int tmp, i;
+	struct radeon_encoder_lvds *lvds = NULL;
+
+	if (rdev->bios == NULL)
+		return radeon_legacy_get_lvds_info_from_regs(rdev);
+
+	lcd_info = combios_get_table_offset(dev, COMBIOS_LCD_INFO_TABLE);
+
+	if (lcd_info) {
+		lvds = kzalloc(sizeof(struct radeon_encoder_lvds), GFP_KERNEL);
+
+		if (!lvds)
+			return NULL;
+
+		for (i = 0; i < 24; i++)
+			stmp[i] = RBIOS8(lcd_info + i + 1);
+		stmp[24] = 0;
+
+		DRM_INFO("Panel ID String: %s\n", stmp);
+
+		lvds->native_mode.panel_xres = RBIOS16(lcd_info + 0x19);
+		lvds->native_mode.panel_yres = RBIOS16(lcd_info + 0x1b);
+
+		DRM_INFO("Panel Size %dx%d\n", lvds->native_mode.panel_xres,
+			 lvds->native_mode.panel_yres);
+
+		lvds->panel_vcc_delay = RBIOS16(lcd_info + 0x2c);
+		if (lvds->panel_vcc_delay > 2000 || lvds->panel_vcc_delay < 0)
+			lvds->panel_vcc_delay = 2000;
+
+		lvds->panel_pwr_delay = RBIOS8(lcd_info + 0x24);
+		lvds->panel_digon_delay = RBIOS16(lcd_info + 0x38) & 0xf;
+		lvds->panel_blon_delay = (RBIOS16(lcd_info + 0x38) >> 4) & 0xf;
+
+		lvds->panel_ref_divider = RBIOS16(lcd_info + 0x2e);
+		lvds->panel_post_divider = RBIOS8(lcd_info + 0x30);
+		lvds->panel_fb_divider = RBIOS16(lcd_info + 0x31);
+		if ((lvds->panel_ref_divider != 0) &&
+		    (lvds->panel_fb_divider > 3))
+			lvds->use_bios_dividers = true;
+
+		panel_setup = RBIOS32(lcd_info + 0x39);
+		lvds->lvds_gen_cntl = 0xff00;
+		if (panel_setup & 0x1)
+			lvds->lvds_gen_cntl |= RADEON_LVDS_PANEL_FORMAT;
+
+		if ((panel_setup >> 4) & 0x1)
+			lvds->lvds_gen_cntl |= RADEON_LVDS_PANEL_TYPE;
+
+		switch ((panel_setup >> 8) & 0x7) {
+		case 0:
+			lvds->lvds_gen_cntl |= RADEON_LVDS_NO_FM;
+			break;
+		case 1:
+			lvds->lvds_gen_cntl |= RADEON_LVDS_2_GREY;
+			break;
+		case 2:
+			lvds->lvds_gen_cntl |= RADEON_LVDS_4_GREY;
+			break;
+		default:
+			break;
+		}
+
+		if ((panel_setup >> 16) & 0x1)
+			lvds->lvds_gen_cntl |= RADEON_LVDS_FP_POL_LOW;
+
+		if ((panel_setup >> 17) & 0x1)
+			lvds->lvds_gen_cntl |= RADEON_LVDS_LP_POL_LOW;
+
+		if ((panel_setup >> 18) & 0x1)
+			lvds->lvds_gen_cntl |= RADEON_LVDS_DTM_POL_LOW;
+
+		if ((panel_setup >> 23) & 0x1)
+			lvds->lvds_gen_cntl |= RADEON_LVDS_BL_CLK_SEL;
+
+		lvds->lvds_gen_cntl |= (panel_setup & 0xf0000000);
+
+		for (i = 0; i < 32; i++) {
+			tmp = RBIOS16(lcd_info + 64 + i * 2);
+			if (tmp == 0)
+				break;
+
+			if ((RBIOS16(tmp) == lvds->native_mode.panel_xres) &&
+			    (RBIOS16(tmp + 2) ==
+			     lvds->native_mode.panel_yres)) {
+				lvds->native_mode.hblank =
+				    (RBIOS16(tmp + 17) - RBIOS16(tmp + 19)) * 8;
+				lvds->native_mode.hoverplus =
+				    (RBIOS16(tmp + 21) - RBIOS16(tmp + 19) -
+				     1) * 8;
+				lvds->native_mode.hsync_width =
+				    RBIOS8(tmp + 23) * 8;
+
+				lvds->native_mode.vblank = (RBIOS16(tmp + 24) -
+							    RBIOS16(tmp + 26));
+				lvds->native_mode.voverplus =
+				    ((RBIOS16(tmp + 28) & 0x7ff) -
+				     RBIOS16(tmp + 26));
+				lvds->native_mode.vsync_width =
+				    ((RBIOS16(tmp + 28) & 0xf800) >> 11);
+				lvds->native_mode.dotclock =
+				    RBIOS16(tmp + 9) * 10;
+				lvds->native_mode.flags = 0;
+			}
+		}
+		encoder->native_mode = lvds->native_mode;
+	} else
+		DRM_INFO("No panel info found in BIOS\n");
+	return lvds;
+}
+
+static const struct radeon_tmds_pll default_tmds_pll[CHIP_LAST][4] = {
+	{{12000, 0xa1b}, {0xffffffff, 0xa3f}, {0, 0}, {0, 0}},	/* CHIP_R100  */
+	{{12000, 0xa1b}, {0xffffffff, 0xa3f}, {0, 0}, {0, 0}},	/* CHIP_RV100 */
+	{{0, 0}, {0, 0}, {0, 0}, {0, 0}},	/* CHIP_RS100 */
+	{{15000, 0xa1b}, {0xffffffff, 0xa3f}, {0, 0}, {0, 0}},	/* CHIP_RV200 */
+	{{12000, 0xa1b}, {0xffffffff, 0xa3f}, {0, 0}, {0, 0}},	/* CHIP_RS200 */
+	{{15000, 0xa1b}, {0xffffffff, 0xa3f}, {0, 0}, {0, 0}},	/* CHIP_R200  */
+	{{15500, 0x81b}, {0xffffffff, 0x83f}, {0, 0}, {0, 0}},	/* CHIP_RV250 */
+	{{0, 0}, {0, 0}, {0, 0}, {0, 0}},	/* CHIP_RS300 */
+	{{13000, 0x400f4}, {15000, 0x400f7}, {0xffffffff, 0x40111}, {0, 0}},	/* CHIP_RV280 */
+	{{0xffffffff, 0xb01cb}, {0, 0}, {0, 0}, {0, 0}},	/* CHIP_R300  */
+	{{0xffffffff, 0xb01cb}, {0, 0}, {0, 0}, {0, 0}},	/* CHIP_R350  */
+	{{15000, 0xb0155}, {0xffffffff, 0xb01cb}, {0, 0}, {0, 0}},	/* CHIP_RV350 */
+	{{15000, 0xb0155}, {0xffffffff, 0xb01cb}, {0, 0}, {0, 0}},	/* CHIP_RV380 */
+	{{0xffffffff, 0xb01cb}, {0, 0}, {0, 0}, {0, 0}},	/* CHIP_R420  */
+	{{0xffffffff, 0xb01cb}, {0, 0}, {0, 0}, {0, 0}},	/* CHIP_R423  */
+	{{0xffffffff, 0xb01cb}, {0, 0}, {0, 0}, {0, 0}},	/* CHIP_RV410 */
+	{{15000, 0xb0155}, {0xffffffff, 0xb01cb}, {0, 0}, {0, 0}},	/* CHIP_RS400 */
+	{{15000, 0xb0155}, {0xffffffff, 0xb01cb}, {0, 0}, {0, 0}},	/* CHIP_RS480 */
+};
+
+static struct radeon_encoder_int_tmds
+    *radeon_legacy_get_tmds_info_from_table(struct radeon_device *rdev)
+{
+	int i;
+	struct radeon_encoder_int_tmds *tmds = NULL;
+
+	tmds = kzalloc(sizeof(struct radeon_encoder_int_tmds), GFP_KERNEL);
+
+	if (!tmds)
+		return NULL;
+
+	for (i = 0; i < 4; i++) {
+		tmds->tmds_pll[i].value =
+		    default_tmds_pll[rdev->family][i].value;
+		tmds->tmds_pll[i].freq = default_tmds_pll[rdev->family][i].freq;
+	}
+
+	return tmds;
+}
+
+struct radeon_encoder_int_tmds *radeon_combios_get_tmds_info(struct
+							     radeon_encoder
+							     *encoder)
+{
+	struct drm_device *dev = encoder->base.dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint16_t tmds_info;
+	int i, n;
+	uint8_t ver;
+	struct radeon_encoder_int_tmds *tmds = NULL;
+
+	if (rdev->bios == NULL)
+		return radeon_legacy_get_tmds_info_from_table(rdev);
+
+	tmds_info = combios_get_table_offset(dev, COMBIOS_DFP_INFO_TABLE);
+
+	if (tmds_info) {
+		tmds =
+		    kzalloc(sizeof(struct radeon_encoder_int_tmds), GFP_KERNEL);
+
+		if (!tmds)
+			return NULL;
+
+		ver = RBIOS8(tmds_info);
+		DRM_INFO("DFP table revision: %d\n", ver);
+		if (ver == 3) {
+			n = RBIOS8(tmds_info + 5) + 1;
+			if (n > 4)
+				n = 4;
+			for (i = 0; i < n; i++) {
+				tmds->tmds_pll[i].value =
+				    RBIOS32(tmds_info + i * 10 + 0x08);
+				tmds->tmds_pll[i].freq =
+				    RBIOS16(tmds_info + i * 10 + 0x10);
+				DRM_DEBUG("TMDS PLL From COMBIOS %u %x\n",
+					  tmds->tmds_pll[i].freq,
+					  tmds->tmds_pll[i].value);
+			}
+		} else if (ver == 4) {
+			int stride = 0;
+			n = RBIOS8(tmds_info + 5) + 1;
+			if (n > 4)
+				n = 4;
+			for (i = 0; i < n; i++) {
+				tmds->tmds_pll[i].value =
+				    RBIOS32(tmds_info + stride + 0x08);
+				tmds->tmds_pll[i].freq =
+				    RBIOS16(tmds_info + stride + 0x10);
+				if (i == 0)
+					stride += 10;
+				else
+					stride += 6;
+				DRM_DEBUG("TMDS PLL From COMBIOS %u %x\n",
+					  tmds->tmds_pll[i].freq,
+					  tmds->tmds_pll[i].value);
+			}
+		}
+	} else
+		DRM_INFO("No TMDS info found in BIOS\n");
+	return tmds;
+}
+
+void radeon_combios_get_ext_tmds_info(struct radeon_encoder *encoder)
+{
+	struct drm_device *dev = encoder->base.dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint16_t ext_tmds_info;
+	uint8_t ver;
+
+	if (rdev->bios == NULL)
+		return;
+
+	ext_tmds_info =
+	    combios_get_table_offset(dev, COMBIOS_EXT_TMDS_INFO_TABLE);
+	if (ext_tmds_info) {
+		ver = RBIOS8(ext_tmds_info);
+		DRM_INFO("External TMDS Table revision: %d\n", ver);
+		// TODO
+	}
+}
+
+bool radeon_get_legacy_connector_info_from_table(struct drm_device *dev)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_i2c_bus_rec ddc_i2c;
+
+	rdev->mode_info.connector_table = radeon_connector_table;
+	if (rdev->mode_info.connector_table == CT_NONE) {
+#ifdef CONFIG_PPC_PMAC
+		if (machine_is_compatible("PowerBook3,3")) {
+			/* powerbook with VGA */
+			rdev->mode_info.connector_table = CT_POWERBOOK_VGA;
+		} else if (machine_is_compatible("PowerBook3,4") ||
+			   machine_is_compatible("PowerBook3,5")) {
+			/* powerbook with internal tmds */
+			rdev->mode_info.connector_table = CT_POWERBOOK_INTERNAL;
+		} else if (machine_is_compatible("PowerBook5,1") ||
+			   machine_is_compatible("PowerBook5,2") ||
+			   machine_is_compatible("PowerBook5,3") ||
+			   machine_is_compatible("PowerBook5,4") ||
+			   machine_is_compatible("PowerBook5,5")) {
+			/* powerbook with external single link tmds (sil164) */
+			rdev->mode_info.connector_table = CT_POWERBOOK_EXTERNAL;
+		} else if (machine_is_compatible("PowerBook5,6")) {
+			/* powerbook with external dual or single link tmds */
+			rdev->mode_info.connector_table = CT_POWERBOOK_EXTERNAL;
+		} else if (machine_is_compatible("PowerBook5,7") ||
+			   machine_is_compatible("PowerBook5,8") ||
+			   machine_is_compatible("PowerBook5,9")) {
+			/* PowerBook6,2 ? */
+			/* powerbook with external dual link tmds (sil1178?) */
+			rdev->mode_info.connector_table = CT_POWERBOOK_EXTERNAL;
+		} else if (machine_is_compatible("PowerBook4,1") ||
+			   machine_is_compatible("PowerBook4,2") ||
+			   machine_is_compatible("PowerBook4,3") ||
+			   machine_is_compatible("PowerBook6,3") ||
+			   machine_is_compatible("PowerBook6,5") ||
+			   machine_is_compatible("PowerBook6,7")) {
+			/* ibook */
+			rdev->mode_info.connector_table = CT_IBOOK;
+		} else if (machine_is_compatible("PowerMac4,4")) {
+			/* emac */
+			rdev->mode_info.connector_table = CT_EMAC;
+		} else if (machine_is_compatible("PowerMac10,1")) {
+			/* mini with internal tmds */
+			rdev->mode_info.connector_table = CT_MINI_INTERNAL;
+		} else if (machine_is_compatible("PowerMac10,2")) {
+			/* mini with external tmds */
+			rdev->mode_info.connector_table = CT_MINI_EXTERNAL;
+		} else if (machine_is_compatible("PowerMac12,1")) {
+			/* PowerMac8,1 ? */
+			/* imac g5 isight */
+			rdev->mode_info.connector_table = CT_IMAC_G5_ISIGHT;
+		} else
+#endif /* CONFIG_PPC_PMAC */
+			rdev->mode_info.connector_table = CT_GENERIC;
+	}
+
+	switch (rdev->mode_info.connector_table) {
+	case CT_GENERIC:
+		DRM_INFO("Connector Table: %d (generic)\n",
+			 rdev->mode_info.connector_table);
+		/* these are the most common settings */
+		if (rdev->flags & RADEON_SINGLE_CRTC) {
+			/* VGA - primary dac */
+			ddc_i2c = combios_setup_i2c_bus(RADEON_GPIO_VGA_DDC);
+			radeon_add_legacy_encoder(dev,
+						  radeon_get_encoder_id(dev,
+									ATOM_DEVICE_CRT1_SUPPORT,
+									1),
+						  ATOM_DEVICE_CRT1_SUPPORT);
+			radeon_add_legacy_connector(dev, 0,
+						    ATOM_DEVICE_CRT1_SUPPORT,
+						    DRM_MODE_CONNECTOR_VGA,
+						    &ddc_i2c);
+		} else if (rdev->flags & RADEON_IS_MOBILITY) {
+			/* LVDS */
+			ddc_i2c = combios_setup_i2c_bus(RADEON_LCD_GPIO_MASK);
+			radeon_add_legacy_encoder(dev,
+						  radeon_get_encoder_id(dev,
+									ATOM_DEVICE_LCD1_SUPPORT,
+									0),
+						  ATOM_DEVICE_LCD1_SUPPORT);
+			radeon_add_legacy_connector(dev, 0,
+						    ATOM_DEVICE_LCD1_SUPPORT,
+						    DRM_MODE_CONNECTOR_LVDS,
+						    &ddc_i2c);
+
+			/* VGA - primary dac */
+			ddc_i2c = combios_setup_i2c_bus(RADEON_GPIO_VGA_DDC);
+			radeon_add_legacy_encoder(dev,
+						  radeon_get_encoder_id(dev,
+									ATOM_DEVICE_CRT1_SUPPORT,
+									1),
+						  ATOM_DEVICE_CRT1_SUPPORT);
+			radeon_add_legacy_connector(dev, 1,
+						    ATOM_DEVICE_CRT1_SUPPORT,
+						    DRM_MODE_CONNECTOR_VGA,
+						    &ddc_i2c);
+		} else {
+			/* DVI-I - tv dac, int tmds */
+			ddc_i2c = combios_setup_i2c_bus(RADEON_GPIO_DVI_DDC);
+			radeon_add_legacy_encoder(dev,
+						  radeon_get_encoder_id(dev,
+									ATOM_DEVICE_DFP1_SUPPORT,
+									0),
+						  ATOM_DEVICE_DFP1_SUPPORT);
+			radeon_add_legacy_encoder(dev,
+						  radeon_get_encoder_id(dev,
+									ATOM_DEVICE_CRT2_SUPPORT,
+									2),
+						  ATOM_DEVICE_CRT2_SUPPORT);
+			radeon_add_legacy_connector(dev, 0,
+						    ATOM_DEVICE_DFP1_SUPPORT |
+						    ATOM_DEVICE_CRT2_SUPPORT,
+						    DRM_MODE_CONNECTOR_DVII,
+						    &ddc_i2c);
+
+			/* VGA - primary dac */
+			ddc_i2c = combios_setup_i2c_bus(RADEON_GPIO_VGA_DDC);
+			radeon_add_legacy_encoder(dev,
+						  radeon_get_encoder_id(dev,
+									ATOM_DEVICE_CRT1_SUPPORT,
+									1),
+						  ATOM_DEVICE_CRT1_SUPPORT);
+			radeon_add_legacy_connector(dev, 1,
+						    ATOM_DEVICE_CRT1_SUPPORT,
+						    DRM_MODE_CONNECTOR_VGA,
+						    &ddc_i2c);
+		}
+
+		if (rdev->family != CHIP_R100 && rdev->family != CHIP_R200) {
+			/* TV - tv dac */
+			radeon_add_legacy_encoder(dev,
+						  radeon_get_encoder_id(dev,
+									ATOM_DEVICE_TV1_SUPPORT,
+									2),
+						  ATOM_DEVICE_TV1_SUPPORT);
+			radeon_add_legacy_connector(dev, 2,
+						    ATOM_DEVICE_TV1_SUPPORT,
+						    DRM_MODE_CONNECTOR_SVIDEO,
+						    &ddc_i2c);
+		}
+		break;
+	case CT_IBOOK:
+		DRM_INFO("Connector Table: %d (ibook)\n",
+			 rdev->mode_info.connector_table);
+		/* LVDS */
+		ddc_i2c = combios_setup_i2c_bus(RADEON_GPIO_DVI_DDC);
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_LCD1_SUPPORT,
+								0),
+					  ATOM_DEVICE_LCD1_SUPPORT);
+		radeon_add_legacy_connector(dev, 0, ATOM_DEVICE_LCD1_SUPPORT,
+					    DRM_MODE_CONNECTOR_LVDS, &ddc_i2c);
+		/* VGA - TV DAC */
+		ddc_i2c = combios_setup_i2c_bus(RADEON_GPIO_VGA_DDC);
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_CRT2_SUPPORT,
+								2),
+					  ATOM_DEVICE_CRT2_SUPPORT);
+		radeon_add_legacy_connector(dev, 1, ATOM_DEVICE_CRT2_SUPPORT,
+					    DRM_MODE_CONNECTOR_VGA, &ddc_i2c);
+		/* TV - TV DAC */
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_TV1_SUPPORT,
+								2),
+					  ATOM_DEVICE_TV1_SUPPORT);
+		radeon_add_legacy_connector(dev, 2, ATOM_DEVICE_TV1_SUPPORT,
+					    DRM_MODE_CONNECTOR_SVIDEO,
+					    &ddc_i2c);
+		break;
+	case CT_POWERBOOK_EXTERNAL:
+		DRM_INFO("Connector Table: %d (powerbook external tmds)\n",
+			 rdev->mode_info.connector_table);
+		/* LVDS */
+		ddc_i2c = combios_setup_i2c_bus(RADEON_GPIO_DVI_DDC);
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_LCD1_SUPPORT,
+								0),
+					  ATOM_DEVICE_LCD1_SUPPORT);
+		radeon_add_legacy_connector(dev, 0, ATOM_DEVICE_LCD1_SUPPORT,
+					    DRM_MODE_CONNECTOR_LVDS, &ddc_i2c);
+		/* DVI-I - primary dac, ext tmds */
+		ddc_i2c = combios_setup_i2c_bus(RADEON_GPIO_VGA_DDC);
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_DFP2_SUPPORT,
+								0),
+					  ATOM_DEVICE_DFP2_SUPPORT);
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_CRT1_SUPPORT,
+								1),
+					  ATOM_DEVICE_CRT1_SUPPORT);
+		radeon_add_legacy_connector(dev, 1,
+					    ATOM_DEVICE_DFP2_SUPPORT |
+					    ATOM_DEVICE_CRT1_SUPPORT,
+					    DRM_MODE_CONNECTOR_DVII, &ddc_i2c);
+		/* TV - TV DAC */
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_TV1_SUPPORT,
+								2),
+					  ATOM_DEVICE_TV1_SUPPORT);
+		radeon_add_legacy_connector(dev, 2, ATOM_DEVICE_TV1_SUPPORT,
+					    DRM_MODE_CONNECTOR_SVIDEO,
+					    &ddc_i2c);
+		break;
+	case CT_POWERBOOK_INTERNAL:
+		DRM_INFO("Connector Table: %d (powerbook internal tmds)\n",
+			 rdev->mode_info.connector_table);
+		/* LVDS */
+		ddc_i2c = combios_setup_i2c_bus(RADEON_GPIO_DVI_DDC);
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_LCD1_SUPPORT,
+								0),
+					  ATOM_DEVICE_LCD1_SUPPORT);
+		radeon_add_legacy_connector(dev, 0, ATOM_DEVICE_LCD1_SUPPORT,
+					    DRM_MODE_CONNECTOR_LVDS, &ddc_i2c);
+		/* DVI-I - primary dac, int tmds */
+		ddc_i2c = combios_setup_i2c_bus(RADEON_GPIO_VGA_DDC);
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_DFP1_SUPPORT,
+								0),
+					  ATOM_DEVICE_DFP1_SUPPORT);
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_CRT1_SUPPORT,
+								1),
+					  ATOM_DEVICE_CRT1_SUPPORT);
+		radeon_add_legacy_connector(dev, 1,
+					    ATOM_DEVICE_DFP1_SUPPORT |
+					    ATOM_DEVICE_CRT1_SUPPORT,
+					    DRM_MODE_CONNECTOR_DVII, &ddc_i2c);
+		/* TV - TV DAC */
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_TV1_SUPPORT,
+								2),
+					  ATOM_DEVICE_TV1_SUPPORT);
+		radeon_add_legacy_connector(dev, 2, ATOM_DEVICE_TV1_SUPPORT,
+					    DRM_MODE_CONNECTOR_SVIDEO,
+					    &ddc_i2c);
+		break;
+	case CT_POWERBOOK_VGA:
+		DRM_INFO("Connector Table: %d (powerbook vga)\n",
+			 rdev->mode_info.connector_table);
+		/* LVDS */
+		ddc_i2c = combios_setup_i2c_bus(RADEON_GPIO_DVI_DDC);
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_LCD1_SUPPORT,
+								0),
+					  ATOM_DEVICE_LCD1_SUPPORT);
+		radeon_add_legacy_connector(dev, 0, ATOM_DEVICE_LCD1_SUPPORT,
+					    DRM_MODE_CONNECTOR_LVDS, &ddc_i2c);
+		/* VGA - primary dac */
+		ddc_i2c = combios_setup_i2c_bus(RADEON_GPIO_VGA_DDC);
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_CRT1_SUPPORT,
+								1),
+					  ATOM_DEVICE_CRT1_SUPPORT);
+		radeon_add_legacy_connector(dev, 1, ATOM_DEVICE_CRT1_SUPPORT,
+					    DRM_MODE_CONNECTOR_VGA, &ddc_i2c);
+		/* TV - TV DAC */
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_TV1_SUPPORT,
+								2),
+					  ATOM_DEVICE_TV1_SUPPORT);
+		radeon_add_legacy_connector(dev, 2, ATOM_DEVICE_TV1_SUPPORT,
+					    DRM_MODE_CONNECTOR_SVIDEO,
+					    &ddc_i2c);
+		break;
+	case CT_MINI_EXTERNAL:
+		DRM_INFO("Connector Table: %d (mini external tmds)\n",
+			 rdev->mode_info.connector_table);
+		/* DVI-I - tv dac, ext tmds */
+		ddc_i2c = combios_setup_i2c_bus(RADEON_GPIO_CRT2_DDC);
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_DFP2_SUPPORT,
+								0),
+					  ATOM_DEVICE_DFP2_SUPPORT);
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_CRT2_SUPPORT,
+								2),
+					  ATOM_DEVICE_CRT2_SUPPORT);
+		radeon_add_legacy_connector(dev, 0,
+					    ATOM_DEVICE_DFP2_SUPPORT |
+					    ATOM_DEVICE_CRT2_SUPPORT,
+					    DRM_MODE_CONNECTOR_DVII, &ddc_i2c);
+		/* TV - TV DAC */
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_TV1_SUPPORT,
+								2),
+					  ATOM_DEVICE_TV1_SUPPORT);
+		radeon_add_legacy_connector(dev, 1, ATOM_DEVICE_TV1_SUPPORT,
+					    DRM_MODE_CONNECTOR_SVIDEO,
+					    &ddc_i2c);
+		break;
+	case CT_MINI_INTERNAL:
+		DRM_INFO("Connector Table: %d (mini internal tmds)\n",
+			 rdev->mode_info.connector_table);
+		/* DVI-I - tv dac, int tmds */
+		ddc_i2c = combios_setup_i2c_bus(RADEON_GPIO_CRT2_DDC);
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_DFP1_SUPPORT,
+								0),
+					  ATOM_DEVICE_DFP1_SUPPORT);
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_CRT2_SUPPORT,
+								2),
+					  ATOM_DEVICE_CRT2_SUPPORT);
+		radeon_add_legacy_connector(dev, 0,
+					    ATOM_DEVICE_DFP1_SUPPORT |
+					    ATOM_DEVICE_CRT2_SUPPORT,
+					    DRM_MODE_CONNECTOR_DVII, &ddc_i2c);
+		/* TV - TV DAC */
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_TV1_SUPPORT,
+								2),
+					  ATOM_DEVICE_TV1_SUPPORT);
+		radeon_add_legacy_connector(dev, 1, ATOM_DEVICE_TV1_SUPPORT,
+					    DRM_MODE_CONNECTOR_SVIDEO,
+					    &ddc_i2c);
+		break;
+	case CT_IMAC_G5_ISIGHT:
+		DRM_INFO("Connector Table: %d (imac g5 isight)\n",
+			 rdev->mode_info.connector_table);
+		/* DVI-D - int tmds */
+		ddc_i2c = combios_setup_i2c_bus(RADEON_GPIO_MONID);
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_DFP1_SUPPORT,
+								0),
+					  ATOM_DEVICE_DFP1_SUPPORT);
+		radeon_add_legacy_connector(dev, 0, ATOM_DEVICE_DFP1_SUPPORT,
+					    DRM_MODE_CONNECTOR_DVID, &ddc_i2c);
+		/* VGA - tv dac */
+		ddc_i2c = combios_setup_i2c_bus(RADEON_GPIO_DVI_DDC);
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_CRT2_SUPPORT,
+								2),
+					  ATOM_DEVICE_CRT2_SUPPORT);
+		radeon_add_legacy_connector(dev, 1, ATOM_DEVICE_CRT2_SUPPORT,
+					    DRM_MODE_CONNECTOR_VGA, &ddc_i2c);
+		/* TV - TV DAC */
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_TV1_SUPPORT,
+								2),
+					  ATOM_DEVICE_TV1_SUPPORT);
+		radeon_add_legacy_connector(dev, 2, ATOM_DEVICE_TV1_SUPPORT,
+					    DRM_MODE_CONNECTOR_SVIDEO,
+					    &ddc_i2c);
+		break;
+	case CT_EMAC:
+		DRM_INFO("Connector Table: %d (emac)\n",
+			 rdev->mode_info.connector_table);
+		/* VGA - primary dac */
+		ddc_i2c = combios_setup_i2c_bus(RADEON_GPIO_VGA_DDC);
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_CRT1_SUPPORT,
+								1),
+					  ATOM_DEVICE_CRT1_SUPPORT);
+		radeon_add_legacy_connector(dev, 0, ATOM_DEVICE_CRT1_SUPPORT,
+					    DRM_MODE_CONNECTOR_VGA, &ddc_i2c);
+		/* VGA - tv dac */
+		ddc_i2c = combios_setup_i2c_bus(RADEON_GPIO_CRT2_DDC);
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_CRT2_SUPPORT,
+								2),
+					  ATOM_DEVICE_CRT2_SUPPORT);
+		radeon_add_legacy_connector(dev, 1, ATOM_DEVICE_CRT2_SUPPORT,
+					    DRM_MODE_CONNECTOR_VGA, &ddc_i2c);
+		/* TV - TV DAC */
+		radeon_add_legacy_encoder(dev,
+					  radeon_get_encoder_id(dev,
+								ATOM_DEVICE_TV1_SUPPORT,
+								2),
+					  ATOM_DEVICE_TV1_SUPPORT);
+		radeon_add_legacy_connector(dev, 2, ATOM_DEVICE_TV1_SUPPORT,
+					    DRM_MODE_CONNECTOR_SVIDEO,
+					    &ddc_i2c);
+		break;
+	default:
+		DRM_INFO("Connector table: %d (invalid)\n",
+			 rdev->mode_info.connector_table);
+		return false;
+	}
+
+	radeon_link_encoder_connector(dev);
+
+	return true;
+}
+
+static bool radeon_apply_legacy_quirks(struct drm_device *dev,
+				       int bios_index,
+				       enum radeon_combios_connector
+				       *legacy_connector,
+				       struct radeon_i2c_bus_rec *ddc_i2c)
+{
+	struct radeon_device *rdev = dev->dev_private;
+
+	/* XPRESS DDC quirks */
+	if ((rdev->family == CHIP_RS400 ||
+	     rdev->family == CHIP_RS480) &&
+	    ddc_i2c->mask_clk_reg == RADEON_GPIO_CRT2_DDC)
+		*ddc_i2c = combios_setup_i2c_bus(RADEON_GPIO_MONID);
+	else if ((rdev->family == CHIP_RS400 ||
+		  rdev->family == CHIP_RS480) &&
+		 ddc_i2c->mask_clk_reg == RADEON_GPIO_MONID) {
+		ddc_i2c->valid = true;
+		ddc_i2c->mask_clk_mask = (0x20 << 8);
+		ddc_i2c->mask_data_mask = 0x80;
+		ddc_i2c->a_clk_mask = (0x20 << 8);
+		ddc_i2c->a_data_mask = 0x80;
+		ddc_i2c->put_clk_mask = (0x20 << 8);
+		ddc_i2c->put_data_mask = 0x80;
+		ddc_i2c->get_clk_mask = (0x20 << 8);
+		ddc_i2c->get_data_mask = 0x80;
+		ddc_i2c->mask_clk_reg = RADEON_GPIOPAD_MASK;
+		ddc_i2c->mask_data_reg = RADEON_GPIOPAD_MASK;
+		ddc_i2c->a_clk_reg = RADEON_GPIOPAD_A;
+		ddc_i2c->a_data_reg = RADEON_GPIOPAD_A;
+		ddc_i2c->put_clk_reg = RADEON_GPIOPAD_EN;
+		ddc_i2c->put_data_reg = RADEON_GPIOPAD_EN;
+		ddc_i2c->get_clk_reg = RADEON_LCD_GPIO_Y_REG;
+		ddc_i2c->get_data_reg = RADEON_LCD_GPIO_Y_REG;
+	}
+
+	/* Certain IBM chipset RN50s have a BIOS reporting two VGAs,
+	   one with VGA DDC and one with CRT2 DDC. - kill the CRT2 DDC one */
+	if (dev->pdev->device == 0x515e &&
+	    dev->pdev->subsystem_vendor == 0x1014) {
+		if (*legacy_connector == CONNECTOR_CRT_LEGACY &&
+		    ddc_i2c->mask_clk_reg == RADEON_GPIO_CRT2_DDC)
+			return false;
+	}
+
+	/* Some RV100 cards with 2 VGA ports show up with DVI+VGA */
+	if (dev->pdev->device == 0x5159 &&
+	    dev->pdev->subsystem_vendor == 0x1002 &&
+	    dev->pdev->subsystem_device == 0x013a) {
+		if (*legacy_connector == CONNECTOR_DVI_I_LEGACY)
+			*legacy_connector = CONNECTOR_CRT_LEGACY;
+
+	}
+
+	/* X300 card with extra non-existent DVI port */
+	if (dev->pdev->device == 0x5B60 &&
+	    dev->pdev->subsystem_vendor == 0x17af &&
+	    dev->pdev->subsystem_device == 0x201e && bios_index == 2) {
+		if (*legacy_connector == CONNECTOR_DVI_I_LEGACY)
+			return false;
+	}
+
+	return true;
+}
+
+bool radeon_get_legacy_connector_info_from_bios(struct drm_device *dev)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t conn_info, entry, devices;
+	uint16_t tmp;
+	enum radeon_combios_ddc ddc_type;
+	enum radeon_combios_connector connector;
+	int i = 0;
+	struct radeon_i2c_bus_rec ddc_i2c;
+
+	if (rdev->bios == NULL)
+		return false;
+
+	conn_info = combios_get_table_offset(dev, COMBIOS_CONNECTOR_INFO_TABLE);
+	if (conn_info) {
+		for (i = 0; i < 4; i++) {
+			entry = conn_info + 2 + i * 2;
+
+			if (!RBIOS16(entry))
+				break;
+
+			tmp = RBIOS16(entry);
+
+			connector = (tmp >> 12) & 0xf;
+
+			ddc_type = (tmp >> 8) & 0xf;
+			switch (ddc_type) {
+			case DDC_MONID:
+				ddc_i2c =
+				    combios_setup_i2c_bus(RADEON_GPIO_MONID);
+				break;
+			case DDC_DVI:
+				ddc_i2c =
+				    combios_setup_i2c_bus(RADEON_GPIO_DVI_DDC);
+				break;
+			case DDC_VGA:
+				ddc_i2c =
+				    combios_setup_i2c_bus(RADEON_GPIO_VGA_DDC);
+				break;
+			case DDC_CRT2:
+				ddc_i2c =
+				    combios_setup_i2c_bus(RADEON_GPIO_CRT2_DDC);
+				break;
+			default:
+				break;
+			}
+
+			radeon_apply_legacy_quirks(dev, i, &connector,
+						   &ddc_i2c);
+
+			switch (connector) {
+			case CONNECTOR_PROPRIETARY_LEGACY:
+				if ((tmp >> 4) & 0x1)
+					devices = ATOM_DEVICE_DFP2_SUPPORT;
+				else
+					devices = ATOM_DEVICE_DFP1_SUPPORT;
+				radeon_add_legacy_encoder(dev,
+							  radeon_get_encoder_id
+							  (dev, devices, 0),
+							  devices);
+				radeon_add_legacy_connector(dev, i, devices,
+							    legacy_connector_convert
+							    [connector],
+							    &ddc_i2c);
+				break;
+			case CONNECTOR_CRT_LEGACY:
+				if (tmp & 0x1) {
+					devices = ATOM_DEVICE_CRT2_SUPPORT;
+					radeon_add_legacy_encoder(dev,
+								  radeon_get_encoder_id
+								  (dev,
+								   ATOM_DEVICE_CRT2_SUPPORT,
+								   2),
+								  ATOM_DEVICE_CRT2_SUPPORT);
+				} else {
+					devices = ATOM_DEVICE_CRT1_SUPPORT;
+					radeon_add_legacy_encoder(dev,
+								  radeon_get_encoder_id
+								  (dev,
+								   ATOM_DEVICE_CRT1_SUPPORT,
+								   1),
+								  ATOM_DEVICE_CRT1_SUPPORT);
+				}
+				radeon_add_legacy_connector(dev,
+							    i,
+							    devices,
+							    legacy_connector_convert
+							    [connector],
+							    &ddc_i2c);
+				break;
+			case CONNECTOR_DVI_I_LEGACY:
+				devices = 0;
+				if (tmp & 0x1) {
+					devices |= ATOM_DEVICE_CRT2_SUPPORT;
+					radeon_add_legacy_encoder(dev,
+								  radeon_get_encoder_id
+								  (dev,
+								   ATOM_DEVICE_CRT2_SUPPORT,
+								   2),
+								  ATOM_DEVICE_CRT2_SUPPORT);
+				} else {
+					devices |= ATOM_DEVICE_CRT1_SUPPORT;
+					radeon_add_legacy_encoder(dev,
+								  radeon_get_encoder_id
+								  (dev,
+								   ATOM_DEVICE_CRT1_SUPPORT,
+								   1),
+								  ATOM_DEVICE_CRT1_SUPPORT);
+				}
+				if ((tmp >> 4) & 0x1) {
+					devices |= ATOM_DEVICE_DFP2_SUPPORT;
+					radeon_add_legacy_encoder(dev,
+								  radeon_get_encoder_id
+								  (dev,
+								   ATOM_DEVICE_DFP2_SUPPORT,
+								   0),
+								  ATOM_DEVICE_DFP2_SUPPORT);
+				} else {
+					devices |= ATOM_DEVICE_DFP1_SUPPORT;
+					radeon_add_legacy_encoder(dev,
+								  radeon_get_encoder_id
+								  (dev,
+								   ATOM_DEVICE_DFP1_SUPPORT,
+								   0),
+								  ATOM_DEVICE_DFP1_SUPPORT);
+				}
+				radeon_add_legacy_connector(dev,
+							    i,
+							    devices,
+							    legacy_connector_convert
+							    [connector],
+							    &ddc_i2c);
+				break;
+			case CONNECTOR_DVI_D_LEGACY:
+				if ((tmp >> 4) & 0x1)
+					devices = ATOM_DEVICE_DFP2_SUPPORT;
+				else
+					devices = ATOM_DEVICE_DFP1_SUPPORT;
+				radeon_add_legacy_encoder(dev,
+							  radeon_get_encoder_id
+							  (dev, devices, 0),
+							  devices);
+				radeon_add_legacy_connector(dev, i, devices,
+							    legacy_connector_convert
+							    [connector],
+							    &ddc_i2c);
+				break;
+			case CONNECTOR_CTV_LEGACY:
+			case CONNECTOR_STV_LEGACY:
+				radeon_add_legacy_encoder(dev,
+							  radeon_get_encoder_id
+							  (dev,
+							   ATOM_DEVICE_TV1_SUPPORT,
+							   2),
+							  ATOM_DEVICE_TV1_SUPPORT);
+				radeon_add_legacy_connector(dev, i,
+							    ATOM_DEVICE_TV1_SUPPORT,
+							    legacy_connector_convert
+							    [connector],
+							    &ddc_i2c);
+				break;
+			default:
+				DRM_ERROR("Unknown connector type: %d\n",
+					  connector);
+				continue;
+			}
+
+		}
+	} else {
+		uint16_t tmds_info =
+		    combios_get_table_offset(dev, COMBIOS_DFP_INFO_TABLE);
+		if (tmds_info) {
+			DRM_DEBUG("Found DFP table, assuming DVI connector\n");
+
+			radeon_add_legacy_encoder(dev,
+						  radeon_get_encoder_id(dev,
+									ATOM_DEVICE_CRT1_SUPPORT,
+									1),
+						  ATOM_DEVICE_CRT1_SUPPORT);
+			radeon_add_legacy_encoder(dev,
+						  radeon_get_encoder_id(dev,
+									ATOM_DEVICE_DFP1_SUPPORT,
+									0),
+						  ATOM_DEVICE_DFP1_SUPPORT);
+
+			ddc_i2c = combios_setup_i2c_bus(RADEON_GPIO_DVI_DDC);
+			radeon_add_legacy_connector(dev,
+						    0,
+						    ATOM_DEVICE_CRT1_SUPPORT |
+						    ATOM_DEVICE_DFP1_SUPPORT,
+						    DRM_MODE_CONNECTOR_DVII,
+						    &ddc_i2c);
+		} else {
+			DRM_DEBUG("No connector info found\n");
+			return false;
+		}
+	}
+
+	if (rdev->flags & RADEON_IS_MOBILITY || rdev->flags & RADEON_IS_IGP) {
+		uint16_t lcd_info =
+		    combios_get_table_offset(dev, COMBIOS_LCD_INFO_TABLE);
+		if (lcd_info) {
+			uint16_t lcd_ddc_info =
+			    combios_get_table_offset(dev,
+						     COMBIOS_LCD_DDC_INFO_TABLE);
+
+			radeon_add_legacy_encoder(dev,
+						  radeon_get_encoder_id(dev,
+									ATOM_DEVICE_LCD1_SUPPORT,
+									0),
+						  ATOM_DEVICE_LCD1_SUPPORT);
+
+			if (lcd_ddc_info) {
+				ddc_type = RBIOS8(lcd_ddc_info + 2);
+				switch (ddc_type) {
+				case DDC_MONID:
+					ddc_i2c =
+					    combios_setup_i2c_bus
+					    (RADEON_GPIO_MONID);
+					break;
+				case DDC_DVI:
+					ddc_i2c =
+					    combios_setup_i2c_bus
+					    (RADEON_GPIO_DVI_DDC);
+					break;
+				case DDC_VGA:
+					ddc_i2c =
+					    combios_setup_i2c_bus
+					    (RADEON_GPIO_VGA_DDC);
+					break;
+				case DDC_CRT2:
+					ddc_i2c =
+					    combios_setup_i2c_bus
+					    (RADEON_GPIO_CRT2_DDC);
+					break;
+				case DDC_LCD:
+					ddc_i2c =
+					    combios_setup_i2c_bus
+					    (RADEON_LCD_GPIO_MASK);
+					ddc_i2c.mask_clk_mask =
+					    RBIOS32(lcd_ddc_info + 3);
+					ddc_i2c.mask_data_mask =
+					    RBIOS32(lcd_ddc_info + 7);
+					ddc_i2c.a_clk_mask =
+					    RBIOS32(lcd_ddc_info + 3);
+					ddc_i2c.a_data_mask =
+					    RBIOS32(lcd_ddc_info + 7);
+					ddc_i2c.put_clk_mask =
+					    RBIOS32(lcd_ddc_info + 3);
+					ddc_i2c.put_data_mask =
+					    RBIOS32(lcd_ddc_info + 7);
+					ddc_i2c.get_clk_mask =
+					    RBIOS32(lcd_ddc_info + 3);
+					ddc_i2c.get_data_mask =
+					    RBIOS32(lcd_ddc_info + 7);
+					break;
+				case DDC_GPIO:
+					ddc_i2c =
+					    combios_setup_i2c_bus
+					    (RADEON_MDGPIO_EN_REG);
+					ddc_i2c.mask_clk_mask =
+					    RBIOS32(lcd_ddc_info + 3);
+					ddc_i2c.mask_data_mask =
+					    RBIOS32(lcd_ddc_info + 7);
+					ddc_i2c.a_clk_mask =
+					    RBIOS32(lcd_ddc_info + 3);
+					ddc_i2c.a_data_mask =
+					    RBIOS32(lcd_ddc_info + 7);
+					ddc_i2c.put_clk_mask =
+					    RBIOS32(lcd_ddc_info + 3);
+					ddc_i2c.put_data_mask =
+					    RBIOS32(lcd_ddc_info + 7);
+					ddc_i2c.get_clk_mask =
+					    RBIOS32(lcd_ddc_info + 3);
+					ddc_i2c.get_data_mask =
+					    RBIOS32(lcd_ddc_info + 7);
+					break;
+				default:
+					ddc_i2c.valid = false;
+					break;
+				}
+				DRM_DEBUG("LCD DDC Info Table found!\n");
+			} else
+				ddc_i2c.valid = false;
+
+			radeon_add_legacy_connector(dev,
+						    5,
+						    ATOM_DEVICE_LCD1_SUPPORT,
+						    DRM_MODE_CONNECTOR_LVDS,
+						    &ddc_i2c);
+		}
+	}
+
+	/* check TV table */
+	if (rdev->family != CHIP_R100 && rdev->family != CHIP_R200) {
+		uint32_t tv_info =
+		    combios_get_table_offset(dev, COMBIOS_TV_INFO_TABLE);
+		if (tv_info) {
+			if (RBIOS8(tv_info + 6) == 'T') {
+				radeon_add_legacy_encoder(dev,
+							  radeon_get_encoder_id
+							  (dev,
+							   ATOM_DEVICE_TV1_SUPPORT,
+							   2),
+							  ATOM_DEVICE_TV1_SUPPORT);
+				radeon_add_legacy_connector(dev, 6,
+							    ATOM_DEVICE_TV1_SUPPORT,
+							    DRM_MODE_CONNECTOR_SVIDEO,
+							    &ddc_i2c);
+			}
+		}
+	}
+
+	radeon_link_encoder_connector(dev);
+
+	return true;
+}
+
+static void combios_parse_mmio_table(struct drm_device *dev, uint16_t offset)
+{
+	struct radeon_device *rdev = dev->dev_private;
+
+	if (offset) {
+		while (RBIOS16(offset)) {
+			uint16_t cmd = ((RBIOS16(offset) & 0xe000) >> 13);
+			uint32_t addr = (RBIOS16(offset) & 0x1fff);
+			uint32_t val, and_mask, or_mask;
+			uint32_t tmp;
+
+			offset += 2;
+			switch (cmd) {
+			case 0:
+				val = RBIOS32(offset);
+				offset += 4;
+				WREG32(addr, val);
+				break;
+			case 1:
+				val = RBIOS32(offset);
+				offset += 4;
+				WREG32(addr, val);
+				break;
+			case 2:
+				and_mask = RBIOS32(offset);
+				offset += 4;
+				or_mask = RBIOS32(offset);
+				offset += 4;
+				tmp = RREG32(addr);
+				tmp &= and_mask;
+				tmp |= or_mask;
+				WREG32(addr, tmp);
+				break;
+			case 3:
+				and_mask = RBIOS32(offset);
+				offset += 4;
+				or_mask = RBIOS32(offset);
+				offset += 4;
+				tmp = RREG32(addr);
+				tmp &= and_mask;
+				tmp |= or_mask;
+				WREG32(addr, tmp);
+				break;
+			case 4:
+				val = RBIOS16(offset);
+				offset += 2;
+				udelay(val);
+				break;
+			case 5:
+				val = RBIOS16(offset);
+				offset += 2;
+				switch (addr) {
+				case 8:
+					while (val--) {
+						if (!
+						    (RREG32_PLL
+						     (RADEON_CLK_PWRMGT_CNTL) &
+						     RADEON_MC_BUSY))
+							break;
+					}
+					break;
+				case 9:
+					while (val--) {
+						if ((RREG32(RADEON_MC_STATUS) &
+						     RADEON_MC_IDLE))
+							break;
+					}
+					break;
+				default:
+					break;
+				}
+				break;
+			default:
+				break;
+			}
+		}
+	}
+}
+
+static void combios_parse_pll_table(struct drm_device *dev, uint16_t offset)
+{
+	struct radeon_device *rdev = dev->dev_private;
+
+	if (offset) {
+		while (RBIOS8(offset)) {
+			uint8_t cmd = ((RBIOS8(offset) & 0xc0) >> 6);
+			uint8_t addr = (RBIOS8(offset) & 0x3f);
+			uint32_t val, shift, tmp;
+			uint32_t and_mask, or_mask;
+
+			offset++;
+			switch (cmd) {
+			case 0:
+				val = RBIOS32(offset);
+				offset += 4;
+				WREG32_PLL(addr, val);
+				break;
+			case 1:
+				shift = RBIOS8(offset) * 8;
+				offset++;
+				and_mask = RBIOS8(offset) << shift;
+				and_mask |= ~(0xff << shift);
+				offset++;
+				or_mask = RBIOS8(offset) << shift;
+				offset++;
+				tmp = RREG32_PLL(addr);
+				tmp &= and_mask;
+				tmp |= or_mask;
+				WREG32_PLL(addr, tmp);
+				break;
+			case 2:
+			case 3:
+				tmp = 1000;
+				switch (addr) {
+				case 1:
+					udelay(150);
+					break;
+				case 2:
+					udelay(1000);
+					break;
+				case 3:
+					while (tmp--) {
+						if (!
+						    (RREG32_PLL
+						     (RADEON_CLK_PWRMGT_CNTL) &
+						     RADEON_MC_BUSY))
+							break;
+					}
+					break;
+				case 4:
+					while (tmp--) {
+						if (RREG32_PLL
+						    (RADEON_CLK_PWRMGT_CNTL) &
+						    RADEON_DLL_READY)
+							break;
+					}
+					break;
+				case 5:
+					tmp =
+					    RREG32_PLL(RADEON_CLK_PWRMGT_CNTL);
+					if (tmp & RADEON_CG_NO1_DEBUG_0) {
+#if 0
+						uint32_t mclk_cntl =
+						    RREG32_PLL
+						    (RADEON_MCLK_CNTL);
+						mclk_cntl &= 0xffff0000;
+						/*mclk_cntl |= 0x00001111;*//* ??? */
+						WREG32_PLL(RADEON_MCLK_CNTL,
+							   mclk_cntl);
+						udelay(10000);
+#endif
+						WREG32_PLL
+						    (RADEON_CLK_PWRMGT_CNTL,
+						     tmp &
+						     ~RADEON_CG_NO1_DEBUG_0);
+						udelay(10000);
+					}
+					break;
+				default:
+					break;
+				}
+				break;
+			default:
+				break;
+			}
+		}
+	}
+}
+
+static void combios_parse_ram_reset_table(struct drm_device *dev,
+					  uint16_t offset)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t tmp;
+
+	if (offset) {
+		uint8_t val = RBIOS8(offset);
+		while (val != 0xff) {
+			offset++;
+
+			if (val == 0x0f) {
+				uint32_t channel_complete_mask;
+
+				if (ASIC_IS_R300(rdev))
+					channel_complete_mask =
+					    R300_MEM_PWRUP_COMPLETE;
+				else
+					channel_complete_mask =
+					    RADEON_MEM_PWRUP_COMPLETE;
+				tmp = 20000;
+				while (tmp--) {
+					if ((RREG32(RADEON_MEM_STR_CNTL) &
+					     channel_complete_mask) ==
+					    channel_complete_mask)
+						break;
+				}
+			} else {
+				uint32_t or_mask = RBIOS16(offset);
+				offset += 2;
+
+				tmp = RREG32(RADEON_MEM_SDRAM_MODE_REG);
+				tmp &= RADEON_SDRAM_MODE_MASK;
+				tmp |= or_mask;
+				WREG32(RADEON_MEM_SDRAM_MODE_REG, tmp);
+
+				or_mask = val << 24;
+				tmp = RREG32(RADEON_MEM_SDRAM_MODE_REG);
+				tmp &= RADEON_B3MEM_RESET_MASK;
+				tmp |= or_mask;
+				WREG32(RADEON_MEM_SDRAM_MODE_REG, tmp);
+			}
+			val = RBIOS8(offset);
+		}
+	}
+}
+
+static uint32_t combios_detect_ram(struct drm_device *dev, int ram,
+				   int mem_addr_mapping)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t mem_cntl;
+	uint32_t mem_size;
+	uint32_t addr = 0;
+
+	mem_cntl = RREG32(RADEON_MEM_CNTL);
+	if (mem_cntl & RV100_HALF_MODE)
+		ram /= 2;
+	mem_size = ram;
+	mem_cntl &= ~(0xff << 8);
+	mem_cntl |= (mem_addr_mapping & 0xff) << 8;
+	WREG32(RADEON_MEM_CNTL, mem_cntl);
+	RREG32(RADEON_MEM_CNTL);
+
+	/* sdram reset ? */
+
+	/* something like this????  */
+	while (ram--) {
+		addr = ram * 1024 * 1024;
+		/* write to each page */
+		WREG32(RADEON_MM_INDEX, (addr) | RADEON_MM_APER);
+		WREG32(RADEON_MM_DATA, 0xdeadbeef);
+		/* read back and verify */
+		WREG32(RADEON_MM_INDEX, (addr) | RADEON_MM_APER);
+		if (RREG32(RADEON_MM_DATA) != 0xdeadbeef)
+			return 0;
+	}
+
+	return mem_size;
+}
+
+static void combios_write_ram_size(struct drm_device *dev)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	uint8_t rev;
+	uint16_t offset;
+	uint32_t mem_size = 0;
+	uint32_t mem_cntl = 0;
+
+	/* should do something smarter here I guess... */
+	if (rdev->flags & RADEON_IS_IGP)
+		return;
+
+	/* first check detected mem table */
+	offset = combios_get_table_offset(dev, COMBIOS_DETECTED_MEM_TABLE);
+	if (offset) {
+		rev = RBIOS8(offset);
+		if (rev < 3) {
+			mem_cntl = RBIOS32(offset + 1);
+			mem_size = RBIOS16(offset + 5);
+			if (((rdev->flags & RADEON_FAMILY_MASK) < CHIP_R200) &&
+			    ((dev->pdev->device != 0x515e)
+			     && (dev->pdev->device != 0x5969)))
+				WREG32(RADEON_MEM_CNTL, mem_cntl);
+		}
+	}
+
+	if (!mem_size) {
+		offset =
+		    combios_get_table_offset(dev, COMBIOS_MEM_CONFIG_TABLE);
+		if (offset) {
+			rev = RBIOS8(offset - 1);
+			if (rev < 1) {
+				if (((rdev->flags & RADEON_FAMILY_MASK) <
+				     CHIP_R200)
+				    && ((dev->pdev->device != 0x515e)
+					&& (dev->pdev->device != 0x5969))) {
+					int ram = 0;
+					int mem_addr_mapping = 0;
+
+					while (RBIOS8(offset)) {
+						ram = RBIOS8(offset);
+						mem_addr_mapping =
+						    RBIOS8(offset + 1);
+						if (mem_addr_mapping != 0x25)
+							ram *= 2;
+						mem_size =
+						    combios_detect_ram(dev, ram,
+								       mem_addr_mapping);
+						if (mem_size)
+							break;
+						offset += 2;
+					}
+				} else
+					mem_size = RBIOS8(offset);
+			} else {
+				mem_size = RBIOS8(offset);
+				mem_size *= 2;	/* convert to MB */
+			}
+		}
+	}
+
+	mem_size *= (1024 * 1024);	/* convert to bytes */
+	WREG32(RADEON_CONFIG_MEMSIZE, mem_size);
+}
+
+void radeon_combios_dyn_clk_setup(struct drm_device *dev, int enable)
+{
+	uint16_t dyn_clk_info =
+	    combios_get_table_offset(dev, COMBIOS_DYN_CLK_1_TABLE);
+
+	if (dyn_clk_info)
+		combios_parse_pll_table(dev, dyn_clk_info);
+}
+
+void radeon_combios_asic_init(struct drm_device *dev)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	uint16_t table;
+
+	/* port hardcoded mac stuff from radeonfb */
+	if (rdev->bios == NULL)
+		return;
+
+	/* ASIC INIT 1 */
+	table = combios_get_table_offset(dev, COMBIOS_ASIC_INIT_1_TABLE);
+	if (table)
+		combios_parse_mmio_table(dev, table);
+
+	/* PLL INIT */
+	table = combios_get_table_offset(dev, COMBIOS_PLL_INIT_TABLE);
+	if (table)
+		combios_parse_pll_table(dev, table);
+
+	/* ASIC INIT 2 */
+	table = combios_get_table_offset(dev, COMBIOS_ASIC_INIT_2_TABLE);
+	if (table)
+		combios_parse_mmio_table(dev, table);
+
+	if (!(rdev->flags & RADEON_IS_IGP)) {
+		/* ASIC INIT 4 */
+		table =
+		    combios_get_table_offset(dev, COMBIOS_ASIC_INIT_4_TABLE);
+		if (table)
+			combios_parse_mmio_table(dev, table);
+
+		/* RAM RESET */
+		table = combios_get_table_offset(dev, COMBIOS_RAM_RESET_TABLE);
+		if (table)
+			combios_parse_ram_reset_table(dev, table);
+
+		/* ASIC INIT 3 */
+		table =
+		    combios_get_table_offset(dev, COMBIOS_ASIC_INIT_3_TABLE);
+		if (table)
+			combios_parse_mmio_table(dev, table);
+
+		/* write CONFIG_MEMSIZE */
+		combios_write_ram_size(dev);
+	}
+
+	/* DYN CLK 1 */
+	table = combios_get_table_offset(dev, COMBIOS_DYN_CLK_1_TABLE);
+	if (table)
+		combios_parse_pll_table(dev, table);
+
+}
+
+void radeon_combios_initialize_bios_scratch_regs(struct drm_device *dev)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t bios_0_scratch, bios_6_scratch, bios_7_scratch;
+
+	bios_0_scratch = RREG32(RADEON_BIOS_0_SCRATCH);
+	bios_6_scratch = RREG32(RADEON_BIOS_6_SCRATCH);
+	bios_7_scratch = RREG32(RADEON_BIOS_7_SCRATCH);
+
+	/* let the bios control the backlight */
+	bios_0_scratch &= ~RADEON_DRIVER_BRIGHTNESS_EN;
+
+	/* tell the bios not to handle mode switching */
+	bios_6_scratch |= (RADEON_DISPLAY_SWITCHING_DIS |
+			   RADEON_ACC_MODE_CHANGE);
+
+	/* tell the bios a driver is loaded */
+	bios_7_scratch |= RADEON_DRV_LOADED;
+
+	WREG32(RADEON_BIOS_0_SCRATCH, bios_0_scratch);
+	WREG32(RADEON_BIOS_6_SCRATCH, bios_6_scratch);
+	WREG32(RADEON_BIOS_7_SCRATCH, bios_7_scratch);
+}
+
+void radeon_combios_output_lock(struct drm_encoder *encoder, bool lock)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t bios_6_scratch;
+
+	bios_6_scratch = RREG32(RADEON_BIOS_6_SCRATCH);
+
+	if (lock)
+		bios_6_scratch |= RADEON_DRIVER_CRITICAL;
+	else
+		bios_6_scratch &= ~RADEON_DRIVER_CRITICAL;
+
+	WREG32(RADEON_BIOS_6_SCRATCH, bios_6_scratch);
+}
+
+void
+radeon_combios_connected_scratch_regs(struct drm_connector *connector,
+				      struct drm_encoder *encoder,
+				      bool connected)
+{
+	struct drm_device *dev = connector->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_connector *radeon_connector =
+	    to_radeon_connector(connector);
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	uint32_t bios_4_scratch = RREG32(RADEON_BIOS_4_SCRATCH);
+	uint32_t bios_5_scratch = RREG32(RADEON_BIOS_5_SCRATCH);
+
+	if ((radeon_encoder->devices & ATOM_DEVICE_TV1_SUPPORT) &&
+	    (radeon_connector->devices & ATOM_DEVICE_TV1_SUPPORT)) {
+		if (connected) {
+			DRM_DEBUG("TV1 connected\n");
+			/* fix me */
+			bios_4_scratch |= RADEON_TV1_ATTACHED_SVIDEO;
+			/*save->bios_4_scratch |= RADEON_TV1_ATTACHED_COMP; */
+			bios_5_scratch |= RADEON_TV1_ON;
+			bios_5_scratch |= RADEON_ACC_REQ_TV1;
+		} else {
+			DRM_DEBUG("TV1 disconnected\n");
+			bios_4_scratch &= ~RADEON_TV1_ATTACHED_MASK;
+			bios_5_scratch &= ~RADEON_TV1_ON;
+			bios_5_scratch &= ~RADEON_ACC_REQ_TV1;
+		}
+	}
+	if ((radeon_encoder->devices & ATOM_DEVICE_LCD1_SUPPORT) &&
+	    (radeon_connector->devices & ATOM_DEVICE_LCD1_SUPPORT)) {
+		if (connected) {
+			DRM_DEBUG("LCD1 connected\n");
+			bios_4_scratch |= RADEON_LCD1_ATTACHED;
+			bios_5_scratch |= RADEON_LCD1_ON;
+			bios_5_scratch |= RADEON_ACC_REQ_LCD1;
+		} else {
+			DRM_DEBUG("LCD1 disconnected\n");
+			bios_4_scratch &= ~RADEON_LCD1_ATTACHED;
+			bios_5_scratch &= ~RADEON_LCD1_ON;
+			bios_5_scratch &= ~RADEON_ACC_REQ_LCD1;
+		}
+	}
+	if ((radeon_encoder->devices & ATOM_DEVICE_CRT1_SUPPORT) &&
+	    (radeon_connector->devices & ATOM_DEVICE_CRT1_SUPPORT)) {
+		if (connected) {
+			DRM_DEBUG("CRT1 connected\n");
+			bios_4_scratch |= RADEON_CRT1_ATTACHED_COLOR;
+			bios_5_scratch |= RADEON_CRT1_ON;
+			bios_5_scratch |= RADEON_ACC_REQ_CRT1;
+		} else {
+			DRM_DEBUG("CRT1 disconnected\n");
+			bios_4_scratch &= ~RADEON_CRT1_ATTACHED_MASK;
+			bios_5_scratch &= ~RADEON_CRT1_ON;
+			bios_5_scratch &= ~RADEON_ACC_REQ_CRT1;
+		}
+	}
+	if ((radeon_encoder->devices & ATOM_DEVICE_CRT2_SUPPORT) &&
+	    (radeon_connector->devices & ATOM_DEVICE_CRT2_SUPPORT)) {
+		if (connected) {
+			DRM_DEBUG("CRT2 connected\n");
+			bios_4_scratch |= RADEON_CRT2_ATTACHED_COLOR;
+			bios_5_scratch |= RADEON_CRT2_ON;
+			bios_5_scratch |= RADEON_ACC_REQ_CRT2;
+		} else {
+			DRM_DEBUG("CRT2 disconnected\n");
+			bios_4_scratch &= ~RADEON_CRT2_ATTACHED_MASK;
+			bios_5_scratch &= ~RADEON_CRT2_ON;
+			bios_5_scratch &= ~RADEON_ACC_REQ_CRT2;
+		}
+	}
+	if ((radeon_encoder->devices & ATOM_DEVICE_DFP1_SUPPORT) &&
+	    (radeon_connector->devices & ATOM_DEVICE_DFP1_SUPPORT)) {
+		if (connected) {
+			DRM_DEBUG("DFP1 connected\n");
+			bios_4_scratch |= RADEON_DFP1_ATTACHED;
+			bios_5_scratch |= RADEON_DFP1_ON;
+			bios_5_scratch |= RADEON_ACC_REQ_DFP1;
+		} else {
+			DRM_DEBUG("DFP1 disconnected\n");
+			bios_4_scratch &= ~RADEON_DFP1_ATTACHED;
+			bios_5_scratch &= ~RADEON_DFP1_ON;
+			bios_5_scratch &= ~RADEON_ACC_REQ_DFP1;
+		}
+	}
+	if ((radeon_encoder->devices & ATOM_DEVICE_DFP2_SUPPORT) &&
+	    (radeon_connector->devices & ATOM_DEVICE_DFP2_SUPPORT)) {
+		if (connected) {
+			DRM_DEBUG("DFP2 connected\n");
+			bios_4_scratch |= RADEON_DFP2_ATTACHED;
+			bios_5_scratch |= RADEON_DFP2_ON;
+			bios_5_scratch |= RADEON_ACC_REQ_DFP2;
+		} else {
+			DRM_DEBUG("DFP2 disconnected\n");
+			bios_4_scratch &= ~RADEON_DFP2_ATTACHED;
+			bios_5_scratch &= ~RADEON_DFP2_ON;
+			bios_5_scratch &= ~RADEON_ACC_REQ_DFP2;
+		}
+	}
+	WREG32(RADEON_BIOS_4_SCRATCH, bios_4_scratch);
+	WREG32(RADEON_BIOS_5_SCRATCH, bios_5_scratch);
+}
+
+void
+radeon_combios_encoder_crtc_scratch_regs(struct drm_encoder *encoder, int crtc)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	uint32_t bios_5_scratch = RREG32(RADEON_BIOS_5_SCRATCH);
+
+	if (radeon_encoder->devices & ATOM_DEVICE_TV1_SUPPORT) {
+		bios_5_scratch &= ~RADEON_TV1_CRTC_MASK;
+		bios_5_scratch |= (crtc << RADEON_TV1_CRTC_SHIFT);
+	}
+	if (radeon_encoder->devices & ATOM_DEVICE_CRT1_SUPPORT) {
+		bios_5_scratch &= ~RADEON_CRT1_CRTC_MASK;
+		bios_5_scratch |= (crtc << RADEON_CRT1_CRTC_SHIFT);
+	}
+	if (radeon_encoder->devices & ATOM_DEVICE_CRT2_SUPPORT) {
+		bios_5_scratch &= ~RADEON_CRT2_CRTC_MASK;
+		bios_5_scratch |= (crtc << RADEON_CRT2_CRTC_SHIFT);
+	}
+	if (radeon_encoder->devices & ATOM_DEVICE_LCD1_SUPPORT) {
+		bios_5_scratch &= ~RADEON_LCD1_CRTC_MASK;
+		bios_5_scratch |= (crtc << RADEON_LCD1_CRTC_SHIFT);
+	}
+	if (radeon_encoder->devices & ATOM_DEVICE_DFP1_SUPPORT) {
+		bios_5_scratch &= ~RADEON_DFP1_CRTC_MASK;
+		bios_5_scratch |= (crtc << RADEON_DFP1_CRTC_SHIFT);
+	}
+	if (radeon_encoder->devices & ATOM_DEVICE_DFP2_SUPPORT) {
+		bios_5_scratch &= ~RADEON_DFP2_CRTC_MASK;
+		bios_5_scratch |= (crtc << RADEON_DFP2_CRTC_SHIFT);
+	}
+	WREG32(RADEON_BIOS_5_SCRATCH, bios_5_scratch);
+}
+
+void
+radeon_combios_encoder_dpms_scratch_regs(struct drm_encoder *encoder, bool on)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	uint32_t bios_6_scratch = RREG32(RADEON_BIOS_6_SCRATCH);
+
+	if (radeon_encoder->devices & (ATOM_DEVICE_TV_SUPPORT)) {
+		if (on)
+			bios_6_scratch |= RADEON_TV_DPMS_ON;
+		else
+			bios_6_scratch &= ~RADEON_TV_DPMS_ON;
+	}
+	if (radeon_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) {
+		if (on)
+			bios_6_scratch |= RADEON_CRT_DPMS_ON;
+		else
+			bios_6_scratch &= ~RADEON_CRT_DPMS_ON;
+	}
+	if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+		if (on)
+			bios_6_scratch |= RADEON_LCD_DPMS_ON;
+		else
+			bios_6_scratch &= ~RADEON_LCD_DPMS_ON;
+	}
+	if (radeon_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) {
+		if (on)
+			bios_6_scratch |= RADEON_DFP_DPMS_ON;
+		else
+			bios_6_scratch &= ~RADEON_DFP_DPMS_ON;
+	}
+	WREG32(RADEON_BIOS_6_SCRATCH, bios_6_scratch);
+}
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
new file mode 100644
index 00000000000..70ede6a52d4
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -0,0 +1,603 @@
+/*
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ */
+#include "drmP.h"
+#include "drm_edid.h"
+#include "drm_crtc_helper.h"
+#include "radeon_drm.h"
+#include "radeon.h"
+
+extern void
+radeon_combios_connected_scratch_regs(struct drm_connector *connector,
+				      struct drm_encoder *encoder,
+				      bool connected);
+extern void
+radeon_atombios_connected_scratch_regs(struct drm_connector *connector,
+				       struct drm_encoder *encoder,
+				       bool connected);
+
+static void
+radeon_connector_update_scratch_regs(struct drm_connector *connector, enum drm_connector_status status)
+{
+	struct drm_device *dev = connector->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_encoder *best_encoder = NULL;
+	struct drm_encoder *encoder = NULL;
+	struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
+	struct drm_mode_object *obj;
+	bool connected;
+	int i;
+
+	best_encoder = connector_funcs->best_encoder(connector);
+
+	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
+		if (connector->encoder_ids[i] == 0)
+			break;
+
+		obj = drm_mode_object_find(connector->dev,
+					   connector->encoder_ids[i],
+					   DRM_MODE_OBJECT_ENCODER);
+		if (!obj)
+			continue;
+
+		encoder = obj_to_encoder(obj);
+
+		if ((encoder == best_encoder) && (status == connector_status_connected))
+			connected = true;
+		else
+			connected = false;
+
+		if (rdev->is_atom_bios)
+			radeon_atombios_connected_scratch_regs(connector, encoder, connected);
+		else
+			radeon_combios_connected_scratch_regs(connector, encoder, connected);
+
+	}
+}
+
+struct drm_encoder *radeon_best_single_encoder(struct drm_connector *connector)
+{
+	int enc_id = connector->encoder_ids[0];
+	struct drm_mode_object *obj;
+	struct drm_encoder *encoder;
+
+	/* pick the encoder ids */
+	if (enc_id) {
+		obj = drm_mode_object_find(connector->dev, enc_id, DRM_MODE_OBJECT_ENCODER);
+		if (!obj)
+			return NULL;
+		encoder = obj_to_encoder(obj);
+		return encoder;
+	}
+	return NULL;
+}
+
+static struct drm_display_mode *radeon_fp_native_mode(struct drm_encoder *encoder)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct drm_display_mode *mode = NULL;
+	struct radeon_native_mode *native_mode = &radeon_encoder->native_mode;
+
+	if (native_mode->panel_xres != 0 &&
+	    native_mode->panel_yres != 0 &&
+	    native_mode->dotclock != 0) {
+		mode = drm_mode_create(dev);
+
+		mode->hdisplay = native_mode->panel_xres;
+		mode->vdisplay = native_mode->panel_yres;
+
+		mode->htotal = mode->hdisplay + native_mode->hblank;
+		mode->hsync_start = mode->hdisplay + native_mode->hoverplus;
+		mode->hsync_end = mode->hsync_start + native_mode->hsync_width;
+		mode->vtotal = mode->vdisplay + native_mode->vblank;
+		mode->vsync_start = mode->vdisplay + native_mode->voverplus;
+		mode->vsync_end = mode->vsync_start + native_mode->vsync_width;
+		mode->clock = native_mode->dotclock;
+		mode->flags = 0;
+
+		mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER;
+		drm_mode_set_name(mode);
+
+		DRM_DEBUG("Adding native panel mode %s\n", mode->name);
+	}
+	return mode;
+}
+
+int radeon_connector_set_property(struct drm_connector *connector, struct drm_property *property,
+				  uint64_t val)
+{
+	return 0;
+}
+
+
+static int radeon_lvds_get_modes(struct drm_connector *connector)
+{
+	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
+	struct drm_encoder *encoder;
+	int ret = 0;
+	struct drm_display_mode *mode;
+
+	if (radeon_connector->ddc_bus) {
+		ret = radeon_ddc_get_modes(radeon_connector);
+		if (ret > 0) {
+			return ret;
+		}
+	}
+
+	encoder = radeon_best_single_encoder(connector);
+	if (!encoder)
+		return 0;
+
+	/* we have no EDID modes */
+	mode = radeon_fp_native_mode(encoder);
+	if (mode) {
+		ret = 1;
+		drm_mode_probed_add(connector, mode);
+	}
+	return ret;
+}
+
+static int radeon_lvds_mode_valid(struct drm_connector *connector,
+				  struct drm_display_mode *mode)
+{
+	return MODE_OK;
+}
+
+static enum drm_connector_status radeon_lvds_detect(struct drm_connector *connector)
+{
+	enum drm_connector_status ret = connector_status_connected;
+	/* check acpi lid status ??? */
+	radeon_connector_update_scratch_regs(connector, ret);
+	return ret;
+}
+
+static void radeon_connector_destroy(struct drm_connector *connector)
+{
+	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
+
+	if (radeon_connector->ddc_bus)
+		radeon_i2c_destroy(radeon_connector->ddc_bus);
+	kfree(radeon_connector->con_priv);
+	drm_sysfs_connector_remove(connector);
+	drm_connector_cleanup(connector);
+	kfree(connector);
+}
+
+struct drm_connector_helper_funcs radeon_lvds_connector_helper_funcs = {
+	.get_modes = radeon_lvds_get_modes,
+	.mode_valid = radeon_lvds_mode_valid,
+	.best_encoder = radeon_best_single_encoder,
+};
+
+struct drm_connector_funcs radeon_lvds_connector_funcs = {
+	.dpms = drm_helper_connector_dpms,
+	.detect = radeon_lvds_detect,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = radeon_connector_destroy,
+	.set_property = radeon_connector_set_property,
+};
+
+static int radeon_vga_get_modes(struct drm_connector *connector)
+{
+	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
+	int ret;
+
+	ret = radeon_ddc_get_modes(radeon_connector);
+
+	return ret;
+}
+
+static int radeon_vga_mode_valid(struct drm_connector *connector,
+				  struct drm_display_mode *mode)
+{
+
+	return MODE_OK;
+}
+
+static enum drm_connector_status radeon_vga_detect(struct drm_connector *connector)
+{
+	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
+	struct drm_encoder *encoder;
+	struct drm_encoder_helper_funcs *encoder_funcs;
+	bool dret;
+	enum drm_connector_status ret = connector_status_disconnected;
+
+	radeon_i2c_do_lock(radeon_connector, 1);
+	dret = radeon_ddc_probe(radeon_connector);
+	radeon_i2c_do_lock(radeon_connector, 0);
+	if (dret)
+		ret = connector_status_connected;
+	else {
+		/* if EDID fails to a load detect */
+		encoder = radeon_best_single_encoder(connector);
+		if (!encoder)
+			ret = connector_status_disconnected;
+		else {
+			encoder_funcs = encoder->helper_private;
+			ret = encoder_funcs->detect(encoder, connector);
+		}
+	}
+
+	radeon_connector_update_scratch_regs(connector, ret);
+	return ret;
+}
+
+struct drm_connector_helper_funcs radeon_vga_connector_helper_funcs = {
+	.get_modes = radeon_vga_get_modes,
+	.mode_valid = radeon_vga_mode_valid,
+	.best_encoder = radeon_best_single_encoder,
+};
+
+struct drm_connector_funcs radeon_vga_connector_funcs = {
+	.dpms = drm_helper_connector_dpms,
+	.detect = radeon_vga_detect,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = radeon_connector_destroy,
+	.set_property = radeon_connector_set_property,
+};
+
+static int radeon_dvi_get_modes(struct drm_connector *connector)
+{
+	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
+	int ret;
+
+	ret = radeon_ddc_get_modes(radeon_connector);
+	/* reset scratch regs here since radeon_dvi_detect doesn't check digital bit */
+	radeon_connector_update_scratch_regs(connector, connector_status_connected);
+	return ret;
+}
+
+static enum drm_connector_status radeon_dvi_detect(struct drm_connector *connector)
+{
+	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
+	struct drm_encoder *encoder;
+	struct drm_encoder_helper_funcs *encoder_funcs;
+	struct drm_mode_object *obj;
+	int i;
+	enum drm_connector_status ret = connector_status_disconnected;
+	bool dret;
+
+	radeon_i2c_do_lock(radeon_connector, 1);
+	dret = radeon_ddc_probe(radeon_connector);
+	radeon_i2c_do_lock(radeon_connector, 0);
+	if (dret)
+		ret = connector_status_connected;
+	else {
+		for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
+			if (connector->encoder_ids[i] == 0)
+				break;
+
+			obj = drm_mode_object_find(connector->dev,
+						   connector->encoder_ids[i],
+						   DRM_MODE_OBJECT_ENCODER);
+			if (!obj)
+				continue;
+
+			encoder = obj_to_encoder(obj);
+
+			encoder_funcs = encoder->helper_private;
+			if (encoder_funcs->detect) {
+				ret = encoder_funcs->detect(encoder, connector);
+				if (ret == connector_status_connected) {
+					radeon_connector->use_digital = 0;
+					break;
+				}
+			}
+		}
+	}
+
+	/* updated in get modes as well since we need to know if it's analog or digital */
+	radeon_connector_update_scratch_regs(connector, ret);
+	return ret;
+}
+
+/* okay need to be smart in here about which encoder to pick */
+struct drm_encoder *radeon_dvi_encoder(struct drm_connector *connector)
+{
+	int enc_id = connector->encoder_ids[0];
+	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
+	struct drm_mode_object *obj;
+	struct drm_encoder *encoder;
+	int i;
+	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
+		if (connector->encoder_ids[i] == 0)
+			break;
+
+		obj = drm_mode_object_find(connector->dev, connector->encoder_ids[i], DRM_MODE_OBJECT_ENCODER);
+		if (!obj)
+			continue;
+
+		encoder = obj_to_encoder(obj);
+
+		if (radeon_connector->use_digital) {
+			if (encoder->encoder_type == DRM_MODE_ENCODER_TMDS)
+				return encoder;
+		} else {
+			if (encoder->encoder_type == DRM_MODE_ENCODER_DAC ||
+			    encoder->encoder_type == DRM_MODE_ENCODER_TVDAC)
+				return encoder;
+		}
+	}
+
+	/* see if we have a default encoder  TODO */
+
+	/* then check use digitial */
+	/* pick the first one */
+	if (enc_id) {
+		obj = drm_mode_object_find(connector->dev, enc_id, DRM_MODE_OBJECT_ENCODER);
+		if (!obj)
+			return NULL;
+		encoder = obj_to_encoder(obj);
+		return encoder;
+	}
+	return NULL;
+}
+
+struct drm_connector_helper_funcs radeon_dvi_connector_helper_funcs = {
+	.get_modes = radeon_dvi_get_modes,
+	.mode_valid = radeon_vga_mode_valid,
+	.best_encoder = radeon_dvi_encoder,
+};
+
+struct drm_connector_funcs radeon_dvi_connector_funcs = {
+	.dpms = drm_helper_connector_dpms,
+	.detect = radeon_dvi_detect,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.set_property = radeon_connector_set_property,
+	.destroy = radeon_connector_destroy,
+};
+
+void
+radeon_add_atom_connector(struct drm_device *dev,
+			  uint32_t connector_id,
+			  uint32_t supported_device,
+			  int connector_type,
+			  struct radeon_i2c_bus_rec *i2c_bus,
+			  bool linkb,
+			  uint32_t igp_lane_info)
+{
+	struct drm_connector *connector;
+	struct radeon_connector *radeon_connector;
+	struct radeon_connector_atom_dig *radeon_dig_connector;
+	uint32_t subpixel_order = SubPixelNone;
+
+	/* fixme - tv/cv/din */
+	if ((connector_type == DRM_MODE_CONNECTOR_Unknown) ||
+	    (connector_type == DRM_MODE_CONNECTOR_SVIDEO) ||
+	    (connector_type == DRM_MODE_CONNECTOR_Composite) ||
+	    (connector_type == DRM_MODE_CONNECTOR_9PinDIN))
+		return;
+
+	/* see if we already added it */
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		radeon_connector = to_radeon_connector(connector);
+		if (radeon_connector->connector_id == connector_id) {
+			radeon_connector->devices |= supported_device;
+			return;
+		}
+	}
+
+	radeon_connector = kzalloc(sizeof(struct radeon_connector), GFP_KERNEL);
+	if (!radeon_connector)
+		return;
+
+	connector = &radeon_connector->base;
+
+	radeon_connector->connector_id = connector_id;
+	radeon_connector->devices = supported_device;
+	switch (connector_type) {
+	case DRM_MODE_CONNECTOR_VGA:
+		drm_connector_init(dev, &radeon_connector->base, &radeon_vga_connector_funcs, connector_type);
+		drm_connector_helper_add(&radeon_connector->base, &radeon_vga_connector_helper_funcs);
+		if (i2c_bus->valid) {
+			radeon_connector->ddc_bus = radeon_i2c_create(dev, i2c_bus, "VGA");
+			if (!radeon_connector->ddc_bus)
+				goto failed;
+		}
+		break;
+	case DRM_MODE_CONNECTOR_DVIA:
+		drm_connector_init(dev, &radeon_connector->base, &radeon_vga_connector_funcs, connector_type);
+		drm_connector_helper_add(&radeon_connector->base, &radeon_vga_connector_helper_funcs);
+		if (i2c_bus->valid) {
+			radeon_connector->ddc_bus = radeon_i2c_create(dev, i2c_bus, "DVI");
+			if (!radeon_connector->ddc_bus)
+				goto failed;
+		}
+		break;
+	case DRM_MODE_CONNECTOR_DVII:
+	case DRM_MODE_CONNECTOR_DVID:
+		radeon_dig_connector = kzalloc(sizeof(struct radeon_connector_atom_dig), GFP_KERNEL);
+		if (!radeon_dig_connector)
+			goto failed;
+		radeon_dig_connector->linkb = linkb;
+		radeon_dig_connector->igp_lane_info = igp_lane_info;
+		radeon_connector->con_priv = radeon_dig_connector;
+		drm_connector_init(dev, &radeon_connector->base, &radeon_dvi_connector_funcs, connector_type);
+		drm_connector_helper_add(&radeon_connector->base, &radeon_dvi_connector_helper_funcs);
+		if (i2c_bus->valid) {
+			radeon_connector->ddc_bus = radeon_i2c_create(dev, i2c_bus, "DVI");
+			if (!radeon_connector->ddc_bus)
+				goto failed;
+		}
+		subpixel_order = SubPixelHorizontalRGB;
+		break;
+	case DRM_MODE_CONNECTOR_HDMIA:
+	case DRM_MODE_CONNECTOR_HDMIB:
+		radeon_dig_connector = kzalloc(sizeof(struct radeon_connector_atom_dig), GFP_KERNEL);
+		if (!radeon_dig_connector)
+			goto failed;
+		radeon_dig_connector->linkb = linkb;
+		radeon_dig_connector->igp_lane_info = igp_lane_info;
+		radeon_connector->con_priv = radeon_dig_connector;
+		drm_connector_init(dev, &radeon_connector->base, &radeon_dvi_connector_funcs, connector_type);
+		drm_connector_helper_add(&radeon_connector->base, &radeon_dvi_connector_helper_funcs);
+		if (i2c_bus->valid) {
+			radeon_connector->ddc_bus = radeon_i2c_create(dev, i2c_bus, "HDMI");
+			if (!radeon_connector->ddc_bus)
+				goto failed;
+		}
+		subpixel_order = SubPixelHorizontalRGB;
+		break;
+	case DRM_MODE_CONNECTOR_DisplayPort:
+		radeon_dig_connector = kzalloc(sizeof(struct radeon_connector_atom_dig), GFP_KERNEL);
+		if (!radeon_dig_connector)
+			goto failed;
+		radeon_dig_connector->linkb = linkb;
+		radeon_dig_connector->igp_lane_info = igp_lane_info;
+		radeon_connector->con_priv = radeon_dig_connector;
+		drm_connector_init(dev, &radeon_connector->base, &radeon_dvi_connector_funcs, connector_type);
+		drm_connector_helper_add(&radeon_connector->base, &radeon_dvi_connector_helper_funcs);
+		if (i2c_bus->valid) {
+			radeon_connector->ddc_bus = radeon_i2c_create(dev, i2c_bus, "DP");
+			if (!radeon_connector->ddc_bus)
+				goto failed;
+		}
+		subpixel_order = SubPixelHorizontalRGB;
+		break;
+	case DRM_MODE_CONNECTOR_SVIDEO:
+	case DRM_MODE_CONNECTOR_Composite:
+	case DRM_MODE_CONNECTOR_9PinDIN:
+		break;
+	case DRM_MODE_CONNECTOR_LVDS:
+		radeon_dig_connector = kzalloc(sizeof(struct radeon_connector_atom_dig), GFP_KERNEL);
+		if (!radeon_dig_connector)
+			goto failed;
+		radeon_dig_connector->linkb = linkb;
+		radeon_dig_connector->igp_lane_info = igp_lane_info;
+		radeon_connector->con_priv = radeon_dig_connector;
+		drm_connector_init(dev, &radeon_connector->base, &radeon_lvds_connector_funcs, connector_type);
+		drm_connector_helper_add(&radeon_connector->base, &radeon_lvds_connector_helper_funcs);
+		if (i2c_bus->valid) {
+			radeon_connector->ddc_bus = radeon_i2c_create(dev, i2c_bus, "LVDS");
+			if (!radeon_connector->ddc_bus)
+				goto failed;
+		}
+		subpixel_order = SubPixelHorizontalRGB;
+		break;
+	}
+
+	connector->display_info.subpixel_order = subpixel_order;
+	drm_sysfs_connector_add(connector);
+	return;
+
+failed:
+	if (radeon_connector->ddc_bus)
+		radeon_i2c_destroy(radeon_connector->ddc_bus);
+	drm_connector_cleanup(connector);
+	kfree(connector);
+}
+
+void
+radeon_add_legacy_connector(struct drm_device *dev,
+			    uint32_t connector_id,
+			    uint32_t supported_device,
+			    int connector_type,
+			    struct radeon_i2c_bus_rec *i2c_bus)
+{
+	struct drm_connector *connector;
+	struct radeon_connector *radeon_connector;
+	uint32_t subpixel_order = SubPixelNone;
+
+	/* fixme - tv/cv/din */
+	if ((connector_type == DRM_MODE_CONNECTOR_Unknown) ||
+	    (connector_type == DRM_MODE_CONNECTOR_SVIDEO) ||
+	    (connector_type == DRM_MODE_CONNECTOR_Composite) ||
+	    (connector_type == DRM_MODE_CONNECTOR_9PinDIN))
+		return;
+
+	/* see if we already added it */
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		radeon_connector = to_radeon_connector(connector);
+		if (radeon_connector->connector_id == connector_id) {
+			radeon_connector->devices |= supported_device;
+			return;
+		}
+	}
+
+	radeon_connector = kzalloc(sizeof(struct radeon_connector), GFP_KERNEL);
+	if (!radeon_connector)
+		return;
+
+	connector = &radeon_connector->base;
+
+	radeon_connector->connector_id = connector_id;
+	radeon_connector->devices = supported_device;
+	switch (connector_type) {
+	case DRM_MODE_CONNECTOR_VGA:
+		drm_connector_init(dev, &radeon_connector->base, &radeon_vga_connector_funcs, connector_type);
+		drm_connector_helper_add(&radeon_connector->base, &radeon_vga_connector_helper_funcs);
+		if (i2c_bus->valid) {
+			radeon_connector->ddc_bus = radeon_i2c_create(dev, i2c_bus, "VGA");
+			if (!radeon_connector->ddc_bus)
+				goto failed;
+		}
+		break;
+	case DRM_MODE_CONNECTOR_DVIA:
+		drm_connector_init(dev, &radeon_connector->base, &radeon_vga_connector_funcs, connector_type);
+		drm_connector_helper_add(&radeon_connector->base, &radeon_vga_connector_helper_funcs);
+		if (i2c_bus->valid) {
+			radeon_connector->ddc_bus = radeon_i2c_create(dev, i2c_bus, "DVI");
+			if (!radeon_connector->ddc_bus)
+				goto failed;
+		}
+		break;
+	case DRM_MODE_CONNECTOR_DVII:
+	case DRM_MODE_CONNECTOR_DVID:
+		drm_connector_init(dev, &radeon_connector->base, &radeon_dvi_connector_funcs, connector_type);
+		drm_connector_helper_add(&radeon_connector->base, &radeon_dvi_connector_helper_funcs);
+		if (i2c_bus->valid) {
+			radeon_connector->ddc_bus = radeon_i2c_create(dev, i2c_bus, "DVI");
+			if (!radeon_connector->ddc_bus)
+				goto failed;
+		}
+		subpixel_order = SubPixelHorizontalRGB;
+		break;
+	case DRM_MODE_CONNECTOR_SVIDEO:
+	case DRM_MODE_CONNECTOR_Composite:
+	case DRM_MODE_CONNECTOR_9PinDIN:
+		break;
+	case DRM_MODE_CONNECTOR_LVDS:
+		drm_connector_init(dev, &radeon_connector->base, &radeon_lvds_connector_funcs, connector_type);
+		drm_connector_helper_add(&radeon_connector->base, &radeon_lvds_connector_helper_funcs);
+		if (i2c_bus->valid) {
+			radeon_connector->ddc_bus = radeon_i2c_create(dev, i2c_bus, "LVDS");
+			if (!radeon_connector->ddc_bus)
+				goto failed;
+		}
+		subpixel_order = SubPixelHorizontalRGB;
+		break;
+	}
+
+	connector->display_info.subpixel_order = subpixel_order;
+	drm_sysfs_connector_add(connector);
+	return;
+
+failed:
+	if (radeon_connector->ddc_bus)
+		radeon_i2c_destroy(radeon_connector->ddc_bus);
+	drm_connector_cleanup(connector);
+	kfree(connector);
+}
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
new file mode 100644
index 00000000000..b843f9bdfb1
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright 2008 Jerome Glisse.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jerome Glisse <glisse@freedesktop.org>
+ */
+#include "drmP.h"
+#include "radeon_drm.h"
+#include "radeon_reg.h"
+#include "radeon.h"
+
+void r100_cs_dump_packet(struct radeon_cs_parser *p,
+			 struct radeon_cs_packet *pkt);
+
+int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
+{
+	struct drm_device *ddev = p->rdev->ddev;
+	struct radeon_cs_chunk *chunk;
+	unsigned i, j;
+	bool duplicate;
+
+	if (p->chunk_relocs_idx == -1) {
+		return 0;
+	}
+	chunk = &p->chunks[p->chunk_relocs_idx];
+	/* FIXME: we assume that each relocs use 4 dwords */
+	p->nrelocs = chunk->length_dw / 4;
+	p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
+	if (p->relocs_ptr == NULL) {
+		return -ENOMEM;
+	}
+	p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_cs_reloc), GFP_KERNEL);
+	if (p->relocs == NULL) {
+		return -ENOMEM;
+	}
+	for (i = 0; i < p->nrelocs; i++) {
+		struct drm_radeon_cs_reloc *r;
+
+		duplicate = false;
+		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
+		for (j = 0; j < p->nrelocs; j++) {
+			if (r->handle == p->relocs[j].handle) {
+				p->relocs_ptr[i] = &p->relocs[j];
+				duplicate = true;
+				break;
+			}
+		}
+		if (!duplicate) {
+			p->relocs[i].gobj = drm_gem_object_lookup(ddev,
+								  p->filp,
+								  r->handle);
+			if (p->relocs[i].gobj == NULL) {
+				DRM_ERROR("gem object lookup failed 0x%x\n",
+					  r->handle);
+				return -EINVAL;
+			}
+			p->relocs_ptr[i] = &p->relocs[i];
+			p->relocs[i].robj = p->relocs[i].gobj->driver_private;
+			p->relocs[i].lobj.robj = p->relocs[i].robj;
+			p->relocs[i].lobj.rdomain = r->read_domains;
+			p->relocs[i].lobj.wdomain = r->write_domain;
+			p->relocs[i].handle = r->handle;
+			p->relocs[i].flags = r->flags;
+			INIT_LIST_HEAD(&p->relocs[i].lobj.list);
+			radeon_object_list_add_object(&p->relocs[i].lobj,
+						      &p->validated);
+		}
+	}
+	return radeon_object_list_validate(&p->validated, p->ib->fence);
+}
+
+int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
+{
+	struct drm_radeon_cs *cs = data;
+	uint64_t *chunk_array_ptr;
+	unsigned size, i;
+
+	if (!cs->num_chunks) {
+		return 0;
+	}
+	/* get chunks */
+	INIT_LIST_HEAD(&p->validated);
+	p->idx = 0;
+	p->chunk_ib_idx = -1;
+	p->chunk_relocs_idx = -1;
+	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
+	if (p->chunks_array == NULL) {
+		return -ENOMEM;
+	}
+	chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
+	if (DRM_COPY_FROM_USER(p->chunks_array, chunk_array_ptr,
+			       sizeof(uint64_t)*cs->num_chunks)) {
+		return -EFAULT;
+	}
+	p->nchunks = cs->num_chunks;
+	p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
+	if (p->chunks == NULL) {
+		return -ENOMEM;
+	}
+	for (i = 0; i < p->nchunks; i++) {
+		struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
+		struct drm_radeon_cs_chunk user_chunk;
+		uint32_t __user *cdata;
+
+		chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
+		if (DRM_COPY_FROM_USER(&user_chunk, chunk_ptr,
+				       sizeof(struct drm_radeon_cs_chunk))) {
+			return -EFAULT;
+		}
+		p->chunks[i].chunk_id = user_chunk.chunk_id;
+		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) {
+			p->chunk_relocs_idx = i;
+		}
+		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
+			p->chunk_ib_idx = i;
+		}
+		p->chunks[i].length_dw = user_chunk.length_dw;
+		cdata = (uint32_t *)(unsigned long)user_chunk.chunk_data;
+
+		p->chunks[i].kdata = NULL;
+		size = p->chunks[i].length_dw * sizeof(uint32_t);
+		p->chunks[i].kdata = kzalloc(size, GFP_KERNEL);
+		if (p->chunks[i].kdata == NULL) {
+			return -ENOMEM;
+		}
+		if (DRM_COPY_FROM_USER(p->chunks[i].kdata, cdata, size)) {
+			return -EFAULT;
+		}
+	}
+	if (p->chunks[p->chunk_ib_idx].length_dw > (16 * 1024)) {
+		DRM_ERROR("cs IB too big: %d\n",
+			  p->chunks[p->chunk_ib_idx].length_dw);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * cs_parser_fini() - clean parser states
+ * @parser:	parser structure holding parsing context.
+ * @error:	error number
+ *
+ * If error is set than unvalidate buffer, otherwise just free memory
+ * used by parsing context.
+ **/
+static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error)
+{
+	unsigned i;
+
+	if (error) {
+		radeon_object_list_unvalidate(&parser->validated);
+	} else {
+		radeon_object_list_clean(&parser->validated);
+	}
+	for (i = 0; i < parser->nrelocs; i++) {
+		if (parser->relocs[i].gobj) {
+			mutex_lock(&parser->rdev->ddev->struct_mutex);
+			drm_gem_object_unreference(parser->relocs[i].gobj);
+			mutex_unlock(&parser->rdev->ddev->struct_mutex);
+		}
+	}
+	kfree(parser->relocs);
+	kfree(parser->relocs_ptr);
+	for (i = 0; i < parser->nchunks; i++) {
+		kfree(parser->chunks[i].kdata);
+	}
+	kfree(parser->chunks);
+	kfree(parser->chunks_array);
+	radeon_ib_free(parser->rdev, &parser->ib);
+}
+
+int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_cs_parser parser;
+	struct radeon_cs_chunk *ib_chunk;
+	int r;
+
+	mutex_lock(&rdev->cs_mutex);
+	if (rdev->gpu_lockup) {
+		mutex_unlock(&rdev->cs_mutex);
+		return -EINVAL;
+	}
+	/* initialize parser */
+	memset(&parser, 0, sizeof(struct radeon_cs_parser));
+	parser.filp = filp;
+	parser.rdev = rdev;
+	r = radeon_cs_parser_init(&parser, data);
+	if (r) {
+		DRM_ERROR("Failed to initialize parser !\n");
+		radeon_cs_parser_fini(&parser, r);
+		mutex_unlock(&rdev->cs_mutex);
+		return r;
+	}
+	r =  radeon_ib_get(rdev, &parser.ib);
+	if (r) {
+		DRM_ERROR("Failed to get ib !\n");
+		radeon_cs_parser_fini(&parser, r);
+		mutex_unlock(&rdev->cs_mutex);
+		return r;
+	}
+	r = radeon_cs_parser_relocs(&parser);
+	if (r) {
+		DRM_ERROR("Failed to parse relocation !\n");
+		radeon_cs_parser_fini(&parser, r);
+		mutex_unlock(&rdev->cs_mutex);
+		return r;
+	}
+	/* Copy the packet into the IB, the parser will read from the
+	 * input memory (cached) and write to the IB (which can be
+	 * uncached). */
+	ib_chunk = &parser.chunks[parser.chunk_ib_idx];
+	parser.ib->length_dw = ib_chunk->length_dw;
+	memcpy((void *)parser.ib->ptr, ib_chunk->kdata, ib_chunk->length_dw*4);
+	r = radeon_cs_parse(&parser);
+	if (r) {
+		DRM_ERROR("Invalid command stream !\n");
+		radeon_cs_parser_fini(&parser, r);
+		mutex_unlock(&rdev->cs_mutex);
+		return r;
+	}
+	r = radeon_ib_schedule(rdev, parser.ib);
+	if (r) {
+		DRM_ERROR("Faild to schedule IB !\n");
+	}
+	radeon_cs_parser_fini(&parser, r);
+	mutex_unlock(&rdev->cs_mutex);
+	return r;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c
new file mode 100644
index 00000000000..5232441f119
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_cursor.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ */
+#include "drmP.h"
+#include "radeon_drm.h"
+#include "radeon.h"
+
+#define CURSOR_WIDTH 64
+#define CURSOR_HEIGHT 64
+
+static void radeon_lock_cursor(struct drm_crtc *crtc, bool lock)
+{
+	struct radeon_device *rdev = crtc->dev->dev_private;
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	uint32_t cur_lock;
+
+	if (ASIC_IS_AVIVO(rdev)) {
+		cur_lock = RREG32(AVIVO_D1CUR_UPDATE + radeon_crtc->crtc_offset);
+		if (lock)
+			cur_lock |= AVIVO_D1CURSOR_UPDATE_LOCK;
+		else
+			cur_lock &= ~AVIVO_D1CURSOR_UPDATE_LOCK;
+		WREG32(AVIVO_D1CUR_UPDATE + radeon_crtc->crtc_offset, cur_lock);
+	} else {
+		cur_lock = RREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset);
+		if (lock)
+			cur_lock |= RADEON_CUR_LOCK;
+		else
+			cur_lock &= ~RADEON_CUR_LOCK;
+		WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, cur_lock);
+	}
+}
+
+static void radeon_hide_cursor(struct drm_crtc *crtc)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct radeon_device *rdev = crtc->dev->dev_private;
+
+	if (ASIC_IS_AVIVO(rdev)) {
+		WREG32(RADEON_MM_INDEX, AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset);
+		WREG32(RADEON_MM_DATA, (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT));
+	} else {
+		switch (radeon_crtc->crtc_id) {
+		case 0:
+			WREG32(RADEON_MM_INDEX, RADEON_CRTC_GEN_CNTL);
+			break;
+		case 1:
+			WREG32(RADEON_MM_INDEX, RADEON_CRTC2_GEN_CNTL);
+			break;
+		default:
+			return;
+		}
+		WREG32_P(RADEON_MM_DATA, 0, ~RADEON_CRTC_CUR_EN);
+	}
+}
+
+static void radeon_show_cursor(struct drm_crtc *crtc)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct radeon_device *rdev = crtc->dev->dev_private;
+
+	if (ASIC_IS_AVIVO(rdev)) {
+		WREG32(RADEON_MM_INDEX, AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset);
+		WREG32(RADEON_MM_DATA, AVIVO_D1CURSOR_EN |
+			     (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT));
+	} else {
+		switch (radeon_crtc->crtc_id) {
+		case 0:
+			WREG32(RADEON_MM_INDEX, RADEON_CRTC_GEN_CNTL);
+			break;
+		case 1:
+			WREG32(RADEON_MM_INDEX, RADEON_CRTC2_GEN_CNTL);
+			break;
+		default:
+			return;
+		}
+
+		WREG32_P(RADEON_MM_DATA, (RADEON_CRTC_CUR_EN |
+					  (RADEON_CRTC_CUR_MODE_24BPP << RADEON_CRTC_CUR_MODE_SHIFT)),
+			 ~(RADEON_CRTC_CUR_EN | RADEON_CRTC_CUR_MODE_MASK));
+	}
+}
+
+static void radeon_set_cursor(struct drm_crtc *crtc, struct drm_gem_object *obj,
+			      uint32_t gpu_addr)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct radeon_device *rdev = crtc->dev->dev_private;
+
+	if (ASIC_IS_AVIVO(rdev))
+		WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, gpu_addr);
+	else
+		/* offset is from DISP(2)_BASE_ADDRESS */
+		WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, gpu_addr);
+}
+
+int radeon_crtc_cursor_set(struct drm_crtc *crtc,
+			   struct drm_file *file_priv,
+			   uint32_t handle,
+			   uint32_t width,
+			   uint32_t height)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct drm_gem_object *obj;
+	uint64_t gpu_addr;
+	int ret;
+
+	if (!handle) {
+		/* turn off cursor */
+		radeon_hide_cursor(crtc);
+		obj = NULL;
+		goto unpin;
+	}
+
+	if ((width > CURSOR_WIDTH) || (height > CURSOR_HEIGHT)) {
+		DRM_ERROR("bad cursor width or height %d x %d\n", width, height);
+		return -EINVAL;
+	}
+
+	radeon_crtc->cursor_width = width;
+	radeon_crtc->cursor_height = height;
+
+	obj = drm_gem_object_lookup(crtc->dev, file_priv, handle);
+	if (!obj) {
+		DRM_ERROR("Cannot find cursor object %x for crtc %d\n", handle, radeon_crtc->crtc_id);
+		return -EINVAL;
+	}
+
+	ret = radeon_gem_object_pin(obj, RADEON_GEM_DOMAIN_VRAM, &gpu_addr);
+	if (ret)
+		goto fail;
+
+	radeon_lock_cursor(crtc, true);
+	/* XXX only 27 bit offset for legacy cursor */
+	radeon_set_cursor(crtc, obj, gpu_addr);
+	radeon_show_cursor(crtc);
+	radeon_lock_cursor(crtc, false);
+
+unpin:
+	if (radeon_crtc->cursor_bo) {
+		radeon_gem_object_unpin(radeon_crtc->cursor_bo);
+		mutex_lock(&crtc->dev->struct_mutex);
+		drm_gem_object_unreference(radeon_crtc->cursor_bo);
+		mutex_unlock(&crtc->dev->struct_mutex);
+	}
+
+	radeon_crtc->cursor_bo = obj;
+	return 0;
+fail:
+	mutex_lock(&crtc->dev->struct_mutex);
+	drm_gem_object_unreference(obj);
+	mutex_unlock(&crtc->dev->struct_mutex);
+
+	return 0;
+}
+
+int radeon_crtc_cursor_move(struct drm_crtc *crtc,
+			    int x, int y)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct radeon_device *rdev = crtc->dev->dev_private;
+	int xorigin = 0, yorigin = 0;
+
+	if (x < 0)
+		xorigin = -x + 1;
+	if (y < 0)
+		yorigin = -y + 1;
+	if (xorigin >= CURSOR_WIDTH)
+		xorigin = CURSOR_WIDTH - 1;
+	if (yorigin >= CURSOR_HEIGHT)
+		yorigin = CURSOR_HEIGHT - 1;
+
+	radeon_lock_cursor(crtc, true);
+	if (ASIC_IS_AVIVO(rdev)) {
+		int w = radeon_crtc->cursor_width;
+		int i = 0;
+		struct drm_crtc *crtc_p;
+
+		/* avivo cursor are offset into the total surface */
+		x += crtc->x;
+		y += crtc->y;
+		DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y);
+
+		/* avivo cursor image can't end on 128 pixel boundry or
+		 * go past the end of the frame if both crtcs are enabled
+		 */
+		list_for_each_entry(crtc_p, &crtc->dev->mode_config.crtc_list, head) {
+			if (crtc_p->enabled)
+				i++;
+		}
+		if (i > 1) {
+			int cursor_end, frame_end;
+
+			cursor_end = x - xorigin + w;
+			frame_end = crtc->x + crtc->mode.crtc_hdisplay;
+			if (cursor_end >= frame_end) {
+				w = w - (cursor_end - frame_end);
+				if (!(frame_end & 0x7f))
+					w--;
+			} else {
+				if (!(cursor_end & 0x7f))
+					w--;
+			}
+			if (w <= 0)
+				w = 1;
+		}
+
+		WREG32(AVIVO_D1CUR_POSITION + radeon_crtc->crtc_offset,
+			     ((xorigin ? 0 : x) << 16) |
+			     (yorigin ? 0 : y));
+		WREG32(AVIVO_D1CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin);
+		WREG32(AVIVO_D1CUR_SIZE + radeon_crtc->crtc_offset,
+		       ((w - 1) << 16) | (radeon_crtc->cursor_height - 1));
+	} else {
+		if (crtc->mode.flags & DRM_MODE_FLAG_DBLSCAN)
+			y *= 2;
+
+		WREG32(RADEON_CUR_HORZ_VERT_OFF + radeon_crtc->crtc_offset,
+		       (RADEON_CUR_LOCK
+			| (xorigin << 16)
+			| yorigin));
+		WREG32(RADEON_CUR_HORZ_VERT_POSN + radeon_crtc->crtc_offset,
+		       (RADEON_CUR_LOCK
+			| ((xorigin ? 0 : x) << 16)
+			| (yorigin ? 0 : y)));
+	}
+	radeon_lock_cursor(crtc, false);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
new file mode 100644
index 00000000000..5fd2b639bf6
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -0,0 +1,813 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#include <linux/console.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/radeon_drm.h>
+#include "radeon_reg.h"
+#include "radeon.h"
+#include "radeon_asic.h"
+#include "atom.h"
+
+/*
+ * GPU scratch registers helpers function.
+ */
+static void radeon_scratch_init(struct radeon_device *rdev)
+{
+	int i;
+
+	/* FIXME: check this out */
+	if (rdev->family < CHIP_R300) {
+		rdev->scratch.num_reg = 5;
+	} else {
+		rdev->scratch.num_reg = 7;
+	}
+	for (i = 0; i < rdev->scratch.num_reg; i++) {
+		rdev->scratch.free[i] = true;
+		rdev->scratch.reg[i] = RADEON_SCRATCH_REG0 + (i * 4);
+	}
+}
+
+int radeon_scratch_get(struct radeon_device *rdev, uint32_t *reg)
+{
+	int i;
+
+	for (i = 0; i < rdev->scratch.num_reg; i++) {
+		if (rdev->scratch.free[i]) {
+			rdev->scratch.free[i] = false;
+			*reg = rdev->scratch.reg[i];
+			return 0;
+		}
+	}
+	return -EINVAL;
+}
+
+void radeon_scratch_free(struct radeon_device *rdev, uint32_t reg)
+{
+	int i;
+
+	for (i = 0; i < rdev->scratch.num_reg; i++) {
+		if (rdev->scratch.reg[i] == reg) {
+			rdev->scratch.free[i] = true;
+			return;
+		}
+	}
+}
+
+/*
+ * MC common functions
+ */
+int radeon_mc_setup(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+
+	/* Some chips have an "issue" with the memory controller, the
+	 * location must be aligned to the size. We just align it down,
+	 * too bad if we walk over the top of system memory, we don't
+	 * use DMA without a remapped anyway.
+	 * Affected chips are rv280, all r3xx, and all r4xx, but not IGP
+	 */
+	/* FGLRX seems to setup like this, VRAM a 0, then GART.
+	 */
+	/*
+	 * Note: from R6xx the address space is 40bits but here we only
+	 * use 32bits (still have to see a card which would exhaust 4G
+	 * address space).
+	 */
+	if (rdev->mc.vram_location != 0xFFFFFFFFUL) {
+		/* vram location was already setup try to put gtt after
+		 * if it fits */
+		tmp = rdev->mc.vram_location + rdev->mc.vram_size;
+		tmp = (tmp + rdev->mc.gtt_size - 1) & ~(rdev->mc.gtt_size - 1);
+		if ((0xFFFFFFFFUL - tmp) >= rdev->mc.gtt_size) {
+			rdev->mc.gtt_location = tmp;
+		} else {
+			if (rdev->mc.gtt_size >= rdev->mc.vram_location) {
+				printk(KERN_ERR "[drm] GTT too big to fit "
+				       "before or after vram location.\n");
+				return -EINVAL;
+			}
+			rdev->mc.gtt_location = 0;
+		}
+	} else if (rdev->mc.gtt_location != 0xFFFFFFFFUL) {
+		/* gtt location was already setup try to put vram before
+		 * if it fits */
+		if (rdev->mc.vram_size < rdev->mc.gtt_location) {
+			rdev->mc.vram_location = 0;
+		} else {
+			tmp = rdev->mc.gtt_location + rdev->mc.gtt_size;
+			tmp += (rdev->mc.vram_size - 1);
+			tmp &= ~(rdev->mc.vram_size - 1);
+			if ((0xFFFFFFFFUL - tmp) >= rdev->mc.vram_size) {
+				rdev->mc.vram_location = tmp;
+			} else {
+				printk(KERN_ERR "[drm] vram too big to fit "
+				       "before or after GTT location.\n");
+				return -EINVAL;
+			}
+		}
+	} else {
+		rdev->mc.vram_location = 0;
+		rdev->mc.gtt_location = rdev->mc.vram_size;
+	}
+	DRM_INFO("radeon: VRAM %uM\n", rdev->mc.vram_size >> 20);
+	DRM_INFO("radeon: VRAM from 0x%08X to 0x%08X\n",
+		 rdev->mc.vram_location,
+		 rdev->mc.vram_location + rdev->mc.vram_size - 1);
+	DRM_INFO("radeon: GTT %uM\n", rdev->mc.gtt_size >> 20);
+	DRM_INFO("radeon: GTT from 0x%08X to 0x%08X\n",
+		 rdev->mc.gtt_location,
+		 rdev->mc.gtt_location + rdev->mc.gtt_size - 1);
+	return 0;
+}
+
+
+/*
+ * GPU helpers function.
+ */
+static bool radeon_card_posted(struct radeon_device *rdev)
+{
+	uint32_t reg;
+
+	/* first check CRTCs */
+	if (ASIC_IS_AVIVO(rdev)) {
+		reg = RREG32(AVIVO_D1CRTC_CONTROL) |
+		      RREG32(AVIVO_D2CRTC_CONTROL);
+		if (reg & AVIVO_CRTC_EN) {
+			return true;
+		}
+	} else {
+		reg = RREG32(RADEON_CRTC_GEN_CNTL) |
+		      RREG32(RADEON_CRTC2_GEN_CNTL);
+		if (reg & RADEON_CRTC_EN) {
+			return true;
+		}
+	}
+
+	/* then check MEM_SIZE, in case the crtcs are off */
+	if (rdev->family >= CHIP_R600)
+		reg = RREG32(R600_CONFIG_MEMSIZE);
+	else
+		reg = RREG32(RADEON_CONFIG_MEMSIZE);
+
+	if (reg)
+		return true;
+
+	return false;
+
+}
+
+
+/*
+ * Registers accessors functions.
+ */
+uint32_t radeon_invalid_rreg(struct radeon_device *rdev, uint32_t reg)
+{
+	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
+	BUG_ON(1);
+	return 0;
+}
+
+void radeon_invalid_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
+		  reg, v);
+	BUG_ON(1);
+}
+
+void radeon_register_accessor_init(struct radeon_device *rdev)
+{
+	rdev->mm_rreg = &r100_mm_rreg;
+	rdev->mm_wreg = &r100_mm_wreg;
+	rdev->mc_rreg = &radeon_invalid_rreg;
+	rdev->mc_wreg = &radeon_invalid_wreg;
+	rdev->pll_rreg = &radeon_invalid_rreg;
+	rdev->pll_wreg = &radeon_invalid_wreg;
+	rdev->pcie_rreg = &radeon_invalid_rreg;
+	rdev->pcie_wreg = &radeon_invalid_wreg;
+	rdev->pciep_rreg = &radeon_invalid_rreg;
+	rdev->pciep_wreg = &radeon_invalid_wreg;
+
+	/* Don't change order as we are overridding accessor. */
+	if (rdev->family < CHIP_RV515) {
+		rdev->pcie_rreg = &rv370_pcie_rreg;
+		rdev->pcie_wreg = &rv370_pcie_wreg;
+	}
+	if (rdev->family >= CHIP_RV515) {
+		rdev->pcie_rreg = &rv515_pcie_rreg;
+		rdev->pcie_wreg = &rv515_pcie_wreg;
+	}
+	/* FIXME: not sure here */
+	if (rdev->family <= CHIP_R580) {
+		rdev->pll_rreg = &r100_pll_rreg;
+		rdev->pll_wreg = &r100_pll_wreg;
+	}
+	if (rdev->family >= CHIP_RV515) {
+		rdev->mc_rreg = &rv515_mc_rreg;
+		rdev->mc_wreg = &rv515_mc_wreg;
+	}
+	if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480) {
+		rdev->mc_rreg = &rs400_mc_rreg;
+		rdev->mc_wreg = &rs400_mc_wreg;
+	}
+	if (rdev->family == CHIP_RS690 || rdev->family == CHIP_RS740) {
+		rdev->mc_rreg = &rs690_mc_rreg;
+		rdev->mc_wreg = &rs690_mc_wreg;
+	}
+	if (rdev->family == CHIP_RS600) {
+		rdev->mc_rreg = &rs600_mc_rreg;
+		rdev->mc_wreg = &rs600_mc_wreg;
+	}
+	if (rdev->family >= CHIP_R600) {
+		rdev->pciep_rreg = &r600_pciep_rreg;
+		rdev->pciep_wreg = &r600_pciep_wreg;
+	}
+}
+
+
+/*
+ * ASIC
+ */
+int radeon_asic_init(struct radeon_device *rdev)
+{
+	radeon_register_accessor_init(rdev);
+	switch (rdev->family) {
+	case CHIP_R100:
+	case CHIP_RV100:
+	case CHIP_RS100:
+	case CHIP_RV200:
+	case CHIP_RS200:
+	case CHIP_R200:
+	case CHIP_RV250:
+	case CHIP_RS300:
+	case CHIP_RV280:
+		rdev->asic = &r100_asic;
+		break;
+	case CHIP_R300:
+	case CHIP_R350:
+	case CHIP_RV350:
+	case CHIP_RV380:
+		rdev->asic = &r300_asic;
+		break;
+	case CHIP_R420:
+	case CHIP_R423:
+	case CHIP_RV410:
+		rdev->asic = &r420_asic;
+		break;
+	case CHIP_RS400:
+	case CHIP_RS480:
+		rdev->asic = &rs400_asic;
+		break;
+	case CHIP_RS600:
+		rdev->asic = &rs600_asic;
+		break;
+	case CHIP_RS690:
+	case CHIP_RS740:
+		rdev->asic = &rs690_asic;
+		break;
+	case CHIP_RV515:
+		rdev->asic = &rv515_asic;
+		break;
+	case CHIP_R520:
+	case CHIP_RV530:
+	case CHIP_RV560:
+	case CHIP_RV570:
+	case CHIP_R580:
+		rdev->asic = &r520_asic;
+		break;
+	case CHIP_R600:
+	case CHIP_RV610:
+	case CHIP_RV630:
+	case CHIP_RV620:
+	case CHIP_RV635:
+	case CHIP_RV670:
+	case CHIP_RS780:
+	case CHIP_RV770:
+	case CHIP_RV730:
+	case CHIP_RV710:
+	default:
+		/* FIXME: not supported yet */
+		return -EINVAL;
+	}
+	return 0;
+}
+
+
+/*
+ * Wrapper around modesetting bits.
+ */
+int radeon_clocks_init(struct radeon_device *rdev)
+{
+	int r;
+
+	radeon_get_clock_info(rdev->ddev);
+	r = radeon_static_clocks_init(rdev->ddev);
+	if (r) {
+		return r;
+	}
+	DRM_INFO("Clocks initialized !\n");
+	return 0;
+}
+
+void radeon_clocks_fini(struct radeon_device *rdev)
+{
+}
+
+/* ATOM accessor methods */
+static uint32_t cail_pll_read(struct card_info *info, uint32_t reg)
+{
+	struct radeon_device *rdev = info->dev->dev_private;
+	uint32_t r;
+
+	r = rdev->pll_rreg(rdev, reg);
+	return r;
+}
+
+static void cail_pll_write(struct card_info *info, uint32_t reg, uint32_t val)
+{
+	struct radeon_device *rdev = info->dev->dev_private;
+
+	rdev->pll_wreg(rdev, reg, val);
+}
+
+static uint32_t cail_mc_read(struct card_info *info, uint32_t reg)
+{
+	struct radeon_device *rdev = info->dev->dev_private;
+	uint32_t r;
+
+	r = rdev->mc_rreg(rdev, reg);
+	return r;
+}
+
+static void cail_mc_write(struct card_info *info, uint32_t reg, uint32_t val)
+{
+	struct radeon_device *rdev = info->dev->dev_private;
+
+	rdev->mc_wreg(rdev, reg, val);
+}
+
+static void cail_reg_write(struct card_info *info, uint32_t reg, uint32_t val)
+{
+	struct radeon_device *rdev = info->dev->dev_private;
+
+	WREG32(reg*4, val);
+}
+
+static uint32_t cail_reg_read(struct card_info *info, uint32_t reg)
+{
+	struct radeon_device *rdev = info->dev->dev_private;
+	uint32_t r;
+
+	r = RREG32(reg*4);
+	return r;
+}
+
+static struct card_info atom_card_info = {
+	.dev = NULL,
+	.reg_read = cail_reg_read,
+	.reg_write = cail_reg_write,
+	.mc_read = cail_mc_read,
+	.mc_write = cail_mc_write,
+	.pll_read = cail_pll_read,
+	.pll_write = cail_pll_write,
+};
+
+int radeon_atombios_init(struct radeon_device *rdev)
+{
+	atom_card_info.dev = rdev->ddev;
+	rdev->mode_info.atom_context = atom_parse(&atom_card_info, rdev->bios);
+	radeon_atom_initialize_bios_scratch_regs(rdev->ddev);
+	return 0;
+}
+
+void radeon_atombios_fini(struct radeon_device *rdev)
+{
+	kfree(rdev->mode_info.atom_context);
+}
+
+int radeon_combios_init(struct radeon_device *rdev)
+{
+	radeon_combios_initialize_bios_scratch_regs(rdev->ddev);
+	return 0;
+}
+
+void radeon_combios_fini(struct radeon_device *rdev)
+{
+}
+
+int radeon_modeset_init(struct radeon_device *rdev);
+void radeon_modeset_fini(struct radeon_device *rdev);
+
+
+/*
+ * Radeon device.
+ */
+int radeon_device_init(struct radeon_device *rdev,
+		       struct drm_device *ddev,
+		       struct pci_dev *pdev,
+		       uint32_t flags)
+{
+	int r, ret;
+
+	DRM_INFO("radeon: Initializing kernel modesetting.\n");
+	rdev->shutdown = false;
+	rdev->ddev = ddev;
+	rdev->pdev = pdev;
+	rdev->flags = flags;
+	rdev->family = flags & RADEON_FAMILY_MASK;
+	rdev->is_atom_bios = false;
+	rdev->usec_timeout = RADEON_MAX_USEC_TIMEOUT;
+	rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
+	rdev->gpu_lockup = false;
+	/* mutex initialization are all done here so we
+	 * can recall function without having locking issues */
+	mutex_init(&rdev->cs_mutex);
+	mutex_init(&rdev->ib_pool.mutex);
+	mutex_init(&rdev->cp.mutex);
+	rwlock_init(&rdev->fence_drv.lock);
+
+	if (radeon_agpmode == -1) {
+		rdev->flags &= ~RADEON_IS_AGP;
+		if (rdev->family > CHIP_RV515 ||
+		    rdev->family == CHIP_RV380 ||
+		    rdev->family == CHIP_RV410 ||
+		    rdev->family == CHIP_R423) {
+			DRM_INFO("Forcing AGP to PCIE mode\n");
+			rdev->flags |= RADEON_IS_PCIE;
+		} else {
+			DRM_INFO("Forcing AGP to PCI mode\n");
+			rdev->flags |= RADEON_IS_PCI;
+		}
+	}
+
+	/* Set asic functions */
+	r = radeon_asic_init(rdev);
+	if (r) {
+		return r;
+	}
+
+	/* Report DMA addressing limitation */
+	r = pci_set_dma_mask(rdev->pdev, DMA_BIT_MASK(32));
+	if (r) {
+		printk(KERN_WARNING "radeon: No suitable DMA available.\n");
+	}
+
+	/* Registers mapping */
+	/* TODO: block userspace mapping of io register */
+	rdev->rmmio_base = drm_get_resource_start(rdev->ddev, 2);
+	rdev->rmmio_size = drm_get_resource_len(rdev->ddev, 2);
+	rdev->rmmio = ioremap(rdev->rmmio_base, rdev->rmmio_size);
+	if (rdev->rmmio == NULL) {
+		return -ENOMEM;
+	}
+	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)rdev->rmmio_base);
+	DRM_INFO("register mmio size: %u\n", (unsigned)rdev->rmmio_size);
+
+	/* Setup errata flags */
+	radeon_errata(rdev);
+	/* Initialize scratch registers */
+	radeon_scratch_init(rdev);
+
+	/* TODO: disable VGA need to use VGA request */
+	/* BIOS*/
+	if (!radeon_get_bios(rdev)) {
+		if (ASIC_IS_AVIVO(rdev))
+			return -EINVAL;
+	}
+	if (rdev->is_atom_bios) {
+		r = radeon_atombios_init(rdev);
+		if (r) {
+			return r;
+		}
+	} else {
+		r = radeon_combios_init(rdev);
+		if (r) {
+			return r;
+		}
+	}
+	/* Reset gpu before posting otherwise ATOM will enter infinite loop */
+	if (radeon_gpu_reset(rdev)) {
+		/* FIXME: what do we want to do here ? */
+	}
+	/* check if cards are posted or not */
+	if (!radeon_card_posted(rdev) && rdev->bios) {
+		DRM_INFO("GPU not posted. posting now...\n");
+		if (rdev->is_atom_bios) {
+			atom_asic_init(rdev->mode_info.atom_context);
+		} else {
+			radeon_combios_asic_init(rdev->ddev);
+		}
+	}
+	/* Get vram informations */
+	radeon_vram_info(rdev);
+	/* Device is severly broken if aper size > vram size.
+	 * for RN50/M6/M7 - Novell bug 204882 ?
+	 */
+	if (rdev->mc.vram_size < rdev->mc.aper_size) {
+		rdev->mc.aper_size = rdev->mc.vram_size;
+	}
+	/* Add an MTRR for the VRAM */
+	rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size,
+				      MTRR_TYPE_WRCOMB, 1);
+	DRM_INFO("Detected VRAM RAM=%uM, BAR=%uM\n",
+		 rdev->mc.vram_size >> 20,
+		 (unsigned)rdev->mc.aper_size >> 20);
+	DRM_INFO("RAM width %dbits %cDR\n",
+		 rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
+	/* Initialize clocks */
+	r = radeon_clocks_init(rdev);
+	if (r) {
+		return r;
+	}
+	/* Initialize memory controller (also test AGP) */
+	r = radeon_mc_init(rdev);
+	if (r) {
+		return r;
+	}
+	/* Fence driver */
+	r = radeon_fence_driver_init(rdev);
+	if (r) {
+		return r;
+	}
+	r = radeon_irq_kms_init(rdev);
+	if (r) {
+		return r;
+	}
+	/* Memory manager */
+	r = radeon_object_init(rdev);
+	if (r) {
+		return r;
+	}
+	/* Initialize GART (initialize after TTM so we can allocate
+	 * memory through TTM but finalize after TTM) */
+	r = radeon_gart_enable(rdev);
+	if (!r) {
+		r = radeon_gem_init(rdev);
+	}
+
+	/* 1M ring buffer */
+	if (!r) {
+		r = radeon_cp_init(rdev, 1024 * 1024);
+	}
+	if (!r) {
+		r = radeon_wb_init(rdev);
+		if (r) {
+			DRM_ERROR("radeon: failled initializing WB (%d).\n", r);
+			return r;
+		}
+	}
+	if (!r) {
+		r = radeon_ib_pool_init(rdev);
+		if (r) {
+			DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r);
+			return r;
+		}
+	}
+	if (!r) {
+		r = radeon_ib_test(rdev);
+		if (r) {
+			DRM_ERROR("radeon: failled testing IB (%d).\n", r);
+			return r;
+		}
+	}
+	ret = r;
+	r = radeon_modeset_init(rdev);
+	if (r) {
+		return r;
+	}
+	if (rdev->fbdev_rfb && rdev->fbdev_rfb->obj) {
+		rdev->fbdev_robj = rdev->fbdev_rfb->obj->driver_private;
+	}
+	if (!ret) {
+		DRM_INFO("radeon: kernel modesetting successfully initialized.\n");
+	}
+	if (radeon_benchmarking) {
+		radeon_benchmark(rdev);
+	}
+	return ret;
+}
+
+void radeon_device_fini(struct radeon_device *rdev)
+{
+	if (rdev == NULL || rdev->rmmio == NULL) {
+		return;
+	}
+	DRM_INFO("radeon: finishing device.\n");
+	rdev->shutdown = true;
+	/* Order matter so becarefull if you rearrange anythings */
+	radeon_modeset_fini(rdev);
+	radeon_ib_pool_fini(rdev);
+	radeon_cp_fini(rdev);
+	radeon_wb_fini(rdev);
+	radeon_gem_fini(rdev);
+	radeon_object_fini(rdev);
+	/* mc_fini must be after object_fini */
+	radeon_mc_fini(rdev);
+#if __OS_HAS_AGP
+	radeon_agp_fini(rdev);
+#endif
+	radeon_irq_kms_fini(rdev);
+	radeon_fence_driver_fini(rdev);
+	radeon_clocks_fini(rdev);
+	if (rdev->is_atom_bios) {
+		radeon_atombios_fini(rdev);
+	} else {
+		radeon_combios_fini(rdev);
+	}
+	kfree(rdev->bios);
+	rdev->bios = NULL;
+	iounmap(rdev->rmmio);
+	rdev->rmmio = NULL;
+}
+
+
+/*
+ * Suspend & resume.
+ */
+int radeon_suspend_kms(struct drm_device *dev, pm_message_t state)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_crtc *crtc;
+
+	if (dev == NULL || rdev == NULL) {
+		return -ENODEV;
+	}
+	if (state.event == PM_EVENT_PRETHAW) {
+		return 0;
+	}
+	/* unpin the front buffers */
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		struct radeon_framebuffer *rfb = to_radeon_framebuffer(crtc->fb);
+		struct radeon_object *robj;
+
+		if (rfb == NULL || rfb->obj == NULL) {
+			continue;
+		}
+		robj = rfb->obj->driver_private;
+		if (robj != rdev->fbdev_robj) {
+			radeon_object_unpin(robj);
+		}
+	}
+	/* evict vram memory */
+	radeon_object_evict_vram(rdev);
+	/* wait for gpu to finish processing current batch */
+	radeon_fence_wait_last(rdev);
+
+	radeon_cp_disable(rdev);
+	radeon_gart_disable(rdev);
+
+	/* evict remaining vram memory */
+	radeon_object_evict_vram(rdev);
+
+	rdev->irq.sw_int = false;
+	radeon_irq_set(rdev);
+
+	pci_save_state(dev->pdev);
+	if (state.event == PM_EVENT_SUSPEND) {
+		/* Shut down the device */
+		pci_disable_device(dev->pdev);
+		pci_set_power_state(dev->pdev, PCI_D3hot);
+	}
+	acquire_console_sem();
+	fb_set_suspend(rdev->fbdev_info, 1);
+	release_console_sem();
+	return 0;
+}
+
+int radeon_resume_kms(struct drm_device *dev)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	int r;
+
+	acquire_console_sem();
+	pci_set_power_state(dev->pdev, PCI_D0);
+	pci_restore_state(dev->pdev);
+	if (pci_enable_device(dev->pdev)) {
+		release_console_sem();
+		return -1;
+	}
+	pci_set_master(dev->pdev);
+	/* Reset gpu before posting otherwise ATOM will enter infinite loop */
+	if (radeon_gpu_reset(rdev)) {
+		/* FIXME: what do we want to do here ? */
+	}
+	/* post card */
+	if (rdev->is_atom_bios) {
+		atom_asic_init(rdev->mode_info.atom_context);
+	} else {
+		radeon_combios_asic_init(rdev->ddev);
+	}
+	/* Initialize clocks */
+	r = radeon_clocks_init(rdev);
+	if (r) {
+		release_console_sem();
+		return r;
+	}
+	/* Enable IRQ */
+	rdev->irq.sw_int = true;
+	radeon_irq_set(rdev);
+	/* Initialize GPU Memory Controller */
+	r = radeon_mc_init(rdev);
+	if (r) {
+		goto out;
+	}
+	r = radeon_gart_enable(rdev);
+	if (r) {
+		goto out;
+	}
+	r = radeon_cp_init(rdev, rdev->cp.ring_size);
+	if (r) {
+		goto out;
+	}
+out:
+	fb_set_suspend(rdev->fbdev_info, 0);
+	release_console_sem();
+
+	/* blat the mode back in */
+	drm_helper_resume_force_mode(dev);
+	return 0;
+}
+
+
+/*
+ * Debugfs
+ */
+struct radeon_debugfs {
+	struct drm_info_list	*files;
+	unsigned		num_files;
+};
+static struct radeon_debugfs _radeon_debugfs[RADEON_DEBUGFS_MAX_NUM_FILES];
+static unsigned _radeon_debugfs_count = 0;
+
+int radeon_debugfs_add_files(struct radeon_device *rdev,
+			     struct drm_info_list *files,
+			     unsigned nfiles)
+{
+	unsigned i;
+
+	for (i = 0; i < _radeon_debugfs_count; i++) {
+		if (_radeon_debugfs[i].files == files) {
+			/* Already registered */
+			return 0;
+		}
+	}
+	if ((_radeon_debugfs_count + nfiles) > RADEON_DEBUGFS_MAX_NUM_FILES) {
+		DRM_ERROR("Reached maximum number of debugfs files.\n");
+		DRM_ERROR("Report so we increase RADEON_DEBUGFS_MAX_NUM_FILES.\n");
+		return -EINVAL;
+	}
+	_radeon_debugfs[_radeon_debugfs_count].files = files;
+	_radeon_debugfs[_radeon_debugfs_count].num_files = nfiles;
+	_radeon_debugfs_count++;
+#if defined(CONFIG_DEBUG_FS)
+	drm_debugfs_create_files(files, nfiles,
+				 rdev->ddev->control->debugfs_root,
+				 rdev->ddev->control);
+	drm_debugfs_create_files(files, nfiles,
+				 rdev->ddev->primary->debugfs_root,
+				 rdev->ddev->primary);
+#endif
+	return 0;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+int radeon_debugfs_init(struct drm_minor *minor)
+{
+	return 0;
+}
+
+void radeon_debugfs_cleanup(struct drm_minor *minor)
+{
+	unsigned i;
+
+	for (i = 0; i < _radeon_debugfs_count; i++) {
+		drm_debugfs_remove_files(_radeon_debugfs[i].files,
+					 _radeon_debugfs[i].num_files, minor);
+	}
+}
+#endif
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
new file mode 100644
index 00000000000..5452bb9d925
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -0,0 +1,692 @@
+/*
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ */
+#include "drmP.h"
+#include "radeon_drm.h"
+#include "radeon.h"
+
+#include "atom.h"
+#include <asm/div64.h>
+
+#include "drm_crtc_helper.h"
+#include "drm_edid.h"
+
+static int radeon_ddc_dump(struct drm_connector *connector);
+
+static void avivo_crtc_load_lut(struct drm_crtc *crtc)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	int i;
+
+	DRM_DEBUG("%d\n", radeon_crtc->crtc_id);
+	WREG32(AVIVO_DC_LUTA_CONTROL + radeon_crtc->crtc_offset, 0);
+
+	WREG32(AVIVO_DC_LUTA_BLACK_OFFSET_BLUE + radeon_crtc->crtc_offset, 0);
+	WREG32(AVIVO_DC_LUTA_BLACK_OFFSET_GREEN + radeon_crtc->crtc_offset, 0);
+	WREG32(AVIVO_DC_LUTA_BLACK_OFFSET_RED + radeon_crtc->crtc_offset, 0);
+
+	WREG32(AVIVO_DC_LUTA_WHITE_OFFSET_BLUE + radeon_crtc->crtc_offset, 0xffff);
+	WREG32(AVIVO_DC_LUTA_WHITE_OFFSET_GREEN + radeon_crtc->crtc_offset, 0xffff);
+	WREG32(AVIVO_DC_LUTA_WHITE_OFFSET_RED + radeon_crtc->crtc_offset, 0xffff);
+
+	WREG32(AVIVO_DC_LUT_RW_SELECT, radeon_crtc->crtc_id);
+	WREG32(AVIVO_DC_LUT_RW_MODE, 0);
+	WREG32(AVIVO_DC_LUT_WRITE_EN_MASK, 0x0000003f);
+
+	WREG8(AVIVO_DC_LUT_RW_INDEX, 0);
+	for (i = 0; i < 256; i++) {
+		WREG32(AVIVO_DC_LUT_30_COLOR,
+			     (radeon_crtc->lut_r[i] << 20) |
+			     (radeon_crtc->lut_g[i] << 10) |
+			     (radeon_crtc->lut_b[i] << 0));
+	}
+
+	WREG32(AVIVO_D1GRPH_LUT_SEL + radeon_crtc->crtc_offset, radeon_crtc->crtc_id);
+}
+
+static void legacy_crtc_load_lut(struct drm_crtc *crtc)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	int i;
+	uint32_t dac2_cntl;
+
+	dac2_cntl = RREG32(RADEON_DAC_CNTL2);
+	if (radeon_crtc->crtc_id == 0)
+		dac2_cntl &= (uint32_t)~RADEON_DAC2_PALETTE_ACC_CTL;
+	else
+		dac2_cntl |= RADEON_DAC2_PALETTE_ACC_CTL;
+	WREG32(RADEON_DAC_CNTL2, dac2_cntl);
+
+	WREG8(RADEON_PALETTE_INDEX, 0);
+	for (i = 0; i < 256; i++) {
+		WREG32(RADEON_PALETTE_30_DATA,
+			     (radeon_crtc->lut_r[i] << 20) |
+			     (radeon_crtc->lut_g[i] << 10) |
+			     (radeon_crtc->lut_b[i] << 0));
+	}
+}
+
+void radeon_crtc_load_lut(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+
+	if (!crtc->enabled)
+		return;
+
+	if (ASIC_IS_AVIVO(rdev))
+		avivo_crtc_load_lut(crtc);
+	else
+		legacy_crtc_load_lut(crtc);
+}
+
+/** Sets the color ramps on behalf of RandR */
+void radeon_crtc_fb_gamma_set(struct drm_crtc *crtc, u16 red, u16 green,
+			      u16 blue, int regno)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+
+	if (regno == 0)
+		DRM_DEBUG("gamma set %d\n", radeon_crtc->crtc_id);
+	radeon_crtc->lut_r[regno] = red >> 6;
+	radeon_crtc->lut_g[regno] = green >> 6;
+	radeon_crtc->lut_b[regno] = blue >> 6;
+}
+
+static void radeon_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
+				  u16 *blue, uint32_t size)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	int i, j;
+
+	if (size != 256) {
+		return;
+	}
+	if (crtc->fb == NULL) {
+		return;
+	}
+
+	if (crtc->fb->depth == 16) {
+		for (i = 0; i < 64; i++) {
+			if (i <= 31) {
+				for (j = 0; j < 8; j++) {
+					radeon_crtc->lut_r[i * 8 + j] = red[i] >> 6;
+					radeon_crtc->lut_b[i * 8 + j] = blue[i] >> 6;
+				}
+			}
+			for (j = 0; j < 4; j++)
+				radeon_crtc->lut_g[i * 4 + j] = green[i] >> 6;
+		}
+	} else {
+		for (i = 0; i < 256; i++) {
+			radeon_crtc->lut_r[i] = red[i] >> 6;
+			radeon_crtc->lut_g[i] = green[i] >> 6;
+			radeon_crtc->lut_b[i] = blue[i] >> 6;
+		}
+	}
+
+	radeon_crtc_load_lut(crtc);
+}
+
+static void radeon_crtc_destroy(struct drm_crtc *crtc)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+
+	if (radeon_crtc->mode_set.mode) {
+		drm_mode_destroy(crtc->dev, radeon_crtc->mode_set.mode);
+	}
+	drm_crtc_cleanup(crtc);
+	kfree(radeon_crtc);
+}
+
+static const struct drm_crtc_funcs radeon_crtc_funcs = {
+	.cursor_set = radeon_crtc_cursor_set,
+	.cursor_move = radeon_crtc_cursor_move,
+	.gamma_set = radeon_crtc_gamma_set,
+	.set_config = drm_crtc_helper_set_config,
+	.destroy = radeon_crtc_destroy,
+};
+
+static void radeon_crtc_init(struct drm_device *dev, int index)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_crtc *radeon_crtc;
+	int i;
+
+	radeon_crtc = kzalloc(sizeof(struct radeon_crtc) + (RADEONFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL);
+	if (radeon_crtc == NULL)
+		return;
+
+	drm_crtc_init(dev, &radeon_crtc->base, &radeon_crtc_funcs);
+
+	drm_mode_crtc_set_gamma_size(&radeon_crtc->base, 256);
+	radeon_crtc->crtc_id = index;
+
+	radeon_crtc->mode_set.crtc = &radeon_crtc->base;
+	radeon_crtc->mode_set.connectors = (struct drm_connector **)(radeon_crtc + 1);
+	radeon_crtc->mode_set.num_connectors = 0;
+
+	for (i = 0; i < 256; i++) {
+		radeon_crtc->lut_r[i] = i << 2;
+		radeon_crtc->lut_g[i] = i << 2;
+		radeon_crtc->lut_b[i] = i << 2;
+	}
+
+	if (rdev->is_atom_bios && (ASIC_IS_AVIVO(rdev) || radeon_r4xx_atom))
+		radeon_atombios_init_crtc(dev, radeon_crtc);
+	else
+		radeon_legacy_init_crtc(dev, radeon_crtc);
+}
+
+static const char *encoder_names[34] = {
+	"NONE",
+	"INTERNAL_LVDS",
+	"INTERNAL_TMDS1",
+	"INTERNAL_TMDS2",
+	"INTERNAL_DAC1",
+	"INTERNAL_DAC2",
+	"INTERNAL_SDVOA",
+	"INTERNAL_SDVOB",
+	"SI170B",
+	"CH7303",
+	"CH7301",
+	"INTERNAL_DVO1",
+	"EXTERNAL_SDVOA",
+	"EXTERNAL_SDVOB",
+	"TITFP513",
+	"INTERNAL_LVTM1",
+	"VT1623",
+	"HDMI_SI1930",
+	"HDMI_INTERNAL",
+	"INTERNAL_KLDSCP_TMDS1",
+	"INTERNAL_KLDSCP_DVO1",
+	"INTERNAL_KLDSCP_DAC1",
+	"INTERNAL_KLDSCP_DAC2",
+	"SI178",
+	"MVPU_FPGA",
+	"INTERNAL_DDI",
+	"VT1625",
+	"HDMI_SI1932",
+	"DP_AN9801",
+	"DP_DP501",
+	"INTERNAL_UNIPHY",
+	"INTERNAL_KLDSCP_LVTMA",
+	"INTERNAL_UNIPHY1",
+	"INTERNAL_UNIPHY2",
+};
+
+static const char *connector_names[13] = {
+	"Unknown",
+	"VGA",
+	"DVI-I",
+	"DVI-D",
+	"DVI-A",
+	"Composite",
+	"S-video",
+	"LVDS",
+	"Component",
+	"DIN",
+	"DisplayPort",
+	"HDMI-A",
+	"HDMI-B",
+};
+
+static void radeon_print_display_setup(struct drm_device *dev)
+{
+	struct drm_connector *connector;
+	struct radeon_connector *radeon_connector;
+	struct drm_encoder *encoder;
+	struct radeon_encoder *radeon_encoder;
+	uint32_t devices;
+	int i = 0;
+
+	DRM_INFO("Radeon Display Connectors\n");
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		radeon_connector = to_radeon_connector(connector);
+		DRM_INFO("Connector %d:\n", i);
+		DRM_INFO("  %s\n", connector_names[connector->connector_type]);
+		if (radeon_connector->ddc_bus)
+			DRM_INFO("  DDC: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
+				 radeon_connector->ddc_bus->rec.mask_clk_reg,
+				 radeon_connector->ddc_bus->rec.mask_data_reg,
+				 radeon_connector->ddc_bus->rec.a_clk_reg,
+				 radeon_connector->ddc_bus->rec.a_data_reg,
+				 radeon_connector->ddc_bus->rec.put_clk_reg,
+				 radeon_connector->ddc_bus->rec.put_data_reg,
+				 radeon_connector->ddc_bus->rec.get_clk_reg,
+				 radeon_connector->ddc_bus->rec.get_data_reg);
+		DRM_INFO("  Encoders:\n");
+		list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+			radeon_encoder = to_radeon_encoder(encoder);
+			devices = radeon_encoder->devices & radeon_connector->devices;
+			if (devices) {
+				if (devices & ATOM_DEVICE_CRT1_SUPPORT)
+					DRM_INFO("    CRT1: %s\n", encoder_names[radeon_encoder->encoder_id]);
+				if (devices & ATOM_DEVICE_CRT2_SUPPORT)
+					DRM_INFO("    CRT2: %s\n", encoder_names[radeon_encoder->encoder_id]);
+				if (devices & ATOM_DEVICE_LCD1_SUPPORT)
+					DRM_INFO("    LCD1: %s\n", encoder_names[radeon_encoder->encoder_id]);
+				if (devices & ATOM_DEVICE_DFP1_SUPPORT)
+					DRM_INFO("    DFP1: %s\n", encoder_names[radeon_encoder->encoder_id]);
+				if (devices & ATOM_DEVICE_DFP2_SUPPORT)
+					DRM_INFO("    DFP2: %s\n", encoder_names[radeon_encoder->encoder_id]);
+				if (devices & ATOM_DEVICE_DFP3_SUPPORT)
+					DRM_INFO("    DFP3: %s\n", encoder_names[radeon_encoder->encoder_id]);
+				if (devices & ATOM_DEVICE_DFP4_SUPPORT)
+					DRM_INFO("    DFP4: %s\n", encoder_names[radeon_encoder->encoder_id]);
+				if (devices & ATOM_DEVICE_DFP5_SUPPORT)
+					DRM_INFO("    DFP5: %s\n", encoder_names[radeon_encoder->encoder_id]);
+				if (devices & ATOM_DEVICE_TV1_SUPPORT)
+					DRM_INFO("    TV1: %s\n", encoder_names[radeon_encoder->encoder_id]);
+				if (devices & ATOM_DEVICE_CV_SUPPORT)
+					DRM_INFO("    CV: %s\n", encoder_names[radeon_encoder->encoder_id]);
+			}
+		}
+		i++;
+	}
+}
+
+bool radeon_setup_enc_conn(struct drm_device *dev)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_connector *drm_connector;
+	bool ret = false;
+
+	if (rdev->bios) {
+		if (rdev->is_atom_bios) {
+			if (rdev->family >= CHIP_R600)
+				ret = radeon_get_atom_connector_info_from_object_table(dev);
+			else
+				ret = radeon_get_atom_connector_info_from_supported_devices_table(dev);
+		} else
+			ret = radeon_get_legacy_connector_info_from_bios(dev);
+	} else {
+		if (!ASIC_IS_AVIVO(rdev))
+			ret = radeon_get_legacy_connector_info_from_table(dev);
+	}
+	if (ret) {
+		radeon_print_display_setup(dev);
+		list_for_each_entry(drm_connector, &dev->mode_config.connector_list, head)
+			radeon_ddc_dump(drm_connector);
+	}
+
+	return ret;
+}
+
+int radeon_ddc_get_modes(struct radeon_connector *radeon_connector)
+{
+	struct edid *edid;
+	int ret = 0;
+
+	if (!radeon_connector->ddc_bus)
+		return -1;
+	radeon_i2c_do_lock(radeon_connector, 1);
+	edid = drm_get_edid(&radeon_connector->base, &radeon_connector->ddc_bus->adapter);
+	radeon_i2c_do_lock(radeon_connector, 0);
+	if (edid) {
+		/* update digital bits here */
+		if (edid->digital)
+			radeon_connector->use_digital = 1;
+		else
+			radeon_connector->use_digital = 0;
+		drm_mode_connector_update_edid_property(&radeon_connector->base, edid);
+		ret = drm_add_edid_modes(&radeon_connector->base, edid);
+		kfree(edid);
+		return ret;
+	}
+	drm_mode_connector_update_edid_property(&radeon_connector->base, NULL);
+	return -1;
+}
+
+static int radeon_ddc_dump(struct drm_connector *connector)
+{
+	struct edid *edid;
+	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
+	int ret = 0;
+
+	if (!radeon_connector->ddc_bus)
+		return -1;
+	radeon_i2c_do_lock(radeon_connector, 1);
+	edid = drm_get_edid(connector, &radeon_connector->ddc_bus->adapter);
+	radeon_i2c_do_lock(radeon_connector, 0);
+	if (edid) {
+		kfree(edid);
+	}
+	return ret;
+}
+
+static inline uint32_t radeon_div(uint64_t n, uint32_t d)
+{
+	uint64_t mod;
+
+	n += d / 2;
+
+	mod = do_div(n, d);
+	return n;
+}
+
+void radeon_compute_pll(struct radeon_pll *pll,
+			uint64_t freq,
+			uint32_t *dot_clock_p,
+			uint32_t *fb_div_p,
+			uint32_t *frac_fb_div_p,
+			uint32_t *ref_div_p,
+			uint32_t *post_div_p,
+			int flags)
+{
+	uint32_t min_ref_div = pll->min_ref_div;
+	uint32_t max_ref_div = pll->max_ref_div;
+	uint32_t min_fractional_feed_div = 0;
+	uint32_t max_fractional_feed_div = 0;
+	uint32_t best_vco = pll->best_vco;
+	uint32_t best_post_div = 1;
+	uint32_t best_ref_div = 1;
+	uint32_t best_feedback_div = 1;
+	uint32_t best_frac_feedback_div = 0;
+	uint32_t best_freq = -1;
+	uint32_t best_error = 0xffffffff;
+	uint32_t best_vco_diff = 1;
+	uint32_t post_div;
+
+	DRM_DEBUG("PLL freq %llu %u %u\n", freq, pll->min_ref_div, pll->max_ref_div);
+	freq = freq * 1000;
+
+	if (flags & RADEON_PLL_USE_REF_DIV)
+		min_ref_div = max_ref_div = pll->reference_div;
+	else {
+		while (min_ref_div < max_ref_div-1) {
+			uint32_t mid = (min_ref_div + max_ref_div) / 2;
+			uint32_t pll_in = pll->reference_freq / mid;
+			if (pll_in < pll->pll_in_min)
+				max_ref_div = mid;
+			else if (pll_in > pll->pll_in_max)
+				min_ref_div = mid;
+			else
+				break;
+		}
+	}
+
+	if (flags & RADEON_PLL_USE_FRAC_FB_DIV) {
+		min_fractional_feed_div = pll->min_frac_feedback_div;
+		max_fractional_feed_div = pll->max_frac_feedback_div;
+	}
+
+	for (post_div = pll->min_post_div; post_div <= pll->max_post_div; ++post_div) {
+		uint32_t ref_div;
+
+		if ((flags & RADEON_PLL_NO_ODD_POST_DIV) && (post_div & 1))
+			continue;
+
+		/* legacy radeons only have a few post_divs */
+		if (flags & RADEON_PLL_LEGACY) {
+			if ((post_div == 5) ||
+			    (post_div == 7) ||
+			    (post_div == 9) ||
+			    (post_div == 10) ||
+			    (post_div == 11) ||
+			    (post_div == 13) ||
+			    (post_div == 14) ||
+			    (post_div == 15))
+				continue;
+		}
+
+		for (ref_div = min_ref_div; ref_div <= max_ref_div; ++ref_div) {
+			uint32_t feedback_div, current_freq = 0, error, vco_diff;
+			uint32_t pll_in = pll->reference_freq / ref_div;
+			uint32_t min_feed_div = pll->min_feedback_div;
+			uint32_t max_feed_div = pll->max_feedback_div + 1;
+
+			if (pll_in < pll->pll_in_min || pll_in > pll->pll_in_max)
+				continue;
+
+			while (min_feed_div < max_feed_div) {
+				uint32_t vco;
+				uint32_t min_frac_feed_div = min_fractional_feed_div;
+				uint32_t max_frac_feed_div = max_fractional_feed_div + 1;
+				uint32_t frac_feedback_div;
+				uint64_t tmp;
+
+				feedback_div = (min_feed_div + max_feed_div) / 2;
+
+				tmp = (uint64_t)pll->reference_freq * feedback_div;
+				vco = radeon_div(tmp, ref_div);
+
+				if (vco < pll->pll_out_min) {
+					min_feed_div = feedback_div + 1;
+					continue;
+				} else if (vco > pll->pll_out_max) {
+					max_feed_div = feedback_div;
+					continue;
+				}
+
+				while (min_frac_feed_div < max_frac_feed_div) {
+					frac_feedback_div = (min_frac_feed_div + max_frac_feed_div) / 2;
+					tmp = (uint64_t)pll->reference_freq * 10000 * feedback_div;
+					tmp += (uint64_t)pll->reference_freq * 1000 * frac_feedback_div;
+					current_freq = radeon_div(tmp, ref_div * post_div);
+
+					error = abs(current_freq - freq);
+					vco_diff = abs(vco - best_vco);
+
+					if ((best_vco == 0 && error < best_error) ||
+					    (best_vco != 0 &&
+					     (error < best_error - 100 ||
+					      (abs(error - best_error) < 100 && vco_diff < best_vco_diff)))) {
+						best_post_div = post_div;
+						best_ref_div = ref_div;
+						best_feedback_div = feedback_div;
+						best_frac_feedback_div = frac_feedback_div;
+						best_freq = current_freq;
+						best_error = error;
+						best_vco_diff = vco_diff;
+					} else if (current_freq == freq) {
+						if (best_freq == -1) {
+							best_post_div = post_div;
+							best_ref_div = ref_div;
+							best_feedback_div = feedback_div;
+							best_frac_feedback_div = frac_feedback_div;
+							best_freq = current_freq;
+							best_error = error;
+							best_vco_diff = vco_diff;
+						} else if (((flags & RADEON_PLL_PREFER_LOW_REF_DIV) && (ref_div < best_ref_div)) ||
+							   ((flags & RADEON_PLL_PREFER_HIGH_REF_DIV) && (ref_div > best_ref_div)) ||
+							   ((flags & RADEON_PLL_PREFER_LOW_FB_DIV) && (feedback_div < best_feedback_div)) ||
+							   ((flags & RADEON_PLL_PREFER_HIGH_FB_DIV) && (feedback_div > best_feedback_div)) ||
+							   ((flags & RADEON_PLL_PREFER_LOW_POST_DIV) && (post_div < best_post_div)) ||
+							   ((flags & RADEON_PLL_PREFER_HIGH_POST_DIV) && (post_div > best_post_div))) {
+							best_post_div = post_div;
+							best_ref_div = ref_div;
+							best_feedback_div = feedback_div;
+							best_frac_feedback_div = frac_feedback_div;
+							best_freq = current_freq;
+							best_error = error;
+							best_vco_diff = vco_diff;
+						}
+					}
+					if (current_freq < freq)
+						min_frac_feed_div = frac_feedback_div + 1;
+					else
+						max_frac_feed_div = frac_feedback_div;
+				}
+				if (current_freq < freq)
+					min_feed_div = feedback_div + 1;
+				else
+					max_feed_div = feedback_div;
+			}
+		}
+	}
+
+	*dot_clock_p = best_freq / 10000;
+	*fb_div_p = best_feedback_div;
+	*frac_fb_div_p = best_frac_feedback_div;
+	*ref_div_p = best_ref_div;
+	*post_div_p = best_post_div;
+}
+
+static void radeon_user_framebuffer_destroy(struct drm_framebuffer *fb)
+{
+	struct radeon_framebuffer *radeon_fb = to_radeon_framebuffer(fb);
+	struct drm_device *dev = fb->dev;
+
+	if (fb->fbdev)
+		radeonfb_remove(dev, fb);
+
+	if (radeon_fb->obj) {
+		radeon_gem_object_unpin(radeon_fb->obj);
+		mutex_lock(&dev->struct_mutex);
+		drm_gem_object_unreference(radeon_fb->obj);
+		mutex_unlock(&dev->struct_mutex);
+	}
+	drm_framebuffer_cleanup(fb);
+	kfree(radeon_fb);
+}
+
+static int radeon_user_framebuffer_create_handle(struct drm_framebuffer *fb,
+						  struct drm_file *file_priv,
+						  unsigned int *handle)
+{
+	struct radeon_framebuffer *radeon_fb = to_radeon_framebuffer(fb);
+
+	return drm_gem_handle_create(file_priv, radeon_fb->obj, handle);
+}
+
+static const struct drm_framebuffer_funcs radeon_fb_funcs = {
+	.destroy = radeon_user_framebuffer_destroy,
+	.create_handle = radeon_user_framebuffer_create_handle,
+};
+
+struct drm_framebuffer *
+radeon_framebuffer_create(struct drm_device *dev,
+			  struct drm_mode_fb_cmd *mode_cmd,
+			  struct drm_gem_object *obj)
+{
+	struct radeon_framebuffer *radeon_fb;
+
+	radeon_fb = kzalloc(sizeof(*radeon_fb), GFP_KERNEL);
+	if (radeon_fb == NULL) {
+		return NULL;
+	}
+	drm_framebuffer_init(dev, &radeon_fb->base, &radeon_fb_funcs);
+	drm_helper_mode_fill_fb_struct(&radeon_fb->base, mode_cmd);
+	radeon_fb->obj = obj;
+	return &radeon_fb->base;
+}
+
+static struct drm_framebuffer *
+radeon_user_framebuffer_create(struct drm_device *dev,
+			       struct drm_file *file_priv,
+			       struct drm_mode_fb_cmd *mode_cmd)
+{
+	struct drm_gem_object *obj;
+
+	obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handle);
+
+	return radeon_framebuffer_create(dev, mode_cmd, obj);
+}
+
+static const struct drm_mode_config_funcs radeon_mode_funcs = {
+	.fb_create = radeon_user_framebuffer_create,
+	.fb_changed = radeonfb_probe,
+};
+
+int radeon_modeset_init(struct radeon_device *rdev)
+{
+	int num_crtc = 2, i;
+	int ret;
+
+	drm_mode_config_init(rdev->ddev);
+	rdev->mode_info.mode_config_initialized = true;
+
+	rdev->ddev->mode_config.funcs = (void *)&radeon_mode_funcs;
+
+	if (ASIC_IS_AVIVO(rdev)) {
+		rdev->ddev->mode_config.max_width = 8192;
+		rdev->ddev->mode_config.max_height = 8192;
+	} else {
+		rdev->ddev->mode_config.max_width = 4096;
+		rdev->ddev->mode_config.max_height = 4096;
+	}
+
+	rdev->ddev->mode_config.fb_base = rdev->mc.aper_base;
+
+	/* allocate crtcs - TODO single crtc */
+	for (i = 0; i < num_crtc; i++) {
+		radeon_crtc_init(rdev->ddev, i);
+	}
+
+	/* okay we should have all the bios connectors */
+	ret = radeon_setup_enc_conn(rdev->ddev);
+	if (!ret) {
+		return ret;
+	}
+	drm_helper_initial_config(rdev->ddev);
+	return 0;
+}
+
+void radeon_modeset_fini(struct radeon_device *rdev)
+{
+	if (rdev->mode_info.mode_config_initialized) {
+		drm_mode_config_cleanup(rdev->ddev);
+		rdev->mode_info.mode_config_initialized = false;
+	}
+}
+
+void radeon_init_disp_bandwidth(struct drm_device *dev)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_display_mode *modes[2];
+	int pixel_bytes[2];
+	struct drm_crtc *crtc;
+
+	pixel_bytes[0] = pixel_bytes[1] = 0;
+	modes[0] = modes[1] = NULL;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+
+		if (crtc->enabled && crtc->fb) {
+			modes[radeon_crtc->crtc_id] = &crtc->mode;
+			pixel_bytes[radeon_crtc->crtc_id] = crtc->fb->bits_per_pixel / 8;
+		}
+	}
+
+	if (ASIC_IS_AVIVO(rdev)) {
+		radeon_init_disp_bw_avivo(dev,
+					  modes[0],
+					  pixel_bytes[0],
+					  modes[1],
+					  pixel_bytes[1]);
+	} else {
+		radeon_init_disp_bw_legacy(dev,
+					   modes[0],
+					   pixel_bytes[0],
+					   modes[1],
+					   pixel_bytes[1]);
+	}
+}
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 13a60f4d422..f70c351bb43 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -35,12 +35,92 @@
 #include "radeon_drv.h"
 
 #include "drm_pciids.h"
+#include <linux/console.h>
+
+
+#if defined(CONFIG_DRM_RADEON_KMS)
+/*
+ * KMS wrapper.
+ */
+#define KMS_DRIVER_MAJOR	2
+#define KMS_DRIVER_MINOR	0
+#define KMS_DRIVER_PATCHLEVEL	0
+int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
+int radeon_driver_unload_kms(struct drm_device *dev);
+int radeon_driver_firstopen_kms(struct drm_device *dev);
+void radeon_driver_lastclose_kms(struct drm_device *dev);
+int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
+void radeon_driver_postclose_kms(struct drm_device *dev,
+				 struct drm_file *file_priv);
+void radeon_driver_preclose_kms(struct drm_device *dev,
+				struct drm_file *file_priv);
+int radeon_suspend_kms(struct drm_device *dev, pm_message_t state);
+int radeon_resume_kms(struct drm_device *dev);
+u32 radeon_get_vblank_counter_kms(struct drm_device *dev, int crtc);
+int radeon_enable_vblank_kms(struct drm_device *dev, int crtc);
+void radeon_disable_vblank_kms(struct drm_device *dev, int crtc);
+void radeon_driver_irq_preinstall_kms(struct drm_device *dev);
+int radeon_driver_irq_postinstall_kms(struct drm_device *dev);
+void radeon_driver_irq_uninstall_kms(struct drm_device *dev);
+irqreturn_t radeon_driver_irq_handler_kms(DRM_IRQ_ARGS);
+int radeon_master_create_kms(struct drm_device *dev, struct drm_master *master);
+void radeon_master_destroy_kms(struct drm_device *dev,
+			       struct drm_master *master);
+int radeon_dma_ioctl_kms(struct drm_device *dev, void *data,
+			 struct drm_file *file_priv);
+int radeon_gem_object_init(struct drm_gem_object *obj);
+void radeon_gem_object_free(struct drm_gem_object *obj);
+extern struct drm_ioctl_desc radeon_ioctls_kms[];
+extern int radeon_max_kms_ioctl;
+int radeon_mmap(struct file *filp, struct vm_area_struct *vma);
+#if defined(CONFIG_DEBUG_FS)
+int radeon_debugfs_init(struct drm_minor *minor);
+void radeon_debugfs_cleanup(struct drm_minor *minor);
+#endif
+#endif
+
 
 int radeon_no_wb;
+#if defined(CONFIG_DRM_RADEON_KMS)
+int radeon_modeset = -1;
+int radeon_dynclks = -1;
+int radeon_r4xx_atom = 0;
+int radeon_agpmode = 0;
+int radeon_vram_limit = 0;
+int radeon_gart_size = 512; /* default gart size */
+int radeon_benchmarking = 0;
+int radeon_connector_table = 0;
+#endif
 
 MODULE_PARM_DESC(no_wb, "Disable AGP writeback for scratch registers");
 module_param_named(no_wb, radeon_no_wb, int, 0444);
 
+#if defined(CONFIG_DRM_RADEON_KMS)
+MODULE_PARM_DESC(modeset, "Disable/Enable modesetting");
+module_param_named(modeset, radeon_modeset, int, 0400);
+
+MODULE_PARM_DESC(dynclks, "Disable/Enable dynamic clocks");
+module_param_named(dynclks, radeon_dynclks, int, 0444);
+
+MODULE_PARM_DESC(r4xx_atom, "Enable ATOMBIOS modesetting for R4xx");
+module_param_named(r4xx_atom, radeon_r4xx_atom, int, 0444);
+
+MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing");
+module_param_named(vramlimit, radeon_vram_limit, int, 0600);
+
+MODULE_PARM_DESC(agpmode, "AGP Mode (-1 == PCI)");
+module_param_named(agpmode, radeon_agpmode, int, 0444);
+
+MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32,64, etc)\n");
+module_param_named(gartsize, radeon_gart_size, int, 0600);
+
+MODULE_PARM_DESC(benchmark, "Run benchmark");
+module_param_named(benchmark, radeon_benchmarking, int, 0444);
+
+MODULE_PARM_DESC(connector_table, "Force connector table");
+module_param_named(connector_table, radeon_connector_table, int, 0444);
+#endif
+
 static int radeon_suspend(struct drm_device *dev, pm_message_t state)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
@@ -73,7 +153,11 @@ static struct pci_device_id pciidlist[] = {
 	radeon_PCI_IDS
 };
 
-static struct drm_driver driver = {
+#if defined(CONFIG_DRM_RADEON_KMS)
+MODULE_DEVICE_TABLE(pci, pciidlist);
+#endif
+
+static struct drm_driver driver_old = {
 	.driver_features =
 	    DRIVER_USE_AGP | DRIVER_USE_MTRR | DRIVER_PCI_DMA | DRIVER_SG |
 	    DRIVER_HAVE_IRQ | DRIVER_HAVE_DMA | DRIVER_IRQ_SHARED,
@@ -127,15 +211,138 @@ static struct drm_driver driver = {
 	.patchlevel = DRIVER_PATCHLEVEL,
 };
 
+#if defined(CONFIG_DRM_RADEON_KMS)
+static struct drm_driver kms_driver;
+
+static int __devinit
+radeon_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	return drm_get_dev(pdev, ent, &kms_driver);
+}
+
+static void
+radeon_pci_remove(struct pci_dev *pdev)
+{
+	struct drm_device *dev = pci_get_drvdata(pdev);
+
+	drm_put_dev(dev);
+}
+
+static int
+radeon_pci_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct drm_device *dev = pci_get_drvdata(pdev);
+	return radeon_suspend_kms(dev, state);
+}
+
+static int
+radeon_pci_resume(struct pci_dev *pdev)
+{
+	struct drm_device *dev = pci_get_drvdata(pdev);
+	return radeon_resume_kms(dev);
+}
+
+static struct drm_driver kms_driver = {
+	.driver_features =
+	    DRIVER_USE_AGP | DRIVER_USE_MTRR | DRIVER_PCI_DMA | DRIVER_SG |
+	    DRIVER_HAVE_IRQ | DRIVER_HAVE_DMA | DRIVER_IRQ_SHARED | DRIVER_GEM,
+	.dev_priv_size = 0,
+	.load = radeon_driver_load_kms,
+	.firstopen = radeon_driver_firstopen_kms,
+	.open = radeon_driver_open_kms,
+	.preclose = radeon_driver_preclose_kms,
+	.postclose = radeon_driver_postclose_kms,
+	.lastclose = radeon_driver_lastclose_kms,
+	.unload = radeon_driver_unload_kms,
+	.suspend = radeon_suspend_kms,
+	.resume = radeon_resume_kms,
+	.get_vblank_counter = radeon_get_vblank_counter_kms,
+	.enable_vblank = radeon_enable_vblank_kms,
+	.disable_vblank = radeon_disable_vblank_kms,
+	.master_create = radeon_master_create_kms,
+	.master_destroy = radeon_master_destroy_kms,
+#if defined(CONFIG_DEBUG_FS)
+	.debugfs_init = radeon_debugfs_init,
+	.debugfs_cleanup = radeon_debugfs_cleanup,
+#endif
+	.irq_preinstall = radeon_driver_irq_preinstall_kms,
+	.irq_postinstall = radeon_driver_irq_postinstall_kms,
+	.irq_uninstall = radeon_driver_irq_uninstall_kms,
+	.irq_handler = radeon_driver_irq_handler_kms,
+	.reclaim_buffers = drm_core_reclaim_buffers,
+	.get_map_ofs = drm_core_get_map_ofs,
+	.get_reg_ofs = drm_core_get_reg_ofs,
+	.ioctls = radeon_ioctls_kms,
+	.gem_init_object = radeon_gem_object_init,
+	.gem_free_object = radeon_gem_object_free,
+	.dma_ioctl = radeon_dma_ioctl_kms,
+	.fops = {
+		 .owner = THIS_MODULE,
+		 .open = drm_open,
+		 .release = drm_release,
+		 .ioctl = drm_ioctl,
+		 .mmap = radeon_mmap,
+		 .poll = drm_poll,
+		 .fasync = drm_fasync,
+#ifdef CONFIG_COMPAT
+		 .compat_ioctl = NULL,
+#endif
+	},
+
+	.pci_driver = {
+		 .name = DRIVER_NAME,
+		 .id_table = pciidlist,
+		 .probe = radeon_pci_probe,
+		 .remove = radeon_pci_remove,
+		 .suspend = radeon_pci_suspend,
+		 .resume = radeon_pci_resume,
+	},
+
+	.name = DRIVER_NAME,
+	.desc = DRIVER_DESC,
+	.date = DRIVER_DATE,
+	.major = KMS_DRIVER_MAJOR,
+	.minor = KMS_DRIVER_MINOR,
+	.patchlevel = KMS_DRIVER_PATCHLEVEL,
+};
+#endif
+
+static struct drm_driver *driver;
+
 static int __init radeon_init(void)
 {
-	driver.num_ioctls = radeon_max_ioctl;
-	return drm_init(&driver);
+	driver = &driver_old;
+	driver->num_ioctls = radeon_max_ioctl;
+#if defined(CONFIG_DRM_RADEON_KMS) && defined(CONFIG_X86)
+	/* if enabled by default */
+	if (radeon_modeset == -1) {
+		DRM_INFO("radeon default to kernel modesetting.\n");
+		radeon_modeset = 1;
+	}
+	if (radeon_modeset == 1) {
+		DRM_INFO("radeon kernel modesetting enabled.\n");
+		driver = &kms_driver;
+		driver->driver_features |= DRIVER_MODESET;
+		driver->num_ioctls = radeon_max_kms_ioctl;
+	}
+
+	/* if the vga console setting is enabled still
+	 * let modprobe override it */
+#ifdef CONFIG_VGA_CONSOLE
+	if (vgacon_text_force() && radeon_modeset == -1) {
+		DRM_INFO("VGACON disable radeon kernel modesetting.\n");
+		driver = &driver_old;
+		driver->driver_features &= ~DRIVER_MODESET;
+		radeon_modeset = 0;
+	}
+#endif
+#endif
+	return drm_init(driver);
 }
 
 static void __exit radeon_exit(void)
 {
-	drm_exit(&driver);
+	drm_exit(driver);
 }
 
 module_init(radeon_init);
diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c
new file mode 100644
index 00000000000..c8ef0d14ffa
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_encoders.c
@@ -0,0 +1,1708 @@
+/*
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ */
+#include "drmP.h"
+#include "drm_crtc_helper.h"
+#include "radeon_drm.h"
+#include "radeon.h"
+#include "atom.h"
+
+extern int atom_debug;
+
+uint32_t
+radeon_get_encoder_id(struct drm_device *dev, uint32_t supported_device, uint8_t dac)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t ret = 0;
+
+	switch (supported_device) {
+	case ATOM_DEVICE_CRT1_SUPPORT:
+	case ATOM_DEVICE_TV1_SUPPORT:
+	case ATOM_DEVICE_TV2_SUPPORT:
+	case ATOM_DEVICE_CRT2_SUPPORT:
+	case ATOM_DEVICE_CV_SUPPORT:
+		switch (dac) {
+		case 1: /* dac a */
+			if ((rdev->family == CHIP_RS300) ||
+			    (rdev->family == CHIP_RS400) ||
+			    (rdev->family == CHIP_RS480))
+				ret = ENCODER_OBJECT_ID_INTERNAL_DAC2;
+			else if (ASIC_IS_AVIVO(rdev))
+				ret = ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1;
+			else
+				ret = ENCODER_OBJECT_ID_INTERNAL_DAC1;
+			break;
+		case 2: /* dac b */
+			if (ASIC_IS_AVIVO(rdev))
+				ret = ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2;
+			else {
+				/*if (rdev->family == CHIP_R200)
+				  ret = ENCODER_OBJECT_ID_INTERNAL_DVO1;
+				  else*/
+				ret = ENCODER_OBJECT_ID_INTERNAL_DAC2;
+			}
+			break;
+		case 3: /* external dac */
+			if (ASIC_IS_AVIVO(rdev))
+				ret = ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1;
+			else
+				ret = ENCODER_OBJECT_ID_INTERNAL_DVO1;
+			break;
+		}
+		break;
+	case ATOM_DEVICE_LCD1_SUPPORT:
+		if (ASIC_IS_AVIVO(rdev))
+			ret = ENCODER_OBJECT_ID_INTERNAL_LVTM1;
+		else
+			ret = ENCODER_OBJECT_ID_INTERNAL_LVDS;
+		break;
+	case ATOM_DEVICE_DFP1_SUPPORT:
+		if ((rdev->family == CHIP_RS300) ||
+		    (rdev->family == CHIP_RS400) ||
+		    (rdev->family == CHIP_RS480))
+			ret = ENCODER_OBJECT_ID_INTERNAL_DVO1;
+		else if (ASIC_IS_AVIVO(rdev))
+			ret = ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1;
+		else
+			ret = ENCODER_OBJECT_ID_INTERNAL_TMDS1;
+		break;
+	case ATOM_DEVICE_LCD2_SUPPORT:
+	case ATOM_DEVICE_DFP2_SUPPORT:
+		if ((rdev->family == CHIP_RS600) ||
+		    (rdev->family == CHIP_RS690) ||
+		    (rdev->family == CHIP_RS740))
+			ret = ENCODER_OBJECT_ID_INTERNAL_DDI;
+		else if (ASIC_IS_AVIVO(rdev))
+			ret = ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1;
+		else
+			ret = ENCODER_OBJECT_ID_INTERNAL_DVO1;
+		break;
+	case ATOM_DEVICE_DFP3_SUPPORT:
+		ret = ENCODER_OBJECT_ID_INTERNAL_LVTM1;
+		break;
+	}
+
+	return ret;
+}
+
+void
+radeon_link_encoder_connector(struct drm_device *dev)
+{
+	struct drm_connector *connector;
+	struct radeon_connector *radeon_connector;
+	struct drm_encoder *encoder;
+	struct radeon_encoder *radeon_encoder;
+
+	/* walk the list and link encoders to connectors */
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		radeon_connector = to_radeon_connector(connector);
+		list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+			radeon_encoder = to_radeon_encoder(encoder);
+			if (radeon_encoder->devices & radeon_connector->devices)
+				drm_mode_connector_attach_encoder(connector, encoder);
+		}
+	}
+}
+
+static struct drm_connector *
+radeon_get_connector_for_encoder(struct drm_encoder *encoder)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct drm_connector *connector;
+	struct radeon_connector *radeon_connector;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		radeon_connector = to_radeon_connector(connector);
+		if (radeon_encoder->devices & radeon_connector->devices)
+			return connector;
+	}
+	return NULL;
+}
+
+/* used for both atom and legacy */
+void radeon_rmx_mode_fixup(struct drm_encoder *encoder,
+			   struct drm_display_mode *mode,
+			   struct drm_display_mode *adjusted_mode)
+{
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_native_mode *native_mode = &radeon_encoder->native_mode;
+
+	if (mode->hdisplay < native_mode->panel_xres ||
+	    mode->vdisplay < native_mode->panel_yres) {
+		radeon_encoder->flags |= RADEON_USE_RMX;
+		if (ASIC_IS_AVIVO(rdev)) {
+			adjusted_mode->hdisplay = native_mode->panel_xres;
+			adjusted_mode->vdisplay = native_mode->panel_yres;
+			adjusted_mode->htotal = native_mode->panel_xres + native_mode->hblank;
+			adjusted_mode->hsync_start = native_mode->panel_xres + native_mode->hoverplus;
+			adjusted_mode->hsync_end = adjusted_mode->hsync_start + native_mode->hsync_width;
+			adjusted_mode->vtotal = native_mode->panel_yres + native_mode->vblank;
+			adjusted_mode->vsync_start = native_mode->panel_yres + native_mode->voverplus;
+			adjusted_mode->vsync_end = adjusted_mode->vsync_start + native_mode->vsync_width;
+			/* update crtc values */
+			drm_mode_set_crtcinfo(adjusted_mode, CRTC_INTERLACE_HALVE_V);
+			/* adjust crtc values */
+			adjusted_mode->crtc_hdisplay = native_mode->panel_xres;
+			adjusted_mode->crtc_vdisplay = native_mode->panel_yres;
+			adjusted_mode->crtc_htotal = adjusted_mode->crtc_hdisplay + native_mode->hblank;
+			adjusted_mode->crtc_hsync_start = adjusted_mode->crtc_hdisplay + native_mode->hoverplus;
+			adjusted_mode->crtc_hsync_end = adjusted_mode->crtc_hsync_start + native_mode->hsync_width;
+			adjusted_mode->crtc_vtotal = adjusted_mode->crtc_vdisplay + native_mode->vblank;
+			adjusted_mode->crtc_vsync_start = adjusted_mode->crtc_vdisplay + native_mode->voverplus;
+			adjusted_mode->crtc_vsync_end = adjusted_mode->crtc_vsync_start + native_mode->vsync_width;
+		} else {
+			adjusted_mode->htotal = native_mode->panel_xres + native_mode->hblank;
+			adjusted_mode->hsync_start = native_mode->panel_xres + native_mode->hoverplus;
+			adjusted_mode->hsync_end = adjusted_mode->hsync_start + native_mode->hsync_width;
+			adjusted_mode->vtotal = native_mode->panel_yres + native_mode->vblank;
+			adjusted_mode->vsync_start = native_mode->panel_yres + native_mode->voverplus;
+			adjusted_mode->vsync_end = adjusted_mode->vsync_start + native_mode->vsync_width;
+			/* update crtc values */
+			drm_mode_set_crtcinfo(adjusted_mode, CRTC_INTERLACE_HALVE_V);
+			/* adjust crtc values */
+			adjusted_mode->crtc_htotal = adjusted_mode->crtc_hdisplay + native_mode->hblank;
+			adjusted_mode->crtc_hsync_start = adjusted_mode->crtc_hdisplay + native_mode->hoverplus;
+			adjusted_mode->crtc_hsync_end = adjusted_mode->crtc_hsync_start + native_mode->hsync_width;
+			adjusted_mode->crtc_vtotal = adjusted_mode->crtc_vdisplay + native_mode->vblank;
+			adjusted_mode->crtc_vsync_start = adjusted_mode->crtc_vdisplay + native_mode->voverplus;
+			adjusted_mode->crtc_vsync_end = adjusted_mode->crtc_vsync_start + native_mode->vsync_width;
+		}
+		adjusted_mode->flags = native_mode->flags;
+		adjusted_mode->clock = native_mode->dotclock;
+	}
+}
+
+static bool radeon_atom_mode_fixup(struct drm_encoder *encoder,
+				   struct drm_display_mode *mode,
+				   struct drm_display_mode *adjusted_mode)
+{
+
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+
+	radeon_encoder->flags &= ~RADEON_USE_RMX;
+
+	drm_mode_set_crtcinfo(adjusted_mode, 0);
+
+	if (radeon_encoder->rmx_type != RMX_OFF)
+		radeon_rmx_mode_fixup(encoder, mode, adjusted_mode);
+
+	/* hw bug */
+	if ((mode->flags & DRM_MODE_FLAG_INTERLACE)
+	    && (mode->crtc_vsync_start < (mode->crtc_vdisplay + 2)))
+		adjusted_mode->crtc_vsync_start = adjusted_mode->crtc_vdisplay + 2;
+
+	return true;
+}
+
+static void
+atombios_dac_setup(struct drm_encoder *encoder, int action)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	DAC_ENCODER_CONTROL_PS_ALLOCATION args;
+	int index = 0, num = 0;
+	/* fixme - fill in enc_priv for atom dac */
+	enum radeon_tv_std tv_std = TV_STD_NTSC;
+
+	memset(&args, 0, sizeof(args));
+
+	switch (radeon_encoder->encoder_id) {
+	case ENCODER_OBJECT_ID_INTERNAL_DAC1:
+	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+		index = GetIndexIntoMasterTable(COMMAND, DAC1EncoderControl);
+		num = 1;
+		break;
+	case ENCODER_OBJECT_ID_INTERNAL_DAC2:
+	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
+		index = GetIndexIntoMasterTable(COMMAND, DAC2EncoderControl);
+		num = 2;
+		break;
+	}
+
+	args.ucAction = action;
+
+	if (radeon_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT))
+		args.ucDacStandard = ATOM_DAC1_PS2;
+	else if (radeon_encoder->devices & (ATOM_DEVICE_CV_SUPPORT))
+		args.ucDacStandard = ATOM_DAC1_CV;
+	else {
+		switch (tv_std) {
+		case TV_STD_PAL:
+		case TV_STD_PAL_M:
+		case TV_STD_SCART_PAL:
+		case TV_STD_SECAM:
+		case TV_STD_PAL_CN:
+			args.ucDacStandard = ATOM_DAC1_PAL;
+			break;
+		case TV_STD_NTSC:
+		case TV_STD_NTSC_J:
+		case TV_STD_PAL_60:
+		default:
+			args.ucDacStandard = ATOM_DAC1_NTSC;
+			break;
+		}
+	}
+	args.usPixelClock = cpu_to_le16(radeon_encoder->pixel_clock / 10);
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+}
+
+static void
+atombios_tv_setup(struct drm_encoder *encoder, int action)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	TV_ENCODER_CONTROL_PS_ALLOCATION args;
+	int index = 0;
+	/* fixme - fill in enc_priv for atom dac */
+	enum radeon_tv_std tv_std = TV_STD_NTSC;
+
+	memset(&args, 0, sizeof(args));
+
+	index = GetIndexIntoMasterTable(COMMAND, TVEncoderControl);
+
+	args.sTVEncoder.ucAction = action;
+
+	if (radeon_encoder->devices & (ATOM_DEVICE_CV_SUPPORT))
+		args.sTVEncoder.ucTvStandard = ATOM_TV_CV;
+	else {
+		switch (tv_std) {
+		case TV_STD_NTSC:
+			args.sTVEncoder.ucTvStandard = ATOM_TV_NTSC;
+			break;
+		case TV_STD_PAL:
+			args.sTVEncoder.ucTvStandard = ATOM_TV_PAL;
+			break;
+		case TV_STD_PAL_M:
+			args.sTVEncoder.ucTvStandard = ATOM_TV_PALM;
+			break;
+		case TV_STD_PAL_60:
+			args.sTVEncoder.ucTvStandard = ATOM_TV_PAL60;
+			break;
+		case TV_STD_NTSC_J:
+			args.sTVEncoder.ucTvStandard = ATOM_TV_NTSCJ;
+			break;
+		case TV_STD_SCART_PAL:
+			args.sTVEncoder.ucTvStandard = ATOM_TV_PAL; /* ??? */
+			break;
+		case TV_STD_SECAM:
+			args.sTVEncoder.ucTvStandard = ATOM_TV_SECAM;
+			break;
+		case TV_STD_PAL_CN:
+			args.sTVEncoder.ucTvStandard = ATOM_TV_PALCN;
+			break;
+		default:
+			args.sTVEncoder.ucTvStandard = ATOM_TV_NTSC;
+			break;
+		}
+	}
+
+	args.sTVEncoder.usPixelClock = cpu_to_le16(radeon_encoder->pixel_clock / 10);
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+}
+
+void
+atombios_external_tmds_setup(struct drm_encoder *encoder, int action)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	ENABLE_EXTERNAL_TMDS_ENCODER_PS_ALLOCATION args;
+	int index = 0;
+
+	memset(&args, 0, sizeof(args));
+
+	index = GetIndexIntoMasterTable(COMMAND, DVOEncoderControl);
+
+	args.sXTmdsEncoder.ucEnable = action;
+
+	if (radeon_encoder->pixel_clock > 165000)
+		args.sXTmdsEncoder.ucMisc = PANEL_ENCODER_MISC_DUAL;
+
+	/*if (pScrn->rgbBits == 8)*/
+	args.sXTmdsEncoder.ucMisc |= (1 << 1);
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+}
+
+static void
+atombios_ddia_setup(struct drm_encoder *encoder, int action)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	DVO_ENCODER_CONTROL_PS_ALLOCATION args;
+	int index = 0;
+
+	memset(&args, 0, sizeof(args));
+
+	index = GetIndexIntoMasterTable(COMMAND, DVOEncoderControl);
+
+	args.sDVOEncoder.ucAction = action;
+	args.sDVOEncoder.usPixelClock = cpu_to_le16(radeon_encoder->pixel_clock / 10);
+
+	if (radeon_encoder->pixel_clock > 165000)
+		args.sDVOEncoder.usDevAttr.sDigAttrib.ucAttribute = PANEL_ENCODER_MISC_DUAL;
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+}
+
+union lvds_encoder_control {
+	LVDS_ENCODER_CONTROL_PS_ALLOCATION    v1;
+	LVDS_ENCODER_CONTROL_PS_ALLOCATION_V2 v2;
+};
+
+static void
+atombios_digital_setup(struct drm_encoder *encoder, int action)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	union lvds_encoder_control args;
+	int index = 0;
+	uint8_t frev, crev;
+	struct radeon_encoder_atom_dig *dig;
+	struct drm_connector *connector;
+	struct radeon_connector *radeon_connector;
+	struct radeon_connector_atom_dig *dig_connector;
+
+	connector = radeon_get_connector_for_encoder(encoder);
+	if (!connector)
+		return;
+
+	radeon_connector = to_radeon_connector(connector);
+
+	if (!radeon_encoder->enc_priv)
+		return;
+
+	dig = radeon_encoder->enc_priv;
+
+	if (!radeon_connector->con_priv)
+		return;
+
+	dig_connector = radeon_connector->con_priv;
+
+	memset(&args, 0, sizeof(args));
+
+	switch (radeon_encoder->encoder_id) {
+	case ENCODER_OBJECT_ID_INTERNAL_LVDS:
+		index = GetIndexIntoMasterTable(COMMAND, LVDSEncoderControl);
+		break;
+	case ENCODER_OBJECT_ID_INTERNAL_TMDS1:
+	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1:
+		index = GetIndexIntoMasterTable(COMMAND, TMDS1EncoderControl);
+		break;
+	case ENCODER_OBJECT_ID_INTERNAL_LVTM1:
+		if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+			index = GetIndexIntoMasterTable(COMMAND, LVDSEncoderControl);
+		else
+			index = GetIndexIntoMasterTable(COMMAND, TMDS2EncoderControl);
+		break;
+	}
+
+	atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev, &crev);
+
+	switch (frev) {
+	case 1:
+	case 2:
+		switch (crev) {
+		case 1:
+			args.v1.ucMisc = 0;
+			args.v1.ucAction = action;
+			if (drm_detect_hdmi_monitor((struct edid *)connector->edid_blob_ptr))
+				args.v1.ucMisc |= PANEL_ENCODER_MISC_HDMI_TYPE;
+			args.v1.usPixelClock = cpu_to_le16(radeon_encoder->pixel_clock / 10);
+			if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+				if (dig->lvds_misc & (1 << 0))
+					args.v1.ucMisc |= PANEL_ENCODER_MISC_DUAL;
+				if (dig->lvds_misc & (1 << 1))
+					args.v1.ucMisc |= (1 << 1);
+			} else {
+				if (dig_connector->linkb)
+					args.v1.ucMisc |= PANEL_ENCODER_MISC_TMDS_LINKB;
+				if (radeon_encoder->pixel_clock > 165000)
+					args.v1.ucMisc |= PANEL_ENCODER_MISC_DUAL;
+				/*if (pScrn->rgbBits == 8) */
+				args.v1.ucMisc |= (1 << 1);
+			}
+			break;
+		case 2:
+		case 3:
+			args.v2.ucMisc = 0;
+			args.v2.ucAction = action;
+			if (crev == 3) {
+				if (dig->coherent_mode)
+					args.v2.ucMisc |= PANEL_ENCODER_MISC_COHERENT;
+			}
+			if (drm_detect_hdmi_monitor((struct edid *)connector->edid_blob_ptr))
+				args.v2.ucMisc |= PANEL_ENCODER_MISC_HDMI_TYPE;
+			args.v2.usPixelClock = cpu_to_le16(radeon_encoder->pixel_clock / 10);
+			args.v2.ucTruncate = 0;
+			args.v2.ucSpatial = 0;
+			args.v2.ucTemporal = 0;
+			args.v2.ucFRC = 0;
+			if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+				if (dig->lvds_misc & (1 << 0))
+					args.v2.ucMisc |= PANEL_ENCODER_MISC_DUAL;
+				if (dig->lvds_misc & (1 << 5)) {
+					args.v2.ucSpatial = PANEL_ENCODER_SPATIAL_DITHER_EN;
+					if (dig->lvds_misc & (1 << 1))
+						args.v2.ucSpatial |= PANEL_ENCODER_SPATIAL_DITHER_DEPTH;
+				}
+				if (dig->lvds_misc & (1 << 6)) {
+					args.v2.ucTemporal = PANEL_ENCODER_TEMPORAL_DITHER_EN;
+					if (dig->lvds_misc & (1 << 1))
+						args.v2.ucTemporal |= PANEL_ENCODER_TEMPORAL_DITHER_DEPTH;
+					if (((dig->lvds_misc >> 2) & 0x3) == 2)
+						args.v2.ucTemporal |= PANEL_ENCODER_TEMPORAL_LEVEL_4;
+				}
+			} else {
+				if (dig_connector->linkb)
+					args.v2.ucMisc |= PANEL_ENCODER_MISC_TMDS_LINKB;
+				if (radeon_encoder->pixel_clock > 165000)
+					args.v2.ucMisc |= PANEL_ENCODER_MISC_DUAL;
+			}
+			break;
+		default:
+			DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+			break;
+		}
+		break;
+	default:
+		DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+		break;
+	}
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+}
+
+int
+atombios_get_encoder_mode(struct drm_encoder *encoder)
+{
+	struct drm_connector *connector;
+	struct radeon_connector *radeon_connector;
+
+	connector = radeon_get_connector_for_encoder(encoder);
+	if (!connector)
+		return 0;
+
+	radeon_connector = to_radeon_connector(connector);
+
+	switch (connector->connector_type) {
+	case DRM_MODE_CONNECTOR_DVII:
+		if (drm_detect_hdmi_monitor((struct edid *)connector->edid_blob_ptr))
+			return ATOM_ENCODER_MODE_HDMI;
+		else if (radeon_connector->use_digital)
+			return ATOM_ENCODER_MODE_DVI;
+		else
+			return ATOM_ENCODER_MODE_CRT;
+		break;
+	case DRM_MODE_CONNECTOR_DVID:
+	case DRM_MODE_CONNECTOR_HDMIA:
+	case DRM_MODE_CONNECTOR_HDMIB:
+	default:
+		if (drm_detect_hdmi_monitor((struct edid *)connector->edid_blob_ptr))
+			return ATOM_ENCODER_MODE_HDMI;
+		else
+			return ATOM_ENCODER_MODE_DVI;
+		break;
+	case DRM_MODE_CONNECTOR_LVDS:
+		return ATOM_ENCODER_MODE_LVDS;
+		break;
+	case DRM_MODE_CONNECTOR_DisplayPort:
+		/*if (radeon_output->MonType == MT_DP)
+		  return ATOM_ENCODER_MODE_DP;
+		  else*/
+		if (drm_detect_hdmi_monitor((struct edid *)connector->edid_blob_ptr))
+			return ATOM_ENCODER_MODE_HDMI;
+		else
+			return ATOM_ENCODER_MODE_DVI;
+		break;
+	case CONNECTOR_DVI_A:
+	case CONNECTOR_VGA:
+		return ATOM_ENCODER_MODE_CRT;
+		break;
+	case CONNECTOR_STV:
+	case CONNECTOR_CTV:
+	case CONNECTOR_DIN:
+		/* fix me */
+		return ATOM_ENCODER_MODE_TV;
+		/*return ATOM_ENCODER_MODE_CV;*/
+		break;
+	}
+}
+
+static void
+atombios_dig_encoder_setup(struct drm_encoder *encoder, int action)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	DIG_ENCODER_CONTROL_PS_ALLOCATION args;
+	int index = 0, num = 0;
+	uint8_t frev, crev;
+	struct radeon_encoder_atom_dig *dig;
+	struct drm_connector *connector;
+	struct radeon_connector *radeon_connector;
+	struct radeon_connector_atom_dig *dig_connector;
+
+	connector = radeon_get_connector_for_encoder(encoder);
+	if (!connector)
+		return;
+
+	radeon_connector = to_radeon_connector(connector);
+
+	if (!radeon_connector->con_priv)
+		return;
+
+	dig_connector = radeon_connector->con_priv;
+
+	if (!radeon_encoder->enc_priv)
+		return;
+
+	dig = radeon_encoder->enc_priv;
+
+	memset(&args, 0, sizeof(args));
+
+	if (ASIC_IS_DCE32(rdev)) {
+		if (dig->dig_block)
+			index = GetIndexIntoMasterTable(COMMAND, DIG2EncoderControl);
+		else
+			index = GetIndexIntoMasterTable(COMMAND, DIG1EncoderControl);
+		num = dig->dig_block + 1;
+	} else {
+		switch (radeon_encoder->encoder_id) {
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+			index = GetIndexIntoMasterTable(COMMAND, DIG1EncoderControl);
+			num = 1;
+			break;
+		case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
+			index = GetIndexIntoMasterTable(COMMAND, DIG2EncoderControl);
+			num = 2;
+			break;
+		}
+	}
+
+	atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev, &crev);
+
+	args.ucAction = action;
+	args.usPixelClock = cpu_to_le16(radeon_encoder->pixel_clock / 10);
+
+	if (ASIC_IS_DCE32(rdev)) {
+		switch (radeon_encoder->encoder_id) {
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+			args.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER1;
+			break;
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+			args.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER2;
+			break;
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+			args.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER3;
+			break;
+		}
+	} else {
+		switch (radeon_encoder->encoder_id) {
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+			args.ucConfig = ATOM_ENCODER_CONFIG_TRANSMITTER1;
+			break;
+		case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
+			args.ucConfig = ATOM_ENCODER_CONFIG_TRANSMITTER2;
+			break;
+		}
+	}
+
+	if (radeon_encoder->pixel_clock > 165000) {
+		args.ucConfig |= ATOM_ENCODER_CONFIG_LINKA_B;
+		args.ucLaneNum = 8;
+	} else {
+		if (dig_connector->linkb)
+			args.ucConfig |= ATOM_ENCODER_CONFIG_LINKB;
+		else
+			args.ucConfig |= ATOM_ENCODER_CONFIG_LINKA;
+		args.ucLaneNum = 4;
+	}
+
+	args.ucEncoderMode = atombios_get_encoder_mode(encoder);
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+}
+
+union dig_transmitter_control {
+	DIG_TRANSMITTER_CONTROL_PS_ALLOCATION v1;
+	DIG_TRANSMITTER_CONTROL_PARAMETERS_V2 v2;
+};
+
+static void
+atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	union dig_transmitter_control args;
+	int index = 0, num = 0;
+	uint8_t frev, crev;
+	struct radeon_encoder_atom_dig *dig;
+	struct drm_connector *connector;
+	struct radeon_connector *radeon_connector;
+	struct radeon_connector_atom_dig *dig_connector;
+
+	connector = radeon_get_connector_for_encoder(encoder);
+	if (!connector)
+		return;
+
+	radeon_connector = to_radeon_connector(connector);
+
+	if (!radeon_encoder->enc_priv)
+		return;
+
+	dig = radeon_encoder->enc_priv;
+
+	if (!radeon_connector->con_priv)
+		return;
+
+	dig_connector = radeon_connector->con_priv;
+
+	memset(&args, 0, sizeof(args));
+
+	if (ASIC_IS_DCE32(rdev))
+		index = GetIndexIntoMasterTable(COMMAND, UNIPHYTransmitterControl);
+	else {
+		switch (radeon_encoder->encoder_id) {
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+			index = GetIndexIntoMasterTable(COMMAND, DIG1TransmitterControl);
+			break;
+		case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
+			index = GetIndexIntoMasterTable(COMMAND, DIG2TransmitterControl);
+			break;
+		}
+	}
+
+	atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev, &crev);
+
+	args.v1.ucAction = action;
+
+	if (ASIC_IS_DCE32(rdev)) {
+		if (radeon_encoder->pixel_clock > 165000) {
+			args.v2.usPixelClock = cpu_to_le16((radeon_encoder->pixel_clock * 10 * 2) / 100);
+			args.v2.acConfig.fDualLinkConnector = 1;
+		} else {
+			args.v2.usPixelClock = cpu_to_le16((radeon_encoder->pixel_clock * 10 * 4) / 100);
+		}
+		if (dig->dig_block)
+			args.v2.acConfig.ucEncoderSel = 1;
+
+		switch (radeon_encoder->encoder_id) {
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+			args.v2.acConfig.ucTransmitterSel = 0;
+			num = 0;
+			break;
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+			args.v2.acConfig.ucTransmitterSel = 1;
+			num = 1;
+			break;
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+			args.v2.acConfig.ucTransmitterSel = 2;
+			num = 2;
+			break;
+		}
+
+		if (radeon_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) {
+			if (dig->coherent_mode)
+				args.v2.acConfig.fCoherentMode = 1;
+		}
+	} else {
+		args.v1.ucConfig = ATOM_TRANSMITTER_CONFIG_CLKSRC_PPLL;
+		args.v1.usPixelClock = cpu_to_le16((radeon_encoder->pixel_clock) / 10);
+
+		switch (radeon_encoder->encoder_id) {
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+			args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_DIG1_ENCODER;
+			if (rdev->flags & RADEON_IS_IGP) {
+				if (radeon_encoder->pixel_clock > 165000) {
+					args.v1.ucConfig |= (ATOM_TRANSMITTER_CONFIG_8LANE_LINK |
+							     ATOM_TRANSMITTER_CONFIG_LINKA_B);
+					if (dig_connector->igp_lane_info & 0x3)
+						args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_0_7;
+					else if (dig_connector->igp_lane_info & 0xc)
+						args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_8_15;
+				} else {
+					args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LINKA;
+					if (dig_connector->igp_lane_info & 0x1)
+						args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_0_3;
+					else if (dig_connector->igp_lane_info & 0x2)
+						args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_4_7;
+					else if (dig_connector->igp_lane_info & 0x4)
+						args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_8_11;
+					else if (dig_connector->igp_lane_info & 0x8)
+						args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_12_15;
+				}
+			} else {
+				if (radeon_encoder->pixel_clock > 165000)
+					args.v1.ucConfig |= (ATOM_TRANSMITTER_CONFIG_8LANE_LINK |
+							     ATOM_TRANSMITTER_CONFIG_LINKA_B |
+							     ATOM_TRANSMITTER_CONFIG_LANE_0_7);
+				else {
+					if (dig_connector->linkb)
+						args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LINKB | ATOM_TRANSMITTER_CONFIG_LANE_0_3;
+					else
+						args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LINKA | ATOM_TRANSMITTER_CONFIG_LANE_0_3;
+				}
+			}
+			break;
+		case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
+			args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_DIG2_ENCODER;
+			if (radeon_encoder->pixel_clock > 165000)
+				args.v1.ucConfig |= (ATOM_TRANSMITTER_CONFIG_8LANE_LINK |
+						     ATOM_TRANSMITTER_CONFIG_LINKA_B |
+						     ATOM_TRANSMITTER_CONFIG_LANE_0_7);
+			else {
+				if (dig_connector->linkb)
+					args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LINKB | ATOM_TRANSMITTER_CONFIG_LANE_0_3;
+				else
+					args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LINKA | ATOM_TRANSMITTER_CONFIG_LANE_0_3;
+			}
+			break;
+		}
+
+		if (radeon_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) {
+			if (dig->coherent_mode)
+				args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_COHERENT;
+		}
+	}
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+}
+
+static void atom_rv515_force_tv_scaler(struct radeon_device *rdev)
+{
+
+	WREG32(0x659C, 0x0);
+	WREG32(0x6594, 0x705);
+	WREG32(0x65A4, 0x10001);
+	WREG32(0x65D8, 0x0);
+	WREG32(0x65B0, 0x0);
+	WREG32(0x65C0, 0x0);
+	WREG32(0x65D4, 0x0);
+	WREG32(0x6578, 0x0);
+	WREG32(0x657C, 0x841880A8);
+	WREG32(0x6578, 0x1);
+	WREG32(0x657C, 0x84208680);
+	WREG32(0x6578, 0x2);
+	WREG32(0x657C, 0xBFF880B0);
+	WREG32(0x6578, 0x100);
+	WREG32(0x657C, 0x83D88088);
+	WREG32(0x6578, 0x101);
+	WREG32(0x657C, 0x84608680);
+	WREG32(0x6578, 0x102);
+	WREG32(0x657C, 0xBFF080D0);
+	WREG32(0x6578, 0x200);
+	WREG32(0x657C, 0x83988068);
+	WREG32(0x6578, 0x201);
+	WREG32(0x657C, 0x84A08680);
+	WREG32(0x6578, 0x202);
+	WREG32(0x657C, 0xBFF080F8);
+	WREG32(0x6578, 0x300);
+	WREG32(0x657C, 0x83588058);
+	WREG32(0x6578, 0x301);
+	WREG32(0x657C, 0x84E08660);
+	WREG32(0x6578, 0x302);
+	WREG32(0x657C, 0xBFF88120);
+	WREG32(0x6578, 0x400);
+	WREG32(0x657C, 0x83188040);
+	WREG32(0x6578, 0x401);
+	WREG32(0x657C, 0x85008660);
+	WREG32(0x6578, 0x402);
+	WREG32(0x657C, 0xBFF88150);
+	WREG32(0x6578, 0x500);
+	WREG32(0x657C, 0x82D88030);
+	WREG32(0x6578, 0x501);
+	WREG32(0x657C, 0x85408640);
+	WREG32(0x6578, 0x502);
+	WREG32(0x657C, 0xBFF88180);
+	WREG32(0x6578, 0x600);
+	WREG32(0x657C, 0x82A08018);
+	WREG32(0x6578, 0x601);
+	WREG32(0x657C, 0x85808620);
+	WREG32(0x6578, 0x602);
+	WREG32(0x657C, 0xBFF081B8);
+	WREG32(0x6578, 0x700);
+	WREG32(0x657C, 0x82608010);
+	WREG32(0x6578, 0x701);
+	WREG32(0x657C, 0x85A08600);
+	WREG32(0x6578, 0x702);
+	WREG32(0x657C, 0x800081F0);
+	WREG32(0x6578, 0x800);
+	WREG32(0x657C, 0x8228BFF8);
+	WREG32(0x6578, 0x801);
+	WREG32(0x657C, 0x85E085E0);
+	WREG32(0x6578, 0x802);
+	WREG32(0x657C, 0xBFF88228);
+	WREG32(0x6578, 0x10000);
+	WREG32(0x657C, 0x82A8BF00);
+	WREG32(0x6578, 0x10001);
+	WREG32(0x657C, 0x82A08CC0);
+	WREG32(0x6578, 0x10002);
+	WREG32(0x657C, 0x8008BEF8);
+	WREG32(0x6578, 0x10100);
+	WREG32(0x657C, 0x81F0BF28);
+	WREG32(0x6578, 0x10101);
+	WREG32(0x657C, 0x83608CA0);
+	WREG32(0x6578, 0x10102);
+	WREG32(0x657C, 0x8018BED0);
+	WREG32(0x6578, 0x10200);
+	WREG32(0x657C, 0x8148BF38);
+	WREG32(0x6578, 0x10201);
+	WREG32(0x657C, 0x84408C80);
+	WREG32(0x6578, 0x10202);
+	WREG32(0x657C, 0x8008BEB8);
+	WREG32(0x6578, 0x10300);
+	WREG32(0x657C, 0x80B0BF78);
+	WREG32(0x6578, 0x10301);
+	WREG32(0x657C, 0x85008C20);
+	WREG32(0x6578, 0x10302);
+	WREG32(0x657C, 0x8020BEA0);
+	WREG32(0x6578, 0x10400);
+	WREG32(0x657C, 0x8028BF90);
+	WREG32(0x6578, 0x10401);
+	WREG32(0x657C, 0x85E08BC0);
+	WREG32(0x6578, 0x10402);
+	WREG32(0x657C, 0x8018BE90);
+	WREG32(0x6578, 0x10500);
+	WREG32(0x657C, 0xBFB8BFB0);
+	WREG32(0x6578, 0x10501);
+	WREG32(0x657C, 0x86C08B40);
+	WREG32(0x6578, 0x10502);
+	WREG32(0x657C, 0x8010BE90);
+	WREG32(0x6578, 0x10600);
+	WREG32(0x657C, 0xBF58BFC8);
+	WREG32(0x6578, 0x10601);
+	WREG32(0x657C, 0x87A08AA0);
+	WREG32(0x6578, 0x10602);
+	WREG32(0x657C, 0x8010BE98);
+	WREG32(0x6578, 0x10700);
+	WREG32(0x657C, 0xBF10BFF0);
+	WREG32(0x6578, 0x10701);
+	WREG32(0x657C, 0x886089E0);
+	WREG32(0x6578, 0x10702);
+	WREG32(0x657C, 0x8018BEB0);
+	WREG32(0x6578, 0x10800);
+	WREG32(0x657C, 0xBED8BFE8);
+	WREG32(0x6578, 0x10801);
+	WREG32(0x657C, 0x89408940);
+	WREG32(0x6578, 0x10802);
+	WREG32(0x657C, 0xBFE8BED8);
+	WREG32(0x6578, 0x20000);
+	WREG32(0x657C, 0x80008000);
+	WREG32(0x6578, 0x20001);
+	WREG32(0x657C, 0x90008000);
+	WREG32(0x6578, 0x20002);
+	WREG32(0x657C, 0x80008000);
+	WREG32(0x6578, 0x20003);
+	WREG32(0x657C, 0x80008000);
+	WREG32(0x6578, 0x20100);
+	WREG32(0x657C, 0x80108000);
+	WREG32(0x6578, 0x20101);
+	WREG32(0x657C, 0x8FE0BF70);
+	WREG32(0x6578, 0x20102);
+	WREG32(0x657C, 0xBFE880C0);
+	WREG32(0x6578, 0x20103);
+	WREG32(0x657C, 0x80008000);
+	WREG32(0x6578, 0x20200);
+	WREG32(0x657C, 0x8018BFF8);
+	WREG32(0x6578, 0x20201);
+	WREG32(0x657C, 0x8F80BF08);
+	WREG32(0x6578, 0x20202);
+	WREG32(0x657C, 0xBFD081A0);
+	WREG32(0x6578, 0x20203);
+	WREG32(0x657C, 0xBFF88000);
+	WREG32(0x6578, 0x20300);
+	WREG32(0x657C, 0x80188000);
+	WREG32(0x6578, 0x20301);
+	WREG32(0x657C, 0x8EE0BEC0);
+	WREG32(0x6578, 0x20302);
+	WREG32(0x657C, 0xBFB082A0);
+	WREG32(0x6578, 0x20303);
+	WREG32(0x657C, 0x80008000);
+	WREG32(0x6578, 0x20400);
+	WREG32(0x657C, 0x80188000);
+	WREG32(0x6578, 0x20401);
+	WREG32(0x657C, 0x8E00BEA0);
+	WREG32(0x6578, 0x20402);
+	WREG32(0x657C, 0xBF8883C0);
+	WREG32(0x6578, 0x20403);
+	WREG32(0x657C, 0x80008000);
+	WREG32(0x6578, 0x20500);
+	WREG32(0x657C, 0x80188000);
+	WREG32(0x6578, 0x20501);
+	WREG32(0x657C, 0x8D00BE90);
+	WREG32(0x6578, 0x20502);
+	WREG32(0x657C, 0xBF588500);
+	WREG32(0x6578, 0x20503);
+	WREG32(0x657C, 0x80008008);
+	WREG32(0x6578, 0x20600);
+	WREG32(0x657C, 0x80188000);
+	WREG32(0x6578, 0x20601);
+	WREG32(0x657C, 0x8BC0BE98);
+	WREG32(0x6578, 0x20602);
+	WREG32(0x657C, 0xBF308660);
+	WREG32(0x6578, 0x20603);
+	WREG32(0x657C, 0x80008008);
+	WREG32(0x6578, 0x20700);
+	WREG32(0x657C, 0x80108000);
+	WREG32(0x6578, 0x20701);
+	WREG32(0x657C, 0x8A80BEB0);
+	WREG32(0x6578, 0x20702);
+	WREG32(0x657C, 0xBF0087C0);
+	WREG32(0x6578, 0x20703);
+	WREG32(0x657C, 0x80008008);
+	WREG32(0x6578, 0x20800);
+	WREG32(0x657C, 0x80108000);
+	WREG32(0x6578, 0x20801);
+	WREG32(0x657C, 0x8920BED0);
+	WREG32(0x6578, 0x20802);
+	WREG32(0x657C, 0xBED08920);
+	WREG32(0x6578, 0x20803);
+	WREG32(0x657C, 0x80008010);
+	WREG32(0x6578, 0x30000);
+	WREG32(0x657C, 0x90008000);
+	WREG32(0x6578, 0x30001);
+	WREG32(0x657C, 0x80008000);
+	WREG32(0x6578, 0x30100);
+	WREG32(0x657C, 0x8FE0BF90);
+	WREG32(0x6578, 0x30101);
+	WREG32(0x657C, 0xBFF880A0);
+	WREG32(0x6578, 0x30200);
+	WREG32(0x657C, 0x8F60BF40);
+	WREG32(0x6578, 0x30201);
+	WREG32(0x657C, 0xBFE88180);
+	WREG32(0x6578, 0x30300);
+	WREG32(0x657C, 0x8EC0BF00);
+	WREG32(0x6578, 0x30301);
+	WREG32(0x657C, 0xBFC88280);
+	WREG32(0x6578, 0x30400);
+	WREG32(0x657C, 0x8DE0BEE0);
+	WREG32(0x6578, 0x30401);
+	WREG32(0x657C, 0xBFA083A0);
+	WREG32(0x6578, 0x30500);
+	WREG32(0x657C, 0x8CE0BED0);
+	WREG32(0x6578, 0x30501);
+	WREG32(0x657C, 0xBF7884E0);
+	WREG32(0x6578, 0x30600);
+	WREG32(0x657C, 0x8BA0BED8);
+	WREG32(0x6578, 0x30601);
+	WREG32(0x657C, 0xBF508640);
+	WREG32(0x6578, 0x30700);
+	WREG32(0x657C, 0x8A60BEE8);
+	WREG32(0x6578, 0x30701);
+	WREG32(0x657C, 0xBF2087A0);
+	WREG32(0x6578, 0x30800);
+	WREG32(0x657C, 0x8900BF00);
+	WREG32(0x6578, 0x30801);
+	WREG32(0x657C, 0xBF008900);
+}
+
+static void
+atombios_yuv_setup(struct drm_encoder *encoder, bool enable)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
+	ENABLE_YUV_PS_ALLOCATION args;
+	int index = GetIndexIntoMasterTable(COMMAND, EnableYUV);
+	uint32_t temp, reg;
+
+	memset(&args, 0, sizeof(args));
+
+	if (rdev->family >= CHIP_R600)
+		reg = R600_BIOS_3_SCRATCH;
+	else
+		reg = RADEON_BIOS_3_SCRATCH;
+
+	/* XXX: fix up scratch reg handling */
+	temp = RREG32(reg);
+	if (radeon_encoder->devices & (ATOM_DEVICE_TV_SUPPORT))
+		WREG32(reg, (ATOM_S3_TV1_ACTIVE |
+			     (radeon_crtc->crtc_id << 18)));
+	else if (radeon_encoder->devices & (ATOM_DEVICE_CV_SUPPORT))
+		WREG32(reg, (ATOM_S3_CV_ACTIVE | (radeon_crtc->crtc_id << 24)));
+	else
+		WREG32(reg, 0);
+
+	if (enable)
+		args.ucEnable = ATOM_ENABLE;
+	args.ucCRTC = radeon_crtc->crtc_id;
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+	WREG32(reg, temp);
+}
+
+static void
+atombios_overscan_setup(struct drm_encoder *encoder,
+			struct drm_display_mode *mode,
+			struct drm_display_mode *adjusted_mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
+	SET_CRTC_OVERSCAN_PS_ALLOCATION args;
+	int index = GetIndexIntoMasterTable(COMMAND, SetCRTC_OverScan);
+
+	memset(&args, 0, sizeof(args));
+
+	args.usOverscanRight = 0;
+	args.usOverscanLeft = 0;
+	args.usOverscanBottom = 0;
+	args.usOverscanTop = 0;
+	args.ucCRTC = radeon_crtc->crtc_id;
+
+	if (radeon_encoder->flags & RADEON_USE_RMX) {
+		if (radeon_encoder->rmx_type == RMX_FULL) {
+			args.usOverscanRight = 0;
+			args.usOverscanLeft = 0;
+			args.usOverscanBottom = 0;
+			args.usOverscanTop = 0;
+		} else if (radeon_encoder->rmx_type == RMX_CENTER) {
+			args.usOverscanTop = (adjusted_mode->crtc_vdisplay - mode->crtc_vdisplay) / 2;
+			args.usOverscanBottom = (adjusted_mode->crtc_vdisplay - mode->crtc_vdisplay) / 2;
+			args.usOverscanLeft = (adjusted_mode->crtc_hdisplay - mode->crtc_hdisplay) / 2;
+			args.usOverscanRight = (adjusted_mode->crtc_hdisplay - mode->crtc_hdisplay) / 2;
+		} else if (radeon_encoder->rmx_type == RMX_ASPECT) {
+			int a1 = mode->crtc_vdisplay * adjusted_mode->crtc_hdisplay;
+			int a2 = adjusted_mode->crtc_vdisplay * mode->crtc_hdisplay;
+
+			if (a1 > a2) {
+				args.usOverscanLeft = (adjusted_mode->crtc_hdisplay - (a2 / mode->crtc_vdisplay)) / 2;
+				args.usOverscanRight = (adjusted_mode->crtc_hdisplay - (a2 / mode->crtc_vdisplay)) / 2;
+			} else if (a2 > a1) {
+				args.usOverscanLeft = (adjusted_mode->crtc_vdisplay - (a1 / mode->crtc_hdisplay)) / 2;
+				args.usOverscanRight = (adjusted_mode->crtc_vdisplay - (a1 / mode->crtc_hdisplay)) / 2;
+			}
+		}
+	}
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+}
+
+static void
+atombios_scaler_setup(struct drm_encoder *encoder)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
+	ENABLE_SCALER_PS_ALLOCATION args;
+	int index = GetIndexIntoMasterTable(COMMAND, EnableScaler);
+	/* fixme - fill in enc_priv for atom dac */
+	enum radeon_tv_std tv_std = TV_STD_NTSC;
+
+	if (!ASIC_IS_AVIVO(rdev) && radeon_crtc->crtc_id)
+		return;
+
+	memset(&args, 0, sizeof(args));
+
+	args.ucScaler = radeon_crtc->crtc_id;
+
+	if (radeon_encoder->devices & (ATOM_DEVICE_TV_SUPPORT)) {
+		switch (tv_std) {
+		case TV_STD_NTSC:
+		default:
+			args.ucTVStandard = ATOM_TV_NTSC;
+			break;
+		case TV_STD_PAL:
+			args.ucTVStandard = ATOM_TV_PAL;
+			break;
+		case TV_STD_PAL_M:
+			args.ucTVStandard = ATOM_TV_PALM;
+			break;
+		case TV_STD_PAL_60:
+			args.ucTVStandard = ATOM_TV_PAL60;
+			break;
+		case TV_STD_NTSC_J:
+			args.ucTVStandard = ATOM_TV_NTSCJ;
+			break;
+		case TV_STD_SCART_PAL:
+			args.ucTVStandard = ATOM_TV_PAL; /* ??? */
+			break;
+		case TV_STD_SECAM:
+			args.ucTVStandard = ATOM_TV_SECAM;
+			break;
+		case TV_STD_PAL_CN:
+			args.ucTVStandard = ATOM_TV_PALCN;
+			break;
+		}
+		args.ucEnable = SCALER_ENABLE_MULTITAP_MODE;
+	} else if (radeon_encoder->devices & (ATOM_DEVICE_CV_SUPPORT)) {
+		args.ucTVStandard = ATOM_TV_CV;
+		args.ucEnable = SCALER_ENABLE_MULTITAP_MODE;
+	} else if (radeon_encoder->flags & RADEON_USE_RMX) {
+		if (radeon_encoder->rmx_type == RMX_FULL)
+			args.ucEnable = ATOM_SCALER_EXPANSION;
+		else if (radeon_encoder->rmx_type == RMX_CENTER)
+			args.ucEnable = ATOM_SCALER_CENTER;
+		else if (radeon_encoder->rmx_type == RMX_ASPECT)
+			args.ucEnable = ATOM_SCALER_EXPANSION;
+	} else {
+		if (ASIC_IS_AVIVO(rdev))
+			args.ucEnable = ATOM_SCALER_DISABLE;
+		else
+			args.ucEnable = ATOM_SCALER_CENTER;
+	}
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+	if (radeon_encoder->devices & (ATOM_DEVICE_CV_SUPPORT | ATOM_DEVICE_TV_SUPPORT)
+	    && rdev->family >= CHIP_RV515 && rdev->family <= CHIP_RV570) {
+		atom_rv515_force_tv_scaler(rdev);
+	}
+
+}
+
+static void
+radeon_atom_encoder_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	DISPLAY_DEVICE_OUTPUT_CONTROL_PS_ALLOCATION args;
+	int index = 0;
+	bool is_dig = false;
+
+	memset(&args, 0, sizeof(args));
+
+	switch (radeon_encoder->encoder_id) {
+	case ENCODER_OBJECT_ID_INTERNAL_TMDS1:
+	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1:
+		index = GetIndexIntoMasterTable(COMMAND, TMDSAOutputControl);
+		break;
+	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
+		is_dig = true;
+		break;
+	case ENCODER_OBJECT_ID_INTERNAL_DVO1:
+	case ENCODER_OBJECT_ID_INTERNAL_DDI:
+	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+		index = GetIndexIntoMasterTable(COMMAND, DVOOutputControl);
+		break;
+	case ENCODER_OBJECT_ID_INTERNAL_LVDS:
+		index = GetIndexIntoMasterTable(COMMAND, LCD1OutputControl);
+		break;
+	case ENCODER_OBJECT_ID_INTERNAL_LVTM1:
+		if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+			index = GetIndexIntoMasterTable(COMMAND, LCD1OutputControl);
+		else
+			index = GetIndexIntoMasterTable(COMMAND, LVTMAOutputControl);
+		break;
+	case ENCODER_OBJECT_ID_INTERNAL_DAC1:
+	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+		if (radeon_encoder->devices & (ATOM_DEVICE_TV_SUPPORT))
+			index = GetIndexIntoMasterTable(COMMAND, TV1OutputControl);
+		else if (radeon_encoder->devices & (ATOM_DEVICE_CV_SUPPORT))
+			index = GetIndexIntoMasterTable(COMMAND, CV1OutputControl);
+		else
+			index = GetIndexIntoMasterTable(COMMAND, DAC1OutputControl);
+		break;
+	case ENCODER_OBJECT_ID_INTERNAL_DAC2:
+	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
+		if (radeon_encoder->devices & (ATOM_DEVICE_TV_SUPPORT))
+			index = GetIndexIntoMasterTable(COMMAND, TV1OutputControl);
+		else if (radeon_encoder->devices & (ATOM_DEVICE_CV_SUPPORT))
+			index = GetIndexIntoMasterTable(COMMAND, CV1OutputControl);
+		else
+			index = GetIndexIntoMasterTable(COMMAND, DAC2OutputControl);
+		break;
+	}
+
+	if (is_dig) {
+		switch (mode) {
+		case DRM_MODE_DPMS_ON:
+			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE);
+			break;
+		case DRM_MODE_DPMS_STANDBY:
+		case DRM_MODE_DPMS_SUSPEND:
+		case DRM_MODE_DPMS_OFF:
+			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE);
+			break;
+		}
+	} else {
+		switch (mode) {
+		case DRM_MODE_DPMS_ON:
+			args.ucAction = ATOM_ENABLE;
+			break;
+		case DRM_MODE_DPMS_STANDBY:
+		case DRM_MODE_DPMS_SUSPEND:
+		case DRM_MODE_DPMS_OFF:
+			args.ucAction = ATOM_DISABLE;
+			break;
+		}
+		atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+	}
+	radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
+}
+
+union crtc_sourc_param {
+	SELECT_CRTC_SOURCE_PS_ALLOCATION v1;
+	SELECT_CRTC_SOURCE_PARAMETERS_V2 v2;
+};
+
+static void
+atombios_set_encoder_crtc_source(struct drm_encoder *encoder)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
+	union crtc_sourc_param args;
+	int index = GetIndexIntoMasterTable(COMMAND, SelectCRTC_Source);
+	uint8_t frev, crev;
+
+	memset(&args, 0, sizeof(args));
+
+	atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev, &crev);
+
+	switch (frev) {
+	case 1:
+		switch (crev) {
+		case 1:
+		default:
+			if (ASIC_IS_AVIVO(rdev))
+				args.v1.ucCRTC = radeon_crtc->crtc_id;
+			else {
+				if (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_DAC1) {
+					args.v1.ucCRTC = radeon_crtc->crtc_id;
+				} else {
+					args.v1.ucCRTC = radeon_crtc->crtc_id << 2;
+				}
+			}
+			switch (radeon_encoder->encoder_id) {
+			case ENCODER_OBJECT_ID_INTERNAL_TMDS1:
+			case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1:
+				args.v1.ucDevice = ATOM_DEVICE_DFP1_INDEX;
+				break;
+			case ENCODER_OBJECT_ID_INTERNAL_LVDS:
+			case ENCODER_OBJECT_ID_INTERNAL_LVTM1:
+				if (radeon_encoder->devices & ATOM_DEVICE_LCD1_SUPPORT)
+					args.v1.ucDevice = ATOM_DEVICE_LCD1_INDEX;
+				else
+					args.v1.ucDevice = ATOM_DEVICE_DFP3_INDEX;
+				break;
+			case ENCODER_OBJECT_ID_INTERNAL_DVO1:
+			case ENCODER_OBJECT_ID_INTERNAL_DDI:
+			case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+				args.v1.ucDevice = ATOM_DEVICE_DFP2_INDEX;
+				break;
+			case ENCODER_OBJECT_ID_INTERNAL_DAC1:
+			case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+				if (radeon_encoder->devices & (ATOM_DEVICE_TV_SUPPORT))
+					args.v1.ucDevice = ATOM_DEVICE_TV1_INDEX;
+				else if (radeon_encoder->devices & (ATOM_DEVICE_CV_SUPPORT))
+					args.v1.ucDevice = ATOM_DEVICE_CV_INDEX;
+				else
+					args.v1.ucDevice = ATOM_DEVICE_CRT1_INDEX;
+				break;
+			case ENCODER_OBJECT_ID_INTERNAL_DAC2:
+			case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
+				if (radeon_encoder->devices & (ATOM_DEVICE_TV_SUPPORT))
+					args.v1.ucDevice = ATOM_DEVICE_TV1_INDEX;
+				else if (radeon_encoder->devices & (ATOM_DEVICE_CV_SUPPORT))
+					args.v1.ucDevice = ATOM_DEVICE_CV_INDEX;
+				else
+					args.v1.ucDevice = ATOM_DEVICE_CRT2_INDEX;
+				break;
+			}
+			break;
+		case 2:
+			args.v2.ucCRTC = radeon_crtc->crtc_id;
+			args.v2.ucEncodeMode = atombios_get_encoder_mode(encoder);
+			switch (radeon_encoder->encoder_id) {
+			case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+			case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+			case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+				if (ASIC_IS_DCE32(rdev)) {
+					if (radeon_crtc->crtc_id)
+						args.v2.ucEncoderID = ASIC_INT_DIG2_ENCODER_ID;
+					else
+						args.v2.ucEncoderID = ASIC_INT_DIG1_ENCODER_ID;
+				} else
+					args.v2.ucEncoderID = ASIC_INT_DIG1_ENCODER_ID;
+				break;
+			case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+				args.v2.ucEncoderID = ASIC_INT_DVO_ENCODER_ID;
+				break;
+			case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
+				args.v2.ucEncoderID = ASIC_INT_DIG2_ENCODER_ID;
+				break;
+			case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+				if (radeon_encoder->devices & (ATOM_DEVICE_TV_SUPPORT))
+					args.v2.ucEncoderID = ASIC_INT_TV_ENCODER_ID;
+				else if (radeon_encoder->devices & (ATOM_DEVICE_CV_SUPPORT))
+					args.v2.ucEncoderID = ASIC_INT_TV_ENCODER_ID;
+				else
+					args.v2.ucEncoderID = ASIC_INT_DAC1_ENCODER_ID;
+				break;
+			case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
+				if (radeon_encoder->devices & (ATOM_DEVICE_TV_SUPPORT))
+					args.v2.ucEncoderID = ASIC_INT_TV_ENCODER_ID;
+				else if (radeon_encoder->devices & (ATOM_DEVICE_CV_SUPPORT))
+					args.v2.ucEncoderID = ASIC_INT_TV_ENCODER_ID;
+				else
+					args.v2.ucEncoderID = ASIC_INT_DAC2_ENCODER_ID;
+				break;
+			}
+			break;
+		}
+		break;
+	default:
+		DRM_ERROR("Unknown table version: %d, %d\n", frev, crev);
+		break;
+	}
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+}
+
+static void
+atombios_apply_encoder_quirks(struct drm_encoder *encoder,
+			      struct drm_display_mode *mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
+
+	/* Funky macbooks */
+	if ((dev->pdev->device == 0x71C5) &&
+	    (dev->pdev->subsystem_vendor == 0x106b) &&
+	    (dev->pdev->subsystem_device == 0x0080)) {
+		if (radeon_encoder->devices & ATOM_DEVICE_LCD1_SUPPORT) {
+			uint32_t lvtma_bit_depth_control = RREG32(AVIVO_LVTMA_BIT_DEPTH_CONTROL);
+
+			lvtma_bit_depth_control &= ~AVIVO_LVTMA_BIT_DEPTH_CONTROL_TRUNCATE_EN;
+			lvtma_bit_depth_control &= ~AVIVO_LVTMA_BIT_DEPTH_CONTROL_SPATIAL_DITHER_EN;
+
+			WREG32(AVIVO_LVTMA_BIT_DEPTH_CONTROL, lvtma_bit_depth_control);
+		}
+	}
+
+	/* set scaler clears this on some chips */
+	if (ASIC_IS_AVIVO(rdev) && (mode->flags & DRM_MODE_FLAG_INTERLACE))
+		WREG32(AVIVO_D1MODE_DATA_FORMAT + radeon_crtc->crtc_offset, AVIVO_D1MODE_INTERLEAVE_EN);
+}
+
+static void
+radeon_atom_encoder_mode_set(struct drm_encoder *encoder,
+			     struct drm_display_mode *mode,
+			     struct drm_display_mode *adjusted_mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
+
+	if (radeon_encoder->enc_priv) {
+		struct radeon_encoder_atom_dig *dig;
+
+		dig = radeon_encoder->enc_priv;
+		dig->dig_block = radeon_crtc->crtc_id;
+	}
+	radeon_encoder->pixel_clock = adjusted_mode->clock;
+
+	radeon_atombios_encoder_crtc_scratch_regs(encoder, radeon_crtc->crtc_id);
+	atombios_overscan_setup(encoder, mode, adjusted_mode);
+	atombios_scaler_setup(encoder);
+	atombios_set_encoder_crtc_source(encoder);
+
+	if (ASIC_IS_AVIVO(rdev)) {
+		if (radeon_encoder->devices & (ATOM_DEVICE_CV_SUPPORT | ATOM_DEVICE_TV_SUPPORT))
+			atombios_yuv_setup(encoder, true);
+		else
+			atombios_yuv_setup(encoder, false);
+	}
+
+	switch (radeon_encoder->encoder_id) {
+	case ENCODER_OBJECT_ID_INTERNAL_TMDS1:
+	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1:
+	case ENCODER_OBJECT_ID_INTERNAL_LVDS:
+	case ENCODER_OBJECT_ID_INTERNAL_LVTM1:
+		atombios_digital_setup(encoder, PANEL_ENCODER_ACTION_ENABLE);
+		break;
+	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
+		/* disable the encoder and transmitter */
+		atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE);
+		atombios_dig_encoder_setup(encoder, ATOM_DISABLE);
+
+		/* setup and enable the encoder and transmitter */
+		atombios_dig_encoder_setup(encoder, ATOM_ENABLE);
+		atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_SETUP);
+		atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE);
+		break;
+	case ENCODER_OBJECT_ID_INTERNAL_DDI:
+		atombios_ddia_setup(encoder, ATOM_ENABLE);
+		break;
+	case ENCODER_OBJECT_ID_INTERNAL_DVO1:
+	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+		atombios_external_tmds_setup(encoder, ATOM_ENABLE);
+		break;
+	case ENCODER_OBJECT_ID_INTERNAL_DAC1:
+	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+	case ENCODER_OBJECT_ID_INTERNAL_DAC2:
+	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
+		atombios_dac_setup(encoder, ATOM_ENABLE);
+		if (radeon_encoder->devices & (ATOM_DEVICE_TV_SUPPORT | ATOM_DEVICE_CV_SUPPORT))
+			atombios_tv_setup(encoder, ATOM_ENABLE);
+		break;
+	}
+	atombios_apply_encoder_quirks(encoder, adjusted_mode);
+}
+
+static bool
+atombios_dac_load_detect(struct drm_encoder *encoder)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+
+	if (radeon_encoder->devices & (ATOM_DEVICE_TV_SUPPORT |
+				       ATOM_DEVICE_CV_SUPPORT |
+				       ATOM_DEVICE_CRT_SUPPORT)) {
+		DAC_LOAD_DETECTION_PS_ALLOCATION args;
+		int index = GetIndexIntoMasterTable(COMMAND, DAC_LoadDetection);
+		uint8_t frev, crev;
+
+		memset(&args, 0, sizeof(args));
+
+		atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev, &crev);
+
+		args.sDacload.ucMisc = 0;
+
+		if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_DAC1) ||
+		    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1))
+			args.sDacload.ucDacType = ATOM_DAC_A;
+		else
+			args.sDacload.ucDacType = ATOM_DAC_B;
+
+		if (radeon_encoder->devices & ATOM_DEVICE_CRT1_SUPPORT)
+			args.sDacload.usDeviceID = cpu_to_le16(ATOM_DEVICE_CRT1_SUPPORT);
+		else if (radeon_encoder->devices & ATOM_DEVICE_CRT2_SUPPORT)
+			args.sDacload.usDeviceID = cpu_to_le16(ATOM_DEVICE_CRT2_SUPPORT);
+		else if (radeon_encoder->devices & ATOM_DEVICE_CV_SUPPORT) {
+			args.sDacload.usDeviceID = cpu_to_le16(ATOM_DEVICE_CV_SUPPORT);
+			if (crev >= 3)
+				args.sDacload.ucMisc = DAC_LOAD_MISC_YPrPb;
+		} else if (radeon_encoder->devices & ATOM_DEVICE_TV1_SUPPORT) {
+			args.sDacload.usDeviceID = cpu_to_le16(ATOM_DEVICE_TV1_SUPPORT);
+			if (crev >= 3)
+				args.sDacload.ucMisc = DAC_LOAD_MISC_YPrPb;
+		}
+
+		atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+		return true;
+	} else
+		return false;
+}
+
+static enum drm_connector_status
+radeon_atom_dac_detect(struct drm_encoder *encoder, struct drm_connector *connector)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	uint32_t bios_0_scratch;
+
+	if (!atombios_dac_load_detect(encoder)) {
+		DRM_DEBUG("detect returned false \n");
+		return connector_status_unknown;
+	}
+
+	if (rdev->family >= CHIP_R600)
+		bios_0_scratch = RREG32(R600_BIOS_0_SCRATCH);
+	else
+		bios_0_scratch = RREG32(RADEON_BIOS_0_SCRATCH);
+
+	DRM_DEBUG("Bios 0 scratch %x\n", bios_0_scratch);
+	if (radeon_encoder->devices & ATOM_DEVICE_CRT1_SUPPORT) {
+		if (bios_0_scratch & ATOM_S0_CRT1_MASK)
+			return connector_status_connected;
+	} else if (radeon_encoder->devices & ATOM_DEVICE_CRT2_SUPPORT) {
+		if (bios_0_scratch & ATOM_S0_CRT2_MASK)
+			return connector_status_connected;
+	} else if (radeon_encoder->devices & ATOM_DEVICE_CV_SUPPORT) {
+		if (bios_0_scratch & (ATOM_S0_CV_MASK|ATOM_S0_CV_MASK_A))
+			return connector_status_connected;
+	} else if (radeon_encoder->devices & ATOM_DEVICE_TV1_SUPPORT) {
+		if (bios_0_scratch & (ATOM_S0_TV1_COMPOSITE | ATOM_S0_TV1_COMPOSITE_A))
+			return connector_status_connected; /* CTV */
+		else if (bios_0_scratch & (ATOM_S0_TV1_SVIDEO | ATOM_S0_TV1_SVIDEO_A))
+			return connector_status_connected; /* STV */
+	}
+	return connector_status_disconnected;
+}
+
+static void radeon_atom_encoder_prepare(struct drm_encoder *encoder)
+{
+	radeon_atom_output_lock(encoder, true);
+	radeon_atom_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
+}
+
+static void radeon_atom_encoder_commit(struct drm_encoder *encoder)
+{
+	radeon_atom_encoder_dpms(encoder, DRM_MODE_DPMS_ON);
+	radeon_atom_output_lock(encoder, false);
+}
+
+static const struct drm_encoder_helper_funcs radeon_atom_dig_helper_funcs = {
+	.dpms = radeon_atom_encoder_dpms,
+	.mode_fixup = radeon_atom_mode_fixup,
+	.prepare = radeon_atom_encoder_prepare,
+	.mode_set = radeon_atom_encoder_mode_set,
+	.commit = radeon_atom_encoder_commit,
+	/* no detect for TMDS/LVDS yet */
+};
+
+static const struct drm_encoder_helper_funcs radeon_atom_dac_helper_funcs = {
+	.dpms = radeon_atom_encoder_dpms,
+	.mode_fixup = radeon_atom_mode_fixup,
+	.prepare = radeon_atom_encoder_prepare,
+	.mode_set = radeon_atom_encoder_mode_set,
+	.commit = radeon_atom_encoder_commit,
+	.detect = radeon_atom_dac_detect,
+};
+
+void radeon_enc_destroy(struct drm_encoder *encoder)
+{
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	kfree(radeon_encoder->enc_priv);
+	drm_encoder_cleanup(encoder);
+	kfree(radeon_encoder);
+}
+
+static const struct drm_encoder_funcs radeon_atom_enc_funcs = {
+	.destroy = radeon_enc_destroy,
+};
+
+struct radeon_encoder_atom_dig *
+radeon_atombios_set_dig_info(struct radeon_encoder *radeon_encoder)
+{
+	struct radeon_encoder_atom_dig *dig = kzalloc(sizeof(struct radeon_encoder_atom_dig), GFP_KERNEL);
+
+	if (!dig)
+		return NULL;
+
+	/* coherent mode by default */
+	dig->coherent_mode = true;
+
+	return dig;
+}
+
+void
+radeon_add_atom_encoder(struct drm_device *dev, uint32_t encoder_id, uint32_t supported_device)
+{
+	struct drm_encoder *encoder;
+	struct radeon_encoder *radeon_encoder;
+
+	/* see if we already added it */
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+		radeon_encoder = to_radeon_encoder(encoder);
+		if (radeon_encoder->encoder_id == encoder_id) {
+			radeon_encoder->devices |= supported_device;
+			return;
+		}
+
+	}
+
+	/* add a new one */
+	radeon_encoder = kzalloc(sizeof(struct radeon_encoder), GFP_KERNEL);
+	if (!radeon_encoder)
+		return;
+
+	encoder = &radeon_encoder->base;
+	encoder->possible_crtcs = 0x3;
+	encoder->possible_clones = 0;
+
+	radeon_encoder->enc_priv = NULL;
+
+	radeon_encoder->encoder_id = encoder_id;
+	radeon_encoder->devices = supported_device;
+
+	switch (radeon_encoder->encoder_id) {
+	case ENCODER_OBJECT_ID_INTERNAL_LVDS:
+	case ENCODER_OBJECT_ID_INTERNAL_TMDS1:
+	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1:
+	case ENCODER_OBJECT_ID_INTERNAL_LVTM1:
+		if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+			radeon_encoder->rmx_type = RMX_FULL;
+			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_LVDS);
+			radeon_encoder->enc_priv = radeon_atombios_get_lvds_info(radeon_encoder);
+		} else {
+			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_TMDS);
+			radeon_encoder->enc_priv = radeon_atombios_set_dig_info(radeon_encoder);
+		}
+		drm_encoder_helper_add(encoder, &radeon_atom_dig_helper_funcs);
+		break;
+	case ENCODER_OBJECT_ID_INTERNAL_DAC1:
+		drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_DAC);
+		drm_encoder_helper_add(encoder, &radeon_atom_dac_helper_funcs);
+		break;
+	case ENCODER_OBJECT_ID_INTERNAL_DAC2:
+	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
+		drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_TVDAC);
+		drm_encoder_helper_add(encoder, &radeon_atom_dac_helper_funcs);
+		break;
+	case ENCODER_OBJECT_ID_INTERNAL_DVO1:
+	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+	case ENCODER_OBJECT_ID_INTERNAL_DDI:
+	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
+	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+		drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_TMDS);
+		radeon_encoder->enc_priv = radeon_atombios_set_dig_info(radeon_encoder);
+		drm_encoder_helper_add(encoder, &radeon_atom_dig_helper_funcs);
+		break;
+	}
+}
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
new file mode 100644
index 00000000000..fa86d398945
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -0,0 +1,825 @@
+/*
+ * Copyright © 2007 David Airlie
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *     David Airlie
+ */
+    /*
+     *  Modularization
+     */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/tty.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/fb.h>
+#include <linux/init.h>
+
+#include "drmP.h"
+#include "drm.h"
+#include "drm_crtc.h"
+#include "drm_crtc_helper.h"
+#include "radeon_drm.h"
+#include "radeon.h"
+
+struct radeon_fb_device {
+	struct radeon_device		*rdev;
+	struct drm_display_mode		*mode;
+	struct radeon_framebuffer	*rfb;
+	int				crtc_count;
+	/* crtc currently bound to this */
+	uint32_t			crtc_ids[2];
+};
+
+static int radeonfb_setcolreg(unsigned regno,
+			      unsigned red,
+			      unsigned green,
+			      unsigned blue,
+			      unsigned transp,
+			      struct fb_info *info)
+{
+	struct radeon_fb_device *rfbdev = info->par;
+	struct drm_device *dev = rfbdev->rdev->ddev;
+	struct drm_crtc *crtc;
+	int i;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+		struct drm_mode_set *modeset = &radeon_crtc->mode_set;
+		struct drm_framebuffer *fb = modeset->fb;
+
+		for (i = 0; i < rfbdev->crtc_count; i++) {
+			if (crtc->base.id == rfbdev->crtc_ids[i]) {
+				break;
+			}
+		}
+		if (i == rfbdev->crtc_count) {
+			continue;
+		}
+		if (regno > 255) {
+			return 1;
+		}
+		if (fb->depth == 8) {
+			radeon_crtc_fb_gamma_set(crtc, red, green, blue, regno);
+			return 0;
+		}
+
+		if (regno < 16) {
+			switch (fb->depth) {
+			case 15:
+				fb->pseudo_palette[regno] = ((red & 0xf800) >> 1) |
+					((green & 0xf800) >>  6) |
+					((blue & 0xf800) >> 11);
+				break;
+			case 16:
+				fb->pseudo_palette[regno] = (red & 0xf800) |
+					((green & 0xfc00) >>  5) |
+					((blue  & 0xf800) >> 11);
+				break;
+			case 24:
+			case 32:
+				fb->pseudo_palette[regno] = ((red & 0xff00) << 8) |
+					(green & 0xff00) |
+					((blue  & 0xff00) >> 8);
+				break;
+			}
+		}
+	}
+	return 0;
+}
+
+static int radeonfb_check_var(struct fb_var_screeninfo *var,
+			      struct fb_info *info)
+{
+	struct radeon_fb_device *rfbdev = info->par;
+	struct radeon_framebuffer *rfb = rfbdev->rfb;
+	struct drm_framebuffer *fb = &rfb->base;
+	int depth;
+
+	if (var->pixclock == -1 || !var->pixclock) {
+		return -EINVAL;
+	}
+	/* Need to resize the fb object !!! */
+	if (var->xres > fb->width || var->yres > fb->height) {
+		DRM_ERROR("Requested width/height is greater than current fb "
+			   "object %dx%d > %dx%d\n", var->xres, var->yres,
+			   fb->width, fb->height);
+		DRM_ERROR("Need resizing code.\n");
+		return -EINVAL;
+	}
+
+	switch (var->bits_per_pixel) {
+	case 16:
+		depth = (var->green.length == 6) ? 16 : 15;
+		break;
+	case 32:
+		depth = (var->transp.length > 0) ? 32 : 24;
+		break;
+	default:
+		depth = var->bits_per_pixel;
+		break;
+	}
+
+	switch (depth) {
+	case 8:
+		var->red.offset = 0;
+		var->green.offset = 0;
+		var->blue.offset = 0;
+		var->red.length = 8;
+		var->green.length = 8;
+		var->blue.length = 8;
+		var->transp.length = 0;
+		var->transp.offset = 0;
+		break;
+	case 15:
+		var->red.offset = 10;
+		var->green.offset = 5;
+		var->blue.offset = 0;
+		var->red.length = 5;
+		var->green.length = 5;
+		var->blue.length = 5;
+		var->transp.length = 1;
+		var->transp.offset = 15;
+		break;
+	case 16:
+		var->red.offset = 11;
+		var->green.offset = 5;
+		var->blue.offset = 0;
+		var->red.length = 5;
+		var->green.length = 6;
+		var->blue.length = 5;
+		var->transp.length = 0;
+		var->transp.offset = 0;
+		break;
+	case 24:
+		var->red.offset = 16;
+		var->green.offset = 8;
+		var->blue.offset = 0;
+		var->red.length = 8;
+		var->green.length = 8;
+		var->blue.length = 8;
+		var->transp.length = 0;
+		var->transp.offset = 0;
+		break;
+	case 32:
+		var->red.offset = 16;
+		var->green.offset = 8;
+		var->blue.offset = 0;
+		var->red.length = 8;
+		var->green.length = 8;
+		var->blue.length = 8;
+		var->transp.length = 8;
+		var->transp.offset = 24;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* this will let fbcon do the mode init */
+static int radeonfb_set_par(struct fb_info *info)
+{
+	struct radeon_fb_device *rfbdev = info->par;
+	struct drm_device *dev = rfbdev->rdev->ddev;
+	struct fb_var_screeninfo *var = &info->var;
+	struct drm_crtc *crtc;
+	int ret;
+	int i;
+
+	if (var->pixclock != -1) {
+		DRM_ERROR("PIXEL CLCOK SET\n");
+		return -EINVAL;
+	}
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+
+		for (i = 0; i < rfbdev->crtc_count; i++) {
+			if (crtc->base.id == rfbdev->crtc_ids[i]) {
+				break;
+			}
+		}
+		if (i == rfbdev->crtc_count) {
+			continue;
+		}
+		if (crtc->fb == radeon_crtc->mode_set.fb) {
+			mutex_lock(&dev->mode_config.mutex);
+			ret = crtc->funcs->set_config(&radeon_crtc->mode_set);
+			mutex_unlock(&dev->mode_config.mutex);
+			if (ret) {
+				return ret;
+			}
+		}
+	}
+	return 0;
+}
+
+static int radeonfb_pan_display(struct fb_var_screeninfo *var,
+				struct fb_info *info)
+{
+	struct radeon_fb_device *rfbdev = info->par;
+	struct drm_device *dev = rfbdev->rdev->ddev;
+	struct drm_mode_set *modeset;
+	struct drm_crtc *crtc;
+	struct radeon_crtc *radeon_crtc;
+	int ret = 0;
+	int i;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		for (i = 0; i < rfbdev->crtc_count; i++) {
+			if (crtc->base.id == rfbdev->crtc_ids[i]) {
+				break;
+			}
+		}
+
+		if (i == rfbdev->crtc_count) {
+			continue;
+		}
+
+		radeon_crtc = to_radeon_crtc(crtc);
+		modeset = &radeon_crtc->mode_set;
+
+		modeset->x = var->xoffset;
+		modeset->y = var->yoffset;
+
+		if (modeset->num_connectors) {
+			mutex_lock(&dev->mode_config.mutex);
+			ret = crtc->funcs->set_config(modeset);
+			mutex_unlock(&dev->mode_config.mutex);
+			if (!ret) {
+				info->var.xoffset = var->xoffset;
+				info->var.yoffset = var->yoffset;
+			}
+		}
+	}
+	return ret;
+}
+
+static void radeonfb_on(struct fb_info *info)
+{
+	struct radeon_fb_device *rfbdev = info->par;
+	struct drm_device *dev = rfbdev->rdev->ddev;
+	struct drm_crtc *crtc;
+	struct drm_encoder *encoder;
+	int i;
+
+	/*
+	 * For each CRTC in this fb, find all associated encoders
+	 * and turn them off, then turn off the CRTC.
+	 */
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+
+		for (i = 0; i < rfbdev->crtc_count; i++) {
+			if (crtc->base.id == rfbdev->crtc_ids[i]) {
+				break;
+			}
+		}
+
+		mutex_lock(&dev->mode_config.mutex);
+		crtc_funcs->dpms(crtc, DRM_MODE_DPMS_ON);
+		mutex_unlock(&dev->mode_config.mutex);
+
+		/* Found a CRTC on this fb, now find encoders */
+		list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+			if (encoder->crtc == crtc) {
+				struct drm_encoder_helper_funcs *encoder_funcs;
+
+				encoder_funcs = encoder->helper_private;
+				mutex_lock(&dev->mode_config.mutex);
+				encoder_funcs->dpms(encoder, DRM_MODE_DPMS_ON);
+				mutex_unlock(&dev->mode_config.mutex);
+			}
+		}
+	}
+}
+
+static void radeonfb_off(struct fb_info *info, int dpms_mode)
+{
+	struct radeon_fb_device *rfbdev = info->par;
+	struct drm_device *dev = rfbdev->rdev->ddev;
+	struct drm_crtc *crtc;
+	struct drm_encoder *encoder;
+	int i;
+
+	/*
+	 * For each CRTC in this fb, find all associated encoders
+	 * and turn them off, then turn off the CRTC.
+	 */
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+
+		for (i = 0; i < rfbdev->crtc_count; i++) {
+			if (crtc->base.id == rfbdev->crtc_ids[i]) {
+				break;
+			}
+		}
+
+		/* Found a CRTC on this fb, now find encoders */
+		list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+			if (encoder->crtc == crtc) {
+				struct drm_encoder_helper_funcs *encoder_funcs;
+
+				encoder_funcs = encoder->helper_private;
+				mutex_lock(&dev->mode_config.mutex);
+				encoder_funcs->dpms(encoder, dpms_mode);
+				mutex_unlock(&dev->mode_config.mutex);
+			}
+		}
+		if (dpms_mode == DRM_MODE_DPMS_OFF) {
+			mutex_lock(&dev->mode_config.mutex);
+			crtc_funcs->dpms(crtc, dpms_mode);
+			mutex_unlock(&dev->mode_config.mutex);
+		}
+	}
+}
+
+int radeonfb_blank(int blank, struct fb_info *info)
+{
+	switch (blank) {
+	case FB_BLANK_UNBLANK:
+		radeonfb_on(info);
+		break;
+	case FB_BLANK_NORMAL:
+		radeonfb_off(info, DRM_MODE_DPMS_STANDBY);
+		break;
+	case FB_BLANK_HSYNC_SUSPEND:
+		radeonfb_off(info, DRM_MODE_DPMS_STANDBY);
+		break;
+	case FB_BLANK_VSYNC_SUSPEND:
+		radeonfb_off(info, DRM_MODE_DPMS_SUSPEND);
+		break;
+	case FB_BLANK_POWERDOWN:
+		radeonfb_off(info, DRM_MODE_DPMS_OFF);
+		break;
+	}
+	return 0;
+}
+
+static struct fb_ops radeonfb_ops = {
+	.owner = THIS_MODULE,
+	.fb_check_var = radeonfb_check_var,
+	.fb_set_par = radeonfb_set_par,
+	.fb_setcolreg = radeonfb_setcolreg,
+	.fb_fillrect = cfb_fillrect,
+	.fb_copyarea = cfb_copyarea,
+	.fb_imageblit = cfb_imageblit,
+	.fb_pan_display = radeonfb_pan_display,
+	.fb_blank = radeonfb_blank,
+};
+
+/**
+ * Curretly it is assumed that the old framebuffer is reused.
+ *
+ * LOCKING
+ * caller should hold the mode config lock.
+ *
+ */
+int radeonfb_resize(struct drm_device *dev, struct drm_crtc *crtc)
+{
+	struct fb_info *info;
+	struct drm_framebuffer *fb;
+	struct drm_display_mode *mode = crtc->desired_mode;
+
+	fb = crtc->fb;
+	if (fb == NULL) {
+		return 1;
+	}
+	info = fb->fbdev;
+	if (info == NULL) {
+		return 1;
+	}
+	if (mode == NULL) {
+		return 1;
+	}
+	info->var.xres = mode->hdisplay;
+	info->var.right_margin = mode->hsync_start - mode->hdisplay;
+	info->var.hsync_len = mode->hsync_end - mode->hsync_start;
+	info->var.left_margin = mode->htotal - mode->hsync_end;
+	info->var.yres = mode->vdisplay;
+	info->var.lower_margin = mode->vsync_start - mode->vdisplay;
+	info->var.vsync_len = mode->vsync_end - mode->vsync_start;
+	info->var.upper_margin = mode->vtotal - mode->vsync_end;
+	info->var.pixclock = 10000000 / mode->htotal * 1000 / mode->vtotal * 100;
+	/* avoid overflow */
+	info->var.pixclock = info->var.pixclock * 1000 / mode->vrefresh;
+
+	return 0;
+}
+EXPORT_SYMBOL(radeonfb_resize);
+
+static struct drm_mode_set panic_mode;
+
+int radeonfb_panic(struct notifier_block *n, unsigned long ununsed,
+		  void *panic_str)
+{
+	DRM_ERROR("panic occurred, switching back to text console\n");
+	drm_crtc_helper_set_config(&panic_mode);
+	return 0;
+}
+EXPORT_SYMBOL(radeonfb_panic);
+
+static struct notifier_block paniced = {
+	.notifier_call = radeonfb_panic,
+};
+
+static int radeon_align_pitch(struct radeon_device *rdev, int width, int bpp)
+{
+	int aligned = width;
+	int align_large = (ASIC_IS_AVIVO(rdev));
+	int pitch_mask = 0;
+
+	switch (bpp / 8) {
+	case 1:
+		pitch_mask = align_large ? 255 : 127;
+		break;
+	case 2:
+		pitch_mask = align_large ? 127 : 31;
+		break;
+	case 3:
+	case 4:
+		pitch_mask = align_large ? 63 : 15;
+		break;
+	}
+
+	aligned += pitch_mask;
+	aligned &= ~pitch_mask;
+	return aligned;
+}
+
+int radeonfb_create(struct radeon_device *rdev,
+		    uint32_t fb_width, uint32_t fb_height,
+		    uint32_t surface_width, uint32_t surface_height,
+		    struct radeon_framebuffer **rfb_p)
+{
+	struct fb_info *info;
+	struct radeon_fb_device *rfbdev;
+	struct drm_framebuffer *fb;
+	struct radeon_framebuffer *rfb;
+	struct drm_mode_fb_cmd mode_cmd;
+	struct drm_gem_object *gobj = NULL;
+	struct radeon_object *robj = NULL;
+	struct device *device = &rdev->pdev->dev;
+	int size, aligned_size, ret;
+	void *fbptr = NULL;
+
+	mode_cmd.width = surface_width;
+	mode_cmd.height = surface_height;
+	mode_cmd.bpp = 32;
+	/* need to align pitch with crtc limits */
+	mode_cmd.pitch = radeon_align_pitch(rdev, mode_cmd.width, mode_cmd.bpp) * ((mode_cmd.bpp + 1) / 8);
+	mode_cmd.depth = 24;
+
+	size = mode_cmd.pitch * mode_cmd.height;
+	aligned_size = ALIGN(size, PAGE_SIZE);
+
+	ret = radeon_gem_object_create(rdev, aligned_size, 0,
+				       RADEON_GEM_DOMAIN_VRAM,
+				       false, ttm_bo_type_kernel,
+				       false, &gobj);
+	if (ret) {
+		printk(KERN_ERR "failed to allocate framebuffer\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+	robj = gobj->driver_private;
+
+	mutex_lock(&rdev->ddev->struct_mutex);
+	fb = radeon_framebuffer_create(rdev->ddev, &mode_cmd, gobj);
+	if (fb == NULL) {
+		DRM_ERROR("failed to allocate fb.\n");
+		ret = -ENOMEM;
+		goto out_unref;
+	}
+
+	list_add(&fb->filp_head, &rdev->ddev->mode_config.fb_kernel_list);
+
+	rfb = to_radeon_framebuffer(fb);
+	*rfb_p = rfb;
+	rdev->fbdev_rfb = rfb;
+
+	info = framebuffer_alloc(sizeof(struct radeon_fb_device), device);
+	if (info == NULL) {
+		ret = -ENOMEM;
+		goto out_unref;
+	}
+	rfbdev = info->par;
+
+	ret = radeon_object_kmap(robj, &fbptr);
+	if (ret) {
+		goto out_unref;
+	}
+
+	strcpy(info->fix.id, "radeondrmfb");
+	info->fix.type = FB_TYPE_PACKED_PIXELS;
+	info->fix.visual = FB_VISUAL_TRUECOLOR;
+	info->fix.type_aux = 0;
+	info->fix.xpanstep = 1; /* doing it in hw */
+	info->fix.ypanstep = 1; /* doing it in hw */
+	info->fix.ywrapstep = 0;
+	info->fix.accel = FB_ACCEL_I830;
+	info->fix.type_aux = 0;
+	info->flags = FBINFO_DEFAULT;
+	info->fbops = &radeonfb_ops;
+	info->fix.line_length = fb->pitch;
+	info->screen_base = fbptr;
+	info->fix.smem_start = (unsigned long)fbptr;
+	info->fix.smem_len = size;
+	info->screen_base = fbptr;
+	info->screen_size = size;
+	info->pseudo_palette = fb->pseudo_palette;
+	info->var.xres_virtual = fb->width;
+	info->var.yres_virtual = fb->height;
+	info->var.bits_per_pixel = fb->bits_per_pixel;
+	info->var.xoffset = 0;
+	info->var.yoffset = 0;
+	info->var.activate = FB_ACTIVATE_NOW;
+	info->var.height = -1;
+	info->var.width = -1;
+	info->var.xres = fb_width;
+	info->var.yres = fb_height;
+	info->fix.mmio_start = pci_resource_start(rdev->pdev, 2);
+	info->fix.mmio_len = pci_resource_len(rdev->pdev, 2);
+	info->pixmap.size = 64*1024;
+	info->pixmap.buf_align = 8;
+	info->pixmap.access_align = 32;
+	info->pixmap.flags = FB_PIXMAP_SYSTEM;
+	info->pixmap.scan_align = 1;
+	if (info->screen_base == NULL) {
+		ret = -ENOSPC;
+		goto out_unref;
+	}
+	DRM_INFO("fb mappable at 0x%lX\n",  info->fix.smem_start);
+	DRM_INFO("vram apper at 0x%lX\n",  (unsigned long)rdev->mc.aper_base);
+	DRM_INFO("size %lu\n", (unsigned long)size);
+	DRM_INFO("fb depth is %d\n", fb->depth);
+	DRM_INFO("   pitch is %d\n", fb->pitch);
+
+	switch (fb->depth) {
+	case 8:
+		info->var.red.offset = 0;
+		info->var.green.offset = 0;
+		info->var.blue.offset = 0;
+		info->var.red.length = 8; /* 8bit DAC */
+		info->var.green.length = 8;
+		info->var.blue.length = 8;
+		info->var.transp.offset = 0;
+		info->var.transp.length = 0;
+		break;
+	case 15:
+		info->var.red.offset = 10;
+		info->var.green.offset = 5;
+		info->var.blue.offset = 0;
+		info->var.red.length = 5;
+		info->var.green.length = 5;
+		info->var.blue.length = 5;
+		info->var.transp.offset = 15;
+		info->var.transp.length = 1;
+		break;
+	case 16:
+		info->var.red.offset = 11;
+		info->var.green.offset = 5;
+		info->var.blue.offset = 0;
+		info->var.red.length = 5;
+		info->var.green.length = 6;
+		info->var.blue.length = 5;
+		info->var.transp.offset = 0;
+		break;
+	case 24:
+		info->var.red.offset = 16;
+		info->var.green.offset = 8;
+		info->var.blue.offset = 0;
+		info->var.red.length = 8;
+		info->var.green.length = 8;
+		info->var.blue.length = 8;
+		info->var.transp.offset = 0;
+		info->var.transp.length = 0;
+		break;
+	case 32:
+		info->var.red.offset = 16;
+		info->var.green.offset = 8;
+		info->var.blue.offset = 0;
+		info->var.red.length = 8;
+		info->var.green.length = 8;
+		info->var.blue.length = 8;
+		info->var.transp.offset = 24;
+		info->var.transp.length = 8;
+		break;
+	default:
+		break;
+	}
+
+	fb->fbdev = info;
+	rfbdev->rfb = rfb;
+	rfbdev->rdev = rdev;
+
+	mutex_unlock(&rdev->ddev->struct_mutex);
+	return 0;
+
+out_unref:
+	if (robj) {
+		radeon_object_kunmap(robj);
+	}
+	if (ret) {
+		list_del(&fb->filp_head);
+		drm_gem_object_unreference(gobj);
+		drm_framebuffer_cleanup(fb);
+		kfree(fb);
+	}
+	drm_gem_object_unreference(gobj);
+	mutex_unlock(&rdev->ddev->struct_mutex);
+out:
+	return ret;
+}
+
+static int radeonfb_single_fb_probe(struct radeon_device *rdev)
+{
+	struct drm_crtc *crtc;
+	struct drm_connector *connector;
+	unsigned int fb_width = (unsigned)-1, fb_height = (unsigned)-1;
+	unsigned int surface_width = 0, surface_height = 0;
+	int new_fb = 0;
+	int crtc_count = 0;
+	int ret, i, conn_count = 0;
+	struct radeon_framebuffer *rfb;
+	struct fb_info *info;
+	struct radeon_fb_device *rfbdev;
+	struct drm_mode_set *modeset = NULL;
+
+	/* first up get a count of crtcs now in use and new min/maxes width/heights */
+	list_for_each_entry(crtc, &rdev->ddev->mode_config.crtc_list, head) {
+		if (drm_helper_crtc_in_use(crtc)) {
+			if (crtc->desired_mode) {
+				if (crtc->desired_mode->hdisplay < fb_width)
+					fb_width = crtc->desired_mode->hdisplay;
+
+				if (crtc->desired_mode->vdisplay < fb_height)
+					fb_height = crtc->desired_mode->vdisplay;
+
+				if (crtc->desired_mode->hdisplay > surface_width)
+					surface_width = crtc->desired_mode->hdisplay;
+
+				if (crtc->desired_mode->vdisplay > surface_height)
+					surface_height = crtc->desired_mode->vdisplay;
+			}
+			crtc_count++;
+		}
+	}
+
+	if (crtc_count == 0 || fb_width == -1 || fb_height == -1) {
+		/* hmm everyone went away - assume VGA cable just fell out
+		   and will come back later. */
+		return 0;
+	}
+
+	/* do we have an fb already? */
+	if (list_empty(&rdev->ddev->mode_config.fb_kernel_list)) {
+		/* create an fb if we don't have one */
+		ret = radeonfb_create(rdev, fb_width, fb_height, surface_width, surface_height, &rfb);
+		if (ret) {
+			return -EINVAL;
+		}
+		new_fb = 1;
+	} else {
+		struct drm_framebuffer *fb;
+		fb = list_first_entry(&rdev->ddev->mode_config.fb_kernel_list, struct drm_framebuffer, filp_head);
+		rfb = to_radeon_framebuffer(fb);
+
+		/* if someone hotplugs something bigger than we have already allocated, we are pwned.
+		   As really we can't resize an fbdev that is in the wild currently due to fbdev
+		   not really being designed for the lower layers moving stuff around under it.
+		   - so in the grand style of things - punt. */
+		if ((fb->width < surface_width) || (fb->height < surface_height)) {
+			DRM_ERROR("Framebuffer not large enough to scale console onto.\n");
+			return -EINVAL;
+		}
+	}
+
+	info = rfb->base.fbdev;
+	rdev->fbdev_info = info;
+	rfbdev = info->par;
+
+	crtc_count = 0;
+	/* okay we need to setup new connector sets in the crtcs */
+	list_for_each_entry(crtc, &rdev->ddev->mode_config.crtc_list, head) {
+		struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+		modeset = &radeon_crtc->mode_set;
+		modeset->fb = &rfb->base;
+		conn_count = 0;
+		list_for_each_entry(connector, &rdev->ddev->mode_config.connector_list, head) {
+			if (connector->encoder)
+				if (connector->encoder->crtc == modeset->crtc) {
+					modeset->connectors[conn_count] = connector;
+					conn_count++;
+					if (conn_count > RADEONFB_CONN_LIMIT)
+						BUG();
+				}
+		}
+
+		for (i = conn_count; i < RADEONFB_CONN_LIMIT; i++)
+			modeset->connectors[i] = NULL;
+
+
+		rfbdev->crtc_ids[crtc_count++] = crtc->base.id;
+
+		modeset->num_connectors = conn_count;
+		if (modeset->crtc->desired_mode) {
+			if (modeset->mode) {
+				drm_mode_destroy(rdev->ddev, modeset->mode);
+			}
+			modeset->mode = drm_mode_duplicate(rdev->ddev,
+							   modeset->crtc->desired_mode);
+		}
+	}
+	rfbdev->crtc_count = crtc_count;
+
+	if (new_fb) {
+		info->var.pixclock = -1;
+		if (register_framebuffer(info) < 0)
+			return -EINVAL;
+	} else {
+		radeonfb_set_par(info);
+	}
+	printk(KERN_INFO "fb%d: %s frame buffer device\n", info->node,
+	       info->fix.id);
+
+	/* Switch back to kernel console on panic */
+	panic_mode = *modeset;
+	atomic_notifier_chain_register(&panic_notifier_list, &paniced);
+	printk(KERN_INFO "registered panic notifier\n");
+
+	return 0;
+}
+
+int radeonfb_probe(struct drm_device *dev)
+{
+	int ret;
+
+	/* something has changed in the lower levels of hell - deal with it
+	   here */
+
+	/* two modes : a) 1 fb to rule all crtcs.
+	               b) one fb per crtc.
+	   two actions 1) new connected device
+	               2) device removed.
+	   case a/1 : if the fb surface isn't big enough - resize the surface fb.
+	              if the fb size isn't big enough - resize fb into surface.
+		      if everything big enough configure the new crtc/etc.
+	   case a/2 : undo the configuration
+	              possibly resize down the fb to fit the new configuration.
+           case b/1 : see if it is on a new crtc - setup a new fb and add it.
+	   case b/2 : teardown the new fb.
+	*/
+	ret = radeonfb_single_fb_probe(dev->dev_private);
+	return ret;
+}
+EXPORT_SYMBOL(radeonfb_probe);
+
+int radeonfb_remove(struct drm_device *dev, struct drm_framebuffer *fb)
+{
+	struct fb_info *info;
+	struct radeon_framebuffer *rfb = to_radeon_framebuffer(fb);
+	struct radeon_object *robj;
+
+	if (!fb) {
+		return -EINVAL;
+	}
+	info = fb->fbdev;
+	if (info) {
+		robj = rfb->obj->driver_private;
+		unregister_framebuffer(info);
+		radeon_object_kunmap(robj);
+		framebuffer_release(info);
+	}
+
+	printk(KERN_INFO "unregistered panic notifier\n");
+	atomic_notifier_chain_unregister(&panic_notifier_list, &paniced);
+	memset(&panic_mode, 0, sizeof(struct drm_mode_set));
+	return 0;
+}
+EXPORT_SYMBOL(radeonfb_remove);
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
new file mode 100644
index 00000000000..96afbf5ae2a
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -0,0 +1,387 @@
+/*
+ * Copyright 2009 Jerome Glisse.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ *    Jerome Glisse <glisse@freedesktop.org>
+ *    Dave Airlie
+ */
+#include <linux/seq_file.h>
+#include <asm/atomic.h>
+#include <linux/wait.h>
+#include <linux/list.h>
+#include <linux/kref.h>
+#include "drmP.h"
+#include "drm.h"
+#include "radeon_reg.h"
+#include "radeon.h"
+
+int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence)
+{
+	unsigned long irq_flags;
+
+	write_lock_irqsave(&rdev->fence_drv.lock, irq_flags);
+	if (fence->emited) {
+		write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
+		return 0;
+	}
+	fence->seq = atomic_add_return(1, &rdev->fence_drv.seq);
+	if (!rdev->cp.ready) {
+		/* FIXME: cp is not running assume everythings is done right
+		 * away
+		 */
+		WREG32(rdev->fence_drv.scratch_reg, fence->seq);
+	} else {
+		radeon_fence_ring_emit(rdev, fence);
+	}
+	fence->emited = true;
+	fence->timeout = jiffies + ((2000 * HZ) / 1000);
+	list_del(&fence->list);
+	list_add_tail(&fence->list, &rdev->fence_drv.emited);
+	write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
+	return 0;
+}
+
+static bool radeon_fence_poll_locked(struct radeon_device *rdev)
+{
+	struct radeon_fence *fence;
+	struct list_head *i, *n;
+	uint32_t seq;
+	bool wake = false;
+
+	if (rdev == NULL) {
+		return true;
+	}
+	if (rdev->shutdown) {
+		return true;
+	}
+	seq = RREG32(rdev->fence_drv.scratch_reg);
+	rdev->fence_drv.last_seq = seq;
+	n = NULL;
+	list_for_each(i, &rdev->fence_drv.emited) {
+		fence = list_entry(i, struct radeon_fence, list);
+		if (fence->seq == seq) {
+			n = i;
+			break;
+		}
+	}
+	/* all fence previous to this one are considered as signaled */
+	if (n) {
+		i = n;
+		do {
+			n = i->prev;
+			list_del(i);
+			list_add_tail(i, &rdev->fence_drv.signaled);
+			fence = list_entry(i, struct radeon_fence, list);
+			fence->signaled = true;
+			i = n;
+		} while (i != &rdev->fence_drv.emited);
+		wake = true;
+	}
+	return wake;
+}
+
+static void radeon_fence_destroy(struct kref *kref)
+{
+	unsigned long irq_flags;
+        struct radeon_fence *fence;
+
+	fence = container_of(kref, struct radeon_fence, kref);
+	write_lock_irqsave(&fence->rdev->fence_drv.lock, irq_flags);
+	list_del(&fence->list);
+	fence->emited = false;
+	write_unlock_irqrestore(&fence->rdev->fence_drv.lock, irq_flags);
+	kfree(fence);
+}
+
+int radeon_fence_create(struct radeon_device *rdev, struct radeon_fence **fence)
+{
+	unsigned long irq_flags;
+
+	*fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
+	if ((*fence) == NULL) {
+		return -ENOMEM;
+	}
+	kref_init(&((*fence)->kref));
+	(*fence)->rdev = rdev;
+	(*fence)->emited = false;
+	(*fence)->signaled = false;
+	(*fence)->seq = 0;
+	INIT_LIST_HEAD(&(*fence)->list);
+
+	write_lock_irqsave(&rdev->fence_drv.lock, irq_flags);
+	list_add_tail(&(*fence)->list, &rdev->fence_drv.created);
+	write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
+	return 0;
+}
+
+
+bool radeon_fence_signaled(struct radeon_fence *fence)
+{
+	struct radeon_device *rdev = fence->rdev;
+	unsigned long irq_flags;
+	bool signaled = false;
+
+	if (rdev->gpu_lockup) {
+		return true;
+	}
+	if (fence == NULL) {
+		return true;
+	}
+	write_lock_irqsave(&fence->rdev->fence_drv.lock, irq_flags);
+	signaled = fence->signaled;
+	/* if we are shuting down report all fence as signaled */
+	if (fence->rdev->shutdown) {
+		signaled = true;
+	}
+	if (!fence->emited) {
+		WARN(1, "Querying an unemited fence : %p !\n", fence);
+		signaled = true;
+	}
+	if (!signaled) {
+		radeon_fence_poll_locked(fence->rdev);
+		signaled = fence->signaled;
+	}
+	write_unlock_irqrestore(&fence->rdev->fence_drv.lock, irq_flags);
+	return signaled;
+}
+
+int radeon_fence_wait(struct radeon_fence *fence, bool interruptible)
+{
+	struct radeon_device *rdev;
+	unsigned long cur_jiffies;
+	unsigned long timeout;
+	bool expired = false;
+	int r;
+
+
+	if (fence == NULL) {
+		WARN(1, "Querying an invalid fence : %p !\n", fence);
+		return 0;
+	}
+	rdev = fence->rdev;
+	if (radeon_fence_signaled(fence)) {
+		return 0;
+	}
+retry:
+	cur_jiffies = jiffies;
+	timeout = HZ / 100;
+	if (time_after(fence->timeout, cur_jiffies)) {
+		timeout = fence->timeout - cur_jiffies;
+	}
+	if (interruptible) {
+		r = wait_event_interruptible_timeout(rdev->fence_drv.queue,
+				radeon_fence_signaled(fence), timeout);
+		if (unlikely(r == -ERESTARTSYS)) {
+			return -ERESTART;
+		}
+	} else {
+		r = wait_event_timeout(rdev->fence_drv.queue,
+			 radeon_fence_signaled(fence), timeout);
+	}
+	if (unlikely(!radeon_fence_signaled(fence))) {
+		if (unlikely(r == 0)) {
+			expired = true;
+		}
+		if (unlikely(expired)) {
+			timeout = 1;
+			if (time_after(cur_jiffies, fence->timeout)) {
+				timeout = cur_jiffies - fence->timeout;
+			}
+			timeout = jiffies_to_msecs(timeout);
+			if (timeout > 500) {
+				DRM_ERROR("fence(%p:0x%08X) %lums timeout "
+					  "going to reset GPU\n",
+					  fence, fence->seq, timeout);
+				radeon_gpu_reset(rdev);
+				WREG32(rdev->fence_drv.scratch_reg, fence->seq);
+			}
+		}
+		goto retry;
+	}
+	if (unlikely(expired)) {
+		rdev->fence_drv.count_timeout++;
+		cur_jiffies = jiffies;
+		timeout = 1;
+		if (time_after(cur_jiffies, fence->timeout)) {
+			timeout = cur_jiffies - fence->timeout;
+		}
+		timeout = jiffies_to_msecs(timeout);
+		DRM_ERROR("fence(%p:0x%08X) %lums timeout\n",
+			  fence, fence->seq, timeout);
+		DRM_ERROR("last signaled fence(0x%08X)\n",
+			  rdev->fence_drv.last_seq);
+	}
+	return 0;
+}
+
+int radeon_fence_wait_next(struct radeon_device *rdev)
+{
+	unsigned long irq_flags;
+	struct radeon_fence *fence;
+	int r;
+
+	if (rdev->gpu_lockup) {
+		return 0;
+	}
+	write_lock_irqsave(&rdev->fence_drv.lock, irq_flags);
+	if (list_empty(&rdev->fence_drv.emited)) {
+		write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
+		return 0;
+	}
+	fence = list_entry(rdev->fence_drv.emited.next,
+			   struct radeon_fence, list);
+	radeon_fence_ref(fence);
+	write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
+	r = radeon_fence_wait(fence, false);
+	radeon_fence_unref(&fence);
+	return r;
+}
+
+int radeon_fence_wait_last(struct radeon_device *rdev)
+{
+	unsigned long irq_flags;
+	struct radeon_fence *fence;
+	int r;
+
+	if (rdev->gpu_lockup) {
+		return 0;
+	}
+	write_lock_irqsave(&rdev->fence_drv.lock, irq_flags);
+	if (list_empty(&rdev->fence_drv.emited)) {
+		write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
+		return 0;
+	}
+	fence = list_entry(rdev->fence_drv.emited.prev,
+			   struct radeon_fence, list);
+	radeon_fence_ref(fence);
+	write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
+	r = radeon_fence_wait(fence, false);
+	radeon_fence_unref(&fence);
+	return r;
+}
+
+struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
+{
+	kref_get(&fence->kref);
+	return fence;
+}
+
+void radeon_fence_unref(struct radeon_fence **fence)
+{
+	struct radeon_fence *tmp = *fence;
+
+	*fence = NULL;
+	if (tmp) {
+		kref_put(&tmp->kref, &radeon_fence_destroy);
+	}
+}
+
+void radeon_fence_process(struct radeon_device *rdev)
+{
+	unsigned long irq_flags;
+	bool wake;
+
+	write_lock_irqsave(&rdev->fence_drv.lock, irq_flags);
+	wake = radeon_fence_poll_locked(rdev);
+	write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
+	if (wake) {
+		wake_up_all(&rdev->fence_drv.queue);
+	}
+}
+
+int radeon_fence_driver_init(struct radeon_device *rdev)
+{
+	unsigned long irq_flags;
+	int r;
+
+	write_lock_irqsave(&rdev->fence_drv.lock, irq_flags);
+	r = radeon_scratch_get(rdev, &rdev->fence_drv.scratch_reg);
+	if (r) {
+		DRM_ERROR("Fence failed to get a scratch register.");
+		write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
+		return r;
+	}
+	WREG32(rdev->fence_drv.scratch_reg, 0);
+	atomic_set(&rdev->fence_drv.seq, 0);
+	INIT_LIST_HEAD(&rdev->fence_drv.created);
+	INIT_LIST_HEAD(&rdev->fence_drv.emited);
+	INIT_LIST_HEAD(&rdev->fence_drv.signaled);
+	rdev->fence_drv.count_timeout = 0;
+	init_waitqueue_head(&rdev->fence_drv.queue);
+	write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
+	if (radeon_debugfs_fence_init(rdev)) {
+		DRM_ERROR("Failed to register debugfs file for fence !\n");
+	}
+	return 0;
+}
+
+void radeon_fence_driver_fini(struct radeon_device *rdev)
+{
+	unsigned long irq_flags;
+
+	wake_up_all(&rdev->fence_drv.queue);
+	write_lock_irqsave(&rdev->fence_drv.lock, irq_flags);
+	radeon_scratch_free(rdev, rdev->fence_drv.scratch_reg);
+	write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
+	DRM_INFO("radeon: fence finalized\n");
+}
+
+
+/*
+ * Fence debugfs
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *)m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_fence *fence;
+
+	seq_printf(m, "Last signaled fence 0x%08X\n",
+		   RREG32(rdev->fence_drv.scratch_reg));
+	if (!list_empty(&rdev->fence_drv.emited)) {
+		   fence = list_entry(rdev->fence_drv.emited.prev,
+				      struct radeon_fence, list);
+		   seq_printf(m, "Last emited fence %p with 0x%08X\n",
+			      fence,  fence->seq);
+	}
+	return 0;
+}
+
+static struct drm_info_list radeon_debugfs_fence_list[] = {
+	{"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
+};
+#endif
+
+int radeon_debugfs_fence_init(struct radeon_device *rdev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 1);
+#else
+	return 0;
+#endif
+}
diff --git a/drivers/gpu/drm/radeon/radeon_fixed.h b/drivers/gpu/drm/radeon/radeon_fixed.h
new file mode 100644
index 00000000000..90187d17384
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_fixed.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2009 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ */
+#ifndef RADEON_FIXED_H
+#define RADEON_FIXED_H
+
+typedef union rfixed {
+	u32 full;
+} fixed20_12;
+
+
+#define rfixed_const(A) (u32)(((A) << 12))/*  + ((B + 0.000122)*4096)) */
+#define rfixed_const_half(A) (u32)(((A) << 12) + 2048)
+#define rfixed_const_666(A) (u32)(((A) << 12) + 2731)
+#define rfixed_const_8(A) (u32)(((A) << 12) + 3277)
+#define rfixed_mul(A, B) ((u64)((u64)(A).full * (B).full + 2048) >> 12)
+#define fixed_init(A) { .full = rfixed_const((A)) }
+#define fixed_init_half(A) { .full = rfixed_const_half((A)) }
+#define rfixed_trunc(A) ((A).full >> 12)
+
+static inline u32 rfixed_div(fixed20_12 A, fixed20_12 B)
+{
+	u64 tmp = ((u64)A.full << 13);
+
+	do_div(tmp, B.full);
+	tmp += 1;
+	tmp /= 2;
+	return lower_32_bits(tmp);
+}
+#endif
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
new file mode 100644
index 00000000000..d343a15316e
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#include "drmP.h"
+#include "radeon_drm.h"
+#include "radeon.h"
+#include "radeon_reg.h"
+
+/*
+ * Common GART table functions.
+ */
+int radeon_gart_table_ram_alloc(struct radeon_device *rdev)
+{
+	void *ptr;
+
+	ptr = pci_alloc_consistent(rdev->pdev, rdev->gart.table_size,
+				   &rdev->gart.table_addr);
+	if (ptr == NULL) {
+		return -ENOMEM;
+	}
+#ifdef CONFIG_X86
+	if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480 ||
+	    rdev->family == CHIP_RS690 || rdev->family == CHIP_RS740) {
+		set_memory_uc((unsigned long)ptr,
+			      rdev->gart.table_size >> PAGE_SHIFT);
+	}
+#endif
+	rdev->gart.table.ram.ptr = ptr;
+	memset((void *)rdev->gart.table.ram.ptr, 0, rdev->gart.table_size);
+	return 0;
+}
+
+void radeon_gart_table_ram_free(struct radeon_device *rdev)
+{
+	if (rdev->gart.table.ram.ptr == NULL) {
+		return;
+	}
+#ifdef CONFIG_X86
+	if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480 ||
+	    rdev->family == CHIP_RS690 || rdev->family == CHIP_RS740) {
+		set_memory_wb((unsigned long)rdev->gart.table.ram.ptr,
+			      rdev->gart.table_size >> PAGE_SHIFT);
+	}
+#endif
+	pci_free_consistent(rdev->pdev, rdev->gart.table_size,
+			    (void *)rdev->gart.table.ram.ptr,
+			    rdev->gart.table_addr);
+	rdev->gart.table.ram.ptr = NULL;
+	rdev->gart.table_addr = 0;
+}
+
+int radeon_gart_table_vram_alloc(struct radeon_device *rdev)
+{
+	uint64_t gpu_addr;
+	int r;
+
+	if (rdev->gart.table.vram.robj == NULL) {
+		r = radeon_object_create(rdev, NULL,
+					 rdev->gart.table_size,
+					 true,
+					 RADEON_GEM_DOMAIN_VRAM,
+					 false, &rdev->gart.table.vram.robj);
+		if (r) {
+			return r;
+		}
+	}
+	r = radeon_object_pin(rdev->gart.table.vram.robj,
+			      RADEON_GEM_DOMAIN_VRAM, &gpu_addr);
+	if (r) {
+		radeon_object_unref(&rdev->gart.table.vram.robj);
+		return r;
+	}
+	r = radeon_object_kmap(rdev->gart.table.vram.robj,
+			       (void **)&rdev->gart.table.vram.ptr);
+	if (r) {
+		radeon_object_unpin(rdev->gart.table.vram.robj);
+		radeon_object_unref(&rdev->gart.table.vram.robj);
+		DRM_ERROR("radeon: failed to map gart vram table.\n");
+		return r;
+	}
+	rdev->gart.table_addr = gpu_addr;
+	return 0;
+}
+
+void radeon_gart_table_vram_free(struct radeon_device *rdev)
+{
+	if (rdev->gart.table.vram.robj == NULL) {
+		return;
+	}
+	radeon_object_kunmap(rdev->gart.table.vram.robj);
+	radeon_object_unpin(rdev->gart.table.vram.robj);
+	radeon_object_unref(&rdev->gart.table.vram.robj);
+}
+
+
+
+
+/*
+ * Common gart functions.
+ */
+void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
+			int pages)
+{
+	unsigned t;
+	unsigned p;
+	int i, j;
+
+	if (!rdev->gart.ready) {
+		WARN(1, "trying to unbind memory to unitialized GART !\n");
+		return;
+	}
+	t = offset / 4096;
+	p = t / (PAGE_SIZE / 4096);
+	for (i = 0; i < pages; i++, p++) {
+		if (rdev->gart.pages[p]) {
+			pci_unmap_page(rdev->pdev, rdev->gart.pages_addr[p],
+				       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+			rdev->gart.pages[p] = NULL;
+			rdev->gart.pages_addr[p] = 0;
+			for (j = 0; j < (PAGE_SIZE / 4096); j++, t++) {
+				radeon_gart_set_page(rdev, t, 0);
+			}
+		}
+	}
+	mb();
+	radeon_gart_tlb_flush(rdev);
+}
+
+int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
+		     int pages, struct page **pagelist)
+{
+	unsigned t;
+	unsigned p;
+	uint64_t page_base;
+	int i, j;
+
+	if (!rdev->gart.ready) {
+		DRM_ERROR("trying to bind memory to unitialized GART !\n");
+		return -EINVAL;
+	}
+	t = offset / 4096;
+	p = t / (PAGE_SIZE / 4096);
+
+	for (i = 0; i < pages; i++, p++) {
+		/* we need to support large memory configurations */
+		/* assume that unbind have already been call on the range */
+		rdev->gart.pages_addr[p] = pci_map_page(rdev->pdev, pagelist[i],
+							0, PAGE_SIZE,
+							PCI_DMA_BIDIRECTIONAL);
+		if (pci_dma_mapping_error(rdev->pdev, rdev->gart.pages_addr[p])) {
+			/* FIXME: failed to map page (return -ENOMEM?) */
+			radeon_gart_unbind(rdev, offset, pages);
+			return -ENOMEM;
+		}
+		rdev->gart.pages[p] = pagelist[i];
+		page_base = (uint32_t)rdev->gart.pages_addr[p];
+		for (j = 0; j < (PAGE_SIZE / 4096); j++, t++) {
+			radeon_gart_set_page(rdev, t, page_base);
+			page_base += 4096;
+		}
+	}
+	mb();
+	radeon_gart_tlb_flush(rdev);
+	return 0;
+}
+
+int radeon_gart_init(struct radeon_device *rdev)
+{
+	if (rdev->gart.pages) {
+		return 0;
+	}
+	/* We need PAGE_SIZE >= 4096 */
+	if (PAGE_SIZE < 4096) {
+		DRM_ERROR("Page size is smaller than GPU page size!\n");
+		return -EINVAL;
+	}
+	/* Compute table size */
+	rdev->gart.num_cpu_pages = rdev->mc.gtt_size / PAGE_SIZE;
+	rdev->gart.num_gpu_pages = rdev->mc.gtt_size / 4096;
+	DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n",
+		 rdev->gart.num_cpu_pages, rdev->gart.num_gpu_pages);
+	/* Allocate pages table */
+	rdev->gart.pages = kzalloc(sizeof(void *) * rdev->gart.num_cpu_pages,
+				   GFP_KERNEL);
+	if (rdev->gart.pages == NULL) {
+		radeon_gart_fini(rdev);
+		return -ENOMEM;
+	}
+	rdev->gart.pages_addr = kzalloc(sizeof(dma_addr_t) *
+					rdev->gart.num_cpu_pages, GFP_KERNEL);
+	if (rdev->gart.pages_addr == NULL) {
+		radeon_gart_fini(rdev);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+void radeon_gart_fini(struct radeon_device *rdev)
+{
+	if (rdev->gart.pages && rdev->gart.pages_addr && rdev->gart.ready) {
+		/* unbind pages */
+		radeon_gart_unbind(rdev, 0, rdev->gart.num_cpu_pages);
+	}
+	rdev->gart.ready = false;
+	kfree(rdev->gart.pages);
+	kfree(rdev->gart.pages_addr);
+	rdev->gart.pages = NULL;
+	rdev->gart.pages_addr = NULL;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
new file mode 100644
index 00000000000..eb516034235
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -0,0 +1,287 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#include "drmP.h"
+#include "drm.h"
+#include "radeon_drm.h"
+#include "radeon.h"
+
+int radeon_gem_object_init(struct drm_gem_object *obj)
+{
+	/* we do nothings here */
+	return 0;
+}
+
+void radeon_gem_object_free(struct drm_gem_object *gobj)
+{
+	struct radeon_object *robj = gobj->driver_private;
+
+	gobj->driver_private = NULL;
+	if (robj) {
+		radeon_object_unref(&robj);
+	}
+}
+
+int radeon_gem_object_create(struct radeon_device *rdev, int size,
+			     int alignment, int initial_domain,
+			     bool discardable, bool kernel,
+			     bool interruptible,
+			     struct drm_gem_object **obj)
+{
+	struct drm_gem_object *gobj;
+	struct radeon_object *robj;
+	int r;
+
+	*obj = NULL;
+	gobj = drm_gem_object_alloc(rdev->ddev, size);
+	if (!gobj) {
+		return -ENOMEM;
+	}
+	/* At least align on page size */
+	if (alignment < PAGE_SIZE) {
+		alignment = PAGE_SIZE;
+	}
+	r = radeon_object_create(rdev, gobj, size, kernel, initial_domain,
+				 interruptible, &robj);
+	if (r) {
+		DRM_ERROR("Failed to allocate GEM object (%d, %d, %u)\n",
+			  size, initial_domain, alignment);
+		mutex_lock(&rdev->ddev->struct_mutex);
+		drm_gem_object_unreference(gobj);
+		mutex_unlock(&rdev->ddev->struct_mutex);
+		return r;
+	}
+	gobj->driver_private = robj;
+	*obj = gobj;
+	return 0;
+}
+
+int radeon_gem_object_pin(struct drm_gem_object *obj, uint32_t pin_domain,
+			  uint64_t *gpu_addr)
+{
+	struct radeon_object *robj = obj->driver_private;
+	uint32_t flags;
+
+	switch (pin_domain) {
+	case RADEON_GEM_DOMAIN_VRAM:
+		flags = TTM_PL_FLAG_VRAM;
+		break;
+	case RADEON_GEM_DOMAIN_GTT:
+		flags = TTM_PL_FLAG_TT;
+		break;
+	default:
+		flags = TTM_PL_FLAG_SYSTEM;
+		break;
+	}
+	return radeon_object_pin(robj, flags, gpu_addr);
+}
+
+void radeon_gem_object_unpin(struct drm_gem_object *obj)
+{
+	struct radeon_object *robj = obj->driver_private;
+	radeon_object_unpin(robj);
+}
+
+int radeon_gem_set_domain(struct drm_gem_object *gobj,
+			  uint32_t rdomain, uint32_t wdomain)
+{
+	struct radeon_object *robj;
+	uint32_t domain;
+	int r;
+
+	/* FIXME: reeimplement */
+	robj = gobj->driver_private;
+	/* work out where to validate the buffer to */
+	domain = wdomain;
+	if (!domain) {
+		domain = rdomain;
+	}
+	if (!domain) {
+		/* Do nothings */
+		printk(KERN_WARNING "Set domain withou domain !\n");
+		return 0;
+	}
+	if (domain == RADEON_GEM_DOMAIN_CPU) {
+		/* Asking for cpu access wait for object idle */
+		r = radeon_object_wait(robj);
+		if (r) {
+			printk(KERN_ERR "Failed to wait for object !\n");
+			return r;
+		}
+	}
+	return 0;
+}
+
+int radeon_gem_init(struct radeon_device *rdev)
+{
+	INIT_LIST_HEAD(&rdev->gem.objects);
+	return 0;
+}
+
+void radeon_gem_fini(struct radeon_device *rdev)
+{
+	radeon_object_force_delete(rdev);
+}
+
+
+/*
+ * GEM ioctls.
+ */
+int radeon_gem_info_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *filp)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_radeon_gem_info *args = data;
+
+	args->vram_size = rdev->mc.vram_size;
+	/* FIXME: report somethings that makes sense */
+	args->vram_visible = rdev->mc.vram_size - (4 * 1024 * 1024);
+	args->gart_size = rdev->mc.gtt_size;
+	return 0;
+}
+
+int radeon_gem_pread_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *filp)
+{
+	/* TODO: implement */
+	DRM_ERROR("unimplemented %s\n", __func__);
+	return -ENOSYS;
+}
+
+int radeon_gem_pwrite_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *filp)
+{
+	/* TODO: implement */
+	DRM_ERROR("unimplemented %s\n", __func__);
+	return -ENOSYS;
+}
+
+int radeon_gem_create_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *filp)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_radeon_gem_create *args = data;
+	struct drm_gem_object *gobj;
+	uint32_t handle;
+	int r;
+
+	/* create a gem object to contain this object in */
+	args->size = roundup(args->size, PAGE_SIZE);
+	r = radeon_gem_object_create(rdev, args->size, args->alignment,
+				     args->initial_domain, false,
+				     false, true, &gobj);
+	if (r) {
+		return r;
+	}
+	r = drm_gem_handle_create(filp, gobj, &handle);
+	if (r) {
+		mutex_lock(&dev->struct_mutex);
+		drm_gem_object_unreference(gobj);
+		mutex_unlock(&dev->struct_mutex);
+		return r;
+	}
+	mutex_lock(&dev->struct_mutex);
+	drm_gem_object_handle_unreference(gobj);
+	mutex_unlock(&dev->struct_mutex);
+	args->handle = handle;
+	return 0;
+}
+
+int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *filp)
+{
+	/* transition the BO to a domain -
+	 * just validate the BO into a certain domain */
+	struct drm_radeon_gem_set_domain *args = data;
+	struct drm_gem_object *gobj;
+	struct radeon_object *robj;
+	int r;
+
+	/* for now if someone requests domain CPU -
+	 * just make sure the buffer is finished with */
+
+	/* just do a BO wait for now */
+	gobj = drm_gem_object_lookup(dev, filp, args->handle);
+	if (gobj == NULL) {
+		return -EINVAL;
+	}
+	robj = gobj->driver_private;
+
+	r = radeon_gem_set_domain(gobj, args->read_domains, args->write_domain);
+
+	mutex_lock(&dev->struct_mutex);
+	drm_gem_object_unreference(gobj);
+	mutex_unlock(&dev->struct_mutex);
+	return r;
+}
+
+int radeon_gem_mmap_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *filp)
+{
+	struct drm_radeon_gem_mmap *args = data;
+	struct drm_gem_object *gobj;
+	struct radeon_object *robj;
+	int r;
+
+	gobj = drm_gem_object_lookup(dev, filp, args->handle);
+	if (gobj == NULL) {
+		return -EINVAL;
+	}
+	robj = gobj->driver_private;
+	r = radeon_object_mmap(robj, &args->addr_ptr);
+	mutex_lock(&dev->struct_mutex);
+	drm_gem_object_unreference(gobj);
+	mutex_unlock(&dev->struct_mutex);
+	return r;
+}
+
+int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *filp)
+{
+	/* FIXME: implement */
+	return 0;
+}
+
+int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *filp)
+{
+	struct drm_radeon_gem_wait_idle *args = data;
+	struct drm_gem_object *gobj;
+	struct radeon_object *robj;
+	int r;
+
+	gobj = drm_gem_object_lookup(dev, filp, args->handle);
+	if (gobj == NULL) {
+		return -EINVAL;
+	}
+	robj = gobj->driver_private;
+	r = radeon_object_wait(robj);
+	mutex_lock(&dev->struct_mutex);
+	drm_gem_object_unreference(gobj);
+	mutex_unlock(&dev->struct_mutex);
+	return r;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c
new file mode 100644
index 00000000000..71465ed2688
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_i2c.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ */
+#include "drmP.h"
+#include "radeon_drm.h"
+#include "radeon.h"
+
+/**
+ * radeon_ddc_probe
+ *
+ */
+bool radeon_ddc_probe(struct radeon_connector *radeon_connector)
+{
+	u8 out_buf[] = { 0x0, 0x0};
+	u8 buf[2];
+	int ret;
+	struct i2c_msg msgs[] = {
+		{
+			.addr = 0x50,
+			.flags = 0,
+			.len = 1,
+			.buf = out_buf,
+		},
+		{
+			.addr = 0x50,
+			.flags = I2C_M_RD,
+			.len = 1,
+			.buf = buf,
+		}
+	};
+
+	ret = i2c_transfer(&radeon_connector->ddc_bus->adapter, msgs, 2);
+	if (ret == 2)
+		return true;
+
+	return false;
+}
+
+
+void radeon_i2c_do_lock(struct radeon_connector *radeon_connector, int lock_state)
+{
+	struct radeon_device *rdev = radeon_connector->base.dev->dev_private;
+	uint32_t temp;
+	struct radeon_i2c_bus_rec *rec = &radeon_connector->ddc_bus->rec;
+
+	/* RV410 appears to have a bug where the hw i2c in reset
+	 * holds the i2c port in a bad state - switch hw i2c away before
+	 * doing DDC - do this for all r200s/r300s/r400s for safety sake
+	 */
+	if ((rdev->family >= CHIP_R200) && !ASIC_IS_AVIVO(rdev)) {
+		if (rec->a_clk_reg == RADEON_GPIO_MONID) {
+			WREG32(RADEON_DVI_I2C_CNTL_0, (RADEON_I2C_SOFT_RST |
+						R200_DVI_I2C_PIN_SEL(R200_SEL_DDC1)));
+		} else {
+			WREG32(RADEON_DVI_I2C_CNTL_0, (RADEON_I2C_SOFT_RST |
+						R200_DVI_I2C_PIN_SEL(R200_SEL_DDC3)));
+		}
+	}
+	if (lock_state) {
+		temp = RREG32(rec->a_clk_reg);
+		temp &= ~(rec->a_clk_mask);
+		WREG32(rec->a_clk_reg, temp);
+
+		temp = RREG32(rec->a_data_reg);
+		temp &= ~(rec->a_data_mask);
+		WREG32(rec->a_data_reg, temp);
+	}
+
+	temp = RREG32(rec->mask_clk_reg);
+	if (lock_state)
+		temp |= rec->mask_clk_mask;
+	else
+		temp &= ~rec->mask_clk_mask;
+	WREG32(rec->mask_clk_reg, temp);
+	temp = RREG32(rec->mask_clk_reg);
+
+	temp = RREG32(rec->mask_data_reg);
+	if (lock_state)
+		temp |= rec->mask_data_mask;
+	else
+		temp &= ~rec->mask_data_mask;
+	WREG32(rec->mask_data_reg, temp);
+	temp = RREG32(rec->mask_data_reg);
+}
+
+static int get_clock(void *i2c_priv)
+{
+	struct radeon_i2c_chan *i2c = i2c_priv;
+	struct radeon_device *rdev = i2c->dev->dev_private;
+	struct radeon_i2c_bus_rec *rec = &i2c->rec;
+	uint32_t val;
+
+	val = RREG32(rec->get_clk_reg);
+	val &= rec->get_clk_mask;
+
+	return (val != 0);
+}
+
+
+static int get_data(void *i2c_priv)
+{
+	struct radeon_i2c_chan *i2c = i2c_priv;
+	struct radeon_device *rdev = i2c->dev->dev_private;
+	struct radeon_i2c_bus_rec *rec = &i2c->rec;
+	uint32_t val;
+
+	val = RREG32(rec->get_data_reg);
+	val &= rec->get_data_mask;
+	return (val != 0);
+}
+
+static void set_clock(void *i2c_priv, int clock)
+{
+	struct radeon_i2c_chan *i2c = i2c_priv;
+	struct radeon_device *rdev = i2c->dev->dev_private;
+	struct radeon_i2c_bus_rec *rec = &i2c->rec;
+	uint32_t val;
+
+	val = RREG32(rec->put_clk_reg) & (uint32_t)~(rec->put_clk_mask);
+	val |= clock ? 0 : rec->put_clk_mask;
+	WREG32(rec->put_clk_reg, val);
+}
+
+static void set_data(void *i2c_priv, int data)
+{
+	struct radeon_i2c_chan *i2c = i2c_priv;
+	struct radeon_device *rdev = i2c->dev->dev_private;
+	struct radeon_i2c_bus_rec *rec = &i2c->rec;
+	uint32_t val;
+
+	val = RREG32(rec->put_data_reg) & (uint32_t)~(rec->put_data_mask);
+	val |= data ? 0 : rec->put_data_mask;
+	WREG32(rec->put_data_reg, val);
+}
+
+struct radeon_i2c_chan *radeon_i2c_create(struct drm_device *dev,
+		struct radeon_i2c_bus_rec *rec,
+		const char *name)
+{
+	struct radeon_i2c_chan *i2c;
+	int ret;
+
+	i2c = drm_calloc(1, sizeof(struct radeon_i2c_chan), DRM_MEM_DRIVER);
+	if (i2c == NULL)
+		return NULL;
+
+	i2c->adapter.owner = THIS_MODULE;
+	i2c->adapter.algo_data = &i2c->algo;
+	i2c->dev = dev;
+	i2c->algo.setsda = set_data;
+	i2c->algo.setscl = set_clock;
+	i2c->algo.getsda = get_data;
+	i2c->algo.getscl = get_clock;
+	i2c->algo.udelay = 20;
+	/* vesa says 2.2 ms is enough, 1 jiffy doesn't seem to always
+	 * make this, 2 jiffies is a lot more reliable */
+	i2c->algo.timeout = 2;
+	i2c->algo.data = i2c;
+	i2c->rec = *rec;
+	i2c_set_adapdata(&i2c->adapter, i2c);
+
+	ret = i2c_bit_add_bus(&i2c->adapter);
+	if (ret) {
+		DRM_INFO("Failed to register i2c %s\n", name);
+		goto out_free;
+	}
+
+	return i2c;
+out_free:
+	drm_free(i2c, sizeof(struct radeon_i2c_chan), DRM_MEM_DRIVER);
+	return NULL;
+
+}
+
+void radeon_i2c_destroy(struct radeon_i2c_chan *i2c)
+{
+	if (!i2c)
+		return;
+
+	i2c_del_adapter(&i2c->adapter);
+	drm_free(i2c, sizeof(struct radeon_i2c_chan), DRM_MEM_DRIVER);
+}
+
+struct drm_encoder *radeon_best_encoder(struct drm_connector *connector)
+{
+	return NULL;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c
new file mode 100644
index 00000000000..491d569deb0
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#include "drmP.h"
+#include "radeon_drm.h"
+#include "radeon_reg.h"
+#include "radeon_microcode.h"
+#include "radeon.h"
+#include "atom.h"
+
+static inline uint32_t r100_irq_ack(struct radeon_device *rdev)
+{
+	uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS);
+	uint32_t irq_mask = RADEON_SW_INT_TEST;
+
+	if (irqs) {
+		WREG32(RADEON_GEN_INT_STATUS, irqs);
+	}
+	return irqs & irq_mask;
+}
+
+int r100_irq_set(struct radeon_device *rdev)
+{
+	uint32_t tmp = 0;
+
+	if (rdev->irq.sw_int) {
+		tmp |= RADEON_SW_INT_ENABLE;
+	}
+	/* Todo go through CRTC and enable vblank int or not */
+	WREG32(RADEON_GEN_INT_CNTL, tmp);
+	return 0;
+}
+
+int r100_irq_process(struct radeon_device *rdev)
+{
+	uint32_t status;
+
+	status = r100_irq_ack(rdev);
+	if (!status) {
+		return IRQ_NONE;
+	}
+	while (status) {
+		/* SW interrupt */
+		if (status & RADEON_SW_INT_TEST) {
+			radeon_fence_process(rdev);
+		}
+		status = r100_irq_ack(rdev);
+	}
+	return IRQ_HANDLED;
+}
+
+int rs600_irq_set(struct radeon_device *rdev)
+{
+	uint32_t tmp = 0;
+
+	if (rdev->irq.sw_int) {
+		tmp |= RADEON_SW_INT_ENABLE;
+	}
+	WREG32(RADEON_GEN_INT_CNTL, tmp);
+	/* Todo go through CRTC and enable vblank int or not */
+	WREG32(R500_DxMODE_INT_MASK, 0);
+	return 0;
+}
+
+irqreturn_t radeon_driver_irq_handler_kms(DRM_IRQ_ARGS)
+{
+	struct drm_device *dev = (struct drm_device *) arg;
+	struct radeon_device *rdev = dev->dev_private;
+
+	return radeon_irq_process(rdev);
+}
+
+void radeon_driver_irq_preinstall_kms(struct drm_device *dev)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	unsigned i;
+
+	/* Disable *all* interrupts */
+	rdev->irq.sw_int = false;
+	for (i = 0; i < 2; i++) {
+		rdev->irq.crtc_vblank_int[i] = false;
+	}
+	radeon_irq_set(rdev);
+	/* Clear bits */
+	radeon_irq_process(rdev);
+}
+
+int radeon_driver_irq_postinstall_kms(struct drm_device *dev)
+{
+	struct radeon_device *rdev = dev->dev_private;
+
+	dev->max_vblank_count = 0x001fffff;
+	rdev->irq.sw_int = true;
+	radeon_irq_set(rdev);
+	return 0;
+}
+
+void radeon_driver_irq_uninstall_kms(struct drm_device *dev)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	unsigned i;
+
+	if (rdev == NULL) {
+		return;
+	}
+	/* Disable *all* interrupts */
+	rdev->irq.sw_int = false;
+	for (i = 0; i < 2; i++) {
+		rdev->irq.crtc_vblank_int[i] = false;
+	}
+	radeon_irq_set(rdev);
+}
+
+int radeon_irq_kms_init(struct radeon_device *rdev)
+{
+	int r = 0;
+
+	r = drm_vblank_init(rdev->ddev, 2);
+	if (r) {
+		return r;
+	}
+	drm_irq_install(rdev->ddev);
+	rdev->irq.installed = true;
+	DRM_INFO("radeon: irq initialized.\n");
+	return 0;
+}
+
+void radeon_irq_kms_fini(struct radeon_device *rdev)
+{
+	if (rdev->irq.installed) {
+		rdev->irq.installed = false;
+		drm_irq_uninstall(rdev->ddev);
+	}
+}
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
new file mode 100644
index 00000000000..b0ce44b9f5a
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -0,0 +1,295 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#include "drmP.h"
+#include "drm_sarea.h"
+#include "radeon.h"
+#include "radeon_drm.h"
+
+
+/*
+ * Driver load/unload
+ */
+int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags)
+{
+	struct radeon_device *rdev;
+	int r;
+
+	rdev = kzalloc(sizeof(struct radeon_device), GFP_KERNEL);
+	if (rdev == NULL) {
+		return -ENOMEM;
+	}
+	dev->dev_private = (void *)rdev;
+
+	/* update BUS flag */
+	if (drm_device_is_agp(dev)) {
+		flags |= RADEON_IS_AGP;
+	} else if (drm_device_is_pcie(dev)) {
+		flags |= RADEON_IS_PCIE;
+	} else {
+		flags |= RADEON_IS_PCI;
+	}
+
+	r = radeon_device_init(rdev, dev, dev->pdev, flags);
+	if (r) {
+		DRM_ERROR("Failed to initialize radeon, disabling IOCTL\n");
+		radeon_device_fini(rdev);
+		return r;
+	}
+	return 0;
+}
+
+int radeon_driver_unload_kms(struct drm_device *dev)
+{
+	struct radeon_device *rdev = dev->dev_private;
+
+	radeon_device_fini(rdev);
+	kfree(rdev);
+	dev->dev_private = NULL;
+	return 0;
+}
+
+
+/*
+ * Userspace get informations ioctl
+ */
+int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_radeon_info *info;
+	uint32_t *value_ptr;
+	uint32_t value;
+
+	info = data;
+	value_ptr = (uint32_t *)((unsigned long)info->value);
+	switch (info->request) {
+	case RADEON_INFO_DEVICE_ID:
+		value = dev->pci_device;
+		break;
+	case RADEON_INFO_NUM_GB_PIPES:
+		value = rdev->num_gb_pipes;
+		break;
+	default:
+		DRM_DEBUG("Invalid request %d\n", info->request);
+		return -EINVAL;
+	}
+	if (DRM_COPY_TO_USER(value_ptr, &value, sizeof(uint32_t))) {
+		DRM_ERROR("copy_to_user\n");
+		return -EFAULT;
+	}
+	return 0;
+}
+
+
+/*
+ * Outdated mess for old drm with Xorg being in charge (void function now).
+ */
+int radeon_driver_firstopen_kms(struct drm_device *dev)
+{
+	return 0;
+}
+
+
+void radeon_driver_lastclose_kms(struct drm_device *dev)
+{
+}
+
+int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
+{
+	return 0;
+}
+
+void radeon_driver_postclose_kms(struct drm_device *dev,
+				 struct drm_file *file_priv)
+{
+}
+
+void radeon_driver_preclose_kms(struct drm_device *dev,
+				struct drm_file *file_priv)
+{
+}
+
+
+/*
+ * VBlank related functions.
+ */
+u32 radeon_get_vblank_counter_kms(struct drm_device *dev, int crtc)
+{
+	/* FIXME: implement */
+	return 0;
+}
+
+int radeon_enable_vblank_kms(struct drm_device *dev, int crtc)
+{
+	/* FIXME: implement */
+	return 0;
+}
+
+void radeon_disable_vblank_kms(struct drm_device *dev, int crtc)
+{
+	/* FIXME: implement */
+}
+
+
+/*
+ * For multiple master (like multiple X).
+ */
+struct drm_radeon_master_private {
+	drm_local_map_t *sarea;
+	drm_radeon_sarea_t *sarea_priv;
+};
+
+int radeon_master_create_kms(struct drm_device *dev, struct drm_master *master)
+{
+	struct drm_radeon_master_private *master_priv;
+	unsigned long sareapage;
+	int ret;
+
+	master_priv = drm_calloc(1, sizeof(*master_priv), DRM_MEM_DRIVER);
+	if (master_priv == NULL) {
+		return -ENOMEM;
+	}
+	/* prebuild the SAREA */
+	sareapage = max_t(unsigned long, SAREA_MAX, PAGE_SIZE);
+	ret = drm_addmap(dev, 0, sareapage, _DRM_SHM,
+			 _DRM_CONTAINS_LOCK|_DRM_DRIVER,
+			 &master_priv->sarea);
+	if (ret) {
+		DRM_ERROR("SAREA setup failed\n");
+		return ret;
+	}
+	master_priv->sarea_priv = master_priv->sarea->handle + sizeof(struct drm_sarea);
+	master_priv->sarea_priv->pfCurrentPage = 0;
+	master->driver_priv = master_priv;
+	return 0;
+}
+
+void radeon_master_destroy_kms(struct drm_device *dev,
+			       struct drm_master *master)
+{
+	struct drm_radeon_master_private *master_priv = master->driver_priv;
+
+	if (master_priv == NULL) {
+		return;
+	}
+	if (master_priv->sarea) {
+		drm_rmmap_locked(dev, master_priv->sarea);
+	}
+	drm_free(master_priv, sizeof(*master_priv), DRM_MEM_DRIVER);
+	master->driver_priv = NULL;
+}
+
+
+/*
+ * IOCTL.
+ */
+int radeon_dma_ioctl_kms(struct drm_device *dev, void *data,
+			 struct drm_file *file_priv)
+{
+	/* Not valid in KMS. */
+	return -EINVAL;
+}
+
+#define KMS_INVALID_IOCTL(name)						\
+int name(struct drm_device *dev, void *data, struct drm_file *file_priv)\
+{									\
+	DRM_ERROR("invalid ioctl with kms %s\n", __func__);		\
+	return -EINVAL;							\
+}
+
+/*
+ * All these ioctls are invalid in kms world.
+ */
+KMS_INVALID_IOCTL(radeon_cp_init_kms)
+KMS_INVALID_IOCTL(radeon_cp_start_kms)
+KMS_INVALID_IOCTL(radeon_cp_stop_kms)
+KMS_INVALID_IOCTL(radeon_cp_reset_kms)
+KMS_INVALID_IOCTL(radeon_cp_idle_kms)
+KMS_INVALID_IOCTL(radeon_cp_resume_kms)
+KMS_INVALID_IOCTL(radeon_engine_reset_kms)
+KMS_INVALID_IOCTL(radeon_fullscreen_kms)
+KMS_INVALID_IOCTL(radeon_cp_swap_kms)
+KMS_INVALID_IOCTL(radeon_cp_clear_kms)
+KMS_INVALID_IOCTL(radeon_cp_vertex_kms)
+KMS_INVALID_IOCTL(radeon_cp_indices_kms)
+KMS_INVALID_IOCTL(radeon_cp_texture_kms)
+KMS_INVALID_IOCTL(radeon_cp_stipple_kms)
+KMS_INVALID_IOCTL(radeon_cp_indirect_kms)
+KMS_INVALID_IOCTL(radeon_cp_vertex2_kms)
+KMS_INVALID_IOCTL(radeon_cp_cmdbuf_kms)
+KMS_INVALID_IOCTL(radeon_cp_getparam_kms)
+KMS_INVALID_IOCTL(radeon_cp_flip_kms)
+KMS_INVALID_IOCTL(radeon_mem_alloc_kms)
+KMS_INVALID_IOCTL(radeon_mem_free_kms)
+KMS_INVALID_IOCTL(radeon_mem_init_heap_kms)
+KMS_INVALID_IOCTL(radeon_irq_emit_kms)
+KMS_INVALID_IOCTL(radeon_irq_wait_kms)
+KMS_INVALID_IOCTL(radeon_cp_setparam_kms)
+KMS_INVALID_IOCTL(radeon_surface_alloc_kms)
+KMS_INVALID_IOCTL(radeon_surface_free_kms)
+
+
+struct drm_ioctl_desc radeon_ioctls_kms[] = {
+	DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle_kms, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume_kms, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset_kms, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen_kms, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap_kms, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear_kms, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex_kms, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices_kms, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, radeon_cp_texture_kms, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple_kms, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2_kms, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf_kms, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam_kms, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip_kms, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc_kms, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_FREE, radeon_mem_free_kms, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit_kms, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait_kms, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam_kms, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc_kms, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free_kms, DRM_AUTH),
+	/* KMS */
+	DRM_IOCTL_DEF(DRM_RADEON_GEM_INFO, radeon_gem_info_ioctl, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_GEM_CREATE, radeon_gem_create_ioctl, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_GEM_MMAP, radeon_gem_mmap_ioctl, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_GEM_SET_DOMAIN, radeon_gem_set_domain_ioctl, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_GEM_PREAD, radeon_gem_pread_ioctl, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_GEM_PWRITE, radeon_gem_pwrite_ioctl, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_GEM_WAIT_IDLE, radeon_gem_wait_idle_ioctl, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_CS, radeon_cs_ioctl, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_INFO, radeon_info_ioctl, DRM_AUTH),
+};
+int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms);
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
new file mode 100644
index 00000000000..8086ecf7f03
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
@@ -0,0 +1,1276 @@
+/*
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ */
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/radeon_drm.h>
+#include "radeon_fixed.h"
+#include "radeon.h"
+
+void radeon_restore_common_regs(struct drm_device *dev)
+{
+	/* don't need this yet */
+}
+
+static void radeon_pll_wait_for_read_update_complete(struct drm_device *dev)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	int i = 0;
+
+	/* FIXME: Certain revisions of R300 can't recover here.  Not sure of
+	   the cause yet, but this workaround will mask the problem for now.
+	   Other chips usually will pass at the very first test, so the
+	   workaround shouldn't have any effect on them. */
+	for (i = 0;
+	     (i < 10000 &&
+	      RREG32_PLL(RADEON_PPLL_REF_DIV) & RADEON_PPLL_ATOMIC_UPDATE_R);
+	     i++);
+}
+
+static void radeon_pll_write_update(struct drm_device *dev)
+{
+	struct radeon_device *rdev = dev->dev_private;
+
+	while (RREG32_PLL(RADEON_PPLL_REF_DIV) & RADEON_PPLL_ATOMIC_UPDATE_R);
+
+	WREG32_PLL_P(RADEON_PPLL_REF_DIV,
+			   RADEON_PPLL_ATOMIC_UPDATE_W,
+			   ~(RADEON_PPLL_ATOMIC_UPDATE_W));
+}
+
+static void radeon_pll2_wait_for_read_update_complete(struct drm_device *dev)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	int i = 0;
+
+
+	/* FIXME: Certain revisions of R300 can't recover here.  Not sure of
+	   the cause yet, but this workaround will mask the problem for now.
+	   Other chips usually will pass at the very first test, so the
+	   workaround shouldn't have any effect on them. */
+	for (i = 0;
+	     (i < 10000 &&
+	      RREG32_PLL(RADEON_P2PLL_REF_DIV) & RADEON_P2PLL_ATOMIC_UPDATE_R);
+	     i++);
+}
+
+static void radeon_pll2_write_update(struct drm_device *dev)
+{
+	struct radeon_device *rdev = dev->dev_private;
+
+	while (RREG32_PLL(RADEON_P2PLL_REF_DIV) & RADEON_P2PLL_ATOMIC_UPDATE_R);
+
+	WREG32_PLL_P(RADEON_P2PLL_REF_DIV,
+			   RADEON_P2PLL_ATOMIC_UPDATE_W,
+			   ~(RADEON_P2PLL_ATOMIC_UPDATE_W));
+}
+
+static uint8_t radeon_compute_pll_gain(uint16_t ref_freq, uint16_t ref_div,
+				       uint16_t fb_div)
+{
+	unsigned int vcoFreq;
+
+	if (!ref_div)
+		return 1;
+
+	vcoFreq = ((unsigned)ref_freq & fb_div) / ref_div;
+
+	/*
+	 * This is horribly crude: the VCO frequency range is divided into
+	 * 3 parts, each part having a fixed PLL gain value.
+	 */
+	if (vcoFreq >= 30000)
+		/*
+		 * [300..max] MHz : 7
+		 */
+		return 7;
+	else if (vcoFreq >= 18000)
+		/*
+		 * [180..300) MHz : 4
+		 */
+		return 4;
+	else
+		/*
+		 * [0..180) MHz : 1
+		 */
+		return 1;
+}
+
+void radeon_crtc_dpms(struct drm_crtc *crtc, int mode)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t mask;
+
+	if (radeon_crtc->crtc_id)
+		mask = (RADEON_CRTC2_EN |
+			RADEON_CRTC2_DISP_DIS |
+			RADEON_CRTC2_VSYNC_DIS |
+			RADEON_CRTC2_HSYNC_DIS |
+			RADEON_CRTC2_DISP_REQ_EN_B);
+	else
+		mask = (RADEON_CRTC_DISPLAY_DIS |
+			RADEON_CRTC_VSYNC_DIS |
+			RADEON_CRTC_HSYNC_DIS);
+
+	switch (mode) {
+	case DRM_MODE_DPMS_ON:
+		if (radeon_crtc->crtc_id)
+			WREG32_P(RADEON_CRTC2_GEN_CNTL, RADEON_CRTC2_EN, ~mask);
+		else {
+			WREG32_P(RADEON_CRTC_GEN_CNTL, RADEON_CRTC_EN, ~(RADEON_CRTC_EN |
+									 RADEON_CRTC_DISP_REQ_EN_B));
+			WREG32_P(RADEON_CRTC_EXT_CNTL, 0, ~mask);
+		}
+		break;
+	case DRM_MODE_DPMS_STANDBY:
+	case DRM_MODE_DPMS_SUSPEND:
+	case DRM_MODE_DPMS_OFF:
+		if (radeon_crtc->crtc_id)
+			WREG32_P(RADEON_CRTC2_GEN_CNTL, mask, ~mask);
+		else {
+			WREG32_P(RADEON_CRTC_GEN_CNTL, RADEON_CRTC_DISP_REQ_EN_B, ~(RADEON_CRTC_EN |
+										    RADEON_CRTC_DISP_REQ_EN_B));
+			WREG32_P(RADEON_CRTC_EXT_CNTL, mask, ~mask);
+		}
+		break;
+	}
+
+	if (mode != DRM_MODE_DPMS_OFF) {
+		radeon_crtc_load_lut(crtc);
+	}
+}
+
+/* properly set crtc bpp when using atombios */
+void radeon_legacy_atom_set_surface(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	int format;
+	uint32_t crtc_gen_cntl;
+	uint32_t disp_merge_cntl;
+	uint32_t crtc_pitch;
+
+	switch (crtc->fb->bits_per_pixel) {
+	case 15:      /*  555 */
+		format = 3;
+		break;
+	case 16:      /*  565 */
+		format = 4;
+		break;
+	case 24:      /*  RGB */
+		format = 5;
+		break;
+	case 32:      /* xRGB */
+		format = 6;
+		break;
+	default:
+		return;
+	}
+
+	crtc_pitch  = ((((crtc->fb->pitch / (crtc->fb->bits_per_pixel / 8)) * crtc->fb->bits_per_pixel) +
+			((crtc->fb->bits_per_pixel * 8) - 1)) /
+		       (crtc->fb->bits_per_pixel * 8));
+	crtc_pitch |= crtc_pitch << 16;
+
+	WREG32(RADEON_CRTC_PITCH + radeon_crtc->crtc_offset, crtc_pitch);
+
+	switch (radeon_crtc->crtc_id) {
+	case 0:
+		disp_merge_cntl = RREG32(RADEON_DISP_MERGE_CNTL);
+		disp_merge_cntl &= ~RADEON_DISP_RGB_OFFSET_EN;
+		WREG32(RADEON_DISP_MERGE_CNTL, disp_merge_cntl);
+
+		crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL) & 0xfffff0ff;
+		crtc_gen_cntl |= (format << 8);
+		crtc_gen_cntl |= RADEON_CRTC_EXT_DISP_EN;
+		WREG32(RADEON_CRTC_GEN_CNTL, crtc_gen_cntl);
+		break;
+	case 1:
+		disp_merge_cntl = RREG32(RADEON_DISP2_MERGE_CNTL);
+		disp_merge_cntl &= ~RADEON_DISP2_RGB_OFFSET_EN;
+		WREG32(RADEON_DISP2_MERGE_CNTL, disp_merge_cntl);
+
+		crtc_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL) & 0xfffff0ff;
+		crtc_gen_cntl |= (format << 8);
+		WREG32(RADEON_CRTC2_GEN_CNTL, crtc_gen_cntl);
+		WREG32(RADEON_FP_H2_SYNC_STRT_WID,   RREG32(RADEON_CRTC2_H_SYNC_STRT_WID));
+		WREG32(RADEON_FP_V2_SYNC_STRT_WID,   RREG32(RADEON_CRTC2_V_SYNC_STRT_WID));
+		break;
+	}
+}
+
+int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y,
+			 struct drm_framebuffer *old_fb)
+{
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct radeon_framebuffer *radeon_fb;
+	struct drm_gem_object *obj;
+	uint64_t base;
+	uint32_t crtc_offset, crtc_offset_cntl, crtc_tile_x0_y0 = 0;
+	uint32_t crtc_pitch, pitch_pixels;
+
+	DRM_DEBUG("\n");
+
+	radeon_fb = to_radeon_framebuffer(crtc->fb);
+
+	obj = radeon_fb->obj;
+	if (radeon_gem_object_pin(obj, RADEON_GEM_DOMAIN_VRAM, &base)) {
+		return -EINVAL;
+	}
+	crtc_offset = (u32)base;
+	crtc_offset_cntl = 0;
+
+	pitch_pixels = crtc->fb->pitch / (crtc->fb->bits_per_pixel / 8);
+	crtc_pitch  = (((pitch_pixels * crtc->fb->bits_per_pixel) +
+			((crtc->fb->bits_per_pixel * 8) - 1)) /
+		       (crtc->fb->bits_per_pixel * 8));
+	crtc_pitch |= crtc_pitch << 16;
+
+	/* TODO tiling */
+	if (0) {
+		if (ASIC_IS_R300(rdev))
+			crtc_offset_cntl |= (R300_CRTC_X_Y_MODE_EN |
+					     R300_CRTC_MICRO_TILE_BUFFER_DIS |
+					     R300_CRTC_MACRO_TILE_EN);
+		else
+			crtc_offset_cntl |= RADEON_CRTC_TILE_EN;
+	} else {
+		if (ASIC_IS_R300(rdev))
+			crtc_offset_cntl &= ~(R300_CRTC_X_Y_MODE_EN |
+					      R300_CRTC_MICRO_TILE_BUFFER_DIS |
+					      R300_CRTC_MACRO_TILE_EN);
+		else
+			crtc_offset_cntl &= ~RADEON_CRTC_TILE_EN;
+	}
+
+
+	/* TODO more tiling */
+	if (0) {
+		if (ASIC_IS_R300(rdev)) {
+			crtc_tile_x0_y0 = x | (y << 16);
+			base &= ~0x7ff;
+		} else {
+			int byteshift = crtc->fb->bits_per_pixel >> 4;
+			int tile_addr = (((y >> 3) * crtc->fb->width + x) >> (8 - byteshift)) << 11;
+			base += tile_addr + ((x << byteshift) % 256) + ((y % 8) << 8);
+			crtc_offset_cntl |= (y % 16);
+		}
+	} else {
+		int offset = y * pitch_pixels + x;
+		switch (crtc->fb->bits_per_pixel) {
+		case 15:
+		case 16:
+			offset *= 2;
+			break;
+		case 24:
+			offset *= 3;
+			break;
+		case 32:
+			offset *= 4;
+			break;
+		default:
+			return false;
+		}
+		base += offset;
+	}
+
+	base &= ~7;
+
+	/* update sarea TODO */
+
+	crtc_offset = (u32)base;
+
+	WREG32(RADEON_DISPLAY_BASE_ADDR + radeon_crtc->crtc_offset, rdev->mc.vram_location);
+
+	if (ASIC_IS_R300(rdev)) {
+		if (radeon_crtc->crtc_id)
+			WREG32(R300_CRTC2_TILE_X0_Y0, crtc_tile_x0_y0);
+		else
+			WREG32(R300_CRTC_TILE_X0_Y0, crtc_tile_x0_y0);
+	}
+	WREG32(RADEON_CRTC_OFFSET_CNTL + radeon_crtc->crtc_offset, crtc_offset_cntl);
+	WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, crtc_offset);
+	WREG32(RADEON_CRTC_PITCH + radeon_crtc->crtc_offset, crtc_pitch);
+
+	if (old_fb && old_fb != crtc->fb) {
+		radeon_fb = to_radeon_framebuffer(old_fb);
+		radeon_gem_object_unpin(radeon_fb->obj);
+	}
+	return 0;
+}
+
+static bool radeon_set_crtc_timing(struct drm_crtc *crtc, struct drm_display_mode *mode)
+{
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	int format;
+	int hsync_start;
+	int hsync_wid;
+	int vsync_wid;
+	uint32_t crtc_h_total_disp;
+	uint32_t crtc_h_sync_strt_wid;
+	uint32_t crtc_v_total_disp;
+	uint32_t crtc_v_sync_strt_wid;
+
+	DRM_DEBUG("\n");
+
+	switch (crtc->fb->bits_per_pixel) {
+	case 15:      /*  555 */
+		format = 3;
+		break;
+	case 16:      /*  565 */
+		format = 4;
+		break;
+	case 24:      /*  RGB */
+		format = 5;
+		break;
+	case 32:      /* xRGB */
+		format = 6;
+		break;
+	default:
+		return false;
+	}
+
+	crtc_h_total_disp = ((((mode->crtc_htotal / 8) - 1) & 0x3ff)
+			     | ((((mode->crtc_hdisplay / 8) - 1) & 0x1ff) << 16));
+
+	hsync_wid = (mode->crtc_hsync_end - mode->crtc_hsync_start) / 8;
+	if (!hsync_wid)
+		hsync_wid = 1;
+	hsync_start = mode->crtc_hsync_start - 8;
+
+	crtc_h_sync_strt_wid = ((hsync_start & 0x1fff)
+				| ((hsync_wid & 0x3f) << 16)
+				| ((mode->flags & DRM_MODE_FLAG_NHSYNC)
+				   ? RADEON_CRTC_H_SYNC_POL
+				   : 0));
+
+	/* This works for double scan mode. */
+	crtc_v_total_disp = (((mode->crtc_vtotal - 1) & 0xffff)
+			     | ((mode->crtc_vdisplay - 1) << 16));
+
+	vsync_wid = mode->crtc_vsync_end - mode->crtc_vsync_start;
+	if (!vsync_wid)
+		vsync_wid = 1;
+
+	crtc_v_sync_strt_wid = (((mode->crtc_vsync_start - 1) & 0xfff)
+				| ((vsync_wid & 0x1f) << 16)
+				| ((mode->flags & DRM_MODE_FLAG_NVSYNC)
+				   ? RADEON_CRTC_V_SYNC_POL
+				   : 0));
+
+	/* TODO -> Dell Server */
+	if (0) {
+		uint32_t disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG);
+		uint32_t tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL);
+		uint32_t dac2_cntl = RREG32(RADEON_DAC_CNTL2);
+		uint32_t crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
+
+		dac2_cntl &= ~RADEON_DAC2_DAC_CLK_SEL;
+		dac2_cntl |= RADEON_DAC2_DAC2_CLK_SEL;
+
+		/* For CRT on DAC2, don't turn it on if BIOS didn't
+		   enable it, even it's detected.
+		*/
+		disp_hw_debug |= RADEON_CRT2_DISP1_SEL;
+		tv_dac_cntl &= ~((1<<2) | (3<<8) | (7<<24) | (0xff<<16));
+		tv_dac_cntl |= (0x03 | (2<<8) | (0x58<<16));
+
+		WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl);
+		WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug);
+		WREG32(RADEON_DAC_CNTL2, dac2_cntl);
+		WREG32(RADEON_CRTC2_GEN_CNTL, crtc2_gen_cntl);
+	}
+
+	if (radeon_crtc->crtc_id) {
+		uint32_t crtc2_gen_cntl;
+		uint32_t disp2_merge_cntl;
+
+		/* check to see if TV DAC is enabled for another crtc and keep it enabled */
+		if (RREG32(RADEON_CRTC2_GEN_CNTL) & RADEON_CRTC2_CRT2_ON)
+			crtc2_gen_cntl = RADEON_CRTC2_CRT2_ON;
+		else
+			crtc2_gen_cntl = 0;
+
+		crtc2_gen_cntl |= ((format << 8)
+				   | RADEON_CRTC2_VSYNC_DIS
+				   | RADEON_CRTC2_HSYNC_DIS
+				   | RADEON_CRTC2_DISP_DIS
+				   | RADEON_CRTC2_DISP_REQ_EN_B
+				   | ((mode->flags & DRM_MODE_FLAG_DBLSCAN)
+				      ? RADEON_CRTC2_DBL_SCAN_EN
+				      : 0)
+				   | ((mode->flags & DRM_MODE_FLAG_CSYNC)
+				      ? RADEON_CRTC2_CSYNC_EN
+				      : 0)
+				   | ((mode->flags & DRM_MODE_FLAG_INTERLACE)
+				      ? RADEON_CRTC2_INTERLACE_EN
+				      : 0));
+
+		disp2_merge_cntl = RREG32(RADEON_DISP2_MERGE_CNTL);
+		disp2_merge_cntl &= ~RADEON_DISP2_RGB_OFFSET_EN;
+
+		WREG32(RADEON_DISP2_MERGE_CNTL, disp2_merge_cntl);
+		WREG32(RADEON_CRTC2_GEN_CNTL, crtc2_gen_cntl);
+	} else {
+		uint32_t crtc_gen_cntl;
+		uint32_t crtc_ext_cntl;
+		uint32_t disp_merge_cntl;
+
+		crtc_gen_cntl = (RADEON_CRTC_EXT_DISP_EN
+				 | (format << 8)
+				 | RADEON_CRTC_DISP_REQ_EN_B
+				 | ((mode->flags & DRM_MODE_FLAG_DBLSCAN)
+				    ? RADEON_CRTC_DBL_SCAN_EN
+				    : 0)
+				 | ((mode->flags & DRM_MODE_FLAG_CSYNC)
+				    ? RADEON_CRTC_CSYNC_EN
+				    : 0)
+				 | ((mode->flags & DRM_MODE_FLAG_INTERLACE)
+				    ? RADEON_CRTC_INTERLACE_EN
+				    : 0));
+
+		crtc_ext_cntl = RREG32(RADEON_CRTC_EXT_CNTL);
+		crtc_ext_cntl |= (RADEON_XCRT_CNT_EN |
+				  RADEON_CRTC_VSYNC_DIS |
+				  RADEON_CRTC_HSYNC_DIS |
+				  RADEON_CRTC_DISPLAY_DIS);
+
+		disp_merge_cntl = RREG32(RADEON_DISP_MERGE_CNTL);
+		disp_merge_cntl &= ~RADEON_DISP_RGB_OFFSET_EN;
+
+		WREG32(RADEON_DISP_MERGE_CNTL, disp_merge_cntl);
+		WREG32(RADEON_CRTC_GEN_CNTL, crtc_gen_cntl);
+		WREG32(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl);
+	}
+
+	WREG32(RADEON_CRTC_H_TOTAL_DISP + radeon_crtc->crtc_offset, crtc_h_total_disp);
+	WREG32(RADEON_CRTC_H_SYNC_STRT_WID + radeon_crtc->crtc_offset, crtc_h_sync_strt_wid);
+	WREG32(RADEON_CRTC_V_TOTAL_DISP + radeon_crtc->crtc_offset, crtc_v_total_disp);
+	WREG32(RADEON_CRTC_V_SYNC_STRT_WID + radeon_crtc->crtc_offset, crtc_v_sync_strt_wid);
+
+	return true;
+}
+
+static void radeon_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode)
+{
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct drm_encoder *encoder;
+	uint32_t feedback_div = 0;
+	uint32_t frac_fb_div = 0;
+	uint32_t reference_div = 0;
+	uint32_t post_divider = 0;
+	uint32_t freq = 0;
+	uint8_t pll_gain;
+	int pll_flags = RADEON_PLL_LEGACY;
+	bool use_bios_divs = false;
+	/* PLL registers */
+	uint32_t pll_ref_div = 0;
+	uint32_t pll_fb_post_div = 0;
+	uint32_t htotal_cntl = 0;
+
+	struct radeon_pll *pll;
+
+	struct {
+		int divider;
+		int bitvalue;
+	} *post_div, post_divs[]   = {
+		/* From RAGE 128 VR/RAGE 128 GL Register
+		 * Reference Manual (Technical Reference
+		 * Manual P/N RRG-G04100-C Rev. 0.04), page
+		 * 3-17 (PLL_DIV_[3:0]).
+		 */
+		{  1, 0 },              /* VCLK_SRC                 */
+		{  2, 1 },              /* VCLK_SRC/2               */
+		{  4, 2 },              /* VCLK_SRC/4               */
+		{  8, 3 },              /* VCLK_SRC/8               */
+		{  3, 4 },              /* VCLK_SRC/3               */
+		{ 16, 5 },              /* VCLK_SRC/16              */
+		{  6, 6 },              /* VCLK_SRC/6               */
+		{ 12, 7 },              /* VCLK_SRC/12              */
+		{  0, 0 }
+	};
+
+	if (radeon_crtc->crtc_id)
+		pll = &rdev->clock.p2pll;
+	else
+		pll = &rdev->clock.p1pll;
+
+	if (mode->clock > 200000) /* range limits??? */
+		pll_flags |= RADEON_PLL_PREFER_HIGH_FB_DIV;
+	else
+		pll_flags |= RADEON_PLL_PREFER_LOW_REF_DIV;
+
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+		if (encoder->crtc == crtc) {
+			if (encoder->encoder_type != DRM_MODE_ENCODER_DAC)
+				pll_flags |= RADEON_PLL_NO_ODD_POST_DIV;
+			if (encoder->encoder_type == DRM_MODE_ENCODER_LVDS) {
+				struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+				struct radeon_encoder_lvds *lvds = (struct radeon_encoder_lvds *)radeon_encoder->enc_priv;
+				if (lvds) {
+					if (lvds->use_bios_dividers) {
+						pll_ref_div = lvds->panel_ref_divider;
+						pll_fb_post_div   = (lvds->panel_fb_divider |
+								     (lvds->panel_post_divider << 16));
+						htotal_cntl  = 0;
+						use_bios_divs = true;
+					}
+				}
+				pll_flags |= RADEON_PLL_USE_REF_DIV;
+			}
+		}
+	}
+
+	DRM_DEBUG("\n");
+
+	if (!use_bios_divs) {
+		radeon_compute_pll(pll, mode->clock,
+				   &freq, &feedback_div, &frac_fb_div,
+				   &reference_div, &post_divider,
+				   pll_flags);
+
+		for (post_div = &post_divs[0]; post_div->divider; ++post_div) {
+			if (post_div->divider == post_divider)
+				break;
+		}
+
+		if (!post_div->divider)
+			post_div = &post_divs[0];
+
+		DRM_DEBUG("dc=%u, fd=%d, rd=%d, pd=%d\n",
+			  (unsigned)freq,
+			  feedback_div,
+			  reference_div,
+			  post_divider);
+
+		pll_ref_div   = reference_div;
+#if defined(__powerpc__) && (0) /* TODO */
+		/* apparently programming this otherwise causes a hang??? */
+		if (info->MacModel == RADEON_MAC_IBOOK)
+			pll_fb_post_div = 0x000600ad;
+		else
+#endif
+			pll_fb_post_div     = (feedback_div | (post_div->bitvalue << 16));
+
+		htotal_cntl    = mode->htotal & 0x7;
+
+	}
+
+	pll_gain = radeon_compute_pll_gain(pll->reference_freq,
+					   pll_ref_div & 0x3ff,
+					   pll_fb_post_div & 0x7ff);
+
+	if (radeon_crtc->crtc_id) {
+		uint32_t pixclks_cntl = ((RREG32_PLL(RADEON_PIXCLKS_CNTL) &
+					  ~(RADEON_PIX2CLK_SRC_SEL_MASK)) |
+					 RADEON_PIX2CLK_SRC_SEL_P2PLLCLK);
+
+		WREG32_PLL_P(RADEON_PIXCLKS_CNTL,
+			     RADEON_PIX2CLK_SRC_SEL_CPUCLK,
+			     ~(RADEON_PIX2CLK_SRC_SEL_MASK));
+
+		WREG32_PLL_P(RADEON_P2PLL_CNTL,
+			     RADEON_P2PLL_RESET
+			     | RADEON_P2PLL_ATOMIC_UPDATE_EN
+			     | ((uint32_t)pll_gain << RADEON_P2PLL_PVG_SHIFT),
+			     ~(RADEON_P2PLL_RESET
+			       | RADEON_P2PLL_ATOMIC_UPDATE_EN
+			       | RADEON_P2PLL_PVG_MASK));
+
+		WREG32_PLL_P(RADEON_P2PLL_REF_DIV,
+			     pll_ref_div,
+			     ~RADEON_P2PLL_REF_DIV_MASK);
+
+		WREG32_PLL_P(RADEON_P2PLL_DIV_0,
+			     pll_fb_post_div,
+			     ~RADEON_P2PLL_FB0_DIV_MASK);
+
+		WREG32_PLL_P(RADEON_P2PLL_DIV_0,
+			     pll_fb_post_div,
+			     ~RADEON_P2PLL_POST0_DIV_MASK);
+
+		radeon_pll2_write_update(dev);
+		radeon_pll2_wait_for_read_update_complete(dev);
+
+		WREG32_PLL(RADEON_HTOTAL2_CNTL, htotal_cntl);
+
+		WREG32_PLL_P(RADEON_P2PLL_CNTL,
+			     0,
+			     ~(RADEON_P2PLL_RESET
+			       | RADEON_P2PLL_SLEEP
+			       | RADEON_P2PLL_ATOMIC_UPDATE_EN));
+
+		DRM_DEBUG("Wrote2: 0x%08x 0x%08x 0x%08x (0x%08x)\n",
+			  (unsigned)pll_ref_div,
+			  (unsigned)pll_fb_post_div,
+			  (unsigned)htotal_cntl,
+			  RREG32_PLL(RADEON_P2PLL_CNTL));
+		DRM_DEBUG("Wrote2: rd=%u, fd=%u, pd=%u\n",
+			  (unsigned)pll_ref_div & RADEON_P2PLL_REF_DIV_MASK,
+			  (unsigned)pll_fb_post_div & RADEON_P2PLL_FB0_DIV_MASK,
+			  (unsigned)((pll_fb_post_div &
+				      RADEON_P2PLL_POST0_DIV_MASK) >> 16));
+
+		mdelay(50); /* Let the clock to lock */
+
+		WREG32_PLL_P(RADEON_PIXCLKS_CNTL,
+			     RADEON_PIX2CLK_SRC_SEL_P2PLLCLK,
+			     ~(RADEON_PIX2CLK_SRC_SEL_MASK));
+
+		WREG32_PLL(RADEON_PIXCLKS_CNTL, pixclks_cntl);
+	} else {
+		if (rdev->flags & RADEON_IS_MOBILITY) {
+			/* A temporal workaround for the occational blanking on certain laptop panels.
+			   This appears to related to the PLL divider registers (fail to lock?).
+			   It occurs even when all dividers are the same with their old settings.
+			   In this case we really don't need to fiddle with PLL registers.
+			   By doing this we can avoid the blanking problem with some panels.
+			*/
+			if ((pll_ref_div == (RREG32_PLL(RADEON_PPLL_REF_DIV) & RADEON_PPLL_REF_DIV_MASK)) &&
+			    (pll_fb_post_div == (RREG32_PLL(RADEON_PPLL_DIV_3) &
+						 (RADEON_PPLL_POST3_DIV_MASK | RADEON_PPLL_FB3_DIV_MASK)))) {
+				WREG32_P(RADEON_CLOCK_CNTL_INDEX,
+					 RADEON_PLL_DIV_SEL,
+					 ~(RADEON_PLL_DIV_SEL));
+				r100_pll_errata_after_index(rdev);
+				return;
+			}
+		}
+
+		WREG32_PLL_P(RADEON_VCLK_ECP_CNTL,
+			     RADEON_VCLK_SRC_SEL_CPUCLK,
+			     ~(RADEON_VCLK_SRC_SEL_MASK));
+		WREG32_PLL_P(RADEON_PPLL_CNTL,
+			     RADEON_PPLL_RESET
+			     | RADEON_PPLL_ATOMIC_UPDATE_EN
+			     | RADEON_PPLL_VGA_ATOMIC_UPDATE_EN
+			     | ((uint32_t)pll_gain << RADEON_PPLL_PVG_SHIFT),
+			     ~(RADEON_PPLL_RESET
+			       | RADEON_PPLL_ATOMIC_UPDATE_EN
+			       | RADEON_PPLL_VGA_ATOMIC_UPDATE_EN
+			       | RADEON_PPLL_PVG_MASK));
+
+		WREG32_P(RADEON_CLOCK_CNTL_INDEX,
+			 RADEON_PLL_DIV_SEL,
+			 ~(RADEON_PLL_DIV_SEL));
+		r100_pll_errata_after_index(rdev);
+
+		if (ASIC_IS_R300(rdev) ||
+		    (rdev->family == CHIP_RS300) ||
+		    (rdev->family == CHIP_RS400) ||
+		    (rdev->family == CHIP_RS480)) {
+			if (pll_ref_div & R300_PPLL_REF_DIV_ACC_MASK) {
+				/* When restoring console mode, use saved PPLL_REF_DIV
+				 * setting.
+				 */
+				WREG32_PLL_P(RADEON_PPLL_REF_DIV,
+					     pll_ref_div,
+					     0);
+			} else {
+				/* R300 uses ref_div_acc field as real ref divider */
+				WREG32_PLL_P(RADEON_PPLL_REF_DIV,
+					     (pll_ref_div << R300_PPLL_REF_DIV_ACC_SHIFT),
+					     ~R300_PPLL_REF_DIV_ACC_MASK);
+			}
+		} else
+			WREG32_PLL_P(RADEON_PPLL_REF_DIV,
+				     pll_ref_div,
+				     ~RADEON_PPLL_REF_DIV_MASK);
+
+		WREG32_PLL_P(RADEON_PPLL_DIV_3,
+			     pll_fb_post_div,
+			     ~RADEON_PPLL_FB3_DIV_MASK);
+
+		WREG32_PLL_P(RADEON_PPLL_DIV_3,
+			     pll_fb_post_div,
+			     ~RADEON_PPLL_POST3_DIV_MASK);
+
+		radeon_pll_write_update(dev);
+		radeon_pll_wait_for_read_update_complete(dev);
+
+		WREG32_PLL(RADEON_HTOTAL_CNTL, htotal_cntl);
+
+		WREG32_PLL_P(RADEON_PPLL_CNTL,
+			     0,
+			     ~(RADEON_PPLL_RESET
+			       | RADEON_PPLL_SLEEP
+			       | RADEON_PPLL_ATOMIC_UPDATE_EN
+			       | RADEON_PPLL_VGA_ATOMIC_UPDATE_EN));
+
+		DRM_DEBUG("Wrote: 0x%08x 0x%08x 0x%08x (0x%08x)\n",
+			  pll_ref_div,
+			  pll_fb_post_div,
+			  (unsigned)htotal_cntl,
+			  RREG32_PLL(RADEON_PPLL_CNTL));
+		DRM_DEBUG("Wrote: rd=%d, fd=%d, pd=%d\n",
+			  pll_ref_div & RADEON_PPLL_REF_DIV_MASK,
+			  pll_fb_post_div & RADEON_PPLL_FB3_DIV_MASK,
+			  (pll_fb_post_div & RADEON_PPLL_POST3_DIV_MASK) >> 16);
+
+		mdelay(50); /* Let the clock to lock */
+
+		WREG32_PLL_P(RADEON_VCLK_ECP_CNTL,
+			     RADEON_VCLK_SRC_SEL_PPLLCLK,
+			     ~(RADEON_VCLK_SRC_SEL_MASK));
+
+	}
+}
+
+static bool radeon_crtc_mode_fixup(struct drm_crtc *crtc,
+				   struct drm_display_mode *mode,
+				   struct drm_display_mode *adjusted_mode)
+{
+	return true;
+}
+
+static int radeon_crtc_mode_set(struct drm_crtc *crtc,
+				 struct drm_display_mode *mode,
+				 struct drm_display_mode *adjusted_mode,
+				 int x, int y, struct drm_framebuffer *old_fb)
+{
+
+	DRM_DEBUG("\n");
+
+	/* TODO TV */
+
+	radeon_crtc_set_base(crtc, x, y, old_fb);
+	radeon_set_crtc_timing(crtc, adjusted_mode);
+	radeon_set_pll(crtc, adjusted_mode);
+	radeon_init_disp_bandwidth(crtc->dev);
+
+	return 0;
+}
+
+static void radeon_crtc_prepare(struct drm_crtc *crtc)
+{
+	radeon_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
+}
+
+static void radeon_crtc_commit(struct drm_crtc *crtc)
+{
+	radeon_crtc_dpms(crtc, DRM_MODE_DPMS_ON);
+}
+
+static const struct drm_crtc_helper_funcs legacy_helper_funcs = {
+	.dpms = radeon_crtc_dpms,
+	.mode_fixup = radeon_crtc_mode_fixup,
+	.mode_set = radeon_crtc_mode_set,
+	.mode_set_base = radeon_crtc_set_base,
+	.prepare = radeon_crtc_prepare,
+	.commit = radeon_crtc_commit,
+};
+
+
+void radeon_legacy_init_crtc(struct drm_device *dev,
+			       struct radeon_crtc *radeon_crtc)
+{
+	if (radeon_crtc->crtc_id == 1)
+		radeon_crtc->crtc_offset = RADEON_CRTC2_H_TOTAL_DISP - RADEON_CRTC_H_TOTAL_DISP;
+	drm_crtc_helper_add(&radeon_crtc->base, &legacy_helper_funcs);
+}
+
+void radeon_init_disp_bw_legacy(struct drm_device *dev,
+				struct drm_display_mode *mode1,
+				uint32_t pixel_bytes1,
+				struct drm_display_mode *mode2,
+				uint32_t pixel_bytes2)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff;
+	fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff;
+	fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff, crit_point_ff;
+	uint32_t temp, data, mem_trcd, mem_trp, mem_tras;
+	fixed20_12 memtcas_ff[8] = {
+		fixed_init(1),
+		fixed_init(2),
+		fixed_init(3),
+		fixed_init(0),
+		fixed_init_half(1),
+		fixed_init_half(2),
+		fixed_init(0),
+	};
+	fixed20_12 memtcas_rs480_ff[8] = {
+		fixed_init(0),
+		fixed_init(1),
+		fixed_init(2),
+		fixed_init(3),
+		fixed_init(0),
+		fixed_init_half(1),
+		fixed_init_half(2),
+		fixed_init_half(3),
+	};
+	fixed20_12 memtcas2_ff[8] = {
+		fixed_init(0),
+		fixed_init(1),
+		fixed_init(2),
+		fixed_init(3),
+		fixed_init(4),
+		fixed_init(5),
+		fixed_init(6),
+		fixed_init(7),
+	};
+	fixed20_12 memtrbs[8] = {
+		fixed_init(1),
+		fixed_init_half(1),
+		fixed_init(2),
+		fixed_init_half(2),
+		fixed_init(3),
+		fixed_init_half(3),
+		fixed_init(4),
+		fixed_init_half(4)
+	};
+	fixed20_12 memtrbs_r4xx[8] = {
+		fixed_init(4),
+		fixed_init(5),
+		fixed_init(6),
+		fixed_init(7),
+		fixed_init(8),
+		fixed_init(9),
+		fixed_init(10),
+		fixed_init(11)
+	};
+	fixed20_12 min_mem_eff;
+	fixed20_12 mc_latency_sclk, mc_latency_mclk, k1;
+	fixed20_12 cur_latency_mclk, cur_latency_sclk;
+	fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate,
+		disp_drain_rate2, read_return_rate;
+	fixed20_12 time_disp1_drop_priority;
+	int c;
+	int cur_size = 16;       /* in octawords */
+	int critical_point = 0, critical_point2;
+/* 	uint32_t read_return_rate, time_disp1_drop_priority; */
+	int stop_req, max_stop_req;
+
+	min_mem_eff.full = rfixed_const_8(0);
+	/* get modes */
+	if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) {
+		uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER);
+		mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT);
+		mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT);
+		/* check crtc enables */
+		if (mode2)
+			mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT);
+		if (mode1)
+			mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT);
+		WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer);
+	}
+
+	/*
+	 * determine is there is enough bw for current mode
+	 */
+	mclk_ff.full = rfixed_const(rdev->clock.default_mclk);
+	temp_ff.full = rfixed_const(100);
+	mclk_ff.full = rfixed_div(mclk_ff, temp_ff);
+	sclk_ff.full = rfixed_const(rdev->clock.default_sclk);
+	sclk_ff.full = rfixed_div(sclk_ff, temp_ff);
+
+	temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1);
+	temp_ff.full = rfixed_const(temp);
+	mem_bw.full = rfixed_mul(mclk_ff, temp_ff);
+
+	pix_clk.full = 0;
+	pix_clk2.full = 0;
+	peak_disp_bw.full = 0;
+	if (mode1) {
+		temp_ff.full = rfixed_const(1000);
+		pix_clk.full = rfixed_const(mode1->clock); /* convert to fixed point */
+		pix_clk.full = rfixed_div(pix_clk, temp_ff);
+		temp_ff.full = rfixed_const(pixel_bytes1);
+		peak_disp_bw.full += rfixed_mul(pix_clk, temp_ff);
+	}
+	if (mode2) {
+		temp_ff.full = rfixed_const(1000);
+		pix_clk2.full = rfixed_const(mode2->clock); /* convert to fixed point */
+		pix_clk2.full = rfixed_div(pix_clk2, temp_ff);
+		temp_ff.full = rfixed_const(pixel_bytes2);
+		peak_disp_bw.full += rfixed_mul(pix_clk2, temp_ff);
+	}
+
+	mem_bw.full = rfixed_mul(mem_bw, min_mem_eff);
+	if (peak_disp_bw.full >= mem_bw.full) {
+		DRM_ERROR("You may not have enough display bandwidth for current mode\n"
+			  "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n");
+	}
+
+	/*  Get values from the EXT_MEM_CNTL register...converting its contents. */
+	temp = RREG32(RADEON_MEM_TIMING_CNTL);
+	if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */
+		mem_trcd = ((temp >> 2) & 0x3) + 1;
+		mem_trp  = ((temp & 0x3)) + 1;
+		mem_tras = ((temp & 0x70) >> 4) + 1;
+	} else if (rdev->family == CHIP_R300 ||
+		   rdev->family == CHIP_R350) { /* r300, r350 */
+		mem_trcd = (temp & 0x7) + 1;
+		mem_trp = ((temp >> 8) & 0x7) + 1;
+		mem_tras = ((temp >> 11) & 0xf) + 4;
+	} else if (rdev->family == CHIP_RV350 ||
+		   rdev->family <= CHIP_RV380) {
+		/* rv3x0 */
+		mem_trcd = (temp & 0x7) + 3;
+		mem_trp = ((temp >> 8) & 0x7) + 3;
+		mem_tras = ((temp >> 11) & 0xf) + 6;
+	} else if (rdev->family == CHIP_R420 ||
+		   rdev->family == CHIP_R423 ||
+		   rdev->family == CHIP_RV410) {
+		/* r4xx */
+		mem_trcd = (temp & 0xf) + 3;
+		if (mem_trcd > 15)
+			mem_trcd = 15;
+		mem_trp = ((temp >> 8) & 0xf) + 3;
+		if (mem_trp > 15)
+			mem_trp = 15;
+		mem_tras = ((temp >> 12) & 0x1f) + 6;
+		if (mem_tras > 31)
+			mem_tras = 31;
+	} else { /* RV200, R200 */
+		mem_trcd = (temp & 0x7) + 1;
+		mem_trp = ((temp >> 8) & 0x7) + 1;
+		mem_tras = ((temp >> 12) & 0xf) + 4;
+	}
+	/* convert to FF */
+	trcd_ff.full = rfixed_const(mem_trcd);
+	trp_ff.full = rfixed_const(mem_trp);
+	tras_ff.full = rfixed_const(mem_tras);
+
+	/* Get values from the MEM_SDRAM_MODE_REG register...converting its */
+	temp = RREG32(RADEON_MEM_SDRAM_MODE_REG);
+	data = (temp & (7 << 20)) >> 20;
+	if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) {
+		if (rdev->family == CHIP_RS480) /* don't think rs400 */
+			tcas_ff = memtcas_rs480_ff[data];
+		else
+			tcas_ff = memtcas_ff[data];
+	} else
+		tcas_ff = memtcas2_ff[data];
+
+	if (rdev->family == CHIP_RS400 ||
+	    rdev->family == CHIP_RS480) {
+		/* extra cas latency stored in bits 23-25 0-4 clocks */
+		data = (temp >> 23) & 0x7;
+		if (data < 5)
+			tcas_ff.full += rfixed_const(data);
+	}
+
+	if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) {
+		/* on the R300, Tcas is included in Trbs.
+		 */
+		temp = RREG32(RADEON_MEM_CNTL);
+		data = (R300_MEM_NUM_CHANNELS_MASK & temp);
+		if (data == 1) {
+			if (R300_MEM_USE_CD_CH_ONLY & temp) {
+				temp = RREG32(R300_MC_IND_INDEX);
+				temp &= ~R300_MC_IND_ADDR_MASK;
+				temp |= R300_MC_READ_CNTL_CD_mcind;
+				WREG32(R300_MC_IND_INDEX, temp);
+				temp = RREG32(R300_MC_IND_DATA);
+				data = (R300_MEM_RBS_POSITION_C_MASK & temp);
+			} else {
+				temp = RREG32(R300_MC_READ_CNTL_AB);
+				data = (R300_MEM_RBS_POSITION_A_MASK & temp);
+			}
+		} else {
+			temp = RREG32(R300_MC_READ_CNTL_AB);
+			data = (R300_MEM_RBS_POSITION_A_MASK & temp);
+		}
+		if (rdev->family == CHIP_RV410 ||
+		    rdev->family == CHIP_R420 ||
+		    rdev->family == CHIP_R423)
+			trbs_ff = memtrbs_r4xx[data];
+		else
+			trbs_ff = memtrbs[data];
+		tcas_ff.full += trbs_ff.full;
+	}
+
+	sclk_eff_ff.full = sclk_ff.full;
+
+	if (rdev->flags & RADEON_IS_AGP) {
+		fixed20_12 agpmode_ff;
+		agpmode_ff.full = rfixed_const(radeon_agpmode);
+		temp_ff.full = rfixed_const_666(16);
+		sclk_eff_ff.full -= rfixed_mul(agpmode_ff, temp_ff);
+	}
+	/* TODO PCIE lanes may affect this - agpmode == 16?? */
+
+	if (ASIC_IS_R300(rdev)) {
+		sclk_delay_ff.full = rfixed_const(250);
+	} else {
+		if ((rdev->family == CHIP_RV100) ||
+		    rdev->flags & RADEON_IS_IGP) {
+			if (rdev->mc.vram_is_ddr)
+				sclk_delay_ff.full = rfixed_const(41);
+			else
+				sclk_delay_ff.full = rfixed_const(33);
+		} else {
+			if (rdev->mc.vram_width == 128)
+				sclk_delay_ff.full = rfixed_const(57);
+			else
+				sclk_delay_ff.full = rfixed_const(41);
+		}
+	}
+
+	mc_latency_sclk.full = rfixed_div(sclk_delay_ff, sclk_eff_ff);
+
+	if (rdev->mc.vram_is_ddr) {
+		if (rdev->mc.vram_width == 32) {
+			k1.full = rfixed_const(40);
+			c  = 3;
+		} else {
+			k1.full = rfixed_const(20);
+			c  = 1;
+		}
+	} else {
+		k1.full = rfixed_const(40);
+		c  = 3;
+	}
+
+	temp_ff.full = rfixed_const(2);
+	mc_latency_mclk.full = rfixed_mul(trcd_ff, temp_ff);
+	temp_ff.full = rfixed_const(c);
+	mc_latency_mclk.full += rfixed_mul(tcas_ff, temp_ff);
+	temp_ff.full = rfixed_const(4);
+	mc_latency_mclk.full += rfixed_mul(tras_ff, temp_ff);
+	mc_latency_mclk.full += rfixed_mul(trp_ff, temp_ff);
+	mc_latency_mclk.full += k1.full;
+
+	mc_latency_mclk.full = rfixed_div(mc_latency_mclk, mclk_ff);
+	mc_latency_mclk.full += rfixed_div(temp_ff, sclk_eff_ff);
+
+	/*
+	  HW cursor time assuming worst case of full size colour cursor.
+	*/
+	temp_ff.full = rfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1))));
+	temp_ff.full += trcd_ff.full;
+	if (temp_ff.full < tras_ff.full)
+		temp_ff.full = tras_ff.full;
+	cur_latency_mclk.full = rfixed_div(temp_ff, mclk_ff);
+
+	temp_ff.full = rfixed_const(cur_size);
+	cur_latency_sclk.full = rfixed_div(temp_ff, sclk_eff_ff);
+	/*
+	  Find the total latency for the display data.
+	*/
+	disp_latency_overhead.full = rfixed_const(80);
+	disp_latency_overhead.full = rfixed_div(disp_latency_overhead, sclk_ff);
+	mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full;
+	mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full;
+
+	if (mc_latency_mclk.full > mc_latency_sclk.full)
+		disp_latency.full = mc_latency_mclk.full;
+	else
+		disp_latency.full = mc_latency_sclk.full;
+
+	/* setup Max GRPH_STOP_REQ default value */
+	if (ASIC_IS_RV100(rdev))
+		max_stop_req = 0x5c;
+	else
+		max_stop_req = 0x7c;
+
+	if (mode1) {
+		/*  CRTC1
+		    Set GRPH_BUFFER_CNTL register using h/w defined optimal values.
+		    GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ]
+		*/
+		stop_req = mode1->hdisplay * pixel_bytes1 / 16;
+
+		if (stop_req > max_stop_req)
+			stop_req = max_stop_req;
+
+		/*
+		  Find the drain rate of the display buffer.
+		*/
+		temp_ff.full = rfixed_const((16/pixel_bytes1));
+		disp_drain_rate.full = rfixed_div(pix_clk, temp_ff);
+
+		/*
+		  Find the critical point of the display buffer.
+		*/
+		crit_point_ff.full = rfixed_mul(disp_drain_rate, disp_latency);
+		crit_point_ff.full += rfixed_const_half(0);
+
+		critical_point = rfixed_trunc(crit_point_ff);
+
+		if (rdev->disp_priority == 2) {
+			critical_point = 0;
+		}
+
+		/*
+		  The critical point should never be above max_stop_req-4.  Setting
+		  GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time.
+		*/
+		if (max_stop_req - critical_point < 4)
+			critical_point = 0;
+
+		if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) {
+			/* some R300 cards have problem with this set to 0, when CRTC2 is enabled.*/
+			critical_point = 0x10;
+		}
+
+		temp = RREG32(RADEON_GRPH_BUFFER_CNTL);
+		temp &= ~(RADEON_GRPH_STOP_REQ_MASK);
+		temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
+		temp &= ~(RADEON_GRPH_START_REQ_MASK);
+		if ((rdev->family == CHIP_R350) &&
+		    (stop_req > 0x15)) {
+			stop_req -= 0x10;
+		}
+		temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
+		temp |= RADEON_GRPH_BUFFER_SIZE;
+		temp &= ~(RADEON_GRPH_CRITICAL_CNTL   |
+			  RADEON_GRPH_CRITICAL_AT_SOF |
+			  RADEON_GRPH_STOP_CNTL);
+		/*
+		  Write the result into the register.
+		*/
+		WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
+						       (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT)));
+
+#if 0
+		if ((rdev->family == CHIP_RS400) ||
+		    (rdev->family == CHIP_RS480)) {
+			/* attempt to program RS400 disp regs correctly ??? */
+			temp = RREG32(RS400_DISP1_REG_CNTL);
+			temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK |
+				  RS400_DISP1_STOP_REQ_LEVEL_MASK);
+			WREG32(RS400_DISP1_REQ_CNTL1, (temp |
+						       (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
+						       (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
+			temp = RREG32(RS400_DMIF_MEM_CNTL1);
+			temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK |
+				  RS400_DISP1_CRITICAL_POINT_STOP_MASK);
+			WREG32(RS400_DMIF_MEM_CNTL1, (temp |
+						      (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) |
+						      (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT)));
+		}
+#endif
+
+		DRM_DEBUG("GRPH_BUFFER_CNTL from to %x\n",
+			  /* 	  (unsigned int)info->SavedReg->grph_buffer_cntl, */
+			  (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL));
+	}
+
+	if (mode2) {
+		u32 grph2_cntl;
+		stop_req = mode2->hdisplay * pixel_bytes2 / 16;
+
+		if (stop_req > max_stop_req)
+			stop_req = max_stop_req;
+
+		/*
+		  Find the drain rate of the display buffer.
+		*/
+		temp_ff.full = rfixed_const((16/pixel_bytes2));
+		disp_drain_rate2.full = rfixed_div(pix_clk2, temp_ff);
+
+		grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL);
+		grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK);
+		grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
+		grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK);
+		if ((rdev->family == CHIP_R350) &&
+		    (stop_req > 0x15)) {
+			stop_req -= 0x10;
+		}
+		grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
+		grph2_cntl |= RADEON_GRPH_BUFFER_SIZE;
+		grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL   |
+			  RADEON_GRPH_CRITICAL_AT_SOF |
+			  RADEON_GRPH_STOP_CNTL);
+
+		if ((rdev->family == CHIP_RS100) ||
+		    (rdev->family == CHIP_RS200))
+			critical_point2 = 0;
+		else {
+			temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128;
+			temp_ff.full = rfixed_const(temp);
+			temp_ff.full = rfixed_mul(mclk_ff, temp_ff);
+			if (sclk_ff.full < temp_ff.full)
+				temp_ff.full = sclk_ff.full;
+
+			read_return_rate.full = temp_ff.full;
+
+			if (mode1) {
+				temp_ff.full = read_return_rate.full - disp_drain_rate.full;
+				time_disp1_drop_priority.full = rfixed_div(crit_point_ff, temp_ff);
+			} else {
+				time_disp1_drop_priority.full = 0;
+			}
+			crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full;
+			crit_point_ff.full = rfixed_mul(crit_point_ff, disp_drain_rate2);
+			crit_point_ff.full += rfixed_const_half(0);
+
+			critical_point2 = rfixed_trunc(crit_point_ff);
+
+			if (rdev->disp_priority == 2) {
+				critical_point2 = 0;
+			}
+
+			if (max_stop_req - critical_point2 < 4)
+				critical_point2 = 0;
+
+		}
+
+		if (critical_point2 == 0 && rdev->family == CHIP_R300) {
+			/* some R300 cards have problem with this set to 0 */
+			critical_point2 = 0x10;
+		}
+
+		WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
+						  (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT)));
+
+		if ((rdev->family == CHIP_RS400) ||
+		    (rdev->family == CHIP_RS480)) {
+#if 0
+			/* attempt to program RS400 disp2 regs correctly ??? */
+			temp = RREG32(RS400_DISP2_REQ_CNTL1);
+			temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK |
+				  RS400_DISP2_STOP_REQ_LEVEL_MASK);
+			WREG32(RS400_DISP2_REQ_CNTL1, (temp |
+						       (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
+						       (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
+			temp = RREG32(RS400_DISP2_REQ_CNTL2);
+			temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK |
+				  RS400_DISP2_CRITICAL_POINT_STOP_MASK);
+			WREG32(RS400_DISP2_REQ_CNTL2, (temp |
+						       (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) |
+						       (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT)));
+#endif
+			WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC);
+			WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000);
+			WREG32(RS400_DMIF_MEM_CNTL1,  0x29CA71DC);
+			WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC);
+		}
+
+		DRM_DEBUG("GRPH2_BUFFER_CNTL from to %x\n",
+			  (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL));
+	}
+}
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
new file mode 100644
index 00000000000..c41ab0966f8
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
@@ -0,0 +1,1284 @@
+/*
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ */
+#include "drmP.h"
+#include "drm_crtc_helper.h"
+#include "radeon_drm.h"
+#include "radeon.h"
+#include "atom.h"
+
+
+static void radeon_legacy_rmx_mode_set(struct drm_encoder *encoder,
+				       struct drm_display_mode *mode,
+				       struct drm_display_mode *adjusted_mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	int    xres = mode->hdisplay;
+	int    yres = mode->vdisplay;
+	bool   hscale = true, vscale = true;
+	int    hsync_wid;
+	int    vsync_wid;
+	int    hsync_start;
+	uint32_t scale, inc;
+	uint32_t fp_horz_stretch, fp_vert_stretch, crtc_more_cntl, fp_horz_vert_active;
+	uint32_t fp_h_sync_strt_wid, fp_v_sync_strt_wid, fp_crtc_h_total_disp, fp_crtc_v_total_disp;
+	struct radeon_native_mode *native_mode = &radeon_encoder->native_mode;
+
+	DRM_DEBUG("\n");
+
+	fp_vert_stretch = RREG32(RADEON_FP_VERT_STRETCH) &
+		(RADEON_VERT_STRETCH_RESERVED |
+		 RADEON_VERT_AUTO_RATIO_INC);
+	fp_horz_stretch = RREG32(RADEON_FP_HORZ_STRETCH) &
+		(RADEON_HORZ_FP_LOOP_STRETCH |
+		 RADEON_HORZ_AUTO_RATIO_INC);
+
+	crtc_more_cntl = 0;
+	if ((rdev->family == CHIP_RS100) ||
+	    (rdev->family == CHIP_RS200)) {
+		/* This is to workaround the asic bug for RMX, some versions
+		   of BIOS dosen't have this register initialized correctly. */
+		crtc_more_cntl |= RADEON_CRTC_H_CUTOFF_ACTIVE_EN;
+	}
+
+
+	fp_crtc_h_total_disp = ((((mode->crtc_htotal / 8) - 1) & 0x3ff)
+				| ((((mode->crtc_hdisplay / 8) - 1) & 0x1ff) << 16));
+
+	hsync_wid = (mode->crtc_hsync_end - mode->crtc_hsync_start) / 8;
+	if (!hsync_wid)
+		hsync_wid = 1;
+	hsync_start = mode->crtc_hsync_start - 8;
+
+	fp_h_sync_strt_wid = ((hsync_start & 0x1fff)
+			      | ((hsync_wid & 0x3f) << 16)
+			      | ((mode->flags & DRM_MODE_FLAG_NHSYNC)
+				 ? RADEON_CRTC_H_SYNC_POL
+				 : 0));
+
+	fp_crtc_v_total_disp = (((mode->crtc_vtotal - 1) & 0xffff)
+				| ((mode->crtc_vdisplay - 1) << 16));
+
+	vsync_wid = mode->crtc_vsync_end - mode->crtc_vsync_start;
+	if (!vsync_wid)
+		vsync_wid = 1;
+
+	fp_v_sync_strt_wid = (((mode->crtc_vsync_start - 1) & 0xfff)
+			      | ((vsync_wid & 0x1f) << 16)
+			      | ((mode->flags & DRM_MODE_FLAG_NVSYNC)
+				 ? RADEON_CRTC_V_SYNC_POL
+				 : 0));
+
+	fp_horz_vert_active = 0;
+
+	if (native_mode->panel_xres == 0 ||
+	    native_mode->panel_yres == 0) {
+		hscale = false;
+		vscale = false;
+	} else {
+		if (xres > native_mode->panel_xres)
+			xres = native_mode->panel_xres;
+		if (yres > native_mode->panel_yres)
+			yres = native_mode->panel_yres;
+
+		if (xres == native_mode->panel_xres)
+			hscale = false;
+		if (yres == native_mode->panel_yres)
+			vscale = false;
+	}
+
+	if (radeon_encoder->flags & RADEON_USE_RMX) {
+		if (radeon_encoder->rmx_type != RMX_CENTER) {
+			if (!hscale)
+				fp_horz_stretch |= ((xres/8-1) << 16);
+			else {
+				inc = (fp_horz_stretch & RADEON_HORZ_AUTO_RATIO_INC) ? 1 : 0;
+				scale = ((xres + inc) * RADEON_HORZ_STRETCH_RATIO_MAX)
+					/ native_mode->panel_xres + 1;
+				fp_horz_stretch |= (((scale) & RADEON_HORZ_STRETCH_RATIO_MASK) |
+						    RADEON_HORZ_STRETCH_BLEND |
+						    RADEON_HORZ_STRETCH_ENABLE |
+						    ((native_mode->panel_xres/8-1) << 16));
+			}
+
+			if (!vscale)
+				fp_vert_stretch |= ((yres-1) << 12);
+			else {
+				inc = (fp_vert_stretch & RADEON_VERT_AUTO_RATIO_INC) ? 1 : 0;
+				scale = ((yres + inc) * RADEON_VERT_STRETCH_RATIO_MAX)
+					/ native_mode->panel_yres + 1;
+				fp_vert_stretch |= (((scale) & RADEON_VERT_STRETCH_RATIO_MASK) |
+						    RADEON_VERT_STRETCH_ENABLE |
+						    RADEON_VERT_STRETCH_BLEND |
+						    ((native_mode->panel_yres-1) << 12));
+			}
+		} else if (radeon_encoder->rmx_type == RMX_CENTER) {
+			int    blank_width;
+
+			fp_horz_stretch |= ((xres/8-1) << 16);
+			fp_vert_stretch |= ((yres-1) << 12);
+
+			crtc_more_cntl |= (RADEON_CRTC_AUTO_HORZ_CENTER_EN |
+					   RADEON_CRTC_AUTO_VERT_CENTER_EN);
+
+			blank_width = (mode->crtc_hblank_end - mode->crtc_hblank_start) / 8;
+			if (blank_width > 110)
+				blank_width = 110;
+
+			fp_crtc_h_total_disp = (((blank_width) & 0x3ff)
+						| ((((mode->crtc_hdisplay / 8) - 1) & 0x1ff) << 16));
+
+			hsync_wid = (mode->crtc_hsync_end - mode->crtc_hsync_start) / 8;
+			if (!hsync_wid)
+				hsync_wid = 1;
+
+			fp_h_sync_strt_wid = ((((mode->crtc_hsync_start - mode->crtc_hblank_start) / 8) & 0x1fff)
+					      | ((hsync_wid & 0x3f) << 16)
+					      | ((mode->flags & DRM_MODE_FLAG_NHSYNC)
+						 ? RADEON_CRTC_H_SYNC_POL
+						 : 0));
+
+			fp_crtc_v_total_disp = (((mode->crtc_vblank_end - mode->crtc_vblank_start) & 0xffff)
+						| ((mode->crtc_vdisplay - 1) << 16));
+
+			vsync_wid = mode->crtc_vsync_end - mode->crtc_vsync_start;
+			if (!vsync_wid)
+				vsync_wid = 1;
+
+			fp_v_sync_strt_wid = ((((mode->crtc_vsync_start - mode->crtc_vblank_start) & 0xfff)
+					       | ((vsync_wid & 0x1f) << 16)
+					       | ((mode->flags & DRM_MODE_FLAG_NVSYNC)
+						  ? RADEON_CRTC_V_SYNC_POL
+						  : 0)));
+
+			fp_horz_vert_active = (((native_mode->panel_yres) & 0xfff) |
+					       (((native_mode->panel_xres / 8) & 0x1ff) << 16));
+		}
+	} else {
+		fp_horz_stretch |= ((xres/8-1) << 16);
+		fp_vert_stretch |= ((yres-1) << 12);
+	}
+
+	WREG32(RADEON_FP_HORZ_STRETCH,      fp_horz_stretch);
+	WREG32(RADEON_FP_VERT_STRETCH,      fp_vert_stretch);
+	WREG32(RADEON_CRTC_MORE_CNTL,       crtc_more_cntl);
+	WREG32(RADEON_FP_HORZ_VERT_ACTIVE,  fp_horz_vert_active);
+	WREG32(RADEON_FP_H_SYNC_STRT_WID,   fp_h_sync_strt_wid);
+	WREG32(RADEON_FP_V_SYNC_STRT_WID,   fp_v_sync_strt_wid);
+	WREG32(RADEON_FP_CRTC_H_TOTAL_DISP, fp_crtc_h_total_disp);
+	WREG32(RADEON_FP_CRTC_V_TOTAL_DISP, fp_crtc_v_total_disp);
+
+}
+
+static void radeon_legacy_lvds_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	uint32_t lvds_gen_cntl, lvds_pll_cntl, pixclks_cntl, disp_pwr_man;
+	int panel_pwr_delay = 2000;
+	DRM_DEBUG("\n");
+
+	if (radeon_encoder->enc_priv) {
+		if (rdev->is_atom_bios) {
+			struct radeon_encoder_atom_dig *lvds = radeon_encoder->enc_priv;
+			panel_pwr_delay = lvds->panel_pwr_delay;
+		} else {
+			struct radeon_encoder_lvds *lvds = radeon_encoder->enc_priv;
+			panel_pwr_delay = lvds->panel_pwr_delay;
+		}
+	}
+
+	switch (mode) {
+	case DRM_MODE_DPMS_ON:
+		disp_pwr_man = RREG32(RADEON_DISP_PWR_MAN);
+		disp_pwr_man |= RADEON_AUTO_PWRUP_EN;
+		WREG32(RADEON_DISP_PWR_MAN, disp_pwr_man);
+		lvds_pll_cntl = RREG32(RADEON_LVDS_PLL_CNTL);
+		lvds_pll_cntl |= RADEON_LVDS_PLL_EN;
+		WREG32(RADEON_LVDS_PLL_CNTL, lvds_pll_cntl);
+		udelay(1000);
+
+		lvds_pll_cntl = RREG32(RADEON_LVDS_PLL_CNTL);
+		lvds_pll_cntl &= ~RADEON_LVDS_PLL_RESET;
+		WREG32(RADEON_LVDS_PLL_CNTL, lvds_pll_cntl);
+
+		lvds_gen_cntl = RREG32(RADEON_LVDS_GEN_CNTL);
+		lvds_gen_cntl |= (RADEON_LVDS_ON | RADEON_LVDS_EN | RADEON_LVDS_DIGON | RADEON_LVDS_BLON);
+		lvds_gen_cntl &= ~(RADEON_LVDS_DISPLAY_DIS);
+		udelay(panel_pwr_delay * 1000);
+		WREG32(RADEON_LVDS_GEN_CNTL, lvds_gen_cntl);
+		break;
+	case DRM_MODE_DPMS_STANDBY:
+	case DRM_MODE_DPMS_SUSPEND:
+	case DRM_MODE_DPMS_OFF:
+		pixclks_cntl = RREG32_PLL(RADEON_PIXCLKS_CNTL);
+		WREG32_PLL_P(RADEON_PIXCLKS_CNTL, 0, ~RADEON_PIXCLK_LVDS_ALWAYS_ONb);
+		lvds_gen_cntl = RREG32(RADEON_LVDS_GEN_CNTL);
+		lvds_gen_cntl |= RADEON_LVDS_DISPLAY_DIS;
+		lvds_gen_cntl &= ~(RADEON_LVDS_ON | RADEON_LVDS_BLON | RADEON_LVDS_EN | RADEON_LVDS_DIGON);
+		udelay(panel_pwr_delay * 1000);
+		WREG32(RADEON_LVDS_GEN_CNTL, lvds_gen_cntl);
+		WREG32_PLL(RADEON_PIXCLKS_CNTL, pixclks_cntl);
+		break;
+	}
+
+	if (rdev->is_atom_bios)
+		radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
+	else
+		radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
+}
+
+static void radeon_legacy_lvds_prepare(struct drm_encoder *encoder)
+{
+	struct radeon_device *rdev = encoder->dev->dev_private;
+
+	if (rdev->is_atom_bios)
+		radeon_atom_output_lock(encoder, true);
+	else
+		radeon_combios_output_lock(encoder, true);
+	radeon_legacy_lvds_dpms(encoder, DRM_MODE_DPMS_OFF);
+}
+
+static void radeon_legacy_lvds_commit(struct drm_encoder *encoder)
+{
+	struct radeon_device *rdev = encoder->dev->dev_private;
+
+	radeon_legacy_lvds_dpms(encoder, DRM_MODE_DPMS_ON);
+	if (rdev->is_atom_bios)
+		radeon_atom_output_lock(encoder, false);
+	else
+		radeon_combios_output_lock(encoder, false);
+}
+
+static void radeon_legacy_lvds_mode_set(struct drm_encoder *encoder,
+					struct drm_display_mode *mode,
+					struct drm_display_mode *adjusted_mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	uint32_t lvds_pll_cntl, lvds_gen_cntl, lvds_ss_gen_cntl;
+
+	DRM_DEBUG("\n");
+
+	if (radeon_crtc->crtc_id == 0)
+		radeon_legacy_rmx_mode_set(encoder, mode, adjusted_mode);
+
+	lvds_pll_cntl = RREG32(RADEON_LVDS_PLL_CNTL);
+	lvds_pll_cntl &= ~RADEON_LVDS_PLL_EN;
+
+	lvds_ss_gen_cntl = RREG32(RADEON_LVDS_SS_GEN_CNTL);
+	if ((!rdev->is_atom_bios)) {
+		struct radeon_encoder_lvds *lvds = (struct radeon_encoder_lvds *)radeon_encoder->enc_priv;
+		if (lvds) {
+			DRM_DEBUG("bios LVDS_GEN_CNTL: 0x%x\n", lvds->lvds_gen_cntl);
+			lvds_gen_cntl = lvds->lvds_gen_cntl;
+			lvds_ss_gen_cntl &= ~((0xf << RADEON_LVDS_PWRSEQ_DELAY1_SHIFT) |
+					      (0xf << RADEON_LVDS_PWRSEQ_DELAY2_SHIFT));
+			lvds_ss_gen_cntl |= ((lvds->panel_digon_delay << RADEON_LVDS_PWRSEQ_DELAY1_SHIFT) |
+					     (lvds->panel_blon_delay << RADEON_LVDS_PWRSEQ_DELAY2_SHIFT));
+		} else
+			lvds_gen_cntl = RREG32(RADEON_LVDS_GEN_CNTL);
+	} else
+		lvds_gen_cntl = RREG32(RADEON_LVDS_GEN_CNTL);
+	lvds_gen_cntl |= RADEON_LVDS_DISPLAY_DIS;
+	lvds_gen_cntl &= ~(RADEON_LVDS_ON |
+			   RADEON_LVDS_BLON |
+			   RADEON_LVDS_EN |
+			   RADEON_LVDS_RST_FM);
+
+	if (ASIC_IS_R300(rdev))
+		lvds_pll_cntl &= ~(R300_LVDS_SRC_SEL_MASK);
+
+	if (radeon_crtc->crtc_id == 0) {
+		if (ASIC_IS_R300(rdev)) {
+			if (radeon_encoder->flags & RADEON_USE_RMX)
+				lvds_pll_cntl |= R300_LVDS_SRC_SEL_RMX;
+		} else
+			lvds_gen_cntl &= ~RADEON_LVDS_SEL_CRTC2;
+	} else {
+		if (ASIC_IS_R300(rdev))
+			lvds_pll_cntl |= R300_LVDS_SRC_SEL_CRTC2;
+		else
+			lvds_gen_cntl |= RADEON_LVDS_SEL_CRTC2;
+	}
+
+	WREG32(RADEON_LVDS_GEN_CNTL, lvds_gen_cntl);
+	WREG32(RADEON_LVDS_PLL_CNTL, lvds_pll_cntl);
+	WREG32(RADEON_LVDS_SS_GEN_CNTL, lvds_ss_gen_cntl);
+
+	if (rdev->family == CHIP_RV410)
+		WREG32(RADEON_CLOCK_CNTL_INDEX, 0);
+
+	if (rdev->is_atom_bios)
+		radeon_atombios_encoder_crtc_scratch_regs(encoder, radeon_crtc->crtc_id);
+	else
+		radeon_combios_encoder_crtc_scratch_regs(encoder, radeon_crtc->crtc_id);
+}
+
+static bool radeon_legacy_lvds_mode_fixup(struct drm_encoder *encoder,
+					  struct drm_display_mode *mode,
+					  struct drm_display_mode *adjusted_mode)
+{
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+
+	drm_mode_set_crtcinfo(adjusted_mode, 0);
+
+	radeon_encoder->flags &= ~RADEON_USE_RMX;
+
+	if (radeon_encoder->rmx_type != RMX_OFF)
+		radeon_rmx_mode_fixup(encoder, mode, adjusted_mode);
+
+	return true;
+}
+
+static const struct drm_encoder_helper_funcs radeon_legacy_lvds_helper_funcs = {
+	.dpms = radeon_legacy_lvds_dpms,
+	.mode_fixup = radeon_legacy_lvds_mode_fixup,
+	.prepare = radeon_legacy_lvds_prepare,
+	.mode_set = radeon_legacy_lvds_mode_set,
+	.commit = radeon_legacy_lvds_commit,
+};
+
+
+static const struct drm_encoder_funcs radeon_legacy_lvds_enc_funcs = {
+	.destroy = radeon_enc_destroy,
+};
+
+static bool radeon_legacy_primary_dac_mode_fixup(struct drm_encoder *encoder,
+						 struct drm_display_mode *mode,
+						 struct drm_display_mode *adjusted_mode)
+{
+
+	drm_mode_set_crtcinfo(adjusted_mode, 0);
+
+	return true;
+}
+
+static void radeon_legacy_primary_dac_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t crtc_ext_cntl = RREG32(RADEON_CRTC_EXT_CNTL);
+	uint32_t dac_cntl = RREG32(RADEON_DAC_CNTL);
+	uint32_t dac_macro_cntl = RREG32(RADEON_DAC_MACRO_CNTL);
+
+	DRM_DEBUG("\n");
+
+	switch (mode) {
+	case DRM_MODE_DPMS_ON:
+		crtc_ext_cntl |= RADEON_CRTC_CRT_ON;
+		dac_cntl &= ~RADEON_DAC_PDWN;
+		dac_macro_cntl &= ~(RADEON_DAC_PDWN_R |
+				    RADEON_DAC_PDWN_G |
+				    RADEON_DAC_PDWN_B);
+		break;
+	case DRM_MODE_DPMS_STANDBY:
+	case DRM_MODE_DPMS_SUSPEND:
+	case DRM_MODE_DPMS_OFF:
+		crtc_ext_cntl &= ~RADEON_CRTC_CRT_ON;
+		dac_cntl |= RADEON_DAC_PDWN;
+		dac_macro_cntl |= (RADEON_DAC_PDWN_R |
+				   RADEON_DAC_PDWN_G |
+				   RADEON_DAC_PDWN_B);
+		break;
+	}
+
+	WREG32(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl);
+	WREG32(RADEON_DAC_CNTL, dac_cntl);
+	WREG32(RADEON_DAC_MACRO_CNTL, dac_macro_cntl);
+
+	if (rdev->is_atom_bios)
+		radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
+	else
+		radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
+}
+
+static void radeon_legacy_primary_dac_prepare(struct drm_encoder *encoder)
+{
+	struct radeon_device *rdev = encoder->dev->dev_private;
+
+	if (rdev->is_atom_bios)
+		radeon_atom_output_lock(encoder, true);
+	else
+		radeon_combios_output_lock(encoder, true);
+	radeon_legacy_primary_dac_dpms(encoder, DRM_MODE_DPMS_OFF);
+}
+
+static void radeon_legacy_primary_dac_commit(struct drm_encoder *encoder)
+{
+	struct radeon_device *rdev = encoder->dev->dev_private;
+
+	radeon_legacy_primary_dac_dpms(encoder, DRM_MODE_DPMS_ON);
+
+	if (rdev->is_atom_bios)
+		radeon_atom_output_lock(encoder, false);
+	else
+		radeon_combios_output_lock(encoder, false);
+}
+
+static void radeon_legacy_primary_dac_mode_set(struct drm_encoder *encoder,
+					       struct drm_display_mode *mode,
+					       struct drm_display_mode *adjusted_mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	uint32_t disp_output_cntl, dac_cntl, dac2_cntl, dac_macro_cntl;
+
+	DRM_DEBUG("\n");
+
+	if (radeon_crtc->crtc_id == 0)
+		radeon_legacy_rmx_mode_set(encoder, mode, adjusted_mode);
+
+	if (radeon_crtc->crtc_id == 0) {
+		if (rdev->family == CHIP_R200 || ASIC_IS_R300(rdev)) {
+			disp_output_cntl = RREG32(RADEON_DISP_OUTPUT_CNTL) &
+				~(RADEON_DISP_DAC_SOURCE_MASK);
+			WREG32(RADEON_DISP_OUTPUT_CNTL, disp_output_cntl);
+		} else {
+			dac2_cntl = RREG32(RADEON_DAC_CNTL2)  & ~(RADEON_DAC2_DAC_CLK_SEL);
+			WREG32(RADEON_DAC_CNTL2, dac2_cntl);
+		}
+	} else {
+		if (rdev->family == CHIP_R200 || ASIC_IS_R300(rdev)) {
+			disp_output_cntl = RREG32(RADEON_DISP_OUTPUT_CNTL) &
+				~(RADEON_DISP_DAC_SOURCE_MASK);
+			disp_output_cntl |= RADEON_DISP_DAC_SOURCE_CRTC2;
+			WREG32(RADEON_DISP_OUTPUT_CNTL, disp_output_cntl);
+		} else {
+			dac2_cntl = RREG32(RADEON_DAC_CNTL2) | RADEON_DAC2_DAC_CLK_SEL;
+			WREG32(RADEON_DAC_CNTL2, dac2_cntl);
+		}
+	}
+
+	dac_cntl = (RADEON_DAC_MASK_ALL |
+		    RADEON_DAC_VGA_ADR_EN |
+		    /* TODO 6-bits */
+		    RADEON_DAC_8BIT_EN);
+
+	WREG32_P(RADEON_DAC_CNTL,
+		       dac_cntl,
+		       RADEON_DAC_RANGE_CNTL |
+		       RADEON_DAC_BLANKING);
+
+	if (radeon_encoder->enc_priv) {
+		struct radeon_encoder_primary_dac *p_dac = (struct radeon_encoder_primary_dac *)radeon_encoder->enc_priv;
+		dac_macro_cntl = p_dac->ps2_pdac_adj;
+	} else
+		dac_macro_cntl = RREG32(RADEON_DAC_MACRO_CNTL);
+	dac_macro_cntl |= RADEON_DAC_PDWN_R | RADEON_DAC_PDWN_G | RADEON_DAC_PDWN_B;
+	WREG32(RADEON_DAC_MACRO_CNTL, dac_macro_cntl);
+
+	if (rdev->is_atom_bios)
+		radeon_atombios_encoder_crtc_scratch_regs(encoder, radeon_crtc->crtc_id);
+	else
+		radeon_combios_encoder_crtc_scratch_regs(encoder, radeon_crtc->crtc_id);
+}
+
+static enum drm_connector_status radeon_legacy_primary_dac_detect(struct drm_encoder *encoder,
+								  struct drm_connector *connector)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t vclk_ecp_cntl, crtc_ext_cntl;
+	uint32_t dac_ext_cntl, dac_cntl, dac_macro_cntl, tmp;
+	enum drm_connector_status found = connector_status_disconnected;
+	bool color = true;
+
+	/* save the regs we need */
+	vclk_ecp_cntl = RREG32_PLL(RADEON_VCLK_ECP_CNTL);
+	crtc_ext_cntl = RREG32(RADEON_CRTC_EXT_CNTL);
+	dac_ext_cntl = RREG32(RADEON_DAC_EXT_CNTL);
+	dac_cntl = RREG32(RADEON_DAC_CNTL);
+	dac_macro_cntl = RREG32(RADEON_DAC_MACRO_CNTL);
+
+	tmp = vclk_ecp_cntl &
+		~(RADEON_PIXCLK_ALWAYS_ONb | RADEON_PIXCLK_DAC_ALWAYS_ONb);
+	WREG32_PLL(RADEON_VCLK_ECP_CNTL, tmp);
+
+	tmp = crtc_ext_cntl | RADEON_CRTC_CRT_ON;
+	WREG32(RADEON_CRTC_EXT_CNTL, tmp);
+
+	tmp = RADEON_DAC_FORCE_BLANK_OFF_EN |
+		RADEON_DAC_FORCE_DATA_EN;
+
+	if (color)
+		tmp |= RADEON_DAC_FORCE_DATA_SEL_RGB;
+	else
+		tmp |= RADEON_DAC_FORCE_DATA_SEL_G;
+
+	if (ASIC_IS_R300(rdev))
+		tmp |= (0x1b6 << RADEON_DAC_FORCE_DATA_SHIFT);
+	else
+		tmp |= (0x180 << RADEON_DAC_FORCE_DATA_SHIFT);
+
+	WREG32(RADEON_DAC_EXT_CNTL, tmp);
+
+	tmp = dac_cntl & ~(RADEON_DAC_RANGE_CNTL_MASK | RADEON_DAC_PDWN);
+	tmp |= RADEON_DAC_RANGE_CNTL_PS2 | RADEON_DAC_CMP_EN;
+	WREG32(RADEON_DAC_CNTL, tmp);
+
+	tmp &= ~(RADEON_DAC_PDWN_R |
+		 RADEON_DAC_PDWN_G |
+		 RADEON_DAC_PDWN_B);
+
+	WREG32(RADEON_DAC_MACRO_CNTL, tmp);
+
+	udelay(2000);
+
+	if (RREG32(RADEON_DAC_CNTL) & RADEON_DAC_CMP_OUTPUT)
+		found = connector_status_connected;
+
+	/* restore the regs we used */
+	WREG32(RADEON_DAC_CNTL, dac_cntl);
+	WREG32(RADEON_DAC_MACRO_CNTL, dac_macro_cntl);
+	WREG32(RADEON_DAC_EXT_CNTL, dac_ext_cntl);
+	WREG32(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl);
+	WREG32_PLL(RADEON_VCLK_ECP_CNTL, vclk_ecp_cntl);
+
+	return found;
+}
+
+static const struct drm_encoder_helper_funcs radeon_legacy_primary_dac_helper_funcs = {
+	.dpms = radeon_legacy_primary_dac_dpms,
+	.mode_fixup = radeon_legacy_primary_dac_mode_fixup,
+	.prepare = radeon_legacy_primary_dac_prepare,
+	.mode_set = radeon_legacy_primary_dac_mode_set,
+	.commit = radeon_legacy_primary_dac_commit,
+	.detect = radeon_legacy_primary_dac_detect,
+};
+
+
+static const struct drm_encoder_funcs radeon_legacy_primary_dac_enc_funcs = {
+	.destroy = radeon_enc_destroy,
+};
+
+static bool radeon_legacy_tmds_int_mode_fixup(struct drm_encoder *encoder,
+					      struct drm_display_mode *mode,
+					      struct drm_display_mode *adjusted_mode)
+{
+
+	drm_mode_set_crtcinfo(adjusted_mode, 0);
+
+	return true;
+}
+
+static void radeon_legacy_tmds_int_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t fp_gen_cntl = RREG32(RADEON_FP_GEN_CNTL);
+	DRM_DEBUG("\n");
+
+	switch (mode) {
+	case DRM_MODE_DPMS_ON:
+		fp_gen_cntl |= (RADEON_FP_FPON | RADEON_FP_TMDS_EN);
+		break;
+	case DRM_MODE_DPMS_STANDBY:
+	case DRM_MODE_DPMS_SUSPEND:
+	case DRM_MODE_DPMS_OFF:
+		fp_gen_cntl &= ~(RADEON_FP_FPON | RADEON_FP_TMDS_EN);
+		break;
+	}
+
+	WREG32(RADEON_FP_GEN_CNTL, fp_gen_cntl);
+
+	if (rdev->is_atom_bios)
+		radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
+	else
+		radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
+}
+
+static void radeon_legacy_tmds_int_prepare(struct drm_encoder *encoder)
+{
+	struct radeon_device *rdev = encoder->dev->dev_private;
+
+	if (rdev->is_atom_bios)
+		radeon_atom_output_lock(encoder, true);
+	else
+		radeon_combios_output_lock(encoder, true);
+	radeon_legacy_tmds_int_dpms(encoder, DRM_MODE_DPMS_OFF);
+}
+
+static void radeon_legacy_tmds_int_commit(struct drm_encoder *encoder)
+{
+	struct radeon_device *rdev = encoder->dev->dev_private;
+
+	radeon_legacy_tmds_int_dpms(encoder, DRM_MODE_DPMS_ON);
+
+	if (rdev->is_atom_bios)
+		radeon_atom_output_lock(encoder, true);
+	else
+		radeon_combios_output_lock(encoder, true);
+}
+
+static void radeon_legacy_tmds_int_mode_set(struct drm_encoder *encoder,
+					    struct drm_display_mode *mode,
+					    struct drm_display_mode *adjusted_mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	uint32_t tmp, tmds_pll_cntl, tmds_transmitter_cntl, fp_gen_cntl;
+	int i;
+
+	DRM_DEBUG("\n");
+
+	if (radeon_crtc->crtc_id == 0)
+		radeon_legacy_rmx_mode_set(encoder, mode, adjusted_mode);
+
+	tmp = tmds_pll_cntl = RREG32(RADEON_TMDS_PLL_CNTL);
+	tmp &= 0xfffff;
+	if (rdev->family == CHIP_RV280) {
+		/* bit 22 of TMDS_PLL_CNTL is read-back inverted */
+		tmp ^= (1 << 22);
+		tmds_pll_cntl ^= (1 << 22);
+	}
+
+	if (radeon_encoder->enc_priv) {
+		struct radeon_encoder_int_tmds *tmds = (struct radeon_encoder_int_tmds *)radeon_encoder->enc_priv;
+
+		for (i = 0; i < 4; i++) {
+			if (tmds->tmds_pll[i].freq == 0)
+				break;
+			if ((uint32_t)(mode->clock / 10) < tmds->tmds_pll[i].freq) {
+				tmp = tmds->tmds_pll[i].value ;
+				break;
+			}
+		}
+	}
+
+	if (ASIC_IS_R300(rdev) || (rdev->family == CHIP_RV280)) {
+		if (tmp & 0xfff00000)
+			tmds_pll_cntl = tmp;
+		else {
+			tmds_pll_cntl &= 0xfff00000;
+			tmds_pll_cntl |= tmp;
+		}
+	} else
+		tmds_pll_cntl = tmp;
+
+	tmds_transmitter_cntl = RREG32(RADEON_TMDS_TRANSMITTER_CNTL) &
+		~(RADEON_TMDS_TRANSMITTER_PLLRST);
+
+    if (rdev->family == CHIP_R200 ||
+	rdev->family == CHIP_R100 ||
+	ASIC_IS_R300(rdev))
+	    tmds_transmitter_cntl &= ~(RADEON_TMDS_TRANSMITTER_PLLEN);
+    else /* RV chips got this bit reversed */
+	    tmds_transmitter_cntl |= RADEON_TMDS_TRANSMITTER_PLLEN;
+
+    fp_gen_cntl = (RREG32(RADEON_FP_GEN_CNTL) |
+		   (RADEON_FP_CRTC_DONT_SHADOW_VPAR |
+		    RADEON_FP_CRTC_DONT_SHADOW_HEND));
+
+    fp_gen_cntl &= ~(RADEON_FP_FPON | RADEON_FP_TMDS_EN);
+
+    if (1) /*  FIXME rgbBits == 8 */
+	    fp_gen_cntl |= RADEON_FP_PANEL_FORMAT;  /* 24 bit format */
+    else
+	    fp_gen_cntl &= ~RADEON_FP_PANEL_FORMAT;/* 18 bit format */
+
+    if (radeon_crtc->crtc_id == 0) {
+	    if (ASIC_IS_R300(rdev) || rdev->family == CHIP_R200) {
+		    fp_gen_cntl &= ~R200_FP_SOURCE_SEL_MASK;
+		    if (radeon_encoder->flags & RADEON_USE_RMX)
+			    fp_gen_cntl |= R200_FP_SOURCE_SEL_RMX;
+		    else
+			    fp_gen_cntl |= R200_FP_SOURCE_SEL_CRTC1;
+	    } else
+		    fp_gen_cntl |= RADEON_FP_SEL_CRTC1;
+    } else {
+	    if (ASIC_IS_R300(rdev) || rdev->family == CHIP_R200) {
+		    fp_gen_cntl &= ~R200_FP_SOURCE_SEL_MASK;
+		    fp_gen_cntl |= R200_FP_SOURCE_SEL_CRTC2;
+	    } else
+		    fp_gen_cntl |= RADEON_FP_SEL_CRTC2;
+    }
+
+    WREG32(RADEON_TMDS_PLL_CNTL, tmds_pll_cntl);
+    WREG32(RADEON_TMDS_TRANSMITTER_CNTL, tmds_transmitter_cntl);
+    WREG32(RADEON_FP_GEN_CNTL, fp_gen_cntl);
+
+	if (rdev->is_atom_bios)
+		radeon_atombios_encoder_crtc_scratch_regs(encoder, radeon_crtc->crtc_id);
+	else
+		radeon_combios_encoder_crtc_scratch_regs(encoder, radeon_crtc->crtc_id);
+}
+
+static const struct drm_encoder_helper_funcs radeon_legacy_tmds_int_helper_funcs = {
+	.dpms = radeon_legacy_tmds_int_dpms,
+	.mode_fixup = radeon_legacy_tmds_int_mode_fixup,
+	.prepare = radeon_legacy_tmds_int_prepare,
+	.mode_set = radeon_legacy_tmds_int_mode_set,
+	.commit = radeon_legacy_tmds_int_commit,
+};
+
+
+static const struct drm_encoder_funcs radeon_legacy_tmds_int_enc_funcs = {
+	.destroy = radeon_enc_destroy,
+};
+
+static bool radeon_legacy_tmds_ext_mode_fixup(struct drm_encoder *encoder,
+					      struct drm_display_mode *mode,
+					      struct drm_display_mode *adjusted_mode)
+{
+
+	drm_mode_set_crtcinfo(adjusted_mode, 0);
+
+	return true;
+}
+
+static void radeon_legacy_tmds_ext_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t fp2_gen_cntl = RREG32(RADEON_FP2_GEN_CNTL);
+	DRM_DEBUG("\n");
+
+	switch (mode) {
+	case DRM_MODE_DPMS_ON:
+		fp2_gen_cntl &= ~RADEON_FP2_BLANK_EN;
+		fp2_gen_cntl |= (RADEON_FP2_ON | RADEON_FP2_DVO_EN);
+		break;
+	case DRM_MODE_DPMS_STANDBY:
+	case DRM_MODE_DPMS_SUSPEND:
+	case DRM_MODE_DPMS_OFF:
+		fp2_gen_cntl |= RADEON_FP2_BLANK_EN;
+		fp2_gen_cntl &= ~(RADEON_FP2_ON | RADEON_FP2_DVO_EN);
+		break;
+	}
+
+	WREG32(RADEON_FP2_GEN_CNTL, fp2_gen_cntl);
+
+	if (rdev->is_atom_bios)
+		radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
+	else
+		radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
+}
+
+static void radeon_legacy_tmds_ext_prepare(struct drm_encoder *encoder)
+{
+	struct radeon_device *rdev = encoder->dev->dev_private;
+
+	if (rdev->is_atom_bios)
+		radeon_atom_output_lock(encoder, true);
+	else
+		radeon_combios_output_lock(encoder, true);
+	radeon_legacy_tmds_ext_dpms(encoder, DRM_MODE_DPMS_OFF);
+}
+
+static void radeon_legacy_tmds_ext_commit(struct drm_encoder *encoder)
+{
+	struct radeon_device *rdev = encoder->dev->dev_private;
+	radeon_legacy_tmds_ext_dpms(encoder, DRM_MODE_DPMS_ON);
+
+	if (rdev->is_atom_bios)
+		radeon_atom_output_lock(encoder, false);
+	else
+		radeon_combios_output_lock(encoder, false);
+}
+
+static void radeon_legacy_tmds_ext_mode_set(struct drm_encoder *encoder,
+					    struct drm_display_mode *mode,
+					    struct drm_display_mode *adjusted_mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	uint32_t fp2_gen_cntl;
+
+	DRM_DEBUG("\n");
+
+	if (radeon_crtc->crtc_id == 0)
+		radeon_legacy_rmx_mode_set(encoder, mode, adjusted_mode);
+
+	if (rdev->is_atom_bios) {
+		radeon_encoder->pixel_clock = adjusted_mode->clock;
+		atombios_external_tmds_setup(encoder, ATOM_ENABLE);
+		fp2_gen_cntl = RREG32(RADEON_FP2_GEN_CNTL);
+	} else {
+		fp2_gen_cntl = RREG32(RADEON_FP2_GEN_CNTL);
+
+		if (1) /*  FIXME rgbBits == 8 */
+			fp2_gen_cntl |= RADEON_FP2_PANEL_FORMAT; /* 24 bit format, */
+		else
+			fp2_gen_cntl &= ~RADEON_FP2_PANEL_FORMAT;/* 18 bit format, */
+
+		fp2_gen_cntl &= ~(RADEON_FP2_ON |
+				  RADEON_FP2_DVO_EN |
+				  RADEON_FP2_DVO_RATE_SEL_SDR);
+
+		/* XXX: these are oem specific */
+		if (ASIC_IS_R300(rdev)) {
+			if ((dev->pdev->device == 0x4850) &&
+			    (dev->pdev->subsystem_vendor == 0x1028) &&
+			    (dev->pdev->subsystem_device == 0x2001)) /* Dell Inspiron 8600 */
+				fp2_gen_cntl |= R300_FP2_DVO_CLOCK_MODE_SINGLE;
+			else
+				fp2_gen_cntl |= RADEON_FP2_PAD_FLOP_EN | R300_FP2_DVO_CLOCK_MODE_SINGLE;
+
+			/*if (mode->clock > 165000)
+			  fp2_gen_cntl |= R300_FP2_DVO_DUAL_CHANNEL_EN;*/
+		}
+	}
+
+	if (radeon_crtc->crtc_id == 0) {
+		if ((rdev->family == CHIP_R200) || ASIC_IS_R300(rdev)) {
+			fp2_gen_cntl &= ~R200_FP2_SOURCE_SEL_MASK;
+			if (radeon_encoder->flags & RADEON_USE_RMX)
+				fp2_gen_cntl |= R200_FP2_SOURCE_SEL_RMX;
+			else
+				fp2_gen_cntl |= R200_FP2_SOURCE_SEL_CRTC1;
+		} else
+			fp2_gen_cntl &= ~RADEON_FP2_SRC_SEL_CRTC2;
+	} else {
+		if ((rdev->family == CHIP_R200) || ASIC_IS_R300(rdev)) {
+			fp2_gen_cntl &= ~R200_FP2_SOURCE_SEL_MASK;
+			fp2_gen_cntl |= R200_FP2_SOURCE_SEL_CRTC2;
+		} else
+			fp2_gen_cntl |= RADEON_FP2_SRC_SEL_CRTC2;
+	}
+
+	WREG32(RADEON_FP2_GEN_CNTL, fp2_gen_cntl);
+
+	if (rdev->is_atom_bios)
+		radeon_atombios_encoder_crtc_scratch_regs(encoder, radeon_crtc->crtc_id);
+	else
+		radeon_combios_encoder_crtc_scratch_regs(encoder, radeon_crtc->crtc_id);
+}
+
+static const struct drm_encoder_helper_funcs radeon_legacy_tmds_ext_helper_funcs = {
+	.dpms = radeon_legacy_tmds_ext_dpms,
+	.mode_fixup = radeon_legacy_tmds_ext_mode_fixup,
+	.prepare = radeon_legacy_tmds_ext_prepare,
+	.mode_set = radeon_legacy_tmds_ext_mode_set,
+	.commit = radeon_legacy_tmds_ext_commit,
+};
+
+
+static const struct drm_encoder_funcs radeon_legacy_tmds_ext_enc_funcs = {
+	.destroy = radeon_enc_destroy,
+};
+
+static bool radeon_legacy_tv_dac_mode_fixup(struct drm_encoder *encoder,
+					    struct drm_display_mode *mode,
+					    struct drm_display_mode *adjusted_mode)
+{
+
+	drm_mode_set_crtcinfo(adjusted_mode, 0);
+
+	return true;
+}
+
+static void radeon_legacy_tv_dac_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t fp2_gen_cntl = 0, crtc2_gen_cntl = 0, tv_dac_cntl = 0;
+	/* uint32_t tv_master_cntl = 0; */
+
+	DRM_DEBUG("\n");
+
+	if (rdev->family == CHIP_R200)
+		fp2_gen_cntl = RREG32(RADEON_FP2_GEN_CNTL);
+	else {
+		crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
+		/*  FIXME TV */
+		/* tv_master_cntl = RREG32(RADEON_TV_MASTER_CNTL); */
+		tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL);
+	}
+
+	switch (mode) {
+	case DRM_MODE_DPMS_ON:
+		if (rdev->family == CHIP_R200) {
+			fp2_gen_cntl |= (RADEON_FP2_ON | RADEON_FP2_DVO_EN);
+		} else {
+			crtc2_gen_cntl |= RADEON_CRTC2_CRT2_ON;
+			/* tv_master_cntl |= RADEON_TV_ON; */
+			if (rdev->family == CHIP_R420 ||
+					rdev->family == CHIP_R423 ||
+					rdev->family == CHIP_RV410)
+				tv_dac_cntl &= ~(R420_TV_DAC_RDACPD |
+						R420_TV_DAC_GDACPD |
+						R420_TV_DAC_BDACPD |
+						RADEON_TV_DAC_BGSLEEP);
+			else
+				tv_dac_cntl &= ~(RADEON_TV_DAC_RDACPD |
+						RADEON_TV_DAC_GDACPD |
+						RADEON_TV_DAC_BDACPD |
+						RADEON_TV_DAC_BGSLEEP);
+		}
+		break;
+	case DRM_MODE_DPMS_STANDBY:
+	case DRM_MODE_DPMS_SUSPEND:
+	case DRM_MODE_DPMS_OFF:
+		if (rdev->family == CHIP_R200)
+			fp2_gen_cntl &= ~(RADEON_FP2_ON | RADEON_FP2_DVO_EN);
+		else {
+			crtc2_gen_cntl &= ~RADEON_CRTC2_CRT2_ON;
+			/* tv_master_cntl &= ~RADEON_TV_ON; */
+			if (rdev->family == CHIP_R420 ||
+					rdev->family == CHIP_R423 ||
+					rdev->family == CHIP_RV410)
+				tv_dac_cntl |= (R420_TV_DAC_RDACPD |
+						R420_TV_DAC_GDACPD |
+						R420_TV_DAC_BDACPD |
+						RADEON_TV_DAC_BGSLEEP);
+			else
+				tv_dac_cntl |= (RADEON_TV_DAC_RDACPD |
+						RADEON_TV_DAC_GDACPD |
+						RADEON_TV_DAC_BDACPD |
+						RADEON_TV_DAC_BGSLEEP);
+		}
+		break;
+	}
+
+	if (rdev->family == CHIP_R200) {
+		WREG32(RADEON_FP2_GEN_CNTL, fp2_gen_cntl);
+	} else {
+		WREG32(RADEON_CRTC2_GEN_CNTL, crtc2_gen_cntl);
+		/* WREG32(RADEON_TV_MASTER_CNTL, tv_master_cntl); */
+		WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl);
+	}
+
+	if (rdev->is_atom_bios)
+		radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
+	else
+		radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
+}
+
+static void radeon_legacy_tv_dac_prepare(struct drm_encoder *encoder)
+{
+	struct radeon_device *rdev = encoder->dev->dev_private;
+
+	if (rdev->is_atom_bios)
+		radeon_atom_output_lock(encoder, true);
+	else
+		radeon_combios_output_lock(encoder, true);
+	radeon_legacy_tv_dac_dpms(encoder, DRM_MODE_DPMS_OFF);
+}
+
+static void radeon_legacy_tv_dac_commit(struct drm_encoder *encoder)
+{
+	struct radeon_device *rdev = encoder->dev->dev_private;
+
+	radeon_legacy_tv_dac_dpms(encoder, DRM_MODE_DPMS_ON);
+
+	if (rdev->is_atom_bios)
+		radeon_atom_output_lock(encoder, true);
+	else
+		radeon_combios_output_lock(encoder, true);
+}
+
+static void radeon_legacy_tv_dac_mode_set(struct drm_encoder *encoder,
+		struct drm_display_mode *mode,
+		struct drm_display_mode *adjusted_mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	uint32_t tv_dac_cntl, gpiopad_a = 0, dac2_cntl, disp_output_cntl = 0;
+	uint32_t disp_hw_debug = 0, fp2_gen_cntl = 0;
+
+	DRM_DEBUG("\n");
+
+	if (radeon_crtc->crtc_id == 0)
+		radeon_legacy_rmx_mode_set(encoder, mode, adjusted_mode);
+
+	if (rdev->family != CHIP_R200) {
+		tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL);
+		if (rdev->family == CHIP_R420 ||
+				rdev->family == CHIP_R423 ||
+				rdev->family == CHIP_RV410) {
+			tv_dac_cntl &= ~(RADEON_TV_DAC_STD_MASK |
+					RADEON_TV_DAC_BGADJ_MASK |
+					R420_TV_DAC_DACADJ_MASK |
+					R420_TV_DAC_RDACPD |
+					R420_TV_DAC_GDACPD |
+					R420_TV_DAC_GDACPD |
+					R420_TV_DAC_TVENABLE);
+		} else {
+			tv_dac_cntl &= ~(RADEON_TV_DAC_STD_MASK |
+					RADEON_TV_DAC_BGADJ_MASK |
+					RADEON_TV_DAC_DACADJ_MASK |
+					RADEON_TV_DAC_RDACPD |
+					RADEON_TV_DAC_GDACPD |
+					RADEON_TV_DAC_GDACPD);
+		}
+
+		/*  FIXME TV */
+		if (radeon_encoder->enc_priv) {
+			struct radeon_encoder_tv_dac *tv_dac = radeon_encoder->enc_priv;
+			tv_dac_cntl |= (RADEON_TV_DAC_NBLANK |
+					RADEON_TV_DAC_NHOLD |
+					RADEON_TV_DAC_STD_PS2 |
+					tv_dac->ps2_tvdac_adj);
+		} else
+			tv_dac_cntl |= (RADEON_TV_DAC_NBLANK |
+					RADEON_TV_DAC_NHOLD |
+					RADEON_TV_DAC_STD_PS2);
+
+		WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl);
+	}
+
+	if (ASIC_IS_R300(rdev)) {
+		gpiopad_a = RREG32(RADEON_GPIOPAD_A) | 1;
+		disp_output_cntl = RREG32(RADEON_DISP_OUTPUT_CNTL);
+	} else if (rdev->family == CHIP_R200)
+		fp2_gen_cntl = RREG32(RADEON_FP2_GEN_CNTL);
+	else
+		disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG);
+
+	dac2_cntl = RREG32(RADEON_DAC_CNTL2) | RADEON_DAC2_DAC2_CLK_SEL;
+
+	if (radeon_crtc->crtc_id == 0) {
+		if (ASIC_IS_R300(rdev)) {
+			disp_output_cntl &= ~RADEON_DISP_TVDAC_SOURCE_MASK;
+			disp_output_cntl |= RADEON_DISP_TVDAC_SOURCE_CRTC;
+		} else if (rdev->family == CHIP_R200) {
+			fp2_gen_cntl &= ~(R200_FP2_SOURCE_SEL_MASK |
+					  RADEON_FP2_DVO_RATE_SEL_SDR);
+		} else
+			disp_hw_debug |= RADEON_CRT2_DISP1_SEL;
+	} else {
+		if (ASIC_IS_R300(rdev)) {
+			disp_output_cntl &= ~RADEON_DISP_TVDAC_SOURCE_MASK;
+			disp_output_cntl |= RADEON_DISP_TVDAC_SOURCE_CRTC2;
+		} else if (rdev->family == CHIP_R200) {
+			fp2_gen_cntl &= ~(R200_FP2_SOURCE_SEL_MASK |
+					  RADEON_FP2_DVO_RATE_SEL_SDR);
+			fp2_gen_cntl |= R200_FP2_SOURCE_SEL_CRTC2;
+		} else
+			disp_hw_debug &= ~RADEON_CRT2_DISP1_SEL;
+	}
+
+	WREG32(RADEON_DAC_CNTL2, dac2_cntl);
+
+	if (ASIC_IS_R300(rdev)) {
+		WREG32_P(RADEON_GPIOPAD_A, gpiopad_a, ~1);
+		WREG32(RADEON_DISP_TV_OUT_CNTL, disp_output_cntl);
+	} else if (rdev->family == CHIP_R200)
+		WREG32(RADEON_FP2_GEN_CNTL, fp2_gen_cntl);
+	else
+		WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug);
+
+	if (rdev->is_atom_bios)
+		radeon_atombios_encoder_crtc_scratch_regs(encoder, radeon_crtc->crtc_id);
+	else
+		radeon_combios_encoder_crtc_scratch_regs(encoder, radeon_crtc->crtc_id);
+
+}
+
+static enum drm_connector_status radeon_legacy_tv_dac_detect(struct drm_encoder *encoder,
+							     struct drm_connector *connector)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t crtc2_gen_cntl, tv_dac_cntl, dac_cntl2, dac_ext_cntl;
+	uint32_t disp_hw_debug, disp_output_cntl, gpiopad_a, pixclks_cntl, tmp;
+	enum drm_connector_status found = connector_status_disconnected;
+	bool color = true;
+
+	/*  FIXME tv */
+
+	/* save the regs we need */
+	pixclks_cntl = RREG32_PLL(RADEON_PIXCLKS_CNTL);
+	gpiopad_a = ASIC_IS_R300(rdev) ? RREG32(RADEON_GPIOPAD_A) : 0;
+	disp_output_cntl = ASIC_IS_R300(rdev) ? RREG32(RADEON_DISP_OUTPUT_CNTL) : 0;
+	disp_hw_debug = ASIC_IS_R300(rdev) ? 0 : RREG32(RADEON_DISP_HW_DEBUG);
+	crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
+	tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL);
+	dac_ext_cntl = RREG32(RADEON_DAC_EXT_CNTL);
+	dac_cntl2 = RREG32(RADEON_DAC_CNTL2);
+
+	tmp = pixclks_cntl & ~(RADEON_PIX2CLK_ALWAYS_ONb
+			       | RADEON_PIX2CLK_DAC_ALWAYS_ONb);
+	WREG32_PLL(RADEON_PIXCLKS_CNTL, tmp);
+
+	if (ASIC_IS_R300(rdev))
+		WREG32_P(RADEON_GPIOPAD_A, 1, ~1);
+
+	tmp = crtc2_gen_cntl & ~RADEON_CRTC2_PIX_WIDTH_MASK;
+	tmp |= RADEON_CRTC2_CRT2_ON |
+		(2 << RADEON_CRTC2_PIX_WIDTH_SHIFT);
+
+	WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
+
+	if (ASIC_IS_R300(rdev)) {
+		tmp = disp_output_cntl & ~RADEON_DISP_TVDAC_SOURCE_MASK;
+		tmp |= RADEON_DISP_TVDAC_SOURCE_CRTC2;
+		WREG32(RADEON_DISP_OUTPUT_CNTL, tmp);
+	} else {
+		tmp = disp_hw_debug & ~RADEON_CRT2_DISP1_SEL;
+		WREG32(RADEON_DISP_HW_DEBUG, tmp);
+	}
+
+	tmp = RADEON_TV_DAC_NBLANK |
+		RADEON_TV_DAC_NHOLD |
+		RADEON_TV_MONITOR_DETECT_EN |
+		RADEON_TV_DAC_STD_PS2;
+
+	WREG32(RADEON_TV_DAC_CNTL, tmp);
+
+	tmp = RADEON_DAC2_FORCE_BLANK_OFF_EN |
+		RADEON_DAC2_FORCE_DATA_EN;
+
+	if (color)
+		tmp |= RADEON_DAC_FORCE_DATA_SEL_RGB;
+	else
+		tmp |= RADEON_DAC_FORCE_DATA_SEL_G;
+
+	if (ASIC_IS_R300(rdev))
+		tmp |= (0x1b6 << RADEON_DAC_FORCE_DATA_SHIFT);
+	else
+		tmp |= (0x180 << RADEON_DAC_FORCE_DATA_SHIFT);
+
+	WREG32(RADEON_DAC_EXT_CNTL, tmp);
+
+	tmp = dac_cntl2 | RADEON_DAC2_DAC2_CLK_SEL | RADEON_DAC2_CMP_EN;
+	WREG32(RADEON_DAC_CNTL2, tmp);
+
+	udelay(10000);
+
+	if (ASIC_IS_R300(rdev)) {
+		if (RREG32(RADEON_DAC_CNTL2) & RADEON_DAC2_CMP_OUT_B)
+			found = connector_status_connected;
+	} else {
+		if (RREG32(RADEON_DAC_CNTL2) & RADEON_DAC2_CMP_OUTPUT)
+			found = connector_status_connected;
+	}
+
+	/* restore regs we used */
+	WREG32(RADEON_DAC_CNTL2, dac_cntl2);
+	WREG32(RADEON_DAC_EXT_CNTL, dac_ext_cntl);
+	WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl);
+	WREG32(RADEON_CRTC2_GEN_CNTL, crtc2_gen_cntl);
+
+	if (ASIC_IS_R300(rdev)) {
+		WREG32(RADEON_DISP_OUTPUT_CNTL, disp_output_cntl);
+		WREG32_P(RADEON_GPIOPAD_A, gpiopad_a, ~1);
+	} else {
+		WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug);
+	}
+	WREG32_PLL(RADEON_PIXCLKS_CNTL, pixclks_cntl);
+
+	/* return found; */
+	return connector_status_disconnected;
+
+}
+
+static const struct drm_encoder_helper_funcs radeon_legacy_tv_dac_helper_funcs = {
+	.dpms = radeon_legacy_tv_dac_dpms,
+	.mode_fixup = radeon_legacy_tv_dac_mode_fixup,
+	.prepare = radeon_legacy_tv_dac_prepare,
+	.mode_set = radeon_legacy_tv_dac_mode_set,
+	.commit = radeon_legacy_tv_dac_commit,
+	.detect = radeon_legacy_tv_dac_detect,
+};
+
+
+static const struct drm_encoder_funcs radeon_legacy_tv_dac_enc_funcs = {
+	.destroy = radeon_enc_destroy,
+};
+
+void
+radeon_add_legacy_encoder(struct drm_device *dev, uint32_t encoder_id, uint32_t supported_device)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_encoder *encoder;
+	struct radeon_encoder *radeon_encoder;
+
+	/* see if we already added it */
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+		radeon_encoder = to_radeon_encoder(encoder);
+		if (radeon_encoder->encoder_id == encoder_id) {
+			radeon_encoder->devices |= supported_device;
+			return;
+		}
+
+	}
+
+	/* add a new one */
+	radeon_encoder = kzalloc(sizeof(struct radeon_encoder), GFP_KERNEL);
+	if (!radeon_encoder)
+		return;
+
+	encoder = &radeon_encoder->base;
+	encoder->possible_crtcs = 0x3;
+	encoder->possible_clones = 0;
+
+	radeon_encoder->enc_priv = NULL;
+
+	radeon_encoder->encoder_id = encoder_id;
+	radeon_encoder->devices = supported_device;
+
+	switch (radeon_encoder->encoder_id) {
+	case ENCODER_OBJECT_ID_INTERNAL_LVDS:
+		drm_encoder_init(dev, encoder, &radeon_legacy_lvds_enc_funcs, DRM_MODE_ENCODER_LVDS);
+		drm_encoder_helper_add(encoder, &radeon_legacy_lvds_helper_funcs);
+		if (rdev->is_atom_bios)
+			radeon_encoder->enc_priv = radeon_atombios_get_lvds_info(radeon_encoder);
+		else
+			radeon_encoder->enc_priv = radeon_combios_get_lvds_info(radeon_encoder);
+		radeon_encoder->rmx_type = RMX_FULL;
+		break;
+	case ENCODER_OBJECT_ID_INTERNAL_TMDS1:
+		drm_encoder_init(dev, encoder, &radeon_legacy_tmds_int_enc_funcs, DRM_MODE_ENCODER_TMDS);
+		drm_encoder_helper_add(encoder, &radeon_legacy_tmds_int_helper_funcs);
+		if (rdev->is_atom_bios)
+			radeon_encoder->enc_priv = radeon_atombios_get_tmds_info(radeon_encoder);
+		else
+			radeon_encoder->enc_priv = radeon_combios_get_tmds_info(radeon_encoder);
+		break;
+	case ENCODER_OBJECT_ID_INTERNAL_DAC1:
+		drm_encoder_init(dev, encoder, &radeon_legacy_primary_dac_enc_funcs, DRM_MODE_ENCODER_DAC);
+		drm_encoder_helper_add(encoder, &radeon_legacy_primary_dac_helper_funcs);
+		if (!rdev->is_atom_bios)
+			radeon_encoder->enc_priv = radeon_combios_get_primary_dac_info(radeon_encoder);
+		break;
+	case ENCODER_OBJECT_ID_INTERNAL_DAC2:
+		drm_encoder_init(dev, encoder, &radeon_legacy_tv_dac_enc_funcs, DRM_MODE_ENCODER_TVDAC);
+		drm_encoder_helper_add(encoder, &radeon_legacy_tv_dac_helper_funcs);
+		if (!rdev->is_atom_bios)
+			radeon_encoder->enc_priv = radeon_combios_get_tv_dac_info(radeon_encoder);
+		break;
+	case ENCODER_OBJECT_ID_INTERNAL_DVO1:
+		drm_encoder_init(dev, encoder, &radeon_legacy_tmds_ext_enc_funcs, DRM_MODE_ENCODER_TMDS);
+		drm_encoder_helper_add(encoder, &radeon_legacy_tmds_ext_helper_funcs);
+		if (!rdev->is_atom_bios)
+			radeon_combios_get_ext_tmds_info(radeon_encoder);
+		break;
+	}
+}
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
new file mode 100644
index 00000000000..4b37746eae5
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -0,0 +1,394 @@
+/*
+ * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
+ *                VA Linux Systems Inc., Fremont, California.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Original Authors:
+ *   Kevin E. Martin, Rickard E. Faith, Alan Hourihane
+ *
+ * Kernel port Author: Dave Airlie
+ */
+
+#ifndef RADEON_MODE_H
+#define RADEON_MODE_H
+
+#include <drm_crtc.h>
+#include <drm_mode.h>
+#include <drm_edid.h>
+#include <linux/i2c.h>
+#include <linux/i2c-id.h>
+#include <linux/i2c-algo-bit.h>
+
+#define to_radeon_crtc(x) container_of(x, struct radeon_crtc, base)
+#define to_radeon_connector(x) container_of(x, struct radeon_connector, base)
+#define to_radeon_encoder(x) container_of(x, struct radeon_encoder, base)
+#define to_radeon_framebuffer(x) container_of(x, struct radeon_framebuffer, base)
+
+enum radeon_connector_type {
+	CONNECTOR_NONE,
+	CONNECTOR_VGA,
+	CONNECTOR_DVI_I,
+	CONNECTOR_DVI_D,
+	CONNECTOR_DVI_A,
+	CONNECTOR_STV,
+	CONNECTOR_CTV,
+	CONNECTOR_LVDS,
+	CONNECTOR_DIGITAL,
+	CONNECTOR_SCART,
+	CONNECTOR_HDMI_TYPE_A,
+	CONNECTOR_HDMI_TYPE_B,
+	CONNECTOR_0XC,
+	CONNECTOR_0XD,
+	CONNECTOR_DIN,
+	CONNECTOR_DISPLAY_PORT,
+	CONNECTOR_UNSUPPORTED
+};
+
+enum radeon_dvi_type {
+	DVI_AUTO,
+	DVI_DIGITAL,
+	DVI_ANALOG
+};
+
+enum radeon_rmx_type {
+	RMX_OFF,
+	RMX_FULL,
+	RMX_CENTER,
+	RMX_ASPECT
+};
+
+enum radeon_tv_std {
+	TV_STD_NTSC,
+	TV_STD_PAL,
+	TV_STD_PAL_M,
+	TV_STD_PAL_60,
+	TV_STD_NTSC_J,
+	TV_STD_SCART_PAL,
+	TV_STD_SECAM,
+	TV_STD_PAL_CN,
+};
+
+struct radeon_i2c_bus_rec {
+	bool valid;
+	uint32_t mask_clk_reg;
+	uint32_t mask_data_reg;
+	uint32_t a_clk_reg;
+	uint32_t a_data_reg;
+	uint32_t put_clk_reg;
+	uint32_t put_data_reg;
+	uint32_t get_clk_reg;
+	uint32_t get_data_reg;
+	uint32_t mask_clk_mask;
+	uint32_t mask_data_mask;
+	uint32_t put_clk_mask;
+	uint32_t put_data_mask;
+	uint32_t get_clk_mask;
+	uint32_t get_data_mask;
+	uint32_t a_clk_mask;
+	uint32_t a_data_mask;
+};
+
+struct radeon_tmds_pll {
+    uint32_t freq;
+    uint32_t value;
+};
+
+#define RADEON_MAX_BIOS_CONNECTOR 16
+
+#define RADEON_PLL_USE_BIOS_DIVS        (1 << 0)
+#define RADEON_PLL_NO_ODD_POST_DIV      (1 << 1)
+#define RADEON_PLL_USE_REF_DIV          (1 << 2)
+#define RADEON_PLL_LEGACY               (1 << 3)
+#define RADEON_PLL_PREFER_LOW_REF_DIV   (1 << 4)
+#define RADEON_PLL_PREFER_HIGH_REF_DIV  (1 << 5)
+#define RADEON_PLL_PREFER_LOW_FB_DIV    (1 << 6)
+#define RADEON_PLL_PREFER_HIGH_FB_DIV   (1 << 7)
+#define RADEON_PLL_PREFER_LOW_POST_DIV  (1 << 8)
+#define RADEON_PLL_PREFER_HIGH_POST_DIV (1 << 9)
+#define RADEON_PLL_USE_FRAC_FB_DIV      (1 << 10)
+
+struct radeon_pll {
+	uint16_t reference_freq;
+	uint16_t reference_div;
+	uint32_t pll_in_min;
+	uint32_t pll_in_max;
+	uint32_t pll_out_min;
+	uint32_t pll_out_max;
+	uint16_t xclk;
+
+	uint32_t min_ref_div;
+	uint32_t max_ref_div;
+	uint32_t min_post_div;
+	uint32_t max_post_div;
+	uint32_t min_feedback_div;
+	uint32_t max_feedback_div;
+	uint32_t min_frac_feedback_div;
+	uint32_t max_frac_feedback_div;
+	uint32_t best_vco;
+};
+
+struct radeon_i2c_chan {
+	struct drm_device *dev;
+	struct i2c_adapter adapter;
+	struct i2c_algo_bit_data algo;
+	struct radeon_i2c_bus_rec rec;
+};
+
+/* mostly for macs, but really any system without connector tables */
+enum radeon_connector_table {
+	CT_NONE,
+	CT_GENERIC,
+	CT_IBOOK,
+	CT_POWERBOOK_EXTERNAL,
+	CT_POWERBOOK_INTERNAL,
+	CT_POWERBOOK_VGA,
+	CT_MINI_EXTERNAL,
+	CT_MINI_INTERNAL,
+	CT_IMAC_G5_ISIGHT,
+	CT_EMAC,
+};
+
+struct radeon_mode_info {
+	struct atom_context *atom_context;
+	enum radeon_connector_table connector_table;
+	bool mode_config_initialized;
+};
+
+struct radeon_crtc {
+	struct drm_crtc base;
+	int crtc_id;
+	u16 lut_r[256], lut_g[256], lut_b[256];
+	bool enabled;
+	bool can_tile;
+	uint32_t crtc_offset;
+	struct radeon_framebuffer *fbdev_fb;
+	struct drm_mode_set mode_set;
+	struct drm_gem_object *cursor_bo;
+	uint64_t cursor_addr;
+	int cursor_width;
+	int cursor_height;
+};
+
+#define RADEON_USE_RMX 1
+
+struct radeon_native_mode {
+	/* preferred mode */
+	uint32_t panel_xres, panel_yres;
+	uint32_t hoverplus, hsync_width;
+	uint32_t hblank;
+	uint32_t voverplus, vsync_width;
+	uint32_t vblank;
+	uint32_t dotclock;
+	uint32_t flags;
+};
+
+struct radeon_encoder_primary_dac {
+	/* legacy primary dac */
+	uint32_t ps2_pdac_adj;
+};
+
+struct radeon_encoder_lvds {
+	/* legacy lvds */
+	uint16_t panel_vcc_delay;
+	uint8_t  panel_pwr_delay;
+	uint8_t  panel_digon_delay;
+	uint8_t  panel_blon_delay;
+	uint16_t panel_ref_divider;
+	uint8_t  panel_post_divider;
+	uint16_t panel_fb_divider;
+	bool     use_bios_dividers;
+	uint32_t lvds_gen_cntl;
+	/* panel mode */
+	struct radeon_native_mode native_mode;
+};
+
+struct radeon_encoder_tv_dac {
+	/* legacy tv dac */
+	uint32_t ps2_tvdac_adj;
+	uint32_t ntsc_tvdac_adj;
+	uint32_t pal_tvdac_adj;
+
+	enum radeon_tv_std tv_std;
+};
+
+struct radeon_encoder_int_tmds {
+	/* legacy int tmds */
+	struct radeon_tmds_pll tmds_pll[4];
+};
+
+struct radeon_encoder_atom_dig {
+	/* atom dig */
+	bool coherent_mode;
+	int dig_block;
+	/* atom lvds */
+	uint32_t lvds_misc;
+	uint16_t panel_pwr_delay;
+	/* panel mode */
+	struct radeon_native_mode native_mode;
+};
+
+struct radeon_encoder {
+	struct drm_encoder base;
+	uint32_t encoder_id;
+	uint32_t devices;
+	uint32_t flags;
+	uint32_t pixel_clock;
+	enum radeon_rmx_type rmx_type;
+	struct radeon_native_mode native_mode;
+	void *enc_priv;
+};
+
+struct radeon_connector_atom_dig {
+	uint32_t igp_lane_info;
+	bool linkb;
+};
+
+struct radeon_connector {
+	struct drm_connector base;
+	uint32_t connector_id;
+	uint32_t devices;
+	struct radeon_i2c_chan *ddc_bus;
+	int use_digital;
+	void *con_priv;
+};
+
+struct radeon_framebuffer {
+	struct drm_framebuffer base;
+	struct drm_gem_object *obj;
+};
+
+extern struct radeon_i2c_chan *radeon_i2c_create(struct drm_device *dev,
+						 struct radeon_i2c_bus_rec *rec,
+						 const char *name);
+extern void radeon_i2c_destroy(struct radeon_i2c_chan *i2c);
+extern bool radeon_ddc_probe(struct radeon_connector *radeon_connector);
+extern int radeon_ddc_get_modes(struct radeon_connector *radeon_connector);
+
+extern struct drm_encoder *radeon_best_encoder(struct drm_connector *connector);
+
+extern void radeon_compute_pll(struct radeon_pll *pll,
+			       uint64_t freq,
+			       uint32_t *dot_clock_p,
+			       uint32_t *fb_div_p,
+			       uint32_t *frac_fb_div_p,
+			       uint32_t *ref_div_p,
+			       uint32_t *post_div_p,
+			       int flags);
+
+struct drm_encoder *radeon_encoder_legacy_lvds_add(struct drm_device *dev, int bios_index);
+struct drm_encoder *radeon_encoder_legacy_primary_dac_add(struct drm_device *dev, int bios_index, int with_tv);
+struct drm_encoder *radeon_encoder_legacy_tv_dac_add(struct drm_device *dev, int bios_index, int with_tv);
+struct drm_encoder *radeon_encoder_legacy_tmds_int_add(struct drm_device *dev, int bios_index);
+struct drm_encoder *radeon_encoder_legacy_tmds_ext_add(struct drm_device *dev, int bios_index);
+extern void atombios_external_tmds_setup(struct drm_encoder *encoder, int action);
+extern int atombios_get_encoder_mode(struct drm_encoder *encoder);
+
+extern void radeon_crtc_load_lut(struct drm_crtc *crtc);
+extern int atombios_crtc_set_base(struct drm_crtc *crtc, int x, int y,
+				   struct drm_framebuffer *old_fb);
+extern int atombios_crtc_mode_set(struct drm_crtc *crtc,
+				   struct drm_display_mode *mode,
+				   struct drm_display_mode *adjusted_mode,
+				   int x, int y,
+				   struct drm_framebuffer *old_fb);
+extern void atombios_crtc_dpms(struct drm_crtc *crtc, int mode);
+
+extern int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y,
+				 struct drm_framebuffer *old_fb);
+extern void radeon_legacy_atom_set_surface(struct drm_crtc *crtc);
+
+extern int radeon_crtc_cursor_set(struct drm_crtc *crtc,
+				  struct drm_file *file_priv,
+				  uint32_t handle,
+				  uint32_t width,
+				  uint32_t height);
+extern int radeon_crtc_cursor_move(struct drm_crtc *crtc,
+				   int x, int y);
+
+extern bool radeon_atom_get_clock_info(struct drm_device *dev);
+extern bool radeon_combios_get_clock_info(struct drm_device *dev);
+extern struct radeon_encoder_atom_dig *
+radeon_atombios_get_lvds_info(struct radeon_encoder *encoder);
+extern struct radeon_encoder_int_tmds *
+radeon_atombios_get_tmds_info(struct radeon_encoder *encoder);
+extern struct radeon_encoder_lvds *
+radeon_combios_get_lvds_info(struct radeon_encoder *encoder);
+extern struct radeon_encoder_int_tmds *
+radeon_combios_get_tmds_info(struct radeon_encoder *encoder);
+extern void radeon_combios_get_ext_tmds_info(struct radeon_encoder *encoder);
+extern struct radeon_encoder_tv_dac *
+radeon_combios_get_tv_dac_info(struct radeon_encoder *encoder);
+extern struct radeon_encoder_primary_dac *
+radeon_combios_get_primary_dac_info(struct radeon_encoder *encoder);
+extern void radeon_combios_output_lock(struct drm_encoder *encoder, bool lock);
+extern void radeon_combios_initialize_bios_scratch_regs(struct drm_device *dev);
+extern void radeon_atom_output_lock(struct drm_encoder *encoder, bool lock);
+extern void radeon_atom_initialize_bios_scratch_regs(struct drm_device *dev);
+extern void
+radeon_atombios_encoder_crtc_scratch_regs(struct drm_encoder *encoder, int crtc);
+extern void
+radeon_atombios_encoder_dpms_scratch_regs(struct drm_encoder *encoder, bool on);
+extern void
+radeon_combios_encoder_crtc_scratch_regs(struct drm_encoder *encoder, int crtc);
+extern void
+radeon_combios_encoder_dpms_scratch_regs(struct drm_encoder *encoder, bool on);
+extern void radeon_crtc_fb_gamma_set(struct drm_crtc *crtc, u16 red, u16 green,
+				     u16 blue, int regno);
+struct drm_framebuffer *radeon_framebuffer_create(struct drm_device *dev,
+						  struct drm_mode_fb_cmd *mode_cmd,
+						  struct drm_gem_object *obj);
+
+int radeonfb_probe(struct drm_device *dev);
+
+int radeonfb_remove(struct drm_device *dev, struct drm_framebuffer *fb);
+bool radeon_get_legacy_connector_info_from_bios(struct drm_device *dev);
+bool radeon_get_legacy_connector_info_from_table(struct drm_device *dev);
+void radeon_atombios_init_crtc(struct drm_device *dev,
+			       struct radeon_crtc *radeon_crtc);
+void radeon_legacy_init_crtc(struct drm_device *dev,
+			     struct radeon_crtc *radeon_crtc);
+void radeon_i2c_do_lock(struct radeon_connector *radeon_connector, int lock_state);
+
+void radeon_get_clock_info(struct drm_device *dev);
+
+extern bool radeon_get_atom_connector_info_from_object_table(struct drm_device *dev);
+extern bool radeon_get_atom_connector_info_from_supported_devices_table(struct drm_device *dev);
+
+void radeon_rmx_mode_fixup(struct drm_encoder *encoder,
+			   struct drm_display_mode *mode,
+			   struct drm_display_mode *adjusted_mode);
+void radeon_enc_destroy(struct drm_encoder *encoder);
+void radeon_copy_fb(struct drm_device *dev, struct drm_gem_object *dst_obj);
+void radeon_combios_asic_init(struct drm_device *dev);
+extern int radeon_static_clocks_init(struct drm_device *dev);
+void radeon_init_disp_bw_legacy(struct drm_device *dev,
+				struct drm_display_mode *mode1,
+				uint32_t pixel_bytes1,
+				struct drm_display_mode *mode2,
+				uint32_t pixel_bytes2);
+void radeon_init_disp_bw_avivo(struct drm_device *dev,
+			       struct drm_display_mode *mode1,
+			       uint32_t pixel_bytes1,
+			       struct drm_display_mode *mode2,
+			       uint32_t pixel_bytes2);
+void radeon_init_disp_bandwidth(struct drm_device *dev);
+
+#endif
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
new file mode 100644
index 00000000000..983e8df5e00
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -0,0 +1,511 @@
+/*
+ * Copyright 2009 Jerome Glisse.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ *    Jerome Glisse <glisse@freedesktop.org>
+ *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
+ *    Dave Airlie
+ */
+#include <linux/list.h>
+#include <drm/drmP.h>
+#include "radeon_drm.h"
+#include "radeon.h"
+
+struct radeon_object {
+	struct ttm_buffer_object	tobj;
+	struct list_head		list;
+	struct radeon_device		*rdev;
+	struct drm_gem_object		*gobj;
+	struct ttm_bo_kmap_obj		kmap;
+	unsigned			pin_count;
+	uint64_t			gpu_addr;
+	void				*kptr;
+	bool				is_iomem;
+};
+
+int radeon_ttm_init(struct radeon_device *rdev);
+void radeon_ttm_fini(struct radeon_device *rdev);
+
+/*
+ * To exclude mutual BO access we rely on bo_reserve exclusion, as all
+ * function are calling it.
+ */
+
+static int radeon_object_reserve(struct radeon_object *robj, bool interruptible)
+{
+	return ttm_bo_reserve(&robj->tobj, interruptible, false, false, 0);
+}
+
+static void radeon_object_unreserve(struct radeon_object *robj)
+{
+	ttm_bo_unreserve(&robj->tobj);
+}
+
+static void radeon_ttm_object_object_destroy(struct ttm_buffer_object *tobj)
+{
+	struct radeon_object *robj;
+
+	robj = container_of(tobj, struct radeon_object, tobj);
+	list_del_init(&robj->list);
+	kfree(robj);
+}
+
+static inline void radeon_object_gpu_addr(struct radeon_object *robj)
+{
+	/* Default gpu address */
+	robj->gpu_addr = 0xFFFFFFFFFFFFFFFFULL;
+	if (robj->tobj.mem.mm_node == NULL) {
+		return;
+	}
+	robj->gpu_addr = ((u64)robj->tobj.mem.mm_node->start) << PAGE_SHIFT;
+	switch (robj->tobj.mem.mem_type) {
+	case TTM_PL_VRAM:
+		robj->gpu_addr += (u64)robj->rdev->mc.vram_location;
+		break;
+	case TTM_PL_TT:
+		robj->gpu_addr += (u64)robj->rdev->mc.gtt_location;
+		break;
+	default:
+		DRM_ERROR("Unknown placement %d\n", robj->tobj.mem.mem_type);
+		robj->gpu_addr = 0xFFFFFFFFFFFFFFFFULL;
+		return;
+	}
+}
+
+static inline uint32_t radeon_object_flags_from_domain(uint32_t domain)
+{
+	uint32_t flags = 0;
+	if (domain & RADEON_GEM_DOMAIN_VRAM) {
+		flags |= TTM_PL_FLAG_VRAM;
+	}
+	if (domain & RADEON_GEM_DOMAIN_GTT) {
+		flags |= TTM_PL_FLAG_TT;
+	}
+	if (domain & RADEON_GEM_DOMAIN_CPU) {
+		flags |= TTM_PL_FLAG_SYSTEM;
+	}
+	if (!flags) {
+		flags |= TTM_PL_FLAG_SYSTEM;
+	}
+	return flags;
+}
+
+int radeon_object_create(struct radeon_device *rdev,
+			 struct drm_gem_object *gobj,
+			 unsigned long size,
+			 bool kernel,
+			 uint32_t domain,
+			 bool interruptible,
+			 struct radeon_object **robj_ptr)
+{
+	struct radeon_object *robj;
+	enum ttm_bo_type type;
+	uint32_t flags;
+	int r;
+
+	if (unlikely(rdev->mman.bdev.dev_mapping == NULL)) {
+		rdev->mman.bdev.dev_mapping = rdev->ddev->dev_mapping;
+	}
+	if (kernel) {
+		type = ttm_bo_type_kernel;
+	} else {
+		type = ttm_bo_type_device;
+	}
+	*robj_ptr = NULL;
+	robj = kzalloc(sizeof(struct radeon_object), GFP_KERNEL);
+	if (robj == NULL) {
+		return -ENOMEM;
+	}
+	robj->rdev = rdev;
+	robj->gobj = gobj;
+	INIT_LIST_HEAD(&robj->list);
+
+	flags = radeon_object_flags_from_domain(domain);
+	r = ttm_buffer_object_init(&rdev->mman.bdev, &robj->tobj, size, type, flags,
+				   0, 0, false, NULL, size,
+				   &radeon_ttm_object_object_destroy);
+	if (unlikely(r != 0)) {
+		/* ttm call radeon_ttm_object_object_destroy if error happen */
+		DRM_ERROR("Failed to allocate TTM object (%ld, 0x%08X, %u)\n",
+			  size, flags, 0);
+		return r;
+	}
+	*robj_ptr = robj;
+	if (gobj) {
+		list_add_tail(&robj->list, &rdev->gem.objects);
+	}
+	return 0;
+}
+
+int radeon_object_kmap(struct radeon_object *robj, void **ptr)
+{
+	int r;
+
+	spin_lock(&robj->tobj.lock);
+	if (robj->kptr) {
+		if (ptr) {
+			*ptr = robj->kptr;
+		}
+		spin_unlock(&robj->tobj.lock);
+		return 0;
+	}
+	spin_unlock(&robj->tobj.lock);
+	r = ttm_bo_kmap(&robj->tobj, 0, robj->tobj.num_pages, &robj->kmap);
+	if (r) {
+		return r;
+	}
+	spin_lock(&robj->tobj.lock);
+	robj->kptr = ttm_kmap_obj_virtual(&robj->kmap, &robj->is_iomem);
+	spin_unlock(&robj->tobj.lock);
+	if (ptr) {
+		*ptr = robj->kptr;
+	}
+	return 0;
+}
+
+void radeon_object_kunmap(struct radeon_object *robj)
+{
+	spin_lock(&robj->tobj.lock);
+	if (robj->kptr == NULL) {
+		spin_unlock(&robj->tobj.lock);
+		return;
+	}
+	robj->kptr = NULL;
+	spin_unlock(&robj->tobj.lock);
+	ttm_bo_kunmap(&robj->kmap);
+}
+
+void radeon_object_unref(struct radeon_object **robj)
+{
+	struct ttm_buffer_object *tobj;
+
+	if ((*robj) == NULL) {
+		return;
+	}
+	tobj = &((*robj)->tobj);
+	ttm_bo_unref(&tobj);
+	if (tobj == NULL) {
+		*robj = NULL;
+	}
+}
+
+int radeon_object_mmap(struct radeon_object *robj, uint64_t *offset)
+{
+	*offset = robj->tobj.addr_space_offset;
+	return 0;
+}
+
+int radeon_object_pin(struct radeon_object *robj, uint32_t domain,
+		      uint64_t *gpu_addr)
+{
+	uint32_t flags;
+	uint32_t tmp;
+	void *fbptr;
+	int r;
+
+	flags = radeon_object_flags_from_domain(domain);
+	spin_lock(&robj->tobj.lock);
+	if (robj->pin_count) {
+		robj->pin_count++;
+		if (gpu_addr != NULL) {
+			*gpu_addr = robj->gpu_addr;
+		}
+		spin_unlock(&robj->tobj.lock);
+		return 0;
+	}
+	spin_unlock(&robj->tobj.lock);
+	r = radeon_object_reserve(robj, false);
+	if (unlikely(r != 0)) {
+		DRM_ERROR("radeon: failed to reserve object for pinning it.\n");
+		return r;
+	}
+	if (robj->rdev->fbdev_robj == robj) {
+		mutex_lock(&robj->rdev->fbdev_info->lock);
+		radeon_object_kunmap(robj);
+	}
+	tmp = robj->tobj.mem.placement;
+	ttm_flag_masked(&tmp, flags, TTM_PL_MASK_MEM);
+	robj->tobj.proposed_placement = tmp | TTM_PL_FLAG_NO_EVICT | TTM_PL_MASK_CACHING;
+	r = ttm_buffer_object_validate(&robj->tobj,
+				       robj->tobj.proposed_placement,
+				       false, false);
+	radeon_object_gpu_addr(robj);
+	if (gpu_addr != NULL) {
+		*gpu_addr = robj->gpu_addr;
+	}
+	robj->pin_count = 1;
+	if (unlikely(r != 0)) {
+		DRM_ERROR("radeon: failed to pin object.\n");
+	}
+	radeon_object_unreserve(robj);
+	if (robj->rdev->fbdev_robj == robj) {
+		if (!r) {
+			r = radeon_object_kmap(robj, &fbptr);
+		}
+		if (!r) {
+			robj->rdev->fbdev_info->screen_base = fbptr;
+			robj->rdev->fbdev_info->fix.smem_start = (unsigned long)fbptr;
+		}
+		mutex_unlock(&robj->rdev->fbdev_info->lock);
+	}
+	return r;
+}
+
+void radeon_object_unpin(struct radeon_object *robj)
+{
+	uint32_t flags;
+	void *fbptr;
+	int r;
+
+	spin_lock(&robj->tobj.lock);
+	if (!robj->pin_count) {
+		spin_unlock(&robj->tobj.lock);
+		printk(KERN_WARNING "Unpin not necessary for %p !\n", robj);
+		return;
+	}
+	robj->pin_count--;
+	if (robj->pin_count) {
+		spin_unlock(&robj->tobj.lock);
+		return;
+	}
+	spin_unlock(&robj->tobj.lock);
+	r = radeon_object_reserve(robj, false);
+	if (unlikely(r != 0)) {
+		DRM_ERROR("radeon: failed to reserve object for unpinning it.\n");
+		return;
+	}
+	if (robj->rdev->fbdev_robj == robj) {
+		mutex_lock(&robj->rdev->fbdev_info->lock);
+		radeon_object_kunmap(robj);
+	}
+	flags = robj->tobj.mem.placement;
+	robj->tobj.proposed_placement = flags & ~TTM_PL_FLAG_NO_EVICT;
+	r = ttm_buffer_object_validate(&robj->tobj,
+				       robj->tobj.proposed_placement,
+				       false, false);
+	if (unlikely(r != 0)) {
+		DRM_ERROR("radeon: failed to unpin buffer.\n");
+	}
+	radeon_object_unreserve(robj);
+	if (robj->rdev->fbdev_robj == robj) {
+		if (!r) {
+			r = radeon_object_kmap(robj, &fbptr);
+		}
+		if (!r) {
+			robj->rdev->fbdev_info->screen_base = fbptr;
+			robj->rdev->fbdev_info->fix.smem_start = (unsigned long)fbptr;
+		}
+		mutex_unlock(&robj->rdev->fbdev_info->lock);
+	}
+}
+
+int radeon_object_wait(struct radeon_object *robj)
+{
+	int r = 0;
+
+	/* FIXME: should use block reservation instead */
+	r = radeon_object_reserve(robj, true);
+	if (unlikely(r != 0)) {
+		DRM_ERROR("radeon: failed to reserve object for waiting.\n");
+		return r;
+	}
+	spin_lock(&robj->tobj.lock);
+	if (robj->tobj.sync_obj) {
+		r = ttm_bo_wait(&robj->tobj, true, false, false);
+	}
+	spin_unlock(&robj->tobj.lock);
+	radeon_object_unreserve(robj);
+	return r;
+}
+
+int radeon_object_evict_vram(struct radeon_device *rdev)
+{
+	if (rdev->flags & RADEON_IS_IGP) {
+		/* Useless to evict on IGP chips */
+		return 0;
+	}
+	return ttm_bo_evict_mm(&rdev->mman.bdev, TTM_PL_VRAM);
+}
+
+void radeon_object_force_delete(struct radeon_device *rdev)
+{
+	struct radeon_object *robj, *n;
+	struct drm_gem_object *gobj;
+
+	if (list_empty(&rdev->gem.objects)) {
+		return;
+	}
+	DRM_ERROR("Userspace still has active objects !\n");
+	list_for_each_entry_safe(robj, n, &rdev->gem.objects, list) {
+		mutex_lock(&rdev->ddev->struct_mutex);
+		gobj = robj->gobj;
+		DRM_ERROR("Force free for (%p,%p,%lu,%lu)\n",
+			  gobj, robj, (unsigned long)gobj->size,
+			  *((unsigned long *)&gobj->refcount));
+		list_del_init(&robj->list);
+		radeon_object_unref(&robj);
+		gobj->driver_private = NULL;
+		drm_gem_object_unreference(gobj);
+		mutex_unlock(&rdev->ddev->struct_mutex);
+	}
+}
+
+int radeon_object_init(struct radeon_device *rdev)
+{
+	return radeon_ttm_init(rdev);
+}
+
+void radeon_object_fini(struct radeon_device *rdev)
+{
+	radeon_ttm_fini(rdev);
+}
+
+void radeon_object_list_add_object(struct radeon_object_list *lobj,
+				   struct list_head *head)
+{
+	if (lobj->wdomain) {
+		list_add(&lobj->list, head);
+	} else {
+		list_add_tail(&lobj->list, head);
+	}
+}
+
+int radeon_object_list_reserve(struct list_head *head)
+{
+	struct radeon_object_list *lobj;
+	struct list_head *i;
+	int r;
+
+	list_for_each(i, head) {
+		lobj = list_entry(i, struct radeon_object_list, list);
+		if (!lobj->robj->pin_count) {
+			r = radeon_object_reserve(lobj->robj, true);
+			if (unlikely(r != 0)) {
+				DRM_ERROR("radeon: failed to reserve object.\n");
+				return r;
+			}
+		} else {
+		}
+	}
+	return 0;
+}
+
+void radeon_object_list_unreserve(struct list_head *head)
+{
+	struct radeon_object_list *lobj;
+	struct list_head *i;
+
+	list_for_each(i, head) {
+		lobj = list_entry(i, struct radeon_object_list, list);
+		if (!lobj->robj->pin_count) {
+			radeon_object_unreserve(lobj->robj);
+		} else {
+		}
+	}
+}
+
+int radeon_object_list_validate(struct list_head *head, void *fence)
+{
+	struct radeon_object_list *lobj;
+	struct radeon_object *robj;
+	struct radeon_fence *old_fence = NULL;
+	struct list_head *i;
+	uint32_t flags;
+	int r;
+
+	r = radeon_object_list_reserve(head);
+	if (unlikely(r != 0)) {
+		radeon_object_list_unreserve(head);
+		return r;
+	}
+	list_for_each(i, head) {
+		lobj = list_entry(i, struct radeon_object_list, list);
+		robj = lobj->robj;
+		if (lobj->wdomain) {
+			flags = radeon_object_flags_from_domain(lobj->wdomain);
+			flags |= TTM_PL_FLAG_TT;
+		} else {
+			flags = radeon_object_flags_from_domain(lobj->rdomain);
+			flags |= TTM_PL_FLAG_TT;
+			flags |= TTM_PL_FLAG_VRAM;
+		}
+		if (!robj->pin_count) {
+			robj->tobj.proposed_placement = flags | TTM_PL_MASK_CACHING;
+			r = ttm_buffer_object_validate(&robj->tobj,
+						       robj->tobj.proposed_placement,
+						       true, false);
+			if (unlikely(r)) {
+				radeon_object_list_unreserve(head);
+				DRM_ERROR("radeon: failed to validate.\n");
+				return r;
+			}
+			radeon_object_gpu_addr(robj);
+		}
+		lobj->gpu_offset = robj->gpu_addr;
+		if (fence) {
+			old_fence = (struct radeon_fence *)robj->tobj.sync_obj;
+			robj->tobj.sync_obj = radeon_fence_ref(fence);
+			robj->tobj.sync_obj_arg = NULL;
+		}
+		if (old_fence) {
+			radeon_fence_unref(&old_fence);
+		}
+	}
+	return 0;
+}
+
+void radeon_object_list_unvalidate(struct list_head *head)
+{
+	struct radeon_object_list *lobj;
+	struct radeon_fence *old_fence = NULL;
+	struct list_head *i;
+
+	list_for_each(i, head) {
+		lobj = list_entry(i, struct radeon_object_list, list);
+		old_fence = (struct radeon_fence *)lobj->robj->tobj.sync_obj;
+		lobj->robj->tobj.sync_obj = NULL;
+		if (old_fence) {
+			radeon_fence_unref(&old_fence);
+		}
+	}
+	radeon_object_list_unreserve(head);
+}
+
+void radeon_object_list_clean(struct list_head *head)
+{
+	radeon_object_list_unreserve(head);
+}
+
+int radeon_object_fbdev_mmap(struct radeon_object *robj,
+			     struct vm_area_struct *vma)
+{
+	return ttm_fbdev_mmap(vma, &robj->tobj);
+}
+
+unsigned long radeon_object_size(struct radeon_object *robj)
+{
+	return robj->tobj.num_pages << PAGE_SHIFT;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
new file mode 100644
index 00000000000..473e4775dc5
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#ifndef __RADEON_OBJECT_H__
+#define __RADEON_OBJECT_H__
+
+#include <ttm/ttm_bo_api.h>
+#include <ttm/ttm_bo_driver.h>
+#include <ttm/ttm_placement.h>
+#include <ttm/ttm_module.h>
+
+/*
+ * TTM.
+ */
+struct radeon_mman {
+	struct ttm_global_reference	mem_global_ref;
+	bool				mem_global_referenced;
+	struct ttm_bo_device		bdev;
+};
+
+#endif
diff --git a/drivers/gpu/drm/radeon/radeon_reg.h b/drivers/gpu/drm/radeon/radeon_reg.h
new file mode 100644
index 00000000000..6d3d90406a2
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_reg.h
@@ -0,0 +1,3570 @@
+/*
+ * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
+ *                VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation on the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
+ * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@xfree86.org>
+ *   Rickard E. Faith <faith@valinux.com>
+ *   Alan Hourihane <alanh@fairlite.demon.co.uk>
+ *
+ * References:
+ *
+ * !!!! FIXME !!!!
+ *   RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical
+ *   Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April
+ *   1999.
+ *
+ * !!!! FIXME !!!!
+ *   RAGE 128 Software Development Manual (Technical Reference Manual P/N
+ *   SDK-G04000 Rev. 0.01), ATI Technologies: June 1999.
+ *
+ */
+
+/* !!!! FIXME !!!!  NOTE: THIS FILE HAS BEEN CONVERTED FROM r128_reg.h
+ * AND CONTAINS REGISTERS AND REGISTER DEFINITIONS THAT ARE NOT CORRECT
+ * ON THE RADEON.  A FULL AUDIT OF THIS CODE IS NEEDED!  */
+#ifndef _RADEON_REG_H_
+#define _RADEON_REG_H_
+
+#include "r300_reg.h"
+#include "r500_reg.h"
+#include "r600_reg.h"
+
+
+#define RADEON_MC_AGP_LOCATION		0x014c
+#define		RADEON_MC_AGP_START_MASK	0x0000FFFF
+#define		RADEON_MC_AGP_START_SHIFT	0
+#define		RADEON_MC_AGP_TOP_MASK		0xFFFF0000
+#define		RADEON_MC_AGP_TOP_SHIFT		16
+#define RADEON_MC_FB_LOCATION		0x0148
+#define		RADEON_MC_FB_START_MASK		0x0000FFFF
+#define		RADEON_MC_FB_START_SHIFT	0
+#define		RADEON_MC_FB_TOP_MASK		0xFFFF0000
+#define		RADEON_MC_FB_TOP_SHIFT		16
+#define RADEON_AGP_BASE_2		0x015c /* r200+ only */
+#define RADEON_AGP_BASE			0x0170
+
+#define ATI_DATATYPE_VQ				0
+#define ATI_DATATYPE_CI4			1
+#define ATI_DATATYPE_CI8			2
+#define ATI_DATATYPE_ARGB1555			3
+#define ATI_DATATYPE_RGB565			4
+#define ATI_DATATYPE_RGB888			5
+#define ATI_DATATYPE_ARGB8888			6
+#define ATI_DATATYPE_RGB332			7
+#define ATI_DATATYPE_Y8				8
+#define ATI_DATATYPE_RGB8			9
+#define ATI_DATATYPE_CI16			10
+#define ATI_DATATYPE_VYUY_422			11
+#define ATI_DATATYPE_YVYU_422			12
+#define ATI_DATATYPE_AYUV_444			14
+#define ATI_DATATYPE_ARGB4444			15
+
+				/* Registers for 2D/Video/Overlay */
+#define RADEON_ADAPTER_ID                   0x0f2c /* PCI */
+#define RADEON_AGP_BASE                     0x0170
+#define RADEON_AGP_CNTL                     0x0174
+#       define RADEON_AGP_APER_SIZE_256MB   (0x00 << 0)
+#       define RADEON_AGP_APER_SIZE_128MB   (0x20 << 0)
+#       define RADEON_AGP_APER_SIZE_64MB    (0x30 << 0)
+#       define RADEON_AGP_APER_SIZE_32MB    (0x38 << 0)
+#       define RADEON_AGP_APER_SIZE_16MB    (0x3c << 0)
+#       define RADEON_AGP_APER_SIZE_8MB     (0x3e << 0)
+#       define RADEON_AGP_APER_SIZE_4MB     (0x3f << 0)
+#       define RADEON_AGP_APER_SIZE_MASK    (0x3f << 0)
+#define RADEON_STATUS_PCI_CONFIG            0x06
+#       define RADEON_CAP_LIST              0x100000
+#define RADEON_CAPABILITIES_PTR_PCI_CONFIG  0x34 /* offset in PCI config*/
+#       define RADEON_CAP_PTR_MASK          0xfc /* mask off reserved bits of CAP_PTR */
+#       define RADEON_CAP_ID_NULL           0x00 /* End of capability list */
+#       define RADEON_CAP_ID_AGP            0x02 /* AGP capability ID */
+#       define RADEON_CAP_ID_EXP            0x10 /* PCI Express */
+#define RADEON_AGP_COMMAND                  0x0f60 /* PCI */
+#define RADEON_AGP_COMMAND_PCI_CONFIG       0x0060 /* offset in PCI config*/
+#       define RADEON_AGP_ENABLE            (1<<8)
+#define RADEON_AGP_PLL_CNTL                 0x000b /* PLL */
+#define RADEON_AGP_STATUS                   0x0f5c /* PCI */
+#       define RADEON_AGP_1X_MODE           0x01
+#       define RADEON_AGP_2X_MODE           0x02
+#       define RADEON_AGP_4X_MODE           0x04
+#       define RADEON_AGP_FW_MODE           0x10
+#       define RADEON_AGP_MODE_MASK         0x17
+#       define RADEON_AGPv3_MODE            0x08
+#       define RADEON_AGPv3_4X_MODE         0x01
+#       define RADEON_AGPv3_8X_MODE         0x02
+#define RADEON_ATTRDR                       0x03c1 /* VGA */
+#define RADEON_ATTRDW                       0x03c0 /* VGA */
+#define RADEON_ATTRX                        0x03c0 /* VGA */
+#define RADEON_AUX_SC_CNTL                  0x1660
+#       define RADEON_AUX1_SC_EN            (1 << 0)
+#       define RADEON_AUX1_SC_MODE_OR       (0 << 1)
+#       define RADEON_AUX1_SC_MODE_NAND     (1 << 1)
+#       define RADEON_AUX2_SC_EN            (1 << 2)
+#       define RADEON_AUX2_SC_MODE_OR       (0 << 3)
+#       define RADEON_AUX2_SC_MODE_NAND     (1 << 3)
+#       define RADEON_AUX3_SC_EN            (1 << 4)
+#       define RADEON_AUX3_SC_MODE_OR       (0 << 5)
+#       define RADEON_AUX3_SC_MODE_NAND     (1 << 5)
+#define RADEON_AUX1_SC_BOTTOM               0x1670
+#define RADEON_AUX1_SC_LEFT                 0x1664
+#define RADEON_AUX1_SC_RIGHT                0x1668
+#define RADEON_AUX1_SC_TOP                  0x166c
+#define RADEON_AUX2_SC_BOTTOM               0x1680
+#define RADEON_AUX2_SC_LEFT                 0x1674
+#define RADEON_AUX2_SC_RIGHT                0x1678
+#define RADEON_AUX2_SC_TOP                  0x167c
+#define RADEON_AUX3_SC_BOTTOM               0x1690
+#define RADEON_AUX3_SC_LEFT                 0x1684
+#define RADEON_AUX3_SC_RIGHT                0x1688
+#define RADEON_AUX3_SC_TOP                  0x168c
+#define RADEON_AUX_WINDOW_HORZ_CNTL         0x02d8
+#define RADEON_AUX_WINDOW_VERT_CNTL         0x02dc
+
+#define RADEON_BASE_CODE                    0x0f0b
+#define RADEON_BIOS_0_SCRATCH               0x0010
+#       define RADEON_FP_PANEL_SCALABLE     (1 << 16)
+#       define RADEON_FP_PANEL_SCALE_EN     (1 << 17)
+#       define RADEON_FP_CHIP_SCALE_EN      (1 << 18)
+#       define RADEON_DRIVER_BRIGHTNESS_EN  (1 << 26)
+#       define RADEON_DISPLAY_ROT_MASK      (3 << 28)
+#       define RADEON_DISPLAY_ROT_00        (0 << 28)
+#       define RADEON_DISPLAY_ROT_90        (1 << 28)
+#       define RADEON_DISPLAY_ROT_180       (2 << 28)
+#       define RADEON_DISPLAY_ROT_270       (3 << 28)
+#define RADEON_BIOS_1_SCRATCH               0x0014
+#define RADEON_BIOS_2_SCRATCH               0x0018
+#define RADEON_BIOS_3_SCRATCH               0x001c
+#define RADEON_BIOS_4_SCRATCH               0x0020
+#       define RADEON_CRT1_ATTACHED_MASK    (3 << 0)
+#       define RADEON_CRT1_ATTACHED_MONO    (1 << 0)
+#       define RADEON_CRT1_ATTACHED_COLOR   (2 << 0)
+#       define RADEON_LCD1_ATTACHED         (1 << 2)
+#       define RADEON_DFP1_ATTACHED         (1 << 3)
+#       define RADEON_TV1_ATTACHED_MASK     (3 << 4)
+#       define RADEON_TV1_ATTACHED_COMP     (1 << 4)
+#       define RADEON_TV1_ATTACHED_SVIDEO   (2 << 4)
+#       define RADEON_CRT2_ATTACHED_MASK    (3 << 8)
+#       define RADEON_CRT2_ATTACHED_MONO    (1 << 8)
+#       define RADEON_CRT2_ATTACHED_COLOR   (2 << 8)
+#       define RADEON_DFP2_ATTACHED         (1 << 11)
+#define RADEON_BIOS_5_SCRATCH               0x0024
+#       define RADEON_LCD1_ON               (1 << 0)
+#       define RADEON_CRT1_ON               (1 << 1)
+#       define RADEON_TV1_ON                (1 << 2)
+#       define RADEON_DFP1_ON               (1 << 3)
+#       define RADEON_CRT2_ON               (1 << 5)
+#       define RADEON_CV1_ON                (1 << 6)
+#       define RADEON_DFP2_ON               (1 << 7)
+#       define RADEON_LCD1_CRTC_MASK        (1 << 8)
+#       define RADEON_LCD1_CRTC_SHIFT       8
+#       define RADEON_CRT1_CRTC_MASK        (1 << 9)
+#       define RADEON_CRT1_CRTC_SHIFT       9
+#       define RADEON_TV1_CRTC_MASK         (1 << 10)
+#       define RADEON_TV1_CRTC_SHIFT        10
+#       define RADEON_DFP1_CRTC_MASK        (1 << 11)
+#       define RADEON_DFP1_CRTC_SHIFT       11
+#       define RADEON_CRT2_CRTC_MASK        (1 << 12)
+#       define RADEON_CRT2_CRTC_SHIFT       12
+#       define RADEON_CV1_CRTC_MASK         (1 << 13)
+#       define RADEON_CV1_CRTC_SHIFT        13
+#       define RADEON_DFP2_CRTC_MASK        (1 << 14)
+#       define RADEON_DFP2_CRTC_SHIFT       14
+#       define RADEON_ACC_REQ_LCD1          (1 << 16)
+#       define RADEON_ACC_REQ_CRT1          (1 << 17)
+#       define RADEON_ACC_REQ_TV1           (1 << 18)
+#       define RADEON_ACC_REQ_DFP1          (1 << 19)
+#       define RADEON_ACC_REQ_CRT2          (1 << 21)
+#       define RADEON_ACC_REQ_TV2           (1 << 22)
+#       define RADEON_ACC_REQ_DFP2          (1 << 23)
+#define RADEON_BIOS_6_SCRATCH               0x0028
+#       define RADEON_ACC_MODE_CHANGE       (1 << 2)
+#       define RADEON_EXT_DESKTOP_MODE      (1 << 3)
+#       define RADEON_LCD_DPMS_ON           (1 << 20)
+#       define RADEON_CRT_DPMS_ON           (1 << 21)
+#       define RADEON_TV_DPMS_ON            (1 << 22)
+#       define RADEON_DFP_DPMS_ON           (1 << 23)
+#       define RADEON_DPMS_MASK             (3 << 24)
+#       define RADEON_DPMS_ON               (0 << 24)
+#       define RADEON_DPMS_STANDBY          (1 << 24)
+#       define RADEON_DPMS_SUSPEND          (2 << 24)
+#       define RADEON_DPMS_OFF              (3 << 24)
+#       define RADEON_SCREEN_BLANKING       (1 << 26)
+#       define RADEON_DRIVER_CRITICAL       (1 << 27)
+#       define RADEON_DISPLAY_SWITCHING_DIS (1 << 30)
+#define RADEON_BIOS_7_SCRATCH               0x002c
+#       define RADEON_SYS_HOTKEY            (1 << 10)
+#       define RADEON_DRV_LOADED            (1 << 12)
+#define RADEON_BIOS_ROM                     0x0f30 /* PCI */
+#define RADEON_BIST                         0x0f0f /* PCI */
+#define RADEON_BRUSH_DATA0                  0x1480
+#define RADEON_BRUSH_DATA1                  0x1484
+#define RADEON_BRUSH_DATA10                 0x14a8
+#define RADEON_BRUSH_DATA11                 0x14ac
+#define RADEON_BRUSH_DATA12                 0x14b0
+#define RADEON_BRUSH_DATA13                 0x14b4
+#define RADEON_BRUSH_DATA14                 0x14b8
+#define RADEON_BRUSH_DATA15                 0x14bc
+#define RADEON_BRUSH_DATA16                 0x14c0
+#define RADEON_BRUSH_DATA17                 0x14c4
+#define RADEON_BRUSH_DATA18                 0x14c8
+#define RADEON_BRUSH_DATA19                 0x14cc
+#define RADEON_BRUSH_DATA2                  0x1488
+#define RADEON_BRUSH_DATA20                 0x14d0
+#define RADEON_BRUSH_DATA21                 0x14d4
+#define RADEON_BRUSH_DATA22                 0x14d8
+#define RADEON_BRUSH_DATA23                 0x14dc
+#define RADEON_BRUSH_DATA24                 0x14e0
+#define RADEON_BRUSH_DATA25                 0x14e4
+#define RADEON_BRUSH_DATA26                 0x14e8
+#define RADEON_BRUSH_DATA27                 0x14ec
+#define RADEON_BRUSH_DATA28                 0x14f0
+#define RADEON_BRUSH_DATA29                 0x14f4
+#define RADEON_BRUSH_DATA3                  0x148c
+#define RADEON_BRUSH_DATA30                 0x14f8
+#define RADEON_BRUSH_DATA31                 0x14fc
+#define RADEON_BRUSH_DATA32                 0x1500
+#define RADEON_BRUSH_DATA33                 0x1504
+#define RADEON_BRUSH_DATA34                 0x1508
+#define RADEON_BRUSH_DATA35                 0x150c
+#define RADEON_BRUSH_DATA36                 0x1510
+#define RADEON_BRUSH_DATA37                 0x1514
+#define RADEON_BRUSH_DATA38                 0x1518
+#define RADEON_BRUSH_DATA39                 0x151c
+#define RADEON_BRUSH_DATA4                  0x1490
+#define RADEON_BRUSH_DATA40                 0x1520
+#define RADEON_BRUSH_DATA41                 0x1524
+#define RADEON_BRUSH_DATA42                 0x1528
+#define RADEON_BRUSH_DATA43                 0x152c
+#define RADEON_BRUSH_DATA44                 0x1530
+#define RADEON_BRUSH_DATA45                 0x1534
+#define RADEON_BRUSH_DATA46                 0x1538
+#define RADEON_BRUSH_DATA47                 0x153c
+#define RADEON_BRUSH_DATA48                 0x1540
+#define RADEON_BRUSH_DATA49                 0x1544
+#define RADEON_BRUSH_DATA5                  0x1494
+#define RADEON_BRUSH_DATA50                 0x1548
+#define RADEON_BRUSH_DATA51                 0x154c
+#define RADEON_BRUSH_DATA52                 0x1550
+#define RADEON_BRUSH_DATA53                 0x1554
+#define RADEON_BRUSH_DATA54                 0x1558
+#define RADEON_BRUSH_DATA55                 0x155c
+#define RADEON_BRUSH_DATA56                 0x1560
+#define RADEON_BRUSH_DATA57                 0x1564
+#define RADEON_BRUSH_DATA58                 0x1568
+#define RADEON_BRUSH_DATA59                 0x156c
+#define RADEON_BRUSH_DATA6                  0x1498
+#define RADEON_BRUSH_DATA60                 0x1570
+#define RADEON_BRUSH_DATA61                 0x1574
+#define RADEON_BRUSH_DATA62                 0x1578
+#define RADEON_BRUSH_DATA63                 0x157c
+#define RADEON_BRUSH_DATA7                  0x149c
+#define RADEON_BRUSH_DATA8                  0x14a0
+#define RADEON_BRUSH_DATA9                  0x14a4
+#define RADEON_BRUSH_SCALE                  0x1470
+#define RADEON_BRUSH_Y_X                    0x1474
+#define RADEON_BUS_CNTL                     0x0030
+#       define RADEON_BUS_MASTER_DIS         (1 << 6)
+#       define RADEON_BUS_BIOS_DIS_ROM       (1 << 12)
+#       define RADEON_BUS_RD_DISCARD_EN      (1 << 24)
+#       define RADEON_BUS_RD_ABORT_EN        (1 << 25)
+#       define RADEON_BUS_MSTR_DISCONNECT_EN (1 << 28)
+#       define RADEON_BUS_WRT_BURST          (1 << 29)
+#       define RADEON_BUS_READ_BURST         (1 << 30)
+#define RADEON_BUS_CNTL1                    0x0034
+#       define RADEON_BUS_WAIT_ON_LOCK_EN    (1 << 4)
+
+/* #define RADEON_PCIE_INDEX                   0x0030 */
+/* #define RADEON_PCIE_DATA                    0x0034 */
+#define RADEON_PCIE_LC_LINK_WIDTH_CNTL             0xa2 /* PCIE */
+#       define RADEON_PCIE_LC_LINK_WIDTH_SHIFT     0
+#       define RADEON_PCIE_LC_LINK_WIDTH_MASK      0x7
+#       define RADEON_PCIE_LC_LINK_WIDTH_X0        0
+#       define RADEON_PCIE_LC_LINK_WIDTH_X1        1
+#       define RADEON_PCIE_LC_LINK_WIDTH_X2        2
+#       define RADEON_PCIE_LC_LINK_WIDTH_X4        3
+#       define RADEON_PCIE_LC_LINK_WIDTH_X8        4
+#       define RADEON_PCIE_LC_LINK_WIDTH_X12       5
+#       define RADEON_PCIE_LC_LINK_WIDTH_X16       6
+#       define RADEON_PCIE_LC_LINK_WIDTH_RD_SHIFT  4
+#       define RADEON_PCIE_LC_LINK_WIDTH_RD_MASK   0x70
+#       define RADEON_PCIE_LC_RECONFIG_NOW         (1 << 8)
+#       define RADEON_PCIE_LC_RECONFIG_LATER       (1 << 9)
+#       define RADEON_PCIE_LC_SHORT_RECONFIG_EN    (1 << 10)
+
+#define RADEON_CACHE_CNTL                   0x1724
+#define RADEON_CACHE_LINE                   0x0f0c /* PCI */
+#define RADEON_CAPABILITIES_ID              0x0f50 /* PCI */
+#define RADEON_CAPABILITIES_PTR             0x0f34 /* PCI */
+#define RADEON_CLK_PIN_CNTL                 0x0001 /* PLL */
+#       define RADEON_DONT_USE_XTALIN       (1 << 4)
+#       define RADEON_SCLK_DYN_START_CNTL   (1 << 15)
+#define RADEON_CLOCK_CNTL_DATA              0x000c
+#define RADEON_CLOCK_CNTL_INDEX             0x0008
+#       define RADEON_PLL_WR_EN             (1 << 7)
+#       define RADEON_PLL_DIV_SEL           (3 << 8)
+#       define RADEON_PLL2_DIV_SEL_MASK     (~(3 << 8))
+#define RADEON_CLK_PWRMGT_CNTL              0x0014
+#       define RADEON_ENGIN_DYNCLK_MODE     (1 << 12)
+#       define RADEON_ACTIVE_HILO_LAT_MASK  (3 << 13)
+#       define RADEON_ACTIVE_HILO_LAT_SHIFT 13
+#       define RADEON_DISP_DYN_STOP_LAT_MASK (1 << 12)
+#       define RADEON_MC_BUSY               (1 << 16)
+#       define RADEON_DLL_READY             (1 << 19)
+#       define RADEON_CG_NO1_DEBUG_0        (1 << 24)
+#       define RADEON_CG_NO1_DEBUG_MASK     (0x1f << 24)
+#       define RADEON_DYN_STOP_MODE_MASK    (7 << 21)
+#       define RADEON_TVPLL_PWRMGT_OFF      (1 << 30)
+#       define RADEON_TVCLK_TURNOFF         (1 << 31)
+#define RADEON_PLL_PWRMGT_CNTL              0x0015 /* PLL */
+#       define RADEON_TCL_BYPASS_DISABLE    (1 << 20)
+#define RADEON_CLR_CMP_CLR_3D               0x1a24
+#define RADEON_CLR_CMP_CLR_DST              0x15c8
+#define RADEON_CLR_CMP_CLR_SRC              0x15c4
+#define RADEON_CLR_CMP_CNTL                 0x15c0
+#       define RADEON_SRC_CMP_EQ_COLOR      (4 <<  0)
+#       define RADEON_SRC_CMP_NEQ_COLOR     (5 <<  0)
+#       define RADEON_CLR_CMP_SRC_SOURCE    (1 << 24)
+#define RADEON_CLR_CMP_MASK                 0x15cc
+#       define RADEON_CLR_CMP_MSK           0xffffffff
+#define RADEON_CLR_CMP_MASK_3D              0x1A28
+#define RADEON_COMMAND                      0x0f04 /* PCI */
+#define RADEON_COMPOSITE_SHADOW_ID          0x1a0c
+#define RADEON_CONFIG_APER_0_BASE           0x0100
+#define RADEON_CONFIG_APER_1_BASE           0x0104
+#define RADEON_CONFIG_APER_SIZE             0x0108
+#define RADEON_CONFIG_BONDS                 0x00e8
+#define RADEON_CONFIG_CNTL                  0x00e0
+#       define RADEON_CFG_ATI_REV_A11       (0   << 16)
+#       define RADEON_CFG_ATI_REV_A12       (1   << 16)
+#       define RADEON_CFG_ATI_REV_A13       (2   << 16)
+#       define RADEON_CFG_ATI_REV_ID_MASK   (0xf << 16)
+#define RADEON_CONFIG_MEMSIZE               0x00f8
+#define RADEON_CONFIG_MEMSIZE_EMBEDDED      0x0114
+#define RADEON_CONFIG_REG_1_BASE            0x010c
+#define RADEON_CONFIG_REG_APER_SIZE         0x0110
+#define RADEON_CONFIG_XSTRAP                0x00e4
+#define RADEON_CONSTANT_COLOR_C             0x1d34
+#       define RADEON_CONSTANT_COLOR_MASK   0x00ffffff
+#       define RADEON_CONSTANT_COLOR_ONE    0x00ffffff
+#       define RADEON_CONSTANT_COLOR_ZERO   0x00000000
+#define RADEON_CRC_CMDFIFO_ADDR             0x0740
+#define RADEON_CRC_CMDFIFO_DOUT             0x0744
+#define RADEON_GRPH_BUFFER_CNTL             0x02f0
+#       define RADEON_GRPH_START_REQ_MASK          (0x7f)
+#       define RADEON_GRPH_START_REQ_SHIFT         0
+#       define RADEON_GRPH_STOP_REQ_MASK           (0x7f<<8)
+#       define RADEON_GRPH_STOP_REQ_SHIFT          8
+#       define RADEON_GRPH_CRITICAL_POINT_MASK     (0x7f<<16)
+#       define RADEON_GRPH_CRITICAL_POINT_SHIFT    16
+#       define RADEON_GRPH_CRITICAL_CNTL           (1<<28)
+#       define RADEON_GRPH_BUFFER_SIZE             (1<<29)
+#       define RADEON_GRPH_CRITICAL_AT_SOF         (1<<30)
+#       define RADEON_GRPH_STOP_CNTL               (1<<31)
+#define RADEON_GRPH2_BUFFER_CNTL            0x03f0
+#       define RADEON_GRPH2_START_REQ_MASK         (0x7f)
+#       define RADEON_GRPH2_START_REQ_SHIFT         0
+#       define RADEON_GRPH2_STOP_REQ_MASK          (0x7f<<8)
+#       define RADEON_GRPH2_STOP_REQ_SHIFT         8
+#       define RADEON_GRPH2_CRITICAL_POINT_MASK    (0x7f<<16)
+#       define RADEON_GRPH2_CRITICAL_POINT_SHIFT   16
+#       define RADEON_GRPH2_CRITICAL_CNTL          (1<<28)
+#       define RADEON_GRPH2_BUFFER_SIZE            (1<<29)
+#       define RADEON_GRPH2_CRITICAL_AT_SOF        (1<<30)
+#       define RADEON_GRPH2_STOP_CNTL              (1<<31)
+#define RADEON_CRTC_CRNT_FRAME              0x0214
+#define RADEON_CRTC_EXT_CNTL                0x0054
+#       define RADEON_CRTC_VGA_XOVERSCAN    (1 <<  0)
+#       define RADEON_VGA_ATI_LINEAR        (1 <<  3)
+#       define RADEON_XCRT_CNT_EN           (1 <<  6)
+#       define RADEON_CRTC_HSYNC_DIS        (1 <<  8)
+#       define RADEON_CRTC_VSYNC_DIS        (1 <<  9)
+#       define RADEON_CRTC_DISPLAY_DIS      (1 << 10)
+#       define RADEON_CRTC_SYNC_TRISTAT     (1 << 11)
+#       define RADEON_CRTC_CRT_ON           (1 << 15)
+#define RADEON_CRTC_EXT_CNTL_DPMS_BYTE      0x0055
+#       define RADEON_CRTC_HSYNC_DIS_BYTE   (1 <<  0)
+#       define RADEON_CRTC_VSYNC_DIS_BYTE   (1 <<  1)
+#       define RADEON_CRTC_DISPLAY_DIS_BYTE (1 <<  2)
+#define RADEON_CRTC_GEN_CNTL                0x0050
+#       define RADEON_CRTC_DBL_SCAN_EN      (1 <<  0)
+#       define RADEON_CRTC_INTERLACE_EN     (1 <<  1)
+#       define RADEON_CRTC_CSYNC_EN         (1 <<  4)
+#       define RADEON_CRTC_ICON_EN          (1 << 15)
+#       define RADEON_CRTC_CUR_EN           (1 << 16)
+#       define RADEON_CRTC_CUR_MODE_MASK    (7 << 20)
+#       define RADEON_CRTC_CUR_MODE_SHIFT   20
+#       define RADEON_CRTC_CUR_MODE_MONO    0
+#       define RADEON_CRTC_CUR_MODE_24BPP   2
+#       define RADEON_CRTC_EXT_DISP_EN      (1 << 24)
+#       define RADEON_CRTC_EN               (1 << 25)
+#       define RADEON_CRTC_DISP_REQ_EN_B    (1 << 26)
+#define RADEON_CRTC2_GEN_CNTL               0x03f8
+#       define RADEON_CRTC2_DBL_SCAN_EN     (1 <<  0)
+#       define RADEON_CRTC2_INTERLACE_EN    (1 <<  1)
+#       define RADEON_CRTC2_SYNC_TRISTAT    (1 <<  4)
+#       define RADEON_CRTC2_HSYNC_TRISTAT   (1 <<  5)
+#       define RADEON_CRTC2_VSYNC_TRISTAT   (1 <<  6)
+#       define RADEON_CRTC2_CRT2_ON         (1 <<  7)
+#       define RADEON_CRTC2_PIX_WIDTH_SHIFT 8
+#       define RADEON_CRTC2_PIX_WIDTH_MASK  (0xf << 8)
+#       define RADEON_CRTC2_ICON_EN         (1 << 15)
+#       define RADEON_CRTC2_CUR_EN          (1 << 16)
+#       define RADEON_CRTC2_CUR_MODE_MASK   (7 << 20)
+#       define RADEON_CRTC2_DISP_DIS        (1 << 23)
+#       define RADEON_CRTC2_EN              (1 << 25)
+#       define RADEON_CRTC2_DISP_REQ_EN_B   (1 << 26)
+#       define RADEON_CRTC2_CSYNC_EN        (1 << 27)
+#       define RADEON_CRTC2_HSYNC_DIS       (1 << 28)
+#       define RADEON_CRTC2_VSYNC_DIS       (1 << 29)
+#define RADEON_CRTC_MORE_CNTL               0x27c
+#       define RADEON_CRTC_AUTO_HORZ_CENTER_EN (1<<2)
+#       define RADEON_CRTC_AUTO_VERT_CENTER_EN (1<<3)
+#       define RADEON_CRTC_H_CUTOFF_ACTIVE_EN (1<<4)
+#       define RADEON_CRTC_V_CUTOFF_ACTIVE_EN (1<<5)
+#define RADEON_CRTC_GUI_TRIG_VLINE          0x0218
+#define RADEON_CRTC_H_SYNC_STRT_WID         0x0204
+#       define RADEON_CRTC_H_SYNC_STRT_PIX        (0x07  <<  0)
+#       define RADEON_CRTC_H_SYNC_STRT_CHAR       (0x3ff <<  3)
+#       define RADEON_CRTC_H_SYNC_STRT_CHAR_SHIFT 3
+#       define RADEON_CRTC_H_SYNC_WID             (0x3f  << 16)
+#       define RADEON_CRTC_H_SYNC_WID_SHIFT       16
+#       define RADEON_CRTC_H_SYNC_POL             (1     << 23)
+#define RADEON_CRTC2_H_SYNC_STRT_WID        0x0304
+#       define RADEON_CRTC2_H_SYNC_STRT_PIX        (0x07  <<  0)
+#       define RADEON_CRTC2_H_SYNC_STRT_CHAR       (0x3ff <<  3)
+#       define RADEON_CRTC2_H_SYNC_STRT_CHAR_SHIFT 3
+#       define RADEON_CRTC2_H_SYNC_WID             (0x3f  << 16)
+#       define RADEON_CRTC2_H_SYNC_WID_SHIFT       16
+#       define RADEON_CRTC2_H_SYNC_POL             (1     << 23)
+#define RADEON_CRTC_H_TOTAL_DISP            0x0200
+#       define RADEON_CRTC_H_TOTAL          (0x03ff << 0)
+#       define RADEON_CRTC_H_TOTAL_SHIFT    0
+#       define RADEON_CRTC_H_DISP           (0x01ff << 16)
+#       define RADEON_CRTC_H_DISP_SHIFT     16
+#define RADEON_CRTC2_H_TOTAL_DISP           0x0300
+#       define RADEON_CRTC2_H_TOTAL         (0x03ff << 0)
+#       define RADEON_CRTC2_H_TOTAL_SHIFT   0
+#       define RADEON_CRTC2_H_DISP          (0x01ff << 16)
+#       define RADEON_CRTC2_H_DISP_SHIFT    16
+
+#define RADEON_CRTC_OFFSET_RIGHT	    0x0220
+#define RADEON_CRTC_OFFSET                  0x0224
+#	define RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET (1<<30)
+#	define RADEON_CRTC_OFFSET__OFFSET_LOCK	   (1<<31)
+
+#define RADEON_CRTC2_OFFSET                 0x0324
+#	define RADEON_CRTC2_OFFSET__GUI_TRIG_OFFSET (1<<30)
+#	define RADEON_CRTC2_OFFSET__OFFSET_LOCK	    (1<<31)
+#define RADEON_CRTC_OFFSET_CNTL             0x0228
+#       define RADEON_CRTC_TILE_LINE_SHIFT              0
+#       define RADEON_CRTC_TILE_LINE_RIGHT_SHIFT        4
+#	define R300_CRTC_X_Y_MODE_EN_RIGHT		(1 << 6)
+#	define R300_CRTC_MICRO_TILE_BUFFER_RIGHT_MASK   (3 << 7)
+#	define R300_CRTC_MICRO_TILE_BUFFER_RIGHT_AUTO   (0 << 7)
+#	define R300_CRTC_MICRO_TILE_BUFFER_RIGHT_SINGLE (1 << 7)
+#	define R300_CRTC_MICRO_TILE_BUFFER_RIGHT_DOUBLE (2 << 7)
+#	define R300_CRTC_MICRO_TILE_BUFFER_RIGHT_DIS    (3 << 7)
+#	define R300_CRTC_X_Y_MODE_EN			(1 << 9)
+#	define R300_CRTC_MICRO_TILE_BUFFER_MASK		(3 << 10)
+#	define R300_CRTC_MICRO_TILE_BUFFER_AUTO		(0 << 10)
+#	define R300_CRTC_MICRO_TILE_BUFFER_SINGLE	(1 << 10)
+#	define R300_CRTC_MICRO_TILE_BUFFER_DOUBLE	(2 << 10)
+#	define R300_CRTC_MICRO_TILE_BUFFER_DIS		(3 << 10)
+#	define R300_CRTC_MICRO_TILE_EN_RIGHT		(1 << 12)
+#	define R300_CRTC_MICRO_TILE_EN			(1 << 13)
+#	define R300_CRTC_MACRO_TILE_EN_RIGHT		(1 << 14)
+#       define R300_CRTC_MACRO_TILE_EN                  (1 << 15)
+#       define RADEON_CRTC_TILE_EN_RIGHT                (1 << 14)
+#       define RADEON_CRTC_TILE_EN                      (1 << 15)
+#       define RADEON_CRTC_OFFSET_FLIP_CNTL             (1 << 16)
+#       define RADEON_CRTC_STEREO_OFFSET_EN             (1 << 17)
+
+#define R300_CRTC_TILE_X0_Y0	            0x0350
+#define R300_CRTC2_TILE_X0_Y0	            0x0358
+
+#define RADEON_CRTC2_OFFSET_CNTL            0x0328
+#       define RADEON_CRTC2_OFFSET_FLIP_CNTL (1 << 16)
+#       define RADEON_CRTC2_TILE_EN         (1 << 15)
+#define RADEON_CRTC_PITCH                   0x022c
+#	define RADEON_CRTC_PITCH__SHIFT		 0
+#	define RADEON_CRTC_PITCH__RIGHT_SHIFT	16
+
+#define RADEON_CRTC2_PITCH                  0x032c
+#define RADEON_CRTC_STATUS                  0x005c
+#       define RADEON_CRTC_VBLANK_SAVE      (1 <<  1)
+#       define RADEON_CRTC_VBLANK_SAVE_CLEAR  (1 <<  1)
+#define RADEON_CRTC2_STATUS                  0x03fc
+#       define RADEON_CRTC2_VBLANK_SAVE      (1 <<  1)
+#       define RADEON_CRTC2_VBLANK_SAVE_CLEAR  (1 <<  1)
+#define RADEON_CRTC_V_SYNC_STRT_WID         0x020c
+#       define RADEON_CRTC_V_SYNC_STRT        (0x7ff <<  0)
+#       define RADEON_CRTC_V_SYNC_STRT_SHIFT  0
+#       define RADEON_CRTC_V_SYNC_WID         (0x1f  << 16)
+#       define RADEON_CRTC_V_SYNC_WID_SHIFT   16
+#       define RADEON_CRTC_V_SYNC_POL         (1     << 23)
+#define RADEON_CRTC2_V_SYNC_STRT_WID        0x030c
+#       define RADEON_CRTC2_V_SYNC_STRT       (0x7ff <<  0)
+#       define RADEON_CRTC2_V_SYNC_STRT_SHIFT 0
+#       define RADEON_CRTC2_V_SYNC_WID        (0x1f  << 16)
+#       define RADEON_CRTC2_V_SYNC_WID_SHIFT  16
+#       define RADEON_CRTC2_V_SYNC_POL        (1     << 23)
+#define RADEON_CRTC_V_TOTAL_DISP            0x0208
+#       define RADEON_CRTC_V_TOTAL          (0x07ff << 0)
+#       define RADEON_CRTC_V_TOTAL_SHIFT    0
+#       define RADEON_CRTC_V_DISP           (0x07ff << 16)
+#       define RADEON_CRTC_V_DISP_SHIFT     16
+#define RADEON_CRTC2_V_TOTAL_DISP           0x0308
+#       define RADEON_CRTC2_V_TOTAL         (0x07ff << 0)
+#       define RADEON_CRTC2_V_TOTAL_SHIFT   0
+#       define RADEON_CRTC2_V_DISP          (0x07ff << 16)
+#       define RADEON_CRTC2_V_DISP_SHIFT    16
+#define RADEON_CRTC_VLINE_CRNT_VLINE        0x0210
+#       define RADEON_CRTC_CRNT_VLINE_MASK  (0x7ff << 16)
+#define RADEON_CRTC2_CRNT_FRAME             0x0314
+#define RADEON_CRTC2_GUI_TRIG_VLINE         0x0318
+#define RADEON_CRTC2_STATUS                 0x03fc
+#define RADEON_CRTC2_VLINE_CRNT_VLINE       0x0310
+#define RADEON_CRTC8_DATA                   0x03d5 /* VGA, 0x3b5 */
+#define RADEON_CRTC8_IDX                    0x03d4 /* VGA, 0x3b4 */
+#define RADEON_CUR_CLR0                     0x026c
+#define RADEON_CUR_CLR1                     0x0270
+#define RADEON_CUR_HORZ_VERT_OFF            0x0268
+#define RADEON_CUR_HORZ_VERT_POSN           0x0264
+#define RADEON_CUR_OFFSET                   0x0260
+#       define RADEON_CUR_LOCK              (1 << 31)
+#define RADEON_CUR2_CLR0                    0x036c
+#define RADEON_CUR2_CLR1                    0x0370
+#define RADEON_CUR2_HORZ_VERT_OFF           0x0368
+#define RADEON_CUR2_HORZ_VERT_POSN          0x0364
+#define RADEON_CUR2_OFFSET                  0x0360
+#       define RADEON_CUR2_LOCK             (1 << 31)
+
+#define RADEON_DAC_CNTL                     0x0058
+#       define RADEON_DAC_RANGE_CNTL        (3 <<  0)
+#       define RADEON_DAC_RANGE_CNTL_PS2    (2 <<  0)
+#       define RADEON_DAC_RANGE_CNTL_MASK   0x03
+#       define RADEON_DAC_BLANKING          (1 <<  2)
+#       define RADEON_DAC_CMP_EN            (1 <<  3)
+#       define RADEON_DAC_CMP_OUTPUT        (1 <<  7)
+#       define RADEON_DAC_8BIT_EN           (1 <<  8)
+#       define RADEON_DAC_TVO_EN            (1 << 10)
+#       define RADEON_DAC_VGA_ADR_EN        (1 << 13)
+#       define RADEON_DAC_PDWN              (1 << 15)
+#       define RADEON_DAC_MASK_ALL          (0xff << 24)
+#define RADEON_DAC_CNTL2                    0x007c
+#       define RADEON_DAC2_TV_CLK_SEL       (0 <<  1)
+#       define RADEON_DAC2_DAC_CLK_SEL      (1 <<  0)
+#       define RADEON_DAC2_DAC2_CLK_SEL     (1 <<  1)
+#       define RADEON_DAC2_PALETTE_ACC_CTL  (1 <<  5)
+#       define RADEON_DAC2_CMP_EN           (1 <<  7)
+#       define RADEON_DAC2_CMP_OUT_R        (1 <<  8)
+#       define RADEON_DAC2_CMP_OUT_G        (1 <<  9)
+#       define RADEON_DAC2_CMP_OUT_B        (1 << 10)
+#       define RADEON_DAC2_CMP_OUTPUT       (1 << 11)
+#define RADEON_DAC_EXT_CNTL                 0x0280
+#       define RADEON_DAC2_FORCE_BLANK_OFF_EN (1 << 0)
+#       define RADEON_DAC2_FORCE_DATA_EN      (1 << 1)
+#       define RADEON_DAC_FORCE_BLANK_OFF_EN  (1 << 4)
+#       define RADEON_DAC_FORCE_DATA_EN       (1 << 5)
+#       define RADEON_DAC_FORCE_DATA_SEL_MASK (3 << 6)
+#       define RADEON_DAC_FORCE_DATA_SEL_R    (0 << 6)
+#       define RADEON_DAC_FORCE_DATA_SEL_G    (1 << 6)
+#       define RADEON_DAC_FORCE_DATA_SEL_B    (2 << 6)
+#       define RADEON_DAC_FORCE_DATA_SEL_RGB  (3 << 6)
+#       define RADEON_DAC_FORCE_DATA_MASK   0x0003ff00
+#       define RADEON_DAC_FORCE_DATA_SHIFT  8
+#define RADEON_DAC_MACRO_CNTL               0x0d04
+#       define RADEON_DAC_PDWN_R            (1 << 16)
+#       define RADEON_DAC_PDWN_G            (1 << 17)
+#       define RADEON_DAC_PDWN_B            (1 << 18)
+#define RADEON_DISP_PWR_MAN                 0x0d08
+#       define RADEON_DISP_PWR_MAN_D3_CRTC_EN      (1 << 0)
+#       define RADEON_DISP_PWR_MAN_D3_CRTC2_EN     (1 << 4)
+#       define RADEON_DISP_PWR_MAN_DPMS_ON  (0 << 8)
+#       define RADEON_DISP_PWR_MAN_DPMS_STANDBY    (1 << 8)
+#       define RADEON_DISP_PWR_MAN_DPMS_SUSPEND    (2 << 8)
+#       define RADEON_DISP_PWR_MAN_DPMS_OFF (3 << 8)
+#       define RADEON_DISP_D3_RST           (1 << 16)
+#       define RADEON_DISP_D3_REG_RST       (1 << 17)
+#       define RADEON_DISP_D3_GRPH_RST      (1 << 18)
+#       define RADEON_DISP_D3_SUBPIC_RST    (1 << 19)
+#       define RADEON_DISP_D3_OV0_RST       (1 << 20)
+#       define RADEON_DISP_D1D2_GRPH_RST    (1 << 21)
+#       define RADEON_DISP_D1D2_SUBPIC_RST  (1 << 22)
+#       define RADEON_DISP_D1D2_OV0_RST     (1 << 23)
+#       define RADEON_DIG_TMDS_ENABLE_RST   (1 << 24)
+#       define RADEON_TV_ENABLE_RST         (1 << 25)
+#       define RADEON_AUTO_PWRUP_EN         (1 << 26)
+#define RADEON_TV_DAC_CNTL                  0x088c
+#       define RADEON_TV_DAC_NBLANK         (1 << 0)
+#       define RADEON_TV_DAC_NHOLD          (1 << 1)
+#       define RADEON_TV_DAC_PEDESTAL       (1 <<  2)
+#       define RADEON_TV_MONITOR_DETECT_EN  (1 <<  4)
+#       define RADEON_TV_DAC_CMPOUT         (1 <<  5)
+#       define RADEON_TV_DAC_STD_MASK       (3 <<  8)
+#       define RADEON_TV_DAC_STD_PAL        (0 <<  8)
+#       define RADEON_TV_DAC_STD_NTSC       (1 <<  8)
+#       define RADEON_TV_DAC_STD_PS2        (2 <<  8)
+#       define RADEON_TV_DAC_STD_RS343      (3 <<  8)
+#       define RADEON_TV_DAC_BGSLEEP        (1 <<  6)
+#       define RADEON_TV_DAC_BGADJ_MASK     (0xf <<  16)
+#       define RADEON_TV_DAC_BGADJ_SHIFT    16
+#       define RADEON_TV_DAC_DACADJ_MASK    (0xf <<  20)
+#       define RADEON_TV_DAC_DACADJ_SHIFT   20
+#       define RADEON_TV_DAC_RDACPD         (1 <<  24)
+#       define RADEON_TV_DAC_GDACPD         (1 <<  25)
+#       define RADEON_TV_DAC_BDACPD         (1 <<  26)
+#       define RADEON_TV_DAC_RDACDET        (1 << 29)
+#       define RADEON_TV_DAC_GDACDET        (1 << 30)
+#       define RADEON_TV_DAC_BDACDET        (1 << 31)
+#       define R420_TV_DAC_DACADJ_MASK      (0x1f <<  20)
+#       define R420_TV_DAC_RDACPD           (1 <<  25)
+#       define R420_TV_DAC_GDACPD           (1 <<  26)
+#       define R420_TV_DAC_BDACPD           (1 <<  27)
+#       define R420_TV_DAC_TVENABLE         (1 <<  28)
+#define RADEON_DISP_HW_DEBUG                0x0d14
+#       define RADEON_CRT2_DISP1_SEL        (1 <<  5)
+#define RADEON_DISP_OUTPUT_CNTL             0x0d64
+#       define RADEON_DISP_DAC_SOURCE_MASK  0x03
+#       define RADEON_DISP_DAC2_SOURCE_MASK  0x0c
+#       define RADEON_DISP_DAC_SOURCE_CRTC2 0x01
+#       define RADEON_DISP_DAC_SOURCE_RMX   0x02
+#       define RADEON_DISP_DAC_SOURCE_LTU   0x03
+#       define RADEON_DISP_DAC2_SOURCE_CRTC2 0x04
+#       define RADEON_DISP_TVDAC_SOURCE_MASK  (0x03 << 2)
+#       define RADEON_DISP_TVDAC_SOURCE_CRTC  0x0
+#       define RADEON_DISP_TVDAC_SOURCE_CRTC2 (0x01 << 2)
+#       define RADEON_DISP_TVDAC_SOURCE_RMX   (0x02 << 2)
+#       define RADEON_DISP_TVDAC_SOURCE_LTU   (0x03 << 2)
+#       define RADEON_DISP_TRANS_MATRIX_MASK  (0x03 << 4)
+#       define RADEON_DISP_TRANS_MATRIX_ALPHA_MSB (0x00 << 4)
+#       define RADEON_DISP_TRANS_MATRIX_GRAPHICS  (0x01 << 4)
+#       define RADEON_DISP_TRANS_MATRIX_VIDEO     (0x02 << 4)
+#       define RADEON_DISP_TV_SOURCE_CRTC   (1 << 16) /* crtc1 or crtc2 */
+#       define RADEON_DISP_TV_SOURCE_LTU    (0 << 16) /* linear transform unit */
+#define RADEON_DISP_TV_OUT_CNTL             0x0d6c
+#       define RADEON_DISP_TV_PATH_SRC_CRTC2 (1 << 16)
+#       define RADEON_DISP_TV_PATH_SRC_CRTC1 (0 << 16)
+#define RADEON_DAC_CRC_SIG                  0x02cc
+#define RADEON_DAC_DATA                     0x03c9 /* VGA */
+#define RADEON_DAC_MASK                     0x03c6 /* VGA */
+#define RADEON_DAC_R_INDEX                  0x03c7 /* VGA */
+#define RADEON_DAC_W_INDEX                  0x03c8 /* VGA */
+#define RADEON_DDA_CONFIG                   0x02e0
+#define RADEON_DDA_ON_OFF                   0x02e4
+#define RADEON_DEFAULT_OFFSET               0x16e0
+#define RADEON_DEFAULT_PITCH                0x16e4
+#define RADEON_DEFAULT_SC_BOTTOM_RIGHT      0x16e8
+#       define RADEON_DEFAULT_SC_RIGHT_MAX  (0x1fff <<  0)
+#       define RADEON_DEFAULT_SC_BOTTOM_MAX (0x1fff << 16)
+#define RADEON_DESTINATION_3D_CLR_CMP_VAL   0x1820
+#define RADEON_DESTINATION_3D_CLR_CMP_MSK   0x1824
+#define RADEON_DEVICE_ID                    0x0f02 /* PCI */
+#define RADEON_DISP_MISC_CNTL               0x0d00
+#       define RADEON_SOFT_RESET_GRPH_PP    (1 << 0)
+#define RADEON_DISP_MERGE_CNTL		  0x0d60
+#       define RADEON_DISP_ALPHA_MODE_MASK  0x03
+#       define RADEON_DISP_ALPHA_MODE_KEY   0
+#       define RADEON_DISP_ALPHA_MODE_PER_PIXEL 1
+#       define RADEON_DISP_ALPHA_MODE_GLOBAL 2
+#       define RADEON_DISP_RGB_OFFSET_EN    (1 << 8)
+#       define RADEON_DISP_GRPH_ALPHA_MASK  (0xff << 16)
+#       define RADEON_DISP_OV0_ALPHA_MASK   (0xff << 24)
+#	define RADEON_DISP_LIN_TRANS_BYPASS (0x01 << 9)
+#define RADEON_DISP2_MERGE_CNTL		    0x0d68
+#       define RADEON_DISP2_RGB_OFFSET_EN   (1 << 8)
+#define RADEON_DISP_LIN_TRANS_GRPH_A        0x0d80
+#define RADEON_DISP_LIN_TRANS_GRPH_B        0x0d84
+#define RADEON_DISP_LIN_TRANS_GRPH_C        0x0d88
+#define RADEON_DISP_LIN_TRANS_GRPH_D        0x0d8c
+#define RADEON_DISP_LIN_TRANS_GRPH_E        0x0d90
+#define RADEON_DISP_LIN_TRANS_GRPH_F        0x0d98
+#define RADEON_DP_BRUSH_BKGD_CLR            0x1478
+#define RADEON_DP_BRUSH_FRGD_CLR            0x147c
+#define RADEON_DP_CNTL                      0x16c0
+#       define RADEON_DST_X_LEFT_TO_RIGHT   (1 <<  0)
+#       define RADEON_DST_Y_TOP_TO_BOTTOM   (1 <<  1)
+#       define RADEON_DP_DST_TILE_LINEAR    (0 <<  3)
+#       define RADEON_DP_DST_TILE_MACRO     (1 <<  3)
+#       define RADEON_DP_DST_TILE_MICRO     (2 <<  3)
+#       define RADEON_DP_DST_TILE_BOTH      (3 <<  3)
+#define RADEON_DP_CNTL_XDIR_YDIR_YMAJOR     0x16d0
+#       define RADEON_DST_Y_MAJOR             (1 <<  2)
+#       define RADEON_DST_Y_DIR_TOP_TO_BOTTOM (1 << 15)
+#       define RADEON_DST_X_DIR_LEFT_TO_RIGHT (1 << 31)
+#define RADEON_DP_DATATYPE                  0x16c4
+#       define RADEON_HOST_BIG_ENDIAN_EN    (1 << 29)
+#define RADEON_DP_GUI_MASTER_CNTL           0x146c
+#       define RADEON_GMC_SRC_PITCH_OFFSET_CNTL   (1    <<  0)
+#       define RADEON_GMC_DST_PITCH_OFFSET_CNTL   (1    <<  1)
+#       define RADEON_GMC_SRC_CLIPPING            (1    <<  2)
+#       define RADEON_GMC_DST_CLIPPING            (1    <<  3)
+#       define RADEON_GMC_BRUSH_DATATYPE_MASK     (0x0f <<  4)
+#       define RADEON_GMC_BRUSH_8X8_MONO_FG_BG    (0    <<  4)
+#       define RADEON_GMC_BRUSH_8X8_MONO_FG_LA    (1    <<  4)
+#       define RADEON_GMC_BRUSH_1X8_MONO_FG_BG    (4    <<  4)
+#       define RADEON_GMC_BRUSH_1X8_MONO_FG_LA    (5    <<  4)
+#       define RADEON_GMC_BRUSH_32x1_MONO_FG_BG   (6    <<  4)
+#       define RADEON_GMC_BRUSH_32x1_MONO_FG_LA   (7    <<  4)
+#       define RADEON_GMC_BRUSH_32x32_MONO_FG_BG  (8    <<  4)
+#       define RADEON_GMC_BRUSH_32x32_MONO_FG_LA  (9    <<  4)
+#       define RADEON_GMC_BRUSH_8x8_COLOR         (10   <<  4)
+#       define RADEON_GMC_BRUSH_1X8_COLOR         (12   <<  4)
+#       define RADEON_GMC_BRUSH_SOLID_COLOR       (13   <<  4)
+#       define RADEON_GMC_BRUSH_NONE              (15   <<  4)
+#       define RADEON_GMC_DST_8BPP_CI             (2    <<  8)
+#       define RADEON_GMC_DST_15BPP               (3    <<  8)
+#       define RADEON_GMC_DST_16BPP               (4    <<  8)
+#       define RADEON_GMC_DST_24BPP               (5    <<  8)
+#       define RADEON_GMC_DST_32BPP               (6    <<  8)
+#       define RADEON_GMC_DST_8BPP_RGB            (7    <<  8)
+#       define RADEON_GMC_DST_Y8                  (8    <<  8)
+#       define RADEON_GMC_DST_RGB8                (9    <<  8)
+#       define RADEON_GMC_DST_VYUY                (11   <<  8)
+#       define RADEON_GMC_DST_YVYU                (12   <<  8)
+#       define RADEON_GMC_DST_AYUV444             (14   <<  8)
+#       define RADEON_GMC_DST_ARGB4444            (15   <<  8)
+#       define RADEON_GMC_DST_DATATYPE_MASK       (0x0f <<  8)
+#       define RADEON_GMC_DST_DATATYPE_SHIFT      8
+#       define RADEON_GMC_SRC_DATATYPE_MASK       (3    << 12)
+#       define RADEON_GMC_SRC_DATATYPE_MONO_FG_BG (0    << 12)
+#       define RADEON_GMC_SRC_DATATYPE_MONO_FG_LA (1    << 12)
+#       define RADEON_GMC_SRC_DATATYPE_COLOR      (3    << 12)
+#       define RADEON_GMC_BYTE_PIX_ORDER          (1    << 14)
+#       define RADEON_GMC_BYTE_MSB_TO_LSB         (0    << 14)
+#       define RADEON_GMC_BYTE_LSB_TO_MSB         (1    << 14)
+#       define RADEON_GMC_CONVERSION_TEMP         (1    << 15)
+#       define RADEON_GMC_CONVERSION_TEMP_6500    (0    << 15)
+#       define RADEON_GMC_CONVERSION_TEMP_9300    (1    << 15)
+#       define RADEON_GMC_ROP3_MASK               (0xff << 16)
+#       define RADEON_DP_SRC_SOURCE_MASK          (7    << 24)
+#       define RADEON_DP_SRC_SOURCE_MEMORY        (2    << 24)
+#       define RADEON_DP_SRC_SOURCE_HOST_DATA     (3    << 24)
+#       define RADEON_GMC_3D_FCN_EN               (1    << 27)
+#       define RADEON_GMC_CLR_CMP_CNTL_DIS        (1    << 28)
+#       define RADEON_GMC_AUX_CLIP_DIS            (1    << 29)
+#       define RADEON_GMC_WR_MSK_DIS              (1    << 30)
+#       define RADEON_GMC_LD_BRUSH_Y_X            (1    << 31)
+#       define RADEON_ROP3_ZERO             0x00000000
+#       define RADEON_ROP3_DSa              0x00880000
+#       define RADEON_ROP3_SDna             0x00440000
+#       define RADEON_ROP3_S                0x00cc0000
+#       define RADEON_ROP3_DSna             0x00220000
+#       define RADEON_ROP3_D                0x00aa0000
+#       define RADEON_ROP3_DSx              0x00660000
+#       define RADEON_ROP3_DSo              0x00ee0000
+#       define RADEON_ROP3_DSon             0x00110000
+#       define RADEON_ROP3_DSxn             0x00990000
+#       define RADEON_ROP3_Dn               0x00550000
+#       define RADEON_ROP3_SDno             0x00dd0000
+#       define RADEON_ROP3_Sn               0x00330000
+#       define RADEON_ROP3_DSno             0x00bb0000
+#       define RADEON_ROP3_DSan             0x00770000
+#       define RADEON_ROP3_ONE              0x00ff0000
+#       define RADEON_ROP3_DPa              0x00a00000
+#       define RADEON_ROP3_PDna             0x00500000
+#       define RADEON_ROP3_P                0x00f00000
+#       define RADEON_ROP3_DPna             0x000a0000
+#       define RADEON_ROP3_D                0x00aa0000
+#       define RADEON_ROP3_DPx              0x005a0000
+#       define RADEON_ROP3_DPo              0x00fa0000
+#       define RADEON_ROP3_DPon             0x00050000
+#       define RADEON_ROP3_PDxn             0x00a50000
+#       define RADEON_ROP3_PDno             0x00f50000
+#       define RADEON_ROP3_Pn               0x000f0000
+#       define RADEON_ROP3_DPno             0x00af0000
+#       define RADEON_ROP3_DPan             0x005f0000
+#define RADEON_DP_GUI_MASTER_CNTL_C         0x1c84
+#define RADEON_DP_MIX                       0x16c8
+#define RADEON_DP_SRC_BKGD_CLR              0x15dc
+#define RADEON_DP_SRC_FRGD_CLR              0x15d8
+#define RADEON_DP_WRITE_MASK                0x16cc
+#define RADEON_DST_BRES_DEC                 0x1630
+#define RADEON_DST_BRES_ERR                 0x1628
+#define RADEON_DST_BRES_INC                 0x162c
+#define RADEON_DST_BRES_LNTH                0x1634
+#define RADEON_DST_BRES_LNTH_SUB            0x1638
+#define RADEON_DST_HEIGHT                   0x1410
+#define RADEON_DST_HEIGHT_WIDTH             0x143c
+#define RADEON_DST_HEIGHT_WIDTH_8           0x158c
+#define RADEON_DST_HEIGHT_WIDTH_BW          0x15b4
+#define RADEON_DST_HEIGHT_Y                 0x15a0
+#define RADEON_DST_LINE_START               0x1600
+#define RADEON_DST_LINE_END                 0x1604
+#define RADEON_DST_LINE_PATCOUNT            0x1608
+#       define RADEON_BRES_CNTL_SHIFT       8
+#define RADEON_DST_OFFSET                   0x1404
+#define RADEON_DST_PITCH                    0x1408
+#define RADEON_DST_PITCH_OFFSET             0x142c
+#define RADEON_DST_PITCH_OFFSET_C           0x1c80
+#       define RADEON_PITCH_SHIFT           21
+#       define RADEON_DST_TILE_LINEAR       (0 << 30)
+#       define RADEON_DST_TILE_MACRO        (1 << 30)
+#       define RADEON_DST_TILE_MICRO        (2 << 30)
+#       define RADEON_DST_TILE_BOTH         (3 << 30)
+#define RADEON_DST_WIDTH                    0x140c
+#define RADEON_DST_WIDTH_HEIGHT             0x1598
+#define RADEON_DST_WIDTH_X                  0x1588
+#define RADEON_DST_WIDTH_X_INCY             0x159c
+#define RADEON_DST_X                        0x141c
+#define RADEON_DST_X_SUB                    0x15a4
+#define RADEON_DST_X_Y                      0x1594
+#define RADEON_DST_Y                        0x1420
+#define RADEON_DST_Y_SUB                    0x15a8
+#define RADEON_DST_Y_X                      0x1438
+
+#define RADEON_FCP_CNTL                     0x0910
+#      define RADEON_FCP0_SRC_PCICLK             0
+#      define RADEON_FCP0_SRC_PCLK               1
+#      define RADEON_FCP0_SRC_PCLKb              2
+#      define RADEON_FCP0_SRC_HREF               3
+#      define RADEON_FCP0_SRC_GND                4
+#      define RADEON_FCP0_SRC_HREFb              5
+#define RADEON_FLUSH_1                      0x1704
+#define RADEON_FLUSH_2                      0x1708
+#define RADEON_FLUSH_3                      0x170c
+#define RADEON_FLUSH_4                      0x1710
+#define RADEON_FLUSH_5                      0x1714
+#define RADEON_FLUSH_6                      0x1718
+#define RADEON_FLUSH_7                      0x171c
+#define RADEON_FOG_3D_TABLE_START           0x1810
+#define RADEON_FOG_3D_TABLE_END             0x1814
+#define RADEON_FOG_3D_TABLE_DENSITY         0x181c
+#define RADEON_FOG_TABLE_INDEX              0x1a14
+#define RADEON_FOG_TABLE_DATA               0x1a18
+#define RADEON_FP_CRTC_H_TOTAL_DISP         0x0250
+#define RADEON_FP_CRTC_V_TOTAL_DISP         0x0254
+#       define RADEON_FP_CRTC_H_TOTAL_MASK      0x000003ff
+#       define RADEON_FP_CRTC_H_DISP_MASK       0x01ff0000
+#       define RADEON_FP_CRTC_V_TOTAL_MASK      0x00000fff
+#       define RADEON_FP_CRTC_V_DISP_MASK       0x0fff0000
+#       define RADEON_FP_H_SYNC_STRT_CHAR_MASK  0x00001ff8
+#       define RADEON_FP_H_SYNC_WID_MASK        0x003f0000
+#       define RADEON_FP_V_SYNC_STRT_MASK       0x00000fff
+#       define RADEON_FP_V_SYNC_WID_MASK        0x001f0000
+#       define RADEON_FP_CRTC_H_TOTAL_SHIFT     0x00000000
+#       define RADEON_FP_CRTC_H_DISP_SHIFT      0x00000010
+#       define RADEON_FP_CRTC_V_TOTAL_SHIFT     0x00000000
+#       define RADEON_FP_CRTC_V_DISP_SHIFT      0x00000010
+#       define RADEON_FP_H_SYNC_STRT_CHAR_SHIFT 0x00000003
+#       define RADEON_FP_H_SYNC_WID_SHIFT       0x00000010
+#       define RADEON_FP_V_SYNC_STRT_SHIFT      0x00000000
+#       define RADEON_FP_V_SYNC_WID_SHIFT       0x00000010
+#define RADEON_FP_GEN_CNTL                  0x0284
+#       define RADEON_FP_FPON                  (1 <<  0)
+#       define RADEON_FP_BLANK_EN              (1 <<  1)
+#       define RADEON_FP_TMDS_EN               (1 <<  2)
+#       define RADEON_FP_PANEL_FORMAT          (1 <<  3)
+#       define RADEON_FP_EN_TMDS               (1 <<  7)
+#       define RADEON_FP_DETECT_SENSE          (1 <<  8)
+#       define R200_FP_SOURCE_SEL_MASK         (3 <<  10)
+#       define R200_FP_SOURCE_SEL_CRTC1        (0 <<  10)
+#       define R200_FP_SOURCE_SEL_CRTC2        (1 <<  10)
+#       define R200_FP_SOURCE_SEL_RMX          (2 <<  10)
+#       define R200_FP_SOURCE_SEL_TRANS        (3 <<  10)
+#       define RADEON_FP_SEL_CRTC1             (0 << 13)
+#       define RADEON_FP_SEL_CRTC2             (1 << 13)
+#       define RADEON_FP_CRTC_DONT_SHADOW_HPAR (1 << 15)
+#       define RADEON_FP_CRTC_DONT_SHADOW_VPAR (1 << 16)
+#       define RADEON_FP_CRTC_DONT_SHADOW_HEND (1 << 17)
+#       define RADEON_FP_CRTC_USE_SHADOW_VEND  (1 << 18)
+#       define RADEON_FP_RMX_HVSYNC_CONTROL_EN (1 << 20)
+#       define RADEON_FP_DFP_SYNC_SEL          (1 << 21)
+#       define RADEON_FP_CRTC_LOCK_8DOT        (1 << 22)
+#       define RADEON_FP_CRT_SYNC_SEL          (1 << 23)
+#       define RADEON_FP_USE_SHADOW_EN         (1 << 24)
+#       define RADEON_FP_CRT_SYNC_ALT          (1 << 26)
+#define RADEON_FP2_GEN_CNTL                 0x0288
+#       define RADEON_FP2_BLANK_EN             (1 <<  1)
+#       define RADEON_FP2_ON                   (1 <<  2)
+#       define RADEON_FP2_PANEL_FORMAT         (1 <<  3)
+#       define RADEON_FP2_DETECT_SENSE         (1 <<  8)
+#       define R200_FP2_SOURCE_SEL_MASK        (3 << 10)
+#       define R200_FP2_SOURCE_SEL_CRTC1       (0 << 10)
+#       define R200_FP2_SOURCE_SEL_CRTC2       (1 << 10)
+#       define R200_FP2_SOURCE_SEL_RMX         (2 << 10)
+#       define R200_FP2_SOURCE_SEL_TRANS_UNIT  (3 << 10)
+#       define RADEON_FP2_SRC_SEL_MASK         (3 << 13)
+#       define RADEON_FP2_SRC_SEL_CRTC2        (1 << 13)
+#       define RADEON_FP2_FP_POL               (1 << 16)
+#       define RADEON_FP2_LP_POL               (1 << 17)
+#       define RADEON_FP2_SCK_POL              (1 << 18)
+#       define RADEON_FP2_LCD_CNTL_MASK        (7 << 19)
+#       define RADEON_FP2_PAD_FLOP_EN          (1 << 22)
+#       define RADEON_FP2_CRC_EN               (1 << 23)
+#       define RADEON_FP2_CRC_READ_EN          (1 << 24)
+#       define RADEON_FP2_DVO_EN               (1 << 25)
+#       define RADEON_FP2_DVO_RATE_SEL_SDR     (1 << 26)
+#       define R200_FP2_DVO_RATE_SEL_SDR       (1 << 27)
+#       define R300_FP2_DVO_CLOCK_MODE_SINGLE  (1 << 28)
+#       define R300_FP2_DVO_DUAL_CHANNEL_EN    (1 << 29)
+#define RADEON_FP_H_SYNC_STRT_WID           0x02c4
+#define RADEON_FP_H2_SYNC_STRT_WID          0x03c4
+#define RADEON_FP_HORZ_STRETCH              0x028c
+#define RADEON_FP_HORZ2_STRETCH             0x038c
+#       define RADEON_HORZ_STRETCH_RATIO_MASK 0xffff
+#       define RADEON_HORZ_STRETCH_RATIO_MAX  4096
+#       define RADEON_HORZ_PANEL_SIZE         (0x1ff   << 16)
+#       define RADEON_HORZ_PANEL_SHIFT        16
+#       define RADEON_HORZ_STRETCH_PIXREP     (0      << 25)
+#       define RADEON_HORZ_STRETCH_BLEND      (1      << 26)
+#       define RADEON_HORZ_STRETCH_ENABLE     (1      << 25)
+#       define RADEON_HORZ_AUTO_RATIO         (1      << 27)
+#       define RADEON_HORZ_FP_LOOP_STRETCH    (0x7    << 28)
+#       define RADEON_HORZ_AUTO_RATIO_INC     (1      << 31)
+#define RADEON_FP_HORZ_VERT_ACTIVE          0x0278
+#define RADEON_FP_V_SYNC_STRT_WID           0x02c8
+#define RADEON_FP_VERT_STRETCH              0x0290
+#define RADEON_FP_V2_SYNC_STRT_WID          0x03c8
+#define RADEON_FP_VERT2_STRETCH             0x0390
+#       define RADEON_VERT_PANEL_SIZE          (0xfff << 12)
+#       define RADEON_VERT_PANEL_SHIFT         12
+#       define RADEON_VERT_STRETCH_RATIO_MASK  0xfff
+#       define RADEON_VERT_STRETCH_RATIO_SHIFT 0
+#       define RADEON_VERT_STRETCH_RATIO_MAX   4096
+#       define RADEON_VERT_STRETCH_ENABLE      (1     << 25)
+#       define RADEON_VERT_STRETCH_LINEREP     (0     << 26)
+#       define RADEON_VERT_STRETCH_BLEND       (1     << 26)
+#       define RADEON_VERT_AUTO_RATIO_EN       (1     << 27)
+#	define RADEON_VERT_AUTO_RATIO_INC      (1     << 31)
+#       define RADEON_VERT_STRETCH_RESERVED    0x71000000
+#define RS400_FP_2ND_GEN_CNTL               0x0384
+#       define RS400_FP_2ND_ON              (1 << 0)
+#       define RS400_FP_2ND_BLANK_EN        (1 << 1)
+#       define RS400_TMDS_2ND_EN            (1 << 2)
+#       define RS400_PANEL_FORMAT_2ND       (1 << 3)
+#       define RS400_FP_2ND_EN_TMDS         (1 << 7)
+#       define RS400_FP_2ND_DETECT_SENSE    (1 << 8)
+#       define RS400_FP_2ND_SOURCE_SEL_MASK        (3 << 10)
+#       define RS400_FP_2ND_SOURCE_SEL_CRTC1       (0 << 10)
+#       define RS400_FP_2ND_SOURCE_SEL_CRTC2       (1 << 10)
+#       define RS400_FP_2ND_SOURCE_SEL_RMX         (2 << 10)
+#       define RS400_FP_2ND_DETECT_EN       (1 << 12)
+#       define RS400_HPD_2ND_SEL            (1 << 13)
+#define RS400_FP2_2_GEN_CNTL                0x0388
+#       define RS400_FP2_2_BLANK_EN         (1 << 1)
+#       define RS400_FP2_2_ON               (1 << 2)
+#       define RS400_FP2_2_PANEL_FORMAT     (1 << 3)
+#       define RS400_FP2_2_DETECT_SENSE     (1 << 8)
+#       define RS400_FP2_2_SOURCE_SEL_MASK        (3 << 10)
+#       define RS400_FP2_2_SOURCE_SEL_CRTC1       (0 << 10)
+#       define RS400_FP2_2_SOURCE_SEL_CRTC2       (1 << 10)
+#       define RS400_FP2_2_SOURCE_SEL_RMX         (2 << 10)
+#       define RS400_FP2_2_DVO2_EN          (1 << 25)
+#define RS400_TMDS2_CNTL                    0x0394
+#define RS400_TMDS2_TRANSMITTER_CNTL        0x03a4
+#       define RS400_TMDS2_PLLEN            (1 << 0)
+#       define RS400_TMDS2_PLLRST           (1 << 1)
+
+#define RADEON_GEN_INT_CNTL                 0x0040
+#	define RADEON_SW_INT_ENABLE		(1 << 25)
+#define RADEON_GEN_INT_STATUS               0x0044
+#       define RADEON_VSYNC_INT_AK          (1 <<  2)
+#       define RADEON_VSYNC_INT             (1 <<  2)
+#       define RADEON_VSYNC2_INT_AK         (1 <<  6)
+#       define RADEON_VSYNC2_INT            (1 <<  6)
+#	define RADEON_SW_INT_FIRE		(1 << 26)
+#	define RADEON_SW_INT_TEST		(1 << 25)
+#	define RADEON_SW_INT_TEST_ACK		(1 << 25)
+#define RADEON_GENENB                       0x03c3 /* VGA */
+#define RADEON_GENFC_RD                     0x03ca /* VGA */
+#define RADEON_GENFC_WT                     0x03da /* VGA, 0x03ba */
+#define RADEON_GENMO_RD                     0x03cc /* VGA */
+#define RADEON_GENMO_WT                     0x03c2 /* VGA */
+#define RADEON_GENS0                        0x03c2 /* VGA */
+#define RADEON_GENS1                        0x03da /* VGA, 0x03ba */
+#define RADEON_GPIO_MONID                   0x0068 /* DDC interface via I2C */ /* DDC3 */
+#define RADEON_GPIO_MONIDB                  0x006c
+#define RADEON_GPIO_CRT2_DDC                0x006c
+#define RADEON_GPIO_DVI_DDC                 0x0064 /* DDC2 */
+#define RADEON_GPIO_VGA_DDC                 0x0060 /* DDC1 */
+#       define RADEON_GPIO_A_0              (1 <<  0)
+#       define RADEON_GPIO_A_1              (1 <<  1)
+#       define RADEON_GPIO_Y_0              (1 <<  8)
+#       define RADEON_GPIO_Y_1              (1 <<  9)
+#       define RADEON_GPIO_Y_SHIFT_0        8
+#       define RADEON_GPIO_Y_SHIFT_1        9
+#       define RADEON_GPIO_EN_0             (1 << 16)
+#       define RADEON_GPIO_EN_1             (1 << 17)
+#       define RADEON_GPIO_MASK_0           (1 << 24) /*??*/
+#       define RADEON_GPIO_MASK_1           (1 << 25) /*??*/
+#define RADEON_GRPH8_DATA                   0x03cf /* VGA */
+#define RADEON_GRPH8_IDX                    0x03ce /* VGA */
+#define RADEON_GUI_SCRATCH_REG0             0x15e0
+#define RADEON_GUI_SCRATCH_REG1             0x15e4
+#define RADEON_GUI_SCRATCH_REG2             0x15e8
+#define RADEON_GUI_SCRATCH_REG3             0x15ec
+#define RADEON_GUI_SCRATCH_REG4             0x15f0
+#define RADEON_GUI_SCRATCH_REG5             0x15f4
+
+#define RADEON_HEADER                       0x0f0e /* PCI */
+#define RADEON_HOST_DATA0                   0x17c0
+#define RADEON_HOST_DATA1                   0x17c4
+#define RADEON_HOST_DATA2                   0x17c8
+#define RADEON_HOST_DATA3                   0x17cc
+#define RADEON_HOST_DATA4                   0x17d0
+#define RADEON_HOST_DATA5                   0x17d4
+#define RADEON_HOST_DATA6                   0x17d8
+#define RADEON_HOST_DATA7                   0x17dc
+#define RADEON_HOST_DATA_LAST               0x17e0
+#define RADEON_HOST_PATH_CNTL               0x0130
+#	define RADEON_HP_LIN_RD_CACHE_DIS   (1 << 24)
+#	define RADEON_HDP_READ_BUFFER_INVALIDATE   (1 << 27)
+#       define RADEON_HDP_SOFT_RESET        (1 << 26)
+#       define RADEON_HDP_APER_CNTL         (1 << 23)
+#define RADEON_HTOTAL_CNTL                  0x0009 /* PLL */
+#       define RADEON_HTOT_CNTL_VGA_EN      (1 << 28)
+#define RADEON_HTOTAL2_CNTL                 0x002e /* PLL */
+
+       /* Multimedia I2C bus */
+#define RADEON_I2C_CNTL_0		    0x0090
+#define RADEON_I2C_DONE (1<<0)
+#define RADEON_I2C_NACK (1<<1)
+#define RADEON_I2C_HALT (1<<2)
+#define RADEON_I2C_SOFT_RST (1<<5)
+#define RADEON_I2C_DRIVE_EN (1<<6)
+#define RADEON_I2C_DRIVE_SEL (1<<7)
+#define RADEON_I2C_START (1<<8)
+#define RADEON_I2C_STOP (1<<9)
+#define RADEON_I2C_RECEIVE (1<<10)
+#define RADEON_I2C_ABORT (1<<11)
+#define RADEON_I2C_GO (1<<12)
+#define RADEON_I2C_CNTL_1                   0x0094
+#define RADEON_I2C_SEL         (1<<16)
+#define RADEON_I2C_EN          (1<<17)
+#define RADEON_I2C_DATA			    0x0098
+
+#define RADEON_DVI_I2C_CNTL_0		    0x02e0
+#       define R200_DVI_I2C_PIN_SEL(x)      ((x) << 3)
+#       define R200_SEL_DDC1                0 /* 0x60 - VGA_DDC */
+#       define R200_SEL_DDC2                1 /* 0x64 - DVI_DDC */
+#       define R200_SEL_DDC3                2 /* 0x68 - MONID_DDC */
+#define RADEON_DVI_I2C_CNTL_1               0x02e4 /* ? */
+#define RADEON_DVI_I2C_DATA		    0x02e8
+
+#define RADEON_INTERRUPT_LINE               0x0f3c /* PCI */
+#define RADEON_INTERRUPT_PIN                0x0f3d /* PCI */
+#define RADEON_IO_BASE                      0x0f14 /* PCI */
+
+#define RADEON_LATENCY                      0x0f0d /* PCI */
+#define RADEON_LEAD_BRES_DEC                0x1608
+#define RADEON_LEAD_BRES_LNTH               0x161c
+#define RADEON_LEAD_BRES_LNTH_SUB           0x1624
+#define RADEON_LVDS_GEN_CNTL                0x02d0
+#       define RADEON_LVDS_ON               (1   <<  0)
+#       define RADEON_LVDS_DISPLAY_DIS      (1   <<  1)
+#       define RADEON_LVDS_PANEL_TYPE       (1   <<  2)
+#       define RADEON_LVDS_PANEL_FORMAT     (1   <<  3)
+#       define RADEON_LVDS_NO_FM            (0   <<  4)
+#       define RADEON_LVDS_2_GREY           (1   <<  4)
+#       define RADEON_LVDS_4_GREY           (2   <<  4)
+#       define RADEON_LVDS_RST_FM           (1   <<  6)
+#       define RADEON_LVDS_EN               (1   <<  7)
+#       define RADEON_LVDS_BL_MOD_LEVEL_SHIFT 8
+#       define RADEON_LVDS_BL_MOD_LEVEL_MASK (0xff << 8)
+#       define RADEON_LVDS_BL_MOD_EN        (1   << 16)
+#       define RADEON_LVDS_BL_CLK_SEL       (1   << 17)
+#       define RADEON_LVDS_DIGON            (1   << 18)
+#       define RADEON_LVDS_BLON             (1   << 19)
+#       define RADEON_LVDS_FP_POL_LOW       (1   << 20)
+#       define RADEON_LVDS_LP_POL_LOW       (1   << 21)
+#       define RADEON_LVDS_DTM_POL_LOW      (1   << 22)
+#       define RADEON_LVDS_SEL_CRTC2        (1   << 23)
+#       define RADEON_LVDS_FPDI_EN          (1   << 27)
+#       define RADEON_LVDS_HSYNC_DELAY_SHIFT        28
+#define RADEON_LVDS_PLL_CNTL                0x02d4
+#       define RADEON_HSYNC_DELAY_SHIFT     28
+#       define RADEON_HSYNC_DELAY_MASK      (0xf << 28)
+#       define RADEON_LVDS_PLL_EN           (1   << 16)
+#       define RADEON_LVDS_PLL_RESET        (1   << 17)
+#       define R300_LVDS_SRC_SEL_MASK       (3   << 18)
+#       define R300_LVDS_SRC_SEL_CRTC1      (0   << 18)
+#       define R300_LVDS_SRC_SEL_CRTC2      (1   << 18)
+#       define R300_LVDS_SRC_SEL_RMX        (2   << 18)
+#define RADEON_LVDS_SS_GEN_CNTL             0x02ec
+#       define RADEON_LVDS_PWRSEQ_DELAY1_SHIFT     16
+#       define RADEON_LVDS_PWRSEQ_DELAY2_SHIFT     20
+
+#define RADEON_MAX_LATENCY                  0x0f3f /* PCI */
+#define RADEON_DISPLAY_BASE_ADDR            0x23c
+#define RADEON_DISPLAY2_BASE_ADDR           0x33c
+#define RADEON_OV0_BASE_ADDR                0x43c
+#define RADEON_NB_TOM                       0x15c
+#define R300_MC_INIT_MISC_LAT_TIMER         0x180
+#       define R300_MC_DISP0R_INIT_LAT_SHIFT 8
+#       define R300_MC_DISP0R_INIT_LAT_MASK  0xf
+#       define R300_MC_DISP1R_INIT_LAT_SHIFT 12
+#       define R300_MC_DISP1R_INIT_LAT_MASK  0xf
+#define RADEON_MCLK_CNTL                    0x0012 /* PLL */
+#       define RADEON_MCLKA_SRC_SEL_MASK    0x7
+#       define RADEON_FORCEON_MCLKA         (1 << 16)
+#       define RADEON_FORCEON_MCLKB         (1 << 17)
+#       define RADEON_FORCEON_YCLKA         (1 << 18)
+#       define RADEON_FORCEON_YCLKB         (1 << 19)
+#       define RADEON_FORCEON_MC            (1 << 20)
+#       define RADEON_FORCEON_AIC           (1 << 21)
+#       define R300_DISABLE_MC_MCLKA        (1 << 21)
+#       define R300_DISABLE_MC_MCLKB        (1 << 21)
+#define RADEON_MCLK_MISC                    0x001f /* PLL */
+#       define RADEON_MC_MCLK_MAX_DYN_STOP_LAT (1 << 12)
+#       define RADEON_IO_MCLK_MAX_DYN_STOP_LAT (1 << 13)
+#       define RADEON_MC_MCLK_DYN_ENABLE    (1 << 14)
+#       define RADEON_IO_MCLK_DYN_ENABLE    (1 << 15)
+#define RADEON_LCD_GPIO_MASK                0x01a0
+#define RADEON_GPIOPAD_EN                   0x01a0
+#define RADEON_LCD_GPIO_Y_REG               0x01a4
+#define RADEON_MDGPIO_A_REG                 0x01ac
+#define RADEON_MDGPIO_EN_REG                0x01b0
+#define RADEON_MDGPIO_MASK                  0x0198
+#define RADEON_GPIOPAD_MASK                 0x0198
+#define RADEON_GPIOPAD_A		    0x019c
+#define RADEON_MDGPIO_Y_REG                 0x01b4
+#define RADEON_MEM_ADDR_CONFIG              0x0148
+#define RADEON_MEM_BASE                     0x0f10 /* PCI */
+#define RADEON_MEM_CNTL                     0x0140
+#       define RADEON_MEM_NUM_CHANNELS_MASK 0x01
+#       define RADEON_MEM_USE_B_CH_ONLY     (1 <<  1)
+#       define RV100_HALF_MODE              (1 <<  3)
+#       define R300_MEM_NUM_CHANNELS_MASK   0x03
+#       define R300_MEM_USE_CD_CH_ONLY      (1 <<  2)
+#define RADEON_MEM_TIMING_CNTL              0x0144 /* EXT_MEM_CNTL */
+#define RADEON_MEM_INIT_LAT_TIMER           0x0154
+#define RADEON_MEM_INTF_CNTL                0x014c
+#define RADEON_MEM_SDRAM_MODE_REG           0x0158
+#       define RADEON_SDRAM_MODE_MASK       0xffff0000
+#       define RADEON_B3MEM_RESET_MASK      0x6fffffff
+#       define RADEON_MEM_CFG_TYPE_DDR      (1 << 30)
+#define RADEON_MEM_STR_CNTL                 0x0150
+#       define RADEON_MEM_PWRUP_COMPL_A     (1 <<  0)
+#       define RADEON_MEM_PWRUP_COMPL_B     (1 <<  1)
+#       define R300_MEM_PWRUP_COMPL_C       (1 <<  2)
+#       define R300_MEM_PWRUP_COMPL_D       (1 <<  3)
+#       define RADEON_MEM_PWRUP_COMPLETE    0x03
+#       define R300_MEM_PWRUP_COMPLETE      0x0f
+#define RADEON_MC_STATUS                    0x0150
+#       define RADEON_MC_IDLE               (1 << 2)
+#       define R300_MC_IDLE                 (1 << 4)
+#define RADEON_MEM_VGA_RP_SEL               0x003c
+#define RADEON_MEM_VGA_WP_SEL               0x0038
+#define RADEON_MIN_GRANT                    0x0f3e /* PCI */
+#define RADEON_MM_DATA                      0x0004
+#define RADEON_MM_INDEX                     0x0000
+#	define RADEON_MM_APER		(1 << 31)
+#define RADEON_MPLL_CNTL                    0x000e /* PLL */
+#define RADEON_MPP_TB_CONFIG                0x01c0 /* ? */
+#define RADEON_MPP_GP_CONFIG                0x01c8 /* ? */
+#define RADEON_SEPROM_CNTL1                 0x01c0
+#       define RADEON_SCK_PRESCALE_SHIFT    24
+#       define RADEON_SCK_PRESCALE_MASK     (0xff << 24)
+#define R300_MC_IND_INDEX                   0x01f8
+#       define R300_MC_IND_ADDR_MASK        0x3f
+#       define R300_MC_IND_WR_EN            (1 << 8)
+#define R300_MC_IND_DATA                    0x01fc
+#define R300_MC_READ_CNTL_AB                0x017c
+#       define R300_MEM_RBS_POSITION_A_MASK 0x03
+#define R300_MC_READ_CNTL_CD_mcind	    0x24
+#       define R300_MEM_RBS_POSITION_C_MASK 0x03
+
+#define RADEON_N_VIF_COUNT                  0x0248
+
+#define RADEON_OV0_AUTO_FLIP_CNTL           0x0470
+#       define  RADEON_OV0_AUTO_FLIP_CNTL_SOFT_BUF_NUM        0x00000007
+#       define  RADEON_OV0_AUTO_FLIP_CNTL_SOFT_REPEAT_FIELD   0x00000008
+#       define  RADEON_OV0_AUTO_FLIP_CNTL_SOFT_BUF_ODD        0x00000010
+#       define  RADEON_OV0_AUTO_FLIP_CNTL_IGNORE_REPEAT_FIELD 0x00000020
+#       define  RADEON_OV0_AUTO_FLIP_CNTL_SOFT_EOF_TOGGLE     0x00000040
+#       define  RADEON_OV0_AUTO_FLIP_CNTL_VID_PORT_SELECT     0x00000300
+#       define  RADEON_OV0_AUTO_FLIP_CNTL_P1_FIRST_LINE_EVEN  0x00010000
+#       define  RADEON_OV0_AUTO_FLIP_CNTL_SHIFT_EVEN_DOWN     0x00040000
+#       define  RADEON_OV0_AUTO_FLIP_CNTL_SHIFT_ODD_DOWN      0x00080000
+#       define  RADEON_OV0_AUTO_FLIP_CNTL_FIELD_POL_SOURCE    0x00800000
+
+#define RADEON_OV0_COLOUR_CNTL              0x04E0
+#define RADEON_OV0_DEINTERLACE_PATTERN      0x0474
+#define RADEON_OV0_EXCLUSIVE_HORZ           0x0408
+#       define  RADEON_EXCL_HORZ_START_MASK        0x000000ff
+#       define  RADEON_EXCL_HORZ_END_MASK          0x0000ff00
+#       define  RADEON_EXCL_HORZ_BACK_PORCH_MASK   0x00ff0000
+#       define  RADEON_EXCL_HORZ_EXCLUSIVE_EN      0x80000000
+#define RADEON_OV0_EXCLUSIVE_VERT           0x040C
+#       define  RADEON_EXCL_VERT_START_MASK        0x000003ff
+#       define  RADEON_EXCL_VERT_END_MASK          0x03ff0000
+#define RADEON_OV0_FILTER_CNTL              0x04A0
+#       define RADEON_FILTER_PROGRAMMABLE_COEF            0x0
+#       define RADEON_FILTER_HC_COEF_HORZ_Y               0x1
+#       define RADEON_FILTER_HC_COEF_HORZ_UV              0x2
+#       define RADEON_FILTER_HC_COEF_VERT_Y               0x4
+#       define RADEON_FILTER_HC_COEF_VERT_UV              0x8
+#       define RADEON_FILTER_HARDCODED_COEF               0xf
+#       define RADEON_FILTER_COEF_MASK                    0xf
+
+#define RADEON_OV0_FOUR_TAP_COEF_0          0x04B0
+#define RADEON_OV0_FOUR_TAP_COEF_1          0x04B4
+#define RADEON_OV0_FOUR_TAP_COEF_2          0x04B8
+#define RADEON_OV0_FOUR_TAP_COEF_3          0x04BC
+#define RADEON_OV0_FOUR_TAP_COEF_4          0x04C0
+#define RADEON_OV0_FLAG_CNTL                0x04DC
+#define RADEON_OV0_GAMMA_000_00F            0x0d40
+#define RADEON_OV0_GAMMA_010_01F            0x0d44
+#define RADEON_OV0_GAMMA_020_03F            0x0d48
+#define RADEON_OV0_GAMMA_040_07F            0x0d4c
+#define RADEON_OV0_GAMMA_080_0BF            0x0e00
+#define RADEON_OV0_GAMMA_0C0_0FF            0x0e04
+#define RADEON_OV0_GAMMA_100_13F            0x0e08
+#define RADEON_OV0_GAMMA_140_17F            0x0e0c
+#define RADEON_OV0_GAMMA_180_1BF            0x0e10
+#define RADEON_OV0_GAMMA_1C0_1FF            0x0e14
+#define RADEON_OV0_GAMMA_200_23F            0x0e18
+#define RADEON_OV0_GAMMA_240_27F            0x0e1c
+#define RADEON_OV0_GAMMA_280_2BF            0x0e20
+#define RADEON_OV0_GAMMA_2C0_2FF            0x0e24
+#define RADEON_OV0_GAMMA_300_33F            0x0e28
+#define RADEON_OV0_GAMMA_340_37F            0x0e2c
+#define RADEON_OV0_GAMMA_380_3BF            0x0d50
+#define RADEON_OV0_GAMMA_3C0_3FF            0x0d54
+#define RADEON_OV0_GRAPHICS_KEY_CLR_LOW     0x04EC
+#define RADEON_OV0_GRAPHICS_KEY_CLR_HIGH    0x04F0
+#define RADEON_OV0_H_INC                    0x0480
+#define RADEON_OV0_KEY_CNTL                 0x04F4
+#       define  RADEON_VIDEO_KEY_FN_MASK    0x00000003L
+#       define  RADEON_VIDEO_KEY_FN_FALSE   0x00000000L
+#       define  RADEON_VIDEO_KEY_FN_TRUE    0x00000001L
+#       define  RADEON_VIDEO_KEY_FN_EQ      0x00000002L
+#       define  RADEON_VIDEO_KEY_FN_NE      0x00000003L
+#       define  RADEON_GRAPHIC_KEY_FN_MASK  0x00000030L
+#       define  RADEON_GRAPHIC_KEY_FN_FALSE 0x00000000L
+#       define  RADEON_GRAPHIC_KEY_FN_TRUE  0x00000010L
+#       define  RADEON_GRAPHIC_KEY_FN_EQ    0x00000020L
+#       define  RADEON_GRAPHIC_KEY_FN_NE    0x00000030L
+#       define  RADEON_CMP_MIX_MASK         0x00000100L
+#       define  RADEON_CMP_MIX_OR           0x00000000L
+#       define  RADEON_CMP_MIX_AND          0x00000100L
+#define RADEON_OV0_LIN_TRANS_A              0x0d20
+#define RADEON_OV0_LIN_TRANS_B              0x0d24
+#define RADEON_OV0_LIN_TRANS_C              0x0d28
+#define RADEON_OV0_LIN_TRANS_D              0x0d2c
+#define RADEON_OV0_LIN_TRANS_E              0x0d30
+#define RADEON_OV0_LIN_TRANS_F              0x0d34
+#define RADEON_OV0_P1_BLANK_LINES_AT_TOP    0x0430
+#       define  RADEON_P1_BLNK_LN_AT_TOP_M1_MASK   0x00000fffL
+#       define  RADEON_P1_ACTIVE_LINES_M1          0x0fff0000L
+#define RADEON_OV0_P1_H_ACCUM_INIT          0x0488
+#define RADEON_OV0_P1_V_ACCUM_INIT          0x0428
+#       define  RADEON_OV0_P1_MAX_LN_IN_PER_LN_OUT 0x00000003L
+#       define  RADEON_OV0_P1_V_ACCUM_INIT_MASK    0x01ff8000L
+#define RADEON_OV0_P1_X_START_END           0x0494
+#define RADEON_OV0_P2_X_START_END           0x0498
+#define RADEON_OV0_P23_BLANK_LINES_AT_TOP   0x0434
+#       define  RADEON_P23_BLNK_LN_AT_TOP_M1_MASK  0x000007ffL
+#       define  RADEON_P23_ACTIVE_LINES_M1         0x07ff0000L
+#define RADEON_OV0_P23_H_ACCUM_INIT         0x048C
+#define RADEON_OV0_P23_V_ACCUM_INIT         0x042C
+#define RADEON_OV0_P3_X_START_END           0x049C
+#define RADEON_OV0_REG_LOAD_CNTL            0x0410
+#       define  RADEON_REG_LD_CTL_LOCK                 0x00000001L
+#       define  RADEON_REG_LD_CTL_VBLANK_DURING_LOCK   0x00000002L
+#       define  RADEON_REG_LD_CTL_STALL_GUI_UNTIL_FLIP 0x00000004L
+#       define  RADEON_REG_LD_CTL_LOCK_READBACK        0x00000008L
+#       define  RADEON_REG_LD_CTL_FLIP_READBACK        0x00000010L
+#define RADEON_OV0_SCALE_CNTL               0x0420
+#       define  RADEON_SCALER_HORZ_PICK_NEAREST    0x00000004L
+#       define  RADEON_SCALER_VERT_PICK_NEAREST    0x00000008L
+#       define  RADEON_SCALER_SIGNED_UV            0x00000010L
+#       define  RADEON_SCALER_GAMMA_SEL_MASK       0x00000060L
+#       define  RADEON_SCALER_GAMMA_SEL_BRIGHT     0x00000000L
+#       define  RADEON_SCALER_GAMMA_SEL_G22        0x00000020L
+#       define  RADEON_SCALER_GAMMA_SEL_G18        0x00000040L
+#       define  RADEON_SCALER_GAMMA_SEL_G14        0x00000060L
+#       define  RADEON_SCALER_COMCORE_SHIFT_UP_ONE 0x00000080L
+#       define  RADEON_SCALER_SURFAC_FORMAT        0x00000f00L
+#       define  RADEON_SCALER_SOURCE_15BPP         0x00000300L
+#       define  RADEON_SCALER_SOURCE_16BPP         0x00000400L
+#       define  RADEON_SCALER_SOURCE_32BPP         0x00000600L
+#       define  RADEON_SCALER_SOURCE_YUV9          0x00000900L
+#       define  RADEON_SCALER_SOURCE_YUV12         0x00000A00L
+#       define  RADEON_SCALER_SOURCE_VYUY422       0x00000B00L
+#       define  RADEON_SCALER_SOURCE_YVYU422       0x00000C00L
+#       define  RADEON_SCALER_ADAPTIVE_DEINT       0x00001000L
+#       define  RADEON_SCALER_TEMPORAL_DEINT       0x00002000L
+#       define  RADEON_SCALER_CRTC_SEL             0x00004000L
+#       define  RADEON_SCALER_SMART_SWITCH         0x00008000L
+#       define  RADEON_SCALER_BURST_PER_PLANE      0x007F0000L
+#       define  RADEON_SCALER_DOUBLE_BUFFER        0x01000000L
+#       define  RADEON_SCALER_DIS_LIMIT            0x08000000L
+#       define  RADEON_SCALER_LIN_TRANS_BYPASS     0x10000000L
+#       define  RADEON_SCALER_INT_EMU              0x20000000L
+#       define  RADEON_SCALER_ENABLE               0x40000000L
+#       define  RADEON_SCALER_SOFT_RESET           0x80000000L
+#define RADEON_OV0_STEP_BY                  0x0484
+#define RADEON_OV0_TEST                     0x04F8
+#define RADEON_OV0_V_INC                    0x0424
+#define RADEON_OV0_VID_BUF_PITCH0_VALUE     0x0460
+#define RADEON_OV0_VID_BUF_PITCH1_VALUE     0x0464
+#define RADEON_OV0_VID_BUF0_BASE_ADRS       0x0440
+#       define  RADEON_VIF_BUF0_PITCH_SEL          0x00000001L
+#       define  RADEON_VIF_BUF0_TILE_ADRS          0x00000002L
+#       define  RADEON_VIF_BUF0_BASE_ADRS_MASK     0x03fffff0L
+#       define  RADEON_VIF_BUF0_1ST_LINE_LSBS_MASK 0x48000000L
+#define RADEON_OV0_VID_BUF1_BASE_ADRS       0x0444
+#       define  RADEON_VIF_BUF1_PITCH_SEL          0x00000001L
+#       define  RADEON_VIF_BUF1_TILE_ADRS          0x00000002L
+#       define  RADEON_VIF_BUF1_BASE_ADRS_MASK     0x03fffff0L
+#       define  RADEON_VIF_BUF1_1ST_LINE_LSBS_MASK 0x48000000L
+#define RADEON_OV0_VID_BUF2_BASE_ADRS       0x0448
+#       define  RADEON_VIF_BUF2_PITCH_SEL          0x00000001L
+#       define  RADEON_VIF_BUF2_TILE_ADRS          0x00000002L
+#       define  RADEON_VIF_BUF2_BASE_ADRS_MASK     0x03fffff0L
+#       define  RADEON_VIF_BUF2_1ST_LINE_LSBS_MASK 0x48000000L
+#define RADEON_OV0_VID_BUF3_BASE_ADRS       0x044C
+#define RADEON_OV0_VID_BUF4_BASE_ADRS       0x0450
+#define RADEON_OV0_VID_BUF5_BASE_ADRS       0x0454
+#define RADEON_OV0_VIDEO_KEY_CLR_HIGH       0x04E8
+#define RADEON_OV0_VIDEO_KEY_CLR_LOW        0x04E4
+#define RADEON_OV0_Y_X_START                0x0400
+#define RADEON_OV0_Y_X_END                  0x0404
+#define RADEON_OV1_Y_X_START                0x0600
+#define RADEON_OV1_Y_X_END                  0x0604
+#define RADEON_OVR_CLR                      0x0230
+#define RADEON_OVR_WID_LEFT_RIGHT           0x0234
+#define RADEON_OVR_WID_TOP_BOTTOM           0x0238
+
+/* first capture unit */
+
+#define RADEON_CAP0_BUF0_OFFSET           0x0920
+#define RADEON_CAP0_BUF1_OFFSET           0x0924
+#define RADEON_CAP0_BUF0_EVEN_OFFSET      0x0928
+#define RADEON_CAP0_BUF1_EVEN_OFFSET      0x092C
+
+#define RADEON_CAP0_BUF_PITCH             0x0930
+#define RADEON_CAP0_V_WINDOW              0x0934
+#define RADEON_CAP0_H_WINDOW              0x0938
+#define RADEON_CAP0_VBI0_OFFSET           0x093C
+#define RADEON_CAP0_VBI1_OFFSET           0x0940
+#define RADEON_CAP0_VBI_V_WINDOW          0x0944
+#define RADEON_CAP0_VBI_H_WINDOW          0x0948
+#define RADEON_CAP0_PORT_MODE_CNTL        0x094C
+#define RADEON_CAP0_TRIG_CNTL             0x0950
+#define RADEON_CAP0_DEBUG                 0x0954
+#define RADEON_CAP0_CONFIG                0x0958
+#       define RADEON_CAP0_CONFIG_CONTINUOS          0x00000001
+#       define RADEON_CAP0_CONFIG_START_FIELD_EVEN   0x00000002
+#       define RADEON_CAP0_CONFIG_START_BUF_GET      0x00000004
+#       define RADEON_CAP0_CONFIG_START_BUF_SET      0x00000008
+#       define RADEON_CAP0_CONFIG_BUF_TYPE_ALT       0x00000010
+#       define RADEON_CAP0_CONFIG_BUF_TYPE_FRAME     0x00000020
+#       define RADEON_CAP0_CONFIG_ONESHOT_MODE_FRAME 0x00000040
+#       define RADEON_CAP0_CONFIG_BUF_MODE_DOUBLE    0x00000080
+#       define RADEON_CAP0_CONFIG_BUF_MODE_TRIPLE    0x00000100
+#       define RADEON_CAP0_CONFIG_MIRROR_EN          0x00000200
+#       define RADEON_CAP0_CONFIG_ONESHOT_MIRROR_EN  0x00000400
+#       define RADEON_CAP0_CONFIG_VIDEO_SIGNED_UV    0x00000800
+#       define RADEON_CAP0_CONFIG_ANC_DECODE_EN      0x00001000
+#       define RADEON_CAP0_CONFIG_VBI_EN             0x00002000
+#       define RADEON_CAP0_CONFIG_SOFT_PULL_DOWN_EN  0x00004000
+#       define RADEON_CAP0_CONFIG_VIP_EXTEND_FLAG_EN 0x00008000
+#       define RADEON_CAP0_CONFIG_FAKE_FIELD_EN      0x00010000
+#       define RADEON_CAP0_CONFIG_ODD_ONE_MORE_LINE  0x00020000
+#       define RADEON_CAP0_CONFIG_EVEN_ONE_MORE_LINE 0x00040000
+#       define RADEON_CAP0_CONFIG_HORZ_DIVIDE_2      0x00080000
+#       define RADEON_CAP0_CONFIG_HORZ_DIVIDE_4      0x00100000
+#       define RADEON_CAP0_CONFIG_VERT_DIVIDE_2      0x00200000
+#       define RADEON_CAP0_CONFIG_VERT_DIVIDE_4      0x00400000
+#       define RADEON_CAP0_CONFIG_FORMAT_BROOKTREE   0x00000000
+#       define RADEON_CAP0_CONFIG_FORMAT_CCIR656     0x00800000
+#       define RADEON_CAP0_CONFIG_FORMAT_ZV          0x01000000
+#       define RADEON_CAP0_CONFIG_FORMAT_VIP         0x01800000
+#       define RADEON_CAP0_CONFIG_FORMAT_TRANSPORT   0x02000000
+#       define RADEON_CAP0_CONFIG_HORZ_DECIMATOR     0x04000000
+#       define RADEON_CAP0_CONFIG_VIDEO_IN_YVYU422   0x00000000
+#       define RADEON_CAP0_CONFIG_VIDEO_IN_VYUY422   0x20000000
+#       define RADEON_CAP0_CONFIG_VBI_DIVIDE_2       0x40000000
+#       define RADEON_CAP0_CONFIG_VBI_DIVIDE_4       0x80000000
+#define RADEON_CAP0_ANC_ODD_OFFSET        0x095C
+#define RADEON_CAP0_ANC_EVEN_OFFSET       0x0960
+#define RADEON_CAP0_ANC_H_WINDOW          0x0964
+#define RADEON_CAP0_VIDEO_SYNC_TEST       0x0968
+#define RADEON_CAP0_ONESHOT_BUF_OFFSET    0x096C
+#define RADEON_CAP0_BUF_STATUS            0x0970
+/* #define RADEON_CAP0_DWNSC_XRATIO       0x0978 */
+/* #define RADEON_CAP0_XSHARPNESS                 0x097C */
+#define RADEON_CAP0_VBI2_OFFSET           0x0980
+#define RADEON_CAP0_VBI3_OFFSET           0x0984
+#define RADEON_CAP0_ANC2_OFFSET           0x0988
+#define RADEON_CAP0_ANC3_OFFSET           0x098C
+#define RADEON_VID_BUFFER_CONTROL         0x0900
+
+/* second capture unit */
+
+#define RADEON_CAP1_BUF0_OFFSET           0x0990
+#define RADEON_CAP1_BUF1_OFFSET           0x0994
+#define RADEON_CAP1_BUF0_EVEN_OFFSET      0x0998
+#define RADEON_CAP1_BUF1_EVEN_OFFSET      0x099C
+
+#define RADEON_CAP1_BUF_PITCH             0x09A0
+#define RADEON_CAP1_V_WINDOW              0x09A4
+#define RADEON_CAP1_H_WINDOW              0x09A8
+#define RADEON_CAP1_VBI_ODD_OFFSET        0x09AC
+#define RADEON_CAP1_VBI_EVEN_OFFSET       0x09B0
+#define RADEON_CAP1_VBI_V_WINDOW                  0x09B4
+#define RADEON_CAP1_VBI_H_WINDOW                  0x09B8
+#define RADEON_CAP1_PORT_MODE_CNTL        0x09BC
+#define RADEON_CAP1_TRIG_CNTL             0x09C0
+#define RADEON_CAP1_DEBUG                         0x09C4
+#define RADEON_CAP1_CONFIG                0x09C8
+#define RADEON_CAP1_ANC_ODD_OFFSET        0x09CC
+#define RADEON_CAP1_ANC_EVEN_OFFSET       0x09D0
+#define RADEON_CAP1_ANC_H_WINDOW                  0x09D4
+#define RADEON_CAP1_VIDEO_SYNC_TEST       0x09D8
+#define RADEON_CAP1_ONESHOT_BUF_OFFSET    0x09DC
+#define RADEON_CAP1_BUF_STATUS            0x09E0
+#define RADEON_CAP1_DWNSC_XRATIO                  0x09E8
+#define RADEON_CAP1_XSHARPNESS            0x09EC
+
+/* misc multimedia registers */
+
+#define RADEON_IDCT_RUNS                  0x1F80
+#define RADEON_IDCT_LEVELS                0x1F84
+#define RADEON_IDCT_CONTROL               0x1FBC
+#define RADEON_IDCT_AUTH_CONTROL          0x1F88
+#define RADEON_IDCT_AUTH                  0x1F8C
+
+#define RADEON_P2PLL_CNTL                   0x002a /* P2PLL */
+#       define RADEON_P2PLL_RESET                (1 <<  0)
+#       define RADEON_P2PLL_SLEEP                (1 <<  1)
+#       define RADEON_P2PLL_PVG_MASK             (7 << 11)
+#       define RADEON_P2PLL_PVG_SHIFT            11
+#       define RADEON_P2PLL_ATOMIC_UPDATE_EN     (1 << 16)
+#       define RADEON_P2PLL_VGA_ATOMIC_UPDATE_EN (1 << 17)
+#       define RADEON_P2PLL_ATOMIC_UPDATE_VSYNC  (1 << 18)
+#define RADEON_P2PLL_DIV_0                  0x002c
+#       define RADEON_P2PLL_FB0_DIV_MASK    0x07ff
+#       define RADEON_P2PLL_POST0_DIV_MASK  0x00070000
+#define RADEON_P2PLL_REF_DIV                0x002B /* PLL */
+#       define RADEON_P2PLL_REF_DIV_MASK    0x03ff
+#       define RADEON_P2PLL_ATOMIC_UPDATE_R (1 << 15) /* same as _W */
+#       define RADEON_P2PLL_ATOMIC_UPDATE_W (1 << 15) /* same as _R */
+#       define R300_PPLL_REF_DIV_ACC_MASK   (0x3ff << 18)
+#       define R300_PPLL_REF_DIV_ACC_SHIFT  18
+#define RADEON_PALETTE_DATA                 0x00b4
+#define RADEON_PALETTE_30_DATA              0x00b8
+#define RADEON_PALETTE_INDEX                0x00b0
+#define RADEON_PCI_GART_PAGE                0x017c
+#define RADEON_PIXCLKS_CNTL                 0x002d
+#       define RADEON_PIX2CLK_SRC_SEL_MASK     0x03
+#       define RADEON_PIX2CLK_SRC_SEL_CPUCLK   0x00
+#       define RADEON_PIX2CLK_SRC_SEL_PSCANCLK 0x01
+#       define RADEON_PIX2CLK_SRC_SEL_BYTECLK  0x02
+#       define RADEON_PIX2CLK_SRC_SEL_P2PLLCLK 0x03
+#       define RADEON_PIX2CLK_ALWAYS_ONb       (1<<6)
+#       define RADEON_PIX2CLK_DAC_ALWAYS_ONb   (1<<7)
+#       define RADEON_PIXCLK_TV_SRC_SEL        (1 << 8)
+#       define RADEON_DISP_TVOUT_PIXCLK_TV_ALWAYS_ONb (1 << 9)
+#       define R300_DVOCLK_ALWAYS_ONb          (1 << 10)
+#       define RADEON_PIXCLK_BLEND_ALWAYS_ONb  (1 << 11)
+#       define RADEON_PIXCLK_GV_ALWAYS_ONb     (1 << 12)
+#       define RADEON_PIXCLK_DIG_TMDS_ALWAYS_ONb (1 << 13)
+#       define R300_PIXCLK_DVO_ALWAYS_ONb      (1 << 13)
+#       define RADEON_PIXCLK_LVDS_ALWAYS_ONb   (1 << 14)
+#       define RADEON_PIXCLK_TMDS_ALWAYS_ONb   (1 << 15)
+#       define R300_PIXCLK_TRANS_ALWAYS_ONb    (1 << 16)
+#       define R300_PIXCLK_TVO_ALWAYS_ONb      (1 << 17)
+#       define R300_P2G2CLK_ALWAYS_ONb         (1 << 18)
+#       define R300_P2G2CLK_DAC_ALWAYS_ONb     (1 << 19)
+#       define R300_DISP_DAC_PIXCLK_DAC2_BLANK_OFF (1 << 23)
+#define RADEON_PLANE_3D_MASK_C              0x1d44
+#define RADEON_PLL_TEST_CNTL                0x0013 /* PLL */
+#       define RADEON_PLL_MASK_READ_B          (1 << 9)
+#define RADEON_PMI_CAP_ID                   0x0f5c /* PCI */
+#define RADEON_PMI_DATA                     0x0f63 /* PCI */
+#define RADEON_PMI_NXT_CAP_PTR              0x0f5d /* PCI */
+#define RADEON_PMI_PMC_REG                  0x0f5e /* PCI */
+#define RADEON_PMI_PMCSR_REG                0x0f60 /* PCI */
+#define RADEON_PMI_REGISTER                 0x0f5c /* PCI */
+#define RADEON_PPLL_CNTL                    0x0002 /* PLL */
+#       define RADEON_PPLL_RESET                (1 <<  0)
+#       define RADEON_PPLL_SLEEP                (1 <<  1)
+#       define RADEON_PPLL_PVG_MASK             (7 << 11)
+#       define RADEON_PPLL_PVG_SHIFT            11
+#       define RADEON_PPLL_ATOMIC_UPDATE_EN     (1 << 16)
+#       define RADEON_PPLL_VGA_ATOMIC_UPDATE_EN (1 << 17)
+#       define RADEON_PPLL_ATOMIC_UPDATE_VSYNC  (1 << 18)
+#define RADEON_PPLL_DIV_0                   0x0004 /* PLL */
+#define RADEON_PPLL_DIV_1                   0x0005 /* PLL */
+#define RADEON_PPLL_DIV_2                   0x0006 /* PLL */
+#define RADEON_PPLL_DIV_3                   0x0007 /* PLL */
+#       define RADEON_PPLL_FB3_DIV_MASK     0x07ff
+#       define RADEON_PPLL_POST3_DIV_MASK   0x00070000
+#define RADEON_PPLL_REF_DIV                 0x0003 /* PLL */
+#       define RADEON_PPLL_REF_DIV_MASK     0x03ff
+#       define RADEON_PPLL_ATOMIC_UPDATE_R  (1 << 15) /* same as _W */
+#       define RADEON_PPLL_ATOMIC_UPDATE_W  (1 << 15) /* same as _R */
+#define RADEON_PWR_MNGMT_CNTL_STATUS        0x0f60 /* PCI */
+
+#define RADEON_RBBM_GUICNTL                 0x172c
+#       define RADEON_HOST_DATA_SWAP_NONE   (0 << 0)
+#       define RADEON_HOST_DATA_SWAP_16BIT  (1 << 0)
+#       define RADEON_HOST_DATA_SWAP_32BIT  (2 << 0)
+#       define RADEON_HOST_DATA_SWAP_HDW    (3 << 0)
+#define RADEON_RBBM_SOFT_RESET              0x00f0
+#       define RADEON_SOFT_RESET_CP         (1 <<  0)
+#       define RADEON_SOFT_RESET_HI         (1 <<  1)
+#       define RADEON_SOFT_RESET_SE         (1 <<  2)
+#       define RADEON_SOFT_RESET_RE         (1 <<  3)
+#       define RADEON_SOFT_RESET_PP         (1 <<  4)
+#       define RADEON_SOFT_RESET_E2         (1 <<  5)
+#       define RADEON_SOFT_RESET_RB         (1 <<  6)
+#       define RADEON_SOFT_RESET_HDP        (1 <<  7)
+#define RADEON_RBBM_STATUS                  0x0e40
+#       define RADEON_RBBM_FIFOCNT_MASK     0x007f
+#       define RADEON_RBBM_ACTIVE           (1 << 31)
+#define RADEON_RB2D_DSTCACHE_CTLSTAT        0x342c
+#       define RADEON_RB2D_DC_FLUSH         (3 << 0)
+#       define RADEON_RB2D_DC_FREE          (3 << 2)
+#       define RADEON_RB2D_DC_FLUSH_ALL     0xf
+#       define RADEON_RB2D_DC_BUSY          (1 << 31)
+#define RADEON_RB2D_DSTCACHE_MODE           0x3428
+#define RADEON_DSTCACHE_CTLSTAT             0x1714
+
+#define RADEON_RB3D_ZCACHE_MODE             0x3250
+#define RADEON_RB3D_ZCACHE_CTLSTAT          0x3254
+#       define RADEON_RB3D_ZC_FLUSH_ALL     0x5
+#define RADEON_RB3D_DSTCACHE_MODE           0x3258
+# define RADEON_RB3D_DC_CACHE_ENABLE            (0)
+# define RADEON_RB3D_DC_2D_CACHE_DISABLE        (1)
+# define RADEON_RB3D_DC_3D_CACHE_DISABLE        (2)
+# define RADEON_RB3D_DC_CACHE_DISABLE           (3)
+# define RADEON_RB3D_DC_2D_CACHE_LINESIZE_128   (1 << 2)
+# define RADEON_RB3D_DC_3D_CACHE_LINESIZE_128   (2 << 2)
+# define RADEON_RB3D_DC_2D_CACHE_AUTOFLUSH      (1 << 8)
+# define RADEON_RB3D_DC_3D_CACHE_AUTOFLUSH      (2 << 8)
+# define R200_RB3D_DC_2D_CACHE_AUTOFREE         (1 << 10)
+# define R200_RB3D_DC_3D_CACHE_AUTOFREE         (2 << 10)
+# define RADEON_RB3D_DC_FORCE_RMW               (1 << 16)
+# define RADEON_RB3D_DC_DISABLE_RI_FILL         (1 << 24)
+# define RADEON_RB3D_DC_DISABLE_RI_READ         (1 << 25)
+
+#define RADEON_RB3D_DSTCACHE_CTLSTAT            0x325C
+# define RADEON_RB3D_DC_FLUSH                   (3 << 0)
+# define RADEON_RB3D_DC_FREE                    (3 << 2)
+# define RADEON_RB3D_DC_FLUSH_ALL               0xf
+# define RADEON_RB3D_DC_BUSY                    (1 << 31)
+
+#define RADEON_REG_BASE                     0x0f18 /* PCI */
+#define RADEON_REGPROG_INF                  0x0f09 /* PCI */
+#define RADEON_REVISION_ID                  0x0f08 /* PCI */
+
+#define RADEON_SC_BOTTOM                    0x164c
+#define RADEON_SC_BOTTOM_RIGHT              0x16f0
+#define RADEON_SC_BOTTOM_RIGHT_C            0x1c8c
+#define RADEON_SC_LEFT                      0x1640
+#define RADEON_SC_RIGHT                     0x1644
+#define RADEON_SC_TOP                       0x1648
+#define RADEON_SC_TOP_LEFT                  0x16ec
+#define RADEON_SC_TOP_LEFT_C                0x1c88
+#       define RADEON_SC_SIGN_MASK_LO       0x8000
+#       define RADEON_SC_SIGN_MASK_HI       0x80000000
+#define RADEON_M_SPLL_REF_FB_DIV            0x000a /* PLL */
+#	define RADEON_M_SPLL_REF_DIV_SHIFT  0
+#	define RADEON_M_SPLL_REF_DIV_MASK   0xff
+#	define RADEON_MPLL_FB_DIV_SHIFT     8
+#	define RADEON_MPLL_FB_DIV_MASK      0xff
+#	define RADEON_SPLL_FB_DIV_SHIFT     16
+#	define RADEON_SPLL_FB_DIV_MASK      0xff
+#define RADEON_SPLL_CNTL                    0x000c /* PLL */
+#       define RADEON_SPLL_SLEEP            (1 << 0)
+#       define RADEON_SPLL_RESET            (1 << 1)
+#       define RADEON_SPLL_PCP_MASK         0x7
+#       define RADEON_SPLL_PCP_SHIFT        8
+#       define RADEON_SPLL_PVG_MASK         0x7
+#       define RADEON_SPLL_PVG_SHIFT        11
+#       define RADEON_SPLL_PDC_MASK         0x3
+#       define RADEON_SPLL_PDC_SHIFT        14
+#define RADEON_SCLK_CNTL                    0x000d /* PLL */
+#       define RADEON_SCLK_SRC_SEL_MASK     0x0007
+#       define RADEON_DYN_STOP_LAT_MASK     0x00007ff8
+#       define RADEON_CP_MAX_DYN_STOP_LAT   0x0008
+#       define RADEON_SCLK_FORCEON_MASK     0xffff8000
+#       define RADEON_SCLK_FORCE_DISP2      (1<<15)
+#       define RADEON_SCLK_FORCE_CP         (1<<16)
+#       define RADEON_SCLK_FORCE_HDP        (1<<17)
+#       define RADEON_SCLK_FORCE_DISP1      (1<<18)
+#       define RADEON_SCLK_FORCE_TOP        (1<<19)
+#       define RADEON_SCLK_FORCE_E2         (1<<20)
+#       define RADEON_SCLK_FORCE_SE         (1<<21)
+#       define RADEON_SCLK_FORCE_IDCT       (1<<22)
+#       define RADEON_SCLK_FORCE_VIP        (1<<23)
+#       define RADEON_SCLK_FORCE_RE         (1<<24)
+#       define RADEON_SCLK_FORCE_PB         (1<<25)
+#       define RADEON_SCLK_FORCE_TAM        (1<<26)
+#       define RADEON_SCLK_FORCE_TDM        (1<<27)
+#       define RADEON_SCLK_FORCE_RB         (1<<28)
+#       define RADEON_SCLK_FORCE_TV_SCLK    (1<<29)
+#       define RADEON_SCLK_FORCE_SUBPIC     (1<<30)
+#       define RADEON_SCLK_FORCE_OV0        (1<<31)
+#       define R300_SCLK_FORCE_VAP          (1<<21)
+#       define R300_SCLK_FORCE_SR           (1<<25)
+#       define R300_SCLK_FORCE_PX           (1<<26)
+#       define R300_SCLK_FORCE_TX           (1<<27)
+#       define R300_SCLK_FORCE_US           (1<<28)
+#       define R300_SCLK_FORCE_SU           (1<<30)
+#define R300_SCLK_CNTL2                     0x1e   /* PLL */
+#       define R300_SCLK_TCL_MAX_DYN_STOP_LAT (1<<10)
+#       define R300_SCLK_GA_MAX_DYN_STOP_LAT  (1<<11)
+#       define R300_SCLK_CBA_MAX_DYN_STOP_LAT (1<<12)
+#       define R300_SCLK_FORCE_TCL          (1<<13)
+#       define R300_SCLK_FORCE_CBA          (1<<14)
+#       define R300_SCLK_FORCE_GA           (1<<15)
+#define RADEON_SCLK_MORE_CNTL               0x0035 /* PLL */
+#       define RADEON_SCLK_MORE_MAX_DYN_STOP_LAT 0x0007
+#       define RADEON_SCLK_MORE_FORCEON     0x0700
+#define RADEON_SDRAM_MODE_REG               0x0158
+#define RADEON_SEQ8_DATA                    0x03c5 /* VGA */
+#define RADEON_SEQ8_IDX                     0x03c4 /* VGA */
+#define RADEON_SNAPSHOT_F_COUNT             0x0244
+#define RADEON_SNAPSHOT_VH_COUNTS           0x0240
+#define RADEON_SNAPSHOT_VIF_COUNT           0x024c
+#define RADEON_SRC_OFFSET                   0x15ac
+#define RADEON_SRC_PITCH                    0x15b0
+#define RADEON_SRC_PITCH_OFFSET             0x1428
+#define RADEON_SRC_SC_BOTTOM                0x165c
+#define RADEON_SRC_SC_BOTTOM_RIGHT          0x16f4
+#define RADEON_SRC_SC_RIGHT                 0x1654
+#define RADEON_SRC_X                        0x1414
+#define RADEON_SRC_X_Y                      0x1590
+#define RADEON_SRC_Y                        0x1418
+#define RADEON_SRC_Y_X                      0x1434
+#define RADEON_STATUS                       0x0f06 /* PCI */
+#define RADEON_SUBPIC_CNTL                  0x0540 /* ? */
+#define RADEON_SUB_CLASS                    0x0f0a /* PCI */
+#define RADEON_SURFACE_CNTL                 0x0b00
+#       define RADEON_SURF_TRANSLATION_DIS  (1 << 8)
+#       define RADEON_NONSURF_AP0_SWP_16BPP (1 << 20)
+#       define RADEON_NONSURF_AP0_SWP_32BPP (1 << 21)
+#       define RADEON_NONSURF_AP1_SWP_16BPP (1 << 22)
+#       define RADEON_NONSURF_AP1_SWP_32BPP (1 << 23)
+#define RADEON_SURFACE0_INFO                0x0b0c
+#       define RADEON_SURF_TILE_COLOR_MACRO (0 << 16)
+#       define RADEON_SURF_TILE_COLOR_BOTH  (1 << 16)
+#       define RADEON_SURF_TILE_DEPTH_32BPP (2 << 16)
+#       define RADEON_SURF_TILE_DEPTH_16BPP (3 << 16)
+#       define R200_SURF_TILE_NONE          (0 << 16)
+#       define R200_SURF_TILE_COLOR_MACRO   (1 << 16)
+#       define R200_SURF_TILE_COLOR_MICRO   (2 << 16)
+#       define R200_SURF_TILE_COLOR_BOTH    (3 << 16)
+#       define R200_SURF_TILE_DEPTH_32BPP   (4 << 16)
+#       define R200_SURF_TILE_DEPTH_16BPP   (5 << 16)
+#       define R300_SURF_TILE_NONE          (0 << 16)
+#       define R300_SURF_TILE_COLOR_MACRO   (1 << 16)
+#       define R300_SURF_TILE_DEPTH_32BPP   (2 << 16)
+#       define RADEON_SURF_AP0_SWP_16BPP    (1 << 20)
+#       define RADEON_SURF_AP0_SWP_32BPP    (1 << 21)
+#       define RADEON_SURF_AP1_SWP_16BPP    (1 << 22)
+#       define RADEON_SURF_AP1_SWP_32BPP    (1 << 23)
+#define RADEON_SURFACE0_LOWER_BOUND         0x0b04
+#define RADEON_SURFACE0_UPPER_BOUND         0x0b08
+#define RADEON_SURFACE1_INFO                0x0b1c
+#define RADEON_SURFACE1_LOWER_BOUND         0x0b14
+#define RADEON_SURFACE1_UPPER_BOUND         0x0b18
+#define RADEON_SURFACE2_INFO                0x0b2c
+#define RADEON_SURFACE2_LOWER_BOUND         0x0b24
+#define RADEON_SURFACE2_UPPER_BOUND         0x0b28
+#define RADEON_SURFACE3_INFO                0x0b3c
+#define RADEON_SURFACE3_LOWER_BOUND         0x0b34
+#define RADEON_SURFACE3_UPPER_BOUND         0x0b38
+#define RADEON_SURFACE4_INFO                0x0b4c
+#define RADEON_SURFACE4_LOWER_BOUND         0x0b44
+#define RADEON_SURFACE4_UPPER_BOUND         0x0b48
+#define RADEON_SURFACE5_INFO                0x0b5c
+#define RADEON_SURFACE5_LOWER_BOUND         0x0b54
+#define RADEON_SURFACE5_UPPER_BOUND         0x0b58
+#define RADEON_SURFACE6_INFO                0x0b6c
+#define RADEON_SURFACE6_LOWER_BOUND         0x0b64
+#define RADEON_SURFACE6_UPPER_BOUND         0x0b68
+#define RADEON_SURFACE7_INFO                0x0b7c
+#define RADEON_SURFACE7_LOWER_BOUND         0x0b74
+#define RADEON_SURFACE7_UPPER_BOUND         0x0b78
+#define RADEON_SW_SEMAPHORE                 0x013c
+
+#define RADEON_TEST_DEBUG_CNTL              0x0120
+#define RADEON_TEST_DEBUG_CNTL__TEST_DEBUG_OUT_EN 0x00000001
+
+#define RADEON_TEST_DEBUG_MUX               0x0124
+#define RADEON_TEST_DEBUG_OUT               0x012c
+#define RADEON_TMDS_PLL_CNTL                0x02a8
+#define RADEON_TMDS_TRANSMITTER_CNTL        0x02a4
+#       define RADEON_TMDS_TRANSMITTER_PLLEN  1
+#       define RADEON_TMDS_TRANSMITTER_PLLRST 2
+#define RADEON_TRAIL_BRES_DEC               0x1614
+#define RADEON_TRAIL_BRES_ERR               0x160c
+#define RADEON_TRAIL_BRES_INC               0x1610
+#define RADEON_TRAIL_X                      0x1618
+#define RADEON_TRAIL_X_SUB                  0x1620
+
+#define RADEON_VCLK_ECP_CNTL                0x0008 /* PLL */
+#       define RADEON_VCLK_SRC_SEL_MASK     0x03
+#       define RADEON_VCLK_SRC_SEL_CPUCLK   0x00
+#       define RADEON_VCLK_SRC_SEL_PSCANCLK 0x01
+#       define RADEON_VCLK_SRC_SEL_BYTECLK  0x02
+#       define RADEON_VCLK_SRC_SEL_PPLLCLK  0x03
+#       define RADEON_PIXCLK_ALWAYS_ONb     (1<<6)
+#       define RADEON_PIXCLK_DAC_ALWAYS_ONb (1<<7)
+#       define R300_DISP_DAC_PIXCLK_DAC_BLANK_OFF (1<<23)
+
+#define RADEON_VENDOR_ID                    0x0f00 /* PCI */
+#define RADEON_VGA_DDA_CONFIG               0x02e8
+#define RADEON_VGA_DDA_ON_OFF               0x02ec
+#define RADEON_VID_BUFFER_CONTROL           0x0900
+#define RADEON_VIDEOMUX_CNTL                0x0190
+
+/* VIP bus */
+#define RADEON_VIPH_CH0_DATA                0x0c00
+#define RADEON_VIPH_CH1_DATA                0x0c04
+#define RADEON_VIPH_CH2_DATA                0x0c08
+#define RADEON_VIPH_CH3_DATA                0x0c0c
+#define RADEON_VIPH_CH0_ADDR                0x0c10
+#define RADEON_VIPH_CH1_ADDR                0x0c14
+#define RADEON_VIPH_CH2_ADDR                0x0c18
+#define RADEON_VIPH_CH3_ADDR                0x0c1c
+#define RADEON_VIPH_CH0_SBCNT               0x0c20
+#define RADEON_VIPH_CH1_SBCNT               0x0c24
+#define RADEON_VIPH_CH2_SBCNT               0x0c28
+#define RADEON_VIPH_CH3_SBCNT               0x0c2c
+#define RADEON_VIPH_CH0_ABCNT               0x0c30
+#define RADEON_VIPH_CH1_ABCNT               0x0c34
+#define RADEON_VIPH_CH2_ABCNT               0x0c38
+#define RADEON_VIPH_CH3_ABCNT               0x0c3c
+#define RADEON_VIPH_CONTROL                 0x0c40
+#       define RADEON_VIP_BUSY 0
+#       define RADEON_VIP_IDLE 1
+#       define RADEON_VIP_RESET 2
+#       define RADEON_VIPH_EN               (1 << 21)
+#define RADEON_VIPH_DV_LAT                  0x0c44
+#define RADEON_VIPH_BM_CHUNK                0x0c48
+#define RADEON_VIPH_DV_INT                  0x0c4c
+#define RADEON_VIPH_TIMEOUT_STAT            0x0c50
+#define RADEON_VIPH_TIMEOUT_STAT__VIPH_REG_STAT 0x00000010
+#define RADEON_VIPH_TIMEOUT_STAT__VIPH_REG_AK   0x00000010
+#define RADEON_VIPH_TIMEOUT_STAT__VIPH_REGR_DIS 0x01000000
+
+#define RADEON_VIPH_REG_DATA                0x0084
+#define RADEON_VIPH_REG_ADDR                0x0080
+
+
+#define RADEON_WAIT_UNTIL                   0x1720
+#       define RADEON_WAIT_CRTC_PFLIP       (1 << 0)
+#       define RADEON_WAIT_RE_CRTC_VLINE    (1 << 1)
+#       define RADEON_WAIT_FE_CRTC_VLINE    (1 << 2)
+#       define RADEON_WAIT_CRTC_VLINE       (1 << 3)
+#       define RADEON_WAIT_DMA_VID_IDLE     (1 << 8)
+#       define RADEON_WAIT_DMA_GUI_IDLE     (1 << 9)
+#       define RADEON_WAIT_CMDFIFO          (1 << 10) /* wait for CMDFIFO_ENTRIES */
+#       define RADEON_WAIT_OV0_FLIP         (1 << 11)
+#       define RADEON_WAIT_AGP_FLUSH        (1 << 13)
+#       define RADEON_WAIT_2D_IDLE          (1 << 14)
+#       define RADEON_WAIT_3D_IDLE          (1 << 15)
+#       define RADEON_WAIT_2D_IDLECLEAN     (1 << 16)
+#       define RADEON_WAIT_3D_IDLECLEAN     (1 << 17)
+#       define RADEON_WAIT_HOST_IDLECLEAN   (1 << 18)
+#       define RADEON_CMDFIFO_ENTRIES_SHIFT 10
+#       define RADEON_CMDFIFO_ENTRIES_MASK  0x7f
+#       define RADEON_WAIT_VAP_IDLE         (1 << 28)
+#       define RADEON_WAIT_BOTH_CRTC_PFLIP  (1 << 30)
+#       define RADEON_ENG_DISPLAY_SELECT_CRTC0    (0 << 31)
+#       define RADEON_ENG_DISPLAY_SELECT_CRTC1    (1 << 31)
+
+#define RADEON_X_MPLL_REF_FB_DIV            0x000a /* PLL */
+#define RADEON_XCLK_CNTL                    0x000d /* PLL */
+#define RADEON_XDLL_CNTL                    0x000c /* PLL */
+#define RADEON_XPLL_CNTL                    0x000b /* PLL */
+
+
+
+				/* Registers for 3D/TCL */
+#define RADEON_PP_BORDER_COLOR_0            0x1d40
+#define RADEON_PP_BORDER_COLOR_1            0x1d44
+#define RADEON_PP_BORDER_COLOR_2            0x1d48
+#define RADEON_PP_CNTL                      0x1c38
+#       define RADEON_STIPPLE_ENABLE        (1 <<  0)
+#       define RADEON_SCISSOR_ENABLE        (1 <<  1)
+#       define RADEON_PATTERN_ENABLE        (1 <<  2)
+#       define RADEON_SHADOW_ENABLE         (1 <<  3)
+#       define RADEON_TEX_ENABLE_MASK       (0xf << 4)
+#       define RADEON_TEX_0_ENABLE          (1 <<  4)
+#       define RADEON_TEX_1_ENABLE          (1 <<  5)
+#       define RADEON_TEX_2_ENABLE          (1 <<  6)
+#       define RADEON_TEX_3_ENABLE          (1 <<  7)
+#       define RADEON_TEX_BLEND_ENABLE_MASK (0xf << 12)
+#       define RADEON_TEX_BLEND_0_ENABLE    (1 << 12)
+#       define RADEON_TEX_BLEND_1_ENABLE    (1 << 13)
+#       define RADEON_TEX_BLEND_2_ENABLE    (1 << 14)
+#       define RADEON_TEX_BLEND_3_ENABLE    (1 << 15)
+#       define RADEON_PLANAR_YUV_ENABLE     (1 << 20)
+#       define RADEON_SPECULAR_ENABLE       (1 << 21)
+#       define RADEON_FOG_ENABLE            (1 << 22)
+#       define RADEON_ALPHA_TEST_ENABLE     (1 << 23)
+#       define RADEON_ANTI_ALIAS_NONE       (0 << 24)
+#       define RADEON_ANTI_ALIAS_LINE       (1 << 24)
+#       define RADEON_ANTI_ALIAS_POLY       (2 << 24)
+#       define RADEON_ANTI_ALIAS_LINE_POLY  (3 << 24)
+#       define RADEON_BUMP_MAP_ENABLE       (1 << 26)
+#       define RADEON_BUMPED_MAP_T0         (0 << 27)
+#       define RADEON_BUMPED_MAP_T1         (1 << 27)
+#       define RADEON_BUMPED_MAP_T2         (2 << 27)
+#       define RADEON_TEX_3D_ENABLE_0       (1 << 29)
+#       define RADEON_TEX_3D_ENABLE_1       (1 << 30)
+#       define RADEON_MC_ENABLE             (1 << 31)
+#define RADEON_PP_FOG_COLOR                 0x1c18
+#       define RADEON_FOG_COLOR_MASK        0x00ffffff
+#       define RADEON_FOG_VERTEX            (0 << 24)
+#       define RADEON_FOG_TABLE             (1 << 24)
+#       define RADEON_FOG_USE_DEPTH         (0 << 25)
+#       define RADEON_FOG_USE_DIFFUSE_ALPHA (2 << 25)
+#       define RADEON_FOG_USE_SPEC_ALPHA    (3 << 25)
+#define RADEON_PP_LUM_MATRIX                0x1d00
+#define RADEON_PP_MISC                      0x1c14
+#       define RADEON_REF_ALPHA_MASK        0x000000ff
+#       define RADEON_ALPHA_TEST_FAIL       (0 << 8)
+#       define RADEON_ALPHA_TEST_LESS       (1 << 8)
+#       define RADEON_ALPHA_TEST_LEQUAL     (2 << 8)
+#       define RADEON_ALPHA_TEST_EQUAL      (3 << 8)
+#       define RADEON_ALPHA_TEST_GEQUAL     (4 << 8)
+#       define RADEON_ALPHA_TEST_GREATER    (5 << 8)
+#       define RADEON_ALPHA_TEST_NEQUAL     (6 << 8)
+#       define RADEON_ALPHA_TEST_PASS       (7 << 8)
+#       define RADEON_ALPHA_TEST_OP_MASK    (7 << 8)
+#       define RADEON_CHROMA_FUNC_FAIL      (0 << 16)
+#       define RADEON_CHROMA_FUNC_PASS      (1 << 16)
+#       define RADEON_CHROMA_FUNC_NEQUAL    (2 << 16)
+#       define RADEON_CHROMA_FUNC_EQUAL     (3 << 16)
+#       define RADEON_CHROMA_KEY_NEAREST    (0 << 18)
+#       define RADEON_CHROMA_KEY_ZERO       (1 << 18)
+#       define RADEON_SHADOW_ID_AUTO_INC    (1 << 20)
+#       define RADEON_SHADOW_FUNC_EQUAL     (0 << 21)
+#       define RADEON_SHADOW_FUNC_NEQUAL    (1 << 21)
+#       define RADEON_SHADOW_PASS_1         (0 << 22)
+#       define RADEON_SHADOW_PASS_2         (1 << 22)
+#       define RADEON_RIGHT_HAND_CUBE_D3D   (0 << 24)
+#       define RADEON_RIGHT_HAND_CUBE_OGL   (1 << 24)
+#define RADEON_PP_ROT_MATRIX_0              0x1d58
+#define RADEON_PP_ROT_MATRIX_1              0x1d5c
+#define RADEON_PP_TXFILTER_0                0x1c54
+#define RADEON_PP_TXFILTER_1                0x1c6c
+#define RADEON_PP_TXFILTER_2                0x1c84
+#       define RADEON_MAG_FILTER_NEAREST                   (0  <<  0)
+#       define RADEON_MAG_FILTER_LINEAR                    (1  <<  0)
+#       define RADEON_MAG_FILTER_MASK                      (1  <<  0)
+#       define RADEON_MIN_FILTER_NEAREST                   (0  <<  1)
+#       define RADEON_MIN_FILTER_LINEAR                    (1  <<  1)
+#       define RADEON_MIN_FILTER_NEAREST_MIP_NEAREST       (2  <<  1)
+#       define RADEON_MIN_FILTER_NEAREST_MIP_LINEAR        (3  <<  1)
+#       define RADEON_MIN_FILTER_LINEAR_MIP_NEAREST        (6  <<  1)
+#       define RADEON_MIN_FILTER_LINEAR_MIP_LINEAR         (7  <<  1)
+#       define RADEON_MIN_FILTER_ANISO_NEAREST             (8  <<  1)
+#       define RADEON_MIN_FILTER_ANISO_LINEAR              (9  <<  1)
+#       define RADEON_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST (10 <<  1)
+#       define RADEON_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR  (11 <<  1)
+#       define RADEON_MIN_FILTER_MASK                      (15 <<  1)
+#       define RADEON_MAX_ANISO_1_TO_1                     (0  <<  5)
+#       define RADEON_MAX_ANISO_2_TO_1                     (1  <<  5)
+#       define RADEON_MAX_ANISO_4_TO_1                     (2  <<  5)
+#       define RADEON_MAX_ANISO_8_TO_1                     (3  <<  5)
+#       define RADEON_MAX_ANISO_16_TO_1                    (4  <<  5)
+#       define RADEON_MAX_ANISO_MASK                       (7  <<  5)
+#       define RADEON_LOD_BIAS_MASK                        (0xff <<  8)
+#       define RADEON_LOD_BIAS_SHIFT                       8
+#       define RADEON_MAX_MIP_LEVEL_MASK                   (0x0f << 16)
+#       define RADEON_MAX_MIP_LEVEL_SHIFT                  16
+#       define RADEON_YUV_TO_RGB                           (1  << 20)
+#       define RADEON_YUV_TEMPERATURE_COOL                 (0  << 21)
+#       define RADEON_YUV_TEMPERATURE_HOT                  (1  << 21)
+#       define RADEON_YUV_TEMPERATURE_MASK                 (1  << 21)
+#       define RADEON_WRAPEN_S                             (1  << 22)
+#       define RADEON_CLAMP_S_WRAP                         (0  << 23)
+#       define RADEON_CLAMP_S_MIRROR                       (1  << 23)
+#       define RADEON_CLAMP_S_CLAMP_LAST                   (2  << 23)
+#       define RADEON_CLAMP_S_MIRROR_CLAMP_LAST            (3  << 23)
+#       define RADEON_CLAMP_S_CLAMP_BORDER                 (4  << 23)
+#       define RADEON_CLAMP_S_MIRROR_CLAMP_BORDER          (5  << 23)
+#       define RADEON_CLAMP_S_CLAMP_GL                     (6  << 23)
+#       define RADEON_CLAMP_S_MIRROR_CLAMP_GL              (7  << 23)
+#       define RADEON_CLAMP_S_MASK                         (7  << 23)
+#       define RADEON_WRAPEN_T                             (1  << 26)
+#       define RADEON_CLAMP_T_WRAP                         (0  << 27)
+#       define RADEON_CLAMP_T_MIRROR                       (1  << 27)
+#       define RADEON_CLAMP_T_CLAMP_LAST                   (2  << 27)
+#       define RADEON_CLAMP_T_MIRROR_CLAMP_LAST            (3  << 27)
+#       define RADEON_CLAMP_T_CLAMP_BORDER                 (4  << 27)
+#       define RADEON_CLAMP_T_MIRROR_CLAMP_BORDER          (5  << 27)
+#       define RADEON_CLAMP_T_CLAMP_GL                     (6  << 27)
+#       define RADEON_CLAMP_T_MIRROR_CLAMP_GL              (7  << 27)
+#       define RADEON_CLAMP_T_MASK                         (7  << 27)
+#       define RADEON_BORDER_MODE_OGL                      (0  << 31)
+#       define RADEON_BORDER_MODE_D3D                      (1  << 31)
+#define RADEON_PP_TXFORMAT_0                0x1c58
+#define RADEON_PP_TXFORMAT_1                0x1c70
+#define RADEON_PP_TXFORMAT_2                0x1c88
+#       define RADEON_TXFORMAT_I8                 (0  <<  0)
+#       define RADEON_TXFORMAT_AI88               (1  <<  0)
+#       define RADEON_TXFORMAT_RGB332             (2  <<  0)
+#       define RADEON_TXFORMAT_ARGB1555           (3  <<  0)
+#       define RADEON_TXFORMAT_RGB565             (4  <<  0)
+#       define RADEON_TXFORMAT_ARGB4444           (5  <<  0)
+#       define RADEON_TXFORMAT_ARGB8888           (6  <<  0)
+#       define RADEON_TXFORMAT_RGBA8888           (7  <<  0)
+#       define RADEON_TXFORMAT_Y8                 (8  <<  0)
+#       define RADEON_TXFORMAT_VYUY422            (10 <<  0)
+#       define RADEON_TXFORMAT_YVYU422            (11 <<  0)
+#       define RADEON_TXFORMAT_DXT1               (12 <<  0)
+#       define RADEON_TXFORMAT_DXT23              (14 <<  0)
+#       define RADEON_TXFORMAT_DXT45              (15 <<  0)
+#       define RADEON_TXFORMAT_FORMAT_MASK        (31 <<  0)
+#       define RADEON_TXFORMAT_FORMAT_SHIFT       0
+#       define RADEON_TXFORMAT_APPLE_YUV_MODE     (1  <<  5)
+#       define RADEON_TXFORMAT_ALPHA_IN_MAP       (1  <<  6)
+#       define RADEON_TXFORMAT_NON_POWER2         (1  <<  7)
+#       define RADEON_TXFORMAT_WIDTH_MASK         (15 <<  8)
+#       define RADEON_TXFORMAT_WIDTH_SHIFT        8
+#       define RADEON_TXFORMAT_HEIGHT_MASK        (15 << 12)
+#       define RADEON_TXFORMAT_HEIGHT_SHIFT       12
+#       define RADEON_TXFORMAT_F5_WIDTH_MASK      (15 << 16)
+#       define RADEON_TXFORMAT_F5_WIDTH_SHIFT     16
+#       define RADEON_TXFORMAT_F5_HEIGHT_MASK     (15 << 20)
+#       define RADEON_TXFORMAT_F5_HEIGHT_SHIFT    20
+#       define RADEON_TXFORMAT_ST_ROUTE_STQ0      (0  << 24)
+#       define RADEON_TXFORMAT_ST_ROUTE_MASK      (3  << 24)
+#       define RADEON_TXFORMAT_ST_ROUTE_STQ1      (1  << 24)
+#       define RADEON_TXFORMAT_ST_ROUTE_STQ2      (2  << 24)
+#       define RADEON_TXFORMAT_ENDIAN_NO_SWAP     (0  << 26)
+#       define RADEON_TXFORMAT_ENDIAN_16BPP_SWAP  (1  << 26)
+#       define RADEON_TXFORMAT_ENDIAN_32BPP_SWAP  (2  << 26)
+#       define RADEON_TXFORMAT_ENDIAN_HALFDW_SWAP (3  << 26)
+#       define RADEON_TXFORMAT_ALPHA_MASK_ENABLE  (1  << 28)
+#       define RADEON_TXFORMAT_CHROMA_KEY_ENABLE  (1  << 29)
+#       define RADEON_TXFORMAT_CUBIC_MAP_ENABLE   (1  << 30)
+#       define RADEON_TXFORMAT_PERSPECTIVE_ENABLE (1  << 31)
+#define RADEON_PP_CUBIC_FACES_0             0x1d24
+#define RADEON_PP_CUBIC_FACES_1             0x1d28
+#define RADEON_PP_CUBIC_FACES_2             0x1d2c
+#       define RADEON_FACE_WIDTH_1_SHIFT          0
+#       define RADEON_FACE_HEIGHT_1_SHIFT         4
+#       define RADEON_FACE_WIDTH_1_MASK           (0xf << 0)
+#       define RADEON_FACE_HEIGHT_1_MASK          (0xf << 4)
+#       define RADEON_FACE_WIDTH_2_SHIFT          8
+#       define RADEON_FACE_HEIGHT_2_SHIFT         12
+#       define RADEON_FACE_WIDTH_2_MASK           (0xf << 8)
+#       define RADEON_FACE_HEIGHT_2_MASK          (0xf << 12)
+#       define RADEON_FACE_WIDTH_3_SHIFT          16
+#       define RADEON_FACE_HEIGHT_3_SHIFT         20
+#       define RADEON_FACE_WIDTH_3_MASK           (0xf << 16)
+#       define RADEON_FACE_HEIGHT_3_MASK          (0xf << 20)
+#       define RADEON_FACE_WIDTH_4_SHIFT          24
+#       define RADEON_FACE_HEIGHT_4_SHIFT         28
+#       define RADEON_FACE_WIDTH_4_MASK           (0xf << 24)
+#       define RADEON_FACE_HEIGHT_4_MASK          (0xf << 28)
+
+#define RADEON_PP_TXOFFSET_0                0x1c5c
+#define RADEON_PP_TXOFFSET_1                0x1c74
+#define RADEON_PP_TXOFFSET_2                0x1c8c
+#       define RADEON_TXO_ENDIAN_NO_SWAP     (0 << 0)
+#       define RADEON_TXO_ENDIAN_BYTE_SWAP   (1 << 0)
+#       define RADEON_TXO_ENDIAN_WORD_SWAP   (2 << 0)
+#       define RADEON_TXO_ENDIAN_HALFDW_SWAP (3 << 0)
+#       define RADEON_TXO_MACRO_LINEAR       (0 << 2)
+#       define RADEON_TXO_MACRO_TILE         (1 << 2)
+#       define RADEON_TXO_MICRO_LINEAR       (0 << 3)
+#       define RADEON_TXO_MICRO_TILE_X2      (1 << 3)
+#       define RADEON_TXO_MICRO_TILE_OPT     (2 << 3)
+#       define RADEON_TXO_OFFSET_MASK        0xffffffe0
+#       define RADEON_TXO_OFFSET_SHIFT       5
+
+#define RADEON_PP_CUBIC_OFFSET_T0_0         0x1dd0  /* bits [31:5] */
+#define RADEON_PP_CUBIC_OFFSET_T0_1         0x1dd4
+#define RADEON_PP_CUBIC_OFFSET_T0_2         0x1dd8
+#define RADEON_PP_CUBIC_OFFSET_T0_3         0x1ddc
+#define RADEON_PP_CUBIC_OFFSET_T0_4         0x1de0
+#define RADEON_PP_CUBIC_OFFSET_T1_0         0x1e00
+#define RADEON_PP_CUBIC_OFFSET_T1_1         0x1e04
+#define RADEON_PP_CUBIC_OFFSET_T1_2         0x1e08
+#define RADEON_PP_CUBIC_OFFSET_T1_3         0x1e0c
+#define RADEON_PP_CUBIC_OFFSET_T1_4         0x1e10
+#define RADEON_PP_CUBIC_OFFSET_T2_0         0x1e14
+#define RADEON_PP_CUBIC_OFFSET_T2_1         0x1e18
+#define RADEON_PP_CUBIC_OFFSET_T2_2         0x1e1c
+#define RADEON_PP_CUBIC_OFFSET_T2_3         0x1e20
+#define RADEON_PP_CUBIC_OFFSET_T2_4         0x1e24
+
+#define RADEON_PP_TEX_SIZE_0                0x1d04  /* NPOT */
+#define RADEON_PP_TEX_SIZE_1                0x1d0c
+#define RADEON_PP_TEX_SIZE_2                0x1d14
+#       define RADEON_TEX_USIZE_MASK        (0x7ff << 0)
+#       define RADEON_TEX_USIZE_SHIFT       0
+#       define RADEON_TEX_VSIZE_MASK        (0x7ff << 16)
+#       define RADEON_TEX_VSIZE_SHIFT       16
+#       define RADEON_SIGNED_RGB_MASK       (1 << 30)
+#       define RADEON_SIGNED_RGB_SHIFT      30
+#       define RADEON_SIGNED_ALPHA_MASK     (1 << 31)
+#       define RADEON_SIGNED_ALPHA_SHIFT    31
+#define RADEON_PP_TEX_PITCH_0               0x1d08  /* NPOT */
+#define RADEON_PP_TEX_PITCH_1               0x1d10  /* NPOT */
+#define RADEON_PP_TEX_PITCH_2               0x1d18  /* NPOT */
+/* note: bits 13-5: 32 byte aligned stride of texture map */
+
+#define RADEON_PP_TXCBLEND_0                0x1c60
+#define RADEON_PP_TXCBLEND_1                0x1c78
+#define RADEON_PP_TXCBLEND_2                0x1c90
+#       define RADEON_COLOR_ARG_A_SHIFT          0
+#       define RADEON_COLOR_ARG_A_MASK           (0x1f << 0)
+#       define RADEON_COLOR_ARG_A_ZERO           (0    << 0)
+#       define RADEON_COLOR_ARG_A_CURRENT_COLOR  (2    << 0)
+#       define RADEON_COLOR_ARG_A_CURRENT_ALPHA  (3    << 0)
+#       define RADEON_COLOR_ARG_A_DIFFUSE_COLOR  (4    << 0)
+#       define RADEON_COLOR_ARG_A_DIFFUSE_ALPHA  (5    << 0)
+#       define RADEON_COLOR_ARG_A_SPECULAR_COLOR (6    << 0)
+#       define RADEON_COLOR_ARG_A_SPECULAR_ALPHA (7    << 0)
+#       define RADEON_COLOR_ARG_A_TFACTOR_COLOR  (8    << 0)
+#       define RADEON_COLOR_ARG_A_TFACTOR_ALPHA  (9    << 0)
+#       define RADEON_COLOR_ARG_A_T0_COLOR       (10   << 0)
+#       define RADEON_COLOR_ARG_A_T0_ALPHA       (11   << 0)
+#       define RADEON_COLOR_ARG_A_T1_COLOR       (12   << 0)
+#       define RADEON_COLOR_ARG_A_T1_ALPHA       (13   << 0)
+#       define RADEON_COLOR_ARG_A_T2_COLOR       (14   << 0)
+#       define RADEON_COLOR_ARG_A_T2_ALPHA       (15   << 0)
+#       define RADEON_COLOR_ARG_A_T3_COLOR       (16   << 0)
+#       define RADEON_COLOR_ARG_A_T3_ALPHA       (17   << 0)
+#       define RADEON_COLOR_ARG_B_SHIFT          5
+#       define RADEON_COLOR_ARG_B_MASK           (0x1f << 5)
+#       define RADEON_COLOR_ARG_B_ZERO           (0    << 5)
+#       define RADEON_COLOR_ARG_B_CURRENT_COLOR  (2    << 5)
+#       define RADEON_COLOR_ARG_B_CURRENT_ALPHA  (3    << 5)
+#       define RADEON_COLOR_ARG_B_DIFFUSE_COLOR  (4    << 5)
+#       define RADEON_COLOR_ARG_B_DIFFUSE_ALPHA  (5    << 5)
+#       define RADEON_COLOR_ARG_B_SPECULAR_COLOR (6    << 5)
+#       define RADEON_COLOR_ARG_B_SPECULAR_ALPHA (7    << 5)
+#       define RADEON_COLOR_ARG_B_TFACTOR_COLOR  (8    << 5)
+#       define RADEON_COLOR_ARG_B_TFACTOR_ALPHA  (9    << 5)
+#       define RADEON_COLOR_ARG_B_T0_COLOR       (10   << 5)
+#       define RADEON_COLOR_ARG_B_T0_ALPHA       (11   << 5)
+#       define RADEON_COLOR_ARG_B_T1_COLOR       (12   << 5)
+#       define RADEON_COLOR_ARG_B_T1_ALPHA       (13   << 5)
+#       define RADEON_COLOR_ARG_B_T2_COLOR       (14   << 5)
+#       define RADEON_COLOR_ARG_B_T2_ALPHA       (15   << 5)
+#       define RADEON_COLOR_ARG_B_T3_COLOR       (16   << 5)
+#       define RADEON_COLOR_ARG_B_T3_ALPHA       (17   << 5)
+#       define RADEON_COLOR_ARG_C_SHIFT          10
+#       define RADEON_COLOR_ARG_C_MASK           (0x1f << 10)
+#       define RADEON_COLOR_ARG_C_ZERO           (0    << 10)
+#       define RADEON_COLOR_ARG_C_CURRENT_COLOR  (2    << 10)
+#       define RADEON_COLOR_ARG_C_CURRENT_ALPHA  (3    << 10)
+#       define RADEON_COLOR_ARG_C_DIFFUSE_COLOR  (4    << 10)
+#       define RADEON_COLOR_ARG_C_DIFFUSE_ALPHA  (5    << 10)
+#       define RADEON_COLOR_ARG_C_SPECULAR_COLOR (6    << 10)
+#       define RADEON_COLOR_ARG_C_SPECULAR_ALPHA (7    << 10)
+#       define RADEON_COLOR_ARG_C_TFACTOR_COLOR  (8    << 10)
+#       define RADEON_COLOR_ARG_C_TFACTOR_ALPHA  (9    << 10)
+#       define RADEON_COLOR_ARG_C_T0_COLOR       (10   << 10)
+#       define RADEON_COLOR_ARG_C_T0_ALPHA       (11   << 10)
+#       define RADEON_COLOR_ARG_C_T1_COLOR       (12   << 10)
+#       define RADEON_COLOR_ARG_C_T1_ALPHA       (13   << 10)
+#       define RADEON_COLOR_ARG_C_T2_COLOR       (14   << 10)
+#       define RADEON_COLOR_ARG_C_T2_ALPHA       (15   << 10)
+#       define RADEON_COLOR_ARG_C_T3_COLOR       (16   << 10)
+#       define RADEON_COLOR_ARG_C_T3_ALPHA       (17   << 10)
+#       define RADEON_COMP_ARG_A                 (1 << 15)
+#       define RADEON_COMP_ARG_A_SHIFT           15
+#       define RADEON_COMP_ARG_B                 (1 << 16)
+#       define RADEON_COMP_ARG_B_SHIFT           16
+#       define RADEON_COMP_ARG_C                 (1 << 17)
+#       define RADEON_COMP_ARG_C_SHIFT           17
+#       define RADEON_BLEND_CTL_MASK             (7 << 18)
+#       define RADEON_BLEND_CTL_ADD              (0 << 18)
+#       define RADEON_BLEND_CTL_SUBTRACT         (1 << 18)
+#       define RADEON_BLEND_CTL_ADDSIGNED        (2 << 18)
+#       define RADEON_BLEND_CTL_BLEND            (3 << 18)
+#       define RADEON_BLEND_CTL_DOT3             (4 << 18)
+#       define RADEON_SCALE_SHIFT                21
+#       define RADEON_SCALE_MASK                 (3 << 21)
+#       define RADEON_SCALE_1X                   (0 << 21)
+#       define RADEON_SCALE_2X                   (1 << 21)
+#       define RADEON_SCALE_4X                   (2 << 21)
+#       define RADEON_CLAMP_TX                   (1 << 23)
+#       define RADEON_T0_EQ_TCUR                 (1 << 24)
+#       define RADEON_T1_EQ_TCUR                 (1 << 25)
+#       define RADEON_T2_EQ_TCUR                 (1 << 26)
+#       define RADEON_T3_EQ_TCUR                 (1 << 27)
+#       define RADEON_COLOR_ARG_MASK             0x1f
+#       define RADEON_COMP_ARG_SHIFT             15
+#define RADEON_PP_TXABLEND_0                0x1c64
+#define RADEON_PP_TXABLEND_1                0x1c7c
+#define RADEON_PP_TXABLEND_2                0x1c94
+#       define RADEON_ALPHA_ARG_A_SHIFT          0
+#       define RADEON_ALPHA_ARG_A_MASK           (0xf << 0)
+#       define RADEON_ALPHA_ARG_A_ZERO           (0   << 0)
+#       define RADEON_ALPHA_ARG_A_CURRENT_ALPHA  (1   << 0)
+#       define RADEON_ALPHA_ARG_A_DIFFUSE_ALPHA  (2   << 0)
+#       define RADEON_ALPHA_ARG_A_SPECULAR_ALPHA (3   << 0)
+#       define RADEON_ALPHA_ARG_A_TFACTOR_ALPHA  (4   << 0)
+#       define RADEON_ALPHA_ARG_A_T0_ALPHA       (5   << 0)
+#       define RADEON_ALPHA_ARG_A_T1_ALPHA       (6   << 0)
+#       define RADEON_ALPHA_ARG_A_T2_ALPHA       (7   << 0)
+#       define RADEON_ALPHA_ARG_A_T3_ALPHA       (8   << 0)
+#       define RADEON_ALPHA_ARG_B_SHIFT          4
+#       define RADEON_ALPHA_ARG_B_MASK           (0xf << 4)
+#       define RADEON_ALPHA_ARG_B_ZERO           (0   << 4)
+#       define RADEON_ALPHA_ARG_B_CURRENT_ALPHA  (1   << 4)
+#       define RADEON_ALPHA_ARG_B_DIFFUSE_ALPHA  (2   << 4)
+#       define RADEON_ALPHA_ARG_B_SPECULAR_ALPHA (3   << 4)
+#       define RADEON_ALPHA_ARG_B_TFACTOR_ALPHA  (4   << 4)
+#       define RADEON_ALPHA_ARG_B_T0_ALPHA       (5   << 4)
+#       define RADEON_ALPHA_ARG_B_T1_ALPHA       (6   << 4)
+#       define RADEON_ALPHA_ARG_B_T2_ALPHA       (7   << 4)
+#       define RADEON_ALPHA_ARG_B_T3_ALPHA       (8   << 4)
+#       define RADEON_ALPHA_ARG_C_SHIFT          8
+#       define RADEON_ALPHA_ARG_C_MASK           (0xf << 8)
+#       define RADEON_ALPHA_ARG_C_ZERO           (0   << 8)
+#       define RADEON_ALPHA_ARG_C_CURRENT_ALPHA  (1   << 8)
+#       define RADEON_ALPHA_ARG_C_DIFFUSE_ALPHA  (2   << 8)
+#       define RADEON_ALPHA_ARG_C_SPECULAR_ALPHA (3   << 8)
+#       define RADEON_ALPHA_ARG_C_TFACTOR_ALPHA  (4   << 8)
+#       define RADEON_ALPHA_ARG_C_T0_ALPHA       (5   << 8)
+#       define RADEON_ALPHA_ARG_C_T1_ALPHA       (6   << 8)
+#       define RADEON_ALPHA_ARG_C_T2_ALPHA       (7   << 8)
+#       define RADEON_ALPHA_ARG_C_T3_ALPHA       (8   << 8)
+#       define RADEON_DOT_ALPHA_DONT_REPLICATE   (1   << 9)
+#       define RADEON_ALPHA_ARG_MASK             0xf
+
+#define RADEON_PP_TFACTOR_0                 0x1c68
+#define RADEON_PP_TFACTOR_1                 0x1c80
+#define RADEON_PP_TFACTOR_2                 0x1c98
+
+#define RADEON_RB3D_BLENDCNTL               0x1c20
+#       define RADEON_COMB_FCN_MASK                    (3  << 12)
+#       define RADEON_COMB_FCN_ADD_CLAMP               (0  << 12)
+#       define RADEON_COMB_FCN_ADD_NOCLAMP             (1  << 12)
+#       define RADEON_COMB_FCN_SUB_CLAMP               (2  << 12)
+#       define RADEON_COMB_FCN_SUB_NOCLAMP             (3  << 12)
+#       define RADEON_SRC_BLEND_GL_ZERO                (32 << 16)
+#       define RADEON_SRC_BLEND_GL_ONE                 (33 << 16)
+#       define RADEON_SRC_BLEND_GL_SRC_COLOR           (34 << 16)
+#       define RADEON_SRC_BLEND_GL_ONE_MINUS_SRC_COLOR (35 << 16)
+#       define RADEON_SRC_BLEND_GL_DST_COLOR           (36 << 16)
+#       define RADEON_SRC_BLEND_GL_ONE_MINUS_DST_COLOR (37 << 16)
+#       define RADEON_SRC_BLEND_GL_SRC_ALPHA           (38 << 16)
+#       define RADEON_SRC_BLEND_GL_ONE_MINUS_SRC_ALPHA (39 << 16)
+#       define RADEON_SRC_BLEND_GL_DST_ALPHA           (40 << 16)
+#       define RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA (41 << 16)
+#       define RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE  (42 << 16)
+#       define RADEON_SRC_BLEND_MASK                   (63 << 16)
+#       define RADEON_DST_BLEND_GL_ZERO                (32 << 24)
+#       define RADEON_DST_BLEND_GL_ONE                 (33 << 24)
+#       define RADEON_DST_BLEND_GL_SRC_COLOR           (34 << 24)
+#       define RADEON_DST_BLEND_GL_ONE_MINUS_SRC_COLOR (35 << 24)
+#       define RADEON_DST_BLEND_GL_DST_COLOR           (36 << 24)
+#       define RADEON_DST_BLEND_GL_ONE_MINUS_DST_COLOR (37 << 24)
+#       define RADEON_DST_BLEND_GL_SRC_ALPHA           (38 << 24)
+#       define RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA (39 << 24)
+#       define RADEON_DST_BLEND_GL_DST_ALPHA           (40 << 24)
+#       define RADEON_DST_BLEND_GL_ONE_MINUS_DST_ALPHA (41 << 24)
+#       define RADEON_DST_BLEND_MASK                   (63 << 24)
+#define RADEON_RB3D_CNTL                    0x1c3c
+#       define RADEON_ALPHA_BLEND_ENABLE       (1  <<  0)
+#       define RADEON_PLANE_MASK_ENABLE        (1  <<  1)
+#       define RADEON_DITHER_ENABLE            (1  <<  2)
+#       define RADEON_ROUND_ENABLE             (1  <<  3)
+#       define RADEON_SCALE_DITHER_ENABLE      (1  <<  4)
+#       define RADEON_DITHER_INIT              (1  <<  5)
+#       define RADEON_ROP_ENABLE               (1  <<  6)
+#       define RADEON_STENCIL_ENABLE           (1  <<  7)
+#       define RADEON_Z_ENABLE                 (1  <<  8)
+#       define RADEON_DEPTH_XZ_OFFEST_ENABLE   (1  <<  9)
+#       define RADEON_RB3D_COLOR_FORMAT_SHIFT  10
+
+#       define RADEON_COLOR_FORMAT_ARGB1555    3
+#       define RADEON_COLOR_FORMAT_RGB565      4
+#       define RADEON_COLOR_FORMAT_ARGB8888    6
+#       define RADEON_COLOR_FORMAT_RGB332      7
+#       define RADEON_COLOR_FORMAT_Y8          8
+#       define RADEON_COLOR_FORMAT_RGB8        9
+#       define RADEON_COLOR_FORMAT_YUV422_VYUY 11
+#       define RADEON_COLOR_FORMAT_YUV422_YVYU 12
+#       define RADEON_COLOR_FORMAT_aYUV444     14
+#       define RADEON_COLOR_FORMAT_ARGB4444    15
+
+#       define RADEON_CLRCMP_FLIP_ENABLE       (1  << 14)
+#define RADEON_RB3D_COLOROFFSET             0x1c40
+#       define RADEON_COLOROFFSET_MASK      0xfffffff0
+#define RADEON_RB3D_COLORPITCH              0x1c48
+#       define RADEON_COLORPITCH_MASK         0x000001ff8
+#       define RADEON_COLOR_TILE_ENABLE       (1 << 16)
+#       define RADEON_COLOR_MICROTILE_ENABLE  (1 << 17)
+#       define RADEON_COLOR_ENDIAN_NO_SWAP    (0 << 18)
+#       define RADEON_COLOR_ENDIAN_WORD_SWAP  (1 << 18)
+#       define RADEON_COLOR_ENDIAN_DWORD_SWAP (2 << 18)
+#define RADEON_RB3D_DEPTHOFFSET             0x1c24
+#define RADEON_RB3D_DEPTHPITCH              0x1c28
+#       define RADEON_DEPTHPITCH_MASK         0x00001ff8
+#       define RADEON_DEPTH_ENDIAN_NO_SWAP    (0 << 18)
+#       define RADEON_DEPTH_ENDIAN_WORD_SWAP  (1 << 18)
+#       define RADEON_DEPTH_ENDIAN_DWORD_SWAP (2 << 18)
+#define RADEON_RB3D_PLANEMASK               0x1d84
+#define RADEON_RB3D_ROPCNTL                 0x1d80
+#       define RADEON_ROP_MASK              (15 << 8)
+#       define RADEON_ROP_CLEAR             (0  << 8)
+#       define RADEON_ROP_NOR               (1  << 8)
+#       define RADEON_ROP_AND_INVERTED      (2  << 8)
+#       define RADEON_ROP_COPY_INVERTED     (3  << 8)
+#       define RADEON_ROP_AND_REVERSE       (4  << 8)
+#       define RADEON_ROP_INVERT            (5  << 8)
+#       define RADEON_ROP_XOR               (6  << 8)
+#       define RADEON_ROP_NAND              (7  << 8)
+#       define RADEON_ROP_AND               (8  << 8)
+#       define RADEON_ROP_EQUIV             (9  << 8)
+#       define RADEON_ROP_NOOP              (10 << 8)
+#       define RADEON_ROP_OR_INVERTED       (11 << 8)
+#       define RADEON_ROP_COPY              (12 << 8)
+#       define RADEON_ROP_OR_REVERSE        (13 << 8)
+#       define RADEON_ROP_OR                (14 << 8)
+#       define RADEON_ROP_SET               (15 << 8)
+#define RADEON_RB3D_STENCILREFMASK          0x1d7c
+#       define RADEON_STENCIL_REF_SHIFT       0
+#       define RADEON_STENCIL_REF_MASK        (0xff << 0)
+#       define RADEON_STENCIL_MASK_SHIFT      16
+#       define RADEON_STENCIL_VALUE_MASK      (0xff << 16)
+#       define RADEON_STENCIL_WRITEMASK_SHIFT 24
+#       define RADEON_STENCIL_WRITE_MASK      (0xff << 24)
+#define RADEON_RB3D_ZSTENCILCNTL            0x1c2c
+#       define RADEON_DEPTH_FORMAT_MASK          (0xf << 0)
+#       define RADEON_DEPTH_FORMAT_16BIT_INT_Z   (0  <<  0)
+#       define RADEON_DEPTH_FORMAT_24BIT_INT_Z   (2  <<  0)
+#       define RADEON_DEPTH_FORMAT_24BIT_FLOAT_Z (3  <<  0)
+#       define RADEON_DEPTH_FORMAT_32BIT_INT_Z   (4  <<  0)
+#       define RADEON_DEPTH_FORMAT_32BIT_FLOAT_Z (5  <<  0)
+#       define RADEON_DEPTH_FORMAT_16BIT_FLOAT_W (7  <<  0)
+#       define RADEON_DEPTH_FORMAT_24BIT_FLOAT_W (9  <<  0)
+#       define RADEON_DEPTH_FORMAT_32BIT_FLOAT_W (11 <<  0)
+#       define RADEON_Z_TEST_NEVER               (0  <<  4)
+#       define RADEON_Z_TEST_LESS                (1  <<  4)
+#       define RADEON_Z_TEST_LEQUAL              (2  <<  4)
+#       define RADEON_Z_TEST_EQUAL               (3  <<  4)
+#       define RADEON_Z_TEST_GEQUAL              (4  <<  4)
+#       define RADEON_Z_TEST_GREATER             (5  <<  4)
+#       define RADEON_Z_TEST_NEQUAL              (6  <<  4)
+#       define RADEON_Z_TEST_ALWAYS              (7  <<  4)
+#       define RADEON_Z_TEST_MASK                (7  <<  4)
+#       define RADEON_STENCIL_TEST_NEVER         (0  << 12)
+#       define RADEON_STENCIL_TEST_LESS          (1  << 12)
+#       define RADEON_STENCIL_TEST_LEQUAL        (2  << 12)
+#       define RADEON_STENCIL_TEST_EQUAL         (3  << 12)
+#       define RADEON_STENCIL_TEST_GEQUAL        (4  << 12)
+#       define RADEON_STENCIL_TEST_GREATER       (5  << 12)
+#       define RADEON_STENCIL_TEST_NEQUAL        (6  << 12)
+#       define RADEON_STENCIL_TEST_ALWAYS        (7  << 12)
+#       define RADEON_STENCIL_TEST_MASK          (0x7 << 12)
+#       define RADEON_STENCIL_FAIL_KEEP          (0  << 16)
+#       define RADEON_STENCIL_FAIL_ZERO          (1  << 16)
+#       define RADEON_STENCIL_FAIL_REPLACE       (2  << 16)
+#       define RADEON_STENCIL_FAIL_INC           (3  << 16)
+#       define RADEON_STENCIL_FAIL_DEC           (4  << 16)
+#       define RADEON_STENCIL_FAIL_INVERT        (5  << 16)
+#       define RADEON_STENCIL_FAIL_MASK          (0x7 << 16)
+#       define RADEON_STENCIL_ZPASS_KEEP         (0  << 20)
+#       define RADEON_STENCIL_ZPASS_ZERO         (1  << 20)
+#       define RADEON_STENCIL_ZPASS_REPLACE      (2  << 20)
+#       define RADEON_STENCIL_ZPASS_INC          (3  << 20)
+#       define RADEON_STENCIL_ZPASS_DEC          (4  << 20)
+#       define RADEON_STENCIL_ZPASS_INVERT       (5  << 20)
+#       define RADEON_STENCIL_ZPASS_MASK         (0x7 << 20)
+#       define RADEON_STENCIL_ZFAIL_KEEP         (0  << 24)
+#       define RADEON_STENCIL_ZFAIL_ZERO         (1  << 24)
+#       define RADEON_STENCIL_ZFAIL_REPLACE      (2  << 24)
+#       define RADEON_STENCIL_ZFAIL_INC          (3  << 24)
+#       define RADEON_STENCIL_ZFAIL_DEC          (4  << 24)
+#       define RADEON_STENCIL_ZFAIL_INVERT       (5  << 24)
+#       define RADEON_STENCIL_ZFAIL_MASK         (0x7 << 24)
+#       define RADEON_Z_COMPRESSION_ENABLE       (1  << 28)
+#       define RADEON_FORCE_Z_DIRTY              (1  << 29)
+#       define RADEON_Z_WRITE_ENABLE             (1  << 30)
+#define RADEON_RE_LINE_PATTERN              0x1cd0
+#       define RADEON_LINE_PATTERN_MASK             0x0000ffff
+#       define RADEON_LINE_REPEAT_COUNT_SHIFT       16
+#       define RADEON_LINE_PATTERN_START_SHIFT      24
+#       define RADEON_LINE_PATTERN_LITTLE_BIT_ORDER (0 << 28)
+#       define RADEON_LINE_PATTERN_BIG_BIT_ORDER    (1 << 28)
+#       define RADEON_LINE_PATTERN_AUTO_RESET       (1 << 29)
+#define RADEON_RE_LINE_STATE                0x1cd4
+#       define RADEON_LINE_CURRENT_PTR_SHIFT   0
+#       define RADEON_LINE_CURRENT_COUNT_SHIFT 8
+#define RADEON_RE_MISC                      0x26c4
+#       define RADEON_STIPPLE_COORD_MASK       0x1f
+#       define RADEON_STIPPLE_X_OFFSET_SHIFT   0
+#       define RADEON_STIPPLE_X_OFFSET_MASK    (0x1f << 0)
+#       define RADEON_STIPPLE_Y_OFFSET_SHIFT   8
+#       define RADEON_STIPPLE_Y_OFFSET_MASK    (0x1f << 8)
+#       define RADEON_STIPPLE_LITTLE_BIT_ORDER (0 << 16)
+#       define RADEON_STIPPLE_BIG_BIT_ORDER    (1 << 16)
+#define RADEON_RE_SOLID_COLOR               0x1c1c
+#define RADEON_RE_TOP_LEFT                  0x26c0
+#       define RADEON_RE_LEFT_SHIFT         0
+#       define RADEON_RE_TOP_SHIFT          16
+#define RADEON_RE_WIDTH_HEIGHT              0x1c44
+#       define RADEON_RE_WIDTH_SHIFT        0
+#       define RADEON_RE_HEIGHT_SHIFT       16
+
+#define RADEON_SE_CNTL                      0x1c4c
+#       define RADEON_FFACE_CULL_CW          (0 <<  0)
+#       define RADEON_FFACE_CULL_CCW         (1 <<  0)
+#       define RADEON_FFACE_CULL_DIR_MASK    (1 <<  0)
+#       define RADEON_BFACE_CULL             (0 <<  1)
+#       define RADEON_BFACE_SOLID            (3 <<  1)
+#       define RADEON_FFACE_CULL             (0 <<  3)
+#       define RADEON_FFACE_SOLID            (3 <<  3)
+#       define RADEON_FFACE_CULL_MASK        (3 <<  3)
+#       define RADEON_BADVTX_CULL_DISABLE    (1 <<  5)
+#       define RADEON_FLAT_SHADE_VTX_0       (0 <<  6)
+#       define RADEON_FLAT_SHADE_VTX_1       (1 <<  6)
+#       define RADEON_FLAT_SHADE_VTX_2       (2 <<  6)
+#       define RADEON_FLAT_SHADE_VTX_LAST    (3 <<  6)
+#       define RADEON_DIFFUSE_SHADE_SOLID    (0 <<  8)
+#       define RADEON_DIFFUSE_SHADE_FLAT     (1 <<  8)
+#       define RADEON_DIFFUSE_SHADE_GOURAUD  (2 <<  8)
+#       define RADEON_DIFFUSE_SHADE_MASK     (3 <<  8)
+#       define RADEON_ALPHA_SHADE_SOLID      (0 << 10)
+#       define RADEON_ALPHA_SHADE_FLAT       (1 << 10)
+#       define RADEON_ALPHA_SHADE_GOURAUD    (2 << 10)
+#       define RADEON_ALPHA_SHADE_MASK       (3 << 10)
+#       define RADEON_SPECULAR_SHADE_SOLID   (0 << 12)
+#       define RADEON_SPECULAR_SHADE_FLAT    (1 << 12)
+#       define RADEON_SPECULAR_SHADE_GOURAUD (2 << 12)
+#       define RADEON_SPECULAR_SHADE_MASK    (3 << 12)
+#       define RADEON_FOG_SHADE_SOLID        (0 << 14)
+#       define RADEON_FOG_SHADE_FLAT         (1 << 14)
+#       define RADEON_FOG_SHADE_GOURAUD      (2 << 14)
+#       define RADEON_FOG_SHADE_MASK         (3 << 14)
+#       define RADEON_ZBIAS_ENABLE_POINT     (1 << 16)
+#       define RADEON_ZBIAS_ENABLE_LINE      (1 << 17)
+#       define RADEON_ZBIAS_ENABLE_TRI       (1 << 18)
+#       define RADEON_WIDELINE_ENABLE        (1 << 20)
+#       define RADEON_VPORT_XY_XFORM_ENABLE  (1 << 24)
+#       define RADEON_VPORT_Z_XFORM_ENABLE   (1 << 25)
+#       define RADEON_VTX_PIX_CENTER_D3D     (0 << 27)
+#       define RADEON_VTX_PIX_CENTER_OGL     (1 << 27)
+#       define RADEON_ROUND_MODE_TRUNC       (0 << 28)
+#       define RADEON_ROUND_MODE_ROUND       (1 << 28)
+#       define RADEON_ROUND_MODE_ROUND_EVEN  (2 << 28)
+#       define RADEON_ROUND_MODE_ROUND_ODD   (3 << 28)
+#       define RADEON_ROUND_PREC_16TH_PIX    (0 << 30)
+#       define RADEON_ROUND_PREC_8TH_PIX     (1 << 30)
+#       define RADEON_ROUND_PREC_4TH_PIX     (2 << 30)
+#       define RADEON_ROUND_PREC_HALF_PIX    (3 << 30)
+#define R200_RE_CNTL				0x1c50
+#       define R200_STIPPLE_ENABLE		0x1
+#       define R200_SCISSOR_ENABLE		0x2
+#       define R200_PATTERN_ENABLE		0x4
+#       define R200_PERSPECTIVE_ENABLE		0x8
+#       define R200_POINT_SMOOTH		0x20
+#       define R200_VTX_STQ0_D3D		0x00010000
+#       define R200_VTX_STQ1_D3D		0x00040000
+#       define R200_VTX_STQ2_D3D		0x00100000
+#       define R200_VTX_STQ3_D3D		0x00400000
+#       define R200_VTX_STQ4_D3D		0x01000000
+#       define R200_VTX_STQ5_D3D		0x04000000
+#define RADEON_SE_CNTL_STATUS               0x2140
+#       define RADEON_VC_NO_SWAP            (0 << 0)
+#       define RADEON_VC_16BIT_SWAP         (1 << 0)
+#       define RADEON_VC_32BIT_SWAP         (2 << 0)
+#       define RADEON_VC_HALF_DWORD_SWAP    (3 << 0)
+#       define RADEON_TCL_BYPASS            (1 << 8)
+#define RADEON_SE_COORD_FMT                 0x1c50
+#       define RADEON_VTX_XY_PRE_MULT_1_OVER_W0  (1 <<  0)
+#       define RADEON_VTX_Z_PRE_MULT_1_OVER_W0   (1 <<  1)
+#       define RADEON_VTX_ST0_NONPARAMETRIC      (1 <<  8)
+#       define RADEON_VTX_ST1_NONPARAMETRIC      (1 <<  9)
+#       define RADEON_VTX_ST2_NONPARAMETRIC      (1 << 10)
+#       define RADEON_VTX_ST3_NONPARAMETRIC      (1 << 11)
+#       define RADEON_VTX_W0_NORMALIZE           (1 << 12)
+#       define RADEON_VTX_W0_IS_NOT_1_OVER_W0    (1 << 16)
+#       define RADEON_VTX_ST0_PRE_MULT_1_OVER_W0 (1 << 17)
+#       define RADEON_VTX_ST1_PRE_MULT_1_OVER_W0 (1 << 19)
+#       define RADEON_VTX_ST2_PRE_MULT_1_OVER_W0 (1 << 21)
+#       define RADEON_VTX_ST3_PRE_MULT_1_OVER_W0 (1 << 23)
+#       define RADEON_TEX1_W_ROUTING_USE_W0      (0 << 26)
+#       define RADEON_TEX1_W_ROUTING_USE_Q1      (1 << 26)
+#define RADEON_SE_LINE_WIDTH                0x1db8
+#define RADEON_SE_TCL_LIGHT_MODEL_CTL       0x226c
+#       define RADEON_LIGHTING_ENABLE              (1 << 0)
+#       define RADEON_LIGHT_IN_MODELSPACE          (1 << 1)
+#       define RADEON_LOCAL_VIEWER                 (1 << 2)
+#       define RADEON_NORMALIZE_NORMALS            (1 << 3)
+#       define RADEON_RESCALE_NORMALS              (1 << 4)
+#       define RADEON_SPECULAR_LIGHTS              (1 << 5)
+#       define RADEON_DIFFUSE_SPECULAR_COMBINE     (1 << 6)
+#       define RADEON_LIGHT_ALPHA                  (1 << 7)
+#       define RADEON_LOCAL_LIGHT_VEC_GL           (1 << 8)
+#       define RADEON_LIGHT_NO_NORMAL_AMBIENT_ONLY (1 << 9)
+#       define RADEON_LM_SOURCE_STATE_PREMULT      0
+#       define RADEON_LM_SOURCE_STATE_MULT         1
+#       define RADEON_LM_SOURCE_VERTEX_DIFFUSE     2
+#       define RADEON_LM_SOURCE_VERTEX_SPECULAR    3
+#       define RADEON_EMISSIVE_SOURCE_SHIFT        16
+#       define RADEON_AMBIENT_SOURCE_SHIFT         18
+#       define RADEON_DIFFUSE_SOURCE_SHIFT         20
+#       define RADEON_SPECULAR_SOURCE_SHIFT        22
+#define RADEON_SE_TCL_MATERIAL_AMBIENT_RED     0x2220
+#define RADEON_SE_TCL_MATERIAL_AMBIENT_GREEN   0x2224
+#define RADEON_SE_TCL_MATERIAL_AMBIENT_BLUE    0x2228
+#define RADEON_SE_TCL_MATERIAL_AMBIENT_ALPHA   0x222c
+#define RADEON_SE_TCL_MATERIAL_DIFFUSE_RED     0x2230
+#define RADEON_SE_TCL_MATERIAL_DIFFUSE_GREEN   0x2234
+#define RADEON_SE_TCL_MATERIAL_DIFFUSE_BLUE    0x2238
+#define RADEON_SE_TCL_MATERIAL_DIFFUSE_ALPHA   0x223c
+#define RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED   0x2210
+#define RADEON_SE_TCL_MATERIAL_EMMISSIVE_GREEN 0x2214
+#define RADEON_SE_TCL_MATERIAL_EMMISSIVE_BLUE  0x2218
+#define RADEON_SE_TCL_MATERIAL_EMMISSIVE_ALPHA 0x221c
+#define RADEON_SE_TCL_MATERIAL_SPECULAR_RED    0x2240
+#define RADEON_SE_TCL_MATERIAL_SPECULAR_GREEN  0x2244
+#define RADEON_SE_TCL_MATERIAL_SPECULAR_BLUE   0x2248
+#define RADEON_SE_TCL_MATERIAL_SPECULAR_ALPHA  0x224c
+#define RADEON_SE_TCL_MATRIX_SELECT_0       0x225c
+#       define RADEON_MODELVIEW_0_SHIFT        0
+#       define RADEON_MODELVIEW_1_SHIFT        4
+#       define RADEON_MODELVIEW_2_SHIFT        8
+#       define RADEON_MODELVIEW_3_SHIFT        12
+#       define RADEON_IT_MODELVIEW_0_SHIFT     16
+#       define RADEON_IT_MODELVIEW_1_SHIFT     20
+#       define RADEON_IT_MODELVIEW_2_SHIFT     24
+#       define RADEON_IT_MODELVIEW_3_SHIFT     28
+#define RADEON_SE_TCL_MATRIX_SELECT_1       0x2260
+#       define RADEON_MODELPROJECT_0_SHIFT     0
+#       define RADEON_MODELPROJECT_1_SHIFT     4
+#       define RADEON_MODELPROJECT_2_SHIFT     8
+#       define RADEON_MODELPROJECT_3_SHIFT     12
+#       define RADEON_TEXMAT_0_SHIFT           16
+#       define RADEON_TEXMAT_1_SHIFT           20
+#       define RADEON_TEXMAT_2_SHIFT           24
+#       define RADEON_TEXMAT_3_SHIFT           28
+
+
+#define RADEON_SE_TCL_OUTPUT_VTX_FMT        0x2254
+#       define RADEON_TCL_VTX_W0                 (1 <<  0)
+#       define RADEON_TCL_VTX_FP_DIFFUSE         (1 <<  1)
+#       define RADEON_TCL_VTX_FP_ALPHA           (1 <<  2)
+#       define RADEON_TCL_VTX_PK_DIFFUSE         (1 <<  3)
+#       define RADEON_TCL_VTX_FP_SPEC            (1 <<  4)
+#       define RADEON_TCL_VTX_FP_FOG             (1 <<  5)
+#       define RADEON_TCL_VTX_PK_SPEC            (1 <<  6)
+#       define RADEON_TCL_VTX_ST0                (1 <<  7)
+#       define RADEON_TCL_VTX_ST1                (1 <<  8)
+#       define RADEON_TCL_VTX_Q1                 (1 <<  9)
+#       define RADEON_TCL_VTX_ST2                (1 << 10)
+#       define RADEON_TCL_VTX_Q2                 (1 << 11)
+#       define RADEON_TCL_VTX_ST3                (1 << 12)
+#       define RADEON_TCL_VTX_Q3                 (1 << 13)
+#       define RADEON_TCL_VTX_Q0                 (1 << 14)
+#       define RADEON_TCL_VTX_WEIGHT_COUNT_SHIFT 15
+#       define RADEON_TCL_VTX_NORM0              (1 << 18)
+#       define RADEON_TCL_VTX_XY1                (1 << 27)
+#       define RADEON_TCL_VTX_Z1                 (1 << 28)
+#       define RADEON_TCL_VTX_W1                 (1 << 29)
+#       define RADEON_TCL_VTX_NORM1              (1 << 30)
+#       define RADEON_TCL_VTX_Z0                 (1 << 31)
+
+#define RADEON_SE_TCL_OUTPUT_VTX_SEL        0x2258
+#       define RADEON_TCL_COMPUTE_XYZW           (1 << 0)
+#       define RADEON_TCL_COMPUTE_DIFFUSE        (1 << 1)
+#       define RADEON_TCL_COMPUTE_SPECULAR       (1 << 2)
+#       define RADEON_TCL_FORCE_NAN_IF_COLOR_NAN (1 << 3)
+#       define RADEON_TCL_FORCE_INORDER_PROC     (1 << 4)
+#       define RADEON_TCL_TEX_INPUT_TEX_0        0
+#       define RADEON_TCL_TEX_INPUT_TEX_1        1
+#       define RADEON_TCL_TEX_INPUT_TEX_2        2
+#       define RADEON_TCL_TEX_INPUT_TEX_3        3
+#       define RADEON_TCL_TEX_COMPUTED_TEX_0     8
+#       define RADEON_TCL_TEX_COMPUTED_TEX_1     9
+#       define RADEON_TCL_TEX_COMPUTED_TEX_2     10
+#       define RADEON_TCL_TEX_COMPUTED_TEX_3     11
+#       define RADEON_TCL_TEX_0_OUTPUT_SHIFT     16
+#       define RADEON_TCL_TEX_1_OUTPUT_SHIFT     20
+#       define RADEON_TCL_TEX_2_OUTPUT_SHIFT     24
+#       define RADEON_TCL_TEX_3_OUTPUT_SHIFT     28
+
+#define RADEON_SE_TCL_PER_LIGHT_CTL_0       0x2270
+#       define RADEON_LIGHT_0_ENABLE               (1 <<  0)
+#       define RADEON_LIGHT_0_ENABLE_AMBIENT       (1 <<  1)
+#       define RADEON_LIGHT_0_ENABLE_SPECULAR      (1 <<  2)
+#       define RADEON_LIGHT_0_IS_LOCAL             (1 <<  3)
+#       define RADEON_LIGHT_0_IS_SPOT              (1 <<  4)
+#       define RADEON_LIGHT_0_DUAL_CONE            (1 <<  5)
+#       define RADEON_LIGHT_0_ENABLE_RANGE_ATTEN   (1 <<  6)
+#       define RADEON_LIGHT_0_CONSTANT_RANGE_ATTEN (1 <<  7)
+#       define RADEON_LIGHT_0_SHIFT                0
+#       define RADEON_LIGHT_1_ENABLE               (1 << 16)
+#       define RADEON_LIGHT_1_ENABLE_AMBIENT       (1 << 17)
+#       define RADEON_LIGHT_1_ENABLE_SPECULAR      (1 << 18)
+#       define RADEON_LIGHT_1_IS_LOCAL             (1 << 19)
+#       define RADEON_LIGHT_1_IS_SPOT              (1 << 20)
+#       define RADEON_LIGHT_1_DUAL_CONE            (1 << 21)
+#       define RADEON_LIGHT_1_ENABLE_RANGE_ATTEN   (1 << 22)
+#       define RADEON_LIGHT_1_CONSTANT_RANGE_ATTEN (1 << 23)
+#       define RADEON_LIGHT_1_SHIFT                16
+#define RADEON_SE_TCL_PER_LIGHT_CTL_1       0x2274
+#       define RADEON_LIGHT_2_SHIFT            0
+#       define RADEON_LIGHT_3_SHIFT            16
+#define RADEON_SE_TCL_PER_LIGHT_CTL_2       0x2278
+#       define RADEON_LIGHT_4_SHIFT            0
+#       define RADEON_LIGHT_5_SHIFT            16
+#define RADEON_SE_TCL_PER_LIGHT_CTL_3       0x227c
+#       define RADEON_LIGHT_6_SHIFT            0
+#       define RADEON_LIGHT_7_SHIFT            16
+
+#define RADEON_SE_TCL_SHININESS             0x2250
+
+#define RADEON_SE_TCL_TEXTURE_PROC_CTL      0x2268
+#       define RADEON_TEXGEN_TEXMAT_0_ENABLE      (1 << 0)
+#       define RADEON_TEXGEN_TEXMAT_1_ENABLE      (1 << 1)
+#       define RADEON_TEXGEN_TEXMAT_2_ENABLE      (1 << 2)
+#       define RADEON_TEXGEN_TEXMAT_3_ENABLE      (1 << 3)
+#       define RADEON_TEXMAT_0_ENABLE             (1 << 4)
+#       define RADEON_TEXMAT_1_ENABLE             (1 << 5)
+#       define RADEON_TEXMAT_2_ENABLE             (1 << 6)
+#       define RADEON_TEXMAT_3_ENABLE             (1 << 7)
+#       define RADEON_TEXGEN_INPUT_MASK           0xf
+#       define RADEON_TEXGEN_INPUT_TEXCOORD_0     0
+#       define RADEON_TEXGEN_INPUT_TEXCOORD_1     1
+#       define RADEON_TEXGEN_INPUT_TEXCOORD_2     2
+#       define RADEON_TEXGEN_INPUT_TEXCOORD_3     3
+#       define RADEON_TEXGEN_INPUT_OBJ            4
+#       define RADEON_TEXGEN_INPUT_EYE            5
+#       define RADEON_TEXGEN_INPUT_EYE_NORMAL     6
+#       define RADEON_TEXGEN_INPUT_EYE_REFLECT    7
+#       define RADEON_TEXGEN_INPUT_EYE_NORMALIZED 8
+#       define RADEON_TEXGEN_0_INPUT_SHIFT        16
+#       define RADEON_TEXGEN_1_INPUT_SHIFT        20
+#       define RADEON_TEXGEN_2_INPUT_SHIFT        24
+#       define RADEON_TEXGEN_3_INPUT_SHIFT        28
+
+#define RADEON_SE_TCL_UCP_VERT_BLEND_CTL    0x2264
+#       define RADEON_UCP_IN_CLIP_SPACE            (1 <<  0)
+#       define RADEON_UCP_IN_MODEL_SPACE           (1 <<  1)
+#       define RADEON_UCP_ENABLE_0                 (1 <<  2)
+#       define RADEON_UCP_ENABLE_1                 (1 <<  3)
+#       define RADEON_UCP_ENABLE_2                 (1 <<  4)
+#       define RADEON_UCP_ENABLE_3                 (1 <<  5)
+#       define RADEON_UCP_ENABLE_4                 (1 <<  6)
+#       define RADEON_UCP_ENABLE_5                 (1 <<  7)
+#       define RADEON_TCL_FOG_MASK                 (3 <<  8)
+#       define RADEON_TCL_FOG_DISABLE              (0 <<  8)
+#       define RADEON_TCL_FOG_EXP                  (1 <<  8)
+#       define RADEON_TCL_FOG_EXP2                 (2 <<  8)
+#       define RADEON_TCL_FOG_LINEAR               (3 <<  8)
+#       define RADEON_RNG_BASED_FOG                (1 << 10)
+#       define RADEON_LIGHT_TWOSIDE                (1 << 11)
+#       define RADEON_BLEND_OP_COUNT_MASK          (7 << 12)
+#       define RADEON_BLEND_OP_COUNT_SHIFT         12
+#       define RADEON_POSITION_BLEND_OP_ENABLE     (1 << 16)
+#       define RADEON_NORMAL_BLEND_OP_ENABLE       (1 << 17)
+#       define RADEON_VERTEX_BLEND_SRC_0_PRIMARY   (1 << 18)
+#       define RADEON_VERTEX_BLEND_SRC_0_SECONDARY (1 << 18)
+#       define RADEON_VERTEX_BLEND_SRC_1_PRIMARY   (1 << 19)
+#       define RADEON_VERTEX_BLEND_SRC_1_SECONDARY (1 << 19)
+#       define RADEON_VERTEX_BLEND_SRC_2_PRIMARY   (1 << 20)
+#       define RADEON_VERTEX_BLEND_SRC_2_SECONDARY (1 << 20)
+#       define RADEON_VERTEX_BLEND_SRC_3_PRIMARY   (1 << 21)
+#       define RADEON_VERTEX_BLEND_SRC_3_SECONDARY (1 << 21)
+#       define RADEON_VERTEX_BLEND_WGT_MINUS_ONE   (1 << 22)
+#       define RADEON_CULL_FRONT_IS_CW             (0 << 28)
+#       define RADEON_CULL_FRONT_IS_CCW            (1 << 28)
+#       define RADEON_CULL_FRONT                   (1 << 29)
+#       define RADEON_CULL_BACK                    (1 << 30)
+#       define RADEON_FORCE_W_TO_ONE               (1 << 31)
+
+#define RADEON_SE_VPORT_XSCALE              0x1d98
+#define RADEON_SE_VPORT_XOFFSET             0x1d9c
+#define RADEON_SE_VPORT_YSCALE              0x1da0
+#define RADEON_SE_VPORT_YOFFSET             0x1da4
+#define RADEON_SE_VPORT_ZSCALE              0x1da8
+#define RADEON_SE_VPORT_ZOFFSET             0x1dac
+#define RADEON_SE_ZBIAS_FACTOR              0x1db0
+#define RADEON_SE_ZBIAS_CONSTANT            0x1db4
+
+#define RADEON_SE_VTX_FMT                   0x2080
+#       define RADEON_SE_VTX_FMT_XY         0x00000000
+#       define RADEON_SE_VTX_FMT_W0         0x00000001
+#       define RADEON_SE_VTX_FMT_FPCOLOR    0x00000002
+#       define RADEON_SE_VTX_FMT_FPALPHA    0x00000004
+#       define RADEON_SE_VTX_FMT_PKCOLOR    0x00000008
+#       define RADEON_SE_VTX_FMT_FPSPEC     0x00000010
+#       define RADEON_SE_VTX_FMT_FPFOG      0x00000020
+#       define RADEON_SE_VTX_FMT_PKSPEC     0x00000040
+#       define RADEON_SE_VTX_FMT_ST0        0x00000080
+#       define RADEON_SE_VTX_FMT_ST1        0x00000100
+#       define RADEON_SE_VTX_FMT_Q1         0x00000200
+#       define RADEON_SE_VTX_FMT_ST2        0x00000400
+#       define RADEON_SE_VTX_FMT_Q2         0x00000800
+#       define RADEON_SE_VTX_FMT_ST3        0x00001000
+#       define RADEON_SE_VTX_FMT_Q3         0x00002000
+#       define RADEON_SE_VTX_FMT_Q0         0x00004000
+#       define RADEON_SE_VTX_FMT_BLND_WEIGHT_CNT_MASK  0x00038000
+#       define RADEON_SE_VTX_FMT_N0         0x00040000
+#       define RADEON_SE_VTX_FMT_XY1        0x08000000
+#       define RADEON_SE_VTX_FMT_Z1         0x10000000
+#       define RADEON_SE_VTX_FMT_W1         0x20000000
+#       define RADEON_SE_VTX_FMT_N1         0x40000000
+#       define RADEON_SE_VTX_FMT_Z          0x80000000
+
+#define RADEON_SE_VF_CNTL                             0x2084
+#       define RADEON_VF_PRIM_TYPE_POINT_LIST         1
+#       define RADEON_VF_PRIM_TYPE_LINE_LIST          2
+#       define RADEON_VF_PRIM_TYPE_LINE_STRIP         3
+#       define RADEON_VF_PRIM_TYPE_TRIANGLE_LIST      4
+#       define RADEON_VF_PRIM_TYPE_TRIANGLE_FAN       5
+#       define RADEON_VF_PRIM_TYPE_TRIANGLE_STRIP     6
+#       define RADEON_VF_PRIM_TYPE_TRIANGLE_FLAG      7
+#       define RADEON_VF_PRIM_TYPE_RECTANGLE_LIST     8
+#       define RADEON_VF_PRIM_TYPE_POINT_LIST_3       9
+#       define RADEON_VF_PRIM_TYPE_LINE_LIST_3        10
+#       define RADEON_VF_PRIM_TYPE_SPIRIT_LIST        11
+#       define RADEON_VF_PRIM_TYPE_LINE_LOOP          12
+#       define RADEON_VF_PRIM_TYPE_QUAD_LIST          13
+#       define RADEON_VF_PRIM_TYPE_QUAD_STRIP         14
+#       define RADEON_VF_PRIM_TYPE_POLYGON            15
+#       define RADEON_VF_PRIM_WALK_STATE              (0<<4)
+#       define RADEON_VF_PRIM_WALK_INDEX              (1<<4)
+#       define RADEON_VF_PRIM_WALK_LIST               (2<<4)
+#       define RADEON_VF_PRIM_WALK_DATA               (3<<4)
+#       define RADEON_VF_COLOR_ORDER_RGBA             (1<<6)
+#       define RADEON_VF_RADEON_MODE                  (1<<8)
+#       define RADEON_VF_TCL_OUTPUT_CTL_ENA           (1<<9)
+#       define RADEON_VF_PROG_STREAM_ENA              (1<<10)
+#       define RADEON_VF_INDEX_SIZE_SHIFT             11
+#       define RADEON_VF_NUM_VERTICES_SHIFT           16
+
+#define RADEON_SE_PORT_DATA0			0x2000
+
+#define R200_SE_VAP_CNTL			0x2080
+#       define R200_VAP_TCL_ENABLE		0x00000001
+#       define R200_VAP_SINGLE_BUF_STATE_ENABLE	0x00000010
+#       define R200_VAP_FORCE_W_TO_ONE		0x00010000
+#       define R200_VAP_D3D_TEX_DEFAULT		0x00020000
+#       define R200_VAP_VF_MAX_VTX_NUM__SHIFT	18
+#       define R200_VAP_VF_MAX_VTX_NUM		(9 << 18)
+#       define R200_VAP_DX_CLIP_SPACE_DEF	0x00400000
+#define R200_VF_MAX_VTX_INDX			0x210c
+#define R200_VF_MIN_VTX_INDX			0x2110
+#define R200_SE_VTE_CNTL			0x20b0
+#       define R200_VPORT_X_SCALE_ENA			0x00000001
+#       define R200_VPORT_X_OFFSET_ENA			0x00000002
+#       define R200_VPORT_Y_SCALE_ENA			0x00000004
+#       define R200_VPORT_Y_OFFSET_ENA			0x00000008
+#       define R200_VPORT_Z_SCALE_ENA			0x00000010
+#       define R200_VPORT_Z_OFFSET_ENA			0x00000020
+#       define R200_VTX_XY_FMT				0x00000100
+#       define R200_VTX_Z_FMT				0x00000200
+#       define R200_VTX_W0_FMT				0x00000400
+#       define R200_VTX_W0_NORMALIZE			0x00000800
+#       define R200_VTX_ST_DENORMALIZED		0x00001000
+#define R200_SE_VAP_CNTL_STATUS			0x2140
+#       define R200_VC_NO_SWAP			(0 << 0)
+#       define R200_VC_16BIT_SWAP		(1 << 0)
+#       define R200_VC_32BIT_SWAP		(2 << 0)
+#define R200_PP_TXFILTER_0			0x2c00
+#define R200_PP_TXFILTER_1			0x2c20
+#define R200_PP_TXFILTER_2			0x2c40
+#define R200_PP_TXFILTER_3			0x2c60
+#define R200_PP_TXFILTER_4			0x2c80
+#define R200_PP_TXFILTER_5			0x2ca0
+#       define R200_MAG_FILTER_NEAREST		(0  <<  0)
+#       define R200_MAG_FILTER_LINEAR		(1  <<  0)
+#       define R200_MAG_FILTER_MASK		(1  <<  0)
+#       define R200_MIN_FILTER_NEAREST		(0  <<  1)
+#       define R200_MIN_FILTER_LINEAR		(1  <<  1)
+#       define R200_MIN_FILTER_NEAREST_MIP_NEAREST (2  <<  1)
+#       define R200_MIN_FILTER_NEAREST_MIP_LINEAR (3  <<  1)
+#       define R200_MIN_FILTER_LINEAR_MIP_NEAREST (6  <<  1)
+#       define R200_MIN_FILTER_LINEAR_MIP_LINEAR (7  <<  1)
+#       define R200_MIN_FILTER_ANISO_NEAREST	(8  <<  1)
+#       define R200_MIN_FILTER_ANISO_LINEAR	(9  <<  1)
+#       define R200_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST (10 <<  1)
+#       define R200_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR (11 <<  1)
+#       define R200_MIN_FILTER_MASK		(15 <<  1)
+#       define R200_MAX_ANISO_1_TO_1		(0  <<  5)
+#       define R200_MAX_ANISO_2_TO_1		(1  <<  5)
+#       define R200_MAX_ANISO_4_TO_1		(2  <<  5)
+#       define R200_MAX_ANISO_8_TO_1		(3  <<  5)
+#       define R200_MAX_ANISO_16_TO_1		(4  <<  5)
+#       define R200_MAX_ANISO_MASK		(7  <<  5)
+#       define R200_MAX_MIP_LEVEL_MASK		(0x0f << 16)
+#       define R200_MAX_MIP_LEVEL_SHIFT		16
+#       define R200_YUV_TO_RGB			(1  << 20)
+#       define R200_YUV_TEMPERATURE_COOL	(0  << 21)
+#       define R200_YUV_TEMPERATURE_HOT		(1  << 21)
+#       define R200_YUV_TEMPERATURE_MASK	(1  << 21)
+#       define R200_WRAPEN_S			(1  << 22)
+#       define R200_CLAMP_S_WRAP		(0  << 23)
+#       define R200_CLAMP_S_MIRROR		(1  << 23)
+#       define R200_CLAMP_S_CLAMP_LAST		(2  << 23)
+#       define R200_CLAMP_S_MIRROR_CLAMP_LAST	(3  << 23)
+#       define R200_CLAMP_S_CLAMP_BORDER	(4  << 23)
+#       define R200_CLAMP_S_MIRROR_CLAMP_BORDER	(5  << 23)
+#       define R200_CLAMP_S_CLAMP_GL		(6  << 23)
+#       define R200_CLAMP_S_MIRROR_CLAMP_GL	(7  << 23)
+#       define R200_CLAMP_S_MASK		(7  << 23)
+#       define R200_WRAPEN_T			(1  << 26)
+#       define R200_CLAMP_T_WRAP		(0  << 27)
+#       define R200_CLAMP_T_MIRROR		(1  << 27)
+#       define R200_CLAMP_T_CLAMP_LAST		(2  << 27)
+#       define R200_CLAMP_T_MIRROR_CLAMP_LAST	(3  << 27)
+#       define R200_CLAMP_T_CLAMP_BORDER	(4  << 27)
+#       define R200_CLAMP_T_MIRROR_CLAMP_BORDER	(5  << 27)
+#       define R200_CLAMP_T_CLAMP_GL		(6  << 27)
+#       define R200_CLAMP_T_MIRROR_CLAMP_GL	(7  << 27)
+#       define R200_CLAMP_T_MASK		(7  << 27)
+#       define R200_KILL_LT_ZERO		(1  << 30)
+#       define R200_BORDER_MODE_OGL		(0  << 31)
+#       define R200_BORDER_MODE_D3D		(1  << 31)
+#define R200_PP_TXFORMAT_0			0x2c04
+#define R200_PP_TXFORMAT_1			0x2c24
+#define R200_PP_TXFORMAT_2			0x2c44
+#define R200_PP_TXFORMAT_3			0x2c64
+#define R200_PP_TXFORMAT_4			0x2c84
+#define R200_PP_TXFORMAT_5			0x2ca4
+#       define R200_TXFORMAT_I8			(0 << 0)
+#       define R200_TXFORMAT_AI88		(1 << 0)
+#       define R200_TXFORMAT_RGB332		(2 << 0)
+#       define R200_TXFORMAT_ARGB1555		(3 << 0)
+#       define R200_TXFORMAT_RGB565		(4 << 0)
+#       define R200_TXFORMAT_ARGB4444		(5 << 0)
+#       define R200_TXFORMAT_ARGB8888		(6 << 0)
+#       define R200_TXFORMAT_RGBA8888		(7 << 0)
+#       define R200_TXFORMAT_Y8			(8 << 0)
+#       define R200_TXFORMAT_AVYU4444		(9 << 0)
+#       define R200_TXFORMAT_VYUY422		(10 << 0)
+#       define R200_TXFORMAT_YVYU422		(11 << 0)
+#       define R200_TXFORMAT_DXT1		(12 << 0)
+#       define R200_TXFORMAT_DXT23		(14 << 0)
+#       define R200_TXFORMAT_DXT45		(15 << 0)
+#       define R200_TXFORMAT_ABGR8888		(22 << 0)
+#       define R200_TXFORMAT_FORMAT_MASK	(31 <<	0)
+#       define R200_TXFORMAT_FORMAT_SHIFT	0
+#       define R200_TXFORMAT_ALPHA_IN_MAP	(1 << 6)
+#       define R200_TXFORMAT_NON_POWER2		(1 << 7)
+#       define R200_TXFORMAT_WIDTH_MASK		(15 <<	8)
+#       define R200_TXFORMAT_WIDTH_SHIFT	8
+#       define R200_TXFORMAT_HEIGHT_MASK	(15 << 12)
+#       define R200_TXFORMAT_HEIGHT_SHIFT	12
+#       define R200_TXFORMAT_F5_WIDTH_MASK	(15 << 16)	/* cube face 5 */
+#       define R200_TXFORMAT_F5_WIDTH_SHIFT	16
+#       define R200_TXFORMAT_F5_HEIGHT_MASK	(15 << 20)
+#       define R200_TXFORMAT_F5_HEIGHT_SHIFT	20
+#       define R200_TXFORMAT_ST_ROUTE_STQ0	(0 << 24)
+#       define R200_TXFORMAT_ST_ROUTE_STQ1	(1 << 24)
+#       define R200_TXFORMAT_ST_ROUTE_STQ2	(2 << 24)
+#       define R200_TXFORMAT_ST_ROUTE_STQ3	(3 << 24)
+#       define R200_TXFORMAT_ST_ROUTE_STQ4	(4 << 24)
+#       define R200_TXFORMAT_ST_ROUTE_STQ5	(5 << 24)
+#       define R200_TXFORMAT_ST_ROUTE_MASK	(7 << 24)
+#       define R200_TXFORMAT_ST_ROUTE_SHIFT	24
+#       define R200_TXFORMAT_ALPHA_MASK_ENABLE	(1 << 28)
+#       define R200_TXFORMAT_CHROMA_KEY_ENABLE	(1 << 29)
+#       define R200_TXFORMAT_CUBIC_MAP_ENABLE		(1 << 30)
+#define R200_PP_TXFORMAT_X_0                    0x2c08
+#define R200_PP_TXFORMAT_X_1                    0x2c28
+#define R200_PP_TXFORMAT_X_2                    0x2c48
+#define R200_PP_TXFORMAT_X_3                    0x2c68
+#define R200_PP_TXFORMAT_X_4                    0x2c88
+#define R200_PP_TXFORMAT_X_5                    0x2ca8
+
+#define R200_PP_TXSIZE_0			0x2c0c /* NPOT only */
+#define R200_PP_TXSIZE_1			0x2c2c /* NPOT only */
+#define R200_PP_TXSIZE_2			0x2c4c /* NPOT only */
+#define R200_PP_TXSIZE_3			0x2c6c /* NPOT only */
+#define R200_PP_TXSIZE_4			0x2c8c /* NPOT only */
+#define R200_PP_TXSIZE_5			0x2cac /* NPOT only */
+
+#define R200_PP_TXPITCH_0                       0x2c10 /* NPOT only */
+#define R200_PP_TXPITCH_1			0x2c30 /* NPOT only */
+#define R200_PP_TXPITCH_2			0x2c50 /* NPOT only */
+#define R200_PP_TXPITCH_3			0x2c70 /* NPOT only */
+#define R200_PP_TXPITCH_4			0x2c90 /* NPOT only */
+#define R200_PP_TXPITCH_5			0x2cb0 /* NPOT only */
+
+#define R200_PP_TXOFFSET_0			0x2d00
+#       define R200_TXO_ENDIAN_NO_SWAP		(0 << 0)
+#       define R200_TXO_ENDIAN_BYTE_SWAP	(1 << 0)
+#       define R200_TXO_ENDIAN_WORD_SWAP	(2 << 0)
+#       define R200_TXO_ENDIAN_HALFDW_SWAP	(3 << 0)
+#       define R200_TXO_MACRO_LINEAR		(0 << 2)
+#       define R200_TXO_MACRO_TILE		(1 << 2)
+#       define R200_TXO_MICRO_LINEAR		(0 << 3)
+#       define R200_TXO_MICRO_TILE		(1 << 3)
+#       define R200_TXO_OFFSET_MASK		0xffffffe0
+#       define R200_TXO_OFFSET_SHIFT		5
+#define R200_PP_TXOFFSET_1			0x2d18
+#define R200_PP_TXOFFSET_2			0x2d30
+#define R200_PP_TXOFFSET_3			0x2d48
+#define R200_PP_TXOFFSET_4			0x2d60
+#define R200_PP_TXOFFSET_5			0x2d78
+
+#define R200_PP_TFACTOR_0			0x2ee0
+#define R200_PP_TFACTOR_1			0x2ee4
+#define R200_PP_TFACTOR_2			0x2ee8
+#define R200_PP_TFACTOR_3			0x2eec
+#define R200_PP_TFACTOR_4			0x2ef0
+#define R200_PP_TFACTOR_5			0x2ef4
+
+#define R200_PP_TXCBLEND_0			0x2f00
+#       define R200_TXC_ARG_A_ZERO		(0)
+#       define R200_TXC_ARG_A_CURRENT_COLOR	(2)
+#       define R200_TXC_ARG_A_CURRENT_ALPHA	(3)
+#       define R200_TXC_ARG_A_DIFFUSE_COLOR	(4)
+#       define R200_TXC_ARG_A_DIFFUSE_ALPHA	(5)
+#       define R200_TXC_ARG_A_SPECULAR_COLOR	(6)
+#       define R200_TXC_ARG_A_SPECULAR_ALPHA	(7)
+#       define R200_TXC_ARG_A_TFACTOR_COLOR	(8)
+#       define R200_TXC_ARG_A_TFACTOR_ALPHA	(9)
+#       define R200_TXC_ARG_A_R0_COLOR		(10)
+#       define R200_TXC_ARG_A_R0_ALPHA		(11)
+#       define R200_TXC_ARG_A_R1_COLOR		(12)
+#       define R200_TXC_ARG_A_R1_ALPHA		(13)
+#       define R200_TXC_ARG_A_R2_COLOR		(14)
+#       define R200_TXC_ARG_A_R2_ALPHA		(15)
+#       define R200_TXC_ARG_A_R3_COLOR		(16)
+#       define R200_TXC_ARG_A_R3_ALPHA		(17)
+#       define R200_TXC_ARG_A_R4_COLOR		(18)
+#       define R200_TXC_ARG_A_R4_ALPHA		(19)
+#       define R200_TXC_ARG_A_R5_COLOR		(20)
+#       define R200_TXC_ARG_A_R5_ALPHA		(21)
+#       define R200_TXC_ARG_A_TFACTOR1_COLOR	(26)
+#       define R200_TXC_ARG_A_TFACTOR1_ALPHA	(27)
+#       define R200_TXC_ARG_A_MASK		(31 << 0)
+#       define R200_TXC_ARG_A_SHIFT		0
+#       define R200_TXC_ARG_B_ZERO		(0 << 5)
+#       define R200_TXC_ARG_B_CURRENT_COLOR	(2 << 5)
+#       define R200_TXC_ARG_B_CURRENT_ALPHA	(3 << 5)
+#       define R200_TXC_ARG_B_DIFFUSE_COLOR	(4 << 5)
+#       define R200_TXC_ARG_B_DIFFUSE_ALPHA	(5 << 5)
+#       define R200_TXC_ARG_B_SPECULAR_COLOR	(6 << 5)
+#       define R200_TXC_ARG_B_SPECULAR_ALPHA	(7 << 5)
+#       define R200_TXC_ARG_B_TFACTOR_COLOR	(8 << 5)
+#       define R200_TXC_ARG_B_TFACTOR_ALPHA	(9 << 5)
+#       define R200_TXC_ARG_B_R0_COLOR		(10 << 5)
+#       define R200_TXC_ARG_B_R0_ALPHA		(11 << 5)
+#       define R200_TXC_ARG_B_R1_COLOR		(12 << 5)
+#       define R200_TXC_ARG_B_R1_ALPHA		(13 << 5)
+#       define R200_TXC_ARG_B_R2_COLOR		(14 << 5)
+#       define R200_TXC_ARG_B_R2_ALPHA		(15 << 5)
+#       define R200_TXC_ARG_B_R3_COLOR		(16 << 5)
+#       define R200_TXC_ARG_B_R3_ALPHA		(17 << 5)
+#       define R200_TXC_ARG_B_R4_COLOR		(18 << 5)
+#       define R200_TXC_ARG_B_R4_ALPHA		(19 << 5)
+#       define R200_TXC_ARG_B_R5_COLOR		(20 << 5)
+#       define R200_TXC_ARG_B_R5_ALPHA		(21 << 5)
+#       define R200_TXC_ARG_B_TFACTOR1_COLOR	(26 << 5)
+#       define R200_TXC_ARG_B_TFACTOR1_ALPHA	(27 << 5)
+#       define R200_TXC_ARG_B_MASK		(31 << 5)
+#       define R200_TXC_ARG_B_SHIFT		5
+#       define R200_TXC_ARG_C_ZERO		(0 << 10)
+#       define R200_TXC_ARG_C_CURRENT_COLOR	(2 << 10)
+#       define R200_TXC_ARG_C_CURRENT_ALPHA	(3 << 10)
+#       define R200_TXC_ARG_C_DIFFUSE_COLOR	(4 << 10)
+#       define R200_TXC_ARG_C_DIFFUSE_ALPHA	(5 << 10)
+#       define R200_TXC_ARG_C_SPECULAR_COLOR	(6 << 10)
+#       define R200_TXC_ARG_C_SPECULAR_ALPHA	(7 << 10)
+#       define R200_TXC_ARG_C_TFACTOR_COLOR	(8 << 10)
+#       define R200_TXC_ARG_C_TFACTOR_ALPHA	(9 << 10)
+#       define R200_TXC_ARG_C_R0_COLOR		(10 << 10)
+#       define R200_TXC_ARG_C_R0_ALPHA		(11 << 10)
+#       define R200_TXC_ARG_C_R1_COLOR		(12 << 10)
+#       define R200_TXC_ARG_C_R1_ALPHA		(13 << 10)
+#       define R200_TXC_ARG_C_R2_COLOR		(14 << 10)
+#       define R200_TXC_ARG_C_R2_ALPHA		(15 << 10)
+#       define R200_TXC_ARG_C_R3_COLOR		(16 << 10)
+#       define R200_TXC_ARG_C_R3_ALPHA		(17 << 10)
+#       define R200_TXC_ARG_C_R4_COLOR		(18 << 10)
+#       define R200_TXC_ARG_C_R4_ALPHA		(19 << 10)
+#       define R200_TXC_ARG_C_R5_COLOR		(20 << 10)
+#       define R200_TXC_ARG_C_R5_ALPHA		(21 << 10)
+#       define R200_TXC_ARG_C_TFACTOR1_COLOR	(26 << 10)
+#       define R200_TXC_ARG_C_TFACTOR1_ALPHA	(27 << 10)
+#       define R200_TXC_ARG_C_MASK		(31 << 10)
+#       define R200_TXC_ARG_C_SHIFT		10
+#       define R200_TXC_COMP_ARG_A		(1 << 16)
+#       define R200_TXC_COMP_ARG_A_SHIFT	(16)
+#       define R200_TXC_BIAS_ARG_A		(1 << 17)
+#       define R200_TXC_SCALE_ARG_A		(1 << 18)
+#       define R200_TXC_NEG_ARG_A		(1 << 19)
+#       define R200_TXC_COMP_ARG_B		(1 << 20)
+#       define R200_TXC_COMP_ARG_B_SHIFT	(20)
+#       define R200_TXC_BIAS_ARG_B		(1 << 21)
+#       define R200_TXC_SCALE_ARG_B		(1 << 22)
+#       define R200_TXC_NEG_ARG_B		(1 << 23)
+#       define R200_TXC_COMP_ARG_C		(1 << 24)
+#       define R200_TXC_COMP_ARG_C_SHIFT	(24)
+#       define R200_TXC_BIAS_ARG_C		(1 << 25)
+#       define R200_TXC_SCALE_ARG_C		(1 << 26)
+#       define R200_TXC_NEG_ARG_C		(1 << 27)
+#       define R200_TXC_OP_MADD			(0 << 28)
+#       define R200_TXC_OP_CND0			(2 << 28)
+#       define R200_TXC_OP_LERP			(3 << 28)
+#       define R200_TXC_OP_DOT3			(4 << 28)
+#       define R200_TXC_OP_DOT4			(5 << 28)
+#       define R200_TXC_OP_CONDITIONAL		(6 << 28)
+#       define R200_TXC_OP_DOT2_ADD		(7 << 28)
+#       define R200_TXC_OP_MASK			(7 << 28)
+#define R200_PP_TXCBLEND2_0		0x2f04
+#       define R200_TXC_TFACTOR_SEL_SHIFT	0
+#       define R200_TXC_TFACTOR_SEL_MASK	0x7
+#       define R200_TXC_TFACTOR1_SEL_SHIFT	4
+#       define R200_TXC_TFACTOR1_SEL_MASK	(0x7 << 4)
+#       define R200_TXC_SCALE_SHIFT		8
+#       define R200_TXC_SCALE_MASK		(7 << 8)
+#       define R200_TXC_SCALE_1X		(0 << 8)
+#       define R200_TXC_SCALE_2X		(1 << 8)
+#       define R200_TXC_SCALE_4X		(2 << 8)
+#       define R200_TXC_SCALE_8X		(3 << 8)
+#       define R200_TXC_SCALE_INV2		(5 << 8)
+#       define R200_TXC_SCALE_INV4		(6 << 8)
+#       define R200_TXC_SCALE_INV8		(7 << 8)
+#       define R200_TXC_CLAMP_SHIFT		12
+#       define R200_TXC_CLAMP_MASK		(3 << 12)
+#       define R200_TXC_CLAMP_WRAP		(0 << 12)
+#       define R200_TXC_CLAMP_0_1		(1 << 12)
+#       define R200_TXC_CLAMP_8_8		(2 << 12)
+#       define R200_TXC_OUTPUT_REG_MASK		(7 << 16)
+#       define R200_TXC_OUTPUT_REG_NONE		(0 << 16)
+#       define R200_TXC_OUTPUT_REG_R0		(1 << 16)
+#       define R200_TXC_OUTPUT_REG_R1		(2 << 16)
+#       define R200_TXC_OUTPUT_REG_R2		(3 << 16)
+#       define R200_TXC_OUTPUT_REG_R3		(4 << 16)
+#       define R200_TXC_OUTPUT_REG_R4		(5 << 16)
+#       define R200_TXC_OUTPUT_REG_R5		(6 << 16)
+#       define R200_TXC_OUTPUT_MASK_MASK	(7 << 20)
+#       define R200_TXC_OUTPUT_MASK_RGB		(0 << 20)
+#       define R200_TXC_OUTPUT_MASK_RG		(1 << 20)
+#       define R200_TXC_OUTPUT_MASK_RB		(2 << 20)
+#       define R200_TXC_OUTPUT_MASK_R		(3 << 20)
+#       define R200_TXC_OUTPUT_MASK_GB		(4 << 20)
+#       define R200_TXC_OUTPUT_MASK_G		(5 << 20)
+#       define R200_TXC_OUTPUT_MASK_B		(6 << 20)
+#       define R200_TXC_OUTPUT_MASK_NONE	(7 << 20)
+#       define R200_TXC_REPL_NORMAL		0
+#       define R200_TXC_REPL_RED		1
+#       define R200_TXC_REPL_GREEN		2
+#       define R200_TXC_REPL_BLUE		3
+#       define R200_TXC_REPL_ARG_A_SHIFT	26
+#       define R200_TXC_REPL_ARG_A_MASK		(3 << 26)
+#       define R200_TXC_REPL_ARG_B_SHIFT	28
+#       define R200_TXC_REPL_ARG_B_MASK		(3 << 28)
+#       define R200_TXC_REPL_ARG_C_SHIFT	30
+#       define R200_TXC_REPL_ARG_C_MASK		(3 << 30)
+#define R200_PP_TXABLEND_0			0x2f08
+#       define R200_TXA_ARG_A_ZERO		(0)
+#       define R200_TXA_ARG_A_CURRENT_ALPHA	(2) /* guess */
+#       define R200_TXA_ARG_A_CURRENT_BLUE	(3) /* guess */
+#       define R200_TXA_ARG_A_DIFFUSE_ALPHA	(4)
+#       define R200_TXA_ARG_A_DIFFUSE_BLUE	(5)
+#       define R200_TXA_ARG_A_SPECULAR_ALPHA	(6)
+#       define R200_TXA_ARG_A_SPECULAR_BLUE	(7)
+#       define R200_TXA_ARG_A_TFACTOR_ALPHA	(8)
+#       define R200_TXA_ARG_A_TFACTOR_BLUE	(9)
+#       define R200_TXA_ARG_A_R0_ALPHA		(10)
+#       define R200_TXA_ARG_A_R0_BLUE		(11)
+#       define R200_TXA_ARG_A_R1_ALPHA		(12)
+#       define R200_TXA_ARG_A_R1_BLUE		(13)
+#       define R200_TXA_ARG_A_R2_ALPHA		(14)
+#       define R200_TXA_ARG_A_R2_BLUE		(15)
+#       define R200_TXA_ARG_A_R3_ALPHA		(16)
+#       define R200_TXA_ARG_A_R3_BLUE		(17)
+#       define R200_TXA_ARG_A_R4_ALPHA		(18)
+#       define R200_TXA_ARG_A_R4_BLUE		(19)
+#       define R200_TXA_ARG_A_R5_ALPHA		(20)
+#       define R200_TXA_ARG_A_R5_BLUE		(21)
+#       define R200_TXA_ARG_A_TFACTOR1_ALPHA	(26)
+#       define R200_TXA_ARG_A_TFACTOR1_BLUE	(27)
+#       define R200_TXA_ARG_A_MASK		(31 << 0)
+#       define R200_TXA_ARG_A_SHIFT		0
+#       define R200_TXA_ARG_B_ZERO		(0 << 5)
+#       define R200_TXA_ARG_B_CURRENT_ALPHA	(2 << 5) /* guess */
+#       define R200_TXA_ARG_B_CURRENT_BLUE	(3 << 5) /* guess */
+#       define R200_TXA_ARG_B_DIFFUSE_ALPHA	(4 << 5)
+#       define R200_TXA_ARG_B_DIFFUSE_BLUE	(5 << 5)
+#       define R200_TXA_ARG_B_SPECULAR_ALPHA	(6 << 5)
+#       define R200_TXA_ARG_B_SPECULAR_BLUE	(7 << 5)
+#       define R200_TXA_ARG_B_TFACTOR_ALPHA	(8 << 5)
+#       define R200_TXA_ARG_B_TFACTOR_BLUE	(9 << 5)
+#       define R200_TXA_ARG_B_R0_ALPHA		(10 << 5)
+#       define R200_TXA_ARG_B_R0_BLUE		(11 << 5)
+#       define R200_TXA_ARG_B_R1_ALPHA		(12 << 5)
+#       define R200_TXA_ARG_B_R1_BLUE		(13 << 5)
+#       define R200_TXA_ARG_B_R2_ALPHA		(14 << 5)
+#       define R200_TXA_ARG_B_R2_BLUE		(15 << 5)
+#       define R200_TXA_ARG_B_R3_ALPHA		(16 << 5)
+#       define R200_TXA_ARG_B_R3_BLUE		(17 << 5)
+#       define R200_TXA_ARG_B_R4_ALPHA		(18 << 5)
+#       define R200_TXA_ARG_B_R4_BLUE		(19 << 5)
+#       define R200_TXA_ARG_B_R5_ALPHA		(20 << 5)
+#       define R200_TXA_ARG_B_R5_BLUE		(21 << 5)
+#       define R200_TXA_ARG_B_TFACTOR1_ALPHA	(26 << 5)
+#       define R200_TXA_ARG_B_TFACTOR1_BLUE	(27 << 5)
+#       define R200_TXA_ARG_B_MASK		(31 << 5)
+#       define R200_TXA_ARG_B_SHIFT			5
+#       define R200_TXA_ARG_C_ZERO		(0 << 10)
+#       define R200_TXA_ARG_C_CURRENT_ALPHA	(2 << 10) /* guess */
+#       define R200_TXA_ARG_C_CURRENT_BLUE	(3 << 10) /* guess */
+#       define R200_TXA_ARG_C_DIFFUSE_ALPHA	(4 << 10)
+#       define R200_TXA_ARG_C_DIFFUSE_BLUE	(5 << 10)
+#       define R200_TXA_ARG_C_SPECULAR_ALPHA	(6 << 10)
+#       define R200_TXA_ARG_C_SPECULAR_BLUE	(7 << 10)
+#       define R200_TXA_ARG_C_TFACTOR_ALPHA	(8 << 10)
+#       define R200_TXA_ARG_C_TFACTOR_BLUE	(9 << 10)
+#       define R200_TXA_ARG_C_R0_ALPHA		(10 << 10)
+#       define R200_TXA_ARG_C_R0_BLUE		(11 << 10)
+#       define R200_TXA_ARG_C_R1_ALPHA		(12 << 10)
+#       define R200_TXA_ARG_C_R1_BLUE		(13 << 10)
+#       define R200_TXA_ARG_C_R2_ALPHA		(14 << 10)
+#       define R200_TXA_ARG_C_R2_BLUE		(15 << 10)
+#       define R200_TXA_ARG_C_R3_ALPHA		(16 << 10)
+#       define R200_TXA_ARG_C_R3_BLUE		(17 << 10)
+#       define R200_TXA_ARG_C_R4_ALPHA		(18 << 10)
+#       define R200_TXA_ARG_C_R4_BLUE		(19 << 10)
+#       define R200_TXA_ARG_C_R5_ALPHA		(20 << 10)
+#       define R200_TXA_ARG_C_R5_BLUE		(21 << 10)
+#       define R200_TXA_ARG_C_TFACTOR1_ALPHA	(26 << 10)
+#       define R200_TXA_ARG_C_TFACTOR1_BLUE	(27 << 10)
+#       define R200_TXA_ARG_C_MASK		(31 << 10)
+#       define R200_TXA_ARG_C_SHIFT		10
+#       define R200_TXA_COMP_ARG_A		(1 << 16)
+#       define R200_TXA_COMP_ARG_A_SHIFT	(16)
+#       define R200_TXA_BIAS_ARG_A		(1 << 17)
+#       define R200_TXA_SCALE_ARG_A		(1 << 18)
+#       define R200_TXA_NEG_ARG_A		(1 << 19)
+#       define R200_TXA_COMP_ARG_B		(1 << 20)
+#       define R200_TXA_COMP_ARG_B_SHIFT	(20)
+#       define R200_TXA_BIAS_ARG_B		(1 << 21)
+#       define R200_TXA_SCALE_ARG_B		(1 << 22)
+#       define R200_TXA_NEG_ARG_B		(1 << 23)
+#       define R200_TXA_COMP_ARG_C		(1 << 24)
+#       define R200_TXA_COMP_ARG_C_SHIFT	(24)
+#       define R200_TXA_BIAS_ARG_C		(1 << 25)
+#       define R200_TXA_SCALE_ARG_C		(1 << 26)
+#       define R200_TXA_NEG_ARG_C		(1 << 27)
+#       define R200_TXA_OP_MADD			(0 << 28)
+#       define R200_TXA_OP_CND0			(2 << 28)
+#       define R200_TXA_OP_LERP			(3 << 28)
+#       define R200_TXA_OP_CONDITIONAL		(6 << 28)
+#       define R200_TXA_OP_MASK			(7 << 28)
+#define R200_PP_TXABLEND2_0			0x2f0c
+#       define R200_TXA_TFACTOR_SEL_SHIFT	0
+#       define R200_TXA_TFACTOR_SEL_MASK	0x7
+#       define R200_TXA_TFACTOR1_SEL_SHIFT	4
+#       define R200_TXA_TFACTOR1_SEL_MASK	(0x7 << 4)
+#       define R200_TXA_SCALE_SHIFT		8
+#       define R200_TXA_SCALE_MASK		(7 << 8)
+#       define R200_TXA_SCALE_1X		(0 << 8)
+#       define R200_TXA_SCALE_2X		(1 << 8)
+#       define R200_TXA_SCALE_4X		(2 << 8)
+#       define R200_TXA_SCALE_8X		(3 << 8)
+#       define R200_TXA_SCALE_INV2		(5 << 8)
+#       define R200_TXA_SCALE_INV4		(6 << 8)
+#       define R200_TXA_SCALE_INV8		(7 << 8)
+#       define R200_TXA_CLAMP_SHIFT		12
+#       define R200_TXA_CLAMP_MASK		(3 << 12)
+#       define R200_TXA_CLAMP_WRAP		(0 << 12)
+#       define R200_TXA_CLAMP_0_1		(1 << 12)
+#       define R200_TXA_CLAMP_8_8		(2 << 12)
+#       define R200_TXA_OUTPUT_REG_MASK		(7 << 16)
+#       define R200_TXA_OUTPUT_REG_NONE		(0 << 16)
+#       define R200_TXA_OUTPUT_REG_R0		(1 << 16)
+#       define R200_TXA_OUTPUT_REG_R1		(2 << 16)
+#       define R200_TXA_OUTPUT_REG_R2		(3 << 16)
+#       define R200_TXA_OUTPUT_REG_R3		(4 << 16)
+#       define R200_TXA_OUTPUT_REG_R4		(5 << 16)
+#       define R200_TXA_OUTPUT_REG_R5		(6 << 16)
+#       define R200_TXA_DOT_ALPHA		(1 << 20)
+#       define R200_TXA_REPL_NORMAL		0
+#       define R200_TXA_REPL_RED		1
+#       define R200_TXA_REPL_GREEN		2
+#       define R200_TXA_REPL_ARG_A_SHIFT	26
+#       define R200_TXA_REPL_ARG_A_MASK		(3 << 26)
+#       define R200_TXA_REPL_ARG_B_SHIFT	28
+#       define R200_TXA_REPL_ARG_B_MASK		(3 << 28)
+#       define R200_TXA_REPL_ARG_C_SHIFT	30
+#       define R200_TXA_REPL_ARG_C_MASK		(3 << 30)
+
+#define R200_SE_VTX_FMT_0			0x2088
+#       define R200_VTX_XY			0 /* always have xy */
+#       define R200_VTX_Z0			(1<<0)
+#       define R200_VTX_W0			(1<<1)
+#       define R200_VTX_WEIGHT_COUNT_SHIFT	(2)
+#       define R200_VTX_PV_MATRIX_SEL		(1<<5)
+#       define R200_VTX_N0			(1<<6)
+#       define R200_VTX_POINT_SIZE		(1<<7)
+#       define R200_VTX_DISCRETE_FOG		(1<<8)
+#       define R200_VTX_SHININESS_0		(1<<9)
+#       define R200_VTX_SHININESS_1		(1<<10)
+#       define   R200_VTX_COLOR_NOT_PRESENT	0
+#       define   R200_VTX_PK_RGBA		1
+#       define   R200_VTX_FP_RGB		2
+#       define   R200_VTX_FP_RGBA		3
+#       define   R200_VTX_COLOR_MASK		3
+#       define R200_VTX_COLOR_0_SHIFT		11
+#       define R200_VTX_COLOR_1_SHIFT		13
+#       define R200_VTX_COLOR_2_SHIFT		15
+#       define R200_VTX_COLOR_3_SHIFT		17
+#       define R200_VTX_COLOR_4_SHIFT		19
+#       define R200_VTX_COLOR_5_SHIFT		21
+#       define R200_VTX_COLOR_6_SHIFT		23
+#       define R200_VTX_COLOR_7_SHIFT		25
+#       define R200_VTX_XY1			(1<<28)
+#       define R200_VTX_Z1			(1<<29)
+#       define R200_VTX_W1			(1<<30)
+#       define R200_VTX_N1			(1<<31)
+#define R200_SE_VTX_FMT_1			0x208c
+#       define R200_VTX_TEX0_COMP_CNT_SHIFT	0
+#       define R200_VTX_TEX1_COMP_CNT_SHIFT	3
+#       define R200_VTX_TEX2_COMP_CNT_SHIFT	6
+#       define R200_VTX_TEX3_COMP_CNT_SHIFT	9
+#       define R200_VTX_TEX4_COMP_CNT_SHIFT	12
+#       define R200_VTX_TEX5_COMP_CNT_SHIFT	15
+
+#define R200_SE_TCL_OUTPUT_VTX_FMT_0		0x2090
+#define R200_SE_TCL_OUTPUT_VTX_FMT_1		0x2094
+#define R200_SE_TCL_OUTPUT_VTX_COMP_SEL		0x2250
+#       define R200_OUTPUT_XYZW			(1<<0)
+#       define R200_OUTPUT_COLOR_0		(1<<8)
+#       define R200_OUTPUT_COLOR_1		(1<<9)
+#       define R200_OUTPUT_TEX_0		(1<<16)
+#       define R200_OUTPUT_TEX_1		(1<<17)
+#       define R200_OUTPUT_TEX_2		(1<<18)
+#       define R200_OUTPUT_TEX_3		(1<<19)
+#       define R200_OUTPUT_TEX_4		(1<<20)
+#       define R200_OUTPUT_TEX_5		(1<<21)
+#       define R200_OUTPUT_TEX_MASK		(0x3f<<16)
+#       define R200_OUTPUT_DISCRETE_FOG		(1<<24)
+#       define R200_OUTPUT_PT_SIZE		(1<<25)
+#       define R200_FORCE_INORDER_PROC		(1<<31)
+#define R200_PP_CNTL_X				0x2cc4
+#define R200_PP_TXMULTI_CTL_0			0x2c1c
+#define R200_SE_VTX_STATE_CNTL			0x2180
+#       define R200_UPDATE_USER_COLOR_0_ENA_MASK (1<<16)
+
+				/* Registers for CP and Microcode Engine */
+#define RADEON_CP_ME_RAM_ADDR               0x07d4
+#define RADEON_CP_ME_RAM_RADDR              0x07d8
+#define RADEON_CP_ME_RAM_DATAH              0x07dc
+#define RADEON_CP_ME_RAM_DATAL              0x07e0
+
+#define RADEON_CP_RB_BASE                   0x0700
+#define RADEON_CP_RB_CNTL                   0x0704
+#	define RADEON_RB_BUFSZ_SHIFT		0
+#	define RADEON_RB_BUFSZ_MASK		(0x3f << 0)
+#	define RADEON_RB_BLKSZ_SHIFT		8
+#	define RADEON_RB_BLKSZ_MASK		(0x3f << 8)
+#	define RADEON_MAX_FETCH_SHIFT		18
+#	define RADEON_MAX_FETCH_MASK		(0x3 << 18)
+#	define RADEON_RB_NO_UPDATE		(1 << 27)
+#	define RADEON_RB_RPTR_WR_ENA		(1 << 31)
+#define RADEON_CP_RB_RPTR_ADDR              0x070c
+#define RADEON_CP_RB_RPTR                   0x0710
+#define RADEON_CP_RB_WPTR                   0x0714
+#define RADEON_CP_RB_RPTR_WR                0x071c
+
+#define RADEON_CP_IB_BASE                   0x0738
+#define RADEON_CP_IB_BUFSZ                  0x073c
+
+#define RADEON_CP_CSQ_CNTL                  0x0740
+#       define RADEON_CSQ_CNT_PRIMARY_MASK     (0xff << 0)
+#       define RADEON_CSQ_PRIDIS_INDDIS        (0    << 28)
+#       define RADEON_CSQ_PRIPIO_INDDIS        (1    << 28)
+#       define RADEON_CSQ_PRIBM_INDDIS         (2    << 28)
+#       define RADEON_CSQ_PRIPIO_INDBM         (3    << 28)
+#       define RADEON_CSQ_PRIBM_INDBM          (4    << 28)
+#       define RADEON_CSQ_PRIPIO_INDPIO        (15   << 28)
+
+#define R300_CP_RESYNC_ADDR                 0x778
+#define R300_CP_RESYNC_DATA                 0x77c
+
+#define RADEON_CP_CSQ_STAT                  0x07f8
+#       define RADEON_CSQ_RPTR_PRIMARY_MASK    (0xff <<  0)
+#       define RADEON_CSQ_WPTR_PRIMARY_MASK    (0xff <<  8)
+#       define RADEON_CSQ_RPTR_INDIRECT_MASK   (0xff << 16)
+#       define RADEON_CSQ_WPTR_INDIRECT_MASK   (0xff << 24)
+#define RADEON_CP_CSQ2_STAT                  0x07fc
+#define RADEON_CP_CSQ_ADDR                  0x07f0
+#define RADEON_CP_CSQ_DATA                  0x07f4
+#define RADEON_CP_CSQ_APER_PRIMARY          0x1000
+#define RADEON_CP_CSQ_APER_INDIRECT         0x1300
+
+#define RADEON_CP_RB_WPTR_DELAY             0x0718
+#       define RADEON_PRE_WRITE_TIMER_SHIFT    0
+#       define RADEON_PRE_WRITE_LIMIT_SHIFT    23
+#define RADEON_CP_CSQ_MODE		0x0744
+#	define RADEON_INDIRECT2_START_SHIFT	0
+#	define RADEON_INDIRECT2_START_MASK	(0x7f << 0)
+#	define RADEON_INDIRECT1_START_SHIFT	8
+#	define RADEON_INDIRECT1_START_MASK	(0x7f << 8)
+
+#define RADEON_AIC_CNTL                     0x01d0
+#       define RADEON_PCIGART_TRANSLATE_EN     (1 << 0)
+#       define RADEON_DIS_OUT_OF_PCI_GART_ACCESS     (1 << 1)
+#define RADEON_AIC_LO_ADDR                  0x01dc
+#define RADEON_AIC_PT_BASE		0x01d8
+#define RADEON_AIC_HI_ADDR		0x01e0
+
+
+
+				/* Constants */
+/* #define RADEON_LAST_FRAME_REG               RADEON_GUI_SCRATCH_REG0 */
+/* efine RADEON_LAST_CLEAR_REG               RADEON_GUI_SCRATCH_REG2 */
+
+
+
+				/* CP packet types */
+#define RADEON_CP_PACKET0                           0x00000000
+#define RADEON_CP_PACKET1                           0x40000000
+#define RADEON_CP_PACKET2                           0x80000000
+#define RADEON_CP_PACKET3                           0xC0000000
+#       define RADEON_CP_PACKET_MASK                0xC0000000
+#       define RADEON_CP_PACKET_COUNT_MASK          0x3fff0000
+#       define RADEON_CP_PACKET_MAX_DWORDS          (1 << 12)
+#       define RADEON_CP_PACKET0_REG_MASK           0x000007ff
+#       define R300_CP_PACKET0_REG_MASK             0x00001fff
+#       define RADEON_CP_PACKET1_REG0_MASK          0x000007ff
+#       define RADEON_CP_PACKET1_REG1_MASK          0x003ff800
+
+#define RADEON_CP_PACKET0_ONE_REG_WR                0x00008000
+
+#define RADEON_CP_PACKET3_NOP                       0xC0001000
+#define RADEON_CP_PACKET3_NEXT_CHAR                 0xC0001900
+#define RADEON_CP_PACKET3_PLY_NEXTSCAN              0xC0001D00
+#define RADEON_CP_PACKET3_SET_SCISSORS              0xC0001E00
+#define RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM     0xC0002300
+#define RADEON_CP_PACKET3_LOAD_MICROCODE            0xC0002400
+#define RADEON_CP_PACKET3_WAIT_FOR_IDLE             0xC0002600
+#define RADEON_CP_PACKET3_3D_DRAW_VBUF              0xC0002800
+#define RADEON_CP_PACKET3_3D_DRAW_IMMD              0xC0002900
+#define RADEON_CP_PACKET3_3D_DRAW_INDX              0xC0002A00
+#define RADEON_CP_PACKET3_LOAD_PALETTE              0xC0002C00
+#define R200_CP_PACKET3_3D_DRAW_IMMD_2              0xc0003500
+#define RADEON_CP_PACKET3_3D_LOAD_VBPNTR            0xC0002F00
+#define RADEON_CP_PACKET3_CNTL_PAINT                0xC0009100
+#define RADEON_CP_PACKET3_CNTL_BITBLT               0xC0009200
+#define RADEON_CP_PACKET3_CNTL_SMALLTEXT            0xC0009300
+#define RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT         0xC0009400
+#define RADEON_CP_PACKET3_CNTL_POLYLINE             0xC0009500
+#define RADEON_CP_PACKET3_CNTL_POLYSCANLINES        0xC0009800
+#define RADEON_CP_PACKET3_CNTL_PAINT_MULTI          0xC0009A00
+#define RADEON_CP_PACKET3_CNTL_BITBLT_MULTI         0xC0009B00
+#define RADEON_CP_PACKET3_CNTL_TRANS_BITBLT         0xC0009C00
+
+
+#define RADEON_CP_VC_FRMT_XY                        0x00000000
+#define RADEON_CP_VC_FRMT_W0                        0x00000001
+#define RADEON_CP_VC_FRMT_FPCOLOR                   0x00000002
+#define RADEON_CP_VC_FRMT_FPALPHA                   0x00000004
+#define RADEON_CP_VC_FRMT_PKCOLOR                   0x00000008
+#define RADEON_CP_VC_FRMT_FPSPEC                    0x00000010
+#define RADEON_CP_VC_FRMT_FPFOG                     0x00000020
+#define RADEON_CP_VC_FRMT_PKSPEC                    0x00000040
+#define RADEON_CP_VC_FRMT_ST0                       0x00000080
+#define RADEON_CP_VC_FRMT_ST1                       0x00000100
+#define RADEON_CP_VC_FRMT_Q1                        0x00000200
+#define RADEON_CP_VC_FRMT_ST2                       0x00000400
+#define RADEON_CP_VC_FRMT_Q2                        0x00000800
+#define RADEON_CP_VC_FRMT_ST3                       0x00001000
+#define RADEON_CP_VC_FRMT_Q3                        0x00002000
+#define RADEON_CP_VC_FRMT_Q0                        0x00004000
+#define RADEON_CP_VC_FRMT_BLND_WEIGHT_CNT_MASK      0x00038000
+#define RADEON_CP_VC_FRMT_N0                        0x00040000
+#define RADEON_CP_VC_FRMT_XY1                       0x08000000
+#define RADEON_CP_VC_FRMT_Z1                        0x10000000
+#define RADEON_CP_VC_FRMT_W1                        0x20000000
+#define RADEON_CP_VC_FRMT_N1                        0x40000000
+#define RADEON_CP_VC_FRMT_Z                         0x80000000
+
+#define RADEON_CP_VC_CNTL_PRIM_TYPE_NONE            0x00000000
+#define RADEON_CP_VC_CNTL_PRIM_TYPE_POINT           0x00000001
+#define RADEON_CP_VC_CNTL_PRIM_TYPE_LINE            0x00000002
+#define RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP      0x00000003
+#define RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST        0x00000004
+#define RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN         0x00000005
+#define RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP       0x00000006
+#define RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_TYPE_2      0x00000007
+#define RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST       0x00000008
+#define RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_POINT_LIST 0x00000009
+#define RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_LINE_LIST  0x0000000a
+#define RADEON_CP_VC_CNTL_PRIM_WALK_IND             0x00000010
+#define RADEON_CP_VC_CNTL_PRIM_WALK_LIST            0x00000020
+#define RADEON_CP_VC_CNTL_PRIM_WALK_RING            0x00000030
+#define RADEON_CP_VC_CNTL_COLOR_ORDER_BGRA          0x00000000
+#define RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA          0x00000040
+#define RADEON_CP_VC_CNTL_MAOS_ENABLE               0x00000080
+#define RADEON_CP_VC_CNTL_VTX_FMT_NON_RADEON_MODE   0x00000000
+#define RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE       0x00000100
+#define RADEON_CP_VC_CNTL_TCL_DISABLE               0x00000000
+#define RADEON_CP_VC_CNTL_TCL_ENABLE                0x00000200
+#define RADEON_CP_VC_CNTL_NUM_SHIFT                 16
+
+#define RADEON_VS_MATRIX_0_ADDR                   0
+#define RADEON_VS_MATRIX_1_ADDR                   4
+#define RADEON_VS_MATRIX_2_ADDR                   8
+#define RADEON_VS_MATRIX_3_ADDR                  12
+#define RADEON_VS_MATRIX_4_ADDR                  16
+#define RADEON_VS_MATRIX_5_ADDR                  20
+#define RADEON_VS_MATRIX_6_ADDR                  24
+#define RADEON_VS_MATRIX_7_ADDR                  28
+#define RADEON_VS_MATRIX_8_ADDR                  32
+#define RADEON_VS_MATRIX_9_ADDR                  36
+#define RADEON_VS_MATRIX_10_ADDR                 40
+#define RADEON_VS_MATRIX_11_ADDR                 44
+#define RADEON_VS_MATRIX_12_ADDR                 48
+#define RADEON_VS_MATRIX_13_ADDR                 52
+#define RADEON_VS_MATRIX_14_ADDR                 56
+#define RADEON_VS_MATRIX_15_ADDR                 60
+#define RADEON_VS_LIGHT_AMBIENT_ADDR             64
+#define RADEON_VS_LIGHT_DIFFUSE_ADDR             72
+#define RADEON_VS_LIGHT_SPECULAR_ADDR            80
+#define RADEON_VS_LIGHT_DIRPOS_ADDR              88
+#define RADEON_VS_LIGHT_HWVSPOT_ADDR             96
+#define RADEON_VS_LIGHT_ATTENUATION_ADDR        104
+#define RADEON_VS_MATRIX_EYE2CLIP_ADDR          112
+#define RADEON_VS_UCP_ADDR                      116
+#define RADEON_VS_GLOBAL_AMBIENT_ADDR           122
+#define RADEON_VS_FOG_PARAM_ADDR                123
+#define RADEON_VS_EYE_VECTOR_ADDR               124
+
+#define RADEON_SS_LIGHT_DCD_ADDR                  0
+#define RADEON_SS_LIGHT_SPOT_EXPONENT_ADDR        8
+#define RADEON_SS_LIGHT_SPOT_CUTOFF_ADDR         16
+#define RADEON_SS_LIGHT_SPECULAR_THRESH_ADDR     24
+#define RADEON_SS_LIGHT_RANGE_CUTOFF_ADDR        32
+#define RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR       48
+#define RADEON_SS_VERT_GUARD_DISCARD_ADJ_ADDR    49
+#define RADEON_SS_HORZ_GUARD_CLIP_ADJ_ADDR       50
+#define RADEON_SS_HORZ_GUARD_DISCARD_ADJ_ADDR    51
+#define RADEON_SS_SHININESS                      60
+
+#define RADEON_TV_MASTER_CNTL                    0x0800
+#       define RADEON_TV_ASYNC_RST               (1 <<  0)
+#       define RADEON_CRT_ASYNC_RST              (1 <<  1)
+#       define RADEON_RESTART_PHASE_FIX          (1 <<  3)
+#	define RADEON_TV_FIFO_ASYNC_RST		 (1 <<  4)
+#	define RADEON_VIN_ASYNC_RST		 (1 <<  5)
+#	define RADEON_AUD_ASYNC_RST		 (1 <<  6)
+#	define RADEON_DVS_ASYNC_RST		 (1 <<  7)
+#       define RADEON_CRT_FIFO_CE_EN             (1 <<  9)
+#       define RADEON_TV_FIFO_CE_EN              (1 << 10)
+#       define RADEON_RE_SYNC_NOW_SEL_MASK       (3 << 14)
+#       define RADEON_TVCLK_ALWAYS_ONb           (1 << 30)
+#	define RADEON_TV_ON			 (1 << 31)
+#define RADEON_TV_PRE_DAC_MUX_CNTL               0x0888
+#       define RADEON_Y_RED_EN                   (1 << 0)
+#       define RADEON_C_GRN_EN                   (1 << 1)
+#       define RADEON_CMP_BLU_EN                 (1 << 2)
+#       define RADEON_DAC_DITHER_EN              (1 << 3)
+#       define RADEON_RED_MX_FORCE_DAC_DATA      (6 << 4)
+#       define RADEON_GRN_MX_FORCE_DAC_DATA      (6 << 8)
+#       define RADEON_BLU_MX_FORCE_DAC_DATA      (6 << 12)
+#       define RADEON_TV_FORCE_DAC_DATA_SHIFT    16
+#define RADEON_TV_RGB_CNTL                           0x0804
+#       define RADEON_SWITCH_TO_BLUE		  (1 <<  4)
+#       define RADEON_RGB_DITHER_EN		  (1 <<  5)
+#       define RADEON_RGB_SRC_SEL_MASK		  (3 <<  8)
+#       define RADEON_RGB_SRC_SEL_CRTC1		  (0 <<  8)
+#       define RADEON_RGB_SRC_SEL_RMX		  (1 <<  8)
+#       define RADEON_RGB_SRC_SEL_CRTC2		  (2 <<  8)
+#       define RADEON_RGB_CONVERT_BY_PASS	  (1 << 10)
+#       define RADEON_UVRAM_READ_MARGIN_SHIFT	  16
+#       define RADEON_FIFORAM_FFMACRO_READ_MARGIN_SHIFT	  20
+#	define RADEON_TVOUT_SCALE_EN		  (1 << 26)
+#define RADEON_TV_SYNC_CNTL                          0x0808
+#       define RADEON_SYNC_OE                     (1 <<  0)
+#       define RADEON_SYNC_OUT                    (1 <<  1)
+#       define RADEON_SYNC_IN                     (1 <<  2)
+#       define RADEON_SYNC_PUB                    (1 <<  3)
+#       define RADEON_SYNC_PD                     (1 <<  4)
+#       define RADEON_TV_SYNC_IO_DRIVE            (1 <<  5)
+#define RADEON_TV_HTOTAL                             0x080c
+#define RADEON_TV_HDISP                              0x0810
+#define RADEON_TV_HSTART                             0x0818
+#define RADEON_TV_HCOUNT                             0x081C
+#define RADEON_TV_VTOTAL                             0x0820
+#define RADEON_TV_VDISP                              0x0824
+#define RADEON_TV_VCOUNT                             0x0828
+#define RADEON_TV_FTOTAL                             0x082c
+#define RADEON_TV_FCOUNT                             0x0830
+#define RADEON_TV_FRESTART                           0x0834
+#define RADEON_TV_HRESTART                           0x0838
+#define RADEON_TV_VRESTART                           0x083c
+#define RADEON_TV_HOST_READ_DATA                     0x0840
+#define RADEON_TV_HOST_WRITE_DATA                    0x0844
+#define RADEON_TV_HOST_RD_WT_CNTL                    0x0848
+#	define RADEON_HOST_FIFO_RD		 (1 << 12)
+#	define RADEON_HOST_FIFO_RD_ACK		 (1 << 13)
+#	define RADEON_HOST_FIFO_WT		 (1 << 14)
+#	define RADEON_HOST_FIFO_WT_ACK		 (1 << 15)
+#define RADEON_TV_VSCALER_CNTL1                      0x084c
+#       define RADEON_UV_INC_MASK                0xffff
+#       define RADEON_UV_INC_SHIFT               0
+#       define RADEON_Y_W_EN			 (1 << 24)
+#       define RADEON_RESTART_FIELD              (1 << 29) /* restart on field 0 */
+#       define RADEON_Y_DEL_W_SIG_SHIFT          26
+#define RADEON_TV_TIMING_CNTL                        0x0850
+#       define RADEON_H_INC_MASK                 0xfff
+#       define RADEON_H_INC_SHIFT                0
+#       define RADEON_REQ_Y_FIRST                (1 << 19)
+#       define RADEON_FORCE_BURST_ALWAYS         (1 << 21)
+#       define RADEON_UV_POST_SCALE_BYPASS       (1 << 23)
+#       define RADEON_UV_OUTPUT_POST_SCALE_SHIFT 24
+#define RADEON_TV_VSCALER_CNTL2                      0x0854
+#       define RADEON_DITHER_MODE                (1 <<  0)
+#       define RADEON_Y_OUTPUT_DITHER_EN         (1 <<  1)
+#       define RADEON_UV_OUTPUT_DITHER_EN        (1 <<  2)
+#       define RADEON_UV_TO_BUF_DITHER_EN        (1 <<  3)
+#define RADEON_TV_Y_FALL_CNTL                        0x0858
+#       define RADEON_Y_FALL_PING_PONG           (1 << 16)
+#       define RADEON_Y_COEF_EN                  (1 << 17)
+#define RADEON_TV_Y_RISE_CNTL                        0x085c
+#       define RADEON_Y_RISE_PING_PONG           (1 << 16)
+#define RADEON_TV_Y_SAW_TOOTH_CNTL                   0x0860
+#define RADEON_TV_UPSAMP_AND_GAIN_CNTL               0x0864
+#	define RADEON_YUPSAMP_EN		 (1 <<  0)
+#	define RADEON_UVUPSAMP_EN		 (1 <<  2)
+#define RADEON_TV_GAIN_LIMIT_SETTINGS                0x0868
+#       define RADEON_Y_GAIN_LIMIT_SHIFT         0
+#       define RADEON_UV_GAIN_LIMIT_SHIFT        16
+#define RADEON_TV_LINEAR_GAIN_SETTINGS               0x086c
+#       define RADEON_Y_GAIN_SHIFT               0
+#       define RADEON_UV_GAIN_SHIFT              16
+#define RADEON_TV_MODULATOR_CNTL1                    0x0870
+#	define RADEON_YFLT_EN			 (1 <<  2)
+#	define RADEON_UVFLT_EN			 (1 <<  3)
+#       define RADEON_ALT_PHASE_EN               (1 <<  6)
+#       define RADEON_SYNC_TIP_LEVEL             (1 <<  7)
+#       define RADEON_BLANK_LEVEL_SHIFT          8
+#       define RADEON_SET_UP_LEVEL_SHIFT         16
+#	define RADEON_SLEW_RATE_LIMIT		 (1 << 23)
+#       define RADEON_CY_FILT_BLEND_SHIFT        28
+#define RADEON_TV_MODULATOR_CNTL2                    0x0874
+#       define RADEON_TV_U_BURST_LEVEL_MASK     0x1ff
+#       define RADEON_TV_V_BURST_LEVEL_MASK     0x1ff
+#       define RADEON_TV_V_BURST_LEVEL_SHIFT    16
+#define RADEON_TV_CRC_CNTL                           0x0890
+#define RADEON_TV_UV_ADR                             0x08ac
+#	define RADEON_MAX_UV_ADR_MASK		 0x000000ff
+#	define RADEON_MAX_UV_ADR_SHIFT		 0
+#	define RADEON_TABLE1_BOT_ADR_MASK	 0x0000ff00
+#	define RADEON_TABLE1_BOT_ADR_SHIFT	 8
+#	define RADEON_TABLE3_TOP_ADR_MASK	 0x00ff0000
+#	define RADEON_TABLE3_TOP_ADR_SHIFT	 16
+#	define RADEON_HCODE_TABLE_SEL_MASK	 0x06000000
+#	define RADEON_HCODE_TABLE_SEL_SHIFT	 25
+#	define RADEON_VCODE_TABLE_SEL_MASK	 0x18000000
+#	define RADEON_VCODE_TABLE_SEL_SHIFT	 27
+#	define RADEON_TV_MAX_FIFO_ADDR		 0x1a7
+#	define RADEON_TV_MAX_FIFO_ADDR_INTERNAL	 0x1ff
+#define RADEON_TV_PLL_FINE_CNTL			     0x0020	/* PLL */
+#define RADEON_TV_PLL_CNTL                           0x0021	/* PLL */
+#       define RADEON_TV_M0LO_MASK               0xff
+#       define RADEON_TV_M0HI_MASK               0x7
+#       define RADEON_TV_M0HI_SHIFT              18
+#       define RADEON_TV_N0LO_MASK               0x1ff
+#       define RADEON_TV_N0LO_SHIFT              8
+#       define RADEON_TV_N0HI_MASK               0x3
+#       define RADEON_TV_N0HI_SHIFT              21
+#       define RADEON_TV_P_MASK                  0xf
+#       define RADEON_TV_P_SHIFT                 24
+#       define RADEON_TV_SLIP_EN                 (1 << 23)
+#       define RADEON_TV_DTO_EN                  (1 << 28)
+#define RADEON_TV_PLL_CNTL1                          0x0022	/* PLL */
+#       define RADEON_TVPLL_RESET                (1 <<  1)
+#       define RADEON_TVPLL_SLEEP                (1 <<  3)
+#       define RADEON_TVPLL_REFCLK_SEL           (1 <<  4)
+#       define RADEON_TVPCP_SHIFT                8
+#       define RADEON_TVPCP_MASK                 (7 << 8)
+#       define RADEON_TVPVG_SHIFT                11
+#       define RADEON_TVPVG_MASK                 (7 << 11)
+#       define RADEON_TVPDC_SHIFT                14
+#       define RADEON_TVPDC_MASK                 (3 << 14)
+#       define RADEON_TVPLL_TEST_DIS             (1 << 31)
+#       define RADEON_TVCLK_SRC_SEL_TVPLL        (1 << 30)
+
+#define RS400_DISP2_REQ_CNTL1			0xe30
+#       define RS400_DISP2_START_REQ_LEVEL_SHIFT   0
+#       define RS400_DISP2_START_REQ_LEVEL_MASK    0x3ff
+#       define RS400_DISP2_STOP_REQ_LEVEL_SHIFT    12
+#       define RS400_DISP2_STOP_REQ_LEVEL_MASK     0x3ff
+#       define RS400_DISP2_ALLOW_FID_LEVEL_SHIFT   22
+#       define RS400_DISP2_ALLOW_FID_LEVEL_MASK    0x3ff
+#define RS400_DISP2_REQ_CNTL2			0xe34
+#       define RS400_DISP2_CRITICAL_POINT_START_SHIFT    12
+#       define RS400_DISP2_CRITICAL_POINT_START_MASK     0x3ff
+#       define RS400_DISP2_CRITICAL_POINT_STOP_SHIFT     22
+#       define RS400_DISP2_CRITICAL_POINT_STOP_MASK      0x3ff
+#define RS400_DMIF_MEM_CNTL1			0xe38
+#       define RS400_DISP2_START_ADR_SHIFT      0
+#       define RS400_DISP2_START_ADR_MASK       0x3ff
+#       define RS400_DISP1_CRITICAL_POINT_START_SHIFT    12
+#       define RS400_DISP1_CRITICAL_POINT_START_MASK     0x3ff
+#       define RS400_DISP1_CRITICAL_POINT_STOP_SHIFT     22
+#       define RS400_DISP1_CRITICAL_POINT_STOP_MASK      0x3ff
+#define RS400_DISP1_REQ_CNTL1			0xe3c
+#       define RS400_DISP1_START_REQ_LEVEL_SHIFT   0
+#       define RS400_DISP1_START_REQ_LEVEL_MASK    0x3ff
+#       define RS400_DISP1_STOP_REQ_LEVEL_SHIFT    12
+#       define RS400_DISP1_STOP_REQ_LEVEL_MASK     0x3ff
+#       define RS400_DISP1_ALLOW_FID_LEVEL_SHIFT   22
+#       define RS400_DISP1_ALLOW_FID_LEVEL_MASK    0x3ff
+
+#define RADEON_PCIE_INDEX               0x0030
+#define RADEON_PCIE_DATA                0x0034
+#define RADEON_PCIE_TX_GART_CNTL	0x10
+#	define RADEON_PCIE_TX_GART_EN		(1 << 0)
+#	define RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_PASS_THRU (0 << 1)
+#	define RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_CLAMP_LO  (1 << 1)
+#	define RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD   (3 << 1)
+#	define RADEON_PCIE_TX_GART_MODE_32_128_CACHE	(0 << 3)
+#	define RADEON_PCIE_TX_GART_MODE_8_4_128_CACHE	(1 << 3)
+#	define RADEON_PCIE_TX_GART_CHK_RW_VALID_EN      (1 << 5)
+#	define RADEON_PCIE_TX_GART_INVALIDATE_TLB	(1 << 8)
+#define RADEON_PCIE_TX_DISCARD_RD_ADDR_LO 0x11
+#define RADEON_PCIE_TX_DISCARD_RD_ADDR_HI 0x12
+#define RADEON_PCIE_TX_GART_BASE	0x13
+#define RADEON_PCIE_TX_GART_START_LO	0x14
+#define RADEON_PCIE_TX_GART_START_HI	0x15
+#define RADEON_PCIE_TX_GART_END_LO	0x16
+#define RADEON_PCIE_TX_GART_END_HI	0x17
+#define RADEON_PCIE_TX_GART_ERROR	0x18
+
+#define RADEON_SCRATCH_REG0		0x15e0
+#define RADEON_SCRATCH_REG1		0x15e4
+#define RADEON_SCRATCH_REG2		0x15e8
+#define RADEON_SCRATCH_REG3		0x15ec
+#define RADEON_SCRATCH_REG4		0x15f0
+#define RADEON_SCRATCH_REG5		0x15f4
+
+#endif
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
new file mode 100644
index 00000000000..a853261d188
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -0,0 +1,485 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#include <linux/seq_file.h>
+#include "drmP.h"
+#include "radeon_drm.h"
+#include "radeon_reg.h"
+#include "radeon.h"
+#include "atom.h"
+
+int radeon_debugfs_ib_init(struct radeon_device *rdev);
+
+/*
+ * IB.
+ */
+int radeon_ib_get(struct radeon_device *rdev, struct radeon_ib **ib)
+{
+	struct radeon_fence *fence;
+	struct radeon_ib *nib;
+	unsigned long i;
+	int r = 0;
+
+	*ib = NULL;
+	r = radeon_fence_create(rdev, &fence);
+	if (r) {
+		DRM_ERROR("failed to create fence for new IB\n");
+		return r;
+	}
+	mutex_lock(&rdev->ib_pool.mutex);
+	i = find_first_zero_bit(rdev->ib_pool.alloc_bm, RADEON_IB_POOL_SIZE);
+	if (i < RADEON_IB_POOL_SIZE) {
+		set_bit(i, rdev->ib_pool.alloc_bm);
+		rdev->ib_pool.ibs[i].length_dw = 0;
+		*ib = &rdev->ib_pool.ibs[i];
+		goto out;
+	}
+	if (list_empty(&rdev->ib_pool.scheduled_ibs)) {
+		/* we go do nothings here */
+		DRM_ERROR("all IB allocated none scheduled.\n");
+		r = -EINVAL;
+		goto out;
+	}
+	/* get the first ib on the scheduled list */
+	nib = list_entry(rdev->ib_pool.scheduled_ibs.next,
+			 struct radeon_ib, list);
+	if (nib->fence == NULL) {
+		/* we go do nothings here */
+		DRM_ERROR("IB %lu scheduled without a fence.\n", nib->idx);
+		r = -EINVAL;
+		goto out;
+	}
+	r = radeon_fence_wait(nib->fence, false);
+	if (r) {
+		DRM_ERROR("radeon: IB(%lu:0x%016lX:%u)\n", nib->idx,
+			  (unsigned long)nib->gpu_addr, nib->length_dw);
+		DRM_ERROR("radeon: GPU lockup detected, fail to get a IB\n");
+		goto out;
+	}
+	radeon_fence_unref(&nib->fence);
+	nib->length_dw = 0;
+	list_del(&nib->list);
+	INIT_LIST_HEAD(&nib->list);
+	*ib = nib;
+out:
+	mutex_unlock(&rdev->ib_pool.mutex);
+	if (r) {
+		radeon_fence_unref(&fence);
+	} else {
+		(*ib)->fence = fence;
+	}
+	return r;
+}
+
+void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib)
+{
+	struct radeon_ib *tmp = *ib;
+
+	*ib = NULL;
+	if (tmp == NULL) {
+		return;
+	}
+	mutex_lock(&rdev->ib_pool.mutex);
+	if (!list_empty(&tmp->list) && !radeon_fence_signaled(tmp->fence)) {
+		/* IB is scheduled & not signaled don't do anythings */
+		mutex_unlock(&rdev->ib_pool.mutex);
+		return;
+	}
+	list_del(&tmp->list);
+	INIT_LIST_HEAD(&tmp->list);
+	if (tmp->fence) {
+		radeon_fence_unref(&tmp->fence);
+	}
+	tmp->length_dw = 0;
+	clear_bit(tmp->idx, rdev->ib_pool.alloc_bm);
+	mutex_unlock(&rdev->ib_pool.mutex);
+}
+
+static void radeon_ib_align(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+	while ((ib->length_dw & rdev->cp.align_mask)) {
+		ib->ptr[ib->length_dw++] = PACKET2(0);
+	}
+}
+
+static void radeon_ib_cpu_flush(struct radeon_device *rdev,
+				struct radeon_ib *ib)
+{
+	unsigned long tmp;
+	unsigned i;
+
+	/* To force CPU cache flush ugly but seems reliable */
+	for (i = 0; i < ib->length_dw; i += (rdev->cp.align_mask + 1)) {
+		tmp = readl(&ib->ptr[i]);
+	}
+}
+
+int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+	int r = 0;
+
+	mutex_lock(&rdev->ib_pool.mutex);
+	radeon_ib_align(rdev, ib);
+	radeon_ib_cpu_flush(rdev, ib);
+	if (!ib->length_dw || !rdev->cp.ready) {
+		/* TODO: Nothings in the ib we should report. */
+		mutex_unlock(&rdev->ib_pool.mutex);
+		DRM_ERROR("radeon: couldn't schedule IB(%lu).\n", ib->idx);
+		return -EINVAL;
+	}
+	/* 64 dwords should be enought for fence too */
+	r = radeon_ring_lock(rdev, 64);
+	if (r) {
+		DRM_ERROR("radeon: scheduling IB failled (%d).\n", r);
+		mutex_unlock(&rdev->ib_pool.mutex);
+		return r;
+	}
+	radeon_ring_write(rdev, PACKET0(RADEON_CP_IB_BASE, 1));
+	radeon_ring_write(rdev, ib->gpu_addr);
+	radeon_ring_write(rdev, ib->length_dw);
+	radeon_fence_emit(rdev, ib->fence);
+	radeon_ring_unlock_commit(rdev);
+	list_add_tail(&ib->list, &rdev->ib_pool.scheduled_ibs);
+	mutex_unlock(&rdev->ib_pool.mutex);
+	return 0;
+}
+
+int radeon_ib_pool_init(struct radeon_device *rdev)
+{
+	void *ptr;
+	uint64_t gpu_addr;
+	int i;
+	int r = 0;
+
+	/* Allocate 1M object buffer */
+	INIT_LIST_HEAD(&rdev->ib_pool.scheduled_ibs);
+	r = radeon_object_create(rdev, NULL,  RADEON_IB_POOL_SIZE*64*1024,
+				 true, RADEON_GEM_DOMAIN_GTT,
+				 false, &rdev->ib_pool.robj);
+	if (r) {
+		DRM_ERROR("radeon: failed to ib pool (%d).\n", r);
+		return r;
+	}
+	r = radeon_object_pin(rdev->ib_pool.robj, RADEON_GEM_DOMAIN_GTT, &gpu_addr);
+	if (r) {
+		DRM_ERROR("radeon: failed to pin ib pool (%d).\n", r);
+		return r;
+	}
+	r = radeon_object_kmap(rdev->ib_pool.robj, &ptr);
+	if (r) {
+		DRM_ERROR("radeon: failed to map ib poll (%d).\n", r);
+		return r;
+	}
+	for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
+		unsigned offset;
+
+		offset = i * 64 * 1024;
+		rdev->ib_pool.ibs[i].gpu_addr = gpu_addr + offset;
+		rdev->ib_pool.ibs[i].ptr = ptr + offset;
+		rdev->ib_pool.ibs[i].idx = i;
+		rdev->ib_pool.ibs[i].length_dw = 0;
+		INIT_LIST_HEAD(&rdev->ib_pool.ibs[i].list);
+	}
+	bitmap_zero(rdev->ib_pool.alloc_bm, RADEON_IB_POOL_SIZE);
+	rdev->ib_pool.ready = true;
+	DRM_INFO("radeon: ib pool ready.\n");
+	if (radeon_debugfs_ib_init(rdev)) {
+		DRM_ERROR("Failed to register debugfs file for IB !\n");
+	}
+	return r;
+}
+
+void radeon_ib_pool_fini(struct radeon_device *rdev)
+{
+	if (!rdev->ib_pool.ready) {
+		return;
+	}
+	mutex_lock(&rdev->ib_pool.mutex);
+	bitmap_zero(rdev->ib_pool.alloc_bm, RADEON_IB_POOL_SIZE);
+	if (rdev->ib_pool.robj) {
+		radeon_object_kunmap(rdev->ib_pool.robj);
+		radeon_object_unref(&rdev->ib_pool.robj);
+		rdev->ib_pool.robj = NULL;
+	}
+	mutex_unlock(&rdev->ib_pool.mutex);
+}
+
+int radeon_ib_test(struct radeon_device *rdev)
+{
+	struct radeon_ib *ib;
+	uint32_t scratch;
+	uint32_t tmp = 0;
+	unsigned i;
+	int r;
+
+	r = radeon_scratch_get(rdev, &scratch);
+	if (r) {
+		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
+		return r;
+	}
+	WREG32(scratch, 0xCAFEDEAD);
+	r = radeon_ib_get(rdev, &ib);
+	if (r) {
+		return r;
+	}
+	ib->ptr[0] = PACKET0(scratch, 0);
+	ib->ptr[1] = 0xDEADBEEF;
+	ib->ptr[2] = PACKET2(0);
+	ib->ptr[3] = PACKET2(0);
+	ib->ptr[4] = PACKET2(0);
+	ib->ptr[5] = PACKET2(0);
+	ib->ptr[6] = PACKET2(0);
+	ib->ptr[7] = PACKET2(0);
+	ib->length_dw = 8;
+	r = radeon_ib_schedule(rdev, ib);
+	if (r) {
+		radeon_scratch_free(rdev, scratch);
+		radeon_ib_free(rdev, &ib);
+		return r;
+	}
+	r = radeon_fence_wait(ib->fence, false);
+	if (r) {
+		return r;
+	}
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(scratch);
+		if (tmp == 0xDEADBEEF) {
+			break;
+		}
+		DRM_UDELAY(1);
+	}
+	if (i < rdev->usec_timeout) {
+		DRM_INFO("ib test succeeded in %u usecs\n", i);
+	} else {
+		DRM_ERROR("radeon: ib test failed (sracth(0x%04X)=0x%08X)\n",
+			  scratch, tmp);
+		r = -EINVAL;
+	}
+	radeon_scratch_free(rdev, scratch);
+	radeon_ib_free(rdev, &ib);
+	return r;
+}
+
+
+/*
+ * Ring.
+ */
+void radeon_ring_free_size(struct radeon_device *rdev)
+{
+	rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
+	/* This works because ring_size is a power of 2 */
+	rdev->cp.ring_free_dw = (rdev->cp.rptr + (rdev->cp.ring_size / 4));
+	rdev->cp.ring_free_dw -= rdev->cp.wptr;
+	rdev->cp.ring_free_dw &= rdev->cp.ptr_mask;
+	if (!rdev->cp.ring_free_dw) {
+		rdev->cp.ring_free_dw = rdev->cp.ring_size / 4;
+	}
+}
+
+int radeon_ring_lock(struct radeon_device *rdev, unsigned ndw)
+{
+	int r;
+
+	/* Align requested size with padding so unlock_commit can
+	 * pad safely */
+	ndw = (ndw + rdev->cp.align_mask) & ~rdev->cp.align_mask;
+	mutex_lock(&rdev->cp.mutex);
+	while (ndw > (rdev->cp.ring_free_dw - 1)) {
+		radeon_ring_free_size(rdev);
+		if (ndw < rdev->cp.ring_free_dw) {
+			break;
+		}
+		r = radeon_fence_wait_next(rdev);
+		if (r) {
+			mutex_unlock(&rdev->cp.mutex);
+			return r;
+		}
+	}
+	rdev->cp.count_dw = ndw;
+	rdev->cp.wptr_old = rdev->cp.wptr;
+	return 0;
+}
+
+void radeon_ring_unlock_commit(struct radeon_device *rdev)
+{
+	unsigned count_dw_pad;
+	unsigned i;
+
+	/* We pad to match fetch size */
+	count_dw_pad = (rdev->cp.align_mask + 1) -
+		       (rdev->cp.wptr & rdev->cp.align_mask);
+	for (i = 0; i < count_dw_pad; i++) {
+		radeon_ring_write(rdev, PACKET2(0));
+	}
+	DRM_MEMORYBARRIER();
+	WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr);
+	(void)RREG32(RADEON_CP_RB_WPTR);
+	mutex_unlock(&rdev->cp.mutex);
+}
+
+void radeon_ring_unlock_undo(struct radeon_device *rdev)
+{
+	rdev->cp.wptr = rdev->cp.wptr_old;
+	mutex_unlock(&rdev->cp.mutex);
+}
+
+int radeon_ring_test(struct radeon_device *rdev)
+{
+	uint32_t scratch;
+	uint32_t tmp = 0;
+	unsigned i;
+	int r;
+
+	r = radeon_scratch_get(rdev, &scratch);
+	if (r) {
+		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
+		return r;
+	}
+	WREG32(scratch, 0xCAFEDEAD);
+	r = radeon_ring_lock(rdev, 2);
+	if (r) {
+		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+		radeon_scratch_free(rdev, scratch);
+		return r;
+	}
+	radeon_ring_write(rdev, PACKET0(scratch, 0));
+	radeon_ring_write(rdev, 0xDEADBEEF);
+	radeon_ring_unlock_commit(rdev);
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(scratch);
+		if (tmp == 0xDEADBEEF) {
+			break;
+		}
+		DRM_UDELAY(1);
+	}
+	if (i < rdev->usec_timeout) {
+		DRM_INFO("ring test succeeded in %d usecs\n", i);
+	} else {
+		DRM_ERROR("radeon: ring test failed (sracth(0x%04X)=0x%08X)\n",
+			  scratch, tmp);
+		r = -EINVAL;
+	}
+	radeon_scratch_free(rdev, scratch);
+	return r;
+}
+
+int radeon_ring_init(struct radeon_device *rdev, unsigned ring_size)
+{
+	int r;
+
+	rdev->cp.ring_size = ring_size;
+	/* Allocate ring buffer */
+	if (rdev->cp.ring_obj == NULL) {
+		r = radeon_object_create(rdev, NULL, rdev->cp.ring_size,
+					 true,
+					 RADEON_GEM_DOMAIN_GTT,
+					 false,
+					 &rdev->cp.ring_obj);
+		if (r) {
+			DRM_ERROR("radeon: failed to create ring buffer (%d).\n", r);
+			mutex_unlock(&rdev->cp.mutex);
+			return r;
+		}
+		r = radeon_object_pin(rdev->cp.ring_obj,
+				      RADEON_GEM_DOMAIN_GTT,
+				      &rdev->cp.gpu_addr);
+		if (r) {
+			DRM_ERROR("radeon: failed to pin ring buffer (%d).\n", r);
+			mutex_unlock(&rdev->cp.mutex);
+			return r;
+		}
+		r = radeon_object_kmap(rdev->cp.ring_obj,
+				       (void **)&rdev->cp.ring);
+		if (r) {
+			DRM_ERROR("radeon: failed to map ring buffer (%d).\n", r);
+			mutex_unlock(&rdev->cp.mutex);
+			return r;
+		}
+	}
+	rdev->cp.ptr_mask = (rdev->cp.ring_size / 4) - 1;
+	rdev->cp.ring_free_dw = rdev->cp.ring_size / 4;
+	return 0;
+}
+
+void radeon_ring_fini(struct radeon_device *rdev)
+{
+	mutex_lock(&rdev->cp.mutex);
+	if (rdev->cp.ring_obj) {
+		radeon_object_kunmap(rdev->cp.ring_obj);
+		radeon_object_unpin(rdev->cp.ring_obj);
+		radeon_object_unref(&rdev->cp.ring_obj);
+		rdev->cp.ring = NULL;
+		rdev->cp.ring_obj = NULL;
+	}
+	mutex_unlock(&rdev->cp.mutex);
+}
+
+
+/*
+ * Debugfs info
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int radeon_debugfs_ib_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct radeon_ib *ib = node->info_ent->data;
+	unsigned i;
+
+	if (ib == NULL) {
+		return 0;
+	}
+	seq_printf(m, "IB %04lu\n", ib->idx);
+	seq_printf(m, "IB fence %p\n", ib->fence);
+	seq_printf(m, "IB size %05u dwords\n", ib->length_dw);
+	for (i = 0; i < ib->length_dw; i++) {
+		seq_printf(m, "[%05u]=0x%08X\n", i, ib->ptr[i]);
+	}
+	return 0;
+}
+
+static struct drm_info_list radeon_debugfs_ib_list[RADEON_IB_POOL_SIZE];
+static char radeon_debugfs_ib_names[RADEON_IB_POOL_SIZE][32];
+#endif
+
+int radeon_debugfs_ib_init(struct radeon_device *rdev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	unsigned i;
+
+	for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
+		sprintf(radeon_debugfs_ib_names[i], "radeon_ib_%04u", i);
+		radeon_debugfs_ib_list[i].name = radeon_debugfs_ib_names[i];
+		radeon_debugfs_ib_list[i].show = &radeon_debugfs_ib_info;
+		radeon_debugfs_ib_list[i].driver_features = 0;
+		radeon_debugfs_ib_list[i].data = &rdev->ib_pool.ibs[i];
+	}
+	return radeon_debugfs_add_files(rdev, radeon_debugfs_ib_list,
+					RADEON_IB_POOL_SIZE);
+#else
+	return 0;
+#endif
+}
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
new file mode 100644
index 00000000000..4c087c1510d
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -0,0 +1,653 @@
+/*
+ * Copyright 2009 Jerome Glisse.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ *    Jerome Glisse <glisse@freedesktop.org>
+ *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
+ *    Dave Airlie
+ */
+#include <ttm/ttm_bo_api.h>
+#include <ttm/ttm_bo_driver.h>
+#include <ttm/ttm_placement.h>
+#include <ttm/ttm_module.h>
+#include <drm/drmP.h>
+#include <drm/radeon_drm.h>
+#include "radeon_reg.h"
+#include "radeon.h"
+
+#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
+
+static struct radeon_device *radeon_get_rdev(struct ttm_bo_device *bdev)
+{
+	struct radeon_mman *mman;
+	struct radeon_device *rdev;
+
+	mman = container_of(bdev, struct radeon_mman, bdev);
+	rdev = container_of(mman, struct radeon_device, mman);
+	return rdev;
+}
+
+
+/*
+ * Global memory.
+ */
+static int radeon_ttm_mem_global_init(struct ttm_global_reference *ref)
+{
+	return ttm_mem_global_init(ref->object);
+}
+
+static void radeon_ttm_mem_global_release(struct ttm_global_reference *ref)
+{
+	ttm_mem_global_release(ref->object);
+}
+
+static int radeon_ttm_global_init(struct radeon_device *rdev)
+{
+	struct ttm_global_reference *global_ref;
+	int r;
+
+	rdev->mman.mem_global_referenced = false;
+	global_ref = &rdev->mman.mem_global_ref;
+	global_ref->global_type = TTM_GLOBAL_TTM_MEM;
+	global_ref->size = sizeof(struct ttm_mem_global);
+	global_ref->init = &radeon_ttm_mem_global_init;
+	global_ref->release = &radeon_ttm_mem_global_release;
+	r = ttm_global_item_ref(global_ref);
+	if (r != 0) {
+		DRM_ERROR("Failed referencing a global TTM memory object.\n");
+		return r;
+	}
+	rdev->mman.mem_global_referenced = true;
+	return 0;
+}
+
+static void radeon_ttm_global_fini(struct radeon_device *rdev)
+{
+	if (rdev->mman.mem_global_referenced) {
+		ttm_global_item_unref(&rdev->mman.mem_global_ref);
+		rdev->mman.mem_global_referenced = false;
+	}
+}
+
+struct ttm_backend *radeon_ttm_backend_create(struct radeon_device *rdev);
+
+static struct ttm_backend*
+radeon_create_ttm_backend_entry(struct ttm_bo_device *bdev)
+{
+	struct radeon_device *rdev;
+
+	rdev = radeon_get_rdev(bdev);
+#if __OS_HAS_AGP
+	if (rdev->flags & RADEON_IS_AGP) {
+		return ttm_agp_backend_init(bdev, rdev->ddev->agp->bridge);
+	} else
+#endif
+	{
+		return radeon_ttm_backend_create(rdev);
+	}
+}
+
+static int radeon_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
+{
+	return 0;
+}
+
+static int radeon_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
+				struct ttm_mem_type_manager *man)
+{
+	struct radeon_device *rdev;
+
+	rdev = radeon_get_rdev(bdev);
+
+	switch (type) {
+	case TTM_PL_SYSTEM:
+		/* System memory */
+		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
+		man->available_caching = TTM_PL_MASK_CACHING;
+		man->default_caching = TTM_PL_FLAG_CACHED;
+		break;
+	case TTM_PL_TT:
+		man->gpu_offset = 0;
+		man->available_caching = TTM_PL_MASK_CACHING;
+		man->default_caching = TTM_PL_FLAG_CACHED;
+#if __OS_HAS_AGP
+		if (rdev->flags & RADEON_IS_AGP) {
+			if (!(drm_core_has_AGP(rdev->ddev) && rdev->ddev->agp)) {
+				DRM_ERROR("AGP is not enabled for memory type %u\n",
+					  (unsigned)type);
+				return -EINVAL;
+			}
+			man->io_offset = rdev->mc.agp_base;
+			man->io_size = rdev->mc.gtt_size;
+			man->io_addr = NULL;
+			man->flags = TTM_MEMTYPE_FLAG_NEEDS_IOREMAP |
+				     TTM_MEMTYPE_FLAG_MAPPABLE;
+			man->available_caching = TTM_PL_FLAG_UNCACHED |
+						 TTM_PL_FLAG_WC;
+			man->default_caching = TTM_PL_FLAG_WC;
+		} else
+#endif
+		{
+			man->io_offset = 0;
+			man->io_size = 0;
+			man->io_addr = NULL;
+			man->flags = TTM_MEMTYPE_FLAG_MAPPABLE |
+				     TTM_MEMTYPE_FLAG_CMA;
+		}
+		break;
+	case TTM_PL_VRAM:
+		/* "On-card" video ram */
+		man->gpu_offset = 0;
+		man->flags = TTM_MEMTYPE_FLAG_FIXED |
+			     TTM_MEMTYPE_FLAG_NEEDS_IOREMAP |
+			     TTM_MEMTYPE_FLAG_MAPPABLE;
+		man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
+		man->default_caching = TTM_PL_FLAG_WC;
+		man->io_addr = NULL;
+		man->io_offset = rdev->mc.aper_base;
+		man->io_size = rdev->mc.aper_size;
+		break;
+	default:
+		DRM_ERROR("Unsupported memory type %u\n", (unsigned)type);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static uint32_t radeon_evict_flags(struct ttm_buffer_object *bo)
+{
+	uint32_t cur_placement = bo->mem.placement & ~TTM_PL_MASK_MEMTYPE;
+
+	switch (bo->mem.mem_type) {
+	default:
+		return (cur_placement & ~TTM_PL_MASK_CACHING) |
+			TTM_PL_FLAG_SYSTEM |
+			TTM_PL_FLAG_CACHED;
+	}
+}
+
+static int radeon_verify_access(struct ttm_buffer_object *bo, struct file *filp)
+{
+	return 0;
+}
+
+static void radeon_move_null(struct ttm_buffer_object *bo,
+			     struct ttm_mem_reg *new_mem)
+{
+	struct ttm_mem_reg *old_mem = &bo->mem;
+
+	BUG_ON(old_mem->mm_node != NULL);
+	*old_mem = *new_mem;
+	new_mem->mm_node = NULL;
+}
+
+static int radeon_move_blit(struct ttm_buffer_object *bo,
+			    bool evict, int no_wait,
+			    struct ttm_mem_reg *new_mem,
+			    struct ttm_mem_reg *old_mem)
+{
+	struct radeon_device *rdev;
+	uint64_t old_start, new_start;
+	struct radeon_fence *fence;
+	int r;
+
+	rdev = radeon_get_rdev(bo->bdev);
+	r = radeon_fence_create(rdev, &fence);
+	if (unlikely(r)) {
+		return r;
+	}
+	old_start = old_mem->mm_node->start << PAGE_SHIFT;
+	new_start = new_mem->mm_node->start << PAGE_SHIFT;
+
+	switch (old_mem->mem_type) {
+	case TTM_PL_VRAM:
+		old_start += rdev->mc.vram_location;
+		break;
+	case TTM_PL_TT:
+		old_start += rdev->mc.gtt_location;
+		break;
+	default:
+		DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
+		return -EINVAL;
+	}
+	switch (new_mem->mem_type) {
+	case TTM_PL_VRAM:
+		new_start += rdev->mc.vram_location;
+		break;
+	case TTM_PL_TT:
+		new_start += rdev->mc.gtt_location;
+		break;
+	default:
+		DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
+		return -EINVAL;
+	}
+	if (!rdev->cp.ready) {
+		DRM_ERROR("Trying to move memory with CP turned off.\n");
+		return -EINVAL;
+	}
+	r = radeon_copy(rdev, old_start, new_start, new_mem->num_pages, fence);
+	/* FIXME: handle copy error */
+	r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL,
+				      evict, no_wait, new_mem);
+	radeon_fence_unref(&fence);
+	return r;
+}
+
+static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
+				bool evict, bool interruptible, bool no_wait,
+				struct ttm_mem_reg *new_mem)
+{
+	struct radeon_device *rdev;
+	struct ttm_mem_reg *old_mem = &bo->mem;
+	struct ttm_mem_reg tmp_mem;
+	uint32_t proposed_placement;
+	int r;
+
+	rdev = radeon_get_rdev(bo->bdev);
+	tmp_mem = *new_mem;
+	tmp_mem.mm_node = NULL;
+	proposed_placement = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING;
+	r = ttm_bo_mem_space(bo, proposed_placement, &tmp_mem,
+			     interruptible, no_wait);
+	if (unlikely(r)) {
+		return r;
+	}
+	r = ttm_tt_bind(bo->ttm, &tmp_mem);
+	if (unlikely(r)) {
+		goto out_cleanup;
+	}
+	r = radeon_move_blit(bo, true, no_wait, &tmp_mem, old_mem);
+	if (unlikely(r)) {
+		goto out_cleanup;
+	}
+	r = ttm_bo_move_ttm(bo, true, no_wait, new_mem);
+out_cleanup:
+	if (tmp_mem.mm_node) {
+		spin_lock(&rdev->mman.bdev.lru_lock);
+		drm_mm_put_block(tmp_mem.mm_node);
+		spin_unlock(&rdev->mman.bdev.lru_lock);
+		return r;
+	}
+	return r;
+}
+
+static int radeon_move_ram_vram(struct ttm_buffer_object *bo,
+				bool evict, bool interruptible, bool no_wait,
+				struct ttm_mem_reg *new_mem)
+{
+	struct radeon_device *rdev;
+	struct ttm_mem_reg *old_mem = &bo->mem;
+	struct ttm_mem_reg tmp_mem;
+	uint32_t proposed_flags;
+	int r;
+
+	rdev = radeon_get_rdev(bo->bdev);
+	tmp_mem = *new_mem;
+	tmp_mem.mm_node = NULL;
+	proposed_flags = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING;
+	r = ttm_bo_mem_space(bo, proposed_flags, &tmp_mem,
+			     interruptible, no_wait);
+	if (unlikely(r)) {
+		return r;
+	}
+	r = ttm_bo_move_ttm(bo, true, no_wait, &tmp_mem);
+	if (unlikely(r)) {
+		goto out_cleanup;
+	}
+	r = radeon_move_blit(bo, true, no_wait, new_mem, old_mem);
+	if (unlikely(r)) {
+		goto out_cleanup;
+	}
+out_cleanup:
+	if (tmp_mem.mm_node) {
+		spin_lock(&rdev->mman.bdev.lru_lock);
+		drm_mm_put_block(tmp_mem.mm_node);
+		spin_unlock(&rdev->mman.bdev.lru_lock);
+		return r;
+	}
+	return r;
+}
+
+static int radeon_bo_move(struct ttm_buffer_object *bo,
+			  bool evict, bool interruptible, bool no_wait,
+			  struct ttm_mem_reg *new_mem)
+{
+	struct radeon_device *rdev;
+	struct ttm_mem_reg *old_mem = &bo->mem;
+	int r;
+
+	rdev = radeon_get_rdev(bo->bdev);
+	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
+		radeon_move_null(bo, new_mem);
+		return 0;
+	}
+	if ((old_mem->mem_type == TTM_PL_TT &&
+	     new_mem->mem_type == TTM_PL_SYSTEM) ||
+	    (old_mem->mem_type == TTM_PL_SYSTEM &&
+	     new_mem->mem_type == TTM_PL_TT)) {
+		/* bind is enought */
+		radeon_move_null(bo, new_mem);
+		return 0;
+	}
+	if (!rdev->cp.ready) {
+		/* use memcpy */
+		DRM_ERROR("CP is not ready use memcpy.\n");
+		return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
+	}
+
+	if (old_mem->mem_type == TTM_PL_VRAM &&
+	    new_mem->mem_type == TTM_PL_SYSTEM) {
+		return radeon_move_vram_ram(bo, evict, interruptible,
+					    no_wait, new_mem);
+	} else if (old_mem->mem_type == TTM_PL_SYSTEM &&
+		   new_mem->mem_type == TTM_PL_VRAM) {
+		return radeon_move_ram_vram(bo, evict, interruptible,
+					    no_wait, new_mem);
+	} else {
+		r = radeon_move_blit(bo, evict, no_wait, new_mem, old_mem);
+		if (unlikely(r)) {
+			return r;
+		}
+	}
+	return r;
+}
+
+const uint32_t radeon_mem_prios[] = {
+	TTM_PL_VRAM,
+	TTM_PL_TT,
+	TTM_PL_SYSTEM,
+};
+
+const uint32_t radeon_busy_prios[] = {
+	TTM_PL_TT,
+	TTM_PL_VRAM,
+	TTM_PL_SYSTEM,
+};
+
+static int radeon_sync_obj_wait(void *sync_obj, void *sync_arg,
+				bool lazy, bool interruptible)
+{
+	return radeon_fence_wait((struct radeon_fence *)sync_obj, interruptible);
+}
+
+static int radeon_sync_obj_flush(void *sync_obj, void *sync_arg)
+{
+	return 0;
+}
+
+static void radeon_sync_obj_unref(void **sync_obj)
+{
+	radeon_fence_unref((struct radeon_fence **)sync_obj);
+}
+
+static void *radeon_sync_obj_ref(void *sync_obj)
+{
+	return radeon_fence_ref((struct radeon_fence *)sync_obj);
+}
+
+static bool radeon_sync_obj_signaled(void *sync_obj, void *sync_arg)
+{
+	return radeon_fence_signaled((struct radeon_fence *)sync_obj);
+}
+
+static struct ttm_bo_driver radeon_bo_driver = {
+	.mem_type_prio = radeon_mem_prios,
+	.mem_busy_prio = radeon_busy_prios,
+	.num_mem_type_prio = ARRAY_SIZE(radeon_mem_prios),
+	.num_mem_busy_prio = ARRAY_SIZE(radeon_busy_prios),
+	.create_ttm_backend_entry = &radeon_create_ttm_backend_entry,
+	.invalidate_caches = &radeon_invalidate_caches,
+	.init_mem_type = &radeon_init_mem_type,
+	.evict_flags = &radeon_evict_flags,
+	.move = &radeon_bo_move,
+	.verify_access = &radeon_verify_access,
+	.sync_obj_signaled = &radeon_sync_obj_signaled,
+	.sync_obj_wait = &radeon_sync_obj_wait,
+	.sync_obj_flush = &radeon_sync_obj_flush,
+	.sync_obj_unref = &radeon_sync_obj_unref,
+	.sync_obj_ref = &radeon_sync_obj_ref,
+};
+
+int radeon_ttm_init(struct radeon_device *rdev)
+{
+	int r;
+
+	r = radeon_ttm_global_init(rdev);
+	if (r) {
+		return r;
+	}
+	/* No others user of address space so set it to 0 */
+	r = ttm_bo_device_init(&rdev->mman.bdev,
+			       rdev->mman.mem_global_ref.object,
+			       &radeon_bo_driver, DRM_FILE_PAGE_OFFSET);
+	if (r) {
+		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
+		return r;
+	}
+	r = ttm_bo_init_mm(&rdev->mman.bdev, TTM_PL_VRAM, 0,
+			   ((rdev->mc.aper_size) >> PAGE_SHIFT));
+	if (r) {
+		DRM_ERROR("Failed initializing VRAM heap.\n");
+		return r;
+	}
+	r = radeon_object_create(rdev, NULL, 256 * 1024, true,
+				 RADEON_GEM_DOMAIN_VRAM, false,
+				 &rdev->stollen_vga_memory);
+	if (r) {
+		return r;
+	}
+	r = radeon_object_pin(rdev->stollen_vga_memory, RADEON_GEM_DOMAIN_VRAM, NULL);
+	if (r) {
+		radeon_object_unref(&rdev->stollen_vga_memory);
+		return r;
+	}
+	DRM_INFO("radeon: %uM of VRAM memory ready\n",
+		 rdev->mc.vram_size / (1024 * 1024));
+	r = ttm_bo_init_mm(&rdev->mman.bdev, TTM_PL_TT, 0,
+			   ((rdev->mc.gtt_size) >> PAGE_SHIFT));
+	if (r) {
+		DRM_ERROR("Failed initializing GTT heap.\n");
+		return r;
+	}
+	DRM_INFO("radeon: %uM of GTT memory ready.\n",
+		 rdev->mc.gtt_size / (1024 * 1024));
+	if (unlikely(rdev->mman.bdev.dev_mapping == NULL)) {
+		rdev->mman.bdev.dev_mapping = rdev->ddev->dev_mapping;
+	}
+	return 0;
+}
+
+void radeon_ttm_fini(struct radeon_device *rdev)
+{
+	if (rdev->stollen_vga_memory) {
+		radeon_object_unpin(rdev->stollen_vga_memory);
+		radeon_object_unref(&rdev->stollen_vga_memory);
+	}
+	ttm_bo_clean_mm(&rdev->mman.bdev, TTM_PL_VRAM);
+	ttm_bo_clean_mm(&rdev->mman.bdev, TTM_PL_TT);
+	ttm_bo_device_release(&rdev->mman.bdev);
+	radeon_gart_fini(rdev);
+	radeon_ttm_global_fini(rdev);
+	DRM_INFO("radeon: ttm finalized\n");
+}
+
+static struct vm_operations_struct radeon_ttm_vm_ops;
+static struct vm_operations_struct *ttm_vm_ops = NULL;
+
+static int radeon_ttm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct ttm_buffer_object *bo;
+	int r;
+
+	bo = (struct ttm_buffer_object *)vma->vm_private_data;
+	if (bo == NULL) {
+		return VM_FAULT_NOPAGE;
+	}
+	r = ttm_vm_ops->fault(vma, vmf);
+	return r;
+}
+
+int radeon_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct drm_file *file_priv;
+	struct radeon_device *rdev;
+	int r;
+
+	if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET)) {
+		return drm_mmap(filp, vma);
+	}
+
+	file_priv = (struct drm_file *)filp->private_data;
+	rdev = file_priv->minor->dev->dev_private;
+	if (rdev == NULL) {
+		return -EINVAL;
+	}
+	r = ttm_bo_mmap(filp, vma, &rdev->mman.bdev);
+	if (unlikely(r != 0)) {
+		return r;
+	}
+	if (unlikely(ttm_vm_ops == NULL)) {
+		ttm_vm_ops = vma->vm_ops;
+		radeon_ttm_vm_ops = *ttm_vm_ops;
+		radeon_ttm_vm_ops.fault = &radeon_ttm_fault;
+	}
+	vma->vm_ops = &radeon_ttm_vm_ops;
+	return 0;
+}
+
+
+/*
+ * TTM backend functions.
+ */
+struct radeon_ttm_backend {
+	struct ttm_backend		backend;
+	struct radeon_device		*rdev;
+	unsigned long			num_pages;
+	struct page			**pages;
+	struct page			*dummy_read_page;
+	bool				populated;
+	bool				bound;
+	unsigned			offset;
+};
+
+static int radeon_ttm_backend_populate(struct ttm_backend *backend,
+				       unsigned long num_pages,
+				       struct page **pages,
+				       struct page *dummy_read_page)
+{
+	struct radeon_ttm_backend *gtt;
+
+	gtt = container_of(backend, struct radeon_ttm_backend, backend);
+	gtt->pages = pages;
+	gtt->num_pages = num_pages;
+	gtt->dummy_read_page = dummy_read_page;
+	gtt->populated = true;
+	return 0;
+}
+
+static void radeon_ttm_backend_clear(struct ttm_backend *backend)
+{
+	struct radeon_ttm_backend *gtt;
+
+	gtt = container_of(backend, struct radeon_ttm_backend, backend);
+	gtt->pages = NULL;
+	gtt->num_pages = 0;
+	gtt->dummy_read_page = NULL;
+	gtt->populated = false;
+	gtt->bound = false;
+}
+
+
+static int radeon_ttm_backend_bind(struct ttm_backend *backend,
+				   struct ttm_mem_reg *bo_mem)
+{
+	struct radeon_ttm_backend *gtt;
+	int r;
+
+	gtt = container_of(backend, struct radeon_ttm_backend, backend);
+	gtt->offset = bo_mem->mm_node->start << PAGE_SHIFT;
+	if (!gtt->num_pages) {
+		WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n", gtt->num_pages, bo_mem, backend);
+	}
+	r = radeon_gart_bind(gtt->rdev, gtt->offset,
+			     gtt->num_pages, gtt->pages);
+	if (r) {
+		DRM_ERROR("failed to bind %lu pages at 0x%08X\n",
+			  gtt->num_pages, gtt->offset);
+		return r;
+	}
+	gtt->bound = true;
+	return 0;
+}
+
+static int radeon_ttm_backend_unbind(struct ttm_backend *backend)
+{
+	struct radeon_ttm_backend *gtt;
+
+	gtt = container_of(backend, struct radeon_ttm_backend, backend);
+	radeon_gart_unbind(gtt->rdev, gtt->offset, gtt->num_pages);
+	gtt->bound = false;
+	return 0;
+}
+
+static void radeon_ttm_backend_destroy(struct ttm_backend *backend)
+{
+	struct radeon_ttm_backend *gtt;
+
+	gtt = container_of(backend, struct radeon_ttm_backend, backend);
+	if (gtt->bound) {
+		radeon_ttm_backend_unbind(backend);
+	}
+	kfree(gtt);
+}
+
+static struct ttm_backend_func radeon_backend_func = {
+	.populate = &radeon_ttm_backend_populate,
+	.clear = &radeon_ttm_backend_clear,
+	.bind = &radeon_ttm_backend_bind,
+	.unbind = &radeon_ttm_backend_unbind,
+	.destroy = &radeon_ttm_backend_destroy,
+};
+
+struct ttm_backend *radeon_ttm_backend_create(struct radeon_device *rdev)
+{
+	struct radeon_ttm_backend *gtt;
+
+	gtt = kzalloc(sizeof(struct radeon_ttm_backend), GFP_KERNEL);
+	if (gtt == NULL) {
+		return NULL;
+	}
+	gtt->backend.bdev = &rdev->mman.bdev;
+	gtt->backend.flags = 0;
+	gtt->backend.func = &radeon_backend_func;
+	gtt->rdev = rdev;
+	gtt->pages = NULL;
+	gtt->num_pages = 0;
+	gtt->dummy_read_page = NULL;
+	gtt->populated = false;
+	gtt->bound = false;
+	return &gtt->backend;
+}
diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
new file mode 100644
index 00000000000..cc074b5a8f7
--- /dev/null
+++ b/drivers/gpu/drm/radeon/rs400.c
@@ -0,0 +1,411 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#include <linux/seq_file.h>
+#include <drm/drmP.h>
+#include "radeon_reg.h"
+#include "radeon.h"
+
+/* rs400,rs480 depends on : */
+void r100_hdp_reset(struct radeon_device *rdev);
+void r100_mc_disable_clients(struct radeon_device *rdev);
+int r300_mc_wait_for_idle(struct radeon_device *rdev);
+void r420_pipes_init(struct radeon_device *rdev);
+
+/* This files gather functions specifics to :
+ * rs400,rs480
+ *
+ * Some of these functions might be used by newer ASICs.
+ */
+void rs400_gpu_init(struct radeon_device *rdev);
+int rs400_debugfs_pcie_gart_info_init(struct radeon_device *rdev);
+
+
+/*
+ * GART functions.
+ */
+void rs400_gart_adjust_size(struct radeon_device *rdev)
+{
+	/* Check gart size */
+	switch (rdev->mc.gtt_size/(1024*1024)) {
+	case 32:
+	case 64:
+	case 128:
+	case 256:
+	case 512:
+	case 1024:
+	case 2048:
+		break;
+	default:
+		DRM_ERROR("Unable to use IGP GART size %uM\n",
+			  rdev->mc.gtt_size >> 20);
+		DRM_ERROR("Valid GART size for IGP are 32M,64M,128M,256M,512M,1G,2G\n");
+		DRM_ERROR("Forcing to 32M GART size\n");
+		rdev->mc.gtt_size = 32 * 1024 * 1024;
+		return;
+	}
+	if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480) {
+		/* FIXME: RS400 & RS480 seems to have issue with GART size
+		 * if 4G of system memory (needs more testing) */
+		rdev->mc.gtt_size = 32 * 1024 * 1024;
+		DRM_ERROR("Forcing to 32M GART size (because of ASIC bug ?)\n");
+	}
+}
+
+void rs400_gart_tlb_flush(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+	unsigned int timeout = rdev->usec_timeout;
+
+	WREG32_MC(RS480_GART_CACHE_CNTRL, RS480_GART_CACHE_INVALIDATE);
+	do {
+		tmp = RREG32_MC(RS480_GART_CACHE_CNTRL);
+		if ((tmp & RS480_GART_CACHE_INVALIDATE) == 0)
+			break;
+		DRM_UDELAY(1);
+		timeout--;
+	} while (timeout > 0);
+	WREG32_MC(RS480_GART_CACHE_CNTRL, 0);
+}
+
+int rs400_gart_enable(struct radeon_device *rdev)
+{
+	uint32_t size_reg;
+	uint32_t tmp;
+	int r;
+
+	/* Initialize common gart structure */
+	r = radeon_gart_init(rdev);
+	if (r) {
+		return r;
+	}
+	if (rs400_debugfs_pcie_gart_info_init(rdev)) {
+		DRM_ERROR("Failed to register debugfs file for RS400 GART !\n");
+	}
+
+	tmp = RREG32_MC(RS690_AIC_CTRL_SCRATCH);
+	tmp |= RS690_DIS_OUT_OF_PCI_GART_ACCESS;
+	WREG32_MC(RS690_AIC_CTRL_SCRATCH, tmp);
+	/* Check gart size */
+	switch(rdev->mc.gtt_size / (1024 * 1024)) {
+	case 32:
+		size_reg = RS480_VA_SIZE_32MB;
+		break;
+	case 64:
+		size_reg = RS480_VA_SIZE_64MB;
+		break;
+	case 128:
+		size_reg = RS480_VA_SIZE_128MB;
+		break;
+	case 256:
+		size_reg = RS480_VA_SIZE_256MB;
+		break;
+	case 512:
+		size_reg = RS480_VA_SIZE_512MB;
+		break;
+	case 1024:
+		size_reg = RS480_VA_SIZE_1GB;
+		break;
+	case 2048:
+		size_reg = RS480_VA_SIZE_2GB;
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (rdev->gart.table.ram.ptr == NULL) {
+		rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
+		r = radeon_gart_table_ram_alloc(rdev);
+		if (r) {
+			return r;
+		}
+	}
+	/* It should be fine to program it to max value */
+	if (rdev->family == CHIP_RS690 || (rdev->family == CHIP_RS740)) {
+		WREG32_MC(RS690_MCCFG_AGP_BASE, 0xFFFFFFFF);
+		WREG32_MC(RS690_MCCFG_AGP_BASE_2, 0);
+	} else {
+		WREG32(RADEON_AGP_BASE, 0xFFFFFFFF);
+		WREG32(RS480_AGP_BASE_2, 0);
+	}
+	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
+	tmp = REG_SET(RS690_MC_AGP_TOP, tmp >> 16);
+	tmp |= REG_SET(RS690_MC_AGP_START, rdev->mc.gtt_location >> 16);
+	if ((rdev->family == CHIP_RS690) || (rdev->family == CHIP_RS740)) {
+		WREG32_MC(RS690_MCCFG_AGP_LOCATION, tmp);
+		tmp = RREG32(RADEON_BUS_CNTL) & ~RS600_BUS_MASTER_DIS;
+		WREG32(RADEON_BUS_CNTL, tmp);
+	} else {
+		WREG32(RADEON_MC_AGP_LOCATION, tmp);
+		tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
+		WREG32(RADEON_BUS_CNTL, tmp);
+	}
+	/* Table should be in 32bits address space so ignore bits above. */
+	tmp = rdev->gart.table_addr & 0xfffff000;
+	WREG32_MC(RS480_GART_BASE, tmp);
+	/* TODO: more tweaking here */
+	WREG32_MC(RS480_GART_FEATURE_ID,
+		  (RS480_TLB_ENABLE |
+		   RS480_GTW_LAC_EN | RS480_1LEVEL_GART));
+	/* Disable snooping */
+	WREG32_MC(RS480_AGP_MODE_CNTL,
+		  (1 << RS480_REQ_TYPE_SNOOP_SHIFT) | RS480_REQ_TYPE_SNOOP_DIS);
+	/* Disable AGP mode */
+	/* FIXME: according to doc we should set HIDE_MMCFG_BAR=0,
+	 * AGPMODE30=0 & AGP30ENHANCED=0 in NB_CNTL */
+	if ((rdev->family == CHIP_RS690) || (rdev->family == CHIP_RS740)) {
+		WREG32_MC(RS480_MC_MISC_CNTL,
+			  (RS480_GART_INDEX_REG_EN | RS690_BLOCK_GFX_D3_EN));
+	} else {
+		WREG32_MC(RS480_MC_MISC_CNTL, RS480_GART_INDEX_REG_EN);
+	}
+	/* Enable gart */
+	WREG32_MC(RS480_AGP_ADDRESS_SPACE_SIZE, (RS480_GART_EN | size_reg));
+	rs400_gart_tlb_flush(rdev);
+	rdev->gart.ready = true;
+	return 0;
+}
+
+void rs400_gart_disable(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+
+	tmp = RREG32_MC(RS690_AIC_CTRL_SCRATCH);
+	tmp |= RS690_DIS_OUT_OF_PCI_GART_ACCESS;
+	WREG32_MC(RS690_AIC_CTRL_SCRATCH, tmp);
+	WREG32_MC(RS480_AGP_ADDRESS_SPACE_SIZE, 0);
+}
+
+int rs400_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
+{
+	if (i < 0 || i > rdev->gart.num_gpu_pages) {
+		return -EINVAL;
+	}
+	rdev->gart.table.ram.ptr[i] = cpu_to_le32(((uint32_t)addr) | 0xC);
+	return 0;
+}
+
+
+/*
+ * MC functions.
+ */
+int rs400_mc_init(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+	int r;
+
+	if (r100_debugfs_rbbm_init(rdev)) {
+		DRM_ERROR("Failed to register debugfs file for RBBM !\n");
+	}
+
+	rs400_gpu_init(rdev);
+	rs400_gart_disable(rdev);
+	rdev->mc.gtt_location = rdev->mc.vram_size;
+	rdev->mc.gtt_location += (rdev->mc.gtt_size - 1);
+	rdev->mc.gtt_location &= ~(rdev->mc.gtt_size - 1);
+	rdev->mc.vram_location = 0xFFFFFFFFUL;
+	r = radeon_mc_setup(rdev);
+	if (r) {
+		return r;
+	}
+
+	r100_mc_disable_clients(rdev);
+	if (r300_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait MC idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+
+	tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
+	tmp = REG_SET(RADEON_MC_FB_TOP, tmp >> 16);
+	tmp |= REG_SET(RADEON_MC_FB_START, rdev->mc.vram_location >> 16);
+	WREG32(RADEON_MC_FB_LOCATION, tmp);
+	tmp = RREG32(RADEON_HOST_PATH_CNTL) | RADEON_HP_LIN_RD_CACHE_DIS;
+	WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE);
+	(void)RREG32(RADEON_HOST_PATH_CNTL);
+	WREG32(RADEON_HOST_PATH_CNTL, tmp);
+	(void)RREG32(RADEON_HOST_PATH_CNTL);
+	return 0;
+}
+
+void rs400_mc_fini(struct radeon_device *rdev)
+{
+	rs400_gart_disable(rdev);
+	radeon_gart_table_ram_free(rdev);
+	radeon_gart_fini(rdev);
+}
+
+
+/*
+ * Global GPU functions
+ */
+void rs400_errata(struct radeon_device *rdev)
+{
+	rdev->pll_errata = 0;
+}
+
+void rs400_gpu_init(struct radeon_device *rdev)
+{
+	/* FIXME: HDP same place on rs400 ? */
+	r100_hdp_reset(rdev);
+	/* FIXME: is this correct ? */
+	r420_pipes_init(rdev);
+	if (r300_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait MC idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+}
+
+
+/*
+ * VRAM info.
+ */
+void rs400_vram_info(struct radeon_device *rdev)
+{
+	uint32_t tom;
+
+	rs400_gart_adjust_size(rdev);
+	/* DDR for all card after R300 & IGP */
+	rdev->mc.vram_is_ddr = true;
+	rdev->mc.vram_width = 128;
+
+	/* read NB_TOM to get the amount of ram stolen for the GPU */
+	tom = RREG32(RADEON_NB_TOM);
+	rdev->mc.vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
+	WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
+
+	/* Could aper size report 0 ? */
+	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
+	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+}
+
+
+/*
+ * Indirect registers accessor
+ */
+uint32_t rs400_mc_rreg(struct radeon_device *rdev, uint32_t reg)
+{
+	uint32_t r;
+
+	WREG32(RS480_NB_MC_INDEX, reg & 0xff);
+	r = RREG32(RS480_NB_MC_DATA);
+	WREG32(RS480_NB_MC_INDEX, 0xff);
+	return r;
+}
+
+void rs400_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+	WREG32(RS480_NB_MC_INDEX, ((reg) & 0xff) | RS480_NB_MC_IND_WR_EN);
+	WREG32(RS480_NB_MC_DATA, (v));
+	WREG32(RS480_NB_MC_INDEX, 0xff);
+}
+
+
+/*
+ * Debugfs info
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int rs400_debugfs_gart_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t tmp;
+
+	tmp = RREG32(RADEON_HOST_PATH_CNTL);
+	seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp);
+	tmp = RREG32(RADEON_BUS_CNTL);
+	seq_printf(m, "BUS_CNTL 0x%08x\n", tmp);
+	tmp = RREG32_MC(RS690_AIC_CTRL_SCRATCH);
+	seq_printf(m, "AIC_CTRL_SCRATCH 0x%08x\n", tmp);
+	if (rdev->family == CHIP_RS690 || (rdev->family == CHIP_RS740)) {
+		tmp = RREG32_MC(RS690_MCCFG_AGP_BASE);
+		seq_printf(m, "MCCFG_AGP_BASE 0x%08x\n", tmp);
+		tmp = RREG32_MC(RS690_MCCFG_AGP_BASE_2);
+		seq_printf(m, "MCCFG_AGP_BASE_2 0x%08x\n", tmp);
+		tmp = RREG32_MC(RS690_MCCFG_AGP_LOCATION);
+		seq_printf(m, "MCCFG_AGP_LOCATION 0x%08x\n", tmp);
+		tmp = RREG32_MC(0x100);
+		seq_printf(m, "MCCFG_FB_LOCATION 0x%08x\n", tmp);
+		tmp = RREG32(0x134);
+		seq_printf(m, "HDP_FB_LOCATION 0x%08x\n", tmp);
+	} else {
+		tmp = RREG32(RADEON_AGP_BASE);
+		seq_printf(m, "AGP_BASE 0x%08x\n", tmp);
+		tmp = RREG32(RS480_AGP_BASE_2);
+		seq_printf(m, "AGP_BASE_2 0x%08x\n", tmp);
+		tmp = RREG32(RADEON_MC_AGP_LOCATION);
+		seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp);
+	}
+	tmp = RREG32_MC(RS480_GART_BASE);
+	seq_printf(m, "GART_BASE 0x%08x\n", tmp);
+	tmp = RREG32_MC(RS480_GART_FEATURE_ID);
+	seq_printf(m, "GART_FEATURE_ID 0x%08x\n", tmp);
+	tmp = RREG32_MC(RS480_AGP_MODE_CNTL);
+	seq_printf(m, "AGP_MODE_CONTROL 0x%08x\n", tmp);
+	tmp = RREG32_MC(RS480_MC_MISC_CNTL);
+	seq_printf(m, "MC_MISC_CNTL 0x%08x\n", tmp);
+	tmp = RREG32_MC(0x5F);
+	seq_printf(m, "MC_MISC_UMA_CNTL 0x%08x\n", tmp);
+	tmp = RREG32_MC(RS480_AGP_ADDRESS_SPACE_SIZE);
+	seq_printf(m, "AGP_ADDRESS_SPACE_SIZE 0x%08x\n", tmp);
+	tmp = RREG32_MC(RS480_GART_CACHE_CNTRL);
+	seq_printf(m, "GART_CACHE_CNTRL 0x%08x\n", tmp);
+	tmp = RREG32_MC(0x3B);
+	seq_printf(m, "MC_GART_ERROR_ADDRESS 0x%08x\n", tmp);
+	tmp = RREG32_MC(0x3C);
+	seq_printf(m, "MC_GART_ERROR_ADDRESS_HI 0x%08x\n", tmp);
+	tmp = RREG32_MC(0x30);
+	seq_printf(m, "GART_ERROR_0 0x%08x\n", tmp);
+	tmp = RREG32_MC(0x31);
+	seq_printf(m, "GART_ERROR_1 0x%08x\n", tmp);
+	tmp = RREG32_MC(0x32);
+	seq_printf(m, "GART_ERROR_2 0x%08x\n", tmp);
+	tmp = RREG32_MC(0x33);
+	seq_printf(m, "GART_ERROR_3 0x%08x\n", tmp);
+	tmp = RREG32_MC(0x34);
+	seq_printf(m, "GART_ERROR_4 0x%08x\n", tmp);
+	tmp = RREG32_MC(0x35);
+	seq_printf(m, "GART_ERROR_5 0x%08x\n", tmp);
+	tmp = RREG32_MC(0x36);
+	seq_printf(m, "GART_ERROR_6 0x%08x\n", tmp);
+	tmp = RREG32_MC(0x37);
+	seq_printf(m, "GART_ERROR_7 0x%08x\n", tmp);
+	return 0;
+}
+
+static struct drm_info_list rs400_gart_info_list[] = {
+	{"rs400_gart_info", rs400_debugfs_gart_info, 0, NULL},
+};
+#endif
+
+int rs400_debugfs_pcie_gart_info_init(struct radeon_device *rdev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	return radeon_debugfs_add_files(rdev, rs400_gart_info_list, 1);
+#else
+	return 0;
+#endif
+}
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
new file mode 100644
index 00000000000..ab0c967553e
--- /dev/null
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -0,0 +1,324 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#include "drmP.h"
+#include "radeon_reg.h"
+#include "radeon.h"
+
+/* rs600 depends on : */
+void r100_hdp_reset(struct radeon_device *rdev);
+int r100_gui_wait_for_idle(struct radeon_device *rdev);
+int r300_mc_wait_for_idle(struct radeon_device *rdev);
+void r420_pipes_init(struct radeon_device *rdev);
+
+/* This files gather functions specifics to :
+ * rs600
+ *
+ * Some of these functions might be used by newer ASICs.
+ */
+void rs600_gpu_init(struct radeon_device *rdev);
+int rs600_mc_wait_for_idle(struct radeon_device *rdev);
+void rs600_disable_vga(struct radeon_device *rdev);
+
+
+/*
+ * GART.
+ */
+void rs600_gart_tlb_flush(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+
+	tmp = RREG32_MC(RS600_MC_PT0_CNTL);
+	tmp &= ~(RS600_INVALIDATE_ALL_L1_TLBS | RS600_INVALIDATE_L2_CACHE);
+	WREG32_MC(RS600_MC_PT0_CNTL, tmp);
+
+	tmp = RREG32_MC(RS600_MC_PT0_CNTL);
+	tmp |= RS600_INVALIDATE_ALL_L1_TLBS | RS600_INVALIDATE_L2_CACHE;
+	WREG32_MC(RS600_MC_PT0_CNTL, tmp);
+
+	tmp = RREG32_MC(RS600_MC_PT0_CNTL);
+	tmp &= ~(RS600_INVALIDATE_ALL_L1_TLBS | RS600_INVALIDATE_L2_CACHE);
+	WREG32_MC(RS600_MC_PT0_CNTL, tmp);
+	tmp = RREG32_MC(RS600_MC_PT0_CNTL);
+}
+
+int rs600_gart_enable(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+	int i;
+	int r;
+
+	/* Initialize common gart structure */
+	r = radeon_gart_init(rdev);
+	if (r) {
+		return r;
+	}
+	rdev->gart.table_size = rdev->gart.num_gpu_pages * 8;
+	r = radeon_gart_table_vram_alloc(rdev);
+	if (r) {
+		return r;
+	}
+	/* FIXME: setup default page */
+	WREG32_MC(RS600_MC_PT0_CNTL,
+		 (RS600_EFFECTIVE_L2_CACHE_SIZE(6) |
+		  RS600_EFFECTIVE_L2_QUEUE_SIZE(6)));
+	for (i = 0; i < 19; i++) {
+		WREG32_MC(RS600_MC_PT0_CLIENT0_CNTL + i,
+			 (RS600_ENABLE_TRANSLATION_MODE_OVERRIDE |
+			  RS600_SYSTEM_ACCESS_MODE_IN_SYS |
+			  RS600_SYSTEM_APERTURE_UNMAPPED_ACCESS_DEFAULT_PAGE |
+			  RS600_EFFECTIVE_L1_CACHE_SIZE(3) |
+			  RS600_ENABLE_FRAGMENT_PROCESSING |
+			  RS600_EFFECTIVE_L1_QUEUE_SIZE(3)));
+	}
+
+	/* System context map to GART space */
+	WREG32_MC(RS600_MC_PT0_SYSTEM_APERTURE_LOW_ADDR, rdev->mc.gtt_location);
+	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
+	WREG32_MC(RS600_MC_PT0_SYSTEM_APERTURE_HIGH_ADDR, tmp);
+
+	/* enable first context */
+	WREG32_MC(RS600_MC_PT0_CONTEXT0_FLAT_START_ADDR, rdev->mc.gtt_location);
+	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
+	WREG32_MC(RS600_MC_PT0_CONTEXT0_FLAT_END_ADDR, tmp);
+	WREG32_MC(RS600_MC_PT0_CONTEXT0_CNTL,
+		 (RS600_ENABLE_PAGE_TABLE | RS600_PAGE_TABLE_TYPE_FLAT));
+	/* disable all other contexts */
+	for (i = 1; i < 8; i++) {
+		WREG32_MC(RS600_MC_PT0_CONTEXT0_CNTL + i, 0);
+	}
+
+	/* setup the page table */
+	WREG32_MC(RS600_MC_PT0_CONTEXT0_FLAT_BASE_ADDR,
+		 rdev->gart.table_addr);
+	WREG32_MC(RS600_MC_PT0_CONTEXT0_DEFAULT_READ_ADDR, 0);
+
+	/* enable page tables */
+	tmp = RREG32_MC(RS600_MC_PT0_CNTL);
+	WREG32_MC(RS600_MC_PT0_CNTL, (tmp | RS600_ENABLE_PT));
+	tmp = RREG32_MC(RS600_MC_CNTL1);
+	WREG32_MC(RS600_MC_CNTL1, (tmp | RS600_ENABLE_PAGE_TABLES));
+	rs600_gart_tlb_flush(rdev);
+	rdev->gart.ready = true;
+	return 0;
+}
+
+void rs600_gart_disable(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+
+	/* FIXME: disable out of gart access */
+	WREG32_MC(RS600_MC_PT0_CNTL, 0);
+	tmp = RREG32_MC(RS600_MC_CNTL1);
+	tmp &= ~RS600_ENABLE_PAGE_TABLES;
+	WREG32_MC(RS600_MC_CNTL1, tmp);
+	radeon_object_kunmap(rdev->gart.table.vram.robj);
+	radeon_object_unpin(rdev->gart.table.vram.robj);
+}
+
+#define R600_PTE_VALID     (1 << 0)
+#define R600_PTE_SYSTEM    (1 << 1)
+#define R600_PTE_SNOOPED   (1 << 2)
+#define R600_PTE_READABLE  (1 << 5)
+#define R600_PTE_WRITEABLE (1 << 6)
+
+int rs600_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
+{
+	void __iomem *ptr = (void *)rdev->gart.table.vram.ptr;
+
+	if (i < 0 || i > rdev->gart.num_gpu_pages) {
+		return -EINVAL;
+	}
+	addr = addr & 0xFFFFFFFFFFFFF000ULL;
+	addr |= R600_PTE_VALID | R600_PTE_SYSTEM | R600_PTE_SNOOPED;
+	addr |= R600_PTE_READABLE | R600_PTE_WRITEABLE;
+	writeq(addr, ((void __iomem *)ptr) + (i * 8));
+	return 0;
+}
+
+
+/*
+ * MC.
+ */
+void rs600_mc_disable_clients(struct radeon_device *rdev)
+{
+	unsigned tmp;
+
+	if (r100_gui_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait GUI idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+
+	tmp = RREG32(AVIVO_D1VGA_CONTROL);
+	WREG32(AVIVO_D1VGA_CONTROL, tmp & ~AVIVO_DVGA_CONTROL_MODE_ENABLE);
+	tmp = RREG32(AVIVO_D2VGA_CONTROL);
+	WREG32(AVIVO_D2VGA_CONTROL, tmp & ~AVIVO_DVGA_CONTROL_MODE_ENABLE);
+
+	tmp = RREG32(AVIVO_D1CRTC_CONTROL);
+	WREG32(AVIVO_D1CRTC_CONTROL, tmp & ~AVIVO_CRTC_EN);
+	tmp = RREG32(AVIVO_D2CRTC_CONTROL);
+	WREG32(AVIVO_D2CRTC_CONTROL, tmp & ~AVIVO_CRTC_EN);
+
+	/* make sure all previous write got through */
+	tmp = RREG32(AVIVO_D2CRTC_CONTROL);
+
+	mdelay(1);
+}
+
+int rs600_mc_init(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+	int r;
+
+	if (r100_debugfs_rbbm_init(rdev)) {
+		DRM_ERROR("Failed to register debugfs file for RBBM !\n");
+	}
+
+	rs600_gpu_init(rdev);
+	rs600_gart_disable(rdev);
+
+	/* Setup GPU memory space */
+	rdev->mc.vram_location = 0xFFFFFFFFUL;
+	rdev->mc.gtt_location = 0xFFFFFFFFUL;
+	r = radeon_mc_setup(rdev);
+	if (r) {
+		return r;
+	}
+
+	/* Program GPU memory space */
+	/* Enable bus master */
+	tmp = RREG32(RADEON_BUS_CNTL) & ~RS600_BUS_MASTER_DIS;
+	WREG32(RADEON_BUS_CNTL, tmp);
+	/* FIXME: What does AGP means for such chipset ? */
+	WREG32_MC(RS600_MC_AGP_LOCATION, 0x0FFFFFFF);
+	/* FIXME: are this AGP reg in indirect MC range ? */
+	WREG32_MC(RS600_MC_AGP_BASE, 0);
+	WREG32_MC(RS600_MC_AGP_BASE_2, 0);
+	rs600_mc_disable_clients(rdev);
+	if (rs600_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait MC idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+	tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
+	tmp = REG_SET(RS600_MC_FB_TOP, tmp >> 16);
+	tmp |= REG_SET(RS600_MC_FB_START, rdev->mc.vram_location >> 16);
+	WREG32_MC(RS600_MC_FB_LOCATION, tmp);
+	WREG32(RS690_HDP_FB_LOCATION, rdev->mc.vram_location >> 16);
+	return 0;
+}
+
+void rs600_mc_fini(struct radeon_device *rdev)
+{
+	rs600_gart_disable(rdev);
+	radeon_gart_table_vram_free(rdev);
+	radeon_gart_fini(rdev);
+}
+
+
+/*
+ * Global GPU functions
+ */
+void rs600_disable_vga(struct radeon_device *rdev)
+{
+	unsigned tmp;
+
+	WREG32(0x330, 0);
+	WREG32(0x338, 0);
+	tmp = RREG32(0x300);
+	tmp &= ~(3 << 16);
+	WREG32(0x300, tmp);
+	WREG32(0x308, (1 << 8));
+	WREG32(0x310, rdev->mc.vram_location);
+	WREG32(0x594, 0);
+}
+
+int rs600_mc_wait_for_idle(struct radeon_device *rdev)
+{
+	unsigned i;
+	uint32_t tmp;
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		/* read MC_STATUS */
+		tmp = RREG32_MC(RS600_MC_STATUS);
+		if (tmp & RS600_MC_STATUS_IDLE) {
+			return 0;
+		}
+		DRM_UDELAY(1);
+	}
+	return -1;
+}
+
+void rs600_errata(struct radeon_device *rdev)
+{
+	rdev->pll_errata = 0;
+}
+
+void rs600_gpu_init(struct radeon_device *rdev)
+{
+	/* FIXME: HDP same place on rs600 ? */
+	r100_hdp_reset(rdev);
+	rs600_disable_vga(rdev);
+	/* FIXME: is this correct ? */
+	r420_pipes_init(rdev);
+	if (rs600_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait MC idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+}
+
+
+/*
+ * VRAM info.
+ */
+void rs600_vram_info(struct radeon_device *rdev)
+{
+	/* FIXME: to do or is these values sane ? */
+	rdev->mc.vram_is_ddr = true;
+	rdev->mc.vram_width = 128;
+}
+
+
+/*
+ * Indirect registers accessor
+ */
+uint32_t rs600_mc_rreg(struct radeon_device *rdev, uint32_t reg)
+{
+	uint32_t r;
+
+	WREG32(RS600_MC_INDEX,
+	       ((reg & RS600_MC_ADDR_MASK) | RS600_MC_IND_CITF_ARB0));
+	r = RREG32(RS600_MC_DATA);
+	return r;
+}
+
+void rs600_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+	WREG32(RS600_MC_INDEX,
+		RS600_MC_IND_WR_EN | RS600_MC_IND_CITF_ARB0 |
+		((reg) & RS600_MC_ADDR_MASK));
+	WREG32(RS600_MC_DATA, v);
+}
diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c
new file mode 100644
index 00000000000..79ba85042b5
--- /dev/null
+++ b/drivers/gpu/drm/radeon/rs690.c
@@ -0,0 +1,181 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#include "drmP.h"
+#include "radeon_reg.h"
+#include "radeon.h"
+
+/* rs690,rs740 depends on : */
+void r100_hdp_reset(struct radeon_device *rdev);
+int r300_mc_wait_for_idle(struct radeon_device *rdev);
+void r420_pipes_init(struct radeon_device *rdev);
+void rs400_gart_disable(struct radeon_device *rdev);
+int rs400_gart_enable(struct radeon_device *rdev);
+void rs400_gart_adjust_size(struct radeon_device *rdev);
+void rs600_mc_disable_clients(struct radeon_device *rdev);
+void rs600_disable_vga(struct radeon_device *rdev);
+
+/* This files gather functions specifics to :
+ * rs690,rs740
+ *
+ * Some of these functions might be used by newer ASICs.
+ */
+void rs690_gpu_init(struct radeon_device *rdev);
+int rs690_mc_wait_for_idle(struct radeon_device *rdev);
+
+
+/*
+ * MC functions.
+ */
+int rs690_mc_init(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+	int r;
+
+	if (r100_debugfs_rbbm_init(rdev)) {
+		DRM_ERROR("Failed to register debugfs file for RBBM !\n");
+	}
+
+	rs690_gpu_init(rdev);
+	rs400_gart_disable(rdev);
+
+	/* Setup GPU memory space */
+	rdev->mc.gtt_location = rdev->mc.vram_size;
+	rdev->mc.gtt_location += (rdev->mc.gtt_size - 1);
+	rdev->mc.gtt_location &= ~(rdev->mc.gtt_size - 1);
+	rdev->mc.vram_location = 0xFFFFFFFFUL;
+	r = radeon_mc_setup(rdev);
+	if (r) {
+		return r;
+	}
+
+	/* Program GPU memory space */
+	rs600_mc_disable_clients(rdev);
+	if (rs690_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait MC idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+	tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
+	tmp = REG_SET(RS690_MC_FB_TOP, tmp >> 16);
+	tmp |= REG_SET(RS690_MC_FB_START, rdev->mc.vram_location >> 16);
+	WREG32_MC(RS690_MCCFG_FB_LOCATION, tmp);
+	/* FIXME: Does this reg exist on RS480,RS740 ? */
+	WREG32(0x310, rdev->mc.vram_location);
+	WREG32(RS690_HDP_FB_LOCATION, rdev->mc.vram_location >> 16);
+	return 0;
+}
+
+void rs690_mc_fini(struct radeon_device *rdev)
+{
+	rs400_gart_disable(rdev);
+	radeon_gart_table_ram_free(rdev);
+	radeon_gart_fini(rdev);
+}
+
+
+/*
+ * Global GPU functions
+ */
+int rs690_mc_wait_for_idle(struct radeon_device *rdev)
+{
+	unsigned i;
+	uint32_t tmp;
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		/* read MC_STATUS */
+		tmp = RREG32_MC(RS690_MC_STATUS);
+		if (tmp & RS690_MC_STATUS_IDLE) {
+			return 0;
+		}
+		DRM_UDELAY(1);
+	}
+	return -1;
+}
+
+void rs690_errata(struct radeon_device *rdev)
+{
+	rdev->pll_errata = 0;
+}
+
+void rs690_gpu_init(struct radeon_device *rdev)
+{
+	/* FIXME: HDP same place on rs690 ? */
+	r100_hdp_reset(rdev);
+	rs600_disable_vga(rdev);
+	/* FIXME: is this correct ? */
+	r420_pipes_init(rdev);
+	if (rs690_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait MC idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+}
+
+
+/*
+ * VRAM info.
+ */
+void rs690_vram_info(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+
+	rs400_gart_adjust_size(rdev);
+	/* DDR for all card after R300 & IGP */
+	rdev->mc.vram_is_ddr = true;
+	/* FIXME: is this correct for RS690/RS740 ? */
+	tmp = RREG32(RADEON_MEM_CNTL);
+	if (tmp & R300_MEM_NUM_CHANNELS_MASK) {
+		rdev->mc.vram_width = 128;
+	} else {
+		rdev->mc.vram_width = 64;
+	}
+	rdev->mc.vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
+
+	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
+	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+}
+
+
+/*
+ * Indirect registers accessor
+ */
+uint32_t rs690_mc_rreg(struct radeon_device *rdev, uint32_t reg)
+{
+	uint32_t r;
+
+	WREG32(RS690_MC_INDEX, (reg & RS690_MC_INDEX_MASK));
+	r = RREG32(RS690_MC_DATA);
+	WREG32(RS690_MC_INDEX, RS690_MC_INDEX_MASK);
+	return r;
+}
+
+void rs690_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+	WREG32(RS690_MC_INDEX,
+	       RS690_MC_INDEX_WR_EN | ((reg) & RS690_MC_INDEX_MASK));
+	WREG32(RS690_MC_DATA, v);
+	WREG32(RS690_MC_INDEX, RS690_MC_INDEX_WR_ACK);
+}
diff --git a/drivers/gpu/drm/radeon/rs780.c b/drivers/gpu/drm/radeon/rs780.c
new file mode 100644
index 00000000000..0affcff8182
--- /dev/null
+++ b/drivers/gpu/drm/radeon/rs780.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#include "drmP.h"
+#include "radeon_reg.h"
+#include "radeon.h"
+
+/* rs780  depends on : */
+void rs600_mc_disable_clients(struct radeon_device *rdev);
+
+/* This files gather functions specifics to:
+ * rs780
+ *
+ * Some of these functions might be used by newer ASICs.
+ */
+int rs780_mc_wait_for_idle(struct radeon_device *rdev);
+void rs780_gpu_init(struct radeon_device *rdev);
+
+
+/*
+ * MC
+ */
+int rs780_mc_init(struct radeon_device *rdev)
+{
+	rs780_gpu_init(rdev);
+	/* FIXME: implement */
+
+	rs600_mc_disable_clients(rdev);
+	if (rs780_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait MC idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+	return 0;
+}
+
+void rs780_mc_fini(struct radeon_device *rdev)
+{
+	/* FIXME: implement */
+}
+
+
+/*
+ * Global GPU functions
+ */
+void rs780_errata(struct radeon_device *rdev)
+{
+	rdev->pll_errata = 0;
+}
+
+int rs780_mc_wait_for_idle(struct radeon_device *rdev)
+{
+	/* FIXME: implement */
+	return 0;
+}
+
+void rs780_gpu_init(struct radeon_device *rdev)
+{
+	/* FIXME: implement */
+}
+
+
+/*
+ * VRAM info
+ */
+void rs780_vram_get_type(struct radeon_device *rdev)
+{
+	/* FIXME: implement */
+}
+
+void rs780_vram_info(struct radeon_device *rdev)
+{
+	rs780_vram_get_type(rdev);
+
+	/* FIXME: implement */
+	/* Could aper size report 0 ? */
+	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
+	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+}
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c
new file mode 100644
index 00000000000..7eab95db58a
--- /dev/null
+++ b/drivers/gpu/drm/radeon/rv515.c
@@ -0,0 +1,504 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#include <linux/seq_file.h>
+#include "drmP.h"
+#include "radeon_reg.h"
+#include "radeon.h"
+
+/* rv515 depends on : */
+void r100_hdp_reset(struct radeon_device *rdev);
+int r100_cp_reset(struct radeon_device *rdev);
+int r100_rb2d_reset(struct radeon_device *rdev);
+int r100_gui_wait_for_idle(struct radeon_device *rdev);
+int r100_cp_init(struct radeon_device *rdev, unsigned ring_size);
+int rv370_pcie_gart_enable(struct radeon_device *rdev);
+void rv370_pcie_gart_disable(struct radeon_device *rdev);
+void r420_pipes_init(struct radeon_device *rdev);
+void rs600_mc_disable_clients(struct radeon_device *rdev);
+void rs600_disable_vga(struct radeon_device *rdev);
+
+/* This files gather functions specifics to:
+ * rv515
+ *
+ * Some of these functions might be used by newer ASICs.
+ */
+int rv515_debugfs_pipes_info_init(struct radeon_device *rdev);
+int rv515_debugfs_ga_info_init(struct radeon_device *rdev);
+void rv515_gpu_init(struct radeon_device *rdev);
+int rv515_mc_wait_for_idle(struct radeon_device *rdev);
+
+
+/*
+ * MC
+ */
+int rv515_mc_init(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+	int r;
+
+	if (r100_debugfs_rbbm_init(rdev)) {
+		DRM_ERROR("Failed to register debugfs file for RBBM !\n");
+	}
+	if (rv515_debugfs_pipes_info_init(rdev)) {
+		DRM_ERROR("Failed to register debugfs file for pipes !\n");
+	}
+	if (rv515_debugfs_ga_info_init(rdev)) {
+		DRM_ERROR("Failed to register debugfs file for pipes !\n");
+	}
+
+	rv515_gpu_init(rdev);
+	rv370_pcie_gart_disable(rdev);
+
+	/* Setup GPU memory space */
+	rdev->mc.vram_location = 0xFFFFFFFFUL;
+	rdev->mc.gtt_location = 0xFFFFFFFFUL;
+	if (rdev->flags & RADEON_IS_AGP) {
+		r = radeon_agp_init(rdev);
+		if (r) {
+			printk(KERN_WARNING "[drm] Disabling AGP\n");
+			rdev->flags &= ~RADEON_IS_AGP;
+			rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
+		} else {
+			rdev->mc.gtt_location = rdev->mc.agp_base;
+		}
+	}
+	r = radeon_mc_setup(rdev);
+	if (r) {
+		return r;
+	}
+
+	/* Program GPU memory space */
+	rs600_mc_disable_clients(rdev);
+	if (rv515_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait MC idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+	/* Write VRAM size in case we are limiting it */
+	WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
+	tmp = REG_SET(RV515_MC_FB_START, rdev->mc.vram_location >> 16);
+	WREG32(0x134, tmp);
+	tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
+	tmp = REG_SET(RV515_MC_FB_TOP, tmp >> 16);
+	tmp |= REG_SET(RV515_MC_FB_START, rdev->mc.vram_location >> 16);
+	WREG32_MC(RV515_MC_FB_LOCATION, tmp);
+	WREG32(RS690_HDP_FB_LOCATION, rdev->mc.vram_location >> 16);
+	WREG32(0x310, rdev->mc.vram_location);
+	if (rdev->flags & RADEON_IS_AGP) {
+		tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
+		tmp = REG_SET(RV515_MC_AGP_TOP, tmp >> 16);
+		tmp |= REG_SET(RV515_MC_AGP_START, rdev->mc.gtt_location >> 16);
+		WREG32_MC(RV515_MC_AGP_LOCATION, tmp);
+		WREG32_MC(RV515_MC_AGP_BASE, rdev->mc.agp_base);
+		WREG32_MC(RV515_MC_AGP_BASE_2, 0);
+	} else {
+		WREG32_MC(RV515_MC_AGP_LOCATION, 0x0FFFFFFF);
+		WREG32_MC(RV515_MC_AGP_BASE, 0);
+		WREG32_MC(RV515_MC_AGP_BASE_2, 0);
+	}
+	return 0;
+}
+
+void rv515_mc_fini(struct radeon_device *rdev)
+{
+	rv370_pcie_gart_disable(rdev);
+	radeon_gart_table_vram_free(rdev);
+	radeon_gart_fini(rdev);
+}
+
+
+/*
+ * Global GPU functions
+ */
+void rv515_ring_start(struct radeon_device *rdev)
+{
+	unsigned gb_tile_config;
+	int r;
+
+	/* Sub pixel 1/12 so we can have 4K rendering according to doc */
+	gb_tile_config = R300_ENABLE_TILING | R300_TILE_SIZE_16;
+	switch (rdev->num_gb_pipes) {
+	case 2:
+		gb_tile_config |= R300_PIPE_COUNT_R300;
+		break;
+	case 3:
+		gb_tile_config |= R300_PIPE_COUNT_R420_3P;
+		break;
+	case 4:
+		gb_tile_config |= R300_PIPE_COUNT_R420;
+		break;
+	case 1:
+	default:
+		gb_tile_config |= R300_PIPE_COUNT_RV350;
+		break;
+	}
+
+	r = radeon_ring_lock(rdev, 64);
+	if (r) {
+		return;
+	}
+	radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
+	radeon_ring_write(rdev,
+			  RADEON_ISYNC_ANY2D_IDLE3D |
+			  RADEON_ISYNC_ANY3D_IDLE2D |
+			  RADEON_ISYNC_WAIT_IDLEGUI |
+			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
+	radeon_ring_write(rdev, PACKET0(R300_GB_TILE_CONFIG, 0));
+	radeon_ring_write(rdev, gb_tile_config);
+	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
+	radeon_ring_write(rdev,
+			  RADEON_WAIT_2D_IDLECLEAN |
+			  RADEON_WAIT_3D_IDLECLEAN);
+	radeon_ring_write(rdev, PACKET0(0x170C, 0));
+	radeon_ring_write(rdev, 1 << 31);
+	radeon_ring_write(rdev, PACKET0(R300_GB_SELECT, 0));
+	radeon_ring_write(rdev, 0);
+	radeon_ring_write(rdev, PACKET0(R300_GB_ENABLE, 0));
+	radeon_ring_write(rdev, 0);
+	radeon_ring_write(rdev, PACKET0(0x42C8, 0));
+	radeon_ring_write(rdev, (1 << rdev->num_gb_pipes) - 1);
+	radeon_ring_write(rdev, PACKET0(R500_VAP_INDEX_OFFSET, 0));
+	radeon_ring_write(rdev, 0);
+	radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+	radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
+	radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
+	radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
+	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
+	radeon_ring_write(rdev,
+			  RADEON_WAIT_2D_IDLECLEAN |
+			  RADEON_WAIT_3D_IDLECLEAN);
+	radeon_ring_write(rdev, PACKET0(R300_GB_AA_CONFIG, 0));
+	radeon_ring_write(rdev, 0);
+	radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+	radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
+	radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
+	radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
+	radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS0, 0));
+	radeon_ring_write(rdev,
+			  ((6 << R300_MS_X0_SHIFT) |
+			   (6 << R300_MS_Y0_SHIFT) |
+			   (6 << R300_MS_X1_SHIFT) |
+			   (6 << R300_MS_Y1_SHIFT) |
+			   (6 << R300_MS_X2_SHIFT) |
+			   (6 << R300_MS_Y2_SHIFT) |
+			   (6 << R300_MSBD0_Y_SHIFT) |
+			   (6 << R300_MSBD0_X_SHIFT)));
+	radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS1, 0));
+	radeon_ring_write(rdev,
+			  ((6 << R300_MS_X3_SHIFT) |
+			   (6 << R300_MS_Y3_SHIFT) |
+			   (6 << R300_MS_X4_SHIFT) |
+			   (6 << R300_MS_Y4_SHIFT) |
+			   (6 << R300_MS_X5_SHIFT) |
+			   (6 << R300_MS_Y5_SHIFT) |
+			   (6 << R300_MSBD1_SHIFT)));
+	radeon_ring_write(rdev, PACKET0(R300_GA_ENHANCE, 0));
+	radeon_ring_write(rdev, R300_GA_DEADLOCK_CNTL | R300_GA_FASTSYNC_CNTL);
+	radeon_ring_write(rdev, PACKET0(R300_GA_POLY_MODE, 0));
+	radeon_ring_write(rdev,
+			  R300_FRONT_PTYPE_TRIANGE | R300_BACK_PTYPE_TRIANGE);
+	radeon_ring_write(rdev, PACKET0(R300_GA_ROUND_MODE, 0));
+	radeon_ring_write(rdev,
+			  R300_GEOMETRY_ROUND_NEAREST |
+			  R300_COLOR_ROUND_NEAREST);
+	radeon_ring_unlock_commit(rdev);
+}
+
+void rv515_errata(struct radeon_device *rdev)
+{
+	rdev->pll_errata = 0;
+}
+
+int rv515_mc_wait_for_idle(struct radeon_device *rdev)
+{
+	unsigned i;
+	uint32_t tmp;
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		/* read MC_STATUS */
+		tmp = RREG32_MC(RV515_MC_STATUS);
+		if (tmp & RV515_MC_STATUS_IDLE) {
+			return 0;
+		}
+		DRM_UDELAY(1);
+	}
+	return -1;
+}
+
+void rv515_gpu_init(struct radeon_device *rdev)
+{
+	unsigned pipe_select_current, gb_pipe_select, tmp;
+
+	r100_hdp_reset(rdev);
+	r100_rb2d_reset(rdev);
+
+	if (r100_gui_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait GUI idle while "
+		       "reseting GPU. Bad things might happen.\n");
+	}
+
+	rs600_disable_vga(rdev);
+
+	r420_pipes_init(rdev);
+	gb_pipe_select = RREG32(0x402C);
+	tmp = RREG32(0x170C);
+	pipe_select_current = (tmp >> 2) & 3;
+	tmp = (1 << pipe_select_current) |
+	      (((gb_pipe_select >> 8) & 0xF) << 4);
+	WREG32_PLL(0x000D, tmp);
+	if (r100_gui_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait GUI idle while "
+		       "reseting GPU. Bad things might happen.\n");
+	}
+	if (rv515_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait MC idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+}
+
+int rv515_ga_reset(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+	bool reinit_cp;
+	int i;
+
+	reinit_cp = rdev->cp.ready;
+	rdev->cp.ready = false;
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		WREG32(RADEON_CP_CSQ_MODE, 0);
+		WREG32(RADEON_CP_CSQ_CNTL, 0);
+		WREG32(RADEON_RBBM_SOFT_RESET, 0x32005);
+		(void)RREG32(RADEON_RBBM_SOFT_RESET);
+		udelay(200);
+		WREG32(RADEON_RBBM_SOFT_RESET, 0);
+		/* Wait to prevent race in RBBM_STATUS */
+		mdelay(1);
+		tmp = RREG32(RADEON_RBBM_STATUS);
+		if (tmp & ((1 << 20) | (1 << 26))) {
+			DRM_ERROR("VAP & CP still busy (RBBM_STATUS=0x%08X)\n", tmp);
+			/* GA still busy soft reset it */
+			WREG32(0x429C, 0x200);
+			WREG32(R300_VAP_PVS_STATE_FLUSH_REG, 0);
+			WREG32(0x43E0, 0);
+			WREG32(0x43E4, 0);
+			WREG32(0x24AC, 0);
+		}
+		/* Wait to prevent race in RBBM_STATUS */
+		mdelay(1);
+		tmp = RREG32(RADEON_RBBM_STATUS);
+		if (!(tmp & ((1 << 20) | (1 << 26)))) {
+			break;
+		}
+	}
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(RADEON_RBBM_STATUS);
+		if (!(tmp & ((1 << 20) | (1 << 26)))) {
+			DRM_INFO("GA reset succeed (RBBM_STATUS=0x%08X)\n",
+				 tmp);
+			DRM_INFO("GA_IDLE=0x%08X\n", RREG32(0x425C));
+			DRM_INFO("RB3D_RESET_STATUS=0x%08X\n", RREG32(0x46f0));
+			DRM_INFO("ISYNC_CNTL=0x%08X\n", RREG32(0x1724));
+			if (reinit_cp) {
+				return r100_cp_init(rdev, rdev->cp.ring_size);
+			}
+			return 0;
+		}
+		DRM_UDELAY(1);
+	}
+	tmp = RREG32(RADEON_RBBM_STATUS);
+	DRM_ERROR("Failed to reset GA ! (RBBM_STATUS=0x%08X)\n", tmp);
+	return -1;
+}
+
+int rv515_gpu_reset(struct radeon_device *rdev)
+{
+	uint32_t status;
+
+	/* reset order likely matter */
+	status = RREG32(RADEON_RBBM_STATUS);
+	/* reset HDP */
+	r100_hdp_reset(rdev);
+	/* reset rb2d */
+	if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
+		r100_rb2d_reset(rdev);
+	}
+	/* reset GA */
+	if (status & ((1 << 20) | (1 << 26))) {
+		rv515_ga_reset(rdev);
+	}
+	/* reset CP */
+	status = RREG32(RADEON_RBBM_STATUS);
+	if (status & (1 << 16)) {
+		r100_cp_reset(rdev);
+	}
+	/* Check if GPU is idle */
+	status = RREG32(RADEON_RBBM_STATUS);
+	if (status & (1 << 31)) {
+		DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
+		return -1;
+	}
+	DRM_INFO("GPU reset succeed (RBBM_STATUS=0x%08X)\n", status);
+	return 0;
+}
+
+
+/*
+ * VRAM info
+ */
+static void rv515_vram_get_type(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+
+	rdev->mc.vram_width = 128;
+	rdev->mc.vram_is_ddr = true;
+	tmp = RREG32_MC(RV515_MC_CNTL);
+	tmp &= RV515_MEM_NUM_CHANNELS_MASK;
+	switch (tmp) {
+	case 0:
+		rdev->mc.vram_width = 64;
+		break;
+	case 1:
+		rdev->mc.vram_width = 128;
+		break;
+	default:
+		rdev->mc.vram_width = 128;
+		break;
+	}
+}
+
+void rv515_vram_info(struct radeon_device *rdev)
+{
+	rv515_vram_get_type(rdev);
+	rdev->mc.vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
+
+	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
+	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+}
+
+
+/*
+ * Indirect registers accessor
+ */
+uint32_t rv515_mc_rreg(struct radeon_device *rdev, uint32_t reg)
+{
+	uint32_t r;
+
+	WREG32(R520_MC_IND_INDEX, 0x7f0000 | (reg & 0xffff));
+	r = RREG32(R520_MC_IND_DATA);
+	WREG32(R520_MC_IND_INDEX, 0);
+	return r;
+}
+
+void rv515_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+	WREG32(R520_MC_IND_INDEX, 0xff0000 | ((reg) & 0xffff));
+	WREG32(R520_MC_IND_DATA, (v));
+	WREG32(R520_MC_IND_INDEX, 0);
+}
+
+uint32_t rv515_pcie_rreg(struct radeon_device *rdev, uint32_t reg)
+{
+	uint32_t r;
+
+	WREG32(RADEON_PCIE_INDEX, ((reg) & 0x7ff));
+	(void)RREG32(RADEON_PCIE_INDEX);
+	r = RREG32(RADEON_PCIE_DATA);
+	return r;
+}
+
+void rv515_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+	WREG32(RADEON_PCIE_INDEX, ((reg) & 0x7ff));
+	(void)RREG32(RADEON_PCIE_INDEX);
+	WREG32(RADEON_PCIE_DATA, (v));
+	(void)RREG32(RADEON_PCIE_DATA);
+}
+
+
+/*
+ * Debugfs info
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int rv515_debugfs_pipes_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t tmp;
+
+	tmp = RREG32(R400_GB_PIPE_SELECT);
+	seq_printf(m, "GB_PIPE_SELECT 0x%08x\n", tmp);
+	tmp = RREG32(R500_SU_REG_DEST);
+	seq_printf(m, "SU_REG_DEST 0x%08x\n", tmp);
+	tmp = RREG32(R300_GB_TILE_CONFIG);
+	seq_printf(m, "GB_TILE_CONFIG 0x%08x\n", tmp);
+	tmp = RREG32(R300_DST_PIPE_CONFIG);
+	seq_printf(m, "DST_PIPE_CONFIG 0x%08x\n", tmp);
+	return 0;
+}
+
+static int rv515_debugfs_ga_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	uint32_t tmp;
+
+	tmp = RREG32(0x2140);
+	seq_printf(m, "VAP_CNTL_STATUS 0x%08x\n", tmp);
+	radeon_gpu_reset(rdev);
+	tmp = RREG32(0x425C);
+	seq_printf(m, "GA_IDLE 0x%08x\n", tmp);
+	return 0;
+}
+
+static struct drm_info_list rv515_pipes_info_list[] = {
+	{"rv515_pipes_info", rv515_debugfs_pipes_info, 0, NULL},
+};
+
+static struct drm_info_list rv515_ga_info_list[] = {
+	{"rv515_ga_info", rv515_debugfs_ga_info, 0, NULL},
+};
+#endif
+
+int rv515_debugfs_pipes_info_init(struct radeon_device *rdev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	return radeon_debugfs_add_files(rdev, rv515_pipes_info_list, 1);
+#else
+	return 0;
+#endif
+}
+
+int rv515_debugfs_ga_info_init(struct radeon_device *rdev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	return radeon_debugfs_add_files(rdev, rv515_ga_info_list, 1);
+#else
+	return 0;
+#endif
+}
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
new file mode 100644
index 00000000000..da50cc51ede
--- /dev/null
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#include "drmP.h"
+#include "radeon_reg.h"
+#include "radeon.h"
+
+/* rv770,rv730,rv710  depends on : */
+void rs600_mc_disable_clients(struct radeon_device *rdev);
+
+/* This files gather functions specifics to:
+ * rv770,rv730,rv710
+ *
+ * Some of these functions might be used by newer ASICs.
+ */
+int rv770_mc_wait_for_idle(struct radeon_device *rdev);
+void rv770_gpu_init(struct radeon_device *rdev);
+
+
+/*
+ * MC
+ */
+int rv770_mc_init(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+
+	rv770_gpu_init(rdev);
+
+	/* setup the gart before changing location so we can ask to
+	 * discard unmapped mc request
+	 */
+	/* FIXME: disable out of gart access */
+	tmp = rdev->mc.gtt_location / 4096;
+	tmp = REG_SET(R700_LOGICAL_PAGE_NUMBER, tmp);
+	WREG32(R700_MC_VM_SYSTEM_APERTURE_LOW_ADDR, tmp);
+	tmp = (rdev->mc.gtt_location + rdev->mc.gtt_size) / 4096;
+	tmp = REG_SET(R700_LOGICAL_PAGE_NUMBER, tmp);
+	WREG32(R700_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, tmp);
+
+	rs600_mc_disable_clients(rdev);
+	if (rv770_mc_wait_for_idle(rdev)) {
+		printk(KERN_WARNING "Failed to wait MC idle while "
+		       "programming pipes. Bad things might happen.\n");
+	}
+
+	tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
+	tmp = REG_SET(R700_MC_FB_TOP, tmp >> 24);
+	tmp |= REG_SET(R700_MC_FB_BASE, rdev->mc.vram_location >> 24);
+	WREG32(R700_MC_VM_FB_LOCATION, tmp);
+	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
+	tmp = REG_SET(R700_MC_AGP_TOP, tmp >> 22);
+	WREG32(R700_MC_VM_AGP_TOP, tmp);
+	tmp = REG_SET(R700_MC_AGP_BOT, rdev->mc.gtt_location >> 22);
+	WREG32(R700_MC_VM_AGP_BOT, tmp);
+	return 0;
+}
+
+void rv770_mc_fini(struct radeon_device *rdev)
+{
+	/* FIXME: implement */
+}
+
+
+/*
+ * Global GPU functions
+ */
+void rv770_errata(struct radeon_device *rdev)
+{
+	rdev->pll_errata = 0;
+}
+
+int rv770_mc_wait_for_idle(struct radeon_device *rdev)
+{
+	/* FIXME: implement */
+	return 0;
+}
+
+void rv770_gpu_init(struct radeon_device *rdev)
+{
+	/* FIXME: implement */
+}
+
+
+/*
+ * VRAM info
+ */
+void rv770_vram_get_type(struct radeon_device *rdev)
+{
+	/* FIXME: implement */
+}
+
+void rv770_vram_info(struct radeon_device *rdev)
+{
+	rv770_vram_get_type(rdev);
+
+	/* FIXME: implement */
+	/* Could aper size report 0 ? */
+	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
+	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+}
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 0dcf9ca0b0a..d0fcf36c2ab 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -115,5 +115,7 @@ source "drivers/staging/line6/Kconfig"
 
 source "drivers/staging/serqt_usb/Kconfig"
 
+source "drivers/gpu/drm/radeon/Kconfig"
+
 endif # !STAGING_EXCLUDE_BUILD
 endif # STAGING
diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h
index fe3e3a4b4ae..41862e9a4c2 100644
--- a/include/drm/radeon_drm.h
+++ b/include/drm/radeon_drm.h
@@ -496,6 +496,16 @@ typedef struct {
 #define DRM_RADEON_SETPARAM   0x19
 #define DRM_RADEON_SURF_ALLOC 0x1a
 #define DRM_RADEON_SURF_FREE  0x1b
+/* KMS ioctl */
+#define DRM_RADEON_GEM_INFO		0x1c
+#define DRM_RADEON_GEM_CREATE		0x1d
+#define DRM_RADEON_GEM_MMAP		0x1e
+#define DRM_RADEON_GEM_PREAD		0x21
+#define DRM_RADEON_GEM_PWRITE		0x22
+#define DRM_RADEON_GEM_SET_DOMAIN	0x23
+#define DRM_RADEON_GEM_WAIT_IDLE	0x24
+#define DRM_RADEON_CS			0x26
+#define DRM_RADEON_INFO			0x27
 
 #define DRM_IOCTL_RADEON_CP_INIT    DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t)
 #define DRM_IOCTL_RADEON_CP_START   DRM_IO(  DRM_COMMAND_BASE + DRM_RADEON_CP_START)
@@ -524,6 +534,17 @@ typedef struct {
 #define DRM_IOCTL_RADEON_SETPARAM   DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_SETPARAM, drm_radeon_setparam_t)
 #define DRM_IOCTL_RADEON_SURF_ALLOC DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_SURF_ALLOC, drm_radeon_surface_alloc_t)
 #define DRM_IOCTL_RADEON_SURF_FREE  DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_SURF_FREE, drm_radeon_surface_free_t)
+/* KMS */
+#define DRM_IOCTL_RADEON_GEM_INFO	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_INFO, struct drm_radeon_gem_info)
+#define DRM_IOCTL_RADEON_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_CREATE, struct drm_radeon_gem_create)
+#define DRM_IOCTL_RADEON_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_MMAP, struct drm_radeon_gem_mmap)
+#define DRM_IOCTL_RADEON_GEM_PREAD	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_PREAD, struct drm_radeon_gem_pread)
+#define DRM_IOCTL_RADEON_GEM_PWRITE	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_PWRITE, struct drm_radeon_gem_pwrite)
+#define DRM_IOCTL_RADEON_GEM_SET_DOMAIN	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_DOMAIN, struct drm_radeon_gem_set_domain)
+#define DRM_IOCTL_RADEON_GEM_WAIT_IDLE	DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_GEM_WAIT_IDLE, struct drm_radeon_gem_wait_idle)
+#define DRM_IOCTL_RADEON_CS		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_CS, struct drm_radeon_cs)
+#define DRM_IOCTL_RADEON_INFO		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_INFO, struct drm_radeon_info)
+
 
 typedef struct drm_radeon_init {
 	enum {
@@ -682,6 +703,7 @@ typedef struct drm_radeon_indirect {
 #define RADEON_PARAM_VBLANK_CRTC           13   /* VBLANK CRTC */
 #define RADEON_PARAM_FB_LOCATION           14   /* FB location */
 #define RADEON_PARAM_NUM_GB_PIPES          15   /* num GB pipes */
+#define RADEON_PARAM_DEVICE_ID             16
 
 typedef struct drm_radeon_getparam {
 	int param;
@@ -751,4 +773,112 @@ typedef struct drm_radeon_surface_free {
 #define	DRM_RADEON_VBLANK_CRTC1		1
 #define	DRM_RADEON_VBLANK_CRTC2		2
 
+/*
+ * Kernel modesetting world below.
+ */
+#define RADEON_GEM_DOMAIN_CPU		0x1
+#define RADEON_GEM_DOMAIN_GTT		0x2
+#define RADEON_GEM_DOMAIN_VRAM		0x4
+
+struct drm_radeon_gem_info {
+	uint64_t	gart_size;
+	uint64_t	vram_size;
+	uint64_t	vram_visible;
+};
+
+#define RADEON_GEM_NO_BACKING_STORE 1
+
+struct drm_radeon_gem_create {
+	uint64_t	size;
+	uint64_t	alignment;
+	uint32_t	handle;
+	uint32_t	initial_domain;
+	uint32_t	flags;
+};
+
+struct drm_radeon_gem_mmap {
+	uint32_t	handle;
+	uint32_t	pad;
+	uint64_t	offset;
+	uint64_t	size;
+	uint64_t	addr_ptr;
+};
+
+struct drm_radeon_gem_set_domain {
+	uint32_t	handle;
+	uint32_t	read_domains;
+	uint32_t	write_domain;
+};
+
+struct drm_radeon_gem_wait_idle {
+	uint32_t	handle;
+	uint32_t	pad;
+};
+
+struct drm_radeon_gem_busy {
+	uint32_t	handle;
+	uint32_t	busy;
+};
+
+struct drm_radeon_gem_pread {
+	/** Handle for the object being read. */
+	uint32_t handle;
+	uint32_t pad;
+	/** Offset into the object to read from */
+	uint64_t offset;
+	/** Length of data to read */
+	uint64_t size;
+	/** Pointer to write the data into. */
+	/* void *, but pointers are not 32/64 compatible */
+	uint64_t data_ptr;
+};
+
+struct drm_radeon_gem_pwrite {
+	/** Handle for the object being written to. */
+	uint32_t handle;
+	uint32_t pad;
+	/** Offset into the object to write to */
+	uint64_t offset;
+	/** Length of data to write */
+	uint64_t size;
+	/** Pointer to read the data from. */
+	/* void *, but pointers are not 32/64 compatible */
+	uint64_t data_ptr;
+};
+
+#define RADEON_CHUNK_ID_RELOCS	0x01
+#define RADEON_CHUNK_ID_IB	0x02
+
+struct drm_radeon_cs_chunk {
+	uint32_t		chunk_id;
+	uint32_t		length_dw;
+	uint64_t		chunk_data;
+};
+
+struct drm_radeon_cs_reloc {
+	uint32_t		handle;
+	uint32_t		read_domains;
+	uint32_t		write_domain;
+	uint32_t		flags;
+};
+
+struct drm_radeon_cs {
+	uint32_t		num_chunks;
+	uint32_t		cs_id;
+	/* this points to uint64_t * which point to cs chunks */
+	uint64_t		chunks;
+	/* updates to the limits after this CS ioctl */
+	uint64_t		gart_limit;
+	uint64_t		vram_limit;
+};
+
+#define RADEON_INFO_DEVICE_ID		0x00
+#define RADEON_INFO_NUM_GB_PIPES	0x01
+
+struct drm_radeon_info {
+	uint32_t		request;
+	uint32_t		pad;
+	uint64_t		value;
+};
+
 #endif
-- 
cgit v1.2.3-70-g09d2


From 09d4e0edd4614e787393acc582ac701c6ec3565b Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 12 Jun 2009 21:10:05 +0000
Subject: lib: Provide generic atomic64_t implementation

Many processor architectures have no 64-bit atomic instructions, but
we need atomic64_t in order to support the perf_counter subsystem.

This adds an implementation of 64-bit atomic operations using hashed
spinlocks to provide atomicity.  For each atomic operation, the address
of the atomic64_t variable is hashed to an index into an array of 16
spinlocks.  That spinlock is taken (with interrupts disabled) around the
operation, which can then be coded non-atomically within the lock.

On UP, all the spinlock manipulation goes away and we simply disable
interrupts around each operation.  In fact gcc eliminates the whole
atomic64_lock variable as well.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 include/asm-generic/atomic64.h |  42 ++++++++++
 lib/Kconfig                    |   6 ++
 lib/Makefile                   |   2 +
 lib/atomic64.c                 | 175 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 225 insertions(+)
 create mode 100644 include/asm-generic/atomic64.h
 create mode 100644 lib/atomic64.c

(limited to 'include')

diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
new file mode 100644
index 00000000000..b18ce4f9ee3
--- /dev/null
+++ b/include/asm-generic/atomic64.h
@@ -0,0 +1,42 @@
+/*
+ * Generic implementation of 64-bit atomics using spinlocks,
+ * useful on processors that don't have 64-bit atomic instructions.
+ *
+ * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _ASM_GENERIC_ATOMIC64_H
+#define _ASM_GENERIC_ATOMIC64_H
+
+typedef struct {
+	long long counter;
+} atomic64_t;
+
+#define ATOMIC64_INIT(i)	{ (i) }
+
+extern long long atomic64_read(const atomic64_t *v);
+extern void	 atomic64_set(atomic64_t *v, long long i);
+extern void	 atomic64_add(long long a, atomic64_t *v);
+extern long long atomic64_add_return(long long a, atomic64_t *v);
+extern void	 atomic64_sub(long long a, atomic64_t *v);
+extern long long atomic64_sub_return(long long a, atomic64_t *v);
+extern long long atomic64_dec_if_positive(atomic64_t *v);
+extern long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n);
+extern long long atomic64_xchg(atomic64_t *v, long long new);
+extern int	 atomic64_add_unless(atomic64_t *v, long long a, long long u);
+
+#define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
+#define atomic64_inc(v)			atomic64_add(1LL, (v))
+#define atomic64_inc_return(v)		atomic64_add_return(1LL, (v))
+#define atomic64_inc_and_test(v) 	(atomic64_inc_return(v) == 0)
+#define atomic64_sub_and_test(a, v)	(atomic64_sub_return((a), (v)) == 0)
+#define atomic64_dec(v)			atomic64_sub(1LL, (v))
+#define atomic64_dec_return(v)		atomic64_sub_return(1LL, (v))
+#define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
+#define atomic64_inc_not_zero(v) 	atomic64_add_unless((v), 1LL, 0LL)
+
+#endif  /*  _ASM_GENERIC_ATOMIC64_H  */
diff --git a/lib/Kconfig b/lib/Kconfig
index 9960be04cbb..bb1326d3839 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -194,4 +194,10 @@ config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
 config NLATTR
 	bool
 
+#
+# Generic 64-bit atomic support is selected if needed
+#
+config GENERIC_ATOMIC64
+       bool
+
 endmenu
diff --git a/lib/Makefile b/lib/Makefile
index 34c5c0e6222..8e9bcf9d326 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -95,6 +95,8 @@ obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o
 
 obj-$(CONFIG_GENERIC_CSUM) += checksum.o
 
+obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/lib/atomic64.c b/lib/atomic64.c
new file mode 100644
index 00000000000..c5e72556241
--- /dev/null
+++ b/lib/atomic64.c
@@ -0,0 +1,175 @@
+/*
+ * Generic implementation of 64-bit atomics using spinlocks,
+ * useful on processors that don't have 64-bit atomic instructions.
+ *
+ * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <asm/atomic.h>
+
+/*
+ * We use a hashed array of spinlocks to provide exclusive access
+ * to each atomic64_t variable.  Since this is expected to used on
+ * systems with small numbers of CPUs (<= 4 or so), we use a
+ * relatively small array of 16 spinlocks to avoid wasting too much
+ * memory on the spinlock array.
+ */
+#define NR_LOCKS	16
+
+/*
+ * Ensure each lock is in a separate cacheline.
+ */
+static union {
+	spinlock_t lock;
+	char pad[L1_CACHE_BYTES];
+} atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp;
+
+static inline spinlock_t *lock_addr(const atomic64_t *v)
+{
+	unsigned long addr = (unsigned long) v;
+
+	addr >>= L1_CACHE_SHIFT;
+	addr ^= (addr >> 8) ^ (addr >> 16);
+	return &atomic64_lock[addr & (NR_LOCKS - 1)].lock;
+}
+
+long long atomic64_read(const atomic64_t *v)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter;
+	spin_unlock_irqrestore(lock, flags);
+	return val;
+}
+
+void atomic64_set(atomic64_t *v, long long i)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+
+	spin_lock_irqsave(lock, flags);
+	v->counter = i;
+	spin_unlock_irqrestore(lock, flags);
+}
+
+void atomic64_add(long long a, atomic64_t *v)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+
+	spin_lock_irqsave(lock, flags);
+	v->counter += a;
+	spin_unlock_irqrestore(lock, flags);
+}
+
+long long atomic64_add_return(long long a, atomic64_t *v)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter += a;
+	spin_unlock_irqrestore(lock, flags);
+	return val;
+}
+
+void atomic64_sub(long long a, atomic64_t *v)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+
+	spin_lock_irqsave(lock, flags);
+	v->counter -= a;
+	spin_unlock_irqrestore(lock, flags);
+}
+
+long long atomic64_sub_return(long long a, atomic64_t *v)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter -= a;
+	spin_unlock_irqrestore(lock, flags);
+	return val;
+}
+
+long long atomic64_dec_if_positive(atomic64_t *v)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter - 1;
+	if (val >= 0)
+		v->counter = val;
+	spin_unlock_irqrestore(lock, flags);
+	return val;
+}
+
+long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter;
+	if (val == o)
+		v->counter = n;
+	spin_unlock_irqrestore(lock, flags);
+	return val;
+}
+
+long long atomic64_xchg(atomic64_t *v, long long new)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	long long val;
+
+	spin_lock_irqsave(lock, flags);
+	val = v->counter;
+	v->counter = new;
+	spin_unlock_irqrestore(lock, flags);
+	return val;
+}
+
+int atomic64_add_unless(atomic64_t *v, long long a, long long u)
+{
+	unsigned long flags;
+	spinlock_t *lock = lock_addr(v);
+	int ret = 1;
+
+	spin_lock_irqsave(lock, flags);
+	if (v->counter != u) {
+		v->counter += a;
+		ret = 0;
+	}
+	spin_unlock_irqrestore(lock, flags);
+	return ret;
+}
+
+static int init_atomic64_lock(void)
+{
+	int i;
+
+	for (i = 0; i < NR_LOCKS; ++i)
+		spin_lock_init(&atomic64_lock[i].lock);
+	return 0;
+}
+
+pure_initcall(init_atomic64_lock);
-- 
cgit v1.2.3-70-g09d2


From 13685a1654b65357fb34066a98ef40445f7820fc Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Date: Wed, 10 Jun 2009 04:38:40 +0000
Subject: block: Add bio_list_peek()

Introduce bio_list_peek(), to obtain a pointer to the first bio on the bio_list
without actually removing it from the list. This is needed when you want to
serialize based on the list being empty or not.

Signed-off-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 include/linux/bio.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 12737be5860..2a04eb54c0d 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -590,6 +590,11 @@ static inline void bio_list_merge_head(struct bio_list *bl,
 	bl->head = bl2->head;
 }
 
+static inline struct bio *bio_list_peek(struct bio_list *bl)
+{
+	return bl->head;
+}
+
 static inline struct bio *bio_list_pop(struct bio_list *bl)
 {
 	struct bio *bio = bl->head;
-- 
cgit v1.2.3-70-g09d2


From ca44d6e60f9de26281fda203f58b570e1748c015 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 15 Jun 2009 02:31:47 -0700
Subject: pkt_sched: Rename PSCHED_US2NS and PSCHED_NS2US

Let's use TICKS instead of US, so PSCHED_TICKS2NS and PSCHED_NS2TICKS
(like in PSCHED_TICKS_PER_SEC already) to avoid misleading.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h | 8 ++++----
 net/sched/sch_api.c     | 4 ++--
 net/sched/sch_cbq.c     | 4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 120935b2abd..82a3191375f 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -43,15 +43,15 @@ typedef long	psched_tdiff_t;
 
 /* Avoid doing 64 bit divide */
 #define PSCHED_SHIFT			6
-#define PSCHED_US2NS(x)			((s64)(x) << PSCHED_SHIFT)
-#define PSCHED_NS2US(x)			((x) >> PSCHED_SHIFT)
+#define PSCHED_TICKS2NS(x)		((s64)(x) << PSCHED_SHIFT)
+#define PSCHED_NS2TICKS(x)		((x) >> PSCHED_SHIFT)
 
-#define PSCHED_TICKS_PER_SEC		PSCHED_NS2US(NSEC_PER_SEC)
+#define PSCHED_TICKS_PER_SEC		PSCHED_NS2TICKS(NSEC_PER_SEC)
 #define PSCHED_PASTPERFECT		0
 
 static inline psched_time_t psched_get_time(void)
 {
-	return PSCHED_NS2US(ktime_to_ns(ktime_get()));
+	return PSCHED_NS2TICKS(ktime_to_ns(ktime_get()));
 }
 
 static inline psched_tdiff_t
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 32009793307..24d17ce9c29 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -484,7 +484,7 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
 
 	wd->qdisc->flags |= TCQ_F_THROTTLED;
 	time = ktime_set(0, 0);
-	time = ktime_add_ns(time, PSCHED_US2NS(expires));
+	time = ktime_add_ns(time, PSCHED_TICKS2NS(expires));
 	hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
 }
 EXPORT_SYMBOL(qdisc_watchdog_schedule);
@@ -1680,7 +1680,7 @@ static int psched_show(struct seq_file *seq, void *v)
 
 	hrtimer_get_res(CLOCK_MONOTONIC, &ts);
 	seq_printf(seq, "%08x %08x %08x %08x\n",
-		   (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
+		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
 		   1000000,
 		   (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));
 
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index d728d811173..23a167670fd 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -509,7 +509,7 @@ static void cbq_ovl_delay(struct cbq_class *cl)
 			q->pmask |= (1<<TC_CBQ_MAXPRIO);
 
 			expires = ktime_set(0, 0);
-			expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
+			expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
 			if (hrtimer_try_to_cancel(&q->delay_timer) &&
 			    ktime_to_ns(ktime_sub(
 					hrtimer_get_expires(&q->delay_timer),
@@ -620,7 +620,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 		ktime_t time;
 
 		time = ktime_set(0, 0);
-		time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
+		time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
 		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 55f4fa4e33e90c6b25b4c8ed038392a73b654fef Mon Sep 17 00:00:00 2001
From: Robert Jarzmik <robert.jarzmik@free.fr>
Date: Thu, 23 Apr 2009 20:10:43 +0200
Subject: Maxim 1586 regulator driver

The Maxim 1586 regulator is a voltage regulator with 2
voltage outputs, specially suitable for Marvell PXA
chips. One output is in the range of required VCC_CORE by
the PXA27x chips, the other in the VCC_USIM required as well
by PXA27x chips.

The chip is controlled through the I2C bus.

Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 drivers/regulator/Kconfig         |   9 ++
 drivers/regulator/Makefile        |   1 +
 drivers/regulator/max1586.c       | 249 ++++++++++++++++++++++++++++++++++++++
 include/linux/regulator/max1586.h |  52 ++++++++
 4 files changed, 311 insertions(+)
 create mode 100644 drivers/regulator/max1586.c
 create mode 100644 include/linux/regulator/max1586.h

(limited to 'include')

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index e58c0ce65aa..707da4d2353 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -56,6 +56,15 @@ config REGULATOR_BQ24022
 	  charging select between 100 mA and 500 mA charging current
 	  limit.
 
+config REGULATOR_MAX1586
+	tristate "Maxim 1586/1587 voltage regulator"
+	depends on I2C
+	default n
+	help
+	  This driver controls a Maxim 1586 or 1587 voltage output
+	  regulator via I2C bus. The provided regulator is suitable
+	  for PXA27x chips to control VCC_CORE and VCC_USIM voltages.
+
 config REGULATOR_TWL4030
 	bool "TI TWL4030/TWL5030/TPS695x0 PMIC"
 	depends on TWL4030_CORE
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index bac133afc06..1d7de87a8e1 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_REGULATOR_FIXED_VOLTAGE) += fixed.o
 obj-$(CONFIG_REGULATOR_VIRTUAL_CONSUMER) += virtual.o
 
 obj-$(CONFIG_REGULATOR_BQ24022) += bq24022.o
+obj-$(CONFIG_REGULATOR_MAX1586) += max1586.o
 obj-$(CONFIG_REGULATOR_TWL4030) += twl4030-regulator.o
 obj-$(CONFIG_REGULATOR_WM8350) += wm8350-regulator.o
 obj-$(CONFIG_REGULATOR_WM8400) += wm8400-regulator.o
diff --git a/drivers/regulator/max1586.c b/drivers/regulator/max1586.c
new file mode 100644
index 00000000000..bbbb55fcfe8
--- /dev/null
+++ b/drivers/regulator/max1586.c
@@ -0,0 +1,249 @@
+/*
+ * max1586.c  --  Voltage and current regulation for the Maxim 1586
+ *
+ * Copyright (C) 2008 Robert Jarzmik
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/max1586.h>
+
+#define MAX1586_V3_MAX_VSEL 31
+#define MAX1586_V6_MAX_VSEL 3
+
+#define MAX1586_V3_MIN_UV   700000
+#define MAX1586_V3_MAX_UV  1475000
+#define MAX1586_V3_STEP_UV   25000
+
+#define MAX1586_V6_MIN_UV        0
+#define MAX1586_V6_MAX_UV  3000000
+
+#define I2C_V3_SELECT (0 << 5)
+#define I2C_V6_SELECT (1 << 5)
+
+/*
+ * V3 voltage
+ * On I2C bus, sending a "x" byte to the max1586 means :
+ *   set V3 to 0.700V + (x & 0x1f) * 0.025V
+ */
+static int max1586_v3_calc_voltage(unsigned selector)
+{
+	return MAX1586_V3_MIN_UV + (MAX1586_V3_STEP_UV * selector);
+}
+
+static int max1586_v3_set(struct regulator_dev *rdev, int min_uV, int max_uV)
+{
+	struct i2c_client *client = rdev_get_drvdata(rdev);
+	unsigned selector;
+	u8 v3_prog;
+
+	if (min_uV < MAX1586_V3_MIN_UV || min_uV > MAX1586_V3_MAX_UV)
+		return -EINVAL;
+	if (max_uV < MAX1586_V3_MIN_UV || max_uV > MAX1586_V3_MAX_UV)
+		return -EINVAL;
+
+	selector = (min_uV - MAX1586_V3_MIN_UV) / MAX1586_V3_STEP_UV;
+	if (max1586_v3_calc_voltage(selector) > max_uV)
+		return -EINVAL;
+
+	dev_dbg(&client->dev, "changing voltage v3 to %dmv\n",
+		max1586_v3_calc_voltage(selector) / 1000);
+
+	v3_prog = I2C_V3_SELECT | (u8) selector;
+	return i2c_smbus_write_byte(client, v3_prog);
+}
+
+static int max1586_v3_list(struct regulator_dev *rdev, unsigned selector)
+{
+	if (selector > MAX1586_V3_MAX_VSEL)
+		return -EINVAL;
+	return max1586_v3_calc_voltage(selector);
+}
+
+/*
+ * V6 voltage
+ * On I2C bus, sending a "x" byte to the max1586 means :
+ *   set V6 to either 0V, 1.8V, 2.5V, 3V depending on (x & 0x3)
+ * As regulator framework doesn't accept voltages to be 0V, we use 1uV.
+ */
+static int max1586_v6_calc_voltage(unsigned selector)
+{
+	static int voltages_uv[] = { 1, 1800000, 2500000, 3000000 };
+
+	return voltages_uv[selector];
+}
+
+static int max1586_v6_set(struct regulator_dev *rdev, int min_uV, int max_uV)
+{
+	struct i2c_client *client = rdev_get_drvdata(rdev);
+	unsigned selector;
+	u8 v6_prog;
+
+	if (min_uV < MAX1586_V6_MIN_UV || min_uV > MAX1586_V6_MAX_UV)
+		return -EINVAL;
+	if (max_uV < MAX1586_V6_MIN_UV || max_uV > MAX1586_V6_MAX_UV)
+		return -EINVAL;
+
+	if (min_uV >= 3000000)
+		selector = 3;
+	if (min_uV < 3000000)
+		selector = 2;
+	if (min_uV < 2500000)
+		selector = 1;
+	if (min_uV < 1800000)
+		selector = 0;
+
+	if (max1586_v6_calc_voltage(selector) > max_uV)
+		return -EINVAL;
+
+	dev_dbg(&client->dev, "changing voltage v6 to %dmv\n",
+		max1586_v6_calc_voltage(selector) / 1000);
+
+	v6_prog = I2C_V6_SELECT | (u8) selector;
+	return i2c_smbus_write_byte(client, v6_prog);
+}
+
+static int max1586_v6_list(struct regulator_dev *rdev, unsigned selector)
+{
+	if (selector > MAX1586_V6_MAX_VSEL)
+		return -EINVAL;
+	return max1586_v6_calc_voltage(selector);
+}
+
+/*
+ * The Maxim 1586 controls V3 and V6 voltages, but offers no way of reading back
+ * the set up value.
+ */
+static struct regulator_ops max1586_v3_ops = {
+	.set_voltage = max1586_v3_set,
+	.list_voltage = max1586_v3_list,
+};
+
+static struct regulator_ops max1586_v6_ops = {
+	.set_voltage = max1586_v6_set,
+	.list_voltage = max1586_v6_list,
+};
+
+static struct regulator_desc max1586_reg[] = {
+	{
+		.name = "Output_V3",
+		.id = MAX1586_V3,
+		.ops = &max1586_v3_ops,
+		.type = REGULATOR_VOLTAGE,
+		.n_voltages = MAX1586_V3_MAX_VSEL + 1,
+		.owner = THIS_MODULE,
+	},
+	{
+		.name = "Output_V6",
+		.id = MAX1586_V6,
+		.ops = &max1586_v6_ops,
+		.type = REGULATOR_VOLTAGE,
+		.n_voltages = MAX1586_V6_MAX_VSEL + 1,
+		.owner = THIS_MODULE,
+	},
+};
+
+static int max1586_pmic_probe(struct i2c_client *client,
+			      const struct i2c_device_id *i2c_id)
+{
+	struct regulator_dev **rdev;
+	struct max1586_platform_data *pdata = client->dev.platform_data;
+	int i, id, ret = 0;
+
+	rdev = kzalloc(sizeof(struct regulator_dev *) * (MAX1586_V6 + 1),
+		       GFP_KERNEL);
+	if (!rdev)
+		return -ENOMEM;
+
+	ret = -EINVAL;
+	for (i = 0; i < pdata->num_subdevs && i <= MAX1586_V6; i++) {
+		id = pdata->subdevs[i].id;
+		if (!pdata->subdevs[i].platform_data)
+			continue;
+		if (id < MAX1586_V3 || id > MAX1586_V6) {
+			dev_err(&client->dev, "invalid regulator id %d\n", id);
+			goto err;
+		}
+		rdev[i] = regulator_register(&max1586_reg[id], &client->dev,
+					     pdata->subdevs[i].platform_data,
+					     client);
+		if (IS_ERR(rdev[i])) {
+			ret = PTR_ERR(rdev[i]);
+			dev_err(&client->dev, "failed to register %s\n",
+				max1586_reg[id].name);
+			goto err;
+		}
+	}
+
+	i2c_set_clientdata(client, rdev);
+	dev_info(&client->dev, "Maxim 1586 regulator driver loaded\n");
+	return 0;
+
+err:
+	while (--i >= 0)
+		regulator_unregister(rdev[i]);
+	kfree(rdev);
+	return ret;
+}
+
+static int max1586_pmic_remove(struct i2c_client *client)
+{
+	struct regulator_dev **rdev = i2c_get_clientdata(client);
+	int i;
+
+	for (i = 0; i <= MAX1586_V6; i++)
+		if (rdev[i])
+			regulator_unregister(rdev[i]);
+	kfree(rdev);
+	i2c_set_clientdata(client, NULL);
+
+	return 0;
+}
+
+static const struct i2c_device_id max1586_id[] = {
+	{ "max1586", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, max1586_id);
+
+static struct i2c_driver max1586_pmic_driver = {
+	.probe = max1586_pmic_probe,
+	.remove = max1586_pmic_remove,
+	.driver		= {
+		.name	= "max1586",
+	},
+	.id_table	= max1586_id,
+};
+
+static int __init max1586_pmic_init(void)
+{
+	return i2c_add_driver(&max1586_pmic_driver);
+}
+subsys_initcall(max1586_pmic_init);
+
+static void __exit max1586_pmic_exit(void)
+{
+	i2c_del_driver(&max1586_pmic_driver);
+}
+module_exit(max1586_pmic_exit);
+
+/* Module information */
+MODULE_DESCRIPTION("MAXIM 1586 voltage regulator driver");
+MODULE_AUTHOR("Robert Jarzmik");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/regulator/max1586.h b/include/linux/regulator/max1586.h
new file mode 100644
index 00000000000..2056973396b
--- /dev/null
+++ b/include/linux/regulator/max1586.h
@@ -0,0 +1,52 @@
+/*
+ * max1586.h  --  Voltage regulation for the Maxim 1586
+ *
+ * Copyright (C) 2008 Robert Jarzmik
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef REGULATOR_MAX1586
+#define REGULATOR_MAX1586
+
+#include <linux/regulator/machine.h>
+
+#define MAX1586_V3 0
+#define MAX1586_V6 1
+
+/**
+ * max1586_subdev_data - regulator data
+ * @id: regulator Id (either MAX1586_V3 or MAX1586_V6)
+ * @name: regulator cute name (example for V3: "vcc_core")
+ * @platform_data: regulator init data (contraints, supplies, ...)
+ */
+struct max1586_subdev_data {
+	int				id;
+	char				*name;
+	struct regulator_init_data	*platform_data;
+};
+
+/**
+ * max1586_platform_data - platform data for max1586
+ * @num_subdevs: number of regultors used (may be 1 or 2)
+ * @subdevs: regulator used
+ *           At most, there will be a regulator for V3 and one for V6 voltages.
+ */
+struct max1586_platform_data {
+	int num_subdevs;
+	struct max1586_subdev_data *subdevs;
+};
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From 1d98cccf7f8b944ba4ea56d14bbb7c2eeee59bfe Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Sun, 26 Apr 2009 16:49:39 +0300
Subject: regulator: add userspace-consumer driver

The userspace-consumer driver allows control of voltage and current
regulator state from userspace. This is required for fine-grained
power management of devices that are completely controller by userspace
applications, e.g. a GPS transciever connected to a serial port.

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 drivers/regulator/Kconfig                    |  10 ++
 drivers/regulator/Makefile                   |   1 +
 drivers/regulator/userspace-consumer.c       | 200 +++++++++++++++++++++++++++
 include/linux/regulator/userspace-consumer.h |  25 ++++
 4 files changed, 236 insertions(+)
 create mode 100644 drivers/regulator/userspace-consumer.c
 create mode 100644 include/linux/regulator/userspace-consumer.h

(limited to 'include')

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 707da4d2353..5bec17cf1d5 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -47,6 +47,16 @@ config REGULATOR_VIRTUAL_CONSUMER
 
           If unsure, say no.
 
+config REGULATOR_USERSPACE_CONSUMER
+	tristate "Userspace regulator consumer support"
+	default n
+	help
+	  There are some classes of devices that are controlled entirely
+	  from user space. Usersapce consumer driver provides ability to
+	  control power supplies for such devices.
+
+          If unsure, say no.
+
 config REGULATOR_BQ24022
 	tristate "TI bq24022 Dual Input 1-Cell Li-Ion Charger IC"
 	default n
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 1d7de87a8e1..faf7bcc1af9 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -6,6 +6,7 @@
 obj-$(CONFIG_REGULATOR) += core.o
 obj-$(CONFIG_REGULATOR_FIXED_VOLTAGE) += fixed.o
 obj-$(CONFIG_REGULATOR_VIRTUAL_CONSUMER) += virtual.o
+obj-$(CONFIG_REGULATOR_USERSPACE_CONSUMER) += userspace-consumer.o
 
 obj-$(CONFIG_REGULATOR_BQ24022) += bq24022.o
 obj-$(CONFIG_REGULATOR_MAX1586) += max1586.o
diff --git a/drivers/regulator/userspace-consumer.c b/drivers/regulator/userspace-consumer.c
new file mode 100644
index 00000000000..71fcf9df00f
--- /dev/null
+++ b/drivers/regulator/userspace-consumer.c
@@ -0,0 +1,200 @@
+/*
+ * userspace-consumer.c
+ *
+ * Copyright 2009 CompuLab, Ltd.
+ *
+ * Author: Mike Rapoport <mike@compulab.co.il>
+ *
+ * Based of virtual consumer driver:
+ *   Copyright 2008 Wolfson Microelectronics PLC.
+ *   Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/err.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
+#include <linux/regulator/userspace-consumer.h>
+
+struct userspace_consumer_data {
+	const char *name;
+
+	struct mutex lock;
+	bool enabled;
+
+	int num_supplies;
+	struct regulator_bulk_data *supplies;
+};
+
+static ssize_t show_name(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	struct userspace_consumer_data *data = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%s\n", data->name);
+}
+
+static ssize_t show_state(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	struct userspace_consumer_data *data = dev_get_drvdata(dev);
+
+	if (data->enabled)
+		return sprintf(buf, "enabled\n");
+
+	return sprintf(buf, "disabled\n");
+}
+
+static ssize_t set_state(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t count)
+{
+	struct userspace_consumer_data *data = dev_get_drvdata(dev);
+	bool enabled;
+	int ret;
+
+	/*
+	 * sysfs_streq() doesn't need the \n's, but we add them so the strings
+	 * will be shared with show_state(), above.
+	 */
+	if (sysfs_streq(buf, "enabled\n") || sysfs_streq(buf, "1"))
+		enabled = true;
+	else if (sysfs_streq(buf, "disabled\n") || sysfs_streq(buf, "0"))
+		enabled = false;
+	else {
+		dev_err(dev, "Configuring invalid mode\n");
+		return count;
+	}
+
+	mutex_lock(&data->lock);
+	if (enabled != data->enabled) {
+		if (enabled)
+			ret = regulator_bulk_enable(data->num_supplies,
+						    data->supplies);
+		else
+			ret = regulator_bulk_disable(data->num_supplies,
+						     data->supplies);
+
+		if (ret == 0)
+			data->enabled = enabled;
+		else
+			dev_err(dev, "Failed to configure state: %d\n", ret);
+	}
+	mutex_unlock(&data->lock);
+
+	return count;
+}
+
+static DEVICE_ATTR(name, 0444, show_name, NULL);
+static DEVICE_ATTR(state, 0644, show_state, set_state);
+
+static struct device_attribute *attributes[] = {
+	&dev_attr_name,
+	&dev_attr_state,
+};
+
+static int regulator_userspace_consumer_probe(struct platform_device *pdev)
+{
+	struct regulator_userspace_consumer_data *pdata;
+	struct userspace_consumer_data *drvdata;
+	int ret, i;
+
+	pdata = pdev->dev.platform_data;
+	if (!pdata)
+		return -EINVAL;
+
+	drvdata = kzalloc(sizeof(struct userspace_consumer_data), GFP_KERNEL);
+	if (drvdata == NULL)
+		return -ENOMEM;
+
+	drvdata->name = pdata->name;
+	drvdata->num_supplies = pdata->num_supplies;
+	drvdata->supplies = pdata->supplies;
+
+	mutex_init(&drvdata->lock);
+
+	ret = regulator_bulk_get(&pdev->dev, drvdata->num_supplies,
+				 drvdata->supplies);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to get supplies: %d\n", ret);
+		goto err_alloc_supplies;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(attributes); i++) {
+		ret = device_create_file(&pdev->dev, attributes[i]);
+		if (ret != 0)
+			goto err_create_attrs;
+	}
+
+	if (pdata->init_on)
+		ret = regulator_bulk_enable(drvdata->num_supplies,
+					    drvdata->supplies);
+
+	drvdata->enabled = pdata->init_on;
+
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to set initial state: %d\n", ret);
+		goto err_create_attrs;
+	}
+
+	platform_set_drvdata(pdev, drvdata);
+
+	return 0;
+
+err_create_attrs:
+	for (i = 0; i < ARRAY_SIZE(attributes); i++)
+		device_remove_file(&pdev->dev, attributes[i]);
+
+	regulator_bulk_free(drvdata->num_supplies, drvdata->supplies);
+
+err_alloc_supplies:
+	kfree(drvdata);
+	return ret;
+}
+
+static int regulator_userspace_consumer_remove(struct platform_device *pdev)
+{
+	struct userspace_consumer_data *data = platform_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(attributes); i++)
+		device_remove_file(&pdev->dev, attributes[i]);
+
+	if (data->enabled)
+		regulator_bulk_disable(data->num_supplies, data->supplies);
+
+	regulator_bulk_free(data->num_supplies, data->supplies);
+	kfree(data);
+
+	return 0;
+}
+
+static struct platform_driver regulator_userspace_consumer_driver = {
+	.probe		= regulator_userspace_consumer_probe,
+	.remove		= regulator_userspace_consumer_remove,
+	.driver		= {
+		.name		= "reg-userspace-consumer",
+	},
+};
+
+
+static int __init regulator_userspace_consumer_init(void)
+{
+	return platform_driver_register(&regulator_userspace_consumer_driver);
+}
+module_init(regulator_userspace_consumer_init);
+
+static void __exit regulator_userspace_consumer_exit(void)
+{
+	platform_driver_unregister(&regulator_userspace_consumer_driver);
+}
+module_exit(regulator_userspace_consumer_exit);
+
+MODULE_AUTHOR("Mike Rapoport <mike@compulab.co.il>");
+MODULE_DESCRIPTION("Userspace consumer for voltage and current regulators");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/regulator/userspace-consumer.h b/include/linux/regulator/userspace-consumer.h
new file mode 100644
index 00000000000..b4554ce9d4b
--- /dev/null
+++ b/include/linux/regulator/userspace-consumer.h
@@ -0,0 +1,25 @@
+#ifndef __REGULATOR_PLATFORM_CONSUMER_H_
+#define __REGULATOR_PLATFORM_CONSUMER_H_
+
+struct regulator_consumer_supply;
+
+/**
+ * struct regulator_userspace_consumer_data - line consumer
+ * initialisation data.
+ *
+ * @name: Name for the consumer line
+ * @num_supplies: Number of supplies feeding the line
+ * @supplies: Supplies configuration.
+ * @init_on: Set if the regulators supplying the line should be
+ *           enabled during initialisation
+ */
+struct regulator_userspace_consumer_data {
+	const char *name;
+
+	int num_supplies;
+	struct regulator_bulk_data *supplies;
+
+	bool init_on;
+};
+
+#endif /* __REGULATOR_PLATFORM_CONSUMER_H_ */
-- 
cgit v1.2.3-70-g09d2


From 0cbdf7bce5b98807b946d1a96956f30dcae24a50 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Tue, 19 May 2009 07:33:55 +0200
Subject: LP3971 PMIC regulator driver (updated and combined version)

This patch adds regulator drivers for National Semiconductors LP3971 PMIC.
This LP3971 PMIC controller has 3 DC/DC voltage converters and 5 low
drop-out (LDO) regulators. LP3971 PMIC controller uses I2C interface.

Reviewed-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 drivers/regulator/Kconfig        |   7 +
 drivers/regulator/Makefile       |   1 +
 drivers/regulator/lp3971.c       | 562 +++++++++++++++++++++++++++++++++++++++
 include/linux/regulator/lp3971.h |  51 ++++
 4 files changed, 621 insertions(+)
 create mode 100644 drivers/regulator/lp3971.c
 create mode 100644 include/linux/regulator/lp3971.h

(limited to 'include')

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 5bec17cf1d5..f4317798e47 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -110,4 +110,11 @@ config REGULATOR_PCF50633
 	 Say Y here to support the voltage regulators and convertors
 	 on PCF50633
 
+config REGULATOR_LP3971
+	tristate "National Semiconductors LP3971 PMIC regulator driver"
+	depends on I2C
+	help
+	 Say Y here to support the voltage regulators and convertors
+	 on National Semiconductors LP3971 PMIC
+
 endif
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index faf7bcc1af9..4d762c4cccf 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_REGULATOR_VIRTUAL_CONSUMER) += virtual.o
 obj-$(CONFIG_REGULATOR_USERSPACE_CONSUMER) += userspace-consumer.o
 
 obj-$(CONFIG_REGULATOR_BQ24022) += bq24022.o
+obj-$(CONFIG_REGULATOR_LP3971) += lp3971.o
 obj-$(CONFIG_REGULATOR_MAX1586) += max1586.o
 obj-$(CONFIG_REGULATOR_TWL4030) += twl4030-regulator.o
 obj-$(CONFIG_REGULATOR_WM8350) += wm8350-regulator.o
diff --git a/drivers/regulator/lp3971.c b/drivers/regulator/lp3971.c
new file mode 100644
index 00000000000..35abebea4ed
--- /dev/null
+++ b/drivers/regulator/lp3971.c
@@ -0,0 +1,562 @@
+/*
+ * Regulator driver for National Semiconductors LP3971 PMIC chip
+ *
+ *  Copyright (C) 2009 Samsung Electronics
+ *  Author: Marek Szyprowski <m.szyprowski@samsung.com>
+ *
+ * Based on wm8350.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/lp3971.h>
+
+struct lp3971 {
+	struct device *dev;
+	struct mutex io_lock;
+	struct i2c_client *i2c;
+	int num_regulators;
+	struct regulator_dev **rdev;
+};
+
+static u8 lp3971_reg_read(struct lp3971 *lp3971, u8 reg);
+static int lp3971_set_bits(struct lp3971 *lp3971, u8 reg, u16 mask, u16 val);
+
+#define LP3971_SYS_CONTROL1_REG 0x07
+
+/* System control register 1 initial value,
+   bits 4 and 5 are EPROM programmable */
+#define SYS_CONTROL1_INIT_VAL 0x40
+#define SYS_CONTROL1_INIT_MASK 0xCF
+
+#define LP3971_BUCK_VOL_ENABLE_REG 0x10
+#define LP3971_BUCK_VOL_CHANGE_REG 0x20
+
+/*	Voltage control registers shift:
+	LP3971_BUCK1 -> 0
+	LP3971_BUCK2 -> 4
+	LP3971_BUCK3 -> 6
+*/
+#define BUCK_VOL_CHANGE_SHIFT(x) (((1 << x) & ~0x01) << 1)
+#define BUCK_VOL_CHANGE_FLAG_GO 0x01
+#define BUCK_VOL_CHANGE_FLAG_TARGET 0x02
+#define BUCK_VOL_CHANGE_FLAG_MASK 0x03
+
+#define LP3971_BUCK1_BASE 0x23
+#define LP3971_BUCK2_BASE 0x29
+#define LP3971_BUCK3_BASE 0x32
+
+const static int buck_base_addr[] = {
+	LP3971_BUCK1_BASE,
+	LP3971_BUCK2_BASE,
+	LP3971_BUCK3_BASE,
+};
+
+#define LP3971_BUCK_TARGET_VOL1_REG(x) (buck_base_addr[x])
+#define LP3971_BUCK_TARGET_VOL2_REG(x) (buck_base_addr[x]+1)
+
+const static int buck_voltage_map[] = {
+	   0,  800,  850,  900,  950, 1000, 1050, 1100,
+	1150, 1200, 1250, 1300, 1350, 1400, 1450, 1500,
+	1550, 1600, 1650, 1700, 1800, 1900, 2500, 2800,
+	3000, 3300,
+};
+
+#define BUCK_TARGET_VOL_MASK 0x3f
+#define BUCK_TARGET_VOL_MIN_IDX 0x01
+#define BUCK_TARGET_VOL_MAX_IDX 0x19
+
+#define LP3971_BUCK_RAMP_REG(x)	(buck_base_addr[x]+2)
+
+#define LP3971_LDO_ENABLE_REG 0x12
+#define LP3971_LDO_VOL_CONTR_BASE 0x39
+
+/*	Voltage control registers:
+	LP3971_LDO1 -> LP3971_LDO_VOL_CONTR_BASE + 0
+	LP3971_LDO2 -> LP3971_LDO_VOL_CONTR_BASE + 0
+	LP3971_LDO3 -> LP3971_LDO_VOL_CONTR_BASE + 1
+	LP3971_LDO4 -> LP3971_LDO_VOL_CONTR_BASE + 1
+	LP3971_LDO5 -> LP3971_LDO_VOL_CONTR_BASE + 2
+*/
+#define LP3971_LDO_VOL_CONTR_REG(x)	(LP3971_LDO_VOL_CONTR_BASE + (x >> 1))
+
+/*	Voltage control registers shift:
+	LP3971_LDO1 -> 0, LP3971_LDO2 -> 4
+	LP3971_LDO3 -> 0, LP3971_LDO4 -> 4
+	LP3971_LDO5 -> 0
+*/
+#define LDO_VOL_CONTR_SHIFT(x) ((x & 1) << 2)
+#define LDO_VOL_CONTR_MASK 0x0f
+
+const static int ldo45_voltage_map[] = {
+	1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350,
+	1400, 1500, 1800, 1900, 2500, 2800, 3000, 3300,
+};
+
+const static int ldo123_voltage_map[] = {
+	1800, 1900, 2000, 2100, 2200, 2300, 2400, 2500,
+	2600, 2700, 2800, 2900, 3000, 3100, 3200, 3300,
+};
+
+const static int *ldo_voltage_map[] = {
+	ldo123_voltage_map, /* LDO1 */
+	ldo123_voltage_map, /* LDO2 */
+	ldo123_voltage_map, /* LDO3 */
+	ldo45_voltage_map, /* LDO4 */
+	ldo45_voltage_map, /* LDO5 */
+};
+
+#define LDO_VOL_VALUE_MAP(x) (ldo_voltage_map[(x - LP3971_LDO1)])
+
+#define LDO_VOL_MIN_IDX 0x00
+#define LDO_VOL_MAX_IDX 0x0f
+
+static int lp3971_ldo_list_voltage(struct regulator_dev *dev, unsigned index)
+{
+	int ldo = rdev_get_id(dev) - LP3971_LDO1;
+	return 1000 * LDO_VOL_VALUE_MAP(ldo)[index];
+}
+
+static int lp3971_ldo_is_enabled(struct regulator_dev *dev)
+{
+	struct lp3971 *lp3971 = rdev_get_drvdata(dev);
+	int ldo = rdev_get_id(dev) - LP3971_LDO1;
+	u16 mask = 1 << (1 + ldo);
+	u16 val;
+
+	val = lp3971_reg_read(lp3971, LP3971_LDO_ENABLE_REG);
+	return (val & mask) != 0;
+}
+
+static int lp3971_ldo_enable(struct regulator_dev *dev)
+{
+	struct lp3971 *lp3971 = rdev_get_drvdata(dev);
+	int ldo = rdev_get_id(dev) - LP3971_LDO1;
+	u16 mask = 1 << (1 + ldo);
+
+	return lp3971_set_bits(lp3971, LP3971_LDO_ENABLE_REG, mask, mask);
+}
+
+static int lp3971_ldo_disable(struct regulator_dev *dev)
+{
+	struct lp3971 *lp3971 = rdev_get_drvdata(dev);
+	int ldo = rdev_get_id(dev) - LP3971_LDO1;
+	u16 mask = 1 << (1 + ldo);
+
+	return lp3971_set_bits(lp3971, LP3971_LDO_ENABLE_REG, mask, 0);
+}
+
+static int lp3971_ldo_get_voltage(struct regulator_dev *dev)
+{
+	struct lp3971 *lp3971 = rdev_get_drvdata(dev);
+	int ldo = rdev_get_id(dev) - LP3971_LDO1;
+	u16 val, reg;
+
+	reg = lp3971_reg_read(lp3971, LP3971_LDO_VOL_CONTR_REG(ldo));
+	val = (reg >> LDO_VOL_CONTR_SHIFT(ldo)) & LDO_VOL_CONTR_MASK;
+
+	return 1000 * LDO_VOL_VALUE_MAP(ldo)[val];
+}
+
+static int lp3971_ldo_set_voltage(struct regulator_dev *dev,
+				  int min_uV, int max_uV)
+{
+	struct lp3971 *lp3971 = rdev_get_drvdata(dev);
+	int ldo = rdev_get_id(dev) - LP3971_LDO1;
+	int min_vol = min_uV / 1000, max_vol = max_uV / 1000;
+	const int *vol_map = LDO_VOL_VALUE_MAP(ldo);
+	u16 val;
+
+	if (min_vol < vol_map[LDO_VOL_MIN_IDX] ||
+	    min_vol > vol_map[LDO_VOL_MAX_IDX])
+		return -EINVAL;
+
+	for (val = LDO_VOL_MIN_IDX; val <= LDO_VOL_MAX_IDX; val++)
+		if (vol_map[val] >= min_vol)
+			break;
+
+	if (vol_map[val] > max_vol)
+		return -EINVAL;
+
+	return lp3971_set_bits(lp3971, LP3971_LDO_VOL_CONTR_REG(ldo),
+		LDO_VOL_CONTR_MASK << LDO_VOL_CONTR_SHIFT(ldo), val);
+}
+
+static struct regulator_ops lp3971_ldo_ops = {
+	.list_voltage = lp3971_ldo_list_voltage,
+	.is_enabled = lp3971_ldo_is_enabled,
+	.enable = lp3971_ldo_enable,
+	.disable = lp3971_ldo_disable,
+	.get_voltage = lp3971_ldo_get_voltage,
+	.set_voltage = lp3971_ldo_set_voltage,
+};
+
+static int lp3971_dcdc_list_voltage(struct regulator_dev *dev, unsigned index)
+{
+	return 1000 * buck_voltage_map[index];
+}
+
+static int lp3971_dcdc_is_enabled(struct regulator_dev *dev)
+{
+	struct lp3971 *lp3971 = rdev_get_drvdata(dev);
+	int buck = rdev_get_id(dev) - LP3971_DCDC1;
+	u16 mask = 1 << (buck * 2);
+	u16 val;
+
+	val = lp3971_reg_read(lp3971, LP3971_BUCK_VOL_ENABLE_REG);
+	return (val & mask) != 0;
+}
+
+static int lp3971_dcdc_enable(struct regulator_dev *dev)
+{
+	struct lp3971 *lp3971 = rdev_get_drvdata(dev);
+	int buck = rdev_get_id(dev) - LP3971_DCDC1;
+	u16 mask = 1 << (buck * 2);
+
+	return lp3971_set_bits(lp3971, LP3971_BUCK_VOL_ENABLE_REG, mask, mask);
+}
+
+static int lp3971_dcdc_disable(struct regulator_dev *dev)
+{
+	struct lp3971 *lp3971 = rdev_get_drvdata(dev);
+	int buck = rdev_get_id(dev) - LP3971_DCDC1;
+	u16 mask = 1 << (buck * 2);
+
+	return lp3971_set_bits(lp3971, LP3971_BUCK_VOL_ENABLE_REG, mask, 0);
+}
+
+static int lp3971_dcdc_get_voltage(struct regulator_dev *dev)
+{
+	struct lp3971 *lp3971 = rdev_get_drvdata(dev);
+	int buck = rdev_get_id(dev) - LP3971_DCDC1;
+	u16 reg;
+	int val;
+
+	reg = lp3971_reg_read(lp3971, LP3971_BUCK_TARGET_VOL1_REG(buck));
+	reg &= BUCK_TARGET_VOL_MASK;
+
+	if (reg <= BUCK_TARGET_VOL_MAX_IDX)
+		val = 1000 * buck_voltage_map[reg];
+	else {
+		val = 0;
+		dev_warn(&dev->dev, "chip reported incorrect voltage value.\n");
+	}
+
+	return val;
+}
+
+static int lp3971_dcdc_set_voltage(struct regulator_dev *dev,
+				  int min_uV, int max_uV)
+{
+	struct lp3971 *lp3971 = rdev_get_drvdata(dev);
+	int buck = rdev_get_id(dev) - LP3971_DCDC1;
+	int min_vol = min_uV / 1000, max_vol = max_uV / 1000;
+	const int *vol_map = buck_voltage_map;
+	u16 val;
+	int ret;
+
+	if (min_vol < vol_map[BUCK_TARGET_VOL_MIN_IDX] ||
+	    min_vol > vol_map[BUCK_TARGET_VOL_MAX_IDX])
+		return -EINVAL;
+
+	for (val = BUCK_TARGET_VOL_MIN_IDX; val <= BUCK_TARGET_VOL_MAX_IDX;
+	     val++)
+		if (vol_map[val] >= min_vol)
+			break;
+
+	if (vol_map[val] > max_vol)
+		return -EINVAL;
+
+	ret = lp3971_set_bits(lp3971, LP3971_BUCK_TARGET_VOL1_REG(buck),
+	       BUCK_TARGET_VOL_MASK, val);
+	if (ret)
+		return ret;
+
+	ret = lp3971_set_bits(lp3971, LP3971_BUCK_VOL_CHANGE_REG,
+	       BUCK_VOL_CHANGE_FLAG_MASK << BUCK_VOL_CHANGE_SHIFT(buck),
+	       BUCK_VOL_CHANGE_FLAG_GO << BUCK_VOL_CHANGE_SHIFT(buck));
+	if (ret)
+		return ret;
+
+	return lp3971_set_bits(lp3971, LP3971_BUCK_VOL_CHANGE_REG,
+	       BUCK_VOL_CHANGE_FLAG_MASK << BUCK_VOL_CHANGE_SHIFT(buck),
+	       0 << BUCK_VOL_CHANGE_SHIFT(buck));
+}
+
+static struct regulator_ops lp3971_dcdc_ops = {
+	.list_voltage = lp3971_dcdc_list_voltage,
+	.is_enabled = lp3971_dcdc_is_enabled,
+	.enable = lp3971_dcdc_enable,
+	.disable = lp3971_dcdc_disable,
+	.get_voltage = lp3971_dcdc_get_voltage,
+	.set_voltage = lp3971_dcdc_set_voltage,
+};
+
+static struct regulator_desc regulators[] = {
+	{
+		.name = "LDO1",
+		.id = LP3971_LDO1,
+		.ops = &lp3971_ldo_ops,
+		.n_voltages = ARRAY_SIZE(ldo123_voltage_map),
+		.type = REGULATOR_VOLTAGE,
+		.owner = THIS_MODULE,
+	},
+	{
+		.name = "LDO2",
+		.id = LP3971_LDO2,
+		.ops = &lp3971_ldo_ops,
+		.n_voltages = ARRAY_SIZE(ldo123_voltage_map),
+		.type = REGULATOR_VOLTAGE,
+		.owner = THIS_MODULE,
+	},
+	{
+		.name = "LDO3",
+		.id = LP3971_LDO3,
+		.ops = &lp3971_ldo_ops,
+		.n_voltages = ARRAY_SIZE(ldo123_voltage_map),
+		.type = REGULATOR_VOLTAGE,
+		.owner = THIS_MODULE,
+	},
+	{
+		.name = "LDO4",
+		.id = LP3971_LDO4,
+		.ops = &lp3971_ldo_ops,
+		.n_voltages = ARRAY_SIZE(ldo45_voltage_map),
+		.type = REGULATOR_VOLTAGE,
+		.owner = THIS_MODULE,
+	},
+	{
+		.name = "LDO5",
+		.id = LP3971_LDO5,
+		.ops = &lp3971_ldo_ops,
+		.n_voltages = ARRAY_SIZE(ldo45_voltage_map),
+		.type = REGULATOR_VOLTAGE,
+		.owner = THIS_MODULE,
+	},
+	{
+		.name = "DCDC1",
+		.id = LP3971_DCDC1,
+		.ops = &lp3971_dcdc_ops,
+		.n_voltages = ARRAY_SIZE(buck_voltage_map),
+		.type = REGULATOR_VOLTAGE,
+		.owner = THIS_MODULE,
+	},
+	{
+		.name = "DCDC2",
+		.id = LP3971_DCDC2,
+		.ops = &lp3971_dcdc_ops,
+		.n_voltages = ARRAY_SIZE(buck_voltage_map),
+		.type = REGULATOR_VOLTAGE,
+		.owner = THIS_MODULE,
+	},
+	{
+		.name = "DCDC3",
+		.id = LP3971_DCDC3,
+		.ops = &lp3971_dcdc_ops,
+		.n_voltages = ARRAY_SIZE(buck_voltage_map),
+		.type = REGULATOR_VOLTAGE,
+		.owner = THIS_MODULE,
+	},
+};
+
+static int lp3971_i2c_read(struct i2c_client *i2c, char reg, int count,
+	u16 *dest)
+{
+	int ret;
+
+	if (count != 1)
+		return -EIO;
+	ret = i2c_smbus_read_byte_data(i2c, reg);
+	if (ret < 0 || count != 1)
+		return -EIO;
+
+	*dest = ret;
+	return 0;
+}
+
+static int lp3971_i2c_write(struct i2c_client *i2c, char reg, int count,
+	const u16 *src)
+{
+	int ret;
+
+	if (count != 1)
+		return -EIO;
+	ret = i2c_smbus_write_byte_data(i2c, reg, *src);
+	if (ret >= 0)
+		return 0;
+
+	return ret;
+}
+
+static u8 lp3971_reg_read(struct lp3971 *lp3971, u8 reg)
+{
+	u16 val = 0;
+
+	mutex_lock(&lp3971->io_lock);
+
+	lp3971_i2c_read(lp3971->i2c, reg, 1, &val);
+
+	dev_dbg(lp3971->dev, "reg read 0x%02x -> 0x%02x\n", (int)reg,
+		(unsigned)val&0xff);
+
+	mutex_unlock(&lp3971->io_lock);
+
+	return val & 0xff;
+}
+
+static int lp3971_set_bits(struct lp3971 *lp3971, u8 reg, u16 mask, u16 val)
+{
+	u16 tmp;
+	int ret;
+
+	mutex_lock(&lp3971->io_lock);
+
+	ret = lp3971_i2c_read(lp3971->i2c, reg, 1, &tmp);
+	tmp = (tmp & ~mask) | val;
+	if (ret == 0) {
+		ret = lp3971_i2c_write(lp3971->i2c, reg, 1, &tmp);
+		dev_dbg(lp3971->dev, "reg write 0x%02x -> 0x%02x\n", (int)reg,
+			(unsigned)val&0xff);
+	}
+	mutex_unlock(&lp3971->io_lock);
+
+	return ret;
+}
+
+static int setup_regulators(struct lp3971 *lp3971,
+	struct lp3971_platform_data *pdata)
+{
+	int i, err;
+	int num_regulators = pdata->num_regulators;
+	lp3971->num_regulators = num_regulators;
+	lp3971->rdev = kzalloc(sizeof(struct regulator_dev *) * num_regulators,
+		GFP_KERNEL);
+
+	/* Instantiate the regulators */
+	for (i = 0; i < num_regulators; i++) {
+		int id = pdata->regulators[i].id;
+		lp3971->rdev[i] = regulator_register(&regulators[id],
+			lp3971->dev, pdata->regulators[i].initdata, lp3971);
+
+		err = IS_ERR(lp3971->rdev[i]);
+		if (err) {
+			dev_err(lp3971->dev, "regulator init failed: %d\n",
+				err);
+			goto error;
+		}
+	}
+
+	return 0;
+error:
+	for (i = 0; i < num_regulators; i++)
+		if (lp3971->rdev[i])
+			regulator_unregister(lp3971->rdev[i]);
+	kfree(lp3971->rdev);
+	lp3971->rdev = NULL;
+	return err;
+}
+
+static int __devinit lp3971_i2c_probe(struct i2c_client *i2c,
+			    const struct i2c_device_id *id)
+{
+	struct lp3971 *lp3971;
+	struct lp3971_platform_data *pdata = i2c->dev.platform_data;
+	int ret;
+	u16 val;
+
+	lp3971 = kzalloc(sizeof(struct lp3971), GFP_KERNEL);
+	if (lp3971 == NULL) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	lp3971->i2c = i2c;
+	lp3971->dev = &i2c->dev;
+	i2c_set_clientdata(i2c, lp3971);
+
+	mutex_init(&lp3971->io_lock);
+
+	/* Detect LP3971 */
+	ret = lp3971_i2c_read(i2c, LP3971_SYS_CONTROL1_REG, 1, &val);
+	if (ret == 0 && (val & SYS_CONTROL1_INIT_MASK) != SYS_CONTROL1_INIT_VAL)
+		ret = -ENODEV;
+	if (ret < 0) {
+		dev_err(&i2c->dev, "failed to detect device\n");
+		goto err_detect;
+	}
+
+	if (pdata) {
+		ret = setup_regulators(lp3971, pdata);
+		if (ret < 0)
+			goto err_detect;
+	} else
+		dev_warn(lp3971->dev, "No platform init data supplied\n");
+
+	return 0;
+
+err_detect:
+	i2c_set_clientdata(i2c, NULL);
+	kfree(lp3971);
+err:
+	return ret;
+}
+
+static int __devexit lp3971_i2c_remove(struct i2c_client *i2c)
+{
+	struct lp3971 *lp3971 = i2c_get_clientdata(i2c);
+	int i;
+	for (i = 0; i < lp3971->num_regulators; i++)
+		if (lp3971->rdev[i])
+			regulator_unregister(lp3971->rdev[i]);
+	kfree(lp3971->rdev);
+	i2c_set_clientdata(i2c, NULL);
+	kfree(lp3971);
+
+	return 0;
+}
+
+static const struct i2c_device_id lp3971_i2c_id[] = {
+       { "lp3971", 0 },
+       { }
+};
+MODULE_DEVICE_TABLE(i2c, lp3971_i2c_id);
+
+static struct i2c_driver lp3971_i2c_driver = {
+	.driver = {
+		.name = "LP3971",
+		.owner = THIS_MODULE,
+	},
+	.probe    = lp3971_i2c_probe,
+	.remove   = lp3971_i2c_remove,
+	.id_table = lp3971_i2c_id,
+};
+
+static int __init lp3971_module_init(void)
+{
+	int ret = -ENODEV;
+
+	ret = i2c_add_driver(&lp3971_i2c_driver);
+	if (ret != 0)
+		pr_err("Failed to register I2C driver: %d\n", ret);
+
+	return ret;
+}
+module_init(lp3971_module_init);
+
+static void __exit lp3971_module_exit(void)
+{
+	i2c_del_driver(&lp3971_i2c_driver);
+}
+module_exit(lp3971_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marek Szyprowski <m.szyprowski@samsung.com>");
+MODULE_DESCRIPTION("LP3971 PMIC driver");
diff --git a/include/linux/regulator/lp3971.h b/include/linux/regulator/lp3971.h
new file mode 100644
index 00000000000..61401649fe7
--- /dev/null
+++ b/include/linux/regulator/lp3971.h
@@ -0,0 +1,51 @@
+/*
+ * National Semiconductors LP3971 PMIC chip client interface
+ *
+ *  Copyright (C) 2009 Samsung Electronics
+ *  Author: Marek Szyprowski <m.szyprowski@samsung.com>
+ *
+ * Based on wm8400.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __LINUX_REGULATOR_LP3971_H
+#define __LINUX_REGULATOR_LP3971_H
+
+#include <linux/regulator/machine.h>
+
+#define LP3971_LDO1  0
+#define LP3971_LDO2  1
+#define LP3971_LDO3  2
+#define LP3971_LDO4  3
+#define LP3971_LDO5  4
+
+#define LP3971_DCDC1 5
+#define LP3971_DCDC2 6
+#define LP3971_DCDC3 7
+
+#define LP3971_NUM_REGULATORS 8
+
+struct lp3971_regulator_subdev {
+	int id;
+	struct regulator_init_data *initdata;
+};
+
+struct lp3971_platform_data {
+	int num_regulators;
+	struct lp3971_regulator_subdev *regulators;
+};
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From b110a8fb242bc34e4b7686252899ce0fca956e2c Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Thu, 28 May 2009 07:15:16 +0200
Subject: regulator/max1586: support increased V3 voltage range

The V3 regulator can be configured with an external resistor
connected to the feedback pin (R24 in the data sheet) to
increase the voltage range.

For example, hx4700 has R24 = 3.32 kOhm to achieve a maximum
V3 voltage of 1.55 V which is needed for 624 MHz CPU frequency.

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 drivers/regulator/max1586.c       | 71 ++++++++++++++++++++++++++++-----------
 include/linux/regulator/max1586.h | 11 ++++++
 2 files changed, 63 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/regulator/max1586.c b/drivers/regulator/max1586.c
index bbbb55fcfe8..92799f40c86 100644
--- a/drivers/regulator/max1586.c
+++ b/drivers/regulator/max1586.c
@@ -29,7 +29,6 @@
 
 #define MAX1586_V3_MIN_UV   700000
 #define MAX1586_V3_MAX_UV  1475000
-#define MAX1586_V3_STEP_UV   25000
 
 #define MAX1586_V6_MIN_UV        0
 #define MAX1586_V6_MAX_UV  3000000
@@ -37,33 +36,52 @@
 #define I2C_V3_SELECT (0 << 5)
 #define I2C_V6_SELECT (1 << 5)
 
+struct max1586_data {
+	struct i2c_client *client;
+
+	/* min/max V3 voltage */
+	int min_uV;
+	int max_uV;
+
+	struct regulator_dev *rdev[0];
+};
+
 /*
  * V3 voltage
  * On I2C bus, sending a "x" byte to the max1586 means :
  *   set V3 to 0.700V + (x & 0x1f) * 0.025V
+ * This voltage can be increased by external resistors
+ * R24 and R25=100kOhm as described in the data sheet.
+ * The gain is approximately: 1 + R24/R25 + R24/185.5kOhm
  */
-static int max1586_v3_calc_voltage(unsigned selector)
+static int max1586_v3_calc_voltage(struct max1586_data *max1586,
+	unsigned selector)
 {
-	return MAX1586_V3_MIN_UV + (MAX1586_V3_STEP_UV * selector);
+	unsigned range_uV = max1586->max_uV - max1586->min_uV;
+
+	return max1586->min_uV + (selector * range_uV / MAX1586_V3_MAX_VSEL);
 }
 
 static int max1586_v3_set(struct regulator_dev *rdev, int min_uV, int max_uV)
 {
-	struct i2c_client *client = rdev_get_drvdata(rdev);
+	struct max1586_data *max1586 = rdev_get_drvdata(rdev);
+	struct i2c_client *client = max1586->client;
+	unsigned range_uV = max1586->max_uV - max1586->min_uV;
 	unsigned selector;
 	u8 v3_prog;
 
-	if (min_uV < MAX1586_V3_MIN_UV || min_uV > MAX1586_V3_MAX_UV)
-		return -EINVAL;
-	if (max_uV < MAX1586_V3_MIN_UV || max_uV > MAX1586_V3_MAX_UV)
+	if (min_uV > max1586->max_uV || max_uV < max1586->min_uV)
 		return -EINVAL;
+	if (min_uV < max1586->min_uV)
+		min_uV = max1586->min_uV;
 
-	selector = (min_uV - MAX1586_V3_MIN_UV) / MAX1586_V3_STEP_UV;
-	if (max1586_v3_calc_voltage(selector) > max_uV)
+	selector = ((min_uV - max1586->min_uV) * MAX1586_V3_MAX_VSEL +
+			range_uV - 1) / range_uV;
+	if (max1586_v3_calc_voltage(max1586, selector) > max_uV)
 		return -EINVAL;
 
 	dev_dbg(&client->dev, "changing voltage v3 to %dmv\n",
-		max1586_v3_calc_voltage(selector) / 1000);
+		max1586_v3_calc_voltage(max1586, selector) / 1000);
 
 	v3_prog = I2C_V3_SELECT | (u8) selector;
 	return i2c_smbus_write_byte(client, v3_prog);
@@ -71,9 +89,11 @@ static int max1586_v3_set(struct regulator_dev *rdev, int min_uV, int max_uV)
 
 static int max1586_v3_list(struct regulator_dev *rdev, unsigned selector)
 {
+	struct max1586_data *max1586 = rdev_get_drvdata(rdev);
+
 	if (selector > MAX1586_V3_MAX_VSEL)
 		return -EINVAL;
-	return max1586_v3_calc_voltage(selector);
+	return max1586_v3_calc_voltage(max1586, selector);
 }
 
 /*
@@ -164,14 +184,25 @@ static int max1586_pmic_probe(struct i2c_client *client,
 {
 	struct regulator_dev **rdev;
 	struct max1586_platform_data *pdata = client->dev.platform_data;
-	int i, id, ret = 0;
+	struct max1586_data *max1586;
+	int i, id, ret = -ENOMEM;
+
+	max1586 = kzalloc(sizeof(struct max1586_data) +
+			sizeof(struct regulator_dev *) * (MAX1586_V6 + 1),
+			GFP_KERNEL);
+	if (!max1586)
+		goto out;
 
-	rdev = kzalloc(sizeof(struct regulator_dev *) * (MAX1586_V6 + 1),
-		       GFP_KERNEL);
-	if (!rdev)
-		return -ENOMEM;
+	max1586->client = client;
 
-	ret = -EINVAL;
+	if (!pdata->v3_gain) {
+		ret = -EINVAL;
+		goto out_unmap;
+	}
+	max1586->min_uV = MAX1586_V3_MIN_UV * pdata->v3_gain / 1000000;
+	max1586->max_uV = MAX1586_V3_MAX_UV * pdata->v3_gain / 1000000;
+
+	rdev = max1586->rdev;
 	for (i = 0; i < pdata->num_subdevs && i <= MAX1586_V6; i++) {
 		id = pdata->subdevs[i].id;
 		if (!pdata->subdevs[i].platform_data)
@@ -182,7 +213,7 @@ static int max1586_pmic_probe(struct i2c_client *client,
 		}
 		rdev[i] = regulator_register(&max1586_reg[id], &client->dev,
 					     pdata->subdevs[i].platform_data,
-					     client);
+					     max1586);
 		if (IS_ERR(rdev[i])) {
 			ret = PTR_ERR(rdev[i]);
 			dev_err(&client->dev, "failed to register %s\n",
@@ -198,7 +229,9 @@ static int max1586_pmic_probe(struct i2c_client *client,
 err:
 	while (--i >= 0)
 		regulator_unregister(rdev[i]);
-	kfree(rdev);
+out_unmap:
+	kfree(max1586);
+out:
 	return ret;
 }
 
diff --git a/include/linux/regulator/max1586.h b/include/linux/regulator/max1586.h
index 2056973396b..44563192bf1 100644
--- a/include/linux/regulator/max1586.h
+++ b/include/linux/regulator/max1586.h
@@ -26,6 +26,12 @@
 #define MAX1586_V3 0
 #define MAX1586_V6 1
 
+/* precalculated values for v3_gain */
+#define MAX1586_GAIN_NO_R24   1000000  /* 700000 .. 1475000 mV */
+#define MAX1586_GAIN_R24_3k32 1051098  /* 735768 .. 1550369 mV */
+#define MAX1586_GAIN_R24_5k11 1078648  /* 755053 .. 1591005 mV */
+#define MAX1586_GAIN_R24_7k5  1115432  /* 780802 .. 1645262 mV */
+
 /**
  * max1586_subdev_data - regulator data
  * @id: regulator Id (either MAX1586_V3 or MAX1586_V6)
@@ -43,10 +49,15 @@ struct max1586_subdev_data {
  * @num_subdevs: number of regultors used (may be 1 or 2)
  * @subdevs: regulator used
  *           At most, there will be a regulator for V3 and one for V6 voltages.
+ * @v3_gain: gain on the V3 voltage output multiplied by 1e6.
+ *           This can be calculated as ((1 + R24/R25 + R24/185.5kOhm) * 1e6)
+ *           for an external resistor configuration as described in the
+ *           data sheet (R25=100kOhm).
  */
 struct max1586_platform_data {
 	int num_subdevs;
 	struct max1586_subdev_data *subdevs;
+	int v3_gain;
 };
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From 2dff440525f8faba8836e9f05297b76f23b4af30 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Sat, 31 May 2008 15:56:17 +0200
Subject: kmemcheck: add mm functions

With kmemcheck enabled, the slab allocator needs to do this:

1. Tell kmemcheck to allocate the shadow memory which stores the status of
   each byte in the allocation proper, e.g. whether it is initialized or
   uninitialized.
2. Tell kmemcheck which parts of memory that should be marked uninitialized.
   There are actually a few more states, such as "not yet allocated" and
   "recently freed".

If a slab cache is set up using the SLAB_NOTRACK flag, it will never return
memory that can take page faults because of kmemcheck.

If a slab cache is NOT set up using the SLAB_NOTRACK flag, callers can still
request memory with the __GFP_NOTRACK flag. This does not prevent the page
faults from occuring, however, but marks the object in question as being
initialized so that no warnings will ever be produced for this object.

In addition to (and in contrast to) __GFP_NOTRACK, the
__GFP_NOTRACK_FALSE_POSITIVE flag indicates that the allocation should
not be tracked _because_ it would produce a false positive. Their values
are identical, but need not be so in the future (for example, we could now
enable/disable false positives with a config option).

Parts of this patch were contributed by Pekka Enberg but merged for
atomicity.

Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

[rebased for mainline inclusion]
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
---
 arch/x86/kernel/process.c |   2 +-
 include/linux/gfp.h       |   9 +++-
 include/linux/kmemcheck.h |  47 +++++++++++++++++++++
 include/linux/slab.h      |   7 ++++
 kernel/fork.c             |  14 +++----
 mm/Makefile               |   1 +
 mm/kmemcheck.c            | 103 ++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 174 insertions(+), 9 deletions(-)
 create mode 100644 mm/kmemcheck.c

(limited to 'include')

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3bb2be1649b..994dd6a4a2a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -63,7 +63,7 @@ void arch_task_cache_init(void)
         task_xstate_cachep =
         	kmem_cache_create("task_xstate", xstate_size,
 				  __alignof__(union thread_xstate),
-				  SLAB_PANIC, NULL);
+				  SLAB_PANIC | SLAB_NOTRACK, NULL);
 }
 
 /*
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 0bbc15f5453..daeaa8fe1bb 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -51,8 +51,15 @@ struct vm_area_struct;
 #define __GFP_THISNODE	((__force gfp_t)0x40000u)/* No fallback, no policies */
 #define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */
 #define __GFP_MOVABLE	((__force gfp_t)0x100000u)  /* Page is movable */
+#define __GFP_NOTRACK	((__force gfp_t)0x200000u)  /* Don't track with kmemcheck */
 
-#define __GFP_BITS_SHIFT 21	/* Room for 21 __GFP_FOO bits */
+/*
+ * This may seem redundant, but it's a way of annotating false positives vs.
+ * allocations that simply cannot be supported (e.g. page tables).
+ */
+#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
+
+#define __GFP_BITS_SHIFT 22	/* Room for 22 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /* This equals 0, but use constants in case they ever change */
diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h
index 39480c91b2f..5b65f4ebead 100644
--- a/include/linux/kmemcheck.h
+++ b/include/linux/kmemcheck.h
@@ -7,11 +7,58 @@
 #ifdef CONFIG_KMEMCHECK
 extern int kmemcheck_enabled;
 
+/* The slab-related functions. */
+void kmemcheck_alloc_shadow(struct kmem_cache *s, gfp_t flags, int node,
+			    struct page *page, int order);
+void kmemcheck_free_shadow(struct kmem_cache *s, struct page *page, int order);
+void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object,
+			  size_t size);
+void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size);
+
+void kmemcheck_show_pages(struct page *p, unsigned int n);
+void kmemcheck_hide_pages(struct page *p, unsigned int n);
+
+bool kmemcheck_page_is_tracked(struct page *p);
+
+void kmemcheck_mark_unallocated(void *address, unsigned int n);
+void kmemcheck_mark_uninitialized(void *address, unsigned int n);
+void kmemcheck_mark_initialized(void *address, unsigned int n);
+void kmemcheck_mark_freed(void *address, unsigned int n);
+
+void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n);
+void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n);
+
 int kmemcheck_show_addr(unsigned long address);
 int kmemcheck_hide_addr(unsigned long address);
 #else
 #define kmemcheck_enabled 0
 
+static inline void
+kmemcheck_alloc_shadow(struct kmem_cache *s, gfp_t flags, int node,
+		       struct page *page, int order)
+{
+}
+
+static inline void
+kmemcheck_free_shadow(struct kmem_cache *s, struct page *page, int order)
+{
+}
+
+static inline void
+kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object,
+		     size_t size)
+{
+}
+
+static inline void kmemcheck_slab_free(struct kmem_cache *s, void *object,
+				       size_t size)
+{
+}
+
+static inline bool kmemcheck_page_is_tracked(struct page *p)
+{
+	return false;
+}
 #endif /* CONFIG_KMEMCHECK */
 
 #endif /* LINUX_KMEMCHECK_H */
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 48803064ced..e339fcf17cd 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -64,6 +64,13 @@
 
 #define SLAB_NOLEAKTRACE	0x00800000UL	/* Avoid kmemleak tracing */
 
+/* Don't track use of uninitialized memory */
+#ifdef CONFIG_KMEMCHECK
+# define SLAB_NOTRACK		0x01000000UL
+#else
+# define SLAB_NOTRACK		0x00000000UL
+#endif
+
 /* The following flags affect the page allocator grouping pages by mobility */
 #define SLAB_RECLAIM_ACCOUNT	0x00020000UL		/* Objects are reclaimable */
 #define SLAB_TEMPORARY		SLAB_RECLAIM_ACCOUNT	/* Objects are short-lived */
diff --git a/kernel/fork.c b/kernel/fork.c
index 4430eb1376f..be022c200da 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -178,7 +178,7 @@ void __init fork_init(unsigned long mempages)
 	/* create a slab on which task_structs can be allocated */
 	task_struct_cachep =
 		kmem_cache_create("task_struct", sizeof(struct task_struct),
-			ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL);
+			ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
 #endif
 
 	/* do the arch specific task caches init */
@@ -1470,20 +1470,20 @@ void __init proc_caches_init(void)
 {
 	sighand_cachep = kmem_cache_create("sighand_cache",
 			sizeof(struct sighand_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU,
-			sighand_ctor);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU|
+			SLAB_NOTRACK, sighand_ctor);
 	signal_cachep = kmem_cache_create("signal_cache",
 			sizeof(struct signal_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
 	files_cachep = kmem_cache_create("files_cache",
 			sizeof(struct files_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
 	fs_cachep = kmem_cache_create("fs_cache",
 			sizeof(struct fs_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
 	mm_cachep = kmem_cache_create("mm_struct",
 			sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
 	vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC);
 	mmap_init();
 }
diff --git a/mm/Makefile b/mm/Makefile
index e89acb090b4..c379ce08354 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
 obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o
 obj-$(CONFIG_SLAB) += slab.o
 obj-$(CONFIG_SLUB) += slub.o
+obj-$(CONFIG_KMEMCHECK) += kmemcheck.o
 obj-$(CONFIG_FAILSLAB) += failslab.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c
new file mode 100644
index 00000000000..eaa41b80261
--- /dev/null
+++ b/mm/kmemcheck.c
@@ -0,0 +1,103 @@
+#include <linux/mm_types.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/kmemcheck.h>
+
+void kmemcheck_alloc_shadow(struct kmem_cache *s, gfp_t flags, int node,
+			   struct page *page, int order)
+{
+	struct page *shadow;
+	int pages;
+	int i;
+
+	pages = 1 << order;
+
+	/*
+	 * With kmemcheck enabled, we need to allocate a memory area for the
+	 * shadow bits as well.
+	 */
+	shadow = alloc_pages_node(node, flags, order);
+	if (!shadow) {
+		if (printk_ratelimit())
+			printk(KERN_ERR "kmemcheck: failed to allocate "
+				"shadow bitmap\n");
+		return;
+	}
+
+	for(i = 0; i < pages; ++i)
+		page[i].shadow = page_address(&shadow[i]);
+
+	/*
+	 * Mark it as non-present for the MMU so that our accesses to
+	 * this memory will trigger a page fault and let us analyze
+	 * the memory accesses.
+	 */
+	kmemcheck_hide_pages(page, pages);
+
+	/*
+	 * Objects from caches that have a constructor don't get
+	 * cleared when they're allocated, so we need to do it here.
+	 */
+	if (s->ctor)
+		kmemcheck_mark_uninitialized_pages(page, pages);
+	else
+		kmemcheck_mark_unallocated_pages(page, pages);
+}
+
+void kmemcheck_free_shadow(struct kmem_cache *s, struct page *page, int order)
+{
+	struct page *shadow;
+	int pages;
+	int i;
+
+	pages = 1 << order;
+
+	kmemcheck_show_pages(page, pages);
+
+	shadow = virt_to_page(page[0].shadow);
+
+	for(i = 0; i < pages; ++i)
+		page[i].shadow = NULL;
+
+	__free_pages(shadow, order);
+}
+
+void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object,
+			  size_t size)
+{
+	/*
+	 * Has already been memset(), which initializes the shadow for us
+	 * as well.
+	 */
+	if (gfpflags & __GFP_ZERO)
+		return;
+
+	/* No need to initialize the shadow of a non-tracked slab. */
+	if (s->flags & SLAB_NOTRACK)
+		return;
+
+	if (!kmemcheck_enabled || gfpflags & __GFP_NOTRACK) {
+		/*
+		 * Allow notracked objects to be allocated from
+		 * tracked caches. Note however that these objects
+		 * will still get page faults on access, they just
+		 * won't ever be flagged as uninitialized. If page
+		 * faults are not acceptable, the slab cache itself
+		 * should be marked NOTRACK.
+		 */
+		kmemcheck_mark_initialized(object, size);
+	} else if (!s->ctor) {
+		/*
+		 * New objects should be marked uninitialized before
+		 * they're returned to the called.
+		 */
+		kmemcheck_mark_uninitialized(object, size);
+	}
+}
+
+void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size)
+{
+	/* TODO: RCU freeing is unsupported for now; hide false positives. */
+	if (!s->ctor && !(s->flags & SLAB_DESTROY_BY_RCU))
+		kmemcheck_mark_freed(object, size);
+}
-- 
cgit v1.2.3-70-g09d2


From d7002857dee6e9a3ce1f78d23f37caba106b29c5 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Sun, 20 Jul 2008 10:44:54 +0200
Subject: kmemcheck: add DMA hooks

This patch hooks into the DMA API to prevent the reporting of the
false positives that would otherwise be reported when memory is
accessed that is also used directly by devices.

[rebased for mainline inclusion]
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
---
 arch/x86/include/asm/dma-mapping.h |  2 ++
 include/linux/kmemcheck.h          | 16 ++++++++++++++++
 2 files changed, 18 insertions(+)

(limited to 'include')

diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index f82fdc412c6..d57d0c1857b 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -6,6 +6,7 @@
  * Documentation/DMA-API.txt for documentation.
  */
 
+#include <linux/kmemcheck.h>
 #include <linux/scatterlist.h>
 #include <linux/dma-debug.h>
 #include <linux/dma-attrs.h>
@@ -60,6 +61,7 @@ dma_map_single(struct device *hwdev, void *ptr, size_t size,
 	dma_addr_t addr;
 
 	BUG_ON(!valid_dma_direction(dir));
+	kmemcheck_mark_initialized(ptr, size);
 	addr = ops->map_page(hwdev, virt_to_page(ptr),
 			     (unsigned long)ptr & ~PAGE_MASK, size,
 			     dir, NULL);
diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h
index 5b65f4ebead..71f21ae33d1 100644
--- a/include/linux/kmemcheck.h
+++ b/include/linux/kmemcheck.h
@@ -59,6 +59,22 @@ static inline bool kmemcheck_page_is_tracked(struct page *p)
 {
 	return false;
 }
+
+static inline void kmemcheck_mark_unallocated(void *address, unsigned int n)
+{
+}
+
+static inline void kmemcheck_mark_uninitialized(void *address, unsigned int n)
+{
+}
+
+static inline void kmemcheck_mark_initialized(void *address, unsigned int n)
+{
+}
+
+static inline void kmemcheck_mark_freed(void *address, unsigned int n)
+{
+}
 #endif /* CONFIG_KMEMCHECK */
 
 #endif /* LINUX_KMEMCHECK_H */
-- 
cgit v1.2.3-70-g09d2


From b1eeab67682a5e397aecf172046b3a8bd4808ae4 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Tue, 25 Nov 2008 16:55:53 +0100
Subject: kmemcheck: add hooks for the page allocator

This adds support for tracking the initializedness of memory that
was allocated with the page allocator. Highmem requests are not
tracked.

Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>

[build fix for !CONFIG_KMEMCHECK]
Signed-off-by: Ingo Molnar <mingo@elte.hu>

[rebased for mainline inclusion]
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
---
 arch/x86/include/asm/thread_info.h |  4 ++--
 arch/x86/mm/kmemcheck/shadow.c     |  8 +++++++
 include/linux/gfp.h                |  5 +++++
 include/linux/kmemcheck.h          | 35 ++++++++++++++++++++++++-----
 mm/kmemcheck.c                     | 45 +++++++++++++++++++++++++++-----------
 mm/page_alloc.c                    | 18 +++++++++++++++
 mm/slab.c                          | 15 ++++++++-----
 mm/slub.c                          | 23 ++++++++++++++-----
 8 files changed, 122 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 602c769fc98..b0783520988 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -154,9 +154,9 @@ struct thread_info {
 
 /* thread information allocation */
 #ifdef CONFIG_DEBUG_STACK_USAGE
-#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO)
+#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
 #else
-#define THREAD_FLAGS GFP_KERNEL
+#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK)
 #endif
 
 #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c
index e7346d3873b..e773b6bd007 100644
--- a/arch/x86/mm/kmemcheck/shadow.c
+++ b/arch/x86/mm/kmemcheck/shadow.c
@@ -116,6 +116,14 @@ void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n)
 		kmemcheck_mark_uninitialized(page_address(&p[i]), PAGE_SIZE);
 }
 
+void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n)
+{
+	unsigned int i;
+
+	for (i = 0; i < n; ++i)
+		kmemcheck_mark_initialized(page_address(&p[i]), PAGE_SIZE);
+}
+
 enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
 {
 	uint8_t *x;
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index daeaa8fe1bb..3885e7f7556 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -51,7 +51,12 @@ struct vm_area_struct;
 #define __GFP_THISNODE	((__force gfp_t)0x40000u)/* No fallback, no policies */
 #define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */
 #define __GFP_MOVABLE	((__force gfp_t)0x100000u)  /* Page is movable */
+
+#ifdef CONFIG_KMEMCHECK
 #define __GFP_NOTRACK	((__force gfp_t)0x200000u)  /* Don't track with kmemcheck */
+#else
+#define __GFP_NOTRACK	((__force gfp_t)0)
+#endif
 
 /*
  * This may seem redundant, but it's a way of annotating false positives vs.
diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h
index 71f21ae33d1..093d23969b1 100644
--- a/include/linux/kmemcheck.h
+++ b/include/linux/kmemcheck.h
@@ -8,13 +8,15 @@
 extern int kmemcheck_enabled;
 
 /* The slab-related functions. */
-void kmemcheck_alloc_shadow(struct kmem_cache *s, gfp_t flags, int node,
-			    struct page *page, int order);
-void kmemcheck_free_shadow(struct kmem_cache *s, struct page *page, int order);
+void kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node);
+void kmemcheck_free_shadow(struct page *page, int order);
 void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object,
 			  size_t size);
 void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size);
 
+void kmemcheck_pagealloc_alloc(struct page *p, unsigned int order,
+			       gfp_t gfpflags);
+
 void kmemcheck_show_pages(struct page *p, unsigned int n);
 void kmemcheck_hide_pages(struct page *p, unsigned int n);
 
@@ -27,6 +29,7 @@ void kmemcheck_mark_freed(void *address, unsigned int n);
 
 void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n);
 void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n);
+void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n);
 
 int kmemcheck_show_addr(unsigned long address);
 int kmemcheck_hide_addr(unsigned long address);
@@ -34,13 +37,12 @@ int kmemcheck_hide_addr(unsigned long address);
 #define kmemcheck_enabled 0
 
 static inline void
-kmemcheck_alloc_shadow(struct kmem_cache *s, gfp_t flags, int node,
-		       struct page *page, int order)
+kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node)
 {
 }
 
 static inline void
-kmemcheck_free_shadow(struct kmem_cache *s, struct page *page, int order)
+kmemcheck_free_shadow(struct page *page, int order)
 {
 }
 
@@ -55,6 +57,11 @@ static inline void kmemcheck_slab_free(struct kmem_cache *s, void *object,
 {
 }
 
+static inline void kmemcheck_pagealloc_alloc(struct page *p,
+	unsigned int order, gfp_t gfpflags)
+{
+}
+
 static inline bool kmemcheck_page_is_tracked(struct page *p)
 {
 	return false;
@@ -75,6 +82,22 @@ static inline void kmemcheck_mark_initialized(void *address, unsigned int n)
 static inline void kmemcheck_mark_freed(void *address, unsigned int n)
 {
 }
+
+static inline void kmemcheck_mark_unallocated_pages(struct page *p,
+						    unsigned int n)
+{
+}
+
+static inline void kmemcheck_mark_uninitialized_pages(struct page *p,
+						      unsigned int n)
+{
+}
+
+static inline void kmemcheck_mark_initialized_pages(struct page *p,
+						    unsigned int n)
+{
+}
+
 #endif /* CONFIG_KMEMCHECK */
 
 #endif /* LINUX_KMEMCHECK_H */
diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c
index eaa41b80261..fd814fd6131 100644
--- a/mm/kmemcheck.c
+++ b/mm/kmemcheck.c
@@ -1,10 +1,10 @@
+#include <linux/gfp.h>
 #include <linux/mm_types.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/kmemcheck.h>
 
-void kmemcheck_alloc_shadow(struct kmem_cache *s, gfp_t flags, int node,
-			   struct page *page, int order)
+void kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node)
 {
 	struct page *shadow;
 	int pages;
@@ -16,7 +16,7 @@ void kmemcheck_alloc_shadow(struct kmem_cache *s, gfp_t flags, int node,
 	 * With kmemcheck enabled, we need to allocate a memory area for the
 	 * shadow bits as well.
 	 */
-	shadow = alloc_pages_node(node, flags, order);
+	shadow = alloc_pages_node(node, flags | __GFP_NOTRACK, order);
 	if (!shadow) {
 		if (printk_ratelimit())
 			printk(KERN_ERR "kmemcheck: failed to allocate "
@@ -33,23 +33,17 @@ void kmemcheck_alloc_shadow(struct kmem_cache *s, gfp_t flags, int node,
 	 * the memory accesses.
 	 */
 	kmemcheck_hide_pages(page, pages);
-
-	/*
-	 * Objects from caches that have a constructor don't get
-	 * cleared when they're allocated, so we need to do it here.
-	 */
-	if (s->ctor)
-		kmemcheck_mark_uninitialized_pages(page, pages);
-	else
-		kmemcheck_mark_unallocated_pages(page, pages);
 }
 
-void kmemcheck_free_shadow(struct kmem_cache *s, struct page *page, int order)
+void kmemcheck_free_shadow(struct page *page, int order)
 {
 	struct page *shadow;
 	int pages;
 	int i;
 
+	if (!kmemcheck_page_is_tracked(page))
+		return;
+
 	pages = 1 << order;
 
 	kmemcheck_show_pages(page, pages);
@@ -101,3 +95,28 @@ void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size)
 	if (!s->ctor && !(s->flags & SLAB_DESTROY_BY_RCU))
 		kmemcheck_mark_freed(object, size);
 }
+
+void kmemcheck_pagealloc_alloc(struct page *page, unsigned int order,
+			       gfp_t gfpflags)
+{
+	int pages;
+
+	if (gfpflags & (__GFP_HIGHMEM | __GFP_NOTRACK))
+		return;
+
+	pages = 1 << order;
+
+	/*
+	 * NOTE: We choose to track GFP_ZERO pages too; in fact, they
+	 * can become uninitialized by copying uninitialized memory
+	 * into them.
+	 */
+
+	/* XXX: Can use zone->node for node? */
+	kmemcheck_alloc_shadow(page, order, gfpflags, -1);
+
+	if (gfpflags & __GFP_ZERO)
+		kmemcheck_mark_initialized_pages(page, pages);
+	else
+		kmemcheck_mark_uninitialized_pages(page, pages);
+}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 17d5f539a9a..0727896a88a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -23,6 +23,7 @@
 #include <linux/bootmem.h>
 #include <linux/compiler.h>
 #include <linux/kernel.h>
+#include <linux/kmemcheck.h>
 #include <linux/module.h>
 #include <linux/suspend.h>
 #include <linux/pagevec.h>
@@ -546,6 +547,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	int i;
 	int bad = 0;
 
+	kmemcheck_free_shadow(page, order);
+
 	for (i = 0 ; i < (1 << order) ; ++i)
 		bad += free_pages_check(page + i);
 	if (bad)
@@ -994,6 +997,8 @@ static void free_hot_cold_page(struct page *page, int cold)
 	struct per_cpu_pages *pcp;
 	unsigned long flags;
 
+	kmemcheck_free_shadow(page, 0);
+
 	if (PageAnon(page))
 		page->mapping = NULL;
 	if (free_pages_check(page))
@@ -1047,6 +1052,16 @@ void split_page(struct page *page, unsigned int order)
 
 	VM_BUG_ON(PageCompound(page));
 	VM_BUG_ON(!page_count(page));
+
+#ifdef CONFIG_KMEMCHECK
+	/*
+	 * Split shadow pages too, because free(page[0]) would
+	 * otherwise free the whole shadow.
+	 */
+	if (kmemcheck_page_is_tracked(page))
+		split_page(virt_to_page(page[0].shadow), order);
+#endif
+
 	for (i = 1; i < (1 << order); i++)
 		set_page_refcounted(page + i);
 }
@@ -1667,7 +1682,10 @@ nopage:
 		dump_stack();
 		show_mem();
 	}
+	return page;
 got_pg:
+	if (kmemcheck_enabled)
+		kmemcheck_pagealloc_alloc(page, order, gfp_mask);
 	return page;
 }
 EXPORT_SYMBOL(__alloc_pages_internal);
diff --git a/mm/slab.c b/mm/slab.c
index 95b6c5eb40b..6a1ad0b9a94 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1612,7 +1612,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
 		flags |= __GFP_RECLAIMABLE;
 
-	page = alloc_pages_node(nodeid, flags, cachep->gfporder);
+	page = alloc_pages_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder);
 	if (!page)
 		return NULL;
 
@@ -1626,8 +1626,14 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	for (i = 0; i < nr_pages; i++)
 		__SetPageSlab(page + i);
 
-	if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK))
-		kmemcheck_alloc_shadow(cachep, flags, nodeid, page, cachep->gfporder);
+	if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
+		kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
+
+		if (cachep->ctor)
+			kmemcheck_mark_uninitialized_pages(page, nr_pages);
+		else
+			kmemcheck_mark_unallocated_pages(page, nr_pages);
+	}
 
 	return page_address(page);
 }
@@ -1641,8 +1647,7 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 	struct page *page = virt_to_page(addr);
 	const unsigned long nr_freed = i;
 
-	if (kmemcheck_page_is_tracked(page))
-		kmemcheck_free_shadow(cachep, page, cachep->gfporder);
+	kmemcheck_free_shadow(page, cachep->gfporder);
 
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
 		sub_zone_page_state(page_zone(page),
diff --git a/mm/slub.c b/mm/slub.c
index 1cebaa747ad..898fb5047dc 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1066,6 +1066,8 @@ static inline struct page *alloc_slab_page(gfp_t flags, int node,
 {
 	int order = oo_order(oo);
 
+	flags |= __GFP_NOTRACK;
+
 	if (node == -1)
 		return alloc_pages(flags, order);
 	else
@@ -1097,7 +1099,18 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	if (kmemcheck_enabled
 		&& !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS)))
 	{
-		kmemcheck_alloc_shadow(s, flags, node, page, compound_order(page));
+		int pages = 1 << oo_order(oo);
+
+		kmemcheck_alloc_shadow(page, oo_order(oo), flags, node);
+
+		/*
+		 * Objects from caches that have a constructor don't get
+		 * cleared when they're allocated, so we need to do it here.
+		 */
+		if (s->ctor)
+			kmemcheck_mark_uninitialized_pages(page, pages);
+		else
+			kmemcheck_mark_unallocated_pages(page, pages);
 	}
 
 	page->objects = oo_objects(oo);
@@ -1173,8 +1186,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 		__ClearPageSlubDebug(page);
 	}
 
-	if (kmemcheck_page_is_tracked(page))
-		kmemcheck_free_shadow(s, page, compound_order(page));
+	kmemcheck_free_shadow(page, compound_order(page));
 
 	mod_zone_page_state(page_zone(page),
 		(s->flags & SLAB_RECLAIM_ACCOUNT) ?
@@ -2734,9 +2746,10 @@ EXPORT_SYMBOL(__kmalloc);
 
 static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
 {
-	struct page *page = alloc_pages_node(node, flags | __GFP_COMP,
-						get_order(size));
+	struct page *page;
 
+	flags |= __GFP_COMP | __GFP_NOTRACK;
+	page = alloc_pages_node(node, flags, get_order(size));
 	if (page)
 		return page_address(page);
 	else
-- 
cgit v1.2.3-70-g09d2


From fc7d0c9f2122e8bf58deaf1252b0e750df5b0e91 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Sat, 30 Aug 2008 12:16:05 +0200
Subject: kmemcheck: introduce bitfield API

Add the bitfield API which can be used to annotate bitfields in structs
and get rid of false positive reports.

According to Al Viro, the syntax we were using (putting #ifdef inside
macro arguments) was not valid C. He also suggested using begin/end
markers instead, which is what we do now.

[rebased for mainline inclusion]
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
---
 include/linux/kmemcheck.h | 50 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

(limited to 'include')

diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h
index 093d23969b1..47b39b7c7e8 100644
--- a/include/linux/kmemcheck.h
+++ b/include/linux/kmemcheck.h
@@ -33,6 +33,7 @@ void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n);
 
 int kmemcheck_show_addr(unsigned long address);
 int kmemcheck_hide_addr(unsigned long address);
+
 #else
 #define kmemcheck_enabled 0
 
@@ -100,4 +101,53 @@ static inline void kmemcheck_mark_initialized_pages(struct page *p,
 
 #endif /* CONFIG_KMEMCHECK */
 
+/*
+ * Bitfield annotations
+ *
+ * How to use: If you have a struct using bitfields, for example
+ *
+ *     struct a {
+ *             int x:8, y:8;
+ *     };
+ *
+ * then this should be rewritten as
+ *
+ *     struct a {
+ *             kmemcheck_bitfield_begin(flags);
+ *             int x:8, y:8;
+ *             kmemcheck_bitfield_end(flags);
+ *     };
+ *
+ * Now the "flags_begin" and "flags_end" members may be used to refer to the
+ * beginning and end, respectively, of the bitfield (and things like
+ * &x.flags_begin is allowed). As soon as the struct is allocated, the bit-
+ * fields should be annotated:
+ *
+ *     struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL);
+ *     kmemcheck_annotate_bitfield(a, flags);
+ *
+ * Note: We provide the same definitions for both kmemcheck and non-
+ * kmemcheck kernels. This makes it harder to introduce accidental errors. It
+ * is also allowed to pass NULL pointers to kmemcheck_annotate_bitfield().
+ */
+#define kmemcheck_bitfield_begin(name)	\
+	int name##_begin[0];
+
+#define kmemcheck_bitfield_end(name)	\
+	int name##_end[0];
+
+#define kmemcheck_annotate_bitfield(ptr, name)				\
+	do if (ptr) {							\
+		int _n = (long) &((ptr)->name##_end)			\
+			- (long) &((ptr)->name##_begin);		\
+		BUILD_BUG_ON(_n < 0);					\
+									\
+		kmemcheck_mark_initialized(&((ptr)->name##_begin), _n);	\
+	} while (0)
+
+#define kmemcheck_annotate_variable(var)				\
+	do {								\
+		kmemcheck_mark_initialized(&(var), sizeof(var));	\
+	} while (0)							\
+
 #endif /* LINUX_KMEMCHECK_H */
-- 
cgit v1.2.3-70-g09d2


From fe55f6d5c0cfec4a710ef6ff63f162b99d5f7842 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Sat, 30 Aug 2008 12:16:35 +0200
Subject: net: use kmemcheck bitfields API for skbuff

Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
---
 include/linux/skbuff.h | 7 +++++++
 net/core/skbuff.c      | 8 ++++++++
 2 files changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5fd389162f0..ed6537fc5b4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -15,6 +15,7 @@
 #define _LINUX_SKBUFF_H
 
 #include <linux/kernel.h>
+#include <linux/kmemcheck.h>
 #include <linux/compiler.h>
 #include <linux/time.h>
 #include <linux/cache.h>
@@ -346,6 +347,7 @@ struct sk_buff {
 		};
 	};
 	__u32			priority;
+	kmemcheck_bitfield_begin(flags1);
 	__u8			local_df:1,
 				cloned:1,
 				ip_summed:2,
@@ -356,6 +358,7 @@ struct sk_buff {
 				ipvs_property:1,
 				peeked:1,
 				nf_trace:1;
+	kmemcheck_bitfield_end(flags1);
 	__be16			protocol;
 
 	void			(*destructor)(struct sk_buff *skb);
@@ -375,6 +378,8 @@ struct sk_buff {
 	__u16			tc_verd;	/* traffic control verdict */
 #endif
 #endif
+
+	kmemcheck_bitfield_begin(flags2);
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
 	__u8			ndisc_nodetype:2;
 #endif
@@ -382,6 +387,8 @@ struct sk_buff {
 	__u8			do_not_encrypt:1;
 	__u8			requeue:1;
 #endif
+	kmemcheck_bitfield_end(flags2);
+
 	/* 0/13/14 bit hole */
 
 #ifdef CONFIG_NET_DMA
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c2e4fb8f354..f0c4c6ad774 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -39,6 +39,7 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
+#include <linux/kmemcheck.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 #include <linux/in.h>
@@ -201,6 +202,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	skb->data = data;
 	skb_reset_tail_pointer(skb);
 	skb->end = skb->tail + size;
+	kmemcheck_annotate_bitfield(skb, flags1);
+	kmemcheck_annotate_bitfield(skb, flags2);
 	/* make sure we initialize shinfo sequentially */
 	shinfo = skb_shinfo(skb);
 	atomic_set(&shinfo->dataref, 1);
@@ -217,6 +220,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 		struct sk_buff *child = skb + 1;
 		atomic_t *fclone_ref = (atomic_t *) (child + 1);
 
+		kmemcheck_annotate_bitfield(child, flags1);
+		kmemcheck_annotate_bitfield(child, flags2);
 		skb->fclone = SKB_FCLONE_ORIG;
 		atomic_set(fclone_ref, 1);
 
@@ -633,6 +638,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
 		if (!n)
 			return NULL;
+
+		kmemcheck_annotate_bitfield(n, flags1);
+		kmemcheck_annotate_bitfield(n, flags2);
 		n->fclone = SKB_FCLONE_UNAVAILABLE;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 45e3ff82708c65c895d5c5882aff17ecf62a80b5 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Tue, 9 Sep 2008 06:43:12 +0200
Subject: net: annotate bitfields in struct inet_sock

Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
---
 include/net/inet_sock.h | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index de0ecc71cf0..cbcda0b0b96 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -17,6 +17,7 @@
 #define _INET_SOCK_H
 
 
+#include <linux/kmemcheck.h>
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/jhash.h>
@@ -66,14 +67,16 @@ struct inet_request_sock {
 	__be32			loc_addr;
 	__be32			rmt_addr;
 	__be16			rmt_port;
-	u16			snd_wscale : 4, 
-				rcv_wscale : 4, 
+	kmemcheck_bitfield_begin(flags);
+	u16			snd_wscale : 4,
+				rcv_wscale : 4,
 				tstamp_ok  : 1,
 				sack_ok	   : 1,
 				wscale_ok  : 1,
 				ecn_ok	   : 1,
 				acked	   : 1,
 				no_srccheck: 1;
+	kmemcheck_bitfield_end(flags);
 	struct ip_options	*opt;
 };
 
@@ -198,9 +201,12 @@ static inline int inet_sk_ehashfn(const struct sock *sk)
 static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops)
 {
 	struct request_sock *req = reqsk_alloc(ops);
+	struct inet_request_sock *ireq = inet_rsk(req);
 
-	if (req != NULL)
-		inet_rsk(req)->opt = NULL;
+	if (req != NULL) {
+		kmemcheck_annotate_bitfield(ireq, flags);
+		ireq->opt = NULL;
+	}
 
 	return req;
 }
-- 
cgit v1.2.3-70-g09d2


From 9e337b0fb3baa3c22490365b1bdee6f4741413d4 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Sat, 18 Oct 2008 17:37:51 +0200
Subject: net: annotate inet_timewait_sock bitfields

The use of bitfields here would lead to false positive warnings with
kmemcheck. Silence them.

(Additionally, one erroneous comment related to the bitfield was also
fixed.)

Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
---
 include/net/inet_timewait_sock.h | 5 ++++-
 net/ipv4/inet_timewait_sock.c    | 3 +++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 4b8ece22b8e..b63b80fac56 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -16,6 +16,7 @@
 #define _INET_TIMEWAIT_SOCK_
 
 
+#include <linux/kmemcheck.h>
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/timer.h>
@@ -127,10 +128,12 @@ struct inet_timewait_sock {
 	__be32			tw_rcv_saddr;
 	__be16			tw_dport;
 	__u16			tw_num;
+	kmemcheck_bitfield_begin(flags);
 	/* And these are ours. */
 	__u8			tw_ipv6only:1,
 				tw_transparent:1;
-	/* 15 bits hole, try to pack */
+	/* 14 bits hole, try to pack */
+	kmemcheck_bitfield_end(flags);
 	__u16			tw_ipv6_offset;
 	unsigned long		tw_ttd;
 	struct inet_bind_bucket	*tw_tb;
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 8554d0ea171..03169fc8487 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/kmemcheck.h>
 #include <net/inet_hashtables.h>
 #include <net/inet_timewait_sock.h>
 #include <net/ip.h>
@@ -117,6 +118,8 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
 	if (tw != NULL) {
 		const struct inet_sock *inet = inet_sk(sk);
 
+		kmemcheck_annotate_bitfield(tw, flags);
+
 		/* Give us an identity. */
 		tw->tw_daddr	    = inet->daddr;
 		tw->tw_rcv_saddr    = inet->rcv_saddr;
-- 
cgit v1.2.3-70-g09d2


From c53bd2e1949ddbe06fe2a6079c0658d58ce25edb Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Thu, 26 Feb 2009 14:05:59 +0100
Subject: c2port: annotate bitfield for kmemcheck

This silences a false positive warning with kmemcheck.

Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
---
 drivers/misc/c2port/core.c | 2 ++
 include/linux/c2port.h     | 3 +++
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/drivers/misc/c2port/core.c b/drivers/misc/c2port/core.c
index 0207dd59090..b5346b4db91 100644
--- a/drivers/misc/c2port/core.c
+++ b/drivers/misc/c2port/core.c
@@ -15,6 +15,7 @@
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
+#include <linux/kmemcheck.h>
 #include <linux/ctype.h>
 #include <linux/delay.h>
 #include <linux/idr.h>
@@ -891,6 +892,7 @@ struct c2port_device *c2port_device_register(char *name,
 		return ERR_PTR(-EINVAL);
 
 	c2dev = kmalloc(sizeof(struct c2port_device), GFP_KERNEL);
+	kmemcheck_annotate_bitfield(c2dev, flags);
 	if (unlikely(!c2dev))
 		return ERR_PTR(-ENOMEM);
 
diff --git a/include/linux/c2port.h b/include/linux/c2port.h
index 7b5a2388ba6..2a5cd867c36 100644
--- a/include/linux/c2port.h
+++ b/include/linux/c2port.h
@@ -10,6 +10,7 @@
  */
 
 #include <linux/device.h>
+#include <linux/kmemcheck.h>
 
 #define C2PORT_NAME_LEN			32
 
@@ -20,8 +21,10 @@
 /* Main struct */
 struct c2port_ops;
 struct c2port_device {
+	kmemcheck_bitfield_begin(flags);
 	unsigned int access:1;
 	unsigned int flash_access:1;
+	kmemcheck_bitfield_end(flags);
 
 	int id;
 	char name[C2PORT_NAME_LEN];
-- 
cgit v1.2.3-70-g09d2


From a98b65a3ad71e702e760bc63f57684301628e837 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Thu, 26 Feb 2009 14:46:57 +0100
Subject: net: annotate struct sock bitfield

2009/2/24 Ingo Molnar <mingo@elte.hu>:
> ok, this is the last warning i have from today's overnight -tip
> testruns - a 32-bit system warning in sock_init_data():
>
> [    2.610389] NET: Registered protocol family 16
> [    2.616138] initcall netlink_proto_init+0x0/0x170 returned 0 after 7812 usecs
> [    2.620010] WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (f642c184)
> [    2.624002] 010000000200000000000000604990c000000000000000000000000000000000
> [    2.634076]  i i i i i i u u i i i i i i i i i i i i i i i i i i i i i i i i
> [    2.641038]          ^
> [    2.643376]
> [    2.644004] Pid: 1, comm: swapper Not tainted (2.6.29-rc6-tip-01751-g4d1c22c-dirty #885)
> [    2.648003] EIP: 0060:[<c07141a1>] EFLAGS: 00010282 CPU: 0
> [    2.652008] EIP is at sock_init_data+0xa1/0x190
> [    2.656003] EAX: 0001a800 EBX: f6836c00 ECX: 00463000 EDX: c0e46fe0
> [    2.660003] ESI: f642c180 EDI: c0b83088 EBP: f6863ed8 ESP: c0c412ec
> [    2.664003]  DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
> [    2.668003] CR0: 8005003b CR2: f682c400 CR3: 00b91000 CR4: 000006f0
> [    2.672003] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
> [    2.676003] DR6: ffff4ff0 DR7: 00000400
> [    2.680002]  [<c07423e5>] __netlink_create+0x35/0xa0
> [    2.684002]  [<c07443cc>] netlink_kernel_create+0x4c/0x140
> [    2.688002]  [<c072755e>] rtnetlink_net_init+0x1e/0x40
> [    2.696002]  [<c071b601>] register_pernet_operations+0x11/0x30
> [    2.700002]  [<c071b72c>] register_pernet_subsys+0x1c/0x30
> [    2.704002]  [<c0bf3c8c>] rtnetlink_init+0x4c/0x100
> [    2.708002]  [<c0bf4669>] netlink_proto_init+0x159/0x170
> [    2.712002]  [<c0101124>] do_one_initcall+0x24/0x150
> [    2.716002]  [<c0bbf3c7>] do_initcalls+0x27/0x40
> [    2.723201]  [<c0bbf3fc>] do_basic_setup+0x1c/0x20
> [    2.728002]  [<c0bbfb8a>] kernel_init+0x5a/0xa0
> [    2.732002]  [<c0103e47>] kernel_thread_helper+0x7/0x10
> [    2.736002]  [<ffffffff>] 0xffffffff

We fix this false positive by annotating the bitfield in struct
sock.

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
---
 include/net/sock.h | 2 ++
 net/core/sock.c    | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 4bb1ff9fd15..d933da00d50 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -218,9 +218,11 @@ struct sock {
 #define sk_hash			__sk_common.skc_hash
 #define sk_prot			__sk_common.skc_prot
 #define sk_net			__sk_common.skc_net
+	kmemcheck_bitfield_begin(flags);
 	unsigned char		sk_shutdown : 2,
 				sk_no_check : 2,
 				sk_userlocks : 4;
+	kmemcheck_bitfield_end(flags);
 	unsigned char		sk_protocol;
 	unsigned short		sk_type;
 	int			sk_rcvbuf;
diff --git a/net/core/sock.c b/net/core/sock.c
index 7dbf3ffb35c..ce72c0ae424 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -941,6 +941,8 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
 		sk = kmalloc(prot->obj_size, priority);
 
 	if (sk != NULL) {
+		kmemcheck_annotate_bitfield(sk, flags);
+
 		if (security_sk_alloc(sk, family, priority))
 			goto out_free;
 
-- 
cgit v1.2.3-70-g09d2


From 1744a21d57d9c60136461adb6afa85e51b3e94d9 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Sat, 28 Feb 2009 08:29:44 +0100
Subject: trace: annotate bitfields in struct ring_buffer_event

This gets rid of a heap of false-positive warnings from the tracer
code due to the use of bitfields.

[rebased for mainline inclusion]
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
---
 include/linux/ring_buffer.h | 4 ++++
 kernel/trace/ring_buffer.c  | 3 +++
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 8670f1575fe..29f8599e6be 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_RING_BUFFER_H
 #define _LINUX_RING_BUFFER_H
 
+#include <linux/kmemcheck.h>
 #include <linux/mm.h>
 #include <linux/seq_file.h>
 
@@ -11,7 +12,10 @@ struct ring_buffer_iter;
  * Don't refer to this struct directly, use functions below.
  */
 struct ring_buffer_event {
+	kmemcheck_bitfield_begin(bitfield);
 	u32		type_len:5, time_delta:27;
+	kmemcheck_bitfield_end(bitfield);
+
 	u32		array[];
 };
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2e642b2b725..dc4dc70171c 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -10,6 +10,7 @@
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
 #include <linux/hardirq.h>
+#include <linux/kmemcheck.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
 #include <linux/mutex.h>
@@ -1270,6 +1271,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 	if (tail < BUF_PAGE_SIZE) {
 		/* Mark the rest of the page with padding */
 		event = __rb_page_index(tail_page, tail);
+		kmemcheck_annotate_bitfield(event, bitfield);
 		rb_event_set_padding(event);
 	}
 
@@ -1327,6 +1329,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		return NULL;
 
 	event = __rb_page_index(tail_page, tail);
+	kmemcheck_annotate_bitfield(event, bitfield);
 	rb_update_event(event, type, length);
 
 	/* The passed in type is zero for DATA */
-- 
cgit v1.2.3-70-g09d2


From 3446a8aa7ebcbc0a799e5e8fc4f2da0738d6bc21 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Sat, 16 May 2009 11:22:14 +0200
Subject: fs: introduce __getname_gfp()

The purpose of this change is to allow __getname() users to pass a
custom GFP mask to kmem_cache_alloc(). This is needed for annotating
a certain kmemcheck false positive.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
---
 include/linux/fs.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index ede84fa7da5..6d12174fbe1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1919,8 +1919,9 @@ extern void __init vfs_caches_init(unsigned long);
 
 extern struct kmem_cache *names_cachep;
 
-#define __getname()	kmem_cache_alloc(names_cachep, GFP_KERNEL)
-#define __putname(name) kmem_cache_free(names_cachep, (void *)(name))
+#define __getname_gfp(gfp)	kmem_cache_alloc(names_cachep, (gfp))
+#define __getname()		__getname_gfp(GFP_KERNEL)
+#define __putname(name)		kmem_cache_free(names_cachep, (void *)(name))
 #ifndef CONFIG_AUDITSYSCALL
 #define putname(name)   __putname(name)
 #else
-- 
cgit v1.2.3-70-g09d2


From 465a454f254ee2ff7acc4aececbe31f8af046bc0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 15 Jun 2009 12:31:37 +0200
Subject: x86, mm: Add __get_user_pages_fast()

Introduce a gup_fast() variant which is usable from IRQ/NMI context.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Nick Piggin <npiggin@suse.de>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/gup.c  | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mm.h |  6 ++++++
 2 files changed, 62 insertions(+)

(limited to 'include')

diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 6340cef6798..697d5727c11 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -219,6 +219,62 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
 	return 1;
 }
 
+/*
+ * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
+ * back to the regular GUP.
+ */
+int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			  struct page **pages)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr, len, end;
+	unsigned long next;
+	unsigned long flags;
+	pgd_t *pgdp;
+	int nr = 0;
+
+	start &= PAGE_MASK;
+	addr = start;
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+					(void __user *)start, len)))
+		return 0;
+
+	/*
+	 * XXX: batch / limit 'nr', to avoid large irq off latency
+	 * needs some instrumenting to determine the common sizes used by
+	 * important workloads (eg. DB2), and whether limiting the batch size
+	 * will decrease performance.
+	 *
+	 * It seems like we're in the clear for the moment. Direct-IO is
+	 * the main guy that batches up lots of get_user_pages, and even
+	 * they are limited to 64-at-a-time which is not so many.
+	 */
+	/*
+	 * This doesn't prevent pagetable teardown, but does prevent
+	 * the pagetables and pages from being freed on x86.
+	 *
+	 * So long as we atomically load page table pointers versus teardown
+	 * (which we do on x86, with the above PAE exception), we can follow the
+	 * address down to the the page and take a ref on it.
+	 */
+	local_irq_save(flags);
+	pgdp = pgd_offset(mm, addr);
+	do {
+		pgd_t pgd = *pgdp;
+
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(pgd))
+			break;
+		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+			break;
+	} while (pgdp++, addr = next, addr != end);
+	local_irq_restore(flags);
+
+	return nr;
+}
+
 /**
  * get_user_pages_fast() - pin user pages in memory
  * @start:	starting user address
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ad613ed66ab..b457bc047ab 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -862,6 +862,12 @@ extern int mprotect_fixup(struct vm_area_struct *vma,
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages);
 
+/*
+ * doesn't attempt to fault and will return short.
+ */
+int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			  struct page **pages);
+
 /*
  * A callback you can register to apply pressure to ageable caches.
  *
-- 
cgit v1.2.3-70-g09d2


From 9974458e2f9a11dbd2f4bd14fab5a79af4907b41 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 15 Jun 2009 21:45:16 +1000
Subject: perf_counter: Make set_perf_counter_pending() declaration common

At present, every architecture that supports perf_counters has to
declare set_perf_counter_pending() in its arch-specific headers.
This consolidates the declarations into a single declaration in one
common place, include/linux/perf_counter.h.  On powerpc, we continue
to provide a static inline definition of set_perf_counter_pending()
in the powerpc hw_irq.h.

Also, this removes from the x86 perf_counter.h the unused null
definitions of {test,clear}_perf_counter_pending.

Reported-by: Mike Frysinger <vapier.adi@gmail.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: benh@kernel.crashing.org
LKML-Reference: <18998.13388.920691.523227@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/include/asm/hw_irq.h       | 1 -
 arch/powerpc/include/asm/perf_counter.h | 2 ++
 arch/x86/include/asm/perf_counter.h     | 5 -----
 include/linux/perf_counter.h            | 1 +
 4 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 53512374e1c..1974cf191b0 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -163,7 +163,6 @@ static inline unsigned long test_perf_counter_pending(void)
 	return 0;
 }
 
-static inline void set_perf_counter_pending(void) {}
 static inline void clear_perf_counter_pending(void) {}
 #endif /* CONFIG_PERF_COUNTERS */
 
diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h
index cc7c887705b..b398a84edce 100644
--- a/arch/powerpc/include/asm/perf_counter.h
+++ b/arch/powerpc/include/asm/perf_counter.h
@@ -10,6 +10,8 @@
  */
 #include <linux/types.h>
 
+#include <asm/hw_irq.h>
+
 #define MAX_HWCOUNTERS		8
 #define MAX_EVENT_ALTERNATIVES	8
 #define MAX_LIMITED_HWCOUNTERS	2
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
index 876ed97147b..5fb33e160ea 100644
--- a/arch/x86/include/asm/perf_counter.h
+++ b/arch/x86/include/asm/perf_counter.h
@@ -84,11 +84,6 @@ union cpuid10_edx {
 #define MSR_ARCH_PERFMON_FIXED_CTR2			0x30b
 #define X86_PMC_IDX_FIXED_BUS_CYCLES			(X86_PMC_IDX_FIXED + 2)
 
-extern void set_perf_counter_pending(void);
-
-#define clear_perf_counter_pending()	do { } while (0)
-#define test_perf_counter_pending()	(0)
-
 #ifdef CONFIG_PERF_COUNTERS
 extern void init_hw_perf_counters(void);
 extern void perf_counters_lapic_init(void);
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 1b3118a1023..eccae437fe3 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -604,6 +604,7 @@ extern void perf_counter_task_tick(struct task_struct *task, int cpu);
 extern int perf_counter_init_task(struct task_struct *child);
 extern void perf_counter_exit_task(struct task_struct *child);
 extern void perf_counter_free_task(struct task_struct *task);
+extern void set_perf_counter_pending(void);
 extern void perf_counter_do_pending(void);
 extern void perf_counter_print_debug(void);
 extern void __perf_disable(void);
-- 
cgit v1.2.3-70-g09d2


From ea4431906d86686e541de527915ccbe556761b16 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Sat, 13 Jun 2009 12:19:05 -0500
Subject: [SCSI] aic79xx: make driver respect nvram for IU and QAS settings

This patch allows the Adaptec firmware to pass on its values for Packetize and
QAS.  To do this, the settings max_iu and max_qas have been introduced into
the SPI transport class and populated from the adaptec NVram tables.  Domain
validation in the SPI transport class will respect the max settings when
configuring to the highest possible speed for testing.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/scsi/aic7xxx/aic79xx_osm.c | 10 +++-------
 drivers/scsi/scsi_transport_spi.c  | 18 ++++++++++++++----
 include/scsi/scsi_transport_spi.h  |  4 ++++
 3 files changed, 21 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.c b/drivers/scsi/aic7xxx/aic79xx_osm.c
index 0f829b3b8ab..75b23317bd2 100644
--- a/drivers/scsi/aic7xxx/aic79xx_osm.c
+++ b/drivers/scsi/aic7xxx/aic79xx_osm.c
@@ -627,19 +627,15 @@ ahd_linux_target_alloc(struct scsi_target *starget)
 					    starget->id, &tstate);
 
 		if ((flags  & CFPACKETIZED) == 0) {
-			/* Do not negotiate packetized transfers */
-			spi_rd_strm(starget) = 0;
-			spi_pcomp_en(starget) = 0;
-			spi_rti(starget) = 0;
-			spi_wr_flow(starget) = 0;
-			spi_hold_mcs(starget) = 0;
+			/* don't negotiate packetized (IU) transfers */
+			spi_max_iu(starget) = 0;
 		} else {
 			if ((ahd->features & AHD_RTI) == 0)
 				spi_rti(starget) = 0;
 		}
 
 		if ((flags & CFQAS) == 0)
-			spi_qas(starget) = 0;
+			spi_max_qas(starget) = 0;
 
 		/* Transinfo values have been set to BIOS settings */
 		spi_max_width(starget) = (flags & CFWIDEB) ? 1 : 0;
diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c
index f49f55c6bfc..654a34fb04c 100644
--- a/drivers/scsi/scsi_transport_spi.c
+++ b/drivers/scsi/scsi_transport_spi.c
@@ -234,8 +234,10 @@ static int spi_setup_transport_attrs(struct transport_container *tc,
 	spi_width(starget) = 0;	/* narrow */
 	spi_max_width(starget) = 1;
 	spi_iu(starget) = 0;	/* no IU */
+	spi_max_iu(starget) = 1;
 	spi_dt(starget) = 0;	/* ST */
 	spi_qas(starget) = 0;
+	spi_max_qas(starget) = 1;
 	spi_wr_flow(starget) = 0;
 	spi_rd_strm(starget) = 0;
 	spi_rti(starget) = 0;
@@ -360,9 +362,9 @@ static DEVICE_ATTR(field, S_IRUGO,				\
 /* The Parallel SCSI Tranport Attributes: */
 spi_transport_max_attr(offset, "%d\n");
 spi_transport_max_attr(width, "%d\n");
-spi_transport_rd_attr(iu, "%d\n");
+spi_transport_max_attr(iu, "%d\n");
 spi_transport_rd_attr(dt, "%d\n");
-spi_transport_rd_attr(qas, "%d\n");
+spi_transport_max_attr(qas, "%d\n");
 spi_transport_rd_attr(wr_flow, "%d\n");
 spi_transport_rd_attr(rd_strm, "%d\n");
 spi_transport_rd_attr(rti, "%d\n");
@@ -874,13 +876,13 @@ spi_dv_device_internal(struct scsi_device *sdev, u8 *buffer)
 
 	/* try QAS requests; this should be harmless to set if the
 	 * target supports it */
-	if (scsi_device_qas(sdev)) {
+	if (scsi_device_qas(sdev) && spi_max_qas(starget)) {
 		DV_SET(qas, 1);
 	} else {
 		DV_SET(qas, 0);
 	}
 
-	if (scsi_device_ius(sdev) && min_period < 9) {
+	if (scsi_device_ius(sdev) && spi_max_iu(starget) && min_period < 9) {
 		/* This u320 (or u640). Set IU transfers */
 		DV_SET(iu, 1);
 		/* Then set the optional parameters */
@@ -1412,12 +1414,18 @@ static mode_t target_attribute_is_visible(struct kobject *kobj,
 	else if (attr == &dev_attr_iu.attr &&
 		 spi_support_ius(starget))
 		return TARGET_ATTRIBUTE_HELPER(iu);
+	else if (attr == &dev_attr_max_iu.attr &&
+		 spi_support_ius(starget))
+		return TARGET_ATTRIBUTE_HELPER(iu);
 	else if (attr == &dev_attr_dt.attr &&
 		 spi_support_dt(starget))
 		return TARGET_ATTRIBUTE_HELPER(dt);
 	else if (attr == &dev_attr_qas.attr &&
 		 spi_support_qas(starget))
 		return TARGET_ATTRIBUTE_HELPER(qas);
+	else if (attr == &dev_attr_max_qas.attr &&
+		 spi_support_qas(starget))
+		return TARGET_ATTRIBUTE_HELPER(qas);
 	else if (attr == &dev_attr_wr_flow.attr &&
 		 spi_support_ius(starget))
 		return TARGET_ATTRIBUTE_HELPER(wr_flow);
@@ -1447,8 +1455,10 @@ static struct attribute *target_attributes[] = {
 	&dev_attr_width.attr,
 	&dev_attr_max_width.attr,
 	&dev_attr_iu.attr,
+	&dev_attr_max_iu.attr,
 	&dev_attr_dt.attr,
 	&dev_attr_qas.attr,
+	&dev_attr_max_qas.attr,
 	&dev_attr_wr_flow.attr,
 	&dev_attr_rd_strm.attr,
 	&dev_attr_rti.attr,
diff --git a/include/scsi/scsi_transport_spi.h b/include/scsi/scsi_transport_spi.h
index 286e9628ed8..7497a383b1a 100644
--- a/include/scsi/scsi_transport_spi.h
+++ b/include/scsi/scsi_transport_spi.h
@@ -36,8 +36,10 @@ struct spi_transport_attrs {
 	unsigned int width:1;	/* 0 - narrow, 1 - wide */
 	unsigned int max_width:1;
 	unsigned int iu:1;	/* Information Units enabled */
+	unsigned int max_iu:1;
 	unsigned int dt:1;	/* DT clocking enabled */
 	unsigned int qas:1;	/* Quick Arbitration and Selection enabled */
+	unsigned int max_qas:1;
 	unsigned int wr_flow:1;	/* Write Flow control enabled */
 	unsigned int rd_strm:1;	/* Read streaming enabled */
 	unsigned int rti:1;	/* Retain Training Information */
@@ -77,8 +79,10 @@ struct spi_host_attrs {
 #define spi_width(x)	(((struct spi_transport_attrs *)&(x)->starget_data)->width)
 #define spi_max_width(x) (((struct spi_transport_attrs *)&(x)->starget_data)->max_width)
 #define spi_iu(x)	(((struct spi_transport_attrs *)&(x)->starget_data)->iu)
+#define spi_max_iu(x)	(((struct spi_transport_attrs *)&(x)->starget_data)->max_iu)
 #define spi_dt(x)	(((struct spi_transport_attrs *)&(x)->starget_data)->dt)
 #define spi_qas(x)	(((struct spi_transport_attrs *)&(x)->starget_data)->qas)
+#define spi_max_qas(x)	(((struct spi_transport_attrs *)&(x)->starget_data)->max_qas)
 #define spi_wr_flow(x)	(((struct spi_transport_attrs *)&(x)->starget_data)->wr_flow)
 #define spi_rd_strm(x)	(((struct spi_transport_attrs *)&(x)->starget_data)->rd_strm)
 #define spi_rti(x)	(((struct spi_transport_attrs *)&(x)->starget_data)->rti)
-- 
cgit v1.2.3-70-g09d2


From 78be6914cb5c6d648617c51bb99bf81f28471d89 Mon Sep 17 00:00:00 2001
From: Wu Zhangjin <wuzj@lemote.com>
Date: Sun, 14 Jun 2009 14:52:30 +0800
Subject: tracing: fix undeclared 'PAGE_SIZE' in include/linux/trace_seq.h

when compiling linux-mips with kmemtrace enabled, there will be an
error:

include/linux/trace_seq.h:12: error: 'PAGE_SIZE' undeclared here (not in
				a function)

I checked the source code and found trace_seq.h used PAGE_SIZE but not
included the relative header file, so, fix it via adding the header file
<asm/page.h>

Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Wu Zhangjin <wuzj@lemote.com>
LKML-Reference: <1244962350-28702-1-git-send-email-wuzhangjin@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/trace_seq.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index c68bccba207..c134dd1fe6b 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -3,6 +3,8 @@
 
 #include <linux/fs.h>
 
+#include <asm/page.h>
+
 /*
  * Trace sequences are used to allow a function to call several other functions
  * to create a string of data to use (up to a max of PAGE_SIZE.
-- 
cgit v1.2.3-70-g09d2


From 6dae44f9a55f13765c877687602cd2bf1a057cfd Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 15 Jun 2009 18:52:52 +0200
Subject: ata: add ata_id_pio_need_iordy() helper (v2)

v2:
Minor fixes per Sergei's review.

Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: Jeff Garzik <jgarzik@pobox.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/linux/ata.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/ata.h b/include/linux/ata.h
index 915da43edee..9c75921f0c1 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -800,6 +800,20 @@ static inline int ata_id_is_ssd(const u16 *id)
 	return id[ATA_ID_ROT_SPEED] == 0x01;
 }
 
+static inline int ata_id_pio_need_iordy(const u16 *id, const u8 pio)
+{
+	/* CF spec. r4.1 Table 22 says no IORDY on PIO5 and PIO6. */
+	if (pio > 4 && ata_id_is_cfa(id))
+		return 0;
+	/* For PIO3 and higher it is mandatory. */
+	if (pio > 2)
+		return 1;
+	/* Turn it on when possible. */
+	if (ata_id_has_iordy(id))
+		return 1;
+	return 0;
+}
+
 static inline int ata_drive_40wire(const u16 *dev_id)
 {
 	if (ata_id_is_sata(dev_id))
-- 
cgit v1.2.3-70-g09d2


From c9ef59ff01b6bd1c7360a64fcc8556a1193c2ed0 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 15 Jun 2009 18:52:53 +0200
Subject: ide: IORDY handling fixes

Add ide_pio_need_iordy() helper and convert host drivers to use it.

This fixes it8172, it8213, pdc202xx_old, piix, slc90e66 and siimage
host drivers to handle IORDY correctly.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/at91_ide.c      |  3 +--
 drivers/ide/ide-xfer-mode.c |  6 ++++++
 drivers/ide/it8172.c        |  2 +-
 drivers/ide/it8213.c        |  2 +-
 drivers/ide/pdc202xx_old.c  |  2 +-
 drivers/ide/piix.c          |  2 +-
 drivers/ide/siimage.c       | 15 ++++++++-------
 drivers/ide/sl82c105.c      |  3 +--
 drivers/ide/slc90e66.c      |  2 +-
 include/linux/ide.h         |  1 +
 10 files changed, 22 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/at91_ide.c b/drivers/ide/at91_ide.c
index fc0949a8cfd..dbfeda42b94 100644
--- a/drivers/ide/at91_ide.c
+++ b/drivers/ide/at91_ide.c
@@ -185,8 +185,7 @@ static void at91_ide_set_pio_mode(ide_drive_t *drive, const u8 pio)
 	timing = ide_timing_find_mode(XFER_PIO_0 + pio);
 	BUG_ON(!timing);
 
-	if ((pio > 2 || ata_id_has_iordy(drive->id)) &&
-	    !(ata_id_is_cfa(drive->id) && pio > 4))
+	if (ide_pio_need_iordy(drive, pio))
 		use_iordy = 1;
 
 	apply_timings(chipselect, pio, timing, use_iordy);
diff --git a/drivers/ide/ide-xfer-mode.c b/drivers/ide/ide-xfer-mode.c
index af44be9d546..0b47ca13907 100644
--- a/drivers/ide/ide-xfer-mode.c
+++ b/drivers/ide/ide-xfer-mode.c
@@ -107,6 +107,12 @@ u8 ide_get_best_pio_mode(ide_drive_t *drive, u8 mode_wanted, u8 max_mode)
 }
 EXPORT_SYMBOL_GPL(ide_get_best_pio_mode);
 
+int ide_pio_need_iordy(ide_drive_t *drive, const u8 pio)
+{
+	return ata_id_pio_need_iordy(drive->id, pio);
+}
+EXPORT_SYMBOL_GPL(ide_pio_need_iordy);
+
 int ide_set_pio_mode(ide_drive_t *drive, const u8 mode)
 {
 	ide_hwif_t *hwif = drive->hwif;
diff --git a/drivers/ide/it8172.c b/drivers/ide/it8172.c
index e021078cd06..0d266a5b524 100644
--- a/drivers/ide/it8172.c
+++ b/drivers/ide/it8172.c
@@ -66,7 +66,7 @@ static void it8172_set_pio_mode(ide_drive_t *drive, const u8 pio)
 	if (drive->media == ide_disk)
 		/* enable prefetch */
 		drive_enables |= 0x0004 << (drive->dn * 4);
-	if (ata_id_has_iordy(drive->id))
+	if (ide_pio_need_iordy(drive, pio))
 		/* enable IORDY sample-point */
 		drive_enables |= 0x0002 << (drive->dn * 4);
 
diff --git a/drivers/ide/it8213.c b/drivers/ide/it8213.c
index d7969b6d139..47976167796 100644
--- a/drivers/ide/it8213.c
+++ b/drivers/ide/it8213.c
@@ -50,7 +50,7 @@ static void it8213_set_pio_mode(ide_drive_t *drive, const u8 pio)
 		control |= 1;	/* Programmable timing on */
 	if (drive->media != ide_disk)
 		control |= 4;	/* ATAPI */
-	if (pio > 2)
+	if (ide_pio_need_iordy(drive, pio))
 		control |= 2;	/* IORDY */
 	if (is_slave) {
 		master_data |=  0x4000;
diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c
index b6abf7e52ca..4f5b536282a 100644
--- a/drivers/ide/pdc202xx_old.c
+++ b/drivers/ide/pdc202xx_old.c
@@ -73,7 +73,7 @@ static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed)
 		 * Prefetch_EN / IORDY_EN / PA[3:0] bits of register A
 		 */
 		AP &= ~0x3f;
-		if (ata_id_iordy_disable(drive->id))
+		if (ide_pio_need_iordy(drive, speed - XFER_PIO_0))
 			AP |= 0x20;	/* set IORDY_EN bit */
 		if (drive->media == ide_disk)
 			AP |= 0x10;	/* set Prefetch_EN bit */
diff --git a/drivers/ide/piix.c b/drivers/ide/piix.c
index 69860dea382..bf14f39bd3a 100644
--- a/drivers/ide/piix.c
+++ b/drivers/ide/piix.c
@@ -98,7 +98,7 @@ static void piix_set_pio_mode(ide_drive_t *drive, const u8 pio)
 		control |= 1;	/* Programmable timing on */
 	if (drive->media == ide_disk)
 		control |= 4;	/* Prefetch, post write */
-	if (pio > 2)
+	if (ide_pio_need_iordy(drive, pio))
 		control |= 2;	/* IORDY */
 	if (is_slave) {
 		master_data |=  0x4000;
diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c
index bd82d228608..6a643fdf0e6 100644
--- a/drivers/ide/siimage.c
+++ b/drivers/ide/siimage.c
@@ -32,7 +32,6 @@
  *  smarter code in libata.
  *
  * TODO:
- * - IORDY fixes
  * - VDMA support
  */
 
@@ -234,8 +233,7 @@ static u8 sil_sata_udma_filter(ide_drive_t *drive)
  *	@pio: PIO mode number
  *
  *	Load the timing settings for this device mode into the
- *	controller. If we are in PIO mode 3 or 4 turn on IORDY
- *	monitoring (bit 9). The TF timing is bits 31:16
+ *	controller.
  */
 
 static void sil_set_pio_mode(ide_drive_t *drive, u8 pio)
@@ -276,13 +274,16 @@ static void sil_set_pio_mode(ide_drive_t *drive, u8 pio)
 	/* now set up IORDY */
 	speedp = sil_ioread16(dev, tfaddr - 2);
 	speedp &= ~0x200;
-	if (pio > 2)
-		speedp |= 0x200;
-	sil_iowrite16(dev, speedp, tfaddr - 2);
 
 	mode = sil_ioread8(dev, base + addr_mask);
 	mode &= ~(unit ? 0x30 : 0x03);
-	mode |= unit ? 0x10 : 0x01;
+
+	if (ide_pio_need_iordy(drive, pio)) {
+		speedp |= 0x200;
+		mode |= unit ? 0x10 : 0x01;
+	}
+
+	sil_iowrite16(dev, speedp, tfaddr - 2);
 	sil_iowrite8(dev, mode, base + addr_mask);
 }
 
diff --git a/drivers/ide/sl82c105.c b/drivers/ide/sl82c105.c
index 0924abff52f..88ac47085cd 100644
--- a/drivers/ide/sl82c105.c
+++ b/drivers/ide/sl82c105.c
@@ -61,8 +61,7 @@ static unsigned int get_pio_timings(ide_drive_t *drive, u8 pio)
 	if (cmd_off == 0)
 		cmd_off = 1;
 
-	if ((pio > 2 || ata_id_has_iordy(drive->id)) &&
-	    !(pio > 4 && ata_id_is_cfa(drive->id)))
+	if (ide_pio_need_iordy(drive, pio))
 		iordy = 0x40;
 
 	return (cmd_on - 1) << 8 | (cmd_off - 1) | iordy;
diff --git a/drivers/ide/slc90e66.c b/drivers/ide/slc90e66.c
index f55d7d6313e..9aec78d3bcf 100644
--- a/drivers/ide/slc90e66.c
+++ b/drivers/ide/slc90e66.c
@@ -44,7 +44,7 @@ static void slc90e66_set_pio_mode(ide_drive_t *drive, const u8 pio)
 		control |= 1;	/* Programmable timing on */
 	if (drive->media == ide_disk)
 		control |= 4;	/* Prefetch, post write */
-	if (pio > 2)
+	if (ide_pio_need_iordy(drive, pio))
 		control |= 2;	/* IORDY */
 	if (is_slave) {
 		master_data |=  0x4000;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index cdb29b6c195..8c7f5e50e91 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1514,6 +1514,7 @@ int ide_timing_compute(ide_drive_t *, u8, struct ide_timing *, int, int);
 int ide_scan_pio_blacklist(char *);
 const char *ide_xfer_verbose(u8);
 u8 ide_get_best_pio_mode(ide_drive_t *, u8, u8);
+int ide_pio_need_iordy(ide_drive_t *, const u8);
 int ide_set_pio_mode(ide_drive_t *, u8);
 int ide_set_dma_mode(ide_drive_t *, u8);
 void ide_set_pio(ide_drive_t *, u8);
-- 
cgit v1.2.3-70-g09d2


From 5880b5de7101cc123778c5d17d4f3986351f3122 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 15 Jun 2009 18:52:54 +0200
Subject: ide: don't enable IORDY at a probe time

* Add 'unsigned long port_flags' field to ide_hwif_t.

* Add IDE_PFLAG_PROBING port flag and keep it set during probing.

* Fix ide_pio_need_iordy() to not enable IORDY at a probe time
  (IORDY may lead to controller lock up on certain controllers
   if the port is not occupied).

Loosely based on the recent libata's fix by Tejun, thanks to Alan
for the hint that IDE may also need it.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-probe.c     | 16 +++++++++++++++-
 drivers/ide/ide-xfer-mode.c |  6 ++++++
 include/linux/ide.h         |  6 ++++++
 3 files changed, 27 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index f371b0de314..fdd04bcd556 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1378,6 +1378,9 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
 
 		ide_init_port(hwif, i & 1, d);
 		ide_port_cable_detect(hwif);
+
+		hwif->port_flags |= IDE_PFLAG_PROBING;
+
 		ide_port_init_devices(hwif);
 	}
 
@@ -1388,6 +1391,8 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
 		if (ide_probe_port(hwif) == 0)
 			hwif->present = 1;
 
+		hwif->port_flags &= ~IDE_PFLAG_PROBING;
+
 		if ((hwif->host_flags & IDE_HFLAG_4DRIVES) == 0 ||
 		    hwif->mate == NULL || hwif->mate->present == 0) {
 			if (ide_register_port(hwif)) {
@@ -1569,11 +1574,20 @@ EXPORT_SYMBOL_GPL(ide_host_remove);
 
 void ide_port_scan(ide_hwif_t *hwif)
 {
+	int rc;
+
 	ide_port_apply_params(hwif);
 	ide_port_cable_detect(hwif);
+
+	hwif->port_flags |= IDE_PFLAG_PROBING;
+
 	ide_port_init_devices(hwif);
 
-	if (ide_probe_port(hwif) < 0)
+	rc = ide_probe_port(hwif);
+
+	hwif->port_flags &= ~IDE_PFLAG_PROBING;
+
+	if (rc < 0)
 		return;
 
 	hwif->present = 1;
diff --git a/drivers/ide/ide-xfer-mode.c b/drivers/ide/ide-xfer-mode.c
index 0b47ca13907..46d203ce60c 100644
--- a/drivers/ide/ide-xfer-mode.c
+++ b/drivers/ide/ide-xfer-mode.c
@@ -109,6 +109,12 @@ EXPORT_SYMBOL_GPL(ide_get_best_pio_mode);
 
 int ide_pio_need_iordy(ide_drive_t *drive, const u8 pio)
 {
+	/*
+	 * IORDY may lead to controller lock up on certain controllers
+	 * if the port is not occupied.
+	 */
+	if (pio == 0 && (drive->hwif->port_flags & IDE_PFLAG_PROBING))
+		return 0;
 	return ata_id_pio_need_iordy(drive->id, pio);
 }
 EXPORT_SYMBOL_GPL(ide_pio_need_iordy);
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 8c7f5e50e91..8771d49aa87 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -661,6 +661,10 @@ struct ide_dma_ops {
 	u8	(*dma_sff_read_status)(struct hwif_s *);
 };
 
+enum {
+	IDE_PFLAG_PROBING		= (1 << 0),
+};
+
 struct ide_host;
 
 typedef struct hwif_s {
@@ -677,6 +681,8 @@ typedef struct hwif_s {
 
 	ide_drive_t	*devices[MAX_DRIVES + 1];
 
+	unsigned long	port_flags;
+
 	u8 major;	/* our major number */
 	u8 index;	/* 0 for ide0; 1 for ide1; ... */
 	u8 channel;	/* for dual-port chips: 0=primary, 1=secondary */
-- 
cgit v1.2.3-70-g09d2


From f4d3ffa52a402ec9e8699571cf3811763d284459 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Mon, 15 Jun 2009 18:52:58 +0200
Subject: ide: move ack_intr() method into 'struct ide_port_ops' (take 2)

Move the ack_intr() method into 'struct ide_port_ops', also renaming it to
test_irq() while at it...

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/buddha.c    | 15 +++++++++------
 drivers/ide/falconide.c |  1 -
 drivers/ide/gayle.c     | 14 +++++++++-----
 drivers/ide/ide-io.c    |  3 ++-
 drivers/ide/ide-probe.c |  1 -
 drivers/ide/macide.c    | 12 ++++--------
 drivers/ide/q40ide.c    |  7 ++-----
 include/linux/ide.h     | 10 +---------
 8 files changed, 27 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/buddha.c b/drivers/ide/buddha.c
index 9cd7b115763..ab4f169d083 100644
--- a/drivers/ide/buddha.c
+++ b/drivers/ide/buddha.c
@@ -99,7 +99,7 @@ static const char *buddha_board_name[] = { "Buddha", "Catweasel", "X-Surf" };
      *  Check and acknowledge the interrupt status
      */
 
-static int buddha_ack_intr(ide_hwif_t *hwif)
+static int buddha_test_irq(ide_hwif_t *hwif)
 {
     unsigned char ch;
 
@@ -118,8 +118,7 @@ static void xsurf_clear_irq(ide_drive_t *drive)
 }
 
 static void __init buddha_setup_ports(struct ide_hw *hw, unsigned long base,
-				      unsigned long ctl, unsigned long irq_port,
-				      ide_ack_intr_t *ack_intr)
+				      unsigned long ctl, unsigned long irq_port)
 {
 	int i;
 
@@ -134,14 +133,19 @@ static void __init buddha_setup_ports(struct ide_hw *hw, unsigned long base,
 	hw->io_ports.irq_addr = irq_port;
 
 	hw->irq = IRQ_AMIGA_PORTS;
-	hw->ack_intr = ack_intr;
 }
 
+static const struct ide_port_ops buddha_port_ops = {
+	.test_irq		= buddha_test_irq,
+};
+
 static const struct ide_port_ops xsurf_port_ops = {
 	.clear_irq		= xsurf_clear_irq,
+	.test_irq		= buddha_test_irq,
 };
 
 static const struct ide_port_info buddha_port_info = {
+	.port_ops		= &buddha_port_ops,
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
 	.irq_flags		= IRQF_SHARED,
 	.chipset		= ide_generic,
@@ -217,8 +221,7 @@ fail_base2:
 				irq_port = buddha_board + xsurf_irqports[i];
 			}
 
-			buddha_setup_ports(&hw[i], base, ctl, irq_port,
-					   buddha_ack_intr);
+			buddha_setup_ports(&hw[i], base, ctl, irq_port);
 
 			hws[i] = &hw[i];
 		}
diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c
index 22fa27389c3..a5a07ccb81a 100644
--- a/drivers/ide/falconide.c
+++ b/drivers/ide/falconide.c
@@ -128,7 +128,6 @@ static void __init falconide_setup_ports(struct ide_hw *hw)
 	hw->io_ports.ctl_addr = ATA_HD_BASE + ATA_HD_CONTROL;
 
 	hw->irq = IRQ_MFP_IDE;
-	hw->ack_intr = NULL;
 }
 
     /*
diff --git a/drivers/ide/gayle.c b/drivers/ide/gayle.c
index c5dd1e5cca4..b9e517de6a8 100644
--- a/drivers/ide/gayle.c
+++ b/drivers/ide/gayle.c
@@ -66,7 +66,7 @@ MODULE_PARM_DESC(doubler, "enable support for IDE doublers");
      *  Check and acknowledge the interrupt status
      */
 
-static int gayle_ack_intr(ide_hwif_t *hwif)
+static int gayle_test_irq(ide_hwif_t *hwif)
 {
     unsigned char ch;
 
@@ -85,8 +85,7 @@ static void gayle_a1200_clear_irq(ide_drive_t *drive)
 }
 
 static void __init gayle_setup_ports(struct ide_hw *hw, unsigned long base,
-				     unsigned long ctl, unsigned long irq_port,
-				     ide_ack_intr_t *ack_intr)
+				     unsigned long ctl, unsigned long irq_port)
 {
 	int i;
 
@@ -101,11 +100,15 @@ static void __init gayle_setup_ports(struct ide_hw *hw, unsigned long base,
 	hw->io_ports.irq_addr = irq_port;
 
 	hw->irq = IRQ_AMIGA_PORTS;
-	hw->ack_intr = ack_intr;
 }
 
+static const struct ide_port_ops gayle_a4000_port_ops = {
+	.test_irq		= gayle_test_irq,
+};
+
 static const struct ide_port_ops gayle_a1200_port_ops = {
 	.clear_irq		= gayle_a1200_clear_irq,
+	.test_irq		= gayle_test_irq,
 };
 
 static const struct ide_port_info gayle_port_info = {
@@ -148,6 +151,7 @@ found:
 	if (a4000) {
 	    phys_base = GAYLE_BASE_4000;
 	    irqport = (unsigned long)ZTWO_VADDR(GAYLE_IRQ_4000);
+	    d.port_ops = &gayle_a4000_port_ops;
 	} else {
 	    phys_base = GAYLE_BASE_1200;
 	    irqport = (unsigned long)ZTWO_VADDR(GAYLE_IRQ_1200);
@@ -164,7 +168,7 @@ found:
 	base = (unsigned long)ZTWO_VADDR(phys_base + i * GAYLE_NEXT_PORT);
 	ctrlport = GAYLE_HAS_CONTROL_REG ? (base + GAYLE_CONTROL) : 0;
 
-	gayle_setup_ports(&hw[i], base, ctrlport, irqport, gayle_ack_intr);
+	gayle_setup_ports(&hw[i], base, ctrlport, irqport);
 
 	hws[i] = &hw[i];
     }
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 9e53efe9fb2..1059f809b80 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -804,7 +804,8 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 
 	spin_lock_irqsave(&hwif->lock, flags);
 
-	if (hwif->ack_intr && hwif->ack_intr(hwif) == 0)
+	if (hwif->port_ops && hwif->port_ops->test_irq &&
+	    hwif->port_ops->test_irq(hwif) == 0)
 		goto out;
 
 	handler = hwif->handler;
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index fdd04bcd556..c2e7159d793 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1170,7 +1170,6 @@ static void ide_init_port_hw(ide_hwif_t *hwif, struct ide_hw *hw)
 	hwif->irq = hw->irq;
 	hwif->dev = hw->dev;
 	hwif->gendev.parent = hw->parent ? hw->parent : hw->dev;
-	hwif->ack_intr = hw->ack_intr;
 	hwif->config_data = hw->config;
 }
 
diff --git a/drivers/ide/macide.c b/drivers/ide/macide.c
index 05cdab35a75..505ec43e560 100644
--- a/drivers/ide/macide.c
+++ b/drivers/ide/macide.c
@@ -53,7 +53,7 @@
 
 volatile unsigned char *ide_ifr = (unsigned char *) (IDE_BASE + IDE_IFR);
 
-int macide_ack_intr(ide_hwif_t* hwif)
+int macide_test_irq(ide_hwif_t *hwif)
 {
 	if (*ide_ifr & 0x20)
 		return 1;
@@ -66,7 +66,7 @@ static void macide_clear_irq(ide_drive_t *drive)
 }
 
 static void __init macide_setup_ports(struct ide_hw *hw, unsigned long base,
-				      int irq, ide_ack_intr_t *ack_intr)
+				      int irq)
 {
 	int i;
 
@@ -78,11 +78,11 @@ static void __init macide_setup_ports(struct ide_hw *hw, unsigned long base,
 	hw->io_ports.ctl_addr = base + IDE_CONTROL;
 
 	hw->irq = irq;
-	hw->ack_intr = ack_intr;
 }
 
 static const struct ide_port_ops macide_port_ops = {
 	.clear_irq		= macide_clear_irq,
+	.test_irq		= macide_test_irq,
 };
 
 static const struct ide_port_info macide_port_info = {
@@ -101,7 +101,6 @@ static const char *mac_ide_name[] =
 
 static int __init macide_init(void)
 {
-	ide_ack_intr_t *ack_intr;
 	unsigned long base;
 	int irq;
 	struct ide_hw hw, *hws[] = { &hw };
@@ -113,17 +112,14 @@ static int __init macide_init(void)
 	switch (macintosh_config->ide_type) {
 	case MAC_IDE_QUADRA:
 		base = IDE_BASE;
-		ack_intr = macide_ack_intr;
 		irq = IRQ_NUBUS_F;
 		break;
 	case MAC_IDE_PB:
 		base = IDE_BASE;
-		ack_intr = macide_ack_intr;
 		irq = IRQ_NUBUS_C;
 		break;
 	case MAC_IDE_BABOON:
 		base = BABOON_BASE;
-		ack_intr = NULL;
 		d.port_ops = NULL;
 		irq = IRQ_BABOON_1;
 		break;
@@ -134,7 +130,7 @@ static int __init macide_init(void)
 	printk(KERN_INFO "ide: Macintosh %s IDE controller\n",
 			 mac_ide_name[macintosh_config->ide_type - 1]);
 
-	macide_setup_ports(&hw, base, irq, ack_intr);
+	macide_setup_ports(&hw, base, irq);
 
 	return ide_host_add(&d, hws, 1, NULL);
 }
diff --git a/drivers/ide/q40ide.c b/drivers/ide/q40ide.c
index ab49a97023d..90786083b43 100644
--- a/drivers/ide/q40ide.c
+++ b/drivers/ide/q40ide.c
@@ -51,9 +51,7 @@ static int q40ide_default_irq(unsigned long base)
 /*
  * Addresses are pretranslated for Q40 ISA access.
  */
-static void q40_ide_setup_ports(struct ide_hw *hw, unsigned long base,
-			ide_ack_intr_t *ack_intr,
-			int irq)
+static void q40_ide_setup_ports(struct ide_hw *hw, unsigned long base, int irq)
 {
 	memset(hw, 0, sizeof(*hw));
 	/* BIG FAT WARNING: 
@@ -69,7 +67,6 @@ static void q40_ide_setup_ports(struct ide_hw *hw, unsigned long base,
 	hw->io_ports.ctl_addr = Q40_ISA_IO_B(base + 0x206);
 
 	hw->irq = irq;
-	hw->ack_intr = ack_intr;
 }
 
 static void q40ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd,
@@ -156,7 +153,7 @@ static int __init q40ide_init(void)
 		release_region(pcide_bases[i], 8);
 		continue;
 	}
-	q40_ide_setup_ports(&hw[i], pcide_bases[i], NULL,
+	q40_ide_setup_ports(&hw[i], pcide_bases[i],
 			q40ide_default_irq(pcide_bases[i]));
 
 	hws[i] = &hw[i];
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 8771d49aa87..08c91e21cf4 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -156,12 +156,6 @@ enum {
 #define REQ_PARK_HEADS		0x22
 #define REQ_UNPARK_HEADS	0x23
 
-/*
- * Check for an interrupt and acknowledge the interrupt status
- */
-struct hwif_s;
-typedef int (ide_ack_intr_t)(struct hwif_s *);
-
 /*
  * hwif_chipset_t is used to keep track of the specific hardware
  * chipset used by each IDE interface, if known.
@@ -185,7 +179,6 @@ struct ide_hw {
 	};
 
 	int		irq;			/* our irq number */
-	ide_ack_intr_t	*ack_intr;		/* acknowledge interrupt */
 	struct device	*dev, *parent;
 	unsigned long	config;
 };
@@ -636,6 +629,7 @@ struct ide_port_ops {
 	void	(*maskproc)(ide_drive_t *, int);
 	void	(*quirkproc)(ide_drive_t *);
 	void	(*clear_irq)(ide_drive_t *);
+	int	(*test_irq)(struct hwif_s *);
 
 	u8	(*mdma_filter)(ide_drive_t *);
 	u8	(*udma_filter)(ide_drive_t *);
@@ -701,8 +695,6 @@ typedef struct hwif_s {
 
 	struct device *dev;
 
-	ide_ack_intr_t *ack_intr;
-
 	void (*rw_disk)(ide_drive_t *, struct request *);
 
 	const struct ide_tp_ops		*tp_ops;
-- 
cgit v1.2.3-70-g09d2


From 5bfb151f1f565e6082304a30e8c81dfb6ed0b0c8 Mon Sep 17 00:00:00 2001
From: Joao Ramos <joao.ramos@inov.pt>
Date: Mon, 15 Jun 2009 22:13:44 +0200
Subject: ide: do not access ide_drive_t 'drive_data' field directly

Change ide_drive_t 'drive_data' field from 'unsigned int' type to 'void *'
type, allowing a wider range of values/types to be stored in this field.

Added 'ide_get_drivedata' and 'ide_set_drivedata' helpers to get and set
the 'drive_data' field.

Fixed all host drivers to maintain coherency with the change in the
'drive_data' field type.

Signed-off-by: Joao Ramos <joao.ramos@inov.pt>
[bart: fix qd65xx build, cast to 'unsigned long', minor Coding Style fixups]
Acked-by: Sergei Shtylyov <sshtylyov@ru.montavista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/cmd64x.c   |  8 +++++---
 drivers/ide/cs5536.c   | 23 +++++++++++++++--------
 drivers/ide/ht6560b.c  | 33 ++++++++++++++++++++++++---------
 drivers/ide/icside.c   | 10 ++++++----
 drivers/ide/opti621.c  |  8 +++++---
 drivers/ide/qd65xx.c   | 11 +++++++----
 drivers/ide/qd65xx.h   | 11 +++++++++--
 drivers/ide/sl82c105.c | 18 ++++++++++++------
 include/linux/ide.h    | 12 +++++++++++-
 9 files changed, 94 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c
index fd904e923a9..03c86209446 100644
--- a/drivers/ide/cmd64x.c
+++ b/drivers/ide/cmd64x.c
@@ -118,8 +118,9 @@ static void cmd64x_tune_pio(ide_drive_t *drive, const u8 pio)
 	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	struct ide_timing *t	= ide_timing_find_mode(XFER_PIO_0 + pio);
+	unsigned long setup_count;
 	unsigned int cycle_time;
-	u8 setup_count, arttim = 0;
+	u8 arttim = 0;
 
 	static const u8 setup_values[] = {0x40, 0x40, 0x40, 0x80, 0, 0xc0};
 	static const u8 arttim_regs[4] = {ARTTIM0, ARTTIM1, ARTTIM23, ARTTIM23};
@@ -140,10 +141,11 @@ static void cmd64x_tune_pio(ide_drive_t *drive, const u8 pio)
 	if (hwif->channel) {
 		ide_drive_t *pair = ide_get_pair_dev(drive);
 
-		drive->drive_data = setup_count;
+		ide_set_drivedata(drive, (void *)setup_count);
 
 		if (pair)
-			setup_count = max_t(u8, setup_count, pair->drive_data);
+			setup_count = max_t(u8, setup_count,
+					(unsigned long)ide_get_drivedata(pair));
 	}
 
 	if (setup_count > 5)		/* shouldn't actually happen... */
diff --git a/drivers/ide/cs5536.c b/drivers/ide/cs5536.c
index 0332a95eefd..9623b852c61 100644
--- a/drivers/ide/cs5536.c
+++ b/drivers/ide/cs5536.c
@@ -146,14 +146,16 @@ static void cs5536_set_pio_mode(ide_drive_t *drive, const u8 pio)
 	struct pci_dev *pdev = to_pci_dev(drive->hwif->dev);
 	ide_drive_t *pair = ide_get_pair_dev(drive);
 	int cshift = (drive->dn & 1) ? IDE_CAST_D1_SHIFT : IDE_CAST_D0_SHIFT;
+	unsigned long timings = (unsigned long)ide_get_drivedata(drive);
 	u32 cast;
 	u8 cmd_pio = pio;
 
 	if (pair)
 		cmd_pio = min(pio, ide_get_best_pio_mode(pair, 255, 4));
 
-	drive->drive_data &= (IDE_DRV_MASK << 8);
-	drive->drive_data |= drv_timings[pio];
+	timings &= (IDE_DRV_MASK << 8);
+	timings |= drv_timings[pio];
+	ide_set_drivedata(drive, (void *)timings);
 
 	cs5536_program_dtc(drive, drv_timings[pio]);
 
@@ -186,6 +188,7 @@ static void cs5536_set_dma_mode(ide_drive_t *drive, const u8 mode)
 
 	struct pci_dev *pdev = to_pci_dev(drive->hwif->dev);
 	int dshift = (drive->dn & 1) ? IDE_D1_SHIFT : IDE_D0_SHIFT;
+	unsigned long timings = (unsigned long)ide_get_drivedata(drive);
 	u32 etc;
 
 	cs5536_read(pdev, ETC, &etc);
@@ -195,8 +198,9 @@ static void cs5536_set_dma_mode(ide_drive_t *drive, const u8 mode)
 		etc |= udma_timings[mode - XFER_UDMA_0] << dshift;
 	} else { /* MWDMA */
 		etc &= ~(IDE_ETC_UDMA_MASK << dshift);
-		drive->drive_data &= IDE_DRV_MASK;
-		drive->drive_data |= mwdma_timings[mode - XFER_MW_DMA_0] << 8;
+		timings &= IDE_DRV_MASK;
+		timings |= mwdma_timings[mode - XFER_MW_DMA_0] << 8;
+		ide_set_drivedata(drive, (void *)timings);
 	}
 
 	cs5536_write(pdev, ETC, etc);
@@ -204,9 +208,11 @@ static void cs5536_set_dma_mode(ide_drive_t *drive, const u8 mode)
 
 static void cs5536_dma_start(ide_drive_t *drive)
 {
+	unsigned long timings = (unsigned long)ide_get_drivedata(drive);
+
 	if (drive->current_speed < XFER_UDMA_0 &&
-	    (drive->drive_data >> 8) != (drive->drive_data & IDE_DRV_MASK))
-		cs5536_program_dtc(drive, drive->drive_data >> 8);
+	    (timings >> 8) != (timings & IDE_DRV_MASK))
+		cs5536_program_dtc(drive, timings >> 8);
 
 	ide_dma_start(drive);
 }
@@ -214,10 +220,11 @@ static void cs5536_dma_start(ide_drive_t *drive)
 static int cs5536_dma_end(ide_drive_t *drive)
 {
 	int ret = ide_dma_end(drive);
+	unsigned long timings = (unsigned long)ide_get_drivedata(drive);
 
 	if (drive->current_speed < XFER_UDMA_0 &&
-	    (drive->drive_data >> 8) != (drive->drive_data & IDE_DRV_MASK))
-		cs5536_program_dtc(drive, drive->drive_data & IDE_DRV_MASK);
+	    (timings >> 8) != (timings & IDE_DRV_MASK))
+		cs5536_program_dtc(drive, timings & IDE_DRV_MASK);
 
 	return ret;
 }
diff --git a/drivers/ide/ht6560b.c b/drivers/ide/ht6560b.c
index 2fb0f296500..aafed8060e1 100644
--- a/drivers/ide/ht6560b.c
+++ b/drivers/ide/ht6560b.c
@@ -44,7 +44,12 @@
  *    bit3 (0x08): "1" 3 cycle time, "0" 2 cycle time	      (?)
  */
 #define HT_CONFIG_PORT	  0x3e6
-#define HT_CONFIG(drivea) (u8)(((drivea)->drive_data & 0xff00) >> 8)
+
+static inline u8 HT_CONFIG(ide_drive_t *drive)
+{
+	return ((unsigned long)ide_get_drivedata(drive) & 0xff00) >> 8;
+}
+
 /*
  * FIFO + PREFETCH (both a/b-model)
  */
@@ -90,7 +95,11 @@
  * Active Time for each drive. Smaller value gives higher speed.
  * In case of failures you should probably fall back to a higher value.
  */
-#define HT_TIMING(drivea) (u8)((drivea)->drive_data & 0x00ff)
+static inline u8 HT_TIMING(ide_drive_t *drive)
+{
+	return (unsigned long)ide_get_drivedata(drive) & 0x00ff;
+}
+
 #define HT_TIMING_DEFAULT 0xff
 
 /*
@@ -242,23 +251,27 @@ static DEFINE_SPINLOCK(ht6560b_lock);
  */
 static void ht_set_prefetch(ide_drive_t *drive, u8 state)
 {
-	unsigned long flags;
+	unsigned long flags, config;
 	int t = HT_PREFETCH_MODE << 8;
 
 	spin_lock_irqsave(&ht6560b_lock, flags);
 
+	config = (unsigned long)ide_get_drivedata(drive);
+
 	/*
 	 *  Prefetch mode and unmask irq seems to conflict
 	 */
 	if (state) {
-		drive->drive_data |= t;   /* enable prefetch mode */
+		config |= t;   /* enable prefetch mode */
 		drive->dev_flags |= IDE_DFLAG_NO_UNMASK;
 		drive->dev_flags &= ~IDE_DFLAG_UNMASK;
 	} else {
-		drive->drive_data &= ~t;  /* disable prefetch mode */
+		config &= ~t;  /* disable prefetch mode */
 		drive->dev_flags &= ~IDE_DFLAG_NO_UNMASK;
 	}
 
+	ide_set_drivedata(drive, (void *)config);
+
 	spin_unlock_irqrestore(&ht6560b_lock, flags);
 
 #ifdef DEBUG
@@ -268,7 +281,7 @@ static void ht_set_prefetch(ide_drive_t *drive, u8 state)
 
 static void ht6560b_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
-	unsigned long flags;
+	unsigned long flags, config;
 	u8 timing;
 	
 	switch (pio) {
@@ -281,8 +294,10 @@ static void ht6560b_set_pio_mode(ide_drive_t *drive, const u8 pio)
 	timing = ht_pio2timings(drive, pio);
 
 	spin_lock_irqsave(&ht6560b_lock, flags);
-	drive->drive_data &= 0xff00;
-	drive->drive_data |= timing;
+	config = (unsigned long)ide_get_drivedata(drive);
+	config &= 0xff00;
+	config |= timing;
+	ide_set_drivedata(drive, (void *)config);
 	spin_unlock_irqrestore(&ht6560b_lock, flags);
 
 #ifdef DEBUG
@@ -299,7 +314,7 @@ static void __init ht6560b_init_dev(ide_drive_t *drive)
 	if (hwif->channel)
 		t |= (HT_SECONDARY_IF << 8);
 
-	drive->drive_data = t;
+	ide_set_drivedata(drive, (void *)t);
 }
 
 static int probe_ht6560b;
diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c
index 5af3d0ffaf0..0f67f1abbbd 100644
--- a/drivers/ide/icside.c
+++ b/drivers/ide/icside.c
@@ -187,7 +187,8 @@ static const expansioncard_ops_t icside_ops_arcin_v6 = {
  */
 static void icside_set_dma_mode(ide_drive_t *drive, const u8 xfer_mode)
 {
-	int cycle_time, use_dma_info = 0;
+	unsigned long cycle_time;
+	int use_dma_info = 0;
 
 	switch (xfer_mode) {
 	case XFER_MW_DMA_2:
@@ -218,10 +219,11 @@ static void icside_set_dma_mode(ide_drive_t *drive, const u8 xfer_mode)
 	if (use_dma_info && drive->id[ATA_ID_EIDE_DMA_TIME] > cycle_time)
 		cycle_time = drive->id[ATA_ID_EIDE_DMA_TIME];
 
-	drive->drive_data = cycle_time;
+	ide_set_drivedata(drive, (void *)cycle_time);
 
 	printk("%s: %s selected (peak %dMB/s)\n", drive->name,
-		ide_xfer_verbose(xfer_mode), 2000 / drive->drive_data);
+		ide_xfer_verbose(xfer_mode),
+		2000 / (unsigned long)ide_get_drivedata(drive));
 }
 
 static const struct ide_port_ops icside_v6_port_ops = {
@@ -277,7 +279,7 @@ static int icside_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd)
 	/*
 	 * Select the correct timing for this drive.
 	 */
-	set_dma_speed(ec->dma, drive->drive_data);
+	set_dma_speed(ec->dma, (unsigned long)ide_get_drivedata(drive));
 
 	/*
 	 * Tell the DMA engine about the SG table and
diff --git a/drivers/ide/opti621.c b/drivers/ide/opti621.c
index 6048eda3cd6..f1d70d6630f 100644
--- a/drivers/ide/opti621.c
+++ b/drivers/ide/opti621.c
@@ -138,6 +138,7 @@ static void opti621_set_pio_mode(ide_drive_t *drive, const u8 pio)
 	ide_hwif_t *hwif = drive->hwif;
 	ide_drive_t *pair = ide_get_pair_dev(drive);
 	unsigned long flags;
+	unsigned long mode = XFER_PIO_0 + pio, pair_mode;
 	u8 tim, misc, addr_pio = pio, clk;
 
 	/* DRDY is default 2 (by OPTi Databook) */
@@ -150,11 +151,12 @@ static void opti621_set_pio_mode(ide_drive_t *drive, const u8 pio)
 		{ 0x48, 0x34, 0x21, 0x10, 0x10 }	/* 25 MHz */
 	};
 
-	drive->drive_data = XFER_PIO_0 + pio;
+	ide_set_drivedata(drive, (void *)mode);
 
 	if (pair) {
-		if (pair->drive_data && pair->drive_data < drive->drive_data)
-			addr_pio = pair->drive_data - XFER_PIO_0;
+		pair_mode = (unsigned long)ide_get_drivedata(pair);
+		if (pair_mode && pair_mode < mode)
+			addr_pio = pair_mode - XFER_PIO_0;
 	}
 
 	spin_lock_irqsave(&opti621_lock, flags);
diff --git a/drivers/ide/qd65xx.c b/drivers/ide/qd65xx.c
index c9a13498689..74696edc8d1 100644
--- a/drivers/ide/qd65xx.c
+++ b/drivers/ide/qd65xx.c
@@ -180,8 +180,11 @@ static int qd_find_disk_type (ide_drive_t *drive,
 
 static void qd_set_timing (ide_drive_t *drive, u8 timing)
 {
-	drive->drive_data &= 0xff00;
-	drive->drive_data |= timing;
+	unsigned long data = (unsigned long)ide_get_drivedata(drive);
+
+	data &= 0xff00;
+	data |= timing;
+	ide_set_drivedata(drive, (void *)data);
 
 	printk(KERN_DEBUG "%s: %#x\n", drive->name, timing);
 }
@@ -292,7 +295,7 @@ static void __init qd6500_init_dev(ide_drive_t *drive)
 	u8 base = (hwif->config_data & 0xff00) >> 8;
 	u8 config = QD_CONFIG(hwif);
 
-	drive->drive_data = QD6500_DEF_DATA;
+	ide_set_drivedata(drive, (void *)QD6500_DEF_DATA);
 }
 
 static void __init qd6580_init_dev(ide_drive_t *drive)
@@ -308,7 +311,7 @@ static void __init qd6580_init_dev(ide_drive_t *drive)
 	} else
 		t2 = t1 = hwif->channel ? QD6580_DEF_DATA2 : QD6580_DEF_DATA;
 
-	drive->drive_data = (drive->dn & 1) ? t2 : t1;
+	ide_set_drivedata(drive, (void *)((drive->dn & 1) ? t2 : t1));
 }
 
 static const struct ide_tp_ops qd65xx_tp_ops = {
diff --git a/drivers/ide/qd65xx.h b/drivers/ide/qd65xx.h
index d7e67a1a1dc..1fba2a5f281 100644
--- a/drivers/ide/qd65xx.h
+++ b/drivers/ide/qd65xx.h
@@ -31,8 +31,15 @@
 
 #define QD_CONFIG(hwif)		((hwif)->config_data & 0x00ff)
 
-#define QD_TIMING(drive)	(u8)(((drive)->drive_data) & 0x00ff)
-#define QD_TIMREG(drive)	(u8)((((drive)->drive_data) & 0xff00) >> 8)
+static inline u8 QD_TIMING(ide_drive_t *drive)
+{
+	return (unsigned long)ide_get_drivedata(drive) & 0x00ff;
+}
+
+static inline u8 QD_TIMREG(ide_drive_t *drive)
+{
+	return ((unsigned long)ide_get_drivedata(drive) & 0xff00) >> 8;
+}
 
 #define QD6500_DEF_DATA		((QD_TIM1_PORT<<8) | (QD_ID3 ? 0x0c : 0x08))
 #define QD6580_DEF_DATA		((QD_TIM1_PORT<<8) | (QD_ID3 ? 0x0a : 0x00))
diff --git a/drivers/ide/sl82c105.c b/drivers/ide/sl82c105.c
index 6246bea585c..d698da470d6 100644
--- a/drivers/ide/sl82c105.c
+++ b/drivers/ide/sl82c105.c
@@ -73,6 +73,7 @@ static unsigned int get_pio_timings(ide_drive_t *drive, u8 pio)
 static void sl82c105_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
 	struct pci_dev *dev	= to_pci_dev(drive->hwif->dev);
+	unsigned long timings	= (unsigned long)ide_get_drivedata(drive);
 	int reg			= 0x44 + drive->dn * 4;
 	u16 drv_ctrl;
 
@@ -82,8 +83,9 @@ static void sl82c105_set_pio_mode(ide_drive_t *drive, const u8 pio)
 	 * Store the PIO timings so that we can restore them
 	 * in case DMA will be turned off...
 	 */
-	drive->drive_data &= 0xffff0000;
-	drive->drive_data |= drv_ctrl;
+	timings &= 0xffff0000;
+	timings |= drv_ctrl;
+	ide_set_drivedata(drive, (void *)timings);
 
 	pci_write_config_word(dev, reg,  drv_ctrl);
 	pci_read_config_word (dev, reg, &drv_ctrl);
@@ -99,6 +101,7 @@ static void sl82c105_set_pio_mode(ide_drive_t *drive, const u8 pio)
 static void sl82c105_set_dma_mode(ide_drive_t *drive, const u8 speed)
 {
 	static u16 mwdma_timings[] = {0x0707, 0x0201, 0x0200};
+	unsigned long timings = (unsigned long)ide_get_drivedata(drive);
 	u16 drv_ctrl;
 
  	DBG(("sl82c105_tune_chipset(drive:%s, speed:%s)\n",
@@ -110,8 +113,9 @@ static void sl82c105_set_dma_mode(ide_drive_t *drive, const u8 speed)
 	 * Store the DMA timings so that we can actually program
 	 * them when DMA will be turned on...
 	 */
-	drive->drive_data &= 0x0000ffff;
-	drive->drive_data |= (unsigned long)drv_ctrl << 16;
+	timings &= 0x0000ffff;
+	timings |= (unsigned long)drv_ctrl << 16;
+	ide_set_drivedata(drive, (void *)timings);
 }
 
 static int sl82c105_test_irq(ide_hwif_t *hwif)
@@ -194,7 +198,8 @@ static void sl82c105_dma_start(ide_drive_t *drive)
 
 	DBG(("%s(drive:%s)\n", __func__, drive->name));
 
-	pci_write_config_word(dev, reg, drive->drive_data >> 16);
+	pci_write_config_word(dev, reg,
+			      (unsigned long)ide_get_drivedata(drive) >> 16);
 
 	sl82c105_reset_host(dev);
 	ide_dma_start(drive);
@@ -219,7 +224,8 @@ static int sl82c105_dma_end(ide_drive_t *drive)
 
 	ret = ide_dma_end(drive);
 
-	pci_write_config_word(dev, reg, drive->drive_data);
+	pci_write_config_word(dev, reg,
+			      (unsigned long)ide_get_drivedata(drive));
 
 	return ret;
 }
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 08c91e21cf4..95c6e00a72e 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -532,7 +532,7 @@ struct ide_drive_s {
 
 	unsigned int	bios_cyl;	/* BIOS/fdisk/LILO number of cyls */
 	unsigned int	cyl;		/* "real" number of cyls */
-	unsigned int	drive_data;	/* used by set_pio_mode/dev_select() */
+	void		*drive_data;	/* used by set_pio_mode/dev_select() */
 	unsigned int	failures;	/* current failure count */
 	unsigned int	max_failures;	/* maximum allowed failure count */
 	u64		probed_capacity;/* initial/native media capacity */
@@ -1550,6 +1550,16 @@ static inline ide_drive_t *ide_get_pair_dev(ide_drive_t *drive)
 	return (peer->dev_flags & IDE_DFLAG_PRESENT) ? peer : NULL;
 }
 
+static inline void *ide_get_drivedata(ide_drive_t *drive)
+{
+	return drive->drive_data;
+}
+
+static inline void ide_set_drivedata(ide_drive_t *drive, void *data)
+{
+	drive->drive_data = data;
+}
+
 #define ide_port_for_each_dev(i, dev, port) \
 	for ((i) = 0; ((dev) = (port)->devices[i]) || (i) < MAX_DRIVES; (i)++)
 
-- 
cgit v1.2.3-70-g09d2


From 3959214f971417f4162926ac52ad4cd042958caa Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Tue, 24 Mar 2009 15:43:30 +0100
Subject: sched: delayed cleanup of user_struct

During bootup performance tracing we see repeated occurrences of
/sys/kernel/uid/* events for the same uid, leading to a,
in this case, rather pointless userspace processing for the
same uid over and over.

This is usually caused by tools which change their uid to "nobody",
to run without privileges to read data supplied by untrusted users.

This change delays the execution of the (already existing) scheduled
work, to cleanup the uid after one second, so the allocated and announced
uid can possibly be re-used by another process.

This is the current behavior, where almost every invocation of a
binary, which changes the uid, creates two events:
  $ read START < /sys/kernel/uevent_seqnum; \
  for i in `seq 100`; do su --shell=/bin/true bin; done; \
  read END < /sys/kernel/uevent_seqnum; \
  echo $(($END - $START))
  178

With the delayed cleanup, we get only two events, and userspace finishes
a bit faster too:
  $ read START < /sys/kernel/uevent_seqnum; \
  for i in `seq 100`; do su --shell=/bin/true bin; done; \
  read END < /sys/kernel/uevent_seqnum; \
  echo $(($END - $START))
  1

Acked-by: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/sched.h |  2 +-
 kernel/user.c         | 67 ++++++++++++++++++++++++++++++---------------------
 2 files changed, 40 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c900aa53007..7531b1c2820 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -674,7 +674,7 @@ struct user_struct {
 	struct task_group *tg;
 #ifdef CONFIG_SYSFS
 	struct kobject kobj;
-	struct work_struct work;
+	struct delayed_work work;
 #endif
 #endif
 
diff --git a/kernel/user.c b/kernel/user.c
index 850e0ba41c1..2c000e7132a 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -75,21 +75,6 @@ static void uid_hash_remove(struct user_struct *up)
 	put_user_ns(up->user_ns);
 }
 
-static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
-{
-	struct user_struct *user;
-	struct hlist_node *h;
-
-	hlist_for_each_entry(user, h, hashent, uidhash_node) {
-		if (user->uid == uid) {
-			atomic_inc(&user->__count);
-			return user;
-		}
-	}
-
-	return NULL;
-}
-
 #ifdef CONFIG_USER_SCHED
 
 static void sched_destroy_user(struct user_struct *up)
@@ -119,6 +104,23 @@ static int sched_create_user(struct user_struct *up) { return 0; }
 
 #if defined(CONFIG_USER_SCHED) && defined(CONFIG_SYSFS)
 
+static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
+{
+	struct user_struct *user;
+	struct hlist_node *h;
+
+	hlist_for_each_entry(user, h, hashent, uidhash_node) {
+		if (user->uid == uid) {
+			/* possibly resurrect an "almost deleted" object */
+			if (atomic_inc_return(&user->__count) == 1)
+				cancel_delayed_work(&user->work);
+			return user;
+		}
+	}
+
+	return NULL;
+}
+
 static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */
 static DEFINE_MUTEX(uids_mutex);
 
@@ -283,12 +285,12 @@ int __init uids_sysfs_init(void)
 	return uids_user_create(&root_user);
 }
 
-/* work function to remove sysfs directory for a user and free up
+/* delayed work function to remove sysfs directory for a user and free up
  * corresponding structures.
  */
 static void cleanup_user_struct(struct work_struct *w)
 {
-	struct user_struct *up = container_of(w, struct user_struct, work);
+	struct user_struct *up = container_of(w, struct user_struct, work.work);
 	unsigned long flags;
 	int remove_user = 0;
 
@@ -297,15 +299,12 @@ static void cleanup_user_struct(struct work_struct *w)
 	 */
 	uids_mutex_lock();
 
-	local_irq_save(flags);
-
-	if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
+	spin_lock_irqsave(&uidhash_lock, flags);
+	if (atomic_read(&up->__count) == 0) {
 		uid_hash_remove(up);
 		remove_user = 1;
-		spin_unlock_irqrestore(&uidhash_lock, flags);
-	} else {
-		local_irq_restore(flags);
 	}
+	spin_unlock_irqrestore(&uidhash_lock, flags);
 
 	if (!remove_user)
 		goto done;
@@ -331,16 +330,28 @@ done:
  */
 static void free_user(struct user_struct *up, unsigned long flags)
 {
-	/* restore back the count */
-	atomic_inc(&up->__count);
 	spin_unlock_irqrestore(&uidhash_lock, flags);
-
-	INIT_WORK(&up->work, cleanup_user_struct);
-	schedule_work(&up->work);
+	INIT_DELAYED_WORK(&up->work, cleanup_user_struct);
+	schedule_delayed_work(&up->work, msecs_to_jiffies(1000));
 }
 
 #else	/* CONFIG_USER_SCHED && CONFIG_SYSFS */
 
+static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
+{
+	struct user_struct *user;
+	struct hlist_node *h;
+
+	hlist_for_each_entry(user, h, hashent, uidhash_node) {
+		if (user->uid == uid) {
+			atomic_inc(&user->__count);
+			return user;
+		}
+	}
+
+	return NULL;
+}
+
 int uids_sysfs_init(void) { return 0; }
 static inline int uids_user_create(struct user_struct *up) { return 0; }
 static inline void uids_mutex_lock(void) { }
-- 
cgit v1.2.3-70-g09d2


From 309b7d60a345f402bec3cf9caadb53de4028e2aa Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 24 Apr 2009 14:57:00 +0200
Subject: driver core: add BUS_NOTIFY_UNBOUND_DRIVER event

This patch adds a new bus notifier event which is emitted _after_ a
device is removed from its driver. This event will be used by the
dma-api debug code to check if a driver has released all dma allocations
for that device.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/dd.c      | 4 ++++
 include/linux/device.h | 2 ++
 2 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 742cbe6b042..efd00de183b 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -320,6 +320,10 @@ static void __device_release_driver(struct device *dev)
 		devres_release_all(dev);
 		dev->driver = NULL;
 		klist_remove(&dev->p->knode_driver);
+		if (dev->bus)
+			blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
+						     BUS_NOTIFY_UNBOUND_DRIVER,
+						     dev);
 	}
 }
 
diff --git a/include/linux/device.h b/include/linux/device.h
index a4a7b10aaa4..4410464b134 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -114,6 +114,8 @@ extern int bus_unregister_notifier(struct bus_type *bus,
 #define BUS_NOTIFY_BOUND_DRIVER		0x00000003 /* driver bound to device */
 #define BUS_NOTIFY_UNBIND_DRIVER	0x00000004 /* driver about to be
 						      unbound */
+#define BUS_NOTIFY_UNBOUND_DRIVER	0x00000005 /* driver is unbound
+						      from the device */
 
 extern struct kset *bus_get_kset(struct bus_type *bus);
 extern struct klist *bus_get_device_klist(struct bus_type *bus);
-- 
cgit v1.2.3-70-g09d2


From c0afe7ba5e71d8ab66bc42f90b3e237581d3c509 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.ml.walleij@gmail.com>
Date: Mon, 27 Apr 2009 02:38:16 +0200
Subject: driver core: Const-correct platform getbyname functions

This converts resource and IRQ getbyname functions for the platform
bus to use const char *, I ran into compiler moanings when I tried
using a const char * for looking up a certain resource.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/platform.c         | 5 +++--
 include/linux/platform_device.h | 4 ++--
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index ead3f64c41d..59df6290951 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -69,7 +69,8 @@ EXPORT_SYMBOL_GPL(platform_get_irq);
  * @name: resource name
  */
 struct resource *platform_get_resource_byname(struct platform_device *dev,
-					      unsigned int type, char *name)
+					      unsigned int type,
+					      const char *name)
 {
 	int i;
 
@@ -88,7 +89,7 @@ EXPORT_SYMBOL_GPL(platform_get_resource_byname);
  * @dev: platform device
  * @name: IRQ name
  */
-int platform_get_irq_byname(struct platform_device *dev, char *name)
+int platform_get_irq_byname(struct platform_device *dev, const char *name)
 {
 	struct resource *r = platform_get_resource_byname(dev, IORESOURCE_IRQ,
 							  name);
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index b67bb5d7b22..8dc5123b630 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -36,8 +36,8 @@ extern struct device platform_bus;
 
 extern struct resource *platform_get_resource(struct platform_device *, unsigned int, unsigned int);
 extern int platform_get_irq(struct platform_device *, unsigned int);
-extern struct resource *platform_get_resource_byname(struct platform_device *, unsigned int, char *);
-extern int platform_get_irq_byname(struct platform_device *, char *);
+extern struct resource *platform_get_resource_byname(struct platform_device *, unsigned int, const char *);
+extern int platform_get_irq_byname(struct platform_device *, const char *);
 extern int platform_add_devices(struct platform_device **, int);
 
 extern struct platform_device *platform_device_register_simple(const char *, int id,
-- 
cgit v1.2.3-70-g09d2


From 5e8e9245f9bb4c6ed2a2f4af42f2ec9733dc5378 Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <sameo@linux.intel.com>
Date: Wed, 27 May 2009 00:49:37 +0200
Subject: firmware: FIRMWARE_NAME_MAX removal

As we're allocating the firmware name dynamically, we no longer need this
definition.
This patch must be applied only after the 5 previous patches from this pacth
set have been applied.

Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/firmware.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/firmware.h b/include/linux/firmware.h
index c8ecf5b2a20..d3154462843 100644
--- a/include/linux/firmware.h
+++ b/include/linux/firmware.h
@@ -5,7 +5,6 @@
 #include <linux/types.h>
 #include <linux/compiler.h>
 
-#define FIRMWARE_NAME_MAX 30 
 #define FW_ACTION_NOHOTPLUG 0
 #define FW_ACTION_HOTPLUG 1
 
-- 
cgit v1.2.3-70-g09d2


From 6fcf53acccf85b4b0d0260e66c692a341760f464 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Thu, 30 Apr 2009 15:23:42 +0200
Subject: Driver Core: add nodename callbacks

This adds the nodename callback for struct class, struct device_type and
struct device, to allow drivers to send userspace hints on the device
name and subdirectory that should be used for it.

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Jan Blunck <jblunck@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/core.c    | 51 +++++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/device.h |  3 +++
 2 files changed, 53 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 4d59975c24a..7ecb1938e59 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -162,10 +162,18 @@ static int dev_uevent(struct kset *kset, struct kobject *kobj,
 	struct device *dev = to_dev(kobj);
 	int retval = 0;
 
-	/* add the major/minor if present */
+	/* add device node properties if present */
 	if (MAJOR(dev->devt)) {
+		const char *tmp;
+		const char *name;
+
 		add_uevent_var(env, "MAJOR=%u", MAJOR(dev->devt));
 		add_uevent_var(env, "MINOR=%u", MINOR(dev->devt));
+		name = device_get_nodename(dev, &tmp);
+		if (name) {
+			add_uevent_var(env, "DEVNAME=%s", name);
+			kfree(tmp);
+		}
 	}
 
 	if (dev->type && dev->type->name)
@@ -1128,6 +1136,47 @@ static struct device *next_device(struct klist_iter *i)
 	return dev;
 }
 
+/**
+ * device_get_nodename - path of device node file
+ * @dev: device
+ * @tmp: possibly allocated string
+ *
+ * Return the relative path of a possible device node.
+ * Non-default names may need to allocate a memory to compose
+ * a name. This memory is returned in tmp and needs to be
+ * freed by the caller.
+ */
+const char *device_get_nodename(struct device *dev, const char **tmp)
+{
+	char *s;
+
+	*tmp = NULL;
+
+	/* the device type may provide a specific name */
+	if (dev->type && dev->type->nodename)
+		*tmp = dev->type->nodename(dev);
+	if (*tmp)
+		return *tmp;
+
+	/* the class may provide a specific name */
+	if (dev->class && dev->class->nodename)
+		*tmp = dev->class->nodename(dev);
+	if (*tmp)
+		return *tmp;
+
+	/* return name without allocation, tmp == NULL */
+	if (strchr(dev_name(dev), '!') == NULL)
+		return dev_name(dev);
+
+	/* replace '!' in the name with '/' */
+	*tmp = kstrdup(dev_name(dev), GFP_KERNEL);
+	if (!*tmp)
+		return NULL;
+	while ((s = strchr(*tmp, '!')))
+		s[0] = '/';
+	return *tmp;
+}
+
 /**
  * device_for_each_child - device child iterator.
  * @parent: parent struct device.
diff --git a/include/linux/device.h b/include/linux/device.h
index 4410464b134..ed4e39f2c42 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -194,6 +194,7 @@ struct class {
 	struct kobject			*dev_kobj;
 
 	int (*dev_uevent)(struct device *dev, struct kobj_uevent_env *env);
+	char *(*nodename)(struct device *dev);
 
 	void (*class_release)(struct class *class);
 	void (*dev_release)(struct device *dev);
@@ -289,6 +290,7 @@ struct device_type {
 	const char *name;
 	struct attribute_group **groups;
 	int (*uevent)(struct device *dev, struct kobj_uevent_env *env);
+	char *(*nodename)(struct device *dev);
 	void (*release)(struct device *dev);
 
 	struct dev_pm_ops *pm;
@@ -488,6 +490,7 @@ extern struct device *device_find_child(struct device *dev, void *data,
 extern int device_rename(struct device *dev, char *new_name);
 extern int device_move(struct device *dev, struct device *new_parent,
 		       enum dpm_order dpm_order);
+extern const char *device_get_nodename(struct device *dev, const char **tmp);
 
 /*
  * Root device objects for grouping under /sys/devices
-- 
cgit v1.2.3-70-g09d2


From d405640539555b601e52f7d18f1f0b1345d18bf5 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Thu, 30 Apr 2009 15:23:42 +0200
Subject: Driver Core: misc: add nodename support for misc devices.

This adds support for misc devices to report their requested nodename to
userspace.  It also updates a number of misc drivers to provide the
needed subdirectory and device name to be used for them.

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Jan Blunck <jblunck@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/x86/kernel/microcode_core.c |  1 +
 drivers/char/hw_random/core.c    |  1 +
 drivers/char/misc.c              | 15 ++++++++++++---
 drivers/md/dm-ioctl.c            |  1 +
 drivers/net/tun.c                |  1 +
 include/linux/miscdevice.h       |  1 +
 6 files changed, 17 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 9c4461501fc..9371448290a 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -236,6 +236,7 @@ static const struct file_operations microcode_fops = {
 static struct miscdevice microcode_dev = {
 	.minor			= MICROCODE_MINOR,
 	.name			= "microcode",
+	.devnode		= "cpu/microcode",
 	.fops			= &microcode_fops,
 };
 
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index e5d583c84e4..fc93e2fc7c7 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -153,6 +153,7 @@ static const struct file_operations rng_chrdev_ops = {
 static struct miscdevice rng_miscdev = {
 	.minor		= RNG_MISCDEV_MINOR,
 	.name		= RNG_MODULE_NAME,
+	.devnode	= "hwrng",
 	.fops		= &rng_chrdev_ops,
 };
 
diff --git a/drivers/char/misc.c b/drivers/char/misc.c
index a5e0db9d766..62c99fa59e2 100644
--- a/drivers/char/misc.c
+++ b/drivers/char/misc.c
@@ -168,7 +168,6 @@ static const struct file_operations misc_fops = {
 	.open		= misc_open,
 };
 
-
 /**
  *	misc_register	-	register a miscellaneous device
  *	@misc: device structure
@@ -217,8 +216,8 @@ int misc_register(struct miscdevice * misc)
 		misc_minors[misc->minor >> 3] |= 1 << (misc->minor & 7);
 	dev = MKDEV(MISC_MAJOR, misc->minor);
 
-	misc->this_device = device_create(misc_class, misc->parent, dev, NULL,
-					  "%s", misc->name);
+	misc->this_device = device_create(misc_class, misc->parent, dev,
+					  misc, "%s", misc->name);
 	if (IS_ERR(misc->this_device)) {
 		err = PTR_ERR(misc->this_device);
 		goto out;
@@ -264,6 +263,15 @@ int misc_deregister(struct miscdevice *misc)
 EXPORT_SYMBOL(misc_register);
 EXPORT_SYMBOL(misc_deregister);
 
+static char *misc_nodename(struct device *dev)
+{
+	struct miscdevice *c = dev_get_drvdata(dev);
+
+	if (c->devnode)
+		return kstrdup(c->devnode, GFP_KERNEL);
+	return NULL;
+}
+
 static int __init misc_init(void)
 {
 	int err;
@@ -279,6 +287,7 @@ static int __init misc_init(void)
 	err = -EIO;
 	if (register_chrdev(MISC_MAJOR,"misc",&misc_fops))
 		goto fail_printk;
+	misc_class->nodename = misc_nodename;
 	return 0;
 
 fail_printk:
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 823ceba6efa..1128d3fba79 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1513,6 +1513,7 @@ static const struct file_operations _ctl_fops = {
 static struct miscdevice _dm_misc = {
 	.minor 		= MISC_DYNAMIC_MINOR,
 	.name  		= DM_NAME,
+	.devnode	= "mapper/control",
 	.fops  		= &_ctl_fops
 };
 
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 811d3517fce..11a0ba47b67 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1366,6 +1366,7 @@ static const struct file_operations tun_fops = {
 static struct miscdevice tun_miscdev = {
 	.minor = TUN_MINOR,
 	.name = "tun",
+	.devnode = "net/tun",
 	.fops = &tun_fops,
 };
 
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index beb6ec99cfe..05211774462 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -41,6 +41,7 @@ struct miscdevice  {
 	struct list_head list;
 	struct device *parent;
 	struct device *this_device;
+	const char *devnode;
 };
 
 extern int misc_register(struct miscdevice * misc);
-- 
cgit v1.2.3-70-g09d2


From f7a386c5b8ff34cd84ae922603d1c6f9d234edee Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Thu, 30 Apr 2009 15:23:42 +0200
Subject: Driver Core: usb: add nodename support for usb drivers.

This adds support for USB drivers to report their requested nodename to
userspace.  It also updates a number of USB drivers to provide the
needed subdirectory and device name to be used for them.

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Jan Blunck <jblunck@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/hid/usbhid/hiddev.c     |  7 ++++++-
 drivers/media/video/dabusb.c    |  6 ++++++
 drivers/usb/class/usblp.c       |  6 ++++++
 drivers/usb/core/file.c         | 13 ++++++++++++-
 drivers/usb/core/usb.c          | 11 +++++++++++
 drivers/usb/misc/iowarrior.c    |  6 ++++++
 drivers/usb/misc/legousbtower.c |  6 ++++++
 include/linux/usb.h             |  3 +++
 8 files changed, 56 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c
index e9b436d2d94..9e9421525fb 100644
--- a/drivers/hid/usbhid/hiddev.c
+++ b/drivers/hid/usbhid/hiddev.c
@@ -850,8 +850,14 @@ static const struct file_operations hiddev_fops = {
 #endif
 };
 
+static char *hiddev_nodename(struct device *dev)
+{
+	return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev));
+}
+
 static struct usb_class_driver hiddev_class = {
 	.name =		"hiddev%d",
+	.nodename =	hiddev_nodename,
 	.fops =		&hiddev_fops,
 	.minor_base =	HIDDEV_MINOR_BASE,
 };
@@ -955,7 +961,6 @@ static int hiddev_usbd_probe(struct usb_interface *intf,
 	return -ENODEV;
 }
 
-
 static /* const */ struct usb_driver hiddev_driver = {
 	.name =		"hiddev",
 	.probe =	hiddev_usbd_probe,
diff --git a/drivers/media/video/dabusb.c b/drivers/media/video/dabusb.c
index ba3709bec3f..ec2f45dde16 100644
--- a/drivers/media/video/dabusb.c
+++ b/drivers/media/video/dabusb.c
@@ -747,8 +747,14 @@ static const struct file_operations dabusb_fops =
 	.release =	dabusb_release,
 };
 
+static char *dabusb_nodename(struct device *dev)
+{
+	return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev));
+}
+
 static struct usb_class_driver dabusb_class = {
 	.name =		"dabusb%d",
+	.nodename =	dabusb_nodename,
 	.fops =		&dabusb_fops,
 	.minor_base =	DABUSB_MINOR,
 };
diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c
index d2747a49b97..26c09f0257d 100644
--- a/drivers/usb/class/usblp.c
+++ b/drivers/usb/class/usblp.c
@@ -1057,8 +1057,14 @@ static const struct file_operations usblp_fops = {
 	.release =	usblp_release,
 };
 
+static char *usblp_nodename(struct device *dev)
+{
+	return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev));
+}
+
 static struct usb_class_driver usblp_class = {
 	.name =		"lp%d",
+	.nodename =	usblp_nodename,
 	.fops =		&usblp_fops,
 	.minor_base =	USBLP_MINOR_BASE,
 };
diff --git a/drivers/usb/core/file.c b/drivers/usb/core/file.c
index 997e659ff69..5cef88929b3 100644
--- a/drivers/usb/core/file.c
+++ b/drivers/usb/core/file.c
@@ -67,6 +67,16 @@ static struct usb_class {
 	struct class *class;
 } *usb_class;
 
+static char *usb_nodename(struct device *dev)
+{
+	struct usb_class_driver *drv;
+
+	drv = dev_get_drvdata(dev);
+	if (!drv || !drv->nodename)
+		return NULL;
+	return drv->nodename(dev);
+}
+
 static int init_usb_class(void)
 {
 	int result = 0;
@@ -90,6 +100,7 @@ static int init_usb_class(void)
 		kfree(usb_class);
 		usb_class = NULL;
 	}
+	usb_class->class->nodename = usb_nodename;
 
 exit:
 	return result;
@@ -198,7 +209,7 @@ int usb_register_dev(struct usb_interface *intf,
 	else
 		temp = name;
 	intf->usb_dev = device_create(usb_class->class, &intf->dev,
-				      MKDEV(USB_MAJOR, minor), NULL,
+				      MKDEV(USB_MAJOR, minor), class_driver,
 				      "%s", temp);
 	if (IS_ERR(intf->usb_dev)) {
 		down_write(&minor_rwsem);
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 7eee400d3e3..927a27dd2f8 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -305,10 +305,21 @@ static struct dev_pm_ops usb_device_pm_ops = {
 
 #endif	/* CONFIG_PM */
 
+
+static char *usb_nodename(struct device *dev)
+{
+	struct usb_device *usb_dev;
+
+	usb_dev = to_usb_device(dev);
+	return kasprintf(GFP_KERNEL, "bus/usb/%03d/%03d",
+			 usb_dev->bus->busnum, usb_dev->devnum);
+}
+
 struct device_type usb_device_type = {
 	.name =		"usb_device",
 	.release =	usb_release_dev,
 	.uevent =	usb_dev_uevent,
+	.nodename = 	usb_nodename,
 	.pm =		&usb_device_pm_ops,
 };
 
diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c
index a4ef77ef917..3c5fe5cee05 100644
--- a/drivers/usb/misc/iowarrior.c
+++ b/drivers/usb/misc/iowarrior.c
@@ -726,12 +726,18 @@ static const struct file_operations iowarrior_fops = {
 	.poll = iowarrior_poll,
 };
 
+static char *iowarrior_nodename(struct device *dev)
+{
+	return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev));
+}
+
 /*
  * usb class driver info in order to get a minor number from the usb core,
  * and to have the device registered with devfs and the driver core
  */
 static struct usb_class_driver iowarrior_class = {
 	.name = "iowarrior%d",
+	.nodename = iowarrior_nodename,
 	.fops = &iowarrior_fops,
 	.minor_base = IOWARRIOR_MINOR_BASE,
 };
diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c
index ab0f3226158..c1e2433f640 100644
--- a/drivers/usb/misc/legousbtower.c
+++ b/drivers/usb/misc/legousbtower.c
@@ -266,12 +266,18 @@ static const struct file_operations tower_fops = {
 	.llseek =	tower_llseek,
 };
 
+static char *legousbtower_nodename(struct device *dev)
+{
+	return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev));
+}
+
 /*
  * usb class driver info in order to get a minor number from the usb core,
  * and to have the device registered with the driver core
  */
 static struct usb_class_driver tower_class = {
 	.name =		"legousbtower%d",
+	.nodename = 	legousbtower_nodename,
 	.fops =		&tower_fops,
 	.minor_base =	LEGO_USB_TOWER_MINOR_BASE,
 };
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 3aa2cd1f8d0..34cdfcac455 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -869,6 +869,8 @@ struct usb_driver {
  * struct usb_device_driver - identifies USB device driver to usbcore
  * @name: The driver name should be unique among USB drivers,
  *	and should normally be the same as the module name.
+ * @nodename: Callback to provide a naming hint for a possible
+ *	device node to create.
  * @probe: Called to see if the driver is willing to manage a particular
  *	device.  If it is, probe returns zero and uses dev_set_drvdata()
  *	to associate driver-specific data with the device.  If unwilling
@@ -912,6 +914,7 @@ extern struct bus_type usb_bus_type;
  */
 struct usb_class_driver {
 	char *name;
+	char *(*nodename)(struct device *dev);
 	const struct file_operations *fops;
 	int minor_base;
 };
-- 
cgit v1.2.3-70-g09d2


From b03f38b685e2e1db174fb8982930e789a516f414 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Thu, 30 Apr 2009 15:23:42 +0200
Subject: Driver Core: block: add nodename support for block drivers.

This adds support for block drivers to report their requested nodename
to userspace.  It also updates a number of block drivers to provide the
needed subdirectory and device name to be used for them.

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Jan Blunck <jblunck@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 block/genhd.c           | 10 ++++++++++
 drivers/block/pktcdvd.c |  7 +++++++
 include/linux/genhd.h   |  2 +-
 3 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/block/genhd.c b/block/genhd.c
index fe7ccc0a618..f4c64c2b303 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -996,10 +996,20 @@ struct class block_class = {
 	.name		= "block",
 };
 
+static char *block_nodename(struct device *dev)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+
+	if (disk->nodename)
+		return disk->nodename(disk);
+	return NULL;
+}
+
 static struct device_type disk_type = {
 	.name		= "disk",
 	.groups		= disk_attr_groups,
 	.release	= disk_release,
+	.nodename	= block_nodename,
 };
 
 #ifdef CONFIG_PROC_FS
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index d57f1175948..37e0f81cada 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2855,6 +2855,11 @@ static struct block_device_operations pktcdvd_ops = {
 	.media_changed =	pkt_media_changed,
 };
 
+static char *pktcdvd_nodename(struct gendisk *gd)
+{
+	return kasprintf(GFP_KERNEL, "pktcdvd/%s", gd->disk_name);
+}
+
 /*
  * Set up mapping from pktcdvd device to CD-ROM device.
  */
@@ -2907,6 +2912,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
 	disk->fops = &pktcdvd_ops;
 	disk->flags = GENHD_FL_REMOVABLE;
 	strcpy(disk->disk_name, pd->name);
+	disk->nodename = pktcdvd_nodename;
 	disk->private_data = pd;
 	disk->queue = blk_alloc_queue(GFP_KERNEL);
 	if (!disk->queue)
@@ -3062,6 +3068,7 @@ static const struct file_operations pkt_ctl_fops = {
 static struct miscdevice pkt_misc = {
 	.minor 		= MISC_DYNAMIC_MINOR,
 	.name  		= DRIVER_NAME,
+	.name  		= "pktcdvd/control",
 	.fops  		= &pkt_ctl_fops
 };
 
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 7cbd38d363a..45fc320a53c 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -142,7 +142,7 @@ struct gendisk {
                                          * disks that can't be partitioned. */
 
 	char disk_name[DISK_NAME_LEN];	/* name of major driver */
-
+	char *(*nodename)(struct gendisk *gd);
 	/* Array of pointers to partitions indexed by partno.
 	 * Protected with matching bdev lock but stat and other
 	 * non-critical accesses use RCU.  Always access through
-- 
cgit v1.2.3-70-g09d2


From 4b9d0d3b81ec0900971cbe8aba8ba0ae9c08a087 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Thu, 30 Apr 2009 14:43:31 -0700
Subject: eisa: remove driver_data direct access of struct device

In the near future, the driver core is going to not allow direct access
to the driver_data pointer in struct device.  Instead, the functions
dev_get_drvdata() and dev_set_drvdata() should be used.  These functions
have been around since the beginning, so are backwards compatible with
all older kernel versions.


Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/eisa/pci_eisa.c     | 2 +-
 drivers/eisa/virtual_root.c | 2 +-
 include/linux/eisa.h        | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/eisa/pci_eisa.c b/drivers/eisa/pci_eisa.c
index 74edb1d0110..0dd0f633b18 100644
--- a/drivers/eisa/pci_eisa.c
+++ b/drivers/eisa/pci_eisa.c
@@ -31,11 +31,11 @@ static int __init pci_eisa_init(struct pci_dev *pdev,
 	}
 
 	pci_eisa_root.dev              = &pdev->dev;
-	pci_eisa_root.dev->driver_data = &pci_eisa_root;
 	pci_eisa_root.res	       = pdev->bus->resource[0];
 	pci_eisa_root.bus_base_addr    = pdev->bus->resource[0]->start;
 	pci_eisa_root.slots	       = EISA_MAX_SLOTS;
 	pci_eisa_root.dma_mask         = pdev->dma_mask;
+	dev_set_drvdata(pci_eisa_root.dev, &pci_eisa_root);
 
 	if (eisa_root_register (&pci_eisa_root)) {
 		printk (KERN_ERR "pci_eisa : Could not register EISA root\n");
diff --git a/drivers/eisa/virtual_root.c b/drivers/eisa/virtual_root.c
index 3074879f231..535e4f9c83f 100644
--- a/drivers/eisa/virtual_root.c
+++ b/drivers/eisa/virtual_root.c
@@ -57,7 +57,7 @@ static int __init virtual_eisa_root_init (void)
 
 	eisa_bus_root.force_probe = force_probe;
 	
-	eisa_root_dev.dev.driver_data = &eisa_bus_root;
+	dev_set_drvdata(&eisa_root_dev.dev, &eisa_bus_root);
 
 	if (eisa_root_register (&eisa_bus_root)) {
 		/* A real bridge may have been registered before
diff --git a/include/linux/eisa.h b/include/linux/eisa.h
index e61c0be2a45..6925249a5ac 100644
--- a/include/linux/eisa.h
+++ b/include/linux/eisa.h
@@ -78,12 +78,12 @@ static inline void eisa_driver_unregister (struct eisa_driver *edrv) { }
 /* Mimics pci.h... */
 static inline void *eisa_get_drvdata (struct eisa_device *edev)
 {
-        return edev->dev.driver_data;
+        return dev_get_drvdata(&edev->dev);
 }
 
 static inline void eisa_set_drvdata (struct eisa_device *edev, void *data)
 {
-        edev->dev.driver_data = data;
+        dev_set_drvdata(&edev->dev, data);
 }
 
 /* The EISA root device. There's rumours about machines with multiple
-- 
cgit v1.2.3-70-g09d2


From 156f5a7801195fa2ce44aeeb62d6cf8468f3332a Mon Sep 17 00:00:00 2001
From: GeunSik Lim <leemgs1@gmail.com>
Date: Tue, 2 Jun 2009 15:01:37 +0900
Subject: debugfs: Fix terminology inconsistency of dir name to mount debugfs
 filesystem.

Many developers use "/debug/" or "/debugfs/" or "/sys/kernel/debug/"
directory name to mount debugfs filesystem for ftrace according to
./Documentation/tracers/ftrace.txt file.

And, three directory names(ex:/debug/, /debugfs/, /sys/kernel/debug/) is
existed in kernel source like ftrace, DRM, Wireless, Documentation,
Network[sky2]files to mount debugfs filesystem.

debugfs means debug filesystem for debugging easy to use by greg kroah
hartman. "/sys/kernel/debug/" name is suitable as directory name
of debugfs filesystem.
- debugfs related reference: http://lwn.net/Articles/334546/

Fix inconsistency of directory name to mount debugfs filesystem.

* From Steven Rostedt
  - find_debugfs() and tracing_files() in this patch.

Signed-off-by: GeunSik Lim <geunsik.lim@samsung.com>
Acked-by     : Inaky Perez-Gonzalez <inaky@linux.intel.com>
Reviewed-by  : Steven Rostedt <rostedt@goodmis.org>
Reviewed-by  : James Smart <james.smart@emulex.com>
CC: Jiri Kosina <trivial@kernel.org>
CC: David Airlie <airlied@linux.ie>
CC: Peter Osterlund <petero2@telia.com>
CC: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
CC: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
CC: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/DocBook/debugobjects.tmpl           |   2 +-
 Documentation/cdrom/packet-writing.txt            |   2 +-
 Documentation/fault-injection/fault-injection.txt |  70 +++----
 Documentation/kprobes.txt                         |   6 +-
 Documentation/trace/ftrace.txt                    | 233 +++++++++++++---------
 Documentation/trace/mmiotrace.txt                 |  26 +--
 drivers/block/pktcdvd.c                           |   2 +-
 drivers/gpu/drm/drm_debugfs.c                     |  12 +-
 drivers/gpu/drm/drm_drv.c                         |   2 +-
 drivers/gpu/drm/drm_stub.c                        |   2 +-
 drivers/net/Kconfig                               |   4 +-
 drivers/net/wimax/i2400m/i2400m.h                 |   2 +-
 drivers/net/wireless/ath/ath5k/Kconfig            |   5 +-
 drivers/net/wireless/libertas/README              |  12 +-
 drivers/scsi/lpfc/lpfc_debugfs.c                  |   3 +-
 include/linux/kernel.h                            |   2 +-
 include/linux/tracepoint.h                        |   4 +-
 kernel/trace/Kconfig                              |  10 +-
 kernel/trace/trace.c                              |  23 +--
 scripts/tracing/draw_functrace.py                 |   7 +-
 20 files changed, 238 insertions(+), 191 deletions(-)

(limited to 'include')

diff --git a/Documentation/DocBook/debugobjects.tmpl b/Documentation/DocBook/debugobjects.tmpl
index 7f5f218015f..08ff908aa7a 100644
--- a/Documentation/DocBook/debugobjects.tmpl
+++ b/Documentation/DocBook/debugobjects.tmpl
@@ -106,7 +106,7 @@
       number of errors are printk'ed including a full stack trace.
     </para>
     <para>
-      The statistics are available via debugfs/debug_objects/stats.
+      The statistics are available via /sys/kernel/debug/debug_objects/stats.
       They provide information about the number of warnings and the
       number of successful fixups along with information about the
       usage of the internal tracking objects and the state of the
diff --git a/Documentation/cdrom/packet-writing.txt b/Documentation/cdrom/packet-writing.txt
index cf1f8126991..1c407778c8b 100644
--- a/Documentation/cdrom/packet-writing.txt
+++ b/Documentation/cdrom/packet-writing.txt
@@ -117,7 +117,7 @@ Using the pktcdvd debugfs interface
 
 To read pktcdvd device infos in human readable form, do:
 
-	# cat /debug/pktcdvd/pktcdvd[0-7]/info
+	# cat /sys/kernel/debug/pktcdvd/pktcdvd[0-7]/info
 
 For a description of the debugfs interface look into the file:
 
diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt
index 4bc374a1434..07930564079 100644
--- a/Documentation/fault-injection/fault-injection.txt
+++ b/Documentation/fault-injection/fault-injection.txt
@@ -29,16 +29,16 @@ o debugfs entries
 fault-inject-debugfs kernel module provides some debugfs entries for runtime
 configuration of fault-injection capabilities.
 
-- /debug/fail*/probability:
+- /sys/kernel/debug/fail*/probability:
 
 	likelihood of failure injection, in percent.
 	Format: <percent>
 
 	Note that one-failure-per-hundred is a very high error rate
 	for some testcases.  Consider setting probability=100 and configure
-	/debug/fail*/interval for such testcases.
+	/sys/kernel/debug/fail*/interval for such testcases.
 
-- /debug/fail*/interval:
+- /sys/kernel/debug/fail*/interval:
 
 	specifies the interval between failures, for calls to
 	should_fail() that pass all the other tests.
@@ -46,18 +46,18 @@ configuration of fault-injection capabilities.
 	Note that if you enable this, by setting interval>1, you will
 	probably want to set probability=100.
 
-- /debug/fail*/times:
+- /sys/kernel/debug/fail*/times:
 
 	specifies how many times failures may happen at most.
 	A value of -1 means "no limit".
 
-- /debug/fail*/space:
+- /sys/kernel/debug/fail*/space:
 
 	specifies an initial resource "budget", decremented by "size"
 	on each call to should_fail(,size).  Failure injection is
 	suppressed until "space" reaches zero.
 
-- /debug/fail*/verbose
+- /sys/kernel/debug/fail*/verbose
 
 	Format: { 0 | 1 | 2 }
 	specifies the verbosity of the messages when failure is
@@ -65,17 +65,17 @@ configuration of fault-injection capabilities.
 	log line per failure; '2' will print a call trace too -- useful
 	to debug the problems revealed by fault injection.
 
-- /debug/fail*/task-filter:
+- /sys/kernel/debug/fail*/task-filter:
 
 	Format: { 'Y' | 'N' }
 	A value of 'N' disables filtering by process (default).
 	Any positive value limits failures to only processes indicated by
 	/proc/<pid>/make-it-fail==1.
 
-- /debug/fail*/require-start:
-- /debug/fail*/require-end:
-- /debug/fail*/reject-start:
-- /debug/fail*/reject-end:
+- /sys/kernel/debug/fail*/require-start:
+- /sys/kernel/debug/fail*/require-end:
+- /sys/kernel/debug/fail*/reject-start:
+- /sys/kernel/debug/fail*/reject-end:
 
 	specifies the range of virtual addresses tested during
 	stacktrace walking.  Failure is injected only if some caller
@@ -84,26 +84,26 @@ configuration of fault-injection capabilities.
 	Default required range is [0,ULONG_MAX) (whole of virtual address space).
 	Default rejected range is [0,0).
 
-- /debug/fail*/stacktrace-depth:
+- /sys/kernel/debug/fail*/stacktrace-depth:
 
 	specifies the maximum stacktrace depth walked during search
 	for a caller within [require-start,require-end) OR
 	[reject-start,reject-end).
 
-- /debug/fail_page_alloc/ignore-gfp-highmem:
+- /sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem:
 
 	Format: { 'Y' | 'N' }
 	default is 'N', setting it to 'Y' won't inject failures into
 	highmem/user allocations.
 
-- /debug/failslab/ignore-gfp-wait:
-- /debug/fail_page_alloc/ignore-gfp-wait:
+- /sys/kernel/debug/failslab/ignore-gfp-wait:
+- /sys/kernel/debug/fail_page_alloc/ignore-gfp-wait:
 
 	Format: { 'Y' | 'N' }
 	default is 'N', setting it to 'Y' will inject failures
 	only into non-sleep allocations (GFP_ATOMIC allocations).
 
-- /debug/fail_page_alloc/min-order:
+- /sys/kernel/debug/fail_page_alloc/min-order:
 
 	specifies the minimum page allocation order to be injected
 	failures.
@@ -166,13 +166,13 @@ o Inject slab allocation failures into module init/exit code
 #!/bin/bash
 
 FAILTYPE=failslab
-echo Y > /debug/$FAILTYPE/task-filter
-echo 10 > /debug/$FAILTYPE/probability
-echo 100 > /debug/$FAILTYPE/interval
-echo -1 > /debug/$FAILTYPE/times
-echo 0 > /debug/$FAILTYPE/space
-echo 2 > /debug/$FAILTYPE/verbose
-echo 1 > /debug/$FAILTYPE/ignore-gfp-wait
+echo Y > /sys/kernel/debug/$FAILTYPE/task-filter
+echo 10 > /sys/kernel/debug/$FAILTYPE/probability
+echo 100 > /sys/kernel/debug/$FAILTYPE/interval
+echo -1 > /sys/kernel/debug/$FAILTYPE/times
+echo 0 > /sys/kernel/debug/$FAILTYPE/space
+echo 2 > /sys/kernel/debug/$FAILTYPE/verbose
+echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait
 
 faulty_system()
 {
@@ -217,20 +217,20 @@ then
 	exit 1
 fi
 
-cat /sys/module/$module/sections/.text > /debug/$FAILTYPE/require-start
-cat /sys/module/$module/sections/.data > /debug/$FAILTYPE/require-end
+cat /sys/module/$module/sections/.text > /sys/kernel/debug/$FAILTYPE/require-start
+cat /sys/module/$module/sections/.data > /sys/kernel/debug/$FAILTYPE/require-end
 
-echo N > /debug/$FAILTYPE/task-filter
-echo 10 > /debug/$FAILTYPE/probability
-echo 100 > /debug/$FAILTYPE/interval
-echo -1 > /debug/$FAILTYPE/times
-echo 0 > /debug/$FAILTYPE/space
-echo 2 > /debug/$FAILTYPE/verbose
-echo 1 > /debug/$FAILTYPE/ignore-gfp-wait
-echo 1 > /debug/$FAILTYPE/ignore-gfp-highmem
-echo 10 > /debug/$FAILTYPE/stacktrace-depth
+echo N > /sys/kernel/debug/$FAILTYPE/task-filter
+echo 10 > /sys/kernel/debug/$FAILTYPE/probability
+echo 100 > /sys/kernel/debug/$FAILTYPE/interval
+echo -1 > /sys/kernel/debug/$FAILTYPE/times
+echo 0 > /sys/kernel/debug/$FAILTYPE/space
+echo 2 > /sys/kernel/debug/$FAILTYPE/verbose
+echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait
+echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-highmem
+echo 10 > /sys/kernel/debug/$FAILTYPE/stacktrace-depth
 
-trap "echo 0 > /debug/$FAILTYPE/probability" SIGINT SIGTERM EXIT
+trap "echo 0 > /sys/kernel/debug/$FAILTYPE/probability" SIGINT SIGTERM EXIT
 
 echo "Injecting errors into the module $module... (interrupt to stop)"
 sleep 1000000
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index 1e7a769a10f..053037a1fe6 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -507,9 +507,9 @@ http://www.linuxsymposium.org/2006/linuxsymposium_procv2.pdf (pages 101-115)
 Appendix A: The kprobes debugfs interface
 
 With recent kernels (> 2.6.20) the list of registered kprobes is visible
-under the /debug/kprobes/ directory (assuming debugfs is mounted at /debug).
+under the /sys/kernel/debug/kprobes/ directory (assuming debugfs is mounted at //sys/kernel/debug).
 
-/debug/kprobes/list: Lists all registered probes on the system
+/sys/kernel/debug/kprobes/list: Lists all registered probes on the system
 
 c015d71a  k  vfs_read+0x0
 c011a316  j  do_fork+0x0
@@ -525,7 +525,7 @@ virtual addresses that correspond to modules that've been unloaded),
 such probes are marked with [GONE]. If the probe is temporarily disabled,
 such probes are marked with [DISABLED].
 
-/debug/kprobes/enabled: Turn kprobes ON/OFF forcibly.
+/sys/kernel/debug/kprobes/enabled: Turn kprobes ON/OFF forcibly.
 
 Provides a knob to globally and forcibly turn registered kprobes ON or OFF.
 By default, all kprobes are enabled. By echoing "0" to this file, all
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 7bd27f0e288..a39b3c749de 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -7,7 +7,6 @@ Copyright 2008 Red Hat Inc.
                (dual licensed under the GPL v2)
 Reviewers:   Elias Oltmanns, Randy Dunlap, Andrew Morton,
 	     John Kacur, and David Teigland.
-
 Written for: 2.6.28-rc2
 
 Introduction
@@ -33,13 +32,26 @@ The File System
 Ftrace uses the debugfs file system to hold the control files as
 well as the files to display output.
 
-To mount the debugfs system:
+When debugfs is configured into the kernel (which selecting any ftrace
+option will do) the directory /sys/kernel/debug will be created. To mount
+this directory, you can add to your /etc/fstab file:
+
+ debugfs       /sys/kernel/debug          debugfs defaults        0       0
+
+Or you can mount it at run time with:
+
+ mount -t debugfs nodev /sys/kernel/debug
 
-  # mkdir /debug
-  # mount -t debugfs nodev /debug
+For quicker access to that directory you may want to make a soft link to
+it:
 
-( Note: it is more common to mount at /sys/kernel/debug, but for
-  simplicity this document will use /debug)
+ ln -s /sys/kernel/debug /debug
+
+Any selected ftrace option will also create a directory called tracing
+within the debugfs. The rest of the document will assume that you are in
+the ftrace directory (cd /sys/kernel/debug/tracing) and will only concentrate
+on the files within that directory and not distract from the content with
+the extended "/sys/kernel/debug/tracing" path name.
 
 That's it! (assuming that you have ftrace configured into your kernel)
 
@@ -389,18 +401,18 @@ trace_options
 The trace_options file is used to control what gets printed in
 the trace output. To see what is available, simply cat the file:
 
-  cat /debug/tracing/trace_options
+  cat trace_options
   print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \
   noblock nostacktrace nosched-tree nouserstacktrace nosym-userobj
 
 To disable one of the options, echo in the option prepended with
 "no".
 
-  echo noprint-parent > /debug/tracing/trace_options
+  echo noprint-parent > trace_options
 
 To enable an option, leave off the "no".
 
-  echo sym-offset > /debug/tracing/trace_options
+  echo sym-offset > trace_options
 
 Here are the available options:
 
@@ -476,11 +488,11 @@ sched_switch
 This tracer simply records schedule switches. Here is an example
 of how to use it.
 
- # echo sched_switch > /debug/tracing/current_tracer
- # echo 1 > /debug/tracing/tracing_enabled
+ # echo sched_switch > current_tracer
+ # echo 1 > tracing_enabled
  # sleep 1
- # echo 0 > /debug/tracing/tracing_enabled
- # cat /debug/tracing/trace
+ # echo 0 > tracing_enabled
+ # cat trace
 
 # tracer: sched_switch
 #
@@ -583,13 +595,13 @@ new trace is saved.
 To reset the maximum, echo 0 into tracing_max_latency. Here is
 an example:
 
- # echo irqsoff > /debug/tracing/current_tracer
- # echo 0 > /debug/tracing/tracing_max_latency
- # echo 1 > /debug/tracing/tracing_enabled
+ # echo irqsoff > current_tracer
+ # echo 0 > tracing_max_latency
+ # echo 1 > tracing_enabled
  # ls -ltr
  [...]
- # echo 0 > /debug/tracing/tracing_enabled
- # cat /debug/tracing/latency_trace
+ # echo 0 > tracing_enabled
+ # cat latency_trace
 # tracer: irqsoff
 #
 irqsoff latency trace v1.1.5 on 2.6.26
@@ -690,13 +702,13 @@ Like the irqsoff tracer, it records the maximum latency for
 which preemption was disabled. The control of preemptoff tracer
 is much like the irqsoff tracer.
 
- # echo preemptoff > /debug/tracing/current_tracer
- # echo 0 > /debug/tracing/tracing_max_latency
- # echo 1 > /debug/tracing/tracing_enabled
+ # echo preemptoff > current_tracer
+ # echo 0 > tracing_max_latency
+ # echo 1 > tracing_enabled
  # ls -ltr
  [...]
- # echo 0 > /debug/tracing/tracing_enabled
- # cat /debug/tracing/latency_trace
+ # echo 0 > tracing_enabled
+ # cat latency_trace
 # tracer: preemptoff
 #
 preemptoff latency trace v1.1.5 on 2.6.26-rc8
@@ -837,13 +849,13 @@ tracer.
 Again, using this trace is much like the irqsoff and preemptoff
 tracers.
 
- # echo preemptirqsoff > /debug/tracing/current_tracer
- # echo 0 > /debug/tracing/tracing_max_latency
- # echo 1 > /debug/tracing/tracing_enabled
+ # echo preemptirqsoff > current_tracer
+ # echo 0 > tracing_max_latency
+ # echo 1 > tracing_enabled
  # ls -ltr
  [...]
- # echo 0 > /debug/tracing/tracing_enabled
- # cat /debug/tracing/latency_trace
+ # echo 0 > tracing_enabled
+ # cat latency_trace
 # tracer: preemptirqsoff
 #
 preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8
@@ -999,12 +1011,12 @@ slightly differently than we did with the previous tracers.
 Instead of performing an 'ls', we will run 'sleep 1' under
 'chrt' which changes the priority of the task.
 
- # echo wakeup > /debug/tracing/current_tracer
- # echo 0 > /debug/tracing/tracing_max_latency
- # echo 1 > /debug/tracing/tracing_enabled
+ # echo wakeup > current_tracer
+ # echo 0 > tracing_max_latency
+ # echo 1 > tracing_enabled
  # chrt -f 5 sleep 1
- # echo 0 > /debug/tracing/tracing_enabled
- # cat /debug/tracing/latency_trace
+ # echo 0 > tracing_enabled
+ # cat latency_trace
 # tracer: wakeup
 #
 wakeup latency trace v1.1.5 on 2.6.26-rc8
@@ -1114,11 +1126,11 @@ can be done from the debug file system. Make sure the
 ftrace_enabled is set; otherwise this tracer is a nop.
 
  # sysctl kernel.ftrace_enabled=1
- # echo function > /debug/tracing/current_tracer
- # echo 1 > /debug/tracing/tracing_enabled
+ # echo function > current_tracer
+ # echo 1 > tracing_enabled
  # usleep 1
- # echo 0 > /debug/tracing/tracing_enabled
- # cat /debug/tracing/trace
+ # echo 0 > tracing_enabled
+ # cat trace
 # tracer: function
 #
 #           TASK-PID   CPU#    TIMESTAMP  FUNCTION
@@ -1155,7 +1167,7 @@ int trace_fd;
 [...]
 int main(int argc, char *argv[]) {
 	[...]
-	trace_fd = open("/debug/tracing/tracing_enabled", O_WRONLY);
+	trace_fd = open(tracing_file("tracing_enabled"), O_WRONLY);
 	[...]
 	if (condition_hit()) {
 		write(trace_fd, "0", 1);
@@ -1163,26 +1175,20 @@ int main(int argc, char *argv[]) {
 	[...]
 }
 
-Note: Here we hard coded the path name. The debugfs mount is not
-guaranteed to be at /debug (and is more commonly at
-/sys/kernel/debug). For simple one time traces, the above is
-sufficent. For anything else, a search through /proc/mounts may
-be needed to find where the debugfs file-system is mounted.
-
 
 Single thread tracing
 ---------------------
 
-By writing into /debug/tracing/set_ftrace_pid you can trace a
+By writing into set_ftrace_pid you can trace a
 single thread. For example:
 
-# cat /debug/tracing/set_ftrace_pid
+# cat set_ftrace_pid
 no pid
-# echo 3111 > /debug/tracing/set_ftrace_pid
-# cat /debug/tracing/set_ftrace_pid
+# echo 3111 > set_ftrace_pid
+# cat set_ftrace_pid
 3111
-# echo function > /debug/tracing/current_tracer
-# cat /debug/tracing/trace | head
+# echo function > current_tracer
+# cat trace | head
  # tracer: function
  #
  #           TASK-PID    CPU#    TIMESTAMP  FUNCTION
@@ -1193,8 +1199,8 @@ no pid
      yum-updatesd-3111  [003]  1637.254683: lock_hrtimer_base <-hrtimer_try_to_cancel
      yum-updatesd-3111  [003]  1637.254685: fget_light <-do_sys_poll
      yum-updatesd-3111  [003]  1637.254686: pipe_poll <-do_sys_poll
-# echo -1 > /debug/tracing/set_ftrace_pid
-# cat /debug/tracing/trace |head
+# echo -1 > set_ftrace_pid
+# cat trace |head
  # tracer: function
  #
  #           TASK-PID    CPU#    TIMESTAMP  FUNCTION
@@ -1216,6 +1222,51 @@ something like this simple program:
 #include <fcntl.h>
 #include <unistd.h>
 
+#define _STR(x) #x
+#define STR(x) _STR(x)
+#define MAX_PATH 256
+
+const char *find_debugfs(void)
+{
+       static char debugfs[MAX_PATH+1];
+       static int debugfs_found;
+       char type[100];
+       FILE *fp;
+
+       if (debugfs_found)
+               return debugfs;
+
+       if ((fp = fopen("/proc/mounts","r")) == NULL) {
+               perror("/proc/mounts");
+               return NULL;
+       }
+
+       while (fscanf(fp, "%*s %"
+                     STR(MAX_PATH)
+                     "s %99s %*s %*d %*d\n",
+                     debugfs, type) == 2) {
+               if (strcmp(type, "debugfs") == 0)
+                       break;
+       }
+       fclose(fp);
+
+       if (strcmp(type, "debugfs") != 0) {
+               fprintf(stderr, "debugfs not mounted");
+               return NULL;
+       }
+
+       debugfs_found = 1;
+
+       return debugfs;
+}
+
+const char *tracing_file(const char *file_name)
+{
+       static char trace_file[MAX_PATH+1];
+       snprintf(trace_file, MAX_PATH, "%s/%s", find_debugfs(), file_name);
+       return trace_file;
+}
+
 int main (int argc, char **argv)
 {
         if (argc < 1)
@@ -1226,12 +1277,12 @@ int main (int argc, char **argv)
                 char line[64];
                 int s;
 
-                ffd = open("/debug/tracing/current_tracer", O_WRONLY);
+                ffd = open(tracing_file("current_tracer"), O_WRONLY);
                 if (ffd < 0)
                         exit(-1);
                 write(ffd, "nop", 3);
 
-                fd = open("/debug/tracing/set_ftrace_pid", O_WRONLY);
+                fd = open(tracing_file("set_ftrace_pid"), O_WRONLY);
                 s = sprintf(line, "%d\n", getpid());
                 write(fd, line, s);
 
@@ -1383,22 +1434,22 @@ want, depending on your needs.
   tracing_cpu_mask file) or you might sometimes see unordered
   function calls while cpu tracing switch.
 
-	hide: echo nofuncgraph-cpu > /debug/tracing/trace_options
-	show: echo funcgraph-cpu > /debug/tracing/trace_options
+	hide: echo nofuncgraph-cpu > trace_options
+	show: echo funcgraph-cpu > trace_options
 
 - The duration (function's time of execution) is displayed on
   the closing bracket line of a function or on the same line
   than the current function in case of a leaf one. It is default
   enabled.
 
-	hide: echo nofuncgraph-duration > /debug/tracing/trace_options
-	show: echo funcgraph-duration > /debug/tracing/trace_options
+	hide: echo nofuncgraph-duration > trace_options
+	show: echo funcgraph-duration > trace_options
 
 - The overhead field precedes the duration field in case of
   reached duration thresholds.
 
-	hide: echo nofuncgraph-overhead > /debug/tracing/trace_options
-	show: echo funcgraph-overhead > /debug/tracing/trace_options
+	hide: echo nofuncgraph-overhead > trace_options
+	show: echo funcgraph-overhead > trace_options
 	depends on: funcgraph-duration
 
   ie:
@@ -1427,8 +1478,8 @@ want, depending on your needs.
 - The task/pid field displays the thread cmdline and pid which
   executed the function. It is default disabled.
 
-	hide: echo nofuncgraph-proc > /debug/tracing/trace_options
-	show: echo funcgraph-proc > /debug/tracing/trace_options
+	hide: echo nofuncgraph-proc > trace_options
+	show: echo funcgraph-proc > trace_options
 
   ie:
 
@@ -1451,8 +1502,8 @@ want, depending on your needs.
   system clock since it started. A snapshot of this time is
   given on each entry/exit of functions
 
-	hide: echo nofuncgraph-abstime > /debug/tracing/trace_options
-	show: echo funcgraph-abstime > /debug/tracing/trace_options
+	hide: echo nofuncgraph-abstime > trace_options
+	show: echo funcgraph-abstime > trace_options
 
   ie:
 
@@ -1549,7 +1600,7 @@ listed in:
 
    available_filter_functions
 
- # cat /debug/tracing/available_filter_functions
+ # cat available_filter_functions
 put_prev_task_idle
 kmem_cache_create
 pick_next_task_rt
@@ -1561,12 +1612,12 @@ mutex_lock
 If I am only interested in sys_nanosleep and hrtimer_interrupt:
 
  # echo sys_nanosleep hrtimer_interrupt \
-		> /debug/tracing/set_ftrace_filter
- # echo ftrace > /debug/tracing/current_tracer
- # echo 1 > /debug/tracing/tracing_enabled
+		> set_ftrace_filter
+ # echo ftrace > current_tracer
+ # echo 1 > tracing_enabled
  # usleep 1
- # echo 0 > /debug/tracing/tracing_enabled
- # cat /debug/tracing/trace
+ # echo 0 > tracing_enabled
+ # cat trace
 # tracer: ftrace
 #
 #           TASK-PID   CPU#    TIMESTAMP  FUNCTION
@@ -1577,7 +1628,7 @@ If I am only interested in sys_nanosleep and hrtimer_interrupt:
 
 To see which functions are being traced, you can cat the file:
 
- # cat /debug/tracing/set_ftrace_filter
+ # cat set_ftrace_filter
 hrtimer_interrupt
 sys_nanosleep
 
@@ -1597,7 +1648,7 @@ Note: It is better to use quotes to enclose the wild cards,
       otherwise the shell may expand the parameters into names
       of files in the local directory.
 
- # echo 'hrtimer_*' > /debug/tracing/set_ftrace_filter
+ # echo 'hrtimer_*' > set_ftrace_filter
 
 Produces:
 
@@ -1618,7 +1669,7 @@ Produces:
 
 Notice that we lost the sys_nanosleep.
 
- # cat /debug/tracing/set_ftrace_filter
+ # cat set_ftrace_filter
 hrtimer_run_queues
 hrtimer_run_pending
 hrtimer_init
@@ -1644,17 +1695,17 @@ To append to the filters, use '>>'
 To clear out a filter so that all functions will be recorded
 again:
 
- # echo > /debug/tracing/set_ftrace_filter
- # cat /debug/tracing/set_ftrace_filter
+ # echo > set_ftrace_filter
+ # cat set_ftrace_filter
  #
 
 Again, now we want to append.
 
- # echo sys_nanosleep > /debug/tracing/set_ftrace_filter
- # cat /debug/tracing/set_ftrace_filter
+ # echo sys_nanosleep > set_ftrace_filter
+ # cat set_ftrace_filter
 sys_nanosleep
- # echo 'hrtimer_*' >> /debug/tracing/set_ftrace_filter
- # cat /debug/tracing/set_ftrace_filter
+ # echo 'hrtimer_*' >> set_ftrace_filter
+ # cat set_ftrace_filter
 hrtimer_run_queues
 hrtimer_run_pending
 hrtimer_init
@@ -1677,7 +1728,7 @@ hrtimer_init_sleeper
 The set_ftrace_notrace prevents those functions from being
 traced.
 
- # echo '*preempt*' '*lock*' > /debug/tracing/set_ftrace_notrace
+ # echo '*preempt*' '*lock*' > set_ftrace_notrace
 
 Produces:
 
@@ -1767,13 +1818,13 @@ the effect on the tracing is different. Every read from
 trace_pipe is consumed. This means that subsequent reads will be
 different. The trace is live.
 
- # echo function > /debug/tracing/current_tracer
- # cat /debug/tracing/trace_pipe > /tmp/trace.out &
+ # echo function > current_tracer
+ # cat trace_pipe > /tmp/trace.out &
 [1] 4153
- # echo 1 > /debug/tracing/tracing_enabled
+ # echo 1 > tracing_enabled
  # usleep 1
- # echo 0 > /debug/tracing/tracing_enabled
- # cat /debug/tracing/trace
+ # echo 0 > tracing_enabled
+ # cat trace
 # tracer: function
 #
 #           TASK-PID   CPU#    TIMESTAMP  FUNCTION
@@ -1809,7 +1860,7 @@ number listed is the number of entries that can be recorded per
 CPU. To know the full size, multiply the number of possible CPUS
 with the number of entries.
 
- # cat /debug/tracing/buffer_size_kb
+ # cat buffer_size_kb
 1408 (units kilobytes)
 
 Note, to modify this, you must have tracing completely disabled.
@@ -1817,18 +1868,18 @@ To do that, echo "nop" into the current_tracer. If the
 current_tracer is not set to "nop", an EINVAL error will be
 returned.
 
- # echo nop > /debug/tracing/current_tracer
- # echo 10000 > /debug/tracing/buffer_size_kb
- # cat /debug/tracing/buffer_size_kb
+ # echo nop > current_tracer
+ # echo 10000 > buffer_size_kb
+ # cat buffer_size_kb
 10000 (units kilobytes)
 
 The number of pages which will be allocated is limited to a
 percentage of available memory. Allocating too much will produce
 an error.
 
- # echo 1000000000000 > /debug/tracing/buffer_size_kb
+ # echo 1000000000000 > buffer_size_kb
 -bash: echo: write error: Cannot allocate memory
- # cat /debug/tracing/buffer_size_kb
+ # cat buffer_size_kb
 85
 
 -----------
diff --git a/Documentation/trace/mmiotrace.txt b/Documentation/trace/mmiotrace.txt
index 5731c67abc5..162effbfbde 100644
--- a/Documentation/trace/mmiotrace.txt
+++ b/Documentation/trace/mmiotrace.txt
@@ -32,41 +32,41 @@ is no way to automatically detect if you are losing events due to CPUs racing.
 Usage Quick Reference
 ---------------------
 
-$ mount -t debugfs debugfs /debug
-$ echo mmiotrace > /debug/tracing/current_tracer
-$ cat /debug/tracing/trace_pipe > mydump.txt &
+$ mount -t debugfs debugfs /sys/kernel/debug
+$ echo mmiotrace > /sys/kernel/debug/tracing/current_tracer
+$ cat /sys/kernel/debug/tracing/trace_pipe > mydump.txt &
 Start X or whatever.
-$ echo "X is up" > /debug/tracing/trace_marker
-$ echo nop > /debug/tracing/current_tracer
+$ echo "X is up" > /sys/kernel/debug/tracing/trace_marker
+$ echo nop > /sys/kernel/debug/tracing/current_tracer
 Check for lost events.
 
 
 Usage
 -----
 
-Make sure debugfs is mounted to /debug. If not, (requires root privileges)
-$ mount -t debugfs debugfs /debug
+Make sure debugfs is mounted to /sys/kernel/debug. If not, (requires root privileges)
+$ mount -t debugfs debugfs /sys/kernel/debug
 
 Check that the driver you are about to trace is not loaded.
 
 Activate mmiotrace (requires root privileges):
-$ echo mmiotrace > /debug/tracing/current_tracer
+$ echo mmiotrace > /sys/kernel/debug/tracing/current_tracer
 
 Start storing the trace:
-$ cat /debug/tracing/trace_pipe > mydump.txt &
+$ cat /sys/kernel/debug/tracing/trace_pipe > mydump.txt &
 The 'cat' process should stay running (sleeping) in the background.
 
 Load the driver you want to trace and use it. Mmiotrace will only catch MMIO
 accesses to areas that are ioremapped while mmiotrace is active.
 
 During tracing you can place comments (markers) into the trace by
-$ echo "X is up" > /debug/tracing/trace_marker
+$ echo "X is up" > /sys/kernel/debug/tracing/trace_marker
 This makes it easier to see which part of the (huge) trace corresponds to
 which action. It is recommended to place descriptive markers about what you
 do.
 
 Shut down mmiotrace (requires root privileges):
-$ echo nop > /debug/tracing/current_tracer
+$ echo nop > /sys/kernel/debug/tracing/current_tracer
 The 'cat' process exits. If it does not, kill it by issuing 'fg' command and
 pressing ctrl+c.
 
@@ -78,10 +78,10 @@ to view your kernel log and look for "mmiotrace has lost events" warning. If
 events were lost, the trace is incomplete. You should enlarge the buffers and
 try again. Buffers are enlarged by first seeing how large the current buffers
 are:
-$ cat /debug/tracing/buffer_size_kb
+$ cat /sys/kernel/debug/tracing/buffer_size_kb
 gives you a number. Approximately double this number and write it back, for
 instance:
-$ echo 128000 > /debug/tracing/buffer_size_kb
+$ echo 128000 > /sys/kernel/debug/tracing/buffer_size_kb
 Then start again from the top.
 
 If you are doing a trace for a driver project, e.g. Nouveau, you should also
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 37e0f81cada..83650e00632 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -430,7 +430,7 @@ static void pkt_sysfs_cleanup(void)
 /********************************************************************
   entries in debugfs
 
-  /debugfs/pktcdvd[0-7]/
+  /sys/kernel/debug/pktcdvd[0-7]/
 			info
 
  *******************************************************************/
diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
index c77c6c6d9d2..6ce0e2667a8 100644
--- a/drivers/gpu/drm/drm_debugfs.c
+++ b/drivers/gpu/drm/drm_debugfs.c
@@ -105,7 +105,7 @@ int drm_debugfs_create_files(struct drm_info_list *files, int count,
 		ent = debugfs_create_file(files[i].name, S_IFREG | S_IRUGO,
 					  root, tmp, &drm_debugfs_fops);
 		if (!ent) {
-			DRM_ERROR("Cannot create /debugfs/dri/%s/%s\n",
+			DRM_ERROR("Cannot create /sys/kernel/debug/dri/%s/%s\n",
 				  name, files[i].name);
 			drm_free(tmp, sizeof(struct drm_info_node),
 				 _DRM_DRIVER);
@@ -133,9 +133,9 @@ EXPORT_SYMBOL(drm_debugfs_create_files);
  * \param minor device minor number
  * \param root DRI debugfs dir entry.
  *
- * Create the DRI debugfs root entry "/debugfs/dri", the device debugfs root entry
- * "/debugfs/dri/%minor%/", and each entry in debugfs_list as
- * "/debugfs/dri/%minor%/%name%".
+ * Create the DRI debugfs root entry "/sys/kernel/debug/dri", the device debugfs root entry
+ * "/sys/kernel/debug/dri/%minor%/", and each entry in debugfs_list as
+ * "/sys/kernel/debug/dri/%minor%/%name%".
  */
 int drm_debugfs_init(struct drm_minor *minor, int minor_id,
 		     struct dentry *root)
@@ -148,7 +148,7 @@ int drm_debugfs_init(struct drm_minor *minor, int minor_id,
 	sprintf(name, "%d", minor_id);
 	minor->debugfs_root = debugfs_create_dir(name, root);
 	if (!minor->debugfs_root) {
-		DRM_ERROR("Cannot create /debugfs/dri/%s\n", name);
+		DRM_ERROR("Cannot create /sys/kernel/debug/dri/%s\n", name);
 		return -1;
 	}
 
@@ -165,7 +165,7 @@ int drm_debugfs_init(struct drm_minor *minor, int minor_id,
 		ret = dev->driver->debugfs_init(minor);
 		if (ret) {
 			DRM_ERROR("DRM: Driver failed to initialize "
-				  "/debugfs/dri.\n");
+				  "/sys/kernel/debug/dri.\n");
 			return ret;
 		}
 	}
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 019b7c57823..1bf7efd8d33 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -339,7 +339,7 @@ static int __init drm_core_init(void)
 
 	drm_debugfs_root = debugfs_create_dir("dri", NULL);
 	if (!drm_debugfs_root) {
-		DRM_ERROR("Cannot create /debugfs/dri\n");
+		DRM_ERROR("Cannot create /sys/kernel/debug/dri\n");
 		ret = -1;
 		goto err_p3;
 	}
diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c
index 89050684fe0..387a8de1bc7 100644
--- a/drivers/gpu/drm/drm_stub.c
+++ b/drivers/gpu/drm/drm_stub.c
@@ -343,7 +343,7 @@ static int drm_get_minor(struct drm_device *dev, struct drm_minor **minor, int t
 #if defined(CONFIG_DEBUG_FS)
 	ret = drm_debugfs_init(new_minor, minor_id, drm_debugfs_root);
 	if (ret) {
-		DRM_ERROR("DRM: Failed to initialize /debugfs/dri.\n");
+		DRM_ERROR("DRM: Failed to initialize /sys/kernel/debug/dri.\n");
 		goto err_g2;
 	}
 #endif
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 01f282cd098..3b6383168c6 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2206,7 +2206,7 @@ config SKGE_DEBUG
        depends on SKGE && DEBUG_FS
        help
 	 This option adds the ability to dump driver state for debugging.
-	 The file debugfs/skge/ethX displays the state of the internal
+	 The file /sys/kernel/debug/skge/ethX displays the state of the internal
 	 transmit and receive rings.
 
 	 If unsure, say N.
@@ -2232,7 +2232,7 @@ config SKY2_DEBUG
        depends on SKY2 && DEBUG_FS
        help
 	 This option adds the ability to dump driver state for debugging.
-	 The file debugfs/sky2/ethX displays the state of the internal
+	 The file /sys/kernel/debug/sky2/ethX displays the state of the internal
 	 transmit and receive rings.
 
 	 If unsure, say N.
diff --git a/drivers/net/wimax/i2400m/i2400m.h b/drivers/net/wimax/i2400m/i2400m.h
index 1fe5da4cf0a..60330f313f2 100644
--- a/drivers/net/wimax/i2400m/i2400m.h
+++ b/drivers/net/wimax/i2400m/i2400m.h
@@ -432,7 +432,7 @@ struct i2400m {
 	unsigned ready:1;		/* all probing steps done */
 	unsigned rx_reorder:1;		/* RX reorder is enabled */
 	u8 trace_msg_from_user;		/* echo rx msgs to 'trace' pipe */
-					/* typed u8 so debugfs/u8 can tweak */
+					/* typed u8 so /sys/kernel/debug/u8 can tweak */
 	enum i2400m_system_state state;
 	wait_queue_head_t state_wq;	/* Woken up when on state updates */
 
diff --git a/drivers/net/wireless/ath/ath5k/Kconfig b/drivers/net/wireless/ath/ath5k/Kconfig
index 509b6f94f73..daf0c83527d 100644
--- a/drivers/net/wireless/ath/ath5k/Kconfig
+++ b/drivers/net/wireless/ath/ath5k/Kconfig
@@ -28,11 +28,10 @@ config ATH5K_DEBUG
 	  Say Y, if and you will get debug options for ath5k.
 	  To use this, you need to mount debugfs:
 
-	  mkdir /debug/
-	  mount -t debugfs debug /debug/
+	  mount -t debugfs debug /sys/kernel/debug
 
 	  You will get access to files under:
-	  /debug/ath5k/phy0/
+	  /sys/kernel/debug/ath5k/phy0/
 
 	  To enable debug, pass the debug level to the debug module
 	  parameter. For example:
diff --git a/drivers/net/wireless/libertas/README b/drivers/net/wireless/libertas/README
index d860fc37575..ab6a2d518af 100644
--- a/drivers/net/wireless/libertas/README
+++ b/drivers/net/wireless/libertas/README
@@ -72,7 +72,7 @@ rdrf
 	location that is to be read.  This parameter must be specified in
 	hexadecimal (its possible to preceed preceding the number with a "0x").
 
-	Path: /debugfs/libertas_wireless/ethX/registers/
+	Path: /sys/kernel/debug/libertas_wireless/ethX/registers/
 
 	Usage:
 		echo "0xa123" > rdmac ; cat rdmac
@@ -95,7 +95,7 @@ wrrf
 sleepparams
 	This command is used to set the sleepclock configurations
 
-	Path: /debugfs/libertas_wireless/ethX/
+	Path: /sys/kernel/debug/libertas_wireless/ethX/
 
 	Usage:
 		cat sleepparams: reads the current sleepclock configuration
@@ -115,7 +115,7 @@ subscribed_events
 	The subscribed_events directory contains the interface for the
 	subscribed events API.
 
-	Path: /debugfs/libertas_wireless/ethX/subscribed_events/
+	Path: /sys/kernel/debug/libertas_wireless/ethX/subscribed_events/
 
 	Each event is represented by a filename. Each filename consists of the
 	following three fields:
@@ -165,7 +165,7 @@ subscribed_events
 extscan
 	This command is used to do a specific scan.
 
-	Path: /debugfs/libertas_wireless/ethX/
+	Path: /sys/kernel/debug/libertas_wireless/ethX/
 
 	Usage: echo "SSID" > extscan
 
@@ -179,7 +179,7 @@ getscantable
 	Display the current contents of the driver scan table (ie. get the
 	scan results).
 
-	Path: /debugfs/libertas_wireless/ethX/
+	Path: /sys/kernel/debug/libertas_wireless/ethX/
 
 	Usage:
 		cat getscantable
@@ -188,7 +188,7 @@ setuserscan
 	Initiate a customized scan and retrieve the results
 
 
-	Path: /debugfs/libertas_wireless/ethX/
+	Path: /sys/kernel/debug/libertas_wireless/ethX/
 
     Usage:
        echo "[ARGS]" > setuserscan
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 2b02b1fb39a..8d0f0de76b6 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -53,8 +53,7 @@
  * debugfs interface
  *
  * To access this interface the user should:
- * # mkdir /debug
- * # mount -t debugfs none /debug
+ * # mount -t debugfs none /sys/kernel/debug
  *
  * The lpfc debugfs directory hierarchy is:
  * lpfc/lpfcX/vportY
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 883cd44ff76..99b7aada28d 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -406,7 +406,7 @@ static inline char *pack_hex_byte(char *buf, u8 byte)
  *
  * Use tracing_on/tracing_off when you want to quickly turn on or off
  * tracing. It simply enables or disables the recording of the trace events.
- * This also corresponds to the user space debugfs/tracing/tracing_on
+ * This also corresponds to the user space /sys/kernel/debug/tracing/tracing_on
  * file, which gives a means for the kernel and userspace to interact.
  * Place a tracing_off() in the kernel where you want tracing to end.
  * From user space, examine the trace, and then echo 1 > tracing_on
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 14df7e635d4..b9dc4ca0246 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -198,7 +198,7 @@ static inline void tracepoint_synchronize_unregister(void)
  *	* This is how the trace record is structured and will
  *	* be saved into the ring buffer. These are the fields
  *	* that will be exposed to user-space in
- *	* /debug/tracing/events/<*>/format.
+ *	* /sys/kernel/debug/tracing/events/<*>/format.
  *	*
  *	* The declared 'local variable' is called '__entry'
  *	*
@@ -258,7 +258,7 @@ static inline void tracepoint_synchronize_unregister(void)
  * tracepoint callback (this is used by programmatic plugins and
  * can also by used by generic instrumentation like SystemTap), and
  * it is also used to expose a structured trace record in
- * /debug/tracing/events/.
+ * /sys/kernel/debug/tracing/events/.
  */
 
 #define TRACE_EVENT(name, proto, args, struct, assign, print)	\
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 4a13e5a01ce..61071fecc82 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -147,7 +147,7 @@ config IRQSOFF_TRACER
 	  disabled by default and can be runtime (re-)started
 	  via:
 
-	      echo 0 > /debugfs/tracing/tracing_max_latency
+	      echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
 
 	  (Note that kernel size and overhead increases with this option
 	  enabled. This option and the preempt-off timing option can be
@@ -168,7 +168,7 @@ config PREEMPT_TRACER
 	  disabled by default and can be runtime (re-)started
 	  via:
 
-	      echo 0 > /debugfs/tracing/tracing_max_latency
+	      echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
 
 	  (Note that kernel size and overhead increases with this option
 	  enabled. This option and the irqs-off timing option can be
@@ -261,7 +261,7 @@ config PROFILE_ANNOTATED_BRANCHES
 	  This tracer profiles all the the likely and unlikely macros
 	  in the kernel. It will display the results in:
 
-	  /debugfs/tracing/profile_annotated_branch
+	  /sys/kernel/debug/tracing/profile_annotated_branch
 
 	  Note: this will add a significant overhead, only turn this
 	  on if you need to profile the system's use of these macros.
@@ -274,7 +274,7 @@ config PROFILE_ALL_BRANCHES
 	  taken in the kernel is recorded whether it hit or miss.
 	  The results will be displayed in:
 
-	  /debugfs/tracing/profile_branch
+	  /sys/kernel/debug/tracing/profile_branch
 
 	  This option also enables the likely/unlikely profiler.
 
@@ -323,7 +323,7 @@ config STACK_TRACER
 	select KALLSYMS
 	help
 	  This special tracer records the maximum stack footprint of the
-	  kernel and displays it in debugfs/tracing/stack_trace.
+	  kernel and displays it in /sys/kernel/debug/tracing/stack_trace.
 
 	  This tracer works by hooking into every function call that the
 	  kernel executes, and keeping a maximum stack depth value and
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8acd9b81a5d..c1878bfb2e1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -344,7 +344,7 @@ static raw_spinlock_t ftrace_max_lock =
 /*
  * Copy the new maximum trace into the separate maximum-trace
  * structure. (this way the maximum trace is permanently saved,
- * for later retrieval via /debugfs/tracing/latency_trace)
+ * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
  */
 static void
 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
@@ -2414,21 +2414,20 @@ static const struct file_operations tracing_iter_fops = {
 
 static const char readme_msg[] =
 	"tracing mini-HOWTO:\n\n"
-	"# mkdir /debug\n"
-	"# mount -t debugfs nodev /debug\n\n"
-	"# cat /debug/tracing/available_tracers\n"
+	"# mount -t debugfs nodev /sys/kernel/debug\n\n"
+	"# cat /sys/kernel/debug/tracing/available_tracers\n"
 	"wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n"
-	"# cat /debug/tracing/current_tracer\n"
+	"# cat /sys/kernel/debug/tracing/current_tracer\n"
 	"nop\n"
-	"# echo sched_switch > /debug/tracing/current_tracer\n"
-	"# cat /debug/tracing/current_tracer\n"
+	"# echo sched_switch > /sys/kernel/debug/tracing/current_tracer\n"
+	"# cat /sys/kernel/debug/tracing/current_tracer\n"
 	"sched_switch\n"
-	"# cat /debug/tracing/trace_options\n"
+	"# cat /sys/kernel/debug/tracing/trace_options\n"
 	"noprint-parent nosym-offset nosym-addr noverbose\n"
-	"# echo print-parent > /debug/tracing/trace_options\n"
-	"# echo 1 > /debug/tracing/tracing_enabled\n"
-	"# cat /debug/tracing/trace > /tmp/trace.txt\n"
-	"# echo 0 > /debug/tracing/tracing_enabled\n"
+	"# echo print-parent > /sys/kernel/debug/tracing/trace_options\n"
+	"# echo 1 > /sys/kernel/debug/tracing/tracing_enabled\n"
+	"# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n"
+	"# echo 0 > /sys/kernel/debug/tracing/tracing_enabled\n"
 ;
 
 static ssize_t
diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py
index 902f9a99262..db40fa04cd5 100644
--- a/scripts/tracing/draw_functrace.py
+++ b/scripts/tracing/draw_functrace.py
@@ -12,10 +12,9 @@ calls. Only the functions's names and the the call time are provided.
 
 Usage:
 	Be sure that you have CONFIG_FUNCTION_TRACER
-	# mkdir /debugfs
-	# mount -t debug debug /debug
-	# echo function > /debug/tracing/current_tracer
-	$ cat /debug/tracing/trace_pipe > ~/raw_trace_func
+	# mount -t debugfs nodev /sys/kernel/debug
+	# echo function > /sys/kernel/debug/tracing/current_tracer
+	$ cat /sys/kernel/debug/tracing/trace_pipe > ~/raw_trace_func
 	Wait some times but not too much, the script is a bit slow.
 	Break the pipe (Ctrl + Z)
 	$ scripts/draw_functrace.py < raw_trace_func > draw_functrace
-- 
cgit v1.2.3-70-g09d2


From cc835e321a9f3fa5e083436872e198095f4805b9 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Tue, 31 Mar 2009 12:28:31 -0700
Subject: USB: nop-usb-xceiv: behave when linked as a module

The NOP OTG transceiver driver needs to be usable from modules.
Make sure its symbols are always accessible at both compile and
link time, and make sure the device instance is allocated from
the heap so that device lifetime rules are obeyed.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/otg/nop-usb-xceiv.c | 25 ++++++++++---------------
 include/linux/usb/otg.h         |  4 ++--
 2 files changed, 12 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/otg/nop-usb-xceiv.c b/drivers/usb/otg/nop-usb-xceiv.c
index c567168f89a..9ed5ea56867 100644
--- a/drivers/usb/otg/nop-usb-xceiv.c
+++ b/drivers/usb/otg/nop-usb-xceiv.c
@@ -22,8 +22,8 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
  * Current status:
- * 	this is to add "nop" transceiver for all those phy which is
- * 	autonomous such as isp1504 etc.
+ *	This provides a "nop" transceiver for PHYs which are
+ *	autonomous such as isp1504, isp1707, etc.
  */
 
 #include <linux/module.h>
@@ -36,30 +36,25 @@ struct nop_usb_xceiv {
 	struct device		*dev;
 };
 
-static u64 nop_xceiv_dmamask = DMA_BIT_MASK(32);
-
-static struct platform_device nop_xceiv_device = {
-	.name           = "nop_usb_xceiv",
-	.id             = -1,
-	.dev = {
-		.dma_mask               = &nop_xceiv_dmamask,
-		.coherent_dma_mask      = DMA_BIT_MASK(32),
-		.platform_data          = NULL,
-	},
-};
+static struct platform_device *pd;
 
 void usb_nop_xceiv_register(void)
 {
-	if (platform_device_register(&nop_xceiv_device) < 0) {
+	if (pd)
+		return;
+	pd = platform_device_register_simple("nop_usb_xceiv", -1, NULL, 0);
+	if (!pd) {
 		printk(KERN_ERR "Unable to register usb nop transceiver\n");
 		return;
 	}
 }
+EXPORT_SYMBOL(usb_nop_xceiv_register);
 
 void usb_nop_xceiv_unregister(void)
 {
-	platform_device_unregister(&nop_xceiv_device);
+	platform_device_unregister(pd);
 }
+EXPORT_SYMBOL(usb_nop_xceiv_unregister);
 
 static inline struct nop_usb_xceiv *xceiv_to_nop(struct otg_transceiver *x)
 {
diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h
index 1aaa826396a..2443c0e7a80 100644
--- a/include/linux/usb/otg.h
+++ b/include/linux/usb/otg.h
@@ -80,10 +80,10 @@ struct otg_transceiver {
 
 /* for board-specific init logic */
 extern int otg_set_transceiver(struct otg_transceiver *);
-#ifdef CONFIG_NOP_USB_XCEIV
+
+/* sometimes transceivers are accessed only through e.g. ULPI */
 extern void usb_nop_xceiv_register(void);
 extern void usb_nop_xceiv_unregister(void);
-#endif
 
 
 /* for usb host and peripheral controller drivers */
-- 
cgit v1.2.3-70-g09d2


From 00048b8bde5a6cbd9c3a76f272cc9ddb55705e37 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Fri, 24 Apr 2009 14:56:26 -0700
Subject: USB: add usb debugfs directory

Add a common usb directory in debugfs that the usb subsystem can use.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/usb.c | 22 ++++++++++++++++++++++
 include/linux/usb.h    |  3 +++
 2 files changed, 25 insertions(+)

(limited to 'include')

diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 7eee400d3e3..5f6873f5f26 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -34,6 +34,7 @@
 #include <linux/usb.h>
 #include <linux/mutex.h>
 #include <linux/workqueue.h>
+#include <linux/debugfs.h>
 
 #include <asm/io.h>
 #include <linux/scatterlist.h>
@@ -1001,6 +1002,22 @@ static struct notifier_block usb_bus_nb = {
 	.notifier_call = usb_bus_notify,
 };
 
+struct dentry *usb_debug_root;
+EXPORT_SYMBOL_GPL(usb_debug_root);
+
+static int usb_debugfs_init(void)
+{
+	usb_debug_root = debugfs_create_dir("usb", NULL);
+	if (!usb_debug_root)
+		return -ENOENT;
+	return 0;
+}
+
+static void usb_debugfs_cleanup(void)
+{
+	debugfs_remove(usb_debug_root);
+}
+
 /*
  * Init
  */
@@ -1012,6 +1029,10 @@ static int __init usb_init(void)
 		return 0;
 	}
 
+	retval = usb_debugfs_init();
+	if (retval)
+		goto out;
+
 	retval = ksuspend_usb_init();
 	if (retval)
 		goto out;
@@ -1083,6 +1104,7 @@ static void __exit usb_exit(void)
 	bus_unregister_notifier(&usb_bus_type, &usb_bus_nb);
 	bus_unregister(&usb_bus_type);
 	ksuspend_usb_cleanup();
+	usb_debugfs_cleanup();
 }
 
 subsys_initcall(usb_init);
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 3aa2cd1f8d0..29060dad81e 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1558,6 +1558,9 @@ extern void usb_unregister_notify(struct notifier_block *nb);
 #define err(format, arg...) printk(KERN_ERR KBUILD_MODNAME ": " \
 	format "\n" , ## arg)
 
+/* debugfs stuff */
+extern struct dentry *usb_debug_root;
+
 #endif  /* __KERNEL__ */
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From 74675a58507e769beee7d949dbed788af3c4139d Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Thu, 30 Apr 2009 10:08:18 -0400
Subject: NLS: update handling of Unicode

This patch (as1239) updates the kernel's treatment of Unicode.  The
character-set conversion routines are well behind the current state of
the Unicode specification: They don't recognize the existence of code
points beyond plane 0 or of surrogate pairs in the UTF-16 encoding.

The old wchar_t 16-bit type is retained because it's still used in
lots of places.  This shouldn't cause any new problems; if a
conversion now results in an invalid 16-bit code then before it must
have yielded an undefined code.

Difficult-to-read names like "utf_mbstowcs" are replaced with more
transparent names like "utf8s_to_utf16s" and the ordering of the
parameters is rationalized (buffer lengths come immediate after the
pointers they refer to, and the inputs precede the outputs).
Fortunately the low-level conversion routines are used in only a few
places; the interfaces to the higher-level uni2char and char2uni
methods have been left unchanged.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/message.c |  10 +--
 fs/befs/linuxvfs.c         |  20 +++---
 fs/fat/dir.c               |  29 ++++----
 fs/fat/namei_vfat.c        |   4 +-
 fs/isofs/joliet.c          |  36 +---------
 fs/ncpfs/ncplib_kernel.c   |   8 ++-
 fs/nls/nls_base.c          | 164 +++++++++++++++++++++++++++++----------------
 fs/nls/nls_utf8.c          |  13 +++-
 include/linux/nls.h        |  35 ++++++++--
 9 files changed, 182 insertions(+), 137 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index e98f928c08e..9bd26dec759 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -780,14 +780,13 @@ int usb_string(struct usb_device *dev, int index, char *buf, size_t size)
 {
 	unsigned char *tbuf;
 	int err;
-	unsigned int u;
 
 	if (dev->state == USB_STATE_SUSPENDED)
 		return -EHOSTUNREACH;
 	if (size <= 0 || !buf || !index)
 		return -EINVAL;
 	buf[0] = 0;
-	tbuf = kmalloc(256 + 2, GFP_NOIO);
+	tbuf = kmalloc(256, GFP_NOIO);
 	if (!tbuf)
 		return -ENOMEM;
 
@@ -814,12 +813,9 @@ int usb_string(struct usb_device *dev, int index, char *buf, size_t size)
 	if (err < 0)
 		goto errout;
 
-	for (u = 2; u < err; u += 2)
-		le16_to_cpus((u16 *)&tbuf[u]);
-	tbuf[u] = 0;
-	tbuf[u + 1] = 0;
 	size--;		/* leave room for trailing NULL char in output buffer */
-	err = utf8_wcstombs(buf, (u16 *)&tbuf[2], size);
+	err = utf16s_to_utf8s((wchar_t *) &tbuf[2], (err - 2) / 2,
+			UTF16_LITTLE_ENDIAN, buf, size);
 	buf[err] = 0;
 
 	if (tbuf[1] != USB_DT_STRING)
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 9367b6297d8..89cd2deeb4a 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -513,7 +513,7 @@ befs_utf2nls(struct super_block *sb, const char *in,
 {
 	struct nls_table *nls = BEFS_SB(sb)->nls;
 	int i, o;
-	wchar_t uni;
+	unicode_t uni;
 	int unilen, utflen;
 	char *result;
 	/* The utf8->nls conversion won't make the final nls string bigger
@@ -539,16 +539,16 @@ befs_utf2nls(struct super_block *sb, const char *in,
 	for (i = o = 0; i < in_len; i += utflen, o += unilen) {
 
 		/* convert from UTF-8 to Unicode */
-		utflen = utf8_mbtowc(&uni, &in[i], in_len - i);
-		if (utflen < 0) {
+		utflen = utf8_to_utf32(&in[i], in_len - i, &uni);
+		if (utflen < 0)
 			goto conv_err;
-		}
 
 		/* convert from Unicode to nls */
+		if (uni > MAX_WCHAR_T)
+			goto conv_err;
 		unilen = nls->uni2char(uni, &result[o], in_len - o);
-		if (unilen < 0) {
+		if (unilen < 0)
 			goto conv_err;
-		}
 	}
 	result[o] = '\0';
 	*out_len = o;
@@ -619,15 +619,13 @@ befs_nls2utf(struct super_block *sb, const char *in,
 
 		/* convert from nls to unicode */
 		unilen = nls->char2uni(&in[i], in_len - i, &uni);
-		if (unilen < 0) {
+		if (unilen < 0)
 			goto conv_err;
-		}
 
 		/* convert from unicode to UTF-8 */
-		utflen = utf8_wctomb(&result[o], uni, 3);
-		if (utflen <= 0) {
+		utflen = utf32_to_utf8(uni, &result[o], 3);
+		if (utflen <= 0)
 			goto conv_err;
-		}
 	}
 
 	result[o] = '\0';
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index f3500294eec..7c14c8cbbab 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -22,6 +22,19 @@
 #include <asm/uaccess.h>
 #include "fat.h"
 
+/*
+ * Maximum buffer size of short name.
+ * [(MSDOS_NAME + '.') * max one char + nul]
+ * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul]
+ */
+#define FAT_MAX_SHORT_SIZE	((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1)
+/*
+ * Maximum buffer size of unicode chars from slots.
+ * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)]
+ */
+#define FAT_MAX_UNI_CHARS	((MSDOS_SLOTS - 1) * 13 + 1)
+#define FAT_MAX_UNI_SIZE	(FAT_MAX_UNI_CHARS * sizeof(wchar_t))
+
 static inline loff_t fat_make_i_pos(struct super_block *sb,
 				    struct buffer_head *bh,
 				    struct msdos_dir_entry *de)
@@ -171,7 +184,8 @@ static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni,
 				unsigned char *buf, int size)
 {
 	if (sbi->options.utf8)
-		return utf8_wcstombs(buf, uni, size);
+		return utf16s_to_utf8s(uni, FAT_MAX_UNI_CHARS,
+				UTF16_HOST_ENDIAN, buf, size);
 	else
 		return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate,
 				   sbi->nls_io);
@@ -324,19 +338,6 @@ parse_long:
 	return 0;
 }
 
-/*
- * Maximum buffer size of short name.
- * [(MSDOS_NAME + '.') * max one char + nul]
- * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul]
- */
-#define FAT_MAX_SHORT_SIZE	((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1)
-/*
- * Maximum buffer size of unicode chars from slots.
- * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)]
- */
-#define FAT_MAX_UNI_CHARS	((MSDOS_SLOTS - 1) * 13 + 1)
-#define FAT_MAX_UNI_SIZE	(FAT_MAX_UNI_CHARS * sizeof(wchar_t))
-
 /*
  * Return values: negative -> error, 0 -> not found, positive -> found,
  * value is the total amount of slots, including the shortname entry.
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index b50ecbe97f8..f92ad999535 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -502,11 +502,11 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
 	if (utf8) {
 		int name_len = strlen(name);
 
-		*outlen = utf8_mbstowcs((wchar_t *)outname, name, PATH_MAX);
+		*outlen = utf8s_to_utf16s(name, PATH_MAX, (wchar_t *) outname);
 
 		/*
 		 * We stripped '.'s before and set len appropriately,
-		 * but utf8_mbstowcs doesn't care about len
+		 * but utf8s_to_utf16s doesn't care about len
 		 */
 		*outlen -= (name_len - len);
 
diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c
index 92c14b850e9..a048de81c09 100644
--- a/fs/isofs/joliet.c
+++ b/fs/isofs/joliet.c
@@ -37,37 +37,6 @@ uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls)
 	return (op - ascii);
 }
 
-/* Convert big endian wide character string to utf8 */
-static int
-wcsntombs_be(__u8 *s, const __u8 *pwcs, int inlen, int maxlen)
-{
-	const __u8 *ip;
-	__u8 *op;
-	int size;
-	__u16 c;
-
-	op = s;
-	ip = pwcs;
-	while ((*ip || ip[1]) && (maxlen > 0) && (inlen > 0)) {
-		c = (*ip << 8) | ip[1];
-		if (c > 0x7f) {
-			size = utf8_wctomb(op, c, maxlen);
-			if (size == -1) {
-				/* Ignore character and move on */
-				maxlen--;
-			} else {
-				op += size;
-				maxlen -= size;
-			}
-		} else {
-			*op++ = (__u8) c;
-		}
-		ip += 2;
-		inlen--;
-	}
-	return (op - s);
-}
-
 int
 get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode)
 {
@@ -79,8 +48,9 @@ get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, st
 	nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset;
 
 	if (utf8) {
-		len = wcsntombs_be(outname, de->name,
-				de->name_len[0] >> 1, PAGE_SIZE);
+		len = utf16s_to_utf8s((const wchar_t *) de->name,
+				de->name_len[0] >> 1, UTF16_BIG_ENDIAN,
+				outname, PAGE_SIZE);
 	} else {
 		len = uni16_to_x8(outname, (__be16 *) de->name,
 				de->name_len[0] >> 1, nls);
diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c
index 97645f11211..0ec6237a597 100644
--- a/fs/ncpfs/ncplib_kernel.c
+++ b/fs/ncpfs/ncplib_kernel.c
@@ -1113,11 +1113,13 @@ ncp__io2vol(struct ncp_server *server, unsigned char *vname, unsigned int *vlen,
 
 		if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) {
 			int k;
+			unicode_t u;
 
-			k = utf8_mbtowc(&ec, iname, iname_end - iname);
-			if (k < 0)
+			k = utf8_to_utf32(iname, iname_end - iname, &u);
+			if (k < 0 || u > MAX_WCHAR_T)
 				return -EINVAL;
 			iname += k;
+			ec = u;
 		} else {
 			if (*iname == NCP_ESC) {
 				int k;
@@ -1214,7 +1216,7 @@ ncp__vol2io(struct ncp_server *server, unsigned char *iname, unsigned int *ilen,
 		if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) {
 			int k;
 
-			k = utf8_wctomb(iname, ec, iname_end - iname);
+			k = utf32_to_utf8(ec, iname, iname_end - iname);
 			if (k < 0) {
 				err = -ENAMETOOLONG;
 				goto quit;
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index 750abf211e2..477d37d83b3 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -15,6 +15,7 @@
 #include <linux/errno.h>
 #include <linux/kmod.h>
 #include <linux/spinlock.h>
+#include <asm/byteorder.h>
 
 static struct nls_table default_table;
 static struct nls_table *tables = &default_table;
@@ -43,10 +44,17 @@ static const struct utf8_table utf8_table[] =
     {0,						       /* end of table    */}
 };
 
-int
-utf8_mbtowc(wchar_t *p, const __u8 *s, int n)
+#define UNICODE_MAX	0x0010ffff
+#define PLANE_SIZE	0x00010000
+
+#define SURROGATE_MASK	0xfffff800
+#define SURROGATE_PAIR	0x0000d800
+#define SURROGATE_LOW	0x00000400
+#define SURROGATE_BITS	0x000003ff
+
+int utf8_to_utf32(const u8 *s, int len, unicode_t *pu)
 {
-	long l;
+	unsigned long l;
 	int c0, c, nc;
 	const struct utf8_table *t;
   
@@ -57,12 +65,13 @@ utf8_mbtowc(wchar_t *p, const __u8 *s, int n)
 		nc++;
 		if ((c0 & t->cmask) == t->cval) {
 			l &= t->lmask;
-			if (l < t->lval)
+			if (l < t->lval || l > UNICODE_MAX ||
+					(l & SURROGATE_MASK) == SURROGATE_PAIR)
 				return -1;
-			*p = l;
+			*pu = (unicode_t) l;
 			return nc;
 		}
-		if (n <= nc)
+		if (len <= nc)
 			return -1;
 		s++;
 		c = (*s ^ 0x80) & 0xFF;
@@ -72,76 +81,119 @@ utf8_mbtowc(wchar_t *p, const __u8 *s, int n)
 	}
 	return -1;
 }
+EXPORT_SYMBOL(utf8_to_utf32);
 
-int
-utf8_mbstowcs(wchar_t *pwcs, const __u8 *s, int n)
+int utf32_to_utf8(unicode_t u, u8 *s, int maxlen)
 {
-	__u16 *op;
-	const __u8 *ip;
-	int size;
-
-	op = pwcs;
-	ip = s;
-	while (*ip && n > 0) {
-		if (*ip & 0x80) {
-			size = utf8_mbtowc(op, ip, n);
-			if (size == -1) {
-				/* Ignore character and move on */
-				ip++;
-				n--;
-			} else {
-				op++;
-				ip += size;
-				n -= size;
-			}
-		} else {
-			*op++ = *ip++;
-			n--;
-		}
-	}
-	return (op - pwcs);
-}
-
-int
-utf8_wctomb(__u8 *s, wchar_t wc, int maxlen)
-{
-	long l;
+	unsigned long l;
 	int c, nc;
 	const struct utf8_table *t;
-  
+
 	if (!s)
 		return 0;
-  
-	l = wc;
+
+	l = u;
+	if (l > UNICODE_MAX || (l & SURROGATE_MASK) == SURROGATE_PAIR)
+		return -1;
+
 	nc = 0;
 	for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) {
 		nc++;
 		if (l <= t->lmask) {
 			c = t->shift;
-			*s = t->cval | (l >> c);
+			*s = (u8) (t->cval | (l >> c));
 			while (c > 0) {
 				c -= 6;
 				s++;
-				*s = 0x80 | ((l >> c) & 0x3F);
+				*s = (u8) (0x80 | ((l >> c) & 0x3F));
 			}
 			return nc;
 		}
 	}
 	return -1;
 }
+EXPORT_SYMBOL(utf32_to_utf8);
 
-int
-utf8_wcstombs(__u8 *s, const wchar_t *pwcs, int maxlen)
+int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs)
 {
-	const __u16 *ip;
-	__u8 *op;
+	u16 *op;
 	int size;
+	unicode_t u;
+
+	op = pwcs;
+	while (*s && len > 0) {
+		if (*s & 0x80) {
+			size = utf8_to_utf32(s, len, &u);
+			if (size < 0) {
+				/* Ignore character and move on */
+				size = 1;
+			} else if (u >= PLANE_SIZE) {
+				u -= PLANE_SIZE;
+				*op++ = (wchar_t) (SURROGATE_PAIR |
+						((u >> 10) & SURROGATE_BITS));
+				*op++ = (wchar_t) (SURROGATE_PAIR |
+						SURROGATE_LOW |
+						(u & SURROGATE_BITS));
+			} else {
+				*op++ = (wchar_t) u;
+			}
+			s += size;
+			len -= size;
+		} else {
+			*op++ = *s++;
+			len--;
+		}
+	}
+	return op - pwcs;
+}
+EXPORT_SYMBOL(utf8s_to_utf16s);
+
+static inline unsigned long get_utf16(unsigned c, enum utf16_endian endian)
+{
+	switch (endian) {
+	default:
+		return c;
+	case UTF16_LITTLE_ENDIAN:
+		return __le16_to_cpu(c);
+	case UTF16_BIG_ENDIAN:
+		return __be16_to_cpu(c);
+	}
+}
+
+int utf16s_to_utf8s(const wchar_t *pwcs, int len, enum utf16_endian endian,
+		u8 *s, int maxlen)
+{
+	u8 *op;
+	int size;
+	unsigned long u, v;
 
 	op = s;
-	ip = pwcs;
-	while (*ip && maxlen > 0) {
-		if (*ip > 0x7f) {
-			size = utf8_wctomb(op, *ip, maxlen);
+	while (len > 0 && maxlen > 0) {
+		u = get_utf16(*pwcs, endian);
+		if (!u)
+			break;
+		pwcs++;
+		len--;
+		if (u > 0x7f) {
+			if ((u & SURROGATE_MASK) == SURROGATE_PAIR) {
+				if (u & SURROGATE_LOW) {
+					/* Ignore character and move on */
+					continue;
+				}
+				if (len <= 0)
+					break;
+				v = get_utf16(*pwcs, endian);
+				if ((v & SURROGATE_MASK) != SURROGATE_PAIR ||
+						!(v & SURROGATE_LOW)) {
+					/* Ignore character and move on */
+					continue;
+				}
+				u = PLANE_SIZE + ((u & SURROGATE_BITS) << 10)
+						+ (v & SURROGATE_BITS);
+				pwcs++;
+				len--;
+			}
+			size = utf32_to_utf8(u, op, maxlen);
 			if (size == -1) {
 				/* Ignore character and move on */
 			} else {
@@ -149,13 +201,13 @@ utf8_wcstombs(__u8 *s, const wchar_t *pwcs, int maxlen)
 				maxlen -= size;
 			}
 		} else {
-			*op++ = (__u8) *ip;
+			*op++ = (u8) u;
 			maxlen--;
 		}
-		ip++;
 	}
-	return (op - s);
+	return op - s;
 }
+EXPORT_SYMBOL(utf16s_to_utf8s);
 
 int register_nls(struct nls_table * nls)
 {
@@ -467,9 +519,5 @@ EXPORT_SYMBOL(unregister_nls);
 EXPORT_SYMBOL(unload_nls);
 EXPORT_SYMBOL(load_nls);
 EXPORT_SYMBOL(load_nls_default);
-EXPORT_SYMBOL(utf8_mbtowc);
-EXPORT_SYMBOL(utf8_mbstowcs);
-EXPORT_SYMBOL(utf8_wctomb);
-EXPORT_SYMBOL(utf8_wcstombs);
 
 MODULE_LICENSE("Dual BSD/GPL");
diff --git a/fs/nls/nls_utf8.c b/fs/nls/nls_utf8.c
index aa2c42fdd97..0d60a44acac 100644
--- a/fs/nls/nls_utf8.c
+++ b/fs/nls/nls_utf8.c
@@ -15,7 +15,11 @@ static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
 {
 	int n;
 
-	if ( (n = utf8_wctomb(out, uni, boundlen)) == -1) {
+	if (boundlen <= 0)
+		return -ENAMETOOLONG;
+
+	n = utf32_to_utf8(uni, out, boundlen);
+	if (n < 0) {
 		*out = '?';
 		return -EINVAL;
 	}
@@ -25,11 +29,14 @@ static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
 static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni)
 {
 	int n;
+	unicode_t u;
 
-	if ( (n = utf8_mbtowc(uni, rawstring, boundlen)) == -1) {
+	n = utf8_to_utf32(rawstring, boundlen, &u);
+	if (n < 0 || u > MAX_WCHAR_T) {
 		*uni = 0x003f;	/* ? */
-		n = -EINVAL;
+		return -EINVAL;
 	}
+	*uni = (wchar_t) u;
 	return n;
 }
 
diff --git a/include/linux/nls.h b/include/linux/nls.h
index 52b1a76c1b4..d47beef08df 100644
--- a/include/linux/nls.h
+++ b/include/linux/nls.h
@@ -3,8 +3,23 @@
 
 #include <linux/init.h>
 
-/* unicode character */
-typedef __u16 wchar_t;
+/* Unicode has changed over the years.  Unicode code points no longer
+ * fit into 16 bits; as of Unicode 5 valid code points range from 0
+ * to 0x10ffff (17 planes, where each plane holds 65536 code points).
+ *
+ * The original decision to represent Unicode characters as 16-bit
+ * wchar_t values is now outdated.  But plane 0 still includes the
+ * most commonly used characters, so we will retain it.  The newer
+ * 32-bit unicode_t type can be used when it is necessary to
+ * represent the full Unicode character set.
+ */
+
+/* Plane-0 Unicode character */
+typedef u16 wchar_t;
+#define MAX_WCHAR_T	0xffff
+
+/* Arbitrary Unicode character */
+typedef u32 unicode_t;
 
 struct nls_table {
 	const char *charset;
@@ -21,6 +36,13 @@ struct nls_table {
 /* this value hold the maximum octet of charset */
 #define NLS_MAX_CHARSET_SIZE 6 /* for UTF-8 */
 
+/* Byte order for UTF-16 strings */
+enum utf16_endian {
+	UTF16_HOST_ENDIAN,
+	UTF16_LITTLE_ENDIAN,
+	UTF16_BIG_ENDIAN
+};
+
 /* nls.c */
 extern int register_nls(struct nls_table *);
 extern int unregister_nls(struct nls_table *);
@@ -28,10 +50,11 @@ extern struct nls_table *load_nls(char *);
 extern void unload_nls(struct nls_table *);
 extern struct nls_table *load_nls_default(void);
 
-extern int utf8_mbtowc(wchar_t *, const __u8 *, int);
-extern int utf8_mbstowcs(wchar_t *, const __u8 *, int);
-extern int utf8_wctomb(__u8 *, wchar_t, int);
-extern int utf8_wcstombs(__u8 *, const wchar_t *, int);
+extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu);
+extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen);
+extern int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs);
+extern int utf16s_to_utf8s(const wchar_t *pwcs, int len,
+		enum utf16_endian endian, u8 *s, int maxlen);
 
 static inline unsigned char nls_tolower(struct nls_table *t, unsigned char c)
 {
-- 
cgit v1.2.3-70-g09d2


From 820d7a253c5e59a786d5b608f6e8d0419fdc2f6e Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Mon, 27 Apr 2009 13:17:21 -0700
Subject: USB: remove unused usb_host class

The usb_host class isn't used for anything anymore (it was used for
debug files, but they have moved to debugfs a few kernel releases ago),
so let's delete it before someone accidentally puts a file in it.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/hcd.c | 27 ---------------------------
 drivers/usb/core/usb.c |  6 ------
 drivers/usb/core/usb.h |  2 --
 include/linux/usb.h    |  1 -
 4 files changed, 36 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 42b93da1085..c65d9560402 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -755,23 +755,6 @@ static struct attribute_group usb_bus_attr_group = {
 
 /*-------------------------------------------------------------------------*/
 
-static struct class *usb_host_class;
-
-int usb_host_init(void)
-{
-	int retval = 0;
-
-	usb_host_class = class_create(THIS_MODULE, "usb_host");
-	if (IS_ERR(usb_host_class))
-		retval = PTR_ERR(usb_host_class);
-	return retval;
-}
-
-void usb_host_cleanup(void)
-{
-	class_destroy(usb_host_class);
-}
-
 /**
  * usb_bus_init - shared initialization code
  * @bus: the bus structure being initialized
@@ -818,12 +801,6 @@ static int usb_register_bus(struct usb_bus *bus)
 	set_bit (busnum, busmap.busmap);
 	bus->busnum = busnum;
 
-	bus->dev = device_create(usb_host_class, bus->controller, MKDEV(0, 0),
-				 bus, "usb_host%d", busnum);
-	result = PTR_ERR(bus->dev);
-	if (IS_ERR(bus->dev))
-		goto error_create_class_dev;
-
 	/* Add it to the local list of buses */
 	list_add (&bus->bus_list, &usb_bus_list);
 	mutex_unlock(&usb_bus_list_lock);
@@ -834,8 +811,6 @@ static int usb_register_bus(struct usb_bus *bus)
 		  "number %d\n", bus->busnum);
 	return 0;
 
-error_create_class_dev:
-	clear_bit(busnum, busmap.busmap);
 error_find_busnum:
 	mutex_unlock(&usb_bus_list_lock);
 	return result;
@@ -865,8 +840,6 @@ static void usb_deregister_bus (struct usb_bus *bus)
 	usb_notify_remove_bus(bus);
 
 	clear_bit (bus->busnum, busmap.busmap);
-
-	device_unregister(bus->dev);
 }
 
 /**
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index c71590666ad..eb810bbe7bb 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -1055,9 +1055,6 @@ static int __init usb_init(void)
 	retval = bus_register_notifier(&usb_bus_type, &usb_bus_nb);
 	if (retval)
 		goto bus_notifier_failed;
-	retval = usb_host_init();
-	if (retval)
-		goto host_init_failed;
 	retval = usb_major_init();
 	if (retval)
 		goto major_init_failed;
@@ -1087,8 +1084,6 @@ usb_devio_init_failed:
 driver_register_failed:
 	usb_major_cleanup();
 major_init_failed:
-	usb_host_cleanup();
-host_init_failed:
 	bus_unregister_notifier(&usb_bus_type, &usb_bus_nb);
 bus_notifier_failed:
 	bus_unregister(&usb_bus_type);
@@ -1113,7 +1108,6 @@ static void __exit usb_exit(void)
 	usb_deregister(&usbfs_driver);
 	usb_devio_cleanup();
 	usb_hub_cleanup();
-	usb_host_cleanup();
 	bus_unregister_notifier(&usb_bus_type, &usb_bus_nb);
 	bus_unregister(&usb_bus_type);
 	ksuspend_usb_cleanup();
diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h
index 79d8a9ea559..dabf9255a10 100644
--- a/drivers/usb/core/usb.h
+++ b/drivers/usb/core/usb.h
@@ -41,8 +41,6 @@ extern int  usb_hub_init(void);
 extern void usb_hub_cleanup(void);
 extern int usb_major_init(void);
 extern void usb_major_cleanup(void);
-extern int usb_host_init(void);
-extern void usb_host_cleanup(void);
 
 #ifdef	CONFIG_PM
 
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 29060dad81e..606e0aa5bbe 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -336,7 +336,6 @@ struct usb_bus {
 #ifdef CONFIG_USB_DEVICEFS
 	struct dentry *usbfs_dentry;	/* usbfs dentry entry for the bus */
 #endif
-	struct device *dev;		/* device for this bus */
 
 #if defined(CONFIG_USB_MON) || defined(CONFIG_USB_MON_MODULE)
 	struct mon_bus *mon_bus;	/* non-null when associated */
-- 
cgit v1.2.3-70-g09d2


From 00240c3839d843ccf07abd52806f421f7b87bbdc Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 27 Apr 2009 13:33:16 -0400
Subject: PCI: add power-state name strings

This patch (as1235) adds an array of PCI power-state names, together
with a simple inline accessor routine.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/pci/pci.c   | 5 +++++
 include/linux/pci.h | 8 ++++++++
 2 files changed, 13 insertions(+)

(limited to 'include')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 1a91bf9687a..07bbb9b3b93 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -24,6 +24,11 @@
 #include <asm/setup.h>
 #include "pci.h"
 
+const char *pci_power_names[] = {
+	"error", "D0", "D1", "D2", "D3hot", "D3cold", "unknown",
+};
+EXPORT_SYMBOL_GPL(pci_power_names);
+
 unsigned int pci_pm_d3_delay = PCI_PM_D3_WAIT;
 
 #ifdef CONFIG_PCI_DOMAINS
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 72698d89e76..8e366bb0705 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -124,6 +124,14 @@ typedef int __bitwise pci_power_t;
 #define PCI_UNKNOWN	((pci_power_t __force) 5)
 #define PCI_POWER_ERROR	((pci_power_t __force) -1)
 
+/* Remember to update this when the list above changes! */
+extern const char *pci_power_names[];
+
+static inline const char *pci_power_name(pci_power_t state)
+{
+	return pci_power_names[1 + (int) state];
+}
+
 #define PCI_PM_D2_DELAY	200
 #define PCI_PM_D3_WAIT	10
 #define PCI_PM_BUS_WAIT	50
-- 
cgit v1.2.3-70-g09d2


From cac85a8b4e8e7c51bc0ce2980bba0e35cfec5c2e Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed, 29 Apr 2009 21:04:19 -0700
Subject: USB: composite.h: mark private struct members as private:

Mark internal struct members as /* private: */ so that kernel-doc
won't produce warnings about missing descriptions for them.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Acked-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/composite.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index acd7b0f06c8..4f6bb3d2160 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -124,6 +124,7 @@ struct usb_function {
 	void			(*suspend)(struct usb_function *);
 	void			(*resume)(struct usb_function *);
 
+	/* private: */
 	/* internals */
 	struct list_head		list;
 };
@@ -219,6 +220,7 @@ struct usb_configuration {
 
 	struct usb_composite_dev	*cdev;
 
+	/* private: */
 	/* internals */
 	struct list_head	list;
 	struct list_head	functions;
@@ -321,6 +323,7 @@ struct usb_composite_dev {
 
 	struct usb_configuration	*config;
 
+	/* private: */
 	/* internals */
 	struct usb_device_descriptor	desc;
 	struct list_head		configs;
-- 
cgit v1.2.3-70-g09d2


From bf92c1906e4f294a48fafc15755c65af636195e0 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed, 29 Apr 2009 21:02:49 -0700
Subject: USB: usb.h: change private: kernel-doc for new format requirement

Use "/* private:" to mark struct members as private so that
scripts/kernel-doc will handle them correctly.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Acked-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 606e0aa5bbe..fb1f2a32ae0 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1421,8 +1421,8 @@ struct usb_sg_request {
 	int			status;
 	size_t			bytes;
 
-	/*
-	 * members below are private: to usbcore,
+	/* private:
+	 * members below are private to usbcore,
 	 * and are not provided for driver access!
 	 */
 	spinlock_t		lock;
-- 
cgit v1.2.3-70-g09d2


From 715b1dc01fe44537e8fce9566e4bb48d6821d84b Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Mon, 11 May 2009 15:24:07 -0500
Subject: USB: usb_debug, usb_generic_serial: implement multi urb write

The usb_debug driver, when used as the console, will always fail to
insert the carriage return and new line sequence as well as randomly
drop console output.  This is a result of only having the single
write_urb and that the tty layer will have a lock that prevents the
processing of the back to back urb requests.

The solution is to allow more than one urb to be outstanding and have
a slightly deeper transmit queue.  The idea and some code is borrowed
from the ftdi_sio usb driver.

The generic usb serial driver was modified so as to allow the classic
method of 1 write urb, or a multi write urb scheme with N allowed
outstanding urbs where N is controlled by max_in_flight_urbs.  When
max_in_flight_urbs in a "struct usb_serial_driver" is non zero the
multi write urb scheme will be used.

The size of 4000 was selected for the usb_debug driver so that the
driver lowers possibility of losing the queued console messages during
the kernel startup.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/serial/generic.c   | 121 ++++++++++++++++++++++++++++++++++++++---
 drivers/usb/serial/usb_debug.c |   2 +
 include/linux/usb/serial.h     |   4 ++
 3 files changed, 120 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index be82ea95672..c919686521f 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -190,6 +190,88 @@ void usb_serial_generic_close(struct usb_serial_port *port)
 	generic_cleanup(port);
 }
 
+static int usb_serial_multi_urb_write(struct tty_struct *tty,
+	struct usb_serial_port *port, const unsigned char *buf, int count)
+{
+	unsigned long flags;
+	struct urb *urb;
+	unsigned char *buffer;
+	int status;
+	int towrite;
+	int bwrite = 0;
+
+	dbg("%s - port %d", __func__, port->number);
+
+	if (count == 0)
+		dbg("%s - write request of 0 bytes", __func__);
+
+	while (count > 0) {
+		towrite = (count > port->bulk_out_size) ?
+			port->bulk_out_size : count;
+		spin_lock_irqsave(&port->lock, flags);
+		if (port->urbs_in_flight >
+		    port->serial->type->max_in_flight_urbs) {
+			spin_unlock_irqrestore(&port->lock, flags);
+			dbg("%s - write limit hit\n", __func__);
+			return bwrite;
+		}
+		port->tx_bytes_flight += towrite;
+		port->urbs_in_flight++;
+		spin_unlock_irqrestore(&port->lock, flags);
+
+		buffer = kmalloc(towrite, GFP_ATOMIC);
+		if (!buffer) {
+			dev_err(&port->dev,
+			"%s ran out of kernel memory for urb ...\n", __func__);
+			goto error_no_buffer;
+		}
+
+		urb = usb_alloc_urb(0, GFP_ATOMIC);
+		if (!urb) {
+			dev_err(&port->dev, "%s - no more free urbs\n",
+				__func__);
+			goto error_no_urb;
+		}
+
+		/* Copy data */
+		memcpy(buffer, buf + bwrite, towrite);
+		usb_serial_debug_data(debug, &port->dev, __func__,
+				      towrite, buffer);
+		/* fill the buffer and send it */
+		usb_fill_bulk_urb(urb, port->serial->dev,
+			usb_sndbulkpipe(port->serial->dev,
+					port->bulk_out_endpointAddress),
+			buffer, towrite,
+			usb_serial_generic_write_bulk_callback, port);
+
+		status = usb_submit_urb(urb, GFP_ATOMIC);
+		if (status) {
+			dev_err(&port->dev,
+				"%s - failed submitting write urb, error %d\n",
+				__func__, status);
+			goto error;
+		}
+
+		/* This urb is the responsibility of the host driver now */
+		usb_free_urb(urb);
+		dbg("%s write: %d", __func__, towrite);
+		count -= towrite;
+		bwrite += towrite;
+	}
+	return bwrite;
+
+error:
+	usb_free_urb(urb);
+error_no_urb:
+	kfree(buffer);
+error_no_buffer:
+	spin_lock_irqsave(&port->lock, flags);
+	port->urbs_in_flight--;
+	port->tx_bytes_flight -= towrite;
+	spin_unlock_irqrestore(&port->lock, flags);
+	return bwrite;
+}
+
 int usb_serial_generic_write(struct tty_struct *tty,
 	struct usb_serial_port *port, const unsigned char *buf, int count)
 {
@@ -207,6 +289,11 @@ int usb_serial_generic_write(struct tty_struct *tty,
 	/* only do something if we have a bulk out endpoint */
 	if (serial->num_bulk_out) {
 		unsigned long flags;
+
+		if (serial->type->max_in_flight_urbs)
+			return usb_serial_multi_urb_write(tty, port,
+							  buf, count);
+
 		spin_lock_irqsave(&port->lock, flags);
 		if (port->write_urb_busy) {
 			spin_unlock_irqrestore(&port->lock, flags);
@@ -257,15 +344,18 @@ int usb_serial_generic_write_room(struct tty_struct *tty)
 {
 	struct usb_serial_port *port = tty->driver_data;
 	struct usb_serial *serial = port->serial;
+	unsigned long flags;
 	int room = 0;
 
 	dbg("%s - port %d", __func__, port->number);
-
-	/* FIXME: Locking */
-	if (serial->num_bulk_out) {
-		if (!(port->write_urb_busy))
+	spin_lock_irqsave(&port->lock, flags);
+	if (serial->type->max_in_flight_urbs) {
+		if (port->urbs_in_flight < serial->type->max_in_flight_urbs)
 			room = port->bulk_out_size;
+	} else if (serial->num_bulk_out && !(port->write_urb_busy)) {
+		room = port->bulk_out_size;
 	}
+	spin_unlock_irqrestore(&port->lock, flags);
 
 	dbg("%s - returns %d", __func__, room);
 	return room;
@@ -276,11 +366,16 @@ int usb_serial_generic_chars_in_buffer(struct tty_struct *tty)
 	struct usb_serial_port *port = tty->driver_data;
 	struct usb_serial *serial = port->serial;
 	int chars = 0;
+	unsigned long flags;
 
 	dbg("%s - port %d", __func__, port->number);
 
-	/* FIXME: Locking */
-	if (serial->num_bulk_out) {
+	if (serial->type->max_in_flight_urbs) {
+		spin_lock_irqsave(&port->lock, flags);
+		chars = port->tx_bytes_flight;
+		spin_unlock_irqrestore(&port->lock, flags);
+	} else if (serial->num_bulk_out) {
+		/* FIXME: Locking */
 		if (port->write_urb_busy)
 			chars = port->write_urb->transfer_buffer_length;
 	}
@@ -363,12 +458,24 @@ EXPORT_SYMBOL_GPL(usb_serial_generic_read_bulk_callback);
 
 void usb_serial_generic_write_bulk_callback(struct urb *urb)
 {
+	unsigned long flags;
 	struct usb_serial_port *port = urb->context;
 	int status = urb->status;
 
 	dbg("%s - port %d", __func__, port->number);
 
-	port->write_urb_busy = 0;
+	if (port->serial->type->max_in_flight_urbs) {
+		spin_lock_irqsave(&port->lock, flags);
+		--port->urbs_in_flight;
+		port->tx_bytes_flight -= urb->transfer_buffer_length;
+		if (port->urbs_in_flight < 0)
+			port->urbs_in_flight = 0;
+		spin_unlock_irqrestore(&port->lock, flags);
+	} else {
+		/* Handle the case for single urb mode */
+		port->write_urb_busy = 0;
+	}
+
 	if (status) {
 		dbg("%s - nonzero write bulk status received: %d",
 		    __func__, status);
diff --git a/drivers/usb/serial/usb_debug.c b/drivers/usb/serial/usb_debug.c
index 6c9cbb59552..a9427a8b867 100644
--- a/drivers/usb/serial/usb_debug.c
+++ b/drivers/usb/serial/usb_debug.c
@@ -15,6 +15,7 @@
 #include <linux/usb.h>
 #include <linux/usb/serial.h>
 
+#define URB_DEBUG_MAX_IN_FLIGHT_URBS	4000
 #define USB_DEBUG_MAX_PACKET_SIZE	8
 
 static struct usb_device_id id_table [] = {
@@ -46,6 +47,7 @@ static struct usb_serial_driver debug_device = {
 	.id_table =		id_table,
 	.num_ports =		1,
 	.open =			usb_debug_open,
+	.max_in_flight_urbs =	URB_DEBUG_MAX_IN_FLIGHT_URBS,
 };
 
 static int __init debug_init(void)
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index 8cdfed738fe..e2938fd179e 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -91,6 +91,9 @@ struct usb_serial_port {
 	int			write_urb_busy;
 	__u8			bulk_out_endpointAddress;
 
+	int			tx_bytes_flight;
+	int			urbs_in_flight;
+
 	wait_queue_head_t	write_wait;
 	struct work_struct	work;
 	char			throttled;
@@ -207,6 +210,7 @@ struct usb_serial_driver {
 	struct device_driver	driver;
 	struct usb_driver	*usb_driver;
 	struct usb_dynids	dynids;
+	int			max_in_flight_urbs;
 
 	int (*probe)(struct usb_serial *serial, const struct usb_device_id *id);
 	int (*attach)(struct usb_serial *serial);
-- 
cgit v1.2.3-70-g09d2


From 98fcb5f78165b8a3d93870ad7afd4d9ebbb8b43a Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Mon, 11 May 2009 15:24:09 -0500
Subject: USB: serial: usb_debug,usb_generic_serial: implement sysrq and serial
 break

The usb_debug driver was modified to implement serial break handling
by using a "magic" data packet comprised of the sequence:

       0x00 0xff 0x01 0xfe   0x00 0xfe 0x01 0xff

When the tty layer requests a serial break the usb_debug driver sends
the magic packet.  On the receiving side the magic packet is thrown
away or a sysrq is activated depending on what kernel .config options
have been set.

The generic serial driver was modified as well as the usb serial
headers to generically implement sysrq processing in the same way the
non usb uart based drivers implement the sysrq handling.  This will
allow other usb serial devices to implement sysrq handling as desired.

The new usb serial functions are named similarly and implemented
similarly to the uart functions as follows:

usb_serial_handle_break <-> uart_handle_break
usb_serial_handle_sysrq_char <-> uart_handle_sysrq_char

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/serial/generic.c   | 56 +++++++++++++++++++++++++++++++++---------
 drivers/usb/serial/usb_debug.c | 39 +++++++++++++++++++++++++++++
 include/linux/usb/serial.h     |  8 ++++++
 3 files changed, 91 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index c919686521f..9fccc26235a 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -339,6 +339,7 @@ int usb_serial_generic_write(struct tty_struct *tty,
 	/* no bulk out, so return 0 bytes written */
 	return 0;
 }
+EXPORT_SYMBOL_GPL(usb_serial_generic_write);
 
 int usb_serial_generic_write_room(struct tty_struct *tty)
 {
@@ -351,7 +352,9 @@ int usb_serial_generic_write_room(struct tty_struct *tty)
 	spin_lock_irqsave(&port->lock, flags);
 	if (serial->type->max_in_flight_urbs) {
 		if (port->urbs_in_flight < serial->type->max_in_flight_urbs)
-			room = port->bulk_out_size;
+			room = port->bulk_out_size *
+				(serial->type->max_in_flight_urbs -
+				 port->urbs_in_flight);
 	} else if (serial->num_bulk_out && !(port->write_urb_busy)) {
 		room = port->bulk_out_size;
 	}
@@ -385,7 +388,8 @@ int usb_serial_generic_chars_in_buffer(struct tty_struct *tty)
 }
 
 
-static void resubmit_read_urb(struct usb_serial_port *port, gfp_t mem_flags)
+void usb_serial_generic_resubmit_read_urb(struct usb_serial_port *port,
+			gfp_t mem_flags)
 {
 	struct urb *urb = port->read_urb;
 	struct usb_serial *serial = port->serial;
@@ -406,25 +410,28 @@ static void resubmit_read_urb(struct usb_serial_port *port, gfp_t mem_flags)
 			"%s - failed resubmitting read urb, error %d\n",
 							__func__, result);
 }
+EXPORT_SYMBOL_GPL(usb_serial_generic_resubmit_read_urb);
 
 /* Push data to tty layer and resubmit the bulk read URB */
 static void flush_and_resubmit_read_urb(struct usb_serial_port *port)
 {
 	struct urb *urb = port->read_urb;
 	struct tty_struct *tty = tty_port_tty_get(&port->port);
-	int room;
+	char *ch = (char *)urb->transfer_buffer;
+	int i;
+
+	if (!tty)
+		goto done;
 
 	/* Push data to tty */
-	if (tty && urb->actual_length) {
-		room = tty_buffer_request_room(tty, urb->actual_length);
-		if (room) {
-			tty_insert_flip_string(tty, urb->transfer_buffer, room);
-			tty_flip_buffer_push(tty);
-		}
+	for (i = 0; i < urb->actual_length; i++, ch++) {
+		if (!usb_serial_handle_sysrq_char(port, *ch))
+			tty_insert_flip_char(tty, *ch, TTY_NORMAL);
 	}
+	tty_flip_buffer_push(tty);
 	tty_kref_put(tty);
-
-	resubmit_read_urb(port, GFP_ATOMIC);
+done:
+	usb_serial_generic_resubmit_read_urb(port, GFP_ATOMIC);
 }
 
 void usb_serial_generic_read_bulk_callback(struct urb *urb)
@@ -515,10 +522,35 @@ void usb_serial_generic_unthrottle(struct tty_struct *tty)
 
 	if (was_throttled) {
 		/* Resume reading from device */
-		resubmit_read_urb(port, GFP_KERNEL);
+		usb_serial_generic_resubmit_read_urb(port, GFP_KERNEL);
 	}
 }
 
+int usb_serial_handle_sysrq_char(struct usb_serial_port *port, unsigned int ch)
+{
+	if (port->sysrq) {
+		if (ch && time_before(jiffies, port->sysrq)) {
+			handle_sysrq(ch, tty_port_tty_get(&port->port));
+			port->sysrq = 0;
+			return 1;
+		}
+		port->sysrq = 0;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(usb_serial_handle_sysrq_char);
+
+int usb_serial_handle_break(struct usb_serial_port *port)
+{
+	if (!port->sysrq) {
+		port->sysrq = jiffies + HZ*5;
+		return 1;
+	}
+	port->sysrq = 0;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(usb_serial_handle_break);
+
 void usb_serial_generic_shutdown(struct usb_serial *serial)
 {
 	int i;
diff --git a/drivers/usb/serial/usb_debug.c b/drivers/usb/serial/usb_debug.c
index a9427a8b867..614800972dc 100644
--- a/drivers/usb/serial/usb_debug.c
+++ b/drivers/usb/serial/usb_debug.c
@@ -17,6 +17,17 @@
 
 #define URB_DEBUG_MAX_IN_FLIGHT_URBS	4000
 #define USB_DEBUG_MAX_PACKET_SIZE	8
+#define USB_DEBUG_BRK_SIZE		8
+static char USB_DEBUG_BRK[USB_DEBUG_BRK_SIZE] = {
+	0x00,
+	0xff,
+	0x01,
+	0xfe,
+	0x00,
+	0xfe,
+	0x01,
+	0xff,
+};
 
 static struct usb_device_id id_table [] = {
 	{ USB_DEVICE(0x0525, 0x127a) },
@@ -39,6 +50,32 @@ static int usb_debug_open(struct tty_struct *tty, struct usb_serial_port *port,
 	return usb_serial_generic_open(tty, port, filp);
 }
 
+/* This HW really does not support a serial break, so one will be
+ * emulated when ever the break state is set to true.
+ */
+static void usb_debug_break_ctl(struct tty_struct *tty, int break_state)
+{
+	struct usb_serial_port *port = tty->driver_data;
+	if (!break_state)
+		return;
+	usb_serial_generic_write(tty, port, USB_DEBUG_BRK, USB_DEBUG_BRK_SIZE);
+}
+
+static void usb_debug_read_bulk_callback(struct urb *urb)
+{
+	struct usb_serial_port *port = urb->context;
+
+	if (urb->actual_length == USB_DEBUG_BRK_SIZE &&
+	    memcmp(urb->transfer_buffer, USB_DEBUG_BRK,
+		   USB_DEBUG_BRK_SIZE) == 0) {
+		usb_serial_handle_break(port);
+		usb_serial_generic_resubmit_read_urb(port, GFP_ATOMIC);
+		return;
+	}
+
+	usb_serial_generic_read_bulk_callback(urb);
+}
+
 static struct usb_serial_driver debug_device = {
 	.driver = {
 		.owner =	THIS_MODULE,
@@ -48,6 +85,8 @@ static struct usb_serial_driver debug_device = {
 	.num_ports =		1,
 	.open =			usb_debug_open,
 	.max_in_flight_urbs =	URB_DEBUG_MAX_IN_FLIGHT_URBS,
+	.break_ctl =		usb_debug_break_ctl,
+	.read_bulk_callback =	usb_debug_read_bulk_callback,
 };
 
 static int __init debug_init(void)
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index e2938fd179e..e29ebcf3287 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -15,6 +15,7 @@
 
 #include <linux/kref.h>
 #include <linux/mutex.h>
+#include <linux/sysrq.h>
 
 #define SERIAL_TTY_MAJOR	188	/* Nice legal number now */
 #define SERIAL_TTY_MINORS	254	/* loads of devices :) */
@@ -99,6 +100,7 @@ struct usb_serial_port {
 	char			throttled;
 	char			throttle_req;
 	char			console;
+	unsigned long		sysrq; /* sysrq timeout */
 	struct device		dev;
 };
 #define to_usb_serial_port(d) container_of(d, struct usb_serial_port, dev)
@@ -301,6 +303,12 @@ extern void usb_serial_generic_unthrottle(struct tty_struct *tty);
 extern void usb_serial_generic_shutdown(struct usb_serial *serial);
 extern int usb_serial_generic_register(int debug);
 extern void usb_serial_generic_deregister(void);
+extern void usb_serial_generic_resubmit_read_urb(struct usb_serial_port *port,
+						 gfp_t mem_flags);
+extern int usb_serial_handle_sysrq_char(struct usb_serial_port *port,
+					unsigned int ch);
+extern int usb_serial_handle_break(struct usb_serial_port *port);
+
 
 extern int usb_serial_bus_register(struct usb_serial_driver *device);
 extern void usb_serial_bus_deregister(struct usb_serial_driver *device);
-- 
cgit v1.2.3-70-g09d2


From 5effabbe9e6e0089f7afdde35cb51e8c8b4cf6bc Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <shimoda.yoshihiro@renesas.com>
Date: Tue, 26 May 2009 18:24:34 +0900
Subject: USB: r8a66597-hcd: use platform_data instead of module_param

CPU/board specific parameters (PLL clock, vif etc...) can be set
by platform_data instead of module_param.

v2: remove irq_sense member in platform_data because it can OR in
    IRQF_TRIGGER_LOW or IRQF_TRIGGER_FALLING against IORESOURCE_IRQ in
    the struct resource.

Signed-off-by: Yoshihiro Shimoda <shimoda.yoshihiro@renesas.com>
Reviewed-by: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/host/r8a66597-hcd.c | 60 ++++++++++-------------------------------
 drivers/usb/host/r8a66597.h     | 38 +++++++++++++++++++++++---
 include/linux/usb/r8a66597.h    | 44 ++++++++++++++++++++++++++++++
 3 files changed, 92 insertions(+), 50 deletions(-)
 create mode 100644 include/linux/usb/r8a66597.h

(limited to 'include')

diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c
index 3e1216ad86b..56976cc0352 100644
--- a/drivers/usb/host/r8a66597-hcd.c
+++ b/drivers/usb/host/r8a66597-hcd.c
@@ -46,31 +46,10 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Yoshihiro Shimoda");
 MODULE_ALIAS("platform:r8a66597_hcd");
 
-#define DRIVER_VERSION	"10 Apr 2008"
+#define DRIVER_VERSION	"2009-05-26"
 
 static const char hcd_name[] = "r8a66597_hcd";
 
-/* module parameters */
-#if !defined(CONFIG_SUPERH_ON_CHIP_R8A66597)
-static unsigned short clock = XTAL12;
-module_param(clock, ushort, 0644);
-MODULE_PARM_DESC(clock, "input clock: 48MHz=32768, 24MHz=16384, 12MHz=0 "
-		"(default=0)");
-#endif
-
-static unsigned short vif = LDRV;
-module_param(vif, ushort, 0644);
-MODULE_PARM_DESC(vif, "input VIF: 3.3V=32768, 1.5V=0(default=32768)");
-
-static unsigned short endian;
-module_param(endian, ushort, 0644);
-MODULE_PARM_DESC(endian, "data endian: big=256, little=0 (default=0)");
-
-static unsigned short irq_sense = 0xff;
-module_param(irq_sense, ushort, 0644);
-MODULE_PARM_DESC(irq_sense, "IRQ sense: low level=32, falling edge=0 "
-		"(default=32)");
-
 static void packet_write(struct r8a66597 *r8a66597, u16 pipenum);
 static int r8a66597_get_frame(struct usb_hcd *hcd);
 
@@ -136,7 +115,8 @@ static int r8a66597_clock_enable(struct r8a66597 *r8a66597)
 		}
 	} while ((tmp & USBE) != USBE);
 	r8a66597_bclr(r8a66597, USBE, SYSCFG0);
-	r8a66597_mdfy(r8a66597, clock, XTAL, SYSCFG0);
+	r8a66597_mdfy(r8a66597, get_xtal_from_pdata(r8a66597->pdata), XTAL,
+			SYSCFG0);
 
 	i = 0;
 	r8a66597_bset(r8a66597, XCKE, SYSCFG0);
@@ -203,6 +183,9 @@ static void r8a66597_disable_port(struct r8a66597 *r8a66597, int port)
 static int enable_controller(struct r8a66597 *r8a66597)
 {
 	int ret, port;
+	u16 vif = r8a66597->pdata->vif ? LDRV : 0;
+	u16 irq_sense = r8a66597->irq_sense_low ? INTL : 0;
+	u16 endian = r8a66597->pdata->endian ? BIGEND : 0;
 
 	ret = r8a66597_clock_enable(r8a66597);
 	if (ret < 0)
@@ -2418,6 +2401,12 @@ static int __devinit r8a66597_probe(struct platform_device *pdev)
 		goto clean_up;
 	}
 
+	if (pdev->dev.platform_data == NULL) {
+		dev_err(&pdev->dev, "no platform data\n");
+		ret = -ENODEV;
+		goto clean_up;
+	}
+
 	/* initialize hcd */
 	hcd = usb_create_hcd(&r8a66597_hc_driver, &pdev->dev, (char *)hcd_name);
 	if (!hcd) {
@@ -2428,6 +2417,8 @@ static int __devinit r8a66597_probe(struct platform_device *pdev)
 	r8a66597 = hcd_to_r8a66597(hcd);
 	memset(r8a66597, 0, sizeof(struct r8a66597));
 	dev_set_drvdata(&pdev->dev, r8a66597);
+	r8a66597->pdata = pdev->dev.platform_data;
+	r8a66597->irq_sense_low = irq_trigger == IRQF_TRIGGER_LOW;
 
 #if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK)
 	snprintf(clk_name, sizeof(clk_name), "usb%d", pdev->id);
@@ -2458,29 +2449,6 @@ static int __devinit r8a66597_probe(struct platform_device *pdev)
 
 	hcd->rsrc_start = res->start;
 
-	/* irq_sense setting on cmdline takes precedence over resource
-	 * settings, so the introduction of irqflags in IRQ resourse
-	 * won't disturb existing setups */
-	switch (irq_sense) {
-		case INTL:
-			irq_trigger = IRQF_TRIGGER_LOW;
-			break;
-		case 0:
-			irq_trigger = IRQF_TRIGGER_FALLING;
-			break;
-		case 0xff:
-			if (irq_trigger)
-				irq_sense = (irq_trigger & IRQF_TRIGGER_LOW) ?
-					    INTL : 0;
-			else {
-				irq_sense = INTL;
-				irq_trigger = IRQF_TRIGGER_LOW;
-			}
-			break;
-		default:
-			dev_err(&pdev->dev, "Unknown irq_sense value.\n");
-	}
-
 	ret = usb_add_hcd(hcd, irq, IRQF_DISABLED | irq_trigger);
 	if (ret != 0) {
 		dev_err(&pdev->dev, "Failed to add hcd\n");
diff --git a/drivers/usb/host/r8a66597.h b/drivers/usb/host/r8a66597.h
index f49208f1bb7..d72680b433f 100644
--- a/drivers/usb/host/r8a66597.h
+++ b/drivers/usb/host/r8a66597.h
@@ -30,6 +30,8 @@
 #include <linux/clk.h>
 #endif
 
+#include <linux/usb/r8a66597.h>
+
 #define SYSCFG0		0x00
 #define SYSCFG1		0x02
 #define SYSSTS0		0x04
@@ -488,6 +490,7 @@ struct r8a66597 {
 #if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK)
 	struct clk *clk;
 #endif
+	struct r8a66597_platdata	*pdata;
 	struct r8a66597_device		device0;
 	struct r8a66597_root_hub	root_hub[R8A66597_MAX_ROOT_HUB];
 	struct list_head		pipe_queue[R8A66597_MAX_NUM_PIPE];
@@ -506,6 +509,7 @@ struct r8a66597 {
 	unsigned long child_connect_map[4];
 
 	unsigned bus_suspended:1;
+	unsigned irq_sense_low:1;
 };
 
 static inline struct r8a66597 *hcd_to_r8a66597(struct usb_hcd *hcd)
@@ -660,10 +664,36 @@ static inline void r8a66597_port_power(struct r8a66597 *r8a66597, int port,
 {
 	unsigned long dvstctr_reg = get_dvstctr_reg(port);
 
-	if (power)
-		r8a66597_bset(r8a66597, VBOUT, dvstctr_reg);
-	else
-		r8a66597_bclr(r8a66597, VBOUT, dvstctr_reg);
+	if (r8a66597->pdata->port_power) {
+		r8a66597->pdata->port_power(port, power);
+	} else {
+		if (power)
+			r8a66597_bset(r8a66597, VBOUT, dvstctr_reg);
+		else
+			r8a66597_bclr(r8a66597, VBOUT, dvstctr_reg);
+	}
+}
+
+static inline u16 get_xtal_from_pdata(struct r8a66597_platdata *pdata)
+{
+	u16 clock = 0;
+
+	switch (pdata->xtal) {
+	case R8A66597_PLATDATA_XTAL_12MHZ:
+		clock = XTAL12;
+		break;
+	case R8A66597_PLATDATA_XTAL_24MHZ:
+		clock = XTAL24;
+		break;
+	case R8A66597_PLATDATA_XTAL_48MHZ:
+		clock = XTAL48;
+		break;
+	default:
+		printk(KERN_ERR "r8a66597: platdata clock is wrong.\n");
+		break;
+	}
+
+	return clock;
 }
 
 #define get_pipectr_addr(pipenum)	(PIPE1CTR + (pipenum - 1) * 2)
diff --git a/include/linux/usb/r8a66597.h b/include/linux/usb/r8a66597.h
new file mode 100644
index 00000000000..e9f0384fa20
--- /dev/null
+++ b/include/linux/usb/r8a66597.h
@@ -0,0 +1,44 @@
+/*
+ * R8A66597 driver platform data
+ *
+ * Copyright (C) 2009  Renesas Solutions Corp.
+ *
+ * Author : Yoshihiro Shimoda <shimoda.yoshihiro@renesas.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#ifndef __LINUX_USB_R8A66597_H
+#define __LINUX_USB_R8A66597_H
+
+#define R8A66597_PLATDATA_XTAL_12MHZ	0x01
+#define R8A66597_PLATDATA_XTAL_24MHZ	0x02
+#define R8A66597_PLATDATA_XTAL_48MHZ	0x03
+
+struct r8a66597_platdata {
+	/* This ops can controll port power instead of DVSTCTR register. */
+	void (*port_power)(int port, int power);
+
+	/* (external controller only) set R8A66597_PLATDATA_XTAL_nnMHZ */
+	unsigned	xtal:2;
+
+	/* set one = 3.3V, set zero = 1.5V */
+	unsigned	vif:1;
+
+	/* set one = big endian, set zero = little endian */
+	unsigned	endian:1;
+};
+#endif
+
-- 
cgit v1.2.3-70-g09d2


From c47d7b09891abb4f8b222317c89c7469bed8db3a Mon Sep 17 00:00:00 2001
From: Bryan Wu <cooloney@kernel.org>
Date: Wed, 3 Jun 2009 09:17:57 -0400
Subject: USB: audio: add USB audio class definitions

Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/audio.h | 265 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 263 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h
index 8cb025fef63..b5744bc218a 100644
--- a/include/linux/usb/audio.h
+++ b/include/linux/usb/audio.h
@@ -24,10 +24,75 @@
 #define USB_SUBCLASS_AUDIOCONTROL	0x01
 #define USB_SUBCLASS_AUDIOSTREAMING	0x02
 #define USB_SUBCLASS_MIDISTREAMING	0x03
+#define USB_SUBCLASS_VENDOR_SPEC	0xff
 
+/* A.5 Audio Class-Specific AC interface Descriptor Subtypes*/
+#define HEADER				0x01
+#define INPUT_TERMINAL			0x02
+#define OUTPUT_TERMINAL			0x03
+#define MIXER_UNIT			0x04
+#define SELECTOR_UNIT			0x05
+#define FEATURE_UNIT			0x06
+#define PROCESSING_UNIT			0x07
+#define EXTENSION_UNIT			0x08
+
+#define AS_GENERAL			0x01
+#define FORMAT_TYPE			0x02
+#define FORMAT_SPECIFIC			0x03
+
+#define EP_GENERAL			0x01
+
+#define MS_GENERAL			0x01
+#define MIDI_IN_JACK			0x02
+#define MIDI_OUT_JACK			0x03
+
+/* endpoint attributes */
+#define EP_ATTR_MASK			0x0c
+#define EP_ATTR_ASYNC			0x04
+#define EP_ATTR_ADAPTIVE		0x08
+#define EP_ATTR_SYNC			0x0c
+
+/* cs endpoint attributes */
+#define EP_CS_ATTR_SAMPLE_RATE		0x01
+#define EP_CS_ATTR_PITCH_CONTROL	0x02
+#define EP_CS_ATTR_FILL_MAX		0x80
+
+/* Audio Class specific Request Codes */
+#define USB_AUDIO_SET_INTF		0x21
+#define USB_AUDIO_SET_ENDPOINT		0x22
+#define USB_AUDIO_GET_INTF		0xa1
+#define USB_AUDIO_GET_ENDPOINT		0xa2
+
+#define SET_	0x00
+#define GET_	0x80
+
+#define _CUR	0x1
+#define _MIN	0x2
+#define _MAX	0x3
+#define _RES	0x4
+#define _MEM	0x5
+
+#define SET_CUR		(SET_ | _CUR)
+#define GET_CUR		(GET_ | _CUR)
+#define SET_MIN		(SET_ | _MIN)
+#define GET_MIN		(GET_ | _MIN)
+#define SET_MAX		(SET_ | _MAX)
+#define GET_MAX		(GET_ | _MAX)
+#define SET_RES		(SET_ | _RES)
+#define GET_RES		(GET_ | _RES)
+#define SET_MEM		(SET_ | _MEM)
+#define GET_MEM		(GET_ | _MEM)
+
+#define GET_STAT	0xff
+
+#define USB_AC_TERMINAL_UNDEFINED	0x100
+#define USB_AC_TERMINAL_STREAMING	0x101
+#define USB_AC_TERMINAL_VENDOR_SPEC	0x1FF
+
+/* Terminal Control Selectors */
 /* 4.3.2  Class-Specific AC Interface Descriptor */
 struct usb_ac_header_descriptor {
-	__u8  bLength;			/* 8+n */
+	__u8  bLength;			/* 8 + n */
 	__u8  bDescriptorType;		/* USB_DT_CS_INTERFACE */
 	__u8  bDescriptorSubtype;	/* USB_MS_HEADER */
 	__le16 bcdADC;			/* 0x0100 */
@@ -36,7 +101,7 @@ struct usb_ac_header_descriptor {
 	__u8  baInterfaceNr[];		/* [n] */
 } __attribute__ ((packed));
 
-#define USB_DT_AC_HEADER_SIZE(n)	(8+(n))
+#define USB_DT_AC_HEADER_SIZE(n)	(8 + (n))
 
 /* As above, but more useful for defining your own descriptors: */
 #define DECLARE_USB_AC_HEADER_DESCRIPTOR(n) 			\
@@ -50,4 +115,200 @@ struct usb_ac_header_descriptor_##n {				\
 	__u8  baInterfaceNr[n];					\
 } __attribute__ ((packed))
 
+/* 4.3.2.1 Input Terminal Descriptor */
+struct usb_input_terminal_descriptor {
+	__u8  bLength;			/* in bytes: 12 */
+	__u8  bDescriptorType;		/* CS_INTERFACE descriptor type */
+	__u8  bDescriptorSubtype;	/* INPUT_TERMINAL descriptor subtype */
+	__u8  bTerminalID;		/* Constant uniquely terminal ID */
+	__le16 wTerminalType;		/* USB Audio Terminal Types */
+	__u8  bAssocTerminal;		/* ID of the Output Terminal associated */
+	__u8  bNrChannels;		/* Number of logical output channels */
+	__le16 wChannelConfig;
+	__u8  iChannelNames;
+	__u8  iTerminal;
+} __attribute__ ((packed));
+
+#define USB_DT_AC_INPUT_TERMINAL_SIZE			12
+
+#define USB_AC_INPUT_TERMINAL_UNDEFINED			0x200
+#define USB_AC_INPUT_TERMINAL_MICROPHONE		0x201
+#define USB_AC_INPUT_TERMINAL_DESKTOP_MICROPHONE	0x202
+#define USB_AC_INPUT_TERMINAL_PERSONAL_MICROPHONE	0x203
+#define USB_AC_INPUT_TERMINAL_OMNI_DIR_MICROPHONE	0x204
+#define USB_AC_INPUT_TERMINAL_MICROPHONE_ARRAY		0x205
+#define USB_AC_INPUT_TERMINAL_PROC_MICROPHONE_ARRAY	0x206
+
+/* 4.3.2.2 Output Terminal Descriptor */
+struct usb_output_terminal_descriptor {
+	__u8  bLength;			/* in bytes: 9 */
+	__u8  bDescriptorType;		/* CS_INTERFACE descriptor type */
+	__u8  bDescriptorSubtype;	/* OUTPUT_TERMINAL descriptor subtype */
+	__u8  bTerminalID;		/* Constant uniquely terminal ID */
+	__le16 wTerminalType;		/* USB Audio Terminal Types */
+	__u8  bAssocTerminal;		/* ID of the Input Terminal associated */
+	__u8  bSourceID;		/* ID of the connected Unit or Terminal*/
+	__u8  iTerminal;
+} __attribute__ ((packed));
+
+#define USB_DT_AC_OUTPUT_TERMINAL_SIZE				9
+
+#define USB_AC_OUTPUT_TERMINAL_UNDEFINED			0x300
+#define USB_AC_OUTPUT_TERMINAL_SPEAKER				0x301
+#define USB_AC_OUTPUT_TERMINAL_HEADPHONES			0x302
+#define USB_AC_OUTPUT_TERMINAL_HEAD_MOUNTED_DISPLAY_AUDIO	0x303
+#define USB_AC_OUTPUT_TERMINAL_DESKTOP_SPEAKER			0x304
+#define USB_AC_OUTPUT_TERMINAL_ROOM_SPEAKER			0x305
+#define USB_AC_OUTPUT_TERMINAL_COMMUNICATION_SPEAKER		0x306
+#define USB_AC_OUTPUT_TERMINAL_LOW_FREQ_EFFECTS_SPEAKER		0x307
+
+/* Set bControlSize = 2 as default setting */
+#define USB_DT_AC_FEATURE_UNIT_SIZE(ch)		(7 + ((ch) + 1) * 2)
+
+/* As above, but more useful for defining your own descriptors: */
+#define DECLARE_USB_AC_FEATURE_UNIT_DESCRIPTOR(ch) 		\
+struct usb_ac_feature_unit_descriptor_##ch {			\
+	__u8  bLength;						\
+	__u8  bDescriptorType;					\
+	__u8  bDescriptorSubtype;				\
+	__u8  bUnitID;						\
+	__u8  bSourceID;					\
+	__u8  bControlSize;					\
+	__le16 bmaControls[ch + 1];				\
+	__u8  iFeature;						\
+} __attribute__ ((packed))
+
+/* 4.5.2 Class-Specific AS Interface Descriptor */
+struct usb_as_header_descriptor {
+	__u8  bLength;			/* in bytes: 7 */
+	__u8  bDescriptorType;		/* USB_DT_CS_INTERFACE */
+	__u8  bDescriptorSubtype;	/* AS_GENERAL */
+	__u8  bTerminalLink;		/* Terminal ID of connected Terminal */
+	__u8  bDelay;			/* Delay introduced by the data path */
+	__le16 wFormatTag;		/* The Audio Data Format */
+} __attribute__ ((packed));
+
+#define USB_DT_AS_HEADER_SIZE		7
+
+#define USB_AS_AUDIO_FORMAT_TYPE_I_UNDEFINED	0x0
+#define USB_AS_AUDIO_FORMAT_TYPE_I_PCM		0x1
+#define USB_AS_AUDIO_FORMAT_TYPE_I_PCM8		0x2
+#define USB_AS_AUDIO_FORMAT_TYPE_I_IEEE_FLOAT	0x3
+#define USB_AS_AUDIO_FORMAT_TYPE_I_ALAW		0x4
+#define USB_AS_AUDIO_FORMAT_TYPE_I_MULAW	0x5
+
+struct usb_as_format_type_i_continuous_descriptor {
+	__u8  bLength;			/* in bytes: 8 + (ns * 3) */
+	__u8  bDescriptorType;		/* USB_DT_CS_INTERFACE */
+	__u8  bDescriptorSubtype;	/* FORMAT_TYPE */
+	__u8  bFormatType;		/* FORMAT_TYPE_1 */
+	__u8  bNrChannels;		/* physical channels in the stream */
+	__u8  bSubframeSize;		/* */
+	__u8  bBitResolution;
+	__u8  bSamFreqType;
+	__u8  tLowerSamFreq[3];
+	__u8  tUpperSamFreq[3];
+} __attribute__ ((packed));
+
+#define USB_AS_FORMAT_TYPE_I_CONTINUOUS_DESC_SIZE	14
+
+struct usb_as_formate_type_i_discrete_descriptor {
+	__u8  bLength;			/* in bytes: 8 + (ns * 3) */
+	__u8  bDescriptorType;		/* USB_DT_CS_INTERFACE */
+	__u8  bDescriptorSubtype;	/* FORMAT_TYPE */
+	__u8  bFormatType;		/* FORMAT_TYPE_1 */
+	__u8  bNrChannels;		/* physical channels in the stream */
+	__u8  bSubframeSize;		/* */
+	__u8  bBitResolution;
+	__u8  bSamFreqType;
+	__u8  tSamFreq[][3];
+} __attribute__ ((packed));
+
+#define DECLARE_USB_AS_FORMAT_TYPE_I_DISCRETE_DESC(n) 		\
+struct usb_as_formate_type_i_discrete_descriptor_##n {		\
+	__u8  bLength;						\
+	__u8  bDescriptorType;					\
+	__u8  bDescriptorSubtype;				\
+	__u8  bFormatType;					\
+	__u8  bNrChannels;					\
+	__u8  bSubframeSize;					\
+	__u8  bBitResolution;					\
+	__u8  bSamFreqType;					\
+	__u8  tSamFreq[n][3];					\
+} __attribute__ ((packed))
+
+#define USB_AS_FORMAT_TYPE_I_DISCRETE_DESC_SIZE(n)	(8 + (n * 3))
+
+#define USB_AS_FORMAT_TYPE_UNDEFINED	0x0
+#define USB_AS_FORMAT_TYPE_I		0x1
+#define USB_AS_FORMAT_TYPE_II		0x2
+#define USB_AS_FORMAT_TYPE_III		0x3
+
+#define USB_AS_ENDPOINT_ASYNC		(1 << 2)
+#define USB_AS_ENDPOINT_ADAPTIVE	(2 << 2)
+#define USB_AS_ENDPOINT_SYNC		(3 << 2)
+
+struct usb_as_iso_endpoint_descriptor {
+	__u8  bLength;			/* in bytes: 7 */
+	__u8  bDescriptorType;		/* USB_DT_CS_ENDPOINT */
+	__u8  bDescriptorSubtype;	/* EP_GENERAL */
+	__u8  bmAttributes;
+	__u8  bLockDelayUnits;
+	__le16 wLockDelay;
+};
+#define USB_AS_ISO_ENDPOINT_DESC_SIZE	7
+
+#define FU_CONTROL_UNDEFINED		0x00
+#define MUTE_CONTROL			0x01
+#define VOLUME_CONTROL			0x02
+#define BASS_CONTROL			0x03
+#define MID_CONTROL			0x04
+#define TREBLE_CONTROL			0x05
+#define GRAPHIC_EQUALIZER_CONTROL	0x06
+#define AUTOMATIC_GAIN_CONTROL		0x07
+#define DELAY_CONTROL			0x08
+#define BASS_BOOST_CONTROL		0x09
+#define LOUDNESS_CONTROL		0x0a
+
+#define FU_MUTE		(1 << (MUTE_CONTROL - 1))
+#define FU_VOLUME	(1 << (VOLUME_CONTROL - 1))
+#define FU_BASS		(1 << (BASS_CONTROL - 1))
+#define FU_MID		(1 << (MID_CONTROL - 1))
+#define FU_TREBLE	(1 << (TREBLE_CONTROL - 1))
+#define FU_GRAPHIC_EQ	(1 << (GRAPHIC_EQUALIZER_CONTROL - 1))
+#define FU_AUTO_GAIN	(1 << (AUTOMATIC_GAIN_CONTROL - 1))
+#define FU_DELAY	(1 << (DELAY_CONTROL - 1))
+#define FU_BASS_BOOST	(1 << (BASS_BOOST_CONTROL - 1))
+#define FU_LOUDNESS	(1 << (LOUDNESS_CONTROL - 1))
+
+struct usb_audio_control {
+	struct list_head list;
+	const char *name;
+	u8 type;
+	int data[5];
+	int (*set)(struct usb_audio_control *con, u8 cmd, int value);
+	int (*get)(struct usb_audio_control *con, u8 cmd);
+};
+
+static inline int generic_set_cmd(struct usb_audio_control *con, u8 cmd, int value)
+{
+	con->data[cmd] = value;
+
+	return 0;
+}
+
+static inline int generic_get_cmd(struct usb_audio_control *con, u8 cmd)
+{
+	return con->data[cmd];
+}
+
+struct usb_audio_control_selector {
+	struct list_head list;
+	struct list_head control;
+	u8 id;
+	const char *name;
+	u8 type;
+	struct usb_descriptor_header *desc;
+};
+
 #endif /* __LINUX_USB_AUDIO_H */
-- 
cgit v1.2.3-70-g09d2


From c706ebdfc8955b850e477255a8c0f93f9f14712d Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Tue, 2 Jun 2009 11:54:11 -0400
Subject: USB: usb-serial: call port_probe and port_remove at the right times

This patch (as1253) prevents the usb-serial core from calling a
driver's port_probe and port_remove methods more than once per port.
It also removes some unnecessary try_module_get() calls and adds a
missing port_remove method call in a failure path.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/serial/bus.c        | 27 +++++++++++----------------
 drivers/usb/serial/usb-serial.c | 24 ++++++++++++++++++++++--
 include/linux/usb/serial.h      |  8 ++++++++
 3 files changed, 41 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/serial/bus.c b/drivers/usb/serial/bus.c
index 83bbb5bca2e..ba555c528cc 100644
--- a/drivers/usb/serial/bus.c
+++ b/drivers/usb/serial/bus.c
@@ -59,23 +59,22 @@ static int usb_serial_device_probe(struct device *dev)
 		retval = -ENODEV;
 		goto exit;
 	}
+	if (port->dev_state != PORT_REGISTERING)
+		goto exit;
 
 	driver = port->serial->type;
 	if (driver->port_probe) {
-		if (!try_module_get(driver->driver.owner)) {
-			dev_err(dev, "module get failed, exiting\n");
-			retval = -EIO;
-			goto exit;
-		}
 		retval = driver->port_probe(port);
-		module_put(driver->driver.owner);
 		if (retval)
 			goto exit;
 	}
 
 	retval = device_create_file(dev, &dev_attr_port_number);
-	if (retval)
+	if (retval) {
+		if (driver->port_remove)
+			retval = driver->port_remove(port);
 		goto exit;
+	}
 
 	minor = port->number;
 	tty_register_device(usb_serial_tty_driver, minor, dev);
@@ -98,19 +97,15 @@ static int usb_serial_device_remove(struct device *dev)
 	if (!port)
 		return -ENODEV;
 
+	if (port->dev_state != PORT_UNREGISTERING)
+		return retval;
+
 	device_remove_file(&port->dev, &dev_attr_port_number);
 
 	driver = port->serial->type;
-	if (driver->port_remove) {
-		if (!try_module_get(driver->driver.owner)) {
-			dev_err(dev, "module get failed, exiting\n");
-			retval = -EIO;
-			goto exit;
-		}
+	if (driver->port_remove)
 		retval = driver->port_remove(port);
-		module_put(driver->driver.owner);
-	}
-exit:
+
 	minor = port->number;
 	tty_unregister_device(usb_serial_tty_driver, minor);
 	dev_info(dev, "%s converter now disconnected from ttyUSB%d\n",
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 1967a7edc10..da890f030fa 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -1046,10 +1046,15 @@ int usb_serial_probe(struct usb_interface *interface,
 
 		dev_set_name(&port->dev, "ttyUSB%d", port->number);
 		dbg ("%s - registering %s", __func__, dev_name(&port->dev));
+		port->dev_state = PORT_REGISTERING;
 		retval = device_register(&port->dev);
-		if (retval)
+		if (retval) {
 			dev_err(&port->dev, "Error registering port device, "
 				"continuing\n");
+			port->dev_state = PORT_UNREGISTERED;
+		} else {
+			port->dev_state = PORT_REGISTERED;
+		}
 	}
 
 	usb_serial_console_init(debug, minor);
@@ -1130,7 +1135,22 @@ void usb_serial_disconnect(struct usb_interface *interface)
 			}
 			kill_traffic(port);
 			cancel_work_sync(&port->work);
-			device_del(&port->dev);
+			if (port->dev_state == PORT_REGISTERED) {
+
+				/* Make sure the port is bound so that the
+				 * driver's port_remove method is called.
+				 */
+				if (!port->dev.driver) {
+					int rc;
+
+					port->dev.driver =
+							&serial->type->driver;
+					rc = device_bind_driver(&port->dev);
+				}
+				port->dev_state = PORT_UNREGISTERING;
+				device_del(&port->dev);
+				port->dev_state = PORT_UNREGISTERED;
+			}
 		}
 	}
 	serial->type->shutdown(serial);
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index e29ebcf3287..ed4aa0fa7ed 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -27,6 +27,13 @@
 /* parity check flag */
 #define RELEVANT_IFLAG(iflag)	(iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK))
 
+enum port_dev_state {
+	PORT_UNREGISTERED,
+	PORT_REGISTERING,
+	PORT_REGISTERED,
+	PORT_UNREGISTERING,
+};
+
 /**
  * usb_serial_port: structure for the specific ports of a device.
  * @serial: pointer back to the struct usb_serial owner of this port.
@@ -102,6 +109,7 @@ struct usb_serial_port {
 	char			console;
 	unsigned long		sysrq; /* sysrq timeout */
 	struct device		dev;
+	enum port_dev_state	dev_state;
 };
 #define to_usb_serial_port(d) container_of(d, struct usb_serial_port, dev)
 
-- 
cgit v1.2.3-70-g09d2


From f9c99bb8b3a1ec81af68d484a551307326c2e933 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Tue, 2 Jun 2009 11:53:55 -0400
Subject: USB: usb-serial: replace shutdown with disconnect, release

This patch (as1254) splits up the shutdown method of usb_serial_driver
into a disconnect and a release method.

The problem is that the usb-serial core was calling shutdown during
disconnect handling, but drivers didn't expect it to be called until
after all the open file references had been closed.  The result was an
oops when the close method tried to use memory that had been
deallocated by shutdown.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/staging/uc2322/aten2011.c     |  4 ++--
 drivers/usb/serial/aircable.c         |  5 ++---
 drivers/usb/serial/belkin_sa.c        |  7 +++---
 drivers/usb/serial/cp210x.c           |  6 ++---
 drivers/usb/serial/cyberjack.c        | 20 ++++++++++++-----
 drivers/usb/serial/cypress_m8.c       | 11 +++++----
 drivers/usb/serial/digi_acceleport.c  | 20 ++++++++++++-----
 drivers/usb/serial/empeg.c            |  8 -------
 drivers/usb/serial/ftdi_sio.c         | 14 ------------
 drivers/usb/serial/garmin_gps.c       | 16 ++++++++++---
 drivers/usb/serial/generic.c          |  9 ++++++--
 drivers/usb/serial/io_edgeport.c      | 29 ++++++++++++++++--------
 drivers/usb/serial/io_tables.h        | 12 ++++++----
 drivers/usb/serial/io_ti.c            | 22 +++++++++++++-----
 drivers/usb/serial/ipaq.c             |  7 ------
 drivers/usb/serial/iuu_phoenix.c      |  6 ++---
 drivers/usb/serial/keyspan.c          | 13 ++++++++++-
 drivers/usb/serial/keyspan.h          | 12 ++++++----
 drivers/usb/serial/keyspan_pda.c      |  4 ++--
 drivers/usb/serial/kl5kusb105.c       | 39 ++++++++++++++++++--------------
 drivers/usb/serial/kobil_sct.c        | 12 ++++------
 drivers/usb/serial/mct_u232.c         | 13 +++++------
 drivers/usb/serial/mos7720.c          |  9 +++-----
 drivers/usb/serial/mos7840.c          | 42 ++++++++++++++++++++++++++++++-----
 drivers/usb/serial/omninet.c          | 19 ++++++++++++----
 drivers/usb/serial/opticon.c          | 14 +++++++++---
 drivers/usb/serial/option.c           | 17 +++++++++-----
 drivers/usb/serial/oti6858.c          |  7 +++---
 drivers/usb/serial/pl2303.c           |  5 ++---
 drivers/usb/serial/sierra.c           |  4 ++--
 drivers/usb/serial/spcp8x5.c          |  5 ++---
 drivers/usb/serial/symbolserial.c     | 14 +++++++++---
 drivers/usb/serial/ti_usb_3410_5052.c | 10 ++++-----
 drivers/usb/serial/usb-serial.c       | 29 +++++++++++-------------
 drivers/usb/serial/visor.c            | 13 +++++------
 drivers/usb/serial/whiteheat.c        |  6 ++---
 include/linux/usb/serial.h            | 12 ++++++----
 37 files changed, 297 insertions(+), 198 deletions(-)

(limited to 'include')

diff --git a/drivers/staging/uc2322/aten2011.c b/drivers/staging/uc2322/aten2011.c
index 9c62f787cc9..39d0926d1a9 100644
--- a/drivers/staging/uc2322/aten2011.c
+++ b/drivers/staging/uc2322/aten2011.c
@@ -2336,7 +2336,7 @@ static int ATEN2011_startup(struct usb_serial *serial)
 	return 0;
 }
 
-static void ATEN2011_shutdown(struct usb_serial *serial)
+static void ATEN2011_release(struct usb_serial *serial)
 {
 	int i;
 	struct ATENINTL_port *ATEN2011_port;
@@ -2382,7 +2382,7 @@ static struct usb_serial_driver aten_serial_driver = {
 	.tiocmget =		ATEN2011_tiocmget,
 	.tiocmset =		ATEN2011_tiocmset,
 	.attach =		ATEN2011_startup,
-	.shutdown =		ATEN2011_shutdown,
+	.release =		ATEN2011_release,
 	.read_bulk_callback =	ATEN2011_bulk_in_callback,
 	.read_int_callback =	ATEN2011_interrupt_callback,
 };
diff --git a/drivers/usb/serial/aircable.c b/drivers/usb/serial/aircable.c
index 6d106e74265..2cbfab3716e 100644
--- a/drivers/usb/serial/aircable.c
+++ b/drivers/usb/serial/aircable.c
@@ -364,7 +364,7 @@ static int aircable_attach(struct usb_serial *serial)
 	return 0;
 }
 
-static void aircable_shutdown(struct usb_serial *serial)
+static void aircable_release(struct usb_serial *serial)
 {
 
 	struct usb_serial_port *port = serial->port[0];
@@ -375,7 +375,6 @@ static void aircable_shutdown(struct usb_serial *serial)
 	if (priv) {
 		serial_buf_free(priv->tx_buf);
 		serial_buf_free(priv->rx_buf);
-		usb_set_serial_port_data(port, NULL);
 		kfree(priv);
 	}
 }
@@ -601,7 +600,7 @@ static struct usb_serial_driver aircable_device = {
 	.num_ports =		1,
 	.attach =		aircable_attach,
 	.probe =		aircable_probe,
-	.shutdown =		aircable_shutdown,
+	.release =		aircable_release,
 	.write =		aircable_write,
 	.write_room =		aircable_write_room,
 	.write_bulk_callback =	aircable_write_bulk_callback,
diff --git a/drivers/usb/serial/belkin_sa.c b/drivers/usb/serial/belkin_sa.c
index 2bfd6dd85b5..7033b031b44 100644
--- a/drivers/usb/serial/belkin_sa.c
+++ b/drivers/usb/serial/belkin_sa.c
@@ -90,7 +90,7 @@ static int debug;
 
 /* function prototypes for a Belkin USB Serial Adapter F5U103 */
 static int  belkin_sa_startup(struct usb_serial *serial);
-static void belkin_sa_shutdown(struct usb_serial *serial);
+static void belkin_sa_release(struct usb_serial *serial);
 static int  belkin_sa_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
 static void belkin_sa_close(struct usb_serial_port *port);
@@ -142,7 +142,7 @@ static struct usb_serial_driver belkin_device = {
 	.tiocmget =		belkin_sa_tiocmget,
 	.tiocmset =		belkin_sa_tiocmset,
 	.attach =		belkin_sa_startup,
-	.shutdown =		belkin_sa_shutdown,
+	.release =		belkin_sa_release,
 };
 
 
@@ -197,14 +197,13 @@ static int belkin_sa_startup(struct usb_serial *serial)
 }
 
 
-static void belkin_sa_shutdown(struct usb_serial *serial)
+static void belkin_sa_release(struct usb_serial *serial)
 {
 	struct belkin_sa_private *priv;
 	int i;
 
 	dbg("%s", __func__);
 
-	/* stop reads and writes on all ports */
 	for (i = 0; i < serial->num_ports; ++i) {
 		/* My special items, the standard routines free my urbs */
 		priv = usb_get_serial_port_data(serial->port[i]);
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 16a154d3b2f..2b9eeda62bf 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -50,7 +50,7 @@ static int cp210x_tiocmset_port(struct usb_serial_port *port, struct file *,
 		unsigned int, unsigned int);
 static void cp210x_break_ctl(struct tty_struct *, int);
 static int cp210x_startup(struct usb_serial *);
-static void cp210x_shutdown(struct usb_serial *);
+static void cp210x_disconnect(struct usb_serial *);
 
 static int debug;
 
@@ -137,7 +137,7 @@ static struct usb_serial_driver cp210x_device = {
 	.tiocmget 		= cp210x_tiocmget,
 	.tiocmset		= cp210x_tiocmset,
 	.attach			= cp210x_startup,
-	.shutdown		= cp210x_shutdown,
+	.disconnect		= cp210x_disconnect,
 };
 
 /* Config request types */
@@ -792,7 +792,7 @@ static int cp210x_startup(struct usb_serial *serial)
 	return 0;
 }
 
-static void cp210x_shutdown(struct usb_serial *serial)
+static void cp210x_disconnect(struct usb_serial *serial)
 {
 	int i;
 
diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c
index 933ba913e66..336523fd736 100644
--- a/drivers/usb/serial/cyberjack.c
+++ b/drivers/usb/serial/cyberjack.c
@@ -58,7 +58,8 @@ static int debug;
 
 /* Function prototypes */
 static int cyberjack_startup(struct usb_serial *serial);
-static void cyberjack_shutdown(struct usb_serial *serial);
+static void cyberjack_disconnect(struct usb_serial *serial);
+static void cyberjack_release(struct usb_serial *serial);
 static int  cyberjack_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
 static void cyberjack_close(struct usb_serial_port *port);
@@ -94,7 +95,8 @@ static struct usb_serial_driver cyberjack_device = {
 	.id_table =		id_table,
 	.num_ports =		1,
 	.attach =		cyberjack_startup,
-	.shutdown =		cyberjack_shutdown,
+	.disconnect =		cyberjack_disconnect,
+	.release =		cyberjack_release,
 	.open =			cyberjack_open,
 	.close =		cyberjack_close,
 	.write =		cyberjack_write,
@@ -148,17 +150,25 @@ static int cyberjack_startup(struct usb_serial *serial)
 	return 0;
 }
 
-static void cyberjack_shutdown(struct usb_serial *serial)
+static void cyberjack_disconnect(struct usb_serial *serial)
 {
 	int i;
 
 	dbg("%s", __func__);
 
-	for (i = 0; i < serial->num_ports; ++i) {
+	for (i = 0; i < serial->num_ports; ++i)
 		usb_kill_urb(serial->port[i]->interrupt_in_urb);
+}
+
+static void cyberjack_release(struct usb_serial *serial)
+{
+	int i;
+
+	dbg("%s", __func__);
+
+	for (i = 0; i < serial->num_ports; ++i) {
 		/* My special items, the standard routines free my urbs */
 		kfree(usb_get_serial_port_data(serial->port[i]));
-		usb_set_serial_port_data(serial->port[i], NULL);
 	}
 }
 
diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c
index 669f9384853..9734085fd2f 100644
--- a/drivers/usb/serial/cypress_m8.c
+++ b/drivers/usb/serial/cypress_m8.c
@@ -171,7 +171,7 @@ struct cypress_buf {
 static int  cypress_earthmate_startup(struct usb_serial *serial);
 static int  cypress_hidcom_startup(struct usb_serial *serial);
 static int  cypress_ca42v2_startup(struct usb_serial *serial);
-static void cypress_shutdown(struct usb_serial *serial);
+static void cypress_release(struct usb_serial *serial);
 static int  cypress_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
 static void cypress_close(struct usb_serial_port *port);
@@ -215,7 +215,7 @@ static struct usb_serial_driver cypress_earthmate_device = {
 	.id_table =			id_table_earthmate,
 	.num_ports =			1,
 	.attach =			cypress_earthmate_startup,
-	.shutdown =			cypress_shutdown,
+	.release =			cypress_release,
 	.open =				cypress_open,
 	.close =			cypress_close,
 	.dtr_rts =			cypress_dtr_rts,
@@ -242,7 +242,7 @@ static struct usb_serial_driver cypress_hidcom_device = {
 	.id_table =			id_table_cyphidcomrs232,
 	.num_ports =			1,
 	.attach =			cypress_hidcom_startup,
-	.shutdown =			cypress_shutdown,
+	.release =			cypress_release,
 	.open =				cypress_open,
 	.close =			cypress_close,
 	.dtr_rts =			cypress_dtr_rts,
@@ -269,7 +269,7 @@ static struct usb_serial_driver cypress_ca42v2_device = {
 	.id_table =			id_table_nokiaca42v2,
 	.num_ports =			1,
 	.attach =			cypress_ca42v2_startup,
-	.shutdown =			cypress_shutdown,
+	.release =			cypress_release,
 	.open =				cypress_open,
 	.close =			cypress_close,
 	.dtr_rts =			cypress_dtr_rts,
@@ -616,7 +616,7 @@ static int cypress_ca42v2_startup(struct usb_serial *serial)
 } /* cypress_ca42v2_startup */
 
 
-static void cypress_shutdown(struct usb_serial *serial)
+static void cypress_release(struct usb_serial *serial)
 {
 	struct cypress_private *priv;
 
@@ -629,7 +629,6 @@ static void cypress_shutdown(struct usb_serial *serial)
 	if (priv) {
 		cypress_buf_free(priv->buf);
 		kfree(priv);
-		usb_set_serial_port_data(serial->port[0], NULL);
 	}
 }
 
diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c
index 30f5140eff0..f4808091c47 100644
--- a/drivers/usb/serial/digi_acceleport.c
+++ b/drivers/usb/serial/digi_acceleport.c
@@ -460,7 +460,8 @@ static int digi_carrier_raised(struct usb_serial_port *port);
 static void digi_dtr_rts(struct usb_serial_port *port, int on);
 static int digi_startup_device(struct usb_serial *serial);
 static int digi_startup(struct usb_serial *serial);
-static void digi_shutdown(struct usb_serial *serial);
+static void digi_disconnect(struct usb_serial *serial);
+static void digi_release(struct usb_serial *serial);
 static void digi_read_bulk_callback(struct urb *urb);
 static int digi_read_inb_callback(struct urb *urb);
 static int digi_read_oob_callback(struct urb *urb);
@@ -524,7 +525,8 @@ static struct usb_serial_driver digi_acceleport_2_device = {
 	.tiocmget =			digi_tiocmget,
 	.tiocmset =			digi_tiocmset,
 	.attach =			digi_startup,
-	.shutdown =			digi_shutdown,
+	.disconnect =			digi_disconnect,
+	.release =			digi_release,
 };
 
 static struct usb_serial_driver digi_acceleport_4_device = {
@@ -550,7 +552,8 @@ static struct usb_serial_driver digi_acceleport_4_device = {
 	.tiocmget =			digi_tiocmget,
 	.tiocmset =			digi_tiocmset,
 	.attach =			digi_startup,
-	.shutdown =			digi_shutdown,
+	.disconnect =			digi_disconnect,
+	.release =			digi_release,
 };
 
 
@@ -1556,16 +1559,23 @@ static int digi_startup(struct usb_serial *serial)
 }
 
 
-static void digi_shutdown(struct usb_serial *serial)
+static void digi_disconnect(struct usb_serial *serial)
 {
 	int i;
-	dbg("digi_shutdown: TOP, in_interrupt()=%ld", in_interrupt());
+	dbg("digi_disconnect: TOP, in_interrupt()=%ld", in_interrupt());
 
 	/* stop reads and writes on all ports */
 	for (i = 0; i < serial->type->num_ports + 1; i++) {
 		usb_kill_urb(serial->port[i]->read_urb);
 		usb_kill_urb(serial->port[i]->write_urb);
 	}
+}
+
+
+static void digi_release(struct usb_serial *serial)
+{
+	int i;
+	dbg("digi_release: TOP, in_interrupt()=%ld", in_interrupt());
 
 	/* free the private data structures for all ports */
 	/* number of regular ports + 1 for the out-of-band port */
diff --git a/drivers/usb/serial/empeg.c b/drivers/usb/serial/empeg.c
index 2b141ccb0cd..80cb3471adb 100644
--- a/drivers/usb/serial/empeg.c
+++ b/drivers/usb/serial/empeg.c
@@ -90,7 +90,6 @@ static int  empeg_chars_in_buffer(struct tty_struct *tty);
 static void empeg_throttle(struct tty_struct *tty);
 static void empeg_unthrottle(struct tty_struct *tty);
 static int  empeg_startup(struct usb_serial *serial);
-static void empeg_shutdown(struct usb_serial *serial);
 static void empeg_set_termios(struct tty_struct *tty,
 		struct usb_serial_port *port, struct ktermios *old_termios);
 static void empeg_write_bulk_callback(struct urb *urb);
@@ -124,7 +123,6 @@ static struct usb_serial_driver empeg_device = {
 	.throttle =		empeg_throttle,
 	.unthrottle =		empeg_unthrottle,
 	.attach =		empeg_startup,
-	.shutdown =		empeg_shutdown,
 	.set_termios =		empeg_set_termios,
 	.write =		empeg_write,
 	.write_room =		empeg_write_room,
@@ -427,12 +425,6 @@ static int  empeg_startup(struct usb_serial *serial)
 }
 
 
-static void empeg_shutdown(struct usb_serial *serial)
-{
-	dbg("%s", __func__);
-}
-
-
 static void empeg_set_termios(struct tty_struct *tty,
 		struct usb_serial_port *port, struct ktermios *old_termios)
 {
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index fc423583eed..3dc3768ca71 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -720,7 +720,6 @@ static const char *ftdi_chip_name[] = {
 /* function prototypes for a FTDI serial converter */
 static int  ftdi_sio_probe(struct usb_serial *serial,
 					const struct usb_device_id *id);
-static void ftdi_shutdown(struct usb_serial *serial);
 static int  ftdi_sio_port_probe(struct usb_serial_port *port);
 static int  ftdi_sio_port_remove(struct usb_serial_port *port);
 static int  ftdi_open(struct tty_struct *tty,
@@ -779,7 +778,6 @@ static struct usb_serial_driver ftdi_sio_device = {
 	.ioctl =		ftdi_ioctl,
 	.set_termios =		ftdi_set_termios,
 	.break_ctl =		ftdi_break_ctl,
-	.shutdown =		ftdi_shutdown,
 };
 
 
@@ -1594,18 +1592,6 @@ static int ftdi_mtxorb_hack_setup(struct usb_serial *serial)
 	return 0;
 }
 
-/* ftdi_shutdown is called from usbserial:usb_serial_disconnect
- *   it is called when the usb device is disconnected
- *
- *   usbserial:usb_serial_disconnect
- *      calls __serial_close for each open of the port
- *      shutdown is called then (ie ftdi_shutdown)
- */
-static void ftdi_shutdown(struct usb_serial *serial)
-{
-	dbg("%s", __func__);
-}
-
 static void ftdi_sio_priv_release(struct kref *k)
 {
 	struct ftdi_private *priv = container_of(k, struct ftdi_private, kref);
diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c
index 5092d6aba65..8839f1c70b7 100644
--- a/drivers/usb/serial/garmin_gps.c
+++ b/drivers/usb/serial/garmin_gps.c
@@ -1475,7 +1475,7 @@ static int garmin_attach(struct usb_serial *serial)
 }
 
 
-static void garmin_shutdown(struct usb_serial *serial)
+static void garmin_disconnect(struct usb_serial *serial)
 {
 	struct usb_serial_port *port = serial->port[0];
 	struct garmin_data *garmin_data_p = usb_get_serial_port_data(port);
@@ -1484,8 +1484,17 @@ static void garmin_shutdown(struct usb_serial *serial)
 
 	usb_kill_urb(port->interrupt_in_urb);
 	del_timer_sync(&garmin_data_p->timer);
+}
+
+
+static void garmin_release(struct usb_serial *serial)
+{
+	struct usb_serial_port *port = serial->port[0];
+	struct garmin_data *garmin_data_p = usb_get_serial_port_data(port);
+
+	dbg("%s", __func__);
+
 	kfree(garmin_data_p);
-	usb_set_serial_port_data(port, NULL);
 }
 
 
@@ -1504,7 +1513,8 @@ static struct usb_serial_driver garmin_device = {
 	.throttle            = garmin_throttle,
 	.unthrottle          = garmin_unthrottle,
 	.attach              = garmin_attach,
-	.shutdown            = garmin_shutdown,
+	.disconnect          = garmin_disconnect,
+	.release             = garmin_release,
 	.write               = garmin_write,
 	.write_room          = garmin_write_room,
 	.write_bulk_callback = garmin_write_bulk_callback,
diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index 8dabac1929b..932d6241b78 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -63,7 +63,8 @@ struct usb_serial_driver usb_serial_generic_device = {
 	.id_table =		generic_device_ids,
 	.usb_driver = 		&generic_driver,
 	.num_ports =		1,
-	.shutdown =		usb_serial_generic_shutdown,
+	.disconnect =		usb_serial_generic_disconnect,
+	.release =		usb_serial_generic_release,
 	.throttle =		usb_serial_generic_throttle,
 	.unthrottle =		usb_serial_generic_unthrottle,
 	.resume =		usb_serial_generic_resume,
@@ -551,7 +552,7 @@ int usb_serial_handle_break(struct usb_serial_port *port)
 }
 EXPORT_SYMBOL_GPL(usb_serial_handle_break);
 
-void usb_serial_generic_shutdown(struct usb_serial *serial)
+void usb_serial_generic_disconnect(struct usb_serial *serial)
 {
 	int i;
 
@@ -562,3 +563,7 @@ void usb_serial_generic_shutdown(struct usb_serial *serial)
 		generic_cleanup(serial->port[i]);
 }
 
+void usb_serial_generic_release(struct usb_serial *serial)
+{
+	dbg("%s", __func__);
+}
diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c
index 53ef5996e33..0191693625d 100644
--- a/drivers/usb/serial/io_edgeport.c
+++ b/drivers/usb/serial/io_edgeport.c
@@ -224,7 +224,8 @@ static int  edge_tiocmget(struct tty_struct *tty, struct file *file);
 static int  edge_tiocmset(struct tty_struct *tty, struct file *file,
 					unsigned int set, unsigned int clear);
 static int  edge_startup(struct usb_serial *serial);
-static void edge_shutdown(struct usb_serial *serial);
+static void edge_disconnect(struct usb_serial *serial);
+static void edge_release(struct usb_serial *serial);
 
 #include "io_tables.h"	/* all of the devices that this driver supports */
 
@@ -3193,21 +3194,16 @@ static int edge_startup(struct usb_serial *serial)
 
 
 /****************************************************************************
- * edge_shutdown
+ * edge_disconnect
  *	This function is called whenever the device is removed from the usb bus.
  ****************************************************************************/
-static void edge_shutdown(struct usb_serial *serial)
+static void edge_disconnect(struct usb_serial *serial)
 {
 	struct edgeport_serial *edge_serial = usb_get_serial_data(serial);
-	int i;
 
 	dbg("%s", __func__);
 
 	/* stop reads and writes on all ports */
-	for (i = 0; i < serial->num_ports; ++i) {
-		kfree(usb_get_serial_port_data(serial->port[i]));
-		usb_set_serial_port_data(serial->port[i],  NULL);
-	}
 	/* free up our endpoint stuff */
 	if (edge_serial->is_epic) {
 		usb_kill_urb(edge_serial->interrupt_read_urb);
@@ -3218,9 +3214,24 @@ static void edge_shutdown(struct usb_serial *serial)
 		usb_free_urb(edge_serial->read_urb);
 		kfree(edge_serial->bulk_in_buffer);
 	}
+}
+
+
+/****************************************************************************
+ * edge_release
+ *	This function is called when the device structure is deallocated.
+ ****************************************************************************/
+static void edge_release(struct usb_serial *serial)
+{
+	struct edgeport_serial *edge_serial = usb_get_serial_data(serial);
+	int i;
+
+	dbg("%s", __func__);
+
+	for (i = 0; i < serial->num_ports; ++i)
+		kfree(usb_get_serial_port_data(serial->port[i]));
 
 	kfree(edge_serial);
-	usb_set_serial_data(serial, NULL);
 }
 
 
diff --git a/drivers/usb/serial/io_tables.h b/drivers/usb/serial/io_tables.h
index 7eb9d67b81b..9241d314751 100644
--- a/drivers/usb/serial/io_tables.h
+++ b/drivers/usb/serial/io_tables.h
@@ -117,7 +117,8 @@ static struct usb_serial_driver edgeport_2port_device = {
 	.throttle		= edge_throttle,
 	.unthrottle		= edge_unthrottle,
 	.attach			= edge_startup,
-	.shutdown		= edge_shutdown,
+	.disconnect		= edge_disconnect,
+	.release		= edge_release,
 	.ioctl			= edge_ioctl,
 	.set_termios		= edge_set_termios,
 	.tiocmget		= edge_tiocmget,
@@ -145,7 +146,8 @@ static struct usb_serial_driver edgeport_4port_device = {
 	.throttle		= edge_throttle,
 	.unthrottle		= edge_unthrottle,
 	.attach			= edge_startup,
-	.shutdown		= edge_shutdown,
+	.disconnect		= edge_disconnect,
+	.release		= edge_release,
 	.ioctl			= edge_ioctl,
 	.set_termios		= edge_set_termios,
 	.tiocmget		= edge_tiocmget,
@@ -173,7 +175,8 @@ static struct usb_serial_driver edgeport_8port_device = {
 	.throttle		= edge_throttle,
 	.unthrottle		= edge_unthrottle,
 	.attach			= edge_startup,
-	.shutdown		= edge_shutdown,
+	.disconnect		= edge_disconnect,
+	.release		= edge_release,
 	.ioctl			= edge_ioctl,
 	.set_termios		= edge_set_termios,
 	.tiocmget		= edge_tiocmget,
@@ -200,7 +203,8 @@ static struct usb_serial_driver epic_device = {
 	.throttle		= edge_throttle,
 	.unthrottle		= edge_unthrottle,
 	.attach			= edge_startup,
-	.shutdown		= edge_shutdown,
+	.disconnect		= edge_disconnect,
+	.release		= edge_release,
 	.ioctl			= edge_ioctl,
 	.set_termios		= edge_set_termios,
 	.tiocmget		= edge_tiocmget,
diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c
index db964db42d3..e8bc42f92e7 100644
--- a/drivers/usb/serial/io_ti.c
+++ b/drivers/usb/serial/io_ti.c
@@ -2663,7 +2663,7 @@ cleanup:
 	return -ENOMEM;
 }
 
-static void edge_shutdown(struct usb_serial *serial)
+static void edge_disconnect(struct usb_serial *serial)
 {
 	int i;
 	struct edgeport_port *edge_port;
@@ -2673,12 +2673,22 @@ static void edge_shutdown(struct usb_serial *serial)
 	for (i = 0; i < serial->num_ports; ++i) {
 		edge_port = usb_get_serial_port_data(serial->port[i]);
 		edge_remove_sysfs_attrs(edge_port->port);
+	}
+}
+
+static void edge_release(struct usb_serial *serial)
+{
+	int i;
+	struct edgeport_port *edge_port;
+
+	dbg("%s", __func__);
+
+	for (i = 0; i < serial->num_ports; ++i) {
+		edge_port = usb_get_serial_port_data(serial->port[i]);
 		edge_buf_free(edge_port->ep_out_buf);
 		kfree(edge_port);
-		usb_set_serial_port_data(serial->port[i], NULL);
 	}
 	kfree(usb_get_serial_data(serial));
-	usb_set_serial_data(serial, NULL);
 }
 
 
@@ -2915,7 +2925,8 @@ static struct usb_serial_driver edgeport_1port_device = {
 	.throttle		= edge_throttle,
 	.unthrottle		= edge_unthrottle,
 	.attach			= edge_startup,
-	.shutdown		= edge_shutdown,
+	.disconnect		= edge_disconnect,
+	.release		= edge_release,
 	.port_probe		= edge_create_sysfs_attrs,
 	.ioctl			= edge_ioctl,
 	.set_termios		= edge_set_termios,
@@ -2944,7 +2955,8 @@ static struct usb_serial_driver edgeport_2port_device = {
 	.throttle		= edge_throttle,
 	.unthrottle		= edge_unthrottle,
 	.attach			= edge_startup,
-	.shutdown		= edge_shutdown,
+	.disconnect		= edge_disconnect,
+	.release		= edge_release,
 	.port_probe		= edge_create_sysfs_attrs,
 	.ioctl			= edge_ioctl,
 	.set_termios		= edge_set_termios,
diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c
index c610a99fa47..2545d45ce16 100644
--- a/drivers/usb/serial/ipaq.c
+++ b/drivers/usb/serial/ipaq.c
@@ -79,7 +79,6 @@ static int  ipaq_open(struct tty_struct *tty,
 static void ipaq_close(struct usb_serial_port *port);
 static int  ipaq_calc_num_ports(struct usb_serial *serial);
 static int  ipaq_startup(struct usb_serial *serial);
-static void ipaq_shutdown(struct usb_serial *serial);
 static int ipaq_write(struct tty_struct *tty, struct usb_serial_port *port,
 			const unsigned char *buf, int count);
 static int ipaq_write_bulk(struct usb_serial_port *port,
@@ -576,7 +575,6 @@ static struct usb_serial_driver ipaq_device = {
 	.close =		ipaq_close,
 	.attach =		ipaq_startup,
 	.calc_num_ports =	ipaq_calc_num_ports,
-	.shutdown =		ipaq_shutdown,
 	.write =		ipaq_write,
 	.write_room =		ipaq_write_room,
 	.chars_in_buffer =	ipaq_chars_in_buffer,
@@ -990,11 +988,6 @@ static int ipaq_startup(struct usb_serial *serial)
 	return usb_reset_configuration(serial->dev);
 }
 
-static void ipaq_shutdown(struct usb_serial *serial)
-{
-	dbg("%s", __func__);
-}
-
 static int __init ipaq_init(void)
 {
 	int retval;
diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c
index 76a3cc327bb..96873a7a32b 100644
--- a/drivers/usb/serial/iuu_phoenix.c
+++ b/drivers/usb/serial/iuu_phoenix.c
@@ -121,8 +121,8 @@ static int iuu_startup(struct usb_serial *serial)
 	return 0;
 }
 
-/* Shutdown function */
-static void iuu_shutdown(struct usb_serial *serial)
+/* Release function */
+static void iuu_release(struct usb_serial *serial)
 {
 	struct usb_serial_port *port = serial->port[0];
 	struct iuu_private *priv = usb_get_serial_port_data(port);
@@ -1202,7 +1202,7 @@ static struct usb_serial_driver iuu_device = {
 	.tiocmset = iuu_tiocmset,
 	.set_termios = iuu_set_termios,
 	.attach = iuu_startup,
-	.shutdown = iuu_shutdown,
+	.release = iuu_release,
 };
 
 static int __init iuu_init(void)
diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c
index f1195a98f31..2594b8743d3 100644
--- a/drivers/usb/serial/keyspan.c
+++ b/drivers/usb/serial/keyspan.c
@@ -2689,7 +2689,7 @@ static int keyspan_startup(struct usb_serial *serial)
 	return 0;
 }
 
-static void keyspan_shutdown(struct usb_serial *serial)
+static void keyspan_disconnect(struct usb_serial *serial)
 {
 	int				i, j;
 	struct usb_serial_port		*port;
@@ -2729,6 +2729,17 @@ static void keyspan_shutdown(struct usb_serial *serial)
 			usb_free_urb(p_priv->out_urbs[j]);
 		}
 	}
+}
+
+static void keyspan_release(struct usb_serial *serial)
+{
+	int				i;
+	struct usb_serial_port		*port;
+	struct keyspan_serial_private 	*s_priv;
+
+	dbg("%s", __func__);
+
+	s_priv = usb_get_serial_data(serial);
 
 	/*  dbg("Freeing serial->private."); */
 	kfree(s_priv);
diff --git a/drivers/usb/serial/keyspan.h b/drivers/usb/serial/keyspan.h
index 0d4569b6076..3107ed15af6 100644
--- a/drivers/usb/serial/keyspan.h
+++ b/drivers/usb/serial/keyspan.h
@@ -41,7 +41,8 @@ static int  keyspan_open		(struct tty_struct *tty,
 static void keyspan_close		(struct usb_serial_port *port);
 static void keyspan_dtr_rts		(struct usb_serial_port *port, int on);
 static int  keyspan_startup		(struct usb_serial *serial);
-static void keyspan_shutdown		(struct usb_serial *serial);
+static void keyspan_disconnect		(struct usb_serial *serial);
+static void keyspan_release		(struct usb_serial *serial);
 static int  keyspan_write_room		(struct tty_struct *tty);
 
 static int  keyspan_write		(struct tty_struct *tty,
@@ -569,7 +570,8 @@ static struct usb_serial_driver keyspan_1port_device = {
 	.tiocmget		= keyspan_tiocmget,
 	.tiocmset		= keyspan_tiocmset,
 	.attach			= keyspan_startup,
-	.shutdown		= keyspan_shutdown,
+	.disconnect		= keyspan_disconnect,
+	.release		= keyspan_release,
 };
 
 static struct usb_serial_driver keyspan_2port_device = {
@@ -590,7 +592,8 @@ static struct usb_serial_driver keyspan_2port_device = {
 	.tiocmget		= keyspan_tiocmget,
 	.tiocmset		= keyspan_tiocmset,
 	.attach			= keyspan_startup,
-	.shutdown		= keyspan_shutdown,
+	.disconnect		= keyspan_disconnect,
+	.release		= keyspan_release,
 };
 
 static struct usb_serial_driver keyspan_4port_device = {
@@ -611,7 +614,8 @@ static struct usb_serial_driver keyspan_4port_device = {
 	.tiocmget		= keyspan_tiocmget,
 	.tiocmset		= keyspan_tiocmset,
 	.attach			= keyspan_startup,
-	.shutdown		= keyspan_shutdown,
+	.disconnect		= keyspan_disconnect,
+	.release		= keyspan_release,
 };
 
 #endif
diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c
index ab769dbea1b..d0b12e40c2b 100644
--- a/drivers/usb/serial/keyspan_pda.c
+++ b/drivers/usb/serial/keyspan_pda.c
@@ -809,7 +809,7 @@ static int keyspan_pda_startup(struct usb_serial *serial)
 	return 0;
 }
 
-static void keyspan_pda_shutdown(struct usb_serial *serial)
+static void keyspan_pda_release(struct usb_serial *serial)
 {
 	dbg("%s", __func__);
 
@@ -869,7 +869,7 @@ static struct usb_serial_driver keyspan_pda_device = {
 	.tiocmget =		keyspan_pda_tiocmget,
 	.tiocmset =		keyspan_pda_tiocmset,
 	.attach =		keyspan_pda_startup,
-	.shutdown =		keyspan_pda_shutdown,
+	.release =		keyspan_pda_release,
 };
 
 
diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c
index fa817c66b3e..0f44bb8e8d4 100644
--- a/drivers/usb/serial/kl5kusb105.c
+++ b/drivers/usb/serial/kl5kusb105.c
@@ -73,7 +73,8 @@ static int debug;
  * Function prototypes
  */
 static int  klsi_105_startup(struct usb_serial *serial);
-static void klsi_105_shutdown(struct usb_serial *serial);
+static void klsi_105_disconnect(struct usb_serial *serial);
+static void klsi_105_release(struct usb_serial *serial);
 static int  klsi_105_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
 static void klsi_105_close(struct usb_serial_port *port);
@@ -131,7 +132,8 @@ static struct usb_serial_driver kl5kusb105d_device = {
 	.tiocmget =          klsi_105_tiocmget,
 	.tiocmset =          klsi_105_tiocmset,
 	.attach =	     klsi_105_startup,
-	.shutdown =	     klsi_105_shutdown,
+	.disconnect =	     klsi_105_disconnect,
+	.release =	     klsi_105_release,
 	.throttle =	     klsi_105_throttle,
 	.unthrottle =	     klsi_105_unthrottle,
 };
@@ -315,7 +317,7 @@ err_cleanup:
 } /* klsi_105_startup */
 
 
-static void klsi_105_shutdown(struct usb_serial *serial)
+static void klsi_105_disconnect(struct usb_serial *serial)
 {
 	int i;
 
@@ -325,33 +327,36 @@ static void klsi_105_shutdown(struct usb_serial *serial)
 	for (i = 0; i < serial->num_ports; ++i) {
 		struct klsi_105_private *priv =
 				usb_get_serial_port_data(serial->port[i]);
-		unsigned long flags;
 
 		if (priv) {
 			/* kill our write urb pool */
 			int j;
 			struct urb **write_urbs = priv->write_urb_pool;
-			spin_lock_irqsave(&priv->lock, flags);
 
 			for (j = 0; j < NUM_URBS; j++) {
 				if (write_urbs[j]) {
-					/* FIXME - uncomment the following
-					 * usb_kill_urb call when the host
-					 * controllers get fixed to set
-					 * urb->dev = NULL after the urb is
-					 * finished.  Otherwise this call
-					 * oopses. */
-					/* usb_kill_urb(write_urbs[j]); */
-					kfree(write_urbs[j]->transfer_buffer);
+					usb_kill_urb(write_urbs[j]);
 					usb_free_urb(write_urbs[j]);
 				}
 			}
-			spin_unlock_irqrestore(&priv->lock, flags);
-			kfree(priv);
-			usb_set_serial_port_data(serial->port[i], NULL);
 		}
 	}
-} /* klsi_105_shutdown */
+} /* klsi_105_disconnect */
+
+
+static void klsi_105_release(struct usb_serial *serial)
+{
+	int i;
+
+	dbg("%s", __func__);
+
+	for (i = 0; i < serial->num_ports; ++i) {
+		struct klsi_105_private *priv =
+				usb_get_serial_port_data(serial->port[i]);
+
+		kfree(priv);
+	}
+} /* klsi_105_release */
 
 static int  klsi_105_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp)
diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c
index 6b570498287..6db0e561f68 100644
--- a/drivers/usb/serial/kobil_sct.c
+++ b/drivers/usb/serial/kobil_sct.c
@@ -69,7 +69,7 @@ static int debug;
 
 /* Function prototypes */
 static int  kobil_startup(struct usb_serial *serial);
-static void kobil_shutdown(struct usb_serial *serial);
+static void kobil_release(struct usb_serial *serial);
 static int  kobil_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
 static void kobil_close(struct usb_serial_port *port);
@@ -117,7 +117,7 @@ static struct usb_serial_driver kobil_device = {
 	.id_table =		id_table,
 	.num_ports =		1,
 	.attach =		kobil_startup,
-	.shutdown =		kobil_shutdown,
+	.release =		kobil_release,
 	.ioctl =		kobil_ioctl,
 	.set_termios =		kobil_set_termios,
 	.tiocmget =		kobil_tiocmget,
@@ -201,17 +201,13 @@ static int kobil_startup(struct usb_serial *serial)
 }
 
 
-static void kobil_shutdown(struct usb_serial *serial)
+static void kobil_release(struct usb_serial *serial)
 {
 	int i;
 	dbg("%s - port %d", __func__, serial->port[0]->number);
 
-	for (i = 0; i < serial->num_ports; ++i) {
-		while (serial->port[i]->port.count > 0)
-			kobil_close(serial->port[i]);
+	for (i = 0; i < serial->num_ports; ++i)
 		kfree(usb_get_serial_port_data(serial->port[i]));
-		usb_set_serial_port_data(serial->port[i], NULL);
-	}
 }
 
 
diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c
index 873795548fc..d8825e159aa 100644
--- a/drivers/usb/serial/mct_u232.c
+++ b/drivers/usb/serial/mct_u232.c
@@ -92,7 +92,7 @@ static int debug;
  * Function prototypes
  */
 static int  mct_u232_startup(struct usb_serial *serial);
-static void mct_u232_shutdown(struct usb_serial *serial);
+static void mct_u232_release(struct usb_serial *serial);
 static int  mct_u232_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
 static void mct_u232_close(struct usb_serial_port *port);
@@ -149,7 +149,7 @@ static struct usb_serial_driver mct_u232_device = {
 	.tiocmget =	     mct_u232_tiocmget,
 	.tiocmset =	     mct_u232_tiocmset,
 	.attach =	     mct_u232_startup,
-	.shutdown =	     mct_u232_shutdown,
+	.release =	     mct_u232_release,
 };
 
 
@@ -407,7 +407,7 @@ static int mct_u232_startup(struct usb_serial *serial)
 } /* mct_u232_startup */
 
 
-static void mct_u232_shutdown(struct usb_serial *serial)
+static void mct_u232_release(struct usb_serial *serial)
 {
 	struct mct_u232_private *priv;
 	int i;
@@ -417,12 +417,9 @@ static void mct_u232_shutdown(struct usb_serial *serial)
 	for (i = 0; i < serial->num_ports; ++i) {
 		/* My special items, the standard routines free my urbs */
 		priv = usb_get_serial_port_data(serial->port[i]);
-		if (priv) {
-			usb_set_serial_port_data(serial->port[i], NULL);
-			kfree(priv);
-		}
+		kfree(priv);
 	}
-} /* mct_u232_shutdown */
+} /* mct_u232_release */
 
 static int  mct_u232_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp)
diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c
index 9e1a013ee7f..bfc5ce000ef 100644
--- a/drivers/usb/serial/mos7720.c
+++ b/drivers/usb/serial/mos7720.c
@@ -1521,19 +1521,16 @@ static int mos7720_startup(struct usb_serial *serial)
 	return 0;
 }
 
-static void mos7720_shutdown(struct usb_serial *serial)
+static void mos7720_release(struct usb_serial *serial)
 {
 	int i;
 
 	/* free private structure allocated for serial port */
-	for (i = 0; i < serial->num_ports; ++i) {
+	for (i = 0; i < serial->num_ports; ++i)
 		kfree(usb_get_serial_port_data(serial->port[i]));
-		usb_set_serial_port_data(serial->port[i], NULL);
-	}
 
 	/* free private structure allocated for serial device */
 	kfree(usb_get_serial_data(serial));
-	usb_set_serial_data(serial, NULL);
 }
 
 static struct usb_driver usb_driver = {
@@ -1558,7 +1555,7 @@ static struct usb_serial_driver moschip7720_2port_driver = {
 	.throttle		= mos7720_throttle,
 	.unthrottle		= mos7720_unthrottle,
 	.attach			= mos7720_startup,
-	.shutdown		= mos7720_shutdown,
+	.release		= mos7720_release,
 	.ioctl			= mos7720_ioctl,
 	.set_termios		= mos7720_set_termios,
 	.write			= mos7720_write,
diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c
index 5fe9fe3df77..c40f95c1951 100644
--- a/drivers/usb/serial/mos7840.c
+++ b/drivers/usb/serial/mos7840.c
@@ -2633,16 +2633,16 @@ error:
 }
 
 /****************************************************************************
- * mos7840_shutdown
+ * mos7840_disconnect
  *	This function is called whenever the device is removed from the usb bus.
  ****************************************************************************/
 
-static void mos7840_shutdown(struct usb_serial *serial)
+static void mos7840_disconnect(struct usb_serial *serial)
 {
 	int i;
 	unsigned long flags;
 	struct moschip_port *mos7840_port;
-	dbg("%s", " shutdown :entering..........");
+	dbg("%s", " disconnect :entering..........");
 
 	if (!serial) {
 		dbg("%s", "Invalid Handler");
@@ -2662,11 +2662,42 @@ static void mos7840_shutdown(struct usb_serial *serial)
 			mos7840_port->zombie = 1;
 			spin_unlock_irqrestore(&mos7840_port->pool_lock, flags);
 			usb_kill_urb(mos7840_port->control_urb);
+		}
+	}
+
+	dbg("%s", "Thank u :: ");
+
+}
+
+/****************************************************************************
+ * mos7840_release
+ *	This function is called when the usb_serial structure is freed.
+ ****************************************************************************/
+
+static void mos7840_release(struct usb_serial *serial)
+{
+	int i;
+	struct moschip_port *mos7840_port;
+	dbg("%s", " release :entering..........");
+
+	if (!serial) {
+		dbg("%s", "Invalid Handler");
+		return;
+	}
+
+	/* check for the ports to be closed,close the ports and disconnect */
+
+	/* free private structure allocated for serial port  *
+	 * stop reads and writes on all ports                */
+
+	for (i = 0; i < serial->num_ports; ++i) {
+		mos7840_port = mos7840_get_port_private(serial->port[i]);
+		dbg("mos7840_port %d = %p", i, mos7840_port);
+		if (mos7840_port) {
 			kfree(mos7840_port->ctrl_buf);
 			kfree(mos7840_port->dr);
 			kfree(mos7840_port);
 		}
-		mos7840_set_port_private(serial->port[i], NULL);
 	}
 
 	dbg("%s", "Thank u :: ");
@@ -2707,7 +2738,8 @@ static struct usb_serial_driver moschip7840_4port_device = {
 	.tiocmget = mos7840_tiocmget,
 	.tiocmset = mos7840_tiocmset,
 	.attach = mos7840_startup,
-	.shutdown = mos7840_shutdown,
+	.disconnect = mos7840_disconnect,
+	.release = mos7840_release,
 	.read_bulk_callback = mos7840_bulk_in_callback,
 	.read_int_callback = mos7840_interrupt_callback,
 };
diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c
index 1104617334f..56857ddbd70 100644
--- a/drivers/usb/serial/omninet.c
+++ b/drivers/usb/serial/omninet.c
@@ -72,7 +72,8 @@ static void omninet_write_bulk_callback(struct urb *urb);
 static int  omninet_write(struct tty_struct *tty, struct usb_serial_port *port,
 				const unsigned char *buf, int count);
 static int  omninet_write_room(struct tty_struct *tty);
-static void omninet_shutdown(struct usb_serial *serial);
+static void omninet_disconnect(struct usb_serial *serial);
+static void omninet_release(struct usb_serial *serial);
 static int omninet_attach(struct usb_serial *serial);
 
 static struct usb_device_id id_table[] = {
@@ -108,7 +109,8 @@ static struct usb_serial_driver zyxel_omninet_device = {
 	.write_room =		omninet_write_room,
 	.read_bulk_callback =	omninet_read_bulk_callback,
 	.write_bulk_callback =	omninet_write_bulk_callback,
-	.shutdown =		omninet_shutdown,
+	.disconnect =		omninet_disconnect,
+	.release =		omninet_release,
 };
 
 
@@ -345,13 +347,22 @@ static void omninet_write_bulk_callback(struct urb *urb)
 }
 
 
-static void omninet_shutdown(struct usb_serial *serial)
+static void omninet_disconnect(struct usb_serial *serial)
 {
 	struct usb_serial_port *wport = serial->port[1];
-	struct usb_serial_port *port = serial->port[0];
+
 	dbg("%s", __func__);
 
 	usb_kill_urb(wport->write_urb);
+}
+
+
+static void omninet_release(struct usb_serial *serial)
+{
+	struct usb_serial_port *port = serial->port[0];
+
+	dbg("%s", __func__);
+
 	kfree(usb_get_serial_port_data(port));
 }
 
diff --git a/drivers/usb/serial/opticon.c b/drivers/usb/serial/opticon.c
index c20480aa975..336bba79ad3 100644
--- a/drivers/usb/serial/opticon.c
+++ b/drivers/usb/serial/opticon.c
@@ -463,7 +463,7 @@ error:
 	return retval;
 }
 
-static void opticon_shutdown(struct usb_serial *serial)
+static void opticon_disconnect(struct usb_serial *serial)
 {
 	struct opticon_private *priv = usb_get_serial_data(serial);
 
@@ -471,9 +471,16 @@ static void opticon_shutdown(struct usb_serial *serial)
 
 	usb_kill_urb(priv->bulk_read_urb);
 	usb_free_urb(priv->bulk_read_urb);
+}
+
+static void opticon_release(struct usb_serial *serial)
+{
+	struct opticon_private *priv = usb_get_serial_data(serial);
+
+	dbg("%s", __func__);
+
 	kfree(priv->bulk_in_buffer);
 	kfree(priv);
-	usb_set_serial_data(serial, NULL);
 }
 
 static int opticon_suspend(struct usb_interface *intf, pm_message_t message)
@@ -524,7 +531,8 @@ static struct usb_serial_driver opticon_device = {
 	.close =		opticon_close,
 	.write =		opticon_write,
 	.write_room = 		opticon_write_room,
-	.shutdown =		opticon_shutdown,
+	.disconnect =		opticon_disconnect,
+	.release =		opticon_release,
 	.throttle = 		opticon_throttle,
 	.unthrottle =		opticon_unthrottle,
 	.ioctl =		opticon_ioctl,
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index a38971cc373..575816e6ba3 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -51,7 +51,8 @@ static void option_close(struct usb_serial_port *port);
 static void option_dtr_rts(struct usb_serial_port *port, int on);
 
 static int  option_startup(struct usb_serial *serial);
-static void option_shutdown(struct usb_serial *serial);
+static void option_disconnect(struct usb_serial *serial);
+static void option_release(struct usb_serial *serial);
 static int  option_write_room(struct tty_struct *tty);
 
 static void option_instat_callback(struct urb *urb);
@@ -568,7 +569,8 @@ static struct usb_serial_driver option_1port_device = {
 	.tiocmget          = option_tiocmget,
 	.tiocmset          = option_tiocmset,
 	.attach            = option_startup,
-	.shutdown          = option_shutdown,
+	.disconnect        = option_disconnect,
+	.release           = option_release,
 	.read_int_callback = option_instat_callback,
 	.suspend           = option_suspend,
 	.resume            = option_resume,
@@ -1149,7 +1151,14 @@ static void stop_read_write_urbs(struct usb_serial *serial)
 	}
 }
 
-static void option_shutdown(struct usb_serial *serial)
+static void option_disconnect(struct usb_serial *serial)
+{
+	dbg("%s", __func__);
+
+	stop_read_write_urbs(serial);
+}
+
+static void option_release(struct usb_serial *serial)
 {
 	int i, j;
 	struct usb_serial_port *port;
@@ -1157,8 +1166,6 @@ static void option_shutdown(struct usb_serial *serial)
 
 	dbg("%s", __func__);
 
-	stop_read_write_urbs(serial);
-
 	/* Now free them */
 	for (i = 0; i < serial->num_ports; ++i) {
 		port = serial->port[i];
diff --git a/drivers/usb/serial/oti6858.c b/drivers/usb/serial/oti6858.c
index 7de54781fe6..3cece27325e 100644
--- a/drivers/usb/serial/oti6858.c
+++ b/drivers/usb/serial/oti6858.c
@@ -159,7 +159,7 @@ static int oti6858_tiocmget(struct tty_struct *tty, struct file *file);
 static int oti6858_tiocmset(struct tty_struct *tty, struct file *file,
 				unsigned int set, unsigned int clear);
 static int oti6858_startup(struct usb_serial *serial);
-static void oti6858_shutdown(struct usb_serial *serial);
+static void oti6858_release(struct usb_serial *serial);
 
 /* functions operating on buffers */
 static struct oti6858_buf *oti6858_buf_alloc(unsigned int size);
@@ -194,7 +194,7 @@ static struct usb_serial_driver oti6858_device = {
 	.write_room =		oti6858_write_room,
 	.chars_in_buffer =	oti6858_chars_in_buffer,
 	.attach =		oti6858_startup,
-	.shutdown =		oti6858_shutdown,
+	.release =		oti6858_release,
 };
 
 struct oti6858_private {
@@ -782,7 +782,7 @@ static int oti6858_ioctl(struct tty_struct *tty, struct file *file,
 }
 
 
-static void oti6858_shutdown(struct usb_serial *serial)
+static void oti6858_release(struct usb_serial *serial)
 {
 	struct oti6858_private *priv;
 	int i;
@@ -794,7 +794,6 @@ static void oti6858_shutdown(struct usb_serial *serial)
 		if (priv) {
 			oti6858_buf_free(priv->buf);
 			kfree(priv);
-			usb_set_serial_port_data(serial->port[i], NULL);
 		}
 	}
 }
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 6357b57f628..ec6c132a25b 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -878,7 +878,7 @@ static void pl2303_break_ctl(struct tty_struct *tty, int break_state)
 		dbg("%s - error sending break = %d", __func__, result);
 }
 
-static void pl2303_shutdown(struct usb_serial *serial)
+static void pl2303_release(struct usb_serial *serial)
 {
 	int i;
 	struct pl2303_private *priv;
@@ -890,7 +890,6 @@ static void pl2303_shutdown(struct usb_serial *serial)
 		if (priv) {
 			pl2303_buf_free(priv->buf);
 			kfree(priv);
-			usb_set_serial_port_data(serial->port[i], NULL);
 		}
 	}
 }
@@ -1123,7 +1122,7 @@ static struct usb_serial_driver pl2303_device = {
 	.write_room =		pl2303_write_room,
 	.chars_in_buffer =	pl2303_chars_in_buffer,
 	.attach =		pl2303_startup,
-	.shutdown =		pl2303_shutdown,
+	.release =		pl2303_release,
 };
 
 static int __init pl2303_init(void)
diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index a88cde95016..032f7aeb40a 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -814,7 +814,7 @@ static int sierra_startup(struct usb_serial *serial)
 	return 0;
 }
 
-static void sierra_shutdown(struct usb_serial *serial)
+static void sierra_disconnect(struct usb_serial *serial)
 {
 	int i;
 	struct usb_serial_port *port;
@@ -853,7 +853,7 @@ static struct usb_serial_driver sierra_device = {
 	.tiocmget          = sierra_tiocmget,
 	.tiocmset          = sierra_tiocmset,
 	.attach            = sierra_startup,
-	.shutdown          = sierra_shutdown,
+	.disconnect        = sierra_disconnect,
 	.read_int_callback = sierra_instat_callback,
 };
 
diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c
index 8f7ed8f1399..3c249d8e8b8 100644
--- a/drivers/usb/serial/spcp8x5.c
+++ b/drivers/usb/serial/spcp8x5.c
@@ -356,7 +356,7 @@ cleanup:
 }
 
 /* call when the device plug out. free all the memory alloced by probe */
-static void spcp8x5_shutdown(struct usb_serial *serial)
+static void spcp8x5_release(struct usb_serial *serial)
 {
 	int i;
 	struct spcp8x5_private *priv;
@@ -366,7 +366,6 @@ static void spcp8x5_shutdown(struct usb_serial *serial)
 		if (priv) {
 			free_ringbuf(priv->buf);
 			kfree(priv);
-			usb_set_serial_port_data(serial->port[i] , NULL);
 		}
 	}
 }
@@ -1020,7 +1019,7 @@ static struct usb_serial_driver spcp8x5_device = {
 	.write_bulk_callback	= spcp8x5_write_bulk_callback,
 	.chars_in_buffer 	= spcp8x5_chars_in_buffer,
 	.attach 		= spcp8x5_startup,
-	.shutdown 		= spcp8x5_shutdown,
+	.release 		= spcp8x5_release,
 };
 
 static int __init spcp8x5_init(void)
diff --git a/drivers/usb/serial/symbolserial.c b/drivers/usb/serial/symbolserial.c
index 8b07ebc6bae..6157fac9366 100644
--- a/drivers/usb/serial/symbolserial.c
+++ b/drivers/usb/serial/symbolserial.c
@@ -267,7 +267,7 @@ error:
 	return retval;
 }
 
-static void symbol_shutdown(struct usb_serial *serial)
+static void symbol_disconnect(struct usb_serial *serial)
 {
 	struct symbol_private *priv = usb_get_serial_data(serial);
 
@@ -275,9 +275,16 @@ static void symbol_shutdown(struct usb_serial *serial)
 
 	usb_kill_urb(priv->int_urb);
 	usb_free_urb(priv->int_urb);
+}
+
+static void symbol_release(struct usb_serial *serial)
+{
+	struct symbol_private *priv = usb_get_serial_data(serial);
+
+	dbg("%s", __func__);
+
 	kfree(priv->int_buffer);
 	kfree(priv);
-	usb_set_serial_data(serial, NULL);
 }
 
 static struct usb_driver symbol_driver = {
@@ -299,7 +306,8 @@ static struct usb_serial_driver symbol_device = {
 	.attach =		symbol_startup,
 	.open =			symbol_open,
 	.close =		symbol_close,
-	.shutdown =		symbol_shutdown,
+	.disconnect =		symbol_disconnect,
+	.release =		symbol_release,
 	.throttle = 		symbol_throttle,
 	.unthrottle =		symbol_unthrottle,
 };
diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c
index 42cb04c403b..991d8232e37 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.c
+++ b/drivers/usb/serial/ti_usb_3410_5052.c
@@ -97,7 +97,7 @@ struct ti_device {
 /* Function Declarations */
 
 static int ti_startup(struct usb_serial *serial);
-static void ti_shutdown(struct usb_serial *serial);
+static void ti_release(struct usb_serial *serial);
 static int ti_open(struct tty_struct *tty, struct usb_serial_port *port,
 		struct file *file);
 static void ti_close(struct usb_serial_port *port);
@@ -230,7 +230,7 @@ static struct usb_serial_driver ti_1port_device = {
 	.id_table		= ti_id_table_3410,
 	.num_ports		= 1,
 	.attach			= ti_startup,
-	.shutdown		= ti_shutdown,
+	.release		= ti_release,
 	.open			= ti_open,
 	.close			= ti_close,
 	.write			= ti_write,
@@ -258,7 +258,7 @@ static struct usb_serial_driver ti_2port_device = {
 	.id_table		= ti_id_table_5052,
 	.num_ports		= 2,
 	.attach			= ti_startup,
-	.shutdown		= ti_shutdown,
+	.release		= ti_release,
 	.open			= ti_open,
 	.close			= ti_close,
 	.write			= ti_write,
@@ -473,7 +473,7 @@ free_tdev:
 }
 
 
-static void ti_shutdown(struct usb_serial *serial)
+static void ti_release(struct usb_serial *serial)
 {
 	int i;
 	struct ti_device *tdev = usb_get_serial_data(serial);
@@ -486,12 +486,10 @@ static void ti_shutdown(struct usb_serial *serial)
 		if (tport) {
 			ti_buf_free(tport->tp_write_buf);
 			kfree(tport);
-			usb_set_serial_port_data(serial->port[i], NULL);
 		}
 	}
 
 	kfree(tdev);
-	usb_set_serial_data(serial, NULL);
 }
 
 
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index da890f030fa..d595aa5586a 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -141,6 +141,14 @@ static void destroy_serial(struct kref *kref)
 	if (serial->minor != SERIAL_TTY_NO_MINOR)
 		return_serial(serial);
 
+	serial->type->release(serial);
+
+	for (i = 0; i < serial->num_ports; ++i) {
+		port = serial->port[i];
+		if (port)
+			put_device(&port->dev);
+	}
+
 	/* If this is a "fake" port, we have to clean it up here, as it will
 	 * not get cleaned up in port_release() as it was never registered with
 	 * the driver core */
@@ -148,9 +156,8 @@ static void destroy_serial(struct kref *kref)
 		for (i = serial->num_ports;
 					i < serial->num_port_pointers; ++i) {
 			port = serial->port[i];
-			if (!port)
-				continue;
-			port_free(port);
+			if (port)
+				port_free(port);
 		}
 	}
 
@@ -1118,10 +1125,6 @@ void usb_serial_disconnect(struct usb_interface *interface)
 	serial->disconnected = 1;
 	mutex_unlock(&serial->disc_mutex);
 
-	/* Unfortunately, many of the sub-drivers expect the port structures
-	 * to exist when their shutdown method is called, so we have to go
-	 * through this awkward two-step unregistration procedure.
-	 */
 	for (i = 0; i < serial->num_ports; ++i) {
 		port = serial->port[i];
 		if (port) {
@@ -1153,14 +1156,7 @@ void usb_serial_disconnect(struct usb_interface *interface)
 			}
 		}
 	}
-	serial->type->shutdown(serial);
-	for (i = 0; i < serial->num_ports; ++i) {
-		port = serial->port[i];
-		if (port) {
-			put_device(&port->dev);
-			serial->port[i] = NULL;
-		}
-	}
+	serial->type->disconnect(serial);
 
 	/* let the last holder of this object
 	 * cause it to be cleaned up */
@@ -1338,7 +1334,8 @@ static void fixup_generic(struct usb_serial_driver *device)
 	set_to_generic_if_null(device, chars_in_buffer);
 	set_to_generic_if_null(device, read_bulk_callback);
 	set_to_generic_if_null(device, write_bulk_callback);
-	set_to_generic_if_null(device, shutdown);
+	set_to_generic_if_null(device, disconnect);
+	set_to_generic_if_null(device, release);
 }
 
 int usb_serial_register(struct usb_serial_driver *driver)
diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c
index b15f1c0e1d4..f5d0f64dcc5 100644
--- a/drivers/usb/serial/visor.c
+++ b/drivers/usb/serial/visor.c
@@ -47,7 +47,7 @@ static void visor_unthrottle(struct tty_struct *tty);
 static int  visor_probe(struct usb_serial *serial,
 					const struct usb_device_id *id);
 static int  visor_calc_num_ports(struct usb_serial *serial);
-static void visor_shutdown(struct usb_serial *serial);
+static void visor_release(struct usb_serial *serial);
 static void visor_write_bulk_callback(struct urb *urb);
 static void visor_read_bulk_callback(struct urb *urb);
 static void visor_read_int_callback(struct urb *urb);
@@ -202,7 +202,7 @@ static struct usb_serial_driver handspring_device = {
 	.attach =		treo_attach,
 	.probe =		visor_probe,
 	.calc_num_ports =	visor_calc_num_ports,
-	.shutdown =		visor_shutdown,
+	.release =		visor_release,
 	.write =		visor_write,
 	.write_room =		visor_write_room,
 	.write_bulk_callback =	visor_write_bulk_callback,
@@ -227,7 +227,7 @@ static struct usb_serial_driver clie_5_device = {
 	.attach =		clie_5_attach,
 	.probe =		visor_probe,
 	.calc_num_ports =	visor_calc_num_ports,
-	.shutdown =		visor_shutdown,
+	.release =		visor_release,
 	.write =		visor_write,
 	.write_room =		visor_write_room,
 	.write_bulk_callback =	visor_write_bulk_callback,
@@ -918,7 +918,7 @@ static int clie_5_attach(struct usb_serial *serial)
 	return generic_startup(serial);
 }
 
-static void visor_shutdown(struct usb_serial *serial)
+static void visor_release(struct usb_serial *serial)
 {
 	struct visor_private *priv;
 	int i;
@@ -927,10 +927,7 @@ static void visor_shutdown(struct usb_serial *serial)
 
 	for (i = 0; i < serial->num_ports; i++) {
 		priv = usb_get_serial_port_data(serial->port[i]);
-		if (priv) {
-			usb_set_serial_port_data(serial->port[i], NULL);
-			kfree(priv);
-		}
+		kfree(priv);
 	}
 }
 
diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c
index 7c7295d09f3..8d126dd7a02 100644
--- a/drivers/usb/serial/whiteheat.c
+++ b/drivers/usb/serial/whiteheat.c
@@ -144,7 +144,7 @@ static int  whiteheat_firmware_attach(struct usb_serial *serial);
 
 /* function prototypes for the Connect Tech WhiteHEAT serial converter */
 static int  whiteheat_attach(struct usb_serial *serial);
-static void whiteheat_shutdown(struct usb_serial *serial);
+static void whiteheat_release(struct usb_serial *serial);
 static int  whiteheat_open(struct tty_struct *tty,
 			struct usb_serial_port *port, struct file *filp);
 static void whiteheat_close(struct usb_serial_port *port);
@@ -189,7 +189,7 @@ static struct usb_serial_driver whiteheat_device = {
 	.id_table =		id_table_std,
 	.num_ports =		4,
 	.attach =		whiteheat_attach,
-	.shutdown =		whiteheat_shutdown,
+	.release =		whiteheat_release,
 	.open =			whiteheat_open,
 	.close =		whiteheat_close,
 	.write =		whiteheat_write,
@@ -617,7 +617,7 @@ no_command_buffer:
 }
 
 
-static void whiteheat_shutdown(struct usb_serial *serial)
+static void whiteheat_release(struct usb_serial *serial)
 {
 	struct usb_serial_port *command_port;
 	struct usb_serial_port *port;
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index ed4aa0fa7ed..44801d26a37 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -194,8 +194,10 @@ static inline void usb_set_serial_data(struct usb_serial *serial, void *data)
  *	This will be called when the struct usb_serial structure is fully set
  *	set up.  Do any local initialization of the device, or any private
  *	memory structure allocation at this point in time.
- * @shutdown: pointer to the driver's shutdown function.  This will be
- *	called when the device is removed from the system.
+ * @disconnect: pointer to the driver's disconnect function.  This will be
+ *	called when the device is unplugged or unbound from the driver.
+ * @release: pointer to the driver's release function.  This will be called
+ *	when the usb_serial data structure is about to be destroyed.
  * @usb_driver: pointer to the struct usb_driver that controls this
  *	device.  This is necessary to allow dynamic ids to be added to
  *	the driver from sysfs.
@@ -226,7 +228,8 @@ struct usb_serial_driver {
 	int (*attach)(struct usb_serial *serial);
 	int (*calc_num_ports) (struct usb_serial *serial);
 
-	void (*shutdown)(struct usb_serial *serial);
+	void (*disconnect)(struct usb_serial *serial);
+	void (*release)(struct usb_serial *serial);
 
 	int (*port_probe)(struct usb_serial_port *port);
 	int (*port_remove)(struct usb_serial_port *port);
@@ -308,7 +311,8 @@ extern void usb_serial_generic_read_bulk_callback(struct urb *urb);
 extern void usb_serial_generic_write_bulk_callback(struct urb *urb);
 extern void usb_serial_generic_throttle(struct tty_struct *tty);
 extern void usb_serial_generic_unthrottle(struct tty_struct *tty);
-extern void usb_serial_generic_shutdown(struct usb_serial *serial);
+extern void usb_serial_generic_disconnect(struct usb_serial *serial);
+extern void usb_serial_generic_release(struct usb_serial *serial);
 extern int usb_serial_generic_register(int debug);
 extern void usb_serial_generic_deregister(void);
 extern void usb_serial_generic_resubmit_read_urb(struct usb_serial_port *port,
-- 
cgit v1.2.3-70-g09d2


From 5be19a9daa2df2507adf5b4676a7db8d131cf56e Mon Sep 17 00:00:00 2001
From: Xiaochen Shen <xiaochen.shen@intel.com>
Date: Thu, 4 Jun 2009 15:34:49 +0800
Subject: USB: Add Intel Langwell USB Device Controller driver

Intel Langwell USB Device Controller is a High-Speed USB OTG device
controller in Intel Moorestown platform. It can work in OTG device mode
with Intel Langwell USB OTG transceiver driver as well as device-only
mode. The number of programmable endpoints is different through
controller revision.

NOTE:
This patch is the first version Intel Langwell USB OTG device controller
driver. The bug fixing is on going for some hardware and software
issues.  Intel Langwell USB OTG transceiver driver and EHCI driver
patches will be submitted later.

Supported features:
 - USB OTG protocol support with Intel Langwell USB OTG transceiver
   driver (turn on CONFIG_USB_LANGWELL_OTG)
 - Support control, bulk, interrupt and isochronous endpoints
   (isochronous not tested)
 - PCI D0/D3 power management support
 - Link Power Management (LPM) support

Tested gadget drivers:
 - g_file_storage
 - g_ether
 - g_zero

The passed tests:
 - g_file_storage: USBCV Chapter 9 tests
 - g_file_storage: USBCV MSC tests
 - g_file_storage: from/to host files copying
 - g_ether: ping, ftp and scp files from/to host
 - Hotplug, with and without hubs

Known issues:
 - g_ether: failed part of USBCV chap9 tests
 - LPM support not fully tested

TODO:
 - g_ether: pass all USBCV chap9 tests
 - g_zero: pass usbtest tests
 - Stress tests on different gadget drivers
 - On-chip private SRAM caching support

Signed-off-by: Xiaochen Shen <xiaochen.shen@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/gadget/Kconfig        |   21 +
 drivers/usb/gadget/Makefile       |    1 +
 drivers/usb/gadget/gadget_chips.h |    8 +
 drivers/usb/gadget/langwell_udc.c | 3373 +++++++++++++++++++++++++++++++++++++
 drivers/usb/gadget/langwell_udc.h |  228 +++
 include/linux/usb/langwell_udc.h  |  310 ++++
 6 files changed, 3941 insertions(+)
 create mode 100644 drivers/usb/gadget/langwell_udc.c
 create mode 100644 drivers/usb/gadget/langwell_udc.h
 create mode 100644 include/linux/usb/langwell_udc.h

(limited to 'include')

diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index ef9d9086cdd..5d1ddf485d1 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -474,6 +474,27 @@ config USB_GOKU
 	default USB_GADGET
 	select USB_GADGET_SELECTED
 
+config USB_GADGET_LANGWELL
+	boolean "Intel Langwell USB Device Controller"
+	depends on PCI
+	select USB_GADGET_DUALSPEED
+	help
+	   Intel Langwell USB Device Controller is a High-Speed USB
+	   On-The-Go device controller.
+
+	   The number of programmable endpoints is different through
+	   controller revision.
+
+	   Say "y" to link the driver statically, or "m" to build a
+	   dynamically linked module called "langwell_udc" and force all
+	   gadget drivers to also be dynamically linked.
+
+config USB_LANGWELL
+	tristate
+	depends on USB_GADGET_LANGWELL
+	default USB_GADGET
+	select USB_GADGET_SELECTED
+
 
 #
 # LAST -- dummy/emulated controller
diff --git a/drivers/usb/gadget/Makefile b/drivers/usb/gadget/Makefile
index fc9a7dc0104..e6017e6bf6d 100644
--- a/drivers/usb/gadget/Makefile
+++ b/drivers/usb/gadget/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_USB_M66592)	+= m66592-udc.o
 obj-$(CONFIG_USB_FSL_QE)	+= fsl_qe_udc.o
 obj-$(CONFIG_USB_CI13XXX)	+= ci13xxx_udc.o
 obj-$(CONFIG_USB_S3C_HSOTG)	+= s3c-hsotg.o
+obj-$(CONFIG_USB_LANGWELL)	+= langwell_udc.o
 
 #
 # USB gadget drivers
diff --git a/drivers/usb/gadget/gadget_chips.h b/drivers/usb/gadget/gadget_chips.h
index ec6d439a2aa..8e0e9a0b736 100644
--- a/drivers/usb/gadget/gadget_chips.h
+++ b/drivers/usb/gadget/gadget_chips.h
@@ -137,6 +137,12 @@
 #define gadget_is_musbhdrc(g)	0
 #endif
 
+#ifdef CONFIG_USB_GADGET_LANGWELL
+#define gadget_is_langwell(g)	(!strcmp("langwell_udc", (g)->name))
+#else
+#define gadget_is_langwell(g)	0
+#endif
+
 /* from Montavista kernel (?) */
 #ifdef CONFIG_USB_GADGET_MPC8272
 #define gadget_is_mpc8272(g)	!strcmp("mpc8272_udc", (g)->name)
@@ -231,6 +237,8 @@ static inline int usb_gadget_controller_number(struct usb_gadget *gadget)
 		return 0x22;
 	else if (gadget_is_ci13xxx(gadget))
 		return 0x23;
+	else if (gadget_is_langwell(gadget))
+		return 0x24;
 	return -ENOENT;
 }
 
diff --git a/drivers/usb/gadget/langwell_udc.c b/drivers/usb/gadget/langwell_udc.c
new file mode 100644
index 00000000000..6829d596135
--- /dev/null
+++ b/drivers/usb/gadget/langwell_udc.c
@@ -0,0 +1,3373 @@
+/*
+ * Intel Langwell USB Device Controller driver
+ * Copyright (C) 2008-2009, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+
+/* #undef	DEBUG */
+/* #undef	VERBOSE */
+
+#if defined(CONFIG_USB_LANGWELL_OTG)
+#define	OTG_TRANSCEIVER
+#endif
+
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/ioport.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/timer.h>
+#include <linux/list.h>
+#include <linux/interrupt.h>
+#include <linux/moduleparam.h>
+#include <linux/device.h>
+#include <linux/usb/ch9.h>
+#include <linux/usb/gadget.h>
+#include <linux/usb/otg.h>
+#include <linux/pm.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <asm/system.h>
+#include <asm/unaligned.h>
+
+#include "langwell_udc.h"
+
+
+#define	DRIVER_DESC		"Intel Langwell USB Device Controller driver"
+#define	DRIVER_VERSION		"16 May 2009"
+
+static const char driver_name[] = "langwell_udc";
+static const char driver_desc[] = DRIVER_DESC;
+
+
+/* controller device global variable */
+static struct langwell_udc	*the_controller;
+
+/* for endpoint 0 operations */
+static const struct usb_endpoint_descriptor
+langwell_ep0_desc = {
+	.bLength =		USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType =	USB_DT_ENDPOINT,
+	.bEndpointAddress =	0,
+	.bmAttributes =		USB_ENDPOINT_XFER_CONTROL,
+	.wMaxPacketSize =	EP0_MAX_PKT_SIZE,
+};
+
+
+/*-------------------------------------------------------------------------*/
+/* debugging */
+
+#ifdef	DEBUG
+#define	DBG(dev, fmt, args...) \
+	pr_debug("%s %s: " fmt , driver_name, \
+			pci_name(dev->pdev), ## args)
+#else
+#define	DBG(dev, fmt, args...) \
+	do { } while (0)
+#endif /* DEBUG */
+
+
+#ifdef	VERBOSE
+#define	VDBG DBG
+#else
+#define	VDBG(dev, fmt, args...) \
+	do { } while (0)
+#endif	/* VERBOSE */
+
+
+#define	ERROR(dev, fmt, args...) \
+	pr_err("%s %s: " fmt , driver_name, \
+			pci_name(dev->pdev), ## args)
+
+#define	WARNING(dev, fmt, args...) \
+	pr_warning("%s %s: " fmt , driver_name, \
+			pci_name(dev->pdev), ## args)
+
+#define	INFO(dev, fmt, args...) \
+	pr_info("%s %s: " fmt , driver_name, \
+			pci_name(dev->pdev), ## args)
+
+
+#ifdef	VERBOSE
+static inline void print_all_registers(struct langwell_udc *dev)
+{
+	int	i;
+
+	/* Capability Registers */
+	printk(KERN_DEBUG "Capability Registers (offset: "
+			"0x%04x, length: 0x%08x)\n",
+			CAP_REG_OFFSET,
+			(u32)sizeof(struct langwell_cap_regs));
+	printk(KERN_DEBUG "caplength=0x%02x\n",
+			readb(&dev->cap_regs->caplength));
+	printk(KERN_DEBUG "hciversion=0x%04x\n",
+			readw(&dev->cap_regs->hciversion));
+	printk(KERN_DEBUG "hcsparams=0x%08x\n",
+			readl(&dev->cap_regs->hcsparams));
+	printk(KERN_DEBUG "hccparams=0x%08x\n",
+			readl(&dev->cap_regs->hccparams));
+	printk(KERN_DEBUG "dciversion=0x%04x\n",
+			readw(&dev->cap_regs->dciversion));
+	printk(KERN_DEBUG "dccparams=0x%08x\n",
+			readl(&dev->cap_regs->dccparams));
+
+	/* Operational Registers */
+	printk(KERN_DEBUG "Operational Registers (offset: "
+			"0x%04x, length: 0x%08x)\n",
+			OP_REG_OFFSET,
+			(u32)sizeof(struct langwell_op_regs));
+	printk(KERN_DEBUG "extsts=0x%08x\n",
+			readl(&dev->op_regs->extsts));
+	printk(KERN_DEBUG "extintr=0x%08x\n",
+			readl(&dev->op_regs->extintr));
+	printk(KERN_DEBUG "usbcmd=0x%08x\n",
+			readl(&dev->op_regs->usbcmd));
+	printk(KERN_DEBUG "usbsts=0x%08x\n",
+			readl(&dev->op_regs->usbsts));
+	printk(KERN_DEBUG "usbintr=0x%08x\n",
+			readl(&dev->op_regs->usbintr));
+	printk(KERN_DEBUG "frindex=0x%08x\n",
+			readl(&dev->op_regs->frindex));
+	printk(KERN_DEBUG "ctrldssegment=0x%08x\n",
+			readl(&dev->op_regs->ctrldssegment));
+	printk(KERN_DEBUG "deviceaddr=0x%08x\n",
+			readl(&dev->op_regs->deviceaddr));
+	printk(KERN_DEBUG "endpointlistaddr=0x%08x\n",
+			readl(&dev->op_regs->endpointlistaddr));
+	printk(KERN_DEBUG "ttctrl=0x%08x\n",
+			readl(&dev->op_regs->ttctrl));
+	printk(KERN_DEBUG "burstsize=0x%08x\n",
+			readl(&dev->op_regs->burstsize));
+	printk(KERN_DEBUG "txfilltuning=0x%08x\n",
+			readl(&dev->op_regs->txfilltuning));
+	printk(KERN_DEBUG "txttfilltuning=0x%08x\n",
+			readl(&dev->op_regs->txttfilltuning));
+	printk(KERN_DEBUG "ic_usb=0x%08x\n",
+			readl(&dev->op_regs->ic_usb));
+	printk(KERN_DEBUG "ulpi_viewport=0x%08x\n",
+			readl(&dev->op_regs->ulpi_viewport));
+	printk(KERN_DEBUG "configflag=0x%08x\n",
+			readl(&dev->op_regs->configflag));
+	printk(KERN_DEBUG "portsc1=0x%08x\n",
+			readl(&dev->op_regs->portsc1));
+	printk(KERN_DEBUG "devlc=0x%08x\n",
+			readl(&dev->op_regs->devlc));
+	printk(KERN_DEBUG "otgsc=0x%08x\n",
+			readl(&dev->op_regs->otgsc));
+	printk(KERN_DEBUG "usbmode=0x%08x\n",
+			readl(&dev->op_regs->usbmode));
+	printk(KERN_DEBUG "endptnak=0x%08x\n",
+			readl(&dev->op_regs->endptnak));
+	printk(KERN_DEBUG "endptnaken=0x%08x\n",
+			readl(&dev->op_regs->endptnaken));
+	printk(KERN_DEBUG "endptsetupstat=0x%08x\n",
+			readl(&dev->op_regs->endptsetupstat));
+	printk(KERN_DEBUG "endptprime=0x%08x\n",
+			readl(&dev->op_regs->endptprime));
+	printk(KERN_DEBUG "endptflush=0x%08x\n",
+			readl(&dev->op_regs->endptflush));
+	printk(KERN_DEBUG "endptstat=0x%08x\n",
+			readl(&dev->op_regs->endptstat));
+	printk(KERN_DEBUG "endptcomplete=0x%08x\n",
+			readl(&dev->op_regs->endptcomplete));
+
+	for (i = 0; i < dev->ep_max / 2; i++) {
+		printk(KERN_DEBUG "endptctrl[%d]=0x%08x\n",
+				i, readl(&dev->op_regs->endptctrl[i]));
+	}
+}
+#endif /* VERBOSE */
+
+
+/*-------------------------------------------------------------------------*/
+
+#define	DIR_STRING(bAddress)	(((bAddress) & USB_DIR_IN) ? "in" : "out")
+
+#define is_in(ep)	(((ep)->ep_num == 0) ? ((ep)->dev->ep0_dir == \
+			USB_DIR_IN) : ((ep)->desc->bEndpointAddress \
+			& USB_DIR_IN) == USB_DIR_IN)
+
+
+#ifdef	DEBUG
+static char *type_string(u8 bmAttributes)
+{
+	switch ((bmAttributes) & USB_ENDPOINT_XFERTYPE_MASK) {
+	case USB_ENDPOINT_XFER_BULK:
+		return "bulk";
+	case USB_ENDPOINT_XFER_ISOC:
+		return "iso";
+	case USB_ENDPOINT_XFER_INT:
+		return "int";
+	};
+
+	return "control";
+}
+#endif
+
+
+/* configure endpoint control registers */
+static void ep_reset(struct langwell_ep *ep, unsigned char ep_num,
+		unsigned char is_in, unsigned char ep_type)
+{
+	struct langwell_udc	*dev;
+	u32			endptctrl;
+
+	dev = ep->dev;
+	VDBG(dev, "---> %s()\n", __func__);
+
+	endptctrl = readl(&dev->op_regs->endptctrl[ep_num]);
+	if (is_in) {	/* TX */
+		if (ep_num)
+			endptctrl |= EPCTRL_TXR;
+		endptctrl |= EPCTRL_TXE;
+		endptctrl |= ep_type << EPCTRL_TXT_SHIFT;
+	} else {	/* RX */
+		if (ep_num)
+			endptctrl |= EPCTRL_RXR;
+		endptctrl |= EPCTRL_RXE;
+		endptctrl |= ep_type << EPCTRL_RXT_SHIFT;
+	}
+
+	writel(endptctrl, &dev->op_regs->endptctrl[ep_num]);
+
+	VDBG(dev, "<--- %s()\n", __func__);
+}
+
+
+/* reset ep0 dQH and endptctrl */
+static void ep0_reset(struct langwell_udc *dev)
+{
+	struct langwell_ep	*ep;
+	int			i;
+
+	VDBG(dev, "---> %s()\n", __func__);
+
+	/* ep0 in and out */
+	for (i = 0; i < 2; i++) {
+		ep = &dev->ep[i];
+		ep->dev = dev;
+
+		/* ep0 dQH */
+		ep->dqh = &dev->ep_dqh[i];
+
+		/* configure ep0 endpoint capabilities in dQH */
+		ep->dqh->dqh_ios = 1;
+		ep->dqh->dqh_mpl = EP0_MAX_PKT_SIZE;
+
+		/* FIXME: enable ep0-in HW zero length termination select */
+		if (is_in(ep))
+			ep->dqh->dqh_zlt = 0;
+		ep->dqh->dqh_mult = 0;
+
+		/* configure ep0 control registers */
+		ep_reset(&dev->ep[0], 0, i, USB_ENDPOINT_XFER_CONTROL);
+	}
+
+	VDBG(dev, "<--- %s()\n", __func__);
+	return;
+}
+
+
+/*-------------------------------------------------------------------------*/
+
+/* endpoints operations */
+
+/* configure endpoint, making it usable */
+static int langwell_ep_enable(struct usb_ep *_ep,
+		const struct usb_endpoint_descriptor *desc)
+{
+	struct langwell_udc	*dev;
+	struct langwell_ep	*ep;
+	u16			max = 0;
+	unsigned long		flags;
+	int			retval = 0;
+	unsigned char		zlt, ios = 0, mult = 0;
+
+	ep = container_of(_ep, struct langwell_ep, ep);
+	dev = ep->dev;
+	VDBG(dev, "---> %s()\n", __func__);
+
+	if (!_ep || !desc || ep->desc
+			|| desc->bDescriptorType != USB_DT_ENDPOINT)
+		return -EINVAL;
+
+	if (!dev->driver || dev->gadget.speed == USB_SPEED_UNKNOWN)
+		return -ESHUTDOWN;
+
+	max = le16_to_cpu(desc->wMaxPacketSize);
+
+	/*
+	 * disable HW zero length termination select
+	 * driver handles zero length packet through req->req.zero
+	 */
+	zlt = 1;
+
+	/*
+	 * sanity check type, direction, address, and then
+	 * initialize the endpoint capabilities fields in dQH
+	 */
+	switch (desc->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) {
+	case USB_ENDPOINT_XFER_CONTROL:
+		ios = 1;
+		break;
+	case USB_ENDPOINT_XFER_BULK:
+		if ((dev->gadget.speed == USB_SPEED_HIGH
+					&& max != 512)
+				|| (dev->gadget.speed == USB_SPEED_FULL
+					&& max > 64)) {
+			goto done;
+		}
+		break;
+	case USB_ENDPOINT_XFER_INT:
+		if (strstr(ep->ep.name, "-iso")) /* bulk is ok */
+			goto done;
+
+		switch (dev->gadget.speed) {
+		case USB_SPEED_HIGH:
+			if (max <= 1024)
+				break;
+		case USB_SPEED_FULL:
+			if (max <= 64)
+				break;
+		default:
+			if (max <= 8)
+				break;
+			goto done;
+		}
+		break;
+	case USB_ENDPOINT_XFER_ISOC:
+		if (strstr(ep->ep.name, "-bulk")
+				|| strstr(ep->ep.name, "-int"))
+			goto done;
+
+		switch (dev->gadget.speed) {
+		case USB_SPEED_HIGH:
+			if (max <= 1024)
+				break;
+		case USB_SPEED_FULL:
+			if (max <= 1023)
+				break;
+		default:
+			goto done;
+		}
+		/*
+		 * FIXME:
+		 * calculate transactions needed for high bandwidth iso
+		 */
+		mult = (unsigned char)(1 + ((max >> 11) & 0x03));
+		max = max & 0x8ff;	/* bit 0~10 */
+		/* 3 transactions at most */
+		if (mult > 3)
+			goto done;
+		break;
+	default:
+		goto done;
+	}
+
+	spin_lock_irqsave(&dev->lock, flags);
+
+	/* configure endpoint capabilities in dQH */
+	ep->dqh->dqh_ios = ios;
+	ep->dqh->dqh_mpl = cpu_to_le16(max);
+	ep->dqh->dqh_zlt = zlt;
+	ep->dqh->dqh_mult = mult;
+
+	ep->ep.maxpacket = max;
+	ep->desc = desc;
+	ep->stopped = 0;
+	ep->ep_num = desc->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK;
+
+	/* ep_type */
+	ep->ep_type = desc->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK;
+
+	/* configure endpoint control registers */
+	ep_reset(ep, ep->ep_num, is_in(ep), ep->ep_type);
+
+	DBG(dev, "enabled %s (ep%d%s-%s), max %04x\n",
+			_ep->name,
+			ep->ep_num,
+			DIR_STRING(desc->bEndpointAddress),
+			type_string(desc->bmAttributes),
+			max);
+
+	spin_unlock_irqrestore(&dev->lock, flags);
+done:
+	VDBG(dev, "<--- %s()\n", __func__);
+	return retval;
+}
+
+
+/*-------------------------------------------------------------------------*/
+
+/* retire a request */
+static void done(struct langwell_ep *ep, struct langwell_request *req,
+		int status)
+{
+	struct langwell_udc	*dev = ep->dev;
+	unsigned		stopped = ep->stopped;
+	struct langwell_dtd	*curr_dtd, *next_dtd;
+	int			i;
+
+	VDBG(dev, "---> %s()\n", __func__);
+
+	/* remove the req from ep->queue */
+	list_del_init(&req->queue);
+
+	if (req->req.status == -EINPROGRESS)
+		req->req.status = status;
+	else
+		status = req->req.status;
+
+	/* free dTD for the request */
+	next_dtd = req->head;
+	for (i = 0; i < req->dtd_count; i++) {
+		curr_dtd = next_dtd;
+		if (i != req->dtd_count - 1)
+			next_dtd = curr_dtd->next_dtd_virt;
+		dma_pool_free(dev->dtd_pool, curr_dtd, curr_dtd->dtd_dma);
+	}
+
+	if (req->mapped) {
+		dma_unmap_single(&dev->pdev->dev, req->req.dma, req->req.length,
+			is_in(ep) ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
+		req->req.dma = DMA_ADDR_INVALID;
+		req->mapped = 0;
+	} else
+		dma_sync_single_for_cpu(&dev->pdev->dev, req->req.dma,
+				req->req.length,
+				is_in(ep) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+
+	if (status != -ESHUTDOWN)
+		DBG(dev, "complete %s, req %p, stat %d, len %u/%u\n",
+			ep->ep.name, &req->req, status,
+			req->req.actual, req->req.length);
+
+	/* don't modify queue heads during completion callback */
+	ep->stopped = 1;
+
+	spin_unlock(&dev->lock);
+	/* complete routine from gadget driver */
+	if (req->req.complete)
+		req->req.complete(&ep->ep, &req->req);
+
+	spin_lock(&dev->lock);
+	ep->stopped = stopped;
+
+	VDBG(dev, "<--- %s()\n", __func__);
+}
+
+
+static void langwell_ep_fifo_flush(struct usb_ep *_ep);
+
+/* delete all endpoint requests, called with spinlock held */
+static void nuke(struct langwell_ep *ep, int status)
+{
+	/* called with spinlock held */
+	ep->stopped = 1;
+
+	/* endpoint fifo flush */
+	if (&ep->ep && ep->desc)
+		langwell_ep_fifo_flush(&ep->ep);
+
+	while (!list_empty(&ep->queue)) {
+		struct langwell_request	*req = NULL;
+		req = list_entry(ep->queue.next, struct langwell_request,
+				queue);
+		done(ep, req, status);
+	}
+}
+
+
+/*-------------------------------------------------------------------------*/
+
+/* endpoint is no longer usable */
+static int langwell_ep_disable(struct usb_ep *_ep)
+{
+	struct langwell_ep	*ep;
+	unsigned long		flags;
+	struct langwell_udc	*dev;
+	int			ep_num;
+	u32			endptctrl;
+
+	ep = container_of(_ep, struct langwell_ep, ep);
+	dev = ep->dev;
+	VDBG(dev, "---> %s()\n", __func__);
+
+	if (!_ep || !ep->desc)
+		return -EINVAL;
+
+	spin_lock_irqsave(&dev->lock, flags);
+
+	/* disable endpoint control register */
+	ep_num = ep->ep_num;
+	endptctrl = readl(&dev->op_regs->endptctrl[ep_num]);
+	if (is_in(ep))
+		endptctrl &= ~EPCTRL_TXE;
+	else
+		endptctrl &= ~EPCTRL_RXE;
+	writel(endptctrl, &dev->op_regs->endptctrl[ep_num]);
+
+	/* nuke all pending requests (does flush) */
+	nuke(ep, -ESHUTDOWN);
+
+	ep->desc = NULL;
+	ep->stopped = 1;
+
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	DBG(dev, "disabled %s\n", _ep->name);
+	VDBG(dev, "<--- %s()\n", __func__);
+
+	return 0;
+}
+
+
+/* allocate a request object to use with this endpoint */
+static struct usb_request *langwell_alloc_request(struct usb_ep *_ep,
+		gfp_t gfp_flags)
+{
+	struct langwell_ep	*ep;
+	struct langwell_udc	*dev;
+	struct langwell_request	*req = NULL;
+
+	if (!_ep)
+		return NULL;
+
+	ep = container_of(_ep, struct langwell_ep, ep);
+	dev = ep->dev;
+	VDBG(dev, "---> %s()\n", __func__);
+
+	req = kzalloc(sizeof(*req), gfp_flags);
+	if (!req)
+		return NULL;
+
+	req->req.dma = DMA_ADDR_INVALID;
+	INIT_LIST_HEAD(&req->queue);
+
+	VDBG(dev, "alloc request for %s\n", _ep->name);
+	VDBG(dev, "<--- %s()\n", __func__);
+	return &req->req;
+}
+
+
+/* free a request object */
+static void langwell_free_request(struct usb_ep *_ep,
+		struct usb_request *_req)
+{
+	struct langwell_ep	*ep;
+	struct langwell_udc	*dev;
+	struct langwell_request	*req = NULL;
+
+	ep = container_of(_ep, struct langwell_ep, ep);
+	dev = ep->dev;
+	VDBG(dev, "---> %s()\n", __func__);
+
+	if (!_ep || !_req)
+		return;
+
+	req = container_of(_req, struct langwell_request, req);
+	WARN_ON(!list_empty(&req->queue));
+
+	if (_req)
+		kfree(req);
+
+	VDBG(dev, "free request for %s\n", _ep->name);
+	VDBG(dev, "<--- %s()\n", __func__);
+}
+
+
+/*-------------------------------------------------------------------------*/
+
+/* queue dTD and PRIME endpoint */
+static int queue_dtd(struct langwell_ep *ep, struct langwell_request *req)
+{
+	u32			bit_mask, usbcmd, endptstat, dtd_dma;
+	u8			dtd_status;
+	int			i;
+	struct langwell_dqh	*dqh;
+	struct langwell_udc	*dev;
+
+	dev = ep->dev;
+	VDBG(dev, "---> %s()\n", __func__);
+
+	i = ep->ep_num * 2 + is_in(ep);
+	dqh = &dev->ep_dqh[i];
+
+	if (ep->ep_num)
+		VDBG(dev, "%s\n", ep->name);
+	else
+		/* ep0 */
+		VDBG(dev, "%s-%s\n", ep->name, is_in(ep) ? "in" : "out");
+
+	VDBG(dev, "ep_dqh[%d] addr: 0x%08x\n", i, (u32)&(dev->ep_dqh[i]));
+
+	bit_mask = is_in(ep) ?
+		(1 << (ep->ep_num + 16)) : (1 << (ep->ep_num));
+
+	VDBG(dev, "bit_mask = 0x%08x\n", bit_mask);
+
+	/* check if the pipe is empty */
+	if (!(list_empty(&ep->queue))) {
+		/* add dTD to the end of linked list */
+		struct langwell_request	*lastreq;
+		lastreq = list_entry(ep->queue.prev,
+				struct langwell_request, queue);
+
+		lastreq->tail->dtd_next =
+			cpu_to_le32(req->head->dtd_dma & DTD_NEXT_MASK);
+
+		/* read prime bit, if 1 goto out */
+		if (readl(&dev->op_regs->endptprime) & bit_mask)
+			goto out;
+
+		do {
+			/* set ATDTW bit in USBCMD */
+			usbcmd = readl(&dev->op_regs->usbcmd);
+			writel(usbcmd | CMD_ATDTW, &dev->op_regs->usbcmd);
+
+			/* read correct status bit */
+			endptstat = readl(&dev->op_regs->endptstat) & bit_mask;
+
+		} while (!(readl(&dev->op_regs->usbcmd) & CMD_ATDTW));
+
+		/* write ATDTW bit to 0 */
+		usbcmd = readl(&dev->op_regs->usbcmd);
+		writel(usbcmd & ~CMD_ATDTW, &dev->op_regs->usbcmd);
+
+		if (endptstat)
+			goto out;
+	}
+
+	/* write dQH next pointer and terminate bit to 0 */
+	dtd_dma = req->head->dtd_dma & DTD_NEXT_MASK;
+	dqh->dtd_next = cpu_to_le32(dtd_dma);
+
+	/* clear active and halt bit */
+	dtd_status = (u8) ~(DTD_STS_ACTIVE | DTD_STS_HALTED);
+	dqh->dtd_status &= dtd_status;
+	VDBG(dev, "dqh->dtd_status = 0x%x\n", dqh->dtd_status);
+
+	/* write 1 to endptprime register to PRIME endpoint */
+	bit_mask = is_in(ep) ? (1 << (ep->ep_num + 16)) : (1 << ep->ep_num);
+	VDBG(dev, "endprime bit_mask = 0x%08x\n", bit_mask);
+	writel(bit_mask, &dev->op_regs->endptprime);
+out:
+	VDBG(dev, "<--- %s()\n", __func__);
+	return 0;
+}
+
+
+/* fill in the dTD structure to build a transfer descriptor */
+static struct langwell_dtd *build_dtd(struct langwell_request *req,
+		unsigned *length, dma_addr_t *dma, int *is_last)
+{
+	u32			 buf_ptr;
+	struct langwell_dtd	*dtd;
+	struct langwell_udc	*dev;
+	int			i;
+
+	dev = req->ep->dev;
+	VDBG(dev, "---> %s()\n", __func__);
+
+	/* the maximum transfer length, up to 16k bytes */
+	*length = min(req->req.length - req->req.actual,
+			(unsigned)DTD_MAX_TRANSFER_LENGTH);
+
+	/* create dTD dma_pool resource */
+	dtd = dma_pool_alloc(dev->dtd_pool, GFP_KERNEL, dma);
+	if (dtd == NULL)
+		return dtd;
+	dtd->dtd_dma = *dma;
+
+	/* initialize buffer page pointers */
+	buf_ptr = (u32)(req->req.dma + req->req.actual);
+	for (i = 0; i < 5; i++)
+		dtd->dtd_buf[i] = cpu_to_le32(buf_ptr + i * PAGE_SIZE);
+
+	req->req.actual += *length;
+
+	/* fill in total bytes with transfer size */
+	dtd->dtd_total = cpu_to_le16(*length);
+	VDBG(dev, "dtd->dtd_total = %d\n", dtd->dtd_total);
+
+	/* set is_last flag if req->req.zero is set or not */
+	if (req->req.zero) {
+		if (*length == 0 || (*length % req->ep->ep.maxpacket) != 0)
+			*is_last = 1;
+		else
+			*is_last = 0;
+	} else if (req->req.length == req->req.actual) {
+		*is_last = 1;
+	} else
+		*is_last = 0;
+
+	if (*is_last == 0)
+		VDBG(dev, "multi-dtd request!\n");
+
+	/* set interrupt on complete bit for the last dTD */
+	if (*is_last && !req->req.no_interrupt)
+		dtd->dtd_ioc = 1;
+
+	/* set multiplier override 0 for non-ISO and non-TX endpoint */
+	dtd->dtd_multo = 0;
+
+	/* set the active bit of status field to 1 */
+	dtd->dtd_status = DTD_STS_ACTIVE;
+	VDBG(dev, "dtd->dtd_status = 0x%02x\n", dtd->dtd_status);
+
+	VDBG(dev, "length = %d, dma addr= 0x%08x\n", *length, (int)*dma);
+	VDBG(dev, "<--- %s()\n", __func__);
+	return dtd;
+}
+
+
+/* generate dTD linked list for a request */
+static int req_to_dtd(struct langwell_request *req)
+{
+	unsigned		count;
+	int			is_last, is_first = 1;
+	struct langwell_dtd	*dtd, *last_dtd = NULL;
+	struct langwell_udc	*dev;
+	dma_addr_t		dma;
+
+	dev = req->ep->dev;
+	VDBG(dev, "---> %s()\n", __func__);
+	do {
+		dtd = build_dtd(req, &count, &dma, &is_last);
+		if (dtd == NULL)
+			return -ENOMEM;
+
+		if (is_first) {
+			is_first = 0;
+			req->head = dtd;
+		} else {
+			last_dtd->dtd_next = cpu_to_le32(dma);
+			last_dtd->next_dtd_virt = dtd;
+		}
+		last_dtd = dtd;
+		req->dtd_count++;
+	} while (!is_last);
+
+	/* set terminate bit to 1 for the last dTD */
+	dtd->dtd_next = DTD_TERM;
+
+	req->tail = dtd;
+
+	VDBG(dev, "<--- %s()\n", __func__);
+	return 0;
+}
+
+/*-------------------------------------------------------------------------*/
+
+/* queue (submits) an I/O requests to an endpoint */
+static int langwell_ep_queue(struct usb_ep *_ep, struct usb_request *_req,
+		gfp_t gfp_flags)
+{
+	struct langwell_request	*req;
+	struct langwell_ep	*ep;
+	struct langwell_udc	*dev;
+	unsigned long		flags;
+	int			is_iso = 0, zlflag = 0;
+
+	/* always require a cpu-view buffer */
+	req = container_of(_req, struct langwell_request, req);
+	ep = container_of(_ep, struct langwell_ep, ep);
+
+	if (!_req || !_req->complete || !_req->buf
+			|| !list_empty(&req->queue)) {
+		return -EINVAL;
+	}
+
+	if (unlikely(!_ep || !ep->desc))
+		return -EINVAL;
+
+	dev = ep->dev;
+	req->ep = ep;
+	VDBG(dev, "---> %s()\n", __func__);
+
+	if (ep->desc->bmAttributes == USB_ENDPOINT_XFER_ISOC) {
+		if (req->req.length > ep->ep.maxpacket)
+			return -EMSGSIZE;
+		is_iso = 1;
+	}
+
+	if (unlikely(!dev->driver || dev->gadget.speed == USB_SPEED_UNKNOWN))
+		return -ESHUTDOWN;
+
+	/* set up dma mapping in case the caller didn't */
+	if (_req->dma == DMA_ADDR_INVALID) {
+		/* WORKAROUND: WARN_ON(size == 0) */
+		if (_req->length == 0) {
+			VDBG(dev, "req->length: 0->1\n");
+			zlflag = 1;
+			_req->length++;
+		}
+
+		_req->dma = dma_map_single(&dev->pdev->dev,
+				_req->buf, _req->length,
+				is_in(ep) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+		if (zlflag && (_req->length == 1)) {
+			VDBG(dev, "req->length: 1->0\n");
+			zlflag = 0;
+			_req->length = 0;
+		}
+
+		req->mapped = 1;
+		VDBG(dev, "req->mapped = 1\n");
+	} else {
+		dma_sync_single_for_device(&dev->pdev->dev,
+				_req->dma, _req->length,
+				is_in(ep) ?  DMA_TO_DEVICE : DMA_FROM_DEVICE);
+		req->mapped = 0;
+		VDBG(dev, "req->mapped = 0\n");
+	}
+
+	DBG(dev, "%s queue req %p, len %u, buf %p, dma 0x%08x\n",
+			_ep->name,
+			_req, _req->length, _req->buf, _req->dma);
+
+	_req->status = -EINPROGRESS;
+	_req->actual = 0;
+	req->dtd_count = 0;
+
+	spin_lock_irqsave(&dev->lock, flags);
+
+	/* build and put dTDs to endpoint queue */
+	if (!req_to_dtd(req)) {
+		queue_dtd(ep, req);
+	} else {
+		spin_unlock_irqrestore(&dev->lock, flags);
+		return -ENOMEM;
+	}
+
+	/* update ep0 state */
+	if (ep->ep_num == 0)
+		dev->ep0_state = DATA_STATE_XMIT;
+
+	if (likely(req != NULL)) {
+		list_add_tail(&req->queue, &ep->queue);
+		VDBG(dev, "list_add_tail() \n");
+	}
+
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	VDBG(dev, "<--- %s()\n", __func__);
+	return 0;
+}
+
+
+/* dequeue (cancels, unlinks) an I/O request from an endpoint */
+static int langwell_ep_dequeue(struct usb_ep *_ep, struct usb_request *_req)
+{
+	struct langwell_ep	*ep;
+	struct langwell_udc	*dev;
+	struct langwell_request	*req;
+	unsigned long		flags;
+	int			stopped, ep_num, retval = 0;
+	u32			endptctrl;
+
+	ep = container_of(_ep, struct langwell_ep, ep);
+	dev = ep->dev;
+	VDBG(dev, "---> %s()\n", __func__);
+
+	if (!_ep || !ep->desc || !_req)
+		return -EINVAL;
+
+	if (!dev->driver)
+		return -ESHUTDOWN;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	stopped = ep->stopped;
+
+	/* quiesce dma while we patch the queue */
+	ep->stopped = 1;
+	ep_num = ep->ep_num;
+
+	/* disable endpoint control register */
+	endptctrl = readl(&dev->op_regs->endptctrl[ep_num]);
+	if (is_in(ep))
+		endptctrl &= ~EPCTRL_TXE;
+	else
+		endptctrl &= ~EPCTRL_RXE;
+	writel(endptctrl, &dev->op_regs->endptctrl[ep_num]);
+
+	/* make sure it's still queued on this endpoint */
+	list_for_each_entry(req, &ep->queue, queue) {
+		if (&req->req == _req)
+			break;
+	}
+
+	if (&req->req != _req) {
+		retval = -EINVAL;
+		goto done;
+	}
+
+	/* queue head may be partially complete. */
+	if (ep->queue.next == &req->queue) {
+		DBG(dev, "unlink (%s) dma\n", _ep->name);
+		_req->status = -ECONNRESET;
+		langwell_ep_fifo_flush(&ep->ep);
+
+		/* not the last request in endpoint queue */
+		if (likely(ep->queue.next == &req->queue)) {
+			struct langwell_dqh	*dqh;
+			struct langwell_request	*next_req;
+
+			dqh = ep->dqh;
+			next_req = list_entry(req->queue.next,
+					struct langwell_request, queue);
+
+			/* point the dQH to the first dTD of next request */
+			writel((u32) next_req->head, &dqh->dqh_current);
+		}
+	} else {
+		struct langwell_request	*prev_req;
+
+		prev_req = list_entry(req->queue.prev,
+				struct langwell_request, queue);
+		writel(readl(&req->tail->dtd_next),
+				&prev_req->tail->dtd_next);
+	}
+
+	done(ep, req, -ECONNRESET);
+
+done:
+	/* enable endpoint again */
+	endptctrl = readl(&dev->op_regs->endptctrl[ep_num]);
+	if (is_in(ep))
+		endptctrl |= EPCTRL_TXE;
+	else
+		endptctrl |= EPCTRL_RXE;
+	writel(endptctrl, &dev->op_regs->endptctrl[ep_num]);
+
+	ep->stopped = stopped;
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	VDBG(dev, "<--- %s()\n", __func__);
+	return retval;
+}
+
+
+/*-------------------------------------------------------------------------*/
+
+/* endpoint set/clear halt */
+static void ep_set_halt(struct langwell_ep *ep, int value)
+{
+	u32			endptctrl = 0;
+	int			ep_num;
+	struct langwell_udc	*dev = ep->dev;
+	VDBG(dev, "---> %s()\n", __func__);
+
+	ep_num = ep->ep_num;
+	endptctrl = readl(&dev->op_regs->endptctrl[ep_num]);
+
+	/* value: 1 - set halt, 0 - clear halt */
+	if (value) {
+		/* set the stall bit */
+		if (is_in(ep))
+			endptctrl |= EPCTRL_TXS;
+		else
+			endptctrl |= EPCTRL_RXS;
+	} else {
+		/* clear the stall bit and reset data toggle */
+		if (is_in(ep)) {
+			endptctrl &= ~EPCTRL_TXS;
+			endptctrl |= EPCTRL_TXR;
+		} else {
+			endptctrl &= ~EPCTRL_RXS;
+			endptctrl |= EPCTRL_RXR;
+		}
+	}
+
+	writel(endptctrl, &dev->op_regs->endptctrl[ep_num]);
+
+	VDBG(dev, "<--- %s()\n", __func__);
+}
+
+
+/* set the endpoint halt feature */
+static int langwell_ep_set_halt(struct usb_ep *_ep, int value)
+{
+	struct langwell_ep	*ep;
+	struct langwell_udc	*dev;
+	unsigned long		flags;
+	int			retval = 0;
+
+	ep = container_of(_ep, struct langwell_ep, ep);
+	dev = ep->dev;
+
+	VDBG(dev, "---> %s()\n", __func__);
+
+	if (!_ep || !ep->desc)
+		return -EINVAL;
+
+	if (!dev->driver || dev->gadget.speed == USB_SPEED_UNKNOWN)
+		return -ESHUTDOWN;
+
+	if (ep->desc && (ep->desc->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK)
+			== USB_ENDPOINT_XFER_ISOC)
+		return  -EOPNOTSUPP;
+
+	spin_lock_irqsave(&dev->lock, flags);
+
+	/*
+	 * attempt to halt IN ep will fail if any transfer requests
+	 * are still queue
+	 */
+	if (!list_empty(&ep->queue) && is_in(ep) && value) {
+		/* IN endpoint FIFO holds bytes */
+		DBG(dev, "%s FIFO holds bytes\n", _ep->name);
+		retval = -EAGAIN;
+		goto done;
+	}
+
+	/* endpoint set/clear halt */
+	if (ep->ep_num) {
+		ep_set_halt(ep, value);
+	} else { /* endpoint 0 */
+		dev->ep0_state = WAIT_FOR_SETUP;
+		dev->ep0_dir = USB_DIR_OUT;
+	}
+done:
+	spin_unlock_irqrestore(&dev->lock, flags);
+	DBG(dev, "%s %s halt\n", _ep->name, value ? "set" : "clear");
+	VDBG(dev, "<--- %s()\n", __func__);
+	return retval;
+}
+
+
+/* set the halt feature and ignores clear requests */
+static int langwell_ep_set_wedge(struct usb_ep *_ep)
+{
+	struct langwell_ep	*ep;
+	struct langwell_udc	*dev;
+
+	ep = container_of(_ep, struct langwell_ep, ep);
+	dev = ep->dev;
+
+	VDBG(dev, "---> %s()\n", __func__);
+
+	if (!_ep || !ep->desc)
+		return -EINVAL;
+
+	VDBG(dev, "<--- %s()\n", __func__);
+	return usb_ep_set_halt(_ep);
+}
+
+
+/* flush contents of a fifo */
+static void langwell_ep_fifo_flush(struct usb_ep *_ep)
+{
+	struct langwell_ep	*ep;
+	struct langwell_udc	*dev;
+	u32			flush_bit;
+	unsigned long		timeout;
+
+	ep = container_of(_ep, struct langwell_ep, ep);
+	dev = ep->dev;
+
+	VDBG(dev, "---> %s()\n", __func__);
+
+	if (!_ep || !ep->desc) {
+		VDBG(dev, "ep or ep->desc is NULL\n");
+		VDBG(dev, "<--- %s()\n", __func__);
+		return;
+	}
+
+	VDBG(dev, "%s-%s fifo flush\n", _ep->name, is_in(ep) ? "in" : "out");
+
+	/* flush endpoint buffer */
+	if (ep->ep_num == 0)
+		flush_bit = (1 << 16) | 1;
+	else if (is_in(ep))
+		flush_bit = 1 << (ep->ep_num + 16);	/* TX */
+	else
+		flush_bit = 1 << ep->ep_num;		/* RX */
+
+	/* wait until flush complete */
+	timeout = jiffies + FLUSH_TIMEOUT;
+	do {
+		writel(flush_bit, &dev->op_regs->endptflush);
+		while (readl(&dev->op_regs->endptflush)) {
+			if (time_after(jiffies, timeout)) {
+				ERROR(dev, "ep flush timeout\n");
+				goto done;
+			}
+			cpu_relax();
+		}
+	} while (readl(&dev->op_regs->endptstat) & flush_bit);
+done:
+	VDBG(dev, "<--- %s()\n", __func__);
+}
+
+
+/* endpoints operations structure */
+static const struct usb_ep_ops langwell_ep_ops = {
+
+	/* configure endpoint, making it usable */
+	.enable		= langwell_ep_enable,
+
+	/* endpoint is no longer usable */
+	.disable	= langwell_ep_disable,
+
+	/* allocate a request object to use with this endpoint */
+	.alloc_request	= langwell_alloc_request,
+
+	/* free a request object */
+	.free_request	= langwell_free_request,
+
+	/* queue (submits) an I/O requests to an endpoint */
+	.queue		= langwell_ep_queue,
+
+	/* dequeue (cancels, unlinks) an I/O request from an endpoint */
+	.dequeue	= langwell_ep_dequeue,
+
+	/* set the endpoint halt feature */
+	.set_halt	= langwell_ep_set_halt,
+
+	/* set the halt feature and ignores clear requests */
+	.set_wedge	= langwell_ep_set_wedge,
+
+	/* flush contents of a fifo */
+	.fifo_flush	= langwell_ep_fifo_flush,
+};
+
+
+/*-------------------------------------------------------------------------*/
+
+/* device controller usb_gadget_ops structure */
+
+/* returns the current frame number */
+static int langwell_get_frame(struct usb_gadget *_gadget)
+{
+	struct langwell_udc	*dev;
+	u16			retval;
+
+	if (!_gadget)
+		return -ENODEV;
+
+	dev = container_of(_gadget, struct langwell_udc, gadget);
+	VDBG(dev, "---> %s()\n", __func__);
+
+	retval = readl(&dev->op_regs->frindex) & FRINDEX_MASK;
+
+	VDBG(dev, "<--- %s()\n", __func__);
+	return retval;
+}
+
+
+/* tries to wake up the host connected to this gadget */
+static int langwell_wakeup(struct usb_gadget *_gadget)
+{
+	struct langwell_udc	*dev;
+	u32 			portsc1, devlc;
+	unsigned long   	flags;
+
+	if (!_gadget)
+		return 0;
+
+	dev = container_of(_gadget, struct langwell_udc, gadget);
+	VDBG(dev, "---> %s()\n", __func__);
+
+	/* Remote Wakeup feature not enabled by host */
+	if (!dev->remote_wakeup)
+		return -ENOTSUPP;
+
+	spin_lock_irqsave(&dev->lock, flags);
+
+	portsc1 = readl(&dev->op_regs->portsc1);
+	if (!(portsc1 & PORTS_SUSP)) {
+		spin_unlock_irqrestore(&dev->lock, flags);
+		return 0;
+	}
+
+	/* LPM L1 to L0, remote wakeup */
+	if (dev->lpm && dev->lpm_state == LPM_L1) {
+		portsc1 |= PORTS_SLP;
+		writel(portsc1, &dev->op_regs->portsc1);
+	}
+
+	/* force port resume */
+	if (dev->usb_state == USB_STATE_SUSPENDED) {
+		portsc1 |= PORTS_FPR;
+		writel(portsc1, &dev->op_regs->portsc1);
+	}
+
+	/* exit PHY low power suspend */
+	devlc = readl(&dev->op_regs->devlc);
+	VDBG(dev, "devlc = 0x%08x\n", devlc);
+	devlc &= ~LPM_PHCD;
+	writel(devlc, &dev->op_regs->devlc);
+
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	VDBG(dev, "<--- %s()\n", __func__);
+	return 0;
+}
+
+
+/* notify controller that VBUS is powered or not */
+static int langwell_vbus_session(struct usb_gadget *_gadget, int is_active)
+{
+	struct langwell_udc	*dev;
+	unsigned long		flags;
+	u32             	usbcmd;
+
+	if (!_gadget)
+		return -ENODEV;
+
+	dev = container_of(_gadget, struct langwell_udc, gadget);
+	VDBG(dev, "---> %s()\n", __func__);
+
+	spin_lock_irqsave(&dev->lock, flags);
+	VDBG(dev, "VBUS status: %s\n", is_active ? "on" : "off");
+
+	dev->vbus_active = (is_active != 0);
+	if (dev->driver && dev->softconnected && dev->vbus_active) {
+		usbcmd = readl(&dev->op_regs->usbcmd);
+		usbcmd |= CMD_RUNSTOP;
+		writel(usbcmd, &dev->op_regs->usbcmd);
+	} else {
+		usbcmd = readl(&dev->op_regs->usbcmd);
+		usbcmd &= ~CMD_RUNSTOP;
+		writel(usbcmd, &dev->op_regs->usbcmd);
+	}
+
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	VDBG(dev, "<--- %s()\n", __func__);
+	return 0;
+}
+
+
+/* constrain controller's VBUS power usage */
+static int langwell_vbus_draw(struct usb_gadget *_gadget, unsigned mA)
+{
+	struct langwell_udc	*dev;
+
+	if (!_gadget)
+		return -ENODEV;
+
+	dev = container_of(_gadget, struct langwell_udc, gadget);
+	VDBG(dev, "---> %s()\n", __func__);
+
+	if (dev->transceiver) {
+		VDBG(dev, "otg_set_power\n");
+		VDBG(dev, "<--- %s()\n", __func__);
+		return otg_set_power(dev->transceiver, mA);
+	}
+
+	VDBG(dev, "<--- %s()\n", __func__);
+	return -ENOTSUPP;
+}
+
+
+/* D+ pullup, software-controlled connect/disconnect to USB host */
+static int langwell_pullup(struct usb_gadget *_gadget, int is_on)
+{
+	struct langwell_udc	*dev;
+	u32             	usbcmd;
+	unsigned long   	flags;
+
+	if (!_gadget)
+		return -ENODEV;
+
+	dev = container_of(_gadget, struct langwell_udc, gadget);
+
+	VDBG(dev, "---> %s()\n", __func__);
+
+	spin_lock_irqsave(&dev->lock, flags);
+	dev->softconnected = (is_on != 0);
+
+	if (dev->driver && dev->softconnected && dev->vbus_active) {
+		usbcmd = readl(&dev->op_regs->usbcmd);
+		usbcmd |= CMD_RUNSTOP;
+		writel(usbcmd, &dev->op_regs->usbcmd);
+	} else {
+		usbcmd = readl(&dev->op_regs->usbcmd);
+		usbcmd &= ~CMD_RUNSTOP;
+		writel(usbcmd, &dev->op_regs->usbcmd);
+	}
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	VDBG(dev, "<--- %s()\n", __func__);
+	return 0;
+}
+
+
+/* device controller usb_gadget_ops structure */
+static const struct usb_gadget_ops langwell_ops = {
+
+	/* returns the current frame number */
+	.get_frame	= langwell_get_frame,
+
+	/* tries to wake up the host connected to this gadget */
+	.wakeup		= langwell_wakeup,
+
+	/* set the device selfpowered feature, always selfpowered */
+	/* .set_selfpowered = langwell_set_selfpowered, */
+
+	/* notify controller that VBUS is powered or not */
+	.vbus_session	= langwell_vbus_session,
+
+	/* constrain controller's VBUS power usage */
+	.vbus_draw	= langwell_vbus_draw,
+
+	/* D+ pullup, software-controlled connect/disconnect to USB host */
+	.pullup		= langwell_pullup,
+};
+
+
+/*-------------------------------------------------------------------------*/
+
+/* device controller operations */
+
+/* reset device controller */
+static int langwell_udc_reset(struct langwell_udc *dev)
+{
+	u32		usbcmd, usbmode, devlc, endpointlistaddr;
+	unsigned long	timeout;
+
+	if (!dev)
+		return -EINVAL;
+
+	DBG(dev, "---> %s()\n", __func__);
+
+	/* set controller to stop state */
+	usbcmd = readl(&dev->op_regs->usbcmd);
+	usbcmd &= ~CMD_RUNSTOP;
+	writel(usbcmd, &dev->op_regs->usbcmd);
+
+	/* reset device controller */
+	usbcmd = readl(&dev->op_regs->usbcmd);
+	usbcmd |= CMD_RST;
+	writel(usbcmd, &dev->op_regs->usbcmd);
+
+	/* wait for reset to complete */
+	timeout = jiffies + RESET_TIMEOUT;
+	while (readl(&dev->op_regs->usbcmd) & CMD_RST) {
+		if (time_after(jiffies, timeout)) {
+			ERROR(dev, "device reset timeout\n");
+			return -ETIMEDOUT;
+		}
+		cpu_relax();
+	}
+
+	/* set controller to device mode */
+	usbmode = readl(&dev->op_regs->usbmode);
+	usbmode |= MODE_DEVICE;
+
+	/* turn setup lockout off, require setup tripwire in usbcmd */
+	usbmode |= MODE_SLOM;
+
+	writel(usbmode, &dev->op_regs->usbmode);
+	usbmode = readl(&dev->op_regs->usbmode);
+	VDBG(dev, "usbmode=0x%08x\n", usbmode);
+
+	/* Write-Clear setup status */
+	writel(0, &dev->op_regs->usbsts);
+
+	/* if support USB LPM, ACK all LPM token */
+	if (dev->lpm) {
+		devlc = readl(&dev->op_regs->devlc);
+		devlc &= ~LPM_STL;	/* don't STALL LPM token */
+		devlc &= ~LPM_NYT_ACK;	/* ACK LPM token */
+		writel(devlc, &dev->op_regs->devlc);
+	}
+
+	/* fill endpointlistaddr register */
+	endpointlistaddr = dev->ep_dqh_dma;
+	endpointlistaddr &= ENDPOINTLISTADDR_MASK;
+	writel(endpointlistaddr, &dev->op_regs->endpointlistaddr);
+
+	VDBG(dev, "dQH base (vir: %p, phy: 0x%08x), endpointlistaddr=0x%08x\n",
+			dev->ep_dqh, endpointlistaddr,
+			readl(&dev->op_regs->endpointlistaddr));
+	DBG(dev, "<--- %s()\n", __func__);
+	return 0;
+}
+
+
+/* reinitialize device controller endpoints */
+static int eps_reinit(struct langwell_udc *dev)
+{
+	struct langwell_ep	*ep;
+	char			name[14];
+	int			i;
+
+	VDBG(dev, "---> %s()\n", __func__);
+
+	/* initialize ep0 */
+	ep = &dev->ep[0];
+	ep->dev = dev;
+	strncpy(ep->name, "ep0", sizeof(ep->name));
+	ep->ep.name = ep->name;
+	ep->ep.ops = &langwell_ep_ops;
+	ep->stopped = 0;
+	ep->ep.maxpacket = EP0_MAX_PKT_SIZE;
+	ep->ep_num = 0;
+	ep->desc = &langwell_ep0_desc;
+	INIT_LIST_HEAD(&ep->queue);
+
+	ep->ep_type = USB_ENDPOINT_XFER_CONTROL;
+
+	/* initialize other endpoints */
+	for (i = 2; i < dev->ep_max; i++) {
+		ep = &dev->ep[i];
+		if (i % 2)
+			snprintf(name, sizeof(name), "ep%din", i / 2);
+		else
+			snprintf(name, sizeof(name), "ep%dout", i / 2);
+		ep->dev = dev;
+		strncpy(ep->name, name, sizeof(ep->name));
+		ep->ep.name = ep->name;
+
+		ep->ep.ops = &langwell_ep_ops;
+		ep->stopped = 0;
+		ep->ep.maxpacket = (unsigned short) ~0;
+		ep->ep_num = i / 2;
+
+		INIT_LIST_HEAD(&ep->queue);
+		list_add_tail(&ep->ep.ep_list, &dev->gadget.ep_list);
+
+		ep->dqh = &dev->ep_dqh[i];
+	}
+
+	VDBG(dev, "<--- %s()\n", __func__);
+	return 0;
+}
+
+
+/* enable interrupt and set controller to run state */
+static void langwell_udc_start(struct langwell_udc *dev)
+{
+	u32	usbintr, usbcmd;
+	DBG(dev, "---> %s()\n", __func__);
+
+	/* enable interrupts */
+	usbintr = INTR_ULPIE	/* ULPI */
+		| INTR_SLE	/* suspend */
+		/* | INTR_SRE	SOF received */
+		| INTR_URE	/* USB reset */
+		| INTR_AAE	/* async advance */
+		| INTR_SEE	/* system error */
+		| INTR_FRE	/* frame list rollover */
+		| INTR_PCE	/* port change detect */
+		| INTR_UEE	/* USB error interrupt */
+		| INTR_UE;	/* USB interrupt */
+	writel(usbintr, &dev->op_regs->usbintr);
+
+	/* clear stopped bit */
+	dev->stopped = 0;
+
+	/* set controller to run */
+	usbcmd = readl(&dev->op_regs->usbcmd);
+	usbcmd |= CMD_RUNSTOP;
+	writel(usbcmd, &dev->op_regs->usbcmd);
+
+	DBG(dev, "<--- %s()\n", __func__);
+	return;
+}
+
+
+/* disable interrupt and set controller to stop state */
+static void langwell_udc_stop(struct langwell_udc *dev)
+{
+	u32	usbcmd;
+
+	DBG(dev, "---> %s()\n", __func__);
+
+	/* disable all interrupts */
+	writel(0, &dev->op_regs->usbintr);
+
+	/* set stopped bit */
+	dev->stopped = 1;
+
+	/* set controller to stop state */
+	usbcmd = readl(&dev->op_regs->usbcmd);
+	usbcmd &= ~CMD_RUNSTOP;
+	writel(usbcmd, &dev->op_regs->usbcmd);
+
+	DBG(dev, "<--- %s()\n", __func__);
+	return;
+}
+
+
+/* stop all USB activities */
+static void stop_activity(struct langwell_udc *dev,
+		struct usb_gadget_driver *driver)
+{
+	struct langwell_ep	*ep;
+	DBG(dev, "---> %s()\n", __func__);
+
+	nuke(&dev->ep[0], -ESHUTDOWN);
+
+	list_for_each_entry(ep, &dev->gadget.ep_list, ep.ep_list) {
+		nuke(ep, -ESHUTDOWN);
+	}
+
+	/* report disconnect; the driver is already quiesced */
+	if (driver) {
+		spin_unlock(&dev->lock);
+		driver->disconnect(&dev->gadget);
+		spin_lock(&dev->lock);
+	}
+
+	DBG(dev, "<--- %s()\n", __func__);
+}
+
+
+/*-------------------------------------------------------------------------*/
+
+/* device "function" sysfs attribute file */
+static ssize_t show_function(struct device *_dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct langwell_udc	*dev = the_controller;
+
+	if (!dev->driver || !dev->driver->function
+			|| strlen(dev->driver->function) > PAGE_SIZE)
+		return 0;
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", dev->driver->function);
+}
+static DEVICE_ATTR(function, S_IRUGO, show_function, NULL);
+
+
+/* device "langwell_udc" sysfs attribute file */
+static ssize_t show_langwell_udc(struct device *_dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct langwell_udc	*dev = the_controller;
+	struct langwell_request *req;
+	struct langwell_ep	*ep = NULL;
+	char			*next;
+	unsigned		size;
+	unsigned		t;
+	unsigned		i;
+	unsigned long		flags;
+	u32			tmp_reg;
+
+	next = buf;
+	size = PAGE_SIZE;
+	spin_lock_irqsave(&dev->lock, flags);
+
+	/* driver basic information */
+	t = scnprintf(next, size,
+			DRIVER_DESC "\n"
+			"%s version: %s\n"
+			"Gadget driver: %s\n\n",
+			driver_name, DRIVER_VERSION,
+			dev->driver ? dev->driver->driver.name : "(none)");
+	size -= t;
+	next += t;
+
+	/* device registers */
+	tmp_reg = readl(&dev->op_regs->usbcmd);
+	t = scnprintf(next, size,
+			"USBCMD reg:\n"
+			"SetupTW: %d\n"
+			"Run/Stop: %s\n\n",
+			(tmp_reg & CMD_SUTW) ? 1 : 0,
+			(tmp_reg & CMD_RUNSTOP) ? "Run" : "Stop");
+	size -= t;
+	next += t;
+
+	tmp_reg = readl(&dev->op_regs->usbsts);
+	t = scnprintf(next, size,
+			"USB Status Reg:\n"
+			"Device Suspend: %d\n"
+			"Reset Received: %d\n"
+			"System Error: %s\n"
+			"USB Error Interrupt: %s\n\n",
+			(tmp_reg & STS_SLI) ? 1 : 0,
+			(tmp_reg & STS_URI) ? 1 : 0,
+			(tmp_reg & STS_SEI) ? "Error" : "No error",
+			(tmp_reg & STS_UEI) ? "Error detected" : "No error");
+	size -= t;
+	next += t;
+
+	tmp_reg = readl(&dev->op_regs->usbintr);
+	t = scnprintf(next, size,
+			"USB Intrrupt Enable Reg:\n"
+			"Sleep Enable: %d\n"
+			"SOF Received Enable: %d\n"
+			"Reset Enable: %d\n"
+			"System Error Enable: %d\n"
+			"Port Change Dectected Enable: %d\n"
+			"USB Error Intr Enable: %d\n"
+			"USB Intr Enable: %d\n\n",
+			(tmp_reg & INTR_SLE) ? 1 : 0,
+			(tmp_reg & INTR_SRE) ? 1 : 0,
+			(tmp_reg & INTR_URE) ? 1 : 0,
+			(tmp_reg & INTR_SEE) ? 1 : 0,
+			(tmp_reg & INTR_PCE) ? 1 : 0,
+			(tmp_reg & INTR_UEE) ? 1 : 0,
+			(tmp_reg & INTR_UE) ? 1 : 0);
+	size -= t;
+	next += t;
+
+	tmp_reg = readl(&dev->op_regs->frindex);
+	t = scnprintf(next, size,
+			"USB Frame Index Reg:\n"
+			"Frame Number is 0x%08x\n\n",
+			(tmp_reg & FRINDEX_MASK));
+	size -= t;
+	next += t;
+
+	tmp_reg = readl(&dev->op_regs->deviceaddr);
+	t = scnprintf(next, size,
+			"USB Device Address Reg:\n"
+			"Device Addr is 0x%x\n\n",
+			USBADR(tmp_reg));
+	size -= t;
+	next += t;
+
+	tmp_reg = readl(&dev->op_regs->endpointlistaddr);
+	t = scnprintf(next, size,
+			"USB Endpoint List Address Reg:\n"
+			"Endpoint List Pointer is 0x%x\n\n",
+			EPBASE(tmp_reg));
+	size -= t;
+	next += t;
+
+	tmp_reg = readl(&dev->op_regs->portsc1);
+	t = scnprintf(next, size,
+		"USB Port Status & Control Reg:\n"
+		"Port Reset: %s\n"
+		"Port Suspend Mode: %s\n"
+		"Over-current Change: %s\n"
+		"Port Enable/Disable Change: %s\n"
+		"Port Enabled/Disabled: %s\n"
+		"Current Connect Status: %s\n\n",
+		(tmp_reg & PORTS_PR) ? "Reset" : "Not Reset",
+		(tmp_reg & PORTS_SUSP) ? "Suspend " : "Not Suspend",
+		(tmp_reg & PORTS_OCC) ? "Detected" : "No",
+		(tmp_reg & PORTS_PEC) ? "Changed" : "Not Changed",
+		(tmp_reg & PORTS_PE) ? "Enable" : "Not Correct",
+		(tmp_reg & PORTS_CCS) ?  "Attached" : "Not Attached");
+	size -= t;
+	next += t;
+
+	tmp_reg = readl(&dev->op_regs->devlc);
+	t = scnprintf(next, size,
+		"Device LPM Control Reg:\n"
+		"Parallel Transceiver : %d\n"
+		"Serial Transceiver : %d\n"
+		"Port Speed: %s\n"
+		"Port Force Full Speed Connenct: %s\n"
+		"PHY Low Power Suspend Clock Disable: %s\n"
+		"BmAttributes: %d\n\n",
+		LPM_PTS(tmp_reg),
+		(tmp_reg & LPM_STS) ? 1 : 0,
+		({
+			char	*s;
+			switch (LPM_PSPD(tmp_reg)) {
+			case LPM_SPEED_FULL:
+				s = "Full Speed"; break;
+			case LPM_SPEED_LOW:
+				s = "Low Speed"; break;
+			case LPM_SPEED_HIGH:
+				s = "High Speed"; break;
+			default:
+				s = "Unknown Speed"; break;
+			}
+			s;
+		}),
+		(tmp_reg & LPM_PFSC) ? "Force Full Speed" : "Not Force",
+		(tmp_reg & LPM_PHCD) ? "Disabled" : "Enabled",
+		LPM_BA(tmp_reg));
+	size -= t;
+	next += t;
+
+	tmp_reg = readl(&dev->op_regs->usbmode);
+	t = scnprintf(next, size,
+			"USB Mode Reg:\n"
+			"Controller Mode is : %s\n\n", ({
+				char *s;
+				switch (MODE_CM(tmp_reg)) {
+				case MODE_IDLE:
+					s = "Idle"; break;
+				case MODE_DEVICE:
+					s = "Device Controller"; break;
+				case MODE_HOST:
+					s = "Host Controller"; break;
+				default:
+					s = "None"; break;
+				}
+				s;
+			}));
+	size -= t;
+	next += t;
+
+	tmp_reg = readl(&dev->op_regs->endptsetupstat);
+	t = scnprintf(next, size,
+			"Endpoint Setup Status Reg:\n"
+			"SETUP on ep 0x%04x\n\n",
+			tmp_reg & SETUPSTAT_MASK);
+	size -= t;
+	next += t;
+
+	for (i = 0; i < dev->ep_max / 2; i++) {
+		tmp_reg = readl(&dev->op_regs->endptctrl[i]);
+		t = scnprintf(next, size, "EP Ctrl Reg [%d]: 0x%08x\n",
+				i, tmp_reg);
+		size -= t;
+		next += t;
+	}
+	tmp_reg = readl(&dev->op_regs->endptprime);
+	t = scnprintf(next, size, "EP Prime Reg: 0x%08x\n\n", tmp_reg);
+	size -= t;
+	next += t;
+
+	/* langwell_udc, langwell_ep, langwell_request structure information */
+	ep = &dev->ep[0];
+	t = scnprintf(next, size, "%s MaxPacketSize: 0x%x, ep_num: %d\n",
+			ep->ep.name, ep->ep.maxpacket, ep->ep_num);
+	size -= t;
+	next += t;
+
+	if (list_empty(&ep->queue)) {
+		t = scnprintf(next, size, "its req queue is empty\n\n");
+		size -= t;
+		next += t;
+	} else {
+		list_for_each_entry(req, &ep->queue, queue) {
+			t = scnprintf(next, size,
+				"req %p actual 0x%x length 0x%x  buf %p\n",
+				&req->req, req->req.actual,
+				req->req.length, req->req.buf);
+			size -= t;
+			next += t;
+		}
+	}
+	/* other gadget->eplist ep */
+	list_for_each_entry(ep, &dev->gadget.ep_list, ep.ep_list) {
+		if (ep->desc) {
+			t = scnprintf(next, size,
+					"\n%s MaxPacketSize: 0x%x, "
+					"ep_num: %d\n",
+					ep->ep.name, ep->ep.maxpacket,
+					ep->ep_num);
+			size -= t;
+			next += t;
+
+			if (list_empty(&ep->queue)) {
+				t = scnprintf(next, size,
+						"its req queue is empty\n\n");
+				size -= t;
+				next += t;
+			} else {
+				list_for_each_entry(req, &ep->queue, queue) {
+					t = scnprintf(next, size,
+						"req %p actual 0x%x length "
+						"0x%x  buf %p\n",
+						&req->req, req->req.actual,
+						req->req.length, req->req.buf);
+					size -= t;
+					next += t;
+				}
+			}
+		}
+	}
+
+	spin_unlock_irqrestore(&dev->lock, flags);
+	return PAGE_SIZE - size;
+}
+static DEVICE_ATTR(langwell_udc, S_IRUGO, show_langwell_udc, NULL);
+
+
+/*-------------------------------------------------------------------------*/
+
+/*
+ * when a driver is successfully registered, it will receive
+ * control requests including set_configuration(), which enables
+ * non-control requests.  then usb traffic follows until a
+ * disconnect is reported.  then a host may connect again, or
+ * the driver might get unbound.
+ */
+
+int usb_gadget_register_driver(struct usb_gadget_driver *driver)
+{
+	struct langwell_udc	*dev = the_controller;
+	unsigned long		flags;
+	int			retval;
+
+	if (!dev)
+		return -ENODEV;
+
+	DBG(dev, "---> %s()\n", __func__);
+
+	if (dev->driver)
+		return -EBUSY;
+
+	spin_lock_irqsave(&dev->lock, flags);
+
+	/* hook up the driver ... */
+	driver->driver.bus = NULL;
+	dev->driver = driver;
+	dev->gadget.dev.driver = &driver->driver;
+
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	retval = driver->bind(&dev->gadget);
+	if (retval) {
+		DBG(dev, "bind to driver %s --> %d\n",
+				driver->driver.name, retval);
+		dev->driver = NULL;
+		dev->gadget.dev.driver = NULL;
+		return retval;
+	}
+
+	retval = device_create_file(&dev->pdev->dev, &dev_attr_function);
+	if (retval)
+		goto err_unbind;
+
+	dev->usb_state = USB_STATE_ATTACHED;
+	dev->ep0_state = WAIT_FOR_SETUP;
+	dev->ep0_dir = USB_DIR_OUT;
+
+	/* enable interrupt and set controller to run state */
+	if (dev->got_irq)
+		langwell_udc_start(dev);
+
+	VDBG(dev, "After langwell_udc_start(), print all registers:\n");
+#ifdef	VERBOSE
+	print_all_registers(dev);
+#endif
+
+	INFO(dev, "register driver: %s\n", driver->driver.name);
+	VDBG(dev, "<--- %s()\n", __func__);
+	return 0;
+
+err_unbind:
+	driver->unbind(&dev->gadget);
+	dev->gadget.dev.driver = NULL;
+	dev->driver = NULL;
+
+	DBG(dev, "<--- %s()\n", __func__);
+	return retval;
+}
+EXPORT_SYMBOL(usb_gadget_register_driver);
+
+
+/* unregister gadget driver */
+int usb_gadget_unregister_driver(struct usb_gadget_driver *driver)
+{
+	struct langwell_udc	*dev = the_controller;
+	unsigned long		flags;
+
+	if (!dev)
+		return -ENODEV;
+
+	DBG(dev, "---> %s()\n", __func__);
+
+	if (unlikely(!driver || !driver->bind || !driver->unbind))
+		return -EINVAL;
+
+	/* unbind OTG transceiver */
+	if (dev->transceiver)
+		(void)otg_set_peripheral(dev->transceiver, 0);
+
+	/* disable interrupt and set controller to stop state */
+	langwell_udc_stop(dev);
+
+	dev->usb_state = USB_STATE_ATTACHED;
+	dev->ep0_state = WAIT_FOR_SETUP;
+	dev->ep0_dir = USB_DIR_OUT;
+
+	spin_lock_irqsave(&dev->lock, flags);
+
+	/* stop all usb activities */
+	dev->gadget.speed = USB_SPEED_UNKNOWN;
+	stop_activity(dev, driver);
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	/* unbind gadget driver */
+	driver->unbind(&dev->gadget);
+	dev->gadget.dev.driver = NULL;
+	dev->driver = NULL;
+
+	device_remove_file(&dev->pdev->dev, &dev_attr_function);
+
+	INFO(dev, "unregistered driver '%s'\n", driver->driver.name);
+	DBG(dev, "<--- %s()\n", __func__);
+	return 0;
+}
+EXPORT_SYMBOL(usb_gadget_unregister_driver);
+
+
+/*-------------------------------------------------------------------------*/
+
+/*
+ * setup tripwire is used as a semaphore to ensure that the setup data
+ * payload is extracted from a dQH without being corrupted
+ */
+static void setup_tripwire(struct langwell_udc *dev)
+{
+	u32			usbcmd,
+				endptsetupstat;
+	unsigned long		timeout;
+	struct langwell_dqh	*dqh;
+
+	VDBG(dev, "---> %s()\n", __func__);
+
+	/* ep0 OUT dQH */
+	dqh = &dev->ep_dqh[EP_DIR_OUT];
+
+	/* Write-Clear endptsetupstat */
+	endptsetupstat = readl(&dev->op_regs->endptsetupstat);
+	writel(endptsetupstat, &dev->op_regs->endptsetupstat);
+
+	/* wait until endptsetupstat is cleared */
+	timeout = jiffies + SETUPSTAT_TIMEOUT;
+	while (readl(&dev->op_regs->endptsetupstat)) {
+		if (time_after(jiffies, timeout)) {
+			ERROR(dev, "setup_tripwire timeout\n");
+			break;
+		}
+		cpu_relax();
+	}
+
+	/* while a hazard exists when setup packet arrives */
+	do {
+		/* set setup tripwire bit */
+		usbcmd = readl(&dev->op_regs->usbcmd);
+		writel(usbcmd | CMD_SUTW, &dev->op_regs->usbcmd);
+
+		/* copy the setup packet to local buffer */
+		memcpy(&dev->local_setup_buff, &dqh->dqh_setup, 8);
+	} while (!(readl(&dev->op_regs->usbcmd) & CMD_SUTW));
+
+	/* Write-Clear setup tripwire bit */
+	usbcmd = readl(&dev->op_regs->usbcmd);
+	writel(usbcmd & ~CMD_SUTW, &dev->op_regs->usbcmd);
+
+	VDBG(dev, "<--- %s()\n", __func__);
+}
+
+
+/* protocol ep0 stall, will automatically be cleared on new transaction */
+static void ep0_stall(struct langwell_udc *dev)
+{
+	u32	endptctrl;
+
+	VDBG(dev, "---> %s()\n", __func__);
+
+	/* set TX and RX to stall */
+	endptctrl = readl(&dev->op_regs->endptctrl[0]);
+	endptctrl |= EPCTRL_TXS | EPCTRL_RXS;
+	writel(endptctrl, &dev->op_regs->endptctrl[0]);
+
+	/* update ep0 state */
+	dev->ep0_state = WAIT_FOR_SETUP;
+	dev->ep0_dir = USB_DIR_OUT;
+
+	VDBG(dev, "<--- %s()\n", __func__);
+}
+
+
+/* PRIME a status phase for ep0 */
+static int prime_status_phase(struct langwell_udc *dev, int dir)
+{
+	struct langwell_request	*req;
+	struct langwell_ep	*ep;
+	int			status = 0;
+
+	VDBG(dev, "---> %s()\n", __func__);
+
+	if (dir == EP_DIR_IN)
+		dev->ep0_dir = USB_DIR_IN;
+	else
+		dev->ep0_dir = USB_DIR_OUT;
+
+	ep = &dev->ep[0];
+	dev->ep0_state = WAIT_FOR_OUT_STATUS;
+
+	req = dev->status_req;
+
+	req->ep = ep;
+	req->req.length = 0;
+	req->req.status = -EINPROGRESS;
+	req->req.actual = 0;
+	req->req.complete = NULL;
+	req->dtd_count = 0;
+
+	if (!req_to_dtd(req))
+		status = queue_dtd(ep, req);
+	else
+		return -ENOMEM;
+
+	if (status)
+		ERROR(dev, "can't queue ep0 status request\n");
+
+	list_add_tail(&req->queue, &ep->queue);
+
+	VDBG(dev, "<--- %s()\n", __func__);
+	return status;
+}
+
+
+/* SET_ADDRESS request routine */
+static void set_address(struct langwell_udc *dev, u16 value,
+		u16 index, u16 length)
+{
+	VDBG(dev, "---> %s()\n", __func__);
+
+	/* save the new address to device struct */
+	dev->dev_addr = (u8) value;
+	VDBG(dev, "dev->dev_addr = %d\n", dev->dev_addr);
+
+	/* update usb state */
+	dev->usb_state = USB_STATE_ADDRESS;
+
+	/* STATUS phase */
+	if (prime_status_phase(dev, EP_DIR_IN))
+		ep0_stall(dev);
+
+	VDBG(dev, "<--- %s()\n", __func__);
+}
+
+
+/* return endpoint by windex */
+static struct langwell_ep *get_ep_by_windex(struct langwell_udc *dev,
+		u16 wIndex)
+{
+	struct langwell_ep		*ep;
+	VDBG(dev, "---> %s()\n", __func__);
+
+	if ((wIndex & USB_ENDPOINT_NUMBER_MASK) == 0)
+		return &dev->ep[0];
+
+	list_for_each_entry(ep, &dev->gadget.ep_list, ep.ep_list) {
+		u8	bEndpointAddress;
+		if (!ep->desc)
+			continue;
+
+		bEndpointAddress = ep->desc->bEndpointAddress;
+		if ((wIndex ^ bEndpointAddress) & USB_DIR_IN)
+			continue;
+
+		if ((wIndex & USB_ENDPOINT_NUMBER_MASK)
+			== (bEndpointAddress & USB_ENDPOINT_NUMBER_MASK))
+			return ep;
+	}
+
+	VDBG(dev, "<--- %s()\n", __func__);
+	return NULL;
+}
+
+
+/* return whether endpoint is stalled, 0: not stalled; 1: stalled */
+static int ep_is_stall(struct langwell_ep *ep)
+{
+	struct langwell_udc	*dev = ep->dev;
+	u32			endptctrl;
+	int			retval;
+
+	VDBG(dev, "---> %s()\n", __func__);
+
+	endptctrl = readl(&dev->op_regs->endptctrl[ep->ep_num]);
+	if (is_in(ep))
+		retval = endptctrl & EPCTRL_TXS ? 1 : 0;
+	else
+		retval = endptctrl & EPCTRL_RXS ? 1 : 0;
+
+	VDBG(dev, "<--- %s()\n", __func__);
+	return retval;
+}
+
+
+/* GET_STATUS request routine */
+static void get_status(struct langwell_udc *dev, u8 request_type, u16 value,
+		u16 index, u16 length)
+{
+	struct langwell_request	*req;
+	struct langwell_ep	*ep;
+	u16	status_data = 0;	/* 16 bits cpu view status data */
+	int	status = 0;
+
+	VDBG(dev, "---> %s()\n", __func__);
+
+	ep = &dev->ep[0];
+
+	if ((request_type & USB_RECIP_MASK) == USB_RECIP_DEVICE) {
+		/* get device status */
+		status_data = 1 << USB_DEVICE_SELF_POWERED;
+		status_data |= dev->remote_wakeup << USB_DEVICE_REMOTE_WAKEUP;
+	} else if ((request_type & USB_RECIP_MASK) == USB_RECIP_INTERFACE) {
+		/* get interface status */
+		status_data = 0;
+	} else if ((request_type & USB_RECIP_MASK) == USB_RECIP_ENDPOINT) {
+		/* get endpoint status */
+		struct langwell_ep	*epn;
+		epn = get_ep_by_windex(dev, index);
+		/* stall if endpoint doesn't exist */
+		if (!epn)
+			goto stall;
+
+		status_data = ep_is_stall(epn) << USB_ENDPOINT_HALT;
+	}
+
+	dev->ep0_dir = USB_DIR_IN;
+
+	/* borrow the per device status_req */
+	req = dev->status_req;
+
+	/* fill in the reqest structure */
+	*((u16 *) req->req.buf) = cpu_to_le16(status_data);
+	req->ep = ep;
+	req->req.length = 2;
+	req->req.status = -EINPROGRESS;
+	req->req.actual = 0;
+	req->req.complete = NULL;
+	req->dtd_count = 0;
+
+	/* prime the data phase */
+	if (!req_to_dtd(req))
+		status = queue_dtd(ep, req);
+	else			/* no mem */
+		goto stall;
+
+	if (status) {
+		ERROR(dev, "response error on GET_STATUS request\n");
+		goto stall;
+	}
+
+	list_add_tail(&req->queue, &ep->queue);
+	dev->ep0_state = DATA_STATE_XMIT;
+
+	VDBG(dev, "<--- %s()\n", __func__);
+	return;
+stall:
+	ep0_stall(dev);
+	VDBG(dev, "<--- %s()\n", __func__);
+}
+
+
+/* setup packet interrupt handler */
+static void handle_setup_packet(struct langwell_udc *dev,
+		struct usb_ctrlrequest *setup)
+{
+	u16	wValue = le16_to_cpu(setup->wValue);
+	u16	wIndex = le16_to_cpu(setup->wIndex);
+	u16	wLength = le16_to_cpu(setup->wLength);
+
+	VDBG(dev, "---> %s()\n", __func__);
+
+	/* ep0 fifo flush */
+	nuke(&dev->ep[0], -ESHUTDOWN);
+
+	DBG(dev, "SETUP %02x.%02x v%04x i%04x l%04x\n",
+			setup->bRequestType, setup->bRequest,
+			wValue, wIndex, wLength);
+
+	/* RNDIS gadget delegate */
+	if ((setup->bRequestType == 0x21) && (setup->bRequest == 0x00)) {
+		/* USB_CDC_SEND_ENCAPSULATED_COMMAND */
+		goto delegate;
+	}
+
+	/* USB_CDC_GET_ENCAPSULATED_RESPONSE */
+	if ((setup->bRequestType == 0xa1) && (setup->bRequest == 0x01)) {
+		/* USB_CDC_GET_ENCAPSULATED_RESPONSE */
+		goto delegate;
+	}
+
+	/* We process some stardard setup requests here */
+	switch (setup->bRequest) {
+	case USB_REQ_GET_STATUS:
+		DBG(dev, "SETUP: USB_REQ_GET_STATUS\n");
+		/* get status, DATA and STATUS phase */
+		if ((setup->bRequestType & (USB_DIR_IN | USB_TYPE_MASK))
+					!= (USB_DIR_IN | USB_TYPE_STANDARD))
+			break;
+		get_status(dev, setup->bRequestType, wValue, wIndex, wLength);
+		goto end;
+
+	case USB_REQ_SET_ADDRESS:
+		DBG(dev, "SETUP: USB_REQ_SET_ADDRESS\n");
+		/* STATUS phase */
+		if (setup->bRequestType != (USB_DIR_OUT | USB_TYPE_STANDARD
+						| USB_RECIP_DEVICE))
+			break;
+		set_address(dev, wValue, wIndex, wLength);
+		goto end;
+
+	case USB_REQ_CLEAR_FEATURE:
+	case USB_REQ_SET_FEATURE:
+		/* STATUS phase */
+	{
+		int rc = -EOPNOTSUPP;
+		if (setup->bRequest == USB_REQ_SET_FEATURE)
+			DBG(dev, "SETUP: USB_REQ_SET_FEATURE\n");
+		else if (setup->bRequest == USB_REQ_CLEAR_FEATURE)
+			DBG(dev, "SETUP: USB_REQ_CLEAR_FEATURE\n");
+
+		if ((setup->bRequestType & (USB_RECIP_MASK | USB_TYPE_MASK))
+				== (USB_RECIP_ENDPOINT | USB_TYPE_STANDARD)) {
+			struct langwell_ep	*epn;
+			epn = get_ep_by_windex(dev, wIndex);
+			/* stall if endpoint doesn't exist */
+			if (!epn) {
+				ep0_stall(dev);
+				goto end;
+			}
+
+			if (wValue != 0 || wLength != 0
+					|| epn->ep_num > dev->ep_max)
+				break;
+
+			spin_unlock(&dev->lock);
+			rc = langwell_ep_set_halt(&epn->ep,
+					(setup->bRequest == USB_REQ_SET_FEATURE)
+						? 1 : 0);
+			spin_lock(&dev->lock);
+
+		} else if ((setup->bRequestType & (USB_RECIP_MASK
+				| USB_TYPE_MASK)) == (USB_RECIP_DEVICE
+				| USB_TYPE_STANDARD)) {
+			if (!gadget_is_otg(&dev->gadget))
+				break;
+			else if (setup->bRequest == USB_DEVICE_B_HNP_ENABLE) {
+				dev->gadget.b_hnp_enable = 1;
+#ifdef	OTG_TRANSCEIVER
+				if (!dev->lotg->otg.default_a)
+					dev->lotg->hsm.b_hnp_enable = 1;
+#endif
+			} else if (setup->bRequest == USB_DEVICE_A_HNP_SUPPORT)
+				dev->gadget.a_hnp_support = 1;
+			else if (setup->bRequest ==
+					USB_DEVICE_A_ALT_HNP_SUPPORT)
+				dev->gadget.a_alt_hnp_support = 1;
+			else
+				break;
+			rc = 0;
+		} else
+			break;
+
+		if (rc == 0) {
+			if (prime_status_phase(dev, EP_DIR_IN))
+				ep0_stall(dev);
+		}
+		goto end;
+	}
+
+	case USB_REQ_GET_DESCRIPTOR:
+		DBG(dev, "SETUP: USB_REQ_GET_DESCRIPTOR\n");
+		goto delegate;
+
+	case USB_REQ_SET_DESCRIPTOR:
+		DBG(dev, "SETUP: USB_REQ_SET_DESCRIPTOR unsupported\n");
+		goto delegate;
+
+	case USB_REQ_GET_CONFIGURATION:
+		DBG(dev, "SETUP: USB_REQ_GET_CONFIGURATION\n");
+		goto delegate;
+
+	case USB_REQ_SET_CONFIGURATION:
+		DBG(dev, "SETUP: USB_REQ_SET_CONFIGURATION\n");
+		goto delegate;
+
+	case USB_REQ_GET_INTERFACE:
+		DBG(dev, "SETUP: USB_REQ_GET_INTERFACE\n");
+		goto delegate;
+
+	case USB_REQ_SET_INTERFACE:
+		DBG(dev, "SETUP: USB_REQ_SET_INTERFACE\n");
+		goto delegate;
+
+	case USB_REQ_SYNCH_FRAME:
+		DBG(dev, "SETUP: USB_REQ_SYNCH_FRAME unsupported\n");
+		goto delegate;
+
+	default:
+		/* delegate USB standard requests to the gadget driver */
+		goto delegate;
+delegate:
+		/* USB requests handled by gadget */
+		if (wLength) {
+			/* DATA phase from gadget, STATUS phase from udc */
+			dev->ep0_dir = (setup->bRequestType & USB_DIR_IN)
+					?  USB_DIR_IN : USB_DIR_OUT;
+			VDBG(dev, "dev->ep0_dir = 0x%x, wLength = %d\n",
+					dev->ep0_dir, wLength);
+			spin_unlock(&dev->lock);
+			if (dev->driver->setup(&dev->gadget,
+					&dev->local_setup_buff) < 0)
+				ep0_stall(dev);
+			spin_lock(&dev->lock);
+			dev->ep0_state = (setup->bRequestType & USB_DIR_IN)
+					?  DATA_STATE_XMIT : DATA_STATE_RECV;
+		} else {
+			/* no DATA phase, IN STATUS phase from gadget */
+			dev->ep0_dir = USB_DIR_IN;
+			VDBG(dev, "dev->ep0_dir = 0x%x, wLength = %d\n",
+					dev->ep0_dir, wLength);
+			spin_unlock(&dev->lock);
+			if (dev->driver->setup(&dev->gadget,
+					&dev->local_setup_buff) < 0)
+				ep0_stall(dev);
+			spin_lock(&dev->lock);
+			dev->ep0_state = WAIT_FOR_OUT_STATUS;
+		}
+		break;
+	}
+end:
+	VDBG(dev, "<--- %s()\n", __func__);
+	return;
+}
+
+
+/* transfer completion, process endpoint request and free the completed dTDs
+ * for this request
+ */
+static int process_ep_req(struct langwell_udc *dev, int index,
+		struct langwell_request *curr_req)
+{
+	struct langwell_dtd	*curr_dtd;
+	struct langwell_dqh	*curr_dqh;
+	int			td_complete, actual, remaining_length;
+	int			i, dir;
+	u8			dtd_status = 0;
+	int			retval = 0;
+
+	curr_dqh = &dev->ep_dqh[index];
+	dir = index % 2;
+
+	curr_dtd = curr_req->head;
+	td_complete = 0;
+	actual = curr_req->req.length;
+
+	VDBG(dev, "---> %s()\n", __func__);
+
+	for (i = 0; i < curr_req->dtd_count; i++) {
+		remaining_length = le16_to_cpu(curr_dtd->dtd_total);
+		actual -= remaining_length;
+
+		/* command execution states by dTD */
+		dtd_status = curr_dtd->dtd_status;
+
+		if (!dtd_status) {
+			/* transfers completed successfully */
+			if (!remaining_length) {
+				td_complete++;
+				VDBG(dev, "dTD transmitted successfully\n");
+			} else {
+				if (dir) {
+					VDBG(dev, "TX dTD remains data\n");
+					retval = -EPROTO;
+					break;
+
+				} else {
+					td_complete++;
+					break;
+				}
+			}
+		} else {
+			/* transfers completed with errors */
+			if (dtd_status & DTD_STS_ACTIVE) {
+				DBG(dev, "request not completed\n");
+				retval = 1;
+				return retval;
+			} else if (dtd_status & DTD_STS_HALTED) {
+				ERROR(dev, "dTD error %08x dQH[%d]\n",
+						dtd_status, index);
+				/* clear the errors and halt condition */
+				curr_dqh->dtd_status = 0;
+				retval = -EPIPE;
+				break;
+			} else if (dtd_status & DTD_STS_DBE) {
+				DBG(dev, "data buffer (overflow) error\n");
+				retval = -EPROTO;
+				break;
+			} else if (dtd_status & DTD_STS_TRE) {
+				DBG(dev, "transaction(ISO) error\n");
+				retval = -EILSEQ;
+				break;
+			} else
+				ERROR(dev, "unknown error (0x%x)!\n",
+						dtd_status);
+		}
+
+		if (i != curr_req->dtd_count - 1)
+			curr_dtd = (struct langwell_dtd *)
+				curr_dtd->next_dtd_virt;
+	}
+
+	if (retval)
+		return retval;
+
+	curr_req->req.actual = actual;
+
+	VDBG(dev, "<--- %s()\n", __func__);
+	return 0;
+}
+
+
+/* complete DATA or STATUS phase of ep0 prime status phase if needed */
+static void ep0_req_complete(struct langwell_udc *dev,
+		struct langwell_ep *ep0, struct langwell_request *req)
+{
+	u32	new_addr;
+	VDBG(dev, "---> %s()\n", __func__);
+
+	if (dev->usb_state == USB_STATE_ADDRESS) {
+		/* set the new address */
+		new_addr = (u32)dev->dev_addr;
+		writel(new_addr << USBADR_SHIFT, &dev->op_regs->deviceaddr);
+
+		new_addr = USBADR(readl(&dev->op_regs->deviceaddr));
+		VDBG(dev, "new_addr = %d\n", new_addr);
+	}
+
+	done(ep0, req, 0);
+
+	switch (dev->ep0_state) {
+	case DATA_STATE_XMIT:
+		/* receive status phase */
+		if (prime_status_phase(dev, EP_DIR_OUT))
+			ep0_stall(dev);
+		break;
+	case DATA_STATE_RECV:
+		/* send status phase */
+		if (prime_status_phase(dev, EP_DIR_IN))
+			ep0_stall(dev);
+		break;
+	case WAIT_FOR_OUT_STATUS:
+		dev->ep0_state = WAIT_FOR_SETUP;
+		break;
+	case WAIT_FOR_SETUP:
+		ERROR(dev, "unexpect ep0 packets\n");
+		break;
+	default:
+		ep0_stall(dev);
+		break;
+	}
+
+	VDBG(dev, "<--- %s()\n", __func__);
+}
+
+
+/* USB transfer completion interrupt */
+static void handle_trans_complete(struct langwell_udc *dev)
+{
+	u32			complete_bits;
+	int			i, ep_num, dir, bit_mask, status;
+	struct langwell_ep	*epn;
+	struct langwell_request	*curr_req, *temp_req;
+
+	VDBG(dev, "---> %s()\n", __func__);
+
+	complete_bits = readl(&dev->op_regs->endptcomplete);
+	VDBG(dev, "endptcomplete register: 0x%08x\n", complete_bits);
+
+	/* Write-Clear the bits in endptcomplete register */
+	writel(complete_bits, &dev->op_regs->endptcomplete);
+
+	if (!complete_bits) {
+		DBG(dev, "complete_bits = 0\n");
+		goto done;
+	}
+
+	for (i = 0; i < dev->ep_max; i++) {
+		ep_num = i / 2;
+		dir = i % 2;
+
+		bit_mask = 1 << (ep_num + 16 * dir);
+
+		if (!(complete_bits & bit_mask))
+			continue;
+
+		/* ep0 */
+		if (i == 1)
+			epn = &dev->ep[0];
+		else
+			epn = &dev->ep[i];
+
+		if (epn->name == NULL) {
+			WARNING(dev, "invalid endpoint\n");
+			continue;
+		}
+
+		if (i < 2)
+			/* ep0 in and out */
+			DBG(dev, "%s-%s transfer completed\n",
+					epn->name,
+					is_in(epn) ? "in" : "out");
+		else
+			DBG(dev, "%s transfer completed\n", epn->name);
+
+		/* process the req queue until an uncomplete request */
+		list_for_each_entry_safe(curr_req, temp_req,
+				&epn->queue, queue) {
+			status = process_ep_req(dev, i, curr_req);
+			VDBG(dev, "%s req status: %d\n", epn->name, status);
+
+			if (status)
+				break;
+
+			/* write back status to req */
+			curr_req->req.status = status;
+
+			/* ep0 request completion */
+			if (ep_num == 0) {
+				ep0_req_complete(dev, epn, curr_req);
+				break;
+			} else {
+				done(epn, curr_req, status);
+			}
+		}
+	}
+done:
+	VDBG(dev, "<--- %s()\n", __func__);
+	return;
+}
+
+
+/* port change detect interrupt handler */
+static void handle_port_change(struct langwell_udc *dev)
+{
+	u32	portsc1, devlc;
+	u32	speed;
+
+	VDBG(dev, "---> %s()\n", __func__);
+
+	if (dev->bus_reset)
+		dev->bus_reset = 0;
+
+	portsc1 = readl(&dev->op_regs->portsc1);
+	devlc = readl(&dev->op_regs->devlc);
+	VDBG(dev, "portsc1 = 0x%08x, devlc = 0x%08x\n",
+			portsc1, devlc);
+
+	/* bus reset is finished */
+	if (!(portsc1 & PORTS_PR)) {
+		/* get the speed */
+		speed = LPM_PSPD(devlc);
+		switch (speed) {
+		case LPM_SPEED_HIGH:
+			dev->gadget.speed = USB_SPEED_HIGH;
+			break;
+		case LPM_SPEED_FULL:
+			dev->gadget.speed = USB_SPEED_FULL;
+			break;
+		case LPM_SPEED_LOW:
+			dev->gadget.speed = USB_SPEED_LOW;
+			break;
+		default:
+			dev->gadget.speed = USB_SPEED_UNKNOWN;
+			break;
+		}
+		VDBG(dev, "speed = %d, dev->gadget.speed = %d\n",
+				speed, dev->gadget.speed);
+	}
+
+	/* LPM L0 to L1 */
+	if (dev->lpm && dev->lpm_state == LPM_L0)
+		if (portsc1 & PORTS_SUSP && portsc1 & PORTS_SLP) {
+				INFO(dev, "LPM L0 to L1\n");
+				dev->lpm_state = LPM_L1;
+		}
+
+	/* LPM L1 to L0, force resume or remote wakeup finished */
+	if (dev->lpm && dev->lpm_state == LPM_L1)
+		if (!(portsc1 & PORTS_SUSP)) {
+			if (portsc1 & PORTS_SLP)
+				INFO(dev, "LPM L1 to L0, force resume\n");
+			else
+				INFO(dev, "LPM L1 to L0, remote wakeup\n");
+
+			dev->lpm_state = LPM_L0;
+		}
+
+	/* update USB state */
+	if (!dev->resume_state)
+		dev->usb_state = USB_STATE_DEFAULT;
+
+	VDBG(dev, "<--- %s()\n", __func__);
+}
+
+
+/* USB reset interrupt handler */
+static void handle_usb_reset(struct langwell_udc *dev)
+{
+	u32		deviceaddr,
+			endptsetupstat,
+			endptcomplete;
+	unsigned long	timeout;
+
+	VDBG(dev, "---> %s()\n", __func__);
+
+	/* Write-Clear the device address */
+	deviceaddr = readl(&dev->op_regs->deviceaddr);
+	writel(deviceaddr & ~USBADR_MASK, &dev->op_regs->deviceaddr);
+
+	dev->dev_addr = 0;
+
+	/* clear usb state */
+	dev->resume_state = 0;
+
+	/* LPM L1 to L0, reset */
+	if (dev->lpm)
+		dev->lpm_state = LPM_L0;
+
+	dev->ep0_dir = USB_DIR_OUT;
+	dev->ep0_state = WAIT_FOR_SETUP;
+	dev->remote_wakeup = 0;		/* default to 0 on reset */
+	dev->gadget.b_hnp_enable = 0;
+	dev->gadget.a_hnp_support = 0;
+	dev->gadget.a_alt_hnp_support = 0;
+
+	/* Write-Clear all the setup token semaphores */
+	endptsetupstat = readl(&dev->op_regs->endptsetupstat);
+	writel(endptsetupstat, &dev->op_regs->endptsetupstat);
+
+	/* Write-Clear all the endpoint complete status bits */
+	endptcomplete = readl(&dev->op_regs->endptcomplete);
+	writel(endptcomplete, &dev->op_regs->endptcomplete);
+
+	/* wait until all endptprime bits cleared */
+	timeout = jiffies + PRIME_TIMEOUT;
+	while (readl(&dev->op_regs->endptprime)) {
+		if (time_after(jiffies, timeout)) {
+			ERROR(dev, "USB reset timeout\n");
+			break;
+		}
+		cpu_relax();
+	}
+
+	/* write 1s to endptflush register to clear any primed buffers */
+	writel((u32) ~0, &dev->op_regs->endptflush);
+
+	if (readl(&dev->op_regs->portsc1) & PORTS_PR) {
+		VDBG(dev, "USB bus reset\n");
+		/* bus is reseting */
+		dev->bus_reset = 1;
+
+		/* reset all the queues, stop all USB activities */
+		stop_activity(dev, dev->driver);
+		dev->usb_state = USB_STATE_DEFAULT;
+	} else {
+		VDBG(dev, "device controller reset\n");
+		/* controller reset */
+		langwell_udc_reset(dev);
+
+		/* reset all the queues, stop all USB activities */
+		stop_activity(dev, dev->driver);
+
+		/* reset ep0 dQH and endptctrl */
+		ep0_reset(dev);
+
+		/* enable interrupt and set controller to run state */
+		langwell_udc_start(dev);
+
+		dev->usb_state = USB_STATE_ATTACHED;
+	}
+
+#ifdef	OTG_TRANSCEIVER
+	/* refer to USB OTG 6.6.2.3 b_hnp_en is cleared */
+	if (!dev->lotg->otg.default_a)
+		dev->lotg->hsm.b_hnp_enable = 0;
+#endif
+
+	VDBG(dev, "<--- %s()\n", __func__);
+}
+
+
+/* USB bus suspend/resume interrupt */
+static void handle_bus_suspend(struct langwell_udc *dev)
+{
+	u32		devlc;
+	DBG(dev, "---> %s()\n", __func__);
+
+	dev->resume_state = dev->usb_state;
+	dev->usb_state = USB_STATE_SUSPENDED;
+
+#ifdef	OTG_TRANSCEIVER
+	if (dev->lotg->otg.default_a) {
+		if (dev->lotg->hsm.b_bus_suspend_vld == 1) {
+			dev->lotg->hsm.b_bus_suspend = 1;
+			/* notify transceiver the state changes */
+			if (spin_trylock(&dev->lotg->wq_lock)) {
+				langwell_update_transceiver();
+				spin_unlock(&dev->lotg->wq_lock);
+			}
+		}
+		dev->lotg->hsm.b_bus_suspend_vld++;
+	} else {
+		if (!dev->lotg->hsm.a_bus_suspend) {
+			dev->lotg->hsm.a_bus_suspend = 1;
+			/* notify transceiver the state changes */
+			if (spin_trylock(&dev->lotg->wq_lock)) {
+				langwell_update_transceiver();
+				spin_unlock(&dev->lotg->wq_lock);
+			}
+		}
+	}
+#endif
+
+	/* report suspend to the driver */
+	if (dev->driver) {
+		if (dev->driver->suspend) {
+			spin_unlock(&dev->lock);
+			dev->driver->suspend(&dev->gadget);
+			spin_lock(&dev->lock);
+			DBG(dev, "suspend %s\n", dev->driver->driver.name);
+		}
+	}
+
+	/* enter PHY low power suspend */
+	devlc = readl(&dev->op_regs->devlc);
+	VDBG(dev, "devlc = 0x%08x\n", devlc);
+	devlc |= LPM_PHCD;
+	writel(devlc, &dev->op_regs->devlc);
+
+	DBG(dev, "<--- %s()\n", __func__);
+}
+
+
+static void handle_bus_resume(struct langwell_udc *dev)
+{
+	u32		devlc;
+	DBG(dev, "---> %s()\n", __func__);
+
+	dev->usb_state = dev->resume_state;
+	dev->resume_state = 0;
+
+	/* exit PHY low power suspend */
+	devlc = readl(&dev->op_regs->devlc);
+	VDBG(dev, "devlc = 0x%08x\n", devlc);
+	devlc &= ~LPM_PHCD;
+	writel(devlc, &dev->op_regs->devlc);
+
+#ifdef	OTG_TRANSCEIVER
+	if (dev->lotg->otg.default_a == 0)
+		dev->lotg->hsm.a_bus_suspend = 0;
+#endif
+
+	/* report resume to the driver */
+	if (dev->driver) {
+		if (dev->driver->resume) {
+			spin_unlock(&dev->lock);
+			dev->driver->resume(&dev->gadget);
+			spin_lock(&dev->lock);
+			DBG(dev, "resume %s\n", dev->driver->driver.name);
+		}
+	}
+
+	DBG(dev, "<--- %s()\n", __func__);
+}
+
+
+/* USB device controller interrupt handler */
+static irqreturn_t langwell_irq(int irq, void *_dev)
+{
+	struct langwell_udc	*dev = _dev;
+	u32			usbsts,
+				usbintr,
+				irq_sts,
+				portsc1;
+
+	VDBG(dev, "---> %s()\n", __func__);
+
+	if (dev->stopped) {
+		VDBG(dev, "handle IRQ_NONE\n");
+		VDBG(dev, "<--- %s()\n", __func__);
+		return IRQ_NONE;
+	}
+
+	spin_lock(&dev->lock);
+
+	/* USB status */
+	usbsts = readl(&dev->op_regs->usbsts);
+
+	/* USB interrupt enable */
+	usbintr = readl(&dev->op_regs->usbintr);
+
+	irq_sts = usbsts & usbintr;
+	VDBG(dev, "usbsts = 0x%08x, usbintr = 0x%08x, irq_sts = 0x%08x\n",
+			usbsts, usbintr, irq_sts);
+
+	if (!irq_sts) {
+		VDBG(dev, "handle IRQ_NONE\n");
+		VDBG(dev, "<--- %s()\n", __func__);
+		spin_unlock(&dev->lock);
+		return IRQ_NONE;
+	}
+
+	/* Write-Clear interrupt status bits */
+	writel(irq_sts, &dev->op_regs->usbsts);
+
+	/* resume from suspend */
+	portsc1 = readl(&dev->op_regs->portsc1);
+	if (dev->usb_state == USB_STATE_SUSPENDED)
+		if (!(portsc1 & PORTS_SUSP))
+			handle_bus_resume(dev);
+
+	/* USB interrupt */
+	if (irq_sts & STS_UI) {
+		VDBG(dev, "USB interrupt\n");
+
+		/* setup packet received from ep0 */
+		if (readl(&dev->op_regs->endptsetupstat)
+				& EP0SETUPSTAT_MASK) {
+			VDBG(dev, "USB SETUP packet received interrupt\n");
+			/* setup tripwire semaphone */
+			setup_tripwire(dev);
+			handle_setup_packet(dev, &dev->local_setup_buff);
+		}
+
+		/* USB transfer completion */
+		if (readl(&dev->op_regs->endptcomplete)) {
+			VDBG(dev, "USB transfer completion interrupt\n");
+			handle_trans_complete(dev);
+		}
+	}
+
+	/* SOF received interrupt (for ISO transfer) */
+	if (irq_sts & STS_SRI) {
+		/* FIXME */
+		/* VDBG(dev, "SOF received interrupt\n"); */
+	}
+
+	/* port change detect interrupt */
+	if (irq_sts & STS_PCI) {
+		VDBG(dev, "port change detect interrupt\n");
+		handle_port_change(dev);
+	}
+
+	/* suspend interrrupt */
+	if (irq_sts & STS_SLI) {
+		VDBG(dev, "suspend interrupt\n");
+		handle_bus_suspend(dev);
+	}
+
+	/* USB reset interrupt */
+	if (irq_sts & STS_URI) {
+		VDBG(dev, "USB reset interrupt\n");
+		handle_usb_reset(dev);
+	}
+
+	/* USB error or system error interrupt */
+	if (irq_sts & (STS_UEI | STS_SEI)) {
+		/* FIXME */
+		WARNING(dev, "error IRQ, irq_sts: %x\n", irq_sts);
+	}
+
+	spin_unlock(&dev->lock);
+
+	VDBG(dev, "<--- %s()\n", __func__);
+	return IRQ_HANDLED;
+}
+
+
+/*-------------------------------------------------------------------------*/
+
+/* release device structure */
+static void gadget_release(struct device *_dev)
+{
+	struct langwell_udc	*dev = the_controller;
+
+	DBG(dev, "---> %s()\n", __func__);
+
+	complete(dev->done);
+
+	DBG(dev, "<--- %s()\n", __func__);
+	kfree(dev);
+}
+
+
+/* tear down the binding between this driver and the pci device */
+static void langwell_udc_remove(struct pci_dev *pdev)
+{
+	struct langwell_udc	*dev = the_controller;
+
+	DECLARE_COMPLETION(done);
+
+	BUG_ON(dev->driver);
+	DBG(dev, "---> %s()\n", __func__);
+
+	dev->done = &done;
+
+	/* free memory allocated in probe */
+	if (dev->dtd_pool)
+		dma_pool_destroy(dev->dtd_pool);
+
+	if (dev->status_req) {
+		kfree(dev->status_req->req.buf);
+		kfree(dev->status_req);
+	}
+
+	if (dev->ep_dqh)
+		dma_free_coherent(&pdev->dev, dev->ep_dqh_size,
+			dev->ep_dqh, dev->ep_dqh_dma);
+
+	kfree(dev->ep);
+
+	/* diable IRQ handler */
+	if (dev->got_irq)
+		free_irq(pdev->irq, dev);
+
+#ifndef	OTG_TRANSCEIVER
+	if (dev->cap_regs)
+		iounmap(dev->cap_regs);
+
+	if (dev->region)
+		release_mem_region(pci_resource_start(pdev, 0),
+				pci_resource_len(pdev, 0));
+
+	if (dev->enabled)
+		pci_disable_device(pdev);
+#else
+	if (dev->transceiver) {
+		otg_put_transceiver(dev->transceiver);
+		dev->transceiver = NULL;
+		dev->lotg = NULL;
+	}
+#endif
+
+	dev->cap_regs = NULL;
+
+	INFO(dev, "unbind\n");
+	DBG(dev, "<--- %s()\n", __func__);
+
+	device_unregister(&dev->gadget.dev);
+	device_remove_file(&pdev->dev, &dev_attr_langwell_udc);
+
+#ifndef	OTG_TRANSCEIVER
+	pci_set_drvdata(pdev, NULL);
+#endif
+
+	/* free dev, wait for the release() finished */
+	wait_for_completion(&done);
+
+	the_controller = NULL;
+}
+
+
+/*
+ * wrap this driver around the specified device, but
+ * don't respond over USB until a gadget driver binds to us.
+ */
+static int langwell_udc_probe(struct pci_dev *pdev,
+		const struct pci_device_id *id)
+{
+	struct langwell_udc	*dev;
+#ifndef	OTG_TRANSCEIVER
+	unsigned long		resource, len;
+#endif
+	void			__iomem *base = NULL;
+	size_t			size;
+	int			retval;
+
+	if (the_controller) {
+		dev_warn(&pdev->dev, "ignoring\n");
+		return -EBUSY;
+	}
+
+	/* alloc, and start init */
+	dev = kzalloc(sizeof *dev, GFP_KERNEL);
+	if (dev == NULL) {
+		retval = -ENOMEM;
+		goto error;
+	}
+
+	/* initialize device spinlock */
+	spin_lock_init(&dev->lock);
+
+	dev->pdev = pdev;
+	DBG(dev, "---> %s()\n", __func__);
+
+#ifdef	OTG_TRANSCEIVER
+	/* PCI device is already enabled by otg_transceiver driver */
+	dev->enabled = 1;
+
+	/* mem region and register base */
+	dev->region = 1;
+	dev->transceiver = otg_get_transceiver();
+	dev->lotg = otg_to_langwell(dev->transceiver);
+	base = dev->lotg->regs;
+#else
+	pci_set_drvdata(pdev, dev);
+
+	/* now all the pci goodies ... */
+	if (pci_enable_device(pdev) < 0) {
+		retval = -ENODEV;
+		goto error;
+	}
+	dev->enabled = 1;
+
+	/* control register: BAR 0 */
+	resource = pci_resource_start(pdev, 0);
+	len = pci_resource_len(pdev, 0);
+	if (!request_mem_region(resource, len, driver_name)) {
+		ERROR(dev, "controller already in use\n");
+		retval = -EBUSY;
+		goto error;
+	}
+	dev->region = 1;
+
+	base = ioremap_nocache(resource, len);
+#endif
+	if (base == NULL) {
+		ERROR(dev, "can't map memory\n");
+		retval = -EFAULT;
+		goto error;
+	}
+
+	dev->cap_regs = (struct langwell_cap_regs __iomem *) base;
+	VDBG(dev, "dev->cap_regs: %p\n", dev->cap_regs);
+	dev->op_regs = (struct langwell_op_regs __iomem *)
+		(base + OP_REG_OFFSET);
+	VDBG(dev, "dev->op_regs: %p\n", dev->op_regs);
+
+	/* irq setup after old hardware is cleaned up */
+	if (!pdev->irq) {
+		ERROR(dev, "No IRQ. Check PCI setup!\n");
+		retval = -ENODEV;
+		goto error;
+	}
+
+#ifndef	OTG_TRANSCEIVER
+	INFO(dev, "irq %d, io mem: 0x%08lx, len: 0x%08lx, pci mem 0x%p\n",
+			pdev->irq, resource, len, base);
+	/* enables bus-mastering for device dev */
+	pci_set_master(pdev);
+
+	if (request_irq(pdev->irq, langwell_irq, IRQF_SHARED,
+				driver_name, dev) != 0) {
+		ERROR(dev, "request interrupt %d failed\n", pdev->irq);
+		retval = -EBUSY;
+		goto error;
+	}
+	dev->got_irq = 1;
+#endif
+
+	/* set stopped bit */
+	dev->stopped = 1;
+
+	/* capabilities and endpoint number */
+	dev->lpm = (readl(&dev->cap_regs->hccparams) & HCC_LEN) ? 1 : 0;
+	dev->dciversion = readw(&dev->cap_regs->dciversion);
+	dev->devcap = (readl(&dev->cap_regs->dccparams) & DEVCAP) ? 1 : 0;
+	VDBG(dev, "dev->lpm: %d\n", dev->lpm);
+	VDBG(dev, "dev->dciversion: 0x%04x\n", dev->dciversion);
+	VDBG(dev, "dccparams: 0x%08x\n", readl(&dev->cap_regs->dccparams));
+	VDBG(dev, "dev->devcap: %d\n", dev->devcap);
+	if (!dev->devcap) {
+		ERROR(dev, "can't support device mode\n");
+		retval = -ENODEV;
+		goto error;
+	}
+
+	/* a pair of endpoints (out/in) for each address */
+	dev->ep_max = DEN(readl(&dev->cap_regs->dccparams)) * 2;
+	VDBG(dev, "dev->ep_max: %d\n", dev->ep_max);
+
+	/* allocate endpoints memory */
+	dev->ep = kzalloc(sizeof(struct langwell_ep) * dev->ep_max,
+			GFP_KERNEL);
+	if (!dev->ep) {
+		ERROR(dev, "allocate endpoints memory failed\n");
+		retval = -ENOMEM;
+		goto error;
+	}
+
+	/* allocate device dQH memory */
+	size = dev->ep_max * sizeof(struct langwell_dqh);
+	VDBG(dev, "orig size = %d\n", size);
+	if (size < DQH_ALIGNMENT)
+		size = DQH_ALIGNMENT;
+	else if ((size % DQH_ALIGNMENT) != 0) {
+		size += DQH_ALIGNMENT + 1;
+		size &= ~(DQH_ALIGNMENT - 1);
+	}
+	dev->ep_dqh = dma_alloc_coherent(&pdev->dev, size,
+					&dev->ep_dqh_dma, GFP_KERNEL);
+	if (!dev->ep_dqh) {
+		ERROR(dev, "allocate dQH memory failed\n");
+		retval = -ENOMEM;
+		goto error;
+	}
+	dev->ep_dqh_size = size;
+	VDBG(dev, "ep_dqh_size = %d\n", dev->ep_dqh_size);
+
+	/* initialize ep0 status request structure */
+	dev->status_req = kzalloc(sizeof(struct langwell_request), GFP_KERNEL);
+	if (!dev->status_req) {
+		ERROR(dev, "allocate status_req memory failed\n");
+		retval = -ENOMEM;
+		goto error;
+	}
+	INIT_LIST_HEAD(&dev->status_req->queue);
+
+	/* allocate a small amount of memory to get valid address */
+	dev->status_req->req.buf = kmalloc(8, GFP_KERNEL);
+	dev->status_req->req.dma = virt_to_phys(dev->status_req->req.buf);
+
+	dev->resume_state = USB_STATE_NOTATTACHED;
+	dev->usb_state = USB_STATE_POWERED;
+	dev->ep0_dir = USB_DIR_OUT;
+	dev->remote_wakeup = 0;	/* default to 0 on reset */
+
+#ifndef	OTG_TRANSCEIVER
+	/* reset device controller */
+	langwell_udc_reset(dev);
+#endif
+
+	/* initialize gadget structure */
+	dev->gadget.ops = &langwell_ops;	/* usb_gadget_ops */
+	dev->gadget.ep0 = &dev->ep[0].ep;	/* gadget ep0 */
+	INIT_LIST_HEAD(&dev->gadget.ep_list);	/* ep_list */
+	dev->gadget.speed = USB_SPEED_UNKNOWN;	/* speed */
+	dev->gadget.is_dualspeed = 1;		/* support dual speed */
+#ifdef	OTG_TRANSCEIVER
+	dev->gadget.is_otg = 1;			/* support otg mode */
+#endif
+
+	/* the "gadget" abstracts/virtualizes the controller */
+	dev_set_name(&dev->gadget.dev, "gadget");
+	dev->gadget.dev.parent = &pdev->dev;
+	dev->gadget.dev.dma_mask = pdev->dev.dma_mask;
+	dev->gadget.dev.release = gadget_release;
+	dev->gadget.name = driver_name;		/* gadget name */
+
+	/* controller endpoints reinit */
+	eps_reinit(dev);
+
+#ifndef	OTG_TRANSCEIVER
+	/* reset ep0 dQH and endptctrl */
+	ep0_reset(dev);
+#endif
+
+	/* create dTD dma_pool resource */
+	dev->dtd_pool = dma_pool_create("langwell_dtd",
+			&dev->pdev->dev,
+			sizeof(struct langwell_dtd),
+			DTD_ALIGNMENT,
+			DMA_BOUNDARY);
+
+	if (!dev->dtd_pool) {
+		retval = -ENOMEM;
+		goto error;
+	}
+
+	/* done */
+	INFO(dev, "%s\n", driver_desc);
+	INFO(dev, "irq %d, pci mem %p\n", pdev->irq, base);
+	INFO(dev, "Driver version: " DRIVER_VERSION "\n");
+	INFO(dev, "Support (max) %d endpoints\n", dev->ep_max);
+	INFO(dev, "Device interface version: 0x%04x\n", dev->dciversion);
+	INFO(dev, "Controller mode: %s\n", dev->devcap ? "Device" : "Host");
+	INFO(dev, "Support USB LPM: %s\n", dev->lpm ? "Yes" : "No");
+
+	VDBG(dev, "After langwell_udc_probe(), print all registers:\n");
+#ifdef	VERBOSE
+	print_all_registers(dev);
+#endif
+
+	the_controller = dev;
+
+	retval = device_register(&dev->gadget.dev);
+	if (retval)
+		goto error;
+
+	retval = device_create_file(&pdev->dev, &dev_attr_langwell_udc);
+	if (retval)
+		goto error;
+
+	VDBG(dev, "<--- %s()\n", __func__);
+	return 0;
+
+error:
+	if (dev) {
+		DBG(dev, "<--- %s()\n", __func__);
+		langwell_udc_remove(pdev);
+	}
+
+	return retval;
+}
+
+
+/* device controller suspend */
+static int langwell_udc_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct langwell_udc	*dev = the_controller;
+	u32			devlc;
+
+	DBG(dev, "---> %s()\n", __func__);
+
+	/* disable interrupt and set controller to stop state */
+	langwell_udc_stop(dev);
+
+	/* diable IRQ handler */
+	if (dev->got_irq)
+		free_irq(pdev->irq, dev);
+	dev->got_irq = 0;
+
+
+	/* save PCI state */
+	pci_save_state(pdev);
+
+	/* set device power state */
+	pci_set_power_state(pdev, PCI_D3hot);
+
+	/* enter PHY low power suspend */
+	devlc = readl(&dev->op_regs->devlc);
+	VDBG(dev, "devlc = 0x%08x\n", devlc);
+	devlc |= LPM_PHCD;
+	writel(devlc, &dev->op_regs->devlc);
+
+	DBG(dev, "<--- %s()\n", __func__);
+	return 0;
+}
+
+
+/* device controller resume */
+static int langwell_udc_resume(struct pci_dev *pdev)
+{
+	struct langwell_udc	*dev = the_controller;
+	u32			devlc;
+
+	DBG(dev, "---> %s()\n", __func__);
+
+	/* exit PHY low power suspend */
+	devlc = readl(&dev->op_regs->devlc);
+	VDBG(dev, "devlc = 0x%08x\n", devlc);
+	devlc &= ~LPM_PHCD;
+	writel(devlc, &dev->op_regs->devlc);
+
+	/* set device D0 power state */
+	pci_set_power_state(pdev, PCI_D0);
+
+	/* restore PCI state */
+	pci_restore_state(pdev);
+
+	/* enable IRQ handler */
+	if (request_irq(pdev->irq, langwell_irq, IRQF_SHARED, driver_name, dev)
+			!= 0) {
+		ERROR(dev, "request interrupt %d failed\n", pdev->irq);
+		return -1;
+	}
+	dev->got_irq = 1;
+
+	/* reset and start controller to run state */
+	if (dev->stopped) {
+		/* reset device controller */
+		langwell_udc_reset(dev);
+
+		/* reset ep0 dQH and endptctrl */
+		ep0_reset(dev);
+
+		/* start device if gadget is loaded */
+		if (dev->driver)
+			langwell_udc_start(dev);
+	}
+
+	/* reset USB status */
+	dev->usb_state = USB_STATE_ATTACHED;
+	dev->ep0_state = WAIT_FOR_SETUP;
+	dev->ep0_dir = USB_DIR_OUT;
+
+	DBG(dev, "<--- %s()\n", __func__);
+	return 0;
+}
+
+
+/* pci driver shutdown */
+static void langwell_udc_shutdown(struct pci_dev *pdev)
+{
+	struct langwell_udc	*dev = the_controller;
+	u32			usbmode;
+
+	DBG(dev, "---> %s()\n", __func__);
+
+	/* reset controller mode to IDLE */
+	usbmode = readl(&dev->op_regs->usbmode);
+	DBG(dev, "usbmode = 0x%08x\n", usbmode);
+	usbmode &= (~3 | MODE_IDLE);
+	writel(usbmode, &dev->op_regs->usbmode);
+
+	DBG(dev, "<--- %s()\n", __func__);
+}
+
+/*-------------------------------------------------------------------------*/
+
+static const struct pci_device_id pci_ids[] = { {
+	.class =	((PCI_CLASS_SERIAL_USB << 8) | 0xfe),
+	.class_mask =	~0,
+	.vendor =	0x8086,
+	.device =	0x0811,
+	.subvendor =	PCI_ANY_ID,
+	.subdevice =	PCI_ANY_ID,
+}, { /* end: all zeroes */ }
+};
+
+
+MODULE_DEVICE_TABLE(pci, pci_ids);
+
+
+static struct pci_driver langwell_pci_driver = {
+	.name =		(char *) driver_name,
+	.id_table =	pci_ids,
+
+	.probe =	langwell_udc_probe,
+	.remove =	langwell_udc_remove,
+
+	/* device controller suspend/resume */
+	.suspend =	langwell_udc_suspend,
+	.resume =	langwell_udc_resume,
+
+	.shutdown =	langwell_udc_shutdown,
+};
+
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_AUTHOR("Xiaochen Shen <xiaochen.shen@intel.com>");
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL");
+
+
+static int __init init(void)
+{
+#ifdef	OTG_TRANSCEIVER
+	return langwell_register_peripheral(&langwell_pci_driver);
+#else
+	return pci_register_driver(&langwell_pci_driver);
+#endif
+}
+module_init(init);
+
+
+static void __exit cleanup(void)
+{
+#ifdef	OTG_TRANSCEIVER
+	return langwell_unregister_peripheral(&langwell_pci_driver);
+#else
+	pci_unregister_driver(&langwell_pci_driver);
+#endif
+}
+module_exit(cleanup);
+
diff --git a/drivers/usb/gadget/langwell_udc.h b/drivers/usb/gadget/langwell_udc.h
new file mode 100644
index 00000000000..9719934e1c0
--- /dev/null
+++ b/drivers/usb/gadget/langwell_udc.h
@@ -0,0 +1,228 @@
+/*
+ * Intel Langwell USB Device Controller driver
+ * Copyright (C) 2008-2009, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#include <linux/usb/langwell_udc.h>
+
+#if defined(CONFIG_USB_LANGWELL_OTG)
+#include <linux/usb/langwell_otg.h>
+#endif
+
+
+/*-------------------------------------------------------------------------*/
+
+/* driver data structures and utilities */
+
+/*
+ * dTD: Device Endpoint Transfer Descriptor
+ * describe to the device controller the location and quantity of
+ * data to be send/received for given transfer
+ */
+struct langwell_dtd {
+	u32	dtd_next;
+/* bits 31:5, next transfer element pointer */
+#define	DTD_NEXT(d)	(((d)>>5)&0x7ffffff)
+#define	DTD_NEXT_MASK	(0x7ffffff << 5)
+/* terminate */
+#define	DTD_TERM	BIT(0)
+	/* bits 7:0, execution back states */
+	u32	dtd_status:8;
+#define	DTD_STATUS(d)	(((d)>>0)&0xff)
+#define	DTD_STS_ACTIVE	BIT(7)	/* active */
+#define	DTD_STS_HALTED	BIT(6)	/* halted */
+#define	DTD_STS_DBE	BIT(5)	/* data buffer error */
+#define	DTD_STS_TRE	BIT(3)	/* transaction error  */
+	/* bits 9:8 */
+	u32	dtd_res0:2;
+	/* bits 11:10, multipier override */
+	u32	dtd_multo:2;
+#define	DTD_MULTO	(BIT(11) | BIT(10))
+	/* bits 14:12 */
+	u32	dtd_res1:3;
+	/* bit 15, interrupt on complete */
+	u32	dtd_ioc:1;
+#define	DTD_IOC		BIT(15)
+	/* bits 30:16, total bytes */
+	u32	dtd_total:15;
+#define	DTD_TOTAL(d)	(((d)>>16)&0x7fff)
+#define	DTD_MAX_TRANSFER_LENGTH	0x4000
+	/* bit 31 */
+	u32	dtd_res2:1;
+	/* dTD buffer pointer page 0 to 4 */
+	u32	dtd_buf[5];
+#define	DTD_OFFSET_MASK	0xfff
+/* bits 31:12, buffer pointer */
+#define	DTD_BUFFER(d)	(((d)>>12)&0x3ff)
+/* bits 11:0, current offset */
+#define	DTD_C_OFFSET(d)	(((d)>>0)&0xfff)
+/* bits 10:0, frame number */
+#define	DTD_FRAME(d)	(((d)>>0)&0x7ff)
+
+	/* driver-private parts */
+
+	/* dtd dma address */
+	dma_addr_t		dtd_dma;
+	/* next dtd virtual address */
+	struct langwell_dtd	*next_dtd_virt;
+};
+
+
+/*
+ * dQH: Device Endpoint Queue Head
+ * describe where all transfers are managed
+ * 48-byte data structure, aligned on 64-byte boundary
+ *
+ * These are associated with dTD structure
+ */
+struct langwell_dqh {
+	/* endpoint capabilities and characteristics */
+	u32	dqh_res0:15;	/* bits 14:0 */
+	u32	dqh_ios:1;	/* bit 15, interrupt on setup */
+#define	DQH_IOS		BIT(15)
+	u32	dqh_mpl:11;	/* bits 26:16, maximum packet length */
+#define	DQH_MPL		(0x7ff << 16)
+	u32	dqh_res1:2;	/* bits 28:27 */
+	u32	dqh_zlt:1;	/* bit 29, zero length termination */
+#define	DQH_ZLT		BIT(29)
+	u32	dqh_mult:2;	/* bits 31:30 */
+#define	DQH_MULT	(BIT(30) | BIT(31))
+
+	/* current dTD pointer */
+	u32	dqh_current;	/* locate the transfer in progress */
+#define DQH_C_DTD(e)	\
+	(((e)>>5)&0x7ffffff)	/* bits 31:5, current dTD pointer */
+
+	/* transfer overlay, hardware parts of a struct langwell_dtd */
+	u32	dtd_next;
+	u32	dtd_status:8;	/* bits 7:0, execution back states */
+	u32	dtd_res0:2;	/* bits 9:8 */
+	u32	dtd_multo:2;	/* bits 11:10, multipier override */
+	u32	dtd_res1:3;	/* bits 14:12 */
+	u32	dtd_ioc:1;	/* bit 15, interrupt on complete */
+	u32	dtd_total:15;	/* bits 30:16, total bytes */
+	u32	dtd_res2:1;	/* bit 31 */
+	u32	dtd_buf[5];	/* dTD buffer pointer page 0 to 4 */
+
+	u32	dqh_res2;
+	struct usb_ctrlrequest	dqh_setup;	/* setup packet buffer */
+} __attribute__ ((aligned(64)));
+
+
+/* endpoint data structure */
+struct langwell_ep {
+	struct usb_ep		ep;
+	dma_addr_t		dma;
+	struct langwell_udc	*dev;
+	unsigned long		irqs;
+	struct list_head	queue;
+	struct langwell_dqh	*dqh;
+	const struct usb_endpoint_descriptor	*desc;
+	char			name[14];
+	unsigned		stopped:1,
+				ep_type:2,
+				ep_num:8;
+};
+
+
+/* request data structure */
+struct langwell_request {
+	struct usb_request	req;
+	struct langwell_dtd	*dtd, *head, *tail;
+	struct langwell_ep	*ep;
+	dma_addr_t		dtd_dma;
+	struct list_head	queue;
+	unsigned		dtd_count;
+	unsigned		mapped:1;
+};
+
+
+/* ep0 transfer state */
+enum ep0_state {
+	WAIT_FOR_SETUP,
+	DATA_STATE_XMIT,
+	DATA_STATE_NEED_ZLP,
+	WAIT_FOR_OUT_STATUS,
+	DATA_STATE_RECV,
+};
+
+
+/* device suspend state */
+enum lpm_state {
+	LPM_L0,	/* on */
+	LPM_L1,	/* LPM L1 sleep */
+	LPM_L2,	/* suspend */
+	LPM_L3,	/* off */
+};
+
+
+/* device data structure */
+struct langwell_udc {
+	/* each pci device provides one gadget, several endpoints */
+	struct usb_gadget	gadget;
+	spinlock_t		lock;	/* device lock */
+	struct langwell_ep	*ep;
+	struct usb_gadget_driver	*driver;
+	struct otg_transceiver	*transceiver;
+	u8			dev_addr;
+	u32			usb_state;
+	u32			resume_state;
+	u32			bus_reset;
+	enum lpm_state		lpm_state;
+	enum ep0_state		ep0_state;
+	u32			ep0_dir;
+	u16			dciversion;
+	unsigned		ep_max;
+	unsigned		devcap:1,
+				enabled:1,
+				region:1,
+				got_irq:1,
+				powered:1,
+				remote_wakeup:1,
+				rate:1,
+				is_reset:1,
+				softconnected:1,
+				vbus_active:1,
+				suspended:1,
+				stopped:1,
+				lpm:1;	/* LPM capability */
+
+	/* pci state used to access those endpoints */
+	struct pci_dev		*pdev;
+
+	/* Langwell otg transceiver */
+	struct langwell_otg	*lotg;
+
+	/* control registers */
+	struct langwell_cap_regs	__iomem	*cap_regs;
+	struct langwell_op_regs		__iomem	*op_regs;
+
+	struct usb_ctrlrequest	local_setup_buff;
+	struct langwell_dqh	*ep_dqh;
+	size_t			ep_dqh_size;
+	dma_addr_t		ep_dqh_dma;
+
+	/* ep0 status request */
+	struct langwell_request	*status_req;
+
+	/* dma pool */
+	struct dma_pool		*dtd_pool;
+
+	/* make sure release() is done */
+	struct completion	*done;
+};
+
diff --git a/include/linux/usb/langwell_udc.h b/include/linux/usb/langwell_udc.h
new file mode 100644
index 00000000000..c949178a653
--- /dev/null
+++ b/include/linux/usb/langwell_udc.h
@@ -0,0 +1,310 @@
+/*
+ * Intel Langwell USB Device Controller driver
+ * Copyright (C) 2008-2009, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#ifndef __LANGWELL_UDC_H
+#define __LANGWELL_UDC_H
+
+
+/* MACRO defines */
+#define	CAP_REG_OFFSET		0x0
+#define	OP_REG_OFFSET		0x28
+
+#define	DMA_ADDR_INVALID	(~(dma_addr_t)0)
+
+#define	DQH_ALIGNMENT		2048
+#define	DTD_ALIGNMENT		64
+#define	DMA_BOUNDARY		4096
+
+#define	EP0_MAX_PKT_SIZE	64
+#define EP_DIR_IN		1
+#define EP_DIR_OUT		0
+
+#define FLUSH_TIMEOUT		1000
+#define RESET_TIMEOUT		1000
+#define SETUPSTAT_TIMEOUT	100
+#define PRIME_TIMEOUT		100
+
+
+/* device memory space registers */
+
+/* Capability Registers, BAR0 + CAP_REG_OFFSET */
+struct langwell_cap_regs {
+	/* offset: 0x0 */
+	u8	caplength;	/* offset of Operational Register */
+	u8	_reserved3;
+	u16	hciversion;	/* H: BCD encoding of host version */
+	u32	hcsparams;	/* H: host port steering logic capability */
+	u32	hccparams;	/* H: host multiple mode control capability */
+#define	HCC_LEN	BIT(17)		/* Link power management (LPM) capability */
+	u8	_reserved4[0x20-0xc];
+	/* offset: 0x20 */
+	u16	dciversion;	/* BCD encoding of device version */
+	u8	_reserved5[0x24-0x22];
+	u32	dccparams;	/* overall device controller capability */
+#define	HOSTCAP	BIT(8)		/* host capable */
+#define	DEVCAP	BIT(7)		/* device capable */
+#define DEN(d)	\
+	(((d)>>0)&0x1f)		/* bits 4:0, device endpoint number */
+} __attribute__ ((packed));
+
+
+/* Operational Registers, BAR0 + OP_REG_OFFSET */
+struct langwell_op_regs {
+	/* offset: 0x28 */
+	u32	extsts;
+#define	EXTS_TI1	BIT(4)	/* general purpose timer interrupt 1 */
+#define	EXTS_TI1TI0	BIT(3)	/* general purpose timer interrupt 0 */
+#define	EXTS_TI1UPI	BIT(2)	/* USB host periodic interrupt */
+#define	EXTS_TI1UAI	BIT(1)	/* USB host asynchronous interrupt */
+#define	EXTS_TI1NAKI	BIT(0)	/* NAK interrupt */
+	u32	extintr;
+#define	EXTI_TIE1	BIT(4)	/* general purpose timer interrupt enable 1 */
+#define	EXTI_TIE0	BIT(3)	/* general purpose timer interrupt enable 0 */
+#define	EXTI_UPIE	BIT(2)	/* USB host periodic interrupt enable */
+#define	EXTI_UAIE	BIT(1)	/* USB host asynchronous interrupt enable */
+#define	EXTI_NAKE	BIT(0)	/* NAK interrupt enable */
+	/* offset: 0x30 */
+	u32	usbcmd;
+#define	CMD_HIRD(u)	\
+	(((u)>>24)&0xf)		/* bits 27:24, host init resume duration */
+#define	CMD_ITC(u)	\
+	(((u)>>16)&0xff)	/* bits 23:16, interrupt threshold control */
+#define	CMD_PPE		BIT(15)	/* per-port change events enable */
+#define	CMD_ATDTW	BIT(14)	/* add dTD tripwire */
+#define	CMD_SUTW	BIT(13)	/* setup tripwire */
+#define	CMD_ASPE	BIT(11) /* asynchronous schedule park mode enable */
+#define	CMD_FS2		BIT(10)	/* frame list size */
+#define	CMD_ASP1	BIT(9)	/* asynchronous schedule park mode count */
+#define	CMD_ASP0	BIT(8)
+#define	CMD_LR		BIT(7)	/* light host/device controller reset */
+#define	CMD_IAA		BIT(6)	/* interrupt on async advance doorbell */
+#define	CMD_ASE		BIT(5)	/* asynchronous schedule enable */
+#define	CMD_PSE		BIT(4)	/* periodic schedule enable */
+#define	CMD_FS1		BIT(3)
+#define	CMD_FS0		BIT(2)
+#define	CMD_RST		BIT(1)	/* controller reset */
+#define	CMD_RUNSTOP	BIT(0)	/* run/stop */
+	u32	usbsts;
+#define	STS_PPCI(u)	\
+	(((u)>>16)&0xffff)	/* bits 31:16, port-n change detect */
+#define	STS_AS		BIT(15)	/* asynchronous schedule status */
+#define	STS_PS		BIT(14)	/* periodic schedule status */
+#define	STS_RCL		BIT(13)	/* reclamation */
+#define	STS_HCH		BIT(12)	/* HC halted */
+#define	STS_ULPII	BIT(10)	/* ULPI interrupt */
+#define	STS_SLI		BIT(8)	/* DC suspend */
+#define	STS_SRI		BIT(7)	/* SOF received */
+#define	STS_URI		BIT(6)	/* USB reset received */
+#define	STS_AAI		BIT(5)	/* interrupt on async advance */
+#define	STS_SEI		BIT(4)	/* system error */
+#define	STS_FRI		BIT(3)	/* frame list rollover */
+#define	STS_PCI		BIT(2)	/* port change detect */
+#define	STS_UEI		BIT(1)	/* USB error interrupt */
+#define	STS_UI		BIT(0)	/* USB interrupt */
+	u32	usbintr;
+/* bits 31:16, per-port interrupt enable */
+#define	INTR_PPCE(u)	(((u)>>16)&0xffff)
+#define	INTR_ULPIE	BIT(10)	/* ULPI enable */
+#define	INTR_SLE	BIT(8)	/* DC sleep/suspend enable */
+#define	INTR_SRE	BIT(7)	/* SOF received enable */
+#define	INTR_URE	BIT(6)	/* USB reset enable */
+#define	INTR_AAE	BIT(5)	/* interrupt on async advance enable */
+#define	INTR_SEE	BIT(4)	/* system error enable */
+#define	INTR_FRE	BIT(3)	/* frame list rollover enable */
+#define	INTR_PCE	BIT(2)	/* port change detect enable */
+#define	INTR_UEE	BIT(1)	/* USB error interrupt enable */
+#define	INTR_UE		BIT(0)	/* USB interrupt enable */
+	u32	frindex;	/* frame index */
+#define	FRINDEX_MASK	(0x3fff << 0)
+	u32	ctrldssegment;	/* not used */
+	u32	deviceaddr;
+#define USBADR_SHIFT	25
+#define	USBADR(d)	\
+	(((d)>>25)&0x7f)	/* bits 31:25, device address */
+#define USBADR_MASK	(0x7f << 25)
+#define	USBADRA		BIT(24)	/* device address advance */
+	u32	endpointlistaddr;/* endpoint list top memory address */
+/* bits 31:11, endpoint list pointer */
+#define	EPBASE(d)	(((d)>>11)&0x1fffff)
+#define	ENDPOINTLISTADDR_MASK	(0x1fffff << 11)
+	u32	ttctrl;		/* H: TT operatin, not used */
+	/* offset: 0x50 */
+	u32	burstsize;	/* burst size of data movement */
+#define	TXPBURST(b)	\
+	(((b)>>8)&0xff)		/* bits 15:8, TX burst length */
+#define	RXPBURST(b)	\
+	(((b)>>0)&0xff)		/* bits 7:0, RX burst length */
+	u32	txfilltuning;	/* TX tuning */
+	u32	txttfilltuning;	/* H: TX TT tuning */
+	u32	ic_usb;		/* control the IC_USB FS/LS transceiver */
+	/* offset: 0x60 */
+	u32	ulpi_viewport;	/* indirect access to ULPI PHY */
+#define	ULPIWU		BIT(31)	/* ULPI wakeup */
+#define	ULPIRUN		BIT(30)	/* ULPI read/write run */
+#define	ULPIRW		BIT(29)	/* ULPI read/write control */
+#define	ULPISS		BIT(27)	/* ULPI sync state */
+#define	ULPIPORT(u)	\
+	(((u)>>24)&7)		/* bits 26:24, ULPI port number */
+#define	ULPIADDR(u)	\
+	(((u)>>16)&0xff)	/* bits 23:16, ULPI data address */
+#define	ULPIDATRD(u)	\
+	(((u)>>8)&0xff)		/* bits 15:8, ULPI data read */
+#define	ULPIDATWR(u)	\
+	(((u)>>0)&0xff)		/* bits 7:0, ULPI date write */
+	u8	_reserved6[0x70-0x64];
+	/* offset: 0x70 */
+	u32	configflag;	/* H: not used */
+	u32	portsc1;	/* port status */
+#define	DA(p)	\
+	(((p)>>25)&0x7f)	/* bits 31:25, device address */
+#define	PORTS_SSTS	(BIT(24) | BIT(23))	/* suspend status */
+#define	PORTS_WKOC	BIT(22)	/* wake on over-current enable */
+#define	PORTS_WKDS	BIT(21)	/* wake on disconnect enable */
+#define	PORTS_WKCN	BIT(20)	/* wake on connect enable */
+#define	PORTS_PTC(p)	(((p)>>16)&0xf)	/* bits 19:16, port test control */
+#define	PORTS_PIC	(BIT(15) | BIT(14))	/* port indicator control */
+#define	PORTS_PO	BIT(13)	/* port owner */
+#define	PORTS_PP	BIT(12)	/* port power */
+#define	PORTS_LS	(BIT(11) | BIT(10)) 	/* line status */
+#define	PORTS_SLP	BIT(9)	/* suspend using L1 */
+#define	PORTS_PR	BIT(8)	/* port reset */
+#define	PORTS_SUSP	BIT(7)	/* suspend */
+#define	PORTS_FPR	BIT(6)	/* force port resume */
+#define	PORTS_OCC	BIT(5)	/* over-current change */
+#define	PORTS_OCA	BIT(4)	/* over-current active */
+#define	PORTS_PEC	BIT(3)	/* port enable/disable change */
+#define	PORTS_PE	BIT(2)	/* port enable/disable */
+#define	PORTS_CSC	BIT(1)	/* connect status change */
+#define	PORTS_CCS	BIT(0)	/* current connect status */
+	u8	_reserved7[0xb4-0x78];
+	/* offset: 0xb4 */
+	u32	devlc;		/* control LPM and each USB port behavior */
+/* bits 31:29, parallel transceiver select */
+#define	LPM_PTS(d)	(((d)>>29)&7)
+#define	LPM_STS		BIT(28)	/* serial transceiver select */
+#define	LPM_PTW		BIT(27)	/* parallel transceiver width */
+#define	LPM_PSPD(d)	(((d)>>25)&3)	/* bits 26:25, port speed */
+#define LPM_PSPD_MASK	(BIT(26) | BIT(25))
+#define LPM_SPEED_FULL	0
+#define LPM_SPEED_LOW	1
+#define LPM_SPEED_HIGH	2
+#define	LPM_SRT		BIT(24)	/* shorten reset time */
+#define	LPM_PFSC	BIT(23)	/* port force full speed connect */
+#define	LPM_PHCD	BIT(22) /* PHY low power suspend clock disable */
+#define	LPM_STL		BIT(16)	/* STALL reply to LPM token */
+#define	LPM_BA(d)	\
+	(((d)>>1)&0x7ff)	/* bits 11:1, BmAttributes */
+#define	LPM_NYT_ACK	BIT(0)	/* NYET/ACK reply to LPM token */
+	u8	_reserved8[0xf4-0xb8];
+	/* offset: 0xf4 */
+	u32	otgsc;		/* On-The-Go status and control */
+#define	OTGSC_DPIE	BIT(30)	/* data pulse interrupt enable */
+#define	OTGSC_MSE	BIT(29)	/* 1 ms timer interrupt enable */
+#define	OTGSC_BSEIE	BIT(28)	/* B session end interrupt enable */
+#define	OTGSC_BSVIE	BIT(27)	/* B session valid interrupt enable */
+#define	OTGSC_ASVIE	BIT(26)	/* A session valid interrupt enable */
+#define	OTGSC_AVVIE	BIT(25)	/* A VBUS valid interrupt enable */
+#define	OTGSC_IDIE	BIT(24)	/* USB ID interrupt enable */
+#define	OTGSC_DPIS	BIT(22)	/* data pulse interrupt status */
+#define	OTGSC_MSS	BIT(21)	/* 1 ms timer interrupt status */
+#define	OTGSC_BSEIS	BIT(20)	/* B session end interrupt status */
+#define	OTGSC_BSVIS	BIT(19)	/* B session valid interrupt status */
+#define	OTGSC_ASVIS	BIT(18)	/* A session valid interrupt status */
+#define	OTGSC_AVVIS	BIT(17)	/* A VBUS valid interrupt status */
+#define	OTGSC_IDIS	BIT(16)	/* USB ID interrupt status */
+#define	OTGSC_DPS	BIT(14)	/* data bus pulsing status */
+#define	OTGSC_MST	BIT(13)	/* 1 ms timer toggle */
+#define	OTGSC_BSE	BIT(12)	/* B session end */
+#define	OTGSC_BSV	BIT(11)	/* B session valid */
+#define	OTGSC_ASV	BIT(10)	/* A session valid */
+#define	OTGSC_AVV	BIT(9)	/* A VBUS valid */
+#define	OTGSC_USBID	BIT(8)	/* USB ID */
+#define	OTGSC_HABA	BIT(7)	/* hw assist B-disconnect to A-connect */
+#define	OTGSC_HADP	BIT(6)	/* hw assist data pulse */
+#define	OTGSC_IDPU	BIT(5)	/* ID pullup */
+#define	OTGSC_DP	BIT(4)	/* data pulsing */
+#define	OTGSC_OT	BIT(3)	/* OTG termination */
+#define	OTGSC_HAAR	BIT(2)	/* hw assist auto reset */
+#define	OTGSC_VC	BIT(1)	/* VBUS charge */
+#define	OTGSC_VD	BIT(0)	/* VBUS discharge */
+	u32	usbmode;
+#define	MODE_VBPS	BIT(5)	/* R/W VBUS power select */
+#define	MODE_SDIS	BIT(4)	/* R/W stream disable mode */
+#define	MODE_SLOM	BIT(3)	/* R/W setup lockout mode */
+#define	MODE_ENSE	BIT(2)	/* endian select */
+#define	MODE_CM(u)	(((u)>>0)&3)	/* bits 1:0, controller mode */
+#define	MODE_IDLE	0
+#define	MODE_DEVICE	2
+#define	MODE_HOST	3
+	u8	_reserved9[0x100-0xfc];
+	/* offset: 0x100 */
+	u32	endptnak;
+#define	EPTN(e)		\
+	(((e)>>16)&0xffff)	/* bits 31:16, TX endpoint NAK */
+#define	EPRN(e)		\
+	(((e)>>0)&0xffff)	/* bits 15:0, RX endpoint NAK */
+	u32	endptnaken;
+#define	EPTNE(e)	\
+	(((e)>>16)&0xffff)	/* bits 31:16, TX endpoint NAK enable */
+#define	EPRNE(e)	\
+	(((e)>>0)&0xffff)	/* bits 15:0, RX endpoint NAK enable */
+	u32	endptsetupstat;
+#define	SETUPSTAT_MASK		(0xffff << 0)	/* bits 15:0 */
+#define EP0SETUPSTAT_MASK	1
+	u32	endptprime;
+/* bits 31:16, prime endpoint transmit buffer */
+#define	PETB(e)		(((e)>>16)&0xffff)
+/* bits 15:0, prime endpoint receive buffer */
+#define	PERB(e)		(((e)>>0)&0xffff)
+	/* offset: 0x110 */
+	u32	endptflush;
+/* bits 31:16, flush endpoint transmit buffer */
+#define	FETB(e)		(((e)>>16)&0xffff)
+/* bits 15:0, flush endpoint receive buffer */
+#define	FERB(e)		(((e)>>0)&0xffff)
+	u32	endptstat;
+/* bits 31:16, endpoint transmit buffer ready */
+#define	ETBR(e)		(((e)>>16)&0xffff)
+/* bits 15:0, endpoint receive buffer ready */
+#define	ERBR(e)		(((e)>>0)&0xffff)
+	u32	endptcomplete;
+/* bits 31:16, endpoint transmit complete event */
+#define	ETCE(e)		(((e)>>16)&0xffff)
+/* bits 15:0, endpoint receive complete event */
+#define	ERCE(e)		(((e)>>0)&0xffff)
+	/* offset: 0x11c */
+	u32	endptctrl[16];
+#define	EPCTRL_TXE	BIT(23)	/* TX endpoint enable */
+#define	EPCTRL_TXR	BIT(22)	/* TX data toggle reset */
+#define	EPCTRL_TXI	BIT(21)	/* TX data toggle inhibit */
+#define	EPCTRL_TXT(e)	(((e)>>18)&3)	/* bits 19:18, TX endpoint type */
+#define	EPCTRL_TXT_SHIFT	18
+#define	EPCTRL_TXD	BIT(17)	/* TX endpoint data source */
+#define	EPCTRL_TXS	BIT(16)	/* TX endpoint STALL */
+#define	EPCTRL_RXE	BIT(7)	/* RX endpoint enable */
+#define	EPCTRL_RXR	BIT(6)	/* RX data toggle reset */
+#define	EPCTRL_RXI	BIT(5)	/* RX data toggle inhibit */
+#define	EPCTRL_RXT(e)	(((e)>>2)&3)	/* bits 3:2, RX endpoint type */
+#define	EPCTRL_RXT_SHIFT	2	/* bits 19:18, TX endpoint type */
+#define	EPCTRL_RXD	BIT(1)	/* RX endpoint data sink */
+#define	EPCTRL_RXS	BIT(0)	/* RX endpoint STALL */
+} __attribute__ ((packed));
+
+#endif /* __LANGWELL_UDC_H */
+
-- 
cgit v1.2.3-70-g09d2


From 453f77558810ffa669ed5a510a7173ec49def396 Mon Sep 17 00:00:00 2001
From: Hao Wu <hao.wu@intel.com>
Date: Thu, 4 Jun 2009 16:06:50 +0800
Subject: USB: Add Intel Langwell USB OTG Transceiver Drive

Description:
This driver is used for Intel Langwell* USB OTG controller in Intel
Moorestown* platform. It tries to implement host/device role switch
according to OTG spec.  The actual hsot and device functions are
accomplished in modified EHCI driver and Intel Langwell USB OTG client
controller driver.

* Langwell and Moorestown are names used in development. They are not
  approved official name.

Note:
This patch is the first version Intel Langwell USB OTG Transceiver
driver. The development is not finished, and the bug fixing is on going
for some hardware and software issues. The main purpose of this
submission is for code view.

Supported features:
- Data-line Pulsing SRP
- Support HNP to switch roles
- PCI D0/D3 power management support

Known issues:
- HNP is only tested with another Moorestown platform.
- PCI D0/D3 power management support is not fully tested.
- VBus Pulsing SRP is not support in current version.

Signed-off-by: Hao Wu <hao.wu@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/otg/Kconfig          |   14 +
 drivers/usb/otg/Makefile         |    1 +
 drivers/usb/otg/langwell_otg.c   | 1915 ++++++++++++++++++++++++++++++++++++++
 include/linux/usb/langwell_otg.h |  177 ++++
 4 files changed, 2107 insertions(+)
 create mode 100644 drivers/usb/otg/langwell_otg.c
 create mode 100644 include/linux/usb/langwell_otg.h

(limited to 'include')

diff --git a/drivers/usb/otg/Kconfig b/drivers/usb/otg/Kconfig
index aa884d072f0..69feeec1628 100644
--- a/drivers/usb/otg/Kconfig
+++ b/drivers/usb/otg/Kconfig
@@ -59,4 +59,18 @@ config NOP_USB_XCEIV
 	 built-in with usb ip or which are autonomous and doesn't require any
 	 phy programming such as ISP1x04 etc.
 
+config USB_LANGWELL_OTG
+	tristate "Intel Langwell USB OTG dual-role support"
+	depends on USB && MRST
+	select USB_OTG
+	select USB_OTG_UTILS
+	help
+	  Say Y here if you want to build Intel Langwell USB OTG
+	  transciever driver in kernel. This driver implements role
+	  switch between EHCI host driver and Langwell USB OTG
+	  client driver.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called langwell_otg.
+
 endif # USB || OTG
diff --git a/drivers/usb/otg/Makefile b/drivers/usb/otg/Makefile
index 20816785652..6d1abdd3c0a 100644
--- a/drivers/usb/otg/Makefile
+++ b/drivers/usb/otg/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_USB_OTG_UTILS)	+= otg.o
 obj-$(CONFIG_USB_GPIO_VBUS)	+= gpio_vbus.o
 obj-$(CONFIG_ISP1301_OMAP)	+= isp1301_omap.o
 obj-$(CONFIG_TWL4030_USB)	+= twl4030-usb.o
+obj-$(CONFIG_USB_LANGWELL_OTG)	+= langwell_otg.o
 obj-$(CONFIG_NOP_USB_XCEIV)	+= nop-usb-xceiv.o
 
 ccflags-$(CONFIG_USB_DEBUG)	+= -DDEBUG
diff --git a/drivers/usb/otg/langwell_otg.c b/drivers/usb/otg/langwell_otg.c
new file mode 100644
index 00000000000..6f628d0e9f3
--- /dev/null
+++ b/drivers/usb/otg/langwell_otg.c
@@ -0,0 +1,1915 @@
+/*
+ * Intel Langwell USB OTG transceiver driver
+ * Copyright (C) 2008 - 2009, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+/* This driver helps to switch Langwell OTG controller function between host
+ * and peripheral. It works with EHCI driver and Langwell client controller
+ * driver together.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/moduleparam.h>
+#include <linux/usb/ch9.h>
+#include <linux/usb/gadget.h>
+#include <linux/usb.h>
+#include <linux/usb/otg.h>
+#include <linux/notifier.h>
+#include <asm/ipc_defs.h>
+#include <linux/delay.h>
+#include "../core/hcd.h"
+
+#include <linux/usb/langwell_otg.h>
+
+#define	DRIVER_DESC		"Intel Langwell USB OTG transceiver driver"
+#define	DRIVER_VERSION		"3.0.0.32L.0002"
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_AUTHOR("Henry Yuan <hang.yuan@intel.com>, Hao Wu <hao.wu@intel.com>");
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL");
+
+static const char driver_name[] = "langwell_otg";
+
+static int langwell_otg_probe(struct pci_dev *pdev,
+			const struct pci_device_id *id);
+static void langwell_otg_remove(struct pci_dev *pdev);
+static int langwell_otg_suspend(struct pci_dev *pdev, pm_message_t message);
+static int langwell_otg_resume(struct pci_dev *pdev);
+
+static int langwell_otg_set_host(struct otg_transceiver *otg,
+				struct usb_bus *host);
+static int langwell_otg_set_peripheral(struct otg_transceiver *otg,
+				struct usb_gadget *gadget);
+static int langwell_otg_start_srp(struct otg_transceiver *otg);
+
+static const struct pci_device_id pci_ids[] = {{
+	.class =        ((PCI_CLASS_SERIAL_USB << 8) | 0xfe),
+	.class_mask =   ~0,
+	.vendor =	0x8086,
+	.device =	0x0811,
+	.subvendor =	PCI_ANY_ID,
+	.subdevice =	PCI_ANY_ID,
+}, { /* end: all zeroes */ }
+};
+
+static struct pci_driver otg_pci_driver = {
+	.name =		(char *) driver_name,
+	.id_table =	pci_ids,
+
+	.probe =	langwell_otg_probe,
+	.remove =	langwell_otg_remove,
+
+	.suspend =	langwell_otg_suspend,
+	.resume =	langwell_otg_resume,
+};
+
+static const char *state_string(enum usb_otg_state state)
+{
+	switch (state) {
+	case OTG_STATE_A_IDLE:
+		return "a_idle";
+	case OTG_STATE_A_WAIT_VRISE:
+		return "a_wait_vrise";
+	case OTG_STATE_A_WAIT_BCON:
+		return "a_wait_bcon";
+	case OTG_STATE_A_HOST:
+		return "a_host";
+	case OTG_STATE_A_SUSPEND:
+		return "a_suspend";
+	case OTG_STATE_A_PERIPHERAL:
+		return "a_peripheral";
+	case OTG_STATE_A_WAIT_VFALL:
+		return "a_wait_vfall";
+	case OTG_STATE_A_VBUS_ERR:
+		return "a_vbus_err";
+	case OTG_STATE_B_IDLE:
+		return "b_idle";
+	case OTG_STATE_B_SRP_INIT:
+		return "b_srp_init";
+	case OTG_STATE_B_PERIPHERAL:
+		return "b_peripheral";
+	case OTG_STATE_B_WAIT_ACON:
+		return "b_wait_acon";
+	case OTG_STATE_B_HOST:
+		return "b_host";
+	default:
+		return "UNDEFINED";
+	}
+}
+
+/* HSM timers */
+static inline struct langwell_otg_timer *otg_timer_initializer
+(void (*function)(unsigned long), unsigned long expires, unsigned long data)
+{
+	struct langwell_otg_timer *timer;
+	timer = kmalloc(sizeof(struct langwell_otg_timer), GFP_KERNEL);
+	timer->function = function;
+	timer->expires = expires;
+	timer->data = data;
+	return timer;
+}
+
+static struct langwell_otg_timer *a_wait_vrise_tmr, *a_wait_bcon_tmr,
+	*a_aidl_bdis_tmr, *b_ase0_brst_tmr, *b_se0_srp_tmr, *b_srp_res_tmr,
+	*b_bus_suspend_tmr;
+
+static struct list_head active_timers;
+
+static struct langwell_otg *the_transceiver;
+
+/* host/client notify transceiver when event affects HNP state */
+void langwell_update_transceiver()
+{
+	otg_dbg("transceiver driver is notified\n");
+	queue_work(the_transceiver->qwork, &the_transceiver->work);
+}
+EXPORT_SYMBOL(langwell_update_transceiver);
+
+static int langwell_otg_set_host(struct otg_transceiver *otg,
+					struct usb_bus *host)
+{
+	otg->host = host;
+
+	return 0;
+}
+
+static int langwell_otg_set_peripheral(struct otg_transceiver *otg,
+					struct usb_gadget *gadget)
+{
+	otg->gadget = gadget;
+
+	return 0;
+}
+
+static int langwell_otg_set_power(struct otg_transceiver *otg,
+				unsigned mA)
+{
+	return 0;
+}
+
+/* A-device drives vbus, controlled through PMIC CHRGCNTL register*/
+static void langwell_otg_drv_vbus(int on)
+{
+	struct ipc_pmic_reg_data	pmic_data = {0};
+	struct ipc_pmic_reg_data	battery_data;
+
+	/* Check if battery is attached or not */
+	battery_data.pmic_reg_data[0].register_address = 0xd2;
+	battery_data.ioc = 0;
+	battery_data.num_entries = 1;
+	if (ipc_pmic_register_read(&battery_data)) {
+		otg_dbg("Failed to read PMIC register 0xd2.\n");
+		return;
+	}
+
+	if ((battery_data.pmic_reg_data[0].value & 0x20) == 0) {
+		otg_dbg("no battery attached\n");
+		return;
+	}
+
+	/* Workaround for battery attachment issue */
+	if (battery_data.pmic_reg_data[0].value == 0x34) {
+		otg_dbg("battery \n");
+		return;
+	}
+
+	otg_dbg("battery attached\n");
+
+	pmic_data.ioc = 0;
+	pmic_data.pmic_reg_data[0].register_address = 0xD4;
+	pmic_data.num_entries = 1;
+	if (on)
+		pmic_data.pmic_reg_data[0].value = 0x20;
+	else
+		pmic_data.pmic_reg_data[0].value = 0xc0;
+
+	if (ipc_pmic_register_write(&pmic_data, TRUE))
+		otg_dbg("Failed to write PMIC.\n");
+
+}
+
+/* charge vbus or discharge vbus through a resistor to ground */
+static void langwell_otg_chrg_vbus(int on)
+{
+
+	u32	val;
+
+	val = readl(the_transceiver->regs + CI_OTGSC);
+
+	if (on)
+		writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_VC,
+				the_transceiver->regs + CI_OTGSC);
+	else
+		writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_VD,
+				the_transceiver->regs + CI_OTGSC);
+
+}
+
+/* Start SRP */
+static int langwell_otg_start_srp(struct otg_transceiver *otg)
+{
+	u32	val;
+
+	otg_dbg("Start SRP ->\n");
+
+	val = readl(the_transceiver->regs + CI_OTGSC);
+
+	writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_HADP,
+		the_transceiver->regs + CI_OTGSC);
+
+	/* Check if the data plus is finished or not */
+	msleep(8);
+	val = readl(the_transceiver->regs + CI_OTGSC);
+	if (val & (OTGSC_HADP | OTGSC_DP))
+		otg_dbg("DataLine SRP Error\n");
+
+	/* FIXME: VBus SRP */
+
+	return 0;
+}
+
+
+/* stop SOF via bus_suspend */
+static void langwell_otg_loc_sof(int on)
+{
+	struct usb_hcd	*hcd;
+	int		err;
+
+	otg_dbg("loc_sof -> %d\n", on);
+
+	hcd = bus_to_hcd(the_transceiver->otg.host);
+	if (on)
+		err = hcd->driver->bus_resume(hcd);
+	else
+		err = hcd->driver->bus_suspend(hcd);
+
+	if (err)
+		otg_dbg("Failed to resume/suspend bus - %d\n", err);
+}
+
+static void langwell_otg_phy_low_power(int on)
+{
+	u32	val;
+
+	otg_dbg("phy low power mode-> %d\n", on);
+
+	val = readl(the_transceiver->regs + CI_HOSTPC1);
+	if (on)
+		writel(val | HOSTPC1_PHCD, the_transceiver->regs + CI_HOSTPC1);
+	else
+		writel(val & ~HOSTPC1_PHCD, the_transceiver->regs + CI_HOSTPC1);
+}
+
+/* Enable/Disable OTG interrupt */
+static void langwell_otg_intr(int on)
+{
+	u32 val;
+
+	otg_dbg("interrupt -> %d\n", on);
+
+	val = readl(the_transceiver->regs + CI_OTGSC);
+	if (on) {
+		val = val | (OTGSC_INTEN_MASK | OTGSC_IDPU);
+		writel(val, the_transceiver->regs + CI_OTGSC);
+	} else {
+		val = val & ~(OTGSC_INTEN_MASK | OTGSC_IDPU);
+		writel(val, the_transceiver->regs + CI_OTGSC);
+	}
+}
+
+/* set HAAR: Hardware Assist Auto-Reset */
+static void langwell_otg_HAAR(int on)
+{
+	u32	val;
+
+	otg_dbg("HAAR -> %d\n", on);
+
+	val = readl(the_transceiver->regs + CI_OTGSC);
+	if (on)
+		writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_HAAR,
+				the_transceiver->regs + CI_OTGSC);
+	else
+		writel((val & ~OTGSC_INTSTS_MASK) & ~OTGSC_HAAR,
+				the_transceiver->regs + CI_OTGSC);
+}
+
+/* set HABA: Hardware Assist B-Disconnect to A-Connect */
+static void langwell_otg_HABA(int on)
+{
+	u32	val;
+
+	otg_dbg("HABA -> %d\n", on);
+
+	val = readl(the_transceiver->regs + CI_OTGSC);
+	if (on)
+		writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_HABA,
+				the_transceiver->regs + CI_OTGSC);
+	else
+		writel((val & ~OTGSC_INTSTS_MASK) & ~OTGSC_HABA,
+				the_transceiver->regs + CI_OTGSC);
+}
+
+static int langwell_otg_check_se0_srp(int on)
+{
+	u32 val;
+
+	int delay_time = TB_SE0_SRP * 10; /* step is 100us */
+
+	otg_dbg("check_se0_srp -> \n");
+
+	do {
+		udelay(100);
+		if (!delay_time--)
+			break;
+		val = readl(the_transceiver->regs + CI_PORTSC1);
+		val &= PORTSC_LS;
+	} while (!val);
+
+	otg_dbg("check_se0_srp <- \n");
+	return val;
+}
+
+/* The timeout callback function to set time out bit */
+static void set_tmout(unsigned long indicator)
+{
+	*(int *)indicator = 1;
+}
+
+void langwell_otg_nsf_msg(unsigned long indicator)
+{
+	switch (indicator) {
+	case 2:
+	case 4:
+	case 6:
+	case 7:
+		printk(KERN_ERR "OTG:NSF-%lu - deivce not responding\n",
+				indicator);
+		break;
+	case 3:
+		printk(KERN_ERR "OTG:NSF-%lu - deivce not supported\n",
+				indicator);
+		break;
+	default:
+		printk(KERN_ERR "Do not have this kind of NSF\n");
+		break;
+	}
+}
+
+/* Initialize timers */
+static void langwell_otg_init_timers(struct otg_hsm *hsm)
+{
+	/* HSM used timers */
+	a_wait_vrise_tmr = otg_timer_initializer(&set_tmout, TA_WAIT_VRISE,
+				(unsigned long)&hsm->a_wait_vrise_tmout);
+	a_wait_bcon_tmr = otg_timer_initializer(&set_tmout, TA_WAIT_BCON,
+				(unsigned long)&hsm->a_wait_bcon_tmout);
+	a_aidl_bdis_tmr = otg_timer_initializer(&set_tmout, TA_AIDL_BDIS,
+				(unsigned long)&hsm->a_aidl_bdis_tmout);
+	b_ase0_brst_tmr = otg_timer_initializer(&set_tmout, TB_ASE0_BRST,
+				(unsigned long)&hsm->b_ase0_brst_tmout);
+	b_se0_srp_tmr = otg_timer_initializer(&set_tmout, TB_SE0_SRP,
+				(unsigned long)&hsm->b_se0_srp);
+	b_srp_res_tmr = otg_timer_initializer(&set_tmout, TB_SRP_RES,
+				(unsigned long)&hsm->b_srp_res_tmout);
+	b_bus_suspend_tmr = otg_timer_initializer(&set_tmout, TB_BUS_SUSPEND,
+				(unsigned long)&hsm->b_bus_suspend_tmout);
+}
+
+/* Free timers */
+static void langwell_otg_free_timers(void)
+{
+	kfree(a_wait_vrise_tmr);
+	kfree(a_wait_bcon_tmr);
+	kfree(a_aidl_bdis_tmr);
+	kfree(b_ase0_brst_tmr);
+	kfree(b_se0_srp_tmr);
+	kfree(b_srp_res_tmr);
+	kfree(b_bus_suspend_tmr);
+}
+
+/* Add timer to timer list */
+static void langwell_otg_add_timer(void *gtimer)
+{
+	struct langwell_otg_timer *timer = (struct langwell_otg_timer *)gtimer;
+	struct langwell_otg_timer *tmp_timer;
+	u32	val32;
+
+	/* Check if the timer is already in the active list,
+	 * if so update timer count
+	 */
+	list_for_each_entry(tmp_timer, &active_timers, list)
+		if (tmp_timer == timer) {
+			timer->count = timer->expires;
+			return;
+		}
+	timer->count = timer->expires;
+
+	if (list_empty(&active_timers)) {
+		val32 = readl(the_transceiver->regs + CI_OTGSC);
+		writel(val32 | OTGSC_1MSE, the_transceiver->regs + CI_OTGSC);
+	}
+
+	list_add_tail(&timer->list, &active_timers);
+}
+
+/* Remove timer from the timer list; clear timeout status */
+static void langwell_otg_del_timer(void *gtimer)
+{
+	struct langwell_otg_timer *timer = (struct langwell_otg_timer *)gtimer;
+	struct langwell_otg_timer *tmp_timer, *del_tmp;
+	u32 val32;
+
+	list_for_each_entry_safe(tmp_timer, del_tmp, &active_timers, list)
+		if (tmp_timer == timer)
+			list_del(&timer->list);
+
+	if (list_empty(&active_timers)) {
+		val32 = readl(the_transceiver->regs + CI_OTGSC);
+		writel(val32 & ~OTGSC_1MSE, the_transceiver->regs + CI_OTGSC);
+	}
+}
+
+/* Reduce timer count by 1, and find timeout conditions.*/
+static int langwell_otg_tick_timer(u32 *int_sts)
+{
+	struct langwell_otg_timer *tmp_timer, *del_tmp;
+	int expired = 0;
+
+	list_for_each_entry_safe(tmp_timer, del_tmp, &active_timers, list) {
+		tmp_timer->count--;
+		/* check if timer expires */
+		if (!tmp_timer->count) {
+			list_del(&tmp_timer->list);
+			tmp_timer->function(tmp_timer->data);
+			expired = 1;
+		}
+	}
+
+	if (list_empty(&active_timers)) {
+		otg_dbg("tick timer: disable 1ms int\n");
+		*int_sts = *int_sts & ~OTGSC_1MSE;
+	}
+	return expired;
+}
+
+static void reset_otg(void)
+{
+	u32	val;
+	int	delay_time = 1000;
+
+	otg_dbg("reseting OTG controller ...\n");
+	val = readl(the_transceiver->regs + CI_USBCMD);
+	writel(val | USBCMD_RST, the_transceiver->regs + CI_USBCMD);
+	do {
+		udelay(100);
+		if (!delay_time--)
+			otg_dbg("reset timeout\n");
+		val = readl(the_transceiver->regs + CI_USBCMD);
+		val &= USBCMD_RST;
+	} while (val != 0);
+	otg_dbg("reset done.\n");
+}
+
+static void set_host_mode(void)
+{
+	u32 	val;
+
+	reset_otg();
+	val = readl(the_transceiver->regs + CI_USBMODE);
+	val = (val & (~USBMODE_CM)) | USBMODE_HOST;
+	writel(val, the_transceiver->regs + CI_USBMODE);
+}
+
+static void set_client_mode(void)
+{
+	u32 	val;
+
+	reset_otg();
+	val = readl(the_transceiver->regs + CI_USBMODE);
+	val = (val & (~USBMODE_CM)) | USBMODE_DEVICE;
+	writel(val, the_transceiver->regs + CI_USBMODE);
+}
+
+static void init_hsm(void)
+{
+	struct langwell_otg	*langwell = the_transceiver;
+	u32			val32;
+
+	/* read OTGSC after reset */
+	val32 = readl(langwell->regs + CI_OTGSC);
+	otg_dbg("%s: OTGSC init value = 0x%x\n", __func__, val32);
+
+	/* set init state */
+	if (val32 & OTGSC_ID) {
+		langwell->hsm.id = 1;
+		langwell->otg.default_a = 0;
+		set_client_mode();
+		langwell->otg.state = OTG_STATE_B_IDLE;
+		langwell_otg_drv_vbus(0);
+	} else {
+		langwell->hsm.id = 0;
+		langwell->otg.default_a = 1;
+		set_host_mode();
+		langwell->otg.state = OTG_STATE_A_IDLE;
+	}
+
+	/* set session indicator */
+	if (val32 & OTGSC_BSE)
+		langwell->hsm.b_sess_end = 1;
+	if (val32 & OTGSC_BSV)
+		langwell->hsm.b_sess_vld = 1;
+	if (val32 & OTGSC_ASV)
+		langwell->hsm.a_sess_vld = 1;
+	if (val32 & OTGSC_AVV)
+		langwell->hsm.a_vbus_vld = 1;
+
+	/* defautly power the bus */
+	langwell->hsm.a_bus_req = 1;
+	langwell->hsm.a_bus_drop = 0;
+	/* defautly don't request bus as B device */
+	langwell->hsm.b_bus_req = 0;
+	/* no system error */
+	langwell->hsm.a_clr_err = 0;
+}
+
+static irqreturn_t otg_dummy_irq(int irq, void *_dev)
+{
+	void __iomem	*reg_base = _dev;
+	u32	val;
+	u32	int_mask = 0;
+
+	val = readl(reg_base + CI_USBMODE);
+	if ((val & USBMODE_CM) != USBMODE_DEVICE)
+		return IRQ_NONE;
+
+	val = readl(reg_base + CI_USBSTS);
+	int_mask = val & INTR_DUMMY_MASK;
+
+	if (int_mask == 0)
+		return IRQ_NONE;
+
+	/* clear hsm.b_conn here since host driver can't detect it
+	*  otg_dummy_irq called means B-disconnect happened.
+	*/
+	if (the_transceiver->hsm.b_conn) {
+		the_transceiver->hsm.b_conn = 0;
+		if (spin_trylock(&the_transceiver->wq_lock)) {
+			queue_work(the_transceiver->qwork,
+				&the_transceiver->work);
+			spin_unlock(&the_transceiver->wq_lock);
+		}
+	}
+	/* Clear interrupts */
+	writel(int_mask, reg_base + CI_USBSTS);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t otg_irq(int irq, void *_dev)
+{
+	struct	langwell_otg *langwell = _dev;
+	u32	int_sts, int_en;
+	u32	int_mask = 0;
+	int	flag = 0;
+
+	int_sts = readl(langwell->regs + CI_OTGSC);
+	int_en = (int_sts & OTGSC_INTEN_MASK) >> 8;
+	int_mask = int_sts & int_en;
+	if (int_mask == 0)
+		return IRQ_NONE;
+
+	if (int_mask & OTGSC_IDIS) {
+		otg_dbg("%s: id change int\n", __func__);
+		langwell->hsm.id = (int_sts & OTGSC_ID) ? 1 : 0;
+		flag = 1;
+	}
+	if (int_mask & OTGSC_DPIS) {
+		otg_dbg("%s: data pulse int\n", __func__);
+		langwell->hsm.a_srp_det = (int_sts & OTGSC_DPS) ? 1 : 0;
+		flag = 1;
+	}
+	if (int_mask & OTGSC_BSEIS) {
+		otg_dbg("%s: b session end int\n", __func__);
+		langwell->hsm.b_sess_end = (int_sts & OTGSC_BSE) ? 1 : 0;
+		flag = 1;
+	}
+	if (int_mask & OTGSC_BSVIS) {
+		otg_dbg("%s: b session valid int\n", __func__);
+		langwell->hsm.b_sess_vld = (int_sts & OTGSC_BSV) ? 1 : 0;
+		flag = 1;
+	}
+	if (int_mask & OTGSC_ASVIS) {
+		otg_dbg("%s: a session valid int\n", __func__);
+		langwell->hsm.a_sess_vld = (int_sts & OTGSC_ASV) ? 1 : 0;
+		flag = 1;
+	}
+	if (int_mask & OTGSC_AVVIS) {
+		otg_dbg("%s: a vbus valid int\n", __func__);
+		langwell->hsm.a_vbus_vld = (int_sts & OTGSC_AVV) ? 1 : 0;
+		flag = 1;
+	}
+
+	if (int_mask & OTGSC_1MSS) {
+		/* need to schedule otg_work if any timer is expired */
+		if (langwell_otg_tick_timer(&int_sts))
+			flag = 1;
+	}
+
+	writel((int_sts & ~OTGSC_INTSTS_MASK) | int_mask,
+			langwell->regs + CI_OTGSC);
+	if (flag)
+		queue_work(langwell->qwork, &langwell->work);
+
+	return IRQ_HANDLED;
+}
+
+static void langwell_otg_work(struct work_struct *work)
+{
+	struct langwell_otg *langwell = container_of(work,
+					struct langwell_otg, work);
+	int	retval;
+
+	otg_dbg("%s: old state = %s\n", __func__,
+			state_string(langwell->otg.state));
+
+	switch (langwell->otg.state) {
+	case OTG_STATE_UNDEFINED:
+	case OTG_STATE_B_IDLE:
+		if (!langwell->hsm.id) {
+			langwell_otg_del_timer(b_srp_res_tmr);
+			langwell->otg.default_a = 1;
+			langwell->hsm.a_srp_det = 0;
+
+			langwell_otg_chrg_vbus(0);
+			langwell_otg_drv_vbus(0);
+
+			set_host_mode();
+			langwell->otg.state = OTG_STATE_A_IDLE;
+			queue_work(langwell->qwork, &langwell->work);
+		} else if (langwell->hsm.b_srp_res_tmout) {
+			langwell->hsm.b_srp_res_tmout = 0;
+			langwell->hsm.b_bus_req = 0;
+			langwell_otg_nsf_msg(6);
+		} else if (langwell->hsm.b_sess_vld) {
+			langwell_otg_del_timer(b_srp_res_tmr);
+			langwell->hsm.b_sess_end = 0;
+			langwell->hsm.a_bus_suspend = 0;
+
+			langwell_otg_chrg_vbus(0);
+			if (langwell->client_ops) {
+				langwell->client_ops->resume(langwell->pdev);
+				langwell->otg.state = OTG_STATE_B_PERIPHERAL;
+			} else
+				otg_dbg("client driver not loaded.\n");
+
+		} else if (langwell->hsm.b_bus_req &&
+				(langwell->hsm.b_sess_end)) {
+			/* workaround for b_se0_srp detection */
+			retval = langwell_otg_check_se0_srp(0);
+			if (retval) {
+				langwell->hsm.b_bus_req = 0;
+				otg_dbg("LS is not SE0, try again later\n");
+			} else {
+				/* Start SRP */
+				langwell_otg_start_srp(&langwell->otg);
+				langwell_otg_add_timer(b_srp_res_tmr);
+			}
+		}
+		break;
+	case OTG_STATE_B_SRP_INIT:
+		if (!langwell->hsm.id) {
+			langwell->otg.default_a = 1;
+			langwell->hsm.a_srp_det = 0;
+
+			langwell_otg_drv_vbus(0);
+			langwell_otg_chrg_vbus(0);
+
+			langwell->otg.state = OTG_STATE_A_IDLE;
+			queue_work(langwell->qwork, &langwell->work);
+		} else if (langwell->hsm.b_sess_vld) {
+			langwell_otg_chrg_vbus(0);
+			if (langwell->client_ops) {
+				langwell->client_ops->resume(langwell->pdev);
+				langwell->otg.state = OTG_STATE_B_PERIPHERAL;
+			} else
+				otg_dbg("client driver not loaded.\n");
+		}
+		break;
+	case OTG_STATE_B_PERIPHERAL:
+		if (!langwell->hsm.id) {
+			langwell->otg.default_a = 1;
+			langwell->hsm.a_srp_det = 0;
+
+			langwell_otg_drv_vbus(0);
+			langwell_otg_chrg_vbus(0);
+			set_host_mode();
+
+			if (langwell->client_ops) {
+				langwell->client_ops->suspend(langwell->pdev,
+					PMSG_FREEZE);
+			} else
+				otg_dbg("client driver has been removed.\n");
+
+			langwell->otg.state = OTG_STATE_A_IDLE;
+			queue_work(langwell->qwork, &langwell->work);
+		} else if (!langwell->hsm.b_sess_vld) {
+			langwell->hsm.b_hnp_enable = 0;
+
+			if (langwell->client_ops) {
+				langwell->client_ops->suspend(langwell->pdev,
+					PMSG_FREEZE);
+			} else
+				otg_dbg("client driver has been removed.\n");
+
+			langwell->otg.state = OTG_STATE_B_IDLE;
+		} else if (langwell->hsm.b_bus_req && langwell->hsm.b_hnp_enable
+			&& langwell->hsm.a_bus_suspend) {
+
+			if (langwell->client_ops) {
+				langwell->client_ops->suspend(langwell->pdev,
+					PMSG_FREEZE);
+			} else
+				otg_dbg("client driver has been removed.\n");
+
+			langwell_otg_HAAR(1);
+			langwell->hsm.a_conn = 0;
+
+			if (langwell->host_ops) {
+				langwell->host_ops->probe(langwell->pdev,
+					langwell->host_ops->id_table);
+				langwell->otg.state = OTG_STATE_B_WAIT_ACON;
+			} else
+				otg_dbg("host driver not loaded.\n");
+
+			langwell->hsm.a_bus_resume = 0;
+			langwell->hsm.b_ase0_brst_tmout = 0;
+			langwell_otg_add_timer(b_ase0_brst_tmr);
+		}
+		break;
+
+	case OTG_STATE_B_WAIT_ACON:
+		if (!langwell->hsm.id) {
+			langwell_otg_del_timer(b_ase0_brst_tmr);
+			langwell->otg.default_a = 1;
+			langwell->hsm.a_srp_det = 0;
+
+			langwell_otg_drv_vbus(0);
+			langwell_otg_chrg_vbus(0);
+			set_host_mode();
+
+			langwell_otg_HAAR(0);
+			if (langwell->host_ops)
+				langwell->host_ops->remove(langwell->pdev);
+			else
+				otg_dbg("host driver has been removed.\n");
+			langwell->otg.state = OTG_STATE_A_IDLE;
+			queue_work(langwell->qwork, &langwell->work);
+		} else if (!langwell->hsm.b_sess_vld) {
+			langwell_otg_del_timer(b_ase0_brst_tmr);
+			langwell->hsm.b_hnp_enable = 0;
+			langwell->hsm.b_bus_req = 0;
+			langwell_otg_chrg_vbus(0);
+			langwell_otg_HAAR(0);
+
+			if (langwell->host_ops)
+				langwell->host_ops->remove(langwell->pdev);
+			else
+				otg_dbg("host driver has been removed.\n");
+			langwell->otg.state = OTG_STATE_B_IDLE;
+		} else if (langwell->hsm.a_conn) {
+			langwell_otg_del_timer(b_ase0_brst_tmr);
+			langwell_otg_HAAR(0);
+			langwell->otg.state = OTG_STATE_B_HOST;
+			queue_work(langwell->qwork, &langwell->work);
+		} else if (langwell->hsm.a_bus_resume ||
+				langwell->hsm.b_ase0_brst_tmout) {
+			langwell_otg_del_timer(b_ase0_brst_tmr);
+			langwell_otg_HAAR(0);
+			langwell_otg_nsf_msg(7);
+
+			if (langwell->host_ops)
+				langwell->host_ops->remove(langwell->pdev);
+			else
+				otg_dbg("host driver has been removed.\n");
+
+			langwell->hsm.a_bus_suspend = 0;
+			langwell->hsm.b_bus_req = 0;
+
+			if (langwell->client_ops)
+				langwell->client_ops->resume(langwell->pdev);
+			else
+				otg_dbg("client driver not loaded.\n");
+
+			langwell->otg.state = OTG_STATE_B_PERIPHERAL;
+		}
+		break;
+
+	case OTG_STATE_B_HOST:
+		if (!langwell->hsm.id) {
+			langwell->otg.default_a = 1;
+			langwell->hsm.a_srp_det = 0;
+
+			langwell_otg_drv_vbus(0);
+			langwell_otg_chrg_vbus(0);
+			set_host_mode();
+			if (langwell->host_ops)
+				langwell->host_ops->remove(langwell->pdev);
+			else
+				otg_dbg("host driver has been removed.\n");
+			langwell->otg.state = OTG_STATE_A_IDLE;
+			queue_work(langwell->qwork, &langwell->work);
+		} else if (!langwell->hsm.b_sess_vld) {
+			langwell->hsm.b_hnp_enable = 0;
+			langwell->hsm.b_bus_req = 0;
+			langwell_otg_chrg_vbus(0);
+			if (langwell->host_ops)
+				langwell->host_ops->remove(langwell->pdev);
+			else
+				otg_dbg("host driver has been removed.\n");
+			langwell->otg.state = OTG_STATE_B_IDLE;
+		} else if ((!langwell->hsm.b_bus_req) ||
+				(!langwell->hsm.a_conn)) {
+			langwell->hsm.b_bus_req = 0;
+			langwell_otg_loc_sof(0);
+			if (langwell->host_ops)
+				langwell->host_ops->remove(langwell->pdev);
+			else
+				otg_dbg("host driver has been removed.\n");
+
+			langwell->hsm.a_bus_suspend = 0;
+
+			if (langwell->client_ops)
+				langwell->client_ops->resume(langwell->pdev);
+			else
+				otg_dbg("client driver not loaded.\n");
+
+			langwell->otg.state = OTG_STATE_B_PERIPHERAL;
+		}
+		break;
+
+	case OTG_STATE_A_IDLE:
+		langwell->otg.default_a = 1;
+		if (langwell->hsm.id) {
+			langwell->otg.default_a = 0;
+			langwell->hsm.b_bus_req = 0;
+			langwell_otg_drv_vbus(0);
+			langwell_otg_chrg_vbus(0);
+
+			langwell->otg.state = OTG_STATE_B_IDLE;
+			queue_work(langwell->qwork, &langwell->work);
+		} else if (langwell->hsm.a_sess_vld) {
+			langwell_otg_drv_vbus(1);
+			langwell->hsm.a_srp_det = 1;
+			langwell->hsm.a_wait_vrise_tmout = 0;
+			langwell_otg_add_timer(a_wait_vrise_tmr);
+			langwell->otg.state = OTG_STATE_A_WAIT_VRISE;
+			queue_work(langwell->qwork, &langwell->work);
+		} else if (!langwell->hsm.a_bus_drop &&
+			(langwell->hsm.a_srp_det || langwell->hsm.a_bus_req)) {
+			langwell_otg_drv_vbus(1);
+			langwell->hsm.a_wait_vrise_tmout = 0;
+			langwell_otg_add_timer(a_wait_vrise_tmr);
+			langwell->otg.state = OTG_STATE_A_WAIT_VRISE;
+			queue_work(langwell->qwork, &langwell->work);
+		}
+		break;
+	case OTG_STATE_A_WAIT_VRISE:
+		if (langwell->hsm.id) {
+			langwell_otg_del_timer(a_wait_vrise_tmr);
+			langwell->hsm.b_bus_req = 0;
+			langwell->otg.default_a = 0;
+			langwell_otg_drv_vbus(0);
+			langwell->otg.state = OTG_STATE_B_IDLE;
+		} else if (langwell->hsm.a_vbus_vld) {
+			langwell_otg_del_timer(a_wait_vrise_tmr);
+			if (langwell->host_ops)
+				langwell->host_ops->probe(langwell->pdev,
+						langwell->host_ops->id_table);
+			else
+				otg_dbg("host driver not loaded.\n");
+			langwell->hsm.b_conn = 0;
+			langwell->hsm.a_set_b_hnp_en = 0;
+			langwell->hsm.a_wait_bcon_tmout = 0;
+			langwell_otg_add_timer(a_wait_bcon_tmr);
+			langwell->otg.state = OTG_STATE_A_WAIT_BCON;
+		} else if (langwell->hsm.a_wait_vrise_tmout) {
+			if (langwell->hsm.a_vbus_vld) {
+				if (langwell->host_ops)
+					langwell->host_ops->probe(
+						langwell->pdev,
+						langwell->host_ops->id_table);
+				else
+					otg_dbg("host driver not loaded.\n");
+				langwell->hsm.b_conn = 0;
+				langwell->hsm.a_set_b_hnp_en = 0;
+				langwell->hsm.a_wait_bcon_tmout = 0;
+				langwell_otg_add_timer(a_wait_bcon_tmr);
+				langwell->otg.state = OTG_STATE_A_WAIT_BCON;
+			} else {
+				langwell_otg_drv_vbus(0);
+				langwell->otg.state = OTG_STATE_A_VBUS_ERR;
+			}
+		}
+		break;
+	case OTG_STATE_A_WAIT_BCON:
+		if (langwell->hsm.id) {
+			langwell_otg_del_timer(a_wait_bcon_tmr);
+
+			langwell->otg.default_a = 0;
+			langwell->hsm.b_bus_req = 0;
+			if (langwell->host_ops)
+				langwell->host_ops->remove(langwell->pdev);
+			else
+				otg_dbg("host driver has been removed.\n");
+			langwell_otg_drv_vbus(0);
+			langwell->otg.state = OTG_STATE_B_IDLE;
+			queue_work(langwell->qwork, &langwell->work);
+		} else if (!langwell->hsm.a_vbus_vld) {
+			langwell_otg_del_timer(a_wait_bcon_tmr);
+
+			if (langwell->host_ops)
+				langwell->host_ops->remove(langwell->pdev);
+			else
+				otg_dbg("host driver has been removed.\n");
+			langwell_otg_drv_vbus(0);
+			langwell->otg.state = OTG_STATE_A_VBUS_ERR;
+		} else if (langwell->hsm.a_bus_drop ||
+				(langwell->hsm.a_wait_bcon_tmout &&
+				!langwell->hsm.a_bus_req)) {
+			langwell_otg_del_timer(a_wait_bcon_tmr);
+
+			if (langwell->host_ops)
+				langwell->host_ops->remove(langwell->pdev);
+			else
+				otg_dbg("host driver has been removed.\n");
+			langwell_otg_drv_vbus(0);
+			langwell->otg.state = OTG_STATE_A_WAIT_VFALL;
+		} else if (langwell->hsm.b_conn) {
+			langwell_otg_del_timer(a_wait_bcon_tmr);
+
+			langwell->hsm.a_suspend_req = 0;
+			langwell->otg.state = OTG_STATE_A_HOST;
+			if (!langwell->hsm.a_bus_req &&
+				langwell->hsm.a_set_b_hnp_en) {
+				/* It is not safe enough to do a fast
+				 * transistion from A_WAIT_BCON to
+				 * A_SUSPEND */
+				msleep(10000);
+				if (langwell->hsm.a_bus_req)
+					break;
+
+				if (request_irq(langwell->pdev->irq,
+					otg_dummy_irq, IRQF_SHARED,
+					driver_name, langwell->regs) != 0) {
+					otg_dbg("request interrupt %d fail\n",
+					langwell->pdev->irq);
+				}
+
+				langwell_otg_HABA(1);
+				langwell->hsm.b_bus_resume = 0;
+				langwell->hsm.a_aidl_bdis_tmout = 0;
+				langwell_otg_add_timer(a_aidl_bdis_tmr);
+
+				langwell_otg_loc_sof(0);
+				langwell->otg.state = OTG_STATE_A_SUSPEND;
+			} else if (!langwell->hsm.a_bus_req &&
+				!langwell->hsm.a_set_b_hnp_en) {
+				struct pci_dev *pdev = langwell->pdev;
+				if (langwell->host_ops)
+					langwell->host_ops->remove(pdev);
+				else
+					otg_dbg("host driver removed.\n");
+				langwell_otg_drv_vbus(0);
+				langwell->otg.state = OTG_STATE_A_WAIT_VFALL;
+			}
+		}
+		break;
+	case OTG_STATE_A_HOST:
+		if (langwell->hsm.id) {
+			langwell->otg.default_a = 0;
+			langwell->hsm.b_bus_req = 0;
+			if (langwell->host_ops)
+				langwell->host_ops->remove(langwell->pdev);
+			else
+				otg_dbg("host driver has been removed.\n");
+			langwell_otg_drv_vbus(0);
+			langwell->otg.state = OTG_STATE_B_IDLE;
+			queue_work(langwell->qwork, &langwell->work);
+		} else if (langwell->hsm.a_bus_drop ||
+		(!langwell->hsm.a_set_b_hnp_en && !langwell->hsm.a_bus_req)) {
+			if (langwell->host_ops)
+				langwell->host_ops->remove(langwell->pdev);
+			else
+				otg_dbg("host driver has been removed.\n");
+			langwell_otg_drv_vbus(0);
+			langwell->otg.state = OTG_STATE_A_WAIT_VFALL;
+		} else if (!langwell->hsm.a_vbus_vld) {
+			if (langwell->host_ops)
+				langwell->host_ops->remove(langwell->pdev);
+			else
+				otg_dbg("host driver has been removed.\n");
+			langwell_otg_drv_vbus(0);
+			langwell->otg.state = OTG_STATE_A_VBUS_ERR;
+		} else if (langwell->hsm.a_set_b_hnp_en
+				&& !langwell->hsm.a_bus_req) {
+			/* Set HABA to enable hardware assistance to signal
+			 *  A-connect after receiver B-disconnect. Hardware
+			 *  will then set client mode and enable URE, SLE and
+			 *  PCE after the assistance. otg_dummy_irq is used to
+			 *  clean these ints when client driver is not resumed.
+			 */
+			if (request_irq(langwell->pdev->irq,
+				otg_dummy_irq, IRQF_SHARED, driver_name,
+				langwell->regs) != 0) {
+				otg_dbg("request interrupt %d failed\n",
+						langwell->pdev->irq);
+			}
+
+			/* set HABA */
+			langwell_otg_HABA(1);
+			langwell->hsm.b_bus_resume = 0;
+			langwell->hsm.a_aidl_bdis_tmout = 0;
+			langwell_otg_add_timer(a_aidl_bdis_tmr);
+			langwell_otg_loc_sof(0);
+			langwell->otg.state = OTG_STATE_A_SUSPEND;
+		} else if (!langwell->hsm.b_conn || !langwell->hsm.a_bus_req) {
+			langwell->hsm.a_wait_bcon_tmout = 0;
+			langwell->hsm.a_set_b_hnp_en = 0;
+			langwell_otg_add_timer(a_wait_bcon_tmr);
+			langwell->otg.state = OTG_STATE_A_WAIT_BCON;
+		}
+		break;
+	case OTG_STATE_A_SUSPEND:
+		if (langwell->hsm.id) {
+			langwell_otg_del_timer(a_aidl_bdis_tmr);
+			langwell_otg_HABA(0);
+			free_irq(langwell->pdev->irq, langwell->regs);
+			langwell->otg.default_a = 0;
+			langwell->hsm.b_bus_req = 0;
+			if (langwell->host_ops)
+				langwell->host_ops->remove(langwell->pdev);
+			else
+				otg_dbg("host driver has been removed.\n");
+			langwell_otg_drv_vbus(0);
+			langwell->otg.state = OTG_STATE_B_IDLE;
+			queue_work(langwell->qwork, &langwell->work);
+		} else if (langwell->hsm.a_bus_req ||
+				langwell->hsm.b_bus_resume) {
+			langwell_otg_del_timer(a_aidl_bdis_tmr);
+			langwell_otg_HABA(0);
+			free_irq(langwell->pdev->irq, langwell->regs);
+			langwell->hsm.a_suspend_req = 0;
+			langwell_otg_loc_sof(1);
+			langwell->otg.state = OTG_STATE_A_HOST;
+		} else if (langwell->hsm.a_aidl_bdis_tmout ||
+				langwell->hsm.a_bus_drop) {
+			langwell_otg_del_timer(a_aidl_bdis_tmr);
+			langwell_otg_HABA(0);
+			free_irq(langwell->pdev->irq, langwell->regs);
+			if (langwell->host_ops)
+				langwell->host_ops->remove(langwell->pdev);
+			else
+				otg_dbg("host driver has been removed.\n");
+			langwell_otg_drv_vbus(0);
+			langwell->otg.state = OTG_STATE_A_WAIT_VFALL;
+		} else if (!langwell->hsm.b_conn &&
+				langwell->hsm.a_set_b_hnp_en) {
+			langwell_otg_del_timer(a_aidl_bdis_tmr);
+			langwell_otg_HABA(0);
+			free_irq(langwell->pdev->irq, langwell->regs);
+
+			if (langwell->host_ops)
+				langwell->host_ops->remove(langwell->pdev);
+			else
+				otg_dbg("host driver has been removed.\n");
+
+			langwell->hsm.b_bus_suspend = 0;
+			langwell->hsm.b_bus_suspend_vld = 0;
+			langwell->hsm.b_bus_suspend_tmout = 0;
+
+			/* msleep(200); */
+			if (langwell->client_ops)
+				langwell->client_ops->resume(langwell->pdev);
+			else
+				otg_dbg("client driver not loaded.\n");
+
+			langwell_otg_add_timer(b_bus_suspend_tmr);
+			langwell->otg.state = OTG_STATE_A_PERIPHERAL;
+			break;
+		} else if (!langwell->hsm.a_vbus_vld) {
+			langwell_otg_del_timer(a_aidl_bdis_tmr);
+			langwell_otg_HABA(0);
+			free_irq(langwell->pdev->irq, langwell->regs);
+			if (langwell->host_ops)
+				langwell->host_ops->remove(langwell->pdev);
+			else
+				otg_dbg("host driver has been removed.\n");
+			langwell_otg_drv_vbus(0);
+			langwell->otg.state = OTG_STATE_A_VBUS_ERR;
+		}
+		break;
+	case OTG_STATE_A_PERIPHERAL:
+		if (langwell->hsm.id) {
+			langwell_otg_del_timer(b_bus_suspend_tmr);
+			langwell->otg.default_a = 0;
+			langwell->hsm.b_bus_req = 0;
+			if (langwell->client_ops)
+				langwell->client_ops->suspend(langwell->pdev,
+					PMSG_FREEZE);
+			else
+				otg_dbg("client driver has been removed.\n");
+			langwell_otg_drv_vbus(0);
+			langwell->otg.state = OTG_STATE_B_IDLE;
+			queue_work(langwell->qwork, &langwell->work);
+		} else if (!langwell->hsm.a_vbus_vld) {
+			langwell_otg_del_timer(b_bus_suspend_tmr);
+			if (langwell->client_ops)
+				langwell->client_ops->suspend(langwell->pdev,
+					PMSG_FREEZE);
+			else
+				otg_dbg("client driver has been removed.\n");
+			langwell_otg_drv_vbus(0);
+			langwell->otg.state = OTG_STATE_A_VBUS_ERR;
+		} else if (langwell->hsm.a_bus_drop) {
+			langwell_otg_del_timer(b_bus_suspend_tmr);
+			if (langwell->client_ops)
+				langwell->client_ops->suspend(langwell->pdev,
+					PMSG_FREEZE);
+			else
+				otg_dbg("client driver has been removed.\n");
+			langwell_otg_drv_vbus(0);
+			langwell->otg.state = OTG_STATE_A_WAIT_VFALL;
+		} else if (langwell->hsm.b_bus_suspend) {
+			langwell_otg_del_timer(b_bus_suspend_tmr);
+			if (langwell->client_ops)
+				langwell->client_ops->suspend(langwell->pdev,
+					PMSG_FREEZE);
+			else
+				otg_dbg("client driver has been removed.\n");
+
+			if (langwell->host_ops)
+				langwell->host_ops->probe(langwell->pdev,
+						langwell->host_ops->id_table);
+			else
+				otg_dbg("host driver not loaded.\n");
+			langwell->hsm.a_set_b_hnp_en = 0;
+			langwell->hsm.a_wait_bcon_tmout = 0;
+			langwell_otg_add_timer(a_wait_bcon_tmr);
+			langwell->otg.state = OTG_STATE_A_WAIT_BCON;
+		} else if (langwell->hsm.b_bus_suspend_tmout) {
+			u32	val;
+			val = readl(langwell->regs + CI_PORTSC1);
+			if (!(val & PORTSC_SUSP))
+				break;
+			if (langwell->client_ops)
+				langwell->client_ops->suspend(langwell->pdev,
+						PMSG_FREEZE);
+			else
+				otg_dbg("client driver has been removed.\n");
+			if (langwell->host_ops)
+				langwell->host_ops->probe(langwell->pdev,
+						langwell->host_ops->id_table);
+			else
+				otg_dbg("host driver not loaded.\n");
+			langwell->hsm.a_set_b_hnp_en = 0;
+			langwell->hsm.a_wait_bcon_tmout = 0;
+			langwell_otg_add_timer(a_wait_bcon_tmr);
+			langwell->otg.state = OTG_STATE_A_WAIT_BCON;
+		}
+		break;
+	case OTG_STATE_A_VBUS_ERR:
+		if (langwell->hsm.id) {
+			langwell->otg.default_a = 0;
+			langwell->hsm.a_clr_err = 0;
+			langwell->hsm.a_srp_det = 0;
+			langwell->otg.state = OTG_STATE_B_IDLE;
+			queue_work(langwell->qwork, &langwell->work);
+		} else if (langwell->hsm.a_clr_err) {
+			langwell->hsm.a_clr_err = 0;
+			langwell->hsm.a_srp_det = 0;
+			reset_otg();
+			init_hsm();
+			if (langwell->otg.state == OTG_STATE_A_IDLE)
+				queue_work(langwell->qwork, &langwell->work);
+		}
+		break;
+	case OTG_STATE_A_WAIT_VFALL:
+		if (langwell->hsm.id) {
+			langwell->otg.default_a = 0;
+			langwell->otg.state = OTG_STATE_B_IDLE;
+			queue_work(langwell->qwork, &langwell->work);
+		} else if (langwell->hsm.a_bus_req) {
+			langwell_otg_drv_vbus(1);
+			langwell->hsm.a_wait_vrise_tmout = 0;
+			langwell_otg_add_timer(a_wait_vrise_tmr);
+			langwell->otg.state = OTG_STATE_A_WAIT_VRISE;
+		} else if (!langwell->hsm.a_sess_vld) {
+			langwell->hsm.a_srp_det = 0;
+			langwell_otg_drv_vbus(0);
+			set_host_mode();
+			langwell->otg.state = OTG_STATE_A_IDLE;
+		}
+		break;
+	default:
+		;
+	}
+
+	otg_dbg("%s: new state = %s\n", __func__,
+			state_string(langwell->otg.state));
+}
+
+	static ssize_t
+show_registers(struct device *_dev, struct device_attribute *attr, char *buf)
+{
+	struct langwell_otg *langwell;
+	char *next;
+	unsigned size;
+	unsigned t;
+
+	langwell = the_transceiver;
+	next = buf;
+	size = PAGE_SIZE;
+
+	t = scnprintf(next, size,
+		"\n"
+		"USBCMD = 0x%08x \n"
+		"USBSTS = 0x%08x \n"
+		"USBINTR = 0x%08x \n"
+		"ASYNCLISTADDR = 0x%08x \n"
+		"PORTSC1 = 0x%08x \n"
+		"HOSTPC1 = 0x%08x \n"
+		"OTGSC = 0x%08x \n"
+		"USBMODE = 0x%08x \n",
+		readl(langwell->regs + 0x30),
+		readl(langwell->regs + 0x34),
+		readl(langwell->regs + 0x38),
+		readl(langwell->regs + 0x48),
+		readl(langwell->regs + 0x74),
+		readl(langwell->regs + 0xb4),
+		readl(langwell->regs + 0xf4),
+		readl(langwell->regs + 0xf8)
+		);
+	size -= t;
+	next += t;
+
+	return PAGE_SIZE - size;
+}
+static DEVICE_ATTR(registers, S_IRUGO, show_registers, NULL);
+
+static ssize_t
+show_hsm(struct device *_dev, struct device_attribute *attr, char *buf)
+{
+	struct langwell_otg *langwell;
+	char *next;
+	unsigned size;
+	unsigned t;
+
+	langwell = the_transceiver;
+	next = buf;
+	size = PAGE_SIZE;
+
+	t = scnprintf(next, size,
+		"\n"
+		"current state = %s\n"
+		"a_bus_resume = \t%d\n"
+		"a_bus_suspend = \t%d\n"
+		"a_conn = \t%d\n"
+		"a_sess_vld = \t%d\n"
+		"a_srp_det = \t%d\n"
+		"a_vbus_vld = \t%d\n"
+		"b_bus_resume = \t%d\n"
+		"b_bus_suspend = \t%d\n"
+		"b_conn = \t%d\n"
+		"b_se0_srp = \t%d\n"
+		"b_sess_end = \t%d\n"
+		"b_sess_vld = \t%d\n"
+		"id = \t%d\n"
+		"a_set_b_hnp_en = \t%d\n"
+		"b_srp_done = \t%d\n"
+		"b_hnp_enable = \t%d\n"
+		"a_wait_vrise_tmout = \t%d\n"
+		"a_wait_bcon_tmout = \t%d\n"
+		"a_aidl_bdis_tmout = \t%d\n"
+		"b_ase0_brst_tmout = \t%d\n"
+		"a_bus_drop = \t%d\n"
+		"a_bus_req = \t%d\n"
+		"a_clr_err = \t%d\n"
+		"a_suspend_req = \t%d\n"
+		"b_bus_req = \t%d\n"
+		"b_bus_suspend_tmout = \t%d\n"
+		"b_bus_suspend_vld = \t%d\n",
+		state_string(langwell->otg.state),
+		langwell->hsm.a_bus_resume,
+		langwell->hsm.a_bus_suspend,
+		langwell->hsm.a_conn,
+		langwell->hsm.a_sess_vld,
+		langwell->hsm.a_srp_det,
+		langwell->hsm.a_vbus_vld,
+		langwell->hsm.b_bus_resume,
+		langwell->hsm.b_bus_suspend,
+		langwell->hsm.b_conn,
+		langwell->hsm.b_se0_srp,
+		langwell->hsm.b_sess_end,
+		langwell->hsm.b_sess_vld,
+		langwell->hsm.id,
+		langwell->hsm.a_set_b_hnp_en,
+		langwell->hsm.b_srp_done,
+		langwell->hsm.b_hnp_enable,
+		langwell->hsm.a_wait_vrise_tmout,
+		langwell->hsm.a_wait_bcon_tmout,
+		langwell->hsm.a_aidl_bdis_tmout,
+		langwell->hsm.b_ase0_brst_tmout,
+		langwell->hsm.a_bus_drop,
+		langwell->hsm.a_bus_req,
+		langwell->hsm.a_clr_err,
+		langwell->hsm.a_suspend_req,
+		langwell->hsm.b_bus_req,
+		langwell->hsm.b_bus_suspend_tmout,
+		langwell->hsm.b_bus_suspend_vld
+		);
+	size -= t;
+	next += t;
+
+	return PAGE_SIZE - size;
+}
+static DEVICE_ATTR(hsm, S_IRUGO, show_hsm, NULL);
+
+static ssize_t
+get_a_bus_req(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct langwell_otg *langwell;
+	char *next;
+	unsigned size;
+	unsigned t;
+
+	langwell =  the_transceiver;
+	next = buf;
+	size = PAGE_SIZE;
+
+	t = scnprintf(next, size, "%d", langwell->hsm.a_bus_req);
+	size -= t;
+	next += t;
+
+	return PAGE_SIZE - size;
+}
+
+static ssize_t
+set_a_bus_req(struct device *dev, struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct langwell_otg *langwell;
+	langwell = the_transceiver;
+	if (!langwell->otg.default_a)
+		return -1;
+	if (count > 2)
+		return -1;
+
+	if (buf[0] == '0') {
+		langwell->hsm.a_bus_req = 0;
+		otg_dbg("a_bus_req = 0\n");
+	} else if (buf[0] == '1') {
+		/* If a_bus_drop is TRUE, a_bus_req can't be set */
+		if (langwell->hsm.a_bus_drop)
+			return -1;
+		langwell->hsm.a_bus_req = 1;
+		otg_dbg("a_bus_req = 1\n");
+	}
+	if (spin_trylock(&langwell->wq_lock)) {
+		queue_work(langwell->qwork, &langwell->work);
+		spin_unlock(&langwell->wq_lock);
+	}
+	return count;
+}
+static DEVICE_ATTR(a_bus_req, S_IRUGO | S_IWUGO, get_a_bus_req, set_a_bus_req);
+
+static ssize_t
+get_a_bus_drop(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct langwell_otg *langwell;
+	char *next;
+	unsigned size;
+	unsigned t;
+
+	langwell =  the_transceiver;
+	next = buf;
+	size = PAGE_SIZE;
+
+	t = scnprintf(next, size, "%d", langwell->hsm.a_bus_drop);
+	size -= t;
+	next += t;
+
+	return PAGE_SIZE - size;
+}
+
+static ssize_t
+set_a_bus_drop(struct device *dev, struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct langwell_otg *langwell;
+	langwell = the_transceiver;
+	if (!langwell->otg.default_a)
+		return -1;
+	if (count > 2)
+		return -1;
+
+	if (buf[0] == '0') {
+		langwell->hsm.a_bus_drop = 0;
+		otg_dbg("a_bus_drop = 0\n");
+	} else if (buf[0] == '1') {
+		langwell->hsm.a_bus_drop = 1;
+		langwell->hsm.a_bus_req = 0;
+		otg_dbg("a_bus_drop = 1, then a_bus_req = 0\n");
+	}
+	if (spin_trylock(&langwell->wq_lock)) {
+		queue_work(langwell->qwork, &langwell->work);
+		spin_unlock(&langwell->wq_lock);
+	}
+	return count;
+}
+static DEVICE_ATTR(a_bus_drop, S_IRUGO | S_IWUGO,
+	get_a_bus_drop, set_a_bus_drop);
+
+static ssize_t
+get_b_bus_req(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct langwell_otg *langwell;
+	char *next;
+	unsigned size;
+	unsigned t;
+
+	langwell =  the_transceiver;
+	next = buf;
+	size = PAGE_SIZE;
+
+	t = scnprintf(next, size, "%d", langwell->hsm.b_bus_req);
+	size -= t;
+	next += t;
+
+	return PAGE_SIZE - size;
+}
+
+static ssize_t
+set_b_bus_req(struct device *dev, struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct langwell_otg *langwell;
+	langwell = the_transceiver;
+
+	if (langwell->otg.default_a)
+		return -1;
+
+	if (count > 2)
+		return -1;
+
+	if (buf[0] == '0') {
+		langwell->hsm.b_bus_req = 0;
+		otg_dbg("b_bus_req = 0\n");
+	} else if (buf[0] == '1') {
+		langwell->hsm.b_bus_req = 1;
+		otg_dbg("b_bus_req = 1\n");
+	}
+	if (spin_trylock(&langwell->wq_lock)) {
+		queue_work(langwell->qwork, &langwell->work);
+		spin_unlock(&langwell->wq_lock);
+	}
+	return count;
+}
+static DEVICE_ATTR(b_bus_req, S_IRUGO | S_IWUGO, get_b_bus_req, set_b_bus_req);
+
+static ssize_t
+set_a_clr_err(struct device *dev, struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct langwell_otg *langwell;
+	langwell = the_transceiver;
+
+	if (!langwell->otg.default_a)
+		return -1;
+	if (count > 2)
+		return -1;
+
+	if (buf[0] == '1') {
+		langwell->hsm.a_clr_err = 1;
+		otg_dbg("a_clr_err = 1\n");
+	}
+	if (spin_trylock(&langwell->wq_lock)) {
+		queue_work(langwell->qwork, &langwell->work);
+		spin_unlock(&langwell->wq_lock);
+	}
+	return count;
+}
+static DEVICE_ATTR(a_clr_err, S_IWUGO, NULL, set_a_clr_err);
+
+static struct attribute *inputs_attrs[] = {
+	&dev_attr_a_bus_req.attr,
+	&dev_attr_a_bus_drop.attr,
+	&dev_attr_b_bus_req.attr,
+	&dev_attr_a_clr_err.attr,
+	NULL,
+};
+
+static struct attribute_group debug_dev_attr_group = {
+	.name = "inputs",
+	.attrs = inputs_attrs,
+};
+
+int langwell_register_host(struct pci_driver *host_driver)
+{
+	int	ret = 0;
+
+	the_transceiver->host_ops = host_driver;
+	queue_work(the_transceiver->qwork, &the_transceiver->work);
+	otg_dbg("host controller driver is registered\n");
+
+	return ret;
+}
+EXPORT_SYMBOL(langwell_register_host);
+
+void langwell_unregister_host(struct pci_driver *host_driver)
+{
+	if (the_transceiver->host_ops)
+		the_transceiver->host_ops->remove(the_transceiver->pdev);
+	the_transceiver->host_ops = NULL;
+	the_transceiver->hsm.a_bus_drop = 1;
+	queue_work(the_transceiver->qwork, &the_transceiver->work);
+	otg_dbg("host controller driver is unregistered\n");
+}
+EXPORT_SYMBOL(langwell_unregister_host);
+
+int langwell_register_peripheral(struct pci_driver *client_driver)
+{
+	int	ret = 0;
+
+	if (client_driver)
+		ret = client_driver->probe(the_transceiver->pdev,
+				client_driver->id_table);
+	if (!ret) {
+		the_transceiver->client_ops = client_driver;
+		queue_work(the_transceiver->qwork, &the_transceiver->work);
+		otg_dbg("client controller driver is registered\n");
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(langwell_register_peripheral);
+
+void langwell_unregister_peripheral(struct pci_driver *client_driver)
+{
+	if (the_transceiver->client_ops)
+		the_transceiver->client_ops->remove(the_transceiver->pdev);
+	the_transceiver->client_ops = NULL;
+	the_transceiver->hsm.b_bus_req = 0;
+	queue_work(the_transceiver->qwork, &the_transceiver->work);
+	otg_dbg("client controller driver is unregistered\n");
+}
+EXPORT_SYMBOL(langwell_unregister_peripheral);
+
+static int langwell_otg_probe(struct pci_dev *pdev,
+		const struct pci_device_id *id)
+{
+	unsigned long		resource, len;
+	void __iomem 		*base = NULL;
+	int			retval;
+	u32			val32;
+	struct langwell_otg	*langwell;
+	char			qname[] = "langwell_otg_queue";
+
+	retval = 0;
+	otg_dbg("\notg controller is detected.\n");
+	if (pci_enable_device(pdev) < 0) {
+		retval = -ENODEV;
+		goto done;
+	}
+
+	langwell = kzalloc(sizeof *langwell, GFP_KERNEL);
+	if (langwell == NULL) {
+		retval = -ENOMEM;
+		goto done;
+	}
+	the_transceiver = langwell;
+
+	/* control register: BAR 0 */
+	resource = pci_resource_start(pdev, 0);
+	len = pci_resource_len(pdev, 0);
+	if (!request_mem_region(resource, len, driver_name)) {
+		retval = -EBUSY;
+		goto err;
+	}
+	langwell->region = 1;
+
+	base = ioremap_nocache(resource, len);
+	if (base == NULL) {
+		retval = -EFAULT;
+		goto err;
+	}
+	langwell->regs = base;
+
+	if (!pdev->irq) {
+		otg_dbg("No IRQ.\n");
+		retval = -ENODEV;
+		goto err;
+	}
+
+	langwell->qwork = create_workqueue(qname);
+	if (!langwell->qwork) {
+		otg_dbg("cannot create workqueue %s\n", qname);
+		retval = -ENOMEM;
+		goto err;
+	}
+	INIT_WORK(&langwell->work, langwell_otg_work);
+
+	/* OTG common part */
+	langwell->pdev = pdev;
+	langwell->otg.dev = &pdev->dev;
+	langwell->otg.label = driver_name;
+	langwell->otg.set_host = langwell_otg_set_host;
+	langwell->otg.set_peripheral = langwell_otg_set_peripheral;
+	langwell->otg.set_power = langwell_otg_set_power;
+	langwell->otg.start_srp = langwell_otg_start_srp;
+	langwell->otg.state = OTG_STATE_UNDEFINED;
+	if (otg_set_transceiver(&langwell->otg)) {
+		otg_dbg("can't set transceiver\n");
+		retval = -EBUSY;
+		goto err;
+	}
+
+	reset_otg();
+	init_hsm();
+
+	spin_lock_init(&langwell->lock);
+	spin_lock_init(&langwell->wq_lock);
+	INIT_LIST_HEAD(&active_timers);
+	langwell_otg_init_timers(&langwell->hsm);
+
+	if (request_irq(pdev->irq, otg_irq, IRQF_SHARED,
+				driver_name, langwell) != 0) {
+		otg_dbg("request interrupt %d failed\n", pdev->irq);
+		retval = -EBUSY;
+		goto err;
+	}
+
+	/* enable OTGSC int */
+	val32 = OTGSC_DPIE | OTGSC_BSEIE | OTGSC_BSVIE |
+		OTGSC_ASVIE | OTGSC_AVVIE | OTGSC_IDIE | OTGSC_IDPU;
+	writel(val32, langwell->regs + CI_OTGSC);
+
+	retval = device_create_file(&pdev->dev, &dev_attr_registers);
+	if (retval < 0) {
+		otg_dbg("Can't register sysfs attribute: %d\n", retval);
+		goto err;
+	}
+
+	retval = device_create_file(&pdev->dev, &dev_attr_hsm);
+	if (retval < 0) {
+		otg_dbg("Can't hsm sysfs attribute: %d\n", retval);
+		goto err;
+	}
+
+	retval = sysfs_create_group(&pdev->dev.kobj, &debug_dev_attr_group);
+	if (retval < 0) {
+		otg_dbg("Can't register sysfs attr group: %d\n", retval);
+		goto err;
+	}
+
+	if (langwell->otg.state == OTG_STATE_A_IDLE)
+		queue_work(langwell->qwork, &langwell->work);
+
+	return 0;
+
+err:
+	if (the_transceiver)
+		langwell_otg_remove(pdev);
+done:
+	return retval;
+}
+
+static void langwell_otg_remove(struct pci_dev *pdev)
+{
+	struct langwell_otg *langwell;
+
+	langwell = the_transceiver;
+
+	if (langwell->qwork) {
+		flush_workqueue(langwell->qwork);
+		destroy_workqueue(langwell->qwork);
+	}
+	langwell_otg_free_timers();
+
+	/* disable OTGSC interrupt as OTGSC doesn't change in reset */
+	writel(0, langwell->regs + CI_OTGSC);
+
+	if (pdev->irq)
+		free_irq(pdev->irq, langwell);
+	if (langwell->regs)
+		iounmap(langwell->regs);
+	if (langwell->region)
+		release_mem_region(pci_resource_start(pdev, 0),
+				pci_resource_len(pdev, 0));
+
+	otg_set_transceiver(NULL);
+	pci_disable_device(pdev);
+	sysfs_remove_group(&pdev->dev.kobj, &debug_dev_attr_group);
+	device_remove_file(&pdev->dev, &dev_attr_hsm);
+	device_remove_file(&pdev->dev, &dev_attr_registers);
+	kfree(langwell);
+	langwell = NULL;
+}
+
+static void transceiver_suspend(struct pci_dev *pdev)
+{
+	pci_save_state(pdev);
+	pci_set_power_state(pdev, PCI_D3hot);
+	langwell_otg_phy_low_power(1);
+}
+
+static int langwell_otg_suspend(struct pci_dev *pdev, pm_message_t message)
+{
+	int 	ret = 0;
+	struct langwell_otg *langwell;
+
+	langwell = the_transceiver;
+
+	/* Disbale OTG interrupts */
+	langwell_otg_intr(0);
+
+	if (pdev->irq)
+		free_irq(pdev->irq, langwell);
+
+	/* Prevent more otg_work */
+	flush_workqueue(langwell->qwork);
+	spin_lock(&langwell->wq_lock);
+
+	/* start actions */
+	switch (langwell->otg.state) {
+	case OTG_STATE_A_IDLE:
+	case OTG_STATE_B_IDLE:
+	case OTG_STATE_A_WAIT_VFALL:
+	case OTG_STATE_A_VBUS_ERR:
+		transceiver_suspend(pdev);
+		break;
+	case OTG_STATE_A_WAIT_VRISE:
+		langwell_otg_del_timer(a_wait_vrise_tmr);
+		langwell->hsm.a_srp_det = 0;
+		langwell_otg_drv_vbus(0);
+		langwell->otg.state = OTG_STATE_A_IDLE;
+		transceiver_suspend(pdev);
+		break;
+	case OTG_STATE_A_WAIT_BCON:
+		langwell_otg_del_timer(a_wait_bcon_tmr);
+		if (langwell->host_ops)
+			ret = langwell->host_ops->suspend(pdev, message);
+		langwell_otg_drv_vbus(0);
+		break;
+	case OTG_STATE_A_HOST:
+		if (langwell->host_ops)
+			ret = langwell->host_ops->suspend(pdev, message);
+		langwell_otg_drv_vbus(0);
+		langwell_otg_phy_low_power(1);
+		break;
+	case OTG_STATE_A_SUSPEND:
+		langwell_otg_del_timer(a_aidl_bdis_tmr);
+		langwell_otg_HABA(0);
+		if (langwell->host_ops)
+			langwell->host_ops->remove(pdev);
+		else
+			otg_dbg("host driver has been removed.\n");
+		langwell_otg_drv_vbus(0);
+		transceiver_suspend(pdev);
+		langwell->otg.state = OTG_STATE_A_WAIT_VFALL;
+		break;
+	case OTG_STATE_A_PERIPHERAL:
+		if (langwell->client_ops)
+			ret = langwell->client_ops->suspend(pdev, message);
+		else
+			otg_dbg("client driver has been removed.\n");
+		langwell_otg_drv_vbus(0);
+		transceiver_suspend(pdev);
+		langwell->otg.state = OTG_STATE_A_WAIT_VFALL;
+		break;
+	case OTG_STATE_B_HOST:
+		if (langwell->host_ops)
+			langwell->host_ops->remove(pdev);
+		else
+			otg_dbg("host driver has been removed.\n");
+		langwell->hsm.b_bus_req = 0;
+		transceiver_suspend(pdev);
+		langwell->otg.state = OTG_STATE_B_IDLE;
+		break;
+	case OTG_STATE_B_PERIPHERAL:
+		if (langwell->client_ops)
+			ret = langwell->client_ops->suspend(pdev, message);
+		else
+			otg_dbg("client driver has been removed.\n");
+		break;
+	case OTG_STATE_B_WAIT_ACON:
+		langwell_otg_del_timer(b_ase0_brst_tmr);
+		langwell_otg_HAAR(0);
+		if (langwell->host_ops)
+			langwell->host_ops->remove(pdev);
+		else
+			otg_dbg("host driver has been removed.\n");
+		langwell->hsm.b_bus_req = 0;
+		langwell->otg.state = OTG_STATE_B_IDLE;
+		transceiver_suspend(pdev);
+		break;
+	default:
+		otg_dbg("error state before suspend\n ");
+		break;
+	}
+	spin_unlock(&langwell->wq_lock);
+
+	return ret;
+}
+
+static void transceiver_resume(struct pci_dev *pdev)
+{
+	pci_restore_state(pdev);
+	pci_set_power_state(pdev, PCI_D0);
+	langwell_otg_phy_low_power(0);
+}
+
+static int langwell_otg_resume(struct pci_dev *pdev)
+{
+	int 	ret = 0;
+	struct langwell_otg *langwell;
+
+	langwell = the_transceiver;
+
+	spin_lock(&langwell->wq_lock);
+
+	switch (langwell->otg.state) {
+	case OTG_STATE_A_IDLE:
+	case OTG_STATE_B_IDLE:
+	case OTG_STATE_A_WAIT_VFALL:
+	case OTG_STATE_A_VBUS_ERR:
+		transceiver_resume(pdev);
+		break;
+	case OTG_STATE_A_WAIT_BCON:
+		langwell_otg_add_timer(a_wait_bcon_tmr);
+		langwell_otg_drv_vbus(1);
+		if (langwell->host_ops)
+			ret = langwell->host_ops->resume(pdev);
+		break;
+	case OTG_STATE_A_HOST:
+		langwell_otg_drv_vbus(1);
+		langwell_otg_phy_low_power(0);
+		if (langwell->host_ops)
+			ret = langwell->host_ops->resume(pdev);
+		break;
+	case OTG_STATE_B_PERIPHERAL:
+		if (langwell->client_ops)
+			ret = langwell->client_ops->resume(pdev);
+		else
+			otg_dbg("client driver not loaded.\n");
+		break;
+	default:
+		otg_dbg("error state before suspend\n ");
+		break;
+	}
+
+	if (request_irq(pdev->irq, otg_irq, IRQF_SHARED,
+				driver_name, the_transceiver) != 0) {
+		otg_dbg("request interrupt %d failed\n", pdev->irq);
+		ret = -EBUSY;
+	}
+
+	/* enable OTG interrupts */
+	langwell_otg_intr(1);
+
+	spin_unlock(&langwell->wq_lock);
+
+	queue_work(langwell->qwork, &langwell->work);
+
+
+	return ret;
+}
+
+static int __init langwell_otg_init(void)
+{
+	return pci_register_driver(&otg_pci_driver);
+}
+module_init(langwell_otg_init);
+
+static void __exit langwell_otg_cleanup(void)
+{
+	pci_unregister_driver(&otg_pci_driver);
+}
+module_exit(langwell_otg_cleanup);
diff --git a/include/linux/usb/langwell_otg.h b/include/linux/usb/langwell_otg.h
new file mode 100644
index 00000000000..e115ae6df1d
--- /dev/null
+++ b/include/linux/usb/langwell_otg.h
@@ -0,0 +1,177 @@
+/*
+ * Intel Langwell USB OTG transceiver driver
+ * Copyright (C) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#ifndef __LANGWELL_OTG_H__
+#define __LANGWELL_OTG_H__
+
+/* notify transceiver driver about OTG events */
+extern void langwell_update_transceiver(void);
+/* HCD register bus driver */
+extern int langwell_register_host(struct pci_driver *host_driver);
+/* HCD unregister bus driver */
+extern void langwell_unregister_host(struct pci_driver *host_driver);
+/* DCD register bus driver */
+extern int langwell_register_peripheral(struct pci_driver *client_driver);
+/* DCD unregister bus driver */
+extern void langwell_unregister_peripheral(struct pci_driver *client_driver);
+/* No silent failure, output warning message */
+extern void langwell_otg_nsf_msg(unsigned long message);
+
+#define CI_USBCMD		0x30
+#	define USBCMD_RST		BIT(1)
+#	define USBCMD_RS		BIT(0)
+#define CI_USBSTS		0x34
+#	define USBSTS_SLI		BIT(8)
+#	define USBSTS_URI		BIT(6)
+#	define USBSTS_PCI		BIT(2)
+#define CI_PORTSC1		0x74
+#	define PORTSC_PP		BIT(12)
+#	define PORTSC_LS		(BIT(11) | BIT(10))
+#	define PORTSC_SUSP		BIT(7)
+#	define PORTSC_CCS		BIT(0)
+#define CI_HOSTPC1		0xb4
+#	define HOSTPC1_PHCD		BIT(22)
+#define CI_OTGSC		0xf4
+#	define OTGSC_DPIE		BIT(30)
+#	define OTGSC_1MSE		BIT(29)
+#	define OTGSC_BSEIE		BIT(28)
+#	define OTGSC_BSVIE		BIT(27)
+#	define OTGSC_ASVIE		BIT(26)
+#	define OTGSC_AVVIE		BIT(25)
+#	define OTGSC_IDIE		BIT(24)
+#	define OTGSC_DPIS		BIT(22)
+#	define OTGSC_1MSS		BIT(21)
+#	define OTGSC_BSEIS		BIT(20)
+#	define OTGSC_BSVIS		BIT(19)
+#	define OTGSC_ASVIS		BIT(18)
+#	define OTGSC_AVVIS		BIT(17)
+#	define OTGSC_IDIS		BIT(16)
+#	define OTGSC_DPS		BIT(14)
+#	define OTGSC_1MST		BIT(13)
+#	define OTGSC_BSE		BIT(12)
+#	define OTGSC_BSV		BIT(11)
+#	define OTGSC_ASV		BIT(10)
+#	define OTGSC_AVV		BIT(9)
+#	define OTGSC_ID			BIT(8)
+#	define OTGSC_HABA		BIT(7)
+#	define OTGSC_HADP		BIT(6)
+#	define OTGSC_IDPU		BIT(5)
+#	define OTGSC_DP			BIT(4)
+#	define OTGSC_OT			BIT(3)
+#	define OTGSC_HAAR		BIT(2)
+#	define OTGSC_VC			BIT(1)
+#	define OTGSC_VD			BIT(0)
+#	define OTGSC_INTEN_MASK		(0x7f << 24)
+#	define OTGSC_INTSTS_MASK	(0x7f << 16)
+#define CI_USBMODE		0xf8
+#	define USBMODE_CM		(BIT(1) | BIT(0))
+#	define USBMODE_IDLE		0
+#	define USBMODE_DEVICE		0x2
+#	define USBMODE_HOST		0x3
+
+#define INTR_DUMMY_MASK (USBSTS_SLI | USBSTS_URI | USBSTS_PCI)
+
+struct otg_hsm {
+	/* Input */
+	int a_bus_resume;
+	int a_bus_suspend;
+	int a_conn;
+	int a_sess_vld;
+	int a_srp_det;
+	int a_vbus_vld;
+	int b_bus_resume;
+	int b_bus_suspend;
+	int b_conn;
+	int b_se0_srp;
+	int b_sess_end;
+	int b_sess_vld;
+	int id;
+
+	/* Internal variables */
+	int a_set_b_hnp_en;
+	int b_srp_done;
+	int b_hnp_enable;
+
+	/* Timeout indicator for timers */
+	int a_wait_vrise_tmout;
+	int a_wait_bcon_tmout;
+	int a_aidl_bdis_tmout;
+	int b_ase0_brst_tmout;
+	int b_bus_suspend_tmout;
+	int b_srp_res_tmout;
+
+	/* Informative variables */
+	int a_bus_drop;
+	int a_bus_req;
+	int a_clr_err;
+	int a_suspend_req;
+	int b_bus_req;
+
+	/* Output */
+	int drv_vbus;
+	int loc_conn;
+	int loc_sof;
+
+	/* Others */
+	int b_bus_suspend_vld;
+};
+
+#define TA_WAIT_VRISE	100
+#define TA_WAIT_BCON	30000
+#define TA_AIDL_BDIS	15000
+#define TB_ASE0_BRST	5000
+#define TB_SE0_SRP	2
+#define TB_SRP_RES	100
+#define TB_BUS_SUSPEND	500
+
+struct langwell_otg_timer {
+	unsigned long expires;	/* Number of count increase to timeout */
+	unsigned long count;	/* Tick counter */
+	void (*function)(unsigned long);	/* Timeout function */
+	unsigned long data;	/* Data passed to function */
+	struct list_head list;
+};
+
+struct langwell_otg {
+	struct otg_transceiver 	otg;
+	struct otg_hsm 		hsm;
+	void __iomem 		*regs;
+	unsigned 		region;
+	struct pci_driver	*host_ops;
+	struct pci_driver	*client_ops;
+	struct pci_dev		*pdev;
+	struct work_struct 	work;
+	struct workqueue_struct	*qwork;
+	spinlock_t 		lock;
+	spinlock_t 		wq_lock;
+};
+
+static inline struct langwell_otg *otg_to_langwell(struct otg_transceiver *otg)
+{
+	return container_of(otg, struct langwell_otg, otg);
+}
+
+#ifdef DEBUG
+#define otg_dbg(fmt, args...) \
+	printk(KERN_DEBUG fmt , ## args)
+#else
+#define otg_dbg(fmt, args...) \
+	do { } while (0)
+#endif /* DEBUG */
+#endif /* __LANGWELL_OTG_H__ */
-- 
cgit v1.2.3-70-g09d2


From 66d4eadd8d067269ea8fead1a50fe87c2979a80d Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Mon, 27 Apr 2009 19:52:28 -0700
Subject: USB: xhci: BIOS handoff and HW initialization.

Add PCI initialization code to take control of the xHCI host controller
away from the BIOS, halt, and reset the host controller.  The xHCI spec
says that BIOSes must give up the host controller within 5 seconds.

Add some host controller glue functions to handle hardware initialization
and memory allocation for the host controller.  The current xHCI
prototypes use PCI interrupts, but the xHCI spec requires MSI-X
interrupts.  Add code to support MSI-X interrupts, but use the PCI
interrupts for now.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/hcd.h        |   1 +
 drivers/usb/host/pci-quirks.c | 123 ++++++++++++++
 drivers/usb/host/xhci-hcd.c   | 377 ++++++++++++++++++++++++++++++++++++++++++
 drivers/usb/host/xhci-mem.c   |  75 +++++++++
 drivers/usb/host/xhci-pci.c   | 150 +++++++++++++++++
 drivers/usb/host/xhci.h       |  63 ++++++-
 include/linux/pci_ids.h       |   1 +
 7 files changed, 788 insertions(+), 2 deletions(-)
 create mode 100644 drivers/usb/host/xhci-hcd.c
 create mode 100644 drivers/usb/host/xhci-mem.c
 create mode 100644 drivers/usb/host/xhci-pci.c

(limited to 'include')

diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h
index 174170f14f0..7a47498b0aa 100644
--- a/drivers/usb/core/hcd.h
+++ b/drivers/usb/core/hcd.h
@@ -173,6 +173,7 @@ struct hc_driver {
 #define	HCD_LOCAL_MEM	0x0002		/* HC needs local memory */
 #define	HCD_USB11	0x0010		/* USB 1.1 */
 #define	HCD_USB2	0x0020		/* USB 2.0 */
+#define	HCD_USB3	0x0040		/* USB 3.0 */
 
 	/* called to init HCD and root hub */
 	int	(*reset) (struct usb_hcd *hcd);
diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c
index 033c2846ce5..83b5f9cea85 100644
--- a/drivers/usb/host/pci-quirks.c
+++ b/drivers/usb/host/pci-quirks.c
@@ -15,6 +15,7 @@
 #include <linux/delay.h>
 #include <linux/acpi.h>
 #include "pci-quirks.h"
+#include "xhci-ext-caps.h"
 
 
 #define UHCI_USBLEGSUP		0xc0		/* legacy support */
@@ -341,7 +342,127 @@ static void __devinit quirk_usb_disable_ehci(struct pci_dev *pdev)
 	return;
 }
 
+/*
+ * handshake - spin reading a register until handshake completes
+ * @ptr: address of hc register to be read
+ * @mask: bits to look at in result of read
+ * @done: value of those bits when handshake succeeds
+ * @wait_usec: timeout in microseconds
+ * @delay_usec: delay in microseconds to wait between polling
+ *
+ * Polls a register every delay_usec microseconds.
+ * Returns 0 when the mask bits have the value done.
+ * Returns -ETIMEDOUT if this condition is not true after
+ * wait_usec microseconds have passed.
+ */
+static int handshake(void __iomem *ptr, u32 mask, u32 done,
+		int wait_usec, int delay_usec)
+{
+	u32	result;
+
+	do {
+		result = readl(ptr);
+		result &= mask;
+		if (result == done)
+			return 0;
+		udelay(delay_usec);
+		wait_usec -= delay_usec;
+	} while (wait_usec > 0);
+	return -ETIMEDOUT;
+}
+
+/**
+ * PCI Quirks for xHCI.
+ *
+ * Takes care of the handoff between the Pre-OS (i.e. BIOS) and the OS.
+ * It signals to the BIOS that the OS wants control of the host controller,
+ * and then waits 5 seconds for the BIOS to hand over control.
+ * If we timeout, assume the BIOS is broken and take control anyway.
+ */
+static void __devinit quirk_usb_handoff_xhci(struct pci_dev *pdev)
+{
+	void __iomem *base;
+	int ext_cap_offset;
+	void __iomem *op_reg_base;
+	u32 val;
+	int timeout;
+
+	if (!mmio_resource_enabled(pdev, 0))
+		return;
+
+	base = ioremap_nocache(pci_resource_start(pdev, 0),
+				pci_resource_len(pdev, 0));
+	if (base == NULL)
+		return;
 
+	/*
+	 * Find the Legacy Support Capability register -
+	 * this is optional for xHCI host controllers.
+	 */
+	ext_cap_offset = xhci_find_next_cap_offset(base, XHCI_HCC_PARAMS_OFFSET);
+	do {
+		if (!ext_cap_offset)
+			/* We've reached the end of the extended capabilities */
+			goto hc_init;
+		val = readl(base + ext_cap_offset);
+		if (XHCI_EXT_CAPS_ID(val) == XHCI_EXT_CAPS_LEGACY)
+			break;
+		ext_cap_offset = xhci_find_next_cap_offset(base, ext_cap_offset);
+	} while (1);
+
+	/* If the BIOS owns the HC, signal that the OS wants it, and wait */
+	if (val & XHCI_HC_BIOS_OWNED) {
+		writel(val & XHCI_HC_OS_OWNED, base + ext_cap_offset);
+
+		/* Wait for 5 seconds with 10 microsecond polling interval */
+		timeout = handshake(base + ext_cap_offset, XHCI_HC_BIOS_OWNED,
+				0, 5000, 10);
+
+		/* Assume a buggy BIOS and take HC ownership anyway */
+		if (timeout) {
+			dev_warn(&pdev->dev, "xHCI BIOS handoff failed"
+					" (BIOS bug ?) %08x\n", val);
+			writel(val & ~XHCI_HC_BIOS_OWNED, base + ext_cap_offset);
+		}
+	}
+
+	/* Disable any BIOS SMIs */
+	writel(XHCI_LEGACY_DISABLE_SMI,
+			base + ext_cap_offset + XHCI_LEGACY_CONTROL_OFFSET);
+
+hc_init:
+	op_reg_base = base + XHCI_HC_LENGTH(readl(base));
+
+	/* Wait for the host controller to be ready before writing any
+	 * operational or runtime registers.  Wait 5 seconds and no more.
+	 */
+	timeout = handshake(op_reg_base + XHCI_STS_OFFSET, XHCI_STS_CNR, 0,
+			5000, 10);
+	/* Assume a buggy HC and start HC initialization anyway */
+	if (timeout) {
+		val = readl(op_reg_base + XHCI_STS_OFFSET);
+		dev_warn(&pdev->dev,
+				"xHCI HW not ready after 5 sec (HC bug?) "
+				"status = 0x%x\n", val);
+	}
+
+	/* Send the halt and disable interrupts command */
+	val = readl(op_reg_base + XHCI_CMD_OFFSET);
+	val &= ~(XHCI_CMD_RUN | XHCI_IRQS);
+	writel(val, op_reg_base + XHCI_CMD_OFFSET);
+
+	/* Wait for the HC to halt - poll every 125 usec (one microframe). */
+	timeout = handshake(op_reg_base + XHCI_STS_OFFSET, XHCI_STS_HALT, 1,
+			XHCI_MAX_HALT_USEC, 125);
+	if (timeout) {
+		val = readl(op_reg_base + XHCI_STS_OFFSET);
+		dev_warn(&pdev->dev,
+				"xHCI HW did not halt within %d usec "
+				"status = 0x%x\n", XHCI_MAX_HALT_USEC, val);
+	}
+
+	iounmap(base);
+}
 
 static void __devinit quirk_usb_early_handoff(struct pci_dev *pdev)
 {
@@ -351,5 +472,7 @@ static void __devinit quirk_usb_early_handoff(struct pci_dev *pdev)
 		quirk_usb_handoff_ohci(pdev);
 	else if (pdev->class == PCI_CLASS_SERIAL_USB_EHCI)
 		quirk_usb_disable_ehci(pdev);
+	else if (pdev->class == PCI_CLASS_SERIAL_USB_XHCI)
+		quirk_usb_handoff_xhci(pdev);
 }
 DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, quirk_usb_early_handoff);
diff --git a/drivers/usb/host/xhci-hcd.c b/drivers/usb/host/xhci-hcd.c
new file mode 100644
index 00000000000..64fcc22e9d5
--- /dev/null
+++ b/drivers/usb/host/xhci-hcd.c
@@ -0,0 +1,377 @@
+/*
+ * xHCI host controller driver
+ *
+ * Copyright (C) 2008 Intel Corp.
+ *
+ * Author: Sarah Sharp
+ * Some code borrowed from the Linux EHCI driver.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/irq.h>
+#include <linux/module.h>
+
+#include "xhci.h"
+
+#define DRIVER_AUTHOR "Sarah Sharp"
+#define DRIVER_DESC "'eXtensible' Host Controller (xHC) Driver"
+
+/* TODO: copied from ehci-hcd.c - can this be refactored? */
+/*
+ * handshake - spin reading hc until handshake completes or fails
+ * @ptr: address of hc register to be read
+ * @mask: bits to look at in result of read
+ * @done: value of those bits when handshake succeeds
+ * @usec: timeout in microseconds
+ *
+ * Returns negative errno, or zero on success
+ *
+ * Success happens when the "mask" bits have the specified value (hardware
+ * handshake done).  There are two failure modes:  "usec" have passed (major
+ * hardware flakeout), or the register reads as all-ones (hardware removed).
+ */
+static int handshake(struct xhci_hcd *xhci, void __iomem *ptr,
+		      u32 mask, u32 done, int usec)
+{
+	u32	result;
+
+	do {
+		result = xhci_readl(xhci, ptr);
+		if (result == ~(u32)0)		/* card removed */
+			return -ENODEV;
+		result &= mask;
+		if (result == done)
+			return 0;
+		udelay(1);
+		usec--;
+	} while (usec > 0);
+	return -ETIMEDOUT;
+}
+
+/*
+ * Force HC into halt state.
+ *
+ * Disable any IRQs and clear the run/stop bit.
+ * HC will complete any current and actively pipelined transactions, and
+ * should halt within 16 microframes of the run/stop bit being cleared.
+ * Read HC Halted bit in the status register to see when the HC is finished.
+ * XXX: shouldn't we set HC_STATE_HALT here somewhere?
+ */
+int xhci_halt(struct xhci_hcd *xhci)
+{
+	u32 halted;
+	u32 cmd;
+	u32 mask;
+
+	xhci_dbg(xhci, "// Halt the HC\n");
+	/* Disable all interrupts from the host controller */
+	mask = ~(XHCI_IRQS);
+	halted = xhci_readl(xhci, &xhci->op_regs->status) & STS_HALT;
+	if (!halted)
+		mask &= ~CMD_RUN;
+
+	cmd = xhci_readl(xhci, &xhci->op_regs->command);
+	cmd &= mask;
+	xhci_writel(xhci, cmd, &xhci->op_regs->command);
+
+	return handshake(xhci, &xhci->op_regs->status,
+			STS_HALT, STS_HALT, XHCI_MAX_HALT_USEC);
+}
+
+/*
+ * Reset a halted HC, and set the internal HC state to HC_STATE_HALT.
+ *
+ * This resets pipelines, timers, counters, state machines, etc.
+ * Transactions will be terminated immediately, and operational registers
+ * will be set to their defaults.
+ */
+int xhci_reset(struct xhci_hcd *xhci)
+{
+	u32 command;
+	u32 state;
+
+	state = xhci_readl(xhci, &xhci->op_regs->status);
+	BUG_ON((state & STS_HALT) == 0);
+
+	xhci_dbg(xhci, "// Reset the HC\n");
+	command = xhci_readl(xhci, &xhci->op_regs->command);
+	command |= CMD_RESET;
+	xhci_writel(xhci, command, &xhci->op_regs->command);
+	/* XXX: Why does EHCI set this here?  Shouldn't other code do this? */
+	xhci_to_hcd(xhci)->state = HC_STATE_HALT;
+
+	return handshake(xhci, &xhci->op_regs->command, CMD_RESET, 0, 250 * 1000);
+}
+
+/*
+ * Stop the HC from processing the endpoint queues.
+ */
+static void xhci_quiesce(struct xhci_hcd *xhci)
+{
+	/*
+	 * Queues are per endpoint, so we need to disable an endpoint or slot.
+	 *
+	 * To disable a slot, we need to insert a disable slot command on the
+	 * command ring and ring the doorbell.  This will also free any internal
+	 * resources associated with the slot (which might not be what we want).
+	 *
+	 * A Release Endpoint command sounds better - doesn't free internal HC
+	 * memory, but removes the endpoints from the schedule and releases the
+	 * bandwidth, disables the doorbells, and clears the endpoint enable
+	 * flag.  Usually used prior to a set interface command.
+	 *
+	 * TODO: Implement after command ring code is done.
+	 */
+	BUG_ON(!HC_IS_RUNNING(xhci_to_hcd(xhci)->state));
+	xhci_dbg(xhci, "Finished quiescing -- code not written yet\n");
+}
+
+#if 0
+/* Set up MSI-X table for entry 0 (may claim other entries later) */
+static int xhci_setup_msix(struct xhci_hcd *xhci)
+{
+	int ret;
+	struct pci_dev *pdev = to_pci_dev(xhci_to_hcd(xhci)->self.controller);
+
+	xhci->msix_count = 0;
+	/* XXX: did I do this right?  ixgbe does kcalloc for more than one */
+	xhci->msix_entries = kmalloc(sizeof(struct msix_entry), GFP_KERNEL);
+	if (!xhci->msix_entries) {
+		xhci_err(xhci, "Failed to allocate MSI-X entries\n");
+		return -ENOMEM;
+	}
+	xhci->msix_entries[0].entry = 0;
+
+	ret = pci_enable_msix(pdev, xhci->msix_entries, xhci->msix_count);
+	if (ret) {
+		xhci_err(xhci, "Failed to enable MSI-X\n");
+		goto free_entries;
+	}
+
+	/*
+	 * Pass the xhci pointer value as the request_irq "cookie".
+	 * If more irqs are added, this will need to be unique for each one.
+	 */
+	ret = request_irq(xhci->msix_entries[0].vector, &xhci_irq, 0,
+			"xHCI", xhci_to_hcd(xhci));
+	if (ret) {
+		xhci_err(xhci, "Failed to allocate MSI-X interrupt\n");
+		goto disable_msix;
+	}
+	xhci_dbg(xhci, "Finished setting up MSI-X\n");
+	return 0;
+
+disable_msix:
+	pci_disable_msix(pdev);
+free_entries:
+	kfree(xhci->msix_entries);
+	xhci->msix_entries = NULL;
+	return ret;
+}
+
+/* XXX: code duplication; can xhci_setup_msix call this? */
+/* Free any IRQs and disable MSI-X */
+static void xhci_cleanup_msix(struct xhci_hcd *xhci)
+{
+	struct pci_dev *pdev = to_pci_dev(xhci_to_hcd(xhci)->self.controller);
+	if (!xhci->msix_entries)
+		return;
+
+	free_irq(xhci->msix_entries[0].vector, xhci);
+	pci_disable_msix(pdev);
+	kfree(xhci->msix_entries);
+	xhci->msix_entries = NULL;
+	xhci_dbg(xhci, "Finished cleaning up MSI-X\n");
+}
+#endif
+
+/*
+ * Initialize memory for HCD and xHC (one-time init).
+ *
+ * Program the PAGESIZE register, initialize the device context array, create
+ * device contexts (?), set up a command ring segment (or two?), create event
+ * ring (one for now).
+ */
+int xhci_init(struct usb_hcd *hcd)
+{
+	struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+	int retval = 0;
+
+	xhci_dbg(xhci, "xhci_init\n");
+	spin_lock_init(&xhci->lock);
+	retval = xhci_mem_init(xhci, GFP_KERNEL);
+	xhci_dbg(xhci, "Finished xhci_init\n");
+
+	return retval;
+}
+
+/*
+ * Start the HC after it was halted.
+ *
+ * This function is called by the USB core when the HC driver is added.
+ * Its opposite is xhci_stop().
+ *
+ * xhci_init() must be called once before this function can be called.
+ * Reset the HC, enable device slot contexts, program DCBAAP, and
+ * set command ring pointer and event ring pointer.
+ *
+ * Setup MSI-X vectors and enable interrupts.
+ */
+int xhci_run(struct usb_hcd *hcd)
+{
+	u32 temp;
+	struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+	xhci_dbg(xhci, "xhci_run\n");
+
+#if 0	/* FIXME: MSI not setup yet */
+	/* Do this at the very last minute */
+	ret = xhci_setup_msix(xhci);
+	if (!ret)
+		return ret;
+
+	return -ENOSYS;
+#endif
+	xhci_dbg(xhci, "// Set the interrupt modulation register\n");
+	temp = xhci_readl(xhci, &xhci->ir_set->irq_control);
+	temp &= 0xffff;
+	temp |= (u32) 160;
+	xhci_writel(xhci, temp, &xhci->ir_set->irq_control);
+
+	/* Set the HCD state before we enable the irqs */
+	hcd->state = HC_STATE_RUNNING;
+	temp = xhci_readl(xhci, &xhci->op_regs->command);
+	temp |= (CMD_EIE);
+	xhci_dbg(xhci, "// Enable interrupts, cmd = 0x%x.\n",
+			temp);
+	xhci_writel(xhci, temp, &xhci->op_regs->command);
+
+	temp = xhci_readl(xhci, &xhci->ir_set->irq_pending);
+	xhci_dbg(xhci, "// Enabling event ring interrupter 0x%x"
+			" by writing 0x%x to irq_pending\n",
+			(unsigned int) xhci->ir_set,
+			(unsigned int) ER_IRQ_ENABLE(temp));
+	xhci_writel(xhci, ER_IRQ_ENABLE(temp),
+			&xhci->ir_set->irq_pending);
+	xhci_print_ir_set(xhci, xhci->ir_set, 0);
+
+	temp = xhci_readl(xhci, &xhci->op_regs->command);
+	temp |= (CMD_RUN);
+	xhci_dbg(xhci, "// Turn on HC, cmd = 0x%x.\n",
+			temp);
+	xhci_writel(xhci, temp, &xhci->op_regs->command);
+	/* Flush PCI posted writes */
+	temp = xhci_readl(xhci, &xhci->op_regs->command);
+	xhci_dbg(xhci, "// @%x = 0x%x\n",
+			(unsigned int) &xhci->op_regs->command, temp);
+
+	xhci_dbg(xhci, "Finished xhci_run\n");
+	return 0;
+}
+
+/*
+ * Stop xHCI driver.
+ *
+ * This function is called by the USB core when the HC driver is removed.
+ * Its opposite is xhci_run().
+ *
+ * Disable device contexts, disable IRQs, and quiesce the HC.
+ * Reset the HC, finish any completed transactions, and cleanup memory.
+ */
+void xhci_stop(struct usb_hcd *hcd)
+{
+	u32 temp;
+	struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+
+	spin_lock_irq(&xhci->lock);
+	if (HC_IS_RUNNING(hcd->state))
+		xhci_quiesce(xhci);
+	xhci_halt(xhci);
+	xhci_reset(xhci);
+	spin_unlock_irq(&xhci->lock);
+
+#if 0	/* No MSI yet */
+	xhci_cleanup_msix(xhci);
+#endif
+	xhci_dbg(xhci, "// Disabling event ring interrupts\n");
+	temp = xhci_readl(xhci, &xhci->op_regs->status);
+	xhci_writel(xhci, temp & ~STS_EINT, &xhci->op_regs->status);
+	temp = xhci_readl(xhci, &xhci->ir_set->irq_pending);
+	xhci_writel(xhci, ER_IRQ_DISABLE(temp),
+			&xhci->ir_set->irq_pending);
+	xhci_print_ir_set(xhci, xhci->ir_set, 0);
+
+	xhci_dbg(xhci, "cleaning up memory\n");
+	xhci_mem_cleanup(xhci);
+	xhci_dbg(xhci, "xhci_stop completed - status = %x\n",
+		    xhci_readl(xhci, &xhci->op_regs->status));
+}
+
+/*
+ * Shutdown HC (not bus-specific)
+ *
+ * This is called when the machine is rebooting or halting.  We assume that the
+ * machine will be powered off, and the HC's internal state will be reset.
+ * Don't bother to free memory.
+ */
+void xhci_shutdown(struct usb_hcd *hcd)
+{
+	struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+
+	spin_lock_irq(&xhci->lock);
+	xhci_halt(xhci);
+	spin_unlock_irq(&xhci->lock);
+
+#if 0
+	xhci_cleanup_msix(xhci);
+#endif
+
+	xhci_dbg(xhci, "xhci_shutdown completed - status = %x\n",
+		    xhci_readl(xhci, &xhci->op_regs->status));
+}
+
+int xhci_get_frame(struct usb_hcd *hcd)
+{
+	struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+	/* EHCI mods by the periodic size.  Why? */
+	return xhci_readl(xhci, &xhci->run_regs->microframe_index) >> 3;
+}
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_LICENSE("GPL");
+
+static int __init xhci_hcd_init(void)
+{
+#ifdef CONFIG_PCI
+	int retval = 0;
+
+	retval = xhci_register_pci();
+
+	if (retval < 0) {
+		printk(KERN_DEBUG "Problem registering PCI driver.");
+		return retval;
+	}
+#endif
+	return 0;
+}
+module_init(xhci_hcd_init);
+
+static void __exit xhci_hcd_cleanup(void)
+{
+#ifdef CONFIG_PCI
+	xhci_unregister_pci();
+#endif
+}
+module_exit(xhci_hcd_cleanup);
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
new file mode 100644
index 00000000000..0e383f9c380
--- /dev/null
+++ b/drivers/usb/host/xhci-mem.c
@@ -0,0 +1,75 @@
+/*
+ * xHCI host controller driver
+ *
+ * Copyright (C) 2008 Intel Corp.
+ *
+ * Author: Sarah Sharp
+ * Some code borrowed from the Linux EHCI driver.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/usb.h>
+
+#include "xhci.h"
+
+void xhci_mem_cleanup(struct xhci_hcd *xhci)
+{
+	xhci->page_size = 0;
+	xhci->page_shift = 0;
+}
+
+int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
+{
+	unsigned int	val, val2;
+	u32 page_size;
+	int i;
+
+	page_size = xhci_readl(xhci, &xhci->op_regs->page_size);
+	xhci_dbg(xhci, "Supported page size register = 0x%x\n", page_size);
+	for (i = 0; i < 16; i++) {
+		if ((0x1 & page_size) != 0)
+			break;
+		page_size = page_size >> 1;
+	}
+	if (i < 16)
+		xhci_dbg(xhci, "Supported page size of %iK\n", (1 << (i+12)) / 1024);
+	else
+		xhci_warn(xhci, "WARN: no supported page size\n");
+	/* Use 4K pages, since that's common and the minimum the HC supports */
+	xhci->page_shift = 12;
+	xhci->page_size = 1 << xhci->page_shift;
+	xhci_dbg(xhci, "HCD page size set to %iK\n", xhci->page_size / 1024);
+
+	/*
+	 * Program the Number of Device Slots Enabled field in the CONFIG
+	 * register with the max value of slots the HC can handle.
+	 */
+	val = HCS_MAX_SLOTS(xhci_readl(xhci, &xhci->cap_regs->hcs_params1));
+	xhci_dbg(xhci, "// xHC can handle at most %d device slots.\n",
+			(unsigned int) val);
+	val2 = xhci_readl(xhci, &xhci->op_regs->config_reg);
+	val |= (val2 & ~HCS_SLOTS_MASK);
+	xhci_dbg(xhci, "// Setting Max device slots reg = 0x%x.\n",
+			(unsigned int) val);
+	xhci_writel(xhci, val, &xhci->op_regs->config_reg);
+
+	xhci->ir_set = &xhci->run_regs->ir_set[0];
+
+	return 0;
+fail:
+	xhci_warn(xhci, "Couldn't initialize memory\n");
+	xhci_mem_cleanup(xhci);
+	return -ENOMEM;
+}
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
new file mode 100644
index 00000000000..4015082adf6
--- /dev/null
+++ b/drivers/usb/host/xhci-pci.c
@@ -0,0 +1,150 @@
+/*
+ * xHCI host controller driver PCI Bus Glue.
+ *
+ * Copyright (C) 2008 Intel Corp.
+ *
+ * Author: Sarah Sharp
+ * Some code borrowed from the Linux EHCI driver.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/pci.h>
+
+#include "xhci.h"
+
+static const char hcd_name[] = "xhci_hcd";
+
+/* called after powerup, by probe or system-pm "wakeup" */
+static int xhci_pci_reinit(struct xhci_hcd *xhci, struct pci_dev *pdev)
+{
+	/*
+	 * TODO: Implement finding debug ports later.
+	 * TODO: see if there are any quirks that need to be added to handle
+	 * new extended capabilities.
+	 */
+
+	/* PCI Memory-Write-Invalidate cycle support is optional (uncommon) */
+	if (!pci_set_mwi(pdev))
+		xhci_dbg(xhci, "MWI active\n");
+
+	xhci_dbg(xhci, "Finished xhci_pci_reinit\n");
+	return 0;
+}
+
+/* called during probe() after chip reset completes */
+static int xhci_pci_setup(struct usb_hcd *hcd)
+{
+	struct xhci_hcd		*xhci = hcd_to_xhci(hcd);
+	struct pci_dev		*pdev = to_pci_dev(hcd->self.controller);
+	int			retval;
+
+	xhci->cap_regs = hcd->regs;
+	xhci->op_regs = hcd->regs +
+		HC_LENGTH(xhci_readl(xhci, &xhci->cap_regs->hc_capbase));
+	xhci->run_regs = hcd->regs +
+		(xhci_readl(xhci, &xhci->cap_regs->run_regs_off) & RTSOFF_MASK);
+	/* Cache read-only capability registers */
+	xhci->hcs_params1 = xhci_readl(xhci, &xhci->cap_regs->hcs_params1);
+	xhci->hcs_params2 = xhci_readl(xhci, &xhci->cap_regs->hcs_params2);
+	xhci->hcs_params3 = xhci_readl(xhci, &xhci->cap_regs->hcs_params3);
+	xhci->hcc_params = xhci_readl(xhci, &xhci->cap_regs->hcc_params);
+	xhci_print_registers(xhci);
+
+	/* Make sure the HC is halted. */
+	retval = xhci_halt(xhci);
+	if (retval)
+		return retval;
+
+	xhci_dbg(xhci, "Resetting HCD\n");
+	/* Reset the internal HC memory state and registers. */
+	retval = xhci_reset(xhci);
+	if (retval)
+		return retval;
+	xhci_dbg(xhci, "Reset complete\n");
+
+	xhci_dbg(xhci, "Calling HCD init\n");
+	/* Initialize HCD and host controller data structures. */
+	retval = xhci_init(hcd);
+	if (retval)
+		return retval;
+	xhci_dbg(xhci, "Called HCD init\n");
+
+	pci_read_config_byte(pdev, XHCI_SBRN_OFFSET, &xhci->sbrn);
+	xhci_dbg(xhci, "Got SBRN %u\n", (unsigned int) xhci->sbrn);
+
+	/* Find any debug ports */
+	return xhci_pci_reinit(xhci, pdev);
+}
+
+static const struct hc_driver xhci_pci_hc_driver = {
+	.description =		hcd_name,
+	.product_desc =		"xHCI Host Controller",
+	.hcd_priv_size =	sizeof(struct xhci_hcd),
+
+	/*
+	 * generic hardware linkage
+	 */
+	.flags =		HCD_MEMORY | HCD_USB3,
+
+	/*
+	 * basic lifecycle operations
+	 */
+	.reset =		xhci_pci_setup,
+	.start =		xhci_run,
+	/* suspend and resume implemented later */
+	.stop =			xhci_stop,
+	.shutdown =		xhci_shutdown,
+
+	/*
+	 * scheduling support
+	 */
+	.get_frame_number =	xhci_get_frame,
+
+	/* Implement root hub support later. */
+};
+
+/*-------------------------------------------------------------------------*/
+
+/* PCI driver selection metadata; PCI hotplugging uses this */
+static const struct pci_device_id pci_ids[] = { {
+	/* handle any USB 3.0 xHCI controller */
+	PCI_DEVICE_CLASS(PCI_CLASS_SERIAL_USB_XHCI, ~0),
+	.driver_data =	(unsigned long) &xhci_pci_hc_driver,
+	},
+	{ /* end: all zeroes */ }
+};
+MODULE_DEVICE_TABLE(pci, pci_ids);
+
+/* pci driver glue; this is a "new style" PCI driver module */
+static struct pci_driver xhci_pci_driver = {
+	.name =		(char *) hcd_name,
+	.id_table =	pci_ids,
+
+	.probe =	usb_hcd_pci_probe,
+	.remove =	usb_hcd_pci_remove,
+	/* suspend and resume implemented later */
+
+	.shutdown = 	usb_hcd_pci_shutdown,
+};
+
+int xhci_register_pci()
+{
+	return pci_register_driver(&xhci_pci_driver);
+}
+
+void xhci_unregister_pci()
+{
+	pci_unregister_driver(&xhci_pci_driver);
+}
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index a4d44aad069..59fae2e5ea5 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -32,6 +32,9 @@
 /* xHCI PCI Configuration Registers */
 #define XHCI_SBRN_OFFSET	(0x60)
 
+/* Max number of USB devices for any host controller - limit in section 6.1 */
+#define MAX_HC_SLOTS		256
+
 /*
  * xHCI register interface.
  * This corresponds to the eXtensible Host Controller Interface (xHCI)
@@ -359,10 +362,35 @@ struct intr_reg {
 	u32	erst_dequeue[2];
 } __attribute__ ((packed));
 
+/* irq_pending bitmasks */
 #define	ER_IRQ_PENDING(p)	((p) & 0x1)
-#define	ER_IRQ_ENABLE(p)	((p) | 0x2)
+/* bits 2:31 need to be preserved */
+#define	ER_IRQ_CLEAR(p)		((p) & 0xfffffffe)
+#define	ER_IRQ_ENABLE(p)	((ER_IRQ_CLEAR(p)) | 0x2)
+#define	ER_IRQ_DISABLE(p)	((ER_IRQ_CLEAR(p)) & ~(0x2))
+
+/* irq_control bitmasks */
+/* Minimum interval between interrupts (in 250ns intervals).  The interval
+ * between interrupts will be longer if there are no events on the event ring.
+ * Default is 4000 (1 ms).
+ */
+#define ER_IRQ_INTERVAL_MASK	(0xffff)
+/* Counter used to count down the time to the next interrupt - HW use only */
+#define ER_IRQ_COUNTER_MASK	(0xffff << 16)
+
+/* erst_size bitmasks */
 /* Preserve bits 16:31 of erst_size */
-#define	ERST_SIZE_MASK	(0xffff<<16)
+#define	ERST_SIZE_MASK		(0xffff << 16)
+
+/* erst_dequeue bitmasks */
+/* Dequeue ERST Segment Index (DESI) - Segment number (or alias)
+ * where the current dequeue pointer lies.  This is an optional HW hint.
+ */
+#define ERST_DESI_MASK		(0x7)
+/* Event Handler Busy (EHB) - is the event ring scheduled to be serviced by
+ * a work queue (or delayed service routine)?
+ */
+#define ERST_EHB		(1 << 3)
 
 /**
  * struct xhci_run_regs
@@ -386,6 +414,8 @@ struct xhci_hcd {
 	struct xhci_cap_regs __iomem *cap_regs;
 	struct xhci_op_regs __iomem *op_regs;
 	struct xhci_run_regs __iomem *run_regs;
+	/* Our HCD's current interrupter register set */
+	struct	intr_reg __iomem *ir_set;
 
 	/* Cached register copies of read-only HC data */
 	__u32		hcs_params1;
@@ -404,7 +434,13 @@ struct xhci_hcd {
 	u8		isoc_threshold;
 	int		event_ring_max;
 	int		addr_64;
+	/* 4KB min, 128MB max */
 	int		page_size;
+	/* Valid values are 12 to 20, inclusive */
+	int		page_shift;
+	/* only one MSI vector for now, but might need more later */
+	int		msix_count;
+	struct msix_entry	*msix_entries;
 };
 
 /* convert between an HCD pointer and the corresponding EHCI_HCD */
@@ -449,4 +485,27 @@ static inline void xhci_writel(const struct xhci_hcd *xhci,
 	writel(val, regs);
 }
 
+/* xHCI debugging */
+void xhci_print_ir_set(struct xhci_hcd *xhci, struct intr_reg *ir_set, int set_num);
+void xhci_print_registers(struct xhci_hcd *xhci);
+
+/* xHCI memory managment */
+void xhci_mem_cleanup(struct xhci_hcd *xhci);
+int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags);
+
+#ifdef CONFIG_PCI
+/* xHCI PCI glue */
+int xhci_register_pci(void);
+void xhci_unregister_pci(void);
+#endif
+
+/* xHCI host controller glue */
+int xhci_halt(struct xhci_hcd *xhci);
+int xhci_reset(struct xhci_hcd *xhci);
+int xhci_init(struct usb_hcd *hcd);
+int xhci_run(struct usb_hcd *hcd);
+void xhci_stop(struct usb_hcd *hcd);
+void xhci_shutdown(struct usb_hcd *hcd);
+int xhci_get_frame(struct usb_hcd *hcd);
+
 #endif /* __LINUX_XHCI_HCD_H */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index aa01d38c997..a3b00036579 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -104,6 +104,7 @@
 #define PCI_CLASS_SERIAL_USB_UHCI	0x0c0300
 #define PCI_CLASS_SERIAL_USB_OHCI	0x0c0310
 #define PCI_CLASS_SERIAL_USB_EHCI	0x0c0320
+#define PCI_CLASS_SERIAL_USB_XHCI	0x0c0330
 #define PCI_CLASS_SERIAL_FIBER		0x0c04
 #define PCI_CLASS_SERIAL_SMBUS		0x0c05
 
-- 
cgit v1.2.3-70-g09d2


From 6b403b020c1f42180b14d28d832da61167cff822 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Mon, 27 Apr 2009 19:54:10 -0700
Subject: USB: Add SuperSpeed to the list of USB device speeds.

Modify the USB core to handle the new USB 3.0 speed, "SuperSpeed".  This
is 5.0 Gbps (wire speed).  There are probably more places that check for
speed that I've missed.

SuperSpeed devices have a 512 byte endpoint 0 max packet size.  This shows
up as a bMaxPacketSize0 set to 0x09 (see table 9-8 of the USB 3.0 bus
spec).

xHCI spec says that the xHC can handle intervals up to 2^15 microframes.  That
might change when real silicon becomes available.

Add FIXME note for SuperSpeed isochronous endpoints.  They can transmit up
to 16 packets in one "burst" before they wait for an acknowledgment of the
packets.  They can do up to 3 bursts per microframe (determined by the
mult value in the endpoint companion descriptor).  The xHCI driver doesn't
have support for isoc yet, so fix this later.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/config.c |  1 +
 drivers/usb/core/hcd.c    | 16 ++++++++++++++--
 drivers/usb/core/hcd.h    |  1 +
 drivers/usb/core/hub.c    | 11 +++++++++--
 drivers/usb/core/urb.c    |  6 ++++++
 include/linux/usb/ch9.h   |  1 +
 6 files changed, 32 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
index 568244c99bd..e9426acf568 100644
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -92,6 +92,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
 	if (usb_endpoint_xfer_int(d)) {
 		i = 1;
 		switch (to_usb_device(ddev)->speed) {
+		case USB_SPEED_SUPER:
 		case USB_SPEED_HIGH:
 			/* Many device manufacturers are using full-speed
 			 * bInterval values in high-speed interrupt endpoint
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index c65d9560402..120bb56c4b8 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1870,8 +1870,20 @@ int usb_add_hcd(struct usb_hcd *hcd,
 		retval = -ENOMEM;
 		goto err_allocate_root_hub;
 	}
-	rhdev->speed = (hcd->driver->flags & HCD_USB2) ? USB_SPEED_HIGH :
-			USB_SPEED_FULL;
+
+	switch (hcd->driver->flags & HCD_MASK) {
+	case HCD_USB11:
+		rhdev->speed = USB_SPEED_FULL;
+		break;
+	case HCD_USB2:
+		rhdev->speed = USB_SPEED_HIGH;
+		break;
+	case HCD_USB3:
+		rhdev->speed = USB_SPEED_SUPER;
+		break;
+	default:
+		goto err_allocate_root_hub;
+	}
 	hcd->self.root_hub = rhdev;
 
 	/* wakeup flag init defaults to "everything works" for root hubs,
diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h
index 7a47498b0aa..4f6ee60d97c 100644
--- a/drivers/usb/core/hcd.h
+++ b/drivers/usb/core/hcd.h
@@ -174,6 +174,7 @@ struct hc_driver {
 #define	HCD_USB11	0x0010		/* USB 1.1 */
 #define	HCD_USB2	0x0020		/* USB 2.0 */
 #define	HCD_USB3	0x0040		/* USB 3.0 */
+#define	HCD_MASK	0x0070
 
 	/* called to init HCD and root hub */
 	int	(*reset) (struct usb_hcd *hcd);
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index c7a1a1d4bcb..fa0208e0da6 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -2471,6 +2471,7 @@ hub_port_init (struct usb_hub *hub, struct usb_device *udev, int port1,
 	 * reported as 0xff in the device descriptor). WUSB1.0[4.8.1].
 	 */
 	switch (udev->speed) {
+	case USB_SPEED_SUPER:
 	case USB_SPEED_VARIABLE:	/* fixed at 512 */
 		udev->ep0.desc.wMaxPacketSize = cpu_to_le16(512);
 		break;
@@ -2496,6 +2497,9 @@ hub_port_init (struct usb_hub *hub, struct usb_device *udev, int port1,
 	case USB_SPEED_LOW:	speed = "low";	break;
 	case USB_SPEED_FULL:	speed = "full";	break;
 	case USB_SPEED_HIGH:	speed = "high";	break;
+	case USB_SPEED_SUPER:
+				speed = "super";
+				break;
 	case USB_SPEED_VARIABLE:
 				speed = "variable";
 				type = "Wireless ";
@@ -2634,8 +2638,11 @@ hub_port_init (struct usb_hub *hub, struct usb_device *udev, int port1,
 	if (retval)
 		goto fail;
 
-	i = udev->descriptor.bMaxPacketSize0 == 0xff?	/* wusb device? */
-	    512 : udev->descriptor.bMaxPacketSize0;
+	if (udev->descriptor.bMaxPacketSize0 == 0xff ||
+			udev->speed == USB_SPEED_SUPER)
+		i = 512;
+	else
+		i = udev->descriptor.bMaxPacketSize0;
 	if (le16_to_cpu(udev->ep0.desc.wMaxPacketSize) != i) {
 		if (udev->speed != USB_SPEED_FULL ||
 				!(i == 8 || i == 16 || i == 32 || i == 64)) {
diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c
index 3376055f36e..02eb0ef7a4c 100644
--- a/drivers/usb/core/urb.c
+++ b/drivers/usb/core/urb.c
@@ -351,6 +351,7 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags)
 	if (xfertype == USB_ENDPOINT_XFER_ISOC) {
 		int	n, len;
 
+		/* FIXME SuperSpeed isoc endpoints have up to 16 bursts */
 		/* "high bandwidth" mode, 1-3 packets/uframe? */
 		if (dev->speed == USB_SPEED_HIGH) {
 			int	mult = 1 + ((max >> 11) & 0x03);
@@ -426,6 +427,11 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags)
 			return -EINVAL;
 		/* too big? */
 		switch (dev->speed) {
+		case USB_SPEED_SUPER:	/* units are 125us */
+			/* Handle up to 2^(16-1) microframes */
+			if (urb->interval > (1 << 15))
+				return -EINVAL;
+			max = 1 << 15;
 		case USB_SPEED_HIGH:	/* units are microframes */
 			/* NOTE usb handles 2^15 */
 			if (urb->interval > (1024 * 8))
diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h
index b145119a90d..93bfe635234 100644
--- a/include/linux/usb/ch9.h
+++ b/include/linux/usb/ch9.h
@@ -752,6 +752,7 @@ enum usb_device_speed {
 	USB_SPEED_LOW, USB_SPEED_FULL,		/* usb 1.1 */
 	USB_SPEED_HIGH,				/* usb 2.0 */
 	USB_SPEED_VARIABLE,			/* wireless (usb 2.5) */
+	USB_SPEED_SUPER,			/* usb 3.0 */
 };
 
 enum usb_device_state {
-- 
cgit v1.2.3-70-g09d2


From 7206b00164a1c3ca533e01db285955617e1019f8 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Mon, 27 Apr 2009 19:54:49 -0700
Subject: USB: Add route string to struct usb_device.

This patch adds a hex route string to each USB device.  The route string is used
by the USB 3.0 host controller to send packets through the device tree.  USB 3.0
hubs use this string to route packets to the correct port.  This is fundamental
bus change from USB 2.0, where all packets were broadcast across the bus.

Devices (including hubs) under a root port receive the route string 0x0.  Every
four bits in the route string represent a port on a hub.  This length works
because USB 3.0 hubs are limited to 15 ports, and USB 2.0 hubs (with potentially
more ports) will never see packets with a route string.  A port number of 0
means the packet is destined for that hub.

For example, a peripheral device might have a route string of 0x00097.
This means the device is connected to port 9 of the hub at depth 1.
The hub at depth 1 is connected to port 7 of a hub at depth 0.
The hub at depth 0 is connected to a root port.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/usb.c | 10 ++++++++--
 include/linux/usb.h    |  2 ++
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 020b58528d9..f026991d0bd 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -375,18 +375,24 @@ struct usb_device *usb_alloc_dev(struct usb_device *parent,
 	 */
 	if (unlikely(!parent)) {
 		dev->devpath[0] = '0';
+		dev->route = 0;
 
 		dev->dev.parent = bus->controller;
 		dev_set_name(&dev->dev, "usb%d", bus->busnum);
 		root_hub = 1;
 	} else {
 		/* match any labeling on the hubs; it's one-based */
-		if (parent->devpath[0] == '0')
+		if (parent->devpath[0] == '0') {
 			snprintf(dev->devpath, sizeof dev->devpath,
 				"%d", port1);
-		else
+			/* Root ports are not counted in route string */
+			dev->route = 0;
+		} else {
 			snprintf(dev->devpath, sizeof dev->devpath,
 				"%s.%d", parent->devpath, port1);
+			dev->route = parent->route +
+				(port1 << ((parent->level - 1)*4));
+		}
 
 		dev->dev.parent = &parent->dev;
 		dev_set_name(&dev->dev, "%d-%s", bus->busnum, dev->devpath);
diff --git a/include/linux/usb.h b/include/linux/usb.h
index fb1f2a32ae0..2b380a16c62 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -362,6 +362,7 @@ struct usb_tt;
  * struct usb_device - kernel's representation of a USB device
  * @devnum: device number; address on a USB bus
  * @devpath: device ID string for use in messages (e.g., /port/...)
+ * @route: tree topology hex string for use with xHCI
  * @state: device state: configured, not attached, etc.
  * @speed: device speed: high/full/low (or error)
  * @tt: Transaction Translator info; used with low/full speed dev, highspeed hub
@@ -427,6 +428,7 @@ struct usb_tt;
 struct usb_device {
 	int		devnum;
 	char		devpath [16];
+	u32		route;
 	enum usb_device_state	state;
 	enum usb_device_speed	speed;
 
-- 
cgit v1.2.3-70-g09d2


From c6515272b858742962c1de0f3bf497a048b9abd7 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Mon, 27 Apr 2009 19:57:26 -0700
Subject: USB: Support for addressing a USB device under xHCI

Add host controller driver API and a slot_id variable to struct
usb_device.  This allows the xHCI host controller driver to ask the
hardware to allocate a slot for the device when a struct usb_device is
allocated.  The slot needs to be allocated at that point because the
hardware can run out of internal resources, and we want to know that very
early in the device connection process.  Don't call this new API for root
hubs, since they aren't real devices.

Add HCD API to let the host controller choose the device address.  This is
especially important for xHCI hardware running in a virtualized
environment.  The guests running under the VM don't need to know which
addresses on the bus are taken, because the hardware picks the address for
them.  Announce SuperSpeed USB devices after the address has been assigned
by the hardware.

Don't use the new get descriptor/set address scheme with xHCI.  Unless
special handling is done in the host controller driver, the xHC can't
issue control transfers before you set the device address.  Support for
the older addressing scheme will be added when the xHCI driver supports
the Block Set Address Request (BSR) flag in the Address Device command.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/hcd.h |  8 ++++++
 drivers/usb/core/hub.c | 74 +++++++++++++++++++++++++++++++++++++-------------
 drivers/usb/core/usb.c | 14 +++++++++-
 include/linux/usb.h    |  2 ++
 4 files changed, 78 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h
index 4f6ee60d97c..ae6d9db41ca 100644
--- a/drivers/usb/core/hcd.h
+++ b/drivers/usb/core/hcd.h
@@ -226,6 +226,14 @@ struct hc_driver {
 	void	(*relinquish_port)(struct usb_hcd *, int);
 		/* has a port been handed over to a companion? */
 	int	(*port_handed_over)(struct usb_hcd *, int);
+
+	/* xHCI specific functions */
+		/* Called by usb_alloc_dev to alloc HC device structures */
+	int	(*alloc_dev)(struct usb_hcd *, struct usb_device *);
+		/* Called by usb_release_dev to free HC device structures */
+	void	(*free_dev)(struct usb_hcd *, struct usb_device *);
+		/* Returns the hardware-chosen device address */
+	int	(*address_device)(struct usb_hcd *, struct usb_device *udev);
 };
 
 extern int usb_hcd_link_urb_to_ep(struct usb_hcd *hcd, struct urb *urb);
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 3c28bde6cbd..2af3b4f0605 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1328,6 +1328,11 @@ EXPORT_SYMBOL_GPL(usb_set_device_state);
  * 0 is reserved by USB for default address; (b) Linux's USB stack
  * uses always #1 for the root hub of the controller. So USB stack's
  * port #1, which is wusb virtual-port #0 has address #2.
+ *
+ * Devices connected under xHCI are not as simple.  The host controller
+ * supports virtualization, so the hardware assigns device addresses and
+ * the HCD must setup data structures before issuing a set address
+ * command to the hardware.
  */
 static void choose_address(struct usb_device *udev)
 {
@@ -1647,6 +1652,9 @@ int usb_new_device(struct usb_device *udev)
 	err = usb_configure_device(udev);	/* detect & probe dev/intfs */
 	if (err < 0)
 		goto fail;
+	dev_dbg(&udev->dev, "udev %d, busnum %d, minor = %d\n",
+			udev->devnum, udev->bus->busnum,
+			(((udev->bus->busnum-1) * 128) + (udev->devnum-1)));
 	/* export the usbdev device-node for libusb */
 	udev->dev.devt = MKDEV(USB_DEVICE_MAJOR,
 			(((udev->bus->busnum-1) * 128) + (udev->devnum-1)));
@@ -2400,19 +2408,29 @@ EXPORT_SYMBOL_GPL(usb_ep0_reinit);
 static int hub_set_address(struct usb_device *udev, int devnum)
 {
 	int retval;
+	struct usb_hcd *hcd = bus_to_hcd(udev->bus);
 
-	if (devnum <= 1)
+	/*
+	 * The host controller will choose the device address,
+	 * instead of the core having chosen it earlier
+	 */
+	if (!hcd->driver->address_device && devnum <= 1)
 		return -EINVAL;
 	if (udev->state == USB_STATE_ADDRESS)
 		return 0;
 	if (udev->state != USB_STATE_DEFAULT)
 		return -EINVAL;
-	retval = usb_control_msg(udev, usb_sndaddr0pipe(),
-		USB_REQ_SET_ADDRESS, 0, devnum, 0,
-		NULL, 0, USB_CTRL_SET_TIMEOUT);
+	if (hcd->driver->address_device) {
+		retval = hcd->driver->address_device(hcd, udev);
+	} else {
+		retval = usb_control_msg(udev, usb_sndaddr0pipe(),
+				USB_REQ_SET_ADDRESS, 0, devnum, 0,
+				NULL, 0, USB_CTRL_SET_TIMEOUT);
+		if (retval == 0)
+			update_address(udev, devnum);
+	}
 	if (retval == 0) {
 		/* Device now using proper address. */
-		update_address(udev, devnum);
 		usb_set_device_state(udev, USB_STATE_ADDRESS);
 		usb_ep0_reinit(udev);
 	}
@@ -2525,10 +2543,11 @@ hub_port_init (struct usb_hub *hub, struct usb_device *udev, int port1,
 				break;
 	default: 		speed = "?";	break;
 	}
-	dev_info (&udev->dev,
-		  "%s %s speed %sUSB device using %s and address %d\n",
-		  (udev->config) ? "reset" : "new", speed, type,
-		  udev->bus->controller->driver->name, devnum);
+	if (udev->speed != USB_SPEED_SUPER)
+		dev_info(&udev->dev,
+				"%s %s speed %sUSB device using %s and address %d\n",
+				(udev->config) ? "reset" : "new", speed, type,
+				udev->bus->controller->driver->name, devnum);
 
 	/* Set up TT records, if needed  */
 	if (hdev->tt) {
@@ -2553,7 +2572,11 @@ hub_port_init (struct usb_hub *hub, struct usb_device *udev, int port1,
 	 * value.
 	 */
 	for (i = 0; i < GET_DESCRIPTOR_TRIES; (++i, msleep(100))) {
-		if (USE_NEW_SCHEME(retry_counter)) {
+		/*
+		 * An xHCI controller cannot send any packets to a device until
+		 * a set address command successfully completes.
+		 */
+		if (USE_NEW_SCHEME(retry_counter) && !(hcd->driver->flags & HCD_USB3)) {
 			struct usb_device_descriptor *buf;
 			int r = 0;
 
@@ -2619,7 +2642,7 @@ hub_port_init (struct usb_hub *hub, struct usb_device *udev, int port1,
  		 * unauthorized address in the Connect Ack sequence;
  		 * authorization will assign the final address.
  		 */
- 		if (udev->wusb == 0) {
+		if (udev->wusb == 0) {
 			for (j = 0; j < SET_ADDRESS_TRIES; ++j) {
 				retval = hub_set_address(udev, devnum);
 				if (retval >= 0)
@@ -2632,13 +2655,20 @@ hub_port_init (struct usb_hub *hub, struct usb_device *udev, int port1,
 					devnum, retval);
 				goto fail;
 			}
+			if (udev->speed == USB_SPEED_SUPER) {
+				devnum = udev->devnum;
+				dev_info(&udev->dev,
+						"%s SuperSpeed USB device using %s and address %d\n",
+						(udev->config) ? "reset" : "new",
+						udev->bus->controller->driver->name, devnum);
+			}
 
 			/* cope with hardware quirkiness:
 			 *  - let SET_ADDRESS settle, some device hardware wants it
 			 *  - read ep0 maxpacket even for high and low speed,
 			 */
 			msleep(10);
-			if (USE_NEW_SCHEME(retry_counter))
+			if (USE_NEW_SCHEME(retry_counter) && !(hcd->driver->flags & HCD_USB3))
 				break;
   		}
 
@@ -2877,13 +2907,6 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1,
 		udev->level = hdev->level + 1;
 		udev->wusb = hub_is_wusb(hub);
 
-		/* set the address */
-		choose_address(udev);
-		if (udev->devnum <= 0) {
-			status = -ENOTCONN;	/* Don't retry */
-			goto loop;
-		}
-
 		/*
 		 * USB 3.0 devices are reset automatically before the connect
 		 * port status change appears, and the root hub port status
@@ -2901,6 +2924,19 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1,
 		else
 			udev->speed = USB_SPEED_UNKNOWN;
 
+		/*
+		 * xHCI needs to issue an address device command later
+		 * in the hub_port_init sequence for SS/HS/FS/LS devices.
+		 */
+		if (!(hcd->driver->flags & HCD_USB3)) {
+			/* set the address */
+			choose_address(udev);
+			if (udev->devnum <= 0) {
+				status = -ENOTCONN;	/* Don't retry */
+				goto loop;
+			}
+		}
+
 		/* reset (non-USB 3.0 devices) and get descriptor */
 		status = hub_port_init(hub, udev, port1, i);
 		if (status < 0)
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index f026991d0bd..55b8d3a22d2 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -184,11 +184,16 @@ EXPORT_SYMBOL_GPL(usb_find_interface);
 static void usb_release_dev(struct device *dev)
 {
 	struct usb_device *udev;
+	struct usb_hcd *hcd;
 
 	udev = to_usb_device(dev);
+	hcd = bus_to_hcd(udev->bus);
 
 	usb_destroy_configuration(udev);
-	usb_put_hcd(bus_to_hcd(udev->bus));
+	/* Root hubs aren't real devices, so don't free HCD resources */
+	if (hcd->driver->free_dev && udev->parent)
+		hcd->driver->free_dev(hcd, udev);
+	usb_put_hcd(hcd);
 	kfree(udev->product);
 	kfree(udev->manufacturer);
 	kfree(udev->serial);
@@ -348,6 +353,13 @@ struct usb_device *usb_alloc_dev(struct usb_device *parent,
 		kfree(dev);
 		return NULL;
 	}
+	/* Root hubs aren't true devices, so don't allocate HCD resources */
+	if (usb_hcd->driver->alloc_dev && parent &&
+		!usb_hcd->driver->alloc_dev(usb_hcd, dev)) {
+		usb_put_hcd(bus_to_hcd(bus));
+		kfree(dev);
+		return NULL;
+	}
 
 	device_initialize(&dev->dev);
 	dev->dev.bus = &usb_bus_type;
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 2b380a16c62..475cb75cc37 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -420,6 +420,7 @@ struct usb_tt;
  * @skip_sys_resume: skip the next system resume
  * @wusb_dev: if this is a Wireless USB device, link to the WUSB
  *	specific data for the device.
+ * @slot_id: Slot ID assigned by xHCI
  *
  * Notes:
  * Usbcore drivers should not set usbdev->state directly.  Instead use
@@ -504,6 +505,7 @@ struct usb_device {
 	unsigned skip_sys_resume:1;
 #endif
 	struct wusb_dev *wusb_dev;
+	int slot_id;
 };
 #define	to_usb_device(d) container_of(d, struct usb_device, dev)
 
-- 
cgit v1.2.3-70-g09d2


From 6d65b78a093552fb42448480d4c66bf093a6d4cf Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Mon, 27 Apr 2009 19:57:50 -0700
Subject: USB: Support for submitting control URBs under xHCI.

Warn users of URB_NO_SETUP_DMA_MAP about xHCI behavior.

Device drivers can choose to DMA map the setup packet of a control transfer
before submitting the URB to the USB core.  Drivers then set the
URB_NO_SETUP_DMA_MAP and pass in the DMA memory address in setup_dma, instead of
providing a kernel address for setup_packet.  However, xHCI requires that the
setup packet be copied into an internal data structure, and we need a kernel
memory address pointer for that.  Warn users of URB_NO_SETUP_DMA_MAP that they
should provide a valid pointer for setup_packet, along with the DMA address.

FIXME:  I'm not entirely sure how to work around this in the xHCI driver
or USB core.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 475cb75cc37..112a2d6e922 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1044,7 +1044,9 @@ typedef void (*usb_complete_t)(struct urb *);
  * @setup_dma: For control transfers with URB_NO_SETUP_DMA_MAP set, the
  *	device driver has provided this DMA address for the setup packet.
  *	The host controller driver should use this in preference to
- *	setup_packet.
+ *	setup_packet, but the HCD may chose to ignore the address if it must
+ *	copy the setup packet into internal structures.  Therefore, setup_packet
+ *	must always point to a valid buffer.
  * @start_frame: Returns the initial frame for isochronous transfers.
  * @number_of_packets: Lists the number of ISO transfer buffers.
  * @interval: Specifies the polling interval for interrupt or isochronous
-- 
cgit v1.2.3-70-g09d2


From 663c30d0829d556efabd5fbd98fb8473da7fe694 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Mon, 27 Apr 2009 19:58:14 -0700
Subject: USB: Parse and store the SuperSpeed endpoint companion descriptors.

The USB 3.0 bus specification added an "Endpoint Companion" descriptor that is
supposed to follow all SuperSpeed Endpoint descriptors.  This descriptor is used
to extend the bus protocol to allow more packets to be sent to an endpoint per
"microframe".  The word microframe was removed from the USB 3.0 specification
because the host controller does not send Start Of Frame (SOF) symbols down the
USB 3.0 wires.

The descriptor defines a bMaxBurst field, which indicates the number of packets
of wMaxPacketSize that a SuperSpeed device can send or recieve in a service
interval.  All non-control endpoints may set this value as high as 16 packets
(bMaxBurst = 15).

The descriptor also allows isochronous endpoints to further specify that they
can send and receive multiple bursts per service interval.  The bmAttributes
allows them to specify a "Mult" of up to 3 (bmAttributes = 2).

Bulk endpoints use bmAttributes to report the number of "Streams" they support.
This was an extension of the endpoint pipe concept to allow multiple mass
storage device commands to be outstanding for one bulk endpoint at a time.  This
should allow USB 3.0 mass storage devices to support SCSI command queueing.
Bulk endpoints can say they support up to 2^16 (65,536) streams.

The information in the endpoint companion descriptor must be stored with the
other device, config, interface, and endpoint descriptors because the host
controller needs to access them quickly, and we need to install some default
values if a SuperSpeed device doesn't provide an endpoint companion descriptor.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/config.c | 189 +++++++++++++++++++++++++++++++++++++++++++---
 include/linux/usb.h       |  16 ++++
 include/linux/usb/ch9.h   |  16 ++++
 3 files changed, 212 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
index e9426acf568..7103758bb48 100644
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -19,6 +19,32 @@ static inline const char *plural(int n)
 	return (n == 1 ? "" : "s");
 }
 
+/* FIXME: this is a kludge */
+static int find_next_descriptor_more(unsigned char *buffer, int size,
+    int dt1, int dt2, int dt3, int *num_skipped)
+{
+	struct usb_descriptor_header *h;
+	int n = 0;
+	unsigned char *buffer0 = buffer;
+
+	/* Find the next descriptor of type dt1 or dt2 or dt3 */
+	while (size > 0) {
+		h = (struct usb_descriptor_header *) buffer;
+		if (h->bDescriptorType == dt1 || h->bDescriptorType == dt2 ||
+				h->bDescriptorType == dt3)
+			break;
+		buffer += h->bLength;
+		size -= h->bLength;
+		++n;
+	}
+
+	/* Store the number of descriptors skipped and return the
+	 * number of bytes skipped */
+	if (num_skipped)
+		*num_skipped = n;
+	return buffer - buffer0;
+}
+
 static int find_next_descriptor(unsigned char *buffer, int size,
     int dt1, int dt2, int *num_skipped)
 {
@@ -43,6 +69,128 @@ static int find_next_descriptor(unsigned char *buffer, int size,
 	return buffer - buffer0;
 }
 
+static int usb_parse_endpoint_companion(struct device *ddev, int cfgno,
+		int inum, int asnum, struct usb_host_endpoint *ep,
+		int num_ep, unsigned char *buffer, int size)
+{
+	unsigned char *buffer_start = buffer;
+	struct usb_ep_comp_descriptor	*desc;
+	int retval;
+	int num_skipped;
+	int max_tx;
+	int i;
+
+	/* Allocate space for the companion descriptor */
+	ep->ep_comp = kzalloc(sizeof(struct usb_host_ep_comp), GFP_KERNEL);
+	if (!ep->ep_comp)
+		return -ENOMEM;
+	desc = (struct usb_ep_comp_descriptor *) buffer;
+	if (desc->bDescriptorType != USB_DT_SS_ENDPOINT_COMP) {
+		dev_warn(ddev, "No SuperSpeed endpoint companion for config %d "
+				" interface %d altsetting %d ep %d: "
+				"using minimum values\n",
+				cfgno, inum, asnum, ep->desc.bEndpointAddress);
+		ep->ep_comp->desc.bLength = USB_DT_EP_COMP_SIZE;
+		ep->ep_comp->desc.bDescriptorType = USB_DT_SS_ENDPOINT_COMP;
+		ep->ep_comp->desc.bMaxBurst = 0;
+		/*
+		 * Leave bmAttributes as zero, which will mean no streams for
+		 * bulk, and isoc won't support multiple bursts of packets.
+		 * With bursts of only one packet, and a Mult of 1, the max
+		 * amount of data moved per endpoint service interval is one
+		 * packet.
+		 */
+		if (usb_endpoint_xfer_isoc(&ep->desc) ||
+				usb_endpoint_xfer_int(&ep->desc))
+			ep->ep_comp->desc.wBytesPerInterval =
+				ep->desc.wMaxPacketSize;
+		/*
+		 * The next descriptor is for an Endpoint or Interface,
+		 * no extra descriptors to copy into the companion structure,
+		 * and we didn't eat up any of the buffer.
+		 */
+		retval = 0;
+		goto valid;
+	}
+	memcpy(&ep->ep_comp->desc, desc, USB_DT_EP_COMP_SIZE);
+	desc = &ep->ep_comp->desc;
+	buffer += desc->bLength;
+	size -= desc->bLength;
+
+	/* Eat up the other descriptors we don't care about */
+	ep->ep_comp->extra = buffer;
+	i = find_next_descriptor(buffer, size, USB_DT_ENDPOINT,
+			USB_DT_INTERFACE, &num_skipped);
+	ep->ep_comp->extralen = i;
+	buffer += i;
+	size -= i;
+	retval = buffer - buffer_start + i;
+	if (num_skipped > 0)
+		dev_dbg(ddev, "skipped %d descriptor%s after %s\n",
+				num_skipped, plural(num_skipped),
+				"SuperSpeed endpoint companion");
+
+	/* Check the various values */
+	if (usb_endpoint_xfer_control(&ep->desc) && desc->bMaxBurst != 0) {
+		dev_warn(ddev, "Control endpoint with bMaxBurst = %d in "
+				"config %d interface %d altsetting %d ep %d: "
+				"setting to zero\n", desc->bMaxBurst,
+				cfgno, inum, asnum, ep->desc.bEndpointAddress);
+		desc->bMaxBurst = 0;
+	}
+	if (desc->bMaxBurst > 15) {
+		dev_warn(ddev, "Endpoint with bMaxBurst = %d in "
+				"config %d interface %d altsetting %d ep %d: "
+				"setting to 15\n", desc->bMaxBurst,
+				cfgno, inum, asnum, ep->desc.bEndpointAddress);
+		desc->bMaxBurst = 15;
+	}
+	if ((usb_endpoint_xfer_control(&ep->desc) || usb_endpoint_xfer_int(&ep->desc))
+			&& desc->bmAttributes != 0) {
+		dev_warn(ddev, "%s endpoint with bmAttributes = %d in "
+				"config %d interface %d altsetting %d ep %d: "
+				"setting to zero\n",
+				usb_endpoint_xfer_control(&ep->desc) ? "Control" : "Bulk",
+				desc->bmAttributes,
+				cfgno, inum, asnum, ep->desc.bEndpointAddress);
+		desc->bmAttributes = 0;
+	}
+	if (usb_endpoint_xfer_bulk(&ep->desc) && desc->bmAttributes > 16) {
+		dev_warn(ddev, "Bulk endpoint with more than 65536 streams in "
+				"config %d interface %d altsetting %d ep %d: "
+				"setting to max\n",
+				cfgno, inum, asnum, ep->desc.bEndpointAddress);
+		desc->bmAttributes = 16;
+	}
+	if (usb_endpoint_xfer_isoc(&ep->desc) && desc->bmAttributes > 2) {
+		dev_warn(ddev, "Isoc endpoint has Mult of %d in "
+				"config %d interface %d altsetting %d ep %d: "
+				"setting to 3\n", desc->bmAttributes + 1,
+				cfgno, inum, asnum, ep->desc.bEndpointAddress);
+		desc->bmAttributes = 2;
+	}
+	if (usb_endpoint_xfer_isoc(&ep->desc)) {
+		max_tx = ep->desc.wMaxPacketSize * (desc->bMaxBurst + 1) *
+			(desc->bmAttributes + 1);
+	} else if (usb_endpoint_xfer_int(&ep->desc)) {
+		max_tx = ep->desc.wMaxPacketSize * (desc->bMaxBurst + 1);
+	} else {
+		goto valid;
+	}
+	if (desc->wBytesPerInterval > max_tx) {
+		dev_warn(ddev, "%s endpoint with wBytesPerInterval of %d in "
+				"config %d interface %d altsetting %d ep %d: "
+				"setting to %d\n",
+				usb_endpoint_xfer_isoc(&ep->desc) ? "Isoc" : "Int",
+				desc->wBytesPerInterval,
+				cfgno, inum, asnum, ep->desc.bEndpointAddress,
+				max_tx);
+		desc->wBytesPerInterval = max_tx;
+	}
+valid:
+	return retval;
+}
+
 static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
     int asnum, struct usb_host_interface *ifp, int num_ep,
     unsigned char *buffer, int size)
@@ -50,7 +198,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
 	unsigned char *buffer0 = buffer;
 	struct usb_endpoint_descriptor *d;
 	struct usb_host_endpoint *endpoint;
-	int n, i, j;
+	int n, i, j, retval;
 
 	d = (struct usb_endpoint_descriptor *) buffer;
 	buffer += d->bLength;
@@ -162,17 +310,38 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
 				cfgno, inum, asnum, d->bEndpointAddress,
 				maxp);
 	}
-
-	/* Skip over any Class Specific or Vendor Specific descriptors;
-	 * find the next endpoint or interface descriptor */
-	endpoint->extra = buffer;
-	i = find_next_descriptor(buffer, size, USB_DT_ENDPOINT,
-	    USB_DT_INTERFACE, &n);
-	endpoint->extralen = i;
+	/* Allocate room for and parse any endpoint companion descriptors */
+	if (to_usb_device(ddev)->speed == USB_SPEED_SUPER) {
+		endpoint->extra = buffer;
+		i = find_next_descriptor_more(buffer, size, USB_DT_SS_ENDPOINT_COMP,
+				USB_DT_ENDPOINT, USB_DT_INTERFACE, &n);
+		endpoint->extralen = i;
+		buffer += i;
+		size -= i;
+
+		if (size > 0) {
+			retval = usb_parse_endpoint_companion(ddev, cfgno, inum, asnum,
+					endpoint, num_ep, buffer, size);
+			if (retval >= 0) {
+				buffer += retval;
+				retval = buffer - buffer0;
+			}
+		} else {
+			retval = buffer - buffer0;
+		}
+	} else {
+		/* Skip over any Class Specific or Vendor Specific descriptors;
+		 * find the next endpoint or interface descriptor */
+		endpoint->extra = buffer;
+		i = find_next_descriptor(buffer, size, USB_DT_ENDPOINT,
+				USB_DT_INTERFACE, &n);
+		endpoint->extralen = i;
+		retval = buffer - buffer0 + i;
+	}
 	if (n > 0)
 		dev_dbg(ddev, "skipped %d descriptor%s after %s\n",
 		    n, plural(n), "endpoint");
-	return buffer - buffer0 + i;
+	return retval;
 
 skip_to_next_endpoint_or_interface_descriptor:
 	i = find_next_descriptor(buffer, size, USB_DT_ENDPOINT,
@@ -453,6 +622,8 @@ static int usb_parse_configuration(struct device *ddev, int cfgidx,
 		kref_init(&intfc->ref);
 	}
 
+	/* FIXME: parse the BOS descriptor */
+
 	/* Skip over any Class Specific or Vendor Specific descriptors;
 	 * find the first interface descriptor */
 	config->extra = buffer;
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 112a2d6e922..13bced521b8 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -36,6 +36,7 @@ struct wusb_dev;
  *  - configs have one (often) or more interfaces;
  *  - interfaces have one (usually) or more settings;
  *  - each interface setting has zero or (usually) more endpoints.
+ *  - a SuperSpeed endpoint has a companion descriptor
  *
  * And there might be other descriptors mixed in with those.
  *
@@ -44,6 +45,19 @@ struct wusb_dev;
 
 struct ep_device;
 
+/* For SS devices */
+/**
+ * struct usb_host_ep_comp - Valid for SuperSpeed devices only
+ * @desc: endpoint companion descriptor, wMaxPacketSize in native byteorder
+ * @extra: descriptors following this endpoint companion descriptor
+ * @extralen: how many bytes of "extra" are valid
+ */
+struct usb_host_ep_comp {
+	struct usb_ep_comp_descriptor	desc;
+	unsigned char			*extra;   /* Extra descriptors */
+	int				extralen;
+};
+
 /**
  * struct usb_host_endpoint - host-side endpoint descriptor and queue
  * @desc: descriptor for this endpoint, wMaxPacketSize in native byteorder
@@ -51,6 +65,7 @@ struct ep_device;
  * @hcpriv: for use by HCD; typically holds hardware dma queue head (QH)
  *	with one or more transfer descriptors (TDs) per urb
  * @ep_dev: ep_device for sysfs info
+ * @ep_comp: companion descriptor information for this endpoint
  * @extra: descriptors following this endpoint in the configuration
  * @extralen: how many bytes of "extra" are valid
  * @enabled: URBs may be submitted to this endpoint
@@ -63,6 +78,7 @@ struct usb_host_endpoint {
 	struct list_head		urb_list;
 	void				*hcpriv;
 	struct ep_device 		*ep_dev;	/* For sysfs info */
+	struct usb_host_ep_comp		*ep_comp;	/* For SS devices */
 
 	unsigned char *extra;   /* Extra descriptors */
 	int extralen;
diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h
index 93bfe635234..9e9c5c0a3d7 100644
--- a/include/linux/usb/ch9.h
+++ b/include/linux/usb/ch9.h
@@ -191,6 +191,8 @@ struct usb_ctrlrequest {
 #define USB_DT_WIRE_ADAPTER		0x21
 #define USB_DT_RPIPE			0x22
 #define USB_DT_CS_RADIO_CONTROL		0x23
+/* From the USB 3.0 spec */
+#define	USB_DT_SS_ENDPOINT_COMP		0x30
 
 /* Conventional codes for class-specific descriptors.  The convention is
  * defined in the USB "Common Class" Spec (3.11).  Individual class specs
@@ -535,6 +537,20 @@ static inline int usb_endpoint_is_isoc_out(
 
 /*-------------------------------------------------------------------------*/
 
+/* USB_DT_SS_ENDPOINT_COMP: SuperSpeed Endpoint Companion descriptor */
+struct usb_ep_comp_descriptor {
+	__u8  bLength;
+	__u8  bDescriptorType;
+
+	__u8  bMaxBurst;
+	__u8  bmAttributes;
+	__u16 wBytesPerInterval;
+} __attribute__ ((packed));
+
+#define USB_DT_EP_COMP_SIZE		6
+
+/*-------------------------------------------------------------------------*/
+
 /* USB_DT_DEVICE_QUALIFIER: Device Qualifier descriptor */
 struct usb_qualifier_descriptor {
 	__u8  bLength;
-- 
cgit v1.2.3-70-g09d2


From e04748e3a87271fcf30d383e3780c5d3ee1c1618 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Mon, 27 Apr 2009 19:59:01 -0700
Subject: USB: Push scatter gather lists down to host controller drivers.

This is the original patch I created before David Vrabel posted a better
patch (http://marc.info/?l=linux-usb&m=123377477209109&w=2) that does
basically the same thing.  This patch will get replaced with his
(modified) patch later.

Allow USB device drivers that use usb_sg_init() and usb_sg_wait() to push
bulk endpoint scatter gather lists down to the host controller drivers.
This allows host controller drivers to more efficiently enqueue these
transfers, and allows the xHCI host controller to better take advantage of
USB 3.0 "bursts" for bulk endpoints.

This patch currently only enables scatter gather lists for bulk endpoints.
Other endpoint types that use the usb_sg_* functions will not have their
scatter gather lists pushed down to the host controller.  For periodic
endpoints, we want each scatterlist entry to be a separate transfer.
Eventually, HCDs could parse these scatter-gather lists for periodic
endpoints also.  For now, we use the old code and call usb_submit_urb()
for each scatterlist entry.

The caller of usb_sg_init() can request that all bytes in the scatter
gather list be transferred by passing in a length of zero.  Handle that
request for a bulk endpoint under xHCI by walking the scatter gather list
and calculating the length.  We could let the HCD handle a zero length in
this case, but I'm not sure if the core layers in between will get
confused by this.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/hcd.c     |   3 +-
 drivers/usb/core/message.c | 139 +++++++++++++++++++++++++++++----------------
 include/linux/usb.h        |   2 +
 3 files changed, 95 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index b2da4753b12..1609623ec82 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1239,7 +1239,8 @@ static int map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
 
 	/* Map the URB's buffers for DMA access.
 	 * Lower level HCD code should use *_dma exclusively,
-	 * unless it uses pio or talks to another transport.
+	 * unless it uses pio or talks to another transport,
+	 * or uses the provided scatter gather list for bulk.
 	 */
 	if (is_root_hub(urb->dev))
 		return 0;
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index 3a2e69ec2f2..2bed83caacb 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -365,6 +365,7 @@ int usb_sg_init(struct usb_sg_request *io, struct usb_device *dev,
 	int i;
 	int urb_flags;
 	int dma;
+	int use_sg;
 
 	if (!io || !dev || !sg
 			|| usb_pipecontrol(pipe)
@@ -392,7 +393,19 @@ int usb_sg_init(struct usb_sg_request *io, struct usb_device *dev,
 	if (io->entries <= 0)
 		return io->entries;
 
-	io->urbs = kmalloc(io->entries * sizeof *io->urbs, mem_flags);
+	/* If we're running on an xHCI host controller, queue the whole scatter
+	 * gather list with one call to urb_enqueue().  This is only for bulk,
+	 * as that endpoint type does not care how the data gets broken up
+	 * across frames.
+	 */
+	if (usb_pipebulk(pipe) &&
+			bus_to_hcd(dev->bus)->driver->flags & HCD_USB3) {
+		io->urbs = kmalloc(sizeof *io->urbs, mem_flags);
+		use_sg = true;
+	} else {
+		io->urbs = kmalloc(io->entries * sizeof *io->urbs, mem_flags);
+		use_sg = false;
+	}
 	if (!io->urbs)
 		goto nomem;
 
@@ -402,62 +415,92 @@ int usb_sg_init(struct usb_sg_request *io, struct usb_device *dev,
 	if (usb_pipein(pipe))
 		urb_flags |= URB_SHORT_NOT_OK;
 
-	for_each_sg(sg, sg, io->entries, i) {
-		unsigned len;
-
-		io->urbs[i] = usb_alloc_urb(0, mem_flags);
-		if (!io->urbs[i]) {
-			io->entries = i;
+	if (use_sg) {
+		io->urbs[0] = usb_alloc_urb(0, mem_flags);
+		if (!io->urbs[0]) {
+			io->entries = 0;
 			goto nomem;
 		}
 
-		io->urbs[i]->dev = NULL;
-		io->urbs[i]->pipe = pipe;
-		io->urbs[i]->interval = period;
-		io->urbs[i]->transfer_flags = urb_flags;
-
-		io->urbs[i]->complete = sg_complete;
-		io->urbs[i]->context = io;
-
-		/*
-		 * Some systems need to revert to PIO when DMA is temporarily
-		 * unavailable.  For their sakes, both transfer_buffer and
-		 * transfer_dma are set when possible.  However this can only
-		 * work on systems without:
-		 *
-		 *  - HIGHMEM, since DMA buffers located in high memory are
-		 *    not directly addressable by the CPU for PIO;
-		 *
-		 *  - IOMMU, since dma_map_sg() is allowed to use an IOMMU to
-		 *    make virtually discontiguous buffers be "dma-contiguous"
-		 *    so that PIO and DMA need diferent numbers of URBs.
-		 *
-		 * So when HIGHMEM or IOMMU are in use, transfer_buffer is NULL
-		 * to prevent stale pointers and to help spot bugs.
-		 */
-		if (dma) {
-			io->urbs[i]->transfer_dma = sg_dma_address(sg);
-			len = sg_dma_len(sg);
+		io->urbs[0]->dev = NULL;
+		io->urbs[0]->pipe = pipe;
+		io->urbs[0]->interval = period;
+		io->urbs[0]->transfer_flags = urb_flags;
+
+		io->urbs[0]->complete = sg_complete;
+		io->urbs[0]->context = io;
+		/* A length of zero means transfer the whole sg list */
+		io->urbs[0]->transfer_buffer_length = length;
+		if (length == 0) {
+			for_each_sg(sg, sg, io->entries, i) {
+				io->urbs[0]->transfer_buffer_length +=
+					sg_dma_len(sg);
+			}
+		}
+		io->urbs[0]->sg = io;
+		io->urbs[0]->num_sgs = io->entries;
+		io->entries = 1;
+	} else {
+		for_each_sg(sg, sg, io->entries, i) {
+			unsigned len;
+
+			io->urbs[i] = usb_alloc_urb(0, mem_flags);
+			if (!io->urbs[i]) {
+				io->entries = i;
+				goto nomem;
+			}
+
+			io->urbs[i]->dev = NULL;
+			io->urbs[i]->pipe = pipe;
+			io->urbs[i]->interval = period;
+			io->urbs[i]->transfer_flags = urb_flags;
+
+			io->urbs[i]->complete = sg_complete;
+			io->urbs[i]->context = io;
+
+			/*
+			 * Some systems need to revert to PIO when DMA is
+			 * temporarily unavailable.  For their sakes, both
+			 * transfer_buffer and transfer_dma are set when
+			 * possible.  However this can only work on systems
+			 * without:
+			 *
+			 *  - HIGHMEM, since DMA buffers located in high memory
+			 *    are not directly addressable by the CPU for PIO;
+			 *
+			 *  - IOMMU, since dma_map_sg() is allowed to use an
+			 *    IOMMU to make virtually discontiguous buffers be
+			 *    "dma-contiguous" so that PIO and DMA need diferent
+			 *    numbers of URBs.
+			 *
+			 * So when HIGHMEM or IOMMU are in use, transfer_buffer
+			 * is NULL to prevent stale pointers and to help spot
+			 * bugs.
+			 */
+			if (dma) {
+				io->urbs[i]->transfer_dma = sg_dma_address(sg);
+				len = sg_dma_len(sg);
 #if defined(CONFIG_HIGHMEM) || defined(CONFIG_GART_IOMMU)
-			io->urbs[i]->transfer_buffer = NULL;
+				io->urbs[i]->transfer_buffer = NULL;
 #else
-			io->urbs[i]->transfer_buffer = sg_virt(sg);
+				io->urbs[i]->transfer_buffer = sg_virt(sg);
 #endif
-		} else {
-			/* hc may use _only_ transfer_buffer */
-			io->urbs[i]->transfer_buffer = sg_virt(sg);
-			len = sg->length;
-		}
+			} else {
+				/* hc may use _only_ transfer_buffer */
+				io->urbs[i]->transfer_buffer = sg_virt(sg);
+				len = sg->length;
+			}
 
-		if (length) {
-			len = min_t(unsigned, len, length);
-			length -= len;
-			if (length == 0)
-				io->entries = i + 1;
+			if (length) {
+				len = min_t(unsigned, len, length);
+				length -= len;
+				if (length == 0)
+					io->entries = i + 1;
+			}
+			io->urbs[i]->transfer_buffer_length = len;
 		}
-		io->urbs[i]->transfer_buffer_length = len;
+		io->urbs[--i]->transfer_flags &= ~URB_NO_INTERRUPT;
 	}
-	io->urbs[--i]->transfer_flags &= ~URB_NO_INTERRUPT;
 
 	/* transaction state */
 	io->count = io->entries;
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 13bced521b8..0a1819a6497 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1198,6 +1198,8 @@ struct urb {
 	unsigned int transfer_flags;	/* (in) URB_SHORT_NOT_OK | ...*/
 	void *transfer_buffer;		/* (in) associated data buffer */
 	dma_addr_t transfer_dma;	/* (in) dma addr for transfer_buffer */
+	struct usb_sg_request *sg;	/* (in) scatter gather buffer list */
+	int num_sgs;			/* (in) number of entries in the sg list */
 	u32 transfer_buffer_length;	/* (in) data buffer length */
 	u32 actual_length;		/* (return) actual transfer length */
 	unsigned char *setup_packet;	/* (in) setup packet (control only) */
-- 
cgit v1.2.3-70-g09d2


From f0058c627855ecb3b6c7185b7ad1910463c24c42 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Wed, 29 Apr 2009 19:06:20 -0700
Subject: USB: Change names of SuperSpeed ep companion descriptor structs.

Differentiate between SuperSpeed endpoint companion descriptor and the
wireless USB endpoint companion descriptor.  Make all structure names for
this descriptor have "ss" (SuperSpeed) in them.  David Vrabel asked for
this change in http://marc.info/?l=linux-usb&m=124091465109367&w=2

Reported-by: David Vrabel <david.vrabel@csr.com>
Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/config.c   | 36 +++++++++++++++++++-----------------
 drivers/usb/host/xhci-mem.c |  2 +-
 include/linux/usb.h         | 14 +++++++-------
 include/linux/usb/ch9.h     |  4 ++--
 4 files changed, 29 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
index 7103758bb48..24dfb33f90c 100644
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -69,30 +69,31 @@ static int find_next_descriptor(unsigned char *buffer, int size,
 	return buffer - buffer0;
 }
 
-static int usb_parse_endpoint_companion(struct device *ddev, int cfgno,
+static int usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno,
 		int inum, int asnum, struct usb_host_endpoint *ep,
 		int num_ep, unsigned char *buffer, int size)
 {
 	unsigned char *buffer_start = buffer;
-	struct usb_ep_comp_descriptor	*desc;
+	struct usb_ss_ep_comp_descriptor	*desc;
 	int retval;
 	int num_skipped;
 	int max_tx;
 	int i;
 
-	/* Allocate space for the companion descriptor */
-	ep->ep_comp = kzalloc(sizeof(struct usb_host_ep_comp), GFP_KERNEL);
-	if (!ep->ep_comp)
+	/* Allocate space for the SS endpoint companion descriptor */
+	ep->ss_ep_comp = kzalloc(sizeof(struct usb_host_ss_ep_comp),
+			GFP_KERNEL);
+	if (!ep->ss_ep_comp)
 		return -ENOMEM;
-	desc = (struct usb_ep_comp_descriptor *) buffer;
+	desc = (struct usb_ss_ep_comp_descriptor *) buffer;
 	if (desc->bDescriptorType != USB_DT_SS_ENDPOINT_COMP) {
 		dev_warn(ddev, "No SuperSpeed endpoint companion for config %d "
 				" interface %d altsetting %d ep %d: "
 				"using minimum values\n",
 				cfgno, inum, asnum, ep->desc.bEndpointAddress);
-		ep->ep_comp->desc.bLength = USB_DT_EP_COMP_SIZE;
-		ep->ep_comp->desc.bDescriptorType = USB_DT_SS_ENDPOINT_COMP;
-		ep->ep_comp->desc.bMaxBurst = 0;
+		ep->ss_ep_comp->desc.bLength = USB_DT_SS_EP_COMP_SIZE;
+		ep->ss_ep_comp->desc.bDescriptorType = USB_DT_SS_ENDPOINT_COMP;
+		ep->ss_ep_comp->desc.bMaxBurst = 0;
 		/*
 		 * Leave bmAttributes as zero, which will mean no streams for
 		 * bulk, and isoc won't support multiple bursts of packets.
@@ -102,7 +103,7 @@ static int usb_parse_endpoint_companion(struct device *ddev, int cfgno,
 		 */
 		if (usb_endpoint_xfer_isoc(&ep->desc) ||
 				usb_endpoint_xfer_int(&ep->desc))
-			ep->ep_comp->desc.wBytesPerInterval =
+			ep->ss_ep_comp->desc.wBytesPerInterval =
 				ep->desc.wMaxPacketSize;
 		/*
 		 * The next descriptor is for an Endpoint or Interface,
@@ -112,16 +113,16 @@ static int usb_parse_endpoint_companion(struct device *ddev, int cfgno,
 		retval = 0;
 		goto valid;
 	}
-	memcpy(&ep->ep_comp->desc, desc, USB_DT_EP_COMP_SIZE);
-	desc = &ep->ep_comp->desc;
+	memcpy(&ep->ss_ep_comp->desc, desc, USB_DT_SS_EP_COMP_SIZE);
+	desc = &ep->ss_ep_comp->desc;
 	buffer += desc->bLength;
 	size -= desc->bLength;
 
 	/* Eat up the other descriptors we don't care about */
-	ep->ep_comp->extra = buffer;
+	ep->ss_ep_comp->extra = buffer;
 	i = find_next_descriptor(buffer, size, USB_DT_ENDPOINT,
 			USB_DT_INTERFACE, &num_skipped);
-	ep->ep_comp->extralen = i;
+	ep->ss_ep_comp->extralen = i;
 	buffer += i;
 	size -= i;
 	retval = buffer - buffer_start + i;
@@ -310,7 +311,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
 				cfgno, inum, asnum, d->bEndpointAddress,
 				maxp);
 	}
-	/* Allocate room for and parse any endpoint companion descriptors */
+	/* Allocate room for and parse any SS endpoint companion descriptors */
 	if (to_usb_device(ddev)->speed == USB_SPEED_SUPER) {
 		endpoint->extra = buffer;
 		i = find_next_descriptor_more(buffer, size, USB_DT_SS_ENDPOINT_COMP,
@@ -320,8 +321,9 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
 		size -= i;
 
 		if (size > 0) {
-			retval = usb_parse_endpoint_companion(ddev, cfgno, inum, asnum,
-					endpoint, num_ep, buffer, size);
+			retval = usb_parse_ss_endpoint_companion(ddev, cfgno,
+					inum, asnum, endpoint, num_ep, buffer,
+					size);
 			if (retval >= 0) {
 				buffer += retval;
 				retval = buffer - buffer0;
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index f49f280cfd4..a7fbd6c10ad 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -496,7 +496,7 @@ int xhci_endpoint_init(struct xhci_hcd *xhci,
 		max_packet = ep->desc.wMaxPacketSize;
 		ep_ctx->ep_info2 |= MAX_PACKET(max_packet);
 		/* dig out max burst from ep companion desc */
-		max_packet = ep->ep_comp->desc.bMaxBurst;
+		max_packet = ep->ss_ep_comp->desc.bMaxBurst;
 		ep_ctx->ep_info2 |= MAX_BURST(max_packet);
 		break;
 	case USB_SPEED_HIGH:
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 0a1819a6497..7e6b5259ea3 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -47,15 +47,15 @@ struct ep_device;
 
 /* For SS devices */
 /**
- * struct usb_host_ep_comp - Valid for SuperSpeed devices only
+ * struct usb_host_ss_ep_comp - Valid for SuperSpeed devices only
  * @desc: endpoint companion descriptor, wMaxPacketSize in native byteorder
  * @extra: descriptors following this endpoint companion descriptor
  * @extralen: how many bytes of "extra" are valid
  */
-struct usb_host_ep_comp {
-	struct usb_ep_comp_descriptor	desc;
-	unsigned char			*extra;   /* Extra descriptors */
-	int				extralen;
+struct usb_host_ss_ep_comp {
+	struct usb_ss_ep_comp_descriptor	desc;
+	unsigned char				*extra;   /* Extra descriptors */
+	int					extralen;
 };
 
 /**
@@ -65,7 +65,7 @@ struct usb_host_ep_comp {
  * @hcpriv: for use by HCD; typically holds hardware dma queue head (QH)
  *	with one or more transfer descriptors (TDs) per urb
  * @ep_dev: ep_device for sysfs info
- * @ep_comp: companion descriptor information for this endpoint
+ * @ss_ep_comp: companion descriptor information for this endpoint
  * @extra: descriptors following this endpoint in the configuration
  * @extralen: how many bytes of "extra" are valid
  * @enabled: URBs may be submitted to this endpoint
@@ -78,7 +78,7 @@ struct usb_host_endpoint {
 	struct list_head		urb_list;
 	void				*hcpriv;
 	struct ep_device 		*ep_dev;	/* For sysfs info */
-	struct usb_host_ep_comp		*ep_comp;	/* For SS devices */
+	struct usb_host_ss_ep_comp	*ss_ep_comp;	/* For SS devices */
 
 	unsigned char *extra;   /* Extra descriptors */
 	int extralen;
diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h
index 9e9c5c0a3d7..93223638f70 100644
--- a/include/linux/usb/ch9.h
+++ b/include/linux/usb/ch9.h
@@ -538,7 +538,7 @@ static inline int usb_endpoint_is_isoc_out(
 /*-------------------------------------------------------------------------*/
 
 /* USB_DT_SS_ENDPOINT_COMP: SuperSpeed Endpoint Companion descriptor */
-struct usb_ep_comp_descriptor {
+struct usb_ss_ep_comp_descriptor {
 	__u8  bLength;
 	__u8  bDescriptorType;
 
@@ -547,7 +547,7 @@ struct usb_ep_comp_descriptor {
 	__u16 wBytesPerInterval;
 } __attribute__ ((packed));
 
-#define USB_DT_EP_COMP_SIZE		6
+#define USB_DT_SS_EP_COMP_SIZE		6
 
 /*-------------------------------------------------------------------------*/
 
-- 
cgit v1.2.3-70-g09d2


From e475bba2fdee9c3dbfe25f026f8fb8de69508ad2 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Tue, 16 Jun 2009 08:23:52 +0200
Subject: block: Introduce helper to reset queue limits to default values

DM reuses the request queue when swapping in a new device table
Introduce blk_set_default_limits() which can be used to reset the the
queue_limits prior to stacking devices.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Alasdair G Kergon <agk@redhat.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-settings.c   | 33 +++++++++++++++++++++++++++------
 include/linux/blkdev.h |  1 +
 2 files changed, 28 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 138610b9895..7541ea4bf9f 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -95,6 +95,31 @@ void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn)
 }
 EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
 
+/**
+ * blk_set_default_limits - reset limits to default values
+ * @limits:  the queue_limits structure to reset
+ *
+ * Description:
+ *   Returns a queue_limit struct to its default state.  Can be used by
+ *   stacking drivers like DM that stage table swaps and reuse an
+ *   existing device queue.
+ */
+void blk_set_default_limits(struct queue_limits *lim)
+{
+	lim->max_phys_segments = MAX_PHYS_SEGMENTS;
+	lim->max_hw_segments = MAX_HW_SEGMENTS;
+	lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
+	lim->max_segment_size = MAX_SEGMENT_SIZE;
+	lim->max_sectors = lim->max_hw_sectors = SAFE_MAX_SECTORS;
+	lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
+	lim->bounce_pfn = BLK_BOUNCE_ANY;
+	lim->alignment_offset = 0;
+	lim->io_opt = 0;
+	lim->misaligned = 0;
+	lim->no_cluster = 0;
+}
+EXPORT_SYMBOL(blk_set_default_limits);
+
 /**
  * blk_queue_make_request - define an alternate make_request function for a device
  * @q:  the request queue for the device to be affected
@@ -123,14 +148,8 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 	 * set defaults
 	 */
 	q->nr_requests = BLKDEV_MAX_RQ;
-	blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
-	blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
-	blk_queue_segment_boundary(q, BLK_SEG_BOUNDARY_MASK);
-	blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
 
 	q->make_request_fn = mfn;
-	blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
-	blk_queue_logical_block_size(q, 512);
 	blk_queue_dma_alignment(q, 511);
 	blk_queue_congestion_threshold(q);
 	q->nr_batching = BLK_BATCH_REQ;
@@ -143,6 +162,8 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 	q->unplug_timer.function = blk_unplug_timeout;
 	q->unplug_timer.data = (unsigned long)q;
 
+	blk_set_default_limits(&q->limits);
+
 	/*
 	 * by default assume old behaviour and bounce for any highmem page
 	 */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0b1a6cae9de..8963d9149b5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -926,6 +926,7 @@ extern void blk_queue_alignment_offset(struct request_queue *q,
 				       unsigned int alignment);
 extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
 extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
+extern void blk_set_default_limits(struct queue_limits *lim);
 extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 			    sector_t offset);
 extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
-- 
cgit v1.2.3-70-g09d2


From 5ced504b1bd1979378de35c56aa5d3d79fb5033f Mon Sep 17 00:00:00 2001
From: unsik Kim <donari75@gmail.com>
Date: Tue, 16 Jun 2009 08:40:20 +0200
Subject: mg_disk: seperate mg_disk.h again

eec9462088a26c046d4db3100796a340a50890b8 fold mg_disk.h into mg_disk.c,
but mg_disk platform driver needs private data for operation. This also
make mg_disk.c as machine independent. Seperate only needed structure and
defines to mg_disk.h

Signed-off-by: unsik Kim <donari75@gmail.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 drivers/block/mg_disk.c | 28 +---------------------------
 include/linux/mg_disk.h | 45 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 27 deletions(-)
 create mode 100644 include/linux/mg_disk.h

(limited to 'include')

diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index 60de5a01e71..f703f547824 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -22,13 +22,12 @@
 #include <linux/delay.h>
 #include <linux/platform_device.h>
 #include <linux/gpio.h>
+#include <linux/mg_disk.h>
 
 #define MG_RES_SEC (CONFIG_MG_DISK_RES << 1)
 
 /* name for block device */
 #define MG_DISK_NAME "mgd"
-/* name for platform device */
-#define MG_DEV_NAME "mg_disk"
 
 #define MG_DISK_MAJ 0
 #define MG_DISK_MAX_PART 16
@@ -103,33 +102,8 @@
 #define MG_TMAX_SWRST_TO_RDY	500
 #define MG_TMAX_RSTOUT		3000
 
-/* device attribution */
-/* use mflash as boot device */
-#define MG_BOOT_DEV		(1 << 0)
-/* use mflash as storage device */
-#define MG_STORAGE_DEV		(1 << 1)
-/* same as MG_STORAGE_DEV, but bootloader already done reset sequence */
-#define MG_STORAGE_DEV_SKIP_RST	(1 << 2)
-
 #define MG_DEV_MASK (MG_BOOT_DEV | MG_STORAGE_DEV | MG_STORAGE_DEV_SKIP_RST)
 
-/* names of GPIO resource */
-#define MG_RST_PIN	"mg_rst"
-/* except MG_BOOT_DEV, reset-out pin should be assigned */
-#define MG_RSTOUT_PIN	"mg_rstout"
-
-/* private driver data */
-struct mg_drv_data {
-	/* disk resource */
-	u32 use_polling;
-
-	/* device attribution */
-	u32 dev_attr;
-
-	/* internally used */
-	struct mg_host *host;
-};
-
 /* main structure for mflash driver */
 struct mg_host {
 	struct device *dev;
diff --git a/include/linux/mg_disk.h b/include/linux/mg_disk.h
new file mode 100644
index 00000000000..e11f4d9f1c2
--- /dev/null
+++ b/include/linux/mg_disk.h
@@ -0,0 +1,45 @@
+/*
+ *  include/linux/mg_disk.c
+ *
+ *  Private data for mflash platform driver
+ *
+ * (c) 2008 mGine Co.,LTD
+ * (c) 2008 unsik Kim <donari75@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#ifndef __MG_DISK_H__
+#define __MG_DISK_H__
+
+/* name for platform device */
+#define MG_DEV_NAME "mg_disk"
+
+/* names of GPIO resource */
+#define MG_RST_PIN	"mg_rst"
+/* except MG_BOOT_DEV, reset-out pin should be assigned */
+#define MG_RSTOUT_PIN	"mg_rstout"
+
+/* device attribution */
+/* use mflash as boot device */
+#define MG_BOOT_DEV		(1 << 0)
+/* use mflash as storage device */
+#define MG_STORAGE_DEV		(1 << 1)
+/* same as MG_STORAGE_DEV, but bootloader already done reset sequence */
+#define MG_STORAGE_DEV_SKIP_RST	(1 << 2)
+
+/* private driver data */
+struct mg_drv_data {
+	/* disk resource */
+	u32 use_polling;
+
+	/* device attribution */
+	u32 dev_attr;
+
+	/* internally used */
+	void *host;
+};
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From 14ebaf81e13ce66bff275380b246796fd16cbfa1 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 16 Jun 2009 05:40:30 -0700
Subject: x25: Fix sleep from timer on socket destroy.

If socket destuction gets delayed to a timer, we try to
lock_sock() from that timer which won't work.

Use bh_lock_sock() in that case.

Signed-off-by: David S. Miller <davem@davemloft.net>
Tested-by: Ingo Molnar <mingo@elte.hu>
---
 include/net/x25.h   |  2 +-
 net/x25/af_x25.c    | 23 ++++++++++++++++++-----
 net/x25/x25_timer.c |  2 +-
 3 files changed, 20 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/x25.h b/include/net/x25.h
index fc3f03d976f..2cda0401156 100644
--- a/include/net/x25.h
+++ b/include/net/x25.h
@@ -187,7 +187,7 @@ extern int  x25_addr_ntoa(unsigned char *, struct x25_address *,
 extern int  x25_addr_aton(unsigned char *, struct x25_address *,
 			  struct x25_address *);
 extern struct sock *x25_find_socket(unsigned int, struct x25_neigh *);
-extern void x25_destroy_socket(struct sock *);
+extern void x25_destroy_socket_from_timer(struct sock *);
 extern int  x25_rx_call_request(struct sk_buff *, struct x25_neigh *, unsigned int);
 extern void x25_kill_by_neigh(struct x25_neigh *);
 
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index ed80af8ca5f..c51f3095739 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -332,14 +332,14 @@ static unsigned int x25_new_lci(struct x25_neigh *nb)
 /*
  *	Deferred destroy.
  */
-void x25_destroy_socket(struct sock *);
+static void __x25_destroy_socket(struct sock *);
 
 /*
  *	handler for deferred kills.
  */
 static void x25_destroy_timer(unsigned long data)
 {
-	x25_destroy_socket((struct sock *)data);
+	x25_destroy_socket_from_timer((struct sock *)data);
 }
 
 /*
@@ -349,12 +349,10 @@ static void x25_destroy_timer(unsigned long data)
  *	will touch it and we are (fairly 8-) ) safe.
  *	Not static as it's used by the timer
  */
-void x25_destroy_socket(struct sock *sk)
+static void __x25_destroy_socket(struct sock *sk)
 {
 	struct sk_buff *skb;
 
-	sock_hold(sk);
-	lock_sock(sk);
 	x25_stop_heartbeat(sk);
 	x25_stop_timer(sk);
 
@@ -385,7 +383,22 @@ void x25_destroy_socket(struct sock *sk)
 		/* drop last reference so sock_put will free */
 		__sock_put(sk);
 	}
+}
 
+void x25_destroy_socket_from_timer(struct sock *sk)
+{
+	sock_hold(sk);
+	bh_lock_sock(sk);
+	__x25_destroy_socket(sk);
+	bh_unlock_sock(sk);
+	sock_put(sk);
+}
+
+static void x25_destroy_socket(struct sock *sk)
+{
+	sock_hold(sk);
+	lock_sock(sk);
+	__x25_destroy_socket(sk);
 	release_sock(sk);
 	sock_put(sk);
 }
diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c
index d3e3e54db93..5c5db1a3639 100644
--- a/net/x25/x25_timer.c
+++ b/net/x25/x25_timer.c
@@ -113,7 +113,7 @@ static void x25_heartbeat_expiry(unsigned long param)
 			    (sk->sk_state == TCP_LISTEN &&
 			     sock_flag(sk, SOCK_DEAD))) {
 				bh_unlock_sock(sk);
-				x25_destroy_socket(sk);
+				x25_destroy_socket_from_timer(sk);
 				return;
 			}
 			break;
-- 
cgit v1.2.3-70-g09d2


From 6c18ba9f5e506b8115b89b1aa7bdc25178f40b0a Mon Sep 17 00:00:00 2001
From: Alexandros Batsakis <Alexandros.Batsakis@netapp.com>
Date: Tue, 16 Jun 2009 04:19:13 +0300
Subject: nfsd41: move channel attributes from nfsd4_session to a
 nfsd4_channel_attr struct

the change is valid for both the forechannel and the backchannel (currently dummy)

Signed-off-by: Alexandros Batsakis <Alexandros.Batsakis@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4state.c        | 28 +++++++++++++++-------------
 fs/nfsd/nfs4xdr.c          |  2 +-
 include/linux/nfsd/state.h | 18 +++++++++++++-----
 include/linux/nfsd/xdr4.h  | 11 -----------
 4 files changed, 29 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 89d9ac55c03..d5caf2a709d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -444,8 +444,8 @@ static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan)
  * fchan holds the client values on input, and the server values on output
  */
 static int init_forechannel_attrs(struct svc_rqst *rqstp,
-				    struct nfsd4_session *session,
-				    struct nfsd4_channel_attrs *fchan)
+				  struct nfsd4_channel_attrs *session_fchan,
+				  struct nfsd4_channel_attrs *fchan)
 {
 	int status = 0;
 	__u32   maxcount = svc_max_payload(rqstp);
@@ -455,21 +455,21 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp,
 	/* Use the client's max request and max response size if possible */
 	if (fchan->maxreq_sz > maxcount)
 		fchan->maxreq_sz = maxcount;
-	session->se_fmaxreq_sz = fchan->maxreq_sz;
+	session_fchan->maxreq_sz = fchan->maxreq_sz;
 
 	if (fchan->maxresp_sz > maxcount)
 		fchan->maxresp_sz = maxcount;
-	session->se_fmaxresp_sz = fchan->maxresp_sz;
+	session_fchan->maxresp_sz = fchan->maxresp_sz;
 
 	/* Set the max response cached size our default which is
 	 * a multiple of PAGE_SIZE and small */
-	session->se_fmaxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE;
-	fchan->maxresp_cached = session->se_fmaxresp_cached;
+	session_fchan->maxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE;
+	fchan->maxresp_cached = session_fchan->maxresp_cached;
 
 	/* Use the client's maxops if possible */
 	if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND)
 		fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND;
-	session->se_fmaxops = fchan->maxops;
+	session_fchan->maxops = fchan->maxops;
 
 	/* try to use the client requested number of slots */
 	if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
@@ -481,7 +481,7 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp,
 	 */
 	status = set_forechannel_maxreqs(fchan);
 
-	session->se_fnumslots = fchan->maxreqs;
+	session_fchan->maxreqs = fchan->maxreqs;
 	return status;
 }
 
@@ -495,12 +495,14 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
 	memset(&tmp, 0, sizeof(tmp));
 
 	/* FIXME: For now, we just accept the client back channel attributes. */
-	status = init_forechannel_attrs(rqstp, &tmp, &cses->fore_channel);
+	tmp.se_bchannel = cses->back_channel;
+	status = init_forechannel_attrs(rqstp, &tmp.se_fchannel,
+					&cses->fore_channel);
 	if (status)
 		goto out;
 
 	/* allocate struct nfsd4_session and slot table in one piece */
-	slotsize = tmp.se_fnumslots * sizeof(struct nfsd4_slot);
+	slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot);
 	new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL);
 	if (!new)
 		goto out;
@@ -574,7 +576,7 @@ free_session(struct kref *kref)
 	int i;
 
 	ses = container_of(kref, struct nfsd4_session, se_ref);
-	for (i = 0; i < ses->se_fnumslots; i++) {
+	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
 		struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry;
 		nfsd4_release_respages(e->ce_respages, e->ce_resused);
 	}
@@ -1130,7 +1132,7 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
 	 * is sent (lease renewal).
 	 */
 	if (seq && nfsd4_not_cached(resp)) {
-		seq->maxslots = resp->cstate.session->se_fnumslots;
+		seq->maxslots = resp->cstate.session->se_fchannel.maxreqs;
 		return nfs_ok;
 	}
 
@@ -1473,7 +1475,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
 		goto out;
 
 	status = nfserr_badslot;
-	if (seq->slotid >= session->se_fnumslots)
+	if (seq->slotid >= session->se_fchannel.maxreqs)
 		goto out;
 
 	slot = &session->se_slots[seq->slotid];
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index d07f704a2ac..2dcc7feaa6f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3183,7 +3183,7 @@ static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp)
 	dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__,
 		length, xb->page_len, tlen, pad);
 
-	if (length <= session->se_fmaxresp_cached)
+	if (length <= session->se_fchannel.maxresp_cached)
 		return status;
 	else
 		return nfserr_rep_too_big_to_cache;
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index c0c49215ddc..105cc100de0 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -115,6 +115,17 @@ struct nfsd4_slot {
 	struct nfsd4_cache_entry	sl_cache_entry;
 };
 
+struct nfsd4_channel_attrs {
+	u32		headerpadsz;
+	u32		maxreq_sz;
+	u32		maxresp_sz;
+	u32		maxresp_cached;
+	u32		maxops;
+	u32		maxreqs;
+	u32		nr_rdma_attrs;
+	u32		rdma_attrs;
+};
+
 struct nfsd4_session {
 	struct kref		se_ref;
 	struct list_head	se_hash;	/* hash by sessionid */
@@ -122,11 +133,8 @@ struct nfsd4_session {
 	u32			se_flags;
 	struct nfs4_client	*se_client;	/* for expire_client */
 	struct nfs4_sessionid	se_sessionid;
-	u32			se_fmaxreq_sz;
-	u32			se_fmaxresp_sz;
-	u32			se_fmaxresp_cached;
-	u32			se_fmaxops;
-	u32			se_fnumslots;
+	struct nfsd4_channel_attrs se_fchannel;
+	struct nfsd4_channel_attrs se_bchannel;
 	struct nfsd4_slot	se_slots[];	/* forward channel slots */
 };
 
diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h
index d0f050f01ec..2bacf753506 100644
--- a/include/linux/nfsd/xdr4.h
+++ b/include/linux/nfsd/xdr4.h
@@ -366,17 +366,6 @@ struct nfsd4_exchange_id {
 	int		spa_how;
 };
 
-struct nfsd4_channel_attrs {
-	u32		headerpadsz;
-	u32		maxreq_sz;
-	u32		maxresp_sz;
-	u32		maxresp_cached;
-	u32		maxops;
-	u32		maxreqs;
-	u32		nr_rdma_attrs;
-	u32		rdma_attrs;
-};
-
 struct nfsd4_create_session {
 	clientid_t		clientid;
 	struct nfs4_sessionid	sessionid;
-- 
cgit v1.2.3-70-g09d2


From 5fd29d6ccbc98884569d6f3105aeca70858b3e0f Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 16 Jun 2009 10:57:02 -0700
Subject: printk: clean up handling of log-levels and newlines

It used to be that we would only look at the log-level in a printk()
after explicit newlines, which can cause annoying problems when the
previous printk() did not end with a '\n'. In that case, the log-level
marker would be just printed out in the middle of the line, and be
seen as just noise rather than change the logging level.

This changes things to always look at the log-level in the first
bytes of the printout. If a log level marker is found, it is always
used as the log-level. Additionally, if no newline existed, one is
added (unless the log-level is the explicit KERN_CONT marker, to
explicitly show that it's a continuation of a previous line).

Acked-by: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h |  2 +-
 kernel/printk.c        | 31 ++++++++++++++++++++++---------
 2 files changed, 23 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 883cd44ff76..066bb1eddfe 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -102,7 +102,7 @@ extern const char linux_proc_banner[];
  * line that had no enclosing \n). Only to be used by core/arch code
  * during early bootup (a continued line is not SMP-safe otherwise).
  */
-#define	KERN_CONT	""
+#define	KERN_CONT	"<c>"
 
 extern int console_printk[];
 
diff --git a/kernel/printk.c b/kernel/printk.c
index 5052b5497c6..a87770ce73a 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -687,20 +687,33 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 				  sizeof(printk_buf) - printed_len, fmt, args);
 
 
+	p = printk_buf;
+
+	/* Do we have a loglevel in the string? */
+	if (p[0] == '<') {
+		unsigned char c = p[1];
+		if (c && p[2] == '>') {
+			switch (c) {
+			case '0' ... '7': /* loglevel */
+				current_log_level = c - '0';
+				if (!new_text_line) {
+					emit_log_char('\n');
+					new_text_line = 1;
+				}
+			/* Fallthrough - skip the loglevel */
+			case 'c': /* KERN_CONT */
+				p += 3;
+				break;
+			}
+		}
+	}
+
 	/*
 	 * Copy the output into log_buf.  If the caller didn't provide
 	 * appropriate log level tags, we insert them here
 	 */
-	for (p = printk_buf; *p; p++) {
+	for ( ; *p; p++) {
 		if (new_text_line) {
-			/* If a token, set current_log_level and skip over */
-			if (p[0] == '<' && p[1] >= '0' && p[1] <= '7' &&
-			    p[2] == '>') {
-				current_log_level = p[1] - '0';
-				p += 3;
-				printed_len -= 3;
-			}
-
 			/* Always output the token */
 			emit_log_char('<');
 			emit_log_char(current_log_level + '0');
-- 
cgit v1.2.3-70-g09d2


From e28d713704117bca0820c732210df6075b09f13b Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 16 Jun 2009 11:02:28 -0700
Subject: printk: Add KERN_DEFAULT printk log-level

This adds a KERN_DEFAULT loglevel marker, for when you cannot decide
which loglevel you want, and just want to keep an existing printk
with the default loglevel.

The difference between having KERN_DEFAULT and having no log-level
marker at all is two-fold:

 - having the log-level marker will now force a new-line if the
   previous printout had not added one (perhaps because it forgot,
   but perhaps because it expected a continuation)

 - having a log-level marker is required if you are printing out a
   message that otherwise itself could perhaps otherwise be mistaken
   for a log-level.

Signed-of-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 2 ++
 kernel/printk.c        | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 066bb1eddfe..1b2e1747df1 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -97,6 +97,8 @@ extern const char linux_proc_banner[];
 #define	KERN_INFO	"<6>"	/* informational			*/
 #define	KERN_DEBUG	"<7>"	/* debug-level messages			*/
 
+/* Use the default kernel loglevel */
+#define KERN_DEFAULT	"<d>"
 /*
  * Annotation for a "continued" line of log printout (only done after a
  * line that had no enclosing \n). Only to be used by core/arch code
diff --git a/kernel/printk.c b/kernel/printk.c
index a87770ce73a..b4d97b54c1e 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -696,6 +696,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 			switch (c) {
 			case '0' ... '7': /* loglevel */
 				current_log_level = c - '0';
+			/* Fallthrough - make sure we're on a new line */
+			case 'd': /* KERN_DEFAULT */
 				if (!new_text_line) {
 					emit_log_char('\n');
 					new_text_line = 1;
-- 
cgit v1.2.3-70-g09d2


From 1351a58ce0481afd80b756ecd9307c9fbe9f39b4 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Thu, 16 Apr 2009 13:30:38 -0300
Subject: V4L/DVB (11517): v4l: remove driver-core BUS_ID_SIZE

The name size limit is gone from the driver core, the BUS_ID_SIZE
value will be removed.

Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/media/v4l2-device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/media/v4l2-device.h b/include/media/v4l2-device.h
index 0dd3e8e8653..9afd39fb2cf 100644
--- a/include/media/v4l2-device.h
+++ b/include/media/v4l2-device.h
@@ -30,7 +30,7 @@
    basic V4L2 device-level support.
  */
 
-#define V4L2_DEVICE_NAME_SIZE (BUS_ID_SIZE + 16)
+#define V4L2_DEVICE_NAME_SIZE (20 + 16)
 
 struct v4l2_device {
 	/* dev->driver_data points to this struct.
-- 
cgit v1.2.3-70-g09d2


From 594bb46dbc63934bc65fa95743f83204bd26a641 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Fri, 24 Apr 2009 12:53:51 -0300
Subject: V4L/DVB (11607): soc-camera: add a free_bus method to struct
 soc_camera_link

Currently pcm990 camera bus-width management functions request a GPIO and never
free it again. With this approach the GPIO extender driver cannot be unloaded
once camera drivers have been loaded, also unloading theb i2c-pxa bus driver
produces errors, because the GPIO extender driver cannot unregister properly.
Another problem is, that if camera drivers are once loaded before the GPIO
extender driver, the platform code marks the GPIO unavailable and only a reboot
helps to recover. Adding an explicit free_bus method and using it in mt9m001
and mt9v022 drivers fixes these problems.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Acked-by: Eric Miao <eric.miao@marvell.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 arch/arm/mach-pxa/pcm990-baseboard.c | 23 ++++++++++++++++-------
 drivers/media/video/mt9m001.c        |  3 +++
 drivers/media/video/mt9v022.c        |  3 +++
 include/media/soc_camera.h           |  1 +
 4 files changed, 23 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-pxa/pcm990-baseboard.c b/arch/arm/mach-pxa/pcm990-baseboard.c
index 095521e9ee2..01791d74e08 100644
--- a/arch/arm/mach-pxa/pcm990-baseboard.c
+++ b/arch/arm/mach-pxa/pcm990-baseboard.c
@@ -380,12 +380,12 @@ static struct pca953x_platform_data pca9536_data = {
 	.gpio_base	= NR_BUILTIN_GPIO,
 };
 
-static int gpio_bus_switch;
+static int gpio_bus_switch = -EINVAL;
 
 static int pcm990_camera_set_bus_param(struct soc_camera_link *link,
-		unsigned long flags)
+				       unsigned long flags)
 {
-	if (gpio_bus_switch <= 0) {
+	if (gpio_bus_switch < 0) {
 		if (flags == SOCAM_DATAWIDTH_10)
 			return 0;
 		else
@@ -404,25 +404,34 @@ static unsigned long pcm990_camera_query_bus_param(struct soc_camera_link *link)
 {
 	int ret;
 
-	if (!gpio_bus_switch) {
+	if (gpio_bus_switch < 0) {
 		ret = gpio_request(NR_BUILTIN_GPIO, "camera");
 		if (!ret) {
 			gpio_bus_switch = NR_BUILTIN_GPIO;
 			gpio_direction_output(gpio_bus_switch, 0);
-		} else
-			gpio_bus_switch = -EINVAL;
+		}
 	}
 
-	if (gpio_bus_switch > 0)
+	if (gpio_bus_switch >= 0)
 		return SOCAM_DATAWIDTH_8 | SOCAM_DATAWIDTH_10;
 	else
 		return SOCAM_DATAWIDTH_10;
 }
 
+static void pcm990_camera_free_bus(struct soc_camera_link *link)
+{
+	if (gpio_bus_switch < 0)
+		return;
+
+	gpio_free(gpio_bus_switch);
+	gpio_bus_switch = -EINVAL;
+}
+
 static struct soc_camera_link iclink = {
 	.bus_id	= 0, /* Must match with the camera ID above */
 	.query_bus_param = pcm990_camera_query_bus_param,
 	.set_bus_param = pcm990_camera_set_bus_param,
+	.free_bus = pcm990_camera_free_bus,
 };
 
 /* Board I2C devices. */
diff --git a/drivers/media/video/mt9m001.c b/drivers/media/video/mt9m001.c
index 684f62fa789..3838ff77381 100644
--- a/drivers/media/video/mt9m001.c
+++ b/drivers/media/video/mt9m001.c
@@ -604,10 +604,13 @@ ei2c:
 static void mt9m001_video_remove(struct soc_camera_device *icd)
 {
 	struct mt9m001 *mt9m001 = container_of(icd, struct mt9m001, icd);
+	struct soc_camera_link *icl = mt9m001->client->dev.platform_data;
 
 	dev_dbg(&icd->dev, "Video %x removed: %p, %p\n", mt9m001->client->addr,
 		icd->dev.parent, icd->vdev);
 	soc_camera_video_stop(icd);
+	if (icl->free_bus)
+		icl->free_bus(icl);
 }
 
 static int mt9m001_probe(struct i2c_client *client,
diff --git a/drivers/media/video/mt9v022.c b/drivers/media/video/mt9v022.c
index 4d3b4813c32..412b399baca 100644
--- a/drivers/media/video/mt9v022.c
+++ b/drivers/media/video/mt9v022.c
@@ -735,10 +735,13 @@ ei2c:
 static void mt9v022_video_remove(struct soc_camera_device *icd)
 {
 	struct mt9v022 *mt9v022 = container_of(icd, struct mt9v022, icd);
+	struct soc_camera_link *icl = mt9v022->client->dev.platform_data;
 
 	dev_dbg(&icd->dev, "Video %x removed: %p, %p\n", mt9v022->client->addr,
 		icd->dev.parent, icd->vdev);
 	soc_camera_video_stop(icd);
+	if (icl->free_bus)
+		icl->free_bus(icl);
 }
 
 static int mt9v022_probe(struct i2c_client *client,
diff --git a/include/media/soc_camera.h b/include/media/soc_camera.h
index 37013688af4..396c32550e0 100644
--- a/include/media/soc_camera.h
+++ b/include/media/soc_camera.h
@@ -107,6 +107,7 @@ struct soc_camera_link {
 	 */
 	int (*set_bus_param)(struct soc_camera_link *, unsigned long flags);
 	unsigned long (*query_bus_param)(struct soc_camera_link *);
+	void (*free_bus)(struct soc_camera_link *);
 };
 
 static inline struct soc_camera_device *to_soc_camera_dev(struct device *dev)
-- 
cgit v1.2.3-70-g09d2


From eff505fa1511b753b7cfb397a754b8ff4367cd55 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Fri, 24 Apr 2009 12:55:48 -0300
Subject: V4L/DVB (11609): soc-camera: remove an extra device generation from
 struct soc_camera_host

Make camera devices direct children of host platform devices, move the
inheritance management into the soc_camera.c core driver.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/mx1_camera.c           | 35 ++++++-----
 drivers/media/video/mx3_camera.c           | 40 ++++++------
 drivers/media/video/pxa_camera.c           | 97 +++++++++++++++---------------
 drivers/media/video/sh_mobile_ceu_camera.c | 21 +++----
 drivers/media/video/soc_camera.c           | 35 +++--------
 include/media/soc_camera.h                 |  4 +-
 6 files changed, 107 insertions(+), 125 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/mx1_camera.c b/drivers/media/video/mx1_camera.c
index 48dd984dadf..2d075205bdf 100644
--- a/drivers/media/video/mx1_camera.c
+++ b/drivers/media/video/mx1_camera.c
@@ -106,7 +106,6 @@ struct mx1_camera_dev {
 	struct soc_camera_device	*icd;
 	struct mx1_camera_pdata		*pdata;
 	struct mx1_buffer		*active;
-	struct device			*dev;
 	struct resource			*res;
 	struct clk			*clk;
 	struct list_head		capture;
@@ -220,7 +219,7 @@ static int mx1_camera_setup_dma(struct mx1_camera_dev *pcdev)
 	int ret;
 
 	if (unlikely(!pcdev->active)) {
-		dev_err(pcdev->dev, "DMA End IRQ with no active buffer\n");
+		dev_err(pcdev->soc_host.dev, "DMA End IRQ with no active buffer\n");
 		return -EFAULT;
 	}
 
@@ -230,7 +229,7 @@ static int mx1_camera_setup_dma(struct mx1_camera_dev *pcdev)
 		vbuf->size, pcdev->res->start +
 		CSIRXR, DMA_MODE_READ);
 	if (unlikely(ret))
-		dev_err(pcdev->dev, "Failed to setup DMA sg list\n");
+		dev_err(pcdev->soc_host.dev, "Failed to setup DMA sg list\n");
 
 	return ret;
 }
@@ -339,14 +338,14 @@ static void mx1_camera_dma_irq(int channel, void *data)
 	imx_dma_disable(channel);
 
 	if (unlikely(!pcdev->active)) {
-		dev_err(pcdev->dev, "DMA End IRQ with no active buffer\n");
+		dev_err(pcdev->soc_host.dev, "DMA End IRQ with no active buffer\n");
 		goto out;
 	}
 
 	vb = &pcdev->active->vb;
 	buf = container_of(vb, struct mx1_buffer, vb);
 	WARN_ON(buf->inwork || list_empty(&vb->queue));
-	dev_dbg(pcdev->dev, "%s (vb=0x%p) 0x%08lx %d\n", __func__,
+	dev_dbg(pcdev->soc_host.dev, "%s (vb=0x%p) 0x%08lx %d\n", __func__,
 		vb, vb->baddr, vb->bsize);
 
 	mx1_camera_wakeup(pcdev, vb, buf);
@@ -367,7 +366,7 @@ static void mx1_camera_init_videobuf(struct videobuf_queue *q,
 	struct soc_camera_host *ici = to_soc_camera_host(icd->dev.parent);
 	struct mx1_camera_dev *pcdev = ici->priv;
 
-	videobuf_queue_dma_contig_init(q, &mx1_videobuf_ops, pcdev->dev,
+	videobuf_queue_dma_contig_init(q, &mx1_videobuf_ops, ici->dev,
 					&pcdev->lock,
 					V4L2_BUF_TYPE_VIDEO_CAPTURE,
 					V4L2_FIELD_NONE,
@@ -386,7 +385,7 @@ static int mclk_get_divisor(struct mx1_camera_dev *pcdev)
 	 * they get a nice Oops */
 	div = (lcdclk + 2 * mclk - 1) / (2 * mclk) - 1;
 
-	dev_dbg(pcdev->dev, "System clock %lukHz, target freq %dkHz, "
+	dev_dbg(pcdev->soc_host.dev, "System clock %lukHz, target freq %dkHz, "
 		"divisor %lu\n", lcdclk / 1000, mclk / 1000, div);
 
 	return div;
@@ -396,7 +395,7 @@ static void mx1_camera_activate(struct mx1_camera_dev *pcdev)
 {
 	unsigned int csicr1 = CSICR1_EN;
 
-	dev_dbg(pcdev->dev, "Activate device\n");
+	dev_dbg(pcdev->soc_host.dev, "Activate device\n");
 
 	clk_enable(pcdev->clk);
 
@@ -412,7 +411,7 @@ static void mx1_camera_activate(struct mx1_camera_dev *pcdev)
 
 static void mx1_camera_deactivate(struct mx1_camera_dev *pcdev)
 {
-	dev_dbg(pcdev->dev, "Deactivate device\n");
+	dev_dbg(pcdev->soc_host.dev, "Deactivate device\n");
 
 	/* Disable all CSI interface */
 	__raw_writel(0x00, pcdev->base + CSICR1);
@@ -551,7 +550,7 @@ static int mx1_camera_set_fmt(struct soc_camera_device *icd,
 
 	xlate = soc_camera_xlate_by_fourcc(icd, pix->pixelformat);
 	if (!xlate) {
-		dev_warn(&ici->dev, "Format %x not found\n", pix->pixelformat);
+		dev_warn(ici->dev, "Format %x not found\n", pix->pixelformat);
 		return -EINVAL;
 	}
 
@@ -668,7 +667,6 @@ static int __init mx1_camera_probe(struct platform_device *pdev)
 		goto exit_put_clk;
 	}
 
-	platform_set_drvdata(pdev, pcdev);
 	pcdev->res = res;
 	pcdev->clk = clk;
 
@@ -702,16 +700,15 @@ static int __init mx1_camera_probe(struct platform_device *pdev)
 	}
 	pcdev->irq = irq;
 	pcdev->base = base;
-	pcdev->dev = &pdev->dev;
 
 	/* request dma */
 	pcdev->dma_chan = imx_dma_request_by_prio(DRIVER_NAME, DMA_PRIO_HIGH);
 	if (pcdev->dma_chan < 0) {
-		dev_err(pcdev->dev, "Can't request DMA for MX1 CSI\n");
+		dev_err(&pdev->dev, "Can't request DMA for MX1 CSI\n");
 		err = -EBUSY;
 		goto exit_iounmap;
 	}
-	dev_dbg(pcdev->dev, "got DMA channel %d\n", pcdev->dma_chan);
+	dev_dbg(&pdev->dev, "got DMA channel %d\n", pcdev->dma_chan);
 
 	imx_dma_setup_handlers(pcdev->dma_chan, mx1_camera_dma_irq, NULL,
 			       pcdev);
@@ -724,7 +721,7 @@ static int __init mx1_camera_probe(struct platform_device *pdev)
 	/* request irq */
 	err = claim_fiq(&fh);
 	if (err) {
-		dev_err(pcdev->dev, "Camera interrupt register failed \n");
+		dev_err(&pdev->dev, "Camera interrupt register failed \n");
 		goto exit_free_dma;
 	}
 
@@ -744,7 +741,7 @@ static int __init mx1_camera_probe(struct platform_device *pdev)
 	pcdev->soc_host.drv_name	= DRIVER_NAME;
 	pcdev->soc_host.ops		= &mx1_soc_camera_host_ops;
 	pcdev->soc_host.priv		= pcdev;
-	pcdev->soc_host.dev.parent	= &pdev->dev;
+	pcdev->soc_host.dev		= &pdev->dev;
 	pcdev->soc_host.nr		= pdev->id;
 	err = soc_camera_host_register(&pcdev->soc_host);
 	if (err)
@@ -774,7 +771,9 @@ exit:
 
 static int __exit mx1_camera_remove(struct platform_device *pdev)
 {
-	struct mx1_camera_dev *pcdev = platform_get_drvdata(pdev);
+	struct soc_camera_host *soc_host = to_soc_camera_host(&pdev->dev);
+	struct mx1_camera_dev *pcdev = container_of(soc_host,
+					struct mx1_camera_dev, soc_host);
 	struct resource *res;
 
 	imx_dma_free(pcdev->dma_chan);
@@ -784,7 +783,7 @@ static int __exit mx1_camera_remove(struct platform_device *pdev)
 
 	clk_put(pcdev->clk);
 
-	soc_camera_host_unregister(&pcdev->soc_host);
+	soc_camera_host_unregister(soc_host);
 
 	iounmap(pcdev->base);
 
diff --git a/drivers/media/video/mx3_camera.c b/drivers/media/video/mx3_camera.c
index 3d187f966d6..4d47eeb1445 100644
--- a/drivers/media/video/mx3_camera.c
+++ b/drivers/media/video/mx3_camera.c
@@ -87,7 +87,6 @@ struct mx3_camera_buffer {
  * @soc_host:		embedded soc_host object
  */
 struct mx3_camera_dev {
-	struct device		*dev;
 	/*
 	 * i.MX3x is only supposed to handle one camera on its Camera Sensor
 	 * Interface. If anyone ever builds hardware to enable more than one
@@ -431,7 +430,7 @@ static void mx3_camera_init_videobuf(struct videobuf_queue *q,
 	struct soc_camera_host *ici = to_soc_camera_host(icd->dev.parent);
 	struct mx3_camera_dev *mx3_cam = ici->priv;
 
-	videobuf_queue_dma_contig_init(q, &mx3_videobuf_ops, mx3_cam->dev,
+	videobuf_queue_dma_contig_init(q, &mx3_videobuf_ops, ici->dev,
 				       &mx3_cam->lock,
 				       V4L2_BUF_TYPE_VIDEO_CAPTURE,
 				       V4L2_FIELD_NONE,
@@ -599,7 +598,8 @@ static int test_platform_param(struct mx3_camera_dev *mx3_cam,
 		*flags |= SOCAM_DATAWIDTH_4;
 		break;
 	default:
-		dev_info(mx3_cam->dev, "Unsupported bus width %d\n", buswidth);
+		dev_info(mx3_cam->soc_host.dev, "Unsupported bus width %d\n",
+			 buswidth);
 		return -EINVAL;
 	}
 
@@ -614,7 +614,7 @@ static int mx3_camera_try_bus_param(struct soc_camera_device *icd,
 	unsigned long bus_flags, camera_flags;
 	int ret = test_platform_param(mx3_cam, depth, &bus_flags);
 
-	dev_dbg(&ici->dev, "requested bus width %d bit: %d\n", depth, ret);
+	dev_dbg(ici->dev, "requested bus width %d bit: %d\n", depth, ret);
 
 	if (ret < 0)
 		return ret;
@@ -637,7 +637,7 @@ static bool chan_filter(struct dma_chan *chan, void *arg)
 	if (!rq)
 		return false;
 
-	pdata = rq->mx3_cam->dev->platform_data;
+	pdata = rq->mx3_cam->soc_host.dev->platform_data;
 
 	return rq->id == chan->chan_id &&
 		pdata->dma_dev == chan->device->dev;
@@ -697,7 +697,7 @@ static int mx3_camera_get_formats(struct soc_camera_device *icd, int idx,
 			xlate->cam_fmt = icd->formats + idx;
 			xlate->buswidth = buswidth;
 			xlate++;
-			dev_dbg(&ici->dev, "Providing format %s using %s\n",
+			dev_dbg(ici->dev, "Providing format %s using %s\n",
 				mx3_camera_formats[0].name,
 				icd->formats[idx].name);
 		}
@@ -709,7 +709,7 @@ static int mx3_camera_get_formats(struct soc_camera_device *icd, int idx,
 			xlate->cam_fmt = icd->formats + idx;
 			xlate->buswidth = buswidth;
 			xlate++;
-			dev_dbg(&ici->dev, "Providing format %s using %s\n",
+			dev_dbg(ici->dev, "Providing format %s using %s\n",
 				mx3_camera_formats[0].name,
 				icd->formats[idx].name);
 		}
@@ -722,7 +722,7 @@ passthrough:
 			xlate->cam_fmt = icd->formats + idx;
 			xlate->buswidth = buswidth;
 			xlate++;
-			dev_dbg(&ici->dev,
+			dev_dbg(ici->dev,
 				"Providing format %s in pass-through mode\n",
 				icd->formats[idx].name);
 		}
@@ -829,7 +829,7 @@ static int mx3_camera_set_fmt(struct soc_camera_device *icd,
 
 	xlate = soc_camera_xlate_by_fourcc(icd, pix->pixelformat);
 	if (!xlate) {
-		dev_warn(&ici->dev, "Format %x not found\n", pix->pixelformat);
+		dev_warn(ici->dev, "Format %x not found\n", pix->pixelformat);
 		return -EINVAL;
 	}
 
@@ -866,7 +866,7 @@ static int mx3_camera_try_fmt(struct soc_camera_device *icd,
 
 	xlate = soc_camera_xlate_by_fourcc(icd, pixfmt);
 	if (pixfmt && !xlate) {
-		dev_warn(&ici->dev, "Format %x not found\n", pixfmt);
+		dev_warn(ici->dev, "Format %x not found\n", pixfmt);
 		return -EINVAL;
 	}
 
@@ -933,11 +933,11 @@ static int mx3_camera_set_bus_param(struct soc_camera_device *icd, __u32 pixfmt)
 
 	xlate = soc_camera_xlate_by_fourcc(icd, pixfmt);
 	if (!xlate) {
-		dev_warn(&ici->dev, "Format %x not found\n", pixfmt);
+		dev_warn(ici->dev, "Format %x not found\n", pixfmt);
 		return -EINVAL;
 	}
 
-	dev_dbg(&ici->dev, "requested bus width %d bit: %d\n",
+	dev_dbg(ici->dev, "requested bus width %d bit: %d\n",
 		icd->buswidth, ret);
 
 	if (ret < 0)
@@ -947,7 +947,7 @@ static int mx3_camera_set_bus_param(struct soc_camera_device *icd, __u32 pixfmt)
 
 	common_flags = soc_camera_bus_param_compatible(camera_flags, bus_flags);
 	if (!common_flags) {
-		dev_dbg(&ici->dev, "no common flags: camera %lx, host %lx\n",
+		dev_dbg(ici->dev, "no common flags: camera %lx, host %lx\n",
 			camera_flags, bus_flags);
 		return -EINVAL;
 	}
@@ -1054,7 +1054,7 @@ static int mx3_camera_set_bus_param(struct soc_camera_device *icd, __u32 pixfmt)
 
 	csi_reg_write(mx3_cam, sens_conf | dw, CSI_SENS_CONF);
 
-	dev_dbg(&ici->dev, "Set SENS_CONF to %x\n", sens_conf | dw);
+	dev_dbg(ici->dev, "Set SENS_CONF to %x\n", sens_conf | dw);
 
 	return 0;
 }
@@ -1102,8 +1102,6 @@ static int mx3_camera_probe(struct platform_device *pdev)
 		goto eclkget;
 	}
 
-	platform_set_drvdata(pdev, mx3_cam);
-
 	mx3_cam->pdata = pdev->dev.platform_data;
 	mx3_cam->platform_flags = mx3_cam->pdata->flags;
 	if (!(mx3_cam->platform_flags & (MX3_CAMERA_DATAWIDTH_4 |
@@ -1135,14 +1133,14 @@ static int mx3_camera_probe(struct platform_device *pdev)
 	}
 
 	mx3_cam->base	= base;
-	mx3_cam->dev	= &pdev->dev;
 
 	soc_host		= &mx3_cam->soc_host;
 	soc_host->drv_name	= MX3_CAM_DRV_NAME;
 	soc_host->ops		= &mx3_soc_camera_host_ops;
 	soc_host->priv		= mx3_cam;
-	soc_host->dev.parent	= &pdev->dev;
+	soc_host->dev		= &pdev->dev;
 	soc_host->nr		= pdev->id;
+
 	err = soc_camera_host_register(soc_host);
 	if (err)
 		goto ecamhostreg;
@@ -1165,11 +1163,13 @@ egetres:
 
 static int __devexit mx3_camera_remove(struct platform_device *pdev)
 {
-	struct mx3_camera_dev *mx3_cam = platform_get_drvdata(pdev);
+	struct soc_camera_host *soc_host = to_soc_camera_host(&pdev->dev);
+	struct mx3_camera_dev *mx3_cam = container_of(soc_host,
+					struct mx3_camera_dev, soc_host);
 
 	clk_put(mx3_cam->clk);
 
-	soc_camera_host_unregister(&mx3_cam->soc_host);
+	soc_camera_host_unregister(soc_host);
 
 	iounmap(mx3_cam->base);
 
diff --git a/drivers/media/video/pxa_camera.c b/drivers/media/video/pxa_camera.c
index ad0d58c01bb..2da5eef19b7 100644
--- a/drivers/media/video/pxa_camera.c
+++ b/drivers/media/video/pxa_camera.c
@@ -203,7 +203,6 @@ struct pxa_buffer {
 
 struct pxa_camera_dev {
 	struct soc_camera_host	soc_host;
-	struct device		*dev;
 	/* PXA27x is only supposed to handle one camera on its Quick Capture
 	 * interface. If anyone ever builds hardware to enable more than
 	 * one camera, they will have to modify this driver too */
@@ -262,7 +261,6 @@ static void free_buffer(struct videobuf_queue *vq, struct pxa_buffer *buf)
 {
 	struct soc_camera_device *icd = vq->priv_data;
 	struct soc_camera_host *ici = to_soc_camera_host(icd->dev.parent);
-	struct pxa_camera_dev *pcdev = ici->priv;
 	struct videobuf_dmabuf *dma = videobuf_to_dma(&buf->vb);
 	int i;
 
@@ -279,7 +277,7 @@ static void free_buffer(struct videobuf_queue *vq, struct pxa_buffer *buf)
 
 	for (i = 0; i < ARRAY_SIZE(buf->dmas); i++) {
 		if (buf->dmas[i].sg_cpu)
-			dma_free_coherent(pcdev->dev, buf->dmas[i].sg_size,
+			dma_free_coherent(ici->dev, buf->dmas[i].sg_size,
 					  buf->dmas[i].sg_cpu,
 					  buf->dmas[i].sg_dma);
 		buf->dmas[i].sg_cpu = NULL;
@@ -339,14 +337,14 @@ static int pxa_init_dma_channel(struct pxa_camera_dev *pcdev,
 	int dma_len = 0, xfer_len = 0;
 
 	if (pxa_dma->sg_cpu)
-		dma_free_coherent(pcdev->dev, pxa_dma->sg_size,
+		dma_free_coherent(pcdev->soc_host.dev, pxa_dma->sg_size,
 				  pxa_dma->sg_cpu, pxa_dma->sg_dma);
 
 	sglen = calculate_dma_sglen(*sg_first, dma->sglen,
 				    *sg_first_ofs, size);
 
 	pxa_dma->sg_size = (sglen + 1) * sizeof(struct pxa_dma_desc);
-	pxa_dma->sg_cpu = dma_alloc_coherent(pcdev->dev, pxa_dma->sg_size,
+	pxa_dma->sg_cpu = dma_alloc_coherent(pcdev->soc_host.dev, pxa_dma->sg_size,
 					     &pxa_dma->sg_dma, GFP_KERNEL);
 	if (!pxa_dma->sg_cpu)
 		return -ENOMEM;
@@ -354,7 +352,7 @@ static int pxa_init_dma_channel(struct pxa_camera_dev *pcdev,
 	pxa_dma->sglen = sglen;
 	offset = *sg_first_ofs;
 
-	dev_dbg(pcdev->dev, "DMA: sg_first=%p, sglen=%d, ofs=%d, dma.desc=%x\n",
+	dev_dbg(pcdev->soc_host.dev, "DMA: sg_first=%p, sglen=%d, ofs=%d, dma.desc=%x\n",
 		*sg_first, sglen, *sg_first_ofs, pxa_dma->sg_dma);
 
 
@@ -377,7 +375,7 @@ static int pxa_init_dma_channel(struct pxa_camera_dev *pcdev,
 		pxa_dma->sg_cpu[i].ddadr =
 			pxa_dma->sg_dma + (i + 1) * sizeof(struct pxa_dma_desc);
 
-		dev_vdbg(pcdev->dev, "DMA: desc.%08x->@phys=0x%08x, len=%d\n",
+		dev_vdbg(pcdev->soc_host.dev, "DMA: desc.%08x->@phys=0x%08x, len=%d\n",
 			 pxa_dma->sg_dma + i * sizeof(struct pxa_dma_desc),
 			 sg_dma_address(sg) + offset, xfer_len);
 		offset = 0;
@@ -489,7 +487,7 @@ static int pxa_videobuf_prepare(struct videobuf_queue *vq,
 		ret = pxa_init_dma_channel(pcdev, buf, dma, 0, CIBR0, size_y,
 					   &sg, &next_ofs);
 		if (ret) {
-			dev_err(pcdev->dev,
+			dev_err(pcdev->soc_host.dev,
 				"DMA initialization for Y/RGB failed\n");
 			goto fail;
 		}
@@ -499,7 +497,7 @@ static int pxa_videobuf_prepare(struct videobuf_queue *vq,
 			ret = pxa_init_dma_channel(pcdev, buf, dma, 1, CIBR1,
 						   size_u, &sg, &next_ofs);
 		if (ret) {
-			dev_err(pcdev->dev,
+			dev_err(pcdev->soc_host.dev,
 				"DMA initialization for U failed\n");
 			goto fail_u;
 		}
@@ -509,7 +507,7 @@ static int pxa_videobuf_prepare(struct videobuf_queue *vq,
 			ret = pxa_init_dma_channel(pcdev, buf, dma, 2, CIBR2,
 						   size_v, &sg, &next_ofs);
 		if (ret) {
-			dev_err(pcdev->dev,
+			dev_err(pcdev->soc_host.dev,
 				"DMA initialization for V failed\n");
 			goto fail_v;
 		}
@@ -523,10 +521,10 @@ static int pxa_videobuf_prepare(struct videobuf_queue *vq,
 	return 0;
 
 fail_v:
-	dma_free_coherent(pcdev->dev, buf->dmas[1].sg_size,
+	dma_free_coherent(pcdev->soc_host.dev, buf->dmas[1].sg_size,
 			  buf->dmas[1].sg_cpu, buf->dmas[1].sg_dma);
 fail_u:
-	dma_free_coherent(pcdev->dev, buf->dmas[0].sg_size,
+	dma_free_coherent(pcdev->soc_host.dev, buf->dmas[0].sg_size,
 			  buf->dmas[0].sg_cpu, buf->dmas[0].sg_dma);
 fail:
 	free_buffer(vq, buf);
@@ -550,7 +548,7 @@ static void pxa_dma_start_channels(struct pxa_camera_dev *pcdev)
 	active = pcdev->active;
 
 	for (i = 0; i < pcdev->channels; i++) {
-		dev_dbg(pcdev->dev, "%s (channel=%d) ddadr=%08x\n", __func__,
+		dev_dbg(pcdev->soc_host.dev, "%s (channel=%d) ddadr=%08x\n", __func__,
 			i, active->dmas[i].sg_dma);
 		DDADR(pcdev->dma_chans[i]) = active->dmas[i].sg_dma;
 		DCSR(pcdev->dma_chans[i]) = DCSR_RUN;
@@ -562,7 +560,7 @@ static void pxa_dma_stop_channels(struct pxa_camera_dev *pcdev)
 	int i;
 
 	for (i = 0; i < pcdev->channels; i++) {
-		dev_dbg(pcdev->dev, "%s (channel=%d)\n", __func__, i);
+		dev_dbg(pcdev->soc_host.dev, "%s (channel=%d)\n", __func__, i);
 		DCSR(pcdev->dma_chans[i]) = 0;
 	}
 }
@@ -598,7 +596,7 @@ static void pxa_camera_start_capture(struct pxa_camera_dev *pcdev)
 {
 	unsigned long cicr0, cifr;
 
-	dev_dbg(pcdev->dev, "%s\n", __func__);
+	dev_dbg(pcdev->soc_host.dev, "%s\n", __func__);
 	/* Reset the FIFOs */
 	cifr = __raw_readl(pcdev->base + CIFR) | CIFR_RESET_F;
 	__raw_writel(cifr, pcdev->base + CIFR);
@@ -618,7 +616,7 @@ static void pxa_camera_stop_capture(struct pxa_camera_dev *pcdev)
 	__raw_writel(cicr0, pcdev->base + CICR0);
 
 	pcdev->active = NULL;
-	dev_dbg(pcdev->dev, "%s\n", __func__);
+	dev_dbg(pcdev->soc_host.dev, "%s\n", __func__);
 }
 
 static void pxa_videobuf_queue(struct videobuf_queue *vq,
@@ -687,7 +685,7 @@ static void pxa_camera_wakeup(struct pxa_camera_dev *pcdev,
 	do_gettimeofday(&vb->ts);
 	vb->field_count++;
 	wake_up(&vb->done);
-	dev_dbg(pcdev->dev, "%s dequeud buffer (vb=0x%p)\n", __func__, vb);
+	dev_dbg(pcdev->soc_host.dev, "%s dequeud buffer (vb=0x%p)\n", __func__, vb);
 
 	if (list_empty(&pcdev->capture)) {
 		pxa_camera_stop_capture(pcdev);
@@ -723,7 +721,7 @@ static void pxa_camera_check_link_miss(struct pxa_camera_dev *pcdev)
 	for (i = 0; i < pcdev->channels; i++)
 		if (DDADR(pcdev->dma_chans[i]) != DDADR_STOP)
 			is_dma_stopped = 0;
-	dev_dbg(pcdev->dev, "%s : top queued buffer=%p, dma_stopped=%d\n",
+	dev_dbg(pcdev->soc_host.dev, "%s : top queued buffer=%p, dma_stopped=%d\n",
 		__func__, pcdev->active, is_dma_stopped);
 	if (pcdev->active && is_dma_stopped)
 		pxa_camera_start_capture(pcdev);
@@ -748,12 +746,12 @@ static void pxa_camera_dma_irq(int channel, struct pxa_camera_dev *pcdev,
 		overrun |= CISR_IFO_1 | CISR_IFO_2;
 
 	if (status & DCSR_BUSERR) {
-		dev_err(pcdev->dev, "DMA Bus Error IRQ!\n");
+		dev_err(pcdev->soc_host.dev, "DMA Bus Error IRQ!\n");
 		goto out;
 	}
 
 	if (!(status & (DCSR_ENDINTR | DCSR_STARTINTR))) {
-		dev_err(pcdev->dev, "Unknown DMA IRQ source, "
+		dev_err(pcdev->soc_host.dev, "Unknown DMA IRQ source, "
 			"status: 0x%08x\n", status);
 		goto out;
 	}
@@ -777,7 +775,7 @@ static void pxa_camera_dma_irq(int channel, struct pxa_camera_dev *pcdev,
 	buf = container_of(vb, struct pxa_buffer, vb);
 	WARN_ON(buf->inwork || list_empty(&vb->queue));
 
-	dev_dbg(pcdev->dev, "%s channel=%d %s%s(vb=0x%p) dma.desc=%x\n",
+	dev_dbg(pcdev->soc_host.dev, "%s channel=%d %s%s(vb=0x%p) dma.desc=%x\n",
 		__func__, channel, status & DCSR_STARTINTR ? "SOF " : "",
 		status & DCSR_ENDINTR ? "EOF " : "", vb, DDADR(channel));
 
@@ -788,7 +786,7 @@ static void pxa_camera_dma_irq(int channel, struct pxa_camera_dev *pcdev,
 		 */
 		if (camera_status & overrun &&
 		    !list_is_last(pcdev->capture.next, &pcdev->capture)) {
-			dev_dbg(pcdev->dev, "FIFO overrun! CISR: %x\n",
+			dev_dbg(pcdev->soc_host.dev, "FIFO overrun! CISR: %x\n",
 				camera_status);
 			pxa_camera_stop_capture(pcdev);
 			pxa_camera_start_capture(pcdev);
@@ -855,7 +853,7 @@ static u32 mclk_get_divisor(struct pxa_camera_dev *pcdev)
 	/* mclk <= ciclk / 4 (27.4.2) */
 	if (mclk > lcdclk / 4) {
 		mclk = lcdclk / 4;
-		dev_warn(pcdev->dev, "Limiting master clock to %lu\n", mclk);
+		dev_warn(pcdev->soc_host.dev, "Limiting master clock to %lu\n", mclk);
 	}
 
 	/* We verify mclk != 0, so if anyone breaks it, here comes their Oops */
@@ -865,7 +863,7 @@ static u32 mclk_get_divisor(struct pxa_camera_dev *pcdev)
 	if (pcdev->platform_flags & PXA_CAMERA_MCLK_EN)
 		pcdev->mclk = lcdclk / (2 * (div + 1));
 
-	dev_dbg(pcdev->dev, "LCD clock %luHz, target freq %luHz, "
+	dev_dbg(pcdev->soc_host.dev, "LCD clock %luHz, target freq %luHz, "
 		"divisor %u\n", lcdclk, mclk, div);
 
 	return div;
@@ -885,12 +883,12 @@ static void pxa_camera_activate(struct pxa_camera_dev *pcdev)
 	struct pxacamera_platform_data *pdata = pcdev->pdata;
 	u32 cicr4 = 0;
 
-	dev_dbg(pcdev->dev, "Registered platform device at %p data %p\n",
+	dev_dbg(pcdev->soc_host.dev, "Registered platform device at %p data %p\n",
 		pcdev, pdata);
 
 	if (pdata && pdata->init) {
-		dev_dbg(pcdev->dev, "%s: Init gpios\n", __func__);
-		pdata->init(pcdev->dev);
+		dev_dbg(pcdev->soc_host.dev, "%s: Init gpios\n", __func__);
+		pdata->init(pcdev->soc_host.dev);
 	}
 
 	/* disable all interrupts */
@@ -932,7 +930,7 @@ static irqreturn_t pxa_camera_irq(int irq, void *data)
 	struct videobuf_buffer *vb;
 
 	status = __raw_readl(pcdev->base + CISR);
-	dev_dbg(pcdev->dev, "Camera interrupt status 0x%lx\n", status);
+	dev_dbg(pcdev->soc_host.dev, "Camera interrupt status 0x%lx\n", status);
 
 	if (!status)
 		return IRQ_NONE;
@@ -1260,7 +1258,7 @@ static int pxa_camera_get_formats(struct soc_camera_device *icd, int idx,
 			xlate->cam_fmt = icd->formats + idx;
 			xlate->buswidth = buswidth;
 			xlate++;
-			dev_dbg(&ici->dev, "Providing format %s using %s\n",
+			dev_dbg(ici->dev, "Providing format %s using %s\n",
 				pxa_camera_formats[0].name,
 				icd->formats[idx].name);
 		}
@@ -1275,7 +1273,7 @@ static int pxa_camera_get_formats(struct soc_camera_device *icd, int idx,
 			xlate->cam_fmt = icd->formats + idx;
 			xlate->buswidth = buswidth;
 			xlate++;
-			dev_dbg(&ici->dev, "Providing format %s packed\n",
+			dev_dbg(ici->dev, "Providing format %s packed\n",
 				icd->formats[idx].name);
 		}
 		break;
@@ -1287,7 +1285,7 @@ static int pxa_camera_get_formats(struct soc_camera_device *icd, int idx,
 			xlate->cam_fmt = icd->formats + idx;
 			xlate->buswidth = icd->formats[idx].depth;
 			xlate++;
-			dev_dbg(&ici->dev,
+			dev_dbg(ici->dev,
 				"Providing format %s in pass-through mode\n",
 				icd->formats[idx].name);
 		}
@@ -1316,11 +1314,11 @@ static int pxa_camera_set_crop(struct soc_camera_device *icd,
 	icd->sense = NULL;
 
 	if (ret < 0) {
-		dev_warn(&ici->dev, "Failed to crop to %ux%u@%u:%u\n",
+		dev_warn(ici->dev, "Failed to crop to %ux%u@%u:%u\n",
 			 rect->width, rect->height, rect->left, rect->top);
 	} else if (sense.flags & SOCAM_SENSE_PCLK_CHANGED) {
 		if (sense.pixel_clock > sense.pixel_clock_max) {
-			dev_err(&ici->dev,
+			dev_err(ici->dev,
 				"pixel clock %lu set by the camera too high!",
 				sense.pixel_clock);
 			return -EIO;
@@ -1348,7 +1346,7 @@ static int pxa_camera_set_fmt(struct soc_camera_device *icd,
 
 	xlate = soc_camera_xlate_by_fourcc(icd, pix->pixelformat);
 	if (!xlate) {
-		dev_warn(&ici->dev, "Format %x not found\n", pix->pixelformat);
+		dev_warn(ici->dev, "Format %x not found\n", pix->pixelformat);
 		return -EINVAL;
 	}
 
@@ -1364,11 +1362,11 @@ static int pxa_camera_set_fmt(struct soc_camera_device *icd,
 	icd->sense = NULL;
 
 	if (ret < 0) {
-		dev_warn(&ici->dev, "Failed to configure for format %x\n",
+		dev_warn(ici->dev, "Failed to configure for format %x\n",
 			 pix->pixelformat);
 	} else if (sense.flags & SOCAM_SENSE_PCLK_CHANGED) {
 		if (sense.pixel_clock > sense.pixel_clock_max) {
-			dev_err(&ici->dev,
+			dev_err(ici->dev,
 				"pixel clock %lu set by the camera too high!",
 				sense.pixel_clock);
 			return -EIO;
@@ -1396,7 +1394,7 @@ static int pxa_camera_try_fmt(struct soc_camera_device *icd,
 
 	xlate = soc_camera_xlate_by_fourcc(icd, pixfmt);
 	if (!xlate) {
-		dev_warn(&ici->dev, "Format %x not found\n", pixfmt);
+		dev_warn(ici->dev, "Format %x not found\n", pixfmt);
 		return -EINVAL;
 	}
 
@@ -1581,7 +1579,6 @@ static int pxa_camera_probe(struct platform_device *pdev)
 		goto exit_kfree;
 	}
 
-	platform_set_drvdata(pdev, pcdev);
 	pcdev->res = res;
 
 	pcdev->pdata = pdev->dev.platform_data;
@@ -1602,7 +1599,6 @@ static int pxa_camera_probe(struct platform_device *pdev)
 		pcdev->mclk = 20000000;
 	}
 
-	pcdev->dev = &pdev->dev;
 	pcdev->mclk_divisor = mclk_get_divisor(pcdev);
 
 	INIT_LIST_HEAD(&pcdev->capture);
@@ -1629,29 +1625,29 @@ static int pxa_camera_probe(struct platform_device *pdev)
 	err = pxa_request_dma("CI_Y", DMA_PRIO_HIGH,
 			      pxa_camera_dma_irq_y, pcdev);
 	if (err < 0) {
-		dev_err(pcdev->dev, "Can't request DMA for Y\n");
+		dev_err(&pdev->dev, "Can't request DMA for Y\n");
 		goto exit_iounmap;
 	}
 	pcdev->dma_chans[0] = err;
-	dev_dbg(pcdev->dev, "got DMA channel %d\n", pcdev->dma_chans[0]);
+	dev_dbg(&pdev->dev, "got DMA channel %d\n", pcdev->dma_chans[0]);
 
 	err = pxa_request_dma("CI_U", DMA_PRIO_HIGH,
 			      pxa_camera_dma_irq_u, pcdev);
 	if (err < 0) {
-		dev_err(pcdev->dev, "Can't request DMA for U\n");
+		dev_err(&pdev->dev, "Can't request DMA for U\n");
 		goto exit_free_dma_y;
 	}
 	pcdev->dma_chans[1] = err;
-	dev_dbg(pcdev->dev, "got DMA channel (U) %d\n", pcdev->dma_chans[1]);
+	dev_dbg(&pdev->dev, "got DMA channel (U) %d\n", pcdev->dma_chans[1]);
 
 	err = pxa_request_dma("CI_V", DMA_PRIO_HIGH,
 			      pxa_camera_dma_irq_v, pcdev);
 	if (err < 0) {
-		dev_err(pcdev->dev, "Can't request DMA for V\n");
+		dev_err(&pdev->dev, "Can't request DMA for V\n");
 		goto exit_free_dma_u;
 	}
 	pcdev->dma_chans[2] = err;
-	dev_dbg(pcdev->dev, "got DMA channel (V) %d\n", pcdev->dma_chans[2]);
+	dev_dbg(&pdev->dev, "got DMA channel (V) %d\n", pcdev->dma_chans[2]);
 
 	DRCMR(68) = pcdev->dma_chans[0] | DRCMR_MAPVLD;
 	DRCMR(69) = pcdev->dma_chans[1] | DRCMR_MAPVLD;
@@ -1661,15 +1657,16 @@ static int pxa_camera_probe(struct platform_device *pdev)
 	err = request_irq(pcdev->irq, pxa_camera_irq, 0, PXA_CAM_DRV_NAME,
 			  pcdev);
 	if (err) {
-		dev_err(pcdev->dev, "Camera interrupt register failed \n");
+		dev_err(&pdev->dev, "Camera interrupt register failed \n");
 		goto exit_free_dma;
 	}
 
 	pcdev->soc_host.drv_name	= PXA_CAM_DRV_NAME;
 	pcdev->soc_host.ops		= &pxa_soc_camera_host_ops;
 	pcdev->soc_host.priv		= pcdev;
-	pcdev->soc_host.dev.parent	= &pdev->dev;
+	pcdev->soc_host.dev		= &pdev->dev;
 	pcdev->soc_host.nr		= pdev->id;
+
 	err = soc_camera_host_register(&pcdev->soc_host);
 	if (err)
 		goto exit_free_irq;
@@ -1698,7 +1695,9 @@ exit:
 
 static int __devexit pxa_camera_remove(struct platform_device *pdev)
 {
-	struct pxa_camera_dev *pcdev = platform_get_drvdata(pdev);
+	struct soc_camera_host *soc_host = to_soc_camera_host(&pdev->dev);
+	struct pxa_camera_dev *pcdev = container_of(soc_host,
+					struct pxa_camera_dev, soc_host);
 	struct resource *res;
 
 	clk_put(pcdev->clk);
@@ -1708,7 +1707,7 @@ static int __devexit pxa_camera_remove(struct platform_device *pdev)
 	pxa_free_dma(pcdev->dma_chans[2]);
 	free_irq(pcdev->irq, pcdev);
 
-	soc_camera_host_unregister(&pcdev->soc_host);
+	soc_camera_host_unregister(soc_host);
 
 	iounmap(pcdev->base);
 
diff --git a/drivers/media/video/sh_mobile_ceu_camera.c b/drivers/media/video/sh_mobile_ceu_camera.c
index 8e4a8fca976..d369e8409ab 100644
--- a/drivers/media/video/sh_mobile_ceu_camera.c
+++ b/drivers/media/video/sh_mobile_ceu_camera.c
@@ -81,7 +81,6 @@ struct sh_mobile_ceu_buffer {
 };
 
 struct sh_mobile_ceu_dev {
-	struct device *dev;
 	struct soc_camera_host ici;
 	struct soc_camera_device *icd;
 
@@ -617,7 +616,7 @@ static int sh_mobile_ceu_get_formats(struct soc_camera_device *icd, int idx,
 			xlate->cam_fmt = icd->formats + idx;
 			xlate->buswidth = icd->formats[idx].depth;
 			xlate++;
-			dev_dbg(&ici->dev, "Providing format %s using %s\n",
+			dev_dbg(ici->dev, "Providing format %s using %s\n",
 				sh_mobile_ceu_formats[k].name,
 				icd->formats[idx].name);
 		}
@@ -630,7 +629,7 @@ add_single_format:
 			xlate->cam_fmt = icd->formats + idx;
 			xlate->buswidth = icd->formats[idx].depth;
 			xlate++;
-			dev_dbg(&ici->dev,
+			dev_dbg(ici->dev,
 				"Providing format %s in pass-through mode\n",
 				icd->formats[idx].name);
 		}
@@ -657,7 +656,7 @@ static int sh_mobile_ceu_set_fmt(struct soc_camera_device *icd,
 
 	xlate = soc_camera_xlate_by_fourcc(icd, pixfmt);
 	if (!xlate) {
-		dev_warn(&ici->dev, "Format %x not found\n", pixfmt);
+		dev_warn(ici->dev, "Format %x not found\n", pixfmt);
 		return -EINVAL;
 	}
 
@@ -684,7 +683,7 @@ static int sh_mobile_ceu_try_fmt(struct soc_camera_device *icd,
 
 	xlate = soc_camera_xlate_by_fourcc(icd, pixfmt);
 	if (!xlate) {
-		dev_warn(&ici->dev, "Format %x not found\n", pixfmt);
+		dev_warn(ici->dev, "Format %x not found\n", pixfmt);
 		return -EINVAL;
 	}
 
@@ -782,7 +781,7 @@ static void sh_mobile_ceu_init_videobuf(struct videobuf_queue *q,
 
 	videobuf_queue_dma_contig_init(q,
 				       &sh_mobile_ceu_videobuf_ops,
-				       &ici->dev, &pcdev->lock,
+				       ici->dev, &pcdev->lock,
 				       V4L2_BUF_TYPE_VIDEO_CAPTURE,
 				       pcdev->is_interlaced ?
 				       V4L2_FIELD_INTERLACED : V4L2_FIELD_NONE,
@@ -829,7 +828,6 @@ static int sh_mobile_ceu_probe(struct platform_device *pdev)
 		goto exit;
 	}
 
-	platform_set_drvdata(pdev, pcdev);
 	INIT_LIST_HEAD(&pcdev->capture);
 	spin_lock_init(&pcdev->lock);
 
@@ -850,7 +848,6 @@ static int sh_mobile_ceu_probe(struct platform_device *pdev)
 	pcdev->irq = irq;
 	pcdev->base = base;
 	pcdev->video_limit = 0; /* only enabled if second resource exists */
-	pcdev->dev = &pdev->dev;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
 	if (res) {
@@ -885,7 +882,7 @@ static int sh_mobile_ceu_probe(struct platform_device *pdev)
 	}
 
 	pcdev->ici.priv = pcdev;
-	pcdev->ici.dev.parent = &pdev->dev;
+	pcdev->ici.dev = &pdev->dev;
 	pcdev->ici.nr = pdev->id;
 	pcdev->ici.drv_name = dev_name(&pdev->dev);
 	pcdev->ici.ops = &sh_mobile_ceu_host_ops;
@@ -913,9 +910,11 @@ exit:
 
 static int sh_mobile_ceu_remove(struct platform_device *pdev)
 {
-	struct sh_mobile_ceu_dev *pcdev = platform_get_drvdata(pdev);
+	struct soc_camera_host *soc_host = to_soc_camera_host(&pdev->dev);
+	struct sh_mobile_ceu_dev *pcdev = container_of(soc_host,
+					struct sh_mobile_ceu_dev, ici);
 
-	soc_camera_host_unregister(&pcdev->ici);
+	soc_camera_host_unregister(soc_host);
 	clk_put(pcdev->clk);
 	free_irq(pcdev->irq, pcdev);
 	if (platform_get_resource(pdev, IORESOURCE_MEM, 1))
diff --git a/drivers/media/video/soc_camera.c b/drivers/media/video/soc_camera.c
index 0e890cc2337..2d341f537d5 100644
--- a/drivers/media/video/soc_camera.c
+++ b/drivers/media/video/soc_camera.c
@@ -279,7 +279,7 @@ static int soc_camera_set_fmt(struct soc_camera_file *icf,
 		return ret;
 	} else if (!icd->current_fmt ||
 		   icd->current_fmt->fourcc != pix->pixelformat) {
-		dev_err(&ici->dev,
+		dev_err(ici->dev,
 			"Host driver hasn't set up current format correctly!\n");
 		return -EINVAL;
 	}
@@ -794,7 +794,7 @@ static void scan_add_host(struct soc_camera_host *ici)
 
 	list_for_each_entry(icd, &devices, list) {
 		if (icd->iface == ici->nr) {
-			icd->dev.parent = &ici->dev;
+			icd->dev.parent = ici->dev;
 			device_register_link(icd);
 		}
 	}
@@ -818,7 +818,7 @@ static int scan_add_device(struct soc_camera_device *icd)
 	list_for_each_entry(ici, &hosts, list) {
 		if (icd->iface == ici->nr) {
 			ret = 1;
-			icd->dev.parent = &ici->dev;
+			icd->dev.parent = ici->dev;
 			break;
 		}
 	}
@@ -952,7 +952,6 @@ static void dummy_release(struct device *dev)
 
 int soc_camera_host_register(struct soc_camera_host *ici)
 {
-	int ret;
 	struct soc_camera_host *ix;
 
 	if (!ici || !ici->ops ||
@@ -965,12 +964,10 @@ int soc_camera_host_register(struct soc_camera_host *ici)
 	    !ici->ops->reqbufs ||
 	    !ici->ops->add ||
 	    !ici->ops->remove ||
-	    !ici->ops->poll)
+	    !ici->ops->poll ||
+	    !ici->dev)
 		return -EINVAL;
 
-	/* Number might be equal to the platform device ID */
-	dev_set_name(&ici->dev, "camera_host%d", ici->nr);
-
 	mutex_lock(&list_lock);
 	list_for_each_entry(ix, &hosts, list) {
 		if (ix->nr == ici->nr) {
@@ -979,26 +976,14 @@ int soc_camera_host_register(struct soc_camera_host *ici)
 		}
 	}
 
+	dev_set_drvdata(ici->dev, ici);
+
 	list_add_tail(&ici->list, &hosts);
 	mutex_unlock(&list_lock);
 
-	ici->dev.release = dummy_release;
-
-	ret = device_register(&ici->dev);
-
-	if (ret)
-		goto edevr;
-
 	scan_add_host(ici);
 
 	return 0;
-
-edevr:
-	mutex_lock(&list_lock);
-	list_del(&ici->list);
-	mutex_unlock(&list_lock);
-
-	return ret;
 }
 EXPORT_SYMBOL(soc_camera_host_register);
 
@@ -1012,7 +997,7 @@ void soc_camera_host_unregister(struct soc_camera_host *ici)
 	list_del(&ici->list);
 
 	list_for_each_entry(icd, &devices, list) {
-		if (icd->dev.parent == &ici->dev) {
+		if (icd->dev.parent == ici->dev) {
 			device_unregister(&icd->dev);
 			/* Not before device_unregister(), .remove
 			 * needs parent to call ici->ops->remove() */
@@ -1023,7 +1008,7 @@ void soc_camera_host_unregister(struct soc_camera_host *ici)
 
 	mutex_unlock(&list_lock);
 
-	device_unregister(&ici->dev);
+	dev_set_drvdata(ici->dev, NULL);
 }
 EXPORT_SYMBOL(soc_camera_host_unregister);
 
@@ -1130,7 +1115,7 @@ int soc_camera_video_start(struct soc_camera_device *icd)
 	vdev = video_device_alloc();
 	if (!vdev)
 		goto evidallocd;
-	dev_dbg(&ici->dev, "Allocated video_device %p\n", vdev);
+	dev_dbg(ici->dev, "Allocated video_device %p\n", vdev);
 
 	strlcpy(vdev->name, ici->drv_name, sizeof(vdev->name));
 
diff --git a/include/media/soc_camera.h b/include/media/soc_camera.h
index 396c32550e0..bef5e81d693 100644
--- a/include/media/soc_camera.h
+++ b/include/media/soc_camera.h
@@ -60,7 +60,7 @@ struct soc_camera_file {
 
 struct soc_camera_host {
 	struct list_head list;
-	struct device dev;
+	struct device *dev;
 	unsigned char nr;				/* Host number */
 	void *priv;
 	const char *drv_name;
@@ -117,7 +117,7 @@ static inline struct soc_camera_device *to_soc_camera_dev(struct device *dev)
 
 static inline struct soc_camera_host *to_soc_camera_host(struct device *dev)
 {
-	return container_of(dev, struct soc_camera_host, dev);
+	return dev_get_drvdata(dev);
 }
 
 extern int soc_camera_host_register(struct soc_camera_host *ici);
-- 
cgit v1.2.3-70-g09d2


From 102e78136446faca7d7d241b628c5bd0e0d61d5d Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Sat, 2 May 2009 10:12:50 -0300
Subject: V4L/DVB (11671): v4l2: add v4l2_device_set_name()

Add a utility function that can be used to setup the v4l2_device's name
field in a standard manner.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/video4linux/v4l2-framework.txt |  5 +++++
 drivers/media/video/v4l2-device.c            | 16 ++++++++++++++++
 include/media/v4l2-device.h                  | 21 +++++++++++++++++++++
 3 files changed, 42 insertions(+)

(limited to 'include')

diff --git a/Documentation/video4linux/v4l2-framework.txt b/Documentation/video4linux/v4l2-framework.txt
index 854808b67fa..d54c1e4c6a9 100644
--- a/Documentation/video4linux/v4l2-framework.txt
+++ b/Documentation/video4linux/v4l2-framework.txt
@@ -89,6 +89,11 @@ from dev (driver name followed by the bus_id, to be precise). If you set it
 up before calling v4l2_device_register then it will be untouched. If dev is
 NULL, then you *must* setup v4l2_dev->name before calling v4l2_device_register.
 
+You can use v4l2_device_set_name() to set the name based on a driver name and
+a driver-global atomic_t instance. This will generate names like ivtv0, ivtv1,
+etc. If the name ends with a digit, then it will insert a dash: cx18-0,
+cx18-1, etc. This function returns the instance number.
+
 The first 'dev' argument is normally the struct device pointer of a pci_dev,
 usb_interface or platform_device. It is rare for dev to be NULL, but it happens
 with ISA devices or when one device creates multiple PCI devices, thus making
diff --git a/drivers/media/video/v4l2-device.c b/drivers/media/video/v4l2-device.c
index 94aa485ade5..ffb425f7c24 100644
--- a/drivers/media/video/v4l2-device.c
+++ b/drivers/media/video/v4l2-device.c
@@ -49,6 +49,22 @@ int v4l2_device_register(struct device *dev, struct v4l2_device *v4l2_dev)
 }
 EXPORT_SYMBOL_GPL(v4l2_device_register);
 
+int v4l2_device_set_name(struct v4l2_device *v4l2_dev, const char *basename,
+						atomic_t *instance)
+{
+	int num = atomic_inc_return(instance) - 1;
+	int len = strlen(basename);
+
+	if (basename[len - 1] >= '0' && basename[len - 1] <= '9')
+		snprintf(v4l2_dev->name, sizeof(v4l2_dev->name),
+				"%s-%d", basename, num);
+	else
+		snprintf(v4l2_dev->name, sizeof(v4l2_dev->name),
+				"%s%d", basename, num);
+	return num;
+}
+EXPORT_SYMBOL_GPL(v4l2_device_set_name);
+
 void v4l2_device_disconnect(struct v4l2_device *v4l2_dev)
 {
 	if (v4l2_dev->dev) {
diff --git a/include/media/v4l2-device.h b/include/media/v4l2-device.h
index 9afd39fb2cf..5d5d550e63a 100644
--- a/include/media/v4l2-device.h
+++ b/include/media/v4l2-device.h
@@ -53,10 +53,31 @@ struct v4l2_device {
    dev may be NULL in rare cases (ISA devices). In that case you
    must fill in the v4l2_dev->name field before calling this function. */
 int __must_check v4l2_device_register(struct device *dev, struct v4l2_device *v4l2_dev);
+
+/* Optional function to initialize the name field of struct v4l2_device using
+   the driver name and a driver-global atomic_t instance.
+   This function will increment the instance counter and returns the instance
+   value used in the name.
+
+   Example:
+
+   static atomic_t drv_instance = ATOMIC_INIT(0);
+
+   ...
+
+   instance = v4l2_device_set_name(&v4l2_dev, "foo", &drv_instance);
+
+   The first time this is called the name field will be set to foo0 and
+   this function returns 0. If the name ends with a digit (e.g. cx18),
+   then the name will be set to cx18-0 since cx180 looks really odd. */
+int v4l2_device_set_name(struct v4l2_device *v4l2_dev, const char *basename,
+						atomic_t *instance);
+
 /* Set v4l2_dev->dev to NULL. Call when the USB parent disconnects.
    Since the parent disappears this ensures that v4l2_dev doesn't have an
    invalid parent pointer. */
 void v4l2_device_disconnect(struct v4l2_device *v4l2_dev);
+
 /* Unregister all sub-devices and any other resources related to v4l2_dev. */
 void v4l2_device_unregister(struct v4l2_device *v4l2_dev);
 
-- 
cgit v1.2.3-70-g09d2


From b5b2b7ed569cedac4f5da38e08b01c88443187bd Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Sat, 2 May 2009 10:58:51 -0300
Subject: V4L/DVB (11673): v4l2-device: unregister i2c_clients when
 unregistering the v4l2_device.

Until now I relied on i2c_del_adapter to unregister the i2c_clients for
me, however, if the i2c bus is a platform bus then it is never deleted.

So instead I need to unregister i2c clients when unregistering the
v4l2_device.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/v4l2-common.c |  1 +
 drivers/media/video/v4l2-device.c | 13 ++++++++++++-
 include/media/v4l2-subdev.h       |  5 +++++
 3 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/media/video/v4l2-common.c b/drivers/media/video/v4l2-common.c
index e32de9e9b0f..f96475626da 100644
--- a/drivers/media/video/v4l2-common.c
+++ b/drivers/media/video/v4l2-common.c
@@ -746,6 +746,7 @@ void v4l2_i2c_subdev_init(struct v4l2_subdev *sd, struct i2c_client *client,
 		const struct v4l2_subdev_ops *ops)
 {
 	v4l2_subdev_init(sd, ops);
+	sd->flags |= V4L2_SUBDEV_FL_IS_I2C;
 	/* the owner is the same as the i2c_client's driver owner */
 	sd->owner = client->driver->driver.owner;
 	/* i2c_client and v4l2_subdev point to one another */
diff --git a/drivers/media/video/v4l2-device.c b/drivers/media/video/v4l2-device.c
index ffb425f7c24..e1520bc84f7 100644
--- a/drivers/media/video/v4l2-device.c
+++ b/drivers/media/video/v4l2-device.c
@@ -83,8 +83,19 @@ void v4l2_device_unregister(struct v4l2_device *v4l2_dev)
 	v4l2_device_disconnect(v4l2_dev);
 
 	/* Unregister subdevs */
-	list_for_each_entry_safe(sd, next, &v4l2_dev->subdevs, list)
+	list_for_each_entry_safe(sd, next, &v4l2_dev->subdevs, list) {
 		v4l2_device_unregister_subdev(sd);
+		if (sd->flags & V4L2_SUBDEV_FL_IS_I2C) {
+			struct i2c_client *client = v4l2_get_subdevdata(sd);
+
+			/* We need to unregister the i2c client explicitly.
+			   We cannot rely on i2c_del_adapter to always
+			   unregister clients for us, since if the i2c bus
+			   is a platform bus, then it is never deleted. */
+			if (client)
+				i2c_unregister_device(client);
+		}
+	}
 }
 EXPORT_SYMBOL_GPL(v4l2_device_unregister);
 
diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index 17856081c80..a503e1cee78 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -230,12 +230,16 @@ struct v4l2_subdev_ops {
 
 #define V4L2_SUBDEV_NAME_SIZE 32
 
+/* Set this flag if this subdev is a i2c device. */
+#define V4L2_SUBDEV_FL_IS_I2C (1U << 0)
+
 /* Each instance of a subdev driver should create this struct, either
    stand-alone or embedded in a larger struct.
  */
 struct v4l2_subdev {
 	struct list_head list;
 	struct module *owner;
+	u32 flags;
 	struct v4l2_device *v4l2_dev;
 	const struct v4l2_subdev_ops *ops;
 	/* name must be unique */
@@ -264,6 +268,7 @@ static inline void v4l2_subdev_init(struct v4l2_subdev *sd,
 	BUG_ON(!ops || !ops->core);
 	sd->ops = ops;
 	sd->v4l2_dev = NULL;
+	sd->flags = 0;
 	sd->name[0] = '\0';
 	sd->grp_id = 0;
 	sd->priv = NULL;
-- 
cgit v1.2.3-70-g09d2


From 0fd327bd0d1b508eb64da3876098f6f43bfc1509 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Thu, 7 May 2009 13:25:32 -0300
Subject: V4L/DVB (11705): soc-camera: prepare for the platform driver
 conversion

Add a platform driver to soc_camera.c. This way we preserve backwards
compatibility with existing platforms and can start converting them one by one
to the new platform-device soc-camera interface.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/soc_camera.c | 71 +++++++++++++++++++++++++++++++++++++---
 include/media/soc_camera.h       |  5 +++
 2 files changed, 71 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/soc_camera.c b/drivers/media/video/soc_camera.c
index 2d341f537d5..2014e9e32b3 100644
--- a/drivers/media/video/soc_camera.c
+++ b/drivers/media/video/soc_camera.c
@@ -16,19 +16,21 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/module.h>
-#include <linux/init.h>
 #include <linux/device.h>
-#include <linux/list.h>
 #include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/platform_device.h>
 #include <linux/vmalloc.h>
 
+#include <media/soc_camera.h>
 #include <media/v4l2-common.h>
-#include <media/v4l2-ioctl.h>
 #include <media/v4l2-dev.h>
+#include <media/v4l2-ioctl.h>
 #include <media/videobuf-core.h>
-#include <media/soc_camera.h>
 
 /* Default to VGA resolution */
 #define DEFAULT_WIDTH	640
@@ -1159,6 +1161,57 @@ void soc_camera_video_stop(struct soc_camera_device *icd)
 }
 EXPORT_SYMBOL(soc_camera_video_stop);
 
+static int __devinit soc_camera_pdrv_probe(struct platform_device *pdev)
+{
+	struct soc_camera_link *icl = pdev->dev.platform_data;
+	struct i2c_adapter *adap;
+	struct i2c_client *client;
+
+	if (!icl)
+		return -EINVAL;
+
+	adap = i2c_get_adapter(icl->i2c_adapter_id);
+	if (!adap) {
+		dev_warn(&pdev->dev, "Cannot get adapter #%d. No driver?\n",
+			 icl->i2c_adapter_id);
+		/* -ENODEV and -ENXIO do not produce an error on probe()... */
+		return -ENOENT;
+	}
+
+	icl->board_info->platform_data = icl;
+	client = i2c_new_device(adap, icl->board_info);
+	if (!client) {
+		i2c_put_adapter(adap);
+		return -ENOMEM;
+	}
+
+	platform_set_drvdata(pdev, client);
+
+	return 0;
+}
+
+static int __devexit soc_camera_pdrv_remove(struct platform_device *pdev)
+{
+	struct i2c_client *client = platform_get_drvdata(pdev);
+
+	if (!client)
+		return -ENODEV;
+
+	i2c_unregister_device(client);
+	i2c_put_adapter(client->adapter);
+
+	return 0;
+}
+
+static struct platform_driver __refdata soc_camera_pdrv = {
+	.probe	= soc_camera_pdrv_probe,
+	.remove	= __exit_p(soc_camera_pdrv_remove),
+	.driver	= {
+		.name = "soc-camera-pdrv",
+		.owner = THIS_MODULE,
+	},
+};
+
 static int __init soc_camera_init(void)
 {
 	int ret = bus_register(&soc_camera_bus_type);
@@ -1168,8 +1221,14 @@ static int __init soc_camera_init(void)
 	if (ret)
 		goto edrvr;
 
+	ret = platform_driver_register(&soc_camera_pdrv);
+	if (ret)
+		goto epdr;
+
 	return 0;
 
+epdr:
+	driver_unregister(&ic_drv);
 edrvr:
 	bus_unregister(&soc_camera_bus_type);
 	return ret;
@@ -1177,6 +1236,7 @@ edrvr:
 
 static void __exit soc_camera_exit(void)
 {
+	platform_driver_unregister(&soc_camera_pdrv);
 	driver_unregister(&ic_drv);
 	bus_unregister(&soc_camera_bus_type);
 }
@@ -1187,3 +1247,4 @@ module_exit(soc_camera_exit);
 MODULE_DESCRIPTION("Image capture bus driver");
 MODULE_AUTHOR("Guennadi Liakhovetski <kernel@pengutronix.de>");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:soc-camera-pdrv");
diff --git a/include/media/soc_camera.h b/include/media/soc_camera.h
index bef5e81d693..23ecead35e7 100644
--- a/include/media/soc_camera.h
+++ b/include/media/soc_camera.h
@@ -92,11 +92,16 @@ struct soc_camera_host_ops {
 #define SOCAM_SENSOR_INVERT_VSYNC	(1 << 3)
 #define SOCAM_SENSOR_INVERT_DATA	(1 << 4)
 
+struct i2c_board_info;
+
 struct soc_camera_link {
 	/* Camera bus id, used to match a camera and a bus */
 	int bus_id;
 	/* Per camera SOCAM_SENSOR_* bus flags */
 	unsigned long flags;
+	int i2c_adapter_id;
+	struct i2c_board_info *board_info;
+	const char *module_name;
 	/* Optional callbacks to power on or off and reset the sensor */
 	int (*power)(struct device *, int);
 	int (*reset)(struct device *);
-- 
cgit v1.2.3-70-g09d2


From 40199c50b891d24d1a8f1d480f886680a3ac9b74 Mon Sep 17 00:00:00 2001
From: Chaithrika U S <chaithrika@ti.com>
Date: Thu, 7 May 2009 09:29:25 -0300
Subject: V4L/DVB (11742): TI THS7303 video amplifier driver code

This patch adds driver for TI THS7303 video amplifier. This driver is
implemented as a v4l2 sub device. Tested on TI DM646x EVM.

This version has updates based on review comments by Mauro Chehab.

Signed-off-by: Chaithrika U S <chaithrika@ti.com>
Reviewed-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/Kconfig     |   9 +++
 drivers/media/video/Makefile    |   1 +
 drivers/media/video/ths7303.c   | 151 ++++++++++++++++++++++++++++++++++++++++
 include/media/v4l2-chip-ident.h |   3 +
 4 files changed, 164 insertions(+)
 create mode 100644 drivers/media/video/ths7303.c

(limited to 'include')

diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig
index 57835f5715f..3308462a835 100644
--- a/drivers/media/video/Kconfig
+++ b/drivers/media/video/Kconfig
@@ -440,6 +440,15 @@ config VIDEO_ADV7175
 	  To compile this driver as a module, choose M here: the
 	  module will be called adv7175.
 
+config VIDEO_THS7303
+	tristate "THS7303 Video Amplifier"
+	depends on I2C
+	help
+	  Support for TI THS7303 video amplifier
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called ths7303.
+
 comment "Video improvement chips"
 
 config VIDEO_UPD64031A
diff --git a/drivers/media/video/Makefile b/drivers/media/video/Makefile
index 7aefac64330..352dd6d3200 100644
--- a/drivers/media/video/Makefile
+++ b/drivers/media/video/Makefile
@@ -54,6 +54,7 @@ obj-$(CONFIG_VIDEO_BT819) += bt819.o
 obj-$(CONFIG_VIDEO_BT856) += bt856.o
 obj-$(CONFIG_VIDEO_BT866) += bt866.o
 obj-$(CONFIG_VIDEO_KS0127) += ks0127.o
+obj-$(CONFIG_VIDEO_THS7303) += ths7303.o
 
 obj-$(CONFIG_VIDEO_ZORAN) += zoran/
 
diff --git a/drivers/media/video/ths7303.c b/drivers/media/video/ths7303.c
new file mode 100644
index 00000000000..21781f8a0e8
--- /dev/null
+++ b/drivers/media/video/ths7303.c
@@ -0,0 +1,151 @@
+/*
+ * ths7303- THS7303 Video Amplifier driver
+ *
+ * Copyright (C) 2009 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed .as is. WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/ctype.h>
+#include <linux/i2c.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/videodev2.h>
+
+#include <media/v4l2-device.h>
+#include <media/v4l2-subdev.h>
+#include <media/v4l2-chip-ident.h>
+
+MODULE_DESCRIPTION("TI THS7303 video amplifier driver");
+MODULE_AUTHOR("Chaithrika U S");
+MODULE_LICENSE("GPL");
+
+static int debug;
+module_param(debug, int, 0644);
+MODULE_PARM_DESC(debug, "Debug level 0-1");
+
+/* following function is used to set ths7303 */
+static int ths7303_setvalue(struct v4l2_subdev *sd, v4l2_std_id std)
+{
+	int err = 0;
+	u8 val;
+	struct i2c_client *client;
+
+	client = v4l2_get_subdevdata(sd);
+
+	if (std & (V4L2_STD_ALL & ~V4L2_STD_SECAM)) {
+		val = 0x02;
+		v4l2_dbg(1, debug, sd, "setting value for SDTV format\n");
+	} else {
+		val = 0x00;
+		v4l2_dbg(1, debug, sd, "disabling all channels\n");
+	}
+
+	err |= i2c_smbus_write_byte_data(client, 0x01, val);
+	err |= i2c_smbus_write_byte_data(client, 0x02, val);
+	err |= i2c_smbus_write_byte_data(client, 0x03, val);
+
+	if (err)
+		v4l2_err(sd, "write failed\n");
+
+	return err;
+}
+
+static int ths7303_s_std_output(struct v4l2_subdev *sd, v4l2_std_id norm)
+{
+	return ths7303_setvalue(sd, norm);
+}
+
+static int ths7303_g_chip_ident(struct v4l2_subdev *sd,
+				struct v4l2_dbg_chip_ident *chip)
+{
+	struct i2c_client *client = v4l2_get_subdevdata(sd);
+
+	return v4l2_chip_ident_i2c_client(client, chip, V4L2_IDENT_THS7303, 0);
+}
+
+static const struct v4l2_subdev_video_ops ths7303_video_ops = {
+	.s_std_output	= ths7303_s_std_output,
+};
+
+static const struct v4l2_subdev_core_ops ths7303_core_ops = {
+	.g_chip_ident = ths7303_g_chip_ident,
+};
+
+static const struct v4l2_subdev_ops ths7303_ops = {
+	.core	= &ths7303_core_ops,
+	.video 	= &ths7303_video_ops,
+};
+
+static int ths7303_probe(struct i2c_client *client,
+			const struct i2c_device_id *id)
+{
+	struct v4l2_subdev *sd;
+	v4l2_std_id std_id = V4L2_STD_NTSC;
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA))
+		return -ENODEV;
+
+	v4l_info(client, "chip found @ 0x%x (%s)\n",
+			client->addr << 1, client->adapter->name);
+
+	sd = kzalloc(sizeof(struct v4l2_subdev), GFP_KERNEL);
+	if (sd == NULL)
+		return -ENOMEM;
+
+	v4l2_i2c_subdev_init(sd, client, &ths7303_ops);
+
+	return ths7303_setvalue(sd, std_id);
+}
+
+static int ths7303_remove(struct i2c_client *client)
+{
+	struct v4l2_subdev *sd = i2c_get_clientdata(client);
+
+	v4l2_device_unregister_subdev(sd);
+	kfree(sd);
+
+	return 0;
+}
+
+static const struct i2c_device_id ths7303_id[] = {
+	{"ths7303", 0},
+	{},
+};
+
+MODULE_DEVICE_TABLE(i2c, ths7303_id);
+
+static struct i2c_driver ths7303_driver = {
+	.driver = {
+		.owner	= THIS_MODULE,
+		.name	= "ths7303",
+	},
+	.probe		= ths7303_probe,
+	.remove		= ths7303_remove,
+	.id_table	= ths7303_id,
+};
+
+static int __init ths7303_init(void)
+{
+	return i2c_add_driver(&ths7303_driver);
+}
+
+static void __exit ths7303_exit(void)
+{
+	i2c_del_driver(&ths7303_driver);
+}
+
+module_init(ths7303_init);
+module_exit(ths7303_exit);
+
diff --git a/include/media/v4l2-chip-ident.h b/include/media/v4l2-chip-ident.h
index 1be461a2907..fbeb98f8ee2 100644
--- a/include/media/v4l2-chip-ident.h
+++ b/include/media/v4l2-chip-ident.h
@@ -137,6 +137,9 @@ enum {
 	/* module saa7191: just ident 7191 */
 	V4L2_IDENT_SAA7191 = 7191,
 
+	/* module ths7303: just ident 7303 */
+	V4L2_IDENT_THS7303 = 7303,
+
 	/* module wm8739: just ident 8739 */
 	V4L2_IDENT_WM8739 = 8739,
 
-- 
cgit v1.2.3-70-g09d2


From 06e61f8d5f5df68104168ac20d0527ecee13638a Mon Sep 17 00:00:00 2001
From: Chaithrika U S <chaithrika@ti.com>
Date: Thu, 7 May 2009 09:30:01 -0300
Subject: V4L/DVB (11743): Analog Devices ADV7343 video encoder driver

Add ADV7343 I2C based video encoder driver. This follows the
v4l2-subdev framework. This driver has been tested on TI DM646x EVM. It
has been tested for Composite and Component outputs.

Updates as per review by Mauro Chehab, added support for more standards
supported by the encoder. Also adding the missed out signed-offs.Tested
only NTSC and PAL standards.

[hverkuil@xs4all.nl: s_routing API changed, updated driver to use new API]
Signed-off-by: Manjunath Hadli <mrh@ti.com>
Signed-off-by: Brijesh Jadav <brijesh.j@ti.com>
Signed-off-by: Chaithrika U S <chaithrika@ti.com>
Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/Kconfig        |   9 +
 drivers/media/video/Makefile       |   1 +
 drivers/media/video/adv7343.c      | 534 +++++++++++++++++++++++++++++++++++++
 drivers/media/video/adv7343_regs.h | 185 +++++++++++++
 include/media/adv7343.h            |  23 ++
 include/media/v4l2-chip-ident.h    |   3 +
 6 files changed, 755 insertions(+)
 create mode 100644 drivers/media/video/adv7343.c
 create mode 100644 drivers/media/video/adv7343_regs.h
 create mode 100644 include/media/adv7343.h

(limited to 'include')

diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig
index 3308462a835..af576eaeab7 100644
--- a/drivers/media/video/Kconfig
+++ b/drivers/media/video/Kconfig
@@ -449,6 +449,15 @@ config VIDEO_THS7303
 	  To compile this driver as a module, choose M here: the
 	  module will be called ths7303.
 
+config VIDEO_ADV7343
+	tristate "ADV7343 video encoder"
+	depends on I2C
+	help
+	  Support for Analog Devices I2C bus based ADV7343 encoder.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called adv7343.
+
 comment "Video improvement chips"
 
 config VIDEO_UPD64031A
diff --git a/drivers/media/video/Makefile b/drivers/media/video/Makefile
index 352dd6d3200..f7d9a4c6f45 100644
--- a/drivers/media/video/Makefile
+++ b/drivers/media/video/Makefile
@@ -49,6 +49,7 @@ obj-$(CONFIG_VIDEO_SAA7185) += saa7185.o
 obj-$(CONFIG_VIDEO_SAA7191) += saa7191.o
 obj-$(CONFIG_VIDEO_ADV7170) += adv7170.o
 obj-$(CONFIG_VIDEO_ADV7175) += adv7175.o
+obj-$(CONFIG_VIDEO_ADV7343) += adv7343.o
 obj-$(CONFIG_VIDEO_VPX3220) += vpx3220.o
 obj-$(CONFIG_VIDEO_BT819) += bt819.o
 obj-$(CONFIG_VIDEO_BT856) += bt856.o
diff --git a/drivers/media/video/adv7343.c b/drivers/media/video/adv7343.c
new file mode 100644
index 00000000000..30f5caf5dda
--- /dev/null
+++ b/drivers/media/video/adv7343.c
@@ -0,0 +1,534 @@
+/*
+ * adv7343 - ADV7343 Video Encoder Driver
+ *
+ * The encoder hardware does not support SECAM.
+ *
+ * Copyright (C) 2009 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed .as is. WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/ctype.h>
+#include <linux/i2c.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/videodev2.h>
+#include <linux/uaccess.h>
+#include <linux/version.h>
+
+#include <media/adv7343.h>
+#include <media/v4l2-device.h>
+#include <media/v4l2-chip-ident.h>
+
+#include "adv7343_regs.h"
+
+MODULE_DESCRIPTION("ADV7343 video encoder driver");
+MODULE_LICENSE("GPL");
+
+static int debug;
+module_param(debug, int, 0644);
+MODULE_PARM_DESC(debug, "Debug level 0-1");
+
+struct adv7343_state {
+	struct v4l2_subdev sd;
+	u8 reg00;
+	u8 reg01;
+	u8 reg02;
+	u8 reg35;
+	u8 reg80;
+	u8 reg82;
+	int bright;
+	int hue;
+	int gain;
+	u32 output;
+	v4l2_std_id std;
+};
+
+static inline struct adv7343_state *to_state(struct v4l2_subdev *sd)
+{
+	return container_of(sd, struct adv7343_state, sd);
+}
+
+static inline int adv7343_write(struct v4l2_subdev *sd, u8 reg, u8 value)
+{
+	struct i2c_client *client = v4l2_get_subdevdata(sd);
+
+	return i2c_smbus_write_byte_data(client, reg, value);
+}
+
+static const u8 adv7343_init_reg_val[] = {
+	ADV7343_SOFT_RESET, ADV7343_SOFT_RESET_DEFAULT,
+	ADV7343_POWER_MODE_REG, ADV7343_POWER_MODE_REG_DEFAULT,
+
+	ADV7343_HD_MODE_REG1, ADV7343_HD_MODE_REG1_DEFAULT,
+	ADV7343_HD_MODE_REG2, ADV7343_HD_MODE_REG2_DEFAULT,
+	ADV7343_HD_MODE_REG3, ADV7343_HD_MODE_REG3_DEFAULT,
+	ADV7343_HD_MODE_REG4, ADV7343_HD_MODE_REG4_DEFAULT,
+	ADV7343_HD_MODE_REG5, ADV7343_HD_MODE_REG5_DEFAULT,
+	ADV7343_HD_MODE_REG6, ADV7343_HD_MODE_REG6_DEFAULT,
+	ADV7343_HD_MODE_REG7, ADV7343_HD_MODE_REG7_DEFAULT,
+
+	ADV7343_SD_MODE_REG1, ADV7343_SD_MODE_REG1_DEFAULT,
+	ADV7343_SD_MODE_REG2, ADV7343_SD_MODE_REG2_DEFAULT,
+	ADV7343_SD_MODE_REG3, ADV7343_SD_MODE_REG3_DEFAULT,
+	ADV7343_SD_MODE_REG4, ADV7343_SD_MODE_REG4_DEFAULT,
+	ADV7343_SD_MODE_REG5, ADV7343_SD_MODE_REG5_DEFAULT,
+	ADV7343_SD_MODE_REG6, ADV7343_SD_MODE_REG6_DEFAULT,
+	ADV7343_SD_MODE_REG7, ADV7343_SD_MODE_REG7_DEFAULT,
+	ADV7343_SD_MODE_REG8, ADV7343_SD_MODE_REG8_DEFAULT,
+
+	ADV7343_SD_HUE_REG, ADV7343_SD_HUE_REG_DEFAULT,
+	ADV7343_SD_CGMS_WSS0, ADV7343_SD_CGMS_WSS0_DEFAULT,
+	ADV7343_SD_BRIGHTNESS_WSS, ADV7343_SD_BRIGHTNESS_WSS_DEFAULT,
+};
+
+/*
+ * 			    2^32
+ * FSC(reg) =  FSC (HZ) * --------
+ *			  27000000
+ */
+static const struct adv7343_std_info stdinfo[] = {
+	{
+		/* FSC(Hz) = 3,579,545.45 Hz */
+		SD_STD_NTSC, 569408542, V4L2_STD_NTSC,
+	}, {
+		/* FSC(Hz) = 3,575,611.00 Hz */
+		SD_STD_PAL_M, 568782678, V4L2_STD_PAL_M,
+	}, {
+		/* FSC(Hz) = 3,582,056.00 */
+		SD_STD_PAL_N, 569807903, V4L2_STD_PAL_Nc,
+	}, {
+		/* FSC(Hz) = 4,433,618.75 Hz */
+		SD_STD_PAL_N, 705268427, V4L2_STD_PAL_N,
+	}, {
+		/* FSC(Hz) = 4,433,618.75 Hz */
+		SD_STD_PAL_BDGHI, 705268427, V4L2_STD_PAL,
+	}, {
+		/* FSC(Hz) = 4,433,618.75 Hz */
+		SD_STD_NTSC, 705268427, V4L2_STD_NTSC_443,
+	}, {
+		/* FSC(Hz) = 4,433,618.75 Hz */
+		SD_STD_PAL_M, 705268427, V4L2_STD_PAL_60,
+	},
+};
+
+static int adv7343_setstd(struct v4l2_subdev *sd, v4l2_std_id std)
+{
+	struct adv7343_state *state = to_state(sd);
+	struct adv7343_std_info *std_info;
+	int output_idx, num_std;
+	char *fsc_ptr;
+	u8 reg, val;
+	int err = 0;
+	int i = 0;
+
+	output_idx = state->output;
+
+	std_info = (struct adv7343_std_info *)stdinfo;
+	num_std = ARRAY_SIZE(stdinfo);
+
+	for (i = 0; i < num_std; i++) {
+		if (std_info[i].stdid & std)
+			break;
+	}
+
+	if (i == num_std) {
+		v4l2_dbg(1, debug, sd,
+				"Invalid std or std is not supported: %llx\n",
+						(unsigned long long)std);
+		return -EINVAL;
+	}
+
+	/* Set the standard */
+	val = state->reg80 & (~(SD_STD_MASK));
+	val |= std_info[i].standard_val3;
+	err = adv7343_write(sd, ADV7343_SD_MODE_REG1, val);
+	if (err < 0)
+		goto setstd_exit;
+
+	state->reg80 = val;
+
+	/* Configure the input mode register */
+	val = state->reg01 & (~((u8) INPUT_MODE_MASK));
+	val |= SD_INPUT_MODE;
+	err = adv7343_write(sd, ADV7343_MODE_SELECT_REG, val);
+	if (err < 0)
+		goto setstd_exit;
+
+	state->reg01 = val;
+
+	/* Program the sub carrier frequency registers */
+	fsc_ptr = (unsigned char *)&std_info[i].fsc_val;
+	reg = ADV7343_FSC_REG0;
+	for (i = 0; i < 4; i++, reg++, fsc_ptr++) {
+		err = adv7343_write(sd, reg, *fsc_ptr);
+		if (err < 0)
+			goto setstd_exit;
+	}
+
+	val = state->reg80;
+
+	/* Filter settings */
+	if (std & (V4L2_STD_NTSC | V4L2_STD_NTSC_443))
+		val &= 0x03;
+	else if (std & ~V4L2_STD_SECAM)
+		val |= 0x04;
+
+	err = adv7343_write(sd, ADV7343_SD_MODE_REG1, val);
+	if (err < 0)
+		goto setstd_exit;
+
+	state->reg80 = val;
+
+setstd_exit:
+	if (err != 0)
+		v4l2_err(sd, "Error setting std, write failed\n");
+
+	return err;
+}
+
+static int adv7343_setoutput(struct v4l2_subdev *sd, u32 output_type)
+{
+	struct adv7343_state *state = to_state(sd);
+	unsigned char val;
+	int err = 0;
+
+	if (output_type > ADV7343_SVIDEO_ID) {
+		v4l2_dbg(1, debug, sd,
+			"Invalid output type or output type not supported:%d\n",
+								output_type);
+		return -EINVAL;
+	}
+
+	/* Enable Appropriate DAC */
+	val = state->reg00 & 0x03;
+
+	if (output_type == ADV7343_COMPOSITE_ID)
+		val |= ADV7343_COMPOSITE_POWER_VALUE;
+	else if (output_type == ADV7343_COMPONENT_ID)
+		val |= ADV7343_COMPONENT_POWER_VALUE;
+	else
+		val |= ADV7343_SVIDEO_POWER_VALUE;
+
+	err = adv7343_write(sd, ADV7343_POWER_MODE_REG, val);
+	if (err < 0)
+		goto setoutput_exit;
+
+	state->reg00 = val;
+
+	/* Enable YUV output */
+	val = state->reg02 | YUV_OUTPUT_SELECT;
+	err = adv7343_write(sd, ADV7343_MODE_REG0, val);
+	if (err < 0)
+		goto setoutput_exit;
+
+	state->reg02 = val;
+
+	/* configure SD DAC Output 2 and SD DAC Output 1 bit to zero */
+	val = state->reg82 & (SD_DAC_1_DI & SD_DAC_2_DI);
+	err = adv7343_write(sd, ADV7343_SD_MODE_REG2, val);
+	if (err < 0)
+		goto setoutput_exit;
+
+	state->reg82 = val;
+
+	/* configure ED/HD Color DAC Swap and ED/HD RGB Input Enable bit to
+	 * zero */
+	val = state->reg35 & (HD_RGB_INPUT_DI & HD_DAC_SWAP_DI);
+	err = adv7343_write(sd, ADV7343_HD_MODE_REG6, val);
+	if (err < 0)
+		goto setoutput_exit;
+
+	state->reg35 = val;
+
+setoutput_exit:
+	if (err != 0)
+		v4l2_err(sd, "Error setting output, write failed\n");
+
+	return err;
+}
+
+static int adv7343_log_status(struct v4l2_subdev *sd)
+{
+	struct adv7343_state *state = to_state(sd);
+
+	v4l2_info(sd, "Standard: %llx\n", (unsigned long long)state->std);
+	v4l2_info(sd, "Output: %s\n", (state->output == 0) ? "Composite" :
+			((state->output == 1) ? "Component" : "S-Video"));
+	return 0;
+}
+
+static int adv7343_queryctrl(struct v4l2_subdev *sd, struct v4l2_queryctrl *qc)
+{
+	switch (qc->id) {
+	case V4L2_CID_BRIGHTNESS:
+		return v4l2_ctrl_query_fill(qc, ADV7343_BRIGHTNESS_MIN,
+						ADV7343_BRIGHTNESS_MAX, 1,
+						ADV7343_BRIGHTNESS_DEF);
+	case V4L2_CID_HUE:
+		return v4l2_ctrl_query_fill(qc, ADV7343_HUE_MIN,
+						ADV7343_HUE_MAX, 1 ,
+						ADV7343_HUE_DEF);
+	case V4L2_CID_GAIN:
+		return v4l2_ctrl_query_fill(qc, ADV7343_GAIN_MIN,
+						ADV7343_GAIN_MAX, 1,
+						ADV7343_GAIN_DEF);
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int adv7343_s_ctrl(struct v4l2_subdev *sd, struct v4l2_control *ctrl)
+{
+	struct adv7343_state *state = to_state(sd);
+	int err = 0;
+
+	switch (ctrl->id) {
+	case V4L2_CID_BRIGHTNESS:
+		if (ctrl->value < ADV7343_BRIGHTNESS_MIN ||
+					ctrl->value > ADV7343_BRIGHTNESS_MAX) {
+			v4l2_dbg(1, debug, sd,
+					"invalid brightness settings %d\n",
+								ctrl->value);
+			return -ERANGE;
+		}
+
+		state->bright = ctrl->value;
+		err = adv7343_write(sd, ADV7343_SD_BRIGHTNESS_WSS,
+					state->bright);
+		break;
+
+	case V4L2_CID_HUE:
+		if (ctrl->value < ADV7343_HUE_MIN ||
+					ctrl->value > ADV7343_HUE_MAX) {
+			v4l2_dbg(1, debug, sd, "invalid hue settings %d\n",
+								ctrl->value);
+			return -ERANGE;
+		}
+
+		state->hue = ctrl->value;
+		err = adv7343_write(sd, ADV7343_SD_HUE_REG, state->hue);
+		break;
+
+	case V4L2_CID_GAIN:
+		if (ctrl->value < ADV7343_GAIN_MIN ||
+					ctrl->value > ADV7343_GAIN_MAX) {
+			v4l2_dbg(1, debug, sd, "invalid gain settings %d\n",
+								ctrl->value);
+			return -ERANGE;
+		}
+
+		if ((ctrl->value > POSITIVE_GAIN_MAX) &&
+			(ctrl->value < NEGATIVE_GAIN_MIN)) {
+			v4l2_dbg(1, debug, sd,
+				"gain settings not within the specified range\n");
+			return -ERANGE;
+		}
+
+		state->gain = ctrl->value;
+		err = adv7343_write(sd, ADV7343_DAC2_OUTPUT_LEVEL, state->gain);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if (err < 0)
+		v4l2_err(sd, "Failed to set the encoder controls\n");
+
+	return err;
+}
+
+static int adv7343_g_ctrl(struct v4l2_subdev *sd, struct v4l2_control *ctrl)
+{
+	struct adv7343_state *state = to_state(sd);
+
+	switch (ctrl->id) {
+	case V4L2_CID_BRIGHTNESS:
+		ctrl->value = state->bright;
+		break;
+
+	case V4L2_CID_HUE:
+		ctrl->value = state->hue;
+		break;
+
+	case V4L2_CID_GAIN:
+		ctrl->value = state->gain;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int adv7343_g_chip_ident(struct v4l2_subdev *sd,
+				struct v4l2_dbg_chip_ident *chip)
+{
+	struct i2c_client *client = v4l2_get_subdevdata(sd);
+
+	return v4l2_chip_ident_i2c_client(client, chip, V4L2_IDENT_ADV7343, 0);
+}
+
+static const struct v4l2_subdev_core_ops adv7343_core_ops = {
+	.log_status	= adv7343_log_status,
+	.g_chip_ident	= adv7343_g_chip_ident,
+	.g_ctrl		= adv7343_g_ctrl,
+	.s_ctrl		= adv7343_s_ctrl,
+	.queryctrl	= adv7343_queryctrl,
+};
+
+static int adv7343_s_std_output(struct v4l2_subdev *sd, v4l2_std_id std)
+{
+	struct adv7343_state *state = to_state(sd);
+	int err = 0;
+
+	if (state->std == std)
+		return 0;
+
+	err = adv7343_setstd(sd, std);
+	if (!err)
+		state->std = std;
+
+	return err;
+}
+
+static int adv7343_s_routing(struct v4l2_subdev *sd,
+		u32 input, u32 output, u32 config)
+{
+	struct adv7343_state *state = to_state(sd);
+	int err = 0;
+
+	if (state->output == output)
+		return 0;
+
+	err = adv7343_setoutput(sd, output);
+	if (!err)
+		state->output = output;
+
+	return err;
+}
+
+static const struct v4l2_subdev_video_ops adv7343_video_ops = {
+	.s_std_output	= adv7343_s_std_output,
+	.s_routing	= adv7343_s_routing,
+};
+
+static const struct v4l2_subdev_ops adv7343_ops = {
+	.core	= &adv7343_core_ops,
+	.video	= &adv7343_video_ops,
+};
+
+static int adv7343_initialize(struct v4l2_subdev *sd)
+{
+	struct adv7343_state *state = to_state(sd);
+	int err = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(adv7343_init_reg_val); i += 2) {
+
+		err = adv7343_write(sd, adv7343_init_reg_val[i],
+					adv7343_init_reg_val[i+1]);
+		if (err) {
+			v4l2_err(sd, "Error initializing\n");
+			return err;
+		}
+	}
+
+	/* Configure for default video standard */
+	err = adv7343_setoutput(sd, state->output);
+	if (err < 0) {
+		v4l2_err(sd, "Error setting output during init\n");
+		return -EINVAL;
+	}
+
+	err = adv7343_setstd(sd, state->std);
+	if (err < 0) {
+		v4l2_err(sd, "Error setting std during init\n");
+		return -EINVAL;
+	}
+
+	return err;
+}
+
+static int adv7343_probe(struct i2c_client *client,
+				const struct i2c_device_id *id)
+{
+	struct adv7343_state *state;
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA))
+		return -ENODEV;
+
+	v4l_info(client, "chip found @ 0x%x (%s)\n",
+			client->addr << 1, client->adapter->name);
+
+	state = kzalloc(sizeof(struct adv7343_state), GFP_KERNEL);
+	if (state == NULL)
+		return -ENOMEM;
+
+	state->reg00	= 0x80;
+	state->reg01	= 0x00;
+	state->reg02	= 0x20;
+	state->reg35	= 0x00;
+	state->reg80	= ADV7343_SD_MODE_REG1_DEFAULT;
+	state->reg82	= ADV7343_SD_MODE_REG2_DEFAULT;
+
+	state->output = ADV7343_COMPOSITE_ID;
+	state->std = V4L2_STD_NTSC;
+
+	v4l2_i2c_subdev_init(&state->sd, client, &adv7343_ops);
+	return adv7343_initialize(&state->sd);
+}
+
+static int adv7343_remove(struct i2c_client *client)
+{
+	struct v4l2_subdev *sd = i2c_get_clientdata(client);
+
+	v4l2_device_unregister_subdev(sd);
+	kfree(to_state(sd));
+
+	return 0;
+}
+
+static const struct i2c_device_id adv7343_id[] = {
+	{"adv7343", 0},
+	{},
+};
+
+MODULE_DEVICE_TABLE(i2c, adv7343_id);
+
+static struct i2c_driver adv7343_driver = {
+	.driver = {
+		.owner	= THIS_MODULE,
+		.name	= "adv7343",
+	},
+	.probe		= adv7343_probe,
+	.remove		= adv7343_remove,
+	.id_table	= adv7343_id,
+};
+
+static __init int init_adv7343(void)
+{
+	return i2c_add_driver(&adv7343_driver);
+}
+
+static __exit void exit_adv7343(void)
+{
+	i2c_del_driver(&adv7343_driver);
+}
+
+module_init(init_adv7343);
+module_exit(exit_adv7343);
diff --git a/drivers/media/video/adv7343_regs.h b/drivers/media/video/adv7343_regs.h
new file mode 100644
index 00000000000..3431045b33d
--- /dev/null
+++ b/drivers/media/video/adv7343_regs.h
@@ -0,0 +1,185 @@
+/*
+ * ADV7343 encoder related structure and register definitions
+ *
+ * Copyright (C) 2009 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed .as is. WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef ADV7343_REG_H
+#define ADV7343_REGS_H
+
+struct adv7343_std_info {
+	u32 standard_val3;
+	u32 fsc_val;
+	v4l2_std_id stdid;
+};
+
+/* Register offset macros */
+#define ADV7343_POWER_MODE_REG		(0x00)
+#define ADV7343_MODE_SELECT_REG		(0x01)
+#define ADV7343_MODE_REG0		(0x02)
+
+#define ADV7343_DAC2_OUTPUT_LEVEL	(0x0b)
+
+#define ADV7343_SOFT_RESET		(0x17)
+
+#define ADV7343_HD_MODE_REG1		(0x30)
+#define ADV7343_HD_MODE_REG2		(0x31)
+#define ADV7343_HD_MODE_REG3		(0x32)
+#define ADV7343_HD_MODE_REG4		(0x33)
+#define ADV7343_HD_MODE_REG5		(0x34)
+#define ADV7343_HD_MODE_REG6		(0x35)
+
+#define ADV7343_HD_MODE_REG7		(0x39)
+
+#define ADV7343_SD_MODE_REG1		(0x80)
+#define ADV7343_SD_MODE_REG2		(0x82)
+#define ADV7343_SD_MODE_REG3		(0x83)
+#define ADV7343_SD_MODE_REG4		(0x84)
+#define ADV7343_SD_MODE_REG5		(0x86)
+#define ADV7343_SD_MODE_REG6		(0x87)
+#define ADV7343_SD_MODE_REG7		(0x88)
+#define ADV7343_SD_MODE_REG8		(0x89)
+
+#define ADV7343_FSC_REG0		(0x8C)
+#define ADV7343_FSC_REG1		(0x8D)
+#define ADV7343_FSC_REG2		(0x8E)
+#define ADV7343_FSC_REG3		(0x8F)
+
+#define ADV7343_SD_CGMS_WSS0		(0x99)
+
+#define ADV7343_SD_HUE_REG		(0xA0)
+#define ADV7343_SD_BRIGHTNESS_WSS	(0xA1)
+
+/* Default values for the registers */
+#define ADV7343_POWER_MODE_REG_DEFAULT		(0x10)
+#define ADV7343_HD_MODE_REG1_DEFAULT		(0x3C)	/* Changed Default
+							   720p EAVSAV code*/
+#define ADV7343_HD_MODE_REG2_DEFAULT		(0x01)	/* Changed Pixel data
+							   valid */
+#define ADV7343_HD_MODE_REG3_DEFAULT		(0x00)	/* Color delay 0 clks */
+#define ADV7343_HD_MODE_REG4_DEFAULT		(0xE8)	/* Changed */
+#define ADV7343_HD_MODE_REG5_DEFAULT		(0x08)
+#define ADV7343_HD_MODE_REG6_DEFAULT		(0x00)
+#define ADV7343_HD_MODE_REG7_DEFAULT		(0x00)
+#define ADV7343_SD_MODE_REG8_DEFAULT		(0x00)
+#define ADV7343_SOFT_RESET_DEFAULT		(0x02)
+#define ADV7343_COMPOSITE_POWER_VALUE		(0x80)
+#define ADV7343_COMPONENT_POWER_VALUE		(0x1C)
+#define ADV7343_SVIDEO_POWER_VALUE		(0x60)
+#define ADV7343_SD_HUE_REG_DEFAULT		(127)
+#define ADV7343_SD_BRIGHTNESS_WSS_DEFAULT	(0x03)
+
+#define ADV7343_SD_CGMS_WSS0_DEFAULT		(0x10)
+
+#define ADV7343_SD_MODE_REG1_DEFAULT		(0x00)
+#define ADV7343_SD_MODE_REG2_DEFAULT		(0xC9)
+#define ADV7343_SD_MODE_REG3_DEFAULT		(0x10)
+#define ADV7343_SD_MODE_REG4_DEFAULT		(0x01)
+#define ADV7343_SD_MODE_REG5_DEFAULT		(0x02)
+#define ADV7343_SD_MODE_REG6_DEFAULT		(0x0C)
+#define ADV7343_SD_MODE_REG7_DEFAULT		(0x04)
+#define ADV7343_SD_MODE_REG8_DEFAULT		(0x00)
+
+/* Bit masks for Mode Select Register */
+#define INPUT_MODE_MASK			(0x70)
+#define SD_INPUT_MODE			(0x00)
+#define HD_720P_INPUT_MODE		(0x10)
+#define HD_1080I_INPUT_MODE		(0x10)
+
+/* Bit masks for Mode Register 0 */
+#define TEST_PATTERN_BLACK_BAR_EN	(0x04)
+#define YUV_OUTPUT_SELECT		(0x20)
+#define RGB_OUTPUT_SELECT		(0xDF)
+
+/* Bit masks for DAC output levels */
+#define DAC_OUTPUT_LEVEL_MASK		(0xFF)
+#define POSITIVE_GAIN_MAX		(0x40)
+#define POSITIVE_GAIN_MIN		(0x00)
+#define NEGATIVE_GAIN_MAX		(0xFF)
+#define NEGATIVE_GAIN_MIN		(0xC0)
+
+/* Bit masks for soft reset register */
+#define SOFT_RESET			(0x02)
+
+/* Bit masks for HD Mode Register 1 */
+#define OUTPUT_STD_MASK		(0x03)
+#define OUTPUT_STD_SHIFT	(0)
+#define OUTPUT_STD_EIA0_2	(0x00)
+#define OUTPUT_STD_EIA0_1	(0x01)
+#define OUTPUT_STD_FULL		(0x02)
+#define EMBEDDED_SYNC		(0x04)
+#define EXTERNAL_SYNC		(0xFB)
+#define STD_MODE_SHIFT		(3)
+#define STD_MODE_MASK		(0x1F)
+#define STD_MODE_720P		(0x05)
+#define STD_MODE_720P_25	(0x08)
+#define STD_MODE_720P_30	(0x07)
+#define STD_MODE_720P_50	(0x06)
+#define STD_MODE_1080I		(0x0D)
+#define STD_MODE_1080I_25fps	(0x0E)
+#define STD_MODE_1080P_24	(0x12)
+#define STD_MODE_1080P_25	(0x10)
+#define STD_MODE_1080P_30	(0x0F)
+#define STD_MODE_525P		(0x00)
+#define STD_MODE_625P		(0x03)
+
+/* Bit masks for SD Mode Register 1 */
+#define SD_STD_MASK		(0x03)
+#define SD_STD_NTSC		(0x00)
+#define SD_STD_PAL_BDGHI	(0x01)
+#define SD_STD_PAL_M		(0x02)
+#define SD_STD_PAL_N		(0x03)
+#define SD_LUMA_FLTR_MASK	(0x7)
+#define SD_LUMA_FLTR_SHIFT	(0x2)
+#define SD_CHROMA_FLTR_MASK	(0x7)
+#define SD_CHROMA_FLTR_SHIFT	(0x5)
+
+/* Bit masks for SD Mode Register 2 */
+#define SD_PBPR_SSAF_EN		(0x01)
+#define SD_PBPR_SSAF_DI		(0xFE)
+#define SD_DAC_1_DI		(0xFD)
+#define SD_DAC_2_DI		(0xFB)
+#define SD_PEDESTAL_EN		(0x08)
+#define SD_PEDESTAL_DI		(0xF7)
+#define SD_SQUARE_PIXEL_EN	(0x10)
+#define SD_SQUARE_PIXEL_DI	(0xEF)
+#define SD_PIXEL_DATA_VALID	(0x40)
+#define SD_ACTIVE_EDGE_EN	(0x80)
+#define SD_ACTIVE_EDGE_DI	(0x7F)
+
+/* Bit masks for HD Mode Register 6 */
+#define HD_RGB_INPUT_EN		(0x02)
+#define HD_RGB_INPUT_DI		(0xFD)
+#define HD_PBPR_SYNC_EN		(0x04)
+#define HD_PBPR_SYNC_DI		(0xFB)
+#define HD_DAC_SWAP_EN		(0x08)
+#define HD_DAC_SWAP_DI		(0xF7)
+#define HD_GAMMA_CURVE_A	(0xEF)
+#define HD_GAMMA_CURVE_B	(0x10)
+#define HD_GAMMA_EN		(0x20)
+#define HD_GAMMA_DI		(0xDF)
+#define HD_ADPT_FLTR_MODEB	(0x40)
+#define HD_ADPT_FLTR_MODEA	(0xBF)
+#define HD_ADPT_FLTR_EN		(0x80)
+#define HD_ADPT_FLTR_DI		(0x7F)
+
+#define ADV7343_BRIGHTNESS_MAX	(127)
+#define ADV7343_BRIGHTNESS_MIN	(0)
+#define ADV7343_BRIGHTNESS_DEF	(3)
+#define ADV7343_HUE_MAX		(255)
+#define ADV7343_HUE_MIN		(0)
+#define ADV7343_HUE_DEF		(127)
+#define ADV7343_GAIN_MAX	(255)
+#define ADV7343_GAIN_MIN	(0)
+#define ADV7343_GAIN_DEF	(0)
+
+#endif
diff --git a/include/media/adv7343.h b/include/media/adv7343.h
new file mode 100644
index 00000000000..d6f8a4e1a1f
--- /dev/null
+++ b/include/media/adv7343.h
@@ -0,0 +1,23 @@
+/*
+ * ADV7343 header file
+ *
+ * Copyright (C) 2009 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed .as is. WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef ADV7343_H
+#define ADV7343_H
+
+#define ADV7343_COMPOSITE_ID	(0)
+#define ADV7343_COMPONENT_ID	(1)
+#define ADV7343_SVIDEO_ID	(2)
+
+#endif				/* End of #ifndef ADV7343_H */
diff --git a/include/media/v4l2-chip-ident.h b/include/media/v4l2-chip-ident.h
index fbeb98f8ee2..4d7e2272c42 100644
--- a/include/media/v4l2-chip-ident.h
+++ b/include/media/v4l2-chip-ident.h
@@ -140,6 +140,9 @@ enum {
 	/* module ths7303: just ident 7303 */
 	V4L2_IDENT_THS7303 = 7303,
 
+	/* module adv7343: just ident 7343 */
+	V4L2_IDENT_ADV7343 = 7343,
+
 	/* module wm8739: just ident 8739 */
 	V4L2_IDENT_WM8739 = 8739,
 
-- 
cgit v1.2.3-70-g09d2


From 8475cbcb0f885189969915eb3680d10fc525d722 Mon Sep 17 00:00:00 2001
From: Dmitri Belimov <d.belimov@gmail.com>
Date: Mon, 11 May 2009 08:16:06 -0300
Subject: V4L/DVB (11775): tuner: add support Philips MK5 tuner

Signed-off-by: Beholder Intl. Ltd. Dmitry Belimov <d.belimov@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/video4linux/CARDLIST.tuner  |  1 +
 drivers/media/common/tuners/tuner-types.c | 30 ++++++++++++++++++++++++++++++
 include/media/tuner.h                     |  1 +
 3 files changed, 32 insertions(+)

(limited to 'include')

diff --git a/Documentation/video4linux/CARDLIST.tuner b/Documentation/video4linux/CARDLIST.tuner
index 691d2f37dc5..22c1c55979c 100644
--- a/Documentation/video4linux/CARDLIST.tuner
+++ b/Documentation/video4linux/CARDLIST.tuner
@@ -76,3 +76,4 @@ tuner=75 - Philips TEA5761 FM Radio
 tuner=76 - Xceive 5000 tuner
 tuner=77 - TCL tuner MF02GIP-5N-E
 tuner=78 - Philips FMD1216MEX MK3 Hybrid Tuner
+tuner=79 - Philips PAL/SECAM multi (FM1216 MK5)
diff --git a/drivers/media/common/tuners/tuner-types.c b/drivers/media/common/tuners/tuner-types.c
index 7c0bc064c00..a9ce5b2ce08 100644
--- a/drivers/media/common/tuners/tuner-types.c
+++ b/drivers/media/common/tuners/tuner-types.c
@@ -578,6 +578,31 @@ static struct tuner_params tuner_fm1216me_mk3_params[] = {
 	},
 };
 
+/* ------------ TUNER_PHILIPS_FM1216MK5 - Philips PAL ------------ */
+
+static struct tuner_range tuner_fm1216mk5_pal_ranges[] = {
+	{ 16 * 158.00 /*MHz*/, 0xce, 0x01, },
+	{ 16 * 441.00 /*MHz*/, 0xce, 0x02, },
+	{ 16 * 864.00        , 0xce, 0x04, },
+};
+
+static struct tuner_params tuner_fm1216mk5_params[] = {
+	{
+		.type   = TUNER_PARAM_TYPE_PAL,
+		.ranges = tuner_fm1216mk5_pal_ranges,
+		.count  = ARRAY_SIZE(tuner_fm1216mk5_pal_ranges),
+		.cb_first_if_lower_freq = 1,
+		.has_tda9887 = 1,
+		.port1_active = 1,
+		.port2_active = 1,
+		.port2_invert_for_secam_lc = 1,
+		.port1_fm_high_sensitivity = 1,
+		.default_top_mid = -2,
+		.default_top_secam_mid = -2,
+		.default_top_secam_high = -2,
+	},
+};
+
 /* ------------ TUNER_LG_NTSC_NEW_TAPC - LGINNOTEK NTSC ------------ */
 
 static struct tuner_params tuner_lg_ntsc_new_tapc_params[] = {
@@ -1694,6 +1719,11 @@ struct tunertype tuners[] = {
 		.initdata = tua603x_agc112,
 		.sleepdata = (u8[]){ 4, 0x9c, 0x60, 0x85, 0x54 },
 	},
+		[TUNER_PHILIPS_FM1216MK5] = { /* Philips PAL */
+		.name   = "Philips PAL/SECAM multi (FM1216 MK5)",
+		.params = tuner_fm1216mk5_params,
+		.count  = ARRAY_SIZE(tuner_fm1216mk5_params),
+	},
 };
 EXPORT_SYMBOL(tuners);
 
diff --git a/include/media/tuner.h b/include/media/tuner.h
index 7d4e2db7807..735a2a94116 100644
--- a/include/media/tuner.h
+++ b/include/media/tuner.h
@@ -124,6 +124,7 @@
 #define TUNER_XC5000			76	/* Xceive Silicon Tuner */
 #define TUNER_TCL_MF02GIP_5N		77	/* TCL MF02GIP_5N */
 #define TUNER_PHILIPS_FMD1216MEX_MK3	78
+#define TUNER_PHILIPS_FM1216MK5		79
 
 /* tv card specific */
 #define TDA9887_PRESENT 		(1<<0)
-- 
cgit v1.2.3-70-g09d2


From 1df8e9861cf9fac5737ccb61c7f7fefa77711d40 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Wed, 13 May 2009 16:48:07 -0300
Subject: V4L/DVB (11843): ir-kbd-i2c: Don't use i2c_client.name for our own
 needs

In the standard device driver binding model, the name field of
struct i2c_client is used to match devices to their drivers, so we
must stop using it for internal purposes. Define a separate field
in struct IR_i2c as a replacement, and use it.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/cx231xx/cx231xx-input.c |  2 +-
 drivers/media/video/em28xx/em28xx-cards.c   | 10 +++++-----
 drivers/media/video/em28xx/em28xx-input.c   |  2 +-
 drivers/media/video/ir-kbd-i2c.c            |  5 +++--
 drivers/media/video/saa7134/saa7134-input.c | 12 ++++++------
 include/media/ir-kbd-i2c.h                  |  1 +
 6 files changed, 17 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/cx231xx/cx231xx-input.c b/drivers/media/video/cx231xx/cx231xx-input.c
index 97e304c3c79..48f22fa38e6 100644
--- a/drivers/media/video/cx231xx/cx231xx-input.c
+++ b/drivers/media/video/cx231xx/cx231xx-input.c
@@ -36,7 +36,7 @@ MODULE_PARM_DESC(ir_debug, "enable debug messages [IR]");
 
 #define i2cdprintk(fmt, arg...) \
 	if (ir_debug) { \
-		printk(KERN_DEBUG "%s/ir: " fmt, ir->c.name , ## arg); \
+		printk(KERN_DEBUG "%s/ir: " fmt, ir->name , ## arg); \
 	}
 
 #define dprintk(fmt, arg...) \
diff --git a/drivers/media/video/em28xx/em28xx-cards.c b/drivers/media/video/em28xx/em28xx-cards.c
index 3958f168fc5..b2aed29e2d7 100644
--- a/drivers/media/video/em28xx/em28xx-cards.c
+++ b/drivers/media/video/em28xx/em28xx-cards.c
@@ -1384,13 +1384,13 @@ struct em28xx_board em28xx_boards[] = {
 			.type     = EM28XX_VMUX_COMPOSITE1,
 			.vmux     = TVP5150_COMPOSITE1,
 			.amux     = EM28XX_AUDIO_SRC_LINE,
-                        .gpio     = terratec_av350_unmute_gpio,
+			.gpio     = terratec_av350_unmute_gpio,
 
 		}, {
 			.type     = EM28XX_VMUX_SVIDEO,
 			.vmux     = TVP5150_SVIDEO,
 			.amux     = EM28XX_AUDIO_SRC_LINE,
-                        .gpio     = terratec_av350_unmute_gpio,
+			.gpio     = terratec_av350_unmute_gpio,
 		} },
 	},
 };
@@ -1929,19 +1929,19 @@ void em28xx_set_ir(struct em28xx *dev, struct IR_i2c *ir)
 	case (EM2820_BOARD_TERRATEC_CINERGY_250):
 		ir->ir_codes = ir_codes_em_terratec;
 		ir->get_key = em28xx_get_key_terratec;
-		snprintf(ir->c.name, sizeof(ir->c.name),
+		snprintf(ir->name, sizeof(ir->name),
 			 "i2c IR (EM28XX Terratec)");
 		break;
 	case (EM2820_BOARD_PINNACLE_USB_2):
 		ir->ir_codes = ir_codes_pinnacle_grey;
 		ir->get_key = em28xx_get_key_pinnacle_usb_grey;
-		snprintf(ir->c.name, sizeof(ir->c.name),
+		snprintf(ir->name, sizeof(ir->name),
 			 "i2c IR (EM28XX Pinnacle PCTV)");
 		break;
 	case (EM2820_BOARD_HAUPPAUGE_WINTV_USB_2):
 		ir->ir_codes = ir_codes_hauppauge_new;
 		ir->get_key = em28xx_get_key_em_haup;
-		snprintf(ir->c.name, sizeof(ir->c.name),
+		snprintf(ir->name, sizeof(ir->name),
 			 "i2c IR (EM2840 Hauppauge)");
 		break;
 	case (EM2820_BOARD_MSI_VOX_USB_2):
diff --git a/drivers/media/video/em28xx/em28xx-input.c b/drivers/media/video/em28xx/em28xx-input.c
index a5abfd7a19f..7450ba7dee8 100644
--- a/drivers/media/video/em28xx/em28xx-input.c
+++ b/drivers/media/video/em28xx/em28xx-input.c
@@ -40,7 +40,7 @@ MODULE_PARM_DESC(ir_debug, "enable debug messages [IR]");
 
 #define i2cdprintk(fmt, arg...) \
 	if (ir_debug) { \
-		printk(KERN_DEBUG "%s/ir: " fmt, ir->c.name , ## arg); \
+		printk(KERN_DEBUG "%s/ir: " fmt, ir->name , ## arg); \
 	}
 
 #define dprintk(fmt, arg...) \
diff --git a/drivers/media/video/ir-kbd-i2c.c b/drivers/media/video/ir-kbd-i2c.c
index 092c7da0f37..ba341e6fb2d 100644
--- a/drivers/media/video/ir-kbd-i2c.c
+++ b/drivers/media/video/ir-kbd-i2c.c
@@ -337,6 +337,7 @@ static int ir_attach(struct i2c_adapter *adap, int addr,
 
 	ir->c.adapter = adap;
 	ir->c.addr    = addr;
+	snprintf(ir->c.name, sizeof(ir->c.name), "ir-kbd");
 
 	i2c_set_clientdata(&ir->c, ir);
 
@@ -410,7 +411,7 @@ static int ir_attach(struct i2c_adapter *adap, int addr,
 	}
 
 	/* Sets name */
-	snprintf(ir->c.name, sizeof(ir->c.name), "i2c IR (%s)", name);
+	snprintf(ir->name, sizeof(ir->name), "i2c IR (%s)", name);
 	ir->ir_codes = ir_codes;
 
 	/* register i2c device
@@ -435,7 +436,7 @@ static int ir_attach(struct i2c_adapter *adap, int addr,
 	/* init + register input device */
 	ir_input_init(input_dev, &ir->ir, ir_type, ir->ir_codes);
 	input_dev->id.bustype = BUS_I2C;
-	input_dev->name       = ir->c.name;
+	input_dev->name       = ir->name;
 	input_dev->phys       = ir->phys;
 
 	err = input_register_device(ir->input);
diff --git a/drivers/media/video/saa7134/saa7134-input.c b/drivers/media/video/saa7134/saa7134-input.c
index 45063751726..6cd693d3b8e 100644
--- a/drivers/media/video/saa7134/saa7134-input.c
+++ b/drivers/media/video/saa7134/saa7134-input.c
@@ -60,7 +60,7 @@ MODULE_PARM_DESC(disable_other_ir, "disable full codes of "
 #define dprintk(fmt, arg...)	if (ir_debug) \
 	printk(KERN_DEBUG "%s/ir: " fmt, dev->name , ## arg)
 #define i2cdprintk(fmt, arg...)    if (ir_debug) \
-	printk(KERN_DEBUG "%s/ir: " fmt, ir->c.name , ## arg)
+	printk(KERN_DEBUG "%s/ir: " fmt, ir->name , ## arg)
 
 /* Helper functions for RC5 and NEC decoding at GPIO16 or GPIO18 */
 static int saa7134_rc5_irq(struct saa7134_dev *dev);
@@ -693,7 +693,7 @@ void saa7134_set_i2c_ir(struct saa7134_dev *dev, struct IR_i2c *ir)
 	switch (dev->board) {
 	case SAA7134_BOARD_PINNACLE_PCTV_110i:
 	case SAA7134_BOARD_PINNACLE_PCTV_310i:
-		snprintf(ir->c.name, sizeof(ir->c.name), "Pinnacle PCTV");
+		snprintf(ir->name, sizeof(ir->name), "Pinnacle PCTV");
 		if (pinnacle_remote == 0) {
 			ir->get_key   = get_key_pinnacle_color;
 			ir->ir_codes = ir_codes_pinnacle_color;
@@ -703,17 +703,17 @@ void saa7134_set_i2c_ir(struct saa7134_dev *dev, struct IR_i2c *ir)
 		}
 		break;
 	case SAA7134_BOARD_UPMOST_PURPLE_TV:
-		snprintf(ir->c.name, sizeof(ir->c.name), "Purple TV");
+		snprintf(ir->name, sizeof(ir->name), "Purple TV");
 		ir->get_key   = get_key_purpletv;
 		ir->ir_codes  = ir_codes_purpletv;
 		break;
 	case SAA7134_BOARD_MSI_TVATANYWHERE_PLUS:
-		snprintf(ir->c.name, sizeof(ir->c.name), "MSI TV@nywhere Plus");
+		snprintf(ir->name, sizeof(ir->name), "MSI TV@nywhere Plus");
 		ir->get_key  = get_key_msi_tvanywhere_plus;
 		ir->ir_codes = ir_codes_msi_tvanywhere_plus;
 		break;
 	case SAA7134_BOARD_HAUPPAUGE_HVR1110:
-		snprintf(ir->c.name, sizeof(ir->c.name), "HVR 1110");
+		snprintf(ir->name, sizeof(ir->name), "HVR 1110");
 		ir->get_key   = get_key_hvr1110;
 		ir->ir_codes  = ir_codes_hauppauge_new;
 		break;
@@ -729,7 +729,7 @@ void saa7134_set_i2c_ir(struct saa7134_dev *dev, struct IR_i2c *ir)
 	case SAA7134_BOARD_BEHOLD_M63:
 	case SAA7134_BOARD_BEHOLD_M6_EXTRA:
 	case SAA7134_BOARD_BEHOLD_H6:
-		snprintf(ir->c.name, sizeof(ir->c.name), "BeholdTV");
+		snprintf(ir->name, sizeof(ir->name), "BeholdTV");
 		ir->get_key   = get_key_beholdm6xx;
 		ir->ir_codes  = ir_codes_behold;
 		break;
diff --git a/include/media/ir-kbd-i2c.h b/include/media/ir-kbd-i2c.h
index 07963d70540..6a9719c8e90 100644
--- a/include/media/ir-kbd-i2c.h
+++ b/include/media/ir-kbd-i2c.h
@@ -15,6 +15,7 @@ struct IR_i2c {
 	unsigned char          old;
 
 	struct delayed_work    work;
+	char                   name[32];
 	char                   phys[32];
 	int                    (*get_key)(struct IR_i2c*, u32*, u32*);
 };
-- 
cgit v1.2.3-70-g09d2


From c668f32dca105d876e51862a003a302fa61e4ae4 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Wed, 13 May 2009 16:48:50 -0300
Subject: V4L/DVB (11844): ir-kbd-i2c: Switch to the new-style device binding
 model

Let card drivers probe for IR receiver devices and instantiate them if
found. Ultimately it would be better if we could stop probing
completely, but I suspect this won't be possible for all card types.

There's certainly room for cleanups. For example, some drivers are
sharing I2C adapter IDs, so they also had to share the list of I2C
addresses being probed for an IR receiver. Now that each driver
explicitly says which addresses should be probed, maybe some addresses
can be dropped from some drivers.

Also, the special cases in saa7134-i2c should probably be handled on a
per-board basis. This would be more efficient and less risky than always
probing extra addresses on all boards. I'll give it a try later.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/bt8xx/bttv-i2c.c        |  21 +++
 drivers/media/video/cx231xx/cx231xx-cards.c |  11 +-
 drivers/media/video/cx231xx/cx231xx-i2c.c   |   3 +
 drivers/media/video/cx231xx/cx231xx.h       |   1 +
 drivers/media/video/cx23885/cx23885-i2c.c   |  12 ++
 drivers/media/video/cx88/cx88-i2c.c         |  13 ++
 drivers/media/video/em28xx/em28xx-cards.c   |  20 ++-
 drivers/media/video/em28xx/em28xx-i2c.c     |   3 +
 drivers/media/video/em28xx/em28xx-input.c   |   6 +-
 drivers/media/video/em28xx/em28xx.h         |   1 +
 drivers/media/video/ir-kbd-i2c.c            | 200 +++++-----------------------
 drivers/media/video/ivtv/ivtv-i2c.c         |  31 ++++-
 drivers/media/video/saa7134/saa7134-i2c.c   |   3 +
 drivers/media/video/saa7134/saa7134-input.c |  86 ++++++++++--
 drivers/media/video/saa7134/saa7134.h       |   1 +
 include/media/ir-kbd-i2c.h                  |   2 +-
 16 files changed, 220 insertions(+), 194 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/bt8xx/bttv-i2c.c b/drivers/media/video/bt8xx/bttv-i2c.c
index a99d92fac3d..ebd1ee9dc87 100644
--- a/drivers/media/video/bt8xx/bttv-i2c.c
+++ b/drivers/media/video/bt8xx/bttv-i2c.c
@@ -389,6 +389,27 @@ int __devinit init_bttv_i2c(struct bttv *btv)
 	}
 	if (0 == btv->i2c_rc && i2c_scan)
 		do_i2c_scan(btv->c.v4l2_dev.name, &btv->i2c_client);
+
+	/* Instantiate the IR receiver device, if present */
+	if (0 == btv->i2c_rc) {
+		struct i2c_board_info info;
+		/* The external IR receiver is at i2c address 0x34 (0x35 for
+		   reads).  Future Hauppauge cards will have an internal
+		   receiver at 0x30 (0x31 for reads).  In theory, both can be
+		   fitted, and Hauppauge suggest an external overrides an
+		   internal.
+
+		   That's why we probe 0x1a (~0x34) first. CB
+		*/
+		const unsigned short addr_list[] = {
+			0x1a, 0x18, 0x4b, 0x64, 0x30,
+			I2C_CLIENT_END
+		};
+
+		memset(&info, 0, sizeof(struct i2c_board_info));
+		strlcpy(info.type, "ir_video", I2C_NAME_SIZE);
+		i2c_new_probed_device(&btv->c.i2c_adap, &info, addr_list);
+	}
 	return btv->i2c_rc;
 }
 
diff --git a/drivers/media/video/cx231xx/cx231xx-cards.c b/drivers/media/video/cx231xx/cx231xx-cards.c
index c8a32b1b538..734f6eaefd9 100644
--- a/drivers/media/video/cx231xx/cx231xx-cards.c
+++ b/drivers/media/video/cx231xx/cx231xx-cards.c
@@ -281,13 +281,16 @@ static void cx231xx_config_tuner(struct cx231xx *dev)
 }
 
 /* ----------------------------------------------------------------------- */
-void cx231xx_set_ir(struct cx231xx *dev, struct IR_i2c *ir)
+void cx231xx_register_i2c_ir(struct cx231xx *dev)
 {
-	if (disable_ir) {
-		ir->get_key = NULL;
+	if (disable_ir)
 		return;
-	}
 
+	/* REVISIT: instantiate IR device */
+}
+
+void cx231xx_set_ir(struct cx231xx *dev, struct IR_i2c *ir)
+{
 	/* detect & configure */
 	switch (dev->model) {
 
diff --git a/drivers/media/video/cx231xx/cx231xx-i2c.c b/drivers/media/video/cx231xx/cx231xx-i2c.c
index b4a03d813e0..ac4099a49da 100644
--- a/drivers/media/video/cx231xx/cx231xx-i2c.c
+++ b/drivers/media/video/cx231xx/cx231xx-i2c.c
@@ -537,6 +537,9 @@ int cx231xx_i2c_register(struct cx231xx_i2c *bus)
 	if (0 == bus->i2c_rc) {
 		if (i2c_scan)
 			cx231xx_do_i2c_scan(dev, &bus->i2c_client);
+
+		/* Instantiate the IR receiver device, if present */
+		cx231xx_register_i2c_ir(dev);
 	} else
 		cx231xx_warn("%s: i2c bus %d register FAILED\n",
 			     dev->name, bus->nr);
diff --git a/drivers/media/video/cx231xx/cx231xx.h b/drivers/media/video/cx231xx/cx231xx.h
index aa4a23ef491..8c300f60fdf 100644
--- a/drivers/media/video/cx231xx/cx231xx.h
+++ b/drivers/media/video/cx231xx/cx231xx.h
@@ -738,6 +738,7 @@ extern void cx231xx_card_setup(struct cx231xx *dev);
 extern struct cx231xx_board cx231xx_boards[];
 extern struct usb_device_id cx231xx_id_table[];
 extern const unsigned int cx231xx_bcount;
+void cx231xx_register_i2c_ir(struct cx231xx *dev);
 void cx231xx_set_ir(struct cx231xx *dev, struct IR_i2c *ir);
 int cx231xx_tuner_callback(void *ptr, int component, int command, int arg);
 
diff --git a/drivers/media/video/cx23885/cx23885-i2c.c b/drivers/media/video/cx23885/cx23885-i2c.c
index 3421bd12056..384dec34134 100644
--- a/drivers/media/video/cx23885/cx23885-i2c.c
+++ b/drivers/media/video/cx23885/cx23885-i2c.c
@@ -357,6 +357,18 @@ int cx23885_i2c_register(struct cx23885_i2c *bus)
 		printk(KERN_WARNING "%s: i2c bus %d register FAILED\n",
 			dev->name, bus->nr);
 
+	/* Instantiate the IR receiver device, if present */
+	if (0 == bus->i2c_rc) {
+		struct i2c_board_info info;
+		const unsigned short addr_list[] = {
+			0x6b, I2C_CLIENT_END
+		};
+
+		memset(&info, 0, sizeof(struct i2c_board_info));
+		strlcpy(info.type, "ir_video", I2C_NAME_SIZE);
+		i2c_new_probed_device(&bus->i2c_adap, &info, addr_list);
+	}
+
 	return bus->i2c_rc;
 }
 
diff --git a/drivers/media/video/cx88/cx88-i2c.c b/drivers/media/video/cx88/cx88-i2c.c
index 996b4ed5a4f..ee1ca39db06 100644
--- a/drivers/media/video/cx88/cx88-i2c.c
+++ b/drivers/media/video/cx88/cx88-i2c.c
@@ -180,6 +180,19 @@ int cx88_i2c_init(struct cx88_core *core, struct pci_dev *pci)
 			do_i2c_scan(core->name,&core->i2c_client);
 	} else
 		printk("%s: i2c register FAILED\n", core->name);
+
+	/* Instantiate the IR receiver device, if present */
+	if (0 == core->i2c_rc) {
+		struct i2c_board_info info;
+		const unsigned short addr_list[] = {
+			0x18, 0x6b, 0x71,
+			I2C_CLIENT_END
+		};
+
+		memset(&info, 0, sizeof(struct i2c_board_info));
+		strlcpy(info.type, "ir_video", I2C_NAME_SIZE);
+		i2c_new_probed_device(&core->i2c_adap, &info, addr_list);
+	}
 	return core->i2c_rc;
 }
 
diff --git a/drivers/media/video/em28xx/em28xx-cards.c b/drivers/media/video/em28xx/em28xx-cards.c
index b2aed29e2d7..edba71115a9 100644
--- a/drivers/media/video/em28xx/em28xx-cards.c
+++ b/drivers/media/video/em28xx/em28xx-cards.c
@@ -1912,13 +1912,23 @@ static int em28xx_hint_board(struct em28xx *dev)
 }
 
 /* ----------------------------------------------------------------------- */
-void em28xx_set_ir(struct em28xx *dev, struct IR_i2c *ir)
+void em28xx_register_i2c_ir(struct em28xx *dev)
 {
-	if (disable_ir) {
-		ir->get_key = NULL;
-		return ;
-	}
+	struct i2c_board_info info;
+	const unsigned short addr_list[] = {
+		 0x30, 0x47, I2C_CLIENT_END
+	};
+
+	if (disable_ir)
+		return;
 
+	memset(&info, 0, sizeof(struct i2c_board_info));
+	strlcpy(info.type, "ir_video", I2C_NAME_SIZE);
+	i2c_new_probed_device(&dev->i2c_adap, &info, addr_list);
+}
+
+void em28xx_set_ir(struct em28xx *dev, struct IR_i2c *ir)
+{
 	/* detect & configure */
 	switch (dev->model) {
 	case (EM2800_BOARD_UNKNOWN):
diff --git a/drivers/media/video/em28xx/em28xx-i2c.c b/drivers/media/video/em28xx/em28xx-i2c.c
index f0bf1d960c7..d90294cbb70 100644
--- a/drivers/media/video/em28xx/em28xx-i2c.c
+++ b/drivers/media/video/em28xx/em28xx-i2c.c
@@ -575,6 +575,9 @@ int em28xx_i2c_register(struct em28xx *dev)
 	if (i2c_scan)
 		em28xx_do_i2c_scan(dev);
 
+	/* Instantiate the IR receiver device, if present */
+	em28xx_register_i2c_ir(dev);
+
 	return 0;
 }
 
diff --git a/drivers/media/video/em28xx/em28xx-input.c b/drivers/media/video/em28xx/em28xx-input.c
index 7450ba7dee8..7a0fe3816e3 100644
--- a/drivers/media/video/em28xx/em28xx-input.c
+++ b/drivers/media/video/em28xx/em28xx-input.c
@@ -85,7 +85,7 @@ int em28xx_get_key_terratec(struct IR_i2c *ir, u32 *ir_key, u32 *ir_raw)
 	unsigned char b;
 
 	/* poll IR chip */
-	if (1 != i2c_master_recv(&ir->c, &b, 1)) {
+	if (1 != i2c_master_recv(ir->c, &b, 1)) {
 		i2cdprintk("read error\n");
 		return -EIO;
 	}
@@ -114,7 +114,7 @@ int em28xx_get_key_em_haup(struct IR_i2c *ir, u32 *ir_key, u32 *ir_raw)
 	unsigned char code;
 
 	/* poll IR chip */
-	if (2 != i2c_master_recv(&ir->c, buf, 2))
+	if (2 != i2c_master_recv(ir->c, buf, 2))
 		return -EIO;
 
 	/* Does eliminate repeated parity code */
@@ -147,7 +147,7 @@ int em28xx_get_key_pinnacle_usb_grey(struct IR_i2c *ir, u32 *ir_key,
 
 	/* poll IR chip */
 
-	if (3 != i2c_master_recv(&ir->c, buf, 3)) {
+	if (3 != i2c_master_recv(ir->c, buf, 3)) {
 		i2cdprintk("read error\n");
 		return -EIO;
 	}
diff --git a/drivers/media/video/em28xx/em28xx.h b/drivers/media/video/em28xx/em28xx.h
index 58c0ef4a2dc..9c632541df1 100644
--- a/drivers/media/video/em28xx/em28xx.h
+++ b/drivers/media/video/em28xx/em28xx.h
@@ -643,6 +643,7 @@ extern void em28xx_card_setup(struct em28xx *dev);
 extern struct em28xx_board em28xx_boards[];
 extern struct usb_device_id em28xx_id_table[];
 extern const unsigned int em28xx_bcount;
+void em28xx_register_i2c_ir(struct em28xx *dev);
 void em28xx_set_ir(struct em28xx *dev, struct IR_i2c *ir);
 int em28xx_tuner_callback(void *ptr, int component, int command, int arg);
 void em28xx_release_resources(struct em28xx *dev);
diff --git a/drivers/media/video/ir-kbd-i2c.c b/drivers/media/video/ir-kbd-i2c.c
index ba341e6fb2d..cb833a63041 100644
--- a/drivers/media/video/ir-kbd-i2c.c
+++ b/drivers/media/video/ir-kbd-i2c.c
@@ -74,7 +74,7 @@ static int get_key_haup_common(struct IR_i2c *ir, u32 *ir_key, u32 *ir_raw,
 	int start, range, toggle, dev, code, ircode;
 
 	/* poll IR chip */
-	if (size != i2c_master_recv(&ir->c,buf,size))
+	if (size != i2c_master_recv(ir->c, buf, size))
 		return -EIO;
 
 	/* split rc5 data block ... */
@@ -137,7 +137,7 @@ static int get_key_pixelview(struct IR_i2c *ir, u32 *ir_key, u32 *ir_raw)
 	unsigned char b;
 
 	/* poll IR chip */
-	if (1 != i2c_master_recv(&ir->c,&b,1)) {
+	if (1 != i2c_master_recv(ir->c, &b, 1)) {
 		dprintk(1,"read error\n");
 		return -EIO;
 	}
@@ -151,7 +151,7 @@ static int get_key_pv951(struct IR_i2c *ir, u32 *ir_key, u32 *ir_raw)
 	unsigned char b;
 
 	/* poll IR chip */
-	if (1 != i2c_master_recv(&ir->c,&b,1)) {
+	if (1 != i2c_master_recv(ir->c, &b, 1)) {
 		dprintk(1,"read error\n");
 		return -EIO;
 	}
@@ -171,7 +171,7 @@ static int get_key_fusionhdtv(struct IR_i2c *ir, u32 *ir_key, u32 *ir_raw)
 	unsigned char buf[4];
 
 	/* poll IR chip */
-	if (4 != i2c_master_recv(&ir->c,buf,4)) {
+	if (4 != i2c_master_recv(ir->c, buf, 4)) {
 		dprintk(1,"read error\n");
 		return -EIO;
 	}
@@ -195,7 +195,7 @@ static int get_key_knc1(struct IR_i2c *ir, u32 *ir_key, u32 *ir_raw)
 	unsigned char b;
 
 	/* poll IR chip */
-	if (1 != i2c_master_recv(&ir->c,&b,1)) {
+	if (1 != i2c_master_recv(ir->c, &b, 1)) {
 		dprintk(1,"read error\n");
 		return -EIO;
 	}
@@ -222,12 +222,12 @@ static int get_key_avermedia_cardbus(struct IR_i2c *ir,
 				     u32 *ir_key, u32 *ir_raw)
 {
 	unsigned char subaddr, key, keygroup;
-	struct i2c_msg msg[] = { { .addr = ir->c.addr, .flags = 0,
+	struct i2c_msg msg[] = { { .addr = ir->c->addr, .flags = 0,
 				   .buf = &subaddr, .len = 1},
-				 { .addr = ir->c.addr, .flags = I2C_M_RD,
+				 { .addr = ir->c->addr, .flags = I2C_M_RD,
 				  .buf = &key, .len = 1} };
 	subaddr = 0x0d;
-	if (2 != i2c_transfer(ir->c.adapter, msg, 2)) {
+	if (2 != i2c_transfer(ir->c->adapter, msg, 2)) {
 		dprintk(1, "read error\n");
 		return -EIO;
 	}
@@ -237,7 +237,7 @@ static int get_key_avermedia_cardbus(struct IR_i2c *ir,
 
 	subaddr = 0x0b;
 	msg[1].buf = &keygroup;
-	if (2 != i2c_transfer(ir->c.adapter, msg, 2)) {
+	if (2 != i2c_transfer(ir->c->adapter, msg, 2)) {
 		dprintk(1, "read error\n");
 		return -EIO;
 	}
@@ -286,7 +286,7 @@ static void ir_work(struct work_struct *work)
 
 	/* MSI TV@nywhere Plus requires more frequent polling
 	   otherwise it will miss some keypresses */
-	if (ir->c.adapter->id == I2C_HW_SAA7134 && ir->c.addr == 0x30)
+	if (ir->c->adapter->id == I2C_HW_SAA7134 && ir->c->addr == 0x30)
 		polling_interval = 50;
 
 	ir_key_poll(ir);
@@ -295,34 +295,15 @@ static void ir_work(struct work_struct *work)
 
 /* ----------------------------------------------------------------------- */
 
-static int ir_attach(struct i2c_adapter *adap, int addr,
-		      unsigned short flags, int kind);
-static int ir_detach(struct i2c_client *client);
-static int ir_probe(struct i2c_adapter *adap);
-
-static struct i2c_driver driver = {
-	.driver = {
-		.name   = "ir-kbd-i2c",
-	},
-	.id             = I2C_DRIVERID_INFRARED,
-	.attach_adapter = ir_probe,
-	.detach_client  = ir_detach,
-};
-
-static struct i2c_client client_template =
-{
-	.name = "unset",
-	.driver = &driver
-};
-
-static int ir_attach(struct i2c_adapter *adap, int addr,
-		     unsigned short flags, int kind)
+static int ir_probe(struct i2c_client *client, const struct i2c_device_id *id)
 {
 	IR_KEYTAB_TYPE *ir_codes = NULL;
 	char *name;
 	int ir_type;
 	struct IR_i2c *ir;
 	struct input_dev *input_dev;
+	struct i2c_adapter *adap = client->adapter;
+	unsigned short addr = client->addr;
 	int err;
 
 	ir = kzalloc(sizeof(struct IR_i2c),GFP_KERNEL);
@@ -332,14 +313,9 @@ static int ir_attach(struct i2c_adapter *adap, int addr,
 		goto err_out_free;
 	}
 
-	ir->c = client_template;
+	ir->c = client;
 	ir->input = input_dev;
-
-	ir->c.adapter = adap;
-	ir->c.addr    = addr;
-	snprintf(ir->c.name, sizeof(ir->c.name), "ir-kbd");
-
-	i2c_set_clientdata(&ir->c, ir);
+	i2c_set_clientdata(client, ir);
 
 	switch(addr) {
 	case 0x64:
@@ -414,24 +390,9 @@ static int ir_attach(struct i2c_adapter *adap, int addr,
 	snprintf(ir->name, sizeof(ir->name), "i2c IR (%s)", name);
 	ir->ir_codes = ir_codes;
 
-	/* register i2c device
-	 * At device register, IR codes may be changed to be
-	 * board dependent.
-	 */
-	err = i2c_attach_client(&ir->c);
-	if (err)
-		goto err_out_free;
-
-	/* If IR not supported or disabled, unregisters driver */
-	if (ir->get_key == NULL) {
-		err = -ENODEV;
-		goto err_out_detach;
-	}
-
-	/* Phys addr can only be set after attaching (for ir->c.dev) */
 	snprintf(ir->phys, sizeof(ir->phys), "%s/%s/ir0",
-		 dev_name(&ir->c.adapter->dev),
-		 dev_name(&ir->c.dev));
+		 dev_name(&adap->dev),
+		 dev_name(&client->dev));
 
 	/* init + register input device */
 	ir_input_init(input_dev, &ir->ir, ir_type, ir->ir_codes);
@@ -441,7 +402,7 @@ static int ir_attach(struct i2c_adapter *adap, int addr,
 
 	err = input_register_device(ir->input);
 	if (err)
-		goto err_out_detach;
+		goto err_out_free;
 
 	printk(DEVNAME ": %s detected at %s [%s]\n",
 	       ir->input->name, ir->input->phys, adap->name);
@@ -452,135 +413,42 @@ static int ir_attach(struct i2c_adapter *adap, int addr,
 
 	return 0;
 
- err_out_detach:
-	i2c_detach_client(&ir->c);
  err_out_free:
 	input_free_device(input_dev);
 	kfree(ir);
 	return err;
 }
 
-static int ir_detach(struct i2c_client *client)
+static int ir_remove(struct i2c_client *client)
 {
 	struct IR_i2c *ir = i2c_get_clientdata(client);
 
 	/* kill outstanding polls */
 	cancel_delayed_work_sync(&ir->work);
 
-	/* unregister devices */
+	/* unregister device */
 	input_unregister_device(ir->input);
-	i2c_detach_client(&ir->c);
 
 	/* free memory */
 	kfree(ir);
 	return 0;
 }
 
-static int ir_probe(struct i2c_adapter *adap)
-{
-
-	/* The external IR receiver is at i2c address 0x34 (0x35 for
-	   reads).  Future Hauppauge cards will have an internal
-	   receiver at 0x30 (0x31 for reads).  In theory, both can be
-	   fitted, and Hauppauge suggest an external overrides an
-	   internal.
-
-	   That's why we probe 0x1a (~0x34) first. CB
-	*/
-
-	static const int probe_bttv[] = { 0x1a, 0x18, 0x4b, 0x64, 0x30, -1};
-	static const int probe_saa7134[] = { 0x7a, 0x47, 0x71, 0x2d, -1 };
-	static const int probe_em28XX[] = { 0x30, 0x47, -1 };
-	static const int probe_cx88[] = { 0x18, 0x6b, 0x71, -1 };
-	static const int probe_cx23885[] = { 0x6b, -1 };
-	const int *probe;
-	struct i2c_msg msg = {
-		.flags = I2C_M_RD,
-		.len = 0,
-		.buf = NULL,
-	};
-	int i, rc;
-
-	switch (adap->id) {
-	case I2C_HW_B_BT848:
-		probe = probe_bttv;
-		break;
-	case I2C_HW_B_CX2341X:
-		probe = probe_bttv;
-		break;
-	case I2C_HW_SAA7134:
-		probe = probe_saa7134;
-		break;
-	case I2C_HW_B_EM28XX:
-		probe = probe_em28XX;
-		break;
-	case I2C_HW_B_CX2388x:
-		probe = probe_cx88;
-		break;
-	case I2C_HW_B_CX23885:
-		probe = probe_cx23885;
-		break;
-	default:
-		return 0;
-	}
-
-	for (i = 0; -1 != probe[i]; i++) {
-		msg.addr = probe[i];
-		rc = i2c_transfer(adap, &msg, 1);
-		dprintk(1,"probe 0x%02x @ %s: %s\n",
-			probe[i], adap->name,
-			(1 == rc) ? "yes" : "no");
-		if (1 == rc) {
-			ir_attach(adap, probe[i], 0, 0);
-			return 0;
-		}
-	}
-
-	/* Special case for MSI TV@nywhere Plus remote */
-	if (adap->id == I2C_HW_SAA7134) {
-		u8 temp;
-
-		/* MSI TV@nywhere Plus controller doesn't seem to
-		   respond to probes unless we read something from
-		   an existing device. Weird... */
-
-		msg.addr = 0x50;
-		rc = i2c_transfer(adap, &msg, 1);
-			dprintk(1, "probe 0x%02x @ %s: %s\n",
-			msg.addr, adap->name,
-			(1 == rc) ? "yes" : "no");
-
-		/* Now do the probe. The controller does not respond
-		   to 0-byte reads, so we use a 1-byte read instead. */
-		msg.addr = 0x30;
-		msg.len = 1;
-		msg.buf = &temp;
-		rc = i2c_transfer(adap, &msg, 1);
-		dprintk(1, "probe 0x%02x @ %s: %s\n",
-			msg.addr, adap->name,
-			(1 == rc) ? "yes" : "no");
-		if (1 == rc)
-			ir_attach(adap, msg.addr, 0, 0);
-	}
-
-	/* Special case for AVerMedia Cardbus remote */
-	if (adap->id == I2C_HW_SAA7134) {
-		unsigned char subaddr, data;
-		struct i2c_msg msg[] = { { .addr = 0x40, .flags = 0,
-					   .buf = &subaddr, .len = 1},
-					 { .addr = 0x40, .flags = I2C_M_RD,
-					   .buf = &data, .len = 1} };
-		subaddr = 0x0d;
-		rc = i2c_transfer(adap, msg, 2);
-		dprintk(1, "probe 0x%02x/0x%02x @ %s: %s\n",
-			msg[0].addr, subaddr, adap->name,
-			(2 == rc) ? "yes" : "no");
-		if (2 == rc)
-			ir_attach(adap, msg[0].addr, 0, 0);
-	}
+static const struct i2c_device_id ir_kbd_id[] = {
+	/* Generic entry for any IR receiver */
+	{ "ir_video", 0 },
+	/* IR device specific entries could be added here */
+	{ }
+};
 
-	return 0;
-}
+static struct i2c_driver driver = {
+	.driver = {
+		.name   = "ir-kbd-i2c",
+	},
+	.probe          = ir_probe,
+	.remove         = ir_remove,
+	.id_table       = ir_kbd_id,
+};
 
 /* ----------------------------------------------------------------------- */
 
diff --git a/drivers/media/video/ivtv/ivtv-i2c.c b/drivers/media/video/ivtv/ivtv-i2c.c
index 9e3d32b8004..0ecde9ca05c 100644
--- a/drivers/media/video/ivtv/ivtv-i2c.c
+++ b/drivers/media/video/ivtv/ivtv-i2c.c
@@ -579,9 +579,11 @@ static struct i2c_client ivtv_i2c_client_template = {
 	.name = "ivtv internal",
 };
 
-/* init + register i2c algo-bit adapter */
+/* init + register i2c adapter + instantiate IR receiver */
 int init_ivtv_i2c(struct ivtv *itv)
 {
+	int retval;
+
 	IVTV_DEBUG_I2C("i2c init\n");
 
 	/* Sanity checks for the I2C hardware arrays. They must be the
@@ -619,9 +621,32 @@ int init_ivtv_i2c(struct ivtv *itv)
 	ivtv_setsda(itv, 1);
 
 	if (itv->options.newi2c > 0)
-		return i2c_add_adapter(&itv->i2c_adap);
+		retval = i2c_add_adapter(&itv->i2c_adap);
 	else
-		return i2c_bit_add_bus(&itv->i2c_adap);
+		retval = i2c_bit_add_bus(&itv->i2c_adap);
+
+	/* Instantiate the IR receiver device, if present */
+	if (retval == 0) {
+		struct i2c_board_info info;
+		/* The external IR receiver is at i2c address 0x34 (0x35 for
+		   reads).  Future Hauppauge cards will have an internal
+		   receiver at 0x30 (0x31 for reads).  In theory, both can be
+		   fitted, and Hauppauge suggest an external overrides an
+		   internal.
+
+		   That's why we probe 0x1a (~0x34) first. CB
+		*/
+		const unsigned short addr_list[] = {
+			0x1a, 0x18, 0x64, 0x30,
+			I2C_CLIENT_END
+		};
+
+		memset(&info, 0, sizeof(struct i2c_board_info));
+		strlcpy(info.type, "ir_video", I2C_NAME_SIZE);
+		i2c_new_probed_device(&itv->i2c_adap, &info, addr_list);
+	}
+
+	return retval;
 }
 
 void exit_ivtv_i2c(struct ivtv *itv)
diff --git a/drivers/media/video/saa7134/saa7134-i2c.c b/drivers/media/video/saa7134/saa7134-i2c.c
index f3e285aa2fb..a96f75985cb 100644
--- a/drivers/media/video/saa7134/saa7134-i2c.c
+++ b/drivers/media/video/saa7134/saa7134-i2c.c
@@ -433,6 +433,9 @@ int saa7134_i2c_register(struct saa7134_dev *dev)
 	saa7134_i2c_eeprom(dev,dev->eedata,sizeof(dev->eedata));
 	if (i2c_scan)
 		do_i2c_scan(dev->name,&dev->i2c_client);
+
+	/* Instantiate the IR receiver device, if present */
+	saa7134_probe_i2c_ir(dev);
 	return 0;
 }
 
diff --git a/drivers/media/video/saa7134/saa7134-input.c b/drivers/media/video/saa7134/saa7134-input.c
index 6cd693d3b8e..4144ca9cc7e 100644
--- a/drivers/media/video/saa7134/saa7134-input.c
+++ b/drivers/media/video/saa7134/saa7134-input.c
@@ -134,10 +134,10 @@ static int get_key_msi_tvanywhere_plus(struct IR_i2c *ir, u32 *ir_key,
 	int gpio;
 
 	/* <dev> is needed to access GPIO. Used by the saa_readl macro. */
-	struct saa7134_dev *dev = ir->c.adapter->algo_data;
+	struct saa7134_dev *dev = ir->c->adapter->algo_data;
 	if (dev == NULL) {
 		dprintk("get_key_msi_tvanywhere_plus: "
-			"gir->c.adapter->algo_data is NULL!\n");
+			"gir->c->adapter->algo_data is NULL!\n");
 		return -EIO;
 	}
 
@@ -156,7 +156,7 @@ static int get_key_msi_tvanywhere_plus(struct IR_i2c *ir, u32 *ir_key,
 
 	/* GPIO says there is a button press. Get it. */
 
-	if (1 != i2c_master_recv(&ir->c, &b, 1)) {
+	if (1 != i2c_master_recv(ir->c, &b, 1)) {
 		i2cdprintk("read error\n");
 		return -EIO;
 	}
@@ -179,7 +179,7 @@ static int get_key_purpletv(struct IR_i2c *ir, u32 *ir_key, u32 *ir_raw)
 	unsigned char b;
 
 	/* poll IR chip */
-	if (1 != i2c_master_recv(&ir->c,&b,1)) {
+	if (1 != i2c_master_recv(ir->c, &b, 1)) {
 		i2cdprintk("read error\n");
 		return -EIO;
 	}
@@ -202,7 +202,7 @@ static int get_key_hvr1110(struct IR_i2c *ir, u32 *ir_key, u32 *ir_raw)
 	unsigned char buf[5], cod4, code3, code4;
 
 	/* poll IR chip */
-	if (5 != i2c_master_recv(&ir->c,buf,5))
+	if (5 != i2c_master_recv(ir->c, buf, 5))
 		return -EIO;
 
 	cod4	= buf[4];
@@ -224,7 +224,7 @@ static int get_key_beholdm6xx(struct IR_i2c *ir, u32 *ir_key, u32 *ir_raw)
 	unsigned char data[12];
 	u32 gpio;
 
-	struct saa7134_dev *dev = ir->c.adapter->algo_data;
+	struct saa7134_dev *dev = ir->c->adapter->algo_data;
 
 	/* rising SAA7134_GPIO_GPRESCAN reads the status */
 	saa_clearb(SAA7134_GPIO_GPMODE3, SAA7134_GPIO_GPRESCAN);
@@ -235,9 +235,9 @@ static int get_key_beholdm6xx(struct IR_i2c *ir, u32 *ir_key, u32 *ir_raw)
 	if (0x400000 & ~gpio)
 		return 0; /* No button press */
 
-	ir->c.addr = 0x5a >> 1;
+	ir->c->addr = 0x5a >> 1;
 
-	if (12 != i2c_master_recv(&ir->c, data, 12)) {
+	if (12 != i2c_master_recv(ir->c, data, 12)) {
 		i2cdprintk("read error\n");
 		return -EIO;
 	}
@@ -267,7 +267,7 @@ static int get_key_pinnacle(struct IR_i2c *ir, u32 *ir_key, u32 *ir_raw,
 	unsigned int start = 0,parity = 0,code = 0;
 
 	/* poll IR chip */
-	if (4 != i2c_master_recv(&ir->c, b, 4)) {
+	if (4 != i2c_master_recv(ir->c, b, 4)) {
 		i2cdprintk("read error\n");
 		return -EIO;
 	}
@@ -682,14 +682,76 @@ void saa7134_input_fini(struct saa7134_dev *dev)
 	dev->remote = NULL;
 }
 
-void saa7134_set_i2c_ir(struct saa7134_dev *dev, struct IR_i2c *ir)
+void saa7134_probe_i2c_ir(struct saa7134_dev *dev)
 {
+	struct i2c_board_info info;
+	const unsigned short addr_list[] = {
+		0x7a, 0x47, 0x71, 0x2d,
+		I2C_CLIENT_END
+	};
+
+	const unsigned short addr_list_msi[] = {
+		0x30, I2C_CLIENT_END
+	};
+	struct i2c_msg msg_msi = {
+		.addr = 0x50,
+		.flags = I2C_M_RD,
+		.len = 0,
+		.buf = NULL,
+	};
+
+	unsigned char subaddr, data;
+	struct i2c_msg msg_avermedia[] = { {
+		.addr = 0x40,
+		.flags = 0,
+		.len = 1,
+		.buf = &subaddr,
+	}, {
+		.addr = 0x40,
+		.flags = I2C_M_RD,
+		.len = 1,
+		.buf = &data,
+	} };
+
+	struct i2c_client *client;
+	int rc;
+
 	if (disable_ir) {
-		dprintk("Found supported i2c remote, but IR has been disabled\n");
-		ir->get_key=NULL;
+		dprintk("IR has been disabled, not probing for i2c remote\n");
 		return;
 	}
 
+	memset(&info, 0, sizeof(struct i2c_board_info));
+	strlcpy(info.type, "ir_video", I2C_NAME_SIZE);
+	client = i2c_new_probed_device(&dev->i2c_adap, &info, addr_list);
+	if (client)
+		return;
+
+	/* MSI TV@nywhere Plus controller doesn't seem to
+	   respond to probes unless we read something from
+	   an existing device. Weird... */
+	rc = i2c_transfer(&dev->i2c_adap, &msg_msi, 1);
+	dprintk(KERN_DEBUG "probe 0x%02x @ %s: %s\n",
+		msg_msi.addr, dev->i2c_adap.name,
+		(1 == rc) ? "yes" : "no");
+	client = i2c_new_probed_device(&dev->i2c_adap, &info, addr_list_msi);
+	if (client)
+		return;
+
+	/* Special case for AVerMedia Cardbus remote */
+	subaddr = 0x0d;
+	rc = i2c_transfer(&dev->i2c_adap, msg_avermedia, 2);
+	dprintk(KERN_DEBUG "probe 0x%02x/0x%02x @ %s: %s\n",
+		msg_avermedia[0].addr, subaddr, dev->i2c_adap.name,
+		(2 == rc) ? "yes" : "no");
+	if (2 == rc) {
+		info.addr = msg_avermedia[0].addr;
+		i2c_new_device(&dev->i2c_adap, &info);
+	}
+}
+
+void saa7134_set_i2c_ir(struct saa7134_dev *dev, struct IR_i2c *ir)
+{
 	switch (dev->board) {
 	case SAA7134_BOARD_PINNACLE_PCTV_110i:
 	case SAA7134_BOARD_PINNACLE_PCTV_310i:
diff --git a/drivers/media/video/saa7134/saa7134.h b/drivers/media/video/saa7134/saa7134.h
index 8229ab21322..116534ec33e 100644
--- a/drivers/media/video/saa7134/saa7134.h
+++ b/drivers/media/video/saa7134/saa7134.h
@@ -799,6 +799,7 @@ void saa7134_irq_oss_done(struct saa7134_dev *dev, unsigned long status);
 int  saa7134_input_init1(struct saa7134_dev *dev);
 void saa7134_input_fini(struct saa7134_dev *dev);
 void saa7134_input_irq(struct saa7134_dev *dev);
+void saa7134_probe_i2c_ir(struct saa7134_dev *dev);
 void saa7134_set_i2c_ir(struct saa7134_dev *dev, struct IR_i2c *ir);
 void saa7134_ir_start(struct saa7134_dev *dev, struct card_ir *ir);
 void saa7134_ir_stop(struct saa7134_dev *dev);
diff --git a/include/media/ir-kbd-i2c.h b/include/media/ir-kbd-i2c.h
index 6a9719c8e90..94a77b15a30 100644
--- a/include/media/ir-kbd-i2c.h
+++ b/include/media/ir-kbd-i2c.h
@@ -7,7 +7,7 @@ struct IR_i2c;
 
 struct IR_i2c {
 	IR_KEYTAB_TYPE         *ir_codes;
-	struct i2c_client      c;
+	struct i2c_client      *c;
 	struct input_dev       *input;
 	struct ir_input_state  ir;
 
-- 
cgit v1.2.3-70-g09d2


From 4d7a2d6721a6380d4ffc26d81d2c8232fd0d2dfc Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Wed, 13 May 2009 16:49:32 -0300
Subject: V4L/DVB (11845): ir-kbd-i2c: Use initialization data

For specific boards, pass initialization data to ir-kbd-i2c instead
of modifying the settings after the device is initialized. This is
more efficient and easier to read.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/cx231xx/cx231xx-cards.c |  3 -
 drivers/media/video/cx231xx/cx231xx-i2c.c   | 29 ---------
 drivers/media/video/cx231xx/cx231xx.h       |  1 -
 drivers/media/video/em28xx/em28xx-cards.c   | 31 +++++-----
 drivers/media/video/em28xx/em28xx-i2c.c     | 22 -------
 drivers/media/video/em28xx/em28xx.h         |  1 -
 drivers/media/video/ir-kbd-i2c.c            | 12 +++-
 drivers/media/video/saa7134/saa7134-i2c.c   | 28 ---------
 drivers/media/video/saa7134/saa7134-input.c | 94 ++++++++++++++---------------
 drivers/media/video/saa7134/saa7134.h       |  1 -
 include/media/ir-kbd-i2c.h                  |  7 +++
 11 files changed, 79 insertions(+), 150 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/cx231xx/cx231xx-cards.c b/drivers/media/video/cx231xx/cx231xx-cards.c
index 734f6eaefd9..63d2239fd32 100644
--- a/drivers/media/video/cx231xx/cx231xx-cards.c
+++ b/drivers/media/video/cx231xx/cx231xx-cards.c
@@ -287,10 +287,7 @@ void cx231xx_register_i2c_ir(struct cx231xx *dev)
 		return;
 
 	/* REVISIT: instantiate IR device */
-}
 
-void cx231xx_set_ir(struct cx231xx *dev, struct IR_i2c *ir)
-{
 	/* detect & configure */
 	switch (dev->model) {
 
diff --git a/drivers/media/video/cx231xx/cx231xx-i2c.c b/drivers/media/video/cx231xx/cx231xx-i2c.c
index ac4099a49da..33219dc4d64 100644
--- a/drivers/media/video/cx231xx/cx231xx-i2c.c
+++ b/drivers/media/video/cx231xx/cx231xx-i2c.c
@@ -424,34 +424,6 @@ static u32 functionality(struct i2c_adapter *adap)
 	return I2C_FUNC_SMBUS_EMUL | I2C_FUNC_I2C;
 }
 
-/*
- * attach_inform()
- * gets called when a device attaches to the i2c bus
- * does some basic configuration
- */
-static int attach_inform(struct i2c_client *client)
-{
-	struct cx231xx_i2c *bus = i2c_get_adapdata(client->adapter);
-	struct cx231xx *dev = bus->dev;
-
-	switch (client->addr << 1) {
-	case 0x8e:
-		{
-			struct IR_i2c *ir = i2c_get_clientdata(client);
-			dprintk1(1, "attach_inform: IR detected (%s).\n",
-				 ir->phys);
-			cx231xx_set_ir(dev, ir);
-			break;
-		}
-		break;
-
-	default:
-		break;
-	}
-
-	return 0;
-}
-
 static struct i2c_algorithm cx231xx_algo = {
 	.master_xfer = cx231xx_i2c_xfer,
 	.functionality = functionality,
@@ -462,7 +434,6 @@ static struct i2c_adapter cx231xx_adap_template = {
 	.name = "cx231xx",
 	.id = I2C_HW_B_CX231XX,
 	.algo = &cx231xx_algo,
-	.client_register = attach_inform,
 };
 
 static struct i2c_client cx231xx_client_template = {
diff --git a/drivers/media/video/cx231xx/cx231xx.h b/drivers/media/video/cx231xx/cx231xx.h
index 8c300f60fdf..e38eb2d425f 100644
--- a/drivers/media/video/cx231xx/cx231xx.h
+++ b/drivers/media/video/cx231xx/cx231xx.h
@@ -739,7 +739,6 @@ extern struct cx231xx_board cx231xx_boards[];
 extern struct usb_device_id cx231xx_id_table[];
 extern const unsigned int cx231xx_bcount;
 void cx231xx_register_i2c_ir(struct cx231xx *dev);
-void cx231xx_set_ir(struct cx231xx *dev, struct IR_i2c *ir);
 int cx231xx_tuner_callback(void *ptr, int component, int command, int arg);
 
 /* Provided by cx231xx-input.c */
diff --git a/drivers/media/video/em28xx/em28xx-cards.c b/drivers/media/video/em28xx/em28xx-cards.c
index edba71115a9..fe2a471e5f6 100644
--- a/drivers/media/video/em28xx/em28xx-cards.c
+++ b/drivers/media/video/em28xx/em28xx-cards.c
@@ -1915,6 +1915,7 @@ static int em28xx_hint_board(struct em28xx *dev)
 void em28xx_register_i2c_ir(struct em28xx *dev)
 {
 	struct i2c_board_info info;
+	struct IR_i2c_init_data init_data;
 	const unsigned short addr_list[] = {
 		 0x30, 0x47, I2C_CLIENT_END
 	};
@@ -1923,12 +1924,9 @@ void em28xx_register_i2c_ir(struct em28xx *dev)
 		return;
 
 	memset(&info, 0, sizeof(struct i2c_board_info));
+	memset(&init_data, 0, sizeof(struct IR_i2c_init_data));
 	strlcpy(info.type, "ir_video", I2C_NAME_SIZE);
-	i2c_new_probed_device(&dev->i2c_adap, &info, addr_list);
-}
 
-void em28xx_set_ir(struct em28xx *dev, struct IR_i2c *ir)
-{
 	/* detect & configure */
 	switch (dev->model) {
 	case (EM2800_BOARD_UNKNOWN):
@@ -1937,22 +1935,19 @@ void em28xx_set_ir(struct em28xx *dev, struct IR_i2c *ir)
 		break;
 	case (EM2800_BOARD_TERRATEC_CINERGY_200):
 	case (EM2820_BOARD_TERRATEC_CINERGY_250):
-		ir->ir_codes = ir_codes_em_terratec;
-		ir->get_key = em28xx_get_key_terratec;
-		snprintf(ir->name, sizeof(ir->name),
-			 "i2c IR (EM28XX Terratec)");
+		init_data.ir_codes = ir_codes_em_terratec;
+		init_data.get_key = em28xx_get_key_terratec;
+		init_data.name = "i2c IR (EM28XX Terratec)";
 		break;
 	case (EM2820_BOARD_PINNACLE_USB_2):
-		ir->ir_codes = ir_codes_pinnacle_grey;
-		ir->get_key = em28xx_get_key_pinnacle_usb_grey;
-		snprintf(ir->name, sizeof(ir->name),
-			 "i2c IR (EM28XX Pinnacle PCTV)");
+		init_data.ir_codes = ir_codes_pinnacle_grey;
+		init_data.get_key = em28xx_get_key_pinnacle_usb_grey;
+		init_data.name = "i2c IR (EM28XX Pinnacle PCTV)";
 		break;
 	case (EM2820_BOARD_HAUPPAUGE_WINTV_USB_2):
-		ir->ir_codes = ir_codes_hauppauge_new;
-		ir->get_key = em28xx_get_key_em_haup;
-		snprintf(ir->name, sizeof(ir->name),
-			 "i2c IR (EM2840 Hauppauge)");
+		init_data.ir_codes = ir_codes_hauppauge_new;
+		init_data.get_key = em28xx_get_key_em_haup;
+		init_data.name = "i2c IR (EM2840 Hauppauge)";
 		break;
 	case (EM2820_BOARD_MSI_VOX_USB_2):
 		break;
@@ -1963,6 +1958,10 @@ void em28xx_set_ir(struct em28xx *dev, struct IR_i2c *ir)
 	case (EM2800_BOARD_GRABBEEX_USB2800):
 		break;
 	}
+
+	if (init_data.name)
+		info.platform_data = &init_data;
+	i2c_new_probed_device(&dev->i2c_adap, &info, addr_list);
 }
 
 void em28xx_card_setup(struct em28xx *dev)
diff --git a/drivers/media/video/em28xx/em28xx-i2c.c b/drivers/media/video/em28xx/em28xx-i2c.c
index d90294cbb70..2c86fcf089f 100644
--- a/drivers/media/video/em28xx/em28xx-i2c.c
+++ b/drivers/media/video/em28xx/em28xx-i2c.c
@@ -451,27 +451,6 @@ static u32 functionality(struct i2c_adapter *adap)
 	return I2C_FUNC_SMBUS_EMUL;
 }
 
-/*
- * attach_inform()
- * gets called when a device attaches to the i2c bus
- * does some basic configuration
- */
-static int attach_inform(struct i2c_client *client)
-{
-	struct em28xx *dev = client->adapter->algo_data;
-	struct IR_i2c *ir = i2c_get_clientdata(client);
-
-	switch (client->addr << 1) {
-	case 0x60:
-	case 0x8e:
-		dprintk1(1, "attach_inform: IR detected (%s).\n", ir->phys);
-		em28xx_set_ir(dev, ir);
-		break;
-	}
-
-	return 0;
-}
-
 static struct i2c_algorithm em28xx_algo = {
 	.master_xfer   = em28xx_i2c_xfer,
 	.functionality = functionality,
@@ -482,7 +461,6 @@ static struct i2c_adapter em28xx_adap_template = {
 	.name = "em28xx",
 	.id = I2C_HW_B_EM28XX,
 	.algo = &em28xx_algo,
-	.client_register = attach_inform,
 };
 
 static struct i2c_client em28xx_client_template = {
diff --git a/drivers/media/video/em28xx/em28xx.h b/drivers/media/video/em28xx/em28xx.h
index 9c632541df1..8db797fedb7 100644
--- a/drivers/media/video/em28xx/em28xx.h
+++ b/drivers/media/video/em28xx/em28xx.h
@@ -644,7 +644,6 @@ extern struct em28xx_board em28xx_boards[];
 extern struct usb_device_id em28xx_id_table[];
 extern const unsigned int em28xx_bcount;
 void em28xx_register_i2c_ir(struct em28xx *dev);
-void em28xx_set_ir(struct em28xx *dev, struct IR_i2c *ir);
 int em28xx_tuner_callback(void *ptr, int component, int command, int arg);
 void em28xx_release_resources(struct em28xx *dev);
 
diff --git a/drivers/media/video/ir-kbd-i2c.c b/drivers/media/video/ir-kbd-i2c.c
index cb833a63041..3a8880243b1 100644
--- a/drivers/media/video/ir-kbd-i2c.c
+++ b/drivers/media/video/ir-kbd-i2c.c
@@ -298,7 +298,7 @@ static void ir_work(struct work_struct *work)
 static int ir_probe(struct i2c_client *client, const struct i2c_device_id *id)
 {
 	IR_KEYTAB_TYPE *ir_codes = NULL;
-	char *name;
+	const char *name;
 	int ir_type;
 	struct IR_i2c *ir;
 	struct input_dev *input_dev;
@@ -386,6 +386,16 @@ static int ir_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		goto err_out_free;
 	}
 
+	/* Let the caller override settings */
+	if (client->dev.platform_data) {
+		const struct IR_i2c_init_data *init_data =
+						client->dev.platform_data;
+
+		ir_codes = init_data->ir_codes;
+		name = init_data->name;
+		ir->get_key = init_data->get_key;
+	}
+
 	/* Sets name */
 	snprintf(ir->name, sizeof(ir->name), "i2c IR (%s)", name);
 	ir->ir_codes = ir_codes;
diff --git a/drivers/media/video/saa7134/saa7134-i2c.c b/drivers/media/video/saa7134/saa7134-i2c.c
index a96f75985cb..a8a355e2879 100644
--- a/drivers/media/video/saa7134/saa7134-i2c.c
+++ b/drivers/media/video/saa7134/saa7134-i2c.c
@@ -321,33 +321,6 @@ static u32 functionality(struct i2c_adapter *adap)
 	return I2C_FUNC_SMBUS_EMUL;
 }
 
-static int attach_inform(struct i2c_client *client)
-{
-	struct saa7134_dev *dev = client->adapter->algo_data;
-
-	d1printk( "%s i2c attach [addr=0x%x,client=%s]\n",
-		client->driver->driver.name, client->addr, client->name);
-
-	/* Am I an i2c remote control? */
-
-	switch (client->addr) {
-		case 0x7a:
-		case 0x47:
-		case 0x71:
-		case 0x2d:
-		case 0x30:
-		{
-			struct IR_i2c *ir = i2c_get_clientdata(client);
-			d1printk("%s i2c IR detected (%s).\n",
-				 client->driver->driver.name, ir->phys);
-			saa7134_set_i2c_ir(dev,ir);
-			break;
-		}
-	}
-
-	return 0;
-}
-
 static struct i2c_algorithm saa7134_algo = {
 	.master_xfer   = saa7134_i2c_xfer,
 	.functionality = functionality,
@@ -358,7 +331,6 @@ static struct i2c_adapter saa7134_adap_template = {
 	.name          = "saa7134",
 	.id            = I2C_HW_SAA7134,
 	.algo          = &saa7134_algo,
-	.client_register = attach_inform,
 };
 
 static struct i2c_client saa7134_client_template = {
diff --git a/drivers/media/video/saa7134/saa7134-input.c b/drivers/media/video/saa7134/saa7134-input.c
index 4144ca9cc7e..a20f687f81a 100644
--- a/drivers/media/video/saa7134/saa7134-input.c
+++ b/drivers/media/video/saa7134/saa7134-input.c
@@ -685,6 +685,7 @@ void saa7134_input_fini(struct saa7134_dev *dev)
 void saa7134_probe_i2c_ir(struct saa7134_dev *dev)
 {
 	struct i2c_board_info info;
+	struct IR_i2c_init_data init_data;
 	const unsigned short addr_list[] = {
 		0x7a, 0x47, 0x71, 0x2d,
 		I2C_CLIENT_END
@@ -722,62 +723,35 @@ void saa7134_probe_i2c_ir(struct saa7134_dev *dev)
 	}
 
 	memset(&info, 0, sizeof(struct i2c_board_info));
+	memset(&init_data, 0, sizeof(struct IR_i2c_init_data));
 	strlcpy(info.type, "ir_video", I2C_NAME_SIZE);
-	client = i2c_new_probed_device(&dev->i2c_adap, &info, addr_list);
-	if (client)
-		return;
-
-	/* MSI TV@nywhere Plus controller doesn't seem to
-	   respond to probes unless we read something from
-	   an existing device. Weird... */
-	rc = i2c_transfer(&dev->i2c_adap, &msg_msi, 1);
-	dprintk(KERN_DEBUG "probe 0x%02x @ %s: %s\n",
-		msg_msi.addr, dev->i2c_adap.name,
-		(1 == rc) ? "yes" : "no");
-	client = i2c_new_probed_device(&dev->i2c_adap, &info, addr_list_msi);
-	if (client)
-		return;
 
-	/* Special case for AVerMedia Cardbus remote */
-	subaddr = 0x0d;
-	rc = i2c_transfer(&dev->i2c_adap, msg_avermedia, 2);
-	dprintk(KERN_DEBUG "probe 0x%02x/0x%02x @ %s: %s\n",
-		msg_avermedia[0].addr, subaddr, dev->i2c_adap.name,
-		(2 == rc) ? "yes" : "no");
-	if (2 == rc) {
-		info.addr = msg_avermedia[0].addr;
-		i2c_new_device(&dev->i2c_adap, &info);
-	}
-}
-
-void saa7134_set_i2c_ir(struct saa7134_dev *dev, struct IR_i2c *ir)
-{
 	switch (dev->board) {
 	case SAA7134_BOARD_PINNACLE_PCTV_110i:
 	case SAA7134_BOARD_PINNACLE_PCTV_310i:
-		snprintf(ir->name, sizeof(ir->name), "Pinnacle PCTV");
+		init_data.name = "Pinnacle PCTV";
 		if (pinnacle_remote == 0) {
-			ir->get_key   = get_key_pinnacle_color;
-			ir->ir_codes = ir_codes_pinnacle_color;
+			init_data.get_key = get_key_pinnacle_color;
+			init_data.ir_codes = ir_codes_pinnacle_color;
 		} else {
-			ir->get_key   = get_key_pinnacle_grey;
-			ir->ir_codes = ir_codes_pinnacle_grey;
+			init_data.get_key = get_key_pinnacle_grey;
+			init_data.ir_codes = ir_codes_pinnacle_grey;
 		}
 		break;
 	case SAA7134_BOARD_UPMOST_PURPLE_TV:
-		snprintf(ir->name, sizeof(ir->name), "Purple TV");
-		ir->get_key   = get_key_purpletv;
-		ir->ir_codes  = ir_codes_purpletv;
+		init_data.name = "Purple TV";
+		init_data.get_key = get_key_purpletv;
+		init_data.ir_codes = ir_codes_purpletv;
 		break;
 	case SAA7134_BOARD_MSI_TVATANYWHERE_PLUS:
-		snprintf(ir->name, sizeof(ir->name), "MSI TV@nywhere Plus");
-		ir->get_key  = get_key_msi_tvanywhere_plus;
-		ir->ir_codes = ir_codes_msi_tvanywhere_plus;
+		init_data.name = "MSI TV@nywhere Plus";
+		init_data.get_key = get_key_msi_tvanywhere_plus;
+		init_data.ir_codes = ir_codes_msi_tvanywhere_plus;
 		break;
 	case SAA7134_BOARD_HAUPPAUGE_HVR1110:
-		snprintf(ir->name, sizeof(ir->name), "HVR 1110");
-		ir->get_key   = get_key_hvr1110;
-		ir->ir_codes  = ir_codes_hauppauge_new;
+		init_data.name = "HVR 1110";
+		init_data.get_key = get_key_hvr1110;
+		init_data.ir_codes = ir_codes_hauppauge_new;
 		break;
 	case SAA7134_BOARD_BEHOLD_607FM_MK3:
 	case SAA7134_BOARD_BEHOLD_607FM_MK5:
@@ -791,15 +765,39 @@ void saa7134_set_i2c_ir(struct saa7134_dev *dev, struct IR_i2c *ir)
 	case SAA7134_BOARD_BEHOLD_M63:
 	case SAA7134_BOARD_BEHOLD_M6_EXTRA:
 	case SAA7134_BOARD_BEHOLD_H6:
-		snprintf(ir->name, sizeof(ir->name), "BeholdTV");
-		ir->get_key   = get_key_beholdm6xx;
-		ir->ir_codes  = ir_codes_behold;
-		break;
-	default:
-		dprintk("Shouldn't get here: Unknown board %x for I2C IR?\n",dev->board);
+		init_data.name = "BeholdTV";
+		init_data.get_key = get_key_beholdm6xx;
+		init_data.ir_codes = ir_codes_behold;
 		break;
 	}
 
+	if (init_data.name)
+		info.platform_data = &init_data;
+	client = i2c_new_probed_device(&dev->i2c_adap, &info, addr_list);
+	if (client)
+		return;
+
+	/* MSI TV@nywhere Plus controller doesn't seem to
+	   respond to probes unless we read something from
+	   an existing device. Weird... */
+	rc = i2c_transfer(&dev->i2c_adap, &msg_msi, 1);
+	dprintk(KERN_DEBUG "probe 0x%02x @ %s: %s\n",
+		msg_msi.addr, dev->i2c_adap.name,
+		(1 == rc) ? "yes" : "no");
+	client = i2c_new_probed_device(&dev->i2c_adap, &info, addr_list_msi);
+	if (client)
+		return;
+
+	/* Special case for AVerMedia Cardbus remote */
+	subaddr = 0x0d;
+	rc = i2c_transfer(&dev->i2c_adap, msg_avermedia, 2);
+	dprintk(KERN_DEBUG "probe 0x%02x/0x%02x @ %s: %s\n",
+		msg_avermedia[0].addr, subaddr, dev->i2c_adap.name,
+		(2 == rc) ? "yes" : "no");
+	if (2 == rc) {
+		info.addr = msg_avermedia[0].addr;
+		i2c_new_device(&dev->i2c_adap, &info);
+	}
 }
 
 static int saa7134_rc5_irq(struct saa7134_dev *dev)
diff --git a/drivers/media/video/saa7134/saa7134.h b/drivers/media/video/saa7134/saa7134.h
index 116534ec33e..ae7602d343c 100644
--- a/drivers/media/video/saa7134/saa7134.h
+++ b/drivers/media/video/saa7134/saa7134.h
@@ -800,7 +800,6 @@ int  saa7134_input_init1(struct saa7134_dev *dev);
 void saa7134_input_fini(struct saa7134_dev *dev);
 void saa7134_input_irq(struct saa7134_dev *dev);
 void saa7134_probe_i2c_ir(struct saa7134_dev *dev);
-void saa7134_set_i2c_ir(struct saa7134_dev *dev, struct IR_i2c *ir);
 void saa7134_ir_start(struct saa7134_dev *dev, struct card_ir *ir);
 void saa7134_ir_stop(struct saa7134_dev *dev);
 
diff --git a/include/media/ir-kbd-i2c.h b/include/media/ir-kbd-i2c.h
index 94a77b15a30..3ad4ed5402f 100644
--- a/include/media/ir-kbd-i2c.h
+++ b/include/media/ir-kbd-i2c.h
@@ -19,4 +19,11 @@ struct IR_i2c {
 	char                   phys[32];
 	int                    (*get_key)(struct IR_i2c*, u32*, u32*);
 };
+
+/* Can be passed when instantiating an ir_video i2c device */
+struct IR_i2c_init_data {
+	IR_KEYTAB_TYPE         *ir_codes;
+	const char             *name;
+	int                    (*get_key)(struct IR_i2c*, u32*, u32*);
+};
 #endif
-- 
cgit v1.2.3-70-g09d2


From 84845c070ce3ac4d3bd2c148fa20ba8ce5409167 Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Date: Tue, 26 May 2009 16:05:06 +0900
Subject: PCI: use pci_is_root_bus() in acpi_pci_get_bridge_handle()

Use pci_is_root_bus() in acpi_pci_get_bridge_handle() to check if the
pci bus is root, for code consistency.

Reviewed-by: Grant Grundler <grundler@parisc-linux.org>
Reviewed-by: Alex Chiang <achiang@hp.com>
Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci-acpi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index 092e82e0048..df67c78dfe2 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -23,7 +23,7 @@ static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
 
 static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus)
 {
-	if (pbus->parent)
+	if (!pci_is_root_bus(pbus))
 		return DEVICE_ACPI_HANDLE(&(pbus->self->dev));
 	return acpi_get_pci_rootbridge_handle(pci_domain_nr(pbus),
 					      pbus->number);
-- 
cgit v1.2.3-70-g09d2


From a222b8f83b995e9c6fe2aff2a8125facb49f658e Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Date: Tue, 26 May 2009 16:05:33 +0900
Subject: PCI: use pci_is_root_bus() in acpi_find_root_bridge_handle()

Use pci_is_root_bus() in acpi_find_root_bridge_handle() to check if
the pci bus is root, for code consistency.

Reviewed-by: Alex Chiang <achiang@hp.com>
Reviewed-by: Grant Grundler <grundler@parisc-linux.org>
Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci-acpi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index df67c78dfe2..93a7c08f869 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -15,7 +15,7 @@ static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
 {
 	struct pci_bus *pbus = pdev->bus;
 	/* Find a PCI root bus */
-	while (pbus->parent)
+	while (!pci_is_root_bus(pbus))
 		pbus = pbus->parent;
 	return acpi_get_pci_rootbridge_handle(pci_domain_nr(pbus),
 					      pbus->number);
-- 
cgit v1.2.3-70-g09d2


From a72b46c3849cdb05993015991bde548ab8b6d7ac Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Fri, 24 Apr 2009 10:45:17 +0800
Subject: PCI: Add pci_bus_set_ops

pci_bus_set_ops changes pci_ops associated with a pci_bus. This can be
used by debug tools such as PCIE AER error injection to fake some PCI
configuration registers.

Acked-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/access.c | 19 +++++++++++++++++++
 include/linux/pci.h  |  1 +
 2 files changed, 20 insertions(+)

(limited to 'include')

diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 0f370651268..db23200c487 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -66,6 +66,25 @@ EXPORT_SYMBOL(pci_bus_write_config_byte);
 EXPORT_SYMBOL(pci_bus_write_config_word);
 EXPORT_SYMBOL(pci_bus_write_config_dword);
 
+/**
+ * pci_bus_set_ops - Set raw operations of pci bus
+ * @bus:	pci bus struct
+ * @ops:	new raw operations
+ *
+ * Return previous raw operations
+ */
+struct pci_ops *pci_bus_set_ops(struct pci_bus *bus, struct pci_ops *ops)
+{
+	struct pci_ops *old_ops;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pci_lock, flags);
+	old_ops = bus->ops;
+	bus->ops = ops;
+	spin_unlock_irqrestore(&pci_lock, flags);
+	return old_ops;
+}
+EXPORT_SYMBOL(pci_bus_set_ops);
 
 /**
  * pci_read_vpd - Read one entry from Vital Product Data
diff --git a/include/linux/pci.h b/include/linux/pci.h
index ec03b90d351..ea2a153a912 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -637,6 +637,7 @@ int pci_bus_write_config_word(struct pci_bus *bus, unsigned int devfn,
 			      int where, u16 val);
 int pci_bus_write_config_dword(struct pci_bus *bus, unsigned int devfn,
 			       int where, u32 val);
+struct pci_ops *pci_bus_set_ops(struct pci_bus *bus, struct pci_ops *ops);
 
 static inline int pci_read_config_byte(struct pci_dev *dev, int where, u8 *val)
 {
-- 
cgit v1.2.3-70-g09d2


From bd3d99c17039fd05a29587db3f4a180c48da115a Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Date: Tue, 2 Jun 2009 13:52:26 +0900
Subject: PCI: Remove untested Electromechanical Interlock (EMI) support in
 pciehp.

The EMI support in pciehp is obviously broken. It is implemented using
struct hotplug_slot_attribute, but sysfs_ops for pci_slot_ktype is NOT
for struct hotplug_slot_attribute, but for struct pci_slot_attribute.
This bug had been there for a long time, maybe it was introduced when
PCI slot framework was introduced. The reason why this bug didn't
cause any problem is maybe the EMI support is not tested at all
because of lack of test environment.

As described above, the EMI support in pciehp seems not to be tested
at all. So this patch removes EMI support from pciehp, instead of
fixing the bug.

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/hotplug/pciehp.h      |   3 --
 drivers/pci/hotplug/pciehp_core.c | 111 +-------------------------------------
 drivers/pci/hotplug/pciehp_hpc.c  |  31 -----------
 include/linux/pci_hotplug.h       |   8 ---
 4 files changed, 1 insertion(+), 152 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 0a368547e63..e6cf096498b 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -81,7 +81,6 @@ struct slot {
 	struct hpc_ops *hpc_ops;
 	struct hotplug_slot *hotplug_slot;
 	struct list_head	slot_list;
-	unsigned long last_emi_toggle;
 	struct delayed_work work;	/* work for button event */
 	struct mutex lock;
 };
@@ -203,8 +202,6 @@ struct hpc_ops {
 	int (*set_attention_status)(struct slot *slot, u8 status);
 	int (*get_latch_status)(struct slot *slot, u8 *status);
 	int (*get_adapter_status)(struct slot *slot, u8 *status);
-	int (*get_emi_status)(struct slot *slot, u8 *status);
-	int (*toggle_emi)(struct slot *slot);
 	int (*get_max_bus_speed)(struct slot *slot, enum pci_bus_speed *speed);
 	int (*get_cur_bus_speed)(struct slot *slot, enum pci_bus_speed *speed);
 	int (*get_max_lnk_width)(struct slot *slot, enum pcie_link_width *val);
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index fb254b2454d..eb183d1d091 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -85,99 +85,6 @@ static struct hotplug_slot_ops pciehp_hotplug_slot_ops = {
   	.get_cur_bus_speed =	get_cur_bus_speed,
 };
 
-/*
- * Check the status of the Electro Mechanical Interlock (EMI)
- */
-static int get_lock_status(struct hotplug_slot *hotplug_slot, u8 *value)
-{
-	struct slot *slot = hotplug_slot->private;
-	return (slot->hpc_ops->get_emi_status(slot, value));
-}
-
-/*
- * sysfs interface for the Electro Mechanical Interlock (EMI)
- * 1 == locked, 0 == unlocked
- */
-static ssize_t lock_read_file(struct hotplug_slot *slot, char *buf)
-{
-	int retval;
-	u8 value;
-
-	retval = get_lock_status(slot, &value);
-	if (retval)
-		goto lock_read_exit;
-	retval = sprintf (buf, "%d\n", value);
-
-lock_read_exit:
-	return retval;
-}
-
-/*
- * Change the status of the Electro Mechanical Interlock (EMI)
- * This is a toggle - in addition there must be at least 1 second
- * in between toggles.
- */
-static int set_lock_status(struct hotplug_slot *hotplug_slot, u8 status)
-{
-	struct slot *slot = hotplug_slot->private;
-	int retval;
-	u8 value;
-
-	mutex_lock(&slot->ctrl->crit_sect);
-
-	/* has it been >1 sec since our last toggle? */
-	if ((get_seconds() - slot->last_emi_toggle) < 1) {
-		mutex_unlock(&slot->ctrl->crit_sect);
-		return -EINVAL;
-	}
-
-	/* see what our current state is */
-	retval = get_lock_status(hotplug_slot, &value);
-	if (retval || (value == status))
-		goto set_lock_exit;
-
-	slot->hpc_ops->toggle_emi(slot);
-set_lock_exit:
-	mutex_unlock(&slot->ctrl->crit_sect);
-	return 0;
-}
-
-/*
- * sysfs interface which allows the user to toggle the Electro Mechanical
- * Interlock.  Valid values are either 0 or 1.  0 == unlock, 1 == lock
- */
-static ssize_t lock_write_file(struct hotplug_slot *hotplug_slot,
-		const char *buf, size_t count)
-{
-	struct slot *slot = hotplug_slot->private;
-	unsigned long llock;
-	u8 lock;
-	int retval = 0;
-
-	llock = simple_strtoul(buf, NULL, 10);
-	lock = (u8)(llock & 0xff);
-
-	switch (lock) {
-		case 0:
-		case 1:
-			retval = set_lock_status(hotplug_slot, lock);
-			break;
-		default:
-			ctrl_err(slot->ctrl, "%d is an invalid lock value\n",
-				 lock);
-			retval = -EINVAL;
-	}
-	if (retval)
-		return retval;
-	return count;
-}
-
-static struct hotplug_slot_attribute hotplug_slot_attr_lock = {
-	.attr = {.name = "lock", .mode = S_IFREG | S_IRUGO | S_IWUSR},
-	.show = lock_read_file,
-	.store = lock_write_file
-};
-
 /**
  * release_slot - free up the memory used by a slot
  * @hotplug_slot: slot to free
@@ -236,17 +143,6 @@ static int init_slots(struct controller *ctrl)
 		get_attention_status(hotplug_slot, &info->attention_status);
 		get_latch_status(hotplug_slot, &info->latch_status);
 		get_adapter_status(hotplug_slot, &info->adapter_status);
-		/* create additional sysfs entries */
-		if (EMI(ctrl)) {
-			retval = sysfs_create_file(&hotplug_slot->pci_slot->kobj,
-				&hotplug_slot_attr_lock.attr);
-			if (retval) {
-				pci_hp_deregister(hotplug_slot);
-				ctrl_err(ctrl, "Cannot create additional sysfs "
-					 "entries\n");
-				goto error_info;
-			}
-		}
 	}
 
 	return 0;
@@ -261,13 +157,8 @@ error:
 static void cleanup_slots(struct controller *ctrl)
 {
 	struct slot *slot;
-
-	list_for_each_entry(slot, &ctrl->slot_list, slot_list) {
-		if (EMI(ctrl))
-			sysfs_remove_file(&slot->hotplug_slot->pci_slot->kobj,
-				&hotplug_slot_attr_lock.attr);
+	list_for_each_entry(slot, &ctrl->slot_list, slot_list)
 		pci_hp_deregister(slot->hotplug_slot);
-	}
 }
 
 /*
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 07bd3215114..52813257e5b 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -422,35 +422,6 @@ static int hpc_query_power_fault(struct slot *slot)
 	return !!(slot_status & PCI_EXP_SLTSTA_PFD);
 }
 
-static int hpc_get_emi_status(struct slot *slot, u8 *status)
-{
-	struct controller *ctrl = slot->ctrl;
-	u16 slot_status;
-	int retval;
-
-	retval = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status);
-	if (retval) {
-		ctrl_err(ctrl, "Cannot check EMI status\n");
-		return retval;
-	}
-	*status = !!(slot_status & PCI_EXP_SLTSTA_EIS);
-	return retval;
-}
-
-static int hpc_toggle_emi(struct slot *slot)
-{
-	u16 slot_cmd;
-	u16 cmd_mask;
-	int rc;
-
-	slot_cmd = PCI_EXP_SLTCTL_EIC;
-	cmd_mask = PCI_EXP_SLTCTL_EIC;
-	rc = pcie_write_cmd(slot->ctrl, slot_cmd, cmd_mask);
-	slot->last_emi_toggle = get_seconds();
-
-	return rc;
-}
-
 static int hpc_set_attention_status(struct slot *slot, u8 value)
 {
 	struct controller *ctrl = slot->ctrl;
@@ -874,8 +845,6 @@ static struct hpc_ops pciehp_hpc_ops = {
 	.get_attention_status		= hpc_get_attention_status,
 	.get_latch_status		= hpc_get_latch_status,
 	.get_adapter_status		= hpc_get_adapter_status,
-	.get_emi_status			= hpc_get_emi_status,
-	.toggle_emi			= hpc_toggle_emi,
 
 	.get_max_bus_speed		= hpc_get_max_lnk_speed,
 	.get_cur_bus_speed		= hpc_get_cur_lnk_speed,
diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h
index 20998746518..11936fd0b56 100644
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -66,14 +66,6 @@ enum pcie_link_speed {
 	PCIE_LNK_SPEED_UNKNOWN	= 0xFF,
 };
 
-struct hotplug_slot;
-struct hotplug_slot_attribute {
-	struct attribute attr;
-	ssize_t (*show)(struct hotplug_slot *, char *);
-	ssize_t (*store)(struct hotplug_slot *, const char *, size_t);
-};
-#define to_hotplug_attr(n) container_of(n, struct hotplug_slot_attribute, attr);
-
 /**
  * struct hotplug_slot_ops -the callbacks that the hotplug pci core can use
  * @owner: The module owner of this structure
-- 
cgit v1.2.3-70-g09d2


From c825bc94c8c1908750ab20413eb639c6be029e2d Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Date: Tue, 16 Jun 2009 11:01:25 +0900
Subject: PCI hotplug: create symlink to hotplug driver module

Create symbolic link to hotplug driver module in the PCI slot
directory (/sys/bus/pci/slots/<SLOT#>). In the past, we need to load
hotplug drivers one by one to identify the hotplug driver that handles
the slot, and it was very inconvenient especially for trouble shooting.
With this change, we can easily identify the hotplug driver.

Signed-off-by: Taku Izumi <izumi.taku@jp.fujitsu.com>
Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Reviewed-by: Alex Chiang <achiang@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 Documentation/ABI/testing/sysfs-bus-pci |  7 ++++++
 drivers/pci/hotplug/pci_hotplug_core.c  | 23 +++++++++++++------
 drivers/pci/slot.c                      | 39 +++++++++++++++++++++++++++++++++
 include/linux/pci.h                     |  5 +++++
 include/linux/pci_hotplug.h             | 15 +++++++++++--
 5 files changed, 80 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index 97ad190e13a..6bf68053e4b 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -122,3 +122,10 @@ Description:
 		This symbolic link appears when a device is a Virtual Function.
 		The symbolic link points to the PCI device sysfs entry of the
 		Physical Function this device associates with.
+
+What:		/sys/bus/pci/slots/.../module
+Date:		June 2009
+Contact:	linux-pci@vger.kernel.org
+Description:
+		This symbolic link points to the PCI hotplug controller driver
+		module that manages the hotplug slot.
diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c
index ff32c6b4ae1..844580489d4 100644
--- a/drivers/pci/hotplug/pci_hotplug_core.c
+++ b/drivers/pci/hotplug/pci_hotplug_core.c
@@ -424,6 +424,9 @@ static int fs_add_slot(struct pci_slot *slot)
 {
 	int retval = 0;
 
+	/* Create symbolic link to the hotplug driver module */
+	pci_hp_create_module_link(slot);
+
 	if (has_power_file(slot)) {
 		retval = sysfs_create_file(&slot->kobj,
 					   &hotplug_slot_attr_power.attr);
@@ -498,6 +501,7 @@ exit_attention:
 	if (has_power_file(slot))
 		sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_power.attr);
 exit_power:
+	pci_hp_remove_module_link(slot);
 exit:
 	return retval;
 }
@@ -528,6 +532,8 @@ static void fs_remove_slot(struct pci_slot *slot)
 
 	if (has_test_file(slot))
 		sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_test.attr);
+
+	pci_hp_remove_module_link(slot);
 }
 
 static struct hotplug_slot *get_slot_from_name (const char *name)
@@ -544,10 +550,10 @@ static struct hotplug_slot *get_slot_from_name (const char *name)
 }
 
 /**
- * pci_hp_register - register a hotplug_slot with the PCI hotplug subsystem
+ * __pci_hp_register - register a hotplug_slot with the PCI hotplug subsystem
  * @bus: bus this slot is on
  * @slot: pointer to the &struct hotplug_slot to register
- * @slot_nr: slot number
+ * @devnr: device number
  * @name: name registered with kobject core
  *
  * Registers a hotplug slot with the pci hotplug subsystem, which will allow
@@ -555,8 +561,9 @@ static struct hotplug_slot *get_slot_from_name (const char *name)
  *
  * Returns 0 if successful, anything else for an error.
  */
-int pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus, int slot_nr,
-			const char *name)
+int __pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus,
+		      int devnr, const char *name,
+		      struct module *owner, const char *mod_name)
 {
 	int result;
 	struct pci_slot *pci_slot;
@@ -571,14 +578,16 @@ int pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus, int slot_nr,
 		return -EINVAL;
 	}
 
-	mutex_lock(&pci_hp_mutex);
+	slot->ops->owner = owner;
+	slot->ops->mod_name = mod_name;
 
+	mutex_lock(&pci_hp_mutex);
 	/*
 	 * No problems if we call this interface from both ACPI_PCI_SLOT
 	 * driver and call it here again. If we've already created the
 	 * pci_slot, the interface will simply bump the refcount.
 	 */
-	pci_slot = pci_create_slot(bus, slot_nr, name, slot);
+	pci_slot = pci_create_slot(bus, devnr, name, slot);
 	if (IS_ERR(pci_slot)) {
 		result = PTR_ERR(pci_slot);
 		goto out;
@@ -688,6 +697,6 @@ MODULE_LICENSE("GPL");
 module_param(debug, bool, 0644);
 MODULE_PARM_DESC(debug, "Debugging mode enabled or not");
 
-EXPORT_SYMBOL_GPL(pci_hp_register);
+EXPORT_SYMBOL_GPL(__pci_hp_register);
 EXPORT_SYMBOL_GPL(pci_hp_deregister);
 EXPORT_SYMBOL_GPL(pci_hp_change_slot_info);
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index fe95ce20bcb..eddb0748b0e 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -307,6 +307,45 @@ void pci_destroy_slot(struct pci_slot *slot)
 }
 EXPORT_SYMBOL_GPL(pci_destroy_slot);
 
+#if defined(CONFIG_HOTPLUG_PCI) || defined(CONFIG_HOTPLUG_PCI_MODULE)
+#include <linux/pci_hotplug.h>
+/**
+ * pci_hp_create_link - create symbolic link to the hotplug driver module.
+ * @slot: struct pci_slot
+ *
+ * Helper function for pci_hotplug_core.c to create symbolic link to
+ * the hotplug driver module.
+ */
+void pci_hp_create_module_link(struct pci_slot *pci_slot)
+{
+	struct hotplug_slot *slot = pci_slot->hotplug;
+	struct kobject *kobj = NULL;
+	int no_warn;
+
+	if (!slot || !slot->ops)
+		return;
+	kobj = kset_find_obj(module_kset, slot->ops->mod_name);
+	if (!kobj)
+		return;
+	no_warn = sysfs_create_link(&pci_slot->kobj, kobj, "module");
+	kobject_put(kobj);
+}
+EXPORT_SYMBOL_GPL(pci_hp_create_module_link);
+
+/**
+ * pci_hp_remove_link - remove symbolic link to the hotplug driver module.
+ * @slot: struct pci_slot
+ *
+ * Helper function for pci_hotplug_core.c to remove symbolic link to
+ * the hotplug driver module.
+ */
+void pci_hp_remove_module_link(struct pci_slot *pci_slot)
+{
+	sysfs_remove_link(&pci_slot->kobj, "module");
+}
+EXPORT_SYMBOL_GPL(pci_hp_remove_module_link);
+#endif
+
 static int pci_slot_init(void)
 {
 	struct kset *pci_bus_kset;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index ea2a153a912..6a1800ecd95 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1261,5 +1261,10 @@ static inline irqreturn_t pci_sriov_migration(struct pci_dev *dev)
 }
 #endif
 
+#if defined(CONFIG_HOTPLUG_PCI) || defined(CONFIG_HOTPLUG_PCI_MODULE)
+extern void pci_hp_create_module_link(struct pci_slot *pci_slot);
+extern void pci_hp_remove_module_link(struct pci_slot *pci_slot);
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* LINUX_PCI_H */
diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h
index 11936fd0b56..b3646cd7fd5 100644
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -69,6 +69,7 @@ enum pcie_link_speed {
 /**
  * struct hotplug_slot_ops -the callbacks that the hotplug pci core can use
  * @owner: The module owner of this structure
+ * @mod_name: The module name (KBUILD_MODNAME) of this structure
  * @enable_slot: Called when the user wants to enable a specific pci slot
  * @disable_slot: Called when the user wants to disable a specific pci slot
  * @set_attention_status: Called to set the specific slot's attention LED to
@@ -101,6 +102,7 @@ enum pcie_link_speed {
  */
 struct hotplug_slot_ops {
 	struct module *owner;
+	const char *mod_name;
 	int (*enable_slot)		(struct hotplug_slot *slot);
 	int (*disable_slot)		(struct hotplug_slot *slot);
 	int (*set_attention_status)	(struct hotplug_slot *slot, u8 value);
@@ -159,12 +161,21 @@ static inline const char *hotplug_slot_name(const struct hotplug_slot *slot)
 	return pci_slot_name(slot->pci_slot);
 }
 
-extern int pci_hp_register(struct hotplug_slot *, struct pci_bus *, int nr,
-			   const char *name);
+extern int __pci_hp_register(struct hotplug_slot *slot, struct pci_bus *pbus,
+			     int nr, const char *name,
+			     struct module *owner, const char *mod_name);
 extern int pci_hp_deregister(struct hotplug_slot *slot);
 extern int __must_check pci_hp_change_slot_info	(struct hotplug_slot *slot,
 						 struct hotplug_slot_info *info);
 
+static inline int pci_hp_register(struct hotplug_slot *slot,
+				  struct pci_bus *pbus,
+				  int devnr, const char *name)
+{
+	return __pci_hp_register(slot, pbus, devnr, name,
+				 THIS_MODULE, KBUILD_MODNAME);
+}
+
 /* PCI Setting Record (Type 0) */
 struct hpp_type0 {
 	u32 revision;
-- 
cgit v1.2.3-70-g09d2


From 70298c6e6c1ba68346336b4ea54bd5c0abbf73c8 Mon Sep 17 00:00:00 2001
From: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Date: Tue, 16 Jun 2009 13:34:38 +0800
Subject: PCI AER: support Multiple Error Received and no error source id

Based on PCI Express AER specs, a root port might receive multiple
TLP errors while it could only save a correctable error source id
and an uncorrectable error source id at the same time. In addition,
some root port hardware might be unable to provide a correct source
id, i.e., the source id, or the bus id part of the source id provided
by root port might be equal to 0.

The patchset implements the support in kernel by searching the device
tree under the root port.

Patch 1 changes parameter cb of function pci_walk_bus to return a value.
When cb return non-zero, pci_walk_bus stops more searching on the
device tree.

Reviewed-by: Andrew Patterson <andrew.patterson@hp.com>
Signed-off-by: Zhang Yanmin <yanmin_zhang@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/powerpc/platforms/pseries/eeh_driver.c | 38 ++++++++++++++++++-----------
 drivers/pci/bus.c                           | 11 +++++++--
 drivers/pci/pcie/aer/aerdrv_core.c          | 30 ++++++++++++-----------
 include/linux/pci.h                         |  2 +-
 4 files changed, 50 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index 9a2a6e32f00..0e8db677125 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -122,7 +122,7 @@ static void eeh_enable_irq(struct pci_dev *dev)
  * passed back in "userdata".
  */
 
-static void eeh_report_error(struct pci_dev *dev, void *userdata)
+static int eeh_report_error(struct pci_dev *dev, void *userdata)
 {
 	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver = dev->driver;
@@ -130,19 +130,21 @@ static void eeh_report_error(struct pci_dev *dev, void *userdata)
 	dev->error_state = pci_channel_io_frozen;
 
 	if (!driver)
-		return;
+		return 0;
 
 	eeh_disable_irq(dev);
 
 	if (!driver->err_handler ||
 	    !driver->err_handler->error_detected)
-		return;
+		return 0;
 
 	rc = driver->err_handler->error_detected (dev, pci_channel_io_frozen);
 
 	/* A driver that needs a reset trumps all others */
 	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
 	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+
+	return 0;
 }
 
 /**
@@ -153,7 +155,7 @@ static void eeh_report_error(struct pci_dev *dev, void *userdata)
  * Cumulative response passed back in "userdata".
  */
 
-static void eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
+static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
 {
 	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver = dev->driver;
@@ -161,26 +163,28 @@ static void eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
 	if (!driver ||
 	    !driver->err_handler ||
 	    !driver->err_handler->mmio_enabled)
-		return;
+		return 0;
 
 	rc = driver->err_handler->mmio_enabled (dev);
 
 	/* A driver that needs a reset trumps all others */
 	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
 	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+
+	return 0;
 }
 
 /**
  * eeh_report_reset - tell device that slot has been reset
  */
 
-static void eeh_report_reset(struct pci_dev *dev, void *userdata)
+static int eeh_report_reset(struct pci_dev *dev, void *userdata)
 {
 	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver = dev->driver;
 
 	if (!driver)
-		return;
+		return 0;
 
 	dev->error_state = pci_channel_io_normal;
 
@@ -188,35 +192,39 @@ static void eeh_report_reset(struct pci_dev *dev, void *userdata)
 
 	if (!driver->err_handler ||
 	    !driver->err_handler->slot_reset)
-		return;
+		return 0;
 
 	rc = driver->err_handler->slot_reset(dev);
 	if ((*res == PCI_ERS_RESULT_NONE) ||
 	    (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc;
 	if (*res == PCI_ERS_RESULT_DISCONNECT &&
 	     rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
+
+	return 0;
 }
 
 /**
  * eeh_report_resume - tell device to resume normal operations
  */
 
-static void eeh_report_resume(struct pci_dev *dev, void *userdata)
+static int eeh_report_resume(struct pci_dev *dev, void *userdata)
 {
 	struct pci_driver *driver = dev->driver;
 
 	dev->error_state = pci_channel_io_normal;
 
 	if (!driver)
-		return;
+		return 0;
 
 	eeh_enable_irq(dev);
 
 	if (!driver->err_handler ||
 	    !driver->err_handler->resume)
-		return;
+		return 0;
 
 	driver->err_handler->resume(dev);
+
+	return 0;
 }
 
 /**
@@ -226,22 +234,24 @@ static void eeh_report_resume(struct pci_dev *dev, void *userdata)
  * dead, and that no further recovery attempts will be made on it.
  */
 
-static void eeh_report_failure(struct pci_dev *dev, void *userdata)
+static int eeh_report_failure(struct pci_dev *dev, void *userdata)
 {
 	struct pci_driver *driver = dev->driver;
 
 	dev->error_state = pci_channel_io_perm_failure;
 
 	if (!driver)
-		return;
+		return 0;
 
 	eeh_disable_irq(dev);
 
 	if (!driver->err_handler ||
 	    !driver->err_handler->error_detected)
-		return;
+		return 0;
 
 	driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
+
+	return 0;
 }
 
 /* ------------------------------------------------------- */
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 40af27f3104..cef28a79103 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -206,13 +206,18 @@ void pci_enable_bridges(struct pci_bus *bus)
  *  Walk the given bus, including any bridged devices
  *  on buses under this bus.  Call the provided callback
  *  on each device found.
+ *
+ *  We check the return of @cb each time. If it returns anything
+ *  other than 0, we break out.
+ *
  */
-void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *),
+void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
 		  void *userdata)
 {
 	struct pci_dev *dev;
 	struct pci_bus *bus;
 	struct list_head *next;
+	int retval;
 
 	bus = top;
 	down_read(&pci_bus_sem);
@@ -236,8 +241,10 @@ void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *),
 
 		/* Run device routines with the device locked */
 		down(&dev->dev.sem);
-		cb(dev, userdata);
+		retval = cb(dev, userdata);
 		up(&dev->dev.sem);
+		if (retval)
+			break;
 	}
 	up_read(&pci_bus_sem);
 }
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index dd3829e68e3..a7a3919904b 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -109,7 +109,7 @@ int pci_cleanup_aer_correct_error_status(struct pci_dev *dev)
 #endif  /*  0  */
 
 
-static void set_device_error_reporting(struct pci_dev *dev, void *data)
+static int set_device_error_reporting(struct pci_dev *dev, void *data)
 {
 	bool enable = *((bool *)data);
 
@@ -124,6 +124,8 @@ static void set_device_error_reporting(struct pci_dev *dev, void *data)
 
 	if (enable)
 		pcie_set_ecrc_checking(dev);
+
+	return 0;
 }
 
 /**
@@ -207,7 +209,7 @@ static struct device* find_source_device(struct pci_dev *parent, u16 id)
 	return NULL;
 }
 
-static void report_error_detected(struct pci_dev *dev, void *data)
+static int report_error_detected(struct pci_dev *dev, void *data)
 {
 	pci_ers_result_t vote;
 	struct pci_error_handlers *err_handler;
@@ -232,16 +234,16 @@ static void report_error_detected(struct pci_dev *dev, void *data)
 				   dev->driver ?
 				   "no AER-aware driver" : "no driver");
 		}
-		return;
+		return 0;
 	}
 
 	err_handler = dev->driver->err_handler;
 	vote = err_handler->error_detected(dev, result_data->state);
 	result_data->result = merge_result(result_data->result, vote);
-	return;
+	return 0;
 }
 
-static void report_mmio_enabled(struct pci_dev *dev, void *data)
+static int report_mmio_enabled(struct pci_dev *dev, void *data)
 {
 	pci_ers_result_t vote;
 	struct pci_error_handlers *err_handler;
@@ -251,15 +253,15 @@ static void report_mmio_enabled(struct pci_dev *dev, void *data)
 	if (!dev->driver ||
 		!dev->driver->err_handler ||
 		!dev->driver->err_handler->mmio_enabled)
-		return;
+		return 0;
 
 	err_handler = dev->driver->err_handler;
 	vote = err_handler->mmio_enabled(dev);
 	result_data->result = merge_result(result_data->result, vote);
-	return;
+	return 0;
 }
 
-static void report_slot_reset(struct pci_dev *dev, void *data)
+static int report_slot_reset(struct pci_dev *dev, void *data)
 {
 	pci_ers_result_t vote;
 	struct pci_error_handlers *err_handler;
@@ -269,15 +271,15 @@ static void report_slot_reset(struct pci_dev *dev, void *data)
 	if (!dev->driver ||
 		!dev->driver->err_handler ||
 		!dev->driver->err_handler->slot_reset)
-		return;
+		return 0;
 
 	err_handler = dev->driver->err_handler;
 	vote = err_handler->slot_reset(dev);
 	result_data->result = merge_result(result_data->result, vote);
-	return;
+	return 0;
 }
 
-static void report_resume(struct pci_dev *dev, void *data)
+static int report_resume(struct pci_dev *dev, void *data)
 {
 	struct pci_error_handlers *err_handler;
 
@@ -286,11 +288,11 @@ static void report_resume(struct pci_dev *dev, void *data)
 	if (!dev->driver ||
 		!dev->driver->err_handler ||
 		!dev->driver->err_handler->resume)
-		return;
+		return 0;
 
 	err_handler = dev->driver->err_handler;
 	err_handler->resume(dev);
-	return;
+	return 0;
 }
 
 /**
@@ -307,7 +309,7 @@ static void report_resume(struct pci_dev *dev, void *data)
 static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
 	enum pci_channel_state state,
 	char *error_mesg,
-	void (*cb)(struct pci_dev *, void *))
+	int (*cb)(struct pci_dev *, void *))
 {
 	struct aer_broadcast_data result_data;
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 6a1800ecd95..61d9b790d21 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -789,7 +789,7 @@ const struct pci_device_id *pci_match_id(const struct pci_device_id *ids,
 int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
 		    int pass);
 
-void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *),
+void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
 		  void *userdata);
 int pci_cfg_space_size_ext(struct pci_dev *dev);
 int pci_cfg_space_size(struct pci_dev *dev);
-- 
cgit v1.2.3-70-g09d2


From 8c1c699fec9e9021bf6ff0285dee086bb27aec90 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yu.zhao@intel.com>
Date: Sat, 13 Jun 2009 15:52:13 +0800
Subject: PCI: cleanup Function Level Reset

This patch enhances the FLR functions:
  1) remove disable_irq() so the shared IRQ won't be disabled.
  2) replace the 1s wait with 100, 200 and 400ms wait intervals
     for the Pending Transaction.
  3) replace mdelay() with msleep().
  4) add might_sleep().
  5) lock the device to prevent PM suspend from accessing the CSRs
     during the reset.
  6) coding style fixes.

Reviewed-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/iov.c   |   4 +-
 drivers/pci/pci.c   | 166 ++++++++++++++++++++++++++--------------------------
 include/linux/pci.h |   2 +-
 3 files changed, 87 insertions(+), 85 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index e87fe95da81..03c7706c0a0 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -110,7 +110,7 @@ static int virtfn_add(struct pci_dev *dev, int id, int reset)
 	}
 
 	if (reset)
-		pci_execute_reset_function(virtfn);
+		__pci_reset_function(virtfn);
 
 	pci_device_add(virtfn, virtfn->bus);
 	mutex_unlock(&iov->dev->sriov->lock);
@@ -164,7 +164,7 @@ static void virtfn_remove(struct pci_dev *dev, int id, int reset)
 
 	if (reset) {
 		device_release_driver(&virtfn->dev);
-		pci_execute_reset_function(virtfn);
+		__pci_reset_function(virtfn);
 	}
 
 	sprintf(buf, "virtfn%u", id);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 8ea911e5572..6a052ada3fe 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2055,111 +2055,112 @@ int pci_set_dma_seg_boundary(struct pci_dev *dev, unsigned long mask)
 EXPORT_SYMBOL(pci_set_dma_seg_boundary);
 #endif
 
-static int __pcie_flr(struct pci_dev *dev, int probe)
+static int pcie_flr(struct pci_dev *dev, int probe)
 {
-	u16 status;
+	int i;
+	int pos;
 	u32 cap;
-	int exppos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+	u16 status;
 
-	if (!exppos)
+	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+	if (!pos)
 		return -ENOTTY;
-	pci_read_config_dword(dev, exppos + PCI_EXP_DEVCAP, &cap);
+
+	pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP, &cap);
 	if (!(cap & PCI_EXP_DEVCAP_FLR))
 		return -ENOTTY;
 
 	if (probe)
 		return 0;
 
-	pci_block_user_cfg_access(dev);
-
 	/* Wait for Transaction Pending bit clean */
-	pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
-	if (!(status & PCI_EXP_DEVSTA_TRPND))
-		goto transaction_done;
+	for (i = 0; i < 4; i++) {
+		if (i)
+			msleep((1 << (i - 1)) * 100);
 
-	msleep(100);
-	pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
-	if (!(status & PCI_EXP_DEVSTA_TRPND))
-		goto transaction_done;
-
-	dev_info(&dev->dev, "Busy after 100ms while trying to reset; "
-			"sleeping for 1 second\n");
-	ssleep(1);
-	pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
-	if (status & PCI_EXP_DEVSTA_TRPND)
-		dev_info(&dev->dev, "Still busy after 1s; "
-				"proceeding with reset anyway\n");
-
-transaction_done:
-	pci_write_config_word(dev, exppos + PCI_EXP_DEVCTL,
+		pci_read_config_word(dev, pos + PCI_EXP_DEVSTA, &status);
+		if (!(status & PCI_EXP_DEVSTA_TRPND))
+			goto clear;
+	}
+
+	dev_err(&dev->dev, "transaction is not cleared; "
+			"proceeding with reset anyway\n");
+
+clear:
+	pci_write_config_word(dev, pos + PCI_EXP_DEVCTL,
 				PCI_EXP_DEVCTL_BCR_FLR);
-	mdelay(100);
+	msleep(100);
 
-	pci_unblock_user_cfg_access(dev);
 	return 0;
 }
 
-static int __pci_af_flr(struct pci_dev *dev, int probe)
+static int pci_af_flr(struct pci_dev *dev, int probe)
 {
-	int cappos = pci_find_capability(dev, PCI_CAP_ID_AF);
-	u8 status;
+	int i;
+	int pos;
 	u8 cap;
+	u8 status;
 
-	if (!cappos)
+	pos = pci_find_capability(dev, PCI_CAP_ID_AF);
+	if (!pos)
 		return -ENOTTY;
-	pci_read_config_byte(dev, cappos + PCI_AF_CAP, &cap);
+
+	pci_read_config_byte(dev, pos + PCI_AF_CAP, &cap);
 	if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR))
 		return -ENOTTY;
 
 	if (probe)
 		return 0;
 
-	pci_block_user_cfg_access(dev);
-
 	/* Wait for Transaction Pending bit clean */
-	pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status);
-	if (!(status & PCI_AF_STATUS_TP))
-		goto transaction_done;
+	for (i = 0; i < 4; i++) {
+		if (i)
+			msleep((1 << (i - 1)) * 100);
+
+		pci_read_config_byte(dev, pos + PCI_AF_STATUS, &status);
+		if (!(status & PCI_AF_STATUS_TP))
+			goto clear;
+	}
+
+	dev_err(&dev->dev, "transaction is not cleared; "
+			"proceeding with reset anyway\n");
 
+clear:
+	pci_write_config_byte(dev, pos + PCI_AF_CTRL, PCI_AF_CTRL_FLR);
 	msleep(100);
-	pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status);
-	if (!(status & PCI_AF_STATUS_TP))
-		goto transaction_done;
-
-	dev_info(&dev->dev, "Busy after 100ms while trying to"
-			" reset; sleeping for 1 second\n");
-	ssleep(1);
-	pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status);
-	if (status & PCI_AF_STATUS_TP)
-		dev_info(&dev->dev, "Still busy after 1s; "
-				"proceeding with reset anyway\n");
-
-transaction_done:
-	pci_write_config_byte(dev, cappos + PCI_AF_CTRL, PCI_AF_CTRL_FLR);
-	mdelay(100);
-
-	pci_unblock_user_cfg_access(dev);
+
 	return 0;
 }
 
-static int __pci_reset_function(struct pci_dev *pdev, int probe)
+static int pci_dev_reset(struct pci_dev *dev, int probe)
 {
-	int res;
+	int rc;
+
+	might_sleep();
+
+	if (!probe) {
+		pci_block_user_cfg_access(dev);
+		/* block PM suspend, driver probe, etc. */
+		down(&dev->dev.sem);
+	}
 
-	res = __pcie_flr(pdev, probe);
-	if (res != -ENOTTY)
-		return res;
+	rc = pcie_flr(dev, probe);
+	if (rc != -ENOTTY)
+		goto done;
 
-	res = __pci_af_flr(pdev, probe);
-	if (res != -ENOTTY)
-		return res;
+	rc = pci_af_flr(dev, probe);
+done:
+	if (!probe) {
+		up(&dev->dev.sem);
+		pci_unblock_user_cfg_access(dev);
+	}
 
-	return res;
+	return rc;
 }
 
 /**
- * pci_execute_reset_function() - Reset a PCI device function
- * @dev: Device function to reset
+ * __pci_reset_function - reset a PCI device function
+ * @dev: PCI device to reset
  *
  * Some devices allow an individual function to be reset without affecting
  * other functions in the same device.  The PCI device must be responsive
@@ -2171,18 +2172,18 @@ static int __pci_reset_function(struct pci_dev *pdev, int probe)
  * device including MSI, bus mastering, BARs, decoding IO and memory spaces,
  * etc.
  *
- * Returns 0 if the device function was successfully reset or -ENOTTY if the
+ * Returns 0 if the device function was successfully reset or negative if the
  * device doesn't support resetting a single function.
  */
-int pci_execute_reset_function(struct pci_dev *dev)
+int __pci_reset_function(struct pci_dev *dev)
 {
-	return __pci_reset_function(dev, 0);
+	return pci_dev_reset(dev, 0);
 }
-EXPORT_SYMBOL_GPL(pci_execute_reset_function);
+EXPORT_SYMBOL_GPL(__pci_reset_function);
 
 /**
- * pci_reset_function() - quiesce and reset a PCI device function
- * @dev: Device function to reset
+ * pci_reset_function - quiesce and reset a PCI device function
+ * @dev: PCI device to reset
  *
  * Some devices allow an individual function to be reset without affecting
  * other functions in the same device.  The PCI device must be responsive
@@ -2190,32 +2191,33 @@ EXPORT_SYMBOL_GPL(pci_execute_reset_function);
  *
  * This function does not just reset the PCI portion of a device, but
  * clears all the state associated with the device.  This function differs
- * from pci_execute_reset_function in that it saves and restores device state
+ * from __pci_reset_function in that it saves and restores device state
  * over the reset.
  *
- * Returns 0 if the device function was successfully reset or -ENOTTY if the
+ * Returns 0 if the device function was successfully reset or negative if the
  * device doesn't support resetting a single function.
  */
 int pci_reset_function(struct pci_dev *dev)
 {
-	int r = __pci_reset_function(dev, 1);
+	int rc;
 
-	if (r < 0)
-		return r;
+	rc = pci_dev_reset(dev, 1);
+	if (rc)
+		return rc;
 
-	if (!dev->msi_enabled && !dev->msix_enabled && dev->irq != 0)
-		disable_irq(dev->irq);
 	pci_save_state(dev);
 
+	/*
+	 * both INTx and MSI are disabled after the Interrupt Disable bit
+	 * is set and the Bus Master bit is cleared.
+	 */
 	pci_write_config_word(dev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
 
-	r = pci_execute_reset_function(dev);
+	rc = pci_dev_reset(dev, 0);
 
 	pci_restore_state(dev);
-	if (!dev->msi_enabled && !dev->msix_enabled && dev->irq != 0)
-		enable_irq(dev->irq);
 
-	return r;
+	return rc;
 }
 EXPORT_SYMBOL_GPL(pci_reset_function);
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 61d9b790d21..91b06be2f01 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -702,8 +702,8 @@ int pcix_get_mmrbc(struct pci_dev *dev);
 int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc);
 int pcie_get_readrq(struct pci_dev *dev);
 int pcie_set_readrq(struct pci_dev *dev, int rq);
+int __pci_reset_function(struct pci_dev *dev);
 int pci_reset_function(struct pci_dev *dev);
-int pci_execute_reset_function(struct pci_dev *dev);
 void pci_update_resource(struct pci_dev *dev, int resno);
 int __must_check pci_assign_resource(struct pci_dev *dev, int i);
 int pci_select_bars(struct pci_dev *dev, unsigned long flags);
-- 
cgit v1.2.3-70-g09d2


From 5ddc9b100fc96e8f3c6d435cecd9d09e5b9673f9 Mon Sep 17 00:00:00 2001
From: Andy Walls <awalls@radix.net>
Date: Sun, 7 Jun 2009 21:39:03 -0300
Subject: V4L/DVB (11933): tuner-simple, tveeprom: Add Philips FQ1216LME MK3
 analog tuner

Signed-off-by: Andy Walls <awalls@radix.net>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/video4linux/CARDLIST.tuner   |  1 +
 drivers/media/common/tuners/tuner-simple.c | 38 +++++++++++++++++++++---------
 drivers/media/common/tuners/tuner-types.c  | 29 +++++++++++++++++++++++
 drivers/media/video/tveeprom.c             |  4 ++--
 include/media/tuner.h                      |  1 +
 5 files changed, 60 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/Documentation/video4linux/CARDLIST.tuner b/Documentation/video4linux/CARDLIST.tuner
index 22c1c55979c..be67844074d 100644
--- a/Documentation/video4linux/CARDLIST.tuner
+++ b/Documentation/video4linux/CARDLIST.tuner
@@ -77,3 +77,4 @@ tuner=76 - Xceive 5000 tuner
 tuner=77 - TCL tuner MF02GIP-5N-E
 tuner=78 - Philips FMD1216MEX MK3 Hybrid Tuner
 tuner=79 - Philips PAL/SECAM multi (FM1216 MK5)
+tuner=80 - Philips FQ1216LME MK3 PAL/SECAM w/active loopthrough
diff --git a/drivers/media/common/tuners/tuner-simple.c b/drivers/media/common/tuners/tuner-simple.c
index 78412c9c424..a5eb6592abe 100644
--- a/drivers/media/common/tuners/tuner-simple.c
+++ b/drivers/media/common/tuners/tuner-simple.c
@@ -416,6 +416,24 @@ static int simple_std_setup(struct dvb_frontend *fe,
 	return 0;
 }
 
+static int simple_set_aux_byte(struct dvb_frontend *fe, u8 config, u8 aux)
+{
+	struct tuner_simple_priv *priv = fe->tuner_priv;
+	int rc;
+	u8 buffer[2];
+
+	buffer[0] = (config & ~0x38) | 0x18;
+	buffer[1] = aux;
+
+	tuner_dbg("setting aux byte: 0x%02x 0x%02x\n", buffer[0], buffer[1]);
+
+	rc = tuner_i2c_xfer_send(&priv->i2c_props, buffer, 2);
+	if (2 != rc)
+		tuner_warn("i2c i/o error: rc == %d (should be 2)\n", rc);
+
+	return rc == 2 ? 0 : rc;
+}
+
 static int simple_post_tune(struct dvb_frontend *fe, u8 *buffer,
 			    u16 div, u8 config, u8 cb)
 {
@@ -424,17 +442,10 @@ static int simple_post_tune(struct dvb_frontend *fe, u8 *buffer,
 
 	switch (priv->type) {
 	case TUNER_LG_TDVS_H06XF:
-		/* Set the Auxiliary Byte. */
-		buffer[0] = buffer[2];
-		buffer[0] &= ~0x20;
-		buffer[0] |= 0x18;
-		buffer[1] = 0x20;
-		tuner_dbg("tv 0x%02x 0x%02x\n", buffer[0], buffer[1]);
-
-		rc = tuner_i2c_xfer_send(&priv->i2c_props, buffer, 2);
-		if (2 != rc)
-			tuner_warn("i2c i/o error: rc == %d "
-				   "(should be 2)\n", rc);
+		simple_set_aux_byte(fe, config, 0x20);
+		break;
+	case TUNER_PHILIPS_FQ1216LME_MK3:
+		simple_set_aux_byte(fe, config, 0x60); /* External AGC */
 		break;
 	case TUNER_MICROTUNE_4042FI5:
 	{
@@ -506,6 +517,11 @@ static int simple_radio_bandswitch(struct dvb_frontend *fe, u8 *buffer)
 	case TUNER_THOMSON_DTT761X:
 		buffer[3] = 0x39;
 		break;
+	case TUNER_PHILIPS_FQ1216LME_MK3:
+		tuner_err("This tuner doesn't have FM\n");
+		/* Set the low band for sanity, since it covers 88-108 MHz */
+		buffer[3] = 0x01;
+		break;
 	case TUNER_MICROTUNE_4049FM5:
 	default:
 		buffer[3] = 0xa4;
diff --git a/drivers/media/common/tuners/tuner-types.c b/drivers/media/common/tuners/tuner-types.c
index a9ce5b2ce08..6a7f1a417c2 100644
--- a/drivers/media/common/tuners/tuner-types.c
+++ b/drivers/media/common/tuners/tuner-types.c
@@ -1279,6 +1279,28 @@ static struct tuner_params tuner_tcl_mf02gip_5n_params[] = {
 	},
 };
 
+/* 80-89 */
+/* --------- TUNER_PHILIPS_FQ1216LME_MK3 -- active loopthrough, no FM ------- */
+
+static struct tuner_params tuner_fq1216lme_mk3_params[] = {
+	{
+		.type   = TUNER_PARAM_TYPE_PAL,
+		.ranges = tuner_fm1216me_mk3_pal_ranges,
+		.count  = ARRAY_SIZE(tuner_fm1216me_mk3_pal_ranges),
+		.cb_first_if_lower_freq = 1, /* not specified, but safe to do */
+		.has_tda9887 = 1, /* TDA9886 */
+		.port1_active = 1,
+		.port2_active = 1,
+		.port2_invert_for_secam_lc = 1,
+		.default_top_low = 4,
+		.default_top_mid = 4,
+		.default_top_high = 4,
+		.default_top_secam_low = 4,
+		.default_top_secam_mid = 4,
+		.default_top_secam_high = 4,
+	},
+};
+
 /* --------------------------------------------------------------------- */
 
 struct tunertype tuners[] = {
@@ -1724,6 +1746,13 @@ struct tunertype tuners[] = {
 		.params = tuner_fm1216mk5_params,
 		.count  = ARRAY_SIZE(tuner_fm1216mk5_params),
 	},
+
+	/* 80-89 */
+	[TUNER_PHILIPS_FQ1216LME_MK3] = { /* PAL/SECAM, Loop-thru, no FM */
+		.name = "Philips FQ1216LME MK3 PAL/SECAM w/active loopthrough",
+		.params = tuner_fq1216lme_mk3_params,
+		.count  = ARRAY_SIZE(tuner_fq1216lme_mk3_params),
+	},
 };
 EXPORT_SYMBOL(tuners);
 
diff --git a/drivers/media/video/tveeprom.c b/drivers/media/video/tveeprom.c
index d0de03c6e68..ac02808106c 100644
--- a/drivers/media/video/tveeprom.c
+++ b/drivers/media/video/tveeprom.c
@@ -184,7 +184,7 @@ hauppauge_tuner[] =
 	{ TUNER_ABSENT,        		"Silicon TDA8275C1 8290 FM"},
 	{ TUNER_ABSENT,        		"Thompson DTT757"},
 	/* 80-89 */
-	{ TUNER_PHILIPS_FM1216ME_MK3, 	"Philips FQ1216LME MK3"},
+	{ TUNER_PHILIPS_FQ1216LME_MK3, 	"Philips FQ1216LME MK3"},
 	{ TUNER_LG_PAL_NEW_TAPC, 	"LG TAPC G701D"},
 	{ TUNER_LG_NTSC_NEW_TAPC, 	"LG TAPC H791F"},
 	{ TUNER_LG_PAL_NEW_TAPC, 	"TCL 2002MB 3"},
@@ -229,7 +229,7 @@ hauppauge_tuner[] =
 	{ TUNER_ABSENT,        		"Samsung THPD5222FG30A"},
 	/* 120-129 */
 	{ TUNER_XC2028,        		"Xceive XC3028"},
-	{ TUNER_ABSENT,        		"Philips FQ1216LME MK5"},
+	{ TUNER_PHILIPS_FQ1216LME_MK3,	"Philips FQ1216LME MK5"},
 	{ TUNER_ABSENT,        		"Philips FQD1216LME"},
 	{ TUNER_ABSENT,        		"Conexant CX24118A"},
 	{ TUNER_ABSENT,        		"TCL DMF11WIP"},
diff --git a/include/media/tuner.h b/include/media/tuner.h
index 735a2a94116..cbf97f45fbe 100644
--- a/include/media/tuner.h
+++ b/include/media/tuner.h
@@ -125,6 +125,7 @@
 #define TUNER_TCL_MF02GIP_5N		77	/* TCL MF02GIP_5N */
 #define TUNER_PHILIPS_FMD1216MEX_MK3	78
 #define TUNER_PHILIPS_FM1216MK5		79
+#define TUNER_PHILIPS_FQ1216LME_MK3	80	/* Active loopthrough, no FM */
 
 /* tv card specific */
 #define TDA9887_PRESENT 		(1<<0)
-- 
cgit v1.2.3-70-g09d2


From 49809d6a511960e5ccfb85b780894f45ac119065 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 7 Jun 2009 12:10:39 -0300
Subject: V4L/DVB (11970): gspca - ov519: Add support for the ov518 bridge.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jean-Francois Moine <moinejf@free.fr>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/gspca/ov519.c | 520 +++++++++++++++++++++++++++++++++++---
 include/linux/videodev2.h         |   3 +-
 2 files changed, 488 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/gspca/ov519.c b/drivers/media/video/gspca/ov519.c
index 1fff37b7989..188866ac6ce 100644
--- a/drivers/media/video/gspca/ov519.c
+++ b/drivers/media/video/gspca/ov519.c
@@ -50,6 +50,13 @@ static int i2c_detect_tries = 10;
 struct sd {
 	struct gspca_dev gspca_dev;		/* !! must be the first item */
 
+	char bridge;
+#define BRIDGE_OV511		0
+#define BRIDGE_OV511PLUS	1
+#define BRIDGE_OV518		2
+#define BRIDGE_OV518PLUS	3
+#define BRIDGE_OV519		4
+
 	/* Determined by sensor type */
 	__u8 sif;
 
@@ -87,6 +94,9 @@ static int sd_sethflip(struct gspca_dev *gspca_dev, __s32 val);
 static int sd_gethflip(struct gspca_dev *gspca_dev, __s32 *val);
 static int sd_setvflip(struct gspca_dev *gspca_dev, __s32 val);
 static int sd_getvflip(struct gspca_dev *gspca_dev, __s32 *val);
+static void setbrightness(struct gspca_dev *gspca_dev);
+static void setcontrast(struct gspca_dev *gspca_dev);
+static void setcolors(struct gspca_dev *gspca_dev);
 
 static struct ctrl sd_ctrls[] = {
 	{
@@ -164,7 +174,7 @@ static struct ctrl sd_ctrls[] = {
 	},
 };
 
-static const struct v4l2_pix_format vga_mode[] = {
+static const struct v4l2_pix_format ov519_vga_mode[] = {
 	{320, 240, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE,
 		.bytesperline = 320,
 		.sizeimage = 320 * 240 * 3 / 8 + 590,
@@ -176,7 +186,7 @@ static const struct v4l2_pix_format vga_mode[] = {
 		.colorspace = V4L2_COLORSPACE_JPEG,
 		.priv = 0},
 };
-static const struct v4l2_pix_format sif_mode[] = {
+static const struct v4l2_pix_format ov519_sif_mode[] = {
 	{176, 144, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE,
 		.bytesperline = 176,
 		.sizeimage = 176 * 144 * 3 / 8 + 590,
@@ -189,6 +199,47 @@ static const struct v4l2_pix_format sif_mode[] = {
 		.priv = 0},
 };
 
+static const struct v4l2_pix_format ov518_vga_mode[] = {
+	{320, 240, V4L2_PIX_FMT_OV518, V4L2_FIELD_NONE,
+		.bytesperline = 320,
+		.sizeimage = 320 * 240 * 3 / 8 + 590,
+		.colorspace = V4L2_COLORSPACE_JPEG,
+		.priv = 1},
+	{640, 480, V4L2_PIX_FMT_OV518, V4L2_FIELD_NONE,
+		.bytesperline = 640,
+		.sizeimage = 640 * 480 * 3 / 8 + 590,
+		.colorspace = V4L2_COLORSPACE_JPEG,
+		.priv = 0},
+};
+static const struct v4l2_pix_format ov518_sif_mode[] = {
+	{176, 144, V4L2_PIX_FMT_OV518, V4L2_FIELD_NONE,
+		.bytesperline = 176,
+		.sizeimage = 40000,
+		.colorspace = V4L2_COLORSPACE_JPEG,
+		.priv = 1},
+	{352, 288, V4L2_PIX_FMT_OV518, V4L2_FIELD_NONE,
+		.bytesperline = 352,
+		.sizeimage = 352 * 288 * 3 / 8 + 590,
+		.colorspace = V4L2_COLORSPACE_JPEG,
+		.priv = 0},
+};
+
+
+/* Registers common to OV511 / OV518 */
+#define R51x_SYS_RESET          	0x50
+#define R51x_SYS_INIT         		0x53
+#define R51x_SYS_SNAP			0x52
+#define R51x_SYS_CUST_ID		0x5F
+#define R51x_COMP_LUT_BEGIN		0x80
+
+/* OV511 Camera interface register numbers */
+#define R511_SYS_LED_CTL		0x55	/* OV511+ only */
+#define	OV511_RESET_NOREGS		0x3F	/* All but OV511 & regs */
+
+/* OV518 Camera interface register numbers */
+#define R518_GPIO_OUT			0x56	/* OV518(+) only */
+#define R518_GPIO_CTL			0x57	/* OV518(+) only */
+
 /* OV519 Camera interface register numbers */
 #define OV519_R10_H_SIZE		0x10
 #define OV519_R11_V_SIZE		0x11
@@ -224,6 +275,8 @@ static const struct v4l2_pix_format sif_mode[] = {
 
 /* OV7610 registers */
 #define OV7610_REG_GAIN		0x00	/* gain setting (5:0) */
+#define OV7610_REG_BLUE		0x01	/* blue channel balance */
+#define OV7610_REG_RED		0x02	/* red channel balance */
 #define OV7610_REG_SAT		0x03	/* saturation */
 #define OV8610_REG_HUE		0x04	/* 04 reserved */
 #define OV7610_REG_CNT		0x05	/* Y contrast */
@@ -846,11 +899,12 @@ static unsigned char ov7670_abs_to_sm(unsigned char v)
 static int reg_w(struct sd *sd, __u16 index, __u8 value)
 {
 	int ret;
+	int req = (sd->bridge <= BRIDGE_OV511PLUS) ? 2 : 1;
 
 	sd->gspca_dev.usb_buf[0] = value;
 	ret = usb_control_msg(sd->gspca_dev.dev,
 			usb_sndctrlpipe(sd->gspca_dev.dev, 0),
-			1,			/* REQ_IO (ov518/519) */
+			req,
 			USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
 			0, index,
 			sd->gspca_dev.usb_buf, 1, 500);
@@ -864,10 +918,11 @@ static int reg_w(struct sd *sd, __u16 index, __u8 value)
 static int reg_r(struct sd *sd, __u16 index)
 {
 	int ret;
+	int req = (sd->bridge <= BRIDGE_OV511PLUS) ? 3 : 1;
 
 	ret = usb_control_msg(sd->gspca_dev.dev,
 			usb_rcvctrlpipe(sd->gspca_dev.dev, 0),
-			1,			/* REQ_IO */
+			req,
 			USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
 			0, index, sd->gspca_dev.usb_buf, 1, 500);
 
@@ -923,6 +978,28 @@ static int reg_w_mask(struct sd *sd,
 	return reg_w(sd, index, value);
 }
 
+/*
+ * Writes multiple (n) byte value to a single register. Only valid with certain
+ * registers (0x30 and 0xc4 - 0xce).
+ */
+static int ov518_reg_w32(struct sd *sd, __u16 index, u32 value, int n)
+{
+	int ret;
+
+	*((u32 *)sd->gspca_dev.usb_buf) = __cpu_to_le32(value);
+
+	ret = usb_control_msg(sd->gspca_dev.dev,
+			usb_sndctrlpipe(sd->gspca_dev.dev, 0),
+			1 /* REG_IO */,
+			USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+			0, index,
+			sd->gspca_dev.usb_buf, n, 500);
+	if (ret < 0)
+		PDEBUG(D_ERR, "Write reg32 [%02x] %08x failed", index, value);
+	return ret;
+}
+
+
 /*
  * The OV518 I2C I/O procedure is different, hence, this function.
  * This is normally only called from i2c_w(). Note that this function
@@ -1014,20 +1091,47 @@ static inline int ov51x_stop(struct sd *sd)
 {
 	PDEBUG(D_STREAM, "stopping");
 	sd->stopped = 1;
-	return reg_w(sd, OV519_SYS_RESET1, 0x0f);
+	switch (sd->bridge) {
+	case BRIDGE_OV511:
+	case BRIDGE_OV511PLUS:
+		return reg_w(sd, R51x_SYS_RESET, 0x3d);
+	case BRIDGE_OV518:
+	case BRIDGE_OV518PLUS:
+		return reg_w_mask(sd, R51x_SYS_RESET, 0x3a, 0x3a);
+	case BRIDGE_OV519:
+		return reg_w(sd, OV519_SYS_RESET1, 0x0f);
+	}
+
+	return 0;
 }
 
 /* Restarts OV511 after ov511_stop() is called. Has no effect if it is not
  * actually stopped (for performance). */
 static inline int ov51x_restart(struct sd *sd)
 {
+	int rc;
+
 	PDEBUG(D_STREAM, "restarting");
 	if (!sd->stopped)
 		return 0;
 	sd->stopped = 0;
 
 	/* Reinitialize the stream */
-	return reg_w(sd, OV519_SYS_RESET1, 0x00);
+	switch (sd->bridge) {
+	case BRIDGE_OV511:
+	case BRIDGE_OV511PLUS:
+		return reg_w(sd, R51x_SYS_RESET, 0x00);
+	case BRIDGE_OV518:
+	case BRIDGE_OV518PLUS:
+		rc = reg_w(sd, 0x2f, 0x80);
+		if (rc < 0)
+			return rc;
+		return reg_w(sd, R51x_SYS_RESET, 0x00);
+	case BRIDGE_OV519:
+		return reg_w(sd, OV519_SYS_RESET1, 0x00);
+	}
+
+	return 0;
 }
 
 /* This does an initial reset of an OmniVision sensor and ensures that I2C
@@ -1287,16 +1391,161 @@ static int ov6xx0_configure(struct sd *sd)
 /* Turns on or off the LED. Only has an effect with OV511+/OV518(+)/OV519 */
 static void ov51x_led_control(struct sd *sd, int on)
 {
-	reg_w_mask(sd, OV519_GPIO_DATA_OUT0, !on, 1);	/* 0 / 1 */
+	switch (sd->bridge) {
+	/* OV511 has no LED control */
+	case BRIDGE_OV511PLUS:
+		reg_w(sd, R511_SYS_LED_CTL, on ? 1 : 0);
+		break;
+	case BRIDGE_OV518:
+	case BRIDGE_OV518PLUS:
+		reg_w_mask(sd, R518_GPIO_OUT, on ? 0x02 : 0x00, 0x02);
+		break;
+	case BRIDGE_OV519:
+		reg_w_mask(sd, OV519_GPIO_DATA_OUT0, !on, 1);	/* 0 / 1 */
+		break;
+	}
 }
 
-/* this function is called at probe time */
-static int sd_config(struct gspca_dev *gspca_dev,
-			const struct usb_device_id *id)
+/* OV518 quantization tables are 8x4 (instead of 8x8) */
+static int ov518_upload_quan_tables(struct sd *sd)
+{
+	const unsigned char yQuanTable518[] = {
+		5, 4, 5, 6, 6, 7, 7, 7,
+		5, 5, 5, 5, 6, 7, 7, 7,
+		6, 6, 6, 6, 7, 7, 7, 8,
+		7, 7, 6, 7, 7, 7, 8, 8
+	};
+
+	const unsigned char uvQuanTable518[] = {
+		6, 6, 6, 7, 7, 7, 7, 7,
+		6, 6, 6, 7, 7, 7, 7, 7,
+		6, 6, 6, 7, 7, 7, 7, 8,
+		7, 7, 7, 7, 7, 7, 8, 8
+	};
+
+	const unsigned char *pYTable = yQuanTable518;
+	const unsigned char *pUVTable = uvQuanTable518;
+	unsigned char val0, val1;
+	int i, rc, reg = R51x_COMP_LUT_BEGIN;
+
+	PDEBUG(D_PROBE, "Uploading quantization tables");
+
+	for (i = 0; i < 16; i++) {
+		val0 = *pYTable++;
+		val1 = *pYTable++;
+		val0 &= 0x0f;
+		val1 &= 0x0f;
+		val0 |= val1 << 4;
+		rc = reg_w(sd, reg, val0);
+		if (rc < 0)
+			return rc;
+
+		val0 = *pUVTable++;
+		val1 = *pUVTable++;
+		val0 &= 0x0f;
+		val1 &= 0x0f;
+		val0 |= val1 << 4;
+		rc = reg_w(sd, reg + 16, val0);
+		if (rc < 0)
+			return rc;
+
+		reg++;
+	}
+
+	return 0;
+}
+
+/* This initializes the OV518/OV518+ and the sensor */
+static int ov518_configure(struct gspca_dev *gspca_dev)
 {
 	struct sd *sd = (struct sd *) gspca_dev;
-	struct cam *cam;
+	int rc;
+
+	/* For 518 and 518+ */
+	static struct ov_regvals init_518[] = {
+		{ R51x_SYS_RESET,	0x40 },
+		{ R51x_SYS_INIT,	0xe1 },
+		{ R51x_SYS_RESET,	0x3e },
+		{ R51x_SYS_INIT,	0xe1 },
+		{ R51x_SYS_RESET,	0x00 },
+		{ R51x_SYS_INIT,	0xe1 },
+		{ 0x46,			0x00 },
+		{ 0x5d,			0x03 },
+	};
+
+	static struct ov_regvals norm_518[] = {
+		{ R51x_SYS_SNAP,	0x02 }, /* Reset */
+		{ R51x_SYS_SNAP,	0x01 }, /* Enable */
+		{ 0x31, 		0x0f },
+		{ 0x5d,			0x03 },
+		{ 0x24,			0x9f },
+		{ 0x25,			0x90 },
+		{ 0x20,			0x00 },
+		{ 0x51,			0x04 },
+		{ 0x71,			0x19 },
+		{ 0x2f,			0x80 },
+	};
+
+	static struct ov_regvals norm_518_p[] = {
+		{ R51x_SYS_SNAP,	0x02 }, /* Reset */
+		{ R51x_SYS_SNAP,	0x01 }, /* Enable */
+		{ 0x31, 		0x0f },
+		{ 0x5d,			0x03 },
+		{ 0x24,			0x9f },
+		{ 0x25,			0x90 },
+		{ 0x20,			0x60 },
+		{ 0x51,			0x02 },
+		{ 0x71,			0x19 },
+		{ 0x40,			0xff },
+		{ 0x41,			0x42 },
+		{ 0x46,			0x00 },
+		{ 0x33,			0x04 },
+		{ 0x21,			0x19 },
+		{ 0x3f,			0x10 },
+		{ 0x2f,			0x80 },
+	};
+
+	/* First 5 bits of custom ID reg are a revision ID on OV518 */
+	PDEBUG(D_PROBE, "Device revision %d",
+	       0x1F & reg_r(sd, R51x_SYS_CUST_ID));
+
+	rc = write_regvals(sd, init_518, ARRAY_SIZE(init_518));
+	if (rc < 0)
+		return rc;
+
+	/* Set LED GPIO pin to output mode */
+	rc = reg_w_mask(sd, R518_GPIO_CTL, 0x00, 0x02);
+	if (rc < 0)
+		return rc;
 
+	switch (sd->bridge) {
+	case BRIDGE_OV518:
+		rc = write_regvals(sd, norm_518, ARRAY_SIZE(norm_518));
+		if (rc < 0)
+			return rc;
+		break;
+	case BRIDGE_OV518PLUS:
+		rc = write_regvals(sd, norm_518_p, ARRAY_SIZE(norm_518_p));
+		if (rc < 0)
+			return rc;
+		break;
+	}
+
+	rc = ov518_upload_quan_tables(sd);
+	if (rc < 0) {
+		PDEBUG(D_ERR, "Error uploading quantization tables");
+		return rc;
+	}
+
+	rc = reg_w(sd, 0x2f, 0x80);
+	if (rc < 0)
+		return rc;
+
+	return 0;
+}
+
+static int ov519_configure(struct sd *sd)
+{
 	static const struct ov_regvals init_519[] = {
 		{ 0x5a,  0x6d }, /* EnableSystem */
 		{ 0x53,  0x9b },
@@ -1313,8 +1562,32 @@ static int sd_config(struct gspca_dev *gspca_dev,
 		/* windows reads 0x55 at this point*/
 	};
 
-	if (write_regvals(sd, init_519, ARRAY_SIZE(init_519)))
+	return write_regvals(sd, init_519, ARRAY_SIZE(init_519));
+}
+
+/* this function is called at probe time */
+static int sd_config(struct gspca_dev *gspca_dev,
+			const struct usb_device_id *id)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+	struct cam *cam;
+	int ret = 0;
+
+	sd->bridge = id->driver_info;
+
+	switch (sd->bridge) {
+	case BRIDGE_OV518:
+	case BRIDGE_OV518PLUS:
+		ret = ov518_configure(gspca_dev);
+		break;
+	case BRIDGE_OV519:
+		ret = ov519_configure(sd);
+		break;
+	}
+
+	if (ret)
 		goto error;
+
 	ov51x_led_control(sd, 0);	/* turn LED off */
 
 	/* Test for 76xx */
@@ -1360,12 +1633,26 @@ static int sd_config(struct gspca_dev *gspca_dev,
 	}
 
 	cam = &gspca_dev->cam;
-	if (!sd->sif) {
-		cam->cam_mode = vga_mode;
-		cam->nmodes = ARRAY_SIZE(vga_mode);
-	} else {
-		cam->cam_mode = sif_mode;
-		cam->nmodes = ARRAY_SIZE(sif_mode);
+	switch (sd->bridge) {
+	case BRIDGE_OV518:
+	case BRIDGE_OV518PLUS:
+		if (!sd->sif) {
+			cam->cam_mode = ov518_vga_mode;
+			cam->nmodes = ARRAY_SIZE(ov518_vga_mode);
+		} else {
+			cam->cam_mode = ov518_sif_mode;
+			cam->nmodes = ARRAY_SIZE(ov518_sif_mode);
+		}
+		break;
+	case BRIDGE_OV519:
+		if (!sd->sif) {
+			cam->cam_mode = ov519_vga_mode;
+			cam->nmodes = ARRAY_SIZE(ov519_vga_mode);
+		} else {
+			cam->cam_mode = ov519_sif_mode;
+			cam->nmodes = ARRAY_SIZE(ov519_sif_mode);
+		}
+		break;
 	}
 	sd->brightness = BRIGHTNESS_DEF;
 	sd->contrast = CONTRAST_DEF;
@@ -1422,6 +1709,106 @@ static int sd_init(struct gspca_dev *gspca_dev)
 	return 0;
 }
 
+/* Sets up the OV518/OV518+ with the given image parameters
+ *
+ * OV518 needs a completely different approach, until we can figure out what
+ * the individual registers do. Also, only 15 FPS is supported now.
+ *
+ * Do not put any sensor-specific code in here (including I2C I/O functions)
+ */
+static int ov518_mode_init_regs(struct sd *sd)
+{
+	int hsegs, vsegs;
+
+	/******** Set the mode ********/
+
+	reg_w(sd, 0x2b, 0);
+	reg_w(sd, 0x2c, 0);
+	reg_w(sd, 0x2d, 0);
+	reg_w(sd, 0x2e, 0);
+	reg_w(sd, 0x3b, 0);
+	reg_w(sd, 0x3c, 0);
+	reg_w(sd, 0x3d, 0);
+	reg_w(sd, 0x3e, 0);
+
+	if (sd->bridge == BRIDGE_OV518) {
+		/* Set 8-bit (YVYU) input format */
+		reg_w_mask(sd, 0x20, 0x08, 0x08);
+
+		/* Set 12-bit (4:2:0) output format */
+		reg_w_mask(sd, 0x28, 0x80, 0xf0);
+		reg_w_mask(sd, 0x38, 0x80, 0xf0);
+	} else {
+		reg_w(sd, 0x28, 0x80);
+		reg_w(sd, 0x38, 0x80);
+	}
+
+	hsegs = sd->gspca_dev.width / 16;
+	vsegs = sd->gspca_dev.height / 4;
+
+	reg_w(sd, 0x29, hsegs);
+	reg_w(sd, 0x2a, vsegs);
+
+	reg_w(sd, 0x39, hsegs);
+	reg_w(sd, 0x3a, vsegs);
+
+	/* Windows driver does this here; who knows why */
+	reg_w(sd, 0x2f, 0x80);
+
+	/******** Set the framerate (to 30 FPS) ********/
+	if (sd->bridge == BRIDGE_OV518PLUS)
+		sd->clockdiv = 1;
+	else
+		sd->clockdiv = 0;
+
+	/* Mode independent, but framerate dependent, regs */
+	reg_w(sd, 0x51, 0x04);	/* Clock divider; lower==faster */
+	reg_w(sd, 0x22, 0x18);
+	reg_w(sd, 0x23, 0xff);
+
+	if (sd->bridge == BRIDGE_OV518PLUS)
+		reg_w(sd, 0x21, 0x19);
+	else
+		reg_w(sd, 0x71, 0x17);	/* Compression-related? */
+
+	/* FIXME: Sensor-specific */
+	/* Bit 5 is what matters here. Of course, it is "reserved" */
+	i2c_w(sd, 0x54, 0x23);
+
+	reg_w(sd, 0x2f, 0x80);
+
+	if (sd->bridge == BRIDGE_OV518PLUS) {
+		reg_w(sd, 0x24, 0x94);
+		reg_w(sd, 0x25, 0x90);
+		ov518_reg_w32(sd, 0xc4,    400, 2);	/* 190h   */
+		ov518_reg_w32(sd, 0xc6,    540, 2);	/* 21ch   */
+		ov518_reg_w32(sd, 0xc7,    540, 2);	/* 21ch   */
+		ov518_reg_w32(sd, 0xc8,    108, 2);	/* 6ch    */
+		ov518_reg_w32(sd, 0xca, 131098, 3);	/* 2001ah */
+		ov518_reg_w32(sd, 0xcb,    532, 2);	/* 214h   */
+		ov518_reg_w32(sd, 0xcc,   2400, 2);	/* 960h   */
+		ov518_reg_w32(sd, 0xcd,     32, 2);	/* 20h    */
+		ov518_reg_w32(sd, 0xce,    608, 2);	/* 260h   */
+	} else {
+		reg_w(sd, 0x24, 0x9f);
+		reg_w(sd, 0x25, 0x90);
+		ov518_reg_w32(sd, 0xc4,    400, 2);	/* 190h   */
+		ov518_reg_w32(sd, 0xc6,    381, 2);	/* 17dh   */
+		ov518_reg_w32(sd, 0xc7,    381, 2);	/* 17dh   */
+		ov518_reg_w32(sd, 0xc8,    128, 2);	/* 80h    */
+		ov518_reg_w32(sd, 0xca, 183331, 3);	/* 2cc23h */
+		ov518_reg_w32(sd, 0xcb,    746, 2);	/* 2eah   */
+		ov518_reg_w32(sd, 0xcc,   1750, 2);	/* 6d6h   */
+		ov518_reg_w32(sd, 0xcd,     45, 2);	/* 2dh    */
+		ov518_reg_w32(sd, 0xce,    851, 2);	/* 353h   */
+	}
+
+	reg_w(sd, 0x2f, 0x80);
+
+	return 0;
+}
+
+
 /* Sets up the OV519 with the given image parameters
  *
  * OV519 needs a completely different approach, until we can figure out what
@@ -1740,6 +2127,11 @@ static int set_ov_sensor_window(struct sd *sd)
 		hwebase = 0x3a;
 		vwsbase = 0x05;
 		vwebase = 0x06;
+		if (qvga) {
+			/* HDG: this fixes U and V getting swapped */
+			hwsbase--;
+			vwsbase--;
+		}
 		break;
 	case SEN_OV7620:
 		hwsbase = 0x2f;		/* From 7620.SET (spec is wrong) */
@@ -1855,15 +2247,28 @@ static int set_ov_sensor_window(struct sd *sd)
 static int sd_start(struct gspca_dev *gspca_dev)
 {
 	struct sd *sd = (struct sd *) gspca_dev;
-	int ret;
+	int ret = 0;
 
-	ret = ov519_mode_init_regs(sd);
+	switch (sd->bridge) {
+	case BRIDGE_OV518:
+	case BRIDGE_OV518PLUS:
+		ret = ov518_mode_init_regs(sd);
+		break;
+	case BRIDGE_OV519:
+		ret = ov519_mode_init_regs(sd);
+		break;
+	}
 	if (ret < 0)
 		goto out;
+
 	ret = set_ov_sensor_window(sd);
 	if (ret < 0)
 		goto out;
 
+	setcontrast(gspca_dev);
+	setbrightness(gspca_dev);
+	setcolors(gspca_dev);
+
 	ret = ov51x_restart(sd);
 	if (ret < 0)
 		goto out;
@@ -1882,7 +2287,30 @@ static void sd_stopN(struct gspca_dev *gspca_dev)
 	ov51x_led_control(sd, 0);
 }
 
-static void sd_pkt_scan(struct gspca_dev *gspca_dev,
+static void ov518_pkt_scan(struct gspca_dev *gspca_dev,
+			struct gspca_frame *frame,	/* target */
+			__u8 *data,			/* isoc packet */
+			int len)			/* iso packet length */
+{
+	PDEBUG(D_STREAM, "ov518_pkt_scan: %d bytes", len);
+
+	if (len & 7) {
+		len--;
+		PDEBUG(D_STREAM, "packet number: %d\n", (int)data[len]);
+	}
+
+	/* A false positive here is likely, until OVT gives me
+	 * the definitive SOF/EOF format */
+	if ((!(data[0] | data[1] | data[2] | data[3] | data[5])) && data[6]) {
+		gspca_frame_add(gspca_dev, LAST_PACKET, frame, data, 0);
+		gspca_frame_add(gspca_dev, FIRST_PACKET, frame, data, 0);
+	}
+
+	/* intermediate packet */
+	gspca_frame_add(gspca_dev, INTER_PACKET, frame, data, len);
+}
+
+static void ov519_pkt_scan(struct gspca_dev *gspca_dev,
 			struct gspca_frame *frame,	/* target */
 			__u8 *data,			/* isoc packet */
 			int len)			/* iso packet length */
@@ -1926,6 +2354,27 @@ static void sd_pkt_scan(struct gspca_dev *gspca_dev,
 			data, len);
 }
 
+static void sd_pkt_scan(struct gspca_dev *gspca_dev,
+			struct gspca_frame *frame,	/* target */
+			__u8 *data,			/* isoc packet */
+			int len)			/* iso packet length */
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	switch (sd->bridge) {
+	case BRIDGE_OV511:
+	case BRIDGE_OV511PLUS:
+		break;
+	case BRIDGE_OV518:
+	case BRIDGE_OV518PLUS:
+		ov518_pkt_scan(gspca_dev, frame, data, len);
+		break;
+	case BRIDGE_OV519:
+		ov519_pkt_scan(gspca_dev, frame, data, len);
+		break;
+	}
+}
+
 /* -- management routines -- */
 
 static void setbrightness(struct gspca_dev *gspca_dev)
@@ -1970,6 +2419,7 @@ static void setcontrast(struct gspca_dev *gspca_dev)
 		break;
 	case SEN_OV6630:
 		i2c_w_mask(sd, OV7610_REG_CNT, val >> 4, 0x0f);
+		break;
 	case SEN_OV8610: {
 		static const __u8 ctab[] = {
 			0x03, 0x09, 0x0b, 0x0f, 0x53, 0x6f, 0x35, 0x7f
@@ -2136,19 +2586,21 @@ static const struct sd_desc sd_desc = {
 
 /* -- module initialisation -- */
 static const __devinitdata struct usb_device_id device_table[] = {
-	{USB_DEVICE(0x041e, 0x4052)},
-	{USB_DEVICE(0x041e, 0x405f)},
-	{USB_DEVICE(0x041e, 0x4060)},
-	{USB_DEVICE(0x041e, 0x4061)},
-	{USB_DEVICE(0x041e, 0x4064)},
-	{USB_DEVICE(0x041e, 0x4068)},
-	{USB_DEVICE(0x045e, 0x028c)},
-	{USB_DEVICE(0x054c, 0x0154)},
-	{USB_DEVICE(0x054c, 0x0155)},
-	{USB_DEVICE(0x05a9, 0x0519)},
-	{USB_DEVICE(0x05a9, 0x0530)},
-	{USB_DEVICE(0x05a9, 0x4519)},
-	{USB_DEVICE(0x05a9, 0x8519)},
+	{USB_DEVICE(0x041e, 0x4052), .driver_info = BRIDGE_OV519 },
+	{USB_DEVICE(0x041e, 0x405f), .driver_info = BRIDGE_OV519 },
+	{USB_DEVICE(0x041e, 0x4060), .driver_info = BRIDGE_OV519 },
+	{USB_DEVICE(0x041e, 0x4061), .driver_info = BRIDGE_OV519 },
+	{USB_DEVICE(0x041e, 0x4064), .driver_info = BRIDGE_OV519 },
+	{USB_DEVICE(0x041e, 0x4068), .driver_info = BRIDGE_OV519 },
+	{USB_DEVICE(0x045e, 0x028c), .driver_info = BRIDGE_OV519 },
+	{USB_DEVICE(0x054c, 0x0154), .driver_info = BRIDGE_OV519 },
+	{USB_DEVICE(0x054c, 0x0155), .driver_info = BRIDGE_OV519 },
+	{USB_DEVICE(0x05a9, 0x0518), .driver_info = BRIDGE_OV518 },
+	{USB_DEVICE(0x05a9, 0x0519), .driver_info = BRIDGE_OV519 },
+	{USB_DEVICE(0x05a9, 0x0530), .driver_info = BRIDGE_OV519 },
+	{USB_DEVICE(0x05a9, 0x4519), .driver_info = BRIDGE_OV519 },
+	{USB_DEVICE(0x05a9, 0x8519), .driver_info = BRIDGE_OV519 },
+	{USB_DEVICE(0x05a9, 0xa518), .driver_info = BRIDGE_OV518PLUS },
 	{}
 };
 
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index ebb2ea6b499..f24eceecc5a 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -347,7 +347,8 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_MR97310A v4l2_fourcc('M', '3', '1', '0') /* compressed BGGR bayer */
 #define V4L2_PIX_FMT_SQ905C   v4l2_fourcc('9', '0', '5', 'C') /* compressed RGGB bayer */
 #define V4L2_PIX_FMT_PJPG     v4l2_fourcc('P', 'J', 'P', 'G') /* Pixart 73xx JPEG */
-#define V4L2_PIX_FMT_YVYU    v4l2_fourcc('Y', 'V', 'Y', 'U') /* 16  YVU 4:2:2     */
+#define V4L2_PIX_FMT_YVYU     v4l2_fourcc('Y', 'V', 'Y', 'U') /* 16 YVU 4:2:2 */
+#define V4L2_PIX_FMT_OV518    v4l2_fourcc('O', '5', '1', '8') /* ov518 JPEG */
 
 /*
  *	F O R M A T   E N U M E R A T I O N
-- 
cgit v1.2.3-70-g09d2


From ecfe0cfa3cae9a8402df12d81b159d851b61cf29 Mon Sep 17 00:00:00 2001
From: Uri Shkolnik <uris@siano-ms.com>
Date: Thu, 12 Mar 2009 10:10:40 -0300
Subject: V4L/DVB (11239): sdio: add cards ids for sms (Siano Mobile Silicon)
 MDTV receivers

sdio: add cards id for sms (Siano Mobile Silicon) MDTV receivers

Add SDIO vendor ID, and multiple device IDs for
various SMS-based MDTV SDIO adapters.

Signed-off-by: Uri Shkolnik <uris@siano-ms.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/mmc/sdio_ids.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h
index c7211ab6dd4..39751c8cde9 100644
--- a/include/linux/mmc/sdio_ids.h
+++ b/include/linux/mmc/sdio_ids.h
@@ -28,4 +28,12 @@
 #define SDIO_DEVICE_ID_MARVELL_8688WLAN		0x9104
 #define SDIO_DEVICE_ID_MARVELL_8688BT		0x9105
 
+#define SDIO_VENDOR_ID_SIANO			0x039a
+#define SDIO_DEVICE_ID_SIANO_NOVA_B0		0x0201
+#define SDIO_DEVICE_ID_SIANO_NICE		0x0202
+#define SDIO_DEVICE_ID_SIANO_VEGA_A0		0x0300
+#define SDIO_DEVICE_ID_SIANO_VENICE		0x0301
+#define SDIO_DEVICE_ID_SIANO_NOVA_A0		0x1100
+#define SDIO_DEVICE_ID_SIANO_STELLAR 		0x5347
+
 #endif
-- 
cgit v1.2.3-70-g09d2


From 7d9a73f6dcf4390d256bf19330c81e91523a26d5 Mon Sep 17 00:00:00 2001
From: Frans Pop <elendil@planet.nl>
Date: Wed, 17 Jun 2009 00:16:15 +0200
Subject: PCI PM: consistently use type bool for wake enable variable

Other functions use type bool, so use that for pci_enable_wake as well.

Signed-off-by: Frans Pop <elendil@planet.nl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci.c   | 2 +-
 include/linux/pci.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 7b59fd7c957..ccc0a0ccbef 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1205,7 +1205,7 @@ void pci_pme_active(struct pci_dev *dev, bool enable)
  * Error code depending on the platform is returned if both the platform and
  * the native mechanism fail to enable the generation of wake-up events
  */
-int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable)
+int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable)
 {
 	int error = 0;
 	bool pme_done = false;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 91b06be2f01..62e8452c2ec 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -723,7 +723,7 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state);
 pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state);
 bool pci_pme_capable(struct pci_dev *dev, pci_power_t state);
 void pci_pme_active(struct pci_dev *dev, bool enable);
-int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable);
+int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable);
 int pci_wake_from_d3(struct pci_dev *dev, bool enable);
 pci_power_t pci_target_state(struct pci_dev *dev);
 int pci_prepare_to_sleep(struct pci_dev *dev);
-- 
cgit v1.2.3-70-g09d2


From 08604bd9935dc98fb62ef61d5b7baa7ccc10f8c2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 16 Jun 2009 15:31:12 -0700
Subject: time: move PIT_TICK_RATE to linux/timex.h

PIT_TICK_RATE is currently defined in four architectures, but in three
different places.  While linux/timex.h is not the perfect place for it, it
is still a reasonable replacement for those drivers that traditionally use
asm/timex.h to get CLOCK_TICK_RATE and expect it to be the PIT frequency.

Note that for Alpha, the actual value changed from 1193182UL to 1193180UL.
 This is unlikely to make a difference, and probably can only improve
accuracy.  There was a discussion on the correct value of CLOCK_TICK_RATE
a few years ago, after which every existing instance was getting changed
to 1193182.  According to the specification, it should be
1193181.818181...

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Len Brown <lenb@kernel.org>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Dmitry Torokhov <dtor@mail.ru>
Cc: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/include/asm/8253pit.h   | 7 -------
 arch/alpha/kernel/sys_ruffian.c    | 1 +
 arch/mips/include/asm/i8253.h      | 2 --
 arch/powerpc/include/asm/8253pit.h | 7 -------
 arch/x86/include/asm/timex.h       | 4 +---
 arch/x86/kernel/i8253.c            | 1 +
 arch/x86/kernel/tsc.c              | 1 +
 drivers/clocksource/acpi_pm.c      | 1 +
 drivers/input/joystick/analog.c    | 2 +-
 drivers/input/misc/pcspkr.c        | 1 +
 include/linux/timex.h              | 3 +++
 sound/drivers/pcsp/pcsp.h          | 1 +
 sound/oss/pas2_pcm.c               | 2 +-
 13 files changed, 12 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/asm/8253pit.h b/arch/alpha/include/asm/8253pit.h
index fef5c1450e4..a71c9c1455a 100644
--- a/arch/alpha/include/asm/8253pit.h
+++ b/arch/alpha/include/asm/8253pit.h
@@ -1,10 +1,3 @@
 /*
  * 8253/8254 Programmable Interval Timer
  */
-
-#ifndef _8253PIT_H
-#define _8253PIT_H
-
-#define PIT_TICK_RATE 	1193180UL
-
-#endif
diff --git a/arch/alpha/kernel/sys_ruffian.c b/arch/alpha/kernel/sys_ruffian.c
index f15a329b601..d9f9cfeb993 100644
--- a/arch/alpha/kernel/sys_ruffian.c
+++ b/arch/alpha/kernel/sys_ruffian.c
@@ -14,6 +14,7 @@
 #include <linux/sched.h>
 #include <linux/pci.h>
 #include <linux/ioport.h>
+#include <linux/timex.h>
 #include <linux/init.h>
 
 #include <asm/ptrace.h>
diff --git a/arch/mips/include/asm/i8253.h b/arch/mips/include/asm/i8253.h
index 5dabc870b32..032ca73f181 100644
--- a/arch/mips/include/asm/i8253.h
+++ b/arch/mips/include/asm/i8253.h
@@ -12,8 +12,6 @@
 #define PIT_CH0			0x40
 #define PIT_CH2			0x42
 
-#define PIT_TICK_RATE		1193182UL
-
 extern spinlock_t i8253_lock;
 
 extern void setup_pit_timer(void);
diff --git a/arch/powerpc/include/asm/8253pit.h b/arch/powerpc/include/asm/8253pit.h
index b70d6e53b30..a71c9c1455a 100644
--- a/arch/powerpc/include/asm/8253pit.h
+++ b/arch/powerpc/include/asm/8253pit.h
@@ -1,10 +1,3 @@
-#ifndef _ASM_POWERPC_8253PIT_H
-#define _ASM_POWERPC_8253PIT_H
-
 /*
  * 8253/8254 Programmable Interval Timer
  */
-
-#define PIT_TICK_RATE	1193182UL
-
-#endif	/* _ASM_POWERPC_8253PIT_H */
diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h
index b5c9d45c981..1375cfc9396 100644
--- a/arch/x86/include/asm/timex.h
+++ b/arch/x86/include/asm/timex.h
@@ -4,9 +4,7 @@
 #include <asm/processor.h>
 #include <asm/tsc.h>
 
-/* The PIT ticks at this frequency (in HZ): */
-#define PIT_TICK_RATE		1193182
-
+/* Assume we use the PIT time source for the clock tick */
 #define CLOCK_TICK_RATE		PIT_TICK_RATE
 
 #define ARCH_HAS_READ_CURRENT_TIMER
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index c2e0bb0890d..5cf36c053ac 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -7,6 +7,7 @@
 #include <linux/spinlock.h>
 #include <linux/jiffies.h>
 #include <linux/module.h>
+#include <linux/timex.h>
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/io.h>
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 3e1c057e98f..ae3180c506a 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -9,6 +9,7 @@
 #include <linux/delay.h>
 #include <linux/clocksource.h>
 #include <linux/percpu.h>
+#include <linux/timex.h>
 
 #include <asm/hpet.h>
 #include <asm/timer.h>
diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c
index 40bd8c61c7d..72a633a6ec9 100644
--- a/drivers/clocksource/acpi_pm.c
+++ b/drivers/clocksource/acpi_pm.c
@@ -18,6 +18,7 @@
 
 #include <linux/acpi_pmtmr.h>
 #include <linux/clocksource.h>
+#include <linux/timex.h>
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/pci.h>
diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c
index 356b3a25efa..1c0b529c06a 100644
--- a/drivers/input/joystick/analog.c
+++ b/drivers/input/joystick/analog.c
@@ -35,7 +35,7 @@
 #include <linux/input.h>
 #include <linux/gameport.h>
 #include <linux/jiffies.h>
-#include <asm/timex.h>
+#include <linux/timex.h>
 
 #define DRIVER_DESC	"Analog joystick and gamepad driver"
 
diff --git a/drivers/input/misc/pcspkr.c b/drivers/input/misc/pcspkr.c
index d6a30cee7bc..6d67af5387a 100644
--- a/drivers/input/misc/pcspkr.c
+++ b/drivers/input/misc/pcspkr.c
@@ -17,6 +17,7 @@
 #include <linux/init.h>
 #include <linux/input.h>
 #include <linux/platform_device.h>
+#include <linux/timex.h>
 #include <asm/io.h>
 
 MODULE_AUTHOR("Vojtech Pavlik <vojtech@ucw.cz>");
diff --git a/include/linux/timex.h b/include/linux/timex.h
index 9910e3bd5b3..e6967d10d9e 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -280,6 +280,9 @@ extern int do_adjtimex(struct timex *);
 
 int read_current_timer(unsigned long *timer_val);
 
+/* The clock frequency of the i8253/i8254 PIT */
+#define PIT_TICK_RATE 1193182ul
+
 #endif /* KERNEL */
 
 #endif /* LINUX_TIMEX_H */
diff --git a/sound/drivers/pcsp/pcsp.h b/sound/drivers/pcsp/pcsp.h
index cdef2664218..174dd2ff0f2 100644
--- a/sound/drivers/pcsp/pcsp.h
+++ b/sound/drivers/pcsp/pcsp.h
@@ -10,6 +10,7 @@
 #define __PCSP_H__
 
 #include <linux/hrtimer.h>
+#include <linux/timex.h>
 #if defined(CONFIG_MIPS) || defined(CONFIG_X86)
 /* Use the global PIT lock ! */
 #include <asm/i8253.h>
diff --git a/sound/oss/pas2_pcm.c b/sound/oss/pas2_pcm.c
index 36c3ea62086..8f7d175767a 100644
--- a/sound/oss/pas2_pcm.c
+++ b/sound/oss/pas2_pcm.c
@@ -17,7 +17,7 @@
 
 #include <linux/init.h>
 #include <linux/spinlock.h>
-#include <asm/timex.h>
+#include <linux/timex.h>
 #include "sound_config.h"
 
 #include "pas2.h"
-- 
cgit v1.2.3-70-g09d2


From 3b0fde0fac19c180317eb0601b3504083f4b9bf5 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Tue, 16 Jun 2009 15:31:16 -0700
Subject: firmware_map: fix hang with x86/32bit

Addresses http://bugzilla.kernel.org/show_bug.cgi?id=13484

Peer reported:
| The bug is introduced from kernel 2.6.27, if E820 table reserve the memory
| above 4G in 32bit OS(BIOS-e820: 00000000fff80000 - 0000000120000000
| (reserved)), system will report Int 6 error and hang up. The bug is caused by
| the following code in drivers/firmware/memmap.c, the resource_size_t is 32bit
| variable in 32bit OS, the BUG_ON() will be invoked to result in the Int 6
| error. I try the latest 32bit Ubuntu and Fedora distributions, all hit this
| bug.
|======
|static int firmware_map_add_entry(resource_size_t start, resource_size_t end,
|                  const char *type,
|                  struct firmware_map_entry *entry)

and it only happen with CONFIG_PHYS_ADDR_T_64BIT is not set.

it turns out we need to pass u64 instead of resource_size_t for that.

[akpm@linux-foundation.org: add comment]
Reported-and-tested-by: Peer Chen <pchen@nvidia.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/firmware/memmap.c    | 16 +++++++++-------
 include/linux/firmware-map.h | 12 ++++--------
 2 files changed, 13 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c
index 05aa2d406ac..d5ea8a68d33 100644
--- a/drivers/firmware/memmap.c
+++ b/drivers/firmware/memmap.c
@@ -31,8 +31,12 @@
  * information is necessary as for the resource tree.
  */
 struct firmware_map_entry {
-	resource_size_t		start;	/* start of the memory range */
-	resource_size_t		end;	/* end of the memory range (incl.) */
+	/*
+	 * start and end must be u64 rather than resource_size_t, because e820
+	 * resources can lie at addresses above 4G.
+	 */
+	u64			start;	/* start of the memory range */
+	u64			end;	/* end of the memory range (incl.) */
 	const char		*type;	/* type of the memory range */
 	struct list_head	list;	/* entry for the linked list */
 	struct kobject		kobj;   /* kobject for each entry */
@@ -101,7 +105,7 @@ static LIST_HEAD(map_entries);
  * Common implementation of firmware_map_add() and firmware_map_add_early()
  * which expects a pre-allocated struct firmware_map_entry.
  **/
-static int firmware_map_add_entry(resource_size_t start, resource_size_t end,
+static int firmware_map_add_entry(u64 start, u64 end,
 				  const char *type,
 				  struct firmware_map_entry *entry)
 {
@@ -132,8 +136,7 @@ static int firmware_map_add_entry(resource_size_t start, resource_size_t end,
  *
  * Returns 0 on success, or -ENOMEM if no memory could be allocated.
  **/
-int firmware_map_add(resource_size_t start, resource_size_t end,
-		     const char *type)
+int firmware_map_add(u64 start, u64 end, const char *type)
 {
 	struct firmware_map_entry *entry;
 
@@ -157,8 +160,7 @@ int firmware_map_add(resource_size_t start, resource_size_t end,
  *
  * Returns 0 on success, or -ENOMEM if no memory could be allocated.
  **/
-int __init firmware_map_add_early(resource_size_t start, resource_size_t end,
-				  const char *type)
+int __init firmware_map_add_early(u64 start, u64 end, const char *type)
 {
 	struct firmware_map_entry *entry;
 
diff --git a/include/linux/firmware-map.h b/include/linux/firmware-map.h
index cca686b3912..875451f1373 100644
--- a/include/linux/firmware-map.h
+++ b/include/linux/firmware-map.h
@@ -24,21 +24,17 @@
  */
 #ifdef CONFIG_FIRMWARE_MEMMAP
 
-int firmware_map_add(resource_size_t start, resource_size_t end,
-		     const char *type);
-int firmware_map_add_early(resource_size_t start, resource_size_t end,
-			   const char *type);
+int firmware_map_add(u64 start, u64 end, const char *type);
+int firmware_map_add_early(u64 start, u64 end, const char *type);
 
 #else /* CONFIG_FIRMWARE_MEMMAP */
 
-static inline int firmware_map_add(resource_size_t start, resource_size_t end,
-				   const char *type)
+static inline int firmware_map_add(u64 start, u64 end, const char *type)
 {
 	return 0;
 }
 
-static inline int firmware_map_add_early(resource_size_t start,
-					 resource_size_t end, const char *type)
+static inline int firmware_map_add_early(u64 start, u64 end, const char *type)
 {
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From bb1f17b0372de93758653ca3454bc0df18dc2e5c Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 16 Jun 2009 15:31:18 -0700
Subject: mm: consolidate init_mm definition

* create mm/init-mm.c, move init_mm there
* remove INIT_MM, initialize init_mm with C99 initializer
* unexport init_mm on all arches:

  init_mm is already unexported on x86.

  One strange place is some OMAP driver (drivers/video/omap/) which
  won't build modular, but it's already wants get_vm_area() export.
  Somebody should look there.

[akpm@linux-foundation.org: add missing #includes]
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Mike Frysinger <vapier.adi@gmail.com>
Cc: Americo Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/kernel/init_task.c     |  3 ---
 arch/arm/kernel/init_task.c       |  4 ----
 arch/avr32/kernel/init_task.c     |  4 ----
 arch/blackfin/kernel/init_task.c  |  4 ----
 arch/cris/kernel/process.c        |  4 ----
 arch/frv/kernel/init_task.c       |  4 ----
 arch/h8300/kernel/init_task.c     |  4 ----
 arch/ia64/kernel/init_task.c      |  4 ----
 arch/m32r/kernel/init_task.c      |  4 ----
 arch/m68k/kernel/process.c        |  4 ----
 arch/m68knommu/kernel/init_task.c |  4 ----
 arch/mips/kernel/init_task.c      |  4 ----
 arch/mn10300/kernel/init_task.c   |  3 ---
 arch/parisc/kernel/init_task.c    |  4 ----
 arch/powerpc/kernel/init_task.c   |  4 ----
 arch/s390/kernel/init_task.c      |  4 ----
 arch/sh/kernel/init_task.c        |  3 ---
 arch/sparc/kernel/init_task.c     |  3 ---
 arch/um/kernel/init_task.c        |  3 ---
 arch/x86/kernel/init_task.c       |  1 -
 arch/xtensa/kernel/init_task.c    |  4 ----
 include/linux/init_task.h         | 12 ------------
 mm/Makefile                       |  1 +
 mm/init-mm.c                      | 20 ++++++++++++++++++++
 24 files changed, 21 insertions(+), 88 deletions(-)
 create mode 100644 mm/init-mm.c

(limited to 'include')

diff --git a/arch/alpha/kernel/init_task.c b/arch/alpha/kernel/init_task.c
index c2938e574a4..19b86328ffd 100644
--- a/arch/alpha/kernel/init_task.c
+++ b/arch/alpha/kernel/init_task.c
@@ -10,10 +10,7 @@
 
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
 struct task_struct init_task = INIT_TASK(init_task);
-
-EXPORT_SYMBOL(init_mm);
 EXPORT_SYMBOL(init_task);
 
 union thread_union init_thread_union
diff --git a/arch/arm/kernel/init_task.c b/arch/arm/kernel/init_task.c
index e859af34946..3f470866bb8 100644
--- a/arch/arm/kernel/init_task.c
+++ b/arch/arm/kernel/init_task.c
@@ -14,10 +14,6 @@
 
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
 /*
  * Initial thread structure.
  *
diff --git a/arch/avr32/kernel/init_task.c b/arch/avr32/kernel/init_task.c
index 993d56ee3cf..57ec9f2dcd9 100644
--- a/arch/avr32/kernel/init_task.c
+++ b/arch/avr32/kernel/init_task.c
@@ -15,10 +15,6 @@
 
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
 /*
  * Initial thread structure. Must be aligned on an 8192-byte boundary.
  */
diff --git a/arch/blackfin/kernel/init_task.c b/arch/blackfin/kernel/init_task.c
index 2c228c02097..c26c34de9f3 100644
--- a/arch/blackfin/kernel/init_task.c
+++ b/arch/blackfin/kernel/init_task.c
@@ -35,10 +35,6 @@
 
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-
-struct mm_struct init_mm = INIT_MM(init_mm);
-EXPORT_SYMBOL(init_mm);
-
 /*
  * Initial task structure.
  *
diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c
index 4df0b320d52..51dcd04d277 100644
--- a/arch/cris/kernel/process.c
+++ b/arch/cris/kernel/process.c
@@ -38,10 +38,6 @@
 
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
 /*
  * Initial thread structure.
  *
diff --git a/arch/frv/kernel/init_task.c b/arch/frv/kernel/init_task.c
index 29429a8b7f6..1d3df1d9495 100644
--- a/arch/frv/kernel/init_task.c
+++ b/arch/frv/kernel/init_task.c
@@ -12,10 +12,6 @@
 
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
 /*
  * Initial thread structure.
  *
diff --git a/arch/h8300/kernel/init_task.c b/arch/h8300/kernel/init_task.c
index cb5dc552da9..089c65ed6eb 100644
--- a/arch/h8300/kernel/init_task.c
+++ b/arch/h8300/kernel/init_task.c
@@ -14,10 +14,6 @@
 
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
 /*
  * Initial task structure.
  *
diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c
index 5b0e830c6f3..c475fc281be 100644
--- a/arch/ia64/kernel/init_task.c
+++ b/arch/ia64/kernel/init_task.c
@@ -19,10 +19,6 @@
 
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
 /*
  * Initial task structure.
  *
diff --git a/arch/m32r/kernel/init_task.c b/arch/m32r/kernel/init_task.c
index 016885c6f26..fce57e5d3f9 100644
--- a/arch/m32r/kernel/init_task.c
+++ b/arch/m32r/kernel/init_task.c
@@ -13,10 +13,6 @@
 
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
 /*
  * Initial thread structure.
  *
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index ec37fb56c12..72bad65dba3 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -42,10 +42,6 @@
  */
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
 union thread_union init_thread_union
 __attribute__((section(".data.init_task"), aligned(THREAD_SIZE)))
        = { INIT_THREAD_INFO(init_task) };
diff --git a/arch/m68knommu/kernel/init_task.c b/arch/m68knommu/kernel/init_task.c
index fe282de1d59..45e97a207fe 100644
--- a/arch/m68knommu/kernel/init_task.c
+++ b/arch/m68knommu/kernel/init_task.c
@@ -14,10 +14,6 @@
 
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
 /*
  * Initial task structure.
  *
diff --git a/arch/mips/kernel/init_task.c b/arch/mips/kernel/init_task.c
index 149cd914526..5b457a40c78 100644
--- a/arch/mips/kernel/init_task.c
+++ b/arch/mips/kernel/init_task.c
@@ -11,10 +11,6 @@
 
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
 /*
  * Initial thread structure.
  *
diff --git a/arch/mn10300/kernel/init_task.c b/arch/mn10300/kernel/init_task.c
index 5ac3566f8c9..80d423b80af 100644
--- a/arch/mn10300/kernel/init_task.c
+++ b/arch/mn10300/kernel/init_task.c
@@ -20,9 +20,6 @@
 
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-EXPORT_SYMBOL(init_mm);
-
 /*
  * Initial thread structure.
  *
diff --git a/arch/parisc/kernel/init_task.c b/arch/parisc/kernel/init_task.c
index 1e25a45d64c..82974b20fc1 100644
--- a/arch/parisc/kernel/init_task.c
+++ b/arch/parisc/kernel/init_task.c
@@ -36,10 +36,6 @@
 
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
 /*
  * Initial task structure.
  *
diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c
index 688b329800b..ffc4253fef5 100644
--- a/arch/powerpc/kernel/init_task.c
+++ b/arch/powerpc/kernel/init_task.c
@@ -9,10 +9,6 @@
 
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
 /*
  * Initial thread structure.
  *
diff --git a/arch/s390/kernel/init_task.c b/arch/s390/kernel/init_task.c
index 7db95c0b869..fe787f9e5f3 100644
--- a/arch/s390/kernel/init_task.c
+++ b/arch/s390/kernel/init_task.c
@@ -18,10 +18,6 @@
 
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
 /*
  * Initial thread structure.
  *
diff --git a/arch/sh/kernel/init_task.c b/arch/sh/kernel/init_task.c
index 80c35ff71d5..1719957c0a6 100644
--- a/arch/sh/kernel/init_task.c
+++ b/arch/sh/kernel/init_task.c
@@ -10,9 +10,6 @@
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct pt_regs fake_swapper_regs;
-struct mm_struct init_mm = INIT_MM(init_mm);
-EXPORT_SYMBOL(init_mm);
-
 /*
  * Initial thread structure.
  *
diff --git a/arch/sparc/kernel/init_task.c b/arch/sparc/kernel/init_task.c
index f28cb8278e9..28125c5b3d3 100644
--- a/arch/sparc/kernel/init_task.c
+++ b/arch/sparc/kernel/init_task.c
@@ -10,10 +10,7 @@
 
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
 struct task_struct init_task = INIT_TASK(init_task);
-
-EXPORT_SYMBOL(init_mm);
 EXPORT_SYMBOL(init_task);
 
 /* .text section in head.S is aligned at 8k boundary and this gets linked
diff --git a/arch/um/kernel/init_task.c b/arch/um/kernel/init_task.c
index 806d381947b..b25121b537d 100644
--- a/arch/um/kernel/init_task.c
+++ b/arch/um/kernel/init_task.c
@@ -10,11 +10,8 @@
 #include "linux/mqueue.h"
 #include "asm/uaccess.h"
 
-struct mm_struct init_mm = INIT_MM(init_mm);
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-EXPORT_SYMBOL(init_mm);
-
 /*
  * Initial task structure.
  *
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index df3bf269bea..270ff83efc1 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -12,7 +12,6 @@
 
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
 
 /*
  * Initial thread structure.
diff --git a/arch/xtensa/kernel/init_task.c b/arch/xtensa/kernel/init_task.c
index e07f5c9fcd3..c4302f0e4ba 100644
--- a/arch/xtensa/kernel/init_task.c
+++ b/arch/xtensa/kernel/init_task.c
@@ -23,10 +23,6 @@
 
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
 union thread_union init_thread_union
 	__attribute__((__section__(".data.init_task"))) =
 { INIT_THREAD_INFO(init_task) };
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 28b1f30601b..5368fbdc780 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -15,18 +15,6 @@
 extern struct files_struct init_files;
 extern struct fs_struct init_fs;
 
-#define INIT_MM(name) \
-{			 					\
-	.mm_rb		= RB_ROOT,				\
-	.pgd		= swapper_pg_dir, 			\
-	.mm_users	= ATOMIC_INIT(2), 			\
-	.mm_count	= ATOMIC_INIT(1), 			\
-	.mmap_sem	= __RWSEM_INITIALIZER(name.mmap_sem),	\
-	.page_table_lock =  __SPIN_LOCK_UNLOCKED(name.page_table_lock),	\
-	.mmlist		= LIST_HEAD_INIT(name.mmlist),		\
-	.cpu_vm_mask	= CPU_MASK_ALL,				\
-}
-
 #define INIT_SIGNALS(sig) {						\
 	.count		= ATOMIC_INIT(1), 				\
 	.wait_chldexit	= __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\
diff --git a/mm/Makefile b/mm/Makefile
index e89acb090b4..cf76be785ad 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -12,6 +12,7 @@ obj-y			:= bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
 			   readahead.o swap.o truncate.o vmscan.o shmem.o \
 			   prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
 			   page_isolation.o mm_init.o $(mmu-y)
+obj-y += init-mm.o
 
 obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o
 obj-$(CONFIG_BOUNCE)	+= bounce.o
diff --git a/mm/init-mm.c b/mm/init-mm.c
new file mode 100644
index 00000000000..57aba0da966
--- /dev/null
+++ b/mm/init-mm.c
@@ -0,0 +1,20 @@
+#include <linux/mm_types.h>
+#include <linux/rbtree.h>
+#include <linux/rwsem.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/cpumask.h>
+
+#include <asm/atomic.h>
+#include <asm/pgtable.h>
+
+struct mm_struct init_mm = {
+	.mm_rb		= RB_ROOT,
+	.pgd		= swapper_pg_dir,
+	.mm_users	= ATOMIC_INIT(2),
+	.mm_count	= ATOMIC_INIT(1),
+	.mmap_sem	= __RWSEM_INITIALIZER(init_mm.mmap_sem),
+	.page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
+	.mmlist		= LIST_HEAD_INIT(init_mm.mmlist),
+	.cpu_vm_mask	= CPU_MASK_ALL,
+};
-- 
cgit v1.2.3-70-g09d2


From 1ebf26a9b338534def47f307c6c8694b6dfc0a79 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Tue, 16 Jun 2009 15:31:19 -0700
Subject: readahead: make mmap_miss an unsigned int

This makes the performance impact of possible mmap_miss wrap around to be
temporary and tolerable: i.e.  MMAP_LOTSAMISS=100 extra readarounds.

Otherwise if ever mmap_miss wraps around to negative, it takes INT_MAX
cache misses to bring it back to normal state.  During the time mmap
readaround will be _enabled_ for whatever wild random workload.  That's
almost permanent performance impact.

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Cc: Ying Han <yinghan@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index ede84fa7da5..8146e0264ef 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -879,7 +879,7 @@ struct file_ra_state {
 					   there are only # of pages ahead */
 
 	unsigned int ra_pages;		/* Maximum readahead window */
-	int mmap_miss;			/* Cache miss stat for mmap accesses */
+	unsigned int mmap_miss;		/* Cache miss stat for mmap accesses */
 	loff_t prev_pos;		/* Cache last read() position */
 };
 
-- 
cgit v1.2.3-70-g09d2


From d30a11004e3411909f2448546f036a011978062e Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Tue, 16 Jun 2009 15:31:30 -0700
Subject: readahead: record mmap read-around states in file_ra_state

Mmap read-around now shares the same code style and data structure with
readahead code.

This also removes do_page_cache_readahead().  Its last user, mmap
read-around, has been changed to call ra_submit().

The no-readahead-if-congested logic is dumped by the way.  Users will be
pretty sensitive about the slow loading of executables.  So it's
unfavorable to disabled mmap read-around on a congested queue.

[akpm@linux-foundation.org: coding-style fixes]
Cc: Nick Piggin <npiggin@suse.de>
Signed-off-by: Fengguang Wu <wfg@mail.ustc.edu.cn>
Cc: Ying Han <yinghan@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  5 +++--
 mm/filemap.c       | 12 +++++++-----
 mm/readahead.c     | 23 ++---------------------
 3 files changed, 12 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ad613ed66ab..33da7f53884 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1178,8 +1178,6 @@ void task_dirty_inc(struct task_struct *tsk);
 #define VM_MAX_READAHEAD	128	/* kbytes */
 #define VM_MIN_READAHEAD	16	/* kbytes (includes current page) */
 
-int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
-			pgoff_t offset, unsigned long nr_to_read);
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read);
 
@@ -1197,6 +1195,9 @@ void page_cache_async_readahead(struct address_space *mapping,
 				unsigned long size);
 
 unsigned long max_sane_readahead(unsigned long nr);
+unsigned long ra_submit(struct file_ra_state *ra,
+			struct address_space *mapping,
+			struct file *filp);
 
 /* Do stack extension */
 extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
diff --git a/mm/filemap.c b/mm/filemap.c
index 5c0c6518f34..734891d0663 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1488,13 +1488,15 @@ static void do_sync_mmap_readahead(struct vm_area_struct *vma,
 	if (ra->mmap_miss > MMAP_LOTSAMISS)
 		return;
 
+	/*
+	 * mmap read-around
+	 */
 	ra_pages = max_sane_readahead(ra->ra_pages);
 	if (ra_pages) {
-		pgoff_t start = 0;
-
-		if (offset > ra_pages / 2)
-			start = offset - ra_pages / 2;
-		do_page_cache_readahead(mapping, file, start, ra_pages);
+		ra->start = max_t(long, 0, offset - ra_pages/2);
+		ra->size = ra_pages;
+		ra->async_size = 0;
+		ra_submit(ra, mapping, file);
 	}
 }
 
diff --git a/mm/readahead.c b/mm/readahead.c
index d7c6e143a12..a7f01fcce9e 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -133,15 +133,12 @@ out:
 }
 
 /*
- * do_page_cache_readahead actually reads a chunk of disk.  It allocates all
+ * __do_page_cache_readahead() actually reads a chunk of disk.  It allocates all
  * the pages first, then submits them all for I/O. This avoids the very bad
  * behaviour which would occur if page allocations are causing VM writeback.
  * We really don't want to intermingle reads and writes like that.
  *
  * Returns the number of pages requested, or the maximum amount of I/O allowed.
- *
- * do_page_cache_readahead() returns -1 if it encountered request queue
- * congestion.
  */
 static int
 __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
@@ -231,22 +228,6 @@ int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 	return ret;
 }
 
-/*
- * This version skips the IO if the queue is read-congested, and will tell the
- * block layer to abandon the readahead if request allocation would block.
- *
- * force_page_cache_readahead() will ignore queue congestion and will block on
- * request queues.
- */
-int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
-			pgoff_t offset, unsigned long nr_to_read)
-{
-	if (bdi_read_congested(mapping->backing_dev_info))
-		return -1;
-
-	return __do_page_cache_readahead(mapping, filp, offset, nr_to_read, 0);
-}
-
 /*
  * Given a desired number of PAGE_CACHE_SIZE readahead pages, return a
  * sensible upper limit.
@@ -260,7 +241,7 @@ unsigned long max_sane_readahead(unsigned long nr)
 /*
  * Submit IO for the read-ahead request in file_ra_state.
  */
-static unsigned long ra_submit(struct file_ra_state *ra,
+unsigned long ra_submit(struct file_ra_state *ra,
 		       struct address_space *mapping, struct file *filp)
 {
 	int actual;
-- 
cgit v1.2.3-70-g09d2


From dc566127dd161b6c997466a2349ac179527ea89b Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Tue, 16 Jun 2009 15:31:32 -0700
Subject: radix-tree: add radix_tree_prev_hole()

The counterpart of radix_tree_next_hole(). To be used by context readahead.

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Cc: Vladislav Bolkhovitin <vst@vlnb.net>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Ying Han <yinghan@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/radix-tree.h |  2 ++
 lib/radix-tree.c           | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)

(limited to 'include')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 355f6e80db0..c5da7491809 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -167,6 +167,8 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
 			unsigned long first_index, unsigned int max_items);
 unsigned long radix_tree_next_hole(struct radix_tree_root *root,
 				unsigned long index, unsigned long max_scan);
+unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
+				unsigned long index, unsigned long max_scan);
 int radix_tree_preload(gfp_t gfp_mask);
 void radix_tree_init(void);
 void *radix_tree_tag_set(struct radix_tree_root *root,
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 4bb42a0344e..5301a52cdb4 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -666,6 +666,43 @@ unsigned long radix_tree_next_hole(struct radix_tree_root *root,
 }
 EXPORT_SYMBOL(radix_tree_next_hole);
 
+/**
+ *	radix_tree_prev_hole    -    find the prev hole (not-present entry)
+ *	@root:		tree root
+ *	@index:		index key
+ *	@max_scan:	maximum range to search
+ *
+ *	Search backwards in the range [max(index-max_scan+1, 0), index]
+ *	for the first hole.
+ *
+ *	Returns: the index of the hole if found, otherwise returns an index
+ *	outside of the set specified (in which case 'index - return >= max_scan'
+ *	will be true). In rare cases of wrap-around, LONG_MAX will be returned.
+ *
+ *	radix_tree_next_hole may be called under rcu_read_lock. However, like
+ *	radix_tree_gang_lookup, this will not atomically search a snapshot of
+ *	the tree at a single point in time. For example, if a hole is created
+ *	at index 10, then subsequently a hole is created at index 5,
+ *	radix_tree_prev_hole covering both indexes may return 5 if called under
+ *	rcu_read_lock.
+ */
+unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
+				   unsigned long index, unsigned long max_scan)
+{
+	unsigned long i;
+
+	for (i = 0; i < max_scan; i++) {
+		if (!radix_tree_lookup(root, index))
+			break;
+		index--;
+		if (index == LONG_MAX)
+			break;
+	}
+
+	return index;
+}
+EXPORT_SYMBOL(radix_tree_prev_hole);
+
 static unsigned int
 __lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
 	unsigned int max_items, unsigned long *next_index)
-- 
cgit v1.2.3-70-g09d2


From d2bf6be8ab63aa84e6149aac934649aadf3828b1 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Tue, 16 Jun 2009 15:31:39 -0700
Subject: mm: clean up get_user_pages_fast() documentation

Move more documentation for get_user_pages_fast into the new kerneldoc comment.
Add some comments for get_user_pages as well.

Also, move get_user_pages_fast declaration up to get_user_pages. It wasn't
there initially because it was once a static inline function.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Andy Grover <andy.grover@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 20 +++++---------------
 mm/memory.c        | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 mm/util.c          | 16 ++++++++++++----
 3 files changed, 67 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 33da7f53884..a880161a385 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -824,8 +824,11 @@ static inline int handle_mm_fault(struct mm_struct *mm,
 extern int make_pages_present(unsigned long addr, unsigned long end);
 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
 
-int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
-		int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
+int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+			unsigned long start, int len, int write, int force,
+			struct page **pages, struct vm_area_struct **vmas);
+int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			struct page **pages);
 
 extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
 extern void do_invalidatepage(struct page *page, unsigned long offset);
@@ -849,19 +852,6 @@ extern int mprotect_fixup(struct vm_area_struct *vma,
 			  struct vm_area_struct **pprev, unsigned long start,
 			  unsigned long end, unsigned long newflags);
 
-/*
- * get_user_pages_fast provides equivalent functionality to get_user_pages,
- * operating on current and current->mm (force=0 and doesn't return any vmas).
- *
- * get_user_pages_fast may take mmap_sem and page tables, so no assumptions
- * can be made about locking. get_user_pages_fast is to be implemented in a
- * way that is advantageous (vs get_user_pages()) when the user memory area is
- * already faulted in and present in ptes. However if the pages have to be
- * faulted in, it may turn out to be slightly slower).
- */
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
-			struct page **pages);
-
 /*
  * A callback you can register to apply pressure to ageable caches.
  *
diff --git a/mm/memory.c b/mm/memory.c
index 4126dd16778..891bad0613f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1360,6 +1360,56 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 	return i;
 }
 
+/**
+ * get_user_pages() - pin user pages in memory
+ * @tsk:	task_struct of target task
+ * @mm:		mm_struct of target mm
+ * @start:	starting user address
+ * @len:	number of pages from start to pin
+ * @write:	whether pages will be written to by the caller
+ * @force:	whether to force write access even if user mapping is
+ *		readonly. This will result in the page being COWed even
+ *		in MAP_SHARED mappings. You do not want this.
+ * @pages:	array that receives pointers to the pages pinned.
+ *		Should be at least nr_pages long. Or NULL, if caller
+ *		only intends to ensure the pages are faulted in.
+ * @vmas:	array of pointers to vmas corresponding to each page.
+ *		Or NULL if the caller does not require them.
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If len is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno. Each page returned must be released
+ * with a put_page() call when it is finished with. vmas will only
+ * remain valid while mmap_sem is held.
+ *
+ * Must be called with mmap_sem held for read or write.
+ *
+ * get_user_pages walks a process's page tables and takes a reference to
+ * each struct page that each user address corresponds to at a given
+ * instant. That is, it takes the page that would be accessed if a user
+ * thread accesses the given user virtual address at that instant.
+ *
+ * This does not guarantee that the page exists in the user mappings when
+ * get_user_pages returns, and there may even be a completely different
+ * page there in some cases (eg. if mmapped pagecache has been invalidated
+ * and subsequently re faulted). However it does guarantee that the page
+ * won't be freed completely. And mostly callers simply care that the page
+ * contains data that was valid *at some point in time*. Typically, an IO
+ * or similar operation cannot guarantee anything stronger anyway because
+ * locks can't be held over the syscall boundary.
+ *
+ * If write=0, the page must not be written to. If the page is written to,
+ * set_page_dirty (or set_page_dirty_lock, as appropriate) must be called
+ * after the page is finished with, and before put_page is called.
+ *
+ * get_user_pages is typically used for fewer-copy IO operations, to get a
+ * handle on the memory by some means other than accesses via the user virtual
+ * addresses. The pages may be submitted for DMA to devices or accessed via
+ * their kernel linear mapping (via the kmap APIs). Care should be taken to
+ * use the correct cache flushing APIs.
+ *
+ * See also get_user_pages_fast, for performance critical applications.
+ */
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		unsigned long start, int len, int write, int force,
 		struct page **pages, struct vm_area_struct **vmas)
diff --git a/mm/util.c b/mm/util.c
index abc65aa7cdf..d5d2213728c 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -233,13 +233,21 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
  * @pages:	array that receives pointers to the pages pinned.
  *		Should be at least nr_pages long.
  *
- * Attempt to pin user pages in memory without taking mm->mmap_sem.
- * If not successful, it will fall back to taking the lock and
- * calling get_user_pages().
- *
  * Returns number of pages pinned. This may be fewer than the number
  * requested. If nr_pages is 0 or negative, returns 0. If no pages
  * were pinned, returns -errno.
+ *
+ * get_user_pages_fast provides equivalent functionality to get_user_pages,
+ * operating on current and current->mm, with force=0 and vma=NULL. However
+ * unlike get_user_pages, it must be called without mmap_sem held.
+ *
+ * get_user_pages_fast may take mmap_sem and page table locks, so no
+ * assumptions can be made about lack of locking. get_user_pages_fast is to be
+ * implemented in a way that is advantageous (vs get_user_pages()) when the
+ * user memory area is already faulted in and present in ptes. However if the
+ * pages have to be faulted in, it may turn out to be slightly slower so
+ * callers need to carefully consider what to use. On many architectures,
+ * get_user_pages_fast simply falls back to get_user_pages.
  */
 int __attribute__((weak)) get_user_pages_fast(unsigned long start,
 				int nr_pages, int write, struct page **pages)
-- 
cgit v1.2.3-70-g09d2


From 58568d2a8215cb6f55caf2332017d7bdff954e1c Mon Sep 17 00:00:00 2001
From: Miao Xie <miaox@cn.fujitsu.com>
Date: Tue, 16 Jun 2009 15:31:49 -0700
Subject: cpuset,mm: update tasks' mems_allowed in time

Fix allocating page cache/slab object on the unallowed node when memory
spread is set by updating tasks' mems_allowed after its cpuset's mems is
changed.

In order to update tasks' mems_allowed in time, we must modify the code of
memory policy.  Because the memory policy is applied in the process's
context originally.  After applying this patch, one task directly
manipulates anothers mems_allowed, and we use alloc_lock in the
task_struct to protect mems_allowed and memory policy of the task.

But in the fast path, we didn't use lock to protect them, because adding a
lock may lead to performance regression.  But if we don't add a lock,the
task might see no nodes when changing cpuset's mems_allowed to some
non-overlapping set.  In order to avoid it, we set all new allowed nodes,
then clear newly disallowed ones.

[lee.schermerhorn@hp.com:
  The rework of mpol_new() to extract the adjusting of the node mask to
  apply cpuset and mpol flags "context" breaks set_mempolicy() and mbind()
  with MPOL_PREFERRED and a NULL nodemask--i.e., explicit local
  allocation.  Fix this by adding the check for MPOL_PREFERRED and empty
  node mask to mpol_new_mpolicy().

  Remove the now unneeded 'nodes = NULL' from mpol_new().

  Note that mpol_new_mempolicy() is always called with a non-NULL
  'nodes' parameter now that it has been removed from mpol_new().
  Therefore, we don't need to test nodes for NULL before testing it for
  'empty'.  However, just to be extra paranoid, add a VM_BUG_ON() to
  verify this assumption.]
[lee.schermerhorn@hp.com:

  I don't think the function name 'mpol_new_mempolicy' is descriptive
  enough to differentiate it from mpol_new().

  This function applies cpuset set context, usually constraining nodes
  to those allowed by the cpuset.  However, when the 'RELATIVE_NODES flag
  is set, it also translates the nodes.  So I settled on
  'mpol_set_nodemask()', because the comment block for mpol_new() mentions
  that we need to call this function to "set nodes".

  Some additional minor line length, whitespace and typo cleanup.]
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Paul Menage <menage@google.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpuset.h |  13 ++--
 include/linux/sched.h  |   8 +--
 init/main.c            |   6 +-
 kernel/cpuset.c        | 184 +++++++++++++------------------------------------
 kernel/kthread.c       |   2 +
 mm/mempolicy.c         | 143 +++++++++++++++++++++++++++-----------
 mm/page_alloc.c        |   5 +-
 7 files changed, 170 insertions(+), 191 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 05ea1dd7d68..a5740fc4d04 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -18,7 +18,6 @@
 
 extern int number_of_cpusets;	/* How many cpusets are defined in system? */
 
-extern int cpuset_init_early(void);
 extern int cpuset_init(void);
 extern void cpuset_init_smp(void);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
@@ -27,7 +26,6 @@ extern void cpuset_cpus_allowed_locked(struct task_struct *p,
 extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
 #define cpuset_current_mems_allowed (current->mems_allowed)
 void cpuset_init_current_mems_allowed(void);
-void cpuset_update_task_memory_state(void);
 int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask);
 
 extern int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask);
@@ -92,9 +90,13 @@ extern void rebuild_sched_domains(void);
 
 extern void cpuset_print_task_mems_allowed(struct task_struct *p);
 
+static inline void set_mems_allowed(nodemask_t nodemask)
+{
+	current->mems_allowed = nodemask;
+}
+
 #else /* !CONFIG_CPUSETS */
 
-static inline int cpuset_init_early(void) { return 0; }
 static inline int cpuset_init(void) { return 0; }
 static inline void cpuset_init_smp(void) {}
 
@@ -116,7 +118,6 @@ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
 
 #define cpuset_current_mems_allowed (node_states[N_HIGH_MEMORY])
 static inline void cpuset_init_current_mems_allowed(void) {}
-static inline void cpuset_update_task_memory_state(void) {}
 
 static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
 {
@@ -188,6 +189,10 @@ static inline void cpuset_print_task_mems_allowed(struct task_struct *p)
 {
 }
 
+static inline void set_mems_allowed(nodemask_t nodemask)
+{
+}
+
 #endif /* !CONFIG_CPUSETS */
 
 #endif /* _LINUX_CPUSET_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c900aa53007..1048bf50540 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1318,7 +1318,8 @@ struct task_struct {
 /* Thread group tracking */
    	u32 parent_exec_id;
    	u32 self_exec_id;
-/* Protection of (de-)allocation: mm, files, fs, tty, keyrings */
+/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
+ * mempolicy */
 	spinlock_t alloc_lock;
 
 #ifdef CONFIG_GENERIC_HARDIRQS
@@ -1386,8 +1387,7 @@ struct task_struct {
 	cputime_t acct_timexpd;	/* stime + utime since last update */
 #endif
 #ifdef CONFIG_CPUSETS
-	nodemask_t mems_allowed;
-	int cpuset_mems_generation;
+	nodemask_t mems_allowed;	/* Protected by alloc_lock */
 	int cpuset_mem_spread_rotor;
 #endif
 #ifdef CONFIG_CGROUPS
@@ -1410,7 +1410,7 @@ struct task_struct {
 	struct list_head perf_counter_list;
 #endif
 #ifdef CONFIG_NUMA
-	struct mempolicy *mempolicy;
+	struct mempolicy *mempolicy;	/* Protected by alloc_lock */
 	short il_next;
 #endif
 	atomic_t fs_excl;	/* holding fs exclusive resources */
diff --git a/init/main.c b/init/main.c
index f6204f712e7..5e0d3f047ea 100644
--- a/init/main.c
+++ b/init/main.c
@@ -670,7 +670,6 @@ asmlinkage void __init start_kernel(void)
 		initrd_start = 0;
 	}
 #endif
-	cpuset_init_early();
 	page_cgroup_init();
 	enable_debug_pagealloc();
 	cpu_hotplug_init();
@@ -867,6 +866,11 @@ static noinline int init_post(void)
 static int __init kernel_init(void * unused)
 {
 	lock_kernel();
+
+	/*
+	 * init can allocate pages on any node
+	 */
+	set_mems_allowed(node_possible_map);
 	/*
 	 * init can run on any cpu.
 	 */
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index af5a83d5218..7e75a41bd50 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -97,12 +97,6 @@ struct cpuset {
 
 	struct cpuset *parent;		/* my parent */
 
-	/*
-	 * Copy of global cpuset_mems_generation as of the most
-	 * recent time this cpuset changed its mems_allowed.
-	 */
-	int mems_generation;
-
 	struct fmeter fmeter;		/* memory_pressure filter */
 
 	/* partition number for rebuild_sched_domains() */
@@ -176,27 +170,6 @@ static inline int is_spread_slab(const struct cpuset *cs)
 	return test_bit(CS_SPREAD_SLAB, &cs->flags);
 }
 
-/*
- * Increment this integer everytime any cpuset changes its
- * mems_allowed value.  Users of cpusets can track this generation
- * number, and avoid having to lock and reload mems_allowed unless
- * the cpuset they're using changes generation.
- *
- * A single, global generation is needed because cpuset_attach_task() could
- * reattach a task to a different cpuset, which must not have its
- * generation numbers aliased with those of that tasks previous cpuset.
- *
- * Generations are needed for mems_allowed because one task cannot
- * modify another's memory placement.  So we must enable every task,
- * on every visit to __alloc_pages(), to efficiently check whether
- * its current->cpuset->mems_allowed has changed, requiring an update
- * of its current->mems_allowed.
- *
- * Since writes to cpuset_mems_generation are guarded by the cgroup lock
- * there is no need to mark it atomic.
- */
-static int cpuset_mems_generation;
-
 static struct cpuset top_cpuset = {
 	.flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
 };
@@ -228,8 +201,9 @@ static struct cpuset top_cpuset = {
  * If a task is only holding callback_mutex, then it has read-only
  * access to cpusets.
  *
- * The task_struct fields mems_allowed and mems_generation may only
- * be accessed in the context of that task, so require no locks.
+ * Now, the task_struct fields mems_allowed and mempolicy may be changed
+ * by other task, we use alloc_lock in the task_struct fields to protect
+ * them.
  *
  * The cpuset_common_file_read() handlers only hold callback_mutex across
  * small pieces of code, such as when reading out possibly multi-word
@@ -349,69 +323,6 @@ static void cpuset_update_task_spread_flag(struct cpuset *cs,
 		tsk->flags &= ~PF_SPREAD_SLAB;
 }
 
-/**
- * cpuset_update_task_memory_state - update task memory placement
- *
- * If the current tasks cpusets mems_allowed changed behind our
- * backs, update current->mems_allowed, mems_generation and task NUMA
- * mempolicy to the new value.
- *
- * Task mempolicy is updated by rebinding it relative to the
- * current->cpuset if a task has its memory placement changed.
- * Do not call this routine if in_interrupt().
- *
- * Call without callback_mutex or task_lock() held.  May be
- * called with or without cgroup_mutex held.  Thanks in part to
- * 'the_top_cpuset_hack', the task's cpuset pointer will never
- * be NULL.  This routine also might acquire callback_mutex during
- * call.
- *
- * Reading current->cpuset->mems_generation doesn't need task_lock
- * to guard the current->cpuset derefence, because it is guarded
- * from concurrent freeing of current->cpuset using RCU.
- *
- * The rcu_dereference() is technically probably not needed,
- * as I don't actually mind if I see a new cpuset pointer but
- * an old value of mems_generation.  However this really only
- * matters on alpha systems using cpusets heavily.  If I dropped
- * that rcu_dereference(), it would save them a memory barrier.
- * For all other arch's, rcu_dereference is a no-op anyway, and for
- * alpha systems not using cpusets, another planned optimization,
- * avoiding the rcu critical section for tasks in the root cpuset
- * which is statically allocated, so can't vanish, will make this
- * irrelevant.  Better to use RCU as intended, than to engage in
- * some cute trick to save a memory barrier that is impossible to
- * test, for alpha systems using cpusets heavily, which might not
- * even exist.
- *
- * This routine is needed to update the per-task mems_allowed data,
- * within the tasks context, when it is trying to allocate memory
- * (in various mm/mempolicy.c routines) and notices that some other
- * task has been modifying its cpuset.
- */
-
-void cpuset_update_task_memory_state(void)
-{
-	int my_cpusets_mem_gen;
-	struct task_struct *tsk = current;
-	struct cpuset *cs;
-
-	rcu_read_lock();
-	my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
-	rcu_read_unlock();
-
-	if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
-		mutex_lock(&callback_mutex);
-		task_lock(tsk);
-		cs = task_cs(tsk); /* Maybe changed when task not locked */
-		guarantee_online_mems(cs, &tsk->mems_allowed);
-		tsk->cpuset_mems_generation = cs->mems_generation;
-		task_unlock(tsk);
-		mutex_unlock(&callback_mutex);
-		mpol_rebind_task(tsk, &tsk->mems_allowed);
-	}
-}
-
 /*
  * is_cpuset_subset(p, q) - Is cpuset p a subset of cpuset q?
  *
@@ -1017,14 +928,6 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
  *    other task, the task_struct mems_allowed that we are hacking
  *    is for our current task, which must allocate new pages for that
  *    migrating memory region.
- *
- *    We call cpuset_update_task_memory_state() before hacking
- *    our tasks mems_allowed, so that we are assured of being in
- *    sync with our tasks cpuset, and in particular, callbacks to
- *    cpuset_update_task_memory_state() from nested page allocations
- *    won't see any mismatch of our cpuset and task mems_generation
- *    values, so won't overwrite our hacked tasks mems_allowed
- *    nodemask.
  */
 
 static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
@@ -1032,22 +935,37 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
 {
 	struct task_struct *tsk = current;
 
-	cpuset_update_task_memory_state();
-
-	mutex_lock(&callback_mutex);
 	tsk->mems_allowed = *to;
-	mutex_unlock(&callback_mutex);
 
 	do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
 
-	mutex_lock(&callback_mutex);
 	guarantee_online_mems(task_cs(tsk),&tsk->mems_allowed);
-	mutex_unlock(&callback_mutex);
 }
 
 /*
- * Rebind task's vmas to cpuset's new mems_allowed, and migrate pages to new
- * nodes if memory_migrate flag is set. Called with cgroup_mutex held.
+ * cpuset_change_task_nodemask - change task's mems_allowed and mempolicy
+ * @tsk: the task to change
+ * @newmems: new nodes that the task will be set
+ *
+ * In order to avoid seeing no nodes if the old and new nodes are disjoint,
+ * we structure updates as setting all new allowed nodes, then clearing newly
+ * disallowed ones.
+ *
+ * Called with task's alloc_lock held
+ */
+static void cpuset_change_task_nodemask(struct task_struct *tsk,
+					nodemask_t *newmems)
+{
+	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
+	mpol_rebind_task(tsk, &tsk->mems_allowed);
+	mpol_rebind_task(tsk, newmems);
+	tsk->mems_allowed = *newmems;
+}
+
+/*
+ * Update task's mems_allowed and rebind its mempolicy and vmas' mempolicy
+ * of it to cpuset's new mems_allowed, and migrate pages to new nodes if
+ * memory_migrate flag is set. Called with cgroup_mutex held.
  */
 static void cpuset_change_nodemask(struct task_struct *p,
 				   struct cgroup_scanner *scan)
@@ -1056,12 +974,19 @@ static void cpuset_change_nodemask(struct task_struct *p,
 	struct cpuset *cs;
 	int migrate;
 	const nodemask_t *oldmem = scan->data;
+	nodemask_t newmems;
+
+	cs = cgroup_cs(scan->cg);
+	guarantee_online_mems(cs, &newmems);
+
+	task_lock(p);
+	cpuset_change_task_nodemask(p, &newmems);
+	task_unlock(p);
 
 	mm = get_task_mm(p);
 	if (!mm)
 		return;
 
-	cs = cgroup_cs(scan->cg);
 	migrate = is_memory_migrate(cs);
 
 	mpol_rebind_mm(mm, &cs->mems_allowed);
@@ -1114,10 +1039,10 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem,
 /*
  * Handle user request to change the 'mems' memory placement
  * of a cpuset.  Needs to validate the request, update the
- * cpusets mems_allowed and mems_generation, and for each
- * task in the cpuset, rebind any vma mempolicies and if
- * the cpuset is marked 'memory_migrate', migrate the tasks
- * pages to the new memory.
+ * cpusets mems_allowed, and for each task in the cpuset,
+ * update mems_allowed and rebind task's mempolicy and any vma
+ * mempolicies and if the cpuset is marked 'memory_migrate',
+ * migrate the tasks pages to the new memory.
  *
  * Call with cgroup_mutex held.  May take callback_mutex during call.
  * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
@@ -1170,7 +1095,6 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
 
 	mutex_lock(&callback_mutex);
 	cs->mems_allowed = trialcs->mems_allowed;
-	cs->mems_generation = cpuset_mems_generation++;
 	mutex_unlock(&callback_mutex);
 
 	update_tasks_nodemask(cs, &oldmem, &heap);
@@ -1434,15 +1358,18 @@ static void cpuset_attach(struct cgroup_subsys *ss,
 
 	if (cs == &top_cpuset) {
 		cpumask_copy(cpus_attach, cpu_possible_mask);
+		to = node_possible_map;
 	} else {
-		mutex_lock(&callback_mutex);
 		guarantee_online_cpus(cs, cpus_attach);
-		mutex_unlock(&callback_mutex);
+		guarantee_online_mems(cs, &to);
 	}
 	err = set_cpus_allowed_ptr(tsk, cpus_attach);
 	if (err)
 		return;
 
+	task_lock(tsk);
+	cpuset_change_task_nodemask(tsk, &to);
+	task_unlock(tsk);
 	cpuset_update_task_spread_flag(cs, tsk);
 
 	from = oldcs->mems_allowed;
@@ -1848,8 +1775,6 @@ static struct cgroup_subsys_state *cpuset_create(
 	struct cpuset *parent;
 
 	if (!cont->parent) {
-		/* This is early initialization for the top cgroup */
-		top_cpuset.mems_generation = cpuset_mems_generation++;
 		return &top_cpuset.css;
 	}
 	parent = cgroup_cs(cont->parent);
@@ -1861,7 +1786,6 @@ static struct cgroup_subsys_state *cpuset_create(
 		return ERR_PTR(-ENOMEM);
 	}
 
-	cpuset_update_task_memory_state();
 	cs->flags = 0;
 	if (is_spread_page(parent))
 		set_bit(CS_SPREAD_PAGE, &cs->flags);
@@ -1870,7 +1794,6 @@ static struct cgroup_subsys_state *cpuset_create(
 	set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
 	cpumask_clear(cs->cpus_allowed);
 	nodes_clear(cs->mems_allowed);
-	cs->mems_generation = cpuset_mems_generation++;
 	fmeter_init(&cs->fmeter);
 	cs->relax_domain_level = -1;
 
@@ -1889,8 +1812,6 @@ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
 {
 	struct cpuset *cs = cgroup_cs(cont);
 
-	cpuset_update_task_memory_state();
-
 	if (is_sched_load_balance(cs))
 		update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
 
@@ -1911,21 +1832,6 @@ struct cgroup_subsys cpuset_subsys = {
 	.early_init = 1,
 };
 
-/*
- * cpuset_init_early - just enough so that the calls to
- * cpuset_update_task_memory_state() in early init code
- * are harmless.
- */
-
-int __init cpuset_init_early(void)
-{
-	alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_NOWAIT);
-
-	top_cpuset.mems_generation = cpuset_mems_generation++;
-	return 0;
-}
-
-
 /**
  * cpuset_init - initialize cpusets at system boot
  *
@@ -1936,11 +1842,13 @@ int __init cpuset_init(void)
 {
 	int err = 0;
 
+	if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL))
+		BUG();
+
 	cpumask_setall(top_cpuset.cpus_allowed);
 	nodes_setall(top_cpuset.mems_allowed);
 
 	fmeter_init(&top_cpuset.fmeter);
-	top_cpuset.mems_generation = cpuset_mems_generation++;
 	set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
 	top_cpuset.relax_domain_level = -1;
 
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 41c88fe4050..7fa44133352 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -9,6 +9,7 @@
 #include <linux/kthread.h>
 #include <linux/completion.h>
 #include <linux/err.h>
+#include <linux/cpuset.h>
 #include <linux/unistd.h>
 #include <linux/file.h>
 #include <linux/module.h>
@@ -236,6 +237,7 @@ int kthreadd(void *unused)
 	ignore_signals(tsk);
 	set_user_nice(tsk, KTHREAD_NICE_LEVEL);
 	set_cpus_allowed_ptr(tsk, cpu_all_mask);
+	set_mems_allowed(node_possible_map);
 
 	current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG;
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 3eb4a6fdc04..46bdf9ddf2b 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -182,13 +182,54 @@ static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes)
 	return 0;
 }
 
-/* Create a new policy */
+/*
+ * mpol_set_nodemask is called after mpol_new() to set up the nodemask, if
+ * any, for the new policy.  mpol_new() has already validated the nodes
+ * parameter with respect to the policy mode and flags.  But, we need to
+ * handle an empty nodemask with MPOL_PREFERRED here.
+ *
+ * Must be called holding task's alloc_lock to protect task's mems_allowed
+ * and mempolicy.  May also be called holding the mmap_semaphore for write.
+ */
+static int mpol_set_nodemask(struct mempolicy *pol, const nodemask_t *nodes)
+{
+	nodemask_t cpuset_context_nmask;
+	int ret;
+
+	/* if mode is MPOL_DEFAULT, pol is NULL. This is right. */
+	if (pol == NULL)
+		return 0;
+
+	VM_BUG_ON(!nodes);
+	if (pol->mode == MPOL_PREFERRED && nodes_empty(*nodes))
+		nodes = NULL;	/* explicit local allocation */
+	else {
+		if (pol->flags & MPOL_F_RELATIVE_NODES)
+			mpol_relative_nodemask(&cpuset_context_nmask, nodes,
+					       &cpuset_current_mems_allowed);
+		else
+			nodes_and(cpuset_context_nmask, *nodes,
+				  cpuset_current_mems_allowed);
+		if (mpol_store_user_nodemask(pol))
+			pol->w.user_nodemask = *nodes;
+		else
+			pol->w.cpuset_mems_allowed =
+						cpuset_current_mems_allowed;
+	}
+
+	ret = mpol_ops[pol->mode].create(pol,
+				nodes ? &cpuset_context_nmask : NULL);
+	return ret;
+}
+
+/*
+ * This function just creates a new policy, does some check and simple
+ * initialization. You must invoke mpol_set_nodemask() to set nodes.
+ */
 static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
 				  nodemask_t *nodes)
 {
 	struct mempolicy *policy;
-	nodemask_t cpuset_context_nmask;
-	int ret;
 
 	pr_debug("setting mode %d flags %d nodes[0] %lx\n",
 		 mode, flags, nodes ? nodes_addr(*nodes)[0] : -1);
@@ -210,7 +251,6 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
 			if (((flags & MPOL_F_STATIC_NODES) ||
 			     (flags & MPOL_F_RELATIVE_NODES)))
 				return ERR_PTR(-EINVAL);
-			nodes = NULL;	/* flag local alloc */
 		}
 	} else if (nodes_empty(*nodes))
 		return ERR_PTR(-EINVAL);
@@ -221,30 +261,6 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
 	policy->mode = mode;
 	policy->flags = flags;
 
-	if (nodes) {
-		/*
-		 * cpuset related setup doesn't apply to local allocation
-		 */
-		cpuset_update_task_memory_state();
-		if (flags & MPOL_F_RELATIVE_NODES)
-			mpol_relative_nodemask(&cpuset_context_nmask, nodes,
-					       &cpuset_current_mems_allowed);
-		else
-			nodes_and(cpuset_context_nmask, *nodes,
-				  cpuset_current_mems_allowed);
-		if (mpol_store_user_nodemask(policy))
-			policy->w.user_nodemask = *nodes;
-		else
-			policy->w.cpuset_mems_allowed =
-						cpuset_mems_allowed(current);
-	}
-
-	ret = mpol_ops[mode].create(policy,
-				nodes ? &cpuset_context_nmask : NULL);
-	if (ret < 0) {
-		kmem_cache_free(policy_cache, policy);
-		return ERR_PTR(ret);
-	}
 	return policy;
 }
 
@@ -324,6 +340,8 @@ static void mpol_rebind_policy(struct mempolicy *pol,
 /*
  * Wrapper for mpol_rebind_policy() that just requires task
  * pointer, and updates task mempolicy.
+ *
+ * Called with task's alloc_lock held.
  */
 
 void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new)
@@ -600,8 +618,9 @@ static void mpol_set_task_struct_flag(void)
 static long do_set_mempolicy(unsigned short mode, unsigned short flags,
 			     nodemask_t *nodes)
 {
-	struct mempolicy *new;
+	struct mempolicy *new, *old;
 	struct mm_struct *mm = current->mm;
+	int ret;
 
 	new = mpol_new(mode, flags, nodes);
 	if (IS_ERR(new))
@@ -615,20 +634,33 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
 	 */
 	if (mm)
 		down_write(&mm->mmap_sem);
-	mpol_put(current->mempolicy);
+	task_lock(current);
+	ret = mpol_set_nodemask(new, nodes);
+	if (ret) {
+		task_unlock(current);
+		if (mm)
+			up_write(&mm->mmap_sem);
+		mpol_put(new);
+		return ret;
+	}
+	old = current->mempolicy;
 	current->mempolicy = new;
 	mpol_set_task_struct_flag();
 	if (new && new->mode == MPOL_INTERLEAVE &&
 	    nodes_weight(new->v.nodes))
 		current->il_next = first_node(new->v.nodes);
+	task_unlock(current);
 	if (mm)
 		up_write(&mm->mmap_sem);
 
+	mpol_put(old);
 	return 0;
 }
 
 /*
  * Return nodemask for policy for get_mempolicy() query
+ *
+ * Called with task's alloc_lock held
  */
 static void get_policy_nodemask(struct mempolicy *p, nodemask_t *nodes)
 {
@@ -674,7 +706,6 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 	struct vm_area_struct *vma = NULL;
 	struct mempolicy *pol = current->mempolicy;
 
-	cpuset_update_task_memory_state();
 	if (flags &
 		~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR|MPOL_F_MEMS_ALLOWED))
 		return -EINVAL;
@@ -683,7 +714,9 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 		if (flags & (MPOL_F_NODE|MPOL_F_ADDR))
 			return -EINVAL;
 		*policy = 0;	/* just so it's initialized */
+		task_lock(current);
 		*nmask  = cpuset_current_mems_allowed;
+		task_unlock(current);
 		return 0;
 	}
 
@@ -738,8 +771,11 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 	}
 
 	err = 0;
-	if (nmask)
+	if (nmask) {
+		task_lock(current);
 		get_policy_nodemask(pol, nmask);
+		task_unlock(current);
+	}
 
  out:
 	mpol_cond_put(pol);
@@ -979,6 +1015,14 @@ static long do_mbind(unsigned long start, unsigned long len,
 			return err;
 	}
 	down_write(&mm->mmap_sem);
+	task_lock(current);
+	err = mpol_set_nodemask(new, nmask);
+	task_unlock(current);
+	if (err) {
+		up_write(&mm->mmap_sem);
+		mpol_put(new);
+		return err;
+	}
 	vma = check_range(mm, start, end, nmask,
 			  flags | MPOL_MF_INVERT, &pagelist);
 
@@ -1545,8 +1589,6 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
 	struct mempolicy *pol = get_vma_policy(current, vma, addr);
 	struct zonelist *zl;
 
-	cpuset_update_task_memory_state();
-
 	if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
 		unsigned nid;
 
@@ -1593,8 +1635,6 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
 {
 	struct mempolicy *pol = current->mempolicy;
 
-	if ((gfp & __GFP_WAIT) && !in_interrupt())
-		cpuset_update_task_memory_state();
 	if (!pol || in_interrupt() || (gfp & __GFP_THISNODE))
 		pol = &default_policy;
 
@@ -1854,6 +1894,8 @@ restart:
  */
 void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
 {
+	int ret;
+
 	sp->root = RB_ROOT;		/* empty tree == default mempolicy */
 	spin_lock_init(&sp->lock);
 
@@ -1863,9 +1905,19 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
 
 		/* contextualize the tmpfs mount point mempolicy */
 		new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask);
-		mpol_put(mpol);	/* drop our ref on sb mpol */
-		if (IS_ERR(new))
+		if (IS_ERR(new)) {
+			mpol_put(mpol);	/* drop our ref on sb mpol */
 			return;		/* no valid nodemask intersection */
+		}
+
+		task_lock(current);
+		ret = mpol_set_nodemask(new, &mpol->w.user_nodemask);
+		task_unlock(current);
+		mpol_put(mpol);	/* drop our ref on sb mpol */
+		if (ret) {
+			mpol_put(new);
+			return;
+		}
 
 		/* Create pseudo-vma that contains just the policy */
 		memset(&pvma, 0, sizeof(struct vm_area_struct));
@@ -2086,8 +2138,19 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
 	new = mpol_new(mode, mode_flags, &nodes);
 	if (IS_ERR(new))
 		err = 1;
-	else if (no_context)
-		new->w.user_nodemask = nodes;	/* save for contextualization */
+	else {
+		int ret;
+
+		task_lock(current);
+		ret = mpol_set_nodemask(new, &nodes);
+		task_unlock(current);
+		if (ret)
+			err = 1;
+		else if (no_context) {
+			/* save for contextualization */
+			new->w.user_nodemask = nodes;
+		}
+	}
 
 out:
 	/* Restore string for error message */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 17d5f539a9a..7cc3179e359 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1569,10 +1569,7 @@ nofail_alloc:
 
 	/* We now go into synchronous reclaim */
 	cpuset_memory_pressure_bump();
-	/*
-	 * The task's cpuset might have expanded its set of allowable nodes
-	 */
-	cpuset_update_task_memory_state();
+
 	p->flags |= PF_MEMALLOC;
 
 	lockdep_set_current_reclaim_state(gfp_mask);
-- 
cgit v1.2.3-70-g09d2


From d239171e4f6efd58d7e423853056b1b6a74f1446 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Tue, 16 Jun 2009 15:31:52 -0700
Subject: page allocator: replace __alloc_pages_internal() with
 __alloc_pages_nodemask()

The start of a large patch series to clean up and optimise the page
allocator.

The performance improvements are in a wide range depending on the exact
machine but the results I've seen so fair are approximately;

kernbench:	0	to	 0.12% (elapsed time)
		0.49%	to	 3.20% (sys time)
aim9:		-4%	to	30% (for page_test and brk_test)
tbench:		-1%	to	 4%
hackbench:	-2.5%	to	 3.45% (mostly within the noise though)
netperf-udp	-1.34%  to	 4.06% (varies between machines a bit)
netperf-tcp	-0.44%  to	 5.22% (varies between machines a bit)

I haven't sysbench figures at hand, but previously they were within the
-0.5% to 2% range.

On netperf, the client and server were bound to opposite number CPUs to
maximise the problems with cache line bouncing of the struct pages so I
expect different people to report different results for netperf depending
on their exact machine and how they ran the test (different machines, same
cpus client/server, shared cache but two threads client/server, different
socket client/server etc).

I also measured the vmlinux sizes for a single x86-based config with
CONFIG_DEBUG_INFO enabled but not CONFIG_DEBUG_VM.  The core of the
.config is based on the Debian Lenny kernel config so I expect it to be
reasonably typical.

This patch:

__alloc_pages_internal is the core page allocator function but essentially
it is an alias of __alloc_pages_nodemask.  Naming a publicly available and
exported function "internal" is also a big ugly.  This patch renames
__alloc_pages_internal() to __alloc_pages_nodemask() and deletes the old
nodemask function.

Warning - This patch renames an exported symbol.  No kernel driver is
affected by external drivers calling __alloc_pages_internal() should
change the call to __alloc_pages_nodemask() without any alteration of
parameters.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 12 ++----------
 mm/page_alloc.c     |  4 ++--
 2 files changed, 4 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 3760e7c5de0..549ec558310 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -172,24 +172,16 @@ static inline void arch_alloc_page(struct page *page, int order) { }
 #endif
 
 struct page *
-__alloc_pages_internal(gfp_t gfp_mask, unsigned int order,
+__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 		       struct zonelist *zonelist, nodemask_t *nodemask);
 
 static inline struct page *
 __alloc_pages(gfp_t gfp_mask, unsigned int order,
 		struct zonelist *zonelist)
 {
-	return __alloc_pages_internal(gfp_mask, order, zonelist, NULL);
+	return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL);
 }
 
-static inline struct page *
-__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
-		struct zonelist *zonelist, nodemask_t *nodemask)
-{
-	return __alloc_pages_internal(gfp_mask, order, zonelist, nodemask);
-}
-
-
 static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
 						unsigned int order)
 {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cbed869fd83..d58df903150 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1458,7 +1458,7 @@ try_next_zone:
  * This is the 'heart' of the zoned buddy allocator.
  */
 struct page *
-__alloc_pages_internal(gfp_t gfp_mask, unsigned int order,
+__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 			struct zonelist *zonelist, nodemask_t *nodemask)
 {
 	const gfp_t wait = gfp_mask & __GFP_WAIT;
@@ -1667,7 +1667,7 @@ nopage:
 got_pg:
 	return page;
 }
-EXPORT_SYMBOL(__alloc_pages_internal);
+EXPORT_SYMBOL(__alloc_pages_nodemask);
 
 /*
  * Common helper functions.
-- 
cgit v1.2.3-70-g09d2


From b3c466ce512923298ae8c0121d3e9f397a3f1210 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Tue, 16 Jun 2009 15:31:53 -0700
Subject: page allocator: do not sanity check order in the fast path

No user of the allocator API should be passing in an order >= MAX_ORDER
but we check for it on each and every allocation.  Delete this check and
make it a VM_BUG_ON check further down the call path.

[akpm@linux-foundation.org: s/VM_BUG_ON/WARN_ON_ONCE/]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 6 ------
 mm/page_alloc.c     | 3 +++
 2 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 549ec558310..c2d3fe03b5d 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -185,9 +185,6 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
 static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
 						unsigned int order)
 {
-	if (unlikely(order >= MAX_ORDER))
-		return NULL;
-
 	/* Unknown node is current node */
 	if (nid < 0)
 		nid = numa_node_id();
@@ -201,9 +198,6 @@ extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order);
 static inline struct page *
 alloc_pages(gfp_t gfp_mask, unsigned int order)
 {
-	if (unlikely(order >= MAX_ORDER))
-		return NULL;
-
 	return alloc_pages_current(gfp_mask, order);
 }
 extern struct page *alloc_page_vma(gfp_t gfp_mask,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d58df903150..bfbd95c0610 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1401,6 +1401,9 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
 
 	classzone_idx = zone_idx(preferred_zone);
 
+	if (WARN_ON_ONCE(order >= MAX_ORDER))
+		return NULL;
+
 zonelist_scan:
 	/*
 	 * Scan zonelist, looking for a zone with enough free.
-- 
cgit v1.2.3-70-g09d2


From 6484eb3e2a81807722c5f28efef94d8338b7b996 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Tue, 16 Jun 2009 15:31:54 -0700
Subject: page allocator: do not check NUMA node ID when the caller knows the
 node is valid

Callers of alloc_pages_node() can optionally specify -1 as a node to mean
"allocate from the current node".  However, a number of the callers in
fast paths know for a fact their node is valid.  To avoid a comparison and
branch, this patch adds alloc_pages_exact_node() that only checks the nid
with VM_BUG_ON().  Callers that know their node is valid are then
converted.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Pekka Enberg <penberg@cs.helsinki.fi>
Acked-by: Paul Mundt <lethal@linux-sh.org>	[for the SLOB NUMA bits]
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/hp/common/sba_iommu.c   | 2 +-
 arch/ia64/kernel/mca.c            | 3 +--
 arch/ia64/kernel/uncached.c       | 3 ++-
 arch/ia64/sn/pci/pci_dma.c        | 3 ++-
 arch/powerpc/platforms/cell/ras.c | 4 ++--
 arch/x86/kvm/vmx.c                | 2 +-
 drivers/misc/sgi-gru/grufile.c    | 2 +-
 drivers/misc/sgi-xp/xpc_uv.c      | 2 +-
 include/linux/gfp.h               | 9 +++++++++
 include/linux/mm.h                | 1 -
 kernel/profile.c                  | 8 ++++----
 mm/filemap.c                      | 2 +-
 mm/hugetlb.c                      | 4 ++--
 mm/mempolicy.c                    | 2 +-
 mm/migrate.c                      | 2 +-
 mm/slab.c                         | 4 ++--
 mm/slob.c                         | 4 ++--
 17 files changed, 33 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 56ceb68eb99..fe63b2dc9d0 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -1131,7 +1131,7 @@ sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp
 #ifdef CONFIG_NUMA
 	{
 		struct page *page;
-		page = alloc_pages_node(ioc->node == MAX_NUMNODES ?
+		page = alloc_pages_exact_node(ioc->node == MAX_NUMNODES ?
 		                        numa_node_id() : ioc->node, flags,
 		                        get_order(size));
 
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 8f33a884042..5b17bd40227 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1829,8 +1829,7 @@ ia64_mca_cpu_init(void *cpu_data)
 			data = mca_bootmem();
 			first_time = 0;
 		} else
-			data = page_address(alloc_pages_node(numa_node_id(),
-					GFP_KERNEL, get_order(sz)));
+			data = __get_free_pages(GFP_KERNEL, get_order(sz));
 		if (!data)
 			panic("Could not allocate MCA memory for cpu %d\n",
 					cpu);
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
index 8eff8c1d40a..6ba72ab42fc 100644
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -98,7 +98,8 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
 
 	/* attempt to allocate a granule's worth of cached memory pages */
 
-	page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
+	page = alloc_pages_exact_node(nid,
+				GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
 				IA64_GRANULE_SHIFT-PAGE_SHIFT);
 	if (!page) {
 		mutex_unlock(&uc_pool->add_chunk_mutex);
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
index d876423e4e7..98b684928e1 100644
--- a/arch/ia64/sn/pci/pci_dma.c
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -90,7 +90,8 @@ static void *sn_dma_alloc_coherent(struct device *dev, size_t size,
 	 */
 	node = pcibus_to_node(pdev->bus);
 	if (likely(node >=0)) {
-		struct page *p = alloc_pages_node(node, flags, get_order(size));
+		struct page *p = alloc_pages_exact_node(node,
+						flags, get_order(size));
 
 		if (likely(p))
 			cpuaddr = page_address(p);
diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c
index 296b5268754..5e0a191764f 100644
--- a/arch/powerpc/platforms/cell/ras.c
+++ b/arch/powerpc/platforms/cell/ras.c
@@ -122,8 +122,8 @@ static int __init cbe_ptcal_enable_on_node(int nid, int order)
 
 	area->nid = nid;
 	area->order = order;
-	area->pages = alloc_pages_node(area->nid, GFP_KERNEL | GFP_THISNODE,
-					area->order);
+	area->pages = alloc_pages_exact_node(area->nid, GFP_KERNEL|GFP_THISNODE,
+						area->order);
 
 	if (!area->pages) {
 		printk(KERN_WARNING "%s: no page on node %d\n",
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 32d6ae8fb60..e770bf349ec 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1277,7 +1277,7 @@ static struct vmcs *alloc_vmcs_cpu(int cpu)
 	struct page *pages;
 	struct vmcs *vmcs;
 
-	pages = alloc_pages_node(node, GFP_KERNEL, vmcs_config.order);
+	pages = alloc_pages_exact_node(node, GFP_KERNEL, vmcs_config.order);
 	if (!pages)
 		return NULL;
 	vmcs = page_address(pages);
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
index bbefe77c67a..3ce2920e2bf 100644
--- a/drivers/misc/sgi-gru/grufile.c
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -302,7 +302,7 @@ static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr)
 		pnode = uv_node_to_pnode(nid);
 		if (bid < 0 || gru_base[bid])
 			continue;
-		page = alloc_pages_node(nid, GFP_KERNEL, order);
+		page = alloc_pages_exact_node(nid, GFP_KERNEL, order);
 		if (!page)
 			goto fail;
 		gru_base[bid] = page_address(page);
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index 9172fcdee4e..c76677afda1 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -232,7 +232,7 @@ xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name,
 	mq->mmr_blade = uv_cpu_to_blade_id(cpu);
 
 	nid = cpu_to_node(cpu);
-	page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
+	page = alloc_pages_exact_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
 				pg_order);
 	if (page == NULL) {
 		dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d "
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index c2d3fe03b5d..4efa33088a8 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -5,6 +5,7 @@
 #include <linux/stddef.h>
 #include <linux/linkage.h>
 #include <linux/topology.h>
+#include <linux/mmdebug.h>
 
 struct vm_area_struct;
 
@@ -192,6 +193,14 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
 	return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
 }
 
+static inline struct page *alloc_pages_exact_node(int nid, gfp_t gfp_mask,
+						unsigned int order)
+{
+	VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
+
+	return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
+}
+
 #ifdef CONFIG_NUMA
 extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order);
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a880161a385..7b548e7cfbd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -7,7 +7,6 @@
 
 #include <linux/gfp.h>
 #include <linux/list.h>
-#include <linux/mmdebug.h>
 #include <linux/mmzone.h>
 #include <linux/rbtree.h>
 #include <linux/prio_tree.h>
diff --git a/kernel/profile.c b/kernel/profile.c
index 28cf26ad2d2..69911b5745e 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -365,7 +365,7 @@ static int __cpuinit profile_cpu_callback(struct notifier_block *info,
 		node = cpu_to_node(cpu);
 		per_cpu(cpu_profile_flip, cpu) = 0;
 		if (!per_cpu(cpu_profile_hits, cpu)[1]) {
-			page = alloc_pages_node(node,
+			page = alloc_pages_exact_node(node,
 					GFP_KERNEL | __GFP_ZERO,
 					0);
 			if (!page)
@@ -373,7 +373,7 @@ static int __cpuinit profile_cpu_callback(struct notifier_block *info,
 			per_cpu(cpu_profile_hits, cpu)[1] = page_address(page);
 		}
 		if (!per_cpu(cpu_profile_hits, cpu)[0]) {
-			page = alloc_pages_node(node,
+			page = alloc_pages_exact_node(node,
 					GFP_KERNEL | __GFP_ZERO,
 					0);
 			if (!page)
@@ -564,14 +564,14 @@ static int create_hash_tables(void)
 		int node = cpu_to_node(cpu);
 		struct page *page;
 
-		page = alloc_pages_node(node,
+		page = alloc_pages_exact_node(node,
 				GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
 				0);
 		if (!page)
 			goto out_cleanup;
 		per_cpu(cpu_profile_hits, cpu)[1]
 				= (struct profile_hit *)page_address(page);
-		page = alloc_pages_node(node,
+		page = alloc_pages_exact_node(node,
 				GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
 				0);
 		if (!page)
diff --git a/mm/filemap.c b/mm/filemap.c
index 6846a902f5c..22396713feb 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -521,7 +521,7 @@ struct page *__page_cache_alloc(gfp_t gfp)
 {
 	if (cpuset_do_page_mem_spread()) {
 		int n = cpuset_mem_spread_node();
-		return alloc_pages_node(n, gfp, 0);
+		return alloc_pages_exact_node(n, gfp, 0);
 	}
 	return alloc_pages(gfp, 0);
 }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e83ad2c9228..2f8241f300f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -630,7 +630,7 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
 	if (h->order >= MAX_ORDER)
 		return NULL;
 
-	page = alloc_pages_node(nid,
+	page = alloc_pages_exact_node(nid,
 		htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
 						__GFP_REPEAT|__GFP_NOWARN,
 		huge_page_order(h));
@@ -649,7 +649,7 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
  * Use a helper variable to find the next node and then
  * copy it back to hugetlb_next_nid afterwards:
  * otherwise there's a window in which a racer might
- * pass invalid nid MAX_NUMNODES to alloc_pages_node.
+ * pass invalid nid MAX_NUMNODES to alloc_pages_exact_node.
  * But we don't need to use a spin_lock here: it really
  * doesn't matter if occasionally a racer chooses the
  * same nid as we do.  Move nid forward in the mask even
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 46bdf9ddf2b..e08e2c4da63 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -803,7 +803,7 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
 
 static struct page *new_node_page(struct page *page, unsigned long node, int **x)
 {
-	return alloc_pages_node(node, GFP_HIGHUSER_MOVABLE, 0);
+	return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE, 0);
 }
 
 /*
diff --git a/mm/migrate.c b/mm/migrate.c
index 068655d8f88..5a24923e7fd 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -802,7 +802,7 @@ static struct page *new_page_node(struct page *p, unsigned long private,
 
 	*result = &pm->status;
 
-	return alloc_pages_node(pm->node,
+	return alloc_pages_exact_node(pm->node,
 				GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0);
 }
 
diff --git a/mm/slab.c b/mm/slab.c
index 18e3164de09..bb3254c95cd 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1707,7 +1707,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
 		flags |= __GFP_RECLAIMABLE;
 
-	page = alloc_pages_node(nodeid, flags, cachep->gfporder);
+	page = alloc_pages_exact_node(nodeid, flags, cachep->gfporder);
 	if (!page)
 		return NULL;
 
@@ -3261,7 +3261,7 @@ retry:
 		if (local_flags & __GFP_WAIT)
 			local_irq_enable();
 		kmem_flagcheck(cache, flags);
-		obj = kmem_getpages(cache, local_flags, -1);
+		obj = kmem_getpages(cache, local_flags, numa_node_id());
 		if (local_flags & __GFP_WAIT)
 			local_irq_disable();
 		if (obj) {
diff --git a/mm/slob.c b/mm/slob.c
index 12f26149992..64f6db1943b 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -46,7 +46,7 @@
  * NUMA support in SLOB is fairly simplistic, pushing most of the real
  * logic down to the page allocator, and simply doing the node accounting
  * on the upper levels. In the event that a node id is explicitly
- * provided, alloc_pages_node() with the specified node id is used
+ * provided, alloc_pages_exact_node() with the specified node id is used
  * instead. The common case (or when the node id isn't explicitly provided)
  * will default to the current node, as per numa_node_id().
  *
@@ -244,7 +244,7 @@ static void *slob_new_pages(gfp_t gfp, int order, int node)
 
 #ifdef CONFIG_NUMA
 	if (node != -1)
-		page = alloc_pages_node(node, gfp, order);
+		page = alloc_pages_exact_node(node, gfp, order);
 	else
 #endif
 		page = alloc_pages(gfp, order);
-- 
cgit v1.2.3-70-g09d2


From 49255c619fbd482d704289b5eb2795f8e3b7ff2e Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Tue, 16 Jun 2009 15:31:58 -0700
Subject: page allocator: move check for disabled anti-fragmentation out of
 fastpath

On low-memory systems, anti-fragmentation gets disabled as there is
nothing it can do and it would just incur overhead shuffling pages between
lists constantly.  Currently the check is made in the free page fast path
for every page.  This patch moves it to a slow path.  On machines with low
memory, there will be small amount of additional overhead as pages get
shuffled between lists but it should quickly settle.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 3 ---
 mm/page_alloc.c        | 4 ++++
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index a47c879e130..0aa4445b0b8 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -50,9 +50,6 @@ extern int page_group_by_mobility_disabled;
 
 static inline int get_pageblock_migratetype(struct page *page)
 {
-	if (unlikely(page_group_by_mobility_disabled))
-		return MIGRATE_UNMOVABLE;
-
 	return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end);
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 512bf9a618c..b09859629e9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -168,6 +168,10 @@ int page_group_by_mobility_disabled __read_mostly;
 
 static void set_pageblock_migratetype(struct page *page, int migratetype)
 {
+
+	if (unlikely(page_group_by_mobility_disabled))
+		migratetype = MIGRATE_UNMOVABLE;
+
 	set_pageblock_flags_group(page, (unsigned long)migratetype,
 					PB_migrate, PB_migrate_end);
 }
-- 
cgit v1.2.3-70-g09d2


From 418589663d6011de9006425b6c5721e1544fb47a Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Tue, 16 Jun 2009 15:32:12 -0700
Subject: page allocator: use allocation flags as an index to the zone
 watermark

ALLOC_WMARK_MIN, ALLOC_WMARK_LOW and ALLOC_WMARK_HIGH determin whether
pages_min, pages_low or pages_high is used as the zone watermark when
allocating the pages.  Two branches in the allocator hotpath determine
which watermark to use.

This patch uses the flags as an array index into a watermark array that is
indexed with WMARK_* defines accessed via helpers.  All call sites that
use zone->pages_* are updated to use the helpers for accessing the values
and the array offsets for setting.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/sysctl/vm.txt | 11 +++++-----
 Documentation/vm/balance    | 18 ++++++++--------
 arch/m32r/mm/discontig.c    |  6 +++---
 include/linux/mmzone.h      | 16 +++++++++++++-
 mm/page_alloc.c             | 51 +++++++++++++++++++++++----------------------
 mm/vmscan.c                 | 39 ++++++++++++++++++----------------
 mm/vmstat.c                 |  6 +++---
 7 files changed, 83 insertions(+), 64 deletions(-)

(limited to 'include')

diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 6fab2dcbb4d..0ea5adbc5b1 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -233,8 +233,8 @@ These protections are added to score to judge whether this zone should be used
 for page allocation or should be reclaimed.
 
 In this example, if normal pages (index=2) are required to this DMA zone and
-pages_high is used for watermark, the kernel judges this zone should not be
-used because pages_free(1355) is smaller than watermark + protection[2]
+watermark[WMARK_HIGH] is used for watermark, the kernel judges this zone should
+not be used because pages_free(1355) is smaller than watermark + protection[2]
 (4 + 2004 = 2008). If this protection value is 0, this zone would be used for
 normal page requirement. If requirement is DMA zone(index=0), protection[0]
 (=0) is used.
@@ -280,9 +280,10 @@ The default value is 65536.
 min_free_kbytes:
 
 This is used to force the Linux VM to keep a minimum number
-of kilobytes free.  The VM uses this number to compute a pages_min
-value for each lowmem zone in the system.  Each lowmem zone gets
-a number of reserved free pages based proportionally on its size.
+of kilobytes free.  The VM uses this number to compute a
+watermark[WMARK_MIN] value for each lowmem zone in the system.
+Each lowmem zone gets a number of reserved free pages based
+proportionally on its size.
 
 Some minimal amount of memory is needed to satisfy PF_MEMALLOC
 allocations; if you set this to lower than 1024KB, your system will
diff --git a/Documentation/vm/balance b/Documentation/vm/balance
index bd3d31bc491..c46e68cf934 100644
--- a/Documentation/vm/balance
+++ b/Documentation/vm/balance
@@ -75,15 +75,15 @@ Page stealing from process memory and shm is done if stealing the page would
 alleviate memory pressure on any zone in the page's node that has fallen below
 its watermark.
 
-pages_min/pages_low/pages_high/low_on_memory/zone_wake_kswapd: These are 
-per-zone fields, used to determine when a zone needs to be balanced. When
-the number of pages falls below pages_min, the hysteric field low_on_memory
-gets set. This stays set till the number of free pages becomes pages_high.
-When low_on_memory is set, page allocation requests will try to free some
-pages in the zone (providing GFP_WAIT is set in the request). Orthogonal
-to this, is the decision to poke kswapd to free some zone pages. That
-decision is not hysteresis based, and is done when the number of free
-pages is below pages_low; in which case zone_wake_kswapd is also set.
+watemark[WMARK_MIN/WMARK_LOW/WMARK_HIGH]/low_on_memory/zone_wake_kswapd: These
+are per-zone fields, used to determine when a zone needs to be balanced. When
+the number of pages falls below watermark[WMARK_MIN], the hysteric field
+low_on_memory gets set. This stays set till the number of free pages becomes
+watermark[WMARK_HIGH]. When low_on_memory is set, page allocation requests will
+try to free some pages in the zone (providing GFP_WAIT is set in the request).
+Orthogonal to this, is the decision to poke kswapd to free some zone pages.
+That decision is not hysteresis based, and is done when the number of free
+pages is below watermark[WMARK_LOW]; in which case zone_wake_kswapd is also set.
 
 
 (Good) Ideas that I have heard:
diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c
index 7daf897292c..b7a78ad429b 100644
--- a/arch/m32r/mm/discontig.c
+++ b/arch/m32r/mm/discontig.c
@@ -154,9 +154,9 @@ unsigned long __init zone_sizes_init(void)
 	 *  Use all area of internal RAM.
 	 *  see __alloc_pages()
 	 */
-	NODE_DATA(1)->node_zones->pages_min = 0;
-	NODE_DATA(1)->node_zones->pages_low = 0;
-	NODE_DATA(1)->node_zones->pages_high = 0;
+	NODE_DATA(1)->node_zones->watermark[WMARK_MIN] = 0;
+	NODE_DATA(1)->node_zones->watermark[WMARK_LOW] = 0;
+	NODE_DATA(1)->node_zones->watermark[WMARK_HIGH] = 0;
 
 	return holes;
 }
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0aa4445b0b8..dd8487f0442 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -163,6 +163,17 @@ static inline int is_unevictable_lru(enum lru_list l)
 #endif
 }
 
+enum zone_watermarks {
+	WMARK_MIN,
+	WMARK_LOW,
+	WMARK_HIGH,
+	NR_WMARK
+};
+
+#define min_wmark_pages(z) (z->watermark[WMARK_MIN])
+#define low_wmark_pages(z) (z->watermark[WMARK_LOW])
+#define high_wmark_pages(z) (z->watermark[WMARK_HIGH])
+
 struct per_cpu_pages {
 	int count;		/* number of pages in the list */
 	int high;		/* high watermark, emptying needed */
@@ -275,7 +286,10 @@ struct zone_reclaim_stat {
 
 struct zone {
 	/* Fields commonly accessed by the page allocator */
-	unsigned long		pages_min, pages_low, pages_high;
+
+	/* zone watermarks, access with *_wmark_pages(zone) macros */
+	unsigned long watermark[NR_WMARK];
+
 	/*
 	 * We don't know if the memory that we're going to allocate will be freeable
 	 * or/and it will be released eventually, so to avoid totally wasting several
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8485735fc69..abe26003124 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1150,10 +1150,15 @@ failed:
 	return NULL;
 }
 
-#define ALLOC_NO_WATERMARKS	0x01 /* don't check watermarks at all */
-#define ALLOC_WMARK_MIN		0x02 /* use pages_min watermark */
-#define ALLOC_WMARK_LOW		0x04 /* use pages_low watermark */
-#define ALLOC_WMARK_HIGH	0x08 /* use pages_high watermark */
+/* The ALLOC_WMARK bits are used as an index to zone->watermark */
+#define ALLOC_WMARK_MIN		WMARK_MIN
+#define ALLOC_WMARK_LOW		WMARK_LOW
+#define ALLOC_WMARK_HIGH	WMARK_HIGH
+#define ALLOC_NO_WATERMARKS	0x04 /* don't check watermarks at all */
+
+/* Mask to get the watermark bits */
+#define ALLOC_WMARK_MASK	(ALLOC_NO_WATERMARKS-1)
+
 #define ALLOC_HARDER		0x10 /* try to alloc harder */
 #define ALLOC_HIGH		0x20 /* __GFP_HIGH set */
 #define ALLOC_CPUSET		0x40 /* check for correct cpuset */
@@ -1440,14 +1445,10 @@ zonelist_scan:
 			!cpuset_zone_allowed_softwall(zone, gfp_mask))
 				goto try_next_zone;
 
+		BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
 		if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
 			unsigned long mark;
-			if (alloc_flags & ALLOC_WMARK_MIN)
-				mark = zone->pages_min;
-			else if (alloc_flags & ALLOC_WMARK_LOW)
-				mark = zone->pages_low;
-			else
-				mark = zone->pages_high;
+			mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
 			if (!zone_watermark_ok(zone, order, mark,
 				    classzone_idx, alloc_flags)) {
 				if (!zone_reclaim_mode ||
@@ -1959,7 +1960,7 @@ static unsigned int nr_free_zone_pages(int offset)
 
 	for_each_zone_zonelist(zone, z, zonelist, offset) {
 		unsigned long size = zone->present_pages;
-		unsigned long high = zone->pages_high;
+		unsigned long high = high_wmark_pages(zone);
 		if (size > high)
 			sum += size - high;
 	}
@@ -2096,9 +2097,9 @@ void show_free_areas(void)
 			"\n",
 			zone->name,
 			K(zone_page_state(zone, NR_FREE_PAGES)),
-			K(zone->pages_min),
-			K(zone->pages_low),
-			K(zone->pages_high),
+			K(min_wmark_pages(zone)),
+			K(low_wmark_pages(zone)),
+			K(high_wmark_pages(zone)),
 			K(zone_page_state(zone, NR_ACTIVE_ANON)),
 			K(zone_page_state(zone, NR_INACTIVE_ANON)),
 			K(zone_page_state(zone, NR_ACTIVE_FILE)),
@@ -2702,8 +2703,8 @@ static inline unsigned long wait_table_bits(unsigned long size)
 
 /*
  * Mark a number of pageblocks as MIGRATE_RESERVE. The number
- * of blocks reserved is based on zone->pages_min. The memory within the
- * reserve will tend to store contiguous free pages. Setting min_free_kbytes
+ * of blocks reserved is based on min_wmark_pages(zone). The memory within
+ * the reserve will tend to store contiguous free pages. Setting min_free_kbytes
  * higher will lead to a bigger reserve which will get freed as contiguous
  * blocks as reclaim kicks in
  */
@@ -2716,7 +2717,7 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 	/* Get the start pfn, end pfn and the number of blocks to reserve */
 	start_pfn = zone->zone_start_pfn;
 	end_pfn = start_pfn + zone->spanned_pages;
-	reserve = roundup(zone->pages_min, pageblock_nr_pages) >>
+	reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
 							pageblock_order;
 
 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
@@ -4319,8 +4320,8 @@ static void calculate_totalreserve_pages(void)
 					max = zone->lowmem_reserve[j];
 			}
 
-			/* we treat pages_high as reserved pages. */
-			max += zone->pages_high;
+			/* we treat the high watermark as reserved pages. */
+			max += high_wmark_pages(zone);
 
 			if (max > zone->present_pages)
 				max = zone->present_pages;
@@ -4400,7 +4401,7 @@ void setup_per_zone_pages_min(void)
 			 * need highmem pages, so cap pages_min to a small
 			 * value here.
 			 *
-			 * The (pages_high-pages_low) and (pages_low-pages_min)
+			 * The WMARK_HIGH-WMARK_LOW and (WMARK_LOW-WMARK_MIN)
 			 * deltas controls asynch page reclaim, and so should
 			 * not be capped for highmem.
 			 */
@@ -4411,17 +4412,17 @@ void setup_per_zone_pages_min(void)
 				min_pages = SWAP_CLUSTER_MAX;
 			if (min_pages > 128)
 				min_pages = 128;
-			zone->pages_min = min_pages;
+			zone->watermark[WMARK_MIN] = min_pages;
 		} else {
 			/*
 			 * If it's a lowmem zone, reserve a number of pages
 			 * proportionate to the zone's size.
 			 */
-			zone->pages_min = tmp;
+			zone->watermark[WMARK_MIN] = tmp;
 		}
 
-		zone->pages_low   = zone->pages_min + (tmp >> 2);
-		zone->pages_high  = zone->pages_min + (tmp >> 1);
+		zone->watermark[WMARK_LOW]  = min_wmark_pages(zone) + (tmp >> 2);
+		zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1);
 		setup_zone_migrate_reserve(zone);
 		spin_unlock_irqrestore(&zone->lock, flags);
 	}
@@ -4566,7 +4567,7 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
  *	whenever sysctl_lowmem_reserve_ratio changes.
  *
  * The reserve ratio obviously has absolutely no relation with the
- * pages_min watermarks. The lowmem reserve ratio can only make sense
+ * minimum watermarks. The lowmem reserve ratio can only make sense
  * if in function of the boot time zone sizes.
  */
 int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a6b7d14812e..e5245d05164 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1401,7 +1401,7 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 		free  = zone_page_state(zone, NR_FREE_PAGES);
 		/* If we have very few page cache pages,
 		   force-scan anon pages. */
-		if (unlikely(file + free <= zone->pages_high)) {
+		if (unlikely(file + free <= high_wmark_pages(zone))) {
 			percent[0] = 100;
 			percent[1] = 0;
 			return;
@@ -1533,11 +1533,13 @@ static void shrink_zone(int priority, struct zone *zone,
  * try to reclaim pages from zones which will satisfy the caller's allocation
  * request.
  *
- * We reclaim from a zone even if that zone is over pages_high.  Because:
+ * We reclaim from a zone even if that zone is over high_wmark_pages(zone).
+ * Because:
  * a) The caller may be trying to free *extra* pages to satisfy a higher-order
  *    allocation or
- * b) The zones may be over pages_high but they must go *over* pages_high to
- *    satisfy the `incremental min' zone defense algorithm.
+ * b) The target zone may be at high_wmark_pages(zone) but the lower zones
+ *    must go *over* high_wmark_pages(zone) to satisfy the `incremental min'
+ *    zone defense algorithm.
  *
  * If a zone is deemed to be full of pinned pages then just give it a light
  * scan then give up on it.
@@ -1743,7 +1745,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 
 /*
  * For kswapd, balance_pgdat() will work across all this node's zones until
- * they are all at pages_high.
+ * they are all at high_wmark_pages(zone).
  *
  * Returns the number of pages which were actually freed.
  *
@@ -1756,11 +1758,11 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
  * the zone for when the problem goes away.
  *
  * kswapd scans the zones in the highmem->normal->dma direction.  It skips
- * zones which have free_pages > pages_high, but once a zone is found to have
- * free_pages <= pages_high, we scan that zone and the lower zones regardless
- * of the number of free pages in the lower zones.  This interoperates with
- * the page allocator fallback scheme to ensure that aging of pages is balanced
- * across the zones.
+ * zones which have free_pages > high_wmark_pages(zone), but once a zone is
+ * found to have free_pages <= high_wmark_pages(zone), we scan that zone and the
+ * lower zones regardless of the number of free pages in the lower zones. This
+ * interoperates with the page allocator fallback scheme to ensure that aging
+ * of pages is balanced across the zones.
  */
 static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 {
@@ -1781,7 +1783,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 	};
 	/*
 	 * temp_priority is used to remember the scanning priority at which
-	 * this zone was successfully refilled to free_pages == pages_high.
+	 * this zone was successfully refilled to
+	 * free_pages == high_wmark_pages(zone).
 	 */
 	int temp_priority[MAX_NR_ZONES];
 
@@ -1826,8 +1829,8 @@ loop_again:
 				shrink_active_list(SWAP_CLUSTER_MAX, zone,
 							&sc, priority, 0);
 
-			if (!zone_watermark_ok(zone, order, zone->pages_high,
-					       0, 0)) {
+			if (!zone_watermark_ok(zone, order,
+					high_wmark_pages(zone), 0, 0)) {
 				end_zone = i;
 				break;
 			}
@@ -1861,8 +1864,8 @@ loop_again:
 					priority != DEF_PRIORITY)
 				continue;
 
-			if (!zone_watermark_ok(zone, order, zone->pages_high,
-					       end_zone, 0))
+			if (!zone_watermark_ok(zone, order,
+					high_wmark_pages(zone), end_zone, 0))
 				all_zones_ok = 0;
 			temp_priority[i] = priority;
 			sc.nr_scanned = 0;
@@ -1871,8 +1874,8 @@ loop_again:
 			 * We put equal pressure on every zone, unless one
 			 * zone has way too many pages free already.
 			 */
-			if (!zone_watermark_ok(zone, order, 8*zone->pages_high,
-						end_zone, 0))
+			if (!zone_watermark_ok(zone, order,
+					8*high_wmark_pages(zone), end_zone, 0))
 				shrink_zone(priority, zone, &sc);
 			reclaim_state->reclaimed_slab = 0;
 			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
@@ -2038,7 +2041,7 @@ void wakeup_kswapd(struct zone *zone, int order)
 		return;
 
 	pgdat = zone->zone_pgdat;
-	if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0))
+	if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
 		return;
 	if (pgdat->kswapd_max_order < order)
 		pgdat->kswapd_max_order = order;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 74d66dba0cb..415110772c7 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -714,9 +714,9 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 		   "\n        spanned  %lu"
 		   "\n        present  %lu",
 		   zone_page_state(zone, NR_FREE_PAGES),
-		   zone->pages_min,
-		   zone->pages_low,
-		   zone->pages_high,
+		   min_wmark_pages(zone),
+		   low_wmark_pages(zone),
+		   high_wmark_pages(zone),
 		   zone->pages_scanned,
 		   zone->lru[LRU_ACTIVE_ANON].nr_scan,
 		   zone->lru[LRU_INACTIVE_ANON].nr_scan,
-- 
cgit v1.2.3-70-g09d2


From 62bc62a873116805774ffd37d7f86aa4faa832b1 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux-foundation.org>
Date: Tue, 16 Jun 2009 15:32:15 -0700
Subject: page allocator: use a pre-calculated value instead of
 num_online_nodes() in fast paths

num_online_nodes() is called in a number of places but most often by the
page allocator when deciding whether the zonelist needs to be filtered
based on cpusets or the zonelist cache.  This is actually a heavy function
and touches a number of cache lines.

This patch stores the number of online nodes at boot time and updates the
value when nodes get onlined and offlined.  The value is then used in a
number of important paths in place of num_online_nodes().

[rientjes@google.com: do not override definition of node_set_online() with macro]
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nodemask.h | 19 ++++++++++++++++---
 mm/hugetlb.c             |  4 ++--
 mm/page_alloc.c          | 10 ++++++----
 mm/slub.c                |  2 +-
 net/sunrpc/svc.c         |  2 +-
 5 files changed, 26 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 848025cd708..829b94b156f 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -408,6 +408,19 @@ static inline int num_node_state(enum node_states state)
 #define next_online_node(nid)	next_node((nid), node_states[N_ONLINE])
 
 extern int nr_node_ids;
+extern int nr_online_nodes;
+
+static inline void node_set_online(int nid)
+{
+	node_set_state(nid, N_ONLINE);
+	nr_online_nodes = num_node_state(N_ONLINE);
+}
+
+static inline void node_set_offline(int nid)
+{
+	node_clear_state(nid, N_ONLINE);
+	nr_online_nodes = num_node_state(N_ONLINE);
+}
 #else
 
 static inline int node_state(int node, enum node_states state)
@@ -434,7 +447,10 @@ static inline int num_node_state(enum node_states state)
 #define first_online_node	0
 #define next_online_node(nid)	(MAX_NUMNODES)
 #define nr_node_ids		1
+#define nr_online_nodes		1
 
+#define node_set_online(node)	   node_set_state((node), N_ONLINE)
+#define node_set_offline(node)	   node_clear_state((node), N_ONLINE)
 #endif
 
 #define node_online_map 	node_states[N_ONLINE]
@@ -454,9 +470,6 @@ static inline int num_node_state(enum node_states state)
 #define node_online(node)	node_state((node), N_ONLINE)
 #define node_possible(node)	node_state((node), N_POSSIBLE)
 
-#define node_set_online(node)	   node_set_state((node), N_ONLINE)
-#define node_set_offline(node)	   node_clear_state((node), N_ONLINE)
-
 #define for_each_node(node)	   for_each_node_state(node, N_POSSIBLE)
 #define for_each_online_node(node) for_each_node_state(node, N_ONLINE)
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2f8241f300f..7b9b6015b2e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -875,7 +875,7 @@ static void return_unused_surplus_pages(struct hstate *h,
 	 * can no longer free unreserved surplus pages. This occurs when
 	 * the nodes with surplus pages have no free pages.
 	 */
-	unsigned long remaining_iterations = num_online_nodes();
+	unsigned long remaining_iterations = nr_online_nodes;
 
 	/* Uncommit the reservation */
 	h->resv_huge_pages -= unused_resv_pages;
@@ -904,7 +904,7 @@ static void return_unused_surplus_pages(struct hstate *h,
 			h->surplus_huge_pages--;
 			h->surplus_huge_pages_node[nid]--;
 			nr_pages--;
-			remaining_iterations = num_online_nodes();
+			remaining_iterations = nr_online_nodes;
 		}
 	}
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e60e4147433..0c9f406e3c4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -161,7 +161,9 @@ static unsigned long __meminitdata dma_reserve;
 
 #if MAX_NUMNODES > 1
 int nr_node_ids __read_mostly = MAX_NUMNODES;
+int nr_online_nodes __read_mostly = 1;
 EXPORT_SYMBOL(nr_node_ids);
+EXPORT_SYMBOL(nr_online_nodes);
 #endif
 
 int page_group_by_mobility_disabled __read_mostly;
@@ -1466,7 +1468,7 @@ this_zone_full:
 		if (NUMA_BUILD)
 			zlc_mark_zone_full(zonelist, z);
 try_next_zone:
-		if (NUMA_BUILD && !did_zlc_setup && num_online_nodes() > 1) {
+		if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) {
 			/*
 			 * we do zlc_setup after the first zone is tried but only
 			 * if there are multiple nodes make it worthwhile
@@ -2265,7 +2267,7 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
 }
 
 
-#define MAX_NODE_LOAD (num_online_nodes())
+#define MAX_NODE_LOAD (nr_online_nodes)
 static int node_load[MAX_NUMNODES];
 
 /**
@@ -2474,7 +2476,7 @@ static void build_zonelists(pg_data_t *pgdat)
 
 	/* NUMA-aware ordering of nodes */
 	local_node = pgdat->node_id;
-	load = num_online_nodes();
+	load = nr_online_nodes;
 	prev_node = local_node;
 	nodes_clear(used_mask);
 
@@ -2625,7 +2627,7 @@ void build_all_zonelists(void)
 
 	printk("Built %i zonelists in %s order, mobility grouping %s.  "
 		"Total pages: %ld\n",
-			num_online_nodes(),
+			nr_online_nodes,
 			zonelist_order_name[current_zonelist_order],
 			page_group_by_mobility_disabled ? "off" : "on",
 			vm_total_pages);
diff --git a/mm/slub.c b/mm/slub.c
index 30354bfeb43..1c950775d6b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3737,7 +3737,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
 						 to_cpumask(l->cpus));
 		}
 
-		if (num_online_nodes() > 1 && !nodes_empty(l->nodes) &&
+		if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
 				len < PAGE_SIZE - 60) {
 			len += sprintf(buf + len, " nodes=");
 			len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50,
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 8847add6ca1..5ed8931dfe9 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -124,7 +124,7 @@ svc_pool_map_choose_mode(void)
 {
 	unsigned int node;
 
-	if (num_online_nodes() > 1) {
+	if (nr_online_nodes > 1) {
 		/*
 		 * Actually have multiple NUMA nodes,
 		 * so split pools on NUMA node boundaries
-- 
cgit v1.2.3-70-g09d2


From 20a0307c0396c2edb651401d2f2db193dda2f3c9 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Tue, 16 Jun 2009 15:32:22 -0700
Subject: mm: introduce PageHuge() for testing huge/gigantic pages

A series of patches to enhance the /proc/pagemap interface and to add a
userspace executable which can be used to present the pagemap data.

Export 10 more flags to end users (and more for kernel developers):

        11. KPF_MMAP            (pseudo flag) memory mapped page
        12. KPF_ANON            (pseudo flag) memory mapped page (anonymous)
        13. KPF_SWAPCACHE       page is in swap cache
        14. KPF_SWAPBACKED      page is swap/RAM backed
        15. KPF_COMPOUND_HEAD   (*)
        16. KPF_COMPOUND_TAIL   (*)
        17. KPF_HUGE		hugeTLB pages
        18. KPF_UNEVICTABLE     page is in the unevictable LRU list
        19. KPF_HWPOISON        hardware detected corruption
        20. KPF_NOPAGE          (pseudo flag) no page frame at the address

        (*) For compound pages, exporting _both_ head/tail info enables
            users to tell where a compound page starts/ends, and its order.

a simple demo of the page-types tool

# ./page-types -h
page-types [options]
            -r|--raw                  Raw mode, for kernel developers
            -a|--addr    addr-spec    Walk a range of pages
            -b|--bits    bits-spec    Walk pages with specified bits
            -l|--list                 Show page details in ranges
            -L|--list-each            Show page details one by one
            -N|--no-summary           Don't show summay info
            -h|--help                 Show this usage message
addr-spec:
            N                         one page at offset N (unit: pages)
            N+M                       pages range from N to N+M-1
            N,M                       pages range from N to M-1
            N,                        pages range from N to end
            ,M                        pages range from 0 to M
bits-spec:
            bit1,bit2                 (flags & (bit1|bit2)) != 0
            bit1,bit2=bit1            (flags & (bit1|bit2)) == bit1
            bit1,~bit2                (flags & (bit1|bit2)) == bit1
            =bit1,bit2                flags == (bit1|bit2)
bit-names:
          locked              error         referenced           uptodate
           dirty                lru             active               slab
       writeback            reclaim              buddy               mmap
       anonymous          swapcache         swapbacked      compound_head
   compound_tail               huge        unevictable           hwpoison
          nopage           reserved(r)         mlocked(r)    mappedtodisk(r)
         private(r)       private_2(r)   owner_private(r)            arch(r)
        uncached(r)       readahead(o)       slob_free(o)     slub_frozen(o)
      slub_debug(o)
                                   (r) raw mode bits  (o) overloaded bits

# ./page-types
             flags      page-count       MB  symbolic-flags                     long-symbolic-flags
0x0000000000000000          487369     1903  _________________________________
0x0000000000000014               5        0  __R_D____________________________  referenced,dirty
0x0000000000000020               1        0  _____l___________________________  lru
0x0000000000000024              34        0  __R__l___________________________  referenced,lru
0x0000000000000028            3838       14  ___U_l___________________________  uptodate,lru
0x0001000000000028              48        0  ___U_l_______________________I___  uptodate,lru,readahead
0x000000000000002c            6478       25  __RU_l___________________________  referenced,uptodate,lru
0x000100000000002c              47        0  __RU_l_______________________I___  referenced,uptodate,lru,readahead
0x0000000000000040            8344       32  ______A__________________________  active
0x0000000000000060               1        0  _____lA__________________________  lru,active
0x0000000000000068             348        1  ___U_lA__________________________  uptodate,lru,active
0x0001000000000068              12        0  ___U_lA______________________I___  uptodate,lru,active,readahead
0x000000000000006c             988        3  __RU_lA__________________________  referenced,uptodate,lru,active
0x000100000000006c              48        0  __RU_lA______________________I___  referenced,uptodate,lru,active,readahead
0x0000000000004078               1        0  ___UDlA_______b__________________  uptodate,dirty,lru,active,swapbacked
0x000000000000407c              34        0  __RUDlA_______b__________________  referenced,uptodate,dirty,lru,active,swapbacked
0x0000000000000400             503        1  __________B______________________  buddy
0x0000000000000804               1        0  __R________M_____________________  referenced,mmap
0x0000000000000828            1029        4  ___U_l_____M_____________________  uptodate,lru,mmap
0x0001000000000828              43        0  ___U_l_____M_________________I___  uptodate,lru,mmap,readahead
0x000000000000082c             382        1  __RU_l_____M_____________________  referenced,uptodate,lru,mmap
0x000100000000082c              12        0  __RU_l_____M_________________I___  referenced,uptodate,lru,mmap,readahead
0x0000000000000868             192        0  ___U_lA____M_____________________  uptodate,lru,active,mmap
0x0001000000000868              12        0  ___U_lA____M_________________I___  uptodate,lru,active,mmap,readahead
0x000000000000086c             800        3  __RU_lA____M_____________________  referenced,uptodate,lru,active,mmap
0x000100000000086c              31        0  __RU_lA____M_________________I___  referenced,uptodate,lru,active,mmap,readahead
0x0000000000004878               2        0  ___UDlA____M__b__________________  uptodate,dirty,lru,active,mmap,swapbacked
0x0000000000001000             492        1  ____________a____________________  anonymous
0x0000000000005808               4        0  ___U_______Ma_b__________________  uptodate,mmap,anonymous,swapbacked
0x0000000000005868            2839       11  ___U_lA____Ma_b__________________  uptodate,lru,active,mmap,anonymous,swapbacked
0x000000000000586c              30        0  __RU_lA____Ma_b__________________  referenced,uptodate,lru,active,mmap,anonymous,swapbacked
             total          513968     2007

# ./page-types -r
             flags      page-count       MB  symbolic-flags                     long-symbolic-flags
0x0000000000000000          468002     1828  _________________________________
0x0000000100000000           19102       74  _____________________r___________  reserved
0x0000000000008000              41        0  _______________H_________________  compound_head
0x0000000000010000             188        0  ________________T________________  compound_tail
0x0000000000008014               1        0  __R_D__________H_________________  referenced,dirty,compound_head
0x0000000000010014               4        0  __R_D___________T________________  referenced,dirty,compound_tail
0x0000000000000020               1        0  _____l___________________________  lru
0x0000000800000024              34        0  __R__l__________________P________  referenced,lru,private
0x0000000000000028            3794       14  ___U_l___________________________  uptodate,lru
0x0001000000000028              46        0  ___U_l_______________________I___  uptodate,lru,readahead
0x0000000400000028              44        0  ___U_l_________________d_________  uptodate,lru,mappedtodisk
0x0001000400000028               2        0  ___U_l_________________d_____I___  uptodate,lru,mappedtodisk,readahead
0x000000000000002c            6434       25  __RU_l___________________________  referenced,uptodate,lru
0x000100000000002c              47        0  __RU_l_______________________I___  referenced,uptodate,lru,readahead
0x000000040000002c              14        0  __RU_l_________________d_________  referenced,uptodate,lru,mappedtodisk
0x000000080000002c              30        0  __RU_l__________________P________  referenced,uptodate,lru,private
0x0000000800000040            8124       31  ______A_________________P________  active,private
0x0000000000000040             219        0  ______A__________________________  active
0x0000000800000060               1        0  _____lA_________________P________  lru,active,private
0x0000000000000068             322        1  ___U_lA__________________________  uptodate,lru,active
0x0001000000000068              12        0  ___U_lA______________________I___  uptodate,lru,active,readahead
0x0000000400000068              13        0  ___U_lA________________d_________  uptodate,lru,active,mappedtodisk
0x0000000800000068              12        0  ___U_lA_________________P________  uptodate,lru,active,private
0x000000000000006c             977        3  __RU_lA__________________________  referenced,uptodate,lru,active
0x000100000000006c              48        0  __RU_lA______________________I___  referenced,uptodate,lru,active,readahead
0x000000040000006c               5        0  __RU_lA________________d_________  referenced,uptodate,lru,active,mappedtodisk
0x000000080000006c               3        0  __RU_lA_________________P________  referenced,uptodate,lru,active,private
0x0000000c0000006c               3        0  __RU_lA________________dP________  referenced,uptodate,lru,active,mappedtodisk,private
0x0000000c00000068               1        0  ___U_lA________________dP________  uptodate,lru,active,mappedtodisk,private
0x0000000000004078               1        0  ___UDlA_______b__________________  uptodate,dirty,lru,active,swapbacked
0x000000000000407c              34        0  __RUDlA_______b__________________  referenced,uptodate,dirty,lru,active,swapbacked
0x0000000000000400             538        2  __________B______________________  buddy
0x0000000000000804               1        0  __R________M_____________________  referenced,mmap
0x0000000000000828            1029        4  ___U_l_____M_____________________  uptodate,lru,mmap
0x0001000000000828              43        0  ___U_l_____M_________________I___  uptodate,lru,mmap,readahead
0x000000000000082c             382        1  __RU_l_____M_____________________  referenced,uptodate,lru,mmap
0x000100000000082c              12        0  __RU_l_____M_________________I___  referenced,uptodate,lru,mmap,readahead
0x0000000000000868             192        0  ___U_lA____M_____________________  uptodate,lru,active,mmap
0x0001000000000868              12        0  ___U_lA____M_________________I___  uptodate,lru,active,mmap,readahead
0x000000000000086c             800        3  __RU_lA____M_____________________  referenced,uptodate,lru,active,mmap
0x000100000000086c              31        0  __RU_lA____M_________________I___  referenced,uptodate,lru,active,mmap,readahead
0x0000000000004878               2        0  ___UDlA____M__b__________________  uptodate,dirty,lru,active,mmap,swapbacked
0x0000000000001000             492        1  ____________a____________________  anonymous
0x0000000000005008               2        0  ___U________a_b__________________  uptodate,anonymous,swapbacked
0x0000000000005808               4        0  ___U_______Ma_b__________________  uptodate,mmap,anonymous,swapbacked
0x000000000000580c               1        0  __RU_______Ma_b__________________  referenced,uptodate,mmap,anonymous,swapbacked
0x0000000000005868            2839       11  ___U_lA____Ma_b__________________  uptodate,lru,active,mmap,anonymous,swapbacked
0x000000000000586c              29        0  __RU_lA____Ma_b__________________  referenced,uptodate,lru,active,mmap,anonymous,swapbacked
             total          513968     2007

# ./page-types --raw --list --no-summary --bits reserved
offset  count   flags
0       15      _____________________r___________
31      4       _____________________r___________
159     97      _____________________r___________
4096    2067    _____________________r___________
6752    2390    _____________________r___________
9355    3       _____________________r___________
9728    14526   _____________________r___________

This patch:

Introduce PageHuge(), which identifies huge/gigantic pages by their
dedicated compound destructor functions.

Also move prep_compound_gigantic_page() to hugetlb.c and make
__free_pages_ok() non-static.

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/page.c          |  1 +
 include/linux/hugetlb.h |  7 ++++
 mm/hugetlb.c            | 98 +++++++++++++++++++++++++++++++------------------
 mm/internal.h           |  5 +--
 mm/page_alloc.c         | 17 ---------
 5 files changed, 73 insertions(+), 55 deletions(-)

(limited to 'include')

diff --git a/fs/proc/page.c b/fs/proc/page.c
index e9983837d08..38dd88b7ce8 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -6,6 +6,7 @@
 #include <linux/mmzone.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/hugetlb.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 03be7f29ca0..a05a5ef3339 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -11,6 +11,8 @@
 
 struct ctl_table;
 
+int PageHuge(struct page *page);
+
 static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
 {
 	return vma->vm_flags & VM_HUGETLB;
@@ -61,6 +63,11 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
 
 #else /* !CONFIG_HUGETLB_PAGE */
 
+static inline int PageHuge(struct page *page)
+{
+	return 0;
+}
+
 static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
 {
 	return 0;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7b9b6015b2e..a56e6f3ce97 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -578,41 +578,6 @@ static void free_huge_page(struct page *page)
 		hugetlb_put_quota(mapping, 1);
 }
 
-/*
- * Increment or decrement surplus_huge_pages.  Keep node-specific counters
- * balanced by operating on them in a round-robin fashion.
- * Returns 1 if an adjustment was made.
- */
-static int adjust_pool_surplus(struct hstate *h, int delta)
-{
-	static int prev_nid;
-	int nid = prev_nid;
-	int ret = 0;
-
-	VM_BUG_ON(delta != -1 && delta != 1);
-	do {
-		nid = next_node(nid, node_online_map);
-		if (nid == MAX_NUMNODES)
-			nid = first_node(node_online_map);
-
-		/* To shrink on this node, there must be a surplus page */
-		if (delta < 0 && !h->surplus_huge_pages_node[nid])
-			continue;
-		/* Surplus cannot exceed the total number of pages */
-		if (delta > 0 && h->surplus_huge_pages_node[nid] >=
-						h->nr_huge_pages_node[nid])
-			continue;
-
-		h->surplus_huge_pages += delta;
-		h->surplus_huge_pages_node[nid] += delta;
-		ret = 1;
-		break;
-	} while (nid != prev_nid);
-
-	prev_nid = nid;
-	return ret;
-}
-
 static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
 {
 	set_compound_page_dtor(page, free_huge_page);
@@ -623,6 +588,34 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
 	put_page(page); /* free it into the hugepage allocator */
 }
 
+static void prep_compound_gigantic_page(struct page *page, unsigned long order)
+{
+	int i;
+	int nr_pages = 1 << order;
+	struct page *p = page + 1;
+
+	/* we rely on prep_new_huge_page to set the destructor */
+	set_compound_order(page, order);
+	__SetPageHead(page);
+	for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
+		__SetPageTail(p);
+		p->first_page = page;
+	}
+}
+
+int PageHuge(struct page *page)
+{
+	compound_page_dtor *dtor;
+
+	if (!PageCompound(page))
+		return 0;
+
+	page = compound_head(page);
+	dtor = get_compound_page_dtor(page);
+
+	return dtor == free_huge_page;
+}
+
 static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
 {
 	struct page *page;
@@ -1140,6 +1133,41 @@ static inline void try_to_free_low(struct hstate *h, unsigned long count)
 }
 #endif
 
+/*
+ * Increment or decrement surplus_huge_pages.  Keep node-specific counters
+ * balanced by operating on them in a round-robin fashion.
+ * Returns 1 if an adjustment was made.
+ */
+static int adjust_pool_surplus(struct hstate *h, int delta)
+{
+	static int prev_nid;
+	int nid = prev_nid;
+	int ret = 0;
+
+	VM_BUG_ON(delta != -1 && delta != 1);
+	do {
+		nid = next_node(nid, node_online_map);
+		if (nid == MAX_NUMNODES)
+			nid = first_node(node_online_map);
+
+		/* To shrink on this node, there must be a surplus page */
+		if (delta < 0 && !h->surplus_huge_pages_node[nid])
+			continue;
+		/* Surplus cannot exceed the total number of pages */
+		if (delta > 0 && h->surplus_huge_pages_node[nid] >=
+						h->nr_huge_pages_node[nid])
+			continue;
+
+		h->surplus_huge_pages += delta;
+		h->surplus_huge_pages_node[nid] += delta;
+		ret = 1;
+		break;
+	} while (nid != prev_nid);
+
+	prev_nid = nid;
+	return ret;
+}
+
 #define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
 static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
 {
diff --git a/mm/internal.h b/mm/internal.h
index 4b1672a8cf7..b4ac332e807 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -16,9 +16,6 @@
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
 
-extern void prep_compound_page(struct page *page, unsigned long order);
-extern void prep_compound_gigantic_page(struct page *page, unsigned long order);
-
 static inline void set_page_count(struct page *page, int v)
 {
 	atomic_set(&page->_count, v);
@@ -51,6 +48,8 @@ extern void putback_lru_page(struct page *page);
  */
 extern unsigned long highest_memmap_pfn;
 extern void __free_pages_bootmem(struct page *page, unsigned int order);
+extern void prep_compound_page(struct page *page, unsigned long order);
+
 
 /*
  * function for dealing with page's order in buddy system.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8ca06d87dc1..131655cdb6b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -300,23 +300,6 @@ void prep_compound_page(struct page *page, unsigned long order)
 	}
 }
 
-#ifdef CONFIG_HUGETLBFS
-void prep_compound_gigantic_page(struct page *page, unsigned long order)
-{
-	int i;
-	int nr_pages = 1 << order;
-	struct page *p = page + 1;
-
-	set_compound_page_dtor(page, free_compound_page);
-	set_compound_order(page, order);
-	__SetPageHead(page);
-	for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
-		__SetPageTail(p);
-		p->first_page = page;
-	}
-}
-#endif
-
 static int destroy_compound_page(struct page *page, unsigned long order)
 {
 	int i;
-- 
cgit v1.2.3-70-g09d2


From 56e49d218890f49b0057710a4b6fef31f5ffbfec Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Tue, 16 Jun 2009 15:32:28 -0700
Subject: vmscan: evict use-once pages first

When the file LRU lists are dominated by streaming IO pages, evict those
pages first, before considering evicting other pages.

This should be safe from deadlocks or performance problems
because only three things can happen to an inactive file page:

1) referenced twice and promoted to the active list
2) evicted by the pageout code
3) under IO, after which it will get evicted or promoted

The pages freed in this way can either be reused for streaming IO, or
allocated for something else.  If the pages are used for streaming IO,
this pageout pattern continues.  Otherwise, we will fall back to the
normal pageout pattern.

Signed-off-by: Rik van Riel <riel@redhat.com>
Reported-by: Elladan <elladan@eskimo.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  7 +++++++
 mm/memcontrol.c            | 11 +++++++++++
 mm/vmscan.c                | 38 +++++++++++++++++++++++++++++++++++++-
 3 files changed, 55 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 25b9ca93d23..45add35dda1 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -94,6 +94,7 @@ extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem,
 extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
 							int priority);
 int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg);
+int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg);
 unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg,
 				       struct zone *zone,
 				       enum lru_list lru);
@@ -239,6 +240,12 @@ mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg)
 	return 1;
 }
 
+static inline int
+mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg)
+{
+	return 1;
+}
+
 static inline unsigned long
 mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, struct zone *zone,
 			 enum lru_list lru)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 78eb8552818..70db6e0a5ee 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -570,6 +570,17 @@ int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg)
 	return 0;
 }
 
+int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg)
+{
+	unsigned long active;
+	unsigned long inactive;
+
+	inactive = mem_cgroup_get_local_zonestat(memcg, LRU_INACTIVE_FILE);
+	active = mem_cgroup_get_local_zonestat(memcg, LRU_ACTIVE_FILE);
+
+	return (active > inactive);
+}
+
 unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg,
 				       struct zone *zone,
 				       enum lru_list lru)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e5245d05164..9673437a545 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1351,12 +1351,48 @@ static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
 	return low;
 }
 
+static int inactive_file_is_low_global(struct zone *zone)
+{
+	unsigned long active, inactive;
+
+	active = zone_page_state(zone, NR_ACTIVE_FILE);
+	inactive = zone_page_state(zone, NR_INACTIVE_FILE);
+
+	return (active > inactive);
+}
+
+/**
+ * inactive_file_is_low - check if file pages need to be deactivated
+ * @zone: zone to check
+ * @sc:   scan control of this context
+ *
+ * When the system is doing streaming IO, memory pressure here
+ * ensures that active file pages get deactivated, until more
+ * than half of the file pages are on the inactive list.
+ *
+ * Once we get to that situation, protect the system's working
+ * set from being evicted by disabling active file page aging.
+ *
+ * This uses a different ratio than the anonymous pages, because
+ * the page cache uses a use-once replacement algorithm.
+ */
+static int inactive_file_is_low(struct zone *zone, struct scan_control *sc)
+{
+	int low;
+
+	if (scanning_global_lru(sc))
+		low = inactive_file_is_low_global(zone);
+	else
+		low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup);
+	return low;
+}
+
 static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
 	struct zone *zone, struct scan_control *sc, int priority)
 {
 	int file = is_file_lru(lru);
 
-	if (lru == LRU_ACTIVE_FILE) {
+	if (lru == LRU_ACTIVE_FILE && inactive_file_is_low(zone, sc)) {
 		shrink_active_list(nr_to_scan, zone, sc, priority, file);
 		return 0;
 	}
-- 
cgit v1.2.3-70-g09d2


From 6e08a369ee10b361ac1cdcdf4fabd420fd08beb3 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Tue, 16 Jun 2009 15:32:29 -0700
Subject: vmscan: cleanup the scan batching code

The vmscan batching logic is twisting.  Move it into a standalone function
nr_scan_try_batch() and document it.  No behavior change.

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Christoph Lameter <cl@linux-foundation.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |  4 ++--
 mm/page_alloc.c        |  2 +-
 mm/vmscan.c            | 39 ++++++++++++++++++++++++++++-----------
 mm/vmstat.c            |  8 ++++----
 4 files changed, 35 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index dd8487f0442..db976b9f879 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -334,9 +334,9 @@ struct zone {
 
 	/* Fields commonly accessed by the page reclaim scanner */
 	spinlock_t		lru_lock;	
-	struct {
+	struct zone_lru {
 		struct list_head list;
-		unsigned long nr_scan;
+		unsigned long nr_saved_scan;	/* accumulated for batching */
 	} lru[NR_LRU_LISTS];
 
 	struct zone_reclaim_stat reclaim_stat;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 131655cdb6b..e5b8f628d16 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3657,7 +3657,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 		zone_pcp_init(zone);
 		for_each_lru(l) {
 			INIT_LIST_HEAD(&zone->lru[l].list);
-			zone->lru[l].nr_scan = 0;
+			zone->lru[l].nr_saved_scan = 0;
 		}
 		zone->reclaim_stat.recent_rotated[0] = 0;
 		zone->reclaim_stat.recent_rotated[1] = 0;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9673437a545..d4da097533c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1492,6 +1492,26 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 	percent[1] = 100 - percent[0];
 }
 
+/*
+ * Smallish @nr_to_scan's are deposited in @nr_saved_scan,
+ * until we collected @swap_cluster_max pages to scan.
+ */
+static unsigned long nr_scan_try_batch(unsigned long nr_to_scan,
+				       unsigned long *nr_saved_scan,
+				       unsigned long swap_cluster_max)
+{
+	unsigned long nr;
+
+	*nr_saved_scan += nr_to_scan;
+	nr = *nr_saved_scan;
+
+	if (nr >= swap_cluster_max)
+		*nr_saved_scan = 0;
+	else
+		nr = 0;
+
+	return nr;
+}
 
 /*
  * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
@@ -1517,14 +1537,11 @@ static void shrink_zone(int priority, struct zone *zone,
 			scan >>= priority;
 			scan = (scan * percent[file]) / 100;
 		}
-		if (scanning_global_lru(sc)) {
-			zone->lru[l].nr_scan += scan;
-			nr[l] = zone->lru[l].nr_scan;
-			if (nr[l] >= swap_cluster_max)
-				zone->lru[l].nr_scan = 0;
-			else
-				nr[l] = 0;
-		} else
+		if (scanning_global_lru(sc))
+			nr[l] = nr_scan_try_batch(scan,
+						  &zone->lru[l].nr_saved_scan,
+						  swap_cluster_max);
+		else
 			nr[l] = scan;
 	}
 
@@ -2124,11 +2141,11 @@ static void shrink_all_zones(unsigned long nr_pages, int prio,
 						l == LRU_ACTIVE_FILE))
 				continue;
 
-			zone->lru[l].nr_scan += (lru_pages >> prio) + 1;
-			if (zone->lru[l].nr_scan >= nr_pages || pass > 3) {
+			zone->lru[l].nr_saved_scan += (lru_pages >> prio) + 1;
+			if (zone->lru[l].nr_saved_scan >= nr_pages || pass > 3) {
 				unsigned long nr_to_scan;
 
-				zone->lru[l].nr_scan = 0;
+				zone->lru[l].nr_saved_scan = 0;
 				nr_to_scan = min(nr_pages, lru_pages);
 				nr_reclaimed += shrink_list(l, nr_to_scan, zone,
 								sc, prio);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 415110772c7..84c05555691 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -718,10 +718,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 		   low_wmark_pages(zone),
 		   high_wmark_pages(zone),
 		   zone->pages_scanned,
-		   zone->lru[LRU_ACTIVE_ANON].nr_scan,
-		   zone->lru[LRU_INACTIVE_ANON].nr_scan,
-		   zone->lru[LRU_ACTIVE_FILE].nr_scan,
-		   zone->lru[LRU_INACTIVE_FILE].nr_scan,
+		   zone->lru[LRU_ACTIVE_ANON].nr_saved_scan,
+		   zone->lru[LRU_INACTIVE_ANON].nr_saved_scan,
+		   zone->lru[LRU_ACTIVE_FILE].nr_saved_scan,
+		   zone->lru[LRU_INACTIVE_FILE].nr_saved_scan,
 		   zone->spanned_pages,
 		   zone->present_pages);
 
-- 
cgit v1.2.3-70-g09d2


From 3b6748e2dd69906af3835db4dc9d1c8a3ee4c68c Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue, 16 Jun 2009 15:32:35 -0700
Subject: mm: introduce follow_pfn()

Analoguous to follow_phys(), add a helper that looks up the PFN at a
user virtual address in an IO mapping or a raw PFN mapping.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Christoph Hellwig <hch@infradead.org>
Acked-by: Magnus Damm <magnus.damm@gmail.com>
Cc: Hans Verkuil <hverkuil@xs4all.nl>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  2 ++
 mm/memory.c        | 29 +++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7b548e7cfbd..c80dbd4a62c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -792,6 +792,8 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
 			struct vm_area_struct *vma);
 void unmap_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen, int even_cows);
+int follow_pfn(struct vm_area_struct *vma, unsigned long address,
+	unsigned long *pfn);
 int follow_phys(struct vm_area_struct *vma, unsigned long address,
 		unsigned int flags, unsigned long *prot, resource_size_t *phys);
 int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
diff --git a/mm/memory.c b/mm/memory.c
index 24ff20480bb..d5d1653d60a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3140,6 +3140,35 @@ out:
 	return -EINVAL;
 }
 
+/**
+ * follow_pfn - look up PFN at a user virtual address
+ * @vma: memory mapping
+ * @address: user virtual address
+ * @pfn: location to store found PFN
+ *
+ * Only IO mappings and raw PFN mappings are allowed.
+ *
+ * Returns zero and the pfn at @pfn on success, -ve otherwise.
+ */
+int follow_pfn(struct vm_area_struct *vma, unsigned long address,
+	unsigned long *pfn)
+{
+	int ret = -EINVAL;
+	spinlock_t *ptl;
+	pte_t *ptep;
+
+	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+		return ret;
+
+	ret = follow_pte(vma->vm_mm, address, &ptep, &ptl);
+	if (ret)
+		return ret;
+	*pfn = pte_pfn(*ptep);
+	pte_unmap_unlock(ptep, ptl);
+	return 0;
+}
+EXPORT_SYMBOL(follow_pfn);
+
 #ifdef CONFIG_HAVE_IOREMAP_PROT
 int follow_phys(struct vm_area_struct *vma,
 		unsigned long address, unsigned int flags,
-- 
cgit v1.2.3-70-g09d2


From 7f33d49a2ed546e01f7b1d0607661810f2421859 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Tue, 16 Jun 2009 15:32:41 -0700
Subject: mm, PM/Freezer: Disable OOM killer when tasks are frozen

Currently, the following scenario appears to be possible in theory:

* Tasks are frozen for hibernation or suspend.
* Free pages are almost exhausted.
* Certain piece of code in the suspend code path attempts to allocate
  some memory using GFP_KERNEL and allocation order less than or
  equal to PAGE_ALLOC_COSTLY_ORDER.
* __alloc_pages_internal() cannot find a free page so it invokes the
  OOM killer.
* The OOM killer attempts to kill a task, but the task is frozen, so
  it doesn't die immediately.
* __alloc_pages_internal() jumps to 'restart', unsuccessfully tries
  to find a free page and invokes the OOM killer.
* No progress can be made.

Although it is now hard to trigger during hibernation due to the memory
shrinking carried out by the hibernation code, it is theoretically
possible to trigger during suspend after the memory shrinking has been
removed from that code path.  Moreover, since memory allocations are
going to be used for the hibernation memory shrinking, it will be even
more likely to happen during hibernation.

To prevent it from happening, introduce the oom_killer_disabled switch
that will cause __alloc_pages_internal() to fail in the situations in
which the OOM killer would have been called and make the freezer set
this switch after tasks have been successfully frozen.

[akpm@linux-foundation.org: be nicer to the namespace]
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Fengguang Wu <fengguang.wu@gmail.com>
Cc: David Rientjes <rientjes@google.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h    | 12 ++++++++++++
 kernel/power/process.c |  5 +++++
 mm/page_alloc.c        |  4 ++++
 3 files changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 4efa33088a8..06b7e8cc80a 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -243,4 +243,16 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
 void drain_all_pages(void);
 void drain_local_pages(void *dummy);
 
+extern bool oom_killer_disabled;
+
+static inline void oom_killer_disable(void)
+{
+	oom_killer_disabled = true;
+}
+
+static inline void oom_killer_enable(void)
+{
+	oom_killer_disabled = false;
+}
+
 #endif /* __LINUX_GFP_H */
diff --git a/kernel/power/process.c b/kernel/power/process.c
index ca634019497..da2072d7381 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -117,9 +117,12 @@ int freeze_processes(void)
 	if (error)
 		goto Exit;
 	printk("done.");
+
+	oom_killer_disable();
  Exit:
 	BUG_ON(in_atomic());
 	printk("\n");
+
 	return error;
 }
 
@@ -145,6 +148,8 @@ static void thaw_tasks(bool nosig_only)
 
 void thaw_processes(void)
 {
+	oom_killer_enable();
+
 	printk("Restarting tasks ... ");
 	thaw_tasks(true);
 	thaw_tasks(false);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 61290ea721c..5b09488d0f5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -178,6 +178,8 @@ static void set_pageblock_migratetype(struct page *page, int migratetype)
 					PB_migrate, PB_migrate_end);
 }
 
+bool oom_killer_disabled __read_mostly;
+
 #ifdef CONFIG_DEBUG_VM
 static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
 {
@@ -1769,6 +1771,8 @@ rebalance:
 	 */
 	if (!did_some_progress) {
 		if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
+			if (oom_killer_disabled)
+				goto nopage;
 			page = __alloc_pages_may_oom(gfp_mask, order,
 					zonelist, high_zoneidx,
 					nodemask, preferred_zone,
-- 
cgit v1.2.3-70-g09d2


From 31c911329e048b715a1dfeaaf617be9430fd7f4e Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Tue, 16 Jun 2009 15:32:45 -0700
Subject: mm: check the argument of kunmap on architectures without highmem

If you're using a non-highmem architecture, passing an argument with the
wrong type to kunmap() doesn't give you a warning because the ifdef
doesn't check the type.

Using a static inline function solves the problem nicely.

Reported-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/highmem.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 1fcb7126a01..211ff449726 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -55,7 +55,9 @@ static inline void *kmap(struct page *page)
 	return page_address(page);
 }
 
-#define kunmap(page) do { (void) (page); } while (0)
+static inline void kunmap(struct page *page)
+{
+}
 
 static inline void *kmap_atomic(struct page *page, enum km_type idx)
 {
-- 
cgit v1.2.3-70-g09d2


From b70d94ee438b3fd9c15c7691d7a932a135c18101 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Tue, 16 Jun 2009 15:32:46 -0700
Subject: page-allocator: use integer fields lookup for gfp_zone and check for
 errors in flags passed to the page allocator

This simplifies the code in gfp_zone() and also keeps the ability of the
compiler to use constant folding to get rid of gfp_zone processing.

The lookup of the zone is done using a bitfield stored in an integer.  So
the code in gfp_zone is a simple extraction of bits from a constant
bitfield.  The compiler is generating a load of a constant into a register
and then performs a shift and mask operation to get the zone from a gfp_t.
 No cachelines are touched and no branches have to be predicted by the
compiler.

We are doing some macro tricks here to convince the compiler to always do
the constant folding if possible.

Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Mel Gorman <mel@csn.ul.ie>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 111 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 96 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 06b7e8cc80a..412178afd42 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -21,7 +21,8 @@ struct vm_area_struct;
 #define __GFP_DMA	((__force gfp_t)0x01u)
 #define __GFP_HIGHMEM	((__force gfp_t)0x02u)
 #define __GFP_DMA32	((__force gfp_t)0x04u)
-
+#define __GFP_MOVABLE	((__force gfp_t)0x08u)  /* Page is movable */
+#define GFP_ZONEMASK	(__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
 /*
  * Action modifiers - doesn't change the zoning
  *
@@ -51,7 +52,6 @@ struct vm_area_struct;
 #define __GFP_HARDWALL   ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
 #define __GFP_THISNODE	((__force gfp_t)0x40000u)/* No fallback, no policies */
 #define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */
-#define __GFP_MOVABLE	((__force gfp_t)0x100000u)  /* Page is movable */
 
 #define __GFP_BITS_SHIFT 21	/* Room for 21 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -116,24 +116,105 @@ static inline int allocflags_to_migratetype(gfp_t gfp_flags)
 		((gfp_flags & __GFP_RECLAIMABLE) != 0);
 }
 
-static inline enum zone_type gfp_zone(gfp_t flags)
-{
+#ifdef CONFIG_HIGHMEM
+#define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
+#else
+#define OPT_ZONE_HIGHMEM ZONE_NORMAL
+#endif
+
 #ifdef CONFIG_ZONE_DMA
-	if (flags & __GFP_DMA)
-		return ZONE_DMA;
+#define OPT_ZONE_DMA ZONE_DMA
+#else
+#define OPT_ZONE_DMA ZONE_NORMAL
 #endif
+
 #ifdef CONFIG_ZONE_DMA32
-	if (flags & __GFP_DMA32)
-		return ZONE_DMA32;
+#define OPT_ZONE_DMA32 ZONE_DMA32
+#else
+#define OPT_ZONE_DMA32 ZONE_NORMAL
 #endif
-	if ((flags & (__GFP_HIGHMEM | __GFP_MOVABLE)) ==
-			(__GFP_HIGHMEM | __GFP_MOVABLE))
-		return ZONE_MOVABLE;
-#ifdef CONFIG_HIGHMEM
-	if (flags & __GFP_HIGHMEM)
-		return ZONE_HIGHMEM;
+
+/*
+ * GFP_ZONE_TABLE is a word size bitstring that is used for looking up the
+ * zone to use given the lowest 4 bits of gfp_t. Entries are ZONE_SHIFT long
+ * and there are 16 of them to cover all possible combinations of
+ * __GFP_DMA, __GFP_DMA32, __GFP_MOVABLE and __GFP_HIGHMEM
+ *
+ * The zone fallback order is MOVABLE=>HIGHMEM=>NORMAL=>DMA32=>DMA.
+ * But GFP_MOVABLE is not only a zone specifier but also an allocation
+ * policy. Therefore __GFP_MOVABLE plus another zone selector is valid.
+ * Only 1bit of the lowest 3 bit (DMA,DMA32,HIGHMEM) can be set to "1".
+ *
+ *       bit       result
+ *       =================
+ *       0x0    => NORMAL
+ *       0x1    => DMA or NORMAL
+ *       0x2    => HIGHMEM or NORMAL
+ *       0x3    => BAD (DMA+HIGHMEM)
+ *       0x4    => DMA32 or DMA or NORMAL
+ *       0x5    => BAD (DMA+DMA32)
+ *       0x6    => BAD (HIGHMEM+DMA32)
+ *       0x7    => BAD (HIGHMEM+DMA32+DMA)
+ *       0x8    => NORMAL (MOVABLE+0)
+ *       0x9    => DMA or NORMAL (MOVABLE+DMA)
+ *       0xa    => MOVABLE (Movable is valid only if HIGHMEM is set too)
+ *       0xb    => BAD (MOVABLE+HIGHMEM+DMA)
+ *       0xc    => DMA32 (MOVABLE+HIGHMEM+DMA32)
+ *       0xd    => BAD (MOVABLE+DMA32+DMA)
+ *       0xe    => BAD (MOVABLE+DMA32+HIGHMEM)
+ *       0xf    => BAD (MOVABLE+DMA32+HIGHMEM+DMA)
+ *
+ * ZONES_SHIFT must be <= 2 on 32 bit platforms.
+ */
+
+#if 16 * ZONES_SHIFT > BITS_PER_LONG
+#error ZONES_SHIFT too large to create GFP_ZONE_TABLE integer
+#endif
+
+#define GFP_ZONE_TABLE ( \
+	(ZONE_NORMAL << 0 * ZONES_SHIFT)				\
+	| (OPT_ZONE_DMA << __GFP_DMA * ZONES_SHIFT) 			\
+	| (OPT_ZONE_HIGHMEM << __GFP_HIGHMEM * ZONES_SHIFT)		\
+	| (OPT_ZONE_DMA32 << __GFP_DMA32 * ZONES_SHIFT)			\
+	| (ZONE_NORMAL << __GFP_MOVABLE * ZONES_SHIFT)			\
+	| (OPT_ZONE_DMA << (__GFP_MOVABLE | __GFP_DMA) * ZONES_SHIFT)	\
+	| (ZONE_MOVABLE << (__GFP_MOVABLE | __GFP_HIGHMEM) * ZONES_SHIFT)\
+	| (OPT_ZONE_DMA32 << (__GFP_MOVABLE | __GFP_DMA32) * ZONES_SHIFT)\
+)
+
+/*
+ * GFP_ZONE_BAD is a bitmap for all combination of __GFP_DMA, __GFP_DMA32
+ * __GFP_HIGHMEM and __GFP_MOVABLE that are not permitted. One flag per
+ * entry starting with bit 0. Bit is set if the combination is not
+ * allowed.
+ */
+#define GFP_ZONE_BAD ( \
+	1 << (__GFP_DMA | __GFP_HIGHMEM)				\
+	| 1 << (__GFP_DMA | __GFP_DMA32)				\
+	| 1 << (__GFP_DMA32 | __GFP_HIGHMEM)				\
+	| 1 << (__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM)		\
+	| 1 << (__GFP_MOVABLE | __GFP_HIGHMEM | __GFP_DMA)		\
+	| 1 << (__GFP_MOVABLE | __GFP_DMA32 | __GFP_DMA)		\
+	| 1 << (__GFP_MOVABLE | __GFP_DMA32 | __GFP_HIGHMEM)		\
+	| 1 << (__GFP_MOVABLE | __GFP_DMA32 | __GFP_DMA | __GFP_HIGHMEM)\
+)
+
+static inline enum zone_type gfp_zone(gfp_t flags)
+{
+	enum zone_type z;
+	int bit = flags & GFP_ZONEMASK;
+
+	z = (GFP_ZONE_TABLE >> (bit * ZONES_SHIFT)) &
+					 ((1 << ZONES_SHIFT) - 1);
+
+	if (__builtin_constant_p(bit))
+		BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
+	else {
+#ifdef CONFIG_DEBUG_VM
+		BUG_ON((GFP_ZONE_BAD >> bit) & 1);
 #endif
-	return ZONE_NORMAL;
+	}
+	return z;
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From bc75d33f0fc1d56e734db1f56d3cfc8097b8e0cf Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan.kim@gmail.com>
Date: Tue, 16 Jun 2009 15:32:48 -0700
Subject: page-allocator: clean up functions related to pages_min

Change the names of two functions. It doesn't affect behavior.

Presently, setup_per_zone_pages_min() changes low, high of zone as well as
min.  So a better name is setup_per_zone_wmarks().  That's because Mel
changed zone->pages_[hig/low/min] to zone->watermark array in "page
allocator: replace the watermark-related union in struct zone with a
watermark[] array".

 * setup_per_zone_pages_min => setup_per_zone_wmarks

Of course, we have to change init_per_zone_pages_min, too.  There are not
pages_min any more.

 * init_per_zone_pages_min => init_per_zone_wmark_min

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h  |  2 +-
 mm/memory_hotplug.c |  2 +-
 mm/page_alloc.c     | 17 +++++++++--------
 3 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index c80dbd4a62c..6e11cda1ba0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1052,7 +1052,7 @@ extern int __meminit __early_pfn_to_nid(unsigned long pfn);
 extern void set_dma_reserve(unsigned long new_dma_reserve);
 extern void memmap_init_zone(unsigned long, int, unsigned long,
 				unsigned long, enum memmap_context);
-extern void setup_per_zone_pages_min(void);
+extern void setup_per_zone_wmarks(void);
 extern void mem_init(void);
 extern void __init mmap_init(void);
 extern void show_mem(void);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c083cf5fd6d..037291e15b2 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -422,7 +422,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
 	zone->present_pages += onlined_pages;
 	zone->zone_pgdat->node_present_pages += onlined_pages;
 
-	setup_per_zone_pages_min();
+	setup_per_zone_wmarks();
 	if (onlined_pages) {
 		kswapd_run(zone_to_nid(zone));
 		node_set_state(zone_to_nid(zone), N_HIGH_MEMORY);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5b09488d0f5..8629c84fd9b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4396,12 +4396,13 @@ static void setup_per_zone_lowmem_reserve(void)
 }
 
 /**
- * setup_per_zone_pages_min - called when min_free_kbytes changes.
+ * setup_per_zone_wmarks - called when min_free_kbytes changes
+ * or when memory is hot-added
  *
- * Ensures that the pages_{min,low,high} values for each zone are set correctly
- * with respect to min_free_kbytes.
+ * Ensures that the watermark[min,low,high] values for each zone are set
+ * correctly with respect to min_free_kbytes.
  */
-void setup_per_zone_pages_min(void)
+void setup_per_zone_wmarks(void)
 {
 	unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
 	unsigned long lowmem_pages = 0;
@@ -4519,7 +4520,7 @@ static void __init setup_per_zone_inactive_ratio(void)
  * 8192MB:	11584k
  * 16384MB:	16384k
  */
-static int __init init_per_zone_pages_min(void)
+static int __init init_per_zone_wmark_min(void)
 {
 	unsigned long lowmem_kbytes;
 
@@ -4530,12 +4531,12 @@ static int __init init_per_zone_pages_min(void)
 		min_free_kbytes = 128;
 	if (min_free_kbytes > 65536)
 		min_free_kbytes = 65536;
-	setup_per_zone_pages_min();
+	setup_per_zone_wmarks();
 	setup_per_zone_lowmem_reserve();
 	setup_per_zone_inactive_ratio();
 	return 0;
 }
-module_init(init_per_zone_pages_min)
+module_init(init_per_zone_wmark_min)
 
 /*
  * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so 
@@ -4547,7 +4548,7 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
 {
 	proc_dointvec(table, write, file, buffer, length, ppos);
 	if (write)
-		setup_per_zone_pages_min();
+		setup_per_zone_wmarks();
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 96cb4df5ddf5e6d5785b5acd4003e3689b87f896 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan.kim@gmail.com>
Date: Tue, 16 Jun 2009 15:32:49 -0700
Subject: page-allocator: add inactive ratio calculation function of each zone

Factor the per-zone arithemetic inside setup_per_zone_inactive_ratio()'s
loop into a a separate function, calculate_zone_inactive_ratio().  This
function will be used in a later patch

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  1 +
 mm/page_alloc.c    | 28 ++++++++++++++++------------
 2 files changed, 17 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6e11cda1ba0..f0473a88ca2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1053,6 +1053,7 @@ extern void set_dma_reserve(unsigned long new_dma_reserve);
 extern void memmap_init_zone(unsigned long, int, unsigned long,
 				unsigned long, enum memmap_context);
 extern void setup_per_zone_wmarks(void);
+extern void calculate_zone_inactive_ratio(struct zone *zone);
 extern void mem_init(void);
 extern void __init mmap_init(void);
 extern void show_mem(void);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8629c84fd9b..303607f1d32 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4478,22 +4478,26 @@ void setup_per_zone_wmarks(void)
  *    1TB     101        10GB
  *   10TB     320        32GB
  */
-static void __init setup_per_zone_inactive_ratio(void)
+void calculate_zone_inactive_ratio(struct zone *zone)
 {
-	struct zone *zone;
+	unsigned int gb, ratio;
 
-	for_each_zone(zone) {
-		unsigned int gb, ratio;
+	/* Zone size in gigabytes */
+	gb = zone->present_pages >> (30 - PAGE_SHIFT);
+	if (gb)
+		ratio = int_sqrt(10 * gb);
+	else
+		ratio = 1;
 
-		/* Zone size in gigabytes */
-		gb = zone->present_pages >> (30 - PAGE_SHIFT);
-		if (gb)
-			ratio = int_sqrt(10 * gb);
-		else
-			ratio = 1;
+	zone->inactive_ratio = ratio;
+}
 
-		zone->inactive_ratio = ratio;
-	}
+static void __init setup_per_zone_inactive_ratio(void)
+{
+	struct zone *zone;
+
+	for_each_zone(zone)
+		calculate_zone_inactive_ratio(zone);
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 6837765963f1723e80ca97b1fae660f3a60d77df Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 16 Jun 2009 15:32:51 -0700
Subject: mm: remove CONFIG_UNEVICTABLE_LRU config option

Currently, nobody wants to turn UNEVICTABLE_LRU off.  Thus this
configurability is unnecessary.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andi Kleen <andi@firstfloor.org>
Acked-by: Minchan Kim <minchan.kim@gmail.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/node.c        |  4 ----
 fs/proc/meminfo.c          |  4 ----
 fs/proc/page.c             |  2 --
 include/linux/mmzone.h     | 13 -------------
 include/linux/page-flags.h | 16 +---------------
 include/linux/pagemap.h    | 12 ------------
 include/linux/rmap.h       |  7 -------
 include/linux/swap.h       | 19 -------------------
 include/linux/vmstat.h     |  2 --
 kernel/sysctl.c            |  2 --
 mm/Kconfig                 | 14 +-------------
 mm/internal.h              |  6 ------
 mm/mlock.c                 | 22 ----------------------
 mm/page_alloc.c            |  9 ---------
 mm/rmap.c                  |  3 +--
 mm/vmscan.c                | 17 -----------------
 mm/vmstat.c                |  4 ----
 17 files changed, 3 insertions(+), 153 deletions(-)

(limited to 'include')

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 40b809742a1..91d4087b403 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -72,10 +72,8 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
 		       "Node %d Inactive(anon): %8lu kB\n"
 		       "Node %d Active(file):   %8lu kB\n"
 		       "Node %d Inactive(file): %8lu kB\n"
-#ifdef CONFIG_UNEVICTABLE_LRU
 		       "Node %d Unevictable:    %8lu kB\n"
 		       "Node %d Mlocked:        %8lu kB\n"
-#endif
 #ifdef CONFIG_HIGHMEM
 		       "Node %d HighTotal:      %8lu kB\n"
 		       "Node %d HighFree:       %8lu kB\n"
@@ -105,10 +103,8 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
 		       nid, K(node_page_state(nid, NR_INACTIVE_ANON)),
 		       nid, K(node_page_state(nid, NR_ACTIVE_FILE)),
 		       nid, K(node_page_state(nid, NR_INACTIVE_FILE)),
-#ifdef CONFIG_UNEVICTABLE_LRU
 		       nid, K(node_page_state(nid, NR_UNEVICTABLE)),
 		       nid, K(node_page_state(nid, NR_MLOCK)),
-#endif
 #ifdef CONFIG_HIGHMEM
 		       nid, K(i.totalhigh),
 		       nid, K(i.freehigh),
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index c6b0302af4c..d5c410d47fa 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -64,10 +64,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		"Inactive(anon): %8lu kB\n"
 		"Active(file):   %8lu kB\n"
 		"Inactive(file): %8lu kB\n"
-#ifdef CONFIG_UNEVICTABLE_LRU
 		"Unevictable:    %8lu kB\n"
 		"Mlocked:        %8lu kB\n"
-#endif
 #ifdef CONFIG_HIGHMEM
 		"HighTotal:      %8lu kB\n"
 		"HighFree:       %8lu kB\n"
@@ -109,10 +107,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		K(pages[LRU_INACTIVE_ANON]),
 		K(pages[LRU_ACTIVE_FILE]),
 		K(pages[LRU_INACTIVE_FILE]),
-#ifdef CONFIG_UNEVICTABLE_LRU
 		K(pages[LRU_UNEVICTABLE]),
 		K(global_page_state(NR_MLOCK)),
-#endif
 #ifdef CONFIG_HIGHMEM
 		K(i.totalhigh),
 		K(i.freehigh),
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 9d926bd279a..2707c6c7a20 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -172,10 +172,8 @@ static u64 get_uflags(struct page *page)
 	u |= kpf_copy_bit(k, KPF_SWAPCACHE,	PG_swapcache);
 	u |= kpf_copy_bit(k, KPF_SWAPBACKED,	PG_swapbacked);
 
-#ifdef CONFIG_UNEVICTABLE_LRU
 	u |= kpf_copy_bit(k, KPF_UNEVICTABLE,	PG_unevictable);
 	u |= kpf_copy_bit(k, KPF_MLOCKED,	PG_mlocked);
-#endif
 
 #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
 	u |= kpf_copy_bit(k, KPF_UNCACHED,	PG_uncached);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index db976b9f879..88959853737 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -83,13 +83,8 @@ enum zone_stat_item {
 	NR_ACTIVE_ANON,		/*  "     "     "   "       "         */
 	NR_INACTIVE_FILE,	/*  "     "     "   "       "         */
 	NR_ACTIVE_FILE,		/*  "     "     "   "       "         */
-#ifdef CONFIG_UNEVICTABLE_LRU
 	NR_UNEVICTABLE,		/*  "     "     "   "       "         */
 	NR_MLOCK,		/* mlock()ed pages found and moved off LRU */
-#else
-	NR_UNEVICTABLE = NR_ACTIVE_FILE, /* avoid compiler errors in dead code */
-	NR_MLOCK = NR_ACTIVE_FILE,
-#endif
 	NR_ANON_PAGES,	/* Mapped anonymous pages */
 	NR_FILE_MAPPED,	/* pagecache pages mapped into pagetables.
 			   only modified from process context */
@@ -132,11 +127,7 @@ enum lru_list {
 	LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE,
 	LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
 	LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE,
-#ifdef CONFIG_UNEVICTABLE_LRU
 	LRU_UNEVICTABLE,
-#else
-	LRU_UNEVICTABLE = LRU_ACTIVE_FILE, /* avoid compiler errors in dead code */
-#endif
 	NR_LRU_LISTS
 };
 
@@ -156,11 +147,7 @@ static inline int is_active_lru(enum lru_list l)
 
 static inline int is_unevictable_lru(enum lru_list l)
 {
-#ifdef CONFIG_UNEVICTABLE_LRU
 	return (l == LRU_UNEVICTABLE);
-#else
-	return 0;
-#endif
 }
 
 enum zone_watermarks {
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 62214c7d2d9..d6792f88a17 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -95,9 +95,7 @@ enum pageflags {
 	PG_reclaim,		/* To be reclaimed asap */
 	PG_buddy,		/* Page is free, on buddy lists */
 	PG_swapbacked,		/* Page is backed by RAM/swap */
-#ifdef CONFIG_UNEVICTABLE_LRU
 	PG_unevictable,		/* Page is "unevictable"  */
-#endif
 #ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
 	PG_mlocked,		/* Page is vma mlocked */
 #endif
@@ -248,14 +246,8 @@ PAGEFLAG_FALSE(SwapCache)
 	SETPAGEFLAG_NOOP(SwapCache) CLEARPAGEFLAG_NOOP(SwapCache)
 #endif
 
-#ifdef CONFIG_UNEVICTABLE_LRU
 PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
 	TESTCLEARFLAG(Unevictable, unevictable)
-#else
-PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable)
-	SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable)
-	__CLEARPAGEFLAG_NOOP(Unevictable)
-#endif
 
 #ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
 #define MLOCK_PAGES 1
@@ -382,12 +374,6 @@ static inline void __ClearPageTail(struct page *page)
 
 #endif /* !PAGEFLAGS_EXTENDED */
 
-#ifdef CONFIG_UNEVICTABLE_LRU
-#define __PG_UNEVICTABLE	(1 << PG_unevictable)
-#else
-#define __PG_UNEVICTABLE	0
-#endif
-
 #ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
 #define __PG_MLOCKED		(1 << PG_mlocked)
 #else
@@ -403,7 +389,7 @@ static inline void __ClearPageTail(struct page *page)
 	 1 << PG_private | 1 << PG_private_2 | \
 	 1 << PG_buddy	 | 1 << PG_writeback | 1 << PG_reserved | \
 	 1 << PG_slab	 | 1 << PG_swapcache | 1 << PG_active | \
-	 __PG_UNEVICTABLE | __PG_MLOCKED)
+	 1 << PG_unevictable | __PG_MLOCKED)
 
 /*
  * Flags checked when a page is prepped for return by the page allocator.
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 34da5230faa..aec3252afcf 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -22,9 +22,7 @@ enum mapping_flags {
 	AS_EIO		= __GFP_BITS_SHIFT + 0,	/* IO error on async write */
 	AS_ENOSPC	= __GFP_BITS_SHIFT + 1,	/* ENOSPC on async write */
 	AS_MM_ALL_LOCKS	= __GFP_BITS_SHIFT + 2,	/* under mm_take_all_locks() */
-#ifdef CONFIG_UNEVICTABLE_LRU
 	AS_UNEVICTABLE	= __GFP_BITS_SHIFT + 3,	/* e.g., ramdisk, SHM_LOCK */
-#endif
 };
 
 static inline void mapping_set_error(struct address_space *mapping, int error)
@@ -37,8 +35,6 @@ static inline void mapping_set_error(struct address_space *mapping, int error)
 	}
 }
 
-#ifdef CONFIG_UNEVICTABLE_LRU
-
 static inline void mapping_set_unevictable(struct address_space *mapping)
 {
 	set_bit(AS_UNEVICTABLE, &mapping->flags);
@@ -55,14 +51,6 @@ static inline int mapping_unevictable(struct address_space *mapping)
 		return test_bit(AS_UNEVICTABLE, &mapping->flags);
 	return !!mapping;
 }
-#else
-static inline void mapping_set_unevictable(struct address_space *mapping) { }
-static inline void mapping_clear_unevictable(struct address_space *mapping) { }
-static inline int mapping_unevictable(struct address_space *mapping)
-{
-	return 0;
-}
-#endif
 
 static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
 {
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index b35bc0e19cd..619379a1dd9 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -105,18 +105,11 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
  */
 int page_mkclean(struct page *);
 
-#ifdef CONFIG_UNEVICTABLE_LRU
 /*
  * called in munlock()/munmap() path to check for other vmas holding
  * the page mlocked.
  */
 int try_to_munlock(struct page *);
-#else
-static inline int try_to_munlock(struct page *page)
-{
-	return 0;	/* a.k.a. SWAP_SUCCESS */
-}
-#endif
 
 #else	/* !CONFIG_MMU */
 
diff --git a/include/linux/swap.h b/include/linux/swap.h
index d476aad3ff5..f30c06908f0 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -235,7 +235,6 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
 }
 #endif
 
-#ifdef CONFIG_UNEVICTABLE_LRU
 extern int page_evictable(struct page *page, struct vm_area_struct *vma);
 extern void scan_mapping_unevictable_pages(struct address_space *);
 
@@ -244,24 +243,6 @@ extern int scan_unevictable_handler(struct ctl_table *, int, struct file *,
 					void __user *, size_t *, loff_t *);
 extern int scan_unevictable_register_node(struct node *node);
 extern void scan_unevictable_unregister_node(struct node *node);
-#else
-static inline int page_evictable(struct page *page,
-						struct vm_area_struct *vma)
-{
-	return 1;
-}
-
-static inline void scan_mapping_unevictable_pages(struct address_space *mapping)
-{
-}
-
-static inline int scan_unevictable_register_node(struct node *node)
-{
-	return 0;
-}
-
-static inline void scan_unevictable_unregister_node(struct node *node) { }
-#endif
 
 extern int kswapd_run(int nid);
 
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 524cd1b28ec..ff4696c6dce 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -41,7 +41,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 #ifdef CONFIG_HUGETLB_PAGE
 		HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
 #endif
-#ifdef CONFIG_UNEVICTABLE_LRU
 		UNEVICTABLE_PGCULLED,	/* culled to noreclaim list */
 		UNEVICTABLE_PGSCANNED,	/* scanned for reclaimability */
 		UNEVICTABLE_PGRESCUED,	/* rescued from noreclaim list */
@@ -50,7 +49,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		UNEVICTABLE_PGCLEARED,	/* on COW, page truncate */
 		UNEVICTABLE_PGSTRANDED,	/* unable to isolate on unlock */
 		UNEVICTABLE_MLOCKFREED,
-#endif
 		NR_VM_EVENT_ITEMS
 };
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0e51a35a448..2ccee08f92f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1325,7 +1325,6 @@ static struct ctl_table vm_table[] = {
 		.extra2		= &one,
 	},
 #endif
-#ifdef CONFIG_UNEVICTABLE_LRU
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "scan_unevictable_pages",
@@ -1334,7 +1333,6 @@ static struct ctl_table vm_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &scan_unevictable_handler,
 	},
-#endif
 /*
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt
diff --git a/mm/Kconfig b/mm/Kconfig
index 71830ba7b98..97d2c88b745 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -203,25 +203,13 @@ config VIRT_TO_BUS
 	def_bool y
 	depends on !ARCH_NO_VIRT_TO_BUS
 
-config UNEVICTABLE_LRU
-	bool "Add LRU list to track non-evictable pages"
-	default y
-	help
-	  Keeps unevictable pages off of the active and inactive pageout
-	  lists, so kswapd will not waste CPU time or have its balancing
-	  algorithms thrown off by scanning these pages.  Selecting this
-	  will use one page flag and increase the code size a little,
-	  say Y unless you know what you are doing.
-
-	  See Documentation/vm/unevictable-lru.txt for more information.
-
 config HAVE_MLOCK
 	bool
 	default y if MMU=y
 
 config HAVE_MLOCKED_PAGE_BIT
 	bool
-	default y if HAVE_MLOCK=y && UNEVICTABLE_LRU=y
+	default y if HAVE_MLOCK=y
 
 config MMU_NOTIFIER
 	bool
diff --git a/mm/internal.h b/mm/internal.h
index b4ac332e807..f02c7508068 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -73,7 +73,6 @@ static inline void munlock_vma_pages_all(struct vm_area_struct *vma)
 }
 #endif
 
-#ifdef CONFIG_UNEVICTABLE_LRU
 /*
  * unevictable_migrate_page() called only from migrate_page_copy() to
  * migrate unevictable flag to new page.
@@ -85,11 +84,6 @@ static inline void unevictable_migrate_page(struct page *new, struct page *old)
 	if (TestClearPageUnevictable(old))
 		SetPageUnevictable(new);
 }
-#else
-static inline void unevictable_migrate_page(struct page *new, struct page *old)
-{
-}
-#endif
 
 #ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
 /*
diff --git a/mm/mlock.c b/mm/mlock.c
index ac130433c7d..45eb650b965 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -31,7 +31,6 @@ int can_do_mlock(void)
 }
 EXPORT_SYMBOL(can_do_mlock);
 
-#ifdef CONFIG_UNEVICTABLE_LRU
 /*
  * Mlocked pages are marked with PageMlocked() flag for efficient testing
  * in vmscan and, possibly, the fault path; and to support semi-accurate
@@ -261,27 +260,6 @@ static int __mlock_posix_error_return(long retval)
 	return retval;
 }
 
-#else /* CONFIG_UNEVICTABLE_LRU */
-
-/*
- * Just make pages present if VM_LOCKED.  No-op if unlocking.
- */
-static long __mlock_vma_pages_range(struct vm_area_struct *vma,
-				   unsigned long start, unsigned long end,
-				   int mlock)
-{
-	if (mlock && (vma->vm_flags & VM_LOCKED))
-		return make_pages_present(start, end);
-	return 0;
-}
-
-static inline int __mlock_posix_error_return(long retval)
-{
-	return 0;
-}
-
-#endif /* CONFIG_UNEVICTABLE_LRU */
-
 /**
  * mlock_vma_pages_range() - mlock pages in specified vma range.
  * @vma - the vma containing the specfied address range
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 00e293734fc..c95a77cd581 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2077,19 +2077,14 @@ void show_free_areas(void)
 
 	printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n"
 		" inactive_file:%lu"
-//TODO:  check/adjust line lengths
-#ifdef CONFIG_UNEVICTABLE_LRU
 		" unevictable:%lu"
-#endif
 		" dirty:%lu writeback:%lu unstable:%lu\n"
 		" free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
 		global_page_state(NR_ACTIVE_ANON),
 		global_page_state(NR_ACTIVE_FILE),
 		global_page_state(NR_INACTIVE_ANON),
 		global_page_state(NR_INACTIVE_FILE),
-#ifdef CONFIG_UNEVICTABLE_LRU
 		global_page_state(NR_UNEVICTABLE),
-#endif
 		global_page_state(NR_FILE_DIRTY),
 		global_page_state(NR_WRITEBACK),
 		global_page_state(NR_UNSTABLE_NFS),
@@ -2113,9 +2108,7 @@ void show_free_areas(void)
 			" inactive_anon:%lukB"
 			" active_file:%lukB"
 			" inactive_file:%lukB"
-#ifdef CONFIG_UNEVICTABLE_LRU
 			" unevictable:%lukB"
-#endif
 			" present:%lukB"
 			" pages_scanned:%lu"
 			" all_unreclaimable? %s"
@@ -2129,9 +2122,7 @@ void show_free_areas(void)
 			K(zone_page_state(zone, NR_INACTIVE_ANON)),
 			K(zone_page_state(zone, NR_ACTIVE_FILE)),
 			K(zone_page_state(zone, NR_INACTIVE_FILE)),
-#ifdef CONFIG_UNEVICTABLE_LRU
 			K(zone_page_state(zone, NR_UNEVICTABLE)),
-#endif
 			K(zone->present_pages),
 			zone->pages_scanned,
 			(zone_is_all_unreclaimable(zone) ? "yes" : "no")
diff --git a/mm/rmap.c b/mm/rmap.c
index 23122af3261..316c9d6930a 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1202,7 +1202,6 @@ int try_to_unmap(struct page *page, int migration)
 	return ret;
 }
 
-#ifdef CONFIG_UNEVICTABLE_LRU
 /**
  * try_to_munlock - try to munlock a page
  * @page: the page to be munlocked
@@ -1226,4 +1225,4 @@ int try_to_munlock(struct page *page)
 	else
 		return try_to_unmap_file(page, 1, 0);
 }
-#endif
+
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 879d034930c..2c4b945b011 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -514,7 +514,6 @@ int remove_mapping(struct address_space *mapping, struct page *page)
  *
  * lru_lock must not be held, interrupts must be enabled.
  */
-#ifdef CONFIG_UNEVICTABLE_LRU
 void putback_lru_page(struct page *page)
 {
 	int lru;
@@ -568,20 +567,6 @@ redo:
 	put_page(page);		/* drop ref from isolate */
 }
 
-#else /* CONFIG_UNEVICTABLE_LRU */
-
-void putback_lru_page(struct page *page)
-{
-	int lru;
-	VM_BUG_ON(PageLRU(page));
-
-	lru = !!TestClearPageActive(page) + page_is_file_cache(page);
-	lru_cache_add_lru(page, lru);
-	put_page(page);
-}
-#endif /* CONFIG_UNEVICTABLE_LRU */
-
-
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
@@ -2470,7 +2455,6 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 }
 #endif
 
-#ifdef CONFIG_UNEVICTABLE_LRU
 /*
  * page_evictable - test whether a page is evictable
  * @page: the page to test
@@ -2717,4 +2701,3 @@ void scan_unevictable_unregister_node(struct node *node)
 	sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages);
 }
 
-#endif
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 1e151cf6bf8..1e3aa8139f2 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -629,10 +629,8 @@ static const char * const vmstat_text[] = {
 	"nr_active_anon",
 	"nr_inactive_file",
 	"nr_active_file",
-#ifdef CONFIG_UNEVICTABLE_LRU
 	"nr_unevictable",
 	"nr_mlock",
-#endif
 	"nr_anon_pages",
 	"nr_mapped",
 	"nr_file_pages",
@@ -687,7 +685,6 @@ static const char * const vmstat_text[] = {
 	"htlb_buddy_alloc_success",
 	"htlb_buddy_alloc_fail",
 #endif
-#ifdef CONFIG_UNEVICTABLE_LRU
 	"unevictable_pgs_culled",
 	"unevictable_pgs_scanned",
 	"unevictable_pgs_rescued",
@@ -697,7 +694,6 @@ static const char * const vmstat_text[] = {
 	"unevictable_pgs_stranded",
 	"unevictable_pgs_mlockfreed",
 #endif
-#endif
 };
 
 static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
-- 
cgit v1.2.3-70-g09d2


From cb4b86ba47bb0937b71fb825b3ed88adf7a190f0 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 16 Jun 2009 15:32:52 -0700
Subject: mm: add swap cache interface for swap reference

In a following patch, the usage of swap cache is recorded into swap_map.
This patch is for necessary interface changes to do that.

2 interfaces:

  - swapcache_prepare()
  - swapcache_free()

are added for allocating/freeing refcnt from swap-cache to existing swap
entries.  But implementation itself is not changed under this patch.  At
adding swapcache_free(), memcg's hook code is moved under
swapcache_free().  This is better than using scattered hooks.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Acked-by: Balbir Singh <balbir@in.ibm.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  7 +++++++
 mm/shmem.c           |  2 +-
 mm/swap_state.c      | 11 +++++------
 mm/swapfile.c        | 19 +++++++++++++++++++
 mm/vmscan.c          |  3 +--
 5 files changed, 33 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index f30c06908f0..259e96c150e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -282,8 +282,10 @@ extern void si_swapinfo(struct sysinfo *);
 extern swp_entry_t get_swap_page(void);
 extern swp_entry_t get_swap_page_of_type(int);
 extern int swap_duplicate(swp_entry_t);
+extern int swapcache_prepare(swp_entry_t);
 extern int valid_swaphandles(swp_entry_t, unsigned long *);
 extern void swap_free(swp_entry_t);
+extern void swapcache_free(swp_entry_t, struct page *page);
 extern int free_swap_and_cache(swp_entry_t);
 extern int swap_type_of(dev_t, sector_t, struct block_device **);
 extern unsigned int count_swap_pages(int, int);
@@ -352,11 +354,16 @@ static inline void show_swap_cache_info(void)
 
 #define free_swap_and_cache(swp)	is_migration_entry(swp)
 #define swap_duplicate(swp)		is_migration_entry(swp)
+#define swapcache_prepare(swp)		is_migration_entry(swp)
 
 static inline void swap_free(swp_entry_t swp)
 {
 }
 
+static inline void swapcache_free(swp_entry_t swp, struct page *page)
+{
+}
+
 static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
 			struct vm_area_struct *vma, unsigned long addr)
 {
diff --git a/mm/shmem.c b/mm/shmem.c
index 0132fbd45a2..47ab1918228 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1097,7 +1097,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	shmem_swp_unmap(entry);
 unlock:
 	spin_unlock(&info->lock);
-	swap_free(swap);
+	swapcache_free(swap, NULL);
 redirty:
 	set_page_dirty(page);
 	if (wbc->for_reclaim)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 1416e7e9e02..19bdf3017a9 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -162,11 +162,11 @@ int add_to_swap(struct page *page)
 			return 1;
 		case -EEXIST:
 			/* Raced with "speculative" read_swap_cache_async */
-			swap_free(entry);
+			swapcache_free(entry, NULL);
 			continue;
 		default:
 			/* -ENOMEM radix-tree allocation failure */
-			swap_free(entry);
+			swapcache_free(entry, NULL);
 			return 0;
 		}
 	}
@@ -188,8 +188,7 @@ void delete_from_swap_cache(struct page *page)
 	__delete_from_swap_cache(page);
 	spin_unlock_irq(&swapper_space.tree_lock);
 
-	mem_cgroup_uncharge_swapcache(page, entry);
-	swap_free(entry);
+	swapcache_free(entry, page);
 	page_cache_release(page);
 }
 
@@ -293,7 +292,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		/*
 		 * Swap entry may have been freed since our caller observed it.
 		 */
-		if (!swap_duplicate(entry))
+		if (!swapcache_prepare(entry))
 			break;
 
 		/*
@@ -317,7 +316,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		}
 		ClearPageSwapBacked(new_page);
 		__clear_page_locked(new_page);
-		swap_free(entry);
+		swapcache_free(entry, NULL);
 	} while (err != -ENOMEM);
 
 	if (new_page)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 312fafe0ab6..3187079903f 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -509,6 +509,16 @@ void swap_free(swp_entry_t entry)
 	}
 }
 
+/*
+ * Called after dropping swapcache to decrease refcnt to swap entries.
+ */
+void swapcache_free(swp_entry_t entry, struct page *page)
+{
+	if (page)
+		mem_cgroup_uncharge_swapcache(page, entry);
+	return swap_free(entry);
+}
+
 /*
  * How many references to page are currently swapped out?
  */
@@ -1979,6 +1989,15 @@ bad_file:
 	goto out;
 }
 
+/*
+ * Called when allocating swap cache for exising swap entry,
+ */
+int swapcache_prepare(swp_entry_t entry)
+{
+	return swap_duplicate(entry);
+}
+
+
 struct swap_info_struct *
 get_swap_info_struct(unsigned type)
 {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2c4b945b011..52339dd7bf8 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -470,8 +470,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page)
 		swp_entry_t swap = { .val = page_private(page) };
 		__delete_from_swap_cache(page);
 		spin_unlock_irq(&mapping->tree_lock);
-		mem_cgroup_uncharge_swapcache(page, swap);
-		swap_free(swap);
+		swapcache_free(swap, page);
 	} else {
 		__remove_from_page_cache(page);
 		spin_unlock_irq(&mapping->tree_lock);
-- 
cgit v1.2.3-70-g09d2


From 355cfa73ddff2fb8fa14e93bd94a057cc022512e Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 16 Jun 2009 15:32:53 -0700
Subject: mm: modify swap_map and add SWAP_HAS_CACHE flag

This is a part of the patches for fixing memcg's swap accountinf leak.
But, IMHO, not a bad patch even if no memcg.

There are 2 kinds of references to swap.
 - reference from swap entry
 - reference from swap cache

Then,

 - If there is swap cache && swap's refcnt is 1, there is only swap cache.
  (*) swapcount(entry) == 1 && find_get_page(swapper_space, entry) != NULL

This counting logic have worked well for a long time.  But considering
that we cannot know there is a _real_ reference or not by swap_map[],
current usage of counter is not very good.

This patch adds a flag SWAP_HAS_CACHE and recored information that a swap
entry has a cache or not.  This will remove -1 magic used in swapfile.c
and be a help to avoid unnecessary find_get_page().

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Tested-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  14 ++--
 mm/swap_state.c      |   5 +-
 mm/swapfile.c        | 214 ++++++++++++++++++++++++++++++++++++++-------------
 3 files changed, 172 insertions(+), 61 deletions(-)

(limited to 'include')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 259e96c150e..fed5e8e1104 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -129,9 +129,10 @@ enum {
 
 #define SWAP_CLUSTER_MAX 32
 
-#define SWAP_MAP_MAX	0x7fff
-#define SWAP_MAP_BAD	0x8000
-
+#define SWAP_MAP_MAX	0x7ffe
+#define SWAP_MAP_BAD	0x7fff
+#define SWAP_HAS_CACHE  0x8000		/* There is a swap cache of entry. */
+#define SWAP_COUNT_MASK (~SWAP_HAS_CACHE)
 /*
  * The in-memory structure used to track swap areas.
  */
@@ -281,7 +282,7 @@ extern long total_swap_pages;
 extern void si_swapinfo(struct sysinfo *);
 extern swp_entry_t get_swap_page(void);
 extern swp_entry_t get_swap_page_of_type(int);
-extern int swap_duplicate(swp_entry_t);
+extern void swap_duplicate(swp_entry_t);
 extern int swapcache_prepare(swp_entry_t);
 extern int valid_swaphandles(swp_entry_t, unsigned long *);
 extern void swap_free(swp_entry_t);
@@ -353,9 +354,12 @@ static inline void show_swap_cache_info(void)
 }
 
 #define free_swap_and_cache(swp)	is_migration_entry(swp)
-#define swap_duplicate(swp)		is_migration_entry(swp)
 #define swapcache_prepare(swp)		is_migration_entry(swp)
 
+static inline void swap_duplicate(swp_entry_t swp)
+{
+}
+
 static inline void swap_free(swp_entry_t swp)
 {
 }
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 19bdf3017a9..b9ca029673a 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -292,7 +292,10 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		/*
 		 * Swap entry may have been freed since our caller observed it.
 		 */
-		if (!swapcache_prepare(entry))
+		err = swapcache_prepare(entry);
+		if (err == -EEXIST) /* seems racy */
+			continue;
+		if (err)           /* swp entry is obsolete ? */
 			break;
 
 		/*
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 3187079903f..0d7296971ad 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -53,6 +53,33 @@ static struct swap_info_struct swap_info[MAX_SWAPFILES];
 
 static DEFINE_MUTEX(swapon_mutex);
 
+/* For reference count accounting in swap_map */
+/* enum for swap_map[] handling. internal use only */
+enum {
+	SWAP_MAP = 0,	/* ops for reference from swap users */
+	SWAP_CACHE,	/* ops for reference from swap cache */
+};
+
+static inline int swap_count(unsigned short ent)
+{
+	return ent & SWAP_COUNT_MASK;
+}
+
+static inline bool swap_has_cache(unsigned short ent)
+{
+	return !!(ent & SWAP_HAS_CACHE);
+}
+
+static inline unsigned short encode_swapmap(int count, bool has_cache)
+{
+	unsigned short ret = count;
+
+	if (has_cache)
+		return SWAP_HAS_CACHE | ret;
+	return ret;
+}
+
+
 /*
  * We need this because the bdev->unplug_fn can sleep and we cannot
  * hold swap_lock while calling the unplug_fn. And swap_lock
@@ -167,7 +194,8 @@ static int wait_for_discard(void *word)
 #define SWAPFILE_CLUSTER	256
 #define LATENCY_LIMIT		256
 
-static inline unsigned long scan_swap_map(struct swap_info_struct *si)
+static inline unsigned long scan_swap_map(struct swap_info_struct *si,
+					  int cache)
 {
 	unsigned long offset;
 	unsigned long scan_base;
@@ -285,7 +313,10 @@ checks:
 		si->lowest_bit = si->max;
 		si->highest_bit = 0;
 	}
-	si->swap_map[offset] = 1;
+	if (cache == SWAP_CACHE) /* at usual swap-out via vmscan.c */
+		si->swap_map[offset] = encode_swapmap(0, true);
+	else /* at suspend */
+		si->swap_map[offset] = encode_swapmap(1, false);
 	si->cluster_next = offset + 1;
 	si->flags -= SWP_SCANNING;
 
@@ -401,7 +432,8 @@ swp_entry_t get_swap_page(void)
 			continue;
 
 		swap_list.next = next;
-		offset = scan_swap_map(si);
+		/* This is called for allocating swap entry for cache */
+		offset = scan_swap_map(si, SWAP_CACHE);
 		if (offset) {
 			spin_unlock(&swap_lock);
 			return swp_entry(type, offset);
@@ -415,6 +447,7 @@ noswap:
 	return (swp_entry_t) {0};
 }
 
+/* The only caller of this function is now susupend routine */
 swp_entry_t get_swap_page_of_type(int type)
 {
 	struct swap_info_struct *si;
@@ -424,7 +457,8 @@ swp_entry_t get_swap_page_of_type(int type)
 	si = swap_info + type;
 	if (si->flags & SWP_WRITEOK) {
 		nr_swap_pages--;
-		offset = scan_swap_map(si);
+		/* This is called for allocating swap entry, not cache */
+		offset = scan_swap_map(si, SWAP_MAP);
 		if (offset) {
 			spin_unlock(&swap_lock);
 			return swp_entry(type, offset);
@@ -471,25 +505,38 @@ out:
 	return NULL;
 }
 
-static int swap_entry_free(struct swap_info_struct *p, swp_entry_t ent)
+static int swap_entry_free(struct swap_info_struct *p,
+			   swp_entry_t ent, int cache)
 {
 	unsigned long offset = swp_offset(ent);
-	int count = p->swap_map[offset];
-
-	if (count < SWAP_MAP_MAX) {
-		count--;
-		p->swap_map[offset] = count;
-		if (!count) {
-			if (offset < p->lowest_bit)
-				p->lowest_bit = offset;
-			if (offset > p->highest_bit)
-				p->highest_bit = offset;
-			if (p->prio > swap_info[swap_list.next].prio)
-				swap_list.next = p - swap_info;
-			nr_swap_pages++;
-			p->inuse_pages--;
-			mem_cgroup_uncharge_swap(ent);
+	int count = swap_count(p->swap_map[offset]);
+	bool has_cache;
+
+	has_cache = swap_has_cache(p->swap_map[offset]);
+
+	if (cache == SWAP_MAP) { /* dropping usage count of swap */
+		if (count < SWAP_MAP_MAX) {
+			count--;
+			p->swap_map[offset] = encode_swapmap(count, has_cache);
 		}
+	} else { /* dropping swap cache flag */
+		VM_BUG_ON(!has_cache);
+		p->swap_map[offset] = encode_swapmap(count, false);
+
+	}
+	/* return code. */
+	count = p->swap_map[offset];
+	/* free if no reference */
+	if (!count) {
+		if (offset < p->lowest_bit)
+			p->lowest_bit = offset;
+		if (offset > p->highest_bit)
+			p->highest_bit = offset;
+		if (p->prio > swap_info[swap_list.next].prio)
+			swap_list.next = p - swap_info;
+		nr_swap_pages++;
+		p->inuse_pages--;
+		mem_cgroup_uncharge_swap(ent);
 	}
 	return count;
 }
@@ -504,7 +551,7 @@ void swap_free(swp_entry_t entry)
 
 	p = swap_info_get(entry);
 	if (p) {
-		swap_entry_free(p, entry);
+		swap_entry_free(p, entry, SWAP_MAP);
 		spin_unlock(&swap_lock);
 	}
 }
@@ -514,9 +561,16 @@ void swap_free(swp_entry_t entry)
  */
 void swapcache_free(swp_entry_t entry, struct page *page)
 {
+	struct swap_info_struct *p;
+
 	if (page)
 		mem_cgroup_uncharge_swapcache(page, entry);
-	return swap_free(entry);
+	p = swap_info_get(entry);
+	if (p) {
+		swap_entry_free(p, entry, SWAP_CACHE);
+		spin_unlock(&swap_lock);
+	}
+	return;
 }
 
 /*
@@ -531,8 +585,7 @@ static inline int page_swapcount(struct page *page)
 	entry.val = page_private(page);
 	p = swap_info_get(entry);
 	if (p) {
-		/* Subtract the 1 for the swap cache itself */
-		count = p->swap_map[swp_offset(entry)] - 1;
+		count = swap_count(p->swap_map[swp_offset(entry)]);
 		spin_unlock(&swap_lock);
 	}
 	return count;
@@ -594,7 +647,7 @@ int free_swap_and_cache(swp_entry_t entry)
 
 	p = swap_info_get(entry);
 	if (p) {
-		if (swap_entry_free(p, entry) == 1) {
+		if (swap_entry_free(p, entry, SWAP_MAP) == SWAP_HAS_CACHE) {
 			page = find_get_page(&swapper_space, entry.val);
 			if (page && !trylock_page(page)) {
 				page_cache_release(page);
@@ -901,7 +954,7 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
 			i = 1;
 		}
 		count = si->swap_map[i];
-		if (count && count != SWAP_MAP_BAD)
+		if (count && swap_count(count) != SWAP_MAP_BAD)
 			break;
 	}
 	return i;
@@ -1005,13 +1058,13 @@ static int try_to_unuse(unsigned int type)
 		 */
 		shmem = 0;
 		swcount = *swap_map;
-		if (swcount > 1) {
+		if (swap_count(swcount)) {
 			if (start_mm == &init_mm)
 				shmem = shmem_unuse(entry, page);
 			else
 				retval = unuse_mm(start_mm, entry, page);
 		}
-		if (*swap_map > 1) {
+		if (swap_count(*swap_map)) {
 			int set_start_mm = (*swap_map >= swcount);
 			struct list_head *p = &start_mm->mmlist;
 			struct mm_struct *new_start_mm = start_mm;
@@ -1021,7 +1074,7 @@ static int try_to_unuse(unsigned int type)
 			atomic_inc(&new_start_mm->mm_users);
 			atomic_inc(&prev_mm->mm_users);
 			spin_lock(&mmlist_lock);
-			while (*swap_map > 1 && !retval && !shmem &&
+			while (swap_count(*swap_map) && !retval && !shmem &&
 					(p = p->next) != &start_mm->mmlist) {
 				mm = list_entry(p, struct mm_struct, mmlist);
 				if (!atomic_inc_not_zero(&mm->mm_users))
@@ -1033,14 +1086,16 @@ static int try_to_unuse(unsigned int type)
 				cond_resched();
 
 				swcount = *swap_map;
-				if (swcount <= 1)
+				if (!swap_count(swcount)) /* any usage ? */
 					;
 				else if (mm == &init_mm) {
 					set_start_mm = 1;
 					shmem = shmem_unuse(entry, page);
 				} else
 					retval = unuse_mm(mm, entry, page);
-				if (set_start_mm && *swap_map < swcount) {
+
+				if (set_start_mm &&
+				    swap_count(*swap_map) < swcount) {
 					mmput(new_start_mm);
 					atomic_inc(&mm->mm_users);
 					new_start_mm = mm;
@@ -1067,21 +1122,25 @@ static int try_to_unuse(unsigned int type)
 		}
 
 		/*
-		 * How could swap count reach 0x7fff when the maximum
-		 * pid is 0x7fff, and there's no way to repeat a swap
-		 * page within an mm (except in shmem, where it's the
-		 * shared object which takes the reference count)?
-		 * We believe SWAP_MAP_MAX cannot occur in Linux 2.4.
-		 *
+		 * How could swap count reach 0x7ffe ?
+		 * There's no way to repeat a swap page within an mm
+		 * (except in shmem, where it's the shared object which takes
+		 * the reference count)?
+		 * We believe SWAP_MAP_MAX cannot occur.(if occur, unsigned
+		 * short is too small....)
 		 * If that's wrong, then we should worry more about
 		 * exit_mmap() and do_munmap() cases described above:
 		 * we might be resetting SWAP_MAP_MAX too early here.
 		 * We know "Undead"s can happen, they're okay, so don't
 		 * report them; but do report if we reset SWAP_MAP_MAX.
 		 */
-		if (*swap_map == SWAP_MAP_MAX) {
+		/* We might release the lock_page() in unuse_mm(). */
+		if (!PageSwapCache(page) || page_private(page) != entry.val)
+			goto retry;
+
+		if (swap_count(*swap_map) == SWAP_MAP_MAX) {
 			spin_lock(&swap_lock);
-			*swap_map = 1;
+			*swap_map = encode_swapmap(0, true);
 			spin_unlock(&swap_lock);
 			reset_overflow = 1;
 		}
@@ -1099,7 +1158,8 @@ static int try_to_unuse(unsigned int type)
 		 * pages would be incorrect if swap supported "shared
 		 * private" pages, but they are handled by tmpfs files.
 		 */
-		if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) {
+		if (swap_count(*swap_map) &&
+		     PageDirty(page) && PageSwapCache(page)) {
 			struct writeback_control wbc = {
 				.sync_mode = WB_SYNC_NONE,
 			};
@@ -1126,6 +1186,7 @@ static int try_to_unuse(unsigned int type)
 		 * mark page dirty so shrink_page_list will preserve it.
 		 */
 		SetPageDirty(page);
+retry:
 		unlock_page(page);
 		page_cache_release(page);
 
@@ -1952,15 +2013,23 @@ void si_swapinfo(struct sysinfo *val)
  *
  * Note: if swap_map[] reaches SWAP_MAP_MAX the entries are treated as
  * "permanent", but will be reclaimed by the next swapoff.
+ * Returns error code in following case.
+ * - success -> 0
+ * - swp_entry is invalid -> EINVAL
+ * - swp_entry is migration entry -> EINVAL
+ * - swap-cache reference is requested but there is already one. -> EEXIST
+ * - swap-cache reference is requested but the entry is not used. -> ENOENT
  */
-int swap_duplicate(swp_entry_t entry)
+static int __swap_duplicate(swp_entry_t entry, bool cache)
 {
 	struct swap_info_struct * p;
 	unsigned long offset, type;
-	int result = 0;
+	int result = -EINVAL;
+	int count;
+	bool has_cache;
 
 	if (is_migration_entry(entry))
-		return 1;
+		return -EINVAL;
 
 	type = swp_type(entry);
 	if (type >= nr_swapfiles)
@@ -1969,17 +2038,40 @@ int swap_duplicate(swp_entry_t entry)
 	offset = swp_offset(entry);
 
 	spin_lock(&swap_lock);
-	if (offset < p->max && p->swap_map[offset]) {
-		if (p->swap_map[offset] < SWAP_MAP_MAX - 1) {
-			p->swap_map[offset]++;
-			result = 1;
-		} else if (p->swap_map[offset] <= SWAP_MAP_MAX) {
+
+	if (unlikely(offset >= p->max))
+		goto unlock_out;
+
+	count = swap_count(p->swap_map[offset]);
+	has_cache = swap_has_cache(p->swap_map[offset]);
+
+	if (cache == SWAP_CACHE) { /* called for swapcache/swapin-readahead */
+
+		/* set SWAP_HAS_CACHE if there is no cache and entry is used */
+		if (!has_cache && count) {
+			p->swap_map[offset] = encode_swapmap(count, true);
+			result = 0;
+		} else if (has_cache) /* someone added cache */
+			result = -EEXIST;
+		else if (!count) /* no users */
+			result = -ENOENT;
+
+	} else if (count || has_cache) {
+		if (count < SWAP_MAP_MAX - 1) {
+			p->swap_map[offset] = encode_swapmap(count + 1,
+							     has_cache);
+			result = 0;
+		} else if (count <= SWAP_MAP_MAX) {
 			if (swap_overflow++ < 5)
-				printk(KERN_WARNING "swap_dup: swap entry overflow\n");
-			p->swap_map[offset] = SWAP_MAP_MAX;
-			result = 1;
+				printk(KERN_WARNING
+				       "swap_dup: swap entry overflow\n");
+			p->swap_map[offset] = encode_swapmap(SWAP_MAP_MAX,
+							      has_cache);
+			result = 0;
 		}
-	}
+	} else
+		result = -ENOENT; /* unused swap entry */
+unlock_out:
 	spin_unlock(&swap_lock);
 out:
 	return result;
@@ -1988,13 +2080,25 @@ bad_file:
 	printk(KERN_ERR "swap_dup: %s%08lx\n", Bad_file, entry.val);
 	goto out;
 }
+/*
+ * increase reference count of swap entry by 1.
+ */
+void swap_duplicate(swp_entry_t entry)
+{
+	__swap_duplicate(entry, SWAP_MAP);
+}
 
 /*
+ * @entry: swap entry for which we allocate swap cache.
+ *
  * Called when allocating swap cache for exising swap entry,
+ * This can return error codes. Returns 0 at success.
+ * -EBUSY means there is a swap cache.
+ * Note: return code is different from swap_duplicate().
  */
 int swapcache_prepare(swp_entry_t entry)
 {
-	return swap_duplicate(entry);
+	return __swap_duplicate(entry, SWAP_CACHE);
 }
 
 
@@ -2035,7 +2139,7 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
 		/* Don't read in free or bad pages */
 		if (!si->swap_map[toff])
 			break;
-		if (si->swap_map[toff] == SWAP_MAP_BAD)
+		if (swap_count(si->swap_map[toff]) == SWAP_MAP_BAD)
 			break;
 	}
 	/* Count contiguous allocated slots below our target */
@@ -2043,7 +2147,7 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
 		/* Don't read in free or bad pages */
 		if (!si->swap_map[toff])
 			break;
-		if (si->swap_map[toff] == SWAP_MAP_BAD)
+		if (swap_count(si->swap_map[toff]) == SWAP_MAP_BAD)
 			break;
 	}
 	spin_unlock(&swap_lock);
-- 
cgit v1.2.3-70-g09d2


From 2ff05b2b4eac2e63d345fc731ea151a060247f53 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Tue, 16 Jun 2009 15:32:56 -0700
Subject: oom: move oom_adj value from task_struct to mm_struct

The per-task oom_adj value is a characteristic of its mm more than the
task itself since it's not possible to oom kill any thread that shares the
mm.  If a task were to be killed while attached to an mm that could not be
freed because another thread were set to OOM_DISABLE, it would have
needlessly been terminated since there is no potential for future memory
freeing.

This patch moves oomkilladj (now more appropriately named oom_adj) from
struct task_struct to struct mm_struct.  This requires task_lock() on a
task to check its oom_adj value to protect against exec, but it's already
necessary to take the lock when dereferencing the mm to find the total VM
size for the badness heuristic.

This fixes a livelock if the oom killer chooses a task and another thread
sharing the same memory has an oom_adj value of OOM_DISABLE.  This occurs
because oom_kill_task() repeatedly returns 1 and refuses to kill the
chosen task while select_bad_process() will repeatedly choose the same
task during the next retry.

Taking task_lock() in select_bad_process() to check for OOM_DISABLE and in
oom_kill_task() to check for threads sharing the same memory will be
removed in the next patch in this series where it will no longer be
necessary.

Writing to /proc/pid/oom_adj for a kthread will now return -EINVAL since
these threads are immune from oom killing already.  They simply report an
oom_adj value of OOM_DISABLE.

Cc: Nick Piggin <npiggin@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/proc.txt | 15 ++++++++++-----
 fs/proc/base.c                     | 19 ++++++++++++++++---
 include/linux/mm_types.h           |  2 ++
 include/linux/sched.h              |  1 -
 mm/oom_kill.c                      | 34 ++++++++++++++++++++++------------
 5 files changed, 50 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index cd8717a3627..ebff3c10a07 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1003,11 +1003,13 @@ CHAPTER 3: PER-PROCESS PARAMETERS
 3.1 /proc/<pid>/oom_adj - Adjust the oom-killer score
 ------------------------------------------------------
 
-This file can be used to adjust the score used to select which processes
-should be killed in an  out-of-memory  situation.  Giving it a high score will
-increase the likelihood of this process being killed by the oom-killer.  Valid
-values are in the range -16 to +15, plus the special value -17, which disables
-oom-killing altogether for this process.
+This file can be used to adjust the score used to select which processes should
+be killed in an out-of-memory situation.  The oom_adj value is a characteristic
+of the task's mm, so all threads that share an mm with pid will have the same
+oom_adj value.  A high value will increase the likelihood of this process being
+killed by the oom-killer.  Valid values are in the range -16 to +15 as
+explained below and a special value of -17, which disables oom-killing
+altogether for threads sharing pid's mm.
 
 The process to be killed in an out-of-memory situation is selected among all others
 based on its badness score. This value equals the original memory size of the process
@@ -1021,6 +1023,9 @@ the parent's score if they do not share the same memory. Thus forking servers
 are the prime candidates to be killed. Having only one 'hungry' child will make
 parent less preferable than the child.
 
+/proc/<pid>/oom_adj cannot be changed for kthreads since they are immune from
+oom-killing already.
+
 /proc/<pid>/oom_score shows process' current badness score.
 
 The following heuristics are then applied:
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1539e630c47..3ce5ae9e3d2 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1006,7 +1006,12 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
 
 	if (!task)
 		return -ESRCH;
-	oom_adjust = task->oomkilladj;
+	task_lock(task);
+	if (task->mm)
+		oom_adjust = task->mm->oom_adj;
+	else
+		oom_adjust = OOM_DISABLE;
+	task_unlock(task);
 	put_task_struct(task);
 
 	len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
@@ -1035,11 +1040,19 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
 	task = get_proc_task(file->f_path.dentry->d_inode);
 	if (!task)
 		return -ESRCH;
-	if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) {
+	task_lock(task);
+	if (!task->mm) {
+		task_unlock(task);
+		put_task_struct(task);
+		return -EINVAL;
+	}
+	if (oom_adjust < task->mm->oom_adj && !capable(CAP_SYS_RESOURCE)) {
+		task_unlock(task);
 		put_task_struct(task);
 		return -EACCES;
 	}
-	task->oomkilladj = oom_adjust;
+	task->mm->oom_adj = oom_adjust;
+	task_unlock(task);
 	put_task_struct(task);
 	if (end - buffer == 0)
 		return -EIO;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0e80e26ecf2..f4408106fcb 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -232,6 +232,8 @@ struct mm_struct {
 
 	unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
 
+	s8 oom_adj;	/* OOM kill score adjustment (bit shift) */
+
 	cpumask_t cpu_vm_mask;
 
 	/* Architecture-specific MM context */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1048bf50540..1bc6fae0c13 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1178,7 +1178,6 @@ struct task_struct {
 	 * a short time
 	 */
 	unsigned char fpu_counter;
-	s8 oomkilladj; /* OOM kill score adjustment (bit shift). */
 #ifdef CONFIG_BLK_DEV_IO_TRACE
 	unsigned int btrace_seq;
 #endif
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index a7b2460e922..b60913520ef 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -58,6 +58,7 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 	unsigned long points, cpu_time, run_time;
 	struct mm_struct *mm;
 	struct task_struct *child;
+	int oom_adj;
 
 	task_lock(p);
 	mm = p->mm;
@@ -65,6 +66,7 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 		task_unlock(p);
 		return 0;
 	}
+	oom_adj = mm->oom_adj;
 
 	/*
 	 * The memory size of the process is the basis for the badness.
@@ -148,15 +150,15 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 		points /= 8;
 
 	/*
-	 * Adjust the score by oomkilladj.
+	 * Adjust the score by oom_adj.
 	 */
-	if (p->oomkilladj) {
-		if (p->oomkilladj > 0) {
+	if (oom_adj) {
+		if (oom_adj > 0) {
 			if (!points)
 				points = 1;
-			points <<= p->oomkilladj;
+			points <<= oom_adj;
 		} else
-			points >>= -(p->oomkilladj);
+			points >>= -(oom_adj);
 	}
 
 #ifdef DEBUG
@@ -251,8 +253,12 @@ static struct task_struct *select_bad_process(unsigned long *ppoints,
 			*ppoints = ULONG_MAX;
 		}
 
-		if (p->oomkilladj == OOM_DISABLE)
+		task_lock(p);
+		if (p->mm && p->mm->oom_adj == OOM_DISABLE) {
+			task_unlock(p);
 			continue;
+		}
+		task_unlock(p);
 
 		points = badness(p, uptime.tv_sec);
 		if (points > *ppoints || !chosen) {
@@ -304,8 +310,7 @@ static void dump_tasks(const struct mem_cgroup *mem)
 		}
 		printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d     %3d %s\n",
 		       p->pid, __task_cred(p)->uid, p->tgid, mm->total_vm,
-		       get_mm_rss(mm), (int)task_cpu(p), p->oomkilladj,
-		       p->comm);
+		       get_mm_rss(mm), (int)task_cpu(p), mm->oom_adj, p->comm);
 		task_unlock(p);
 	} while_each_thread(g, p);
 }
@@ -367,8 +372,12 @@ static int oom_kill_task(struct task_struct *p)
 	 * Don't kill the process if any threads are set to OOM_DISABLE
 	 */
 	do_each_thread(g, q) {
-		if (q->mm == mm && q->oomkilladj == OOM_DISABLE)
+		task_lock(q);
+		if (q->mm == mm && q->mm && q->mm->oom_adj == OOM_DISABLE) {
+			task_unlock(q);
 			return 1;
+		}
+		task_unlock(q);
 	} while_each_thread(g, q);
 
 	__oom_kill_task(p, 1);
@@ -393,10 +402,11 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	struct task_struct *c;
 
 	if (printk_ratelimit()) {
-		printk(KERN_WARNING "%s invoked oom-killer: "
-			"gfp_mask=0x%x, order=%d, oomkilladj=%d\n",
-			current->comm, gfp_mask, order, current->oomkilladj);
 		task_lock(current);
+		printk(KERN_WARNING "%s invoked oom-killer: "
+			"gfp_mask=0x%x, order=%d, oom_adj=%d\n",
+			current->comm, gfp_mask, order,
+			current->mm ? current->mm->oom_adj : OOM_DISABLE);
 		cpuset_print_task_mems_allowed(current);
 		task_unlock(current);
 		dump_stack();
-- 
cgit v1.2.3-70-g09d2


From 286973552f051404abdb58dd9b2f8f7558efe4e5 Mon Sep 17 00:00:00 2001
From: Mike Waychison <mikew@google.com>
Date: Tue, 16 Jun 2009 15:32:59 -0700
Subject: mm: remove __invalidate_mapping_pages variant

Remove __invalidate_mapping_pages atomic variant now that its sole caller
can sleep (fixed in eccb95cee4f0d56faa46ef22fb94dd4a3578d3eb ("vfs: fix
lock inversion in drop_pagecache_sb()")).

This fixes softlockups that can occur while in the drop_caches path.

Signed-off-by: Mike Waychison <mikew@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/drop_caches.c   |  2 +-
 include/linux/fs.h |  3 ---
 mm/truncate.c      | 39 ++++++++++++++++-----------------------
 3 files changed, 17 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index b6a719a909f..a2edb791344 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -24,7 +24,7 @@ static void drop_pagecache_sb(struct super_block *sb)
 			continue;
 		__iget(inode);
 		spin_unlock(&inode_lock);
-		__invalidate_mapping_pages(inode->i_mapping, 0, -1, true);
+		invalidate_mapping_pages(inode->i_mapping, 0, -1);
 		iput(toput_inode);
 		toput_inode = inode;
 		spin_lock(&inode_lock);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8146e0264ef..f5ae9f19b8a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2036,9 +2036,6 @@ extern int __invalidate_device(struct block_device *);
 extern int invalidate_partition(struct gendisk *, int);
 #endif
 extern int invalidate_inodes(struct super_block *);
-unsigned long __invalidate_mapping_pages(struct address_space *mapping,
-					pgoff_t start, pgoff_t end,
-					bool be_atomic);
 unsigned long invalidate_mapping_pages(struct address_space *mapping,
 					pgoff_t start, pgoff_t end);
 
diff --git a/mm/truncate.c b/mm/truncate.c
index 12e1579f916..ccc3ecf7cb9 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -267,8 +267,21 @@ void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
 }
 EXPORT_SYMBOL(truncate_inode_pages);
 
-unsigned long __invalidate_mapping_pages(struct address_space *mapping,
-				pgoff_t start, pgoff_t end, bool be_atomic)
+/**
+ * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
+ * @mapping: the address_space which holds the pages to invalidate
+ * @start: the offset 'from' which to invalidate
+ * @end: the offset 'to' which to invalidate (inclusive)
+ *
+ * This function only removes the unlocked pages, if you want to
+ * remove all the pages of one inode, you must call truncate_inode_pages.
+ *
+ * invalidate_mapping_pages() will not block on IO activity. It will not
+ * invalidate pages which are dirty, locked, under writeback or mapped into
+ * pagetables.
+ */
+unsigned long invalidate_mapping_pages(struct address_space *mapping,
+				       pgoff_t start, pgoff_t end)
 {
 	struct pagevec pvec;
 	pgoff_t next = start;
@@ -309,30 +322,10 @@ unlock:
 				break;
 		}
 		pagevec_release(&pvec);
-		if (likely(!be_atomic))
-			cond_resched();
+		cond_resched();
 	}
 	return ret;
 }
-
-/**
- * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
- * @mapping: the address_space which holds the pages to invalidate
- * @start: the offset 'from' which to invalidate
- * @end: the offset 'to' which to invalidate (inclusive)
- *
- * This function only removes the unlocked pages, if you want to
- * remove all the pages of one inode, you must call truncate_inode_pages.
- *
- * invalidate_mapping_pages() will not block on IO activity. It will not
- * invalidate pages which are dirty, locked, under writeback or mapped into
- * pagetables.
- */
-unsigned long invalidate_mapping_pages(struct address_space *mapping,
-				pgoff_t start, pgoff_t end)
-{
-	return __invalidate_mapping_pages(mapping, start, end, false);
-}
 EXPORT_SYMBOL(invalidate_mapping_pages);
 
 /*
-- 
cgit v1.2.3-70-g09d2


From aca8bf323edd31ad462dc98c107c23a5c6022ca2 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan.kim@gmail.com>
Date: Tue, 16 Jun 2009 15:33:02 -0700
Subject: mm: remove file argument from swap_readpage()

The file argument resulted from address_space's readpage long time ago.

We don't use it any more.  Let's remove unnecessary argement.

Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Acked-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Reviewed-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 2 +-
 mm/page_io.c         | 2 +-
 mm/swap_state.c      | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index fed5e8e1104..0cedf31af0b 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -256,7 +256,7 @@ extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *);
 
 #ifdef CONFIG_SWAP
 /* linux/mm/page_io.c */
-extern int swap_readpage(struct file *, struct page *);
+extern int swap_readpage(struct page *);
 extern int swap_writepage(struct page *page, struct writeback_control *wbc);
 extern void end_swap_bio_read(struct bio *bio, int err);
 
diff --git a/mm/page_io.c b/mm/page_io.c
index 3023c475e04..c6f3e5071de 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -120,7 +120,7 @@ out:
 	return ret;
 }
 
-int swap_readpage(struct file *file, struct page *page)
+int swap_readpage(struct page *page)
 {
 	struct bio *bio;
 	int ret = 0;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index b62e7f56051..42cd38eba79 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -313,7 +313,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 			 * Initiate read into locked page and return.
 			 */
 			lru_cache_add_anon(new_page);
-			swap_readpage(NULL, new_page);
+			swap_readpage(new_page);
 			return new_page;
 		}
 		ClearPageSwapBacked(new_page);
-- 
cgit v1.2.3-70-g09d2


From 168f5ac668f63dfb64439766e3ef9e866b83719d Mon Sep 17 00:00:00 2001
From: Sergei Trofimovich <slyfox@inbox.ru>
Date: Tue, 16 Jun 2009 15:33:02 -0700
Subject: mm cleanup: shmem_file_setup: 'char *' -> 'const char *' for name
 argument

As function shmem_file_setup does not modify/allocate/free/pass given
filename - mark it as const.

Signed-off-by: Sergei Trofimovich <slyfox@inbox.ru>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 2 +-
 mm/shmem.c         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f0473a88ca2..d88d6fc530a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -724,7 +724,7 @@ static inline int shmem_lock(struct file *file, int lock,
 	return 0;
 }
 #endif
-struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags);
+struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags);
 
 int shmem_zero_setup(struct vm_area_struct *);
 
diff --git a/mm/shmem.c b/mm/shmem.c
index 47ab1918228..e89d7ec18ed 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2612,7 +2612,7 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
  * @size: size to be set for the file
  * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
  */
-struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
+struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
 {
 	int error;
 	struct file *file;
-- 
cgit v1.2.3-70-g09d2


From 6fe6b7e35785e3232ffe7f81d3893f1316710a02 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Tue, 16 Jun 2009 15:33:05 -0700
Subject: vmscan: report vm_flags in page_referenced()

Collect vma->vm_flags of the VMAs that actually referenced the page.

This is preparing for more informed reclaim heuristics, eg.  to protect
executable file pages more aggressively.  For now only the VM_EXEC bit
will be used by the caller.

Thanks to Johannes, Peter and Minchan for all the good tips.

Acked-by: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Reviewed-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rmap.h |  5 +++--
 mm/rmap.c            | 37 ++++++++++++++++++++++++++-----------
 mm/vmscan.c          |  7 +++++--
 3 files changed, 34 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 619379a1dd9..216d024f830 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -83,7 +83,8 @@ static inline void page_dup_rmap(struct page *page, struct vm_area_struct *vma,
 /*
  * Called from mm/vmscan.c to handle paging out
  */
-int page_referenced(struct page *, int is_locked, struct mem_cgroup *cnt);
+int page_referenced(struct page *, int is_locked,
+			struct mem_cgroup *cnt, unsigned long *vm_flags);
 int try_to_unmap(struct page *, int ignore_refs);
 
 /*
@@ -117,7 +118,7 @@ int try_to_munlock(struct page *);
 #define anon_vma_prepare(vma)	(0)
 #define anon_vma_link(vma)	do {} while (0)
 
-#define page_referenced(page,l,cnt) TestClearPageReferenced(page)
+#define page_referenced(page, locked, cnt, flags) TestClearPageReferenced(page)
 #define try_to_unmap(page, refs) SWAP_FAIL
 
 static inline int page_mkclean(struct page *page)
diff --git a/mm/rmap.c b/mm/rmap.c
index 316c9d6930a..c9ccc1a72dc 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -333,7 +333,9 @@ static int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
  * repeatedly from either page_referenced_anon or page_referenced_file.
  */
 static int page_referenced_one(struct page *page,
-	struct vm_area_struct *vma, unsigned int *mapcount)
+			       struct vm_area_struct *vma,
+			       unsigned int *mapcount,
+			       unsigned long *vm_flags)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
@@ -381,11 +383,14 @@ out_unmap:
 	(*mapcount)--;
 	pte_unmap_unlock(pte, ptl);
 out:
+	if (referenced)
+		*vm_flags |= vma->vm_flags;
 	return referenced;
 }
 
 static int page_referenced_anon(struct page *page,
-				struct mem_cgroup *mem_cont)
+				struct mem_cgroup *mem_cont,
+				unsigned long *vm_flags)
 {
 	unsigned int mapcount;
 	struct anon_vma *anon_vma;
@@ -405,7 +410,8 @@ static int page_referenced_anon(struct page *page,
 		 */
 		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
 			continue;
-		referenced += page_referenced_one(page, vma, &mapcount);
+		referenced += page_referenced_one(page, vma,
+						  &mapcount, vm_flags);
 		if (!mapcount)
 			break;
 	}
@@ -418,6 +424,7 @@ static int page_referenced_anon(struct page *page,
  * page_referenced_file - referenced check for object-based rmap
  * @page: the page we're checking references on.
  * @mem_cont: target memory controller
+ * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
  *
  * For an object-based mapped page, find all the places it is mapped and
  * check/clear the referenced flag.  This is done by following the page->mapping
@@ -427,7 +434,8 @@ static int page_referenced_anon(struct page *page,
  * This function is only called from page_referenced for object-based pages.
  */
 static int page_referenced_file(struct page *page,
-				struct mem_cgroup *mem_cont)
+				struct mem_cgroup *mem_cont,
+				unsigned long *vm_flags)
 {
 	unsigned int mapcount;
 	struct address_space *mapping = page->mapping;
@@ -467,7 +475,8 @@ static int page_referenced_file(struct page *page,
 		 */
 		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
 			continue;
-		referenced += page_referenced_one(page, vma, &mapcount);
+		referenced += page_referenced_one(page, vma,
+						  &mapcount, vm_flags);
 		if (!mapcount)
 			break;
 	}
@@ -481,29 +490,35 @@ static int page_referenced_file(struct page *page,
  * @page: the page to test
  * @is_locked: caller holds lock on the page
  * @mem_cont: target memory controller
+ * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
  *
  * Quick test_and_clear_referenced for all mappings to a page,
  * returns the number of ptes which referenced the page.
  */
-int page_referenced(struct page *page, int is_locked,
-			struct mem_cgroup *mem_cont)
+int page_referenced(struct page *page,
+		    int is_locked,
+		    struct mem_cgroup *mem_cont,
+		    unsigned long *vm_flags)
 {
 	int referenced = 0;
 
 	if (TestClearPageReferenced(page))
 		referenced++;
 
+	*vm_flags = 0;
 	if (page_mapped(page) && page->mapping) {
 		if (PageAnon(page))
-			referenced += page_referenced_anon(page, mem_cont);
+			referenced += page_referenced_anon(page, mem_cont,
+								vm_flags);
 		else if (is_locked)
-			referenced += page_referenced_file(page, mem_cont);
+			referenced += page_referenced_file(page, mem_cont,
+								vm_flags);
 		else if (!trylock_page(page))
 			referenced++;
 		else {
 			if (page->mapping)
-				referenced +=
-					page_referenced_file(page, mem_cont);
+				referenced += page_referenced_file(page,
+							mem_cont, vm_flags);
 			unlock_page(page);
 		}
 	}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 52339dd7bf8..6be2068f61c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -577,6 +577,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 	struct pagevec freed_pvec;
 	int pgactivate = 0;
 	unsigned long nr_reclaimed = 0;
+	unsigned long vm_flags;
 
 	cond_resched();
 
@@ -627,7 +628,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 				goto keep_locked;
 		}
 
-		referenced = page_referenced(page, 1, sc->mem_cgroup);
+		referenced = page_referenced(page, 1,
+						sc->mem_cgroup, &vm_flags);
 		/* In active use or really unfreeable?  Activate it. */
 		if (sc->order <= PAGE_ALLOC_COSTLY_ORDER &&
 					referenced && page_mapping_inuse(page))
@@ -1208,6 +1210,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 {
 	unsigned long pgmoved;
 	unsigned long pgscanned;
+	unsigned long vm_flags;
 	LIST_HEAD(l_hold);	/* The pages which were snipped off */
 	LIST_HEAD(l_inactive);
 	struct page *page;
@@ -1248,7 +1251,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 
 		/* page_referenced clears PageReferenced */
 		if (page_mapping_inuse(page) &&
-		    page_referenced(page, 0, sc->mem_cgroup))
+		    page_referenced(page, 0, sc->mem_cgroup, &vm_flags))
 			pgmoved++;
 
 		list_add(&page->lru, &l_inactive);
-- 
cgit v1.2.3-70-g09d2


From 24cf72518c79cdcda486ed26074ff8151291cf65 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Tue, 16 Jun 2009 15:33:23 -0700
Subject: vmscan: count the number of times zone_reclaim() scans and fails

On NUMA machines, the administrator can configure zone_reclaim_mode that
is a more targetted form of direct reclaim.  On machines with large NUMA
distances for example, a zone_reclaim_mode defaults to 1 meaning that
clean unmapped pages will be reclaimed if the zone watermarks are not
being met.

There is a heuristic that determines if the scan is worthwhile but it is
possible that the heuristic will fail and the CPU gets tied up scanning
uselessly.  Detecting the situation requires some guesswork and
experimentation so this patch adds a counter "zreclaim_failed" to
/proc/vmstat.  If during high CPU utilisation this counter is increasing
rapidly, then the resolution to the problem may be to set
/proc/sys/vm/zone_reclaim_mode to 0.

[akpm@linux-foundation.org: name things consistently]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmstat.h | 3 +++
 mm/vmscan.c            | 3 +++
 mm/vmstat.c            | 3 +++
 3 files changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index ff4696c6dce..81a97cf8f0a 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -36,6 +36,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		FOR_ALL_ZONES(PGSTEAL),
 		FOR_ALL_ZONES(PGSCAN_KSWAPD),
 		FOR_ALL_ZONES(PGSCAN_DIRECT),
+#ifdef CONFIG_NUMA
+		PGSCAN_ZONE_RECLAIM_FAILED,
+#endif
 		PGINODESTEAL, SLABS_SCANNED, KSWAPD_STEAL, KSWAPD_INODESTEAL,
 		PAGEOUTRUN, ALLOCSTALL, PGROTATED,
 #ifdef CONFIG_HUGETLB_PAGE
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 16c82a868e2..3018ad75613 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2519,6 +2519,9 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	ret = __zone_reclaim(zone, gfp_mask, order);
 	zone_clear_flag(zone, ZONE_RECLAIM_LOCKED);
 
+	if (!ret)
+		count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED);
+
 	return ret;
 }
 #endif
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 1e3aa8139f2..138bed53706 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -673,6 +673,9 @@ static const char * const vmstat_text[] = {
 	TEXTS_FOR_ZONES("pgscan_kswapd")
 	TEXTS_FOR_ZONES("pgscan_direct")
 
+#ifdef CONFIG_NUMA
+	"zone_reclaim_failed",
+#endif
 	"pginodesteal",
 	"slabs_scanned",
 	"kswapd_steal",
-- 
cgit v1.2.3-70-g09d2


From a7d932af06e8eee2163627d19898e18da5635449 Mon Sep 17 00:00:00 2001
From: Dan Smith <danms@us.ibm.com>
Date: Tue, 16 Jun 2009 15:33:33 -0700
Subject: utsname.h: make new_utsname fields use the proper length constant

The members of the new_utsname structure are defined with magic numbers
that *should* correspond to the constant __NEW_UTS_LEN+1.  Everywhere
else, code assumes this and uses the constant, so this patch makes the
structure match.

Originally suggested by Serge here:

https://lists.linux-foundation.org/pipermail/containers/2009-March/016258.html

Signed-off-by: Dan Smith <danms@us.ibm.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/utsname.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 11232676bff..3656b300de3 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -22,12 +22,12 @@ struct old_utsname {
 };
 
 struct new_utsname {
-	char sysname[65];
-	char nodename[65];
-	char release[65];
-	char version[65];
-	char machine[65];
-	char domainname[65];
+	char sysname[__NEW_UTS_LEN + 1];
+	char nodename[__NEW_UTS_LEN + 1];
+	char release[__NEW_UTS_LEN + 1];
+	char version[__NEW_UTS_LEN + 1];
+	char machine[__NEW_UTS_LEN + 1];
+	char domainname[__NEW_UTS_LEN + 1];
 };
 
 #ifdef __KERNEL__
-- 
cgit v1.2.3-70-g09d2


From 4938d7e0233a455f04507bac81d0886c71529537 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 16 Jun 2009 15:33:36 -0700
Subject: poll: avoid extra wakeups in select/poll

After introduction of keyed wakeups Davide Libenzi did on epoll, we are
able to avoid spurious wakeups in poll()/select() code too.

For example, typical use of poll()/select() is to wait for incoming
network frames on many sockets.  But TX completion for UDP/TCP frames call
sock_wfree() which in turn schedules thread.

When scheduled, thread does a full scan of all polled fds and can sleep
again, because nothing is really available.  If number of fds is large,
this cause significant load.

This patch makes select()/poll() aware of keyed wakeups and useless
wakeups are avoided.  This reduces number of context switches by about 50%
on some setups, and work performed by sofirq handlers.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/select.c          | 40 ++++++++++++++++++++++++++++++++++++----
 include/linux/poll.h |  3 +++
 2 files changed, 39 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/fs/select.c b/fs/select.c
index 0fe0e1469df..d870237e42c 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -168,7 +168,7 @@ static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
 	return table->entry++;
 }
 
-static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
 {
 	struct poll_wqueues *pwq = wait->private;
 	DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);
@@ -194,6 +194,16 @@ static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
 	return default_wake_function(&dummy_wait, mode, sync, key);
 }
 
+static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+	struct poll_table_entry *entry;
+
+	entry = container_of(wait, struct poll_table_entry, wait);
+	if (key && !((unsigned long)key & entry->key))
+		return 0;
+	return __pollwake(wait, mode, sync, key);
+}
+
 /* Add a new entry */
 static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
 				poll_table *p)
@@ -205,6 +215,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
 	get_file(filp);
 	entry->filp = filp;
 	entry->wait_address = wait_address;
+	entry->key = p->key;
 	init_waitqueue_func_entry(&entry->wait, pollwake);
 	entry->wait.private = pwq;
 	add_wait_queue(wait_address, &entry->wait);
@@ -362,6 +373,18 @@ get_max:
 #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
 #define POLLEX_SET (POLLPRI)
 
+static inline void wait_key_set(poll_table *wait, unsigned long in,
+				unsigned long out, unsigned long bit)
+{
+	if (wait) {
+		wait->key = POLLEX_SET;
+		if (in & bit)
+			wait->key |= POLLIN_SET;
+		if (out & bit)
+			wait->key |= POLLOUT_SET;
+	}
+}
+
 int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
 {
 	ktime_t expire, *to = NULL;
@@ -418,20 +441,25 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
 				if (file) {
 					f_op = file->f_op;
 					mask = DEFAULT_POLLMASK;
-					if (f_op && f_op->poll)
-						mask = (*f_op->poll)(file, retval ? NULL : wait);
+					if (f_op && f_op->poll) {
+						wait_key_set(wait, in, out, bit);
+						mask = (*f_op->poll)(file, wait);
+					}
 					fput_light(file, fput_needed);
 					if ((mask & POLLIN_SET) && (in & bit)) {
 						res_in |= bit;
 						retval++;
+						wait = NULL;
 					}
 					if ((mask & POLLOUT_SET) && (out & bit)) {
 						res_out |= bit;
 						retval++;
+						wait = NULL;
 					}
 					if ((mask & POLLEX_SET) && (ex & bit)) {
 						res_ex |= bit;
 						retval++;
+						wait = NULL;
 					}
 				}
 			}
@@ -685,8 +713,12 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
 		mask = POLLNVAL;
 		if (file != NULL) {
 			mask = DEFAULT_POLLMASK;
-			if (file->f_op && file->f_op->poll)
+			if (file->f_op && file->f_op->poll) {
+				if (pwait)
+					pwait->key = pollfd->events |
+							POLLERR | POLLHUP;
 				mask = file->f_op->poll(file, pwait);
+			}
 			/* Mask out unneeded events. */
 			mask &= pollfd->events | POLLERR | POLLHUP;
 			fput_light(file, fput_needed);
diff --git a/include/linux/poll.h b/include/linux/poll.h
index 8c24ef8d997..fa287f25138 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -32,6 +32,7 @@ typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *, struct poll_
 
 typedef struct poll_table_struct {
 	poll_queue_proc qproc;
+	unsigned long key;
 } poll_table;
 
 static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
@@ -43,10 +44,12 @@ static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_addres
 static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc)
 {
 	pt->qproc = qproc;
+	pt->key   = ~0UL; /* all events enabled */
 }
 
 struct poll_table_entry {
 	struct file *filp;
+	unsigned long key;
 	wait_queue_t wait;
 	wait_queue_head_t *wait_address;
 };
-- 
cgit v1.2.3-70-g09d2


From 0d9c25dde878a636ee9a9b53923569171bf9a55b Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 16 Jun 2009 15:33:37 -0700
Subject: headers: move module_bug_finalize()/module_bug_cleanup() definitions
 into module.h

They're in linux/bug.h at present, which causes include order tangles.  In
particular, linux/bug.h cannot be used by linux/atomic.h because,
according to Nikanth:

linux/bug.h pulls in linux/module.h => linux/spinlock.h => asm/spinlock.h
(which uses atomic_inc) => asm/atomic.h.

bug.h is a pretty low-level thing and module.h is a higher-level thing,
IMO.

Cc: Nikanth Karthikesan <knikanth@novell.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bug.h    | 12 ------------
 include/linux/module.h | 17 +++++++++++++++++
 2 files changed, 17 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/bug.h b/include/linux/bug.h
index 54398d2c6d8..d276b5510c8 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -1,7 +1,6 @@
 #ifndef _LINUX_BUG_H
 #define _LINUX_BUG_H
 
-#include <linux/module.h>
 #include <asm/bug.h>
 
 enum bug_trap_type {
@@ -24,10 +23,6 @@ const struct bug_entry *find_bug(unsigned long bugaddr);
 
 enum bug_trap_type report_bug(unsigned long bug_addr, struct pt_regs *regs);
 
-int  module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *,
-			 struct module *);
-void module_bug_cleanup(struct module *);
-
 /* These are defined by the architecture */
 int is_valid_bugaddr(unsigned long addr);
 
@@ -38,13 +33,6 @@ static inline enum bug_trap_type report_bug(unsigned long bug_addr,
 {
 	return BUG_TRAP_TYPE_BUG;
 }
-static inline int  module_bug_finalize(const Elf_Ehdr *hdr,
-					const Elf_Shdr *sechdrs,
-					struct module *mod)
-{
-	return 0;
-}
-static inline void module_bug_cleanup(struct module *mod) {}
 
 #endif	/* CONFIG_GENERIC_BUG */
 #endif	/* _LINUX_BUG_H */
diff --git a/include/linux/module.h b/include/linux/module.h
index a7bc6e7b43a..505f20dcc1c 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -697,4 +697,21 @@ static inline void module_remove_modinfo_attrs(struct module *mod)
 
 #define __MODULE_STRING(x) __stringify(x)
 
+
+#ifdef CONFIG_GENERIC_BUG
+int  module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *,
+			 struct module *);
+void module_bug_cleanup(struct module *);
+
+#else	/* !CONFIG_GENERIC_BUG */
+
+static inline int  module_bug_finalize(const Elf_Ehdr *hdr,
+					const Elf_Shdr *sechdrs,
+					struct module *mod)
+{
+	return 0;
+}
+static inline void module_bug_cleanup(struct module *mod) {}
+#endif	/* CONFIG_GENERIC_BUG */
+
 #endif /* _LINUX_MODULE_H */
-- 
cgit v1.2.3-70-g09d2


From 10fc89d01a7ea2ecc2a58d2f4e7700f47178cd62 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Tue, 16 Jun 2009 15:33:38 -0700
Subject: drbd: add major number to major.h

Since we have had a LANANA major number for years, and it is documented in
devices.txt, I think that this first patch can go upstream.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/major.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/major.h b/include/linux/major.h
index 058ec15dd06..6a8ca98c9a9 100644
--- a/include/linux/major.h
+++ b/include/linux/major.h
@@ -145,6 +145,7 @@
 #define UNIX98_PTY_MAJOR_COUNT	8
 #define UNIX98_PTY_SLAVE_MAJOR	(UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT)
 
+#define DRBD_MAJOR		147
 #define RTF_MAJOR		150
 #define RAW_MAJOR		162
 
-- 
cgit v1.2.3-70-g09d2


From 8b0b1db0133e4218a9b45c09e53793c039edebe1 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 16 Jun 2009 15:33:39 -0700
Subject: remove put_cpu_no_resched()

put_cpu_no_resched() is an optimization of put_cpu() which unfortunately
can cause high latencies.

The nfs iostats code uses put_cpu_no_resched() in a code sequence where a
reschedule request caused by an interrupt between the get_cpu() and the
put_cpu_no_resched() can delay the reschedule for at least HZ.

The other users of put_cpu_no_resched() optimize correctly in interrupt
code, but there is no real harm in using the put_cpu() function which is
an alias for preempt_enable().  The extra check of the preemmpt count is
not as critical as the potential source of missing a reschedule.

Debugged in the preempt-rt tree and verified in mainline.

Impact: remove a high latency source

[akpm@linux-foundation.org: build fix]
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/kernel/perfmon.c | 2 +-
 fs/nfs/iostat.h            | 6 +++---
 include/linux/smp.h        | 1 -
 3 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 8a06dc48059..bdc176cb5e8 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -5595,7 +5595,7 @@ pfm_interrupt_handler(int irq, void *arg)
 		(*pfm_alt_intr_handler->handler)(irq, arg, regs);
 	}
 
-	put_cpu_no_resched();
+	put_cpu();
 	return IRQ_HANDLED;
 }
 
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
index a2ab2529b5c..ceda50aad73 100644
--- a/fs/nfs/iostat.h
+++ b/fs/nfs/iostat.h
@@ -31,7 +31,7 @@ static inline void nfs_inc_server_stats(const struct nfs_server *server,
 	cpu = get_cpu();
 	iostats = per_cpu_ptr(server->io_stats, cpu);
 	iostats->events[stat]++;
-	put_cpu_no_resched();
+	put_cpu();
 }
 
 static inline void nfs_inc_stats(const struct inode *inode,
@@ -50,7 +50,7 @@ static inline void nfs_add_server_stats(const struct nfs_server *server,
 	cpu = get_cpu();
 	iostats = per_cpu_ptr(server->io_stats, cpu);
 	iostats->bytes[stat] += addend;
-	put_cpu_no_resched();
+	put_cpu();
 }
 
 static inline void nfs_add_stats(const struct inode *inode,
@@ -71,7 +71,7 @@ static inline void nfs_add_fscache_stats(struct inode *inode,
 	cpu = get_cpu();
 	iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu);
 	iostats->fscache[stat] += addend;
-	put_cpu_no_resched();
+	put_cpu();
 }
 #endif
 
diff --git a/include/linux/smp.h b/include/linux/smp.h
index a69db820eed..9e3d8af0920 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -177,7 +177,6 @@ static inline void init_call_single_data(void)
 
 #define get_cpu()		({ preempt_disable(); smp_processor_id(); })
 #define put_cpu()		preempt_enable()
-#define put_cpu_no_resched()	preempt_enable_no_resched()
 
 /*
  * Callback to arch code if there's nosmp or maxcpus=0 on the
-- 
cgit v1.2.3-70-g09d2


From e4c9dd0fbad60c098a026e9b06d9de1bc98c5e89 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Tue, 16 Jun 2009 15:33:47 -0700
Subject: kmap_types: make most arches use generic header file

Convert most arches to use asm-generic/kmap_types.h.

Move the KM_FENCE_ macro additions into asm-generic/kmap_types.h,
controlled by __WITH_KM_FENCE from each arch's kmap_types.h file.

Would be nice to be able to add custom KM_types per arch, but I don't yet
see a nice, clean way to do that.

Built on x86_64, i386, mips, sparc, alpha(tonyb), powerpc(tonyb), and
68k(tonyb).

Note: avr32 should be able to remove KM_PTE2 (since it's not used) and
then just use the generic kmap_types.h file.  Get avr32 maintainer
approval.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: <linux-arch@vger.kernel.org>
Acked-by: Mike Frysinger <vapier@gentoo.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Bryan Wu <cooloney@kernel.org>
Cc: Mikael Starvik <starvik@axis.com>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Cc: "Luck Tony" <tony.luck@intel.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/include/asm/kmap_types.h      | 24 +++---------------------
 arch/blackfin/include/asm/kmap_types.h   | 17 +----------------
 arch/cris/include/asm/kmap_types.h       | 17 +----------------
 arch/h8300/include/asm/kmap_types.h      | 17 +----------------
 arch/ia64/include/asm/kmap_types.h       | 24 +++---------------------
 arch/m32r/include/asm/kmap_types.h       | 23 +++--------------------
 arch/m68k/include/asm/kmap_types.h       | 17 +----------------
 arch/microblaze/include/asm/kmap_types.h | 25 +------------------------
 arch/mips/include/asm/kmap_types.h       | 24 +++---------------------
 arch/mn10300/include/asm/kmap_types.h    | 27 +--------------------------
 arch/parisc/include/asm/kmap_types.h     | 24 +++---------------------
 arch/s390/include/asm/kmap_types.h       | 17 +----------------
 arch/sh/include/asm/kmap_types.h         | 24 +++---------------------
 arch/sparc/include/asm/kmap_types.h      | 17 +----------------
 arch/x86/include/asm/kmap_types.h        | 23 +++--------------------
 arch/xtensa/include/asm/kmap_types.h     | 27 +--------------------------
 include/asm-generic/kmap_types.h         |  2 +-
 17 files changed, 31 insertions(+), 318 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/asm/kmap_types.h b/arch/alpha/include/asm/kmap_types.h
index 3e6735a34c5..a8d4ec8ea4b 100644
--- a/arch/alpha/include/asm/kmap_types.h
+++ b/arch/alpha/include/asm/kmap_types.h
@@ -3,30 +3,12 @@
 
 /* Dummy header just to define km_type. */
 
-
 #ifdef CONFIG_DEBUG_HIGHMEM
-# define D(n) __KM_FENCE_##n ,
-#else
-# define D(n)
+#define  __WITH_KM_FENCE
 #endif
 
-enum km_type {
-D(0)	KM_BOUNCE_READ,
-D(1)	KM_SKB_SUNRPC_DATA,
-D(2)	KM_SKB_DATA_SOFTIRQ,
-D(3)	KM_USER0,
-D(4)	KM_USER1,
-D(5)	KM_BIO_SRC_IRQ,
-D(6)	KM_BIO_DST_IRQ,
-D(7)	KM_PTE0,
-D(8)	KM_PTE1,
-D(9)	KM_IRQ0,
-D(10)	KM_IRQ1,
-D(11)	KM_SOFTIRQ0,
-D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
-};
+#include <asm-generic/kmap_types.h>
 
-#undef D
+#undef __WITH_KM_FENCE
 
 #endif
diff --git a/arch/blackfin/include/asm/kmap_types.h b/arch/blackfin/include/asm/kmap_types.h
index e215f710497..0a88622339e 100644
--- a/arch/blackfin/include/asm/kmap_types.h
+++ b/arch/blackfin/include/asm/kmap_types.h
@@ -1,21 +1,6 @@
 #ifndef _ASM_KMAP_TYPES_H
 #define _ASM_KMAP_TYPES_H
 
-enum km_type {
-	KM_BOUNCE_READ,
-	KM_SKB_SUNRPC_DATA,
-	KM_SKB_DATA_SOFTIRQ,
-	KM_USER0,
-	KM_USER1,
-	KM_BIO_SRC_IRQ,
-	KM_BIO_DST_IRQ,
-	KM_PTE0,
-	KM_PTE1,
-	KM_IRQ0,
-	KM_IRQ1,
-	KM_SOFTIRQ0,
-	KM_SOFTIRQ1,
-	KM_TYPE_NR
-};
+#include <asm-generic/kmap_types.h>
 
 #endif
diff --git a/arch/cris/include/asm/kmap_types.h b/arch/cris/include/asm/kmap_types.h
index 492988cb907..d2d643c4ea5 100644
--- a/arch/cris/include/asm/kmap_types.h
+++ b/arch/cris/include/asm/kmap_types.h
@@ -5,21 +5,6 @@
  * is actually used on cris. 
  */
 
-enum km_type {
-	KM_BOUNCE_READ,
-	KM_SKB_SUNRPC_DATA,
-	KM_SKB_DATA_SOFTIRQ,
-	KM_USER0,
-	KM_USER1,
-	KM_BIO_SRC_IRQ,
-	KM_BIO_DST_IRQ,
-	KM_PTE0,
-	KM_PTE1,
-	KM_IRQ0,
-	KM_IRQ1,
-	KM_SOFTIRQ0,
-	KM_SOFTIRQ1,
-	KM_TYPE_NR
-};
+#include <asm-generic/kmap_types.h>
 
 #endif
diff --git a/arch/h8300/include/asm/kmap_types.h b/arch/h8300/include/asm/kmap_types.h
index 1ec8a342712..be12a716011 100644
--- a/arch/h8300/include/asm/kmap_types.h
+++ b/arch/h8300/include/asm/kmap_types.h
@@ -1,21 +1,6 @@
 #ifndef _ASM_H8300_KMAP_TYPES_H
 #define _ASM_H8300_KMAP_TYPES_H
 
-enum km_type {
-	KM_BOUNCE_READ,
-	KM_SKB_SUNRPC_DATA,
-	KM_SKB_DATA_SOFTIRQ,
-	KM_USER0,
-	KM_USER1,
-	KM_BIO_SRC_IRQ,
-	KM_BIO_DST_IRQ,
-	KM_PTE0,
-	KM_PTE1,
-	KM_IRQ0,
-	KM_IRQ1,
-	KM_SOFTIRQ0,
-	KM_SOFTIRQ1,
-	KM_TYPE_NR
-};
+#include <asm-generic/kmap_types.h>
 
 #endif
diff --git a/arch/ia64/include/asm/kmap_types.h b/arch/ia64/include/asm/kmap_types.h
index 5d1658aa2b3..05d5f999610 100644
--- a/arch/ia64/include/asm/kmap_types.h
+++ b/arch/ia64/include/asm/kmap_types.h
@@ -1,30 +1,12 @@
 #ifndef _ASM_IA64_KMAP_TYPES_H
 #define _ASM_IA64_KMAP_TYPES_H
 
-
 #ifdef CONFIG_DEBUG_HIGHMEM
-# define D(n) __KM_FENCE_##n ,
-#else
-# define D(n)
+#define  __WITH_KM_FENCE
 #endif
 
-enum km_type {
-D(0)	KM_BOUNCE_READ,
-D(1)	KM_SKB_SUNRPC_DATA,
-D(2)	KM_SKB_DATA_SOFTIRQ,
-D(3)	KM_USER0,
-D(4)	KM_USER1,
-D(5)	KM_BIO_SRC_IRQ,
-D(6)	KM_BIO_DST_IRQ,
-D(7)	KM_PTE0,
-D(8)	KM_PTE1,
-D(9)	KM_IRQ0,
-D(10)	KM_IRQ1,
-D(11)	KM_SOFTIRQ0,
-D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
-};
+#include <asm-generic/kmap_types.h>
 
-#undef D
+#undef __WITH_KM_FENCE
 
 #endif /* _ASM_IA64_KMAP_TYPES_H */
diff --git a/arch/m32r/include/asm/kmap_types.h b/arch/m32r/include/asm/kmap_types.h
index fa94dc6410e..4cdb5e3a06b 100644
--- a/arch/m32r/include/asm/kmap_types.h
+++ b/arch/m32r/include/asm/kmap_types.h
@@ -2,28 +2,11 @@
 #define __M32R_KMAP_TYPES_H
 
 #ifdef CONFIG_DEBUG_HIGHMEM
-# define D(n) __KM_FENCE_##n ,
-#else
-# define D(n)
+#define  __WITH_KM_FENCE
 #endif
 
-enum km_type {
-D(0)	KM_BOUNCE_READ,
-D(1)	KM_SKB_SUNRPC_DATA,
-D(2)	KM_SKB_DATA_SOFTIRQ,
-D(3)	KM_USER0,
-D(4)	KM_USER1,
-D(5)	KM_BIO_SRC_IRQ,
-D(6)	KM_BIO_DST_IRQ,
-D(7)	KM_PTE0,
-D(8)	KM_PTE1,
-D(9)	KM_IRQ0,
-D(10)	KM_IRQ1,
-D(11)	KM_SOFTIRQ0,
-D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
-};
+#include <asm-generic/kmap_types.h>
 
-#undef D
+#undef __WITH_KM_FENCE
 
 #endif /* __M32R_KMAP_TYPES_H */
diff --git a/arch/m68k/include/asm/kmap_types.h b/arch/m68k/include/asm/kmap_types.h
index c843c63d380..3413cc1390e 100644
--- a/arch/m68k/include/asm/kmap_types.h
+++ b/arch/m68k/include/asm/kmap_types.h
@@ -1,21 +1,6 @@
 #ifndef __ASM_M68K_KMAP_TYPES_H
 #define __ASM_M68K_KMAP_TYPES_H
 
-enum km_type {
-	KM_BOUNCE_READ,
-	KM_SKB_SUNRPC_DATA,
-	KM_SKB_DATA_SOFTIRQ,
-	KM_USER0,
-	KM_USER1,
-	KM_BIO_SRC_IRQ,
-	KM_BIO_DST_IRQ,
-	KM_PTE0,
-	KM_PTE1,
-	KM_IRQ0,
-	KM_IRQ1,
-	KM_SOFTIRQ0,
-	KM_SOFTIRQ1,
-	KM_TYPE_NR
-};
+#include <asm-generic/kmap_types.h>
 
 #endif	/* __ASM_M68K_KMAP_TYPES_H */
diff --git a/arch/microblaze/include/asm/kmap_types.h b/arch/microblaze/include/asm/kmap_types.h
index 4d7e222f5dd..25975252d83 100644
--- a/arch/microblaze/include/asm/kmap_types.h
+++ b/arch/microblaze/include/asm/kmap_types.h
@@ -1,29 +1,6 @@
-/*
- * Copyright (C) 2006 Atmark Techno, Inc.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
 #ifndef _ASM_MICROBLAZE_KMAP_TYPES_H
 #define _ASM_MICROBLAZE_KMAP_TYPES_H
 
-enum km_type {
-	KM_BOUNCE_READ,
-	KM_SKB_SUNRPC_DATA,
-	KM_SKB_DATA_SOFTIRQ,
-	KM_USER0,
-	KM_USER1,
-	KM_BIO_SRC_IRQ,
-	KM_BIO_DST_IRQ,
-	KM_PTE0,
-	KM_PTE1,
-	KM_IRQ0,
-	KM_IRQ1,
-	KM_SOFTIRQ0,
-	KM_SOFTIRQ1,
-	KM_TYPE_NR,
-};
+#include <asm-generic/kmap_types.h>
 
 #endif /* _ASM_MICROBLAZE_KMAP_TYPES_H */
diff --git a/arch/mips/include/asm/kmap_types.h b/arch/mips/include/asm/kmap_types.h
index 806aae3c533..58e91ed0388 100644
--- a/arch/mips/include/asm/kmap_types.h
+++ b/arch/mips/include/asm/kmap_types.h
@@ -1,30 +1,12 @@
 #ifndef _ASM_KMAP_TYPES_H
 #define _ASM_KMAP_TYPES_H
 
-
 #ifdef CONFIG_DEBUG_HIGHMEM
-# define D(n) __KM_FENCE_##n ,
-#else
-# define D(n)
+#define  __WITH_KM_FENCE
 #endif
 
-enum km_type {
-D(0)	KM_BOUNCE_READ,
-D(1)	KM_SKB_SUNRPC_DATA,
-D(2)	KM_SKB_DATA_SOFTIRQ,
-D(3)	KM_USER0,
-D(4)	KM_USER1,
-D(5)	KM_BIO_SRC_IRQ,
-D(6)	KM_BIO_DST_IRQ,
-D(7)	KM_PTE0,
-D(8)	KM_PTE1,
-D(9)	KM_IRQ0,
-D(10)	KM_IRQ1,
-D(11)	KM_SOFTIRQ0,
-D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
-};
+#include <asm-generic/kmap_types.h>
 
-#undef D
+#undef __WITH_KM_FENCE
 
 #endif
diff --git a/arch/mn10300/include/asm/kmap_types.h b/arch/mn10300/include/asm/kmap_types.h
index 3398f9f3560..76d093b58d4 100644
--- a/arch/mn10300/include/asm/kmap_types.h
+++ b/arch/mn10300/include/asm/kmap_types.h
@@ -1,31 +1,6 @@
-/* MN10300 kmap_atomic() slot IDs
- *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
 #ifndef _ASM_KMAP_TYPES_H
 #define _ASM_KMAP_TYPES_H
 
-enum km_type {
-	KM_BOUNCE_READ,
-	KM_SKB_SUNRPC_DATA,
-	KM_SKB_DATA_SOFTIRQ,
-	KM_USER0,
-	KM_USER1,
-	KM_BIO_SRC_IRQ,
-	KM_BIO_DST_IRQ,
-	KM_PTE0,
-	KM_PTE1,
-	KM_IRQ0,
-	KM_IRQ1,
-	KM_SOFTIRQ0,
-	KM_SOFTIRQ1,
-	KM_TYPE_NR
-};
+#include <asm-generic/kmap_types.h>
 
 #endif /* _ASM_KMAP_TYPES_H */
diff --git a/arch/parisc/include/asm/kmap_types.h b/arch/parisc/include/asm/kmap_types.h
index 806aae3c533..58e91ed0388 100644
--- a/arch/parisc/include/asm/kmap_types.h
+++ b/arch/parisc/include/asm/kmap_types.h
@@ -1,30 +1,12 @@
 #ifndef _ASM_KMAP_TYPES_H
 #define _ASM_KMAP_TYPES_H
 
-
 #ifdef CONFIG_DEBUG_HIGHMEM
-# define D(n) __KM_FENCE_##n ,
-#else
-# define D(n)
+#define  __WITH_KM_FENCE
 #endif
 
-enum km_type {
-D(0)	KM_BOUNCE_READ,
-D(1)	KM_SKB_SUNRPC_DATA,
-D(2)	KM_SKB_DATA_SOFTIRQ,
-D(3)	KM_USER0,
-D(4)	KM_USER1,
-D(5)	KM_BIO_SRC_IRQ,
-D(6)	KM_BIO_DST_IRQ,
-D(7)	KM_PTE0,
-D(8)	KM_PTE1,
-D(9)	KM_IRQ0,
-D(10)	KM_IRQ1,
-D(11)	KM_SOFTIRQ0,
-D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
-};
+#include <asm-generic/kmap_types.h>
 
-#undef D
+#undef __WITH_KM_FENCE
 
 #endif
diff --git a/arch/s390/include/asm/kmap_types.h b/arch/s390/include/asm/kmap_types.h
index fd157464822..94ec3ee0798 100644
--- a/arch/s390/include/asm/kmap_types.h
+++ b/arch/s390/include/asm/kmap_types.h
@@ -2,22 +2,7 @@
 #ifndef _ASM_KMAP_TYPES_H
 #define _ASM_KMAP_TYPES_H
 
-enum km_type {
-	KM_BOUNCE_READ,
-	KM_SKB_SUNRPC_DATA,
-	KM_SKB_DATA_SOFTIRQ,
-	KM_USER0,
-	KM_USER1,
-	KM_BIO_SRC_IRQ,
-	KM_BIO_DST_IRQ,
-	KM_PTE0,
-	KM_PTE1,
-	KM_IRQ0,
-	KM_IRQ1,
-	KM_SOFTIRQ0,
-	KM_SOFTIRQ1,	
-	KM_TYPE_NR
-};
+#include <asm-generic/kmap_types.h>
 
 #endif
 #endif /* __KERNEL__ */
diff --git a/arch/sh/include/asm/kmap_types.h b/arch/sh/include/asm/kmap_types.h
index 84d565c696b..5962b08b6dd 100644
--- a/arch/sh/include/asm/kmap_types.h
+++ b/arch/sh/include/asm/kmap_types.h
@@ -3,30 +3,12 @@
 
 /* Dummy header just to define km_type. */
 
-
 #ifdef CONFIG_DEBUG_HIGHMEM
-# define D(n) __KM_FENCE_##n ,
-#else
-# define D(n)
+#define  __WITH_KM_FENCE
 #endif
 
-enum km_type {
-D(0)	KM_BOUNCE_READ,
-D(1)	KM_SKB_SUNRPC_DATA,
-D(2)	KM_SKB_DATA_SOFTIRQ,
-D(3)	KM_USER0,
-D(4)	KM_USER1,
-D(5)	KM_BIO_SRC_IRQ,
-D(6)	KM_BIO_DST_IRQ,
-D(7)	KM_PTE0,
-D(8)	KM_PTE1,
-D(9)	KM_IRQ0,
-D(10)	KM_IRQ1,
-D(11)	KM_SOFTIRQ0,
-D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
-};
+#include <asm-generic/kmap_types.h>
 
-#undef D
+#undef __WITH_KM_FENCE
 
 #endif
diff --git a/arch/sparc/include/asm/kmap_types.h b/arch/sparc/include/asm/kmap_types.h
index 602f5e034f7..aad21745fbb 100644
--- a/arch/sparc/include/asm/kmap_types.h
+++ b/arch/sparc/include/asm/kmap_types.h
@@ -5,21 +5,6 @@
  * is actually used on sparc.  -DaveM
  */
 
-enum km_type {
-	KM_BOUNCE_READ,
-	KM_SKB_SUNRPC_DATA,
-	KM_SKB_DATA_SOFTIRQ,
-	KM_USER0,
-	KM_USER1,
-	KM_BIO_SRC_IRQ,
-	KM_BIO_DST_IRQ,
-	KM_PTE0,
-	KM_PTE1,
-	KM_IRQ0,
-	KM_IRQ1,
-	KM_SOFTIRQ0,
-	KM_SOFTIRQ1,
-	KM_TYPE_NR
-};
+#include <asm-generic/kmap_types.h>
 
 #endif
diff --git a/arch/x86/include/asm/kmap_types.h b/arch/x86/include/asm/kmap_types.h
index 5759c165a5c..9e00a731a7f 100644
--- a/arch/x86/include/asm/kmap_types.h
+++ b/arch/x86/include/asm/kmap_types.h
@@ -2,28 +2,11 @@
 #define _ASM_X86_KMAP_TYPES_H
 
 #if defined(CONFIG_X86_32) && defined(CONFIG_DEBUG_HIGHMEM)
-# define D(n) __KM_FENCE_##n ,
-#else
-# define D(n)
+#define  __WITH_KM_FENCE
 #endif
 
-enum km_type {
-D(0)	KM_BOUNCE_READ,
-D(1)	KM_SKB_SUNRPC_DATA,
-D(2)	KM_SKB_DATA_SOFTIRQ,
-D(3)	KM_USER0,
-D(4)	KM_USER1,
-D(5)	KM_BIO_SRC_IRQ,
-D(6)	KM_BIO_DST_IRQ,
-D(7)	KM_PTE0,
-D(8)	KM_PTE1,
-D(9)	KM_IRQ0,
-D(10)	KM_IRQ1,
-D(11)	KM_SOFTIRQ0,
-D(12)	KM_SOFTIRQ1,
-D(13)	KM_TYPE_NR
-};
+#include <asm-generic/kmap_types.h>
 
-#undef D
+#undef __WITH_KM_FENCE
 
 #endif /* _ASM_X86_KMAP_TYPES_H */
diff --git a/arch/xtensa/include/asm/kmap_types.h b/arch/xtensa/include/asm/kmap_types.h
index 9e822d2e3bc..11c687e527f 100644
--- a/arch/xtensa/include/asm/kmap_types.h
+++ b/arch/xtensa/include/asm/kmap_types.h
@@ -1,31 +1,6 @@
-/*
- * include/asm-xtensa/kmap_types.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- */
-
 #ifndef _XTENSA_KMAP_TYPES_H
 #define _XTENSA_KMAP_TYPES_H
 
-enum km_type {
-  KM_BOUNCE_READ,
-  KM_SKB_SUNRPC_DATA,
-  KM_SKB_DATA_SOFTIRQ,
-  KM_USER0,
-  KM_USER1,
-  KM_BIO_SRC_IRQ,
-  KM_BIO_DST_IRQ,
-  KM_PTE0,
-  KM_PTE1,
-  KM_IRQ0,
-  KM_IRQ1,
-  KM_SOFTIRQ0,
-  KM_SOFTIRQ1,
-  KM_TYPE_NR
-};
+#include <asm-generic/kmap_types.h>
 
 #endif	/* _XTENSA_KMAP_TYPES_H */
diff --git a/include/asm-generic/kmap_types.h b/include/asm-generic/kmap_types.h
index 58c33055c30..54e8b3d956b 100644
--- a/include/asm-generic/kmap_types.h
+++ b/include/asm-generic/kmap_types.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_GENERIC_KMAP_TYPES_H
 #define _ASM_GENERIC_KMAP_TYPES_H
 
-#ifdef CONFIG_DEBUG_HIGHMEM
+#ifdef __WITH_KM_FENCE
 # define D(n) __KM_FENCE_##n ,
 #else
 # define D(n)
-- 
cgit v1.2.3-70-g09d2


From cc6f26774136b7f5307abcd3887f08360c9b7554 Mon Sep 17 00:00:00 2001
From: Masatake YAMATO <yamato@redhat.com>
Date: Tue, 16 Jun 2009 15:33:49 -0700
Subject: syscalls.h: remove duplicated declarations for sys_pipe2

sys_pipe2 is declared twice in include/linux/syscalls.h.

Signed-off-by: Masatake YAMATO <yamato@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/syscalls.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 418d90f5eff..fa4242cdade 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -434,6 +434,7 @@ asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg);
 asmlinkage long sys_fcntl64(unsigned int fd,
 				unsigned int cmd, unsigned long arg);
 #endif
+asmlinkage long sys_pipe(int __user *fildes);
 asmlinkage long sys_pipe2(int __user *fildes, int flags);
 asmlinkage long sys_dup(unsigned int fildes);
 asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd);
@@ -751,8 +752,6 @@ asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *,
 asmlinkage long sys_ppoll(struct pollfd __user *, unsigned int,
 			  struct timespec __user *, const sigset_t __user *,
 			  size_t);
-asmlinkage long sys_pipe2(int __user *, int);
-asmlinkage long sys_pipe(int __user *);
 
 int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
-- 
cgit v1.2.3-70-g09d2


From 55e331cf7ebe20665253770589cd9eb06048bf25 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <florian@openwrt.org>
Date: Tue, 16 Jun 2009 15:33:53 -0700
Subject: drivers: add support for the TI VLYNQ bus

Add support for the TI VLYNQ high-speed, serial and packetized bus.

This bus allows external devices to be connected to the System-on-Chip and
appear in the main system memory just like any memory mapped peripheral.
It is widely used in TI's networking and multimedia SoC, including the AR7
SoC.

Signed-off-by: Eugene Konev <ejka@imfi.kspu.ru>
Signed-off-by: Florian Fainelli <florian@openwrt.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS            |   8 +
 drivers/Kconfig        |   2 +
 drivers/Makefile       |   1 +
 drivers/vlynq/Kconfig  |  20 ++
 drivers/vlynq/Makefile |   5 +
 drivers/vlynq/vlynq.c  | 814 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/vlynq.h  | 161 ++++++++++
 7 files changed, 1011 insertions(+)
 create mode 100644 drivers/vlynq/Kconfig
 create mode 100644 drivers/vlynq/Makefile
 create mode 100644 drivers/vlynq/vlynq.c
 create mode 100644 include/linux/vlynq.h

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index 2cb7566904b..b735f7dc91d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6281,6 +6281,14 @@ F:	drivers/net/macvlan.c
 F:	include/linux/if_*vlan.h
 F:	net/8021q/
 
+VLYNQ BUS
+P:	Florian Fainelli
+M:	florian@openwrt.org
+L:	openwrt-devel@lists.openwrt.org
+S:	Maintained
+F:	drivers/vlynq/vlynq.c
+F:	include/linux/vlynq.h
+
 VOLTAGE AND CURRENT REGULATOR FRAMEWORK
 P:	Liam Girdwood
 M:	lrg@slimlogic.co.uk
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 00cf9553f74..a442c8f29fc 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -104,6 +104,8 @@ source "drivers/auxdisplay/Kconfig"
 
 source "drivers/uio/Kconfig"
 
+source "drivers/vlynq/Kconfig"
+
 source "drivers/xen/Kconfig"
 
 source "drivers/staging/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 9e7d4e56c85..00b44f4ccf0 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -105,6 +105,7 @@ obj-$(CONFIG_PPC_PS3)		+= ps3/
 obj-$(CONFIG_OF)		+= of/
 obj-$(CONFIG_SSB)		+= ssb/
 obj-$(CONFIG_VIRTIO)		+= virtio/
+obj-$(CONFIG_VLYNQ)		+= vlynq/
 obj-$(CONFIG_STAGING)		+= staging/
 obj-y				+= platform/
 obj-y				+= ieee802154/
diff --git a/drivers/vlynq/Kconfig b/drivers/vlynq/Kconfig
new file mode 100644
index 00000000000..f6542211db4
--- /dev/null
+++ b/drivers/vlynq/Kconfig
@@ -0,0 +1,20 @@
+menu "TI VLYNQ"
+
+config VLYNQ
+	bool "TI VLYNQ bus support"
+	depends on AR7 && EXPERIMENTAL
+	help
+	  Support for Texas Instruments(R) VLYNQ bus.
+	  The VLYNQ bus is a high-speed, serial and packetized
+	  data bus which allows external peripherals of a SoC
+	  to appear into the system's main memory.
+
+	  If unsure, say N
+
+config VLYNQ_DEBUG
+	bool "VLYNQ bus debug"
+	depends on VLYNQ && KERNEL_DEBUG
+	help
+	  Turn on VLYNQ bus debugging.
+
+endmenu
diff --git a/drivers/vlynq/Makefile b/drivers/vlynq/Makefile
new file mode 100644
index 00000000000..b3f61149b59
--- /dev/null
+++ b/drivers/vlynq/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for kernel vlynq drivers
+#
+
+obj-$(CONFIG_VLYNQ) += vlynq.o
diff --git a/drivers/vlynq/vlynq.c b/drivers/vlynq/vlynq.c
new file mode 100644
index 00000000000..7335433b067
--- /dev/null
+++ b/drivers/vlynq/vlynq.c
@@ -0,0 +1,814 @@
+/*
+ * Copyright (C) 2006, 2007 Eugene Konev <ejka@openwrt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * Parts of the VLYNQ specification can be found here:
+ * http://www.ti.com/litv/pdf/sprue36a
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+
+#include <linux/vlynq.h>
+
+#define VLYNQ_CTRL_PM_ENABLE		0x80000000
+#define VLYNQ_CTRL_CLOCK_INT		0x00008000
+#define VLYNQ_CTRL_CLOCK_DIV(x)		(((x) & 7) << 16)
+#define VLYNQ_CTRL_INT_LOCAL		0x00004000
+#define VLYNQ_CTRL_INT_ENABLE		0x00002000
+#define VLYNQ_CTRL_INT_VECTOR(x)	(((x) & 0x1f) << 8)
+#define VLYNQ_CTRL_INT2CFG		0x00000080
+#define VLYNQ_CTRL_RESET		0x00000001
+
+#define VLYNQ_CTRL_CLOCK_MASK          (0x7 << 16)
+
+#define VLYNQ_INT_OFFSET		0x00000014
+#define VLYNQ_REMOTE_OFFSET		0x00000080
+
+#define VLYNQ_STATUS_LINK		0x00000001
+#define VLYNQ_STATUS_LERROR		0x00000080
+#define VLYNQ_STATUS_RERROR		0x00000100
+
+#define VINT_ENABLE			0x00000100
+#define VINT_TYPE_EDGE			0x00000080
+#define VINT_LEVEL_LOW			0x00000040
+#define VINT_VECTOR(x)			((x) & 0x1f)
+#define VINT_OFFSET(irq)		(8 * ((irq) % 4))
+
+#define VLYNQ_AUTONEGO_V2		0x00010000
+
+struct vlynq_regs {
+	u32 revision;
+	u32 control;
+	u32 status;
+	u32 int_prio;
+	u32 int_status;
+	u32 int_pending;
+	u32 int_ptr;
+	u32 tx_offset;
+	struct vlynq_mapping rx_mapping[4];
+	u32 chip;
+	u32 autonego;
+	u32 unused[6];
+	u32 int_device[8];
+};
+
+#ifdef VLYNQ_DEBUG
+static void vlynq_dump_regs(struct vlynq_device *dev)
+{
+	int i;
+
+	printk(KERN_DEBUG "VLYNQ local=%p remote=%p\n",
+			dev->local, dev->remote);
+	for (i = 0; i < 32; i++) {
+		printk(KERN_DEBUG "VLYNQ: local %d: %08x\n",
+			i + 1, ((u32 *)dev->local)[i]);
+		printk(KERN_DEBUG "VLYNQ: remote %d: %08x\n",
+			i + 1, ((u32 *)dev->remote)[i]);
+	}
+}
+
+static void vlynq_dump_mem(u32 *base, int count)
+{
+	int i;
+
+	for (i = 0; i < (count + 3) / 4; i++) {
+		if (i % 4 == 0)
+			printk(KERN_DEBUG "\nMEM[0x%04x]:", i * 4);
+		printk(KERN_DEBUG " 0x%08x", *(base + i));
+	}
+	printk(KERN_DEBUG "\n");
+}
+#endif
+
+/* Check the VLYNQ link status with a given device */
+static int vlynq_linked(struct vlynq_device *dev)
+{
+	int i;
+
+	for (i = 0; i < 100; i++)
+		if (readl(&dev->local->status) & VLYNQ_STATUS_LINK)
+			return 1;
+		else
+			cpu_relax();
+
+	return 0;
+}
+
+static void vlynq_reset(struct vlynq_device *dev)
+{
+	writel(readl(&dev->local->control) | VLYNQ_CTRL_RESET,
+			&dev->local->control);
+
+	/* Wait for the devices to finish resetting */
+	msleep(5);
+
+	/* Remove reset bit */
+	writel(readl(&dev->local->control) & ~VLYNQ_CTRL_RESET,
+			&dev->local->control);
+
+	/* Give some time for the devices to settle */
+	msleep(5);
+}
+
+static void vlynq_irq_unmask(unsigned int irq)
+{
+	u32 val;
+	struct vlynq_device *dev = get_irq_chip_data(irq);
+	int virq;
+
+	BUG_ON(!dev);
+	virq = irq - dev->irq_start;
+	val = readl(&dev->remote->int_device[virq >> 2]);
+	val |= (VINT_ENABLE | virq) << VINT_OFFSET(virq);
+	writel(val, &dev->remote->int_device[virq >> 2]);
+}
+
+static void vlynq_irq_mask(unsigned int irq)
+{
+	u32 val;
+	struct vlynq_device *dev = get_irq_chip_data(irq);
+	int virq;
+
+	BUG_ON(!dev);
+	virq = irq - dev->irq_start;
+	val = readl(&dev->remote->int_device[virq >> 2]);
+	val &= ~(VINT_ENABLE << VINT_OFFSET(virq));
+	writel(val, &dev->remote->int_device[virq >> 2]);
+}
+
+static int vlynq_irq_type(unsigned int irq, unsigned int flow_type)
+{
+	u32 val;
+	struct vlynq_device *dev = get_irq_chip_data(irq);
+	int virq;
+
+	BUG_ON(!dev);
+	virq = irq - dev->irq_start;
+	val = readl(&dev->remote->int_device[virq >> 2]);
+	switch (flow_type & IRQ_TYPE_SENSE_MASK) {
+	case IRQ_TYPE_EDGE_RISING:
+	case IRQ_TYPE_EDGE_FALLING:
+	case IRQ_TYPE_EDGE_BOTH:
+		val |= VINT_TYPE_EDGE << VINT_OFFSET(virq);
+		val &= ~(VINT_LEVEL_LOW << VINT_OFFSET(virq));
+		break;
+	case IRQ_TYPE_LEVEL_HIGH:
+		val &= ~(VINT_TYPE_EDGE << VINT_OFFSET(virq));
+		val &= ~(VINT_LEVEL_LOW << VINT_OFFSET(virq));
+		break;
+	case IRQ_TYPE_LEVEL_LOW:
+		val &= ~(VINT_TYPE_EDGE << VINT_OFFSET(virq));
+		val |= VINT_LEVEL_LOW << VINT_OFFSET(virq);
+		break;
+	default:
+		return -EINVAL;
+	}
+	writel(val, &dev->remote->int_device[virq >> 2]);
+	return 0;
+}
+
+static void vlynq_local_ack(unsigned int irq)
+{
+	struct vlynq_device *dev = get_irq_chip_data(irq);
+
+	u32 status = readl(&dev->local->status);
+
+	pr_debug("%s: local status: 0x%08x\n",
+		       dev_name(&dev->dev), status);
+	writel(status, &dev->local->status);
+}
+
+static void vlynq_remote_ack(unsigned int irq)
+{
+	struct vlynq_device *dev = get_irq_chip_data(irq);
+
+	u32 status = readl(&dev->remote->status);
+
+	pr_debug("%s: remote status: 0x%08x\n",
+		       dev_name(&dev->dev), status);
+	writel(status, &dev->remote->status);
+}
+
+static irqreturn_t vlynq_irq(int irq, void *dev_id)
+{
+	struct vlynq_device *dev = dev_id;
+	u32 status;
+	int virq = 0;
+
+	status = readl(&dev->local->int_status);
+	writel(status, &dev->local->int_status);
+
+	if (unlikely(!status))
+		spurious_interrupt();
+
+	while (status) {
+		if (status & 1)
+			do_IRQ(dev->irq_start + virq);
+		status >>= 1;
+		virq++;
+	}
+
+	return IRQ_HANDLED;
+}
+
+static struct irq_chip vlynq_irq_chip = {
+	.name = "vlynq",
+	.unmask = vlynq_irq_unmask,
+	.mask = vlynq_irq_mask,
+	.set_type = vlynq_irq_type,
+};
+
+static struct irq_chip vlynq_local_chip = {
+	.name = "vlynq local error",
+	.unmask = vlynq_irq_unmask,
+	.mask = vlynq_irq_mask,
+	.ack = vlynq_local_ack,
+};
+
+static struct irq_chip vlynq_remote_chip = {
+	.name = "vlynq local error",
+	.unmask = vlynq_irq_unmask,
+	.mask = vlynq_irq_mask,
+	.ack = vlynq_remote_ack,
+};
+
+static int vlynq_setup_irq(struct vlynq_device *dev)
+{
+	u32 val;
+	int i, virq;
+
+	if (dev->local_irq == dev->remote_irq) {
+		printk(KERN_ERR
+		       "%s: local vlynq irq should be different from remote\n",
+		       dev_name(&dev->dev));
+		return -EINVAL;
+	}
+
+	/* Clear local and remote error bits */
+	writel(readl(&dev->local->status), &dev->local->status);
+	writel(readl(&dev->remote->status), &dev->remote->status);
+
+	/* Now setup interrupts */
+	val = VLYNQ_CTRL_INT_VECTOR(dev->local_irq);
+	val |= VLYNQ_CTRL_INT_ENABLE | VLYNQ_CTRL_INT_LOCAL |
+		VLYNQ_CTRL_INT2CFG;
+	val |= readl(&dev->local->control);
+	writel(VLYNQ_INT_OFFSET, &dev->local->int_ptr);
+	writel(val, &dev->local->control);
+
+	val = VLYNQ_CTRL_INT_VECTOR(dev->remote_irq);
+	val |= VLYNQ_CTRL_INT_ENABLE;
+	val |= readl(&dev->remote->control);
+	writel(VLYNQ_INT_OFFSET, &dev->remote->int_ptr);
+	writel(val, &dev->remote->int_ptr);
+	writel(val, &dev->remote->control);
+
+	for (i = dev->irq_start; i <= dev->irq_end; i++) {
+		virq = i - dev->irq_start;
+		if (virq == dev->local_irq) {
+			set_irq_chip_and_handler(i, &vlynq_local_chip,
+						 handle_level_irq);
+			set_irq_chip_data(i, dev);
+		} else if (virq == dev->remote_irq) {
+			set_irq_chip_and_handler(i, &vlynq_remote_chip,
+						 handle_level_irq);
+			set_irq_chip_data(i, dev);
+		} else {
+			set_irq_chip_and_handler(i, &vlynq_irq_chip,
+						 handle_simple_irq);
+			set_irq_chip_data(i, dev);
+			writel(0, &dev->remote->int_device[virq >> 2]);
+		}
+	}
+
+	if (request_irq(dev->irq, vlynq_irq, IRQF_SHARED, "vlynq", dev)) {
+		printk(KERN_ERR "%s: request_irq failed\n",
+					dev_name(&dev->dev));
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+static void vlynq_device_release(struct device *dev)
+{
+	struct vlynq_device *vdev = to_vlynq_device(dev);
+	kfree(vdev);
+}
+
+static int vlynq_device_match(struct device *dev,
+			      struct device_driver *drv)
+{
+	struct vlynq_device *vdev = to_vlynq_device(dev);
+	struct vlynq_driver *vdrv = to_vlynq_driver(drv);
+	struct vlynq_device_id *ids = vdrv->id_table;
+
+	while (ids->id) {
+		if (ids->id == vdev->dev_id) {
+			vdev->divisor = ids->divisor;
+			vlynq_set_drvdata(vdev, ids);
+			printk(KERN_INFO "Driver found for VLYNQ "
+				"device: %08x\n", vdev->dev_id);
+			return 1;
+		}
+		printk(KERN_DEBUG "Not using the %08x VLYNQ device's driver"
+			" for VLYNQ device: %08x\n", ids->id, vdev->dev_id);
+		ids++;
+	}
+	return 0;
+}
+
+static int vlynq_device_probe(struct device *dev)
+{
+	struct vlynq_device *vdev = to_vlynq_device(dev);
+	struct vlynq_driver *drv = to_vlynq_driver(dev->driver);
+	struct vlynq_device_id *id = vlynq_get_drvdata(vdev);
+	int result = -ENODEV;
+
+	if (drv->probe)
+		result = drv->probe(vdev, id);
+	if (result)
+		put_device(dev);
+	return result;
+}
+
+static int vlynq_device_remove(struct device *dev)
+{
+	struct vlynq_driver *drv = to_vlynq_driver(dev->driver);
+
+	if (drv->remove)
+		drv->remove(to_vlynq_device(dev));
+
+	return 0;
+}
+
+int __vlynq_register_driver(struct vlynq_driver *driver, struct module *owner)
+{
+	driver->driver.name = driver->name;
+	driver->driver.bus = &vlynq_bus_type;
+	return driver_register(&driver->driver);
+}
+EXPORT_SYMBOL(__vlynq_register_driver);
+
+void vlynq_unregister_driver(struct vlynq_driver *driver)
+{
+	driver_unregister(&driver->driver);
+}
+EXPORT_SYMBOL(vlynq_unregister_driver);
+
+/*
+ * A VLYNQ remote device can clock the VLYNQ bus master
+ * using a dedicated clock line. In that case, both the
+ * remove device and the bus master should have the same
+ * serial clock dividers configured. Iterate through the
+ * 8 possible dividers until we actually link with the
+ * device.
+ */
+static int __vlynq_try_remote(struct vlynq_device *dev)
+{
+	int i;
+
+	vlynq_reset(dev);
+	for (i = dev->dev_id ? vlynq_rdiv2 : vlynq_rdiv8; dev->dev_id ?
+			i <= vlynq_rdiv8 : i >= vlynq_rdiv2;
+		dev->dev_id ? i++ : i--) {
+
+		if (!vlynq_linked(dev))
+			break;
+
+		writel((readl(&dev->remote->control) &
+				~VLYNQ_CTRL_CLOCK_MASK) |
+				VLYNQ_CTRL_CLOCK_INT |
+				VLYNQ_CTRL_CLOCK_DIV(i - vlynq_rdiv1),
+				&dev->remote->control);
+		writel((readl(&dev->local->control)
+				& ~(VLYNQ_CTRL_CLOCK_INT |
+				VLYNQ_CTRL_CLOCK_MASK)) |
+				VLYNQ_CTRL_CLOCK_DIV(i - vlynq_rdiv1),
+				&dev->local->control);
+
+		if (vlynq_linked(dev)) {
+			printk(KERN_DEBUG
+				"%s: using remote clock divisor %d\n",
+				dev_name(&dev->dev), i - vlynq_rdiv1 + 1);
+			dev->divisor = i;
+			return 0;
+		} else {
+			vlynq_reset(dev);
+		}
+	}
+
+	return -ENODEV;
+}
+
+/*
+ * A VLYNQ remote device can be clocked by the VLYNQ bus
+ * master using a dedicated clock line. In that case, only
+ * the bus master configures the serial clock divider.
+ * Iterate through the 8 possible dividers until we
+ * actually get a link with the device.
+ */
+static int __vlynq_try_local(struct vlynq_device *dev)
+{
+	int i;
+
+	vlynq_reset(dev);
+
+	for (i = dev->dev_id ? vlynq_ldiv2 : vlynq_ldiv8; dev->dev_id ?
+			i <= vlynq_ldiv8 : i >= vlynq_ldiv2;
+		dev->dev_id ? i++ : i--) {
+
+		writel((readl(&dev->local->control) &
+				~VLYNQ_CTRL_CLOCK_MASK) |
+				VLYNQ_CTRL_CLOCK_INT |
+				VLYNQ_CTRL_CLOCK_DIV(i - vlynq_ldiv1),
+				&dev->local->control);
+
+		if (vlynq_linked(dev)) {
+			printk(KERN_DEBUG
+				"%s: using local clock divisor %d\n",
+				dev_name(&dev->dev), i - vlynq_ldiv1 + 1);
+			dev->divisor = i;
+			return 0;
+		} else {
+			vlynq_reset(dev);
+		}
+	}
+
+	return -ENODEV;
+}
+
+/*
+ * When using external clocking method, serial clock
+ * is supplied by an external oscillator, therefore we
+ * should mask the local clock bit in the clock control
+ * register for both the bus master and the remote device.
+ */
+static int __vlynq_try_external(struct vlynq_device *dev)
+{
+	vlynq_reset(dev);
+	if (!vlynq_linked(dev))
+		return -ENODEV;
+
+	writel((readl(&dev->remote->control) &
+			~VLYNQ_CTRL_CLOCK_INT),
+			&dev->remote->control);
+
+	writel((readl(&dev->local->control) &
+			~VLYNQ_CTRL_CLOCK_INT),
+			&dev->local->control);
+
+	if (vlynq_linked(dev)) {
+		printk(KERN_DEBUG "%s: using external clock\n",
+			dev_name(&dev->dev));
+			dev->divisor = vlynq_div_external;
+		return 0;
+	}
+
+	return -ENODEV;
+}
+
+static int __vlynq_enable_device(struct vlynq_device *dev)
+{
+	int result;
+	struct plat_vlynq_ops *ops = dev->dev.platform_data;
+
+	result = ops->on(dev);
+	if (result)
+		return result;
+
+	switch (dev->divisor) {
+	case vlynq_div_external:
+	case vlynq_div_auto:
+		/* When the device is brought from reset it should have clock
+		 * generation negotiated by hardware.
+		 * Check which device is generating clocks and perform setup
+		 * accordingly */
+		if (vlynq_linked(dev) && readl(&dev->remote->control) &
+		   VLYNQ_CTRL_CLOCK_INT) {
+			if (!__vlynq_try_remote(dev) ||
+				!__vlynq_try_local(dev)  ||
+				!__vlynq_try_external(dev))
+				return 0;
+		} else {
+			if (!__vlynq_try_external(dev) ||
+				!__vlynq_try_local(dev)    ||
+				!__vlynq_try_remote(dev))
+				return 0;
+		}
+		break;
+	case vlynq_ldiv1:
+	case vlynq_ldiv2:
+	case vlynq_ldiv3:
+	case vlynq_ldiv4:
+	case vlynq_ldiv5:
+	case vlynq_ldiv6:
+	case vlynq_ldiv7:
+	case vlynq_ldiv8:
+		writel(VLYNQ_CTRL_CLOCK_INT |
+			VLYNQ_CTRL_CLOCK_DIV(dev->divisor -
+			vlynq_ldiv1), &dev->local->control);
+		writel(0, &dev->remote->control);
+		if (vlynq_linked(dev)) {
+			printk(KERN_DEBUG
+				"%s: using local clock divisor %d\n",
+				dev_name(&dev->dev),
+				dev->divisor - vlynq_ldiv1 + 1);
+			return 0;
+		}
+		break;
+	case vlynq_rdiv1:
+	case vlynq_rdiv2:
+	case vlynq_rdiv3:
+	case vlynq_rdiv4:
+	case vlynq_rdiv5:
+	case vlynq_rdiv6:
+	case vlynq_rdiv7:
+	case vlynq_rdiv8:
+		writel(0, &dev->local->control);
+		writel(VLYNQ_CTRL_CLOCK_INT |
+			VLYNQ_CTRL_CLOCK_DIV(dev->divisor -
+			vlynq_rdiv1), &dev->remote->control);
+		if (vlynq_linked(dev)) {
+			printk(KERN_DEBUG
+				"%s: using remote clock divisor %d\n",
+				dev_name(&dev->dev),
+				dev->divisor - vlynq_rdiv1 + 1);
+			return 0;
+		}
+		break;
+	}
+
+	ops->off(dev);
+	return -ENODEV;
+}
+
+int vlynq_enable_device(struct vlynq_device *dev)
+{
+	struct plat_vlynq_ops *ops = dev->dev.platform_data;
+	int result = -ENODEV;
+
+	result = __vlynq_enable_device(dev);
+	if (result)
+		return result;
+
+	result = vlynq_setup_irq(dev);
+	if (result)
+		ops->off(dev);
+
+	dev->enabled = !result;
+	return result;
+}
+EXPORT_SYMBOL(vlynq_enable_device);
+
+
+void vlynq_disable_device(struct vlynq_device *dev)
+{
+	struct plat_vlynq_ops *ops = dev->dev.platform_data;
+
+	dev->enabled = 0;
+	free_irq(dev->irq, dev);
+	ops->off(dev);
+}
+EXPORT_SYMBOL(vlynq_disable_device);
+
+int vlynq_set_local_mapping(struct vlynq_device *dev, u32 tx_offset,
+			    struct vlynq_mapping *mapping)
+{
+	int i;
+
+	if (!dev->enabled)
+		return -ENXIO;
+
+	writel(tx_offset, &dev->local->tx_offset);
+	for (i = 0; i < 4; i++) {
+		writel(mapping[i].offset, &dev->local->rx_mapping[i].offset);
+		writel(mapping[i].size, &dev->local->rx_mapping[i].size);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(vlynq_set_local_mapping);
+
+int vlynq_set_remote_mapping(struct vlynq_device *dev, u32 tx_offset,
+			     struct vlynq_mapping *mapping)
+{
+	int i;
+
+	if (!dev->enabled)
+		return -ENXIO;
+
+	writel(tx_offset, &dev->remote->tx_offset);
+	for (i = 0; i < 4; i++) {
+		writel(mapping[i].offset, &dev->remote->rx_mapping[i].offset);
+		writel(mapping[i].size, &dev->remote->rx_mapping[i].size);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(vlynq_set_remote_mapping);
+
+int vlynq_set_local_irq(struct vlynq_device *dev, int virq)
+{
+	int irq = dev->irq_start + virq;
+	if (dev->enabled)
+		return -EBUSY;
+
+	if ((irq < dev->irq_start) || (irq > dev->irq_end))
+		return -EINVAL;
+
+	if (virq == dev->remote_irq)
+		return -EINVAL;
+
+	dev->local_irq = virq;
+
+	return 0;
+}
+EXPORT_SYMBOL(vlynq_set_local_irq);
+
+int vlynq_set_remote_irq(struct vlynq_device *dev, int virq)
+{
+	int irq = dev->irq_start + virq;
+	if (dev->enabled)
+		return -EBUSY;
+
+	if ((irq < dev->irq_start) || (irq > dev->irq_end))
+		return -EINVAL;
+
+	if (virq == dev->local_irq)
+		return -EINVAL;
+
+	dev->remote_irq = virq;
+
+	return 0;
+}
+EXPORT_SYMBOL(vlynq_set_remote_irq);
+
+static int vlynq_probe(struct platform_device *pdev)
+{
+	struct vlynq_device *dev;
+	struct resource *regs_res, *mem_res, *irq_res;
+	int len, result;
+
+	regs_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs");
+	if (!regs_res)
+		return -ENODEV;
+
+	mem_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "mem");
+	if (!mem_res)
+		return -ENODEV;
+
+	irq_res = platform_get_resource_byname(pdev, IORESOURCE_IRQ, "devirq");
+	if (!irq_res)
+		return -ENODEV;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev) {
+		printk(KERN_ERR
+		       "vlynq: failed to allocate device structure\n");
+		return -ENOMEM;
+	}
+
+	dev->id = pdev->id;
+	dev->dev.bus = &vlynq_bus_type;
+	dev->dev.parent = &pdev->dev;
+	dev_set_name(&dev->dev, "vlynq%d", dev->id);
+	dev->dev.platform_data = pdev->dev.platform_data;
+	dev->dev.release = vlynq_device_release;
+
+	dev->regs_start = regs_res->start;
+	dev->regs_end = regs_res->end;
+	dev->mem_start = mem_res->start;
+	dev->mem_end = mem_res->end;
+
+	len = regs_res->end - regs_res->start;
+	if (!request_mem_region(regs_res->start, len, dev_name(&dev->dev))) {
+		printk(KERN_ERR "%s: Can't request vlynq registers\n",
+		       dev_name(&dev->dev));
+		result = -ENXIO;
+		goto fail_request;
+	}
+
+	dev->local = ioremap(regs_res->start, len);
+	if (!dev->local) {
+		printk(KERN_ERR "%s: Can't remap vlynq registers\n",
+		       dev_name(&dev->dev));
+		result = -ENXIO;
+		goto fail_remap;
+	}
+
+	dev->remote = (struct vlynq_regs *)((void *)dev->local +
+					    VLYNQ_REMOTE_OFFSET);
+
+	dev->irq = platform_get_irq_byname(pdev, "irq");
+	dev->irq_start = irq_res->start;
+	dev->irq_end = irq_res->end;
+	dev->local_irq = dev->irq_end - dev->irq_start;
+	dev->remote_irq = dev->local_irq - 1;
+
+	if (device_register(&dev->dev))
+		goto fail_register;
+	platform_set_drvdata(pdev, dev);
+
+	printk(KERN_INFO "%s: regs 0x%p, irq %d, mem 0x%p\n",
+	       dev_name(&dev->dev), (void *)dev->regs_start, dev->irq,
+	       (void *)dev->mem_start);
+
+	dev->dev_id = 0;
+	dev->divisor = vlynq_div_auto;
+	result = __vlynq_enable_device(dev);
+	if (result == 0) {
+		dev->dev_id = readl(&dev->remote->chip);
+		((struct plat_vlynq_ops *)(dev->dev.platform_data))->off(dev);
+	}
+	if (dev->dev_id)
+		printk(KERN_INFO "Found a VLYNQ device: %08x\n", dev->dev_id);
+
+	return 0;
+
+fail_register:
+	iounmap(dev->local);
+fail_remap:
+fail_request:
+	release_mem_region(regs_res->start, len);
+	kfree(dev);
+	return result;
+}
+
+static int vlynq_remove(struct platform_device *pdev)
+{
+	struct vlynq_device *dev = platform_get_drvdata(pdev);
+
+	device_unregister(&dev->dev);
+	iounmap(dev->local);
+	release_mem_region(dev->regs_start, dev->regs_end - dev->regs_start);
+
+	kfree(dev);
+
+	return 0;
+}
+
+static struct platform_driver vlynq_platform_driver = {
+	.driver.name = "vlynq",
+	.probe = vlynq_probe,
+	.remove = __devexit_p(vlynq_remove),
+};
+
+struct bus_type vlynq_bus_type = {
+	.name = "vlynq",
+	.match = vlynq_device_match,
+	.probe = vlynq_device_probe,
+	.remove = vlynq_device_remove,
+};
+EXPORT_SYMBOL(vlynq_bus_type);
+
+static int __devinit vlynq_init(void)
+{
+	int res = 0;
+
+	res = bus_register(&vlynq_bus_type);
+	if (res)
+		goto fail_bus;
+
+	res = platform_driver_register(&vlynq_platform_driver);
+	if (res)
+		goto fail_platform;
+
+	return 0;
+
+fail_platform:
+	bus_unregister(&vlynq_bus_type);
+fail_bus:
+	return res;
+}
+
+static void __devexit vlynq_exit(void)
+{
+	platform_driver_unregister(&vlynq_platform_driver);
+	bus_unregister(&vlynq_bus_type);
+}
+
+module_init(vlynq_init);
+module_exit(vlynq_exit);
diff --git a/include/linux/vlynq.h b/include/linux/vlynq.h
new file mode 100644
index 00000000000..8f6a95882b0
--- /dev/null
+++ b/include/linux/vlynq.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright (C) 2006, 2007 Eugene Konev <ejka@openwrt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifndef __VLYNQ_H__
+#define __VLYNQ_H__
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+#define VLYNQ_NUM_IRQS 32
+
+struct vlynq_mapping {
+	u32 size;
+	u32 offset;
+};
+
+enum vlynq_divisor {
+	vlynq_div_auto = 0,
+	vlynq_ldiv1,
+	vlynq_ldiv2,
+	vlynq_ldiv3,
+	vlynq_ldiv4,
+	vlynq_ldiv5,
+	vlynq_ldiv6,
+	vlynq_ldiv7,
+	vlynq_ldiv8,
+	vlynq_rdiv1,
+	vlynq_rdiv2,
+	vlynq_rdiv3,
+	vlynq_rdiv4,
+	vlynq_rdiv5,
+	vlynq_rdiv6,
+	vlynq_rdiv7,
+	vlynq_rdiv8,
+	vlynq_div_external
+};
+
+struct vlynq_device_id {
+	u32 id;
+	enum vlynq_divisor divisor;
+	unsigned long driver_data;
+};
+
+struct vlynq_regs;
+struct vlynq_device {
+	u32 id, dev_id;
+	int local_irq;
+	int remote_irq;
+	enum vlynq_divisor divisor;
+	u32 regs_start, regs_end;
+	u32 mem_start, mem_end;
+	u32 irq_start, irq_end;
+	int irq;
+	int enabled;
+	struct vlynq_regs *local;
+	struct vlynq_regs *remote;
+	struct device dev;
+};
+
+struct vlynq_driver {
+	char *name;
+	struct vlynq_device_id *id_table;
+	int (*probe)(struct vlynq_device *dev, struct vlynq_device_id *id);
+	void (*remove)(struct vlynq_device *dev);
+	struct device_driver driver;
+};
+
+struct plat_vlynq_ops {
+	int (*on)(struct vlynq_device *dev);
+	void (*off)(struct vlynq_device *dev);
+};
+
+static inline struct vlynq_driver *to_vlynq_driver(struct device_driver *drv)
+{
+	return container_of(drv, struct vlynq_driver, driver);
+}
+
+static inline struct vlynq_device *to_vlynq_device(struct device *device)
+{
+	return container_of(device, struct vlynq_device, dev);
+}
+
+extern struct bus_type vlynq_bus_type;
+
+extern int __vlynq_register_driver(struct vlynq_driver *driver,
+				   struct module *owner);
+
+static inline int vlynq_register_driver(struct vlynq_driver *driver)
+{
+	return __vlynq_register_driver(driver, THIS_MODULE);
+}
+
+static inline void *vlynq_get_drvdata(struct vlynq_device *dev)
+{
+	return dev_get_drvdata(&dev->dev);
+}
+
+static inline void vlynq_set_drvdata(struct vlynq_device *dev, void *data)
+{
+	dev_set_drvdata(&dev->dev, data);
+}
+
+static inline u32 vlynq_mem_start(struct vlynq_device *dev)
+{
+	return dev->mem_start;
+}
+
+static inline u32 vlynq_mem_end(struct vlynq_device *dev)
+{
+	return dev->mem_end;
+}
+
+static inline u32 vlynq_mem_len(struct vlynq_device *dev)
+{
+	return dev->mem_end - dev->mem_start + 1;
+}
+
+static inline int vlynq_virq_to_irq(struct vlynq_device *dev, int virq)
+{
+	int irq = dev->irq_start + virq;
+	if ((irq < dev->irq_start) || (irq > dev->irq_end))
+		return -EINVAL;
+
+	return irq;
+}
+
+static inline int vlynq_irq_to_virq(struct vlynq_device *dev, int irq)
+{
+	if ((irq < dev->irq_start) || (irq > dev->irq_end))
+		return -EINVAL;
+
+	return irq - dev->irq_start;
+}
+
+extern void vlynq_unregister_driver(struct vlynq_driver *driver);
+extern int vlynq_enable_device(struct vlynq_device *dev);
+extern void vlynq_disable_device(struct vlynq_device *dev);
+extern int vlynq_set_local_mapping(struct vlynq_device *dev, u32 tx_offset,
+				   struct vlynq_mapping *mapping);
+extern int vlynq_set_remote_mapping(struct vlynq_device *dev, u32 tx_offset,
+				    struct vlynq_mapping *mapping);
+extern int vlynq_set_local_irq(struct vlynq_device *dev, int virq);
+extern int vlynq_set_remote_irq(struct vlynq_device *dev, int virq);
+
+#endif /* __VLYNQ_H__ */
-- 
cgit v1.2.3-70-g09d2


From 8f3128e714ded7cf1e8c786c204a4f253b5d8ff4 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Tue, 16 Jun 2009 15:34:17 -0700
Subject: lis3: add click function

The LIS302DL accelerometer chip has a 'click' feature which can be used to
detect sudden motion on any of the three axis.  Configuration data is
passed via spi platform_data and no action is taken if that's not
specified, so it won't harm any existing platform.

To make the configuration effective, the IRQ lines need to be set up
appropriately.  This patch also adds a way to do that from board support
code.

The DD_* definitions were factored out to an own enum because they are
specific to LIS3LV02D devices.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Acked-by: Pavel Machek <pavel@ucw.cz>
Acked-by: Eric Piel <eric.piel@tremplin-utc.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/hwmon/lis3lv02d.c     | 20 ++++++++++++++++++++
 drivers/hwmon/lis3lv02d.h     | 19 ++++++++++++++++++-
 drivers/hwmon/lis3lv02d_spi.c |  1 +
 include/linux/lis3lv02d.h     | 39 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 78 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/lis3lv02d.h

(limited to 'include')

diff --git a/drivers/hwmon/lis3lv02d.c b/drivers/hwmon/lis3lv02d.c
index 366190609c7..271338bdb6b 100644
--- a/drivers/hwmon/lis3lv02d.c
+++ b/drivers/hwmon/lis3lv02d.c
@@ -438,6 +438,26 @@ int lis3lv02d_init_device(struct lis3lv02d *dev)
 	if (lis3lv02d_joystick_enable())
 		printk(KERN_ERR DRIVER_NAME ": joystick initialization failed\n");
 
+	/* passing in platform specific data is purely optional and only
+	 * used by the SPI transport layer at the moment */
+	if (dev->pdata) {
+		struct lis3lv02d_platform_data *p = dev->pdata;
+
+		if (p->click_flags && (dev->whoami == LIS_SINGLE_ID)) {
+			dev->write(dev, CLICK_CFG, p->click_flags);
+			dev->write(dev, CLICK_TIMELIMIT, p->click_time_limit);
+			dev->write(dev, CLICK_LATENCY, p->click_latency);
+			dev->write(dev, CLICK_WINDOW, p->click_window);
+			dev->write(dev, CLICK_THSZ, p->click_thresh_z & 0xf);
+			dev->write(dev, CLICK_THSY_X,
+					(p->click_thresh_x & 0xf) |
+					(p->click_thresh_y << 4));
+		}
+
+		if (p->irq_cfg)
+			dev->write(dev, CTRL_REG3, p->irq_cfg);
+	}
+
 	/* bail if we did not get an IRQ from the bus layer */
 	if (!dev->irq) {
 		printk(KERN_ERR DRIVER_NAME
diff --git a/drivers/hwmon/lis3lv02d.h b/drivers/hwmon/lis3lv02d.h
index 5a5a196e6a6..e320e2f511f 100644
--- a/drivers/hwmon/lis3lv02d.h
+++ b/drivers/hwmon/lis3lv02d.h
@@ -29,12 +29,14 @@
  * They can also be connected via I²C.
  */
 
+#include <linux/lis3lv02d.h>
+
 /* 2-byte registers */
 #define LIS_DOUBLE_ID	0x3A /* LIS3LV02D[LQ] */
 /* 1-byte registers */
 #define LIS_SINGLE_ID	0x3B /* LIS[32]02DL and others */
 
-enum lis3lv02d_reg {
+enum lis3_reg {
 	WHO_AM_I	= 0x0F,
 	OFFSET_X	= 0x16,
 	OFFSET_Y	= 0x17,
@@ -62,6 +64,19 @@ enum lis3lv02d_reg {
 	FF_WU_THS_L	= 0x34,
 	FF_WU_THS_H	= 0x35,
 	FF_WU_DURATION	= 0x36,
+};
+
+enum lis302d_reg {
+	CLICK_CFG	= 0x38,
+	CLICK_SRC	= 0x39,
+	CLICK_THSY_X	= 0x3B,
+	CLICK_THSZ	= 0x3C,
+	CLICK_TIMELIMIT	= 0x3D,
+	CLICK_LATENCY	= 0x3E,
+	CLICK_WINDOW	= 0x3F,
+};
+
+enum lis3lv02d_reg {
 	DD_CFG		= 0x38,
 	DD_SRC		= 0x39,
 	DD_ACK		= 0x3A,
@@ -183,6 +198,8 @@ struct lis3lv02d {
 	struct fasync_struct	*async_queue; /* queue for the misc device */
 	wait_queue_head_t	misc_wait; /* Wait queue for the misc device */
 	unsigned long		misc_opened; /* bit0: whether the device is open */
+
+	struct lis3lv02d_platform_data *pdata;	/* for passing board config */
 };
 
 int lis3lv02d_init_device(struct lis3lv02d *lis3);
diff --git a/drivers/hwmon/lis3lv02d_spi.c b/drivers/hwmon/lis3lv02d_spi.c
index 07ae74b0e19..3827ff04485 100644
--- a/drivers/hwmon/lis3lv02d_spi.c
+++ b/drivers/hwmon/lis3lv02d_spi.c
@@ -72,6 +72,7 @@ static int __devinit lis302dl_spi_probe(struct spi_device *spi)
 	lis3_dev.write = lis3_spi_write;
 	lis3_dev.irq = spi->irq;
 	lis3_dev.ac = lis3lv02d_axis_normal;
+	lis3_dev.pdata = spi->dev.platform_data;
 	spi_set_drvdata(spi, &lis3_dev);
 
 	ret = lis3lv02d_init_device(&lis3_dev);
diff --git a/include/linux/lis3lv02d.h b/include/linux/lis3lv02d.h
new file mode 100644
index 00000000000..ad651f4e45a
--- /dev/null
+++ b/include/linux/lis3lv02d.h
@@ -0,0 +1,39 @@
+#ifndef __LIS3LV02D_H_
+#define __LIS3LV02D_H_
+
+struct lis3lv02d_platform_data {
+	/* please note: the 'click' feature is only supported for
+	 * LIS[32]02DL variants of the chip and will be ignored for
+	 * others */
+#define LIS3_CLICK_SINGLE_X	(1 << 0)
+#define LIS3_CLICK_DOUBLE_X	(1 << 1)
+#define LIS3_CLICK_SINGLE_Y	(1 << 2)
+#define LIS3_CLICK_DOUBLE_Y	(1 << 3)
+#define LIS3_CLICK_SINGLE_Z	(1 << 4)
+#define LIS3_CLICK_DOUBLE_Z	(1 << 5)
+	unsigned char click_flags;
+	unsigned char click_thresh_x;
+	unsigned char click_thresh_y;
+	unsigned char click_thresh_z;
+	unsigned char click_time_limit;
+	unsigned char click_latency;
+	unsigned char click_window;
+
+#define LIS3_IRQ1_DISABLE	(0 << 0)
+#define LIS3_IRQ1_FF_WU_1	(1 << 0)
+#define LIS3_IRQ1_FF_WU_2	(2 << 0)
+#define LIS3_IRQ1_FF_WU_12	(3 << 0)
+#define LIS3_IRQ1_DATA_READY	(4 << 0)
+#define LIS3_IRQ1_CLICK		(7 << 0)
+#define LIS3_IRQ2_DISABLE	(0 << 3)
+#define LIS3_IRQ2_FF_WU_1	(1 << 3)
+#define LIS3_IRQ2_FF_WU_2	(2 << 3)
+#define LIS3_IRQ2_FF_WU_12	(3 << 3)
+#define LIS3_IRQ2_DATA_READY	(4 << 3)
+#define LIS3_IRQ2_CLICK		(7 << 3)
+#define LIS3_IRQ_OPEN_DRAIN	(1 << 6)
+#define LIS3_IRQ_ACTIVE_HIGH	(1 << 7)
+	unsigned char irq_cfg;
+};
+
+#endif /* __LIS3LV02D_H_ */
-- 
cgit v1.2.3-70-g09d2


From ae52bb2384f721562f15f719de1acb8e934733cb Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Date: Tue, 16 Jun 2009 15:34:19 -0700
Subject: fbdev: move logo externs to header file

Now we have __initconst, we can finally move the external declarations for
the various Linux logo structures to <linux/linux_logo.h>.

James' ack dates back to the previous submission (way to long ago), when the
logos were still __initdata, which caused failures on some platforms with some
toolchain versions.

Signed-off-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Acked-by: James Simmons <jsimmons@infradead.org>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Cc: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/kernel/prom_init.c        |  3 ---
 arch/powerpc/platforms/cell/spu_base.c | 11 +----------
 arch/um/include/shared/init.h          |  2 +-
 drivers/video/logo/logo.c              | 15 ---------------
 include/linux/init.h                   |  2 +-
 include/linux/linux_logo.h             | 16 ++++++++++++++++
 scripts/pnmtologo.c                    | 18 +++++++++---------
 7 files changed, 28 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 2f0e64b5364..ef6f64950e9 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -44,10 +44,7 @@
 #include <asm/sections.h>
 #include <asm/machdep.h>
 
-#ifdef CONFIG_LOGO_LINUX_CLUT224
 #include <linux/linux_logo.h>
-extern const struct linux_logo logo_linux_clut224;
-#endif
 
 /*
  * Properties whose value is longer than this get excluded from our
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index 9abd210d87c..8547e86bfb4 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -752,17 +752,8 @@ static int __init init_spu_base(void)
 		goto out_unregister_sysdev_class;
 	}
 
-	if (ret > 0) {
-		/*
-		 * We cannot put the forward declaration in
-		 * <linux/linux_logo.h> because of conflicting session type
-		 * conflicts for const and __initdata with different compiler
-		 * versions
-		 */
-		extern const struct linux_logo logo_spe_clut224;
-
+	if (ret > 0)
 		fb_append_extra_logo(&logo_spe_clut224, ret);
-	}
 
 	mutex_lock(&spu_full_list_mutex);
 	xmon_register_spus(&spu_full_list);
diff --git a/arch/um/include/shared/init.h b/arch/um/include/shared/init.h
index 37dd097c16c..b3906f860a8 100644
--- a/arch/um/include/shared/init.h
+++ b/arch/um/include/shared/init.h
@@ -27,7 +27,7 @@
  * sign followed by value, e.g.:
  *
  * static int init_variable __initdata = 0;
- * static char linux_logo[] __initdata = { 0x32, 0x36, ... };
+ * static const char linux_logo[] __initconst = { 0x32, 0x36, ... };
  *
  * Don't forget to initialize data not at file scope, i.e. within a function,
  * as gcc otherwise puts the data into the bss section and not into the init
diff --git a/drivers/video/logo/logo.c b/drivers/video/logo/logo.c
index 2e85a2b52d0..ea7a8ccc830 100644
--- a/drivers/video/logo/logo.c
+++ b/drivers/video/logo/logo.c
@@ -21,21 +21,6 @@
 #include <asm/bootinfo.h>
 #endif
 
-extern const struct linux_logo logo_linux_mono;
-extern const struct linux_logo logo_linux_vga16;
-extern const struct linux_logo logo_linux_clut224;
-extern const struct linux_logo logo_blackfin_vga16;
-extern const struct linux_logo logo_blackfin_clut224;
-extern const struct linux_logo logo_dec_clut224;
-extern const struct linux_logo logo_mac_clut224;
-extern const struct linux_logo logo_parisc_clut224;
-extern const struct linux_logo logo_sgi_clut224;
-extern const struct linux_logo logo_sun_clut224;
-extern const struct linux_logo logo_superh_mono;
-extern const struct linux_logo logo_superh_vga16;
-extern const struct linux_logo logo_superh_clut224;
-extern const struct linux_logo logo_m32r_clut224;
-
 static int nologo;
 module_param(nologo, bool, 0);
 MODULE_PARM_DESC(nologo, "Disables startup logo");
diff --git a/include/linux/init.h b/include/linux/init.h
index b2189803f19..8c2c9989626 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -29,7 +29,7 @@
  * sign followed by value, e.g.:
  *
  * static int init_variable __initdata = 0;
- * static char linux_logo[] __initdata = { 0x32, 0x36, ... };
+ * static const char linux_logo[] __initconst = { 0x32, 0x36, ... };
  *
  * Don't forget to initialize data not at file scope, i.e. within a function,
  * as gcc otherwise puts the data into the bss section and not into the init
diff --git a/include/linux/linux_logo.h b/include/linux/linux_logo.h
index 08a92969c76..ca5bd91d12e 100644
--- a/include/linux/linux_logo.h
+++ b/include/linux/linux_logo.h
@@ -32,6 +32,22 @@ struct linux_logo {
 	const unsigned char *data;
 };
 
+extern const struct linux_logo logo_linux_mono;
+extern const struct linux_logo logo_linux_vga16;
+extern const struct linux_logo logo_linux_clut224;
+extern const struct linux_logo logo_blackfin_vga16;
+extern const struct linux_logo logo_blackfin_clut224;
+extern const struct linux_logo logo_dec_clut224;
+extern const struct linux_logo logo_mac_clut224;
+extern const struct linux_logo logo_parisc_clut224;
+extern const struct linux_logo logo_sgi_clut224;
+extern const struct linux_logo logo_sun_clut224;
+extern const struct linux_logo logo_superh_mono;
+extern const struct linux_logo logo_superh_vga16;
+extern const struct linux_logo logo_superh_clut224;
+extern const struct linux_logo logo_m32r_clut224;
+extern const struct linux_logo logo_spe_clut224;
+
 extern const struct linux_logo *fb_find_logo(int depth);
 #ifdef CONFIG_FB_LOGO_EXTRA
 extern void fb_append_extra_logo(const struct linux_logo *logo,
diff --git a/scripts/pnmtologo.c b/scripts/pnmtologo.c
index 6aa2a2483f8..64f5ddb09ea 100644
--- a/scripts/pnmtologo.c
+++ b/scripts/pnmtologo.c
@@ -237,22 +237,22 @@ static void write_header(void)
     fprintf(out, " *  Linux logo %s\n", logoname);
     fputs(" */\n\n", out);
     fputs("#include <linux/linux_logo.h>\n\n", out);
-    fprintf(out, "static unsigned char %s_data[] __initdata = {\n",
+    fprintf(out, "static const unsigned char %s_data[] __initconst = {\n",
 	    logoname);
 }
 
 static void write_footer(void)
 {
     fputs("\n};\n\n", out);
-    fprintf(out, "struct linux_logo %s __initdata = {\n", logoname);
-    fprintf(out, "    .type\t= %s,\n", logo_types[logo_type]);
-    fprintf(out, "    .width\t= %d,\n", logo_width);
-    fprintf(out, "    .height\t= %d,\n", logo_height);
+    fprintf(out, "const struct linux_logo %s __initconst = {\n", logoname);
+    fprintf(out, "\t.type\t\t= %s,\n", logo_types[logo_type]);
+    fprintf(out, "\t.width\t\t= %d,\n", logo_width);
+    fprintf(out, "\t.height\t\t= %d,\n", logo_height);
     if (logo_type == LINUX_LOGO_CLUT224) {
-	fprintf(out, "    .clutsize\t= %d,\n", logo_clutsize);
-	fprintf(out, "    .clut\t= %s_clut,\n", logoname);
+	fprintf(out, "\t.clutsize\t= %d,\n", logo_clutsize);
+	fprintf(out, "\t.clut\t\t= %s_clut,\n", logoname);
     }
-    fprintf(out, "    .data\t= %s_data\n", logoname);
+    fprintf(out, "\t.data\t\t= %s_data\n", logoname);
     fputs("};\n\n", out);
 
     /* close logo file */
@@ -374,7 +374,7 @@ static void write_logo_clut224(void)
     fputs("\n};\n\n", out);
 
     /* write logo clut */
-    fprintf(out, "static unsigned char %s_clut[] __initdata = {\n",
+    fprintf(out, "static const unsigned char %s_clut[] __initconst = {\n",
 	    logoname);
     write_hex_cnt = 0;
     for (i = 0; i < logo_clutsize; i++) {
-- 
cgit v1.2.3-70-g09d2


From 4410f3910947dcea8672280b3adecd53cec4e85e Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 16 Jun 2009 15:34:38 -0700
Subject: fbdev: add support for handoff from firmware to hw framebuffers

With KMS we have ran into an issue where we really want the KMS fb driver
to be the one running the console, so panics etc can be shown by switching
out of X etc.

However with vesafb/efifb built-in, we end up with those on fb0 and the
KMS fb driver on fb1, driving the same piece of hw, so this adds an fb
info flag to denote a firmware fbdev, and adds a new aperture base/size
range which can be compared when the hw drivers are installed to see if
there is a conflict with a firmware driver, and if there is the firmware
driver is unregistered and the hw driver takes over.

It uses new aperture_base/size members instead of comparing on the fix
smem_start/length, as smem_start/length might for example only cover the
first 1MB of the PCI aperture, and we could allocate the kms fb from 8MB
into the aperture, thus they would never overlap.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Dave Airlie <airlied@redhat.com>
Acked-by: Peter Jones <pjones@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/i915/intel_fb.c |  8 ++++++++
 drivers/video/efifb.c           |  5 ++++-
 drivers/video/fbmem.c           | 31 +++++++++++++++++++++++++++++++
 drivers/video/vesafb.c          | 15 ++++++++++++++-
 include/linux/fb.h              | 12 +++++++++++-
 5 files changed, 68 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c
index 0ecf6b76a40..8e28e5993df 100644
--- a/drivers/gpu/drm/i915/intel_fb.c
+++ b/drivers/gpu/drm/i915/intel_fb.c
@@ -504,6 +504,14 @@ static int intelfb_create(struct drm_device *dev, uint32_t fb_width,
 	info->fbops = &intelfb_ops;
 
 	info->fix.line_length = fb->pitch;
+
+	/* setup aperture base/size for vesafb takeover */
+	info->aperture_base = dev->mode_config.fb_base;
+	if (IS_I9XX(dev))
+		info->aperture_size = pci_resource_len(dev->pdev, 2);
+	else
+		info->aperture_size = pci_resource_len(dev->pdev, 0);
+
 	info->fix.smem_start = dev->mode_config.fb_base + obj_priv->gtt_offset;
 	info->fix.smem_len = size;
 
diff --git a/drivers/video/efifb.c b/drivers/video/efifb.c
index 8dea2bc9270..eb12182b205 100644
--- a/drivers/video/efifb.c
+++ b/drivers/video/efifb.c
@@ -280,6 +280,9 @@ static int __init efifb_probe(struct platform_device *dev)
 	info->pseudo_palette = info->par;
 	info->par = NULL;
 
+	info->aperture_base = efifb_fix.smem_start;
+	info->aperture_size = size_total;
+
 	info->screen_base = ioremap(efifb_fix.smem_start, efifb_fix.smem_len);
 	if (!info->screen_base) {
 		printk(KERN_ERR "efifb: abort, cannot ioremap video memory "
@@ -337,7 +340,7 @@ static int __init efifb_probe(struct platform_device *dev)
 	info->fbops = &efifb_ops;
 	info->var = efifb_defined;
 	info->fix = efifb_fix;
-	info->flags = FBINFO_FLAG_DEFAULT;
+	info->flags = FBINFO_FLAG_DEFAULT | FBINFO_MISC_FIRMWARE;
 
 	if ((err = fb_alloc_cmap(&info->cmap, 256, 0)) < 0) {
 		printk(KERN_ERR "efifb: cannot allocate colormap\n");
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index d412a1ddc12..f8a09bf8d0c 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -1462,6 +1462,16 @@ static int fb_check_foreignness(struct fb_info *fi)
 	return 0;
 }
 
+static bool fb_do_apertures_overlap(struct fb_info *gen, struct fb_info *hw)
+{
+	/* is the generic aperture base the same as the HW one */
+	if (gen->aperture_base == hw->aperture_base)
+		return true;
+	/* is the generic aperture base inside the hw base->hw base+size */
+	if (gen->aperture_base > hw->aperture_base && gen->aperture_base <= hw->aperture_base + hw->aperture_size)
+		return true;
+	return false;
+}
 /**
  *	register_framebuffer - registers a frame buffer device
  *	@fb_info: frame buffer info structure
@@ -1485,6 +1495,23 @@ register_framebuffer(struct fb_info *fb_info)
 	if (fb_check_foreignness(fb_info))
 		return -ENOSYS;
 
+	/* check all firmware fbs and kick off if the base addr overlaps */
+	for (i = 0 ; i < FB_MAX; i++) {
+		if (!registered_fb[i])
+			continue;
+
+		if (registered_fb[i]->flags & FBINFO_MISC_FIRMWARE) {
+			if (fb_do_apertures_overlap(registered_fb[i], fb_info)) {
+				printk(KERN_ERR "fb: conflicting fb hw usage "
+				       "%s vs %s - removing generic driver\n",
+				       fb_info->fix.id,
+				       registered_fb[i]->fix.id);
+				unregister_framebuffer(registered_fb[i]);
+				break;
+			}
+		}
+	}
+
 	num_registered_fb++;
 	for (i = 0 ; i < FB_MAX; i++)
 		if (!registered_fb[i])
@@ -1586,6 +1613,10 @@ unregister_framebuffer(struct fb_info *fb_info)
 	device_destroy(fb_class, MKDEV(FB_MAJOR, i));
 	event.info = fb_info;
 	fb_notifier_call_chain(FB_EVENT_FB_UNREGISTERED, &event);
+
+	/* this may free fb info */
+	if (fb_info->fbops->fb_destroy)
+		fb_info->fbops->fb_destroy(fb_info);
 done:
 	return ret;
 }
diff --git a/drivers/video/vesafb.c b/drivers/video/vesafb.c
index d6856f43d24..bd37ee1f6a2 100644
--- a/drivers/video/vesafb.c
+++ b/drivers/video/vesafb.c
@@ -174,8 +174,17 @@ static int vesafb_setcolreg(unsigned regno, unsigned red, unsigned green,
 	return err;
 }
 
+static void vesafb_destroy(struct fb_info *info)
+{
+	if (info->screen_base)
+		iounmap(info->screen_base);
+	release_mem_region(info->aperture_base, info->aperture_size);
+	framebuffer_release(info);
+}
+
 static struct fb_ops vesafb_ops = {
 	.owner		= THIS_MODULE,
+	.fb_destroy     = vesafb_destroy,
 	.fb_setcolreg	= vesafb_setcolreg,
 	.fb_pan_display	= vesafb_pan_display,
 	.fb_fillrect	= cfb_fillrect,
@@ -286,6 +295,10 @@ static int __init vesafb_probe(struct platform_device *dev)
 	info->pseudo_palette = info->par;
 	info->par = NULL;
 
+	/* set vesafb aperture size for generic probing */
+	info->aperture_base = screen_info.lfb_base;
+	info->aperture_size = size_total;
+
 	info->screen_base = ioremap(vesafb_fix.smem_start, vesafb_fix.smem_len);
 	if (!info->screen_base) {
 		printk(KERN_ERR
@@ -437,7 +450,7 @@ static int __init vesafb_probe(struct platform_device *dev)
 	info->fbops = &vesafb_ops;
 	info->var = vesafb_defined;
 	info->fix = vesafb_fix;
-	info->flags = FBINFO_FLAG_DEFAULT |
+	info->flags = FBINFO_FLAG_DEFAULT | FBINFO_MISC_FIRMWARE |
 		(ypan ? FBINFO_HWACCEL_YPAN : 0);
 
 	if (!ypan)
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 330c4b1bfca..3c5562a5216 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -677,6 +677,9 @@ struct fb_ops {
 	/* get capability given var */
 	void (*fb_get_caps)(struct fb_info *info, struct fb_blit_caps *caps,
 			    struct fb_var_screeninfo *var);
+
+	/* teardown any resources to do with this framebuffer */
+	void (*fb_destroy)(struct fb_info *info);
 };
 
 #ifdef CONFIG_FB_TILEBLITTING
@@ -786,6 +789,8 @@ struct fb_tile_ops {
 #define FBINFO_MISC_USEREVENT          0x10000 /* event request
 						  from userspace */
 #define FBINFO_MISC_TILEBLITTING       0x20000 /* use tile blitting */
+#define FBINFO_MISC_FIRMWARE           0x40000 /* a replaceable firmware
+						  inited framebuffer */
 
 /* A driver may set this flag to indicate that it does want a set_par to be
  * called every time when fbcon_switch is executed. The advantage is that with
@@ -854,7 +859,12 @@ struct fb_info {
 	u32 state;			/* Hardware state i.e suspend */
 	void *fbcon_par;                /* fbcon use-only private area */
 	/* From here on everything is device dependent */
-	void *par;	
+	void *par;
+	/* we need the PCI or similiar aperture base/size not
+	   smem_start/size as smem_start may just be an object
+	   allocated inside the aperture so may not actually overlap */
+	resource_size_t aperture_base;
+	resource_size_t aperture_size;
 };
 
 #ifdef MODULE
-- 
cgit v1.2.3-70-g09d2


From 3ed167af96ed098187ea41353fe02d1af20d38a1 Mon Sep 17 00:00:00 2001
From: Kristoffer Ericson <kristoffer.ericson@gmail.com>
Date: Tue, 16 Jun 2009 15:34:40 -0700
Subject: fbdev: s1d13xxxfb: add accelerated bitblt functions

Add accelerated bitblt functions to s1d13xxx based video chipsets, more
specificly functions copyarea and fillrect.

It has only been tested and activated for 13506 chipsets but is expected
to work for the majority of s1d13xxx based chips.  This patch also cleans
up the driver with respect of whitespaces and other formatting issues.  We
update the current status comments.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Kristoffer Ericson <kristoffer.ericson@gmail.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/s1d13xxxfb.c | 341 ++++++++++++++++++++++++++++++++++++++++-----
 include/video/s1d13xxxfb.h |   9 ++
 2 files changed, 317 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/drivers/video/s1d13xxxfb.c b/drivers/video/s1d13xxxfb.c
index 0726aecf3b7..0deb0a8867b 100644
--- a/drivers/video/s1d13xxxfb.c
+++ b/drivers/video/s1d13xxxfb.c
@@ -2,6 +2,7 @@
  *
  * (c) 2004 Simtec Electronics
  * (c) 2005 Thibaut VARENE <varenet@parisc-linux.org>
+ * (c) 2009 Kristoffer Ericson <kristoffer.ericson@gmail.com>
  *
  * Driver for Epson S1D13xxx series framebuffer chips
  *
@@ -10,18 +11,10 @@
  *  linux/drivers/video/epson1355fb.c
  *  linux/drivers/video/epson/s1d13xxxfb.c (2.4 driver by Epson)
  *
- * Note, currently only tested on S1D13806 with 16bit CRT.
- * As such, this driver might still contain some hardcoded bits relating to
- * S1D13806.
- * Making it work on other S1D13XXX chips should merely be a matter of adding
- * a few switch()s, some missing glue here and there maybe, and split header
- * files.
- *
  * TODO: - handle dual screen display (CRT and LCD at the same time).
  *	 - check_var(), mode change, etc.
- *	 - PM untested.
- *	 - Accelerated interfaces.
- *	 - Probably not SMP safe :)
+ *	 - probably not SMP safe :)
+ *       - support all bitblt operations on all cards
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License. See the file COPYING in the main directory of this archive for
@@ -31,19 +24,24 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/delay.h>
-
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
 #include <linux/fb.h>
+#include <linux/spinlock_types.h>
+#include <linux/spinlock.h>
 
 #include <asm/io.h>
 
 #include <video/s1d13xxxfb.h>
 
-#define PFX "s1d13xxxfb: "
+#define PFX	"s1d13xxxfb: "
+#define BLIT	"s1d13xxxfb_bitblt: "
 
+/*
+ * set this to enable debugging on general functions
+ */
 #if 0
 #define dbg(fmt, args...) do { printk(KERN_INFO fmt, ## args); } while(0)
 #else
@@ -51,7 +49,21 @@
 #endif
 
 /*
- * List of card production ids
+ * set this to enable debugging on 2D acceleration
+ */
+#if 0
+#define dbg_blit(fmt, args...) do { printk(KERN_INFO BLIT fmt, ## args); } while (0)
+#else
+#define dbg_blit(fmt, args...) do { } while (0)
+#endif
+
+/*
+ * we make sure only one bitblt operation is running
+ */
+static DEFINE_SPINLOCK(s1d13xxxfb_bitblt_lock);
+
+/*
+ * list of card production ids
  */
 static const int s1d13xxxfb_prod_ids[] = {
 	S1D13505_PROD_ID,
@@ -69,7 +81,7 @@ static const char *s1d13xxxfb_prod_names[] = {
 };
 
 /*
- * Here we define the default struct fb_fix_screeninfo
+ * here we define the default struct fb_fix_screeninfo
  */
 static struct fb_fix_screeninfo __devinitdata s1d13xxxfb_fix = {
 	.id		= S1D_FBID,
@@ -145,8 +157,10 @@ crt_enable(struct s1d13xxxfb_par *par, int enable)
 	s1d13xxxfb_writereg(par, S1DREG_COM_DISP_MODE, mode);
 }
 
-/* framebuffer control routines */
 
+/*************************************************************
+ framebuffer control functions
+ *************************************************************/
 static inline void
 s1d13xxxfb_setup_pseudocolour(struct fb_info *info)
 {
@@ -242,13 +256,13 @@ s1d13xxxfb_set_par(struct fb_info *info)
 }
 
 /**
- *  	s1d13xxxfb_setcolreg - sets a color register.
- *      @regno: Which register in the CLUT we are programming
- *      @red: The red value which can be up to 16 bits wide
+ *	s1d13xxxfb_setcolreg - sets a color register.
+ *	@regno: Which register in the CLUT we are programming
+ *	@red: The red value which can be up to 16 bits wide
  *	@green: The green value which can be up to 16 bits wide
  *	@blue:  The blue value which can be up to 16 bits wide.
  *	@transp: If supported the alpha value which can be up to 16 bits wide.
- *      @info: frame buffer info structure
+ *	@info: frame buffer info structure
  *
  *	Returns negative errno on error, or zero on success.
  */
@@ -351,15 +365,15 @@ s1d13xxxfb_blank(int blank_mode, struct fb_info *info)
 }
 
 /**
- *      s1d13xxxfb_pan_display - Pans the display.
- *      @var: frame buffer variable screen structure
- *      @info: frame buffer structure that represents a single frame buffer
+ *	s1d13xxxfb_pan_display - Pans the display.
+ *	@var: frame buffer variable screen structure
+ *	@info: frame buffer structure that represents a single frame buffer
  *
  *	Pan (or wrap, depending on the `vmode' field) the display using the
- *  	`yoffset' field of the `var' structure (`xoffset'  not yet supported).
- *  	If the values don't fit, return -EINVAL.
+ *	`yoffset' field of the `var' structure (`xoffset'  not yet supported).
+ *	If the values don't fit, return -EINVAL.
  *
- *      Returns negative errno on error, or zero on success.
+ *	Returns negative errno on error, or zero on success.
  */
 static int
 s1d13xxxfb_pan_display(struct fb_var_screeninfo *var, struct fb_info *info)
@@ -390,8 +404,259 @@ s1d13xxxfb_pan_display(struct fb_var_screeninfo *var, struct fb_info *info)
 	return 0;
 }
 
-/* framebuffer information structures */
+/************************************************************
+ functions to handle bitblt acceleration
+ ************************************************************/
+
+/**
+ *	bltbit_wait_bitset - waits for change in register value
+ *	@info : framebuffer structure
+ *	@bit  : value expected in register
+ *	@timeout : ...
+ *
+ *	waits until value changes INTO bit
+ */
+static u8
+bltbit_wait_bitset(struct fb_info *info, u8 bit, int timeout)
+{
+	while (!(s1d13xxxfb_readreg(info->par, S1DREG_BBLT_CTL0) & bit)) {
+		udelay(10);
+		if (!--timeout) {
+			dbg_blit("wait_bitset timeout\n");
+			break;
+		}
+	}
+
+	return timeout;
+}
+
+/**
+ *	bltbit_wait_bitclear - waits for change in register value
+ *	@info : frambuffer structure
+ *	@bit  : value currently in register
+ *	@timeout : ...
+ *
+ *	waits until value changes FROM bit
+ *
+ */
+static u8
+bltbit_wait_bitclear(struct fb_info *info, u8 bit, int timeout)
+{
+	while (s1d13xxxfb_readreg(info->par, S1DREG_BBLT_CTL0) & bit) {
+		udelay(10);
+		if (!--timeout) {
+			dbg_blit("wait_bitclear timeout\n");
+			break;
+		}
+	}
+
+	return timeout;
+}
+
+/**
+ *	bltbit_fifo_status - checks the current status of the fifo
+ *	@info : framebuffer structure
+ *
+ *	returns number of free words in buffer
+ */
+static u8
+bltbit_fifo_status(struct fb_info *info)
+{
+	u8 status;
 
+	status = s1d13xxxfb_readreg(info->par, S1DREG_BBLT_CTL0);
+
+	/* its empty so room for 16 words */
+	if (status & BBLT_FIFO_EMPTY)
+		return 16;
+
+	/* its full so we dont want to add */
+	if (status & BBLT_FIFO_FULL)
+		return 0;
+
+	/* its atleast half full but we can add one atleast */
+	if (status & BBLT_FIFO_NOT_FULL)
+		return 1;
+
+	return 0;
+}
+
+/*
+ *	s1d13xxxfb_bitblt_copyarea - accelerated copyarea function
+ *	@info : framebuffer structure
+ *	@area : fb_copyarea structure
+ *
+ *	supports (atleast) S1D13506
+ *
+ */
+static void
+s1d13xxxfb_bitblt_copyarea(struct fb_info *info, const struct fb_copyarea *area)
+{
+	u32 dst, src;
+	u32 stride;
+	u16 reverse = 0;
+	u16 sx = area->sx, sy = area->sy;
+	u16 dx = area->dx, dy = area->dy;
+	u16 width = area->width, height = area->height;
+	u16 bpp;
+
+	spin_lock(&s1d13xxxfb_bitblt_lock);
+
+	/* bytes per xres line */
+	bpp = (info->var.bits_per_pixel >> 3);
+	stride = bpp * info->var.xres;
+
+	/* reverse, calculate the last pixel in rectangle */
+	if ((dy > sy) || ((dy == sy) && (dx >= sx))) {
+		dst = (((dy + height - 1) * stride) + (bpp * (dx + width - 1)));
+		src = (((sy + height - 1) * stride) + (bpp * (sx + width - 1)));
+		reverse = 1;
+	/* not reverse, calculate the first pixel in rectangle */
+	} else { /* (y * xres) + (bpp * x) */
+		dst = (dy * stride) + (bpp * dx);
+		src = (sy * stride) + (bpp * sx);
+	}
+
+	/* set source adress */
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_SRC_START0, (src & 0xff));
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_SRC_START1, (src >> 8) & 0x00ff);
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_SRC_START2, (src >> 16) & 0x00ff);
+
+	/* set destination adress */
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_DST_START0, (dst & 0xff));
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_DST_START1, (dst >> 8) & 0x00ff);
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_DST_START2, (dst >> 16) & 0x00ff);
+
+	/* program height and width */
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_WIDTH0, (width & 0xff) - 1);
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_WIDTH1, (width >> 8));
+
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_HEIGHT0, (height & 0xff) - 1);
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_HEIGHT1, (height >> 8));
+
+	/* negative direction ROP */
+	if (reverse == 1) {
+		dbg_blit("(copyarea) negative rop\n");
+		s1d13xxxfb_writereg(info->par, S1DREG_BBLT_OP, 0x03);
+	} else /* positive direction ROP */ {
+		s1d13xxxfb_writereg(info->par, S1DREG_BBLT_OP, 0x02);
+		dbg_blit("(copyarea) positive rop\n");
+	}
+
+	/* set for rectangel mode and not linear */
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_CTL0, 0x0);
+
+	/* setup the bpp 1 = 16bpp, 0 = 8bpp*/
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_CTL1, (bpp >> 1));
+
+	/* set words per xres */
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_MEM_OFF0, (stride >> 1) & 0xff);
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_MEM_OFF1, (stride >> 9));
+
+	dbg_blit("(copyarea) dx=%d, dy=%d\n", dx, dy);
+	dbg_blit("(copyarea) sx=%d, sy=%d\n", sx, sy);
+	dbg_blit("(copyarea) width=%d, height=%d\n", width - 1, height - 1);
+	dbg_blit("(copyarea) stride=%d\n", stride);
+	dbg_blit("(copyarea) bpp=%d=0x0%d, mem_offset1=%d, mem_offset2=%d\n", bpp, (bpp >> 1),
+		(stride >> 1) & 0xff, stride >> 9);
+
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_CC_EXP, 0x0c);
+
+	/* initialize the engine */
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_CTL0, 0x80);
+
+	/* wait to complete */
+	bltbit_wait_bitclear(info, 0x80, 8000);
+
+	spin_unlock(&s1d13xxxfb_bitblt_lock);
+}
+
+/**
+ *
+ *	s1d13xxxfb_bitblt_solidfill - accelerated solidfill function
+ *	@info : framebuffer structure
+ *	@rect : fb_fillrect structure
+ *
+ *	supports (atleast 13506)
+ *
+ **/
+static void
+s1d13xxxfb_bitblt_solidfill(struct fb_info *info, const struct fb_fillrect *rect)
+{
+	u32 screen_stride, dest;
+	u32 fg;
+	u16 bpp = (info->var.bits_per_pixel >> 3);
+
+	/* grab spinlock */
+	spin_lock(&s1d13xxxfb_bitblt_lock);
+
+	/* bytes per x width */
+	screen_stride = (bpp * info->var.xres);
+
+	/* bytes to starting point */
+	dest = ((rect->dy * screen_stride) + (bpp * rect->dx));
+
+	dbg_blit("(solidfill) dx=%d, dy=%d, stride=%d, dest=%d\n"
+		 "(solidfill) : rect_width=%d, rect_height=%d\n",
+				rect->dx, rect->dy, screen_stride, dest,
+				rect->width - 1, rect->height - 1);
+
+	dbg_blit("(solidfill) : xres=%d, yres=%d, bpp=%d\n",
+				info->var.xres, info->var.yres,
+				info->var.bits_per_pixel);
+	dbg_blit("(solidfill) : rop=%d\n", rect->rop);
+
+	/* We split the destination into the three registers */
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_DST_START0, (dest & 0x00ff));
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_DST_START1, ((dest >> 8) & 0x00ff));
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_DST_START2, ((dest >> 16) & 0x00ff));
+
+	/* give information regarding rectangel width */
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_WIDTH0, ((rect->width) & 0x00ff) - 1);
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_WIDTH1, (rect->width >> 8));
+
+	/* give information regarding rectangel height */
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_HEIGHT0, ((rect->height) & 0x00ff) - 1);
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_HEIGHT1, (rect->height >> 8));
+
+	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
+		info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
+		fg = ((u32 *)info->pseudo_palette)[rect->color];
+		dbg_blit("(solidfill) truecolor/directcolor\n");
+		dbg_blit("(solidfill) pseudo_palette[%d] = %d\n", rect->color, fg);
+	} else {
+		fg = rect->color;
+		dbg_blit("(solidfill) color = %d\n", rect->color);
+	}
+
+	/* set foreground color */
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_FGC0, (fg & 0xff));
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_FGC1, (fg >> 8) & 0xff);
+
+	/* set rectangual region of memory (rectangle and not linear) */
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_CTL0, 0x0);
+
+	/* set operation mode SOLID_FILL */
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_OP, BBLT_SOLID_FILL);
+
+	/* set bits per pixel (1 = 16bpp, 0 = 8bpp) */
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_CTL1, (info->var.bits_per_pixel >> 4));
+
+	/* set the memory offset for the bblt in word sizes */
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_MEM_OFF0, (screen_stride >> 1) & 0x00ff);
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_MEM_OFF1, (screen_stride >> 9));
+
+	/* and away we go.... */
+	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_CTL0, 0x80);
+
+	/* wait until its done */
+	bltbit_wait_bitclear(info, 0x80, 8000);
+
+	/* let others play */
+	spin_unlock(&s1d13xxxfb_bitblt_lock);
+}
+
+/* framebuffer information structures */
 static struct fb_ops s1d13xxxfb_fbops = {
 	.owner		= THIS_MODULE,
 	.fb_set_par	= s1d13xxxfb_set_par,
@@ -400,7 +665,7 @@ static struct fb_ops s1d13xxxfb_fbops = {
 
 	.fb_pan_display	= s1d13xxxfb_pan_display,
 
-	/* to be replaced by any acceleration we can */
+	/* gets replaced at chip detection time */
 	.fb_fillrect	= cfb_fillrect,
 	.fb_copyarea	= cfb_copyarea,
 	.fb_imageblit	= cfb_imageblit,
@@ -412,9 +677,9 @@ static int s1d13xxxfb_width_tab[2][4] __devinitdata = {
 };
 
 /**
- *      s1d13xxxfb_fetch_hw_state - Configure the framebuffer according to
+ *	s1d13xxxfb_fetch_hw_state - Configure the framebuffer according to
  *	hardware setup.
- *      @info: frame buffer structure
+ *	@info: frame buffer structure
  *
  *	We setup the framebuffer structures according to the current
  *	hardware setup. On some machines, the BIOS will have filled
@@ -569,7 +834,6 @@ s1d13xxxfb_probe(struct platform_device *pdev)
 	if (pdata && pdata->platform_init_video)
 		pdata->platform_init_video();
 
-
 	if (pdev->num_resources != 2) {
 		dev_err(&pdev->dev, "invalid num_resources: %i\n",
 		       pdev->num_resources);
@@ -655,16 +919,27 @@ s1d13xxxfb_probe(struct platform_device *pdev)
 
 	info->fix = s1d13xxxfb_fix;
 	info->fix.mmio_start = pdev->resource[1].start;
-	info->fix.mmio_len = pdev->resource[1].end - pdev->resource[1].start +1;
+	info->fix.mmio_len = pdev->resource[1].end - pdev->resource[1].start + 1;
 	info->fix.smem_start = pdev->resource[0].start;
-	info->fix.smem_len = pdev->resource[0].end - pdev->resource[0].start +1;
+	info->fix.smem_len = pdev->resource[0].end - pdev->resource[0].start + 1;
 
 	printk(KERN_INFO PFX "regs mapped at 0x%p, fb %d KiB mapped at 0x%p\n",
 	       default_par->regs, info->fix.smem_len / 1024, info->screen_base);
 
 	info->par = default_par;
-	info->fbops = &s1d13xxxfb_fbops;
 	info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN;
+	info->fbops = &s1d13xxxfb_fbops;
+
+	switch(prod_id) {
+	case S1D13506_PROD_ID:	/* activate acceleration */
+		s1d13xxxfb_fbops.fb_fillrect = s1d13xxxfb_bitblt_solidfill;
+		s1d13xxxfb_fbops.fb_copyarea = s1d13xxxfb_bitblt_copyarea;
+		info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN |
+			FBINFO_HWACCEL_FILLRECT | FBINFO_HWACCEL_COPYAREA;
+		break;
+	default:
+		break;
+	}
 
 	/* perform "manual" chip initialization, if needed */
 	if (pdata && pdata->initregs)
diff --git a/include/video/s1d13xxxfb.h b/include/video/s1d13xxxfb.h
index c3b2a2aa714..f0736cff2ca 100644
--- a/include/video/s1d13xxxfb.h
+++ b/include/video/s1d13xxxfb.h
@@ -136,6 +136,15 @@
 #define S1DREG_DELAYOFF			0xFFFE
 #define S1DREG_DELAYON			0xFFFF
 
+#define BBLT_FIFO_EMPTY			0x00
+#define BBLT_FIFO_NOT_EMPTY		0x40
+#define BBLT_FIFO_NOT_FULL		0x30
+#define BBLT_FIFO_HALF_FULL		0x20
+#define BBLT_FIFO_FULL			0x10
+
+#define BBLT_SOLID_FILL			0x0c
+
+
 /* Note: all above defines should go in separate header files
    when implementing other S1D13xxx chip support. */
 
-- 
cgit v1.2.3-70-g09d2


From 00115e6690ed5aa90530e1e41c467cd895dd18fc Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 16 Jun 2009 15:34:40 -0700
Subject: fbdev: blackfin has __raw I/O accessors, so use them in fb.h

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Cc: Krzysztof Helt <krzysztof.h1@poczta.fm>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index 3c5562a5216..dd68358996b 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -903,7 +903,7 @@ struct fb_info {
 #define fb_writeq sbus_writeq
 #define fb_memset sbus_memset_io
 
-#elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || defined(__hppa__) || defined(__sh__) || defined(__powerpc__) || defined(__avr32__)
+#elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || defined(__hppa__) || defined(__sh__) || defined(__powerpc__) || defined(__avr32__) || defined(__bfin__)
 
 #define fb_readb __raw_readb
 #define fb_readw __raw_readw
-- 
cgit v1.2.3-70-g09d2


From 608ba50bd0225d95469154feba8f00a6457848c1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 16 Jun 2009 14:52:13 -0400
Subject: Cleanup of adfs headers

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/adfs/adfs.h             | 55 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/adfs/dir.c              |  8 -------
 fs/adfs/dir_f.c            |  8 -------
 fs/adfs/dir_fplus.c        |  8 -------
 fs/adfs/file.c             |  4 ----
 fs/adfs/inode.c            | 10 ---------
 fs/adfs/map.c              |  6 -----
 fs/adfs/super.c            | 17 ++------------
 include/linux/adfs_fs.h    | 13 -----------
 include/linux/adfs_fs_i.h  | 24 --------------------
 include/linux/adfs_fs_sb.h | 38 --------------------------------
 11 files changed, 57 insertions(+), 134 deletions(-)
 delete mode 100644 include/linux/adfs_fs_i.h
 delete mode 100644 include/linux/adfs_fs_sb.h

(limited to 'include')

diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index a6665f37f45..9cc18775b83 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -1,3 +1,6 @@
+#include <linux/fs.h>
+#include <linux/adfs_fs.h>
+
 /* Internal data structures for ADFS */
 
 #define ADFS_FREE_FRAG		 0
@@ -16,6 +19,58 @@
 
 struct buffer_head;
 
+/*
+ * adfs file system inode data in memory
+ */
+struct adfs_inode_info {
+	loff_t		mmu_private;
+	unsigned long	parent_id;	/* object id of parent		*/
+	__u32		loadaddr;	/* RISC OS load address		*/
+	__u32		execaddr;	/* RISC OS exec address		*/
+	unsigned int	filetype;	/* RISC OS file type		*/
+	unsigned int	attr;		/* RISC OS permissions		*/
+	unsigned int	stamped:1;	/* RISC OS file has date/time	*/
+	struct inode vfs_inode;
+};
+
+/*
+ * Forward-declare this
+ */
+struct adfs_discmap;
+struct adfs_dir_ops;
+
+/*
+ * ADFS file system superblock data in memory
+ */
+struct adfs_sb_info {
+	struct adfs_discmap *s_map;	/* bh list containing map		 */
+	struct adfs_dir_ops *s_dir;	/* directory operations			 */
+
+	uid_t		s_uid;		/* owner uid				 */
+	gid_t		s_gid;		/* owner gid				 */
+	umode_t		s_owner_mask;	/* ADFS owner perm -> unix perm		 */
+	umode_t		s_other_mask;	/* ADFS other perm -> unix perm		 */
+
+	__u32		s_ids_per_zone;	/* max. no ids in one zone		 */
+	__u32		s_idlen;	/* length of ID in map			 */
+	__u32		s_map_size;	/* sector size of a map			 */
+	unsigned long	s_size;		/* total size (in blocks) of this fs	 */
+	signed int	s_map2blk;	/* shift left by this for map->sector	 */
+	unsigned int	s_log2sharesize;/* log2 share size			 */
+	__le32		s_version;	/* disc format version			 */
+	unsigned int	s_namelen;	/* maximum number of characters in name	 */
+};
+
+static inline struct adfs_sb_info *ADFS_SB(struct super_block *sb)
+{
+	return sb->s_fs_info;
+}
+
+static inline struct adfs_inode_info *ADFS_I(struct inode *inode)
+{
+	return container_of(inode, struct adfs_inode_info, vfs_inode);
+}
+
 /*
  * Directory handling
  */
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 4d4073447d1..23aa52f548a 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -9,15 +9,7 @@
  *
  *  Common directory handling for ADFS
  */
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/adfs_fs.h>
-#include <linux/time.h>
-#include <linux/stat.h>
-#include <linux/spinlock.h>
 #include <linux/smp_lock.h>
-#include <linux/buffer_head.h>		/* for file_fsync() */
-
 #include "adfs.h"
 
 /*
diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c
index 31df6adf0de..bafc71222e2 100644
--- a/fs/adfs/dir_f.c
+++ b/fs/adfs/dir_f.c
@@ -9,15 +9,7 @@
  *
  *  E and F format directory handling
  */
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/adfs_fs.h>
-#include <linux/time.h>
-#include <linux/stat.h>
-#include <linux/spinlock.h>
 #include <linux/buffer_head.h>
-#include <linux/string.h>
-
 #include "adfs.h"
 #include "dir_f.h"
 
diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c
index 139e0f345f1..1796bb352d0 100644
--- a/fs/adfs/dir_fplus.c
+++ b/fs/adfs/dir_fplus.c
@@ -7,15 +7,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/adfs_fs.h>
-#include <linux/time.h>
-#include <linux/stat.h>
-#include <linux/spinlock.h>
 #include <linux/buffer_head.h>
-#include <linux/string.h>
-
 #include "adfs.h"
 #include "dir_fplus.h"
 
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index 8224d54a2af..005ea34d175 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -19,10 +19,6 @@
  *
  *  adfs regular file handling primitives           
  */
-#include <linux/fs.h>
-#include <linux/buffer_head.h>			/* for file_fsync() */
-#include <linux/adfs_fs.h>
-
 #include "adfs.h"
 
 const struct file_operations adfs_file_operations = {
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 05b3a677201..798cb071d13 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -7,17 +7,8 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/adfs_fs.h>
-#include <linux/time.h>
-#include <linux/stat.h>
-#include <linux/string.h>
-#include <linux/mm.h>
 #include <linux/smp_lock.h>
-#include <linux/module.h>
 #include <linux/buffer_head.h>
-
 #include "adfs.h"
 
 /*
@@ -395,4 +386,3 @@ int adfs_write_inode(struct inode *inode, int wait)
 	unlock_kernel();
 	return ret;
 }
-MODULE_LICENSE("GPL");
diff --git a/fs/adfs/map.c b/fs/adfs/map.c
index 568081b93f7..d1a5932bb0f 100644
--- a/fs/adfs/map.c
+++ b/fs/adfs/map.c
@@ -7,14 +7,8 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/adfs_fs.h>
-#include <linux/spinlock.h>
 #include <linux/buffer_head.h>
-
 #include <asm/unaligned.h>
-
 #include "adfs.h"
 
 /*
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 0ec5aaf47aa..aad92f0a104 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -8,26 +8,12 @@
  * published by the Free Software Foundation.
  */
 #include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/adfs_fs.h>
-#include <linux/slab.h>
-#include <linux/time.h>
-#include <linux/stat.h>
-#include <linux/string.h>
 #include <linux/init.h>
 #include <linux/buffer_head.h>
-#include <linux/vfs.h>
 #include <linux/parser.h>
-#include <linux/bitops.h>
 #include <linux/mount.h>
 #include <linux/seq_file.h>
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-
-#include <stdarg.h>
-
+#include <linux/statfs.h>
 #include "adfs.h"
 #include "dir_f.h"
 #include "dir_fplus.h"
@@ -534,3 +520,4 @@ static void __exit exit_adfs_fs(void)
 
 module_init(init_adfs_fs)
 module_exit(exit_adfs_fs)
+MODULE_LICENSE("GPL");
diff --git a/include/linux/adfs_fs.h b/include/linux/adfs_fs.h
index ef788c2085a..b19801f7389 100644
--- a/include/linux/adfs_fs.h
+++ b/include/linux/adfs_fs.h
@@ -41,8 +41,6 @@ struct adfs_discrecord {
 #define ADFS_DR_SIZE_BITS	(ADFS_DR_SIZE << 3)
 
 #ifdef __KERNEL__
-#include <linux/adfs_fs_i.h>
-#include <linux/adfs_fs_sb.h>
 /*
  * Calculate the boot block checksum on an ADFS drive.  Note that this will
  * appear to be correct if the sector contains all zeros, so also check that
@@ -60,17 +58,6 @@ static inline int adfs_checkbblk(unsigned char *ptr)
 
 	return (result & 0xff) != ptr[511];
 }
-
-static inline struct adfs_sb_info *ADFS_SB(struct super_block *sb)
-{
-	return sb->s_fs_info;
-}
-
-static inline struct adfs_inode_info *ADFS_I(struct inode *inode)
-{
-	return container_of(inode, struct adfs_inode_info, vfs_inode);
-}
-
 #endif
 
 #endif
diff --git a/include/linux/adfs_fs_i.h b/include/linux/adfs_fs_i.h
deleted file mode 100644
index cb543034e54..00000000000
--- a/include/linux/adfs_fs_i.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- *  linux/include/linux/adfs_fs_i.h
- *
- * Copyright (C) 1997 Russell King
- */
-
-#ifndef _ADFS_FS_I
-#define _ADFS_FS_I
-
-/*
- * adfs file system inode data in memory
- */
-struct adfs_inode_info {
-	loff_t		mmu_private;
-	unsigned long	parent_id;	/* object id of parent		*/
-	__u32		loadaddr;	/* RISC OS load address		*/
-	__u32		execaddr;	/* RISC OS exec address		*/
-	unsigned int	filetype;	/* RISC OS file type		*/
-	unsigned int	attr;		/* RISC OS permissions		*/
-	unsigned int	stamped:1;	/* RISC OS file has date/time	*/
-	struct inode vfs_inode;
-};
-
-#endif
diff --git a/include/linux/adfs_fs_sb.h b/include/linux/adfs_fs_sb.h
deleted file mode 100644
index d9bf05c02cc..00000000000
--- a/include/linux/adfs_fs_sb.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- *  linux/include/linux/adfs_fs_sb.h
- *
- * Copyright (C) 1997-1999 Russell King
- */
-
-#ifndef _ADFS_FS_SB
-#define _ADFS_FS_SB
-
-/*
- * Forward-declare this
- */
-struct adfs_discmap;
-struct adfs_dir_ops;
-
-/*
- * ADFS file system superblock data in memory
- */
-struct adfs_sb_info {
-	struct adfs_discmap *s_map;	/* bh list containing map		 */
-	struct adfs_dir_ops *s_dir;	/* directory operations			 */
-
-	uid_t		s_uid;		/* owner uid				 */
-	gid_t		s_gid;		/* owner gid				 */
-	umode_t		s_owner_mask;	/* ADFS owner perm -> unix perm		 */
-	umode_t		s_other_mask;	/* ADFS other perm -> unix perm		 */
-
-	__u32		s_ids_per_zone;	/* max. no ids in one zone		 */
-	__u32		s_idlen;	/* length of ID in map			 */
-	__u32		s_map_size;	/* sector size of a map			 */
-	unsigned long	s_size;		/* total size (in blocks) of this fs	 */
-	signed int	s_map2blk;	/* shift left by this for map->sector	 */
-	unsigned int	s_log2sharesize;/* log2 share size			 */
-	__le32		s_version;	/* disc format version			 */
-	unsigned int	s_namelen;	/* maximum number of characters in name	 */
-};
-
-#endif
-- 
cgit v1.2.3-70-g09d2


From a42fc8f6943127787ad2a416436cf211d5531229 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Tue, 16 Jun 2009 16:56:38 +0000
Subject: skbuff.h: fix skb_dst kernel-doc

Fix kernel-doc warnings (missing + extra entries) in skbuff.h.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index fa51293f270..3d289367aae 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -264,7 +264,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@transport_header: Transport layer header
  *	@network_header: Network layer header
  *	@mac_header: Link layer header
- *	@dst: destination entry
+ *	@_skb_dst: destination entry
  *	@sp: the security path, used for xfrm
  *	@cb: Control buffer. Free for use by every layer. Put private vars here
  *	@len: Length of actual data
-- 
cgit v1.2.3-70-g09d2


From c564039fd83ea16a86a96d52632794b24849e507 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 16 Jun 2009 10:12:03 +0000
Subject: net: sk_wmem_alloc has initial value of one, not zero

commit 2b85a34e911bf483c27cfdd124aeb1605145dc80
(net: No more expensive sock_hold()/sock_put() on each tx)
changed initial sk_wmem_alloc value.

Some protocols check sk_wmem_alloc value to determine if a timer
must delay socket deallocation. We must take care of the sk_wmem_alloc
value being one instead of zero when no write allocations are pending.

Reported by Ingo Molnar, and full diagnostic from David Miller.

This patch introduces three helpers to get read/write allocations
and a followup patch will use these helpers to report correct
write allocations to user.

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h     | 33 +++++++++++++++++++++++++++++++++
 net/appletalk/ddp.c    |  6 ++----
 net/ax25/af_ax25.c     |  3 +--
 net/econet/af_econet.c |  6 ++----
 net/netrom/af_netrom.c |  3 +--
 net/rose/af_rose.c     |  3 +--
 net/x25/af_x25.c       |  3 +--
 7 files changed, 41 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 010e14a93c9..570c7a12b54 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1206,6 +1206,39 @@ static inline int skb_copy_to_page(struct sock *sk, char __user *from,
 	return 0;
 }
 
+/**
+ * sk_wmem_alloc_get - returns write allocations
+ * @sk: socket
+ *
+ * Returns sk_wmem_alloc minus initial offset of one
+ */
+static inline int sk_wmem_alloc_get(const struct sock *sk)
+{
+	return atomic_read(&sk->sk_wmem_alloc) - 1;
+}
+
+/**
+ * sk_rmem_alloc_get - returns read allocations
+ * @sk: socket
+ *
+ * Returns sk_rmem_alloc
+ */
+static inline int sk_rmem_alloc_get(const struct sock *sk)
+{
+	return atomic_read(&sk->sk_rmem_alloc);
+}
+
+/**
+ * sk_has_allocations - check if allocations are outstanding
+ * @sk: socket
+ *
+ * Returns true if socket has write or read allocations
+ */
+static inline int sk_has_allocations(const struct sock *sk)
+{
+	return sk_wmem_alloc_get(sk) || sk_rmem_alloc_get(sk);
+}
+
 /*
  * 	Queue a received datagram if it will fit. Stream and sequenced
  *	protocols can't normally use this as they need to fit buffers in
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index b603cbacdc5..f7a53b219ef 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -162,8 +162,7 @@ static void atalk_destroy_timer(unsigned long data)
 {
 	struct sock *sk = (struct sock *)data;
 
-	if (atomic_read(&sk->sk_wmem_alloc) ||
-	    atomic_read(&sk->sk_rmem_alloc)) {
+	if (sk_has_allocations(sk)) {
 		sk->sk_timer.expires = jiffies + SOCK_DESTROY_TIME;
 		add_timer(&sk->sk_timer);
 	} else
@@ -175,8 +174,7 @@ static inline void atalk_destroy_socket(struct sock *sk)
 	atalk_remove_socket(sk);
 	skb_queue_purge(&sk->sk_receive_queue);
 
-	if (atomic_read(&sk->sk_wmem_alloc) ||
-	    atomic_read(&sk->sk_rmem_alloc)) {
+	if (sk_has_allocations(sk)) {
 		setup_timer(&sk->sk_timer, atalk_destroy_timer,
 				(unsigned long)sk);
 		sk->sk_timer.expires	= jiffies + SOCK_DESTROY_TIME;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index fd9d06f291d..61b35b95549 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -330,8 +330,7 @@ void ax25_destroy_socket(ax25_cb *ax25)
 	}
 
 	if (ax25->sk != NULL) {
-		if (atomic_read(&ax25->sk->sk_wmem_alloc) ||
-		    atomic_read(&ax25->sk->sk_rmem_alloc)) {
+		if (sk_has_allocations(ax25->sk)) {
 			/* Defer: outstanding buffers */
 			setup_timer(&ax25->dtimer, ax25_destroy_timer,
 					(unsigned long)ax25);
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 8121bf0029e..2e1f836d424 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -540,8 +540,7 @@ static void econet_destroy_timer(unsigned long data)
 {
 	struct sock *sk=(struct sock *)data;
 
-	if (!atomic_read(&sk->sk_wmem_alloc) &&
-	    !atomic_read(&sk->sk_rmem_alloc)) {
+	if (!sk_has_allocations(sk)) {
 		sk_free(sk);
 		return;
 	}
@@ -579,8 +578,7 @@ static int econet_release(struct socket *sock)
 
 	skb_queue_purge(&sk->sk_receive_queue);
 
-	if (atomic_read(&sk->sk_rmem_alloc) ||
-	    atomic_read(&sk->sk_wmem_alloc)) {
+	if (sk_has_allocations(sk)) {
 		sk->sk_timer.data     = (unsigned long)sk;
 		sk->sk_timer.expires  = jiffies + HZ;
 		sk->sk_timer.function = econet_destroy_timer;
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 3be0e016ab7..cd911904cbe 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -286,8 +286,7 @@ void nr_destroy_socket(struct sock *sk)
 		kfree_skb(skb);
 	}
 
-	if (atomic_read(&sk->sk_wmem_alloc) ||
-	    atomic_read(&sk->sk_rmem_alloc)) {
+	if (sk_has_allocations(sk)) {
 		/* Defer: outstanding buffers */
 		sk->sk_timer.function = nr_destroy_timer;
 		sk->sk_timer.expires  = jiffies + 2 * HZ;
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 877a7f65f70..4dd9a7d1894 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -356,8 +356,7 @@ void rose_destroy_socket(struct sock *sk)
 		kfree_skb(skb);
 	}
 
-	if (atomic_read(&sk->sk_wmem_alloc) ||
-	    atomic_read(&sk->sk_rmem_alloc)) {
+	if (sk_has_allocations(sk)) {
 		/* Defer: outstanding buffers */
 		setup_timer(&sk->sk_timer, rose_destroy_timer,
 				(unsigned long)sk);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index c51f3095739..8cd2390b0d4 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -372,8 +372,7 @@ static void __x25_destroy_socket(struct sock *sk)
 		kfree_skb(skb);
 	}
 
-	if (atomic_read(&sk->sk_wmem_alloc) ||
-	    atomic_read(&sk->sk_rmem_alloc)) {
+	if (sk_has_allocations(sk)) {
 		/* Defer: outstanding buffers */
 		sk->sk_timer.expires  = jiffies + 10 * HZ;
 		sk->sk_timer.function = x25_destroy_timer;
-- 
cgit v1.2.3-70-g09d2


From e088a4ad7fa53c3dc3c29f930025f41ccf01953e Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Fri, 22 May 2009 13:49:49 -0700
Subject: [IA64] Convert ia64 to use int-ll64.h

It is generally agreed that it would be beneficial for u64 to be an
unsigned long long on all architectures.  ia64 (in common with several
other 64-bit architectures) currently uses unsigned long.  Migrating
piecemeal is too painful; this giant patch fixes all compilation warnings
and errors that come as a result of switching to use int-ll64.h.

Note that userspace will still see __u64 defined as unsigned long.  This
is important as it affects C++ name mangling.

[Updated by Tony Luck to change efi.h:efi_freemem_callback_t to use
 u64 for start/end rather than unsigned long]

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/hp/common/sba_iommu.c         |  2 +-
 arch/ia64/include/asm/gcc_intrin.h      | 18 ++++-----
 arch/ia64/include/asm/mca.h             | 38 +++++++++---------
 arch/ia64/include/asm/meminit.h         | 18 ++++-----
 arch/ia64/include/asm/pal.h             | 24 ++++++------
 arch/ia64/include/asm/processor.h       | 56 +++++++++++++--------------
 arch/ia64/include/asm/sal.h             |  8 ++--
 arch/ia64/include/asm/sn/sn_sal.h       |  2 +-
 arch/ia64/include/asm/types.h           | 13 +++++--
 arch/ia64/kernel/efi.c                  | 10 ++---
 arch/ia64/kernel/mca.c                  | 18 ++++-----
 arch/ia64/kernel/module.c               | 14 ++++---
 arch/ia64/kernel/palinfo.c              | 68 ++++++++++++++++-----------------
 arch/ia64/kernel/pci-dma.c              |  2 +-
 arch/ia64/kernel/perfmon.c              |  6 +--
 arch/ia64/kernel/setup.c                | 32 ++++++++--------
 arch/ia64/kernel/smp.c                  |  2 +-
 arch/ia64/kernel/smpboot.c              |  2 +-
 arch/ia64/kernel/time.c                 |  2 +-
 arch/ia64/kernel/topology.c             |  4 +-
 arch/ia64/kernel/uncached.c             |  3 +-
 arch/ia64/mm/contig.c                   |  9 ++---
 arch/ia64/mm/init.c                     | 15 +++-----
 arch/ia64/mm/tlb.c                      |  4 +-
 arch/ia64/pci/pci.c                     | 12 +++---
 arch/ia64/sn/kernel/io_acpi_init.c      |  4 +-
 arch/ia64/sn/kernel/io_common.c         |  2 +-
 arch/ia64/sn/kernel/sn2/sn_hwperf.c     |  4 +-
 arch/ia64/sn/kernel/sn2/sn_proc_fs.c    |  2 +-
 arch/ia64/sn/kernel/tiocx.c             |  2 +-
 arch/ia64/sn/pci/pcibr/pcibr_provider.c |  2 +-
 arch/ia64/sn/pci/tioca_provider.c       |  6 +--
 arch/ia64/sn/pci/tioce_provider.c       |  6 +--
 drivers/char/agp/hp-agp.c               |  5 ++-
 drivers/firmware/pcdp.c                 |  4 +-
 include/linux/efi.h                     |  2 +-
 36 files changed, 212 insertions(+), 209 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 56ceb68eb99..21c04114ddd 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -1787,7 +1787,7 @@ static struct ioc_iommu ioc_iommu_info[] __initdata = {
 };
 
 static struct ioc * __init
-ioc_init(u64 hpa, void *handle)
+ioc_init(unsigned long hpa, void *handle)
 {
 	struct ioc *ioc;
 	struct ioc_iommu *info;
diff --git a/arch/ia64/include/asm/gcc_intrin.h b/arch/ia64/include/asm/gcc_intrin.h
index c2c5fd8fcac..21ddee54ada 100644
--- a/arch/ia64/include/asm/gcc_intrin.h
+++ b/arch/ia64/include/asm/gcc_intrin.h
@@ -388,7 +388,7 @@ register unsigned long ia64_r13 asm ("r13") __used;
 
 #define ia64_native_thash(addr)							\
 ({										\
-	__u64 ia64_intri_res;							\
+	unsigned long ia64_intri_res;						\
 	asm volatile ("thash %0=%1" : "=r"(ia64_intri_res) : "r" (addr));	\
 	ia64_intri_res;								\
 })
@@ -419,7 +419,7 @@ register unsigned long ia64_r13 asm ("r13") __used;
 
 #define ia64_tpa(addr)								\
 ({										\
-	__u64 ia64_pa;								\
+	unsigned long ia64_pa;							\
 	asm volatile ("tpa %0 = %1" : "=r"(ia64_pa) : "r"(addr) : "memory");	\
 	ia64_pa;								\
 })
@@ -444,35 +444,35 @@ register unsigned long ia64_r13 asm ("r13") __used;
 
 #define ia64_native_get_cpuid(index)							\
 ({											\
-	__u64 ia64_intri_res;								\
+	unsigned long ia64_intri_res;							\
 	asm volatile ("mov %0=cpuid[%r1]" : "=r"(ia64_intri_res) : "rO"(index));	\
 	ia64_intri_res;									\
 })
 
 #define __ia64_get_dbr(index)							\
 ({										\
-	__u64 ia64_intri_res;							\
+	unsigned long ia64_intri_res;						\
 	asm volatile ("mov %0=dbr[%1]" : "=r"(ia64_intri_res) : "r"(index));	\
 	ia64_intri_res;								\
 })
 
 #define ia64_get_ibr(index)							\
 ({										\
-	__u64 ia64_intri_res;							\
+	unsigned long ia64_intri_res;						\
 	asm volatile ("mov %0=ibr[%1]" : "=r"(ia64_intri_res) : "r"(index));	\
 	ia64_intri_res;								\
 })
 
 #define ia64_get_pkr(index)							\
 ({										\
-	__u64 ia64_intri_res;							\
+	unsigned long ia64_intri_res;						\
 	asm volatile ("mov %0=pkr[%1]" : "=r"(ia64_intri_res) : "r"(index));	\
 	ia64_intri_res;								\
 })
 
 #define ia64_get_pmc(index)							\
 ({										\
-	__u64 ia64_intri_res;							\
+	unsigned long ia64_intri_res;						\
 	asm volatile ("mov %0=pmc[%1]" : "=r"(ia64_intri_res) : "r"(index));	\
 	ia64_intri_res;								\
 })
@@ -480,14 +480,14 @@ register unsigned long ia64_r13 asm ("r13") __used;
 
 #define ia64_native_get_pmd(index)						\
 ({										\
-	__u64 ia64_intri_res;							\
+	unsigned long ia64_intri_res;						\
 	asm volatile ("mov %0=pmd[%1]" : "=r"(ia64_intri_res) : "r"(index));	\
 	ia64_intri_res;								\
 })
 
 #define ia64_native_get_rr(index)						\
 ({										\
-	__u64 ia64_intri_res;							\
+	unsigned long ia64_intri_res;						\
 	asm volatile ("mov %0=rr[%1]" : "=r"(ia64_intri_res) : "r" (index));	\
 	ia64_intri_res;								\
 })
diff --git a/arch/ia64/include/asm/mca.h b/arch/ia64/include/asm/mca.h
index 18a4321349a..44a0b53df90 100644
--- a/arch/ia64/include/asm/mca.h
+++ b/arch/ia64/include/asm/mca.h
@@ -72,39 +72,39 @@ typedef struct ia64_mc_info_s {
 struct ia64_sal_os_state {
 
 	/* SAL to OS */
-	u64			os_gp;			/* GP of the os registered with the SAL, physical */
-	u64			pal_proc;		/* PAL_PROC entry point, physical */
-	u64			sal_proc;		/* SAL_PROC entry point, physical */
-	u64			rv_rc;			/* MCA - Rendezvous state, INIT - reason code */
-	u64			proc_state_param;	/* from R18 */
-	u64			monarch;		/* 1 for a monarch event, 0 for a slave */
+	unsigned long		os_gp;			/* GP of the os registered with the SAL, physical */
+	unsigned long		pal_proc;		/* PAL_PROC entry point, physical */
+	unsigned long		sal_proc;		/* SAL_PROC entry point, physical */
+	unsigned long		rv_rc;			/* MCA - Rendezvous state, INIT - reason code */
+	unsigned long		proc_state_param;	/* from R18 */
+	unsigned long		monarch;		/* 1 for a monarch event, 0 for a slave */
 
 	/* common */
-	u64			sal_ra;			/* Return address in SAL, physical */
-	u64			sal_gp;			/* GP of the SAL - physical */
+	unsigned long		sal_ra;			/* Return address in SAL, physical */
+	unsigned long		sal_gp;			/* GP of the SAL - physical */
 	pal_min_state_area_t	*pal_min_state;		/* from R17.  physical in asm, virtual in C */
 	/* Previous values of IA64_KR(CURRENT) and IA64_KR(CURRENT_STACK).
 	 * Note: if the MCA/INIT recovery code wants to resume to a new context
 	 * then it must change these values to reflect the new kernel stack.
 	 */
-	u64			prev_IA64_KR_CURRENT;	/* previous value of IA64_KR(CURRENT) */
-	u64			prev_IA64_KR_CURRENT_STACK;
+	unsigned long		prev_IA64_KR_CURRENT;	/* previous value of IA64_KR(CURRENT) */
+	unsigned long		prev_IA64_KR_CURRENT_STACK;
 	struct task_struct	*prev_task;		/* previous task, NULL if it is not useful */
 	/* Some interrupt registers are not saved in minstate, pt_regs or
 	 * switch_stack.  Because MCA/INIT can occur when interrupts are
 	 * disabled, we need to save the additional interrupt registers over
 	 * MCA/INIT and resume.
 	 */
-	u64			isr;
-	u64			ifa;
-	u64			itir;
-	u64			iipa;
-	u64			iim;
-	u64			iha;
+	unsigned long		isr;
+	unsigned long		ifa;
+	unsigned long		itir;
+	unsigned long		iipa;
+	unsigned long		iim;
+	unsigned long		iha;
 
 	/* OS to SAL */
-	u64			os_status;		/* OS status to SAL, enum below */
-	u64			context;		/* 0 if return to same context
+	unsigned long		os_status;		/* OS status to SAL, enum below */
+	unsigned long		context;		/* 0 if return to same context
 							   1 if return to new context */
 };
 
@@ -150,7 +150,7 @@ extern void ia64_slave_init_handler(void);
 extern void ia64_mca_cmc_vector_setup(void);
 extern int  ia64_reg_MCA_extension(int (*fn)(void *, struct ia64_sal_os_state *));
 extern void ia64_unreg_MCA_extension(void);
-extern u64 ia64_get_rnat(u64 *);
+extern unsigned long ia64_get_rnat(unsigned long *);
 extern void ia64_mca_printk(const char * fmt, ...)
 	 __attribute__ ((format (printf, 1, 2)));
 
diff --git a/arch/ia64/include/asm/meminit.h b/arch/ia64/include/asm/meminit.h
index c0cea375620..688a812c017 100644
--- a/arch/ia64/include/asm/meminit.h
+++ b/arch/ia64/include/asm/meminit.h
@@ -25,8 +25,8 @@
 #define IA64_MAX_RSVD_REGIONS 9
 
 struct rsvd_region {
-	unsigned long start;	/* virtual address of beginning of element */
-	unsigned long end;	/* virtual address of end of element + 1 */
+	u64 start;	/* virtual address of beginning of element */
+	u64 end;	/* virtual address of end of element + 1 */
 };
 
 extern struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1];
@@ -35,13 +35,13 @@ extern int num_rsvd_regions;
 extern void find_memory (void);
 extern void reserve_memory (void);
 extern void find_initrd (void);
-extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
-extern int filter_memory (unsigned long start, unsigned long end, void *arg);
-extern unsigned long efi_memmap_init(unsigned long *s, unsigned long *e);
-extern int find_max_min_low_pfn (unsigned long , unsigned long, void *);
+extern int filter_rsvd_memory (u64 start, u64 end, void *arg);
+extern int filter_memory (u64 start, u64 end, void *arg);
+extern unsigned long efi_memmap_init(u64 *s, u64 *e);
+extern int find_max_min_low_pfn (u64, u64, void *);
 
 extern unsigned long vmcore_find_descriptor_size(unsigned long address);
-extern int reserve_elfcorehdr(unsigned long *start, unsigned long *end);
+extern int reserve_elfcorehdr(u64 *start, u64 *end);
 
 /*
  * For rounding an address to the next IA64_GRANULE_SIZE or order
@@ -63,8 +63,8 @@ extern int register_active_ranges(u64 start, u64 len, int nid);
 # define LARGE_GAP	0x40000000 /* Use virtual mem map if hole is > than this */
   extern unsigned long vmalloc_end;
   extern struct page *vmem_map;
-  extern int find_largest_hole (u64 start, u64 end, void *arg);
-  extern int create_mem_map_page_table (u64 start, u64 end, void *arg);
+  extern int find_largest_hole(u64 start, u64 end, void *arg);
+  extern int create_mem_map_page_table(u64 start, u64 end, void *arg);
   extern int vmemmap_find_next_valid_pfn(int, int);
 #else
 static inline int vmemmap_find_next_valid_pfn(int node, int i)
diff --git a/arch/ia64/include/asm/pal.h b/arch/ia64/include/asm/pal.h
index 67b02901ead..6a292505b39 100644
--- a/arch/ia64/include/asm/pal.h
+++ b/arch/ia64/include/asm/pal.h
@@ -989,8 +989,8 @@ ia64_pal_cache_read (pal_cache_line_id_u_t line_id, u64 physical_addr)
 }
 
 /* Return summary information about the hierarchy of caches controlled by the processor */
-static inline s64
-ia64_pal_cache_summary (u64 *cache_levels, u64 *unique_caches)
+static inline long ia64_pal_cache_summary(unsigned long *cache_levels,
+						unsigned long *unique_caches)
 {
 	struct ia64_pal_retval iprv;
 	PAL_CALL(iprv, PAL_CACHE_SUMMARY, 0, 0, 0);
@@ -1038,8 +1038,8 @@ ia64_pal_copy_pal (u64 target_addr, u64 alloc_size, u64 processor, u64 *pal_proc
 }
 
 /* Return the number of instruction and data debug register pairs */
-static inline s64
-ia64_pal_debug_info (u64 *inst_regs,  u64 *data_regs)
+static inline long ia64_pal_debug_info(unsigned long *inst_regs,
+						unsigned long *data_regs)
 {
 	struct ia64_pal_retval iprv;
 	PAL_CALL(iprv, PAL_DEBUG_INFO, 0, 0, 0);
@@ -1074,8 +1074,7 @@ ia64_pal_fixed_addr (u64 *global_unique_addr)
 }
 
 /* Get base frequency of the platform if generated by the processor */
-static inline s64
-ia64_pal_freq_base (u64 *platform_base_freq)
+static inline long ia64_pal_freq_base(unsigned long *platform_base_freq)
 {
 	struct ia64_pal_retval iprv;
 	PAL_CALL(iprv, PAL_FREQ_BASE, 0, 0, 0);
@@ -1437,7 +1436,7 @@ ia64_pal_proc_set_features (u64 feature_select)
  * possible.
  */
 typedef struct ia64_ptce_info_s {
-	u64		base;
+	unsigned long	base;
 	u32		count[2];
 	u32		stride[2];
 } ia64_ptce_info_t;
@@ -1478,9 +1477,9 @@ ia64_pal_register_info (u64 info_request, u64 *reg_info_1, u64 *reg_info_2)
 }
 
 typedef union pal_hints_u {
-	u64			ph_data;
+	unsigned long		ph_data;
 	struct {
-	       u64		si		: 1,
+	       unsigned long	si		: 1,
 				li		: 1,
 				reserved	: 62;
 	} pal_hints_s;
@@ -1489,8 +1488,8 @@ typedef union pal_hints_u {
 /* Return information about the register stack and RSE for this processor
  * implementation.
  */
-static inline s64
-ia64_pal_rse_info (u64 *num_phys_stacked, pal_hints_u_t *hints)
+static inline long ia64_pal_rse_info(unsigned long *num_phys_stacked,
+							pal_hints_u_t *hints)
 {
 	struct ia64_pal_retval iprv;
 	PAL_CALL(iprv, PAL_RSE_INFO, 0, 0, 0);
@@ -1608,8 +1607,7 @@ ia64_pal_vm_info (u64 tc_level, u64 tc_type,  pal_tc_info_u_t *tc_info, u64 *tc_
 /* Get page size information about the virtual memory characteristics of the processor
  * implementation.
  */
-static inline s64
-ia64_pal_vm_page_size (u64 *tr_pages, u64 *vw_pages)
+static inline s64 ia64_pal_vm_page_size(u64 *tr_pages, u64 *vw_pages)
 {
 	struct ia64_pal_retval iprv;
 	PAL_CALL(iprv, PAL_VM_PAGE_SIZE, 0, 0, 0);
diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
index f88fa054d01..3eaeedf1aef 100644
--- a/arch/ia64/include/asm/processor.h
+++ b/arch/ia64/include/asm/processor.h
@@ -187,40 +187,40 @@ union  ia64_rr {
  * state comes earlier:
  */
 struct cpuinfo_ia64 {
-	__u32 softirq_pending;
-	__u64 itm_delta;	/* # of clock cycles between clock ticks */
-	__u64 itm_next;		/* interval timer mask value to use for next clock tick */
-	__u64 nsec_per_cyc;	/* (1000000000<<IA64_NSEC_PER_CYC_SHIFT)/itc_freq */
-	__u64 unimpl_va_mask;	/* mask of unimplemented virtual address bits (from PAL) */
-	__u64 unimpl_pa_mask;	/* mask of unimplemented physical address bits (from PAL) */
-	__u64 itc_freq;		/* frequency of ITC counter */
-	__u64 proc_freq;	/* frequency of processor */
-	__u64 cyc_per_usec;	/* itc_freq/1000000 */
-	__u64 ptce_base;
-	__u32 ptce_count[2];
-	__u32 ptce_stride[2];
+	unsigned int softirq_pending;
+	unsigned long itm_delta;	/* # of clock cycles between clock ticks */
+	unsigned long itm_next;		/* interval timer mask value to use for next clock tick */
+	unsigned long nsec_per_cyc;	/* (1000000000<<IA64_NSEC_PER_CYC_SHIFT)/itc_freq */
+	unsigned long unimpl_va_mask;	/* mask of unimplemented virtual address bits (from PAL) */
+	unsigned long unimpl_pa_mask;	/* mask of unimplemented physical address bits (from PAL) */
+	unsigned long itc_freq;		/* frequency of ITC counter */
+	unsigned long proc_freq;	/* frequency of processor */
+	unsigned long cyc_per_usec;	/* itc_freq/1000000 */
+	unsigned long ptce_base;
+	unsigned int ptce_count[2];
+	unsigned int ptce_stride[2];
 	struct task_struct *ksoftirqd;	/* kernel softirq daemon for this CPU */
 
 #ifdef CONFIG_SMP
-	__u64 loops_per_jiffy;
+	unsigned long loops_per_jiffy;
 	int cpu;
-	__u32 socket_id;	/* physical processor socket id */
-	__u16 core_id;		/* core id */
-	__u16 thread_id;	/* thread id */
-	__u16 num_log;		/* Total number of logical processors on
+	unsigned int socket_id;	/* physical processor socket id */
+	unsigned short core_id;	/* core id */
+	unsigned short thread_id; /* thread id */
+	unsigned short num_log;	/* Total number of logical processors on
 				 * this socket that were successfully booted */
-	__u8  cores_per_socket;	/* Cores per processor socket */
-	__u8  threads_per_core;	/* Threads per core */
+	unsigned char cores_per_socket;	/* Cores per processor socket */
+	unsigned char threads_per_core;	/* Threads per core */
 #endif
 
 	/* CPUID-derived information: */
-	__u64 ppn;
-	__u64 features;
-	__u8 number;
-	__u8 revision;
-	__u8 model;
-	__u8 family;
-	__u8 archrev;
+	unsigned long ppn;
+	unsigned long features;
+	unsigned char number;
+	unsigned char revision;
+	unsigned char model;
+	unsigned char family;
+	unsigned char archrev;
 	char vendor[16];
 	char *model_name;
 
@@ -329,8 +329,8 @@ struct thread_struct {
 #else
 # define INIT_THREAD_PM
 #endif
-	__u64 dbr[IA64_NUM_DBG_REGS];
-	__u64 ibr[IA64_NUM_DBG_REGS];
+	unsigned long dbr[IA64_NUM_DBG_REGS];
+	unsigned long ibr[IA64_NUM_DBG_REGS];
 	struct ia64_fpreg fph[96];	/* saved/loaded on demand */
 };
 
diff --git a/arch/ia64/include/asm/sal.h b/arch/ia64/include/asm/sal.h
index 966797a97c9..d19ddba4e32 100644
--- a/arch/ia64/include/asm/sal.h
+++ b/arch/ia64/include/asm/sal.h
@@ -106,10 +106,10 @@ struct ia64_sal_retval {
 	 * informational value should be printed (e.g., "reboot for
 	 * change to take effect").
 	 */
-	s64 status;
-	u64 v0;
-	u64 v1;
-	u64 v2;
+	long status;
+	unsigned long v0;
+	unsigned long v1;
+	unsigned long v2;
 };
 
 typedef struct ia64_sal_retval (*ia64_sal_handler) (u64, ...);
diff --git a/arch/ia64/include/asm/sn/sn_sal.h b/arch/ia64/include/asm/sn/sn_sal.h
index e310fc0135d..1f5ff470a5a 100644
--- a/arch/ia64/include/asm/sn/sn_sal.h
+++ b/arch/ia64/include/asm/sn/sn_sal.h
@@ -929,7 +929,7 @@ ia64_sn_sysctl_tio_clock_reset(nasid_t nasid)
 /*
  * Get the associated ioboard type for a given nasid.
  */
-static inline s64
+static inline long
 ia64_sn_sysctl_ioboard_get(nasid_t nasid, u16 *ioboard)
 {
 	struct ia64_sal_retval isrv;
diff --git a/arch/ia64/include/asm/types.h b/arch/ia64/include/asm/types.h
index fbf1ed3b44c..bcd260e597d 100644
--- a/arch/ia64/include/asm/types.h
+++ b/arch/ia64/include/asm/types.h
@@ -2,10 +2,11 @@
 #define _ASM_IA64_TYPES_H
 
 /*
- * This file is never included by application software unless explicitly requested (e.g.,
- * via linux/types.h) in which case the application is Linux specific so (user-) name
- * space pollution is not a major issue.  However, for interoperability, libraries still
- * need to be careful to avoid a name clashes.
+ * This file is never included by application software unless explicitly
+ * requested (e.g., via linux/types.h) in which case the application is
+ * Linux specific so (user-) name space pollution is not a major issue.
+ * However, for interoperability, libraries still need to be careful to
+ * avoid naming clashes.
  *
  * Based on <asm-alpha/types.h>.
  *
@@ -13,7 +14,11 @@
  *	David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
  */
 
+#ifdef __KERNEL__
+#include <asm-generic/int-ll64.h>
+#else
 #include <asm-generic/int-l64.h>
+#endif
 
 #ifdef __ASSEMBLY__
 # define __IA64_UL(x)		(x)
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 7ef80e8161c..c745d0aeb6e 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -46,7 +46,7 @@ extern efi_status_t efi_call_phys (void *, ...);
 struct efi efi;
 EXPORT_SYMBOL(efi);
 static efi_runtime_services_t *runtime;
-static unsigned long mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL;
+static u64 mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL;
 
 #define efi_call_virt(f, args...)	(*(f))(args)
 
@@ -356,7 +356,7 @@ efi_get_pal_addr (void)
 
 		if (++pal_code_count > 1) {
 			printk(KERN_ERR "Too many EFI Pal Code memory ranges, "
-			       "dropped @ %lx\n", md->phys_addr);
+			       "dropped @ %llx\n", md->phys_addr);
 			continue;
 		}
 		/*
@@ -490,10 +490,10 @@ efi_init (void)
 		}
 	}
 	if (min_addr != 0UL)
-		printk(KERN_INFO "Ignoring memory below %luMB\n",
+		printk(KERN_INFO "Ignoring memory below %lluMB\n",
 		       min_addr >> 20);
 	if (max_addr != ~0UL)
-		printk(KERN_INFO "Ignoring memory above %luMB\n",
+		printk(KERN_INFO "Ignoring memory above %lluMB\n",
 		       max_addr >> 20);
 
 	efi.systab = __va(ia64_boot_param->efi_systab);
@@ -1066,7 +1066,7 @@ find_memmap_space (void)
  * parts exist, and are WB.
  */
 unsigned long
-efi_memmap_init(unsigned long *s, unsigned long *e)
+efi_memmap_init(u64 *s, u64 *e)
 {
 	struct kern_memdesc *k, *prev = NULL;
 	u64	contig_low=0, contig_high=0;
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 1cce4ceb48a..c259b9467fc 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -850,7 +850,7 @@ EXPORT_SYMBOL(ia64_unreg_MCA_extension);
 
 
 static inline void
-copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat)
+copy_reg(const u64 *fr, u64 fnat, unsigned long *tr, unsigned long *tnat)
 {
 	u64 fslot, tslot, nat;
 	*tr = *fr;
@@ -914,9 +914,9 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
 	struct switch_stack *old_sw;
 	unsigned size = sizeof(struct pt_regs) +
 			sizeof(struct switch_stack) + 16;
-	u64 *old_bspstore, *old_bsp;
-	u64 *new_bspstore, *new_bsp;
-	u64 old_unat, old_rnat, new_rnat, nat;
+	unsigned long *old_bspstore, *old_bsp;
+	unsigned long *new_bspstore, *new_bsp;
+	unsigned long old_unat, old_rnat, new_rnat, nat;
 	u64 slots, loadrs = regs->loadrs;
 	u64 r12 = ms->pmsa_gr[12-1], r13 = ms->pmsa_gr[13-1];
 	u64 ar_bspstore = regs->ar_bspstore;
@@ -968,10 +968,10 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
 	 * loadrs for the new stack and save it in the new pt_regs, where
 	 * ia64_old_stack() can get it.
 	 */
-	old_bspstore = (u64 *)ar_bspstore;
-	old_bsp = (u64 *)ar_bsp;
+	old_bspstore = (unsigned long *)ar_bspstore;
+	old_bsp = (unsigned long *)ar_bsp;
 	slots = ia64_rse_num_regs(old_bspstore, old_bsp);
-	new_bspstore = (u64 *)((u64)current + IA64_RBS_OFFSET);
+	new_bspstore = (unsigned long *)((u64)current + IA64_RBS_OFFSET);
 	new_bsp = ia64_rse_skip_regs(new_bspstore, slots);
 	regs->loadrs = (new_bsp - new_bspstore) * 8 << 16;
 
@@ -1918,9 +1918,9 @@ ia64_mca_init(void)
 	ia64_fptr_t *init_hldlr_ptr_slave = (ia64_fptr_t *)ia64_os_init_dispatch_slave;
 	ia64_fptr_t *mca_hldlr_ptr = (ia64_fptr_t *)ia64_os_mca_dispatch;
 	int i;
-	s64 rc;
+	long rc;
 	struct ia64_sal_retval isrv;
-	u64 timeout = IA64_MCA_RENDEZ_TIMEOUT;	/* platform specific */
+	unsigned long timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */
 	static struct notifier_block default_init_monarch_nb = {
 		.notifier_call = default_monarch_init_process,
 		.priority = 0/* we need to notified last */
diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c
index da3b0cf495a..1481b0a28ca 100644
--- a/arch/ia64/kernel/module.c
+++ b/arch/ia64/kernel/module.c
@@ -171,7 +171,8 @@ apply_imm60 (struct module *mod, struct insn *insn, uint64_t val)
 		return 0;
 	}
 	if (val + ((uint64_t) 1 << 59) >= (1UL << 60)) {
-		printk(KERN_ERR "%s: value %ld out of IMM60 range\n", mod->name, (int64_t) val);
+		printk(KERN_ERR "%s: value %ld out of IMM60 range\n",
+			mod->name, (long) val);
 		return 0;
 	}
 	ia64_patch_imm60((u64) insn, val);
@@ -182,7 +183,8 @@ static int
 apply_imm22 (struct module *mod, struct insn *insn, uint64_t val)
 {
 	if (val + (1 << 21) >= (1 << 22)) {
-		printk(KERN_ERR "%s: value %li out of IMM22 range\n", mod->name, (int64_t)val);
+		printk(KERN_ERR "%s: value %li out of IMM22 range\n",
+			mod->name, (long)val);
 		return 0;
 	}
 	ia64_patch((u64) insn, 0x01fffcfe000UL, (  ((val & 0x200000UL) << 15) /* bit 21 -> 36 */
@@ -196,7 +198,8 @@ static int
 apply_imm21b (struct module *mod, struct insn *insn, uint64_t val)
 {
 	if (val + (1 << 20) >= (1 << 21)) {
-		printk(KERN_ERR "%s: value %li out of IMM21b range\n", mod->name, (int64_t)val);
+		printk(KERN_ERR "%s: value %li out of IMM21b range\n",
+			mod->name, (long)val);
 		return 0;
 	}
 	ia64_patch((u64) insn, 0x11ffffe000UL, (  ((val & 0x100000UL) << 16) /* bit 20 -> 36 */
@@ -701,8 +704,9 @@ do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend,
 	      case RV_PCREL2:
 		if (r_type == R_IA64_PCREL21BI) {
 			if (!is_internal(mod, val)) {
-				printk(KERN_ERR "%s: %s reloc against non-local symbol (%lx)\n",
-				       __func__, reloc_name[r_type], val);
+				printk(KERN_ERR "%s: %s reloc against "
+					"non-local symbol (%lx)\n", __func__,
+					reloc_name[r_type], (unsigned long)val);
 				return -ENOEXEC;
 			}
 			format = RF_INSN21B;
diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c
index a4f19c70aad..fdf6f9d013e 100644
--- a/arch/ia64/kernel/palinfo.c
+++ b/arch/ia64/kernel/palinfo.c
@@ -218,10 +218,10 @@ static int
 cache_info(char *page)
 {
 	char *p = page;
-	u64 i, levels, unique_caches;
+	unsigned long i, levels, unique_caches;
 	pal_cache_config_info_t cci;
 	int j, k;
-	s64 status;
+	long status;
 
 	if ((status = ia64_pal_cache_summary(&levels, &unique_caches)) != 0) {
 		printk(KERN_ERR "ia64_pal_cache_summary=%ld\n", status);
@@ -303,7 +303,7 @@ vm_info(char *page)
 	ia64_ptce_info_t ptce;
 	const char *sep;
 	int i, j;
-	s64 status;
+	long status;
 
 	if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) {
 		printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
@@ -431,9 +431,9 @@ register_info(char *page)
 	char *p = page;
 	u64 reg_info[2];
 	u64 info;
-	u64 phys_stacked;
+	unsigned long phys_stacked;
 	pal_hints_u_t hints;
-	u64 iregs, dregs;
+	unsigned long iregs, dregs;
 	char *info_type[]={
 		"Implemented AR(s)",
 		"AR(s) with read side-effects",
@@ -530,8 +530,8 @@ static char **proc_features[]={
 	NULL, NULL, NULL, NULL,
 };
 
-static char *
-feature_set_info(char *page, u64 avail, u64 status, u64 control, u64 set)
+static char * feature_set_info(char *page, u64 avail, u64 status, u64 control,
+							unsigned long set)
 {
 	char *p = page;
 	char **vf, **v;
@@ -714,7 +714,7 @@ frequency_info(char *page)
 {
 	char *p = page;
 	struct pal_freq_ratio proc, itc, bus;
-	u64 base;
+	unsigned long base;
 
 	if (ia64_pal_freq_base(&base) == -1)
 		p += sprintf(p, "Output clock            : not implemented\n");
@@ -736,43 +736,43 @@ static int
 tr_info(char *page)
 {
 	char *p = page;
-	s64 status;
+	long status;
 	pal_tr_valid_u_t tr_valid;
 	u64 tr_buffer[4];
 	pal_vm_info_1_u_t vm_info_1;
 	pal_vm_info_2_u_t vm_info_2;
-	u64 i, j;
-	u64 max[3], pgm;
+	unsigned long i, j;
+	unsigned long max[3], pgm;
 	struct ifa_reg {
-		u64 valid:1;
-		u64 ig:11;
-		u64 vpn:52;
+		unsigned long valid:1;
+		unsigned long ig:11;
+		unsigned long vpn:52;
 	} *ifa_reg;
 	struct itir_reg {
-		u64 rv1:2;
-		u64 ps:6;
-		u64 key:24;
-		u64 rv2:32;
+		unsigned long rv1:2;
+		unsigned long ps:6;
+		unsigned long key:24;
+		unsigned long rv2:32;
 	} *itir_reg;
 	struct gr_reg {
-		u64 p:1;
-		u64 rv1:1;
-		u64 ma:3;
-		u64 a:1;
-		u64 d:1;
-		u64 pl:2;
-		u64 ar:3;
-		u64 ppn:38;
-		u64 rv2:2;
-		u64 ed:1;
-		u64 ig:11;
+		unsigned long p:1;
+		unsigned long rv1:1;
+		unsigned long ma:3;
+		unsigned long a:1;
+		unsigned long d:1;
+		unsigned long pl:2;
+		unsigned long ar:3;
+		unsigned long ppn:38;
+		unsigned long rv2:2;
+		unsigned long ed:1;
+		unsigned long ig:11;
 	} *gr_reg;
 	struct rid_reg {
-		u64 ig1:1;
-		u64 rv1:1;
-		u64 ig2:6;
-		u64 rid:24;
-		u64 rv2:32;
+		unsigned long ig1:1;
+		unsigned long rv1:1;
+		unsigned long ig2:6;
+		unsigned long rid:24;
+		unsigned long rv2:32;
 	} *rid_reg;
 
 	if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) {
diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c
index eb987386f69..1376da45fd0 100644
--- a/arch/ia64/kernel/pci-dma.c
+++ b/arch/ia64/kernel/pci-dma.c
@@ -91,7 +91,7 @@ int iommu_dma_supported(struct device *dev, u64 mask)
 	   type. Normally this doesn't make any difference, but gives
 	   more gentle handling of IOMMU overflow. */
 	if (iommu_sac_force && (mask >= DMA_BIT_MASK(40))) {
-		dev_info(dev, "Force SAC with mask %lx\n", mask);
+		dev_info(dev, "Force SAC with mask %llx\n", mask);
 		return 0;
 	}
 
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 8a06dc48059..89ad0bbb861 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -312,7 +312,7 @@ typedef struct pfm_context {
 	unsigned long		th_pmcs[PFM_NUM_PMC_REGS];	/* PMC thread save state */
 	unsigned long		th_pmds[PFM_NUM_PMD_REGS];	/* PMD thread save state */
 
-	u64			ctx_saved_psr_up;	/* only contains psr.up value */
+	unsigned long		ctx_saved_psr_up;	/* only contains psr.up value */
 
 	unsigned long		ctx_last_activation;	/* context last activation number for last_cpu */
 	unsigned int		ctx_last_cpu;		/* CPU id of current or last CPU used (SMP only) */
@@ -5213,8 +5213,8 @@ pfm_end_notify_user(pfm_context_t *ctx)
  * main overflow processing routine.
  * it can be called from the interrupt path or explicitly during the context switch code
  */
-static void
-pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs)
+static void pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx,
+				unsigned long pmc0, struct pt_regs *regs)
 {
 	pfm_ovfl_arg_t *ovfl_arg;
 	unsigned long mask;
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 714066aeda7..1b23ec126b6 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -151,9 +151,9 @@ int num_rsvd_regions __initdata;
  * This routine does not assume the incoming segments are sorted.
  */
 int __init
-filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
+filter_rsvd_memory (u64 start, u64 end, void *arg)
 {
-	unsigned long range_start, range_end, prev_start;
+	u64 range_start, range_end, prev_start;
 	void (*func)(unsigned long, unsigned long, int);
 	int i;
 
@@ -191,7 +191,7 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
  * are not filtered out.
  */
 int __init
-filter_memory(unsigned long start, unsigned long end, void *arg)
+filter_memory(u64 start, u64 end, void *arg)
 {
 	void (*func)(unsigned long, unsigned long, int);
 
@@ -397,7 +397,7 @@ find_initrd (void)
 		initrd_start = (unsigned long)__va(ia64_boot_param->initrd_start);
 		initrd_end   = initrd_start+ia64_boot_param->initrd_size;
 
-		printk(KERN_INFO "Initial ramdisk at: 0x%lx (%lu bytes)\n",
+		printk(KERN_INFO "Initial ramdisk at: 0x%lx (%llu bytes)\n",
 		       initrd_start, ia64_boot_param->initrd_size);
 	}
 #endif
@@ -505,9 +505,9 @@ static int __init parse_elfcorehdr(char *arg)
 }
 early_param("elfcorehdr", parse_elfcorehdr);
 
-int __init reserve_elfcorehdr(unsigned long *start, unsigned long *end)
+int __init reserve_elfcorehdr(u64 *start, u64 *end)
 {
-	unsigned long length;
+	u64 length;
 
 	/* We get the address using the kernel command line,
 	 * but the size is extracted from the EFI tables.
@@ -588,7 +588,7 @@ setup_arch (char **cmdline_p)
 	ia64_patch_rse((u64) __start___rse_patchlist, (u64) __end___rse_patchlist);
 #else
 	{
-		u64 num_phys_stacked;
+		unsigned long num_phys_stacked;
 
 		if (ia64_pal_rse_info(&num_phys_stacked, 0) == 0 && num_phys_stacked > 96)
 			ia64_patch_rse((u64) __start___rse_patchlist, (u64) __end___rse_patchlist);
@@ -872,9 +872,9 @@ static void __cpuinit
 get_cache_info(void)
 {
 	unsigned long line_size, max = 1;
-	u64 l, levels, unique_caches;
-        pal_cache_config_info_t cci;
-        s64 status;
+	unsigned long l, levels, unique_caches;
+	pal_cache_config_info_t cci;
+	long status;
 
         status = ia64_pal_cache_summary(&levels, &unique_caches);
         if (status != 0) {
@@ -892,9 +892,9 @@ get_cache_info(void)
 		/* cache_type (data_or_unified)=2 */
 		status = ia64_pal_cache_config_info(l, 2, &cci);
 		if (status != 0) {
-			printk(KERN_ERR
-			       "%s: ia64_pal_cache_config_info(l=%lu, 2) failed (status=%ld)\n",
-			       __func__, l, status);
+			printk(KERN_ERR "%s: ia64_pal_cache_config_info"
+				"(l=%lu, 2) failed (status=%ld)\n",
+				__func__, l, status);
 			max = SMP_CACHE_BYTES;
 			/* The safest setup for "flush_icache_range()" */
 			cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
@@ -914,10 +914,10 @@ get_cache_info(void)
 			/* cache_type (instruction)=1*/
 			status = ia64_pal_cache_config_info(l, 1, &cci);
 			if (status != 0) {
-				printk(KERN_ERR
-				"%s: ia64_pal_cache_config_info(l=%lu, 1) failed (status=%ld)\n",
+				printk(KERN_ERR "%s: ia64_pal_cache_config_info"
+					"(l=%lu, 1) failed (status=%ld)\n",
 					__func__, l, status);
-				/* The safest setup for "flush_icache_range()" */
+				/* The safest setup for flush_icache_range() */
 				cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
 			}
 		}
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index 5230eaafd83..f0c521b0ba4 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -66,7 +66,7 @@ static DEFINE_PER_CPU(unsigned short, shadow_flush_counts[NR_CPUS]) ____cachelin
 #define IPI_KDUMP_CPU_STOP	3
 
 /* This needs to be cacheline aligned because it is written to by *other* CPUs.  */
-static DEFINE_PER_CPU_SHARED_ALIGNED(u64, ipi_operation);
+static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, ipi_operation);
 
 extern void cpu_halt (void);
 
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 2a70af473b3..de100aa7ff0 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -865,7 +865,7 @@ init_smp_config(void)
 void __devinit
 identify_siblings(struct cpuinfo_ia64 *c)
 {
-	s64 status;
+	long status;
 	u16 pltid;
 	pal_logical_to_physical_t info;
 
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 604c1a35db3..4990495d753 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -385,7 +385,7 @@ ia64_init_itm (void)
 
 static cycle_t itc_get_cycles(struct clocksource *cs)
 {
-	u64 lcycle, now, ret;
+	unsigned long lcycle, now, ret;
 
 	if (!itc_jitter_data.itc_jitter)
 		return get_cycles();
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index a8d61a3e9a9..bc80dff1df7 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -306,10 +306,10 @@ static void __cpuinit cpu_cache_sysfs_exit(unsigned int cpu)
 
 static int __cpuinit cpu_cache_sysfs_init(unsigned int cpu)
 {
-	u64 i, levels, unique_caches;
+	unsigned long i, levels, unique_caches;
 	pal_cache_config_info_t cci;
 	int j;
-	s64 status;
+	long status;
 	struct cache_info *this_cache;
 	int num_cache_leaves = 0;
 
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
index 8eff8c1d40a..e6ac3c332d1 100644
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -249,8 +249,7 @@ EXPORT_SYMBOL(uncached_free_page);
  * Called at boot time to build a map of pages that can be used for
  * memory special operations.
  */
-static int __init uncached_build_memmap(unsigned long uc_start,
-					unsigned long uc_end, void *arg)
+static int __init uncached_build_memmap(u64 uc_start, u64 uc_end, void *arg)
 {
 	int nid = paddr_to_nid(uc_start - __IA64_UNCACHED_OFFSET);
 	struct gen_pool *pool = uncached_pools[nid].pool;
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 0ee085efbe2..2f724d2bf29 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -107,10 +107,10 @@ unsigned long bootmap_start;
  * bootmap_start.  This address must be page-aligned.
  */
 static int __init
-find_bootmap_location (unsigned long start, unsigned long end, void *arg)
+find_bootmap_location (u64 start, u64 end, void *arg)
 {
-	unsigned long needed = *(unsigned long *)arg;
-	unsigned long range_start, range_end, free_start;
+	u64 needed = *(unsigned long *)arg;
+	u64 range_start, range_end, free_start;
 	int i;
 
 #if IGNORE_PFN0
@@ -229,8 +229,7 @@ find_memory (void)
 	alloc_per_cpu_data();
 }
 
-static int
-count_pages (u64 start, u64 end, void *arg)
+static int count_pages(u64 start, u64 end, void *arg)
 {
 	unsigned long *count = arg;
 
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index c0f3bee6904..b115b3bbf04 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -422,8 +422,7 @@ retry_pte:
 	return hole_next_pfn - pgdat->node_start_pfn;
 }
 
-int __init
-create_mem_map_page_table (u64 start, u64 end, void *arg)
+int __init create_mem_map_page_table(u64 start, u64 end, void *arg)
 {
 	unsigned long address, start_page, end_page;
 	struct page *map_start, *map_end;
@@ -469,7 +468,7 @@ struct memmap_init_callback_data {
 };
 
 static int __meminit
-virtual_memmap_init (u64 start, u64 end, void *arg)
+virtual_memmap_init(u64 start, u64 end, void *arg)
 {
 	struct memmap_init_callback_data *args;
 	struct page *map_start, *map_end;
@@ -531,8 +530,7 @@ ia64_pfn_valid (unsigned long pfn)
 }
 EXPORT_SYMBOL(ia64_pfn_valid);
 
-int __init
-find_largest_hole (u64 start, u64 end, void *arg)
+int __init find_largest_hole(u64 start, u64 end, void *arg)
 {
 	u64 *max_gap = arg;
 
@@ -548,8 +546,7 @@ find_largest_hole (u64 start, u64 end, void *arg)
 
 #endif /* CONFIG_VIRTUAL_MEM_MAP */
 
-int __init
-register_active_ranges(u64 start, u64 len, int nid)
+int __init register_active_ranges(u64 start, u64 len, int nid)
 {
 	u64 end = start + len;
 
@@ -567,7 +564,7 @@ register_active_ranges(u64 start, u64 len, int nid)
 }
 
 static int __init
-count_reserved_pages (u64 start, u64 end, void *arg)
+count_reserved_pages(u64 start, u64 end, void *arg)
 {
 	unsigned long num_reserved = 0;
 	unsigned long *count = arg;
@@ -580,7 +577,7 @@ count_reserved_pages (u64 start, u64 end, void *arg)
 }
 
 int
-find_max_min_low_pfn (unsigned long start, unsigned long end, void *arg)
+find_max_min_low_pfn (u64 start, u64 end, void *arg)
 {
 	unsigned long pfn_start, pfn_end;
 #ifdef CONFIG_FLATMEM
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index b9f3d7bbb33..f426dc78d95 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -34,7 +34,7 @@
 #include <asm/tlb.h>
 
 static struct {
-	unsigned long mask;	/* mask of supported purge page-sizes */
+	u64 mask;		/* mask of supported purge page-sizes */
 	unsigned long max_bits;	/* log2 of largest supported purge page-size */
 } purge;
 
@@ -328,7 +328,7 @@ void __devinit
 ia64_tlb_init (void)
 {
 	ia64_ptce_info_t uninitialized_var(ptce_info); /* GCC be quiet */
-	unsigned long tr_pgbits;
+	u64 tr_pgbits;
 	long status;
 	pal_vm_info_1_u_t vm_info_1;
 	pal_vm_info_2_u_t vm_info_2;
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 61f1af5c23c..e643373e470 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -163,7 +163,7 @@ add_io_space (struct pci_root_info *info, struct acpi_resource_address64 *addr)
 {
 	struct resource *resource;
 	char *name;
-	u64 base, min, max, base_port;
+	unsigned long base, min, max, base_port;
 	unsigned int sparse = 0, space_nr, len;
 
 	resource = kzalloc(sizeof(*resource), GFP_KERNEL);
@@ -292,7 +292,7 @@ static __devinit acpi_status add_window(struct acpi_resource *res, void *data)
 	window->offset = offset;
 
 	if (insert_resource(root, &window->resource)) {
-		printk(KERN_ERR "alloc 0x%lx-0x%lx from %s for %s failed\n",
+		printk(KERN_ERR "alloc 0x%llx-0x%llx from %s for %s failed\n",
 			window->resource.start, window->resource.end,
 			root->name, info->name);
 	}
@@ -314,8 +314,8 @@ pcibios_setup_root_windows(struct pci_bus *bus, struct pci_controller *ctrl)
 		    (res->end - res->start < 16))
 			continue;
 		if (j >= PCI_BUS_NUM_RESOURCES) {
-			printk("Ignoring range [%lx-%lx] (%lx)\n", res->start,
-					res->end, res->flags);
+			printk("Ignoring range [%#llx-%#llx] (%lx)\n",
+					res->start, res->end, res->flags);
 			continue;
 		}
 		bus->resource[j++] = res;
@@ -728,8 +728,8 @@ extern u8 pci_cache_line_size;
  */
 static void __init set_pci_cacheline_size(void)
 {
-	u64 levels, unique_caches;
-	s64 status;
+	unsigned long levels, unique_caches;
+	long status;
 	pal_cache_config_info_t cci;
 
 	status = ia64_pal_cache_summary(&levels, &unique_caches);
diff --git a/arch/ia64/sn/kernel/io_acpi_init.c b/arch/ia64/sn/kernel/io_acpi_init.c
index d0223abbbbd..fd50ff94302 100644
--- a/arch/ia64/sn/kernel/io_acpi_init.c
+++ b/arch/ia64/sn/kernel/io_acpi_init.c
@@ -40,7 +40,7 @@ struct sn_pcidev_match {
 /*
  * Perform the early IO init in PROM.
  */
-static s64
+static long
 sal_ioif_init(u64 *result)
 {
 	struct ia64_sal_retval isrv = {0,0,0,0};
@@ -492,7 +492,7 @@ void __init
 sn_io_acpi_init(void)
 {
 	u64 result;
-	s64 status;
+	long status;
 
 	/* SN Altix does not follow the IOSAPIC IRQ routing model */
 	acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c
index 57f280dd9de..76645cf6ac5 100644
--- a/arch/ia64/sn/kernel/io_common.c
+++ b/arch/ia64/sn/kernel/io_common.c
@@ -342,7 +342,7 @@ sn_common_bus_fixup(struct pci_bus *bus,
 		struct pcibus_bussoft *b = SN_PCIBUS_BUSSOFT(bus);
 
 		printk(KERN_WARNING "Device ASIC=%u XID=%u PBUSNUM=%u "
-		       "L_IO=%lx L_MEM=%lx BASE=%lx\n",
+		       "L_IO=%llx L_MEM=%llx BASE=%llx\n",
 		       b->bs_asic_type, b->bs_xid, b->bs_persist_busnum,
 		       b->bs_legacy_io, b->bs_legacy_mem, b->bs_base);
 		printk(KERN_WARNING "on node %d but only %d nodes online."
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index 9e6491cf72b..4c7e7479095 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -414,7 +414,7 @@ static int sn_topology_show(struct seq_file *s, void *d)
 		}
 		seq_printf(s, "partition %u %s local "
 			"shubtype %s, "
-			"nasid_mask 0x%016lx, "
+			"nasid_mask 0x%016llx, "
 			"nasid_bits %d:%d, "
 			"system_size %d, "
 			"sharing_size %d, "
@@ -683,7 +683,7 @@ static int sn_hwperf_map_err(int hwperf_err)
  * ioctl for "sn_hwperf" misc device
  */
 static int
-sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg)
+sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, unsigned long arg)
 {
 	struct sn_hwperf_ioctl_args a;
 	struct cpuinfo_ia64 *cdata;
diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
index 2526e5c783a..c76d8dc3aea 100644
--- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
+++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
@@ -36,7 +36,7 @@ static int system_serial_number_open(struct inode *inode, struct file *file)
 
 static int licenseID_show(struct seq_file *s, void *p)
 {
-	seq_printf(s, "0x%lx\n", sn_partition_serial_number_val());
+	seq_printf(s, "0x%llx\n", sn_partition_serial_number_val());
 	return 0;
 }
 
diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c
index 3f864238566..c1bd1cfda32 100644
--- a/arch/ia64/sn/kernel/tiocx.c
+++ b/arch/ia64/sn/kernel/tiocx.c
@@ -368,7 +368,7 @@ static void tio_corelet_reset(nasid_t nasid, int corelet)
 static int is_fpga_tio(int nasid, int *bt)
 {
 	u16 uninitialized_var(ioboard_type);	/* GCC be quiet */
-	s64 rc;
+	long rc;
 
 	rc = ia64_sn_sysctl_ioboard_get(nasid, &ioboard_type);
 	if (rc) {
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
index 2c676cc0541..d13e5a22a55 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
@@ -79,7 +79,7 @@ static int sal_pcibr_error_interrupt(struct pcibus_info *soft)
 
 u16 sn_ioboard_to_pci_bus(struct pci_bus *pci_bus)
 {
-	s64 rc;
+	long rc;
 	u16 uninitialized_var(ioboard);		/* GCC be quiet */
 	nasid_t nasid = NASID_GET(SN_PCIBUS_BUSSOFT(pci_bus)->bs_base);
 
diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c
index 79165122501..35b2a27d2e7 100644
--- a/arch/ia64/sn/pci/tioca_provider.c
+++ b/arch/ia64/sn/pci/tioca_provider.c
@@ -123,7 +123,7 @@ tioca_gart_init(struct tioca_kernel *tioca_kern)
 
 	if (!tmp) {
 		printk(KERN_ERR "%s:  Could not allocate "
-		       "%lu bytes (order %d) for GART\n",
+		       "%llu bytes (order %d) for GART\n",
 		       __func__,
 		       tioca_kern->ca_gart_size,
 		       get_order(tioca_kern->ca_gart_size));
@@ -348,7 +348,7 @@ tioca_dma_d48(struct pci_dev *pdev, u64 paddr)
 	agp_dma_extn = __sn_readq_relaxed(&ca_base->ca_agp_dma_addr_extn);
 	if (node_upper != (agp_dma_extn >> CA_AGP_DMA_NODE_ID_SHFT)) {
 		printk(KERN_ERR "%s:  coretalk upper node (%u) "
-		       "mismatch with ca_agp_dma_addr_extn (%lu)\n",
+		       "mismatch with ca_agp_dma_addr_extn (%llu)\n",
 		       __func__,
 		       node_upper, (agp_dma_extn >> CA_AGP_DMA_NODE_ID_SHFT));
 		return 0;
@@ -367,7 +367,7 @@ tioca_dma_d48(struct pci_dev *pdev, u64 paddr)
  * dma_addr_t is guaranteed to be contiguous in CA bus space.
  */
 static dma_addr_t
-tioca_dma_mapped(struct pci_dev *pdev, u64 paddr, size_t req_size)
+tioca_dma_mapped(struct pci_dev *pdev, unsigned long paddr, size_t req_size)
 {
 	int i, ps, ps_shift, entry, entries, mapsize, last_entry;
 	u64 xio_addr, end_xio_addr;
diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c
index 94e584527f4..012f3b82ee5 100644
--- a/arch/ia64/sn/pci/tioce_provider.c
+++ b/arch/ia64/sn/pci/tioce_provider.c
@@ -493,7 +493,7 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir)
 
 		if (&map->ce_dmamap_list == &ce_kern->ce_dmamap_list) {
 			printk(KERN_WARNING
-			       "%s:  %s - no map found for bus_addr 0x%lx\n",
+			       "%s:  %s - no map found for bus_addr 0x%llx\n",
 			       __func__, pci_name(pdev), bus_addr);
 		} else if (--map->refcnt == 0) {
 			for (i = 0; i < map->ate_count; i++) {
@@ -642,7 +642,7 @@ dma_map_done:
  * in the address.
  */
 static u64
-tioce_dma(struct pci_dev *pdev, u64 paddr, size_t byte_count, int dma_flags)
+tioce_dma(struct pci_dev *pdev, unsigned long  paddr, size_t byte_count, int dma_flags)
 {
 	return tioce_do_dma_map(pdev, paddr, byte_count, 0, dma_flags);
 }
@@ -657,7 +657,7 @@ tioce_dma(struct pci_dev *pdev, u64 paddr, size_t byte_count, int dma_flags)
  * in the address.
  */
 static u64
-tioce_dma_consistent(struct pci_dev *pdev, u64 paddr, size_t byte_count, int dma_flags)
+tioce_dma_consistent(struct pci_dev *pdev, unsigned long  paddr, size_t byte_count, int dma_flags)
 {
 	return tioce_do_dma_map(pdev, paddr, byte_count, 1, dma_flags);
 }
diff --git a/drivers/char/agp/hp-agp.c b/drivers/char/agp/hp-agp.c
index 183ac3fe44f..9c7e2343c39 100644
--- a/drivers/char/agp/hp-agp.c
+++ b/drivers/char/agp/hp-agp.c
@@ -518,8 +518,9 @@ zx1_gart_probe (acpi_handle obj, u32 depth, void *context, void **ret)
 	if (hp_zx1_setup(sba_hpa + HP_ZX1_IOC_OFFSET, lba_hpa))
 		return AE_OK;
 
-	printk(KERN_INFO PFX "Detected HP ZX1 %s AGP chipset (ioc=%lx, lba=%lx)\n",
-		(char *) context, sba_hpa + HP_ZX1_IOC_OFFSET, lba_hpa);
+	printk(KERN_INFO PFX "Detected HP ZX1 %s AGP chipset "
+		"(ioc=%llx, lba=%llx)\n", (char *)context,
+		sba_hpa + HP_ZX1_IOC_OFFSET, lba_hpa);
 
 	hp_zx1_gart_found = 1;
 	return AE_CTRL_TERMINATE; /* we only support one bridge; quit looking */
diff --git a/drivers/firmware/pcdp.c b/drivers/firmware/pcdp.c
index 58e9f8e457f..51e0e2d8fac 100644
--- a/drivers/firmware/pcdp.c
+++ b/drivers/firmware/pcdp.c
@@ -28,10 +28,10 @@ setup_serial_console(struct pcdp_uart *uart)
 	char parity;
 
 	mmio = (uart->addr.space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY);
-	p += sprintf(p, "uart8250,%s,0x%lx",
+	p += sprintf(p, "uart8250,%s,0x%llx",
 		mmio ? "mmio" : "io", uart->addr.address);
 	if (uart->baud) {
-		p += sprintf(p, ",%lu", uart->baud);
+		p += sprintf(p, ",%llu", uart->baud);
 		if (uart->bits) {
 			switch (uart->parity) {
 			    case 0x2: parity = 'e'; break;
diff --git a/include/linux/efi.h b/include/linux/efi.h
index bb66feb164b..ce4581fbc08 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -101,7 +101,7 @@ typedef struct {
 	u64 attribute;
 } efi_memory_desc_t;
 
-typedef int (*efi_freemem_callback_t) (unsigned long start, unsigned long end, void *arg);
+typedef int (*efi_freemem_callback_t) (u64 start, u64 end, void *arg);
 
 /*
  * Types and defines for Time Services
-- 
cgit v1.2.3-70-g09d2


From 14fa56917d73d823538151b0429d98211fa439c1 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.ml.walleij@gmail.com>
Date: Thu, 21 May 2009 23:17:06 +0200
Subject: mfd: add U300 AB3100 core support

This adds a core driver for the AB3100 mixed-signal circuit
found in the ST-Ericsson U300 series platforms. This driver
is a singleton proxy for all accesses to the AB3100
sub-drivers which will be merged on top of this one, RTC,
regulators, battery and system power control, vibrator,
LEDs, and an ALSA codec.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Reviewed-by: Mike Rapoport <mike@compulab.co.il>
Reviewed-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig        |  14 +
 drivers/mfd/Makefile       |   3 +-
 drivers/mfd/ab3100-core.c  | 991 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/ab3100.h | 103 +++++
 4 files changed, 1110 insertions(+), 1 deletion(-)
 create mode 100644 drivers/mfd/ab3100-core.c
 create mode 100644 include/linux/mfd/ab3100.h

(limited to 'include')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index ee3927ab11e..61f0346650b 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -241,6 +241,20 @@ config PCF50633_GPIO
 	 Say yes here if you want to include support GPIO for pins on
 	 the PCF50633 chip.
 
+config AB3100_CORE
+	tristate "ST-Ericsson AB3100 Mixed Signal Circuit core functions"
+	depends on I2C
+	default y if ARCH_U300
+	help
+	  Select this to enable the AB3100 Mixed Signal IC core
+	  functionality. This connects to a AB3100 on the I2C bus
+	  and expose a number of symbols needed for dependent devices
+	  to read and write registers and subscribe to events from
+	  this multi-functional IC. This is needed to use other features
+	  of the AB3100 such as battery-backed RTC, charging control,
+	  LEDs, vibrator, system power and temperature, power management
+	  and ALSA sound.
+
 endmenu
 
 menu "Multimedia Capabilities Port drivers"
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 3afb5192e4d..f5f337143e6 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -40,4 +40,5 @@ obj-$(CONFIG_PMIC_DA903X)	+= da903x.o
 
 obj-$(CONFIG_MFD_PCF50633)	+= pcf50633-core.o
 obj-$(CONFIG_PCF50633_ADC)	+= pcf50633-adc.o
-obj-$(CONFIG_PCF50633_GPIO)	+= pcf50633-gpio.o
\ No newline at end of file
+obj-$(CONFIG_PCF50633_GPIO)	+= pcf50633-gpio.o
+obj-$(CONFIG_AB3100_CORE)	+= ab3100-core.o
diff --git a/drivers/mfd/ab3100-core.c b/drivers/mfd/ab3100-core.c
new file mode 100644
index 00000000000..13e7d7bfe85
--- /dev/null
+++ b/drivers/mfd/ab3100-core.c
@@ -0,0 +1,991 @@
+/*
+ * Copyright (C) 2007-2009 ST-Ericsson
+ * License terms: GNU General Public License (GPL) version 2
+ * Low-level core for exclusive access to the AB3100 IC on the I2C bus
+ * and some basic chip-configuration.
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ */
+
+#include <linux/i2c.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/notifier.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/workqueue.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+#include <linux/mfd/ab3100.h>
+
+/* These are the only registers inside AB3100 used in this main file */
+
+/* Interrupt event registers */
+#define AB3100_EVENTA1		0x21
+#define AB3100_EVENTA2		0x22
+#define AB3100_EVENTA3		0x23
+
+/* AB3100 DAC converter registers */
+#define AB3100_DIS		0x00
+#define AB3100_D0C		0x01
+#define AB3100_D1C		0x02
+#define AB3100_D2C		0x03
+#define AB3100_D3C		0x04
+
+/* Chip ID register */
+#define AB3100_CID		0x20
+
+/* AB3100 interrupt registers */
+#define AB3100_IMRA1		0x24
+#define AB3100_IMRA2		0x25
+#define AB3100_IMRA3		0x26
+#define AB3100_IMRB1		0x2B
+#define AB3100_IMRB2		0x2C
+#define AB3100_IMRB3		0x2D
+
+/* System Power Monitoring and control registers */
+#define AB3100_MCA		0x2E
+#define AB3100_MCB		0x2F
+
+/* SIM power up */
+#define AB3100_SUP		0x50
+
+/*
+ * I2C communication
+ *
+ * The AB3100 is usually assigned address 0x48 (7-bit)
+ * The chip is defined in the platform i2c_board_data section.
+ */
+static unsigned short normal_i2c[] = { 0x48, I2C_CLIENT_END };
+I2C_CLIENT_INSMOD_1(ab3100);
+
+u8 ab3100_get_chip_type(struct ab3100 *ab3100)
+{
+	u8 chip = ABUNKNOWN;
+
+	switch (ab3100->chip_id & 0xf0) {
+	case  0xa0:
+		chip = AB3000;
+		break;
+	case  0xc0:
+		chip = AB3100;
+		break;
+	}
+	return chip;
+}
+EXPORT_SYMBOL(ab3100_get_chip_type);
+
+int ab3100_set_register(struct ab3100 *ab3100, u8 reg, u8 regval)
+{
+	u8 regandval[2] = {reg, regval};
+	int err;
+
+	err = mutex_lock_interruptible(&ab3100->access_mutex);
+	if (err)
+		return err;
+
+	/*
+	 * A two-byte write message with the first byte containing the register
+	 * number and the second byte containing the value to be written
+	 * effectively sets a register in the AB3100.
+	 */
+	err = i2c_master_send(ab3100->i2c_client, regandval, 2);
+	if (err < 0) {
+		dev_err(ab3100->dev,
+			"write error (write register): %d\n",
+			err);
+	} else if (err != 2) {
+		dev_err(ab3100->dev,
+			"write error (write register) "
+			"%d bytes transferred (expected 2)\n",
+			err);
+		err = -EIO;
+	} else {
+		/* All is well */
+		err = 0;
+	}
+	mutex_unlock(&ab3100->access_mutex);
+	return 0;
+}
+EXPORT_SYMBOL(ab3100_set_register);
+
+/*
+ * The test registers exist at an I2C bus address up one
+ * from the ordinary base. They are not supposed to be used
+ * in production code, but sometimes you have to do that
+ * anyway. It's currently only used from this file so declare
+ * it static and do not export.
+ */
+static int ab3100_set_test_register(struct ab3100 *ab3100,
+				    u8 reg, u8 regval)
+{
+	u8 regandval[2] = {reg, regval};
+	int err;
+
+	err = mutex_lock_interruptible(&ab3100->access_mutex);
+	if (err)
+		return err;
+
+	err = i2c_master_send(ab3100->testreg_client, regandval, 2);
+	if (err < 0) {
+		dev_err(ab3100->dev,
+			"write error (write test register): %d\n",
+			err);
+	} else if (err != 2) {
+		dev_err(ab3100->dev,
+			"write error (write test register) "
+			"%d bytes transferred (expected 2)\n",
+			err);
+		err = -EIO;
+	} else {
+		/* All is well */
+		err = 0;
+	}
+	mutex_unlock(&ab3100->access_mutex);
+
+	return err;
+}
+
+int ab3100_get_register(struct ab3100 *ab3100, u8 reg, u8 *regval)
+{
+	int err;
+
+	err = mutex_lock_interruptible(&ab3100->access_mutex);
+	if (err)
+		return err;
+
+	/*
+	 * AB3100 require an I2C "stop" command between each message, else
+	 * it will not work. The only way of achieveing this with the
+	 * message transport layer is to send the read and write messages
+	 * separately.
+	 */
+	err = i2c_master_send(ab3100->i2c_client, &reg, 1);
+	if (err < 0) {
+		dev_err(ab3100->dev,
+			"write error (send register address): %d\n",
+			err);
+		goto get_reg_out_unlock;
+	} else if (err != 1) {
+		dev_err(ab3100->dev,
+			"write error (send register address) "
+			"%d bytes transferred (expected 1)\n",
+			err);
+		err = -EIO;
+		goto get_reg_out_unlock;
+	} else {
+		/* All is well */
+		err = 0;
+	}
+
+	err = i2c_master_recv(ab3100->i2c_client, regval, 1);
+	if (err < 0) {
+		dev_err(ab3100->dev,
+			"write error (read register): %d\n",
+			err);
+		goto get_reg_out_unlock;
+	} else if (err != 1) {
+		dev_err(ab3100->dev,
+			"write error (read register) "
+			"%d bytes transferred (expected 1)\n",
+			err);
+		err = -EIO;
+		goto get_reg_out_unlock;
+	} else {
+		/* All is well */
+		err = 0;
+	}
+
+ get_reg_out_unlock:
+	mutex_unlock(&ab3100->access_mutex);
+	return err;
+}
+EXPORT_SYMBOL(ab3100_get_register);
+
+int ab3100_get_register_page(struct ab3100 *ab3100,
+			     u8 first_reg, u8 *regvals, u8 numregs)
+{
+	int err;
+
+	if (ab3100->chip_id == 0xa0 ||
+	    ab3100->chip_id == 0xa1)
+		/* These don't support paged reads */
+		return -EIO;
+
+	err = mutex_lock_interruptible(&ab3100->access_mutex);
+	if (err)
+		return err;
+
+	/*
+	 * Paged read also require an I2C "stop" command.
+	 */
+	err = i2c_master_send(ab3100->i2c_client, &first_reg, 1);
+	if (err < 0) {
+		dev_err(ab3100->dev,
+			"write error (send first register address): %d\n",
+			err);
+		goto get_reg_page_out_unlock;
+	} else if (err != 1) {
+		dev_err(ab3100->dev,
+			"write error (send first register address) "
+			"%d bytes transferred (expected 1)\n",
+			err);
+		err = -EIO;
+		goto get_reg_page_out_unlock;
+	}
+
+	err = i2c_master_recv(ab3100->i2c_client, regvals, numregs);
+	if (err < 0) {
+		dev_err(ab3100->dev,
+			"write error (read register page): %d\n",
+			err);
+		goto get_reg_page_out_unlock;
+	} else if (err != numregs) {
+		dev_err(ab3100->dev,
+			"write error (read register page) "
+			"%d bytes transferred (expected %d)\n",
+			err, numregs);
+		err = -EIO;
+		goto get_reg_page_out_unlock;
+	}
+
+	/* All is well */
+	err = 0;
+
+ get_reg_page_out_unlock:
+	mutex_unlock(&ab3100->access_mutex);
+	return err;
+}
+EXPORT_SYMBOL(ab3100_get_register_page);
+
+int ab3100_mask_and_set_register(struct ab3100 *ab3100,
+				 u8 reg, u8 andmask, u8 ormask)
+{
+	u8 regandval[2] = {reg, 0};
+	int err;
+
+	err = mutex_lock_interruptible(&ab3100->access_mutex);
+	if (err)
+		return err;
+
+	/* First read out the target register */
+	err = i2c_master_send(ab3100->i2c_client, &reg, 1);
+	if (err < 0) {
+		dev_err(ab3100->dev,
+			"write error (maskset send address): %d\n",
+			err);
+		goto get_maskset_unlock;
+	} else if (err != 1) {
+		dev_err(ab3100->dev,
+			"write error (maskset send address) "
+			"%d bytes transferred (expected 1)\n",
+			err);
+		err = -EIO;
+		goto get_maskset_unlock;
+	}
+
+	err = i2c_master_recv(ab3100->i2c_client, &regandval[1], 1);
+	if (err < 0) {
+		dev_err(ab3100->dev,
+			"write error (maskset read register): %d\n",
+			err);
+		goto get_maskset_unlock;
+	} else if (err != 1) {
+		dev_err(ab3100->dev,
+			"write error (maskset read register) "
+			"%d bytes transferred (expected 1)\n",
+			err);
+		err = -EIO;
+		goto get_maskset_unlock;
+	}
+
+	/* Modify the register */
+	regandval[1] &= andmask;
+	regandval[1] |= ormask;
+
+	/* Write the register */
+	err = i2c_master_send(ab3100->i2c_client, regandval, 2);
+	if (err < 0) {
+		dev_err(ab3100->dev,
+			"write error (write register): %d\n",
+			err);
+		goto get_maskset_unlock;
+	} else if (err != 2) {
+		dev_err(ab3100->dev,
+			"write error (write register) "
+			"%d bytes transferred (expected 2)\n",
+			err);
+		err = -EIO;
+		goto get_maskset_unlock;
+	}
+
+	/* All is well */
+	err = 0;
+
+ get_maskset_unlock:
+	mutex_unlock(&ab3100->access_mutex);
+	return err;
+}
+EXPORT_SYMBOL(ab3100_mask_and_set_register);
+
+/*
+ * Register a simple callback for handling any AB3100 events.
+ */
+int ab3100_event_register(struct ab3100 *ab3100,
+			  struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&ab3100->event_subscribers,
+					       nb);
+}
+EXPORT_SYMBOL(ab3100_event_register);
+
+/*
+ * Remove a previously registered callback.
+ */
+int ab3100_event_unregister(struct ab3100 *ab3100,
+			    struct notifier_block *nb)
+{
+  return blocking_notifier_chain_unregister(&ab3100->event_subscribers,
+					    nb);
+}
+EXPORT_SYMBOL(ab3100_event_unregister);
+
+
+int ab3100_event_registers_startup_state_get(struct ab3100 *ab3100,
+					     u32 *fatevent)
+{
+	if (!ab3100->startup_events_read)
+		return -EAGAIN; /* Try again later */
+	*fatevent = ab3100->startup_events;
+	return 0;
+}
+EXPORT_SYMBOL(ab3100_event_registers_startup_state_get);
+
+/* Interrupt handling worker */
+static void ab3100_work(struct work_struct *work)
+{
+	struct ab3100 *ab3100 = container_of(work, struct ab3100, work);
+	u8 event_regs[3];
+	u32 fatevent;
+	int err;
+
+	err = ab3100_get_register_page(ab3100, AB3100_EVENTA1,
+				       event_regs, 3);
+	if (err)
+		goto err_event_wq;
+
+	fatevent = (event_regs[0] << 16) |
+		(event_regs[1] << 8) |
+		event_regs[2];
+
+	if (!ab3100->startup_events_read) {
+		ab3100->startup_events = fatevent;
+		ab3100->startup_events_read = true;
+	}
+	/*
+	 * The notified parties will have to mask out the events
+	 * they're interested in and react to them. They will be
+	 * notified on all events, then they use the fatevent value
+	 * to determine if they're interested.
+	 */
+	blocking_notifier_call_chain(&ab3100->event_subscribers,
+				     fatevent, NULL);
+
+	dev_dbg(ab3100->dev,
+		"IRQ Event: 0x%08x\n", fatevent);
+
+	/* By now the IRQ should be acked and deasserted so enable it again */
+	enable_irq(ab3100->i2c_client->irq);
+	return;
+
+ err_event_wq:
+	dev_dbg(ab3100->dev,
+		"error in event workqueue\n");
+	/* Enable the IRQ anyway, what choice do we have? */
+	enable_irq(ab3100->i2c_client->irq);
+	return;
+}
+
+static irqreturn_t ab3100_irq_handler(int irq, void *data)
+{
+	struct ab3100 *ab3100 = data;
+	/*
+	 * Disable the IRQ and dispatch a worker to handle the
+	 * event. Since the chip resides on I2C this is slow
+	 * stuff and we will re-enable the interrupts once th
+	 * worker has finished.
+	 */
+	disable_irq(ab3100->i2c_client->irq);
+	schedule_work(&ab3100->work);
+	return IRQ_HANDLED;
+}
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * Some debugfs entries only exposed if we're using debug
+ */
+static int ab3100_registers_print(struct seq_file *s, void *p)
+{
+	struct ab3100 *ab3100 = s->private;
+	u8 value;
+	u8 reg;
+
+	seq_printf(s, "AB3100 registers:\n");
+
+	for (reg = 0; reg < 0xff; reg++) {
+		ab3100_get_register(ab3100, reg, &value);
+		seq_printf(s, "[0x%x]:  0x%x\n", reg, value);
+	}
+	return 0;
+}
+
+static int ab3100_registers_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ab3100_registers_print, inode->i_private);
+}
+
+static const struct file_operations ab3100_registers_fops = {
+	.open = ab3100_registers_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+	.owner = THIS_MODULE,
+};
+
+struct ab3100_get_set_reg_priv {
+	struct ab3100 *ab3100;
+	bool mode;
+};
+
+static int ab3100_get_set_reg_open_file(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+	return 0;
+}
+
+static int ab3100_get_set_reg(struct file *file,
+			      const char __user *user_buf,
+			      size_t count, loff_t *ppos)
+{
+	struct ab3100_get_set_reg_priv *priv = file->private_data;
+	struct ab3100 *ab3100 = priv->ab3100;
+	char buf[32];
+	int buf_size;
+	int regp;
+	unsigned long user_reg;
+	int err;
+	int i = 0;
+
+	/* Get userspace string and assure termination */
+	buf_size = min(count, (sizeof(buf)-1));
+	if (copy_from_user(buf, user_buf, buf_size))
+		return -EFAULT;
+	buf[buf_size] = 0;
+
+	/*
+	 * The idea is here to parse a string which is either
+	 * "0xnn" for reading a register, or "0xaa 0xbb" for
+	 * writing 0xbb to the register 0xaa. First move past
+	 * whitespace and then begin to parse the register.
+	 */
+	while ((i < buf_size) && (buf[i] == ' '))
+		i++;
+	regp = i;
+
+	/*
+	 * Advance pointer to end of string then terminate
+	 * the register string. This is needed to satisfy
+	 * the strict_strtoul() function.
+	 */
+	while ((i < buf_size) && (buf[i] != ' '))
+		i++;
+	buf[i] = '\0';
+
+	err = strict_strtoul(&buf[regp], 16, &user_reg);
+	if (err)
+		return err;
+	if (user_reg > 0xff)
+		return -EINVAL;
+
+	/* Either we read or we write a register here */
+	if (!priv->mode) {
+		/* Reading */
+		u8 reg = (u8) user_reg;
+		u8 regvalue;
+
+		ab3100_get_register(ab3100, reg, &regvalue);
+
+		dev_info(ab3100->dev,
+			 "debug read AB3100 reg[0x%02x]: 0x%02x\n",
+			 reg, regvalue);
+	} else {
+		int valp;
+		unsigned long user_value;
+		u8 reg = (u8) user_reg;
+		u8 value;
+		u8 regvalue;
+
+		/*
+		 * Writing, we need some value to write to
+		 * the register so keep parsing the string
+		 * from userspace.
+		 */
+		i++;
+		while ((i < buf_size) && (buf[i] == ' '))
+			i++;
+		valp = i;
+		while ((i < buf_size) && (buf[i] != ' '))
+			i++;
+		buf[i] = '\0';
+
+		err = strict_strtoul(&buf[valp], 16, &user_value);
+		if (err)
+			return err;
+		if (user_reg > 0xff)
+			return -EINVAL;
+
+		value = (u8) user_value;
+		ab3100_set_register(ab3100, reg, value);
+		ab3100_get_register(ab3100, reg, &regvalue);
+
+		dev_info(ab3100->dev,
+			 "debug write reg[0x%02x] with 0x%02x, "
+			 "after readback: 0x%02x\n",
+			 reg, value, regvalue);
+	}
+	return buf_size;
+}
+
+static const struct file_operations ab3100_get_set_reg_fops = {
+	.open = ab3100_get_set_reg_open_file,
+	.write = ab3100_get_set_reg,
+};
+
+static struct dentry *ab3100_dir;
+static struct dentry *ab3100_reg_file;
+static struct ab3100_get_set_reg_priv ab3100_get_priv;
+static struct dentry *ab3100_get_reg_file;
+static struct ab3100_get_set_reg_priv ab3100_set_priv;
+static struct dentry *ab3100_set_reg_file;
+
+static void ab3100_setup_debugfs(struct ab3100 *ab3100)
+{
+	int err;
+
+	ab3100_dir = debugfs_create_dir("ab3100", NULL);
+	if (!ab3100_dir)
+		goto exit_no_debugfs;
+
+	ab3100_reg_file = debugfs_create_file("registers",
+				S_IRUGO, ab3100_dir, ab3100,
+				&ab3100_registers_fops);
+	if (!ab3100_reg_file) {
+		err = -ENOMEM;
+		goto exit_destroy_dir;
+	}
+
+	ab3100_get_priv.ab3100 = ab3100;
+	ab3100_get_priv.mode = false;
+	ab3100_get_reg_file = debugfs_create_file("get_reg",
+				S_IWUGO, ab3100_dir, &ab3100_get_priv,
+				&ab3100_get_set_reg_fops);
+	if (!ab3100_get_reg_file) {
+		err = -ENOMEM;
+		goto exit_destroy_reg;
+	}
+
+	ab3100_set_priv.ab3100 = ab3100;
+	ab3100_set_priv.mode = true;
+	ab3100_set_reg_file = debugfs_create_file("set_reg",
+				S_IWUGO, ab3100_dir, &ab3100_set_priv,
+				&ab3100_get_set_reg_fops);
+	if (!ab3100_set_reg_file) {
+		err = -ENOMEM;
+		goto exit_destroy_get_reg;
+	}
+	return;
+
+ exit_destroy_get_reg:
+	debugfs_remove(ab3100_get_reg_file);
+ exit_destroy_reg:
+	debugfs_remove(ab3100_reg_file);
+ exit_destroy_dir:
+	debugfs_remove(ab3100_dir);
+ exit_no_debugfs:
+	return;
+}
+static inline void ab3100_remove_debugfs(void)
+{
+	debugfs_remove(ab3100_set_reg_file);
+	debugfs_remove(ab3100_get_reg_file);
+	debugfs_remove(ab3100_reg_file);
+	debugfs_remove(ab3100_dir);
+}
+#else
+static inline void ab3100_setup_debugfs(struct ab3100 *ab3100)
+{
+}
+static inline void ab3100_remove_debugfs(void)
+{
+}
+#endif
+
+/*
+ * Basic set-up, datastructure creation/destruction and I2C interface.
+ * This sets up a default config in the AB3100 chip so that it
+ * will work as expected.
+ */
+
+struct ab3100_init_setting {
+	u8 abreg;
+	u8 setting;
+};
+
+static const struct ab3100_init_setting __initdata
+ab3100_init_settings[] = {
+	{
+		.abreg = AB3100_MCA,
+		.setting = 0x01
+	}, {
+		.abreg = AB3100_MCB,
+		.setting = 0x30
+	}, {
+		.abreg = AB3100_IMRA1,
+		.setting = 0x00
+	}, {
+		.abreg = AB3100_IMRA2,
+		.setting = 0xFF
+	}, {
+		.abreg = AB3100_IMRA3,
+		.setting = 0x01
+	}, {
+		.abreg = AB3100_IMRB1,
+		.setting = 0xFF
+	}, {
+		.abreg = AB3100_IMRB2,
+		.setting = 0xFF
+	}, {
+		.abreg = AB3100_IMRB3,
+		.setting = 0xFF
+	}, {
+		.abreg = AB3100_SUP,
+		.setting = 0x00
+	}, {
+		.abreg = AB3100_DIS,
+		.setting = 0xF0
+	}, {
+		.abreg = AB3100_D0C,
+		.setting = 0x00
+	}, {
+		.abreg = AB3100_D1C,
+		.setting = 0x00
+	}, {
+		.abreg = AB3100_D2C,
+		.setting = 0x00
+	}, {
+		.abreg = AB3100_D3C,
+		.setting = 0x00
+	},
+};
+
+static int __init ab3100_setup(struct ab3100 *ab3100)
+{
+	int err = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ab3100_init_settings); i++) {
+		err = ab3100_set_register(ab3100,
+					  ab3100_init_settings[i].abreg,
+					  ab3100_init_settings[i].setting);
+		if (err)
+			goto exit_no_setup;
+	}
+
+	/*
+	 * Special trick to make the AB3100 use the 32kHz clock (RTC)
+	 * bit 3 in test registe 0x02 is a special, undocumented test
+	 * register bit that only exist in AB3100 P1E
+	 */
+	if (ab3100->chip_id == 0xc4) {
+		dev_warn(ab3100->dev,
+			 "AB3100 P1E variant detected, "
+			 "forcing chip to 32KHz\n");
+		err = ab3100_set_test_register(ab3100, 0x02, 0x08);
+	}
+
+ exit_no_setup:
+	return err;
+}
+
+/*
+ * Here we define all the platform devices that appear
+ * as children of the AB3100. These are regular platform
+ * devices with the IORESOURCE_IO .start and .end set
+ * to correspond to the internal AB3100 register range
+ * mapping to the corresponding subdevice.
+ */
+
+#define AB3100_DEVICE(devname, devid)				\
+static struct platform_device ab3100_##devname##_device = {	\
+	.name		= devid,				\
+	.id		= -1,					\
+}
+
+/*
+ * This lists all the subdevices and corresponding register
+ * ranges.
+ */
+AB3100_DEVICE(dac, "ab3100-dac");
+AB3100_DEVICE(leds, "ab3100-leds");
+AB3100_DEVICE(power, "ab3100-power");
+AB3100_DEVICE(regulators, "ab3100-regulators");
+AB3100_DEVICE(sim, "ab3100-sim");
+AB3100_DEVICE(uart, "ab3100-uart");
+AB3100_DEVICE(rtc, "ab3100-rtc");
+AB3100_DEVICE(charger, "ab3100-charger");
+AB3100_DEVICE(boost, "ab3100-boost");
+AB3100_DEVICE(adc, "ab3100-adc");
+AB3100_DEVICE(fuelgauge, "ab3100-fuelgauge");
+AB3100_DEVICE(vibrator, "ab3100-vibrator");
+AB3100_DEVICE(otp, "ab3100-otp");
+AB3100_DEVICE(codec, "ab3100-codec");
+
+static struct platform_device *
+ab3100_platform_devs[] = {
+	&ab3100_dac_device,
+	&ab3100_leds_device,
+	&ab3100_power_device,
+	&ab3100_regulators_device,
+	&ab3100_sim_device,
+	&ab3100_uart_device,
+	&ab3100_rtc_device,
+	&ab3100_charger_device,
+	&ab3100_boost_device,
+	&ab3100_adc_device,
+	&ab3100_fuelgauge_device,
+	&ab3100_vibrator_device,
+	&ab3100_otp_device,
+	&ab3100_codec_device,
+};
+
+struct ab_family_id {
+	u8	id;
+	char	*name;
+};
+
+static const struct ab_family_id ids[] __initdata = {
+	/* AB3100 */
+	{
+		.id = 0xc0,
+		.name = "P1A"
+	}, {
+		.id = 0xc1,
+		.name = "P1B"
+	}, {
+		.id = 0xc2,
+		.name = "P1C"
+	}, {
+		.id = 0xc3,
+		.name = "P1D"
+	}, {
+		.id = 0xc4,
+		.name = "P1E"
+	}, {
+		.id = 0xc5,
+		.name = "P1F/R1A"
+	}, {
+		.id = 0xc6,
+		.name = "P1G/R1A"
+	}, {
+		.id = 0xc7,
+		.name = "P2A/R2A"
+	}, {
+		.id = 0xc8,
+		.name = "P2B/R2B"
+	},
+	/* AB3000 variants, not supported */
+	{
+		.id = 0xa0
+	}, {
+		.id = 0xa1
+	}, {
+		.id = 0xa2
+	}, {
+		.id = 0xa3
+	}, {
+		.id = 0xa4
+	}, {
+		.id = 0xa5
+	}, {
+		.id = 0xa6
+	}, {
+		.id = 0xa7
+	},
+	/* Terminator */
+	{
+		.id = 0x00,
+	},
+};
+
+static int __init ab3100_probe(struct i2c_client *client,
+			const struct i2c_device_id *id)
+{
+	struct ab3100 *ab3100;
+	int err;
+	int i;
+
+	ab3100 = kzalloc(sizeof(struct ab3100), GFP_KERNEL);
+	if (!ab3100) {
+		dev_err(&client->dev, "could not allocate AB3100 device\n");
+		return -ENOMEM;
+	}
+
+	/* Initialize data structure */
+	mutex_init(&ab3100->access_mutex);
+	BLOCKING_INIT_NOTIFIER_HEAD(&ab3100->event_subscribers);
+
+	ab3100->i2c_client = client;
+	ab3100->dev = &ab3100->i2c_client->dev;
+
+	i2c_set_clientdata(client, ab3100);
+
+	/* Read chip ID register */
+	err = ab3100_get_register(ab3100, AB3100_CID,
+				  &ab3100->chip_id);
+	if (err) {
+		dev_err(&client->dev,
+			"could not communicate with the AB3100 analog "
+			"baseband chip\n");
+		goto exit_no_detect;
+	}
+
+	for (i = 0; ids[i].id != 0x0; i++) {
+		if (ids[i].id == ab3100->chip_id) {
+			if (ids[i].name != NULL) {
+				snprintf(&ab3100->chip_name[0],
+					 sizeof(ab3100->chip_name) - 1,
+					 "AB3100 %s",
+					 ids[i].name);
+				break;
+			} else {
+				dev_err(&client->dev,
+					"AB3000 is not supported\n");
+				goto exit_no_detect;
+			}
+		}
+	}
+
+	if (ids[i].id == 0x0) {
+		dev_err(&client->dev, "unknown analog baseband chip id: 0x%x\n",
+			ab3100->chip_id);
+		dev_err(&client->dev, "accepting it anyway. Please update "
+			"the driver.\n");
+		goto exit_no_detect;
+	}
+
+	dev_info(&client->dev, "Detected chip: %s\n",
+		 &ab3100->chip_name[0]);
+
+	/* Attach a second dummy i2c_client to the test register address */
+	ab3100->testreg_client = i2c_new_dummy(client->adapter,
+						     client->addr + 1);
+	if (!ab3100->testreg_client) {
+		err = -ENOMEM;
+		goto exit_no_testreg_client;
+	}
+
+	strlcpy(ab3100->testreg_client->name, id->name,
+		sizeof(ab3100->testreg_client->name));
+
+	err = ab3100_setup(ab3100);
+	if (err)
+		goto exit_no_setup;
+
+	INIT_WORK(&ab3100->work, ab3100_work);
+
+	/* This real unpredictable IRQ is of course sampled for entropy */
+	err = request_irq(client->irq, ab3100_irq_handler,
+			  IRQF_DISABLED | IRQF_SAMPLE_RANDOM,
+			  "AB3100 IRQ", ab3100);
+	if (err)
+		goto exit_no_irq;
+
+	/* Set parent and a pointer back to the container in device data */
+	for (i = 0; i < ARRAY_SIZE(ab3100_platform_devs); i++) {
+		ab3100_platform_devs[i]->dev.parent =
+			&client->dev;
+		platform_set_drvdata(ab3100_platform_devs[i], ab3100);
+	}
+
+	/* Register the platform devices */
+	platform_add_devices(ab3100_platform_devs,
+			     ARRAY_SIZE(ab3100_platform_devs));
+
+	ab3100_setup_debugfs(ab3100);
+
+	return 0;
+
+ exit_no_irq:
+ exit_no_setup:
+	i2c_unregister_device(ab3100->testreg_client);
+ exit_no_testreg_client:
+ exit_no_detect:
+	kfree(ab3100);
+	return err;
+}
+
+static int __exit ab3100_remove(struct i2c_client *client)
+{
+	struct ab3100 *ab3100 = i2c_get_clientdata(client);
+	int i;
+
+	/* Unregister subdevices */
+	for (i = 0; i < ARRAY_SIZE(ab3100_platform_devs); i++)
+		platform_device_unregister(ab3100_platform_devs[i]);
+
+	ab3100_remove_debugfs();
+	i2c_unregister_device(ab3100->testreg_client);
+
+	/*
+	 * At this point, all subscribers should have unregistered
+	 * their notifiers so deactivate IRQ
+	 */
+	free_irq(client->irq, ab3100);
+	kfree(ab3100);
+	return 0;
+}
+
+static const struct i2c_device_id ab3100_id[] = {
+	{ "ab3100", ab3100 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, ab3100_id);
+
+static struct i2c_driver ab3100_driver = {
+	.driver = {
+		.name	= "ab3100",
+		.owner	= THIS_MODULE,
+	},
+	.id_table	= ab3100_id,
+	.probe		= ab3100_probe,
+	.remove		= __exit_p(ab3100_remove),
+};
+
+static int __init ab3100_i2c_init(void)
+{
+	return i2c_add_driver(&ab3100_driver);
+}
+
+static void __exit ab3100_i2c_exit(void)
+{
+	i2c_del_driver(&ab3100_driver);
+}
+
+subsys_initcall(ab3100_i2c_init);
+module_exit(ab3100_i2c_exit);
+
+MODULE_AUTHOR("Linus Walleij <linus.walleij@stericsson.com>");
+MODULE_DESCRIPTION("AB3100 core driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/ab3100.h b/include/linux/mfd/ab3100.h
new file mode 100644
index 00000000000..7a3f316e384
--- /dev/null
+++ b/include/linux/mfd/ab3100.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2007-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * AB3100 core access functions
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ */
+
+#include <linux/device.h>
+
+#ifndef MFD_AB3100_H
+#define MFD_AB3100_H
+
+#define ABUNKNOWN	0
+#define	AB3000		1
+#define	AB3100		2
+
+/*
+ * AB3100, EVENTA1, A2 and A3 event register flags
+ * these are catenated into a single 32-bit flag in the code
+ * for event notification broadcasts.
+ */
+#define AB3100_EVENTA1_ONSWA				(0x01<<16)
+#define AB3100_EVENTA1_ONSWB				(0x02<<16)
+#define AB3100_EVENTA1_ONSWC				(0x04<<16)
+#define AB3100_EVENTA1_DCIO				(0x08<<16)
+#define AB3100_EVENTA1_OVER_TEMP			(0x10<<16)
+#define AB3100_EVENTA1_SIM_OFF				(0x20<<16)
+#define AB3100_EVENTA1_VBUS				(0x40<<16)
+#define AB3100_EVENTA1_VSET_USB				(0x80<<16)
+
+#define AB3100_EVENTA2_READY_TX				(0x01<<8)
+#define AB3100_EVENTA2_READY_RX				(0x02<<8)
+#define AB3100_EVENTA2_OVERRUN_ERROR			(0x04<<8)
+#define AB3100_EVENTA2_FRAMING_ERROR			(0x08<<8)
+#define AB3100_EVENTA2_CHARG_OVERCURRENT		(0x10<<8)
+#define AB3100_EVENTA2_MIDR				(0x20<<8)
+#define AB3100_EVENTA2_BATTERY_REM			(0x40<<8)
+#define AB3100_EVENTA2_ALARM				(0x80<<8)
+
+#define AB3100_EVENTA3_ADC_TRIG5			(0x01)
+#define AB3100_EVENTA3_ADC_TRIG4			(0x02)
+#define AB3100_EVENTA3_ADC_TRIG3			(0x04)
+#define AB3100_EVENTA3_ADC_TRIG2			(0x08)
+#define AB3100_EVENTA3_ADC_TRIGVBAT			(0x10)
+#define AB3100_EVENTA3_ADC_TRIGVTX			(0x20)
+#define AB3100_EVENTA3_ADC_TRIG1			(0x40)
+#define AB3100_EVENTA3_ADC_TRIG0			(0x80)
+
+/* AB3100, STR register flags */
+#define AB3100_STR_ONSWA				(0x01)
+#define AB3100_STR_ONSWB				(0x02)
+#define AB3100_STR_ONSWC				(0x04)
+#define AB3100_STR_DCIO					(0x08)
+#define AB3100_STR_BOOT_MODE				(0x10)
+#define AB3100_STR_SIM_OFF				(0x20)
+#define AB3100_STR_BATT_REMOVAL				(0x40)
+#define AB3100_STR_VBUS					(0x80)
+
+/**
+ * struct ab3100
+ * @access_mutex: lock out concurrent accesses to the AB3100 registers
+ * @dev: pointer to the containing device
+ * @i2c_client: I2C client for this chip
+ * @testreg_client: secondary client for test registers
+ * @chip_name: name of this chip variant
+ * @chip_id: 8 bit chip ID for this chip variant
+ * @work: an event handling worker
+ * @event_subscribers: event subscribers are listed here
+ * @startup_events: a copy of the first reading of the event registers
+ * @startup_events_read: whether the first events have been read
+ *
+ * This struct is PRIVATE and devices using it should NOT
+ * access ANY fields. It is used as a token for calling the
+ * AB3100 functions.
+ */
+struct ab3100 {
+	struct mutex access_mutex;
+	struct device *dev;
+	struct i2c_client *i2c_client;
+	struct i2c_client *testreg_client;
+	char chip_name[32];
+	u8 chip_id;
+	struct work_struct work;
+	struct blocking_notifier_head event_subscribers;
+	u32 startup_events;
+	bool startup_events_read;
+};
+
+int ab3100_set_register(struct ab3100 *ab3100, u8 reg, u8 regval);
+int ab3100_get_register(struct ab3100 *ab3100, u8 reg, u8 *regval);
+int ab3100_get_register_page(struct ab3100 *ab3100,
+			     u8 first_reg, u8 *regvals, u8 numregs);
+int ab3100_mask_and_set_register(struct ab3100 *ab3100,
+				 u8 reg, u8 andmask, u8 ormask);
+u8 ab3100_get_chip_type(struct ab3100 *ab3100);
+int ab3100_event_register(struct ab3100 *ab3100,
+			  struct notifier_block *nb);
+int ab3100_event_unregister(struct ab3100 *ab3100,
+			    struct notifier_block *nb);
+int ab3100_event_registers_startup_state_get(struct ab3100 *ab3100,
+					     u32 *fatevent);
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From 13a09f93d2bf3a20c748e1d6a30160a00fc58169 Mon Sep 17 00:00:00 2001
From: Daniel Ribeiro <drwyrm@gmail.com>
Date: Thu, 28 May 2009 15:43:37 -0300
Subject: mfd: add PCAP driver

The PCAP Asic as present on EZX phones is a multi function device with
voltage regulators, ADC, touch screen controller, RTC, USB transceiver,
leds controller, and audio codec.

It has two SPI ports, typically one is connected to the application
processor and another to the baseband, this driver provides read/write
functions to its registers, irq demultiplexer and ADC
queueing/abstraction.

This chip is used on a lot of Motorola phones, it was manufactured by TI
as a custom product with the name PTWL93017, later this design evolved
into the ATLAS PMIC from Freescale (MC13783).

Signed-off-by: Daniel Ribeiro <drwyrm@gmail.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig          |   7 +
 drivers/mfd/Makefile         |   2 +
 drivers/mfd/ezx-pcap.c       | 505 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/ezx-pcap.h | 256 ++++++++++++++++++++++
 4 files changed, 770 insertions(+)
 create mode 100644 drivers/mfd/ezx-pcap.c
 create mode 100644 include/linux/mfd/ezx-pcap.h

(limited to 'include')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 61f0346650b..287d47b78e7 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -255,6 +255,13 @@ config AB3100_CORE
 	  LEDs, vibrator, system power and temperature, power management
 	  and ALSA sound.
 
+config EZX_PCAP
+	bool "PCAP Support"
+	depends on GENERIC_HARDIRQS && SPI_MASTER
+	help
+	  This enables the PCAP ASIC present on EZX Phones. This is
+	  needed for MMC, TouchScreen, Sound, USB, etc..
+
 endmenu
 
 menu "Multimedia Capabilities Port drivers"
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index f5f337143e6..6f8a9a1af20 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -26,6 +26,8 @@ obj-$(CONFIG_TWL4030_CORE)	+= twl4030-core.o twl4030-irq.o
 
 obj-$(CONFIG_MFD_CORE)		+= mfd-core.o
 
+obj-$(CONFIG_EZX_PCAP)		+= ezx-pcap.o
+
 obj-$(CONFIG_MCP)		+= mcp-core.o
 obj-$(CONFIG_MCP_SA11X0)	+= mcp-sa11x0.o
 obj-$(CONFIG_MCP_UCB1200)	+= ucb1x00-core.o
diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c
new file mode 100644
index 00000000000..671a7efe86a
--- /dev/null
+++ b/drivers/mfd/ezx-pcap.c
@@ -0,0 +1,505 @@
+/*
+ * Driver for Motorola PCAP2 as present in EZX phones
+ *
+ * Copyright (C) 2006 Harald Welte <laforge@openezx.org>
+ * Copyright (C) 2009 Daniel Ribeiro <drwyrm@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/mfd/ezx-pcap.h>
+#include <linux/spi/spi.h>
+
+#define PCAP_ADC_MAXQ		8
+struct pcap_adc_request {
+	u8 bank;
+	u8 ch[2];
+	u32 flags;
+	void (*callback)(void *, u16[]);
+	void *data;
+};
+
+struct pcap_adc_sync_request {
+	u16 res[2];
+	struct completion completion;
+};
+
+struct pcap_chip {
+	struct spi_device *spi;
+
+	/* IO */
+	u32 buf;
+	struct mutex io_mutex;
+
+	/* IRQ */
+	unsigned int irq_base;
+	u32 msr;
+	struct work_struct isr_work;
+	struct work_struct msr_work;
+	struct workqueue_struct *workqueue;
+
+	/* ADC */
+	struct pcap_adc_request *adc_queue[PCAP_ADC_MAXQ];
+	u8 adc_head;
+	u8 adc_tail;
+	struct mutex adc_mutex;
+};
+
+/* IO */
+static int ezx_pcap_putget(struct pcap_chip *pcap, u32 *data)
+{
+	struct spi_transfer t;
+	struct spi_message m;
+	int status;
+
+	memset(&t, 0, sizeof t);
+	spi_message_init(&m);
+	t.len = sizeof(u32);
+	spi_message_add_tail(&t, &m);
+
+	pcap->buf = *data;
+	t.tx_buf = (u8 *) &pcap->buf;
+	t.rx_buf = (u8 *) &pcap->buf;
+	status = spi_sync(pcap->spi, &m);
+
+	if (status == 0)
+		*data = pcap->buf;
+
+	return status;
+}
+
+int ezx_pcap_write(struct pcap_chip *pcap, u8 reg_num, u32 value)
+{
+	int ret;
+
+	mutex_lock(&pcap->io_mutex);
+	value &= PCAP_REGISTER_VALUE_MASK;
+	value |= PCAP_REGISTER_WRITE_OP_BIT
+		| (reg_num << PCAP_REGISTER_ADDRESS_SHIFT);
+	ret = ezx_pcap_putget(pcap, &value);
+	mutex_unlock(&pcap->io_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ezx_pcap_write);
+
+int ezx_pcap_read(struct pcap_chip *pcap, u8 reg_num, u32 *value)
+{
+	int ret;
+
+	mutex_lock(&pcap->io_mutex);
+	*value = PCAP_REGISTER_READ_OP_BIT
+		| (reg_num << PCAP_REGISTER_ADDRESS_SHIFT);
+
+	ret = ezx_pcap_putget(pcap, value);
+	mutex_unlock(&pcap->io_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ezx_pcap_read);
+
+/* IRQ */
+static inline unsigned int irq2pcap(struct pcap_chip *pcap, int irq)
+{
+	return 1 << (irq - pcap->irq_base);
+}
+
+int pcap_to_irq(struct pcap_chip *pcap, int irq)
+{
+	return pcap->irq_base + irq;
+}
+EXPORT_SYMBOL_GPL(pcap_to_irq);
+
+static void pcap_mask_irq(unsigned int irq)
+{
+	struct pcap_chip *pcap = get_irq_chip_data(irq);
+
+	pcap->msr |= irq2pcap(pcap, irq);
+	queue_work(pcap->workqueue, &pcap->msr_work);
+}
+
+static void pcap_unmask_irq(unsigned int irq)
+{
+	struct pcap_chip *pcap = get_irq_chip_data(irq);
+
+	pcap->msr &= ~irq2pcap(pcap, irq);
+	queue_work(pcap->workqueue, &pcap->msr_work);
+}
+
+static struct irq_chip pcap_irq_chip = {
+	.name	= "pcap",
+	.mask	= pcap_mask_irq,
+	.unmask	= pcap_unmask_irq,
+};
+
+static void pcap_msr_work(struct work_struct *work)
+{
+	struct pcap_chip *pcap = container_of(work, struct pcap_chip, msr_work);
+
+	ezx_pcap_write(pcap, PCAP_REG_MSR, pcap->msr);
+}
+
+static void pcap_isr_work(struct work_struct *work)
+{
+	struct pcap_chip *pcap = container_of(work, struct pcap_chip, isr_work);
+	struct pcap_platform_data *pdata = pcap->spi->dev.platform_data;
+	u32 msr, isr, int_sel, service;
+	int irq;
+
+	ezx_pcap_read(pcap, PCAP_REG_MSR, &msr);
+	ezx_pcap_read(pcap, PCAP_REG_ISR, &isr);
+
+	/* We cant service/ack irqs that are assigned to port 2 */
+	if (!(pdata->config & PCAP_SECOND_PORT)) {
+		ezx_pcap_read(pcap, PCAP_REG_INT_SEL, &int_sel);
+		isr &= ~int_sel;
+	}
+	ezx_pcap_write(pcap, PCAP_REG_ISR, isr);
+
+	local_irq_disable();
+	service = isr & ~msr;
+
+	for (irq = pcap->irq_base; service; service >>= 1, irq++) {
+		if (service & 1) {
+			struct irq_desc *desc = irq_to_desc(irq);
+
+			if (WARN(!desc, KERN_WARNING
+					"Invalid PCAP IRQ %d\n", irq))
+				break;
+
+			if (desc->status & IRQ_DISABLED)
+				note_interrupt(irq, desc, IRQ_NONE);
+			else
+				desc->handle_irq(irq, desc);
+		}
+	}
+	local_irq_enable();
+}
+
+static void pcap_irq_handler(unsigned int irq, struct irq_desc *desc)
+{
+	struct pcap_chip *pcap = get_irq_data(irq);
+
+	desc->chip->ack(irq);
+	queue_work(pcap->workqueue, &pcap->isr_work);
+	return;
+}
+
+/* ADC */
+static void pcap_disable_adc(struct pcap_chip *pcap)
+{
+	u32 tmp;
+
+	ezx_pcap_read(pcap, PCAP_REG_ADC, &tmp);
+	tmp &= ~(PCAP_ADC_ADEN|PCAP_ADC_BATT_I_ADC|PCAP_ADC_BATT_I_POLARITY);
+	ezx_pcap_write(pcap, PCAP_REG_ADC, tmp);
+}
+
+static void pcap_adc_trigger(struct pcap_chip *pcap)
+{
+	u32 tmp;
+	u8 head;
+
+	mutex_lock(&pcap->adc_mutex);
+	head = pcap->adc_head;
+	if (!pcap->adc_queue[head]) {
+		/* queue is empty, save power */
+		pcap_disable_adc(pcap);
+		mutex_unlock(&pcap->adc_mutex);
+		return;
+	}
+	mutex_unlock(&pcap->adc_mutex);
+
+	/* start conversion on requested bank */
+	tmp = pcap->adc_queue[head]->flags | PCAP_ADC_ADEN;
+
+	if (pcap->adc_queue[head]->bank == PCAP_ADC_BANK_1)
+		tmp |= PCAP_ADC_AD_SEL1;
+
+	ezx_pcap_write(pcap, PCAP_REG_ADC, tmp);
+	ezx_pcap_write(pcap, PCAP_REG_ADR, PCAP_ADR_ASC);
+}
+
+static irqreturn_t pcap_adc_irq(int irq, void *_pcap)
+{
+	struct pcap_chip *pcap = _pcap;
+	struct pcap_adc_request *req;
+	u16 res[2];
+	u32 tmp;
+
+	mutex_lock(&pcap->adc_mutex);
+	req = pcap->adc_queue[pcap->adc_head];
+
+	if (WARN(!req, KERN_WARNING "adc irq without pending request\n"))
+		return IRQ_HANDLED;
+
+	/* read requested channels results */
+	ezx_pcap_read(pcap, PCAP_REG_ADC, &tmp);
+	tmp &= ~(PCAP_ADC_ADA1_MASK | PCAP_ADC_ADA2_MASK);
+	tmp |= (req->ch[0] << PCAP_ADC_ADA1_SHIFT);
+	tmp |= (req->ch[1] << PCAP_ADC_ADA2_SHIFT);
+	ezx_pcap_write(pcap, PCAP_REG_ADC, tmp);
+	ezx_pcap_read(pcap, PCAP_REG_ADR, &tmp);
+	res[0] = (tmp & PCAP_ADR_ADD1_MASK) >> PCAP_ADR_ADD1_SHIFT;
+	res[1] = (tmp & PCAP_ADR_ADD2_MASK) >> PCAP_ADR_ADD2_SHIFT;
+
+	pcap->adc_queue[pcap->adc_head] = NULL;
+	pcap->adc_head = (pcap->adc_head + 1) & (PCAP_ADC_MAXQ - 1);
+	mutex_unlock(&pcap->adc_mutex);
+
+	/* pass the results and release memory */
+	req->callback(req->data, res);
+	kfree(req);
+
+	/* trigger next conversion (if any) on queue */
+	pcap_adc_trigger(pcap);
+
+	return IRQ_HANDLED;
+}
+
+int pcap_adc_async(struct pcap_chip *pcap, u8 bank, u32 flags, u8 ch[],
+						void *callback, void *data)
+{
+	struct pcap_adc_request *req;
+
+	/* This will be freed after we have a result */
+	req = kmalloc(sizeof(struct pcap_adc_request), GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	req->bank = bank;
+	req->flags = flags;
+	req->ch[0] = ch[0];
+	req->ch[1] = ch[1];
+	req->callback = callback;
+	req->data = data;
+
+	mutex_lock(&pcap->adc_mutex);
+	if (pcap->adc_queue[pcap->adc_tail]) {
+		mutex_unlock(&pcap->adc_mutex);
+		kfree(req);
+		return -EBUSY;
+	}
+	pcap->adc_queue[pcap->adc_tail] = req;
+	pcap->adc_tail = (pcap->adc_tail + 1) & (PCAP_ADC_MAXQ - 1);
+	mutex_unlock(&pcap->adc_mutex);
+
+	/* start conversion */
+	pcap_adc_trigger(pcap);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pcap_adc_async);
+
+static void pcap_adc_sync_cb(void *param, u16 res[])
+{
+	struct pcap_adc_sync_request *req = param;
+
+	req->res[0] = res[0];
+	req->res[1] = res[1];
+	complete(&req->completion);
+}
+
+int pcap_adc_sync(struct pcap_chip *pcap, u8 bank, u32 flags, u8 ch[],
+								u16 res[])
+{
+	struct pcap_adc_sync_request sync_data;
+	int ret;
+
+	init_completion(&sync_data.completion);
+	ret = pcap_adc_async(pcap, bank, flags, ch, pcap_adc_sync_cb,
+								&sync_data);
+	if (ret)
+		return ret;
+	wait_for_completion(&sync_data.completion);
+	res[0] = sync_data.res[0];
+	res[1] = sync_data.res[1];
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pcap_adc_sync);
+
+/* subdevs */
+static int pcap_remove_subdev(struct device *dev, void *unused)
+{
+	platform_device_unregister(to_platform_device(dev));
+	return 0;
+}
+
+static int __devinit pcap_add_subdev(struct pcap_chip *pcap,
+						struct pcap_subdev *subdev)
+{
+	struct platform_device *pdev;
+
+	pdev = platform_device_alloc(subdev->name, subdev->id);
+	pdev->dev.parent = &pcap->spi->dev;
+	pdev->dev.platform_data = subdev->platform_data;
+	platform_set_drvdata(pdev, pcap);
+
+	return platform_device_add(pdev);
+}
+
+static int __devexit ezx_pcap_remove(struct spi_device *spi)
+{
+	struct pcap_chip *pcap = dev_get_drvdata(&spi->dev);
+	struct pcap_platform_data *pdata = spi->dev.platform_data;
+	int i, adc_irq;
+
+	/* remove all registered subdevs */
+	device_for_each_child(&spi->dev, NULL, pcap_remove_subdev);
+
+	/* cleanup ADC */
+	adc_irq = pcap_to_irq(pcap, (pdata->config & PCAP_SECOND_PORT) ?
+				PCAP_IRQ_ADCDONE2 : PCAP_IRQ_ADCDONE);
+	free_irq(adc_irq, pcap);
+	mutex_lock(&pcap->adc_mutex);
+	for (i = 0; i < PCAP_ADC_MAXQ; i++)
+		kfree(pcap->adc_queue[i]);
+	mutex_unlock(&pcap->adc_mutex);
+
+	/* cleanup irqchip */
+	for (i = pcap->irq_base; i < (pcap->irq_base + PCAP_NIRQS); i++)
+		set_irq_chip_and_handler(i, NULL, NULL);
+
+	destroy_workqueue(pcap->workqueue);
+
+	kfree(pcap);
+
+	return 0;
+}
+
+static int __devinit ezx_pcap_probe(struct spi_device *spi)
+{
+	struct pcap_platform_data *pdata = spi->dev.platform_data;
+	struct pcap_chip *pcap;
+	int i, adc_irq;
+	int ret = -ENODEV;
+
+	/* platform data is required */
+	if (!pdata)
+		goto ret;
+
+	pcap = kzalloc(sizeof(*pcap), GFP_KERNEL);
+	if (!pcap) {
+		ret = -ENOMEM;
+		goto ret;
+	}
+
+	mutex_init(&pcap->io_mutex);
+	mutex_init(&pcap->adc_mutex);
+	INIT_WORK(&pcap->isr_work, pcap_isr_work);
+	INIT_WORK(&pcap->msr_work, pcap_msr_work);
+	dev_set_drvdata(&spi->dev, pcap);
+
+	/* setup spi */
+	spi->bits_per_word = 32;
+	spi->mode = SPI_MODE_0 | (pdata->config & PCAP_CS_AH ? SPI_CS_HIGH : 0);
+	ret = spi_setup(spi);
+	if (ret)
+		goto free_pcap;
+
+	pcap->spi = spi;
+
+	/* setup irq */
+	pcap->irq_base = pdata->irq_base;
+	pcap->workqueue = create_singlethread_workqueue("pcapd");
+	if (!pcap->workqueue) {
+		dev_err(&spi->dev, "cant create pcap thread\n");
+		goto free_pcap;
+	}
+
+	/* redirect interrupts to AP, except adcdone2 */
+	if (!(pdata->config & PCAP_SECOND_PORT))
+		ezx_pcap_write(pcap, PCAP_REG_INT_SEL,
+					(1 << PCAP_IRQ_ADCDONE2));
+
+	/* setup irq chip */
+	for (i = pcap->irq_base; i < (pcap->irq_base + PCAP_NIRQS); i++) {
+		set_irq_chip_and_handler(i, &pcap_irq_chip, handle_simple_irq);
+		set_irq_chip_data(i, pcap);
+#ifdef CONFIG_ARM
+		set_irq_flags(i, IRQF_VALID);
+#else
+		set_irq_noprobe(i);
+#endif
+	}
+
+	/* mask/ack all PCAP interrupts */
+	ezx_pcap_write(pcap, PCAP_REG_MSR, PCAP_MASK_ALL_INTERRUPT);
+	ezx_pcap_write(pcap, PCAP_REG_ISR, PCAP_CLEAR_INTERRUPT_REGISTER);
+	pcap->msr = PCAP_MASK_ALL_INTERRUPT;
+
+	set_irq_type(spi->irq, IRQ_TYPE_EDGE_RISING);
+	set_irq_data(spi->irq, pcap);
+	set_irq_chained_handler(spi->irq, pcap_irq_handler);
+	set_irq_wake(spi->irq, 1);
+
+	/* ADC */
+	adc_irq = pcap_to_irq(pcap, (pdata->config & PCAP_SECOND_PORT) ?
+					PCAP_IRQ_ADCDONE2 : PCAP_IRQ_ADCDONE);
+
+	ret = request_irq(adc_irq, pcap_adc_irq, 0, "ADC", pcap);
+	if (ret)
+		goto free_irqchip;
+
+	/* setup subdevs */
+	for (i = 0; i < pdata->num_subdevs; i++) {
+		ret = pcap_add_subdev(pcap, &pdata->subdevs[i]);
+		if (ret)
+			goto remove_subdevs;
+	}
+
+	/* board specific quirks */
+	if (pdata->init)
+		pdata->init(pcap);
+
+	return 0;
+
+remove_subdevs:
+	device_for_each_child(&spi->dev, NULL, pcap_remove_subdev);
+/* free_adc: */
+	free_irq(adc_irq, pcap);
+free_irqchip:
+	for (i = pcap->irq_base; i < (pcap->irq_base + PCAP_NIRQS); i++)
+		set_irq_chip_and_handler(i, NULL, NULL);
+/* destroy_workqueue: */
+	destroy_workqueue(pcap->workqueue);
+free_pcap:
+	kfree(pcap);
+ret:
+	return ret;
+}
+
+static struct spi_driver ezxpcap_driver = {
+	.probe	= ezx_pcap_probe,
+	.remove = __devexit_p(ezx_pcap_remove),
+	.driver = {
+		.name	= "ezx-pcap",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init ezx_pcap_init(void)
+{
+	return spi_register_driver(&ezxpcap_driver);
+}
+
+static void __exit ezx_pcap_exit(void)
+{
+	spi_unregister_driver(&ezxpcap_driver);
+}
+
+module_init(ezx_pcap_init);
+module_exit(ezx_pcap_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Daniel Ribeiro / Harald Welte");
+MODULE_DESCRIPTION("Motorola PCAP2 ASIC Driver");
diff --git a/include/linux/mfd/ezx-pcap.h b/include/linux/mfd/ezx-pcap.h
new file mode 100644
index 00000000000..c12c3c0932b
--- /dev/null
+++ b/include/linux/mfd/ezx-pcap.h
@@ -0,0 +1,256 @@
+/*
+ * Copyright 2009 Daniel Ribeiro <drwyrm@gmail.com>
+ *
+ * For further information, please see http://wiki.openezx.org/PCAP2
+ */
+
+#ifndef EZX_PCAP_H
+#define EZX_PCAP_H
+
+struct pcap_subdev {
+	int id;
+	const char *name;
+	void *platform_data;
+};
+
+struct pcap_platform_data {
+	unsigned int irq_base;
+	unsigned int config;
+	void (*init) (void *);	/* board specific init */
+	int num_subdevs;
+	struct pcap_subdev *subdevs;
+};
+
+struct pcap_chip;
+
+int ezx_pcap_write(struct pcap_chip *, u8, u32);
+int ezx_pcap_read(struct pcap_chip *, u8, u32 *);
+int pcap_to_irq(struct pcap_chip *, int);
+int pcap_adc_async(struct pcap_chip *, u8, u32, u8[], void *, void *);
+int pcap_adc_sync(struct pcap_chip *, u8, u32, u8[], u16[]);
+
+#define PCAP_SECOND_PORT	1
+#define PCAP_CS_AH		2
+
+#define PCAP_REGISTER_WRITE_OP_BIT	0x80000000
+#define PCAP_REGISTER_READ_OP_BIT	0x00000000
+
+#define PCAP_REGISTER_VALUE_MASK	0x01ffffff
+#define PCAP_REGISTER_ADDRESS_MASK	0x7c000000
+#define PCAP_REGISTER_ADDRESS_SHIFT	26
+#define PCAP_REGISTER_NUMBER		32
+#define PCAP_CLEAR_INTERRUPT_REGISTER	0x01ffffff
+#define PCAP_MASK_ALL_INTERRUPT		0x01ffffff
+
+/* registers acessible by both pcap ports */
+#define PCAP_REG_ISR		0x0	/* Interrupt Status */
+#define PCAP_REG_MSR		0x1	/* Interrupt Mask */
+#define PCAP_REG_PSTAT		0x2	/* Processor Status */
+#define PCAP_REG_VREG2		0x6	/* Regulator Bank 2 Control */
+#define PCAP_REG_AUXVREG	0x7	/* Auxiliary Regulator Control */
+#define PCAP_REG_BATT		0x8	/* Battery Control */
+#define PCAP_REG_ADC		0x9	/* AD Control */
+#define PCAP_REG_ADR		0xa	/* AD Result */
+#define PCAP_REG_CODEC		0xb	/* Audio Codec Control */
+#define PCAP_REG_RX_AMPS	0xc	/* RX Audio Amplifiers Control */
+#define PCAP_REG_ST_DAC		0xd	/* Stereo DAC Control */
+#define PCAP_REG_BUSCTRL	0x14	/* Connectivity Control */
+#define PCAP_REG_PERIPH		0x15	/* Peripheral Control */
+#define PCAP_REG_LOWPWR		0x18	/* Regulator Low Power Control */
+#define PCAP_REG_TX_AMPS	0x1a	/* TX Audio Amplifiers Control */
+#define PCAP_REG_GP		0x1b	/* General Purpose */
+#define PCAP_REG_TEST1		0x1c
+#define PCAP_REG_TEST2		0x1d
+#define PCAP_REG_VENDOR_TEST1	0x1e
+#define PCAP_REG_VENDOR_TEST2	0x1f
+
+/* registers acessible by pcap port 1 only (a1200, e2 & e6) */
+#define PCAP_REG_INT_SEL	0x3	/* Interrupt Select */
+#define PCAP_REG_SWCTRL		0x4	/* Switching Regulator Control */
+#define PCAP_REG_VREG1		0x5	/* Regulator Bank 1 Control */
+#define PCAP_REG_RTC_TOD	0xe	/* RTC Time of Day */
+#define PCAP_REG_RTC_TODA	0xf	/* RTC Time of Day Alarm */
+#define PCAP_REG_RTC_DAY	0x10	/* RTC Day */
+#define PCAP_REG_RTC_DAYA	0x11	/* RTC Day Alarm */
+#define PCAP_REG_MTRTMR		0x12	/* AD Monitor Timer */
+#define PCAP_REG_PWR		0x13	/* Power Control */
+#define PCAP_REG_AUXVREG_MASK	0x16	/* Auxiliary Regulator Mask */
+#define PCAP_REG_VENDOR_REV	0x17
+#define PCAP_REG_PERIPH_MASK	0x19	/* Peripheral Mask */
+
+/* PCAP2 Interrupts */
+#define PCAP_NIRQS		23
+#define PCAP_IRQ_ADCDONE	0	/* ADC done port 1 */
+#define PCAP_IRQ_TS		1	/* Touch Screen */
+#define PCAP_IRQ_1HZ		2	/* 1HZ timer */
+#define PCAP_IRQ_WH		3	/* ADC above high limit */
+#define PCAP_IRQ_WL		4	/* ADC below low limit */
+#define PCAP_IRQ_TODA		5	/* Time of day alarm */
+#define PCAP_IRQ_USB4V		6	/* USB above 4V */
+#define PCAP_IRQ_ONOFF		7	/* On/Off button */
+#define PCAP_IRQ_ONOFF2		8	/* On/Off button 2 */
+#define PCAP_IRQ_USB1V		9	/* USB above 1V */
+#define PCAP_IRQ_MOBPORT	10
+#define PCAP_IRQ_MIC		11	/* Mic attach/HS button */
+#define PCAP_IRQ_HS		12	/* Headset attach */
+#define PCAP_IRQ_ST		13
+#define PCAP_IRQ_PC		14	/* Power Cut */
+#define PCAP_IRQ_WARM		15
+#define PCAP_IRQ_EOL		16	/* Battery End Of Life */
+#define PCAP_IRQ_CLK		17
+#define PCAP_IRQ_SYSRST		18	/* System Reset */
+#define PCAP_IRQ_DUMMY		19
+#define PCAP_IRQ_ADCDONE2	20	/* ADC done port 2 */
+#define PCAP_IRQ_SOFTRESET	21
+#define PCAP_IRQ_MNEXB		22
+
+/* voltage regulators */
+#define V1		0
+#define V2		1
+#define V3		2
+#define V4		3
+#define V5		4
+#define V6		5
+#define V7		6
+#define V8		7
+#define V9		8
+#define V10		9
+#define VAUX1		10
+#define VAUX2		11
+#define VAUX3		12
+#define VAUX4		13
+#define VSIM		14
+#define VSIM2		15
+#define VVIB		16
+#define SW1		17
+#define SW2		18
+#define SW3		19
+#define SW1S		20
+#define SW2S		21
+
+#define PCAP_BATT_DAC_MASK		0x000000ff
+#define PCAP_BATT_DAC_SHIFT		0
+#define PCAP_BATT_B_FDBK		(1 << 8)
+#define PCAP_BATT_EXT_ISENSE		(1 << 9)
+#define PCAP_BATT_V_COIN_MASK		0x00003c00
+#define PCAP_BATT_V_COIN_SHIFT		10
+#define PCAP_BATT_I_COIN		(1 << 14)
+#define PCAP_BATT_COIN_CH_EN		(1 << 15)
+#define PCAP_BATT_EOL_SEL_MASK		0x000e0000
+#define PCAP_BATT_EOL_SEL_SHIFT		17
+#define PCAP_BATT_EOL_CMP_EN		(1 << 20)
+#define PCAP_BATT_BATT_DET_EN		(1 << 21)
+#define PCAP_BATT_THERMBIAS_CTRL	(1 << 22)
+
+#define PCAP_ADC_ADEN			(1 << 0)
+#define PCAP_ADC_RAND			(1 << 1)
+#define PCAP_ADC_AD_SEL1		(1 << 2)
+#define PCAP_ADC_AD_SEL2		(1 << 3)
+#define PCAP_ADC_ADA1_MASK		0x00000070
+#define PCAP_ADC_ADA1_SHIFT		4
+#define PCAP_ADC_ADA2_MASK		0x00000380
+#define PCAP_ADC_ADA2_SHIFT		7
+#define PCAP_ADC_ATO_MASK		0x00003c00
+#define PCAP_ADC_ATO_SHIFT		10
+#define PCAP_ADC_ATOX			(1 << 14)
+#define PCAP_ADC_MTR1			(1 << 15)
+#define PCAP_ADC_MTR2			(1 << 16)
+#define PCAP_ADC_TS_M_MASK		0x000e0000
+#define PCAP_ADC_TS_M_SHIFT		17
+#define PCAP_ADC_TS_REF_LOWPWR		(1 << 20)
+#define PCAP_ADC_TS_REFENB		(1 << 21)
+#define PCAP_ADC_BATT_I_POLARITY	(1 << 22)
+#define PCAP_ADC_BATT_I_ADC		(1 << 23)
+
+#define PCAP_ADC_BANK_0			0
+#define PCAP_ADC_BANK_1			1
+/* ADC bank 0 */
+#define PCAP_ADC_CH_COIN		0
+#define PCAP_ADC_CH_BATT		1
+#define PCAP_ADC_CH_BPLUS		2
+#define PCAP_ADC_CH_MOBPORTB		3
+#define PCAP_ADC_CH_TEMPERATURE		4
+#define PCAP_ADC_CH_CHARGER_ID		5
+#define PCAP_ADC_CH_AD6			6
+/* ADC bank 1 */
+#define PCAP_ADC_CH_AD7			0
+#define PCAP_ADC_CH_AD8			1
+#define PCAP_ADC_CH_AD9			2
+#define PCAP_ADC_CH_TS_X1		3
+#define PCAP_ADC_CH_TS_X2		4
+#define PCAP_ADC_CH_TS_Y1		5
+#define PCAP_ADC_CH_TS_Y2		6
+
+#define PCAP_ADC_T_NOW			0
+#define PCAP_ADC_T_IN_BURST		1
+#define PCAP_ADC_T_OUT_BURST		2
+
+#define PCAP_ADC_ATO_IN_BURST		6
+#define PCAP_ADC_ATO_OUT_BURST		0
+
+#define PCAP_ADC_TS_M_XY		1
+#define PCAP_ADC_TS_M_PRESSURE		2
+#define PCAP_ADC_TS_M_PLATE_X		3
+#define PCAP_ADC_TS_M_PLATE_Y		4
+#define PCAP_ADC_TS_M_STANDBY		5
+#define PCAP_ADC_TS_M_NONTS		6
+
+#define PCAP_ADR_ADD1_MASK		0x000003ff
+#define PCAP_ADR_ADD1_SHIFT		0
+#define PCAP_ADR_ADD2_MASK		0x000ffc00
+#define PCAP_ADR_ADD2_SHIFT		10
+#define PCAP_ADR_ADINC1			(1 << 20)
+#define PCAP_ADR_ADINC2			(1 << 21)
+#define PCAP_ADR_ASC			(1 << 22)
+#define PCAP_ADR_ONESHOT		(1 << 23)
+
+#define PCAP_BUSCTRL_FSENB		(1 << 0)
+#define PCAP_BUSCTRL_USB_SUSPEND	(1 << 1)
+#define PCAP_BUSCTRL_USB_PU		(1 << 2)
+#define PCAP_BUSCTRL_USB_PD		(1 << 3)
+#define PCAP_BUSCTRL_VUSB_EN		(1 << 4)
+#define PCAP_BUSCTRL_USB_PS		(1 << 5)
+#define PCAP_BUSCTRL_VUSB_MSTR_EN	(1 << 6)
+#define PCAP_BUSCTRL_VBUS_PD_ENB	(1 << 7)
+#define PCAP_BUSCTRL_CURRLIM		(1 << 8)
+#define PCAP_BUSCTRL_RS232ENB		(1 << 9)
+#define PCAP_BUSCTRL_RS232_DIR		(1 << 10)
+#define PCAP_BUSCTRL_SE0_CONN		(1 << 11)
+#define PCAP_BUSCTRL_USB_PDM		(1 << 12)
+#define PCAP_BUSCTRL_BUS_PRI_ADJ	(1 << 24)
+
+/* leds */
+#define PCAP_LED0		0
+#define PCAP_LED1		1
+#define PCAP_BL0		2
+#define PCAP_BL1		3
+#define PCAP_VIB		4
+#define PCAP_LED_3MA		0
+#define PCAP_LED_4MA		1
+#define PCAP_LED_5MA		2
+#define PCAP_LED_9MA		3
+#define PCAP_LED_GPIO_VAL_MASK	0x00ffffff
+#define PCAP_LED_GPIO_EN	0x01000000
+#define PCAP_LED_GPIO_INVERT	0x02000000
+#define PCAP_LED_T_MASK		0xf
+#define PCAP_LED_C_MASK		0x3
+#define PCAP_BL_MASK		0x1f
+#define PCAP_BL0_SHIFT		0
+#define PCAP_LED0_EN		(1 << 5)
+#define PCAP_LED1_EN		(1 << 6)
+#define PCAP_LED0_T_SHIFT	7
+#define PCAP_LED1_T_SHIFT	11
+#define PCAP_LED0_C_SHIFT	15
+#define PCAP_LED1_C_SHIFT	17
+#define PCAP_BL1_SHIFT		20
+#define PCAP_VIB_MASK		0x3
+#define PCAP_VIB_SHIFT		20
+#define PCAP_VIB_EN		(1 << 19)
+
+/* RTC */
+#define PCAP_RTC_DAY_MASK	0x3fff
+#define PCAP_RTC_TOD_MASK	0xffff
+#define PCAP_RTC_PC_MASK	0x7
+#define SEC_PER_DAY		86400
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From 6483c1b5e1a6e3489640a1376e951395982e9615 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Fri, 5 Jun 2009 18:31:01 +0200
Subject: mfd: asic3: add asic3_set_register common operation

Used to configure single bits of the SDHWCTRL_SDCONF and EXTCF_RESET/SELECT
registers needed for DS1WM, MMC/SDIO and PCMCIA functionality.

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/asic3.c       | 15 +++++++++++++++
 include/linux/mfd/asic3.h | 10 +++++-----
 2 files changed, 20 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
index 9e485459f63..ad3c5913599 100644
--- a/drivers/mfd/asic3.c
+++ b/drivers/mfd/asic3.c
@@ -52,6 +52,21 @@ static inline u32 asic3_read_register(struct asic3 *asic,
 			(reg >> asic->bus_shift));
 }
 
+void asic3_set_register(struct asic3 *asic, u32 reg, u32 bits, bool set)
+{
+	unsigned long flags;
+	u32 val;
+
+	spin_lock_irqsave(&asic->lock, flags);
+	val = asic3_read_register(asic, reg);
+	if (set)
+		val |= bits;
+	else
+		val &= ~bits;
+	asic3_write_register(asic, reg, val);
+	spin_unlock_irqrestore(&asic->lock, flags);
+}
+
 /* IRQs */
 #define MAX_ASIC_ISR_LOOPS    20
 #define ASIC3_GPIO_BASE_INCR \
diff --git a/include/linux/mfd/asic3.h b/include/linux/mfd/asic3.h
index 322cd6deb9f..6b427ec4b24 100644
--- a/include/linux/mfd/asic3.h
+++ b/include/linux/mfd/asic3.h
@@ -227,8 +227,8 @@ struct asic3_platform_data {
 
 
 /* Basic control of the SD ASIC */
-#define ASIC3_SDHWCTRL_Base	0x0E00
-#define ASIC3_SDHWCTRL_SDConf    0x00
+#define ASIC3_SDHWCTRL_BASE     0x0E00
+#define ASIC3_SDHWCTRL_SDCONF     0x00
 
 #define ASIC3_SDHWCTRL_SUSPEND    (1 << 0)  /* 1=suspend all SD operations */
 #define ASIC3_SDHWCTRL_CLKSEL     (1 << 1)  /* 1=SDICK, 0=HCLK */
@@ -242,10 +242,10 @@ struct asic3_platform_data {
 /* SD card power supply ctrl 1=enable */
 #define ASIC3_SDHWCTRL_SDPWR      (1 << 6)
 
-#define ASIC3_EXTCF_Base		0x1100
+#define ASIC3_EXTCF_BASE        0x1100
 
-#define ASIC3_EXTCF_Select         0x00
-#define ASIC3_EXTCF_Reset          0x04
+#define ASIC3_EXTCF_SELECT        0x00
+#define ASIC3_EXTCF_RESET         0x04
 
 #define ASIC3_EXTCF_SMOD0	         (1 << 0)  /* slot number of mode 0 */
 #define ASIC3_EXTCF_SMOD1	         (1 << 1)  /* slot number of mode 1 */
-- 
cgit v1.2.3-70-g09d2


From 01906d6d780e84c51537f6b56da472959e846314 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Fri, 5 Jun 2009 18:31:03 +0200
Subject: mfd: add ASIC3 IRQ numbers

IRQ number definitions for PWM, LED, SPI and OWM (ds1wm).

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/asic3.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/mfd/asic3.h b/include/linux/mfd/asic3.h
index 6b427ec4b24..d2ecaafaefe 100644
--- a/include/linux/mfd/asic3.h
+++ b/include/linux/mfd/asic3.h
@@ -30,6 +30,13 @@ struct asic3_platform_data {
 #define ASIC3_NUM_GPIOS		64
 #define ASIC3_NR_IRQS		ASIC3_NUM_GPIOS + 6
 
+#define ASIC3_IRQ_LED0		64
+#define ASIC3_IRQ_LED1		65
+#define ASIC3_IRQ_LED2		66
+#define ASIC3_IRQ_SPI		67
+#define ASIC3_IRQ_SMBUS		68
+#define ASIC3_IRQ_OWM		69
+
 #define ASIC3_TO_GPIO(gpio) (NR_BUILTIN_GPIO + (gpio))
 
 #define ASIC3_GPIO_BANK_A	0
-- 
cgit v1.2.3-70-g09d2


From 1b89040c3aa4500c97859bad714e010441e9c477 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Fri, 5 Jun 2009 18:31:05 +0200
Subject: mfd: asic3: remove SD/SDIO controller register definitions

Only the base addresses remain, as they are needed to set up
the IOMEM resources.

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/asic3.h | 219 +---------------------------------------------
 1 file changed, 3 insertions(+), 216 deletions(-)

(limited to 'include')

diff --git a/include/linux/mfd/asic3.h b/include/linux/mfd/asic3.h
index d2ecaafaefe..de3c4ad19af 100644
--- a/include/linux/mfd/asic3.h
+++ b/include/linux/mfd/asic3.h
@@ -286,222 +286,9 @@ struct asic3_platform_data {
  *  SDIO_CTRL         Control registers for SDIO operations
  *
  *****************************************************************************/
-#define ASIC3_SD_CONFIG_Base            0x0400 /* Assumes 32 bit addressing */
-
-#define ASIC3_SD_CONFIG_Command           0x08   /* R/W: Command */
-
-/* [0:8] SD Control Register Base Address */
-#define ASIC3_SD_CONFIG_Addr0             0x20
-
-/* [9:31] SD Control Register Base Address */
-#define ASIC3_SD_CONFIG_Addr1             0x24
-
-/* R/O: interrupt assigned to pin */
-#define ASIC3_SD_CONFIG_IntPin            0x78
-
-/*
- * Set to 0x1f to clock SD controller, 0 otherwise.
- * At 0x82 - Gated Clock Ctrl
- */
-#define ASIC3_SD_CONFIG_ClkStop           0x80
-
-/* Control clock of SD controller */
-#define ASIC3_SD_CONFIG_ClockMode         0x84
-#define ASIC3_SD_CONFIG_SDHC_PinStatus    0x88   /* R/0: SD pins status */
-#define ASIC3_SD_CONFIG_SDHC_Power1       0x90   /* Power1 - manual pwr ctrl */
-
-/* auto power up after card inserted */
-#define ASIC3_SD_CONFIG_SDHC_Power2       0x92
-
-/* auto power down when card removed */
-#define ASIC3_SD_CONFIG_SDHC_Power3       0x94
-#define ASIC3_SD_CONFIG_SDHC_CardDetect   0x98
-#define ASIC3_SD_CONFIG_SDHC_Slot         0xA0   /* R/O: support slot number */
-#define ASIC3_SD_CONFIG_SDHC_ExtGateClk1  0x1E0  /* Not used */
-#define ASIC3_SD_CONFIG_SDHC_ExtGateClk2  0x1E2  /* Not used*/
-
-/* GPIO Output Reg. , at 0x1EA - GPIO Output Enable Reg. */
-#define ASIC3_SD_CONFIG_SDHC_GPIO_OutAndEnable  0x1E8
-#define ASIC3_SD_CONFIG_SDHC_GPIO_Status  0x1EC  /* GPIO Status Reg. */
-
-/* Bit 1: double buffer/single buffer */
-#define ASIC3_SD_CONFIG_SDHC_ExtGateClk3  0x1F0
-
-/* Memory access enable (set to 1 to access SD Controller) */
-#define SD_CONFIG_COMMAND_MAE                (1<<1)
-
-#define SD_CONFIG_CLK_ENABLE_ALL             0x1f
-
-#define SD_CONFIG_POWER1_PC_33V              0x0200    /* Set for 3.3 volts */
-#define SD_CONFIG_POWER1_PC_OFF              0x0000    /* Turn off power */
-
- /* two bits - number of cycles for card detection */
-#define SD_CONFIG_CARDDETECTMODE_CLK           ((x) & 0x3)
-
-
-#define ASIC3_SD_CTRL_Base            0x1000
-
-#define ASIC3_SD_CTRL_Cmd                  0x00
-#define ASIC3_SD_CTRL_Arg0                 0x08
-#define ASIC3_SD_CTRL_Arg1                 0x0C
-#define ASIC3_SD_CTRL_StopInternal         0x10
-#define ASIC3_SD_CTRL_TransferSectorCount  0x14
-#define ASIC3_SD_CTRL_Response0            0x18
-#define ASIC3_SD_CTRL_Response1            0x1C
-#define ASIC3_SD_CTRL_Response2            0x20
-#define ASIC3_SD_CTRL_Response3            0x24
-#define ASIC3_SD_CTRL_Response4            0x28
-#define ASIC3_SD_CTRL_Response5            0x2C
-#define ASIC3_SD_CTRL_Response6            0x30
-#define ASIC3_SD_CTRL_Response7            0x34
-#define ASIC3_SD_CTRL_CardStatus           0x38
-#define ASIC3_SD_CTRL_BufferCtrl           0x3C
-#define ASIC3_SD_CTRL_IntMaskCard          0x40
-#define ASIC3_SD_CTRL_IntMaskBuffer        0x44
-#define ASIC3_SD_CTRL_CardClockCtrl        0x48
-#define ASIC3_SD_CTRL_MemCardXferDataLen   0x4C
-#define ASIC3_SD_CTRL_MemCardOptionSetup   0x50
-#define ASIC3_SD_CTRL_ErrorStatus0         0x58
-#define ASIC3_SD_CTRL_ErrorStatus1         0x5C
-#define ASIC3_SD_CTRL_DataPort             0x60
-#define ASIC3_SD_CTRL_TransactionCtrl      0x68
-#define ASIC3_SD_CTRL_SoftwareReset        0x1C0
-
-#define SD_CTRL_SOFTWARE_RESET_CLEAR            (1<<0)
-
-#define SD_CTRL_TRANSACTIONCONTROL_SET          (1<<8)
-
-#define SD_CTRL_CARDCLOCKCONTROL_FOR_SD_CARD    (1<<15)
-#define SD_CTRL_CARDCLOCKCONTROL_ENABLE_CLOCK   (1<<8)
-#define SD_CTRL_CARDCLOCKCONTROL_CLK_DIV_512    (1<<7)
-#define SD_CTRL_CARDCLOCKCONTROL_CLK_DIV_256    (1<<6)
-#define SD_CTRL_CARDCLOCKCONTROL_CLK_DIV_128    (1<<5)
-#define SD_CTRL_CARDCLOCKCONTROL_CLK_DIV_64     (1<<4)
-#define SD_CTRL_CARDCLOCKCONTROL_CLK_DIV_32     (1<<3)
-#define SD_CTRL_CARDCLOCKCONTROL_CLK_DIV_16     (1<<2)
-#define SD_CTRL_CARDCLOCKCONTROL_CLK_DIV_8      (1<<1)
-#define SD_CTRL_CARDCLOCKCONTROL_CLK_DIV_4      (1<<0)
-#define SD_CTRL_CARDCLOCKCONTROL_CLK_DIV_2      (0<<0)
-
-#define MEM_CARD_OPTION_REQUIRED                   0x000e
-#define MEM_CARD_OPTION_DATA_RESPONSE_TIMEOUT(x)   (((x) & 0x0f) << 4)
-#define MEM_CARD_OPTION_C2_MODULE_NOT_PRESENT      (1<<14)
-#define MEM_CARD_OPTION_DATA_XFR_WIDTH_1           (1<<15)
-#define MEM_CARD_OPTION_DATA_XFR_WIDTH_4           0
-
-#define SD_CTRL_COMMAND_INDEX(x)                   ((x) & 0x3f)
-#define SD_CTRL_COMMAND_TYPE_CMD                   (0 << 6)
-#define SD_CTRL_COMMAND_TYPE_ACMD                  (1 << 6)
-#define SD_CTRL_COMMAND_TYPE_AUTHENTICATION        (2 << 6)
-#define SD_CTRL_COMMAND_RESPONSE_TYPE_NORMAL       (0 << 8)
-#define SD_CTRL_COMMAND_RESPONSE_TYPE_EXT_R1       (4 << 8)
-#define SD_CTRL_COMMAND_RESPONSE_TYPE_EXT_R1B      (5 << 8)
-#define SD_CTRL_COMMAND_RESPONSE_TYPE_EXT_R2       (6 << 8)
-#define SD_CTRL_COMMAND_RESPONSE_TYPE_EXT_R3       (7 << 8)
-#define SD_CTRL_COMMAND_DATA_PRESENT               (1 << 11)
-#define SD_CTRL_COMMAND_TRANSFER_READ              (1 << 12)
-#define SD_CTRL_COMMAND_TRANSFER_WRITE             (0 << 12)
-#define SD_CTRL_COMMAND_MULTI_BLOCK                (1 << 13)
-#define SD_CTRL_COMMAND_SECURITY_CMD               (1 << 14)
-
-#define SD_CTRL_STOP_INTERNAL_ISSSUE_CMD12         (1 << 0)
-#define SD_CTRL_STOP_INTERNAL_AUTO_ISSUE_CMD12     (1 << 8)
-
-#define SD_CTRL_CARDSTATUS_RESPONSE_END            (1 << 0)
-#define SD_CTRL_CARDSTATUS_RW_END                  (1 << 2)
-#define SD_CTRL_CARDSTATUS_CARD_REMOVED_0          (1 << 3)
-#define SD_CTRL_CARDSTATUS_CARD_INSERTED_0         (1 << 4)
-#define SD_CTRL_CARDSTATUS_SIGNAL_STATE_PRESENT_0  (1 << 5)
-#define SD_CTRL_CARDSTATUS_WRITE_PROTECT           (1 << 7)
-#define SD_CTRL_CARDSTATUS_CARD_REMOVED_3          (1 << 8)
-#define SD_CTRL_CARDSTATUS_CARD_INSERTED_3         (1 << 9)
-#define SD_CTRL_CARDSTATUS_SIGNAL_STATE_PRESENT_3  (1 << 10)
-
-#define SD_CTRL_BUFFERSTATUS_CMD_INDEX_ERROR       (1 << 0)
-#define SD_CTRL_BUFFERSTATUS_CRC_ERROR             (1 << 1)
-#define SD_CTRL_BUFFERSTATUS_STOP_BIT_END_ERROR    (1 << 2)
-#define SD_CTRL_BUFFERSTATUS_DATA_TIMEOUT          (1 << 3)
-#define SD_CTRL_BUFFERSTATUS_BUFFER_OVERFLOW       (1 << 4)
-#define SD_CTRL_BUFFERSTATUS_BUFFER_UNDERFLOW      (1 << 5)
-#define SD_CTRL_BUFFERSTATUS_CMD_TIMEOUT           (1 << 6)
-#define SD_CTRL_BUFFERSTATUS_UNK7                  (1 << 7)
-#define SD_CTRL_BUFFERSTATUS_BUFFER_READ_ENABLE    (1 << 8)
-#define SD_CTRL_BUFFERSTATUS_BUFFER_WRITE_ENABLE   (1 << 9)
-#define SD_CTRL_BUFFERSTATUS_ILLEGAL_FUNCTION      (1 << 13)
-#define SD_CTRL_BUFFERSTATUS_CMD_BUSY              (1 << 14)
-#define SD_CTRL_BUFFERSTATUS_ILLEGAL_ACCESS        (1 << 15)
-
-#define SD_CTRL_INTMASKCARD_RESPONSE_END           (1 << 0)
-#define SD_CTRL_INTMASKCARD_RW_END                 (1 << 2)
-#define SD_CTRL_INTMASKCARD_CARD_REMOVED_0         (1 << 3)
-#define SD_CTRL_INTMASKCARD_CARD_INSERTED_0        (1 << 4)
-#define SD_CTRL_INTMASKCARD_SIGNAL_STATE_PRESENT_0 (1 << 5)
-#define SD_CTRL_INTMASKCARD_UNK6                   (1 << 6)
-#define SD_CTRL_INTMASKCARD_WRITE_PROTECT          (1 << 7)
-#define SD_CTRL_INTMASKCARD_CARD_REMOVED_3         (1 << 8)
-#define SD_CTRL_INTMASKCARD_CARD_INSERTED_3        (1 << 9)
-#define SD_CTRL_INTMASKCARD_SIGNAL_STATE_PRESENT_3 (1 << 10)
-
-#define SD_CTRL_INTMASKBUFFER_CMD_INDEX_ERROR      (1 << 0)
-#define SD_CTRL_INTMASKBUFFER_CRC_ERROR            (1 << 1)
-#define SD_CTRL_INTMASKBUFFER_STOP_BIT_END_ERROR   (1 << 2)
-#define SD_CTRL_INTMASKBUFFER_DATA_TIMEOUT         (1 << 3)
-#define SD_CTRL_INTMASKBUFFER_BUFFER_OVERFLOW      (1 << 4)
-#define SD_CTRL_INTMASKBUFFER_BUFFER_UNDERFLOW     (1 << 5)
-#define SD_CTRL_INTMASKBUFFER_CMD_TIMEOUT          (1 << 6)
-#define SD_CTRL_INTMASKBUFFER_UNK7                 (1 << 7)
-#define SD_CTRL_INTMASKBUFFER_BUFFER_READ_ENABLE   (1 << 8)
-#define SD_CTRL_INTMASKBUFFER_BUFFER_WRITE_ENABLE  (1 << 9)
-#define SD_CTRL_INTMASKBUFFER_ILLEGAL_FUNCTION     (1 << 13)
-#define SD_CTRL_INTMASKBUFFER_CMD_BUSY             (1 << 14)
-#define SD_CTRL_INTMASKBUFFER_ILLEGAL_ACCESS       (1 << 15)
-
-#define SD_CTRL_DETAIL0_RESPONSE_CMD_ERROR                   (1 << 0)
-#define SD_CTRL_DETAIL0_END_BIT_ERROR_FOR_RESPONSE_NON_CMD12 (1 << 2)
-#define SD_CTRL_DETAIL0_END_BIT_ERROR_FOR_RESPONSE_CMD12     (1 << 3)
-#define SD_CTRL_DETAIL0_END_BIT_ERROR_FOR_READ_DATA          (1 << 4)
-#define SD_CTRL_DETAIL0_END_BIT_ERROR_FOR_WRITE_CRC_STATUS   (1 << 5)
-#define SD_CTRL_DETAIL0_CRC_ERROR_FOR_RESPONSE_NON_CMD12     (1 << 8)
-#define SD_CTRL_DETAIL0_CRC_ERROR_FOR_RESPONSE_CMD12         (1 << 9)
-#define SD_CTRL_DETAIL0_CRC_ERROR_FOR_READ_DATA              (1 << 10)
-#define SD_CTRL_DETAIL0_CRC_ERROR_FOR_WRITE_CMD              (1 << 11)
-
-#define SD_CTRL_DETAIL1_NO_CMD_RESPONSE                      (1 << 0)
-#define SD_CTRL_DETAIL1_TIMEOUT_READ_DATA                    (1 << 4)
-#define SD_CTRL_DETAIL1_TIMEOUT_CRS_STATUS                   (1 << 5)
-#define SD_CTRL_DETAIL1_TIMEOUT_CRC_BUSY                     (1 << 6)
-
-#define ASIC3_SDIO_CTRL_Base          0x1200
-
-#define ASIC3_SDIO_CTRL_Cmd                  0x00
-#define ASIC3_SDIO_CTRL_CardPortSel          0x04
-#define ASIC3_SDIO_CTRL_Arg0                 0x08
-#define ASIC3_SDIO_CTRL_Arg1                 0x0C
-#define ASIC3_SDIO_CTRL_TransferBlockCount   0x14
-#define ASIC3_SDIO_CTRL_Response0            0x18
-#define ASIC3_SDIO_CTRL_Response1            0x1C
-#define ASIC3_SDIO_CTRL_Response2            0x20
-#define ASIC3_SDIO_CTRL_Response3            0x24
-#define ASIC3_SDIO_CTRL_Response4            0x28
-#define ASIC3_SDIO_CTRL_Response5            0x2C
-#define ASIC3_SDIO_CTRL_Response6            0x30
-#define ASIC3_SDIO_CTRL_Response7            0x34
-#define ASIC3_SDIO_CTRL_CardStatus           0x38
-#define ASIC3_SDIO_CTRL_BufferCtrl           0x3C
-#define ASIC3_SDIO_CTRL_IntMaskCard          0x40
-#define ASIC3_SDIO_CTRL_IntMaskBuffer        0x44
-#define ASIC3_SDIO_CTRL_CardXferDataLen      0x4C
-#define ASIC3_SDIO_CTRL_CardOptionSetup      0x50
-#define ASIC3_SDIO_CTRL_ErrorStatus0         0x54
-#define ASIC3_SDIO_CTRL_ErrorStatus1         0x58
-#define ASIC3_SDIO_CTRL_DataPort             0x60
-#define ASIC3_SDIO_CTRL_TransactionCtrl      0x68
-#define ASIC3_SDIO_CTRL_CardIntCtrl          0x6C
-#define ASIC3_SDIO_CTRL_ClocknWaitCtrl       0x70
-#define ASIC3_SDIO_CTRL_HostInformation      0x74
-#define ASIC3_SDIO_CTRL_ErrorCtrl            0x78
-#define ASIC3_SDIO_CTRL_LEDCtrl              0x7C
-#define ASIC3_SDIO_CTRL_SoftwareReset        0x1C0
+#define ASIC3_SD_CONFIG_BASE	0x0400 /* Assumes 32 bit addressing */
+#define ASIC3_SD_CTRL_BASE	0x1000
+#define ASIC3_SDIO_CTRL_BASE	0x1200
 
 #define ASIC3_MAP_SIZE_32BIT	0x2000
 #define ASIC3_MAP_SIZE_16BIT	0x1000
-- 
cgit v1.2.3-70-g09d2


From 4d3792e054f706f73837769a0e5607b3b7ad25a2 Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <sameo@linux.intel.com>
Date: Mon, 15 Jun 2009 15:43:31 +0200
Subject: mfd: fix tmio related warnings

We can not have .driver_data as const since platform_set_drvdata() doesnt take
a const.
The hclk mmc_data field can be const though.

Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/t7l66xb.c    | 2 +-
 drivers/mfd/tc6387xb.c   | 2 +-
 drivers/mfd/tc6393xb.c   | 2 +-
 include/linux/mfd/tmio.h | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/mfd/t7l66xb.c b/drivers/mfd/t7l66xb.c
index 875f7a87573..0a255c1f1ce 100644
--- a/drivers/mfd/t7l66xb.c
+++ b/drivers/mfd/t7l66xb.c
@@ -108,7 +108,7 @@ static int t7l66xb_mmc_disable(struct platform_device *mmc)
 
 /*--------------------------------------------------------------------------*/
 
-static const struct tmio_mmc_data t7166xb_mmc_data = {
+static struct tmio_mmc_data t7166xb_mmc_data = {
 	.hclk = 24000000,
 };
 
diff --git a/drivers/mfd/tc6387xb.c b/drivers/mfd/tc6387xb.c
index c3993ac2054..3280ab33f88 100644
--- a/drivers/mfd/tc6387xb.c
+++ b/drivers/mfd/tc6387xb.c
@@ -75,7 +75,7 @@ static int tc6387xb_mmc_disable(struct platform_device *mmc)
 
 /*--------------------------------------------------------------------------*/
 
-const static struct tmio_mmc_data tc6387xb_mmc_data = {
+static struct tmio_mmc_data tc6387xb_mmc_data = {
 	.hclk = 24000000,
 };
 
diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c
index 9d2abb5d6e2..1429a7341a9 100644
--- a/drivers/mfd/tc6393xb.c
+++ b/drivers/mfd/tc6393xb.c
@@ -136,7 +136,7 @@ static int tc6393xb_nand_enable(struct platform_device *nand)
 	return 0;
 }
 
-const static struct tmio_mmc_data tc6393xb_mmc_data = {
+static struct tmio_mmc_data tc6393xb_mmc_data = {
 	.hclk = 24000000,
 };
 
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index c377118884e..6b9c5d06690 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -22,7 +22,7 @@
  * data for the MMC controller
  */
 struct tmio_mmc_data {
-	unsigned int		hclk;
+	const unsigned int		hclk;
 };
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 44549dff82753b6a5ffabcefeead34be63e95d96 Mon Sep 17 00:00:00 2001
From: Mike Sager <sager@netapp.com>
Date: Wed, 1 Apr 2009 09:21:47 -0400
Subject: nfs41: define NFS4_MAX_MINOR_VERSION based on CONFIG_NFS_V4_1

If 4.1 isn't supported, NFS4_MAX_MINOR_VERSION will be 0.

Signed-off-by: Mike Sager <sager@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs4.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index e3f0cbcbd0d..7c36fcf2dfb 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -462,6 +462,13 @@ enum lock_type4 {
 #define NFSPROC4_NULL 0
 #define NFSPROC4_COMPOUND 1
 #define NFS4_MINOR_VERSION 0
+
+#if defined(CONFIG_NFS_V4_1)
+#define NFS4_MAX_MINOR_VERSION 1
+#else
+#define NFS4_MAX_MINOR_VERSION 0
+#endif /* CONFIG_NFS_V4_1 */
+
 #define NFS4_DEBUG 1
 
 /* Index of predefined Linux client operations */
-- 
cgit v1.2.3-70-g09d2


From 94a417f3d7a02478a2d7842e693a61339fb54ea4 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 1 Apr 2009 09:21:49 -0400
Subject: nfs41: nfs_client.cl_minorversion

This field is set to the nfsv4 minor version for this mount.

Signed-off-by: Benny Halevy <bhalevy@panasas.com>

Note: This patch sets the referral to the same minorversion as the
current mount. Revisit in future patch.

Signed-off-by: Andy Adamson <andros@netapp.com>
[removed cl_minorversion assignment in nfs_set_client]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[always define nfs_client.cl_minorversion]
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           | 9 ++++++---
 include/linux/nfs_fs_sb.h | 3 ++-
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 75c9cd2aa11..0efcb55c2ca 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1101,7 +1101,8 @@ static int nfs4_set_client(struct nfs_server *server,
 		const size_t addrlen,
 		const char *ip_addr,
 		rpc_authflavor_t authflavour,
-		int proto, const struct rpc_timeout *timeparms)
+		int proto, const struct rpc_timeout *timeparms,
+		u32 minorversion)
 {
 	struct nfs_client_initdata cl_init = {
 		.hostname = hostname,
@@ -1164,7 +1165,8 @@ static int nfs4_init_server(struct nfs_server *server,
 			data->client_address,
 			data->auth_flavors[0],
 			data->nfs_server.protocol,
-			&timeparms);
+			&timeparms,
+			data->minorversion);
 	if (error < 0)
 		goto error;
 
@@ -1282,7 +1284,8 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 				parent_client->cl_ipaddr,
 				data->authflavor,
 				parent_server->client->cl_xprt->prot,
-				parent_server->client->cl_timeout);
+				parent_server->client->cl_timeout,
+				parent_client->cl_minorversion);
 	if (error < 0)
 		goto error;
 
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 6ad75948cbf..e9a51fe46aa 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -32,6 +32,7 @@ struct nfs_client {
 	const struct nfs_rpc_ops *rpc_ops;	/* NFS protocol vector */
 	int			cl_proto;	/* Network transport protocol */
 
+	u32			cl_minorversion;/* NFSv4 minorversion */
 	struct rpc_cred		*cl_machine_cred;
 
 #ifdef CONFIG_NFS_V4
@@ -63,7 +64,7 @@ struct nfs_client {
 	 */
 	char			cl_ipaddr[48];
 	unsigned char		cl_id_uniquifier;
-#endif
+#endif /* CONFIG_NFS_V4 */
 
 #ifdef CONFIG_NFS_FSCACHE
 	struct fscache_cookie	*fscache;	/* client index cache cookie */
-- 
cgit v1.2.3-70-g09d2


From 9ff71c3a9827b99699510076dffa0bbe7c36bfd4 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 1 Apr 2009 09:21:52 -0400
Subject: nfs41: client xdr definitions

Define stubs for sequence args and res data structures and embed
them in all other nfs4 and nfs41 xdr types.  They are needed for
sending any op in a nfs41 compound rpc.

Signed-off-by: Andy Adamson<andros@netapp.com>
[moved new args/res definitions away, to where they're first used]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_xdr.h | 52 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

(limited to 'include')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index b89c34e40bc..f2c5700c7b6 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -145,6 +145,15 @@ struct nfs4_change_info {
 };
 
 struct nfs_seqid;
+
+struct nfs4_sequence_args {
+	/* stub */
+};
+
+struct nfs4_sequence_res {
+	/* stub */
+};
+
 /*
  * Arguments to the open call.
  */
@@ -165,6 +174,7 @@ struct nfs_openargs {
 	const struct nfs_server *server;	 /* Needed for ID mapping */
 	const u32 *		bitmask;
 	__u32			claim;
+	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_openres {
@@ -181,6 +191,7 @@ struct nfs_openres {
 	__u32			do_recall;
 	__u64			maxsize;
 	__u32			attrset[NFS4_BITMAP_SIZE];
+	struct nfs4_sequence_res	seq_res;
 };
 
 /*
@@ -206,6 +217,7 @@ struct nfs_closeargs {
 	struct nfs_seqid *	seqid;
 	fmode_t			fmode;
 	const u32 *		bitmask;
+	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_closeres {
@@ -213,6 +225,7 @@ struct nfs_closeres {
 	struct nfs_fattr *	fattr;
 	struct nfs_seqid *	seqid;
 	const struct nfs_server *server;
+	struct nfs4_sequence_res	seq_res;
 };
 /*
  *  * Arguments to the lock,lockt, and locku call.
@@ -233,12 +246,14 @@ struct nfs_lock_args {
 	unsigned char		block : 1;
 	unsigned char		reclaim : 1;
 	unsigned char		new_lock_owner : 1;
+	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_lock_res {
 	nfs4_stateid		stateid;
 	struct nfs_seqid *	lock_seqid;
 	struct nfs_seqid *	open_seqid;
+	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs_locku_args {
@@ -246,32 +261,38 @@ struct nfs_locku_args {
 	struct file_lock *	fl;
 	struct nfs_seqid *	seqid;
 	nfs4_stateid *		stateid;
+	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_locku_res {
 	nfs4_stateid		stateid;
 	struct nfs_seqid *	seqid;
+	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs_lockt_args {
 	struct nfs_fh *		fh;
 	struct file_lock *	fl;
 	struct nfs_lowner	lock_owner;
+	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_lockt_res {
 	struct file_lock *	denied; /* LOCK, LOCKT failed */
+	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_delegreturnargs {
 	const struct nfs_fh *fhandle;
 	const nfs4_stateid *stateid;
 	const u32 * bitmask;
+	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_delegreturnres {
 	struct nfs_fattr * fattr;
 	const struct nfs_server *server;
+	struct nfs4_sequence_res	seq_res;
 };
 
 /*
@@ -284,12 +305,14 @@ struct nfs_readargs {
 	__u32			count;
 	unsigned int		pgbase;
 	struct page **		pages;
+	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_readres {
 	struct nfs_fattr *	fattr;
 	__u32			count;
 	int                     eof;
+	struct nfs4_sequence_res	seq_res;
 };
 
 /*
@@ -304,6 +327,7 @@ struct nfs_writeargs {
 	unsigned int		pgbase;
 	struct page **		pages;
 	const u32 *		bitmask;
+	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_writeverf {
@@ -316,6 +340,7 @@ struct nfs_writeres {
 	struct nfs_writeverf *	verf;
 	__u32			count;
 	const struct nfs_server *server;
+	struct nfs4_sequence_res	seq_res;
 };
 
 /*
@@ -325,12 +350,14 @@ struct nfs_removeargs {
 	const struct nfs_fh	*fh;
 	struct qstr		name;
 	const u32 *		bitmask;
+	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_removeres {
 	const struct nfs_server *server;
 	struct nfs4_change_info	cinfo;
 	struct nfs_fattr	dir_attr;
+	struct nfs4_sequence_res 	seq_res;
 };
 
 /*
@@ -383,6 +410,7 @@ struct nfs_setattrargs {
 	struct iattr *                  iap;
 	const struct nfs_server *	server; /* Needed for name mapping */
 	const u32 *			bitmask;
+	struct nfs4_sequence_args 	seq_args;
 };
 
 struct nfs_setaclargs {
@@ -390,6 +418,7 @@ struct nfs_setaclargs {
 	size_t				acl_len;
 	unsigned int			acl_pgbase;
 	struct page **			acl_pages;
+	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_getaclargs {
@@ -397,11 +426,13 @@ struct nfs_getaclargs {
 	size_t				acl_len;
 	unsigned int			acl_pgbase;
 	struct page **			acl_pages;
+	struct nfs4_sequence_args 	seq_args;
 };
 
 struct nfs_setattrres {
 	struct nfs_fattr *              fattr;
 	const struct nfs_server *	server;
+	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs_linkargs {
@@ -583,6 +614,7 @@ struct nfs4_accessargs {
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
 	u32				access;
+	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_accessres {
@@ -590,6 +622,7 @@ struct nfs4_accessres {
 	struct nfs_fattr *		fattr;
 	u32				supported;
 	u32				access;
+	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_create_arg {
@@ -609,6 +642,7 @@ struct nfs4_create_arg {
 	const struct iattr *		attrs;
 	const struct nfs_fh *		dir_fh;
 	const u32 *			bitmask;
+	struct nfs4_sequence_args 	seq_args;
 };
 
 struct nfs4_create_res {
@@ -617,21 +651,25 @@ struct nfs4_create_res {
 	struct nfs_fattr *		fattr;
 	struct nfs4_change_info		dir_cinfo;
 	struct nfs_fattr *		dir_fattr;
+	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_fsinfo_arg {
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
+	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_getattr_arg {
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
+	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_getattr_res {
 	const struct nfs_server *	server;
 	struct nfs_fattr *		fattr;
+	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_link_arg {
@@ -639,6 +677,7 @@ struct nfs4_link_arg {
 	const struct nfs_fh *		dir_fh;
 	const struct qstr *		name;
 	const u32 *			bitmask;
+	struct nfs4_sequence_args 	seq_args;
 };
 
 struct nfs4_link_res {
@@ -646,6 +685,7 @@ struct nfs4_link_res {
 	struct nfs_fattr *		fattr;
 	struct nfs4_change_info		cinfo;
 	struct nfs_fattr *		dir_attr;
+	struct nfs4_sequence_res	seq_res;
 };
 
 
@@ -653,21 +693,25 @@ struct nfs4_lookup_arg {
 	const struct nfs_fh *		dir_fh;
 	const struct qstr *		name;
 	const u32 *			bitmask;
+	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_lookup_res {
 	const struct nfs_server *	server;
 	struct nfs_fattr *		fattr;
 	struct nfs_fh *			fh;
+	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_lookup_root_arg {
 	const u32 *			bitmask;
+	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_pathconf_arg {
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
+	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_readdir_arg {
@@ -678,11 +722,13 @@ struct nfs4_readdir_arg {
 	struct page **			pages;	/* zero-copy data */
 	unsigned int			pgbase;	/* zero-copy data */
 	const u32 *			bitmask;
+	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_readdir_res {
 	nfs4_verifier			verifier;
 	unsigned int			pgbase;
+	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_readlink {
@@ -690,6 +736,7 @@ struct nfs4_readlink {
 	unsigned int			pgbase;
 	unsigned int			pglen;   /* zero-copy data */
 	struct page **			pages;   /* zero-copy data */
+	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_rename_arg {
@@ -698,6 +745,7 @@ struct nfs4_rename_arg {
 	const struct qstr *		old_name;
 	const struct qstr *		new_name;
 	const u32 *			bitmask;
+	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_rename_res {
@@ -706,6 +754,7 @@ struct nfs4_rename_res {
 	struct nfs_fattr *		old_fattr;
 	struct nfs4_change_info		new_cinfo;
 	struct nfs_fattr *		new_fattr;
+	struct nfs4_sequence_res	seq_res;
 };
 
 #define NFS4_SETCLIENTID_NAMELEN	(127)
@@ -724,6 +773,7 @@ struct nfs4_setclientid {
 struct nfs4_statfs_arg {
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
+	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_server_caps_res {
@@ -731,6 +781,7 @@ struct nfs4_server_caps_res {
 	u32				acl_bitmask;
 	u32				has_links;
 	u32				has_symlinks;
+	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_string {
@@ -765,6 +816,7 @@ struct nfs4_fs_locations_arg {
 	const struct qstr *name;
 	struct page *page;
 	const u32 *bitmask;
+	struct nfs4_sequence_args	seq_args;
 };
 
 #endif /* CONFIG_NFS_V4 */
-- 
cgit v1.2.3-70-g09d2


From 557134a39c8d2ab79d8b8d53438e03e29feb5ec4 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Wed, 1 Apr 2009 09:21:53 -0400
Subject: nfs41: sessions client infrastructure

NFSv4.1 Sessions basic data types, initialization, and destruction.

The session is always associated with a struct nfs_client that holds
the exchange_id results.

Signed-off-by: Rahul Iyer <iyer@netapp.com>
Signed-off-by: Andy Adamson<andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[remove extraneous rpc_clnt pointer, use the struct nfs_client cl_rpcclient.
remove the rpc_clnt parameter from nfs4 nfs4_init_session]
Signed-off-by: Andy Adamson<andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[Use the presence of a session to determine behaviour instead of the
minorversion number.]
Signed-off-by: Andy Adamson <andros@netapp.com>
[constified nfs4_has_session's struct nfs_client parameter]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[Rename nfs4_put_session() to nfs4_destroy_session() and call it from nfs4_free_client() not nfs4_free_server().
Also get rid of nfs4_get_session() and the ref_count in nfs4_session struct as keeping track of nfs_client should be sufficient]
Signed-off-by: Alexandros Batsakis <Alexandros.Batsakis@netapp.com>
[nfs41: pass rsize and wsize into nfs4_init_session]
Signed-off-by: Andy Adamson <andros@netapp.com>
[separated out removal of rpc_clnt parameter from nfs4_init_session ot a
 patch of its own]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[Pass the nfs_client pointer into nfs4_alloc_session]
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: don't assign to session->clp->cl_session in nfs4_destroy_session]
[nfs41: fixup nfs4_clear_client_minor_version]
[introduce nfs4_clear_client_minor_version() in this patch]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[Refactor nfs4_init_session]
    Moved session allocation into nfs4_init_client_minor_version, called from
    nfs4_init_client.
    Leave rwise and wsize initialization in nfs4_init_session, called from
    nfs4_init_server.
    Reverted moving of nfs_fsid definition to nfs_fs_sb.h
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: Move NFS4_MAX_SLOT_TABLE define from under CONFIG_NFS_V4_1]
[Fix comile error when CONFIG_NFS_V4_1 is not set.]
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[moved nfs4_init_slot_table definition to "create_session operation"]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: alloc session with GFP_KERNEL]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           | 60 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/internal.h         | 12 ++++++++++
 fs/nfs/nfs4_fs.h          |  4 ++++
 fs/nfs/nfs4proc.c         | 34 +++++++++++++++++++++++++++
 include/linux/nfs_fs_sb.h | 49 ++++++++++++++++++++++++++++++++++++++
 include/linux/nfs_xdr.h   | 15 ++++++++++++
 6 files changed, 174 insertions(+)

(limited to 'include')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index a736160046c..f1506f14852 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -183,6 +183,20 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
 #endif
 }
 
+/*
+ * Clears/puts all minor version specific parts from an nfs_client struct
+ * reverting it to minorversion 0.
+ */
+static void nfs4_clear_client_minor_version(struct nfs_client *clp)
+{
+#ifdef CONFIG_NFS_V4_1
+	if (nfs4_has_session(clp)) {
+		nfs4_destroy_session(clp->cl_session);
+		clp->cl_session = NULL;
+	}
+#endif /* CONFIG_NFS_V4_1 */
+}
+
 /*
  * Destroy a shared client record
  */
@@ -190,6 +204,7 @@ static void nfs_free_client(struct nfs_client *clp)
 {
 	dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version);
 
+	nfs4_clear_client_minor_version(clp);
 	nfs4_shutdown_client(clp);
 
 	nfs_fscache_release_client_cookie(clp);
@@ -1053,6 +1068,30 @@ error:
 }
 
 #ifdef CONFIG_NFS_V4
+/*
+ * Initialize the minor version specific parts of an NFS4 client record
+ */
+static int nfs4_init_client_minor_version(struct nfs_client *clp)
+{
+#if defined(CONFIG_NFS_V4_1)
+	if (clp->cl_minorversion) {
+		struct nfs4_session *session = NULL;
+		/*
+		 * Create the session and mark it expired.
+		 * When a SEQUENCE operation encounters the expired session
+		 * it will do session recovery to initialize it.
+		 */
+		session = nfs4_alloc_session(clp);
+		if (!session)
+			return -ENOMEM;
+
+		clp->cl_session = session;
+	}
+#endif /* CONFIG_NFS_V4_1 */
+
+	return 0;
+}
+
 /*
  * Initialise an NFS4 client record
  */
@@ -1087,6 +1126,10 @@ static int nfs4_init_client(struct nfs_client *clp,
 	}
 	__set_bit(NFS_CS_IDMAP, &clp->cl_res_state);
 
+	error = nfs4_init_client_minor_version(clp);
+	if (error < 0)
+		goto error;
+
 	nfs_mark_client_ready(clp, NFS_CS_READY);
 	return 0;
 
@@ -1143,6 +1186,21 @@ error:
 	return error;
 }
 
+/*
+ * Initialize a session.
+ * Note: save the mount rsize and wsize for create_server negotiation.
+ */
+static void nfs4_init_session(struct nfs_client *clp,
+			      unsigned int wsize, unsigned int rsize)
+{
+#if defined(CONFIG_NFS_V4_1)
+	if (nfs4_has_session(clp)) {
+		clp->cl_session->fc_attrs.max_rqst_sz = wsize;
+		clp->cl_session->fc_attrs.max_resp_sz = rsize;
+	}
+#endif /* CONFIG_NFS_V4_1 */
+}
+
 /*
  * Create a version 4 volume record
  */
@@ -1221,6 +1279,8 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
 	BUG_ON(!server->nfs_client->rpc_ops);
 	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
 
+	nfs4_init_session(server->nfs_client, server->wsize, server->rsize);
+
 	/* Probe the root fh to retrieve its FSID */
 	error = nfs4_path_walk(server, mntfh, data->nfs_server.export_path);
 	if (error < 0)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ffa6bd54d43..7cef45db925 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -206,6 +206,18 @@ extern int nfs4_path_walk(struct nfs_server *server,
 			  const char *path);
 #endif
 
+/*
+ * Determine if sessions are in use.
+ */
+static inline int nfs4_has_session(const struct nfs_client *clp)
+{
+#ifdef CONFIG_NFS_V4_1
+	if (clp->cl_session)
+		return 1;
+#endif /* CONFIG_NFS_V4_1 */
+	return 0;
+}
+
 /*
  * Determine the device name as a string
  */
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 84345deab26..acac6f8c3d3 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -202,6 +202,10 @@ extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
 
 extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops;
 extern struct nfs4_state_recovery_ops nfs4_nograce_recovery_ops;
+#if defined(CONFIG_NFS_V4_1)
+extern void nfs4_destroy_session(struct nfs4_session *session);
+extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
+#endif /* CONFIG_NFS_V4_1 */
 
 extern const u32 nfs4_fattr_bitmap[2];
 extern const u32 nfs4_statfs_bitmap[2];
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4674f8092da..cdd8e74c47d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3723,6 +3723,40 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
 	return status;
 }
 
+#ifdef CONFIG_NFS_V4_1
+/* Destroy the slot table */
+static void nfs4_destroy_slot_table(struct nfs4_session *session)
+{
+	if (session->fc_slot_table.slots == NULL)
+		return;
+	kfree(session->fc_slot_table.slots);
+	session->fc_slot_table.slots = NULL;
+	return;
+}
+
+struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
+{
+	struct nfs4_session *session;
+	struct nfs4_slot_table *tbl;
+
+	session = kzalloc(sizeof(struct nfs4_session), GFP_KERNEL);
+	if (!session)
+		return NULL;
+	tbl = &session->fc_slot_table;
+	spin_lock_init(&tbl->slot_tbl_lock);
+	rpc_init_wait_queue(&tbl->slot_tbl_waitq, "Slot table");
+	session->clp = clp;
+	return session;
+}
+
+void nfs4_destroy_session(struct nfs4_session *session)
+{
+	nfs4_destroy_slot_table(session);
+	kfree(session);
+}
+
+#endif /* CONFIG_NFS_V4_1 */
+
 struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = {
 	.owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT,
 	.state_flag_bit	= NFS_STATE_RECLAIM_REBOOT,
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index e9a51fe46aa..b47c0fc55d4 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -4,9 +4,12 @@
 #include <linux/list.h>
 #include <linux/backing-dev.h>
 #include <linux/wait.h>
+#include <linux/nfs_xdr.h>
+#include <linux/sunrpc/xprt.h>
 
 #include <asm/atomic.h>
 
+struct nfs4_session;
 struct nfs_iostats;
 struct nlm_host;
 
@@ -66,6 +69,10 @@ struct nfs_client {
 	unsigned char		cl_id_uniquifier;
 #endif /* CONFIG_NFS_V4 */
 
+#ifdef CONFIG_NFS_V4_1
+	struct nfs4_session	*cl_session; 	/* sharred session */
+#endif /* CONFIG_NFS_V4_1 */
+
 #ifdef CONFIG_NFS_FSCACHE
 	struct fscache_cookie	*fscache;	/* client index cache cookie */
 #endif
@@ -146,4 +153,46 @@ struct nfs_server {
 #define NFS_CAP_ACLS		(1U << 3)
 #define NFS_CAP_ATOMIC_OPEN	(1U << 4)
 
+
+/* maximum number of slots to use */
+#define NFS4_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE
+
+#if defined(CONFIG_NFS_V4_1)
+
+/* Sessions */
+#define SLOT_TABLE_SZ (NFS4_MAX_SLOT_TABLE/(8*sizeof(long)))
+struct nfs4_slot_table {
+	struct nfs4_slot *slots;		/* seqid per slot */
+	unsigned long   used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */
+	spinlock_t	slot_tbl_lock;
+	struct rpc_wait_queue	slot_tbl_waitq;	/* allocators may wait here */
+	int		max_slots;		/* # slots in table */
+	int		highest_used_slotid;	/* sent to server on each SEQ.
+						 * op for dynamic resizing */
+};
+
+static inline int slot_idx(struct nfs4_slot_table *tbl, struct nfs4_slot *sp)
+{
+	return sp - tbl->slots;
+}
+
+/*
+ * Session related parameters
+ */
+struct nfs4_session {
+	struct nfs4_sessionid		sess_id;
+	u32				flags;
+	unsigned long			session_state;
+	u32				hash_alg;
+	u32				ssv_len;
+
+	/* The fore and back channel */
+	struct nfs4_channel_attrs	fc_attrs;
+	struct nfs4_slot_table		fc_slot_table;
+	struct nfs4_channel_attrs	bc_attrs;
+					/* back channel has one slot */
+	struct nfs_client		*clp;
+};
+
+#endif /* CONFIG_NFS_V4_1 */
 #endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index f2c5700c7b6..8f8c026c558 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -146,6 +146,21 @@ struct nfs4_change_info {
 
 struct nfs_seqid;
 
+/* nfs41 sessions channel attributes */
+struct nfs4_channel_attrs {
+	u32			headerpadsz;
+	u32			max_rqst_sz;
+	u32			max_resp_sz;
+	u32			max_resp_sz_cached;
+	u32			max_ops;
+	u32			max_reqs;
+};
+
+/* nfs41 sessions slot seqid */
+struct nfs4_slot {
+	u32		 	seq_nr;
+};
+
 struct nfs4_sequence_args {
 	/* stub */
 };
-- 
cgit v1.2.3-70-g09d2


From 43652ad55342d9146d8035932101a5814b22315a Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 1 Apr 2009 09:21:54 -0400
Subject: nfs41: use nfs4_server_caps_arg

In preparation for nfs41 sequence processing.

Signed-off-by: Andy Admason <andros@netapp.com>
[define nfs4_server_caps_arg]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 5 ++++-
 fs/nfs/nfs4xdr.c        | 5 +++--
 include/linux/nfs_xdr.h | 5 +++++
 3 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index cdd8e74c47d..5ef1022b7e6 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1584,10 +1584,13 @@ void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
 
 static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
 {
+	struct nfs4_server_caps_arg args = {
+		.fhandle = fhandle,
+	};
 	struct nfs4_server_caps_res res = {};
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SERVER_CAPS],
-		.rpc_argp = fhandle,
+		.rpc_argp = &args,
 		.rpc_resp = &res,
 	};
 	int status;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1690f0e44b9..91305861037 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1900,7 +1900,8 @@ static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, __be32 *p, const struct nfs
 /*
  * GETATTR_BITMAP request
  */
-static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p, const struct nfs_fh *fhandle)
+static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p,
+				    struct nfs4_server_caps_arg *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1909,7 +1910,7 @@ static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p, const struc
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
-	encode_putfh(&xdr, fhandle, &hdr);
+	encode_putfh(&xdr, args->fhandle, &hdr);
 	encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
 			   FATTR4_WORD0_LINK_SUPPORT|
 			   FATTR4_WORD0_SYMLINK_SUPPORT|
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 8f8c026c558..a7b7f2a059c 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -791,6 +791,11 @@ struct nfs4_statfs_arg {
 	struct nfs4_sequence_args	seq_args;
 };
 
+struct nfs4_server_caps_arg {
+	struct nfs_fh		       *fhandle;
+	struct nfs4_sequence_args	seq_args;
+};
+
 struct nfs4_server_caps_res {
 	u32				attr_bitmask[2];
 	u32				acl_bitmask;
-- 
cgit v1.2.3-70-g09d2


From f50c7000817e7cb4e676ac5d911a82c0f3fd226f Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 1 Apr 2009 09:21:55 -0400
Subject: nfs41: use nfs4_readlink_res

In preparation for nfs41 sequence processing.

Signed-off-by: Andy Admason <andros@netapp.com>
[define nfs4_readlink_res]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 3 ++-
 fs/nfs/nfs4xdr.c        | 3 ++-
 include/linux/nfs_xdr.h | 4 ++++
 3 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5ef1022b7e6..b0ec8ff96eb 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1960,10 +1960,11 @@ static int _nfs4_proc_readlink(struct inode *inode, struct page *page,
 		.pglen    = pglen,
 		.pages    = &page,
 	};
+	struct nfs4_readlink_res res;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READLINK],
 		.rpc_argp = &args,
-		.rpc_resp = NULL,
+		.rpc_resp = &res,
 	};
 
 	return rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 91305861037..1e41420916a 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4235,7 +4235,8 @@ out:
 /*
  * Decode READLINK response
  */
-static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, __be32 *p, void *res)
+static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, __be32 *p,
+				 struct nfs4_readlink_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index a7b7f2a059c..f71260aeb80 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -754,6 +754,10 @@ struct nfs4_readlink {
 	struct nfs4_sequence_args	seq_args;
 };
 
+struct nfs4_readlink_res {
+	struct nfs4_sequence_res	seq_res;
+};
+
 struct nfs4_rename_arg {
 	const struct nfs_fh *		old_dir;
 	const struct nfs_fh *		new_dir;
-- 
cgit v1.2.3-70-g09d2


From 24ad148a0ff74b1e703a8bc5b3e0793dc7d4e3a9 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 1 Apr 2009 09:21:56 -0400
Subject: nfs41: use nfs4_statfs_res

In preparation for nfs41 sequence processing.

Signed-off-by: Andy Admason <andros@netapp.com>
[define nfs4_statfs_res]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 5 ++++-
 fs/nfs/nfs4xdr.c        | 5 +++--
 include/linux/nfs_xdr.h | 5 +++++
 3 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b0ec8ff96eb..b715f605761 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2426,10 +2426,13 @@ static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
 		.fh = fhandle,
 		.bitmask = server->attr_bitmask,
 	};
+	struct nfs4_statfs_res res = {
+		.fsstat = fsstat,
+	};
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_STATFS],
 		.rpc_argp = &args,
-		.rpc_resp = fsstat,
+		.rpc_resp = &res,
 	};
 
 	nfs_fattr_init(fsstat->fattr);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1e41420916a..b7871ad82aa 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4387,7 +4387,8 @@ static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p, struct nfs_pat
 /*
  * STATFS request
  */
-static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p, struct nfs_fsstat *fsstat)
+static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p,
+			       struct nfs4_statfs_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4398,7 +4399,7 @@ static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p, struct nfs_fssta
 	if (!status)
 		status = decode_putfh(&xdr);
 	if (!status)
-		status = decode_statfs(&xdr, fsstat);
+		status = decode_statfs(&xdr, res->fsstat);
 	return status;
 }
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index f71260aeb80..4dac59ef6f4 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -795,6 +795,11 @@ struct nfs4_statfs_arg {
 	struct nfs4_sequence_args	seq_args;
 };
 
+struct nfs4_statfs_res {
+	struct nfs_fsstat	       *fsstat;
+	struct nfs4_sequence_res	seq_res;
+};
+
 struct nfs4_server_caps_arg {
 	struct nfs_fh		       *fhandle;
 	struct nfs4_sequence_args	seq_args;
-- 
cgit v1.2.3-70-g09d2


From 3dda5e434721f942870ee30bc6103761618d410f Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 1 Apr 2009 09:21:57 -0400
Subject: nfs41: use nfs4_fsinfo_res

In preparation for nfs41 sequence processing.

Signed-off-by: Andy Admason <andros@netapp.com>
[define nfs4_fsinfo_res]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 5 ++++-
 fs/nfs/nfs4xdr.c        | 5 +++--
 include/linux/nfs_xdr.h | 5 +++++
 3 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b715f605761..b8915ef533d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2458,10 +2458,13 @@ static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
 		.fh = fhandle,
 		.bitmask = server->attr_bitmask,
 	};
+	struct nfs4_fsinfo_res res = {
+		.fsinfo = fsinfo,
+	};
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSINFO],
 		.rpc_argp = &args,
-		.rpc_resp = fsinfo,
+		.rpc_resp = &res,
 	};
 
 	return rpc_call_sync(server->client, &msg, 0);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index b7871ad82aa..d9ab8209c28 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4351,7 +4351,8 @@ out:
 /*
  * FSINFO request
  */
-static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *fsinfo)
+static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p,
+			       struct nfs4_fsinfo_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4362,7 +4363,7 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs_fsinf
 	if (!status)
 		status = decode_putfh(&xdr);
 	if (!status)
-		status = decode_fsinfo(&xdr, fsinfo);
+		status = decode_fsinfo(&xdr, res->fsinfo);
 	return status;
 }
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 4dac59ef6f4..7d64913cbb1 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -675,6 +675,11 @@ struct nfs4_fsinfo_arg {
 	struct nfs4_sequence_args	seq_args;
 };
 
+struct nfs4_fsinfo_res {
+	struct nfs_fsinfo	       *fsinfo;
+	struct nfs4_sequence_res	seq_res;
+};
+
 struct nfs4_getattr_arg {
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
-- 
cgit v1.2.3-70-g09d2


From d45b2989a7956ae9e71d584ceac942278c0371c7 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 1 Apr 2009 09:21:58 -0400
Subject: nfs41: use nfs4_pathconf_res

In preparation for nfs41 sequence processing.

Signed-off-by: Andy Admason <andros@netapp.com>
[define nfs4_pathconf_res]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 5 ++++-
 fs/nfs/nfs4xdr.c        | 5 +++--
 include/linux/nfs_xdr.h | 5 +++++
 3 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b8915ef533d..aea2e83d393 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2496,10 +2496,13 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle
 		.fh = fhandle,
 		.bitmask = server->attr_bitmask,
 	};
+	struct nfs4_pathconf_res res = {
+		.pathconf = pathconf,
+	};
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PATHCONF],
 		.rpc_argp = &args,
-		.rpc_resp = pathconf,
+		.rpc_resp = &res,
 	};
 
 	/* None of the pathconf attributes are mandatory to implement */
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index d9ab8209c28..a77ee3dd0b3 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4370,7 +4370,8 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p,
 /*
  * PATHCONF request
  */
-static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p, struct nfs_pathconf *pathconf)
+static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p,
+				 struct nfs4_pathconf_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4381,7 +4382,7 @@ static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p, struct nfs_pat
 	if (!status)
 		status = decode_putfh(&xdr);
 	if (!status)
-		status = decode_pathconf(&xdr, pathconf);
+		status = decode_pathconf(&xdr, res->pathconf);
 	return status;
 }
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 7d64913cbb1..56523319e14 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -734,6 +734,11 @@ struct nfs4_pathconf_arg {
 	struct nfs4_sequence_args	seq_args;
 };
 
+struct nfs4_pathconf_res {
+	struct nfs_pathconf	       *pathconf;
+	struct nfs4_sequence_res	seq_res;
+};
+
 struct nfs4_readdir_arg {
 	const struct nfs_fh *		fh;
 	u64				cookie;
-- 
cgit v1.2.3-70-g09d2


From 663c79b3cd8f5fe21fe7d7565fec0072e3234ddc Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 1 Apr 2009 09:21:59 -0400
Subject: nfs41: use nfs4_getaclres

In preparation for nfs41 sequence processing.

Signed-off-by: Andy Admason <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: embed resp_len in nfs_getaclres]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 20 +++++++++++---------
 fs/nfs/nfs4xdr.c        |  5 +++--
 include/linux/nfs_xdr.h |  5 +++++
 3 files changed, 19 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index aea2e83d393..20c9acf689f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2755,12 +2755,14 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
 		.acl_pages = pages,
 		.acl_len = buflen,
 	};
-	size_t resp_len = buflen;
+	struct nfs_getaclres res = {
+		.acl_len = buflen,
+	};
 	void *resp_buf;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL],
 		.rpc_argp = &args,
-		.rpc_resp = &resp_len,
+		.rpc_resp = &res,
 	};
 	struct page *localpage = NULL;
 	int ret;
@@ -2774,7 +2776,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
 			return -ENOMEM;
 		args.acl_pages[0] = localpage;
 		args.acl_pgbase = 0;
-		resp_len = args.acl_len = PAGE_SIZE;
+		args.acl_len = PAGE_SIZE;
 	} else {
 		resp_buf = buf;
 		buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
@@ -2782,18 +2784,18 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
 	ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	if (ret)
 		goto out_free;
-	if (resp_len > args.acl_len)
-		nfs4_write_cached_acl(inode, NULL, resp_len);
+	if (res.acl_len > args.acl_len)
+		nfs4_write_cached_acl(inode, NULL, res.acl_len);
 	else
-		nfs4_write_cached_acl(inode, resp_buf, resp_len);
+		nfs4_write_cached_acl(inode, resp_buf, res.acl_len);
 	if (buf) {
 		ret = -ERANGE;
-		if (resp_len > buflen)
+		if (res.acl_len > buflen)
 			goto out_free;
 		if (localpage)
-			memcpy(buf, resp_buf, resp_len);
+			memcpy(buf, resp_buf, res.acl_len);
 	}
-	ret = resp_len;
+	ret = res.acl_len;
 out_free:
 	if (localpage)
 		__free_page(localpage);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index a77ee3dd0b3..3e777893e2b 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4018,7 +4018,8 @@ out:
  * Decode GETACL response
  */
 static int
-nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p, size_t *acl_len)
+nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p,
+		    struct nfs_getaclres *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4031,7 +4032,7 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p, size_t *acl_len)
 	status = decode_putfh(&xdr);
 	if (status)
 		goto out;
-	status = decode_getacl(&xdr, rqstp, acl_len);
+	status = decode_getacl(&xdr, rqstp, &res->acl_len);
 
 out:
 	return status;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 56523319e14..6e9ee284860 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -444,6 +444,11 @@ struct nfs_getaclargs {
 	struct nfs4_sequence_args 	seq_args;
 };
 
+struct nfs_getaclres {
+	size_t				acl_len;
+	struct nfs4_sequence_res	seq_res;
+};
+
 struct nfs_setattrres {
 	struct nfs_fattr *              fattr;
 	const struct nfs_server *	server;
-- 
cgit v1.2.3-70-g09d2


From 73c403a9a93743b068103c13c05ed136dc687d05 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 1 Apr 2009 09:22:01 -0400
Subject: nfs41: use nfs4_setaclres

In preparation for nfs41 sequence processing.

Signed-off-by: Andy Admason <andros@netapp.com>
[define nfs_setaclres]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 3 ++-
 fs/nfs/nfs4xdr.c        | 3 ++-
 include/linux/nfs_xdr.h | 4 ++++
 3 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 20c9acf689f..62bbe25d942 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2842,10 +2842,11 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
 		.acl_pages	= pages,
 		.acl_len	= buflen,
 	};
+	struct nfs_setaclres res;
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_SETACL],
 		.rpc_argp	= &arg,
-		.rpc_resp	= NULL,
+		.rpc_resp	= &res,
 	};
 	int ret;
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 27dd25d9ad4..aa350d5bf20 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3996,7 +3996,8 @@ nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args
  * Decode SETACL response
  */
 static int
-nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, __be32 *p, void *res)
+nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, __be32 *p,
+		    struct nfs_setaclres *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 6e9ee284860..0f2dc8f4cc3 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -436,6 +436,10 @@ struct nfs_setaclargs {
 	struct nfs4_sequence_args	seq_args;
 };
 
+struct nfs_setaclres {
+	struct nfs4_sequence_res	seq_res;
+};
+
 struct nfs_getaclargs {
 	struct nfs_fh *			fh;
 	size_t				acl_len;
-- 
cgit v1.2.3-70-g09d2


From 22958463d5dca8548e19430779f379e66fd6e4a4 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 1 Apr 2009 09:22:02 -0400
Subject: nfs41: use nfs4_fs_locations_res

In preparation for nfs41 sequence processing.

Signed-off-by: Andy Admason <andros@netapp.com>
[find nfs4_fs_locations_res]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 5 ++++-
 fs/nfs/nfs4xdr.c        | 6 ++++--
 include/linux/nfs_xdr.h | 5 +++++
 3 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 62bbe25d942..e08edc99faa 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3722,10 +3722,13 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
 		.page = page,
 		.bitmask = bitmask,
 	};
+	struct nfs4_fs_locations_res res = {
+		.fs_locations = fs_locations,
+	};
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS],
 		.rpc_argp = &args,
-		.rpc_resp = fs_locations,
+		.rpc_resp = &res,
 	};
 	int status;
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index aa350d5bf20..e448e33b4d0 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4504,7 +4504,8 @@ out:
 /*
  * FS_LOCATIONS request
  */
-static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs4_fs_locations *res)
+static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p,
+				     struct nfs4_fs_locations_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4519,7 +4520,8 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs
 	if ((status = decode_lookup(&xdr)) != 0)
 		goto out;
 	xdr_enter_page(&xdr, PAGE_SIZE);
-	status = decode_getfattr(&xdr, &res->fattr, res->server);
+	status = decode_getfattr(&xdr, &res->fs_locations->fattr,
+				 res->fs_locations->server);
 out:
 	return status;
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 0f2dc8f4cc3..d837f10c49e 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -867,6 +867,11 @@ struct nfs4_fs_locations_arg {
 	struct nfs4_sequence_args	seq_args;
 };
 
+struct nfs4_fs_locations_res {
+	struct nfs4_fs_locations       *fs_locations;
+	struct nfs4_sequence_res	seq_res;
+};
+
 #endif /* CONFIG_NFS_V4 */
 
 struct nfs_page;
-- 
cgit v1.2.3-70-g09d2


From cccef3b96a4759ae0790452280c00ea505412157 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Wed, 1 Apr 2009 09:22:03 -0400
Subject: nfs41: introduce nfs4_call_sync

Use nfs4_call_sync rather than rpc_call_sync to provide
for a nfs41 sessions-enabled interface for sessions manipulation.

The nfs41 rpc logic uses the rpc_call_prepare method to
recover and create the session, as well as selecting a free slot id
and the rpc_call_done to free the slot and update slot table
related metadata.

In the coming patches we'll add rpc prepare and done routines
for setting up the sequence op and processing the sequence result.

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: nfs4_call_sync]
As per 11-14-08 review.
Squash into "nfs41: introduce nfs4_call_sync" and "nfs41: nfs4_setup_sequence"
Define two functions one for v4 and one for v41
add a pointer to struct nfs4_client to the correct one.
Signed-off-by: Andy Adamson <andros@netapp.com>
[added BUG() in _nfs4_call_sync_session if !CONFIG_NFS_V4_1]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: check for session not minorversion]
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[group minorversion specific stuff together]
Signed-off-by: Alexandros Batsakis <Alexandros.Batsakis@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Andy Adamson <andros@netapp.com>
[nfs41: fixup nfs4_clear_client_minor_version]
[introduce nfs4_init_client_minor_version() in this patch]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[cleaned-up patch: got rid of nfs_call_sync_t, dprintks, cosmetics, extra server defs]
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           |  5 ++++
 fs/nfs/internal.h         | 12 +++++++++
 fs/nfs/nfs4proc.c         | 67 +++++++++++++++++++++++++++++++++--------------
 include/linux/nfs_fs_sb.h |  8 ++++++
 4 files changed, 73 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index f1506f14852..a9828baaa44 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -194,6 +194,8 @@ static void nfs4_clear_client_minor_version(struct nfs_client *clp)
 		nfs4_destroy_session(clp->cl_session);
 		clp->cl_session = NULL;
 	}
+
+	clp->cl_call_sync = _nfs4_call_sync;
 #endif /* CONFIG_NFS_V4_1 */
 }
 
@@ -1073,6 +1075,8 @@ error:
  */
 static int nfs4_init_client_minor_version(struct nfs_client *clp)
 {
+	clp->cl_call_sync = _nfs4_call_sync;
+
 #if defined(CONFIG_NFS_V4_1)
 	if (clp->cl_minorversion) {
 		struct nfs4_session *session = NULL;
@@ -1086,6 +1090,7 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
 			return -ENOMEM;
 
 		clp->cl_session = session;
+		clp->cl_call_sync = _nfs4_call_sync_session;
 	}
 #endif /* CONFIG_NFS_V4_1 */
 
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 7cef45db925..8d67c2865dc 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -206,6 +206,18 @@ extern int nfs4_path_walk(struct nfs_server *server,
 			  const char *path);
 #endif
 
+/* nfs4proc.c */
+extern int _nfs4_call_sync(struct nfs_server *server,
+			   struct rpc_message *msg,
+			   struct nfs4_sequence_args *args,
+			   struct nfs4_sequence_res *res,
+			   int cache_reply);
+extern int _nfs4_call_sync_session(struct nfs_server *server,
+				   struct rpc_message *msg,
+				   struct nfs4_sequence_args *args,
+				   struct nfs4_sequence_res *res,
+				   int cache_reply);
+
 /*
  * Determine if sessions are in use.
  */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e08edc99faa..4fc5b385f61 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -271,6 +271,33 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp
 	spin_unlock(&clp->cl_lock);
 }
 
+#if defined(CONFIG_NFS_V4_1)
+
+int _nfs4_call_sync_session(struct nfs_server *server,
+			    struct rpc_message *msg,
+			    struct nfs4_sequence_args *args,
+			    struct nfs4_sequence_res *res,
+			    int cache_reply)
+{
+	/* in preparation for setting up the sequence op */
+	return rpc_call_sync(server->client, msg, 0);
+}
+
+#endif /* CONFIG_NFS_V4_1 */
+
+int _nfs4_call_sync(struct nfs_server *server,
+		    struct rpc_message *msg,
+		    struct nfs4_sequence_args *args,
+		    struct nfs4_sequence_res *res,
+		    int cache_reply)
+{
+	return rpc_call_sync(server->client, msg, 0);
+}
+
+#define nfs4_call_sync(server, msg, args, res, cache_reply) \
+	(server)->nfs_client->cl_call_sync((server), (msg), &(args)->seq_args, \
+			&(res)->seq_res, (cache_reply))
+
 static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
 {
 	struct nfs_inode *nfsi = NFS_I(dir);
@@ -1269,7 +1296,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
 	} else
 		memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
 
-	status = rpc_call_sync(server->client, &msg, 0);
+	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
 	if (status == 0 && state != NULL)
 		renew_lease(server, timestamp);
 	return status;
@@ -1595,7 +1622,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
 	};
 	int status;
 
-	status = rpc_call_sync(server->client, &msg, 0);
+	status = nfs4_call_sync(server, &msg, &args, &res, 0);
 	if (status == 0) {
 		memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
 		if (res.attr_bitmask[0] & FATTR4_WORD0_ACL)
@@ -1609,6 +1636,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
 		server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
 		server->acl_bitmask = res.acl_bitmask;
 	}
+
 	return status;
 }
 
@@ -1641,7 +1669,7 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
 		.rpc_resp = &res,
 	};
 	nfs_fattr_init(info->fattr);
-	return rpc_call_sync(server->client, &msg, 0);
+	return nfs4_call_sync(server, &msg, &args, &res, 0);
 }
 
 static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -1731,7 +1759,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 	};
 	
 	nfs_fattr_init(fattr);
-	return rpc_call_sync(server->client, &msg, 0);
+	return nfs4_call_sync(server, &msg, &args, &res, 0);
 }
 
 static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
@@ -1815,7 +1843,7 @@ static int _nfs4_proc_lookupfh(struct nfs_server *server, const struct nfs_fh *d
 	nfs_fattr_init(fattr);
 
 	dprintk("NFS call  lookupfh %s\n", name->name);
-	status = rpc_call_sync(server->client, &msg, 0);
+	status = nfs4_call_sync(server, &msg, &args, &res, 0);
 	dprintk("NFS reply lookupfh: %d\n", status);
 	return status;
 }
@@ -1901,7 +1929,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
 			args.access |= NFS4_ACCESS_EXECUTE;
 	}
 	nfs_fattr_init(&fattr);
-	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	status = nfs4_call_sync(server, &msg, &args, &res, 0);
 	if (!status) {
 		entry->mask = 0;
 		if (res.access & NFS4_ACCESS_READ)
@@ -1967,7 +1995,7 @@ static int _nfs4_proc_readlink(struct inode *inode, struct page *page,
 		.rpc_resp = &res,
 	};
 
-	return rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	return nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0);
 }
 
 static int nfs4_proc_readlink(struct inode *inode, struct page *page,
@@ -2061,7 +2089,7 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
 	int			status;
 
 	nfs_fattr_init(&res.dir_attr);
-	status = rpc_call_sync(server->client, &msg, 0);
+	status = nfs4_call_sync(server, &msg, &args, &res, 1);
 	if (status == 0) {
 		update_changeattr(dir, &res.cinfo);
 		nfs_post_op_update_inode(dir, &res.dir_attr);
@@ -2129,7 +2157,7 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
 	
 	nfs_fattr_init(res.old_fattr);
 	nfs_fattr_init(res.new_fattr);
-	status = rpc_call_sync(server->client, &msg, 0);
+	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
 
 	if (!status) {
 		update_changeattr(old_dir, &res.old_cinfo);
@@ -2178,7 +2206,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *
 
 	nfs_fattr_init(res.fattr);
 	nfs_fattr_init(res.dir_attr);
-	status = rpc_call_sync(server->client, &msg, 0);
+	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
 	if (!status) {
 		update_changeattr(dir, &res.cinfo);
 		nfs_post_op_update_inode(dir, res.dir_attr);
@@ -2239,7 +2267,8 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
 
 static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data)
 {
-	int status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0);
+	int status = nfs4_call_sync(NFS_SERVER(dir), &data->msg,
+				    &data->arg, &data->res, 1);
 	if (status == 0) {
 		update_changeattr(dir, &data->res.dir_cinfo);
 		nfs_post_op_update_inode(dir, data->res.dir_fattr);
@@ -2348,7 +2377,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 			(unsigned long long)cookie);
 	nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
 	res.pgbase = args.pgbase;
-	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	status = nfs4_call_sync(NFS_SERVER(dir), &msg, &args, &res, 0);
 	if (status == 0)
 		memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
 
@@ -2436,7 +2465,7 @@ static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
 	};
 
 	nfs_fattr_init(fsstat->fattr);
-	return rpc_call_sync(server->client, &msg, 0);
+	return  nfs4_call_sync(server, &msg, &args, &res, 0);
 }
 
 static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat)
@@ -2467,7 +2496,7 @@ static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
 		.rpc_resp = &res,
 	};
 
-	return rpc_call_sync(server->client, &msg, 0);
+	return nfs4_call_sync(server, &msg, &args, &res, 0);
 }
 
 static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
@@ -2512,7 +2541,7 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle
 	}
 
 	nfs_fattr_init(pathconf->fattr);
-	return rpc_call_sync(server->client, &msg, 0);
+	return nfs4_call_sync(server, &msg, &args, &res, 0);
 }
 
 static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -2781,7 +2810,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
 		resp_buf = buf;
 		buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
 	}
-	ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	ret = nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0);
 	if (ret)
 		goto out_free;
 	if (res.acl_len > args.acl_len)
@@ -2854,7 +2883,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
 		return -EOPNOTSUPP;
 	nfs_inode_return_delegation(inode);
 	buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
-	ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	ret = nfs4_call_sync(server, &msg, &arg, &res, 1);
 	nfs_access_zap_cache(inode);
 	nfs_zap_acl_cache(inode);
 	return ret;
@@ -3143,7 +3172,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 		goto out;
 	lsp = request->fl_u.nfs4_fl.owner;
 	arg.lock_owner.id = lsp->ls_id.id;
-	status = rpc_call_sync(server->client, &msg, 0);
+	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
 	switch (status) {
 		case 0:
 			request->fl_type = F_UNLCK;
@@ -3736,7 +3765,7 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
 	nfs_fattr_init(&fs_locations->fattr);
 	fs_locations->server = server;
 	fs_locations->nlocations = 0;
-	status = rpc_call_sync(server->client, &msg, 0);
+	status = nfs4_call_sync(server, &msg, &args, &res, 0);
 	nfs_fixup_referral_attributes(&fs_locations->fattr);
 	dprintk("%s: returned status = %d\n", __func__, status);
 	return status;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index b47c0fc55d4..206485e5082 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -12,6 +12,9 @@
 struct nfs4_session;
 struct nfs_iostats;
 struct nlm_host;
+struct nfs4_sequence_args;
+struct nfs4_sequence_res;
+struct nfs_server;
 
 /*
  * The nfs_client identifies our client state to the server.
@@ -67,6 +70,11 @@ struct nfs_client {
 	 */
 	char			cl_ipaddr[48];
 	unsigned char		cl_id_uniquifier;
+	int		     (* cl_call_sync)(struct nfs_server *server,
+					      struct rpc_message *msg,
+					      struct nfs4_sequence_args *args,
+					      struct nfs4_sequence_res *res,
+					      int cache_reply);
 #endif /* CONFIG_NFS_V4 */
 
 #ifdef CONFIG_NFS_V4_1
-- 
cgit v1.2.3-70-g09d2


From f3752975caa716709c5ea0b0820b86111d921df4 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 1 Apr 2009 09:22:04 -0400
Subject: nfs41: nfs41: pass *session in seq_args and seq_res

To be used for getting the rpc's minorversion and for nfs41 xdr
{en,de}coding of the sequence operation.
Reset the seq session ptrs for minorversion=0 rpc calls.

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 1 +
 include/linux/nfs_xdr.h | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4fc5b385f61..49cf969417c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -291,6 +291,7 @@ int _nfs4_call_sync(struct nfs_server *server,
 		    struct nfs4_sequence_res *res,
 		    int cache_reply)
 {
+	args->sa_session = res->sr_session = NULL;
 	return rpc_call_sync(server->client, msg, 0);
 }
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index d837f10c49e..f5675063f95 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -162,11 +162,11 @@ struct nfs4_slot {
 };
 
 struct nfs4_sequence_args {
-	/* stub */
+	struct nfs4_session	*sa_session;
 };
 
 struct nfs4_sequence_res {
-	/* stub */
+	struct nfs4_session	*sr_session;
 };
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 5f7dbd5c752d88310d8fe1feedefd5c6496eff48 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Wed, 1 Apr 2009 09:22:05 -0400
Subject: nfs41: set up seq_res.sr_slotid

Initialize nfs4_sequence_res sr_slotid to NFS4_MAX_SLOT_TABLE.

[was nfs41: sequence res use slotid]
Signed-off-by: Andy Adamson <andros@netapp.com>
[pulled definition of struct nfs4_sequence_res.sr_slotid to here]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 5 +++++
 fs/nfs/read.c           | 1 +
 fs/nfs/unlink.c         | 1 +
 fs/nfs/write.c          | 2 ++
 include/linux/nfs_xdr.h | 1 +
 5 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 49cf969417c..5878930b4c3 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -371,6 +371,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
 	p->o_arg.server = server;
 	p->o_arg.bitmask = server->attr_bitmask;
 	p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
+	p->o_res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
 	if (flags & O_EXCL) {
 		u32 *s = (u32 *) p->o_arg.u.verifier.data;
 		s[0] = jiffies;
@@ -1463,6 +1464,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
 	calldata->res.fattr = &calldata->fattr;
 	calldata->res.seqid = calldata->arg.seqid;
 	calldata->res.server = server;
+	calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
 	calldata->path.mnt = mntget(path->mnt);
 	calldata->path.dentry = dget(path->dentry);
 
@@ -3088,6 +3090,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
 	memcpy(&data->stateid, stateid, sizeof(data->stateid));
 	data->res.fattr = &data->fattr;
 	data->res.server = server;
+	data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
 	nfs_fattr_init(data->res.fattr);
 	data->timestamp = jiffies;
 	data->rpc_status = 0;
@@ -3240,6 +3243,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
 	p->arg.fl = &p->fl;
 	p->arg.seqid = seqid;
 	p->res.seqid = seqid;
+	p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
 	p->arg.stateid = &lsp->ls_stateid;
 	p->lsp = lsp;
 	atomic_inc(&lsp->ls_count);
@@ -3412,6 +3416,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
 	p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
 	p->arg.lock_owner.id = lsp->ls_id.id;
 	p->res.lock_seqid = p->arg.lock_seqid;
+	p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
 	p->lsp = lsp;
 	atomic_inc(&lsp->ls_count);
 	p->ctx = get_nfs_open_context(ctx);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 4ace3c50a8e..70ba2b4cb9a 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -46,6 +46,7 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
 		memset(p, 0, sizeof(*p));
 		INIT_LIST_HEAD(&p->pages);
 		p->npages = pagecount;
+		p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
 		if (pagecount <= ARRAY_SIZE(p->page_array))
 			p->pagevec = p->page_array;
 		else {
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index ecc29534777..83d0ce2600a 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -241,6 +241,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
 		status = PTR_ERR(data->cred);
 		goto out_free;
 	}
+	data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
 
 	status = -EBUSY;
 	spin_lock(&dentry->d_lock);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e560a78995a..035e6fb9f57 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -52,6 +52,7 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
 	if (p) {
 		memset(p, 0, sizeof(*p));
 		INIT_LIST_HEAD(&p->pages);
+		p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
 	}
 	return p;
 }
@@ -71,6 +72,7 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
 		memset(p, 0, sizeof(*p));
 		INIT_LIST_HEAD(&p->pages);
 		p->npages = pagecount;
+		p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
 		if (pagecount <= ARRAY_SIZE(p->page_array))
 			p->pagevec = p->page_array;
 		else {
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index f5675063f95..db0d1236aae 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -167,6 +167,7 @@ struct nfs4_sequence_args {
 
 struct nfs4_sequence_res {
 	struct nfs4_session	*sr_session;
+	u8			sr_slotid;	/* slot used to send request */
 };
 
 /*
-- 
cgit v1.2.3-70-g09d2


From fbcd4abcb3841f85578985c09c6df85aa41b0ae8 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Wed, 1 Apr 2009 09:22:15 -0400
Subject: nfs41: setup_sequence method

Allocate a slot in the session slot table and set the sequence op arguments.

Called at the rpc prepare stage.

Add a status to nfs41_sequence_res, initialize it to one so that we catch
rpc level failures which do not go through decode_sequence which sets
the new status field.

Note that upon an rpc level failure, we don't know if the server processed the
sequence operation or not. Proceed as if the server did process the sequence
operation.

Signed-off-by: Rahul Iyer <iyer@netapp.com>
[nfs41: sequence args use slotid]
[nfs41: find slot return slotid]
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: remove SEQ4_STATUS_USE_TK_STATUS]
As per 11-14-08 review
[move extern declaration from nfs41: sequence setup/done support]
[removed sa_session definition, changed sa_cache_this into a u8 to reduce footprint]
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: rpc_sleep_on slot_tbl_waitq must be called under slot_tbl_lock]
    Otherwise there's a race (we've hit) with nfs4_free_slot where
    nfs41_setup_sequence sees a full slot table, unlocks slot_tbl_lock,
    nfs4_free_slots happen concurrently and call rpc_wake_up_next
    where there's nobody to wake up yet, context goes back to
    nfs41_setup_sequence which goes to sleep when the slot table
    is actually empty now and there's no-one to wake it up anymore.
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4_fs.h        | 10 ++++++++++
 fs/nfs/nfs4proc.c       | 34 ++++++++++++++++++++++++++++++++--
 include/linux/nfs_xdr.h |  4 ++++
 3 files changed, 46 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index acac6f8c3d3..eccf4e93e7d 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -203,8 +203,18 @@ extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
 extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops;
 extern struct nfs4_state_recovery_ops nfs4_nograce_recovery_ops;
 #if defined(CONFIG_NFS_V4_1)
+extern int nfs4_setup_sequence(struct nfs_client *clp,
+		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
+		int cache_reply, struct rpc_task *task);
 extern void nfs4_destroy_session(struct nfs4_session *session);
 extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
+#else /* CONFIG_NFS_v4_1 */
+static inline int nfs4_setup_sequence(struct nfs_client *clp,
+		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
+		int cache_reply, struct rpc_task *task)
+{
+	return 0;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 extern const u32 nfs4_fattr_bitmap[2];
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a0946a0d116..c9618080317 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -280,6 +280,8 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp
  * If found, we mark the slot as used, update the highest_used_slotid,
  * and respectively set up the sequence operation args.
  * The slot number is returned if found, or NFS4_MAX_SLOT_TABLE otherwise.
+ *
+ * Note: must be called with under the slot_tbl_lock.
  */
 static u8
 nfs4_find_slot(struct nfs4_slot_table *tbl, struct rpc_task *task)
@@ -288,7 +290,6 @@ nfs4_find_slot(struct nfs4_slot_table *tbl, struct rpc_task *task)
 	u8 ret_id = NFS4_MAX_SLOT_TABLE;
 	BUILD_BUG_ON((u8)NFS4_MAX_SLOT_TABLE != (int)NFS4_MAX_SLOT_TABLE);
 
-	spin_lock(&tbl->slot_tbl_lock);
 	dprintk("--> %s used_slots=%04lx highest_used=%d max_slots=%d\n",
 		__func__, tbl->used_slots[0], tbl->highest_used_slotid,
 		tbl->max_slots);
@@ -302,7 +303,6 @@ nfs4_find_slot(struct nfs4_slot_table *tbl, struct rpc_task *task)
 out:
 	dprintk("<-- %s used_slots=%04lx highest_used=%d slotid=%d \n",
 		__func__, tbl->used_slots[0], tbl->highest_used_slotid, ret_id);
-	spin_unlock(&tbl->slot_tbl_lock);
 	return ret_id;
 }
 
@@ -312,12 +312,42 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
 				int cache_reply,
 				struct rpc_task *task)
 {
+	struct nfs4_slot *slot;
+	struct nfs4_slot_table *tbl;
+	u8 slotid;
+
+	dprintk("--> %s\n", __func__);
 	/* slot already allocated? */
 	if (res->sr_slotid != NFS4_MAX_SLOT_TABLE)
 		return 0;
 
 	memset(res, 0, sizeof(*res));
 	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+	tbl = &session->fc_slot_table;
+
+	spin_lock(&tbl->slot_tbl_lock);
+	slotid = nfs4_find_slot(tbl, task);
+	if (slotid == NFS4_MAX_SLOT_TABLE) {
+		rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
+		spin_unlock(&tbl->slot_tbl_lock);
+		dprintk("<-- %s: no free slots\n", __func__);
+		return -EAGAIN;
+	}
+	spin_unlock(&tbl->slot_tbl_lock);
+
+	slot = tbl->slots + slotid;
+	args->sa_slotid = slotid;
+	args->sa_cache_this = cache_reply;
+
+	dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr);
+
+	res->sr_slotid = slotid;
+	res->sr_renewal_time = jiffies;
+	/*
+	 * sr_status is only set in decode_sequence, and so will remain
+	 * set to 1 if an rpc level failure occurs.
+	 */
+	res->sr_status = 1;
 	return 0;
 }
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index db0d1236aae..4ac14b40efc 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -163,11 +163,15 @@ struct nfs4_slot {
 
 struct nfs4_sequence_args {
 	struct nfs4_session	*sa_session;
+	u8			sa_slotid;
+	u8			sa_cache_this;
 };
 
 struct nfs4_sequence_res {
 	struct nfs4_session	*sr_session;
 	u8			sr_slotid;	/* slot used to send request */
+	unsigned long		sr_renewal_time;
+	int			sr_status;	/* sequence operation status */
 };
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 99fe60d062cfecf382c036065b3278b82b6c5eff Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 1 Apr 2009 09:22:29 -0400
Subject: nfs41: exchange_id operation

Implement the exchange_id operation conforming to
http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-26

Unlike NFSv4.0, NFSv4.1 requires machine credentials. RPC_AUTH_GSS machine
credentials will be passed into the kernel at mount time to be available for
the exchange_id operation.

RPC_AUTH_UNIX root mounts can use the UNIX root credential. Store the root
credential in the nfs_client struct.

Without a credential, NFSv4.1 state renewal fails.

[nfs41: establish clientid via exchange id only if cred != NULL]
Signed-off-by: Andy Adamson<andros@umich.edu>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfsd41: move nfstime4 from under CONFIG_NFS_V4_1]
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: do not wait a lease time in exchange id]
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: pass *session in seq_args and seq_res]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
[nfs41: Ignoring impid in decode_exchange_id is missing a READ_BUF]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: fix Xcode_exchange_id's xdr Xcoding pointer type]
[nfs41: get rid of unused struct nfs41_exchange_id_res members]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
---
 fs/nfs/nfs4proc.c          |  61 ++++++++++++++++++++
 fs/nfs/nfs4xdr.c           | 139 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfs4.h       |   1 +
 include/linux/nfs_fs_sb.h  |   6 ++
 include/linux/nfs_xdr.h    |  40 +++++++++++++
 include/linux/nfsd/state.h |   1 -
 6 files changed, 247 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index dc0feb5837b..6f384e29075 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -48,6 +48,7 @@
 #include <linux/smp_lock.h>
 #include <linux/namei.h>
 #include <linux/mount.h>
+#include <linux/module.h>
 
 #include "nfs4_fs.h"
 #include "delegation.h"
@@ -433,11 +434,13 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
 	spin_unlock(&tbl->slot_tbl_lock);
 
 	slot = tbl->slots + slotid;
+	args->sa_session = session;
 	args->sa_slotid = slotid;
 	args->sa_cache_this = cache_reply;
 
 	dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr);
 
+	res->sr_session = session;
 	res->sr_slotid = slotid;
 	res->sr_renewal_time = jiffies;
 	/*
@@ -4128,6 +4131,64 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
 }
 
 #ifdef CONFIG_NFS_V4_1
+/*
+ * nfs4_proc_exchange_id()
+ *
+ * Since the clientid has expired, all compounds using sessions
+ * associated with the stale clientid will be returning
+ * NFS4ERR_BADSESSION in the sequence operation, and will therefore
+ * be in some phase of session reset.
+ */
+static int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
+{
+	nfs4_verifier verifier;
+	struct nfs41_exchange_id_args args = {
+		.client = clp,
+		.flags = clp->cl_exchange_flags,
+	};
+	struct nfs41_exchange_id_res res = {
+		.client = clp,
+	};
+	int status;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_EXCHANGE_ID],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+		.rpc_cred = cred,
+	};
+	__be32 *p;
+
+	dprintk("--> %s\n", __func__);
+	BUG_ON(clp == NULL);
+	p = (u32 *)verifier.data;
+	*p++ = htonl((u32)clp->cl_boot_time.tv_sec);
+	*p = htonl((u32)clp->cl_boot_time.tv_nsec);
+	args.verifier = &verifier;
+
+	while (1) {
+		args.id_len = scnprintf(args.id, sizeof(args.id),
+					"%s/%s %u",
+					clp->cl_ipaddr,
+					rpc_peeraddr2str(clp->cl_rpcclient,
+							 RPC_DISPLAY_ADDR),
+					clp->cl_id_uniquifier);
+
+		status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
+
+		if (status != NFS4ERR_CLID_INUSE)
+			break;
+
+		if (signalled())
+			break;
+
+		if (++clp->cl_id_uniquifier == 0)
+			break;
+	}
+
+	dprintk("<-- %s status= %d\n", __func__, status);
+	return status;
+}
+
 /* Destroy the slot table */
 static void nfs4_destroy_slot_table(struct nfs4_session *session)
 {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 5b944cd5721..783c4214dcc 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -246,6 +246,27 @@ static int nfs4_stat_to_errno(int);
 				(0)
 
 #if defined(CONFIG_NFS_V4_1)
+#define encode_exchange_id_maxsz (op_encode_hdr_maxsz + \
+				encode_verifier_maxsz + \
+				1 /* co_ownerid.len */ + \
+				XDR_QUADLEN(NFS4_EXCHANGE_ID_LEN) + \
+				1 /* flags */ + \
+				1 /* spa_how */ + \
+				0 /* SP4_NONE (for now) */ + \
+				1 /* zero implemetation id array */)
+#define decode_exchange_id_maxsz (op_decode_hdr_maxsz + \
+				2 /* eir_clientid */ + \
+				1 /* eir_sequenceid */ + \
+				1 /* eir_flags */ + \
+				1 /* spr_how */ + \
+				0 /* SP4_NONE (for now) */ + \
+				2 /* eir_server_owner.so_minor_id */ + \
+				/* eir_server_owner.so_major_id<> */ \
+				XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \
+				/* eir_server_scope<> */ \
+				XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \
+				1 /* eir_server_impl_id array length */ + \
+				0 /* ignored eir_server_impl_id contents */)
 #define encode_sequence_maxsz	0 /* stub */
 #define decode_sequence_maxsz	0 /* stub */
 #else /* CONFIG_NFS_V4_1 */
@@ -594,6 +615,14 @@ static int nfs4_stat_to_errno(int);
 				 decode_putfh_maxsz + \
 				 decode_lookup_maxsz + \
 				 decode_fs_locations_maxsz)
+#if defined(CONFIG_NFS_V4_1)
+#define NFS4_enc_exchange_id_sz \
+				(compound_encode_hdr_maxsz + \
+				 encode_exchange_id_maxsz)
+#define NFS4_dec_exchange_id_sz \
+				(compound_decode_hdr_maxsz + \
+				 decode_exchange_id_maxsz)
+#endif /* CONFIG_NFS_V4_1 */
 
 static const umode_t nfs_type2fmt[] = {
 	[NF4BAD] = 0,
@@ -1455,7 +1484,29 @@ static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *state
 	hdr->replen += decode_delegreturn_maxsz;
 }
 
+#if defined(CONFIG_NFS_V4_1)
 /* NFSv4.1 operations */
+static void encode_exchange_id(struct xdr_stream *xdr,
+			       struct nfs41_exchange_id_args *args,
+			       struct compound_hdr *hdr)
+{
+	__be32 *p;
+
+	RESERVE_SPACE(4 + sizeof(args->verifier->data));
+	WRITE32(OP_EXCHANGE_ID);
+	WRITEMEM(args->verifier->data, sizeof(args->verifier->data));
+
+	encode_string(xdr, args->id_len, args->id);
+
+	RESERVE_SPACE(12);
+	WRITE32(args->flags);
+	WRITE32(0);	/* zero length state_protect4_a */
+	WRITE32(0);	/* zero length implementation id array */
+	hdr->nops++;
+	hdr->replen += decode_exchange_id_maxsz;
+}
+#endif /* CONFIG_NFS_V4_1 */
+
 static void encode_sequence(struct xdr_stream *xdr,
 			    const struct nfs4_sequence_args *args,
 			    struct compound_hdr *hdr)
@@ -2162,6 +2213,26 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs
 	return 0;
 }
 
+#if defined(CONFIG_NFS_V4_1)
+/*
+ * EXCHANGE_ID request
+ */
+static int nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, uint32_t *p,
+				    struct nfs41_exchange_id_args *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.minorversion = args->client->cl_minorversion,
+	};
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_exchange_id(&xdr, args, &hdr);
+	encode_nops(&hdr);
+	return 0;
+}
+#endif /* CONFIG_NFS_V4_1 */
+
 /*
  * START OF "GENERIC" DECODE ROUTINES.
  *   These may look a little ugly since they are imported from a "generic"
@@ -3877,6 +3948,52 @@ static int decode_delegreturn(struct xdr_stream *xdr)
 	return decode_op_hdr(xdr, OP_DELEGRETURN);
 }
 
+#if defined(CONFIG_NFS_V4_1)
+static int decode_exchange_id(struct xdr_stream *xdr,
+			      struct nfs41_exchange_id_res *res)
+{
+	__be32 *p;
+	uint32_t dummy;
+	int status;
+	struct nfs_client *clp = res->client;
+
+	status = decode_op_hdr(xdr, OP_EXCHANGE_ID);
+	if (status)
+		return status;
+
+	READ_BUF(8);
+	READ64(clp->cl_ex_clid);
+	READ_BUF(12);
+	READ32(clp->cl_seqid);
+	READ32(clp->cl_exchange_flags);
+
+	/* We ask for SP4_NONE */
+	READ32(dummy);
+	if (dummy != SP4_NONE)
+		return -EIO;
+
+	/* Throw away minor_id */
+	READ_BUF(8);
+
+	/* Throw away Major id */
+	READ_BUF(4);
+	READ32(dummy);
+	READ_BUF(dummy);
+
+	/* Throw away server_scope */
+	READ_BUF(4);
+	READ32(dummy);
+	READ_BUF(dummy);
+
+	/* Throw away Implementation id array */
+	READ_BUF(4);
+	READ32(dummy);
+	READ_BUF(dummy);
+
+	return 0;
+}
+#endif /* CONFIG_NFS_V4_1 */
+
 static int decode_sequence(struct xdr_stream *xdr,
 			   struct nfs4_sequence_res *res,
 			   struct rpc_rqst *rqstp)
@@ -4774,6 +4891,25 @@ out:
 	return status;
 }
 
+#if defined(CONFIG_NFS_V4_1)
+/*
+ * EXCHANGE_ID request
+ */
+static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, uint32_t *p,
+				    void *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (!status)
+		status = decode_exchange_id(&xdr, res);
+	return status;
+}
+#endif /* CONFIG_NFS_V4_1 */
+
 __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
 {
 	uint32_t bitmap[2] = {0};
@@ -4943,6 +5079,9 @@ struct rpc_procinfo	nfs4_procedures[] = {
   PROC(GETACL,		enc_getacl,	dec_getacl),
   PROC(SETACL,		enc_setacl,	dec_setacl),
   PROC(FS_LOCATIONS,	enc_fs_locations, dec_fs_locations),
+#if defined(CONFIG_NFS_V4_1)
+  PROC(EXCHANGE_ID,	enc_exchange_id,	dec_exchange_id),
+#endif /* CONFIG_NFS_V4_1 */
 };
 
 struct rpc_version		nfs_version4 = {
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 7c36fcf2dfb..ad65709ed8d 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -21,6 +21,7 @@
 #define NFS4_FHSIZE		128
 #define NFS4_MAXPATHLEN		PATH_MAX
 #define NFS4_MAXNAMLEN		NAME_MAX
+#define NFS4_OPAQUE_LIMIT	1024
 #define NFS4_MAX_SESSIONID_LEN	16
 
 #define NFS4_ACCESS_READ        0x0001
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 206485e5082..435ed556efb 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -78,6 +78,12 @@ struct nfs_client {
 #endif /* CONFIG_NFS_V4 */
 
 #ifdef CONFIG_NFS_V4_1
+	/* clientid returned from EXCHANGE_ID, used by session operations */
+	u64			cl_ex_clid;
+	/* The sequence id to use for the next CREATE_SESSION */
+	u32			cl_seqid;
+	/* The flags used for obtaining the clientid during EXCHANGE_ID */
+	u32			cl_exchange_flags;
 	struct nfs4_session	*cl_session; 	/* sharred session */
 #endif /* CONFIG_NFS_V4_1 */
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 4ac14b40efc..5d70b924af5 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -879,6 +879,46 @@ struct nfs4_fs_locations_res {
 
 #endif /* CONFIG_NFS_V4 */
 
+struct nfstime4 {
+	u64	seconds;
+	u32	nseconds;
+};
+
+#ifdef CONFIG_NFS_V4_1
+struct nfs_impl_id4 {
+	u32		domain_len;
+	char		*domain;
+	u32		name_len;
+	char		*name;
+	struct nfstime4	date;
+};
+
+#define NFS4_EXCHANGE_ID_LEN	(48)
+struct nfs41_exchange_id_args {
+	struct nfs_client		*client;
+	nfs4_verifier			*verifier;
+	unsigned int 			id_len;
+	char 				id[NFS4_EXCHANGE_ID_LEN];
+	u32				flags;
+};
+
+struct server_owner {
+	uint64_t			minor_id;
+	uint32_t			major_id_sz;
+	char				major_id[NFS4_OPAQUE_LIMIT];
+};
+
+struct server_scope {
+	uint32_t			server_scope_sz;
+	char 				server_scope[NFS4_OPAQUE_LIMIT];
+};
+
+struct nfs41_exchange_id_res {
+	struct nfs_client		*client;
+	u32				flags;
+};
+#endif /* CONFIG_NFS_V4_1 */
+
 struct nfs_page;
 
 #define NFS_PAGEVEC_SIZE	(8U)
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 4d61c873fee..7ef4b7ad121 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -41,7 +41,6 @@
 #include <linux/kref.h>
 #include <linux/sunrpc/clnt.h>
 
-#define NFS4_OPAQUE_LIMIT 1024
 typedef struct {
 	u32             cl_boot;
 	u32             cl_id;
-- 
cgit v1.2.3-70-g09d2


From 2050f0cc0703aab7cee798b3cb47037754f368bc Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Wed, 1 Apr 2009 09:22:30 -0400
Subject: nfs41: get_lease_time

get_lease_time uses the FSINFO rpc operation to
get the lease time attribute.

nfs4_get_lease_time() is only called from the state manager on session setup
so don't recover from clientid or sequence level errors.

We do need to recover from NFS4ERR_DELAY or NFS4ERR_GRACE.
Use NFS4_POLL_RETRY_MIN - the Linux server returns NFS4ERR_DELAY when an
upcall is needed to resolve an uncached export referenced by a file handle.

[nfs41: sequence res use slotid]
Signed-off-by: Andy Adamson<andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: remove extraneous rpc_clnt pointer]
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: have get_lease_time work on nfs_client]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: get_lease_time recover from NFS4ERR_DELAY]
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: pass *session in seq_args and seq_res]
[define nfs4_get_lease_time_{args,res}]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 94 +++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/nfs4xdr.c        | 51 +++++++++++++++++++++++++++
 include/linux/nfs_xdr.h |  9 +++++
 3 files changed, 154 insertions(+)

(limited to 'include')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6f384e29075..eafc99afd35 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4189,6 +4189,100 @@ static int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
 	return status;
 }
 
+struct nfs4_get_lease_time_data {
+	struct nfs4_get_lease_time_args *args;
+	struct nfs4_get_lease_time_res *res;
+	struct nfs_client *clp;
+};
+
+static void nfs4_get_lease_time_prepare(struct rpc_task *task,
+					void *calldata)
+{
+	int ret;
+	struct nfs4_get_lease_time_data *data =
+			(struct nfs4_get_lease_time_data *)calldata;
+
+	dprintk("--> %s\n", __func__);
+	/* just setup sequence, do not trigger session recovery
+	   since we're invoked within one */
+	ret = nfs41_setup_sequence(data->clp->cl_session,
+					&data->args->la_seq_args,
+					&data->res->lr_seq_res, 0, task);
+
+	BUG_ON(ret == -EAGAIN);
+	rpc_call_start(task);
+	dprintk("<-- %s\n", __func__);
+}
+
+/*
+ * Called from nfs4_state_manager thread for session setup, so don't recover
+ * from sequence operation or clientid errors.
+ */
+static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_get_lease_time_data *data =
+			(struct nfs4_get_lease_time_data *)calldata;
+
+	dprintk("--> %s\n", __func__);
+	nfs41_sequence_done(data->clp, &data->res->lr_seq_res, task->tk_status);
+	switch (task->tk_status) {
+	case -NFS4ERR_DELAY:
+	case -NFS4ERR_GRACE:
+		dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status);
+		rpc_delay(task, NFS4_POLL_RETRY_MIN);
+		task->tk_status = 0;
+		rpc_restart_call(task);
+		return;
+	}
+	nfs41_sequence_free_slot(data->clp, &data->res->lr_seq_res);
+	dprintk("<-- %s\n", __func__);
+}
+
+struct rpc_call_ops nfs4_get_lease_time_ops = {
+	.rpc_call_prepare = nfs4_get_lease_time_prepare,
+	.rpc_call_done = nfs4_get_lease_time_done,
+};
+
+int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
+{
+	struct rpc_task *task;
+	struct nfs4_get_lease_time_args args;
+	struct nfs4_get_lease_time_res res = {
+		.lr_fsinfo = fsinfo,
+	};
+	struct nfs4_get_lease_time_data data = {
+		.args = &args,
+		.res = &res,
+		.clp = clp,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GET_LEASE_TIME],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+	struct rpc_task_setup task_setup = {
+		.rpc_client = clp->cl_rpcclient,
+		.rpc_message = &msg,
+		.callback_ops = &nfs4_get_lease_time_ops,
+		.callback_data = &data
+	};
+	int status;
+
+	res.lr_seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
+	dprintk("--> %s\n", __func__);
+	task = rpc_run_task(&task_setup);
+
+	if (IS_ERR(task))
+		status = PTR_ERR(task);
+	else {
+		status = task->tk_status;
+		rpc_put_task(task);
+	}
+	dprintk("<-- %s return %d\n", __func__, status);
+
+	return status;
+}
+
 /* Destroy the slot table */
 static void nfs4_destroy_slot_table(struct nfs4_session *session)
 {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 783c4214dcc..85ee1d17a46 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -622,6 +622,14 @@ static int nfs4_stat_to_errno(int);
 #define NFS4_dec_exchange_id_sz \
 				(compound_decode_hdr_maxsz + \
 				 decode_exchange_id_maxsz)
+#define NFS4_enc_get_lease_time_sz	(compound_encode_hdr_maxsz + \
+					 encode_sequence_maxsz + \
+					 encode_putrootfh_maxsz + \
+					 encode_fsinfo_maxsz)
+#define NFS4_dec_get_lease_time_sz	(compound_decode_hdr_maxsz + \
+					 decode_sequence_maxsz + \
+					 decode_putrootfh_maxsz + \
+					 decode_fsinfo_maxsz)
 #endif /* CONFIG_NFS_V4_1 */
 
 static const umode_t nfs_type2fmt[] = {
@@ -2231,6 +2239,27 @@ static int nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, uint32_t *p,
 	encode_nops(&hdr);
 	return 0;
 }
+
+/*
+ * a GET_LEASE_TIME request
+ */
+static int nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, uint32_t *p,
+				       struct nfs4_get_lease_time_args *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->la_seq_args),
+	};
+	const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_sequence(&xdr, &args->la_seq_args, &hdr);
+	encode_putrootfh(&xdr, &hdr);
+	encode_fsinfo(&xdr, lease_bitmap, &hdr);
+	encode_nops(&hdr);
+	return 0;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 /*
@@ -4908,6 +4937,27 @@ static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, uint32_t *p,
 		status = decode_exchange_id(&xdr, res);
 	return status;
 }
+
+/*
+ * a GET_LEASE_TIME request
+ */
+static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, uint32_t *p,
+				       struct nfs4_get_lease_time_res *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (!status)
+		status = decode_sequence(&xdr, &res->lr_seq_res, rqstp);
+	if (!status)
+		status = decode_putrootfh(&xdr);
+	if (!status)
+		status = decode_fsinfo(&xdr, res->lr_fsinfo);
+	return status;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
@@ -5081,6 +5131,7 @@ struct rpc_procinfo	nfs4_procedures[] = {
   PROC(FS_LOCATIONS,	enc_fs_locations, dec_fs_locations),
 #if defined(CONFIG_NFS_V4_1)
   PROC(EXCHANGE_ID,	enc_exchange_id,	dec_exchange_id),
+  PROC(GET_LEASE_TIME,	enc_get_lease_time,	dec_get_lease_time),
 #endif /* CONFIG_NFS_V4_1 */
 };
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 5d70b924af5..ca643aa87d4 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -174,6 +174,15 @@ struct nfs4_sequence_res {
 	int			sr_status;	/* sequence operation status */
 };
 
+struct nfs4_get_lease_time_args {
+	struct nfs4_sequence_args	la_seq_args;
+};
+
+struct nfs4_get_lease_time_res {
+	struct nfs_fsinfo	       *lr_fsinfo;
+	struct nfs4_sequence_res	lr_seq_res;
+};
+
 /*
  * Arguments to the open call.
  */
-- 
cgit v1.2.3-70-g09d2


From fc931582c260e53ca5ca23bd70ccc9b2265cca9f Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Wed, 1 Apr 2009 09:22:31 -0400
Subject: nfs41: create_session operation

Implement the create_session operation conforming to
http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-26

Set the real fore channel max operations to preserve server resources.
Note: If the server returns < NFS4_MAX_OPS, the client will very soon
get an NFS4ERR_TOO_MANY_OPS. A later patch will handle this.

Set the max_rqst_sz and max_resp_sz to PAGE_SIZE - we preallocate the buffers.

Set the back channel max_resp_sz_cached to zero to force the client to
always set csa_cachethis to FALSE because the current implementation
of the back channel DRC only supports caching the CB_SEQUENCE operation.

The client back channel server supports one slot, and desires 2 operations
per compound.

Signed-off-by: Ricardo Labiaga <ricardo.labiaga@netapp.com>
Signed-off-by: Andy Adamson<andros@umich.edu>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: remove extraneous rpc_clnt pointer]
Use the struct nfs_client cl_rpcclient.
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: nfs4_init_channel_attrs, just use nfs41_create_session_args]
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: use rsize and wsize for session channel attributes]
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: set channel max operations]
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: set back channel attributes]
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: obliterate nfs4_adjust_channel_attrs]
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: have create_session work on nfs_client]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: move CONFIG_NFS_V4_1 endif]
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: pass *session in seq_args and seq_res]
[moved nfs4_init_slot_table definition here]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: use kcalloc to allocate slot table]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
[nfs41: fix Xcode_create_session's xdr Xcoding pointer type]
[nfs41: refactor decoding of channel attributes]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
---
 fs/nfs/nfs4proc.c       | 172 ++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/nfs4xdr.c        | 185 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfs4.h    |  10 +++
 include/linux/nfs_xdr.h |  12 ++++
 4 files changed, 379 insertions(+)

(limited to 'include')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index eafc99afd35..7d81d6e5753 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -54,6 +54,7 @@
 #include "delegation.h"
 #include "internal.h"
 #include "iostat.h"
+#include "callback.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
@@ -4283,6 +4284,50 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
 	return status;
 }
 
+/*
+ * Initialize slot table
+ */
+static int nfs4_init_slot_table(struct nfs4_session *session)
+{
+	struct nfs4_slot_table *tbl = &session->fc_slot_table;
+	int i, max_slots = session->fc_attrs.max_reqs;
+	struct nfs4_slot *slot;
+	int ret = -ENOMEM;
+
+	BUG_ON(max_slots > NFS4_MAX_SLOT_TABLE);
+
+	dprintk("--> %s: max_reqs=%u\n", __func__,
+		session->fc_attrs.max_reqs);
+
+	slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_KERNEL);
+	if (!slot)
+		goto out;
+	for (i = 0; i < max_slots; ++i)
+		slot[i].seq_nr = 1;
+	ret = 0;
+
+	spin_lock(&tbl->slot_tbl_lock);
+	if (tbl->slots != NULL) {
+		spin_unlock(&tbl->slot_tbl_lock);
+		dprintk("%s: slot table already initialized. tbl=%p slots=%p\n",
+			__func__, tbl, tbl->slots);
+		WARN_ON(1);
+		goto out_free;
+	}
+	tbl->max_slots = max_slots;
+	tbl->slots = slot;
+	tbl->highest_used_slotid = -1;  /* no slot is currently used */
+	spin_unlock(&tbl->slot_tbl_lock);
+	dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
+		tbl, tbl->slots, tbl->max_slots);
+out:
+	dprintk("<-- %s: return %d\n", __func__, ret);
+	return ret;
+out_free:
+	kfree(slot);
+	goto out;
+}
+
 /* Destroy the slot table */
 static void nfs4_destroy_slot_table(struct nfs4_session *session)
 {
@@ -4314,6 +4359,133 @@ void nfs4_destroy_session(struct nfs4_session *session)
 	kfree(session);
 }
 
+/*
+ * Initialize the values to be used by the client in CREATE_SESSION
+ * If nfs4_init_session set the fore channel request and response sizes,
+ * use them.
+ *
+ * Set the back channel max_resp_sz_cached to zero to force the client to
+ * always set csa_cachethis to FALSE because the current implementation
+ * of the back channel DRC only supports caching the CB_SEQUENCE operation.
+ */
+static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
+{
+	struct nfs4_session *session = args->client->cl_session;
+	unsigned int mxrqst_sz = session->fc_attrs.max_rqst_sz,
+		     mxresp_sz = session->fc_attrs.max_resp_sz;
+
+	if (mxrqst_sz == 0)
+		mxrqst_sz = NFS_MAX_FILE_IO_SIZE;
+	if (mxresp_sz == 0)
+		mxresp_sz = NFS_MAX_FILE_IO_SIZE;
+	/* Fore channel attributes */
+	args->fc_attrs.headerpadsz = 0;
+	args->fc_attrs.max_rqst_sz = mxrqst_sz;
+	args->fc_attrs.max_resp_sz = mxresp_sz;
+	args->fc_attrs.max_resp_sz_cached = mxresp_sz;
+	args->fc_attrs.max_ops = NFS4_MAX_OPS;
+	args->fc_attrs.max_reqs = session->clp->cl_rpcclient->cl_xprt->max_reqs;
+
+	dprintk("%s: Fore Channel : max_rqst_sz=%u max_resp_sz=%u "
+		"max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n",
+		__func__,
+		args->fc_attrs.max_rqst_sz, args->fc_attrs.max_resp_sz,
+		args->fc_attrs.max_resp_sz_cached, args->fc_attrs.max_ops,
+		args->fc_attrs.max_reqs);
+
+	/* Back channel attributes */
+	args->bc_attrs.headerpadsz = 0;
+	args->bc_attrs.max_rqst_sz = PAGE_SIZE;
+	args->bc_attrs.max_resp_sz = PAGE_SIZE;
+	args->bc_attrs.max_resp_sz_cached = 0;
+	args->bc_attrs.max_ops = NFS4_MAX_BACK_CHANNEL_OPS;
+	args->bc_attrs.max_reqs = 1;
+
+	dprintk("%s: Back Channel : max_rqst_sz=%u max_resp_sz=%u "
+		"max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n",
+		__func__,
+		args->bc_attrs.max_rqst_sz, args->bc_attrs.max_resp_sz,
+		args->bc_attrs.max_resp_sz_cached, args->bc_attrs.max_ops,
+		args->bc_attrs.max_reqs);
+}
+
+static int _nfs4_proc_create_session(struct nfs_client *clp)
+{
+	struct nfs4_session *session = clp->cl_session;
+	struct nfs41_create_session_args args = {
+		.client = clp,
+		.cb_program = NFS4_CALLBACK,
+	};
+	struct nfs41_create_session_res res = {
+		.client = clp,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE_SESSION],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+	int status;
+
+	nfs4_init_channel_attrs(&args);
+	args.flags = (SESSION4_PERSIST);
+
+	status = rpc_call_sync(session->clp->cl_rpcclient, &msg, 0);
+
+	/* Set the negotiated values in the session's channel_attrs struct */
+
+	if (!status) {
+		/* Increment the clientid slot sequence id */
+		clp->cl_seqid++;
+	}
+
+	return status;
+}
+
+/*
+ * Issues a CREATE_SESSION operation to the server.
+ * It is the responsibility of the caller to verify the session is
+ * expired before calling this routine.
+ */
+int nfs4_proc_create_session(struct nfs_client *clp)
+{
+	int status;
+	unsigned *ptr;
+	struct nfs_fsinfo fsinfo;
+	struct nfs4_session *session = clp->cl_session;
+
+	dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
+
+	status = _nfs4_proc_create_session(clp);
+	if (status)
+		goto out;
+
+	/* Init the fore channel */
+	status = nfs4_init_slot_table(session);
+	dprintk("fore channel slot table initialization returned %d\n", status);
+	if (status)
+		goto out;
+
+	ptr = (unsigned *)&session->sess_id.data[0];
+	dprintk("%s client>seqid %d sessionid %u:%u:%u:%u\n", __func__,
+		clp->cl_seqid, ptr[0], ptr[1], ptr[2], ptr[3]);
+
+	/* Get the lease time */
+	status = nfs4_proc_get_lease_time(clp, &fsinfo);
+	if (status == 0) {
+		/* Update lease time and schedule renewal */
+		spin_lock(&clp->cl_lock);
+		clp->cl_lease_time = fsinfo.lease_time * HZ;
+		clp->cl_last_renewal = jiffies;
+		clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+		spin_unlock(&clp->cl_lock);
+
+		nfs4_schedule_state_renewal(clp);
+	}
+out:
+	dprintk("<-- %s\n", __func__);
+	return status;
+}
+
 #endif /* CONFIG_NFS_V4_1 */
 
 struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 85ee1d17a46..7a243a2cf0b 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -246,6 +246,8 @@ static int nfs4_stat_to_errno(int);
 				(0)
 
 #if defined(CONFIG_NFS_V4_1)
+#define NFS4_MAX_MACHINE_NAME_LEN (64)
+
 #define encode_exchange_id_maxsz (op_encode_hdr_maxsz + \
 				encode_verifier_maxsz + \
 				1 /* co_ownerid.len */ + \
@@ -267,6 +269,31 @@ static int nfs4_stat_to_errno(int);
 				XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \
 				1 /* eir_server_impl_id array length */ + \
 				0 /* ignored eir_server_impl_id contents */)
+#define encode_channel_attrs_maxsz  (6 + 1 /* ca_rdma_ird.len (0) */)
+#define decode_channel_attrs_maxsz  (6 + \
+				     1 /* ca_rdma_ird.len */ + \
+				     1 /* ca_rdma_ird */)
+#define encode_create_session_maxsz  (op_encode_hdr_maxsz + \
+				     2 /* csa_clientid */ + \
+				     1 /* csa_sequence */ + \
+				     1 /* csa_flags */ + \
+				     encode_channel_attrs_maxsz + \
+				     encode_channel_attrs_maxsz + \
+				     1 /* csa_cb_program */ + \
+				     1 /* csa_sec_parms.len (1) */ + \
+				     1 /* cb_secflavor (AUTH_SYS) */ + \
+				     1 /* stamp */ + \
+				     1 /* machinename.len */ + \
+				     XDR_QUADLEN(NFS4_MAX_MACHINE_NAME_LEN) + \
+				     1 /* uid */ + \
+				     1 /* gid */ + \
+				     1 /* gids.len (0) */)
+#define decode_create_session_maxsz  (op_decode_hdr_maxsz +	\
+				     XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
+				     1 /* csr_sequence */ + \
+				     1 /* csr_flags */ + \
+				     decode_channel_attrs_maxsz + \
+				     decode_channel_attrs_maxsz)
 #define encode_sequence_maxsz	0 /* stub */
 #define decode_sequence_maxsz	0 /* stub */
 #else /* CONFIG_NFS_V4_1 */
@@ -622,6 +649,12 @@ static int nfs4_stat_to_errno(int);
 #define NFS4_dec_exchange_id_sz \
 				(compound_decode_hdr_maxsz + \
 				 decode_exchange_id_maxsz)
+#define NFS4_enc_create_session_sz \
+				(compound_encode_hdr_maxsz + \
+				 encode_create_session_maxsz)
+#define NFS4_dec_create_session_sz \
+				(compound_decode_hdr_maxsz + \
+				 decode_create_session_maxsz)
 #define NFS4_enc_get_lease_time_sz	(compound_encode_hdr_maxsz + \
 					 encode_sequence_maxsz + \
 					 encode_putrootfh_maxsz + \
@@ -712,6 +745,7 @@ static void encode_compound_hdr(struct xdr_stream *xdr,
 
 static void encode_nops(struct compound_hdr *hdr)
 {
+	BUG_ON(hdr->nops > NFS4_MAX_OPS);
 	*hdr->nops_p = htonl(hdr->nops);
 }
 
@@ -1513,6 +1547,68 @@ static void encode_exchange_id(struct xdr_stream *xdr,
 	hdr->nops++;
 	hdr->replen += decode_exchange_id_maxsz;
 }
+
+static void encode_create_session(struct xdr_stream *xdr,
+				  struct nfs41_create_session_args *args,
+				  struct compound_hdr *hdr)
+{
+	__be32 *p;
+	char machine_name[NFS4_MAX_MACHINE_NAME_LEN];
+	uint32_t len;
+	struct nfs_client *clp = args->client;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_CREATE_SESSION);
+
+	RESERVE_SPACE(8);
+	WRITE64(clp->cl_ex_clid);
+
+	RESERVE_SPACE(8);
+	WRITE32(clp->cl_seqid);			/*Sequence id */
+	WRITE32(args->flags);			/*flags */
+
+	RESERVE_SPACE(2*28);			/* 2 channel_attrs */
+	/* Fore Channel */
+	WRITE32(args->fc_attrs.headerpadsz);	/* header padding size */
+	WRITE32(args->fc_attrs.max_rqst_sz);	/* max req size */
+	WRITE32(args->fc_attrs.max_resp_sz);	/* max resp size */
+	WRITE32(args->fc_attrs.max_resp_sz_cached);	/* Max resp sz cached */
+	WRITE32(args->fc_attrs.max_ops);	/* max operations */
+	WRITE32(args->fc_attrs.max_reqs);	/* max requests */
+	WRITE32(0);				/* rdmachannel_attrs */
+
+	/* Back Channel */
+	WRITE32(args->fc_attrs.headerpadsz);	/* header padding size */
+	WRITE32(args->bc_attrs.max_rqst_sz);	/* max req size */
+	WRITE32(args->bc_attrs.max_resp_sz);	/* max resp size */
+	WRITE32(args->bc_attrs.max_resp_sz_cached);	/* Max resp sz cached */
+	WRITE32(args->bc_attrs.max_ops);	/* max operations */
+	WRITE32(args->bc_attrs.max_reqs);	/* max requests */
+	WRITE32(0);				/* rdmachannel_attrs */
+
+	RESERVE_SPACE(4);
+	WRITE32(args->cb_program);		/* cb_program */
+
+	RESERVE_SPACE(4);			/* # of security flavors */
+	WRITE32(1);
+
+	RESERVE_SPACE(4);
+	WRITE32(RPC_AUTH_UNIX);			/* auth_sys */
+
+	/* authsys_parms rfc1831 */
+	RESERVE_SPACE(4);
+	WRITE32((u32)clp->cl_boot_time.tv_nsec);	/* stamp */
+	len = scnprintf(machine_name, sizeof(machine_name), "%s",
+			clp->cl_ipaddr);
+	RESERVE_SPACE(16 + len);
+	WRITE32(len);
+	WRITEMEM(machine_name, len);
+	WRITE32(0);				/* UID */
+	WRITE32(0);				/* GID */
+	WRITE32(0);				/* No more gids */
+	hdr->nops++;
+	hdr->replen += decode_create_session_maxsz;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 static void encode_sequence(struct xdr_stream *xdr,
@@ -2240,6 +2336,24 @@ static int nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, uint32_t *p,
 	return 0;
 }
 
+/*
+ * a CREATE_SESSION request
+ */
+static int nfs4_xdr_enc_create_session(struct rpc_rqst *req, uint32_t *p,
+				       struct nfs41_create_session_args *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.minorversion = args->client->cl_minorversion,
+	};
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, req, &hdr);
+	encode_create_session(&xdr, args, &hdr);
+	encode_nops(&hdr);
+	return 0;
+}
+
 /*
  * a GET_LEASE_TIME request
  */
@@ -4021,6 +4135,59 @@ static int decode_exchange_id(struct xdr_stream *xdr,
 
 	return 0;
 }
+
+static int decode_chan_attrs(struct xdr_stream *xdr,
+			     struct nfs4_channel_attrs *attrs)
+{
+	__be32 *p;
+	u32 nr_attrs;
+
+	READ_BUF(28);
+	READ32(attrs->headerpadsz);
+	READ32(attrs->max_rqst_sz);
+	READ32(attrs->max_resp_sz);
+	READ32(attrs->max_resp_sz_cached);
+	READ32(attrs->max_ops);
+	READ32(attrs->max_reqs);
+	READ32(nr_attrs);
+	if (unlikely(nr_attrs > 1)) {
+		printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n",
+			__func__, nr_attrs);
+		return -EINVAL;
+	}
+	if (nr_attrs == 1)
+		READ_BUF(4); /* skip rdma_attrs */
+	return 0;
+}
+
+static int decode_create_session(struct xdr_stream *xdr,
+				 struct nfs41_create_session_res *res)
+{
+	__be32 *p;
+	int status;
+	struct nfs_client *clp = res->client;
+	struct nfs4_session *session = clp->cl_session;
+
+	status = decode_op_hdr(xdr, OP_CREATE_SESSION);
+
+	if (status)
+		return status;
+
+	/* sessionid */
+	READ_BUF(NFS4_MAX_SESSIONID_LEN);
+	COPYMEM(&session->sess_id, NFS4_MAX_SESSIONID_LEN);
+
+	/* seqid, flags */
+	READ_BUF(8);
+	READ32(clp->cl_seqid);
+	READ32(session->flags);
+
+	/* Channel attributes */
+	status = decode_chan_attrs(xdr, &session->fc_attrs);
+	if (!status)
+		status = decode_chan_attrs(xdr, &session->bc_attrs);
+	return status;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 static int decode_sequence(struct xdr_stream *xdr,
@@ -4938,6 +5105,23 @@ static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, uint32_t *p,
 	return status;
 }
 
+/*
+ * a CREATE_SESSION request
+ */
+static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp, uint32_t *p,
+				       struct nfs41_create_session_res *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (!status)
+		status = decode_create_session(&xdr, res);
+	return status;
+}
+
 /*
  * a GET_LEASE_TIME request
  */
@@ -5131,6 +5315,7 @@ struct rpc_procinfo	nfs4_procedures[] = {
   PROC(FS_LOCATIONS,	enc_fs_locations, dec_fs_locations),
 #if defined(CONFIG_NFS_V4_1)
   PROC(EXCHANGE_ID,	enc_exchange_id,	dec_exchange_id),
+  PROC(CREATE_SESSION,	enc_create_session,	dec_create_session),
   PROC(GET_LEASE_TIME,	enc_get_lease_time,	dec_get_lease_time),
 #endif /* CONFIG_NFS_V4_1 */
 };
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index ad65709ed8d..bd2eba53066 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -131,6 +131,16 @@
 
 #define NFS4_MAX_UINT64	(~(u64)0)
 
+/* An NFS4 sessions server must support at least NFS4_MAX_OPS operations.
+ * If a compound requires more operations, adjust NFS4_MAX_OPS accordingly.
+ */
+#define NFS4_MAX_OPS   8
+
+/* Our NFS4 client back channel server only wants the cb_sequene and the
+ * actual operation per compound
+ */
+#define NFS4_MAX_BACK_CHANNEL_OPS 2
+
 enum nfs4_acl_whotype {
 	NFS4_ACL_WHO_NAMED = 0,
 	NFS4_ACL_WHO_OWNER,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index ca643aa87d4..62f63fb0c4c 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -926,6 +926,18 @@ struct nfs41_exchange_id_res {
 	struct nfs_client		*client;
 	u32				flags;
 };
+
+struct nfs41_create_session_args {
+	struct nfs_client	       *client;
+	uint32_t			flags;
+	uint32_t			cb_program;
+	struct nfs4_channel_attrs	fc_attrs;	/* Fore Channel */
+	struct nfs4_channel_attrs	bc_attrs;	/* Back Channel */
+};
+
+struct nfs41_create_session_res {
+	struct nfs_client	       *client;
+};
 #endif /* CONFIG_NFS_V4_1 */
 
 struct nfs_page;
-- 
cgit v1.2.3-70-g09d2


From 76db6d9500caeaa774a3e32a997eba30bbdc176b Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Wed, 1 Apr 2009 09:22:38 -0400
Subject: nfs41: add session setup to the state manager

At mount, nfs_alloc_client sets the cl_state NFS4CLNT_LEASE_EXPIRED bit
and nfs4_alloc_session sets the NFS4CLNT_SESSION_SETUP bit, so both bits are
set when nfs4_lookup_root calls nfs4_recover_expired_lease which schedules
the nfs4_state_manager and waits for it to complete.

Place the session setup after the clientid establishment in nfs4_state_manager
so that the session is setup right after the clientid has been established
without rescheduling the state manager.

Unlike nfsv4.0, the nfs_client struct is not ready to use until the session
has been established.  Postpone marking the nfs_client struct to NFS_CS_READY
until after a successful CREATE_SESSION call so that other threads cannot use
the client until the session is established.

If the EXCHANGE_ID call fails and the session has not been setup (the
NFS4CLNT_SESSION_SETUP bit is set), mark the client with the error and return.

If the session setup CREATE_SESSION call fails with NFS4ERR_STALE_CLIENTID
which could occur due to server reboot or network partition inbetween the
EXCHANGE_ID and CREATE_SESSION call, reset the NFS4CLNT_LEASE_EXPIRED and
NFS4CLNT_SESSION_SETUP bits and try again.

If the CREATE_SESSION call fails with other errors, mark the client with
the error and return.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>

[nfs41: NFS_CS_SESSION_SETUP cl_cons_state for back channel setup]
  On session setup, the CREATE_SESSION reply races with the server back channel
  probe which needs to succeed to setup the back channel. Set a new
  cl_cons_state NFS_CS_SESSION_SETUP just prior to the CREATE_SESSION call
  and add it as a valid state to nfs_find_client so that the client back channel
  can find the nfs_client struct and won't drop the server backchannel probe.
  Use a new cl_cons_state so that NFSv4.0 back channel behaviour which only
  sets NFS_CS_READY is unchanged.
  Adjust waiting on the nfs_client_active_wq accordingly.
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>

[nfs41: rename NFS_CS_SESSION_SETUP to NFS_CS_SESSION_INITING]
Signed-off-by: Andy Adamson <andros@netapp.com>
[nfs41: set NFS_CL_SESSION_INITING in alloc_session]
Signed-off-by: Andy Adamson <andros@netapp.com>
[nfs41: move session setup into a function]
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[moved nfs4_proc_create_session declaration here]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           | 10 ++++++----
 fs/nfs/internal.h         |  1 +
 fs/nfs/nfs4_fs.h          |  2 ++
 fs/nfs/nfs4proc.c         | 10 ++++++++++
 fs/nfs/nfs4state.c        | 35 ++++++++++++++++++++++++++++++++++-
 include/linux/nfs_fs_sb.h |  1 +
 6 files changed, 54 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index bb7432d83b5..d28a987f569 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -366,7 +366,8 @@ struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion)
 		struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
 
 		/* Don't match clients that failed to initialise properly */
-		if (clp->cl_cons_state != NFS_CS_READY)
+		if (!(clp->cl_cons_state == NFS_CS_READY ||
+		      clp->cl_cons_state == NFS_CS_SESSION_INITING))
 			continue;
 
 		/* Different NFS versions cannot share the same nfs_client */
@@ -499,7 +500,7 @@ found_client:
 		nfs_free_client(new);
 
 	error = wait_event_killable(nfs_client_active_wq,
-				clp->cl_cons_state != NFS_CS_INITING);
+				clp->cl_cons_state < NFS_CS_INITING);
 	if (error < 0) {
 		nfs_put_client(clp);
 		return ERR_PTR(-ERESTARTSYS);
@@ -520,7 +521,7 @@ found_client:
 /*
  * Mark a server as ready or failed
  */
-static void nfs_mark_client_ready(struct nfs_client *clp, int state)
+void nfs_mark_client_ready(struct nfs_client *clp, int state)
 {
 	clp->cl_cons_state = state;
 	wake_up_all(&nfs_client_active_wq);
@@ -1135,7 +1136,8 @@ static int nfs4_init_client(struct nfs_client *clp,
 	if (error < 0)
 		goto error;
 
-	nfs_mark_client_ready(clp, NFS_CS_READY);
+	if (!nfs4_has_session(clp))
+		nfs_mark_client_ready(clp, NFS_CS_READY);
 	return 0;
 
 error:
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index f62bc522615..f3b310e8ea0 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -100,6 +100,7 @@ extern void nfs_free_server(struct nfs_server *server);
 extern struct nfs_server *nfs_clone_server(struct nfs_server *,
 					   struct nfs_fh *,
 					   struct nfs_fattr *);
+extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
 #ifdef CONFIG_PROC_FS
 extern int __init nfs_fs_proc_init(void);
 extern void nfs_fs_proc_exit(void);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index eccf4e93e7d..288717abadd 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -44,6 +44,7 @@ enum nfs4_client_state {
 	NFS4CLNT_RECLAIM_REBOOT,
 	NFS4CLNT_RECLAIM_NOGRACE,
 	NFS4CLNT_DELEGRETURN,
+	NFS4CLNT_SESSION_SETUP,
 };
 
 /*
@@ -208,6 +209,7 @@ extern int nfs4_setup_sequence(struct nfs_client *clp,
 		int cache_reply, struct rpc_task *task);
 extern void nfs4_destroy_session(struct nfs4_session *session);
 extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
+extern int nfs4_proc_create_session(struct nfs_client *, int reset);
 #else /* CONFIG_NFS_v4_1 */
 static inline int nfs4_setup_sequence(struct nfs_client *clp,
 		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 0b121474024..7fc0c9c8f5e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4382,6 +4382,16 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
 	session = kzalloc(sizeof(struct nfs4_session), GFP_KERNEL);
 	if (!session)
 		return NULL;
+
+	set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
+	/*
+	 * The create session reply races with the server back
+	 * channel probe. Mark the client NFS_CS_SESSION_INITING
+	 * so that the client back channel can find the
+	 * nfs_client struct
+	 */
+	clp->cl_cons_state = NFS_CS_SESSION_INITING;
+
 	tbl = &session->fc_slot_table;
 	spin_lock_init(&tbl->slot_tbl_lock);
 	rpc_init_wait_queue(&tbl->slot_tbl_waitq, "Slot table");
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index bc683ed477e..df5b4807daa 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1113,6 +1113,27 @@ static int nfs4_reclaim_lease(struct nfs_client *clp)
 	return status;
 }
 
+#ifdef CONFIG_NFS_V4_1
+
+static int nfs4_initialize_session(struct nfs_client *clp)
+{
+	int status;
+
+	status = nfs4_proc_create_session(clp, 0);
+	if (!status) {
+		nfs_mark_client_ready(clp, NFS_CS_READY);
+	} else if (status == -NFS4ERR_STALE_CLIENTID) {
+		set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+		set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state);
+	} else {
+		nfs_mark_client_ready(clp, status);
+	}
+	return status;
+}
+#else /* CONFIG_NFS_V4_1 */
+static int nfs4_initialize_session(struct nfs_client *clp) { return 0; }
+#endif /* CONFIG_NFS_V4_1 */
+
 static void nfs4_state_manager(struct nfs_client *clp)
 {
 	int status = 0;
@@ -1126,6 +1147,9 @@ static void nfs4_state_manager(struct nfs_client *clp)
 				set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
 				if (status == -EAGAIN)
 					continue;
+				if (clp->cl_cons_state ==
+							NFS_CS_SESSION_INITING)
+					nfs_mark_client_ready(clp, status);
 				goto out_error;
 			}
 			clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
@@ -1136,7 +1160,16 @@ static void nfs4_state_manager(struct nfs_client *clp)
 			if (status != 0)
 				continue;
 		}
-
+		/* Setup the session */
+		if (nfs4_has_session(clp) &&
+		   test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) {
+			status = nfs4_initialize_session(clp);
+			if (status) {
+				if (status == -NFS4ERR_STALE_CLIENTID)
+					continue;
+				goto out_error;
+			}
+		}
 		/* First recover reboot state... */
 		if (test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
 			status = nfs4_do_reclaim(clp, &nfs4_reboot_recovery_ops);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 435ed556efb..d0902ccec9c 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -24,6 +24,7 @@ struct nfs_client {
 	int			cl_cons_state;	/* current construction state (-ve: init error) */
 #define NFS_CS_READY		0		/* ready to be used */
 #define NFS_CS_INITING		1		/* busy initialising */
+#define NFS_CS_SESSION_INITING	2		/* busy initialising  session */
 	unsigned long		cl_res_state;	/* NFS resources state */
 #define NFS_CS_CALLBACK		1		/* - callback started */
 #define NFS_CS_IDMAP		2		/* - idmap started */
-- 
cgit v1.2.3-70-g09d2


From aae2006e9b0c294114915c13022fa348e1a88023 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Wed, 1 Apr 2009 09:22:40 -0400
Subject: nfs41: sunrpc: Export the call prepare state for session reset

Signed-off-by: Andy Adamson<andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h  |  1 +
 include/linux/sunrpc/sched.h |  1 +
 net/sunrpc/clnt.c            | 13 +++++++++++++
 net/sunrpc/sched.c           |  2 +-
 4 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index c39a21040dc..37881f1a0bd 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -143,6 +143,7 @@ int		rpc_call_sync(struct rpc_clnt *clnt,
 			      const struct rpc_message *msg, int flags);
 struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred,
 			       int flags);
+void		rpc_restart_call_prepare(struct rpc_task *);
 void		rpc_restart_call(struct rpc_task *);
 void		rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int);
 size_t		rpc_max_payload(struct rpc_clnt *);
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 64981a2f1ca..177376880fa 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -237,6 +237,7 @@ void		rpc_show_tasks(void);
 int		rpc_init_mempool(void);
 void		rpc_destroy_mempool(void);
 extern struct workqueue_struct *rpciod_workqueue;
+void		rpc_prepare_task(struct rpc_task *task);
 
 static inline void rpc_exit(struct rpc_task *task, int status)
 {
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 5abab094441..d00e8135f86 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -694,6 +694,19 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
 }
 EXPORT_SYMBOL_GPL(rpc_force_rebind);
 
+/*
+ * Restart an (async) RPC call from the call_prepare state.
+ * Usually called from within the exit handler.
+ */
+void
+rpc_restart_call_prepare(struct rpc_task *task)
+{
+	if (RPC_ASSASSINATED(task))
+		return;
+	task->tk_action = rpc_prepare_task;
+}
+EXPORT_SYMBOL_GPL(rpc_restart_call_prepare);
+
 /*
  * Restart an (async) RPC call. Usually called from within the
  * exit handler.
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index ff50a054686..1102ce1251f 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -569,7 +569,7 @@ EXPORT_SYMBOL_GPL(rpc_delay);
 /*
  * Helper to call task->tk_ops->rpc_call_prepare
  */
-static void rpc_prepare_task(struct rpc_task *task)
+void rpc_prepare_task(struct rpc_task *task)
 {
 	task->tk_ops->rpc_call_prepare(task, task->tk_calldata);
 }
-- 
cgit v1.2.3-70-g09d2


From 56632b5bff5af10eb12d7e9499b5ffcadcb7a7b2 Mon Sep 17 00:00:00 2001
From: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Date: Wed, 1 Apr 2009 09:22:58 -0400
Subject: nfs41: client callback structures

Adds new list of rpc_xprt structures, and a readers/writers lock to
protect the list.  The list is used to preallocate resources for
the backchannel during backchannel requests.  Callbacks are not
expected to cause significant latency, so only one callback will
be allowed at this time.

It also adds a pointer to the NFS callback service so that
requests can be directed to it for processing.

New callback members added to svc_serv. The NFSv4.1 callback service will
sleep on the svc_serv->svc_cb_waitq until new callback requests arrive.
The request will be queued in svc_serv->svc_cb_list. This patch adds this
list, the sleep queue and spinlock to svc_serv.

[nfs41: NFSv4.1 callback support]
Signed-off-by: Ricardo Labiaga <ricardo.labiaga@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
---
 include/linux/sunrpc/svc.h  |  8 ++++++++
 include/linux/sunrpc/xprt.h | 22 ++++++++++++++++++++++
 2 files changed, 30 insertions(+)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 2a30775959e..4a8afbd6200 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -96,6 +96,14 @@ struct svc_serv {
 	svc_thread_fn		sv_function;	/* main function for threads */
 	unsigned int		sv_drc_max_pages; /* Total pages for DRC */
 	unsigned int		sv_drc_pages_used;/* DRC pages used */
+#if defined(CONFIG_NFS_V4_1)
+	struct list_head	sv_cb_list;	/* queue for callback requests
+						 * that arrive over the same
+						 * connection */
+	spinlock_t		sv_cb_lock;	/* protects the svc_cb_list */
+	wait_queue_head_t	sv_cb_waitq;	/* sleep here if there are no
+						 * entries in the svc_cb_list */
+#endif /* CONFIG_NFS_V4_1 */
 };
 
 /*
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 08afe43118f..703af7ebf6c 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -97,6 +97,12 @@ struct rpc_rqst {
 
 	unsigned long		rq_xtime;	/* when transmitted */
 	int			rq_ntrans;
+
+#if defined(CONFIG_NFS_V4_1)
+	struct list_head	rq_bc_list;	/* Callback service list */
+	unsigned long		rq_bc_pa_state;	/* Backchannel prealloc state */
+	struct list_head	rq_bc_pa_list;	/* Backchannel prealloc list */
+#endif /* CONFIG_NFS_V4_1 */
 };
 #define rq_svec			rq_snd_buf.head
 #define rq_slen			rq_snd_buf.len
@@ -174,6 +180,14 @@ struct rpc_xprt {
 	spinlock_t		reserve_lock;	/* lock slot table */
 	u32			xid;		/* Next XID value to use */
 	struct rpc_task *	snd_task;	/* Task blocked in send */
+#if defined(CONFIG_NFS_V4_1)
+	struct svc_serv		*bc_serv;       /* The RPC service which will */
+						/* process the callback */
+	spinlock_t		bc_pa_lock;	/* Protects the preallocated
+						 * items */
+	struct list_head	bc_pa_list;	/* List of preallocated
+						 * backchannel rpc_rqst's */
+#endif /* CONFIG_NFS_V4_1 */
 	struct list_head	recv;
 
 	struct {
@@ -192,6 +206,14 @@ struct rpc_xprt {
 	const char		*address_strings[RPC_DISPLAY_MAX];
 };
 
+#if defined(CONFIG_NFS_V4_1)
+/*
+ * Backchannel flags
+ */
+#define	RPC_BC_PA_IN_USE	0x0001		/* Preallocated backchannel */
+						/* buffer in use */
+#endif /* CONFIG_NFS_V4_1 */
+
 struct xprt_create {
 	int			ident;		/* XPRT_TRANSPORT identifier */
 	struct sockaddr *	srcaddr;	/* optional local address */
-- 
cgit v1.2.3-70-g09d2


From fb7a0b9addbdbbb13b7bc02abf55ee524ea19ce1 Mon Sep 17 00:00:00 2001
From: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Date: Wed, 1 Apr 2009 09:23:00 -0400
Subject: nfs41: New backchannel helper routines

This patch introduces support to setup the callback xprt on the client side.
It allocates/ destroys the preallocated memory structures used to process
backchannel requests.

At setup time, xprt_setup_backchannel() is invoked to allocate one or
more rpc_rqst structures and substructures.  This ensures that they
are available when an RPC callback arrives.  The rpc_rqst structures
are maintained in a linked list attached to the rpc_xprt structure.
We keep track of the number of allocations so that they can be correctly
removed when the channel is destroyed.

When an RPC callback arrives, xprt_alloc_bc_request() is invoked to
obtain a preallocated rpc_rqst structure.  An rpc_xprt structure is
returned, and its RPC_BC_PREALLOC_IN_USE bit is set in
rpc_xprt->bc_flags.  The structure is removed from the the list
since it is now in use, and it will be later added back when its
user is done with it.

After the RPC callback replies, the rpc_rqst structure is returned
by invoking xprt_free_bc_request().  This clears the
RPC_BC_PREALLOC_IN_USE bit and adds it back to the list, allowing it
to be reused by a subsequent RPC callback request.

To be consistent with the reception of RPC messages, the backchannel requests
should be placed into the 'struct rpc_rqst' rq_rcv_buf, which is then in turn
copied to the 'struct rpc_rqst' rq_private_buf.

[nfs41: Preallocate rpc_rqst receive buffer for handling callbacks]
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[Update copyright notice and explain page allocation]
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
---
 include/linux/sunrpc/xprt.h   |   1 +
 net/sunrpc/Makefile           |   1 +
 net/sunrpc/backchannel_rqst.c | 278 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 280 insertions(+)
 create mode 100644 net/sunrpc/backchannel_rqst.c

(limited to 'include')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 703af7ebf6c..beae030e80b 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -183,6 +183,7 @@ struct rpc_xprt {
 #if defined(CONFIG_NFS_V4_1)
 	struct svc_serv		*bc_serv;       /* The RPC service which will */
 						/* process the callback */
+	unsigned int		bc_alloc_count;	/* Total number of preallocs */
 	spinlock_t		bc_pa_lock;	/* Protects the preallocated
 						 * items */
 	struct list_head	bc_pa_list;	/* List of preallocated
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 5369aa369b3..4a01f9684b8 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -13,5 +13,6 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
 	    rpcb_clnt.o timer.o xdr.o \
 	    sunrpc_syms.o cache.o rpc_pipe.o \
 	    svc_xprt.o
+sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o
 sunrpc-$(CONFIG_PROC_FS) += stats.o
 sunrpc-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
new file mode 100644
index 00000000000..f56e18a2349
--- /dev/null
+++ b/net/sunrpc/backchannel_rqst.c
@@ -0,0 +1,278 @@
+/******************************************************************************
+
+(c) 2007 Network Appliance, Inc.  All Rights Reserved.
+(c) 2009 NetApp.  All Rights Reserved.
+
+NetApp provides this source code under the GPL v2 License.
+The GPL v2 license is available at
+http://opensource.org/licenses/gpl-license.php.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+******************************************************************************/
+
+#include <linux/tcp.h>
+#include <linux/sunrpc/xprt.h>
+
+#ifdef RPC_DEBUG
+#define RPCDBG_FACILITY	RPCDBG_TRANS
+#endif
+
+#if defined(CONFIG_NFS_V4_1)
+
+/*
+ * Helper routines that track the number of preallocation elements
+ * on the transport.
+ */
+static inline int xprt_need_to_requeue(struct rpc_xprt *xprt)
+{
+	return xprt->bc_alloc_count > 0;
+}
+
+static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n)
+{
+	xprt->bc_alloc_count += n;
+}
+
+static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n)
+{
+	return xprt->bc_alloc_count -= n;
+}
+
+/*
+ * Free the preallocated rpc_rqst structure and the memory
+ * buffers hanging off of it.
+ */
+static void xprt_free_allocation(struct rpc_rqst *req)
+{
+	struct xdr_buf *xbufp;
+
+	dprintk("RPC:        free allocations for req= %p\n", req);
+	BUG_ON(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
+	xbufp = &req->rq_private_buf;
+	free_page((unsigned long)xbufp->head[0].iov_base);
+	xbufp = &req->rq_snd_buf;
+	free_page((unsigned long)xbufp->head[0].iov_base);
+	list_del(&req->rq_bc_pa_list);
+	kfree(req);
+}
+
+/*
+ * Preallocate up to min_reqs structures and related buffers for use
+ * by the backchannel.  This function can be called multiple times
+ * when creating new sessions that use the same rpc_xprt.  The
+ * preallocated buffers are added to the pool of resources used by
+ * the rpc_xprt.  Anyone of these resources may be used used by an
+ * incoming callback request.  It's up to the higher levels in the
+ * stack to enforce that the maximum number of session slots is not
+ * being exceeded.
+ *
+ * Some callback arguments can be large.  For example, a pNFS server
+ * using multiple deviceids.  The list can be unbound, but the client
+ * has the ability to tell the server the maximum size of the callback
+ * requests.  Each deviceID is 16 bytes, so allocate one page
+ * for the arguments to have enough room to receive a number of these
+ * deviceIDs.  The NFS client indicates to the pNFS server that its
+ * callback requests can be up to 4096 bytes in size.
+ */
+int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
+{
+	struct page *page_rcv = NULL, *page_snd = NULL;
+	struct xdr_buf *xbufp = NULL;
+	struct rpc_rqst *req, *tmp;
+	struct list_head tmp_list;
+	int i;
+
+	dprintk("RPC:       setup backchannel transport\n");
+
+	/*
+	 * We use a temporary list to keep track of the preallocated
+	 * buffers.  Once we're done building the list we splice it
+	 * into the backchannel preallocation list off of the rpc_xprt
+	 * struct.  This helps minimize the amount of time the list
+	 * lock is held on the rpc_xprt struct.  It also makes cleanup
+	 * easier in case of memory allocation errors.
+	 */
+	INIT_LIST_HEAD(&tmp_list);
+	for (i = 0; i < min_reqs; i++) {
+		/* Pre-allocate one backchannel rpc_rqst */
+		req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL);
+		if (req == NULL) {
+			printk(KERN_ERR "Failed to create bc rpc_rqst\n");
+			goto out_free;
+		}
+
+		/* Add the allocated buffer to the tmp list */
+		dprintk("RPC:       adding req= %p\n", req);
+		list_add(&req->rq_bc_pa_list, &tmp_list);
+
+		req->rq_xprt = xprt;
+		INIT_LIST_HEAD(&req->rq_list);
+		INIT_LIST_HEAD(&req->rq_bc_list);
+
+		/* Preallocate one XDR receive buffer */
+		page_rcv = alloc_page(GFP_KERNEL);
+		if (page_rcv == NULL) {
+			printk(KERN_ERR "Failed to create bc receive xbuf\n");
+			goto out_free;
+		}
+		xbufp = &req->rq_rcv_buf;
+		xbufp->head[0].iov_base = page_address(page_rcv);
+		xbufp->head[0].iov_len = PAGE_SIZE;
+		xbufp->tail[0].iov_base = NULL;
+		xbufp->tail[0].iov_len = 0;
+		xbufp->page_len = 0;
+		xbufp->len = PAGE_SIZE;
+		xbufp->buflen = PAGE_SIZE;
+
+		/* Preallocate one XDR send buffer */
+		page_snd = alloc_page(GFP_KERNEL);
+		if (page_snd == NULL) {
+			printk(KERN_ERR "Failed to create bc snd xbuf\n");
+			goto out_free;
+		}
+
+		xbufp = &req->rq_snd_buf;
+		xbufp->head[0].iov_base = page_address(page_snd);
+		xbufp->head[0].iov_len = 0;
+		xbufp->tail[0].iov_base = NULL;
+		xbufp->tail[0].iov_len = 0;
+		xbufp->page_len = 0;
+		xbufp->len = 0;
+		xbufp->buflen = PAGE_SIZE;
+	}
+
+	/*
+	 * Add the temporary list to the backchannel preallocation list
+	 */
+	spin_lock_bh(&xprt->bc_pa_lock);
+	list_splice(&tmp_list, &xprt->bc_pa_list);
+	xprt_inc_alloc_count(xprt, min_reqs);
+	spin_unlock_bh(&xprt->bc_pa_lock);
+
+	dprintk("RPC:       setup backchannel transport done\n");
+	return 0;
+
+out_free:
+	/*
+	 * Memory allocation failed, free the temporary list
+	 */
+	list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list)
+		xprt_free_allocation(req);
+
+	dprintk("RPC:       setup backchannel transport failed\n");
+	return -1;
+}
+EXPORT_SYMBOL(xprt_setup_backchannel);
+
+/*
+ * Destroys the backchannel preallocated structures.
+ * Since these structures may have been allocated by multiple calls
+ * to xprt_setup_backchannel, we only destroy up to the maximum number
+ * of reqs specified by the caller.
+ * @xprt:	the transport holding the preallocated strucures
+ * @max_reqs	the maximum number of preallocated structures to destroy
+ */
+void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs)
+{
+	struct rpc_rqst *req = NULL, *tmp = NULL;
+
+	dprintk("RPC:        destroy backchannel transport\n");
+
+	BUG_ON(max_reqs == 0);
+	spin_lock_bh(&xprt->bc_pa_lock);
+	xprt_dec_alloc_count(xprt, max_reqs);
+	list_for_each_entry_safe(req, tmp, &xprt->bc_pa_list, rq_bc_pa_list) {
+		dprintk("RPC:        req=%p\n", req);
+		xprt_free_allocation(req);
+		if (--max_reqs == 0)
+			break;
+	}
+	spin_unlock_bh(&xprt->bc_pa_lock);
+
+	dprintk("RPC:        backchannel list empty= %s\n",
+		list_empty(&xprt->bc_pa_list) ? "true" : "false");
+}
+EXPORT_SYMBOL(xprt_destroy_backchannel);
+
+/*
+ * One or more rpc_rqst structure have been preallocated during the
+ * backchannel setup.  Buffer space for the send and private XDR buffers
+ * has been preallocated as well.  Use xprt_alloc_bc_request to allocate
+ * to this request.  Use xprt_free_bc_request to return it.
+ *
+ * Return an available rpc_rqst, otherwise NULL if non are available.
+ */
+struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt)
+{
+	struct rpc_rqst *req;
+
+	dprintk("RPC:       allocate a backchannel request\n");
+	spin_lock_bh(&xprt->bc_pa_lock);
+	if (!list_empty(&xprt->bc_pa_list)) {
+		req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst,
+				rq_bc_pa_list);
+		list_del(&req->rq_bc_pa_list);
+	} else {
+		req = NULL;
+	}
+	spin_unlock_bh(&xprt->bc_pa_lock);
+
+	if (req != NULL) {
+		set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
+		req->rq_received = 0;
+		req->rq_bytes_sent = 0;
+		memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
+			sizeof(req->rq_private_buf));
+	}
+	dprintk("RPC:       backchannel req=%p\n", req);
+	return req;
+}
+
+/*
+ * Return the preallocated rpc_rqst structure and XDR buffers
+ * associated with this rpc_task.
+ */
+void xprt_free_bc_request(struct rpc_rqst *req)
+{
+	struct rpc_xprt *xprt = req->rq_xprt;
+
+	dprintk("RPC:       free backchannel req=%p\n", req);
+
+	smp_mb__before_clear_bit();
+	BUG_ON(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
+	clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
+	smp_mb__after_clear_bit();
+
+	if (!xprt_need_to_requeue(xprt)) {
+		/*
+		 * The last remaining session was destroyed while this
+		 * entry was in use.  Free the entry and don't attempt
+		 * to add back to the list because there is no need to
+		 * have anymore preallocated entries.
+		 */
+		dprintk("RPC:       Last session removed req=%p\n", req);
+		xprt_free_allocation(req);
+		return;
+	}
+
+	/*
+	 * Return it to the list of preallocations so that it
+	 * may be reused by a new callback request.
+	 */
+	spin_lock_bh(&xprt->bc_pa_lock);
+	list_add(&req->rq_bc_pa_list, &xprt->bc_pa_list);
+	spin_unlock_bh(&xprt->bc_pa_lock);
+}
+
+#endif /* CONFIG_NFS_V4_1 */
-- 
cgit v1.2.3-70-g09d2


From 4a8d70bfef01f8e6b27785e2625e88e9a80924a5 Mon Sep 17 00:00:00 2001
From: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Date: Wed, 1 Apr 2009 09:23:01 -0400
Subject: nfs41: New include/linux/sunrpc/bc_xprt.h

Contains prototype for backchannel helper routines.

Signed-off-by: Ricardo Labiaga <ricardo.labiaga@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: xprt_setup_backchannel v4.0 only inline]
    Fix compile error when CONFIG_NFS_V4_1 is not set.
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[Update Copyright notice and fix formatting]
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
---
 include/linux/sunrpc/bc_xprt.h | 46 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 include/linux/sunrpc/bc_xprt.h

(limited to 'include')

diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h
new file mode 100644
index 00000000000..5965ae4f902
--- /dev/null
+++ b/include/linux/sunrpc/bc_xprt.h
@@ -0,0 +1,46 @@
+/******************************************************************************
+
+(c) 2008 NetApp.  All Rights Reserved.
+
+NetApp provides this source code under the GPL v2 License.
+The GPL v2 license is available at
+http://opensource.org/licenses/gpl-license.php.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+******************************************************************************/
+
+/*
+ * Functions to create and manage the backchannel
+ */
+
+#ifndef _LINUX_SUNRPC_BC_XPRT_H
+#define _LINUX_SUNRPC_BC_XPRT_H
+
+#include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/xprt.h>
+
+#ifdef CONFIG_NFS_V4_1
+struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt);
+void xprt_free_bc_request(struct rpc_rqst *req);
+int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs);
+void xprt_destroy_backchannel(struct rpc_xprt *, int max_reqs);
+#else /* CONFIG_NFS_V4_1 */
+static inline int xprt_setup_backchannel(struct rpc_xprt *xprt,
+					 unsigned int min_reqs)
+{
+	return 0;
+}
+#endif /* CONFIG_NFS_V4_1 */
+#endif /* _LINUX_SUNRPC_BC_XPRT_H */
+
-- 
cgit v1.2.3-70-g09d2


From a6c140969b4685f9b9f6773c0760f55ca66d1825 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@linux.intel.com>
Date: Wed, 17 Jun 2009 16:33:34 -0400
Subject: Delete pcibios_select_root

This function was only used by pci_claim_resource(), and the last commit
deleted that use.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/include/asm/pci.h    | 13 -------------
 arch/arm/include/asm/pci.h      | 13 -------------
 arch/ia64/include/asm/pci.h     | 13 -------------
 arch/mips/include/asm/pci.h     | 13 -------------
 arch/mn10300/include/asm/pci.h  | 13 -------------
 arch/parisc/include/asm/pci.h   | 13 -------------
 arch/powerpc/include/asm/pci.h  | 13 -------------
 arch/sh/include/asm/pci.h       | 13 -------------
 arch/sparc/include/asm/pci_64.h |  2 --
 arch/sparc/kernel/pci.c         | 13 -------------
 include/asm-generic/pci.h       | 13 -------------
 11 files changed, 132 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/asm/pci.h b/arch/alpha/include/asm/pci.h
index cb04eaa6ba3..d22ace99d13 100644
--- a/arch/alpha/include/asm/pci.h
+++ b/arch/alpha/include/asm/pci.h
@@ -237,19 +237,6 @@ extern void pcibios_resource_to_bus(struct pci_dev *, struct pci_bus_region *,
 extern void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
 				    struct pci_bus_region *region);
 
-static inline struct resource *
-pcibios_select_root(struct pci_dev *pdev, struct resource *res)
-{
-	struct resource *root = NULL;
-
-	if (res->flags & IORESOURCE_IO)
-		root = &ioport_resource;
-	if (res->flags & IORESOURCE_MEM)
-		root = &iomem_resource;
-
-	return root;
-}
-
 #define pci_domain_nr(bus) ((struct pci_controller *)(bus)->sysdata)->index
 
 static inline int pci_proc_domain(struct pci_bus *bus)
diff --git a/arch/arm/include/asm/pci.h b/arch/arm/include/asm/pci.h
index 918d0cbbf06..0abf386ba3d 100644
--- a/arch/arm/include/asm/pci.h
+++ b/arch/arm/include/asm/pci.h
@@ -65,19 +65,6 @@ extern void
 pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
 			struct pci_bus_region *region);
 
-static inline struct resource *
-pcibios_select_root(struct pci_dev *pdev, struct resource *res)
-{
-	struct resource *root = NULL;
-
-	if (res->flags & IORESOURCE_IO)
-		root = &ioport_resource;
-	if (res->flags & IORESOURCE_MEM)
-		root = &iomem_resource;
-
-	return root;
-}
-
 /*
  * Dummy implementation; always return 0.
  */
diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h
index 1d660d89db0..fcfca56bb85 100644
--- a/arch/ia64/include/asm/pci.h
+++ b/arch/ia64/include/asm/pci.h
@@ -135,19 +135,6 @@ extern void pcibios_resource_to_bus(struct pci_dev *dev,
 extern void pcibios_bus_to_resource(struct pci_dev *dev,
 		struct resource *res, struct pci_bus_region *region);
 
-static inline struct resource *
-pcibios_select_root(struct pci_dev *pdev, struct resource *res)
-{
-	struct resource *root = NULL;
-
-	if (res->flags & IORESOURCE_IO)
-		root = &ioport_resource;
-	if (res->flags & IORESOURCE_MEM)
-		root = &iomem_resource;
-
-	return root;
-}
-
 #define pcibios_scan_all_fns(a, b)	0
 
 #define HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ
diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h
index 053e4634ace..a68d111e55e 100644
--- a/arch/mips/include/asm/pci.h
+++ b/arch/mips/include/asm/pci.h
@@ -142,19 +142,6 @@ extern void pcibios_resource_to_bus(struct pci_dev *dev,
 extern void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
 				    struct pci_bus_region *region);
 
-static inline struct resource *
-pcibios_select_root(struct pci_dev *pdev, struct resource *res)
-{
-	struct resource *root = NULL;
-
-	if (res->flags & IORESOURCE_IO)
-		root = &ioport_resource;
-	if (res->flags & IORESOURCE_MEM)
-		root = &iomem_resource;
-
-	return root;
-}
-
 #define pci_domain_nr(bus) ((struct pci_controller *)(bus)->sysdata)->index
 
 static inline int pci_proc_domain(struct pci_bus *bus)
diff --git a/arch/mn10300/include/asm/pci.h b/arch/mn10300/include/asm/pci.h
index 0517b45313d..e58b9a46e1b 100644
--- a/arch/mn10300/include/asm/pci.h
+++ b/arch/mn10300/include/asm/pci.h
@@ -106,19 +106,6 @@ extern void pcibios_bus_to_resource(struct pci_dev *dev,
 				    struct resource *res,
 				    struct pci_bus_region *region);
 
-static inline struct resource *
-pcibios_select_root(struct pci_dev *pdev, struct resource *res)
-{
-	struct resource *root = NULL;
-
-	if (res->flags & IORESOURCE_IO)
-		root = &ioport_resource;
-	if (res->flags & IORESOURCE_MEM)
-		root = &iomem_resource;
-
-	return root;
-}
-
 #define pcibios_scan_all_fns(a, b)	0
 
 static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
diff --git a/arch/parisc/include/asm/pci.h b/arch/parisc/include/asm/pci.h
index 4ba868f44a5..7d842d699df 100644
--- a/arch/parisc/include/asm/pci.h
+++ b/arch/parisc/include/asm/pci.h
@@ -268,19 +268,6 @@ extern void
 pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
 			struct pci_bus_region *region);
 
-static inline struct resource *
-pcibios_select_root(struct pci_dev *pdev, struct resource *res)
-{
-	struct resource *root = NULL;
-
-	if (res->flags & IORESOURCE_IO)
-		root = &ioport_resource;
-	if (res->flags & IORESOURCE_MEM)
-		root = &iomem_resource;
-
-	return root;
-}
-
 static inline void pcibios_penalize_isa_irq(int irq, int active)
 {
 	/* We don't need to penalize isa irq's */
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index ba17d5d90a4..d9483c504d2 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -195,19 +195,6 @@ extern void pcibios_bus_to_resource(struct pci_dev *dev,
 			struct resource *res,
 			struct pci_bus_region *region);
 
-static inline struct resource *pcibios_select_root(struct pci_dev *pdev,
-			struct resource *res)
-{
-	struct resource *root = NULL;
-
-	if (res->flags & IORESOURCE_IO)
-		root = &ioport_resource;
-	if (res->flags & IORESOURCE_MEM)
-		root = &iomem_resource;
-
-	return root;
-}
-
 extern void pcibios_claim_one_bus(struct pci_bus *b);
 
 extern void pcibios_finish_adding_to_bus(struct pci_bus *bus);
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index ae0da6f48b6..d3633f513eb 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -137,19 +137,6 @@ extern void pcibios_resource_to_bus(struct pci_dev *dev,
 extern void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
 				    struct pci_bus_region *region);
 
-static inline struct resource *
-pcibios_select_root(struct pci_dev *pdev, struct resource *res)
-{
-	struct resource *root = NULL;
-
-	if (res->flags & IORESOURCE_IO)
-		root = &ioport_resource;
-	if (res->flags & IORESOURCE_MEM)
-		root = &iomem_resource;
-
-	return root;
-}
-
 /* Chances are this interrupt is wired PC-style ...  */
 static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
 {
diff --git a/arch/sparc/include/asm/pci_64.h b/arch/sparc/include/asm/pci_64.h
index 4f79a54948f..7a1e3566e59 100644
--- a/arch/sparc/include/asm/pci_64.h
+++ b/arch/sparc/include/asm/pci_64.h
@@ -191,8 +191,6 @@ extern void
 pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
 			struct pci_bus_region *region);
 
-extern struct resource *pcibios_select_root(struct pci_dev *, struct resource *);
-
 static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
 {
 	return PCI_IRQ_NONE;
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index 4638fba799e..57859ad2354 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -711,19 +711,6 @@ void __devinit pcibios_fixup_bus(struct pci_bus *pbus)
 	pbus->resource[1] = &pbm->mem_space;
 }
 
-struct resource *pcibios_select_root(struct pci_dev *pdev, struct resource *r)
-{
-	struct pci_pbm_info *pbm = pdev->bus->sysdata;
-	struct resource *root = NULL;
-
-	if (r->flags & IORESOURCE_IO)
-		root = &pbm->io_space;
-	if (r->flags & IORESOURCE_MEM)
-		root = &pbm->mem_space;
-
-	return root;
-}
-
 void pcibios_update_irq(struct pci_dev *pdev, int irq)
 {
 }
diff --git a/include/asm-generic/pci.h b/include/asm-generic/pci.h
index 515c6e2e321..b4326b5466e 100644
--- a/include/asm-generic/pci.h
+++ b/include/asm-generic/pci.h
@@ -30,19 +30,6 @@ pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
 	res->end = region->end;
 }
 
-static inline struct resource *
-pcibios_select_root(struct pci_dev *pdev, struct resource *res)
-{
-	struct resource *root = NULL;
-
-	if (res->flags & IORESOURCE_IO)
-		root = &ioport_resource;
-	if (res->flags & IORESOURCE_MEM)
-		root = &iomem_resource;
-
-	return root;
-}
-
 #define pcibios_scan_all_fns(a, b)	0
 
 #ifndef HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ
-- 
cgit v1.2.3-70-g09d2


From 55ae1aabfb108106dd095de2578ceef1c755a8b8 Mon Sep 17 00:00:00 2001
From: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Date: Wed, 1 Apr 2009 09:23:03 -0400
Subject: nfs41: Add backchannel processing support to RPC state machine

Adds rpc_run_bc_task() which is called by the NFS callback service to
process backchannel requests.  It performs similar work to rpc_run_task()
though "schedules" the backchannel task to be executed starting at the
call_trasmit state in the RPC state machine.

It also introduces some miscellaneous updates to the argument validation,
call_transmit, and transport cleanup functions to take into account
that there are now forechannel and backchannel tasks.

Backchannel requests do not carry an RPC message structure, since the
payload has already been XDR encoded using the existing NFSv4 callback
mechanism.

Introduce a new transmit state for the client to reply on to backchannel
requests.  This new state simply reserves the transport and issues the
reply.  In case of a connection related error, disconnects the transport and
drops the reply.  It requires the forechannel to re-establish the connection
and the server to retransmit the request, as stated in NFSv4.1 section
2.9.2 "Client and Server Transport Behavior".

Note: There is no need to loop attempting to reserve the transport.  If EAGAIN
is returned by xprt_prepare_transmit(), return with tk_status == 0,
setting tk_action to call_bc_transmit.  rpc_execute() will invoke it again
after the task is taken off the sleep queue.

[nfs41: rpc_run_bc_task() need not be exported outside RPC module]
[nfs41: New call_bc_transmit RPC state]
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[nfs41: Backchannel: No need to loop in call_bc_transmit()]
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[rpc_count_iostats incorrectly exits early]
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[Convert rpc_reply_expected() to inline function]
[Remove unnecessary BUG_ON()]
[Rename variable]
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
---
 include/linux/sunrpc/sched.h |   2 +
 include/linux/sunrpc/xprt.h  |  12 +++++
 net/sunrpc/clnt.c            | 117 ++++++++++++++++++++++++++++++++++++++++++-
 net/sunrpc/stats.c           |   6 ++-
 net/sunrpc/sunrpc.h          |  37 ++++++++++++++
 net/sunrpc/xprt.c            |  38 +++++++++++---
 6 files changed, 203 insertions(+), 9 deletions(-)
 create mode 100644 net/sunrpc/sunrpc.h

(limited to 'include')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 177376880fa..401097781fc 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -210,6 +210,8 @@ struct rpc_wait_queue {
  */
 struct rpc_task *rpc_new_task(const struct rpc_task_setup *);
 struct rpc_task *rpc_run_task(const struct rpc_task_setup *);
+struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
+				const struct rpc_call_ops *ops);
 void		rpc_put_task(struct rpc_task *);
 void		rpc_exit_task(struct rpc_task *);
 void		rpc_release_calldata(const struct rpc_call_ops *, void *);
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index beae030e80b..55c6c37e249 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -215,6 +215,18 @@ struct rpc_xprt {
 						/* buffer in use */
 #endif /* CONFIG_NFS_V4_1 */
 
+#if defined(CONFIG_NFS_V4_1)
+static inline int bc_prealloc(struct rpc_rqst *req)
+{
+	return test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
+}
+#else
+static inline int bc_prealloc(struct rpc_rqst *req)
+{
+	return 0;
+}
+#endif /* CONFIG_NFS_V4_1 */
+
 struct xprt_create {
 	int			ident;		/* XPRT_TRANSPORT identifier */
 	struct sockaddr *	srcaddr;	/* optional local address */
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index aca3ab6fc14..f3e93b8eb90 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -36,7 +36,9 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
 #include <linux/sunrpc/metrics.h>
+#include <linux/sunrpc/bc_xprt.h>
 
+#include "sunrpc.h"
 
 #ifdef RPC_DEBUG
 # define RPCDBG_FACILITY	RPCDBG_CALL
@@ -63,6 +65,9 @@ static void	call_decode(struct rpc_task *task);
 static void	call_bind(struct rpc_task *task);
 static void	call_bind_status(struct rpc_task *task);
 static void	call_transmit(struct rpc_task *task);
+#if defined(CONFIG_NFS_V4_1)
+static void	call_bc_transmit(struct rpc_task *task);
+#endif /* CONFIG_NFS_V4_1 */
 static void	call_status(struct rpc_task *task);
 static void	call_transmit_status(struct rpc_task *task);
 static void	call_refresh(struct rpc_task *task);
@@ -613,6 +618,50 @@ rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags,
 }
 EXPORT_SYMBOL_GPL(rpc_call_async);
 
+#if defined(CONFIG_NFS_V4_1)
+/**
+ * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
+ * rpc_execute against it
+ * @ops: RPC call ops
+ */
+struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
+					const struct rpc_call_ops *tk_ops)
+{
+	struct rpc_task *task;
+	struct xdr_buf *xbufp = &req->rq_snd_buf;
+	struct rpc_task_setup task_setup_data = {
+		.callback_ops = tk_ops,
+	};
+
+	dprintk("RPC: rpc_run_bc_task req= %p\n", req);
+	/*
+	 * Create an rpc_task to send the data
+	 */
+	task = rpc_new_task(&task_setup_data);
+	if (!task) {
+		xprt_free_bc_request(req);
+		goto out;
+	}
+	task->tk_rqstp = req;
+
+	/*
+	 * Set up the xdr_buf length.
+	 * This also indicates that the buffer is XDR encoded already.
+	 */
+	xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
+			xbufp->tail[0].iov_len;
+
+	task->tk_action = call_bc_transmit;
+	atomic_inc(&task->tk_count);
+	BUG_ON(atomic_read(&task->tk_count) != 2);
+	rpc_execute(task);
+
+out:
+	dprintk("RPC: rpc_run_bc_task: task= %p\n", task);
+	return task;
+}
+#endif /* CONFIG_NFS_V4_1 */
+
 void
 rpc_call_start(struct rpc_task *task)
 {
@@ -1098,7 +1147,7 @@ call_transmit(struct rpc_task *task)
 	 * in order to allow access to the socket to other RPC requests.
 	 */
 	call_transmit_status(task);
-	if (task->tk_msg.rpc_proc->p_decode != NULL)
+	if (rpc_reply_expected(task))
 		return;
 	task->tk_action = rpc_exit_task;
 	rpc_wake_up_queued_task(&task->tk_xprt->pending, task);
@@ -1133,6 +1182,72 @@ call_transmit_status(struct rpc_task *task)
 	}
 }
 
+#if defined(CONFIG_NFS_V4_1)
+/*
+ * 5b.	Send the backchannel RPC reply.  On error, drop the reply.  In
+ * addition, disconnect on connectivity errors.
+ */
+static void
+call_bc_transmit(struct rpc_task *task)
+{
+	struct rpc_rqst *req = task->tk_rqstp;
+
+	BUG_ON(task->tk_status != 0);
+	task->tk_status = xprt_prepare_transmit(task);
+	if (task->tk_status == -EAGAIN) {
+		/*
+		 * Could not reserve the transport. Try again after the
+		 * transport is released.
+		 */
+		task->tk_status = 0;
+		task->tk_action = call_bc_transmit;
+		return;
+	}
+
+	task->tk_action = rpc_exit_task;
+	if (task->tk_status < 0) {
+		printk(KERN_NOTICE "RPC: Could not send backchannel reply "
+			"error: %d\n", task->tk_status);
+		return;
+	}
+
+	xprt_transmit(task);
+	xprt_end_transmit(task);
+	dprint_status(task);
+	switch (task->tk_status) {
+	case 0:
+		/* Success */
+		break;
+	case -EHOSTDOWN:
+	case -EHOSTUNREACH:
+	case -ENETUNREACH:
+	case -ETIMEDOUT:
+		/*
+		 * Problem reaching the server.  Disconnect and let the
+		 * forechannel reestablish the connection.  The server will
+		 * have to retransmit the backchannel request and we'll
+		 * reprocess it.  Since these ops are idempotent, there's no
+		 * need to cache our reply at this time.
+		 */
+		printk(KERN_NOTICE "RPC: Could not send backchannel reply "
+			"error: %d\n", task->tk_status);
+		xprt_conditional_disconnect(task->tk_xprt,
+			req->rq_connect_cookie);
+		break;
+	default:
+		/*
+		 * We were unable to reply and will have to drop the
+		 * request.  The server should reconnect and retransmit.
+		 */
+		BUG_ON(task->tk_status == -EAGAIN);
+		printk(KERN_NOTICE "RPC: Could not send backchannel reply "
+			"error: %d\n", task->tk_status);
+		break;
+	}
+	rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
+}
+#endif /* CONFIG_NFS_V4_1 */
+
 /*
  * 6.	Sort out the RPC call status
  */
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 1ef6e46d9da..8487aa0f1f5 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -141,12 +141,14 @@ EXPORT_SYMBOL_GPL(rpc_free_iostats);
 void rpc_count_iostats(struct rpc_task *task)
 {
 	struct rpc_rqst *req = task->tk_rqstp;
-	struct rpc_iostats *stats = task->tk_client->cl_metrics;
+	struct rpc_iostats *stats;
 	struct rpc_iostats *op_metrics;
 	long rtt, execute, queue;
 
-	if (!stats || !req)
+	if (!task->tk_client || !task->tk_client->cl_metrics || !req)
 		return;
+
+	stats = task->tk_client->cl_metrics;
 	op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx];
 
 	op_metrics->om_ops++;
diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h
new file mode 100644
index 00000000000..5d9dd742264
--- /dev/null
+++ b/net/sunrpc/sunrpc.h
@@ -0,0 +1,37 @@
+/******************************************************************************
+
+(c) 2008 NetApp.  All Rights Reserved.
+
+NetApp provides this source code under the GPL v2 License.
+The GPL v2 license is available at
+http://opensource.org/licenses/gpl-license.php.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+******************************************************************************/
+
+/*
+ * Functions and macros used internally by RPC
+ */
+
+#ifndef _NET_SUNRPC_SUNRPC_H
+#define _NET_SUNRPC_SUNRPC_H
+
+static inline int rpc_reply_expected(struct rpc_task *task)
+{
+	return (task->tk_msg.rpc_proc != NULL) &&
+		(task->tk_msg.rpc_proc->p_decode != NULL);
+}
+
+#endif /* _NET_SUNRPC_SUNRPC_H */
+
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 52739f82df1..0eea2bfe111 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -12,8 +12,9 @@
  *  -	Next, the caller puts together the RPC message, stuffs it into
  *	the request struct, and calls xprt_transmit().
  *  -	xprt_transmit sends the message and installs the caller on the
- *	transport's wait list. At the same time, it installs a timer that
- *	is run after the packet's timeout has expired.
+ *	transport's wait list. At the same time, if a reply is expected,
+ *	it installs a timer that is run after the packet's timeout has
+ *	expired.
  *  -	When a packet arrives, the data_ready handler walks the list of
  *	pending requests for that transport. If a matching XID is found, the
  *	caller is woken up, and the timer removed.
@@ -46,6 +47,8 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/metrics.h>
 
+#include "sunrpc.h"
+
 /*
  * Local variables
  */
@@ -873,7 +876,10 @@ void xprt_transmit(struct rpc_task *task)
 	dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
 
 	if (!req->rq_received) {
-		if (list_empty(&req->rq_list)) {
+		if (list_empty(&req->rq_list) && rpc_reply_expected(task)) {
+			/*
+			 * Add to the list only if we're expecting a reply
+			 */
 			spin_lock_bh(&xprt->transport_lock);
 			/* Update the softirq receive buffer */
 			memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
@@ -908,8 +914,13 @@ void xprt_transmit(struct rpc_task *task)
 	/* Don't race with disconnect */
 	if (!xprt_connected(xprt))
 		task->tk_status = -ENOTCONN;
-	else if (!req->rq_received)
+	else if (!req->rq_received && rpc_reply_expected(task)) {
+		/*
+		 * Sleep on the pending queue since
+		 * we're expecting a reply.
+		 */
 		rpc_sleep_on(&xprt->pending, task, xprt_timer);
+	}
 	spin_unlock_bh(&xprt->transport_lock);
 }
 
@@ -982,11 +993,17 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
  */
 void xprt_release(struct rpc_task *task)
 {
-	struct rpc_xprt	*xprt = task->tk_xprt;
+	struct rpc_xprt	*xprt;
 	struct rpc_rqst	*req;
+	int is_bc_request;
 
 	if (!(req = task->tk_rqstp))
 		return;
+
+	/* Preallocated backchannel request? */
+	is_bc_request = bc_prealloc(req);
+
+	xprt = req->rq_xprt;
 	rpc_count_iostats(task);
 	spin_lock_bh(&xprt->transport_lock);
 	xprt->ops->release_xprt(xprt, task);
@@ -999,10 +1016,19 @@ void xprt_release(struct rpc_task *task)
 		mod_timer(&xprt->timer,
 				xprt->last_used + xprt->idle_timeout);
 	spin_unlock_bh(&xprt->transport_lock);
-	xprt->ops->buf_free(req->rq_buffer);
+	if (!bc_prealloc(req))
+		xprt->ops->buf_free(req->rq_buffer);
 	task->tk_rqstp = NULL;
 	if (req->rq_release_snd_buf)
 		req->rq_release_snd_buf(req);
+
+	/*
+	 * Early exit if this is a backchannel preallocated request.
+	 * There is no need to have it added to the RPC slot list.
+	 */
+	if (is_bc_request)
+		return;
+
 	memset(req, 0, sizeof(*req));	/* mark unused */
 
 	dprintk("RPC: %5u release request %p\n", task->tk_pid, req);
-- 
cgit v1.2.3-70-g09d2


From 0d90ba1cd416525c4825c111db862d8b15a02e9b Mon Sep 17 00:00:00 2001
From: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Date: Wed, 1 Apr 2009 09:23:04 -0400
Subject: nfs41: Backchannel callback service helper routines

Executes the backchannel task on the RPC state machine using
the existing open connection previously established by the client.

Signed-off-by: Ricardo Labiaga <ricardo.labiaga@netapp.com>

nfs41: Add bc_svc.o to sunrpc Makefile.

[nfs41: bc_send() does not need to be exported outside RPC module]
[nfs41: xprt_free_bc_request() need not be exported outside RPC module]
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[Update copyright]
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
---
 include/linux/sunrpc/bc_xprt.h |  3 ++
 net/sunrpc/Makefile            |  2 +-
 net/sunrpc/bc_svc.c            | 81 ++++++++++++++++++++++++++++++++++++++++++
 net/sunrpc/xprtsock.c          |  3 ++
 4 files changed, 88 insertions(+), 1 deletion(-)
 create mode 100644 net/sunrpc/bc_svc.c

(limited to 'include')

diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h
index 5965ae4f902..6508f0dc0ef 100644
--- a/include/linux/sunrpc/bc_xprt.h
+++ b/include/linux/sunrpc/bc_xprt.h
@@ -29,12 +29,15 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/xprt.h>
+#include <linux/sunrpc/sched.h>
 
 #ifdef CONFIG_NFS_V4_1
 struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt);
 void xprt_free_bc_request(struct rpc_rqst *req);
 int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs);
 void xprt_destroy_backchannel(struct rpc_xprt *, int max_reqs);
+void bc_release_request(struct rpc_task *);
+int bc_send(struct rpc_rqst *req);
 #else /* CONFIG_NFS_V4_1 */
 static inline int xprt_setup_backchannel(struct rpc_xprt *xprt,
 					 unsigned int min_reqs)
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 4a01f9684b8..db73fd2a3f0 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -13,6 +13,6 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
 	    rpcb_clnt.o timer.o xdr.o \
 	    sunrpc_syms.o cache.o rpc_pipe.o \
 	    svc_xprt.o
-sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o
+sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o
 sunrpc-$(CONFIG_PROC_FS) += stats.o
 sunrpc-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c
new file mode 100644
index 00000000000..13f214f5312
--- /dev/null
+++ b/net/sunrpc/bc_svc.c
@@ -0,0 +1,81 @@
+/******************************************************************************
+
+(c) 2007 Network Appliance, Inc.  All Rights Reserved.
+(c) 2009 NetApp.  All Rights Reserved.
+
+NetApp provides this source code under the GPL v2 License.
+The GPL v2 license is available at
+http://opensource.org/licenses/gpl-license.php.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+******************************************************************************/
+
+/*
+ * The NFSv4.1 callback service helper routines.
+ * They implement the transport level processing required to send the
+ * reply over an existing open connection previously established by the client.
+ */
+
+#if defined(CONFIG_NFS_V4_1)
+
+#include <linux/module.h>
+
+#include <linux/sunrpc/xprt.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/bc_xprt.h>
+
+#define RPCDBG_FACILITY	RPCDBG_SVCDSP
+
+void bc_release_request(struct rpc_task *task)
+{
+	struct rpc_rqst *req = task->tk_rqstp;
+
+	dprintk("RPC:       bc_release_request: task= %p\n", task);
+
+	/*
+	 * Release this request only if it's a backchannel
+	 * preallocated request
+	 */
+	if (!bc_prealloc(req))
+		return;
+	xprt_free_bc_request(req);
+}
+
+/* Empty callback ops */
+static const struct rpc_call_ops nfs41_callback_ops = {
+};
+
+
+/*
+ * Send the callback reply
+ */
+int bc_send(struct rpc_rqst *req)
+{
+	struct rpc_task *task;
+	int ret;
+
+	dprintk("RPC:       bc_send req= %p\n", req);
+	task = rpc_run_bc_task(req, &nfs41_callback_ops);
+	if (IS_ERR(task))
+		ret = PTR_ERR(task);
+	else {
+		BUG_ON(atomic_read(&task->tk_count) != 1);
+		ret = task->tk_status;
+		rpc_put_task(task);
+	}
+	return ret;
+	dprintk("RPC:       bc_send ret= %d \n", ret);
+}
+
+#endif /* CONFIG_NFS_V4_1 */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index e3e3a57116f..8a721867b60 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2183,6 +2183,9 @@ static struct rpc_xprt_ops xs_tcp_ops = {
 	.buf_free		= rpc_free,
 	.send_request		= xs_tcp_send_request,
 	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
+#if defined(CONFIG_NFS_V4_1)
+	.release_request	= bc_release_request,
+#endif /* CONFIG_NFS_V4_1 */
 	.close			= xs_tcp_close,
 	.destroy		= xs_destroy,
 	.print_stats		= xs_tcp_print_stats,
-- 
cgit v1.2.3-70-g09d2


From 4d6bbb6233c9cf23822a2f66f8470c9f40854b77 Mon Sep 17 00:00:00 2001
From: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Date: Wed, 1 Apr 2009 09:23:07 -0400
Subject: nfs41: Backchannel bc_svc_process()

Implement the NFSv4.1 backchannel service.  Invokes the common callback
processing logic svc_process_common() to authenticate the call and
dispatch the appropriate NFSv4.1 XDR decoder and operation procedure.
It then invokes bc_send() to send the reply over the same connection.
bc_send() is implemented in a separate patch.

At this time there is no slot validation or reply cache handling.

[nfs41: Preallocate rpc_rqst receive buffer for handling callbacks]
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
[Move bc_svc_process() declaration to correct patch]
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
---
 include/linux/sunrpc/svc.h |  2 ++
 net/sunrpc/svc.c           | 49 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 4a8afbd6200..16043c4a8bf 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -419,6 +419,8 @@ int		   svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
 int		   svc_pool_stats_open(struct svc_serv *serv, struct file *file);
 void		   svc_destroy(struct svc_serv *);
 int		   svc_process(struct svc_rqst *);
+int		   bc_svc_process(struct svc_serv *, struct rpc_rqst *,
+			struct svc_rqst *);
 int		   svc_register(const struct svc_serv *, const int,
 				const unsigned short, const unsigned short);
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index bfda66db2f4..06b52e465f4 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -25,6 +25,7 @@
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/bc_xprt.h>
 
 #define RPCDBG_FACILITY	RPCDBG_SVCDSP
 
@@ -1239,6 +1240,54 @@ svc_process(struct svc_rqst *rqstp)
 	return svc_send(rqstp);
 }
 
+#if defined(CONFIG_NFS_V4_1)
+/*
+ * Process a backchannel RPC request that arrived over an existing
+ * outbound connection
+ */
+int
+bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
+	       struct svc_rqst *rqstp)
+{
+	struct kvec	*argv = &rqstp->rq_arg.head[0];
+	struct kvec	*resv = &rqstp->rq_res.head[0];
+	int 		error;
+
+	/* Build the svc_rqst used by the common processing routine */
+	rqstp->rq_xid = req->rq_xid;
+	rqstp->rq_prot = req->rq_xprt->prot;
+	rqstp->rq_server = serv;
+
+	rqstp->rq_addrlen = sizeof(req->rq_xprt->addr);
+	memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen);
+	memcpy(&rqstp->rq_arg, &req->rq_rcv_buf, sizeof(rqstp->rq_arg));
+	memcpy(&rqstp->rq_res, &req->rq_snd_buf, sizeof(rqstp->rq_res));
+
+	/* reset result send buffer "put" position */
+	resv->iov_len = 0;
+
+	if (rqstp->rq_prot != IPPROTO_TCP) {
+		printk(KERN_ERR "No support for Non-TCP transports!\n");
+		BUG();
+	}
+
+	/*
+	 * Skip the next two words because they've already been
+	 * processed in the trasport
+	 */
+	svc_getu32(argv);	/* XID */
+	svc_getnl(argv);	/* CALLDIR */
+
+	error = svc_process_common(rqstp, argv, resv);
+	if (error <= 0)
+		return error;
+
+	memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
+	return bc_send(req);
+}
+EXPORT_SYMBOL(bc_svc_process);
+#endif /* CONFIG_NFS_V4_1 */
+
 /*
  * Return (transport-specific) limit on the rpc payload.
  */
-- 
cgit v1.2.3-70-g09d2


From 7652e5a09ba319241607b22d9055ce93fd5b8039 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 1 Apr 2009 09:23:09 -0400
Subject: nfs41: sunrpc: provide functions to create and destroy a svc_xprt for
 backchannel use

For nfs41 callbacks we need an svc_xprt to process requests coming up the
backchannel socket as rpc_rqst's that are transformed into svc_rqst's that
need a rq_xprt to be processed.

The svc_{udp,tcp}_create methods are too heavy for this job as svc_create_socket
creates an actual socket to listen on while for nfs41 we're "reusing" the
fore channel's socket.

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
---
 include/linux/sunrpc/svcsock.h |  2 ++
 net/sunrpc/svcsock.c           | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

(limited to 'include')

diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 483e10380aa..6bb1ec4ae31 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -42,6 +42,8 @@ int		svc_sock_names(char *buf, struct svc_serv *serv, char *toclose);
 int		svc_addsock(struct svc_serv *serv, int fd, char *name_return);
 void		svc_init_xprt_sock(void);
 void		svc_cleanup_xprt_sock(void);
+struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot);
+void		svc_sock_destroy(struct svc_xprt *);
 
 /*
  * svc_makesock socket characteristics
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 9d504234af4..a2a03e50053 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1327,3 +1327,42 @@ static void svc_sock_free(struct svc_xprt *xprt)
 		sock_release(svsk->sk_sock);
 	kfree(svsk);
 }
+
+/*
+ * Create a svc_xprt.
+ *
+ * For internal use only (e.g. nfsv4.1 backchannel).
+ * Callers should typically use the xpo_create() method.
+ */
+struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot)
+{
+	struct svc_sock *svsk;
+	struct svc_xprt *xprt = NULL;
+
+	dprintk("svc: %s\n", __func__);
+	svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
+	if (!svsk)
+		goto out;
+
+	xprt = &svsk->sk_xprt;
+	if (prot == IPPROTO_TCP)
+		svc_xprt_init(&svc_tcp_class, xprt, serv);
+	else if (prot == IPPROTO_UDP)
+		svc_xprt_init(&svc_udp_class, xprt, serv);
+	else
+		BUG();
+out:
+	dprintk("svc: %s return %p\n", __func__, xprt);
+	return xprt;
+}
+EXPORT_SYMBOL_GPL(svc_sock_create);
+
+/*
+ * Destroy a svc_sock.
+ */
+void svc_sock_destroy(struct svc_xprt *xprt)
+{
+	if (xprt)
+		kfree(container_of(xprt, struct svc_sock, sk_xprt));
+}
+EXPORT_SYMBOL_GPL(svc_sock_destroy);
-- 
cgit v1.2.3-70-g09d2


From 9c9f3f5fa62cc4959e4d4d1cf1ec74f2d6ac1197 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Wed, 1 Apr 2009 09:23:10 -0400
Subject: nfs41: sunrpc: add a struct svc_xprt pointer to struct svc_serv for
 backchannel use

This svc_xprt is passed on to the callback service thread to be later used
to processes incoming svc_rqst's

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
---
 include/linux/sunrpc/svc.h | 1 +
 net/sunrpc/svc.c           | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 16043c4a8bf..ea8009695c6 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -103,6 +103,7 @@ struct svc_serv {
 	spinlock_t		sv_cb_lock;	/* protects the svc_cb_list */
 	wait_queue_head_t	sv_cb_waitq;	/* sleep here if there are no
 						 * entries in the svc_cb_list */
+	struct svc_xprt		*bc_xprt;
 #endif /* CONFIG_NFS_V4_1 */
 };
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 06b52e465f4..b35048fabe2 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -487,6 +487,10 @@ svc_destroy(struct svc_serv *serv)
 	if (svc_serv_is_pooled(serv))
 		svc_pool_map_put();
 
+#if defined(CONFIG_NFS_V4_1)
+	svc_sock_destroy(serv->bc_xprt);
+#endif /* CONFIG_NFS_V4_1 */
+
 	svc_unregister(serv);
 	kfree(serv->sv_pools);
 	kfree(serv);
-- 
cgit v1.2.3-70-g09d2


From dd2b63d049480979016b959abc2d141cdddb1389 Mon Sep 17 00:00:00 2001
From: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Date: Wed, 1 Apr 2009 09:23:28 -0400
Subject: nfs41: Rename rq_received to rq_reply_bytes_recvd

The 'rq_received' member of 'struct rpc_rqst' is used to track when we
have received a reply to our request.  With v4.1, the backchannel
can now accept callback requests over the existing connection.  Rename
this field to make it clear that it is only used for tracking reply bytes
and not all bytes received on the connection.

Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
---
 include/linux/sunrpc/xprt.h   |  3 ++-
 net/sunrpc/backchannel_rqst.c |  2 +-
 net/sunrpc/clnt.c             |  8 ++++----
 net/sunrpc/stats.c            |  2 +-
 net/sunrpc/xprt.c             | 15 ++++++++-------
 5 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 55c6c37e249..1175d58efc2 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -67,7 +67,8 @@ struct rpc_rqst {
 	struct rpc_task *	rq_task;	/* RPC task data */
 	__be32			rq_xid;		/* request XID */
 	int			rq_cong;	/* has incremented xprt->cong */
-	int			rq_received;	/* receive completed */
+	int			rq_reply_bytes_recvd;	/* number of reply */
+							/* bytes received */
 	u32			rq_seqno;	/* gss seq no. used on req. */
 	int			rq_enc_pages_num;
 	struct page		**rq_enc_pages;	/* scratch pages for use by
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index f56e18a2349..5a7d342e308 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -230,7 +230,7 @@ struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt)
 
 	if (req != NULL) {
 		set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
-		req->rq_received = 0;
+		req->rq_reply_bytes_recvd = 0;
 		req->rq_bytes_sent = 0;
 		memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
 			sizeof(req->rq_private_buf));
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index f3e93b8eb90..5bc2f45bddf 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1258,8 +1258,8 @@ call_status(struct rpc_task *task)
 	struct rpc_rqst	*req = task->tk_rqstp;
 	int		status;
 
-	if (req->rq_received > 0 && !req->rq_bytes_sent)
-		task->tk_status = req->rq_received;
+	if (req->rq_reply_bytes_recvd > 0 && !req->rq_bytes_sent)
+		task->tk_status = req->rq_reply_bytes_recvd;
 
 	dprint_status(task);
 
@@ -1376,7 +1376,7 @@ call_decode(struct rpc_task *task)
 
 	/*
 	 * Ensure that we see all writes made by xprt_complete_rqst()
-	 * before it changed req->rq_received.
+	 * before it changed req->rq_reply_bytes_recvd.
 	 */
 	smp_rmb();
 	req->rq_rcv_buf.len = req->rq_private_buf.len;
@@ -1417,7 +1417,7 @@ out_retry:
 	task->tk_status = 0;
 	/* Note: rpc_verify_header() may have freed the RPC slot */
 	if (task->tk_rqstp == req) {
-		req->rq_received = req->rq_rcv_buf.len = 0;
+		req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0;
 		if (task->tk_client->cl_discrtry)
 			xprt_conditional_disconnect(task->tk_xprt,
 					req->rq_connect_cookie);
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 8487aa0f1f5..1b4e6791ecf 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -156,7 +156,7 @@ void rpc_count_iostats(struct rpc_task *task)
 	op_metrics->om_timeouts += task->tk_timeouts;
 
 	op_metrics->om_bytes_sent += task->tk_bytes_sent;
-	op_metrics->om_bytes_recv += req->rq_received;
+	op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd;
 
 	queue = (long)req->rq_xtime - task->tk_start;
 	if (queue < 0)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index c144611223f..f412a852bc7 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -806,9 +806,10 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
 
 	list_del_init(&req->rq_list);
 	req->rq_private_buf.len = copied;
-	/* Ensure all writes are done before we update req->rq_received */
+	/* Ensure all writes are done before we update */
+	/* req->rq_reply_bytes_recvd */
 	smp_wmb();
-	req->rq_received = copied;
+	req->rq_reply_bytes_recvd = copied;
 	rpc_wake_up_queued_task(&xprt->pending, task);
 }
 EXPORT_SYMBOL_GPL(xprt_complete_rqst);
@@ -823,7 +824,7 @@ static void xprt_timer(struct rpc_task *task)
 	dprintk("RPC: %5u xprt_timer\n", task->tk_pid);
 
 	spin_lock_bh(&xprt->transport_lock);
-	if (!req->rq_received) {
+	if (!req->rq_reply_bytes_recvd) {
 		if (xprt->ops->timer)
 			xprt->ops->timer(task);
 	} else
@@ -845,8 +846,8 @@ int xprt_prepare_transmit(struct rpc_task *task)
 	dprintk("RPC: %5u xprt_prepare_transmit\n", task->tk_pid);
 
 	spin_lock_bh(&xprt->transport_lock);
-	if (req->rq_received && !req->rq_bytes_sent) {
-		err = req->rq_received;
+	if (req->rq_reply_bytes_recvd && !req->rq_bytes_sent) {
+		err = req->rq_reply_bytes_recvd;
 		goto out_unlock;
 	}
 	if (!xprt->ops->reserve_xprt(task))
@@ -875,7 +876,7 @@ void xprt_transmit(struct rpc_task *task)
 
 	dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
 
-	if (!req->rq_received) {
+	if (!req->rq_reply_bytes_recvd) {
 		if (list_empty(&req->rq_list) && rpc_reply_expected(task)) {
 			/*
 			 * Add to the list only if we're expecting a reply
@@ -914,7 +915,7 @@ void xprt_transmit(struct rpc_task *task)
 	/* Don't race with disconnect */
 	if (!xprt_connected(xprt))
 		task->tk_status = -ENOTCONN;
-	else if (!req->rq_received && rpc_reply_expected(task)) {
+	else if (!req->rq_reply_bytes_recvd && rpc_reply_expected(task)) {
 		/*
 		 * Sleep on the pending queue since
 		 * we're expecting a reply.
-- 
cgit v1.2.3-70-g09d2


From f8625a6a4bb76207302be58453603d8e324df490 Mon Sep 17 00:00:00 2001
From: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Date: Wed, 1 Apr 2009 09:23:33 -0400
Subject: nfs41: Backchannel: Add a backchannel slot table to the session

Defines a new 'struct nfs4_slot_table' in the 'struct nfs4_session'
for use by the backchannel.  Initializes, resets, and destroys the backchannel
slot table in the same manner the forechannel slot table is initialized,
reset, and destroyed.

The sequenceid for each slot in the backchannel slot table is initialized
to 0, whereas the forechannel slotid's sequenceid is set to 1.

Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
---
 fs/nfs/nfs4proc.c         | 48 +++++++++++++++++++++++++++++++++++------------
 include/linux/nfs_fs_sb.h |  2 +-
 2 files changed, 37 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c3019ad8589..57dabb8a048 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4414,9 +4414,30 @@ static int nfs4_reset_slot_tables(struct nfs4_session *session)
 			session->fc_attrs.max_reqs,
 			session->fc_slot_table.max_slots,
 			1);
+	if (status)
+		return status;
+
+	status = nfs4_reset_slot_table(&session->bc_slot_table,
+			session->bc_attrs.max_reqs,
+			session->bc_slot_table.max_slots,
+			0);
 	return status;
 }
 
+/* Destroy the slot table */
+static void nfs4_destroy_slot_tables(struct nfs4_session *session)
+{
+	if (session->fc_slot_table.slots != NULL) {
+		kfree(session->fc_slot_table.slots);
+		session->fc_slot_table.slots = NULL;
+	}
+	if (session->bc_slot_table.slots != NULL) {
+		kfree(session->bc_slot_table.slots);
+		session->bc_slot_table.slots = NULL;
+	}
+	return;
+}
+
 /*
  * Initialize slot table
  */
@@ -4470,17 +4491,15 @@ static int nfs4_init_slot_tables(struct nfs4_session *session)
 
 	status = nfs4_init_slot_table(&session->fc_slot_table,
 			session->fc_attrs.max_reqs, 1);
-	return status;
-}
+	if (status)
+		return status;
 
-/* Destroy the slot table */
-static void nfs4_destroy_slot_table(struct nfs4_session *session)
-{
-	if (session->fc_slot_table.slots == NULL)
-		return;
-	kfree(session->fc_slot_table.slots);
-	session->fc_slot_table.slots = NULL;
-	return;
+	status = nfs4_init_slot_table(&session->bc_slot_table,
+			session->bc_attrs.max_reqs, 0);
+	if (status)
+		nfs4_destroy_slot_tables(session);
+
+	return status;
 }
 
 struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
@@ -4503,7 +4522,12 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
 
 	tbl = &session->fc_slot_table;
 	spin_lock_init(&tbl->slot_tbl_lock);
-	rpc_init_wait_queue(&tbl->slot_tbl_waitq, "Slot table");
+	rpc_init_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
+
+	tbl = &session->bc_slot_table;
+	spin_lock_init(&tbl->slot_tbl_lock);
+	rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
+
 	session->clp = clp;
 	return session;
 }
@@ -4515,7 +4539,7 @@ void nfs4_destroy_session(struct nfs4_session *session)
 		__func__, session->clp->cl_rpcclient->cl_xprt);
 	xprt_destroy_backchannel(session->clp->cl_rpcclient->cl_xprt,
 				NFS41_BC_MIN_CALLBACKS);
-	nfs4_destroy_slot_table(session);
+	nfs4_destroy_slot_tables(session);
 	kfree(session);
 }
 
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index d0902ccec9c..19fe15d1204 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -205,7 +205,7 @@ struct nfs4_session {
 	struct nfs4_channel_attrs	fc_attrs;
 	struct nfs4_slot_table		fc_slot_table;
 	struct nfs4_channel_attrs	bc_attrs;
-					/* back channel has one slot */
+	struct nfs4_slot_table		bc_slot_table;
 	struct nfs_client		*clp;
 };
 
-- 
cgit v1.2.3-70-g09d2


From cdc2ae6d6a30df8fd92c5e300d0e3005e13eb6b0 Mon Sep 17 00:00:00 2001
From: Andre Noll <maan@systemlinux.org>
Date: Thu, 18 Jun 2009 08:46:47 +1000
Subject: md: fix some comments.

1/ Raid5 has learned to take over also raid4 and raid6 arrays.
2/ new_chunk in mdp_superblock_1 is in sectors, not bytes.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 drivers/md/raid5.c        | 2 --
 include/linux/raid/md_p.h | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 72e8a324dcf..00934415675 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5231,8 +5231,6 @@ static void *raid5_takeover(mddev_t *mddev)
 	 *  raid1 - if there are two drives.  We need to know the chunk size
 	 *  raid4 - trivial - just use a raid4 layout.
 	 *  raid6 - Providing it is a *_6 layout
-	 *
-	 * For now, just do raid1
 	 */
 
 	if (mddev->level == 1)
diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h
index 6ba830fa853..ffa2efbbe38 100644
--- a/include/linux/raid/md_p.h
+++ b/include/linux/raid/md_p.h
@@ -232,7 +232,7 @@ struct mdp_superblock_1 {
 	__le64	reshape_position;	/* next address in array-space for reshape */
 	__le32	delta_disks;	/* change in number of raid_disks		*/
 	__le32	new_layout;	/* new layout					*/
-	__le32	new_chunk;	/* new chunk size (bytes)			*/
+	__le32	new_chunk;	/* new chunk size (512byte sectors)		*/
 	__u8	pad1[128-124];	/* set to 0 when written */
 
 	/* constant this-device information - 64 bytes */
-- 
cgit v1.2.3-70-g09d2


From 6c9dc4255108bab4ef5c177d369b99c3c23492a7 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 17 Jun 2009 18:02:10 -0700
Subject: lockd: Update NSM state from SM_MON replies

When rpc.statd starts up in user space at boot time, it attempts to
write the latest NSM local state number into
/proc/sys/fs/nfs/nsm_local_state.

If lockd.ko isn't loaded yet (as is the case in most configurations),
that file doesn't exist, thus the kernel's NSM state remains set to
its initial value of zero during lockd operation.

This is a problem because rpc.statd and lockd use the NSM state number
to prevent repeated lock recovery on rebooted hosts.  If lockd sends
a zero NSM state, but then a delayed SM_NOTIFY with a real NSM state
number is received, there is no way for lockd or rpc.statd to
distinguish that stale SM_NOTIFY from an actual reboot.  Thus lock
recovery could be performed after the rebooted host has already
started reclaiming locks, and those locks will be lost.

We could change /etc/init.d/nfslock so it always modprobes lockd.ko
before starting rpc.statd.  However, if lockd.ko is ever unloaded
and reloaded, we are back at square one, since the NSM state is not
preserved across an unload/reload cycle.  This may happen frequently
on clients that use automounter.  A period of NFS inactivity causes
lockd.ko to be unloaded, and the kernel loses its NSM state setting.

Instead, let's use the fact that rpc.statd plants the local system's
NSM state in every SM_MON (and SM_UNMON) reply.  lockd performs a
synchronous SM_MON upcall to the local rpc.statd _before_ sending its
first NLM request to a new remote.  This would permit rpc.statd to
provide the current NSM state to lockd, even after lockd.ko had been
unloaded and reloaded.

Note that NLMPROC_LOCK arguments are constructed before the
nsm_monitor() call, so we have to rearrange argument construction very
slightly to make this all work out.

And, the kernel appears to treat NSM state as a u32 (see struct
nlm_args and nsm_res).  Make nsm_local_state a u32 as well, to ensure
we don't get bogus comparison results.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c         |  2 +-
 fs/lockd/mon.c              | 18 ++++++++++++------
 include/linux/lockd/lockd.h |  2 +-
 3 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 273e229353f..f2fdcbce143 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -126,7 +126,6 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
 	struct nlm_lock	*lock = &argp->lock;
 
 	nlmclnt_next_cookie(&argp->cookie);
-	argp->state   = nsm_local_state;
 	memcpy(&lock->fh, NFS_FH(fl->fl_file->f_path.dentry->d_inode), sizeof(struct nfs_fh));
 	lock->caller  = utsname()->nodename;
 	lock->oh.data = req->a_owner;
@@ -521,6 +520,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 
 	if (nsm_monitor(host) < 0)
 		goto out;
+	req->a_args.state = nsm_local_state;
 
 	fl->fl_flags |= FL_ACCESS;
 	status = do_vfs_lock(fl);
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 6d5d4a4169e..38385336614 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -53,7 +53,7 @@ static				DEFINE_SPINLOCK(nsm_lock);
 /*
  * Local NSM state
  */
-int	__read_mostly		nsm_local_state;
+u32	__read_mostly		nsm_local_state;
 int	__read_mostly		nsm_use_hostnames;
 
 static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm)
@@ -184,13 +184,19 @@ int nsm_monitor(const struct nlm_host *host)
 	nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf;
 
 	status = nsm_mon_unmon(nsm, NSMPROC_MON, &res);
-	if (res.status != 0)
+	if (unlikely(res.status != 0))
 		status = -EIO;
-	if (status < 0)
+	if (unlikely(status < 0)) {
 		printk(KERN_NOTICE "lockd: cannot monitor %s\n", nsm->sm_name);
-	else
-		nsm->sm_monitored = 1;
-	return status;
+		return status;
+	}
+
+	nsm->sm_monitored = 1;
+	if (unlikely(nsm_local_state != res.state)) {
+		nsm_local_state = res.state;
+		dprintk("lockd: NSM state changed to %d\n", nsm_local_state);
+	}
+	return 0;
 }
 
 /**
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 51855dfd8ad..c325b187966 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -195,7 +195,7 @@ extern struct svc_procedure	nlmsvc_procedures4[];
 extern int			nlmsvc_grace_period;
 extern unsigned long		nlmsvc_timeout;
 extern int			nsm_use_hostnames;
-extern int			nsm_local_state;
+extern u32			nsm_local_state;
 
 /*
  * Lockd client functions
-- 
cgit v1.2.3-70-g09d2


From 2ad780978b7c0c3e7877949f098cbd06e7c73839 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 17 Jun 2009 18:02:11 -0700
Subject: NFS: Clean up MNT program definitions

Clean up:  Relocate MNT program procedure number definitions to the
only file that uses them.  Relocate the version number definitions,
which are shared, to nfs.h.  Remove duplicate program number
definitions.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/mount_clnt.c  | 32 ++++++++++++++++++++++++++++----
 fs/nfs/nfsroot.c     |  3 +++
 include/linux/nfs.h  |  5 +++--
 include/linux/nfs2.h |  7 -------
 include/linux/nfs3.h |  5 -----
 5 files changed, 34 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index ca905a5bb1b..af45a374d56 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -20,6 +20,30 @@
 # define NFSDBG_FACILITY	NFSDBG_MOUNT
 #endif
 
+/*
+ * Defined by RFC 1094, section A.5
+ */
+enum {
+	MOUNTPROC_NULL		= 0,
+	MOUNTPROC_MNT		= 1,
+	MOUNTPROC_DUMP		= 2,
+	MOUNTPROC_UMNT		= 3,
+	MOUNTPROC_UMNTALL	= 4,
+	MOUNTPROC_EXPORT	= 5,
+};
+
+/*
+ * Defined by RFC 1813, section 5.2
+ */
+enum {
+	MOUNTPROC3_NULL		= 0,
+	MOUNTPROC3_MNT		= 1,
+	MOUNTPROC3_DUMP		= 2,
+	MOUNTPROC3_UMNT		= 3,
+	MOUNTPROC3_UMNTALL	= 4,
+	MOUNTPROC3_EXPORT	= 5,
+};
+
 static struct rpc_program	mnt_program;
 
 struct mnt_fhstatus {
@@ -68,7 +92,7 @@ int nfs_mount(struct nfs_mount_request *info)
 	if (info->version == NFS_MNT3_VERSION)
 		msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
 	else
-		msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
+		msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC_MNT];
 
 	status = rpc_call_sync(mnt_clnt, &msg, 0);
 	rpc_shutdown_client(mnt_clnt);
@@ -145,13 +169,13 @@ static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p,
 #define MNT_fhstatus3_sz	(1 + 16)
 
 static struct rpc_procinfo mnt_procedures[] = {
-	[MNTPROC_MNT] = {
-		.p_proc		= MNTPROC_MNT,
+	[MOUNTPROC_MNT] = {
+		.p_proc		= MOUNTPROC_MNT,
 		.p_encode	= (kxdrproc_t) xdr_encode_dirpath,
 		.p_decode	= (kxdrproc_t) xdr_decode_fhstatus,
 		.p_arglen	= MNT_dirpath_sz,
 		.p_replen	= MNT_fhstatus_sz,
-		.p_statidx	= MNTPROC_MNT,
+		.p_statidx	= MOUNTPROC_MNT,
 		.p_name		= "MOUNT",
 	},
 };
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index e3ed5908820..24c1b93874c 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -92,6 +92,9 @@
 #undef NFSROOT_DEBUG
 #define NFSDBG_FACILITY NFSDBG_ROOT
 
+/* Default port to use if server is not running a portmapper */
+#define NFS_MNT_PORT	627
+
 /* Default path we try to mount. "%s" gets replaced by our IP address */
 #define NFS_ROOT		"/tftpboot/%s"
 
diff --git a/include/linux/nfs.h b/include/linux/nfs.h
index 214d499718f..f387919bbc5 100644
--- a/include/linux/nfs.h
+++ b/include/linux/nfs.h
@@ -25,8 +25,9 @@
 #define NFSMODE_SOCK	0140000
 #define NFSMODE_FIFO	0010000
 
-#define NFS_MNT_PROGRAM	100005
-#define NFS_MNT_PORT	627
+#define NFS_MNT_PROGRAM		100005
+#define NFS_MNT_VERSION		1
+#define NFS_MNT3_VERSION	3
 
 /*
  * NFS stats. The good thing with these values is that NFSv3 errors are
diff --git a/include/linux/nfs2.h b/include/linux/nfs2.h
index 0ed9517138f..fde24b30cc9 100644
--- a/include/linux/nfs2.h
+++ b/include/linux/nfs2.h
@@ -64,11 +64,4 @@ struct nfs2_fh {
 #define NFSPROC_READDIR		16
 #define NFSPROC_STATFS		17
 
-#define NFS_MNT_PROGRAM		100005
-#define NFS_MNT_VERSION		1
-#define MNTPROC_NULL		0
-#define MNTPROC_MNT		1
-#define MNTPROC_UMNT		3
-#define MNTPROC_UMNTALL		4
-
 #endif /* _LINUX_NFS2_H */
diff --git a/include/linux/nfs3.h b/include/linux/nfs3.h
index 539f3b550ea..ac33806ec7f 100644
--- a/include/linux/nfs3.h
+++ b/include/linux/nfs3.h
@@ -88,12 +88,7 @@ struct nfs3_fh {
 #define NFS3PROC_PATHCONF	20
 #define NFS3PROC_COMMIT		21
 
-#define NFS_MNT3_PROGRAM	100005
 #define NFS_MNT3_VERSION	3
-#define MOUNTPROC3_NULL		0
-#define MOUNTPROC3_MNT		1
-#define MOUNTPROC3_UMNT		3
-#define MOUNTPROC3_UMNTALL	4
  
 
 #if defined(__KERNEL__)
-- 
cgit v1.2.3-70-g09d2


From ce597bb42aa84bc73db80509b7c37e7fbc0b75c4 Mon Sep 17 00:00:00 2001
From: Alexander Chiang <achiang@hp.com>
Date: Wed, 10 Jun 2009 19:55:09 +0000
Subject: ACPI: make acpi_pci_bind() static

acpi_pci_root_add() explicitly assigns device->ops.bind, and later
calls acpi_pci_bind_root(), which also does the same thing.

We don't need to repeat ourselves; removing the explicit assignment
allows us to make acpi_pci_bind() static.

Signed-off-by: Alex Chiang <achiang@hp.com>
Acked-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/pci_bind.c     | 3 ++-
 drivers/acpi/pci_root.c     | 2 --
 include/acpi/acpi_drivers.h | 1 -
 3 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/pci_bind.c b/drivers/acpi/pci_bind.c
index bc46de3d967..236765c6017 100644
--- a/drivers/acpi/pci_bind.c
+++ b/drivers/acpi/pci_bind.c
@@ -44,6 +44,7 @@ struct acpi_pci_data {
 	struct pci_dev *dev;
 };
 
+static int acpi_pci_bind(struct acpi_device *device);
 static int acpi_pci_unbind(struct acpi_device *device);
 
 static void acpi_pci_data_handler(acpi_handle handle, u32 function,
@@ -108,7 +109,7 @@ acpi_status acpi_get_pci_id(acpi_handle handle, struct acpi_pci_id *id)
 
 EXPORT_SYMBOL(acpi_get_pci_id);
 
-int acpi_pci_bind(struct acpi_device *device)
+static int acpi_pci_bind(struct acpi_device *device)
 {
 	int result = 0;
 	acpi_status status;
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 196f97d0095..ca8dba3b40b 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -386,8 +386,6 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 	strcpy(acpi_device_class(device), ACPI_PCI_ROOT_CLASS);
 	device->driver_data = root;
 
-	device->ops.bind = acpi_pci_bind;
-
 	/*
 	 * All supported architectures that use ACPI have support for
 	 * PCI domains, so we indicate this in _OSC support capabilities.
diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h
index 0352c8f0b05..7a2ce53146a 100644
--- a/include/acpi/acpi_drivers.h
+++ b/include/acpi/acpi_drivers.h
@@ -99,7 +99,6 @@ void acpi_pci_irq_del_prt(int segment, int bus);
 struct pci_bus;
 
 acpi_status acpi_get_pci_id(acpi_handle handle, struct acpi_pci_id *id);
-int acpi_pci_bind(struct acpi_device *device);
 int acpi_pci_bind_root(struct acpi_device *device, struct acpi_pci_id *id,
 		       struct pci_bus *bus);
 
-- 
cgit v1.2.3-70-g09d2


From 275582031f9b3597a1b973f3ff617adfe639faa2 Mon Sep 17 00:00:00 2001
From: Alexander Chiang <achiang@hp.com>
Date: Wed, 10 Jun 2009 19:55:14 +0000
Subject: ACPI: Introduce acpi_is_root_bridge()

Returns whether an ACPI CA node is a PCI root bridge or not.

This API is generically useful, and shouldn't just be a hotplug function.

The implementation becomes much simpler as well.

Signed-off-by: Alex Chiang <achiang@hp.com>
Acked-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/pci_root.c            | 24 +++++++++++++++++++++++
 drivers/pci/hotplug/acpi_pcihp.c   | 40 ++------------------------------------
 drivers/pci/hotplug/acpiphp_glue.c |  2 +-
 include/acpi/acpi_bus.h            |  1 +
 include/linux/pci_hotplug.h        |  1 -
 5 files changed, 28 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index ca8dba3b40b..888cb9f5c5f 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -142,6 +142,30 @@ acpi_handle acpi_get_pci_rootbridge_handle(unsigned int seg, unsigned int bus)
 
 EXPORT_SYMBOL_GPL(acpi_get_pci_rootbridge_handle);
 
+/**
+ * acpi_is_root_bridge - determine whether an ACPI CA node is a PCI root bridge
+ * @handle - the ACPI CA node in question.
+ *
+ * Note: we could make this API take a struct acpi_device * instead, but
+ * for now, it's more convenient to operate on an acpi_handle.
+ */
+int acpi_is_root_bridge(acpi_handle handle)
+{
+	int ret;
+	struct acpi_device *device;
+
+	ret = acpi_bus_get_device(handle, &device);
+	if (ret)
+		return 0;
+
+	ret = acpi_match_device_ids(device, root_device_ids);
+	if (ret)
+		return 0;
+	else
+		return 1;
+}
+EXPORT_SYMBOL_GPL(acpi_is_root_bridge);
+
 static acpi_status
 get_root_bridge_busnr_callback(struct acpi_resource *resource, void *data)
 {
diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c
index fbc63d5e459..eb159587d0b 100644
--- a/drivers/pci/hotplug/acpi_pcihp.c
+++ b/drivers/pci/hotplug/acpi_pcihp.c
@@ -354,7 +354,7 @@ acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus,
 		status = acpi_run_hpp(handle, hpp);
 		if (ACPI_SUCCESS(status))
 			break;
-		if (acpi_root_bridge(handle))
+		if (acpi_is_root_bridge(handle))
 			break;
 		status = acpi_get_parent(handle, &phandle);
 		if (ACPI_FAILURE(status))
@@ -428,7 +428,7 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags)
 		status = acpi_run_oshp(handle);
 		if (ACPI_SUCCESS(status))
 			goto got_one;
-		if (acpi_root_bridge(handle))
+		if (acpi_is_root_bridge(handle))
 			break;
 		chandle = handle;
 		status = acpi_get_parent(chandle, &handle);
@@ -449,42 +449,6 @@ got_one:
 }
 EXPORT_SYMBOL(acpi_get_hp_hw_control_from_firmware);
 
-/* acpi_root_bridge - check to see if this acpi object is a root bridge
- *
- * @handle - the acpi object in question.
- */
-int acpi_root_bridge(acpi_handle handle)
-{
-	acpi_status status;
-	struct acpi_device_info *info;
-	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
-	int i;
-
-	status = acpi_get_object_info(handle, &buffer);
-	if (ACPI_SUCCESS(status)) {
-		info = buffer.pointer;
-		if ((info->valid & ACPI_VALID_HID) &&
-			!strcmp(PCI_ROOT_HID_STRING,
-					info->hardware_id.value)) {
-			kfree(buffer.pointer);
-			return 1;
-		}
-		if (info->valid & ACPI_VALID_CID) {
-			for (i=0; i < info->compatibility_id.count; i++) {
-				if (!strcmp(PCI_ROOT_HID_STRING,
-					info->compatibility_id.id[i].value)) {
-					kfree(buffer.pointer);
-					return 1;
-				}
-			}
-		}
-		kfree(buffer.pointer);
-	}
-	return 0;
-}
-EXPORT_SYMBOL_GPL(acpi_root_bridge);
-
-
 static int is_ejectable(acpi_handle handle)
 {
 	acpi_status status;
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 3a6064bce56..fc6636e3300 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -1631,7 +1631,7 @@ find_root_bridges(acpi_handle handle, u32 lvl, void *context, void **rv)
 {
 	int *count = (int *)context;
 
-	if (acpi_root_bridge(handle)) {
+	if (acpi_is_root_bridge(handle)) {
 		acpi_install_notify_handler(handle, ACPI_SYSTEM_NOTIFY,
 				handle_hotplug_event_bridge, NULL);
 			(*count)++;
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index c34b1102290..96d593ee485 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -369,6 +369,7 @@ struct device *acpi_get_physical_pci_device(acpi_handle);
 
 /* helper */
 acpi_handle acpi_get_child(acpi_handle, acpi_integer);
+int acpi_is_root_bridge(acpi_handle);
 acpi_handle acpi_get_pci_rootbridge_handle(unsigned int, unsigned int);
 #define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)((dev)->archdata.acpi_handle))
 
diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h
index 20998746518..a3576ef9fc7 100644
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -226,7 +226,6 @@ struct hotplug_params {
 extern acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus,
 				struct hotplug_params *hpp);
 int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags);
-int acpi_root_bridge(acpi_handle handle);
 int acpi_pci_check_ejectable(struct pci_bus *pbus, acpi_handle handle);
 int acpi_pci_detect_ejectable(struct pci_bus *pbus);
 #endif
-- 
cgit v1.2.3-70-g09d2


From 2f7bbceb5b6aa938024bb4dad93c410fa59ed3b9 Mon Sep 17 00:00:00 2001
From: Alexander Chiang <achiang@hp.com>
Date: Wed, 10 Jun 2009 19:55:20 +0000
Subject: ACPI: Introduce acpi_get_pci_dev()

Convert an ACPI CA handle to a struct pci_dev.

Performing this lookup dynamically allows us to get rid of the
ACPI-PCI binding code, which:

	- eliminates struct acpi_device vs struct pci_dev lifetime issues
	- lays more groundwork for eliminating .start from acpi_device_ops
	  and thus simplifying ACPI drivers
	- whacks out a lot of code

This change lays the groundwork for eliminating much of pci_bind.c.

Although pci_root.c may not be the most logical place for this
change, putting it here saves us from having to export acpi_pci_find_root.

Signed-off-by: Alex Chiang <achiang@hp.com>
Acked-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/pci_root.c     | 81 +++++++++++++++++++++++++++++++++++++++++++++
 include/acpi/acpi_drivers.h |  1 +
 2 files changed, 82 insertions(+)

(limited to 'include')

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 888cb9f5c5f..e5099919e57 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -329,6 +329,87 @@ static struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle)
 	return NULL;
 }
 
+struct acpi_handle_node {
+	struct list_head node;
+	acpi_handle handle;
+};
+
+/**
+ * acpi_get_pci_dev - convert ACPI CA handle to struct pci_dev
+ * @handle: the handle in question
+ *
+ * Given an ACPI CA handle, the desired PCI device is located in the
+ * list of PCI devices.
+ *
+ * If the device is found, its reference count is increased and this
+ * function returns a pointer to its data structure.  The caller must
+ * decrement the reference count by calling pci_dev_put().
+ * If no device is found, %NULL is returned.
+ */
+struct pci_dev *acpi_get_pci_dev(acpi_handle handle)
+{
+	int dev, fn;
+	unsigned long long adr;
+	acpi_status status;
+	acpi_handle phandle;
+	struct pci_bus *pbus;
+	struct pci_dev *pdev = NULL;
+	struct acpi_handle_node *node, *tmp;
+	struct acpi_pci_root *root;
+	LIST_HEAD(device_list);
+
+	/*
+	 * Walk up the ACPI CA namespace until we reach a PCI root bridge.
+	 */
+	phandle = handle;
+	while (!acpi_is_root_bridge(phandle)) {
+		node = kzalloc(sizeof(struct acpi_handle_node), GFP_KERNEL);
+		if (!node)
+			goto out;
+
+		INIT_LIST_HEAD(&node->node);
+		node->handle = phandle;
+		list_add(&node->node, &device_list);
+
+		status = acpi_get_parent(phandle, &phandle);
+		if (ACPI_FAILURE(status))
+			goto out;
+	}
+
+	root = acpi_pci_find_root(phandle);
+	if (!root)
+		goto out;
+
+	pbus = root->bus;
+
+	/*
+	 * Now, walk back down the PCI device tree until we return to our
+	 * original handle. Assumes that everything between the PCI root
+	 * bridge and the device we're looking for must be a P2P bridge.
+	 */
+	list_for_each_entry(node, &device_list, node) {
+		acpi_handle hnd = node->handle;
+		status = acpi_evaluate_integer(hnd, "_ADR", NULL, &adr);
+		if (ACPI_FAILURE(status))
+			goto out;
+		dev = (adr >> 16) & 0xffff;
+		fn  = adr & 0xffff;
+
+		pdev = pci_get_slot(pbus, PCI_DEVFN(dev, fn));
+		if (hnd == handle)
+			break;
+
+		pbus = pdev->subordinate;
+		pci_dev_put(pdev);
+	}
+out:
+	list_for_each_entry_safe(node, tmp, &device_list, node)
+		kfree(node);
+
+	return pdev;
+}
+EXPORT_SYMBOL_GPL(acpi_get_pci_dev);
+
 /**
  * acpi_pci_osc_control_set - commit requested control to Firmware
  * @handle: acpi_handle for the target ACPI object
diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h
index 7a2ce53146a..dbe3989952e 100644
--- a/include/acpi/acpi_drivers.h
+++ b/include/acpi/acpi_drivers.h
@@ -98,6 +98,7 @@ void acpi_pci_irq_del_prt(int segment, int bus);
 
 struct pci_bus;
 
+struct pci_dev *acpi_get_pci_dev(acpi_handle);
 acpi_status acpi_get_pci_id(acpi_handle handle, struct acpi_pci_id *id);
 int acpi_pci_bind_root(struct acpi_device *device, struct acpi_pci_id *id,
 		       struct pci_bus *bus);
-- 
cgit v1.2.3-70-g09d2


From 499650de6906722184b639989b47227a362b62f8 Mon Sep 17 00:00:00 2001
From: Alexander Chiang <achiang@hp.com>
Date: Wed, 10 Jun 2009 19:55:30 +0000
Subject: ACPI: eviscerate pci_bind.c

Now that we can dynamically convert an ACPI CA handle to a
struct pci_dev at runtime, there's no need to statically bind
them during boot.

acpi_pci_bind/unbind are vastly simplified, and are only used
to evaluate _PRT methods on P2P bridges and non-bridge children.

This patch also changes the time-space tradeoff ever so slightly.

Looking up the ACPI-PCI binding is never in the performance path, and by
eliminating this caching, we save 24 bytes for each _ADR device in the
ACPI namespace.

This patch lays further groundwork to eventually eliminate
the acpi_driver_ops.bind callback.

Signed-off-by: Alex Chiang <achiang@hp.com>
Acked-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/pci_bind.c     | 245 +++++++-------------------------------------
 drivers/acpi/pci_root.c     |   2 +-
 include/acpi/acpi_drivers.h |   3 +-
 3 files changed, 39 insertions(+), 211 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/pci_bind.c b/drivers/acpi/pci_bind.c
index c283c29662a..703d2a3e801 100644
--- a/drivers/acpi/pci_bind.c
+++ b/drivers/acpi/pci_bind.c
@@ -24,12 +24,7 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
 #include <linux/types.h>
-#include <linux/proc_fs.h>
-#include <linux/spinlock.h>
-#include <linux/pm.h>
 #include <linux/pci.h>
 #include <linux/acpi.h>
 #include <acpi/acpi_bus.h>
@@ -111,238 +106,72 @@ EXPORT_SYMBOL(acpi_get_pci_id);
 
 static int acpi_pci_unbind(struct acpi_device *device)
 {
-	int result = 0;
-	acpi_status status;
-	struct acpi_pci_data *data;
-	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-
-
-	if (!device || !device->parent)
-		return -EINVAL;
-
-	status = acpi_get_name(device->handle, ACPI_FULL_PATHNAME, &buffer);
-	if (ACPI_FAILURE(status))
-		return -ENODEV;
-
-	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Unbinding PCI device [%s]...\n",
-			  (char *) buffer.pointer));
-	kfree(buffer.pointer);
+	struct pci_dev *dev;
 
-	status =
-	    acpi_get_data(device->handle, acpi_pci_data_handler,
-			  (void **)&data);
-	if (ACPI_FAILURE(status)) {
-		result = -ENODEV;
-		goto end;
-	}
+	dev = acpi_get_pci_dev(device->handle);
+	if (!dev)
+		return 0;
 
-	status = acpi_detach_data(device->handle, acpi_pci_data_handler);
-	if (ACPI_FAILURE(status)) {
-		ACPI_EXCEPTION((AE_INFO, status,
-				"Unable to detach data from device %s",
-				acpi_device_bid(device)));
-		result = -ENODEV;
-		goto end;
-	}
-	if (data->dev->subordinate) {
-		acpi_pci_irq_del_prt(data->id.segment, data->bus->number);
-	}
-	pci_dev_put(data->dev);
-	kfree(data);
+	if (dev->subordinate)
+		acpi_pci_irq_del_prt(pci_domain_nr(dev->bus),
+				     dev->subordinate->number);
 
-      end:
-	return result;
+	pci_dev_put(dev);
+	return 0;
 }
 
 static int acpi_pci_bind(struct acpi_device *device)
 {
-	int result = 0;
 	acpi_status status;
-	struct acpi_pci_data *data;
-	struct acpi_pci_data *pdata;
-	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	acpi_handle handle;
+	unsigned char bus;
+	struct pci_dev *dev;
 
-	if (!device || !device->parent)
-		return -EINVAL;
-
-	data = kzalloc(sizeof(struct acpi_pci_data), GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	status = acpi_get_name(device->handle, ACPI_FULL_PATHNAME, &buffer);
-	if (ACPI_FAILURE(status)) {
-		kfree(data);
-		return -ENODEV;
-	}
-
-	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Binding PCI device [%s]...\n",
-			  (char *)buffer.pointer));
-
-	/* 
-	 * Segment & Bus
-	 * -------------
-	 * These are obtained via the parent device's ACPI-PCI context.
-	 */
-	status = acpi_get_data(device->parent->handle, acpi_pci_data_handler,
-			       (void **)&pdata);
-	if (ACPI_FAILURE(status) || !pdata || !pdata->bus) {
-		ACPI_EXCEPTION((AE_INFO, status,
-				"Invalid ACPI-PCI context for parent device %s",
-				acpi_device_bid(device->parent)));
-		result = -ENODEV;
-		goto end;
-	}
-	data->id.segment = pdata->id.segment;
-	data->id.bus = pdata->bus->number;
-
-	/*
-	 * Device & Function
-	 * -----------------
-	 * These are simply obtained from the device's _ADR method.  Note
-	 * that a value of zero is valid.
-	 */
-	data->id.device = device->pnp.bus_address >> 16;
-	data->id.function = device->pnp.bus_address & 0xFFFF;
-
-	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "...to %04x:%02x:%02x.%d\n",
-			  data->id.segment, data->id.bus, data->id.device,
-			  data->id.function));
-
-	/*
-	 * TBD: Support slot devices (e.g. function=0xFFFF).
-	 */
-
-	/* 
-	 * Locate PCI Device
-	 * -----------------
-	 * Locate matching device in PCI namespace.  If it doesn't exist
-	 * this typically means that the device isn't currently inserted
-	 * (e.g. docking station, port replicator, etc.).
-	 */
-	data->dev = pci_get_slot(pdata->bus,
-				PCI_DEVFN(data->id.device, data->id.function));
-	if (!data->dev) {
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-				  "Device %04x:%02x:%02x.%d not present in PCI namespace\n",
-				  data->id.segment, data->id.bus,
-				  data->id.device, data->id.function));
-		result = -ENODEV;
-		goto end;
-	}
-	if (!data->dev->bus) {
-		printk(KERN_ERR PREFIX
-			    "Device %04x:%02x:%02x.%d has invalid 'bus' field\n",
-			    data->id.segment, data->id.bus,
-			    data->id.device, data->id.function);
-		result = -ENODEV;
-		goto end;
-	}
+	dev = acpi_get_pci_dev(device->handle);
+	if (!dev)
+		return 0;
 
 	/*
-	 * PCI Bridge?
-	 * -----------
-	 * If so, set the 'bus' field and install the 'bind' function to 
-	 * facilitate callbacks for all of its children.
+	 * Install the 'bind' function to facilitate callbacks for
+	 * children of the P2P bridge.
 	 */
-	if (data->dev->subordinate) {
+	if (dev->subordinate) {
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
 				  "Device %04x:%02x:%02x.%d is a PCI bridge\n",
-				  data->id.segment, data->id.bus,
-				  data->id.device, data->id.function));
-		data->bus = data->dev->subordinate;
+				  pci_domain_nr(dev->bus), dev->bus->number,
+				  PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)));
 		device->ops.bind = acpi_pci_bind;
 		device->ops.unbind = acpi_pci_unbind;
 	}
 
 	/*
-	 * Attach ACPI-PCI Context
-	 * -----------------------
-	 * Thus binding the ACPI and PCI devices.
-	 */
-	status = acpi_attach_data(device->handle, acpi_pci_data_handler, data);
-	if (ACPI_FAILURE(status)) {
-		ACPI_EXCEPTION((AE_INFO, status,
-				"Unable to attach ACPI-PCI context to device %s",
-				acpi_device_bid(device)));
-		result = -ENODEV;
-		goto end;
-	}
-
-	/*
-	 * PCI Routing Table
-	 * -----------------
-	 * Evaluate and parse _PRT, if exists.  This code is independent of 
-	 * PCI bridges (above) to allow parsing of _PRT objects within the
-	 * scope of non-bridge devices.  Note that _PRTs within the scope of
-	 * a PCI bridge assume the bridge's subordinate bus number.
+	 * Evaluate and parse _PRT, if exists.  This code allows parsing of
+	 * _PRT objects within the scope of non-bridge devices.  Note that
+	 * _PRTs within the scope of a PCI bridge assume the bridge's
+	 * subordinate bus number.
 	 *
 	 * TBD: Can _PRTs exist within the scope of non-bridge PCI devices?
 	 */
 	status = acpi_get_handle(device->handle, METHOD_NAME__PRT, &handle);
-	if (ACPI_SUCCESS(status)) {
-		if (data->bus)	/* PCI-PCI bridge */
-			acpi_pci_irq_add_prt(device->handle, data->id.segment,
-					     data->bus->number);
-		else		/* non-bridge PCI device */
-			acpi_pci_irq_add_prt(device->handle, data->id.segment,
-					     data->id.bus);
-	}
-
-      end:
-	kfree(buffer.pointer);
-	if (result) {
-		pci_dev_put(data->dev);
-		kfree(data);
-	}
-	return result;
-}
+	if (ACPI_FAILURE(status))
+		goto out;
 
-int
-acpi_pci_bind_root(struct acpi_device *device,
-		   struct acpi_pci_id *id, struct pci_bus *bus)
-{
-	int result = 0;
-	acpi_status status;
-	struct acpi_pci_data *data = NULL;
-	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	if (dev->subordinate)
+		bus = dev->subordinate->number;
+	else
+		bus = dev->bus->number;
 
-	if (!device || !id || !bus) {
-		return -EINVAL;
-	}
+	acpi_pci_irq_add_prt(device->handle, pci_domain_nr(dev->bus), bus);
 
-	data = kzalloc(sizeof(struct acpi_pci_data), GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
+out:
+	pci_dev_put(dev);
+	return 0;
+}
 
-	data->id = *id;
-	data->bus = bus;
+int acpi_pci_bind_root(struct acpi_device *device)
+{
 	device->ops.bind = acpi_pci_bind;
 	device->ops.unbind = acpi_pci_unbind;
 
-	status = acpi_get_name(device->handle, ACPI_FULL_PATHNAME, &buffer);
-	if (ACPI_FAILURE(status)) {
-		kfree (data);
-		return -ENODEV;
-	}
-
-	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Binding PCI root bridge [%s] to "
-			"%04x:%02x\n", (char *)buffer.pointer,
-			id->segment, id->bus));
-
-	status = acpi_attach_data(device->handle, acpi_pci_data_handler, data);
-	if (ACPI_FAILURE(status)) {
-		ACPI_EXCEPTION((AE_INFO, status,
-				"Unable to attach ACPI-PCI context to device %s",
-				(char *)buffer.pointer));
-		result = -ENODEV;
-		goto end;
-	}
-
-      end:
-	kfree(buffer.pointer);
-	if (result != 0)
-		kfree(data);
-
-	return result;
+	return 0;
 }
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index e5099919e57..f23fcc5c967 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -603,7 +603,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 	 * -----------------------
 	 * Thus binding the ACPI and PCI devices.
 	 */
-	result = acpi_pci_bind_root(device, &root->id, root->bus);
+	result = acpi_pci_bind_root(device);
 	if (result)
 		goto end;
 
diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h
index dbe3989952e..2740a289483 100644
--- a/include/acpi/acpi_drivers.h
+++ b/include/acpi/acpi_drivers.h
@@ -100,8 +100,7 @@ struct pci_bus;
 
 struct pci_dev *acpi_get_pci_dev(acpi_handle);
 acpi_status acpi_get_pci_id(acpi_handle handle, struct acpi_pci_id *id);
-int acpi_pci_bind_root(struct acpi_device *device, struct acpi_pci_id *id,
-		       struct pci_bus *bus);
+int acpi_pci_bind_root(struct acpi_device *device);
 
 /* Arch-defined function to add a bus to the system */
 
-- 
cgit v1.2.3-70-g09d2


From 859a3f86ca83346f4097e956d0b27d96aa7a1cff Mon Sep 17 00:00:00 2001
From: Alexander Chiang <achiang@hp.com>
Date: Wed, 10 Jun 2009 19:55:35 +0000
Subject: ACPI: simplify acpi_pci_irq_add_prt() API

A PCI domain cannot change as you descend down subordinate buses, which
makes the 'segment' argument to acpi_pci_irq_add_prt() useless.

Change the interface to take a struct pci_bus *, from whence we can derive
the bus number and segment. Reducing the number of arguments makes life
simpler for callers.

Signed-off-by: Alex Chiang <achiang@hp.com>
Acked-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/pci_bind.c     |  8 ++++----
 drivers/acpi/pci_irq.c      | 10 +++++-----
 drivers/acpi/pci_root.c     |  3 +--
 include/acpi/acpi_drivers.h |  2 +-
 4 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/pci_bind.c b/drivers/acpi/pci_bind.c
index 703d2a3e801..6eb58ef366e 100644
--- a/drivers/acpi/pci_bind.c
+++ b/drivers/acpi/pci_bind.c
@@ -124,7 +124,7 @@ static int acpi_pci_bind(struct acpi_device *device)
 {
 	acpi_status status;
 	acpi_handle handle;
-	unsigned char bus;
+	struct pci_bus *bus;
 	struct pci_dev *dev;
 
 	dev = acpi_get_pci_dev(device->handle);
@@ -157,11 +157,11 @@ static int acpi_pci_bind(struct acpi_device *device)
 		goto out;
 
 	if (dev->subordinate)
-		bus = dev->subordinate->number;
+		bus = dev->subordinate;
 	else
-		bus = dev->bus->number;
+		bus = dev->bus;
 
-	acpi_pci_irq_add_prt(device->handle, pci_domain_nr(dev->bus), bus);
+	acpi_pci_irq_add_prt(device->handle, bus);
 
 out:
 	pci_dev_put(dev);
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index 51b9f8280f8..3ed944cefb3 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -182,7 +182,7 @@ static void do_prt_fixups(struct acpi_prt_entry *entry,
 	}
 }
 
-static int acpi_pci_irq_add_entry(acpi_handle handle, int segment, int bus,
+static int acpi_pci_irq_add_entry(acpi_handle handle, struct pci_bus *bus,
 				  struct acpi_pci_routing_table *prt)
 {
 	struct acpi_prt_entry *entry;
@@ -196,8 +196,8 @@ static int acpi_pci_irq_add_entry(acpi_handle handle, int segment, int bus,
 	 * 1=INTA, 2=INTB.  We use the PCI encoding throughout, so convert
 	 * it here.
 	 */
-	entry->id.segment = segment;
-	entry->id.bus = bus;
+	entry->id.segment = pci_domain_nr(bus);
+	entry->id.bus = bus->number;
 	entry->id.device = (prt->address >> 16) & 0xFFFF;
 	entry->pin = prt->pin + 1;
 
@@ -242,7 +242,7 @@ static int acpi_pci_irq_add_entry(acpi_handle handle, int segment, int bus,
 	return 0;
 }
 
-int acpi_pci_irq_add_prt(acpi_handle handle, int segment, int bus)
+int acpi_pci_irq_add_prt(acpi_handle handle, struct pci_bus *bus)
 {
 	acpi_status status;
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
@@ -271,7 +271,7 @@ int acpi_pci_irq_add_prt(acpi_handle handle, int segment, int bus)
 
 	entry = buffer.pointer;
 	while (entry && (entry->length > 0)) {
-		acpi_pci_irq_add_entry(handle, segment, bus, entry);
+		acpi_pci_irq_add_entry(handle, bus, entry);
 		entry = (struct acpi_pci_routing_table *)
 		    ((unsigned long)entry + entry->length);
 	}
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index f23fcc5c967..f341b0756c9 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -614,8 +614,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 	 */
 	status = acpi_get_handle(device->handle, METHOD_NAME__PRT, &handle);
 	if (ACPI_SUCCESS(status))
-		result = acpi_pci_irq_add_prt(device->handle, root->id.segment,
-					      root->id.bus);
+		result = acpi_pci_irq_add_prt(device->handle, root->bus);
 
 	/*
 	 * Scan and bind all _ADR-Based Devices
diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h
index 2740a289483..686a960bde3 100644
--- a/include/acpi/acpi_drivers.h
+++ b/include/acpi/acpi_drivers.h
@@ -91,7 +91,7 @@ int acpi_pci_link_free_irq(acpi_handle handle);
 
 /* ACPI PCI Interrupt Routing (pci_irq.c) */
 
-int acpi_pci_irq_add_prt(acpi_handle handle, int segment, int bus);
+int acpi_pci_irq_add_prt(acpi_handle handle, struct pci_bus *bus);
 void acpi_pci_irq_del_prt(int segment, int bus);
 
 /* ACPI PCI Device Binding (pci_bind.c) */
-- 
cgit v1.2.3-70-g09d2


From d9efae3688addb15994c9ad9761dada6f988bc14 Mon Sep 17 00:00:00 2001
From: Alexander Chiang <achiang@hp.com>
Date: Wed, 10 Jun 2009 19:55:40 +0000
Subject: ACPI: simplify acpi_pci_irq_del_prt() API

There is no need to pass a segment/bus tuple to this API, as the callsite
always has a struct pci_bus. We can derive segment/bus from the
struct pci_bus, so let's take this opportunit to simplify the API and
make life easier for the callers.

Signed-off-by: Alex Chiang <achiang@hp.com>
Acked-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/pci_bind.c     | 3 +--
 drivers/acpi/pci_irq.c      | 7 ++++---
 include/acpi/acpi_drivers.h | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/pci_bind.c b/drivers/acpi/pci_bind.c
index 6eb58ef366e..62cb383222f 100644
--- a/drivers/acpi/pci_bind.c
+++ b/drivers/acpi/pci_bind.c
@@ -113,8 +113,7 @@ static int acpi_pci_unbind(struct acpi_device *device)
 		return 0;
 
 	if (dev->subordinate)
-		acpi_pci_irq_del_prt(pci_domain_nr(dev->bus),
-				     dev->subordinate->number);
+		acpi_pci_irq_del_prt(dev->subordinate);
 
 	pci_dev_put(dev);
 	return 0;
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index 3ed944cefb3..ef9509e3319 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -280,16 +280,17 @@ int acpi_pci_irq_add_prt(acpi_handle handle, struct pci_bus *bus)
 	return 0;
 }
 
-void acpi_pci_irq_del_prt(int segment, int bus)
+void acpi_pci_irq_del_prt(struct pci_bus *bus)
 {
 	struct acpi_prt_entry *entry, *tmp;
 
 	printk(KERN_DEBUG
 	       "ACPI: Delete PCI Interrupt Routing Table for %04x:%02x\n",
-	       segment, bus);
+	       pci_domain_nr(bus), bus->number);
 	spin_lock(&acpi_prt_lock);
 	list_for_each_entry_safe(entry, tmp, &acpi_prt_list, list) {
-		if (segment == entry->id.segment && bus == entry->id.bus) {
+		if (pci_domain_nr(bus) == entry->id.segment
+			&& bus->number == entry->id.bus) {
 			list_del(&entry->list);
 			kfree(entry);
 		}
diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h
index 686a960bde3..98ebaaedc07 100644
--- a/include/acpi/acpi_drivers.h
+++ b/include/acpi/acpi_drivers.h
@@ -92,7 +92,7 @@ int acpi_pci_link_free_irq(acpi_handle handle);
 /* ACPI PCI Interrupt Routing (pci_irq.c) */
 
 int acpi_pci_irq_add_prt(acpi_handle handle, struct pci_bus *bus);
-void acpi_pci_irq_del_prt(int segment, int bus);
+void acpi_pci_irq_del_prt(struct pci_bus *bus);
 
 /* ACPI PCI Device Binding (pci_bind.c) */
 
-- 
cgit v1.2.3-70-g09d2


From 80ffdedf6020a77adcd06c01cfe6c488312b28f8 Mon Sep 17 00:00:00 2001
From: Alexander Chiang <achiang@hp.com>
Date: Wed, 10 Jun 2009 19:55:55 +0000
Subject: ACPI: kill acpi_get_pci_id

acpi_get_pci_dev() is better, and all callers have been converted, so
eliminate acpi_get_pci_id().

Signed-off-by: Alex Chiang <achiang@hp.com>
Acked-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/pci_bind.c     | 71 ---------------------------------------------
 include/acpi/acpi_drivers.h |  1 -
 2 files changed, 72 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/pci_bind.c b/drivers/acpi/pci_bind.c
index a205769f1d0..a5a77b78a72 100644
--- a/drivers/acpi/pci_bind.c
+++ b/drivers/acpi/pci_bind.c
@@ -33,77 +33,6 @@
 #define _COMPONENT		ACPI_PCI_COMPONENT
 ACPI_MODULE_NAME("pci_bind");
 
-struct acpi_pci_data {
-	struct acpi_pci_id id;
-	struct pci_bus *bus;
-	struct pci_dev *dev;
-};
-
-static int acpi_pci_bind(struct acpi_device *device);
-static int acpi_pci_unbind(struct acpi_device *device);
-
-static void acpi_pci_data_handler(acpi_handle handle, u32 function,
-				  void *context)
-{
-
-	/* TBD: Anything we need to do here? */
-
-	return;
-}
-
-/**
- * acpi_get_pci_id
- * ------------------
- * This function is used by the ACPI Interpreter (a.k.a. Core Subsystem)
- * to resolve PCI information for ACPI-PCI devices defined in the namespace.
- * This typically occurs when resolving PCI operation region information.
- */
-acpi_status acpi_get_pci_id(acpi_handle handle, struct acpi_pci_id *id)
-{
-	int result = 0;
-	acpi_status status = AE_OK;
-	struct acpi_device *device = NULL;
-	struct acpi_pci_data *data = NULL;
-
-
-	if (!id)
-		return AE_BAD_PARAMETER;
-
-	result = acpi_bus_get_device(handle, &device);
-	if (result) {
-		printk(KERN_ERR PREFIX
-			    "Invalid ACPI Bus context for device %s\n",
-			    acpi_device_bid(device));
-		return AE_NOT_EXIST;
-	}
-
-	status = acpi_get_data(handle, acpi_pci_data_handler, (void **)&data);
-	if (ACPI_FAILURE(status) || !data) {
-		ACPI_EXCEPTION((AE_INFO, status,
-				"Invalid ACPI-PCI context for device %s",
-				acpi_device_bid(device)));
-		return status;
-	}
-
-	*id = data->id;
-
-	/*
-	   id->segment = data->id.segment;
-	   id->bus = data->id.bus;
-	   id->device = data->id.device;
-	   id->function = data->id.function;
-	 */
-
-	ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-			  "Device %s has PCI address %04x:%02x:%02x.%d\n",
-			  acpi_device_bid(device), id->segment, id->bus,
-			  id->device, id->function));
-
-	return AE_OK;
-}
-
-EXPORT_SYMBOL(acpi_get_pci_id);
-
 static int acpi_pci_unbind(struct acpi_device *device)
 {
 	struct pci_dev *dev;
diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h
index 98ebaaedc07..b6928577317 100644
--- a/include/acpi/acpi_drivers.h
+++ b/include/acpi/acpi_drivers.h
@@ -99,7 +99,6 @@ void acpi_pci_irq_del_prt(struct pci_bus *bus);
 struct pci_bus;
 
 struct pci_dev *acpi_get_pci_dev(acpi_handle);
-acpi_status acpi_get_pci_id(acpi_handle handle, struct acpi_pci_id *id);
 int acpi_pci_bind_root(struct acpi_device *device);
 
 /* Arch-defined function to add a bus to the system */
-- 
cgit v1.2.3-70-g09d2


From 7fe2a6c275a5bcec52fb3ef643daaf8265b7af0d Mon Sep 17 00:00:00 2001
From: Alexander Chiang <achiang@hp.com>
Date: Wed, 10 Jun 2009 19:56:05 +0000
Subject: ACPI: kill acpi_get_physical_pci_device()

acpi_get_pci_dev() is (hopefully) better, and all callers have been
converted, so let's get rid of this duplicated functionality.

Cc: Thomas Renninger <trenn@suse.de>
Signed-off-by: Alex Chiang <achiang@hp.com>
Acked-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/glue.c     | 40 ----------------------------------------
 include/acpi/acpi_bus.h |  1 -
 2 files changed, 41 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 8bd2c2a6884..a8a5c29958c 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -140,46 +140,6 @@ struct device *acpi_get_physical_device(acpi_handle handle)
 
 EXPORT_SYMBOL(acpi_get_physical_device);
 
-/* ToDo: When a PCI bridge is found, return the PCI device behind the bridge
- *       This should work in general, but did not on a Lenovo T61 for the
- *	 graphics card. But this must be fixed when the PCI device is
- *       bound and the kernel device struct is attached to the acpi device
- * Note: A success call will increase reference count by one
- *       Do call put_device(dev) on the returned device then
- */
-struct device *acpi_get_physical_pci_device(acpi_handle handle)
-{
-	struct device *dev;
-	long long device_id;
-	acpi_status status;
-
-	status =
-		acpi_evaluate_integer(handle, "_ADR", NULL, &device_id);
-
-	if (ACPI_FAILURE(status))
-		return NULL;
-
-	/* We need to attempt to determine whether the _ADR refers to a
-	   PCI device or not. There's no terribly good way to do this,
-	   so the best we can hope for is to assume that there'll never
-	   be a device in the host bridge */
-	if (device_id >= 0x10000) {
-		/* It looks like a PCI device. Does it exist? */
-		dev = acpi_get_physical_device(handle);
-	} else {
-		/* It doesn't look like a PCI device. Does its parent
-		   exist? */
-		acpi_handle phandle;
-		if (acpi_get_parent(handle, &phandle))
-			return NULL;
-		dev = acpi_get_physical_device(phandle);
-	}
-	if (!dev)
-		return NULL;
-	return dev;
-}
-EXPORT_SYMBOL(acpi_get_physical_pci_device);
-
 static int acpi_bind_one(struct device *dev, acpi_handle handle)
 {
 	struct acpi_device *acpi_dev;
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 96d593ee485..700b263c898 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -365,7 +365,6 @@ struct acpi_bus_type {
 int register_acpi_bus_type(struct acpi_bus_type *);
 int unregister_acpi_bus_type(struct acpi_bus_type *);
 struct device *acpi_get_physical_device(acpi_handle);
-struct device *acpi_get_physical_pci_device(acpi_handle);
 
 /* helper */
 acpi_handle acpi_get_child(acpi_handle, acpi_integer);
-- 
cgit v1.2.3-70-g09d2


From 6d2781310036a8d3fa2b590a6f83a298010fd64a Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Thu, 30 Apr 2009 09:35:37 -0600
Subject: ACPI: allow drivers to request both device and system notify events

System notify events (0x00-0x7f) are common across all device types
and should be handled in Linux/ACPI, not in drivers.  However, some
BIOSes use system notify events in device-specific ways that require
the driver to be involved.

This patch adds a ACPI_DRIVER_ALL_NOTIFY_EVENTS driver flag.  When a
driver sets this flag and supplies a .notify method, Linux/ACPI calls
the .notify method for ALL notify events on the device, not just the
device-specific (0x80-0xff) events.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/bus.c      | 6 +++++-
 include/acpi/acpi_bus.h | 3 +++
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index ae862f1798d..cdfecc0a2ac 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -549,6 +549,7 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data)
 {
 	int result = 0;
 	struct acpi_device *device = NULL;
+	struct acpi_driver *driver;
 
 	blocking_notifier_call_chain(&acpi_bus_notify_list,
 		type, (void *)handle);
@@ -629,7 +630,10 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data)
 		break;
 	}
 
-	return;
+	driver = device->driver;
+	if (driver && driver->ops.notify &&
+	    (driver->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS))
+		driver->ops.notify(device, type);
 }
 
 /* --------------------------------------------------------------------------
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index c34b1102290..84e35d5646a 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -114,10 +114,13 @@ struct acpi_device_ops {
 	acpi_op_notify notify;
 };
 
+#define ACPI_DRIVER_ALL_NOTIFY_EVENTS	0x1	/* system AND device events */
+
 struct acpi_driver {
 	char name[80];
 	char class[80];
 	const struct acpi_device_id *ids; /* Supported Hardware IDs */
+	unsigned int flags;
 	struct acpi_device_ops ops;
 	struct device_driver drv;
 	struct module *owner;
-- 
cgit v1.2.3-70-g09d2


From 7522060c95395f479ee4a6af3bbf9e097e92e48f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 18 Jun 2009 08:00:17 +0200
Subject: perf report: Add validation of call-chain entries

Add boundary checks for call-chain events. In case of corrupted
entries we could crash otherwise.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 20 ++++++------
 tools/perf/builtin-report.c  | 74 +++++++++++++++++++++++++++-----------------
 2 files changed, 56 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index eccae437fe3..a7d3a61a59b 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -337,6 +337,16 @@ enum perf_event_type {
 	 */
 };
 
+#define MAX_STACK_DEPTH			255
+
+struct perf_callchain_entry {
+	__u16				nr;
+	__u16				hv;
+	__u16				kernel;
+	__u16				user;
+	__u64				ip[MAX_STACK_DEPTH];
+};
+
 #ifdef __KERNEL__
 /*
  * Kernel-internal data types and definitions:
@@ -652,16 +662,6 @@ extern void perf_counter_fork(struct task_struct *tsk);
 
 extern void perf_counter_task_migration(struct task_struct *task, int cpu);
 
-#define MAX_STACK_DEPTH			255
-
-struct perf_callchain_entry {
-	u16				nr;
-	u16				hv;
-	u16				kernel;
-	u16				user;
-	u64				ip[MAX_STACK_DEPTH];
-};
-
 extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
 
 extern int sysctl_perf_counter_paranoid;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 986834623b4..e14e9867617 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -39,6 +39,8 @@ static int		dump_trace = 0;
 #define cdprintf(x...)	do { if (dump_trace) color_fprintf(stdout, color, x); } while (0)
 
 static int		verbose;
+#define eprintf(x...)	do { if (verbose) fprintf(stderr, x); } while (0)
+
 static int		full_paths;
 
 static unsigned long	page_size;
@@ -47,14 +49,6 @@ static unsigned long	mmap_window = 32;
 static char		*parent_pattern = "^sys_|^do_page_fault";
 static regex_t		parent_regex;
 
-struct ip_chain_event {
-	__u16 nr;
-	__u16 hv;
-	__u16 kernel;
-	__u16 user;
-	__u64 ips[];
-};
-
 struct ip_event {
 	struct perf_event_header header;
 	__u64 ip;
@@ -131,15 +125,11 @@ static struct dso *dsos__findnew(const char *name)
 
 	nr = dso__load(dso, NULL, verbose);
 	if (nr < 0) {
-		if (verbose)
-			fprintf(stderr, "Failed to open: %s\n", name);
+		eprintf("Failed to open: %s\n", name);
 		goto out_delete_dso;
 	}
-	if (!nr && verbose) {
-		fprintf(stderr,
-		"No symbols found in: %s, maybe install a debug package?\n",
-				name);
-	}
+	if (!nr)
+		eprintf("No symbols found in: %s, maybe install a debug package?\n", name);
 
 	dsos__add(dso);
 
@@ -844,7 +834,7 @@ static struct symbol *call__match(struct symbol *sym)
 
 static int
 hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
-		struct symbol *sym, __u64 ip, struct ip_chain_event *chain,
+		struct symbol *sym, __u64 ip, struct perf_callchain_entry *chain,
 		char level, __u64 count)
 {
 	struct rb_node **p = &hist.rb_node;
@@ -868,7 +858,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
 		__u64 ip;
 
 		for (i = 0; i < chain->kernel; i++) {
-			ip = chain->ips[nr + i];
+			ip = chain->ip[nr + i];
 			dso = kernel_dso;
 			sym = resolve_symbol(thread, NULL, &dso, &ip);
 			entry.parent = call__match(sym);
@@ -878,7 +868,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
 		nr += i;
 
 		for (i = 0; i < chain->user; i++) {
-			ip = chain->ips[nr + i];
+			ip = chain->ip[nr + i];
 			sym = resolve_symbol(thread, NULL, NULL, &ip);
 			entry.parent = call__match(sym);
 			if (entry.parent)
@@ -1080,6 +1070,30 @@ static unsigned long total = 0,
 		     total_fork = 0,
 		     total_unknown = 0;
 
+static int validate_chain(struct perf_callchain_entry *chain, event_t *event)
+{
+	unsigned int chain_size;
+
+	if (chain->nr > MAX_STACK_DEPTH)
+		return -1;
+	if (chain->hv > MAX_STACK_DEPTH)
+		return -1;
+	if (chain->kernel > MAX_STACK_DEPTH)
+		return -1;
+	if (chain->user > MAX_STACK_DEPTH)
+		return -1;
+	if (chain->hv + chain->kernel + chain->user != chain->nr)
+		return -1;
+
+	chain_size = event->header.size;
+	chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event;
+
+	if (chain->nr*sizeof(__u64) > chain_size)
+		return -1;
+
+	return 0;
+}
+
 static int
 process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
 {
@@ -1091,7 +1105,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
 	__u64 period = 1;
 	struct map *map = NULL;
 	void *more_data = event->ip.__more_data;
-	struct ip_chain_event *chain = NULL;
+	struct perf_callchain_entry *chain = NULL;
 
 	if (event->header.type & PERF_SAMPLE_PERIOD) {
 		period = *(__u64 *)more_data;
@@ -1111,21 +1125,26 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
 
 		chain = (void *)more_data;
 
-		if (dump_trace) {
-			dprintf("... chain: u:%d, k:%d, nr:%d\n",
-				chain->user,
-				chain->kernel,
-				chain->nr);
+		dprintf("... chain: u:%d, k:%d, nr:%d\n",
+			chain->user,
+			chain->kernel,
+			chain->nr);
 
+		if (validate_chain(chain, event) < 0) {
+			eprintf("call-chain problem with event, skipping it.\n");
+			return 0;
+		}
+
+		if (dump_trace) {
 			for (i = 0; i < chain->nr; i++)
-				dprintf("..... %2d: %016Lx\n", i, chain->ips[i]);
+				dprintf("..... %2d: %016Lx\n", i, chain->ip[i]);
 		}
 	}
 
 	dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid);
 
 	if (thread == NULL) {
-		fprintf(stderr, "problem processing %d event, skipping it.\n",
+		eprintf("problem processing %d event, skipping it.\n",
 			event->header.type);
 		return -1;
 	}
@@ -1153,8 +1172,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
 		struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip);
 
 		if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) {
-			fprintf(stderr,
-		"problem incrementing symbol count, skipping event\n");
+			eprintf("problem incrementing symbol count, skipping event\n");
 			return -1;
 		}
 	}
-- 
cgit v1.2.3-70-g09d2


From 31278e71471399beaff9280737e52b47db4dc345 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Wed, 17 Jun 2009 01:12:19 +0000
Subject: net: group address list and its count

This patch is inspired by patch recently posted by Johannes Berg. Basically what
my patch does is to group list and a count of addresses into newly introduced
structure netdev_hw_addr_list. This brings us two benefits:
1) struct net_device becames a bit nicer.
2) in the future there will be a possibility to operate with lists independently
   on netdevices (with exporting right functions).
I wanted to introduce this patch before I'll post a multicast lists conversion.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>

 drivers/net/bnx2.c              |    4 +-
 drivers/net/e1000/e1000_main.c  |    4 +-
 drivers/net/ixgbe/ixgbe_main.c  |    6 +-
 drivers/net/mv643xx_eth.c       |    2 +-
 drivers/net/niu.c               |    4 +-
 drivers/net/virtio_net.c        |   10 ++--
 drivers/s390/net/qeth_l2_main.c |    2 +-
 include/linux/netdevice.h       |   17 +++--
 net/core/dev.c                  |  130 ++++++++++++++++++--------------------
 9 files changed, 89 insertions(+), 90 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bnx2.c              |   4 +-
 drivers/net/e1000/e1000_main.c  |   4 +-
 drivers/net/ixgbe/ixgbe_main.c  |   6 +-
 drivers/net/mv643xx_eth.c       |   2 +-
 drivers/net/niu.c               |   4 +-
 drivers/net/virtio_net.c        |  10 ++--
 drivers/s390/net/qeth_l2_main.c |   2 +-
 include/linux/netdevice.h       |  17 ++++--
 net/core/dev.c                  | 130 +++++++++++++++++++---------------------
 9 files changed, 89 insertions(+), 90 deletions(-)

(limited to 'include')

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 7e3738112c4..38f1c3375d7 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -3552,14 +3552,14 @@ bnx2_set_rx_mode(struct net_device *dev)
 		sort_mode |= BNX2_RPM_SORT_USER0_MC_HSH_EN;
 	}
 
-	if (dev->uc_count > BNX2_MAX_UNICAST_ADDRESSES) {
+	if (dev->uc.count > BNX2_MAX_UNICAST_ADDRESSES) {
 		rx_mode |= BNX2_EMAC_RX_MODE_PROMISCUOUS;
 		sort_mode |= BNX2_RPM_SORT_USER0_PROM_EN |
 			     BNX2_RPM_SORT_USER0_PROM_VLAN;
 	} else if (!(dev->flags & IFF_PROMISC)) {
 		/* Add all entries into to the match filter list */
 		i = 0;
-		list_for_each_entry(ha, &dev->uc_list, list) {
+		list_for_each_entry(ha, &dev->uc.list, list) {
 			bnx2_set_mac_addr(bp, ha->addr,
 					  i + BNX2_START_UNICAST_ADDRESS_INDEX);
 			sort_mode |= (1 <<
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 8d36743c814..5e3356f8eb5 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2370,7 +2370,7 @@ static void e1000_set_rx_mode(struct net_device *netdev)
 			rctl |= E1000_RCTL_VFE;
 	}
 
-	if (netdev->uc_count > rar_entries - 1) {
+	if (netdev->uc.count > rar_entries - 1) {
 		rctl |= E1000_RCTL_UPE;
 	} else if (!(netdev->flags & IFF_PROMISC)) {
 		rctl &= ~E1000_RCTL_UPE;
@@ -2394,7 +2394,7 @@ static void e1000_set_rx_mode(struct net_device *netdev)
 	 */
 	i = 1;
 	if (use_uc)
-		list_for_each_entry(ha, &netdev->uc_list, list) {
+		list_for_each_entry(ha, &netdev->uc.list, list) {
 			if (i == rar_entries)
 				break;
 			e1000_rar_set(hw, ha->addr, i++);
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index a551a96ce67..e756e220db3 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -2321,7 +2321,7 @@ static void ixgbe_set_rx_mode(struct net_device *netdev)
 	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
 
 	/* reprogram secondary unicast list */
-	hw->mac.ops.update_uc_addr_list(hw, &netdev->uc_list);
+	hw->mac.ops.update_uc_addr_list(hw, &netdev->uc.list);
 
 	/* reprogram multicast list */
 	addr_count = netdev->mc_count;
@@ -5261,7 +5261,7 @@ static int ixgbe_ioctl(struct net_device *netdev, struct ifreq *req, int cmd)
 
 /**
  * ixgbe_add_sanmac_netdev - Add the SAN MAC address to the corresponding
- * netdev->dev_addr_list
+ * netdev->dev_addrs
  * @netdev: network interface device structure
  *
  * Returns non-zero on failure
@@ -5282,7 +5282,7 @@ static int ixgbe_add_sanmac_netdev(struct net_device *dev)
 
 /**
  * ixgbe_del_sanmac_netdev - Removes the SAN MAC address to the corresponding
- * netdev->dev_addr_list
+ * netdev->dev_addrs
  * @netdev: network interface device structure
  *
  * Returns non-zero on failure
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index b4e18a58cb1..745ae8b4a2e 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -1729,7 +1729,7 @@ static u32 uc_addr_filter_mask(struct net_device *dev)
 		return 0;
 
 	nibbles = 1 << (dev->dev_addr[5] & 0x0f);
-	list_for_each_entry(ha, &dev->uc_list, list) {
+	list_for_each_entry(ha, &dev->uc.list, list) {
 		if (memcmp(dev->dev_addr, ha->addr, 5))
 			return 0;
 		if ((dev->dev_addr[5] ^ ha->addr[5]) & 0xf0)
diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index fa61a12c5e1..d2146d4a10f 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -6376,7 +6376,7 @@ static void niu_set_rx_mode(struct net_device *dev)
 	if ((dev->flags & IFF_ALLMULTI) || (dev->mc_count > 0))
 		np->flags |= NIU_FLAGS_MCAST;
 
-	alt_cnt = dev->uc_count;
+	alt_cnt = dev->uc.count;
 	if (alt_cnt > niu_num_alt_addr(np)) {
 		alt_cnt = 0;
 		np->flags |= NIU_FLAGS_PROMISC;
@@ -6385,7 +6385,7 @@ static void niu_set_rx_mode(struct net_device *dev)
 	if (alt_cnt) {
 		int index = 0;
 
-		list_for_each_entry(ha, &dev->uc_list, list) {
+		list_for_each_entry(ha, &dev->uc.list, list) {
 			err = niu_set_alt_mac(np, index, ha->addr);
 			if (err)
 				printk(KERN_WARNING PFX "%s: Error %d "
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 52198f6797a..2a6e81d5b57 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -709,7 +709,7 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 			 allmulti ? "en" : "dis");
 
 	/* MAC filter - use one buffer for both lists */
-	mac_data = buf = kzalloc(((dev->uc_count + dev->mc_count) * ETH_ALEN) +
+	mac_data = buf = kzalloc(((dev->uc.count + dev->mc_count) * ETH_ALEN) +
 				 (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
 	if (!buf) {
 		dev_warn(&dev->dev, "No memory for MAC address buffer\n");
@@ -719,16 +719,16 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	sg_init_table(sg, 2);
 
 	/* Store the unicast list and count in the front of the buffer */
-	mac_data->entries = dev->uc_count;
+	mac_data->entries = dev->uc.count;
 	i = 0;
-	list_for_each_entry(ha, &dev->uc_list, list)
+	list_for_each_entry(ha, &dev->uc.list, list)
 		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
 
 	sg_set_buf(&sg[0], mac_data,
-		   sizeof(mac_data->entries) + (dev->uc_count * ETH_ALEN));
+		   sizeof(mac_data->entries) + (dev->uc.count * ETH_ALEN));
 
 	/* multicast list and count fill the end */
-	mac_data = (void *)&mac_data->macs[dev->uc_count][0];
+	mac_data = (void *)&mac_data->macs[dev->uc.count][0];
 
 	mac_data->entries = dev->mc_count;
 	addr = dev->mc_list;
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index ecd3d06c0d5..3607d107f49 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -655,7 +655,7 @@ static void qeth_l2_set_multicast_list(struct net_device *dev)
 	for (dm = dev->mc_list; dm; dm = dm->next)
 		qeth_l2_add_mc(card, dm->da_addr, 0);
 
-	list_for_each_entry(ha, &dev->uc_list, list)
+	list_for_each_entry(ha, &dev->uc.list, list)
 		qeth_l2_add_mc(card, ha->addr, 1);
 
 	spin_unlock_bh(&card->mclock);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9ea8d6dfe54..d4a4d986779 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -224,6 +224,11 @@ struct netdev_hw_addr {
 	struct rcu_head		rcu_head;
 };
 
+struct netdev_hw_addr_list {
+	struct list_head	list;
+	int			count;
+};
+
 struct hh_cache
 {
 	struct hh_cache *hh_next;	/* Next entry			     */
@@ -776,9 +781,8 @@ struct net_device
 	unsigned char		addr_len;	/* hardware address length	*/
 	unsigned short          dev_id;		/* for shared network cards */
 
-	struct list_head	uc_list;	/* Secondary unicast mac
-						   addresses */
-	int			uc_count;	/* Number of installed ucasts	*/
+	struct netdev_hw_addr_list	uc;	/* Secondary unicast
+						   mac addresses */
 	int			uc_promisc;
 	spinlock_t		addr_list_lock;
 	struct dev_addr_list	*mc_list;	/* Multicast mac addresses	*/
@@ -810,7 +814,8 @@ struct net_device
 						   because most packets are
 						   unicast) */
 
-	struct list_head	dev_addr_list; /* list of device hw addresses */
+	struct netdev_hw_addr_list	dev_addrs; /* list of device
+						      hw addresses */
 
 	unsigned char		broadcast[MAX_ADDR_LEN];	/* hw bcast add	*/
 
@@ -1806,11 +1811,11 @@ static inline void netif_addr_unlock_bh(struct net_device *dev)
 }
 
 /*
- * dev_addr_list walker. Should be used only for read access. Call with
+ * dev_addrs walker. Should be used only for read access. Call with
  * rcu_read_lock held.
  */
 #define for_each_dev_addr(dev, ha) \
-		list_for_each_entry_rcu(ha, &dev->dev_addr_list, list)
+		list_for_each_entry_rcu(ha, &dev->dev_addrs.list, list)
 
 /* These functions live elsewhere (drivers/net/net_init.c, but related) */
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 576a61574a9..baf2dc13a34 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3461,10 +3461,10 @@ void __dev_set_rx_mode(struct net_device *dev)
 		/* Unicast addresses changes may only happen under the rtnl,
 		 * therefore calling __dev_set_promiscuity here is safe.
 		 */
-		if (dev->uc_count > 0 && !dev->uc_promisc) {
+		if (dev->uc.count > 0 && !dev->uc_promisc) {
 			__dev_set_promiscuity(dev, 1);
 			dev->uc_promisc = 1;
-		} else if (dev->uc_count == 0 && dev->uc_promisc) {
+		} else if (dev->uc.count == 0 && dev->uc_promisc) {
 			__dev_set_promiscuity(dev, -1);
 			dev->uc_promisc = 0;
 		}
@@ -3483,9 +3483,8 @@ void dev_set_rx_mode(struct net_device *dev)
 
 /* hw addresses list handling functions */
 
-static int __hw_addr_add(struct list_head *list, int *delta,
-			 unsigned char *addr, int addr_len,
-			 unsigned char addr_type)
+static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
+			 int addr_len, unsigned char addr_type)
 {
 	struct netdev_hw_addr *ha;
 	int alloc_size;
@@ -3493,7 +3492,7 @@ static int __hw_addr_add(struct list_head *list, int *delta,
 	if (addr_len > MAX_ADDR_LEN)
 		return -EINVAL;
 
-	list_for_each_entry(ha, list, list) {
+	list_for_each_entry(ha, &list->list, list) {
 		if (!memcmp(ha->addr, addr, addr_len) &&
 		    ha->type == addr_type) {
 			ha->refcount++;
@@ -3512,9 +3511,8 @@ static int __hw_addr_add(struct list_head *list, int *delta,
 	ha->type = addr_type;
 	ha->refcount = 1;
 	ha->synced = false;
-	list_add_tail_rcu(&ha->list, list);
-	if (delta)
-		(*delta)++;
+	list_add_tail_rcu(&ha->list, &list->list);
+	list->count++;
 	return 0;
 }
 
@@ -3526,120 +3524,121 @@ static void ha_rcu_free(struct rcu_head *head)
 	kfree(ha);
 }
 
-static int __hw_addr_del(struct list_head *list, int *delta,
-			 unsigned char *addr, int addr_len,
-			 unsigned char addr_type)
+static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
+			 int addr_len, unsigned char addr_type)
 {
 	struct netdev_hw_addr *ha;
 
-	list_for_each_entry(ha, list, list) {
+	list_for_each_entry(ha, &list->list, list) {
 		if (!memcmp(ha->addr, addr, addr_len) &&
 		    (ha->type == addr_type || !addr_type)) {
 			if (--ha->refcount)
 				return 0;
 			list_del_rcu(&ha->list);
 			call_rcu(&ha->rcu_head, ha_rcu_free);
-			if (delta)
-				(*delta)--;
+			list->count--;
 			return 0;
 		}
 	}
 	return -ENOENT;
 }
 
-static int __hw_addr_add_multiple(struct list_head *to_list, int *to_delta,
-				  struct list_head *from_list, int addr_len,
+static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
+				  struct netdev_hw_addr_list *from_list,
+				  int addr_len,
 				  unsigned char addr_type)
 {
 	int err;
 	struct netdev_hw_addr *ha, *ha2;
 	unsigned char type;
 
-	list_for_each_entry(ha, from_list, list) {
+	list_for_each_entry(ha, &from_list->list, list) {
 		type = addr_type ? addr_type : ha->type;
-		err = __hw_addr_add(to_list, to_delta, ha->addr,
-				    addr_len, type);
+		err = __hw_addr_add(to_list, ha->addr, addr_len, type);
 		if (err)
 			goto unroll;
 	}
 	return 0;
 
 unroll:
-	list_for_each_entry(ha2, from_list, list) {
+	list_for_each_entry(ha2, &from_list->list, list) {
 		if (ha2 == ha)
 			break;
 		type = addr_type ? addr_type : ha2->type;
-		__hw_addr_del(to_list, to_delta, ha2->addr,
-			      addr_len, type);
+		__hw_addr_del(to_list, ha2->addr, addr_len, type);
 	}
 	return err;
 }
 
-static void __hw_addr_del_multiple(struct list_head *to_list, int *to_delta,
-				   struct list_head *from_list, int addr_len,
+static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
+				   struct netdev_hw_addr_list *from_list,
+				   int addr_len,
 				   unsigned char addr_type)
 {
 	struct netdev_hw_addr *ha;
 	unsigned char type;
 
-	list_for_each_entry(ha, from_list, list) {
+	list_for_each_entry(ha, &from_list->list, list) {
 		type = addr_type ? addr_type : ha->type;
-		__hw_addr_del(to_list, to_delta, ha->addr,
-			      addr_len, addr_type);
+		__hw_addr_del(to_list, ha->addr, addr_len, addr_type);
 	}
 }
 
-static int __hw_addr_sync(struct list_head *to_list, int *to_delta,
-			  struct list_head *from_list, int *from_delta,
+static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
+			  struct netdev_hw_addr_list *from_list,
 			  int addr_len)
 {
 	int err = 0;
 	struct netdev_hw_addr *ha, *tmp;
 
-	list_for_each_entry_safe(ha, tmp, from_list, list) {
+	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
 		if (!ha->synced) {
-			err = __hw_addr_add(to_list, to_delta, ha->addr,
+			err = __hw_addr_add(to_list, ha->addr,
 					    addr_len, ha->type);
 			if (err)
 				break;
 			ha->synced = true;
 			ha->refcount++;
 		} else if (ha->refcount == 1) {
-			__hw_addr_del(to_list, to_delta, ha->addr,
-				      addr_len, ha->type);
-			__hw_addr_del(from_list, from_delta, ha->addr,
-				      addr_len, ha->type);
+			__hw_addr_del(to_list, ha->addr, addr_len, ha->type);
+			__hw_addr_del(from_list, ha->addr, addr_len, ha->type);
 		}
 	}
 	return err;
 }
 
-static void __hw_addr_unsync(struct list_head *to_list, int *to_delta,
-			     struct list_head *from_list, int *from_delta,
+static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
+			     struct netdev_hw_addr_list *from_list,
 			     int addr_len)
 {
 	struct netdev_hw_addr *ha, *tmp;
 
-	list_for_each_entry_safe(ha, tmp, from_list, list) {
+	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
 		if (ha->synced) {
-			__hw_addr_del(to_list, to_delta, ha->addr,
+			__hw_addr_del(to_list, ha->addr,
 				      addr_len, ha->type);
 			ha->synced = false;
-			__hw_addr_del(from_list, from_delta, ha->addr,
+			__hw_addr_del(from_list, ha->addr,
 				      addr_len, ha->type);
 		}
 	}
 }
 
-
-static void __hw_addr_flush(struct list_head *list)
+static void __hw_addr_flush(struct netdev_hw_addr_list *list)
 {
 	struct netdev_hw_addr *ha, *tmp;
 
-	list_for_each_entry_safe(ha, tmp, list, list) {
+	list_for_each_entry_safe(ha, tmp, &list->list, list) {
 		list_del_rcu(&ha->list);
 		call_rcu(&ha->rcu_head, ha_rcu_free);
 	}
+	list->count = 0;
+}
+
+static void __hw_addr_init(struct netdev_hw_addr_list *list)
+{
+	INIT_LIST_HEAD(&list->list);
+	list->count = 0;
 }
 
 /* Device addresses handling functions */
@@ -3648,7 +3647,7 @@ static void dev_addr_flush(struct net_device *dev)
 {
 	/* rtnl_mutex must be held here */
 
-	__hw_addr_flush(&dev->dev_addr_list);
+	__hw_addr_flush(&dev->dev_addrs);
 	dev->dev_addr = NULL;
 }
 
@@ -3660,16 +3659,16 @@ static int dev_addr_init(struct net_device *dev)
 
 	/* rtnl_mutex must be held here */
 
-	INIT_LIST_HEAD(&dev->dev_addr_list);
+	__hw_addr_init(&dev->dev_addrs);
 	memset(addr, 0, sizeof(addr));
-	err = __hw_addr_add(&dev->dev_addr_list, NULL, addr, sizeof(addr),
+	err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
 			    NETDEV_HW_ADDR_T_LAN);
 	if (!err) {
 		/*
 		 * Get the first (previously created) address from the list
 		 * and set dev_addr pointer to this location.
 		 */
-		ha = list_first_entry(&dev->dev_addr_list,
+		ha = list_first_entry(&dev->dev_addrs.list,
 				      struct netdev_hw_addr, list);
 		dev->dev_addr = ha->addr;
 	}
@@ -3694,8 +3693,7 @@ int dev_addr_add(struct net_device *dev, unsigned char *addr,
 
 	ASSERT_RTNL();
 
-	err = __hw_addr_add(&dev->dev_addr_list, NULL, addr, dev->addr_len,
-			    addr_type);
+	err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
 	if (!err)
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
 	return err;
@@ -3725,11 +3723,12 @@ int dev_addr_del(struct net_device *dev, unsigned char *addr,
 	 * We can not remove the first address from the list because
 	 * dev->dev_addr points to that.
 	 */
-	ha = list_first_entry(&dev->dev_addr_list, struct netdev_hw_addr, list);
+	ha = list_first_entry(&dev->dev_addrs.list,
+			      struct netdev_hw_addr, list);
 	if (ha->addr == dev->dev_addr && ha->refcount == 1)
 		return -ENOENT;
 
-	err = __hw_addr_del(&dev->dev_addr_list, NULL, addr, dev->addr_len,
+	err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
 			    addr_type);
 	if (!err)
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
@@ -3757,8 +3756,7 @@ int dev_addr_add_multiple(struct net_device *to_dev,
 
 	if (from_dev->addr_len != to_dev->addr_len)
 		return -EINVAL;
-	err = __hw_addr_add_multiple(&to_dev->dev_addr_list, NULL,
-				     &from_dev->dev_addr_list,
+	err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
 				     to_dev->addr_len, addr_type);
 	if (!err)
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
@@ -3784,15 +3782,14 @@ int dev_addr_del_multiple(struct net_device *to_dev,
 
 	if (from_dev->addr_len != to_dev->addr_len)
 		return -EINVAL;
-	__hw_addr_del_multiple(&to_dev->dev_addr_list, NULL,
-			       &from_dev->dev_addr_list,
+	__hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
 			       to_dev->addr_len, addr_type);
 	call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
 	return 0;
 }
 EXPORT_SYMBOL(dev_addr_del_multiple);
 
-/* unicast and multicast addresses handling functions */
+/* multicast addresses handling functions */
 
 int __dev_addr_delete(struct dev_addr_list **list, int *count,
 		      void *addr, int alen, int glbl)
@@ -3868,8 +3865,8 @@ int dev_unicast_delete(struct net_device *dev, void *addr)
 
 	ASSERT_RTNL();
 
-	err = __hw_addr_del(&dev->uc_list, &dev->uc_count, addr,
-			    dev->addr_len, NETDEV_HW_ADDR_T_UNICAST);
+	err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
+			    NETDEV_HW_ADDR_T_UNICAST);
 	if (!err)
 		__dev_set_rx_mode(dev);
 	return err;
@@ -3892,8 +3889,8 @@ int dev_unicast_add(struct net_device *dev, void *addr)
 
 	ASSERT_RTNL();
 
-	err = __hw_addr_add(&dev->uc_list, &dev->uc_count, addr,
-			    dev->addr_len, NETDEV_HW_ADDR_T_UNICAST);
+	err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
+			    NETDEV_HW_ADDR_T_UNICAST);
 	if (!err)
 		__dev_set_rx_mode(dev);
 	return err;
@@ -3966,8 +3963,7 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from)
 	if (to->addr_len != from->addr_len)
 		return -EINVAL;
 
-	err = __hw_addr_sync(&to->uc_list, &to->uc_count,
-			     &from->uc_list, &from->uc_count, to->addr_len);
+	err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
 	if (!err)
 		__dev_set_rx_mode(to);
 	return err;
@@ -3990,8 +3986,7 @@ void dev_unicast_unsync(struct net_device *to, struct net_device *from)
 	if (to->addr_len != from->addr_len)
 		return;
 
-	__hw_addr_unsync(&to->uc_list, &to->uc_count,
-			 &from->uc_list, &from->uc_count, to->addr_len);
+	__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
 	__dev_set_rx_mode(to);
 }
 EXPORT_SYMBOL(dev_unicast_unsync);
@@ -4000,15 +3995,14 @@ static void dev_unicast_flush(struct net_device *dev)
 {
 	/* rtnl_mutex must be held here */
 
-	__hw_addr_flush(&dev->uc_list);
-	dev->uc_count = 0;
+	__hw_addr_flush(&dev->uc);
 }
 
 static void dev_unicast_init(struct net_device *dev)
 {
 	/* rtnl_mutex must be held here */
 
-	INIT_LIST_HEAD(&dev->uc_list);
+	__hw_addr_init(&dev->uc);
 }
 
 
-- 
cgit v1.2.3-70-g09d2


From 43a21ea81a2400992561146327c4785ce7f7be38 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 25 Mar 2009 19:39:37 +0100
Subject: perf_counter: Add event overlow handling

Alternative method of mmap() data output handling that provides
better overflow management and a more reliable data stream.

Unlike the previous method, that didn't have any user->kernel
feedback and relied on userspace keeping up, this method relies on
userspace writing its last read position into the control page.

It will ensure new output doesn't overwrite not-yet read events,
new events for which there is no space left are lost and the
overflow counter is incremented, providing exact event loss
numbers.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  40 +++++++---
 kernel/perf_counter.c        | 185 ++++++++++++++++++++++++++++++-------------
 2 files changed, 158 insertions(+), 67 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index a7d3a61a59b..0765e8e6984 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -236,10 +236,16 @@ struct perf_counter_mmap_page {
 	/*
 	 * Control data for the mmap() data buffer.
 	 *
-	 * User-space reading this value should issue an rmb(), on SMP capable
-	 * platforms, after reading this value -- see perf_counter_wakeup().
+	 * User-space reading the @data_head value should issue an rmb(), on
+	 * SMP capable platforms, after reading this value -- see
+	 * perf_counter_wakeup().
+	 *
+	 * When the mapping is PROT_WRITE the @data_tail value should be
+	 * written by userspace to reflect the last read data. In this case
+	 * the kernel will not over-write unread data.
 	 */
 	__u64   data_head;		/* head in the data section */
+	__u64	data_tail;		/* user-space written tail */
 };
 
 #define PERF_EVENT_MISC_CPUMODE_MASK		(3 << 0)
@@ -273,6 +279,15 @@ enum perf_event_type {
 	 */
 	PERF_EVENT_MMAP			= 1,
 
+	/*
+	 * struct {
+	 * 	struct perf_event_header	header;
+	 * 	u64				id;
+	 * 	u64				lost;
+	 * };
+	 */
+	PERF_EVENT_LOST			= 2,
+
 	/*
 	 * struct {
 	 *	struct perf_event_header	header;
@@ -313,26 +328,26 @@ enum perf_event_type {
 
 	/*
 	 * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
-	 * will be PERF_RECORD_*
+	 * will be PERF_SAMPLE_*
 	 *
 	 * struct {
 	 *	struct perf_event_header	header;
 	 *
-	 *	{ u64			ip;	  } && PERF_RECORD_IP
-	 *	{ u32			pid, tid; } && PERF_RECORD_TID
-	 *	{ u64			time;     } && PERF_RECORD_TIME
-	 *	{ u64			addr;     } && PERF_RECORD_ADDR
-	 *	{ u64			config;   } && PERF_RECORD_CONFIG
-	 *	{ u32			cpu, res; } && PERF_RECORD_CPU
+	 *	{ u64			ip;	  } && PERF_SAMPLE_IP
+	 *	{ u32			pid, tid; } && PERF_SAMPLE_TID
+	 *	{ u64			time;     } && PERF_SAMPLE_TIME
+	 *	{ u64			addr;     } && PERF_SAMPLE_ADDR
+	 *	{ u64			config;   } && PERF_SAMPLE_CONFIG
+	 *	{ u32			cpu, res; } && PERF_SAMPLE_CPU
 	 *
 	 *	{ u64			nr;
-	 *	  { u64 id, val; }	cnt[nr];  } && PERF_RECORD_GROUP
+	 *	  { u64 id, val; }	cnt[nr];  } && PERF_SAMPLE_GROUP
 	 *
 	 *	{ u16			nr,
 	 *				hv,
 	 *				kernel,
 	 *				user;
-	 *	  u64			ips[nr];  } && PERF_RECORD_CALLCHAIN
+	 *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
 	 * };
 	 */
 };
@@ -424,6 +439,7 @@ struct file;
 struct perf_mmap_data {
 	struct rcu_head			rcu_head;
 	int				nr_pages;	/* nr of data pages  */
+	int				writable;	/* are we writable   */
 	int				nr_locked;	/* nr pages mlocked  */
 
 	atomic_t			poll;		/* POLL_ for wakeups */
@@ -433,8 +449,8 @@ struct perf_mmap_data {
 	atomic_long_t			done_head;	/* completed head    */
 
 	atomic_t			lock;		/* concurrent writes */
-
 	atomic_t			wakeup;		/* needs a wakeup    */
+	atomic_t			lost;		/* nr records lost   */
 
 	struct perf_counter_mmap_page   *user_page;
 	void				*data_pages[0];
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 109a9572385..7e9108efd30 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1794,6 +1794,12 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	struct perf_mmap_data *data;
 	int ret = VM_FAULT_SIGBUS;
 
+	if (vmf->flags & FAULT_FLAG_MKWRITE) {
+		if (vmf->pgoff == 0)
+			ret = 0;
+		return ret;
+	}
+
 	rcu_read_lock();
 	data = rcu_dereference(counter->data);
 	if (!data)
@@ -1807,9 +1813,16 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		if ((unsigned)nr > data->nr_pages)
 			goto unlock;
 
+		if (vmf->flags & FAULT_FLAG_WRITE)
+			goto unlock;
+
 		vmf->page = virt_to_page(data->data_pages[nr]);
 	}
+
 	get_page(vmf->page);
+	vmf->page->mapping = vma->vm_file->f_mapping;
+	vmf->page->index   = vmf->pgoff;
+
 	ret = 0;
 unlock:
 	rcu_read_unlock();
@@ -1862,6 +1875,14 @@ fail:
 	return -ENOMEM;
 }
 
+static void perf_mmap_free_page(unsigned long addr)
+{
+	struct page *page = virt_to_page(addr);
+
+	page->mapping = NULL;
+	__free_page(page);
+}
+
 static void __perf_mmap_data_free(struct rcu_head *rcu_head)
 {
 	struct perf_mmap_data *data;
@@ -1869,9 +1890,10 @@ static void __perf_mmap_data_free(struct rcu_head *rcu_head)
 
 	data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
 
-	free_page((unsigned long)data->user_page);
+	perf_mmap_free_page((unsigned long)data->user_page);
 	for (i = 0; i < data->nr_pages; i++)
-		free_page((unsigned long)data->data_pages[i]);
+		perf_mmap_free_page((unsigned long)data->data_pages[i]);
+
 	kfree(data);
 }
 
@@ -1908,9 +1930,10 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 }
 
 static struct vm_operations_struct perf_mmap_vmops = {
-	.open  = perf_mmap_open,
-	.close = perf_mmap_close,
-	.fault = perf_mmap_fault,
+	.open		= perf_mmap_open,
+	.close		= perf_mmap_close,
+	.fault		= perf_mmap_fault,
+	.page_mkwrite	= perf_mmap_fault,
 };
 
 static int perf_mmap(struct file *file, struct vm_area_struct *vma)
@@ -1924,7 +1947,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	long user_extra, extra;
 	int ret = 0;
 
-	if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
+	if (!(vma->vm_flags & VM_SHARED))
 		return -EINVAL;
 
 	vma_size = vma->vm_end - vma->vm_start;
@@ -1983,10 +2006,12 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	atomic_long_add(user_extra, &user->locked_vm);
 	vma->vm_mm->locked_vm += extra;
 	counter->data->nr_locked = extra;
+	if (vma->vm_flags & VM_WRITE)
+		counter->data->writable = 1;
+
 unlock:
 	mutex_unlock(&counter->mmap_mutex);
 
-	vma->vm_flags &= ~VM_MAYWRITE;
 	vma->vm_flags |= VM_RESERVED;
 	vma->vm_ops = &perf_mmap_vmops;
 
@@ -2163,11 +2188,38 @@ struct perf_output_handle {
 	unsigned long		head;
 	unsigned long		offset;
 	int			nmi;
-	int			overflow;
+	int			sample;
 	int			locked;
 	unsigned long		flags;
 };
 
+static bool perf_output_space(struct perf_mmap_data *data,
+			      unsigned int offset, unsigned int head)
+{
+	unsigned long tail;
+	unsigned long mask;
+
+	if (!data->writable)
+		return true;
+
+	mask = (data->nr_pages << PAGE_SHIFT) - 1;
+	/*
+	 * Userspace could choose to issue a mb() before updating the tail
+	 * pointer. So that all reads will be completed before the write is
+	 * issued.
+	 */
+	tail = ACCESS_ONCE(data->user_page->data_tail);
+	smp_rmb();
+
+	offset = (offset - tail) & mask;
+	head   = (head   - tail) & mask;
+
+	if ((int)(head - offset) < 0)
+		return false;
+
+	return true;
+}
+
 static void perf_output_wakeup(struct perf_output_handle *handle)
 {
 	atomic_set(&handle->data->poll, POLL_IN);
@@ -2258,12 +2310,57 @@ out:
 	local_irq_restore(handle->flags);
 }
 
+static void perf_output_copy(struct perf_output_handle *handle,
+			     const void *buf, unsigned int len)
+{
+	unsigned int pages_mask;
+	unsigned int offset;
+	unsigned int size;
+	void **pages;
+
+	offset		= handle->offset;
+	pages_mask	= handle->data->nr_pages - 1;
+	pages		= handle->data->data_pages;
+
+	do {
+		unsigned int page_offset;
+		int nr;
+
+		nr	    = (offset >> PAGE_SHIFT) & pages_mask;
+		page_offset = offset & (PAGE_SIZE - 1);
+		size	    = min_t(unsigned int, PAGE_SIZE - page_offset, len);
+
+		memcpy(pages[nr] + page_offset, buf, size);
+
+		len	    -= size;
+		buf	    += size;
+		offset	    += size;
+	} while (len);
+
+	handle->offset = offset;
+
+	/*
+	 * Check we didn't copy past our reservation window, taking the
+	 * possible unsigned int wrap into account.
+	 */
+	WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0);
+}
+
+#define perf_output_put(handle, x) \
+	perf_output_copy((handle), &(x), sizeof(x))
+
 static int perf_output_begin(struct perf_output_handle *handle,
 			     struct perf_counter *counter, unsigned int size,
-			     int nmi, int overflow)
+			     int nmi, int sample)
 {
 	struct perf_mmap_data *data;
 	unsigned int offset, head;
+	int have_lost;
+	struct {
+		struct perf_event_header header;
+		u64			 id;
+		u64			 lost;
+	} lost_event;
 
 	/*
 	 * For inherited counters we send all the output towards the parent.
@@ -2276,19 +2373,25 @@ static int perf_output_begin(struct perf_output_handle *handle,
 	if (!data)
 		goto out;
 
-	handle->data	 = data;
-	handle->counter	 = counter;
-	handle->nmi	 = nmi;
-	handle->overflow = overflow;
+	handle->data	= data;
+	handle->counter	= counter;
+	handle->nmi	= nmi;
+	handle->sample	= sample;
 
 	if (!data->nr_pages)
 		goto fail;
 
+	have_lost = atomic_read(&data->lost);
+	if (have_lost)
+		size += sizeof(lost_event);
+
 	perf_output_lock(handle);
 
 	do {
 		offset = head = atomic_long_read(&data->head);
 		head += size;
+		if (unlikely(!perf_output_space(data, offset, head)))
+			goto fail;
 	} while (atomic_long_cmpxchg(&data->head, offset, head) != offset);
 
 	handle->offset	= offset;
@@ -2297,55 +2400,27 @@ static int perf_output_begin(struct perf_output_handle *handle,
 	if ((offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT))
 		atomic_set(&data->wakeup, 1);
 
+	if (have_lost) {
+		lost_event.header.type = PERF_EVENT_LOST;
+		lost_event.header.misc = 0;
+		lost_event.header.size = sizeof(lost_event);
+		lost_event.id          = counter->id;
+		lost_event.lost        = atomic_xchg(&data->lost, 0);
+
+		perf_output_put(handle, lost_event);
+	}
+
 	return 0;
 
 fail:
-	perf_output_wakeup(handle);
+	atomic_inc(&data->lost);
+	perf_output_unlock(handle);
 out:
 	rcu_read_unlock();
 
 	return -ENOSPC;
 }
 
-static void perf_output_copy(struct perf_output_handle *handle,
-			     const void *buf, unsigned int len)
-{
-	unsigned int pages_mask;
-	unsigned int offset;
-	unsigned int size;
-	void **pages;
-
-	offset		= handle->offset;
-	pages_mask	= handle->data->nr_pages - 1;
-	pages		= handle->data->data_pages;
-
-	do {
-		unsigned int page_offset;
-		int nr;
-
-		nr	    = (offset >> PAGE_SHIFT) & pages_mask;
-		page_offset = offset & (PAGE_SIZE - 1);
-		size	    = min_t(unsigned int, PAGE_SIZE - page_offset, len);
-
-		memcpy(pages[nr] + page_offset, buf, size);
-
-		len	    -= size;
-		buf	    += size;
-		offset	    += size;
-	} while (len);
-
-	handle->offset = offset;
-
-	/*
-	 * Check we didn't copy past our reservation window, taking the
-	 * possible unsigned int wrap into account.
-	 */
-	WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0);
-}
-
-#define perf_output_put(handle, x) \
-	perf_output_copy((handle), &(x), sizeof(x))
-
 static void perf_output_end(struct perf_output_handle *handle)
 {
 	struct perf_counter *counter = handle->counter;
@@ -2353,7 +2428,7 @@ static void perf_output_end(struct perf_output_handle *handle)
 
 	int wakeup_events = counter->attr.wakeup_events;
 
-	if (handle->overflow && wakeup_events) {
+	if (handle->sample && wakeup_events) {
 		int events = atomic_inc_return(&data->events);
 		if (events >= wakeup_events) {
 			atomic_sub(wakeup_events, &data->events);
@@ -2958,7 +3033,7 @@ static void perf_log_throttle(struct perf_counter *counter, int enable)
 }
 
 /*
- * Generic counter overflow handling.
+ * Generic counter overflow handling, sampling.
  */
 
 int perf_counter_overflow(struct perf_counter *counter, int nmi,
-- 
cgit v1.2.3-70-g09d2


From 9a298b2acd771d8a5c0004d8f8e4156c65b11f6b Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 24 Mar 2009 12:23:04 -0700
Subject: drm: Remove memory debugging infrastructure.

It hasn't been used in ages, and having the user tell your how much
memory is being freed at free time is a recipe for disaster even if it
was ever used.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/drm_agpsupport.c      |  12 +-
 drivers/gpu/drm/drm_auth.c            |   4 +-
 drivers/gpu/drm/drm_bufs.c            | 140 ++++++---------
 drivers/gpu/drm/drm_context.c         |   4 +-
 drivers/gpu/drm/drm_debugfs.c         |   9 +-
 drivers/gpu/drm/drm_dma.c             |  31 +---
 drivers/gpu/drm/drm_drawable.c        |  25 ++-
 drivers/gpu/drm/drm_drv.c             |  18 +-
 drivers/gpu/drm/drm_fops.c            |   8 +-
 drivers/gpu/drm/drm_gem.c             |   8 +-
 drivers/gpu/drm/drm_hashtab.c         |   6 +-
 drivers/gpu/drm/drm_ioctl.c           |  14 +-
 drivers/gpu/drm/drm_irq.c             |  44 ++---
 drivers/gpu/drm/drm_memory.c          |  25 ---
 drivers/gpu/drm/drm_pci.c             |  53 +-----
 drivers/gpu/drm/drm_proc.c            |   8 +-
 drivers/gpu/drm/drm_scatter.c         |  33 ++--
 drivers/gpu/drm/drm_sman.c            |  29 ++--
 drivers/gpu/drm/drm_stub.c            |  19 +--
 drivers/gpu/drm/drm_vm.c              |   8 +-
 drivers/gpu/drm/i810/i810_dma.c       |   6 +-
 drivers/gpu/drm/i830/i830_dma.c       |   6 +-
 drivers/gpu/drm/i915/i915_dma.c       |  40 ++---
 drivers/gpu/drm/i915/i915_gem.c       |  35 ++--
 drivers/gpu/drm/i915/i915_mem.c       |  24 +--
 drivers/gpu/drm/i915/intel_bios.c     |   6 +-
 drivers/gpu/drm/i915/intel_tv.c       |  11 +-
 drivers/gpu/drm/mga/mga_dma.c         |  14 +-
 drivers/gpu/drm/r128/r128_cce.c       |  12 +-
 drivers/gpu/drm/r128/r128_state.c     |  84 ++++-----
 drivers/gpu/drm/radeon/radeon_cp.c    |   9 +-
 drivers/gpu/drm/radeon/radeon_i2c.c   |   6 +-
 drivers/gpu/drm/radeon/radeon_kms.c   |   4 +-
 drivers/gpu/drm/radeon/radeon_mem.c   |  24 +--
 drivers/gpu/drm/radeon/radeon_state.c |  16 +-
 drivers/gpu/drm/savage/savage_bci.c   |  21 +--
 drivers/gpu/drm/savage/savage_state.c |  17 +-
 drivers/gpu/drm/sis/sis_drv.c         |   6 +-
 drivers/gpu/drm/via/via_map.c         |   8 +-
 include/drm/drmP.h                    |  52 ------
 include/drm/drm_memory_debug.h        | 309 ----------------------------------
 41 files changed, 324 insertions(+), 884 deletions(-)
 delete mode 100644 include/drm/drm_memory_debug.h

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_agpsupport.c b/drivers/gpu/drm/drm_agpsupport.c
index 14796594e5d..7a0d042c8d6 100644
--- a/drivers/gpu/drm/drm_agpsupport.c
+++ b/drivers/gpu/drm/drm_agpsupport.c
@@ -203,7 +203,7 @@ int drm_agp_alloc(struct drm_device *dev, struct drm_agp_buffer *request)
 
 	if (!dev->agp || !dev->agp->acquired)
 		return -EINVAL;
-	if (!(entry = drm_alloc(sizeof(*entry), DRM_MEM_AGPLISTS)))
+	if (!(entry = kmalloc(sizeof(*entry), GFP_KERNEL)))
 		return -ENOMEM;
 
 	memset(entry, 0, sizeof(*entry));
@@ -211,7 +211,7 @@ int drm_agp_alloc(struct drm_device *dev, struct drm_agp_buffer *request)
 	pages = (request->size + PAGE_SIZE - 1) / PAGE_SIZE;
 	type = (u32) request->type;
 	if (!(memory = drm_alloc_agp(dev, pages, type))) {
-		drm_free(entry, sizeof(*entry), DRM_MEM_AGPLISTS);
+		kfree(entry);
 		return -ENOMEM;
 	}
 
@@ -369,7 +369,7 @@ int drm_agp_free(struct drm_device *dev, struct drm_agp_buffer *request)
 	list_del(&entry->head);
 
 	drm_free_agp(entry->memory, entry->pages);
-	drm_free(entry, sizeof(*entry), DRM_MEM_AGPLISTS);
+	kfree(entry);
 	return 0;
 }
 EXPORT_SYMBOL(drm_agp_free);
@@ -397,13 +397,13 @@ struct drm_agp_head *drm_agp_init(struct drm_device *dev)
 {
 	struct drm_agp_head *head = NULL;
 
-	if (!(head = drm_alloc(sizeof(*head), DRM_MEM_AGPLISTS)))
+	if (!(head = kmalloc(sizeof(*head), GFP_KERNEL)))
 		return NULL;
 	memset((void *)head, 0, sizeof(*head));
 	head->bridge = agp_find_bridge(dev->pdev);
 	if (!head->bridge) {
 		if (!(head->bridge = agp_backend_acquire(dev->pdev))) {
-			drm_free(head, sizeof(*head), DRM_MEM_AGPLISTS);
+			kfree(head);
 			return NULL;
 		}
 		agp_copy_info(head->bridge, &head->agp_info);
@@ -412,7 +412,7 @@ struct drm_agp_head *drm_agp_init(struct drm_device *dev)
 		agp_copy_info(head->bridge, &head->agp_info);
 	}
 	if (head->agp_info.chipset == NOT_SUPPORTED) {
-		drm_free(head, sizeof(*head), DRM_MEM_AGPLISTS);
+		kfree(head);
 		return NULL;
 	}
 	INIT_LIST_HEAD(&head->memory);
diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c
index ca7a9ef5007..932b5aa96a6 100644
--- a/drivers/gpu/drm/drm_auth.c
+++ b/drivers/gpu/drm/drm_auth.c
@@ -79,7 +79,7 @@ static int drm_add_magic(struct drm_master *master, struct drm_file *priv,
 	struct drm_device *dev = master->minor->dev;
 	DRM_DEBUG("%d\n", magic);
 
-	entry = drm_alloc(sizeof(*entry), DRM_MEM_MAGIC);
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
 	if (!entry)
 		return -ENOMEM;
 	memset(entry, 0, sizeof(*entry));
@@ -120,7 +120,7 @@ static int drm_remove_magic(struct drm_master *master, drm_magic_t magic)
 	list_del(&pt->head);
 	mutex_unlock(&dev->struct_mutex);
 
-	drm_free(pt, sizeof(*pt), DRM_MEM_MAGIC);
+	kfree(pt);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c
index 80a257554b3..6246e3f3dad 100644
--- a/drivers/gpu/drm/drm_bufs.c
+++ b/drivers/gpu/drm/drm_bufs.c
@@ -151,7 +151,7 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset,
 	unsigned long user_token;
 	int ret;
 
-	map = drm_alloc(sizeof(*map), DRM_MEM_MAPS);
+	map = kmalloc(sizeof(*map), GFP_KERNEL);
 	if (!map)
 		return -ENOMEM;
 
@@ -165,7 +165,7 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset,
 	 * when processes fork.
 	 */
 	if ((map->flags & _DRM_REMOVABLE) && map->type != _DRM_SHM) {
-		drm_free(map, sizeof(*map), DRM_MEM_MAPS);
+		kfree(map);
 		return -EINVAL;
 	}
 	DRM_DEBUG("offset = 0x%08llx, size = 0x%08lx, type = %d\n",
@@ -179,7 +179,7 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset,
 		map->size = PAGE_ALIGN(map->size);
 
 	if ((map->offset & (~(resource_size_t)PAGE_MASK)) || (map->size & (~PAGE_MASK))) {
-		drm_free(map, sizeof(*map), DRM_MEM_MAPS);
+		kfree(map);
 		return -EINVAL;
 	}
 	map->mtrr = -1;
@@ -191,7 +191,7 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset,
 #if !defined(__sparc__) && !defined(__alpha__) && !defined(__ia64__) && !defined(__powerpc64__) && !defined(__x86_64__)
 		if (map->offset + (map->size-1) < map->offset ||
 		    map->offset < virt_to_phys(high_memory)) {
-			drm_free(map, sizeof(*map), DRM_MEM_MAPS);
+			kfree(map);
 			return -EINVAL;
 		}
 #endif
@@ -212,7 +212,7 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset,
 				list->map->size = map->size;
 			}
 
-			drm_free(map, sizeof(*map), DRM_MEM_MAPS);
+			kfree(map);
 			*maplist = list;
 			return 0;
 		}
@@ -227,7 +227,7 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset,
 		if (map->type == _DRM_REGISTERS) {
 			map->handle = ioremap(map->offset, map->size);
 			if (!map->handle) {
-				drm_free(map, sizeof(*map), DRM_MEM_MAPS);
+				kfree(map);
 				return -ENOMEM;
 			}
 		}
@@ -243,7 +243,7 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset,
 				list->map->size = map->size;
 			}
 
-			drm_free(map, sizeof(*map), DRM_MEM_MAPS);
+			kfree(map);
 			*maplist = list;
 			return 0;
 		}
@@ -251,7 +251,7 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset,
 		DRM_DEBUG("%lu %d %p\n",
 			  map->size, drm_order(map->size), map->handle);
 		if (!map->handle) {
-			drm_free(map, sizeof(*map), DRM_MEM_MAPS);
+			kfree(map);
 			return -ENOMEM;
 		}
 		map->offset = (unsigned long)map->handle;
@@ -259,7 +259,7 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset,
 			/* Prevent a 2nd X Server from creating a 2nd lock */
 			if (dev->primary->master->lock.hw_lock != NULL) {
 				vfree(map->handle);
-				drm_free(map, sizeof(*map), DRM_MEM_MAPS);
+				kfree(map);
 				return -EBUSY;
 			}
 			dev->sigdata.lock = dev->primary->master->lock.hw_lock = map->handle;	/* Pointer to lock */
@@ -270,7 +270,7 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset,
 		int valid = 0;
 
 		if (!drm_core_has_AGP(dev)) {
-			drm_free(map, sizeof(*map), DRM_MEM_MAPS);
+			kfree(map);
 			return -EINVAL;
 		}
 #ifdef __alpha__
@@ -303,7 +303,7 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset,
 			}
 		}
 		if (!list_empty(&dev->agp->memory) && !valid) {
-			drm_free(map, sizeof(*map), DRM_MEM_MAPS);
+			kfree(map);
 			return -EPERM;
 		}
 		DRM_DEBUG("AGP offset = 0x%08llx, size = 0x%08lx\n",
@@ -316,7 +316,7 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset,
 	}
 	case _DRM_SCATTER_GATHER:
 		if (!dev->sg) {
-			drm_free(map, sizeof(*map), DRM_MEM_MAPS);
+			kfree(map);
 			return -EINVAL;
 		}
 		map->offset += (unsigned long)dev->sg->virtual;
@@ -328,7 +328,7 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset,
 		 * need to point to a 64bit variable first. */
 		dmah = drm_pci_alloc(dev, map->size, map->size, 0xffffffffUL);
 		if (!dmah) {
-			drm_free(map, sizeof(*map), DRM_MEM_MAPS);
+			kfree(map);
 			return -ENOMEM;
 		}
 		map->handle = dmah->vaddr;
@@ -336,15 +336,15 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset,
 		kfree(dmah);
 		break;
 	default:
-		drm_free(map, sizeof(*map), DRM_MEM_MAPS);
+		kfree(map);
 		return -EINVAL;
 	}
 
-	list = drm_alloc(sizeof(*list), DRM_MEM_MAPS);
+	list = kmalloc(sizeof(*list), GFP_KERNEL);
 	if (!list) {
 		if (map->type == _DRM_REGISTERS)
 			iounmap(map->handle);
-		drm_free(map, sizeof(*map), DRM_MEM_MAPS);
+		kfree(map);
 		return -EINVAL;
 	}
 	memset(list, 0, sizeof(*list));
@@ -362,8 +362,8 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset,
 	if (ret) {
 		if (map->type == _DRM_REGISTERS)
 			iounmap(map->handle);
-		drm_free(map, sizeof(*map), DRM_MEM_MAPS);
-		drm_free(list, sizeof(*list), DRM_MEM_MAPS);
+		kfree(map);
+		kfree(list);
 		mutex_unlock(&dev->struct_mutex);
 		return ret;
 	}
@@ -448,7 +448,7 @@ int drm_rmmap_locked(struct drm_device *dev, struct drm_local_map *map)
 			list_del(&r_list->head);
 			drm_ht_remove_key(&dev->map_hash,
 					  r_list->user_token >> PAGE_SHIFT);
-			drm_free(r_list, sizeof(*r_list), DRM_MEM_MAPS);
+			kfree(r_list);
 			found = 1;
 			break;
 		}
@@ -491,7 +491,7 @@ int drm_rmmap_locked(struct drm_device *dev, struct drm_local_map *map)
 		DRM_ERROR("tried to rmmap GEM object\n");
 		break;
 	}
-	drm_free(map, sizeof(*map), DRM_MEM_MAPS);
+	kfree(map);
 
 	return 0;
 }
@@ -582,24 +582,16 @@ static void drm_cleanup_buf_error(struct drm_device * dev,
 				drm_pci_free(dev, entry->seglist[i]);
 			}
 		}
-		drm_free(entry->seglist,
-			 entry->seg_count *
-			 sizeof(*entry->seglist), DRM_MEM_SEGS);
+		kfree(entry->seglist);
 
 		entry->seg_count = 0;
 	}
 
 	if (entry->buf_count) {
 		for (i = 0; i < entry->buf_count; i++) {
-			if (entry->buflist[i].dev_private) {
-				drm_free(entry->buflist[i].dev_private,
-					 entry->buflist[i].dev_priv_size,
-					 DRM_MEM_BUFS);
-			}
+			kfree(entry->buflist[i].dev_private);
 		}
-		drm_free(entry->buflist,
-			 entry->buf_count *
-			 sizeof(*entry->buflist), DRM_MEM_BUFS);
+		kfree(entry->buflist);
 
 		entry->buf_count = 0;
 	}
@@ -698,8 +690,7 @@ int drm_addbufs_agp(struct drm_device * dev, struct drm_buf_desc * request)
 		return -EINVAL;
 	}
 
-	entry->buflist = drm_alloc(count * sizeof(*entry->buflist),
-				   DRM_MEM_BUFS);
+	entry->buflist = kmalloc(count * sizeof(*entry->buflist), GFP_KERNEL);
 	if (!entry->buflist) {
 		mutex_unlock(&dev->struct_mutex);
 		atomic_dec(&dev->buf_alloc);
@@ -729,7 +720,7 @@ int drm_addbufs_agp(struct drm_device * dev, struct drm_buf_desc * request)
 		buf->file_priv = NULL;
 
 		buf->dev_priv_size = dev->driver->dev_priv_size;
-		buf->dev_private = drm_alloc(buf->dev_priv_size, DRM_MEM_BUFS);
+		buf->dev_private = kmalloc(buf->dev_priv_size, GFP_KERNEL);
 		if (!buf->dev_private) {
 			/* Set count correctly so we free the proper amount. */
 			entry->buf_count = count;
@@ -749,10 +740,9 @@ int drm_addbufs_agp(struct drm_device * dev, struct drm_buf_desc * request)
 
 	DRM_DEBUG("byte_count: %d\n", byte_count);
 
-	temp_buflist = drm_realloc(dma->buflist,
-				   dma->buf_count * sizeof(*dma->buflist),
-				   (dma->buf_count + entry->buf_count)
-				   * sizeof(*dma->buflist), DRM_MEM_BUFS);
+	temp_buflist = krealloc(dma->buflist,
+				(dma->buf_count + entry->buf_count) *
+				sizeof(*dma->buflist), GFP_KERNEL);
 	if (!temp_buflist) {
 		/* Free the entry because it isn't valid */
 		drm_cleanup_buf_error(dev, entry);
@@ -854,8 +844,7 @@ int drm_addbufs_pci(struct drm_device * dev, struct drm_buf_desc * request)
 		return -EINVAL;
 	}
 
-	entry->buflist = drm_alloc(count * sizeof(*entry->buflist),
-				   DRM_MEM_BUFS);
+	entry->buflist = kmalloc(count * sizeof(*entry->buflist), GFP_KERNEL);
 	if (!entry->buflist) {
 		mutex_unlock(&dev->struct_mutex);
 		atomic_dec(&dev->buf_alloc);
@@ -863,11 +852,9 @@ int drm_addbufs_pci(struct drm_device * dev, struct drm_buf_desc * request)
 	}
 	memset(entry->buflist, 0, count * sizeof(*entry->buflist));
 
-	entry->seglist = drm_alloc(count * sizeof(*entry->seglist),
-				   DRM_MEM_SEGS);
+	entry->seglist = kmalloc(count * sizeof(*entry->seglist), GFP_KERNEL);
 	if (!entry->seglist) {
-		drm_free(entry->buflist,
-			 count * sizeof(*entry->buflist), DRM_MEM_BUFS);
+		kfree(entry->buflist);
 		mutex_unlock(&dev->struct_mutex);
 		atomic_dec(&dev->buf_alloc);
 		return -ENOMEM;
@@ -877,13 +864,11 @@ int drm_addbufs_pci(struct drm_device * dev, struct drm_buf_desc * request)
 	/* Keep the original pagelist until we know all the allocations
 	 * have succeeded
 	 */
-	temp_pagelist = drm_alloc((dma->page_count + (count << page_order))
-				  * sizeof(*dma->pagelist), DRM_MEM_PAGES);
+	temp_pagelist = kmalloc((dma->page_count + (count << page_order)) *
+			       sizeof(*dma->pagelist), GFP_KERNEL);
 	if (!temp_pagelist) {
-		drm_free(entry->buflist,
-			 count * sizeof(*entry->buflist), DRM_MEM_BUFS);
-		drm_free(entry->seglist,
-			 count * sizeof(*entry->seglist), DRM_MEM_SEGS);
+		kfree(entry->buflist);
+		kfree(entry->seglist);
 		mutex_unlock(&dev->struct_mutex);
 		atomic_dec(&dev->buf_alloc);
 		return -ENOMEM;
@@ -907,9 +892,7 @@ int drm_addbufs_pci(struct drm_device * dev, struct drm_buf_desc * request)
 			entry->buf_count = count;
 			entry->seg_count = count;
 			drm_cleanup_buf_error(dev, entry);
-			drm_free(temp_pagelist,
-				 (dma->page_count + (count << page_order))
-				 * sizeof(*dma->pagelist), DRM_MEM_PAGES);
+			kfree(temp_pagelist);
 			mutex_unlock(&dev->struct_mutex);
 			atomic_dec(&dev->buf_alloc);
 			return -ENOMEM;
@@ -940,18 +923,14 @@ int drm_addbufs_pci(struct drm_device * dev, struct drm_buf_desc * request)
 			buf->file_priv = NULL;
 
 			buf->dev_priv_size = dev->driver->dev_priv_size;
-			buf->dev_private = drm_alloc(buf->dev_priv_size,
-						     DRM_MEM_BUFS);
+			buf->dev_private = kmalloc(buf->dev_priv_size,
+						  GFP_KERNEL);
 			if (!buf->dev_private) {
 				/* Set count correctly so we free the proper amount. */
 				entry->buf_count = count;
 				entry->seg_count = count;
 				drm_cleanup_buf_error(dev, entry);
-				drm_free(temp_pagelist,
-					 (dma->page_count +
-					  (count << page_order))
-					 * sizeof(*dma->pagelist),
-					 DRM_MEM_PAGES);
+				kfree(temp_pagelist);
 				mutex_unlock(&dev->struct_mutex);
 				atomic_dec(&dev->buf_alloc);
 				return -ENOMEM;
@@ -964,16 +943,13 @@ int drm_addbufs_pci(struct drm_device * dev, struct drm_buf_desc * request)
 		byte_count += PAGE_SIZE << page_order;
 	}
 
-	temp_buflist = drm_realloc(dma->buflist,
-				   dma->buf_count * sizeof(*dma->buflist),
-				   (dma->buf_count + entry->buf_count)
-				   * sizeof(*dma->buflist), DRM_MEM_BUFS);
+	temp_buflist = krealloc(dma->buflist,
+				(dma->buf_count + entry->buf_count) *
+				sizeof(*dma->buflist), GFP_KERNEL);
 	if (!temp_buflist) {
 		/* Free the entry because it isn't valid */
 		drm_cleanup_buf_error(dev, entry);
-		drm_free(temp_pagelist,
-			 (dma->page_count + (count << page_order))
-			 * sizeof(*dma->pagelist), DRM_MEM_PAGES);
+		kfree(temp_pagelist);
 		mutex_unlock(&dev->struct_mutex);
 		atomic_dec(&dev->buf_alloc);
 		return -ENOMEM;
@@ -988,9 +964,7 @@ int drm_addbufs_pci(struct drm_device * dev, struct drm_buf_desc * request)
 	 * with the new one.
 	 */
 	if (dma->page_count) {
-		drm_free(dma->pagelist,
-			 dma->page_count * sizeof(*dma->pagelist),
-			 DRM_MEM_PAGES);
+		kfree(dma->pagelist);
 	}
 	dma->pagelist = temp_pagelist;
 
@@ -1086,8 +1060,8 @@ static int drm_addbufs_sg(struct drm_device * dev, struct drm_buf_desc * request
 		return -EINVAL;
 	}
 
-	entry->buflist = drm_alloc(count * sizeof(*entry->buflist),
-				   DRM_MEM_BUFS);
+	entry->buflist = kmalloc(count * sizeof(*entry->buflist),
+				GFP_KERNEL);
 	if (!entry->buflist) {
 		mutex_unlock(&dev->struct_mutex);
 		atomic_dec(&dev->buf_alloc);
@@ -1118,7 +1092,7 @@ static int drm_addbufs_sg(struct drm_device * dev, struct drm_buf_desc * request
 		buf->file_priv = NULL;
 
 		buf->dev_priv_size = dev->driver->dev_priv_size;
-		buf->dev_private = drm_alloc(buf->dev_priv_size, DRM_MEM_BUFS);
+		buf->dev_private = kmalloc(buf->dev_priv_size, GFP_KERNEL);
 		if (!buf->dev_private) {
 			/* Set count correctly so we free the proper amount. */
 			entry->buf_count = count;
@@ -1139,10 +1113,9 @@ static int drm_addbufs_sg(struct drm_device * dev, struct drm_buf_desc * request
 
 	DRM_DEBUG("byte_count: %d\n", byte_count);
 
-	temp_buflist = drm_realloc(dma->buflist,
-				   dma->buf_count * sizeof(*dma->buflist),
-				   (dma->buf_count + entry->buf_count)
-				   * sizeof(*dma->buflist), DRM_MEM_BUFS);
+	temp_buflist = krealloc(dma->buflist,
+				(dma->buf_count + entry->buf_count) *
+				sizeof(*dma->buflist), GFP_KERNEL);
 	if (!temp_buflist) {
 		/* Free the entry because it isn't valid */
 		drm_cleanup_buf_error(dev, entry);
@@ -1248,8 +1221,8 @@ static int drm_addbufs_fb(struct drm_device * dev, struct drm_buf_desc * request
 		return -EINVAL;
 	}
 
-	entry->buflist = drm_alloc(count * sizeof(*entry->buflist),
-				   DRM_MEM_BUFS);
+	entry->buflist = kmalloc(count * sizeof(*entry->buflist),
+				GFP_KERNEL);
 	if (!entry->buflist) {
 		mutex_unlock(&dev->struct_mutex);
 		atomic_dec(&dev->buf_alloc);
@@ -1279,7 +1252,7 @@ static int drm_addbufs_fb(struct drm_device * dev, struct drm_buf_desc * request
 		buf->file_priv = NULL;
 
 		buf->dev_priv_size = dev->driver->dev_priv_size;
-		buf->dev_private = drm_alloc(buf->dev_priv_size, DRM_MEM_BUFS);
+		buf->dev_private = kmalloc(buf->dev_priv_size, GFP_KERNEL);
 		if (!buf->dev_private) {
 			/* Set count correctly so we free the proper amount. */
 			entry->buf_count = count;
@@ -1299,10 +1272,9 @@ static int drm_addbufs_fb(struct drm_device * dev, struct drm_buf_desc * request
 
 	DRM_DEBUG("byte_count: %d\n", byte_count);
 
-	temp_buflist = drm_realloc(dma->buflist,
-				   dma->buf_count * sizeof(*dma->buflist),
-				   (dma->buf_count + entry->buf_count)
-				   * sizeof(*dma->buflist), DRM_MEM_BUFS);
+	temp_buflist = krealloc(dma->buflist,
+				(dma->buf_count + entry->buf_count) *
+				sizeof(*dma->buflist), GFP_KERNEL);
 	if (!temp_buflist) {
 		/* Free the entry because it isn't valid */
 		drm_cleanup_buf_error(dev, entry);
diff --git a/drivers/gpu/drm/drm_context.c b/drivers/gpu/drm/drm_context.c
index 7d1e53c10d4..2607753a320 100644
--- a/drivers/gpu/drm/drm_context.c
+++ b/drivers/gpu/drm/drm_context.c
@@ -341,7 +341,7 @@ int drm_addctx(struct drm_device *dev, void *data,
 			}
 	}
 
-	ctx_entry = drm_alloc(sizeof(*ctx_entry), DRM_MEM_CTXLIST);
+	ctx_entry = kmalloc(sizeof(*ctx_entry), GFP_KERNEL);
 	if (!ctx_entry) {
 		DRM_DEBUG("out of memory\n");
 		return -ENOMEM;
@@ -456,7 +456,7 @@ int drm_rmctx(struct drm_device *dev, void *data,
 		list_for_each_entry_safe(pos, n, &dev->ctxlist, head) {
 			if (pos->handle == ctx->handle) {
 				list_del(&pos->head);
-				drm_free(pos, sizeof(*pos), DRM_MEM_CTXLIST);
+				kfree(pos);
 				--dev->ctx_count;
 			}
 		}
diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
index 6ce0e2667a8..2960b6d7345 100644
--- a/drivers/gpu/drm/drm_debugfs.c
+++ b/drivers/gpu/drm/drm_debugfs.c
@@ -100,15 +100,13 @@ int drm_debugfs_create_files(struct drm_info_list *files, int count,
 		    (dev->driver->driver_features & features) != features)
 			continue;
 
-		tmp = drm_alloc(sizeof(struct drm_info_node),
-				_DRM_DRIVER);
+		tmp = kmalloc(sizeof(struct drm_info_node), GFP_KERNEL);
 		ent = debugfs_create_file(files[i].name, S_IFREG | S_IRUGO,
 					  root, tmp, &drm_debugfs_fops);
 		if (!ent) {
 			DRM_ERROR("Cannot create /sys/kernel/debug/dri/%s/%s\n",
 				  name, files[i].name);
-			drm_free(tmp, sizeof(struct drm_info_node),
-				 _DRM_DRIVER);
+			kfree(tmp);
 			ret = -1;
 			goto fail;
 		}
@@ -196,8 +194,7 @@ int drm_debugfs_remove_files(struct drm_info_list *files, int count,
 			if (tmp->info_ent == &files[i]) {
 				debugfs_remove(tmp->dent);
 				list_del(pos);
-				drm_free(tmp, sizeof(struct drm_info_node),
-					 _DRM_DRIVER);
+				kfree(tmp);
 			}
 		}
 	}
diff --git a/drivers/gpu/drm/drm_dma.c b/drivers/gpu/drm/drm_dma.c
index 7a8e2fba467..13f1537413f 100644
--- a/drivers/gpu/drm/drm_dma.c
+++ b/drivers/gpu/drm/drm_dma.c
@@ -47,7 +47,7 @@ int drm_dma_setup(struct drm_device *dev)
 {
 	int i;
 
-	dev->dma = drm_alloc(sizeof(*dev->dma), DRM_MEM_DRIVER);
+	dev->dma = kmalloc(sizeof(*dev->dma), GFP_KERNEL);
 	if (!dev->dma)
 		return -ENOMEM;
 
@@ -88,36 +88,19 @@ void drm_dma_takedown(struct drm_device *dev)
 					drm_pci_free(dev, dma->bufs[i].seglist[j]);
 				}
 			}
-			drm_free(dma->bufs[i].seglist,
-				 dma->bufs[i].seg_count
-				 * sizeof(*dma->bufs[0].seglist), DRM_MEM_SEGS);
+			kfree(dma->bufs[i].seglist);
 		}
 		if (dma->bufs[i].buf_count) {
 			for (j = 0; j < dma->bufs[i].buf_count; j++) {
-				if (dma->bufs[i].buflist[j].dev_private) {
-					drm_free(dma->bufs[i].buflist[j].
-						 dev_private,
-						 dma->bufs[i].buflist[j].
-						 dev_priv_size, DRM_MEM_BUFS);
-				}
+				kfree(dma->bufs[i].buflist[j].dev_private);
 			}
-			drm_free(dma->bufs[i].buflist,
-				 dma->bufs[i].buf_count *
-				 sizeof(*dma->bufs[0].buflist), DRM_MEM_BUFS);
+			kfree(dma->bufs[i].buflist);
 		}
 	}
 
-	if (dma->buflist) {
-		drm_free(dma->buflist,
-			 dma->buf_count * sizeof(*dma->buflist), DRM_MEM_BUFS);
-	}
-
-	if (dma->pagelist) {
-		drm_free(dma->pagelist,
-			 dma->page_count * sizeof(*dma->pagelist),
-			 DRM_MEM_PAGES);
-	}
-	drm_free(dev->dma, sizeof(*dev->dma), DRM_MEM_DRIVER);
+	kfree(dma->buflist);
+	kfree(dma->pagelist);
+	kfree(dev->dma);
 	dev->dma = NULL;
 }
 
diff --git a/drivers/gpu/drm/drm_drawable.c b/drivers/gpu/drm/drm_drawable.c
index 80be1cab62a..c53c9768cc1 100644
--- a/drivers/gpu/drm/drm_drawable.c
+++ b/drivers/gpu/drm/drm_drawable.c
@@ -85,9 +85,8 @@ int drm_rmdraw(struct drm_device *dev, void *data, struct drm_file *file_priv)
 		spin_unlock_irqrestore(&dev->drw_lock, irqflags);
 		return -EINVAL;
 	}
-	drm_free(info->rects, info->num_rects * sizeof(struct drm_clip_rect),
-			DRM_MEM_BUFS);
-	drm_free(info, sizeof(struct drm_drawable_info), DRM_MEM_BUFS);
+	kfree(info->rects);
+	kfree(info);
 
 	idr_remove(&dev->drw_idr, draw->handle);
 
@@ -106,12 +105,12 @@ int drm_update_drawable_info(struct drm_device *dev, void *data, struct drm_file
 
 	info = idr_find(&dev->drw_idr, update->handle);
 	if (!info) {
-		info = drm_calloc(1, sizeof(*info), DRM_MEM_BUFS);
+		info = kzalloc(sizeof(*info), GFP_KERNEL);
 		if (!info)
 			return -ENOMEM;
 		if (IS_ERR(idr_replace(&dev->drw_idr, info, update->handle))) {
 			DRM_ERROR("No such drawable %d\n", update->handle);
-			drm_free(info, sizeof(*info), DRM_MEM_BUFS);
+			kfree(info);
 			return -EINVAL;
 		}
 	}
@@ -121,8 +120,9 @@ int drm_update_drawable_info(struct drm_device *dev, void *data, struct drm_file
 		if (update->num == 0)
 			rects = NULL;
 		else if (update->num != info->num_rects) {
-			rects = drm_alloc(update->num * sizeof(struct drm_clip_rect),
-					 DRM_MEM_BUFS);
+			rects = kmalloc(update->num *
+					sizeof(struct drm_clip_rect),
+					GFP_KERNEL);
 		} else
 			rects = info->rects;
 
@@ -145,8 +145,7 @@ int drm_update_drawable_info(struct drm_device *dev, void *data, struct drm_file
 		spin_lock_irqsave(&dev->drw_lock, irqflags);
 
 		if (rects != info->rects) {
-			drm_free(info->rects, info->num_rects *
-				 sizeof(struct drm_clip_rect), DRM_MEM_BUFS);
+			kfree(info->rects);
 		}
 
 		info->rects = rects;
@@ -166,8 +165,7 @@ int drm_update_drawable_info(struct drm_device *dev, void *data, struct drm_file
 
 error:
 	if (rects != info->rects)
-		drm_free(rects, update->num * sizeof(struct drm_clip_rect),
-			 DRM_MEM_BUFS);
+		kfree(rects);
 
 	return err;
 }
@@ -186,9 +184,8 @@ static int drm_drawable_free(int idr, void *p, void *data)
 	struct drm_drawable_info *info = p;
 
 	if (info) {
-		drm_free(info->rects, info->num_rects *
-			 sizeof(struct drm_clip_rect), DRM_MEM_BUFS);
-		drm_free(info, sizeof(*info), DRM_MEM_BUFS);
+		kfree(info->rects);
+		kfree(info);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 1bf7efd8d33..b39d7bfc0c9 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -189,7 +189,7 @@ int drm_lastclose(struct drm_device * dev)
 			if (entry->bound)
 				drm_unbind_agp(entry->memory);
 			drm_free_agp(entry->memory, entry->pages);
-			drm_free(entry, sizeof(*entry), DRM_MEM_AGPLISTS);
+			kfree(entry);
 		}
 		INIT_LIST_HEAD(&dev->agp->memory);
 
@@ -208,21 +208,15 @@ int drm_lastclose(struct drm_device * dev)
 	/* Clear vma list (only built for debugging) */
 	list_for_each_entry_safe(vma, vma_temp, &dev->vmalist, head) {
 		list_del(&vma->head);
-		drm_free(vma, sizeof(*vma), DRM_MEM_VMAS);
+		kfree(vma);
 	}
 
 	if (drm_core_check_feature(dev, DRIVER_DMA_QUEUE) && dev->queuelist) {
 		for (i = 0; i < dev->queue_count; i++) {
-			if (dev->queuelist[i]) {
-				drm_free(dev->queuelist[i],
-					 sizeof(*dev->queuelist[0]),
-					 DRM_MEM_QUEUES);
-				dev->queuelist[i] = NULL;
-			}
+			kfree(dev->queuelist[i]);
+			dev->queuelist[i] = NULL;
 		}
-		drm_free(dev->queuelist,
-			 dev->queue_slots * sizeof(*dev->queuelist),
-			 DRM_MEM_QUEUES);
+		kfree(dev->queuelist);
 		dev->queuelist = NULL;
 	}
 	dev->queue_count = 0;
@@ -344,8 +338,6 @@ static int __init drm_core_init(void)
 		goto err_p3;
 	}
 
-	drm_mem_init();
-
 	DRM_INFO("Initialized %s %d.%d.%d %s\n",
 		 CORE_NAME, CORE_MAJOR, CORE_MINOR, CORE_PATCHLEVEL, CORE_DATE);
 	return 0;
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index 09a3571c990..251bc0e3b5e 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -240,7 +240,7 @@ static int drm_open_helper(struct inode *inode, struct file *filp,
 
 	DRM_DEBUG("pid = %d, minor = %d\n", task_pid_nr(current), minor_id);
 
-	priv = drm_alloc(sizeof(*priv), DRM_MEM_FILES);
+	priv = kmalloc(sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
 
@@ -328,7 +328,7 @@ static int drm_open_helper(struct inode *inode, struct file *filp,
 
 	return 0;
       out_free:
-	drm_free(priv, sizeof(*priv), DRM_MEM_FILES);
+	kfree(priv);
 	filp->private_data = NULL;
 	return ret;
 }
@@ -471,7 +471,7 @@ int drm_release(struct inode *inode, struct file *filp)
 				drm_ctxbitmap_free(dev, pos->handle);
 
 				list_del(&pos->head);
-				drm_free(pos, sizeof(*pos), DRM_MEM_CTXLIST);
+				kfree(pos);
 				--dev->ctx_count;
 			}
 		}
@@ -516,7 +516,7 @@ int drm_release(struct inode *inode, struct file *filp)
 
 	if (dev->driver->postclose)
 		dev->driver->postclose(dev, file_priv);
-	drm_free(file_priv, sizeof(*file_priv), DRM_MEM_FILES);
+	kfree(file_priv);
 
 	/* ========================================================
 	 * End inline drm_release
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index ec43005100d..8104ecaea26 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -89,7 +89,7 @@ drm_gem_init(struct drm_device *dev)
 	atomic_set(&dev->gtt_count, 0);
 	atomic_set(&dev->gtt_memory, 0);
 
-	mm = drm_calloc(1, sizeof(struct drm_gem_mm), DRM_MEM_MM);
+	mm = kzalloc(sizeof(struct drm_gem_mm), GFP_KERNEL);
 	if (!mm) {
 		DRM_ERROR("out of memory\n");
 		return -ENOMEM;
@@ -98,14 +98,14 @@ drm_gem_init(struct drm_device *dev)
 	dev->mm_private = mm;
 
 	if (drm_ht_create(&mm->offset_hash, 19)) {
-		drm_free(mm, sizeof(struct drm_gem_mm), DRM_MEM_MM);
+		kfree(mm);
 		return -ENOMEM;
 	}
 
 	if (drm_mm_init(&mm->offset_manager, DRM_FILE_PAGE_OFFSET_START,
 			DRM_FILE_PAGE_OFFSET_SIZE)) {
 		drm_ht_remove(&mm->offset_hash);
-		drm_free(mm, sizeof(struct drm_gem_mm), DRM_MEM_MM);
+		kfree(mm);
 		return -ENOMEM;
 	}
 
@@ -119,7 +119,7 @@ drm_gem_destroy(struct drm_device *dev)
 
 	drm_mm_takedown(&mm->offset_manager);
 	drm_ht_remove(&mm->offset_hash);
-	drm_free(mm, sizeof(struct drm_gem_mm), DRM_MEM_MM);
+	kfree(mm);
 	dev->mm_private = NULL;
 }
 
diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c
index ac35145c3e2..f36b21c5b2e 100644
--- a/drivers/gpu/drm/drm_hashtab.c
+++ b/drivers/gpu/drm/drm_hashtab.c
@@ -46,8 +46,7 @@ int drm_ht_create(struct drm_open_hash *ht, unsigned int order)
 	ht->table = NULL;
 	ht->use_vmalloc = ((ht->size * sizeof(*ht->table)) > PAGE_SIZE);
 	if (!ht->use_vmalloc) {
-		ht->table = drm_calloc(ht->size, sizeof(*ht->table),
-				       DRM_MEM_HASHTAB);
+		ht->table = kcalloc(ht->size, sizeof(*ht->table), GFP_KERNEL);
 	}
 	if (!ht->table) {
 		ht->use_vmalloc = 1;
@@ -200,8 +199,7 @@ void drm_ht_remove(struct drm_open_hash *ht)
 		if (ht->use_vmalloc)
 			vfree(ht->table);
 		else
-			drm_free(ht->table, ht->size * sizeof(*ht->table),
-				 DRM_MEM_HASHTAB);
+			kfree(ht->table);
 		ht->table = NULL;
 	}
 }
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 1fad76289e6..9b9ff46c237 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -93,7 +93,7 @@ int drm_setunique(struct drm_device *dev, void *data,
 
 	master->unique_len = u->unique_len;
 	master->unique_size = u->unique_len + 1;
-	master->unique = drm_alloc(master->unique_size, DRM_MEM_DRIVER);
+	master->unique = kmalloc(master->unique_size, GFP_KERNEL);
 	if (!master->unique)
 		return -ENOMEM;
 	if (copy_from_user(master->unique, u->unique, master->unique_len))
@@ -101,9 +101,8 @@ int drm_setunique(struct drm_device *dev, void *data,
 
 	master->unique[master->unique_len] = '\0';
 
-	dev->devname =
-	    drm_alloc(strlen(dev->driver->pci_driver.name) +
-		      strlen(master->unique) + 2, DRM_MEM_DRIVER);
+	dev->devname = kmalloc(strlen(dev->driver->pci_driver.name) +
+			       strlen(master->unique) + 2, GFP_KERNEL);
 	if (!dev->devname)
 		return -ENOMEM;
 
@@ -138,7 +137,7 @@ static int drm_set_busid(struct drm_device *dev, struct drm_file *file_priv)
 
 	master->unique_len = 40;
 	master->unique_size = master->unique_len;
-	master->unique = drm_alloc(master->unique_size, DRM_MEM_DRIVER);
+	master->unique = kmalloc(master->unique_size, GFP_KERNEL);
 	if (master->unique == NULL)
 		return -ENOMEM;
 
@@ -152,9 +151,8 @@ static int drm_set_busid(struct drm_device *dev, struct drm_file *file_priv)
 	else
 		master->unique_len = len;
 
-	dev->devname =
-	    drm_alloc(strlen(dev->driver->pci_driver.name) + master->unique_len +
-		      2, DRM_MEM_DRIVER);
+	dev->devname = kmalloc(strlen(dev->driver->pci_driver.name) +
+			       master->unique_len + 2, GFP_KERNEL);
 	if (dev->devname == NULL)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index fc8e5acd9d9..b4a3dbcebe9 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -104,21 +104,13 @@ void drm_vblank_cleanup(struct drm_device *dev)
 
 	vblank_disable_fn((unsigned long)dev);
 
-	drm_free(dev->vbl_queue, sizeof(*dev->vbl_queue) * dev->num_crtcs,
-		 DRM_MEM_DRIVER);
-	drm_free(dev->_vblank_count, sizeof(*dev->_vblank_count) *
-		 dev->num_crtcs, DRM_MEM_DRIVER);
-	drm_free(dev->vblank_refcount, sizeof(*dev->vblank_refcount) *
-		 dev->num_crtcs, DRM_MEM_DRIVER);
-	drm_free(dev->vblank_enabled, sizeof(*dev->vblank_enabled) *
-		 dev->num_crtcs, DRM_MEM_DRIVER);
-	drm_free(dev->last_vblank, sizeof(*dev->last_vblank) * dev->num_crtcs,
-		 DRM_MEM_DRIVER);
-	drm_free(dev->last_vblank_wait,
-		 sizeof(*dev->last_vblank_wait) * dev->num_crtcs,
-		 DRM_MEM_DRIVER);
-	drm_free(dev->vblank_inmodeset, sizeof(*dev->vblank_inmodeset) *
-		 dev->num_crtcs, DRM_MEM_DRIVER);
+	kfree(dev->vbl_queue);
+	kfree(dev->_vblank_count);
+	kfree(dev->vblank_refcount);
+	kfree(dev->vblank_enabled);
+	kfree(dev->last_vblank);
+	kfree(dev->last_vblank_wait);
+	kfree(dev->vblank_inmodeset);
 
 	dev->num_crtcs = 0;
 }
@@ -132,37 +124,33 @@ int drm_vblank_init(struct drm_device *dev, int num_crtcs)
 	spin_lock_init(&dev->vbl_lock);
 	dev->num_crtcs = num_crtcs;
 
-	dev->vbl_queue = drm_alloc(sizeof(wait_queue_head_t) * num_crtcs,
-				   DRM_MEM_DRIVER);
+	dev->vbl_queue = kmalloc(sizeof(wait_queue_head_t) * num_crtcs,
+				 GFP_KERNEL);
 	if (!dev->vbl_queue)
 		goto err;
 
-	dev->_vblank_count = drm_alloc(sizeof(atomic_t) * num_crtcs,
-				      DRM_MEM_DRIVER);
+	dev->_vblank_count = kmalloc(sizeof(atomic_t) * num_crtcs, GFP_KERNEL);
 	if (!dev->_vblank_count)
 		goto err;
 
-	dev->vblank_refcount = drm_alloc(sizeof(atomic_t) * num_crtcs,
-					 DRM_MEM_DRIVER);
+	dev->vblank_refcount = kmalloc(sizeof(atomic_t) * num_crtcs,
+				       GFP_KERNEL);
 	if (!dev->vblank_refcount)
 		goto err;
 
-	dev->vblank_enabled = drm_calloc(num_crtcs, sizeof(int),
-					 DRM_MEM_DRIVER);
+	dev->vblank_enabled = kcalloc(num_crtcs, sizeof(int), GFP_KERNEL);
 	if (!dev->vblank_enabled)
 		goto err;
 
-	dev->last_vblank = drm_calloc(num_crtcs, sizeof(u32), DRM_MEM_DRIVER);
+	dev->last_vblank = kcalloc(num_crtcs, sizeof(u32), GFP_KERNEL);
 	if (!dev->last_vblank)
 		goto err;
 
-	dev->last_vblank_wait = drm_calloc(num_crtcs, sizeof(u32),
-					   DRM_MEM_DRIVER);
+	dev->last_vblank_wait = kcalloc(num_crtcs, sizeof(u32), GFP_KERNEL);
 	if (!dev->last_vblank_wait)
 		goto err;
 
-	dev->vblank_inmodeset = drm_calloc(num_crtcs, sizeof(int),
-					 DRM_MEM_DRIVER);
+	dev->vblank_inmodeset = kcalloc(num_crtcs, sizeof(int), GFP_KERNEL);
 	if (!dev->vblank_inmodeset)
 		goto err;
 
diff --git a/drivers/gpu/drm/drm_memory.c b/drivers/gpu/drm/drm_memory.c
index 0c707f533ea..0a436184dd8 100644
--- a/drivers/gpu/drm/drm_memory.c
+++ b/drivers/gpu/drm/drm_memory.c
@@ -36,15 +36,6 @@
 #include <linux/highmem.h>
 #include "drmP.h"
 
-#ifdef DEBUG_MEMORY
-#include "drm_memory_debug.h"
-#else
-
-/** No-op. */
-void drm_mem_init(void)
-{
-}
-
 /**
  * Called when "/proc/dri/%dev%/mem" is read.
  *
@@ -64,20 +55,6 @@ int drm_mem_info(char *buf, char **start, off_t offset,
 	return 0;
 }
 
-/** Wrapper around kmalloc() and kfree() */
-void *drm_realloc(void *oldpt, size_t oldsize, size_t size, int area)
-{
-	void *pt;
-
-	if (!(pt = kmalloc(size, GFP_KERNEL)))
-		return NULL;
-	if (oldpt && oldsize) {
-		memcpy(pt, oldpt, oldsize);
-		kfree(oldpt);
-	}
-	return pt;
-}
-
 #if __OS_HAS_AGP
 static void *agp_remap(unsigned long offset, unsigned long size,
 		       struct drm_device * dev)
@@ -157,8 +134,6 @@ static inline void *agp_remap(unsigned long offset, unsigned long size,
 
 #endif				/* agp */
 
-#endif				/* debug_memory */
-
 void drm_core_ioremap(struct drm_local_map *map, struct drm_device *dev)
 {
 	if (drm_core_has_AGP(dev) &&
diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c
index b55d5bc6ea6..577094fb199 100644
--- a/drivers/gpu/drm/drm_pci.c
+++ b/drivers/gpu/drm/drm_pci.c
@@ -55,17 +55,6 @@ drm_dma_handle_t *drm_pci_alloc(struct drm_device * dev, size_t size, size_t ali
 	unsigned long addr;
 	size_t sz;
 #endif
-#ifdef DRM_DEBUG_MEMORY
-	int area = DRM_MEM_DMA;
-
-	spin_lock(&drm_mem_lock);
-	if ((drm_ram_used >> PAGE_SHIFT)
-	    > (DRM_RAM_PERCENT * drm_ram_available) / 100) {
-		spin_unlock(&drm_mem_lock);
-		return 0;
-	}
-	spin_unlock(&drm_mem_lock);
-#endif
 
 	/* pci_alloc_consistent only guarantees alignment to the smallest
 	 * PAGE_SIZE order which is greater than or equal to the requested size.
@@ -86,26 +75,10 @@ drm_dma_handle_t *drm_pci_alloc(struct drm_device * dev, size_t size, size_t ali
 	dmah->size = size;
 	dmah->vaddr = dma_alloc_coherent(&dev->pdev->dev, size, &dmah->busaddr, GFP_KERNEL | __GFP_COMP);
 
-#ifdef DRM_DEBUG_MEMORY
-	if (dmah->vaddr == NULL) {
-		spin_lock(&drm_mem_lock);
-		++drm_mem_stats[area].fail_count;
-		spin_unlock(&drm_mem_lock);
-		kfree(dmah);
-		return NULL;
-	}
-
-	spin_lock(&drm_mem_lock);
-	++drm_mem_stats[area].succeed_count;
-	drm_mem_stats[area].bytes_allocated += size;
-	drm_ram_used += size;
-	spin_unlock(&drm_mem_lock);
-#else
 	if (dmah->vaddr == NULL) {
 		kfree(dmah);
 		return NULL;
 	}
-#endif
 
 	memset(dmah->vaddr, 0, size);
 
@@ -132,17 +105,8 @@ void __drm_pci_free(struct drm_device * dev, drm_dma_handle_t * dmah)
 	unsigned long addr;
 	size_t sz;
 #endif
-#ifdef DRM_DEBUG_MEMORY
-	int area = DRM_MEM_DMA;
-	int alloc_count;
-	int free_count;
-#endif
 
-	if (!dmah->vaddr) {
-#ifdef DRM_DEBUG_MEMORY
-		DRM_MEM_ERROR(area, "Attempt to free address 0\n");
-#endif
-	} else {
+	if (dmah->vaddr) {
 		/* XXX - Is virt_to_page() legal for consistent mem? */
 		/* Unreserve */
 		for (addr = (unsigned long)dmah->vaddr, sz = dmah->size;
@@ -152,21 +116,6 @@ void __drm_pci_free(struct drm_device * dev, drm_dma_handle_t * dmah)
 		dma_free_coherent(&dev->pdev->dev, dmah->size, dmah->vaddr,
 				  dmah->busaddr);
 	}
-
-#ifdef DRM_DEBUG_MEMORY
-	spin_lock(&drm_mem_lock);
-	free_count = ++drm_mem_stats[area].free_count;
-	alloc_count = drm_mem_stats[area].succeed_count;
-	drm_mem_stats[area].bytes_freed += size;
-	drm_ram_used -= size;
-	spin_unlock(&drm_mem_lock);
-	if (free_count > alloc_count) {
-		DRM_MEM_ERROR(area,
-			      "Excess frees: %d frees, %d allocs\n",
-			      free_count, alloc_count);
-	}
-#endif
-
 }
 
 /**
diff --git a/drivers/gpu/drm/drm_proc.c b/drivers/gpu/drm/drm_proc.c
index bae5391165a..bbd4b3d1074 100644
--- a/drivers/gpu/drm/drm_proc.c
+++ b/drivers/gpu/drm/drm_proc.c
@@ -105,13 +105,12 @@ int drm_proc_create_files(struct drm_info_list *files, int count,
 		    (dev->driver->driver_features & features) != features)
 			continue;
 
-		tmp = drm_alloc(sizeof(struct drm_info_node), _DRM_DRIVER);
+		tmp = kmalloc(sizeof(struct drm_info_node), GFP_KERNEL);
 		ent = create_proc_entry(files[i].name, S_IFREG | S_IRUGO, root);
 		if (!ent) {
 			DRM_ERROR("Cannot create /proc/dri/%s/%s\n",
 				  name, files[i].name);
-			drm_free(tmp, sizeof(struct drm_info_node),
-				 _DRM_DRIVER);
+			kfree(tmp);
 			ret = -1;
 			goto fail;
 		}
@@ -192,8 +191,7 @@ int drm_proc_remove_files(struct drm_info_list *files, int count,
 				remove_proc_entry(files[i].name,
 						  minor->proc_root);
 				list_del(pos);
-				drm_free(tmp, sizeof(struct drm_info_node),
-					 _DRM_DRIVER);
+				kfree(tmp);
 			}
 		}
 	}
diff --git a/drivers/gpu/drm/drm_scatter.c b/drivers/gpu/drm/drm_scatter.c
index b2b0f3d4171..c7823c863d4 100644
--- a/drivers/gpu/drm/drm_scatter.c
+++ b/drivers/gpu/drm/drm_scatter.c
@@ -58,11 +58,9 @@ void drm_sg_cleanup(struct drm_sg_mem * entry)
 
 	vfree(entry->virtual);
 
-	drm_free(entry->busaddr,
-		 entry->pages * sizeof(*entry->busaddr), DRM_MEM_PAGES);
-	drm_free(entry->pagelist,
-		 entry->pages * sizeof(*entry->pagelist), DRM_MEM_PAGES);
-	drm_free(entry, sizeof(*entry), DRM_MEM_SGLISTS);
+	kfree(entry->busaddr);
+	kfree(entry->pagelist);
+	kfree(entry);
 }
 
 #ifdef _LP64
@@ -84,7 +82,7 @@ int drm_sg_alloc(struct drm_device *dev, struct drm_scatter_gather * request)
 	if (dev->sg)
 		return -EINVAL;
 
-	entry = drm_alloc(sizeof(*entry), DRM_MEM_SGLISTS);
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
 	if (!entry)
 		return -ENOMEM;
 
@@ -93,34 +91,27 @@ int drm_sg_alloc(struct drm_device *dev, struct drm_scatter_gather * request)
 	DRM_DEBUG("size=%ld pages=%ld\n", request->size, pages);
 
 	entry->pages = pages;
-	entry->pagelist = drm_alloc(pages * sizeof(*entry->pagelist),
-				    DRM_MEM_PAGES);
+	entry->pagelist = kmalloc(pages * sizeof(*entry->pagelist), GFP_KERNEL);
 	if (!entry->pagelist) {
-		drm_free(entry, sizeof(*entry), DRM_MEM_SGLISTS);
+		kfree(entry);
 		return -ENOMEM;
 	}
 
 	memset(entry->pagelist, 0, pages * sizeof(*entry->pagelist));
 
-	entry->busaddr = drm_alloc(pages * sizeof(*entry->busaddr),
-				   DRM_MEM_PAGES);
+	entry->busaddr = kmalloc(pages * sizeof(*entry->busaddr), GFP_KERNEL);
 	if (!entry->busaddr) {
-		drm_free(entry->pagelist,
-			 entry->pages * sizeof(*entry->pagelist),
-			 DRM_MEM_PAGES);
-		drm_free(entry, sizeof(*entry), DRM_MEM_SGLISTS);
+		kfree(entry->pagelist);
+		kfree(entry);
 		return -ENOMEM;
 	}
 	memset((void *)entry->busaddr, 0, pages * sizeof(*entry->busaddr));
 
 	entry->virtual = drm_vmalloc_dma(pages << PAGE_SHIFT);
 	if (!entry->virtual) {
-		drm_free(entry->busaddr,
-			 entry->pages * sizeof(*entry->busaddr), DRM_MEM_PAGES);
-		drm_free(entry->pagelist,
-			 entry->pages * sizeof(*entry->pagelist),
-			 DRM_MEM_PAGES);
-		drm_free(entry, sizeof(*entry), DRM_MEM_SGLISTS);
+		kfree(entry->busaddr);
+		kfree(entry->pagelist);
+		kfree(entry);
 		return -ENOMEM;
 	}
 
diff --git a/drivers/gpu/drm/drm_sman.c b/drivers/gpu/drm/drm_sman.c
index 926f146390c..463aed9403d 100644
--- a/drivers/gpu/drm/drm_sman.c
+++ b/drivers/gpu/drm/drm_sman.c
@@ -48,9 +48,7 @@ void drm_sman_takedown(struct drm_sman * sman)
 {
 	drm_ht_remove(&sman->user_hash_tab);
 	drm_ht_remove(&sman->owner_hash_tab);
-	if (sman->mm)
-		drm_free(sman->mm, sman->num_managers * sizeof(*sman->mm),
-			 DRM_MEM_MM);
+	kfree(sman->mm);
 }
 
 EXPORT_SYMBOL(drm_sman_takedown);
@@ -61,8 +59,9 @@ drm_sman_init(struct drm_sman * sman, unsigned int num_managers,
 {
 	int ret = 0;
 
-	sman->mm = (struct drm_sman_mm *) drm_calloc(num_managers, sizeof(*sman->mm),
-						DRM_MEM_MM);
+	sman->mm = (struct drm_sman_mm *) kcalloc(num_managers,
+						  sizeof(*sman->mm),
+						  GFP_KERNEL);
 	if (!sman->mm) {
 		ret = -ENOMEM;
 		goto out;
@@ -78,7 +77,7 @@ drm_sman_init(struct drm_sman * sman, unsigned int num_managers,
 
 	drm_ht_remove(&sman->owner_hash_tab);
 out1:
-	drm_free(sman->mm, num_managers * sizeof(*sman->mm), DRM_MEM_MM);
+	kfree(sman->mm);
 out:
 	return ret;
 }
@@ -110,7 +109,7 @@ static void drm_sman_mm_destroy(void *private)
 {
 	struct drm_mm *mm = (struct drm_mm *) private;
 	drm_mm_takedown(mm);
-	drm_free(mm, sizeof(*mm), DRM_MEM_MM);
+	kfree(mm);
 }
 
 static unsigned long drm_sman_mm_offset(void *private, void *ref)
@@ -130,7 +129,7 @@ drm_sman_set_range(struct drm_sman * sman, unsigned int manager,
 	BUG_ON(manager >= sman->num_managers);
 
 	sman_mm = &sman->mm[manager];
-	mm = drm_calloc(1, sizeof(*mm), DRM_MEM_MM);
+	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
 	if (!mm) {
 		return -ENOMEM;
 	}
@@ -138,7 +137,7 @@ drm_sman_set_range(struct drm_sman * sman, unsigned int manager,
 	ret = drm_mm_init(mm, start, size);
 
 	if (ret) {
-		drm_free(mm, sizeof(*mm), DRM_MEM_MM);
+		kfree(mm);
 		return ret;
 	}
 
@@ -176,7 +175,7 @@ static struct drm_owner_item *drm_sman_get_owner_item(struct drm_sman * sman,
 				      owner_hash);
 	}
 
-	owner_item = drm_calloc(1, sizeof(*owner_item), DRM_MEM_MM);
+	owner_item = kzalloc(sizeof(*owner_item), GFP_KERNEL);
 	if (!owner_item)
 		goto out;
 
@@ -189,7 +188,7 @@ static struct drm_owner_item *drm_sman_get_owner_item(struct drm_sman * sman,
 	return owner_item;
 
 out1:
-	drm_free(owner_item, sizeof(*owner_item), DRM_MEM_MM);
+	kfree(owner_item);
 out:
 	return NULL;
 }
@@ -212,7 +211,7 @@ struct drm_memblock_item *drm_sman_alloc(struct drm_sman *sman, unsigned int man
 		return NULL;
 	}
 
-	memblock = drm_calloc(1, sizeof(*memblock), DRM_MEM_MM);
+	memblock = kzalloc(sizeof(*memblock), GFP_KERNEL);
 
 	if (!memblock)
 		goto out;
@@ -237,7 +236,7 @@ struct drm_memblock_item *drm_sman_alloc(struct drm_sman *sman, unsigned int man
 out2:
 	drm_ht_remove_item(&sman->user_hash_tab, &memblock->user_hash);
 out1:
-	drm_free(memblock, sizeof(*memblock), DRM_MEM_MM);
+	kfree(memblock);
 out:
 	sman_mm->free(sman_mm->private, tmp);
 
@@ -253,7 +252,7 @@ static void drm_sman_free(struct drm_memblock_item *item)
 	list_del(&item->owner_list);
 	drm_ht_remove_item(&sman->user_hash_tab, &item->user_hash);
 	item->mm->free(item->mm->private, item->mm_info);
-	drm_free(item, sizeof(*item), DRM_MEM_MM);
+	kfree(item);
 }
 
 int drm_sman_free_key(struct drm_sman *sman, unsigned int key)
@@ -277,7 +276,7 @@ static void drm_sman_remove_owner(struct drm_sman *sman,
 {
 	list_del(&owner_item->sman_list);
 	drm_ht_remove_item(&sman->owner_hash_tab, &owner_item->owner_hash);
-	drm_free(owner_item, sizeof(*owner_item), DRM_MEM_MM);
+	kfree(owner_item);
 }
 
 int drm_sman_owner_clean(struct drm_sman *sman, unsigned long owner)
diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c
index 387a8de1bc7..155a5bbce68 100644
--- a/drivers/gpu/drm/drm_stub.c
+++ b/drivers/gpu/drm/drm_stub.c
@@ -107,7 +107,7 @@ struct drm_master *drm_master_create(struct drm_minor *minor)
 {
 	struct drm_master *master;
 
-	master = drm_calloc(1, sizeof(*master), DRM_MEM_DRIVER);
+	master = kzalloc(sizeof(*master), GFP_KERNEL);
 	if (!master)
 		return NULL;
 
@@ -149,7 +149,7 @@ static void drm_master_destroy(struct kref *kref)
 	}
 
 	if (master->unique) {
-		drm_free(master->unique, master->unique_size, DRM_MEM_DRIVER);
+		kfree(master->unique);
 		master->unique = NULL;
 		master->unique_len = 0;
 	}
@@ -157,12 +157,12 @@ static void drm_master_destroy(struct kref *kref)
 	list_for_each_entry_safe(pt, next, &master->magicfree, head) {
 		list_del(&pt->head);
 		drm_ht_remove_item(&master->magiclist, &pt->hash_item);
-		drm_free(pt, sizeof(*pt), DRM_MEM_MAGIC);
+		kfree(pt);
 	}
 
 	drm_ht_remove(&master->magiclist);
 
-	drm_free(master, sizeof(*master), DRM_MEM_DRIVER);
+	kfree(master);
 }
 
 void drm_master_put(struct drm_master **master)
@@ -390,7 +390,7 @@ int drm_get_dev(struct pci_dev *pdev, const struct pci_device_id *ent,
 
 	DRM_DEBUG("\n");
 
-	dev = drm_calloc(1, sizeof(*dev), DRM_MEM_STUB);
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
 	if (!dev)
 		return -ENOMEM;
 
@@ -443,7 +443,7 @@ err_g3:
 err_g2:
 	pci_disable_device(pdev);
 err_g1:
-	drm_free(dev, sizeof(*dev), DRM_MEM_STUB);
+	kfree(dev);
 	return ret;
 }
 EXPORT_SYMBOL(drm_get_dev);
@@ -516,7 +516,7 @@ void drm_put_dev(struct drm_device *dev)
 		dev->driver->unload(dev);
 
 	if (drm_core_has_AGP(dev) && dev->agp) {
-		drm_free(dev->agp, sizeof(*dev->agp), DRM_MEM_AGPLISTS);
+		kfree(dev->agp);
 		dev->agp = NULL;
 	}
 
@@ -535,10 +535,9 @@ void drm_put_dev(struct drm_device *dev)
 	drm_put_minor(&dev->primary);
 
 	if (dev->devname) {
-		drm_free(dev->devname, strlen(dev->devname) + 1,
-			 DRM_MEM_DRIVER);
+		kfree(dev->devname);
 		dev->devname = NULL;
 	}
-	drm_free(dev, sizeof(*dev), DRM_MEM_STUB);
+	kfree(dev);
 }
 EXPORT_SYMBOL(drm_put_dev);
diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c
index 22f76567ac7..f95d03ac978 100644
--- a/drivers/gpu/drm/drm_vm.c
+++ b/drivers/gpu/drm/drm_vm.c
@@ -227,7 +227,7 @@ static void drm_vm_shm_close(struct vm_area_struct *vma)
 			found_maps++;
 		if (pt->vma == vma) {
 			list_del(&pt->head);
-			drm_free(pt, sizeof(*pt), DRM_MEM_VMAS);
+			kfree(pt);
 		}
 	}
 
@@ -273,7 +273,7 @@ static void drm_vm_shm_close(struct vm_area_struct *vma)
 				DRM_ERROR("tried to rmmap GEM object\n");
 				break;
 			}
-			drm_free(map, sizeof(*map), DRM_MEM_MAPS);
+			kfree(map);
 		}
 	}
 	mutex_unlock(&dev->struct_mutex);
@@ -414,7 +414,7 @@ void drm_vm_open_locked(struct vm_area_struct *vma)
 		  vma->vm_start, vma->vm_end - vma->vm_start);
 	atomic_inc(&dev->vma_count);
 
-	vma_entry = drm_alloc(sizeof(*vma_entry), DRM_MEM_VMAS);
+	vma_entry = kmalloc(sizeof(*vma_entry), GFP_KERNEL);
 	if (vma_entry) {
 		vma_entry->vma = vma;
 		vma_entry->pid = current->pid;
@@ -454,7 +454,7 @@ static void drm_vm_close(struct vm_area_struct *vma)
 	list_for_each_entry_safe(pt, temp, &dev->vmalist, head) {
 		if (pt->vma == vma) {
 			list_del(&pt->head);
-			drm_free(pt, sizeof(*pt), DRM_MEM_VMAS);
+			kfree(pt);
 			break;
 		}
 	}
diff --git a/drivers/gpu/drm/i810/i810_dma.c b/drivers/gpu/drm/i810/i810_dma.c
index e5de8ea4154..7d1d88cdf2d 100644
--- a/drivers/gpu/drm/i810/i810_dma.c
+++ b/drivers/gpu/drm/i810/i810_dma.c
@@ -227,8 +227,7 @@ static int i810_dma_cleanup(struct drm_device * dev)
 			/* Need to rewrite hardware status page */
 			I810_WRITE(0x02080, 0x1ffff000);
 		}
-		drm_free(dev->dev_private, sizeof(drm_i810_private_t),
-			 DRM_MEM_DRIVER);
+		kfree(dev->dev_private);
 		dev->dev_private = NULL;
 
 		for (i = 0; i < dma->buf_count; i++) {
@@ -439,8 +438,7 @@ static int i810_dma_init(struct drm_device *dev, void *data,
 	switch (init->func) {
 	case I810_INIT_DMA_1_4:
 		DRM_INFO("Using v1.4 init.\n");
-		dev_priv = drm_alloc(sizeof(drm_i810_private_t),
-				     DRM_MEM_DRIVER);
+		dev_priv = kmalloc(sizeof(drm_i810_private_t), GFP_KERNEL);
 		if (dev_priv == NULL)
 			return -ENOMEM;
 		retcode = i810_dma_initialize(dev, dev_priv, init);
diff --git a/drivers/gpu/drm/i830/i830_dma.c b/drivers/gpu/drm/i830/i830_dma.c
index a86ab30b462..877bf6cb14a 100644
--- a/drivers/gpu/drm/i830/i830_dma.c
+++ b/drivers/gpu/drm/i830/i830_dma.c
@@ -232,8 +232,7 @@ static int i830_dma_cleanup(struct drm_device * dev)
 			I830_WRITE(0x02080, 0x1ffff000);
 		}
 
-		drm_free(dev->dev_private, sizeof(drm_i830_private_t),
-			 DRM_MEM_DRIVER);
+		kfree(dev->dev_private);
 		dev->dev_private = NULL;
 
 		for (i = 0; i < dma->buf_count; i++) {
@@ -459,8 +458,7 @@ static int i830_dma_init(struct drm_device *dev, void *data,
 
 	switch (init->func) {
 	case I830_INIT_DMA:
-		dev_priv = drm_alloc(sizeof(drm_i830_private_t),
-				     DRM_MEM_DRIVER);
+		dev_priv = kmalloc(sizeof(drm_i830_private_t), GFP_KERNEL);
 		if (dev_priv == NULL)
 			return -ENOMEM;
 		retcode = i830_dma_initialize(dev, dev_priv, init);
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 1a60626f680..9b28c55c54f 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -643,9 +643,9 @@ static int i915_batchbuffer(struct drm_device *dev, void *data,
 		return -EINVAL;
 
 	if (batch->num_cliprects) {
-		cliprects = drm_calloc(batch->num_cliprects,
-				       sizeof(struct drm_clip_rect),
-				       DRM_MEM_DRIVER);
+		cliprects = kcalloc(batch->num_cliprects,
+				    sizeof(struct drm_clip_rect),
+				    GFP_KERNEL);
 		if (cliprects == NULL)
 			return -ENOMEM;
 
@@ -664,9 +664,7 @@ static int i915_batchbuffer(struct drm_device *dev, void *data,
 		sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv);
 
 fail_free:
-	drm_free(cliprects,
-		 batch->num_cliprects * sizeof(struct drm_clip_rect),
-		 DRM_MEM_DRIVER);
+	kfree(cliprects);
 
 	return ret;
 }
@@ -692,7 +690,7 @@ static int i915_cmdbuffer(struct drm_device *dev, void *data,
 	if (cmdbuf->num_cliprects < 0)
 		return -EINVAL;
 
-	batch_data = drm_alloc(cmdbuf->sz, DRM_MEM_DRIVER);
+	batch_data = kmalloc(cmdbuf->sz, GFP_KERNEL);
 	if (batch_data == NULL)
 		return -ENOMEM;
 
@@ -701,9 +699,8 @@ static int i915_cmdbuffer(struct drm_device *dev, void *data,
 		goto fail_batch_free;
 
 	if (cmdbuf->num_cliprects) {
-		cliprects = drm_calloc(cmdbuf->num_cliprects,
-				       sizeof(struct drm_clip_rect),
-				       DRM_MEM_DRIVER);
+		cliprects = kcalloc(cmdbuf->num_cliprects,
+				    sizeof(struct drm_clip_rect), GFP_KERNEL);
 		if (cliprects == NULL)
 			goto fail_batch_free;
 
@@ -726,11 +723,9 @@ static int i915_cmdbuffer(struct drm_device *dev, void *data,
 		sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv);
 
 fail_clip_free:
-	drm_free(cliprects,
-		 cmdbuf->num_cliprects * sizeof(struct drm_clip_rect),
-		 DRM_MEM_DRIVER);
+	kfree(cliprects);
 fail_batch_free:
-	drm_free(batch_data, cmdbuf->sz, DRM_MEM_DRIVER);
+	kfree(batch_data);
 
 	return ret;
 }
@@ -1067,7 +1062,7 @@ int i915_master_create(struct drm_device *dev, struct drm_master *master)
 {
 	struct drm_i915_master_private *master_priv;
 
-	master_priv = drm_calloc(1, sizeof(*master_priv), DRM_MEM_DRIVER);
+	master_priv = kzalloc(sizeof(*master_priv), GFP_KERNEL);
 	if (!master_priv)
 		return -ENOMEM;
 
@@ -1082,7 +1077,7 @@ void i915_master_destroy(struct drm_device *dev, struct drm_master *master)
 	if (!master_priv)
 		return;
 
-	drm_free(master_priv, sizeof(*master_priv), DRM_MEM_DRIVER);
+	kfree(master_priv);
 
 	master->driver_priv = NULL;
 }
@@ -1111,12 +1106,10 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	dev->types[8] = _DRM_STAT_SECONDARY;
 	dev->types[9] = _DRM_STAT_DMA;
 
-	dev_priv = drm_alloc(sizeof(drm_i915_private_t), DRM_MEM_DRIVER);
+	dev_priv = kzalloc(sizeof(drm_i915_private_t), GFP_KERNEL);
 	if (dev_priv == NULL)
 		return -ENOMEM;
 
-	memset(dev_priv, 0, sizeof(drm_i915_private_t));
-
 	dev->dev_private = (void *)dev_priv;
 	dev_priv->dev = dev;
 
@@ -1221,7 +1214,7 @@ out_iomapfree:
 out_rmmap:
 	iounmap(dev_priv->regs);
 free_priv:
-	drm_free(dev_priv, sizeof(struct drm_i915_private), DRM_MEM_DRIVER);
+	kfree(dev_priv);
 	return ret;
 }
 
@@ -1261,8 +1254,7 @@ int i915_driver_unload(struct drm_device *dev)
 		i915_gem_lastclose(dev);
 	}
 
-	drm_free(dev->dev_private, sizeof(drm_i915_private_t),
-		 DRM_MEM_DRIVER);
+	kfree(dev->dev_private);
 
 	return 0;
 }
@@ -1273,7 +1265,7 @@ int i915_driver_open(struct drm_device *dev, struct drm_file *file_priv)
 
 	DRM_DEBUG_DRIVER(I915_DRV, "\n");
 	i915_file_priv = (struct drm_i915_file_private *)
-	    drm_alloc(sizeof(*i915_file_priv), DRM_MEM_FILES);
+	    kmalloc(sizeof(*i915_file_priv), GFP_KERNEL);
 
 	if (!i915_file_priv)
 		return -ENOMEM;
@@ -1326,7 +1318,7 @@ void i915_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
 {
 	struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
 
-	drm_free(i915_file_priv, sizeof(*i915_file_priv), DRM_MEM_FILES);
+	kfree(i915_file_priv);
 }
 
 struct drm_ioctl_desc i915_ioctls[] = {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 174aef2d648..fd2b8bdffe3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1207,8 +1207,7 @@ i915_gem_create_mmap_offset(struct drm_gem_object *obj)
 
 	/* Set the object up for mmap'ing */
 	list = &obj->map_list;
-	list->map = drm_calloc(1, sizeof(struct drm_map_list),
-			       DRM_MEM_DRIVER);
+	list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
 	if (!list->map)
 		return -ENOMEM;
 
@@ -1248,7 +1247,7 @@ i915_gem_create_mmap_offset(struct drm_gem_object *obj)
 out_free_mm:
 	drm_mm_put_block(list->file_offset_node);
 out_free_list:
-	drm_free(list->map, sizeof(struct drm_map_list), DRM_MEM_DRIVER);
+	kfree(list->map);
 
 	return ret;
 }
@@ -1270,7 +1269,7 @@ i915_gem_free_mmap_offset(struct drm_gem_object *obj)
 	}
 
 	if (list->map) {
-		drm_free(list->map, sizeof(struct drm_map), DRM_MEM_DRIVER);
+		kfree(list->map);
 		list->map = NULL;
 	}
 
@@ -1493,7 +1492,7 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
 	if (file_priv != NULL)
 		i915_file_priv = file_priv->driver_priv;
 
-	request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER);
+	request = kzalloc(sizeof(*request), GFP_KERNEL);
 	if (request == NULL)
 		return 0;
 
@@ -1675,7 +1674,7 @@ i915_gem_retire_requests(struct drm_device *dev)
 
 			list_del(&request->list);
 			list_del(&request->client_list);
-			drm_free(request, sizeof(*request), DRM_MEM_DRIVER);
+			kfree(request);
 		} else
 			break;
 	}
@@ -2833,8 +2832,7 @@ i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj)
 	/* Free the page_cpu_valid mappings which are now stale, whether
 	 * or not we've got I915_GEM_DOMAIN_CPU.
 	 */
-	drm_free(obj_priv->page_cpu_valid, obj->size / PAGE_SIZE,
-		 DRM_MEM_DRIVER);
+	kfree(obj_priv->page_cpu_valid);
 	obj_priv->page_cpu_valid = NULL;
 }
 
@@ -2876,8 +2874,8 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
 	 * newly adding I915_GEM_DOMAIN_CPU
 	 */
 	if (obj_priv->page_cpu_valid == NULL) {
-		obj_priv->page_cpu_valid = drm_calloc(1, obj->size / PAGE_SIZE,
-						      DRM_MEM_DRIVER);
+		obj_priv->page_cpu_valid = kzalloc(obj->size / PAGE_SIZE,
+						   GFP_KERNEL);
 		if (obj_priv->page_cpu_valid == NULL)
 			return -ENOMEM;
 	} else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0)
@@ -3300,8 +3298,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 	}
 
 	if (args->num_cliprects != 0) {
-		cliprects = drm_calloc(args->num_cliprects, sizeof(*cliprects),
-				       DRM_MEM_DRIVER);
+		cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects),
+				    GFP_KERNEL);
 		if (cliprects == NULL)
 			goto pre_mutex_err;
 
@@ -3554,8 +3552,7 @@ err:
 pre_mutex_err:
 	drm_free_large(object_list);
 	drm_free_large(exec_list);
-	drm_free(cliprects, sizeof(*cliprects) * args->num_cliprects,
-		 DRM_MEM_DRIVER);
+	kfree(cliprects);
 
 	return ret;
 }
@@ -3772,7 +3769,7 @@ int i915_gem_init_object(struct drm_gem_object *obj)
 {
 	struct drm_i915_gem_object *obj_priv;
 
-	obj_priv = drm_calloc(1, sizeof(*obj_priv), DRM_MEM_DRIVER);
+	obj_priv = kzalloc(sizeof(*obj_priv), GFP_KERNEL);
 	if (obj_priv == NULL)
 		return -ENOMEM;
 
@@ -3810,9 +3807,9 @@ void i915_gem_free_object(struct drm_gem_object *obj)
 
 	i915_gem_free_mmap_offset(obj);
 
-	drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER);
+	kfree(obj_priv->page_cpu_valid);
 	kfree(obj_priv->bit_17);
-	drm_free(obj->driver_private, 1, DRM_MEM_DRIVER);
+	kfree(obj->driver_private);
 }
 
 /** Unbinds all objects that are on the given buffer list. */
@@ -4266,7 +4263,7 @@ int i915_gem_init_phys_object(struct drm_device *dev,
 	if (dev_priv->mm.phys_objs[id - 1] || !size)
 		return 0;
 
-	phys_obj = drm_calloc(1, sizeof(struct drm_i915_gem_phys_object), DRM_MEM_DRIVER);
+	phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
 	if (!phys_obj)
 		return -ENOMEM;
 
@@ -4285,7 +4282,7 @@ int i915_gem_init_phys_object(struct drm_device *dev,
 
 	return 0;
 kfree_obj:
-	drm_free(phys_obj, sizeof(struct drm_i915_gem_phys_object), DRM_MEM_DRIVER);
+	kfree(phys_obj);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_mem.c b/drivers/gpu/drm/i915/i915_mem.c
index 96e271986d2..83b7b81bb2b 100644
--- a/drivers/gpu/drm/i915/i915_mem.c
+++ b/drivers/gpu/drm/i915/i915_mem.c
@@ -94,8 +94,8 @@ static struct mem_block *split_block(struct mem_block *p, int start, int size,
 {
 	/* Maybe cut off the start of an existing block */
 	if (start > p->start) {
-		struct mem_block *newblock =
-		    drm_alloc(sizeof(*newblock), DRM_MEM_BUFLISTS);
+		struct mem_block *newblock = kmalloc(sizeof(*newblock),
+						     GFP_KERNEL);
 		if (!newblock)
 			goto out;
 		newblock->start = start;
@@ -111,8 +111,8 @@ static struct mem_block *split_block(struct mem_block *p, int start, int size,
 
 	/* Maybe cut off the end of an existing block */
 	if (size < p->size) {
-		struct mem_block *newblock =
-		    drm_alloc(sizeof(*newblock), DRM_MEM_BUFLISTS);
+		struct mem_block *newblock = kmalloc(sizeof(*newblock),
+						     GFP_KERNEL);
 		if (!newblock)
 			goto out;
 		newblock->start = start + size;
@@ -169,7 +169,7 @@ static void free_block(struct mem_block *p)
 		p->size += q->size;
 		p->next = q->next;
 		p->next->prev = p;
-		drm_free(q, sizeof(*q), DRM_MEM_BUFLISTS);
+		kfree(q);
 	}
 
 	if (p->prev->file_priv == NULL) {
@@ -177,7 +177,7 @@ static void free_block(struct mem_block *p)
 		q->size += p->size;
 		q->next = p->next;
 		q->next->prev = q;
-		drm_free(p, sizeof(*q), DRM_MEM_BUFLISTS);
+		kfree(p);
 	}
 }
 
@@ -185,14 +185,14 @@ static void free_block(struct mem_block *p)
  */
 static int init_heap(struct mem_block **heap, int start, int size)
 {
-	struct mem_block *blocks = drm_alloc(sizeof(*blocks), DRM_MEM_BUFLISTS);
+	struct mem_block *blocks = kmalloc(sizeof(*blocks), GFP_KERNEL);
 
 	if (!blocks)
 		return -ENOMEM;
 
-	*heap = drm_alloc(sizeof(**heap), DRM_MEM_BUFLISTS);
+	*heap = kmalloc(sizeof(**heap), GFP_KERNEL);
 	if (!*heap) {
-		drm_free(blocks, sizeof(*blocks), DRM_MEM_BUFLISTS);
+		kfree(blocks);
 		return -ENOMEM;
 	}
 
@@ -233,7 +233,7 @@ void i915_mem_release(struct drm_device * dev, struct drm_file *file_priv,
 			p->size += q->size;
 			p->next = q->next;
 			p->next->prev = p;
-			drm_free(q, sizeof(*q), DRM_MEM_BUFLISTS);
+			kfree(q);
 		}
 	}
 }
@@ -250,10 +250,10 @@ void i915_mem_takedown(struct mem_block **heap)
 	for (p = (*heap)->next; p != *heap;) {
 		struct mem_block *q = p;
 		p = p->next;
-		drm_free(q, sizeof(*q), DRM_MEM_BUFLISTS);
+		kfree(q);
 	}
 
-	drm_free(*heap, sizeof(**heap), DRM_MEM_BUFLISTS);
+	kfree(*heap);
 	*heap = NULL;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index 754dd22fdd7..cdd126d068a 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -124,8 +124,7 @@ parse_lfp_panel_data(struct drm_i915_private *dev_priv,
 	entry = &lvds_lfp_data->data[lvds_options->panel_type];
 	dvo_timing = &entry->dvo_timing;
 
-	panel_fixed_mode = drm_calloc(1, sizeof(*panel_fixed_mode),
-				      DRM_MEM_DRIVER);
+	panel_fixed_mode = kzalloc(sizeof(*panel_fixed_mode), GFP_KERNEL);
 
 	fill_detail_timing_data(panel_fixed_mode, dvo_timing);
 
@@ -156,8 +155,7 @@ parse_sdvo_panel_data(struct drm_i915_private *dev_priv,
 	if (!dvo_timing)
 		return;
 
-	panel_fixed_mode = drm_calloc(1, sizeof(*panel_fixed_mode),
-				      DRM_MEM_DRIVER);
+	panel_fixed_mode = kzalloc(sizeof(*panel_fixed_mode), GFP_KERNEL);
 
 	if (!panel_fixed_mode)
 		return;
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index 50d7ed70b33..ea68992e441 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -1561,8 +1561,7 @@ intel_tv_destroy (struct drm_connector *connector)
 
 	drm_sysfs_connector_remove(connector);
 	drm_connector_cleanup(connector);
-	drm_free(intel_output, sizeof(struct intel_output) + sizeof(struct intel_tv_priv),
-		 DRM_MEM_DRIVER);
+	kfree(intel_output);
 }
 
 
@@ -1695,8 +1694,8 @@ intel_tv_init(struct drm_device *dev)
 	    (tv_dac_off & TVDAC_STATE_CHG_EN) != 0)
 		return;
 
-	intel_output = drm_calloc(1, sizeof(struct intel_output) +
-				  sizeof(struct intel_tv_priv), DRM_MEM_DRIVER);
+	intel_output = kzalloc(sizeof(struct intel_output) +
+			       sizeof(struct intel_tv_priv), GFP_KERNEL);
 	if (!intel_output) {
 		return;
 	}
@@ -1730,8 +1729,8 @@ intel_tv_init(struct drm_device *dev)
 	connector->doublescan_allowed = false;
 
 	/* Create TV properties then attach current values */
-	tv_format_names = drm_alloc(sizeof(char *) * NUM_TV_MODES,
-				    DRM_MEM_DRIVER);
+	tv_format_names = kmalloc(sizeof(char *) * NUM_TV_MODES,
+				  GFP_KERNEL);
 	if (!tv_format_names)
 		goto out;
 	for (i = 0; i < NUM_TV_MODES; i++)
diff --git a/drivers/gpu/drm/mga/mga_dma.c b/drivers/gpu/drm/mga/mga_dma.c
index 7a6bf9ffc5a..6c67a02910c 100644
--- a/drivers/gpu/drm/mga/mga_dma.c
+++ b/drivers/gpu/drm/mga/mga_dma.c
@@ -254,23 +254,20 @@ static int mga_freelist_init(struct drm_device * dev, drm_mga_private_t * dev_pr
 	int i;
 	DRM_DEBUG("count=%d\n", dma->buf_count);
 
-	dev_priv->head = drm_alloc(sizeof(drm_mga_freelist_t), DRM_MEM_DRIVER);
+	dev_priv->head = kzalloc(sizeof(drm_mga_freelist_t), GFP_KERNEL);
 	if (dev_priv->head == NULL)
 		return -ENOMEM;
 
-	memset(dev_priv->head, 0, sizeof(drm_mga_freelist_t));
 	SET_AGE(&dev_priv->head->age, MGA_BUFFER_USED, 0);
 
 	for (i = 0; i < dma->buf_count; i++) {
 		buf = dma->buflist[i];
 		buf_priv = buf->dev_private;
 
-		entry = drm_alloc(sizeof(drm_mga_freelist_t), DRM_MEM_DRIVER);
+		entry = kzalloc(sizeof(drm_mga_freelist_t), GFP_KERNEL);
 		if (entry == NULL)
 			return -ENOMEM;
 
-		memset(entry, 0, sizeof(drm_mga_freelist_t));
-
 		entry->next = dev_priv->head->next;
 		entry->prev = dev_priv->head;
 		SET_AGE(&entry->age, MGA_BUFFER_FREE, 0);
@@ -301,7 +298,7 @@ static void mga_freelist_cleanup(struct drm_device * dev)
 	entry = dev_priv->head;
 	while (entry) {
 		next = entry->next;
-		drm_free(entry, sizeof(drm_mga_freelist_t), DRM_MEM_DRIVER);
+		kfree(entry);
 		entry = next;
 	}
 
@@ -399,12 +396,11 @@ int mga_driver_load(struct drm_device * dev, unsigned long flags)
 	drm_mga_private_t *dev_priv;
 	int ret;
 
-	dev_priv = drm_alloc(sizeof(drm_mga_private_t), DRM_MEM_DRIVER);
+	dev_priv = kzalloc(sizeof(drm_mga_private_t), GFP_KERNEL);
 	if (!dev_priv)
 		return -ENOMEM;
 
 	dev->dev_private = (void *)dev_priv;
-	memset(dev_priv, 0, sizeof(drm_mga_private_t));
 
 	dev_priv->usec_timeout = MGA_DEFAULT_USEC_TIMEOUT;
 	dev_priv->chipset = flags;
@@ -1150,7 +1146,7 @@ int mga_dma_buffers(struct drm_device *dev, void *data,
  */
 int mga_driver_unload(struct drm_device * dev)
 {
-	drm_free(dev->dev_private, sizeof(drm_mga_private_t), DRM_MEM_DRIVER);
+	kfree(dev->dev_private);
 	dev->dev_private = NULL;
 
 	return 0;
diff --git a/drivers/gpu/drm/r128/r128_cce.c b/drivers/gpu/drm/r128/r128_cce.c
index 077c0455a6b..c75fd356404 100644
--- a/drivers/gpu/drm/r128/r128_cce.c
+++ b/drivers/gpu/drm/r128/r128_cce.c
@@ -353,12 +353,10 @@ static int r128_do_init_cce(struct drm_device * dev, drm_r128_init_t * init)
 
 	DRM_DEBUG("\n");
 
-	dev_priv = drm_alloc(sizeof(drm_r128_private_t), DRM_MEM_DRIVER);
+	dev_priv = kzalloc(sizeof(drm_r128_private_t), GFP_KERNEL);
 	if (dev_priv == NULL)
 		return -ENOMEM;
 
-	memset(dev_priv, 0, sizeof(drm_r128_private_t));
-
 	dev_priv->is_pci = init->is_pci;
 
 	if (dev_priv->is_pci && !dev->sg) {
@@ -619,8 +617,7 @@ int r128_do_cleanup_cce(struct drm_device * dev)
 					    ("failed to cleanup PCI GART!\n");
 		}
 
-		drm_free(dev->dev_private, sizeof(drm_r128_private_t),
-			 DRM_MEM_DRIVER);
+		kfree(dev->dev_private);
 		dev->dev_private = NULL;
 	}
 
@@ -768,18 +765,17 @@ static int r128_freelist_init(struct drm_device * dev)
 	drm_r128_freelist_t *entry;
 	int i;
 
-	dev_priv->head = drm_alloc(sizeof(drm_r128_freelist_t), DRM_MEM_DRIVER);
+	dev_priv->head = kzalloc(sizeof(drm_r128_freelist_t), GFP_KERNEL);
 	if (dev_priv->head == NULL)
 		return -ENOMEM;
 
-	memset(dev_priv->head, 0, sizeof(drm_r128_freelist_t));
 	dev_priv->head->age = R128_BUFFER_USED;
 
 	for (i = 0; i < dma->buf_count; i++) {
 		buf = dma->buflist[i];
 		buf_priv = buf->dev_private;
 
-		entry = drm_alloc(sizeof(drm_r128_freelist_t), DRM_MEM_DRIVER);
+		entry = kmalloc(sizeof(drm_r128_freelist_t), GFP_KERNEL);
 		if (!entry)
 			return -ENOMEM;
 
diff --git a/drivers/gpu/drm/r128/r128_state.c b/drivers/gpu/drm/r128/r128_state.c
index f7a5b574076..026a48c95c8 100644
--- a/drivers/gpu/drm/r128/r128_state.c
+++ b/drivers/gpu/drm/r128/r128_state.c
@@ -910,24 +910,24 @@ static int r128_cce_dispatch_write_span(struct drm_device * dev,
 	}
 
 	buffer_size = depth->n * sizeof(u32);
-	buffer = drm_alloc(buffer_size, DRM_MEM_BUFS);
+	buffer = kmalloc(buffer_size, GFP_KERNEL);
 	if (buffer == NULL)
 		return -ENOMEM;
 	if (DRM_COPY_FROM_USER(buffer, depth->buffer, buffer_size)) {
-		drm_free(buffer, buffer_size, DRM_MEM_BUFS);
+		kfree(buffer);
 		return -EFAULT;
 	}
 
 	mask_size = depth->n * sizeof(u8);
 	if (depth->mask) {
-		mask = drm_alloc(mask_size, DRM_MEM_BUFS);
+		mask = kmalloc(mask_size, GFP_KERNEL);
 		if (mask == NULL) {
-			drm_free(buffer, buffer_size, DRM_MEM_BUFS);
+			kfree(buffer);
 			return -ENOMEM;
 		}
 		if (DRM_COPY_FROM_USER(mask, depth->mask, mask_size)) {
-			drm_free(buffer, buffer_size, DRM_MEM_BUFS);
-			drm_free(mask, mask_size, DRM_MEM_BUFS);
+			kfree(buffer);
+			kfree(mask);
 			return -EFAULT;
 		}
 
@@ -954,7 +954,7 @@ static int r128_cce_dispatch_write_span(struct drm_device * dev,
 			}
 		}
 
-		drm_free(mask, mask_size, DRM_MEM_BUFS);
+		kfree(mask);
 	} else {
 		for (i = 0; i < count; i++, x++) {
 			BEGIN_RING(6);
@@ -978,7 +978,7 @@ static int r128_cce_dispatch_write_span(struct drm_device * dev,
 		}
 	}
 
-	drm_free(buffer, buffer_size, DRM_MEM_BUFS);
+	kfree(buffer);
 
 	return 0;
 }
@@ -1000,54 +1000,54 @@ static int r128_cce_dispatch_write_pixels(struct drm_device * dev,
 
 	xbuf_size = count * sizeof(*x);
 	ybuf_size = count * sizeof(*y);
-	x = drm_alloc(xbuf_size, DRM_MEM_BUFS);
+	x = kmalloc(xbuf_size, GFP_KERNEL);
 	if (x == NULL) {
 		return -ENOMEM;
 	}
-	y = drm_alloc(ybuf_size, DRM_MEM_BUFS);
+	y = kmalloc(ybuf_size, GFP_KERNEL);
 	if (y == NULL) {
-		drm_free(x, xbuf_size, DRM_MEM_BUFS);
+		kfree(x);
 		return -ENOMEM;
 	}
 	if (DRM_COPY_FROM_USER(x, depth->x, xbuf_size)) {
-		drm_free(x, xbuf_size, DRM_MEM_BUFS);
-		drm_free(y, ybuf_size, DRM_MEM_BUFS);
+		kfree(x);
+		kfree(y);
 		return -EFAULT;
 	}
 	if (DRM_COPY_FROM_USER(y, depth->y, xbuf_size)) {
-		drm_free(x, xbuf_size, DRM_MEM_BUFS);
-		drm_free(y, ybuf_size, DRM_MEM_BUFS);
+		kfree(x);
+		kfree(y);
 		return -EFAULT;
 	}
 
 	buffer_size = depth->n * sizeof(u32);
-	buffer = drm_alloc(buffer_size, DRM_MEM_BUFS);
+	buffer = kmalloc(buffer_size, GFP_KERNEL);
 	if (buffer == NULL) {
-		drm_free(x, xbuf_size, DRM_MEM_BUFS);
-		drm_free(y, ybuf_size, DRM_MEM_BUFS);
+		kfree(x);
+		kfree(y);
 		return -ENOMEM;
 	}
 	if (DRM_COPY_FROM_USER(buffer, depth->buffer, buffer_size)) {
-		drm_free(x, xbuf_size, DRM_MEM_BUFS);
-		drm_free(y, ybuf_size, DRM_MEM_BUFS);
-		drm_free(buffer, buffer_size, DRM_MEM_BUFS);
+		kfree(x);
+		kfree(y);
+		kfree(buffer);
 		return -EFAULT;
 	}
 
 	if (depth->mask) {
 		mask_size = depth->n * sizeof(u8);
-		mask = drm_alloc(mask_size, DRM_MEM_BUFS);
+		mask = kmalloc(mask_size, GFP_KERNEL);
 		if (mask == NULL) {
-			drm_free(x, xbuf_size, DRM_MEM_BUFS);
-			drm_free(y, ybuf_size, DRM_MEM_BUFS);
-			drm_free(buffer, buffer_size, DRM_MEM_BUFS);
+			kfree(x);
+			kfree(y);
+			kfree(buffer);
 			return -ENOMEM;
 		}
 		if (DRM_COPY_FROM_USER(mask, depth->mask, mask_size)) {
-			drm_free(x, xbuf_size, DRM_MEM_BUFS);
-			drm_free(y, ybuf_size, DRM_MEM_BUFS);
-			drm_free(buffer, buffer_size, DRM_MEM_BUFS);
-			drm_free(mask, mask_size, DRM_MEM_BUFS);
+			kfree(x);
+			kfree(y);
+			kfree(buffer);
+			kfree(mask);
 			return -EFAULT;
 		}
 
@@ -1074,7 +1074,7 @@ static int r128_cce_dispatch_write_pixels(struct drm_device * dev,
 			}
 		}
 
-		drm_free(mask, mask_size, DRM_MEM_BUFS);
+		kfree(mask);
 	} else {
 		for (i = 0; i < count; i++) {
 			BEGIN_RING(6);
@@ -1098,9 +1098,9 @@ static int r128_cce_dispatch_write_pixels(struct drm_device * dev,
 		}
 	}
 
-	drm_free(x, xbuf_size, DRM_MEM_BUFS);
-	drm_free(y, ybuf_size, DRM_MEM_BUFS);
-	drm_free(buffer, buffer_size, DRM_MEM_BUFS);
+	kfree(x);
+	kfree(y);
+	kfree(buffer);
 
 	return 0;
 }
@@ -1167,23 +1167,23 @@ static int r128_cce_dispatch_read_pixels(struct drm_device * dev,
 
 	xbuf_size = count * sizeof(*x);
 	ybuf_size = count * sizeof(*y);
-	x = drm_alloc(xbuf_size, DRM_MEM_BUFS);
+	x = kmalloc(xbuf_size, GFP_KERNEL);
 	if (x == NULL) {
 		return -ENOMEM;
 	}
-	y = drm_alloc(ybuf_size, DRM_MEM_BUFS);
+	y = kmalloc(ybuf_size, GFP_KERNEL);
 	if (y == NULL) {
-		drm_free(x, xbuf_size, DRM_MEM_BUFS);
+		kfree(x);
 		return -ENOMEM;
 	}
 	if (DRM_COPY_FROM_USER(x, depth->x, xbuf_size)) {
-		drm_free(x, xbuf_size, DRM_MEM_BUFS);
-		drm_free(y, ybuf_size, DRM_MEM_BUFS);
+		kfree(x);
+		kfree(y);
 		return -EFAULT;
 	}
 	if (DRM_COPY_FROM_USER(y, depth->y, ybuf_size)) {
-		drm_free(x, xbuf_size, DRM_MEM_BUFS);
-		drm_free(y, ybuf_size, DRM_MEM_BUFS);
+		kfree(x);
+		kfree(y);
 		return -EFAULT;
 	}
 
@@ -1210,8 +1210,8 @@ static int r128_cce_dispatch_read_pixels(struct drm_device * dev,
 		ADVANCE_RING();
 	}
 
-	drm_free(x, xbuf_size, DRM_MEM_BUFS);
-	drm_free(y, ybuf_size, DRM_MEM_BUFS);
+	kfree(x);
+	kfree(y);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c
index 89c4c44169f..d8356827ef1 100644
--- a/drivers/gpu/drm/radeon/radeon_cp.c
+++ b/drivers/gpu/drm/radeon/radeon_cp.c
@@ -2045,11 +2045,10 @@ int radeon_driver_load(struct drm_device *dev, unsigned long flags)
 	drm_radeon_private_t *dev_priv;
 	int ret = 0;
 
-	dev_priv = drm_alloc(sizeof(drm_radeon_private_t), DRM_MEM_DRIVER);
+	dev_priv = kzalloc(sizeof(drm_radeon_private_t), GFP_KERNEL);
 	if (dev_priv == NULL)
 		return -ENOMEM;
 
-	memset(dev_priv, 0, sizeof(drm_radeon_private_t));
 	dev->dev_private = (void *)dev_priv;
 	dev_priv->flags = flags;
 
@@ -2103,7 +2102,7 @@ int radeon_master_create(struct drm_device *dev, struct drm_master *master)
 	unsigned long sareapage;
 	int ret;
 
-	master_priv = drm_calloc(1, sizeof(*master_priv), DRM_MEM_DRIVER);
+	master_priv = kzalloc(sizeof(*master_priv), GFP_KERNEL);
 	if (!master_priv)
 		return -ENOMEM;
 
@@ -2137,7 +2136,7 @@ void radeon_master_destroy(struct drm_device *dev, struct drm_master *master)
 	if (master_priv->sarea)
 		drm_rmmap_locked(dev, master_priv->sarea);
 
-	drm_free(master_priv, sizeof(*master_priv), DRM_MEM_DRIVER);
+	kfree(master_priv);
 
 	master->driver_priv = NULL;
 }
@@ -2171,7 +2170,7 @@ int radeon_driver_unload(struct drm_device *dev)
 
 	drm_rmmap(dev, dev_priv->mmio);
 
-	drm_free(dev_priv, sizeof(*dev_priv), DRM_MEM_DRIVER);
+	kfree(dev_priv);
 
 	dev->dev_private = NULL;
 	return 0;
diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c
index 71465ed2688..dd438d32e5c 100644
--- a/drivers/gpu/drm/radeon/radeon_i2c.c
+++ b/drivers/gpu/drm/radeon/radeon_i2c.c
@@ -162,7 +162,7 @@ struct radeon_i2c_chan *radeon_i2c_create(struct drm_device *dev,
 	struct radeon_i2c_chan *i2c;
 	int ret;
 
-	i2c = drm_calloc(1, sizeof(struct radeon_i2c_chan), DRM_MEM_DRIVER);
+	i2c = kzalloc(sizeof(struct radeon_i2c_chan), GFP_KERNEL);
 	if (i2c == NULL)
 		return NULL;
 
@@ -189,7 +189,7 @@ struct radeon_i2c_chan *radeon_i2c_create(struct drm_device *dev,
 
 	return i2c;
 out_free:
-	drm_free(i2c, sizeof(struct radeon_i2c_chan), DRM_MEM_DRIVER);
+	kfree(i2c);
 	return NULL;
 
 }
@@ -200,7 +200,7 @@ void radeon_i2c_destroy(struct radeon_i2c_chan *i2c)
 		return;
 
 	i2c_del_adapter(&i2c->adapter);
-	drm_free(i2c, sizeof(struct radeon_i2c_chan), DRM_MEM_DRIVER);
+	kfree(i2c);
 }
 
 struct drm_encoder *radeon_best_encoder(struct drm_connector *connector)
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 64f42b19cbf..4612a7c146d 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -169,7 +169,7 @@ int radeon_master_create_kms(struct drm_device *dev, struct drm_master *master)
 	unsigned long sareapage;
 	int ret;
 
-	master_priv = drm_calloc(1, sizeof(*master_priv), DRM_MEM_DRIVER);
+	master_priv = kzalloc(sizeof(*master_priv), GFP_KERNEL);
 	if (master_priv == NULL) {
 		return -ENOMEM;
 	}
@@ -199,7 +199,7 @@ void radeon_master_destroy_kms(struct drm_device *dev,
 	if (master_priv->sarea) {
 		drm_rmmap_locked(dev, master_priv->sarea);
 	}
-	drm_free(master_priv, sizeof(*master_priv), DRM_MEM_DRIVER);
+	kfree(master_priv);
 	master->driver_priv = NULL;
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_mem.c b/drivers/gpu/drm/radeon/radeon_mem.c
index 4af5286a36f..ed95155c4b1 100644
--- a/drivers/gpu/drm/radeon/radeon_mem.c
+++ b/drivers/gpu/drm/radeon/radeon_mem.c
@@ -43,8 +43,8 @@ static struct mem_block *split_block(struct mem_block *p, int start, int size,
 {
 	/* Maybe cut off the start of an existing block */
 	if (start > p->start) {
-		struct mem_block *newblock =
-		    drm_alloc(sizeof(*newblock), DRM_MEM_BUFS);
+		struct mem_block *newblock = kmalloc(sizeof(*newblock),
+						     GFP_KERNEL);
 		if (!newblock)
 			goto out;
 		newblock->start = start;
@@ -60,8 +60,8 @@ static struct mem_block *split_block(struct mem_block *p, int start, int size,
 
 	/* Maybe cut off the end of an existing block */
 	if (size < p->size) {
-		struct mem_block *newblock =
-		    drm_alloc(sizeof(*newblock), DRM_MEM_BUFS);
+		struct mem_block *newblock = kmalloc(sizeof(*newblock),
+						     GFP_KERNEL);
 		if (!newblock)
 			goto out;
 		newblock->start = start + size;
@@ -118,7 +118,7 @@ static void free_block(struct mem_block *p)
 		p->size += q->size;
 		p->next = q->next;
 		p->next->prev = p;
-		drm_free(q, sizeof(*q), DRM_MEM_BUFS);
+		kfree(q);
 	}
 
 	if (p->prev->file_priv == NULL) {
@@ -126,7 +126,7 @@ static void free_block(struct mem_block *p)
 		q->size += p->size;
 		q->next = p->next;
 		q->next->prev = q;
-		drm_free(p, sizeof(*q), DRM_MEM_BUFS);
+		kfree(p);
 	}
 }
 
@@ -134,14 +134,14 @@ static void free_block(struct mem_block *p)
  */
 static int init_heap(struct mem_block **heap, int start, int size)
 {
-	struct mem_block *blocks = drm_alloc(sizeof(*blocks), DRM_MEM_BUFS);
+	struct mem_block *blocks = kmalloc(sizeof(*blocks), GFP_KERNEL);
 
 	if (!blocks)
 		return -ENOMEM;
 
-	*heap = drm_alloc(sizeof(**heap), DRM_MEM_BUFS);
+	*heap = kmalloc(sizeof(**heap), GFP_KERNEL);
 	if (!*heap) {
-		drm_free(blocks, sizeof(*blocks), DRM_MEM_BUFS);
+		kfree(blocks);
 		return -ENOMEM;
 	}
 
@@ -179,7 +179,7 @@ void radeon_mem_release(struct drm_file *file_priv, struct mem_block *heap)
 			p->size += q->size;
 			p->next = q->next;
 			p->next->prev = p;
-			drm_free(q, sizeof(*q), DRM_MEM_DRIVER);
+			kfree(q);
 		}
 	}
 }
@@ -196,10 +196,10 @@ void radeon_mem_takedown(struct mem_block **heap)
 	for (p = (*heap)->next; p != *heap;) {
 		struct mem_block *q = p;
 		p = p->next;
-		drm_free(q, sizeof(*q), DRM_MEM_DRIVER);
+		kfree(q);
 	}
 
-	drm_free(*heap, sizeof(**heap), DRM_MEM_DRIVER);
+	kfree(*heap);
 	*heap = NULL;
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c
index fa728ec6ed3..46645f3e032 100644
--- a/drivers/gpu/drm/radeon/radeon_state.c
+++ b/drivers/gpu/drm/radeon/radeon_state.c
@@ -2866,12 +2866,12 @@ static int radeon_cp_cmdbuf(struct drm_device *dev, void *data, struct drm_file
 	 */
 	orig_bufsz = cmdbuf->bufsz;
 	if (orig_bufsz != 0) {
-		kbuf = drm_alloc(cmdbuf->bufsz, DRM_MEM_DRIVER);
+		kbuf = kmalloc(cmdbuf->bufsz, GFP_KERNEL);
 		if (kbuf == NULL)
 			return -ENOMEM;
 		if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf->buf,
 				       cmdbuf->bufsz)) {
-			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
+			kfree(kbuf);
 			return -EFAULT;
 		}
 		cmdbuf->buf = kbuf;
@@ -2884,7 +2884,7 @@ static int radeon_cp_cmdbuf(struct drm_device *dev, void *data, struct drm_file
 		temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf);
 
 		if (orig_bufsz != 0)
-			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
+			kfree(kbuf);
 
 		return temp;
 	}
@@ -2991,7 +2991,7 @@ static int radeon_cp_cmdbuf(struct drm_device *dev, void *data, struct drm_file
 	}
 
 	if (orig_bufsz != 0)
-		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
+		kfree(kbuf);
 
 	DRM_DEBUG("DONE\n");
 	COMMIT_RING();
@@ -2999,7 +2999,7 @@ static int radeon_cp_cmdbuf(struct drm_device *dev, void *data, struct drm_file
 
       err:
 	if (orig_bufsz != 0)
-		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
+		kfree(kbuf);
 	return -EINVAL;
 }
 
@@ -3175,9 +3175,7 @@ int radeon_driver_open(struct drm_device *dev, struct drm_file *file_priv)
 	struct drm_radeon_driver_file_fields *radeon_priv;
 
 	DRM_DEBUG("\n");
-	radeon_priv =
-	    (struct drm_radeon_driver_file_fields *)
-	    drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
+	radeon_priv = kmalloc(sizeof(*radeon_priv), GFP_KERNEL);
 
 	if (!radeon_priv)
 		return -ENOMEM;
@@ -3196,7 +3194,7 @@ void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
 	struct drm_radeon_driver_file_fields *radeon_priv =
 	    file_priv->driver_priv;
 
-	drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
+	kfree(radeon_priv);
 }
 
 struct drm_ioctl_desc radeon_ioctls[] = {
diff --git a/drivers/gpu/drm/savage/savage_bci.c b/drivers/gpu/drm/savage/savage_bci.c
index 456cd040f31..bff6fc2524c 100644
--- a/drivers/gpu/drm/savage/savage_bci.c
+++ b/drivers/gpu/drm/savage/savage_bci.c
@@ -298,8 +298,8 @@ static int savage_dma_init(drm_savage_private_t * dev_priv)
 
 	dev_priv->nr_dma_pages = dev_priv->cmd_dma->size /
 	    (SAVAGE_DMA_PAGE_SIZE * 4);
-	dev_priv->dma_pages = drm_alloc(sizeof(drm_savage_dma_page_t) *
-					dev_priv->nr_dma_pages, DRM_MEM_DRIVER);
+	dev_priv->dma_pages = kmalloc(sizeof(drm_savage_dma_page_t) *
+				      dev_priv->nr_dma_pages, GFP_KERNEL);
 	if (dev_priv->dma_pages == NULL)
 		return -ENOMEM;
 
@@ -539,7 +539,7 @@ int savage_driver_load(struct drm_device *dev, unsigned long chipset)
 {
 	drm_savage_private_t *dev_priv;
 
-	dev_priv = drm_alloc(sizeof(drm_savage_private_t), DRM_MEM_DRIVER);
+	dev_priv = kmalloc(sizeof(drm_savage_private_t), GFP_KERNEL);
 	if (dev_priv == NULL)
 		return -ENOMEM;
 
@@ -671,7 +671,7 @@ int savage_driver_unload(struct drm_device *dev)
 {
 	drm_savage_private_t *dev_priv = dev->dev_private;
 
-	drm_free(dev_priv, sizeof(drm_savage_private_t), DRM_MEM_DRIVER);
+	kfree(dev_priv);
 
 	return 0;
 }
@@ -804,8 +804,8 @@ static int savage_do_init_bci(struct drm_device * dev, drm_savage_init_t * init)
 		dev_priv->fake_dma.offset = 0;
 		dev_priv->fake_dma.size = SAVAGE_FAKE_DMA_SIZE;
 		dev_priv->fake_dma.type = _DRM_SHM;
-		dev_priv->fake_dma.handle = drm_alloc(SAVAGE_FAKE_DMA_SIZE,
-						      DRM_MEM_DRIVER);
+		dev_priv->fake_dma.handle = kmalloc(SAVAGE_FAKE_DMA_SIZE,
+						    GFP_KERNEL);
 		if (!dev_priv->fake_dma.handle) {
 			DRM_ERROR("could not allocate faked DMA buffer!\n");
 			savage_do_cleanup_bci(dev);
@@ -903,9 +903,7 @@ static int savage_do_cleanup_bci(struct drm_device * dev)
 	drm_savage_private_t *dev_priv = dev->dev_private;
 
 	if (dev_priv->cmd_dma == &dev_priv->fake_dma) {
-		if (dev_priv->fake_dma.handle)
-			drm_free(dev_priv->fake_dma.handle,
-				 SAVAGE_FAKE_DMA_SIZE, DRM_MEM_DRIVER);
+		kfree(dev_priv->fake_dma.handle);
 	} else if (dev_priv->cmd_dma && dev_priv->cmd_dma->handle &&
 		   dev_priv->cmd_dma->type == _DRM_AGP &&
 		   dev_priv->dma_type == SAVAGE_DMA_AGP)
@@ -920,10 +918,7 @@ static int savage_do_cleanup_bci(struct drm_device * dev)
 		dev->agp_buffer_map = NULL;
 	}
 
-	if (dev_priv->dma_pages)
-		drm_free(dev_priv->dma_pages,
-			 sizeof(drm_savage_dma_page_t) * dev_priv->nr_dma_pages,
-			 DRM_MEM_DRIVER);
+	kfree(dev_priv->dma_pages);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/savage/savage_state.c b/drivers/gpu/drm/savage/savage_state.c
index 5f6238fdf1f..8a3e31599c9 100644
--- a/drivers/gpu/drm/savage/savage_state.c
+++ b/drivers/gpu/drm/savage/savage_state.c
@@ -988,20 +988,20 @@ int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_
 	 * for locking on FreeBSD.
 	 */
 	if (cmdbuf->size) {
-		kcmd_addr = drm_alloc(cmdbuf->size * 8, DRM_MEM_DRIVER);
+		kcmd_addr = kmalloc(cmdbuf->size * 8, GFP_KERNEL);
 		if (kcmd_addr == NULL)
 			return -ENOMEM;
 
 		if (DRM_COPY_FROM_USER(kcmd_addr, cmdbuf->cmd_addr,
 				       cmdbuf->size * 8))
 		{
-			drm_free(kcmd_addr, cmdbuf->size * 8, DRM_MEM_DRIVER);
+			kfree(kcmd_addr);
 			return -EFAULT;
 		}
 		cmdbuf->cmd_addr = kcmd_addr;
 	}
 	if (cmdbuf->vb_size) {
-		kvb_addr = drm_alloc(cmdbuf->vb_size, DRM_MEM_DRIVER);
+		kvb_addr = kmalloc(cmdbuf->vb_size, GFP_KERNEL);
 		if (kvb_addr == NULL) {
 			ret = -ENOMEM;
 			goto done;
@@ -1015,8 +1015,8 @@ int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_
 		cmdbuf->vb_addr = kvb_addr;
 	}
 	if (cmdbuf->nbox) {
-		kbox_addr = drm_alloc(cmdbuf->nbox * sizeof(struct drm_clip_rect),
-				       DRM_MEM_DRIVER);
+		kbox_addr = kmalloc(cmdbuf->nbox * sizeof(struct drm_clip_rect),
+				    GFP_KERNEL);
 		if (kbox_addr == NULL) {
 			ret = -ENOMEM;
 			goto done;
@@ -1154,10 +1154,9 @@ int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_
 
 done:
 	/* If we didn't need to allocate them, these'll be NULL */
-	drm_free(kcmd_addr, cmdbuf->size * 8, DRM_MEM_DRIVER);
-	drm_free(kvb_addr, cmdbuf->vb_size, DRM_MEM_DRIVER);
-	drm_free(kbox_addr, cmdbuf->nbox * sizeof(struct drm_clip_rect),
-		 DRM_MEM_DRIVER);
+	kfree(kcmd_addr);
+	kfree(kvb_addr);
+	kfree(kbox_addr);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/sis/sis_drv.c b/drivers/gpu/drm/sis/sis_drv.c
index 7dacc64e9b5..e725cc0b115 100644
--- a/drivers/gpu/drm/sis/sis_drv.c
+++ b/drivers/gpu/drm/sis/sis_drv.c
@@ -40,7 +40,7 @@ static int sis_driver_load(struct drm_device *dev, unsigned long chipset)
 	drm_sis_private_t *dev_priv;
 	int ret;
 
-	dev_priv = drm_calloc(1, sizeof(drm_sis_private_t), DRM_MEM_DRIVER);
+	dev_priv = kzalloc(sizeof(drm_sis_private_t), GFP_KERNEL);
 	if (dev_priv == NULL)
 		return -ENOMEM;
 
@@ -48,7 +48,7 @@ static int sis_driver_load(struct drm_device *dev, unsigned long chipset)
 	dev_priv->chipset = chipset;
 	ret = drm_sman_init(&dev_priv->sman, 2, 12, 8);
 	if (ret) {
-		drm_free(dev_priv, sizeof(dev_priv), DRM_MEM_DRIVER);
+		kfree(dev_priv);
 	}
 
 	return ret;
@@ -59,7 +59,7 @@ static int sis_driver_unload(struct drm_device *dev)
 	drm_sis_private_t *dev_priv = dev->dev_private;
 
 	drm_sman_takedown(&dev_priv->sman);
-	drm_free(dev_priv, sizeof(*dev_priv), DRM_MEM_DRIVER);
+	kfree(dev_priv);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/via/via_map.c b/drivers/gpu/drm/via/via_map.c
index 2c4f0b48579..6e6f9159163 100644
--- a/drivers/gpu/drm/via/via_map.c
+++ b/drivers/gpu/drm/via/via_map.c
@@ -96,7 +96,7 @@ int via_driver_load(struct drm_device *dev, unsigned long chipset)
 	drm_via_private_t *dev_priv;
 	int ret = 0;
 
-	dev_priv = drm_calloc(1, sizeof(drm_via_private_t), DRM_MEM_DRIVER);
+	dev_priv = kzalloc(sizeof(drm_via_private_t), GFP_KERNEL);
 	if (dev_priv == NULL)
 		return -ENOMEM;
 
@@ -106,14 +106,14 @@ int via_driver_load(struct drm_device *dev, unsigned long chipset)
 
 	ret = drm_sman_init(&dev_priv->sman, 2, 12, 8);
 	if (ret) {
-		drm_free(dev_priv, sizeof(*dev_priv), DRM_MEM_DRIVER);
+		kfree(dev_priv);
 		return ret;
 	}
 
 	ret = drm_vblank_init(dev, 1);
 	if (ret) {
 		drm_sman_takedown(&dev_priv->sman);
-		drm_free(dev_priv, sizeof(drm_via_private_t), DRM_MEM_DRIVER);
+		kfree(dev_priv);
 		return ret;
 	}
 
@@ -126,7 +126,7 @@ int via_driver_unload(struct drm_device *dev)
 
 	drm_sman_takedown(&dev_priv->sman);
 
-	drm_free(dev_priv, sizeof(drm_via_private_t), DRM_MEM_DRIVER);
+	kfree(dev_priv);
 
 	return 0;
 }
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index d4ddc22e46b..45b67d9c39c 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -34,9 +34,6 @@
 #ifndef _DRM_P_H_
 #define _DRM_P_H_
 
-/* If you want the memory alloc debug functionality, change define below */
-/* #define DEBUG_MEMORY */
-
 #ifdef __KERNEL__
 #ifdef __alpha__
 /* add include of current.h so that "current" is defined
@@ -133,31 +130,6 @@ extern void drm_ut_debug_printk(unsigned int request_level,
 
 #define DRM_FLAG_DEBUG	  0x01
 
-#define DRM_MEM_DMA	   0
-#define DRM_MEM_SAREA	   1
-#define DRM_MEM_DRIVER	   2
-#define DRM_MEM_MAGIC	   3
-#define DRM_MEM_IOCTLS	   4
-#define DRM_MEM_MAPS	   5
-#define DRM_MEM_VMAS	   6
-#define DRM_MEM_BUFS	   7
-#define DRM_MEM_SEGS	   8
-#define DRM_MEM_PAGES	   9
-#define DRM_MEM_FILES	  10
-#define DRM_MEM_QUEUES	  11
-#define DRM_MEM_CMDS	  12
-#define DRM_MEM_MAPPINGS  13
-#define DRM_MEM_BUFLISTS  14
-#define DRM_MEM_AGPLISTS  15
-#define DRM_MEM_TOTALAGP  16
-#define DRM_MEM_BOUNDAGP  17
-#define DRM_MEM_CTXBITMAP 18
-#define DRM_MEM_STUB      19
-#define DRM_MEM_SGLISTS   20
-#define DRM_MEM_CTXLIST   21
-#define DRM_MEM_MM        22
-#define DRM_MEM_HASHTAB   23
-
 #define DRM_MAX_CTXBITMAP (PAGE_SIZE * 8)
 #define DRM_MAP_HASH_OFFSET 0x10000000
 
@@ -1517,24 +1489,6 @@ static __inline__ void drm_core_dropmap(struct drm_local_map *map)
 {
 }
 
-#ifndef DEBUG_MEMORY
-/** Wrapper around kmalloc() */
-static __inline__ void *drm_alloc(size_t size, int area)
-{
-	return kmalloc(size, GFP_KERNEL);
-}
-
-/** Wrapper around kfree() */
-static __inline__ void drm_free(void *pt, size_t size, int area)
-{
-	kfree(pt);
-}
-
-/** Wrapper around kcalloc() */
-static __inline__ void *drm_calloc(size_t nmemb, size_t size, int area)
-{
-	return kcalloc(nmemb, size, GFP_KERNEL);
-}
 
 static __inline__ void *drm_calloc_large(size_t nmemb, size_t size)
 {
@@ -1555,12 +1509,6 @@ static __inline void drm_free_large(void *ptr)
 
 	vfree(ptr);
 }
-#else
-extern void *drm_alloc(size_t size, int area);
-extern void drm_free(void *pt, size_t size, int area);
-extern void *drm_calloc(size_t nmemb, size_t size, int area);
-#endif
-
 /*@}*/
 
 #endif				/* __KERNEL__ */
diff --git a/include/drm/drm_memory_debug.h b/include/drm/drm_memory_debug.h
deleted file mode 100644
index 6463271deea..00000000000
--- a/include/drm/drm_memory_debug.h
+++ /dev/null
@@ -1,309 +0,0 @@
-/**
- * \file drm_memory_debug.h
- * Memory management wrappers for DRM.
- *
- * \author Rickard E. (Rik) Faith <faith@valinux.com>
- * \author Gareth Hughes <gareth@valinux.com>
- */
-
-/*
- * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas.
- * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "drmP.h"
-
-typedef struct drm_mem_stats {
-	const char *name;
-	int succeed_count;
-	int free_count;
-	int fail_count;
-	unsigned long bytes_allocated;
-	unsigned long bytes_freed;
-} drm_mem_stats_t;
-
-static DEFINE_SPINLOCK(drm_mem_lock);
-static unsigned long drm_ram_available = 0;	/* In pages */
-static unsigned long drm_ram_used = 0;
-static drm_mem_stats_t drm_mem_stats[] =
-{
-	[DRM_MEM_DMA] = {"dmabufs"},
-	[DRM_MEM_SAREA] = {"sareas"},
-	[DRM_MEM_DRIVER] = {"driver"},
-	[DRM_MEM_MAGIC] = {"magic"},
-	[DRM_MEM_IOCTLS] = {"ioctltab"},
-	[DRM_MEM_MAPS] = {"maplist"},
-	[DRM_MEM_VMAS] = {"vmalist"},
-	[DRM_MEM_BUFS] = {"buflist"},
-	[DRM_MEM_SEGS] = {"seglist"},
-	[DRM_MEM_PAGES] = {"pagelist"},
-	[DRM_MEM_FILES] = {"files"},
-	[DRM_MEM_QUEUES] = {"queues"},
-	[DRM_MEM_CMDS] = {"commands"},
-	[DRM_MEM_MAPPINGS] = {"mappings"},
-	[DRM_MEM_BUFLISTS] = {"buflists"},
-	[DRM_MEM_AGPLISTS] = {"agplist"},
-	[DRM_MEM_SGLISTS] = {"sglist"},
-	[DRM_MEM_TOTALAGP] = {"totalagp"},
-	[DRM_MEM_BOUNDAGP] = {"boundagp"},
-	[DRM_MEM_CTXBITMAP] = {"ctxbitmap"},
-	[DRM_MEM_CTXLIST] = {"ctxlist"},
-	[DRM_MEM_STUB] = {"stub"},
-	{NULL, 0,}		/* Last entry must be null */
-};
-
-void drm_mem_init (void) {
-	drm_mem_stats_t *mem;
-	struct sysinfo si;
-
-	for (mem = drm_mem_stats; mem->name; ++mem) {
-		mem->succeed_count = 0;
-		mem->free_count = 0;
-		mem->fail_count = 0;
-		mem->bytes_allocated = 0;
-		mem->bytes_freed = 0;
-	}
-
-	si_meminfo(&si);
-	drm_ram_available = si.totalram;
-	drm_ram_used = 0;
-}
-
-/* drm_mem_info is called whenever a process reads /dev/drm/mem. */
-
-static int drm__mem_info (char *buf, char **start, off_t offset,
-			   int request, int *eof, void *data) {
-	drm_mem_stats_t *pt;
-	int len = 0;
-
-	if (offset > DRM_PROC_LIMIT) {
-		*eof = 1;
-		return 0;
-	}
-
-	*eof = 0;
-	*start = &buf[offset];
-
-	DRM_PROC_PRINT("		  total counts			"
-		       " |    outstanding  \n");
-	DRM_PROC_PRINT("type	   alloc freed fail	bytes	   freed"
-		       " | allocs      bytes\n\n");
-	DRM_PROC_PRINT("%-9.9s %5d %5d %4d %10lu kB         |\n",
-		       "system", 0, 0, 0,
-		       drm_ram_available << (PAGE_SHIFT - 10));
-	DRM_PROC_PRINT("%-9.9s %5d %5d %4d %10lu kB         |\n",
-		       "locked", 0, 0, 0, drm_ram_used >> 10);
-	DRM_PROC_PRINT("\n");
-	for (pt = drm_mem_stats; pt->name; pt++) {
-		DRM_PROC_PRINT("%-9.9s %5d %5d %4d %10lu %10lu | %6d %10ld\n",
-			       pt->name,
-			       pt->succeed_count,
-			       pt->free_count,
-			       pt->fail_count,
-			       pt->bytes_allocated,
-			       pt->bytes_freed,
-			       pt->succeed_count - pt->free_count,
-			       (long)pt->bytes_allocated
-			       - (long)pt->bytes_freed);
-	}
-
-	if (len > request + offset)
-		return request;
-	*eof = 1;
-	return len - offset;
-}
-
-int drm_mem_info (char *buf, char **start, off_t offset,
-		   int len, int *eof, void *data) {
-	int ret;
-
-	spin_lock(&drm_mem_lock);
-	ret = drm__mem_info (buf, start, offset, len, eof, data);
-	spin_unlock(&drm_mem_lock);
-	return ret;
-}
-
-void *drm_alloc (size_t size, int area) {
-	void *pt;
-
-	if (!size) {
-		DRM_MEM_ERROR(area, "Allocating 0 bytes\n");
-		return NULL;
-	}
-
-	if (!(pt = kmalloc(size, GFP_KERNEL))) {
-		spin_lock(&drm_mem_lock);
-		++drm_mem_stats[area].fail_count;
-		spin_unlock(&drm_mem_lock);
-		return NULL;
-	}
-	spin_lock(&drm_mem_lock);
-	++drm_mem_stats[area].succeed_count;
-	drm_mem_stats[area].bytes_allocated += size;
-	spin_unlock(&drm_mem_lock);
-	return pt;
-}
-
-void *drm_calloc (size_t nmemb, size_t size, int area) {
-	void *addr;
-
-	addr = drm_alloc (nmemb * size, area);
-	if (addr != NULL)
-		memset((void *)addr, 0, size * nmemb);
-
-	return addr;
-}
-
-void *drm_realloc (void *oldpt, size_t oldsize, size_t size, int area) {
-	void *pt;
-
-	if (!(pt = drm_alloc (size, area)))
-		return NULL;
-	if (oldpt && oldsize) {
-		memcpy(pt, oldpt, oldsize);
-		drm_free (oldpt, oldsize, area);
-	}
-	return pt;
-}
-
-void drm_free (void *pt, size_t size, int area) {
-	int alloc_count;
-	int free_count;
-
-	if (!pt)
-		DRM_MEM_ERROR(area, "Attempt to free NULL pointer\n");
-	else
-		kfree(pt);
-	spin_lock(&drm_mem_lock);
-	drm_mem_stats[area].bytes_freed += size;
-	free_count = ++drm_mem_stats[area].free_count;
-	alloc_count = drm_mem_stats[area].succeed_count;
-	spin_unlock(&drm_mem_lock);
-	if (free_count > alloc_count) {
-		DRM_MEM_ERROR(area, "Excess frees: %d frees, %d allocs\n",
-			      free_count, alloc_count);
-	}
-}
-
-#if __OS_HAS_AGP
-
-DRM_AGP_MEM *drm_alloc_agp (drm_device_t *dev, int pages, u32 type) {
-	DRM_AGP_MEM *handle;
-
-	if (!pages) {
-		DRM_MEM_ERROR(DRM_MEM_TOTALAGP, "Allocating 0 pages\n");
-		return NULL;
-	}
-
-	if ((handle = drm_agp_allocate_memory (pages, type))) {
-		spin_lock(&drm_mem_lock);
-		++drm_mem_stats[DRM_MEM_TOTALAGP].succeed_count;
-		drm_mem_stats[DRM_MEM_TOTALAGP].bytes_allocated
-		    += pages << PAGE_SHIFT;
-		spin_unlock(&drm_mem_lock);
-		return handle;
-	}
-	spin_lock(&drm_mem_lock);
-	++drm_mem_stats[DRM_MEM_TOTALAGP].fail_count;
-	spin_unlock(&drm_mem_lock);
-	return NULL;
-}
-
-int drm_free_agp (DRM_AGP_MEM * handle, int pages) {
-	int alloc_count;
-	int free_count;
-	int retval = -EINVAL;
-
-	if (!handle) {
-		DRM_MEM_ERROR(DRM_MEM_TOTALAGP,
-			      "Attempt to free NULL AGP handle\n");
-		return retval;
-	}
-
-	if (drm_agp_free_memory (handle)) {
-		spin_lock(&drm_mem_lock);
-		free_count = ++drm_mem_stats[DRM_MEM_TOTALAGP].free_count;
-		alloc_count = drm_mem_stats[DRM_MEM_TOTALAGP].succeed_count;
-		drm_mem_stats[DRM_MEM_TOTALAGP].bytes_freed
-		    += pages << PAGE_SHIFT;
-		spin_unlock(&drm_mem_lock);
-		if (free_count > alloc_count) {
-			DRM_MEM_ERROR(DRM_MEM_TOTALAGP,
-				      "Excess frees: %d frees, %d allocs\n",
-				      free_count, alloc_count);
-		}
-		return 0;
-	}
-	return retval;
-}
-
-int drm_bind_agp (DRM_AGP_MEM * handle, unsigned int start) {
-	int retcode = -EINVAL;
-
-	if (!handle) {
-		DRM_MEM_ERROR(DRM_MEM_BOUNDAGP,
-			      "Attempt to bind NULL AGP handle\n");
-		return retcode;
-	}
-
-	if (!(retcode = drm_agp_bind_memory (handle, start))) {
-		spin_lock(&drm_mem_lock);
-		++drm_mem_stats[DRM_MEM_BOUNDAGP].succeed_count;
-		drm_mem_stats[DRM_MEM_BOUNDAGP].bytes_allocated
-		    += handle->page_count << PAGE_SHIFT;
-		spin_unlock(&drm_mem_lock);
-		return retcode;
-	}
-	spin_lock(&drm_mem_lock);
-	++drm_mem_stats[DRM_MEM_BOUNDAGP].fail_count;
-	spin_unlock(&drm_mem_lock);
-	return retcode;
-}
-
-int drm_unbind_agp (DRM_AGP_MEM * handle) {
-	int alloc_count;
-	int free_count;
-	int retcode = -EINVAL;
-
-	if (!handle) {
-		DRM_MEM_ERROR(DRM_MEM_BOUNDAGP,
-			      "Attempt to unbind NULL AGP handle\n");
-		return retcode;
-	}
-
-	if ((retcode = drm_agp_unbind_memory (handle)))
-		return retcode;
-	spin_lock(&drm_mem_lock);
-	free_count = ++drm_mem_stats[DRM_MEM_BOUNDAGP].free_count;
-	alloc_count = drm_mem_stats[DRM_MEM_BOUNDAGP].succeed_count;
-	drm_mem_stats[DRM_MEM_BOUNDAGP].bytes_freed
-	    += handle->page_count << PAGE_SHIFT;
-	spin_unlock(&drm_mem_lock);
-	if (free_count > alloc_count) {
-		DRM_MEM_ERROR(DRM_MEM_BOUNDAGP,
-			      "Excess frees: %d frees, %d allocs\n",
-			      free_count, alloc_count);
-	}
-	return retcode;
-}
-#endif
-- 
cgit v1.2.3-70-g09d2


From aa0ce5bbc2dbb1853bd0c6d13f17716fcc38ac5a Mon Sep 17 00:00:00 2001
From: Keika Kobayashi <kobayashi.kk@ncos.nec.co.jp>
Date: Wed, 17 Jun 2009 16:25:52 -0700
Subject: softirq: introduce statistics for softirq

Statistics for softirq doesn't exist.
It will be helpful like statistics for interrupts.
This patch introduces counting the number of softirq,
which will be exported in /proc/softirqs.

When softirq handler consumes much CPU time,
/proc/stat is like the following.

$ while :; do  cat /proc/stat | head -n1 ; sleep 10 ; done
cpu  88 0 408 739665 583 28 2 0 0
cpu  450 0 1090 740970 594 28 1294 0 0
                              ^^^^
                             softirq

In such a situation,
/proc/softirqs shows us which softirq handler is invoked.
We can see the increase rate of softirqs.

<before>
$ cat /proc/softirqs
                CPU0       CPU1       CPU2       CPU3
HI                 0          0          0          0
TIMER         462850     462805     462782     462718
NET_TX             0          0          0        365
NET_RX          2472          2          2         40
BLOCK              0          0        381       1164
TASKLET            0          0          0        224
SCHED         462654     462689     462698     462427
RCU             3046       2423       3367       3173

<after>
$ cat /proc/softirqs
                CPU0       CPU1       CPU2       CPU3
HI                 0          0          0          0
TIMER         463361     465077     465056     464991
NET_TX            53          0          1        365
NET_RX          3757          2          2         40
BLOCK              0          0        398       1170
TASKLET            0          0          0        224
SCHED         463074     464318     464612     463330
RCU             3505       2948       3947       3673

When CPU TIME of softirq is high,
the rates of increase is the following.
  TIMER  : 220/sec     : CPU1-3
  NET_TX : 5/sec       : CPU0
  NET_RX : 120/sec     : CPU0
  SCHED  : 40-200/sec  : all CPU
  RCU    : 45-58/sec   : all CPU

The rates of increase in an idle mode is the following.
  TIMER  : 250/sec
  SCHED  : 250/sec
  RCU    : 2/sec

It seems many softirqs for receiving packets and rcu are invoked.  This
gives us help for checking system.

Signed-off-by: Keika Kobayashi <kobayashi.kk@ncos.nec.co.jp>
Reviewed-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Eric Dumazet <dada1@cosmosbay.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel_stat.h | 12 ++++++++++++
 kernel/softirq.c            |  1 +
 2 files changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index a77c6007dc9..348fa8874b5 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -5,6 +5,7 @@
 #include <linux/threads.h>
 #include <linux/percpu.h>
 #include <linux/cpumask.h>
+#include <linux/interrupt.h>
 #include <asm/irq.h>
 #include <asm/cputime.h>
 
@@ -31,6 +32,7 @@ struct kernel_stat {
 #ifndef CONFIG_GENERIC_HARDIRQS
        unsigned int irqs[NR_IRQS];
 #endif
+	unsigned int softirqs[NR_SOFTIRQS];
 };
 
 DECLARE_PER_CPU(struct kernel_stat, kstat);
@@ -67,6 +69,16 @@ extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu);
 
 #endif
 
+static inline void kstat_incr_softirqs_this_cpu(unsigned int irq)
+{
+	kstat_this_cpu.softirqs[irq]++;
+}
+
+static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu)
+{
+       return kstat_cpu(cpu).softirqs[irq];
+}
+
 /*
  * Number of interrupts per specific IRQ source, since bootup
  */
diff --git a/kernel/softirq.c b/kernel/softirq.c
index b41fb710e11..3a94905fa5d 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -213,6 +213,7 @@ restart:
 	do {
 		if (pending & 1) {
 			int prev_count = preempt_count();
+			kstat_incr_softirqs_this_cpu(h - softirq_vec);
 
 			trace_softirq_entry(h, softirq_vec);
 			h->action(h);
-- 
cgit v1.2.3-70-g09d2


From 7d0771970c51e736758525dd71fb82dd036b823a Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Wed, 17 Jun 2009 16:26:03 -0700
Subject: spi: move common spi_setup() functionality into core

Start moving some spi_setup() functionality into the SPI core from the
various spi_master controller drivers:

 - Make that function stop being an inline;

 - Move two common idioms from drivers into that new function:
    * Default bits_per_word to 8 if that field isn't set
    * Issue a standardized dev_dbg() message

This is a net minor source code shrink, and supports enhancments found in
some follow-up patches.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/spi/atmel_spi.c   |  2 --
 drivers/spi/au1550_spi.c  |  2 --
 drivers/spi/omap2_mcspi.c |  4 +---
 drivers/spi/omap_uwire.c  |  2 --
 drivers/spi/orion_spi.c   |  3 ---
 drivers/spi/pxa2xx_spi.c  | 11 ++--------
 drivers/spi/spi.c         | 55 ++++++++++++++++++++++++++++++++++++++++++++++-
 drivers/spi/spi_bfin5xx.c |  4 ----
 drivers/spi/spi_bitbang.c |  7 +-----
 drivers/spi/spi_imx.c     |  5 +----
 drivers/spi/spi_mpc83xx.c |  6 ------
 drivers/spi/spi_s3c24xx.c |  7 ------
 drivers/spi/spi_txx9.c    |  2 +-
 drivers/spi/xilinx_spi.c  |  6 ------
 include/linux/spi/spi.h   | 25 +--------------------
 15 files changed, 61 insertions(+), 80 deletions(-)

(limited to 'include')

diff --git a/drivers/spi/atmel_spi.c b/drivers/spi/atmel_spi.c
index 12e443cc4ac..9f9ff3af72d 100644
--- a/drivers/spi/atmel_spi.c
+++ b/drivers/spi/atmel_spi.c
@@ -555,8 +555,6 @@ static int atmel_spi_setup(struct spi_device *spi)
 		return -EINVAL;
 	}
 
-	if (bits == 0)
-		bits = 8;
 	if (bits < 8 || bits > 16) {
 		dev_dbg(&spi->dev,
 				"setup: invalid bits_per_word %u (8 to 16)\n",
diff --git a/drivers/spi/au1550_spi.c b/drivers/spi/au1550_spi.c
index b02f25c702f..6a407e60f05 100644
--- a/drivers/spi/au1550_spi.c
+++ b/drivers/spi/au1550_spi.c
@@ -291,8 +291,6 @@ static int au1550_spi_setup(struct spi_device *spi)
 {
 	struct au1550_spi *hw = spi_master_get_devdata(spi->master);
 
-	if (spi->bits_per_word == 0)
-		spi->bits_per_word = 8;
 	if (spi->bits_per_word < 4 || spi->bits_per_word > 24) {
 		dev_err(&spi->dev, "setup: invalid bits_per_word=%d\n",
 			spi->bits_per_word);
diff --git a/drivers/spi/omap2_mcspi.c b/drivers/spi/omap2_mcspi.c
index d6d0c5d241c..b4f3b753d0f 100644
--- a/drivers/spi/omap2_mcspi.c
+++ b/drivers/spi/omap2_mcspi.c
@@ -619,9 +619,7 @@ static int omap2_mcspi_setup(struct spi_device *spi)
 		return -EINVAL;
 	}
 
-	if (spi->bits_per_word == 0)
-		spi->bits_per_word = 8;
-	else if (spi->bits_per_word < 4 || spi->bits_per_word > 32) {
+	if (spi->bits_per_word < 4 || spi->bits_per_word > 32) {
 		dev_dbg(&spi->dev, "setup: unsupported %d bit words\n",
 			spi->bits_per_word);
 		return -EINVAL;
diff --git a/drivers/spi/omap_uwire.c b/drivers/spi/omap_uwire.c
index fe8b9ac0cce..747d29be45d 100644
--- a/drivers/spi/omap_uwire.c
+++ b/drivers/spi/omap_uwire.c
@@ -339,8 +339,6 @@ static int uwire_setup_transfer(struct spi_device *spi, struct spi_transfer *t)
 	bits = spi->bits_per_word;
 	if (t != NULL && t->bits_per_word)
 		bits = t->bits_per_word;
-	if (!bits)
-		bits = 8;
 
 	if (bits > 16) {
 		pr_debug("%s: wordsize %d?\n", dev_name(&spi->dev), bits);
diff --git a/drivers/spi/orion_spi.c b/drivers/spi/orion_spi.c
index c8b0babdc2a..6d5e33bb4b4 100644
--- a/drivers/spi/orion_spi.c
+++ b/drivers/spi/orion_spi.c
@@ -369,9 +369,6 @@ static int orion_spi_setup(struct spi_device *spi)
 		orion_spi_setbits(orion_spi, ORION_SPI_IF_CONFIG_REG,
 				  (1 << 14));
 
-	if (spi->bits_per_word == 0)
-		spi->bits_per_word = 8;
-
 	if ((spi->max_speed_hz == 0)
 			|| (spi->max_speed_hz > orion_spi->max_speed))
 		spi->max_speed_hz = orion_spi->max_speed;
diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c
index 3f3c08c6ba4..c7365e0b22d 100644
--- a/drivers/spi/pxa2xx_spi.c
+++ b/drivers/spi/pxa2xx_spi.c
@@ -1236,9 +1236,6 @@ static int setup(struct spi_device *spi)
 	uint tx_thres = TX_THRESH_DFLT;
 	uint rx_thres = RX_THRESH_DFLT;
 
-	if (!spi->bits_per_word)
-		spi->bits_per_word = 8;
-
 	if (drv_data->ssp_type != PXA25x_SSP
 		&& (spi->bits_per_word < 4 || spi->bits_per_word > 32)) {
 		dev_err(&spi->dev, "failed setup: ssp_type=%d, bits/wrd=%d "
@@ -1328,18 +1325,14 @@ static int setup(struct spi_device *spi)
 
 	/* NOTE:  PXA25x_SSP _could_ use external clocking ... */
 	if (drv_data->ssp_type != PXA25x_SSP)
-		dev_dbg(&spi->dev, "%d bits/word, %ld Hz, mode %d, %s\n",
-				spi->bits_per_word,
+		dev_dbg(&spi->dev, "%ld Hz actual, %s\n",
 				clk_get_rate(ssp->clk)
 					/ (1 + ((chip->cr0 & SSCR0_SCR) >> 8)),
-				spi->mode & 0x3,
 				chip->enable_dma ? "DMA" : "PIO");
 	else
-		dev_dbg(&spi->dev, "%d bits/word, %ld Hz, mode %d, %s\n",
-				spi->bits_per_word,
+		dev_dbg(&spi->dev, "%ld Hz actual, %s\n",
 				clk_get_rate(ssp->clk) / 2
 					/ (1 + ((chip->cr0 & SSCR0_SCR) >> 8)),
-				spi->mode & 0x3,
 				chip->enable_dma ? "DMA" : "PIO");
 
 	if (spi->bits_per_word <= 8) {
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 8eba98c8ed1..0276bc37e25 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -265,7 +265,7 @@ int spi_add_device(struct spi_device *spi)
 	 * normally rely on the device being setup.  Devices
 	 * using SPI_CS_HIGH can't coexist well otherwise...
 	 */
-	status = spi->master->setup(spi);
+	status = spi_setup(spi);
 	if (status < 0) {
 		dev_err(dev, "can't %s %s, status %d\n",
 				"setup", dev_name(&spi->dev), status);
@@ -583,6 +583,59 @@ EXPORT_SYMBOL_GPL(spi_busnum_to_master);
 
 /*-------------------------------------------------------------------------*/
 
+/* Core methods for SPI master protocol drivers.  Some of the
+ * other core methods are currently defined as inline functions.
+ */
+
+/**
+ * spi_setup - setup SPI mode and clock rate
+ * @spi: the device whose settings are being modified
+ * Context: can sleep, and no requests are queued to the device
+ *
+ * SPI protocol drivers may need to update the transfer mode if the
+ * device doesn't work with its default.  They may likewise need
+ * to update clock rates or word sizes from initial values.  This function
+ * changes those settings, and must be called from a context that can sleep.
+ * Except for SPI_CS_HIGH, which takes effect immediately, the changes take
+ * effect the next time the device is selected and data is transferred to
+ * or from it.  When this function returns, the spi device is deselected.
+ *
+ * Note that this call will fail if the protocol driver specifies an option
+ * that the underlying controller or its driver does not support.  For
+ * example, not all hardware supports wire transfers using nine bit words,
+ * LSB-first wire encoding, or active-high chipselects.
+ */
+int spi_setup(struct spi_device *spi)
+{
+	int		status;
+
+	if (!spi->bits_per_word)
+		spi->bits_per_word = 8;
+
+	status = spi->master->setup(spi);
+
+	dev_dbg(&spi->dev, "setup mode %d, %s%s%s%s"
+				"%u bits/w, %u Hz max --> %d\n",
+			(int) (spi->mode & (SPI_CPOL | SPI_CPHA)),
+			(spi->mode & SPI_CS_HIGH) ? "cs_high, " : "",
+			(spi->mode & SPI_LSB_FIRST) ? "lsb, " : "",
+			(spi->mode & SPI_3WIRE) ? "3wire, " : "",
+			(spi->mode & SPI_LOOP) ? "loopback, " : "",
+			spi->bits_per_word, spi->max_speed_hz,
+			status);
+
+	return status;
+}
+EXPORT_SYMBOL_GPL(spi_setup);
+
+
+/*-------------------------------------------------------------------------*/
+
+/* Utility methods for SPI master protocol drivers, layered on
+ * top of the core.  Some other utility methods are defined as
+ * inline functions.
+ */
+
 static void spi_complete(void *arg)
 {
 	complete(arg);
diff --git a/drivers/spi/spi_bfin5xx.c b/drivers/spi/spi_bfin5xx.c
index 2e5fd097733..d54058a903b 100644
--- a/drivers/spi/spi_bfin5xx.c
+++ b/drivers/spi/spi_bfin5xx.c
@@ -1016,10 +1016,6 @@ static int bfin_spi_setup(struct spi_device *spi)
 		return -EINVAL;
 	}
 
-	/* Zero (the default) here means 8 bits */
-	if (!spi->bits_per_word)
-		spi->bits_per_word = 8;
-
 	if (spi->bits_per_word != 8 && spi->bits_per_word != 16)
 		return -EINVAL;
 
diff --git a/drivers/spi/spi_bitbang.c b/drivers/spi/spi_bitbang.c
index 85e61f45121..855b0b06e62 100644
--- a/drivers/spi/spi_bitbang.c
+++ b/drivers/spi/spi_bitbang.c
@@ -201,9 +201,6 @@ int spi_bitbang_setup(struct spi_device *spi)
 		spi->controller_state = cs;
 	}
 
-	if (!spi->bits_per_word)
-		spi->bits_per_word = 8;
-
 	/* per-word shift register access, in hardware or bitbanging */
 	cs->txrx_word = bitbang->txrx_word[spi->mode & (SPI_CPOL|SPI_CPHA)];
 	if (!cs->txrx_word)
@@ -213,9 +210,7 @@ int spi_bitbang_setup(struct spi_device *spi)
 	if (retval < 0)
 		return retval;
 
-	dev_dbg(&spi->dev, "%s, mode %d, %u bits/w, %u nsec/bit\n",
-			__func__, spi->mode & (SPI_CPOL | SPI_CPHA),
-			spi->bits_per_word, 2 * cs->nsecs);
+	dev_dbg(&spi->dev, "%s, %u nsec/bit\n", __func__, 2 * cs->nsecs);
 
 	/* NOTE we _need_ to call chipselect() early, ideally with adapter
 	 * setup, unless the hardware defaults cooperate to avoid confusion
diff --git a/drivers/spi/spi_imx.c b/drivers/spi/spi_imx.c
index 0671aeef579..26d5ef06dbd 100644
--- a/drivers/spi/spi_imx.c
+++ b/drivers/spi/spi_imx.c
@@ -1286,10 +1286,7 @@ static int setup(struct spi_device *spi)
 
 	/* SPI word width */
 	tmp = spi->bits_per_word;
-	if (tmp == 0) {
-		tmp = 8;
-		spi->bits_per_word = 8;
-	} else if (tmp > 16) {
+	if (tmp > 16) {
 		status = -EINVAL;
 		dev_err(&spi->dev,
 			"setup - "
diff --git a/drivers/spi/spi_mpc83xx.c b/drivers/spi/spi_mpc83xx.c
index a32ccb44065..0926a3e293e 100644
--- a/drivers/spi/spi_mpc83xx.c
+++ b/drivers/spi/spi_mpc83xx.c
@@ -447,9 +447,6 @@ static int mpc83xx_spi_setup(struct spi_device *spi)
 	}
 	mpc83xx_spi = spi_master_get_devdata(spi->master);
 
-	if (!spi->bits_per_word)
-		spi->bits_per_word = 8;
-
 	hw_mode = cs->hw_mode; /* Save orginal settings */
 	cs->hw_mode = mpc83xx_spi_read_reg(&mpc83xx_spi->base->mode);
 	/* mask out bits we are going to set */
@@ -471,9 +468,6 @@ static int mpc83xx_spi_setup(struct spi_device *spi)
 		return retval;
 	}
 
-	dev_dbg(&spi->dev, "%s, mode %d, %u bits/w, %u Hz\n",
-		__func__, spi->mode & (SPI_CPOL | SPI_CPHA),
-		spi->bits_per_word, spi->max_speed_hz);
 #if 0 /* Don't think this is needed */
 	/* NOTE we _need_ to call chipselect() early, ideally with adapter
 	 * setup, unless the hardware defaults cooperate to avoid confusion
diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c
index b3ebc1d0f85..18a4c7f5438 100644
--- a/drivers/spi/spi_s3c24xx.c
+++ b/drivers/spi/spi_s3c24xx.c
@@ -153,9 +153,6 @@ static int s3c24xx_spi_setup(struct spi_device *spi)
 {
 	int ret;
 
-	if (!spi->bits_per_word)
-		spi->bits_per_word = 8;
-
 	if (spi->mode & ~MODEBITS) {
 		dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n",
 			spi->mode & ~MODEBITS);
@@ -168,10 +165,6 @@ static int s3c24xx_spi_setup(struct spi_device *spi)
 		return ret;
 	}
 
-	dev_dbg(&spi->dev, "%s: mode %d, %u bpw, %d hz\n",
-		__func__, spi->mode, spi->bits_per_word,
-		spi->max_speed_hz);
-
 	return 0;
 }
 
diff --git a/drivers/spi/spi_txx9.c b/drivers/spi/spi_txx9.c
index 29cbb065618..8e36b2153d9 100644
--- a/drivers/spi/spi_txx9.c
+++ b/drivers/spi/spi_txx9.c
@@ -126,7 +126,7 @@ static int txx9spi_setup(struct spi_device *spi)
 			|| spi->max_speed_hz < c->min_speed_hz)
 		return -EINVAL;
 
-	bits_per_word = spi->bits_per_word ? : 8;
+	bits_per_word = spi->bits_per_word;
 	if (bits_per_word != 8 && bits_per_word != 16)
 		return -EINVAL;
 
diff --git a/drivers/spi/xilinx_spi.c b/drivers/spi/xilinx_spi.c
index 494d3f756e2..2d7e6b81fb4 100644
--- a/drivers/spi/xilinx_spi.c
+++ b/drivers/spi/xilinx_spi.c
@@ -170,9 +170,6 @@ static int xilinx_spi_setup(struct spi_device *spi)
 	xspi = spi_master_get_devdata(spi->master);
 	bitbang = &xspi->bitbang;
 
-	if (!spi->bits_per_word)
-		spi->bits_per_word = 8;
-
 	if (spi->mode & ~MODEBITS) {
 		dev_err(&spi->dev, "%s, unsupported mode bits %x\n",
 			__func__, spi->mode & ~MODEBITS);
@@ -183,9 +180,6 @@ static int xilinx_spi_setup(struct spi_device *spi)
 	if (retval < 0)
 		return retval;
 
-	dev_dbg(&spi->dev, "%s, mode %d, %u bits/w, %u nsec/bit\n",
-		__func__, spi->mode & MODEBITS, spi->bits_per_word, 0);
-
 	return 0;
 }
 
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index a0faa18f7b1..0db5d64fc5e 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -523,30 +523,7 @@ static inline void spi_message_free(struct spi_message *m)
 	kfree(m);
 }
 
-/**
- * spi_setup - setup SPI mode and clock rate
- * @spi: the device whose settings are being modified
- * Context: can sleep, and no requests are queued to the device
- *
- * SPI protocol drivers may need to update the transfer mode if the
- * device doesn't work with its default.  They may likewise need
- * to update clock rates or word sizes from initial values.  This function
- * changes those settings, and must be called from a context that can sleep.
- * Except for SPI_CS_HIGH, which takes effect immediately, the changes take
- * effect the next time the device is selected and data is transferred to
- * or from it.  When this function returns, the spi device is deselected.
- *
- * Note that this call will fail if the protocol driver specifies an option
- * that the underlying controller or its driver does not support.  For
- * example, not all hardware supports wire transfers using nine bit words,
- * LSB-first wire encoding, or active-high chipselects.
- */
-static inline int
-spi_setup(struct spi_device *spi)
-{
-	return spi->master->setup(spi);
-}
-
+extern int spi_setup(struct spi_device *spi);
 
 /**
  * spi_async - asynchronous SPI transfer
-- 
cgit v1.2.3-70-g09d2


From e7db06b5d5afcef15c4c3e61c3a7441ed7ad1407 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Wed, 17 Jun 2009 16:26:04 -0700
Subject: spi: move more spi_setup() functionality into core

Move some common spi_setup() error checks into the SPI framework from the
spi_master controller drivers:

 - Add a new "mode_bits" field to spi_master

 - Use that in spi_setup to validate the spi->mode value being
   requested.  Setting this new field is now mandatory for any
   controller supporting more than vanilla SPI_MODE_0.

 - Update all spi_master drivers to:

     * Initialize that field
     * Remove current spi_setup() checks using that value.

This is a net minor code shrink.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/spi/atmel_spi.c       | 12 +++---------
 drivers/spi/au1550_spi.c      | 12 +++---------
 drivers/spi/mpc52xx_psc_spi.c | 12 +++---------
 drivers/spi/omap2_mcspi.c     | 12 +++---------
 drivers/spi/omap_uwire.c      | 12 +++---------
 drivers/spi/orion_spi.c       |  9 +++------
 drivers/spi/pxa2xx_spi.c      | 12 +++---------
 drivers/spi/spi.c             | 11 +++++++++++
 drivers/spi/spi_bfin5xx.c     |  9 +++------
 drivers/spi/spi_bitbang.c     |  9 +++------
 drivers/spi/spi_imx.c         | 12 +++---------
 drivers/spi/spi_mpc83xx.c     | 14 ++++----------
 drivers/spi/spi_s3c24xx.c     | 12 +++---------
 drivers/spi/spi_txx9.c        |  9 +++------
 drivers/spi/xilinx_spi.c      | 12 +++---------
 include/linux/spi/spi.h       |  3 +++
 16 files changed, 57 insertions(+), 115 deletions(-)

(limited to 'include')

diff --git a/drivers/spi/atmel_spi.c b/drivers/spi/atmel_spi.c
index 9f9ff3af72d..f5b3fdbb1e2 100644
--- a/drivers/spi/atmel_spi.c
+++ b/drivers/spi/atmel_spi.c
@@ -530,9 +530,6 @@ atmel_spi_interrupt(int irq, void *dev_id)
 	return ret;
 }
 
-/* the spi->mode bits understood by this driver: */
-#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH)
-
 static int atmel_spi_setup(struct spi_device *spi)
 {
 	struct atmel_spi	*as;
@@ -562,12 +559,6 @@ static int atmel_spi_setup(struct spi_device *spi)
 		return -EINVAL;
 	}
 
-	if (spi->mode & ~MODEBITS) {
-		dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n",
-			spi->mode & ~MODEBITS);
-		return -EINVAL;
-	}
-
 	/* see notes above re chipselect */
 	if (!atmel_spi_is_v2()
 			&& spi->chip_select == 0
@@ -773,6 +764,9 @@ static int __init atmel_spi_probe(struct platform_device *pdev)
 	if (!master)
 		goto out_free;
 
+	/* the spi->mode bits understood by this driver: */
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+
 	master->bus_num = pdev->id;
 	master->num_chipselect = 4;
 	master->setup = atmel_spi_setup;
diff --git a/drivers/spi/au1550_spi.c b/drivers/spi/au1550_spi.c
index 6a407e60f05..76cbc1a6659 100644
--- a/drivers/spi/au1550_spi.c
+++ b/drivers/spi/au1550_spi.c
@@ -284,9 +284,6 @@ static int au1550_spi_setupxfer(struct spi_device *spi, struct spi_transfer *t)
 	return 0;
 }
 
-/* the spi->mode bits understood by this driver: */
-#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST)
-
 static int au1550_spi_setup(struct spi_device *spi)
 {
 	struct au1550_spi *hw = spi_master_get_devdata(spi->master);
@@ -297,12 +294,6 @@ static int au1550_spi_setup(struct spi_device *spi)
 		return -EINVAL;
 	}
 
-	if (spi->mode & ~MODEBITS) {
-		dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n",
-			spi->mode & ~MODEBITS);
-		return -EINVAL;
-	}
-
 	if (spi->max_speed_hz == 0)
 		spi->max_speed_hz = hw->freq_max;
 	if (spi->max_speed_hz > hw->freq_max
@@ -779,6 +770,9 @@ static int __init au1550_spi_probe(struct platform_device *pdev)
 		goto err_nomem;
 	}
 
+	/* the spi->mode bits understood by this driver: */
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST;
+
 	hw = spi_master_get_devdata(master);
 
 	hw->master = spi_master_get(master);
diff --git a/drivers/spi/mpc52xx_psc_spi.c b/drivers/spi/mpc52xx_psc_spi.c
index 68c77a91159..bdae9f90897 100644
--- a/drivers/spi/mpc52xx_psc_spi.c
+++ b/drivers/spi/mpc52xx_psc_spi.c
@@ -261,9 +261,6 @@ static void mpc52xx_psc_spi_work(struct work_struct *work)
 	spin_unlock_irq(&mps->lock);
 }
 
-/* the spi->mode bits understood by this driver: */
-#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST)
-
 static int mpc52xx_psc_spi_setup(struct spi_device *spi)
 {
 	struct mpc52xx_psc_spi *mps = spi_master_get_devdata(spi->master);
@@ -273,12 +270,6 @@ static int mpc52xx_psc_spi_setup(struct spi_device *spi)
 	if (spi->bits_per_word%8)
 		return -EINVAL;
 
-	if (spi->mode & ~MODEBITS) {
-		dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n",
-			spi->mode & ~MODEBITS);
-		return -EINVAL;
-	}
-
 	if (!cs) {
 		cs = kzalloc(sizeof *cs, GFP_KERNEL);
 		if (!cs)
@@ -385,6 +376,9 @@ static int __init mpc52xx_psc_spi_do_probe(struct device *dev, u32 regaddr,
 	dev_set_drvdata(dev, master);
 	mps = spi_master_get_devdata(master);
 
+	/* the spi->mode bits understood by this driver: */
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST;
+
 	mps->irq = irq;
 	if (pdata == NULL) {
 		dev_warn(dev, "probe called without platform data, no "
diff --git a/drivers/spi/omap2_mcspi.c b/drivers/spi/omap2_mcspi.c
index b4f3b753d0f..eee4b6e0af2 100644
--- a/drivers/spi/omap2_mcspi.c
+++ b/drivers/spi/omap2_mcspi.c
@@ -603,9 +603,6 @@ static int omap2_mcspi_request_dma(struct spi_device *spi)
 	return 0;
 }
 
-/* the spi->mode bits understood by this driver: */
-#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH)
-
 static int omap2_mcspi_setup(struct spi_device *spi)
 {
 	int			ret;
@@ -613,12 +610,6 @@ static int omap2_mcspi_setup(struct spi_device *spi)
 	struct omap2_mcspi_dma	*mcspi_dma;
 	struct omap2_mcspi_cs	*cs = spi->controller_state;
 
-	if (spi->mode & ~MODEBITS) {
-		dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n",
-			spi->mode & ~MODEBITS);
-		return -EINVAL;
-	}
-
 	if (spi->bits_per_word < 4 || spi->bits_per_word > 32) {
 		dev_dbg(&spi->dev, "setup: unsupported %d bit words\n",
 			spi->bits_per_word);
@@ -982,6 +973,9 @@ static int __init omap2_mcspi_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 
+	/* the spi->mode bits understood by this driver: */
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+
 	if (pdev->id != -1)
 		master->bus_num = pdev->id;
 
diff --git a/drivers/spi/omap_uwire.c b/drivers/spi/omap_uwire.c
index 747d29be45d..aa90ddb3706 100644
--- a/drivers/spi/omap_uwire.c
+++ b/drivers/spi/omap_uwire.c
@@ -447,19 +447,10 @@ done:
 	return status;
 }
 
-/* the spi->mode bits understood by this driver: */
-#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH)
-
 static int uwire_setup(struct spi_device *spi)
 {
 	struct uwire_state *ust = spi->controller_state;
 
-	if (spi->mode & ~MODEBITS) {
-		dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n",
-			spi->mode & ~MODEBITS);
-		return -EINVAL;
-	}
-
 	if (ust == NULL) {
 		ust = kzalloc(sizeof(*ust), GFP_KERNEL);
 		if (ust == NULL)
@@ -520,6 +511,9 @@ static int __init uwire_probe(struct platform_device *pdev)
 
 	uwire_write_reg(UWIRE_SR3, 1);
 
+	/* the spi->mode bits understood by this driver: */
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+
 	master->bus_num = 2;	/* "official" */
 	master->num_chipselect = 4;
 	master->setup = uwire_setup;
diff --git a/drivers/spi/orion_spi.c b/drivers/spi/orion_spi.c
index 6d5e33bb4b4..3aea50da7b2 100644
--- a/drivers/spi/orion_spi.c
+++ b/drivers/spi/orion_spi.c
@@ -358,12 +358,6 @@ static int orion_spi_setup(struct spi_device *spi)
 
 	orion_spi = spi_master_get_devdata(spi->master);
 
-	if (spi->mode) {
-		dev_err(&spi->dev, "setup: unsupported mode bits %x\n",
-			spi->mode);
-		return -EINVAL;
-	}
-
 	/* Fix ac timing if required.   */
 	if (orion_spi->spi_info->enable_clock_fix)
 		orion_spi_setbits(orion_spi, ORION_SPI_IF_CONFIG_REG,
@@ -473,6 +467,9 @@ static int __init orion_spi_probe(struct platform_device *pdev)
 	if (pdev->id != -1)
 		master->bus_num = pdev->id;
 
+	/* we support only mode 0, and no options */
+	master->mode_bits = 0;
+
 	master->setup = orion_spi_setup;
 	master->transfer = orion_spi_transfer;
 	master->num_chipselect = ORION_NUM_CHIPSELECTS;
diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c
index c7365e0b22d..9c311dc4771 100644
--- a/drivers/spi/pxa2xx_spi.c
+++ b/drivers/spi/pxa2xx_spi.c
@@ -1185,9 +1185,6 @@ static int transfer(struct spi_device *spi, struct spi_message *msg)
 	return 0;
 }
 
-/* the spi->mode bits understood by this driver: */
-#define MODEBITS (SPI_CPOL | SPI_CPHA)
-
 static int setup_cs(struct spi_device *spi, struct chip_data *chip,
 		    struct pxa2xx_spi_chip *chip_info)
 {
@@ -1252,12 +1249,6 @@ static int setup(struct spi_device *spi)
 		return -EINVAL;
 	}
 
-	if (spi->mode & ~MODEBITS) {
-		dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n",
-			spi->mode & ~MODEBITS);
-		return -EINVAL;
-	}
-
 	/* Only alloc on first setup */
 	chip = spi_get_ctldata(spi);
 	if (!chip) {
@@ -1493,6 +1484,9 @@ static int __init pxa2xx_spi_probe(struct platform_device *pdev)
 	drv_data->pdev = pdev;
 	drv_data->ssp = ssp;
 
+	/* the spi->mode bits understood by this driver: */
+	master->mode_bits = SPI_CPOL | SPI_CPHA;
+
 	master->bus_num = pdev->id;
 	master->num_chipselect = platform_info->num_chipselect;
 	master->dma_alignment = DMA_ALIGNMENT;
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 0276bc37e25..a525a3a848c 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -607,8 +607,19 @@ EXPORT_SYMBOL_GPL(spi_busnum_to_master);
  */
 int spi_setup(struct spi_device *spi)
 {
+	unsigned	bad_bits;
 	int		status;
 
+	/* help drivers fail *cleanly* when they need options
+	 * that aren't supported with their current master
+	 */
+	bad_bits = spi->mode & ~spi->master->mode_bits;
+	if (bad_bits) {
+		dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n",
+			bad_bits);
+		return -EINVAL;
+	}
+
 	if (!spi->bits_per_word)
 		spi->bits_per_word = 8;
 
diff --git a/drivers/spi/spi_bfin5xx.c b/drivers/spi/spi_bfin5xx.c
index d54058a903b..73e24ef5a2f 100644
--- a/drivers/spi/spi_bfin5xx.c
+++ b/drivers/spi/spi_bfin5xx.c
@@ -1010,12 +1010,6 @@ static int bfin_spi_setup(struct spi_device *spi)
 	struct driver_data *drv_data = spi_master_get_devdata(spi->master);
 	int ret;
 
-	/* Abort device setup if requested features are not supported */
-	if (spi->mode & ~(SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST)) {
-		dev_err(&spi->dev, "requested mode not fully supported\n");
-		return -EINVAL;
-	}
-
 	if (spi->bits_per_word != 8 && spi->bits_per_word != 16)
 		return -EINVAL;
 
@@ -1283,6 +1277,9 @@ static int __init bfin_spi_probe(struct platform_device *pdev)
 	drv_data->pdev = pdev;
 	drv_data->pin_req = platform_info->pin_req;
 
+	/* the spi->mode bits supported by this driver: */
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST;
+
 	master->bus_num = pdev->id;
 	master->num_chipselect = platform_info->num_chipselect;
 	master->cleanup = bfin_spi_cleanup;
diff --git a/drivers/spi/spi_bitbang.c b/drivers/spi/spi_bitbang.c
index 855b0b06e62..2a5abc08e85 100644
--- a/drivers/spi/spi_bitbang.c
+++ b/drivers/spi/spi_bitbang.c
@@ -188,12 +188,6 @@ int spi_bitbang_setup(struct spi_device *spi)
 
 	bitbang = spi_master_get_devdata(spi->master);
 
-	/* Bitbangers can support SPI_CS_HIGH, SPI_3WIRE, and so on;
-	 * add those to master->flags, and provide the other support.
-	 */
-	if ((spi->mode & ~(SPI_CPOL|SPI_CPHA|bitbang->flags)) != 0)
-		return -EINVAL;
-
 	if (!cs) {
 		cs = kzalloc(sizeof *cs, GFP_KERNEL);
 		if (!cs)
@@ -452,6 +446,9 @@ int spi_bitbang_start(struct spi_bitbang *bitbang)
 	spin_lock_init(&bitbang->lock);
 	INIT_LIST_HEAD(&bitbang->queue);
 
+	if (!bitbang->master->mode_bits)
+		bitbang->master->mode_bits = SPI_CPOL | SPI_CPHA | bitbang->flags;
+
 	if (!bitbang->master->transfer)
 		bitbang->master->transfer = spi_bitbang_transfer;
 	if (!bitbang->txrx_bufs) {
diff --git a/drivers/spi/spi_imx.c b/drivers/spi/spi_imx.c
index 26d5ef06dbd..c195e45f7f3 100644
--- a/drivers/spi/spi_imx.c
+++ b/drivers/spi/spi_imx.c
@@ -1171,9 +1171,6 @@ msg_rejected:
 	return -EINVAL;
 }
 
-/* the spi->mode bits understood by this driver: */
-#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH)
-
 /* On first setup bad values must free chip_data memory since will cause
    spi_new_device to fail. Bad value setup from protocol driver are simply not
    applied and notified to the calling driver. */
@@ -1186,12 +1183,6 @@ static int setup(struct spi_device *spi)
 	u32 tmp;
 	int status = 0;
 
-	if (spi->mode & ~MODEBITS) {
-		dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n",
-			spi->mode & ~MODEBITS);
-		return -EINVAL;
-	}
-
 	/* Get controller data */
 	chip_info = spi->controller_data;
 
@@ -1478,6 +1469,9 @@ static int __init spi_imx_probe(struct platform_device *pdev)
 	drv_data->master_info = platform_info;
 	drv_data->pdev = pdev;
 
+	/* the spi->mode bits understood by this driver: */
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+
 	master->bus_num = pdev->id;
 	master->num_chipselect = platform_info->num_chipselect;
 	master->dma_alignment = DMA_ALIGNMENT;
diff --git a/drivers/spi/spi_mpc83xx.c b/drivers/spi/spi_mpc83xx.c
index 0926a3e293e..ce61be98e06 100644
--- a/drivers/spi/spi_mpc83xx.c
+++ b/drivers/spi/spi_mpc83xx.c
@@ -419,10 +419,6 @@ static void mpc83xx_spi_work(struct work_struct *work)
 	spin_unlock_irq(&mpc83xx_spi->lock);
 }
 
-/* the spi->mode bits understood by this driver: */
-#define MODEBITS	(SPI_CPOL | SPI_CPHA | SPI_CS_HIGH \
-			| SPI_LSB_FIRST | SPI_LOOP)
-
 static int mpc83xx_spi_setup(struct spi_device *spi)
 {
 	struct mpc83xx_spi *mpc83xx_spi;
@@ -430,12 +426,6 @@ static int mpc83xx_spi_setup(struct spi_device *spi)
 	u32 hw_mode;
 	struct spi_mpc83xx_cs	*cs = spi->controller_state;
 
-	if (spi->mode & ~MODEBITS) {
-		dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n",
-			spi->mode & ~MODEBITS);
-		return -EINVAL;
-	}
-
 	if (!spi->max_speed_hz)
 		return -EINVAL;
 
@@ -562,6 +552,10 @@ mpc83xx_spi_probe(struct device *dev, struct resource *mem, unsigned int irq)
 
 	dev_set_drvdata(dev, master);
 
+	/* the spi->mode bits understood by this driver: */
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH
+			| SPI_LSB_FIRST | SPI_LOOP;
+
 	master->setup = mpc83xx_spi_setup;
 	master->transfer = mpc83xx_spi_transfer;
 	master->cleanup = mpc83xx_spi_cleanup;
diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c
index 18a4c7f5438..e0d44af4745 100644
--- a/drivers/spi/spi_s3c24xx.c
+++ b/drivers/spi/spi_s3c24xx.c
@@ -146,19 +146,10 @@ static int s3c24xx_spi_setupxfer(struct spi_device *spi,
 	return 0;
 }
 
-/* the spi->mode bits understood by this driver: */
-#define MODEBITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH)
-
 static int s3c24xx_spi_setup(struct spi_device *spi)
 {
 	int ret;
 
-	if (spi->mode & ~MODEBITS) {
-		dev_dbg(&spi->dev, "setup: unsupported mode bits %x\n",
-			spi->mode & ~MODEBITS);
-		return -EINVAL;
-	}
-
 	ret = s3c24xx_spi_setupxfer(spi, NULL);
 	if (ret < 0) {
 		dev_err(&spi->dev, "setupxfer returned %d\n", ret);
@@ -283,6 +274,9 @@ static int __init s3c24xx_spi_probe(struct platform_device *pdev)
 
 	/* setup the master state. */
 
+	/* the spi->mode bits understood by this driver: */
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+
 	master->num_chipselect = hw->pdata->num_cs;
 	master->bus_num = pdata->bus_num;
 
diff --git a/drivers/spi/spi_txx9.c b/drivers/spi/spi_txx9.c
index 8e36b2153d9..96057de133a 100644
--- a/drivers/spi/spi_txx9.c
+++ b/drivers/spi/spi_txx9.c
@@ -110,17 +110,11 @@ static void txx9spi_cs_func(struct spi_device *spi, struct txx9spi *c,
 	ndelay(cs_delay);	/* CS Setup Time / CS Recovery Time */
 }
 
-/* the spi->mode bits understood by this driver: */
-#define MODEBITS	(SPI_CS_HIGH|SPI_CPOL|SPI_CPHA)
-
 static int txx9spi_setup(struct spi_device *spi)
 {
 	struct txx9spi *c = spi_master_get_devdata(spi->master);
 	u8 bits_per_word;
 
-	if (spi->mode & ~MODEBITS)
-		return -EINVAL;
-
 	if (!spi->max_speed_hz
 			|| spi->max_speed_hz > c->max_speed_hz
 			|| spi->max_speed_hz < c->min_speed_hz)
@@ -414,6 +408,9 @@ static int __init txx9spi_probe(struct platform_device *dev)
 		 (unsigned long long)res->start, irq,
 		 (c->baseclk + 500000) / 1000000);
 
+	/* the spi->mode bits understood by this driver: */
+	master->mode_bits = SPI_CS_HIGH | SPI_CPOL | SPI_CPHA;
+
 	master->bus_num = dev->id;
 	master->setup = txx9spi_setup;
 	master->transfer = txx9spi_transfer;
diff --git a/drivers/spi/xilinx_spi.c b/drivers/spi/xilinx_spi.c
index 2d7e6b81fb4..46b8c5c2f45 100644
--- a/drivers/spi/xilinx_spi.c
+++ b/drivers/spi/xilinx_spi.c
@@ -158,9 +158,6 @@ static int xilinx_spi_setup_transfer(struct spi_device *spi,
 	return 0;
 }
 
-/* the spi->mode bits understood by this driver: */
-#define MODEBITS (SPI_CPOL | SPI_CPHA)
-
 static int xilinx_spi_setup(struct spi_device *spi)
 {
 	struct spi_bitbang *bitbang;
@@ -170,12 +167,6 @@ static int xilinx_spi_setup(struct spi_device *spi)
 	xspi = spi_master_get_devdata(spi->master);
 	bitbang = &xspi->bitbang;
 
-	if (spi->mode & ~MODEBITS) {
-		dev_err(&spi->dev, "%s, unsupported mode bits %x\n",
-			__func__, spi->mode & ~MODEBITS);
-		return -EINVAL;
-	}
-
 	retval = xilinx_spi_setup_transfer(spi, NULL);
 	if (retval < 0)
 		return retval;
@@ -327,6 +318,9 @@ static int __init xilinx_spi_of_probe(struct of_device *ofdev,
 		goto put_master;
 	}
 
+	/* the spi->mode bits understood by this driver: */
+	master->mode_bits = SPI_CPOL | SPI_CPHA;
+
 	xspi = spi_master_get_devdata(master);
 	xspi->bitbang.master = spi_master_get(master);
 	xspi->bitbang.chipselect = xilinx_spi_chipselect;
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 0db5d64fc5e..9c4cd27f468 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -245,6 +245,9 @@ struct spi_master {
 	 */
 	u16			dma_alignment;
 
+	/* spi_device.mode flags understood by this controller driver */
+	u16			mode_bits;
+
 	/* Setup mode and clock, etc (spi driver may call many times).
 	 *
 	 * IMPORTANT:  this may be called when transfers to another
-- 
cgit v1.2.3-70-g09d2


From 7390284290b184a7f4bb648ca15dc62c3dea3e75 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Wed, 17 Jun 2009 16:26:05 -0700
Subject: mpc52xx_psc_spi: convert to cs_control callback

mpc52xx_psc_spi driver is the last user of the legacy activate_cs and
deactivate_cs callbacks, so convert the driver to the cs_control hook and
remove the legacy callbacks from fsl_spi_platform_data struct.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/spi/mpc52xx_psc_spi.c | 22 +++++++++-------------
 include/linux/fsl_devices.h   |  4 ----
 2 files changed, 9 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/drivers/spi/mpc52xx_psc_spi.c b/drivers/spi/mpc52xx_psc_spi.c
index bdae9f90897..1b74d5ca03f 100644
--- a/drivers/spi/mpc52xx_psc_spi.c
+++ b/drivers/spi/mpc52xx_psc_spi.c
@@ -13,6 +13,7 @@
 
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
 #include <linux/of_platform.h>
@@ -30,8 +31,7 @@
 
 struct mpc52xx_psc_spi {
 	/* fsl_spi_platform data */
-	void (*activate_cs)(u8, u8);
-	void (*deactivate_cs)(u8, u8);
+	void (*cs_control)(struct spi_device *spi, bool on);
 	u32 sysclk;
 
 	/* driver internal data */
@@ -111,18 +111,16 @@ static void mpc52xx_psc_spi_activate_cs(struct spi_device *spi)
 	out_be16((u16 __iomem *)&psc->ccr, ccr);
 	mps->bits_per_word = cs->bits_per_word;
 
-	if (mps->activate_cs)
-		mps->activate_cs(spi->chip_select,
-				(spi->mode & SPI_CS_HIGH) ? 1 : 0);
+	if (mps->cs_control)
+		mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 1 : 0);
 }
 
 static void mpc52xx_psc_spi_deactivate_cs(struct spi_device *spi)
 {
 	struct mpc52xx_psc_spi *mps = spi_master_get_devdata(spi->master);
 
-	if (mps->deactivate_cs)
-		mps->deactivate_cs(spi->chip_select,
-				(spi->mode & SPI_CS_HIGH) ? 1 : 0);
+	if (mps->cs_control)
+		mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 0 : 1);
 }
 
 #define MPC52xx_PSC_BUFSIZE (MPC52xx_PSC_RFNUM_MASK + 1)
@@ -382,15 +380,13 @@ static int __init mpc52xx_psc_spi_do_probe(struct device *dev, u32 regaddr,
 	mps->irq = irq;
 	if (pdata == NULL) {
 		dev_warn(dev, "probe called without platform data, no "
-				"(de)activate_cs function will be called\n");
-		mps->activate_cs = NULL;
-		mps->deactivate_cs = NULL;
+				"cs_control function will be called\n");
+		mps->cs_control = NULL;
 		mps->sysclk = 0;
 		master->bus_num = bus_num;
 		master->num_chipselect = 255;
 	} else {
-		mps->activate_cs = pdata->activate_cs;
-		mps->deactivate_cs = pdata->deactivate_cs;
+		mps->cs_control = pdata->cs_control;
 		mps->sysclk = pdata->sysclk;
 		master->bus_num = pdata->bus_num;
 		master->num_chipselect = pdata->max_chipselect;
diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h
index 244677cc082..43fc95d822d 100644
--- a/include/linux/fsl_devices.h
+++ b/include/linux/fsl_devices.h
@@ -79,10 +79,6 @@ struct fsl_spi_platform_data {
 	u16	max_chipselect;
 	void	(*cs_control)(struct spi_device *spi, bool on);
 	u32	sysclk;
-
-	/* Legacy hooks, used by mpc52xx_psc_spi driver. */
-	void	(*activate_cs)(u8 cs, u8 polarity);
-	void	(*deactivate_cs)(u8 cs, u8 polarity);
 };
 
 struct mpc8xx_pcmcia_ops {
-- 
cgit v1.2.3-70-g09d2


From 77906a546127e056dbdac618a7afb5b92d00c058 Mon Sep 17 00:00:00 2001
From: Daniel Silverstone <dsilvers@simtec.co.uk>
Date: Wed, 17 Jun 2009 16:26:15 -0700
Subject: pca953x: support GPIOLIB GPIO naming

Add support to the PCA953x driver to use the GPIOLIB naming facility for
GPIOs.

Signed-off-by: Daniel Silverstone <dsilvers@simtec.co.uk>
Cc: Ben Gardner <bgardner@wabtec.com>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/pca953x.c      | 4 ++++
 include/linux/i2c/pca953x.h | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c
index 8dc0164bd51..2dae94fbabf 100644
--- a/drivers/gpio/pca953x.c
+++ b/drivers/gpio/pca953x.c
@@ -50,6 +50,7 @@ struct pca953x_chip {
 
 	struct i2c_client *client;
 	struct gpio_chip gpio_chip;
+	char **names;
 };
 
 static int pca953x_write_reg(struct pca953x_chip *chip, int reg, uint16_t val)
@@ -192,6 +193,7 @@ static void pca953x_setup_gpio(struct pca953x_chip *chip, int gpios)
 	gc->label = chip->client->name;
 	gc->dev = &chip->client->dev;
 	gc->owner = THIS_MODULE;
+	gc->names = chip->names;
 }
 
 static int __devinit pca953x_probe(struct i2c_client *client,
@@ -215,6 +217,8 @@ static int __devinit pca953x_probe(struct i2c_client *client,
 
 	chip->gpio_start = pdata->gpio_base;
 
+	chip->names = pdata->names;
+
 	/* initialize cached registers from their original values.
 	 * we can't share this chip with another i2c master.
 	 */
diff --git a/include/linux/i2c/pca953x.h b/include/linux/i2c/pca953x.h
index 3c7361217df..81736d6a8db 100644
--- a/include/linux/i2c/pca953x.h
+++ b/include/linux/i2c/pca953x.h
@@ -15,4 +15,5 @@ struct pca953x_platform_data {
 	int		(*teardown)(struct i2c_client *client,
 				unsigned gpio, unsigned ngpio,
 				void *context);
+	char		**names;
 };
-- 
cgit v1.2.3-70-g09d2


From 1d965fe0eb435b3f9a10538815f6a68de0aef03c Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@jeffreymahoney.com>
Date: Wed, 17 Jun 2009 16:26:29 -0700
Subject: reiserfs: fix warnings with gcc 4.4

Several code paths in reiserfs have a construct like:

 if (is_direntry_le_ih(ih = B_N_PITEM_HEAD(src, item_num))) ...

which, in addition to being ugly, end up causing compiler warnings with
gcc 4.4.0.  Previous compilers didn't issue a warning.

fs/reiserfs/do_balan.c:1273: warning: operation on `aux_ih' may be undefined
fs/reiserfs/lbalance.c:393: warning: operation on `ih' may be undefined
fs/reiserfs/lbalance.c:421: warning: operation on `ih' may be undefined
fs/reiserfs/lbalance.c:777: warning: operation on `ih' may be undefined

I believe this is due to the ih being passed to macros which evaluate the
argument more than once.  This is old code and we haven't seen any
problems with it, but this patch eliminates the warnings.

It converts the multiple evaluation macros to static inlines and does a
preassignment for the cases that were causing the warnings because that
code is just ugly.

Reported-by: Chris Mason <mason@oracle.com>
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/reiserfs/do_balan.c      |  5 ++---
 fs/reiserfs/lbalance.c      | 10 ++++++----
 include/linux/reiserfs_fs.h | 47 +++++++++++++++++++++++++++++++++++++--------
 3 files changed, 47 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 4beb964a2a3..128d3f7c8aa 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -1270,9 +1270,8 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih,	/* item h
 
 					RFALSE(ih, "PAP-12210: ih must be 0");
 
-					if (is_direntry_le_ih
-					    (aux_ih =
-					     B_N_PITEM_HEAD(tbS0, item_pos))) {
+					aux_ih = B_N_PITEM_HEAD(tbS0, item_pos);
+					if (is_direntry_le_ih(aux_ih)) {
 						/* we append to directory item */
 
 						int entry_count;
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index 381750a155f..03d85cbf90b 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -390,7 +390,8 @@ static void leaf_item_bottle(struct buffer_info *dest_bi,
 
 	if (last_first == FIRST_TO_LAST) {
 		/* if ( if item in position item_num in buffer SOURCE is directory item ) */
-		if (is_direntry_le_ih(ih = B_N_PITEM_HEAD(src, item_num)))
+		ih = B_N_PITEM_HEAD(src, item_num);
+		if (is_direntry_le_ih(ih))
 			leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST,
 					      item_num, 0, cpy_bytes);
 		else {
@@ -418,7 +419,8 @@ static void leaf_item_bottle(struct buffer_info *dest_bi,
 		}
 	} else {
 		/*  if ( if item in position item_num in buffer SOURCE is directory item ) */
-		if (is_direntry_le_ih(ih = B_N_PITEM_HEAD(src, item_num)))
+		ih = B_N_PITEM_HEAD(src, item_num);
+		if (is_direntry_le_ih(ih))
 			leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST,
 					      item_num,
 					      I_ENTRY_COUNT(ih) - cpy_bytes,
@@ -774,8 +776,8 @@ void leaf_delete_items(struct buffer_info *cur_bi, int last_first,
 			leaf_delete_items_entirely(cur_bi, first + 1,
 						   del_num - 1);
 
-			if (is_direntry_le_ih
-			    (ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh) - 1)))
+			ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh) - 1);
+			if (is_direntry_le_ih(ih))
 				/* the last item is directory  */
 				/* len = numbers of directory entries in this item */
 				len = ih_entry_count(ih);
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 2245c78d587..dd31e7bae35 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -660,23 +660,54 @@ static inline void set_le_key_k_type(int version, struct reiserfs_key *key,
 		   cpu_to_le32(type2uniqueness(type)))
 	    : (void)(set_offset_v2_k_type(&(key->u.k_offset_v2), type));
 }
+
 static inline void set_le_ih_k_type(struct item_head *ih, int type)
 {
 	set_le_key_k_type(ih_version(ih), &(ih->ih_key), type);
 }
 
-#define is_direntry_le_key(version,key) (le_key_k_type (version, key) == TYPE_DIRENTRY)
-#define is_direct_le_key(version,key) (le_key_k_type (version, key) == TYPE_DIRECT)
-#define is_indirect_le_key(version,key) (le_key_k_type (version, key) == TYPE_INDIRECT)
-#define is_statdata_le_key(version,key) (le_key_k_type (version, key) == TYPE_STAT_DATA)
+static inline int is_direntry_le_key(int version, struct reiserfs_key *key)
+{
+	return le_key_k_type(version, key) == TYPE_DIRENTRY;
+}
+
+static inline int is_direct_le_key(int version, struct reiserfs_key *key)
+{
+	return le_key_k_type(version, key) == TYPE_DIRECT;
+}
+
+static inline int is_indirect_le_key(int version, struct reiserfs_key *key)
+{
+	return le_key_k_type(version, key) == TYPE_INDIRECT;
+}
+
+static inline int is_statdata_le_key(int version, struct reiserfs_key *key)
+{
+	return le_key_k_type(version, key) == TYPE_STAT_DATA;
+}
 
 //
 // item header has version.
 //
-#define is_direntry_le_ih(ih) is_direntry_le_key (ih_version (ih), &((ih)->ih_key))
-#define is_direct_le_ih(ih) is_direct_le_key (ih_version (ih), &((ih)->ih_key))
-#define is_indirect_le_ih(ih) is_indirect_le_key (ih_version(ih), &((ih)->ih_key))
-#define is_statdata_le_ih(ih) is_statdata_le_key (ih_version (ih), &((ih)->ih_key))
+static inline int is_direntry_le_ih(struct item_head *ih)
+{
+	return is_direntry_le_key(ih_version(ih), &ih->ih_key);
+}
+
+static inline int is_direct_le_ih(struct item_head *ih)
+{
+	return is_direct_le_key(ih_version(ih), &ih->ih_key);
+}
+
+static inline int is_indirect_le_ih(struct item_head *ih)
+{
+	return is_indirect_le_key(ih_version(ih), &ih->ih_key);
+}
+
+static inline int is_statdata_le_ih(struct item_head *ih)
+{
+	return is_statdata_le_key(ih_version(ih), &ih->ih_key);
+}
 
 //
 // key is pointer to cpu key, result is cpu
-- 
cgit v1.2.3-70-g09d2


From d69b042f3d7406ddba560143b1796020df760800 Mon Sep 17 00:00:00 2001
From: Balbir Singh <balbir@linux.vnet.ibm.com>
Date: Wed, 17 Jun 2009 16:26:34 -0700
Subject: memcg: add file-based RSS accounting

Add file RSS tracking per memory cgroup

We currently don't track file RSS, the RSS we report is actually anon RSS.
 All the file mapped pages, come in through the page cache and get
accounted there.  This patch adds support for accounting file RSS pages.
It should

1. Help improve the metrics reported by the memory resource controller
2. Will form the basis for a future shared memory accounting heuristic
   that has been proposed by Kamezawa.

Unfortunately, we cannot rename the existing "rss" keyword used in
memory.stat to "anon_rss".  We however, add "mapped_file" data and hope to
educate the end user through documentation.

[hugh.dickins@tiscali.co.uk: fix mem_cgroup_update_mapped_file_stat oops]
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.cn>
Cc: Paul Menage <menage@google.com>
Cc: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  7 ++++-
 mm/memcontrol.c            | 66 +++++++++++++++++++++++++++++++++++++++++++++-
 mm/page_cgroup.c           |  2 ++
 mm/rmap.c                  |  5 +++-
 4 files changed, 77 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 45add35dda1..e46a0734ab6 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -117,7 +117,7 @@ static inline bool mem_cgroup_disabled(void)
 }
 
 extern bool mem_cgroup_oom_called(struct task_struct *task);
-
+void mem_cgroup_update_mapped_file_stat(struct page *page, int val);
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct mem_cgroup;
 
@@ -271,6 +271,11 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
 {
 }
 
+static inline void mem_cgroup_update_mapped_file_stat(struct page *page,
+							int val)
+{
+}
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 70db6e0a5ee..6f682901deb 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -62,7 +62,8 @@ enum mem_cgroup_stat_index {
 	 * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss.
 	 */
 	MEM_CGROUP_STAT_CACHE, 	   /* # of pages charged as cache */
-	MEM_CGROUP_STAT_RSS,	   /* # of pages charged as rss */
+	MEM_CGROUP_STAT_RSS,	   /* # of pages charged as anon rss */
+	MEM_CGROUP_STAT_MAPPED_FILE,  /* # of pages charged as file rss */
 	MEM_CGROUP_STAT_PGPGIN_COUNT,	/* # of pages paged in */
 	MEM_CGROUP_STAT_PGPGOUT_COUNT,	/* # of pages paged out */
 
@@ -900,6 +901,44 @@ static void record_last_oom(struct mem_cgroup *mem)
 	mem_cgroup_walk_tree(mem, NULL, record_last_oom_cb);
 }
 
+/*
+ * Currently used to update mapped file statistics, but the routine can be
+ * generalized to update other statistics as well.
+ */
+void mem_cgroup_update_mapped_file_stat(struct page *page, int val)
+{
+	struct mem_cgroup *mem;
+	struct mem_cgroup_stat *stat;
+	struct mem_cgroup_stat_cpu *cpustat;
+	int cpu;
+	struct page_cgroup *pc;
+
+	if (!page_is_file_cache(page))
+		return;
+
+	pc = lookup_page_cgroup(page);
+	if (unlikely(!pc))
+		return;
+
+	lock_page_cgroup(pc);
+	mem = pc->mem_cgroup;
+	if (!mem)
+		goto done;
+
+	if (!PageCgroupUsed(pc))
+		goto done;
+
+	/*
+	 * Preemption is already disabled, we don't need get_cpu()
+	 */
+	cpu = smp_processor_id();
+	stat = &mem->stat;
+	cpustat = &stat->cpustat[cpu];
+
+	__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE, val);
+done:
+	unlock_page_cgroup(pc);
+}
 
 /*
  * Unlike exported interface, "oom" parameter is added. if oom==true,
@@ -1098,6 +1137,10 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 	struct mem_cgroup_per_zone *from_mz, *to_mz;
 	int nid, zid;
 	int ret = -EBUSY;
+	struct page *page;
+	int cpu;
+	struct mem_cgroup_stat *stat;
+	struct mem_cgroup_stat_cpu *cpustat;
 
 	VM_BUG_ON(from == to);
 	VM_BUG_ON(PageLRU(pc->page));
@@ -1118,6 +1161,23 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 
 	res_counter_uncharge(&from->res, PAGE_SIZE);
 	mem_cgroup_charge_statistics(from, pc, false);
+
+	page = pc->page;
+	if (page_is_file_cache(page) && page_mapped(page)) {
+		cpu = smp_processor_id();
+		/* Update mapped_file data for mem_cgroup "from" */
+		stat = &from->stat;
+		cpustat = &stat->cpustat[cpu];
+		__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE,
+						-1);
+
+		/* Update mapped_file data for mem_cgroup "to" */
+		stat = &to->stat;
+		cpustat = &stat->cpustat[cpu];
+		__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE,
+						1);
+	}
+
 	if (do_swap_account)
 		res_counter_uncharge(&from->memsw, PAGE_SIZE);
 	css_put(&from->css);
@@ -2046,6 +2106,7 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
 enum {
 	MCS_CACHE,
 	MCS_RSS,
+	MCS_MAPPED_FILE,
 	MCS_PGPGIN,
 	MCS_PGPGOUT,
 	MCS_INACTIVE_ANON,
@@ -2066,6 +2127,7 @@ struct {
 } memcg_stat_strings[NR_MCS_STAT] = {
 	{"cache", "total_cache"},
 	{"rss", "total_rss"},
+	{"mapped_file", "total_mapped_file"},
 	{"pgpgin", "total_pgpgin"},
 	{"pgpgout", "total_pgpgout"},
 	{"inactive_anon", "total_inactive_anon"},
@@ -2086,6 +2148,8 @@ static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data)
 	s->stat[MCS_CACHE] += val * PAGE_SIZE;
 	val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS);
 	s->stat[MCS_RSS] += val * PAGE_SIZE;
+	val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_MAPPED_FILE);
+	s->stat[MCS_MAPPED_FILE] += val * PAGE_SIZE;
 	val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGIN_COUNT);
 	s->stat[MCS_PGPGIN] += val;
 	val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT);
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 4f31c9b3e94..672089d5819 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -99,6 +99,8 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
 	unsigned long pfn = page_to_pfn(page);
 	struct mem_section *section = __pfn_to_section(pfn);
 
+	if (!section->page_cgroup)
+		return NULL;
 	return section->page_cgroup + pfn;
 }
 
diff --git a/mm/rmap.c b/mm/rmap.c
index c9ccc1a72dc..836c6c63e1f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -703,8 +703,10 @@ void page_add_new_anon_rmap(struct page *page,
  */
 void page_add_file_rmap(struct page *page)
 {
-	if (atomic_inc_and_test(&page->_mapcount))
+	if (atomic_inc_and_test(&page->_mapcount)) {
 		__inc_zone_page_state(page, NR_FILE_MAPPED);
+		mem_cgroup_update_mapped_file_stat(page, 1);
+	}
 }
 
 #ifdef CONFIG_DEBUG_VM
@@ -753,6 +755,7 @@ void page_remove_rmap(struct page *page)
 			mem_cgroup_uncharge_page(page);
 		__dec_zone_page_state(page,
 			PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
+		mem_cgroup_update_mapped_file_stat(page, -1);
 		/*
 		 * It would be tidy to reset the PageAnon mapping here,
 		 * but that might overwrite a racing page_add_anon_rmap
-- 
cgit v1.2.3-70-g09d2


From 302362c5abdda80b5c2e4e57be610c2e3c2ab3c5 Mon Sep 17 00:00:00 2001
From: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Date: Wed, 17 Jun 2009 16:27:15 -0700
Subject: memcg: remove mem_cgroup_cache_charge_swapin()

mem_cgroup_cache_charge_swapin() isn't used any more, so remove no-op
definition of it in header file.

Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0cedf31af0b..dca9b9999ae 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -423,12 +423,6 @@ static inline swp_entry_t get_swap_page(void)
 #define has_swap_token(x) 0
 #define disable_swap_token() do { } while(0)
 
-static inline int mem_cgroup_cache_charge_swapin(struct page *page,
-			struct mm_struct *mm, gfp_t mask, bool locked)
-{
-	return 0;
-}
-
 static inline void
 mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
 {
-- 
cgit v1.2.3-70-g09d2


From 20ebcdda78a282d1d5266887ddf8a2d670182576 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 17 Jun 2009 16:27:16 -0700
Subject: memcg: remove unneeded forward declaration from sched.h

This forward declaration seems pointless.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 02042e7f219..d0342101756 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -92,7 +92,6 @@ struct sched_param {
 
 #include <asm/processor.h>
 
-struct mem_cgroup;
 struct exec_domain;
 struct futex_pi_state;
 struct robust_list_head;
-- 
cgit v1.2.3-70-g09d2


From 8a9478ca7f4bcb8945cec7f95d52dae2d5e50cbd Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Wed, 17 Jun 2009 16:27:17 -0700
Subject: memcg: fix swap accounting

This patch fixes mis-accounting of swap usage in memcg.

In the current implementation, memcg's swap account is uncharged only when
swap is completely freed.  But there are several cases where swap cannot
be freed cleanly.  For handling that, this patch changes that memcg
uncharges swap account when swap has no references other than cache.

By this, memcg's swap entry accounting can be fully synchronous with the
application's behavior.

This patch also changes memcg's hooks for swap-out.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Acked-by: Balbir Singh <balbir@in.ibm.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  5 +++--
 mm/memcontrol.c      | 17 ++++++++++++-----
 mm/swapfile.c        | 16 ++++++++++++----
 3 files changed, 27 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index dca9b9999ae..c88b36665f7 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -319,10 +319,11 @@ static inline void disable_swap_token(void)
 }
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
-extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent);
+extern void
+mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout);
 #else
 static inline void
-mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
+mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
 {
 }
 #endif
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3468c38adde..a83e0395444 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -189,6 +189,7 @@ enum charge_type {
 	MEM_CGROUP_CHARGE_TYPE_SHMEM,	/* used by page migration of shmem */
 	MEM_CGROUP_CHARGE_TYPE_FORCE,	/* used by force_empty */
 	MEM_CGROUP_CHARGE_TYPE_SWAPOUT,	/* for accounting swapcache */
+	MEM_CGROUP_CHARGE_TYPE_DROP,	/* a page was unused swap cache */
 	NR_CHARGE_TYPE,
 };
 
@@ -1493,6 +1494,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 
 	switch (ctype) {
 	case MEM_CGROUP_CHARGE_TYPE_MAPPED:
+	case MEM_CGROUP_CHARGE_TYPE_DROP:
 		if (page_mapped(page))
 			goto unlock_out;
 		break;
@@ -1556,18 +1558,23 @@ void mem_cgroup_uncharge_cache_page(struct page *page)
  * called after __delete_from_swap_cache() and drop "page" account.
  * memcg information is recorded to swap_cgroup of "ent"
  */
-void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
+void
+mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
 {
 	struct mem_cgroup *memcg;
+	int ctype = MEM_CGROUP_CHARGE_TYPE_SWAPOUT;
+
+	if (!swapout) /* this was a swap cache but the swap is unused ! */
+		ctype = MEM_CGROUP_CHARGE_TYPE_DROP;
+
+	memcg = __mem_cgroup_uncharge_common(page, ctype);
 
-	memcg = __mem_cgroup_uncharge_common(page,
-					MEM_CGROUP_CHARGE_TYPE_SWAPOUT);
 	/* record memcg information */
-	if (do_swap_account && memcg) {
+	if (do_swap_account && swapout && memcg) {
 		swap_cgroup_record(ent, css_id(&memcg->css));
 		mem_cgroup_get(memcg);
 	}
-	if (memcg)
+	if (swapout && memcg)
 		css_put(&memcg->css);
 }
 #endif
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 28faa01cf57..d1ade1a48ee 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -583,8 +583,9 @@ static int swap_entry_free(struct swap_info_struct *p,
 			swap_list.next = p - swap_info;
 		nr_swap_pages++;
 		p->inuse_pages--;
-		mem_cgroup_uncharge_swap(ent);
 	}
+	if (!swap_count(count))
+		mem_cgroup_uncharge_swap(ent);
 	return count;
 }
 
@@ -609,12 +610,19 @@ void swap_free(swp_entry_t entry)
 void swapcache_free(swp_entry_t entry, struct page *page)
 {
 	struct swap_info_struct *p;
+	int ret;
 
-	if (page)
-		mem_cgroup_uncharge_swapcache(page, entry);
 	p = swap_info_get(entry);
 	if (p) {
-		swap_entry_free(p, entry, SWAP_CACHE);
+		ret = swap_entry_free(p, entry, SWAP_CACHE);
+		if (page) {
+			bool swapout;
+			if (ret)
+				swapout = true; /* the end of swap out */
+			else
+				swapout = false; /* no more swap users! */
+			mem_cgroup_uncharge_swapcache(page, entry, swapout);
+		}
 		spin_unlock(&swap_lock);
 	}
 	return;
-- 
cgit v1.2.3-70-g09d2


From c5b947b28828e82814605824e5db0bc58d66d8c0 Mon Sep 17 00:00:00 2001
From: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Date: Wed, 17 Jun 2009 16:27:20 -0700
Subject: memcg: add interface to reset limits

We don't have an interface to reset mem.limit or memsw.limit now.

This patch allows to reset mem.limit or memsw.limit when they are being
set to -1.

Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/cgroups/memory.txt |  1 +
 include/linux/res_counter.h      |  2 ++
 kernel/res_counter.c             | 12 +++++++++++-
 3 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index af48135bd9b..23d1262c077 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -209,6 +209,7 @@ We can alter the memory limit:
 
 NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
 mega or gigabytes.
+NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited).
 
 # cat /cgroups/0/memory.limit_in_bytes
 4194304
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 4c5bcf6ca7e..511f42fc681 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -49,6 +49,8 @@ struct res_counter {
 	struct res_counter *parent;
 };
 
+#define RESOURCE_MAX (unsigned long long)LLONG_MAX
+
 /**
  * Helpers to interact with userspace
  * res_counter_read_u64() - returns the value of the specified member.
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index bf8e7534c80..e1338f07431 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -18,7 +18,7 @@
 void res_counter_init(struct res_counter *counter, struct res_counter *parent)
 {
 	spin_lock_init(&counter->lock);
-	counter->limit = (unsigned long long)LLONG_MAX;
+	counter->limit = RESOURCE_MAX;
 	counter->parent = parent;
 }
 
@@ -133,6 +133,16 @@ int res_counter_memparse_write_strategy(const char *buf,
 					unsigned long long *res)
 {
 	char *end;
+
+	/* return RESOURCE_MAX(unlimited) if "-1" is specified */
+	if (*buf == '-') {
+		*res = simple_strtoull(buf + 1, &end, 10);
+		if (*res != 1 || *end != '\0')
+			return -EINVAL;
+		*res = RESOURCE_MAX;
+		return 0;
+	}
+
 	/* FIXME - make memparse() take const char* args */
 	*res = memparse((char *)buf, &end);
 	if (*end != '\0')
-- 
cgit v1.2.3-70-g09d2


From 1c216279539bd65c5a3d497e25d441dbddbcf1ec Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 17 Jun 2009 16:27:29 -0700
Subject: ptrace: tracehook_unsafe_exec(): remove the stale comment

tracehook_unsafe_exec() doesn't need task_lock(), remove the old comment.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/tracehook.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index eb96603d92d..17ba82efa48 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -143,7 +143,7 @@ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step)
  *
  * Return %LSM_UNSAFE_* bits applied to an exec because of tracing.
  *
- * Called with task_lock() held on @task.
+ * @task->cred_guard_mutex is held by the caller through the do_execve().
  */
 static inline int tracehook_unsafe_exec(struct task_struct *task)
 {
-- 
cgit v1.2.3-70-g09d2


From 8053bdd5ce15dcf043d41a4dd6cac4a5567effdc Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 17 Jun 2009 16:27:34 -0700
Subject: ptrace_get_task_struct: s/tasklist/rcu/, make it static

- Use rcu_read_lock() instead of tasklist_lock to find/get the task
  in ptrace_get_task_struct().

- Make it static, it has no callers outside of ptrace.c.

- The comment doesn't match the reality, this helper does not do
  any checks. Beacuse it is really trivial and static I removed the
  whole comment.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ptrace.h |  1 -
 kernel/ptrace.c        | 16 +++-------------
 2 files changed, 3 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 59e133d39d5..7456d7d87a1 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -81,7 +81,6 @@
 
 
 extern long arch_ptrace(struct task_struct *child, long request, long addr, long data);
-extern struct task_struct *ptrace_get_task_struct(pid_t pid);
 extern int ptrace_traceme(void);
 extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
 extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 38fdfea1a15..a64fe75a48b 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -581,26 +581,16 @@ int ptrace_request(struct task_struct *child, long request,
 	return ret;
 }
 
-/**
- * ptrace_get_task_struct  --  grab a task struct reference for ptrace
- * @pid:       process id to grab a task_struct reference of
- *
- * This function is a helper for ptrace implementations.  It checks
- * permissions and then grabs a task struct for use of the actual
- * ptrace implementation.
- *
- * Returns the task_struct for @pid or an ERR_PTR() on failure.
- */
-struct task_struct *ptrace_get_task_struct(pid_t pid)
+static struct task_struct *ptrace_get_task_struct(pid_t pid)
 {
 	struct task_struct *child;
 
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
 	child = find_task_by_vpid(pid);
 	if (child)
 		get_task_struct(child);
+	rcu_read_unlock();
 
-	read_unlock(&tasklist_lock);
 	if (!child)
 		return ERR_PTR(-ESRCH);
 	return child;
-- 
cgit v1.2.3-70-g09d2


From 17f98dcf6010a1cfd25d179fd0ce77d3dc2685c3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 17 Jun 2009 16:27:51 -0700
Subject: pids: clean up find_task_by_pid variants

find_task_by_pid_type_ns is only used to implement find_task_by_vpid and
find_task_by_pid_ns, but both of them pass PIDTYPE_PID as first argument.
So just fold find_task_by_pid_type_ns into find_task_by_pid_ns and use
find_task_by_pid_ns to implement find_task_by_vpid.

While we're at it also remove the exports for find_task_by_pid_ns and
find_task_by_vpid - we don't have any modular callers left as the only
modular caller of he old pre pid namespace find_task_by_pid (gfs2) was
switched to pid_task which operates on a struct pid pointer instead of a
pid_t.  Given the confusion about pid_t values vs namespace that's
generally the better option anyway and I think we're better of restricting
modules to do it that way.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  6 ------
 kernel/pid.c          | 17 +++--------------
 2 files changed, 3 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d0342101756..4d075426988 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1878,9 +1878,6 @@ extern struct pid_namespace init_pid_ns;
 /*
  * find a task by one of its numerical ids
  *
- * find_task_by_pid_type_ns():
- *      it is the most generic call - it finds a task by all id,
- *      type and namespace specified
  * find_task_by_pid_ns():
  *      finds a task by its pid in the specified namespace
  * find_task_by_vpid():
@@ -1889,9 +1886,6 @@ extern struct pid_namespace init_pid_ns;
  * see also find_vpid() etc in include/linux/pid.h
  */
 
-extern struct task_struct *find_task_by_pid_type_ns(int type, int pid,
-		struct pid_namespace *ns);
-
 extern struct task_struct *find_task_by_vpid(pid_t nr);
 extern struct task_struct *find_task_by_pid_ns(pid_t nr,
 		struct pid_namespace *ns);
diff --git a/kernel/pid.c b/kernel/pid.c
index b2e5f78fd28..31310b5d3f5 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -378,26 +378,15 @@ EXPORT_SYMBOL(pid_task);
 /*
  * Must be called under rcu_read_lock() or with tasklist_lock read-held.
  */
-struct task_struct *find_task_by_pid_type_ns(int type, int nr,
-		struct pid_namespace *ns)
+struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
 {
-	return pid_task(find_pid_ns(nr, ns), type);
+	return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
 }
 
-EXPORT_SYMBOL(find_task_by_pid_type_ns);
-
 struct task_struct *find_task_by_vpid(pid_t vnr)
 {
-	return find_task_by_pid_type_ns(PIDTYPE_PID, vnr,
-			current->nsproxy->pid_ns);
-}
-EXPORT_SYMBOL(find_task_by_vpid);
-
-struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
-{
-	return find_task_by_pid_type_ns(PIDTYPE_PID, nr, ns);
+	return find_task_by_pid_ns(vnr, current->nsproxy->pid_ns);
 }
-EXPORT_SYMBOL(find_task_by_pid_ns);
 
 struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
 {
-- 
cgit v1.2.3-70-g09d2


From b4188def441197d38f20e0935372780ed7c0e19d Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 17 Jun 2009 16:27:56 -0700
Subject: ipcns: make free_ipc_ns() static

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Reviewed-by: WANG Cong <xiyou.wangcong@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h |  1 -
 ipc/namespace.c               | 48 +++++++++++++++++++++----------------------
 2 files changed, 24 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 3bf40e246a8..804e4e4a2b6 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -94,7 +94,6 @@ static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
 #endif
 
 #if defined(CONFIG_IPC_NS)
-extern void free_ipc_ns(struct ipc_namespace *ns);
 extern struct ipc_namespace *copy_ipcs(unsigned long flags,
 				       struct ipc_namespace *ns);
 extern void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 231ee5359ab..a1094ff0bef 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -83,6 +83,30 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
 	up_write(&ids->rw_mutex);
 }
 
+static void free_ipc_ns(struct ipc_namespace *ns)
+{
+	/*
+	 * Unregistering the hotplug notifier at the beginning guarantees
+	 * that the ipc namespace won't be freed while we are inside the
+	 * callback routine. Since the blocking_notifier_chain_XXX routines
+	 * hold a rw lock on the notifier list, unregister_ipcns_notifier()
+	 * won't take the rw lock before blocking_notifier_call_chain() has
+	 * released the rd lock.
+	 */
+	unregister_ipcns_notifier(ns);
+	sem_exit_ns(ns);
+	msg_exit_ns(ns);
+	shm_exit_ns(ns);
+	kfree(ns);
+	atomic_dec(&nr_ipc_ns);
+
+	/*
+	 * Do the ipcns removal notification after decrementing nr_ipc_ns in
+	 * order to have a correct value when recomputing msgmni.
+	 */
+	ipcns_notify(IPCNS_REMOVED);
+}
+
 /*
  * put_ipc_ns - drop a reference to an ipc namespace.
  * @ns: the namespace to put
@@ -108,27 +132,3 @@ void put_ipc_ns(struct ipc_namespace *ns)
 		free_ipc_ns(ns);
 	}
 }
-
-void free_ipc_ns(struct ipc_namespace *ns)
-{
-	/*
-	 * Unregistering the hotplug notifier at the beginning guarantees
-	 * that the ipc namespace won't be freed while we are inside the
-	 * callback routine. Since the blocking_notifier_chain_XXX routines
-	 * hold a rw lock on the notifier list, unregister_ipcns_notifier()
-	 * won't take the rw lock before blocking_notifier_call_chain() has
-	 * released the rd lock.
-	 */
-	unregister_ipcns_notifier(ns);
-	sem_exit_ns(ns);
-	msg_exit_ns(ns);
-	shm_exit_ns(ns);
-	kfree(ns);
-	atomic_dec(&nr_ipc_ns);
-
-	/*
-	 * Do the ipcns removal notification after decrementing nr_ipc_ns in
-	 * order to have a correct value when recomputing msgmni.
-	 */
-	ipcns_notify(IPCNS_REMOVED);
-}
-- 
cgit v1.2.3-70-g09d2


From 665c7741fb63c7ceeb515f1d1ed8b016efe65bf3 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 17 Jun 2009 16:27:57 -0700
Subject: ipcns: move free_ipcs() proto

Function is really private to ipc/ and avoid struct kern_ipc_perm
forward declaration.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Reviewed-by: WANG Cong <xiyou.wangcong@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h | 4 ----
 ipc/util.h                    | 3 ++-
 2 files changed, 2 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 804e4e4a2b6..e408722a84c 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -96,10 +96,6 @@ static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
 #if defined(CONFIG_IPC_NS)
 extern struct ipc_namespace *copy_ipcs(unsigned long flags,
 				       struct ipc_namespace *ns);
-extern void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
-		      void (*free)(struct ipc_namespace *,
-				   struct kern_ipc_perm *));
-
 static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
 {
 	if (ns)
diff --git a/ipc/util.h b/ipc/util.h
index f9fe90e4886..ab3ebf2621b 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -171,5 +171,6 @@ static inline void ipc_unlock(struct kern_ipc_perm *perm)
 struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id);
 int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
 			struct ipc_ops *ops, struct ipc_params *params);
-
+void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
+		void (*free)(struct ipc_namespace *, struct kern_ipc_perm *));
 #endif
-- 
cgit v1.2.3-70-g09d2


From b99b87f70c7785ab1e253c6220f4b0b57ce3a7f7 Mon Sep 17 00:00:00 2001
From: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Date: Wed, 17 Jun 2009 16:28:03 -0700
Subject: kernel: constructor support

Call constructors (gcc-generated initcall-like functions) during kernel
start and module load.  Constructors are e.g.  used for gcov data
initialization.

Disable constructor support for usermode Linux to prevent conflicts with
host glibc.

Signed-off-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: WANG Cong <xiyou.wangcong@gmail.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Li Wei <W.Li@Sun.COM>
Cc: Michael Ellerman <michaele@au1.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Heiko Carstens <heicars2@linux.vnet.ibm.com>
Cc: Martin Schwidefsky <mschwid2@linux.vnet.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/sections.h    |  3 +++
 include/asm-generic/vmlinux.lds.h |  9 +++++++++
 include/linux/init.h              |  3 +++
 include/linux/module.h            |  6 ++++++
 init/Kconfig                      |  5 +++++
 init/main.c                       | 12 ++++++++++++
 kernel/module.c                   | 16 ++++++++++++++++
 7 files changed, 54 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index 4ce48e87853..d083561337f 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -14,6 +14,9 @@ extern char __kprobes_text_start[], __kprobes_text_end[];
 extern char __initdata_begin[], __initdata_end[];
 extern char __start_rodata[], __end_rodata[];
 
+/* Start and end of .ctors section - used for constructor calls. */
+extern char __ctors_start[], __ctors_end[];
+
 /* function descriptor handling (if any).  Override
  * in asm/sections.h */
 #ifndef dereference_function_descriptor
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 6bdba10fef4..55413e568f0 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -440,12 +440,21 @@
 		INIT_TASK						\
 	}
 
+#ifdef CONFIG_CONSTRUCTORS
+#define KERNEL_CTORS()	VMLINUX_SYMBOL(__ctors_start) = .; \
+			*(.ctors)			   \
+			VMLINUX_SYMBOL(__ctors_end) = .;
+#else
+#define KERNEL_CTORS()
+#endif
+
 /* init and exit section handling */
 #define INIT_DATA							\
 	*(.init.data)							\
 	DEV_DISCARD(init.data)						\
 	CPU_DISCARD(init.data)						\
 	MEM_DISCARD(init.data)						\
+	KERNEL_CTORS()							\
 	*(.init.rodata)							\
 	DEV_DISCARD(init.rodata)					\
 	CPU_DISCARD(init.rodata)					\
diff --git a/include/linux/init.h b/include/linux/init.h
index 8c2c9989626..13b633ed695 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -134,6 +134,9 @@ typedef void (*exitcall_t)(void);
 extern initcall_t __con_initcall_start[], __con_initcall_end[];
 extern initcall_t __security_initcall_start[], __security_initcall_end[];
 
+/* Used for contructor calls. */
+typedef void (*ctor_fn_t)(void);
+
 /* Defined in init/main.c */
 extern int do_one_initcall(initcall_t fn);
 extern char __initdata boot_command_line[];
diff --git a/include/linux/module.h b/include/linux/module.h
index 505f20dcc1c..098bdb7bfac 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -363,6 +363,12 @@ struct module
 	local_t ref;
 #endif
 #endif
+
+#ifdef CONFIG_CONSTRUCTORS
+	/* Constructor functions. */
+	ctor_fn_t *ctors;
+	unsigned int num_ctors;
+#endif
 };
 #ifndef MODULE_ARCH_INIT
 #define MODULE_ARCH_INIT {}
diff --git a/init/Kconfig b/init/Kconfig
index c4b3c6d51a7..1ce05a4cb5f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -16,6 +16,11 @@ config DEFCONFIG_LIST
 	default "$ARCH_DEFCONFIG"
 	default "arch/$ARCH/defconfig"
 
+config CONSTRUCTORS
+	bool
+	depends on !UML
+	default y
+
 menu "General setup"
 
 config EXPERIMENTAL
diff --git a/init/main.c b/init/main.c
index 0e7aedeaa05..1a65fdd0631 100644
--- a/init/main.c
+++ b/init/main.c
@@ -720,6 +720,17 @@ asmlinkage void __init start_kernel(void)
 	rest_init();
 }
 
+/* Call all constructor functions linked into the kernel. */
+static void __init do_ctors(void)
+{
+#ifdef CONFIG_CONSTRUCTORS
+	ctor_fn_t *call = (ctor_fn_t *) __ctors_start;
+
+	for (; call < (ctor_fn_t *) __ctors_end; call++)
+		(*call)();
+#endif
+}
+
 int initcall_debug;
 core_param(initcall_debug, initcall_debug, bool, 0644);
 
@@ -800,6 +811,7 @@ static void __init do_basic_setup(void)
 	usermodehelper_init();
 	driver_init();
 	init_irq_proc();
+	do_ctors();
 	do_initcalls();
 }
 
diff --git a/kernel/module.c b/kernel/module.c
index 215aaab09e9..38928fcaff2 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2216,6 +2216,10 @@ static noinline struct module *load_module(void __user *umod,
 	mod->unused_gpl_crcs = section_addr(hdr, sechdrs, secstrings,
 					    "__kcrctab_unused_gpl");
 #endif
+#ifdef CONFIG_CONSTRUCTORS
+	mod->ctors = section_objs(hdr, sechdrs, secstrings, ".ctors",
+				  sizeof(*mod->ctors), &mod->num_ctors);
+#endif
 
 #ifdef CONFIG_MARKERS
 	mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers",
@@ -2389,6 +2393,17 @@ static noinline struct module *load_module(void __user *umod,
 	goto free_hdr;
 }
 
+/* Call module constructors. */
+static void do_mod_ctors(struct module *mod)
+{
+#ifdef CONFIG_CONSTRUCTORS
+	unsigned long i;
+
+	for (i = 0; i < mod->num_ctors; i++)
+		mod->ctors[i]();
+#endif
+}
+
 /* This is where the real work happens */
 SYSCALL_DEFINE3(init_module, void __user *, umod,
 		unsigned long, len, const char __user *, uargs)
@@ -2417,6 +2432,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
 	blocking_notifier_call_chain(&module_notify_list,
 			MODULE_STATE_COMING, mod);
 
+	do_mod_ctors(mod);
 	/* Start the module */
 	if (mod->init != NULL)
 		ret = do_one_initcall(mod->init);
-- 
cgit v1.2.3-70-g09d2


From 0b923606e75f1ab672e25b14ac039a1cdcfa382f Mon Sep 17 00:00:00 2001
From: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Date: Wed, 17 Jun 2009 16:28:05 -0700
Subject: seq_file: add function to write binary data

seq_write() can be used to construct seq_files containing arbitrary data.
Required by the gcov-profiling interface to synthesize binary profiling
data files.

Signed-off-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Li Wei <W.Li@Sun.COM>
Cc: Michael Ellerman <michaele@au1.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Heiko Carstens <heicars2@linux.vnet.ibm.com>
Cc: Martin Schwidefsky <mschwid2@linux.vnet.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: WANG Cong <xiyou.wangcong@gmail.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/seq_file.c            | 20 ++++++++++++++++++++
 include/linux/seq_file.h |  1 +
 2 files changed, 21 insertions(+)

(limited to 'include')

diff --git a/fs/seq_file.c b/fs/seq_file.c
index 7f40f30c55c..6c959275f2d 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -640,6 +640,26 @@ int seq_puts(struct seq_file *m, const char *s)
 }
 EXPORT_SYMBOL(seq_puts);
 
+/**
+ * seq_write - write arbitrary data to buffer
+ * @seq: seq_file identifying the buffer to which data should be written
+ * @data: data address
+ * @len: number of bytes
+ *
+ * Return 0 on success, non-zero otherwise.
+ */
+int seq_write(struct seq_file *seq, const void *data, size_t len)
+{
+	if (seq->count + len < seq->size) {
+		memcpy(seq->buf + seq->count, data, len);
+		seq->count += len;
+		return 0;
+	}
+	seq->count = seq->size;
+	return -1;
+}
+EXPORT_SYMBOL(seq_write);
+
 struct list_head *seq_list_start(struct list_head *head, loff_t pos)
 {
 	struct list_head *lh;
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 004f3b3342c..0c6a86b7959 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -43,6 +43,7 @@ int seq_release(struct inode *, struct file *);
 int seq_escape(struct seq_file *, const char *, const char *);
 int seq_putc(struct seq_file *m, char c);
 int seq_puts(struct seq_file *m, const char *s);
+int seq_write(struct seq_file *seq, const void *data, size_t len);
 
 int seq_printf(struct seq_file *, const char *, ...)
 	__attribute__ ((format (printf,2,3)));
-- 
cgit v1.2.3-70-g09d2


From 2521f2c228ad750701ba4702484e31d876dbc386 Mon Sep 17 00:00:00 2001
From: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Date: Wed, 17 Jun 2009 16:28:08 -0700
Subject: gcov: add gcov profiling infrastructure

Enable the use of GCC's coverage testing tool gcov [1] with the Linux
kernel.  gcov may be useful for:

 * debugging (has this code been reached at all?)
 * test improvement (how do I change my test to cover these lines?)
 * minimizing kernel configurations (do I need this option if the
   associated code is never run?)

The profiling patch incorporates the following changes:

 * change kbuild to include profiling flags
 * provide functions needed by profiling code
 * present profiling data as files in debugfs

Note that on some architectures, enabling gcc's profiling option
"-fprofile-arcs" for the entire kernel may trigger compile/link/
run-time problems, some of which are caused by toolchain bugs and
others which require adjustment of architecture code.

For this reason profiling the entire kernel is initially restricted
to those architectures for which it is known to work without changes.
This restriction can be lifted once an architecture has been tested
and found compatible with gcc's profiling. Profiling of single files
or directories is still available on all platforms (see config help
text).

[1] http://gcc.gnu.org/onlinedocs/gcc/Gcov.html

Signed-off-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Li Wei <W.Li@Sun.COM>
Cc: Michael Ellerman <michaele@au1.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Heiko Carstens <heicars2@linux.vnet.ibm.com>
Cc: Martin Schwidefsky <mschwid2@linux.vnet.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: WANG Cong <xiyou.wangcong@gmail.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/gcov.txt              | 246 +++++++++++++
 Documentation/kernel-parameters.txt |   7 +
 Makefile                            |  11 +-
 arch/Kconfig                        |   2 +
 include/linux/compiler-gcc3.h       |   6 +
 kernel/Makefile                     |   1 +
 kernel/gcov/Kconfig                 |  48 +++
 kernel/gcov/Makefile                |   3 +
 kernel/gcov/base.c                  | 148 ++++++++
 kernel/gcov/fs.c                    | 673 ++++++++++++++++++++++++++++++++++++
 kernel/gcov/gcc_3_4.c               | 447 ++++++++++++++++++++++++
 kernel/gcov/gcov.h                  | 128 +++++++
 scripts/Makefile.lib                |  11 +
 13 files changed, 1726 insertions(+), 5 deletions(-)
 create mode 100644 Documentation/gcov.txt
 create mode 100644 kernel/gcov/Kconfig
 create mode 100644 kernel/gcov/Makefile
 create mode 100644 kernel/gcov/base.c
 create mode 100644 kernel/gcov/fs.c
 create mode 100644 kernel/gcov/gcc_3_4.c
 create mode 100644 kernel/gcov/gcov.h

(limited to 'include')

diff --git a/Documentation/gcov.txt b/Documentation/gcov.txt
new file mode 100644
index 00000000000..e716aadb3a3
--- /dev/null
+++ b/Documentation/gcov.txt
@@ -0,0 +1,246 @@
+Using gcov with the Linux kernel
+================================
+
+1. Introduction
+2. Preparation
+3. Customization
+4. Files
+5. Modules
+6. Separated build and test machines
+7. Troubleshooting
+Appendix A: sample script: gather_on_build.sh
+Appendix B: sample script: gather_on_test.sh
+
+
+1. Introduction
+===============
+
+gcov profiling kernel support enables the use of GCC's coverage testing
+tool gcov [1] with the Linux kernel. Coverage data of a running kernel
+is exported in gcov-compatible format via the "gcov" debugfs directory.
+To get coverage data for a specific file, change to the kernel build
+directory and use gcov with the -o option as follows (requires root):
+
+# cd /tmp/linux-out
+# gcov -o /sys/kernel/debug/gcov/tmp/linux-out/kernel spinlock.c
+
+This will create source code files annotated with execution counts
+in the current directory. In addition, graphical gcov front-ends such
+as lcov [2] can be used to automate the process of collecting data
+for the entire kernel and provide coverage overviews in HTML format.
+
+Possible uses:
+
+* debugging (has this line been reached at all?)
+* test improvement (how do I change my test to cover these lines?)
+* minimizing kernel configurations (do I need this option if the
+  associated code is never run?)
+
+--
+
+[1] http://gcc.gnu.org/onlinedocs/gcc/Gcov.html
+[2] http://ltp.sourceforge.net/coverage/lcov.php
+
+
+2. Preparation
+==============
+
+Configure the kernel with:
+
+        CONFIG_DEBUGFS=y
+        CONFIG_GCOV_KERNEL=y
+
+and to get coverage data for the entire kernel:
+
+        CONFIG_GCOV_PROFILE_ALL=y
+
+Note that kernels compiled with profiling flags will be significantly
+larger and run slower. Also CONFIG_GCOV_PROFILE_ALL may not be supported
+on all architectures.
+
+Profiling data will only become accessible once debugfs has been
+mounted:
+
+        mount -t debugfs none /sys/kernel/debug
+
+
+3. Customization
+================
+
+To enable profiling for specific files or directories, add a line
+similar to the following to the respective kernel Makefile:
+
+        For a single file (e.g. main.o):
+                GCOV_PROFILE_main.o := y
+
+        For all files in one directory:
+                GCOV_PROFILE := y
+
+To exclude files from being profiled even when CONFIG_GCOV_PROFILE_ALL
+is specified, use:
+
+                GCOV_PROFILE_main.o := n
+        and:
+                GCOV_PROFILE := n
+
+Only files which are linked to the main kernel image or are compiled as
+kernel modules are supported by this mechanism.
+
+
+4. Files
+========
+
+The gcov kernel support creates the following files in debugfs:
+
+        /sys/kernel/debug/gcov
+                Parent directory for all gcov-related files.
+
+        /sys/kernel/debug/gcov/reset
+                Global reset file: resets all coverage data to zero when
+                written to.
+
+        /sys/kernel/debug/gcov/path/to/compile/dir/file.gcda
+                The actual gcov data file as understood by the gcov
+                tool. Resets file coverage data to zero when written to.
+
+        /sys/kernel/debug/gcov/path/to/compile/dir/file.gcno
+                Symbolic link to a static data file required by the gcov
+                tool. This file is generated by gcc when compiling with
+                option -ftest-coverage.
+
+
+5. Modules
+==========
+
+Kernel modules may contain cleanup code which is only run during
+module unload time. The gcov mechanism provides a means to collect
+coverage data for such code by keeping a copy of the data associated
+with the unloaded module. This data remains available through debugfs.
+Once the module is loaded again, the associated coverage counters are
+initialized with the data from its previous instantiation.
+
+This behavior can be deactivated by specifying the gcov_persist kernel
+parameter:
+
+        gcov_persist=0
+
+At run-time, a user can also choose to discard data for an unloaded
+module by writing to its data file or the global reset file.
+
+
+6. Separated build and test machines
+====================================
+
+The gcov kernel profiling infrastructure is designed to work out-of-the
+box for setups where kernels are built and run on the same machine. In
+cases where the kernel runs on a separate machine, special preparations
+must be made, depending on where the gcov tool is used:
+
+a) gcov is run on the TEST machine
+
+The gcov tool version on the test machine must be compatible with the
+gcc version used for kernel build. Also the following files need to be
+copied from build to test machine:
+
+from the source tree:
+  - all C source files + headers
+
+from the build tree:
+  - all C source files + headers
+  - all .gcda and .gcno files
+  - all links to directories
+
+It is important to note that these files need to be placed into the
+exact same file system location on the test machine as on the build
+machine. If any of the path components is symbolic link, the actual
+directory needs to be used instead (due to make's CURDIR handling).
+
+b) gcov is run on the BUILD machine
+
+The following files need to be copied after each test case from test
+to build machine:
+
+from the gcov directory in sysfs:
+  - all .gcda files
+  - all links to .gcno files
+
+These files can be copied to any location on the build machine. gcov
+must then be called with the -o option pointing to that directory.
+
+Example directory setup on the build machine:
+
+  /tmp/linux:    kernel source tree
+  /tmp/out:      kernel build directory as specified by make O=
+  /tmp/coverage: location of the files copied from the test machine
+
+  [user@build] cd /tmp/out
+  [user@build] gcov -o /tmp/coverage/tmp/out/init main.c
+
+
+7. Troubleshooting
+==================
+
+Problem:  Compilation aborts during linker step.
+Cause:    Profiling flags are specified for source files which are not
+          linked to the main kernel or which are linked by a custom
+          linker procedure.
+Solution: Exclude affected source files from profiling by specifying
+          GCOV_PROFILE := n or GCOV_PROFILE_basename.o := n in the
+          corresponding Makefile.
+
+
+Appendix A: gather_on_build.sh
+==============================
+
+Sample script to gather coverage meta files on the build machine
+(see 6a):
+
+#!/bin/bash
+
+KSRC=$1
+KOBJ=$2
+DEST=$3
+
+if [ -z "$KSRC" ] || [ -z "$KOBJ" ] || [ -z "$DEST" ]; then
+  echo "Usage: $0 <ksrc directory> <kobj directory> <output.tar.gz>" >&2
+  exit 1
+fi
+
+KSRC=$(cd $KSRC; printf "all:\n\t@echo \${CURDIR}\n" | make -f -)
+KOBJ=$(cd $KOBJ; printf "all:\n\t@echo \${CURDIR}\n" | make -f -)
+
+find $KSRC $KOBJ \( -name '*.gcno' -o -name '*.[ch]' -o -type l \) -a \
+                 -perm /u+r,g+r | tar cfz $DEST -P -T -
+
+if [ $? -eq 0 ] ; then
+  echo "$DEST successfully created, copy to test system and unpack with:"
+  echo "  tar xfz $DEST -P"
+else
+  echo "Could not create file $DEST"
+fi
+
+
+Appendix B: gather_on_test.sh
+=============================
+
+Sample script to gather coverage data files on the test machine
+(see 6b):
+
+#!/bin/bash
+
+DEST=$1
+GCDA=/sys/kernel/debug/gcov
+
+if [ -z "$DEST" ] ; then
+  echo "Usage: $0 <output.tar.gz>" >&2
+  exit 1
+fi
+
+find $GCDA -name '*.gcno' -o -name '*.gcda' | tar cfz $DEST -T -
+
+if [ $? -eq 0 ] ; then
+  echo "$DEST successfully created, copy to build system and unpack with:"
+  echo "  tar xfz $DEST"
+else
+  echo "Could not create file $DEST"
+fi
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 5578248c18a..08def8deb5f 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -48,6 +48,7 @@ parameter is applicable:
 	EFI	EFI Partitioning (GPT) is enabled
 	EIDE	EIDE/ATAPI support is enabled.
 	FB	The frame buffer device is enabled.
+	GCOV	GCOV profiling is enabled.
 	HW	Appropriate hardware is enabled.
 	IA-64	IA-64 architecture is enabled.
 	IMA     Integrity measurement architecture is enabled.
@@ -796,6 +797,12 @@ and is between 256 and 4096 characters. It is defined in the file
 			Format: off | on
 			default: on
 
+	gcov_persist=	[GCOV] When non-zero (default), profiling data for
+			kernel modules is saved and remains accessible via
+			debugfs, even when the module is unloaded/reloaded.
+			When zero, profiling data is discarded and associated
+			debugfs files are removed at module unload time.
+
 	gdth=		[HW,SCSI]
 			See header of drivers/scsi/gdth.c.
 
diff --git a/Makefile b/Makefile
index ea63667617f..46e1c9d03d5 100644
--- a/Makefile
+++ b/Makefile
@@ -330,6 +330,7 @@ AFLAGS_MODULE   = $(MODFLAGS)
 LDFLAGS_MODULE  =
 CFLAGS_KERNEL	=
 AFLAGS_KERNEL	=
+CFLAGS_GCOV	= -fprofile-arcs -ftest-coverage
 
 
 # Use LINUXINCLUDE when you must reference the include/ directory.
@@ -356,7 +357,7 @@ export CPP AR NM STRIP OBJCOPY OBJDUMP MAKE AWK GENKSYMS PERL UTS_MACHINE
 export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
 
 export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS
-export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE
+export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV
 export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE
 
 # When compiling out-of-tree modules, put MODVERDIR in the module
@@ -1216,8 +1217,8 @@ clean: archclean $(clean-dirs)
 		\( -name '*.[oas]' -o -name '*.ko' -o -name '.*.cmd' \
 		-o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \
 		-o -name '*.symtypes' -o -name 'modules.order' \
-		-o -name 'Module.markers' -o -name '.tmp_*.o.*' \) \
-		-type f -print | xargs rm -f
+		-o -name 'Module.markers' -o -name '.tmp_*.o.*' \
+		-o -name '*.gcno' \) -type f -print | xargs rm -f
 
 # mrproper - Delete all generated files, including .config
 #
@@ -1421,8 +1422,8 @@ clean: $(clean-dirs)
 	$(call cmd,rmfiles)
 	@find $(KBUILD_EXTMOD) $(RCS_FIND_IGNORE) \
 		\( -name '*.[oas]' -o -name '*.ko' -o -name '.*.cmd' \
-		-o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \) \
-		-type f -print | xargs rm -f
+		-o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \
+		-o -name '*.gcno' \) -type f -print | xargs rm -f
 
 help:
 	@echo  '  Building external modules.'
diff --git a/arch/Kconfig b/arch/Kconfig
index 78a35e9dc10..99193b16023 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -112,3 +112,5 @@ config HAVE_DMA_API_DEBUG
 
 config HAVE_DEFAULT_NO_SPIN_MUTEXES
 	bool
+
+source "kernel/gcov/Kconfig"
diff --git a/include/linux/compiler-gcc3.h b/include/linux/compiler-gcc3.h
index 8005effc04f..b721129e046 100644
--- a/include/linux/compiler-gcc3.h
+++ b/include/linux/compiler-gcc3.h
@@ -16,6 +16,12 @@
 #define __must_check		__attribute__((warn_unused_result))
 #endif
 
+#ifdef CONFIG_GCOV_KERNEL
+# if __GNUC_MINOR__ < 4
+#   error "GCOV profiling support for gcc versions below 3.4 not included"
+# endif /* __GNUC_MINOR__ */
+#endif /* CONFIG_GCOV_KERNEL */
+
 /*
  * A trick to suppress uninitialized variable warning without generating any
  * code
diff --git a/kernel/Makefile b/kernel/Makefile
index 9df4501cb92..0a32cb21ec9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -71,6 +71,7 @@ obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
 obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
 obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
 obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
+obj-$(CONFIG_GCOV_KERNEL) += gcov/
 obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_KGDB) += kgdb.o
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
new file mode 100644
index 00000000000..aab593abfba
--- /dev/null
+++ b/kernel/gcov/Kconfig
@@ -0,0 +1,48 @@
+menu "GCOV-based kernel profiling"
+
+config GCOV_KERNEL
+	bool "Enable gcov-based kernel profiling"
+	depends on DEBUG_FS && CONSTRUCTORS
+	default n
+	---help---
+	This option enables gcov-based code profiling (e.g. for code coverage
+	measurements).
+
+	If unsure, say N.
+
+	Additionally specify CONFIG_GCOV_PROFILE_ALL=y to get profiling data
+	for the entire kernel. To enable profiling for specific files or
+	directories, add a line similar to the following to the respective
+	Makefile:
+
+	For a single file (e.g. main.o):
+	        GCOV_PROFILE_main.o := y
+
+	For all files in one directory:
+	        GCOV_PROFILE := y
+
+	To exclude files from being profiled even when CONFIG_GCOV_PROFILE_ALL
+	is specified, use:
+
+	        GCOV_PROFILE_main.o := n
+	and:
+	        GCOV_PROFILE := n
+
+	Note that the debugfs filesystem has to be mounted to access
+	profiling data.
+
+config GCOV_PROFILE_ALL
+	bool "Profile entire Kernel"
+	depends on GCOV_KERNEL
+	depends on S390 || X86_32
+	default n
+	---help---
+	This options activates profiling for the entire kernel.
+
+	If unsure, say N.
+
+	Note that a kernel compiled with profiling flags will be significantly
+	larger and run slower. Also be sure to exclude files from profiling
+	which are not linked to the kernel image to prevent linker errors.
+
+endmenu
diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile
new file mode 100644
index 00000000000..3f761001d51
--- /dev/null
+++ b/kernel/gcov/Makefile
@@ -0,0 +1,3 @@
+EXTRA_CFLAGS := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"'
+
+obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o gcc_3_4.o
diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c
new file mode 100644
index 00000000000..9b22d03cc58
--- /dev/null
+++ b/kernel/gcov/base.c
@@ -0,0 +1,148 @@
+/*
+ *  This code maintains a list of active profiling data structures.
+ *
+ *    Copyright IBM Corp. 2009
+ *    Author(s): Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+ *
+ *    Uses gcc-internal data definitions.
+ *    Based on the gcov-kernel patch by:
+ *		 Hubertus Franke <frankeh@us.ibm.com>
+ *		 Nigel Hinds <nhinds@us.ibm.com>
+ *		 Rajan Ravindran <rajancr@us.ibm.com>
+ *		 Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+ *		 Paul Larson
+ */
+
+#define pr_fmt(fmt)	"gcov: " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include "gcov.h"
+
+static struct gcov_info *gcov_info_head;
+static int gcov_events_enabled;
+static DEFINE_MUTEX(gcov_lock);
+
+/*
+ * __gcov_init is called by gcc-generated constructor code for each object
+ * file compiled with -fprofile-arcs.
+ */
+void __gcov_init(struct gcov_info *info)
+{
+	static unsigned int gcov_version;
+
+	mutex_lock(&gcov_lock);
+	if (gcov_version == 0) {
+		gcov_version = info->version;
+		/*
+		 * Printing gcc's version magic may prove useful for debugging
+		 * incompatibility reports.
+		 */
+		pr_info("version magic: 0x%x\n", gcov_version);
+	}
+	/*
+	 * Add new profiling data structure to list and inform event
+	 * listener.
+	 */
+	info->next = gcov_info_head;
+	gcov_info_head = info;
+	if (gcov_events_enabled)
+		gcov_event(GCOV_ADD, info);
+	mutex_unlock(&gcov_lock);
+}
+EXPORT_SYMBOL(__gcov_init);
+
+/*
+ * These functions may be referenced by gcc-generated profiling code but serve
+ * no function for kernel profiling.
+ */
+void __gcov_flush(void)
+{
+	/* Unused. */
+}
+EXPORT_SYMBOL(__gcov_flush);
+
+void __gcov_merge_add(gcov_type *counters, unsigned int n_counters)
+{
+	/* Unused. */
+}
+EXPORT_SYMBOL(__gcov_merge_add);
+
+void __gcov_merge_single(gcov_type *counters, unsigned int n_counters)
+{
+	/* Unused. */
+}
+EXPORT_SYMBOL(__gcov_merge_single);
+
+void __gcov_merge_delta(gcov_type *counters, unsigned int n_counters)
+{
+	/* Unused. */
+}
+EXPORT_SYMBOL(__gcov_merge_delta);
+
+/**
+ * gcov_enable_events - enable event reporting through gcov_event()
+ *
+ * Turn on reporting of profiling data load/unload-events through the
+ * gcov_event() callback. Also replay all previous events once. This function
+ * is needed because some events are potentially generated too early for the
+ * callback implementation to handle them initially.
+ */
+void gcov_enable_events(void)
+{
+	struct gcov_info *info;
+
+	mutex_lock(&gcov_lock);
+	gcov_events_enabled = 1;
+	/* Perform event callback for previously registered entries. */
+	for (info = gcov_info_head; info; info = info->next)
+		gcov_event(GCOV_ADD, info);
+	mutex_unlock(&gcov_lock);
+}
+
+#ifdef CONFIG_MODULES
+static inline int within(void *addr, void *start, unsigned long size)
+{
+	return ((addr >= start) && (addr < start + size));
+}
+
+/* Update list and generate events when modules are unloaded. */
+static int gcov_module_notifier(struct notifier_block *nb, unsigned long event,
+				void *data)
+{
+	struct module *mod = data;
+	struct gcov_info *info;
+	struct gcov_info *prev;
+
+	if (event != MODULE_STATE_GOING)
+		return NOTIFY_OK;
+	mutex_lock(&gcov_lock);
+	prev = NULL;
+	/* Remove entries located in module from linked list. */
+	for (info = gcov_info_head; info; info = info->next) {
+		if (within(info, mod->module_core, mod->core_size)) {
+			if (prev)
+				prev->next = info->next;
+			else
+				gcov_info_head = info->next;
+			if (gcov_events_enabled)
+				gcov_event(GCOV_REMOVE, info);
+		} else
+			prev = info;
+	}
+	mutex_unlock(&gcov_lock);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block gcov_nb = {
+	.notifier_call	= gcov_module_notifier,
+};
+
+static int __init gcov_init(void)
+{
+	return register_module_notifier(&gcov_nb);
+}
+device_initcall(gcov_init);
+#endif /* CONFIG_MODULES */
diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c
new file mode 100644
index 00000000000..ef3c3f88a7a
--- /dev/null
+++ b/kernel/gcov/fs.c
@@ -0,0 +1,673 @@
+/*
+ *  This code exports profiling data as debugfs files to userspace.
+ *
+ *    Copyright IBM Corp. 2009
+ *    Author(s): Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+ *
+ *    Uses gcc-internal data definitions.
+ *    Based on the gcov-kernel patch by:
+ *		 Hubertus Franke <frankeh@us.ibm.com>
+ *		 Nigel Hinds <nhinds@us.ibm.com>
+ *		 Rajan Ravindran <rajancr@us.ibm.com>
+ *		 Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+ *		 Paul Larson
+ *		 Yi CDL Yang
+ */
+
+#define pr_fmt(fmt)	"gcov: " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/seq_file.h>
+#include "gcov.h"
+
+/**
+ * struct gcov_node - represents a debugfs entry
+ * @list: list head for child node list
+ * @children: child nodes
+ * @all: list head for list of all nodes
+ * @parent: parent node
+ * @info: associated profiling data structure if not a directory
+ * @ghost: when an object file containing profiling data is unloaded we keep a
+ *         copy of the profiling data here to allow collecting coverage data
+ *         for cleanup code. Such a node is called a "ghost".
+ * @dentry: main debugfs entry, either a directory or data file
+ * @links: associated symbolic links
+ * @name: data file basename
+ *
+ * struct gcov_node represents an entity within the gcov/ subdirectory
+ * of debugfs. There are directory and data file nodes. The latter represent
+ * the actual synthesized data file plus any associated symbolic links which
+ * are needed by the gcov tool to work correctly.
+ */
+struct gcov_node {
+	struct list_head list;
+	struct list_head children;
+	struct list_head all;
+	struct gcov_node *parent;
+	struct gcov_info *info;
+	struct gcov_info *ghost;
+	struct dentry *dentry;
+	struct dentry **links;
+	char name[0];
+};
+
+static const char objtree[] = OBJTREE;
+static const char srctree[] = SRCTREE;
+static struct gcov_node root_node;
+static struct dentry *reset_dentry;
+static LIST_HEAD(all_head);
+static DEFINE_MUTEX(node_lock);
+
+/* If non-zero, keep copies of profiling data for unloaded modules. */
+static int gcov_persist = 1;
+
+static int __init gcov_persist_setup(char *str)
+{
+	unsigned long val;
+
+	if (strict_strtoul(str, 0, &val)) {
+		pr_warning("invalid gcov_persist parameter '%s'\n", str);
+		return 0;
+	}
+	gcov_persist = val;
+	pr_info("setting gcov_persist to %d\n", gcov_persist);
+
+	return 1;
+}
+__setup("gcov_persist=", gcov_persist_setup);
+
+/*
+ * seq_file.start() implementation for gcov data files. Note that the
+ * gcov_iterator interface is designed to be more restrictive than seq_file
+ * (no start from arbitrary position, etc.), to simplify the iterator
+ * implementation.
+ */
+static void *gcov_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	loff_t i;
+
+	gcov_iter_start(seq->private);
+	for (i = 0; i < *pos; i++) {
+		if (gcov_iter_next(seq->private))
+			return NULL;
+	}
+	return seq->private;
+}
+
+/* seq_file.next() implementation for gcov data files. */
+static void *gcov_seq_next(struct seq_file *seq, void *data, loff_t *pos)
+{
+	struct gcov_iterator *iter = data;
+
+	if (gcov_iter_next(iter))
+		return NULL;
+	(*pos)++;
+
+	return iter;
+}
+
+/* seq_file.show() implementation for gcov data files. */
+static int gcov_seq_show(struct seq_file *seq, void *data)
+{
+	struct gcov_iterator *iter = data;
+
+	if (gcov_iter_write(iter, seq))
+		return -EINVAL;
+	return 0;
+}
+
+static void gcov_seq_stop(struct seq_file *seq, void *data)
+{
+	/* Unused. */
+}
+
+static const struct seq_operations gcov_seq_ops = {
+	.start	= gcov_seq_start,
+	.next	= gcov_seq_next,
+	.show	= gcov_seq_show,
+	.stop	= gcov_seq_stop,
+};
+
+/*
+ * Return the profiling data set for a given node. This can either be the
+ * original profiling data structure or a duplicate (also called "ghost")
+ * in case the associated object file has been unloaded.
+ */
+static struct gcov_info *get_node_info(struct gcov_node *node)
+{
+	if (node->info)
+		return node->info;
+
+	return node->ghost;
+}
+
+/*
+ * open() implementation for gcov data files. Create a copy of the profiling
+ * data set and initialize the iterator and seq_file interface.
+ */
+static int gcov_seq_open(struct inode *inode, struct file *file)
+{
+	struct gcov_node *node = inode->i_private;
+	struct gcov_iterator *iter;
+	struct seq_file *seq;
+	struct gcov_info *info;
+	int rc = -ENOMEM;
+
+	mutex_lock(&node_lock);
+	/*
+	 * Read from a profiling data copy to minimize reference tracking
+	 * complexity and concurrent access.
+	 */
+	info = gcov_info_dup(get_node_info(node));
+	if (!info)
+		goto out_unlock;
+	iter = gcov_iter_new(info);
+	if (!iter)
+		goto err_free_info;
+	rc = seq_open(file, &gcov_seq_ops);
+	if (rc)
+		goto err_free_iter_info;
+	seq = file->private_data;
+	seq->private = iter;
+out_unlock:
+	mutex_unlock(&node_lock);
+	return rc;
+
+err_free_iter_info:
+	gcov_iter_free(iter);
+err_free_info:
+	gcov_info_free(info);
+	goto out_unlock;
+}
+
+/*
+ * release() implementation for gcov data files. Release resources allocated
+ * by open().
+ */
+static int gcov_seq_release(struct inode *inode, struct file *file)
+{
+	struct gcov_iterator *iter;
+	struct gcov_info *info;
+	struct seq_file *seq;
+
+	seq = file->private_data;
+	iter = seq->private;
+	info = gcov_iter_get_info(iter);
+	gcov_iter_free(iter);
+	gcov_info_free(info);
+	seq_release(inode, file);
+
+	return 0;
+}
+
+/*
+ * Find a node by the associated data file name. Needs to be called with
+ * node_lock held.
+ */
+static struct gcov_node *get_node_by_name(const char *name)
+{
+	struct gcov_node *node;
+	struct gcov_info *info;
+
+	list_for_each_entry(node, &all_head, all) {
+		info = get_node_info(node);
+		if (info && (strcmp(info->filename, name) == 0))
+			return node;
+	}
+
+	return NULL;
+}
+
+static void remove_node(struct gcov_node *node);
+
+/*
+ * write() implementation for gcov data files. Reset profiling data for the
+ * associated file. If the object file has been unloaded (i.e. this is
+ * a "ghost" node), remove the debug fs node as well.
+ */
+static ssize_t gcov_seq_write(struct file *file, const char __user *addr,
+			      size_t len, loff_t *pos)
+{
+	struct seq_file *seq;
+	struct gcov_info *info;
+	struct gcov_node *node;
+
+	seq = file->private_data;
+	info = gcov_iter_get_info(seq->private);
+	mutex_lock(&node_lock);
+	node = get_node_by_name(info->filename);
+	if (node) {
+		/* Reset counts or remove node for unloaded modules. */
+		if (node->ghost)
+			remove_node(node);
+		else
+			gcov_info_reset(node->info);
+	}
+	/* Reset counts for open file. */
+	gcov_info_reset(info);
+	mutex_unlock(&node_lock);
+
+	return len;
+}
+
+/*
+ * Given a string <path> representing a file path of format:
+ *   path/to/file.gcda
+ * construct and return a new string:
+ *   <dir/>path/to/file.<ext>
+ */
+static char *link_target(const char *dir, const char *path, const char *ext)
+{
+	char *target;
+	char *old_ext;
+	char *copy;
+
+	copy = kstrdup(path, GFP_KERNEL);
+	if (!copy)
+		return NULL;
+	old_ext = strrchr(copy, '.');
+	if (old_ext)
+		*old_ext = '\0';
+	if (dir)
+		target = kasprintf(GFP_KERNEL, "%s/%s.%s", dir, copy, ext);
+	else
+		target = kasprintf(GFP_KERNEL, "%s.%s", copy, ext);
+	kfree(copy);
+
+	return target;
+}
+
+/*
+ * Construct a string representing the symbolic link target for the given
+ * gcov data file name and link type. Depending on the link type and the
+ * location of the data file, the link target can either point to a
+ * subdirectory of srctree, objtree or in an external location.
+ */
+static char *get_link_target(const char *filename, const struct gcov_link *ext)
+{
+	const char *rel;
+	char *result;
+
+	if (strncmp(filename, objtree, strlen(objtree)) == 0) {
+		rel = filename + strlen(objtree) + 1;
+		if (ext->dir == SRC_TREE)
+			result = link_target(srctree, rel, ext->ext);
+		else
+			result = link_target(objtree, rel, ext->ext);
+	} else {
+		/* External compilation. */
+		result = link_target(NULL, filename, ext->ext);
+	}
+
+	return result;
+}
+
+#define SKEW_PREFIX	".tmp_"
+
+/*
+ * For a filename .tmp_filename.ext return filename.ext. Needed to compensate
+ * for filename skewing caused by the mod-versioning mechanism.
+ */
+static const char *deskew(const char *basename)
+{
+	if (strncmp(basename, SKEW_PREFIX, sizeof(SKEW_PREFIX) - 1) == 0)
+		return basename + sizeof(SKEW_PREFIX) - 1;
+	return basename;
+}
+
+/*
+ * Create links to additional files (usually .c and .gcno files) which the
+ * gcov tool expects to find in the same directory as the gcov data file.
+ */
+static void add_links(struct gcov_node *node, struct dentry *parent)
+{
+	char *basename;
+	char *target;
+	int num;
+	int i;
+
+	for (num = 0; gcov_link[num].ext; num++)
+		/* Nothing. */;
+	node->links = kcalloc(num, sizeof(struct dentry *), GFP_KERNEL);
+	if (!node->links)
+		return;
+	for (i = 0; i < num; i++) {
+		target = get_link_target(get_node_info(node)->filename,
+					 &gcov_link[i]);
+		if (!target)
+			goto out_err;
+		basename = strrchr(target, '/');
+		if (!basename)
+			goto out_err;
+		basename++;
+		node->links[i] = debugfs_create_symlink(deskew(basename),
+							parent,	target);
+		if (!node->links[i])
+			goto out_err;
+		kfree(target);
+	}
+
+	return;
+out_err:
+	kfree(target);
+	while (i-- > 0)
+		debugfs_remove(node->links[i]);
+	kfree(node->links);
+	node->links = NULL;
+}
+
+static const struct file_operations gcov_data_fops = {
+	.open		= gcov_seq_open,
+	.release	= gcov_seq_release,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.write		= gcov_seq_write,
+};
+
+/* Basic initialization of a new node. */
+static void init_node(struct gcov_node *node, struct gcov_info *info,
+		      const char *name, struct gcov_node *parent)
+{
+	INIT_LIST_HEAD(&node->list);
+	INIT_LIST_HEAD(&node->children);
+	INIT_LIST_HEAD(&node->all);
+	node->info = info;
+	node->parent = parent;
+	if (name)
+		strcpy(node->name, name);
+}
+
+/*
+ * Create a new node and associated debugfs entry. Needs to be called with
+ * node_lock held.
+ */
+static struct gcov_node *new_node(struct gcov_node *parent,
+				  struct gcov_info *info, const char *name)
+{
+	struct gcov_node *node;
+
+	node = kzalloc(sizeof(struct gcov_node) + strlen(name) + 1, GFP_KERNEL);
+	if (!node) {
+		pr_warning("out of memory\n");
+		return NULL;
+	}
+	init_node(node, info, name, parent);
+	/* Differentiate between gcov data file nodes and directory nodes. */
+	if (info) {
+		node->dentry = debugfs_create_file(deskew(node->name), 0600,
+					parent->dentry, node, &gcov_data_fops);
+	} else
+		node->dentry = debugfs_create_dir(node->name, parent->dentry);
+	if (!node->dentry) {
+		pr_warning("could not create file\n");
+		kfree(node);
+		return NULL;
+	}
+	if (info)
+		add_links(node, parent->dentry);
+	list_add(&node->list, &parent->children);
+	list_add(&node->all, &all_head);
+
+	return node;
+}
+
+/* Remove symbolic links associated with node. */
+static void remove_links(struct gcov_node *node)
+{
+	int i;
+
+	if (!node->links)
+		return;
+	for (i = 0; gcov_link[i].ext; i++)
+		debugfs_remove(node->links[i]);
+	kfree(node->links);
+	node->links = NULL;
+}
+
+/*
+ * Remove node from all lists and debugfs and release associated resources.
+ * Needs to be called with node_lock held.
+ */
+static void release_node(struct gcov_node *node)
+{
+	list_del(&node->list);
+	list_del(&node->all);
+	debugfs_remove(node->dentry);
+	remove_links(node);
+	if (node->ghost)
+		gcov_info_free(node->ghost);
+	kfree(node);
+}
+
+/* Release node and empty parents. Needs to be called with node_lock held. */
+static void remove_node(struct gcov_node *node)
+{
+	struct gcov_node *parent;
+
+	while ((node != &root_node) && list_empty(&node->children)) {
+		parent = node->parent;
+		release_node(node);
+		node = parent;
+	}
+}
+
+/*
+ * Find child node with given basename. Needs to be called with node_lock
+ * held.
+ */
+static struct gcov_node *get_child_by_name(struct gcov_node *parent,
+					   const char *name)
+{
+	struct gcov_node *node;
+
+	list_for_each_entry(node, &parent->children, list) {
+		if (strcmp(node->name, name) == 0)
+			return node;
+	}
+
+	return NULL;
+}
+
+/*
+ * write() implementation for reset file. Reset all profiling data to zero
+ * and remove ghost nodes.
+ */
+static ssize_t reset_write(struct file *file, const char __user *addr,
+			   size_t len, loff_t *pos)
+{
+	struct gcov_node *node;
+
+	mutex_lock(&node_lock);
+restart:
+	list_for_each_entry(node, &all_head, all) {
+		if (node->info)
+			gcov_info_reset(node->info);
+		else if (list_empty(&node->children)) {
+			remove_node(node);
+			/* Several nodes may have gone - restart loop. */
+			goto restart;
+		}
+	}
+	mutex_unlock(&node_lock);
+
+	return len;
+}
+
+/* read() implementation for reset file. Unused. */
+static ssize_t reset_read(struct file *file, char __user *addr, size_t len,
+			  loff_t *pos)
+{
+	/* Allow read operation so that a recursive copy won't fail. */
+	return 0;
+}
+
+static const struct file_operations gcov_reset_fops = {
+	.write	= reset_write,
+	.read	= reset_read,
+};
+
+/*
+ * Create a node for a given profiling data set and add it to all lists and
+ * debugfs. Needs to be called with node_lock held.
+ */
+static void add_node(struct gcov_info *info)
+{
+	char *filename;
+	char *curr;
+	char *next;
+	struct gcov_node *parent;
+	struct gcov_node *node;
+
+	filename = kstrdup(info->filename, GFP_KERNEL);
+	if (!filename)
+		return;
+	parent = &root_node;
+	/* Create directory nodes along the path. */
+	for (curr = filename; (next = strchr(curr, '/')); curr = next + 1) {
+		if (curr == next)
+			continue;
+		*next = 0;
+		if (strcmp(curr, ".") == 0)
+			continue;
+		if (strcmp(curr, "..") == 0) {
+			if (!parent->parent)
+				goto err_remove;
+			parent = parent->parent;
+			continue;
+		}
+		node = get_child_by_name(parent, curr);
+		if (!node) {
+			node = new_node(parent, NULL, curr);
+			if (!node)
+				goto err_remove;
+		}
+		parent = node;
+	}
+	/* Create file node. */
+	node = new_node(parent, info, curr);
+	if (!node)
+		goto err_remove;
+out:
+	kfree(filename);
+	return;
+
+err_remove:
+	remove_node(parent);
+	goto out;
+}
+
+/*
+ * The profiling data set associated with this node is being unloaded. Store a
+ * copy of the profiling data and turn this node into a "ghost".
+ */
+static int ghost_node(struct gcov_node *node)
+{
+	node->ghost = gcov_info_dup(node->info);
+	if (!node->ghost) {
+		pr_warning("could not save data for '%s' (out of memory)\n",
+			   node->info->filename);
+		return -ENOMEM;
+	}
+	node->info = NULL;
+
+	return 0;
+}
+
+/*
+ * Profiling data for this node has been loaded again. Add profiling data
+ * from previous instantiation and turn this node into a regular node.
+ */
+static void revive_node(struct gcov_node *node, struct gcov_info *info)
+{
+	if (gcov_info_is_compatible(node->ghost, info))
+		gcov_info_add(info, node->ghost);
+	else {
+		pr_warning("discarding saved data for '%s' (version changed)\n",
+			   info->filename);
+	}
+	gcov_info_free(node->ghost);
+	node->ghost = NULL;
+	node->info = info;
+}
+
+/*
+ * Callback to create/remove profiling files when code compiled with
+ * -fprofile-arcs is loaded/unloaded.
+ */
+void gcov_event(enum gcov_action action, struct gcov_info *info)
+{
+	struct gcov_node *node;
+
+	mutex_lock(&node_lock);
+	node = get_node_by_name(info->filename);
+	switch (action) {
+	case GCOV_ADD:
+		/* Add new node or revive ghost. */
+		if (!node) {
+			add_node(info);
+			break;
+		}
+		if (gcov_persist)
+			revive_node(node, info);
+		else {
+			pr_warning("could not add '%s' (already exists)\n",
+				   info->filename);
+		}
+		break;
+	case GCOV_REMOVE:
+		/* Remove node or turn into ghost. */
+		if (!node) {
+			pr_warning("could not remove '%s' (not found)\n",
+				   info->filename);
+			break;
+		}
+		if (gcov_persist) {
+			if (!ghost_node(node))
+				break;
+		}
+		remove_node(node);
+		break;
+	}
+	mutex_unlock(&node_lock);
+}
+
+/* Create debugfs entries. */
+static __init int gcov_fs_init(void)
+{
+	int rc = -EIO;
+
+	init_node(&root_node, NULL, NULL, NULL);
+	/*
+	 * /sys/kernel/debug/gcov will be parent for the reset control file
+	 * and all profiling files.
+	 */
+	root_node.dentry = debugfs_create_dir("gcov", NULL);
+	if (!root_node.dentry)
+		goto err_remove;
+	/*
+	 * Create reset file which resets all profiling counts when written
+	 * to.
+	 */
+	reset_dentry = debugfs_create_file("reset", 0600, root_node.dentry,
+					   NULL, &gcov_reset_fops);
+	if (!reset_dentry)
+		goto err_remove;
+	/* Replay previous events to get our fs hierarchy up-to-date. */
+	gcov_enable_events();
+	return 0;
+
+err_remove:
+	pr_err("init failed\n");
+	if (root_node.dentry)
+		debugfs_remove(root_node.dentry);
+
+	return rc;
+}
+device_initcall(gcov_fs_init);
diff --git a/kernel/gcov/gcc_3_4.c b/kernel/gcov/gcc_3_4.c
new file mode 100644
index 00000000000..ae5bb426003
--- /dev/null
+++ b/kernel/gcov/gcc_3_4.c
@@ -0,0 +1,447 @@
+/*
+ *  This code provides functions to handle gcc's profiling data format
+ *  introduced with gcc 3.4. Future versions of gcc may change the gcov
+ *  format (as happened before), so all format-specific information needs
+ *  to be kept modular and easily exchangeable.
+ *
+ *  This file is based on gcc-internal definitions. Functions and data
+ *  structures are defined to be compatible with gcc counterparts.
+ *  For a better understanding, refer to gcc source: gcc/gcov-io.h.
+ *
+ *    Copyright IBM Corp. 2009
+ *    Author(s): Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+ *
+ *    Uses gcc-internal data definitions.
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/seq_file.h>
+#include <linux/vmalloc.h>
+#include "gcov.h"
+
+/* Symbolic links to be created for each profiling data file. */
+const struct gcov_link gcov_link[] = {
+	{ OBJ_TREE, "gcno" },	/* Link to .gcno file in $(objtree). */
+	{ 0, NULL},
+};
+
+/*
+ * Determine whether a counter is active. Based on gcc magic. Doesn't change
+ * at run-time.
+ */
+static int counter_active(struct gcov_info *info, unsigned int type)
+{
+	return (1 << type) & info->ctr_mask;
+}
+
+/* Determine number of active counters. Based on gcc magic. */
+static unsigned int num_counter_active(struct gcov_info *info)
+{
+	unsigned int i;
+	unsigned int result = 0;
+
+	for (i = 0; i < GCOV_COUNTERS; i++) {
+		if (counter_active(info, i))
+			result++;
+	}
+	return result;
+}
+
+/**
+ * gcov_info_reset - reset profiling data to zero
+ * @info: profiling data set
+ */
+void gcov_info_reset(struct gcov_info *info)
+{
+	unsigned int active = num_counter_active(info);
+	unsigned int i;
+
+	for (i = 0; i < active; i++) {
+		memset(info->counts[i].values, 0,
+		       info->counts[i].num * sizeof(gcov_type));
+	}
+}
+
+/**
+ * gcov_info_is_compatible - check if profiling data can be added
+ * @info1: first profiling data set
+ * @info2: second profiling data set
+ *
+ * Returns non-zero if profiling data can be added, zero otherwise.
+ */
+int gcov_info_is_compatible(struct gcov_info *info1, struct gcov_info *info2)
+{
+	return (info1->stamp == info2->stamp);
+}
+
+/**
+ * gcov_info_add - add up profiling data
+ * @dest: profiling data set to which data is added
+ * @source: profiling data set which is added
+ *
+ * Adds profiling counts of @source to @dest.
+ */
+void gcov_info_add(struct gcov_info *dest, struct gcov_info *source)
+{
+	unsigned int i;
+	unsigned int j;
+
+	for (i = 0; i < num_counter_active(dest); i++) {
+		for (j = 0; j < dest->counts[i].num; j++) {
+			dest->counts[i].values[j] +=
+				source->counts[i].values[j];
+		}
+	}
+}
+
+/* Get size of function info entry. Based on gcc magic. */
+static size_t get_fn_size(struct gcov_info *info)
+{
+	size_t size;
+
+	size = sizeof(struct gcov_fn_info) + num_counter_active(info) *
+	       sizeof(unsigned int);
+	if (__alignof__(struct gcov_fn_info) > sizeof(unsigned int))
+		size = ALIGN(size, __alignof__(struct gcov_fn_info));
+	return size;
+}
+
+/* Get address of function info entry. Based on gcc magic. */
+static struct gcov_fn_info *get_fn_info(struct gcov_info *info, unsigned int fn)
+{
+	return (struct gcov_fn_info *)
+		((char *) info->functions + fn * get_fn_size(info));
+}
+
+/**
+ * gcov_info_dup - duplicate profiling data set
+ * @info: profiling data set to duplicate
+ *
+ * Return newly allocated duplicate on success, %NULL on error.
+ */
+struct gcov_info *gcov_info_dup(struct gcov_info *info)
+{
+	struct gcov_info *dup;
+	unsigned int i;
+	unsigned int active;
+
+	/* Duplicate gcov_info. */
+	active = num_counter_active(info);
+	dup = kzalloc(sizeof(struct gcov_info) +
+		      sizeof(struct gcov_ctr_info) * active, GFP_KERNEL);
+	if (!dup)
+		return NULL;
+	dup->version		= info->version;
+	dup->stamp		= info->stamp;
+	dup->n_functions	= info->n_functions;
+	dup->ctr_mask		= info->ctr_mask;
+	/* Duplicate filename. */
+	dup->filename		= kstrdup(info->filename, GFP_KERNEL);
+	if (!dup->filename)
+		goto err_free;
+	/* Duplicate table of functions. */
+	dup->functions = kmemdup(info->functions, info->n_functions *
+				 get_fn_size(info), GFP_KERNEL);
+	if (!dup->functions)
+		goto err_free;
+	/* Duplicate counter arrays. */
+	for (i = 0; i < active ; i++) {
+		struct gcov_ctr_info *ctr = &info->counts[i];
+		size_t size = ctr->num * sizeof(gcov_type);
+
+		dup->counts[i].num = ctr->num;
+		dup->counts[i].merge = ctr->merge;
+		dup->counts[i].values = vmalloc(size);
+		if (!dup->counts[i].values)
+			goto err_free;
+		memcpy(dup->counts[i].values, ctr->values, size);
+	}
+	return dup;
+
+err_free:
+	gcov_info_free(dup);
+	return NULL;
+}
+
+/**
+ * gcov_info_free - release memory for profiling data set duplicate
+ * @info: profiling data set duplicate to free
+ */
+void gcov_info_free(struct gcov_info *info)
+{
+	unsigned int active = num_counter_active(info);
+	unsigned int i;
+
+	for (i = 0; i < active ; i++)
+		vfree(info->counts[i].values);
+	kfree(info->functions);
+	kfree(info->filename);
+	kfree(info);
+}
+
+/**
+ * struct type_info - iterator helper array
+ * @ctr_type: counter type
+ * @offset: index of the first value of the current function for this type
+ *
+ * This array is needed to convert the in-memory data format into the in-file
+ * data format:
+ *
+ * In-memory:
+ *   for each counter type
+ *     for each function
+ *       values
+ *
+ * In-file:
+ *   for each function
+ *     for each counter type
+ *       values
+ *
+ * See gcc source gcc/gcov-io.h for more information on data organization.
+ */
+struct type_info {
+	int ctr_type;
+	unsigned int offset;
+};
+
+/**
+ * struct gcov_iterator - specifies current file position in logical records
+ * @info: associated profiling data
+ * @record: record type
+ * @function: function number
+ * @type: counter type
+ * @count: index into values array
+ * @num_types: number of counter types
+ * @type_info: helper array to get values-array offset for current function
+ */
+struct gcov_iterator {
+	struct gcov_info *info;
+
+	int record;
+	unsigned int function;
+	unsigned int type;
+	unsigned int count;
+
+	int num_types;
+	struct type_info type_info[0];
+};
+
+static struct gcov_fn_info *get_func(struct gcov_iterator *iter)
+{
+	return get_fn_info(iter->info, iter->function);
+}
+
+static struct type_info *get_type(struct gcov_iterator *iter)
+{
+	return &iter->type_info[iter->type];
+}
+
+/**
+ * gcov_iter_new - allocate and initialize profiling data iterator
+ * @info: profiling data set to be iterated
+ *
+ * Return file iterator on success, %NULL otherwise.
+ */
+struct gcov_iterator *gcov_iter_new(struct gcov_info *info)
+{
+	struct gcov_iterator *iter;
+
+	iter = kzalloc(sizeof(struct gcov_iterator) +
+		       num_counter_active(info) * sizeof(struct type_info),
+		       GFP_KERNEL);
+	if (iter)
+		iter->info = info;
+
+	return iter;
+}
+
+/**
+ * gcov_iter_free - release memory for iterator
+ * @iter: file iterator to free
+ */
+void gcov_iter_free(struct gcov_iterator *iter)
+{
+	kfree(iter);
+}
+
+/**
+ * gcov_iter_get_info - return profiling data set for given file iterator
+ * @iter: file iterator
+ */
+struct gcov_info *gcov_iter_get_info(struct gcov_iterator *iter)
+{
+	return iter->info;
+}
+
+/**
+ * gcov_iter_start - reset file iterator to starting position
+ * @iter: file iterator
+ */
+void gcov_iter_start(struct gcov_iterator *iter)
+{
+	int i;
+
+	iter->record = 0;
+	iter->function = 0;
+	iter->type = 0;
+	iter->count = 0;
+	iter->num_types = 0;
+	for (i = 0; i < GCOV_COUNTERS; i++) {
+		if (counter_active(iter->info, i)) {
+			iter->type_info[iter->num_types].ctr_type = i;
+			iter->type_info[iter->num_types++].offset = 0;
+		}
+	}
+}
+
+/* Mapping of logical record number to actual file content. */
+#define RECORD_FILE_MAGIC	0
+#define RECORD_GCOV_VERSION	1
+#define RECORD_TIME_STAMP	2
+#define RECORD_FUNCTION_TAG	3
+#define RECORD_FUNCTON_TAG_LEN	4
+#define RECORD_FUNCTION_IDENT	5
+#define RECORD_FUNCTION_CHECK	6
+#define RECORD_COUNT_TAG	7
+#define RECORD_COUNT_LEN	8
+#define RECORD_COUNT		9
+
+/**
+ * gcov_iter_next - advance file iterator to next logical record
+ * @iter: file iterator
+ *
+ * Return zero if new position is valid, non-zero if iterator has reached end.
+ */
+int gcov_iter_next(struct gcov_iterator *iter)
+{
+	switch (iter->record) {
+	case RECORD_FILE_MAGIC:
+	case RECORD_GCOV_VERSION:
+	case RECORD_FUNCTION_TAG:
+	case RECORD_FUNCTON_TAG_LEN:
+	case RECORD_FUNCTION_IDENT:
+	case RECORD_COUNT_TAG:
+		/* Advance to next record */
+		iter->record++;
+		break;
+	case RECORD_COUNT:
+		/* Advance to next count */
+		iter->count++;
+		/* fall through */
+	case RECORD_COUNT_LEN:
+		if (iter->count < get_func(iter)->n_ctrs[iter->type]) {
+			iter->record = 9;
+			break;
+		}
+		/* Advance to next counter type */
+		get_type(iter)->offset += iter->count;
+		iter->count = 0;
+		iter->type++;
+		/* fall through */
+	case RECORD_FUNCTION_CHECK:
+		if (iter->type < iter->num_types) {
+			iter->record = 7;
+			break;
+		}
+		/* Advance to next function */
+		iter->type = 0;
+		iter->function++;
+		/* fall through */
+	case RECORD_TIME_STAMP:
+		if (iter->function < iter->info->n_functions)
+			iter->record = 3;
+		else
+			iter->record = -1;
+		break;
+	}
+	/* Check for EOF. */
+	if (iter->record == -1)
+		return -EINVAL;
+	else
+		return 0;
+}
+
+/**
+ * seq_write_gcov_u32 - write 32 bit number in gcov format to seq_file
+ * @seq: seq_file handle
+ * @v: value to be stored
+ *
+ * Number format defined by gcc: numbers are recorded in the 32 bit
+ * unsigned binary form of the endianness of the machine generating the
+ * file.
+ */
+static int seq_write_gcov_u32(struct seq_file *seq, u32 v)
+{
+	return seq_write(seq, &v, sizeof(v));
+}
+
+/**
+ * seq_write_gcov_u64 - write 64 bit number in gcov format to seq_file
+ * @seq: seq_file handle
+ * @v: value to be stored
+ *
+ * Number format defined by gcc: numbers are recorded in the 32 bit
+ * unsigned binary form of the endianness of the machine generating the
+ * file. 64 bit numbers are stored as two 32 bit numbers, the low part
+ * first.
+ */
+static int seq_write_gcov_u64(struct seq_file *seq, u64 v)
+{
+	u32 data[2];
+
+	data[0] = (v & 0xffffffffUL);
+	data[1] = (v >> 32);
+	return seq_write(seq, data, sizeof(data));
+}
+
+/**
+ * gcov_iter_write - write data for current pos to seq_file
+ * @iter: file iterator
+ * @seq: seq_file handle
+ *
+ * Return zero on success, non-zero otherwise.
+ */
+int gcov_iter_write(struct gcov_iterator *iter, struct seq_file *seq)
+{
+	int rc = -EINVAL;
+
+	switch (iter->record) {
+	case RECORD_FILE_MAGIC:
+		rc = seq_write_gcov_u32(seq, GCOV_DATA_MAGIC);
+		break;
+	case RECORD_GCOV_VERSION:
+		rc = seq_write_gcov_u32(seq, iter->info->version);
+		break;
+	case RECORD_TIME_STAMP:
+		rc = seq_write_gcov_u32(seq, iter->info->stamp);
+		break;
+	case RECORD_FUNCTION_TAG:
+		rc = seq_write_gcov_u32(seq, GCOV_TAG_FUNCTION);
+		break;
+	case RECORD_FUNCTON_TAG_LEN:
+		rc = seq_write_gcov_u32(seq, 2);
+		break;
+	case RECORD_FUNCTION_IDENT:
+		rc = seq_write_gcov_u32(seq, get_func(iter)->ident);
+		break;
+	case RECORD_FUNCTION_CHECK:
+		rc = seq_write_gcov_u32(seq, get_func(iter)->checksum);
+		break;
+	case RECORD_COUNT_TAG:
+		rc = seq_write_gcov_u32(seq,
+			GCOV_TAG_FOR_COUNTER(get_type(iter)->ctr_type));
+		break;
+	case RECORD_COUNT_LEN:
+		rc = seq_write_gcov_u32(seq,
+				get_func(iter)->n_ctrs[iter->type] * 2);
+		break;
+	case RECORD_COUNT:
+		rc = seq_write_gcov_u64(seq,
+			iter->info->counts[iter->type].
+				values[iter->count + get_type(iter)->offset]);
+		break;
+	}
+	return rc;
+}
diff --git a/kernel/gcov/gcov.h b/kernel/gcov/gcov.h
new file mode 100644
index 00000000000..060073ebf7a
--- /dev/null
+++ b/kernel/gcov/gcov.h
@@ -0,0 +1,128 @@
+/*
+ *  Profiling infrastructure declarations.
+ *
+ *  This file is based on gcc-internal definitions. Data structures are
+ *  defined to be compatible with gcc counterparts. For a better
+ *  understanding, refer to gcc source: gcc/gcov-io.h.
+ *
+ *    Copyright IBM Corp. 2009
+ *    Author(s): Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+ *
+ *    Uses gcc-internal data definitions.
+ */
+
+#ifndef GCOV_H
+#define GCOV_H GCOV_H
+
+#include <linux/types.h>
+
+/*
+ * Profiling data types used for gcc 3.4 and above - these are defined by
+ * gcc and need to be kept as close to the original definition as possible to
+ * remain compatible.
+ */
+#define GCOV_COUNTERS		5
+#define GCOV_DATA_MAGIC		((unsigned int) 0x67636461)
+#define GCOV_TAG_FUNCTION	((unsigned int) 0x01000000)
+#define GCOV_TAG_COUNTER_BASE	((unsigned int) 0x01a10000)
+#define GCOV_TAG_FOR_COUNTER(count)					\
+	(GCOV_TAG_COUNTER_BASE + ((unsigned int) (count) << 17))
+
+#if BITS_PER_LONG >= 64
+typedef long gcov_type;
+#else
+typedef long long gcov_type;
+#endif
+
+/**
+ * struct gcov_fn_info - profiling meta data per function
+ * @ident: object file-unique function identifier
+ * @checksum: function checksum
+ * @n_ctrs: number of values per counter type belonging to this function
+ *
+ * This data is generated by gcc during compilation and doesn't change
+ * at run-time.
+ */
+struct gcov_fn_info {
+	unsigned int ident;
+	unsigned int checksum;
+	unsigned int n_ctrs[0];
+};
+
+/**
+ * struct gcov_ctr_info - profiling data per counter type
+ * @num: number of counter values for this type
+ * @values: array of counter values for this type
+ * @merge: merge function for counter values of this type (unused)
+ *
+ * This data is generated by gcc during compilation and doesn't change
+ * at run-time with the exception of the values array.
+ */
+struct gcov_ctr_info {
+	unsigned int	num;
+	gcov_type	*values;
+	void		(*merge)(gcov_type *, unsigned int);
+};
+
+/**
+ * struct gcov_info - profiling data per object file
+ * @version: gcov version magic indicating the gcc version used for compilation
+ * @next: list head for a singly-linked list
+ * @stamp: time stamp
+ * @filename: name of the associated gcov data file
+ * @n_functions: number of instrumented functions
+ * @functions: function data
+ * @ctr_mask: mask specifying which counter types are active
+ * @counts: counter data per counter type
+ *
+ * This data is generated by gcc during compilation and doesn't change
+ * at run-time with the exception of the next pointer.
+ */
+struct gcov_info {
+	unsigned int			version;
+	struct gcov_info		*next;
+	unsigned int			stamp;
+	const char			*filename;
+	unsigned int			n_functions;
+	const struct gcov_fn_info	*functions;
+	unsigned int			ctr_mask;
+	struct gcov_ctr_info		counts[0];
+};
+
+/* Base interface. */
+enum gcov_action {
+	GCOV_ADD,
+	GCOV_REMOVE,
+};
+
+void gcov_event(enum gcov_action action, struct gcov_info *info);
+void gcov_enable_events(void);
+
+/* Iterator control. */
+struct seq_file;
+struct gcov_iterator;
+
+struct gcov_iterator *gcov_iter_new(struct gcov_info *info);
+void gcov_iter_free(struct gcov_iterator *iter);
+void gcov_iter_start(struct gcov_iterator *iter);
+int gcov_iter_next(struct gcov_iterator *iter);
+int gcov_iter_write(struct gcov_iterator *iter, struct seq_file *seq);
+struct gcov_info *gcov_iter_get_info(struct gcov_iterator *iter);
+
+/* gcov_info control. */
+void gcov_info_reset(struct gcov_info *info);
+int gcov_info_is_compatible(struct gcov_info *info1, struct gcov_info *info2);
+void gcov_info_add(struct gcov_info *dest, struct gcov_info *source);
+struct gcov_info *gcov_info_dup(struct gcov_info *info);
+void gcov_info_free(struct gcov_info *info);
+
+struct gcov_link {
+	enum {
+		OBJ_TREE,
+		SRC_TREE,
+	} dir;
+	const char *ext;
+};
+extern const struct gcov_link gcov_link[];
+
+#endif /* GCOV_H */
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 2b706617c89..7a7778746ea 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -116,6 +116,17 @@ _a_flags       = $(KBUILD_CPPFLAGS) $(KBUILD_AFLAGS) $(KBUILD_SUBDIR_ASFLAGS) \
                  $(asflags-y) $(AFLAGS_$(basetarget).o)
 _cpp_flags     = $(KBUILD_CPPFLAGS) $(cppflags-y) $(CPPFLAGS_$(@F))
 
+#
+# Enable gcov profiling flags for a file, directory or for all files depending
+# on variables GCOV_PROFILE_obj.o, GCOV_PROFILE and CONFIG_GCOV_PROFILE_ALL
+# (in this order)
+#
+ifeq ($(CONFIG_GCOV_KERNEL),y)
+_c_flags += $(if $(patsubst n%,, \
+		$(GCOV_PROFILE_$(basetarget).o)$(GCOV_PROFILE)$(CONFIG_GCOV_PROFILE_ALL)), \
+		$(CFLAGS_GCOV))
+endif
+
 # If building the kernel in a separate objtree expand all occurrences
 # of -Idir to -I$(srctree)/dir except for absolute paths (starting with '/').
 
-- 
cgit v1.2.3-70-g09d2


From c147d8ea3e2f6f953647f2347ae732fd99b32e73 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Wed, 17 Jun 2009 16:28:10 -0700
Subject: dma-mapping: add asm-generic/dma-mapping-common.h

We unified x86 and IA64's handling of multiple dma mapping operations
(struct dma_map_ops in linux/dma-mapping.h) so we can remove duplication
in their arch/include/asm/dma-mapping.h.

This patchset adds include/asm-generic/dma-mapping-common.h that provides
some generic dma mapping function definitions for the users of struct
dma_map_ops.  This enables us to remove about 100 lines.  This also
enables us to easily add CONFIG_DMA_API_DEBUG support, which only x86
supports for now.  The 4th patch adds CONFIG_DMA_API_DEBUG support to IA64
by adding only 8 lines.

This patch:

This header file provides some mapping function definitions that the users
of struct dma_map_ops can use.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/dma-mapping-common.h | 190 +++++++++++++++++++++++++++++++
 1 file changed, 190 insertions(+)
 create mode 100644 include/asm-generic/dma-mapping-common.h

(limited to 'include')

diff --git a/include/asm-generic/dma-mapping-common.h b/include/asm-generic/dma-mapping-common.h
new file mode 100644
index 00000000000..5406a601185
--- /dev/null
+++ b/include/asm-generic/dma-mapping-common.h
@@ -0,0 +1,190 @@
+#ifndef _ASM_GENERIC_DMA_MAPPING_H
+#define _ASM_GENERIC_DMA_MAPPING_H
+
+#include <linux/kmemcheck.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-debug.h>
+#include <linux/dma-attrs.h>
+
+static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr,
+					      size_t size,
+					      enum dma_data_direction dir,
+					      struct dma_attrs *attrs)
+{
+	struct dma_map_ops *ops = get_dma_ops(dev);
+	dma_addr_t addr;
+
+	kmemcheck_mark_initialized(ptr, size);
+	BUG_ON(!valid_dma_direction(dir));
+	addr = ops->map_page(dev, virt_to_page(ptr),
+			     (unsigned long)ptr & ~PAGE_MASK, size,
+			     dir, attrs);
+	debug_dma_map_page(dev, virt_to_page(ptr),
+			   (unsigned long)ptr & ~PAGE_MASK, size,
+			   dir, addr, true);
+	return addr;
+}
+
+static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr,
+					  size_t size,
+					  enum dma_data_direction dir,
+					  struct dma_attrs *attrs)
+{
+	struct dma_map_ops *ops = get_dma_ops(dev);
+
+	BUG_ON(!valid_dma_direction(dir));
+	if (ops->unmap_page)
+		ops->unmap_page(dev, addr, size, dir, attrs);
+	debug_dma_unmap_page(dev, addr, size, dir, true);
+}
+
+static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
+				   int nents, enum dma_data_direction dir,
+				   struct dma_attrs *attrs)
+{
+	struct dma_map_ops *ops = get_dma_ops(dev);
+	int i, ents;
+	struct scatterlist *s;
+
+	for_each_sg(sg, s, nents, i)
+		kmemcheck_mark_initialized(sg_virt(s), s->length);
+	BUG_ON(!valid_dma_direction(dir));
+	ents = ops->map_sg(dev, sg, nents, dir, attrs);
+	debug_dma_map_sg(dev, sg, nents, ents, dir);
+
+	return ents;
+}
+
+static inline void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
+				      int nents, enum dma_data_direction dir,
+				      struct dma_attrs *attrs)
+{
+	struct dma_map_ops *ops = get_dma_ops(dev);
+
+	BUG_ON(!valid_dma_direction(dir));
+	debug_dma_unmap_sg(dev, sg, nents, dir);
+	if (ops->unmap_sg)
+		ops->unmap_sg(dev, sg, nents, dir, attrs);
+}
+
+static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
+				      size_t offset, size_t size,
+				      enum dma_data_direction dir)
+{
+	struct dma_map_ops *ops = get_dma_ops(dev);
+	dma_addr_t addr;
+
+	kmemcheck_mark_initialized(page_address(page) + offset, size);
+	BUG_ON(!valid_dma_direction(dir));
+	addr = ops->map_page(dev, page, offset, size, dir, NULL);
+	debug_dma_map_page(dev, page, offset, size, dir, addr, false);
+
+	return addr;
+}
+
+static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
+				  size_t size, enum dma_data_direction dir)
+{
+	struct dma_map_ops *ops = get_dma_ops(dev);
+
+	BUG_ON(!valid_dma_direction(dir));
+	if (ops->unmap_page)
+		ops->unmap_page(dev, addr, size, dir, NULL);
+	debug_dma_unmap_page(dev, addr, size, dir, false);
+}
+
+static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
+					   size_t size,
+					   enum dma_data_direction dir)
+{
+	struct dma_map_ops *ops = get_dma_ops(dev);
+
+	BUG_ON(!valid_dma_direction(dir));
+	if (ops->sync_single_for_cpu)
+		ops->sync_single_for_cpu(dev, addr, size, dir);
+	debug_dma_sync_single_for_cpu(dev, addr, size, dir);
+	flush_write_buffers();
+}
+
+static inline void dma_sync_single_for_device(struct device *dev,
+					      dma_addr_t addr, size_t size,
+					      enum dma_data_direction dir)
+{
+	struct dma_map_ops *ops = get_dma_ops(dev);
+
+	BUG_ON(!valid_dma_direction(dir));
+	if (ops->sync_single_for_device)
+		ops->sync_single_for_device(dev, addr, size, dir);
+	debug_dma_sync_single_for_device(dev, addr, size, dir);
+	flush_write_buffers();
+}
+
+static inline void dma_sync_single_range_for_cpu(struct device *dev,
+						 dma_addr_t addr,
+						 unsigned long offset,
+						 size_t size,
+						 enum dma_data_direction dir)
+{
+	struct dma_map_ops *ops = get_dma_ops(dev);
+
+	BUG_ON(!valid_dma_direction(dir));
+	if (ops->sync_single_range_for_cpu) {
+		ops->sync_single_range_for_cpu(dev, addr, offset, size, dir);
+		debug_dma_sync_single_range_for_cpu(dev, addr, offset, size, dir);
+
+		flush_write_buffers();
+	} else
+		dma_sync_single_for_cpu(dev, addr, size, dir);
+}
+
+static inline void dma_sync_single_range_for_device(struct device *dev,
+						    dma_addr_t addr,
+						    unsigned long offset,
+						    size_t size,
+						    enum dma_data_direction dir)
+{
+	struct dma_map_ops *ops = get_dma_ops(dev);
+
+	BUG_ON(!valid_dma_direction(dir));
+	if (ops->sync_single_range_for_device) {
+		ops->sync_single_range_for_device(dev, addr, offset, size, dir);
+		debug_dma_sync_single_range_for_device(dev, addr, offset, size, dir);
+
+		flush_write_buffers();
+	} else
+		dma_sync_single_for_device(dev, addr, size, dir);
+}
+
+static inline void
+dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+		    int nelems, enum dma_data_direction dir)
+{
+	struct dma_map_ops *ops = get_dma_ops(dev);
+
+	BUG_ON(!valid_dma_direction(dir));
+	if (ops->sync_sg_for_cpu)
+		ops->sync_sg_for_cpu(dev, sg, nelems, dir);
+	debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir);
+	flush_write_buffers();
+}
+
+static inline void
+dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+		       int nelems, enum dma_data_direction dir)
+{
+	struct dma_map_ops *ops = get_dma_ops(dev);
+
+	BUG_ON(!valid_dma_direction(dir));
+	if (ops->sync_sg_for_device)
+		ops->sync_sg_for_device(dev, sg, nelems, dir);
+	debug_dma_sync_sg_for_device(dev, sg, nelems, dir);
+
+	flush_write_buffers();
+}
+
+#define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, NULL)
+#define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r, NULL)
+#define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, NULL)
+#define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, NULL)
+
+#endif
-- 
cgit v1.2.3-70-g09d2


From c8a06c1ef0bc45915fc45a170c7c60426971304c Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Wed, 17 Jun 2009 16:28:15 -0700
Subject: w1-gpio: add external pull-up enable callback

On embedded devices, sleep mode conditions can be tricky to handle,
Especially when processors tend to pull-down the w1 bus during sleep.  Bus
slaves (such as the ds2760) may interpret this as a reason for power-down
conditions and entirely switch off the device.

This patch adds a callback function pointer to let users switch on and off
the external pull-up resistor.  This lets the outside world know whether
the processor is currently actively driving the bus or not.

When this callback is not provided, the code behaviour won't change.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Acked-by: Ville Syrjala <syrjala@sci.fi>
Acked-by: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/w1/masters/w1-gpio.c | 35 +++++++++++++++++++++++++++++++++++
 include/linux/w1-gpio.h      |  1 +
 2 files changed, 36 insertions(+)

(limited to 'include')

diff --git a/drivers/w1/masters/w1-gpio.c b/drivers/w1/masters/w1-gpio.c
index a411702413d..6f8866d6a90 100644
--- a/drivers/w1/masters/w1-gpio.c
+++ b/drivers/w1/masters/w1-gpio.c
@@ -74,6 +74,9 @@ static int __init w1_gpio_probe(struct platform_device *pdev)
 	if (err)
 		goto free_gpio;
 
+	if (pdata->enable_external_pullup)
+		pdata->enable_external_pullup(1);
+
 	platform_set_drvdata(pdev, master);
 
 	return 0;
@@ -91,6 +94,9 @@ static int __exit w1_gpio_remove(struct platform_device *pdev)
 	struct w1_bus_master *master = platform_get_drvdata(pdev);
 	struct w1_gpio_platform_data *pdata = pdev->dev.platform_data;
 
+	if (pdata->enable_external_pullup)
+		pdata->enable_external_pullup(0);
+
 	w1_remove_master_device(master);
 	gpio_free(pdata->pin);
 	kfree(master);
@@ -98,12 +104,41 @@ static int __exit w1_gpio_remove(struct platform_device *pdev)
 	return 0;
 }
 
+#ifdef CONFIG_PM
+
+static int w1_gpio_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct w1_gpio_platform_data *pdata = pdev->dev.platform_data;
+
+	if (pdata->enable_external_pullup)
+		pdata->enable_external_pullup(0);
+
+	return 0;
+}
+
+static int w1_gpio_resume(struct platform_device *pdev)
+{
+	struct w1_gpio_platform_data *pdata = pdev->dev.platform_data;
+
+	if (pdata->enable_external_pullup)
+		pdata->enable_external_pullup(1);
+
+	return 0;
+}
+
+#else
+#define w1_gpio_suspend	NULL
+#define w1_gpio_resume	NULL
+#endif
+
 static struct platform_driver w1_gpio_driver = {
 	.driver = {
 		.name	= "w1-gpio",
 		.owner	= THIS_MODULE,
 	},
 	.remove	= __exit_p(w1_gpio_remove),
+	.suspend = w1_gpio_suspend,
+	.resume = w1_gpio_resume,
 };
 
 static int __init w1_gpio_init(void)
diff --git a/include/linux/w1-gpio.h b/include/linux/w1-gpio.h
index 9797fec7748..3adeff82212 100644
--- a/include/linux/w1-gpio.h
+++ b/include/linux/w1-gpio.h
@@ -18,6 +18,7 @@
 struct w1_gpio_platform_data {
 	unsigned int pin;
 	unsigned int is_open_drain:1;
+	void (*enable_external_pullup)(int enable);
 };
 
 #endif /* _LINUX_W1_GPIO_H */
-- 
cgit v1.2.3-70-g09d2


From eae9d2ba0cfc27a2ad9765f23efb98fb80d80234 Mon Sep 17 00:00:00 2001
From: Rodolfo Giometti <giometti@linux.it>
Date: Wed, 17 Jun 2009 16:28:37 -0700
Subject: LinuxPPS: core support

This patch adds the kernel side of the PPS support currently named
"LinuxPPS".

PPS means "pulse per second" and a PPS source is just a device which
provides a high precision signal each second so that an application can
use it to adjust system clock time.

Common use is the combination of the NTPD as userland program with a GPS
receiver as PPS source to obtain a wallclock-time with sub-millisecond
synchronisation to UTC.

To obtain this goal the userland programs shoud use the PPS API
specification (RFC 2783 - Pulse-Per-Second API for UNIX-like Operating
Systems, Version 1.0) which in part is implemented by this patch.  It
provides a set of chars devices, one per PPS source, which can be used to
get the time signal.  The RFC's functions can be implemented by accessing
to these char devices.

Signed-off-by: Rodolfo Giometti <giometti@linux.it>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Greg KH <greg@kroah.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Acked-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/ABI/testing/sysfs-pps  |  73 ++++++++
 Documentation/ioctl/ioctl-number.txt |   2 +
 Documentation/pps/pps.txt            | 172 ++++++++++++++++++
 MAINTAINERS                          |   7 +
 drivers/Kconfig                      |   2 +
 drivers/Makefile                     |   1 +
 drivers/pps/Kconfig                  |  33 ++++
 drivers/pps/Makefile                 |   8 +
 drivers/pps/kapi.c                   | 329 +++++++++++++++++++++++++++++++++++
 drivers/pps/pps.c                    | 312 +++++++++++++++++++++++++++++++++
 drivers/pps/sysfs.c                  |  98 +++++++++++
 include/linux/Kbuild                 |   1 +
 include/linux/pps.h                  | 122 +++++++++++++
 include/linux/pps_kernel.h           |  89 ++++++++++
 14 files changed, 1249 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-pps
 create mode 100644 Documentation/pps/pps.txt
 create mode 100644 drivers/pps/Kconfig
 create mode 100644 drivers/pps/Makefile
 create mode 100644 drivers/pps/kapi.c
 create mode 100644 drivers/pps/pps.c
 create mode 100644 drivers/pps/sysfs.c
 create mode 100644 include/linux/pps.h
 create mode 100644 include/linux/pps_kernel.h

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-pps b/Documentation/ABI/testing/sysfs-pps
new file mode 100644
index 00000000000..25028c7bc37
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-pps
@@ -0,0 +1,73 @@
+What:		/sys/class/pps/
+Date:		February 2008
+Contact:	Rodolfo Giometti <giometti@linux.it>
+Description:
+		The /sys/class/pps/ directory will contain files and
+		directories that will provide a unified interface to
+		the PPS sources.
+
+What:		/sys/class/pps/ppsX/
+Date:		February 2008
+Contact:	Rodolfo Giometti <giometti@linux.it>
+Description:
+		The /sys/class/pps/ppsX/ directory is related to X-th
+		PPS source into the system. Each directory will
+		contain files to manage and control its PPS source.
+
+What:		/sys/class/pps/ppsX/assert
+Date:		February 2008
+Contact:	Rodolfo Giometti <giometti@linux.it>
+Description:
+		The /sys/class/pps/ppsX/assert file reports the assert events
+		and the assert sequence number of the X-th source in the form:
+
+			<secs>.<nsec>#<sequence>
+
+		If the source has no assert events the content of this file
+		is empty.
+
+What:		/sys/class/pps/ppsX/clear
+Date:		February 2008
+Contact:	Rodolfo Giometti <giometti@linux.it>
+Description:
+		The /sys/class/pps/ppsX/clear file reports the clear events
+		and the clear sequence number of the X-th source in the form:
+
+			<secs>.<nsec>#<sequence>
+
+		If the source has no clear events the content of this file
+		is empty.
+
+What:		/sys/class/pps/ppsX/mode
+Date:		February 2008
+Contact:	Rodolfo Giometti <giometti@linux.it>
+Description:
+		The /sys/class/pps/ppsX/mode file reports the functioning
+		mode of the X-th source in hexadecimal encoding.
+
+		Please, refer to linux/include/linux/pps.h for further
+		info.
+
+What:		/sys/class/pps/ppsX/echo
+Date:		February 2008
+Contact:	Rodolfo Giometti <giometti@linux.it>
+Description:
+		The /sys/class/pps/ppsX/echo file reports if the X-th does
+		or does not support an "echo" function.
+
+What:		/sys/class/pps/ppsX/name
+Date:		February 2008
+Contact:	Rodolfo Giometti <giometti@linux.it>
+Description:
+		The /sys/class/pps/ppsX/name file reports the name of the
+		X-th source.
+
+What:		/sys/class/pps/ppsX/path
+Date:		February 2008
+Contact:	Rodolfo Giometti <giometti@linux.it>
+Description:
+		The /sys/class/pps/ppsX/path file reports the path name of
+		the device connected with the X-th source.
+
+		If the source is not connected with any device the content
+		of this file is empty.
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 1f779a25c70..7bb0d934b6d 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -149,6 +149,8 @@ Code	Seq#	Include File		Comments
 'p'	40-7F	linux/nvram.h
 'p'	80-9F				user-space parport
 					<mailto:tim@cyberelk.net>
+'p'	a1-a4	linux/pps.h		LinuxPPS
+					<mailto:giometti@linux.it>
 'q'	00-1F	linux/serio.h
 'q'	80-FF				Internet PhoneJACK, Internet LineJACK
 					<http://www.quicknet.net>
diff --git a/Documentation/pps/pps.txt b/Documentation/pps/pps.txt
new file mode 100644
index 00000000000..125f4ab4899
--- /dev/null
+++ b/Documentation/pps/pps.txt
@@ -0,0 +1,172 @@
+
+			PPS - Pulse Per Second
+			----------------------
+
+(C) Copyright 2007 Rodolfo Giometti <giometti@enneenne.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+
+
+Overview
+--------
+
+LinuxPPS provides a programming interface (API) to define in the
+system several PPS sources.
+
+PPS means "pulse per second" and a PPS source is just a device which
+provides a high precision signal each second so that an application
+can use it to adjust system clock time.
+
+A PPS source can be connected to a serial port (usually to the Data
+Carrier Detect pin) or to a parallel port (ACK-pin) or to a special
+CPU's GPIOs (this is the common case in embedded systems) but in each
+case when a new pulse arrives the system must apply to it a timestamp
+and record it for userland.
+
+Common use is the combination of the NTPD as userland program, with a
+GPS receiver as PPS source, to obtain a wallclock-time with
+sub-millisecond synchronisation to UTC.
+
+
+RFC considerations
+------------------
+
+While implementing a PPS API as RFC 2783 defines and using an embedded
+CPU GPIO-Pin as physical link to the signal, I encountered a deeper
+problem:
+
+   At startup it needs a file descriptor as argument for the function
+   time_pps_create().
+
+This implies that the source has a /dev/... entry. This assumption is
+ok for the serial and parallel port, where you can do something
+useful besides(!) the gathering of timestamps as it is the central
+task for a PPS-API. But this assumption does not work for a single
+purpose GPIO line. In this case even basic file-related functionality
+(like read() and write()) makes no sense at all and should not be a
+precondition for the use of a PPS-API.
+
+The problem can be simply solved if you consider that a PPS source is
+not always connected with a GPS data source.
+
+So your programs should check if the GPS data source (the serial port
+for instance) is a PPS source too, and if not they should provide the
+possibility to open another device as PPS source.
+
+In LinuxPPS the PPS sources are simply char devices usually mapped
+into files /dev/pps0, /dev/pps1, etc..
+
+
+Coding example
+--------------
+
+To register a PPS source into the kernel you should define a struct
+pps_source_info_s as follows:
+
+    static struct pps_source_info pps_ktimer_info = {
+	    .name         = "ktimer",
+	    .path         = "",
+	    .mode         = PPS_CAPTUREASSERT | PPS_OFFSETASSERT | \
+			    PPS_ECHOASSERT | \
+			    PPS_CANWAIT | PPS_TSFMT_TSPEC,
+	    .echo         = pps_ktimer_echo,
+	    .owner        = THIS_MODULE,
+    };
+
+and then calling the function pps_register_source() in your
+intialization routine as follows:
+
+    source = pps_register_source(&pps_ktimer_info,
+			PPS_CAPTUREASSERT | PPS_OFFSETASSERT);
+
+The pps_register_source() prototype is:
+
+  int pps_register_source(struct pps_source_info_s *info, int default_params)
+
+where "info" is a pointer to a structure that describes a particular
+PPS source, "default_params" tells the system what the initial default
+parameters for the device should be (it is obvious that these parameters
+must be a subset of ones defined in the struct
+pps_source_info_s which describe the capabilities of the driver).
+
+Once you have registered a new PPS source into the system you can
+signal an assert event (for example in the interrupt handler routine)
+just using:
+
+    pps_event(source, &ts, PPS_CAPTUREASSERT, ptr)
+
+where "ts" is the event's timestamp.
+
+The same function may also run the defined echo function
+(pps_ktimer_echo(), passing to it the "ptr" pointer) if the user
+asked for that... etc..
+
+Please see the file drivers/pps/clients/ktimer.c for example code.
+
+
+SYSFS support
+-------------
+
+If the SYSFS filesystem is enabled in the kernel it provides a new class:
+
+   $ ls /sys/class/pps/
+   pps0/  pps1/  pps2/
+
+Every directory is the ID of a PPS sources defined in the system and
+inside you find several files:
+
+   $ ls /sys/class/pps/pps0/
+   assert	clear  echo  mode  name  path  subsystem@  uevent
+
+Inside each "assert" and "clear" file you can find the timestamp and a
+sequence number:
+
+   $ cat /sys/class/pps/pps0/assert
+   1170026870.983207967#8
+
+Where before the "#" is the timestamp in seconds; after it is the
+sequence number. Other files are:
+
+* echo: reports if the PPS source has an echo function or not;
+
+* mode: reports available PPS functioning modes;
+
+* name: reports the PPS source's name;
+
+* path: reports the PPS source's device path, that is the device the
+  PPS source is connected to (if it exists).
+
+
+Testing the PPS support
+-----------------------
+
+In order to test the PPS support even without specific hardware you can use
+the ktimer driver (see the client subsection in the PPS configuration menu)
+and the userland tools provided into Documentaion/pps/ directory.
+
+Once you have enabled the compilation of ktimer just modprobe it (if
+not statically compiled):
+
+   # modprobe ktimer
+
+and the run ppstest as follow:
+
+   $ ./ppstest /dev/pps0
+   trying PPS source "/dev/pps1"
+   found PPS source "/dev/pps1"
+   ok, found 1 source(s), now start fetching data...
+   source 0 - assert 1186592699.388832443, sequence: 364 - clear  0.000000000, sequence: 0
+   source 0 - assert 1186592700.388931295, sequence: 365 - clear  0.000000000, sequence: 0
+   source 0 - assert 1186592701.389032765, sequence: 366 - clear  0.000000000, sequence: 0
+
+Please, note that to compile userland programs you need the file timepps.h
+(see Documentation/pps/).
diff --git a/MAINTAINERS b/MAINTAINERS
index 5b7a08d88e4..035df9d2660 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4577,6 +4577,13 @@ S:	Maintained
 F:	drivers/net/pppol2tp.c
 F:	include/linux/if_pppol2tp.h
 
+PPS SUPPORT
+P:	Rodolfo Giometti
+M:	giometti@enneenne.com
+W:	http://wiki.enneenne.com/index.php/LinuxPPS_support
+L:	linuxpps@ml.enneenne.com (subscribers-only)
+S:	Maintained
+
 PREEMPTIBLE KERNEL
 P:	Robert Love
 M:	rml@tech9.net
diff --git a/drivers/Kconfig b/drivers/Kconfig
index a442c8f29fc..48bbdbe43e6 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -52,6 +52,8 @@ source "drivers/i2c/Kconfig"
 
 source "drivers/spi/Kconfig"
 
+source "drivers/pps/Kconfig"
+
 source "drivers/gpio/Kconfig"
 
 source "drivers/w1/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 00b44f4ccf0..bc4205d2fc3 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -72,6 +72,7 @@ obj-$(CONFIG_INPUT)		+= input/
 obj-$(CONFIG_I2O)		+= message/
 obj-$(CONFIG_RTC_LIB)		+= rtc/
 obj-y				+= i2c/ media/
+obj-$(CONFIG_PPS)		+= pps/
 obj-$(CONFIG_W1)		+= w1/
 obj-$(CONFIG_POWER_SUPPLY)	+= power/
 obj-$(CONFIG_HWMON)		+= hwmon/
diff --git a/drivers/pps/Kconfig b/drivers/pps/Kconfig
new file mode 100644
index 00000000000..cc2eb8edb51
--- /dev/null
+++ b/drivers/pps/Kconfig
@@ -0,0 +1,33 @@
+#
+# PPS support configuration
+#
+
+menu "PPS support"
+
+config PPS
+	tristate "PPS support"
+	depends on EXPERIMENTAL
+	---help---
+	  PPS (Pulse Per Second) is a special pulse provided by some GPS
+	  antennae. Userland can use it to get a high-precision time
+	  reference.
+
+	  Some antennae's PPS signals are connected with the CD (Carrier
+	  Detect) pin of the serial line they use to communicate with the
+	  host. In this case use the SERIAL_LINE client support.
+
+	  Some antennae's PPS signals are connected with some special host
+	  inputs so you have to enable the corresponding client support.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called pps_core.ko.
+
+config PPS_DEBUG
+	bool "PPS debugging messages"
+	depends on PPS
+	help
+	  Say Y here if you want the PPS support to produce a bunch of debug
+	  messages to the system log.  Select this if you are having a
+	  problem with PPS support and want to see more of what is going on.
+
+endmenu
diff --git a/drivers/pps/Makefile b/drivers/pps/Makefile
new file mode 100644
index 00000000000..19ea582f431
--- /dev/null
+++ b/drivers/pps/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the PPS core.
+#
+
+pps_core-y			:= pps.o kapi.o sysfs.o
+obj-$(CONFIG_PPS)		:= pps_core.o
+
+ccflags-$(CONFIG_PPS_DEBUG) := -DDEBUG
diff --git a/drivers/pps/kapi.c b/drivers/pps/kapi.c
new file mode 100644
index 00000000000..35a0b192d76
--- /dev/null
+++ b/drivers/pps/kapi.c
@@ -0,0 +1,329 @@
+/*
+ * kernel API
+ *
+ *
+ * Copyright (C) 2005-2009   Rodolfo Giometti <giometti@linux.it>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/time.h>
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+#include <linux/fs.h>
+#include <linux/pps_kernel.h>
+
+/*
+ * Global variables
+ */
+
+DEFINE_SPINLOCK(pps_idr_lock);
+DEFINE_IDR(pps_idr);
+
+/*
+ * Local functions
+ */
+
+static void pps_add_offset(struct pps_ktime *ts, struct pps_ktime *offset)
+{
+	ts->nsec += offset->nsec;
+	while (ts->nsec >= NSEC_PER_SEC) {
+		ts->nsec -= NSEC_PER_SEC;
+		ts->sec++;
+	}
+	while (ts->nsec < 0) {
+		ts->nsec += NSEC_PER_SEC;
+		ts->sec--;
+	}
+	ts->sec += offset->sec;
+}
+
+/*
+ * Exported functions
+ */
+
+/* pps_get_source - find a PPS source
+ * @source: the PPS source ID.
+ *
+ * This function is used to find an already registered PPS source into the
+ * system.
+ *
+ * The function returns NULL if found nothing, otherwise it returns a pointer
+ * to the PPS source data struct (the refcounter is incremented by 1).
+ */
+
+struct pps_device *pps_get_source(int source)
+{
+	struct pps_device *pps;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pps_idr_lock, flags);
+
+	pps = idr_find(&pps_idr, source);
+	if (pps != NULL)
+		atomic_inc(&pps->usage);
+
+	spin_unlock_irqrestore(&pps_idr_lock, flags);
+
+	return pps;
+}
+
+/* pps_put_source - free the PPS source data
+ * @pps: a pointer to the PPS source.
+ *
+ * This function is used to free a PPS data struct if its refcount is 0.
+ */
+
+void pps_put_source(struct pps_device *pps)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pps_idr_lock, flags);
+	BUG_ON(atomic_read(&pps->usage) == 0);
+
+	if (!atomic_dec_and_test(&pps->usage)) {
+		pps = NULL;
+		goto exit;
+	}
+
+	/* No more reference to the PPS source. We can safely remove the
+	 * PPS data struct.
+	 */
+	idr_remove(&pps_idr, pps->id);
+
+exit:
+	spin_unlock_irqrestore(&pps_idr_lock, flags);
+	kfree(pps);
+}
+
+/* pps_register_source - add a PPS source in the system
+ * @info: the PPS info struct
+ * @default_params: the default PPS parameters of the new source
+ *
+ * This function is used to add a new PPS source in the system. The new
+ * source is described by info's fields and it will have, as default PPS
+ * parameters, the ones specified into default_params.
+ *
+ * The function returns, in case of success, the PPS source ID.
+ */
+
+int pps_register_source(struct pps_source_info *info, int default_params)
+{
+	struct pps_device *pps;
+	int id;
+	int err;
+
+	/* Sanity checks */
+	if ((info->mode & default_params) != default_params) {
+		printk(KERN_ERR "pps: %s: unsupported default parameters\n",
+					info->name);
+		err = -EINVAL;
+		goto pps_register_source_exit;
+	}
+	if ((info->mode & (PPS_ECHOASSERT | PPS_ECHOCLEAR)) != 0 &&
+			info->echo == NULL) {
+		printk(KERN_ERR "pps: %s: echo function is not defined\n",
+					info->name);
+		err = -EINVAL;
+		goto pps_register_source_exit;
+	}
+	if ((info->mode & (PPS_TSFMT_TSPEC | PPS_TSFMT_NTPFP)) == 0) {
+		printk(KERN_ERR "pps: %s: unspecified time format\n",
+					info->name);
+		err = -EINVAL;
+		goto pps_register_source_exit;
+	}
+
+	/* Allocate memory for the new PPS source struct */
+	pps = kzalloc(sizeof(struct pps_device), GFP_KERNEL);
+	if (pps == NULL) {
+		err = -ENOMEM;
+		goto pps_register_source_exit;
+	}
+
+	/* These initializations must be done before calling idr_get_new()
+	 * in order to avoid reces into pps_event().
+	 */
+	pps->params.api_version = PPS_API_VERS;
+	pps->params.mode = default_params;
+	pps->info = *info;
+
+	init_waitqueue_head(&pps->queue);
+	spin_lock_init(&pps->lock);
+	atomic_set(&pps->usage, 1);
+
+	/* Get new ID for the new PPS source */
+	if (idr_pre_get(&pps_idr, GFP_KERNEL) == 0) {
+		err = -ENOMEM;
+		goto kfree_pps;
+	}
+
+	spin_lock_irq(&pps_idr_lock);
+
+	/* Now really allocate the PPS source.
+	 * After idr_get_new() calling the new source will be freely available
+	 * into the kernel.
+	 */
+	err = idr_get_new(&pps_idr, pps, &id);
+	if (err < 0) {
+		spin_unlock_irq(&pps_idr_lock);
+		goto kfree_pps;
+	}
+
+	id = id & MAX_ID_MASK;
+	if (id >= PPS_MAX_SOURCES) {
+		spin_unlock_irq(&pps_idr_lock);
+
+		printk(KERN_ERR "pps: %s: too many PPS sources in the system\n",
+					info->name);
+		err = -EBUSY;
+		goto free_idr;
+	}
+	pps->id = id;
+
+	spin_unlock_irq(&pps_idr_lock);
+
+	/* Create the char device */
+	err = pps_register_cdev(pps);
+	if (err < 0) {
+		printk(KERN_ERR "pps: %s: unable to create char device\n",
+					info->name);
+		goto free_idr;
+	}
+
+	pr_info("new PPS source %s at ID %d\n", info->name, id);
+
+	return id;
+
+free_idr:
+	spin_lock_irq(&pps_idr_lock);
+	idr_remove(&pps_idr, id);
+	spin_unlock_irq(&pps_idr_lock);
+
+kfree_pps:
+	kfree(pps);
+
+pps_register_source_exit:
+	printk(KERN_ERR "pps: %s: unable to register source\n", info->name);
+
+	return err;
+}
+EXPORT_SYMBOL(pps_register_source);
+
+/* pps_unregister_source - remove a PPS source from the system
+ * @source: the PPS source ID
+ *
+ * This function is used to remove a previously registered PPS source from
+ * the system.
+ */
+
+void pps_unregister_source(int source)
+{
+	struct pps_device *pps;
+
+	spin_lock_irq(&pps_idr_lock);
+	pps = idr_find(&pps_idr, source);
+
+	if (!pps) {
+		BUG();
+		spin_unlock_irq(&pps_idr_lock);
+		return;
+	}
+	spin_unlock_irq(&pps_idr_lock);
+
+	pps_unregister_cdev(pps);
+	pps_put_source(pps);
+}
+EXPORT_SYMBOL(pps_unregister_source);
+
+/* pps_event - register a PPS event into the system
+ * @source: the PPS source ID
+ * @ts: the event timestamp
+ * @event: the event type
+ * @data: userdef pointer
+ *
+ * This function is used by each PPS client in order to register a new
+ * PPS event into the system (it's usually called inside an IRQ handler).
+ *
+ * If an echo function is associated with the PPS source it will be called
+ * as:
+ *	pps->info.echo(source, event, data);
+ */
+
+void pps_event(int source, struct pps_ktime *ts, int event, void *data)
+{
+	struct pps_device *pps;
+	unsigned long flags;
+
+	if ((event & (PPS_CAPTUREASSERT | PPS_CAPTURECLEAR)) == 0) {
+		printk(KERN_ERR "pps: unknown event (%x) for source %d\n",
+			event, source);
+		return;
+	}
+
+	pps = pps_get_source(source);
+	if (!pps)
+		return;
+
+	pr_debug("PPS event on source %d at %llu.%06u\n",
+			pps->id, (unsigned long long) ts->sec, ts->nsec);
+
+	spin_lock_irqsave(&pps->lock, flags);
+
+	/* Must call the echo function? */
+	if ((pps->params.mode & (PPS_ECHOASSERT | PPS_ECHOCLEAR)))
+		pps->info.echo(source, event, data);
+
+	/* Check the event */
+	pps->current_mode = pps->params.mode;
+	if (event & PPS_CAPTUREASSERT) {
+		/* We have to add an offset? */
+		if (pps->params.mode & PPS_OFFSETASSERT)
+			pps_add_offset(ts, &pps->params.assert_off_tu);
+
+		/* Save the time stamp */
+		pps->assert_tu = *ts;
+		pps->assert_sequence++;
+		pr_debug("capture assert seq #%u for source %d\n",
+			pps->assert_sequence, source);
+	}
+	if (event & PPS_CAPTURECLEAR) {
+		/* We have to add an offset? */
+		if (pps->params.mode & PPS_OFFSETCLEAR)
+			pps_add_offset(ts, &pps->params.clear_off_tu);
+
+		/* Save the time stamp */
+		pps->clear_tu = *ts;
+		pps->clear_sequence++;
+		pr_debug("capture clear seq #%u for source %d\n",
+			pps->clear_sequence, source);
+	}
+
+	pps->go = ~0;
+	wake_up_interruptible(&pps->queue);
+
+	kill_fasync(&pps->async_queue, SIGIO, POLL_IN);
+
+	spin_unlock_irqrestore(&pps->lock, flags);
+
+	/* Now we can release the PPS source for (possible) deregistration */
+	pps_put_source(pps);
+}
+EXPORT_SYMBOL(pps_event);
diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c
new file mode 100644
index 00000000000..ac8cc8cea1e
--- /dev/null
+++ b/drivers/pps/pps.c
@@ -0,0 +1,312 @@
+/*
+ * PPS core file
+ *
+ *
+ * Copyright (C) 2005-2009   Rodolfo Giometti <giometti@linux.it>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+#include <linux/idr.h>
+#include <linux/cdev.h>
+#include <linux/poll.h>
+#include <linux/pps_kernel.h>
+
+/*
+ * Local variables
+ */
+
+static dev_t pps_devt;
+static struct class *pps_class;
+
+/*
+ * Char device methods
+ */
+
+static unsigned int pps_cdev_poll(struct file *file, poll_table *wait)
+{
+	struct pps_device *pps = file->private_data;
+
+	poll_wait(file, &pps->queue, wait);
+
+	return POLLIN | POLLRDNORM;
+}
+
+static int pps_cdev_fasync(int fd, struct file *file, int on)
+{
+	struct pps_device *pps = file->private_data;
+	return fasync_helper(fd, file, on, &pps->async_queue);
+}
+
+static long pps_cdev_ioctl(struct file *file,
+		unsigned int cmd, unsigned long arg)
+{
+	struct pps_device *pps = file->private_data;
+	struct pps_kparams params;
+	struct pps_fdata fdata;
+	unsigned long ticks;
+	void __user *uarg = (void __user *) arg;
+	int __user *iuarg = (int __user *) arg;
+	int err;
+
+	switch (cmd) {
+	case PPS_GETPARAMS:
+		pr_debug("PPS_GETPARAMS: source %d\n", pps->id);
+
+		/* Return current parameters */
+		err = copy_to_user(uarg, &pps->params,
+						sizeof(struct pps_kparams));
+		if (err)
+			return -EFAULT;
+
+		break;
+
+	case PPS_SETPARAMS:
+		pr_debug("PPS_SETPARAMS: source %d\n", pps->id);
+
+		/* Check the capabilities */
+		if (!capable(CAP_SYS_TIME))
+			return -EPERM;
+
+		err = copy_from_user(&params, uarg, sizeof(struct pps_kparams));
+		if (err)
+			return -EFAULT;
+		if (!(params.mode & (PPS_CAPTUREASSERT | PPS_CAPTURECLEAR))) {
+			pr_debug("capture mode unspecified (%x)\n",
+								params.mode);
+			return -EINVAL;
+		}
+
+		/* Check for supported capabilities */
+		if ((params.mode & ~pps->info.mode) != 0) {
+			pr_debug("unsupported capabilities (%x)\n",
+								params.mode);
+			return -EINVAL;
+		}
+
+		spin_lock_irq(&pps->lock);
+
+		/* Save the new parameters */
+		pps->params = params;
+
+		/* Restore the read only parameters */
+		if ((params.mode & (PPS_TSFMT_TSPEC | PPS_TSFMT_NTPFP)) == 0) {
+			/* section 3.3 of RFC 2783 interpreted */
+			pr_debug("time format unspecified (%x)\n",
+								params.mode);
+			pps->params.mode |= PPS_TSFMT_TSPEC;
+		}
+		if (pps->info.mode & PPS_CANWAIT)
+			pps->params.mode |= PPS_CANWAIT;
+		pps->params.api_version = PPS_API_VERS;
+
+		spin_unlock_irq(&pps->lock);
+
+		break;
+
+	case PPS_GETCAP:
+		pr_debug("PPS_GETCAP: source %d\n", pps->id);
+
+		err = put_user(pps->info.mode, iuarg);
+		if (err)
+			return -EFAULT;
+
+		break;
+
+	case PPS_FETCH:
+		pr_debug("PPS_FETCH: source %d\n", pps->id);
+
+		err = copy_from_user(&fdata, uarg, sizeof(struct pps_fdata));
+		if (err)
+			return -EFAULT;
+
+		pps->go = 0;
+
+		/* Manage the timeout */
+		if (fdata.timeout.flags & PPS_TIME_INVALID)
+			err = wait_event_interruptible(pps->queue, pps->go);
+		else {
+			pr_debug("timeout %lld.%09d\n",
+					(long long) fdata.timeout.sec,
+					fdata.timeout.nsec);
+			ticks = fdata.timeout.sec * HZ;
+			ticks += fdata.timeout.nsec / (NSEC_PER_SEC / HZ);
+
+			if (ticks != 0) {
+				err = wait_event_interruptible_timeout(
+						pps->queue, pps->go, ticks);
+				if (err == 0)
+					return -ETIMEDOUT;
+			}
+		}
+
+		/* Check for pending signals */
+		if (err == -ERESTARTSYS) {
+			pr_debug("pending signal caught\n");
+			return -EINTR;
+		}
+
+		/* Return the fetched timestamp */
+		spin_lock_irq(&pps->lock);
+
+		fdata.info.assert_sequence = pps->assert_sequence;
+		fdata.info.clear_sequence = pps->clear_sequence;
+		fdata.info.assert_tu = pps->assert_tu;
+		fdata.info.clear_tu = pps->clear_tu;
+		fdata.info.current_mode = pps->current_mode;
+
+		spin_unlock_irq(&pps->lock);
+
+		err = copy_to_user(uarg, &fdata, sizeof(struct pps_fdata));
+		if (err)
+			return -EFAULT;
+
+		break;
+
+	default:
+		return -ENOTTY;
+		break;
+	}
+
+	return 0;
+}
+
+static int pps_cdev_open(struct inode *inode, struct file *file)
+{
+	struct pps_device *pps = container_of(inode->i_cdev,
+						struct pps_device, cdev);
+	int found;
+
+	found = pps_get_source(pps->id) != 0;
+	if (!found)
+		return -ENODEV;
+
+	file->private_data = pps;
+
+	return 0;
+}
+
+static int pps_cdev_release(struct inode *inode, struct file *file)
+{
+	struct pps_device *pps = file->private_data;
+
+	/* Free the PPS source and wake up (possible) deregistration */
+	pps_put_source(pps);
+
+	return 0;
+}
+
+/*
+ * Char device stuff
+ */
+
+static const struct file_operations pps_cdev_fops = {
+	.owner		= THIS_MODULE,
+	.llseek		= no_llseek,
+	.poll		= pps_cdev_poll,
+	.fasync		= pps_cdev_fasync,
+	.unlocked_ioctl	= pps_cdev_ioctl,
+	.open		= pps_cdev_open,
+	.release	= pps_cdev_release,
+};
+
+int pps_register_cdev(struct pps_device *pps)
+{
+	int err;
+
+	pps->devno = MKDEV(MAJOR(pps_devt), pps->id);
+	cdev_init(&pps->cdev, &pps_cdev_fops);
+	pps->cdev.owner = pps->info.owner;
+
+	err = cdev_add(&pps->cdev, pps->devno, 1);
+	if (err) {
+		printk(KERN_ERR "pps: %s: failed to add char device %d:%d\n",
+				pps->info.name, MAJOR(pps_devt), pps->id);
+		return err;
+	}
+	pps->dev = device_create(pps_class, pps->info.dev, pps->devno, NULL,
+							"pps%d", pps->id);
+	if (err)
+		goto del_cdev;
+	dev_set_drvdata(pps->dev, pps);
+
+	pr_debug("source %s got cdev (%d:%d)\n", pps->info.name,
+			MAJOR(pps_devt), pps->id);
+
+	return 0;
+
+del_cdev:
+	cdev_del(&pps->cdev);
+
+	return err;
+}
+
+void pps_unregister_cdev(struct pps_device *pps)
+{
+	device_destroy(pps_class, pps->devno);
+	cdev_del(&pps->cdev);
+}
+
+/*
+ * Module stuff
+ */
+
+static void __exit pps_exit(void)
+{
+	class_destroy(pps_class);
+	unregister_chrdev_region(pps_devt, PPS_MAX_SOURCES);
+}
+
+static int __init pps_init(void)
+{
+	int err;
+
+	pps_class = class_create(THIS_MODULE, "pps");
+	if (!pps_class) {
+		printk(KERN_ERR "pps: failed to allocate class\n");
+		return -ENOMEM;
+	}
+	pps_class->dev_attrs = pps_attrs;
+
+	err = alloc_chrdev_region(&pps_devt, 0, PPS_MAX_SOURCES, "pps");
+	if (err < 0) {
+		printk(KERN_ERR "pps: failed to allocate char device region\n");
+		goto remove_class;
+	}
+
+	pr_info("LinuxPPS API ver. %d registered\n", PPS_API_VERS);
+	pr_info("Software ver. %s - Copyright 2005-2007 Rodolfo Giometti "
+		"<giometti@linux.it>\n", PPS_VERSION);
+
+	return 0;
+
+remove_class:
+	class_destroy(pps_class);
+
+	return err;
+}
+
+subsys_initcall(pps_init);
+module_exit(pps_exit);
+
+MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>");
+MODULE_DESCRIPTION("LinuxPPS support (RFC 2783) - ver. " PPS_VERSION);
+MODULE_LICENSE("GPL");
diff --git a/drivers/pps/sysfs.c b/drivers/pps/sysfs.c
new file mode 100644
index 00000000000..ef0978c71ee
--- /dev/null
+++ b/drivers/pps/sysfs.c
@@ -0,0 +1,98 @@
+/*
+ * PPS sysfs support
+ *
+ *
+ * Copyright (C) 2007-2009   Rodolfo Giometti <giometti@linux.it>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/pps_kernel.h>
+
+/*
+ * Attribute functions
+ */
+
+static ssize_t pps_show_assert(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct pps_device *pps = dev_get_drvdata(dev);
+
+	if (!(pps->info.mode & PPS_CAPTUREASSERT))
+		return 0;
+
+	return sprintf(buf, "%lld.%09d#%d\n",
+			(long long) pps->assert_tu.sec, pps->assert_tu.nsec,
+			pps->assert_sequence);
+}
+
+static ssize_t pps_show_clear(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct pps_device *pps = dev_get_drvdata(dev);
+
+	if (!(pps->info.mode & PPS_CAPTURECLEAR))
+		return 0;
+
+	return sprintf(buf, "%lld.%09d#%d\n",
+			(long long) pps->clear_tu.sec, pps->clear_tu.nsec,
+			pps->clear_sequence);
+}
+
+static ssize_t pps_show_mode(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct pps_device *pps = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%4x\n", pps->info.mode);
+}
+
+static ssize_t pps_show_echo(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct pps_device *pps = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", !!pps->info.echo);
+}
+
+static ssize_t pps_show_name(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct pps_device *pps = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%s\n", pps->info.name);
+}
+
+static ssize_t pps_show_path(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct pps_device *pps = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%s\n", pps->info.path);
+}
+
+struct device_attribute pps_attrs[] = {
+	__ATTR(assert, S_IRUGO, pps_show_assert, NULL),
+	__ATTR(clear, S_IRUGO, pps_show_clear, NULL),
+	__ATTR(mode, S_IRUGO, pps_show_mode, NULL),
+	__ATTR(echo, S_IRUGO, pps_show_echo, NULL),
+	__ATTR(name, S_IRUGO, pps_show_name, NULL),
+	__ATTR(path, S_IRUGO, pps_show_path, NULL),
+	__ATTR_NULL,
+};
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index a2df7030d49..03f22076381 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -308,6 +308,7 @@ unifdef-y += pmu.h
 unifdef-y += poll.h
 unifdef-y += ppp_defs.h
 unifdef-y += ppp-comp.h
+unifdef-y += pps.h
 unifdef-y += ptrace.h
 unifdef-y += quota.h
 unifdef-y += random.h
diff --git a/include/linux/pps.h b/include/linux/pps.h
new file mode 100644
index 00000000000..cfe5c7214ec
--- /dev/null
+++ b/include/linux/pps.h
@@ -0,0 +1,122 @@
+/*
+ * PPS API header
+ *
+ * Copyright (C) 2005-2009   Rodolfo Giometti <giometti@linux.it>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#ifndef _PPS_H_
+#define _PPS_H_
+
+#define PPS_VERSION		"5.3.6"
+#define PPS_MAX_SOURCES		16		/* should be enough... */
+
+/* Implementation note: the logical states ``assert'' and ``clear''
+ * are implemented in terms of the chip register, i.e. ``assert''
+ * means the bit is set.  */
+
+/*
+ * 3.2 New data structures
+ */
+
+#define PPS_API_VERS_1		1
+#define PPS_API_VERS		PPS_API_VERS_1	/* we use API version 1 */
+#define PPS_MAX_NAME_LEN	32
+
+/* 32-bit vs. 64-bit compatibility.
+ *
+ * 0n i386, the alignment of a uint64_t is only 4 bytes, while on most other
+ * architectures it's 8 bytes. On i386, there will be no padding between the
+ * two consecutive 'struct pps_ktime' members of struct pps_kinfo and struct
+ * pps_kparams. But on most platforms there will be padding to ensure correct
+ * alignment.
+ *
+ * The simple fix is probably to add an explicit padding.
+ *					 		[David Woodhouse]
+ */
+struct pps_ktime {
+	__s64 sec;
+	__s32 nsec;
+	__u32 flags;
+};
+#define PPS_TIME_INVALID	(1<<0)	/* used to specify timeout==NULL */
+
+struct pps_kinfo {
+	__u32 assert_sequence;		/* seq. num. of assert event */
+	__u32 clear_sequence; 		/* seq. num. of clear event */
+	struct pps_ktime assert_tu;	/* time of assert event */
+	struct pps_ktime clear_tu;	/* time of clear event */
+	int current_mode;		/* current mode bits */
+};
+
+struct pps_kparams {
+	int api_version;		/* API version # */
+	int mode;			/* mode bits */
+	struct pps_ktime assert_off_tu;	/* offset compensation for assert */
+	struct pps_ktime clear_off_tu;	/* offset compensation for clear */
+};
+
+/*
+ * 3.3 Mode bit definitions
+ */
+
+/* Device/implementation parameters */
+#define PPS_CAPTUREASSERT	0x01	/* capture assert events */
+#define PPS_CAPTURECLEAR	0x02	/* capture clear events */
+#define PPS_CAPTUREBOTH		0x03	/* capture assert and clear events */
+
+#define PPS_OFFSETASSERT	0x10	/* apply compensation for assert ev. */
+#define PPS_OFFSETCLEAR		0x20	/* apply compensation for clear ev. */
+
+#define PPS_CANWAIT		0x100	/* can we wait for an event? */
+#define PPS_CANPOLL		0x200	/* bit reserved for future use */
+
+/* Kernel actions */
+#define PPS_ECHOASSERT		0x40	/* feed back assert event to output */
+#define PPS_ECHOCLEAR		0x80	/* feed back clear event to output */
+
+/* Timestamp formats */
+#define PPS_TSFMT_TSPEC		0x1000	/* select timespec format */
+#define PPS_TSFMT_NTPFP		0x2000	/* select NTP format */
+
+/*
+ * 3.4.4 New functions: disciplining the kernel timebase
+ */
+
+/* Kernel consumers */
+#define PPS_KC_HARDPPS		0	/* hardpps() (or equivalent) */
+#define PPS_KC_HARDPPS_PLL	1	/* hardpps() constrained to
+					   use a phase-locked loop */
+#define PPS_KC_HARDPPS_FLL	2	/* hardpps() constrained to
+					   use a frequency-locked loop */
+/*
+ * Here begins the implementation-specific part!
+ */
+
+struct pps_fdata {
+	struct pps_kinfo info;
+	struct pps_ktime timeout;
+};
+
+#include <linux/ioctl.h>
+
+#define PPS_GETPARAMS		_IOR('p', 0xa1, struct pps_kparams *)
+#define PPS_SETPARAMS		_IOW('p', 0xa2, struct pps_kparams *)
+#define PPS_GETCAP		_IOR('p', 0xa3, int *)
+#define PPS_FETCH		_IOWR('p', 0xa4, struct pps_fdata *)
+
+#endif /* _PPS_H_ */
diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h
new file mode 100644
index 00000000000..e0a193f830e
--- /dev/null
+++ b/include/linux/pps_kernel.h
@@ -0,0 +1,89 @@
+/*
+ * PPS API kernel header
+ *
+ * Copyright (C) 2009   Rodolfo Giometti <giometti@linux.it>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/pps.h>
+
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/time.h>
+
+/*
+ * Global defines
+ */
+
+/* The specific PPS source info */
+struct pps_source_info {
+	char name[PPS_MAX_NAME_LEN];		/* simbolic name */
+	char path[PPS_MAX_NAME_LEN];		/* path of connected device */
+	int mode;				/* PPS's allowed mode */
+
+	void (*echo)(int source, int event, void *data); /* PPS echo function */
+
+	struct module *owner;
+	struct device *dev;
+};
+
+/* The main struct */
+struct pps_device {
+	struct pps_source_info info;		/* PSS source info */
+
+	struct pps_kparams params;		/* PPS's current params */
+
+	__u32 assert_sequence;			/* PPS' assert event seq # */
+	__u32 clear_sequence;			/* PPS' clear event seq # */
+	struct pps_ktime assert_tu;
+	struct pps_ktime clear_tu;
+	int current_mode;			/* PPS mode at event time */
+
+	int go;					/* PPS event is arrived? */
+	wait_queue_head_t queue;		/* PPS event queue */
+
+	unsigned int id;			/* PPS source unique ID */
+	struct cdev cdev;
+	struct device *dev;
+	int devno;
+	struct fasync_struct *async_queue;	/* fasync method */
+	spinlock_t lock;
+
+	atomic_t usage;				/* usage count */
+};
+
+/*
+ * Global variables
+ */
+
+extern spinlock_t pps_idr_lock;
+extern struct idr pps_idr;
+extern struct timespec pps_irq_ts[];
+
+extern struct device_attribute pps_attrs[];
+
+/*
+ * Exported functions
+ */
+
+struct pps_device *pps_get_source(int source);
+extern void pps_put_source(struct pps_device *pps);
+extern int pps_register_source(struct pps_source_info *info,
+				int default_params);
+extern void pps_unregister_source(int source);
+extern int pps_register_cdev(struct pps_device *pps);
+extern void pps_unregister_cdev(struct pps_device *pps);
+extern void pps_event(int source, struct pps_ktime *ts, int event, void *data);
-- 
cgit v1.2.3-70-g09d2


From d2829224619866daf336141b71550e223a198838 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <florian@openwrt.org>
Date: Wed, 17 Jun 2009 16:28:38 -0700
Subject: lib: add lib/gcd.c

This patch adds lib/gcd.c which contains a greatest common divider
implementation taken from sound/core/pcm_timer.c

Several usages of this new library function will be sent to subsystem
maintainers.

[akpm@linux-foundation.org: use swap() (pointed out by Joe)]
[akpm@linux-foundation.org: just add gcd.o to obj-y, remove Kconfig changes]
Signed-off-by: Florian Fainelli <florian@openwrt.org>
Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Simon Horman <horms@verge.net.au>
Cc: Julius Volz <juliusv@google.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Patrick McHardy <kaber@trash.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gcd.h |  8 ++++++++
 lib/Makefile        |  2 +-
 lib/gcd.c           | 18 ++++++++++++++++++
 3 files changed, 27 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/gcd.h
 create mode 100644 lib/gcd.c

(limited to 'include')

diff --git a/include/linux/gcd.h b/include/linux/gcd.h
new file mode 100644
index 00000000000..69f5e8a01ba
--- /dev/null
+++ b/include/linux/gcd.h
@@ -0,0 +1,8 @@
+#ifndef _GCD_H
+#define _GCD_H
+
+#include <linux/compiler.h>
+
+unsigned long gcd(unsigned long a, unsigned long b) __attribute_const__;
+
+#endif /* _GCD_H */
diff --git a/lib/Makefile b/lib/Makefile
index 8e9bcf9d326..b6d1857bbf0 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -21,7 +21,7 @@ lib-y	+= kobject.o kref.o klist.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
-	 string_helpers.o
+	 string_helpers.o gcd.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/gcd.c b/lib/gcd.c
new file mode 100644
index 00000000000..f879033d982
--- /dev/null
+++ b/lib/gcd.c
@@ -0,0 +1,18 @@
+#include <linux/kernel.h>
+#include <linux/gcd.h>
+#include <linux/module.h>
+
+/* Greatest common divisor */
+unsigned long gcd(unsigned long a, unsigned long b)
+{
+	unsigned long r;
+
+	if (a < b)
+		swap(a, b);
+	while ((r = a % b) != 0) {
+		a = b;
+		b = r;
+	}
+	return b;
+}
+EXPORT_SYMBOL_GPL(gcd);
-- 
cgit v1.2.3-70-g09d2


From dcce284a259373f9e5570f2e33f79eca84fcf565 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Thu, 18 Jun 2009 13:24:12 +1000
Subject: mm: Extend gfp masking to the page allocator

The page allocator also needs the masking of gfp flags during boot,
so this moves it out of slab/slub and uses it with the page allocator
as well.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h |  9 ++++++++-
 init/main.c         |  4 ++++
 mm/page_alloc.c     |  3 +++
 mm/slab.c           | 15 ++-------------
 mm/slub.c           | 12 +-----------
 5 files changed, 18 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index cfdb35d71bc..7c777a0da17 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -99,7 +99,7 @@ struct vm_area_struct;
 			__GFP_NORETRY|__GFP_NOMEMALLOC)
 
 /* Control slab gfp mask during early boot */
-#define SLAB_GFP_BOOT_MASK __GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS)
+#define GFP_BOOT_MASK __GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS)
 
 /* Control allocation constraints */
 #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
@@ -348,4 +348,11 @@ static inline void oom_killer_enable(void)
 	oom_killer_disabled = false;
 }
 
+extern gfp_t gfp_allowed_mask;
+
+static inline void set_gfp_allowed_mask(gfp_t mask)
+{
+	gfp_allowed_mask = mask;
+}
+
 #endif /* __LINUX_GFP_H */
diff --git a/init/main.c b/init/main.c
index 1a65fdd0631..09131ec090c 100644
--- a/init/main.c
+++ b/init/main.c
@@ -642,6 +642,10 @@ asmlinkage void __init start_kernel(void)
 				 "enabled early\n");
 	early_boot_irqs_on();
 	local_irq_enable();
+
+	/* Interrupts are enabled now so all GFP allocations are safe. */
+	set_gfp_allowed_mask(__GFP_BITS_MASK);
+
 	kmem_cache_init_late();
 
 	/*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a5f3c278c57..6f0753fe694 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -73,6 +73,7 @@ unsigned long totalram_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
 unsigned long highest_memmap_pfn __read_mostly;
 int percpu_pagelist_fraction;
+gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
 
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
 int pageblock_order __read_mostly;
@@ -1863,6 +1864,8 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	struct page *page;
 	int migratetype = allocflags_to_migratetype(gfp_mask);
 
+	gfp_mask &= gfp_allowed_mask;
+
 	lockdep_trace_alloc(gfp_mask);
 
 	might_sleep_if(gfp_mask & __GFP_WAIT);
diff --git a/mm/slab.c b/mm/slab.c
index d08692303f6..e74a16e4ced 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -304,12 +304,6 @@ struct kmem_list3 {
 	int free_touched;		/* updated without locking */
 };
 
-/*
- * The slab allocator is initialized with interrupts disabled. Therefore, make
- * sure early boot allocations don't accidentally enable interrupts.
- */
-static gfp_t slab_gfp_mask __read_mostly = SLAB_GFP_BOOT_MASK;
-
 /*
  * Need this for bootstrapping a per node allocator.
  */
@@ -1559,11 +1553,6 @@ void __init kmem_cache_init_late(void)
 {
 	struct kmem_cache *cachep;
 
-	/*
-	 * Interrupts are enabled now so all GFP allocations are safe.
-	 */
-	slab_gfp_mask = __GFP_BITS_MASK;
-
 	/* 6) resize the head arrays to their final sizes */
 	mutex_lock(&cache_chain_mutex);
 	list_for_each_entry(cachep, &cache_chain, next)
@@ -3307,7 +3296,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
 	unsigned long save_flags;
 	void *ptr;
 
-	flags &= slab_gfp_mask;
+	flags &= gfp_allowed_mask;
 
 	lockdep_trace_alloc(flags);
 
@@ -3392,7 +3381,7 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
 	unsigned long save_flags;
 	void *objp;
 
-	flags &= slab_gfp_mask;
+	flags &= gfp_allowed_mask;
 
 	lockdep_trace_alloc(flags);
 
diff --git a/mm/slub.c b/mm/slub.c
index 4c6449310a0..ce62b770e2f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -179,12 +179,6 @@ static enum {
 	SYSFS		/* Sysfs up */
 } slab_state = DOWN;
 
-/*
- * The slab allocator is initialized with interrupts disabled. Therefore, make
- * sure early boot allocations don't accidentally enable interrupts.
- */
-static gfp_t slab_gfp_mask __read_mostly = SLAB_GFP_BOOT_MASK;
-
 /* A list of all slab caches on the system */
 static DECLARE_RWSEM(slub_lock);
 static LIST_HEAD(slab_caches);
@@ -1692,7 +1686,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 	unsigned long flags;
 	unsigned int objsize;
 
-	gfpflags &= slab_gfp_mask;
+	gfpflags &= gfp_allowed_mask;
 
 	lockdep_trace_alloc(gfpflags);
 	might_sleep_if(gfpflags & __GFP_WAIT);
@@ -3220,10 +3214,6 @@ void __init kmem_cache_init(void)
 
 void __init kmem_cache_init_late(void)
 {
-	/*
-	 * Interrupts are enabled now so all GFP allocations are safe.
-	 */
-	slab_gfp_mask = __GFP_BITS_MASK;
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 71e308a239c098673570d0b417d42262bb535909 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 18 Jun 2009 12:45:08 -0400
Subject: function-graph: add stack frame test

In case gcc does something funny with the stack frames, or the return
from function code, we would like to detect that.

An arch may implement passing of a variable that is unique to the
function and can be saved on entering a function and can be tested
when exiting the function. Usually the frame pointer can be used for
this purpose.

This patch also implements this for x86. Where it passes in the stack
frame of the parent function, and will test that frame on exit.

There was a case in x86_32 with optimize for size (-Os) where, for a
few functions, gcc would align the stack frame and place a copy of the
return address into it. The function graph tracer modified the copy and
not the actual return address. On return from the funtion, it did not go
to the tracer hook, but returned to the parent. This broke the function
graph tracer, because the return of the parent (where gcc did not do
this funky manipulation) returned to the location that the child function
was suppose to. This caused strange kernel crashes.

This test detected the problem and pointed out where the issue was.

This modifies the parameters of one of the functions that the arch
specific code calls, so it includes changes to arch code to accommodate
the new prototype.

Note, I notice that the parsic arch implements its own push_return_trace.
This is now a generic function and the ftrace_push_return_trace should be
used instead. This patch does not touch that code.

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/powerpc/kernel/ftrace.c         |  2 +-
 arch/s390/kernel/ftrace.c            |  2 +-
 arch/x86/Kconfig                     |  1 +
 arch/x86/kernel/entry_32.S           |  2 ++
 arch/x86/kernel/entry_64.S           |  2 ++
 arch/x86/kernel/ftrace.c             |  6 ++++--
 include/linux/ftrace.h               |  4 +++-
 kernel/trace/Kconfig                 |  7 +++++++
 kernel/trace/trace_functions_graph.c | 36 ++++++++++++++++++++++++++++++++----
 9 files changed, 53 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index 2d182f119d1..58d6a610904 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -605,7 +605,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		return;
 	}
 
-	if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) {
+	if (ftrace_push_return_trace(old, self_addr, &trace.depth, 0) == -EBUSY) {
 		*parent = old;
 		return;
 	}
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 82ddfd3a75a..3e298e64f0d 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -190,7 +190,7 @@ unsigned long prepare_ftrace_return(unsigned long ip, unsigned long parent)
 		goto out;
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
 		goto out;
-	if (ftrace_push_return_trace(parent, ip, &trace.depth) == -EBUSY)
+	if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
 		goto out;
 	trace.func = ftrace_mcount_call_adjust(ip) & PSW_ADDR_INSN;
 	/* Only trace if the calling function expects to. */
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 356d2ec8e2f..fcf12af0742 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -33,6 +33,7 @@ config X86
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_FUNCTION_GRAPH_FP_TEST
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
 	select HAVE_FTRACE_SYSCALLS
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c929add475c..0d4b28564c1 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1154,6 +1154,7 @@ ENTRY(ftrace_graph_caller)
 	pushl %edx
 	movl 0xc(%esp), %edx
 	lea 0x4(%ebp), %eax
+	movl (%ebp), %ecx
 	subl $MCOUNT_INSN_SIZE, %edx
 	call prepare_ftrace_return
 	popl %edx
@@ -1168,6 +1169,7 @@ return_to_handler:
 	pushl %eax
 	pushl %ecx
 	pushl %edx
+	movl %ebp, %eax
 	call ftrace_return_to_handler
 	movl %eax, 0xc(%esp)
 	popl %edx
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index de74f0a3e0e..c251be74510 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -135,6 +135,7 @@ ENTRY(ftrace_graph_caller)
 
 	leaq 8(%rbp), %rdi
 	movq 0x38(%rsp), %rsi
+	movq (%rbp), %rdx
 	subq $MCOUNT_INSN_SIZE, %rsi
 
 	call	prepare_ftrace_return
@@ -150,6 +151,7 @@ GLOBAL(return_to_handler)
 	/* Save the return values */
 	movq %rax, (%rsp)
 	movq %rdx, 8(%rsp)
+	movq %rbp, %rdi
 
 	call ftrace_return_to_handler
 
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index b79c5533c42..d94e1ea3b9f 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -408,7 +408,8 @@ int ftrace_disable_ftrace_graph_caller(void)
  * Hook the return address and push it in the stack of return addrs
  * in current thread info.
  */
-void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+			   unsigned long frame_pointer)
 {
 	unsigned long old;
 	int faulted;
@@ -453,7 +454,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		return;
 	}
 
-	if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) {
+	if (ftrace_push_return_trace(old, self_addr, &trace.depth,
+		    frame_pointer) == -EBUSY) {
 		*parent = old;
 		return;
 	}
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 39b95c56587..dc3b1328aae 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -362,6 +362,7 @@ struct ftrace_ret_stack {
 	unsigned long func;
 	unsigned long long calltime;
 	unsigned long long subtime;
+	unsigned long fp;
 };
 
 /*
@@ -372,7 +373,8 @@ struct ftrace_ret_stack {
 extern void return_to_handler(void);
 
 extern int
-ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth);
+ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
+			 unsigned long frame_pointer);
 
 /*
  * Sometimes we don't want to trace a function with the function
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 1eac85253ce..b17ed8787de 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -18,6 +18,13 @@ config HAVE_FUNCTION_TRACER
 config HAVE_FUNCTION_GRAPH_TRACER
 	bool
 
+config HAVE_FUNCTION_GRAPH_FP_TEST
+	bool
+	help
+	 An arch may pass in a unique value (frame pointer) to both the
+	 entering and exiting of a function. On exit, the value is compared
+	 and if it does not match, then it will panic the kernel.
+
 config HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	bool
 	help
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 8b592418d8b..d2249abafb5 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -57,7 +57,8 @@ static struct tracer_flags tracer_flags = {
 
 /* Add a function return address to the trace stack on thread info.*/
 int
-ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
+ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
+			 unsigned long frame_pointer)
 {
 	unsigned long long calltime;
 	int index;
@@ -85,6 +86,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
 	current->ret_stack[index].func = func;
 	current->ret_stack[index].calltime = calltime;
 	current->ret_stack[index].subtime = 0;
+	current->ret_stack[index].fp = frame_pointer;
 	*depth = index;
 
 	return 0;
@@ -92,7 +94,8 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
 
 /* Retrieve a function return address to the trace stack on thread info.*/
 static void
-ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
+ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
+			unsigned long frame_pointer)
 {
 	int index;
 
@@ -106,6 +109,31 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
 		return;
 	}
 
+#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST
+	/*
+	 * The arch may choose to record the frame pointer used
+	 * and check it here to make sure that it is what we expect it
+	 * to be. If gcc does not set the place holder of the return
+	 * address in the frame pointer, and does a copy instead, then
+	 * the function graph trace will fail. This test detects this
+	 * case.
+	 *
+	 * Currently, x86_32 with optimize for size (-Os) makes the latest
+	 * gcc do the above.
+	 */
+	if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
+		ftrace_graph_stop();
+		WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
+		     "  from func %pF return to %lx\n",
+		     current->ret_stack[index].fp,
+		     frame_pointer,
+		     (void *)current->ret_stack[index].func,
+		     current->ret_stack[index].ret);
+		*ret = (unsigned long)panic;
+		return;
+	}
+#endif
+
 	*ret = current->ret_stack[index].ret;
 	trace->func = current->ret_stack[index].func;
 	trace->calltime = current->ret_stack[index].calltime;
@@ -117,12 +145,12 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
  * Send the trace to the ring-buffer.
  * @return the original return address.
  */
-unsigned long ftrace_return_to_handler(void)
+unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
 {
 	struct ftrace_graph_ret trace;
 	unsigned long ret;
 
-	ftrace_pop_return_trace(&trace, &ret);
+	ftrace_pop_return_trace(&trace, &ret, frame_pointer);
 	trace.rettime = trace_clock_local();
 	ftrace_graph_return(&trace);
 	barrier();
-- 
cgit v1.2.3-70-g09d2


From 89579f778266d5a4d08d0c64c46b1565218de9f9 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Wed, 17 Jun 2009 12:29:56 +0200
Subject: drm: Apply "Memory fragmentation from lost alignment blocks"

also for the atomic path by using a common code-path.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_mm.c | 48 +++++++-----------------------------------------
 include/drm/drm_mm.h     | 21 +++++++++++++++------
 2 files changed, 22 insertions(+), 47 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index a912a0ff11c..3e47869d6da 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -187,9 +187,10 @@ static struct drm_mm_node *drm_mm_split_at_start(struct drm_mm_node *parent,
 }
 
 
-
-struct drm_mm_node *drm_mm_get_block(struct drm_mm_node *node,
-				     unsigned long size, unsigned alignment)
+struct drm_mm_node *drm_mm_get_block_generic(struct drm_mm_node *node,
+					     unsigned long size,
+					     unsigned alignment,
+					     int atomic)
 {
 
 	struct drm_mm_node *align_splitoff = NULL;
@@ -200,7 +201,7 @@ struct drm_mm_node *drm_mm_get_block(struct drm_mm_node *node,
 
 	if (tmp) {
 		align_splitoff =
-		    drm_mm_split_at_start(node, alignment - tmp, 0);
+		    drm_mm_split_at_start(node, alignment - tmp, atomic);
 		if (unlikely(align_splitoff == NULL))
 			return NULL;
 	}
@@ -209,7 +210,7 @@ struct drm_mm_node *drm_mm_get_block(struct drm_mm_node *node,
 		list_del_init(&node->fl_entry);
 		node->free = 0;
 	} else {
-		node = drm_mm_split_at_start(node, size, 0);
+		node = drm_mm_split_at_start(node, size, atomic);
 	}
 
 	if (align_splitoff)
@@ -217,42 +218,7 @@ struct drm_mm_node *drm_mm_get_block(struct drm_mm_node *node,
 
 	return node;
 }
-
-EXPORT_SYMBOL(drm_mm_get_block);
-
-struct drm_mm_node *drm_mm_get_block_atomic(struct drm_mm_node *parent,
-					    unsigned long size,
-					    unsigned alignment)
-{
-
-	struct drm_mm_node *align_splitoff = NULL;
-	struct drm_mm_node *child;
-	unsigned tmp = 0;
-
-	if (alignment)
-		tmp = parent->start % alignment;
-
-	if (tmp) {
-		align_splitoff =
-		    drm_mm_split_at_start(parent, alignment - tmp, 1);
-		if (unlikely(align_splitoff == NULL))
-			return NULL;
-	}
-
-	if (parent->size == size) {
-		list_del_init(&parent->fl_entry);
-		parent->free = 0;
-		return parent;
-	} else {
-		child = drm_mm_split_at_start(parent, size, 1);
-	}
-
-	if (align_splitoff)
-		drm_mm_put_block(align_splitoff);
-
-	return child;
-}
-EXPORT_SYMBOL(drm_mm_get_block_atomic);
+EXPORT_SYMBOL(drm_mm_get_block_generic);
 
 /*
  * Put a block. Merge with the previous and / or next block if they are free.
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index 5662f4278ef..f8332073d27 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -59,13 +59,22 @@ struct drm_mm {
 /*
  * Basic range manager support (drm_mm.c)
  */
-
-extern struct drm_mm_node *drm_mm_get_block(struct drm_mm_node *parent,
-					    unsigned long size,
-					    unsigned alignment);
-extern struct drm_mm_node *drm_mm_get_block_atomic(struct drm_mm_node *parent,
+extern struct drm_mm_node *drm_mm_get_block_generic(struct drm_mm_node *node,
+						    unsigned long size,
+						    unsigned alignment,
+						    int atomic);
+static inline struct drm_mm_node *drm_mm_get_block(struct drm_mm_node *parent,
 						   unsigned long size,
-						   unsigned alignment);
+						   unsigned alignment)
+{
+	return drm_mm_get_block_generic(parent, size, alignment, 0);
+}
+static inline struct drm_mm_node *drm_mm_get_block_atomic(struct drm_mm_node *parent,
+							  unsigned long size,
+							  unsigned alignment)
+{
+	return drm_mm_get_block_generic(parent, size, alignment, 1);
+}
 extern void drm_mm_put_block(struct drm_mm_node *cur);
 extern struct drm_mm_node *drm_mm_search_free(const struct drm_mm *mm,
 					      unsigned long size,
-- 
cgit v1.2.3-70-g09d2


From 0454beab0f6bc6d350860abd549b86959d2f6f40 Mon Sep 17 00:00:00 2001
From: Michel Dänzer <daenzer@vmware.com>
Date: Mon, 15 Jun 2009 16:56:07 +0200
Subject: drm: EDID endianness fixes.

Mostly replacing bitfields with explicit masks and shifts.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_edid.c              | 100 +++++++++++++++++---------------
 drivers/gpu/drm/radeon/radeon_display.c |   2 +-
 include/drm/drm_edid.h                  |  92 ++++++++++++++---------------
 3 files changed, 96 insertions(+), 98 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 801a0d0e081..7d0835226f6 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -252,16 +252,18 @@ struct drm_display_mode *drm_mode_std(struct drm_device *dev,
 {
 	struct drm_display_mode *mode;
 	int hsize = t->hsize * 8 + 248, vsize;
+	unsigned aspect_ratio = (t->vfreq_aspect & EDID_TIMING_ASPECT_MASK)
+		>> EDID_TIMING_ASPECT_SHIFT;
 
 	mode = drm_mode_create(dev);
 	if (!mode)
 		return NULL;
 
-	if (t->aspect_ratio == 0)
+	if (aspect_ratio == 0)
 		vsize = (hsize * 10) / 16;
-	else if (t->aspect_ratio == 1)
+	else if (aspect_ratio == 1)
 		vsize = (hsize * 3) / 4;
-	else if (t->aspect_ratio == 2)
+	else if (aspect_ratio == 2)
 		vsize = (hsize * 4) / 5;
 	else
 		vsize = (hsize * 9) / 16;
@@ -288,17 +290,24 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_device *dev,
 {
 	struct drm_display_mode *mode;
 	struct detailed_pixel_timing *pt = &timing->data.pixel_data;
+	unsigned hactive = (pt->hactive_hblank_hi & 0xf0) << 4 | pt->hactive_lo;
+	unsigned vactive = (pt->vactive_vblank_hi & 0xf0) << 4 | pt->vactive_lo;
+	unsigned hblank = (pt->hactive_hblank_hi & 0xf) << 8 | pt->hblank_lo;
+	unsigned vblank = (pt->vactive_vblank_hi & 0xf) << 8 | pt->vblank_lo;
+	unsigned hsync_offset = (pt->hsync_vsync_offset_pulse_width_hi & 0x3) << 8 | pt->hsync_offset_lo;
+	unsigned hsync_pulse_width = (pt->hsync_vsync_offset_pulse_width_hi & 0xc) << 6 | pt->hsync_pulse_width_lo;
+	unsigned vsync_offset = (pt->hsync_vsync_offset_pulse_width_hi & 0x30) | (pt->vsync_offset_pulse_width_lo & 0xf);
+	unsigned vsync_pulse_width = (pt->hsync_vsync_offset_pulse_width_hi & 0xc0) >> 2 | pt->vsync_offset_pulse_width_lo >> 4;
 
 	/* ignore tiny modes */
-	if (((pt->hactive_hi << 8) | pt->hactive_lo) < 64 ||
-	    ((pt->vactive_hi << 8) | pt->hactive_lo) < 64)
+	if (hactive < 64 || vactive < 64)
 		return NULL;
 
-	if (pt->stereo) {
+	if (pt->misc & DRM_EDID_PT_STEREO) {
 		printk(KERN_WARNING "stereo mode not supported\n");
 		return NULL;
 	}
-	if (!pt->separate_sync) {
+	if (!(pt->misc & DRM_EDID_PT_SEPARATE_SYNC)) {
 		printk(KERN_WARNING "integrated sync not supported\n");
 		return NULL;
 	}
@@ -310,41 +319,36 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_device *dev,
 	mode->type = DRM_MODE_TYPE_DRIVER;
 
 	if (quirks & EDID_QUIRK_135_CLOCK_TOO_HIGH)
-		timing->pixel_clock = 1088;
-
-	mode->clock = timing->pixel_clock * 10;
-
-	mode->hdisplay = (pt->hactive_hi << 8) | pt->hactive_lo;
-	mode->hsync_start = mode->hdisplay + ((pt->hsync_offset_hi << 8) |
-					      pt->hsync_offset_lo);
-	mode->hsync_end = mode->hsync_start +
-		((pt->hsync_pulse_width_hi << 8) |
-		 pt->hsync_pulse_width_lo);
-	mode->htotal = mode->hdisplay + ((pt->hblank_hi << 8) | pt->hblank_lo);
-
-	mode->vdisplay = (pt->vactive_hi << 8) | pt->vactive_lo;
-	mode->vsync_start = mode->vdisplay + ((pt->vsync_offset_hi << 4) |
-					      pt->vsync_offset_lo);
-	mode->vsync_end = mode->vsync_start +
-		((pt->vsync_pulse_width_hi << 4) |
-		 pt->vsync_pulse_width_lo);
-	mode->vtotal = mode->vdisplay + ((pt->vblank_hi << 8) | pt->vblank_lo);
+		timing->pixel_clock = cpu_to_le16(1088);
+
+	mode->clock = le16_to_cpu(timing->pixel_clock) * 10;
+
+	mode->hdisplay = hactive;
+	mode->hsync_start = mode->hdisplay + hsync_offset;
+	mode->hsync_end = mode->hsync_start + hsync_pulse_width;
+	mode->htotal = mode->hdisplay + hblank;
+
+	mode->vdisplay = vactive;
+	mode->vsync_start = mode->vdisplay + vsync_offset;
+	mode->vsync_end = mode->vsync_start + vsync_pulse_width;
+	mode->vtotal = mode->vdisplay + vblank;
 
 	drm_mode_set_name(mode);
 
-	if (pt->interlaced)
+	if (pt->misc & DRM_EDID_PT_INTERLACED)
 		mode->flags |= DRM_MODE_FLAG_INTERLACE;
 
 	if (quirks & EDID_QUIRK_DETAILED_SYNC_PP) {
-		pt->hsync_positive = 1;
-		pt->vsync_positive = 1;
+		pt->misc |= DRM_EDID_PT_HSYNC_POSITIVE | DRM_EDID_PT_VSYNC_POSITIVE;
 	}
 
-	mode->flags |= pt->hsync_positive ? DRM_MODE_FLAG_PHSYNC : DRM_MODE_FLAG_NHSYNC;
-	mode->flags |= pt->vsync_positive ? DRM_MODE_FLAG_PVSYNC : DRM_MODE_FLAG_NVSYNC;
+	mode->flags |= (pt->misc & DRM_EDID_PT_HSYNC_POSITIVE) ?
+		DRM_MODE_FLAG_PHSYNC : DRM_MODE_FLAG_NHSYNC;
+	mode->flags |= (pt->misc & DRM_EDID_PT_VSYNC_POSITIVE) ?
+		DRM_MODE_FLAG_PVSYNC : DRM_MODE_FLAG_NVSYNC;
 
-	mode->width_mm = pt->width_mm_lo | (pt->width_mm_hi << 8);
-	mode->height_mm = pt->height_mm_lo | (pt->height_mm_hi << 8);
+	mode->width_mm = pt->width_mm_lo | (pt->width_height_mm_hi & 0xf) << 8;
+	mode->height_mm = pt->height_mm_lo | (pt->width_height_mm_hi & 0xf0) << 4;
 
 	if (quirks & EDID_QUIRK_DETAILED_IN_CM) {
 		mode->width_mm *= 10;
@@ -465,7 +469,7 @@ static int add_standard_modes(struct drm_connector *connector, struct edid *edid
 		struct drm_display_mode *newmode;
 
 		/* If std timings bytes are 1, 1 it's empty */
-		if (t->hsize == 1 && (t->aspect_ratio | t->vfreq) == 1)
+		if (t->hsize == 1 && t->vfreq_aspect == 1)
 			continue;
 
 		newmode = drm_mode_std(dev, &edid->standard_timings[i]);
@@ -509,7 +513,7 @@ static int add_detailed_info(struct drm_connector *connector,
 				continue;
 
 			/* First detailed mode is preferred */
-			if (i == 0 && edid->preferred_timing)
+			if (i == 0 && (edid->features & DRM_EDID_FEATURE_PREFERRED_TIMING))
 				newmode->type |= DRM_MODE_TYPE_PREFERRED;
 			drm_mode_probed_add(connector, newmode);
 
@@ -767,22 +771,22 @@ int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid)
 	if (quirks & (EDID_QUIRK_PREFER_LARGE_60 | EDID_QUIRK_PREFER_LARGE_75))
 		edid_fixup_preferred(connector, quirks);
 
-	connector->display_info.serration_vsync = edid->serration_vsync;
-	connector->display_info.sync_on_green = edid->sync_on_green;
-	connector->display_info.composite_sync = edid->composite_sync;
-	connector->display_info.separate_syncs = edid->separate_syncs;
-	connector->display_info.blank_to_black = edid->blank_to_black;
-	connector->display_info.video_level = edid->video_level;
-	connector->display_info.digital = edid->digital;
+	connector->display_info.serration_vsync = (edid->input & DRM_EDID_INPUT_SERRATION_VSYNC) ? 1 : 0;
+	connector->display_info.sync_on_green = (edid->input & DRM_EDID_INPUT_SYNC_ON_GREEN) ? 1 : 0;
+	connector->display_info.composite_sync = (edid->input & DRM_EDID_INPUT_COMPOSITE_SYNC) ? 1 : 0;
+	connector->display_info.separate_syncs = (edid->input & DRM_EDID_INPUT_SEPARATE_SYNCS) ? 1 : 0;
+	connector->display_info.blank_to_black = (edid->input & DRM_EDID_INPUT_BLANK_TO_BLACK) ? 1 : 0;
+	connector->display_info.video_level = (edid->input & DRM_EDID_INPUT_VIDEO_LEVEL) >> 5;
+	connector->display_info.digital = (edid->input & DRM_EDID_INPUT_DIGITAL) ? 1 : 0;
 	connector->display_info.width_mm = edid->width_cm * 10;
 	connector->display_info.height_mm = edid->height_cm * 10;
 	connector->display_info.gamma = edid->gamma;
-	connector->display_info.gtf_supported = edid->default_gtf;
-	connector->display_info.standard_color = edid->standard_color;
-	connector->display_info.display_type = edid->display_type;
-	connector->display_info.active_off_supported = edid->pm_active_off;
-	connector->display_info.suspend_supported = edid->pm_suspend;
-	connector->display_info.standby_supported = edid->pm_standby;
+	connector->display_info.gtf_supported = (edid->features & DRM_EDID_FEATURE_DEFAULT_GTF) ? 1 : 0;
+	connector->display_info.standard_color = (edid->features & DRM_EDID_FEATURE_STANDARD_COLOR) ? 1 : 0;
+	connector->display_info.display_type = (edid->features & DRM_EDID_FEATURE_DISPLAY_TYPE) >> 3;
+	connector->display_info.active_off_supported = (edid->features & DRM_EDID_FEATURE_PM_ACTIVE_OFF) ? 1 : 0;
+	connector->display_info.suspend_supported = (edid->features & DRM_EDID_FEATURE_PM_SUSPEND) ? 1 : 0;
+	connector->display_info.standby_supported = (edid->features & DRM_EDID_FEATURE_PM_STANDBY) ? 1 : 0;
 	connector->display_info.gamma = edid->gamma;
 
 	return num_modes;
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 5452bb9d925..3efcf1a526b 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -351,7 +351,7 @@ int radeon_ddc_get_modes(struct radeon_connector *radeon_connector)
 	radeon_i2c_do_lock(radeon_connector, 0);
 	if (edid) {
 		/* update digital bits here */
-		if (edid->digital)
+		if (edid->input & DRM_EDID_INPUT_DIGITAL)
 			radeon_connector->use_digital = 1;
 		else
 			radeon_connector->use_digital = 0;
diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h
index a11cc9d3259..c263e4d7175 100644
--- a/include/drm/drm_edid.h
+++ b/include/drm/drm_edid.h
@@ -28,53 +28,49 @@
 #define EDID_LENGTH 128
 #define DDC_ADDR 0x50
 
-#ifdef BIG_ENDIAN
-#error "EDID structure is little endian, need big endian versions"
-#else
-
 struct est_timings {
 	u8 t1;
 	u8 t2;
 	u8 mfg_rsvd;
 } __attribute__((packed));
 
+/* 00=16:10, 01=4:3, 10=5:4, 11=16:9 */
+#define EDID_TIMING_ASPECT_SHIFT 0
+#define EDID_TIMING_ASPECT_MASK  (0x3 << EDID_TIMING_ASPECT_SHIFT)
+
+/* need to add 60 */
+#define EDID_TIMING_VFREQ_SHIFT  2
+#define EDID_TIMING_VFREQ_MASK   (0x3f << EDID_TIMING_VFREQ_SHIFT)
+
 struct std_timing {
 	u8 hsize; /* need to multiply by 8 then add 248 */
-	u8 vfreq:6; /* need to add 60 */
-	u8 aspect_ratio:2; /* 00=16:10, 01=4:3, 10=5:4, 11=16:9 */
+	u8 vfreq_aspect;
 } __attribute__((packed));
 
+#define DRM_EDID_PT_HSYNC_POSITIVE (1 << 6)
+#define DRM_EDID_PT_VSYNC_POSITIVE (1 << 5)
+#define DRM_EDID_PT_SEPARATE_SYNC  (3 << 3)
+#define DRM_EDID_PT_STEREO         (1 << 2)
+#define DRM_EDID_PT_INTERLACED     (1 << 1)
+
 /* If detailed data is pixel timing */
 struct detailed_pixel_timing {
 	u8 hactive_lo;
 	u8 hblank_lo;
-	u8 hblank_hi:4;
-	u8 hactive_hi:4;
+	u8 hactive_hblank_hi;
 	u8 vactive_lo;
 	u8 vblank_lo;
-	u8 vblank_hi:4;
-	u8 vactive_hi:4;
+	u8 vactive_vblank_hi;
 	u8 hsync_offset_lo;
 	u8 hsync_pulse_width_lo;
-	u8 vsync_pulse_width_lo:4;
-	u8 vsync_offset_lo:4;
-	u8 vsync_pulse_width_hi:2;
-	u8 vsync_offset_hi:2;
-	u8 hsync_pulse_width_hi:2;
-	u8 hsync_offset_hi:2;
+	u8 vsync_offset_pulse_width_lo;
+	u8 hsync_vsync_offset_pulse_width_hi;
 	u8 width_mm_lo;
 	u8 height_mm_lo;
-	u8 height_mm_hi:4;
-	u8 width_mm_hi:4;
+	u8 width_height_mm_hi;
 	u8 hborder;
 	u8 vborder;
-	u8 unknown0:1;
-	u8 hsync_positive:1;
-	u8 vsync_positive:1;
-	u8 separate_sync:2;
-	u8 stereo:1;
-	u8 unknown6:1;
-	u8 interlaced:1;
+	u8 misc;
 } __attribute__((packed));
 
 /* If it's not pixel timing, it'll be one of the below */
@@ -88,18 +84,16 @@ struct detailed_data_monitor_range {
 	u8 min_hfreq_khz;
 	u8 max_hfreq_khz;
 	u8 pixel_clock_mhz; /* need to multiply by 10 */
-	u16 sec_gtf_toggle; /* A000=use above, 20=use below */ /* FIXME: byte order */
+	__le16 sec_gtf_toggle; /* A000=use above, 20=use below */
 	u8 hfreq_start_khz; /* need to multiply by 2 */
 	u8 c; /* need to divide by 2 */
-	u16 m; /* FIXME: byte order */
+	__le16 m;
 	u8 k;
 	u8 j; /* need to divide by 2 */
 } __attribute__((packed));
 
 struct detailed_data_wpindex {
-	u8 white_y_lo:2;
-	u8 white_x_lo:2;
-	u8 pad:4;
+	u8 white_xy_lo; /* Upper 2 bits each */
 	u8 white_x_hi;
 	u8 white_y_hi;
 	u8 gamma; /* need to divide by 100 then add 1 */
@@ -134,13 +128,29 @@ struct detailed_non_pixel {
 #define EDID_DETAIL_MONITOR_SERIAL 0xff
 
 struct detailed_timing {
-	u16 pixel_clock; /* need to multiply by 10 KHz */ /* FIXME: byte order */
+	__le16 pixel_clock; /* need to multiply by 10 KHz */
 	union {
 		struct detailed_pixel_timing pixel_data;
 		struct detailed_non_pixel other_data;
 	} data;
 } __attribute__((packed));
 
+#define DRM_EDID_INPUT_SERRATION_VSYNC (1 << 7)
+#define DRM_EDID_INPUT_SYNC_ON_GREEN   (1 << 5)
+#define DRM_EDID_INPUT_COMPOSITE_SYNC  (1 << 4)
+#define DRM_EDID_INPUT_SEPARATE_SYNCS  (1 << 3)
+#define DRM_EDID_INPUT_BLANK_TO_BLACK  (1 << 2)
+#define DRM_EDID_INPUT_VIDEO_LEVEL     (3 << 1)
+#define DRM_EDID_INPUT_DIGITAL         (1 << 0) /* bits above must be zero if set */
+
+#define DRM_EDID_FEATURE_DEFAULT_GTF      (1 << 7)
+#define DRM_EDID_FEATURE_PREFERRED_TIMING (1 << 6)
+#define DRM_EDID_FEATURE_STANDARD_COLOR   (1 << 5)
+#define DRM_EDID_FEATURE_DISPLAY_TYPE     (3 << 3) /* 00=mono, 01=rgb, 10=non-rgb, 11=unknown */
+#define DRM_EDID_FEATURE_PM_ACTIVE_OFF    (1 << 2)
+#define DRM_EDID_FEATURE_PM_SUSPEND       (1 << 1)
+#define DRM_EDID_FEATURE_PM_STANDBY       (1 << 0)
+
 struct edid {
 	u8 header[8];
 	/* Vendor & product info */
@@ -153,25 +163,11 @@ struct edid {
 	u8 version;
 	u8 revision;
 	/* Display info: */
-	/*   input definition */
-	u8 serration_vsync:1;
-	u8 sync_on_green:1;
-	u8 composite_sync:1;
-	u8 separate_syncs:1;
-	u8 blank_to_black:1;
-	u8 video_level:2;
-	u8 digital:1; /* bits below must be zero if set */
+	u8 input;
 	u8 width_cm;
 	u8 height_cm;
 	u8 gamma;
-	/*   feature support */
-	u8 default_gtf:1;
-	u8 preferred_timing:1;
-	u8 standard_color:1;
-	u8 display_type:2; /* 00=mono, 01=rgb, 10=non-rgb, 11=unknown */
-	u8 pm_active_off:1;
-	u8 pm_suspend:1;
-	u8 pm_standby:1;
+	u8 features;
 	/* Color characteristics */
 	u8 red_green_lo;
 	u8 black_white_lo;
@@ -195,8 +191,6 @@ struct edid {
 	u8 checksum;
 } __attribute__((packed));
 
-#endif /* little endian structs */
-
 #define EDID_PRODUCT_ID(e) ((e)->prod_code[0] | ((e)->prod_code[1] << 8))
 
 #endif /* __DRM_EDID_H__ */
-- 
cgit v1.2.3-70-g09d2


From 07613ba2f464f59949266f4337b75b91eb610795 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 12 Jun 2009 14:11:41 +1000
Subject: agp: switch AGP to use page array instead of unsigned long array

This switches AGP to use an array of pages for tracking the
pages allocated to the GART. This should enable GEM on PAE to work
a lot better as we can pass highmem pages to the PAT code and it will
do the right thing with them.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/char/agp/agp.h                | 12 +++---
 drivers/char/agp/ali-agp.c            | 26 ++++++-------
 drivers/char/agp/amd-k7-agp.c         |  2 +-
 drivers/char/agp/amd64-agp.c          |  2 +-
 drivers/char/agp/ati-agp.c            |  5 ++-
 drivers/char/agp/backend.c            |  8 ++--
 drivers/char/agp/efficeon-agp.c       |  5 ++-
 drivers/char/agp/generic.c            | 69 ++++++++++++++---------------------
 drivers/char/agp/hp-agp.c             |  9 ++---
 drivers/char/agp/i460-agp.c           | 47 ++++++++++++++----------
 drivers/char/agp/intel-agp.c          | 49 ++++++++++++-------------
 drivers/char/agp/nvidia-agp.c         |  2 +-
 drivers/char/agp/parisc-agp.c         | 20 ++++++++--
 drivers/char/agp/sgi-agp.c            |  9 +++--
 drivers/char/agp/sworks-agp.c         |  2 +-
 drivers/char/agp/uninorth-agp.c       | 12 +++---
 drivers/gpu/drm/drm_agpsupport.c      |  2 +-
 drivers/gpu/drm/drm_memory.c          |  8 ++--
 drivers/gpu/drm/drm_vm.c              |  4 +-
 drivers/gpu/drm/ttm/ttm_agp_backend.c |  3 +-
 include/linux/agp_backend.h           |  2 +-
 21 files changed, 152 insertions(+), 146 deletions(-)

(limited to 'include')

diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h
index 46f50753117..178e2e9e9f0 100644
--- a/drivers/char/agp/agp.h
+++ b/drivers/char/agp/agp.h
@@ -107,7 +107,7 @@ struct agp_bridge_driver {
 	void (*agp_enable)(struct agp_bridge_data *, u32);
 	void (*cleanup)(void);
 	void (*tlb_flush)(struct agp_memory *);
-	unsigned long (*mask_memory)(struct agp_bridge_data *, unsigned long, int);
+	unsigned long (*mask_memory)(struct agp_bridge_data *, struct page *, int);
 	void (*cache_flush)(void);
 	int (*create_gatt_table)(struct agp_bridge_data *);
 	int (*free_gatt_table)(struct agp_bridge_data *);
@@ -115,9 +115,9 @@ struct agp_bridge_driver {
 	int (*remove_memory)(struct agp_memory *, off_t, int);
 	struct agp_memory *(*alloc_by_type) (size_t, int);
 	void (*free_by_type)(struct agp_memory *);
-	void *(*agp_alloc_page)(struct agp_bridge_data *);
+	struct page *(*agp_alloc_page)(struct agp_bridge_data *);
 	int (*agp_alloc_pages)(struct agp_bridge_data *, struct agp_memory *, size_t);
-	void (*agp_destroy_page)(void *, int flags);
+	void (*agp_destroy_page)(struct page *, int flags);
 	void (*agp_destroy_pages)(struct agp_memory *);
 	int (*agp_type_to_mask_type) (struct agp_bridge_data *, int);
 	void (*chipset_flush)(struct agp_bridge_data *);
@@ -278,10 +278,10 @@ int agp_generic_insert_memory(struct agp_memory *mem, off_t pg_start, int type);
 int agp_generic_remove_memory(struct agp_memory *mem, off_t pg_start, int type);
 struct agp_memory *agp_generic_alloc_by_type(size_t page_count, int type);
 void agp_generic_free_by_type(struct agp_memory *curr);
-void *agp_generic_alloc_page(struct agp_bridge_data *bridge);
+struct page *agp_generic_alloc_page(struct agp_bridge_data *bridge);
 int agp_generic_alloc_pages(struct agp_bridge_data *agp_bridge,
 			    struct agp_memory *memory, size_t page_count);
-void agp_generic_destroy_page(void *addr, int flags);
+void agp_generic_destroy_page(struct page *page, int flags);
 void agp_generic_destroy_pages(struct agp_memory *memory);
 void agp_free_key(int key);
 int agp_num_entries(void);
@@ -291,7 +291,7 @@ int agp_3_5_enable(struct agp_bridge_data *bridge);
 void global_cache_flush(void);
 void get_agp_version(struct agp_bridge_data *bridge);
 unsigned long agp_generic_mask_memory(struct agp_bridge_data *bridge,
-	unsigned long addr, int type);
+				      struct page *page, int type);
 int agp_generic_type_to_mask_type(struct agp_bridge_data *bridge,
 				  int type);
 struct agp_bridge_data *agp_generic_find_bridge(struct pci_dev *pdev);
diff --git a/drivers/char/agp/ali-agp.c b/drivers/char/agp/ali-agp.c
index 58f26ed2a96..201ef3ffd48 100644
--- a/drivers/char/agp/ali-agp.c
+++ b/drivers/char/agp/ali-agp.c
@@ -141,37 +141,37 @@ static void m1541_cache_flush(void)
 	}
 }
 
-static void *m1541_alloc_page(struct agp_bridge_data *bridge)
+static struct page *m1541_alloc_page(struct agp_bridge_data *bridge)
 {
-	void *addr = agp_generic_alloc_page(agp_bridge);
+	struct page *page = agp_generic_alloc_page(agp_bridge);
 	u32 temp;
 
-	if (!addr)
+	if (!page)
 		return NULL;
 
 	pci_read_config_dword(agp_bridge->dev, ALI_CACHE_FLUSH_CTRL, &temp);
 	pci_write_config_dword(agp_bridge->dev, ALI_CACHE_FLUSH_CTRL,
 			(((temp & ALI_CACHE_FLUSH_ADDR_MASK) |
-			  virt_to_gart(addr)) | ALI_CACHE_FLUSH_EN ));
-	return addr;
+			  phys_to_gart(page_to_phys(page))) | ALI_CACHE_FLUSH_EN ));
+	return page;
 }
 
-static void ali_destroy_page(void * addr, int flags)
+static void ali_destroy_page(struct page *page, int flags)
 {
-	if (addr) {
+	if (page) {
 		if (flags & AGP_PAGE_DESTROY_UNMAP) {
 			global_cache_flush();	/* is this really needed?  --hch */
-			agp_generic_destroy_page(addr, flags);
+			agp_generic_destroy_page(page, flags);
 		} else
-			agp_generic_destroy_page(addr, flags);
+			agp_generic_destroy_page(page, flags);
 	}
 }
 
-static void m1541_destroy_page(void * addr, int flags)
+static void m1541_destroy_page(struct page *page, int flags)
 {
 	u32 temp;
 
-	if (addr == NULL)
+	if (page == NULL)
 		return;
 
 	if (flags & AGP_PAGE_DESTROY_UNMAP) {
@@ -180,9 +180,9 @@ static void m1541_destroy_page(void * addr, int flags)
 		pci_read_config_dword(agp_bridge->dev, ALI_CACHE_FLUSH_CTRL, &temp);
 		pci_write_config_dword(agp_bridge->dev, ALI_CACHE_FLUSH_CTRL,
 				       (((temp & ALI_CACHE_FLUSH_ADDR_MASK) |
-					 virt_to_gart(addr)) | ALI_CACHE_FLUSH_EN));
+					 phys_to_gart(page_to_phys(page))) | ALI_CACHE_FLUSH_EN));
 	}
-	agp_generic_destroy_page(addr, flags);
+	agp_generic_destroy_page(page, flags);
 }
 
 
diff --git a/drivers/char/agp/amd-k7-agp.c b/drivers/char/agp/amd-k7-agp.c
index 3f98254b911..ba9bde71eaa 100644
--- a/drivers/char/agp/amd-k7-agp.c
+++ b/drivers/char/agp/amd-k7-agp.c
@@ -325,7 +325,7 @@ static int amd_insert_memory(struct agp_memory *mem, off_t pg_start, int type)
 		addr = (j * PAGE_SIZE) + agp_bridge->gart_bus_addr;
 		cur_gatt = GET_GATT(addr);
 		writel(agp_generic_mask_memory(agp_bridge,
-			mem->memory[i], mem->type), cur_gatt+GET_GATT_OFF(addr));
+			mem->pages[i], mem->type), cur_gatt+GET_GATT_OFF(addr));
 		readl(cur_gatt+GET_GATT_OFF(addr));	/* PCI Posting. */
 	}
 	amd_irongate_tlbflush(mem);
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index d765afda9c2..3bf5dda90f4 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -79,7 +79,7 @@ static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type)
 
 	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
 		tmp = agp_bridge->driver->mask_memory(agp_bridge,
-			mem->memory[i], mask_type);
+			mem->pages[i], mask_type);
 
 		BUG_ON(tmp & 0xffffff0000000ffcULL);
 		pte = (tmp & 0x000000ff00000000ULL) >> 28;
diff --git a/drivers/char/agp/ati-agp.c b/drivers/char/agp/ati-agp.c
index f1537eece07..4d38baa209f 100644
--- a/drivers/char/agp/ati-agp.c
+++ b/drivers/char/agp/ati-agp.c
@@ -296,8 +296,9 @@ static int ati_insert_memory(struct agp_memory * mem,
 	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
 		addr = (j * PAGE_SIZE) + agp_bridge->gart_bus_addr;
 		cur_gatt = GET_GATT(addr);
-		writel(agp_bridge->driver->mask_memory(agp_bridge,
-			mem->memory[i], mem->type), cur_gatt+GET_GATT_OFF(addr));
+		writel(agp_bridge->driver->mask_memory(agp_bridge,	
+						       mem->pages[i], mem->type),
+		       cur_gatt+GET_GATT_OFF(addr));
 		readl(cur_gatt+GET_GATT_OFF(addr));	/* PCI Posting. */
 	}
 	agp_bridge->driver->tlb_flush(mem);
diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c
index 8c617ad7497..cfa5a649dfe 100644
--- a/drivers/char/agp/backend.c
+++ b/drivers/char/agp/backend.c
@@ -141,17 +141,17 @@ static int agp_backend_initialize(struct agp_bridge_data *bridge)
 	bridge->version = &agp_current_version;
 
 	if (bridge->driver->needs_scratch_page) {
-		void *addr = bridge->driver->agp_alloc_page(bridge);
+		struct page *page = bridge->driver->agp_alloc_page(bridge);
 
-		if (!addr) {
+		if (!page) {
 			dev_err(&bridge->dev->dev,
 				"can't get memory for scratch page\n");
 			return -ENOMEM;
 		}
 
-		bridge->scratch_page_real = virt_to_gart(addr);
+		bridge->scratch_page_real = phys_to_gart(page_to_phys(page));
 		bridge->scratch_page =
-		    bridge->driver->mask_memory(bridge, bridge->scratch_page_real, 0);
+		    bridge->driver->mask_memory(bridge, page, 0);
 	}
 
 	size_value = bridge->driver->fetch_size();
diff --git a/drivers/char/agp/efficeon-agp.c b/drivers/char/agp/efficeon-agp.c
index 453543a1f29..35d50f2861b 100644
--- a/drivers/char/agp/efficeon-agp.c
+++ b/drivers/char/agp/efficeon-agp.c
@@ -65,8 +65,9 @@ static const struct gatt_mask efficeon_generic_masks[] =
 };
 
 /* This function does the same thing as mask_memory() for this chipset... */
-static inline unsigned long efficeon_mask_memory(unsigned long addr)
+static inline unsigned long efficeon_mask_memory(struct page *page)
 {
+	unsigned long addr = phys_to_gart(page_to_phys(page));
 	return addr | 0x00000001;
 }
 
@@ -257,7 +258,7 @@ static int efficeon_insert_memory(struct agp_memory * mem, off_t pg_start, int t
 	last_page = NULL;
 	for (i = 0; i < count; i++) {
 		int index = pg_start + i;
-		unsigned long insert = efficeon_mask_memory(mem->memory[i]);
+		unsigned long insert = efficeon_mask_memory(mem->pages[i]);
 
 		page = (unsigned int *) efficeon_private.l1_table[index >> 10];
 
diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
index 2224b762b7f..1e8b461b91f 100644
--- a/drivers/char/agp/generic.c
+++ b/drivers/char/agp/generic.c
@@ -95,13 +95,13 @@ EXPORT_SYMBOL(agp_flush_chipset);
 
 void agp_alloc_page_array(size_t size, struct agp_memory *mem)
 {
-	mem->memory = NULL;
+	mem->pages = NULL;
 	mem->vmalloc_flag = false;
 
 	if (size <= 2*PAGE_SIZE)
-		mem->memory = kmalloc(size, GFP_KERNEL | __GFP_NORETRY);
-	if (mem->memory == NULL) {
-		mem->memory = vmalloc(size);
+		mem->pages = kmalloc(size, GFP_KERNEL | __GFP_NORETRY);
+	if (mem->pages == NULL) {
+		mem->pages = vmalloc(size);
 		mem->vmalloc_flag = true;
 	}
 }
@@ -110,9 +110,9 @@ EXPORT_SYMBOL(agp_alloc_page_array);
 void agp_free_page_array(struct agp_memory *mem)
 {
 	if (mem->vmalloc_flag) {
-		vfree(mem->memory);
+		vfree(mem->pages);
 	} else {
-		kfree(mem->memory);
+		kfree(mem->pages);
 	}
 }
 EXPORT_SYMBOL(agp_free_page_array);
@@ -136,7 +136,7 @@ static struct agp_memory *agp_create_user_memory(unsigned long num_agp_pages)
 
 	agp_alloc_page_array(alloc_size, new);
 
-	if (new->memory == NULL) {
+	if (new->pages == NULL) {
 		agp_free_key(new->key);
 		kfree(new);
 		return NULL;
@@ -162,7 +162,7 @@ struct agp_memory *agp_create_memory(int scratch_pages)
 
 	agp_alloc_page_array(PAGE_SIZE * scratch_pages, new);
 
-	if (new->memory == NULL) {
+	if (new->pages == NULL) {
 		agp_free_key(new->key);
 		kfree(new);
 		return NULL;
@@ -206,15 +206,13 @@ void agp_free_memory(struct agp_memory *curr)
 		} else {
 
 			for (i = 0; i < curr->page_count; i++) {
-				curr->memory[i] = (unsigned long)gart_to_virt(
-					curr->memory[i]);
 				curr->bridge->driver->agp_destroy_page(
-					(void *)curr->memory[i],
+					curr->pages[i],
 					AGP_PAGE_DESTROY_UNMAP);
 			}
 			for (i = 0; i < curr->page_count; i++) {
 				curr->bridge->driver->agp_destroy_page(
-					(void *)curr->memory[i],
+					curr->pages[i],
 					AGP_PAGE_DESTROY_FREE);
 			}
 		}
@@ -282,13 +280,13 @@ struct agp_memory *agp_allocate_memory(struct agp_bridge_data *bridge,
 	}
 
 	for (i = 0; i < page_count; i++) {
-		void *addr = bridge->driver->agp_alloc_page(bridge);
+		struct page *page = bridge->driver->agp_alloc_page(bridge);
 
-		if (addr == NULL) {
+		if (page == NULL) {
 			agp_free_memory(new);
 			return NULL;
 		}
-		new->memory[i] = virt_to_gart(addr);
+		new->pages[i] = page;
 		new->page_count++;
 	}
 	new->bridge = bridge;
@@ -1134,7 +1132,7 @@ int agp_generic_insert_memory(struct agp_memory * mem, off_t pg_start, int type)
 	}
 
 	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
-		writel(bridge->driver->mask_memory(bridge, mem->memory[i], mask_type),
+		writel(bridge->driver->mask_memory(bridge, mem->pages[i], mask_type),
 		       bridge->gatt_table+j);
 	}
 	readl(bridge->gatt_table+j-1);	/* PCI Posting. */
@@ -1204,7 +1202,7 @@ struct agp_memory *agp_generic_alloc_user(size_t page_count, int type)
 		return NULL;
 
 	for (i = 0; i < page_count; i++)
-		new->memory[i] = 0;
+		new->pages[i] = 0;
 	new->page_count = 0;
 	new->type = type;
 	new->num_scratch_pages = pages;
@@ -1237,23 +1235,20 @@ int agp_generic_alloc_pages(struct agp_bridge_data *bridge, struct agp_memory *m
 		get_page(page);
 		atomic_inc(&agp_bridge->current_memory_agp);
 
-		/* set_memory_array_uc() needs virtual address */
-		mem->memory[i] = (unsigned long)page_address(page);
+		mem->pages[i] = page;
 		mem->page_count++;
 	}
 
 #ifdef CONFIG_X86
-	set_memory_array_uc(mem->memory, num_pages);
+	set_pages_array_uc(mem->pages, num_pages);
 #endif
 	ret = 0;
 out:
-	for (i = 0; i < mem->page_count; i++)
-		mem->memory[i] = virt_to_gart((void *)mem->memory[i]);
 	return ret;
 }
 EXPORT_SYMBOL(agp_generic_alloc_pages);
 
-void *agp_generic_alloc_page(struct agp_bridge_data *bridge)
+struct page *agp_generic_alloc_page(struct agp_bridge_data *bridge)
 {
 	struct page * page;
 
@@ -1265,56 +1260,47 @@ void *agp_generic_alloc_page(struct agp_bridge_data *bridge)
 
 	get_page(page);
 	atomic_inc(&agp_bridge->current_memory_agp);
-	return page_address(page);
+	return page;
 }
 EXPORT_SYMBOL(agp_generic_alloc_page);
 
 void agp_generic_destroy_pages(struct agp_memory *mem)
 {
 	int i;
-	void *addr;
 	struct page *page;
 
 	if (!mem)
 		return;
 
-	for (i = 0; i < mem->page_count; i++)
-		mem->memory[i] = (unsigned long)gart_to_virt(mem->memory[i]);
-
 #ifdef CONFIG_X86
-	set_memory_array_wb(mem->memory, mem->page_count);
+	set_pages_array_wb(mem->pages, mem->page_count);
 #endif
 
 	for (i = 0; i < mem->page_count; i++) {
-		addr = (void *)mem->memory[i];
-		page = virt_to_page(addr);
+		page = mem->pages[i];
 
 #ifndef CONFIG_X86
 		unmap_page_from_agp(page);
 #endif
-
 		put_page(page);
-		free_page((unsigned long)addr);
+		__free_page(page);
 		atomic_dec(&agp_bridge->current_memory_agp);
-		mem->memory[i] = 0;
+		mem->pages[i] = NULL;
 	}
 }
 EXPORT_SYMBOL(agp_generic_destroy_pages);
 
-void agp_generic_destroy_page(void *addr, int flags)
+void agp_generic_destroy_page(struct page *page, int flags)
 {
-	struct page *page;
-
-	if (addr == NULL)
+	if (page == NULL)
 		return;
 
-	page = virt_to_page(addr);
 	if (flags & AGP_PAGE_DESTROY_UNMAP)
 		unmap_page_from_agp(page);
 
 	if (flags & AGP_PAGE_DESTROY_FREE) {
 		put_page(page);
-		free_page((unsigned long)addr);
+		__free_page(page);
 		atomic_dec(&agp_bridge->current_memory_agp);
 	}
 }
@@ -1361,8 +1347,9 @@ void global_cache_flush(void)
 EXPORT_SYMBOL(global_cache_flush);
 
 unsigned long agp_generic_mask_memory(struct agp_bridge_data *bridge,
-	unsigned long addr, int type)
+				      struct page *page, int type)
 {
+	unsigned long addr = phys_to_gart(page_to_phys(page));
 	/* memory type is ignored in the generic routine */
 	if (bridge->driver->masks)
 		return addr | bridge->driver->masks[0].mask;
diff --git a/drivers/char/agp/hp-agp.c b/drivers/char/agp/hp-agp.c
index 183ac3fe44f..abea273dcc2 100644
--- a/drivers/char/agp/hp-agp.c
+++ b/drivers/char/agp/hp-agp.c
@@ -361,13 +361,11 @@ hp_zx1_insert_memory (struct agp_memory *mem, off_t pg_start, int type)
 	for (i = 0, j = io_pg_start; i < mem->page_count; i++) {
 		unsigned long paddr;
 
-		paddr = mem->memory[i];
+		paddr = page_to_phys(mem->pages[i]);
 		for (k = 0;
 		     k < hp->io_pages_per_kpage;
 		     k++, j++, paddr += hp->io_page_size) {
-			hp->gatt[j] =
-				agp_bridge->driver->mask_memory(agp_bridge,
-					paddr, type);
+			hp->gatt[j] = HP_ZX1_PDIR_VALID_BIT | paddr;
 		}
 	}
 
@@ -397,8 +395,9 @@ hp_zx1_remove_memory (struct agp_memory *mem, off_t pg_start, int type)
 
 static unsigned long
 hp_zx1_mask_memory (struct agp_bridge_data *bridge,
-	unsigned long addr, int type)
+		    struct page *page, int type)
 {
+	unsigned long addr = phys_to_gart(page_to_phys(page));
 	return HP_ZX1_PDIR_VALID_BIT | addr;
 }
 
diff --git a/drivers/char/agp/i460-agp.c b/drivers/char/agp/i460-agp.c
index 10da687d131..60cc35bb5db 100644
--- a/drivers/char/agp/i460-agp.c
+++ b/drivers/char/agp/i460-agp.c
@@ -60,6 +60,9 @@
  */
 #define WR_FLUSH_GATT(index)	RD_GATT(index)
 
+static unsigned long i460_mask_memory (struct agp_bridge_data *bridge,
+				       unsigned long addr, int type);
+
 static struct {
 	void *gatt;				/* ioremap'd GATT area */
 
@@ -74,6 +77,7 @@ static struct {
 		unsigned long *alloced_map;	/* bitmap of kernel-pages in use */
 		int refcount;			/* number of kernel pages using the large page */
 		u64 paddr;			/* physical address of large page */
+		struct page *page; 		/* page pointer */
 	} *lp_desc;
 } i460;
 
@@ -294,7 +298,7 @@ static int i460_insert_memory_small_io_page (struct agp_memory *mem,
 	void *temp;
 
 	pr_debug("i460_insert_memory_small_io_page(mem=%p, pg_start=%ld, type=%d, paddr0=0x%lx)\n",
-		 mem, pg_start, type, mem->memory[0]);
+		 mem, pg_start, type, page_to_phys(mem->pages[0]));
 
 	if (type >= AGP_USER_TYPES || mem->type >= AGP_USER_TYPES)
 		return -EINVAL;
@@ -321,10 +325,9 @@ static int i460_insert_memory_small_io_page (struct agp_memory *mem,
 
 	io_page_size = 1UL << I460_IO_PAGE_SHIFT;
 	for (i = 0, j = io_pg_start; i < mem->page_count; i++) {
-		paddr = mem->memory[i];
+		paddr = phys_to_gart(page_to_phys(mem->pages[i]));
 		for (k = 0; k < I460_IOPAGES_PER_KPAGE; k++, j++, paddr += io_page_size)
-			WR_GATT(j, agp_bridge->driver->mask_memory(agp_bridge,
-				paddr, mem->type));
+			WR_GATT(j, i460_mask_memory(agp_bridge, paddr, mem->type));
 	}
 	WR_FLUSH_GATT(j - 1);
 	return 0;
@@ -364,10 +367,9 @@ static int i460_alloc_large_page (struct lp_desc *lp)
 {
 	unsigned long order = I460_IO_PAGE_SHIFT - PAGE_SHIFT;
 	size_t map_size;
-	void *lpage;
 
-	lpage = (void *) __get_free_pages(GFP_KERNEL, order);
-	if (!lpage) {
+	lp->page = alloc_pages(GFP_KERNEL, order);
+	if (!lp->page) {
 		printk(KERN_ERR PFX "Couldn't alloc 4M GART page...\n");
 		return -ENOMEM;
 	}
@@ -375,12 +377,12 @@ static int i460_alloc_large_page (struct lp_desc *lp)
 	map_size = ((I460_KPAGES_PER_IOPAGE + BITS_PER_LONG - 1) & -BITS_PER_LONG)/8;
 	lp->alloced_map = kzalloc(map_size, GFP_KERNEL);
 	if (!lp->alloced_map) {
-		free_pages((unsigned long) lpage, order);
+		__free_pages(lp->page, order);
 		printk(KERN_ERR PFX "Out of memory, we're in trouble...\n");
 		return -ENOMEM;
 	}
 
-	lp->paddr = virt_to_gart(lpage);
+	lp->paddr = phys_to_gart(page_to_phys(lp->page));
 	lp->refcount = 0;
 	atomic_add(I460_KPAGES_PER_IOPAGE, &agp_bridge->current_memory_agp);
 	return 0;
@@ -391,7 +393,7 @@ static void i460_free_large_page (struct lp_desc *lp)
 	kfree(lp->alloced_map);
 	lp->alloced_map = NULL;
 
-	free_pages((unsigned long) gart_to_virt(lp->paddr), I460_IO_PAGE_SHIFT - PAGE_SHIFT);
+	__free_pages(lp->page, I460_IO_PAGE_SHIFT - PAGE_SHIFT);
 	atomic_sub(I460_KPAGES_PER_IOPAGE, &agp_bridge->current_memory_agp);
 }
 
@@ -439,8 +441,8 @@ static int i460_insert_memory_large_io_page (struct agp_memory *mem,
 			if (i460_alloc_large_page(lp) < 0)
 				return -ENOMEM;
 			pg = lp - i460.lp_desc;
-			WR_GATT(pg, agp_bridge->driver->mask_memory(agp_bridge,
-				lp->paddr, 0));
+			WR_GATT(pg, i460_mask_memory(agp_bridge,
+						     lp->paddr, 0));
 			WR_FLUSH_GATT(pg);
 		}
 
@@ -448,7 +450,7 @@ static int i460_insert_memory_large_io_page (struct agp_memory *mem,
 		     idx < ((lp == end) ? (end_offset + 1) : I460_KPAGES_PER_IOPAGE);
 		     idx++, i++)
 		{
-			mem->memory[i] = lp->paddr + idx*PAGE_SIZE;
+			mem->pages[i] = lp->page;
 			__set_bit(idx, lp->alloced_map);
 			++lp->refcount;
 		}
@@ -463,7 +465,7 @@ static int i460_remove_memory_large_io_page (struct agp_memory *mem,
 	struct lp_desc *start, *end, *lp;
 	void *temp;
 
-	temp = agp_bridge->driver->current_size;
+	temp = agp_bridge->current_size;
 	num_entries = A_SIZE_8(temp)->num_entries;
 
 	/* Figure out what pg_start means in terms of our large GART pages */
@@ -477,7 +479,7 @@ static int i460_remove_memory_large_io_page (struct agp_memory *mem,
 		     idx < ((lp == end) ? (end_offset + 1) : I460_KPAGES_PER_IOPAGE);
 		     idx++, i++)
 		{
-			mem->memory[i] = 0;
+			mem->pages[i] = NULL;
 			__clear_bit(idx, lp->alloced_map);
 			--lp->refcount;
 		}
@@ -521,7 +523,7 @@ static int i460_remove_memory (struct agp_memory *mem,
  * Let's just hope nobody counts on the allocated AGP memory being there before bind time
  * (I don't think current drivers do)...
  */
-static void *i460_alloc_page (struct agp_bridge_data *bridge)
+static struct page *i460_alloc_page (struct agp_bridge_data *bridge)
 {
 	void *page;
 
@@ -534,7 +536,7 @@ static void *i460_alloc_page (struct agp_bridge_data *bridge)
 	return page;
 }
 
-static void i460_destroy_page (void *page, int flags)
+static void i460_destroy_page (struct page *page, int flags)
 {
 	if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT) {
 		agp_generic_destroy_page(page, flags);
@@ -544,13 +546,20 @@ static void i460_destroy_page (void *page, int flags)
 #endif /* I460_LARGE_IO_PAGES */
 
 static unsigned long i460_mask_memory (struct agp_bridge_data *bridge,
-	unsigned long addr, int type)
+				       unsigned long addr, int type)
 {
 	/* Make sure the returned address is a valid GATT entry */
 	return bridge->driver->masks[0].mask
 		| (((addr & ~((1 << I460_IO_PAGE_SHIFT) - 1)) & 0xfffff000) >> 12);
 }
 
+static unsigned long i460_page_mask_memory(struct agp_bridge_data *bridge,
+					   struct page *page, int type)
+{
+	unsigned long addr = phys_to_gart(page_to_phys(page));
+	return i460_mask_memory(bridge, addr, type);
+}
+
 const struct agp_bridge_driver intel_i460_driver = {
 	.owner			= THIS_MODULE,
 	.aperture_sizes		= i460_sizes,
@@ -560,7 +569,7 @@ const struct agp_bridge_driver intel_i460_driver = {
 	.fetch_size		= i460_fetch_size,
 	.cleanup		= i460_cleanup,
 	.tlb_flush		= i460_tlb_flush,
-	.mask_memory		= i460_mask_memory,
+	.mask_memory		= i460_page_mask_memory,
 	.masks			= i460_masks,
 	.agp_enable		= agp_generic_enable,
 	.cache_flush		= global_cache_flush,
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 7a748fa0dfc..35977bfb699 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -257,7 +257,7 @@ static void intel_i810_agp_enable(struct agp_bridge_data *bridge, u32 mode)
 }
 
 /* Exists to support ARGB cursors */
-static void *i8xx_alloc_pages(void)
+static struct page *i8xx_alloc_pages(void)
 {
 	struct page *page;
 
@@ -272,17 +272,14 @@ static void *i8xx_alloc_pages(void)
 	}
 	get_page(page);
 	atomic_inc(&agp_bridge->current_memory_agp);
-	return page_address(page);
+	return page;
 }
 
-static void i8xx_destroy_pages(void *addr)
+static void i8xx_destroy_pages(struct page *page)
 {
-	struct page *page;
-
-	if (addr == NULL)
+	if (page == NULL)
 		return;
 
-	page = virt_to_page(addr);
 	set_pages_wb(page, 4);
 	put_page(page);
 	__free_pages(page, 2);
@@ -346,7 +343,7 @@ static int intel_i810_insert_entries(struct agp_memory *mem, off_t pg_start,
 			global_cache_flush();
 		for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
 			writel(agp_bridge->driver->mask_memory(agp_bridge,
-							       mem->memory[i],
+							       mem->pages[i],
 							       mask_type),
 			       intel_private.registers+I810_PTE_BASE+(j*4));
 		}
@@ -389,37 +386,37 @@ static int intel_i810_remove_entries(struct agp_memory *mem, off_t pg_start,
 static struct agp_memory *alloc_agpphysmem_i8xx(size_t pg_count, int type)
 {
 	struct agp_memory *new;
-	void *addr;
+	struct page *page;
 
 	switch (pg_count) {
-	case 1: addr = agp_bridge->driver->agp_alloc_page(agp_bridge);
+	case 1: page = agp_bridge->driver->agp_alloc_page(agp_bridge);
 		break;
 	case 4:
 		/* kludge to get 4 physical pages for ARGB cursor */
-		addr = i8xx_alloc_pages();
+		page = i8xx_alloc_pages();
 		break;
 	default:
 		return NULL;
 	}
 
-	if (addr == NULL)
+	if (page == NULL)
 		return NULL;
 
 	new = agp_create_memory(pg_count);
 	if (new == NULL)
 		return NULL;
 
-	new->memory[0] = virt_to_gart(addr);
+	new->pages[0] = page;
 	if (pg_count == 4) {
 		/* kludge to get 4 physical pages for ARGB cursor */
-		new->memory[1] = new->memory[0] + PAGE_SIZE;
-		new->memory[2] = new->memory[1] + PAGE_SIZE;
-		new->memory[3] = new->memory[2] + PAGE_SIZE;
+		new->pages[1] = new->pages[0] + 1;
+		new->pages[2] = new->pages[1] + 1;
+		new->pages[3] = new->pages[2] + 1;
 	}
 	new->page_count = pg_count;
 	new->num_scratch_pages = pg_count;
 	new->type = AGP_PHYS_MEMORY;
-	new->physical = new->memory[0];
+	new->physical = page_to_phys(new->pages[0]);
 	return new;
 }
 
@@ -451,13 +448,11 @@ static void intel_i810_free_by_type(struct agp_memory *curr)
 	agp_free_key(curr->key);
 	if (curr->type == AGP_PHYS_MEMORY) {
 		if (curr->page_count == 4)
-			i8xx_destroy_pages(gart_to_virt(curr->memory[0]));
+			i8xx_destroy_pages(curr->pages[0]);
 		else {
-			void *va = gart_to_virt(curr->memory[0]);
-
-			agp_bridge->driver->agp_destroy_page(va,
+			agp_bridge->driver->agp_destroy_page(curr->pages[0],
 							     AGP_PAGE_DESTROY_UNMAP);
-			agp_bridge->driver->agp_destroy_page(va,
+			agp_bridge->driver->agp_destroy_page(curr->pages[0],
 							     AGP_PAGE_DESTROY_FREE);
 		}
 		agp_free_page_array(curr);
@@ -466,8 +461,9 @@ static void intel_i810_free_by_type(struct agp_memory *curr)
 }
 
 static unsigned long intel_i810_mask_memory(struct agp_bridge_data *bridge,
-	unsigned long addr, int type)
+					    struct page *page, int type)
 {
+	unsigned long addr = phys_to_gart(page_to_phys(page));
 	/* Type checking must be done elsewhere */
 	return addr | bridge->driver->masks[type].mask;
 }
@@ -855,7 +851,7 @@ static int intel_i830_insert_entries(struct agp_memory *mem, off_t pg_start,
 
 	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
 		writel(agp_bridge->driver->mask_memory(agp_bridge,
-						       mem->memory[i], mask_type),
+						       mem->pages[i], mask_type),
 		       intel_private.registers+I810_PTE_BASE+(j*4));
 	}
 	readl(intel_private.registers+I810_PTE_BASE+((j-1)*4));
@@ -1085,7 +1081,7 @@ static int intel_i915_insert_entries(struct agp_memory *mem, off_t pg_start,
 
 	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
 		writel(agp_bridge->driver->mask_memory(agp_bridge,
-			mem->memory[i], mask_type), intel_private.gtt+j);
+						       mem->pages[i], mask_type), intel_private.gtt+j);
 	}
 
 	readl(intel_private.gtt+j-1);
@@ -1200,8 +1196,9 @@ static int intel_i915_create_gatt_table(struct agp_bridge_data *bridge)
  * this conditional.
  */
 static unsigned long intel_i965_mask_memory(struct agp_bridge_data *bridge,
-	unsigned long addr, int type)
+					    struct page *page, int type)
 {
+	unsigned long addr = phys_to_gart(page_to_phys(page));
 	/* Shift high bits down */
 	addr |= (addr >> 28) & 0xf0;
 
diff --git a/drivers/char/agp/nvidia-agp.c b/drivers/char/agp/nvidia-agp.c
index 16acee2de11..263d71dd441 100644
--- a/drivers/char/agp/nvidia-agp.c
+++ b/drivers/char/agp/nvidia-agp.c
@@ -225,7 +225,7 @@ static int nvidia_insert_memory(struct agp_memory *mem, off_t pg_start, int type
 	}
 	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
 		writel(agp_bridge->driver->mask_memory(agp_bridge,
-			mem->memory[i], mask_type),
+			mem->pages[i], mask_type),
 			agp_bridge->gatt_table+nvidia_private.pg_offset+j);
 	}
 
diff --git a/drivers/char/agp/parisc-agp.c b/drivers/char/agp/parisc-agp.c
index 699e3422ad9..f4bb43fb801 100644
--- a/drivers/char/agp/parisc-agp.c
+++ b/drivers/char/agp/parisc-agp.c
@@ -31,6 +31,10 @@
 #define AGP8X_MODE_BIT		3
 #define AGP8X_MODE		(1 << AGP8X_MODE_BIT)
 
+static unsigned long
+parisc_agp_mask_memory(struct agp_bridge_data *bridge, unsigned long addr,
+		       int type);
+
 static struct _parisc_agp_info {
 	void __iomem *ioc_regs;
 	void __iomem *lba_regs;
@@ -149,12 +153,12 @@ parisc_agp_insert_memory(struct agp_memory *mem, off_t pg_start, int type)
 	for (i = 0, j = io_pg_start; i < mem->page_count; i++) {
 		unsigned long paddr;
 
-		paddr = mem->memory[i];
+		paddr = page_to_phys(mem->pages[i]);
 		for (k = 0;
 		     k < info->io_pages_per_kpage;
 		     k++, j++, paddr += info->io_page_size) {
 			info->gatt[j] =
-				agp_bridge->driver->mask_memory(agp_bridge,
+				parisc_agp_mask_memory(agp_bridge,
 					paddr, type);
 		}
 	}
@@ -185,9 +189,17 @@ parisc_agp_remove_memory(struct agp_memory *mem, off_t pg_start, int type)
 }
 
 static unsigned long
-parisc_agp_mask_memory(struct agp_bridge_data *bridge,
-		    unsigned long addr, int type)
+parisc_agp_mask_memory(struct agp_bridge_data *bridge, unsigned long addr,
+		       int type)
+{
+	return SBA_PDIR_VALID_BIT | addr;
+}
+
+static unsigned long
+parisc_agp_page_mask_memory(struct agp_bridge_data *bridge, struct page *page,
+			    int type)
 {
+	unsigned long addr = phys_to_gart(page_to_phys(page));
 	return SBA_PDIR_VALID_BIT | addr;
 }
 
diff --git a/drivers/char/agp/sgi-agp.c b/drivers/char/agp/sgi-agp.c
index b972d83bb1b..d3ea2e4226b 100644
--- a/drivers/char/agp/sgi-agp.c
+++ b/drivers/char/agp/sgi-agp.c
@@ -38,7 +38,7 @@ static struct aper_size_info_fixed sgi_tioca_sizes[] = {
 	{0, 0, 0},
 };
 
-static void *sgi_tioca_alloc_page(struct agp_bridge_data *bridge)
+static struct page *sgi_tioca_alloc_page(struct agp_bridge_data *bridge)
 {
 	struct page *page;
 	int nid;
@@ -52,7 +52,7 @@ static void *sgi_tioca_alloc_page(struct agp_bridge_data *bridge)
 
 	get_page(page);
 	atomic_inc(&agp_bridge->current_memory_agp);
-	return page_address(page);
+	return page;
 }
 
 /*
@@ -71,8 +71,9 @@ static void sgi_tioca_tlbflush(struct agp_memory *mem)
  */
 static unsigned long
 sgi_tioca_mask_memory(struct agp_bridge_data *bridge,
-		      unsigned long addr, int type)
+		      struct page *page, int type)
 {
+	unsigned long addr = phys_to_gart(page_to_phys(page));
 	return tioca_physpage_to_gart(addr);
 }
 
@@ -189,7 +190,7 @@ static int sgi_tioca_insert_memory(struct agp_memory *mem, off_t pg_start,
 
 	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
 		table[j] =
-		    bridge->driver->mask_memory(bridge, mem->memory[i],
+		    bridge->driver->mask_memory(bridge, mem->pages[i],
 						mem->type);
 	}
 
diff --git a/drivers/char/agp/sworks-agp.c b/drivers/char/agp/sworks-agp.c
index 6224df8b7f0..b964a219932 100644
--- a/drivers/char/agp/sworks-agp.c
+++ b/drivers/char/agp/sworks-agp.c
@@ -349,7 +349,7 @@ static int serverworks_insert_memory(struct agp_memory *mem,
 	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
 		addr = (j * PAGE_SIZE) + agp_bridge->gart_bus_addr;
 		cur_gatt = SVRWRKS_GET_GATT(addr);
-		writel(agp_bridge->driver->mask_memory(agp_bridge, mem->memory[i], mem->type), cur_gatt+GET_GATT_OFF(addr));
+		writel(agp_bridge->driver->mask_memory(agp_bridge, mem->pages[i], mem->type), cur_gatt+GET_GATT_OFF(addr));
 	}
 	serverworks_tlbflush(mem);
 	return 0;
diff --git a/drivers/char/agp/uninorth-agp.c b/drivers/char/agp/uninorth-agp.c
index 880d3f6d5b9..f192c3b9ad4 100644
--- a/drivers/char/agp/uninorth-agp.c
+++ b/drivers/char/agp/uninorth-agp.c
@@ -173,9 +173,9 @@ static int uninorth_insert_memory(struct agp_memory *mem, off_t pg_start,
 
 	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
 		agp_bridge->gatt_table[j] =
-		    cpu_to_le32((mem->memory[i] & 0xFFFFF000UL) | 0x1UL);
-		flush_dcache_range((unsigned long)__va(mem->memory[i]),
-				   (unsigned long)__va(mem->memory[i])+0x1000);
+			cpu_to_le32((page_to_phys(mem->pages[i]) & 0xFFFFF000UL) | 0x1UL);
+		flush_dcache_range((unsigned long)__va(page_to_phys(mem->pages[i])),
+				   (unsigned long)__va(page_to_phys(mem->pages[i]))+0x1000);
 	}
 	(void)in_le32((volatile u32*)&agp_bridge->gatt_table[pg_start]);
 	mb();
@@ -219,9 +219,9 @@ static int u3_insert_memory(struct agp_memory *mem, off_t pg_start, int type)
 	}
 
 	for (i = 0; i < mem->page_count; i++) {
-		gp[i] = (mem->memory[i] >> PAGE_SHIFT) | 0x80000000UL;
-		flush_dcache_range((unsigned long)__va(mem->memory[i]),
-				   (unsigned long)__va(mem->memory[i])+0x1000);
+		gp[i] = (page_to_phys(mem->pages[i]) >> PAGE_SHIFT) | 0x80000000UL;
+		flush_dcache_range((unsigned long)__va(page_to_phys(mem->pages[i])),
+				   (unsigned long)__va(page_to_phys(mem->pages[i]))+0x1000);
 	}
 	mb();
 	flush_dcache_range((unsigned long)gp, (unsigned long) &gp[i]);
diff --git a/drivers/gpu/drm/drm_agpsupport.c b/drivers/gpu/drm/drm_agpsupport.c
index 7a0d042c8d6..d68888fe3df 100644
--- a/drivers/gpu/drm/drm_agpsupport.c
+++ b/drivers/gpu/drm/drm_agpsupport.c
@@ -482,7 +482,7 @@ drm_agp_bind_pages(struct drm_device *dev,
 	}
 
 	for (i = 0; i < num_pages; i++)
-		mem->memory[i] = phys_to_gart(page_to_phys(pages[i]));
+		mem->pages[i] = pages[i];
 	mem->page_count = num_pages;
 
 	mem->is_flushed = true;
diff --git a/drivers/gpu/drm/drm_memory.c b/drivers/gpu/drm/drm_memory.c
index 0a436184dd8..e4865f99989 100644
--- a/drivers/gpu/drm/drm_memory.c
+++ b/drivers/gpu/drm/drm_memory.c
@@ -59,10 +59,11 @@ int drm_mem_info(char *buf, char **start, off_t offset,
 static void *agp_remap(unsigned long offset, unsigned long size,
 		       struct drm_device * dev)
 {
-	unsigned long *phys_addr_map, i, num_pages =
+	unsigned long i, num_pages =
 	    PAGE_ALIGN(size) / PAGE_SIZE;
 	struct drm_agp_mem *agpmem;
 	struct page **page_map;
+	struct page **phys_page_map;
 	void *addr;
 
 	size = PAGE_ALIGN(size);
@@ -89,10 +90,9 @@ static void *agp_remap(unsigned long offset, unsigned long size,
 	if (!page_map)
 		return NULL;
 
-	phys_addr_map =
-	    agpmem->memory->memory + (offset - agpmem->bound) / PAGE_SIZE;
+	phys_page_map = (agpmem->memory->pages + (offset - agpmem->bound) / PAGE_SIZE);
 	for (i = 0; i < num_pages; ++i)
-		page_map[i] = pfn_to_page(phys_addr_map[i] >> PAGE_SHIFT);
+		page_map[i] = phys_page_map[i];
 	addr = vmap(page_map, num_pages, VM_IOREMAP, PAGE_AGP);
 	vfree(page_map);
 
diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c
index f95d03ac978..7e1fbe5d477 100644
--- a/drivers/gpu/drm/drm_vm.c
+++ b/drivers/gpu/drm/drm_vm.c
@@ -144,14 +144,14 @@ static int drm_do_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		 * Get the page, inc the use count, and return it
 		 */
 		offset = (baddr - agpmem->bound) >> PAGE_SHIFT;
-		page = virt_to_page(__va(agpmem->memory->memory[offset]));
+		page = agpmem->memory->pages[offset];
 		get_page(page);
 		vmf->page = page;
 
 		DRM_DEBUG
 		    ("baddr = 0x%llx page = 0x%p, offset = 0x%llx, count=%d\n",
 		     (unsigned long long)baddr,
-		     __va(agpmem->memory->memory[offset]),
+		     agpmem->memory->pages[offset],
 		     (unsigned long long)offset,
 		     page_count(page));
 		return 0;
diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c b/drivers/gpu/drm/ttm/ttm_agp_backend.c
index e8f6d2229d8..4648ed2f014 100644
--- a/drivers/gpu/drm/ttm/ttm_agp_backend.c
+++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c
@@ -63,8 +63,7 @@ static int ttm_agp_populate(struct ttm_backend *backend,
 		if (!page)
 			page = dummy_read_page;
 
-		mem->memory[mem->page_count++] =
-		    phys_to_gart(page_to_phys(page));
+		mem->pages[mem->page_count++] = page;
 	}
 	agp_be->mem = mem;
 	return 0;
diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h
index 2b8df8b420f..76fa794fdac 100644
--- a/include/linux/agp_backend.h
+++ b/include/linux/agp_backend.h
@@ -70,7 +70,7 @@ struct agp_memory {
 	struct agp_memory *next;
 	struct agp_memory *prev;
 	struct agp_bridge_data *bridge;
-	unsigned long *memory;
+	struct page **pages;
 	size_t page_count;
 	int key;
 	int num_scratch_pages;
-- 
cgit v1.2.3-70-g09d2


From ab52ae6db035fa425f90146327ab7d2c5d3e5654 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Tue, 16 Jun 2009 04:20:53 +0300
Subject: nfsd41: Backchannel: minorversion support for the back channel

Prepare to share backchannel code with NFSv4.1.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
[nfsd41: use nfsd4_cb_sequence for callback minorversion]
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4callback.c     | 3 ++-
 fs/nfsd/nfs4state.c        | 1 +
 include/linux/nfsd/state.h | 3 ++-
 3 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 353eb4a0b84..3fd23f7acec 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -141,6 +141,7 @@ struct nfs4_cb_compound_hdr {
 	u32		ident;
 	u32		nops;
 	__be32		*nops_p;
+	u32		minorversion;
 	u32		taglen;
 	char		*tag;
 };
@@ -209,7 +210,7 @@ encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
 
 	RESERVE_SPACE(16);
 	WRITE32(0);            /* tag length is always 0 */
-	WRITE32(NFS4_MINOR_VERSION);
+	WRITE32(hdr->minorversion);
 	WRITE32(hdr->ident);
 	hdr->nops_p = p;
 	WRITE32(hdr->nops);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index ef6944b19f0..980a216a48c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -984,6 +984,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
 	if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val,
 	                 &cb->cb_addr, &cb->cb_port)))
 		goto out_err;
+	cb->cb_minorversion = 0;
 	cb->cb_prog = se->se_callback_prog;
 	cb->cb_ident = se->se_callback_ident;
 	return;
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 105cc100de0..f5a95fd3431 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -85,7 +85,8 @@ struct nfs4_cb_conn {
 	u32                     cb_addr;
 	unsigned short          cb_port;
 	u32                     cb_prog;
-	u32                     cb_ident;
+	u32			cb_minorversion;
+	u32                     cb_ident;	/* minorversion 0 only */
 	/* RPC client info */
 	atomic_t		cb_set;     /* successful CB_NULL call */
 	struct rpc_clnt *       cb_client;
-- 
cgit v1.2.3-70-g09d2


From 90c699a9ee4be165966d40f1837909ccb8890a68 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 19 Jun 2009 08:08:50 +0200
Subject: block: rename CONFIG_LBD to CONFIG_LBDAF

Follow-up to "block: enable by default support for large devices
and files on 32-bit archs".

Rename CONFIG_LBD to CONFIG_LBDAF to:
- allow update of existing [def]configs for "default y" change
- reflect that it is used also for large files support nowadays

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 Documentation/SubmitChecklist       |  2 +-
 Documentation/ja_JP/SubmitChecklist |  2 +-
 block/Kconfig                       |  4 ++--
 drivers/md/dm-exception-store.h     |  2 +-
 fs/ext3/resize.c                    |  2 +-
 fs/ext3/super.c                     |  2 +-
 fs/ext4/resize.c                    |  2 +-
 fs/ext4/super.c                     | 10 +++++-----
 fs/gfs2/Kconfig                     |  2 +-
 fs/ocfs2/super.c                    |  2 +-
 fs/xfs/linux-2.6/xfs_linux.h        |  2 +-
 fs/xfs/linux-2.6/xfs_super.c        |  2 +-
 include/linux/kernel.h              |  2 +-
 include/linux/types.h               |  2 +-
 14 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/Documentation/SubmitChecklist b/Documentation/SubmitChecklist
index ac5e0b2f109..78a9168ff37 100644
--- a/Documentation/SubmitChecklist
+++ b/Documentation/SubmitChecklist
@@ -54,7 +54,7 @@ kernel patches.
     CONFIG_PREEMPT.
 
 14: If the patch affects IO/Disk, etc: has been tested with and without
-    CONFIG_LBD.
+    CONFIG_LBDAF.
 
 15: All codepaths have been exercised with all lockdep features enabled.
 
diff --git a/Documentation/ja_JP/SubmitChecklist b/Documentation/ja_JP/SubmitChecklist
index 6c42e071d72..2df4576f117 100644
--- a/Documentation/ja_JP/SubmitChecklist
+++ b/Documentation/ja_JP/SubmitChecklist
@@ -75,7 +75,7 @@ Linux カーネルパッチ投稿者向けチェックリスト
     ビルドした上、動作確認を行ってください。
 
 14: もしパッチがディスクのI/O性能などに影響を与えるようであれば、
-    'CONFIG_LBD'オプションを有効にした場合と無効にした場合の両方で
+    'CONFIG_LBDAF'オプションを有効にした場合と無効にした場合の両方で
     テストを実施してみてください。
 
 15: lockdepの機能を全て有効にした上で、全てのコードパスを評価してください。
diff --git a/block/Kconfig b/block/Kconfig
index 2c39527aa7d..95a86adc33a 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -23,8 +23,8 @@ menuconfig BLOCK
 
 if BLOCK
 
-config LBD
-	bool "Support for large block devices and files"
+config LBDAF
+	bool "Support for large (2TB+) block devices and files"
 	depends on !64BIT
 	default y
 	help
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index 0a2e6e7f67b..c92701dc500 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -111,7 +111,7 @@ struct dm_exception_store {
 /*
  * Funtions to manipulate consecutive chunks
  */
-#  if defined(CONFIG_LBD) || (BITS_PER_LONG == 64)
+#  if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64)
 #    define DM_CHUNK_CONSECUTIVE_BITS 8
 #    define DM_CHUNK_NUMBER_BITS 56
 
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 8a0b26340b5..8359e7b3dc8 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -990,7 +990,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 			sb->s_id, n_blocks_count);
 		if (sizeof(sector_t) < 8)
 			ext3_warning(sb, __func__,
-			"CONFIG_LBD not enabled\n");
+			"CONFIG_LBDAF not enabled\n");
 		return -EINVAL;
 	}
 
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 26aa64dee6a..601e881e610 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1812,7 +1812,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		printk(KERN_ERR "EXT3-fs: filesystem on %s:"
 			" too large to mount safely\n", sb->s_id);
 		if (sizeof(sector_t) < 8)
-			printk(KERN_WARNING "EXT3-fs: CONFIG_LBD not "
+			printk(KERN_WARNING "EXT3-fs: CONFIG_LBDAF not "
 					"enabled\n");
 		goto failed_mount;
 	}
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 27eb289eea3..68b0351fc64 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1002,7 +1002,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 			" too large to resize to %llu blocks safely\n",
 			sb->s_id, n_blocks_count);
 		if (sizeof(sector_t) < 8)
-			ext4_warning(sb, __func__, "CONFIG_LBD not enabled");
+			ext4_warning(sb, __func__, "CONFIG_LBDAF not enabled");
 		return -EINVAL;
 	}
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 012c4251397..6e0c2d77c87 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1957,7 +1957,7 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
 	/* small i_blocks in vfs inode? */
 	if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
 		/*
-		 * CONFIG_LBD is not enabled implies the inode
+		 * CONFIG_LBDAF is not enabled implies the inode
 		 * i_block represent total blocks in 512 bytes
 		 * 32 == size of vfs inode i_blocks * 8
 		 */
@@ -2000,7 +2000,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
 
 	if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
 		/*
-		 * !has_huge_files or CONFIG_LBD not enabled implies that
+		 * !has_huge_files or CONFIG_LBDAF not enabled implies that
 		 * the inode i_block field represents total file blocks in
 		 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
 		 */
@@ -2436,13 +2436,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	if (has_huge_files) {
 		/*
 		 * Large file size enabled file system can only be
-		 * mount if kernel is build with CONFIG_LBD
+		 * mount if kernel is build with CONFIG_LBDAF
 		 */
 		if (sizeof(root->i_blocks) < sizeof(u64) &&
 				!(sb->s_flags & MS_RDONLY)) {
 			ext4_msg(sb, KERN_ERR, "Filesystem with huge "
 					"files cannot be mounted read-write "
-					"without CONFIG_LBD");
+					"without CONFIG_LBDAF");
 			goto failed_mount;
 		}
 	}
@@ -2566,7 +2566,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		ext4_msg(sb, KERN_ERR, "filesystem"
 			" too large to mount safely");
 		if (sizeof(sector_t) < 8)
-			ext4_msg(sb, KERN_WARNING, "CONFIG_LBD not enabled");
+			ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
 		goto failed_mount;
 	}
 
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index cad957cdb1e..5971359d209 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -1,6 +1,6 @@
 config GFS2_FS
 	tristate "GFS2 file system support"
-	depends on EXPERIMENTAL && (64BIT || LBD)
+	depends on EXPERIMENTAL && (64BIT || LBDAF)
 	select DLM if GFS2_FS_LOCKING_DLM
 	select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
 	select SYSFS if GFS2_FS_LOCKING_DLM
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index d33767f17ba..0d3ed7407a0 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -552,7 +552,7 @@ static unsigned long long ocfs2_max_file_offset(unsigned int bbits,
 	 */
 
 #if BITS_PER_LONG == 32
-# if defined(CONFIG_LBD)
+# if defined(CONFIG_LBDAF)
 	BUILD_BUG_ON(sizeof(sector_t) != 8);
 	/*
 	 * We might be limited by page cache size.
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index f65a53f8752..6127e24062d 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -24,7 +24,7 @@
  * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
  * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set.
  */
-#if defined(CONFIG_LBD) || (BITS_PER_LONG == 64)
+#if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64)
 # define XFS_BIG_BLKNOS	1
 # define XFS_BIG_INUMS	1
 #else
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 2e09efbca8d..a220d36f789 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -616,7 +616,7 @@ xfs_max_file_offset(
 	 */
 
 #if BITS_PER_LONG == 32
-# if defined(CONFIG_LBD)
+# if defined(CONFIG_LBDAF)
 	ASSERT(sizeof(sector_t) == 8);
 	pagefactor = PAGE_CACHE_SIZE;
 	bitshift = BITS_PER_LONG;
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index c5a71c38a95..fac104e7186 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -58,7 +58,7 @@ extern const char linux_proc_banner[];
 #define _RET_IP_		(unsigned long)__builtin_return_address(0)
 #define _THIS_IP_  ({ __label__ __here; __here: (unsigned long)&&__here; })
 
-#ifdef CONFIG_LBD
+#ifdef CONFIG_LBDAF
 # include <asm/div64.h>
 # define sector_div(a, b) do_div(a, b)
 #else
diff --git a/include/linux/types.h b/include/linux/types.h
index 5abe354020f..c42724f8c80 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -131,7 +131,7 @@ typedef		__s64		int64_t;
  *
  * blkcnt_t is the type of the inode's block count.
  */
-#ifdef CONFIG_LBD
+#ifdef CONFIG_LBDAF
 typedef u64 sector_t;
 typedef u64 blkcnt_t;
 #else
-- 
cgit v1.2.3-70-g09d2


From 0ea920d211e0a870871965418923b08da2025b4a Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Wed, 17 Jun 2009 21:54:48 +0000
Subject: af_iucv: Return -EAGAIN if iucv msg limit is exceeded

If the iucv message limit for a communication path is exceeded,
sendmsg() returns -EAGAIN instead of -EPIPE.
The calling application can then handle this error situtation,
e.g. to try again after waiting some time.

For blocking sockets, sendmsg() waits up to the socket timeout
before returning -EAGAIN. For the new wait condition, a macro
has been introduced and the iucv_sock_wait_state() has been
refactored to this macro.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ursula.braun@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/iucv/af_iucv.h |   2 -
 net/iucv/af_iucv.c         | 144 ++++++++++++++++++++++++++++++++-------------
 2 files changed, 103 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h
index 21ee49ffcba..f82a1e87737 100644
--- a/include/net/iucv/af_iucv.h
+++ b/include/net/iucv/af_iucv.h
@@ -94,8 +94,6 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
 			    poll_table *wait);
 void iucv_sock_link(struct iucv_sock_list *l, struct sock *s);
 void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *s);
-int  iucv_sock_wait_state(struct sock *sk, int state, int state2,
-			  unsigned long timeo);
 int  iucv_sock_wait_cnt(struct sock *sk, unsigned long timeo);
 void iucv_accept_enqueue(struct sock *parent, struct sock *sk);
 void iucv_accept_unlink(struct sock *sk);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 42b7198a688..abadb4a846c 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -53,6 +53,38 @@ static const u8 iprm_shutdown[8] =
 #define CB_TRGCLS(skb)	((skb)->cb + CB_TAG_LEN) /* iucv msg target class */
 #define CB_TRGCLS_LEN	(TRGCLS_SIZE)
 
+#define __iucv_sock_wait(sk, condition, timeo, ret)			\
+do {									\
+	DEFINE_WAIT(__wait);						\
+	long __timeo = timeo;						\
+	ret = 0;							\
+	while (!(condition)) {						\
+		prepare_to_wait(sk->sk_sleep, &__wait, TASK_INTERRUPTIBLE); \
+		if (!__timeo) {						\
+			ret = -EAGAIN;					\
+			break;						\
+		}							\
+		if (signal_pending(current)) {				\
+			ret = sock_intr_errno(__timeo);			\
+			break;						\
+		}							\
+		release_sock(sk);					\
+		__timeo = schedule_timeout(__timeo);			\
+		lock_sock(sk);						\
+		ret = sock_error(sk);					\
+		if (ret)						\
+			break;						\
+	}								\
+	finish_wait(sk->sk_sleep, &__wait);				\
+} while (0)
+
+#define iucv_sock_wait(sk, condition, timeo)				\
+({									\
+	int __ret = 0;							\
+	if (!(condition))						\
+		__iucv_sock_wait(sk, condition, timeo, __ret);		\
+	__ret;								\
+})
 
 static void iucv_sock_kill(struct sock *sk);
 static void iucv_sock_close(struct sock *sk);
@@ -121,6 +153,48 @@ static inline size_t iucv_msg_length(struct iucv_message *msg)
 	return msg->length;
 }
 
+/**
+ * iucv_sock_in_state() - check for specific states
+ * @sk:		sock structure
+ * @state:	first iucv sk state
+ * @state:	second iucv sk state
+ *
+ * Returns true if the socket in either in the first or second state.
+ */
+static int iucv_sock_in_state(struct sock *sk, int state, int state2)
+{
+	return (sk->sk_state == state || sk->sk_state == state2);
+}
+
+/**
+ * iucv_below_msglim() - function to check if messages can be sent
+ * @sk:		sock structure
+ *
+ * Returns true if the send queue length is lower than the message limit.
+ * Always returns true if the socket is not connected (no iucv path for
+ * checking the message limit).
+ */
+static inline int iucv_below_msglim(struct sock *sk)
+{
+	struct iucv_sock *iucv = iucv_sk(sk);
+
+	if (sk->sk_state != IUCV_CONNECTED)
+		return 1;
+	return (skb_queue_len(&iucv->send_skb_q) < iucv->path->msglim);
+}
+
+/**
+ * iucv_sock_wake_msglim() - Wake up thread waiting on msg limit
+ */
+static void iucv_sock_wake_msglim(struct sock *sk)
+{
+	read_lock(&sk->sk_callback_lock);
+	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+		wake_up_interruptible_all(sk->sk_sleep);
+	sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+	read_unlock(&sk->sk_callback_lock);
+}
+
 /* Timers */
 static void iucv_sock_timeout(unsigned long arg)
 {
@@ -212,7 +286,9 @@ static void iucv_sock_close(struct sock *sk)
 				timeo = sk->sk_lingertime;
 			else
 				timeo = IUCV_DISCONN_TIMEOUT;
-			err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0, timeo);
+			err = iucv_sock_wait(sk,
+					iucv_sock_in_state(sk, IUCV_CLOSED, 0),
+					timeo);
 		}
 
 	case IUCV_CLOSING:   /* fall through */
@@ -393,39 +469,6 @@ struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock)
 	return NULL;
 }
 
-int iucv_sock_wait_state(struct sock *sk, int state, int state2,
-			 unsigned long timeo)
-{
-	DECLARE_WAITQUEUE(wait, current);
-	int err = 0;
-
-	add_wait_queue(sk->sk_sleep, &wait);
-	while (sk->sk_state != state && sk->sk_state != state2) {
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		if (!timeo) {
-			err = -EAGAIN;
-			break;
-		}
-
-		if (signal_pending(current)) {
-			err = sock_intr_errno(timeo);
-			break;
-		}
-
-		release_sock(sk);
-		timeo = schedule_timeout(timeo);
-		lock_sock(sk);
-
-		err = sock_error(sk);
-		if (err)
-			break;
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(sk->sk_sleep, &wait);
-	return err;
-}
-
 /* Bind an unbound socket */
 static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
 			  int addr_len)
@@ -570,8 +613,9 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr,
 	}
 
 	if (sk->sk_state != IUCV_CONNECTED) {
-		err = iucv_sock_wait_state(sk, IUCV_CONNECTED, IUCV_DISCONN,
-				sock_sndtimeo(sk, flags & O_NONBLOCK));
+		err = iucv_sock_wait(sk, iucv_sock_in_state(sk, IUCV_CONNECTED,
+							    IUCV_DISCONN),
+				     sock_sndtimeo(sk, flags & O_NONBLOCK));
 	}
 
 	if (sk->sk_state == IUCV_DISCONN) {
@@ -725,9 +769,11 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 	struct iucv_message txmsg;
 	struct cmsghdr *cmsg;
 	int cmsg_done;
+	long timeo;
 	char user_id[9];
 	char appl_id[9];
 	int err;
+	int noblock = msg->msg_flags & MSG_DONTWAIT;
 
 	err = sock_error(sk);
 	if (err)
@@ -799,8 +845,7 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 	 * this is fine for SOCK_SEQPACKET (unless we want to support
 	 * segmented records using the MSG_EOR flag), but
 	 * for SOCK_STREAM we might want to improve it in future */
-	skb = sock_alloc_send_skb(sk, len, msg->msg_flags & MSG_DONTWAIT,
-				  &err);
+	skb = sock_alloc_send_skb(sk, len, noblock, &err);
 	if (!skb)
 		goto out;
 	if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
@@ -808,6 +853,18 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 		goto fail;
 	}
 
+	/* wait if outstanding messages for iucv path has reached */
+	timeo = sock_sndtimeo(sk, noblock);
+	err = iucv_sock_wait(sk, iucv_below_msglim(sk), timeo);
+	if (err)
+		goto fail;
+
+	/* return -ECONNRESET if the socket is no longer connected */
+	if (sk->sk_state != IUCV_CONNECTED) {
+		err = -ECONNRESET;
+		goto fail;
+	}
+
 	/* increment and save iucv message tag for msg_completion cbk */
 	txmsg.tag = iucv->send_tag++;
 	memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN);
@@ -844,9 +901,10 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 			pr_err("Application %s on z/VM guest %s"
 				" exceeds message limit\n",
 				appl_id, user_id);
-		}
+			err = -EAGAIN;
+		} else
+			err = -EPIPE;
 		skb_unlink(skb, &iucv->send_skb_q);
-		err = -EPIPE;
 		goto fail;
 	}
 
@@ -1463,7 +1521,11 @@ static void iucv_callback_txdone(struct iucv_path *path,
 
 		spin_unlock_irqrestore(&list->lock, flags);
 
-		kfree_skb(this);
+		if (this) {
+			kfree_skb(this);
+			/* wake up any process waiting for sending */
+			iucv_sock_wake_msglim(sk);
+		}
 	}
 	BUG_ON(!this);
 
-- 
cgit v1.2.3-70-g09d2


From f9188e023c248d73f5b4a589b480e065c1864068 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 18 Jun 2009 22:20:52 +0200
Subject: perf_counter: Make callchain samples extensible

Before exposing upstream tools to a callchain-samples ABI, tidy it
up to make it more extensible in the future:

Use markers in the IP chain to denote context, use (u64)-1..-4095 range
for these context markers because we use them for ERR_PTR(), so these
addresses are unlikely to be mapped.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_counter.c | 29 ++++++-----------------------
 include/linux/perf_counter.h       | 28 +++++++++++++++++-----------
 2 files changed, 23 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index ce1ae3f1f86..76dfef23f78 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -1555,9 +1555,9 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
  */
 
 static inline
-void callchain_store(struct perf_callchain_entry *entry, unsigned long ip)
+void callchain_store(struct perf_callchain_entry *entry, u64 ip)
 {
-	if (entry->nr < MAX_STACK_DEPTH)
+	if (entry->nr < PERF_MAX_STACK_DEPTH)
 		entry->ip[entry->nr++] = ip;
 }
 
@@ -1602,22 +1602,10 @@ static const struct stacktrace_ops backtrace_ops = {
 static void
 perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
-	unsigned long bp;
-	char *stack;
-	int nr = entry->nr;
-
+	callchain_store(entry, PERF_CONTEXT_KERNEL);
 	callchain_store(entry, regs->ip);
 
-	stack = ((char *)regs + sizeof(struct pt_regs));
-#ifdef CONFIG_FRAME_POINTER
-	get_bp(bp);
-#else
-	bp = 0;
-#endif
-
-	dump_trace(NULL, regs, (void *)&stack, bp, &backtrace_ops, entry);
-
-	entry->kernel = entry->nr - nr;
+	dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
 }
 
 /*
@@ -1669,16 +1657,16 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
 	struct stack_frame frame;
 	const void __user *fp;
-	int nr = entry->nr;
 
 	if (!user_mode(regs))
 		regs = task_pt_regs(current);
 
 	fp = (void __user *)regs->bp;
 
+	callchain_store(entry, PERF_CONTEXT_USER);
 	callchain_store(entry, regs->ip);
 
-	while (entry->nr < MAX_STACK_DEPTH) {
+	while (entry->nr < PERF_MAX_STACK_DEPTH) {
 		frame.next_frame	     = NULL;
 		frame.return_address = 0;
 
@@ -1691,8 +1679,6 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
 		callchain_store(entry, frame.return_address);
 		fp = frame.next_frame;
 	}
-
-	entry->user = entry->nr - nr;
 }
 
 static void
@@ -1728,9 +1714,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 		entry = &__get_cpu_var(irq_entry);
 
 	entry->nr = 0;
-	entry->hv = 0;
-	entry->kernel = 0;
-	entry->user = 0;
 
 	perf_do_callchain(regs, entry);
 
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 0765e8e6984..e7e7e024276 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -343,23 +343,22 @@ enum perf_event_type {
 	 *	{ u64			nr;
 	 *	  { u64 id, val; }	cnt[nr];  } && PERF_SAMPLE_GROUP
 	 *
-	 *	{ u16			nr,
-	 *				hv,
-	 *				kernel,
-	 *				user;
+	 *	{ u64			nr,
 	 *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
 	 * };
 	 */
 };
 
-#define MAX_STACK_DEPTH			255
+enum perf_callchain_context {
+	PERF_CONTEXT_HV			= (__u64)-32,
+	PERF_CONTEXT_KERNEL		= (__u64)-128,
+	PERF_CONTEXT_USER		= (__u64)-512,
 
-struct perf_callchain_entry {
-	__u16				nr;
-	__u16				hv;
-	__u16				kernel;
-	__u16				user;
-	__u64				ip[MAX_STACK_DEPTH];
+	PERF_CONTEXT_GUEST		= (__u64)-2048,
+	PERF_CONTEXT_GUEST_KERNEL	= (__u64)-2176,
+	PERF_CONTEXT_GUEST_USER		= (__u64)-2560,
+
+	PERF_CONTEXT_MAX		= (__u64)-4095,
 };
 
 #ifdef __KERNEL__
@@ -381,6 +380,13 @@ struct perf_callchain_entry {
 #include <linux/pid_namespace.h>
 #include <asm/atomic.h>
 
+#define PERF_MAX_STACK_DEPTH		255
+
+struct perf_callchain_entry {
+	__u64				nr;
+	__u64				ip[PERF_MAX_STACK_DEPTH];
+};
+
 struct task_struct;
 
 /**
-- 
cgit v1.2.3-70-g09d2


From e5289d4a181fb6c0b7a7607649af2ffdc491335c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 19 Jun 2009 13:22:51 +0200
Subject: perf_counter: Simplify and fix task migration counting

The task migrations counter was causing rare and hard to decypher
memory corruptions under load. After a day of debugging and bisection
we found that the problem was introduced with:

  3f731ca: perf_counter: Fix cpu migration counter

Turning them off fixes the crashes. Incidentally, the whole
perf_counter_task_migration() logic can be done simpler as well,
by injecting a proper sw-counter event.

This cleanup also fixed the crashes. The precise failure mode is
not completely clear yet, but we are clearly not unhappy about
having a fix ;-)

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h |  4 ----
 kernel/perf_counter.c        | 23 +----------------------
 kernel/sched.c               |  3 ++-
 3 files changed, 3 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index e7e7e024276..89698d8aba5 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -682,8 +682,6 @@ static inline void perf_counter_mmap(struct vm_area_struct *vma)
 extern void perf_counter_comm(struct task_struct *tsk);
 extern void perf_counter_fork(struct task_struct *tsk);
 
-extern void perf_counter_task_migration(struct task_struct *task, int cpu);
-
 extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
 
 extern int sysctl_perf_counter_paranoid;
@@ -724,8 +722,6 @@ static inline void perf_counter_mmap(struct vm_area_struct *vma)	{ }
 static inline void perf_counter_comm(struct task_struct *tsk)		{ }
 static inline void perf_counter_fork(struct task_struct *tsk)		{ }
 static inline void perf_counter_init(void)				{ }
-static inline void perf_counter_task_migration(struct task_struct *task,
-					       int cpu)			{ }
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 7e9108efd30..8d4f0dd41c2 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -124,7 +124,7 @@ void perf_enable(void)
 
 static void get_ctx(struct perf_counter_context *ctx)
 {
-	atomic_inc(&ctx->refcount);
+	WARN_ON(!atomic_inc_not_zero(&ctx->refcount));
 }
 
 static void free_ctx(struct rcu_head *head)
@@ -3467,27 +3467,6 @@ static const struct pmu perf_ops_task_clock = {
 	.read		= task_clock_perf_counter_read,
 };
 
-/*
- * Software counter: cpu migrations
- */
-void perf_counter_task_migration(struct task_struct *task, int cpu)
-{
-	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
-	struct perf_counter_context *ctx;
-
-	perf_swcounter_ctx_event(&cpuctx->ctx, PERF_TYPE_SOFTWARE,
-				 PERF_COUNT_SW_CPU_MIGRATIONS,
-				 1, 1, NULL, 0);
-
-	ctx = perf_pin_task_context(task);
-	if (ctx) {
-		perf_swcounter_ctx_event(ctx, PERF_TYPE_SOFTWARE,
-					 PERF_COUNT_SW_CPU_MIGRATIONS,
-					 1, 1, NULL, 0);
-		perf_unpin_context(ctx);
-	}
-}
-
 #ifdef CONFIG_EVENT_PROFILE
 void perf_tpcounter_event(int event_id)
 {
diff --git a/kernel/sched.c b/kernel/sched.c
index 8fb88a906aa..f46540b359c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1978,7 +1978,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 		if (task_hot(p, old_rq->clock, NULL))
 			schedstat_inc(p, se.nr_forced2_migrations);
 #endif
-		perf_counter_task_migration(p, new_cpu);
+		perf_swcounter_event(PERF_COUNT_SW_CPU_MIGRATIONS,
+				     1, 1, NULL, 0);
 	}
 	p->se.vruntime -= old_cfsrq->min_vruntime -
 					 new_cfsrq->min_vruntime;
-- 
cgit v1.2.3-70-g09d2


From 9844813f226f6d07e1544e915529cb88f4fcb868 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Sun, 14 Jun 2009 02:00:02 -0400
Subject: asm-generic: uaccess: add missing access_ok() check to strnlen_user()

The strnlen_user() function was missing a access_ok() check on the pointer
given.  We've had cases on Blackfin systems where test programs caused
kernel crashes here because userspace passed up a NULL/-1 pointer and the
kernel gladly attempted to run strlen() on it.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/uaccess.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h
index 6d8cab22e29..5dd511b62ce 100644
--- a/include/asm-generic/uaccess.h
+++ b/include/asm-generic/uaccess.h
@@ -291,6 +291,8 @@ strncpy_from_user(char *dst, const char __user *src, long count)
 #ifndef strnlen_user
 static inline long strnlen_user(const char __user *src, long n)
 {
+	if (!access_ok(VERIFY_READ, src, 1))
+		return 0;
 	return strlen((void * __force)src) + 1;
 }
 #endif
-- 
cgit v1.2.3-70-g09d2


From a9ede5b355aabd667ed690f858c925a23927027b Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Sun, 14 Jun 2009 02:00:03 -0400
Subject: asm-generic: uaccess: fix up local access_ok() usage

There's no reason that I can see to use the short __access_ok() form
directly when the access_ok() is clearer in intent and for most people,
expands to the same C code (i.e. always specify the first field -- access
type).  Not all no-mmu systems lack memory protection, so the read/write
could feasibly be checked.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/uaccess.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h
index 5dd511b62ce..b218b8513d0 100644
--- a/include/asm-generic/uaccess.h
+++ b/include/asm-generic/uaccess.h
@@ -163,7 +163,7 @@ static inline __must_check long __copy_to_user(void __user *to,
 #define put_user(x, ptr)					\
 ({								\
 	might_sleep();						\
-	__access_ok(ptr, sizeof (*ptr)) ?			\
+	access_ok(VERIFY_WRITE, ptr, sizeof(*ptr)) ?		\
 		__put_user(x, ptr) :				\
 		-EFAULT;					\
 })
@@ -219,7 +219,7 @@ extern int __put_user_bad(void) __attribute__((noreturn));
 #define get_user(x, ptr)					\
 ({								\
 	might_sleep();						\
-	__access_ok(ptr, sizeof (*ptr)) ?			\
+	access_ok(VERIFY_READ, ptr, sizeof(*ptr)) ?		\
 		__get_user(x, ptr) :				\
 		-EFAULT;					\
 })
@@ -244,7 +244,7 @@ static inline long copy_from_user(void *to,
 		const void __user * from, unsigned long n)
 {
 	might_sleep();
-	if (__access_ok(from, n))
+	if (access_ok(VERIFY_READ, from, n))
 		return __copy_from_user(to, from, n);
 	else
 		return n;
@@ -254,7 +254,7 @@ static inline long copy_to_user(void __user *to,
 		const void *from, unsigned long n)
 {
 	might_sleep();
-	if (__access_ok(to, n))
+	if (access_ok(VERIFY_WRITE, to, n))
 		return __copy_to_user(to, from, n);
 	else
 		return n;
@@ -278,7 +278,7 @@ __strncpy_from_user(char *dst, const char __user *src, long count)
 static inline long
 strncpy_from_user(char *dst, const char __user *src, long count)
 {
-	if (!__access_ok(src, 1))
+	if (!access_ok(VERIFY_READ, src, 1))
 		return -EFAULT;
 	return __strncpy_from_user(dst, src, count);
 }
@@ -318,7 +318,7 @@ static inline __must_check unsigned long
 clear_user(void __user *to, unsigned long n)
 {
 	might_sleep();
-	if (!__access_ok(to, n))
+	if (!access_ok(VERIFY_WRITE, to, n))
 		return n;
 
 	return __clear_user(to, n);
-- 
cgit v1.2.3-70-g09d2


From 804387a1af87f66a4b93eee230ba98f8b906b088 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 14 Jun 2009 22:38:11 +0200
Subject: asm-generic: drop HARDIRQ_BITS definition from hardirq.h

Architechtures normally don't need to set a HARDIRQ_BITS
unless they have hardcoded a specific value in assembly.
This drops the definition from asm-generic/hardirq.h, which
results in linux/hardirq.h setting its default of 10.

Both the old default of 8 and the linux/hardirq.h default
of 10 are sufficient because they only limit the number
of nested hardirqs, and we normally run out of stack space
much earlier than exceeding 256 or even 1024 nested interrupts.

Reported-by: Mike Frysinger <vapier@gentoo.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/hardirq.h | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/hardirq.h b/include/asm-generic/hardirq.h
index 3d5d2c906ab..23bb4dad496 100644
--- a/include/asm-generic/hardirq.h
+++ b/include/asm-generic/hardirq.h
@@ -11,19 +11,6 @@ typedef struct {
 
 #include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
 
-#ifndef HARDIRQ_BITS
-#define HARDIRQ_BITS	8
-#endif
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially all IRQ sources in the system
- * nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
-#endif
-
 #ifndef ack_bad_irq
 static inline void ack_bad_irq(unsigned int irq)
 {
-- 
cgit v1.2.3-70-g09d2


From fcec9bf12442d0cd50d6cee125d168cfc3f37c5e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 19 Jun 2009 10:30:29 +0200
Subject: asm-generic: hook up new system calls

sys_rt_tgsigqueueinfo and sys_perf_counter_open
have been added in 2.6.31, so hook them up in the
generic unistd.h file.

Since the file is now in the mainline kernel, we
are no longer reordering the numbers but just add
system calls at the end.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/unistd.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index 5b34b6233d6..1125e5a1ee5 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -618,8 +618,13 @@ __SYSCALL(__NR_migrate_pages, sys_migrate_pages)
 __SYSCALL(__NR_move_pages, sys_move_pages)
 #endif
 
+#define __NR_rt_tgsigqueueinfo 240
+__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
+#define __NR_perf_counter_open 241
+__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open)
+
 #undef __NR_syscalls
-#define __NR_syscalls 240
+#define __NR_syscalls 242
 
 /*
  * All syscalls below here should go away really,
-- 
cgit v1.2.3-70-g09d2


From 352da9820e5506e3b8496e6052a2ad9c488efae8 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 19 Jun 2009 16:58:17 +0200
Subject: i2c: Kill client_register and client_unregister methods

These methods were useful in the legacy binding model but no longer in
the new (standard) binding model. There are no users left so we can
drop them.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: David Brownell <dbrownell@users.sourceforge.net>
---
 Documentation/feature-removal-schedule.txt |  3 +--
 drivers/i2c/i2c-core.c                     | 30 +-----------------------------
 include/linux/i2c.h                        |  4 ----
 3 files changed, 2 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 8d07ed31207..9163dade070 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -368,8 +368,7 @@ Who:  Krzysztof Piotr Oledzki <ole@ans.pl>
 
 ---------------------------
 
-What:	i2c_attach_client(), i2c_detach_client(), i2c_driver->detach_client(),
-	i2c_adapter->client_register(), i2c_adapter->client_unregister
+What:	i2c_attach_client(), i2c_detach_client(), i2c_driver->detach_client()
 When:	2.6.30
 Check:	i2c_attach_client i2c_detach_client
 Why:	Deprecated by the new (standard) device driver binding model. Use
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 5ed622ee65c..fc18fdbffd3 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -308,14 +308,6 @@ void i2c_unregister_device(struct i2c_client *client)
 		return;
 	}
 
-	if (adapter->client_unregister) {
-		if (adapter->client_unregister(client)) {
-			dev_warn(&client->dev,
-				 "client_unregister [%s] failed\n",
-				 client->name);
-		}
-	}
-
 	mutex_lock(&adapter->clist_lock);
 	list_del(&client->list);
 	mutex_unlock(&adapter->clist_lock);
@@ -855,14 +847,6 @@ int i2c_attach_client(struct i2c_client *client)
 	dev_dbg(&adapter->dev, "client [%s] registered with bus id %s\n",
 		client->name, dev_name(&client->dev));
 
-	if (adapter->client_register)  {
-		if (adapter->client_register(client)) {
-			dev_dbg(&adapter->dev, "client_register "
-				"failed for client [%s] at 0x%02x\n",
-				client->name, client->addr);
-		}
-	}
-
 	return 0;
 
 out_err:
@@ -875,17 +859,6 @@ EXPORT_SYMBOL(i2c_attach_client);
 int i2c_detach_client(struct i2c_client *client)
 {
 	struct i2c_adapter *adapter = client->adapter;
-	int res = 0;
-
-	if (adapter->client_unregister)  {
-		res = adapter->client_unregister(client);
-		if (res) {
-			dev_err(&client->dev,
-				"client_unregister [%s] failed, "
-				"client not detached\n", client->name);
-			goto out;
-		}
-	}
 
 	mutex_lock(&adapter->clist_lock);
 	list_del(&client->list);
@@ -895,8 +868,7 @@ int i2c_detach_client(struct i2c_client *client)
 	device_unregister(&client->dev);
 	wait_for_completion(&client->released);
 
- out:
-	return res;
+	return 0;
 }
 EXPORT_SYMBOL(i2c_detach_client);
 
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index ad258059603..b3f4606afa0 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -352,10 +352,6 @@ struct i2c_adapter {
 	const struct i2c_algorithm *algo; /* the algorithm to access the bus */
 	void *algo_data;
 
-	/* --- administration stuff. */
-	int (*client_register)(struct i2c_client *) __deprecated;
-	int (*client_unregister)(struct i2c_client *) __deprecated;
-
 	/* data fields that are valid for all devices	*/
 	u8 level; 			/* nesting level for lockdep */
 	struct mutex bus_lock;
-- 
cgit v1.2.3-70-g09d2


From 729d6dd571464954f625e6b80950d9e4e3bd94f7 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 19 Jun 2009 16:58:18 +0200
Subject: i2c: Get rid of the legacy binding model

We converted all the legacy i2c drivers so we can finally get rid of
the legacy binding model. Hooray!

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: David Brownell <dbrownell@users.sourceforge.net>
---
 Documentation/feature-removal-schedule.txt |  9 ----
 Documentation/i2c/writing-clients          | 16 ++----
 drivers/i2c/i2c-core.c                     | 83 +++---------------------------
 include/linux/i2c.h                        | 39 ++++----------
 4 files changed, 20 insertions(+), 127 deletions(-)

(limited to 'include')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 9163dade070..f8cd450be9a 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -368,15 +368,6 @@ Who:  Krzysztof Piotr Oledzki <ole@ans.pl>
 
 ---------------------------
 
-What:	i2c_attach_client(), i2c_detach_client(), i2c_driver->detach_client()
-When:	2.6.30
-Check:	i2c_attach_client i2c_detach_client
-Why:	Deprecated by the new (standard) device driver binding model. Use
-	i2c_driver->probe() and ->remove() instead.
-Who:	Jean Delvare <khali@linux-fr.org>
-
----------------------------
-
 What:	fscher and fscpos drivers
 When:	June 2009
 Why:	Deprecated by the new fschmd driver.
diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients
index c1a06f989cf..7860aafb483 100644
--- a/Documentation/i2c/writing-clients
+++ b/Documentation/i2c/writing-clients
@@ -126,19 +126,9 @@ different) configuration information, as do drivers handling chip variants
 that can't be distinguished by protocol probing, or which need some board
 specific information to operate correctly.
 
-Accordingly, the I2C stack now has two models for associating I2C devices
-with their drivers:  the original "legacy" model, and a newer one that's
-fully compatible with the Linux 2.6 driver model.  These models do not mix,
-since the "legacy" model requires drivers to create "i2c_client" device
-objects after SMBus style probing, while the Linux driver model expects
-drivers to be given such device objects in their probe() routines.
 
-The legacy model is deprecated now and will soon be removed, so we no
-longer document it here.
-
-
-Standard Driver Model Binding ("New Style")
--------------------------------------------
+Device/Driver Binding
+---------------------
 
 System infrastructure, typically board-specific initialization code or
 boot firmware, reports what I2C devices exist.  For example, there may be
@@ -201,7 +191,7 @@ a given I2C bus.  This is for example the case of hardware monitoring
 devices on a PC's SMBus.  In that case, you may want to let your driver
 detect supported devices automatically.  This is how the legacy model
 was working, and is now available as an extension to the standard
-driver model (so that we can finally get rid of the legacy model.)
+driver model.
 
 You simply have to define a detect callback which will attempt to
 identify supported devices (returning 0 for supported ones and -ENODEV
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index fc18fdbffd3..dc8bc913144 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -43,6 +43,7 @@ static DEFINE_IDR(i2c_adapter_idr);
 
 #define is_newstyle_driver(d) ((d)->probe || (d)->remove || (d)->detect)
 
+static int i2c_attach_client(struct i2c_client *client);
 static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver);
 
 /* ------------------------------------------------------------------------- */
@@ -83,10 +84,6 @@ static int i2c_device_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
 	struct i2c_client	*client = to_i2c_client(dev);
 
-	/* by definition, legacy drivers can't hotplug */
-	if (dev->driver)
-		return 0;
-
 	if (add_uevent_var(env, "MODALIAS=%s%s",
 			   I2C_MODULE_PREFIX, client->name))
 		return -ENOMEM;
@@ -455,7 +452,7 @@ static int i2c_register_adapter(struct i2c_adapter *adap)
 
 	dev_dbg(&adap->dev, "adapter [%s] registered\n", adap->name);
 
-	/* create pre-declared device nodes for new-style drivers */
+	/* create pre-declared device nodes */
 	if (adap->nr < __i2c_first_dynamic_bus_num)
 		i2c_scan_static_board_info(adap);
 
@@ -617,26 +614,9 @@ int i2c_del_adapter(struct i2c_adapter *adap)
 	if (res)
 		goto out_unlock;
 
-	/* detach any active clients. This must be done first, because
-	 * it can fail; in which case we give up. */
+	/* Detach any active clients */
 	list_for_each_entry_safe_reverse(client, _n, &adap->clients, list) {
-		struct i2c_driver	*driver;
-
-		driver = client->driver;
-
-		/* new style, follow standard driver model */
-		if (!driver || is_newstyle_driver(driver)) {
-			i2c_unregister_device(client);
-			continue;
-		}
-
-		/* legacy drivers create and remove clients themselves */
-		if ((res = driver->detach_client(client))) {
-			dev_err(&adap->dev, "detach_client failed for client "
-				"[%s] at address 0x%02x\n", client->name,
-				client->addr);
-			goto out_unlock;
-		}
+		i2c_unregister_device(client);
 	}
 
 	/* clean up the sysfs representation */
@@ -680,11 +660,7 @@ static int __attach_adapter(struct device *dev, void *data)
 
 /*
  * An i2c_driver is used with one or more i2c_client (device) nodes to access
- * i2c slave chips, on a bus instance associated with some i2c_adapter.  There
- * are two models for binding the driver to its device:  "new style" drivers
- * follow the standard Linux driver model and just respond to probe() calls
- * issued if the driver core sees they match(); "legacy" drivers create device
- * nodes themselves.
+ * i2c slave chips, on a bus instance associated with some i2c_adapter.
  */
 
 int i2c_register_driver(struct module *owner, struct i2c_driver *driver)
@@ -695,21 +671,11 @@ int i2c_register_driver(struct module *owner, struct i2c_driver *driver)
 	if (unlikely(WARN_ON(!i2c_bus_type.p)))
 		return -EAGAIN;
 
-	/* new style driver methods can't mix with legacy ones */
-	if (is_newstyle_driver(driver)) {
-		if (driver->detach_adapter || driver->detach_client) {
-			printk(KERN_WARNING
-					"i2c-core: driver [%s] is confused\n",
-					driver->driver.name);
-			return -EINVAL;
-		}
-	}
-
 	/* add the driver to the list of i2c drivers in the driver core */
 	driver->driver.owner = owner;
 	driver->driver.bus = &i2c_bus_type;
 
-	/* for new style drivers, when registration returns the driver core
+	/* When registration returns, the driver core
 	 * will have called probe() for all matching-but-unbound devices.
 	 */
 	res = driver_register(&driver->driver);
@@ -748,29 +714,11 @@ static int __detach_adapter(struct device *dev, void *data)
 	if (is_newstyle_driver(driver))
 		return 0;
 
-	/* Have a look at each adapter, if clients of this driver are still
-	 * attached. If so, detach them to be able to kill the driver
-	 * afterwards.
-	 */
 	if (driver->detach_adapter) {
 		if (driver->detach_adapter(adapter))
 			dev_err(&adapter->dev,
 				"detach_adapter failed for driver [%s]\n",
 				driver->driver.name);
-	} else {
-		struct i2c_client *client, *_n;
-
-		list_for_each_entry_safe(client, _n, &adapter->clients, list) {
-			if (client->driver != driver)
-				continue;
-			dev_dbg(&adapter->dev,
-				"detaching client [%s] at 0x%02x\n",
-				client->name, client->addr);
-			if (driver->detach_client(client))
-				dev_err(&adapter->dev, "detach_client "
-					"failed for client [%s] at 0x%02x\n",
-					client->name, client->addr);
-		}
 	}
 
 	return 0;
@@ -812,7 +760,7 @@ static int i2c_check_addr(struct i2c_adapter *adapter, int addr)
 	return device_for_each_child(&adapter->dev, &addr, __i2c_check_addr);
 }
 
-int i2c_attach_client(struct i2c_client *client)
+static int i2c_attach_client(struct i2c_client *client)
 {
 	struct i2c_adapter *adapter = client->adapter;
 	int res;
@@ -854,23 +802,6 @@ out_err:
 		"(%d)\n", client->name, client->addr, res);
 	return res;
 }
-EXPORT_SYMBOL(i2c_attach_client);
-
-int i2c_detach_client(struct i2c_client *client)
-{
-	struct i2c_adapter *adapter = client->adapter;
-
-	mutex_lock(&adapter->clist_lock);
-	list_del(&client->list);
-	mutex_unlock(&adapter->clist_lock);
-
-	init_completion(&client->released);
-	device_unregister(&client->dev);
-	wait_for_completion(&client->released);
-
-	return 0;
-}
-EXPORT_SYMBOL(i2c_detach_client);
 
 /**
  * i2c_use_client - increments the reference count of the i2c client structure
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index b3f4606afa0..43a3545670b 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -100,9 +100,8 @@ extern s32 i2c_smbus_write_i2c_block_data(struct i2c_client *client,
  * @class: What kind of i2c device we instantiate (for detect)
  * @attach_adapter: Callback for bus addition (for legacy drivers)
  * @detach_adapter: Callback for bus removal (for legacy drivers)
- * @detach_client: Callback for device removal (for legacy drivers)
- * @probe: Callback for device binding (new-style drivers)
- * @remove: Callback for device unbinding (new-style drivers)
+ * @probe: Callback for device binding
+ * @remove: Callback for device unbinding
  * @shutdown: Callback for device shutdown
  * @suspend: Callback for device suspend
  * @resume: Callback for device resume
@@ -137,26 +136,14 @@ struct i2c_driver {
 	int id;
 	unsigned int class;
 
-	/* Notifies the driver that a new bus has appeared. This routine
-	 * can be used by the driver to test if the bus meets its conditions
-	 * & seek for the presence of the chip(s) it supports. If found, it
-	 * registers the client(s) that are on the bus to the i2c admin. via
-	 * i2c_attach_client.  (LEGACY I2C DRIVERS ONLY)
+	/* Notifies the driver that a new bus has appeared or is about to be
+	 * removed. You should avoid using this if you can, it will probably
+	 * be removed in a near future.
 	 */
 	int (*attach_adapter)(struct i2c_adapter *);
 	int (*detach_adapter)(struct i2c_adapter *);
 
-	/* tells the driver that a client is about to be deleted & gives it
-	 * the chance to remove its private data. Also, if the client struct
-	 * has been dynamically allocated by the driver in the function above,
-	 * it must be freed here.  (LEGACY I2C DRIVERS ONLY)
-	 */
-	int (*detach_client)(struct i2c_client *) __deprecated;
-
-	/* Standard driver model interfaces, for "new style" i2c drivers.
-	 * With the driver model, device enumeration is NEVER done by drivers;
-	 * it's done by infrastructure.  (NEW STYLE DRIVERS ONLY)
-	 */
+	/* Standard driver model interfaces */
 	int (*probe)(struct i2c_client *, const struct i2c_device_id *);
 	int (*remove)(struct i2c_client *);
 
@@ -248,11 +235,10 @@ static inline void i2c_set_clientdata(struct i2c_client *dev, void *data)
  * that, such as chip type, configuration, associated IRQ, and so on.
  *
  * i2c_board_info is used to build tables of information listing I2C devices
- * that are present.  This information is used to grow the driver model tree
- * for "new style" I2C drivers.  For mainboards this is done statically using
- * i2c_register_board_info(); bus numbers identify adapters that aren't
- * yet available.  For add-on boards, i2c_new_device() does this dynamically
- * with the adapter already known.
+ * that are present.  This information is used to grow the driver model tree.
+ * For mainboards this is done statically using i2c_register_board_info();
+ * bus numbers identify adapters that aren't yet available.  For add-on boards,
+ * i2c_new_device() does this dynamically with the adapter already known.
  */
 struct i2c_board_info {
 	char		type[I2C_NAME_SIZE];
@@ -425,11 +411,6 @@ static inline int i2c_add_driver(struct i2c_driver *driver)
 	return i2c_register_driver(THIS_MODULE, driver);
 }
 
-/* These are deprecated, your driver should use the standard .probe()
- * and .remove() methods instead. */
-extern int __deprecated i2c_attach_client(struct i2c_client *);
-extern int __deprecated i2c_detach_client(struct i2c_client *);
-
 extern struct i2c_client *i2c_use_client(struct i2c_client *client);
 extern void i2c_release_client(struct i2c_client *client);
 
-- 
cgit v1.2.3-70-g09d2


From 36789b5ea52bba961122b45f4383f553ec3b5a6c Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 19 Jun 2009 16:58:18 +0200
Subject: i2c: Drop i2c_probe function

The legacy i2c_probe() function has no users left, get rid of it.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: David Brownell <dbrownell@users.sourceforge.net>
---
 drivers/i2c/i2c-core.c | 137 -------------------------------------------------
 include/linux/i2c.h    |   8 ---
 2 files changed, 145 deletions(-)

(limited to 'include')

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index dc8bc913144..d48438908e1 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -1032,144 +1032,7 @@ EXPORT_SYMBOL(i2c_master_recv);
  * Will not work for 10-bit addresses!
  * ----------------------------------------------------
  */
-static int i2c_probe_address(struct i2c_adapter *adapter, int addr, int kind,
-			     int (*found_proc) (struct i2c_adapter *, int, int))
-{
-	int err;
-
-	/* Make sure the address is valid */
-	if (addr < 0x03 || addr > 0x77) {
-		dev_warn(&adapter->dev, "Invalid probe address 0x%02x\n",
-			 addr);
-		return -EINVAL;
-	}
-
-	/* Skip if already in use */
-	if (i2c_check_addr(adapter, addr))
-		return 0;
-
-	/* Make sure there is something at this address, unless forced */
-	if (kind < 0) {
-		if (i2c_smbus_xfer(adapter, addr, 0, 0, 0,
-				   I2C_SMBUS_QUICK, NULL) < 0)
-			return 0;
-
-		/* prevent 24RF08 corruption */
-		if ((addr & ~0x0f) == 0x50)
-			i2c_smbus_xfer(adapter, addr, 0, 0, 0,
-				       I2C_SMBUS_QUICK, NULL);
-	}
-
-	/* Finally call the custom detection function */
-	err = found_proc(adapter, addr, kind);
-	/* -ENODEV can be returned if there is a chip at the given address
-	   but it isn't supported by this chip driver. We catch it here as
-	   this isn't an error. */
-	if (err == -ENODEV)
-		err = 0;
-
-	if (err)
-		dev_warn(&adapter->dev, "Client creation failed at 0x%x (%d)\n",
-			 addr, err);
-	return err;
-}
-
-int i2c_probe(struct i2c_adapter *adapter,
-	      const struct i2c_client_address_data *address_data,
-	      int (*found_proc) (struct i2c_adapter *, int, int))
-{
-	int i, err;
-	int adap_id = i2c_adapter_id(adapter);
-
-	/* Force entries are done first, and are not affected by ignore
-	   entries */
-	if (address_data->forces) {
-		const unsigned short * const *forces = address_data->forces;
-		int kind;
-
-		for (kind = 0; forces[kind]; kind++) {
-			for (i = 0; forces[kind][i] != I2C_CLIENT_END;
-			     i += 2) {
-				if (forces[kind][i] == adap_id
-				 || forces[kind][i] == ANY_I2C_BUS) {
-					dev_dbg(&adapter->dev, "found force "
-						"parameter for adapter %d, "
-						"addr 0x%02x, kind %d\n",
-						adap_id, forces[kind][i + 1],
-						kind);
-					err = i2c_probe_address(adapter,
-						forces[kind][i + 1],
-						kind, found_proc);
-					if (err)
-						return err;
-				}
-			}
-		}
-	}
-
-	/* Stop here if we can't use SMBUS_QUICK */
-	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_QUICK)) {
-		if (address_data->probe[0] == I2C_CLIENT_END
-		 && address_data->normal_i2c[0] == I2C_CLIENT_END)
-			return 0;
-
-		dev_dbg(&adapter->dev, "SMBus Quick command not supported, "
-			"can't probe for chips\n");
-		return -EOPNOTSUPP;
-	}
-
-	/* Probe entries are done second, and are not affected by ignore
-	   entries either */
-	for (i = 0; address_data->probe[i] != I2C_CLIENT_END; i += 2) {
-		if (address_data->probe[i] == adap_id
-		 || address_data->probe[i] == ANY_I2C_BUS) {
-			dev_dbg(&adapter->dev, "found probe parameter for "
-				"adapter %d, addr 0x%02x\n", adap_id,
-				address_data->probe[i + 1]);
-			err = i2c_probe_address(adapter,
-						address_data->probe[i + 1],
-						-1, found_proc);
-			if (err)
-				return err;
-		}
-	}
-
-	/* Normal entries are done last, unless shadowed by an ignore entry */
-	for (i = 0; address_data->normal_i2c[i] != I2C_CLIENT_END; i += 1) {
-		int j, ignore;
-
-		ignore = 0;
-		for (j = 0; address_data->ignore[j] != I2C_CLIENT_END;
-		     j += 2) {
-			if ((address_data->ignore[j] == adap_id ||
-			     address_data->ignore[j] == ANY_I2C_BUS)
-			 && address_data->ignore[j + 1]
-			    == address_data->normal_i2c[i]) {
-				dev_dbg(&adapter->dev, "found ignore "
-					"parameter for adapter %d, "
-					"addr 0x%02x\n", adap_id,
-					address_data->ignore[j + 1]);
-				ignore = 1;
-				break;
-			}
-		}
-		if (ignore)
-			continue;
-
-		dev_dbg(&adapter->dev, "found normal entry for adapter %d, "
-			"addr 0x%02x\n", adap_id,
-			address_data->normal_i2c[i]);
-		err = i2c_probe_address(adapter, address_data->normal_i2c[i],
-					-1, found_proc);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(i2c_probe);
 
-/* Separate detection function for new-style drivers */
 static int i2c_detect_address(struct i2c_client *temp_client, int kind,
 			      struct i2c_driver *driver)
 {
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 43a3545670b..db25a870843 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -419,14 +419,6 @@ extern void i2c_release_client(struct i2c_client *client);
 extern void i2c_clients_command(struct i2c_adapter *adap,
 				unsigned int cmd, void *arg);
 
-/* Detect function. It iterates over all possible addresses itself.
- * It will only call found_proc if some client is connected at the
- * specific address (unless a 'force' matched);
- */
-extern int i2c_probe(struct i2c_adapter *adapter,
-		const struct i2c_client_address_data *address_data,
-		int (*found_proc) (struct i2c_adapter *, int, int));
-
 extern struct i2c_adapter *i2c_get_adapter(int id);
 extern void i2c_put_adapter(struct i2c_adapter *adap);
 
-- 
cgit v1.2.3-70-g09d2


From 1e40ac12dab22d98d0178e87364cf4e36862809c Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 19 Jun 2009 16:58:19 +0200
Subject: i2c: Kill is_newstyle_driver

Legacy i2c drivers are gone, all drivers are new-style now, so there
is no point to check.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: David Brownell <dbrownell@users.sourceforge.net>
---
 drivers/i2c/i2c-core.c | 32 +-------------------------------
 include/linux/i2c.h    |  2 --
 2 files changed, 1 insertion(+), 33 deletions(-)

(limited to 'include')

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 06c428b5822..95fb997b41e 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -41,8 +41,6 @@
 static DEFINE_MUTEX(core_lock);
 static DEFINE_IDR(i2c_adapter_idr);
 
-#define is_newstyle_driver(d) ((d)->probe || (d)->remove || (d)->detect)
-
 static int i2c_check_addr(struct i2c_adapter *adapter, int addr);
 static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver);
 
@@ -64,12 +62,6 @@ static int i2c_device_match(struct device *dev, struct device_driver *drv)
 	struct i2c_client	*client = to_i2c_client(dev);
 	struct i2c_driver	*driver = to_i2c_driver(drv);
 
-	/* make legacy i2c drivers bypass driver model probing entirely;
-	 * such drivers scan each i2c adapter/bus themselves.
-	 */
-	if (!is_newstyle_driver(driver))
-		return 0;
-
 	/* match on an id table if there is one */
 	if (driver->id_table)
 		return i2c_match_id(driver->id_table, client) != NULL;
@@ -172,12 +164,6 @@ static int i2c_device_resume(struct device *dev)
 	return driver->resume(to_i2c_client(dev));
 }
 
-static void i2c_client_release(struct device *dev)
-{
-	struct i2c_client *client = to_i2c_client(dev);
-	complete(&client->released);
-}
-
 static void i2c_client_dev_release(struct device *dev)
 {
 	kfree(to_i2c_client(dev));
@@ -282,12 +268,7 @@ i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info)
 
 	client->dev.parent = &client->adapter->dev;
 	client->dev.bus = &i2c_bus_type;
-
-	if (client->driver && !is_newstyle_driver(client->driver)) {
-		client->dev.release = i2c_client_release;
-		dev_set_uevent_suppress(&client->dev, 1);
-	} else
-		client->dev.release = i2c_client_dev_release;
+	client->dev.release = i2c_client_dev_release;
 
 	dev_set_name(&client->dev, "%d-%04x", i2c_adapter_id(adap),
 		     client->addr);
@@ -321,14 +302,6 @@ EXPORT_SYMBOL_GPL(i2c_new_device);
 void i2c_unregister_device(struct i2c_client *client)
 {
 	struct i2c_adapter	*adapter = client->adapter;
-	struct i2c_driver	*driver = client->driver;
-
-	if (driver && !is_newstyle_driver(driver)) {
-		dev_err(&client->dev, "can't unregister devices "
-			"with legacy drivers\n");
-		WARN_ON(1);
-		return;
-	}
 
 	mutex_lock(&adapter->clist_lock);
 	list_del(&client->list);
@@ -736,9 +709,6 @@ static int __detach_adapter(struct device *dev, void *data)
 		i2c_unregister_device(client);
 	}
 
-	if (is_newstyle_driver(driver))
-		return 0;
-
 	if (driver->detach_adapter) {
 		if (driver->detach_adapter(adapter))
 			dev_err(&adapter->dev,
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index db25a870843..28b27282496 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -180,7 +180,6 @@ struct i2c_driver {
  * @irq: indicates the IRQ generated by this device (if any)
  * @list: list of active/busy clients (DEPRECATED)
  * @detected: member of an i2c_driver.clients list
- * @released: used to synchronize client releases & detaches and references
  *
  * An i2c_client identifies a single device (i.e. chip) connected to an
  * i2c bus. The behaviour exposed to Linux is defined by the driver
@@ -198,7 +197,6 @@ struct i2c_client {
 	int irq;			/* irq issued by device		*/
 	struct list_head list;		/* DEPRECATED */
 	struct list_head detected;
-	struct completion released;
 };
 #define to_i2c_client(d) container_of(d, struct i2c_client, dev)
 
-- 
cgit v1.2.3-70-g09d2


From e549c2b54dd90a056d6824b885d438b7437874f0 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 19 Jun 2009 16:58:19 +0200
Subject: i2c: Kill the redundant client list

We used to maintain our own per-adapter list of i2c clients, but this
is redundant with what the driver core does, and no longer needed.
Just drop the redundant list.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: David Brownell <dbrownell@users.sourceforge.net>
---
 drivers/i2c/i2c-core.c | 28 +++++++++++-----------------
 include/linux/i2c.h    |  4 ----
 2 files changed, 11 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 95fb997b41e..b1fd00d9a5c 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -276,10 +276,6 @@ i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info)
 	if (status)
 		goto out_err;
 
-	mutex_lock(&adap->clist_lock);
-	list_add_tail(&client->list, &adap->clients);
-	mutex_unlock(&adap->clist_lock);
-
 	dev_dbg(&adap->dev, "client [%s] registered with bus id %s\n",
 		client->name, dev_name(&client->dev));
 
@@ -301,12 +297,6 @@ EXPORT_SYMBOL_GPL(i2c_new_device);
  */
 void i2c_unregister_device(struct i2c_client *client)
 {
-	struct i2c_adapter	*adapter = client->adapter;
-
-	mutex_lock(&adapter->clist_lock);
-	list_del(&client->list);
-	mutex_unlock(&adapter->clist_lock);
-
 	device_unregister(&client->dev);
 }
 EXPORT_SYMBOL_GPL(i2c_unregister_device);
@@ -432,8 +422,6 @@ static int i2c_register_adapter(struct i2c_adapter *adap)
 		return -EAGAIN;
 
 	mutex_init(&adap->bus_lock);
-	mutex_init(&adap->clist_lock);
-	INIT_LIST_HEAD(&adap->clients);
 
 	mutex_lock(&core_lock);
 
@@ -583,6 +571,14 @@ static int i2c_do_del_adapter(struct device_driver *d, void *data)
 	return res;
 }
 
+static int __unregister_client(struct device *dev, void *dummy)
+{
+	struct i2c_client *client = i2c_verify_client(dev);
+	if (client)
+		i2c_unregister_device(client);
+	return 0;
+}
+
 /**
  * i2c_del_adapter - unregister I2C adapter
  * @adap: the adapter being unregistered
@@ -593,7 +589,6 @@ static int i2c_do_del_adapter(struct device_driver *d, void *data)
  */
 int i2c_del_adapter(struct i2c_adapter *adap)
 {
-	struct i2c_client *client, *_n;
 	int res = 0;
 
 	mutex_lock(&core_lock);
@@ -612,10 +607,9 @@ int i2c_del_adapter(struct i2c_adapter *adap)
 	if (res)
 		goto out_unlock;
 
-	/* Detach any active clients */
-	list_for_each_entry_safe_reverse(client, _n, &adap->clients, list) {
-		i2c_unregister_device(client);
-	}
+	/* Detach any active clients. This can't fail, thus we do not
+	   checking the returned value. */
+	res = device_for_each_child(&adap->dev, NULL, __unregister_client);
 
 	/* clean up the sysfs representation */
 	init_completion(&adap->dev_released);
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 28b27282496..5f8157610c6 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -178,7 +178,6 @@ struct i2c_driver {
  * @driver: device's driver, hence pointer to access routines
  * @dev: Driver model device node for the slave.
  * @irq: indicates the IRQ generated by this device (if any)
- * @list: list of active/busy clients (DEPRECATED)
  * @detected: member of an i2c_driver.clients list
  *
  * An i2c_client identifies a single device (i.e. chip) connected to an
@@ -195,7 +194,6 @@ struct i2c_client {
 	struct i2c_driver *driver;	/* and our access routines	*/
 	struct device dev;		/* the device structure		*/
 	int irq;			/* irq issued by device		*/
-	struct list_head list;		/* DEPRECATED */
 	struct list_head detected;
 };
 #define to_i2c_client(d) container_of(d, struct i2c_client, dev)
@@ -339,14 +337,12 @@ struct i2c_adapter {
 	/* data fields that are valid for all devices	*/
 	u8 level; 			/* nesting level for lockdep */
 	struct mutex bus_lock;
-	struct mutex clist_lock;
 
 	int timeout;			/* in jiffies */
 	int retries;
 	struct device dev;		/* the adapter device */
 
 	int nr;
-	struct list_head clients;	/* DEPRECATED */
 	char name[48];
 	struct completion dev_released;
 };
-- 
cgit v1.2.3-70-g09d2


From 99cd8e25875a109455b709b5a41d4891b8d8e58e Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 19 Jun 2009 16:58:20 +0200
Subject: i2c: Add a sysfs interface to instantiate devices

Add a sysfs interface to instantiate and delete I2C devices. This is
primarily a replacement of the force_* module parameters implemented
by some i2c drivers. These module parameters were implemented
internally by the I2C_CLIENT_INSMOD* macros, which don't scale well.

This can also be used when developing a driver on a self-soldered
board which doesn't yet have proper I2C device declaration at the
platform level, and presumably for various debugging situations.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: David Brownell <dbrownell@users.sourceforge.net>
---
 Documentation/i2c/instantiating-devices |  44 ++++++++++++
 drivers/i2c/i2c-core.c                  | 123 +++++++++++++++++++++++++++++++-
 include/linux/i2c.h                     |   3 +-
 3 files changed, 168 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/i2c/instantiating-devices b/Documentation/i2c/instantiating-devices
index b55ce57a84d..c740b7b4108 100644
--- a/Documentation/i2c/instantiating-devices
+++ b/Documentation/i2c/instantiating-devices
@@ -165,3 +165,47 @@ was done there. Two significant differences are:
 Once again, method 3 should be avoided wherever possible. Explicit device
 instantiation (methods 1 and 2) is much preferred for it is safer and
 faster.
+
+
+Method 4: Instantiate from user-space
+-------------------------------------
+
+In general, the kernel should know which I2C devices are connected and
+what addresses they live at. However, in certain cases, it does not, so a
+sysfs interface was added to let the user provide the information. This
+interface is made of 2 attribute files which are created in every I2C bus
+directory: new_device and delete_device. Both files are write only and you
+must write the right parameters to them in order to properly instantiate,
+respectively delete, an I2C device.
+
+File new_device takes 2 parameters: the name of the I2C device (a string)
+and the address of the I2C device (a number, typically expressed in
+hexadecimal starting with 0x, but can also be expressed in decimal.)
+
+File delete_device takes a single parameter: the address of the I2C
+device. As no two devices can live at the same address on a given I2C
+segment, the address is sufficient to uniquely identify the device to be
+deleted.
+
+Example:
+# echo eeprom 0x50 > /sys/class/i2c-adapter/i2c-3/new_device
+
+While this interface should only be used when in-kernel device declaration
+can't be done, there is a variety of cases where it can be helpful:
+* The I2C driver usually detects devices (method 3 above) but the bus
+  segment your device lives on doesn't have the proper class bit set and
+  thus detection doesn't trigger.
+* The I2C driver usually detects devices, but your device lives at an
+  unexpected address.
+* The I2C driver usually detects devices, but your device is not detected,
+  either because the detection routine is too strict, or because your
+  device is not officially supported yet but you know it is compatible.
+* You are developing a driver on a test board, where you soldered the I2C
+  device yourself.
+
+This interface is a replacement for the force_* module parameters some I2C
+drivers implement. Being implemented in i2c-core rather than in each
+device driver individually, it is much more efficient, and also has the
+advantage that you do not have to reload the driver to change a setting.
+You can also instantiate the device before the driver is loaded or even
+available, and you don't need to know what driver the device needs.
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index a2f1cd3766f..eb084fa0df8 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -38,11 +38,12 @@
 #include "i2c-core.h"
 
 
-/* core_lock protects i2c_adapter_idr, and guarantees
+/* core_lock protects i2c_adapter_idr, userspace_devices, and guarantees
    that device detection, deletion of detected devices, and attach_adapter
    and detach_adapter calls are serialized */
 static DEFINE_MUTEX(core_lock);
 static DEFINE_IDR(i2c_adapter_idr);
+static LIST_HEAD(userspace_devices);
 
 static int i2c_check_addr(struct i2c_adapter *adapter, int addr);
 static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver);
@@ -373,8 +374,128 @@ show_adapter_name(struct device *dev, struct device_attribute *attr, char *buf)
 	return sprintf(buf, "%s\n", adap->name);
 }
 
+/*
+ * Let users instantiate I2C devices through sysfs. This can be used when
+ * platform initialization code doesn't contain the proper data for
+ * whatever reason. Also useful for drivers that do device detection and
+ * detection fails, either because the device uses an unexpected address,
+ * or this is a compatible device with different ID register values.
+ *
+ * Parameter checking may look overzealous, but we really don't want
+ * the user to provide incorrect parameters.
+ */
+static ssize_t
+i2c_sysfs_new_device(struct device *dev, struct device_attribute *attr,
+		     const char *buf, size_t count)
+{
+	struct i2c_adapter *adap = to_i2c_adapter(dev);
+	struct i2c_board_info info;
+	struct i2c_client *client;
+	char *blank, end;
+	int res;
+
+	dev_warn(dev, "The new_device interface is still experimental "
+		 "and may change in a near future\n");
+	memset(&info, 0, sizeof(struct i2c_board_info));
+
+	blank = strchr(buf, ' ');
+	if (!blank) {
+		dev_err(dev, "%s: Missing parameters\n", "new_device");
+		return -EINVAL;
+	}
+	if (blank - buf > I2C_NAME_SIZE - 1) {
+		dev_err(dev, "%s: Invalid device name\n", "new_device");
+		return -EINVAL;
+	}
+	memcpy(info.type, buf, blank - buf);
+
+	/* Parse remaining parameters, reject extra parameters */
+	res = sscanf(++blank, "%hi%c", &info.addr, &end);
+	if (res < 1) {
+		dev_err(dev, "%s: Can't parse I2C address\n", "new_device");
+		return -EINVAL;
+	}
+	if (res > 1  && end != '\n') {
+		dev_err(dev, "%s: Extra parameters\n", "new_device");
+		return -EINVAL;
+	}
+
+	if (info.addr < 0x03 || info.addr > 0x77) {
+		dev_err(dev, "%s: Invalid I2C address 0x%hx\n", "new_device",
+			info.addr);
+		return -EINVAL;
+	}
+
+	client = i2c_new_device(adap, &info);
+	if (!client)
+		return -EEXIST;
+
+	/* Keep track of the added device */
+	mutex_lock(&core_lock);
+	list_add_tail(&client->detected, &userspace_devices);
+	mutex_unlock(&core_lock);
+	dev_info(dev, "%s: Instantiated device %s at 0x%02hx\n", "new_device",
+		 info.type, info.addr);
+
+	return count;
+}
+
+/*
+ * And of course let the users delete the devices they instantiated, if
+ * they got it wrong. This interface can only be used to delete devices
+ * instantiated by i2c_sysfs_new_device above. This guarantees that we
+ * don't delete devices to which some kernel code still has references.
+ *
+ * Parameter checking may look overzealous, but we really don't want
+ * the user to delete the wrong device.
+ */
+static ssize_t
+i2c_sysfs_delete_device(struct device *dev, struct device_attribute *attr,
+			const char *buf, size_t count)
+{
+	struct i2c_adapter *adap = to_i2c_adapter(dev);
+	struct i2c_client *client, *next;
+	unsigned short addr;
+	char end;
+	int res;
+
+	/* Parse parameters, reject extra parameters */
+	res = sscanf(buf, "%hi%c", &addr, &end);
+	if (res < 1) {
+		dev_err(dev, "%s: Can't parse I2C address\n", "delete_device");
+		return -EINVAL;
+	}
+	if (res > 1  && end != '\n') {
+		dev_err(dev, "%s: Extra parameters\n", "delete_device");
+		return -EINVAL;
+	}
+
+	/* Make sure the device was added through sysfs */
+	res = -ENOENT;
+	mutex_lock(&core_lock);
+	list_for_each_entry_safe(client, next, &userspace_devices, detected) {
+		if (client->addr == addr && client->adapter == adap) {
+			dev_info(dev, "%s: Deleting device %s at 0x%02hx\n",
+				 "delete_device", client->name, client->addr);
+
+			list_del(&client->detected);
+			i2c_unregister_device(client);
+			res = count;
+			break;
+		}
+	}
+	mutex_unlock(&core_lock);
+
+	if (res < 0)
+		dev_err(dev, "%s: Can't find device in list\n",
+			"delete_device");
+	return res;
+}
+
 static struct device_attribute i2c_adapter_attrs[] = {
 	__ATTR(name, S_IRUGO, show_adapter_name, NULL),
+	__ATTR(new_device, S_IWUSR, NULL, i2c_sysfs_new_device),
+	__ATTR(delete_device, S_IWUSR, NULL, i2c_sysfs_delete_device),
 	{ },
 };
 
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 5f8157610c6..844d2662a87 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -178,7 +178,8 @@ struct i2c_driver {
  * @driver: device's driver, hence pointer to access routines
  * @dev: Driver model device node for the slave.
  * @irq: indicates the IRQ generated by this device (if any)
- * @detected: member of an i2c_driver.clients list
+ * @detected: member of an i2c_driver.clients list or i2c-core's
+ *	userspace_devices list
  *
  * An i2c_client identifies a single device (i.e. chip) connected to an
  * i2c bus. The behaviour exposed to Linux is defined by the driver
-- 
cgit v1.2.3-70-g09d2


From 23af8400571ac4431fac0caefeac18f8aef490df Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 19 Jun 2009 16:58:20 +0200
Subject: i2c: Don't advertise i2c functions when not available

Surround i2c function declarations with ifdefs, so that they aren't
advertised when i2c-core isn't actually built. That way, drivers using
these functions unconditionally will result in an immediate build
failure, rather than a late linking failure which is harder to figure
out.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: Mark Brown <broonie@opensource.wolfsonmicro.com>
Cc: Paul Mundt <lethal@linux-sh.org>
---
 include/linux/i2c.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 844d2662a87..bc17f7fa7d3 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -47,6 +47,7 @@ struct i2c_driver;
 union i2c_smbus_data;
 struct i2c_board_info;
 
+#if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE)
 /*
  * The master routines are the ones normally used to transmit data to devices
  * on a bus (or read from them). Apart from two basic transfer functions to
@@ -93,6 +94,7 @@ extern s32 i2c_smbus_read_i2c_block_data(struct i2c_client *client,
 extern s32 i2c_smbus_write_i2c_block_data(struct i2c_client *client,
 					  u8 command, u8 length,
 					  const u8 *values);
+#endif /* I2C */
 
 /**
  * struct i2c_driver - represent an I2C device driver
@@ -260,6 +262,7 @@ struct i2c_board_info {
 	.type = dev_type, .addr = (dev_addr)
 
 
+#if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE)
 /* Add-on boards should register/unregister their devices; e.g. a board
  * with integrated I2C, a config eeprom, sensors, and a codec that's
  * used in conjunction with the primary hardware.
@@ -283,6 +286,7 @@ extern struct i2c_client *
 i2c_new_dummy(struct i2c_adapter *adap, u16 address);
 
 extern void i2c_unregister_device(struct i2c_client *);
+#endif /* I2C */
 
 /* Mainboard arch_initcall() code should register all its I2C devices.
  * This is done at arch_initcall time, before declaring any i2c adapters.
@@ -299,7 +303,7 @@ i2c_register_board_info(int busnum, struct i2c_board_info const *info,
 {
 	return 0;
 }
-#endif
+#endif /* I2C_BOARDINFO */
 
 /*
  * The following structs are for those who like to implement new bus drivers:
@@ -394,6 +398,7 @@ struct i2c_client_address_data {
 
 /* administration...
  */
+#if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE)
 extern int i2c_add_adapter(struct i2c_adapter *);
 extern int i2c_del_adapter(struct i2c_adapter *);
 extern int i2c_add_numbered_adapter(struct i2c_adapter *);
@@ -435,6 +440,7 @@ static inline int i2c_adapter_id(struct i2c_adapter *adap)
 {
 	return adap->nr;
 }
+#endif /* I2C */
 #endif /* __KERNEL__ */
 
 /**
-- 
cgit v1.2.3-70-g09d2


From c70366732f67dbdb32f7fe9c6aa59299b76feca6 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Fri, 19 Jun 2009 16:58:21 +0200
Subject: i2c: New macro to initialize i2c address lists on the fly

For video4linux we sometimes need to probe for a single i2c address.
Normally you would do it like this:

static const unsigned short addrs[] = {
	addr, I2C_CLIENT_END
};

client = i2c_new_probed_device(adapter, &info, addrs);

This is a bit awkward and I came up with this macro:

#define V4L2_I2C_ADDRS(addr, addrs...) \
	((const unsigned short []){ addr, ## addrs, I2C_CLIENT_END })

This can construct a list of one or more i2c addresses on the fly. But
this is something that really belongs in i2c.h, renamed to I2C_ADDRS.

With this macro we can just do:

client = i2c_new_probed_device(adapter, &info, I2C_ADDRS(addr));

Note that this can also be used to initialize an array:

static const unsigned short addrs[] = I2C_ADDRS(0x2a, 0x2c);

Whether you want to is another matter, but it works. This functionality is
also available in the oldest supported gcc (3.2).

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 include/linux/i2c.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index bc17f7fa7d3..f4784c0fe97 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -393,6 +393,10 @@ struct i2c_client_address_data {
 /* The numbers to use to set I2C bus address */
 #define ANY_I2C_BUS		0xffff
 
+/* Construct an I2C_CLIENT_END-terminated array of i2c addresses */
+#define I2C_ADDRS(addr, addrs...) \
+	((const unsigned short []){ addr, ## addrs, I2C_CLIENT_END })
+
 
 /* ----- functions exported by i2c.o */
 
-- 
cgit v1.2.3-70-g09d2


From 06d5caf47ef4fbd9efdceae33293c42778cb7b0c Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Tue, 16 Jun 2009 15:39:51 +0100
Subject: rfkill: don't restore software blocked state on persistent devices

The setting of the "persistent" flag is also made more explicit using
a new rfkill_init_sw_state() function, instead of special-casing
rfkill_set_sw_state() when it is called before registration.

Suspend is a bit of a corner case so we try to get away without adding
another hack to rfkill-input - it's going to be removed soon.
If the state does change over suspend, users will simply have to prod
rfkill-input twice in order to toggle the state.

Userspace policy agents will be able to implement a more consistent user
experience.  For example, they can avoid the above problem if they
toggle devices individually.  Then there would be no "global state"
to get out of sync.

Currently there are only two rfkill drivers with persistent soft-blocked
state.  thinkpad-acpi already checks the software state on resume.
eeepc-laptop will require modification.

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
CC: Marcel Holtmann <marcel@holtmann.org>
Acked-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/platform/x86/eeepc-laptop.c  |  8 ++++----
 drivers/platform/x86/thinkpad_acpi.c | 14 ++++++-------
 include/linux/rfkill.h               | 32 ++++++++++++++++++++++++-----
 net/rfkill/core.c                    | 40 ++++++++++++++++++++++--------------
 4 files changed, 63 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 03bf522bd7a..01682eca436 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -675,8 +675,8 @@ static int eeepc_hotk_add(struct acpi_device *device)
 		if (!ehotk->eeepc_wlan_rfkill)
 			goto wlan_fail;
 
-		rfkill_set_sw_state(ehotk->eeepc_wlan_rfkill,
-				    get_acpi(CM_ASL_WLAN) != 1);
+		rfkill_init_sw_state(ehotk->eeepc_wlan_rfkill,
+				     get_acpi(CM_ASL_WLAN) != 1);
 		result = rfkill_register(ehotk->eeepc_wlan_rfkill);
 		if (result)
 			goto wlan_fail;
@@ -693,8 +693,8 @@ static int eeepc_hotk_add(struct acpi_device *device)
 		if (!ehotk->eeepc_bluetooth_rfkill)
 			goto bluetooth_fail;
 
-		rfkill_set_sw_state(ehotk->eeepc_bluetooth_rfkill,
-				    get_acpi(CM_ASL_BLUETOOTH) != 1);
+		rfkill_init_sw_state(ehotk->eeepc_bluetooth_rfkill,
+				     get_acpi(CM_ASL_BLUETOOTH) != 1);
 		result = rfkill_register(ehotk->eeepc_bluetooth_rfkill);
 		if (result)
 			goto bluetooth_fail;
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 86e958539f4..40d64c03278 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -1163,8 +1163,8 @@ static int __init tpacpi_new_rfkill(const enum tpacpi_rfk_id id,
 {
 	struct tpacpi_rfk *atp_rfk;
 	int res;
-	bool initial_sw_state = false;
-	int initial_sw_status;
+	bool sw_state = false;
+	int sw_status;
 
 	BUG_ON(id >= TPACPI_RFK_SW_MAX || tpacpi_rfkill_switches[id]);
 
@@ -1185,17 +1185,17 @@ static int __init tpacpi_new_rfkill(const enum tpacpi_rfk_id id,
 	atp_rfk->id = id;
 	atp_rfk->ops = tp_rfkops;
 
-	initial_sw_status = (tp_rfkops->get_status)();
-	if (initial_sw_status < 0) {
+	sw_status = (tp_rfkops->get_status)();
+	if (sw_status < 0) {
 		printk(TPACPI_ERR
 			"failed to read initial state for %s, error %d\n",
-			name, initial_sw_status);
+			name, sw_status);
 	} else {
-		initial_sw_state = (initial_sw_status == TPACPI_RFK_RADIO_OFF);
+		sw_state = (sw_status == TPACPI_RFK_RADIO_OFF);
 		if (set_default) {
 			/* try to keep the initial state, since we ask the
 			 * firmware to preserve it across S5 in NVRAM */
-			rfkill_set_sw_state(atp_rfk->rfkill, initial_sw_state);
+			rfkill_init_sw_state(atp_rfk->rfkill, sw_state);
 		}
 	}
 	rfkill_set_hw_state(atp_rfk->rfkill, tpacpi_rfk_check_hwblock_state());
diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index 16e39c7a67f..dcac724340d 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -160,8 +160,9 @@ struct rfkill * __must_check rfkill_alloc(const char *name,
  * the rfkill structure. Before calling this function the driver needs
  * to be ready to service method calls from rfkill.
  *
- * If the software blocked state is not set before registration,
- * set_block will be called to initialize it to a default value.
+ * If rfkill_init_sw_state() is not called before registration,
+ * set_block() will be called to initialize the software blocked state
+ * to a default value.
  *
  * If the hardware blocked state is not set before registration,
  * it is assumed to be unblocked.
@@ -234,9 +235,11 @@ bool __must_check rfkill_set_hw_state(struct rfkill *rfkill, bool blocked);
  * rfkill drivers that get events when the soft-blocked state changes
  * (yes, some platforms directly act on input but allow changing again)
  * use this function to notify the rfkill core (and through that also
- * userspace) of the current state.  It is not necessary to notify on
- * resume; since hibernation can always change the soft-blocked state,
- * the rfkill core will unconditionally restore the previous state.
+ * userspace) of the current state.
+ *
+ * Drivers should also call this function after resume if the state has
+ * been changed by the user.  This only makes sense for "persistent"
+ * devices (see rfkill_init_sw_state()).
  *
  * This function can be called in any context, even from within rfkill
  * callbacks.
@@ -246,6 +249,21 @@ bool __must_check rfkill_set_hw_state(struct rfkill *rfkill, bool blocked);
  */
 bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked);
 
+/**
+ * rfkill_init_sw_state - Initialize persistent software block state
+ * @rfkill: pointer to the rfkill class to modify.
+ * @state: the current software block state to set
+ *
+ * rfkill drivers that preserve their software block state over power off
+ * use this function to notify the rfkill core (and through that also
+ * userspace) of their initial state.  It should only be used before
+ * registration.
+ *
+ * In addition, it marks the device as "persistent".  Persistent devices
+ * are expected to preserve preserve their own state when suspended.
+ */
+void rfkill_init_sw_state(struct rfkill *rfkill, bool blocked);
+
 /**
  * rfkill_set_states - Set the internal rfkill block states
  * @rfkill: pointer to the rfkill class to modify.
@@ -307,6 +325,10 @@ static inline bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked)
 	return blocked;
 }
 
+static inline void rfkill_init_sw_state(struct rfkill *rfkill, bool blocked)
+{
+}
+
 static inline void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw)
 {
 }
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 868d79f8ac1..dcf8df7c573 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -56,7 +56,6 @@ struct rfkill {
 	u32			idx;
 
 	bool			registered;
-	bool			suspended;
 	bool			persistent;
 
 	const struct rfkill_ops	*ops;
@@ -224,7 +223,7 @@ static void rfkill_send_events(struct rfkill *rfkill, enum rfkill_operation op)
 
 static void rfkill_event(struct rfkill *rfkill)
 {
-	if (!rfkill->registered || rfkill->suspended)
+	if (!rfkill->registered)
 		return;
 
 	kobject_uevent(&rfkill->dev.kobj, KOBJ_CHANGE);
@@ -508,19 +507,32 @@ bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked)
 	blocked = blocked || hwblock;
 	spin_unlock_irqrestore(&rfkill->lock, flags);
 
-	if (!rfkill->registered) {
-		rfkill->persistent = true;
-	} else {
-		if (prev != blocked && !hwblock)
-			schedule_work(&rfkill->uevent_work);
+	if (!rfkill->registered)
+		return blocked;
 
-		rfkill_led_trigger_event(rfkill);
-	}
+	if (prev != blocked && !hwblock)
+		schedule_work(&rfkill->uevent_work);
+
+	rfkill_led_trigger_event(rfkill);
 
 	return blocked;
 }
 EXPORT_SYMBOL(rfkill_set_sw_state);
 
+void rfkill_init_sw_state(struct rfkill *rfkill, bool blocked)
+{
+	unsigned long flags;
+
+	BUG_ON(!rfkill);
+	BUG_ON(rfkill->registered);
+
+	spin_lock_irqsave(&rfkill->lock, flags);
+	__rfkill_set_sw_state(rfkill, blocked);
+	rfkill->persistent = true;
+	spin_unlock_irqrestore(&rfkill->lock, flags);
+}
+EXPORT_SYMBOL(rfkill_init_sw_state);
+
 void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw)
 {
 	unsigned long flags;
@@ -718,8 +730,6 @@ static int rfkill_suspend(struct device *dev, pm_message_t state)
 
 	rfkill_pause_polling(rfkill);
 
-	rfkill->suspended = true;
-
 	return 0;
 }
 
@@ -728,10 +738,10 @@ static int rfkill_resume(struct device *dev)
 	struct rfkill *rfkill = to_rfkill(dev);
 	bool cur;
 
-	cur = !!(rfkill->state & RFKILL_BLOCK_SW);
-	rfkill_set_block(rfkill, cur);
-
-	rfkill->suspended = false;
+	if (!rfkill->persistent) {
+		cur = !!(rfkill->state & RFKILL_BLOCK_SW);
+		rfkill_set_block(rfkill, cur);
+	}
 
 	rfkill_resume_polling(rfkill);
 
-- 
cgit v1.2.3-70-g09d2


From 464902e812025792c9e33e19e1555c343672d5cf Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Tue, 16 Jun 2009 14:54:04 +0100
Subject: rfkill: export persistent attribute in sysfs

This information allows userspace to implement a hybrid policy where
it can store the rfkill soft-blocked state in platform non-volatile
storage if available, and if not then file-based storage can be used.

Some users prefer platform non-volatile storage because of the behaviour
when dual-booting multiple versions of Linux, or if the rfkill setting
is changed in the BIOS setting screens, or if the BIOS responds to
wireless-toggle hotkeys itself before the relevant platform driver has
been loaded.

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Acked-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 Documentation/rfkill.txt |  2 ++
 include/linux/rfkill.h   |  5 +++--
 net/rfkill/core.c        | 10 ++++++++++
 3 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/rfkill.txt b/Documentation/rfkill.txt
index c8acd8659e9..b4860509c31 100644
--- a/Documentation/rfkill.txt
+++ b/Documentation/rfkill.txt
@@ -111,6 +111,8 @@ following attributes:
 
 	name: Name assigned by driver to this key (interface or driver name).
 	type: Driver type string ("wlan", "bluetooth", etc).
+	persistent: Whether the soft blocked state is initialised from
+	            non-volatile storage at startup.
 	state: Current state of the transmitter
 		0: RFKILL_STATE_SOFT_BLOCKED
 			transmitter is turned off by software
diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index dcac724340d..e73e2429a1b 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -259,8 +259,9 @@ bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked);
  * userspace) of their initial state.  It should only be used before
  * registration.
  *
- * In addition, it marks the device as "persistent".  Persistent devices
- * are expected to preserve preserve their own state when suspended.
+ * In addition, it marks the device as "persistent", an attribute which
+ * can be read by userspace.  Persistent devices are expected to preserve
+ * their own state when suspended.
  */
 void rfkill_init_sw_state(struct rfkill *rfkill, bool blocked);
 
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index dcf8df7c573..79693fe2001 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -610,6 +610,15 @@ static ssize_t rfkill_idx_show(struct device *dev,
 	return sprintf(buf, "%d\n", rfkill->idx);
 }
 
+static ssize_t rfkill_persistent_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buf)
+{
+	struct rfkill *rfkill = to_rfkill(dev);
+
+	return sprintf(buf, "%d\n", rfkill->persistent);
+}
+
 static u8 user_state_from_blocked(unsigned long state)
 {
 	if (state & RFKILL_BLOCK_HW)
@@ -668,6 +677,7 @@ static struct device_attribute rfkill_dev_attrs[] = {
 	__ATTR(name, S_IRUGO, rfkill_name_show, NULL),
 	__ATTR(type, S_IRUGO, rfkill_type_show, NULL),
 	__ATTR(index, S_IRUGO, rfkill_idx_show, NULL),
+	__ATTR(persistent, S_IRUGO, rfkill_persistent_show, NULL),
 	__ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store),
 	__ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store),
 	__ATTR_NULL
-- 
cgit v1.2.3-70-g09d2


From 1e9c28599879040039f610c5b177e61ef65ff100 Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Thu, 18 Jun 2009 16:48:58 -0700
Subject: gpio: driver for PrimeCell PL061 GPIO controller

This is a driver for the ARM PrimeCell PL061 GPIO AMBA peripheral.  The
driver is implemented using the gpiolib framework.

This driver also includes support for the use of the PL061 as an interrupt
controller (secondary).

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Cc: David Brownell <david-b@pacbell.net>
Acked-by: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/Kconfig       |   6 +
 drivers/gpio/Makefile      |   1 +
 drivers/gpio/pl061.c       | 341 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/amba/pl061.h |  15 ++
 4 files changed, 363 insertions(+)
 create mode 100644 drivers/gpio/pl061.c
 create mode 100644 include/linux/amba/pl061.h

(limited to 'include')

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 11f373971fa..3582c39f972 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -67,6 +67,12 @@ config GPIO_SYSFS
 
 comment "Memory mapped GPIO expanders:"
 
+config GPIO_PL061
+	bool "PrimeCell PL061 GPIO support"
+	depends on ARM_AMBA
+	help
+	  Say yes here to support the PrimeCell PL061 GPIO device
+
 config GPIO_XILINX
 	bool "Xilinx GPIO support"
 	depends on PPC_OF || MICROBLAZE
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 49ac64e515e..ef90203e8f3 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_GPIO_MAX732X)	+= max732x.o
 obj-$(CONFIG_GPIO_MCP23S08)	+= mcp23s08.o
 obj-$(CONFIG_GPIO_PCA953X)	+= pca953x.o
 obj-$(CONFIG_GPIO_PCF857X)	+= pcf857x.o
+obj-$(CONFIG_GPIO_PL061)	+= pl061.o
 obj-$(CONFIG_GPIO_TWL4030)	+= twl4030-gpio.o
 obj-$(CONFIG_GPIO_XILINX)	+= xilinx_gpio.o
 obj-$(CONFIG_GPIO_BT8XX)	+= bt8xxgpio.o
diff --git a/drivers/gpio/pl061.c b/drivers/gpio/pl061.c
new file mode 100644
index 00000000000..aa8e7cb020d
--- /dev/null
+++ b/drivers/gpio/pl061.c
@@ -0,0 +1,341 @@
+/*
+ *  linux/drivers/gpio/pl061.c
+ *
+ *  Copyright (C) 2008, 2009 Provigent Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Driver for the ARM PrimeCell(tm) General Purpose Input/Output (PL061)
+ *
+ * Data sheet: ARM DDI 0190B, September 2000
+ */
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/irq.h>
+#include <linux/bitops.h>
+#include <linux/workqueue.h>
+#include <linux/gpio.h>
+#include <linux/device.h>
+#include <linux/amba/bus.h>
+#include <linux/amba/pl061.h>
+
+#define GPIODIR 0x400
+#define GPIOIS  0x404
+#define GPIOIBE 0x408
+#define GPIOIEV 0x40C
+#define GPIOIE  0x410
+#define GPIORIS 0x414
+#define GPIOMIS 0x418
+#define GPIOIC  0x41C
+
+#define PL061_GPIO_NR	8
+
+struct pl061_gpio {
+	/* We use a list of pl061_gpio structs for each trigger IRQ in the main
+	 * interrupts controller of the system. We need this to support systems
+	 * in which more that one PL061s are connected to the same IRQ. The ISR
+	 * interates through this list to find the source of the interrupt.
+	 */
+	struct list_head	list;
+
+	/* Each of the two spinlocks protects a different set of hardware
+	 * regiters and data structurs. This decouples the code of the IRQ from
+	 * the GPIO code. This also makes the case of a GPIO routine call from
+	 * the IRQ code simpler.
+	 */
+	spinlock_t		lock;		/* GPIO registers */
+	spinlock_t		irq_lock;	/* IRQ registers */
+
+	void __iomem		*base;
+	unsigned		irq_base;
+	struct gpio_chip	gc;
+};
+
+static int pl061_direction_input(struct gpio_chip *gc, unsigned offset)
+{
+	struct pl061_gpio *chip = container_of(gc, struct pl061_gpio, gc);
+	unsigned long flags;
+	unsigned char gpiodir;
+
+	if (offset >= gc->ngpio)
+		return -EINVAL;
+
+	spin_lock_irqsave(&chip->lock, flags);
+	gpiodir = readb(chip->base + GPIODIR);
+	gpiodir &= ~(1 << offset);
+	writeb(gpiodir, chip->base + GPIODIR);
+	spin_unlock_irqrestore(&chip->lock, flags);
+
+	return 0;
+}
+
+static int pl061_direction_output(struct gpio_chip *gc, unsigned offset,
+		int value)
+{
+	struct pl061_gpio *chip = container_of(gc, struct pl061_gpio, gc);
+	unsigned long flags;
+	unsigned char gpiodir;
+
+	if (offset >= gc->ngpio)
+		return -EINVAL;
+
+	spin_lock_irqsave(&chip->lock, flags);
+	writeb(!!value << offset, chip->base + (1 << (offset + 2)));
+	gpiodir = readb(chip->base + GPIODIR);
+	gpiodir |= 1 << offset;
+	writeb(gpiodir, chip->base + GPIODIR);
+	spin_unlock_irqrestore(&chip->lock, flags);
+
+	return 0;
+}
+
+static int pl061_get_value(struct gpio_chip *gc, unsigned offset)
+{
+	struct pl061_gpio *chip = container_of(gc, struct pl061_gpio, gc);
+
+	return !!readb(chip->base + (1 << (offset + 2)));
+}
+
+static void pl061_set_value(struct gpio_chip *gc, unsigned offset, int value)
+{
+	struct pl061_gpio *chip = container_of(gc, struct pl061_gpio, gc);
+
+	writeb(!!value << offset, chip->base + (1 << (offset + 2)));
+}
+
+/*
+ * PL061 GPIO IRQ
+ */
+static void pl061_irq_disable(unsigned irq)
+{
+	struct pl061_gpio *chip = get_irq_chip_data(irq);
+	int offset = irq - chip->irq_base;
+	unsigned long flags;
+	u8 gpioie;
+
+	spin_lock_irqsave(&chip->irq_lock, flags);
+	gpioie = readb(chip->base + GPIOIE);
+	gpioie &= ~(1 << offset);
+	writeb(gpioie, chip->base + GPIOIE);
+	spin_unlock_irqrestore(&chip->irq_lock, flags);
+}
+
+static void pl061_irq_enable(unsigned irq)
+{
+	struct pl061_gpio *chip = get_irq_chip_data(irq);
+	int offset = irq - chip->irq_base;
+	unsigned long flags;
+	u8 gpioie;
+
+	spin_lock_irqsave(&chip->irq_lock, flags);
+	gpioie = readb(chip->base + GPIOIE);
+	gpioie |= 1 << offset;
+	writeb(gpioie, chip->base + GPIOIE);
+	spin_unlock_irqrestore(&chip->irq_lock, flags);
+}
+
+static int pl061_irq_type(unsigned irq, unsigned trigger)
+{
+	struct pl061_gpio *chip = get_irq_chip_data(irq);
+	int offset = irq - chip->irq_base;
+	unsigned long flags;
+	u8 gpiois, gpioibe, gpioiev;
+
+	if (offset < 0 || offset > PL061_GPIO_NR)
+		return -EINVAL;
+
+	spin_lock_irqsave(&chip->irq_lock, flags);
+
+	gpioiev = readb(chip->base + GPIOIEV);
+
+	gpiois = readb(chip->base + GPIOIS);
+	if (trigger & (IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_LEVEL_LOW)) {
+		gpiois |= 1 << offset;
+		if (trigger & IRQ_TYPE_LEVEL_HIGH)
+			gpioiev |= 1 << offset;
+		else
+			gpioiev &= ~(1 << offset);
+	} else
+		gpiois &= ~(1 << offset);
+	writeb(gpiois, chip->base + GPIOIS);
+
+	gpioibe = readb(chip->base + GPIOIBE);
+	if ((trigger & IRQ_TYPE_EDGE_BOTH) == IRQ_TYPE_EDGE_BOTH)
+		gpioibe |= 1 << offset;
+	else {
+		gpioibe &= ~(1 << offset);
+		if (trigger & IRQ_TYPE_EDGE_RISING)
+			gpioiev |= 1 << offset;
+		else
+			gpioiev &= ~(1 << offset);
+	}
+	writeb(gpioibe, chip->base + GPIOIBE);
+
+	writeb(gpioiev, chip->base + GPIOIEV);
+
+	spin_unlock_irqrestore(&chip->irq_lock, flags);
+
+	return 0;
+}
+
+static struct irq_chip pl061_irqchip = {
+	.name		= "GPIO",
+	.enable		= pl061_irq_enable,
+	.disable	= pl061_irq_disable,
+	.set_type	= pl061_irq_type,
+};
+
+static void pl061_irq_handler(unsigned irq, struct irq_desc *desc)
+{
+	struct list_head *chip_list = get_irq_chip_data(irq);
+	struct list_head *ptr;
+	struct pl061_gpio *chip;
+
+	desc->chip->ack(irq);
+	list_for_each(ptr, chip_list) {
+		unsigned long pending;
+		int gpio;
+
+		chip = list_entry(ptr, struct pl061_gpio, list);
+		pending = readb(chip->base + GPIOMIS);
+		writeb(pending, chip->base + GPIOIC);
+
+		if (pending == 0)
+			continue;
+
+		for_each_bit(gpio, &pending, PL061_GPIO_NR)
+			generic_handle_irq(gpio_to_irq(gpio));
+	}
+	desc->chip->unmask(irq);
+}
+
+static int __init pl061_probe(struct amba_device *dev, struct amba_id *id)
+{
+	struct pl061_platform_data *pdata;
+	struct pl061_gpio *chip;
+	struct list_head *chip_list;
+	int ret, irq, i;
+	static unsigned long init_irq[BITS_TO_LONGS(NR_IRQS)];
+
+	pdata = dev->dev.platform_data;
+	if (pdata == NULL)
+		return -ENODEV;
+
+	chip = kzalloc(sizeof(*chip), GFP_KERNEL);
+	if (chip == NULL)
+		return -ENOMEM;
+
+	if (!request_mem_region(dev->res.start,
+				resource_size(&dev->res), "pl061")) {
+		ret = -EBUSY;
+		goto free_mem;
+	}
+
+	chip->base = ioremap(dev->res.start, resource_size(&dev->res));
+	if (chip->base == NULL) {
+		ret = -ENOMEM;
+		goto release_region;
+	}
+
+	spin_lock_init(&chip->lock);
+	spin_lock_init(&chip->irq_lock);
+	INIT_LIST_HEAD(&chip->list);
+
+	chip->gc.direction_input = pl061_direction_input;
+	chip->gc.direction_output = pl061_direction_output;
+	chip->gc.get = pl061_get_value;
+	chip->gc.set = pl061_set_value;
+	chip->gc.base = pdata->gpio_base;
+	chip->gc.ngpio = PL061_GPIO_NR;
+	chip->gc.label = dev_name(&dev->dev);
+	chip->gc.dev = &dev->dev;
+	chip->gc.owner = THIS_MODULE;
+
+	chip->irq_base = pdata->irq_base;
+
+	ret = gpiochip_add(&chip->gc);
+	if (ret)
+		goto iounmap;
+
+	/*
+	 * irq_chip support
+	 */
+
+	if (chip->irq_base == (unsigned) -1)
+		return 0;
+
+	writeb(0, chip->base + GPIOIE); /* disable irqs */
+	irq = dev->irq[0];
+	if (irq < 0) {
+		ret = -ENODEV;
+		goto iounmap;
+	}
+	set_irq_chained_handler(irq, pl061_irq_handler);
+	if (!test_and_set_bit(irq, init_irq)) { /* list initialized? */
+		chip_list = kmalloc(sizeof(*chip_list), GFP_KERNEL);
+		if (chip_list == NULL) {
+			ret = -ENOMEM;
+			goto iounmap;
+		}
+		INIT_LIST_HEAD(chip_list);
+		set_irq_chip_data(irq, chip_list);
+	} else
+		chip_list = get_irq_chip_data(irq);
+	list_add(&chip->list, chip_list);
+
+	for (i = 0; i < PL061_GPIO_NR; i++) {
+		if (pdata->directions & (1 << i))
+			pl061_direction_output(&chip->gc, i,
+					pdata->values & (1 << i));
+		else
+			pl061_direction_input(&chip->gc, i);
+
+		set_irq_chip(i+chip->irq_base, &pl061_irqchip);
+		set_irq_handler(i+chip->irq_base, handle_simple_irq);
+		set_irq_flags(i+chip->irq_base, IRQF_VALID);
+		set_irq_chip_data(i+chip->irq_base, chip);
+	}
+
+	return 0;
+
+iounmap:
+	iounmap(chip->base);
+release_region:
+	release_mem_region(dev->res.start, resource_size(&dev->res));
+free_mem:
+	kfree(chip);
+
+	return ret;
+}
+
+static struct amba_id pl061_ids[] __initdata = {
+	{
+		.id	= 0x00041061,
+		.mask	= 0x000fffff,
+	},
+	{ 0, 0 },
+};
+
+static struct amba_driver pl061_gpio_driver = {
+	.drv = {
+		.name	= "pl061_gpio",
+	},
+	.id_table	= pl061_ids,
+	.probe		= pl061_probe,
+};
+
+static int __init pl061_gpio_init(void)
+{
+	return amba_driver_register(&pl061_gpio_driver);
+}
+subsys_initcall(pl061_gpio_init);
+
+MODULE_AUTHOR("Baruch Siach <baruch@tkos.co.il>");
+MODULE_DESCRIPTION("PL061 GPIO driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/amba/pl061.h b/include/linux/amba/pl061.h
new file mode 100644
index 00000000000..b4fbd986260
--- /dev/null
+++ b/include/linux/amba/pl061.h
@@ -0,0 +1,15 @@
+/* platform data for the PL061 GPIO driver */
+
+struct pl061_platform_data {
+	/* number of the first GPIO */
+	unsigned	gpio_base;
+
+	/* number of the first IRQ.
+	 * If the IRQ functionality in not desired this must be set to
+	 * (unsigned) -1.
+	 */
+	unsigned	irq_base;
+
+	u8		directions;	/* startup directions, 1: out, 0: in */
+	u8		values;		/* startup values */
+};
-- 
cgit v1.2.3-70-g09d2


From d68412b6d1e8e5ed42d568330e598bd81914ccbd Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 18 Jun 2009 16:49:18 -0700
Subject: dma-mapping: mark DMA_nBITS_MASK as deprecated

Mark them deprecated so that out-of-tree developers get notified about
this before their modules break when these macros are removed.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Cc: Yang Hongyang <yanghy@cn.fujitsu.com>
Acked-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dma-mapping.h | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 8083b6a36a3..38f5608d246 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -63,24 +63,26 @@ struct dma_map_ops {
 
 #define DMA_BIT_MASK(n)	(((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
 
+typedef u64 DMA_nnBIT_MASK __deprecated;
+
 /*
  * NOTE: do not use the below macros in new code and do not add new definitions
  * here.
  *
  * Instead, just open-code DMA_BIT_MASK(n) within your driver
  */
-#define DMA_64BIT_MASK	DMA_BIT_MASK(64)
-#define DMA_48BIT_MASK	DMA_BIT_MASK(48)
-#define DMA_47BIT_MASK	DMA_BIT_MASK(47)
-#define DMA_40BIT_MASK	DMA_BIT_MASK(40)
-#define DMA_39BIT_MASK	DMA_BIT_MASK(39)
-#define DMA_35BIT_MASK	DMA_BIT_MASK(35)
-#define DMA_32BIT_MASK	DMA_BIT_MASK(32)
-#define DMA_31BIT_MASK	DMA_BIT_MASK(31)
-#define DMA_30BIT_MASK	DMA_BIT_MASK(30)
-#define DMA_29BIT_MASK	DMA_BIT_MASK(29)
-#define DMA_28BIT_MASK	DMA_BIT_MASK(28)
-#define DMA_24BIT_MASK	DMA_BIT_MASK(24)
+#define DMA_64BIT_MASK	(DMA_nnBIT_MASK)DMA_BIT_MASK(64)
+#define DMA_48BIT_MASK	(DMA_nnBIT_MASK)DMA_BIT_MASK(48)
+#define DMA_47BIT_MASK	(DMA_nnBIT_MASK)DMA_BIT_MASK(47)
+#define DMA_40BIT_MASK	(DMA_nnBIT_MASK)DMA_BIT_MASK(40)
+#define DMA_39BIT_MASK	(DMA_nnBIT_MASK)DMA_BIT_MASK(39)
+#define DMA_35BIT_MASK	(DMA_nnBIT_MASK)DMA_BIT_MASK(35)
+#define DMA_32BIT_MASK	(DMA_nnBIT_MASK)DMA_BIT_MASK(32)
+#define DMA_31BIT_MASK	(DMA_nnBIT_MASK)DMA_BIT_MASK(31)
+#define DMA_30BIT_MASK	(DMA_nnBIT_MASK)DMA_BIT_MASK(30)
+#define DMA_29BIT_MASK	(DMA_nnBIT_MASK)DMA_BIT_MASK(29)
+#define DMA_28BIT_MASK	(DMA_nnBIT_MASK)DMA_BIT_MASK(28)
+#define DMA_24BIT_MASK	(DMA_nnBIT_MASK)DMA_BIT_MASK(24)
 
 #define DMA_MASK_NONE	0x0ULL
 
-- 
cgit v1.2.3-70-g09d2


From dbe6f1869188b6e04e38aa861dd198befb08bcd7 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Thu, 18 Jun 2009 16:49:19 -0700
Subject: dma-mapping: mark dma_sync_single and dma_sync_sg as deprecated

dma_sync_single() and dma_sync_sg() have been described as "Backwards
compat, remove in 2.7.x" for a long time (since 2.6.5).

This marks dma_sync_single() and dma_sync_sg() as deprecated so the users
get notified before removing them.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dma-mapping.h | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 38f5608d246..07dfd460d28 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -109,9 +109,20 @@ static inline int is_buffer_dma_capable(u64 mask, dma_addr_t addr, size_t size)
 #include <asm-generic/dma-mapping-broken.h>
 #endif
 
-/* Backwards compat, remove in 2.7.x */
-#define dma_sync_single		dma_sync_single_for_cpu
-#define dma_sync_sg		dma_sync_sg_for_cpu
+/* for backwards compatibility, removed soon */
+static inline void __deprecated dma_sync_single(struct device *dev,
+						dma_addr_t addr, size_t size,
+						enum dma_data_direction dir)
+{
+	dma_sync_single_for_cpu(dev, addr, size, dir);
+}
+
+static inline void __deprecated dma_sync_sg(struct device *dev,
+					    struct scatterlist *sg, int nelems,
+					    enum dma_data_direction dir)
+{
+	dma_sync_sg_for_cpu(dev, sg, nelems, dir);
+}
 
 static inline u64 dma_get_mask(struct device *dev)
 {
-- 
cgit v1.2.3-70-g09d2


From 7b768f07dce463a054c9dd84862d15ccc3d2b712 Mon Sep 17 00:00:00 2001
From: "Pallipadi, Venkatesh" <venkatesh.pallipadi@intel.com>
Date: Fri, 19 Jun 2009 17:14:59 -0700
Subject: ACPI: pdc init related memory leak with physical CPU hotplug

arch_acpi_processor_cleanup_pdc() in x86 and ia64 results in memory allocated
for _PDC objects that is never freed and will cause memory leak in case of
physical CPU remove and add. Patch fixes the memory leak by freeing the
objects soon after _PDC is evaluated.

Reported-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/ia64/kernel/acpi-processor.c | 12 ++++++++++++
 arch/x86/kernel/acpi/processor.c  | 13 +++++++++++++
 drivers/acpi/processor_core.c     |  2 ++
 include/acpi/processor.h          |  1 +
 4 files changed, 28 insertions(+)

(limited to 'include')

diff --git a/arch/ia64/kernel/acpi-processor.c b/arch/ia64/kernel/acpi-processor.c
index cbe6cee5a55..dbda7bde611 100644
--- a/arch/ia64/kernel/acpi-processor.c
+++ b/arch/ia64/kernel/acpi-processor.c
@@ -71,3 +71,15 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
 }
 
 EXPORT_SYMBOL(arch_acpi_processor_init_pdc);
+
+void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr)
+{
+	if (pr->pdc) {
+		kfree(pr->pdc->pointer->buffer.pointer);
+		kfree(pr->pdc->pointer);
+		kfree(pr->pdc);
+		pr->pdc = NULL;
+	}
+}
+
+EXPORT_SYMBOL(arch_acpi_processor_cleanup_pdc);
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c
index 7c074eec39f..d296f4a195c 100644
--- a/arch/x86/kernel/acpi/processor.c
+++ b/arch/x86/kernel/acpi/processor.c
@@ -72,6 +72,7 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c)
 	return;
 }
 
+
 /* Initialize _PDC data based on the CPU vendor */
 void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
 {
@@ -85,3 +86,15 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
 }
 
 EXPORT_SYMBOL(arch_acpi_processor_init_pdc);
+
+void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr)
+{
+	if (pr->pdc) {
+		kfree(pr->pdc->pointer->buffer.pointer);
+		kfree(pr->pdc->pointer);
+		kfree(pr->pdc);
+		pr->pdc = NULL;
+	}
+}
+
+EXPORT_SYMBOL(arch_acpi_processor_cleanup_pdc);
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 23f0fb84f1c..d40d45e904a 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -731,6 +731,8 @@ static int __cpuinit acpi_processor_start(struct acpi_device *device)
 	/* _PDC call should be done before doing anything else (if reqd.). */
 	arch_acpi_processor_init_pdc(pr);
 	acpi_processor_set_pdc(pr);
+	arch_acpi_processor_cleanup_pdc(pr);
+
 #ifdef CONFIG_CPU_FREQ
 	acpi_processor_ppc_has_changed(pr);
 #endif
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index 4927c063347..baf1e0a9a7e 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -258,6 +258,7 @@ DECLARE_PER_CPU(struct acpi_processor *, processors);
 extern struct acpi_processor_errata errata;
 
 void arch_acpi_processor_init_pdc(struct acpi_processor *pr);
+void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr);
 
 #ifdef ARCH_HAS_POWER_INIT
 void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
-- 
cgit v1.2.3-70-g09d2


From ec20a022aa24fc63d3ab59584cb1e5aa9a21d46c Mon Sep 17 00:00:00 2001
From: Tero Saarni <tero.saarni@gmail.com>
Date: Wed, 10 Jun 2009 23:27:24 -0700
Subject: Input: synaptics - add support for reporting x/y resolution

Synaptics uses anisotropic coordinate system.  On some wide touchpads
vertical resolution can be twice as high as horizontal which causes
unequal sensitivity on x/y directions.  Add support for reading the
resolution with EVIOCGABS ioctl.

Signed-off-by: Tero Saarni <tero.saarni@gmail.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/evdev.c           | 12 +++++++++---
 drivers/input/mouse/synaptics.c | 28 ++++++++++++++++++++++++++++
 drivers/input/mouse/synaptics.h |  2 ++
 include/linux/input.h           |  2 ++
 4 files changed, 41 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index c238116400b..114efd8dc8f 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -626,8 +626,11 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd,
 				abs.maximum = dev->absmax[t];
 				abs.fuzz = dev->absfuzz[t];
 				abs.flat = dev->absflat[t];
+				abs.resolution = dev->absres[t];
 
-				if (copy_to_user(p, &abs, sizeof(struct input_absinfo)))
+				if (copy_to_user(p, &abs, min_t(size_t,
+								_IOC_SIZE(cmd),
+								sizeof(struct input_absinfo))))
 					return -EFAULT;
 
 				return 0;
@@ -654,8 +657,9 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd,
 
 				t = _IOC_NR(cmd) & ABS_MAX;
 
-				if (copy_from_user(&abs, p,
-						sizeof(struct input_absinfo)))
+				if (copy_from_user(&abs, p, min_t(size_t,
+								  _IOC_SIZE(cmd),
+								  sizeof(struct input_absinfo))))
 					return -EFAULT;
 
 				/*
@@ -670,6 +674,8 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd,
 				dev->absmax[t] = abs.maximum;
 				dev->absfuzz[t] = abs.fuzz;
 				dev->absflat[t] = abs.flat;
+				dev->absres[t] = _IOC_SIZE(cmd) < sizeof(struct input_absinfo) ?
+							0 : abs.resolution;
 
 				spin_unlock_irq(&dev->event_lock);
 
diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index f3e4f7b0240..19984bf06ca 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -180,6 +180,29 @@ static int synaptics_identify(struct psmouse *psmouse)
 	return -1;
 }
 
+/*
+ * Read touchpad resolution
+ * Resolution is left zero if touchpad does not support the query
+ */
+static int synaptics_resolution(struct psmouse *psmouse)
+{
+	struct synaptics_data *priv = psmouse->private;
+	unsigned char res[3];
+
+	if (SYN_ID_MAJOR(priv->identity) < 4)
+		return 0;
+
+	if (synaptics_send_cmd(psmouse, SYN_QUE_RESOLUTION, res))
+		return 0;
+
+	if ((res[0] != 0) && (res[1] & 0x80) && (res[2] != 0)) {
+		priv->x_res = res[0]; /* x resolution in units/mm */
+		priv->y_res = res[2]; /* y resolution in units/mm */
+	}
+
+	return 0;
+}
+
 static int synaptics_query_hardware(struct psmouse *psmouse)
 {
 	if (synaptics_identify(psmouse))
@@ -188,6 +211,8 @@ static int synaptics_query_hardware(struct psmouse *psmouse)
 		return -1;
 	if (synaptics_capability(psmouse))
 		return -1;
+	if (synaptics_resolution(psmouse))
+		return -1;
 
 	return 0;
 }
@@ -563,6 +588,9 @@ static void set_input_params(struct input_dev *dev, struct synaptics_data *priv)
 	clear_bit(EV_REL, dev->evbit);
 	clear_bit(REL_X, dev->relbit);
 	clear_bit(REL_Y, dev->relbit);
+
+	dev->absres[ABS_X] = priv->x_res;
+	dev->absres[ABS_Y] = priv->y_res;
 }
 
 static void synaptics_disconnect(struct psmouse *psmouse)
diff --git a/drivers/input/mouse/synaptics.h b/drivers/input/mouse/synaptics.h
index 02aa4cf7bc7..30238215175 100644
--- a/drivers/input/mouse/synaptics.h
+++ b/drivers/input/mouse/synaptics.h
@@ -97,6 +97,8 @@ struct synaptics_data {
 	unsigned long int capabilities;		/* Capabilities */
 	unsigned long int ext_cap;		/* Extended Capabilities */
 	unsigned long int identity;		/* Identification */
+	int x_res;				/* X resolution in units/mm */
+	int y_res;				/* Y resolution in units/mm */
 
 	unsigned char pkt_type;			/* packet type - old, new, etc */
 	unsigned char mode;			/* current mode byte */
diff --git a/include/linux/input.h b/include/linux/input.h
index 6fed4f6a9c9..8b3bc3e0d14 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -53,6 +53,7 @@ struct input_absinfo {
 	__s32 maximum;
 	__s32 fuzz;
 	__s32 flat;
+	__s32 resolution;
 };
 
 #define EVIOCGVERSION		_IOR('E', 0x01, int)			/* get driver version */
@@ -1109,6 +1110,7 @@ struct input_dev {
 	int absmin[ABS_MAX + 1];
 	int absfuzz[ABS_MAX + 1];
 	int absflat[ABS_MAX + 1];
+	int absres[ABS_MAX + 1];
 
 	int (*open)(struct input_dev *dev);
 	void (*close)(struct input_dev *dev);
-- 
cgit v1.2.3-70-g09d2


From d355e57d58193b89283b0c8153649f0427b0bdad Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Mon, 15 Jun 2009 22:11:08 -0500
Subject: libiscsi: don't run scsi eh if iscsi task is making progress

If we are sending or receiving data for the task successfully do
not run the scsi eh, because we know the task is making progress.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/scsi/libiscsi.c     | 62 +++++++++++++++++++++++++++++++++++----------
 drivers/scsi/libiscsi_tcp.c |  6 +++--
 include/scsi/libiscsi.h     |  4 +++
 3 files changed, 57 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 59908aead53..b55b7991d5f 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -954,6 +954,7 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 		task = iscsi_itt_to_ctask(conn, hdr->itt);
 		if (!task)
 			return ISCSI_ERR_BAD_ITT;
+		task->last_xfer = jiffies;
 		break;
 	case ISCSI_OP_R2T:
 		/*
@@ -1192,10 +1193,12 @@ static int iscsi_xmit_task(struct iscsi_conn *conn)
 	spin_unlock_bh(&conn->session->lock);
 	rc = conn->session->tt->xmit_task(task);
 	spin_lock_bh(&conn->session->lock);
-	__iscsi_put_task(task);
-	if (!rc)
+	if (!rc) {
 		/* done with this task */
+		task->last_xfer = jiffies;
 		conn->task = NULL;
+	}
+	__iscsi_put_task(task);
 	return rc;
 }
 
@@ -1361,6 +1364,9 @@ static inline struct iscsi_task *iscsi_alloc_task(struct iscsi_conn *conn,
 	task->state = ISCSI_TASK_PENDING;
 	task->conn = conn;
 	task->sc = sc;
+	task->have_checked_conn = false;
+	task->last_timeout = jiffies;
+	task->last_xfer = jiffies;
 	INIT_LIST_HEAD(&task->running);
 	return task;
 }
@@ -1716,17 +1722,18 @@ static int iscsi_has_ping_timed_out(struct iscsi_conn *conn)
 		return 0;
 }
 
-static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd)
+static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
 {
+	enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED;
+	struct iscsi_task *task = NULL;
 	struct iscsi_cls_session *cls_session;
 	struct iscsi_session *session;
 	struct iscsi_conn *conn;
-	enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED;
 
-	cls_session = starget_to_session(scsi_target(scmd->device));
+	cls_session = starget_to_session(scsi_target(sc->device));
 	session = cls_session->dd_data;
 
-	ISCSI_DBG_SESSION(session, "scsi cmd %p timedout\n", scmd);
+	ISCSI_DBG_SESSION(session, "scsi cmd %p timedout\n", sc);
 
 	spin_lock(&session->lock);
 	if (session->state != ISCSI_STATE_LOGGED_IN) {
@@ -1745,6 +1752,26 @@ static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd)
 		goto done;
 	}
 
+	task = (struct iscsi_task *)sc->SCp.ptr;
+	if (!task)
+		goto done;
+	/*
+	 * If we have sent (at least queued to the network layer) a pdu or
+	 * recvd one for the task since the last timeout ask for
+	 * more time. If on the next timeout we have not made progress
+	 * we can check if it is the task or connection when we send the
+	 * nop as a ping.
+	 */
+	if (time_after_eq(task->last_xfer, task->last_timeout)) {
+		ISCSI_DBG_CONN(conn, "Command making progress. Asking "
+			       "scsi-ml for more time to complete. "
+			       "Last data recv at %lu. Last timeout was at "
+			       "%lu\n.", task->last_xfer, task->last_timeout);
+		task->have_checked_conn = false;
+		rc = BLK_EH_RESET_TIMER;
+		goto done;
+	}
+
 	if (!conn->recv_timeout && !conn->ping_timeout)
 		goto done;
 	/*
@@ -1755,20 +1782,29 @@ static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd)
 		rc = BLK_EH_RESET_TIMER;
 		goto done;
 	}
+
+	/* Assumes nop timeout is shorter than scsi cmd timeout */
+	if (task->have_checked_conn)
+		goto done;
+
 	/*
-	 * if we are about to check the transport then give the command
-	 * more time
+	 * Checking the transport already or nop from a cmd timeout still
+	 * running
 	 */
-	if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ),
-			   jiffies)) {
+	if (conn->ping_task) {
+		task->have_checked_conn = true;
 		rc = BLK_EH_RESET_TIMER;
 		goto done;
 	}
 
-	/* if in the middle of checking the transport then give us more time */
-	if (conn->ping_task)
-		rc = BLK_EH_RESET_TIMER;
+	/* Make sure there is a transport check done */
+	iscsi_send_nopout(conn, NULL);
+	task->have_checked_conn = true;
+	rc = BLK_EH_RESET_TIMER;
+
 done:
+	if (task)
+		task->last_timeout = jiffies;
 	spin_unlock(&session->lock);
 	ISCSI_DBG_SESSION(session, "return %s\n", rc == BLK_EH_RESET_TIMER ?
 			  "timer reset" : "nh");
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index 2bc07090321..2e0746d7030 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -686,6 +686,7 @@ iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
 				     "offset=%d, datalen=%d)\n",
 				      tcp_task->data_offset,
 				      tcp_conn->in.datalen);
+			task->last_xfer = jiffies;
 			rc = iscsi_segment_seek_sg(&tcp_conn->in.segment,
 						   sdb->table.sgl,
 						   sdb->table.nents,
@@ -713,9 +714,10 @@ iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
 			rc = ISCSI_ERR_BAD_ITT;
 		else if (ahslen)
 			rc = ISCSI_ERR_AHSLEN;
-		else if (task->sc->sc_data_direction == DMA_TO_DEVICE)
+		else if (task->sc->sc_data_direction == DMA_TO_DEVICE) {
+			task->last_xfer = jiffies;
 			rc = iscsi_tcp_r2t_rsp(conn, task);
-		else
+		} else
 			rc = ISCSI_ERR_PROTO;
 		spin_unlock(&conn->session->lock);
 		break;
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index 196525cd402..61afeb59a83 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -125,6 +125,10 @@ struct iscsi_task {
 	struct scsi_cmnd	*sc;		/* associated SCSI cmd*/
 	struct iscsi_conn	*conn;		/* used connection    */
 
+	/* data processing tracking */
+	unsigned long		last_xfer;
+	unsigned long		last_timeout;
+	bool			have_checked_conn;
 	/* state set/tested under session->lock */
 	int			state;
 	atomic_t		refcount;
-- 
cgit v1.2.3-70-g09d2


From 7414705ea4aef9ce438e547f3138a680d2d1096c Mon Sep 17 00:00:00 2001
From: Robert Love <robert.w.love@intel.com>
Date: Wed, 10 Jun 2009 15:31:10 -0700
Subject: libfc: Add runtime debugging with debug_logging module parameter

This patch adds the /sys/module/libfc/parameters/debug_logging
file to sysfs as a module parameter. It accepts an integer
bitmask for logging. Currently it supports:

   bit
LSB 0 = general libfc debugging
    1 = lport debugging
    2 = disc debugging
    3 = rport debugging
    4 = fcp debugging
    5 = EM debugging
    6 = exch/seq debugging
    7 = scsi logging (mostly error handling)

the other bits are not used at this time.

The patch converts all of the libfc source files to use
these new macros and removes the old FC_DBG macro.

Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/scsi/libfc/fc_disc.c  |  83 ++++++++++------------
 drivers/scsi/libfc/fc_exch.c  |  58 +++++++---------
 drivers/scsi/libfc/fc_fcp.c   |  97 +++++++++++++-------------
 drivers/scsi/libfc/fc_lport.c | 156 ++++++++++++++++++++----------------------
 drivers/scsi/libfc/fc_rport.c | 120 ++++++++++++++------------------
 include/scsi/fc_encode.h      |   2 -
 include/scsi/libfc.h          |  77 ++++++++++++++++++---
 7 files changed, 301 insertions(+), 292 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c
index 4c880656990..6fabf66972b 100644
--- a/drivers/scsi/libfc/fc_disc.c
+++ b/drivers/scsi/libfc/fc_disc.c
@@ -45,14 +45,6 @@
 
 #define	FC_DISC_DELAY		3
 
-static int fc_disc_debug;
-
-#define FC_DEBUG_DISC(fmt...)			\
-	do {					\
-		if (fc_disc_debug)		\
-			FC_DBG(fmt);		\
-	} while (0)
-
 static void fc_disc_gpn_ft_req(struct fc_disc *);
 static void fc_disc_gpn_ft_resp(struct fc_seq *, struct fc_frame *, void *);
 static int fc_disc_new_target(struct fc_disc *, struct fc_rport *,
@@ -137,8 +129,8 @@ static void fc_disc_rport_callback(struct fc_lport *lport,
 	struct fc_rport_libfc_priv *rdata = rport->dd_data;
 	struct fc_disc *disc = &lport->disc;
 
-	FC_DEBUG_DISC("Received a %d event for port (%6x)\n", event,
-		      rport->port_id);
+	FC_DISC_DBG(disc, "Received a %d event for port (%6x)\n", event,
+		    rport->port_id);
 
 	switch (event) {
 	case RPORT_EV_CREATED:
@@ -191,8 +183,7 @@ static void fc_disc_recv_rscn_req(struct fc_seq *sp, struct fc_frame *fp,
 
 	lport = disc->lport;
 
-	FC_DEBUG_DISC("Received an RSCN event on port (%6x)\n",
-		      fc_host_port_id(lport->host));
+	FC_DISC_DBG(disc, "Received an RSCN event\n");
 
 	/* make sure the frame contains an RSCN message */
 	rp = fc_frame_payload_get(fp, sizeof(*rp));
@@ -225,8 +216,8 @@ static void fc_disc_recv_rscn_req(struct fc_seq *sp, struct fc_frame *fp,
 		 */
 		switch (fmt) {
 		case ELS_ADDR_FMT_PORT:
-			FC_DEBUG_DISC("Port address format for port (%6x)\n",
-				      ntoh24(pp->rscn_fid));
+			FC_DISC_DBG(disc, "Port address format for port "
+				    "(%6x)\n", ntoh24(pp->rscn_fid));
 			dp = kzalloc(sizeof(*dp), GFP_KERNEL);
 			if (!dp) {
 				redisc = 1;
@@ -243,19 +234,19 @@ static void fc_disc_recv_rscn_req(struct fc_seq *sp, struct fc_frame *fp,
 		case ELS_ADDR_FMT_DOM:
 		case ELS_ADDR_FMT_FAB:
 		default:
-			FC_DEBUG_DISC("Address format is (%d)\n", fmt);
+			FC_DISC_DBG(disc, "Address format is (%d)\n", fmt);
 			redisc = 1;
 			break;
 		}
 	}
 	lport->tt.seq_els_rsp_send(sp, ELS_LS_ACC, NULL);
 	if (redisc) {
-		FC_DEBUG_DISC("RSCN received: rediscovering\n");
+		FC_DISC_DBG(disc, "RSCN received: rediscovering\n");
 		fc_disc_restart(disc);
 	} else {
-		FC_DEBUG_DISC("RSCN received: not rediscovering. "
-			      "redisc %d state %d in_prog %d\n",
-			      redisc, lport->state, disc->pending);
+		FC_DISC_DBG(disc, "RSCN received: not rediscovering. "
+			    "redisc %d state %d in_prog %d\n",
+			    redisc, lport->state, disc->pending);
 		list_for_each_entry_safe(dp, next, &disc_ports, peers) {
 			list_del(&dp->peers);
 			rport = lport->tt.rport_lookup(lport, dp->ids.port_id);
@@ -270,7 +261,7 @@ static void fc_disc_recv_rscn_req(struct fc_seq *sp, struct fc_frame *fp,
 	fc_frame_free(fp);
 	return;
 reject:
-	FC_DEBUG_DISC("Received a bad RSCN frame\n");
+	FC_DISC_DBG(disc, "Received a bad RSCN frame\n");
 	rjt_data.fp = NULL;
 	rjt_data.reason = ELS_RJT_LOGIC;
 	rjt_data.explan = ELS_EXPL_NONE;
@@ -302,7 +293,8 @@ static void fc_disc_recv_req(struct fc_seq *sp, struct fc_frame *fp,
 		mutex_unlock(&disc->disc_mutex);
 		break;
 	default:
-		FC_DBG("Received an unsupported request. opcode (%x)\n", op);
+		FC_DISC_DBG(disc, "Received an unsupported request, "
+			    "the opcode is (%x)\n", op);
 		break;
 	}
 }
@@ -320,12 +312,10 @@ static void fc_disc_restart(struct fc_disc *disc)
 	struct fc_rport_libfc_priv *rdata, *next;
 	struct fc_lport *lport = disc->lport;
 
-	FC_DEBUG_DISC("Restarting discovery for port (%6x)\n",
-		      fc_host_port_id(lport->host));
+	FC_DISC_DBG(disc, "Restarting discovery\n");
 
 	list_for_each_entry_safe(rdata, next, &disc->rports, peers) {
 		rport = PRIV_TO_RPORT(rdata);
-		FC_DEBUG_DISC("list_del(%6x)\n", rport->port_id);
 		list_del(&rdata->peers);
 		lport->tt.rport_logoff(rport);
 	}
@@ -485,8 +475,7 @@ static void fc_disc_done(struct fc_disc *disc)
 	struct fc_lport *lport = disc->lport;
 	enum fc_disc_event event;
 
-	FC_DEBUG_DISC("Discovery complete for port (%6x)\n",
-		      fc_host_port_id(lport->host));
+	FC_DISC_DBG(disc, "Discovery complete\n");
 
 	event = disc->event;
 	disc->event = DISC_EV_NONE;
@@ -510,10 +499,10 @@ static void fc_disc_error(struct fc_disc *disc, struct fc_frame *fp)
 {
 	struct fc_lport *lport = disc->lport;
 	unsigned long delay = 0;
-	if (fc_disc_debug)
-		FC_DBG("Error %ld, retries %d/%d\n",
-		       PTR_ERR(fp), disc->retry_count,
-		       FC_DISC_RETRY_LIMIT);
+
+	FC_DISC_DBG(disc, "Error %ld, retries %d/%d\n",
+		    PTR_ERR(fp), disc->retry_count,
+		    FC_DISC_RETRY_LIMIT);
 
 	if (!fp || PTR_ERR(fp) == -FC_EX_TIMEOUT) {
 		/*
@@ -649,9 +638,9 @@ static int fc_disc_gpn_ft_parse(struct fc_disc *disc, void *buf, size_t len)
 					      &disc->rogue_rports);
 				lport->tt.rport_login(rport);
 			} else
-				FC_DBG("Failed to allocate memory for "
-				       "the newly discovered port (%6x)\n",
-				       dp.ids.port_id);
+				printk(KERN_WARNING "libfc: Failed to allocate "
+				       "memory for the newly discovered port "
+				       "(%6x)\n", dp.ids.port_id);
 		}
 
 		if (np->fp_flags & FC_NS_FID_LAST) {
@@ -671,9 +660,8 @@ static int fc_disc_gpn_ft_parse(struct fc_disc *disc, void *buf, size_t len)
 	 */
 	if (error == 0 && len > 0 && len < sizeof(*np)) {
 		if (np != &disc->partial_buf) {
-			FC_DEBUG_DISC("Partial buffer remains "
-				      "for discovery by (%6x)\n",
-				      fc_host_port_id(lport->host));
+			FC_DISC_DBG(disc, "Partial buffer remains "
+				    "for discovery\n");
 			memcpy(&disc->partial_buf, np, len);
 		}
 		disc->buf_len = (unsigned char) len;
@@ -721,8 +709,7 @@ static void fc_disc_gpn_ft_resp(struct fc_seq *sp, struct fc_frame *fp,
 	int error;
 
 	mutex_lock(&disc->disc_mutex);
-	FC_DEBUG_DISC("Received a GPN_FT response on port (%6x)\n",
-		      fc_host_port_id(disc->lport->host));
+	FC_DISC_DBG(disc, "Received a GPN_FT response\n");
 
 	if (IS_ERR(fp)) {
 		fc_disc_error(disc, fp);
@@ -738,30 +725,30 @@ static void fc_disc_gpn_ft_resp(struct fc_seq *sp, struct fc_frame *fp,
 	    disc->seq_count == 0) {
 		cp = fc_frame_payload_get(fp, sizeof(*cp));
 		if (!cp) {
-			FC_DBG("GPN_FT response too short, len %d\n",
-			       fr_len(fp));
+			FC_DISC_DBG(disc, "GPN_FT response too short, len %d\n",
+				    fr_len(fp));
 		} else if (ntohs(cp->ct_cmd) == FC_FS_ACC) {
 
 			/* Accepted, parse the response. */
 			buf = cp + 1;
 			len -= sizeof(*cp);
 		} else if (ntohs(cp->ct_cmd) == FC_FS_RJT) {
-			FC_DBG("GPN_FT rejected reason %x exp %x "
-			       "(check zoning)\n", cp->ct_reason,
-			       cp->ct_explan);
+			FC_DISC_DBG(disc, "GPN_FT rejected reason %x exp %x "
+				    "(check zoning)\n", cp->ct_reason,
+				    cp->ct_explan);
 			disc->event = DISC_EV_FAILED;
 			fc_disc_done(disc);
 		} else {
-			FC_DBG("GPN_FT unexpected response code %x\n",
-			       ntohs(cp->ct_cmd));
+			FC_DISC_DBG(disc, "GPN_FT unexpected response code "
+				    "%x\n", ntohs(cp->ct_cmd));
 		}
 	} else if (fr_sof(fp) == FC_SOF_N3 &&
 		   seq_cnt == disc->seq_count) {
 		buf = fh + 1;
 	} else {
-		FC_DBG("GPN_FT unexpected frame - out of sequence? "
-		       "seq_cnt %x expected %x sof %x eof %x\n",
-		       seq_cnt, disc->seq_count, fr_sof(fp), fr_eof(fp));
+		FC_DISC_DBG(disc, "GPN_FT unexpected frame - out of sequence? "
+			    "seq_cnt %x expected %x sof %x eof %x\n",
+			    seq_cnt, disc->seq_count, fr_sof(fp), fr_eof(fp));
 	}
 	if (buf) {
 		error = fc_disc_gpn_ft_parse(disc, buf, len);
diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
index 7af9bceb8aa..2bc22be5f84 100644
--- a/drivers/scsi/libfc/fc_exch.c
+++ b/drivers/scsi/libfc/fc_exch.c
@@ -32,18 +32,7 @@
 #include <scsi/libfc.h>
 #include <scsi/fc_encode.h>
 
-/*
- * fc_exch_debug can be set in debugger or at compile time to get more logs.
- */
-static int fc_exch_debug;
-
-#define FC_DEBUG_EXCH(fmt...)			\
-	do {					\
-		if (fc_exch_debug)		\
-			FC_DBG(fmt);		\
-	} while (0)
-
-static struct kmem_cache *fc_em_cachep;	/* cache for exchanges */
+static struct kmem_cache *fc_em_cachep;        /* cache for exchanges */
 
 /*
  * Structure and function definitions for managing Fibre Channel Exchanges
@@ -333,8 +322,8 @@ static inline void fc_exch_timer_set_locked(struct fc_exch *ep,
 	if (ep->state & (FC_EX_RST_CLEANUP | FC_EX_DONE))
 		return;
 
-	FC_DEBUG_EXCH("Exchange (%4x) timed out, notifying the upper layer\n",
-		      ep->xid);
+	FC_EXCH_DBG(ep, "Exchange timed out, notifying the upper layer\n");
+
 	if (schedule_delayed_work(&ep->timeout_work,
 				  msecs_to_jiffies(timer_msec)))
 		fc_exch_hold(ep);		/* hold for timer */
@@ -545,7 +534,7 @@ struct fc_exch *fc_exch_alloc(struct fc_exch_mgr *mp,
 		/* alloc a new xid */
 		xid = fc_em_alloc_xid(mp, fp);
 		if (!xid) {
-			printk(KERN_ERR "fc_em_alloc_xid() failed\n");
+			printk(KERN_WARNING "libfc: Failed to allocate an exhange\n");
 			goto err;
 		}
 	}
@@ -820,8 +809,8 @@ static struct fc_seq *fc_seq_start_next_locked(struct fc_seq *sp)
 	struct fc_exch *ep = fc_seq_exch(sp);
 
 	sp = fc_seq_alloc(ep, ep->seq_id++);
-	FC_DEBUG_EXCH("exch %4x f_ctl %6x seq %2x\n",
-		      ep->xid, ep->f_ctl, sp->id);
+	FC_EXCH_DBG(ep, "f_ctl %6x seq %2x\n",
+		    ep->f_ctl, sp->id);
 	return sp;
 }
 /*
@@ -901,7 +890,7 @@ void fc_seq_els_rsp_send(struct fc_seq *sp, enum fc_els_cmd els_cmd,
 		fc_exch_els_rec(sp, els_data->fp);
 		break;
 	default:
-		FC_DBG("Invalid ELS CMD:%x\n", els_cmd);
+		FC_EXCH_DBG(fc_seq_exch(sp), "Invalid ELS CMD:%x\n", els_cmd);
 	}
 }
 EXPORT_SYMBOL(fc_seq_els_rsp_send);
@@ -1134,7 +1123,7 @@ static void fc_exch_recv_req(struct fc_lport *lp, struct fc_exch_mgr *mp,
 			lp->tt.lport_recv(lp, sp, fp);
 		fc_exch_release(ep);	/* release from lookup */
 	} else {
-		FC_DEBUG_EXCH("exch/seq lookup failed: reject %x\n", reject);
+		FC_EM_DBG(mp, "exch/seq lookup failed: reject %x\n", reject);
 		fc_frame_free(fp);
 	}
 }
@@ -1242,10 +1231,10 @@ static void fc_exch_recv_resp(struct fc_exch_mgr *mp, struct fc_frame *fp)
 	sp = fc_seq_lookup_orig(mp, fp);	/* doesn't hold sequence */
 	if (!sp) {
 		atomic_inc(&mp->stats.xid_not_found);
-		FC_DEBUG_EXCH("seq lookup failed\n");
+		FC_EM_DBG(mp, "seq lookup failed\n");
 	} else {
 		atomic_inc(&mp->stats.non_bls_resp);
-		FC_DEBUG_EXCH("non-BLS response to sequence");
+		FC_EM_DBG(mp, "non-BLS response to sequence");
 	}
 	fc_frame_free(fp);
 }
@@ -1266,8 +1255,8 @@ static void fc_exch_abts_resp(struct fc_exch *ep, struct fc_frame *fp)
 	int rc = 1, has_rec = 0;
 
 	fh = fc_frame_header_get(fp);
-	FC_DEBUG_EXCH("exch: BLS rctl %x - %s\n",
-		      fh->fh_r_ctl, fc_exch_rctl_name(fh->fh_r_ctl));
+	FC_EXCH_DBG(ep, "exch: BLS rctl %x - %s\n", fh->fh_r_ctl,
+		    fc_exch_rctl_name(fh->fh_r_ctl));
 
 	if (cancel_delayed_work_sync(&ep->timeout_work))
 		fc_exch_release(ep);	/* release from pending timer hold */
@@ -1359,9 +1348,9 @@ static void fc_exch_recv_bls(struct fc_exch_mgr *mp, struct fc_frame *fp)
 		case FC_RCTL_ACK_0:
 			break;
 		default:
-			FC_DEBUG_EXCH("BLS rctl %x - %s received",
-				      fh->fh_r_ctl,
-				      fc_exch_rctl_name(fh->fh_r_ctl));
+			FC_EXCH_DBG(ep, "BLS rctl %x - %s received",
+				    fh->fh_r_ctl,
+				    fc_exch_rctl_name(fh->fh_r_ctl));
 			break;
 		}
 		fc_frame_free(fp);
@@ -1599,7 +1588,8 @@ static void fc_exch_rrq_resp(struct fc_seq *sp, struct fc_frame *fp, void *arg)
 
 		if (err == -FC_EX_CLOSED || err == -FC_EX_TIMEOUT)
 			goto cleanup;
-		FC_DBG("Cannot process RRQ, because of frame error %d\n", err);
+		FC_EXCH_DBG(aborted_ep, "Cannot process RRQ, "
+			    "frame error %d\n", err);
 		return;
 	}
 
@@ -1608,12 +1598,13 @@ static void fc_exch_rrq_resp(struct fc_seq *sp, struct fc_frame *fp, void *arg)
 
 	switch (op) {
 	case ELS_LS_RJT:
-		FC_DBG("LS_RJT for RRQ");
+		FC_EXCH_DBG(aborted_ep, "LS_RJT for RRQ");
 		/* fall through */
 	case ELS_LS_ACC:
 		goto cleanup;
 	default:
-		FC_DBG("unexpected response op %x for RRQ", op);
+		FC_EXCH_DBG(aborted_ep, "unexpected response op %x "
+			    "for RRQ", op);
 		return;
 	}
 
@@ -1740,8 +1731,8 @@ struct fc_exch_mgr *fc_exch_mgr_alloc(struct fc_lport *lp,
 	size_t len;
 
 	if (max_xid <= min_xid || min_xid == 0 || max_xid == FC_XID_UNKNOWN) {
-		FC_DBG("Invalid min_xid 0x:%x and max_xid 0x:%x\n",
-		       min_xid, max_xid);
+		FC_LPORT_DBG(lp, "Invalid min_xid 0x:%x and max_xid 0x:%x\n",
+			     min_xid, max_xid);
 		return NULL;
 	}
 
@@ -1878,7 +1869,8 @@ void fc_exch_recv(struct fc_lport *lp, struct fc_exch_mgr *mp,
 
 	/* lport lock ? */
 	if (!lp || !mp || (lp->state == LPORT_ST_NONE)) {
-		FC_DBG("fc_lport or EM is not allocated and configured");
+		FC_LPORT_DBG(lp, "Receiving frames for an lport that "
+			     "has not been initialized correctly\n");
 		fc_frame_free(fp);
 		return;
 	}
@@ -1904,7 +1896,7 @@ void fc_exch_recv(struct fc_lport *lp, struct fc_exch_mgr *mp,
 			fc_exch_recv_req(lp, mp, fp);
 		break;
 	default:
-		FC_DBG("dropping invalid frame (eof %x)", fr_eof(fp));
+		FC_EM_DBG(mp, "dropping invalid frame (eof %x)", fr_eof(fp));
 		fc_frame_free(fp);
 		break;
 	}
diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c
index ad8b747837b..e303e0d12c4 100644
--- a/drivers/scsi/libfc/fc_fcp.c
+++ b/drivers/scsi/libfc/fc_fcp.c
@@ -43,13 +43,9 @@ MODULE_AUTHOR("Open-FCoE.org");
 MODULE_DESCRIPTION("libfc");
 MODULE_LICENSE("GPL v2");
 
-static int fc_fcp_debug;
-
-#define FC_DEBUG_FCP(fmt...)			\
-	do {					\
-		if (fc_fcp_debug)		\
-			FC_DBG(fmt);		\
-	} while (0)
+unsigned int fc_debug_logging;
+module_param_named(debug_logging, fc_debug_logging, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(debug_logging, "a bit mask of logging levels");
 
 static struct kmem_cache *scsi_pkt_cachep;
 
@@ -347,8 +343,8 @@ static void fc_fcp_recv_data(struct fc_fcp_pkt *fsp, struct fc_frame *fp)
 		if ((fr_flags(fp) & FCPHF_CRC_UNCHECKED) &&
 		    fc_frame_crc_check(fp))
 			goto crc_err;
-		FC_DEBUG_FCP("data received past end. len %zx offset %zx "
-			     "data_len %x\n", len, offset, fsp->data_len);
+		FC_FCP_DBG(fsp, "data received past end. len %zx offset %zx "
+			   "data_len %x\n", len, offset, fsp->data_len);
 		fc_fcp_retry_cmd(fsp);
 		return;
 	}
@@ -411,7 +407,8 @@ crc_err:
 			stats->ErrorFrames++;
 			/* FIXME - per cpu count, not total count! */
 			if (stats->InvalidCRCCount++ < 5)
-				printk(KERN_WARNING "CRC error on data frame for port (%6x)\n",
+				printk(KERN_WARNING "libfc: CRC error on data "
+				       "frame for port (%6x)\n",
 				       fc_host_port_id(lp->host));
 			/*
 			 * Assume the frame is total garbage.
@@ -475,14 +472,14 @@ static int fc_fcp_send_data(struct fc_fcp_pkt *fsp, struct fc_seq *seq,
 	WARN_ON(seq_blen <= 0);
 	if (unlikely(offset + seq_blen > fsp->data_len)) {
 		/* this should never happen */
-		FC_DEBUG_FCP("xfer-ready past end. seq_blen %zx offset %zx\n",
-			     seq_blen, offset);
+		FC_FCP_DBG(fsp, "xfer-ready past end. seq_blen %zx "
+			   "offset %zx\n", seq_blen, offset);
 		fc_fcp_send_abort(fsp);
 		return 0;
 	} else if (offset != fsp->xfer_len) {
 		/* Out of Order Data Request - no problem, but unexpected. */
-		FC_DEBUG_FCP("xfer-ready non-contiguous. "
-			     "seq_blen %zx offset %zx\n", seq_blen, offset);
+		FC_FCP_DBG(fsp, "xfer-ready non-contiguous. "
+			   "seq_blen %zx offset %zx\n", seq_blen, offset);
 	}
 
 	/*
@@ -493,7 +490,7 @@ static int fc_fcp_send_data(struct fc_fcp_pkt *fsp, struct fc_seq *seq,
 	t_blen = fsp->max_payload;
 	if (lp->seq_offload) {
 		t_blen = min(seq_blen, (size_t)lp->lso_max);
-		FC_DEBUG_FCP("fsp=%p:lso:blen=%zx lso_max=0x%x t_blen=%zx\n",
+		FC_FCP_DBG(fsp, "fsp=%p:lso:blen=%zx lso_max=0x%x t_blen=%zx\n",
 			   fsp, seq_blen, lp->lso_max, t_blen);
 	}
 
@@ -694,7 +691,7 @@ static void fc_fcp_reduce_can_queue(struct fc_lport *lp)
 	if (!can_queue)
 		can_queue = 1;
 	lp->host->can_queue = can_queue;
-	shost_printk(KERN_ERR, lp->host, "Could not allocate frame.\n"
+	shost_printk(KERN_ERR, lp->host, "libfc: Could not allocate frame.\n"
 		     "Reducing can_queue to %d.\n", can_queue);
 done:
 	spin_unlock_irqrestore(lp->host->host_lock, flags);
@@ -768,7 +765,7 @@ static void fc_fcp_recv(struct fc_seq *seq, struct fc_frame *fp, void *arg)
 
 		fc_fcp_resp(fsp, fp);
 	} else {
-		FC_DBG("unexpected frame.  r_ctl %x\n", r_ctl);
+		FC_FCP_DBG(fsp, "unexpected frame.  r_ctl %x\n", r_ctl);
 	}
 unlock:
 	fc_fcp_unlock_pkt(fsp);
@@ -877,17 +874,17 @@ static void fc_fcp_resp(struct fc_fcp_pkt *fsp, struct fc_frame *fp)
 			return;
 		}
 		fsp->status_code = FC_DATA_OVRRUN;
-		FC_DBG("tgt %6x xfer len %zx greater than expected len %x. "
-		       "data len %x\n",
-		       fsp->rport->port_id,
-		       fsp->xfer_len, expected_len, fsp->data_len);
+		FC_FCP_DBG(fsp, "tgt %6x xfer len %zx greater than expected, "
+			   "len %x, data len %x\n",
+			   fsp->rport->port_id,
+			   fsp->xfer_len, expected_len, fsp->data_len);
 	}
 	fc_fcp_complete_locked(fsp);
 	return;
 
 len_err:
-	FC_DBG("short FCP response. flags 0x%x len %u respl %u snsl %u\n",
-	       flags, fr_len(fp), respl, snsl);
+	FC_FCP_DBG(fsp, "short FCP response. flags 0x%x len %u respl %u "
+		   "snsl %u\n", flags, fr_len(fp), respl, snsl);
 err:
 	fsp->status_code = FC_ERROR;
 	fc_fcp_complete_locked(fsp);
@@ -1107,13 +1104,11 @@ static void fc_fcp_error(struct fc_fcp_pkt *fsp, struct fc_frame *fp)
 	if (fc_fcp_lock_pkt(fsp))
 		return;
 
-	switch (error) {
-	case -FC_EX_CLOSED:
+	if (error == -FC_EX_CLOSED) {
 		fc_fcp_retry_cmd(fsp);
 		goto unlock;
-	default:
-		FC_DBG("unknown error %ld\n", PTR_ERR(fp));
 	}
+
 	/*
 	 * clear abort pending, because the lower layer
 	 * decided to force completion.
@@ -1145,10 +1140,10 @@ static int fc_fcp_pkt_abort(struct fc_lport *lp, struct fc_fcp_pkt *fsp)
 	fsp->wait_for_comp = 0;
 
 	if (!rc) {
-		FC_DBG("target abort cmd  failed\n");
+		FC_FCP_DBG(fsp, "target abort cmd  failed\n");
 		rc = FAILED;
 	} else if (fsp->state & FC_SRB_ABORTED) {
-		FC_DBG("target abort cmd  passed\n");
+		FC_FCP_DBG(fsp, "target abort cmd  passed\n");
 		rc = SUCCESS;
 		fc_fcp_complete_locked(fsp);
 	}
@@ -1213,7 +1208,7 @@ static int fc_lun_reset(struct fc_lport *lp, struct fc_fcp_pkt *fsp,
 	spin_unlock_bh(&fsp->scsi_pkt_lock);
 
 	if (!rc) {
-		FC_DBG("lun reset failed\n");
+		FC_SCSI_DBG(lp, "lun reset failed\n");
 		return FAILED;
 	}
 
@@ -1221,7 +1216,7 @@ static int fc_lun_reset(struct fc_lport *lp, struct fc_fcp_pkt *fsp,
 	if (fsp->cdb_status != FCP_TMF_CMPL)
 		return FAILED;
 
-	FC_DBG("lun reset to lun %u completed\n", lun);
+	FC_SCSI_DBG(lp, "lun reset to lun %u completed\n", lun);
 	fc_fcp_cleanup_each_cmd(lp, id, lun, FC_CMD_ABORTED);
 	return SUCCESS;
 }
@@ -1388,13 +1383,13 @@ static void fc_fcp_rec_resp(struct fc_seq *seq, struct fc_frame *fp, void *arg)
 		rjt = fc_frame_payload_get(fp, sizeof(*rjt));
 		switch (rjt->er_reason) {
 		default:
-			FC_DEBUG_FCP("device %x unexpected REC reject "
-				     "reason %d expl %d\n",
-				     fsp->rport->port_id, rjt->er_reason,
-				     rjt->er_explan);
+			FC_FCP_DBG(fsp, "device %x unexpected REC reject "
+				   "reason %d expl %d\n",
+				   fsp->rport->port_id, rjt->er_reason,
+				   rjt->er_explan);
 			/* fall through */
 		case ELS_RJT_UNSUP:
-			FC_DEBUG_FCP("device does not support REC\n");
+			FC_FCP_DBG(fsp, "device does not support REC\n");
 			rp = fsp->rport->dd_data;
 			/*
 			 * if we do not spport RECs or got some bogus
@@ -1514,8 +1509,8 @@ static void fc_fcp_rec_error(struct fc_fcp_pkt *fsp, struct fc_frame *fp)
 		break;
 
 	default:
-		FC_DBG("REC %p fid %x error unexpected error %d\n",
-		       fsp, fsp->rport->port_id, error);
+		FC_FCP_DBG(fsp, "REC %p fid %x error unexpected error %d\n",
+			   fsp, fsp->rport->port_id, error);
 		fsp->status_code = FC_CMD_PLOGO;
 		/* fall through */
 
@@ -1524,9 +1519,9 @@ static void fc_fcp_rec_error(struct fc_fcp_pkt *fsp, struct fc_frame *fp)
 		 * Assume REC or LS_ACC was lost.
 		 * The exchange manager will have aborted REC, so retry.
 		 */
-		FC_DBG("REC fid %x error error %d retry %d/%d\n",
-		       fsp->rport->port_id, error, fsp->recov_retry,
-		       FC_MAX_RECOV_RETRY);
+		FC_FCP_DBG(fsp, "REC fid %x error error %d retry %d/%d\n",
+			   fsp->rport->port_id, error, fsp->recov_retry,
+			   FC_MAX_RECOV_RETRY);
 		if (fsp->recov_retry++ < FC_MAX_RECOV_RETRY)
 			fc_fcp_rec(fsp);
 		else
@@ -2011,9 +2006,11 @@ int fc_eh_device_reset(struct scsi_cmnd *sc_cmd)
 	if (lp->state != LPORT_ST_READY)
 		return rc;
 
+	FC_SCSI_DBG(lp, "Resetting rport (%6x)\n", rport->port_id);
+
 	fsp = fc_fcp_pkt_alloc(lp, GFP_NOIO);
 	if (fsp == NULL) {
-		FC_DBG("could not allocate scsi_pkt\n");
+		printk(KERN_WARNING "libfc: could not allocate scsi_pkt\n");
 		sc_cmd->result = DID_NO_CONNECT << 16;
 		goto out;
 	}
@@ -2048,17 +2045,21 @@ int fc_eh_host_reset(struct scsi_cmnd *sc_cmd)
 	struct fc_lport *lp = shost_priv(shost);
 	unsigned long wait_tmo;
 
+	FC_SCSI_DBG(lp, "Resetting host\n");
+
 	lp->tt.lport_reset(lp);
 	wait_tmo = jiffies + FC_HOST_RESET_TIMEOUT;
 	while (!fc_fcp_lport_queue_ready(lp) && time_before(jiffies, wait_tmo))
 		msleep(1000);
 
 	if (fc_fcp_lport_queue_ready(lp)) {
-		shost_printk(KERN_INFO, shost, "Host reset succeeded.\n");
+		shost_printk(KERN_INFO, shost, "libfc: Host reset succeeded "
+			     "on port (%6x)\n", fc_host_port_id(lp->host));
 		return SUCCESS;
 	} else {
-		shost_printk(KERN_INFO, shost, "Host reset failed. "
-			     "lport not ready.\n");
+		shost_printk(KERN_INFO, shost, "libfc: Host reset failed, "
+			     "port (%6x) is not ready.\n",
+			     fc_host_port_id(lp->host));
 		return FAILED;
 	}
 }
@@ -2117,7 +2118,8 @@ void fc_fcp_destroy(struct fc_lport *lp)
 	struct fc_fcp_internal *si = fc_get_scsi_internal(lp);
 
 	if (!list_empty(&si->scsi_pkt_queue))
-		printk(KERN_ERR "Leaked scsi packets.\n");
+		printk(KERN_ERR "libfc: Leaked SCSI packets when destroying "
+		       "port (%6x)\n", fc_host_port_id(lp->host));
 
 	mempool_destroy(si->scsi_pkt_pool);
 	kfree(si);
@@ -2166,7 +2168,8 @@ static int __init libfc_init(void)
 					    sizeof(struct fc_fcp_pkt),
 					    0, SLAB_HWCACHE_ALIGN, NULL);
 	if (scsi_pkt_cachep == NULL) {
-		FC_DBG("Unable to allocate SRB cache...module load failed!");
+		printk(KERN_ERR "libfc: Unable to allocate SRB cache, "
+		       "module load failed!");
 		return -ENOMEM;
 	}
 
diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
index e0c247724d2..745fa5555d6 100644
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -101,14 +101,6 @@
 
 #define	DNS_DELAY	      3 /* Discovery delay after RSCN (in seconds)*/
 
-static int fc_lport_debug;
-
-#define FC_DEBUG_LPORT(fmt...)			\
-	do {					\
-		if (fc_lport_debug)		\
-			FC_DBG(fmt);		\
-	} while (0)
-
 static void fc_lport_error(struct fc_lport *, struct fc_frame *);
 
 static void fc_lport_enter_reset(struct fc_lport *);
@@ -151,8 +143,8 @@ static void fc_lport_rport_callback(struct fc_lport *lport,
 				    struct fc_rport *rport,
 				    enum fc_rport_event event)
 {
-	FC_DEBUG_LPORT("Received a %d event for port (%6x)\n", event,
-		       rport->port_id);
+	FC_LPORT_DBG(lport, "Received a %d event for port (%6x)\n", event,
+		     rport->port_id);
 
 	switch (event) {
 	case RPORT_EV_CREATED:
@@ -162,19 +154,19 @@ static void fc_lport_rport_callback(struct fc_lport *lport,
 				lport->dns_rp = rport;
 				fc_lport_enter_rpn_id(lport);
 			} else {
-				FC_DEBUG_LPORT("Received an CREATED event on "
-					       "port (%6x) for the directory "
-					       "server, but the lport is not "
-					       "in the DNS state, it's in the "
-					       "%d state", rport->port_id,
-					       lport->state);
+				FC_LPORT_DBG(lport, "Received an CREATED event "
+					     "on port (%6x) for the directory "
+					     "server, but the lport is not "
+					     "in the DNS state, it's in the "
+					     "%d state", rport->port_id,
+					     lport->state);
 				lport->tt.rport_logoff(rport);
 			}
 			mutex_unlock(&lport->lp_mutex);
 		} else
-			FC_DEBUG_LPORT("Received an event for port (%6x) "
-				       "which is not the directory server\n",
-				       rport->port_id);
+			FC_LPORT_DBG(lport, "Received an event for port (%6x) "
+				     "which is not the directory server\n",
+				     rport->port_id);
 		break;
 	case RPORT_EV_LOGO:
 	case RPORT_EV_FAILED:
@@ -185,9 +177,9 @@ static void fc_lport_rport_callback(struct fc_lport *lport,
 			mutex_unlock(&lport->lp_mutex);
 
 		} else
-			FC_DEBUG_LPORT("Received an event for port (%6x) "
-				       "which is not the directory server\n",
-				       rport->port_id);
+			FC_LPORT_DBG(lport, "Received an event for port (%6x) "
+				     "which is not the directory server\n",
+				     rport->port_id);
 		break;
 	case RPORT_EV_NONE:
 		break;
@@ -363,8 +355,8 @@ static void fc_lport_add_fc4_type(struct fc_lport *lport, enum fc_fh_type type)
 static void fc_lport_recv_rlir_req(struct fc_seq *sp, struct fc_frame *fp,
 				   struct fc_lport *lport)
 {
-	FC_DEBUG_LPORT("Received RLIR request while in state %s\n",
-		       fc_lport_state(lport));
+	FC_LPORT_DBG(lport, "Received RLIR request while in state %s\n",
+		     fc_lport_state(lport));
 
 	lport->tt.seq_els_rsp_send(sp, ELS_LS_ACC, NULL);
 	fc_frame_free(fp);
@@ -389,8 +381,8 @@ static void fc_lport_recv_echo_req(struct fc_seq *sp, struct fc_frame *in_fp,
 	void *dp;
 	u32 f_ctl;
 
-	FC_DEBUG_LPORT("Received RLIR request while in state %s\n",
-		       fc_lport_state(lport));
+	FC_LPORT_DBG(lport, "Received RLIR request while in state %s\n",
+		     fc_lport_state(lport));
 
 	len = fr_len(in_fp) - sizeof(struct fc_frame_header);
 	pp = fc_frame_payload_get(in_fp, len);
@@ -437,8 +429,8 @@ static void fc_lport_recv_rnid_req(struct fc_seq *sp, struct fc_frame *in_fp,
 	size_t len;
 	u32 f_ctl;
 
-	FC_DEBUG_LPORT("Received RNID request while in state %s\n",
-		       fc_lport_state(lport));
+	FC_LPORT_DBG(lport, "Received RNID request while in state %s\n",
+		     fc_lport_state(lport));
 
 	req = fc_frame_payload_get(in_fp, sizeof(*req));
 	if (!req) {
@@ -498,8 +490,8 @@ static void fc_lport_recv_adisc_req(struct fc_seq *sp, struct fc_frame *in_fp,
 	size_t len;
 	u32 f_ctl;
 
-	FC_DEBUG_LPORT("Received ADISC request while in state %s\n",
-		       fc_lport_state(lport));
+	FC_LPORT_DBG(lport, "Received ADISC request while in state %s\n",
+		     fc_lport_state(lport));
 
 	req = fc_frame_payload_get(in_fp, sizeof(*req));
 	if (!req) {
@@ -574,8 +566,8 @@ EXPORT_SYMBOL(fc_fabric_login);
  */
 void fc_linkup(struct fc_lport *lport)
 {
-	FC_DEBUG_LPORT("Link is up for port (%6x)\n",
-		       fc_host_port_id(lport->host));
+	printk(KERN_INFO "libfc: Link up on port (%6x)\n",
+	       fc_host_port_id(lport->host));
 
 	mutex_lock(&lport->lp_mutex);
 	if (!lport->link_up) {
@@ -595,8 +587,8 @@ EXPORT_SYMBOL(fc_linkup);
 void fc_linkdown(struct fc_lport *lport)
 {
 	mutex_lock(&lport->lp_mutex);
-	FC_DEBUG_LPORT("Link is down for port (%6x)\n",
-		       fc_host_port_id(lport->host));
+	printk(KERN_INFO "libfc: Link down on port (%6x)\n",
+	       fc_host_port_id(lport->host));
 
 	if (lport->link_up) {
 		lport->link_up = 0;
@@ -701,12 +693,11 @@ void fc_lport_disc_callback(struct fc_lport *lport, enum fc_disc_event event)
 {
 	switch (event) {
 	case DISC_EV_SUCCESS:
-		FC_DEBUG_LPORT("Got a SUCCESS event for port (%6x)\n",
-			       fc_host_port_id(lport->host));
+		FC_LPORT_DBG(lport, "Discovery succeeded\n");
 		break;
 	case DISC_EV_FAILED:
-		FC_DEBUG_LPORT("Got a FAILED event for port (%6x)\n",
-			       fc_host_port_id(lport->host));
+		printk(KERN_ERR "libfc: Discovery failed for port (%6x)\n",
+		       fc_host_port_id(lport->host));
 		mutex_lock(&lport->lp_mutex);
 		fc_lport_enter_reset(lport);
 		mutex_unlock(&lport->lp_mutex);
@@ -726,8 +717,8 @@ void fc_lport_disc_callback(struct fc_lport *lport, enum fc_disc_event event)
  */
 static void fc_lport_enter_ready(struct fc_lport *lport)
 {
-	FC_DEBUG_LPORT("Port (%6x) entered Ready from state %s\n",
-		       fc_host_port_id(lport->host), fc_lport_state(lport));
+	FC_LPORT_DBG(lport, "Entered READY from state %s\n",
+		     fc_lport_state(lport));
 
 	fc_lport_state_enter(lport, LPORT_ST_READY);
 
@@ -762,8 +753,8 @@ static void fc_lport_recv_flogi_req(struct fc_seq *sp_in,
 	u32 local_fid;
 	u32 f_ctl;
 
-	FC_DEBUG_LPORT("Received FLOGI request while in state %s\n",
-		       fc_lport_state(lport));
+	FC_LPORT_DBG(lport, "Received FLOGI request while in state %s\n",
+		     fc_lport_state(lport));
 
 	fh = fc_frame_header_get(rx_fp);
 	remote_fid = ntoh24(fh->fh_s_id);
@@ -772,12 +763,11 @@ static void fc_lport_recv_flogi_req(struct fc_seq *sp_in,
 		goto out;
 	remote_wwpn = get_unaligned_be64(&flp->fl_wwpn);
 	if (remote_wwpn == lport->wwpn) {
-		FC_DBG("FLOGI from port with same WWPN %llx "
-		       "possible configuration error\n",
-		       (unsigned long long)remote_wwpn);
+		printk(KERN_WARNING "libfc: Received FLOGI from port "
+		       "with same WWPN %llx\n", remote_wwpn);
 		goto out;
 	}
-	FC_DBG("FLOGI from port WWPN %llx\n", (unsigned long long)remote_wwpn);
+	FC_LPORT_DBG(lport, "FLOGI from port WWPN %llx\n", remote_wwpn);
 
 	/*
 	 * XXX what is the right thing to do for FIDs?
@@ -909,7 +899,8 @@ static void fc_lport_recv_req(struct fc_lport *lport, struct fc_seq *sp,
 			}
 		}
 	} else {
-		FC_DBG("dropping invalid frame (eof %x)\n", fr_eof(fp));
+		FC_LPORT_DBG(lport, "dropping invalid frame (eof %x)\n",
+			     fr_eof(fp));
 		fc_frame_free(fp);
 	}
 	mutex_unlock(&lport->lp_mutex);
@@ -947,8 +938,8 @@ EXPORT_SYMBOL(fc_lport_reset);
  */
 static void fc_lport_enter_reset(struct fc_lport *lport)
 {
-	FC_DEBUG_LPORT("Port (%6x) entered RESET state from %s state\n",
-		       fc_host_port_id(lport->host), fc_lport_state(lport));
+	FC_LPORT_DBG(lport, "Entered RESET state from %s state\n",
+		     fc_lport_state(lport));
 
 	fc_lport_state_enter(lport, LPORT_ST_RESET);
 
@@ -982,9 +973,9 @@ static void fc_lport_enter_reset(struct fc_lport *lport)
 static void fc_lport_error(struct fc_lport *lport, struct fc_frame *fp)
 {
 	unsigned long delay = 0;
-	FC_DEBUG_LPORT("Error %ld in state %s, retries %d\n",
-		       PTR_ERR(fp), fc_lport_state(lport),
-		       lport->retry_count);
+	FC_LPORT_DBG(lport, "Error %ld in state %s, retries %d\n",
+		     PTR_ERR(fp), fc_lport_state(lport),
+		     lport->retry_count);
 
 	if (!fp || PTR_ERR(fp) == -FC_EX_TIMEOUT) {
 		/*
@@ -1040,11 +1031,11 @@ static void fc_lport_rft_id_resp(struct fc_seq *sp, struct fc_frame *fp,
 
 	mutex_lock(&lport->lp_mutex);
 
-	FC_DEBUG_LPORT("Received a RFT_ID response\n");
+	FC_LPORT_DBG(lport, "Received a RFT_ID response\n");
 
 	if (lport->state != LPORT_ST_RFT_ID) {
-		FC_DBG("Received a RFT_ID response, but in state %s\n",
-		       fc_lport_state(lport));
+		FC_LPORT_DBG(lport, "Received a RFT_ID response, but in state "
+			     "%s\n", fc_lport_state(lport));
 		if (IS_ERR(fp))
 			goto err;
 		goto out;
@@ -1094,11 +1085,11 @@ static void fc_lport_rpn_id_resp(struct fc_seq *sp, struct fc_frame *fp,
 
 	mutex_lock(&lport->lp_mutex);
 
-	FC_DEBUG_LPORT("Received a RPN_ID response\n");
+	FC_LPORT_DBG(lport, "Received a RPN_ID response\n");
 
 	if (lport->state != LPORT_ST_RPN_ID) {
-		FC_DBG("Received a RPN_ID response, but in state %s\n",
-		       fc_lport_state(lport));
+		FC_LPORT_DBG(lport, "Received a RPN_ID response, but in state "
+			     "%s\n", fc_lport_state(lport));
 		if (IS_ERR(fp))
 			goto err;
 		goto out;
@@ -1146,11 +1137,11 @@ static void fc_lport_scr_resp(struct fc_seq *sp, struct fc_frame *fp,
 
 	mutex_lock(&lport->lp_mutex);
 
-	FC_DEBUG_LPORT("Received a SCR response\n");
+	FC_LPORT_DBG(lport, "Received a SCR response\n");
 
 	if (lport->state != LPORT_ST_SCR) {
-		FC_DBG("Received a SCR response, but in state %s\n",
-		       fc_lport_state(lport));
+		FC_LPORT_DBG(lport, "Received a SCR response, but in state "
+			     "%s\n", fc_lport_state(lport));
 		if (IS_ERR(fp))
 			goto err;
 		goto out;
@@ -1184,8 +1175,8 @@ static void fc_lport_enter_scr(struct fc_lport *lport)
 {
 	struct fc_frame *fp;
 
-	FC_DEBUG_LPORT("Port (%6x) entered SCR state from %s state\n",
-		       fc_host_port_id(lport->host), fc_lport_state(lport));
+	FC_LPORT_DBG(lport, "Entered SCR state from %s state\n",
+		     fc_lport_state(lport));
 
 	fc_lport_state_enter(lport, LPORT_ST_SCR);
 
@@ -1213,8 +1204,8 @@ static void fc_lport_enter_rft_id(struct fc_lport *lport)
 	struct fc_ns_fts *lps;
 	int i;
 
-	FC_DEBUG_LPORT("Port (%6x) entered RFT_ID state from %s state\n",
-		       fc_host_port_id(lport->host), fc_lport_state(lport));
+	FC_LPORT_DBG(lport, "Entered RFT_ID state from %s state\n",
+		     fc_lport_state(lport));
 
 	fc_lport_state_enter(lport, LPORT_ST_RFT_ID);
 
@@ -1253,8 +1244,8 @@ static void fc_lport_enter_rpn_id(struct fc_lport *lport)
 {
 	struct fc_frame *fp;
 
-	FC_DEBUG_LPORT("Port (%6x) entered RPN_ID state from %s state\n",
-		       fc_host_port_id(lport->host), fc_lport_state(lport));
+	FC_LPORT_DBG(lport, "Entered RPN_ID state from %s state\n",
+		     fc_lport_state(lport));
 
 	fc_lport_state_enter(lport, LPORT_ST_RPN_ID);
 
@@ -1294,8 +1285,8 @@ static void fc_lport_enter_dns(struct fc_lport *lport)
 	dp.ids.roles = FC_RPORT_ROLE_UNKNOWN;
 	dp.lp = lport;
 
-	FC_DEBUG_LPORT("Port (%6x) entered DNS state from %s state\n",
-		       fc_host_port_id(lport->host), fc_lport_state(lport));
+	FC_LPORT_DBG(lport, "Entered DNS state from %s state\n",
+		     fc_lport_state(lport));
 
 	fc_lport_state_enter(lport, LPORT_ST_DNS);
 
@@ -1374,11 +1365,11 @@ static void fc_lport_logo_resp(struct fc_seq *sp, struct fc_frame *fp,
 
 	mutex_lock(&lport->lp_mutex);
 
-	FC_DEBUG_LPORT("Received a LOGO response\n");
+	FC_LPORT_DBG(lport, "Received a LOGO response\n");
 
 	if (lport->state != LPORT_ST_LOGO) {
-		FC_DBG("Received a LOGO response, but in state %s\n",
-		       fc_lport_state(lport));
+		FC_LPORT_DBG(lport, "Received a LOGO response, but in state "
+			     "%s\n", fc_lport_state(lport));
 		if (IS_ERR(fp))
 			goto err;
 		goto out;
@@ -1413,8 +1404,8 @@ static void fc_lport_enter_logo(struct fc_lport *lport)
 	struct fc_frame *fp;
 	struct fc_els_logo *logo;
 
-	FC_DEBUG_LPORT("Port (%6x) entered LOGO state from %s state\n",
-		       fc_host_port_id(lport->host), fc_lport_state(lport));
+	FC_LPORT_DBG(lport, "Entered LOGO state from %s state\n",
+		     fc_lport_state(lport));
 
 	fc_lport_state_enter(lport, LPORT_ST_LOGO);
 
@@ -1456,11 +1447,11 @@ static void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp,
 
 	mutex_lock(&lport->lp_mutex);
 
-	FC_DEBUG_LPORT("Received a FLOGI response\n");
+	FC_LPORT_DBG(lport, "Received a FLOGI response\n");
 
 	if (lport->state != LPORT_ST_FLOGI) {
-		FC_DBG("Received a FLOGI response, but in state %s\n",
-		       fc_lport_state(lport));
+		FC_LPORT_DBG(lport, "Received a FLOGI response, but in state "
+			     "%s\n", fc_lport_state(lport));
 		if (IS_ERR(fp))
 			goto err;
 		goto out;
@@ -1475,7 +1466,8 @@ static void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp,
 	did = ntoh24(fh->fh_d_id);
 	if (fc_frame_payload_op(fp) == ELS_LS_ACC && did != 0) {
 
-		FC_DEBUG_LPORT("Assigned fid %x\n", did);
+		printk(KERN_INFO "libfc: Assigned FID (%6x) in FLOGI response\n",
+		       did);
 		fc_host_port_id(lport->host) = did;
 
 		flp = fc_frame_payload_get(fp, sizeof(*flp));
@@ -1494,7 +1486,8 @@ static void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp,
 				if (e_d_tov > lport->e_d_tov)
 					lport->e_d_tov = e_d_tov;
 				lport->r_a_tov = 2 * e_d_tov;
-				FC_DBG("Point-to-Point mode\n");
+				printk(KERN_INFO "libfc: Port (%6x) entered "
+				       "point to point mode\n", did);
 				fc_lport_ptp_setup(lport, ntoh24(fh->fh_s_id),
 						   get_unaligned_be64(
 							   &flp->fl_wwpn),
@@ -1517,7 +1510,7 @@ static void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp,
 			}
 		}
 	} else {
-		FC_DBG("bad FLOGI response\n");
+		FC_LPORT_DBG(lport, "Bad FLOGI response\n");
 	}
 
 out:
@@ -1537,7 +1530,8 @@ void fc_lport_enter_flogi(struct fc_lport *lport)
 {
 	struct fc_frame *fp;
 
-	FC_DEBUG_LPORT("Processing FLOGI state\n");
+	FC_LPORT_DBG(lport, "Entered FLOGI state from %s state\n",
+		     fc_lport_state(lport));
 
 	fc_lport_state_enter(lport, LPORT_ST_FLOGI);
 
diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index 7bfbff7e0ef..7162385f52e 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -55,14 +55,6 @@
 #include <scsi/libfc.h>
 #include <scsi/fc_encode.h>
 
-static int fc_rport_debug;
-
-#define FC_DEBUG_RPORT(fmt...)			\
-	do {					\
-		if (fc_rport_debug)		\
-			FC_DBG(fmt);		\
-	} while (0)
-
 struct workqueue_struct *rport_event_queue;
 
 static void fc_rport_enter_plogi(struct fc_rport *);
@@ -97,7 +89,7 @@ static const char *fc_rport_state_names[] = {
 static void fc_rport_rogue_destroy(struct device *dev)
 {
 	struct fc_rport *rport = dev_to_rport(dev);
-	FC_DEBUG_RPORT("Destroying rogue rport (%6x)\n", rport->port_id);
+	FC_RPORT_DBG(rport, "Destroying rogue rport\n");
 	kfree(rport);
 }
 
@@ -263,8 +255,8 @@ static void fc_rport_work(struct work_struct *work)
 
 			fc_rport_state_enter(new_rport, RPORT_ST_READY);
 		} else {
-			FC_DBG("Failed to create the rport for port "
-			       "(%6x).\n", ids.port_id);
+			printk(KERN_WARNING "libfc: Failed to allocate "
+			       " memory for rport (%6x)\n", ids.port_id);
 			event = RPORT_EV_FAILED;
 		}
 		if (rport->port_id != FC_FID_DIR_SERV)
@@ -309,7 +301,7 @@ int fc_rport_login(struct fc_rport *rport)
 
 	mutex_lock(&rdata->rp_mutex);
 
-	FC_DEBUG_RPORT("Login to port (%6x)\n", rport->port_id);
+	FC_RPORT_DBG(rport, "Login to port\n");
 
 	fc_rport_enter_plogi(rport);
 
@@ -329,16 +321,13 @@ int fc_rport_login(struct fc_rport *rport)
 int fc_rport_logoff(struct fc_rport *rport)
 {
 	struct fc_rport_libfc_priv *rdata = rport->dd_data;
-	struct fc_lport *lport = rdata->local_port;
 
 	mutex_lock(&rdata->rp_mutex);
 
-	FC_DEBUG_RPORT("Remove port (%6x)\n", rport->port_id);
+	FC_RPORT_DBG(rport, "Remove port\n");
 
 	if (rdata->rp_state == RPORT_ST_NONE) {
-		FC_DEBUG_RPORT("(%6x): Port (%6x) in NONE state,"
-			       " not removing", fc_host_port_id(lport->host),
-			       rport->port_id);
+		FC_RPORT_DBG(rport, "Port in NONE state, not removing\n");
 		mutex_unlock(&rdata->rp_mutex);
 		goto out;
 	}
@@ -379,7 +368,7 @@ static void fc_rport_enter_ready(struct fc_rport *rport)
 
 	fc_rport_state_enter(rport, RPORT_ST_READY);
 
-	FC_DEBUG_RPORT("Port (%6x) is Ready\n", rport->port_id);
+	FC_RPORT_DBG(rport, "Port is Ready\n");
 
 	rdata->event = RPORT_EV_CREATED;
 	queue_work(rport_event_queue, &rdata->event_work);
@@ -436,8 +425,8 @@ static void fc_rport_error(struct fc_rport *rport, struct fc_frame *fp)
 {
 	struct fc_rport_libfc_priv *rdata = rport->dd_data;
 
-	FC_DEBUG_RPORT("Error %ld in state %s, retries %d\n",
-		       PTR_ERR(fp), fc_rport_state(rport), rdata->retries);
+	FC_RPORT_DBG(rport, "Error %ld in state %s, retries %d\n",
+		     PTR_ERR(fp), fc_rport_state(rport), rdata->retries);
 
 	switch (rdata->rp_state) {
 	case RPORT_ST_PLOGI:
@@ -479,8 +468,8 @@ static void fc_rport_error_retry(struct fc_rport *rport, struct fc_frame *fp)
 		return fc_rport_error(rport, fp);
 
 	if (rdata->retries < rdata->local_port->max_rport_retry_count) {
-		FC_DEBUG_RPORT("Error %ld in state %s, retrying\n",
-			       PTR_ERR(fp), fc_rport_state(rport));
+		FC_RPORT_DBG(rport, "Error %ld in state %s, retrying\n",
+			     PTR_ERR(fp), fc_rport_state(rport));
 		rdata->retries++;
 		/* no additional delay on exchange timeouts */
 		if (PTR_ERR(fp) == -FC_EX_TIMEOUT)
@@ -517,12 +506,11 @@ static void fc_rport_plogi_resp(struct fc_seq *sp, struct fc_frame *fp,
 
 	mutex_lock(&rdata->rp_mutex);
 
-	FC_DEBUG_RPORT("Received a PLOGI response from port (%6x)\n",
-		       rport->port_id);
+	FC_RPORT_DBG(rport, "Received a PLOGI response\n");
 
 	if (rdata->rp_state != RPORT_ST_PLOGI) {
-		FC_DBG("Received a PLOGI response, but in state %s\n",
-		       fc_rport_state(rport));
+		FC_RPORT_DBG(rport, "Received a PLOGI response, but in state "
+			     "%s\n", fc_rport_state(rport));
 		if (IS_ERR(fp))
 			goto err;
 		goto out;
@@ -583,8 +571,8 @@ static void fc_rport_enter_plogi(struct fc_rport *rport)
 	struct fc_lport *lport = rdata->local_port;
 	struct fc_frame *fp;
 
-	FC_DEBUG_RPORT("Port (%6x) entered PLOGI state from %s state\n",
-		       rport->port_id, fc_rport_state(rport));
+	FC_RPORT_DBG(rport, "Port entered PLOGI state from %s state\n",
+		     fc_rport_state(rport));
 
 	fc_rport_state_enter(rport, RPORT_ST_PLOGI);
 
@@ -628,12 +616,11 @@ static void fc_rport_prli_resp(struct fc_seq *sp, struct fc_frame *fp,
 
 	mutex_lock(&rdata->rp_mutex);
 
-	FC_DEBUG_RPORT("Received a PRLI response from port (%6x)\n",
-		       rport->port_id);
+	FC_RPORT_DBG(rport, "Received a PRLI response\n");
 
 	if (rdata->rp_state != RPORT_ST_PRLI) {
-		FC_DBG("Received a PRLI response, but in state %s\n",
-		       fc_rport_state(rport));
+		FC_RPORT_DBG(rport, "Received a PRLI response, but in state "
+			     "%s\n", fc_rport_state(rport));
 		if (IS_ERR(fp))
 			goto err;
 		goto out;
@@ -663,7 +650,7 @@ static void fc_rport_prli_resp(struct fc_seq *sp, struct fc_frame *fp,
 		fc_rport_enter_rtv(rport);
 
 	} else {
-		FC_DBG("Bad ELS response\n");
+		FC_RPORT_DBG(rport, "Bad ELS response for PRLI command\n");
 		rdata->event = RPORT_EV_FAILED;
 		fc_rport_state_enter(rport, RPORT_ST_NONE);
 		queue_work(rport_event_queue, &rdata->event_work);
@@ -695,12 +682,11 @@ static void fc_rport_logo_resp(struct fc_seq *sp, struct fc_frame *fp,
 
 	mutex_lock(&rdata->rp_mutex);
 
-	FC_DEBUG_RPORT("Received a LOGO response from port (%6x)\n",
-		       rport->port_id);
+	FC_RPORT_DBG(rport, "Received a LOGO response\n");
 
 	if (rdata->rp_state != RPORT_ST_LOGO) {
-		FC_DEBUG_RPORT("Received a LOGO response, but in state %s\n",
-			       fc_rport_state(rport));
+		FC_RPORT_DBG(rport, "Received a LOGO response, but in state "
+			     "%s\n", fc_rport_state(rport));
 		if (IS_ERR(fp))
 			goto err;
 		goto out;
@@ -715,7 +701,7 @@ static void fc_rport_logo_resp(struct fc_seq *sp, struct fc_frame *fp,
 	if (op == ELS_LS_ACC) {
 		fc_rport_enter_rtv(rport);
 	} else {
-		FC_DBG("Bad ELS response\n");
+		FC_RPORT_DBG(rport, "Bad ELS response for LOGO command\n");
 		rdata->event = RPORT_EV_LOGO;
 		fc_rport_state_enter(rport, RPORT_ST_NONE);
 		queue_work(rport_event_queue, &rdata->event_work);
@@ -745,8 +731,8 @@ static void fc_rport_enter_prli(struct fc_rport *rport)
 	} *pp;
 	struct fc_frame *fp;
 
-	FC_DEBUG_RPORT("Port (%6x) entered PRLI state from %s state\n",
-		       rport->port_id, fc_rport_state(rport));
+	FC_RPORT_DBG(rport, "Port entered PRLI state from %s state\n",
+		     fc_rport_state(rport));
 
 	fc_rport_state_enter(rport, RPORT_ST_PRLI);
 
@@ -784,12 +770,11 @@ static void fc_rport_rtv_resp(struct fc_seq *sp, struct fc_frame *fp,
 
 	mutex_lock(&rdata->rp_mutex);
 
-	FC_DEBUG_RPORT("Received a RTV response from port (%6x)\n",
-		       rport->port_id);
+	FC_RPORT_DBG(rport, "Received a RTV response\n");
 
 	if (rdata->rp_state != RPORT_ST_RTV) {
-		FC_DBG("Received a RTV response, but in state %s\n",
-		       fc_rport_state(rport));
+		FC_RPORT_DBG(rport, "Received a RTV response, but in state "
+			     "%s\n", fc_rport_state(rport));
 		if (IS_ERR(fp))
 			goto err;
 		goto out;
@@ -844,8 +829,8 @@ static void fc_rport_enter_rtv(struct fc_rport *rport)
 	struct fc_rport_libfc_priv *rdata = rport->dd_data;
 	struct fc_lport *lport = rdata->local_port;
 
-	FC_DEBUG_RPORT("Port (%6x) entered RTV state from %s state\n",
-		       rport->port_id, fc_rport_state(rport));
+	FC_RPORT_DBG(rport, "Port entered RTV state from %s state\n",
+		     fc_rport_state(rport));
 
 	fc_rport_state_enter(rport, RPORT_ST_RTV);
 
@@ -875,8 +860,8 @@ static void fc_rport_enter_logo(struct fc_rport *rport)
 	struct fc_lport *lport = rdata->local_port;
 	struct fc_frame *fp;
 
-	FC_DEBUG_RPORT("Port (%6x) entered LOGO state from %s state\n",
-		       rport->port_id, fc_rport_state(rport));
+	FC_RPORT_DBG(rport, "Port entered LOGO state from %s state\n",
+		     fc_rport_state(rport));
 
 	fc_rport_state_enter(rport, RPORT_ST_LOGO);
 
@@ -983,14 +968,13 @@ static void fc_rport_recv_plogi_req(struct fc_rport *rport,
 
 	fh = fc_frame_header_get(fp);
 
-	FC_DEBUG_RPORT("Received PLOGI request from port (%6x) "
-		       "while in state %s\n", ntoh24(fh->fh_s_id),
-		       fc_rport_state(rport));
+	FC_RPORT_DBG(rport, "Received PLOGI request while in state %s\n",
+		     fc_rport_state(rport));
 
 	sid = ntoh24(fh->fh_s_id);
 	pl = fc_frame_payload_get(fp, sizeof(*pl));
 	if (!pl) {
-		FC_DBG("incoming PLOGI from %x too short\n", sid);
+		FC_RPORT_DBG(rport, "Received PLOGI too short\n");
 		WARN_ON(1);
 		/* XXX TBD: send reject? */
 		fc_frame_free(fp);
@@ -1012,26 +996,26 @@ static void fc_rport_recv_plogi_req(struct fc_rport *rport,
 	 */
 	switch (rdata->rp_state) {
 	case RPORT_ST_INIT:
-		FC_DEBUG_RPORT("incoming PLOGI from %6x wwpn %llx state INIT "
-			       "- reject\n", sid, (unsigned long long)wwpn);
+		FC_RPORT_DBG(rport, "Received PLOGI, wwpn %llx state INIT "
+			     "- reject\n", (unsigned long long)wwpn);
 		reject = ELS_RJT_UNSUP;
 		break;
 	case RPORT_ST_PLOGI:
-		FC_DEBUG_RPORT("incoming PLOGI from %x in PLOGI state %d\n",
-			       sid, rdata->rp_state);
+		FC_RPORT_DBG(rport, "Received PLOGI in PLOGI state %d\n",
+			     rdata->rp_state);
 		if (wwpn < lport->wwpn)
 			reject = ELS_RJT_INPROG;
 		break;
 	case RPORT_ST_PRLI:
 	case RPORT_ST_READY:
-		FC_DEBUG_RPORT("incoming PLOGI from %x in logged-in state %d "
-			       "- ignored for now\n", sid, rdata->rp_state);
+		FC_RPORT_DBG(rport, "Received PLOGI in logged-in state %d "
+			     "- ignored for now\n", rdata->rp_state);
 		/* XXX TBD - should reset */
 		break;
 	case RPORT_ST_NONE:
 	default:
-		FC_DEBUG_RPORT("incoming PLOGI from %x in unexpected "
-			       "state %d\n", sid, rdata->rp_state);
+		FC_RPORT_DBG(rport, "Received PLOGI in unexpected "
+			     "state %d\n", rdata->rp_state);
 		fc_frame_free(fp);
 		return;
 		break;
@@ -1115,9 +1099,8 @@ static void fc_rport_recv_prli_req(struct fc_rport *rport,
 
 	fh = fc_frame_header_get(rx_fp);
 
-	FC_DEBUG_RPORT("Received PRLI request from port (%6x) "
-		       "while in state %s\n", ntoh24(fh->fh_s_id),
-		       fc_rport_state(rport));
+	FC_RPORT_DBG(rport, "Received PRLI request while in state %s\n",
+		     fc_rport_state(rport));
 
 	switch (rdata->rp_state) {
 	case RPORT_ST_PRLI:
@@ -1252,9 +1235,8 @@ static void fc_rport_recv_prlo_req(struct fc_rport *rport, struct fc_seq *sp,
 
 	fh = fc_frame_header_get(fp);
 
-	FC_DEBUG_RPORT("Received PRLO request from port (%6x) "
-		       "while in state %s\n", ntoh24(fh->fh_s_id),
-		       fc_rport_state(rport));
+	FC_RPORT_DBG(rport, "Received PRLO request while in state %s\n",
+		     fc_rport_state(rport));
 
 	if (rdata->rp_state == RPORT_ST_NONE) {
 		fc_frame_free(fp);
@@ -1286,9 +1268,8 @@ static void fc_rport_recv_logo_req(struct fc_rport *rport, struct fc_seq *sp,
 
 	fh = fc_frame_header_get(fp);
 
-	FC_DEBUG_RPORT("Received LOGO request from port (%6x) "
-		       "while in state %s\n", ntoh24(fh->fh_s_id),
-		       fc_rport_state(rport));
+	FC_RPORT_DBG(rport, "Received LOGO request while in state %s\n",
+		     fc_rport_state(rport));
 
 	if (rdata->rp_state == RPORT_ST_NONE) {
 		fc_frame_free(fp);
@@ -1308,7 +1289,6 @@ static void fc_rport_flush_queue(void)
 	flush_workqueue(rport_event_queue);
 }
 
-
 int fc_rport_init(struct fc_lport *lport)
 {
 	if (!lport->tt.rport_create)
diff --git a/include/scsi/fc_encode.h b/include/scsi/fc_encode.h
index 6300f556bce..a0ff61c3e93 100644
--- a/include/scsi/fc_encode.h
+++ b/include/scsi/fc_encode.h
@@ -107,7 +107,6 @@ static inline int fc_ct_fill(struct fc_lport *lport, struct fc_frame *fp,
 		break;
 
 	default:
-		FC_DBG("Invalid op code %x \n", op);
 		return -EINVAL;
 	}
 	*r_ctl = FC_RCTL_DD_UNSOL_CTL;
@@ -298,7 +297,6 @@ static inline int fc_els_fill(struct fc_lport *lport, struct fc_rport *rport,
 		break;
 
 	default:
-		FC_DBG("Invalid op code %x \n", op);
 		return -EINVAL;
 	}
 
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index ebdd9f4cf07..b92584a8843 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -34,17 +34,72 @@
 
 #include <scsi/fc_frame.h>
 
-#define LIBFC_DEBUG
-
-#ifdef LIBFC_DEBUG
-/* Log messages */
-#define FC_DBG(fmt, args...)						\
-	do {								\
-		printk(KERN_INFO "%s " fmt, __func__, ##args);		\
-	} while (0)
-#else
-#define FC_DBG(fmt, args...)
-#endif
+#define FC_LIBFC_LOGGING 0x01 /* General logging, not categorized */
+#define FC_LPORT_LOGGING 0x02 /* lport layer logging */
+#define FC_DISC_LOGGING  0x04 /* discovery layer logging */
+#define FC_RPORT_LOGGING 0x08 /* rport layer logging */
+#define FC_FCP_LOGGING   0x10 /* I/O path logging */
+#define FC_EM_LOGGING    0x20 /* Exchange Manager logging */
+#define FC_EXCH_LOGGING  0x40 /* Exchange/Sequence logging */
+#define FC_SCSI_LOGGING  0x80 /* SCSI logging (mostly error handling) */
+
+extern unsigned int fc_debug_logging;
+
+#define FC_CHECK_LOGGING(LEVEL, CMD)				\
+do {								\
+	if (unlikely(fc_debug_logging & LEVEL))			\
+		do {						\
+			CMD;					\
+		} while (0);					\
+} while (0);
+
+#define FC_LIBFC_DBG(fmt, args...)					\
+	FC_CHECK_LOGGING(FC_LIBFC_LOGGING,				\
+			 printk(KERN_INFO "libfc: " fmt, ##args);)
+
+#define FC_LPORT_DBG(lport, fmt, args...)				\
+	FC_CHECK_LOGGING(FC_LPORT_LOGGING,				\
+			 printk(KERN_INFO "lport: %6x: " fmt,		\
+				fc_host_port_id(lport->host), ##args);)
+
+#define FC_DISC_DBG(disc, fmt, args...)					\
+	FC_CHECK_LOGGING(FC_DISC_LOGGING,				\
+			 printk(KERN_INFO "disc: %6x: " fmt,		\
+				fc_host_port_id(disc->lport->host),	\
+				##args);)
+
+#define FC_RPORT_DBG(rport, fmt, args...)				\
+do {									\
+	struct fc_rport_libfc_priv *rdata = rport->dd_data;		\
+	struct fc_lport *lport = rdata->local_port;			\
+	FC_CHECK_LOGGING(FC_RPORT_LOGGING,				\
+			 printk(KERN_INFO "rport: %6x: %6x: " fmt,	\
+				fc_host_port_id(lport->host),		\
+				rport->port_id, ##args);)		\
+} while (0);
+
+#define FC_FCP_DBG(pkt, fmt, args...)					\
+	FC_CHECK_LOGGING(FC_FCP_LOGGING,				\
+			 printk(KERN_INFO "fcp: %6x: %6x: " fmt,	\
+				fc_host_port_id(pkt->lp->host),		\
+				pkt->rport->port_id, ##args);)
+
+#define FC_EM_DBG(em, fmt, args...)					\
+	FC_CHECK_LOGGING(FC_EM_LOGGING,					\
+			 printk(KERN_INFO "em: %6x: " fmt,		\
+				fc_host_port_id(em->lp->host),		\
+				##args);)
+
+#define FC_EXCH_DBG(exch, fmt, args...)					\
+	FC_CHECK_LOGGING(FC_EXCH_LOGGING,				\
+			 printk(KERN_INFO "exch: %6x: %4x: " fmt,	\
+				fc_host_port_id(exch->lp->host),	\
+				exch->xid, ##args);)
+
+#define FC_SCSI_DBG(lport, fmt, args...)				\
+	FC_CHECK_LOGGING(FC_SCSI_LOGGING,                               \
+			 printk(KERN_INFO "scsi: %6x: " fmt,		\
+				fc_host_port_id(lport->host), ##args);)
 
 /*
  * libfc error codes
-- 
cgit v1.2.3-70-g09d2


From b391277a56b9eaaff4474339c703e574ed7fab5b Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Thu, 18 Jun 2009 09:57:18 +0200
Subject: sd, sr: fix Driver 'sd' needs updating message

If a SCSI ULD driver sets blk_queue_prep_rq(), it should clean it
up itself on remove(), and not from the bus callbacks. This
removes the need to hook into bus->remove(), which should not
be used at the same time as driver->remove().

[jejb: fix sdkp initialisation problem due to mismerge]
Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/scsi/scsi_lib.c    |  1 +
 drivers/scsi/scsi_priv.h   |  1 -
 drivers/scsi/scsi_sysfs.c  | 17 -----------------
 drivers/scsi/sd.c          |  1 +
 drivers/scsi/sr.c          |  1 +
 include/scsi/scsi_driver.h |  1 +
 6 files changed, 4 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 30f3275e119..f3c40898fc7 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1207,6 +1207,7 @@ int scsi_prep_fn(struct request_queue *q, struct request *req)
 		ret = scsi_setup_blk_pc_cmnd(sdev, req);
 	return scsi_prep_return(q, req, ret);
 }
+EXPORT_SYMBOL(scsi_prep_fn);
 
 /*
  * scsi_dev_queue_ready: if we can send requests to sdev, return 1 else
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 00264aab8c8..021e503c8c4 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -87,7 +87,6 @@ extern int scsi_init_queue(void);
 extern void scsi_exit_queue(void);
 struct request_queue;
 struct request;
-extern int scsi_prep_fn(struct request_queue *, struct request *);
 extern struct kmem_cache *scsi_sdb_cache;
 
 /* scsi_proc.c */
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index fa4711d1274..91482f2dcc5 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -420,29 +420,12 @@ static int scsi_bus_resume(struct device * dev)
 	return err;
 }
 
-static int scsi_bus_remove(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	struct scsi_device *sdev = to_scsi_device(dev);
-	int err = 0;
-
-	/* reset the prep_fn back to the default since the
-	 * driver may have altered it and it's being removed */
-	blk_queue_prep_rq(sdev->request_queue, scsi_prep_fn);
-
-	if (drv && drv->remove)
-		err = drv->remove(dev);
-
-	return 0;
-}
-
 struct bus_type scsi_bus_type = {
         .name		= "scsi",
         .match		= scsi_bus_match,
 	.uevent		= scsi_bus_uevent,
 	.suspend	= scsi_bus_suspend,
 	.resume		= scsi_bus_resume,
-	.remove		= scsi_bus_remove,
 };
 EXPORT_SYMBOL_GPL(scsi_bus_type);
 
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index e4ef11af18a..5616cd780ff 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2123,6 +2123,7 @@ static int sd_remove(struct device *dev)
 
 	async_synchronize_full();
 	sdkp = dev_get_drvdata(dev);
+	blk_queue_prep_rq(sdkp->device->request_queue, scsi_prep_fn);
 	device_del(&sdkp->dev);
 	del_gendisk(sdkp->disk);
 	sd_shutdown(dev);
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index cd350dfc121..cce0fe4c8a3 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -881,6 +881,7 @@ static int sr_remove(struct device *dev)
 {
 	struct scsi_cd *cd = dev_get_drvdata(dev);
 
+	blk_queue_prep_rq(cd->device->request_queue, scsi_prep_fn);
 	del_gendisk(cd->disk);
 
 	mutex_lock(&sr_ref_mutex);
diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h
index 1f5ca7f6211..9fd6702f02e 100644
--- a/include/scsi/scsi_driver.h
+++ b/include/scsi/scsi_driver.h
@@ -32,5 +32,6 @@ int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req);
 int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req);
 int scsi_prep_state_check(struct scsi_device *sdev, struct request *req);
 int scsi_prep_return(struct request_queue *q, struct request *req, int ret);
+int scsi_prep_fn(struct request_queue *, struct request *);
 
 #endif /* _SCSI_SCSI_DRIVER_H */
-- 
cgit v1.2.3-70-g09d2


From d06063cc221fdefcab86589e79ddfdb7c0e14b63 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 10 Apr 2009 09:01:23 -0700
Subject: Move FAULT_FLAG_xyz into handle_mm_fault() callers

This allows the callers to now pass down the full set of FAULT_FLAG_xyz
flags to handle_mm_fault().  All callers have been (mechanically)
converted to the new calling convention, there's almost certainly room
for architectures to clean up their code and then add FAULT_FLAG_RETRY
when that support is added.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/mm/fault.c                   | 2 +-
 arch/arm/mm/fault.c                     | 2 +-
 arch/avr32/mm/fault.c                   | 2 +-
 arch/cris/mm/fault.c                    | 2 +-
 arch/frv/mm/fault.c                     | 2 +-
 arch/ia64/mm/fault.c                    | 2 +-
 arch/m32r/mm/fault.c                    | 2 +-
 arch/m68k/mm/fault.c                    | 2 +-
 arch/microblaze/mm/fault.c              | 2 +-
 arch/mips/mm/fault.c                    | 2 +-
 arch/mn10300/mm/fault.c                 | 2 +-
 arch/parisc/mm/fault.c                  | 2 +-
 arch/powerpc/mm/fault.c                 | 2 +-
 arch/powerpc/platforms/cell/spu_fault.c | 2 +-
 arch/s390/lib/uaccess_pt.c              | 2 +-
 arch/s390/mm/fault.c                    | 2 +-
 arch/sh/mm/fault_32.c                   | 2 +-
 arch/sh/mm/tlbflush_64.c                | 2 +-
 arch/sparc/mm/fault_32.c                | 4 ++--
 arch/sparc/mm/fault_64.c                | 2 +-
 arch/um/kernel/trap.c                   | 2 +-
 arch/x86/mm/fault.c                     | 2 +-
 arch/xtensa/mm/fault.c                  | 2 +-
 include/linux/mm.h                      | 4 ++--
 mm/memory.c                             | 8 ++++----
 25 files changed, 30 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index 4829f96585b..00a31deaa96 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -146,7 +146,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
 	/* If for any reason at all we couldn't handle the fault,
 	   make sure we exit gracefully rather than endlessly redo
 	   the fault.  */
-	fault = handle_mm_fault(mm, vma, address, cause > 0);
+	fault = handle_mm_fault(mm, vma, address, cause > 0 ? FAULT_FLAG_WRITE : 0);
 	up_read(&mm->mmap_sem);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 0455557a289..6fdcbb70982 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -208,7 +208,7 @@ good_area:
 	 * than endlessly redo the fault.
 	 */
 survive:
-	fault = handle_mm_fault(mm, vma, addr & PAGE_MASK, fsr & (1 << 11));
+	fault = handle_mm_fault(mm, vma, addr & PAGE_MASK, (fsr & (1 << 11)) ? FAULT_FLAG_WRITE : 0);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
index 62d4abbaa65..b61d86d3deb 100644
--- a/arch/avr32/mm/fault.c
+++ b/arch/avr32/mm/fault.c
@@ -133,7 +133,7 @@ good_area:
 	 * fault.
 	 */
 survive:
-	fault = handle_mm_fault(mm, vma, address, writeaccess);
+	fault = handle_mm_fault(mm, vma, address, writeaccess ? FAULT_FLAG_WRITE : 0);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c
index c4c76db90f9..f925115e325 100644
--- a/arch/cris/mm/fault.c
+++ b/arch/cris/mm/fault.c
@@ -163,7 +163,7 @@ do_page_fault(unsigned long address, struct pt_regs *regs,
 	 * the fault.
 	 */
 
-	fault = handle_mm_fault(mm, vma, address, writeaccess & 1);
+	fault = handle_mm_fault(mm, vma, address, (writeaccess & 1) ? FAULT_FLAG_WRITE : 0);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
diff --git a/arch/frv/mm/fault.c b/arch/frv/mm/fault.c
index 05093d41d98..30f5d100a81 100644
--- a/arch/frv/mm/fault.c
+++ b/arch/frv/mm/fault.c
@@ -163,7 +163,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	fault = handle_mm_fault(mm, vma, ear0, write);
+	fault = handle_mm_fault(mm, vma, ear0, write ? FAULT_FLAG_WRITE : 0);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 23088bed111..19261a99e62 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -154,7 +154,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
 	 * sure we exit gracefully rather than endlessly redo the
 	 * fault.
 	 */
-	fault = handle_mm_fault(mm, vma, address, (mask & VM_WRITE) != 0);
+	fault = handle_mm_fault(mm, vma, address, (mask & VM_WRITE) ? FAULT_FLAG_WRITE : 0);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		/*
 		 * We ran out of memory, or some other thing happened
diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c
index 4a71df4c1b3..7274b47f4c2 100644
--- a/arch/m32r/mm/fault.c
+++ b/arch/m32r/mm/fault.c
@@ -196,7 +196,7 @@ survive:
 	 */
 	addr = (address & PAGE_MASK);
 	set_thread_fault_code(error_code);
-	fault = handle_mm_fault(mm, vma, addr, write);
+	fault = handle_mm_fault(mm, vma, addr, write ? FAULT_FLAG_WRITE : 0);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index f493f03231d..d0e35cf99fc 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -155,7 +155,7 @@ good_area:
 	 */
 
  survive:
-	fault = handle_mm_fault(mm, vma, address, write);
+	fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
 #ifdef DEBUG
 	printk("handle_mm_fault returns %d\n",fault);
 #endif
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
index 5e67cd1fab4..956607a63f4 100644
--- a/arch/microblaze/mm/fault.c
+++ b/arch/microblaze/mm/fault.c
@@ -232,7 +232,7 @@ good_area:
 	 * the fault.
 	 */
 survive:
-	fault = handle_mm_fault(mm, vma, address, is_write);
+	fault = handle_mm_fault(mm, vma, address, is_write ? FAULT_FLAG_WRITE : 0);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 55767ad9f00..6751ce9ede9 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -102,7 +102,7 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	fault = handle_mm_fault(mm, vma, address, write);
+	fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c
index 33cf25025da..a62e1e138bc 100644
--- a/arch/mn10300/mm/fault.c
+++ b/arch/mn10300/mm/fault.c
@@ -258,7 +258,7 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	fault = handle_mm_fault(mm, vma, address, write);
+	fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index 92c7fa4ecc3..bfb6dd6ab38 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -202,7 +202,7 @@ good_area:
 	 * fault.
 	 */
 
-	fault = handle_mm_fault(mm, vma, address, (acc_type & VM_WRITE) != 0);
+	fault = handle_mm_fault(mm, vma, address, (acc_type & VM_WRITE) ? FAULT_FLAG_WRITE : 0);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		/*
 		 * We hit a shared mapping outside of the file, or some
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 5beffc8f481..830bef0a113 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -302,7 +302,7 @@ good_area:
 	 * the fault.
 	 */
  survive:
-	ret = handle_mm_fault(mm, vma, address, is_write);
+	ret = handle_mm_fault(mm, vma, address, is_write ? FAULT_FLAG_WRITE : 0);
 	if (unlikely(ret & VM_FAULT_ERROR)) {
 		if (ret & VM_FAULT_OOM)
 			goto out_of_memory;
diff --git a/arch/powerpc/platforms/cell/spu_fault.c b/arch/powerpc/platforms/cell/spu_fault.c
index 95d8dadf2d8..d06ba87f1a1 100644
--- a/arch/powerpc/platforms/cell/spu_fault.c
+++ b/arch/powerpc/platforms/cell/spu_fault.c
@@ -70,7 +70,7 @@ int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
 	}
 
 	ret = 0;
-	*flt = handle_mm_fault(mm, vma, ea, is_write);
+	*flt = handle_mm_fault(mm, vma, ea, is_write ? FAULT_FLAG_WRITE : 0);
 	if (unlikely(*flt & VM_FAULT_ERROR)) {
 		if (*flt & VM_FAULT_OOM) {
 			ret = -ENOMEM;
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index b0b84c35b0a..cb5d59eab0e 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -66,7 +66,7 @@ static int __handle_fault(struct mm_struct *mm, unsigned long address,
 	}
 
 survive:
-	fault = handle_mm_fault(mm, vma, address, write_access);
+	fault = handle_mm_fault(mm, vma, address, write_access ? FAULT_FLAG_WRITE : 0);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 220a152c836..74eb26bf197 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -352,7 +352,7 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	fault = handle_mm_fault(mm, vma, address, write);
+	fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM) {
 			up_read(&mm->mmap_sem);
diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c
index 2c50f80fc33..cc8ddbdf3d7 100644
--- a/arch/sh/mm/fault_32.c
+++ b/arch/sh/mm/fault_32.c
@@ -133,7 +133,7 @@ good_area:
 	 * the fault.
 	 */
 survive:
-	fault = handle_mm_fault(mm, vma, address, writeaccess);
+	fault = handle_mm_fault(mm, vma, address, writeaccess ? FAULT_FLAG_WRITE : 0);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
diff --git a/arch/sh/mm/tlbflush_64.c b/arch/sh/mm/tlbflush_64.c
index 7876997ba19..fcbb6e135ce 100644
--- a/arch/sh/mm/tlbflush_64.c
+++ b/arch/sh/mm/tlbflush_64.c
@@ -187,7 +187,7 @@ good_area:
 	 * the fault.
 	 */
 survive:
-	fault = handle_mm_fault(mm, vma, address, writeaccess);
+	fault = handle_mm_fault(mm, vma, address, writeaccess ? FAULT_FLAG_WRITE : 0);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index 12e447fc854..a5e30c642ee 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -241,7 +241,7 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	fault = handle_mm_fault(mm, vma, address, write);
+	fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
@@ -484,7 +484,7 @@ good_area:
 		if(!(vma->vm_flags & (VM_READ | VM_EXEC)))
 			goto bad_area;
 	}
-	switch (handle_mm_fault(mm, vma, address, write)) {
+	switch (handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0)) {
 	case VM_FAULT_SIGBUS:
 	case VM_FAULT_OOM:
 		goto do_sigbus;
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 4ab8993b086..e5620b27c8b 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -398,7 +398,7 @@ good_area:
 			goto bad_area;
 	}
 
-	fault = handle_mm_fault(mm, vma, address, (fault_code & FAULT_CODE_WRITE));
+	fault = handle_mm_fault(mm, vma, address, (fault_code & FAULT_CODE_WRITE) ? FAULT_FLAG_WRITE : 0);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 7384d8accfe..637c6505dc0 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -65,7 +65,7 @@ good_area:
 	do {
 		int fault;
 
-		fault = handle_mm_fault(mm, vma, address, is_write);
+		fault = handle_mm_fault(mm, vma, address, is_write ? FAULT_FLAG_WRITE : 0);
 		if (unlikely(fault & VM_FAULT_ERROR)) {
 			if (fault & VM_FAULT_OOM) {
 				goto out_of_memory;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index c403526d5d1..78a5fff857b 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1113,7 +1113,7 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault:
 	 */
-	fault = handle_mm_fault(mm, vma, address, write);
+	fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
 
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		mm_fault_error(regs, error_code, address, fault);
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index bdd860d93f7..bc0733359a8 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -106,7 +106,7 @@ good_area:
 	 * the fault.
 	 */
 survive:
-	fault = handle_mm_fault(mm, vma, address, is_write);
+	fault = handle_mm_fault(mm, vma, address, is_write ? FAULT_FLAG_WRITE : 0);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index cf260d848eb..d006e93d5c9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -810,11 +810,11 @@ extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
 
 #ifdef CONFIG_MMU
 extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
-			unsigned long address, int write_access);
+			unsigned long address, unsigned int flags);
 #else
 static inline int handle_mm_fault(struct mm_struct *mm,
 			struct vm_area_struct *vma, unsigned long address,
-			int write_access)
+			unsigned int flags)
 {
 	/* should never happen if there's no MMU */
 	BUG();
diff --git a/mm/memory.c b/mm/memory.c
index e6a9700359d..98bcb90d595 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1310,8 +1310,9 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			cond_resched();
 			while (!(page = follow_page(vma, start, foll_flags))) {
 				int ret;
-				ret = handle_mm_fault(mm, vma, start,
-						foll_flags & FOLL_WRITE);
+
+				/* FOLL_WRITE matches FAULT_FLAG_WRITE! */
+				ret = handle_mm_fault(mm, vma, start, foll_flags & FOLL_WRITE);
 				if (ret & VM_FAULT_ERROR) {
 					if (ret & VM_FAULT_OOM)
 						return i ? i : -ENOMEM;
@@ -2958,13 +2959,12 @@ unlock:
  * By the time we get here, we already hold the mm semaphore
  */
 int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
-		unsigned long address, int write_access)
+		unsigned long address, unsigned int flags)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
-	unsigned int flags = write_access ? FAULT_FLAG_WRITE : 0;
 
 	__set_current_state(TASK_RUNNING);
 
-- 
cgit v1.2.3-70-g09d2


From f9ab94cee313746573b2d693bc2afb807ebb0998 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Mon, 22 Jun 2009 10:12:20 +0100
Subject: dm: introduce num_flush_requests

Introduce num_flush_requests for a target to set to say how many flush
instructions (empty barriers) it wants to receive.  These are sent by
__clone_and_map_empty_barrier with map_info->flush_request going from 0
to (num_flush_requests - 1).

Old targets without flush support won't receive any flush requests.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm.c               | 39 +++++++++++++++++++++++++++++++++++++++
 include/linux/device-mapper.h | 11 +++++++++++
 2 files changed, 50 insertions(+)

(limited to 'include')

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7d9ca709433..badb7519ccc 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -750,6 +750,40 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
 	return clone;
 }
 
+static void __flush_target(struct clone_info *ci, struct dm_target *ti,
+			  unsigned flush_nr)
+{
+	struct dm_target_io *tio = alloc_tio(ci->md);
+	struct bio *clone;
+
+	tio->io = ci->io;
+	tio->ti = ti;
+
+	memset(&tio->info, 0, sizeof(tio->info));
+	tio->info.flush_request = flush_nr;
+
+	clone = bio_alloc_bioset(GFP_NOIO, 0, ci->md->bs);
+	__bio_clone(clone, ci->bio);
+	clone->bi_destructor = dm_bio_destructor;
+
+	__map_bio(ti, clone, tio);
+}
+
+static int __clone_and_map_empty_barrier(struct clone_info *ci)
+{
+	unsigned target_nr = 0, flush_nr;
+	struct dm_target *ti;
+
+	while ((ti = dm_table_get_target(ci->map, target_nr++)))
+		for (flush_nr = 0; flush_nr < ti->num_flush_requests;
+		     flush_nr++)
+			__flush_target(ci, ti, flush_nr);
+
+	ci->sector_count = 0;
+
+	return 0;
+}
+
 static int __clone_and_map(struct clone_info *ci)
 {
 	struct bio *clone, *bio = ci->bio;
@@ -757,6 +791,9 @@ static int __clone_and_map(struct clone_info *ci)
 	sector_t len = 0, max;
 	struct dm_target_io *tio;
 
+	if (unlikely(bio_empty_barrier(bio)))
+		return __clone_and_map_empty_barrier(ci);
+
 	ti = dm_table_find_target(ci->map, ci->sector);
 	if (!dm_target_is_valid(ti))
 		return -EIO;
@@ -877,6 +914,8 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
 	ci.io->md = md;
 	ci.sector = bio->bi_sector;
 	ci.sector_count = bio_sectors(bio);
+	if (unlikely(bio_empty_barrier(bio)))
+		ci.sector_count = 1;
 	ci.idx = bio->bi_idx;
 
 	start_io_acct(ci.io);
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 49c2362977f..fc36a4d0772 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -21,6 +21,7 @@ typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
 union map_info {
 	void *ptr;
 	unsigned long long ll;
+	unsigned flush_request;
 };
 
 /*
@@ -167,6 +168,16 @@ struct dm_target {
 	/* Always a power of 2 */
 	sector_t split_io;
 
+	/*
+	 * A number of zero-length barrier requests that will be submitted
+	 * to the target for the purpose of flushing cache.
+	 *
+	 * The request number will be placed in union map_info->flush_request.
+	 * It is a responsibility of the target driver to remap these requests
+	 * to the real underlying devices.
+	 */
+	unsigned num_flush_requests;
+
 	/*
 	 * These are automatically filled in by
 	 * dm_table_get_device.
-- 
cgit v1.2.3-70-g09d2


From 60935eb21d3c5bac79618000f38f92c249d153c4 Mon Sep 17 00:00:00 2001
From: Milan Broz <mbroz@redhat.com>
Date: Mon, 22 Jun 2009 10:12:30 +0100
Subject: dm ioctl: support cookies for udev

Add support for passing a 32 bit "cookie" into the kernel with the
DM_SUSPEND, DM_DEV_RENAME and DM_DEV_REMOVE ioctls.  The (unsigned)
value of this cookie is returned to userspace alongside the uevents
issued by these ioctls in the variable DM_COOKIE.

This means the userspace process issuing these ioctls can be notified
by udev after udev has completed any actions triggered.

To minimise the interface extension, we pass the cookie into the
kernel in the event_nr field which is otherwise unused when calling
these ioctls.  Incrementing the version number allows userspace to
determine in advance whether or not the kernel supports the cookie.
If the kernel does support this but userspace does not, there should
be no impact as the new variable will just get ignored.

Signed-off-by: Milan Broz <mbroz@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-ioctl.c    | 14 ++++++++++----
 drivers/md/dm.c          | 25 +++++++++++++++++++------
 drivers/md/dm.h          |  3 ++-
 include/linux/dm-ioctl.h | 14 ++++++++++++--
 4 files changed, 43 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 1128d3fba79..1c871736f48 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -276,7 +276,7 @@ retry:
 	up_write(&_hash_lock);
 }
 
-static int dm_hash_rename(const char *old, const char *new)
+static int dm_hash_rename(uint32_t cookie, const char *old, const char *new)
 {
 	char *new_name, *old_name;
 	struct hash_cell *hc;
@@ -333,7 +333,7 @@ static int dm_hash_rename(const char *old, const char *new)
 		dm_table_put(table);
 	}
 
-	dm_kobject_uevent(hc->md);
+	dm_kobject_uevent(hc->md, KOBJ_CHANGE, cookie);
 
 	dm_put(hc->md);
 	up_write(&_hash_lock);
@@ -680,6 +680,9 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size)
 
 	__hash_remove(hc);
 	up_write(&_hash_lock);
+
+	dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr);
+
 	dm_put(md);
 	param->data_size = 0;
 	return 0;
@@ -715,7 +718,7 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size)
 		return r;
 
 	param->data_size = 0;
-	return dm_hash_rename(param->name, new_name);
+	return dm_hash_rename(param->event_nr, param->name, new_name);
 }
 
 static int dev_set_geometry(struct dm_ioctl *param, size_t param_size)
@@ -842,8 +845,11 @@ static int do_resume(struct dm_ioctl *param)
 	if (dm_suspended(md))
 		r = dm_resume(md);
 
-	if (!r)
+
+	if (!r) {
+		dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr);
 		r = __dev_status(md, param);
+	}
 
 	dm_put(md);
 	return r;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 36142e947ff..a9210bb594e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -24,6 +24,13 @@
 
 #define DM_MSG_PREFIX "core"
 
+/*
+ * Cookies are numeric values sent with CHANGE and REMOVE
+ * uevents while resuming, removing or renaming the device.
+ */
+#define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE"
+#define DM_COOKIE_LENGTH 24
+
 static const char *_name = DM_NAME;
 
 static unsigned int major = 0;
@@ -1731,11 +1738,7 @@ int dm_resume(struct mapped_device *md)
 	clear_bit(DMF_SUSPENDED, &md->flags);
 
 	dm_table_unplug_all(map);
-
-	dm_kobject_uevent(md);
-
 	r = 0;
-
 out:
 	dm_table_put(map);
 	mutex_unlock(&md->suspend_lock);
@@ -1746,9 +1749,19 @@ out:
 /*-----------------------------------------------------------------
  * Event notification.
  *---------------------------------------------------------------*/
-void dm_kobject_uevent(struct mapped_device *md)
+void dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
+		       unsigned cookie)
 {
-	kobject_uevent(&disk_to_dev(md->disk)->kobj, KOBJ_CHANGE);
+	char udev_cookie[DM_COOKIE_LENGTH];
+	char *envp[] = { udev_cookie, NULL };
+
+	if (!cookie)
+		kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
+	else {
+		snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
+			 DM_COOKIE_ENV_VAR_NAME, cookie);
+		kobject_uevent_env(&disk_to_dev(md->disk)->kobj, action, envp);
+	}
 }
 
 uint32_t dm_next_uevent_seq(struct mapped_device *md)
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index a31506d93e9..b5935c610c4 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -92,7 +92,8 @@ void dm_stripe_exit(void);
 int dm_open_count(struct mapped_device *md);
 int dm_lock_for_deletion(struct mapped_device *md);
 
-void dm_kobject_uevent(struct mapped_device *md);
+void dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
+		       unsigned cookie);
 
 int dm_kcopyd_init(void);
 void dm_kcopyd_exit(void);
diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h
index 48e44ee2b46..2ab84c83c31 100644
--- a/include/linux/dm-ioctl.h
+++ b/include/linux/dm-ioctl.h
@@ -123,6 +123,16 @@ struct dm_ioctl {
 	__u32 target_count;	/* in/out */
 	__s32 open_count;	/* out */
 	__u32 flags;		/* in/out */
+
+	/*
+	 * event_nr holds either the event number (input and output) or the
+	 * udev cookie value (input only).
+	 * The DM_DEV_WAIT ioctl takes an event number as input.
+	 * The DM_SUSPEND, DM_DEV_REMOVE and DM_DEV_RENAME ioctls
+	 * use the field as a cookie to return in the DM_COOKIE
+	 * variable with the uevents they issue.
+	 * For output, the ioctls return the event number, not the cookie.
+	 */
 	__u32 event_nr;      	/* in/out */
 	__u32 padding;
 
@@ -256,9 +266,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	14
+#define DM_VERSION_MINOR	15
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2008-04-23)"
+#define DM_VERSION_EXTRA	"-ioctl (2009-04-01)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
-- 
cgit v1.2.3-70-g09d2


From 5ab97588fb266187b88d1ad893251c94388f18ba Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Mon, 22 Jun 2009 10:12:32 +0100
Subject: dm table: replace struct io_restrictions with struct queue_limits

Use blk_stack_limits() to stack block limits (including topology) rather
than duplicate the equivalent within Device Mapper.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-table.c         | 138 +++++++++++++-----------------------------
 include/linux/device-mapper.h |  16 +----
 2 files changed, 45 insertions(+), 109 deletions(-)

(limited to 'include')

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index e3bcfb8b15a..41ec2bf9fbe 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -66,7 +66,7 @@ struct dm_table {
 	 * These are optimistic limits taken from all the
 	 * targets, some targets will need smaller limits.
 	 */
-	struct io_restrictions limits;
+	struct queue_limits limits;
 
 	/* events get handed up using this callback */
 	void (*event_fn)(void *);
@@ -88,43 +88,6 @@ static unsigned int int_log(unsigned int n, unsigned int base)
 	return result;
 }
 
-/*
- * Returns the minimum that is _not_ zero, unless both are zero.
- */
-#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
-
-/*
- * Combine two io_restrictions, always taking the lower value.
- */
-static void combine_restrictions_low(struct io_restrictions *lhs,
-				     struct io_restrictions *rhs)
-{
-	lhs->max_sectors =
-		min_not_zero(lhs->max_sectors, rhs->max_sectors);
-
-	lhs->max_phys_segments =
-		min_not_zero(lhs->max_phys_segments, rhs->max_phys_segments);
-
-	lhs->max_hw_segments =
-		min_not_zero(lhs->max_hw_segments, rhs->max_hw_segments);
-
-	lhs->logical_block_size = max(lhs->logical_block_size,
-				      rhs->logical_block_size);
-
-	lhs->max_segment_size =
-		min_not_zero(lhs->max_segment_size, rhs->max_segment_size);
-
-	lhs->max_hw_sectors =
-		min_not_zero(lhs->max_hw_sectors, rhs->max_hw_sectors);
-
-	lhs->seg_boundary_mask =
-		min_not_zero(lhs->seg_boundary_mask, rhs->seg_boundary_mask);
-
-	lhs->bounce_pfn = min_not_zero(lhs->bounce_pfn, rhs->bounce_pfn);
-
-	lhs->no_cluster |= rhs->no_cluster;
-}
-
 /*
  * Calculate the index of the child node of the n'th node k'th key.
  */
@@ -511,10 +474,14 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
 	return 0;
 }
 
+/*
+ * Returns the minimum that is _not_ zero, unless both are zero.
+ */
+#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
+
 void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
-	struct io_restrictions *rs = &ti->limits;
 	char b[BDEVNAME_SIZE];
 
 	if (unlikely(!q)) {
@@ -523,15 +490,9 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
 		return;
 	}
 
-	/*
-	 * Combine the device limits low.
-	 *
-	 * FIXME: if we move an io_restriction struct
-	 *        into q this would just be a call to
-	 *        combine_restrictions_low()
-	 */
-	rs->max_sectors =
-		min_not_zero(rs->max_sectors, queue_max_sectors(q));
+	if (blk_stack_limits(&ti->limits, &q->limits, 0) < 0)
+		DMWARN("%s: target device %s is misaligned",
+		       dm_device_name(ti->table->md), bdevname(bdev, b));
 
 	/*
 	 * Check if merge fn is supported.
@@ -540,33 +501,9 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
 	 */
 
 	if (q->merge_bvec_fn && !ti->type->merge)
-		rs->max_sectors =
-			min_not_zero(rs->max_sectors,
+		ti->limits.max_sectors =
+			min_not_zero(ti->limits.max_sectors,
 				     (unsigned int) (PAGE_SIZE >> 9));
-
-	rs->max_phys_segments =
-		min_not_zero(rs->max_phys_segments,
-			     queue_max_phys_segments(q));
-
-	rs->max_hw_segments =
-		min_not_zero(rs->max_hw_segments, queue_max_hw_segments(q));
-
-	rs->logical_block_size = max(rs->logical_block_size,
-				     queue_logical_block_size(q));
-
-	rs->max_segment_size =
-		min_not_zero(rs->max_segment_size, queue_max_segment_size(q));
-
-	rs->max_hw_sectors =
-		min_not_zero(rs->max_hw_sectors, queue_max_hw_sectors(q));
-
-	rs->seg_boundary_mask =
-		min_not_zero(rs->seg_boundary_mask,
-			     queue_segment_boundary(q));
-
-	rs->bounce_pfn = min_not_zero(rs->bounce_pfn, queue_bounce_pfn(q));
-
-	rs->no_cluster |= !test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
 }
 EXPORT_SYMBOL_GPL(dm_set_device_limits);
 
@@ -704,24 +641,32 @@ int dm_split_args(int *argc, char ***argvp, char *input)
 	return 0;
 }
 
-static void check_for_valid_limits(struct io_restrictions *rs)
+static void init_valid_queue_limits(struct queue_limits *limits)
 {
-	if (!rs->max_sectors)
-		rs->max_sectors = SAFE_MAX_SECTORS;
-	if (!rs->max_hw_sectors)
-		rs->max_hw_sectors = SAFE_MAX_SECTORS;
-	if (!rs->max_phys_segments)
-		rs->max_phys_segments = MAX_PHYS_SEGMENTS;
-	if (!rs->max_hw_segments)
-		rs->max_hw_segments = MAX_HW_SEGMENTS;
-	if (!rs->logical_block_size)
-		rs->logical_block_size = 1 << SECTOR_SHIFT;
-	if (!rs->max_segment_size)
-		rs->max_segment_size = MAX_SEGMENT_SIZE;
-	if (!rs->seg_boundary_mask)
-		rs->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
-	if (!rs->bounce_pfn)
-		rs->bounce_pfn = -1;
+	if (!limits->max_sectors)
+		limits->max_sectors = SAFE_MAX_SECTORS;
+	if (!limits->max_hw_sectors)
+		limits->max_hw_sectors = SAFE_MAX_SECTORS;
+	if (!limits->max_phys_segments)
+		limits->max_phys_segments = MAX_PHYS_SEGMENTS;
+	if (!limits->max_hw_segments)
+		limits->max_hw_segments = MAX_HW_SEGMENTS;
+	if (!limits->logical_block_size)
+		limits->logical_block_size = 1 << SECTOR_SHIFT;
+	if (!limits->physical_block_size)
+		limits->physical_block_size = 1 << SECTOR_SHIFT;
+	if (!limits->io_min)
+		limits->io_min = 1 << SECTOR_SHIFT;
+	if (!limits->max_segment_size)
+		limits->max_segment_size = MAX_SEGMENT_SIZE;
+	if (!limits->seg_boundary_mask)
+		limits->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
+	if (!limits->bounce_pfn)
+		limits->bounce_pfn = -1;
+	/*
+	 * The other fields (alignment_offset, io_opt, misaligned)
+	 * hold 0 from the kzalloc().
+	 */
 }
 
 /*
@@ -841,9 +786,12 @@ int dm_table_add_target(struct dm_table *t, const char *type,
 
 	t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
 
-	/* FIXME: the plan is to combine high here and then have
-	 * the merge fn apply the target level restrictions. */
-	combine_restrictions_low(&t->limits, &tgt->limits);
+	if (blk_stack_limits(&t->limits, &tgt->limits, 0) < 0)
+		DMWARN("%s: target device (start sect %llu len %llu) "
+		       "is misaligned",
+		       dm_device_name(t->md),
+		       (unsigned long long) tgt->begin,
+		       (unsigned long long) tgt->len);
 	return 0;
 
  bad:
@@ -886,7 +834,7 @@ int dm_table_complete(struct dm_table *t)
 	int r = 0;
 	unsigned int leaf_nodes;
 
-	check_for_valid_limits(&t->limits);
+	init_valid_queue_limits(&t->limits);
 
 	r = validate_hardware_logical_block_alignment(t);
 	if (r)
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index fc36a4d0772..236880c1dc3 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -144,18 +144,6 @@ struct target_type {
 	struct list_head list;
 };
 
-struct io_restrictions {
-	unsigned long bounce_pfn;
-	unsigned long seg_boundary_mask;
-	unsigned max_hw_sectors;
-	unsigned max_sectors;
-	unsigned max_segment_size;
-	unsigned short logical_block_size;
-	unsigned short max_hw_segments;
-	unsigned short max_phys_segments;
-	unsigned char no_cluster; /* inverted so that 0 is default */
-};
-
 struct dm_target {
 	struct dm_table *table;
 	struct target_type *type;
@@ -164,7 +152,7 @@ struct dm_target {
 	sector_t begin;
 	sector_t len;
 
-	/* FIXME: turn this into a mask, and merge with io_restrictions */
+	/* FIXME: turn this into a mask, and merge with queue_limits */
 	/* Always a power of 2 */
 	sector_t split_io;
 
@@ -182,7 +170,7 @@ struct dm_target {
 	 * These are automatically filled in by
 	 * dm_table_get_device.
 	 */
-	struct io_restrictions limits;
+	struct queue_limits limits;
 
 	/* target specific data */
 	void *private;
-- 
cgit v1.2.3-70-g09d2


From af4874e03ed82f050d5872d8c39ce64bf16b5c38 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Mon, 22 Jun 2009 10:12:33 +0100
Subject: dm target:s introduce iterate devices fn

Add .iterate_devices to 'struct target_type' to allow a function to be
called for all devices in a DM target.  Implemented it for all targets
except those in dm-snap.c (origin and snapshot).

(The raid1 version number jumps to 1.12 because we originally reserved
1.1 to 1.11 for 'block_on_error' but ended up using 'handle_errors'
instead.)

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Cc: martin.petersen@oracle.com
---
 drivers/md/dm-crypt.c         | 11 ++++++++++-
 drivers/md/dm-delay.c         | 20 +++++++++++++++++++-
 drivers/md/dm-linear.c        | 11 ++++++++++-
 drivers/md/dm-mpath.c         | 23 ++++++++++++++++++++++-
 drivers/md/dm-raid1.c         | 17 ++++++++++++++++-
 drivers/md/dm-stripe.c        | 18 +++++++++++++++++-
 include/linux/device-mapper.h | 11 +++++++++++
 7 files changed, 105 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 04db6c4004a..9933eb861c7 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1313,9 +1313,17 @@ static int crypt_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
 	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
 }
 
+static int crypt_iterate_devices(struct dm_target *ti,
+				 iterate_devices_callout_fn fn, void *data)
+{
+	struct crypt_config *cc = ti->private;
+
+	return fn(ti, cc->dev, cc->start, data);
+}
+
 static struct target_type crypt_target = {
 	.name   = "crypt",
-	.version= {1, 6, 0},
+	.version = {1, 7, 0},
 	.module = THIS_MODULE,
 	.ctr    = crypt_ctr,
 	.dtr    = crypt_dtr,
@@ -1326,6 +1334,7 @@ static struct target_type crypt_target = {
 	.resume = crypt_resume,
 	.message = crypt_message,
 	.merge  = crypt_merge,
+	.iterate_devices = crypt_iterate_devices,
 };
 
 static int __init dm_crypt_init(void)
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 8ad8a9044bb..4e5b843cd4d 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -318,9 +318,26 @@ static int delay_status(struct dm_target *ti, status_type_t type,
 	return 0;
 }
 
+static int delay_iterate_devices(struct dm_target *ti,
+				 iterate_devices_callout_fn fn, void *data)
+{
+	struct delay_c *dc = ti->private;
+	int ret = 0;
+
+	ret = fn(ti, dc->dev_read, dc->start_read, data);
+	if (ret)
+		goto out;
+
+	if (dc->dev_write)
+		ret = fn(ti, dc->dev_write, dc->start_write, data);
+
+out:
+	return ret;
+}
+
 static struct target_type delay_target = {
 	.name	     = "delay",
-	.version     = {1, 0, 2},
+	.version     = {1, 1, 0},
 	.module      = THIS_MODULE,
 	.ctr	     = delay_ctr,
 	.dtr	     = delay_dtr,
@@ -328,6 +345,7 @@ static struct target_type delay_target = {
 	.presuspend  = delay_presuspend,
 	.resume	     = delay_resume,
 	.status	     = delay_status,
+	.iterate_devices = delay_iterate_devices,
 };
 
 static int __init dm_delay_init(void)
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index ecbb17421da..9184b6deb86 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -134,9 +134,17 @@ static int linear_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
 	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
 }
 
+static int linear_iterate_devices(struct dm_target *ti,
+				  iterate_devices_callout_fn fn, void *data)
+{
+	struct linear_c *lc = ti->private;
+
+	return fn(ti, lc->dev, lc->start, data);
+}
+
 static struct target_type linear_target = {
 	.name   = "linear",
-	.version= {1, 0, 3},
+	.version = {1, 1, 0},
 	.module = THIS_MODULE,
 	.ctr    = linear_ctr,
 	.dtr    = linear_dtr,
@@ -144,6 +152,7 @@ static struct target_type linear_target = {
 	.status = linear_status,
 	.ioctl  = linear_ioctl,
 	.merge  = linear_merge,
+	.iterate_devices = linear_iterate_devices,
 };
 
 int __init dm_linear_init(void)
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 890c0e8ed13..f8aeaaa54af 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1450,12 +1450,32 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
 	return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
 }
 
+static int multipath_iterate_devices(struct dm_target *ti,
+				     iterate_devices_callout_fn fn, void *data)
+{
+	struct multipath *m = ti->private;
+	struct priority_group *pg;
+	struct pgpath *p;
+	int ret = 0;
+
+	list_for_each_entry(pg, &m->priority_groups, list) {
+		list_for_each_entry(p, &pg->pgpaths, list) {
+			ret = fn(ti, p->path.dev, ti->begin, data);
+			if (ret)
+				goto out;
+		}
+	}
+
+out:
+	return ret;
+}
+
 /*-----------------------------------------------------------------
  * Module setup
  *---------------------------------------------------------------*/
 static struct target_type multipath_target = {
 	.name = "multipath",
-	.version = {1, 0, 5},
+	.version = {1, 1, 0},
 	.module = THIS_MODULE,
 	.ctr = multipath_ctr,
 	.dtr = multipath_dtr,
@@ -1466,6 +1486,7 @@ static struct target_type multipath_target = {
 	.status = multipath_status,
 	.message = multipath_message,
 	.ioctl  = multipath_ioctl,
+	.iterate_devices = multipath_iterate_devices,
 };
 
 static int __init dm_multipath_init(void)
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 076fbb4e967..ce8868c768c 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1283,9 +1283,23 @@ static int mirror_status(struct dm_target *ti, status_type_t type,
 	return 0;
 }
 
+static int mirror_iterate_devices(struct dm_target *ti,
+				  iterate_devices_callout_fn fn, void *data)
+{
+	struct mirror_set *ms = ti->private;
+	int ret = 0;
+	unsigned i;
+
+	for (i = 0; !ret && i < ms->nr_mirrors; i++)
+		ret = fn(ti, ms->mirror[i].dev,
+			 ms->mirror[i].offset, data);
+
+	return ret;
+}
+
 static struct target_type mirror_target = {
 	.name	 = "mirror",
-	.version = {1, 0, 20},
+	.version = {1, 12, 0},
 	.module	 = THIS_MODULE,
 	.ctr	 = mirror_ctr,
 	.dtr	 = mirror_dtr,
@@ -1295,6 +1309,7 @@ static struct target_type mirror_target = {
 	.postsuspend = mirror_postsuspend,
 	.resume	 = mirror_resume,
 	.status	 = mirror_status,
+	.iterate_devices = mirror_iterate_devices,
 };
 
 static int __init dm_mirror_init(void)
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index c64fe827a5f..b240e85ae39 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -313,15 +313,31 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio,
 	return error;
 }
 
+static int stripe_iterate_devices(struct dm_target *ti,
+				  iterate_devices_callout_fn fn, void *data)
+{
+	struct stripe_c *sc = ti->private;
+	int ret = 0;
+	unsigned i = 0;
+
+	do
+		ret = fn(ti, sc->stripe[i].dev,
+			 sc->stripe[i].physical_start, data);
+	while (!ret && ++i < sc->stripes);
+
+	return ret;
+}
+
 static struct target_type stripe_target = {
 	.name   = "striped",
-	.version = {1, 1, 0},
+	.version = {1, 2, 0},
 	.module = THIS_MODULE,
 	.ctr    = stripe_ctr,
 	.dtr    = stripe_dtr,
 	.map    = stripe_map,
 	.end_io = stripe_end_io,
 	.status = stripe_status,
+	.iterate_devices = stripe_iterate_devices,
 };
 
 int __init dm_stripe_init(void)
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 236880c1dc3..deac3b4e5e1 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -11,6 +11,7 @@
 #include <linux/bio.h>
 #include <linux/blkdev.h>
 
+struct dm_dev;
 struct dm_target;
 struct dm_table;
 struct mapped_device;
@@ -81,6 +82,15 @@ typedef int (*dm_ioctl_fn) (struct dm_target *ti, unsigned int cmd,
 typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm,
 			    struct bio_vec *biovec, int max_size);
 
+typedef int (*iterate_devices_callout_fn) (struct dm_target *ti,
+					   struct dm_dev *dev,
+					   sector_t physical_start,
+					   void *data);
+
+typedef int (*dm_iterate_devices_fn) (struct dm_target *ti,
+				      iterate_devices_callout_fn fn,
+				      void *data);
+
 /*
  * Returns:
  *    0: The target can handle the next I/O immediately.
@@ -139,6 +149,7 @@ struct target_type {
 	dm_ioctl_fn ioctl;
 	dm_merge_fn merge;
 	dm_busy_fn busy;
+	dm_iterate_devices_fn iterate_devices;
 
 	/* For internal device-mapper use. */
 	struct list_head list;
-- 
cgit v1.2.3-70-g09d2


From 754c5fc7ebb417b23601a6222a6005cc2e7f2913 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Mon, 22 Jun 2009 10:12:34 +0100
Subject: dm: calculate queue limits during resume not load

Currently, device-mapper maintains a separate instance of 'struct
queue_limits' for each table of each device.  When the configuration of
a device is to be changed, first its table is loaded and this structure
is populated, then the device is 'resumed' and the calculated
queue_limits are applied.

This places restrictions on how userspace may process related devices,
where it is often advantageous to 'load' tables for several devices
at once before 'resuming' them together.  As the new queue_limits
only take effect after the 'resume', if they are changing and one
device uses another, the latter must be 'resumed' before the former
may be 'loaded'.

This patch moves the calculation of these queue_limits out of
the 'load' operation into 'resume'.  Since we are no longer
pre-calculating this struct, we no longer need to maintain copies
within our dm structs.

dm_set_device_limits() now passes the 'start' of the device's
data area (aka pe_start) as the 'offset' to blk_stack_limits().

init_valid_queue_limits() is replaced by blk_set_default_limits().

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: martin.petersen@oracle.com
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-table.c         | 185 +++++++++++++++++++++++-------------------
 drivers/md/dm.c               |  12 ++-
 drivers/md/dm.h               |   5 +-
 include/linux/device-mapper.h |  10 +--
 4 files changed, 117 insertions(+), 95 deletions(-)

(limited to 'include')

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 267817edc84..09a57113955 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -62,12 +62,6 @@ struct dm_table {
 	/* a list of devices used by this table */
 	struct list_head devices;
 
-	/*
-	 * These are optimistic limits taken from all the
-	 * targets, some targets will need smaller limits.
-	 */
-	struct queue_limits limits;
-
 	/* events get handed up using this callback */
 	void (*event_fn)(void *);
 	void *event_context;
@@ -346,18 +340,21 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
 /*
  * If possible, this checks an area of a destination device is valid.
  */
-static int device_area_is_valid(struct dm_target *ti, struct block_device *bdev,
-			     sector_t start, sector_t len)
+static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
+				sector_t start, void *data)
 {
-	sector_t dev_size = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
+	struct queue_limits *limits = data;
+	struct block_device *bdev = dev->bdev;
+	sector_t dev_size =
+		i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
 	unsigned short logical_block_size_sectors =
-		ti->limits.logical_block_size >> SECTOR_SHIFT;
+		limits->logical_block_size >> SECTOR_SHIFT;
 	char b[BDEVNAME_SIZE];
 
 	if (!dev_size)
 		return 1;
 
-	if ((start >= dev_size) || (start + len > dev_size)) {
+	if ((start >= dev_size) || (start + ti->len > dev_size)) {
 		DMWARN("%s: %s too small for target",
 		       dm_device_name(ti->table->md), bdevname(bdev, b));
 		return 0;
@@ -371,16 +368,16 @@ static int device_area_is_valid(struct dm_target *ti, struct block_device *bdev,
 		       "logical block size %hu of %s",
 		       dm_device_name(ti->table->md),
 		       (unsigned long long)start,
-		       ti->limits.logical_block_size, bdevname(bdev, b));
+		       limits->logical_block_size, bdevname(bdev, b));
 		return 0;
 	}
 
-	if (len & (logical_block_size_sectors - 1)) {
+	if (ti->len & (logical_block_size_sectors - 1)) {
 		DMWARN("%s: len=%llu not aligned to h/w "
 		       "logical block size %hu of %s",
 		       dm_device_name(ti->table->md),
-		       (unsigned long long)len,
-		       ti->limits.logical_block_size, bdevname(bdev, b));
+		       (unsigned long long)ti->len,
+		       limits->logical_block_size, bdevname(bdev, b));
 		return 0;
 	}
 
@@ -479,18 +476,21 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
  */
 #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
 
-void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
+int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
+			 sector_t start, void *data)
 {
+	struct queue_limits *limits = data;
+	struct block_device *bdev = dev->bdev;
 	struct request_queue *q = bdev_get_queue(bdev);
 	char b[BDEVNAME_SIZE];
 
 	if (unlikely(!q)) {
 		DMWARN("%s: Cannot set limits for nonexistent device %s",
 		       dm_device_name(ti->table->md), bdevname(bdev, b));
-		return;
+		return 0;
 	}
 
-	if (blk_stack_limits(&ti->limits, &q->limits, 0) < 0)
+	if (blk_stack_limits(limits, &q->limits, start) < 0)
 		DMWARN("%s: target device %s is misaligned",
 		       dm_device_name(ti->table->md), bdevname(bdev, b));
 
@@ -501,32 +501,21 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
 	 */
 
 	if (q->merge_bvec_fn && !ti->type->merge)
-		ti->limits.max_sectors =
-			min_not_zero(ti->limits.max_sectors,
+		limits->max_sectors =
+			min_not_zero(limits->max_sectors,
 				     (unsigned int) (PAGE_SIZE >> 9));
+	return 0;
 }
 EXPORT_SYMBOL_GPL(dm_set_device_limits);
 
 int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
 		  sector_t len, fmode_t mode, struct dm_dev **result)
 {
-	int r = __table_get_device(ti->table, ti, path,
-				   start, len, mode, result);
-
-	if (r)
-		return r;
-
-	dm_set_device_limits(ti, (*result)->bdev);
-
-	if (!device_area_is_valid(ti, (*result)->bdev, start, len)) {
-		dm_put_device(ti, *result);
-		*result = NULL;
-		return -EINVAL;
-	}
-
-	return r;
+	return __table_get_device(ti->table, ti, path,
+				  start, len, mode, result);
 }
 
+
 /*
  * Decrement a devices use count and remove it if necessary.
  */
@@ -641,34 +630,6 @@ int dm_split_args(int *argc, char ***argvp, char *input)
 	return 0;
 }
 
-static void init_valid_queue_limits(struct queue_limits *limits)
-{
-	if (!limits->max_sectors)
-		limits->max_sectors = SAFE_MAX_SECTORS;
-	if (!limits->max_hw_sectors)
-		limits->max_hw_sectors = SAFE_MAX_SECTORS;
-	if (!limits->max_phys_segments)
-		limits->max_phys_segments = MAX_PHYS_SEGMENTS;
-	if (!limits->max_hw_segments)
-		limits->max_hw_segments = MAX_HW_SEGMENTS;
-	if (!limits->logical_block_size)
-		limits->logical_block_size = 1 << SECTOR_SHIFT;
-	if (!limits->physical_block_size)
-		limits->physical_block_size = 1 << SECTOR_SHIFT;
-	if (!limits->io_min)
-		limits->io_min = 1 << SECTOR_SHIFT;
-	if (!limits->max_segment_size)
-		limits->max_segment_size = MAX_SEGMENT_SIZE;
-	if (!limits->seg_boundary_mask)
-		limits->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
-	if (!limits->bounce_pfn)
-		limits->bounce_pfn = -1;
-	/*
-	 * The other fields (alignment_offset, io_opt, misaligned)
-	 * hold 0 from the kzalloc().
-	 */
-}
-
 /*
  * Impose necessary and sufficient conditions on a devices's table such
  * that any incoming bio which respects its logical_block_size can be
@@ -676,14 +637,15 @@ static void init_valid_queue_limits(struct queue_limits *limits)
  * two or more targets, the size of each piece it gets split into must
  * be compatible with the logical_block_size of the target processing it.
  */
-static int validate_hardware_logical_block_alignment(struct dm_table *table)
+static int validate_hardware_logical_block_alignment(struct dm_table *table,
+						 struct queue_limits *limits)
 {
 	/*
 	 * This function uses arithmetic modulo the logical_block_size
 	 * (in units of 512-byte sectors).
 	 */
 	unsigned short device_logical_block_size_sects =
-		table->limits.logical_block_size >> SECTOR_SHIFT;
+		limits->logical_block_size >> SECTOR_SHIFT;
 
 	/*
 	 * Offset of the start of the next table entry, mod logical_block_size.
@@ -697,6 +659,7 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table)
 	unsigned short remaining = 0;
 
 	struct dm_target *uninitialized_var(ti);
+	struct queue_limits ti_limits;
 	unsigned i = 0;
 
 	/*
@@ -705,12 +668,19 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table)
 	while (i < dm_table_get_num_targets(table)) {
 		ti = dm_table_get_target(table, i++);
 
+		blk_set_default_limits(&ti_limits);
+
+		/* combine all target devices' limits */
+		if (ti->type->iterate_devices)
+			ti->type->iterate_devices(ti, dm_set_device_limits,
+						  &ti_limits);
+
 		/*
 		 * If the remaining sectors fall entirely within this
 		 * table entry are they compatible with its logical_block_size?
 		 */
 		if (remaining < ti->len &&
-		    remaining & ((ti->limits.logical_block_size >>
+		    remaining & ((ti_limits.logical_block_size >>
 				  SECTOR_SHIFT) - 1))
 			break;	/* Error */
 
@@ -723,11 +693,11 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table)
 
 	if (remaining) {
 		DMWARN("%s: table line %u (start sect %llu len %llu) "
-		       "not aligned to hardware logical block size %hu",
+		       "not aligned to h/w logical block size %hu",
 		       dm_device_name(table->md), i,
 		       (unsigned long long) ti->begin,
 		       (unsigned long long) ti->len,
-		       table->limits.logical_block_size);
+		       limits->logical_block_size);
 		return -EINVAL;
 	}
 
@@ -786,12 +756,6 @@ int dm_table_add_target(struct dm_table *t, const char *type,
 
 	t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
 
-	if (blk_stack_limits(&t->limits, &tgt->limits, 0) < 0)
-		DMWARN("%s: target device (start sect %llu len %llu) "
-		       "is misaligned",
-		       dm_device_name(t->md),
-		       (unsigned long long) tgt->begin,
-		       (unsigned long long) tgt->len);
 	return 0;
 
  bad:
@@ -834,12 +798,6 @@ int dm_table_complete(struct dm_table *t)
 	int r = 0;
 	unsigned int leaf_nodes;
 
-	init_valid_queue_limits(&t->limits);
-
-	r = validate_hardware_logical_block_alignment(t);
-	if (r)
-		return r;
-
 	/* how many indexes will the btree have ? */
 	leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
 	t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
@@ -914,6 +872,57 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
 	return &t->targets[(KEYS_PER_NODE * n) + k];
 }
 
+/*
+ * Establish the new table's queue_limits and validate them.
+ */
+int dm_calculate_queue_limits(struct dm_table *table,
+			      struct queue_limits *limits)
+{
+	struct dm_target *uninitialized_var(ti);
+	struct queue_limits ti_limits;
+	unsigned i = 0;
+
+	blk_set_default_limits(limits);
+
+	while (i < dm_table_get_num_targets(table)) {
+		blk_set_default_limits(&ti_limits);
+
+		ti = dm_table_get_target(table, i++);
+
+		if (!ti->type->iterate_devices)
+			goto combine_limits;
+
+		/*
+		 * Combine queue limits of all the devices this target uses.
+		 */
+		ti->type->iterate_devices(ti, dm_set_device_limits,
+					  &ti_limits);
+
+		/*
+		 * Check each device area is consistent with the target's
+		 * overall queue limits.
+		 */
+		if (!ti->type->iterate_devices(ti, device_area_is_valid,
+					       &ti_limits))
+			return -EINVAL;
+
+combine_limits:
+		/*
+		 * Merge this target's queue limits into the overall limits
+		 * for the table.
+		 */
+		if (blk_stack_limits(limits, &ti_limits, 0) < 0)
+			DMWARN("%s: target device "
+			       "(start sect %llu len %llu) "
+			       "is misaligned",
+			       dm_device_name(table->md),
+			       (unsigned long long) ti->begin,
+			       (unsigned long long) ti->len);
+	}
+
+	return validate_hardware_logical_block_alignment(table, limits);
+}
+
 /*
  * Set the integrity profile for this device if all devices used have
  * matching profiles.
@@ -953,14 +962,24 @@ no_integrity:
 	return;
 }
 
-void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
+void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
+			       struct queue_limits *limits)
 {
+	/*
+	 * Each target device in the table has a data area that should normally
+	 * be aligned such that the DM device's alignment_offset is 0.
+	 * FIXME: Propagate alignment_offsets up the stack and warn of
+	 *	  sub-optimal or inconsistent settings.
+	 */
+	limits->alignment_offset = 0;
+	limits->misaligned = 0;
+
 	/*
 	 * Copy table's limits to the DM device's request_queue
 	 */
-	q->limits = t->limits;
+	q->limits = *limits;
 
-	if (t->limits.no_cluster)
+	if (limits->no_cluster)
 		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
 	else
 		queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index a9210bb594e..f609793a92d 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1313,7 +1313,8 @@ static void __set_size(struct mapped_device *md, sector_t size)
 	mutex_unlock(&md->bdev->bd_inode->i_mutex);
 }
 
-static int __bind(struct mapped_device *md, struct dm_table *t)
+static int __bind(struct mapped_device *md, struct dm_table *t,
+		  struct queue_limits *limits)
 {
 	struct request_queue *q = md->queue;
 	sector_t size;
@@ -1337,7 +1338,7 @@ static int __bind(struct mapped_device *md, struct dm_table *t)
 
 	write_lock(&md->map_lock);
 	md->map = t;
-	dm_table_set_restrictions(t, q);
+	dm_table_set_restrictions(t, q, limits);
 	write_unlock(&md->map_lock);
 
 	return 0;
@@ -1562,6 +1563,7 @@ static void dm_queue_flush(struct mapped_device *md)
  */
 int dm_swap_table(struct mapped_device *md, struct dm_table *table)
 {
+	struct queue_limits limits;
 	int r = -EINVAL;
 
 	mutex_lock(&md->suspend_lock);
@@ -1570,8 +1572,12 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table)
 	if (!dm_suspended(md))
 		goto out;
 
+	r = dm_calculate_queue_limits(table, &limits);
+	if (r)
+		goto out;
+
 	__unbind(md);
-	r = __bind(md, table);
+	r = __bind(md, table, &limits);
 
 out:
 	mutex_unlock(&md->suspend_lock);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index b5935c610c4..604e85caadf 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -41,7 +41,10 @@ void dm_table_event_callback(struct dm_table *t,
 			     void (*fn)(void *), void *context);
 struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
 struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
-void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q);
+int dm_calculate_queue_limits(struct dm_table *table,
+			      struct queue_limits *limits);
+void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
+			       struct queue_limits *limits);
 struct list_head *dm_table_get_devices(struct dm_table *t);
 void dm_table_presuspend_targets(struct dm_table *t);
 void dm_table_postsuspend_targets(struct dm_table *t);
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index deac3b4e5e1..e6bf3b8c7bf 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -103,7 +103,8 @@ void dm_error(const char *message);
 /*
  * Combine device limits.
  */
-void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev);
+int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
+			 sector_t start, void *data);
 
 struct dm_dev {
 	struct block_device *bdev;
@@ -163,7 +164,6 @@ struct dm_target {
 	sector_t begin;
 	sector_t len;
 
-	/* FIXME: turn this into a mask, and merge with queue_limits */
 	/* Always a power of 2 */
 	sector_t split_io;
 
@@ -177,12 +177,6 @@ struct dm_target {
 	 */
 	unsigned num_flush_requests;
 
-	/*
-	 * These are automatically filled in by
-	 * dm_table_get_device.
-	 */
-	struct queue_limits limits;
-
 	/* target specific data */
 	void *private;
 
-- 
cgit v1.2.3-70-g09d2


From f5db4af466e2dca0fe822019812d586ca910b00c Mon Sep 17 00:00:00 2001
From: Jonthan Brassow <jbrassow@redhat.com>
Date: Mon, 22 Jun 2009 10:12:35 +0100
Subject: dm raid1: add userspace log

This patch contains a device-mapper mirror log module that forwards
requests to userspace for processing.

The structures used for communication between kernel and userspace are
located in include/linux/dm-log-userspace.h.  Due to the frequency,
diversity, and 2-way communication nature of the exchanges between
kernel and userspace, 'connector' was chosen as the interface for
communication.

The first log implementations written in userspace - "clustered-disk"
and "clustered-core" - support clustered shared storage.   A userspace
daemon (in the LVM2 source code repository) uses openAIS/corosync to
process requests in an ordered fashion with the rest of the nodes in the
cluster so as to prevent log state corruption.  Other implementations
with no association to LVM or openAIS/corosync, are certainly possible.

(Imagine if two machines are writing to the same region of a mirror.
They would both mark the region dirty, but you need a cluster-aware
entity that can handle properly marking the region clean when they are
done.  Otherwise, you might clear the region when the first machine is
done, not the second.)

Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Cc: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 Documentation/device-mapper/dm-log.txt |  54 +++
 drivers/md/Kconfig                     |  11 +
 drivers/md/Makefile                    |   3 +
 drivers/md/dm-log-userspace-base.c     | 696 +++++++++++++++++++++++++++++++++
 drivers/md/dm-log-userspace-transfer.c | 276 +++++++++++++
 drivers/md/dm-log-userspace-transfer.h |  18 +
 include/linux/Kbuild                   |   1 +
 include/linux/connector.h              |   4 +-
 include/linux/dm-log-userspace.h       | 386 ++++++++++++++++++
 9 files changed, 1448 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/device-mapper/dm-log.txt
 create mode 100644 drivers/md/dm-log-userspace-base.c
 create mode 100644 drivers/md/dm-log-userspace-transfer.c
 create mode 100644 drivers/md/dm-log-userspace-transfer.h
 create mode 100644 include/linux/dm-log-userspace.h

(limited to 'include')

diff --git a/Documentation/device-mapper/dm-log.txt b/Documentation/device-mapper/dm-log.txt
new file mode 100644
index 00000000000..994dd75475a
--- /dev/null
+++ b/Documentation/device-mapper/dm-log.txt
@@ -0,0 +1,54 @@
+Device-Mapper Logging
+=====================
+The device-mapper logging code is used by some of the device-mapper
+RAID targets to track regions of the disk that are not consistent.
+A region (or portion of the address space) of the disk may be
+inconsistent because a RAID stripe is currently being operated on or
+a machine died while the region was being altered.  In the case of
+mirrors, a region would be considered dirty/inconsistent while you
+are writing to it because the writes need to be replicated for all
+the legs of the mirror and may not reach the legs at the same time.
+Once all writes are complete, the region is considered clean again.
+
+There is a generic logging interface that the device-mapper RAID
+implementations use to perform logging operations (see
+dm_dirty_log_type in include/linux/dm-dirty-log.h).  Various different
+logging implementations are available and provide different
+capabilities.  The list includes:
+
+Type		Files
+====		=====
+disk		drivers/md/dm-log.c
+core		drivers/md/dm-log.c
+userspace	drivers/md/dm-log-userspace* include/linux/dm-log-userspace.h
+
+The "disk" log type
+-------------------
+This log implementation commits the log state to disk.  This way, the
+logging state survives reboots/crashes.
+
+The "core" log type
+-------------------
+This log implementation keeps the log state in memory.  The log state
+will not survive a reboot or crash, but there may be a small boost in
+performance.  This method can also be used if no storage device is
+available for storing log state.
+
+The "userspace" log type
+------------------------
+This log type simply provides a way to export the log API to userspace,
+so log implementations can be done there.  This is done by forwarding most
+logging requests to userspace, where a daemon receives and processes the
+request.
+
+The structure used for communication between kernel and userspace are
+located in include/linux/dm-log-userspace.h.  Due to the frequency,
+diversity, and 2-way communication nature of the exchanges between
+kernel and userspace, 'connector' is used as the interface for
+communication.
+
+There are currently two userspace log implementations that leverage this
+framework - "clustered_disk" and "clustered_core".  These implementations
+provide a cluster-coherent log for shared-storage.  Device-mapper mirroring
+can be used in a shared-storage environment when the cluster log implementations
+are employed.
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 09f93fa6891..020f9573fd8 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -231,6 +231,17 @@ config DM_MIRROR
          Allow volume managers to mirror logical volumes, also
          needed for live data migration tools such as 'pvmove'.
 
+config DM_LOG_USERSPACE
+	tristate "Mirror userspace logging (EXPERIMENTAL)"
+	depends on DM_MIRROR && EXPERIMENTAL && NET
+	select CONNECTOR
+	---help---
+	  The userspace logging module provides a mechanism for
+	  relaying the dm-dirty-log API to userspace.  Log designs
+	  which are more suited to userspace implementation (e.g.
+	  shared storage logs) or experimental logs can be implemented
+	  by leveraging this framework.
+
 config DM_ZERO
 	tristate "Zero target"
 	depends on BLK_DEV_DM
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index dade52f6073..1dc4185bd78 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -8,6 +8,8 @@ dm-multipath-y	+= dm-path-selector.o dm-mpath.o
 dm-snapshot-y	+= dm-snap.o dm-exception-store.o dm-snap-transient.o \
 		    dm-snap-persistent.o
 dm-mirror-y	+= dm-raid1.o
+dm-log-userspace-y \
+		+= dm-log-userspace-base.o dm-log-userspace-transfer.o
 md-mod-y	+= md.o bitmap.o
 raid456-y	+= raid5.o
 raid6_pq-y	+= raid6algos.o raid6recov.o raid6tables.o \
@@ -40,6 +42,7 @@ obj-$(CONFIG_DM_MULTIPATH_QL)	+= dm-queue-length.o
 obj-$(CONFIG_DM_MULTIPATH_ST)	+= dm-service-time.o
 obj-$(CONFIG_DM_SNAPSHOT)	+= dm-snapshot.o
 obj-$(CONFIG_DM_MIRROR)		+= dm-mirror.o dm-log.o dm-region-hash.o
+obj-$(CONFIG_DM_LOG_USERSPACE)	+= dm-log-userspace.o
 obj-$(CONFIG_DM_ZERO)		+= dm-zero.o
 
 quiet_cmd_unroll = UNROLL  $@
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
new file mode 100644
index 00000000000..e69b9656099
--- /dev/null
+++ b/drivers/md/dm-log-userspace-base.c
@@ -0,0 +1,696 @@
+/*
+ * Copyright (C) 2006-2009 Red Hat, Inc.
+ *
+ * This file is released under the LGPL.
+ */
+
+#include <linux/bio.h>
+#include <linux/dm-dirty-log.h>
+#include <linux/device-mapper.h>
+#include <linux/dm-log-userspace.h>
+
+#include "dm-log-userspace-transfer.h"
+
+struct flush_entry {
+	int type;
+	region_t region;
+	struct list_head list;
+};
+
+struct log_c {
+	struct dm_target *ti;
+	uint32_t region_size;
+	region_t region_count;
+	char uuid[DM_UUID_LEN];
+
+	char *usr_argv_str;
+	uint32_t usr_argc;
+
+	/*
+	 * in_sync_hint gets set when doing is_remote_recovering.  It
+	 * represents the first region that needs recovery.  IOW, the
+	 * first zero bit of sync_bits.  This can be useful for to limit
+	 * traffic for calls like is_remote_recovering and get_resync_work,
+	 * but be take care in its use for anything else.
+	 */
+	uint64_t in_sync_hint;
+
+	spinlock_t flush_lock;
+	struct list_head flush_list;  /* only for clear and mark requests */
+};
+
+static mempool_t *flush_entry_pool;
+
+static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data)
+{
+	return kmalloc(sizeof(struct flush_entry), gfp_mask);
+}
+
+static void flush_entry_free(void *element, void *pool_data)
+{
+	kfree(element);
+}
+
+static int userspace_do_request(struct log_c *lc, const char *uuid,
+				int request_type, char *data, size_t data_size,
+				char *rdata, size_t *rdata_size)
+{
+	int r;
+
+	/*
+	 * If the server isn't there, -ESRCH is returned,
+	 * and we must keep trying until the server is
+	 * restored.
+	 */
+retry:
+	r = dm_consult_userspace(uuid, request_type, data,
+				 data_size, rdata, rdata_size);
+
+	if (r != -ESRCH)
+		return r;
+
+	DMERR(" Userspace log server not found.");
+	while (1) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(2*HZ);
+		DMWARN("Attempting to contact userspace log server...");
+		r = dm_consult_userspace(uuid, DM_ULOG_CTR, lc->usr_argv_str,
+					 strlen(lc->usr_argv_str) + 1,
+					 NULL, NULL);
+		if (!r)
+			break;
+	}
+	DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete");
+	r = dm_consult_userspace(uuid, DM_ULOG_RESUME, NULL,
+				 0, NULL, NULL);
+	if (!r)
+		goto retry;
+
+	DMERR("Error trying to resume userspace log: %d", r);
+
+	return -ESRCH;
+}
+
+static int build_constructor_string(struct dm_target *ti,
+				    unsigned argc, char **argv,
+				    char **ctr_str)
+{
+	int i, str_size;
+	char *str = NULL;
+
+	*ctr_str = NULL;
+
+	for (i = 0, str_size = 0; i < argc; i++)
+		str_size += strlen(argv[i]) + 1; /* +1 for space between args */
+
+	str_size += 20; /* Max number of chars in a printed u64 number */
+
+	str = kzalloc(str_size, GFP_KERNEL);
+	if (!str) {
+		DMWARN("Unable to allocate memory for constructor string");
+		return -ENOMEM;
+	}
+
+	for (i = 0, str_size = 0; i < argc; i++)
+		str_size += sprintf(str + str_size, "%s ", argv[i]);
+	str_size += sprintf(str + str_size, "%llu",
+			    (unsigned long long)ti->len);
+
+	*ctr_str = str;
+	return str_size;
+}
+
+/*
+ * userspace_ctr
+ *
+ * argv contains:
+ *	<UUID> <other args>
+ * Where 'other args' is the userspace implementation specific log
+ * arguments.  An example might be:
+ *	<UUID> clustered_disk <arg count> <log dev> <region_size> [[no]sync]
+ *
+ * So, this module will strip off the <UUID> for identification purposes
+ * when communicating with userspace about a log; but will pass on everything
+ * else.
+ */
+static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
+			 unsigned argc, char **argv)
+{
+	int r = 0;
+	int str_size;
+	char *ctr_str = NULL;
+	struct log_c *lc = NULL;
+	uint64_t rdata;
+	size_t rdata_size = sizeof(rdata);
+
+	if (argc < 3) {
+		DMWARN("Too few arguments to userspace dirty log");
+		return -EINVAL;
+	}
+
+	lc = kmalloc(sizeof(*lc), GFP_KERNEL);
+	if (!lc) {
+		DMWARN("Unable to allocate userspace log context.");
+		return -ENOMEM;
+	}
+
+	lc->ti = ti;
+
+	if (strlen(argv[0]) > (DM_UUID_LEN - 1)) {
+		DMWARN("UUID argument too long.");
+		kfree(lc);
+		return -EINVAL;
+	}
+
+	strncpy(lc->uuid, argv[0], DM_UUID_LEN);
+	spin_lock_init(&lc->flush_lock);
+	INIT_LIST_HEAD(&lc->flush_list);
+
+	str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str);
+	if (str_size < 0) {
+		kfree(lc);
+		return str_size;
+	}
+
+	/* Send table string */
+	r = dm_consult_userspace(lc->uuid, DM_ULOG_CTR,
+				 ctr_str, str_size, NULL, NULL);
+
+	if (r == -ESRCH) {
+		DMERR("Userspace log server not found");
+		goto out;
+	}
+
+	/* Since the region size does not change, get it now */
+	rdata_size = sizeof(rdata);
+	r = dm_consult_userspace(lc->uuid, DM_ULOG_GET_REGION_SIZE,
+				 NULL, 0, (char *)&rdata, &rdata_size);
+
+	if (r) {
+		DMERR("Failed to get region size of dirty log");
+		goto out;
+	}
+
+	lc->region_size = (uint32_t)rdata;
+	lc->region_count = dm_sector_div_up(ti->len, lc->region_size);
+
+out:
+	if (r) {
+		kfree(lc);
+		kfree(ctr_str);
+	} else {
+		lc->usr_argv_str = ctr_str;
+		lc->usr_argc = argc;
+		log->context = lc;
+	}
+
+	return r;
+}
+
+static void userspace_dtr(struct dm_dirty_log *log)
+{
+	int r;
+	struct log_c *lc = log->context;
+
+	r = dm_consult_userspace(lc->uuid, DM_ULOG_DTR,
+				 NULL, 0,
+				 NULL, NULL);
+
+	kfree(lc->usr_argv_str);
+	kfree(lc);
+
+	return;
+}
+
+static int userspace_presuspend(struct dm_dirty_log *log)
+{
+	int r;
+	struct log_c *lc = log->context;
+
+	r = dm_consult_userspace(lc->uuid, DM_ULOG_PRESUSPEND,
+				 NULL, 0,
+				 NULL, NULL);
+
+	return r;
+}
+
+static int userspace_postsuspend(struct dm_dirty_log *log)
+{
+	int r;
+	struct log_c *lc = log->context;
+
+	r = dm_consult_userspace(lc->uuid, DM_ULOG_POSTSUSPEND,
+				 NULL, 0,
+				 NULL, NULL);
+
+	return r;
+}
+
+static int userspace_resume(struct dm_dirty_log *log)
+{
+	int r;
+	struct log_c *lc = log->context;
+
+	lc->in_sync_hint = 0;
+	r = dm_consult_userspace(lc->uuid, DM_ULOG_RESUME,
+				 NULL, 0,
+				 NULL, NULL);
+
+	return r;
+}
+
+static uint32_t userspace_get_region_size(struct dm_dirty_log *log)
+{
+	struct log_c *lc = log->context;
+
+	return lc->region_size;
+}
+
+/*
+ * userspace_is_clean
+ *
+ * Check whether a region is clean.  If there is any sort of
+ * failure when consulting the server, we return not clean.
+ *
+ * Returns: 1 if clean, 0 otherwise
+ */
+static int userspace_is_clean(struct dm_dirty_log *log, region_t region)
+{
+	int r;
+	uint64_t region64 = (uint64_t)region;
+	int64_t is_clean;
+	size_t rdata_size;
+	struct log_c *lc = log->context;
+
+	rdata_size = sizeof(is_clean);
+	r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN,
+				 (char *)&region64, sizeof(region64),
+				 (char *)&is_clean, &rdata_size);
+
+	return (r) ? 0 : (int)is_clean;
+}
+
+/*
+ * userspace_in_sync
+ *
+ * Check if the region is in-sync.  If there is any sort
+ * of failure when consulting the server, we assume that
+ * the region is not in sync.
+ *
+ * If 'can_block' is set, return immediately
+ *
+ * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK
+ */
+static int userspace_in_sync(struct dm_dirty_log *log, region_t region,
+			     int can_block)
+{
+	int r;
+	uint64_t region64 = region;
+	int64_t in_sync;
+	size_t rdata_size;
+	struct log_c *lc = log->context;
+
+	/*
+	 * We can never respond directly - even if in_sync_hint is
+	 * set.  This is because another machine could see a device
+	 * failure and mark the region out-of-sync.  If we don't go
+	 * to userspace to ask, we might think the region is in-sync
+	 * and allow a read to pick up data that is stale.  (This is
+	 * very unlikely if a device actually fails; but it is very
+	 * likely if a connection to one device from one machine fails.)
+	 *
+	 * There still might be a problem if the mirror caches the region
+	 * state as in-sync... but then this call would not be made.  So,
+	 * that is a mirror problem.
+	 */
+	if (!can_block)
+		return -EWOULDBLOCK;
+
+	rdata_size = sizeof(in_sync);
+	r = userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC,
+				 (char *)&region64, sizeof(region64),
+				 (char *)&in_sync, &rdata_size);
+	return (r) ? 0 : (int)in_sync;
+}
+
+/*
+ * userspace_flush
+ *
+ * This function is ok to block.
+ * The flush happens in two stages.  First, it sends all
+ * clear/mark requests that are on the list.  Then it
+ * tells the server to commit them.  This gives the
+ * server a chance to optimise the commit, instead of
+ * doing it for every request.
+ *
+ * Additionally, we could implement another thread that
+ * sends the requests up to the server - reducing the
+ * load on flush.  Then the flush would have less in
+ * the list and be responsible for the finishing commit.
+ *
+ * Returns: 0 on success, < 0 on failure
+ */
+static int userspace_flush(struct dm_dirty_log *log)
+{
+	int r = 0;
+	unsigned long flags;
+	struct log_c *lc = log->context;
+	LIST_HEAD(flush_list);
+	struct flush_entry *fe, *tmp_fe;
+
+	spin_lock_irqsave(&lc->flush_lock, flags);
+	list_splice_init(&lc->flush_list, &flush_list);
+	spin_unlock_irqrestore(&lc->flush_lock, flags);
+
+	if (list_empty(&flush_list))
+		return 0;
+
+	/*
+	 * FIXME: Count up requests, group request types,
+	 * allocate memory to stick all requests in and
+	 * send to server in one go.  Failing the allocation,
+	 * do it one by one.
+	 */
+
+	list_for_each_entry(fe, &flush_list, list) {
+		r = userspace_do_request(lc, lc->uuid, fe->type,
+					 (char *)&fe->region,
+					 sizeof(fe->region),
+					 NULL, NULL);
+		if (r)
+			goto fail;
+	}
+
+	r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
+				 NULL, 0, NULL, NULL);
+
+fail:
+	/*
+	 * We can safely remove these entries, even if failure.
+	 * Calling code will receive an error and will know that
+	 * the log facility has failed.
+	 */
+	list_for_each_entry_safe(fe, tmp_fe, &flush_list, list) {
+		list_del(&fe->list);
+		mempool_free(fe, flush_entry_pool);
+	}
+
+	if (r)
+		dm_table_event(lc->ti->table);
+
+	return r;
+}
+
+/*
+ * userspace_mark_region
+ *
+ * This function should avoid blocking unless absolutely required.
+ * (Memory allocation is valid for blocking.)
+ */
+static void userspace_mark_region(struct dm_dirty_log *log, region_t region)
+{
+	unsigned long flags;
+	struct log_c *lc = log->context;
+	struct flush_entry *fe;
+
+	/* Wait for an allocation, but _never_ fail */
+	fe = mempool_alloc(flush_entry_pool, GFP_NOIO);
+	BUG_ON(!fe);
+
+	spin_lock_irqsave(&lc->flush_lock, flags);
+	fe->type = DM_ULOG_MARK_REGION;
+	fe->region = region;
+	list_add(&fe->list, &lc->flush_list);
+	spin_unlock_irqrestore(&lc->flush_lock, flags);
+
+	return;
+}
+
+/*
+ * userspace_clear_region
+ *
+ * This function must not block.
+ * So, the alloc can't block.  In the worst case, it is ok to
+ * fail.  It would simply mean we can't clear the region.
+ * Does nothing to current sync context, but does mean
+ * the region will be re-sync'ed on a reload of the mirror
+ * even though it is in-sync.
+ */
+static void userspace_clear_region(struct dm_dirty_log *log, region_t region)
+{
+	unsigned long flags;
+	struct log_c *lc = log->context;
+	struct flush_entry *fe;
+
+	/*
+	 * If we fail to allocate, we skip the clearing of
+	 * the region.  This doesn't hurt us in any way, except
+	 * to cause the region to be resync'ed when the
+	 * device is activated next time.
+	 */
+	fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC);
+	if (!fe) {
+		DMERR("Failed to allocate memory to clear region.");
+		return;
+	}
+
+	spin_lock_irqsave(&lc->flush_lock, flags);
+	fe->type = DM_ULOG_CLEAR_REGION;
+	fe->region = region;
+	list_add(&fe->list, &lc->flush_list);
+	spin_unlock_irqrestore(&lc->flush_lock, flags);
+
+	return;
+}
+
+/*
+ * userspace_get_resync_work
+ *
+ * Get a region that needs recovery.  It is valid to return
+ * an error for this function.
+ *
+ * Returns: 1 if region filled, 0 if no work, <0 on error
+ */
+static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region)
+{
+	int r;
+	size_t rdata_size;
+	struct log_c *lc = log->context;
+	struct {
+		int64_t i; /* 64-bit for mix arch compatibility */
+		region_t r;
+	} pkg;
+
+	if (lc->in_sync_hint >= lc->region_count)
+		return 0;
+
+	rdata_size = sizeof(pkg);
+	r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK,
+				 NULL, 0,
+				 (char *)&pkg, &rdata_size);
+
+	*region = pkg.r;
+	return (r) ? r : (int)pkg.i;
+}
+
+/*
+ * userspace_set_region_sync
+ *
+ * Set the sync status of a given region.  This function
+ * must not fail.
+ */
+static void userspace_set_region_sync(struct dm_dirty_log *log,
+				      region_t region, int in_sync)
+{
+	int r;
+	struct log_c *lc = log->context;
+	struct {
+		region_t r;
+		int64_t i;
+	} pkg;
+
+	pkg.r = region;
+	pkg.i = (int64_t)in_sync;
+
+	r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC,
+				 (char *)&pkg, sizeof(pkg),
+				 NULL, NULL);
+
+	/*
+	 * It would be nice to be able to report failures.
+	 * However, it is easy emough to detect and resolve.
+	 */
+	return;
+}
+
+/*
+ * userspace_get_sync_count
+ *
+ * If there is any sort of failure when consulting the server,
+ * we assume that the sync count is zero.
+ *
+ * Returns: sync count on success, 0 on failure
+ */
+static region_t userspace_get_sync_count(struct dm_dirty_log *log)
+{
+	int r;
+	size_t rdata_size;
+	uint64_t sync_count;
+	struct log_c *lc = log->context;
+
+	rdata_size = sizeof(sync_count);
+	r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT,
+				 NULL, 0,
+				 (char *)&sync_count, &rdata_size);
+
+	if (r)
+		return 0;
+
+	if (sync_count >= lc->region_count)
+		lc->in_sync_hint = lc->region_count;
+
+	return (region_t)sync_count;
+}
+
+/*
+ * userspace_status
+ *
+ * Returns: amount of space consumed
+ */
+static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
+			    char *result, unsigned maxlen)
+{
+	int r = 0;
+	size_t sz = (size_t)maxlen;
+	struct log_c *lc = log->context;
+
+	switch (status_type) {
+	case STATUSTYPE_INFO:
+		r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO,
+					 NULL, 0,
+					 result, &sz);
+
+		if (r) {
+			sz = 0;
+			DMEMIT("%s 1 COM_FAILURE", log->type->name);
+		}
+		break;
+	case STATUSTYPE_TABLE:
+		sz = 0;
+		DMEMIT("%s %u %s %s", log->type->name, lc->usr_argc + 1,
+		       lc->uuid, lc->usr_argv_str);
+		break;
+	}
+	return (r) ? 0 : (int)sz;
+}
+
+/*
+ * userspace_is_remote_recovering
+ *
+ * Returns: 1 if region recovering, 0 otherwise
+ */
+static int userspace_is_remote_recovering(struct dm_dirty_log *log,
+					  region_t region)
+{
+	int r;
+	uint64_t region64 = region;
+	struct log_c *lc = log->context;
+	static unsigned long long limit;
+	struct {
+		int64_t is_recovering;
+		uint64_t in_sync_hint;
+	} pkg;
+	size_t rdata_size = sizeof(pkg);
+
+	/*
+	 * Once the mirror has been reported to be in-sync,
+	 * it will never again ask for recovery work.  So,
+	 * we can safely say there is not a remote machine
+	 * recovering if the device is in-sync.  (in_sync_hint
+	 * must be reset at resume time.)
+	 */
+	if (region < lc->in_sync_hint)
+		return 0;
+	else if (jiffies < limit)
+		return 1;
+
+	limit = jiffies + (HZ / 4);
+	r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_REMOTE_RECOVERING,
+				 (char *)&region64, sizeof(region64),
+				 (char *)&pkg, &rdata_size);
+	if (r)
+		return 1;
+
+	lc->in_sync_hint = pkg.in_sync_hint;
+
+	return (int)pkg.is_recovering;
+}
+
+static struct dm_dirty_log_type _userspace_type = {
+	.name = "userspace",
+	.module = THIS_MODULE,
+	.ctr = userspace_ctr,
+	.dtr = userspace_dtr,
+	.presuspend = userspace_presuspend,
+	.postsuspend = userspace_postsuspend,
+	.resume = userspace_resume,
+	.get_region_size = userspace_get_region_size,
+	.is_clean = userspace_is_clean,
+	.in_sync = userspace_in_sync,
+	.flush = userspace_flush,
+	.mark_region = userspace_mark_region,
+	.clear_region = userspace_clear_region,
+	.get_resync_work = userspace_get_resync_work,
+	.set_region_sync = userspace_set_region_sync,
+	.get_sync_count = userspace_get_sync_count,
+	.status = userspace_status,
+	.is_remote_recovering = userspace_is_remote_recovering,
+};
+
+static int __init userspace_dirty_log_init(void)
+{
+	int r = 0;
+
+	flush_entry_pool = mempool_create(100, flush_entry_alloc,
+					  flush_entry_free, NULL);
+
+	if (!flush_entry_pool) {
+		DMWARN("Unable to create flush_entry_pool:  No memory.");
+		return -ENOMEM;
+	}
+
+	r = dm_ulog_tfr_init();
+	if (r) {
+		DMWARN("Unable to initialize userspace log communications");
+		mempool_destroy(flush_entry_pool);
+		return r;
+	}
+
+	r = dm_dirty_log_type_register(&_userspace_type);
+	if (r) {
+		DMWARN("Couldn't register userspace dirty log type");
+		dm_ulog_tfr_exit();
+		mempool_destroy(flush_entry_pool);
+		return r;
+	}
+
+	DMINFO("version 1.0.0 loaded");
+	return 0;
+}
+
+static void __exit userspace_dirty_log_exit(void)
+{
+	dm_dirty_log_type_unregister(&_userspace_type);
+	dm_ulog_tfr_exit();
+	mempool_destroy(flush_entry_pool);
+
+	DMINFO("version 1.0.0 unloaded");
+	return;
+}
+
+module_init(userspace_dirty_log_init);
+module_exit(userspace_dirty_log_exit);
+
+MODULE_DESCRIPTION(DM_NAME " userspace dirty log link");
+MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c
new file mode 100644
index 00000000000..0ca1ee768a1
--- /dev/null
+++ b/drivers/md/dm-log-userspace-transfer.c
@@ -0,0 +1,276 @@
+/*
+ * Copyright (C) 2006-2009 Red Hat, Inc.
+ *
+ * This file is released under the LGPL.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <net/sock.h>
+#include <linux/workqueue.h>
+#include <linux/connector.h>
+#include <linux/device-mapper.h>
+#include <linux/dm-log-userspace.h>
+
+#include "dm-log-userspace-transfer.h"
+
+static uint32_t dm_ulog_seq;
+
+/*
+ * Netlink/Connector is an unreliable protocol.  How long should
+ * we wait for a response before assuming it was lost and retrying?
+ * (If we do receive a response after this time, it will be discarded
+ * and the response to the resent request will be waited for.
+ */
+#define DM_ULOG_RETRY_TIMEOUT (15 * HZ)
+
+/*
+ * Pre-allocated space for speed
+ */
+#define DM_ULOG_PREALLOCED_SIZE 512
+static struct cn_msg *prealloced_cn_msg;
+static struct dm_ulog_request *prealloced_ulog_tfr;
+
+static struct cb_id ulog_cn_id = {
+	.idx = CN_IDX_DM,
+	.val = CN_VAL_DM_USERSPACE_LOG
+};
+
+static DEFINE_MUTEX(dm_ulog_lock);
+
+struct receiving_pkg {
+	struct list_head list;
+	struct completion complete;
+
+	uint32_t seq;
+
+	int error;
+	size_t *data_size;
+	char *data;
+};
+
+static DEFINE_SPINLOCK(receiving_list_lock);
+static struct list_head receiving_list;
+
+static int dm_ulog_sendto_server(struct dm_ulog_request *tfr)
+{
+	int r;
+	struct cn_msg *msg = prealloced_cn_msg;
+
+	memset(msg, 0, sizeof(struct cn_msg));
+
+	msg->id.idx = ulog_cn_id.idx;
+	msg->id.val = ulog_cn_id.val;
+	msg->ack = 0;
+	msg->seq = tfr->seq;
+	msg->len = sizeof(struct dm_ulog_request) + tfr->data_size;
+
+	r = cn_netlink_send(msg, 0, gfp_any());
+
+	return r;
+}
+
+/*
+ * Parameters for this function can be either msg or tfr, but not
+ * both.  This function fills in the reply for a waiting request.
+ * If just msg is given, then the reply is simply an ACK from userspace
+ * that the request was received.
+ *
+ * Returns: 0 on success, -ENOENT on failure
+ */
+static int fill_pkg(struct cn_msg *msg, struct dm_ulog_request *tfr)
+{
+	uint32_t rtn_seq = (msg) ? msg->seq : (tfr) ? tfr->seq : 0;
+	struct receiving_pkg *pkg;
+
+	/*
+	 * The 'receiving_pkg' entries in this list are statically
+	 * allocated on the stack in 'dm_consult_userspace'.
+	 * Each process that is waiting for a reply from the user
+	 * space server will have an entry in this list.
+	 *
+	 * We are safe to do it this way because the stack space
+	 * is unique to each process, but still addressable by
+	 * other processes.
+	 */
+	list_for_each_entry(pkg, &receiving_list, list) {
+		if (rtn_seq != pkg->seq)
+			continue;
+
+		if (msg) {
+			pkg->error = -msg->ack;
+			/*
+			 * If we are trying again, we will need to know our
+			 * storage capacity.  Otherwise, along with the
+			 * error code, we make explicit that we have no data.
+			 */
+			if (pkg->error != -EAGAIN)
+				*(pkg->data_size) = 0;
+		} else if (tfr->data_size > *(pkg->data_size)) {
+			DMERR("Insufficient space to receive package [%u] "
+			      "(%u vs %lu)", tfr->request_type,
+			      tfr->data_size, *(pkg->data_size));
+
+			*(pkg->data_size) = 0;
+			pkg->error = -ENOSPC;
+		} else {
+			pkg->error = tfr->error;
+			memcpy(pkg->data, tfr->data, tfr->data_size);
+			*(pkg->data_size) = tfr->data_size;
+		}
+		complete(&pkg->complete);
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * This is the connector callback that delivers data
+ * that was sent from userspace.
+ */
+static void cn_ulog_callback(void *data)
+{
+	struct cn_msg *msg = (struct cn_msg *)data;
+	struct dm_ulog_request *tfr = (struct dm_ulog_request *)(msg + 1);
+
+	spin_lock(&receiving_list_lock);
+	if (msg->len == 0)
+		fill_pkg(msg, NULL);
+	else if (msg->len < sizeof(*tfr))
+		DMERR("Incomplete message received (expected %u, got %u): [%u]",
+		      (unsigned)sizeof(*tfr), msg->len, msg->seq);
+	else
+		fill_pkg(NULL, tfr);
+	spin_unlock(&receiving_list_lock);
+}
+
+/**
+ * dm_consult_userspace
+ * @uuid: log's uuid (must be DM_UUID_LEN in size)
+ * @request_type:  found in include/linux/dm-log-userspace.h
+ * @data: data to tx to the server
+ * @data_size: size of data in bytes
+ * @rdata: place to put return data from server
+ * @rdata_size: value-result (amount of space given/amount of space used)
+ *
+ * rdata_size is undefined on failure.
+ *
+ * Memory used to communicate with userspace is zero'ed
+ * before populating to ensure that no unwanted bits leak
+ * from kernel space to user-space.  All userspace log communications
+ * between kernel and user space go through this function.
+ *
+ * Returns: 0 on success, -EXXX on failure
+ **/
+int dm_consult_userspace(const char *uuid, int request_type,
+			 char *data, size_t data_size,
+			 char *rdata, size_t *rdata_size)
+{
+	int r = 0;
+	size_t dummy = 0;
+	int overhead_size =
+		sizeof(struct dm_ulog_request *) + sizeof(struct cn_msg);
+	struct dm_ulog_request *tfr = prealloced_ulog_tfr;
+	struct receiving_pkg pkg;
+
+	if (data_size > (DM_ULOG_PREALLOCED_SIZE - overhead_size)) {
+		DMINFO("Size of tfr exceeds preallocated size");
+		return -EINVAL;
+	}
+
+	if (!rdata_size)
+		rdata_size = &dummy;
+resend:
+	/*
+	 * We serialize the sending of requests so we can
+	 * use the preallocated space.
+	 */
+	mutex_lock(&dm_ulog_lock);
+
+	memset(tfr, 0, DM_ULOG_PREALLOCED_SIZE - overhead_size);
+	memcpy(tfr->uuid, uuid, DM_UUID_LEN);
+	tfr->seq = dm_ulog_seq++;
+
+	/*
+	 * Must be valid request type (all other bits set to
+	 * zero).  This reserves other bits for possible future
+	 * use.
+	 */
+	tfr->request_type = request_type & DM_ULOG_REQUEST_MASK;
+
+	tfr->data_size = data_size;
+	if (data && data_size)
+		memcpy(tfr->data, data, data_size);
+
+	memset(&pkg, 0, sizeof(pkg));
+	init_completion(&pkg.complete);
+	pkg.seq = tfr->seq;
+	pkg.data_size = rdata_size;
+	pkg.data = rdata;
+	spin_lock(&receiving_list_lock);
+	list_add(&(pkg.list), &receiving_list);
+	spin_unlock(&receiving_list_lock);
+
+	r = dm_ulog_sendto_server(tfr);
+
+	mutex_unlock(&dm_ulog_lock);
+
+	if (r) {
+		DMERR("Unable to send log request [%u] to userspace: %d",
+		      request_type, r);
+		spin_lock(&receiving_list_lock);
+		list_del_init(&(pkg.list));
+		spin_unlock(&receiving_list_lock);
+
+		goto out;
+	}
+
+	r = wait_for_completion_timeout(&(pkg.complete), DM_ULOG_RETRY_TIMEOUT);
+	spin_lock(&receiving_list_lock);
+	list_del_init(&(pkg.list));
+	spin_unlock(&receiving_list_lock);
+	if (!r) {
+		DMWARN("[%s] Request timed out: [%u/%u] - retrying",
+		       (strlen(uuid) > 8) ?
+		       (uuid + (strlen(uuid) - 8)) : (uuid),
+		       request_type, pkg.seq);
+		goto resend;
+	}
+
+	r = pkg.error;
+	if (r == -EAGAIN)
+		goto resend;
+
+out:
+	return r;
+}
+
+int dm_ulog_tfr_init(void)
+{
+	int r;
+	void *prealloced;
+
+	INIT_LIST_HEAD(&receiving_list);
+
+	prealloced = kmalloc(DM_ULOG_PREALLOCED_SIZE, GFP_KERNEL);
+	if (!prealloced)
+		return -ENOMEM;
+
+	prealloced_cn_msg = prealloced;
+	prealloced_ulog_tfr = prealloced + sizeof(struct cn_msg);
+
+	r = cn_add_callback(&ulog_cn_id, "dmlogusr", cn_ulog_callback);
+	if (r) {
+		cn_del_callback(&ulog_cn_id);
+		return r;
+	}
+
+	return 0;
+}
+
+void dm_ulog_tfr_exit(void)
+{
+	cn_del_callback(&ulog_cn_id);
+	kfree(prealloced_cn_msg);
+}
diff --git a/drivers/md/dm-log-userspace-transfer.h b/drivers/md/dm-log-userspace-transfer.h
new file mode 100644
index 00000000000..c26d8e4e271
--- /dev/null
+++ b/drivers/md/dm-log-userspace-transfer.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2006-2009 Red Hat, Inc.
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef __DM_LOG_USERSPACE_TRANSFER_H__
+#define __DM_LOG_USERSPACE_TRANSFER_H__
+
+#define DM_MSG_PREFIX "dm-log-userspace"
+
+int dm_ulog_tfr_init(void);
+void dm_ulog_tfr_exit(void);
+int dm_consult_userspace(const char *uuid, int request_type,
+			 char *data, size_t data_size,
+			 char *rdata, size_t *rdata_size);
+
+#endif /* __DM_LOG_USERSPACE_TRANSFER_H__ */
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 03f22076381..334a3593cdf 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -57,6 +57,7 @@ header-y += dlmconstants.h
 header-y += dlm_device.h
 header-y += dlm_netlink.h
 header-y += dm-ioctl.h
+header-y += dm-log-userspace.h
 header-y += dn.h
 header-y += dqblk_xfs.h
 header-y += efs_fs_sb.h
diff --git a/include/linux/connector.h b/include/linux/connector.h
index b9966e64604..b68d27850d5 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -41,8 +41,10 @@
 #define CN_IDX_BB			0x5	/* BlackBoard, from the TSP GPL sampling framework */
 #define CN_DST_IDX			0x6
 #define CN_DST_VAL			0x1
+#define CN_IDX_DM			0x7	/* Device Mapper */
+#define CN_VAL_DM_USERSPACE_LOG		0x1
 
-#define CN_NETLINK_USERS		7
+#define CN_NETLINK_USERS		8
 
 /*
  * Maximum connector's message size.
diff --git a/include/linux/dm-log-userspace.h b/include/linux/dm-log-userspace.h
new file mode 100644
index 00000000000..642e3017b51
--- /dev/null
+++ b/include/linux/dm-log-userspace.h
@@ -0,0 +1,386 @@
+/*
+ * Copyright (C) 2006-2009 Red Hat, Inc.
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef __DM_LOG_USERSPACE_H__
+#define __DM_LOG_USERSPACE_H__
+
+#include <linux/dm-ioctl.h> /* For DM_UUID_LEN */
+
+/*
+ * The device-mapper userspace log module consists of a kernel component and
+ * a user-space component.  The kernel component implements the API defined
+ * in dm-dirty-log.h.  Its purpose is simply to pass the parameters and
+ * return values of those API functions between kernel and user-space.
+ *
+ * Below are defined the 'request_types' - DM_ULOG_CTR, DM_ULOG_DTR, etc.
+ * These request types represent the different functions in the device-mapper
+ * dirty log API.  Each of these is described in more detail below.
+ *
+ * The user-space program must listen for requests from the kernel (representing
+ * the various API functions) and process them.
+ *
+ * User-space begins by setting up the communication link (error checking
+ * removed for clarity):
+ *	fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
+ *	addr.nl_family = AF_NETLINK;
+ *	addr.nl_groups = CN_IDX_DM;
+ *	addr.nl_pid = 0;
+ *	r = bind(fd, (struct sockaddr *) &addr, sizeof(addr));
+ *	opt = addr.nl_groups;
+ *	setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &opt, sizeof(opt));
+ *
+ * User-space will then wait to receive requests form the kernel, which it
+ * will process as described below.  The requests are received in the form,
+ * ((struct dm_ulog_request) + (additional data)).  Depending on the request
+ * type, there may or may not be 'additional data'.  In the descriptions below,
+ * you will see 'Payload-to-userspace' and 'Payload-to-kernel'.  The
+ * 'Payload-to-userspace' is what the kernel sends in 'additional data' as
+ * necessary parameters to complete the request.  The 'Payload-to-kernel' is
+ * the 'additional data' returned to the kernel that contains the necessary
+ * results of the request.  The 'data_size' field in the dm_ulog_request
+ * structure denotes the availability and amount of payload data.
+ */
+
+/*
+ * DM_ULOG_CTR corresponds to (found in dm-dirty-log.h):
+ * int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti,
+ *	      unsigned argc, char **argv);
+ *
+ * Payload-to-userspace:
+ *	A single string containing all the argv arguments separated by ' 's
+ * Payload-to-kernel:
+ *	None.  ('data_size' in the dm_ulog_request struct should be 0.)
+ *
+ * The UUID contained in the dm_ulog_request structure is the reference that
+ * will be used by all request types to a specific log.  The constructor must
+ * record this assotiation with instance created.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_CTR                    1
+
+/*
+ * DM_ULOG_DTR corresponds to (found in dm-dirty-log.h):
+ * void (*dtr)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ *	A single string containing all the argv arguments separated by ' 's
+ * Payload-to-kernel:
+ *	None.  ('data_size' in the dm_ulog_request struct should be 0.)
+ *
+ * The UUID contained in the dm_ulog_request structure is all that is
+ * necessary to identify the log instance being destroyed.  There is no
+ * payload data.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and clearing
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_DTR                    2
+
+/*
+ * DM_ULOG_PRESUSPEND corresponds to (found in dm-dirty-log.h):
+ * int (*presuspend)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ *	None.
+ * Payload-to-kernel:
+ *	None.
+ *
+ * The UUID contained in the dm_ulog_request structure is all that is
+ * necessary to identify the log instance being presuspended.  There is no
+ * payload data.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_PRESUSPEND             3
+
+/*
+ * DM_ULOG_POSTSUSPEND corresponds to (found in dm-dirty-log.h):
+ * int (*postsuspend)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ *	None.
+ * Payload-to-kernel:
+ *	None.
+ *
+ * The UUID contained in the dm_ulog_request structure is all that is
+ * necessary to identify the log instance being postsuspended.  There is no
+ * payload data.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_POSTSUSPEND            4
+
+/*
+ * DM_ULOG_RESUME corresponds to (found in dm-dirty-log.h):
+ * int (*resume)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ *	None.
+ * Payload-to-kernel:
+ *	None.
+ *
+ * The UUID contained in the dm_ulog_request structure is all that is
+ * necessary to identify the log instance being resumed.  There is no
+ * payload data.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_RESUME                 5
+
+/*
+ * DM_ULOG_GET_REGION_SIZE corresponds to (found in dm-dirty-log.h):
+ * uint32_t (*get_region_size)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ *	None.
+ * Payload-to-kernel:
+ *	uint64_t - contains the region size
+ *
+ * The region size is something that was determined at constructor time.
+ * It is returned in the payload area and 'data_size' is set to
+ * reflect this.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field appropriately.
+ */
+#define DM_ULOG_GET_REGION_SIZE        6
+
+/*
+ * DM_ULOG_IS_CLEAN corresponds to (found in dm-dirty-log.h):
+ * int (*is_clean)(struct dm_dirty_log *log, region_t region);
+ *
+ * Payload-to-userspace:
+ *	uint64_t - the region to get clean status on
+ * Payload-to-kernel:
+ *	int64_t  - 1 if clean, 0 otherwise
+ *
+ * Payload is sizeof(uint64_t) and contains the region for which the clean
+ * status is being made.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - filling the payload with 0 (not clean) or
+ * 1 (clean), setting 'data_size' and 'error' appropriately.
+ */
+#define DM_ULOG_IS_CLEAN               7
+
+/*
+ * DM_ULOG_IN_SYNC corresponds to (found in dm-dirty-log.h):
+ * int (*in_sync)(struct dm_dirty_log *log, region_t region,
+ *		  int can_block);
+ *
+ * Payload-to-userspace:
+ *	uint64_t - the region to get sync status on
+ * Payload-to-kernel:
+ *	int64_t - 1 if in-sync, 0 otherwise
+ *
+ * Exactly the same as 'is_clean' above, except this time asking "has the
+ * region been recovered?" vs. "is the region not being modified?"
+ */
+#define DM_ULOG_IN_SYNC                8
+
+/*
+ * DM_ULOG_FLUSH corresponds to (found in dm-dirty-log.h):
+ * int (*flush)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ *	None.
+ * Payload-to-kernel:
+ *	None.
+ *
+ * No incoming or outgoing payload.  Simply flush log state to disk.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and clearing
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_FLUSH                  9
+
+/*
+ * DM_ULOG_MARK_REGION corresponds to (found in dm-dirty-log.h):
+ * void (*mark_region)(struct dm_dirty_log *log, region_t region);
+ *
+ * Payload-to-userspace:
+ *	uint64_t [] - region(s) to mark
+ * Payload-to-kernel:
+ *	None.
+ *
+ * Incoming payload contains the one or more regions to mark dirty.
+ * The number of regions contained in the payload can be determined from
+ * 'data_size/sizeof(uint64_t)'.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and clearing
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_MARK_REGION           10
+
+/*
+ * DM_ULOG_CLEAR_REGION corresponds to (found in dm-dirty-log.h):
+ * void (*clear_region)(struct dm_dirty_log *log, region_t region);
+ *
+ * Payload-to-userspace:
+ *	uint64_t [] - region(s) to clear
+ * Payload-to-kernel:
+ *	None.
+ *
+ * Incoming payload contains the one or more regions to mark clean.
+ * The number of regions contained in the payload can be determined from
+ * 'data_size/sizeof(uint64_t)'.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and clearing
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_CLEAR_REGION          11
+
+/*
+ * DM_ULOG_GET_RESYNC_WORK corresponds to (found in dm-dirty-log.h):
+ * int (*get_resync_work)(struct dm_dirty_log *log, region_t *region);
+ *
+ * Payload-to-userspace:
+ *	None.
+ * Payload-to-kernel:
+ *	{
+ *		int64_t i; -- 1 if recovery necessary, 0 otherwise
+ *		uint64_t r; -- The region to recover if i=1
+ *	}
+ * 'data_size' should be set appropriately.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field appropriately.
+ */
+#define DM_ULOG_GET_RESYNC_WORK       12
+
+/*
+ * DM_ULOG_SET_REGION_SYNC corresponds to (found in dm-dirty-log.h):
+ * void (*set_region_sync)(struct dm_dirty_log *log,
+ *			   region_t region, int in_sync);
+ *
+ * Payload-to-userspace:
+ *	{
+ *		uint64_t - region to set sync state on
+ *		int64_t  - 0 if not-in-sync, 1 if in-sync
+ *	}
+ * Payload-to-kernel:
+ *	None.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and clearing
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_SET_REGION_SYNC       13
+
+/*
+ * DM_ULOG_GET_SYNC_COUNT corresponds to (found in dm-dirty-log.h):
+ * region_t (*get_sync_count)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ *	None.
+ * Payload-to-kernel:
+ *	uint64_t - the number of in-sync regions
+ *
+ * No incoming payload.  Kernel-bound payload contains the number of
+ * regions that are in-sync (in a size_t).
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_GET_SYNC_COUNT        14
+
+/*
+ * DM_ULOG_STATUS_INFO corresponds to (found in dm-dirty-log.h):
+ * int (*status)(struct dm_dirty_log *log, STATUSTYPE_INFO,
+ *		 char *result, unsigned maxlen);
+ *
+ * Payload-to-userspace:
+ *	None.
+ * Payload-to-kernel:
+ *	Character string containing STATUSTYPE_INFO
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_STATUS_INFO           15
+
+/*
+ * DM_ULOG_STATUS_TABLE corresponds to (found in dm-dirty-log.h):
+ * int (*status)(struct dm_dirty_log *log, STATUSTYPE_TABLE,
+ *		 char *result, unsigned maxlen);
+ *
+ * Payload-to-userspace:
+ *	None.
+ * Payload-to-kernel:
+ *	Character string containing STATUSTYPE_TABLE
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_STATUS_TABLE          16
+
+/*
+ * DM_ULOG_IS_REMOTE_RECOVERING corresponds to (found in dm-dirty-log.h):
+ * int (*is_remote_recovering)(struct dm_dirty_log *log, region_t region);
+ *
+ * Payload-to-userspace:
+ *	uint64_t - region to determine recovery status on
+ * Payload-to-kernel:
+ *	{
+ *		int64_t is_recovering;  -- 0 if no, 1 if yes
+ *		uint64_t in_sync_hint;  -- lowest region still needing resync
+ *	}
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_IS_REMOTE_RECOVERING  17
+
+/*
+ * (DM_ULOG_REQUEST_MASK & request_type) to get the request type
+ *
+ * Payload-to-userspace:
+ *	A single string containing all the argv arguments separated by ' 's
+ * Payload-to-kernel:
+ *	None.  ('data_size' in the dm_ulog_request struct should be 0.)
+ *
+ * We are reserving 8 bits of the 32-bit 'request_type' field for the
+ * various request types above.  The remaining 24-bits are currently
+ * set to zero and are reserved for future use and compatibility concerns.
+ *
+ * User-space should always use DM_ULOG_REQUEST_TYPE to aquire the
+ * request type from the 'request_type' field to maintain forward compatibility.
+ */
+#define DM_ULOG_REQUEST_MASK 0xFF
+#define DM_ULOG_REQUEST_TYPE(request_type) \
+	(DM_ULOG_REQUEST_MASK & (request_type))
+
+struct dm_ulog_request {
+	char uuid[DM_UUID_LEN]; /* Ties a request to a specific mirror log */
+	char padding[7];        /* Padding because DM_UUID_LEN = 129 */
+
+	int32_t error;          /* Used to report back processing errors */
+
+	uint32_t seq;           /* Sequence number for request */
+	uint32_t request_type;  /* DM_ULOG_* defined above */
+	uint32_t data_size;     /* How much data (not including this struct) */
+
+	char data[0];
+};
+
+#endif /* __DM_LOG_USERSPACE_H__ */
-- 
cgit v1.2.3-70-g09d2


From cec47e3d4a861e1d942b3a580d0bbef2700d2bb2 Mon Sep 17 00:00:00 2001
From: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Date: Mon, 22 Jun 2009 10:12:35 +0100
Subject: dm: prepare for request based option

This patch adds core functions for request-based dm.

When struct mapped device (md) is initialized, md->queue has
an I/O scheduler and the following functions are used for
request-based dm as the queue functions:
    make_request_fn: dm_make_request()
    pref_fn:         dm_prep_fn()
    request_fn:      dm_request_fn()
    softirq_done_fn: dm_softirq_done()
    lld_busy_fn:     dm_lld_busy()
Actual initializations are done in another patch (PATCH 2).

Below is a brief summary of how request-based dm behaves, including:
  - making request from bio
  - cloning, mapping and dispatching request
  - completing request and bio
  - suspending md
  - resuming md

  bio to request
  ==============
  md->queue->make_request_fn() (dm_make_request()) calls __make_request()
  for a bio submitted to the md.
  Then, the bio is kept in the queue as a new request or merged into
  another request in the queue if possible.

  Cloning and Mapping
  ===================
  Cloning and mapping are done in md->queue->request_fn() (dm_request_fn()),
  when requests are dispatched after they are sorted by the I/O scheduler.

  dm_request_fn() checks busy state of underlying devices using
  target's busy() function and stops dispatching requests to keep them
  on the dm device's queue if busy.
  It helps better I/O merging, since no merge is done for a request
  once it is dispatched to underlying devices.

  Actual cloning and mapping are done in dm_prep_fn() and map_request()
  called from dm_request_fn().
  dm_prep_fn() clones not only request but also bios of the request
  so that dm can hold bio completion in error cases and prevent
  the bio submitter from noticing the error.
  (See the "Completion" section below for details.)

  After the cloning, the clone is mapped by target's map_rq() function
    and inserted to underlying device's queue using
    blk_insert_cloned_request().

  Completion
  ==========
  Request completion can be hooked by rq->end_io(), but then, all bios
  in the request will have been completed even error cases, and the bio
  submitter will have noticed the error.
  To prevent the bio completion in error cases, request-based dm clones
  both bio and request and hooks both bio->bi_end_io() and rq->end_io():
      bio->bi_end_io(): end_clone_bio()
      rq->end_io():     end_clone_request()

  Summary of the request completion flow is below:
  blk_end_request() for a clone request
    => blk_update_request()
       => bio->bi_end_io() == end_clone_bio() for each clone bio
          => Free the clone bio
          => Success: Complete the original bio (blk_update_request())
             Error:   Don't complete the original bio
    => blk_finish_request()
       => rq->end_io() == end_clone_request()
          => blk_complete_request()
             => dm_softirq_done()
                => Free the clone request
                => Success: Complete the original request (blk_end_request())
                   Error:   Requeue the original request

  end_clone_bio() completes the original request on the size of
  the original bio in successful cases.
  Even if all bios in the original request are completed by that
  completion, the original request must not be completed yet to keep
  the ordering of request completion for the stacking.
  So end_clone_bio() uses blk_update_request() instead of
  blk_end_request().
  In error cases, end_clone_bio() doesn't complete the original bio.
  It just frees the cloned bio and gives over the error handling to
  end_clone_request().

  end_clone_request(), which is called with queue lock held, completes
  the clone request and the original request in a softirq context
  (dm_softirq_done()), which has no queue lock, to avoid a deadlock
  issue on submission of another request during the completion:
      - The submitted request may be mapped to the same device
      - Request submission requires queue lock, but the queue lock
        has been held by itself and it doesn't know that

  The clone request has no clone bio when dm_softirq_done() is called.
  So target drivers can't resubmit it again even error cases.
  Instead, they can ask dm core for requeueing and remapping
  the original request in that cases.

  suspend
  =======
  Request-based dm uses stopping md->queue as suspend of the md.
  For noflush suspend, just stops md->queue.

  For flush suspend, inserts a marker request to the tail of md->queue.
  And dispatches all requests in md->queue until the marker comes to
  the front of md->queue.  Then, stops dispatching request and waits
  for the all dispatched requests to complete.
  After that, completes the marker request, stops md->queue and
  wake up the waiter on the suspend queue, md->wait.

  resume
  ======
  Starts md->queue.

Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-table.c         |  14 +
 drivers/md/dm.c               | 705 +++++++++++++++++++++++++++++++++++++++++-
 drivers/md/dm.h               |   1 +
 include/linux/device-mapper.h |   9 +
 4 files changed, 725 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 09a57113955..c5f784419f2 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1080,6 +1080,20 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits)
 	return r;
 }
 
+int dm_table_any_busy_target(struct dm_table *t)
+{
+	unsigned i;
+	struct dm_target *ti;
+
+	for (i = 0; i < t->num_targets; i++) {
+		ti = t->targets + i;
+		if (ti->type->busy && ti->type->busy(ti))
+			return 1;
+	}
+
+	return 0;
+}
+
 void dm_table_unplug_all(struct dm_table *t)
 {
 	struct dm_dev_internal *dd;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index f609793a92d..be003e5fea3 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -78,7 +78,7 @@ struct dm_rq_target_io {
  */
 struct dm_rq_clone_bio_info {
 	struct bio *orig;
-	struct request *rq;
+	struct dm_rq_target_io *tio;
 };
 
 union map_info *dm_get_mapinfo(struct bio *bio)
@@ -88,6 +88,14 @@ union map_info *dm_get_mapinfo(struct bio *bio)
 	return NULL;
 }
 
+union map_info *dm_get_rq_mapinfo(struct request *rq)
+{
+	if (rq && rq->end_io_data)
+		return &((struct dm_rq_target_io *)rq->end_io_data)->info;
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
+
 #define MINOR_ALLOCED ((void *)-1)
 
 /*
@@ -169,6 +177,12 @@ struct mapped_device {
 	/* forced geometry settings */
 	struct hd_geometry geometry;
 
+	/* marker of flush suspend for request-based dm */
+	struct request suspend_rq;
+
+	/* For saving the address of __make_request for request based dm */
+	make_request_fn *saved_make_request_fn;
+
 	/* sysfs handle */
 	struct kobject kobj;
 
@@ -406,6 +420,26 @@ static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
 	mempool_free(tio, md->tio_pool);
 }
 
+static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md)
+{
+	return mempool_alloc(md->tio_pool, GFP_ATOMIC);
+}
+
+static void free_rq_tio(struct dm_rq_target_io *tio)
+{
+	mempool_free(tio, tio->md->tio_pool);
+}
+
+static struct dm_rq_clone_bio_info *alloc_bio_info(struct mapped_device *md)
+{
+	return mempool_alloc(md->io_pool, GFP_ATOMIC);
+}
+
+static void free_bio_info(struct dm_rq_clone_bio_info *info)
+{
+	mempool_free(info, info->tio->md->io_pool);
+}
+
 static void start_io_acct(struct dm_io *io)
 {
 	struct mapped_device *md = io->md;
@@ -615,6 +649,262 @@ static void clone_endio(struct bio *bio, int error)
 	dec_pending(io, error);
 }
 
+/*
+ * Partial completion handling for request-based dm
+ */
+static void end_clone_bio(struct bio *clone, int error)
+{
+	struct dm_rq_clone_bio_info *info = clone->bi_private;
+	struct dm_rq_target_io *tio = info->tio;
+	struct bio *bio = info->orig;
+	unsigned int nr_bytes = info->orig->bi_size;
+
+	bio_put(clone);
+
+	if (tio->error)
+		/*
+		 * An error has already been detected on the request.
+		 * Once error occurred, just let clone->end_io() handle
+		 * the remainder.
+		 */
+		return;
+	else if (error) {
+		/*
+		 * Don't notice the error to the upper layer yet.
+		 * The error handling decision is made by the target driver,
+		 * when the request is completed.
+		 */
+		tio->error = error;
+		return;
+	}
+
+	/*
+	 * I/O for the bio successfully completed.
+	 * Notice the data completion to the upper layer.
+	 */
+
+	/*
+	 * bios are processed from the head of the list.
+	 * So the completing bio should always be rq->bio.
+	 * If it's not, something wrong is happening.
+	 */
+	if (tio->orig->bio != bio)
+		DMERR("bio completion is going in the middle of the request");
+
+	/*
+	 * Update the original request.
+	 * Do not use blk_end_request() here, because it may complete
+	 * the original request before the clone, and break the ordering.
+	 */
+	blk_update_request(tio->orig, 0, nr_bytes);
+}
+
+/*
+ * Don't touch any member of the md after calling this function because
+ * the md may be freed in dm_put() at the end of this function.
+ * Or do dm_get() before calling this function and dm_put() later.
+ */
+static void rq_completed(struct mapped_device *md, int run_queue)
+{
+	int wakeup_waiters = 0;
+	struct request_queue *q = md->queue;
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	if (!queue_in_flight(q))
+		wakeup_waiters = 1;
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	/* nudge anyone waiting on suspend queue */
+	if (wakeup_waiters)
+		wake_up(&md->wait);
+
+	if (run_queue)
+		blk_run_queue(q);
+
+	/*
+	 * dm_put() must be at the end of this function. See the comment above
+	 */
+	dm_put(md);
+}
+
+static void dm_unprep_request(struct request *rq)
+{
+	struct request *clone = rq->special;
+	struct dm_rq_target_io *tio = clone->end_io_data;
+
+	rq->special = NULL;
+	rq->cmd_flags &= ~REQ_DONTPREP;
+
+	blk_rq_unprep_clone(clone);
+	free_rq_tio(tio);
+}
+
+/*
+ * Requeue the original request of a clone.
+ */
+void dm_requeue_unmapped_request(struct request *clone)
+{
+	struct dm_rq_target_io *tio = clone->end_io_data;
+	struct mapped_device *md = tio->md;
+	struct request *rq = tio->orig;
+	struct request_queue *q = rq->q;
+	unsigned long flags;
+
+	dm_unprep_request(rq);
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	if (elv_queue_empty(q))
+		blk_plug_device(q);
+	blk_requeue_request(q, rq);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	rq_completed(md, 0);
+}
+EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request);
+
+static void __stop_queue(struct request_queue *q)
+{
+	blk_stop_queue(q);
+}
+
+static void stop_queue(struct request_queue *q)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	__stop_queue(q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+static void __start_queue(struct request_queue *q)
+{
+	if (blk_queue_stopped(q))
+		blk_start_queue(q);
+}
+
+static void start_queue(struct request_queue *q)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	__start_queue(q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+/*
+ * Complete the clone and the original request.
+ * Must be called without queue lock.
+ */
+static void dm_end_request(struct request *clone, int error)
+{
+	struct dm_rq_target_io *tio = clone->end_io_data;
+	struct mapped_device *md = tio->md;
+	struct request *rq = tio->orig;
+
+	if (blk_pc_request(rq)) {
+		rq->errors = clone->errors;
+		rq->resid_len = clone->resid_len;
+
+		if (rq->sense)
+			/*
+			 * We are using the sense buffer of the original
+			 * request.
+			 * So setting the length of the sense data is enough.
+			 */
+			rq->sense_len = clone->sense_len;
+	}
+
+	BUG_ON(clone->bio);
+	free_rq_tio(tio);
+
+	blk_end_request_all(rq, error);
+
+	rq_completed(md, 1);
+}
+
+/*
+ * Request completion handler for request-based dm
+ */
+static void dm_softirq_done(struct request *rq)
+{
+	struct request *clone = rq->completion_data;
+	struct dm_rq_target_io *tio = clone->end_io_data;
+	dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io;
+	int error = tio->error;
+
+	if (!(rq->cmd_flags & REQ_FAILED) && rq_end_io)
+		error = rq_end_io(tio->ti, clone, error, &tio->info);
+
+	if (error <= 0)
+		/* The target wants to complete the I/O */
+		dm_end_request(clone, error);
+	else if (error == DM_ENDIO_INCOMPLETE)
+		/* The target will handle the I/O */
+		return;
+	else if (error == DM_ENDIO_REQUEUE)
+		/* The target wants to requeue the I/O */
+		dm_requeue_unmapped_request(clone);
+	else {
+		DMWARN("unimplemented target endio return value: %d", error);
+		BUG();
+	}
+}
+
+/*
+ * Complete the clone and the original request with the error status
+ * through softirq context.
+ */
+static void dm_complete_request(struct request *clone, int error)
+{
+	struct dm_rq_target_io *tio = clone->end_io_data;
+	struct request *rq = tio->orig;
+
+	tio->error = error;
+	rq->completion_data = clone;
+	blk_complete_request(rq);
+}
+
+/*
+ * Complete the not-mapped clone and the original request with the error status
+ * through softirq context.
+ * Target's rq_end_io() function isn't called.
+ * This may be used when the target's map_rq() function fails.
+ */
+void dm_kill_unmapped_request(struct request *clone, int error)
+{
+	struct dm_rq_target_io *tio = clone->end_io_data;
+	struct request *rq = tio->orig;
+
+	rq->cmd_flags |= REQ_FAILED;
+	dm_complete_request(clone, error);
+}
+EXPORT_SYMBOL_GPL(dm_kill_unmapped_request);
+
+/*
+ * Called with the queue lock held
+ */
+static void end_clone_request(struct request *clone, int error)
+{
+	/*
+	 * For just cleaning up the information of the queue in which
+	 * the clone was dispatched.
+	 * The clone is *NOT* freed actually here because it is alloced from
+	 * dm own mempool and REQ_ALLOCED isn't set in clone->cmd_flags.
+	 */
+	__blk_put_request(clone->q, clone);
+
+	/*
+	 * Actual request completion is done in a softirq context which doesn't
+	 * hold the queue lock.  Otherwise, deadlock could occur because:
+	 *     - another request may be submitted by the upper level driver
+	 *       of the stacking during the completion
+	 *     - the submission which requires queue lock may be done
+	 *       against this queue
+	 */
+	dm_complete_request(clone, error);
+}
+
 static sector_t max_io_len(struct mapped_device *md,
 			   sector_t sector, struct dm_target *ti)
 {
@@ -998,7 +1288,7 @@ out:
  * The request function that just remaps the bio built up by
  * dm_merge_bvec.
  */
-static int dm_request(struct request_queue *q, struct bio *bio)
+static int _dm_request(struct request_queue *q, struct bio *bio)
 {
 	int rw = bio_data_dir(bio);
 	struct mapped_device *md = q->queuedata;
@@ -1035,12 +1325,274 @@ static int dm_request(struct request_queue *q, struct bio *bio)
 	return 0;
 }
 
+static int dm_make_request(struct request_queue *q, struct bio *bio)
+{
+	struct mapped_device *md = q->queuedata;
+
+	if (unlikely(bio_barrier(bio))) {
+		bio_endio(bio, -EOPNOTSUPP);
+		return 0;
+	}
+
+	return md->saved_make_request_fn(q, bio); /* call __make_request() */
+}
+
+static int dm_request_based(struct mapped_device *md)
+{
+	return blk_queue_stackable(md->queue);
+}
+
+static int dm_request(struct request_queue *q, struct bio *bio)
+{
+	struct mapped_device *md = q->queuedata;
+
+	if (dm_request_based(md))
+		return dm_make_request(q, bio);
+
+	return _dm_request(q, bio);
+}
+
+void dm_dispatch_request(struct request *rq)
+{
+	int r;
+
+	if (blk_queue_io_stat(rq->q))
+		rq->cmd_flags |= REQ_IO_STAT;
+
+	rq->start_time = jiffies;
+	r = blk_insert_cloned_request(rq->q, rq);
+	if (r)
+		dm_complete_request(rq, r);
+}
+EXPORT_SYMBOL_GPL(dm_dispatch_request);
+
+static void dm_rq_bio_destructor(struct bio *bio)
+{
+	struct dm_rq_clone_bio_info *info = bio->bi_private;
+	struct mapped_device *md = info->tio->md;
+
+	free_bio_info(info);
+	bio_free(bio, md->bs);
+}
+
+static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
+				 void *data)
+{
+	struct dm_rq_target_io *tio = data;
+	struct mapped_device *md = tio->md;
+	struct dm_rq_clone_bio_info *info = alloc_bio_info(md);
+
+	if (!info)
+		return -ENOMEM;
+
+	info->orig = bio_orig;
+	info->tio = tio;
+	bio->bi_end_io = end_clone_bio;
+	bio->bi_private = info;
+	bio->bi_destructor = dm_rq_bio_destructor;
+
+	return 0;
+}
+
+static int setup_clone(struct request *clone, struct request *rq,
+		       struct dm_rq_target_io *tio)
+{
+	int r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
+				  dm_rq_bio_constructor, tio);
+
+	if (r)
+		return r;
+
+	clone->cmd = rq->cmd;
+	clone->cmd_len = rq->cmd_len;
+	clone->sense = rq->sense;
+	clone->buffer = rq->buffer;
+	clone->end_io = end_clone_request;
+	clone->end_io_data = tio;
+
+	return 0;
+}
+
+static int dm_rq_flush_suspending(struct mapped_device *md)
+{
+	return !md->suspend_rq.special;
+}
+
+/*
+ * Called with the queue lock held.
+ */
+static int dm_prep_fn(struct request_queue *q, struct request *rq)
+{
+	struct mapped_device *md = q->queuedata;
+	struct dm_rq_target_io *tio;
+	struct request *clone;
+
+	if (unlikely(rq == &md->suspend_rq)) {
+		if (dm_rq_flush_suspending(md))
+			return BLKPREP_OK;
+		else
+			/* The flush suspend was interrupted */
+			return BLKPREP_KILL;
+	}
+
+	if (unlikely(rq->special)) {
+		DMWARN("Already has something in rq->special.");
+		return BLKPREP_KILL;
+	}
+
+	tio = alloc_rq_tio(md); /* Only one for each original request */
+	if (!tio)
+		/* -ENOMEM */
+		return BLKPREP_DEFER;
+
+	tio->md = md;
+	tio->ti = NULL;
+	tio->orig = rq;
+	tio->error = 0;
+	memset(&tio->info, 0, sizeof(tio->info));
+
+	clone = &tio->clone;
+	if (setup_clone(clone, rq, tio)) {
+		/* -ENOMEM */
+		free_rq_tio(tio);
+		return BLKPREP_DEFER;
+	}
+
+	rq->special = clone;
+	rq->cmd_flags |= REQ_DONTPREP;
+
+	return BLKPREP_OK;
+}
+
+static void map_request(struct dm_target *ti, struct request *rq,
+			struct mapped_device *md)
+{
+	int r;
+	struct request *clone = rq->special;
+	struct dm_rq_target_io *tio = clone->end_io_data;
+
+	/*
+	 * Hold the md reference here for the in-flight I/O.
+	 * We can't rely on the reference count by device opener,
+	 * because the device may be closed during the request completion
+	 * when all bios are completed.
+	 * See the comment in rq_completed() too.
+	 */
+	dm_get(md);
+
+	tio->ti = ti;
+	r = ti->type->map_rq(ti, clone, &tio->info);
+	switch (r) {
+	case DM_MAPIO_SUBMITTED:
+		/* The target has taken the I/O to submit by itself later */
+		break;
+	case DM_MAPIO_REMAPPED:
+		/* The target has remapped the I/O so dispatch it */
+		dm_dispatch_request(clone);
+		break;
+	case DM_MAPIO_REQUEUE:
+		/* The target wants to requeue the I/O */
+		dm_requeue_unmapped_request(clone);
+		break;
+	default:
+		if (r > 0) {
+			DMWARN("unimplemented target map return value: %d", r);
+			BUG();
+		}
+
+		/* The target wants to complete the I/O */
+		dm_kill_unmapped_request(clone, r);
+		break;
+	}
+}
+
+/*
+ * q->request_fn for request-based dm.
+ * Called with the queue lock held.
+ */
+static void dm_request_fn(struct request_queue *q)
+{
+	struct mapped_device *md = q->queuedata;
+	struct dm_table *map = dm_get_table(md);
+	struct dm_target *ti;
+	struct request *rq;
+
+	/*
+	 * For noflush suspend, check blk_queue_stopped() to immediately
+	 * quit I/O dispatching.
+	 */
+	while (!blk_queue_plugged(q) && !blk_queue_stopped(q)) {
+		rq = blk_peek_request(q);
+		if (!rq)
+			goto plug_and_out;
+
+		if (unlikely(rq == &md->suspend_rq)) { /* Flush suspend maker */
+			if (queue_in_flight(q))
+				/* Not quiet yet.  Wait more */
+				goto plug_and_out;
+
+			/* This device should be quiet now */
+			__stop_queue(q);
+			blk_start_request(rq);
+			__blk_end_request_all(rq, 0);
+			wake_up(&md->wait);
+			goto out;
+		}
+
+		ti = dm_table_find_target(map, blk_rq_pos(rq));
+		if (ti->type->busy && ti->type->busy(ti))
+			goto plug_and_out;
+
+		blk_start_request(rq);
+		spin_unlock(q->queue_lock);
+		map_request(ti, rq, md);
+		spin_lock_irq(q->queue_lock);
+	}
+
+	goto out;
+
+plug_and_out:
+	if (!elv_queue_empty(q))
+		/* Some requests still remain, retry later */
+		blk_plug_device(q);
+
+out:
+	dm_table_put(map);
+
+	return;
+}
+
+int dm_underlying_device_busy(struct request_queue *q)
+{
+	return blk_lld_busy(q);
+}
+EXPORT_SYMBOL_GPL(dm_underlying_device_busy);
+
+static int dm_lld_busy(struct request_queue *q)
+{
+	int r;
+	struct mapped_device *md = q->queuedata;
+	struct dm_table *map = dm_get_table(md);
+
+	if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))
+		r = 1;
+	else
+		r = dm_table_any_busy_target(map);
+
+	dm_table_put(map);
+
+	return r;
+}
+
 static void dm_unplug_all(struct request_queue *q)
 {
 	struct mapped_device *md = q->queuedata;
 	struct dm_table *map = dm_get_table(md);
 
 	if (map) {
+		if (dm_request_based(md))
+			generic_unplug_device(q);
+
 		dm_table_unplug_all(map);
 		dm_table_put(map);
 	}
@@ -1055,7 +1607,16 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
 	if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
 		map = dm_get_table(md);
 		if (map) {
-			r = dm_table_any_congested(map, bdi_bits);
+			/*
+			 * Request-based dm cares about only own queue for
+			 * the query about congestion status of request_queue
+			 */
+			if (dm_request_based(md))
+				r = md->queue->backing_dev_info.state &
+				    bdi_bits;
+			else
+				r = dm_table_any_congested(map, bdi_bits);
+
 			dm_table_put(map);
 		}
 	}
@@ -1458,6 +2019,8 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
 {
 	int r = 0;
 	DECLARE_WAITQUEUE(wait, current);
+	struct request_queue *q = md->queue;
+	unsigned long flags;
 
 	dm_unplug_all(md->queue);
 
@@ -1467,7 +2030,14 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
 		set_current_state(interruptible);
 
 		smp_mb();
-		if (!atomic_read(&md->pending))
+		if (dm_request_based(md)) {
+			spin_lock_irqsave(q->queue_lock, flags);
+			if (!queue_in_flight(q) && blk_queue_stopped(q)) {
+				spin_unlock_irqrestore(q->queue_lock, flags);
+				break;
+			}
+			spin_unlock_irqrestore(q->queue_lock, flags);
+		} else if (!atomic_read(&md->pending))
 			break;
 
 		if (interruptible == TASK_INTERRUPTIBLE &&
@@ -1584,6 +2154,67 @@ out:
 	return r;
 }
 
+static void dm_rq_invalidate_suspend_marker(struct mapped_device *md)
+{
+	md->suspend_rq.special = (void *)0x1;
+}
+
+static void dm_rq_abort_suspend(struct mapped_device *md, int noflush)
+{
+	struct request_queue *q = md->queue;
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	if (!noflush)
+		dm_rq_invalidate_suspend_marker(md);
+	__start_queue(q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+static void dm_rq_start_suspend(struct mapped_device *md, int noflush)
+{
+	struct request *rq = &md->suspend_rq;
+	struct request_queue *q = md->queue;
+
+	if (noflush)
+		stop_queue(q);
+	else {
+		blk_rq_init(q, rq);
+		blk_insert_request(q, rq, 0, NULL);
+	}
+}
+
+static int dm_rq_suspend_available(struct mapped_device *md, int noflush)
+{
+	int r = 1;
+	struct request *rq = &md->suspend_rq;
+	struct request_queue *q = md->queue;
+	unsigned long flags;
+
+	if (noflush)
+		return r;
+
+	/* The marker must be protected by queue lock if it is in use */
+	spin_lock_irqsave(q->queue_lock, flags);
+	if (unlikely(rq->ref_count)) {
+		/*
+		 * This can happen, when the previous flush suspend was
+		 * interrupted, the marker is still in the queue and
+		 * this flush suspend has been invoked, because we don't
+		 * remove the marker at the time of suspend interruption.
+		 * We have only one marker per mapped_device, so we can't
+		 * start another flush suspend while it is in use.
+		 */
+		BUG_ON(!rq->special); /* The marker should be invalidated */
+		DMWARN("Invalidating the previous flush suspend is still in"
+		       " progress.  Please retry later.");
+		r = 0;
+	}
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	return r;
+}
+
 /*
  * Functions to lock and unlock any filesystem running on the
  * device.
@@ -1623,6 +2254,53 @@ static void unlock_fs(struct mapped_device *md)
  * dm_bind_table, dm_suspend must be called to flush any in
  * flight bios and ensure that any further io gets deferred.
  */
+/*
+ * Suspend mechanism in request-based dm.
+ *
+ * After the suspend starts, further incoming requests are kept in
+ * the request_queue and deferred.
+ * Remaining requests in the request_queue at the start of suspend are flushed
+ * if it is flush suspend.
+ * The suspend completes when the following conditions have been satisfied,
+ * so wait for it:
+ *    1. q->in_flight is 0 (which means no in_flight request)
+ *    2. queue has been stopped (which means no request dispatching)
+ *
+ *
+ * Noflush suspend
+ * ---------------
+ * Noflush suspend doesn't need to dispatch remaining requests.
+ * So stop the queue immediately.  Then, wait for all in_flight requests
+ * to be completed or requeued.
+ *
+ * To abort noflush suspend, start the queue.
+ *
+ *
+ * Flush suspend
+ * -------------
+ * Flush suspend needs to dispatch remaining requests.  So stop the queue
+ * after the remaining requests are completed. (Requeued request must be also
+ * re-dispatched and completed.  Until then, we can't stop the queue.)
+ *
+ * During flushing the remaining requests, further incoming requests are also
+ * inserted to the same queue.  To distinguish which requests are to be
+ * flushed, we insert a marker request to the queue at the time of starting
+ * flush suspend, like a barrier.
+ * The dispatching is blocked when the marker is found on the top of the queue.
+ * And the queue is stopped when all in_flight requests are completed, since
+ * that means the remaining requests are completely flushed.
+ * Then, the marker is removed from the queue.
+ *
+ * To abort flush suspend, we also need to take care of the marker, not only
+ * starting the queue.
+ * We don't remove the marker forcibly from the queue since it's against
+ * the block-layer manner.  Instead, we put a invalidated mark on the marker.
+ * When the invalidated marker is found on the top of the queue, it is
+ * immediately removed from the queue, so it doesn't block dispatching.
+ * Because we have only one marker per mapped_device, we can't start another
+ * flush suspend until the invalidated marker is removed from the queue.
+ * So fail and return with -EBUSY in such a case.
+ */
 int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 {
 	struct dm_table *map = NULL;
@@ -1637,6 +2315,11 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 		goto out_unlock;
 	}
 
+	if (dm_request_based(md) && !dm_rq_suspend_available(md, noflush)) {
+		r = -EBUSY;
+		goto out_unlock;
+	}
+
 	map = dm_get_table(md);
 
 	/*
@@ -1682,6 +2365,9 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 
 	flush_workqueue(md->wq);
 
+	if (dm_request_based(md))
+		dm_rq_start_suspend(md, noflush);
+
 	/*
 	 * At this point no more requests are entering target request routines.
 	 * We call dm_wait_for_completion to wait for all existing requests
@@ -1698,6 +2384,9 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 	if (r < 0) {
 		dm_queue_flush(md);
 
+		if (dm_request_based(md))
+			dm_rq_abort_suspend(md, noflush);
+
 		unlock_fs(md);
 		goto out; /* pushback list is already flushed, so skip flush */
 	}
@@ -1739,6 +2428,14 @@ int dm_resume(struct mapped_device *md)
 
 	dm_queue_flush(md);
 
+	/*
+	 * Flushing deferred I/Os must be done after targets are resumed
+	 * so that mapping of targets can work correctly.
+	 * Request-based dm is queueing the deferred I/Os in its request_queue.
+	 */
+	if (dm_request_based(md))
+		start_queue(md->queue);
+
 	unlock_fs(md);
 
 	clear_bit(DMF_SUSPENDED, &md->flags);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 604e85caadf..8dcabb1caff 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -50,6 +50,7 @@ void dm_table_presuspend_targets(struct dm_table *t);
 void dm_table_postsuspend_targets(struct dm_table *t);
 int dm_table_resume_targets(struct dm_table *t);
 int dm_table_any_congested(struct dm_table *t, int bdi_bits);
+int dm_table_any_busy_target(struct dm_table *t);
 
 /*
  * To check the return value from dm_table_find_target().
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index e6bf3b8c7bf..0d6310657f3 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -234,6 +234,7 @@ struct gendisk *dm_disk(struct mapped_device *md);
 int dm_suspended(struct mapped_device *md);
 int dm_noflush_suspending(struct dm_target *ti);
 union map_info *dm_get_mapinfo(struct bio *bio);
+union map_info *dm_get_rq_mapinfo(struct request *rq);
 
 /*
  * Geometry functions.
@@ -396,4 +397,12 @@ static inline unsigned long to_bytes(sector_t n)
 	return (n << SECTOR_SHIFT);
 }
 
+/*-----------------------------------------------------------------
+ * Helper for block layer and dm core operations
+ *---------------------------------------------------------------*/
+void dm_dispatch_request(struct request *rq);
+void dm_requeue_unmapped_request(struct request *rq);
+void dm_kill_unmapped_request(struct request *rq, int error);
+int dm_underlying_device_busy(struct request_queue *q);
+
 #endif	/* _LINUX_DEVICE_MAPPER_H */
-- 
cgit v1.2.3-70-g09d2


From 099d5270897606473d63091afcc63f53ee1894bc Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Mon, 22 Jun 2009 18:42:42 +0100
Subject: serial@ add OMAP wakeup-enable register

Add the wakeup enable register to the list of OMAP-specific UART
registers.  This is to support forthcoming OMAP PM enhancements which
use the wakeup feature of the OMAP's 8250-based UART.

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/serial_reg.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/serial_reg.h b/include/linux/serial_reg.h
index 96c0d93fc2c..850db2e8051 100644
--- a/include/linux/serial_reg.h
+++ b/include/linux/serial_reg.h
@@ -323,6 +323,7 @@
 #define UART_OMAP_MVER		0x14	/* Module version register */
 #define UART_OMAP_SYSC		0x15	/* System configuration register */
 #define UART_OMAP_SYSS		0x16	/* System status register */
+#define UART_OMAP_WER		0x17	/* Wake-up enable register */
 
 #endif /* _LINUX_SERIAL_REG_H */
 
-- 
cgit v1.2.3-70-g09d2


From 04896a77a97b87e1611dedd61be88264ef4ac96c Mon Sep 17 00:00:00 2001
From: Robert Love <rlove@google.com>
Date: Mon, 22 Jun 2009 18:43:11 +0100
Subject: msm_serial: serial driver for MSM7K onboard serial peripheral.

Signed-off-by: Brian Swetland <swetland@google.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/Kconfig      |  10 +
 drivers/serial/Makefile     |   1 +
 drivers/serial/msm_serial.c | 767 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/serial/msm_serial.h | 117 +++++++
 include/linux/serial_core.h |   3 +
 5 files changed, 898 insertions(+)
 create mode 100644 drivers/serial/msm_serial.c
 create mode 100644 drivers/serial/msm_serial.h

(limited to 'include')

diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index 1132c5cae7a..037c1e0b7c4 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -1320,6 +1320,16 @@ config SERIAL_SGI_IOC3
 	  If you have an SGI Altix with an IOC3 serial card,
 	  say Y or M.  Otherwise, say N.
 
+config SERIAL_MSM
+	bool "MSM on-chip serial port support"
+	depends on ARM && ARCH_MSM
+	select SERIAL_CORE
+
+config SERIAL_MSM_CONSOLE
+	bool "MSM serial console support"
+	depends on SERIAL_MSM=y
+	select SERIAL_CORE_CONSOLE
+
 config SERIAL_NETX
 	tristate "NetX serial port support"
 	depends on ARM && ARCH_NETX
diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile
index 45a8658f54d..d5a29981c6c 100644
--- a/drivers/serial/Makefile
+++ b/drivers/serial/Makefile
@@ -71,6 +71,7 @@ obj-$(CONFIG_SERIAL_SGI_IOC4) += ioc4_serial.o
 obj-$(CONFIG_SERIAL_SGI_IOC3) += ioc3_serial.o
 obj-$(CONFIG_SERIAL_ATMEL) += atmel_serial.o
 obj-$(CONFIG_SERIAL_UARTLITE) += uartlite.o
+obj-$(CONFIG_SERIAL_MSM) += msm_serial.o
 obj-$(CONFIG_SERIAL_NETX) += netx-serial.o
 obj-$(CONFIG_SERIAL_OF_PLATFORM) += of_serial.o
 obj-$(CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL) += nwpserial.o
diff --git a/drivers/serial/msm_serial.c b/drivers/serial/msm_serial.c
new file mode 100644
index 00000000000..1a7c856f76f
--- /dev/null
+++ b/drivers/serial/msm_serial.c
@@ -0,0 +1,767 @@
+/*
+ * drivers/serial/msm_serial.c - driver for msm7k serial device and console
+ *
+ * Copyright (C) 2007 Google, Inc.
+ * Author: Robert Love <rlove@google.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#if defined(CONFIG_SERIAL_MSM_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
+# define SUPPORT_SYSRQ
+#endif
+
+#include <linux/hrtimer.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/irq.h>
+#include <linux/init.h>
+#include <linux/console.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/serial_core.h>
+#include <linux/serial.h>
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+
+#include "msm_serial.h"
+
+struct msm_port {
+	struct uart_port	uart;
+	char			name[16];
+	struct clk		*clk;
+	unsigned int		imr;
+};
+
+#define UART_TO_MSM(uart_port)	((struct msm_port *) uart_port)
+
+static inline void msm_write(struct uart_port *port, unsigned int val,
+			     unsigned int off)
+{
+	__raw_writel(val, port->membase + off);
+}
+
+static inline unsigned int msm_read(struct uart_port *port, unsigned int off)
+{
+	return __raw_readl(port->membase + off);
+}
+
+static void msm_stop_tx(struct uart_port *port)
+{
+	struct msm_port *msm_port = UART_TO_MSM(port);
+
+	msm_port->imr &= ~UART_IMR_TXLEV;
+	msm_write(port, msm_port->imr, UART_IMR);
+}
+
+static void msm_start_tx(struct uart_port *port)
+{
+	struct msm_port *msm_port = UART_TO_MSM(port);
+
+	msm_port->imr |= UART_IMR_TXLEV;
+	msm_write(port, msm_port->imr, UART_IMR);
+}
+
+static void msm_stop_rx(struct uart_port *port)
+{
+	struct msm_port *msm_port = UART_TO_MSM(port);
+
+	msm_port->imr &= ~(UART_IMR_RXLEV | UART_IMR_RXSTALE);
+	msm_write(port, msm_port->imr, UART_IMR);
+}
+
+static void msm_enable_ms(struct uart_port *port)
+{
+	struct msm_port *msm_port = UART_TO_MSM(port);
+
+	msm_port->imr |= UART_IMR_DELTA_CTS;
+	msm_write(port, msm_port->imr, UART_IMR);
+}
+
+static void handle_rx(struct uart_port *port)
+{
+	struct tty_struct *tty = port->info->port.tty;
+	unsigned int sr;
+
+	/*
+	 * Handle overrun. My understanding of the hardware is that overrun
+	 * is not tied to the RX buffer, so we handle the case out of band.
+	 */
+	if ((msm_read(port, UART_SR) & UART_SR_OVERRUN)) {
+		port->icount.overrun++;
+		tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+		msm_write(port, UART_CR_CMD_RESET_ERR, UART_CR);
+	}
+
+	/* and now the main RX loop */
+	while ((sr = msm_read(port, UART_SR)) & UART_SR_RX_READY) {
+		unsigned int c;
+		char flag = TTY_NORMAL;
+
+		c = msm_read(port, UART_RF);
+
+		if (sr & UART_SR_RX_BREAK) {
+			port->icount.brk++;
+			if (uart_handle_break(port))
+				continue;
+		} else if (sr & UART_SR_PAR_FRAME_ERR) {
+			port->icount.frame++;
+		} else {
+			port->icount.rx++;
+		}
+
+		/* Mask conditions we're ignorning. */
+		sr &= port->read_status_mask;
+
+		if (sr & UART_SR_RX_BREAK) {
+			flag = TTY_BREAK;
+		} else if (sr & UART_SR_PAR_FRAME_ERR) {
+			flag = TTY_FRAME;
+		}
+
+		if (!uart_handle_sysrq_char(port, c))
+			tty_insert_flip_char(tty, c, flag);
+	}
+
+	tty_flip_buffer_push(tty);
+}
+
+static void handle_tx(struct uart_port *port)
+{
+	struct circ_buf *xmit = &port->info->xmit;
+	struct msm_port *msm_port = UART_TO_MSM(port);
+	int sent_tx;
+
+	if (port->x_char) {
+		msm_write(port, port->x_char, UART_TF);
+		port->icount.tx++;
+		port->x_char = 0;
+	}
+
+	while (msm_read(port, UART_SR) & UART_SR_TX_READY) {
+		if (uart_circ_empty(xmit)) {
+			/* disable tx interrupts */
+			msm_port->imr &= ~UART_IMR_TXLEV;
+			msm_write(port, msm_port->imr, UART_IMR);
+			break;
+		}
+
+		msm_write(port, xmit->buf[xmit->tail], UART_TF);
+
+		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+		port->icount.tx++;
+		sent_tx = 1;
+	}
+
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(port);
+}
+
+static void handle_delta_cts(struct uart_port *port)
+{
+	msm_write(port, UART_CR_CMD_RESET_CTS, UART_CR);
+	port->icount.cts++;
+	wake_up_interruptible(&port->info->delta_msr_wait);
+}
+
+static irqreturn_t msm_irq(int irq, void *dev_id)
+{
+	struct uart_port *port = dev_id;
+	struct msm_port *msm_port = UART_TO_MSM(port);
+	unsigned int misr;
+
+	spin_lock(&port->lock);
+	misr = msm_read(port, UART_MISR);
+	msm_write(port, 0, UART_IMR); /* disable interrupt */
+
+	if (misr & (UART_IMR_RXLEV | UART_IMR_RXSTALE))
+		handle_rx(port);
+	if (misr & UART_IMR_TXLEV)
+		handle_tx(port);
+	if (misr & UART_IMR_DELTA_CTS)
+		handle_delta_cts(port);
+
+	msm_write(port, msm_port->imr, UART_IMR); /* restore interrupt */
+	spin_unlock(&port->lock);
+
+	return IRQ_HANDLED;
+}
+
+static unsigned int msm_tx_empty(struct uart_port *port)
+{
+	return (msm_read(port, UART_SR) & UART_SR_TX_EMPTY) ? TIOCSER_TEMT : 0;
+}
+
+static unsigned int msm_get_mctrl(struct uart_port *port)
+{
+	return TIOCM_CAR | TIOCM_CTS | TIOCM_DSR | TIOCM_RTS;
+}
+
+static void msm_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+	unsigned int mr;
+
+	mr = msm_read(port, UART_MR1);
+
+	if (!(mctrl & TIOCM_RTS)) {
+		mr &= ~UART_MR1_RX_RDY_CTL;
+		msm_write(port, mr, UART_MR1);
+		msm_write(port, UART_CR_CMD_RESET_RFR, UART_CR);
+	} else {
+		mr |= UART_MR1_RX_RDY_CTL;
+		msm_write(port, mr, UART_MR1);
+	}
+}
+
+static void msm_break_ctl(struct uart_port *port, int break_ctl)
+{
+	if (break_ctl)
+		msm_write(port, UART_CR_CMD_START_BREAK, UART_CR);
+	else
+		msm_write(port, UART_CR_CMD_STOP_BREAK, UART_CR);
+}
+
+static void msm_set_baud_rate(struct uart_port *port, unsigned int baud)
+{
+	unsigned int baud_code, rxstale, watermark;
+
+	switch (baud) {
+	case 300:
+		baud_code = UART_CSR_300;
+		rxstale = 1;
+		break;
+	case 600:
+		baud_code = UART_CSR_600;
+		rxstale = 1;
+		break;
+	case 1200:
+		baud_code = UART_CSR_1200;
+		rxstale = 1;
+		break;
+	case 2400:
+		baud_code = UART_CSR_2400;
+		rxstale = 1;
+		break;
+	case 4800:
+		baud_code = UART_CSR_4800;
+		rxstale = 1;
+		break;
+	case 9600:
+		baud_code = UART_CSR_9600;
+		rxstale = 2;
+		break;
+	case 14400:
+		baud_code = UART_CSR_14400;
+		rxstale = 3;
+		break;
+	case 19200:
+		baud_code = UART_CSR_19200;
+		rxstale = 4;
+		break;
+	case 28800:
+		baud_code = UART_CSR_28800;
+		rxstale = 6;
+		break;
+	case 38400:
+		baud_code = UART_CSR_38400;
+		rxstale = 8;
+		break;
+	case 57600:
+		baud_code = UART_CSR_57600;
+		rxstale = 16;
+		break;
+	case 115200:
+	default:
+		baud_code = UART_CSR_115200;
+		rxstale = 31;
+		break;
+	}
+
+	msm_write(port, baud_code, UART_CSR);
+
+	/* RX stale watermark */
+	watermark = UART_IPR_STALE_LSB & rxstale;
+	watermark |= UART_IPR_RXSTALE_LAST;
+	watermark |= UART_IPR_STALE_TIMEOUT_MSB & (rxstale << 2);
+	msm_write(port, watermark, UART_IPR);
+
+	/* set RX watermark */
+	watermark = (port->fifosize * 3) / 4;
+	msm_write(port, watermark, UART_RFWR);
+
+	/* set TX watermark */
+	msm_write(port, 10, UART_TFWR);
+}
+
+static void msm_reset(struct uart_port *port)
+{
+	/* reset everything */
+	msm_write(port, UART_CR_CMD_RESET_RX, UART_CR);
+	msm_write(port, UART_CR_CMD_RESET_TX, UART_CR);
+	msm_write(port, UART_CR_CMD_RESET_ERR, UART_CR);
+	msm_write(port, UART_CR_CMD_RESET_BREAK_INT, UART_CR);
+	msm_write(port, UART_CR_CMD_RESET_CTS, UART_CR);
+	msm_write(port, UART_CR_CMD_SET_RFR, UART_CR);
+}
+
+static void msm_init_clock(struct uart_port *port)
+{
+	struct msm_port *msm_port = UART_TO_MSM(port);
+
+	clk_enable(msm_port->clk);
+
+	msm_write(port, 0xC0, UART_MREG);
+	msm_write(port, 0xB2, UART_NREG);
+	msm_write(port, 0x7D, UART_DREG);
+	msm_write(port, 0x1C, UART_MNDREG);
+}
+
+static int msm_startup(struct uart_port *port)
+{
+	struct msm_port *msm_port = UART_TO_MSM(port);
+	unsigned int data, rfr_level;
+	int ret;
+
+	snprintf(msm_port->name, sizeof(msm_port->name),
+		 "msm_serial%d", port->line);
+
+	ret = request_irq(port->irq, msm_irq, IRQF_TRIGGER_HIGH,
+			  msm_port->name, port);
+	if (unlikely(ret))
+		return ret;
+
+	msm_init_clock(port);
+
+	if (likely(port->fifosize > 12))
+		rfr_level = port->fifosize - 12;
+	else
+		rfr_level = port->fifosize;
+
+	/* set automatic RFR level */
+	data = msm_read(port, UART_MR1);
+	data &= ~UART_MR1_AUTO_RFR_LEVEL1;
+	data &= ~UART_MR1_AUTO_RFR_LEVEL0;
+	data |= UART_MR1_AUTO_RFR_LEVEL1 & (rfr_level << 2);
+	data |= UART_MR1_AUTO_RFR_LEVEL0 & rfr_level;
+	msm_write(port, data, UART_MR1);
+
+	/* make sure that RXSTALE count is non-zero */
+	data = msm_read(port, UART_IPR);
+	if (unlikely(!data)) {
+		data |= UART_IPR_RXSTALE_LAST;
+		data |= UART_IPR_STALE_LSB;
+		msm_write(port, data, UART_IPR);
+	}
+
+	msm_reset(port);
+
+	msm_write(port, 0x05, UART_CR);	/* enable TX & RX */
+
+	/* turn on RX and CTS interrupts */
+	msm_port->imr = UART_IMR_RXLEV | UART_IMR_RXSTALE |
+			UART_IMR_CURRENT_CTS;
+	msm_write(port, msm_port->imr, UART_IMR);
+
+	return 0;
+}
+
+static void msm_shutdown(struct uart_port *port)
+{
+	struct msm_port *msm_port = UART_TO_MSM(port);
+
+	msm_port->imr = 0;
+	msm_write(port, 0, UART_IMR); /* disable interrupts */
+
+	clk_disable(msm_port->clk);
+
+	free_irq(port->irq, port);
+}
+
+static void msm_set_termios(struct uart_port *port, struct ktermios *termios,
+			    struct ktermios *old)
+{
+	unsigned long flags;
+	unsigned int baud, mr;
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	/* calculate and set baud rate */
+	baud = uart_get_baud_rate(port, termios, old, 300, 115200);
+	msm_set_baud_rate(port, baud);
+
+	/* calculate parity */
+	mr = msm_read(port, UART_MR2);
+	mr &= ~UART_MR2_PARITY_MODE;
+	if (termios->c_cflag & PARENB) {
+		if (termios->c_cflag & PARODD)
+			mr |= UART_MR2_PARITY_MODE_ODD;
+		else if (termios->c_cflag & CMSPAR)
+			mr |= UART_MR2_PARITY_MODE_SPACE;
+		else
+			mr |= UART_MR2_PARITY_MODE_EVEN;
+	}
+
+	/* calculate bits per char */
+	mr &= ~UART_MR2_BITS_PER_CHAR;
+	switch (termios->c_cflag & CSIZE) {
+	case CS5:
+		mr |= UART_MR2_BITS_PER_CHAR_5;
+		break;
+	case CS6:
+		mr |= UART_MR2_BITS_PER_CHAR_6;
+		break;
+	case CS7:
+		mr |= UART_MR2_BITS_PER_CHAR_7;
+		break;
+	case CS8:
+	default:
+		mr |= UART_MR2_BITS_PER_CHAR_8;
+		break;
+	}
+
+	/* calculate stop bits */
+	mr &= ~(UART_MR2_STOP_BIT_LEN_ONE | UART_MR2_STOP_BIT_LEN_TWO);
+	if (termios->c_cflag & CSTOPB)
+		mr |= UART_MR2_STOP_BIT_LEN_TWO;
+	else
+		mr |= UART_MR2_STOP_BIT_LEN_ONE;
+
+	/* set parity, bits per char, and stop bit */
+	msm_write(port, mr, UART_MR2);
+
+	/* calculate and set hardware flow control */
+	mr = msm_read(port, UART_MR1);
+	mr &= ~(UART_MR1_CTS_CTL | UART_MR1_RX_RDY_CTL);
+	if (termios->c_cflag & CRTSCTS) {
+		mr |= UART_MR1_CTS_CTL;
+		mr |= UART_MR1_RX_RDY_CTL;
+	}
+	msm_write(port, mr, UART_MR1);
+
+	/* Configure status bits to ignore based on termio flags. */
+	port->read_status_mask = 0;
+	if (termios->c_iflag & INPCK)
+		port->read_status_mask |= UART_SR_PAR_FRAME_ERR;
+	if (termios->c_iflag & (BRKINT | PARMRK))
+		port->read_status_mask |= UART_SR_RX_BREAK;
+
+	uart_update_timeout(port, termios->c_cflag, baud);
+
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static const char *msm_type(struct uart_port *port)
+{
+	return "MSM";
+}
+
+static void msm_release_port(struct uart_port *port)
+{
+	struct platform_device *pdev = to_platform_device(port->dev);
+	struct resource *resource;
+	resource_size_t size;
+
+	resource = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (unlikely(!resource))
+		return;
+	size = resource->end - resource->start + 1;
+
+	release_mem_region(port->mapbase, size);
+	iounmap(port->membase);
+	port->membase = NULL;
+}
+
+static int msm_request_port(struct uart_port *port)
+{
+	struct platform_device *pdev = to_platform_device(port->dev);
+	struct resource *resource;
+	resource_size_t size;
+
+	resource = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (unlikely(!resource))
+		return -ENXIO;
+	size = resource->end - resource->start + 1;
+
+	if (unlikely(!request_mem_region(port->mapbase, size, "msm_serial")))
+		return -EBUSY;
+
+	port->membase = ioremap(port->mapbase, size);
+	if (!port->membase) {
+		release_mem_region(port->mapbase, size);
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static void msm_config_port(struct uart_port *port, int flags)
+{
+	if (flags & UART_CONFIG_TYPE) {
+		port->type = PORT_MSM;
+		msm_request_port(port);
+	}
+}
+
+static int msm_verify_port(struct uart_port *port, struct serial_struct *ser)
+{
+	if (unlikely(ser->type != PORT_UNKNOWN && ser->type != PORT_MSM))
+		return -EINVAL;
+	if (unlikely(port->irq != ser->irq))
+		return -EINVAL;
+	return 0;
+}
+
+static void msm_power(struct uart_port *port, unsigned int state,
+		      unsigned int oldstate)
+{
+	struct msm_port *msm_port = UART_TO_MSM(port);
+
+	switch (state) {
+	case 0:
+		clk_enable(msm_port->clk);
+		break;
+	case 3:
+		clk_disable(msm_port->clk);
+		break;
+	default:
+		printk(KERN_ERR "msm_serial: Unknown PM state %d\n", state);
+	}
+}
+
+static struct uart_ops msm_uart_pops = {
+	.tx_empty = msm_tx_empty,
+	.set_mctrl = msm_set_mctrl,
+	.get_mctrl = msm_get_mctrl,
+	.stop_tx = msm_stop_tx,
+	.start_tx = msm_start_tx,
+	.stop_rx = msm_stop_rx,
+	.enable_ms = msm_enable_ms,
+	.break_ctl = msm_break_ctl,
+	.startup = msm_startup,
+	.shutdown = msm_shutdown,
+	.set_termios = msm_set_termios,
+	.type = msm_type,
+	.release_port = msm_release_port,
+	.request_port = msm_request_port,
+	.config_port = msm_config_port,
+	.verify_port = msm_verify_port,
+	.pm = msm_power,
+};
+
+static struct msm_port msm_uart_ports[] = {
+	{
+		.uart = {
+			.iotype = UPIO_MEM,
+			.ops = &msm_uart_pops,
+			.flags = UPF_BOOT_AUTOCONF,
+			.fifosize = 512,
+			.line = 0,
+		},
+	},
+	{
+		.uart = {
+			.iotype = UPIO_MEM,
+			.ops = &msm_uart_pops,
+			.flags = UPF_BOOT_AUTOCONF,
+			.fifosize = 512,
+			.line = 1,
+		},
+	},
+	{
+		.uart = {
+			.iotype = UPIO_MEM,
+			.ops = &msm_uart_pops,
+			.flags = UPF_BOOT_AUTOCONF,
+			.fifosize = 64,
+			.line = 2,
+		},
+	},
+};
+
+#define UART_NR	ARRAY_SIZE(msm_uart_ports)
+
+static inline struct uart_port *get_port_from_line(unsigned int line)
+{
+	return &msm_uart_ports[line].uart;
+}
+
+#ifdef CONFIG_SERIAL_MSM_CONSOLE
+
+static void msm_console_putchar(struct uart_port *port, int c)
+{
+	while (!(msm_read(port, UART_SR) & UART_SR_TX_READY))
+		;
+	msm_write(port, c, UART_TF);
+}
+
+static void msm_console_write(struct console *co, const char *s,
+			      unsigned int count)
+{
+	struct uart_port *port;
+	struct msm_port *msm_port;
+
+	BUG_ON(co->index < 0 || co->index >= UART_NR);
+
+	port = get_port_from_line(co->index);
+	msm_port = UART_TO_MSM(port);
+
+	spin_lock(&port->lock);
+	uart_console_write(port, s, count, msm_console_putchar);
+	spin_unlock(&port->lock);
+}
+
+static int __init msm_console_setup(struct console *co, char *options)
+{
+	struct uart_port *port;
+	int baud, flow, bits, parity;
+
+	if (unlikely(co->index >= UART_NR || co->index < 0))
+		return -ENXIO;
+
+	port = get_port_from_line(co->index);
+
+	if (unlikely(!port->membase))
+		return -ENXIO;
+
+	port->cons = co;
+
+	msm_init_clock(port);
+
+	if (options)
+		uart_parse_options(options, &baud, &parity, &bits, &flow);
+
+	bits = 8;
+	parity = 'n';
+	flow = 'n';
+	msm_write(port, UART_MR2_BITS_PER_CHAR_8 | UART_MR2_STOP_BIT_LEN_ONE,
+		  UART_MR2);	/* 8N1 */
+
+	if (baud < 300 || baud > 115200)
+		baud = 115200;
+	msm_set_baud_rate(port, baud);
+
+	msm_reset(port);
+
+	printk(KERN_INFO "msm_serial: console setup on port #%d\n", port->line);
+
+	return uart_set_options(port, co, baud, parity, bits, flow);
+}
+
+static struct uart_driver msm_uart_driver;
+
+static struct console msm_console = {
+	.name = "ttyMSM",
+	.write = msm_console_write,
+	.device = uart_console_device,
+	.setup = msm_console_setup,
+	.flags = CON_PRINTBUFFER,
+	.index = -1,
+	.data = &msm_uart_driver,
+};
+
+#define MSM_CONSOLE	(&msm_console)
+
+#else
+#define MSM_CONSOLE	NULL
+#endif
+
+static struct uart_driver msm_uart_driver = {
+	.owner = THIS_MODULE,
+	.driver_name = "msm_serial",
+	.dev_name = "ttyMSM",
+	.nr = UART_NR,
+	.cons = MSM_CONSOLE,
+};
+
+static int __init msm_serial_probe(struct platform_device *pdev)
+{
+	struct msm_port *msm_port;
+	struct resource *resource;
+	struct uart_port *port;
+
+	if (unlikely(pdev->id < 0 || pdev->id >= UART_NR))
+		return -ENXIO;
+
+	printk(KERN_INFO "msm_serial: detected port #%d\n", pdev->id);
+
+	port = get_port_from_line(pdev->id);
+	port->dev = &pdev->dev;
+	msm_port = UART_TO_MSM(port);
+
+	msm_port->clk = clk_get(&pdev->dev, "uart_clk");
+	if (unlikely(IS_ERR(msm_port->clk)))
+		return PTR_ERR(msm_port->clk);
+	port->uartclk = clk_get_rate(msm_port->clk);
+
+	resource = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (unlikely(!resource))
+		return -ENXIO;
+	port->mapbase = resource->start;
+
+	port->irq = platform_get_irq(pdev, 0);
+	if (unlikely(port->irq < 0))
+		return -ENXIO;
+
+	platform_set_drvdata(pdev, port);
+
+	return uart_add_one_port(&msm_uart_driver, port);
+}
+
+static int __devexit msm_serial_remove(struct platform_device *pdev)
+{
+	struct msm_port *msm_port = platform_get_drvdata(pdev);
+
+	clk_put(msm_port->clk);
+
+	return 0;
+}
+
+static struct platform_driver msm_platform_driver = {
+	.probe = msm_serial_probe,
+	.remove = msm_serial_remove,
+	.driver = {
+		.name = "msm_serial",
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init msm_serial_init(void)
+{
+	int ret;
+
+	ret = uart_register_driver(&msm_uart_driver);
+	if (unlikely(ret))
+		return ret;
+
+	ret = platform_driver_probe(&msm_platform_driver, msm_serial_probe);
+	if (unlikely(ret))
+		uart_unregister_driver(&msm_uart_driver);
+
+	printk(KERN_INFO "msm_serial: driver initialized\n");
+
+	return ret;
+}
+
+static void __exit msm_serial_exit(void)
+{
+#ifdef CONFIG_SERIAL_MSM_CONSOLE
+	unregister_console(&msm_console);
+#endif
+	platform_driver_unregister(&msm_platform_driver);
+	uart_unregister_driver(&msm_uart_driver);
+}
+
+module_init(msm_serial_init);
+module_exit(msm_serial_exit);
+
+MODULE_AUTHOR("Robert Love <rlove@google.com>");
+MODULE_DESCRIPTION("Driver for msm7x serial device");
+MODULE_LICENSE("GPL");
diff --git a/drivers/serial/msm_serial.h b/drivers/serial/msm_serial.h
new file mode 100644
index 00000000000..689f1fa0e84
--- /dev/null
+++ b/drivers/serial/msm_serial.h
@@ -0,0 +1,117 @@
+/*
+ * drivers/serial/msm_serial.h
+ *
+ * Copyright (C) 2007 Google, Inc.
+ * Author: Robert Love <rlove@google.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __DRIVERS_SERIAL_MSM_SERIAL_H
+#define __DRIVERS_SERIAL_MSM_SERIAL_H
+
+#define UART_MR1			0x0000
+
+#define UART_MR1_AUTO_RFR_LEVEL0	0x3F
+#define UART_MR1_AUTO_RFR_LEVEL1	0x3FF00
+#define UART_MR1_RX_RDY_CTL    		(1 << 7)
+#define UART_MR1_CTS_CTL       		(1 << 6)
+
+#define UART_MR2			0x0004
+#define UART_MR2_ERROR_MODE		(1 << 6)
+#define UART_MR2_BITS_PER_CHAR		0x30
+#define UART_MR2_BITS_PER_CHAR_5	(0x0 << 4)
+#define UART_MR2_BITS_PER_CHAR_6	(0x1 << 4)
+#define UART_MR2_BITS_PER_CHAR_7	(0x2 << 4)
+#define UART_MR2_BITS_PER_CHAR_8	(0x3 << 4)
+#define UART_MR2_STOP_BIT_LEN_ONE	(0x1 << 2)
+#define UART_MR2_STOP_BIT_LEN_TWO	(0x3 << 2)
+#define UART_MR2_PARITY_MODE_NONE	0x0
+#define UART_MR2_PARITY_MODE_ODD	0x1
+#define UART_MR2_PARITY_MODE_EVEN	0x2
+#define UART_MR2_PARITY_MODE_SPACE	0x3
+#define UART_MR2_PARITY_MODE		0x3
+
+#define UART_CSR	0x0008
+#define UART_CSR_115200	0xFF
+#define UART_CSR_57600	0xEE
+#define UART_CSR_38400	0xDD
+#define UART_CSR_28800	0xCC
+#define UART_CSR_19200	0xBB
+#define UART_CSR_14400	0xAA
+#define UART_CSR_9600	0x99
+#define UART_CSR_4800	0x77
+#define UART_CSR_2400	0x55
+#define UART_CSR_1200	0x44
+#define UART_CSR_600	0x33
+#define UART_CSR_300	0x22
+
+#define UART_TF		0x000C
+
+#define UART_CR				0x0010
+#define UART_CR_CMD_NULL		(0 << 4)
+#define UART_CR_CMD_RESET_RX		(1 << 4)
+#define UART_CR_CMD_RESET_TX		(2 << 4)
+#define UART_CR_CMD_RESET_ERR		(3 << 4)
+#define UART_CR_CMD_RESET_BREAK_INT	(4 << 4)
+#define UART_CR_CMD_START_BREAK		(5 << 4)
+#define UART_CR_CMD_STOP_BREAK		(6 << 4)
+#define UART_CR_CMD_RESET_CTS		(7 << 4)
+#define UART_CR_CMD_PACKET_MODE		(9 << 4)
+#define UART_CR_CMD_MODE_RESET		(12 << 4)
+#define UART_CR_CMD_SET_RFR		(13 << 4)
+#define UART_CR_CMD_RESET_RFR		(14 << 4)
+#define UART_CR_TX_DISABLE		(1 << 3)
+#define UART_CR_TX_ENABLE		(1 << 3)
+#define UART_CR_RX_DISABLE		(1 << 3)
+#define UART_CR_RX_ENABLE		(1 << 3)
+
+#define UART_IMR		0x0014
+#define UART_IMR_TXLEV		(1 << 0)
+#define UART_IMR_RXSTALE	(1 << 3)
+#define UART_IMR_RXLEV		(1 << 4)
+#define UART_IMR_DELTA_CTS	(1 << 5)
+#define UART_IMR_CURRENT_CTS	(1 << 6)
+
+#define UART_IPR_RXSTALE_LAST		0x20
+#define UART_IPR_STALE_LSB		0x1F
+#define UART_IPR_STALE_TIMEOUT_MSB	0x3FF80
+
+#define UART_IPR	0x0018
+#define UART_TFWR	0x001C
+#define UART_RFWR	0x0020
+#define UART_HCR	0x0024
+
+#define UART_MREG		0x0028
+#define UART_NREG		0x002C
+#define UART_DREG		0x0030
+#define UART_MNDREG		0x0034
+#define UART_IRDA		0x0038
+#define UART_MISR_MODE		0x0040
+#define UART_MISR_RESET		0x0044
+#define UART_MISR_EXPORT	0x0048
+#define UART_MISR_VAL		0x004C
+#define UART_TEST_CTRL		0x0050
+
+#define UART_SR			0x0008
+#define UART_SR_HUNT_CHAR	(1 << 7)
+#define UART_SR_RX_BREAK	(1 << 6)
+#define UART_SR_PAR_FRAME_ERR	(1 << 5)
+#define UART_SR_OVERRUN		(1 << 4)
+#define UART_SR_TX_EMPTY	(1 << 3)
+#define UART_SR_TX_READY	(1 << 2)
+#define UART_SR_RX_FULL		(1 << 1)
+#define UART_SR_RX_READY	(1 << 0)
+
+#define UART_RF		0x000C
+#define UART_MISR	0x0010
+#define UART_ISR	0x0014
+
+#endif	/* __DRIVERS_SERIAL_MSM_SERIAL_H */
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 6fd80c4243f..23d2fb051f9 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -171,6 +171,9 @@
 /* Timberdale UART */
 #define PORT_TIMBUART	87
 
+/* Qualcomm MSM SoCs */
+#define PORT_MSM	88
+
 #ifdef __KERNEL__
 
 #include <linux/compiler.h>
-- 
cgit v1.2.3-70-g09d2


From eadfe21989d728b5af936487627b4e288bd805f8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 22 Jun 2009 15:32:31 +0100
Subject: LDSCRIPT: Name INIT_RAM_FS consistently

In asm-generic/vmlinux.lds.h, name INIT_RAM_FS consistently, no matter the
setting of CONFIG_BLK_DEV_INITRD.  This corrects:

	commit ef53dae8658cf0e93d380983824a661067948d87
	Author: Sam Ravnborg <sam@ravnborg.org>
	Date:   Sun Jun 7 20:46:37 2009 +0200
	Subject: Improve vmlinux.lds.h support for arch specific linker scripts

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/vmlinux.lds.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 55413e568f0..92b73b6140f 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -625,7 +625,7 @@
 	*(.init.ramfs)							\
 	VMLINUX_SYMBOL(__initramfs_end) = .;
 #else
-#define INITRAMFS
+#define INIT_RAM_FS
 #endif
 
 /**
-- 
cgit v1.2.3-70-g09d2


From 9a7aa12f3911853a3574d47d567b81a2a5df7208 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 4 Jun 2009 15:26:49 +0200
Subject: vfs: Set special lockdep map for dirs only if not set by fs

Some filesystems need to set lockdep map for i_mutex differently for
different directories. For example OCFS2 has system directories (for
orphan inode tracking and for gathering all system files like journal
or quota files into a single place) which have different locking
locking rules than standard directories. For a filesystem setting
lockdep map is naturaly done when the inode is read but we have to
modify unlock_new_inode() not to overwrite the lockdep map the filesystem
has set.

Acked-by: peterz@infradead.org
CC: mingo@redhat.com
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/inode.c              | 17 +++++++++++------
 include/linux/lockdep.h | 15 +++++++++++++++
 2 files changed, 26 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/fs/inode.c b/fs/inode.c
index f643be565df..04c785bb63c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -665,12 +665,17 @@ void unlock_new_inode(struct inode *inode)
 	if (inode->i_mode & S_IFDIR) {
 		struct file_system_type *type = inode->i_sb->s_type;
 
-		/*
-		 * ensure nobody is actually holding i_mutex
-		 */
-		mutex_destroy(&inode->i_mutex);
-		mutex_init(&inode->i_mutex);
-		lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key);
+		/* Set new key only if filesystem hasn't already changed it */
+		if (!lockdep_match_class(&inode->i_mutex,
+		    &type->i_mutex_key)) {
+			/*
+			 * ensure nobody is actually holding i_mutex
+			 */
+			mutex_destroy(&inode->i_mutex);
+			mutex_init(&inode->i_mutex);
+			lockdep_set_class(&inode->i_mutex,
+					  &type->i_mutex_dir_key);
+		}
 	}
 #endif
 	/*
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index da5a5a1f4cd..b25d1b53df0 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -258,6 +258,16 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name,
 #define lockdep_set_subclass(lock, sub)	\
 		lockdep_init_map(&(lock)->dep_map, #lock, \
 				 (lock)->dep_map.key, sub)
+/*
+ * Compare locking classes
+ */
+#define lockdep_match_class(lock, key) lockdep_match_key(&(lock)->dep_map, key)
+
+static inline int lockdep_match_key(struct lockdep_map *lock,
+				    struct lock_class_key *key)
+{
+	return lock->key == key;
+}
 
 /*
  * Acquire a lock.
@@ -326,6 +336,11 @@ static inline void lockdep_on(void)
 #define lockdep_set_class_and_subclass(lock, key, sub) \
 		do { (void)(key); } while (0)
 #define lockdep_set_subclass(lock, sub)		do { } while (0)
+/*
+ * We don't define lockdep_match_class() and lockdep_match_key() for !LOCKDEP
+ * case since the result is not well defined and the caller should rather
+ * #ifdef the call himself.
+ */
 
 # define INIT_LOCKDEP
 # define lockdep_reset()		do { debug_locks = 1; } while (0)
-- 
cgit v1.2.3-70-g09d2


From 31950eb66ff47c946fd9c65c2f8c94b6b7ba13fc Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 22 Jun 2009 21:18:12 -0700
Subject: mm/init: cpu_hotplug_init() must be initialized before SLAB

SLAB uses get/put_online_cpus() which use a mutex which is itself only
initialized when cpu_hotplug_init() is called.  Currently we hang suring
boot in SLAB due to doing that too late.

Reported by James Bottomley and Sachin Sant (and possibly others).
Debugged by Benjamin Herrenschmidt.

This just removes the dynamic initialization of the data structures, and
replaces it with a static one, avoiding this dependency entirely, and
removing one unnecessary special initcall.

Tested-by: Sachin Sant <sachinp@in.ibm.com>
Tested-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Tested-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpu.h |  5 -----
 init/main.c         |  1 -
 kernel/cpu.c        | 13 +++++--------
 3 files changed, 5 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 2643d848df9..4d668e05d45 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -69,7 +69,6 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb)
 
 int cpu_up(unsigned int cpu);
 void notify_cpu_starting(unsigned int cpu);
-extern void cpu_hotplug_init(void);
 extern void cpu_maps_update_begin(void);
 extern void cpu_maps_update_done(void);
 
@@ -84,10 +83,6 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb)
 {
 }
 
-static inline void cpu_hotplug_init(void)
-{
-}
-
 static inline void cpu_maps_update_begin(void)
 {
 }
diff --git a/init/main.c b/init/main.c
index 09131ec090c..4870dfeb9ee 100644
--- a/init/main.c
+++ b/init/main.c
@@ -678,7 +678,6 @@ asmlinkage void __init start_kernel(void)
 #endif
 	page_cgroup_init();
 	enable_debug_pagealloc();
-	cpu_hotplug_init();
 	kmemtrace_init();
 	kmemleak_init();
 	debug_objects_mem_init();
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 395b6974dc8..8ce10043e4a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -34,14 +34,11 @@ static struct {
 	 * an ongoing cpu hotplug operation.
 	 */
 	int refcount;
-} cpu_hotplug;
-
-void __init cpu_hotplug_init(void)
-{
-	cpu_hotplug.active_writer = NULL;
-	mutex_init(&cpu_hotplug.lock);
-	cpu_hotplug.refcount = 0;
-}
+} cpu_hotplug = {
+	.active_writer = NULL,
+	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
+	.refcount = 0,
+};
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-- 
cgit v1.2.3-70-g09d2


From 616511d039af402670de8500d0e24495113a9cab Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 22 Jun 2009 15:09:13 -0400
Subject: VFS: Uninline the function put_mnt_ns()

In order to allow modules to use it without having to export vfsmount_lock.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/namespace.c                | 8 ++++++--
 include/linux/mnt_namespace.h | 9 +--------
 2 files changed, 7 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/fs/namespace.c b/fs/namespace.c
index 2dd333b0fe7..6645846f205 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2246,10 +2246,14 @@ void __init mnt_init(void)
 	init_mount_tree();
 }
 
-void __put_mnt_ns(struct mnt_namespace *ns)
+void put_mnt_ns(struct mnt_namespace *ns)
 {
-	struct vfsmount *root = ns->root;
+	struct vfsmount *root;
 	LIST_HEAD(umount_list);
+
+	if (!atomic_dec_and_lock(&ns->count, &vfsmount_lock))
+		return;
+	root = ns->root;
 	ns->root = NULL;
 	spin_unlock(&vfsmount_lock);
 	down_write(&namespace_sem);
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 3a059298cc1..299d11af5f7 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -26,14 +26,7 @@ struct fs_struct;
 
 extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
 		struct fs_struct *);
-extern void __put_mnt_ns(struct mnt_namespace *ns);
-
-static inline void put_mnt_ns(struct mnt_namespace *ns)
-{
-	if (atomic_dec_and_lock(&ns->count, &vfsmount_lock))
-		/* releases vfsmount_lock */
-		__put_mnt_ns(ns);
-}
+extern void put_mnt_ns(struct mnt_namespace *ns);
 
 static inline void exit_mnt_ns(struct task_struct *p)
 {
-- 
cgit v1.2.3-70-g09d2


From cf8d2c11cb77f129675478792122f50827e5b0ae Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 22 Jun 2009 15:09:13 -0400
Subject: VFS: Add VFS helper functions for setting up private namespaces

The purpose of this patch is to improve the remote mount path lookup
support for distributed filesystems such as the NFSv4 client.

When given a mount command of the form "mount server:/foo/bar /mnt", the
NFSv4 client is required to look up the filehandle for "server:/", and
then look up each component of the remote mount path "foo/bar" in order
to find the directory that is actually going to be mounted on /mnt.
Following that remote mount path may involve following symlinks,
crossing server-side mount points and even following referrals to
filesystem volumes on other servers.

Since the standard VFS path lookup code already supports walking paths
that contain all these features (using in-kernel automounts for
following referrals) we would like to be able to reuse that rather than
duplicate the full path traversal functionality in the NFSv4 client code.

This patch therefore defines a VFS helper function create_mnt_ns(), that
sets up a temporary filesystem namespace and attaches a root filesystem to
it. It exports the create_mnt_ns() and put_mnt_ns() function for use by
filesystem modules.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/namespace.c                | 45 +++++++++++++++++++++++++++++++++++--------
 include/linux/mnt_namespace.h |  1 +
 2 files changed, 38 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/fs/namespace.c b/fs/namespace.c
index 6645846f205..a7bea8c8bd4 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1937,6 +1937,21 @@ dput_out:
 	return retval;
 }
 
+static struct mnt_namespace *alloc_mnt_ns(void)
+{
+	struct mnt_namespace *new_ns;
+
+	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
+	if (!new_ns)
+		return ERR_PTR(-ENOMEM);
+	atomic_set(&new_ns->count, 1);
+	new_ns->root = NULL;
+	INIT_LIST_HEAD(&new_ns->list);
+	init_waitqueue_head(&new_ns->poll);
+	new_ns->event = 0;
+	return new_ns;
+}
+
 /*
  * Allocate a new namespace structure and populate it with contents
  * copied from the namespace of the passed in task structure.
@@ -1948,14 +1963,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
 	struct vfsmount *p, *q;
 
-	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
-	if (!new_ns)
-		return ERR_PTR(-ENOMEM);
-
-	atomic_set(&new_ns->count, 1);
-	INIT_LIST_HEAD(&new_ns->list);
-	init_waitqueue_head(&new_ns->poll);
-	new_ns->event = 0;
+	new_ns = alloc_mnt_ns();
+	if (IS_ERR(new_ns))
+		return new_ns;
 
 	down_write(&namespace_sem);
 	/* First pass: copy the tree topology */
@@ -2019,6 +2029,24 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
 	return new_ns;
 }
 
+/**
+ * create_mnt_ns - creates a private namespace and adds a root filesystem
+ * @mnt: pointer to the new root filesystem mountpoint
+ */
+struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
+{
+	struct mnt_namespace *new_ns;
+
+	new_ns = alloc_mnt_ns();
+	if (!IS_ERR(new_ns)) {
+		mnt->mnt_ns = new_ns;
+		new_ns->root = mnt;
+		list_add(&new_ns->list, &new_ns->root->mnt_list);
+	}
+	return new_ns;
+}
+EXPORT_SYMBOL(create_mnt_ns);
+
 SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
 		char __user *, type, unsigned long, flags, void __user *, data)
 {
@@ -2264,3 +2292,4 @@ void put_mnt_ns(struct mnt_namespace *ns)
 	release_mounts(&umount_list);
 	kfree(ns);
 }
+EXPORT_SYMBOL(put_mnt_ns);
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 299d11af5f7..3beb2592b03 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -24,6 +24,7 @@ struct proc_mounts {
 
 struct fs_struct;
 
+extern struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt);
 extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
 		struct fs_struct *);
 extern void put_mnt_ns(struct mnt_namespace *ns);
-- 
cgit v1.2.3-70-g09d2


From 02ab18b0f497bed623814677577b76cc97234085 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 14 Jun 2009 04:32:04 -0300
Subject: V4L/DVB (12072): gspca-ov519: add extra controls

This patch adds autobrightness (so that it can
be turned off to make the already present brightness
control work) and light frequency filtering controls.

The lightfreq control needed 2 different entries
in the ctrls array, as the number of options differs
depending on the sensor. Always one of the 2 entires is
disabled ofcourse.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/gspca/ov519.c | 214 ++++++++++++++++++++++++++++++++++++--
 include/linux/videodev2.h         |   3 +-
 2 files changed, 207 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/gspca/ov519.c b/drivers/media/video/gspca/ov519.c
index 188866ac6ce..baa488dd33d 100644
--- a/drivers/media/video/gspca/ov519.c
+++ b/drivers/media/video/gspca/ov519.c
@@ -65,6 +65,8 @@ struct sd {
 	__u8 colors;
 	__u8 hflip;
 	__u8 vflip;
+	__u8 autobrightness;
+	__u8 freq;
 
 	__u8 stopped;		/* Streaming is temporarily paused */
 
@@ -94,11 +96,17 @@ static int sd_sethflip(struct gspca_dev *gspca_dev, __s32 val);
 static int sd_gethflip(struct gspca_dev *gspca_dev, __s32 *val);
 static int sd_setvflip(struct gspca_dev *gspca_dev, __s32 val);
 static int sd_getvflip(struct gspca_dev *gspca_dev, __s32 *val);
+static int sd_setautobrightness(struct gspca_dev *gspca_dev, __s32 val);
+static int sd_getautobrightness(struct gspca_dev *gspca_dev, __s32 *val);
+static int sd_setfreq(struct gspca_dev *gspca_dev, __s32 val);
+static int sd_getfreq(struct gspca_dev *gspca_dev, __s32 *val);
 static void setbrightness(struct gspca_dev *gspca_dev);
 static void setcontrast(struct gspca_dev *gspca_dev);
 static void setcolors(struct gspca_dev *gspca_dev);
+static void setautobrightness(struct sd *sd);
+static void setfreq(struct sd *sd);
 
-static struct ctrl sd_ctrls[] = {
+static const struct ctrl sd_ctrls[] = {
 	{
 	    {
 		.id      = V4L2_CID_BRIGHTNESS,
@@ -141,7 +149,7 @@ static struct ctrl sd_ctrls[] = {
 	    .set = sd_setcolors,
 	    .get = sd_getcolors,
 	},
-/* next controls work with ov7670 only */
+/* The flip controls work with ov7670 only */
 #define HFLIP_IDX 3
 	{
 	    {
@@ -172,6 +180,51 @@ static struct ctrl sd_ctrls[] = {
 	    .set = sd_setvflip,
 	    .get = sd_getvflip,
 	},
+#define AUTOBRIGHT_IDX 5
+	{
+	    {
+		.id      = V4L2_CID_AUTOBRIGHTNESS,
+		.type    = V4L2_CTRL_TYPE_BOOLEAN,
+		.name    = "Auto Brightness",
+		.minimum = 0,
+		.maximum = 1,
+		.step    = 1,
+#define AUTOBRIGHT_DEF 1
+		.default_value = AUTOBRIGHT_DEF,
+	    },
+	    .set = sd_setautobrightness,
+	    .get = sd_getautobrightness,
+	},
+#define FREQ_IDX 6
+	{
+	    {
+		.id	 = V4L2_CID_POWER_LINE_FREQUENCY,
+		.type    = V4L2_CTRL_TYPE_MENU,
+		.name    = "Light frequency filter",
+		.minimum = 0,
+		.maximum = 2,	/* 0: 0, 1: 50Hz, 2:60Hz */
+		.step    = 1,
+#define FREQ_DEF 0
+		.default_value = FREQ_DEF,
+	    },
+	    .set = sd_setfreq,
+	    .get = sd_getfreq,
+	},
+#define OV7670_FREQ_IDX 7
+	{
+	    {
+		.id	 = V4L2_CID_POWER_LINE_FREQUENCY,
+		.type    = V4L2_CTRL_TYPE_MENU,
+		.name    = "Light frequency filter",
+		.minimum = 0,
+		.maximum = 3,	/* 0: 0, 1: 50Hz, 2:60Hz 3: Auto Hz */
+		.step    = 1,
+#define OV7670_FREQ_DEF 3
+		.default_value = OV7670_FREQ_DEF,
+	    },
+	    .set = sd_setfreq,
+	    .get = sd_getfreq,
+	},
 };
 
 static const struct v4l2_pix_format ov519_vga_mode[] = {
@@ -416,7 +469,7 @@ static const struct ov_i2c_regvals norm_6x30[] = {
 	{ 0x07, 0x2d }, /* Sharpness */
 	{ 0x0c, 0x20 },
 	{ 0x0d, 0x20 },
-	{ 0x0e, 0x20 },
+	{ 0x0e, 0xa0 }, /* Was 0x20, bit7 enables a 2x gain which we need */
 	{ 0x0f, 0x05 },
 	{ 0x10, 0x9a },
 	{ 0x11, 0x00 }, /* Pixel clock = fastest */
@@ -1659,9 +1712,21 @@ static int sd_config(struct gspca_dev *gspca_dev,
 	sd->colors = COLOR_DEF;
 	sd->hflip = HFLIP_DEF;
 	sd->vflip = VFLIP_DEF;
-	if (sd->sensor != SEN_OV7670)
-		gspca_dev->ctrl_dis = (1 << HFLIP_IDX)
-					| (1 << VFLIP_IDX);
+	sd->autobrightness = AUTOBRIGHT_DEF;
+	if (sd->sensor == SEN_OV7670) {
+		sd->freq = OV7670_FREQ_DEF;
+		gspca_dev->ctrl_dis = 1 << FREQ_IDX;
+	} else {
+		sd->freq = FREQ_DEF;
+		gspca_dev->ctrl_dis = (1 << HFLIP_IDX) | (1 << VFLIP_IDX) |
+				      (1 << OV7670_FREQ_IDX);
+	}
+	if (sd->sensor == SEN_OV7640 || sd->sensor == SEN_OV7670)
+		gspca_dev->ctrl_dis |= 1 << AUTOBRIGHT_IDX;
+	/* OV8610 Frequency filter control should work but needs testing */
+	if (sd->sensor == SEN_OV8610)
+		gspca_dev->ctrl_dis |= 1 << FREQ_IDX;
+
 	return 0;
 error:
 	PDEBUG(D_ERR, "OV519 Config failed");
@@ -2233,7 +2298,6 @@ static int set_ov_sensor_window(struct sd *sd)
 		msleep(10);	/* need to sleep between read and write to
 				 * same reg! */
 		i2c_w(sd, OV7670_REG_VREF, v);
-		sethvflip(sd);
 	} else {
 		i2c_w(sd, 0x17, hwsbase);
 		i2c_w(sd, 0x18, hwebase + (sd->gspca_dev.width >> hwscale));
@@ -2268,6 +2332,9 @@ static int sd_start(struct gspca_dev *gspca_dev)
 	setcontrast(gspca_dev);
 	setbrightness(gspca_dev);
 	setcolors(gspca_dev);
+	sethvflip(sd);
+	setautobrightness(sd);
+	setfreq(sd);
 
 	ret = ov51x_restart(sd);
 	if (ret < 0)
@@ -2394,8 +2461,7 @@ static void setbrightness(struct gspca_dev *gspca_dev)
 		break;
 	case SEN_OV7620:
 		/* 7620 doesn't like manual changes when in auto mode */
-/*fixme
- *		if (!sd->auto_brt) */
+		if (!sd->autobrightness)
 			i2c_w(sd, OV7610_REG_BRT, val);
 		break;
 	case SEN_OV7670:
@@ -2482,6 +2548,70 @@ static void setcolors(struct gspca_dev *gspca_dev)
 	}
 }
 
+static void setautobrightness(struct sd *sd)
+{
+	if (sd->sensor == SEN_OV7640 || sd->sensor == SEN_OV7670)
+		return;
+
+	i2c_w_mask(sd, 0x2d, sd->autobrightness ? 0x10 : 0x00, 0x10);
+}
+
+static void setfreq(struct sd *sd)
+{
+	if (sd->sensor == SEN_OV7670) {
+		switch (sd->freq) {
+		case 0: /* Banding filter disabled */
+			i2c_w_mask(sd, OV7670_REG_COM8, 0, OV7670_COM8_BFILT);
+			break;
+		case 1: /* 50 hz */
+			i2c_w_mask(sd, OV7670_REG_COM8, OV7670_COM8_BFILT,
+				   OV7670_COM8_BFILT);
+			i2c_w_mask(sd, OV7670_REG_COM11, 0x08, 0x18);
+			break;
+		case 2: /* 60 hz */
+			i2c_w_mask(sd, OV7670_REG_COM8, OV7670_COM8_BFILT,
+				   OV7670_COM8_BFILT);
+			i2c_w_mask(sd, OV7670_REG_COM11, 0x00, 0x18);
+			break;
+		case 3: /* Auto hz */
+			i2c_w_mask(sd, OV7670_REG_COM8, OV7670_COM8_BFILT,
+				   OV7670_COM8_BFILT);
+			i2c_w_mask(sd, OV7670_REG_COM11, OV7670_COM11_HZAUTO,
+				   0x18);
+			break;
+		}
+	} else {
+		switch (sd->freq) {
+		case 0: /* Banding filter disabled */
+			i2c_w_mask(sd, 0x2d, 0x00, 0x04);
+			i2c_w_mask(sd, 0x2a, 0x00, 0x80);
+			break;
+		case 1: /* 50 hz (filter on and framerate adj) */
+			i2c_w_mask(sd, 0x2d, 0x04, 0x04);
+			i2c_w_mask(sd, 0x2a, 0x80, 0x80);
+			/* 20 fps -> 16.667 fps */
+			if (sd->sensor == SEN_OV6620 ||
+			    sd->sensor == SEN_OV6630)
+				i2c_w(sd, 0x2b, 0x5e);
+			else
+				i2c_w(sd, 0x2b, 0xac);
+			break;
+		case 2: /* 60 hz (filter on, ...) */
+			i2c_w_mask(sd, 0x2d, 0x04, 0x04);
+			if (sd->sensor == SEN_OV6620 ||
+			    sd->sensor == SEN_OV6630) {
+				/* 20 fps -> 15 fps */
+				i2c_w_mask(sd, 0x2a, 0x80, 0x80);
+				i2c_w(sd, 0x2b, 0xa8);
+			} else {
+				/* no framerate adj. */
+				i2c_w_mask(sd, 0x2a, 0x00, 0x80);
+			}
+			break;
+		}
+	}
+}
+
 static int sd_setbrightness(struct gspca_dev *gspca_dev, __s32 val)
 {
 	struct sd *sd = (struct sd *) gspca_dev;
@@ -2572,6 +2702,71 @@ static int sd_getvflip(struct gspca_dev *gspca_dev, __s32 *val)
 	return 0;
 }
 
+static int sd_setautobrightness(struct gspca_dev *gspca_dev, __s32 val)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	sd->autobrightness = val;
+	if (gspca_dev->streaming)
+		setautobrightness(sd);
+	return 0;
+}
+
+static int sd_getautobrightness(struct gspca_dev *gspca_dev, __s32 *val)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	*val = sd->autobrightness;
+	return 0;
+}
+
+static int sd_setfreq(struct gspca_dev *gspca_dev, __s32 val)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	sd->freq = val;
+	if (gspca_dev->streaming)
+		setfreq(sd);
+	return 0;
+}
+
+static int sd_getfreq(struct gspca_dev *gspca_dev, __s32 *val)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	*val = sd->freq;
+	return 0;
+}
+
+static int sd_querymenu(struct gspca_dev *gspca_dev,
+			struct v4l2_querymenu *menu)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	switch (menu->id) {
+	case V4L2_CID_POWER_LINE_FREQUENCY:
+		switch (menu->index) {
+		case 0:		/* V4L2_CID_POWER_LINE_FREQUENCY_DISABLED */
+			strcpy((char *) menu->name, "NoFliker");
+			return 0;
+		case 1:		/* V4L2_CID_POWER_LINE_FREQUENCY_50HZ */
+			strcpy((char *) menu->name, "50 Hz");
+			return 0;
+		case 2:		/* V4L2_CID_POWER_LINE_FREQUENCY_60HZ */
+			strcpy((char *) menu->name, "60 Hz");
+			return 0;
+		case 3:
+			if (sd->sensor != SEN_OV7670)
+				return -EINVAL;
+
+			strcpy((char *) menu->name, "Automatic");
+			return 0;
+		}
+		break;
+	}
+	return -EINVAL;
+}
+
 /* sub-driver description */
 static const struct sd_desc sd_desc = {
 	.name = MODULE_NAME,
@@ -2582,6 +2777,7 @@ static const struct sd_desc sd_desc = {
 	.start = sd_start,
 	.stopN = sd_stopN,
 	.pkt_scan = sd_pkt_scan,
+	.querymenu = sd_querymenu,
 };
 
 /* -- module initialisation -- */
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index f24eceecc5a..772d226cb5c 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -894,9 +894,10 @@ enum v4l2_colorfx {
 	V4L2_COLORFX_BW		= 1,
 	V4L2_COLORFX_SEPIA	= 2,
 };
+#define V4L2_CID_AUTOBRIGHTNESS			(V4L2_CID_BASE+32)
 
 /* last CID + 1 */
-#define V4L2_CID_LASTP1                         (V4L2_CID_BASE+32)
+#define V4L2_CID_LASTP1                         (V4L2_CID_BASE+33)
 
 /*  MPEG-class control IDs defined by V4L2 */
 #define V4L2_CID_MPEG_BASE 			(V4L2_CTRL_CLASS_MPEG | 0x900)
-- 
cgit v1.2.3-70-g09d2


From 1876bb923c98c605eca69f0bfe295f7b5f5eba28 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 14 Jun 2009 06:45:50 -0300
Subject: V4L/DVB (12079): gspca_ov519: add support for the ov511 bridge

gspca_ov519: add support for the ov511 bridge

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/gspca/ov519.c | 533 +++++++++++++++++++++++++++++++++++++-
 include/linux/videodev2.h         |   1 +
 2 files changed, 521 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/gspca/ov519.c b/drivers/media/video/gspca/ov519.c
index 9d4b69dbf96..1f8e2613ecc 100644
--- a/drivers/media/video/gspca/ov519.c
+++ b/drivers/media/video/gspca/ov519.c
@@ -76,8 +76,8 @@ struct sd {
 
 	__u8 stopped;		/* Streaming is temporarily paused */
 
-	__u8 frame_rate;	/* current Framerate (OV519 only) */
-	__u8 clockdiv;		/* clockdiv override for OV519 only */
+	__u8 frame_rate;	/* current Framerate */
+	__u8 clockdiv;		/* clockdiv override */
 
 	char sensor;		/* Type of image sensor chip (SEN_*) */
 #define SEN_UNKNOWN 0
@@ -304,17 +304,77 @@ static const struct v4l2_pix_format ov518_sif_mode[] = {
 		.priv = 0},
 };
 
+static const struct v4l2_pix_format ov511_vga_mode[] = {
+	{320, 240, V4L2_PIX_FMT_OV511, V4L2_FIELD_NONE,
+		.bytesperline = 320,
+		.sizeimage = 320 * 240 * 3,
+		.colorspace = V4L2_COLORSPACE_JPEG,
+		.priv = 1},
+	{640, 480, V4L2_PIX_FMT_OV511, V4L2_FIELD_NONE,
+		.bytesperline = 640,
+		.sizeimage = 640 * 480 * 2,
+		.colorspace = V4L2_COLORSPACE_JPEG,
+		.priv = 0},
+};
+static const struct v4l2_pix_format ov511_sif_mode[] = {
+	{160, 120, V4L2_PIX_FMT_OV511, V4L2_FIELD_NONE,
+		.bytesperline = 160,
+		.sizeimage = 40000,
+		.colorspace = V4L2_COLORSPACE_JPEG,
+		.priv = 3},
+	{176, 144, V4L2_PIX_FMT_OV511, V4L2_FIELD_NONE,
+		.bytesperline = 176,
+		.sizeimage = 40000,
+		.colorspace = V4L2_COLORSPACE_JPEG,
+		.priv = 1},
+	{320, 240, V4L2_PIX_FMT_OV511, V4L2_FIELD_NONE,
+		.bytesperline = 320,
+		.sizeimage = 320 * 240 * 3,
+		.colorspace = V4L2_COLORSPACE_JPEG,
+		.priv = 2},
+	{352, 288, V4L2_PIX_FMT_OV511, V4L2_FIELD_NONE,
+		.bytesperline = 352,
+		.sizeimage = 352 * 288 * 3,
+		.colorspace = V4L2_COLORSPACE_JPEG,
+		.priv = 0},
+};
 
 /* Registers common to OV511 / OV518 */
+#define R51x_FIFO_PSIZE			0x30	/* 2 bytes wide w/ OV518(+) */
 #define R51x_SYS_RESET          	0x50
+	/* Reset type flags */
+	#define	OV511_RESET_OMNICE	0x08
 #define R51x_SYS_INIT         		0x53
 #define R51x_SYS_SNAP			0x52
 #define R51x_SYS_CUST_ID		0x5F
 #define R51x_COMP_LUT_BEGIN		0x80
 
 /* OV511 Camera interface register numbers */
+#define R511_CAM_DELAY			0x10
+#define R511_CAM_EDGE			0x11
+#define R511_CAM_PXCNT			0x12
+#define R511_CAM_LNCNT			0x13
+#define R511_CAM_PXDIV			0x14
+#define R511_CAM_LNDIV			0x15
+#define R511_CAM_UV_EN			0x16
+#define R511_CAM_LINE_MODE		0x17
+#define R511_CAM_OPTS			0x18
+
+#define R511_SNAP_FRAME			0x19
+#define R511_SNAP_PXCNT			0x1A
+#define R511_SNAP_LNCNT			0x1B
+#define R511_SNAP_PXDIV			0x1C
+#define R511_SNAP_LNDIV			0x1D
+#define R511_SNAP_UV_EN			0x1E
+#define R511_SNAP_UV_EN			0x1E
+#define R511_SNAP_OPTS			0x1F
+
+#define R511_DRAM_FLOW_CTL		0x20
+#define R511_FIFO_OPTS			0x31
+#define R511_I2C_CTL			0x40
 #define R511_SYS_LED_CTL		0x55	/* OV511+ only */
-#define	OV511_RESET_NOREGS		0x3F	/* All but OV511 & regs */
+#define R511_COMP_EN			0x78
+#define R511_COMP_LUT_EN		0x79
 
 /* OV518 Camera interface register numbers */
 #define R518_GPIO_OUT			0x56	/* OV518(+) only */
@@ -1079,13 +1139,128 @@ static int ov518_reg_w32(struct sd *sd, __u16 index, u32 value, int n)
 	return ret;
 }
 
+static int ov511_i2c_w(struct sd *sd, __u8 reg, __u8 value)
+{
+	int rc, retries;
+
+	PDEBUG(D_USBO, "i2c 0x%02x -> [0x%02x]", value, reg);
+
+	/* Three byte write cycle */
+	for (retries = 6; ; ) {
+		/* Select camera register */
+		rc = reg_w(sd, R51x_I2C_SADDR_3, reg);
+		if (rc < 0)
+			return rc;
+
+		/* Write "value" to I2C data port of OV511 */
+		rc = reg_w(sd, R51x_I2C_DATA, value);
+		if (rc < 0)
+			return rc;
+
+		/* Initiate 3-byte write cycle */
+		rc = reg_w(sd, R511_I2C_CTL, 0x01);
+		if (rc < 0)
+			return rc;
+
+		do
+			rc = reg_r(sd, R511_I2C_CTL);
+		while (rc > 0 && ((rc & 1) == 0)); /* Retry until idle */
+
+		if (rc < 0)
+			return rc;
+
+		if ((rc & 2) == 0) /* Ack? */
+			break;
+		if (--retries < 0) {
+			PDEBUG(D_USBO, "i2c write retries exhausted");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+static int ov511_i2c_r(struct sd *sd, __u8 reg)
+{
+	int rc, value, retries;
+
+	/* Two byte write cycle */
+	for (retries = 6; ; ) {
+		/* Select camera register */
+		rc = reg_w(sd, R51x_I2C_SADDR_2, reg);
+		if (rc < 0)
+			return rc;
+
+		/* Initiate 2-byte write cycle */
+		rc = reg_w(sd, R511_I2C_CTL, 0x03);
+		if (rc < 0)
+			return rc;
+
+		do
+			rc = reg_r(sd, R511_I2C_CTL);
+		while (rc > 0 && ((rc & 1) == 0)); /* Retry until idle */
+
+		if (rc < 0)
+			return rc;
+
+		if ((rc & 2) == 0) /* Ack? */
+			break;
+
+		/* I2C abort */
+		reg_w(sd, R511_I2C_CTL, 0x10);
+
+		if (--retries < 0) {
+			PDEBUG(D_USBI, "i2c write retries exhausted");
+			return -1;
+		}
+	}
+
+	/* Two byte read cycle */
+	for (retries = 6; ; ) {
+		/* Initiate 2-byte read cycle */
+		rc = reg_w(sd, R511_I2C_CTL, 0x05);
+		if (rc < 0)
+			return rc;
+
+		do
+			rc = reg_r(sd, R511_I2C_CTL);
+		while (rc > 0 && ((rc & 1) == 0)); /* Retry until idle */
+
+		if (rc < 0)
+			return rc;
+
+		if ((rc & 2) == 0) /* Ack? */
+			break;
+
+		/* I2C abort */
+		rc = reg_w(sd, R511_I2C_CTL, 0x10);
+		if (rc < 0)
+			return rc;
+
+		if (--retries < 0) {
+			PDEBUG(D_USBI, "i2c read retries exhausted");
+			return -1;
+		}
+	}
+
+	value = reg_r(sd, R51x_I2C_DATA);
+
+	PDEBUG(D_USBI, "i2c [0x%02X] -> 0x%02X", reg, value);
+
+	/* This is needed to make i2c_w() work */
+	rc = reg_w(sd, R511_I2C_CTL, 0x05);
+	if (rc < 0)
+		return rc;
+
+	return value;
+}
 
 /*
  * The OV518 I2C I/O procedure is different, hence, this function.
  * This is normally only called from i2c_w(). Note that this function
  * always succeeds regardless of whether the sensor is present and working.
  */
-static int i2c_w(struct sd *sd,
+static int ov518_i2c_w(struct sd *sd,
 		__u8 reg,
 		__u8 value)
 {
@@ -1120,7 +1295,7 @@ static int i2c_w(struct sd *sd,
  * This is normally only called from i2c_r(). Note that this function
  * always succeeds regardless of whether the sensor is present and working.
  */
-static int i2c_r(struct sd *sd, __u8 reg)
+static int ov518_i2c_r(struct sd *sd, __u8 reg)
 {
 	int rc, value;
 
@@ -1143,6 +1318,34 @@ static int i2c_r(struct sd *sd, __u8 reg)
 	return value;
 }
 
+static int i2c_w(struct sd *sd, __u8 reg, __u8 value)
+{
+	switch (sd->bridge) {
+	case BRIDGE_OV511:
+	case BRIDGE_OV511PLUS:
+		return ov511_i2c_w(sd, reg, value);
+	case BRIDGE_OV518:
+	case BRIDGE_OV518PLUS:
+	case BRIDGE_OV519:
+		return ov518_i2c_w(sd, reg, value);
+	}
+	return -1; /* Should never happen */
+}
+
+static int i2c_r(struct sd *sd, __u8 reg)
+{
+	switch (sd->bridge) {
+	case BRIDGE_OV511:
+	case BRIDGE_OV511PLUS:
+		return ov511_i2c_r(sd, reg);
+	case BRIDGE_OV518:
+	case BRIDGE_OV518PLUS:
+	case BRIDGE_OV519:
+		return ov518_i2c_r(sd, reg);
+	}
+	return -1; /* Should never happen */
+}
+
 /* Writes bits at positions specified by mask to an I2C reg. Bits that are in
  * the same position as 1's in "mask" are cleared and set to "value". Bits
  * that are in the same position as 0's in "mask" are preserved, regardless
@@ -1490,9 +1693,31 @@ static void ov51x_led_control(struct sd *sd, int on)
 	}
 }
 
-/* OV518 quantization tables are 8x4 (instead of 8x8) */
-static int ov518_upload_quan_tables(struct sd *sd)
+static int ov51x_upload_quan_tables(struct sd *sd)
 {
+	const unsigned char yQuanTable511[] = {
+		0, 1, 1, 2, 2, 3, 3, 4,
+		1, 1, 1, 2, 2, 3, 4, 4,
+		1, 1, 2, 2, 3, 4, 4, 4,
+		2, 2, 2, 3, 4, 4, 4, 4,
+		2, 2, 3, 4, 4, 5, 5, 5,
+		3, 3, 4, 4, 5, 5, 5, 5,
+		3, 4, 4, 4, 5, 5, 5, 5,
+		4, 4, 4, 4, 5, 5, 5, 5
+	};
+
+	const unsigned char uvQuanTable511[] = {
+		0, 2, 2, 3, 4, 4, 4, 4,
+		2, 2, 2, 4, 4, 4, 4, 4,
+		2, 2, 3, 4, 4, 4, 4, 4,
+		3, 4, 4, 4, 4, 4, 4, 4,
+		4, 4, 4, 4, 4, 4, 4, 4,
+		4, 4, 4, 4, 4, 4, 4, 4,
+		4, 4, 4, 4, 4, 4, 4, 4,
+		4, 4, 4, 4, 4, 4, 4, 4
+	};
+
+	/* OV518 quantization tables are 8x4 (instead of 8x8) */
 	const unsigned char yQuanTable518[] = {
 		5, 4, 5, 6, 6, 7, 7, 7,
 		5, 5, 5, 5, 6, 7, 7, 7,
@@ -1507,14 +1732,23 @@ static int ov518_upload_quan_tables(struct sd *sd)
 		7, 7, 7, 7, 7, 7, 8, 8
 	};
 
-	const unsigned char *pYTable = yQuanTable518;
-	const unsigned char *pUVTable = uvQuanTable518;
+	const unsigned char *pYTable, *pUVTable;
 	unsigned char val0, val1;
-	int i, rc, reg = R51x_COMP_LUT_BEGIN;
+	int i, size, rc, reg = R51x_COMP_LUT_BEGIN;
 
 	PDEBUG(D_PROBE, "Uploading quantization tables");
 
-	for (i = 0; i < 16; i++) {
+	if (sd->bridge == BRIDGE_OV511 || sd->bridge == BRIDGE_OV511PLUS) {
+		pYTable = yQuanTable511;
+		pUVTable = uvQuanTable511;
+		size  = 32;
+	} else {
+		pYTable = yQuanTable518;
+		pUVTable = uvQuanTable518;
+		size  = 16;
+	}
+
+	for (i = 0; i < size; i++) {
 		val0 = *pYTable++;
 		val1 = *pYTable++;
 		val0 &= 0x0f;
@@ -1529,7 +1763,7 @@ static int ov518_upload_quan_tables(struct sd *sd)
 		val0 &= 0x0f;
 		val1 &= 0x0f;
 		val0 |= val1 << 4;
-		rc = reg_w(sd, reg + 16, val0);
+		rc = reg_w(sd, reg + size, val0);
 		if (rc < 0)
 			return rc;
 
@@ -1539,6 +1773,87 @@ static int ov518_upload_quan_tables(struct sd *sd)
 	return 0;
 }
 
+/* This initializes the OV511/OV511+ and the sensor */
+static int ov511_configure(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+	int rc;
+
+	/* For 511 and 511+ */
+	const struct ov_regvals init_511[] = {
+		{ R51x_SYS_RESET,	0x7f },
+		{ R51x_SYS_INIT,	0x01 },
+		{ R51x_SYS_RESET,	0x7f },
+		{ R51x_SYS_INIT,	0x01 },
+		{ R51x_SYS_RESET,	0x3f },
+		{ R51x_SYS_INIT,	0x01 },
+		{ R51x_SYS_RESET,	0x3d },
+	};
+
+	const struct ov_regvals norm_511[] = {
+		{ R511_DRAM_FLOW_CTL, 	0x01 },
+		{ R51x_SYS_SNAP,	0x00 },
+		{ R51x_SYS_SNAP,	0x02 },
+		{ R51x_SYS_SNAP,	0x00 },
+		{ R511_FIFO_OPTS,	0x1f },
+		{ R511_COMP_EN,		0x00 },
+		{ R511_COMP_LUT_EN,	0x03 },
+	};
+
+	const struct ov_regvals norm_511_p[] = {
+		{ R511_DRAM_FLOW_CTL,	0xff },
+		{ R51x_SYS_SNAP,	0x00 },
+		{ R51x_SYS_SNAP,	0x02 },
+		{ R51x_SYS_SNAP,	0x00 },
+		{ R511_FIFO_OPTS,	0xff },
+		{ R511_COMP_EN,		0x00 },
+		{ R511_COMP_LUT_EN,	0x03 },
+	};
+
+	const struct ov_regvals compress_511[] = {
+		{ 0x70, 0x1f },
+		{ 0x71, 0x05 },
+		{ 0x72, 0x06 },
+		{ 0x73, 0x06 },
+		{ 0x74, 0x14 },
+		{ 0x75, 0x03 },
+		{ 0x76, 0x04 },
+		{ 0x77, 0x04 },
+	};
+
+	PDEBUG(D_PROBE, "Device custom id %x", reg_r(sd, R51x_SYS_CUST_ID));
+
+	rc = write_regvals(sd, init_511, ARRAY_SIZE(init_511));
+	if (rc < 0)
+		return rc;
+
+	switch (sd->bridge) {
+	case BRIDGE_OV511:
+		rc = write_regvals(sd, norm_511, ARRAY_SIZE(norm_511));
+		if (rc < 0)
+			return rc;
+		break;
+	case BRIDGE_OV511PLUS:
+		rc = write_regvals(sd, norm_511_p, ARRAY_SIZE(norm_511_p));
+		if (rc < 0)
+			return rc;
+		break;
+	}
+
+	/* Init compression */
+	rc = write_regvals(sd, compress_511, ARRAY_SIZE(compress_511));
+	if (rc < 0)
+		return rc;
+
+	rc = ov51x_upload_quan_tables(sd);
+	if (rc < 0) {
+		PDEBUG(D_ERR, "Error uploading quantization tables");
+		return rc;
+	}
+
+	return 0;
+}
+
 /* This initializes the OV518/OV518+ and the sensor */
 static int ov518_configure(struct gspca_dev *gspca_dev)
 {
@@ -1615,7 +1930,7 @@ static int ov518_configure(struct gspca_dev *gspca_dev)
 		break;
 	}
 
-	rc = ov518_upload_quan_tables(sd);
+	rc = ov51x_upload_quan_tables(sd);
 	if (rc < 0) {
 		PDEBUG(D_ERR, "Error uploading quantization tables");
 		return rc;
@@ -1661,6 +1976,10 @@ static int sd_config(struct gspca_dev *gspca_dev,
 	sd->invert_led = id->driver_info & BRIDGE_INVERT_LED;
 
 	switch (sd->bridge) {
+	case BRIDGE_OV511:
+	case BRIDGE_OV511PLUS:
+		ret = ov511_configure(gspca_dev);
+		break;
 	case BRIDGE_OV518:
 	case BRIDGE_OV518PLUS:
 		ret = ov518_configure(gspca_dev);
@@ -1719,6 +2038,16 @@ static int sd_config(struct gspca_dev *gspca_dev,
 
 	cam = &gspca_dev->cam;
 	switch (sd->bridge) {
+	case BRIDGE_OV511:
+	case BRIDGE_OV511PLUS:
+		if (!sd->sif) {
+			cam->cam_mode = ov511_vga_mode;
+			cam->nmodes = ARRAY_SIZE(ov511_vga_mode);
+		} else {
+			cam->cam_mode = ov511_sif_mode;
+			cam->nmodes = ARRAY_SIZE(ov511_sif_mode);
+		}
+		break;
 	case BRIDGE_OV518:
 	case BRIDGE_OV518PLUS:
 		if (!sd->sif) {
@@ -1810,6 +2139,126 @@ static int sd_init(struct gspca_dev *gspca_dev)
 	return 0;
 }
 
+/* Set up the OV511/OV511+ with the given image parameters.
+ *
+ * Do not put any sensor-specific code in here (including I2C I/O functions)
+ */
+static int ov511_mode_init_regs(struct sd *sd)
+{
+	int hsegs, vsegs, packet_size, fps, needed;
+	int interlaced = 0;
+	struct usb_host_interface *alt;
+	struct usb_interface *intf;
+
+	intf = usb_ifnum_to_if(sd->gspca_dev.dev, sd->gspca_dev.iface);
+	alt = usb_altnum_to_altsetting(intf, sd->gspca_dev.alt);
+	if (!alt) {
+		PDEBUG(D_ERR, "Couldn't get altsetting");
+		return -EIO;
+	}
+
+	packet_size = le16_to_cpu(alt->endpoint[0].desc.wMaxPacketSize);
+	reg_w(sd, R51x_FIFO_PSIZE, packet_size >> 5);
+
+	reg_w(sd, R511_CAM_UV_EN, 0x01);
+	reg_w(sd, R511_SNAP_UV_EN, 0x01);
+	reg_w(sd, R511_SNAP_OPTS, 0x03);
+
+	/* Here I'm assuming that snapshot size == image size.
+	 * I hope that's always true. --claudio
+	 */
+	hsegs = (sd->gspca_dev.width >> 3) - 1;
+	vsegs = (sd->gspca_dev.height >> 3) - 1;
+
+	reg_w(sd, R511_CAM_PXCNT, hsegs);
+	reg_w(sd, R511_CAM_LNCNT, vsegs);
+	reg_w(sd, R511_CAM_PXDIV, 0x00);
+	reg_w(sd, R511_CAM_LNDIV, 0x00);
+
+	/* YUV420, low pass filter on */
+	reg_w(sd, R511_CAM_OPTS, 0x03);
+
+	/* Snapshot additions */
+	reg_w(sd, R511_SNAP_PXCNT, hsegs);
+	reg_w(sd, R511_SNAP_LNCNT, vsegs);
+	reg_w(sd, R511_SNAP_PXDIV, 0x00);
+	reg_w(sd, R511_SNAP_LNDIV, 0x00);
+
+	/******** Set the framerate ********/
+	if (frame_rate > 0)
+		sd->frame_rate = frame_rate;
+
+	switch (sd->sensor) {
+	case SEN_OV6620:
+		/* No framerate control, doesn't like higher rates yet */
+		sd->clockdiv = 3;
+		break;
+
+	/* Note once the FIXME's in mode_init_ov_sensor_regs() are fixed
+	   for more sensors we need to do this for them too */
+	case SEN_OV7620:
+	case SEN_OV7640:
+		if (sd->gspca_dev.width == 320)
+			interlaced = 1;
+		/* Fall through */
+	case SEN_OV6630:
+	case SEN_OV76BE:
+	case SEN_OV7610:
+	case SEN_OV7670:
+		switch (sd->frame_rate) {
+		case 30:
+		case 25:
+			/* Not enough bandwidth to do 640x480 @ 30 fps */
+			if (sd->gspca_dev.width != 640) {
+				sd->clockdiv = 0;
+				break;
+			}
+			/* Fall through for 640x480 case */
+		default:
+/*		case 20: */
+/*		case 15: */
+			sd->clockdiv = 1;
+			break;
+		case 10:
+			sd->clockdiv = 2;
+			break;
+		case 5:
+			sd->clockdiv = 5;
+			break;
+		}
+		if (interlaced) {
+			sd->clockdiv = (sd->clockdiv + 1) * 2 - 1;
+			/* Higher then 10 does not work */
+			if (sd->clockdiv > 10)
+				sd->clockdiv = 10;
+		}
+		break;
+
+	case SEN_OV8610:
+		/* No framerate control ?? */
+		sd->clockdiv = 0;
+		break;
+	}
+
+	/* Check if we have enough bandwidth to disable compression */
+	fps = (interlaced ? 60 : 30) / (sd->clockdiv + 1) + 1;
+	needed = fps * sd->gspca_dev.width * sd->gspca_dev.height * 3 / 2;
+	/* 1400 is a conservative estimate of the max nr of isoc packets/sec */
+	if (needed > 1400 * packet_size) {
+		/* Enable Y and UV quantization and compression */
+		reg_w(sd, R511_COMP_EN, 0x07);
+		reg_w(sd, R511_COMP_LUT_EN, 0x03);
+	} else {
+		reg_w(sd, R511_COMP_EN, 0x06);
+		reg_w(sd, R511_COMP_LUT_EN, 0x00);
+	}
+
+	reg_w(sd, R51x_SYS_RESET, OV511_RESET_OMNICE);
+	reg_w(sd, R51x_SYS_RESET, 0);
+
+	return 0;
+}
+
 /* Sets up the OV518/OV518+ with the given image parameters
  *
  * OV518 needs a completely different approach, until we can figure out what
@@ -2363,6 +2812,10 @@ static int sd_start(struct gspca_dev *gspca_dev)
 	int ret = 0;
 
 	switch (sd->bridge) {
+	case BRIDGE_OV511:
+	case BRIDGE_OV511PLUS:
+		ret = ov511_mode_init_regs(sd);
+		break;
 	case BRIDGE_OV518:
 	case BRIDGE_OV518PLUS:
 		ret = ov518_mode_init_regs(sd);
@@ -2403,6 +2856,56 @@ static void sd_stopN(struct gspca_dev *gspca_dev)
 	ov51x_led_control(sd, 0);
 }
 
+static void ov511_pkt_scan(struct gspca_dev *gspca_dev,
+			struct gspca_frame *frame,	/* target */
+			__u8 *in,			/* isoc packet */
+			int len)			/* iso packet length */
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	/* SOF/EOF packets have 1st to 8th bytes zeroed and the 9th
+	 * byte non-zero. The EOF packet has image width/height in the
+	 * 10th and 11th bytes. The 9th byte is given as follows:
+	 *
+	 * bit 7: EOF
+	 *     6: compression enabled
+	 *     5: 422/420/400 modes
+	 *     4: 422/420/400 modes
+	 *     3: 1
+	 *     2: snapshot button on
+	 *     1: snapshot frame
+	 *     0: even/odd field
+	 */
+	if (!(in[0] | in[1] | in[2] | in[3] | in[4] | in[5] | in[6] | in[7]) &&
+	    (in[8] & 0x08)) {
+		if (in[8] & 0x80) {
+			/* Frame end */
+			if ((in[9] + 1) * 8 != gspca_dev->width ||
+			    (in[10] + 1) * 8 != gspca_dev->height) {
+				PDEBUG(D_ERR, "Invalid frame size, got: %dx%d,"
+					" requested: %dx%d\n",
+					(in[9] + 1) * 8, (in[10] + 1) * 8,
+					gspca_dev->width, gspca_dev->height);
+				gspca_dev->last_packet_type = DISCARD_PACKET;
+				return;
+			}
+			/* Add 11 byte footer to frame, might be usefull */
+			gspca_frame_add(gspca_dev, LAST_PACKET, frame, in, 11);
+			return;
+		} else {
+			/* Frame start */
+			gspca_frame_add(gspca_dev, FIRST_PACKET, frame, in, 0);
+			sd->packet_nr = 0;
+		}
+	}
+
+	/* Ignore the packet number */
+	len--;
+
+	/* intermediate packet */
+	gspca_frame_add(gspca_dev, INTER_PACKET, frame, in, len);
+}
+
 static void ov518_pkt_scan(struct gspca_dev *gspca_dev,
 			struct gspca_frame *frame,	/* target */
 			__u8 *data,			/* isoc packet */
@@ -2495,6 +2998,7 @@ static void sd_pkt_scan(struct gspca_dev *gspca_dev,
 	switch (sd->bridge) {
 	case BRIDGE_OV511:
 	case BRIDGE_OV511PLUS:
+		ov511_pkt_scan(gspca_dev, frame, data, len);
 		break;
 	case BRIDGE_OV518:
 	case BRIDGE_OV518PLUS:
@@ -2862,12 +3366,15 @@ static const __devinitdata struct usb_device_id device_table[] = {
 	{USB_DEVICE(0x045e, 0x028c), .driver_info = BRIDGE_OV519 },
 	{USB_DEVICE(0x054c, 0x0154), .driver_info = BRIDGE_OV519 },
 	{USB_DEVICE(0x054c, 0x0155), .driver_info = BRIDGE_OV519 },
+	{USB_DEVICE(0x05a9, 0x0511), .driver_info = BRIDGE_OV511 },
 	{USB_DEVICE(0x05a9, 0x0518), .driver_info = BRIDGE_OV518 },
 	{USB_DEVICE(0x05a9, 0x0519), .driver_info = BRIDGE_OV519 },
 	{USB_DEVICE(0x05a9, 0x0530), .driver_info = BRIDGE_OV519 },
 	{USB_DEVICE(0x05a9, 0x4519), .driver_info = BRIDGE_OV519 },
 	{USB_DEVICE(0x05a9, 0x8519), .driver_info = BRIDGE_OV519 },
+	{USB_DEVICE(0x05a9, 0xa511), .driver_info = BRIDGE_OV511PLUS },
 	{USB_DEVICE(0x05a9, 0xa518), .driver_info = BRIDGE_OV518PLUS },
+	{USB_DEVICE(0x0813, 0x0002), .driver_info = BRIDGE_OV511PLUS },
 	{}
 };
 
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 772d226cb5c..8a025d51090 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -348,6 +348,7 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_SQ905C   v4l2_fourcc('9', '0', '5', 'C') /* compressed RGGB bayer */
 #define V4L2_PIX_FMT_PJPG     v4l2_fourcc('P', 'J', 'P', 'G') /* Pixart 73xx JPEG */
 #define V4L2_PIX_FMT_YVYU     v4l2_fourcc('Y', 'V', 'Y', 'U') /* 16 YVU 4:2:2 */
+#define V4L2_PIX_FMT_OV511    v4l2_fourcc('O', '5', '1', '1') /* ov511 JPEG */
 #define V4L2_PIX_FMT_OV518    v4l2_fourcc('O', '5', '1', '8') /* ov518 JPEG */
 
 /*
-- 
cgit v1.2.3-70-g09d2


From a4c473033b6a100773a4fd8b7ba1e45baeb1e692 Mon Sep 17 00:00:00 2001
From: Devin Heitmueller <dheitmueller@kernellabs.com>
Date: Sat, 20 Jun 2009 21:34:42 -0300
Subject: V4L/DVB (12102): em28xx: add Remote control support for EVGA inDtube

Add an IR profile for the EVGA inDtube remote control (which is an NEC type
remote)

Signed-off-by: Devin Heitmueller <dheitmueller@kernellabs.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/common/ir-keymaps.c         | 23 +++++++++++++++++++++++
 drivers/media/video/em28xx/em28xx-cards.c |  2 ++
 include/media/ir-common.h                 |  2 ++
 3 files changed, 27 insertions(+)

(limited to 'include')

diff --git a/drivers/media/common/ir-keymaps.c b/drivers/media/common/ir-keymaps.c
index 3fe158ac7bb..4216328552f 100644
--- a/drivers/media/common/ir-keymaps.c
+++ b/drivers/media/common/ir-keymaps.c
@@ -2750,3 +2750,26 @@ IR_KEYTAB_TYPE ir_codes_dm1105_nec[IR_KEYTAB_SIZE] = {
 	[0x1b] = KEY_B,		/*recall*/
 };
 EXPORT_SYMBOL_GPL(ir_codes_dm1105_nec);
+
+/* EVGA inDtube
+   Devin Heitmueller <devin.heitmueller@gmail.com>
+ */
+IR_KEYTAB_TYPE ir_codes_evga_indtube[IR_KEYTAB_SIZE] = {
+	[0x12] = KEY_POWER,
+	[0x02] = KEY_MODE,	/* TV */
+	[0x14] = KEY_MUTE,
+	[0x1a] = KEY_CHANNELUP,
+	[0x16] = KEY_TV2,	/* PIP */
+	[0x1d] = KEY_VOLUMEUP,
+	[0x05] = KEY_CHANNELDOWN,
+	[0x0f] = KEY_PLAYPAUSE,
+	[0x19] = KEY_VOLUMEDOWN,
+	[0x1c] = KEY_REWIND,
+	[0x0d] = KEY_RECORD,
+	[0x18] = KEY_FORWARD,
+	[0x1e] = KEY_PREVIOUS,
+	[0x1b] = KEY_STOP,
+	[0x1f] = KEY_NEXT,
+	[0x13] = KEY_CAMERA,
+};
+EXPORT_SYMBOL_GPL(ir_codes_evga_indtube);
diff --git a/drivers/media/video/em28xx/em28xx-cards.c b/drivers/media/video/em28xx/em28xx-cards.c
index 972c4addc40..c43fdb9bc88 100644
--- a/drivers/media/video/em28xx/em28xx-cards.c
+++ b/drivers/media/video/em28xx/em28xx-cards.c
@@ -1472,9 +1472,11 @@ struct em28xx_board em28xx_boards[] = {
 		.tuner_type   = TUNER_XC2028,
 		.tuner_gpio   = default_tuner_gpio,
 		.decoder      = EM28XX_TVP5150,
+		.xclk         = EM28XX_XCLK_FREQUENCY_12MHZ, /* NEC IR */
 		.mts_firmware = 1,
 		.has_dvb      = 1,
 		.dvb_gpio     = evga_indtube_digital,
+		.ir_codes     = ir_codes_evga_indtube,
 		.input        = { {
 			.type     = EM28XX_VMUX_TELEVISION,
 			.vmux     = TVP5150_COMPOSITE0,
diff --git a/include/media/ir-common.h b/include/media/ir-common.h
index 7b5b91f6042..9dcb632f608 100644
--- a/include/media/ir-common.h
+++ b/include/media/ir-common.h
@@ -162,6 +162,8 @@ extern IR_KEYTAB_TYPE ir_codes_ati_tv_wonder_hd_600[IR_KEYTAB_SIZE];
 extern IR_KEYTAB_TYPE ir_codes_kworld_plus_tv_analog[IR_KEYTAB_SIZE];
 extern IR_KEYTAB_TYPE ir_codes_kaiomy[IR_KEYTAB_SIZE];
 extern IR_KEYTAB_TYPE ir_codes_dm1105_nec[IR_KEYTAB_SIZE];
+extern IR_KEYTAB_TYPE ir_codes_evga_indtube[IR_KEYTAB_SIZE];
+
 #endif
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 719cd4ab9695059e00f5248d1dceb534381fccb3 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Sun, 14 Jun 2009 07:12:11 -0300
Subject: V4L/DVB (12108): v4l2-i2c-drv.h: add comment describing when not to
 use this header.

Make it very clear that this header should not be used for i2c drivers that
do not need to be compiled for pre-2.6.26 kernels.

As soon as the minimum supported kernel in the v4l-dvb repository becomes
2.6.26 or up, then this header should be removed entirely.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/media/v4l2-i2c-drv.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/media/v4l2-i2c-drv.h b/include/media/v4l2-i2c-drv.h
index 10a2882c3cb..74bf741d1a9 100644
--- a/include/media/v4l2-i2c-drv.h
+++ b/include/media/v4l2-i2c-drv.h
@@ -22,7 +22,7 @@
  */
 
 /* NOTE: the full version of this header is in the v4l-dvb repository
- * and allows v4l i2c drivers to be compiled on older kernels as well.
+ * and allows v4l i2c drivers to be compiled on pre-2.6.26 kernels.
  * The version of this header as it appears in the kernel is a stripped
  * version (without all the backwards compatibility stuff) and so it
  * looks a bit odd.
@@ -30,6 +30,9 @@
  * If you look at the full version then you will understand the reason
  * for introducing this header since you really don't want to have all
  * the tricky backwards compatibility code in each and every i2c driver.
+ *
+ * If the i2c driver will never be compiled for pre-2.6.26 kernels, then
+ * DO NOT USE this header! Just write it as a regular i2c driver.
  */
 
 #ifndef __V4L2_I2C_DRV_H__
-- 
cgit v1.2.3-70-g09d2


From f0222c7d860f09a61bec5e500539f28db0184b38 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Tue, 9 Jun 2009 17:12:33 -0300
Subject: V4L/DVB (12125): v4l2: add new s_config subdev ops and
 v4l2_i2c_new_subdev_cfg/board calls

Add a new s_config core ops call: this is called with the irq and platform
data to be used to initialize the subdev.

Added new v4l2_i2c_new_subdev_cfg and v4l2_i2c_new_subdev_board calls
that allows you to pass these new arguments.

The existing v4l2_i2c_new_subdev functions were modified to also call
s_config.

In the future the existing v4l2_i2c_new_subdev functions will be replaced
by a single v4l2_i2c_new_subdev function similar to v4l2_i2c_new_subdev_cfg
but without the irq and platform_data arguments.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/v4l2-common.c | 105 ++++++++++++++++++++++++++++++++++++++
 include/media/v4l2-common.h       |  16 ++++++
 include/media/v4l2-subdev.h       |   7 ++-
 3 files changed, 127 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/media/video/v4l2-common.c b/drivers/media/video/v4l2-common.c
index f96475626da..e7b443c116f 100644
--- a/drivers/media/video/v4l2-common.c
+++ b/drivers/media/video/v4l2-common.c
@@ -802,6 +802,17 @@ struct v4l2_subdev *v4l2_i2c_new_subdev(struct v4l2_device *v4l2_dev,
 	/* Decrease the module use count to match the first try_module_get. */
 	module_put(client->driver->driver.owner);
 
+	if (sd) {
+		/* We return errors from v4l2_subdev_call only if we have the
+		   callback as the .s_config is not mandatory */
+		int err = v4l2_subdev_call(sd, core, s_config, 0, NULL);
+
+		if (err && err != -ENOIOCTLCMD) {
+			v4l2_device_unregister_subdev(sd);
+			sd = NULL;
+		}
+	}
+
 error:
 	/* If we have a client but no subdev, then something went wrong and
 	   we must unregister the client. */
@@ -852,6 +863,17 @@ struct v4l2_subdev *v4l2_i2c_new_probed_subdev(struct v4l2_device *v4l2_dev,
 	/* Decrease the module use count to match the first try_module_get. */
 	module_put(client->driver->driver.owner);
 
+	if (sd) {
+		/* We return errors from v4l2_subdev_call only if we have the
+		   callback as the .s_config is not mandatory */
+		int err = v4l2_subdev_call(sd, core, s_config, 0, NULL);
+
+		if (err && err != -ENOIOCTLCMD) {
+			v4l2_device_unregister_subdev(sd);
+			sd = NULL;
+		}
+	}
+
 error:
 	/* If we have a client but no subdev, then something went wrong and
 	   we must unregister the client. */
@@ -872,6 +894,89 @@ struct v4l2_subdev *v4l2_i2c_new_probed_subdev_addr(struct v4l2_device *v4l2_dev
 }
 EXPORT_SYMBOL_GPL(v4l2_i2c_new_probed_subdev_addr);
 
+/* Load an i2c sub-device. */
+struct v4l2_subdev *v4l2_i2c_new_subdev_board(struct v4l2_device *v4l2_dev,
+		struct i2c_adapter *adapter, const char *module_name,
+		struct i2c_board_info *info, const unsigned short *probe_addrs)
+{
+	struct v4l2_subdev *sd = NULL;
+	struct i2c_client *client;
+
+	BUG_ON(!v4l2_dev);
+
+	if (module_name)
+		request_module(module_name);
+
+	/* Create the i2c client */
+	if (info->addr == 0 && probe_addrs)
+		client = i2c_new_probed_device(adapter, info, probe_addrs);
+	else
+		client = i2c_new_device(adapter, info);
+
+	/* Note: by loading the module first we are certain that c->driver
+	   will be set if the driver was found. If the module was not loaded
+	   first, then the i2c core tries to delay-load the module for us,
+	   and then c->driver is still NULL until the module is finally
+	   loaded. This delay-load mechanism doesn't work if other drivers
+	   want to use the i2c device, so explicitly loading the module
+	   is the best alternative. */
+	if (client == NULL || client->driver == NULL)
+		goto error;
+
+	/* Lock the module so we can safely get the v4l2_subdev pointer */
+	if (!try_module_get(client->driver->driver.owner))
+		goto error;
+	sd = i2c_get_clientdata(client);
+
+	/* Register with the v4l2_device which increases the module's
+	   use count as well. */
+	if (v4l2_device_register_subdev(v4l2_dev, sd))
+		sd = NULL;
+	/* Decrease the module use count to match the first try_module_get. */
+	module_put(client->driver->driver.owner);
+
+	if (sd) {
+		/* We return errors from v4l2_subdev_call only if we have the
+		   callback as the .s_config is not mandatory */
+		int err = v4l2_subdev_call(sd, core, s_config,
+				info->irq, info->platform_data);
+
+		if (err && err != -ENOIOCTLCMD) {
+			v4l2_device_unregister_subdev(sd);
+			sd = NULL;
+		}
+	}
+
+error:
+	/* If we have a client but no subdev, then something went wrong and
+	   we must unregister the client. */
+	if (client && sd == NULL)
+		i2c_unregister_device(client);
+	return sd;
+}
+EXPORT_SYMBOL_GPL(v4l2_i2c_new_subdev_board);
+
+struct v4l2_subdev *v4l2_i2c_new_subdev_cfg(struct v4l2_device *v4l2_dev,
+		struct i2c_adapter *adapter,
+		const char *module_name, const char *client_type,
+		int irq, void *platform_data,
+		u8 addr, const unsigned short *probe_addrs)
+{
+	struct i2c_board_info info;
+
+	/* Setup the i2c board info with the device type and
+	   the device address. */
+	memset(&info, 0, sizeof(info));
+	strlcpy(info.type, client_type, sizeof(info.type));
+	info.addr = addr;
+	info.irq = irq;
+	info.platform_data = platform_data;
+
+	return v4l2_i2c_new_subdev_board(v4l2_dev, adapter, module_name,
+			&info, probe_addrs);
+}
+EXPORT_SYMBOL_GPL(v4l2_i2c_new_subdev_cfg);
+
 /* Return i2c client address of v4l2_subdev. */
 unsigned short v4l2_i2c_subdev_addr(struct v4l2_subdev *sd)
 {
diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h
index c48c24e4d0f..95f4364322e 100644
--- a/include/media/v4l2-common.h
+++ b/include/media/v4l2-common.h
@@ -153,6 +153,22 @@ struct v4l2_subdev *v4l2_i2c_new_probed_subdev(struct v4l2_device *v4l2_dev,
 struct v4l2_subdev *v4l2_i2c_new_probed_subdev_addr(struct v4l2_device *v4l2_dev,
 		struct i2c_adapter *adapter,
 		const char *module_name, const char *client_type, u8 addr);
+
+/* Load an i2c module and return an initialized v4l2_subdev struct.
+   Only call request_module if module_name != NULL.
+   The client_type argument is the name of the chip that's on the adapter. */
+struct v4l2_subdev *v4l2_i2c_new_subdev_cfg(struct v4l2_device *v4l2_dev,
+		struct i2c_adapter *adapter,
+		const char *module_name, const char *client_type,
+		int irq, void *platform_data,
+		u8 addr, const unsigned short *probe_addrs);
+
+struct i2c_board_info;
+
+struct v4l2_subdev *v4l2_i2c_new_subdev_board(struct v4l2_device *v4l2_dev,
+		struct i2c_adapter *adapter, const char *module_name,
+		struct i2c_board_info *info, const unsigned short *probe_addrs);
+
 /* Initialize an v4l2_subdev with data from an i2c_client struct */
 void v4l2_i2c_subdev_init(struct v4l2_subdev *sd, struct i2c_client *client,
 		const struct v4l2_subdev_ops *ops);
diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index a503e1cee78..5dcb3678552 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -79,7 +79,11 @@ struct v4l2_decode_vbi_line {
    not yet implemented) since ops provide proper type-checking.
  */
 
-/* init: initialize the sensor registors to some sort of reasonable default
+/* s_config: if set, then it is always called by the v4l2_i2c_new_subdev*
+	functions after the v4l2_subdev was registered. It is used to pass
+	platform data to the subdev which can be used during initialization.
+
+   init: initialize the sensor registors to some sort of reasonable default
 	values. Do not use for new drivers and should be removed in existing
 	drivers.
 
@@ -96,6 +100,7 @@ struct v4l2_decode_vbi_line {
 struct v4l2_subdev_core_ops {
 	int (*g_chip_ident)(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip);
 	int (*log_status)(struct v4l2_subdev *sd);
+	int (*s_config)(struct v4l2_subdev *sd, int irq, void *platform_data);
 	int (*init)(struct v4l2_subdev *sd, u32 val);
 	int (*load_fw)(struct v4l2_subdev *sd);
 	int (*reset)(struct v4l2_subdev *sd, u32 val);
-- 
cgit v1.2.3-70-g09d2


From b0d3159be9a36fd8b7b1cf88b812d951add53d11 Mon Sep 17 00:00:00 2001
From: Trent Piepho <xyzzy@speakeasy.org>
Date: Sat, 30 May 2009 21:45:46 -0300
Subject: V4L/DVB (11901): v4l2: Create helper function for bounding and
 aligning images

Most hardware has limits on minimum and maximum image dimensions and also
requirements about alignment.  For example, image width must be even or a
multiple of four.  Some hardware has requirements that the total image size
(width * height) be a multiple of some power of two.

v4l_bound_align_image() will enforce min and max width and height, power of
two alignment on width and height, and power of two alignment on total
image size.

It uses an efficient algorithm that will try to find the "closest" image
size that meets the requirements.

Signed-off-by: Trent Piepho <xyzzy@speakeasy.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/v4l2-common.c | 75 ++++++++++++++++++++++++++++++++++++++-
 include/media/v4l2-common.h       | 10 ++++++
 2 files changed, 84 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/media/video/v4l2-common.c b/drivers/media/video/v4l2-common.c
index e7b443c116f..1ebbe738019 100644
--- a/drivers/media/video/v4l2-common.c
+++ b/drivers/media/video/v4l2-common.c
@@ -1021,4 +1021,77 @@ const unsigned short *v4l2_i2c_tuner_addrs(enum v4l2_i2c_tuner_type type)
 }
 EXPORT_SYMBOL_GPL(v4l2_i2c_tuner_addrs);
 
-#endif
+/* Clamp x to be between min and max, aligned to a multiple of 2^align.  min
+ * and max don't have to be aligned, but there must be at least one valid
+ * value.  E.g., min=17,max=31,align=4 is not allowed as there are no multiples
+ * of 16 between 17 and 31.  */
+static unsigned int clamp_align(unsigned int x, unsigned int min,
+				unsigned int max, unsigned int align)
+{
+	/* Bits that must be zero to be aligned */
+	unsigned int mask = ~((1 << align) - 1);
+
+	/* Round to nearest aligned value */
+	if (align)
+		x = (x + (1 << (align - 1))) & mask;
+
+	/* Clamp to aligned value of min and max */
+	if (x < min)
+		x = (min + ~mask) & mask;
+	else if (x > max)
+		x = max & mask;
+
+	return x;
+}
+
+/* Bound an image to have a width between wmin and wmax, and height between
+ * hmin and hmax, inclusive.  Additionally, the width will be a multiple of
+ * 2^walign, the height will be a multiple of 2^halign, and the overall size
+ * (width*height) will be a multiple of 2^salign.  The image may be shrunk
+ * or enlarged to fit the alignment constraints.
+ *
+ * The width or height maximum must not be smaller than the corresponding
+ * minimum.  The alignments must not be so high there are no possible image
+ * sizes within the allowed bounds.  wmin and hmin must be at least 1
+ * (don't use 0).  If you don't care about a certain alignment, specify 0,
+ * as 2^0 is 1 and one byte alignment is equivalent to no alignment.  If
+ * you only want to adjust downward, specify a maximum that's the same as
+ * the initial value.
+ */
+void v4l_bound_align_image(u32 *w, unsigned int wmin, unsigned int wmax,
+			   unsigned int walign,
+			   u32 *h, unsigned int hmin, unsigned int hmax,
+			   unsigned int halign, unsigned int salign)
+{
+	*w = clamp_align(*w, wmin, wmax, walign);
+	*h = clamp_align(*h, hmin, hmax, halign);
+
+	/* Usually we don't need to align the size and are done now. */
+	if (!salign)
+		return;
+
+	/* How much alignment do we have? */
+	walign = __ffs(*w);
+	halign = __ffs(*h);
+	/* Enough to satisfy the image alignment? */
+	if (walign + halign < salign) {
+		/* Max walign where there is still a valid width */
+		unsigned int wmaxa = __fls(wmax ^ (wmin - 1));
+		/* Max halign where there is still a valid height */
+		unsigned int hmaxa = __fls(hmax ^ (hmin - 1));
+
+		/* up the smaller alignment until we have enough */
+		do {
+			if (halign >= hmaxa ||
+			    (walign <= halign && walign < wmaxa)) {
+				*w = clamp_align(*w, wmin, wmax, walign + 1);
+				walign = __ffs(*w);
+			} else {
+				*h = clamp_align(*h, hmin, hmax, halign + 1);
+				halign = __ffs(*h);
+			}
+		} while (halign + walign < salign);
+	}
+}
+EXPORT_SYMBOL_GPL(v4l_bound_align_image);
+#endif /* defined(CONFIG_I2C) */
diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h
index 95f4364322e..33a18426ab9 100644
--- a/include/media/v4l2-common.h
+++ b/include/media/v4l2-common.h
@@ -209,4 +209,14 @@ struct v4l2_routing {
 	u32 output;
 };
 
+/* ------------------------------------------------------------------------- */
+
+/* Miscellaneous helper functions */
+
+void v4l_bound_align_image(unsigned int *w, unsigned int wmin,
+			   unsigned int wmax, unsigned int walign,
+			   unsigned int *h, unsigned int hmin,
+			   unsigned int hmax, unsigned int halign,
+			   unsigned int salign);
+
 #endif /* V4L2_COMMON_H_ */
-- 
cgit v1.2.3-70-g09d2


From d5fdd6babcfc2b0e6a8da1acf492a69fb54b4c47 Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Tue, 23 Jun 2009 04:31:07 -0700
Subject: ipv6: Use correct data types for ICMPv6 type and code

Change all the code that deals directly with ICMPv6 type and code
values to use u8 instead of a signed int as that's the actual data
type.

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/icmpv6.h  |  6 +++---
 include/net/protocol.h  |  2 +-
 include/net/rawv6.h     |  2 +-
 include/net/xfrm.h      |  2 +-
 net/dccp/ipv6.c         |  2 +-
 net/ipv6/ah6.c          |  2 +-
 net/ipv6/esp6.c         |  2 +-
 net/ipv6/icmp.c         | 12 ++++++------
 net/ipv6/ip6_tunnel.c   | 18 +++++++++---------
 net/ipv6/ipcomp6.c      |  2 +-
 net/ipv6/mip6.c         |  2 +-
 net/ipv6/raw.c          |  4 ++--
 net/ipv6/route.c        |  2 +-
 net/ipv6/tcp_ipv6.c     |  2 +-
 net/ipv6/tunnel6.c      |  2 +-
 net/ipv6/udp.c          |  6 +++---
 net/ipv6/udp_impl.h     |  2 +-
 net/ipv6/udplite.c      |  2 +-
 net/ipv6/xfrm6_tunnel.c |  2 +-
 net/sctp/ipv6.c         |  2 +-
 20 files changed, 38 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h
index 10d701eec48..b6a85183c33 100644
--- a/include/linux/icmpv6.h
+++ b/include/linux/icmpv6.h
@@ -175,16 +175,16 @@ struct icmp6_filter {
 
 
 extern void				icmpv6_send(struct sk_buff *skb,
-						    int type, int code,
+						    u8 type, u8 code,
 						    __u32 info, 
 						    struct net_device *dev);
 
 extern int				icmpv6_init(void);
-extern int				icmpv6_err_convert(int type, int code,
+extern int				icmpv6_err_convert(u8 type, u8 code,
 							   int *err);
 extern void				icmpv6_cleanup(void);
 extern void				icmpv6_param_prob(struct sk_buff *skb,
-							  int code, int pos);
+							  u8 code, int pos);
 
 struct flowi;
 struct in6_addr;
diff --git a/include/net/protocol.h b/include/net/protocol.h
index ffa5b8b1f1d..1089d5aabd4 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -53,7 +53,7 @@ struct inet6_protocol
 
 	void	(*err_handler)(struct sk_buff *skb,
 			       struct inet6_skb_parm *opt,
-			       int type, int code, int offset,
+			       u8 type, u8 code, int offset,
 			       __be32 info);
 
 	int	(*gso_send_check)(struct sk_buff *skb);
diff --git a/include/net/rawv6.h b/include/net/rawv6.h
index 8a22599f26b..f6b9b830df8 100644
--- a/include/net/rawv6.h
+++ b/include/net/rawv6.h
@@ -6,7 +6,7 @@
 #include <net/protocol.h>
 
 void raw6_icmp_error(struct sk_buff *, int nexthdr,
-		int type, int code, int inner_offset, __be32);
+		u8 type, u8 code, int inner_offset, __be32);
 int raw6_local_deliver(struct sk_buff *, int);
 
 extern int			rawv6_rcv(struct sock *sk,
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 736bca45088..9e3a3f4c1f6 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1274,7 +1274,7 @@ struct xfrm_tunnel {
 struct xfrm6_tunnel {
 	int (*handler)(struct sk_buff *skb);
 	int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
-			   int type, int code, int offset, __be32 info);
+			   u8 type, u8 code, int offset, __be32 info);
 	struct xfrm6_tunnel *next;
 	int priority;
 };
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 05ea7440d9e..3e70faab298 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -85,7 +85,7 @@ static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb)
 }
 
 static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-			int type, int code, int offset, __be32 info)
+			u8 type, u8 code, int offset, __be32 info)
 {
 	struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data;
 	const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 52449f7a1b7..86f42a288c4 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -405,7 +405,7 @@ out:
 }
 
 static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-		    int type, int code, int offset, __be32 info)
+		    u8 type, u8 code, int offset, __be32 info)
 {
 	struct net *net = dev_net(skb->dev);
 	struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index c2f250150db..678bb95b152 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -354,7 +354,7 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
 }
 
 static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-		     int type, int code, int offset, __be32 info)
+		     u8 type, u8 code, int offset, __be32 info)
 {
 	struct net *net = dev_net(skb->dev);
 	struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 36dff880718..eab62a7a8f0 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -117,7 +117,7 @@ static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
 /*
  * Slightly more convenient version of icmpv6_send.
  */
-void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
+void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
 {
 	icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
 	kfree_skb(skb);
@@ -161,7 +161,7 @@ static int is_ineligible(struct sk_buff *skb)
 /*
  * Check the ICMP output rate limit
  */
-static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
+static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type,
 				     struct flowi *fl)
 {
 	struct dst_entry *dst;
@@ -305,7 +305,7 @@ static inline void mip6_addr_swap(struct sk_buff *skb) {}
 /*
  *	Send an ICMP message in response to a packet in error
  */
-void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
+void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 		 struct net_device *dev)
 {
 	struct net *net = dev_net(skb->dev);
@@ -590,7 +590,7 @@ out:
 	icmpv6_xmit_unlock(sk);
 }
 
-static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
+static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
 {
 	struct inet6_protocol *ipprot;
 	int inner_offset;
@@ -643,7 +643,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
 	struct in6_addr *saddr, *daddr;
 	struct ipv6hdr *orig_hdr;
 	struct icmp6hdr *hdr;
-	int type;
+	u8 type;
 
 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 		struct sec_path *sp = skb_sec_path(skb);
@@ -914,7 +914,7 @@ static const struct icmp6_err {
 	},
 };
 
-int icmpv6_err_convert(int type, int code, int *err)
+int icmpv6_err_convert(u8 type, u8 code, int *err)
 {
 	int fatal = 0;
 
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 404d16a97d5..51f410e7775 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -394,13 +394,13 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
 
 static int
 ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
-	    int *type, int *code, int *msg, __u32 *info, int offset)
+	    u8 *type, u8 *code, int *msg, __u32 *info, int offset)
 {
 	struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data;
 	struct ip6_tnl *t;
 	int rel_msg = 0;
-	int rel_type = ICMPV6_DEST_UNREACH;
-	int rel_code = ICMPV6_ADDR_UNREACH;
+	u8 rel_type = ICMPV6_DEST_UNREACH;
+	u8 rel_code = ICMPV6_ADDR_UNREACH;
 	__u32 rel_info = 0;
 	__u16 len;
 	int err = -ENOENT;
@@ -488,11 +488,11 @@ out:
 
 static int
 ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-	   int type, int code, int offset, __be32 info)
+	   u8 type, u8 code, int offset, __be32 info)
 {
 	int rel_msg = 0;
-	int rel_type = type;
-	int rel_code = code;
+	u8 rel_type = type;
+	u8 rel_code = code;
 	__u32 rel_info = ntohl(info);
 	int err;
 	struct sk_buff *skb2;
@@ -586,11 +586,11 @@ out:
 
 static int
 ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-	   int type, int code, int offset, __be32 info)
+	   u8 type, u8 code, int offset, __be32 info)
 {
 	int rel_msg = 0;
-	int rel_type = type;
-	int rel_code = code;
+	u8 rel_type = type;
+	u8 rel_code = code;
 	__u32 rel_info = ntohl(info);
 	int err;
 
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 3a0b3be7ece..79c172f1ff0 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -51,7 +51,7 @@
 #include <linux/mutex.h>
 
 static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-				int type, int code, int offset, __be32 info)
+				u8 type, u8 code, int offset, __be32 info)
 {
 	__be32 spi;
 	struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index f995e19c87a..f797e8c6f3b 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -54,7 +54,7 @@ static inline void *mip6_padn(__u8 *data, __u8 padlen)
 	return data + padlen;
 }
 
-static inline void mip6_param_prob(struct sk_buff *skb, int code, int pos)
+static inline void mip6_param_prob(struct sk_buff *skb, u8 code, int pos)
 {
 	icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
 }
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 8b0b6f94806..d6c3c1c34b2 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -310,7 +310,7 @@ out:
 
 static void rawv6_err(struct sock *sk, struct sk_buff *skb,
 	       struct inet6_skb_parm *opt,
-	       int type, int code, int offset, __be32 info)
+	       u8 type, u8 code, int offset, __be32 info)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -343,7 +343,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
 }
 
 void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
-		int type, int code, int inner_offset, __be32 info)
+		u8 type, u8 code, int inner_offset, __be32 info)
 {
 	struct sock *sk;
 	int hash;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 658293ea05b..1473ee0a1f5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1865,7 +1865,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
  *	Drop the packet on the floor
  */
 
-static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
+static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
 {
 	int type;
 	struct dst_entry *dst = skb_dst(skb);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 53b6a4192b1..58810c65b63 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -317,7 +317,7 @@ failure:
 }
 
 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-		int type, int code, int offset, __be32 info)
+		u8 type, u8 code, int offset, __be32 info)
 {
 	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index 669f280989c..633ad789eff 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -124,7 +124,7 @@ drop:
 }
 
 static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-			int type, int code, int offset, __be32 info)
+			u8 type, u8 code, int offset, __be32 info)
 {
 	struct xfrm6_tunnel *handler;
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 023beda6b22..33b59bd92c4 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -312,7 +312,7 @@ csum_copy_err:
 }
 
 void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-		    int type, int code, int offset, __be32 info,
+		    u8 type, u8 code, int offset, __be32 info,
 		    struct udp_table *udptable)
 {
 	struct ipv6_pinfo *np;
@@ -346,8 +346,8 @@ out:
 }
 
 static __inline__ void udpv6_err(struct sk_buff *skb,
-				 struct inet6_skb_parm *opt, int type,
-				 int code, int offset, __be32 info     )
+				 struct inet6_skb_parm *opt, u8 type,
+				 u8 code, int offset, __be32 info     )
 {
 	__udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
 }
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 23779208c33..6bb303471e2 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -9,7 +9,7 @@
 
 extern int  	__udp6_lib_rcv(struct sk_buff *, struct udp_table *, int );
 extern void 	__udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *,
-			       int , int , int , __be32 , struct udp_table *);
+			       u8 , u8 , int , __be32 , struct udp_table *);
 
 extern int	udp_v6_get_port(struct sock *sk, unsigned short snum);
 
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index ba162a82458..4818c48688f 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -20,7 +20,7 @@ static int udplitev6_rcv(struct sk_buff *skb)
 
 static void udplitev6_err(struct sk_buff *skb,
 			  struct inet6_skb_parm *opt,
-			  int type, int code, int offset, __be32 info)
+			  u8 type, u8 code, int offset, __be32 info)
 {
 	__udp6_lib_err(skb, opt, type, code, offset, info, &udplite_table);
 }
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 80193db224d..81a95c00e50 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -262,7 +262,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb)
 }
 
 static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-			    int type, int code, int offset, __be32 info)
+			    u8 type, u8 code, int offset, __be32 info)
 {
 	/* xfrm6_tunnel native err handling */
 	switch (type) {
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index a63de3f7f18..6a4b1909414 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -133,7 +133,7 @@ static struct notifier_block sctp_inet6addr_notifier = {
 
 /* ICMP error handler. */
 SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-			     int type, int code, int offset, __be32 info)
+			     u8 type, u8 code, int offset, __be32 info)
 {
 	struct inet6_dev *idev;
 	struct sock *sk;
-- 
cgit v1.2.3-70-g09d2


From 0634a632f5dea8281ae7c9a96800582ff9eb1475 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 23 Jun 2009 13:51:19 +0200
Subject: asm-generic: add dummy pgprot_noncached()

Most architectures now provide a pgprot_noncached(), the
remaining ones can simply use an dummy default implementation,
except for cris and xtensa, which should override the
default appropriately.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: Magnus Damm <magnus.damm@gmail.com>
---
 include/asm-generic/pgtable.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index e410f602cab..e2bd73e8f9c 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -129,6 +129,10 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
 #define move_pte(pte, prot, old_addr, new_addr)	(pte)
 #endif
 
+#ifndef pgprot_noncached
+#define pgprot_noncached(prot)	(prot)
+#endif
+
 #ifndef pgprot_writecombine
 #define pgprot_writecombine pgprot_noncached
 #endif
-- 
cgit v1.2.3-70-g09d2


From 788c7df451467df71638dd79a2d63d78c6e13b9c Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Tue, 23 Jun 2009 13:49:05 +0100
Subject: hugetlb: fault flags instead of write_access

handle_mm_fault() is now passing fault flags rather than write_access
down to hugetlb_fault(), so better recognize that in hugetlb_fault(),
and in hugetlb_no_page().

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h |  4 ++--
 mm/hugetlb.c            | 17 +++++++++--------
 2 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index a05a5ef3339..2723513a565 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -33,7 +33,7 @@ void hugetlb_report_meminfo(struct seq_file *);
 int hugetlb_report_node_meminfo(int, char *);
 unsigned long hugetlb_total_pages(void);
 int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
-			unsigned long address, int write_access);
+			unsigned long address, unsigned int flags);
 int hugetlb_reserve_pages(struct inode *inode, long from, long to,
 						struct vm_area_struct *vma,
 						int acctflags);
@@ -98,7 +98,7 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)
 #define pud_huge(x)	0
 #define is_hugepage_only_range(mm, addr, len)	0
 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
-#define hugetlb_fault(mm, vma, addr, write)	({ BUG(); 0; })
+#define hugetlb_fault(mm, vma, addr, flags)	({ BUG(); 0; })
 
 #define hugetlb_change_protection(vma, address, end, newprot)
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a56e6f3ce97..d0351e31f47 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1985,7 +1985,7 @@ static struct page *hugetlbfs_pagecache_page(struct hstate *h,
 }
 
 static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
-			unsigned long address, pte_t *ptep, int write_access)
+			unsigned long address, pte_t *ptep, unsigned int flags)
 {
 	struct hstate *h = hstate_vma(vma);
 	int ret = VM_FAULT_SIGBUS;
@@ -2053,7 +2053,7 @@ retry:
 	 * any allocations necessary to record that reservation occur outside
 	 * the spinlock.
 	 */
-	if (write_access && !(vma->vm_flags & VM_SHARED))
+	if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED))
 		if (vma_needs_reservation(h, vma, address) < 0) {
 			ret = VM_FAULT_OOM;
 			goto backout_unlocked;
@@ -2072,7 +2072,7 @@ retry:
 				&& (vma->vm_flags & VM_SHARED)));
 	set_huge_pte_at(mm, address, ptep, new_pte);
 
-	if (write_access && !(vma->vm_flags & VM_SHARED)) {
+	if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
 		/* Optimization, do the COW without a second fault */
 		ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page);
 	}
@@ -2091,7 +2091,7 @@ backout_unlocked:
 }
 
 int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
-			unsigned long address, int write_access)
+			unsigned long address, unsigned int flags)
 {
 	pte_t *ptep;
 	pte_t entry;
@@ -2112,7 +2112,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	mutex_lock(&hugetlb_instantiation_mutex);
 	entry = huge_ptep_get(ptep);
 	if (huge_pte_none(entry)) {
-		ret = hugetlb_no_page(mm, vma, address, ptep, write_access);
+		ret = hugetlb_no_page(mm, vma, address, ptep, flags);
 		goto out_mutex;
 	}
 
@@ -2126,7 +2126,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * page now as it is used to determine if a reservation has been
 	 * consumed.
 	 */
-	if (write_access && !pte_write(entry)) {
+	if ((flags & FAULT_FLAG_WRITE) && !pte_write(entry)) {
 		if (vma_needs_reservation(h, vma, address) < 0) {
 			ret = VM_FAULT_OOM;
 			goto out_mutex;
@@ -2143,7 +2143,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto out_page_table_lock;
 
 
-	if (write_access) {
+	if (flags & FAULT_FLAG_WRITE) {
 		if (!pte_write(entry)) {
 			ret = hugetlb_cow(mm, vma, address, ptep, entry,
 							pagecache_page);
@@ -2152,7 +2152,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		entry = pte_mkdirty(entry);
 	}
 	entry = pte_mkyoung(entry);
-	if (huge_ptep_set_access_flags(vma, address, ptep, entry, write_access))
+	if (huge_ptep_set_access_flags(vma, address, ptep, entry,
+						flags & FAULT_FLAG_WRITE))
 		update_mmu_cache(vma, address, entry);
 
 out_page_table_lock:
-- 
cgit v1.2.3-70-g09d2


From a5c9b696ec109bb54d547fdb437a7a0c2d514670 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Date: Tue, 23 Jun 2009 12:36:58 -0700
Subject: mm: pass mm to grab_swap_token

If a kthread happens to use get_user_pages() on an mm (as KSM does),
there's a chance that it will end up trying to read in a swap page, then
oops in grab_swap_token() because the kthread has no mm: GUP passes down
the right mm, so grab_swap_token() ought to be using it.

We have not identified a stronger case than KSM's daemon (not yet in
mainline), but the issue must have come up before, since RHEL has included
a fix for this for years (though a different fix, they just back out of
grab_swap_token if current->mm is unset: which is what we first proposed,
but using the right mm here seems more correct).

Reported-by: Izik Eidus <ieidus@redhat.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 12 ++++++------
 mm/memory.c          |  2 +-
 mm/thrash.c          | 32 +++++++++++++++-----------------
 3 files changed, 22 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index c88b36665f7..7c15334f3ff 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -298,8 +298,8 @@ extern int try_to_free_swap(struct page *);
 struct backing_dev_info;
 
 /* linux/mm/thrash.c */
-extern struct mm_struct * swap_token_mm;
-extern void grab_swap_token(void);
+extern struct mm_struct *swap_token_mm;
+extern void grab_swap_token(struct mm_struct *);
 extern void __put_swap_token(struct mm_struct *);
 
 static inline int has_swap_token(struct mm_struct *mm)
@@ -419,10 +419,10 @@ static inline swp_entry_t get_swap_page(void)
 }
 
 /* linux/mm/thrash.c */
-#define put_swap_token(x) do { } while(0)
-#define grab_swap_token()  do { } while(0)
-#define has_swap_token(x) 0
-#define disable_swap_token() do { } while(0)
+#define put_swap_token(mm)	do { } while (0)
+#define grab_swap_token(mm)	do { } while (0)
+#define has_swap_token(mm)	0
+#define disable_swap_token()	do { } while (0)
 
 static inline void
 mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
diff --git a/mm/memory.c b/mm/memory.c
index 50da9511aa7..f46ac18ba23 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2519,7 +2519,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	delayacct_set_flag(DELAYACCT_PF_SWAPIN);
 	page = lookup_swap_cache(entry);
 	if (!page) {
-		grab_swap_token(); /* Contend for token _before_ read-in */
+		grab_swap_token(mm); /* Contend for token _before_ read-in */
 		page = swapin_readahead(entry,
 					GFP_HIGHUSER_MOVABLE, vma, address);
 		if (!page) {
diff --git a/mm/thrash.c b/mm/thrash.c
index c4c5205a9c3..2372d4ed5dd 100644
--- a/mm/thrash.c
+++ b/mm/thrash.c
@@ -26,47 +26,45 @@ static DEFINE_SPINLOCK(swap_token_lock);
 struct mm_struct *swap_token_mm;
 static unsigned int global_faults;
 
-void grab_swap_token(void)
+void grab_swap_token(struct mm_struct *mm)
 {
 	int current_interval;
 
 	global_faults++;
 
-	current_interval = global_faults - current->mm->faultstamp;
+	current_interval = global_faults - mm->faultstamp;
 
 	if (!spin_trylock(&swap_token_lock))
 		return;
 
 	/* First come first served */
 	if (swap_token_mm == NULL) {
-		current->mm->token_priority = current->mm->token_priority + 2;
-		swap_token_mm = current->mm;
+		mm->token_priority = mm->token_priority + 2;
+		swap_token_mm = mm;
 		goto out;
 	}
 
-	if (current->mm != swap_token_mm) {
-		if (current_interval < current->mm->last_interval)
-			current->mm->token_priority++;
+	if (mm != swap_token_mm) {
+		if (current_interval < mm->last_interval)
+			mm->token_priority++;
 		else {
-			if (likely(current->mm->token_priority > 0))
-				current->mm->token_priority--;
+			if (likely(mm->token_priority > 0))
+				mm->token_priority--;
 		}
 		/* Check if we deserve the token */
-		if (current->mm->token_priority >
-				swap_token_mm->token_priority) {
-			current->mm->token_priority += 2;
-			swap_token_mm = current->mm;
+		if (mm->token_priority > swap_token_mm->token_priority) {
+			mm->token_priority += 2;
+			swap_token_mm = mm;
 		}
 	} else {
 		/* Token holder came in again! */
-		current->mm->token_priority += 2;
+		mm->token_priority += 2;
 	}
 
 out:
-	current->mm->faultstamp = global_faults;
-	current->mm->last_interval = current_interval;
+	mm->faultstamp = global_faults;
+	mm->last_interval = current_interval;
 	spin_unlock(&swap_token_lock);
-return;
 }
 
 /* Called on process exit. */
-- 
cgit v1.2.3-70-g09d2


From 01ff53f416757da416413bc32229770a8448b6ef Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 23 Jun 2009 12:37:01 -0700
Subject: rmap: fixup page_referenced() for nommu systems

After the recent changes that went into mm/vmscan.c to overhaul stuff, we
ended up with these warnings on no-mmu systems:

  mm/vmscan.c: In function `shrink_page_list':
  mm/vmscan.c:580: warning: unused variable `vm_flags'
  mm/vmscan.c: In function `shrink_active_list':
  mm/vmscan.c:1294: warning: `vm_flags' may be used uninitialized in this function
  mm/vmscan.c:1242: note: `vm_flags' was declared here

This is because the no-mmu function defines page_referenced() to work on
the first argument only (the page).  It does not clear the vm_flags given
to it because for no-mmu systems, they never actually get utilized.  Since
that is no longer strictly true, we need to set vm_flags to 0 like
everyone else so gcc can do proper dead code elimination without annoying
us with unused warnings.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Cc: David Howells <dhowells@redhat.com>
Acked-by: David McCullough <davidm@snapgear.com>
Cc: Greg Ungerer <gerg@uclinux.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rmap.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 216d024f830..bf116d0dbf2 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -118,7 +118,14 @@ int try_to_munlock(struct page *);
 #define anon_vma_prepare(vma)	(0)
 #define anon_vma_link(vma)	do {} while (0)
 
-#define page_referenced(page, locked, cnt, flags) TestClearPageReferenced(page)
+static inline int page_referenced(struct page *page, int is_locked,
+				  struct mem_cgroup *cnt,
+				  unsigned long *vm_flags)
+{
+	*vm_flags = 0;
+	return TestClearPageReferenced(page);
+}
+
 #define try_to_unmap(page, refs) SWAP_FAIL
 
 static inline int page_mkclean(struct page *page)
-- 
cgit v1.2.3-70-g09d2


From f007e99c8e2e322b8331aba72414715119a2920d Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Sat, 23 May 2009 00:41:15 +0800
Subject: Intel-IOMMU, intr-remap: source-id checking

To support domain-isolation usages, the platform hardware must be
capable of uniquely identifying the requestor (source-id) for each
interrupt message. Without source-id checking for interrupt remapping
, a rouge guest/VM with assigned devices can launch interrupt attacks
to bring down anothe guest/VM or the VMM itself.

This patch adds source-id checking for interrupt remapping, and then
really isolates interrupts for guests/VMs with assigned devices.

Because PCI subsystem is not initialized yet when set up IOAPIC
entries, use read_pci_config_byte to access PCI config space directly.

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 arch/x86/kernel/apic/io_apic.c |   6 +++
 drivers/pci/intr_remapping.c   | 120 +++++++++++++++++++++++++++++++++++++++--
 drivers/pci/intr_remapping.h   |   2 +
 include/linux/dmar.h           |  11 ++++
 4 files changed, 136 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index b7a79207295..4d0216fcb36 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1414,6 +1414,9 @@ int setup_ioapic_entry(int apic_id, int irq,
 		irte.vector = vector;
 		irte.dest_id = IRTE_DEST(destination);
 
+		/* Set source-id of interrupt request */
+		set_ioapic_sid(&irte, apic_id);
+
 		modify_irte(irq, &irte);
 
 		ir_entry->index2 = (index >> 15) & 0x1;
@@ -3290,6 +3293,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 		irte.vector = cfg->vector;
 		irte.dest_id = IRTE_DEST(dest);
 
+		/* Set source-id of interrupt request */
+		set_msi_sid(&irte, pdev);
+
 		modify_irte(irq, &irte);
 
 		msg->address_hi = MSI_ADDR_BASE_HI;
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 44025a0c2bb..4f5b8712931 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -10,6 +10,8 @@
 #include <linux/intel-iommu.h>
 #include "intr_remapping.h"
 #include <acpi/acpi.h>
+#include <asm/pci-direct.h>
+#include "pci.h"
 
 static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
 static int ir_ioapic_num;
@@ -418,6 +420,91 @@ int free_irte(int irq)
 	return rc;
 }
 
+/*
+ * source validation type
+ */
+#define SVT_NO_VERIFY		0x0  /* no verification is required */
+#define SVT_VERIFY_SID_SQ	0x1  /* verify using SID and SQ fiels */
+#define SVT_VERIFY_BUS		0x2  /* verify bus of request-id */
+
+/*
+ * source-id qualifier
+ */
+#define SQ_ALL_16	0x0  /* verify all 16 bits of request-id */
+#define SQ_13_IGNORE_1	0x1  /* verify most significant 13 bits, ignore
+			      * the third least significant bit
+			      */
+#define SQ_13_IGNORE_2	0x2  /* verify most significant 13 bits, ignore
+			      * the second and third least significant bits
+			      */
+#define SQ_13_IGNORE_3	0x3  /* verify most significant 13 bits, ignore
+			      * the least three significant bits
+			      */
+
+/*
+ * set SVT, SQ and SID fields of irte to verify
+ * source ids of interrupt requests
+ */
+static void set_irte_sid(struct irte *irte, unsigned int svt,
+			 unsigned int sq, unsigned int sid)
+{
+	irte->svt = svt;
+	irte->sq = sq;
+	irte->sid = sid;
+}
+
+int set_ioapic_sid(struct irte *irte, int apic)
+{
+	int i;
+	u16 sid = 0;
+
+	if (!irte)
+		return -1;
+
+	for (i = 0; i < MAX_IO_APICS; i++) {
+		if (ir_ioapic[i].id == apic) {
+			sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn;
+			break;
+		}
+	}
+
+	if (sid == 0) {
+		pr_warning("Failed to set source-id of IOAPIC (%d)\n", apic);
+		return -1;
+	}
+
+	set_irte_sid(irte, 1, 0, sid);
+
+	return 0;
+}
+
+int set_msi_sid(struct irte *irte, struct pci_dev *dev)
+{
+	struct pci_dev *bridge;
+
+	if (!irte || !dev)
+		return -1;
+
+	/* PCIe device or Root Complex integrated PCI device */
+	if (dev->is_pcie || !dev->bus->parent) {
+		set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
+			     (dev->bus->number << 8) | dev->devfn);
+		return 0;
+	}
+
+	bridge = pci_find_upstream_pcie_bridge(dev);
+	if (bridge) {
+		if (bridge->is_pcie) /* this is a PCIE-to-PCI/PCIX bridge */
+			set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16,
+				(bridge->bus->number << 8) | dev->bus->number);
+		else /* this is a legacy PCI bridge */
+			set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
+				(bridge->bus->number << 8) | bridge->devfn);
+	}
+
+	return 0;
+}
+
 static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
 {
 	u64 addr;
@@ -624,6 +711,35 @@ error:
 	return -1;
 }
 
+static void ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope,
+				      struct intel_iommu *iommu)
+{
+	struct acpi_dmar_pci_path *path;
+	u8 bus;
+	int count;
+
+	bus = scope->bus;
+	path = (struct acpi_dmar_pci_path *)(scope + 1);
+	count = (scope->length - sizeof(struct acpi_dmar_device_scope))
+		/ sizeof(struct acpi_dmar_pci_path);
+
+	while (--count > 0) {
+		/*
+		 * Access PCI directly due to the PCI
+		 * subsystem isn't initialized yet.
+		 */
+		bus = read_pci_config_byte(bus, path->dev, path->fn,
+					   PCI_SECONDARY_BUS);
+		path++;
+	}
+
+	ir_ioapic[ir_ioapic_num].bus   = bus;
+	ir_ioapic[ir_ioapic_num].devfn = PCI_DEVFN(path->dev, path->fn);
+	ir_ioapic[ir_ioapic_num].iommu = iommu;
+	ir_ioapic[ir_ioapic_num].id    = scope->enumeration_id;
+	ir_ioapic_num++;
+}
+
 static int ir_parse_ioapic_scope(struct acpi_dmar_header *header,
 				 struct intel_iommu *iommu)
 {
@@ -648,9 +764,7 @@ static int ir_parse_ioapic_scope(struct acpi_dmar_header *header,
 			       " 0x%Lx\n", scope->enumeration_id,
 			       drhd->address);
 
-			ir_ioapic[ir_ioapic_num].iommu = iommu;
-			ir_ioapic[ir_ioapic_num].id = scope->enumeration_id;
-			ir_ioapic_num++;
+			ir_parse_one_ioapic_scope(scope, iommu);
 		}
 		start += scope->length;
 	}
diff --git a/drivers/pci/intr_remapping.h b/drivers/pci/intr_remapping.h
index ca48f0df8ac..63a263c1841 100644
--- a/drivers/pci/intr_remapping.h
+++ b/drivers/pci/intr_remapping.h
@@ -3,6 +3,8 @@
 struct ioapic_scope {
 	struct intel_iommu *iommu;
 	unsigned int id;
+	unsigned int bus;	/* PCI bus number */
+	unsigned int devfn;	/* PCI devfn number */
 };
 
 #define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 1731fb5fd77..4a2b162c256 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -126,6 +126,8 @@ extern int free_irte(int irq);
 extern int irq_remapped(int irq);
 extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev);
 extern struct intel_iommu *map_ioapic_to_ir(int apic);
+extern int set_ioapic_sid(struct irte *irte, int apic);
+extern int set_msi_sid(struct irte *irte, struct pci_dev *dev);
 #else
 static inline int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
 {
@@ -156,6 +158,15 @@ static inline struct intel_iommu *map_ioapic_to_ir(int apic)
 {
 	return NULL;
 }
+static inline int set_ioapic_sid(struct irte *irte, int apic)
+{
+	return 0;
+}
+static inline int set_msi_sid(struct irte *irte, struct pci_dev *dev)
+{
+	return 0;
+}
+
 #define irq_remapped(irq)		(0)
 #define enable_intr_remapping(mode)	(-1)
 #define disable_intr_remapping()	(0)
-- 
cgit v1.2.3-70-g09d2


From d55d87fdff8252d0e2f7c28c2d443aee17e9d70f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 22 Jun 2009 02:25:25 +0000
Subject: net: Move rx skb_orphan call to where needed

In order to get the tun driver to account packets, we need to be
able to receive packets with destructors set.  To be on the safe
side, I added an skb_orphan call for all protocols by default since
some of them (IP in particular) cannot handle receiving packets
destructors properly.

Now it seems that at least one protocol (CAN) expects to be able
to pass skb->sk through the rx path without getting clobbered.

So this patch attempts to fix this properly by moving the skb_orphan
call to where it's actually needed.  In particular, I've added it
to skb_set_owner_[rw] which is what most users of skb->destructor
call.

This is actually an improvement for tun too since it means that
we only give back the amount charged to the socket when the skb
is passed to another socket that will also be charged accordingly.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Tested-by: Oliver Hartkopp <olver@hartkopp.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h      | 1 +
 include/net/sock.h           | 2 ++
 net/ax25/ax25_in.c           | 3 +--
 net/core/dev.c               | 2 --
 net/irda/af_irda.c           | 3 ---
 net/irda/ircomm/ircomm_lmp.c | 1 +
 6 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 9f80a766828..d16a304cbed 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -448,6 +448,7 @@ static inline void sctp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
 {
 	struct sctp_ulpevent *event = sctp_skb2event(skb);
 
+	skb_orphan(skb);
 	skb->sk = sk;
 	skb->destructor = sctp_sock_rfree;
 	atomic_add(event->rmem_len, &sk->sk_rmem_alloc);
diff --git a/include/net/sock.h b/include/net/sock.h
index 570c7a12b54..7f5c41cc45a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1250,6 +1250,7 @@ static inline int sk_has_allocations(const struct sock *sk)
 
 static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
 {
+	skb_orphan(skb);
 	skb->sk = sk;
 	skb->destructor = sock_wfree;
 	/*
@@ -1262,6 +1263,7 @@ static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
 
 static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
 {
+	skb_orphan(skb);
 	skb->sk = sk;
 	skb->destructor = sock_rfree;
 	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index 5f1d2107a1d..de56d3983de 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -437,8 +437,7 @@ free:
 int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev,
 		  struct packet_type *ptype, struct net_device *orig_dev)
 {
-	skb->sk = NULL;		/* Initially we don't know who it's for */
-	skb->destructor = NULL;	/* Who initializes this, dammit?! */
+	skb_orphan(skb);
 
 	if (!net_eq(dev_net(dev), &init_net)) {
 		kfree_skb(skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index baf2dc13a34..60b57281227 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2310,8 +2310,6 @@ ncls:
 	if (!skb)
 		goto out;
 
-	skb_orphan(skb);
-
 	type = skb->protocol;
 	list_for_each_entry_rcu(ptype,
 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 5922febe25c..cb762c8723e 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -913,9 +913,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
 	/* Clean up the original one to keep it in listen state */
 	irttp_listen(self->tsap);
 
-	/* Wow ! What is that ? Jean II */
-	skb->sk = NULL;
-	skb->destructor = NULL;
 	kfree_skb(skb);
 	sk->sk_ack_backlog--;
 
diff --git a/net/irda/ircomm/ircomm_lmp.c b/net/irda/ircomm/ircomm_lmp.c
index 67c99d20857..7ba96618660 100644
--- a/net/irda/ircomm/ircomm_lmp.c
+++ b/net/irda/ircomm/ircomm_lmp.c
@@ -196,6 +196,7 @@ static int ircomm_lmp_data_request(struct ircomm_cb *self,
 	/* Don't forget to refcount it - see ircomm_tty_do_softint() */
 	skb_get(skb);
 
+	skb_orphan(skb);
 	skb->destructor = ircomm_lmp_flow_control;
 
 	if ((self->pkt_count++ > 7) && (self->flow_status == FLOW_START)) {
-- 
cgit v1.2.3-70-g09d2


From 86e437f077c68112edcb6854ec036ed7e3f9a7f3 Mon Sep 17 00:00:00 2001
From: Zhao Yakui <yakui.zhao@intel.com>
Date: Tue, 16 Jun 2009 11:23:13 +0800
Subject: ACPI: Add the reference count to avoid unloading ACPI video bus twice

Sometimes both acpi video and i915 driver are compiled as modules.
And there exists the strict dependency between the two drivers.
The acpi video bus will be unloaded in course of unloading the i915 driver.
If we unload the acpi video driver, then the kernel oops will be triggered.

Add the reference count to avoid unloading the ACPI video bus twice.
The reference count should be checked before unregistering the acpi video bus.
If the reference count is already zero, it won't unregister it again.
And after the acpi video bus is already unregistered, the reference count
will be set to zero.

http://bugzilla.kernel.org/show_bug.cgi?id=13396

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Acked-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/video.c                 | 41 ++++++++++++++++++++++++++++++------
 drivers/gpu/drm/i915/i915_opregion.c |  2 +-
 include/acpi/video.h                 |  4 ++--
 3 files changed, 38 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 1bdfb37377e..a63566ba230 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -76,6 +76,7 @@ MODULE_LICENSE("GPL");
 static int brightness_switch_enabled = 1;
 module_param(brightness_switch_enabled, bool, 0644);
 
+static int register_count = 0;
 static int acpi_video_bus_add(struct acpi_device *device);
 static int acpi_video_bus_remove(struct acpi_device *device, int type);
 static int acpi_video_resume(struct acpi_device *device);
@@ -2318,6 +2319,13 @@ static int __init intel_opregion_present(void)
 int acpi_video_register(void)
 {
 	int result = 0;
+	if (register_count) {
+		/*
+		 * if the function of acpi_video_register is already called,
+		 * don't register the acpi_vide_bus again and return no error.
+		 */
+		return 0;
+	}
 
 	acpi_video_dir = proc_mkdir(ACPI_VIDEO_CLASS, acpi_root_dir);
 	if (!acpi_video_dir)
@@ -2329,10 +2337,35 @@ int acpi_video_register(void)
 		return -ENODEV;
 	}
 
+	/*
+	 * When the acpi_video_bus is loaded successfully, increase
+	 * the counter reference.
+	 */
+	register_count = 1;
+
 	return 0;
 }
 EXPORT_SYMBOL(acpi_video_register);
 
+void acpi_video_unregister(void)
+{
+	if (!register_count) {
+		/*
+		 * If the acpi video bus is already unloaded, don't
+		 * unload it again and return directly.
+		 */
+		return;
+	}
+	acpi_bus_unregister_driver(&acpi_video_bus);
+
+	remove_proc_entry(ACPI_VIDEO_CLASS, acpi_root_dir);
+
+	register_count = 0;
+
+	return;
+}
+EXPORT_SYMBOL(acpi_video_unregister);
+
 /*
  * This is kind of nasty. Hardware using Intel chipsets may require
  * the video opregion code to be run first in order to initialise
@@ -2350,16 +2383,12 @@ static int __init acpi_video_init(void)
 	return acpi_video_register();
 }
 
-void acpi_video_exit(void)
+static void __exit acpi_video_exit(void)
 {
-
-	acpi_bus_unregister_driver(&acpi_video_bus);
-
-	remove_proc_entry(ACPI_VIDEO_CLASS, acpi_root_dir);
+	acpi_video_unregister();
 
 	return;
 }
-EXPORT_SYMBOL(acpi_video_exit);
 
 module_init(acpi_video_init);
 module_exit(acpi_video_exit);
diff --git a/drivers/gpu/drm/i915/i915_opregion.c b/drivers/gpu/drm/i915/i915_opregion.c
index dc425e74a26..e4b4e8898e3 100644
--- a/drivers/gpu/drm/i915/i915_opregion.c
+++ b/drivers/gpu/drm/i915/i915_opregion.c
@@ -419,7 +419,7 @@ void intel_opregion_free(struct drm_device *dev, int suspend)
 		return;
 
 	if (!suspend)
-		acpi_video_exit();
+		acpi_video_unregister();
 
 	opregion->acpi->drdy = 0;
 
diff --git a/include/acpi/video.h b/include/acpi/video.h
index af6fe95fd3d..cf7be3dd157 100644
--- a/include/acpi/video.h
+++ b/include/acpi/video.h
@@ -3,10 +3,10 @@
 
 #if (defined CONFIG_ACPI_VIDEO || defined CONFIG_ACPI_VIDEO_MODULE)
 extern int acpi_video_register(void);
-extern int acpi_video_exit(void);
+extern void acpi_video_unregister(void);
 #else
 static inline int acpi_video_register(void) { return 0; }
-static inline void acpi_video_exit(void) { return; }
+static inline void acpi_video_unregister(void) { return; }
 #endif
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From 9d9609851003ebed15957f0f2ce18492739ee124 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 11 Jun 2009 14:31:37 -0400
Subject: Audit: clean up all op= output to include string quoting

A number of places in the audit system we send an op= followed by a string
that includes spaces.  Somehow this works but it's just wrong.  This patch
moves all of those that I could find to be quoted.

Example:

Change From: type=CONFIG_CHANGE msg=audit(1244666690.117:31): auid=0 ses=1
subj=unconfined_u:unconfined_r:auditctl_t:s0-s0:c0.c1023 op=remove rule
key="number2" list=4 res=0

Change To: type=CONFIG_CHANGE msg=audit(1244666690.117:31): auid=0 ses=1
subj=unconfined_u:unconfined_r:auditctl_t:s0-s0:c0.c1023 op="remove rule"
key="number2" list=4 res=0

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h |  3 +++
 kernel/audit.c        |  9 +++++++++
 kernel/audit_tree.c   | 10 ++++------
 kernel/audit_watch.c  |  6 +-----
 kernel/auditfilter.c  | 12 +++++-------
 kernel/auditsc.c      |  8 ++------
 6 files changed, 24 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 4fa2810b675..3c7a358241a 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -599,6 +599,8 @@ extern void		    audit_log_untrustedstring(struct audit_buffer *ab,
 extern void		    audit_log_d_path(struct audit_buffer *ab,
 					     const char *prefix,
 					     struct path *path);
+extern void		    audit_log_key(struct audit_buffer *ab,
+					  char *key);
 extern void		    audit_log_lost(const char *message);
 extern int		    audit_update_lsm_rules(void);
 
@@ -621,6 +623,7 @@ extern int audit_enabled;
 #define audit_log_n_untrustedstring(a,n,s) do { ; } while (0)
 #define audit_log_untrustedstring(a,s) do { ; } while (0)
 #define audit_log_d_path(b, p, d) do { ; } while (0)
+#define audit_log_key(b, k) do { ; } while (0)
 #define audit_enabled 0
 #endif
 #endif
diff --git a/kernel/audit.c b/kernel/audit.c
index e07ad2340db..6194c50e203 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1450,6 +1450,15 @@ void audit_log_d_path(struct audit_buffer *ab, const char *prefix,
 	kfree(pathname);
 }
 
+void audit_log_key(struct audit_buffer *ab, char *key)
+{
+	audit_log_format(ab, " key=");
+	if (key)
+		audit_log_untrustedstring(ab, key);
+	else
+		audit_log_format(ab, "(null)");
+}
+
 /**
  * audit_log_end - end one audit record
  * @ab: the audit_buffer
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 1f6396d7668..3ff0731284a 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -441,13 +441,11 @@ static void kill_rules(struct audit_tree *tree)
 		if (rule->tree) {
 			/* not a half-baked one */
 			ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
-			audit_log_format(ab, "op=remove rule dir=");
+			audit_log_format(ab, "op=");
+			audit_log_string(ab, "remove rule");
+			audit_log_format(ab, " dir=");
 			audit_log_untrustedstring(ab, rule->tree->pathname);
-			if (rule->filterkey) {
-				audit_log_format(ab, " key=");
-				audit_log_untrustedstring(ab, rule->filterkey);
-			} else
-				audit_log_format(ab, " key=(null)");
+			audit_log_key(ab, rule->filterkey);
 			audit_log_format(ab, " list=%d res=1", rule->listnr);
 			audit_log_end(ab);
 			rule->tree = NULL;
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index b49ab019fdf..0e96dbc60ea 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -234,11 +234,7 @@ static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watc
 		audit_log_string(ab, op);
 		audit_log_format(ab, " path=");
 		audit_log_untrustedstring(ab, w->path);
-		if (r->filterkey) {
-			audit_log_format(ab, " key=");
-			audit_log_untrustedstring(ab, r->filterkey);
-		} else
-			audit_log_format(ab, " key=(null)");
+		audit_log_key(ab, r->filterkey);
 		audit_log_format(ab, " list=%d res=1", r->listnr);
 		audit_log_end(ab);
 	}
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 21b623595aa..a70604047f3 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1079,11 +1079,9 @@ static void audit_log_rule_change(uid_t loginuid, u32 sessionid, u32 sid,
 			security_release_secctx(ctx, len);
 		}
 	}
-	audit_log_format(ab, " op=%s rule key=", action);
-	if (rule->filterkey)
-		audit_log_untrustedstring(ab, rule->filterkey);
-	else
-		audit_log_format(ab, "(null)");
+	audit_log_format(ab, " op=");
+	audit_log_string(ab, action);
+	audit_log_key(ab, rule->filterkey);
 	audit_log_format(ab, " list=%d res=%d", rule->listnr, res);
 	audit_log_end(ab);
 }
@@ -1147,7 +1145,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
 			return PTR_ERR(entry);
 
 		err = audit_add_rule(entry);
-		audit_log_rule_change(loginuid, sessionid, sid, "add",
+		audit_log_rule_change(loginuid, sessionid, sid, "add rule",
 				      &entry->rule, !err);
 
 		if (err)
@@ -1163,7 +1161,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
 			return PTR_ERR(entry);
 
 		err = audit_del_rule(entry);
-		audit_log_rule_change(loginuid, sessionid, sid, "remove",
+		audit_log_rule_change(loginuid, sessionid, sid, "remove rule",
 				      &entry->rule, !err);
 
 		audit_free_rule(entry);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 0b862cac6ca..2de95d1582b 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1137,7 +1137,7 @@ static int audit_log_single_execve_arg(struct audit_context *context,
 		if (has_cntl)
 			audit_log_n_hex(*ab, buf, to_send);
 		else
-			audit_log_format(*ab, "\"%s\"", buf);
+			audit_log_string(*ab, buf);
 
 		p += to_send;
 		len_left -= to_send;
@@ -1372,11 +1372,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 
 
 	audit_log_task_info(ab, tsk);
-	if (context->filterkey) {
-		audit_log_format(ab, " key=");
-		audit_log_untrustedstring(ab, context->filterkey);
-	} else
-		audit_log_format(ab, " key=(null)");
+	audit_log_key(ab, context->filterkey);
 	audit_log_end(ab);
 
 	for (aux = context->aux; aux; aux = aux->next) {
-- 
cgit v1.2.3-70-g09d2


From 3e63cbb1efca7dd3137de1bb475e2e068e38ef23 Mon Sep 17 00:00:00 2001
From: Ankit Jain <me@ankitjain.org>
Date: Fri, 19 Jun 2009 14:28:07 -0400
Subject: fs: Add new pre-allocation ioctls to vfs for compatibility with
 legacy xfs ioctls

This patch adds ioctls to vfs for compatibility with legacy XFS
pre-allocation ioctls (XFS_IOC_*RESVP*). The implementation
effectively invokes sys_fallocate for the new ioctls.
Also handles the compat_ioctl case.
Note: These legacy ioctls are also implemented by OCFS2.

[AV: folded fixes from hch]

Signed-off-by: Ankit Jain <me@ankitjain.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/compat_ioctl.c      | 48 +++++++++++++++++++++++++++++++++++++++++
 fs/ioctl.c             | 35 ++++++++++++++++++++++++++++++
 fs/open.c              | 58 +++++++++++++++++++++++++-------------------------
 include/linux/falloc.h | 21 ++++++++++++++++++
 include/linux/fs.h     |  6 ++++++
 5 files changed, 139 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c135202c38b..626c7483b4d 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -31,6 +31,7 @@
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/vt.h>
+#include <linux/falloc.h>
 #include <linux/fs.h>
 #include <linux/file.h>
 #include <linux/ppp_defs.h>
@@ -1779,6 +1780,41 @@ lp_timeout_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
 	return sys_ioctl(fd, cmd, (unsigned long)tn);
 }
 
+/* on ia32 l_start is on a 32-bit boundary */
+#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
+struct space_resv_32 {
+	__s16		l_type;
+	__s16		l_whence;
+	__s64		l_start	__attribute__((packed));
+			/* len == 0 means until end of file */
+	__s64		l_len __attribute__((packed));
+	__s32		l_sysid;
+	__u32		l_pid;
+	__s32		l_pad[4];	/* reserve area */
+};
+
+#define FS_IOC_RESVSP_32		_IOW ('X', 40, struct space_resv_32)
+#define FS_IOC_RESVSP64_32	_IOW ('X', 42, struct space_resv_32)
+
+/* just account for different alignment */
+static int compat_ioctl_preallocate(struct file *file, unsigned long arg)
+{
+	struct space_resv_32	__user *p32 = (void __user *)arg;
+	struct space_resv	__user *p = compat_alloc_user_space(sizeof(*p));
+
+	if (copy_in_user(&p->l_type,	&p32->l_type,	sizeof(s16)) ||
+	    copy_in_user(&p->l_whence,	&p32->l_whence, sizeof(s16)) ||
+	    copy_in_user(&p->l_start,	&p32->l_start,	sizeof(s64)) ||
+	    copy_in_user(&p->l_len,	&p32->l_len,	sizeof(s64)) ||
+	    copy_in_user(&p->l_sysid,	&p32->l_sysid,	sizeof(s32)) ||
+	    copy_in_user(&p->l_pid,	&p32->l_pid,	sizeof(u32)) ||
+	    copy_in_user(&p->l_pad,	&p32->l_pad,	4*sizeof(u32)))
+		return -EFAULT;
+
+	return ioctl_preallocate(file, p);
+}
+#endif
+
 
 typedef int (*ioctl_trans_handler_t)(unsigned int, unsigned int,
 					unsigned long, struct file *);
@@ -2756,6 +2792,18 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
 	case FIOQSIZE:
 		break;
 
+#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
+	case FS_IOC_RESVSP_32:
+	case FS_IOC_RESVSP64_32:
+		error = compat_ioctl_preallocate(filp, arg);
+		goto out_fput;
+#else
+	case FS_IOC_RESVSP:
+	case FS_IOC_RESVSP64:
+		error = ioctl_preallocate(filp, (void __user *)arg);
+		goto out_fput;
+#endif
+
 	case FIBMAP:
 	case FIGETBSZ:
 	case FIONREAD:
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 001f8d3118f..5612880fcbe 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -15,6 +15,7 @@
 #include <linux/uaccess.h>
 #include <linux/writeback.h>
 #include <linux/buffer_head.h>
+#include <linux/falloc.h>
 
 #include <asm/ioctls.h>
 
@@ -403,6 +404,37 @@ EXPORT_SYMBOL(generic_block_fiemap);
 
 #endif  /*  CONFIG_BLOCK  */
 
+/*
+ * This provides compatibility with legacy XFS pre-allocation ioctls
+ * which predate the fallocate syscall.
+ *
+ * Only the l_start, l_len and l_whence fields of the 'struct space_resv'
+ * are used here, rest are ignored.
+ */
+int ioctl_preallocate(struct file *filp, void __user *argp)
+{
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct space_resv sr;
+
+	if (copy_from_user(&sr, argp, sizeof(sr)))
+		return -EFAULT;
+
+	switch (sr.l_whence) {
+	case SEEK_SET:
+		break;
+	case SEEK_CUR:
+		sr.l_start += filp->f_pos;
+		break;
+	case SEEK_END:
+		sr.l_start += i_size_read(inode);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return do_fallocate(filp, FALLOC_FL_KEEP_SIZE, sr.l_start, sr.l_len);
+}
+
 static int file_ioctl(struct file *filp, unsigned int cmd,
 		unsigned long arg)
 {
@@ -414,6 +446,9 @@ static int file_ioctl(struct file *filp, unsigned int cmd,
 		return ioctl_fibmap(filp, p);
 	case FIONREAD:
 		return put_user(i_size_read(inode) - filp->f_pos, p);
+	case FS_IOC_RESVSP:
+	case FS_IOC_RESVSP64:
+		return ioctl_preallocate(filp, p);
 	}
 
 	return vfs_ioctl(filp, cmd, arg);
diff --git a/fs/open.c b/fs/open.c
index 7200e23d925..dd98e807602 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -378,63 +378,63 @@ SYSCALL_ALIAS(sys_ftruncate64, SyS_ftruncate64);
 #endif
 #endif /* BITS_PER_LONG == 32 */
 
-SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
+
+int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 {
-	struct file *file;
-	struct inode *inode;
-	long ret = -EINVAL;
+	struct inode *inode = file->f_path.dentry->d_inode;
+	long ret;
 
 	if (offset < 0 || len <= 0)
-		goto out;
+		return -EINVAL;
 
 	/* Return error if mode is not supported */
-	ret = -EOPNOTSUPP;
 	if (mode && !(mode & FALLOC_FL_KEEP_SIZE))
-		goto out;
+		return -EOPNOTSUPP;
 
-	ret = -EBADF;
-	file = fget(fd);
-	if (!file)
-		goto out;
 	if (!(file->f_mode & FMODE_WRITE))
-		goto out_fput;
+		return -EBADF;
 	/*
 	 * Revalidate the write permissions, in case security policy has
 	 * changed since the files were opened.
 	 */
 	ret = security_file_permission(file, MAY_WRITE);
 	if (ret)
-		goto out_fput;
+		return ret;
 
-	inode = file->f_path.dentry->d_inode;
-
-	ret = -ESPIPE;
 	if (S_ISFIFO(inode->i_mode))
-		goto out_fput;
+		return -ESPIPE;
 
-	ret = -ENODEV;
 	/*
 	 * Let individual file system decide if it supports preallocation
 	 * for directories or not.
 	 */
 	if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
-		goto out_fput;
+		return -ENODEV;
 
-	ret = -EFBIG;
 	/* Check for wrap through zero too */
 	if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
-		goto out_fput;
+		return -EFBIG;
 
-	if (inode->i_op->fallocate)
-		ret = inode->i_op->fallocate(inode, mode, offset, len);
-	else
-		ret = -EOPNOTSUPP;
+	if (!inode->i_op->fallocate)
+		return -EOPNOTSUPP;
 
-out_fput:
-	fput(file);
-out:
-	return ret;
+	return inode->i_op->fallocate(inode, mode, offset, len);
 }
+
+SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
+{
+	struct file *file;
+	int error = -EBADF;
+
+	file = fget(fd);
+	if (file) {
+		error = do_fallocate(file, mode, offset, len);
+		fput(file);
+	}
+
+	return error;
+}
+
 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
 asmlinkage long SyS_fallocate(long fd, long mode, loff_t offset, loff_t len)
 {
diff --git a/include/linux/falloc.h b/include/linux/falloc.h
index 8e912ab6a07..3c155107d61 100644
--- a/include/linux/falloc.h
+++ b/include/linux/falloc.h
@@ -3,4 +3,25 @@
 
 #define FALLOC_FL_KEEP_SIZE	0x01 /* default is extend size */
 
+#ifdef __KERNEL__
+
+/*
+ * Space reservation ioctls and argument structure
+ * are designed to be compatible with the legacy XFS ioctls.
+ */
+struct space_resv {
+	__s16		l_type;
+	__s16		l_whence;
+	__s64		l_start;
+	__s64		l_len;		/* len == 0 means until end of file */
+	__s32		l_sysid;
+	__u32		l_pid;
+	__s32		l_pad[4];	/* reserved area */
+};
+
+#define FS_IOC_RESVSP		_IOW('X', 40, struct space_resv)
+#define FS_IOC_RESVSP64		_IOW('X', 42, struct space_resv)
+
+#endif /* __KERNEL__ */
+
 #endif /* _FALLOC_H_ */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1ff5e4e0195..79e302ddde0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1906,6 +1906,8 @@ static inline int break_lease(struct inode *inode, unsigned int mode)
 
 extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
 		       struct file *filp);
+extern int do_fallocate(struct file *file, int mode, loff_t offset,
+			loff_t len);
 extern long do_sys_open(int dfd, const char __user *filename, int flags,
 			int mode);
 extern struct file *filp_open(const char *, int, int);
@@ -1914,6 +1916,10 @@ extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
 extern int filp_close(struct file *, fl_owner_t id);
 extern char * getname(const char __user *);
 
+/* fs/ioctl.c */
+
+extern int ioctl_preallocate(struct file *filp, void __user *argp);
+
 /* fs/dcache.c */
 extern void __init vfs_caches_init_early(void);
 extern void __init vfs_caches_init(unsigned long);
-- 
cgit v1.2.3-70-g09d2


From f19d4a8fa6f9b6ccf54df0971c97ffcaa390b7b0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 8 Jun 2009 19:50:45 -0400
Subject: add caching of ACLs in struct inode

No helpers, no conversions yet.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inode.c         | 10 ++++++++++
 include/linux/fs.h |  7 +++++++
 2 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/fs/inode.c b/fs/inode.c
index f643be565df..e193cd592fa 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -25,6 +25,7 @@
 #include <linux/fsnotify.h>
 #include <linux/mount.h>
 #include <linux/async.h>
+#include <linux/posix_acl.h>
 
 /*
  * This is needed for the following functions:
@@ -189,6 +190,9 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
 	}
 	inode->i_private = NULL;
 	inode->i_mapping = mapping;
+#ifdef CONFIG_FS_POSIX_ACL
+	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
+#endif
 
 #ifdef CONFIG_FSNOTIFY
 	inode->i_fsnotify_mask = 0;
@@ -227,6 +231,12 @@ void destroy_inode(struct inode *inode)
 	ima_inode_free(inode);
 	security_inode_free(inode);
 	fsnotify_inode_delete(inode);
+#ifdef CONFIG_FS_POSIX_ACL
+	if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED)
+		posix_acl_release(inode->i_acl);
+	if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
+		posix_acl_release(inode->i_default_acl);
+#endif
 	if (inode->i_sb->s_op->destroy_inode)
 		inode->i_sb->s_op->destroy_inode(inode);
 	else
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 79e302ddde0..0872372184f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -710,6 +710,9 @@ static inline int mapping_writably_mapped(struct address_space *mapping)
 #define i_size_ordered_init(inode) do { } while (0)
 #endif
 
+struct posix_acl;
+#define ACL_NOT_CACHED ((void *)(-1))
+
 struct inode {
 	struct hlist_node	i_hash;
 	struct list_head	i_list;
@@ -772,6 +775,10 @@ struct inode {
 	atomic_t		i_writecount;
 #ifdef CONFIG_SECURITY
 	void			*i_security;
+#endif
+#ifdef CONFIG_FS_POSIX_ACL
+	struct posix_acl	*i_acl;
+	struct posix_acl	*i_default_acl;
 #endif
 	void			*i_private; /* fs or device private pointer */
 };
-- 
cgit v1.2.3-70-g09d2


From 6582a0e6f6bc7bf64817b9e1a424782855292ab0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 8 Jun 2009 19:53:58 -0400
Subject: switch ext3 to inode->i_acl

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext3/acl.c             | 22 ++++++++++------------
 fs/ext3/acl.h             |  4 ----
 fs/ext3/inode.c           |  4 ----
 fs/ext3/super.c           | 16 ----------------
 include/linux/ext3_fs_i.h |  4 ----
 5 files changed, 10 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index e0c74545171..a9707689d9e 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -134,7 +134,7 @@ ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl)
 	if (acl) {
 		spin_lock(&inode->i_lock);
 		acl = *i_acl;
-		if (acl != EXT3_ACL_NOT_CACHED)
+		if (acl != ACL_NOT_CACHED)
 			acl = posix_acl_dup(acl);
 		spin_unlock(&inode->i_lock);
 	}
@@ -147,7 +147,7 @@ ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
                   struct posix_acl *acl)
 {
 	spin_lock(&inode->i_lock);
-	if (*i_acl != EXT3_ACL_NOT_CACHED)
+	if (*i_acl != ACL_NOT_CACHED)
 		posix_acl_release(*i_acl);
 	*i_acl = posix_acl_dup(acl);
 	spin_unlock(&inode->i_lock);
@@ -161,7 +161,6 @@ ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
 static struct posix_acl *
 ext3_get_acl(struct inode *inode, int type)
 {
-	struct ext3_inode_info *ei = EXT3_I(inode);
 	int name_index;
 	char *value = NULL;
 	struct posix_acl *acl;
@@ -172,15 +171,15 @@ ext3_get_acl(struct inode *inode, int type)
 
 	switch(type) {
 		case ACL_TYPE_ACCESS:
-			acl = ext3_iget_acl(inode, &ei->i_acl);
-			if (acl != EXT3_ACL_NOT_CACHED)
+			acl = ext3_iget_acl(inode, &inode->i_acl);
+			if (acl != ACL_NOT_CACHED)
 				return acl;
 			name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
 			break;
 
 		case ACL_TYPE_DEFAULT:
-			acl = ext3_iget_acl(inode, &ei->i_default_acl);
-			if (acl != EXT3_ACL_NOT_CACHED)
+			acl = ext3_iget_acl(inode, &inode->i_default_acl);
+			if (acl != ACL_NOT_CACHED)
 				return acl;
 			name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT;
 			break;
@@ -206,11 +205,11 @@ ext3_get_acl(struct inode *inode, int type)
 	if (!IS_ERR(acl)) {
 		switch(type) {
 			case ACL_TYPE_ACCESS:
-				ext3_iset_acl(inode, &ei->i_acl, acl);
+				ext3_iset_acl(inode, &inode->i_acl, acl);
 				break;
 
 			case ACL_TYPE_DEFAULT:
-				ext3_iset_acl(inode, &ei->i_default_acl, acl);
+				ext3_iset_acl(inode, &inode->i_default_acl, acl);
 				break;
 		}
 	}
@@ -226,7 +225,6 @@ static int
 ext3_set_acl(handle_t *handle, struct inode *inode, int type,
 	     struct posix_acl *acl)
 {
-	struct ext3_inode_info *ei = EXT3_I(inode);
 	int name_index;
 	void *value = NULL;
 	size_t size = 0;
@@ -274,11 +272,11 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
 	if (!error) {
 		switch(type) {
 			case ACL_TYPE_ACCESS:
-				ext3_iset_acl(inode, &ei->i_acl, acl);
+				ext3_iset_acl(inode, &inode->i_acl, acl);
 				break;
 
 			case ACL_TYPE_DEFAULT:
-				ext3_iset_acl(inode, &ei->i_default_acl, acl);
+				ext3_iset_acl(inode, &inode->i_default_acl, acl);
 				break;
 		}
 	}
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 42da16b8cac..07d15a3a596 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -53,10 +53,6 @@ static inline int ext3_acl_count(size_t size)
 
 #ifdef CONFIG_EXT3_FS_POSIX_ACL
 
-/* Value for inode->u.ext3_i.i_acl and inode->u.ext3_i.i_default_acl
-   if the ACL has not been cached */
-#define EXT3_ACL_NOT_CACHED ((void *)-1)
-
 /* acl.c */
 extern int ext3_permission (struct inode *, int);
 extern int ext3_acl_chmod (struct inode *);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 05dea8132fc..5f51fed5c75 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2752,10 +2752,6 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
 		return inode;
 
 	ei = EXT3_I(inode);
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
-	ei->i_acl = EXT3_ACL_NOT_CACHED;
-	ei->i_default_acl = EXT3_ACL_NOT_CACHED;
-#endif
 	ei->i_block_alloc_info = NULL;
 
 	ret = __ext3_get_inode_loc(inode, &iloc, 0);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 601e881e610..524b349c629 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -464,10 +464,6 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
 	ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS);
 	if (!ei)
 		return NULL;
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
-	ei->i_acl = EXT3_ACL_NOT_CACHED;
-	ei->i_default_acl = EXT3_ACL_NOT_CACHED;
-#endif
 	ei->i_block_alloc_info = NULL;
 	ei->vfs_inode.i_version = 1;
 	return &ei->vfs_inode;
@@ -518,18 +514,6 @@ static void destroy_inodecache(void)
 static void ext3_clear_inode(struct inode *inode)
 {
 	struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info;
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
-	if (EXT3_I(inode)->i_acl &&
-			EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) {
-		posix_acl_release(EXT3_I(inode)->i_acl);
-		EXT3_I(inode)->i_acl = EXT3_ACL_NOT_CACHED;
-	}
-	if (EXT3_I(inode)->i_default_acl &&
-			EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) {
-		posix_acl_release(EXT3_I(inode)->i_default_acl);
-		EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED;
-	}
-#endif
 	ext3_discard_reservation(inode);
 	EXT3_I(inode)->i_block_alloc_info = NULL;
 	if (unlikely(rsv))
diff --git a/include/linux/ext3_fs_i.h b/include/linux/ext3_fs_i.h
index 7894dd0f3b7..ca1bfe90004 100644
--- a/include/linux/ext3_fs_i.h
+++ b/include/linux/ext3_fs_i.h
@@ -103,10 +103,6 @@ struct ext3_inode_info {
 	 */
 	struct rw_semaphore xattr_sem;
 #endif
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
-	struct posix_acl	*i_acl;
-	struct posix_acl	*i_default_acl;
-#endif
 
 	struct list_head i_orphan;	/* unlinked but open inodes */
 
-- 
cgit v1.2.3-70-g09d2


From 7a77b15d9294749809de918e24bebc39e0fbc9ab Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 8 Jun 2009 21:01:13 -0400
Subject: switch reiserfs to usual conventions for caching ACLs

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/reiserfs/super.c          | 12 ++++++------
 fs/reiserfs/xattr_acl.c      | 21 ++++++++-------------
 include/linux/reiserfs_acl.h |  4 ++--
 3 files changed, 16 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 2969773cfc2..b194451fc04 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -530,8 +530,8 @@ static void init_once(void *foo)
 	INIT_LIST_HEAD(&ei->i_prealloc_list);
 	inode_init_once(&ei->vfs_inode);
 #ifdef CONFIG_REISERFS_FS_POSIX_ACL
-	ei->i_acl_access = NULL;
-	ei->i_acl_default = NULL;
+	ei->i_acl_access = ACL_NOT_CACHED;
+	ei->i_acl_default = ACL_NOT_CACHED;
 #endif
 }
 
@@ -586,14 +586,14 @@ static void reiserfs_clear_inode(struct inode *inode)
 	struct posix_acl *acl;
 
 	acl = REISERFS_I(inode)->i_acl_access;
-	if (acl && !IS_ERR(acl))
+	if (acl && acl != ACL_NOT_CACHED)
 		posix_acl_release(acl);
-	REISERFS_I(inode)->i_acl_access = NULL;
+	REISERFS_I(inode)->i_acl_access = ACL_NOT_CACHED;
 
 	acl = REISERFS_I(inode)->i_acl_default;
-	if (acl && !IS_ERR(acl))
+	if (acl && acl != ACL_NOT_CACHED)
 		posix_acl_release(acl);
-	REISERFS_I(inode)->i_acl_default = NULL;
+	REISERFS_I(inode)->i_acl_default = ACL_NOT_CACHED;
 }
 #else
 #define reiserfs_clear_inode NULL
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index a1a7e3530e1..7b3aeb9327d 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -192,19 +192,19 @@ static inline void iset_acl(struct inode *inode, struct posix_acl **i_acl,
 			    struct posix_acl *acl)
 {
 	spin_lock(&inode->i_lock);
-	if (*i_acl != ERR_PTR(-ENODATA))
+	if (*i_acl != ACL_NOT_CACHED)
 		posix_acl_release(*i_acl);
-	*i_acl = acl ? posix_acl_dup(acl) : ERR_PTR(-ENODATA);
+	*i_acl = posix_acl_dup(acl);
 	spin_unlock(&inode->i_lock);
 }
 
 static inline struct posix_acl *iget_acl(struct inode *inode,
 					 struct posix_acl **i_acl)
 {
-	struct posix_acl *acl = ERR_PTR(-ENODATA);
+	struct posix_acl *acl = ACL_NOT_CACHED;
 
 	spin_lock(&inode->i_lock);
-	if (*i_acl != ERR_PTR(-ENODATA))
+	if (*i_acl != ACL_NOT_CACHED)
 		acl = posix_acl_dup(*i_acl);
 	spin_unlock(&inode->i_lock);
 
@@ -239,15 +239,13 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 	}
 
 	acl = iget_acl(inode, p_acl);
-	if (acl && !IS_ERR(acl))
+	if (acl != ACL_NOT_CACHED)
 		return acl;
-	else if (PTR_ERR(acl) == -ENODATA)
-		return NULL;
 
 	size = reiserfs_xattr_get(inode, name, NULL, 0);
 	if (size < 0) {
 		if (size == -ENODATA || size == -ENOSYS) {
-			*p_acl = ERR_PTR(-ENODATA);
+			*p_acl = NULL;
 			return NULL;
 		}
 		return ERR_PTR(size);
@@ -262,7 +260,7 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 		/* This shouldn't actually happen as it should have
 		   been caught above.. but just in case */
 		acl = NULL;
-		*p_acl = ERR_PTR(-ENODATA);
+		*p_acl = acl;
 	} else if (retval < 0) {
 		acl = ERR_PTR(retval);
 	} else {
@@ -379,11 +377,8 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
 	}
 
 	acl = reiserfs_get_acl(dir, ACL_TYPE_DEFAULT);
-	if (IS_ERR(acl)) {
-		if (PTR_ERR(acl) == -ENODATA)
-			goto apply_umask;
+	if (IS_ERR(acl))
 		return PTR_ERR(acl);
-	}
 
 	if (acl) {
 		struct posix_acl *acl_copy;
diff --git a/include/linux/reiserfs_acl.h b/include/linux/reiserfs_acl.h
index 8cc65757e47..8f4d8d718b1 100644
--- a/include/linux/reiserfs_acl.h
+++ b/include/linux/reiserfs_acl.h
@@ -58,12 +58,12 @@ extern struct xattr_handler reiserfs_posix_acl_access_handler;
 
 static inline void reiserfs_init_acl_access(struct inode *inode)
 {
-	REISERFS_I(inode)->i_acl_access = NULL;
+	REISERFS_I(inode)->i_acl_access = ACL_NOT_CACHED;
 }
 
 static inline void reiserfs_init_acl_default(struct inode *inode)
 {
-	REISERFS_I(inode)->i_acl_default = NULL;
+	REISERFS_I(inode)->i_acl_default = ACL_NOT_CACHED;
 }
 #else
 
-- 
cgit v1.2.3-70-g09d2


From 281eede0328c84a8f20e0e85b807d5b51c3de4f2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 8 Jun 2009 21:07:04 -0400
Subject: switch reiserfs to inode->i_acl

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/reiserfs/inode.c           |  4 ----
 fs/reiserfs/super.c           | 24 ------------------------
 fs/reiserfs/xattr_acl.c       | 10 ++++------
 include/linux/reiserfs_acl.h  | 17 -----------------
 include/linux/reiserfs_fs_i.h |  4 ----
 5 files changed, 4 insertions(+), 55 deletions(-)

(limited to 'include')

diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 6fd0f47e45d..a14d6cd9eed 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1131,8 +1131,6 @@ static void init_inode(struct inode *inode, struct treepath *path)
 	REISERFS_I(inode)->i_trans_id = 0;
 	REISERFS_I(inode)->i_jl = NULL;
 	mutex_init(&(REISERFS_I(inode)->i_mmap));
-	reiserfs_init_acl_access(inode);
-	reiserfs_init_acl_default(inode);
 	reiserfs_init_xattr_rwsem(inode);
 
 	if (stat_data_v1(ih)) {
@@ -1834,8 +1832,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 	    REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
 	sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
 	mutex_init(&(REISERFS_I(inode)->i_mmap));
-	reiserfs_init_acl_access(inode);
-	reiserfs_init_acl_default(inode);
 	reiserfs_init_xattr_rwsem(inode);
 
 	/* key to search for correct place for new stat data */
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b194451fc04..d3aeb061612 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -529,10 +529,6 @@ static void init_once(void *foo)
 
 	INIT_LIST_HEAD(&ei->i_prealloc_list);
 	inode_init_once(&ei->vfs_inode);
-#ifdef CONFIG_REISERFS_FS_POSIX_ACL
-	ei->i_acl_access = ACL_NOT_CACHED;
-	ei->i_acl_default = ACL_NOT_CACHED;
-#endif
 }
 
 static int init_inodecache(void)
@@ -580,25 +576,6 @@ static void reiserfs_dirty_inode(struct inode *inode)
 	reiserfs_write_unlock(inode->i_sb);
 }
 
-#ifdef CONFIG_REISERFS_FS_POSIX_ACL
-static void reiserfs_clear_inode(struct inode *inode)
-{
-	struct posix_acl *acl;
-
-	acl = REISERFS_I(inode)->i_acl_access;
-	if (acl && acl != ACL_NOT_CACHED)
-		posix_acl_release(acl);
-	REISERFS_I(inode)->i_acl_access = ACL_NOT_CACHED;
-
-	acl = REISERFS_I(inode)->i_acl_default;
-	if (acl && acl != ACL_NOT_CACHED)
-		posix_acl_release(acl);
-	REISERFS_I(inode)->i_acl_default = ACL_NOT_CACHED;
-}
-#else
-#define reiserfs_clear_inode NULL
-#endif
-
 #ifdef CONFIG_QUOTA
 static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
 				    size_t, loff_t);
@@ -612,7 +589,6 @@ static const struct super_operations reiserfs_sops = {
 	.write_inode = reiserfs_write_inode,
 	.dirty_inode = reiserfs_dirty_inode,
 	.delete_inode = reiserfs_delete_inode,
-	.clear_inode = reiserfs_clear_inode,
 	.put_super = reiserfs_put_super,
 	.write_super = reiserfs_write_super,
 	.sync_fs = reiserfs_sync_fs,
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 7b3aeb9327d..b6e473faa8b 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -223,16 +223,15 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 	struct posix_acl *acl, **p_acl;
 	int size;
 	int retval;
-	struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
 
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;
-		p_acl = &reiserfs_i->i_acl_access;
+		p_acl = &inode->i_acl;
 		break;
 	case ACL_TYPE_DEFAULT:
 		name = POSIX_ACL_XATTR_DEFAULT;
-		p_acl = &reiserfs_i->i_acl_default;
+		p_acl = &inode->i_default_acl;
 		break;
 	default:
 		return ERR_PTR(-EINVAL);
@@ -288,7 +287,6 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
 	struct posix_acl **p_acl;
 	size_t size = 0;
 	int error;
-	struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
 
 	if (S_ISLNK(inode->i_mode))
 		return -EOPNOTSUPP;
@@ -296,7 +294,7 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;
-		p_acl = &reiserfs_i->i_acl_access;
+		p_acl = &inode->i_acl;
 		if (acl) {
 			mode_t mode = inode->i_mode;
 			error = posix_acl_equiv_mode(acl, &mode);
@@ -311,7 +309,7 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
 		break;
 	case ACL_TYPE_DEFAULT:
 		name = POSIX_ACL_XATTR_DEFAULT;
-		p_acl = &reiserfs_i->i_acl_default;
+		p_acl = &inode->i_default_acl;
 		if (!S_ISDIR(inode->i_mode))
 			return acl ? -EACCES : 0;
 		break;
diff --git a/include/linux/reiserfs_acl.h b/include/linux/reiserfs_acl.h
index 8f4d8d718b1..b4448853900 100644
--- a/include/linux/reiserfs_acl.h
+++ b/include/linux/reiserfs_acl.h
@@ -56,15 +56,6 @@ int reiserfs_cache_default_acl(struct inode *dir);
 extern struct xattr_handler reiserfs_posix_acl_default_handler;
 extern struct xattr_handler reiserfs_posix_acl_access_handler;
 
-static inline void reiserfs_init_acl_access(struct inode *inode)
-{
-	REISERFS_I(inode)->i_acl_access = ACL_NOT_CACHED;
-}
-
-static inline void reiserfs_init_acl_default(struct inode *inode)
-{
-	REISERFS_I(inode)->i_acl_default = ACL_NOT_CACHED;
-}
 #else
 
 #define reiserfs_cache_default_acl(inode) 0
@@ -86,12 +77,4 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
 {
 	return 0;
 }
-
-static inline void reiserfs_init_acl_access(struct inode *inode)
-{
-}
-
-static inline void reiserfs_init_acl_default(struct inode *inode)
-{
-}
 #endif
diff --git a/include/linux/reiserfs_fs_i.h b/include/linux/reiserfs_fs_i.h
index 76360b36ac3..89f4d3abbf5 100644
--- a/include/linux/reiserfs_fs_i.h
+++ b/include/linux/reiserfs_fs_i.h
@@ -54,10 +54,6 @@ struct reiserfs_inode_info {
 	unsigned int i_trans_id;
 	struct reiserfs_journal_list *i_jl;
 	struct mutex i_mmap;
-#ifdef CONFIG_REISERFS_FS_POSIX_ACL
-	struct posix_acl *i_acl_access;
-	struct posix_acl *i_acl_default;
-#endif
 #ifdef CONFIG_REISERFS_FS_XATTR
 	struct rw_semaphore i_xattr_sem;
 #endif
-- 
cgit v1.2.3-70-g09d2


From 06b16e9f68edaa1e71aee943d3c030bcf7380af1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 8 Jun 2009 19:56:00 -0400
Subject: switch shmem to inode->i_acl

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/shmem_fs.h |  8 --------
 mm/shmem.c               |  9 ++++-----
 mm/shmem_acl.c           | 29 ++++++-----------------------
 3 files changed, 10 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index fd83f2584b1..abff6c9b413 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -19,10 +19,6 @@ struct shmem_inode_info {
 	swp_entry_t		i_direct[SHMEM_NR_DIRECT]; /* first blocks */
 	struct list_head	swaplist;	/* chain of maybes on swap */
 	struct inode		vfs_inode;
-#ifdef CONFIG_TMPFS_POSIX_ACL
-	struct posix_acl	*i_acl;
-	struct posix_acl	*i_default_acl;
-#endif
 };
 
 struct shmem_sb_info {
@@ -45,7 +41,6 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
 #ifdef CONFIG_TMPFS_POSIX_ACL
 int shmem_permission(struct inode *, int);
 int shmem_acl_init(struct inode *, struct inode *);
-void shmem_acl_destroy_inode(struct inode *);
 
 extern struct xattr_handler shmem_xattr_acl_access_handler;
 extern struct xattr_handler shmem_xattr_acl_default_handler;
@@ -57,9 +52,6 @@ static inline int shmem_acl_init(struct inode *inode, struct inode *dir)
 {
 	return 0;
 }
-static inline void shmem_acl_destroy_inode(struct inode *inode)
-{
-}
 #endif  /* CONFIG_TMPFS_POSIX_ACL */
 
 #endif
diff --git a/mm/shmem.c b/mm/shmem.c
index e89d7ec18ed..5f2019fc789 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2379,6 +2379,10 @@ static struct inode *shmem_alloc_inode(struct super_block *sb)
 	p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
 	if (!p)
 		return NULL;
+#ifdef CONFIG_TMPFS_POSIX_ACL
+	p->vfs_inode.i_acl = NULL;
+	p->vfs_inode.i_default_acl = NULL;
+#endif
 	return &p->vfs_inode;
 }
 
@@ -2388,7 +2392,6 @@ static void shmem_destroy_inode(struct inode *inode)
 		/* only struct inode is valid if it's an inline symlink */
 		mpol_free_shared_policy(&SHMEM_I(inode)->policy);
 	}
-	shmem_acl_destroy_inode(inode);
 	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
 }
 
@@ -2397,10 +2400,6 @@ static void init_once(void *foo)
 	struct shmem_inode_info *p = (struct shmem_inode_info *) foo;
 
 	inode_init_once(&p->vfs_inode);
-#ifdef CONFIG_TMPFS_POSIX_ACL
-	p->i_acl = NULL;
-	p->i_default_acl = NULL;
-#endif
 }
 
 static int init_inodecache(void)
diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c
index 8e5aadd7dcd..606a8e757a4 100644
--- a/mm/shmem_acl.c
+++ b/mm/shmem_acl.c
@@ -22,11 +22,11 @@ shmem_get_acl(struct inode *inode, int type)
 	spin_lock(&inode->i_lock);
 	switch(type) {
 		case ACL_TYPE_ACCESS:
-			acl = posix_acl_dup(SHMEM_I(inode)->i_acl);
+			acl = posix_acl_dup(inode->i_acl);
 			break;
 
 		case ACL_TYPE_DEFAULT:
-			acl = posix_acl_dup(SHMEM_I(inode)->i_default_acl);
+			acl = posix_acl_dup(inode->i_default_acl);
 			break;
 	}
 	spin_unlock(&inode->i_lock);
@@ -45,13 +45,13 @@ shmem_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 	spin_lock(&inode->i_lock);
 	switch(type) {
 		case ACL_TYPE_ACCESS:
-			free = SHMEM_I(inode)->i_acl;
-			SHMEM_I(inode)->i_acl = posix_acl_dup(acl);
+			free = inode->i_acl;
+			inode->i_acl = posix_acl_dup(acl);
 			break;
 
 		case ACL_TYPE_DEFAULT:
-			free = SHMEM_I(inode)->i_default_acl;
-			SHMEM_I(inode)->i_default_acl = posix_acl_dup(acl);
+			free = inode->i_default_acl;
+			inode->i_default_acl = posix_acl_dup(acl);
 			break;
 	}
 	spin_unlock(&inode->i_lock);
@@ -154,23 +154,6 @@ shmem_acl_init(struct inode *inode, struct inode *dir)
 	return generic_acl_init(inode, dir, &shmem_acl_ops);
 }
 
-/**
- * shmem_acl_destroy_inode  -  destroy acls hanging off the in-memory inode
- *
- * This is done before destroying the actual inode.
- */
-
-void
-shmem_acl_destroy_inode(struct inode *inode)
-{
-	if (SHMEM_I(inode)->i_acl)
-		posix_acl_release(SHMEM_I(inode)->i_acl);
-	SHMEM_I(inode)->i_acl = NULL;
-	if (SHMEM_I(inode)->i_default_acl)
-		posix_acl_release(SHMEM_I(inode)->i_default_acl);
-	SHMEM_I(inode)->i_default_acl = NULL;
-}
-
 /**
  * shmem_check_acl  -  check_acl() callback for generic_permission()
  */
-- 
cgit v1.2.3-70-g09d2


From 073aaa1b142461d91f83da66db1184d7c1b1edea Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 9 Jun 2009 12:11:54 -0400
Subject: helpers for acl caching + switch to those

helpers: get_cached_acl(inode, type), set_cached_acl(inode, type, acl),
forget_cached_acl(inode, type).

ubifs/xattr.c needed includes reordered, the rest is a plain switchover.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/acl.c            | 44 +++++--------------------
 fs/ext2/acl.c             | 79 ++++++++++----------------------------------
 fs/ext3/acl.c             | 83 +++++++++++------------------------------------
 fs/ext4/acl.c             | 65 +++++--------------------------------
 fs/jffs2/acl.c            | 60 +++++++---------------------------
 fs/jfs/acl.c              | 32 ++++++++----------
 fs/jfs/xattr.c            | 10 ++----
 fs/reiserfs/xattr_acl.c   | 49 ++++++----------------------
 fs/ubifs/xattr.c          |  2 +-
 include/linux/posix_acl.h | 64 ++++++++++++++++++++++++++++++++++++
 10 files changed, 155 insertions(+), 333 deletions(-)

(limited to 'include')

diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 6db8a42a3e5..f128427b995 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -29,51 +29,28 @@
 
 #ifdef CONFIG_FS_POSIX_ACL
 
-static void btrfs_update_cached_acl(struct inode *inode,
-				    struct posix_acl **p_acl,
-				    struct posix_acl *acl)
-{
-	spin_lock(&inode->i_lock);
-	if (*p_acl && *p_acl != ACL_NOT_CACHED)
-		posix_acl_release(*p_acl);
-	*p_acl = posix_acl_dup(acl);
-	spin_unlock(&inode->i_lock);
-}
-
 static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 {
 	int size;
 	const char *name;
 	char *value = NULL;
-	struct posix_acl *acl = NULL, **p_acl;
+	struct posix_acl *acl;
+
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
 
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;
-		p_acl = &inode->i_acl;
 		break;
 	case ACL_TYPE_DEFAULT:
 		name = POSIX_ACL_XATTR_DEFAULT;
-		p_acl = &inode->i_default_acl;
 		break;
 	default:
-		return ERR_PTR(-EINVAL);
+		BUG();
 	}
 
-	/* Handle the cached NULL acl case without locking */
-	acl = ACCESS_ONCE(*p_acl);
-	if (!acl)
-		return acl;
-
-	spin_lock(&inode->i_lock);
-	acl = *p_acl;
-	if (acl != ACL_NOT_CACHED)
-		acl = posix_acl_dup(acl);
-	spin_unlock(&inode->i_lock);
-
-	if (acl != ACL_NOT_CACHED)
-		return acl;
-
 	size = __btrfs_getxattr(inode, name, "", 0);
 	if (size > 0) {
 		value = kzalloc(size, GFP_NOFS);
@@ -82,13 +59,13 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 		size = __btrfs_getxattr(inode, name, value, size);
 		if (size > 0) {
 			acl = posix_acl_from_xattr(value, size);
-			btrfs_update_cached_acl(inode, p_acl, acl);
+			set_cached_acl(inode, type, acl);
 		}
 		kfree(value);
 	} else if (size == -ENOENT || size == -ENODATA || size == 0) {
 		/* FIXME, who returns -ENOENT?  I think nobody */
 		acl = NULL;
-		btrfs_update_cached_acl(inode, p_acl, acl);
+		set_cached_acl(inode, type, acl);
 	} else {
 		acl = ERR_PTR(-EIO);
 	}
@@ -121,7 +98,6 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 {
 	int ret, size = 0;
 	const char *name;
-	struct posix_acl **p_acl;
 	char *value = NULL;
 	mode_t mode;
 
@@ -141,13 +117,11 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 		ret = 0;
 		inode->i_mode = mode;
 		name = POSIX_ACL_XATTR_ACCESS;
-		p_acl = &inode->i_acl;
 		break;
 	case ACL_TYPE_DEFAULT:
 		if (!S_ISDIR(inode->i_mode))
 			return acl ? -EINVAL : 0;
 		name = POSIX_ACL_XATTR_DEFAULT;
-		p_acl = &inode->i_default_acl;
 		break;
 	default:
 		return -EINVAL;
@@ -172,7 +146,7 @@ out:
 	kfree(value);
 
 	if (!ret)
-		btrfs_update_cached_acl(inode, p_acl, acl);
+		set_cached_acl(inode, type, acl);
 
 	return ret;
 }
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index d2ffddc1211..d636e1297ca 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -125,30 +125,6 @@ fail:
 	return ERR_PTR(-EINVAL);
 }
 
-static inline struct posix_acl *
-ext2_iget_acl(struct inode *inode, struct posix_acl **i_acl)
-{
-	struct posix_acl *acl = ACL_NOT_CACHED;
-
-	spin_lock(&inode->i_lock);
-	if (*i_acl != ACL_NOT_CACHED)
-		acl = posix_acl_dup(*i_acl);
-	spin_unlock(&inode->i_lock);
-
-	return acl;
-}
-
-static inline void
-ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl,
-		   struct posix_acl *acl)
-{
-	spin_lock(&inode->i_lock);
-	if (*i_acl != ACL_NOT_CACHED)
-		posix_acl_release(*i_acl);
-	*i_acl = posix_acl_dup(acl);
-	spin_unlock(&inode->i_lock);
-}
-
 /*
  * inode->i_mutex: don't care
  */
@@ -163,23 +139,19 @@ ext2_get_acl(struct inode *inode, int type)
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return NULL;
 
-	switch(type) {
-		case ACL_TYPE_ACCESS:
-			acl = ext2_iget_acl(inode, &inode->i_acl);
-			if (acl != ACL_NOT_CACHED)
-				return acl;
-			name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
-			break;
-
-		case ACL_TYPE_DEFAULT:
-			acl = ext2_iget_acl(inode, &inode->i_default_acl);
-			if (acl != ACL_NOT_CACHED)
-				return acl;
-			name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT;
-			break;
-
-		default:
-			return ERR_PTR(-EINVAL);
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
+		break;
+	case ACL_TYPE_DEFAULT:
+		name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT;
+		break;
+	default:
+		BUG();
 	}
 	retval = ext2_xattr_get(inode, name_index, "", NULL, 0);
 	if (retval > 0) {
@@ -196,17 +168,9 @@ ext2_get_acl(struct inode *inode, int type)
 		acl = ERR_PTR(retval);
 	kfree(value);
 
-	if (!IS_ERR(acl)) {
-		switch(type) {
-			case ACL_TYPE_ACCESS:
-				ext2_iset_acl(inode, &inode->i_acl, acl);
-				break;
+	if (!IS_ERR(acl))
+		set_cached_acl(inode, type, acl);
 
-			case ACL_TYPE_DEFAULT:
-				ext2_iset_acl(inode, &inode->i_default_acl, acl);
-				break;
-		}
-	}
 	return acl;
 }
 
@@ -261,17 +225,8 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 	error = ext2_xattr_set(inode, name_index, "", value, size, 0);
 
 	kfree(value);
-	if (!error) {
-		switch(type) {
-			case ACL_TYPE_ACCESS:
-				ext2_iset_acl(inode, &inode->i_acl, acl);
-				break;
-
-			case ACL_TYPE_DEFAULT:
-				ext2_iset_acl(inode, &inode->i_default_acl, acl);
-				break;
-		}
-	}
+	if (!error)
+		set_cached_acl(inode, type, acl);
 	return error;
 }
 
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index a9707689d9e..e167bae37ef 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -126,33 +126,6 @@ fail:
 	return ERR_PTR(-EINVAL);
 }
 
-static inline struct posix_acl *
-ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl)
-{
-	struct posix_acl *acl = ACCESS_ONCE(*i_acl);
-
-	if (acl) {
-		spin_lock(&inode->i_lock);
-		acl = *i_acl;
-		if (acl != ACL_NOT_CACHED)
-			acl = posix_acl_dup(acl);
-		spin_unlock(&inode->i_lock);
-	}
-
-	return acl;
-}
-
-static inline void
-ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
-                  struct posix_acl *acl)
-{
-	spin_lock(&inode->i_lock);
-	if (*i_acl != ACL_NOT_CACHED)
-		posix_acl_release(*i_acl);
-	*i_acl = posix_acl_dup(acl);
-	spin_unlock(&inode->i_lock);
-}
-
 /*
  * Inode operation get_posix_acl().
  *
@@ -169,24 +142,21 @@ ext3_get_acl(struct inode *inode, int type)
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return NULL;
 
-	switch(type) {
-		case ACL_TYPE_ACCESS:
-			acl = ext3_iget_acl(inode, &inode->i_acl);
-			if (acl != ACL_NOT_CACHED)
-				return acl;
-			name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
-			break;
-
-		case ACL_TYPE_DEFAULT:
-			acl = ext3_iget_acl(inode, &inode->i_default_acl);
-			if (acl != ACL_NOT_CACHED)
-				return acl;
-			name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT;
-			break;
-
-		default:
-			return ERR_PTR(-EINVAL);
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
+		break;
+	case ACL_TYPE_DEFAULT:
+		name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT;
+		break;
+	default:
+		BUG();
 	}
+
 	retval = ext3_xattr_get(inode, name_index, "", NULL, 0);
 	if (retval > 0) {
 		value = kmalloc(retval, GFP_NOFS);
@@ -202,17 +172,9 @@ ext3_get_acl(struct inode *inode, int type)
 		acl = ERR_PTR(retval);
 	kfree(value);
 
-	if (!IS_ERR(acl)) {
-		switch(type) {
-			case ACL_TYPE_ACCESS:
-				ext3_iset_acl(inode, &inode->i_acl, acl);
-				break;
+	if (!IS_ERR(acl))
+		set_cached_acl(inode, type, acl);
 
-			case ACL_TYPE_DEFAULT:
-				ext3_iset_acl(inode, &inode->i_default_acl, acl);
-				break;
-		}
-	}
 	return acl;
 }
 
@@ -269,17 +231,10 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
 				      value, size, 0);
 
 	kfree(value);
-	if (!error) {
-		switch(type) {
-			case ACL_TYPE_ACCESS:
-				ext3_iset_acl(inode, &inode->i_acl, acl);
-				break;
 
-			case ACL_TYPE_DEFAULT:
-				ext3_iset_acl(inode, &inode->i_default_acl, acl);
-				break;
-		}
-	}
+	if (!error)
+		set_cached_acl(inode, type, acl);
+
 	return error;
 }
 
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 0084e3a19d8..f6d8967149c 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -126,33 +126,6 @@ fail:
 	return ERR_PTR(-EINVAL);
 }
 
-static inline struct posix_acl *
-ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl)
-{
-	struct posix_acl *acl = ACCESS_ONCE(*i_acl);
-
-	if (acl) {
-		spin_lock(&inode->i_lock);
-		acl = *i_acl;
-		if (acl != ACL_NOT_CACHED)
-			acl = posix_acl_dup(acl);
-		spin_unlock(&inode->i_lock);
-	}
-
-	return acl;
-}
-
-static inline void
-ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl,
-		struct posix_acl *acl)
-{
-	spin_lock(&inode->i_lock);
-	if (*i_acl != ACL_NOT_CACHED)
-		posix_acl_release(*i_acl);
-	*i_acl = posix_acl_dup(acl);
-	spin_unlock(&inode->i_lock);
-}
-
 /*
  * Inode operation get_posix_acl().
  *
@@ -169,23 +142,19 @@ ext4_get_acl(struct inode *inode, int type)
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return NULL;
 
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
+
 	switch (type) {
 	case ACL_TYPE_ACCESS:
-		acl = ext4_iget_acl(inode, &inode->i_acl);
-		if (acl != ACL_NOT_CACHED)
-			return acl;
 		name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
 		break;
-
 	case ACL_TYPE_DEFAULT:
-		acl = ext4_iget_acl(inode, &inode->i_default_acl);
-		if (acl != ACL_NOT_CACHED)
-			return acl;
 		name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
 		break;
-
 	default:
-		return ERR_PTR(-EINVAL);
+		BUG();
 	}
 	retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
 	if (retval > 0) {
@@ -202,17 +171,9 @@ ext4_get_acl(struct inode *inode, int type)
 		acl = ERR_PTR(retval);
 	kfree(value);
 
-	if (!IS_ERR(acl)) {
-		switch (type) {
-		case ACL_TYPE_ACCESS:
-			ext4_iset_acl(inode, &inode->i_acl, acl);
-			break;
+	if (!IS_ERR(acl))
+		set_cached_acl(inode, type, acl);
 
-		case ACL_TYPE_DEFAULT:
-			ext4_iset_acl(inode, &inode->i_default_acl, acl);
-			break;
-		}
-	}
 	return acl;
 }
 
@@ -269,17 +230,9 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
 				      value, size, 0);
 
 	kfree(value);
-	if (!error) {
-		switch (type) {
-		case ACL_TYPE_ACCESS:
-			ext4_iset_acl(inode, &inode->i_acl, acl);
-			break;
+	if (!error)
+		set_cached_acl(inode, type, acl);
 
-		case ACL_TYPE_DEFAULT:
-			ext4_iset_acl(inode, &inode->i_default_acl, acl);
-			break;
-		}
-	}
 	return error;
 }
 
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index ac16589ebbd..edd2ad6416d 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -156,47 +156,25 @@ static void *jffs2_acl_to_medium(const struct posix_acl *acl, size_t *size)
 	return ERR_PTR(-EINVAL);
 }
 
-static struct posix_acl *jffs2_iget_acl(struct inode *inode, struct posix_acl **i_acl)
-{
-	struct posix_acl *acl = ACL_NOT_CACHED;
-
-	spin_lock(&inode->i_lock);
-	if (*i_acl != ACL_NOT_CACHED)
-		acl = posix_acl_dup(*i_acl);
-	spin_unlock(&inode->i_lock);
-	return acl;
-}
-
-static void jffs2_iset_acl(struct inode *inode, struct posix_acl **i_acl, struct posix_acl *acl)
-{
-	spin_lock(&inode->i_lock);
-	if (*i_acl != ACL_NOT_CACHED)
-		posix_acl_release(*i_acl);
-	*i_acl = posix_acl_dup(acl);
-	spin_unlock(&inode->i_lock);
-}
-
 static struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
 {
 	struct posix_acl *acl;
 	char *value = NULL;
 	int rc, xprefix;
 
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
+
 	switch (type) {
 	case ACL_TYPE_ACCESS:
-		acl = jffs2_iget_acl(inode, &inode->i_acl);
-		if (acl != ACL_NOT_CACHED)
-			return acl;
 		xprefix = JFFS2_XPREFIX_ACL_ACCESS;
 		break;
 	case ACL_TYPE_DEFAULT:
-		acl = jffs2_iget_acl(inode, &inode->i_default_acl);
-		if (acl != ACL_NOT_CACHED)
-			return acl;
 		xprefix = JFFS2_XPREFIX_ACL_DEFAULT;
 		break;
 	default:
-		return ERR_PTR(-EINVAL);
+		BUG();
 	}
 	rc = do_jffs2_getxattr(inode, xprefix, "", NULL, 0);
 	if (rc > 0) {
@@ -214,16 +192,8 @@ static struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
 	}
 	if (value)
 		kfree(value);
-	if (!IS_ERR(acl)) {
-		switch (type) {
-		case ACL_TYPE_ACCESS:
-			jffs2_iset_acl(inode, &inode->i_acl, acl);
-			break;
-		case ACL_TYPE_DEFAULT:
-			jffs2_iset_acl(inode, &inode->i_default_acl, acl);
-			break;
-		}
-	}
+	if (!IS_ERR(acl))
+		set_cached_acl(inode, type, acl);
 	return acl;
 }
 
@@ -283,16 +253,8 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 		return -EINVAL;
 	}
 	rc = __jffs2_set_acl(inode, xprefix, acl);
-	if (!rc) {
-		switch(type) {
-		case ACL_TYPE_ACCESS:
-			jffs2_iset_acl(inode, &inode->i_acl, acl);
-			break;
-		case ACL_TYPE_DEFAULT:
-			jffs2_iset_acl(inode, &inode->i_default_acl, acl);
-			break;
-		}
-	}
+	if (!rc)
+		set_cached_acl(inode, type, acl);
 	return rc;
 }
 
@@ -336,7 +298,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
 		*i_mode &= ~current_umask();
 	} else {
 		if (S_ISDIR(*i_mode))
-			jffs2_iset_acl(inode, &inode->i_default_acl, acl);
+			set_cached_acl(inode, ACL_TYPE_DEFAULT, acl);
 
 		clone = posix_acl_clone(acl, GFP_KERNEL);
 		if (!clone)
@@ -347,7 +309,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
 			return rc;
 		}
 		if (rc > 0)
-			jffs2_iset_acl(inode, &inode->i_acl, clone);
+			set_cached_acl(inode, ACL_TYPE_ACCESS, clone);
 
 		posix_acl_release(clone);
 	}
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 5fcfc9857c1..f272bf032e1 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -31,26 +31,24 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
 {
 	struct posix_acl *acl;
 	char *ea_name;
-	struct posix_acl **p_acl;
 	int size;
 	char *value = NULL;
 
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
+
 	switch(type) {
 		case ACL_TYPE_ACCESS:
 			ea_name = POSIX_ACL_XATTR_ACCESS;
-			p_acl = &inode->i_acl;
 			break;
 		case ACL_TYPE_DEFAULT:
 			ea_name = POSIX_ACL_XATTR_DEFAULT;
-			p_acl = &inode->i_default_acl;
 			break;
 		default:
 			return ERR_PTR(-EINVAL);
 	}
 
-	if (*p_acl != ACL_NOT_CACHED)
-		return posix_acl_dup(*p_acl);
-
 	size = __jfs_getxattr(inode, ea_name, NULL, 0);
 
 	if (size > 0) {
@@ -61,17 +59,18 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
 	}
 
 	if (size < 0) {
-		if (size == -ENODATA) {
-			*p_acl = NULL;
+		if (size == -ENODATA)
 			acl = NULL;
-		} else
+		else
 			acl = ERR_PTR(size);
 	} else {
 		acl = posix_acl_from_xattr(value, size);
-		if (!IS_ERR(acl))
-			*p_acl = posix_acl_dup(acl);
 	}
 	kfree(value);
+	if (!IS_ERR(acl)) {
+		set_cached_acl(inode, type, acl);
+		posix_acl_release(acl);
+	}
 	return acl;
 }
 
@@ -79,7 +78,6 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
 		       struct posix_acl *acl)
 {
 	char *ea_name;
-	struct posix_acl **p_acl;
 	int rc;
 	int size = 0;
 	char *value = NULL;
@@ -90,11 +88,9 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
 	switch(type) {
 		case ACL_TYPE_ACCESS:
 			ea_name = POSIX_ACL_XATTR_ACCESS;
-			p_acl = &inode->i_acl;
 			break;
 		case ACL_TYPE_DEFAULT:
 			ea_name = POSIX_ACL_XATTR_DEFAULT;
-			p_acl = &inode->i_default_acl;
 			if (!S_ISDIR(inode->i_mode))
 				return acl ? -EACCES : 0;
 			break;
@@ -114,11 +110,9 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
 out:
 	kfree(value);
 
-	if (!rc) {
-		if (*p_acl && (*p_acl != ACL_NOT_CACHED))
-			posix_acl_release(*p_acl);
-		*p_acl = posix_acl_dup(acl);
-	}
+	if (!rc)
+		set_cached_acl(inode, type, acl);
+
 	return rc;
 }
 
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index f6e90e34359..fad364548bc 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -727,10 +727,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 		/*
 		 * We're changing the ACL.  Get rid of the cached one
 		 */
-		acl =inode->i_acl;
-		if (acl != ACL_NOT_CACHED)
-			posix_acl_release(acl);
-		inode->i_acl = ACL_NOT_CACHED;
+		forget_cached_acl(inode, ACL_TYPE_ACCESS);
 
 		return 0;
 	} else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) {
@@ -746,10 +743,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 		/*
 		 * We're changing the default ACL.  Get rid of the cached one
 		 */
-		acl = inode->i_default_acl;
-		if (acl && (acl != ACL_NOT_CACHED))
-			posix_acl_release(acl);
-		inode->i_default_acl = ACL_NOT_CACHED;
+		forget_cached_acl(inode, ACL_TYPE_DEFAULT);
 
 		return 0;
 	}
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index b6e473faa8b..35d6e672a27 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -188,29 +188,6 @@ static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size)
 	return ERR_PTR(-EINVAL);
 }
 
-static inline void iset_acl(struct inode *inode, struct posix_acl **i_acl,
-			    struct posix_acl *acl)
-{
-	spin_lock(&inode->i_lock);
-	if (*i_acl != ACL_NOT_CACHED)
-		posix_acl_release(*i_acl);
-	*i_acl = posix_acl_dup(acl);
-	spin_unlock(&inode->i_lock);
-}
-
-static inline struct posix_acl *iget_acl(struct inode *inode,
-					 struct posix_acl **i_acl)
-{
-	struct posix_acl *acl = ACL_NOT_CACHED;
-
-	spin_lock(&inode->i_lock);
-	if (*i_acl != ACL_NOT_CACHED)
-		acl = posix_acl_dup(*i_acl);
-	spin_unlock(&inode->i_lock);
-
-	return acl;
-}
-
 /*
  * Inode operation get_posix_acl().
  *
@@ -220,31 +197,29 @@ static inline struct posix_acl *iget_acl(struct inode *inode,
 struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 {
 	char *name, *value;
-	struct posix_acl *acl, **p_acl;
+	struct posix_acl *acl;
 	int size;
 	int retval;
 
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
+
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;
-		p_acl = &inode->i_acl;
 		break;
 	case ACL_TYPE_DEFAULT:
 		name = POSIX_ACL_XATTR_DEFAULT;
-		p_acl = &inode->i_default_acl;
 		break;
 	default:
-		return ERR_PTR(-EINVAL);
+		BUG();
 	}
 
-	acl = iget_acl(inode, p_acl);
-	if (acl != ACL_NOT_CACHED)
-		return acl;
-
 	size = reiserfs_xattr_get(inode, name, NULL, 0);
 	if (size < 0) {
 		if (size == -ENODATA || size == -ENOSYS) {
-			*p_acl = NULL;
+			set_cached_acl(inode, type, NULL);
 			return NULL;
 		}
 		return ERR_PTR(size);
@@ -259,14 +234,13 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 		/* This shouldn't actually happen as it should have
 		   been caught above.. but just in case */
 		acl = NULL;
-		*p_acl = acl;
 	} else if (retval < 0) {
 		acl = ERR_PTR(retval);
 	} else {
 		acl = posix_acl_from_disk(value, retval);
-		if (!IS_ERR(acl))
-			iset_acl(inode, p_acl, acl);
 	}
+	if (!IS_ERR(acl))
+		set_cached_acl(inode, type, acl);
 
 	kfree(value);
 	return acl;
@@ -284,7 +258,6 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
 {
 	char *name;
 	void *value = NULL;
-	struct posix_acl **p_acl;
 	size_t size = 0;
 	int error;
 
@@ -294,7 +267,6 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;
-		p_acl = &inode->i_acl;
 		if (acl) {
 			mode_t mode = inode->i_mode;
 			error = posix_acl_equiv_mode(acl, &mode);
@@ -309,7 +281,6 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
 		break;
 	case ACL_TYPE_DEFAULT:
 		name = POSIX_ACL_XATTR_DEFAULT;
-		p_acl = &inode->i_default_acl;
 		if (!S_ISDIR(inode->i_mode))
 			return acl ? -EACCES : 0;
 		break;
@@ -342,7 +313,7 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
 	kfree(value);
 
 	if (!error)
-		iset_acl(inode, p_acl, acl);
+		set_cached_acl(inode, type, acl);
 
 	return error;
 }
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index cfd31e229c8..adafcf55653 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -55,9 +55,9 @@
  * ACL support is not implemented.
  */
 
+#include "ubifs.h"
 #include <linux/xattr.h>
 #include <linux/posix_acl_xattr.h>
-#include "ubifs.h"
 
 /*
  * Limit the number of extended attributes per inode so that the total size
diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index 4bc241290c2..0cdba01b775 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -83,4 +83,68 @@ extern int posix_acl_chmod_masq(struct posix_acl *, mode_t);
 extern struct posix_acl *get_posix_acl(struct inode *, int);
 extern int set_posix_acl(struct inode *, int, struct posix_acl *);
 
+static inline struct posix_acl *get_cached_acl(struct inode *inode, int type)
+{
+	struct posix_acl **p, *acl;
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		p = &inode->i_acl;
+		break;
+	case ACL_TYPE_DEFAULT:
+		p = &inode->i_default_acl;
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+	acl = ACCESS_ONCE(*p);
+	if (acl) {
+		spin_lock(&inode->i_lock);
+		acl = *p;
+		if (acl != ACL_NOT_CACHED)
+			acl = posix_acl_dup(acl);
+		spin_unlock(&inode->i_lock);
+	}
+	return acl;
+}
+
+static inline void set_cached_acl(struct inode *inode,
+				  int type,
+				  struct posix_acl *acl)
+{
+	struct posix_acl *old = NULL;
+	spin_lock(&inode->i_lock);
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		old = inode->i_acl;
+		inode->i_acl = posix_acl_dup(acl);
+		break;
+	case ACL_TYPE_DEFAULT:
+		old = inode->i_default_acl;
+		inode->i_default_acl = posix_acl_dup(acl);
+		break;
+	}
+	spin_unlock(&inode->i_lock);
+	if (old != ACL_NOT_CACHED)
+		posix_acl_release(old);
+}
+
+static inline void forget_cached_acl(struct inode *inode, int type)
+{
+	struct posix_acl *old = NULL;
+	spin_lock(&inode->i_lock);
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		old = inode->i_acl;
+		inode->i_acl = ACL_NOT_CACHED;
+		break;
+	case ACL_TYPE_DEFAULT:
+		old = inode->i_default_acl;
+		inode->i_default_acl = ACL_NOT_CACHED;
+		break;
+	}
+	spin_unlock(&inode->i_lock);
+	if (old != ACL_NOT_CACHED)
+		posix_acl_release(old);
+}
+
 #endif  /* __LINUX_POSIX_ACL_H */
-- 
cgit v1.2.3-70-g09d2


From 641cf4a668e9e69d2bc061e953422ff72a91f86e Mon Sep 17 00:00:00 2001
From: Markus Trippelsdorf <markus@trippelsdorf.de>
Date: Wed, 24 Jun 2009 22:28:52 +0200
Subject: inline functions left without protection of ifdef (acl)

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/posix_acl.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index 0cdba01b775..c513466c7dc 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -83,6 +83,7 @@ extern int posix_acl_chmod_masq(struct posix_acl *, mode_t);
 extern struct posix_acl *get_posix_acl(struct inode *, int);
 extern int set_posix_acl(struct inode *, int, struct posix_acl *);
 
+#ifdef CONFIG_FS_POSIX_ACL
 static inline struct posix_acl *get_cached_acl(struct inode *inode, int type)
 {
 	struct posix_acl **p, *acl;
@@ -146,5 +147,5 @@ static inline void forget_cached_acl(struct inode *inode, int type)
 	if (old != ACL_NOT_CACHED)
 		posix_acl_release(old);
 }
-
+#endif
 #endif  /* __LINUX_POSIX_ACL_H */
-- 
cgit v1.2.3-70-g09d2


From 72c04902d1e27c8a324014cff1d4475c11b1cecd Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 24 Jun 2009 16:58:48 -0400
Subject: Get "no acls for this inode" right, fix shmem breakage

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/inode.c          | 6 ++----
 fs/jffs2/acl.c            | 3 +--
 include/linux/posix_acl.h | 9 +++++++++
 mm/shmem.c                | 5 +----
 4 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 78ad38ddd01..dbe1aabf96c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2122,10 +2122,8 @@ static void btrfs_read_locked_inode(struct inode *inode)
 	 * any xattrs or acls
 	 */
 	maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino);
-	if (!maybe_acls) {
-		inode->i_acl = NULL;
-		inode->i_default_acl = NULL;
-	}
+	if (!maybe_acls)
+		cache_no_acl(inode);
 
 	BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0,
 						alloc_group_block, 0);
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index edd2ad6416d..8fcb6239218 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -284,8 +284,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
 	struct posix_acl *acl, *clone;
 	int rc;
 
-	inode->i_default_acl = NULL;
-	inode->i_acl = NULL;
+	cache_no_acl(inode);
 
 	if (S_ISLNK(*i_mode))
 		return 0;	/* Symlink always has no-ACL */
diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index c513466c7dc..065a3652a3e 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -148,4 +148,13 @@ static inline void forget_cached_acl(struct inode *inode, int type)
 		posix_acl_release(old);
 }
 #endif
+
+static inline void cache_no_acl(struct inode *inode)
+{
+#ifdef CONFIG_FS_POSIX_ACL
+	inode->i_acl = NULL;
+	inode->i_default_acl = NULL;
+#endif
+}
+
 #endif  /* __LINUX_POSIX_ACL_H */
diff --git a/mm/shmem.c b/mm/shmem.c
index 5f2019fc789..d713239ce2c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1558,6 +1558,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, int mode,
 		spin_lock_init(&info->lock);
 		info->flags = flags & VM_NORESERVE;
 		INIT_LIST_HEAD(&info->swaplist);
+		cache_no_acl(inode);
 
 		switch (mode & S_IFMT) {
 		default:
@@ -2379,10 +2380,6 @@ static struct inode *shmem_alloc_inode(struct super_block *sb)
 	p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
 	if (!p)
 		return NULL;
-#ifdef CONFIG_TMPFS_POSIX_ACL
-	p->vfs_inode.i_acl = NULL;
-	p->vfs_inode.i_default_acl = NULL;
-#endif
 	return &p->vfs_inode;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 0803172778901e24a75ab074798d98c2b7411559 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 8 Sep 2009 17:53:04 -0700
Subject: dmaengine: kill tx_list

The tx_list attribute of struct dma_async_tx_descriptor is common to
most, but not all dma driver implementations.  None of the upper level
code (dmaengine/async_tx) uses it, so allow drivers to implement it
locally if they need it.  This saves sizeof(struct list_head) bytes for
drivers that do not manage descriptors with a linked list (e.g.: ioatdma
v2,3).

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/dmaengine.c   | 1 -
 include/linux/dmaengine.h | 3 ---
 2 files changed, 4 deletions(-)

(limited to 'include')

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 5a87384ea4f..562d182eae6 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -933,7 +933,6 @@ void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
 {
 	tx->chan = chan;
 	spin_lock_init(&tx->lock);
-	INIT_LIST_HEAD(&tx->tx_list);
 }
 EXPORT_SYMBOL(dma_async_tx_descriptor_init);
 
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index ffefba81c81..f114bc7790b 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -180,8 +180,6 @@ typedef void (*dma_async_tx_callback)(void *dma_async_param);
  * @flags: flags to augment operation preparation, control completion, and
  * 	communicate status
  * @phys: physical address of the descriptor
- * @tx_list: driver common field for operations that require multiple
- *	descriptors
  * @chan: target channel for this operation
  * @tx_submit: set the prepared descriptor(s) to be executed by the engine
  * @callback: routine to call after this operation is complete
@@ -195,7 +193,6 @@ struct dma_async_tx_descriptor {
 	dma_cookie_t cookie;
 	enum dma_ctrl_flags flags; /* not a 'long' to pack with cookie */
 	dma_addr_t phys;
-	struct list_head tx_list;
 	struct dma_chan *chan;
 	dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx);
 	dma_async_tx_callback callback;
-- 
cgit v1.2.3-70-g09d2